Ejemplo n.º 1
0
from __future__ import unicode_literals

import nltk.corpus
from nltk import FreqDist
from dicttoxml import dicttoxml, xml_escape

# Corpus: decode every Reuters token as UTF-8; undecodable bytes are replaced
# rather than raising.
# NOTE(review): calling .decode on each token presumes the corpus reader
# yields byte strings (Python 2) -- confirm before running on Python 3.
words = [
    w.decode('utf-8', errors='replace') for w in nltk.corpus.reuters.words()
]
fd = FreqDist(words)
# Every key is passed through xml_escape before the mapping is serialised.
afd = {xml_escape(k): v for k, v in fd.items()}

# special key for sum
afd['__sum__'] = fd.N()

xml = dicttoxml(afd)

# The context manager closes the file even when the write raises, which the
# previous manual open()/close() pair did not guarantee.
with open('frequencies.xml', 'w') as f:
    f.write(xml)
Ejemplo n.º 2
0
    def test_greater_than(self):
        """Check that ``xml_escape`` turns ``>`` into the ``&gt;`` entity."""
        escaped_result = xml_escape('>')

        # The expected value is the escaped entity; comparing against the raw
        # u'>' character would only pass if no escaping took place.
        self.assertEqual(u'&gt;', escaped_result)
Ejemplo n.º 3
0
    def test_non_str_or_unicode(self):
        """Check that ``xml_escape(1)`` compares equal to the integer 1."""
        expected = 1
        actual = xml_escape(1)
        self.assertEqual(expected, actual)
Ejemplo n.º 4
0
    def test_less_than(self):
        """Check that ``xml_escape`` turns ``<`` into the ``&lt;`` entity."""
        expected = u'&lt;'
        actual = xml_escape('<')
        self.assertEqual(expected, actual)
Ejemplo n.º 5
0
    def test_apostrophe(self):
        """Check that ``xml_escape`` turns ``'`` into the ``&apos;`` entity."""
        expected = u'&apos;'
        actual = xml_escape('\'')
        self.assertEqual(expected, actual)
Ejemplo n.º 6
0
    def test_quotation_mark(self):
        """Check that ``xml_escape`` turns ``"`` into the ``&quot;`` entity."""
        expected = u'&quot;'
        actual = xml_escape('"')
        self.assertEqual(expected, actual)
Ejemplo n.º 7
0
    def test_ampersand(self):
        """Check that ``xml_escape`` turns ``&`` into the ``&amp;`` entity."""
        expected = u'&amp;'
        actual = xml_escape('&')
        self.assertEqual(expected, actual)
from __future__ import unicode_literals

import nltk.corpus
from nltk import FreqDist
from dicttoxml import dicttoxml, xml_escape

# Corpus: decode every Reuters token as UTF-8; undecodable bytes are replaced
# rather than raising.
# NOTE(review): calling .decode on each token presumes the corpus reader
# yields byte strings (Python 2) -- confirm before running on Python 3.
words = [
    w.decode('utf-8', errors='replace')
    for w in nltk.corpus.reuters.words()
]
fd = FreqDist(words)
# Every key is passed through xml_escape before the mapping is serialised.
afd = {xml_escape(k): v for k, v in fd.items()}

# special key for sum
afd['__sum__'] = fd.N()

xml = dicttoxml(afd)

# Use a context manager so the file handle is released even if the write
# fails; the manual open()/close() pair leaked it on error.
with open('frequencies.xml', 'w') as f:
    f.write(xml)