Esempio n. 1
0
    def test01(self):
        """Check the encoding guessed for each sample file under TESTDATA."""
        guesser = EncodingGuesser()

        # (sample file name, expected result of guess()); the first sample
        # is expected to yield None rather than an encoding name.
        expectations = (
            ("gnosis-readme", None),
            ("cp850a.txt", "cp850"),
            ("cp850b.txt", "cp850"),
            ("README.TXT", "cp850"),
            ("cp1252a.txt", "cp1252"),
            ("cp1252b.txt", "cp1252"),
        )

        for sample, expected in expectations:
            path = os.path.join(TESTDATA, sample)
            self.assertEqual(guesser.guess(path), expected)
Esempio n. 2
0
 def __init__(self, vol):
     """Remember the given volume and create an encoding guesser.

     vol is stored unchanged on self.volume; a new EncodingGuesser is
     created and stored on self.encodingGuesser.
     """
     # NOTE(review): the Task.__init__ call below is commented out --
     # confirm whether it is still needed.
     #Task.__init__(self)
     self.volume = vol
     self.encodingGuesser = EncodingGuesser()
Esempio n. 3
0
        #self.status("%s : %d words",fileRow.name,len(tokens))
        #print fileRow.path(), ".occurences.deleteAll()"
        fileRow.occurences.deleteAll()
        #self.occurences.query(file=deleteRows(file=fileRow)
        pos = 0
        for token in tokens:
            pos += 1
            self.status(fileRow.path() + ": " + token)
            word = self.words.peek(token)
            if word is None:
                word = self.words.appendRow(id=token)
            #elif word.ignore:
            #    continue
            fileRow.occurences.appendRow(word=word, pos=pos)

# Module-level EncodingGuesser instance, created once when this module runs.
encodingGuesser = EncodingGuesser()


def get_reader(fullname):
    """Return the reader registered for the extension of *fullname*.

    The extension is obtained with os.path.splitext and lower-cased
    before it is looked up in ``readers``.  When ``readers`` has no entry
    for that key, ``non_reader`` is returned instead.
    """
    ext = os.path.splitext(fullname)[1].lower()
    try:
        return readers[ext]
    except KeyError:
        # The bare form is valid on both Python 2 and Python 3; the old
        # "except KeyError, e" spelling is a SyntaxError on Python 3 and
        # the bound exception was never used.
        return non_reader


def read_content(sess, fileInstance, fullname):
    """Read *fullname* using the reader that get_reader() picks for it.

    The chosen reader is called with (sess, fileInstance, fullname) and
    whatever it returns is passed back unchanged.
    """
    return get_reader(fullname)(sess, fileInstance, fullname)