コード例 #1
0
ファイル: ppmc.py プロジェクト: worldwise001/stylometry
class RedditPPM(Classifier):
    """Classifier that scores text by how cheaply a trained trie encodes it.

    ``train`` builds a ``Trie`` from one document; ``score`` feeds new text
    into a copy of that trie and reports the accumulated ``bit_encoding``
    per input bit.
    """

    # Model built by train(); stays None until train() has been called.
    trie = None

    def train(self, document, order=5):
        """Build a fresh trie from ``document``, replacing any earlier model.

        Args:
            document: Iterable of symbols; each item is passed to
                ``Trie.add`` in order.
            order: Forwarded to the ``Trie`` constructor (presumably the
                context order -- confirm against ``Trie``).
        """
        # NOTE(review): score() iterates UTF-8 *encoded* text, while this
        # method iterates ``document`` as given. On Python 3 that means
        # ints there versus whatever the caller passes here -- confirm
        # callers hand in encoded bytes so both sides use the same symbols.
        self.trie = Trie(order)
        for symbol in document:
            self.trie.add(symbol)

    def test(self, documents):
        """Score every row in ``documents`` against the trained model.

        Args:
            documents: Iterable of mappings providing the keys ``'id'``,
                ``'username'`` and ``'text'``.

        Returns:
            A list with one dict per row holding ``'id'``, ``'username'``,
            ``'label'`` (whether ``self.user`` equals the row's username)
            and ``'score'`` (the result of ``score`` on the row's text).
        """
        return [
            {
                'id': row['id'],
                'username': row['username'],
                'label': (self.user == row['username']),
                'score': self.score(row['text']),
            }
            for row in documents
        ]

    def score(self, text):
        """Return the accumulated encoding cost of ``text`` per input bit.

        The text is UTF-8 encoded and fed symbol by symbol into a duplicate
        of the trained trie, so the trained model itself is not modified.
        After each symbol the duplicate's ``bit_encoding`` is added to a
        running total, which is finally divided by the encoded length in
        bits.

        Args:
            text: Text to score; must support ``encode('utf-8')``.

        Returns:
            The total divided by ``len(encoded) * 8``, or ``0.0`` when the
            encoded text is empty (nothing to encode; this also avoids a
            division by zero).
        """
        document = text.encode('utf-8')
        if not document:
            # An empty text used to raise ZeroDivisionError and abort the
            # whole test() batch; report a neutral score instead.
            return 0.0

        # Work on a copy so that scoring never alters the trained model.
        scratch = self.trie.duplicate()
        total_bits = 0
        for symbol in document:
            scratch.add(symbol)
            total_bits += scratch.bit_encoding
        return total_bits / (len(document) * 8)

    def __str__(self):
        """Format the trie's ``bit_encoding`` and ``probability_encoding``.

        Requires a trained model: ``self.trie`` must not be ``None``.
        """
        return '%f %f' % (self.trie.bit_encoding, self.trie.probability_encoding)
コード例 #2
0
ファイル: ppmc.py プロジェクト: worldwise001/stylometry
 def train(self, document, order=5):
     """Rebuild ``self.trie`` from ``document``.

     Any trie already held by the instance is deleted first; a new
     ``Trie(order)`` is then created and every item of ``document`` is
     passed to its ``add`` method in iteration order.
     """
     # Drop the previous model before allocating its replacement.
     if self.trie is not None:
         del self.trie
     self.trie = Trie(order)
     add_symbol = self.trie.add
     for symbol in document:
         add_symbol(symbol)