def setUp(self):
    """Prepare fixtures for classifier-worker tests.

    Trains a minimal one-example NaiveBayes classifier, builds five
    unsaved RawStreamQueue items sharing the same text, and clears the
    ClassifiedStream collection so each test starts from an empty store.
    """
    super(TestClassifierWorker, self).setUp()
    # Single training example is enough: tests only need a usable classifier.
    training = [({'a': 'a', 'b': 'b'}, 'positive')]
    self.cls = nltk.NaiveBayesClassifier.train(training)
    self.data = []
    for _ in range(5):
        item = RawStreamQueue()
        item.source = 'test'
        item.original = {}
        item.text = "this is a test"
        self.data.append(item)
    # Start each test with an empty ClassifiedStream collection.
    ClassifiedStream.drop_collection()
def save(self, raw_data):
    """Classify a raw stream item and persist it as a ClassifiedStream row.

    Copies source/original/text from *raw_data*, tokenizes the text via
    the worker's tokenizer, attaches the classifier's polarity result,
    and saves the new document.

    :param raw_data: incoming item; must be a RawStreamQueue instance.
    :raises TypeError: if *raw_data* is not a RawStreamQueue.
    """
    # Was a bare `assert`, which is silently stripped under `python -O`;
    # raise explicitly so the type check always runs.
    # NOTE(review): callers previously saw AssertionError here — confirm
    # nothing catches that specific type.
    if not isinstance(raw_data, RawStreamQueue):
        raise TypeError('raw_data must be a RawStreamQueue, got %r' % type(raw_data))
    row = ClassifiedStream()
    row.source = raw_data.source
    row.original = raw_data.original
    row.text = raw_data.text
    # Tokens are stored denormalized to support find_tokens() queries.
    row.tokens = list(self.tokenizer.getSearchTokens(row.text))
    row.polarity = self.get_classifications(row.text)
    row.save()
    # Lazy %-style args keep formatting off the hot path when DEBUG is off.
    self.logger.debug('ClassifiedStream saved %s', row.id)
def setUp(self):
    """Build the shared test fixtures.

    Trains a tiny NaiveBayes classifier from one labelled feature set,
    creates five in-memory RawStreamQueue records with identical text,
    and drops the ClassifiedStream collection for a clean slate.
    """
    super(TestClassifierWorker, self).setUp()
    self.cls = nltk.NaiveBayesClassifier.train(
        [({'a': 'a', 'b': 'b'}, 'positive')]
    )
    # Five identical unsaved fixture records.
    self.data = []
    for _ in range(5):
        record = RawStreamQueue()
        record.source = 'test'
        record.original = {}
        record.text = "this is a test"
        self.data.append(record)
    ClassifiedStream.drop_collection()
def test_find_tokens(self):
    """find_tokens() matches documents containing ANY of the given tokens,
    optionally excluding a document by id."""
    s1 = ClassifiedStream()
    s1.tokens = ['a', 'b', 'c']
    s1.text = 'ab'
    s1.source = 't'
    s1.save()
    s2 = ClassifiedStream()
    s2.tokens = ['b', 'c']
    s2.text = 'bs'
    s2.source = 't'
    s2.save()
    s3 = ClassifiedStream()
    s3.tokens = ['b', 'c']
    s3.text = 'bs'
    s3.source = 't'
    s3.save()
    # assertEquals is a deprecated alias removed in Python 3.12 — use
    # assertEqual instead.
    # NOTE(review): ['a', 'b'] matching only one doc suggests find_tokens
    # requires ALL tokens (s1 is the only doc with 'a') — confirm against
    # its implementation.
    self.assertEqual(1, len(ClassifiedStream.find_tokens(['a', 'b'])))
    self.assertEqual(1, len(ClassifiedStream.find_tokens(['a'])))
    # Passing s2.id excludes that document from the result set.
    self.assertEqual(1, len(ClassifiedStream.find_tokens(['c', 'b'], s2.id)))
    self.assertEqual(3, len(ClassifiedStream.find_tokens(['c', 'b'])))
def tearDown(self):
    """Remove every ClassifiedStream document created by the test."""
    ClassifiedStream.drop_collection()
def tearDown(self):
    """Drop both collections touched by the tests so state never leaks
    between test cases."""
    ClassifiedStream.drop_collection()
    RawStreamQueue.drop_collection()