def setUp(self):
    """Train a tiny NaiveBayes classifier and queue five identical raw items.

    Also drops the ClassifiedStream collection so each test starts clean.
    """
    super(TestClassifierWorker, self).setUp()
    self.cls = nltk.NaiveBayesClassifier.train(
        [({'a': 'a', 'b': 'b'}, 'positive')])
    self.data = []
    for _ in range(5):
        item = RawStreamQueue()
        item.source = 'test'
        item.original = {}
        item.text = "this is a test"
        self.data.append(item)
    ClassifiedStream.drop_collection()
Example #2
0
 def save(self, raw_data):
     """Classify *raw_data* and persist it as a ClassifiedStream document.

     Copies source/original/text over, tokenizes the text, attaches the
     classifier's polarity, then saves the new row.

     Args:
         raw_data: a RawStreamQueue instance to classify and store.

     Raises:
         TypeError: if *raw_data* is not a RawStreamQueue.
     """
     # Explicit check instead of ``assert`` so validation survives ``-O``.
     if not isinstance(raw_data, RawStreamQueue):
         raise TypeError('raw_data must be a RawStreamQueue')
     row = ClassifiedStream()
     row.source = raw_data.source
     row.original = raw_data.original
     row.text = raw_data.text
     row.tokens = list(self.tokenizer.getSearchTokens(row.text))
     row.polarity = self.get_classifications(row.text)
     row.save()
     self.logger.debug('ClassifiedStream saved %s', row.id)
Example #3
0
 def setUp(self):
     """Build a trivial positive-only classifier and five raw test entries."""
     super(TestClassifierWorker, self).setUp()
     training = [({'a': 'a', 'b': 'b'}, 'positive')]
     self.cls = nltk.NaiveBayesClassifier.train(training)

     def _raw_item():
         # One fixed-content queue entry, identical for every slot.
         entry = RawStreamQueue()
         entry.source = 'test'
         entry.original = {}
         entry.text = "this is a test"
         return entry

     self.data = [_raw_item() for _ in range(5)]
     ClassifiedStream.drop_collection()
Example #4
0
    def test_find_tokens(self):
        """find_tokens returns only documents containing all given tokens.

        NOTE(review): the second positional argument (``s2.id``) appears to
        restrict the query to a single document — confirm against the
        ``find_tokens`` implementation.
        """
        s1 = ClassifiedStream()
        s1.tokens = ['a', 'b', 'c']
        s1.text = 'ab'
        s1.source = 't'
        s1.save()

        s2 = ClassifiedStream()
        s2.tokens = ['b', 'c']
        s2.text = 'bs'
        s2.source = 't'
        s2.save()

        s3 = ClassifiedStream()
        s3.tokens = ['b', 'c']
        s3.text = 'bs'
        s3.source = 't'
        s3.save()

        # assertEqual: ``assertEquals`` is a deprecated alias, removed in
        # Python 3.12.
        self.assertEqual(1, len(ClassifiedStream.find_tokens(['a', 'b'])))
        self.assertEqual(1, len(ClassifiedStream.find_tokens(['a'])))
        self.assertEqual(1,
                         len(ClassifiedStream.find_tokens(['c', 'b'], s2.id)))
        self.assertEqual(3, len(ClassifiedStream.find_tokens(['c', 'b'])))
Example #5
0
 def tearDown(self):
     """Drop the ClassifiedStream collection after each test."""
     ClassifiedStream.drop_collection()
 def tearDown(self):
     """Drop both test collections, leaving a clean database state."""
     ClassifiedStream.drop_collection()
     RawStreamQueue.drop_collection()
    def test_find_tokens(self):
        """Verify token-intersection lookup over three saved documents.

        NOTE(review): passing ``s2.id`` narrows the result to one document —
        verify the exact exclusion/restriction semantics in ``find_tokens``.
        """
        s1 = ClassifiedStream()
        s1.tokens = ['a', 'b', 'c']
        s1.text = 'ab'
        s1.source = 't'
        s1.save()

        s2 = ClassifiedStream()
        s2.tokens = ['b', 'c']
        s2.text = 'bs'
        s2.source = 't'
        s2.save()

        s3 = ClassifiedStream()
        s3.tokens = ['b', 'c']
        s3.text = 'bs'
        s3.source = 't'
        s3.save()

        # ``assertEquals`` is deprecated (removed in Python 3.12); use the
        # canonical ``assertEqual``.
        self.assertEqual(1, len(ClassifiedStream.find_tokens(['a', 'b'])))
        self.assertEqual(1, len(ClassifiedStream.find_tokens(['a'])))
        self.assertEqual(1, len(ClassifiedStream.find_tokens(['c', 'b'], s2.id)))
        self.assertEqual(3, len(ClassifiedStream.find_tokens(['c', 'b'])))