def setUp(self):
    """Build the fixtures shared by the classifier worker tests.

    Trains a NaiveBayes classifier on a single labelled sample, fills
    ``self.data`` with five ``RawStreamQueue`` objects (none of them is
    saved here) and drops the ``ClassifiedStream`` collection.
    """
    super(TestClassifierWorker, self).setUp()

    training_set = [({'a': 'a', 'b': 'b'}, 'positive')]
    self.cls = nltk.NaiveBayesClassifier.train(training_set)

    self.data = []
    for _ in range(5):
        item = RawStreamQueue()
        item.source = 'test'
        item.original = {}
        item.text = "this is a test"
        self.data.append(item)

    ClassifiedStream.drop_collection()
Example #2
0
    def _run(self):
        """Move queued raw items onto ``self.queue`` until asked to stop.

        Each pass checks ``self.stop``; when ``RawStreamQueue`` is empty the
        loop sleeps for half a second, otherwise it takes at most
        ``config.classifier_pool_size`` entries ordered by ``'-id'``, puts
        each one on ``self.queue`` and then deletes it.
        """
        self.logger.info('Starting')

        while True:
            if self.stop.is_set():
                break

            pending = RawStreamQueue.objects().count()
            if not pending:
                # Nothing to hand out yet; back off before polling again.
                time.sleep(0.5)
                continue

            ordered = RawStreamQueue.objects().order_by('-id')
            batch = ordered[0:config.classifier_pool_size]
            for item in batch:
                self.queue.put(item)
                item.delete()
Example #3
0
    def _run(self):
        """Poll ``RawStreamQueue`` and feed its entries to ``self.queue``.

        Runs until ``self.stop`` is set. A batch of at most
        ``config.classifier_pool_size`` entries (ordered by ``'-id'``) is
        queued and deleted per pass; an empty collection causes a 0.5 s
        sleep instead.
        """
        self.logger.info('Starting')

        while not self.stop.is_set():
            if RawStreamQueue.objects().count():
                queryset = RawStreamQueue.objects().order_by('-id')
                for queued in queryset[0:config.classifier_pool_size]:
                    # Hand the entry over first, then remove it from storage.
                    self.queue.put(queued)
                    queued.delete()
            else:
                time.sleep(0.5)
Example #4
0
 def setUp(self):
     """Prepare a trained classifier and five raw stream fixtures.

     The classifier is trained on one ``'positive'`` sample; the fixtures
     are appended to ``self.data`` without being saved in this method.
     The ``ClassifiedStream`` collection is dropped last.
     """
     super(TestClassifierWorker, self).setUp()

     features = {'a': 'a', 'b': 'b'}
     labelled_sample = (features, 'positive')
     self.cls = nltk.NaiveBayesClassifier.train([labelled_sample])

     self.data = []
     for _unused in range(5):
         raw = RawStreamQueue()
         raw.source = 'test'
         raw.original = {}
         raw.text = "this is a test"
         self.data.append(raw)

     ClassifiedStream.drop_collection()
    def test_workerSleep(self):
        """
        Twitter collector should sleep when no keywords are found.

        Both collections are dropped first, so ``SocketSession.get_keywords()``
        is expected to be empty; after the worker has run for one second the
        ``RawStreamQueue`` collection must still contain nothing.
        """
        RawStreamQueue.drop_collection()
        SocketSession.drop_collection()
        kill = threading.Event()

        self.assertEqual(len(SocketSession.get_keywords()), 0)
        w = TwitterWorker(kill)
        # Thread.setDaemon() is deprecated; the ``daemon`` attribute is the
        # supported spelling (assumes TwitterWorker derives from
        # threading.Thread -- confirm against its definition).
        w.daemon = True
        w.start()
        # Give the worker time to run at least one iteration.
        time.sleep(1)
        kill.set()
        self.assertEqual(RawStreamQueue.objects.count(), 0)
    def test_workerSleep(self):
        """
        Twitter collector should sleep when no keywords are found.

        Starts the worker against empty ``RawStreamQueue`` and
        ``SocketSession`` collections, lets it run for one second, signals
        it to stop and checks that nothing was written to ``RawStreamQueue``.
        """
        RawStreamQueue.drop_collection()
        SocketSession.drop_collection()
        kill = threading.Event()

        self.assertEqual(len(SocketSession.get_keywords()), 0)
        w = TwitterWorker(kill)
        # Use the ``daemon`` attribute instead of the deprecated
        # Thread.setDaemon() (presumes TwitterWorker is a threading.Thread
        # subclass -- verify against its definition).
        w.daemon = True
        w.start()
        # Let the worker run briefly before asking it to stop.
        time.sleep(1)
        kill.set()
        self.assertEqual(RawStreamQueue.objects.count(), 0)
 def save(self, tweet):
     """Store ``tweet`` as a ``RawStreamQueue`` entry if it is valid.

     Tweets rejected by ``self.is_tweet_valid`` are ignored. Accepted
     tweets are saved with the Twitter stream source, the whole tweet as
     ``original`` and ``tweet['text']`` as ``text``.
     """
     if not self.is_tweet_valid(tweet):
         return

     record = RawStreamQueue()
     record.original = tweet
     record.source = StreamSource.TWITTER
     record.text = tweet['text']
     record.save()
     logger.debug("RawStreamQueue saved with id %s", record.id)
Example #8
0
 def save(self, tweet):
     """Persist a valid tweet into ``RawStreamQueue``; skip invalid ones.

     :param tweet: mapping read via ``tweet['text']``; it is also stored
         unchanged in the entry's ``original`` field.
     """
     accepted = self.is_tweet_valid(tweet)
     if not accepted:
         return None

     entry = RawStreamQueue()
     entry.original = tweet
     entry.source = StreamSource.TWITTER
     entry.text = tweet['text']
     entry.save()
     # The id is logged after save() has been called on the entry.
     logger.debug("RawStreamQueue saved with id %s", entry.id)
 def tearDown(self):
     """Drop the RawStreamQueue and SocketSession collections after a test."""
     for model in (RawStreamQueue, SocketSession):
         model.drop_collection()
 def tearDown(self):
     """Clean up by dropping the collections this test case touches."""
     touched_models = (RawStreamQueue, SocketSession)
     for document_cls in touched_models:
         document_cls.drop_collection()
 def tearDown(self):
     """Drop the ClassifiedStream and RawStreamQueue collections after a test."""
     for model in (ClassifiedStream, RawStreamQueue):
         model.drop_collection()
Example #12
0
 def tearDown(self):
     """Remove classifier test data by dropping both collections in turn."""
     collections_to_drop = (ClassifiedStream, RawStreamQueue)
     for document_cls in collections_to_drop:
         document_cls.drop_collection()