def setUp(self):
    super(TestClassifierWorker, self).setUp()
    self.cls = nltk.NaiveBayesClassifier.train([({'a': 'a', 'b': 'b'}, 'positive')])
    self.data = []
    for i in range(5):
        d = RawStreamQueue()
        d.source = 'test'
        d.original = {}
        d.text = "this is a test"
        self.data.append(d)
    ClassifiedStream.drop_collection()
def _run(self):
    self.logger.info('Starting')
    while not self.stop.is_set():
        # Sleep briefly when there is nothing queued for classification.
        if not RawStreamQueue.objects().count():
            time.sleep(0.5)
            continue
        # Take a batch of raw items, hand them to the worker queue and
        # delete them from the collection so they are not processed twice.
        raw_data = RawStreamQueue.objects().order_by('-id')[0:config.classifier_pool_size]
        for task in raw_data:
            self.queue.put(task)
            task.delete()
def test_workerSleep(self):
    """The Twitter collector should sleep if no keywords are found."""
    RawStreamQueue.drop_collection()
    SocketSession.drop_collection()
    kill = threading.Event()
    self.assertEqual(len(SocketSession.get_keywords()), 0)
    w = TwitterWorker(kill)
    w.setDaemon(True)
    w.start()
    time.sleep(1)
    kill.set()
    self.assertEqual(RawStreamQueue.objects.count(), 0)
def save(self, tweet):
    if self.is_tweet_valid(tweet):
        o = RawStreamQueue()
        o.original = tweet
        o.source = StreamSource.TWITTER
        o.text = tweet['text']
        o.save()
        logger.debug("RawStreamQueue saved with id %s", o.id)
def tearDown(self):
    RawStreamQueue.drop_collection()
    SocketSession.drop_collection()
def tearDown(self):
    ClassifiedStream.drop_collection()
    RawStreamQueue.drop_collection()