class TestWorker:
    '''
    Exercises Worker against mock result/task queues and a mock data
    factory, so the tests only inspect what the mocks recorded.
    '''

    def setup(self):
        ''' Build a fresh Worker and its mock collaborators for each test. '''
        # Configuration: only the worker's batch copy size is provided
        settings = ConfigParser()
        settings.add_section('worker')
        settings.set('worker', 'batch_copy_size', '10')

        # The mocks are kept on the instance so that each test can seed
        # input data and inspect what was recorded afterwards
        self.result_queue = MockResultQueue()
        self.task_queue = MockTaskQueue()
        self.data_factory = MockDataFactory()

        # The Worker instance under test
        self.worker = Worker(
            settings,
            self.result_queue,
            self.task_queue,
            self.data_factory
        )

    def test_calculate_trending_topics(self):
        '''
        Checks the (hashtag, count) pairs returned by
        calculate_trending_topics for a small set of messages.
        '''
        # The same hashtag appears in mixed case and with trailing
        # punctuation; the expectations below require those to be
        # counted together under the upper-cased tag
        messages = (
            ('1', "#yolo something about life!"),
            ('2', "#yoLO! something else about life #YOLO but more exciting!"),
            ('3', "#KendrickLamar Money trees, shake em!"),
            ('4', "#SHESELLSSEASHELLS!!!! #Kendricklamar Snausages!"),
        )
        self.data_factory.data = [
            MockData({'id' : datum_id, 'content' : content})
            for datum_id, content in messages
        ]

        # Run the calculate trending topics
        trending_topics = self.worker.calculate_trending_topics('mock_data_set')

        # Expected topics, in the order they must be returned
        expected = (
            ('#YOLO', 3),
            ('#KENDRICKLAMAR', 2),
            ('#SHESELLSSEASHELLS', 1),
        )
        assert len(trending_topics) == 3, len(trending_topics)
        for position, (hashtag, count) in enumerate(expected):
            assert trending_topics[position][0] == hashtag, trending_topics[position][0]
            assert trending_topics[position][1] == count, trending_topics[position][1]

    def test_filter_data(self):
        '''
        Tests _filter_data() with a single keyword: exactly one datum is
        expected to be copied to the output set and one result posted.
        '''
        # Datum 2 contains the token "gun"; datum 3 only contains "gunman"
        # and is expected to be filtered out along with datum 1
        messages = (
            ('1', "This is a test message that should be filtered out!"),
            ('2', "There's a gun in our school!; Don't filter me!"),
            ('3', "There's a gunman in our school!; Filter me!"),
        )
        self.data_factory.data = [
            MockData({'id' : datum_id, 'content' : content})
            for datum_id, content in messages
        ]

        # Run the filter function
        self.worker._filter_data(
            in_data_set_id='in_data_set_id',
            out_data_set_id='out_data_set_id',
            keywords=['GUN']
        )

        # One result must be posted, tagged with the output set id
        posted = self.result_queue.posted_results
        assert len(posted) == 1, len(posted)
        assert posted[0]['set_id'] == 'out_data_set_id', posted[0]['set_id']

        # Only datum 2 must be copied, and into the output set
        copied = self.data_factory.copied_data
        assert len(copied) == 1, len(copied)
        assert copied[0].get_datum_id() == '2', copied[0].get_datum_id()
        assert self.data_factory.copied_data_set_id == 'out_data_set_id', \
            self.data_factory.copied_data_set_id

    def test_strip_lingering_whitespace(self):
        '''
        Tests _filter_data() with content tokens that carry stray leading
        or trailing punctuation (quote, comma, semicolon, period); every
        datum is still expected to match the keyword.
        '''
        # Set up the input data
        messages = (
            ('1', "'gun,"),
            ('2', "gun;"),
            ('3', "gun."),
        )
        self.data_factory.data = [
            MockData({'id' : datum_id, 'content' : content})
            for datum_id, content in messages
        ]

        # Run the filter function
        self.worker._filter_data(
            in_data_set_id='in_data_set_id',
            out_data_set_id='out_data_set_id',
            keywords=['GUN']
        )

        # All three data must have been copied
        copied = self.data_factory.copied_data
        assert len(copied) == 3, len(copied)
# Arguments for the parallel filter pass; set_id, age_limit, police_beats and
# bounding_box are defined earlier in the script.
kwargs = {
    'num_segments' : 4,
    'in_data_set_id' : 'global',
    'out_data_set_id' : set_id,
    'min_timestamp' : age_limit,
    'complex_filters' : [police_beats],
    'min_lat' : bounding_box['min_lat'],
    'max_lat' : bounding_box['max_lat'],
    'min_lon' : bounding_box['min_lon'],
    'max_lon' : bounding_box['max_lon']
}

# NOTE: each print is a single parenthesised argument so the statement emits
# the same text under Python 2 and also parses under Python 3.
print("Filtering data...")
worker.filter_data_parallel(**kwargs)
print("Done filtering data...")

print("Determine the trending topics...")
topics = worker.calculate_trending_topics(data_set_id=set_id)
for topic in topics:
    # Each topic is indexed as (name, count)
    print("%s = %s" % (topic[0], topic[1]))

# Generate the GeoJSON display
print("Generating GeoJSON display...")
geojson = transformer.plot_points(set_id)
geojson = transformer.plot_polygon(police_beats, geojson, properties={'topics' : str(topics)})

print("Writing out GeoJSON display...")
# The context manager closes the file even if the write raises
with open('single_beat_trending_topics.geojson', 'w') as outfile:
    outfile.write(geojson)