예제 #1
0
 def setup(self):
     ''' Create the mock collaborators, the Worker and an unstarted worker thread before each test. '''
     # Mock collaborators handed to the worker under test
     self.result_queue = MockResultQueue()
     self.task_queue = MockTaskQueue()
     self.tweet_factory = MockTweetFactory()
     self.worker = Worker(self.result_queue, self.task_queue, self.tweet_factory)

     # The thread is only created here; nothing in this method starts it
     self.worker_thread = Thread(target=self.worker.perform_tasks)
     # Assuming Thread is threading.Thread: `daemon` is the attribute it
     # actually reads. The previous `is_daemon` assignment merely attached
     # an unused attribute to the thread object.
     self.worker_thread.daemon = False
예제 #2
0
class TestWorker():
    ''' Unit tests for the Worker class. '''

    def setup(self):
        ''' Create the mock collaborators, the Worker and an unstarted worker thread before each test. '''
        # Mock collaborators handed to the worker under test
        self.result_queue = MockResultQueue()
        self.task_queue = MockTaskQueue()
        self.tweet_factory = MockTweetFactory()
        self.worker = Worker(self.result_queue, self.task_queue, self.tweet_factory)

        # The thread is only created here; individual tests start it
        self.worker_thread = Thread(target=self.worker.perform_tasks)
        # Assuming Thread is threading.Thread: `daemon` is the attribute it
        # actually reads. The previous `is_daemon` assignment merely attached
        # an unused attribute to the thread object.
        self.worker_thread.daemon = False

    def test_perform_tasks_count_tweets(self):
        ''' Tests the perform_tasks function when a count_tweets task is received. '''
        # Load the test data: one count_tweets task and three tweets to count
        self.task_queue.task = {
            'job_id' : 'job_id',
            'task' : 'count_tweets',
            'coordinate_box' : {'min_lat' : 0, 'min_lon' : 0, 'max_lat' : 0, 'max_lon' : 0}
        }
        self.tweet_factory.tweets = ['tweet', 'tweet', 'tweet']

        # Launch the worker thread
        self.worker_thread.start()

        # Give the worker a moment to process the task, then ask it to stop
        time.sleep(1)
        self.worker.shutdown()

        # The result queue should have received the job id, box and tweet count
        assert self.result_queue.job_id == 'job_id', self.result_queue.job_id
        assert self.result_queue.coordinate_box == {
            'min_lat' : 0, 'min_lon' : 0, 'max_lat' : 0, 'max_lon' : 0
        }, self.result_queue.coordinate_box
        assert self.result_queue.count == 3, self.result_queue.count

        # The task queue should have been told that a task finished
        assert self.task_queue.finished_task is not None
예제 #3
0
    def setup(self):
        ''' Prepare the configuration, the mock components and the Worker before each test. '''
        # Mock components the worker is wired to
        self.result_queue = MockResultQueue()
        self.task_queue = MockTaskQueue()
        self.data_factory = MockDataFactory()

        # Testing configuration: a single 'worker' section holding the batch copy size
        settings = ConfigParser()
        settings.add_section('worker')
        settings.set('worker', 'batch_copy_size', '10')

        # Worker instance under test
        self.worker = Worker(settings, self.result_queue, self.task_queue, self.data_factory)
예제 #4
0
from smcity.transformers.geojson_transformer import GeoJsonTransformer

print "Loading the config settings..."
config = ConfigParser()
configFile = open("config/seattle_smcity.conf")
config.readfp(configFile)
configFile.close()

# Load the GeoJSON description of the police beats
police_beats_geojson = geojson.loads(open("seattle_slides/seattle_police_beats.geojson").read())["features"]

# Set up the components
data_factory = AwsDataFactory(config)
result_queue = MockResultQueue()
task_queue = MockTaskQueue()
worker = Worker(config, result_queue, task_queue, data_factory)
transformer = GeoJsonTransformer(data_factory)
geojson = None

for iter in range(len(police_beats_geojson)):
    police_beat = ComplexPolygonStrategyFactory().from_geojson(police_beats_geojson[iter]["geometry"])

    # Generate the data set of interest
    set_id = str(uuid4())
    age_limit = datetime.datetime.now()
    age_limit -= datetime.timedelta(hours=1)
    age_limit = age_limit.timetuple()
    bounding_box = police_beat.get_bounding_box()
    keywords = ["Mayday", "parade"]
    print "Bounding Box:", bounding_box
    print "Set ID:", set_id
예제 #5
0
class TestWorker:
    ''' Exercises the Worker class against mock queues and a mock data factory. '''

    def setup(self):
        ''' Build the configuration, the mock components and the Worker before each test. '''
        # Mock components the worker is wired to
        self.result_queue = MockResultQueue()
        self.task_queue = MockTaskQueue()
        self.data_factory = MockDataFactory()

        # Testing configuration: a single 'worker' section holding the batch copy size
        settings = ConfigParser()
        settings.add_section('worker')
        settings.set('worker', 'batch_copy_size', '10')

        # Worker instance under test
        self.worker = Worker(settings, self.result_queue, self.task_queue, self.data_factory)

    def test_calculate_trending_topics(self):
        ''' Checks the (hashtag, count) entries returned by calculate_trending_topics. '''
        # Four messages whose hashtags differ in letter case and trailing punctuation
        messages = [
            ('1', "#yolo something about life!"),
            ('2', "#yoLO! something else about life #YOLO but more exciting!"),
            ('3', "#KendrickLamar Money trees, shake em!"),
            ('4', "#SHESELLSSEASHELLS!!!! #Kendricklamar Snausages!"),
        ]
        self.data_factory.data = [
            MockData({'id' : datum_id, 'content' : content})
            for datum_id, content in messages
        ]

        # Run the calculation on the mock data set
        trending_topics = self.worker.calculate_trending_topics('mock_data_set')

        # Each position must hold the expected upper-cased hashtag and its count
        expected_topics = [
            ('#YOLO', 3),
            ('#KENDRICKLAMAR', 2),
            ('#SHESELLSSEASHELLS', 1),
        ]
        assert len(trending_topics) == 3, len(trending_topics)
        for position, (hashtag, occurrences) in enumerate(expected_topics):
            topic = trending_topics[position]
            assert topic[0] == hashtag, topic[0]
            assert topic[1] == occurrences, topic[1]

    def test_filter_data(self):
        ''' Checks that _filter_data() copies only the datum containing the keyword as a whole token. '''
        # Datum 2 mentions "gun"; datum 3 only mentions "gunman"; datum 1 mentions neither
        self.data_factory.data = [
            MockData({'id' : '1', 'content' : "This is a test message that should be filtered out!"}),
            MockData({'id' : '2', 'content' : "There's a gun in our school!; Don't filter me!"}),
            MockData({'id' : '3', 'content' : "There's a gunman in our school!; Filter me!"})
        ]

        # Run the filter with a single upper-case keyword
        self.worker._filter_data(
            in_data_set_id='in_data_set_id',
            out_data_set_id='out_data_set_id',
            keywords=['GUN']
        )

        # Exactly one result is posted, naming the output data set
        posted = self.result_queue.posted_results
        assert len(posted) == 1, len(posted)
        assert posted[0]['set_id'] == 'out_data_set_id', posted[0]['set_id']

        # Only datum 2 is copied, and it is copied into the output data set
        copied = self.data_factory.copied_data
        assert len(copied) == 1, len(copied)
        assert copied[0].get_datum_id() == '2', copied[0].get_datum_id()
        assert self.data_factory.copied_data_set_id == 'out_data_set_id', \
            self.data_factory.copied_data_set_id

    def test_strip_lingering_whitespace(self):
        ''' Checks _filter_data() on content tokens wrapped in punctuation (quote, comma, semicolon, period). '''
        # Every datum is the keyword with punctuation attached to it
        self.data_factory.data = [
            MockData({'id' : datum_id, 'content' : content})
            for datum_id, content in [('1', "'gun,"), ('2', "gun;"), ('3', "gun.")]
        ]

        # Run the filter with a single upper-case keyword
        self.worker._filter_data(
            in_data_set_id='in_data_set_id',
            out_data_set_id='out_data_set_id',
            keywords=['GUN']
        )

        # All three data should have been copied
        copied = self.data_factory.copied_data
        assert len(copied) == 3, len(copied)
예제 #6
0
# Load the config settings
config = ConfigParser()
configFile = open('config/qa_smcity.conf')
config.readfp(configFile)
configFile.close()

# Load the GeoJSON description of Franklin county
franklin_geojson = geojson.loads(open('manual_tests/franklin.geojson').read())
franklin_county = ComplexPolygonStrategyFactory().from_geojson(franklin_geojson['features'][0]['geometry'])

# Set up the components
data_factory = AwsDataFactory(config)
result_queue = MockResultQueue()
task_queue   = MockTaskQueue()
worker       = Worker(config, result_queue, task_queue, data_factory)
transformer  = GeoJsonTransformer(data_factory)

# Generate the data set of interest
set_id    = str(uuid4())
age_limit = datetime.datetime.now()
age_limit += datetime.timedelta(hours=5) - datetime.timedelta(hours=1)
age_limit = age_limit.timetuple()
bounding_box = franklin_county.get_bounding_box()
print "Bounding Box:", bounding_box
print "Set ID:", set_id
print "Time:", age_limit
kwargs = {
    'num_segments' : 4,
    'in_data_set_id' : 'global',
    'out_data_set_id' : set_id,
print "Loading the config settings..."
config = ConfigParser()
configFile = open('config/seattle_smcity.conf')
config.readfp(configFile)
configFile.close()

# Load the GeoJSON description of the police beats
police_beats_geojson = geojson.loads(open('seattle_slides/seattle_police_beats.geojson').read())
police_beats = ComplexPolygonStrategyFactory().from_geojson(police_beats_geojson['features'][0]['geometry'])

# Set up the components
data_factory = AwsDataFactory(config)
result_queue = MockResultQueue()
task_queue   = MockTaskQueue()
worker       = Worker(config, result_queue, task_queue, data_factory)
transformer  = GeoJsonTransformer(data_factory)

# Generate the data set of interest
set_id    = str(uuid4())
age_limit = datetime.datetime.now()
age_limit -= datetime.timedelta(days=3)
age_limit = age_limit.timetuple()
bounding_box = police_beats.get_bounding_box()
print "Bounding Box:", bounding_box
print "Set ID:", set_id
print "Time:", age_limit
kwargs = {
    'num_segments' : 4,
    'in_data_set_id' : 'global',
    'out_data_set_id' : set_id,