Example #1
0
class TestAwsDataFactory:
    ''' Unit tests for the AwsDataFactory class. '''

    def setup(self):
        ''' Set up before each test '''
        logger.info('Setting up the test data table...')
        self.global_table = Table('test_global_data')
        self.set_table = Table('test_set_data')        
 
        logger.info('Setting up the DataFactory instance...')
        config = ConfigParser()
        config.add_section('database')
        config.set('database', 'global_data_table', 'test_global_data')
        config.set('database', 'set_data_table', 'test_set_data')
        
        self.data_factory = AwsDataFactory(config)

        logger.info('Emptying the contents of the test data table...')
        for data in self.global_table.scan():
            data.delete()
        for data in self.set_table.scan():
            data.delete()
    
    def test_create_global_data(self):
        ''' Tests creating global data. '''
        content   = 'content'
        datum_id  = 'datum_id'
        lat       = 5.5
        lon       = 6.6
        set_id    = 'global'
        timestamp = strptime('2014-01-02 01:02:03', '%Y-%m-%d %H:%M:%S')
        type      = 'type'

        self.data_factory.create_data(content, datum_id, (lon, lat), set_id, timestamp, type)
         
        raw_record = self.global_table.get_item(datum_id=datum_id) # Check the created global data record
        assert raw_record is not None
        
        record     = AwsData(raw_record)
        assert record.get_content() == content, record.get_content()
        assert record.get_datum_id() == datum_id, record.get_datum_id()
        assert record.get_location() == (lon, lat), record.get_location()
        assert record.get_set_id() == set_id, record.get_set_id()
        assert record.get_timestamp() == timestamp, record.get_timestamp()
        assert record.get_type() == type, record.get_type()

        for record in self.set_table.scan(): # Make sure there weren't any set data records created
            assert False, "Should not have record in set table!"

    def test_create_set_data(self):
        ''' Tests creating non-global data. '''
        content   = 'content'
        datum_id  = 'id'
        lat       = 5.5
        lon       = 6.6
        set_id    = 'my_set'
        timestamp = strptime('2014-01-02 01:02:03', '%Y-%m-%d %H:%M:%S')
        type      = 'type'

        self.data_factory.create_data(content, datum_id, (lon, lat), set_id, timestamp, type)
        
        # Check the created set data record
        raw_record = self.set_table.get_item(set_id=set_id, datum_id=datum_id)
        assert raw_record is not None

        record = AwsData(raw_record)
        assert record.get_content() == content, record.get_content()
        assert record.get_datum_id() == datum_id, record.get_datum_id()
        assert record.get_location() == (lon, lat), record.get_location()
        assert record.get_set_id() == set_id, record.get_set_id()
        assert record.get_timestamp() == timestamp, record.get_timestamp()
        assert record.get_type() == 'type', record.get_type()
        
        for record in self.global_table.scan(): # Make sure there weren't any global data records created
            assert False, "Should not have record in set table!"

    def test_copy_data(self):
        ''' Tests the copy_data function. '''
        record1 = AwsData({
            'content' : 'content1',
            'datum_id' : 'id_1',
            'lat' : '10000000',
            'lat_copy' : '10000000',
            'lon' : '20000000',
            'lon_copy' : '20000000',
            'set_id' : 'set_id_1', 
            'timestamp' : 'timestamp1',
            'timestamp_copy' : 'timestamp1',
            'type' : 'type1'
        })
        record2 = AwsData({
            'content' : 'content2',
            'datum_id' : 'id_2',
            'lat' : '30000000',
            'lat_copy' : '300000000',
            'lon' : '40000000',
            'lon_copy' : '40000000',
            'set_id' : 'set_id_2',
            'timestamp' : 'timestamp2',
            'timestamp_copy' : 'timestamp2',
            'type' : 'type2'
        })

        self.data_factory.copy_data('set_id_3', [record1, record2])
         
        record = self.set_table.get_item(set_id='set_id_3', datum_id='id_1')
        assert record is not None
        assert record['content'] == 'content1', record['content']
        assert record['lat'] == '10000000', record['lat']
        assert record['lon'] == '20000000', record['lon']
        assert record['set_id'] == 'set_id_3', record['set_id']
        assert record['timestamp'] == 'timestamp1', record['timestamp']
        assert record['type'] == 'type1', record['type']

        record = self.set_table.get_item(set_id='set_id_3', datum_id='id_2')
        assert record is not None
        assert record['content'] == 'content2', record['content']
        assert record['lat'] == '30000000', record['lat']
        assert record['lon'] == '40000000', record['lon']
        assert record['set_id'] == 'set_id_3', record['set_id']
        assert record['timestamp'] == 'timestamp2', record['timestamp']
        assert record['type'] == 'type2', record['type']

    def test_filter_global_data(self):
        ''' Tests the filter_global_data function. '''
        self.data_factory.create_data(
            'content', 'id1', (0, 0), 'global', strptime('2014-01-02 01:02:03', '%Y-%m-%d %H:%S:%f'), 'type1'
        )
        self.data_factory.create_data(
            'content', 'id2', (0, 0.5), 'global', strptime('2014-01-03 01:02:03', '%Y-%m-%d %H:%S:%f'), 'type2'
        )
        self.data_factory.create_data(
            'content', 'id3', (0.5, 0.5), 'global', strptime('2014-01-02 01:02:05', '%Y-%m-%d %H:%S:%f'), 'type2'
        )

        # Test retrieve data by type only
        datas = self.data_factory.filter_global_data(type='type1')
        for data in datas:
            assert data.get_datum_id() == 'id1', data.get_id()

        # Test retrieve data by location only
        datas = self.data_factory.filter_global_data(min_lat=0, max_lat=1, min_lon=0.25, max_lon=1.25)
        for data in datas:
            assert data.get_datum_id() == 'id3', data.get_id()

        # Test retrieve data by timestamp
        datas = self.data_factory.filter_global_data(
            min_timestamp=strptime('2014-01-03 01:02:03', '%Y-%m-%d %H:%M:%S'),
            max_timestamp=strptime('2014-01-03 01:02:10', '%Y-%m-%d %H:%M:%S')
        )
        for data in datas:
            assert data.get_datum_id() == 'id1' or data.get_datum_id() == 'id3', data.get_datum_id()

    def test_get_set_data(self):
        ''' Tests the get_set_data function. '''
        self.data_factory.create_data(
            'content', 'id1', (0, 0), 'set_1', strptime('2014-01-02 01:02:03', '%Y-%m-%d %H:%S:%f'), 'type1'
        )
        self.data_factory.create_data(
            'content', 'id2', (0, 0), 'set_2', strptime('2014-01-02 01:02:03', '%Y-%m-%d %H:%S:%f'), 'type2'
        )

        datas = self.data_factory.get_data_set('set_1')
        for data in datas:
            assert data.get_datum_id() == 'id1', data.get_datum_id()
        'max_lon' : bounding_box['max_lon']
    }

    print "Set ID:", district_ids[iter]
    print "Bounding Box:", bounding_box
    print "Time:", age_limit

    print "Filtering data..."
    start_time = time.time()
    worker.filter_data_parallel(**kwargs)
    run_time = time.time() - start_time
    print "Done filtering data! Took " + str(run_time) + " seconds!"

    avg_polarity = 0.0
    num_points = 0 
    for data in data_factory.get_data_set(str(district_ids[iter])):
         polarity, objectivity = sentiment(data.get_content())
         
         avg_polarity += polarity
         num_points += 1
    if num_points > 0:
        avg_polarity /= num_points
    print "Avg Polarity:", avg_polarity
    print "# Points:", num_points

    fill_color = '000000'
    if (avg_polarity > 0):
        polar_char = int(avg_polarity * 16)
        if polar_char <= 9:
            polar_char = str(polar_char)
        else: