class TestAwsDataFactory:
    ''' Unit tests for the AwsDataFactory class. '''

    def setup(self):
        ''' Set up before each test. '''
        logger.info('Setting up the test data tables...')
        self.global_table = Table('test_global_data')
        self.set_table = Table('test_set_data')
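        # Table() is assumed to wrap pre-existing DynamoDB tables (boto-style API); the
        # test tables must be provisioned out of band before this suite runs.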
 
        logger.info('Setting up the DataFactory instance...')
        config = ConfigParser()
        config.add_section('database')
        config.set('database', 'global_data_table', 'test_global_data')
        config.set('database', 'set_data_table', 'test_set_data')
        
        self.data_factory = AwsDataFactory(config)
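        # The [database] section points the factory at the dedicated test tables, so
        # these tests never touch production data.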

        logger.info('Emptying the contents of the test data tables...')
        for data in self.global_table.scan():
            data.delete()
        for data in self.set_table.scan():
            data.delete()
    
    def test_create_global_data(self):
        ''' Tests creating global data. '''
        content   = 'content'
        datum_id  = 'datum_id'
        lat       = 5.5
        lon       = 6.6
        set_id    = 'global'
        timestamp = strptime('2014-01-02 01:02:03', '%Y-%m-%d %H:%M:%S')
        type      = 'type'

        self.data_factory.create_data(content, datum_id, (lon, lat), set_id, timestamp, type)
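        # A set_id of 'global' should route the record to the global data table only;
        # the scan of the set table below confirms no set-level record was written.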
         
        raw_record = self.global_table.get_item(datum_id=datum_id) # Check the created global data record
        assert raw_record is not None
        
        record     = AwsData(raw_record)
        assert record.get_content() == content, record.get_content()
        assert record.get_datum_id() == datum_id, record.get_datum_id()
        assert record.get_location() == (lon, lat), record.get_location()
        assert record.get_set_id() == set_id, record.get_set_id()
        assert record.get_timestamp() == timestamp, record.get_timestamp()
        assert record.get_type() == type, record.get_type()

        for record in self.set_table.scan(): # Make sure there weren't any set data records created
            assert False, "Should not have record in set table!"

    def test_create_set_data(self):
        ''' Tests creating non-global data. '''
        content   = 'content'
        datum_id  = 'id'
        lat       = 5.5
        lon       = 6.6
        set_id    = 'my_set'
        timestamp = strptime('2014-01-02 01:02:03', '%Y-%m-%d %H:%M:%S')
        type      = 'type'

        self.data_factory.create_data(content, datum_id, (lon, lat), set_id, timestamp, type)
        
        # Check the created set data record
        raw_record = self.set_table.get_item(set_id=set_id, datum_id=datum_id)
        assert raw_record is not None

        record = AwsData(raw_record)
        assert record.get_content() == content, record.get_content()
        assert record.get_datum_id() == datum_id, record.get_datum_id()
        assert record.get_location() == (lon, lat), record.get_location()
        assert record.get_set_id() == set_id, record.get_set_id()
        assert record.get_timestamp() == timestamp, record.get_timestamp()
        assert record.get_type() == type, record.get_type()
        
        for record in self.global_table.scan(): # Make sure there weren't any global data records created
            assert False, "Should not have record in global table!"

    def test_copy_data(self):
        ''' Tests the copy_data function. '''
        record1 = AwsData({
            'content' : 'content1',
            'datum_id' : 'id_1',
            'lat' : '10000000',
            'lat_copy' : '10000000',
            'lon' : '20000000',
            'lon_copy' : '20000000',
            'set_id' : 'set_id_1', 
            'timestamp' : 'timestamp1',
            'timestamp_copy' : 'timestamp1',
            'type' : 'type1'
        })
        record2 = AwsData({
            'content' : 'content2',
            'datum_id' : 'id_2',
            'lat' : '30000000',
            'lat_copy' : '30000000',
            'lon' : '40000000',
            'lon_copy' : '40000000',
            'set_id' : 'set_id_2',
            'timestamp' : 'timestamp2',
            'timestamp_copy' : 'timestamp2',
            'type' : 'type2'
        })

        self.data_factory.copy_data('set_id_3', [record1, record2])
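        # copy_data is expected to clone both records into the set data table under the
        # new set id 'set_id_3' while preserving every other field; the lookups below
        # verify this for each record.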
         
        record = self.set_table.get_item(set_id='set_id_3', datum_id='id_1')
        assert record is not None
        assert record['content'] == 'content1', record['content']
        assert record['lat'] == '10000000', record['lat']
        assert record['lon'] == '20000000', record['lon']
        assert record['set_id'] == 'set_id_3', record['set_id']
        assert record['timestamp'] == 'timestamp1', record['timestamp']
        assert record['type'] == 'type1', record['type']

        record = self.set_table.get_item(set_id='set_id_3', datum_id='id_2')
        assert record is not None
        assert record['content'] == 'content2', record['content']
        assert record['lat'] == '30000000', record['lat']
        assert record['lon'] == '40000000', record['lon']
        assert record['set_id'] == 'set_id_3', record['set_id']
        assert record['timestamp'] == 'timestamp2', record['timestamp']
        assert record['type'] == 'type2', record['type']

    def test_filter_global_data(self):
        ''' Tests the filter_global_data function. '''
        self.data_factory.create_data(
            'content', 'id1', (0, 0), 'global', strptime('2014-01-02 01:02:03', '%Y-%m-%d %H:%M:%S'), 'type1'
        )
        self.data_factory.create_data(
            'content', 'id2', (0, 0.5), 'global', strptime('2014-01-03 01:02:03', '%Y-%m-%d %H:%M:%S'), 'type2'
        )
        self.data_factory.create_data(
            'content', 'id3', (0.5, 0.5), 'global', strptime('2014-01-02 01:02:05', '%Y-%m-%d %H:%M:%S'), 'type2'
        )
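        # Fixture summary: id1 is type1 at (lon=0, lat=0); id2 is type2 at (0, 0.5) a day
        # later; id3 is type2 at (0.5, 0.5). Each filter below should select a different
        # subset of these records.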

        # Test retrieve data by type only
        datas = self.data_factory.filter_global_data(type='type1')
        for data in datas:
            assert data.get_datum_id() == 'id1', data.get_datum_id()

        # Test retrieve data by location only
        datas = self.data_factory.filter_global_data(min_lat=0, max_lat=1, min_lon=0.25, max_lon=1.25)
        for data in datas:
            assert data.get_datum_id() == 'id3', data.get_datum_id()

        # Test retrieve data by timestamp only; this window covers id1 and id3 but not id2
        datas = self.data_factory.filter_global_data(
            min_timestamp=strptime('2014-01-02 01:02:03', '%Y-%m-%d %H:%M:%S'),
            max_timestamp=strptime('2014-01-02 01:02:10', '%Y-%m-%d %H:%M:%S')
        )
        for data in datas:
            assert data.get_datum_id() == 'id1' or data.get_datum_id() == 'id3', data.get_datum_id()

    def test_get_set_data(self):
        ''' Tests the get_set_data function. '''
        self.data_factory.create_data(
            'content', 'id1', (0, 0), 'set_1', strptime('2014-01-02 01:02:03', '%Y-%m-%d %H:%M:%S'), 'type1'
        )
        self.data_factory.create_data(
            'content', 'id2', (0, 0), 'set_2', strptime('2014-01-02 01:02:03', '%Y-%m-%d %H:%M:%S'), 'type2'
        )

        datas = self.data_factory.get_set_data('set_1')
        for data in datas:
            assert data.get_datum_id() == 'id1', data.get_datum_id()

class TestGlobalDataJanitor:
    ''' Tests the global_data_janitor script. '''

    def setup(self):
        ''' Set up before each test. '''
        logger.info('Setting up the test global data table...')
        self.global_table = Table('test_global_data')

        logger.info('Setting up the DataFactory instance...')
        config = ConfigParser()
        config.add_section('database')
        config.set('database', 'global_data_table', 'test_global_data')
        config.set('database', 'set_data_table', 'test_set_data')       
 
        self.data_factory = AwsDataFactory(config)

        logger.info('Emptying the contents of the test data table...')
        for data in self.global_table.scan():
            data.delete()
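        # Only the global table is cleared here since the janitor under test only
        # touches global data.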

    def _dump_err_pipe(self, process):
        ''' Reads the sub-process' stderr one character at a time and prints complete lines. '''
        err_line = ''

        while True:
            # Handle the err stream
            err = process.stderr.read(1)
            if err == '\n' or err == '\r':
                print err_line # Log the line
                err_line = ''
            elif err != '':
                err_line += err

            # Check if the stream is done
            if err == '' and process.poll() is not None:
                break

    def _dump_out_pipe(self, process):
        ''' Reads the sub-process' stdout one character at a time and prints complete lines. '''
        out_line = ''

        while True:
            # Handle the out stream
            out = process.stdout.read(1)
            if out == '\n' or out == '\r':
                print out_line # Log the line
                out_line = ''
            elif out != '':
                out_line += out

            # Check if the stream is done
            if out == '' and process.poll() is not None:
                break

    def test_clean_up_old_data(self):
        ''' Tests cleaning up old data from the global data table. '''
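        # Fixture ages: id1 is two days old and id2 is four days old, so both should be
        # purged; id3 (current) and id4 (three hours old) should survive. The five-hour
        # offset is assumed to shift local time onto the UTC timestamps stored in the table.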
        timestamp = datetime.datetime.now() + datetime.timedelta(hours=5) - datetime.timedelta(days=2)
        self.data_factory.create_data('content', 'id1', (0, 0), 'global', timestamp.timetuple(), 'type')

        timestamp = datetime.datetime.now() + datetime.timedelta(hours=5) - datetime.timedelta(days=4)
        self.data_factory.create_data('content', 'id2', (0, 0), 'global', timestamp.timetuple(), 'type')

        timestamp = datetime.datetime.now() + datetime.timedelta(hours=5)
        self.data_factory.create_data('content', 'id3', (0, 0), 'global', timestamp.timetuple(), 'type')
        
        timestamp = datetime.datetime.now() + datetime.timedelta(hours=5) - datetime.timedelta(hours=3)
        self.data_factory.create_data('content', 'id4', (0, 0), 'global', timestamp.timetuple(), 'type')

        # Launch the sub-process
        process = subprocess.Popen(
            './bin/global_data_janitor ./config/test_smcity.conf',
            shell=True,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE
        )

        # Spin up the logging threads
        out_thread = Thread(target=self._dump_out_pipe, kwargs={'process' : process})
        out_thread.start()
        err_thread = Thread(target=self._dump_err_pipe, kwargs={'process' : process})
        err_thread.start()

        # Wait for the process to finish
        out_thread.join()
        err_thread.join()
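        # The pipe readers only exit once the janitor process has terminated and its
        # pipes are drained, so joining both threads also waits out the sub-process.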

        # Verify the correct records were deleted
        for raw_record in self.global_table.scan():
            record = AwsData(raw_record)
            assert (record.get_datum_id() == 'id3') or (record.get_datum_id() == 'id4'), \
                record.get_datum_id()

from smcity.polygons.complex_polygon_strategy import ComplexPolygonStrategyFactory
from smcity.transformers.geojson_transformer import GeoJsonTransformer

print "Loading the config settings..."
config = ConfigParser()
configFile = open('config/qa_smcity.conf')
config.readfp(configFile)
configFile.close()

print "Depickling the congressional districts..."
districts = pickle.load(open("manual_tests/ohio_districts_low_res.pickled", "rb"))
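# The pickled file is assumed to hold a list of district polygon objects, each
# exposing get_bounding_box() (used in the loop below).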
district_ids = []
geojson = None

# Set up the components
data_factory = AwsDataFactory(config)
result_queue = MockResultQueue()
task_queue   = MockTaskQueue()
worker       = Worker(config, result_queue, task_queue, data_factory)
transformer  = GeoJsonTransformer(data_factory)

# Extract the data points inside the time frame and geographic area of interest
age_limit    = datetime.datetime.now()
age_limit   += datetime.timedelta(hours=5) - datetime.timedelta(hours=1)
age_limit    = age_limit.timetuple()
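# age_limit presumably marks the oldest timestamp of interest (one hour back, after the
# assumed five-hour local-to-UTC shift) when pulling data for each district below.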

for index in range(len(districts)):
    print "District", (index+1)

    district_ids.append(uuid4())
    bounding_box = districts[index].get_bounding_box()