def setup(self):
    ''' Prepare a fresh global data table and data factory before each test. '''
    logger.info('Setting up the test global data table...')
    self.global_table = Table('test_global_data')

    # Build the minimal 'database' config section the factory needs.
    logger.info('Setting up the DataFactory instance...')
    parser = ConfigParser()
    parser.add_section('database')
    for option, value in [('global_data_table', 'test_global_data'),
                          ('set_data_table', 'test_set_data')]:
        parser.set('database', option, value)
    self.data_factory = AwsDataFactory(parser)

    # Start from an empty table so each test only sees its own records.
    logger.info('Emptying the contents of the test data table...')
    for stale in self.global_table.scan():
        stale.delete()
class TestAwsDataFactory:
    ''' Unit tests for the AwsDataFactory class. '''

    def setup(self):
        ''' Set up before each test. '''
        logger.info('Setting up the test data table...')
        self.global_table = Table('test_global_data')
        self.set_table = Table('test_set_data')

        logger.info('Setting up the DataFactory instance...')
        config = ConfigParser()
        config.add_section('database')
        config.set('database', 'global_data_table', 'test_global_data')
        config.set('database', 'set_data_table', 'test_set_data')
        self.data_factory = AwsDataFactory(config)

        # Empty both tables so the scans below only see records created
        # by the current test.
        logger.info('Emptying the contents of the test data table...')
        for data in self.global_table.scan():
            data.delete()
        for data in self.set_table.scan():
            data.delete()

    def test_create_global_data(self):
        ''' Tests creating global data. '''
        content = 'content'
        datum_id = 'datum_id'
        lat = 5.5
        lon = 6.6
        set_id = 'global'
        timestamp = strptime('2014-01-02 01:02:03', '%Y-%m-%d %H:%M:%S')
        data_type = 'type'  # renamed from 'type' to avoid shadowing the builtin

        self.data_factory.create_data(content, datum_id, (lon, lat), set_id, timestamp, data_type)

        raw_record = self.global_table.get_item(datum_id=datum_id)

        # Check the created global data record
        assert raw_record is not None
        record = AwsData(raw_record)
        assert record.get_content() == content, record.get_content()
        assert record.get_datum_id() == datum_id, record.get_datum_id()
        assert record.get_location() == (lon, lat), record.get_location()
        assert record.get_set_id() == set_id, record.get_set_id()
        assert record.get_timestamp() == timestamp, record.get_timestamp()
        assert record.get_type() == data_type, record.get_type()

        # Make sure there weren't any set data records created
        for record in self.set_table.scan():
            assert False, "Should not have record in set table!"

    def test_create_set_data(self):
        ''' Tests creating non-global data. '''
        content = 'content'
        datum_id = 'id'
        lat = 5.5
        lon = 6.6
        set_id = 'my_set'
        timestamp = strptime('2014-01-02 01:02:03', '%Y-%m-%d %H:%M:%S')
        data_type = 'type'

        self.data_factory.create_data(content, datum_id, (lon, lat), set_id, timestamp, data_type)

        # Check the created set data record
        raw_record = self.set_table.get_item(set_id=set_id, datum_id=datum_id)
        assert raw_record is not None
        record = AwsData(raw_record)
        assert record.get_content() == content, record.get_content()
        assert record.get_datum_id() == datum_id, record.get_datum_id()
        assert record.get_location() == (lon, lat), record.get_location()
        assert record.get_set_id() == set_id, record.get_set_id()
        assert record.get_timestamp() == timestamp, record.get_timestamp()
        # Compare against the local variable instead of a second hard-coded literal.
        assert record.get_type() == data_type, record.get_type()

        # Make sure there weren't any global data records created
        for record in self.global_table.scan():
            # BUG FIX: message previously said "set table" while scanning the global table.
            assert False, "Should not have record in global table!"

    def test_copy_data(self):
        ''' Tests the copy_data function. '''
        record1 = AwsData({
            'content' : 'content1',
            'datum_id' : 'id_1',
            'lat' : '10000000',
            'lat_copy' : '10000000',
            'lon' : '20000000',
            'lon_copy' : '20000000',
            'set_id' : 'set_id_1',
            'timestamp' : 'timestamp1',
            'timestamp_copy' : 'timestamp1',
            'type' : 'type1'
        })
        record2 = AwsData({
            'content' : 'content2',
            'datum_id' : 'id_2',
            'lat' : '30000000',
            'lat_copy' : '300000000',  # NOTE(review): extra zero vs 'lat' above — confirm intentional
            'lon' : '40000000',
            'lon_copy' : '40000000',
            'set_id' : 'set_id_2',
            'timestamp' : 'timestamp2',
            'timestamp_copy' : 'timestamp2',
            'type' : 'type2'
        })

        self.data_factory.copy_data('set_id_3', [record1, record2])

        # Both records should now exist under the new set id with their
        # original payloads intact.
        record = self.set_table.get_item(set_id='set_id_3', datum_id='id_1')
        assert record is not None
        assert record['content'] == 'content1', record['content']
        assert record['lat'] == '10000000', record['lat']
        assert record['lon'] == '20000000', record['lon']
        assert record['set_id'] == 'set_id_3', record['set_id']
        assert record['timestamp'] == 'timestamp1', record['timestamp']
        assert record['type'] == 'type1', record['type']

        record = self.set_table.get_item(set_id='set_id_3', datum_id='id_2')
        assert record is not None
        assert record['content'] == 'content2', record['content']
        assert record['lat'] == '30000000', record['lat']
        assert record['lon'] == '40000000', record['lon']
        assert record['set_id'] == 'set_id_3', record['set_id']
        assert record['timestamp'] == 'timestamp2', record['timestamp']
        assert record['type'] == 'type2', record['type']

    def test_filter_global_data(self):
        ''' Tests the filter_global_data function. '''
        # BUG FIX: the timestamp formats previously used '%Y-%m-%d %H:%S:%f';
        # time.strptime does not support %f and the pattern does not match the
        # 'HH:MM:SS' strings being parsed, so create_data would raise ValueError.
        self.data_factory.create_data(
            'content', 'id1', (0, 0), 'global',
            strptime('2014-01-02 01:02:03', '%Y-%m-%d %H:%M:%S'), 'type1'
        )
        self.data_factory.create_data(
            'content', 'id2', (0, 0.5), 'global',
            strptime('2014-01-03 01:02:03', '%Y-%m-%d %H:%M:%S'), 'type2'
        )
        self.data_factory.create_data(
            'content', 'id3', (0.5, 0.5), 'global',
            strptime('2014-01-02 01:02:05', '%Y-%m-%d %H:%M:%S'), 'type2'
        )

        # Test retrieve data by type only
        datas = self.data_factory.filter_global_data(type='type1')
        for data in datas:
            # BUG FIX: assertion message called data.get_id(), which is not an
            # accessor used anywhere else on this type (get_datum_id is).
            assert data.get_datum_id() == 'id1', data.get_datum_id()

        # Test retrieve data by location only
        datas = self.data_factory.filter_global_data(min_lat=0, max_lat=1, min_lon=0.25, max_lon=1.25)
        for data in datas:
            assert data.get_datum_id() == 'id3', data.get_datum_id()

        # Test retrieve data by timestamp. The window covers
        # 2014-01-02 01:02:03 .. 01:02:10, i.e. id1 and id3 but not id2
        # (2014-01-03), which is what the assertion below expects. The original
        # window started on 2014-01-03 and contradicted its own assertion.
        datas = self.data_factory.filter_global_data(
            min_timestamp=strptime('2014-01-02 01:02:03', '%Y-%m-%d %H:%M:%S'),
            max_timestamp=strptime('2014-01-02 01:02:10', '%Y-%m-%d %H:%M:%S')
        )
        for data in datas:
            assert data.get_datum_id() == 'id1' or data.get_datum_id() == 'id3', \
                data.get_datum_id()

    def test_get_set_data(self):
        ''' Tests the get_set_data function. '''
        # BUG FIX: same '%H:%S:%f' -> '%H:%M:%S' strptime format repair.
        self.data_factory.create_data(
            'content', 'id1', (0, 0), 'set_1',
            strptime('2014-01-02 01:02:03', '%Y-%m-%d %H:%M:%S'), 'type1'
        )
        self.data_factory.create_data(
            'content', 'id2', (0, 0), 'set_2',
            strptime('2014-01-02 01:02:03', '%Y-%m-%d %H:%M:%S'), 'type2'
        )

        datas = self.data_factory.get_data_set('set_1')
        for data in datas:
            assert data.get_datum_id() == 'id1', data.get_datum_id()
class TestGlobalDataJanitor: ''' Tests the global_data_janitor script. ''' def setup(self): ''' Set up before each test. ''' logger.info('Setting up the test global data table...') self.global_table = Table('test_global_data') logger.info('Setting up the DataFactory instance...') config = ConfigParser() config.add_section('database') config.set('database', 'global_data_table', 'test_global_data') config.set('database', 'set_data_table', 'test_set_data') self.data_factory = AwsDataFactory(config) logger.info('Emptying the contents of the test data table...') for data in self.global_table.scan(): data.delete() def _dump_err_pipe(self, process): err_line = '' while True: # Handle the out stream err = process.stderr.read(1) if err == '\n' or err == '\r': print err_line # Log the line err_line = '' elif err != '': err_line += err # Check if the stream are done if err == '' and process.poll() != None: break def _dump_out_pipe(self, process): out_line = '' while True: # Handle the out stream out = process.stdout.read(1) if out == '\n' or out == '\r': print out_line # Log the line out_line = '' elif out != '': out_line += out # Check if the stream are done if out == '' and process.poll() != None: break def test_clean_up_old_data(self): ''' Tests cleaning up old data from the global data table. 
''' timestamp = datetime.datetime.now() + datetime.timedelta(hours=5) - datetime.timedelta(days=2) self.data_factory.create_data('content', 'id1', (0, 0), 'global', timestamp.timetuple(), 'type') timestamp = datetime.datetime.now() + datetime.timedelta(hours=5) - datetime.timedelta(days=4) self.data_factory.create_data('content', 'id2', (0, 0), 'global', timestamp.timetuple(), 'type') timestamp = datetime.datetime.now() + datetime.timedelta(hours=5) self.data_factory.create_data('content', 'id3', (0, 0), 'global', timestamp.timetuple(), 'type') timestamp = datetime.datetime.now() + datetime.timedelta(hours=5) - datetime.timedelta(hours=3) self.data_factory.create_data('content', 'id4', (0, 0), 'global', timestamp.timetuple(), 'type') # Launch the sub-process process = subprocess.Popen( './bin/global_data_janitor ./config/test_smcity.conf', shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE ) # Spin up the logging threads out_thread = Thread(target=self._dump_out_pipe, kwargs={'process' : process}) out_thread.start() err_thread = Thread(target=self._dump_err_pipe, kwargs={'process' : process}) err_thread.start() # Wait for the process to finish out_thread.join() err_thread.join() # Verify the correct records were deleted for raw_record in self.global_table.scan(): record = AwsData(raw_record) assert (record.get_datum_id() == 'id3') or (record.get_datum_id() == 'id4'), \ record.get_datum_id()
from smcity.polygons.complex_polygon_strategy import ComplexPolygonStrategyFactory
from smcity.transformers.geojson_transformer import GeoJsonTransformer

# Manual/QA driver: loads the QA config and the pickled Ohio congressional
# districts, then wires up the data factory, queues, worker and transformer.
print "Loading the config settings..."
config = ConfigParser()
configFile = open('config/qa_smcity.conf')
config.readfp(configFile)
configFile.close()

# NOTE(review): pickle.load on a checked-in fixture file; the file handle is
# never closed explicitly.
print "Depickling the congressional districts..."
districts = pickle.load(open("manual_tests/ohio_districts_low_res.pickled", "rb"))
district_ids = []
geojson = None

# Set up the components
data_factory = AwsDataFactory(config)
result_queue = MockResultQueue()
task_queue = MockTaskQueue()
worker = Worker(config, result_queue, task_queue, data_factory)
transformer = GeoJsonTransformer(data_factory)

# Extract the data points inside the time frame and geographic area of interest
# NOTE(review): the +5 hour adjustment presumably shifts local time toward
# UTC, matching the offset used by the test classes above — confirm.
age_limit = datetime.datetime.now()
age_limit += datetime.timedelta(hours=5) - datetime.timedelta(hours=1)
age_limit = age_limit.timetuple()

# Process each district; one fresh id per district. (The loop body continues
# beyond this chunk of the file.)
for iter in range(len(districts)):
    print "District", (iter+1)
    district_ids.append(uuid4())
    bounding_box = districts[iter].get_bounding_box()