def configure_data(redis_client):
    """Configure fixed set of data queues."""
    data_queues = {
        'update_cell': DataQueue('update_cell', redis_client,
                                 queue_key='update_cell'),  # BBB
        'update_cellarea': DataQueue('update_cellarea', redis_client,
                                     queue_key='update_cellarea'),
        'update_cellarea_ocid': DataQueue('update_cellarea_ocid', redis_client,
                                          queue_key='update_cellarea_ocid'),
        'update_score': DataQueue('update_score', redis_client,
                                  queue_key='update_score'),
    }
    for shard_id in DataMap.shards().keys():
        name = 'update_datamap_' + shard_id
        data_queues[name] = DataQueue(name, redis_client, queue_key=name)
    for shard_id in CellShard.shards().keys():
        name = 'update_cell_' + shard_id
        data_queues[name] = DataQueue(name, redis_client, queue_key=name)
    for shard_id in WifiShard.shards().keys():
        name = 'update_wifi_' + shard_id
        data_queues[name] = DataQueue(name, redis_client, queue_key=name)
    return data_queues

def configure_data(redis_client):
    """Configure fixed set of data queues."""
    data_queues = {
        # *_incoming need to be the exact same as in webapp.config
        'update_incoming': DataQueue('update_incoming', redis_client,
                                     batch=100, compress=True),
        'transfer_incoming': DataQueue('transfer_incoming', redis_client,
                                       batch=100, compress=True),
    }
    for key in ('update_cellarea', ):
        data_queues[key] = DataQueue(key, redis_client, batch=100, json=False)
    for shard_id in BlueShard.shards().keys():
        key = 'update_blue_' + shard_id
        data_queues[key] = DataQueue(key, redis_client, batch=500)
    for shard_id in DataMap.shards().keys():
        key = 'update_datamap_' + shard_id
        data_queues[key] = DataQueue(key, redis_client, batch=500, json=False)
    for shard_id in CellShard.shards().keys():
        key = 'update_cell_' + shard_id
        data_queues[key] = DataQueue(key, redis_client, batch=500)
    for shard_id in WifiShard.shards().keys():
        key = 'update_wifi_' + shard_id
        data_queues[key] = DataQueue(key, redis_client, batch=500)
    return data_queues

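# Usage sketch for configure_data(), hedged and not part of the project code:
# it assumes a redis-py client and relies only on DataQueue methods the tests
# below also use (enqueue, size); the Redis URL and the enqueued payload are
# placeholders.
import redis

redis_client = redis.StrictRedis.from_url('redis://localhost:6379/0')
data_queues = configure_data(redis_client)

# Reports arrive on the shared 'update_incoming' queue first and are later
# fanned out to the per-shard queues (update_blue_*, update_cell_*, ...).
data_queues['update_incoming'].enqueue([{'report': {}}])
assert data_queues['update_incoming'].size() >= 1
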
def _update_all(self):
    schedule_export_reports.delay().get()
    for shard_id in CellShard.shards().keys():
        update_cell.delay(shard_id=shard_id).get()
    for shard_id in WifiShard.shards().keys():
        update_wifi.delay(shard_id=shard_id).get()

def test_import_local_cell(self):
    self.import_csv(cell_type='cell')
    cells = self.session.query(CellShard.shards()['wcdma']).all()
    self.assertEqual(len(cells), 9)
    areaids = set([cell.areaid for cell in cells])
    self.assertEqual(self.session.query(CellArea).count(), len(areaids))
    update_statcounter.delay(ago=0).get()
    self.check_stat(StatKey.unique_cell, 9)

def _update_all(self):
    update_incoming.delay().get()
    for shard_id in BlueShard.shards().keys():
        update_blue.delay(shard_id=shard_id).get()
    for shard_id in CellShard.shards().keys():
        update_cell.delay(shard_id=shard_id).get()
    for shard_id in WifiShard.shards().keys():
        update_wifi.delay(shard_id=shard_id).get()

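# Note on the .delay().get() pattern in these helpers: .delay() schedules the
# Celery task and .get() blocks until its result is ready. For that to work
# inline in tests, the app is usually configured to execute tasks eagerly; a
# hedged sketch using standard Celery 4.x settings (celery_app stands for the
# application under test):
celery_app.conf.update(
    task_always_eager=True,       # run tasks synchronously in-process
    task_eager_propagates=True,   # re-raise task exceptions in the caller
)
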
def test_import_local_cell(self, celery, redis, session):
    self.import_csv(
        celery, redis, session,
        CellShardFactory.build(radio=Radio.wcdma),
        cell_type='cell')
    cells = session.query(CellShard.shards()['wcdma']).all()
    assert len(cells) == 9
    areaids = set([cell.areaid for cell in cells])
    assert session.query(CellArea).count() == len(areaids)
    update_statcounter.delay().get()
    self.check_stat(session, StatKey.unique_cell, 9)

def test_shard_queues(self):  # BBB
    observations = CellObservationFactory.build_batch(3)
    data_queues = self.celery_app.data_queues
    single_queue = data_queues['update_cell']
    single_queue.enqueue(observations)
    update_cell.delay().get()
    self.assertEqual(single_queue.size(), 0)
    total = 0
    for shard_id in CellShard.shards().keys():
        total += data_queues['update_cell_' + shard_id].size()
    self.assertEqual(total, 3)

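# Hedged, self-contained sketch of the shard registry pattern this test relies
# on; an illustration, not the real model code. CellShard.shards() maps a
# shard id (the radio type) to a per-radio model class; the real shard_model()
# resolves the shard from an encoded station id, which is simplified here to
# dispatch on the radio name directly. Class and table names are assumptions.
class CellShardGsm(object):
    __tablename__ = 'cell_gsm'


class CellShardWcdma(object):
    __tablename__ = 'cell_wcdma'


class CellShardLte(object):
    __tablename__ = 'cell_lte'


class CellShard(object):

    @classmethod
    def shards(cls):
        return {
            'gsm': CellShardGsm,
            'wcdma': CellShardWcdma,
            'lte': CellShardLte,
        }

    @classmethod
    def shard_model(cls, radio_name):
        return cls.shards().get(radio_name)


assert CellShard.shard_model('gsm').__tablename__ == 'cell_gsm'
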
def _update_all(self, session, datamap_only=False):
    ExportConfigFactory(name="internal", batch=0, schema="internal")
    session.flush()
    update_incoming.delay().get()

    if datamap_only:
        return

    for shard_id in BlueShard.shards().keys():
        update_blue.delay(shard_id=shard_id).get()
    for shard_id in CellShard.shards().keys():
        update_cell.delay(shard_id=shard_id).get()
    for shard_id in WifiShard.shards().keys():
        update_wifi.delay(shard_id=shard_id).get()

def test_lock_timeout(self, celery, db_rw_drop_table,
                      redis, ro_session, session, stats):
    obs = CellObservationFactory.build()
    cell = CellShardFactory.build(
        radio=obs.radio, mcc=obs.mcc, mnc=obs.mnc,
        lac=obs.lac, cid=obs.cid, samples=10,
    )
    ro_session.add(cell)
    ro_session.flush()

    orig_add_area = CellUpdater.add_area_update
    orig_wait = CellUpdater._retry_wait
    num = [0]

    def mock_area(self, updated_areas, key,
                  num=num, ro_session=ro_session):
        orig_add_area(self, updated_areas, key)
        num[0] += 1
        if num[0] == 2:
            ro_session.rollback()

    try:
        CellUpdater._retry_wait = 0.0001
        session.execute('set session innodb_lock_wait_timeout = 1')
        with mock.patch.object(CellUpdater, 'add_area_update', mock_area):
            self.queue_and_update(celery, [obs])

        # the inner task logic was called exactly twice
        assert num[0] == 2

        shard = CellShard.shard_model(obs.cellid)
        cells = session.query(shard).all()
        assert len(cells) == 1
        assert cells[0].samples == 1

        self.check_statcounter(redis, StatKey.cell, 1)
        self.check_statcounter(redis, StatKey.unique_cell, 1)
        stats.check(
            counter=[('data.observation.insert', 1, ['type:cell'])],
            timer=[('task', 1, ['task:data.update_cell'])],
        )
    finally:
        CellUpdater._retry_wait = orig_wait
        for model in CellShard.shards().values():
            session.execute(text(
                'drop table %s;' % model.__tablename__))

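# Hedged sketch of the retry behavior test_lock_timeout exercises (assumed
# shape, not the real CellUpdater internals): on a lock wait timeout the
# updater retries the whole transaction, sleeping _retry_wait between
# attempts, which is why the mocked inner logic above runs exactly twice.
import time


class LockTimeoutError(Exception):
    """Stand-in for the database driver's lock-wait-timeout error."""


def run_with_retry(fn, retries=3, wait=0.0001):
    for attempt in range(retries):
        try:
            return fn()
        except LockTimeoutError:
            if attempt == retries - 1:
                raise
            time.sleep(wait)
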
def configure_data(redis_client):
    """Configure fixed set of data queues."""
    data_queues = {
        # update_incoming needs to be the exact same as in webapp.config
        'update_incoming': DataQueue('update_incoming', redis_client,
                                     batch=100, compress=True),
    }
    for key in ('update_cellarea', 'update_cellarea_ocid'):
        data_queues[key] = DataQueue(key, redis_client, batch=100, json=False)
    for shard_id in BlueShard.shards().keys():
        key = 'update_blue_' + shard_id
        data_queues[key] = DataQueue(key, redis_client, batch=500)
    for shard_id in DataMap.shards().keys():
        key = 'update_datamap_' + shard_id
        data_queues[key] = DataQueue(key, redis_client, batch=500, json=False)
    for shard_id in CellShard.shards().keys():
        key = 'update_cell_' + shard_id
        data_queues[key] = DataQueue(key, redis_client, batch=500)
    for shard_id in WifiShard.shards().keys():
        key = 'update_wifi_' + shard_id
        data_queues[key] = DataQueue(key, redis_client, batch=500)
    return data_queues

def write_stations_to_csv(session, path, start_time=None, end_time=None):
    where = 'radio != 1 AND lat IS NOT NULL AND lon IS NOT NULL'
    if None not in (start_time, end_time):
        where = where + ' AND modified >= "%s" AND modified < "%s"'
        fmt = '%Y-%m-%d %H:%M:%S'
        where = where % (start_time.strftime(fmt), end_time.strftime(fmt))

    header_row = [
        'radio', 'mcc', 'net', 'area', 'cell', 'unit',
        'lon', 'lat', 'range', 'samples', 'changeable',
        'created', 'updated', 'averageSignal',
    ]
    header_row = ','.join(header_row) + '\n'

    tables = [shard.__tablename__ for shard in CellShard.shards().values()]
    stmt = '''SELECT
    CONCAT_WS(",",
        CASE radio
            WHEN 0 THEN "GSM"
            WHEN 2 THEN "UMTS"
            WHEN 3 THEN "LTE"
            ELSE ""
        END,
        `mcc`, `mnc`, `lac`, `cid`,
        COALESCE(`psc`, ""),
        ROUND(`lon`, 7), ROUND(`lat`, 7),
        COALESCE(`radius`, "0"), COALESCE(`samples`, "0"),
        "1",
        COALESCE(UNIX_TIMESTAMP(`created`), ""),
        COALESCE(UNIX_TIMESTAMP(`modified`), ""),
        ""
    ) AS `cell_value`
FROM %s
WHERE %s
ORDER BY `cellid`
LIMIT :l
OFFSET :o
'''

    with util.gzip_open(path, 'w', compresslevel=5) as gzip_wrapper:
        with gzip_wrapper as gzip_file:
            gzip_file.write(header_row)
            for table in tables:
                table_stmt = text(stmt % (table, where))
                offset = 0
                limit = 25000
                while True:
                    rows = session.execute(
                        table_stmt.bindparams(o=offset, l=limit)).fetchall()
                    if rows:
                        buf = '\r\n'.join([row.cell_value for row in rows])
                        if buf:
                            buf += '\r\n'
                        gzip_file.write(buf)
                        offset += limit
                    else:
                        break

def tearDown(self):
    for model in CellShard.shards().values():
        self.session.execute(text('drop table %s;' % model.__tablename__))
    self.setup_tables(self.db_rw.engine)
    super(TestDatabaseErrors, self).tearDown()

def write_stations_to_csv(session, path, today, start_time=None, end_time=None):
    linesep = "\r\n"
    where = "lat IS NOT NULL AND lon IS NOT NULL"
    if start_time is not None and end_time is not None:
        where = where + ' AND modified >= "%s" AND modified < "%s"'
        fmt = "%Y-%m-%d %H:%M:%S"
        where = where % (start_time.strftime(fmt), end_time.strftime(fmt))
    else:
        # limit to cells modified in the last 12 months
        one_year = today - timedelta(days=365)
        where = where + ' AND modified >= "%s"' % one_year.strftime("%Y-%m-%d")

    header_row = ",".join(_FIELD_NAMES) + linesep

    tables = [shard.__tablename__ for shard in CellShard.shards().values()]
    stmt = """SELECT
    `cellid`,
    CONCAT_WS(",",
        CASE radio
            WHEN 0 THEN "GSM"
            WHEN 2 THEN "UMTS"
            WHEN 3 THEN "LTE"
            ELSE ""
        END,
        `mcc`, `mnc`, `lac`, `cid`,
        COALESCE(`psc`, ""),
        ROUND(`lon`, 7), ROUND(`lat`, 7),
        COALESCE(`radius`, "0"), COALESCE(`samples`, "0"),
        "1",
        COALESCE(UNIX_TIMESTAMP(`created`), ""),
        COALESCE(UNIX_TIMESTAMP(`modified`), ""),
        ""
    ) AS `cell_value`
FROM %s
WHERE %s AND `cellid` > :cellid
ORDER BY `cellid`
LIMIT :limit
"""

    with util.gzip_open(path, "w", compresslevel=5) as gzip_wrapper:
        with gzip_wrapper as gzip_file:
            gzip_file.write(header_row)
            for table in tables:
                table_stmt = text(stmt % (table, where))
                min_cellid = ""
                limit = 25000
                while True:
                    rows = session.execute(
                        table_stmt.bindparams(
                            limit=limit, cellid=min_cellid)).fetchall()
                    if rows:
                        buf = "".join(row.cell_value + linesep for row in rows)
                        gzip_file.write(buf)
                        min_cellid = rows[-1].cellid
                    else:
                        break

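# Design note on the pagination above: filtering on `cellid` > :cellid and
# remembering the last id seen (keyset pagination) keeps every batch an index
# range scan, while the LIMIT/OFFSET variants elsewhere in this file make the
# database scan and discard all skipped rows, so each page gets slower as the
# offset grows. Self-contained toy model of the keyset loop:
def keyset_batches(ids, batch=3):
    """Yield sorted batches, continuing after the last id seen."""
    last = ''
    while True:
        rows = [i for i in sorted(ids) if i > last][:batch]
        if not rows:
            break
        yield rows
        last = rows[-1]


assert list(keyset_batches(['a', 'b', 'c', 'd'])) == [['a', 'b', 'c'], ['d']]
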
def write_stations_to_csv(session, path, start_time=None, end_time=None):
    where = 'radio != 1 AND lat IS NOT NULL AND lon IS NOT NULL'
    if None not in (start_time, end_time):
        where = where + ' AND modified >= "%s" AND modified < "%s"'
        fmt = '%Y-%m-%d %H:%M:%S'
        where = where % (start_time.strftime(fmt), end_time.strftime(fmt))

    header_row = [
        'radio', 'mcc', 'net', 'area', 'cell', 'unit',
        'lon', 'lat', 'range', 'samples', 'changeable',
        'created', 'updated', 'averageSignal',
    ]
    header_row = ','.join(header_row) + '\n'

    tables = [shard.__tablename__ for shard in CellShard.shards().values()]
    stmt = '''SELECT
    CONCAT_WS(",",
        CASE radio
            WHEN 0 THEN "GSM"
            WHEN 2 THEN "UMTS"
            WHEN 3 THEN "LTE"
            ELSE ""
        END,
        `mcc`, `mnc`, `lac`, `cid`,
        COALESCE(`psc`, ""),
        ROUND(`lon`, 7), ROUND(`lat`, 7),
        COALESCE(`radius`, "0"), COALESCE(`samples`, "0"),
        "1",
        COALESCE(UNIX_TIMESTAMP(`created`), ""),
        COALESCE(UNIX_TIMESTAMP(`modified`), ""),
        ""
    ) AS `cell_value`
FROM %s
WHERE %s
ORDER BY `radio`, `mcc`, `mnc`, `lac`, `cid`
LIMIT :l
OFFSET :o
'''

    limit = 10000
    with util.gzip_open(path, 'w', compresslevel=5) as gzip_wrapper:
        with gzip_wrapper as gzip_file:
            gzip_file.write(header_row)
            for table in tables:
                table_stmt = text(stmt % (table, where))
                # reset the offset for each table, otherwise every table
                # after the first would skip its leading rows
                offset = 0
                while True:
                    rows = session.execute(
                        table_stmt.bindparams(o=offset, l=limit)).fetchall()
                    if rows:
                        buf = '\r\n'.join([row.cell_value for row in rows])
                        if buf:
                            buf += '\r\n'
                        gzip_file.write(buf)
                        offset += limit
                    else:
                        break

def celerybeat_schedule(app_config):
    """Return the celery beat schedule as a dictionary."""
    sections = app_config.sections()

    schedule = {
        # Monitoring
        'monitor-queue-size': {
            'task': 'ichnaea.data.tasks.monitor_queue_size',
            'schedule': timedelta(seconds=60),
            'options': {'expires': 57},
        },
        'monitor-api-users': {
            'task': 'ichnaea.data.tasks.monitor_api_users',
            'schedule': timedelta(seconds=600),
            'options': {'expires': 570},
        },
        'monitor-api-key-limits': {
            'task': 'ichnaea.data.tasks.monitor_api_key_limits',
            'schedule': timedelta(seconds=600),
            'options': {'expires': 570},
        },

        # Statistics
        'update-statcounter': {
            'task': 'ichnaea.data.tasks.update_statcounter',
            'args': (1, ),
            'schedule': crontab(minute=3),
            'options': {'expires': 2700},
        },
        'update-statregion': {
            'task': 'ichnaea.data.tasks.update_statregion',
            'schedule': timedelta(seconds=3600 * 6),
            'options': {'expires': 3600 * 5},
        },

        # Data Pipeline
        'schedule-export-reports': {
            'task': 'ichnaea.data.tasks.schedule_export_reports',
            'schedule': timedelta(seconds=8),
            'options': {'expires': 15},
        },
        'update-cellarea': {
            'task': 'ichnaea.data.tasks.update_cellarea',
            'schedule': timedelta(seconds=8),
            'args': (100, ),
            'options': {'expires': 15},
        },
        'update-cellarea-ocid': {
            'task': 'ichnaea.data.tasks.update_cellarea_ocid',
            'schedule': timedelta(seconds=9),
            'args': (100, ),
            'options': {'expires': 15},
        },
        'update-score': {
            'task': 'ichnaea.data.tasks.update_score',
            'args': (250, ),
            'schedule': timedelta(seconds=9),
            'options': {'expires': 10},
        },
    }

    for shard_id in CellShard.shards().keys():
        schedule.update({
            'update-cell-' + shard_id: {
                'task': 'ichnaea.data.tasks.update_cell',
                'schedule': timedelta(seconds=7),
                'args': (500, shard_id),
                'options': {'expires': 10},
            }
        })

    for shard_id in DataMap.shards().keys():
        schedule.update({
            'update-datamap-' + shard_id: {
                'task': 'ichnaea.data.tasks.update_datamap',
                'args': (500, shard_id),
                'schedule': timedelta(seconds=14),
                'options': {'expires': 20},
            },
        })

    for shard_id in WifiShard.shards().keys():
        schedule.update({
            'update-wifi-' + shard_id: {
                'task': 'ichnaea.data.tasks.update_wifi',
                'schedule': timedelta(seconds=6),
                'args': (500, shard_id),
                'options': {'expires': 10},
            }
        })

    if 'assets' in sections and app_config.get('assets', 'bucket', None):
        # only configure tasks if target bucket is configured
        schedule.update({
            'cell-export-full': {
                'task': 'ichnaea.data.tasks.cell_export_full',
                'schedule': crontab(hour=0, minute=13),
                'options': {'expires': 39600},
            },
            'cell-export-diff': {
                'task': 'ichnaea.data.tasks.cell_export_diff',
                'schedule': crontab(minute=3),
                'options': {'expires': 2700},
            },
        })

    if 'import:ocid' in sections:
        schedule.update({
            'monitor-ocid-import': {
                'task': 'ichnaea.data.tasks.monitor_ocid_import',
                'schedule': timedelta(seconds=600),
                'options': {'expires': 570},
            },
            'cell-import-external': {
                'task': 'ichnaea.data.tasks.cell_import_external',
                'args': (True, ),
                'schedule': crontab(minute=52),
                'options': {'expires': 2700},
            },
        })

    return schedule

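# Hedged wiring sketch: how a beat schedule dict like the one above is
# commonly attached to a Celery application (the `beat_schedule` setting is
# standard in Celery 4.x; the app name and the app_config object here are
# placeholders).
from celery import Celery

celery_app = Celery('ichnaea')
celery_app.conf.beat_schedule = celerybeat_schedule(app_config)
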
def celerybeat_schedule(app_config):
    """Return the celery beat schedule as a dictionary."""
    sections = app_config.sections()

    schedule = {
        # Monitoring
        'monitor-queue-size': {
            'task': 'ichnaea.data.tasks.monitor_queue_size',
            'schedule': timedelta(seconds=60),
            'options': {'expires': 57},
        },
        'monitor-api-users': {
            'task': 'ichnaea.data.tasks.monitor_api_users',
            'schedule': timedelta(seconds=600),
            'options': {'expires': 570},
        },
        'monitor-api-key-limits': {
            'task': 'ichnaea.data.tasks.monitor_api_key_limits',
            'schedule': timedelta(seconds=600),
            'options': {'expires': 570},
        },

        # Statistics
        'update-statcounter': {
            'task': 'ichnaea.data.tasks.update_statcounter',
            'args': (1, ),
            'schedule': crontab(minute=3),
            'options': {'expires': 2700},
        },
        'update-statregion': {
            'task': 'ichnaea.data.tasks.update_statregion',
            'schedule': crontab(minute=5),
            'options': {'expires': 2700},
        },

        # Data Pipeline
        'schedule-export-reports': {
            'task': 'ichnaea.data.tasks.schedule_export_reports',
            'schedule': timedelta(seconds=8),
            'options': {'expires': 15},
        },
        'update-cellarea': {
            'task': 'ichnaea.data.tasks.update_cellarea',
            'schedule': timedelta(seconds=8),
            'args': (100, ),
            'options': {'expires': 15},
        },
        'update-cellarea-ocid': {
            'task': 'ichnaea.data.tasks.update_cellarea_ocid',
            'schedule': timedelta(seconds=9),
            'args': (100, ),
            'options': {'expires': 15},
        },
        'update-score': {
            'task': 'ichnaea.data.tasks.update_score',
            'args': (250, ),
            'schedule': timedelta(seconds=9),
            'options': {'expires': 10},
        },
    }

    for shard_id in CellShard.shards().keys():
        schedule.update({
            'update-cell-' + shard_id: {
                'task': 'ichnaea.data.tasks.update_cell',
                'schedule': timedelta(seconds=7),
                'args': (500, shard_id),
                'options': {'expires': 10},
            }
        })

    for shard_id in DataMap.shards().keys():
        schedule.update({
            'update-datamap-' + shard_id: {
                'task': 'ichnaea.data.tasks.update_datamap',
                'args': (500, shard_id),
                'schedule': timedelta(seconds=14),
                'options': {'expires': 20},
            },
        })

    for shard_id in WifiShard.shards().keys():
        schedule.update({
            'update-wifi-' + shard_id: {
                'task': 'ichnaea.data.tasks.update_wifi',
                'schedule': timedelta(seconds=6),
                'args': (500, shard_id),
                'options': {'expires': 10},
            }
        })

    if 'assets' in sections and app_config.get('assets', 'bucket', None):
        # only configure tasks if target bucket is configured
        schedule.update({
            'cell-export-full': {
                'task': 'ichnaea.data.tasks.cell_export_full',
                'schedule': crontab(hour=0, minute=13),
                'options': {'expires': 39600},
            },
            'cell-export-diff': {
                'task': 'ichnaea.data.tasks.cell_export_diff',
                'schedule': crontab(minute=3),
                'options': {'expires': 2700},
            },
        })

    if 'import:ocid' in sections:
        schedule.update({
            'monitor-ocid-import': {
                'task': 'ichnaea.data.tasks.monitor_ocid_import',
                'schedule': timedelta(seconds=600),
                'options': {'expires': 570},
            },
            'cell-import-external': {
                'task': 'ichnaea.data.tasks.cell_import_external',
                'args': (True, ),
                'schedule': crontab(minute=52),
                'options': {'expires': 2700},
            },
        })

    return schedule

def write_stations_to_csv(session, path, today, start_time=None, end_time=None):
    where = 'lat IS NOT NULL AND lon IS NOT NULL'
    if start_time is not None and end_time is not None:
        where = where + ' AND modified >= "%s" AND modified < "%s"'
        fmt = '%Y-%m-%d %H:%M:%S'
        where = where % (start_time.strftime(fmt), end_time.strftime(fmt))
    else:
        # limit to cells modified in the last 12 months
        one_year = today - timedelta(days=365)
        where = where + ' AND modified >= "%s"' % one_year.strftime('%Y-%m-%d')

    header_row = [
        'radio', 'mcc', 'net', 'area', 'cell', 'unit',
        'lon', 'lat', 'range', 'samples', 'changeable',
        'created', 'updated', 'averageSignal',
    ]
    header_row = ','.join(header_row) + '\n'

    tables = [shard.__tablename__ for shard in CellShard.shards().values()]
    stmt = '''SELECT
    `cellid`,
    CONCAT_WS(",",
        CASE radio
            WHEN 0 THEN "GSM"
            WHEN 2 THEN "UMTS"
            WHEN 3 THEN "LTE"
            ELSE ""
        END,
        `mcc`, `mnc`, `lac`, `cid`,
        COALESCE(`psc`, ""),
        ROUND(`lon`, 7), ROUND(`lat`, 7),
        COALESCE(`radius`, "0"), COALESCE(`samples`, "0"),
        "1",
        COALESCE(UNIX_TIMESTAMP(`created`), ""),
        COALESCE(UNIX_TIMESTAMP(`modified`), ""),
        ""
    ) AS `cell_value`
FROM %s
WHERE %s AND `cellid` > :cellid
ORDER BY `cellid`
LIMIT :limit
'''

    with util.gzip_open(path, 'w', compresslevel=5) as gzip_wrapper:
        with gzip_wrapper as gzip_file:
            gzip_file.write(header_row)
            for table in tables:
                table_stmt = text(stmt % (table, where))
                min_cellid = ''
                limit = 25000
                while True:
                    rows = session.execute(
                        table_stmt.bindparams(
                            limit=limit, cellid=min_cellid)).fetchall()
                    if rows:
                        buf = '\r\n'.join([row.cell_value for row in rows])
                        if buf:
                            buf += '\r\n'
                        gzip_file.write(buf)
                        min_cellid = rows[-1].cellid
                    else:
                        break