def load_file(db, redis_client, datatype, filename):  # pragma: no cover
    celery_app.data_queues = configure_data(redis_client)
    task = FakeTask(celery_app)
    with redis_pipeline(redis_client) as pipe:
        with db_worker_session(db) as session:
            ocid.ImportLocal(
                task, session, pipe, cell_type=datatype)(filename=filename)

def load_file(db, redis_client, datatype, filename):  # pragma: no cover
    with redis_pipeline(redis_client) as pipe:
        with db_worker_session(db) as session:
            ocid.ImportLocal(
                None, session, pipe,
                cell_type=datatype,
                update_area_task=update_area)(filename=filename)

def load_file(db, redis_client, datatype, filename):  # pragma: no cover
    celery_app.data_queues = configure_data(redis_client)
    task = FakeTask(celery_app)
    with redis_pipeline(redis_client) as pipe:
        with db_worker_session(db) as session:
            ocid.ImportLocal(
                task, cell_type=datatype)(pipe, session, filename=filename)

def main(argv, _db=None):
    parser = argparse.ArgumentParser(
        prog=argv[0],
        description=(
            "Import from public cell data into a local dev environment. "
            "See https://location.services.mozilla.com/downloads"
        ),
    )
    parser.add_argument("filename", help="Path to the csv.gz import file.")
    args = parser.parse_args(argv[1:])

    if not settings("local_dev_env"):
        print("This script can only be run in a local dev environment.")
        print("Set LOCAL_DEV_ENV=True in your environment.")
        return 1

    filename = os.path.abspath(os.path.expanduser(args.filename))
    if not os.path.isfile(filename):
        print("File %s not found." % filename)
        return 1

    configure_logging()

    celery_app = get_eager_celery_app()
    init_worker(celery_app)
    cellarea_queue = celery_app.data_queues["update_cellarea"]
    with db_worker_session(celery_app.db, commit=False) as session:
        with gzip_open(filename, "r") as file_handle:
            read_stations_from_csv(
                session, file_handle, celery_app.redis_client, cellarea_queue
            )
    return 0

def db_session(self, commit=True): """ Returns a database session usable as a context manager. :param commit: Should the session be committed or aborted at the end? :type commit: bool """ return db_worker_session(self.app.db_rw, commit=commit)
def db_session(self, commit=True, isolation_level=None):
    """
    Returns a database session usable as a context manager.

    :param commit: Should the session be committed or aborted at the end?
    :type commit: bool
    :param isolation_level: Set a new transaction isolation level for this
        session
    """
    return db_worker_session(
        self.app.db, commit=commit, isolation_level=isolation_level
    )

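# A minimal usage sketch for the db_session() variants above (added for
# illustration, not from the source): the method is meant to be called from
# task code and used as a context manager. `task` stands in for any object
# exposing the method, and the query is hypothetical.
from sqlalchemy import text


def example_task_query(task):
    # Read-only work: commit=False rolls the session back at the end.
    with task.db_session(commit=False) as session:
        return session.execute(text("SELECT 1")).fetchall()
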
def main(argv, _db=None, _dump_file=dump_file):
    parser = argparse.ArgumentParser(
        prog=argv[0], description='Dump/export data.')
    parser.add_argument('--datatype', required=True,
                        help='Type of the data file, blue, cell or wifi')
    parser.add_argument('--filename', required=True,
                        help='Path to the csv.gz export file.')
    parser.add_argument('--lat', default=None,
                        help='The center latitude of the desired area.')
    parser.add_argument('--lon', default=None,
                        help='The center longitude of the desired area.')
    parser.add_argument('--radius', default=None,
                        help='The radius of the desired area.')
    args = parser.parse_args(argv[1:])

    if not args.filename:  # pragma: no cover
        parser.print_help()
        return 1

    filename = os.path.abspath(os.path.expanduser(args.filename))
    if os.path.isfile(filename):  # pragma: no cover
        print('File already exists.')
        return 1

    datatype = args.datatype
    if datatype not in ('blue', 'cell', 'wifi'):  # pragma: no cover
        print('Unknown data type.')
        return 1

    lat, lon, radius = (None, None, None)
    if (args.lat is not None and
            args.lon is not None and
            args.radius is not None):
        lat = float(args.lat)
        lon = float(args.lon)
        radius = int(args.radius)

    configure_logging()

    db = configure_db('ro', _db=_db)
    with db_worker_session(db, commit=False) as session:
        exit_code = _dump_file(
            datatype, session, filename, lat=lat, lon=lon, radius=radius)
    return exit_code

def main(argv, _db=None, _dump_file=dump_file):
    parser = argparse.ArgumentParser(prog=argv[0], description="Dump/export data.")
    parser.add_argument(
        "--datatype", required=True, help="Type of the data file, blue, cell or wifi"
    )
    parser.add_argument(
        "--filename", required=True, help="Path to the csv.gz export file."
    )
    parser.add_argument(
        "--lat", default=None, help="The center latitude of the desired area."
    )
    parser.add_argument(
        "--lon", default=None, help="The center longitude of the desired area."
    )
    parser.add_argument(
        "--radius", default=None, help="The radius of the desired area."
    )
    args = parser.parse_args(argv[1:])

    if not args.filename:
        parser.print_help()
        return 1

    filename = os.path.abspath(os.path.expanduser(args.filename))
    if os.path.isfile(filename):
        print("File already exists.")
        return 1

    datatype = args.datatype
    if datatype not in ("blue", "cell", "wifi"):
        print("Unknown data type.")
        return 1

    lat, lon, radius = (None, None, None)
    if args.lat is not None and args.lon is not None and args.radius is not None:
        lat = float(args.lat)
        lon = float(args.lon)
        radius = int(args.radius)

    configure_logging()

    db = configure_db("ro", _db=_db, pool=False)
    with db_worker_session(db, commit=False) as session:
        exit_code = _dump_file(
            datatype, session, filename, lat=lat, lon=lon, radius=radius
        )
    return exit_code

def show_api_key_details(ctx, key):
    """Print api key details to stdout."""
    db = configure_db("rw")
    with db_worker_session(db) as session:
        row = session.query(ApiKey).filter(
            ApiKey.valid_key == key).one_or_none()
        if row:
            api_key = Key.from_obj(row)
        else:
            api_key = None

    if api_key:
        table = [[name, value] for name, value in api_key.as_dict().items()]
        print_table(table, delimiter=" : ", stream_write=click_echo_no_nl)
    else:
        click.echo(f"API key '{key}' does not exist")

def show_api_key_details(key):
    """Print api key details to stdout."""
    db = configure_db("rw")
    with db_worker_session(db) as session:
        columns = ApiKey.__table__.columns
        fields = [getattr(columns, f) for f in API_KEY_COLUMN_NAMES]
        row = session.execute(
            select(fields).where(columns.valid_key == key)).fetchone()
        if row is not None:
            key = Key(**dict(row.items()))
        else:
            key = None

    table = []
    for field in API_KEY_COLUMN_NAMES:
        table.append([field, getattr(key, field, "")])
    print_table(table, " : ")

def export_file(filename, tablename, _db=None, _session=None):
    today = util.utcnow().date()
    one_year_ago = today - timedelta(days=365)
    one_year_ago = one_year_ago.strftime('%Y-%m-%d')
    # this is executed in a worker process
    stmt = text('''\
SELECT
`grid`,
CAST(ROUND(DATEDIFF(CURDATE(), `modified`) / 30) AS UNSIGNED) as `num`
FROM {tablename}
WHERE modified >= '{modified}'
LIMIT :limit OFFSET :offset
'''.format(tablename=tablename, modified=one_year_ago).replace('\n', ' '))
    db = configure_db('ro', _db=_db)

    offset = 0
    limit = 200000
    result_rows = 0
    with util.gzip_open(filename, 'w', compresslevel=2) as fd:
        with db_worker_session(db, commit=False) as session:
            if _session is not None:
                # testing hook
                session = _session
            while True:
                result = session.execute(
                    stmt.bindparams(limit=limit, offset=offset))
                rows = result.fetchall()
                result.close()
                if not rows:
                    break

                lines = []
                extend = lines.extend
                for row in rows:
                    lat, lon = decode_datamap_grid(row.grid)
                    extend(random_points(lat, lon, row.num))

                fd.writelines(lines)
                result_rows += len(lines)
                offset += limit

    if not result_rows:
        os.remove(filename)

    db.close()
    return result_rows

def export_file(filename, tablename, _db=None, _session=None):
    # this is executed in a worker process
    stmt = text("""\
SELECT
`grid`,
CAST(ROUND(DATEDIFF(CURDATE(), `modified`) / 30) AS UNSIGNED) as `num`
FROM {tablename}
WHERE `grid` > :grid
ORDER BY `grid`
LIMIT :limit
""".format(tablename=tablename).replace("\n", " "))
    db = configure_db("ro", _db=_db, pool=False)

    min_grid = b""
    limit = 200000
    result_rows = 0
    with util.gzip_open(filename, "w", compresslevel=2) as fd:
        with db_worker_session(db, commit=False) as session:
            if _session is not None:
                # testing hook
                session = _session
            while True:
                result = session.execute(
                    stmt.bindparams(limit=limit, grid=min_grid))
                rows = result.fetchall()
                result.close()
                if not rows:
                    break

                lines = []
                extend = lines.extend
                for row in rows:
                    lat, lon = decode_datamap_grid(row.grid)
                    extend(random_points(lat, lon, row.num))

                fd.writelines(lines)
                result_rows += len(lines)
                min_grid = rows[-1].grid

    if not result_rows:
        os.remove(filename)

    db.close()
    return result_rows

def export_file(filename, tablename, _db=None, _session=None):
    # this is executed in a worker process
    stmt = text('''\
SELECT
`grid`,
CAST(ROUND(DATEDIFF(CURDATE(), `modified`) / 30) AS UNSIGNED) as `num`
FROM {tablename}
WHERE `grid` > :grid
ORDER BY `grid`
LIMIT :limit
'''.format(tablename=tablename).replace('\n', ' '))
    db = configure_db('ro', transport='sync', _db=_db)

    min_grid = b''
    limit = 200000
    result_rows = 0
    with util.gzip_open(filename, 'w', compresslevel=2) as fd:
        with db_worker_session(db, commit=False) as session:
            if _session is not None:
                # testing hook
                session = _session
            while True:
                result = session.execute(
                    stmt.bindparams(limit=limit, grid=min_grid))
                rows = result.fetchall()
                result.close()
                if not rows:
                    break

                lines = []
                extend = lines.extend
                for row in rows:
                    lat, lon = decode_datamap_grid(row.grid)
                    extend(random_points(lat, lon, row.num))

                fd.writelines(lines)
                result_rows += len(lines)
                min_grid = rows[-1].grid

    if not result_rows:
        os.remove(filename)

    db.close()
    return result_rows

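# Added commentary, not from the source: the two export_file() variants above
# page through the table by primary key ("keyset pagination", WHERE grid >
# :grid ORDER BY grid LIMIT :limit) rather than by OFFSET, so each batch costs
# O(limit) instead of re-scanning all previously skipped rows. A generic
# sketch of the loop, with a hypothetical fetch_batch(session, last_key,
# limit) helper standing in for the SQL above:
def iterate_keyset(session, fetch_batch, limit=200000):
    last_key = b""  # every grid key sorts after the empty byte string
    while True:
        rows = fetch_batch(session, last_key, limit)
        if not rows:
            break
        yield from rows
        last_key = rows[-1].grid  # resume strictly after the last seen key
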
def create_api_key(key):
    """Create a new api key."""
    key = key or str(uuid.uuid4())
    db = configure_db("rw")
    with db_worker_session(db) as session:
        try:
            session.execute(
                insert(ApiKey.__table__).values(
                    valid_key=key,
                    allow_fallback=False,
                    allow_locate=True,
                    allow_region=True,
                    store_sample_locate=100,
                    store_sample_submit=100,
                )
            )
            print("Created API key: %r" % key)
        except IntegrityError:
            print("API key %r exists" % key)

def main(argv, _db=None, _dump_file=dump_file):
    parser = argparse.ArgumentParser(
        prog=argv[0], description='Dump/export data.')
    parser.add_argument('--datatype', required=True,
                        help='Type of the data file, blue, cell or wifi')
    parser.add_argument('--filename', required=True,
                        help='Path to the csv.gz export file.')
    parser.add_argument('--lat', default=None,
                        help='The center latitude of the desired area.')
    parser.add_argument('--lon', default=None,
                        help='The center longitude of the desired area.')
    parser.add_argument('--radius', default=None,
                        help='The radius of the desired area.')
    args = parser.parse_args(argv[1:])

    if not args.filename:  # pragma: no cover
        parser.print_help()
        return 1

    filename = os.path.abspath(os.path.expanduser(args.filename))
    if os.path.isfile(filename):  # pragma: no cover
        print('File already exists.')
        return 1

    datatype = args.datatype
    if datatype not in ('blue', 'cell', 'wifi'):  # pragma: no cover
        print('Unknown data type.')
        return 1

    lat, lon, radius = (None, None, None)
    if (args.lat is not None and
            args.lon is not None and
            args.radius is not None):
        lat = float(args.lat)
        lon = float(args.lon)
        radius = int(args.radius)

    configure_logging()

    db = configure_db('ro', transport='sync', _db=_db)
    with db_worker_session(db, commit=False) as session:
        exit_code = _dump_file(
            datatype, session, filename, lat=lat, lon=lon, radius=radius)
    return exit_code

def export_file(db_url, filename, tablename, _db_rw=None, _session=None):
    # this is executed in a worker process
    stmt = text('''\
SELECT
`grid`,
CAST(ROUND(DATEDIFF(CURDATE(), `modified`) / 30) AS UNSIGNED) as `num`
FROM {tablename}
LIMIT :limit OFFSET :offset
'''.format(tablename=tablename).replace('\n', ' '))
    db = configure_db(db_url, _db=_db_rw)

    offset = 0
    limit = 200000
    result_rows = 0
    with util.gzip_open(filename, 'w', compresslevel=2) as fd:
        with db_worker_session(db, commit=False) as session:
            if _session is not None:
                # testing hook
                session = _session
            while True:
                result = session.execute(
                    stmt.bindparams(limit=limit, offset=offset))
                rows = result.fetchall()
                result.close()
                if not rows:
                    break

                lines = []
                extend = lines.extend
                for row in rows:
                    lat, lon = decode_datamap_grid(row.grid)
                    extend(random_points(lat, lon, row.num))

                fd.writelines(lines)
                result_rows += len(lines)
                offset += limit

    if not result_rows:
        os.remove(filename)

    db.engine.pool.dispose()
    return result_rows

def list_api_keys(ctx):
    """List all api keys in db."""
    show_fields = ["valid_key", "allow_fallback", "allow_locate", "allow_region"]

    db = configure_db("rw")
    with db_worker_session(db) as session:
        columns = ApiKey.__table__.columns
        fields = [getattr(columns, f) for f in show_fields]
        rows = session.execute(select(fields)).fetchall()

    click.echo("%d api keys." % len(rows))
    if rows:
        # Add header row
        table = [show_fields]
        # Add rest of the rows; the columns are in the order of show_fields
        # so we don't have to do any re-ordering
        table.extend(rows)
        print_table(table, stream_write=click_echo_no_nl)

def create_api_key(ctx, maxreq, key):
    """Create a new api key.

    If KEY is not specified, it uses a uuid4.
    """
    key = key or str(uuid.uuid4())
    db = configure_db("rw")
    with db_worker_session(db) as session:
        try:
            session.execute(
                insert(ApiKey.__table__).values(
                    valid_key=key,
                    maxreq=maxreq,
                    allow_fallback=False,
                    allow_locate=True,
                    allow_region=True,
                    store_sample_locate=100,
                    store_sample_submit=100,
                )
            )
            click.echo("Created API key: %r" % key)
        except IntegrityError:
            click.echo("API key %r exists" % key)

def generate(db, bucketname, heka_client, stats_client,
             upload=True, concurrency=2, datamaps='', output=None):
    datamaps_encode = os.path.join(datamaps, 'encode')
    datamaps_enumerate = os.path.join(datamaps, 'enumerate')
    datamaps_render = os.path.join(datamaps, 'render')

    with tempdir() as workdir:
        csv = os.path.join(workdir, 'map.csv')

        with stats_client.timer("datamaps.export_to_csv"):
            with db_worker_session(db) as session:
                result_rows = export_to_csv(session, csv)

        stats_client.timing('datamaps.csv_rows', result_rows)

        # create shapefile / quadtree
        shapes = os.path.join(workdir, 'shapes')
        cmd = '{encode} -z15 -o {output} {input}'.format(
            encode=datamaps_encode,
            output=shapes,
            input=csv)

        with stats_client.timer("datamaps.encode"):
            system_call(cmd)

        # render tiles
        if output:
            tiles = output
        else:
            tiles = os.path.join(workdir, 'tiles')
        cmd = ("{enumerate} -z{zoom} {shapes} | xargs -L1 -P{concurrency} "
               "sh -c 'mkdir -p {output}/$2/$3; {render} "
               "-B 12:0.0379:0.874 -c0088FF -t0 "
               "-O 16:1600:1.5 -G 0.5{extra} $1 $2 $3 $4 | "
               "pngquant --speed=3 --quality=65-95 32 > "
               "{output}/$2/$3/$4{suffix}.png' dummy")

        zoom_0_cmd = cmd.format(
            enumerate=datamaps_enumerate,
            zoom=0,
            shapes=shapes,
            concurrency=concurrency,
            render=datamaps_render,
            output=tiles,
            extra=' -T 512',
            suffix='@2x')

        # create high-res version for zoom level 0
        system_call(zoom_0_cmd)

        zoom_all_cmd = cmd.format(
            enumerate=datamaps_enumerate,
            zoom=13,
            shapes=shapes,
            concurrency=concurrency,
            render=datamaps_render,
            output=tiles,
            extra='',
            suffix='')

        with stats_client.timer("datamaps.render"):
            system_call(zoom_all_cmd)

        if upload:  # pragma: no cover
            with stats_client.timer("datamaps.upload_to_s3"):
                result = upload_to_s3(bucketname, tiles)

            for metric, value in result.items():
                stats_client.timing('datamaps.%s' % metric, value)

def db_session(self):
    # returns a context manager
    return db_worker_session(self.app.db_master)

def main(
    ping_connections=False,
    _db=None,
    _geoip_db=None,
    _http_session=None,
    _raven_client=None,
    _redis_client=None,
    _position_searcher=None,
    _region_searcher=None,
):
    """
    Configure the web app stored in :data:`ichnaea.webapp.app._APP`.

    Does connection, logging and view config setup. Attaches some
    additional functionality to the :class:`pyramid.registry.Registry`
    instance.

    At startup ping all outbound connections like the database
    once, to ensure they are actually up and responding.

    The parameters starting with an underscore are test-only hooks
    to provide pre-configured connection objects.

    :param ping_connections: If True, ping and test outside connections.
    :type ping_connections: bool

    :returns: A configured WSGI app, the result of calling
              :meth:`pyramid.config.Configurator.make_wsgi_app`.
    """
    configure_logging()

    config = Configurator()
    check_config()

    # add support for pt templates
    config.include("pyramid_chameleon")

    # add a config setting to skip logging for some views
    config.registry.skip_logging = set()

    configure_api(config)
    configure_content(config)
    configure_monitor(config)

    # configure outside connections
    registry = config.registry

    registry.db = configure_db("ro", _db=_db)

    registry.raven_client = raven_client = configure_raven(
        transport="gevent", tags={"app": "webapp"}, _client=_raven_client
    )

    registry.redis_client = redis_client = configure_redis(_client=_redis_client)

    configure_stats()

    registry.http_session = configure_http_session(_session=_http_session)

    registry.geoip_db = geoip_db = configure_geoip(
        raven_client=raven_client, _client=_geoip_db
    )

    # Needs to be the exact same as the *_incoming entries in taskapp.config.
    registry.data_queues = data_queues = {
        "update_incoming": DataQueue(
            "update_incoming", redis_client, "report", batch=100, compress=True
        )
    }

    for name, func, default in (
        ("position_searcher", configure_position_searcher, _position_searcher),
        ("region_searcher", configure_region_searcher, _region_searcher),
    ):
        searcher = func(
            geoip_db=geoip_db,
            raven_client=raven_client,
            redis_client=redis_client,
            data_queues=data_queues,
            _searcher=default,
        )
        setattr(registry, name, searcher)

    config.add_tween("ichnaea.db.db_tween_factory", under=EXCVIEW)
    config.add_tween("ichnaea.log.log_tween_factory", under=EXCVIEW)
    config.add_request_method(db_session, property=True)

    # freeze skip logging set
    config.registry.skip_logging = frozenset(config.registry.skip_logging)

    # Should we try to initialize and establish the outbound connections?
    if ping_connections:
        with db_worker_session(registry.db, commit=False) as session:
            ping_session(session)
        registry.redis_client.ping()

    return config.make_wsgi_app()

def main(global_config, heka_config=None, init=False,
         _db_master=None, _db_slave=None, _heka_client=None, _redis=None,
         _stats_client=None, **settings):
    config = Configurator(settings=settings)

    # add support for pt templates
    config.include('pyramid_chameleon')

    settings = config.registry.settings

    from ichnaea.content.views import configure_content
    from ichnaea.logging import configure_heka
    from ichnaea.logging import configure_stats
    from ichnaea.service import configure_service

    configure_content(config)
    configure_service(config)

    # configure databases incl. test override hooks
    if _db_master is None:
        config.registry.db_master = Database(settings['db_master'])
    else:
        config.registry.db_master = _db_master
    if _db_slave is None:
        config.registry.db_slave = Database(settings['db_slave'])
    else:
        config.registry.db_slave = _db_slave

    if _redis is None:
        config.registry.redis_client = None
        if 'redis_url' in settings:
            config.registry.redis_client = redis_client(settings['redis_url'])
    else:
        config.registry.redis_client = _redis

    config.registry.geoip_db = configure_geoip(config.registry.settings)

    if _heka_client is None:
        config.registry.heka_client = configure_heka(heka_config)
    else:
        config.registry.heka_client = _heka_client

    config.registry.stats_client = configure_stats(
        settings.get('statsd_host'), _client=_stats_client)

    config.add_tween('ichnaea.db.db_tween_factory', under=EXCVIEW)
    config.add_tween('ichnaea.logging.log_tween_factory', under=EXCVIEW)
    config.add_request_method(db_master_session, property=True)
    config.add_request_method(db_slave_session, property=True)

    # replace json renderer with custom json variant
    config.add_renderer('json', customjson.Renderer())

    # Should we try to initialize and establish the outbound connections?
    if init:
        # Test the slave DB connection
        with db_worker_session(config.registry.db_slave) as session:
            try:
                session.execute(select([func.now()])).first()
            except OperationalError:
                # Let the instance start, so it can recover / reconnect
                # to the DB later, but provide degraded service in the
                # meantime.
                pass

        # Test the redis connection
        try:
            config.registry.redis_client.ping()
        except ConnectionError:
            # Same as for the DB, continue with degraded service.
            pass

    return config.make_wsgi_app()

def export_to_csv(filename, csv_dir, tablename, row_limit=None, file_limit=None):
    """
    Export a datamap table to a CSV file.

    :param filename: An output file ending in .csv
    :param csv_dir: The output directory
    :param tablename: The name of the datamap table to export
    :param row_limit: The number of rows to fetch at a time
    :param file_limit: The number of output rows before rotating files
    :return: A tuple (rows exported, files created)

    Each database row is turned into 0 to 6 similar CSV rows by
    random_points(), based on how recently they were recorded.

    If file_limit is not reached, the output file will have the filename.
    If file_limit is reached, the output files will have a serial number
    and be based on the filename. For example, "map.csv" will become
    "map_0001.csv", "map_0002.csv", etc.
    """
    stmt = text(
        """\
SELECT
`grid`,
CAST(ROUND(DATEDIFF(CURDATE(), `modified`) / 30) AS UNSIGNED) as `num`
FROM {tablename}
WHERE `grid` > :grid
ORDER BY `grid`
LIMIT :limit
""".format(tablename=tablename).replace("\n", " ")
    )
    db = configure_db("ro", pool=False)
    min_grid = b""
    row_limit = row_limit or 200_000
    file_limit = file_limit or 10_000_000

    result_rows = 0
    file_path = os.path.join(csv_dir, filename)
    fd = open(file_path, "w")
    file_count = 1
    file_rows = 0
    orig_filename = filename
    orig_file_path = file_path
    assert filename.endswith(".csv")
    try:
        with db_worker_session(db, commit=False) as session:
            while True:
                result = session.execute(
                    stmt.bindparams(limit=row_limit, grid=min_grid)
                )
                rows = result.fetchall()
                result.close()
                if not rows:
                    break

                lines = []
                extend = lines.extend
                for row in rows:
                    lat, lon = decode_datamap_grid(row.grid)
                    extend(random_points(lat, lon, row.num))

                fd.writelines(lines)
                result_rows += len(lines)

                # Rotate to a new file when the per-file row limit is reached.
                # Compare file_rows (reset on rotation), not result_rows (the
                # running total), or every batch after the first rotation
                # would open a new file.
                file_rows += len(lines)
                if file_rows >= file_limit:
                    fd.close()
                    file_count += 1
                    file_rows = 0
                    filename = orig_filename.replace(
                        ".csv", f"_{file_count:04}.csv"
                    )
                    file_path = os.path.join(csv_dir, filename)
                    fd = open(file_path, "w")

                min_grid = rows[-1].grid
    finally:
        fd.close()

    if not file_rows:
        os.remove(file_path)
        file_count -= 1

    if file_count > 1:
        # Rename first file to serial CSV format
        filename = orig_filename.replace(".csv", "_0001.csv")
        file_path = os.path.join(csv_dir, filename)
        os.rename(orig_file_path, file_path)

    db.close()
    return result_rows, file_count

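# A usage sketch for export_to_csv() above (illustrative; the table name,
# directory and limits are assumptions, not taken from the source):
def example_datamap_export():
    rows, files = export_to_csv(
        "map.csv", "/tmp/csv", "datamap_ne",
        row_limit=10_000, file_limit=1_000_000)
    # Under file_limit rows this writes /tmp/csv/map.csv; past it, output
    # rotates to map_0001.csv, map_0002.csv, and so on.
    return rows, files
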
def db_session(self, commit=True):
    # returns a context manager
    return db_worker_session(self.app.db_rw, commit=commit)

def main(global_config, heka_config=None, init=False,
         _db_master=None, _db_slave=None, _heka_client=None, _redis=None,
         _stats_client=None, **settings):
    config = Configurator(settings=settings)

    # add support for pt templates
    config.include('pyramid_chameleon')

    settings = config.registry.settings

    from ichnaea.content.views import configure_content
    from ichnaea.logging import configure_heka
    from ichnaea.logging import configure_stats
    from ichnaea.service import configure_service

    configure_content(config)
    configure_service(config)

    # configure databases incl. test override hooks
    if _db_master is None:
        config.registry.db_master = Database(settings['db_master'])
    else:
        config.registry.db_master = _db_master
    if _db_slave is None:
        config.registry.db_slave = Database(settings['db_slave'])
    else:
        config.registry.db_slave = _db_slave

    if _redis is None:
        config.registry.redis_client = None
        if 'redis_url' in settings:
            config.registry.redis_client = redis_client(settings['redis_url'])
    else:
        config.registry.redis_client = _redis

    if _heka_client is None:  # pragma: no cover
        config.registry.heka_client = heka_client = configure_heka(heka_config)
    else:
        config.registry.heka_client = heka_client = _heka_client

    config.registry.stats_client = configure_stats(
        settings.get('statsd_host'), _client=_stats_client)

    config.registry.geoip_db = configure_geoip(
        config.registry.settings, heka_client=heka_client)

    config.add_tween('ichnaea.db.db_tween_factory', under=EXCVIEW)
    config.add_tween('ichnaea.logging.log_tween_factory', under=EXCVIEW)
    config.add_request_method(db_master_session, property=True)
    config.add_request_method(db_slave_session, property=True)

    # replace json renderer with custom json variant
    config.add_renderer('json', customjson.Renderer())

    # Should we try to initialize and establish the outbound connections?
    if init:  # pragma: no cover
        # Test the slave DB connection
        with db_worker_session(config.registry.db_slave) as session:
            try:
                session.execute(select([func.now()])).first()
            except OperationalError:
                # Let the instance start, so it can recover / reconnect
                # to the DB later, but provide degraded service in the
                # meantime.
                pass

        # Test the redis connection
        try:
            config.registry.redis_client.ping()
        except ConnectionError:
            # Same as for the DB, continue with degraded service.
            pass

    return config.make_wsgi_app()