def merge_geojson():
    """Aggregate every geojson on transport.data.gouv.fr into 3 datasets
    (newline-delimited GeoJSON, plain GeoJSON, GeoPackage) and publish the
    resulting resources on data.gouv.
    """
    with log_context(task_id="merge_geojson"):
        with tempfile.TemporaryDirectory() as workdir:
            merged_line = _create_merged_geojson_line(workdir)
            logging.info(f"geojson line created: {merged_line}")

            gpkg_path = f"{workdir}/public-transit.gpkg"
            geojson_path = f"{workdir}/public-transit.geojson"
            geojson_zip = f"{geojson_path}.zip"
            geojson_line_zip = f"{merged_line}.zip"

            # Derive the two other formats from the merged GeoJSONSeq file.
            utils.run_command(
                ["ogr2ogr", gpkg_path, f"GeoJSONSeq:{merged_line}"])
            utils.run_command(
                ["ogr2ogr", geojson_path, f"GeoJSONSeq:{merged_line}"])

            # Compress the (large) geojson artifacts before uploading.
            utils.run_command(
                ["zip", "--junk-paths", f"{geojson_zip}", geojson_path])
            utils.run_command(
                ["zip", "--junk-paths", f"{geojson_line_zip}", merged_line])

            _publish_to_datagouv(geojson_zip, geojson_line_zip, gpkg_path)
            logging.info("all files published")
def test_context_manager(context):
    """Fields pushed by nested log_context levels stack, then unwind on exit."""
    with log_context(myField='toto'):
        assert 'myField' in log_context.as_dict()

        # Second stack level: both the new and the outer field are visible.
        with log_context(myOtherField='toto'):
            inner = log_context.as_dict()
            assert 'myOtherField' in inner
            assert 'myField' in inner

        # Back to one level: the inner field is gone, the outer one remains.
        outer = log_context.as_dict()
        assert 'myField' in outer
        assert 'myOtherField' not in outer

    # Fully unwound: nothing from the stack survives.
    assert 'myField' not in log_context.as_dict()
def test_deep_update_dict_context_manger(context):
    """Nested log_context dicts deep-merge: inner levels add and override leaves."""
    log_context.remove('rid')
    with log_context(myField={'toto': {'tata1': {'titi1': 'tutu'}}}):
        assert log_context.as_dict() == {
            'myField': {'toto': {'tata1': {'titi1': 'tutu'}}}}

        # Adding titi2 deep-merges into tata1 instead of replacing it.
        with log_context(myField={'toto': {'tata1': {'titi2': 'tutu'}}}):
            assert log_context.as_dict() == {
                'myField': {
                    'toto': {'tata1': {'titi1': 'tutu', 'titi2': 'tutu'}}}}

            # A deeper level can also override an existing leaf (titi1).
            with log_context(myField={'toto': {'tata1': {'titi1': 'val'}}}):
                assert log_context.as_dict() == {
                    'myField': {
                        'toto': {'tata1': {'titi1': 'val', 'titi2': 'tutu'}}}}

    # Everything unwinds once all levels are exited.
    assert 'myField' not in log_context.as_dict()
def test_deep_update_dict_context_manager(context):
    """Nested log_context dicts deep-merge: inner levels add and override leaves.

    NOTE(review): this was a second definition of
    ``test_deep_update_dict_context_manger`` — the duplicate name shadowed the
    first test so pytest never collected it (flake8 F811). Renamed (and the
    "manger" typo fixed) so both definitions run.
    """
    log_context.remove('rid')
    with log_context(myField={'toto': {'tata1': {'titi1': 'tutu'}}}):
        fields = log_context.as_dict()
        assert fields == {'myField': {'toto': {'tata1': {'titi1': 'tutu'}}}}

        # Update tata1 to add titi2: the inner level merges, not replaces.
        with log_context(myField={'toto': {'tata1': {'titi2': 'tutu'}}}):
            fields = log_context.as_dict()
            assert fields == {
                'myField': {
                    'toto': {
                        'tata1': {
                            'titi1': 'tutu',
                            'titi2': 'tutu'
                        }
                    }
                }
            }

            # Override value `tata1/titi1` while keeping the merged titi2.
            with log_context(myField={'toto': {'tata1': {'titi1': 'val'}}}):
                fields = log_context.as_dict()
                assert fields == {
                    'myField': {
                        'toto': {
                            'tata1': {
                                'titi1': 'val',
                                'titi2': 'tutu'
                            }
                        }
                    }
                }

    # All levels exited: the field is gone from the context.
    fields = log_context.as_dict()
    assert 'myField' not in fields
def cleanup_old_resources():
    """
    Delete the community resources when the main resource has been deleted.

    Fetches every dataset from the transport API, runs the per-dataset
    cleanup on each "public-transit" dataset, and logs the total count.
    Raises requests.HTTPError if the dataset listing request fails.
    """
    with log_context(task_id="cleanup"):
        logging.info("Cleaning up old resources")
        r = requests.get(f"{TRANSPORT_API_ENDPOINT}/api/datasets")
        r.raise_for_status()
        datasets = r.json()
        total_cleaned = 0
        for d in datasets:
            # Fix: dropped the unused `dataset_name = d["title"]` local — it
            # was never read and was computed even for skipped datasets.
            if d["type"] != "public-transit":
                continue
            total_cleaned += _cleanup_old_dataset_resources(d)
        logging.info(f"{total_cleaned} resources cleaned")
def convert(params):
    """
    Download the GTFS at params["url"] and run each requested conversion.

    params is a dict with keys "url", "datagouv_id" and "conversion_type"
    (an iterable that may contain "gtfs2netex" and/or "gtfs2geojson").
    Any failure is logged with its traceback and then re-raised so the
    caller (the job queue) still sees the error.
    """
    with log_context(task_id=params["datagouv_id"]):
        try:
            logging.info(
                f"Dequeing {params['url']} for datagouv_id {params['datagouv_id']} and {params['conversion_type']} conversions"
            )
            gtfs, fname = utils.download_gtfs(params["url"])
            for conversion in params["conversion_type"]:
                if conversion == "gtfs2netex":
                    _convert_to_netex(gtfs, fname, params["datagouv_id"],
                                      params["url"])
                if conversion == "gtfs2geojson":
                    _convert_to_geojson(gtfs, fname, params["datagouv_id"],
                                        params["url"])
            logging.info("job finished")
        # Fix: was a bare `except:`, which also caught SystemExit and
        # KeyboardInterrupt and logged them as job failures. Exception
        # keeps those interpreter-control signals propagating untouched.
        except Exception:
            logging.exception("job failed")
            raise