def geojson_to_mongodb(import_file: str, target_db: str, target_col: str, osm=True):
    """Replace a MongoDB collection with the features of a GeoJSON file.

    The file is read and validated *before* the database is touched, so an
    unreadable or malformed file never causes the existing collection to be
    dropped. On a valid file the target collection is dropped (if present),
    re-created with a 2dsphere index on ``geometry`` and, when ``osm`` is true,
    a text index on ``properties.type`` / ``properties.id``; then all features
    are inserted in a single unordered batch.

    The connection is opened with the module-level ``uri`` and closed before
    returning.

    Args:
        import_file: Path of the GeoJSON file to import.
        target_db: Name of the target database.
        target_col: Name of the target collection (dropped and rebuilt).
        osm: When true, also create the text index on the OSM type/id
            properties.

    Returns:
        Always ``None``. Outcome and per-feature write errors are reported
        through ``logging``.
    """
    # Based on: https://github.com/rtbigdata/geojson-mongo-import.py | MIT License
    import_file_path = Path(import_file)
    if not import_file_path.is_file():
        logging.error(f"Finish: Import file {import_file} does not exist.")
        return None

    # Read as UTF-8 explicitly instead of relying on the platform locale; the
    # companion export in this file writes its GeoJSON as UTF-8 as well.
    with open(import_file_path, "r", encoding="utf-8") as f:
        logging.info(f"Start: Opening GeoJSON file {import_file}.")
        try:
            geojson_file = json.load(f)
        except ValueError as e:
            # Covers json.JSONDecodeError and UnicodeDecodeError alike.
            logging.error(f"JSON file {import_file} is invalid. Reason: {e}")
            return None

    # Validate the payload shape before any destructive database operation.
    features = geojson_file.get("features") if isinstance(geojson_file, dict) else None
    if not isinstance(features, list):
        logging.error(
            f"JSON file {import_file} is invalid. Reason: no 'features' list found."
        )
        return None
    logging.info("Finish: Features loaded to object.")

    client = MongoClient(uri)
    try:
        db = client[target_db]
        collection = db[target_col]

        if target_col in db.list_collection_names():
            logging.info(f"Start: Dropping existing collection {target_col}.")
            collection.drop()

        # create 2dsphere index and text indexes
        collection.create_index([("geometry", GEOSPHERE)])
        if osm:
            collection.create_index([("properties.type", TEXT), ("properties.id", TEXT)])

        if not features:
            # insert_many rejects an empty document list; nothing to load.
            logging.warning(
                f"Finish: No features found in {import_file}; "
                f"collection {target_col} left empty."
            )
            return None

        try:
            logging.info(f"Start: Loading features to collection {target_col}.")
            # Unordered so that one bad feature does not abort the whole batch.
            result = collection.insert_many(features, ordered=False)
            logging.info(
                f'Finish: Number of Features successully inserted: {len(result.inserted_ids)} '
            )
        except BulkWriteError as bwe:
            n_inserted = bwe.details["nInserted"]
            err_msg = bwe.details["writeErrors"]
            logging.error("Errors encountered inserting features")
            logging.error(f"Number of Features successully inserted: {n_inserted} ")
            logging.error("The following errors were found:")
            for item in err_msg:
                logging.error(f'Index of feature: {item["index"]}')
                logging.error(f'Error code: {item["code"]}')
                logging.error(
                    f'Message(truncated due to data length): {item["errmsg"][0:120]}'
                )
    finally:
        # Release the connection pool even if an index or insert call fails.
        client.close()
    return None
def overpass_to_geojson(
    output_file: str,
    area_id: int,
    out: str = "center",  # center, body, geom
    response_type: str = "json",
    overpass_endpoint: str = OVERPASS_ENDPOINTS[0],
    force_download=False,
    **kwargs,
):
    """Query Overpass for tagged objects in an area and save them as GeoJSON.

    The download is skipped when ``output_file`` already exists, was last
    modified today, and ``force_download`` is ``False``.

    Args:
        output_file: Path of the .geojson file to write.
        area_id: Overpass area id used as the search area.
        out: Overpass ``out`` statement modifier (e.g. ``center``, ``body``,
            ``geom``).
        response_type: Overpass output format, ``"json"`` or ``"xml"``.
        overpass_endpoint: URL of the Overpass interpreter endpoint.
        force_download: Download even if the file was already written today.
        **kwargs: Tag filters; each ``key=value`` pair becomes a
            ``["key"="value"]`` filter applied to nodes, ways and relations.

    Returns:
        ``True`` when the file was written; ``None`` when the file is already
        up to date, ``response_type`` is unsupported, the server answered with
        a status other than 200, or the response could not be converted.

    Raises:
        SystemExit: If the server answers with an HTTP error status.
        requests.exceptions.RequestException: Connection problems and
            timeouts are not handled here and propagate to the caller.
    """
    if response_type not in ["json", "xml"]:
        logging.error(f"Finish: Unsupported response type: {response_type}")
        return None

    today = date.today()
    output_file_path = Path(output_file)
    tags_to_download = "".join(f'["{key}"="{value}"]' for key, value in kwargs.items())

    try:
        file_last_mod_date = datetime.fromtimestamp(
            output_file_path.stat().st_mtime).date()
    except FileNotFoundError:
        # Sentinel date that can never equal today, so a download is triggered.
        file_last_mod_date = date(1900, 1, 1)

    if (output_file_path.is_file() and file_last_mod_date == today
            and force_download is False):
        logging.info(
            f"Finish: File is up to date. (generated: {file_last_mod_date})")
        return None

    # Step 2 - connecting and getting data from Overpass
    logging.info(
        f"Info: Export .geojson file last modification date: {file_last_mod_date}"
    )

    # Server-side query timeout in seconds; also bounds the client read timeout.
    query_timeout_s = 20005
    # Overpass Query
    compact_query = f"[out:{response_type}][timeout:{query_timeout_s}];area({area_id})->.searchArea;(node{tags_to_download}(area.searchArea);way{tags_to_download}(area.searchArea);relation{tags_to_download}(area.searchArea););out {out};"

    logging.info(
        f"Start: Connecting to Overpass server: {overpass_endpoint}")
    try:
        # Pass the query through `params` so it is URL-encoded; concatenating
        # it into the URL breaks on tag values containing '&', '#' or '+'.
        # The timeout keeps an unreachable or stalled server from blocking
        # the caller forever: (connect seconds, read seconds).
        response = requests.get(
            overpass_endpoint,
            params={"data": compact_query},
            timeout=(30, query_timeout_s + 60),
        )
        response.raise_for_status()
    except requests.exceptions.HTTPError as err:
        raise SystemExit(err)

    # raise_for_status() only rejects 4xx/5xx; other non-200 answers land here.
    if response.status_code != 200:
        logging.error("End: Server response other than 200")
        return None

    try:
        logging.info(
            "Start: Getting data and extracting to .geojson object..")
        if response_type == "json":
            geojson_response = json2geojson(response.text, log_level="ERROR")
        else:
            geojson_response = xml2geojson(response.text, log_level="ERROR")
    except Exception:
        # Not a bare except: KeyboardInterrupt/SystemExit must not be swallowed.
        logging.exception(
            "Finish: Error when converting response .json to .geojson")
        return None

    with open(output_file_path, mode="w", encoding="utf-8") as f:
        geojson.dump(geojson_response, f)
    logging.info(
        "Finish: GeoJSON object successfully dumped to .geojson file")
    return True