def import_source_locations(
    vial_http: urllib3.connectionpool.ConnectionPool,
    import_run_id: str,
    import_locations: Iterable[load.ImportSourceLocation],
    import_batch_size: int = 500,
) -> None:
    """Import source locations"""
    # The run id is fixed for the whole call, so build the path once.
    endpoint = f"/api/importSourceLocations?import_run_id={import_run_id}"

    for chunk in misc.batch(import_locations, import_batch_size):
        # One ndjson document per batch: one JSON object per line.
        payload = "\n".join(loc.json(exclude_none=True) for loc in chunk)

        rsp = vial_http.request(
            "POST",
            endpoint,
            headers={
                **vial_http.headers,
                "Content-Type": "application/x-ndjson",
            },
            body=payload.encode("utf-8"),
        )

        if rsp.status != 200:
            raise HTTPError(
                f"/api/importSourceLocations?import_run_id={import_run_id}",
                rsp.status,
                rsp.data[:100],
                dict(rsp.headers),
                None,
            )
def retrieve_existing_locations(
    vial_http: urllib3.connectionpool.ConnectionPool,
) -> Iterator[dict]:
    """Yield every existing location from VIAL as a parsed geojson dict.

    Streams /api/searchLocations?format=nlgeojson&all=1 line by line so the
    full result set is never materialized in memory.

    Args:
        vial_http: Connection pool preconfigured for the VIAL API host.

    Yields:
        One parsed geojson record per newline-delimited response line.
    """
    # Fix: the original docstring was a copy-paste from verify_token and
    # described token verification instead of location retrieval.
    resp = vial_http.request(
        "GET", "/api/searchLocations?format=nlgeojson&all=1", preload_content=False
    )
    try:
        for line in resp:
            yield geojson.loads(line)
    finally:
        # Fix: release the connection back to the pool even if the consumer
        # abandons the generator early or a parse error raises mid-stream;
        # previously the connection leaked in those cases.
        resp.release_conn()
def start_import_run(vial_http: urllib3.connectionpool.ConnectionPool) -> str:
    """Start import run and return the id for it"""
    resp = vial_http.request("POST", "/api/startImportRun")
    if resp.status != 200:
        raise Exception(f"Failed to start import run {resp.data}")

    payload = json.loads(resp.data.decode("utf-8"))

    # VIAL replies with {"import_run_id": "..."}; treat a missing or empty
    # id as a failed run start.
    run_id = payload.get("import_run_id")
    if not run_id:
        raise Exception(f"Failed to start import run {payload}")

    return run_id
def import_source_locations(
    vial_http: urllib3.connectionpool.ConnectionPool,
    import_run_id: str,
    import_locations: Iterable[schema.ImportSourceLocation],
) -> urllib3.response.HTTPResponse:
    """Import source locations"""
    # Serialize every location to one JSON object per line (ndjson).
    lines = [loc.json(exclude_none=True) for loc in import_locations]
    body = "\n".join(lines).encode("utf-8")

    headers = {**vial_http.headers, "Content-Type": "application/x-ndjson"}
    return vial_http.request(
        "POST",
        f"/api/importSourceLocations?import_run_id={import_run_id}",
        headers=headers,
        body=body,
    )
def search_source_locations_as_geojson(
    vial_http: urllib3.connectionpool.ConnectionPool,
    **kwds: Any,
) -> Iterator[geojson.Feature]:
    """Wrapper around search source locations api. Returns geojson.

    Streams /api/searchSourceLocations in nlgeojson format, skipping (and
    logging) any line that fails json or geojson parsing.

    Args:
        vial_http: Connection pool preconfigured for the VIAL API host.
        **kwds: Extra query parameters forwarded to the search endpoint.

    Yields:
        One geojson.Feature per valid response line.
    """
    params = {
        **kwds,
        "format": "nlgeojson",
    }
    query = urllib.parse.urlencode(params)
    path_and_query = f"/api/searchSourceLocations?{query}"

    logger.info("Contacting VIAL: GET %s", path_and_query)
    resp = vial_http.request("GET", path_and_query, preload_content=False)

    line_num = 0
    try:
        for line_num, line in enumerate(resp, start=1):
            if line_num % 5000 == 0:
                logger.info("Processed %d source location records from VIAL.", line_num)

            try:
                # orjson.JSONDecodeError subclasses json.JSONDecodeError, so
                # this catch covers orjson parse failures.
                record = orjson.loads(line)
            except json.JSONDecodeError as e:
                logger.warning(
                    "Invalid json record in source search response: %s\n%s",
                    line,
                    str(e),
                )
                continue

            _clean_geojson_record(record)

            try:
                feature = geojson.Feature(**record)
            except ValueError as e:
                logger.warning(
                    "Invalid geojson record in source search response: %s\n%s",
                    line,
                    str(e),
                )
                continue

            yield feature

        logger.info("Processed %d total source location records from VIAL.", line_num)
    finally:
        # Fix: release the connection even when the consumer abandons the
        # generator early or an unexpected error raises mid-stream;
        # previously release_conn() was skipped in those cases.
        resp.release_conn()
def import_source_locations(
    vial_http: urllib3.connectionpool.ConnectionPool,
    import_run_id: str,
    import_locations: Iterable[load.ImportSourceLocation],
    import_batch_size: int = 500,
) -> None:
    """Import source locations"""
    path_and_query = f"/api/importSourceLocations?import_run_id={import_run_id}"
    logger.info("Contacting VIAL: POST %s", path_and_query)

    num_batches = 0
    for chunk in misc.batch(import_locations, import_batch_size):
        # Serialize the batch as ndjson: one JSON object per line.
        body = b"\n".join(
            orjson.dumps(loc.dict(exclude_none=True)) for loc in chunk
        )

        rsp = vial_http.request(
            "POST",
            path_and_query,
            headers={
                **vial_http.headers,
                "Content-Type": "application/x-ndjson",
            },
            body=body,
        )

        if rsp.status != 200:
            raise HTTPError(
                f"/api/importSourceLocations?import_run_id={import_run_id}",
                rsp.status,
                rsp.data[:100],
                dict(rsp.headers),
                None,
            )

        num_batches += 1
        # Periodic progress logging so long imports stay observable.
        if num_batches % 5 == 0:
            logger.info(
                "Submitted %d batches of up to %d records to VIAL.",
                num_batches,
                import_batch_size,
            )

    logger.info("Submitted %d total batches to VIAL.", num_batches)
def search_source_locations_as_summary(
    vial_http: urllib3.connectionpool.ConnectionPool,
    **kwds: Any,
) -> Iterator[SourceLocationSummary]:
    """Wrapper around search source locations api. Returns summary objects.

    Streams /api/searchSourceLocations in summary format, skipping (and
    logging) invalid json lines and any record missing a source_uid.

    Args:
        vial_http: Connection pool preconfigured for the VIAL API host.
        **kwds: Extra query parameters forwarded to the search endpoint.

    Yields:
        One SourceLocationSummary per valid response record.
    """
    params = {
        **kwds,
        "format": "summary",
    }
    query = urllib.parse.urlencode(params)
    path_and_query = f"/api/searchSourceLocations?{query}"

    logger.info("Contacting VIAL: GET %s", path_and_query)
    resp = vial_http.request("GET", path_and_query, preload_content=False)

    line_num = 0
    try:
        for line_num, line in enumerate(resp, start=1):
            if line_num % 5000 == 0:
                logger.info("Processed %d source location records from VIAL.", line_num)

            try:
                record = orjson.loads(line)
            except json.JSONDecodeError as e:
                logger.warning(
                    "Invalid json record in source search response: %s\n%s",
                    line,
                    str(e),
                )
                continue

            # Records without a source_uid cannot be summarized; skip them.
            if not record.get("source_uid"):
                continue

            summary = SourceLocationSummary(
                source_uid=record["source_uid"],
                content_hash=record.get("content_hash"),
                matched=bool(record.get("matched_location_id")),
            )

            yield summary

        logger.info("Processed %d total source location records from VIAL.", line_num)
    finally:
        # Fix: release the connection even when the consumer abandons the
        # generator early or an unexpected error raises mid-stream;
        # previously release_conn() was skipped in those cases.
        resp.release_conn()
def search_locations(
    vial_http: urllib3.connectionpool.ConnectionPool,
    **kwds: Any,
) -> Iterator[dict]:
    """Wrapper around search locations api. Returns geojson.

    Streams /api/searchLocations in nlgeojson format, skipping (and logging)
    lines that are not valid json.

    Args:
        vial_http: Connection pool preconfigured for the VIAL API host.
        **kwds: Extra query parameters forwarded to the search endpoint.

    Yields:
        One parsed geojson record per valid response line.
    """
    params = {
        **kwds,
        "format": "nlgeojson",
    }
    query = urllib.parse.urlencode(params)

    resp = vial_http.request(
        "GET", f"/api/searchLocations?{query}", preload_content=False
    )
    try:
        for line in resp:
            try:
                yield geojson.loads(line)
            except json.JSONDecodeError:
                logger.warning("Invalid json record in search response: %s", line)
    finally:
        # Fix: release the connection even when the consumer abandons the
        # generator early or an unexpected error raises mid-stream;
        # previously release_conn() was skipped in those cases.
        resp.release_conn()
def verify_token(vial_http: urllib3.connectionpool.ConnectionPool) -> bool:
    """Verifies that header contains valid authorization token"""
    # A 200 from /api/verifyToken means the pool's auth header was accepted.
    status = vial_http.request("GET", "/api/verifyToken").status
    return status == 200
def import_source_locations(
    vial_http: urllib3.connectionpool.ConnectionPool,
    import_run_id: str,
    import_locations: Iterable[load.ImportSourceLocation],
    import_batch_size: int = IMPORT_BATCH_SIZE,
) -> ImportSourceLocationsResult:
    """Import source locations"""
    created = set()
    updated = set()

    path_and_query = f"/api/importSourceLocations?import_run_id={import_run_id}"
    logger.info("Contacting VIAL: POST %s", path_and_query)

    num_batches = 0
    for chunk in misc.batch(import_locations, import_batch_size):
        serialized = []
        for loc in chunk:
            # Track which records VIAL will create vs. update, using the
            # match action attached by the matching stage.
            target = created if (loc.match and loc.match.action == "new") else updated
            target.add(loc.source_uid)
            serialized.append(orjson.dumps(loc.dict(exclude_none=True)))
        body = b"\n".join(serialized)

        try:
            rsp = vial_http.request(
                "POST",
                path_and_query,
                headers={
                    **vial_http.headers,
                    "Content-Type": "application/x-ndjson",
                },
                body=body,
            )
        except Exception as e:
            # Log the head and tail of the payload so oversized or malformed
            # batches can be diagnosed, then propagate the original error.
            logger.error(
                "Error while importing locations: %s (...) %s: %s",
                body[:100],
                body[-100:],
                e,
            )
            raise

        if rsp.status != 200:
            raise HTTPError(
                f"/api/importSourceLocations?import_run_id={import_run_id}",
                rsp.status,
                rsp.data[:100],
                dict(rsp.headers),
                None,
            )

        num_batches += 1
        # Periodic progress logging so long imports stay observable.
        if num_batches % 5 == 0:
            logger.info(
                "Submitted %d batches of up to %d records to VIAL.",
                num_batches,
                import_batch_size,
            )

    logger.info("Submitted %d total batches to VIAL.", num_batches)

    return ImportSourceLocationsResult(created=created, updated=updated)