Example #1
def import_source_locations(
    vial_http: urllib3.connectionpool.ConnectionPool,
    import_run_id: str,
    import_locations: Iterable[load.ImportSourceLocation],
    import_batch_size: int = 500,
) -> None:
    """Import source locations"""
    for import_locations_batch in misc.batch(import_locations,
                                             import_batch_size):
        encoded_ndjson = "\n".join(
            [loc.json(exclude_none=True) for loc in import_locations_batch])

        rsp = vial_http.request(
            "POST",
            f"/api/importSourceLocations?import_run_id={import_run_id}",
            headers={
                **vial_http.headers, "Content-Type": "application/x-ndjson"
            },
            body=encoded_ndjson.encode("utf-8"),
        )

        if rsp.status != 200:
            raise HTTPError(
                f"/api/importSourceLocations?import_run_id={import_run_id}",
                rsp.status,
                rsp.data[:100],
                dict(rsp.headers),
                None,
            )
Example #2
def retrieve_existing_locations(
    vial_http: urllib3.connectionpool.ConnectionPool,
) -> Iterator[dict]:
    """Verifies that header contains valid authorization token"""
    resp = vial_http.request(
        "GET", "/api/searchLocations?format=nlgeojson&all=1", preload_content=False
    )

    for line in resp:
        yield geojson.loads(line)

    resp.release_conn()
Example #3
def start_import_run(vial_http: urllib3.connectionpool.ConnectionPool) -> str:
    """Start import run and return the id for it"""
    import_resp = vial_http.request("POST", "/api/startImportRun")
    if import_resp.status != 200:
        raise Exception(f"Failed to start import run {import_resp.data}")

    import_data = json.loads(import_resp.data.decode("utf-8"))
    import_run_id = import_data.get("import_run_id")

    if not import_run_id:
        raise Exception(f"Failed to start import run {import_data}")

    return import_run_id
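
The snippet below is a minimal usage sketch showing how a connection pool might be built before starting an import run. The host name, API key, and the bearer-token header format are placeholder assumptions, not values taken from the examples above.

import urllib3

# Hypothetical VIAL host and API key; substitute real deployment values.
vial_http = urllib3.connectionpool.connection_from_url(
    "https://vial.example.org",
    headers={"Authorization": "Bearer example-api-key"},  # assumed header format
)

# The pool is then passed to the helpers above.
import_run_id = start_import_run(vial_http)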
Example #4
def import_source_locations(
    vial_http: urllib3.connectionpool.ConnectionPool,
    import_run_id: str,
    import_locations: Iterable[schema.ImportSourceLocation],
) -> urllib3.response.HTTPResponse:
    """Import source locations"""
    encoded_ndjson = "\n".join(
        [loc.json(exclude_none=True) for loc in import_locations]
    )

    return vial_http.request(
        "POST",
        f"/api/importSourceLocations?import_run_id={import_run_id}",
        headers={**vial_http.headers, "Content-Type": "application/x-ndjson"},
        body=encoded_ndjson.encode("utf-8"),
    )
Example #5
def search_source_locations_as_geojson(
    vial_http: urllib3.connectionpool.ConnectionPool,
    **kwds: Any,
) -> Iterator[geojson.Feature]:
    """Wrapper around search source locations api. Returns geojson."""
    params = {
        **kwds,
        "format": "nlgeojson",
    }

    query = urllib.parse.urlencode(params)

    path_and_query = f"/api/searchSourceLocations?{query}"
    logger.info("Contacting VIAL: GET %s", path_and_query)

    resp = vial_http.request("GET", path_and_query, preload_content=False)

    line_num = 0
    for line_num, line in enumerate(resp, start=1):
        if line_num % 5000 == 0:
            logger.info("Processed %d source location records from VIAL.",
                        line_num)

        try:
            record = orjson.loads(line)
        except json.JSONDecodeError as e:
            logger.warning(
                "Invalid json record in source search response: %s\n%s", line,
                str(e))
            continue

        _clean_geojson_record(record)

        try:
            feature = geojson.Feature(**record)
        except ValueError as e:
            logger.warning(
                "Invalid geojson record in source search response: %s\n%s",
                line, str(e))
            continue

        yield feature

    logger.info("Processed %d total source location records from VIAL.",
                line_num)
    resp.release_conn()
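
One way to consume this generator is sketched below. The "state" filter is only an assumed query parameter for illustration; any supported search parameter can be passed through **kwds.

# "state" is a hypothetical search parameter forwarded to the API via **kwds.
for feature in search_source_locations_as_geojson(vial_http, state="CA"):
    properties = feature.get("properties") or {}
    print(properties.get("source_uid"), feature.get("geometry"))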
Example #6
def import_source_locations(
    vial_http: urllib3.connectionpool.ConnectionPool,
    import_run_id: str,
    import_locations: Iterable[load.ImportSourceLocation],
    import_batch_size: int = 500,
) -> None:
    """Import source locations"""
    path_and_query = f"/api/importSourceLocations?import_run_id={import_run_id}"
    logger.info("Contacting VIAL: POST %s", path_and_query)

    batches = 0
    for import_locations_batch in misc.batch(import_locations,
                                             import_batch_size):
        encoded_ndjson = b"\n".join([
            orjson.dumps(loc.dict(exclude_none=True))
            for loc in import_locations_batch
        ])

        rsp = vial_http.request(
            "POST",
            path_and_query,
            headers={
                **vial_http.headers, "Content-Type": "application/x-ndjson"
            },
            body=encoded_ndjson,
        )

        if rsp.status != 200:
            raise HTTPError(
                f"/api/importSourceLocations?import_run_id={import_run_id}",
                rsp.status,
                rsp.data[:100],
                dict(rsp.headers),
                None,
            )

        batches += 1
        if batches % 5 == 0:
            logger.info(
                "Submitted %d batches of up to %d records to VIAL.",
                batches,
                import_batch_size,
            )

    logger.info("Submitted %d total batches to VIAL.", batches)
Example #7
def search_source_locations_as_summary(
    vial_http: urllib3.connectionpool.ConnectionPool,
    **kwds: Any,
) -> Iterator[SourceLocationSummary]:
    """Wrapper around search source locations api. Returns summary objects."""
    params = {
        **kwds,
        "format": "summary",
    }

    query = urllib.parse.urlencode(params)

    path_and_query = f"/api/searchSourceLocations?{query}"
    logger.info("Contacting VIAL: GET %s", path_and_query)

    resp = vial_http.request("GET", path_and_query, preload_content=False)

    line_num = 0
    for line_num, line in enumerate(resp, start=1):
        if line_num % 5000 == 0:
            logger.info("Processed %d source location records from VIAL.",
                        line_num)

        try:
            record = orjson.loads(line)
        except json.JSONDecodeError as e:
            logger.warning(
                "Invalid json record in source search response: %s\n%s", line,
                str(e))
            continue

        if not record.get("source_uid"):
            continue

        summary = SourceLocationSummary(
            source_uid=record["source_uid"],
            content_hash=record.get("content_hash"),
            matched=bool(record.get("matched_location_id")),
        )

        yield summary

    logger.info("Processed %d total source location records from VIAL.",
                line_num)
    resp.release_conn()
Example #8
def search_locations(
    vial_http: urllib3.connectionpool.ConnectionPool,
    **kwds: Any,
) -> Iterator[dict]:
    """Wrapper around search locations api. Returns geojson."""
    params = {
        **kwds,
        "format": "nlgeojson",
    }

    query = urllib.parse.urlencode(params)

    resp = vial_http.request("GET",
                             f"/api/searchLocations?{query}",
                             preload_content=False)

    for line in resp:
        try:
            yield geojson.loads(line)
        except json.JSONDecodeError:
            logger.warning("Invalid json record in search response: %s", line)

    resp.release_conn()
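
Usage mirrors the source-location search above, except each yielded item is a plain geojson dict rather than a geojson.Feature. The "q" parameter below is a hypothetical filter used only for illustration.

# Keyword arguments are url-encoded and forwarded as query parameters.
for location in search_locations(vial_http, q="clinic"):
    properties = location.get("properties") or {}
    print(properties.get("name"))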
Example #9
def verify_token(vial_http: urllib3.connectionpool.ConnectionPool) -> bool:
    """Verifies that header contains valid authorization token"""
    verify_resp = vial_http.request("GET", "/api/verifyToken")
    return verify_resp.status == 200
Example #10
def import_source_locations(
    vial_http: urllib3.connectionpool.ConnectionPool,
    import_run_id: str,
    import_locations: Iterable[load.ImportSourceLocation],
    import_batch_size: int = IMPORT_BATCH_SIZE,
) -> ImportSourceLocationsResult:
    """Import source locations"""
    created = set()
    updated = set()

    path_and_query = f"/api/importSourceLocations?import_run_id={import_run_id}"
    logger.info("Contacting VIAL: POST %s", path_and_query)

    batches = 0
    for import_locations_batch in misc.batch(import_locations, import_batch_size):
        encoded_locs = []

        for loc in import_locations_batch:
            if loc.match and loc.match.action == "new":
                created.add(loc.source_uid)
            else:
                updated.add(loc.source_uid)

            loc_json = orjson.dumps(loc.dict(exclude_none=True))
            encoded_locs.append(loc_json)

        encoded_ndjson = b"\n".join(encoded_locs)

        try:
            rsp = vial_http.request(
                "POST",
                path_and_query,
                headers={**vial_http.headers, "Content-Type": "application/x-ndjson"},
                body=encoded_ndjson,
            )
        except Exception as e:
            logger.error(
                "Error while importing locations: %s (...) %s: %s",
                encoded_ndjson[:100],
                encoded_ndjson[-100:],
                e,
            )
            raise

        if rsp.status != 200:
            raise HTTPError(
                f"/api/importSourceLocations?import_run_id={import_run_id}",
                rsp.status,
                rsp.data[:100],
                dict(rsp.headers),
                None,
            )

        batches += 1
        if batches % 5 == 0:
            logger.info(
                "Submitted %d batches of up to %d records to VIAL.",
                batches,
                import_batch_size,
            )

    logger.info("Submitted %d total batches to VIAL.", batches)

    return ImportSourceLocationsResult(created=created, updated=updated)
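
Pulling the pieces together, a sketch of the overall flow might look like the following. The vial_http pool and the iterable of load.ImportSourceLocation records are assumed to be prepared elsewhere in the pipeline (see the connection sketch after Example #3).

# Minimal end-to-end sketch using the helpers above; import_locations is assumed.
if not verify_token(vial_http):
    raise RuntimeError("VIAL rejected the configured API token")

import_run_id = start_import_run(vial_http)
result = import_source_locations(vial_http, import_run_id, import_locations)
logger.info(
    "Created %d and updated %d source locations",
    len(result.created),
    len(result.updated),
)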