Example #1
0
    def run(
            file_id: str,
            auth_args: Auth = Auth.shared(),
            pause_time=1,
            **document_kw_args,
    ):
        """Submit a file to PrecisionOCR and fetch the resulting document.

        Posts ``file_id`` together with the authenticated project id to the
        ``ocr/documents`` endpoint, pauses for ``pause_time`` seconds, and
        then looks up the created document reference.

        Returns whatever ``Document.get`` returns for the new
        ``documentReferenceId``; ``document_kw_args`` are forwarded to it.
        """
        auth = Auth(auth_args)
        api = BaseClient(auth.session())

        payload = {"project": auth.project_id, "fileId": file_id}
        submission = api._api_call("ocr/documents", json=payload)
        reference_id = submission.data["documentReferenceId"]

        # There is no completion signal here, so give the new document
        # time to land in FSS before looking it up.
        sleep(pause_time)

        return Document.get(
            id=reference_id,
            auth_args=auth_args,
            **document_kw_args,
        )
Example #2
0
    def delete(id: str, auth_args: Auth = Auth.shared()):
        """Issue a DELETE for the document reference ``id``.

        The request targets the ``documentReferences`` path under the
        authenticated project. Returns the response from ``_api_call``
        unchanged.
        """
        auth = Auth(auth_args)
        api = BaseClient(auth.session())

        endpoint = (
            f"ocr/fhir/projects/{auth.project_id}/documentReferences/{id}"
        )
        return api._api_call(endpoint, http_verb="DELETE")
Example #3
0
    def create(config: OcrConfig, auth_args: Auth = Auth.shared()):
        """Send ``config`` to the ``ocr/config`` endpoint for the project.

        The config is serialized with ``config.json(exclude_none=True)`` and
        parsed back into plain Python data before being placed in the
        request body. Returns the ``data`` attribute of the response.
        """
        auth = Auth(auth_args)
        api = BaseClient(auth.session())

        body = {
            "project": auth.project_id,
            # Round-trip through JSON so the body holds plain data with the
            # None-valued fields already removed.
            "config": json.loads(config.json(exclude_none=True)),
        }
        response = api._api_call("ocr/config", json=body)
        return response.data
Example #4
0
    def get_data_frame(auth_args: Auth = Auth.shared()):
        """Fetch the project's gene sets as a pandas DataFrame.

        Issues a GET to ``knowledge/gene-sets`` with the authenticated
        project id as ``datasetId`` and builds a frame from the ``items`` of
        the response.

        If a ``genes`` column is present, each cell (an iterable of dicts
        with a ``gene`` key) is flattened to a comma-separated string. The
        ``datasetId`` column, when present, is removed.

        Returns:
            The resulting ``pd.DataFrame``.
        """
        auth = Auth(auth_args)
        client = BaseClient(auth.session())

        response = client._api_call(
            "knowledge/gene-sets",
            http_verb="GET",
            params={"datasetId": auth.project_id},
        )

        frame = pd.DataFrame(response.data["items"])

        if "genes" in frame.columns:
            frame["genes"] = frame.genes.apply(
                lambda genes: ",".join([d["gene"] for d in genes]))

        # DataFrame.drop defaults to axis=0 (row labels), so a bare
        # drop(["datasetId"]) never touches the column and, combined with
        # errors="ignore", silently does nothing. Target columns explicitly.
        frame = frame.drop(columns=["datasetId"], errors="ignore")

        return frame
Example #5
0
    def get_data_frame(search: str = "", auth_args: Auth = Auth.shared()):
        """Query ``knowledge/genes`` and return the items as a DataFrame.

        ``search`` is sent as the ``gene`` query parameter alongside the
        authenticated project id (``datasetId``). When an ``alias`` column
        is present, list values are joined into a comma-separated string and
        any non-list value becomes ``None``.
        """
        auth = Auth(auth_args)
        api = BaseClient(auth.session())

        query = {"datasetId": auth.project_id, "gene": search}
        response = api._api_call(
            "knowledge/genes", http_verb="GET", params=query
        )
        genes = pd.DataFrame(response.data["items"])

        def _join_aliases(aliases):
            # Only lists are joined; anything else is normalized to None.
            if isinstance(aliases, list):
                return ",".join(aliases)
            return None

        if "alias" in genes.columns:
            genes["alias"] = genes["alias"].apply(_join_aliases)

        # topCancerDrivers and cancerDrivers are intentionally left
        # unexpanded: each can easily hold 50 values. Callers who need them
        # must extract those columns themselves.
        return genes
Example #6
0
def recursive_paging_api_call(
    path: str,
    params: dict = {},
    http_verb: str = "GET",
    scroll: bool = False,
    progress: Optional[tqdm] = None,
    auth_args: Optional[Auth] = Auth.shared(),
    callback: Union[Callable[[Any, bool], None], None] = None,
    max_pages: Optional[int] = None,
    page_size: Optional[int] = None,
    log: bool = False,
    _current_page: int = 1,
    _prev_results: List[dict] = [],
    _next_page_token: Optional[str] = None,
    _count: Optional[Union[float, int]] = None,
):
    """Call a paged endpoint, following ``next`` links by recursion.

    Each invocation fetches one page of ``items`` from ``path`` and, unless
    that page is the last batch, calls itself for the following page using
    the token extracted from the response's ``links.next`` value.

    Args:
        path: Endpoint path handed to ``BaseClient._api_call``.
        params: Query parameters. This function never mutates the dict it
            is given; it builds new dicts when adding ``nextPageToken`` or
            ``pageSize``.
        http_verb: HTTP verb handed to ``_api_call``.
        scroll: When ``False`` only a single page is retrieved.
        progress: Optional ``tqdm`` bar. It is reset to the reported count
            on the first page and advanced by the number of items on each
            page.
        auth_args: Credentials wrapped with ``Auth(...)`` on every page.
        callback: When given, it is called with ``(items, is_last_batch)``
            for every page, results are not accumulated, and the return
            value of the final callback invocation is returned.
        max_pages: Stop once this many pages have been retrieved.
        page_size: Sent as ``pageSize`` when truthy.
        log: Forwarded to the recursive call; not otherwise used here.
        _current_page: Internal. 1-based index of the page being fetched.
        _prev_results: Internal. Items accumulated from earlier pages.
        _next_page_token: Internal. Token for the page being fetched.
        _count: Internal. Total reported by the count pre-flight, if any.

    Returns:
        The accumulated list of items, or, when ``callback`` is supplied,
        whatever the callback returns for the last batch.
    """
    auth = Auth(auth_args)
    client = BaseClient(auth.session())

    if _next_page_token:
        params = {**params, "nextPageToken": _next_page_token}

    if page_size:
        params = {**params, "pageSize": page_size}

    # NOTE: Parallelism is kept with execute_fhir_dsl to unify the API calls
    if scroll is False:
        max_pages = 1

    # Compute count and add to progress. This is a first-page-only step:
    # in callback mode ``_prev_results`` stays empty on every page, so
    # without the page check an unavailable (or 999-capped) count would
    # trigger a redundant count request and message on each page.
    is_first_page = _current_page == 1 and not _prev_results
    if _count is None and is_first_page:
        count_response = client._api_call(
            path,
            http_verb=http_verb,
            # Use minimum pageSize in case this endpoint doesn't support count
            params={
                **params, "include": "count",
                "pageSize": 1
            },
        )

        _count = count_response.get("count")
        # Count appears to only go up to 999
        if _count == 999:
            print(f"Results are {_count}+.")
            _count = None

        if _count and (progress is not None):
            progress.reset(_count)

    response = client._api_call(path, http_verb=http_verb, params=params)

    current_results = response.data.get("items", [])

    if progress is not None:
        progress.update(len(current_results))

    is_last_batch = (
        (scroll is False)
        or ((max_pages is not None) and (_current_page >= max_pages))
        # Using the next link is the only completely reliable way to tell if a
        # next page exists
        or (response.data.get("links", {}).get("next") is None))

    # With a callback each page is handed off immediately, so nothing is
    # accumulated across pages.
    results = [] if callback else [*_prev_results, *current_results]

    # Sometimes the count doesn't match the results. We make it sync up if the
    # count doesn't match but we got all results.
    # TODO: Remove this when API fixed
    if ((progress is not None) and scroll and is_last_batch
            and (progress.total != progress.n)):
        count = progress.n
        progress.reset(count)
        progress.update(count)

    if callback and not is_last_batch:
        callback(current_results, False)
    elif callback and is_last_batch:
        return callback(current_results, True)
    elif is_last_batch:
        if progress is not None:
            progress.close()

        # Because count is often wrong, we'll skip the logging here
        # TODO: Uncomment this when API fixed
        # print(
        #     f"Retrieved {len(results)}{f'/{_count}' if _count else ''} results"
        # )
        return results

    # More pages remain: recurse with the accumulated state and the token
    # taken from this page's ``next`` link.
    return recursive_paging_api_call(
        path,
        params=params,
        http_verb=http_verb,
        progress=progress,
        auth_args=auth_args,
        callback=callback,
        max_pages=max_pages,
        page_size=page_size,
        log=log,
        scroll=scroll,
        _current_page=_current_page + 1,
        _prev_results=results,
        _next_page_token=get_next_page_token(
            response.data.get("links", {}).get("next", "")),
        _count=_count,
    )
Example #7
0
    def get(auth_args: Auth = Auth.shared()):
        """Fetch ``ocr/config/<project id>`` for the authenticated project.

        Returns the ``data`` attribute of the GET response.
        """
        auth = Auth(auth_args)
        api = BaseClient(auth.session())

        endpoint = f"ocr/config/{auth.project_id}"
        response = api._api_call(endpoint, http_verb="GET")
        return response.data