Esempio n. 1
0
    def find_count_of_dsl_query(query: dict, auth_args: Auth = Auth.shared()):
        """Find count of a given dsl query

        See https://docs.us.lifeomic.com/development/fhir-service/dsl/

        Parameters
        ----------
        query : dict
            The FHIR query to run a count against

        auth_args : Auth, dict
            Additional arguments for authentication

        Raises
        ------
        ValueError
            If `query` is an aggregation query (counts are undefined there)

        Examples
        --------
        >>> import phc.easy as phc
        >>> phc.Auth.set({ 'account': '<your-account-name>' })
        >>> phc.Project.set_current('My Project Name')
        >>> phc.Query.find_count_of_dsl_query({
          "type": "select",
          "columns": "*",
          "from": [{"table": "patient"}],
        })
        """
        # Aggregation queries return buckets rather than hits, so a total
        # record count is not meaningful for them.
        if FhirAggregation.is_aggregation_query(query):
            # Fixed typo in the error message ("not support" -> "not supported")
            raise ValueError("Count is not supported for aggregation queries.")

        auth = Auth(auth_args)
        fhir = Fhir(auth.session())

        # Request a single record (page_size=1); only the total hit count
        # from the Elasticsearch response metadata is needed.
        response = fhir.execute_es(auth.project_id,
                                   build_query(query, page_size=1),
                                   scroll="true")

        return response.data["hits"]["total"]["value"]
Esempio n. 2
0
    def execute_ga4gh(
        query: dict,
        all_results: bool = False,
        auth_args: dict = Auth.shared()
    ) -> pd.DataFrame:
        """Execute a GA4GH query described by `query` and return a data frame.

        `query` must contain "path" and "results_key"; "http_verb" is
        optional and defaults to "POST". When `all_results` is True, pages
        are scrolled until exhausted.
        """
        auth = Auth(auth_args)
        client = BaseClient(auth.session())

        # NOTE(review): "results_key" is not excluded below, so it is also
        # forwarded as a request parameter — confirm this is intended.
        extra_params = {
            key: value
            for key, value in query.items() if key not in ["path", "http_verb"]
        }
        params = {"datasetIds": [auth.project_id], **extra_params}

        return recursive_execute_ga4gh(
            auth=auth,
            client=client,
            path=query["path"],
            http_verb=query.get("http_verb", "POST"),
            results_key=query["results_key"],
            params=params,
            scroll=all_results,
        )
Esempio n. 3
0
    def run(
            file_id: str,
            auth_args: Auth = Auth.shared(),
            pause_time=1,
            **document_kw_args,
    ):
        """Run PrecisionOCR on a specific file id

        Returns the DocumentReference
        """
        auth = Auth(auth_args)
        api = BaseClient(auth.session())

        payload = {"project": auth.project_id, "fileId": file_id}
        result = api._api_call("ocr/documents", json=payload)

        ref_id = result.data["documentReferenceId"]

        # The created record takes a moment to appear in FSS, so wait
        # briefly before fetching it.
        sleep(pause_time)

        return Document.get(
            id=ref_id, auth_args=auth_args, **document_kw_args
        )
Esempio n. 4
0
    def delete(id: str, auth_args: Auth = Auth.shared()):
        """Issue a DELETE for an OCR document reference by its id."""
        auth = Auth(auth_args)
        api = BaseClient(auth.session())

        path = f"ocr/fhir/projects/{auth.project_id}/documentReferences/{id}"
        return api._api_call(path, http_verb="DELETE")
Esempio n. 5
0
    def delete(self, record_id: str, auth_args: Auth = Auth.shared()):
        """Perform a DELETE for the DSTU3 resource

        Returns nothing.
        """
        auth = Auth(auth_args)
        api = BaseClient(auth.session())

        result = api._fhir_call(
            f"{self.entity}/{record_id}", http_verb="DELETE"
        )
        return result.data
Esempio n. 6
0
    def create(config: OcrConfig, auth_args: Auth = Auth.shared()):
        """Create an OCR config for the current project."""
        auth = Auth(auth_args)
        api = BaseClient(auth.session())

        # Round-trip through JSON so unset fields are omitted from the payload
        config_dict = json.loads(config.json(exclude_none=True))
        payload = {"project": auth.project_id, "config": config_dict}

        return api._api_call("ocr/config", json=payload).data
Esempio n. 7
0
    def upload(source: str,
               folder="ocr-uploads",
               auth_args: Auth = Auth.shared()):
        """Upload a file from a path to the ocr directory (defaults to 'ocr-uploads')

        Parameters
        ----------
        source : str
            Local path of the file to upload
        folder : str
            Destination folder within the project
        auth_args : Auth, dict
            Additional arguments for authentication
        """
        auth = Auth(auth_args)
        files = Files(auth.session())
        # Keep only the base name so the file lands directly inside `folder`
        filename = source.split("/")[-1]

        # BUG FIX: `filename` was computed but never used — the destination
        # path contained a hard-coded placeholder instead of the file's name.
        return files.upload(auth.project_id,
                            source,
                            file_name=f"/{folder}/{filename}").data
Esempio n. 8
0
    def create(self, data: dict, auth_args: Auth = Auth.shared()):
        """Perform a POST for the DSTU3 resource"""
        auth = Auth(auth_args)
        api = BaseClient(auth.session())

        result = api._fhir_call(
            f"{self.entity}", http_verb="POST", json=data
        ).data

        # The FHIR service acknowledges a successful POST with "Created"
        if result != "Created":
            raise ValueError(f"Unexpected response: {result}")

        return True
Esempio n. 9
0
def execute_single_fhir_dsl(
    query: dict,
    scroll_id: str = "",
    retry_backoff: bool = False,
    auth_args: Auth = Auth.shared(),
    _retry_time: int = 1,
):
    """Execute a single FHIR DSL query, optionally retrying on server errors.

    Parameters
    ----------
    query : dict
        The FHIR DSL query to execute
    scroll_id : str
        Scroll cursor for paging (empty string for the first page)
    retry_backoff : bool
        When True, retry "Internal server error" failures with a
        progressively smaller page size (bounded by MAX_RETRY_BACKOFF)
    auth_args : Auth, dict
        Additional arguments for authentication
    _retry_time : int
        Internal retry counter; do not pass directly
    """
    auth = Auth(auth_args)
    fhir = Fhir(auth.session())

    try:
        return fhir.dsl(auth.project_id, query, scroll_id)
    except Exception as err:
        # Re-raise unless backoff is enabled, attempts remain, and the
        # failure looks like a server-side error.
        if (
            (_retry_time >= MAX_RETRY_BACKOFF)
            or (retry_backoff is False)
            or ("Internal server error" not in str(err))
        ):
            raise err

        if _retry_time == 1:
            # Base first retry attempt on record count
            record_count = fhir.dsl(
                auth.project_id, build_query(query, page_size=1), scroll="true"
            ).data["hits"]["total"]["value"]

            def backoff_limit(limit: int):
                # First retry: cap at half the requested limit or a power of
                # the total record count, whichever is smaller.
                # NOTE(review): `limit` is unused in this branch — presumably
                # intentional since the original query's limit is read instead;
                # confirm.
                return min(
                    (get_limit(query) or DEFAULT_SCROLL_SIZE) / 2,
                    math.pow(record_count, 0.85),
                )

        else:

            def backoff_limit(limit: int):
                # Subsequent retries: shrink the current limit geometrically
                return math.pow(limit, 0.85)

        new_query = update_limit(query, backoff_limit)

        print(
            f"Received server error. Retrying with page_size={get_limit(new_query)}"
        )

        # Recurse with the reduced page size and an incremented attempt count
        return execute_single_fhir_dsl(
            new_query,
            scroll_id=scroll_id,
            retry_backoff=True,
            auth_args=auth_args,
            _retry_time=_retry_time + 1,
        )
Esempio n. 10
0
    def put(self, record_id: str, data: dict, auth_args: Auth = Auth.shared()):
        """Perform a PUT on the DSTU3 resource

        (Recommended to use `update(...)` unless a direct PUT is required.)
        """
        auth = Auth(auth_args)
        api = BaseClient(auth.session())

        result = api._fhir_call(
            f"{self.entity}/{record_id}", http_verb="PUT", json=data
        ).data

        # The FHIR service acknowledges a successful PUT with "OK"
        if result != "OK":
            raise ValueError(f"Unexpected response: {result}")

        return data
Esempio n. 11
0
    def get_data_frame(auth_args: Auth = Auth.shared()):
        """Fetch the gene sets for the current project as a data frame.

        Parameters
        ----------
        auth_args : Auth, dict
            Additional arguments for authentication
        """
        auth = Auth(auth_args)
        client = BaseClient(auth.session())

        response = client._api_call(
            "knowledge/gene-sets",
            http_verb="GET",
            params={"datasetId": auth.project_id},
        )

        frame = pd.DataFrame(response.data["items"])

        # Collapse each gene-set's list of gene dicts into a comma-joined string
        if "genes" in frame.columns:
            frame["genes"] = frame.genes.apply(
                lambda genes: ",".join([d["gene"] for d in genes]))

        # BUG FIX: `drop` defaults to axis=0 (row labels), so the original
        # call silently left the "datasetId" column in place (errors="ignore"
        # masked the no-op). Drop the column explicitly.
        frame = frame.drop(columns=["datasetId"], errors="ignore")

        return frame
Esempio n. 12
0
    def get(
            self,
            record_id: str,
            auth_args: Auth = Auth.shared(),
            return_if_not_found=True,
    ):
        """Perform a GET on the DSTU3 resource"""
        auth = Auth(auth_args)
        api = BaseClient(auth.session())

        try:
            raw = api._fhir_call(f"{self.entity}/{record_id}",
                                 http_verb="GET").data
        except ApiError as error:
            # Optionally treat a missing record as None instead of an error
            if return_if_not_found and error.response.data == "Not Found":
                return None

            raise error

        return json.loads(raw)
Esempio n. 13
0
    def get_data_frame(document_id: str,
                       raw: bool = False,
                       auth_args: Auth = Auth.shared()):
        """Load the OCR block output for a document as a data frame.

        Parameters
        ----------
        document_id : str
            Id of the DocumentReference whose OCR output to fetch
        raw : bool
            When True (or when there are no rows), return the frame without
            geometry expansion
        auth_args : Auth, dict
            Additional arguments for authentication

        Raises
        ------
        ValueError
            If the document has no 'ocr-text-file-id' content attachment
        """
        auth = Auth(auth_args)
        document = Document.get(document_id, auth_args=auth_args)

        # Find the content entry tagged "ocr-text-file-id" and take the file
        # id from the tail of its attachment URL (None if not found).
        file_id = pipe(
            document.get("content", []),
            c.filter(lambda c: c.get("format", {}).get("code") ==
                     "ocr-text-file-id"),
            c.first,
            c.get("attachment", default={}),
            c.get("url"),
            iffy(isa(str), lambda url: url.split("/")[-1]),
        )

        if file_id is None:
            raise ValueError(
                f"No block file found for document: '{document_id}'")

        # Download the JSON-lines block file locally, read it, then clean up
        files = Files(auth.session())
        filename = files.download(file_id, "/tmp/")

        frame = pd.read_json(filename, lines=True)
        os.remove(filename)

        if raw or len(frame) == 0:
            return frame

        # Flatten the nested Geometry column into top-level columns, expand
        # Polygon-like columns, and index the result by block Id.
        return Block.sort(
            frame.drop(["Geometry"], axis=1).join(
                pd.json_normalize(frame.Geometry)).pipe(
                    partial(
                        Frame.expand,
                        custom_columns=[
                            Frame.codeable_like_column_expander("Polygon")
                        ],
                    )).set_index("Id"))
Esempio n. 14
0
    def get_data_frame(search: str = "", auth_args: Auth = Auth.shared()):
        """Fetch genes matching `search` for the current project as a frame."""
        auth = Auth(auth_args)
        api = BaseClient(auth.session())

        response = api._api_call(
            "knowledge/genes",
            http_verb="GET",
            params={"datasetId": auth.project_id, "gene": search},
        )

        frame = pd.DataFrame(response.data["items"])

        if "alias" in frame.columns:

            def join_aliases(aliases):
                # Non-list values (e.g. missing aliases) become None
                if isinstance(aliases, list):
                    return ",".join(aliases)
                return None

            frame["alias"] = frame.alias.apply(join_aliases)

        # We choose to not expand topCancerDrivers and cancerDrivers since it
        # can easily have 50 values in each. If we really need those, the user
        # will have to extract those.
        return frame
Esempio n. 15
0
def recursive_paging_api_call(
    path: str,
    params: Optional[dict] = None,
    http_verb: str = "GET",
    scroll: bool = False,
    progress: Optional[tqdm] = None,
    auth_args: Optional[Auth] = Auth.shared(),
    callback: Union[Callable[[Any, bool], None], None] = None,
    max_pages: Optional[int] = None,
    page_size: Optional[int] = None,
    log: bool = False,
    _current_page: int = 1,
    _prev_results: Optional[List[dict]] = None,
    _next_page_token: Optional[str] = None,
    _count: Optional[Union[float, int]] = None,
):
    """Recursively page through an API endpoint, accumulating results.

    Parameters
    ----------
    path : str
        API path to call
    params : dict, optional
        Query parameters for each request
    http_verb : str
        HTTP method to use (defaults to "GET")
    scroll : bool
        When False, only a single page is fetched
    progress : tqdm, optional
        Progress bar updated as pages arrive
    auth_args : Auth, dict, optional
        Additional arguments for authentication
    callback : callable, optional
        Called with (results, is_last_batch) per page; when provided,
        results are not accumulated and the callback's return value on the
        final batch is returned instead
    max_pages : int, optional
        Maximum number of pages to fetch
    page_size : int, optional
        Page size to request
    log : bool
        Pass-through flag (not read inside this function)
    _current_page, _prev_results, _next_page_token, _count
        Internal recursion state; do not pass these directly
    """
    # FIX: use None sentinels instead of mutable default arguments
    # ({} / []), which are shared across all calls of the function.
    params = {} if params is None else params
    _prev_results = [] if _prev_results is None else _prev_results

    auth = Auth(auth_args)
    client = BaseClient(auth.session())

    if _next_page_token:
        params = {**params, "nextPageToken": _next_page_token}

    if page_size:
        params = {**params, "pageSize": page_size}

    # NOTE: Parallelism is kept with execute_fhir_dsl to unify the API calls
    if scroll is False:
        max_pages = 1

    # Compute count and add to progress (only on the very first call)
    if _count is None and len(_prev_results) == 0:
        count_response = client._api_call(
            path,
            http_verb=http_verb,
            # Use minimum pageSize in case this endpoint doesn't support count
            params={
                **params, "include": "count",
                "pageSize": 1
            },
        )

        # NOTE(review): other responses here are read via `.data`; confirm
        # the response object itself supports `.get(...)` as used below.
        _count = count_response.get("count")
        # Count appears to only go up to 999
        if _count == 999:
            print(f"Results are {_count}+.")
            _count = None

        if _count and (progress is not None):
            progress.reset(_count)

    response = client._api_call(path, http_verb=http_verb, params=params)

    current_results = response.data.get("items", [])

    if progress is not None:
        progress.update(len(current_results))

    is_last_batch = (
        (scroll is False)
        or ((max_pages is not None) and (_current_page >= max_pages))
        # Using the next link is the only completely reliable way to tell if a
        # next page exists
        or (response.data.get("links", {}).get("next") is None))
    results = [] if callback else [*_prev_results, *current_results]

    # Sometimes the count doesn't match the results. We make it sync up if the
    # count doesn't match but we got all results.
    # TODO: Remove this when API fixed
    if ((progress is not None) and scroll and is_last_batch
            and (progress.total != progress.n)):
        count = progress.n
        progress.reset(count)
        progress.update(count)

    if callback and not is_last_batch:
        callback(current_results, False)
    elif callback and is_last_batch:
        return callback(current_results, True)
    elif is_last_batch:
        if progress is not None:
            progress.close()

        # Because count is often wrong, we'll skip the logging here
        # TODO: Uncomment this when API fixed
        # print(
        #     f"Retrieved {len(results)}{f'/{_count}' if _count else ''} results"
        # )
        return results

    return recursive_paging_api_call(
        path,
        params=params,
        http_verb=http_verb,
        progress=progress,
        auth_args=auth_args,
        callback=callback,
        max_pages=max_pages,
        page_size=page_size,
        log=log,
        scroll=scroll,
        _current_page=_current_page + 1,
        _prev_results=results,
        _next_page_token=get_next_page_token(
            response.data.get("links", {}).get("next", "")),
        _count=_count,
    )
Esempio n. 16
0
    def get(auth_args: Auth = Auth.shared()):
        """Fetch the OCR config for the current project."""
        auth = Auth(auth_args)
        api = BaseClient(auth.session())

        response = api._api_call(
            f"ocr/config/{auth.project_id}", http_verb="GET"
        )
        return response.data