Example 1
def recursive_execute_fhir_dsl(
    query: dict,
    scroll: bool = False,
    progress: Union[None, tqdm] = None,
    auth_args: Auth = Auth.shared(),
    callback: Union[Callable[[Any, bool], None], None] = None,
    max_pages: Union[int, None] = None,
    _current_page: int = 1,
    _scroll_id: str = "true",
    _prev_hits: List = [],
):
    will_scroll = query_allows_scrolling(query) and scroll

    response = execute_single_fhir_dsl(
        query,
        scroll_id=_scroll_id if will_scroll else "",
        retry_backoff=will_scroll,
        auth_args=auth_args,
    )

    is_first_iteration = _scroll_id == "true"
    current_results = response.data.get("hits").get("hits")
    _scroll_id = response.data.get("_scroll_id", "")
    actual_count = response.data["hits"]["total"]["value"]
    current_result_count = len(current_results)

    if is_first_iteration and progress:
        progress.reset(actual_count)

    if progress:
        progress.update(current_result_count)

    is_last_batch = ((current_result_count == 0) or (scroll is False)
                     or ((max_pages is not None) and
                         (_current_page >= max_pages)))
    results = [] if callback else [*_prev_hits, *current_results]

    if callback and not is_last_batch:
        callback(current_results, False)
    elif callback and is_last_batch:
        return callback(current_results, True)
    elif is_last_batch:
        suffix = "+" if actual_count == MAX_RESULT_SIZE else ""
        print(f"Retrieved {len(results)}/{actual_count}{suffix} results")

        return results

    return recursive_execute_fhir_dsl(
        query,
        scroll=True,
        progress=progress,
        auth_args=auth_args,
        callback=callback,
        max_pages=max_pages,
        _current_page=_current_page + 1,
        _scroll_id=_scroll_id,
        _prev_hits=results,
    )
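A hedged usage sketch for the function above: stream each page to disk via the callback instead of accumulating hits in memory. The callback contract comes straight from the code (invoked with is_finished=False per batch; its return value on the final batch becomes the function's return value); the query and output path are illustrative.

import json

def write_batch(batch, is_finished):
    # Append each page of hits as JSON lines.
    with open("/tmp/hits.jsonl", "a") as f:
        for hit in batch:
            f.write(json.dumps(hit) + "\n")
    if is_finished:
        # Returned as the overall result of recursive_execute_fhir_dsl.
        return "/tmp/hits.jsonl"

path = recursive_execute_fhir_dsl(
    {"type": "select", "columns": "*", "from": [{"table": "patient"}]},
    scroll=True,
    callback=write_batch,
)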
Example 2
    def get_data_frame(
        cls,
        name: Optional[str] = None,
        auth_args: Auth = Auth.shared(),
        max_pages: Optional[int] = None,
        page_size: Optional[int] = None,
        log: bool = False,
        show_progress: bool = False,
    ):
        """Execute a request for projects

        ## Parameters

        Query: `phc.easy.projects.ProjectListOptions`

        Execution: `phc.easy.query.Query.execute_paging_api`
        """

        if page_size is None:
            # Projects do not have much data so use a higher page size
            page_size = 100

        get_data_frame = super().get_data_frame

        auth = Auth(auth_args)

        get_data_frame_args = without_keys(
            cls._get_current_args(inspect.currentframe(), locals()),
            ["auth_args", "account", "show_progress"],
        )

        def get_projects_for_account(account: dict):
            df = get_data_frame(
                ignore_cache=True,
                all_results=max_pages is None,
                auth_args=auth.customized({"account": account["id"]}),
                show_progress=show_progress,
                **get_data_frame_args,
            )
            df["account"] = account["id"]
            return df

        frame = pd.concat(list(pmap(get_projects_for_account, auth.accounts())))

        return frame.reset_index(drop=True)
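For reference, a typical call (a sketch; Project.get_data_frame is the classmethod above, as other examples here also use it, and pmap presumably maps get_projects_for_account over the accounts in parallel):

# List every project across all accessible accounts, tagged by account id.
projects = Project.get_data_frame(show_progress=True)
projects[["id", "name", "account"]].head()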
Example 3
    def get_data_frame(auth_args: Auth = Auth.shared()):
        auth = Auth(auth_args)
        client = BaseClient(auth.session())

        response = client._api_call(
            "knowledge/gene-sets",
            http_verb="GET",
            params={"datasetId": auth.project_id},
        )

        frame = pd.DataFrame(response.data["items"])

        if "genes" in frame.columns:
            frame["genes"] = frame.genes.apply(
                lambda genes: ",".join([d["gene"] for d in genes]))

        frame = frame.drop(["datasetId"], axis=1, errors="ignore")

        return frame
Example 4
    def get(
            self,
            record_id: str,
            auth_args: Auth = Auth.shared(),
            return_if_not_found=True,
    ):
        """Perform a GET on the DSTU3 resource"""
        auth = Auth(auth_args)
        client = BaseClient(auth.session())

        try:
            response = client._fhir_call(f"{self.entity}/{record_id}",
                                         http_verb="GET").data
        except ApiError as e:
            if return_if_not_found and e.response.data == "Not Found":
                return None

            raise e

        return json.loads(response)
Example 5
    def get(cls, id: str, auth_args: Auth = Auth.shared(), **kw_args):
        results = (super().get_data_frame(
            id=id,
            term={
                "meta.tag.code.keyword": "PrecisionOCR Service"
            },
            auth_args=auth_args,
            **kw_args,
        ).to_dict("records"))

        return results[0] if len(results) else None
Example 6
 def get_count(cls, query_overrides: dict = {}, auth_args=Auth.shared()):
     "Get the count for a given FSS query"
     return Query.find_count_of_dsl_query(
         {
             "type": "select",
             "columns": "*",
             "from": [{"table": cls.table_name()}],
             **query_overrides,
         },
         auth_args=auth_args,
     )
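Because query_overrides is spread last into the DSL dict, a caller can override or extend any part of the generated count query. A minimal sketch, assuming Document as the entity class and an elasticsearch-style "where" clause (the exact clause shape is an assumption):

count = Document.get_count(
    query_overrides={
        # Assumed "where" shape for this FSS DSL.
        "where": {
            "type": "elasticsearch",
            "query": {"term": {"meta.tag.code.keyword": "PrecisionOCR Service"}},
        }
    }
)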
Example 7
    def update(
            self,
            record_id: str,
            update: Callable[[dict], dict],
            auth_args: Auth = Auth.shared(),
    ):
        """Perform an update on the DSTU3 resource through an update function"""
        data = self.get(record_id,
                        auth_args=auth_args,
                        return_if_not_found=False)

        return self.put(record_id, update(data), auth_args)
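Since update takes a Callable[[dict], dict], a partial edit is just a dict merge over the fetched resource. A minimal sketch (record id and field value illustrative):

# Mark the resource as amended, leaving all other fields untouched.
resource.update(
    "<record-id>",
    lambda data: {**data, "status": "amended"},
)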
Example 8
 def get_data_frame(cls,
                    all_results=False,
                    auth_args: Auth = Auth.shared(),
                    **kw_args):
     return super().get_data_frame(
         term={"meta.tag.code.keyword": "PrecisionOCR Service"},
         all_results=all_results,
         auth_args=auth_args,
         **{"ignore_cache": True, **kw_args},
     )
Example 9
    def get_data_frame(document_id: str,
                       raw: bool = False,
                       auth_args: Auth = Auth.shared()):
        auth = Auth(auth_args)
        document = Document.get(document_id, auth_args=auth_args)

        file_id = pipe(
            document.get("content", []),
            c.filter(lambda content: content.get("format", {}).get("code")
                     == "ocr-text-file-id"),
            c.first,
            c.get("attachment", default={}),
            c.get("url"),
            iffy(isa(str), lambda url: url.split("/")[-1]),
        )

        if file_id is None:
            raise ValueError(
                f"No block file found for document: '{document_id}'")

        files = Files(auth.session())
        filename = files.download(file_id, "/tmp/")

        frame = pd.read_json(filename, lines=True)
        os.remove(filename)

        if raw or len(frame) == 0:
            return frame

        return Block.sort(
            frame.drop(["Geometry"], axis=1).join(
                pd.json_normalize(frame.Geometry)).pipe(
                    partial(
                        Frame.expand,
                        custom_columns=[
                            Frame.codeable_like_column_expander("Polygon")
                        ],
                    )).set_index("Id"))
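The pipe expression above threads the document through curried helpers. An imperative sketch of the same file_id extraction, for readability:

file_id = None
for content in document.get("content", []):
    if content.get("format", {}).get("code") == "ocr-text-file-id":
        url = content.get("attachment", {}).get("url")
        if isinstance(url, str):
            file_id = url.split("/")[-1]
        break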
Example 10
    def find(search: str, auth_args: Auth = Auth.shared()):
        """Search for a project using given criteria and return results as a data frame

        Attributes
        ----------
        search : str
            Part of a project's id, name, or description to search for

        auth_args : Any
            The authentication to use for the account and project (defaults to shared)
        """
        projects = Project.get_data_frame(auth_args=auth_args)
        text = projects[SEARCH_COLUMNS].agg(join_strings, axis=1)
        return projects[text.str.contains(search.lower())]
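Usage is a one-liner (sketch; join_strings presumably lower-cases and joins the SEARCH_COLUMNS, which is why the search term is lower-cased before matching):

# Projects whose id, name, or description mentions "oncology".
matches = Project.find("oncology")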
Example 11
def test_passing_options_through_to_paging_api(execute_paging_api):
    execute_paging_api.return_value = pd.DataFrame()

    auth = Auth()

    GenomicShortVariant.get_data_frame(
        [str(uuid4())], raw=True, log=True, auth_args=auth
    )

    kwargs = execute_paging_api.call_args[1]

    assert kwargs.get("auth_args") == auth
    assert kwargs.get("log") == True
    assert kwargs.get("raw") == True
Example 12
    def get_data_frame(search: str = "", auth_args: Auth = Auth.shared()):
        auth = Auth(auth_args)
        client = BaseClient(auth.session())

        response = client._api_call(
            "knowledge/genes",
            http_verb="GET",
            params={"datasetId": auth.project_id, "gene": search},
        )

        frame = pd.DataFrame(response.data["items"])

        if "alias" in frame.columns:
            frame["alias"] = frame.alias.apply(
                lambda aliases: ",".join(aliases)
                if isinstance(aliases, list)
                else None
            )

        # We choose to not expand topCancerDrivers and cancerDrivers since it
        # can easily have 50 values in each. If we really need those, the user
        # will have to extract those.
        return frame
Example 13
def test_skipping_genomic_tests_if_variant_set_ids(get_data_frame):
    variant_set_ids = [str(uuid4())]

    test_df = GenomicVariant._get_genomic_tests(
        variant_set_ids,
        max_pages=None,
        all_results=False,
        auth_args=Auth(),
        log=False,
    )

    assert_equals(get_data_frame.call_count, 0)

    assert_equals(len(test_df.columns), 1)
    assert_equals(list(test_df.id), variant_set_ids)
Example 14
    def set_current(search: str, auth: Auth = Auth.shared()):
        """Search for a project using given criteria, set it to the authentication
        object, and return the matching projects as a data frame

        Attributes
        ----------
        search : str
            Part of a project's id, name, or description to search for

        auth : Auth
            The authentication to update for the account and project (defaults to shared)
        """
        matches = Project.find(search, auth)

        if len(matches) > 1:
            print("Multiple projects found. Try a more specific search")
        elif len(matches) == 0:
            print(f'No matches found for search "{search}"')
        else:
            project = matches.iloc[0]
            # Uses private method since this is a special case
            auth.update({"account": project.account, "project_id": project.id})

        return matches
Example 15
    def get(
            cls,
            id: str,
            auth_args: Auth = Auth.shared(),
            query_overrides={},
            **kw_args,
    ):
        query_overrides = pipe(
            query_overrides,
            term_adder({"meta.tag.code.keyword": "PrecisionOCR Service"}),
        )

        return (super().get_data_frame(
            id=id,
            auth_args=auth_args,
            query_overrides=query_overrides,
            **kw_args,
        ).to_dict("records")[0])
Example 16
    def get_data_frame(
        cls,
        # Query parameters
        variant_set_ids: List[str] = [],
        include: List[GenomicVariantInclude] = [],
        gene: List[str] = [],
        interpretation: List[str] = [],
        effect: List[CopyNumberStatus] = [],
        in_ckb: Optional[bool] = None,
        # Test parameters
        patient_id: Optional[str] = None,
        test_status: Optional[GenomicTestStatus] = GenomicTestStatus.ACTIVE,
        # Execution parameters
        all_results: bool = False,
        auth_args: Auth = Auth.shared(),
        max_pages: Optional[int] = None,
        page_size: Optional[int] = None,
        log: bool = False,
        **kw_args,
    ):
        """Execute a request for genomic copy number variants

        ## Parameters

        Query: `phc.easy.omics.options.genomic_copy_number_variant.GenomicCopyNumberVariantOptions`

        Execution: `phc.easy.query.Query.execute_paging_api`

        Expansion: `phc.easy.frame.Frame.expand`
        """

        args = cls._get_current_args(inspect.currentframe(), locals())

        return super().get_data_frame(
            test_type=GenomicTestType.COPY_NUMBER_VARIANT,
            **{**kw_args, **args},
        )
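Note the merge order in **{**kw_args, **args}: later keys win in a dict literal, so the explicit arguments captured from this signature take precedence over anything passed through kw_args. A quick illustration:

kw_args = {"log": False, "page_size": 25}
args = {"log": True}
assert {**kw_args, **args} == {"log": True, "page_size": 25}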
Example 17
    def get_data_frame(
        cls,
        # Query parameters
        variant_set_ids: List[str] = [],
        gene: List[str] = [],
        effect: List[StructuralType] = [],
        interpretation: List[str] = [],
        in_frame: List[InFrame] = [],
        in_ckb: Optional[bool] = None,
        include: List[GenomicVariantInclude] = [],
        # Execution parameters
        all_results: bool = False,
        auth_args: Auth = Auth.shared(),
        max_pages: Optional[int] = None,
        page_size: Optional[int] = None,
        log: bool = False,
        **kw_args,
    ):
        """Execute a request for genomic structural variants

        ## Parameters

        Query: `phc.easy.omics.options.genomic_structural_variant.GenomicStructuralVariantOptions`

        Execution: `phc.easy.query.Query.execute_paging_api`

        Expansion: `phc.easy.frame.Frame.expand`

        """

        args = cls._get_current_args(inspect.currentframe(), locals())

        return super().get_data_frame(
            test_type=GenomicTestType.STRUCTURAL_VARIANT,
            **{**kw_args, **args},
        )
Example 18
    def get_data_frame(
        cls,
        # Query parameters
        variant_set_ids: List[str] = [],
        include: List[GenomicVariantInclude] = [],
        gene: List[str] = [],
        expression: Optional[str] = None,
        outlier_std_dev: Optional[str] = None,
        in_ckb: Optional[bool] = None,
        order_by: Optional[str] = None,
        # Execution parameters
        all_results: bool = False,
        auth_args: Auth = Auth.shared(),
        max_pages: Optional[int] = None,
        page_size: Optional[int] = None,
        log: bool = False,
        **kw_args,
    ):
        """Execute a request for genomic expression

        ## Parameters

        Query: `phc.easy.omics.options.genomic_expression.GenomicExpressionOptions`

        Execution: `phc.easy.query.Query.execute_paging_api`

        Expansion: `phc.easy.frame.Frame.expand`

        """

        args = cls._get_current_args(inspect.currentframe(), locals())

        return super().get_data_frame(
            test_type=GenomicTestType.EXPRESSION,
            **{**kw_args, **args},
        )
Example 19
    def get_data_frame(
            cls,
            document_id: Optional[str] = None,
            document_ids: List[str] = [],
            all_results=False,
            auth_args: Auth = Auth.shared(),
            query_overrides={},
            **kw_args,
    ):
        query_overrides = pipe(
            query_overrides,
            term_adder({"meta.tag.code.keyword": "PrecisionOCR Service"}),
            foreign_ids_adder(
                foreign_id=document_id,
                foreign_ids=document_ids,
                foreign_key="relatesTo.targetReference.reference",
                foreign_id_prefixes=["DocumentReference/"],
            ),
        )

        frame = super().get_data_frame(
            all_results=all_results,
            auth_args=auth_args,
            query_overrides=query_overrides,
            **{"ignore_cache": True, **kw_args},
        )

        if PAGE_NUMBER_COLUMN in frame.columns:
            frame = frame.astype({PAGE_NUMBER_COLUMN: "int"})

        if document_id is not None and PAGE_NUMBER_COLUMN in frame.columns:
            return frame.sort_values(PAGE_NUMBER_COLUMN)

        return frame
Example 20
    def get_data_frame(
        cls,
        all_results: bool = False,
        raw: bool = False,
        page_size: Union[int, None] = None,
        max_pages: Union[int, None] = None,
        query_overrides: dict = {},
        auth_args=Auth.shared(),
        ignore_cache: bool = False,
        expand_args: dict = {},
        log: bool = False,
        id: Optional[str] = None,
        ids: List[str] = [],
        # Codes
        code: Optional[Union[str, List[str]]] = None,
        display: Optional[Union[str, List[str]]] = None,
        system: Optional[Union[str, List[str]]] = None,
        code_fields: List[str] = [],
    ):
        """Retrieve records

        Attributes
        ----------
        all_results : bool = False
            Retrieve a sample of results (10) or the entire set of records

        raw : bool = False
            If raw, then values will not be expanded (useful for manual
            inspection if something goes wrong)

        page_size : int
            The number of records to fetch per page

        max_pages : int
            The number of pages to retrieve (useful if working with tons of records)

        query_overrides : dict = {}
            Override any part of the elasticsearch FHIR query

        auth_args : Any
            The authentication to use for the account and project (defaults to shared)

        ignore_cache : bool = False
            Bypass the caching system that auto-saves results to a CSV file.
            Caching only occurs when all results are being retrieved.

        expand_args : Any
            Additional arguments passed to phc.Frame.expand

        log : bool = False
            Whether to log some diagnostic statements for debugging

        id : None or str = None
            Find records for a given id

        ids : List[str]
            Find records for given ids

        code : str | List[str]
            Adds where clause for code value(s)

        display : str | List[str]
            Adds where clause for code display value(s)

        system : str | List[str]
            Adds where clause for code system value(s)

        code_fields : List[str]
            A list of paths to find FHIR codes in (default: codes for the given entity)

        Examples
        --------
        >>> import phc.easy as phc
        >>> phc.Auth.set({'account': '<your-account-name>'})
        >>> phc.Project.set_current('My Project Name')
        >>>
        >>> phc.Observation.get_data_frame(patient_id='<patient-id>')
        >>>
        >>> phc.Goal.get_data_frame(patient_id='<patient-id>')
        """
        query = {
            "type": "select",
            "columns": "*",
            "from": [{
                "table": cls.table_name()
            }],
        }

        code_fields = [*cls.code_fields(), *code_fields]

        def transform(df: pd.DataFrame):
            return cls.transform_results(df, **expand_args)

        return Query.execute_fhir_dsl_with_options(
            query,
            transform,
            all_results,
            raw,
            query_overrides,
            auth_args,
            ignore_cache,
            page_size=page_size,
            max_pages=max_pages,
            log=log,
            # Codes
            code_fields=code_fields,
            code=code,
            display=display,
            system=system,
            id=id,
            ids=ids,
        )
Example 21
    def get_data_frame(
        cls,
        test_type: GenomicTestType,
        # Query parameters
        variant_set_ids: List[str] = [],
        # Test parameters
        patient_id: Optional[str] = None,
        test_status: Optional[GenomicTestStatus] = GenomicTestStatus.ACTIVE,
        # Execution parameters
        all_results: bool = False,
        auth_args: Auth = Auth.shared(),
        max_pages: Optional[int] = None,
        page_size: Optional[int] = None,
        log: bool = False,
        **kw_args,
    ):
        """Execute a request for genomic variants

        ## Parameters

        Execution: `phc.easy.query.Query.execute_paging_api`

        Expansion: `phc.easy.frame.Frame.expand`
        """
        test_params = ["patient_id", "status"]

        test_args, args = split_by(
            cls._get_current_args(inspect.currentframe(), locals()),
            left_keys=test_params,
        )

        test_df = cls._get_genomic_tests(
            variant_set_ids=variant_set_ids,
            all_results=all_results,
            test_type=test_type,
            page_size=page_size,
            max_pages=max_pages,
            log=log,
            auth_args=auth_args,
            **test_args,
        )
        args["variant_set_ids"] = variant_set_ids = list(test_df.id)

        if len(variant_set_ids) > MAX_VARIANT_SET_IDS and (
            max_pages or (not all_results and page_size)
        ):
            print(
                "[WARNING]: All result limit parameters are approximate when performing genomic data retrieval."
            )

        get_data_frame = super().get_data_frame

        def perform_batch(ids: List[str], total_thus_far: int):
            # Determine whether to skip this batch
            if (
                # Implement approximation of max_pages
                (not all_results and max_pages
                 and total_thus_far >= max_pages * (page_size or 100))
                # Use 25 or page_size for a sample (when no max_pages)
                or (not all_results and not max_pages
                    and total_thus_far >= (page_size or 25))
            ):
                return pd.DataFrame()

            has_multiple_batches = len(ids) != len(variant_set_ids)

            return get_data_frame(
                **kw_args,
                **{
                    **args,
                    "variant_set_ids": list(ids),
                    # Scroll through full batches and then honor the max_pages param
                    "all_results": all_results
                    or (has_multiple_batches and max_pages),
                },
            )

        variants = batch_get_frame(variant_set_ids, MAX_VARIANT_SET_IDS,
                                   perform_batch)

        if len(variants) == 0:
            variants["variant_set_id"] = math.nan

        return variants.join(test_df.set_index("id"),
                             on="variant_set_id",
                             rsuffix=".set")
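batch_get_frame is not shown in these examples. A plausible sketch of its contract, consistent with how perform_batch is written (ids chunked to MAX_VARIANT_SET_IDS, the running row total passed to each call, frames concatenated); this is an assumption, not the library source:

from typing import Callable, List
import pandas as pd

def batch_get_frame(
    ids: List[str],
    batch_size: int,
    perform_batch: Callable[[List[str], int], pd.DataFrame],
) -> pd.DataFrame:
    # Call perform_batch for each chunk, threading through the row count so far.
    frames, total = [], 0
    for start in range(0, len(ids), batch_size):
        frame = perform_batch(ids[start:start + batch_size], total)
        total += len(frame)
        frames.append(frame)
    return pd.concat(frames) if frames else pd.DataFrame()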
Example 22
    def _recursive_execute_composite_aggregations(
        table_name: str,
        key_sources_pairs: List[Tuple[str, List[dict]]],
        batch_size: int = 100,
        progress: Union[tqdm, None] = None,
        query_overrides: dict = {},
        log: bool = False,
        auth_args: Auth = Auth.shared(),
        max_pages: Union[int, None] = None,
        _current_page: int = 1,
        _prev_results: dict = {},
        _after_keys: dict = {},
        **query_kwargs,
    ):
        aggregation = Query.execute_fhir_dsl(
            {
                "type":
                "select",
                "columns": [{
                    "type": "elasticsearch",
                    "aggregations": {
                        key: {
                            "composite": {
                                "sources":
                                sources,
                                "size":
                                batch_size,
                                **({
                                    "after": _after_keys[key]
                                } if key in _after_keys else {}),
                            }
                        }
                        for key, sources in key_sources_pairs
                        if (len(_after_keys) == 0) or (key in _after_keys)
                    },
                }],
                "from": [{
                    "table": table_name
                }],
                **query_overrides,
            },
            auth_args=auth_args,
            log=log,
            **query_kwargs,
        )

        current_results = aggregation.data
        results = FhirAggregation.reduce_composite_results(
            _prev_results, current_results)

        if (progress is not None) and (_current_page == 1) and max_pages:
            progress.reset(max_pages)

        if progress is not None:
            # Update by count or pages (if max_pages specified)
            progress.update(
                1 if max_pages
                else FhirAggregation.count_composite_results(current_results))

        after_keys = FhirAggregation.find_composite_after_keys(
            current_results, batch_size)

        if len(after_keys) == 0 or ((max_pages is not None) and
                                    (_current_page >= max_pages)):
            print(
                f"Retrieved {FhirAggregation.count_composite_results(results)} results"
            )
            return results

        return Query._recursive_execute_composite_aggregations(
            table_name=table_name,
            key_sources_pairs=key_sources_pairs,
            batch_size=batch_size,
            progress=progress,
            query_overrides=query_overrides,
            log=log,
            auth_args=auth_args,
            max_pages=max_pages,
            _current_page=_current_page + 1,
            _prev_results=results,
            _after_keys=after_keys,
            **query_kwargs,
        )
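For orientation, each page of composite results has roughly this shape (the standard elasticsearch composite-aggregation response; the exact envelope here is an assumption). find_composite_after_keys reads the after_key values to request the next page, and reduce_composite_results merges buckets across pages:

# Illustrative single page of composite aggregation results.
current_results = {
    "meta.tag": {
        "buckets": [
            {"key": {"meta.tag": "http://lifeomic.com/fhir/dataset"},
             "doc_count": 42},
        ],
        # Present while more pages remain; fed back in as "after".
        "after_key": {"meta.tag": "http://lifeomic.com/fhir/dataset"},
    }
}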
Example 23
    def execute_composite_aggregations(
        table_name: str,
        key_sources_pairs: List[Tuple[str, List[dict]]],
        batch_size: int = 100,
        query_overrides: dict = {},
        log: bool = False,
        auth_args: Auth = Auth.shared(),
        max_pages: Union[int, None] = None,
        **query_kwargs,
    ):
        """Count records by multiple fields

        Attributes
        ----------
        table_name : str
            The FHIR Search Service table to retrieve from

        key_sources_pairs : str
            Pairs of keys and sources to pull composite results from

            Example Input:
                [
                    ("meta.tag", [{"terms": {"field": "meta.tag.system.keyword"}}])
                ]

        batch_size : int
            The size of each page from elasticsearch to use

        query_overrides : dict
            Parts of the FSS query to override
            (Note that passing certain values can cause the method to error out)

            Example aggregation query executed (can use log=True to inspect):
                {
                    "type": "select",
                    "columns": [{
                        "type": "elasticsearch",
                        "aggregations": {
                            "results": {
                                "composite": {
                                    "sources": [{
                                        "meta.tag": {
                                            "terms": {
                                                "field": "meta.tag.system.keyword"
                                            }
                                        }
                                    }],
                                    "size": 100,
                                }
                            }
                        },
                    }],
                    "from": [{"table": "observation"}],
                }


        auth_args : Auth, dict
            Additional arguments for authentication

        log : bool = False
            Whether to log the elasticsearch query sent to the server

        max_pages : int
            The number of pages to retrieve (useful if working with tons of records)

        query_kwargs : dict
            Arguments to pass to build_query such as patient_id, patient_ids,
            and patient_key. See :func:`~phc.easy.query.fhir_dsl_query.build_query`.

        Examples
        --------
        >>> import phc.easy as phc
        >>> phc.Auth.set({ 'account': '<your-account-name>' })
        >>> phc.Project.set_current('My Project Name')
        >>> phc.Query.execute_composite_aggregations(
            table_name="observation",
            key_sources_pairs=[
                ("meta.tag", [
                    {"code": {"terms": {"field": "meta.tag.code.keyword"}}},
                ]),
                ("code.coding", [
                    {"display": {"terms": {"field": "code.coding.display.keyword"}}}
                ]),
            ]
        )
        """
        if len(key_sources_pairs) == 0:
            raise ValueError("No aggregate composite terms specified.")

        return with_progress(
            tqdm,
            lambda progress: Query._recursive_execute_composite_aggregations(
                table_name=table_name,
                key_sources_pairs=key_sources_pairs,
                batch_size=batch_size,
                progress=progress,
                log=log,
                auth_args=auth_args,
                query_overrides=query_overrides,
                max_pages=max_pages,
                **query_kwargs,
            ),
        )
Example 24
    def execute_paging_api(
        path: str,
        params: dict = {},
        http_verb: str = "GET",
        transform: Callable[[pd.DataFrame], pd.DataFrame] = identity,
        all_results: bool = False,
        auth_args: Auth = Auth.shared(),
        max_pages: Optional[int] = None,
        page_size: Optional[int] = None,
        log: bool = False,
        raw: bool = False,
        ignore_cache: bool = False,
        show_progress: bool = True,
        progress: Optional[tqdm] = None,
        item_key: str = "items",
        try_count: bool = True,
    ):
        """Execute a API query that pages through results

        See https://docs.us.lifeomic.com/api/?shell#lifeomic-core-api-genomics
        for an example.

        Attributes
        ----------
        path : str
            The API path to hit
            (Special tokens: `:project_id`)

        params : dict
            The parameters to include with request

        http_verb : str
            The HTTP method to use

        all_results : bool = False
            Retrieve a sample of results (25) or the entire set of records

        auth_args : Auth, dict
            Additional arguments for authentication

        max_pages : int
            The number of pages to retrieve (useful if working with tons of records)

        page_size : int
            The number of records to fetch per page

        log : bool = False
            Whether to log some diagnostic statements for debugging

        progress : Optional[tqdm] = None
            Override the given progress indicator

        item_key : str
            The key to find the results underneath (usually "items" but not always)

        try_count : bool
            Whether to try and send a "count" param to update the progress bar

        Examples
        --------
        >>> import phc.easy as phc
        >>> phc.Auth.set({ 'account': '<your-account-name>' })
        >>> phc.Project.set_current('My Project Name')
        >>> phc.Query.execute_paging_api(
                "genomics/projects/:project_id/tests",
                params={
                    "patientId": "<patient-uuid>"
                }
            )

        """

        auth = Auth(auth_args)

        params = clean_params(params)

        # Do not pull project_id if not in URL (which throws error if project not selected)
        if "project_id" in path:
            path = path.replace(":project_id", auth.project_id)

        query = {"path": path, "method": http_verb, "params": params}

        if all_results and page_size is None:
            # Default to 100 if not provided but getting all results
            page_size = 100

        if log:
            print(json.dumps(query, indent=4))

        use_cache = ((not ignore_cache) and (not raw) and all_results
                     and (max_pages is None))

        if use_cache and APICache.does_cache_for_query_exist(query):
            return APICache.load_cache_for_query(query)

        callback = (APICache.build_cache_callback(
            query, transform, nested_key=None) if use_cache else None)

        results = with_progress(
            lambda: (progress if progress is not None else tqdm())
            if show_progress else None,
            lambda progress: recursive_paging_api_call(
                path,
                params=params,
                http_verb=http_verb,
                callback=callback,
                scroll=all_results or (max_pages is not None),
                max_pages=max_pages,
                page_size=page_size,
                log=log,
                auth_args=auth_args,
                progress=progress,
                item_key=item_key,
                try_count=try_count,
            ),
        )

        df = pd.DataFrame(results)

        if raw:
            return df

        return transform(df)
Example 25
    def get(auth_args: Auth = Auth.shared()):
        auth = Auth(auth_args)
        client = BaseClient(auth.session())

        return client._api_call(f"ocr/config/{auth.project_id}",
                                http_verb="GET").data
Example 26
def recursive_paging_api_call(
    path: str,
    params: dict = {},
    http_verb: str = "GET",
    scroll: bool = False,
    progress: Optional[tqdm] = None,
    auth_args: Optional[Auth] = Auth.shared(),
    callback: Union[Callable[[Any, bool], None], None] = None,
    max_pages: Optional[int] = None,
    page_size: Optional[int] = None,
    log: bool = False,
    _current_page: int = 1,
    _prev_results: List[dict] = [],
    _next_page_token: Optional[str] = None,
    _count: Optional[Union[float, int]] = None,
):
    auth = Auth(auth_args)
    client = BaseClient(auth.session())

    if _next_page_token:
        params = {**params, "nextPageToken": _next_page_token}

    if page_size:
        params = {**params, "pageSize": page_size}

    # NOTE: Parallelism is kept with execute_fhir_dsl to unify the API calls
    if scroll is False:
        max_pages = 1

    # Compute count and add to progress
    if _count is None and len(_prev_results) == 0:
        count_response = client._api_call(
            path,
            http_verb=http_verb,
            # Use minimum pageSize in case this endpoint doesn't support count
            params={**params, "include": "count", "pageSize": 1},
        )

        _count = count_response.get("count")
        # Count appears to only go up to 999
        if _count == 999:
            print(f"Results are {_count}+.")
            _count = None

        if _count and (progress is not None):
            progress.reset(_count)

    response = client._api_call(path, http_verb=http_verb, params=params)

    current_results = response.data.get("items", [])

    if progress is not None:
        progress.update(len(current_results))

    is_last_batch = (
        (scroll is False)
        or ((max_pages is not None) and (_current_page >= max_pages))
        # Using the next link is the only completely reliable way to tell if a
        # next page exists
        or (response.data.get("links", {}).get("next") is None))
    results = [] if callback else [*_prev_results, *current_results]

    # Sometimes the count doesn't match the results. We make it sync up if the
    # count doesn't match but we got all results.
    # TODO: Remove this when API fixed
    if ((progress is not None) and scroll and is_last_batch
            and (progress.total != progress.n)):
        count = progress.n
        progress.reset(count)
        progress.update(count)

    if callback and not is_last_batch:
        callback(current_results, False)
    elif callback and is_last_batch:
        return callback(current_results, True)
    elif is_last_batch:
        if progress is not None:
            progress.close()

        # Because count is often wrong, we'll skip the logging here
        # TODO: Uncomment this when API fixed
        # print(
        #     f"Retrieved {len(results)}{f'/{_count}' if _count else ''} results"
        # )
        return results

    return recursive_paging_api_call(
        path,
        params=params,
        http_verb=http_verb,
        progress=progress,
        auth_args=auth_args,
        callback=callback,
        max_pages=max_pages,
        page_size=page_size,
        log=log,
        scroll=scroll,
        _current_page=_current_page + 1,
        _prev_results=results,
        _next_page_token=get_next_page_token(
            response.data.get("links", {}).get("next", "")),
        _count=_count,
    )
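get_next_page_token is not shown here. A plausible implementation, assuming the "next" link embeds the token as a nextPageToken query parameter (hypothetical, not the library source):

from typing import Optional
from urllib.parse import parse_qs, urlparse

def get_next_page_token(next_link: str) -> Optional[str]:
    # Returns None when the link is empty or carries no token.
    tokens = parse_qs(urlparse(next_link).query).get("nextPageToken", [])
    return tokens[0] if tokens else None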
Example 27
    def execute_fhir_dsl(
        query: dict,
        all_results: bool = False,
        auth_args: Auth = Auth.shared(),
        callback: Union[Callable[[Any, bool], None], None] = None,
        max_pages: Union[int, None] = None,
        log: bool = False,
        **query_kwargs,
    ):
        """Execute a FHIR query with the DSL

        See https://docs.us.lifeomic.com/development/fhir-service/dsl/

        Attributes
        ----------
        query : dict
            The FHIR query to run (is a superset of elasticsearch)

        all_results : bool
            Return all results by scrolling through multiple pages of data
            (Limit is ignored if provided)

        auth_args : Auth, dict
            Additional arguments for authentication

        callback : Callable[[Any, bool], None] (optional)
            A progress function that is invoked for each batch. When the second
            argument passed is true, then the result of the callback function is
            used as the return value. This is useful if writing results out to a
            file and then returning the completed result from that file.

            Example:

                def handle_batch(batch, is_finished):
                    print(len(batch))
                    if is_finished:
                        return "batch finished

        max_pages : int
            The number of pages to retrieve (useful if working with tons of records)

        log : bool = False
            Whether to log the elasticsearch query sent to the server

        query_kwargs : dict
            Arguments to pass to build_query such as patient_id, patient_ids,
            and patient_key. (See phc.easy.query.fhir_dsl_query.build_query)

        Examples
        --------
        >>> import phc.easy as phc
        >>> phc.Auth.set({ 'account': '<your-account-name>' })
        >>> phc.Project.set_current('My Project Name')
        >>> phc.Query.execute_fhir_dsl({
          "type": "select",
          "columns": "*",
          "from": [
              {"table": "patient"}
          ],
        }, all_results=True)

        """
        query = build_query(query, **query_kwargs)

        if log:
            print(json.dumps(query, indent=4))

        if FhirAggregation.is_aggregation_query(query):
            response = execute_single_fhir_dsl(query, auth_args=auth_args)
            return FhirAggregation.from_response(response)

        if all_results:
            return with_progress(
                lambda: tqdm(total=MAX_RESULT_SIZE),
                lambda progress: recursive_execute_fhir_dsl(
                    {
                        "limit": [
                            {"type": "number", "value": 0},
                            # Make window size smaller than maximum to reduce
                            # pressure on API
                            {"type": "number", "value": DEFAULT_SCROLL_SIZE},
                        ],
                        **query,
                    },
                    scroll=all_results,
                    progress=progress,
                    callback=callback,
                    auth_args=auth_args,
                    max_pages=max_pages,
                ),
            )

        return recursive_execute_fhir_dsl(
            query,
            scroll=all_results,
            callback=callback,
            auth_args=auth_args,
            max_pages=max_pages,
        )
Example 28
    def get_count_by_field(
            table_name: str,
            field: str,
            batch_size: int = 1000,
            query_overrides: dict = {},
            log: bool = False,
            auth_args: Auth = Auth.shared(),
            **query_kwargs,
    ):
        """Count records by a given field

        Attributes
        ----------
        table_name : str
            The FHIR Search Service table to retrieve from

        field : str
            The field name to count the values of (e.g. "subject.reference")

        batch_size : int
            The size of each page from elasticsearch to use

        query_overrides : dict
            Parts of the FSS query to override
            (Note that passing certain values can cause the method to error out)

            The aggregation query is similar to this:
                {
                    "type": "select",
                    "columns": [{
                        "type": "elasticsearch",
                        "aggregations": {
                            "results": {
                                "composite": {
                                    "sources": [{
                                        "value": {
                                            "terms": {
                                                "field": "gender.keyword"
                                            }
                                        }
                                    }],
                                    "size": 100,
                                }
                            }
                        },
                    }],
                    "from": [{"table": "patient"}],
                }


        auth_args : Auth, dict
            Additional arguments for authentication

        log : bool = False
            Whether to log the elasticsearch query sent to the server

        query_kwargs : dict
            Arguments to pass to build_query such as patient_id, patient_ids,
            and patient_key. (See phc.easy.query.fhir_dsl_query.build_query)

        Examples
        --------
        >>> import phc.easy as phc
        >>> phc.Auth.set({ 'account': '<your-account-name>' })
        >>> phc.Project.set_current('My Project Name')
        >>> phc.Query.get_count_by_field(
            table_name="patient",
            field="gender"
        )
        """
        data = Query.execute_composite_aggregations(
            table_name=table_name,
            key_sources_pairs=[(
                "results",
                [{"value": {"terms": {"field": f"{field}.keyword"}}}],
            )],
            batch_size=batch_size,
            log=log,
            auth_args=auth_args,
            query_overrides=query_overrides,
            **query_kwargs,
        )

        return pd.DataFrame([
            {field: r["key"]["value"], "doc_count": r["doc_count"]}
            for r in data["results"]["buckets"]
        ])
Example 29
def test_getting_genomic_tests(get_data_frame):
    GenomicVariant._get_genomic_tests(
        [], max_pages=None, all_results=False, auth_args=Auth(), log=False
    )

    get_data_frame.assert_called_once()
Example 30
def test_creating_auth_from_another_auth_object():
    auth = Auth({"account": "demo"})

    auth1 = Auth(auth)
    assert auth1.account == "demo"