Ejemplo n.º 1
0
    def load_cache_for_query(query: dict,
                             namespace: Optional[str] = None) -> pd.DataFrame:
        """Load the cached result of a query from the cache directory.

        The file is looked up under ``DIR`` (with ``~`` expanded) using the
        name produced by ``APICache.filename_for_query``. Aggregation queries
        are read as JSON and wrapped in a ``FhirAggregation``; every other
        query is read through ``APICache.read_csv``.
        """
        cache_dir = Path(DIR).expanduser()
        cache_name = APICache.filename_for_query(query, namespace)
        filename = str(cache_dir / cache_name)
        print(f'[CACHE] Loading from "{filename}"')

        if not FhirAggregation.is_aggregation_query(query):
            return APICache.read_csv(filename)

        with open(filename, "r") as cache_file:
            return FhirAggregation(json.load(cache_file))
Ejemplo n.º 2
0
    def find_count_of_dsl_query(query: dict, auth_args: Auth = Auth.shared()):
        """Find the count of records matching a given DSL query

        See https://docs.us.lifeomic.com/development/fhir-service/dsl/

        Parameters
        ----------
        query : dict
            The FHIR query to run a count against

        auth_args : Auth, dict
            Additional arguments for authentication

        Returns
        -------
        The value found at ``hits.total.value`` in the response data

        Raises
        ------
        ValueError
            If the query is an aggregation query

        Examples
        --------
        >>> import phc.easy as phc
        >>> phc.Auth.set({ 'account': '<your-account-name>' })
        >>> phc.Project.set_current('My Project Name')
        >>> phc.Query.find_count_of_dsl_query({
          "type": "select",
          "columns": "*",
          "from": [{"table": "patient"}],
        })
        """
        if FhirAggregation.is_aggregation_query(query):
            raise ValueError("Count is not supported for aggregation queries.")

        auth = Auth(auth_args)
        fhir = Fhir(auth.session())

        # Only the total is read from the response, so the query is built
        # with page_size=1
        response = fhir.execute_es(auth.project_id,
                                   build_query(query, page_size=1),
                                   scroll="true")

        return response.data["hits"]["total"]["value"]
def test_is_aggregation_query():
    """A query with an elasticsearch aggregation column is an aggregation."""
    # Expression columns are ignored when aggregations are present
    expression_column = {
        "expr": {"type": "column_ref", "column": "id.keyword"},
    }
    aggregation_column = {
        "type": "elasticsearch",
        "aggregations": {
            "results": {"terms": {"field": "subject.reference.keyword"}},
        },
    }
    query = {
        "type": "select",
        "columns": [expression_column, aggregation_column],
        "from": [{"table": "observation"}],
    }

    is_aggregation = FhirAggregation.is_aggregation_query(query)

    assert is_aggregation
def test_find_composite_after_keys():
    """Check the after keys found in SAMPLE_COMPOSITE_RESULT (batch_size=2)."""
    expected_after_keys = {
        "meta.tag": {"value": "meta.tag.example-after-key"},
    }

    after_keys = FhirAggregation.find_composite_after_keys(
        SAMPLE_COMPOSITE_RESULT, batch_size=2)

    assert after_keys == expected_after_keys
Ejemplo n.º 5
0
    def filename_for_query(query: dict, namespace: Optional[str] = None):
        """Build a descriptive cache filename that ends in a hash of the query.

        The name joins, with underscores, every non-empty piece of: the
        namespace, the segments of ``query["path"]``, the table names in
        ``query["from"]``, an ``agg`` marker, a column count, a ``where``
        marker and the first 8 hex characters of the SHA-256 of the
        JSON-serialized query. Aggregation queries get a ``.json`` extension
        and all other queries get ``.csv``.
        """
        is_aggregation = FhirAggregation.is_aggregation_query(query)

        agg_description = "agg" if is_aggregation else ""

        # The column count is only reported for non-aggregation queries that
        # list their columns explicitly
        columns = query.get("columns")
        if is_aggregation or not isinstance(columns, list):
            column_description = ""
        else:
            column_description = f"{len(columns)}col"

        where_description = "where" if query.get("where") else ""

        serialized_query = json.dumps(query).encode("utf-8")
        unique_hash = hashlib.sha256(serialized_query).hexdigest()[:8]

        path_segments = []
        for segment in query.get("path", "").split("/"):
            # Exclude UUIDs but not paths with dashes
            looks_like_uuid = "-" in segment and len(segment) == 36
            if not looks_like_uuid:
                path_segments.append(segment.replace("-", "_"))

        table_names = [
            source.get("table", "") for source in query.get("from", [])
        ]

        components = (
            [namespace or ""]
            + path_segments
            + table_names
            + [
                agg_description,
                column_description,
                where_description,
                unique_hash,
            ]
        )

        stem = "_".join([part for part in components if len(part) > 0])
        extension = "json" if is_aggregation else "csv"

        return f"{stem}.{extension}"
def test_is_not_aggregation_query_with_all_columns():
    """Selecting every column ("*") is not an aggregation query."""
    query = {
        "type": "select",
        "columns": "*",
        "from": [{"table": "observation"}],
    }

    is_aggregation = FhirAggregation.is_aggregation_query(query)

    assert not is_aggregation
Ejemplo n.º 7
0
    def execute_fhir_dsl_with_options(
        query: dict,
        transform: Callable[[pd.DataFrame], pd.DataFrame],
        all_results: bool,
        raw: bool,
        query_overrides: dict,
        auth_args: Auth,
        ignore_cache: bool,
        max_pages: Union[int, None],
        log: bool = False,
        **query_kwargs,
    ):
        """Execute a FHIR DSL query with caching, raw and transform options.

        ``query_overrides`` is merged over ``query`` before the result is
        passed to ``build_query`` together with ``query_kwargs``. When ``log``
        is set, the built query is printed as indented JSON.

        The cache is used only when ``ignore_cache`` and ``raw`` are both
        falsy, ``max_pages`` is None, and either ``all_results`` is set or the
        query is an aggregation query. If a cache entry exists under the
        ``FHIR_DSL`` namespace, it is loaded and returned without executing
        the query.

        Otherwise the query is executed. A ``FhirAggregation`` result is
        returned as-is (and written to the cache when caching is enabled), a
        ``pd.DataFrame`` result is returned as-is, and any other result is
        turned into a data frame from each record's ``"_source"`` and passed
        through ``transform`` unless ``raw`` is set.
        """
        merged_query = {**query, **query_overrides}
        query = build_query(merged_query, **query_kwargs)

        if log:
            print(json.dumps(query, indent=4))

        use_cache = (not (ignore_cache or raw)
                     and (all_results
                          or FhirAggregation.is_aggregation_query(query))
                     and max_pages is None)

        if use_cache and APICache.does_cache_for_query_exist(
                query, namespace=FHIR_DSL):
            return APICache.load_cache_for_query(query, namespace=FHIR_DSL)

        callback = None
        if use_cache:
            callback = APICache.build_cache_callback(query,
                                                     transform,
                                                     namespace=FHIR_DSL)

        results = Query.execute_fhir_dsl(
            query,
            all_results,
            auth_args,
            callback=callback,
            max_pages=max_pages,
        )

        if isinstance(results, FhirAggregation):
            # Cache isn't written in batches so we need to explicitly do it here
            if use_cache:
                APICache.write_agg(query, results)

            return results

        if isinstance(results, pd.DataFrame):
            return results

        df = pd.DataFrame([record["_source"] for record in results])

        return df if raw else transform(df)
def test_is_not_aggregation_query_with_specific_column_selected():
    """Selecting a single expression column is not an aggregation query."""
    id_column = {
        "expr": {"type": "column_ref", "column": "id.keyword"},
    }
    query = {
        "type": "select",
        "columns": [id_column],
        "from": [{"table": "observation"}],
    }

    is_aggregation = FhirAggregation.is_aggregation_query(query)

    assert not is_aggregation
def test_reduce_composite_results_from_start():
    """Reducing into empty previous results keeps buckets, drops after_key."""

    def bucket(value, doc_count):
        # A fresh dict per call so the input and the expectation never share
        # objects
        return {"key": {"value": value}, "doc_count": doc_count}

    def first_page_buckets():
        return [
            bucket("meta.tag.first-example", 10),
            bucket("meta.tag.second-example", 3),
        ]

    sample = {
        "meta.tag": {
            "after_key": {"value": "meta.tag.example-after-key"},
            "buckets": first_page_buckets(),
        }
    }
    expected = {"meta.tag": {"buckets": first_page_buckets()}}

    reduced = FhirAggregation.reduce_composite_results({}, sample)

    assert reduced == expected
Ejemplo n.º 10
0
    def _recursive_execute_composite_aggregations(
        table_name: str,
        key_sources_pairs: List[Tuple[str, List[dict]]],
        batch_size: int = 100,
        progress: Union[tqdm, None] = None,
        query_overrides: Union[dict, None] = None,
        log: bool = False,
        auth_args: Auth = Auth.shared(),
        max_pages: Union[int, None] = None,
        _current_page: int = 1,
        _prev_results: Union[dict, None] = None,
        _after_keys: Union[dict, None] = None,
        **query_kwargs,
    ):
        """Page through composite aggregations and return the reduced results

        Each page runs one elasticsearch aggregation query against
        ``table_name`` with a composite aggregation per key. Pages are
        fetched in a loop (rather than by recursion) so that long paginations
        cannot exhaust the interpreter's recursion limit.

        Parameters
        ----------
        table_name : str
            The table placed in the "from" clause of every page's query

        key_sources_pairs : List[Tuple[str, List[dict]]]
            Pairs of aggregation key and the composite "sources" for that key

        batch_size : int = 100
            The "size" of each composite aggregation; also passed to
            FhirAggregation.find_composite_after_keys

        progress : tqdm (optional)
            Progress bar. Reset to max_pages on page 1 when max_pages is
            given, then updated by 1 per page; otherwise updated by the
            result count of each page

        query_overrides : dict (optional)
            Top-level keys merged over the generated query

        log : bool = False
            Passed through to Query.execute_fhir_dsl

        auth_args : Auth, dict
            Additional arguments for authentication

        max_pages : int (optional)
            Stop once this many pages have been retrieved

        _current_page, _prev_results, _after_keys
            Pagination state to resume from (page number, results reduced so
            far, and the "after" value per key). Defaults start from scratch.

        query_kwargs : dict
            Passed through to Query.execute_fhir_dsl

        Returns
        -------
        The value of FhirAggregation.reduce_composite_results applied across
        all retrieved pages
        """
        # None defaults avoid sharing one mutable dict across separate calls
        query_overrides = {} if query_overrides is None else query_overrides
        results = {} if _prev_results is None else _prev_results
        after_keys = {} if _after_keys is None else _after_keys
        current_page = _current_page

        while True:
            # The first page requests every key; later pages only request
            # keys that still have an "after" value to continue from
            aggregations = {
                key: {
                    "composite": {
                        "sources":
                        sources,
                        "size":
                        batch_size,
                        **({
                            "after": after_keys[key]
                        } if key in after_keys else {}),
                    }
                }
                for key, sources in key_sources_pairs
                if (len(after_keys) == 0) or (key in after_keys)
            }

            aggregation = Query.execute_fhir_dsl(
                {
                    "type":
                    "select",
                    "columns": [{
                        "type": "elasticsearch",
                        "aggregations": aggregations,
                    }],
                    "from": [{
                        "table": table_name
                    }],
                    **query_overrides,
                },
                auth_args=auth_args,
                log=log,
                **query_kwargs,
            )

            current_results = aggregation.data
            results = FhirAggregation.reduce_composite_results(
                results, current_results)

            if progress is not None:
                if (current_page == 1) and max_pages:
                    progress.reset(max_pages)

                # Update by count or pages (if max_pages specified)
                progress.update(1 if max_pages else FhirAggregation.
                                count_composite_results(current_results))

            after_keys = FhirAggregation.find_composite_after_keys(
                current_results, batch_size)

            if len(after_keys) == 0 or ((max_pages is not None) and
                                        (current_page >= max_pages)):
                print(
                    f"Retrieved {FhirAggregation.count_composite_results(results)} results"
                )
                return results

            current_page += 1
Ejemplo n.º 11
0
    def execute_fhir_dsl(
        query: dict,
        all_results: bool = False,
        auth_args: Auth = Auth.shared(),
        callback: Union[Callable[[Any, bool], None], None] = None,
        max_pages: Union[int, None] = None,
        log: bool = False,
        **query_kwargs,
    ):
        """Execute a FHIR query with the DSL

        See https://docs.us.lifeomic.com/development/fhir-service/dsl/

        Parameters
        ----------
        query : dict
            The FHIR query to run (is a superset of elasticsearch)

        all_results : bool
            Return all results by scrolling through multiple pages of data.
            A default scroll window is supplied as the query's "limit".
            NOTE(review): the original docs state that a provided limit is
            ignored, yet in the dict merge here a "limit" present in the
            built query replaces the default window - confirm against
            recursive_execute_fhir_dsl.

        auth_args : Auth, dict
            Additional arguments for authentication

        callback : Callable[[Any, bool], None] (optional)
            A progress function that is invoked for each batch. When the second
            argument passed is true, then the result of the callback function is
            used as the return value. This is useful if writing results out to a
            file and then returning the completed result from that file.

            Example:

                def handle_batch(batch, is_finished):
                    print(len(batch))
                    if is_finished:
                        return "batch finished"

        max_pages : int
            The number of pages to retrieve (useful if working with tons of records)

        log : bool = False
            Whether to log the elasticsearch query sent to the server

        query_kwargs : dict
            Arguments to pass to build_query such as patient_id, patient_ids,
            and patient_key. (See phc.easy.query.fhir_dsl_query.build_query)

        Returns
        -------
        A FhirAggregation built from the response for aggregation queries;
        otherwise whatever recursive_execute_fhir_dsl returns

        Examples
        --------
        >>> import phc.easy as phc
        >>> phc.Auth.set({ 'account': '<your-account-name>' })
        >>> phc.Project.set_current('My Project Name')
        >>> phc.Query.execute_fhir_dsl({
          "type": "select",
          "columns": "*",
          "from": [
              {"table": "patient"}
          ],
        }, all_results=True)

        """
        query = build_query(query, **query_kwargs)

        if log:
            print(json.dumps(query, indent=4))

        # Aggregation queries are answered by a single request
        if FhirAggregation.is_aggregation_query(query):
            response = execute_single_fhir_dsl(query, auth_args=auth_args)
            return FhirAggregation.from_response(response)

        if not all_results:
            return recursive_execute_fhir_dsl(
                query,
                scroll=all_results,
                callback=callback,
                auth_args=auth_args,
                max_pages=max_pages,
            )

        def create_progress_bar():
            return tqdm(total=MAX_RESULT_SIZE)

        def scroll_through_pages(progress):
            # Make window size smaller than maximum to reduce pressure on API
            default_window = [
                {
                    "type": "number",
                    "value": 0
                },
                {
                    "type": "number",
                    "value": DEFAULT_SCROLL_SIZE
                },
            ]

            return recursive_execute_fhir_dsl(
                {
                    "limit": default_window,
                    **query,
                },
                scroll=all_results,
                progress=progress,
                callback=callback,
                auth_args=auth_args,
                max_pages=max_pages,
            )

        return with_progress(create_progress_bar, scroll_through_pages)
def test_reduce_composite_results():
    """Current buckets are appended per key; keys absent from current stay."""

    def bucket(value, doc_count):
        # A fresh dict per call so inputs and the expectation never share
        # objects
        return {"key": {"value": value}, "doc_count": doc_count}

    def page(after_key_value, buckets):
        return {"after_key": {"value": after_key_value}, "buckets": buckets}

    def meta_tag_first_page():
        return [
            bucket("meta.tag.first-example", 10),
            bucket("meta.tag.second-example", 3),
        ]

    def meta_tag_second_page():
        return [
            bucket("meta.tag.third-example", 2),
            bucket("meta.tag.fourth-example", 1),
        ]

    def code_coding_first_page():
        return [
            bucket("code.coding.first-example", 7),
            bucket("code.coding.second-example", 21),
        ]

    previous = {
        "meta.tag": page("meta.tag.example-after-key", meta_tag_first_page()),
        "code.coding": page("code.coding.example-after-key",
                            code_coding_first_page()),
    }

    current = {
        "meta.tag": page("meta.tag.example-next-after-key",
                         meta_tag_second_page()),
    }

    expected = {
        "meta.tag": {
            "buckets": meta_tag_first_page() + meta_tag_second_page()
        },
        "code.coding": {
            "buckets": code_coding_first_page()
        },
    }

    reduced = FhirAggregation.reduce_composite_results(previous, current)

    assert reduced == expected
def test_count_composite_results():
    """SAMPLE_COMPOSITE_RESULT is expected to count as 5 results."""
    expected_count = 5

    count = FhirAggregation.count_composite_results(SAMPLE_COMPOSITE_RESULT)

    assert count == expected_count