def test_add_patient_id_with_bool_should_query():
    """A patient filter is combined with an existing bool/should via bool/must."""
    original_query = {
        "where": {
            "type": "elasticsearch",
            "query": {
                "bool": {"should": [{"term": {"gender.keyword": "male"}}]}
            },
        }
    }
    # The pre-existing should-clause stays intact as the first must entry,
    # followed by the patient terms filter on the custom key.
    expected_must = [
        {"bool": {"should": [{"term": {"gender.keyword": "male"}}]}},
        {"terms": {"id.keyword": ["Patient/a", "a"]}},
    ]

    result = build_query(original_query, patient_ids=["a"], patient_key="id")

    assert result == {
        "where": {
            "type": "elasticsearch",
            "query": {"bool": {"must": expected_must}},
        }
    }
Example #2
0
    def find_count_of_dsl_query(query: dict, auth_args: Auth = Auth.shared()):
        """Find count of a given dsl query

        See https://docs.us.lifeomic.com/development/fhir-service/dsl/

        Attributes
        ----------
        query : dict
            The FHIR query to run a count against

        auth_args : Auth, dict
            Additional arguments for authentication

        Returns
        -------
        The value found at ``hits.total.value`` in the response data

        Raises
        ------
        ValueError
            If the query is an aggregation query

        Examples
        --------
        >>> import phc.easy as phc
        >>> phc.Auth.set({ 'account': '<your-account-name>' })
        >>> phc.Project.set_current('My Project Name')
        >>> phc.Query.find_count_of_dsl_query({
          "type": "select",
          "columns": "*",
          "from": [{"table": "patient"}],
        })
        """
        if FhirAggregation.is_aggregation_query(query):
            raise ValueError("Count is not supported for aggregation queries.")

        auth = Auth(auth_args)
        fhir = Fhir(auth.session())

        # Only the reported total is read from the response, so the query is
        # rebuilt with a page size of one.
        response = fhir.execute_es(
            auth.project_id, build_query(query, page_size=1), scroll="true"
        )

        return response.data["hits"]["total"]["value"]
def test_add_patient_ids_with_no_where_clause():
    """An empty query gains an elasticsearch where clause for the patient ids."""
    result = build_query({}, patient_ids=["a"])
    expected_terms = {"subject.reference.keyword": ["Patient/a", "a"]}

    assert result == {
        "where": {"type": "elasticsearch", "query": {"terms": expected_terms}}
    }
def test_no_modification():
    """A query built without extra options compares equal to its input."""
    untouched = {
        "where": {
            "type": "elasticsearch",
            "query": {"term": {"gender.keyword": "male"}},
        }
    }

    built = build_query(untouched)

    assert built == untouched
def test_add_single_patient_id_to_query():
    """A single `patient_id` yields a terms filter on the subject reference."""
    expected_where = {
        "type": "elasticsearch",
        "query": {"terms": {"subject.reference.keyword": ["Patient/a", "a"]}},
    }

    result = build_query({}, patient_id="a")

    assert result == {"where": expected_where}
Example #6
0
    def execute_fhir_dsl_with_options(
        query: dict,
        transform: Callable[[pd.DataFrame], pd.DataFrame],
        all_results: bool,
        raw: bool,
        query_overrides: dict,
        auth_args: Auth,
        ignore_cache: bool,
        max_pages: Union[int, None],
        log: bool = False,
        **query_kwargs,
    ):
        """Execute a FHIR DSL query, reading from or filling the API cache

        Attributes
        ----------
        query : dict
            The base query; merged with query_overrides (overrides win) and
            then passed through build_query together with query_kwargs

        transform : Callable[[pd.DataFrame], pd.DataFrame]
            Applied to the resulting data frame unless raw is set; also handed
            to the cache callback when caching is active

        all_results : bool
            Forwarded to Query.execute_fhir_dsl; caching requires either this
            flag or an aggregation query

        raw : bool
            Return the frame of `_source` records without applying transform
            (also disables caching)

        query_overrides : dict
            Keys that replace same-named keys of query

        auth_args : Auth
            Forwarded to Query.execute_fhir_dsl

        ignore_cache : bool
            Skip the cache entirely

        max_pages : int, None
            Forwarded to Query.execute_fhir_dsl; any value other than None
            disables caching

        log : bool = False
            Print the built query as indented JSON

        query_kwargs : dict
            Extra arguments for build_query
        """
        query = build_query({**query, **query_overrides}, **query_kwargs)

        if log:
            print(json.dumps(query, indent=4))

        # Caching only applies to non-raw, uncapped requests that either
        # fetch everything or are aggregations.
        use_cache = (
            not ignore_cache
            and not raw
            and (all_results or FhirAggregation.is_aggregation_query(query))
            and max_pages is None
        )

        if use_cache:
            if APICache.does_cache_for_query_exist(query, namespace=FHIR_DSL):
                return APICache.load_cache_for_query(query, namespace=FHIR_DSL)

            callback = APICache.build_cache_callback(
                query, transform, namespace=FHIR_DSL
            )
        else:
            callback = None

        results = Query.execute_fhir_dsl(
            query,
            all_results,
            auth_args,
            callback=callback,
            max_pages=max_pages,
        )

        if isinstance(results, FhirAggregation):
            # Cache isn't written in batches so we need to explicitly do it here
            if use_cache:
                APICache.write_agg(query, results)
            return results

        if isinstance(results, pd.DataFrame):
            return results

        frame = pd.DataFrame([hit["_source"] for hit in results])

        return frame if raw else transform(frame)
Example #7
0
def execute_single_fhir_dsl(
    query: dict,
    scroll_id: str = "",
    retry_backoff: bool = False,
    auth_args: Auth = Auth.shared(),
    _retry_time: int = 1,
):
    """Run one FHIR DSL request, optionally retrying with a smaller page size

    Attributes
    ----------
    query : dict
        The query to send

    scroll_id : str
        Passed through to the underlying dsl call

    retry_backoff : bool
        When False, any error is re-raised immediately. Otherwise an error
        whose text contains "Internal server error" triggers a retry with a
        reduced limit, until _retry_time reaches MAX_RETRY_BACKOFF

    auth_args : Auth, dict
        Additional arguments for authentication

    _retry_time : int
        Internal attempt counter (1 on the first call)
    """
    auth = Auth(auth_args)
    fhir = Fhir(auth.session())

    try:
        return fhir.dsl(auth.project_id, query, scroll_id)
    except Exception as err:
        can_retry = (
            _retry_time < MAX_RETRY_BACKOFF
            and retry_backoff is not False
            and "Internal server error" in str(err)
        )
        if not can_retry:
            raise err

        if _retry_time == 1:
            # Base first retry attempt on record count
            count_response = fhir.dsl(
                auth.project_id, build_query(query, page_size=1), scroll="true"
            )
            record_count = count_response.data["hits"]["total"]["value"]

            def shrink_limit(limit: int):
                half_of_current = (get_limit(query) or DEFAULT_SCROLL_SIZE) / 2
                return min(half_of_current, math.pow(record_count, 0.85))

        else:

            def shrink_limit(limit: int):
                return math.pow(limit, 0.85)

        new_query = update_limit(query, shrink_limit)

        print(
            f"Received server error. Retrying with page_size={get_limit(new_query)}"
        )

        next_attempt = _retry_time + 1
        return execute_single_fhir_dsl(
            new_query,
            scroll_id=scroll_id,
            retry_backoff=True,
            auth_args=auth_args,
            _retry_time=next_attempt,
        )
def test_add_ids_to_query():
    """`ids` and `id` are merged into one terms filter on `id.keyword`."""
    result = build_query({}, ids=["a", "b"], id="c")

    # A plain assert avoids depending on an external assert_equals helper.
    assert result == {
        "where": {
            "type": "elasticsearch",
            "query": {"terms": {"id.keyword": ["a", "b", "c"]}},
        }
    }
def test_replace_limit():
    """`page_size` replaces the second limit entry and keeps the first."""
    query_with_limit = {
        "limit": [
            {"type": "number", "value": 0},
            {"type": "number", "value": 100},
        ]
    }
    expected_limit = [
        {"type": "number", "value": 0},
        {"type": "number", "value": 1000},
    ]

    result = build_query(query_with_limit, page_size=1000)

    assert result == {"limit": expected_limit}
def test_add_single_patient_id_with_prefix():
    """Each given prefix produces a prefixed id, followed by the bare id."""
    prefixes = ["Patient/", "urn:uuid:"]
    expected_references = ["Patient/a", "urn:uuid:a", "a"]

    result = build_query({}, patient_id="a", patient_id_prefixes=prefixes)

    assert result == {
        "where": {
            "type": "elasticsearch",
            "query": {
                "terms": {"subject.reference.keyword": expected_references}
            },
        }
    }
def test_add_patient_id_and_limit_with_query_term():
    """Patient ids join an existing term in bool/must and a limit is added."""
    term_query = {
        "where": {
            "type": "elasticsearch",
            "query": {"term": {"test.field.keyword": "blah"}},
        }
    }
    expected_where = {
        "type": "elasticsearch",
        "query": {
            "bool": {
                "must": [
                    {"term": {"test.field.keyword": "blah"}},
                    {
                        "terms": {
                            "subject.reference.keyword": [
                                "Patient/a",
                                "Patient/b",
                                "a",
                                "b",
                            ]
                        }
                    },
                ]
            }
        },
    }
    expected_limit = [
        {"type": "number", "value": 0},
        {"type": "number", "value": 100},
    ]

    result = build_query(term_query, patient_ids=["a", "b"], page_size=100)

    assert result == {"where": expected_where, "limit": expected_limit}
def test_add_term():
    """An extra `term` is appended after the existing clause in bool/must."""
    terms_query = {
        "where": {
            "type": "elasticsearch",
            "query": {"terms": {"a.keyword": [1, 2, 3]}},
        }
    }
    expected_must = [
        {"terms": {"a.keyword": [1, 2, 3]}},
        {"term": {"code.coding.code.keyword": "blah"}},
    ]

    result = build_query(
        terms_query, term={"code.coding.code.keyword": "blah"}
    )

    assert result == {
        "where": {
            "type": "elasticsearch",
            "query": {"bool": {"must": expected_must}},
        }
    }
Example #13
0
    def execute_fhir_dsl(
        query: dict,
        all_results: bool = False,
        auth_args: Auth = Auth.shared(),
        callback: Union[Callable[[Any, bool], None], None] = None,
        max_pages: Union[int, None] = None,
        log: bool = False,
        **query_kwargs,
    ):
        """Run a query through the FHIR DSL

        See https://docs.us.lifeomic.com/development/fhir-service/dsl/

        Attributes
        ----------
        query : dict
            The FHIR query to run (is a superset of elasticsearch)

        all_results : bool
            Return all results by scrolling through multiple pages of data.
            A default scroll window of DEFAULT_SCROLL_SIZE is applied; a
            "limit" already present in the query takes precedence over that
            default (how the scroll treats it is decided in
            recursive_execute_fhir_dsl, which is not shown here)

        auth_args : Auth, dict
            Additional arguments for authentication

        callback : Callable[[Any, bool], None] (optional)
            A progress function that is invoked for each batch. When the second
            argument passed is true, then the result of the callback function is
            used as the return value. This is useful if writing results out to a
            file and then returning the completed result from that file.

            Example:

                def handle_batch(batch, is_finished):
                    print(len(batch))
                    if is_finished:
                        return "batch finished"

        max_pages : int
            The number of pages to retrieve (useful if working with tons of records)

        log : bool = False
            Whether to log the elasticsearch query sent to the server

        query_kwargs : dict
            Arguments to pass to build_query such as patient_id, patient_ids,
            and patient_key. (See phc.easy.query.fhir_dsl_query.build_query)

        Examples
        --------
        >>> import phc.easy as phc
        >>> phc.Auth.set({ 'account': '<your-account-name>' })
        >>> phc.Project.set_current('My Project Name')
        >>> phc.Query.execute_fhir_dsl({
          "type": "select",
          "columns": "*",
          "from": [
              {"table": "patient"}
          ],
        }, all_results=True)

        """
        query = build_query(query, **query_kwargs)

        if log:
            print(json.dumps(query, indent=4))

        # Aggregations are answered by a single request and wrapped as-is
        if FhirAggregation.is_aggregation_query(query):
            return FhirAggregation.from_response(
                execute_single_fhir_dsl(query, auth_args=auth_args)
            )

        if not all_results:
            return recursive_execute_fhir_dsl(
                query,
                scroll=all_results,
                callback=callback,
                auth_args=auth_args,
                max_pages=max_pages,
            )

        def make_progress_bar():
            return tqdm(total=MAX_RESULT_SIZE)

        def run_scrolling_query(progress):
            # Make window size smaller than maximum to reduce pressure on API
            default_window = [
                {"type": "number", "value": 0},
                {"type": "number", "value": DEFAULT_SCROLL_SIZE},
            ]

            return recursive_execute_fhir_dsl(
                {"limit": default_window, **query},
                scroll=all_results,
                progress=progress,
                callback=callback,
                auth_args=auth_args,
                max_pages=max_pages,
            )

        return with_progress(make_progress_bar, run_scrolling_query)
def test_throws_with_non_elasticsearch_where():
    """Add a patient filter to a where clause that has no elasticsearch type.

    NOTE(review): the name implies an exception is expected, but nothing in
    this body asserts one -- presumably a decorator or runner outside this
    view handles that; confirm.
    """
    non_elasticsearch_query = {"where": {"query": "blah-blah-blah"}}

    build_query(non_elasticsearch_query, patient_id="a")
def test_add_code_filters():
    """Code, display and system each become a should-group over all code fields."""
    # One bool/should group per filter, each listing both code fields in order
    code_group = {
        "bool": {
            "should": [
                {"terms": {"meta.tag.code.keyword": ["1234-5"]}},
                {"terms": {"code.coding.code.keyword": ["1234-5"]}},
            ]
        }
    }
    display_group = {
        "bool": {
            "should": [
                {"term": {"meta.tag.display.keyword": "My Code"}},
                {"term": {"code.coding.display.keyword": "My Code"}},
            ]
        }
    }
    system_group = {
        "bool": {
            "should": [
                {
                    "term": {
                        "meta.tag.system.keyword": "http://unitsofmeasure.org"
                    }
                },
                {
                    "term": {
                        "code.coding.system.keyword": "http://unitsofmeasure.org"
                    }
                },
            ]
        }
    }

    result = build_query(
        query={},
        code_fields=["meta.tag", "code.coding"],
        code=["1234-5"],
        display="My Code",
        system="http://unitsofmeasure.org",
    )

    assert result == {
        "where": {
            "type": "elasticsearch",
            "query": {
                "bool": {"must": [code_group, display_group, system_group]}
            },
        }
    }