def test_add_patient_id_with_bool_should_query():
    result = build_query(
        {
            "where": {
                "type": "elasticsearch",
                "query": {
                    "bool": {"should": [{"term": {"gender.keyword": "male"}}]}
                },
            }
        },
        patient_ids=["a"],
        patient_key="id",
    )

    assert result == {
        "where": {
            "type": "elasticsearch",
            "query": {
                "bool": {
                    "must": [
                        {
                            "bool": {
                                "should": [{"term": {"gender.keyword": "male"}}]
                            }
                        },
                        {"terms": {"id.keyword": ["Patient/a", "a"]}},
                    ]
                }
            },
        }
    }
def find_count_of_dsl_query(query: dict, auth_args: Auth = Auth.shared()):
    """Find the count of records for a given DSL query

    See https://docs.us.lifeomic.com/development/fhir-service/dsl/

    Attributes
    ----------
    query : dict
        The FHIR query to run a count against

    auth_args : Auth, dict
        Additional arguments for authentication

    Examples
    --------
    >>> import phc.easy as phc
    >>> phc.Auth.set({ 'account': '<your-account-name>' })
    >>> phc.Project.set_current('My Project Name')
    >>> phc.Query.find_count_of_dsl_query({
    ...     "type": "select",
    ...     "columns": "*",
    ...     "from": [{"table": "patient"}],
    ... })
    """
    if FhirAggregation.is_aggregation_query(query):
        raise ValueError("Count is not supported for aggregation queries.")

    auth = Auth(auth_args)
    fhir = Fhir(auth.session())

    response = fhir.execute_es(
        auth.project_id, build_query(query, page_size=1), scroll="true"
    )

    return response.data["hits"]["total"]["value"]
def test_add_patient_ids_with_no_where_clause():
    assert build_query({}, patient_ids=["a"]) == {
        "where": {
            "type": "elasticsearch",
            "query": {
                "terms": {"subject.reference.keyword": ["Patient/a", "a"]}
            },
        }
    }
def test_no_modification():
    example = {
        "where": {
            "type": "elasticsearch",
            "query": {"term": {"gender.keyword": "male"}},
        }
    }

    assert build_query(example) == example
def test_add_single_patient_id_to_query():
    result = build_query({}, patient_id="a")

    assert result == {
        "where": {
            "type": "elasticsearch",
            "query": {
                "terms": {"subject.reference.keyword": ["Patient/a", "a"]}
            },
        }
    }
def execute_fhir_dsl_with_options(
    query: dict,
    transform: Callable[[pd.DataFrame], pd.DataFrame],
    all_results: bool,
    raw: bool,
    query_overrides: dict,
    auth_args: Auth,
    ignore_cache: bool,
    max_pages: Union[int, None],
    log: bool = False,
    **query_kwargs,
):
    query = build_query({**query, **query_overrides}, **query_kwargs)

    if log:
        print(json.dumps(query, indent=4))

    use_cache = (
        (not ignore_cache)
        and (not raw)
        and (all_results or FhirAggregation.is_aggregation_query(query))
        and (max_pages is None)
    )

    if use_cache and APICache.does_cache_for_query_exist(
        query, namespace=FHIR_DSL
    ):
        return APICache.load_cache_for_query(query, namespace=FHIR_DSL)

    callback = (
        APICache.build_cache_callback(query, transform, namespace=FHIR_DSL)
        if use_cache
        else None
    )

    results = Query.execute_fhir_dsl(
        query,
        all_results,
        auth_args,
        callback=callback,
        max_pages=max_pages,
    )

    if isinstance(results, FhirAggregation):
        # Cache isn't written in batches so we need to explicitly do it here
        if use_cache:
            APICache.write_agg(query, results)

        return results

    if isinstance(results, pd.DataFrame):
        return results

    df = pd.DataFrame(map(lambda r: r["_source"], results))

    if raw:
        return df

    return transform(df)
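# Usage sketch for execute_fhir_dsl_with_options (illustrative only and never
# called in this module): `select_columns` and the column names "id"/"gender"
# are hypothetical assumptions, not part of this library. It shows how the
# same transform is applied whether rows come back raw from the API or get
# replayed through the cache callback.
def _example_execute_with_options():
    def select_columns(df: pd.DataFrame) -> pd.DataFrame:
        # Keep a stable subset of columns so downstream code sees a fixed shape
        cols = [c for c in ["id", "gender"] if c in df.columns]
        return df[cols] if cols else df

    return execute_fhir_dsl_with_options(
        query={"type": "select", "columns": "*", "from": [{"table": "patient"}]},
        transform=select_columns,
        all_results=False,
        raw=False,
        query_overrides={},
        auth_args=Auth.shared(),
        ignore_cache=True,
        max_pages=1,
    )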
def execute_single_fhir_dsl(
    query: dict,
    scroll_id: str = "",
    retry_backoff: bool = False,
    auth_args: Auth = Auth.shared(),
    _retry_time: int = 1,
):
    """Execute a single page of a FHIR DSL query, optionally retrying with
    progressively smaller page sizes when the server returns an internal error.
    """
    auth = Auth(auth_args)
    fhir = Fhir(auth.session())

    try:
        return fhir.dsl(auth.project_id, query, scroll_id)
    except Exception as err:
        # Only retry internal server errors, and only up to MAX_RETRY_BACKOFF
        if (
            (_retry_time >= MAX_RETRY_BACKOFF)
            or (retry_backoff is False)
            or ("Internal server error" not in str(err))
        ):
            raise err

        if _retry_time == 1:
            # Base first retry attempt on record count
            record_count = fhir.dsl(
                auth.project_id, build_query(query, page_size=1), scroll="true"
            ).data["hits"]["total"]["value"]

            def backoff_limit(limit: int):
                return min(
                    (get_limit(query) or DEFAULT_SCROLL_SIZE) / 2,
                    math.pow(record_count, 0.85),
                )

        else:

            def backoff_limit(limit: int):
                return math.pow(limit, 0.85)

        new_query = update_limit(query, backoff_limit)

        print(
            f"Received server error. Retrying with page_size={get_limit(new_query)}"
        )

        return execute_single_fhir_dsl(
            new_query,
            scroll_id=scroll_id,
            retry_backoff=True,
            auth_args=auth_args,
            _retry_time=_retry_time + 1,
        )
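# Sketch of the backoff progression used above (assumption: the server keeps
# returning "Internal server error"). After the first retry, which is sized
# from the record count, each subsequent retry shrinks the page size to
# limit ** 0.85 — e.g. 10000 -> ~2512 -> ~777 -> ~286. This helper is
# illustrative only and is not used by execute_single_fhir_dsl.
def _example_backoff_progression(initial_limit: int = 10000, retries: int = 3):
    limits = [initial_limit]
    for _ in range(retries):
        limits.append(round(math.pow(limits[-1], 0.85)))
    return limits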
def test_add_ids_to_query():
    result = build_query({}, ids=["a", "b"], id="c")

    assert result == {
        "where": {
            "type": "elasticsearch",
            "query": {"terms": {"id.keyword": ["a", "b", "c"]}},
        }
    }
def test_replace_limit():
    result = build_query(
        {
            "limit": [
                {"type": "number", "value": 0},
                {"type": "number", "value": 100},
            ]
        },
        page_size=1000,
    )

    assert result == {
        "limit": [
            {"type": "number", "value": 0},
            {"type": "number", "value": 1000},
        ]
    }
def test_add_single_patient_id_with_prefix():
    result = build_query(
        {}, patient_id="a", patient_id_prefixes=["Patient/", "urn:uuid:"]
    )

    assert result == {
        "where": {
            "type": "elasticsearch",
            "query": {
                "terms": {
                    "subject.reference.keyword": [
                        "Patient/a",
                        "urn:uuid:a",
                        "a",
                    ]
                }
            },
        }
    }
def test_add_patient_id_and_limit_with_query_term():
    result = build_query(
        {
            "where": {
                "type": "elasticsearch",
                "query": {"term": {"test.field.keyword": "blah"}},
            }
        },
        patient_ids=["a", "b"],
        page_size=100,
    )

    assert result == {
        "where": {
            "type": "elasticsearch",
            "query": {
                "bool": {
                    "must": [
                        {"term": {"test.field.keyword": "blah"}},
                        {
                            "terms": {
                                "subject.reference.keyword": [
                                    "Patient/a",
                                    "Patient/b",
                                    "a",
                                    "b",
                                ]
                            }
                        },
                    ]
                }
            },
        },
        "limit": [
            {"type": "number", "value": 0},
            {"type": "number", "value": 100},
        ],
    }
def test_add_term():
    result = build_query(
        {
            "where": {
                "type": "elasticsearch",
                "query": {"terms": {"a.keyword": [1, 2, 3]}},
            }
        },
        term={"code.coding.code.keyword": "blah"},
    )

    assert result == {
        "where": {
            "type": "elasticsearch",
            "query": {
                "bool": {
                    "must": [
                        {"terms": {"a.keyword": [1, 2, 3]}},
                        {"term": {"code.coding.code.keyword": "blah"}},
                    ]
                }
            },
        }
    }
def execute_fhir_dsl(
    query: dict,
    all_results: bool = False,
    auth_args: Auth = Auth.shared(),
    callback: Union[Callable[[Any, bool], None], None] = None,
    max_pages: Union[int, None] = None,
    log: bool = False,
    **query_kwargs,
):
    """Execute a FHIR query with the DSL

    See https://docs.us.lifeomic.com/development/fhir-service/dsl/

    Attributes
    ----------
    query : dict
        The FHIR query to run (is a superset of elasticsearch)

    all_results : bool
        Return all results by scrolling through multiple pages of data
        (Limit is ignored if provided)

    auth_args : Auth, dict
        Additional arguments for authentication

    callback : Callable[[Any, bool], None] (optional)
        A progress function that is invoked for each batch. When the second
        argument passed is true, then the result of the callback function is
        used as the return value. This is useful if writing results out to a
        file and then returning the completed result from that file.

        Example:

            def handle_batch(batch, is_finished):
                print(len(batch))
                if is_finished:
                    return "batch finished"

    max_pages : int
        The number of pages to retrieve (useful if working with tons of records)

    log : bool = False
        Whether to log the elasticsearch query sent to the server

    query_kwargs : dict
        Arguments to pass to build_query such as patient_id, patient_ids, and
        patient_key. (See phc.easy.query.fhir_dsl_query.build_query)

    Examples
    --------
    >>> import phc.easy as phc
    >>> phc.Auth.set({ 'account': '<your-account-name>' })
    >>> phc.Project.set_current('My Project Name')
    >>> phc.Query.execute_fhir_dsl({
    ...     "type": "select",
    ...     "columns": "*",
    ...     "from": [{"table": "patient"}],
    ... }, all_results=True)
    """
    query = build_query(query, **query_kwargs)

    if log:
        print(json.dumps(query, indent=4))

    if FhirAggregation.is_aggregation_query(query):
        response = execute_single_fhir_dsl(query, auth_args=auth_args)
        return FhirAggregation.from_response(response)

    if all_results:
        return with_progress(
            lambda: tqdm(total=MAX_RESULT_SIZE),
            lambda progress: recursive_execute_fhir_dsl(
                {
                    "limit": [
                        {"type": "number", "value": 0},
                        # Make window size smaller than maximum to reduce
                        # pressure on the API
                        {"type": "number", "value": DEFAULT_SCROLL_SIZE},
                    ],
                    **query,
                },
                scroll=all_results,
                progress=progress,
                callback=callback,
                auth_args=auth_args,
                max_pages=max_pages,
            ),
        )

    return recursive_execute_fhir_dsl(
        query,
        scroll=all_results,
        callback=callback,
        auth_args=auth_args,
        max_pages=max_pages,
    )
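# Hedged callback sketch for execute_fhir_dsl (illustrative; the JSON-lines
# format and file path are assumptions, not library conventions). Each batch
# of raw hits is appended to a file, and the path is returned once the final
# batch arrives, so the caller receives the file path as the overall result.
def _example_jsonl_callback(path: str = "results.jsonl"):
    def handle_batch(batch, is_finished):
        if batch:
            with open(path, "a") as f:
                for record in batch:
                    f.write(json.dumps(record) + "\n")

        if is_finished:
            return path

    return handle_batch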
def test_throws_with_non_elasticsearch_where():
    # The test name says "throws", so wrap the call in pytest.raises
    # (asserting only on Exception since the specific type isn't pinned here)
    with pytest.raises(Exception):
        build_query({"where": {"query": "blah-blah-blah"}}, patient_id="a")
def test_add_code_filters():
    result = build_query(
        query={},
        code_fields=["meta.tag", "code.coding"],
        code=["1234-5"],
        display="My Code",
        system="http://unitsofmeasure.org",
    )

    assert result == {
        "where": {
            "type": "elasticsearch",
            "query": {
                "bool": {
                    "must": [
                        {
                            "bool": {
                                "should": [
                                    {
                                        "terms": {
                                            "meta.tag.code.keyword": ["1234-5"]
                                        }
                                    },
                                    {
                                        "terms": {
                                            "code.coding.code.keyword": [
                                                "1234-5"
                                            ]
                                        }
                                    },
                                ]
                            }
                        },
                        {
                            "bool": {
                                "should": [
                                    {
                                        "term": {
                                            "meta.tag.display.keyword": "My Code"
                                        }
                                    },
                                    {
                                        "term": {
                                            "code.coding.display.keyword": "My Code"
                                        }
                                    },
                                ]
                            }
                        },
                        {
                            "bool": {
                                "should": [
                                    {
                                        "term": {
                                            "meta.tag.system.keyword": "http://unitsofmeasure.org"
                                        }
                                    },
                                    {
                                        "term": {
                                            "code.coding.system.keyword": "http://unitsofmeasure.org"
                                        }
                                    },
                                ]
                            }
                        },
                    ]
                }
            },
        }
    }