def execute_composite_aggregations(
    table_name: str,
    key_sources_pairs: List[Tuple[str, List[dict]]],
    batch_size: int = 100,
    query_overrides: dict = {},
    log: bool = False,
    auth_args: Auth = Auth.shared(),
    max_pages: Union[int, None] = None,
    **query_kwargs,
):
    """Count records by multiple fields

    Attributes
    ----------
    table_name : str
        The FHIR Search Service table to retrieve from

    key_sources_pairs : List[Tuple[str, List[dict]]]
        Pairs of keys and sources to pull composite results from

        Example Input:
            [
                ("meta.tag", [{"terms": {"field": "meta.tag.system.keyword"}}])
            ]

    batch_size : int
        The size of each page from elasticsearch to use

    query_overrides : dict
        Parts of the FSS query to override
        (Note that passing certain values can cause the method to error out)

        Example aggregation query executed (can use log=True to inspect):
            {
                "type": "select",
                "columns": [{
                    "type": "elasticsearch",
                    "aggregations": {
                        "results": {
                            "composite": {
                                "sources": [{
                                    "meta.tag": {
                                        "terms": {
                                            "field": "meta.tag.system.keyword"
                                        }
                                    }
                                }],
                                "size": 100,
                            }
                        }
                    },
                }],
                "from": [{"table": "observation"}],
            }

    auth_args : Auth, dict
        Additional arguments for authentication

    log : bool = False
        Whether to log the elasticsearch query sent to the server

    max_pages : int
        The number of pages to retrieve (useful if working with tons of records)

    query_kwargs : dict
        Arguments to pass to build_query such as patient_id, patient_ids,
        and patient_key. See :func:`~phc.easy.query.fhir_dsl_query.build_query`.

    Examples
    --------
    >>> import phc.easy as phc
    >>> phc.Auth.set({ 'account': '<your-account-name>' })
    >>> phc.Project.set_current('My Project Name')
    >>> phc.Query.execute_composite_aggregations(
            table_name="observation",
            key_sources_pairs=[
                ("meta.tag", [
                    {"code": {"terms": {"field": "meta.tag.code.keyword"}}},
                ]),
                ("code.coding", [
                    {"display": {"terms": {"field": "code.coding.display.keyword"}}}
                ]),
            ]
        )
    """
    if len(key_sources_pairs) == 0:
        raise ValueError("No aggregate composite terms specified.")

    return with_progress(
        tqdm,
        lambda progress: Query._recursive_execute_composite_aggregations(
            table_name=table_name,
            key_sources_pairs=key_sources_pairs,
            batch_size=batch_size,
            progress=progress,
            log=log,
            auth_args=auth_args,
            query_overrides=query_overrides,
            max_pages=max_pages,
            **query_kwargs,
        ),
    )
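
# A minimal usage sketch for execute_composite_aggregations, mirroring the
# docstring example above. Assumption: the merged result maps each key to a
# dict with a "buckets" list in the standard Elasticsearch composite
# aggregation shape ({"key": {...}, "doc_count": N} per bucket); that shape
# is inferred, not a contract documented in this module.
def _example_composite_aggregation_counts():
    import phc.easy as phc

    results = phc.Query.execute_composite_aggregations(
        table_name="observation",
        key_sources_pairs=[
            ("meta.tag", [
                {"code": {"terms": {"field": "meta.tag.code.keyword"}}},
            ]),
        ],
    )

    # Tally document counts per tag code from the returned buckets
    return {
        bucket["key"]["code"]: bucket["doc_count"]
        for bucket in results["meta.tag"]["buckets"]
    }
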
def execute_fhir_dsl(
    query: dict,
    all_results: bool = False,
    auth_args: Auth = Auth.shared(),
    callback: Union[Callable[[Any, bool], None], None] = None,
    max_pages: Union[int, None] = None,
    log: bool = False,
    **query_kwargs,
):
    """Execute a FHIR query with the DSL

    See https://docs.us.lifeomic.com/development/fhir-service/dsl/

    Attributes
    ----------
    query : dict
        The FHIR query to run (is a superset of elasticsearch)

    all_results : bool
        Return all results by scrolling through multiple pages of data
        (Limit is ignored if provided)

    auth_args : Auth, dict
        Additional arguments for authentication

    callback : Callable[[Any, bool], None] (optional)
        A progress function that is invoked for each batch. When the second
        argument passed is true, then the result of the callback function is
        used as the return value. This is useful if writing results out to a
        file and then returning the completed result from that file.

        Example:
            def handle_batch(batch, is_finished):
                print(len(batch))

                if is_finished:
                    return "batch finished"

    max_pages : int
        The number of pages to retrieve (useful if working with tons of records)

    log : bool = False
        Whether to log the elasticsearch query sent to the server

    query_kwargs : dict
        Arguments to pass to build_query such as patient_id, patient_ids,
        and patient_key. (See phc.easy.query.fhir_dsl_query.build_query)

    Examples
    --------
    >>> import phc.easy as phc
    >>> phc.Auth.set({ 'account': '<your-account-name>' })
    >>> phc.Project.set_current('My Project Name')
    >>> phc.Query.execute_fhir_dsl({
            "type": "select",
            "columns": "*",
            "from": [
                {"table": "patient"}
            ],
        }, all_results=True)
    """
    query = build_query(query, **query_kwargs)

    if log:
        print(json.dumps(query, indent=4))

    if FhirAggregation.is_aggregation_query(query):
        response = execute_single_fhir_dsl(query, auth_args=auth_args)
        return FhirAggregation.from_response(response)

    if all_results:
        return with_progress(
            lambda: tqdm(total=MAX_RESULT_SIZE),
            lambda progress: recursive_execute_fhir_dsl(
                {
                    "limit": [
                        {"type": "number", "value": 0},
                        # Make window size smaller than maximum to reduce
                        # pressure on API
                        {"type": "number", "value": DEFAULT_SCROLL_SIZE},
                    ],
                    **query,
                },
                scroll=all_results,
                progress=progress,
                callback=callback,
                auth_args=auth_args,
                max_pages=max_pages,
            ),
        )

    return recursive_execute_fhir_dsl(
        query,
        scroll=all_results,
        callback=callback,
        auth_args=auth_args,
        max_pages=max_pages,
    )
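
# A sketch of the callback contract described in execute_fhir_dsl's
# docstring: append each batch to a file and return the file path once
# scrolling finishes. The exact shape of each entry in a batch is not
# specified in this module, so the json.dumps(entry) serialization below is
# an assumption.
def _example_stream_fhir_results_to_file(filename: str = "patients.ndjson"):
    import phc.easy as phc

    def handle_batch(batch, is_finished):
        if is_finished:
            # This return value becomes the overall return value of the query
            return filename

        with open(filename, "a") as f:
            for entry in batch:
                f.write(json.dumps(entry) + "\n")

    return phc.Query.execute_fhir_dsl(
        {
            "type": "select",
            "columns": "*",
            "from": [{"table": "patient"}],
        },
        all_results=True,
        callback=handle_batch,
    )
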
def execute_paging_api(
    path: str,
    params: dict = {},
    http_verb: str = "GET",
    transform: Callable[[pd.DataFrame], pd.DataFrame] = identity,
    all_results: bool = False,
    auth_args: Auth = Auth.shared(),
    max_pages: Optional[int] = None,
    page_size: Optional[int] = None,
    log: bool = False,
    raw: bool = False,
    ignore_cache: bool = False,
    show_progress: bool = True,
    progress: Optional[tqdm] = None,
    item_key: str = "items",
    try_count: bool = True,
):
    """Execute an API query that pages through results

    See https://docs.us.lifeomic.com/api/?shell#lifeomic-core-api-genomics
    for example

    Attributes
    ----------
    path : str
        The API path to hit
        (Special tokens: `:project_id`)

    params : dict
        The parameters to include with request

    http_verb : str
        The HTTP method to use

    transform : Callable[[pd.DataFrame], pd.DataFrame]
        A function to apply to the resulting DataFrame (defaults to identity)

    all_results : bool = False
        Retrieve the entire set of records instead of a sample (25 records)

    auth_args : Auth, dict
        Additional arguments for authentication

    max_pages : int
        The number of pages to retrieve (useful if working with tons of records)

    page_size : int
        The number of records to fetch per page

    log : bool = False
        Whether to log some diagnostic statements for debugging

    raw : bool = False
        Return the DataFrame without applying the transform

    ignore_cache : bool = False
        Bypass the cache even when retrieving all results

    show_progress : bool = True
        Whether to display a progress indicator

    progress : Optional[tqdm] = None
        Override the given progress indicator

    item_key : str
        The key to find the results underneath (usually "items" but not always)

    try_count : bool
        Whether to try and send a "count" param to update the progress bar

    Examples
    --------
    >>> import phc.easy as phc
    >>> phc.Auth.set({ 'account': '<your-account-name>' })
    >>> phc.Project.set_current('My Project Name')
    >>> phc.Query.execute_paging_api(
            "genomics/projects/:project_id/tests",
            params={"patientId": "<patient-uuid>"}
        )
    """
    auth = Auth(auth_args)

    params = clean_params(params)

    # Only resolve auth.project_id when the URL needs it (it raises an error
    # if no project is selected)
    if "project_id" in path:
        path = path.replace(":project_id", auth.project_id)

    query = {"path": path, "method": http_verb, "params": params}

    if all_results and page_size is None:
        # Default to 100 if not provided but getting all results
        page_size = 100

    if log:
        print(json.dumps(query, indent=4))

    use_cache = (
        (not ignore_cache)
        and (not raw)
        and all_results
        and (max_pages is None)
    )

    if use_cache and APICache.does_cache_for_query_exist(query):
        return APICache.load_cache_for_query(query)

    callback = (
        APICache.build_cache_callback(query, transform, nested_key=None)
        if use_cache
        else None
    )

    results = with_progress(
        lambda: (progress if progress is not None else tqdm())
        if show_progress
        else None,
        lambda progress: recursive_paging_api_call(
            path,
            params=params,
            http_verb=http_verb,
            callback=callback,
            scroll=all_results or (max_pages is not None),
            max_pages=max_pages,
            page_size=page_size,
            log=log,
            auth_args=auth_args,
            progress=progress,
            item_key=item_key,
            try_count=try_count,
        ),
    )

    df = pd.DataFrame(results)

    if raw:
        return df

    return transform(df)
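
# A sketch of calling execute_paging_api with a transform, following the
# docstring example above. The keep_id_columns helper is hypothetical and
# illustrative only; actual column names depend on the endpoint's response.
def _example_paging_api_with_transform():
    import phc.easy as phc

    def keep_id_columns(df: pd.DataFrame) -> pd.DataFrame:
        # Keep only identifier-like columns, if any are present
        return df[[c for c in df.columns if c.lower().endswith("id")]]

    # The transform runs on the assembled DataFrame (and on cached results,
    # since it is passed into the cache callback as well)
    return phc.Query.execute_paging_api(
        "genomics/projects/:project_id/tests",
        params={"patientId": "<patient-uuid>"},
        all_results=True,
        transform=keep_id_columns,
    )
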