def run(
    file_id: str,
    auth_args: Auth = Auth.shared(),
    pause_time=1,
    **document_kw_args,
):
    """Run PrecisionOCR on a specific file id.

    Kicks off OCR through the API, waits ``pause_time`` seconds for the
    result to land in FSS, then fetches and returns the resulting
    DocumentReference via ``Document.get``.
    """
    auth = Auth(auth_args)
    api = BaseClient(auth.session())

    payload = {"project": auth.project_id, "fileId": file_id}
    ocr_response = api._api_call("ocr/documents", json=payload)
    doc_ref_id = ocr_response.data["documentReferenceId"]

    # Unfortunately, we just have to wait for it to be in FSS
    sleep(pause_time)

    return Document.get(id=doc_ref_id, auth_args=auth_args, **document_kw_args)
def delete(id: str, auth_args: Auth = Auth.shared()):
    """Delete a DocumentReference by id.

    Issues a DELETE against the OCR FHIR endpoint for the current
    project and returns the raw API response.
    """
    auth = Auth(auth_args)
    api = BaseClient(auth.session())

    resource_path = f"ocr/fhir/projects/{auth.project_id}/documentReferences/{id}"
    return api._api_call(resource_path, http_verb="DELETE")
def create(config: OcrConfig, auth_args: Auth = Auth.shared()):
    """Create an OCR configuration for the current project.

    Returns the ``data`` attribute of the API response.
    """
    auth = Auth(auth_args)
    api = BaseClient(auth.session())

    # Round-trip through JSON so the pydantic model becomes plain
    # primitives (and None-valued fields are omitted).
    config_dict = json.loads(config.json(exclude_none=True))

    response = api._api_call(
        "ocr/config",
        json={"project": auth.project_id, "config": config_dict},
    )
    return response.data
def get_data_frame(auth_args: Auth = Auth.shared()):
    """Fetch gene sets for the current project as a pandas DataFrame.

    Queries the knowledge/gene-sets endpoint and returns one row per
    gene set. The nested "genes" records are flattened into a
    comma-separated string of gene names, and the redundant "datasetId"
    column is dropped.
    """
    auth = Auth(auth_args)
    client = BaseClient(auth.session())

    response = client._api_call(
        "knowledge/gene-sets",
        http_verb="GET",
        params={"datasetId": auth.project_id},
    )

    frame = pd.DataFrame(response.data["items"])

    if "genes" in frame.columns:
        frame["genes"] = frame.genes.apply(
            lambda genes: ",".join([d["gene"] for d in genes]))

    # FIX: DataFrame.drop defaults to axis=0 (row labels), so the
    # original `frame.drop(["datasetId"], errors="ignore")` silently did
    # nothing (errors="ignore" hid the missing row label). Drop the
    # "datasetId" *column* explicitly.
    frame = frame.drop(columns=["datasetId"], errors="ignore")

    return frame
def get_data_frame(search: str = "", auth_args: Auth = Auth.shared()):
    """Search genes and return the matches as a pandas DataFrame.

    The "alias" column (a list per row when present) is flattened into a
    comma-separated string; non-list values become None.
    """
    auth = Auth(auth_args)
    api = BaseClient(auth.session())

    query = {"datasetId": auth.project_id, "gene": search}
    response = api._api_call("knowledge/genes", http_verb="GET", params=query)

    frame = pd.DataFrame(response.data["items"])

    if "alias" in frame.columns:
        def flatten_aliases(aliases):
            # Missing values arrive as NaN rather than a list
            if isinstance(aliases, list):
                return ",".join(aliases)
            return None

        frame["alias"] = frame.alias.apply(flatten_aliases)

    # We choose to not expand topCancerDrivers and cancerDrivers since it
    # can easily have 50 values in each. If we really need those, the user
    # will have to extract those.
    return frame
def recursive_paging_api_call(
    path: str,
    # NOTE(review): mutable defaults ({} / [] below) are never mutated
    # in-place here (only rebuilt via {**params, ...} / [*_prev_results, ...]),
    # so they are shared-but-safe — still worth converting to None defaults.
    params: dict = {},
    http_verb: str = "GET",
    scroll: bool = False,
    progress: Optional[tqdm] = None,
    auth_args: Optional[Auth] = Auth.shared(),
    callback: Union[Callable[[Any, bool], None], None] = None,
    max_pages: Optional[int] = None,
    page_size: Optional[int] = None,
    log: bool = False,
    # Internal recursion state — callers should not pass these.
    _current_page: int = 1,
    _prev_results: List[dict] = [],
    _next_page_token: Optional[str] = None,
    _count: Optional[Union[float, int]] = None,
):
    """Page through a list endpoint, recursing once per page.

    Returns the accumulated list of item dicts, unless ``callback`` is
    given — then each page is handed to ``callback(items, is_last)``
    instead of being accumulated, and the final call's return value is
    returned. With ``scroll=False`` (the default) only a single page is
    fetched. ``progress``, when provided, is updated with item counts.
    """
    auth = Auth(auth_args)
    client = BaseClient(auth.session())

    if _next_page_token:
        params = {**params, "nextPageToken": _next_page_token}

    if page_size:
        params = {**params, "pageSize": page_size}

    # NOTE: Parallelism is kept with execute_fhir_dsl to unify the API calls
    if scroll is False:
        max_pages = 1

    # Compute count and add to progress (first call only — no token, no
    # accumulated results yet)
    if _count is None and len(_prev_results) == 0:
        count_response = client._api_call(
            path,
            http_verb=http_verb,
            # Use minimum pageSize in case this endpoint doesn't support count
            params={
                **params,
                "include": "count",
                "pageSize": 1
            },
        )
        _count = count_response.get("count")
        # Count appears to only go up to 999
        if _count == 999:
            print(f"Results are {_count}+.")
            _count = None

    if _count and (progress is not None):
        progress.reset(_count)

    # Fetch the actual page
    response = client._api_call(path, http_verb=http_verb, params=params)

    current_results = response.data.get("items", [])

    if progress is not None:
        progress.update(len(current_results))

    is_last_batch = (
        (scroll is False)
        or ((max_pages is not None) and (_current_page >= max_pages))
        # Using the next link is the only completely reliable way to tell if a
        # next page exists
        or (response.data.get("links", {}).get("next") is None))
    # When a callback consumes each page, nothing is accumulated.
    results = [] if callback else [*_prev_results, *current_results]

    # Sometimes the count doesn't match the results. We make it sync up if the
    # count doesn't match but we got all results.
    # TODO: Remove this when API fixed
    if ((progress is not None) and scroll and is_last_batch
            and (progress.total != progress.n)):
        count = progress.n
        progress.reset(count)
        progress.update(count)

    if callback and not is_last_batch:
        callback(current_results, False)
    elif callback and is_last_batch:
        return callback(current_results, True)
    elif is_last_batch:
        if progress is not None:
            progress.close()

        # Because count is often wrong, we'll skip the logging here
        # TODO: Uncomment this when API fixed
        # print(
        #     f"Retrieved {len(results)}{f'/{_count}' if _count else ''} results"
        # )
        return results

    # Not the last page: recurse with the next page token and the
    # accumulated results.
    return recursive_paging_api_call(
        path,
        params=params,
        http_verb=http_verb,
        progress=progress,
        auth_args=auth_args,
        callback=callback,
        max_pages=max_pages,
        page_size=page_size,
        log=log,
        scroll=scroll,
        _current_page=_current_page + 1,
        _prev_results=results,
        _next_page_token=get_next_page_token(
            response.data.get("links", {}).get("next", "")),
        _count=_count,
    )
def get(auth_args: Auth = Auth.shared()):
    """Fetch the OCR configuration for the current project.

    Returns the ``data`` attribute of the API response.
    """
    auth = Auth(auth_args)
    api = BaseClient(auth.session())

    config_path = f"ocr/config/{auth.project_id}"
    return api._api_call(config_path, http_verb="GET").data