def _load_examples_from_file():
  """Loads the examples from gzipped ndjson files in a local folder."""
  for file in os.listdir(FLAGS.src_folder):
    if not file.endswith('.gz'):
      continue
    with gzip.open(os.path.join(FLAGS.src_folder, file), 'r') as f:
      for line in f:
        yield features.build_example(json.loads(line))
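
# Note (illustrative, not from the original script): each .gz file is expected
# to be newline-delimited JSON, one document per line. Judging from the Cloud
# Function below, which feeds the result of get_patient_everything to the same
# features.build_example call, each line is presumably a patient's FHIR Bundle,
# roughly:
#
#   {"resourceType": "Bundle", "type": "searchset", "entry": [{"resource": {...}}]}
#
# features.build_example is then responsible for turning that JSON into a
# training example.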

def main(data, context):
  """Extracts features from a patient bundle for online prediction.

  This process is broken down into a few steps:

  1. Fetch the Resource we get triggered on, and fetch/extract the patient
     that it is related to.
  2. Fetch everything for the patient from step 1, and extract the features
     we are interested in.
  3. Send the features to Cloud ML for online prediction, and write the
     results back to the FHIR store.

  Args:
    data (dict): Cloud PubSub payload. The `data` field is what we are
      looking for.
    context (google.cloud.functions.Context): Metadata for the event.
  """
  if 'data' not in data:
    LOGGER.info('`data` field is not present, skipping...')
    return

  resource_name = base64.b64decode(data['data']).decode('utf-8')
  if (utils.CONDITION_TYPE not in resource_name and
      utils.PATIENT_TYPE not in resource_name and
      utils.OBSERVATION_TYPE not in resource_name):
    LOGGER.info('Skipping resource %s which is irrelevant for prediction.',
                resource_name)
    return

  credentials, _ = google.auth.default()
  http = AuthorizedHttp(credentials)

  resource = get_resource(http, resource_name)
  if resource is None:
    return

  patient = get_corresponding_patient(http, resource_name, resource)
  if patient is None:
    LOGGER.error('Could not find corresponding patient in resource %s',
                 resource_name)
    return

  project_id, location, dataset_id, fhir_store_id, _ = _parse_resource_name(
      resource_name)
  patient_id = 'Patient/{}'.format(patient['id'])
  patient_name = _construct_resource_name(project_id, location, dataset_id,
                                          fhir_store_id, patient_id)

  patient_bundle = get_patient_everything(http, patient_name)
  if patient_bundle is None:
    return

  predictions = predict(features.build_example(patient_bundle))
  if predictions is None:
    return

  action = get_action(data)
  create_or_update_risk_assessment(http, patient_name, predictions, action)
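
# Illustrative helper (not part of the original module): wraps a FHIR resource
# name the way the Cloud Healthcare API's Pub/Sub notification delivers it, so
# `main` can be smoke-tested locally. All identifiers in the example below are
# hypothetical placeholders, and application-default credentials must be
# configured for the downstream FHIR and Cloud ML calls to succeed.
def _smoke_test_event(resource_name):
  """Builds a fake Pub/Sub event carrying `resource_name` base64-encoded."""
  return {'data': base64.b64encode(resource_name.encode('utf-8')).decode('utf-8')}

# Example invocation (hypothetical identifiers; `context` is unused by main):
#   main(_smoke_test_event(
#       'projects/my-project/locations/us-central1/datasets/my-dataset/'
#       'fhirStores/my-fhir-store/fhir/Condition/example-condition-id'), None)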

def _load_examples_from_gcs():
  """Downloads the examples from a GCS bucket."""
  client = storage.Client()
  bucket = storage.Bucket(client, FLAGS.src_bucket)
  for blob in bucket.list_blobs(prefix=FLAGS.src_folder):
    if not blob.name.endswith('.gz'):
      continue
    print('Downloading patient record file', blob.name)
    with tempfile.NamedTemporaryFile() as compressed_f:
      blob.download_to_filename(compressed_f.name)
      print('Building TF records')
      with gzip.open(compressed_f.name, 'r') as f:
        for line in f:
          yield features.build_example(json.loads(line.decode('utf-8')))
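
# Illustrative consumer for the two generators above (not part of the original
# script): it assumes features.build_example returns tf.train.Example protos,
# which is how they would typically be serialized into a TFRecord file. The
# destination path is chosen by the caller.
import tensorflow as tf


def _write_tfrecords(examples, dst_path):
  """Writes the generated examples to a single TFRecord file at dst_path."""
  with tf.io.TFRecordWriter(dst_path) as writer:
    for example in examples:
      writer.write(example.SerializeToString())

# Example:
#   _write_tfrecords(_load_examples_from_gcs(), '/tmp/patients.tfrecord')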