def redcap_registration_complete(redcap_record: dict) -> bool:
    """
    Returns True if a given *redcap_record* shows a participant has completed
    the enrollment surveys. Otherwise, returns False.

    The enrollment surveys are the `eligibility_screening`, `consent_form` and
    `enrollment_questionnaire` instruments; all three must be marked complete.
    A missing or empty record is never complete.

    The result is always a real ``bool``, even if ``is_complete`` reports
    something falsy other than False (e.g. None) for an instrument.

    >>> redcap_registration_complete(None)
    False

    >>> redcap_registration_complete({})
    False

    >>> redcap_registration_complete({ \
        'eligibility_screening_complete': '1', \
        'consent_form_complete': '2', \
        'enrollment_questionnaire_complete': '0'})
    False

    >>> redcap_registration_complete({ \
        'eligibility_screening_complete': '2', \
        'consent_form_complete': '2', \
        'enrollment_questionnaire_complete': '1'})
    False

    >>> redcap_registration_complete({ \
        'eligibility_screening_complete': '2', \
        'consent_form_complete': '2', \
        'enrollment_questionnaire_complete': '2'})
    True
    """
    if not redcap_record:
        return False

    enrollment_instruments = (
        'eligibility_screening',
        'consent_form',
        'enrollment_questionnaire',
    )

    # all() stops at the first instrument that is not complete (same
    # short-circuiting as an `and` chain) but always produces a bool, so the
    # declared return type holds.
    return all(
        is_complete(instrument, redcap_record)
        for instrument in enrollment_instruments)
def collection_date(record: dict) -> Optional[str]:
    """
    Pick the sample/specimen collection date out of a REDCap *record*.

    Which field is used depends on whether the record carries a
    `back_end_mail_scans` completion status at all (mail-in) or not
    (in-person/kiosk).
    """
    # back_end_mail_scans is filled out by the logistics team for shipping,
    # so it only exists for mail-in samples.
    mail_scan_status = is_complete('back_end_mail_scans', record)

    # No status at all: an in-person/kiosk record.
    if mail_scan_status is None:
        return record.get("nasal_swab_q")

    # From here on this is a mail-in record.
    #
    # Older records pre-dating the scan_kit_reg instrument may have a value
    # in the collection_date field from PCDEQC.  The field stopped being
    # used on 22 July 2020.
    legacy_collection_date = record.get("collection_date")
    if legacy_collection_date:
        return legacy_collection_date

    # Prefer the date written on the tube; the fallback depends on whether
    # the kit registration instrument was completed.
    if is_complete('scan_kit_reg', record):
        fallback_field = "kit_reg_date"
    else:
        fallback_field = "back_end_scan_date"

    return record.get("date_on_tube") or record.get(fallback_field)
def get_encounter_date(record: REDCapRecord, event_type: EventType) -> Optional[str]:
    """
    Determine the encounter date (``YYYY-MM-DD``) for a REDCap *record* of the
    given *event_type*.

    For ``EventType.ENCOUNTER`` the sources are tried in this order, taking
    the first non-empty value:

    1. the `daily_attestation` survey timestamp, then `attestation_date`
    2. the `kiosk_registration_4c7f` survey timestamp, then
       `nasal_swab_timestamp`
    3. the `test_order_survey` survey timestamp, then `time_test_order`
       (the swab-and-send order date)
    4. the `testing_determination_internal` survey timestamp, then
       `testing_date`
    5. a date derived from the record's repeat instance, when
       `daily_attestation` is complete.

    For ``EventType.ENROLLMENT`` the `enrollment_questionnaire` survey
    timestamp is tried first, then `enrollment_date`.

    Returns a falsy value (None or an empty string) when no date is found,
    and None for any other *event_type*.
    """
    def date_part_of(timestamp_field: str) -> Optional[str]:
        # Date component of a custom ``YYYY-MM-DD HH:MM:SS`` field, or None
        # when the field is missing or blank.
        timestamp = record.get(timestamp_field)
        if not timestamp:
            return None
        return datetime.strptime(timestamp, '%Y-%m-%d %H:%M:%S').strftime('%Y-%m-%d')

    encounter_date = None

    if event_type == EventType.ENCOUNTER:
        # For all surveys, try the survey _timestamp field (which is in
        # Pacific time) before custom fields because the custom fields aren't
        # always populated and when they are populated they use the browser's
        # time zone.
        #
        # testing_determination_internal is not enabled as a survey, but we
        # attempt to get its timestamp just in case it ever is enabled as a
        # survey.
        encounter_date = (
            extract_date_from_survey_timestamp(record, 'daily_attestation')
            or record.get('attestation_date')
            or extract_date_from_survey_timestamp(record, 'kiosk_registration_4c7f')
            or date_part_of('nasal_swab_timestamp')
            or extract_date_from_survey_timestamp(record, 'test_order_survey')
            or date_part_of('time_test_order')
            or extract_date_from_survey_timestamp(record, 'testing_determination_internal')
            or record.get('testing_date')
        )

        # We have seen cases when the `attestation_date` is not getting set
        # by REDCap in the `daily_attestation` instrument. Here, we get the
        # date based on the instance ID of the `daily_attestation` instrument.
        # It's safe to do this because the Musher computes the instance from
        # the date.
        #
        # Test for "no date" with falsiness rather than `is None`: when every
        # source above is blank the chain evaluates to its last operand, which
        # is an empty string if `testing_date` is present but empty.
        if not encounter_date and is_complete('daily_attestation', record) and record.repeat_instance:
            encounter_date = get_date_from_repeat_instance(record.repeat_instance)

    elif event_type == EventType.ENROLLMENT:
        encounter_date = (
            extract_date_from_survey_timestamp(record, 'enrollment_questionnaire')
            or record.get('enrollment_date')
        )

    return encounter_date
def extract_date_from_survey_timestamp(record: REDCapRecord, survey_name: str) -> Optional[str]:
    """
    Return the date part (``YYYY-MM-DD``) of the REDCap survey timestamp for
    *survey_name* in *record*, or None when it is unavailable.

    The survey timestamp is the system timestamp REDCap captures
    automatically, so it does not depend on the client.  It is in local
    (Pacific) time, is populated only if the instrument was filled out as a
    survey, and cannot be set via a REDCap data import.

    None is returned when *record* or *survey_name* is empty, when the
    instrument is not marked complete, or when the timestamp field is blank.
    """
    if not record or not survey_name:
        return None

    if not is_complete(survey_name, record):
        return None

    survey_timestamp = record.get(f'{survey_name}_timestamp')
    if not survey_timestamp:
        return None

    parsed = datetime.strptime(survey_timestamp, '%Y-%m-%d %H:%M:%S')
    return parsed.strftime('%Y-%m-%d')
def decorated(*args, **kwargs):
    # Wrapper around `routine`: only call it once every instrument in
    # `required_instruments` is marked complete on the `redcap_record`
    # keyword argument; otherwise log and return None.
    record = kwargs["redcap_record"]

    incomplete_instruments = set()
    for instrument in required_instruments:
        if not is_complete(instrument, record):
            incomplete_instruments.add(instrument)

    if not incomplete_instruments:
        return routine(*args, **kwargs)

    LOG.debug(f"The following required instruments «{incomplete_instruments}» are not yet marked complete.")
    return None
def redcap_det_scan(*, db: DatabaseSession, cache: TTLCache, det: dict, redcap_record: REDCapRecord) -> Optional[dict]:
    """
    Build a bundle resource for one SCAN REDCap record.

    Returns None (after logging why) when the record is not ready for
    ingestion or lacks the information needed to construct the patient, the
    initial encounter, or a specimen that should exist.  Otherwise returns
    the result of ``create_bundle_resource`` over every entry that could be
    built.
    """
    # `enrollment_questionnaire` cannot go in the top-level list of
    # REQUIRED_INSTRUMENTS because the SCAN In-Person Enrollment project does
    # not have this instrument, so it is checked here instead.  Only an
    # explicit False skips the record; None (instrument absent) passes.
    # -Jover, 17 July 2020
    if is_complete('enrollment_questionnaire', redcap_record) is False:
        LOG.debug("Skipping enrollment with incomplete `enrollment_questionnaire` instrument")
        return None

    # If the `location_type` field exists, but is not filled in (empty
    # string) then skip the record.  The SCAN Husky Project has a different
    # survey flow where participants fill in their consent & illness
    # questionnaire online before doing the in-person swab.  We don't know
    # the site of the swab until this field is completed in person, so
    # `location_type` is recorded in the nasal_swab_collection instrument
    # which is completed at the time of the swab.
    # We cannot check nasal_swab_collection is complete because it exists in
    # other projects and would delay ingestion from them.
    # -Jover, 04 September 2020
    if redcap_record.get('location_type') == '':
        LOG.debug("Skipping enrollment without completed `location_type`")
        return None

    # Skip the record if the illness_questionnaire is not complete, because
    # this is a "false" enrollment where the participant was not mailed a
    # swab kit.
    #
    # * `illness_q_date` is checked because a bug in REDCap sometimes leaves
    #   the questionnaire marked incomplete/unverified.
    # * `back_end_mail_scans` is checked because sometimes `illness_q_date`
    #   is not filled in due to a bug in REDCap.  Checking the questionnaire
    #   first minimizes ingestion delay since back_end_mail_scans is
    #   completed the day after enrollment.  -Jover, 29 June 2020
    # * `illness_questionnaire` completion is checked because the SCAN
    #   In-Person Enrollment project has neither the `illness_q_date` field
    #   nor the `back_end_mail_scans` instrument.  -Jover, 16 July 2020
    # * `nasal_swab_collection` completion is checked because the SCAN Husky
    #   Test project has neither the `illness_questionnaire` instrument nor
    #   the `back_end_mail_scans` instrument.
    enrollment_confirmed = (
        redcap_record.get('illness_q_date')
        or is_complete('illness_questionnaire', redcap_record)
        or is_complete('back_end_mail_scans', redcap_record)
        or is_complete('nasal_swab_collection', redcap_record)
    )
    if not enrollment_confirmed:
        LOG.debug("Skipping incomplete enrollment")
        return None

    site_reference = create_site_reference(redcap_record)
    location_resource_entries = locations(db, cache, redcap_record)

    patient_entry, patient_reference = create_patient(redcap_record)
    if not patient_entry:
        LOG.warning("Skipping enrollment with insufficient information to construct patient")
        return None

    initial_encounter_entry, initial_encounter_reference = create_initial_encounter(
        redcap_record,
        patient_reference,
        site_reference,
        location_resource_entries)
    if not initial_encounter_entry:
        LOG.warning("Skipping enrollment with insufficient information to construct a initial encounter")
        return None

    initial_questionnaire_entry = create_initial_questionnaire_response(
        redcap_record,
        patient_reference,
        initial_encounter_reference)

    # Mail-in SCAN projects use the `post_collection_data_entry_qc`
    # instrument to indicate a specimen is received.  The SCAN In-Person
    # Enrollment project and SCAN Husky project only use this instrument to
    # mark "never-tested", so there we rely on `nasal_swab_collection` to
    # know that we have sample data to ingest.
    # Only rely on `nasal_swab_collection` if the `back_end_mail_scans`
    # instrument does not exist in the record, i.e. the record is from a
    # kiosk project.
    # -Jover, 09 September 2020
    specimen_received = is_complete('post_collection_data_entry_qc', redcap_record)
    if not specimen_received and is_complete('back_end_mail_scans', redcap_record) is None:
        specimen_received = is_complete('nasal_swab_collection', redcap_record)

    specimen_entry = None
    specimen_observation_entry = None

    if not specimen_received:
        LOG.info("Creating encounter for record without sample")
    else:
        specimen_entry, specimen_reference = create_specimen(redcap_record, patient_reference)
        specimen_observation_entry = create_specimen_observation_entry(
            specimen_reference,
            patient_reference,
            initial_encounter_reference)

        # A specimen was expected but could not be built.
        if not specimen_entry:
            LOG.warning("Skipping enrollment with insufficent information to construct a specimen")
            return None

    follow_up_encounter_entry = None
    follow_up_questionnaire_entry = None

    if is_complete('day_7_follow_up', redcap_record):
        # Follow-up encounter for 7 day follow-up survey
        follow_up_encounter_entry, follow_up_encounter_reference = create_follow_up_encounter(
            redcap_record,
            patient_reference,
            site_reference,
            initial_encounter_reference)
        follow_up_questionnaire_entry = create_follow_up_questionnaire_response(
            redcap_record,
            patient_reference,
            follow_up_encounter_reference)

    candidate_entries = [
        patient_entry,
        initial_encounter_entry,
        initial_questionnaire_entry,
        specimen_entry,
        *location_resource_entries,
        specimen_observation_entry,
        follow_up_encounter_entry,
        follow_up_questionnaire_entry,
    ]

    # Entries that were never created (still None/empty) are left out.
    return create_bundle_resource(
        bundle_id = str(uuid4()),
        timestamp = datetime.now().astimezone().isoformat(),
        source = f"{REDCAP_URL}{redcap_record.project.id}/{redcap_record['record_id']}",
        entries = [entry for entry in candidate_entries if entry])
def generate(record_ids: List[str], project_id: int, token_name: str, since_date: str, until_date: str, instruments: List[str], include_incomplete: bool):
    """
    Generate DET notifications for REDCap records.

    Specify one or more record ids to only consider those records.  If no
    record ids are given, then all records (or all records matching the date
    filters) are considered.

    The REDCap API does not support combining a list of specific record ids
    with date filters, so this command does not either.

    Requires environmental variables REDCAP_API_URL and REDCAP_API_TOKEN (or
    whatever you passed to --token-name).

    DET notifications are output for all completed instruments for each record
    by default.  Pass --include-incomplete to output DET notifications for
    incomplete and unverified instruments too.  Pass one or more --instrument
    options to limit output to specific instrument names.

    All DET notifications are output to stdout as newline-delimited JSON
    records.  You will likely want to redirect stdout to a file.
    """
    # A missing environment variable raises KeyError here.
    api_token = os.environ[token_name]
    api_url = os.environ['REDCAP_API_URL']

    project = Project(api_url, api_token, project_id)

    LOG.info(f"REDCap project #{project.id}: {project.title}")

    if bool(since_date or until_date) and bool(record_ids):
        raise click.UsageError("The REDCap API does not support fetching records filtered by id *and* date.")

    # Validate user-supplied --instrument names before fetching any records.
    # This is input validation, so raise a usage error rather than `assert`
    # (assertions are stripped when Python runs with -O).
    if instruments:
        unknown_instruments = set(instruments) - set(project.instruments)

        if unknown_instruments:
            raise click.UsageError(
                f"The following --instrument names aren't in the REDCap project: {unknown_instruments}")

    if since_date and until_date:
        LOG.debug(f"Getting all records that have been created/modified between {since_date} and {until_date}")
    elif since_date:
        LOG.debug(f"Getting all records that have been created/modified since {since_date}")
    elif until_date:
        LOG.debug(f"Getting all records that have been created/modified before {until_date}")
    elif record_ids:
        LOG.debug(f"Getting specified records: {record_ids}")
    else:
        LOG.debug("Getting all records")

    records = project.records(
        since_date = since_date,
        until_date = until_date,
        ids = record_ids or None,
        raw = True)

    if instruments:
        LOG.debug(f"Producing DET notifications for the following {'instruments' if include_incomplete else 'complete instruments'}: {instruments}")
    else:
        LOG.debug(f"Producing DET notifications for all {'instruments' if include_incomplete else 'complete instruments'} ({project.instruments})")
        instruments = project.instruments

    # One JSON document per (record, instrument) pair that qualifies.
    for record in records:
        for instrument in instruments:
            if include_incomplete or is_complete(instrument, record):
                print(as_json(create_det_records(project, record, instrument)))
def generate(record_ids: List[str], api_url: str, project_id: int, token: str, since_date: str, until_date: str, instruments: List[str], events: List[str], include_incomplete: bool):
    """
    Generate DET notifications for REDCap records.

    Specify one or more record ids to only consider those records.  If no
    record ids are given, then all records (or all records matching the date
    filters) are considered.

    The REDCap API does not support combining a list of specific record ids
    with date filters, so this command does not either.

    DET notifications are output for all completed instruments for each record
    by default.  Pass --include-incomplete to output DET notifications for
    incomplete and unverified instruments too.  Pass one or more --instrument
    options to limit output to specific instrument names.  Pass one or more
    --event options to limit output to specific event names.

    All DET notifications are output to stdout as newline-delimited JSON
    records.  You will likely want to redirect stdout to a file.
    """
    # *token* names an environment variable; without it no token is passed.
    api_token = None
    if token:
        api_token = os.environ[token]

    project = Project(api_url, project_id, token = api_token)

    LOG.info(f"REDCap project #{project.id}: {project.title}")

    filtering_by_date = bool(since_date or until_date)
    if filtering_by_date and bool(record_ids):
        raise click.UsageError("The REDCap API does not support fetching records filtered by id *and* date.")

    # Describe which records are about to be requested.
    if since_date and until_date:
        selection = f"all records that have been created/modified between {since_date} and {until_date}"
    elif since_date:
        selection = f"all records that have been created/modified since {since_date}"
    elif until_date:
        selection = f"all records that have been created/modified before {until_date}"
    elif record_ids:
        selection = f"specified records: {record_ids}"
    else:
        selection = "all records"

    LOG.debug(f"Getting {selection}")

    # Default to every event in the project; validate explicit choices.
    if not events:
        LOG.debug(f"Producing DET notifications for all events ({project.events})")
        events = project.events
    else:
        LOG.debug(f"Producing DET notifications for the following events: {events}")
        assert_known_attribute_value(project, 'events', events, 'event')

    # Same for instruments.
    instrument_scope = 'instruments' if include_incomplete else 'complete instruments'

    if not instruments:
        LOG.debug(f"Producing DET notifications for all {instrument_scope} ({project.instruments})")
        instruments = project.instruments
    else:
        LOG.debug(f"Producing DET notifications for the following {instrument_scope}: {instruments}")
        assert_known_attribute_value(project, 'instruments', instruments, 'instrument')

    # Only the record id and the completion status fields are requested.
    fields = [project.record_id_field]
    fields.extend(completion_status_field(instrument) for instrument in instruments)

    records = project.records(
        since_date = since_date,
        until_date = until_date,
        ids = record_ids or None,
        fields = fields,
        events = events,
        raw = True)

    for record in records:
        for instrument in instruments:
            if not include_incomplete and not is_complete(instrument, record):
                continue

            print(as_json(det(project, record, instrument)))
def decorated(*args, db: DatabaseSession, log_output: bool, det_limit: int = None, redcap_api_batch_size: int, geocoding_cache: str = None, **kwargs):
    # Driver for one REDCap DET ETL run.  It:
    #
    #   1. selects (and row-locks) the DETs in receiving.redcap_det that this
    #      ETL has not logged yet and whose document contains `det_contains`,
    #   2. decides per DET whether to load or skip it,
    #   3. fetches the referenced REDCap records in batches, and
    #   4. walks the DETs in selection order, calling `routine` for each one
    #      to load and recording the outcome with mark_skipped/mark_loaded.
    #
    # `name`, `revision`, `redcap_url`, `project_id`, `etl_id`,
    # `det_contains`, `include_incomplete`, `raw_coded_values` and `routine`
    # are not defined in this function; they come from the enclosing scope.
    LOG.debug(f"Starting the REDCap DET ETL routine {name}, revision {revision}")

    project = Project(redcap_url, project_id)

    # A falsy det_limit (None or 0) means "no limit".
    if det_limit:
        LOG.debug(f"Processing up to {det_limit:,} pending DETs")
        limit = sql.Literal(det_limit)
    else:
        LOG.debug(f"Processing all pending DETs")
        limit = sql.SQL("all")

    redcap_det = db.cursor(f"redcap-det {name}")
    # `limit` is composed into the statement via sql.SQL(...).format(); the
    # two %s placeholders are bound to the JSON parameters.
    redcap_det.execute(
        sql.SQL(""" select redcap_det_id as id, document from receiving.redcap_det where not processing_log @> %s and document::jsonb @> %s order by id limit {} for update """).format(limit),
        (Json([etl_id]), Json(det_contains)))

    # First loop of the DETs to determine how to process each one.
    # Uses `first_complete_dets` to keep track of which DET to
    # use to process a unique REDCap record.
    # Uses `all_dets` to keep track of the status for each DET record
    # so that they can be processed in order of `redcap_det_id` later.
    #   --Jover, 21 May 2020
    first_complete_dets: Dict[str, Any] = {}
    all_dets: List[Dict[str, str]] = []
    for det in redcap_det:
        instrument = det.document['instrument']
        record_id = det.document['record']
        # Assume we are loading all DETs
        # Status will be updated to "skip" if DET does not need to be processed
        det_record = {"id": det.id, "status": "load"}

        # Only pull REDCap record if
        # `include_incomplete` flag was not included and
        # the current instrument is complete
        if not include_incomplete and not is_complete(
                instrument, det.document):
            det_record.update({
                "status": "skip",
                "reason": "incomplete/unverified DET"
            })

        # Check if this record has an older DET
        # Skip latest DET in favor of the first DET
        # This is done to continue our first-in-first-out
        # semantics of our receiving tables
        elif first_complete_dets.get(record_id):
            det_record.update({
                "status": "skip",
                "reason": "repeat REDCap record"
            })

        else:
            # First DET seen for this record: this is the one to load.
            first_complete_dets[record_id] = det
            det_record["record_id"] = record_id

        all_dets.append(det_record)

    if not first_complete_dets:
        LOG.info("No new complete DETs found.")
    else:
        # Batch request records from REDCap
        LOG.info(f"Fetching REDCap project {project_id}")
        record_ids = list(first_complete_dets.keys())

        LOG.info(
            f"Fetching {len(record_ids):,} REDCap records from project {project.id}"
        )

        # Convert list of REDCap records to a dict so that
        # records can be looked up by record id.
        # Records with repeating instruments or longitudinal
        # events will have multiple entries in the list.
        redcap_records: DefaultDict[str, List[dict]] = defaultdict(list)

        batches = list(chunked(record_ids, redcap_api_batch_size))

        for i, batch in enumerate(batches, 1):
            LOG.info(
                f"Fetching REDCap record batch {i:,}/{len(batches):,} of size {len(batch):,}"
            )

            for record in project.records(ids=batch, raw=raw_coded_values):
                redcap_records[record.id].append(record)

    # Process all DETs in order of redcap_det_id
    #
    # NOTE(review): this loop is placed after the if/else above (not inside
    # the else) so that skipped DETs are still marked when no DET is
    # loadable — confirm against the original indentation.  `redcap_records`
    # is only read for DETs with status "load", which exist only when the
    # else branch above ran.
    with pickled_cache(geocoding_cache) as cache:
        for det in all_dets:
            # Each DET gets its own savepoint.
            with db.savepoint(f"redcap_det {det['id']}"):
                LOG.info(f"Processing REDCap DET {det['id']}")

                if det["status"] == "skip":
                    LOG.debug(
                        f"Skipping REDCap DET {det['id']} due to {det['reason']}"
                    )
                    mark_skipped(db, det["id"], etl_id, det["reason"])
                    continue

                # Each "load" entry corresponds to exactly one entry in
                # first_complete_dets, so pop() consumes it.
                received_det = first_complete_dets.pop(
                    det["record_id"])
                redcap_record_instances = redcap_records.get(
                    received_det.document["record"])

                # The fetch returned nothing for this record id.
                if not redcap_record_instances:
                    LOG.debug(
                        f"REDCap record is missing or invalid. Skipping REDCap DET {received_det.id}"
                    )
                    mark_skipped(db, received_det.id, etl_id,
                                 "invalid REDCap record")
                    continue

                bundle = routine(
                    db=db,
                    cache=cache,
                    det=received_det,
                    redcap_record_instances=redcap_record_instances)

                # A falsy result from the routine means "nothing to load".
                if not bundle:
                    LOG.debug(
                        f"Skipping REDCap DET {received_det.id} due to insufficient data in REDCap record."
                    )
                    mark_skipped(db, received_det.id, etl_id,
                                 "insufficient data in record")
                    continue

                if log_output:
                    print(as_json(bundle))

                insert_fhir_bundle(db, bundle)
                mark_loaded(db, received_det.id, etl_id, bundle['id'])
def redcap_det_uw_reopening(*, db: DatabaseSession, cache: TTLCache, det: dict, redcap_record_instances: List[REDCapRecord]) -> Optional[dict]:
    """
    Build one bundle resource from all the REDCap instances of a single
    record.

    The record must have exactly one enrollment instance whose
    REQUIRED_ENROLLMENT_INSTRUMENTS are all complete, and parental consent
    when the participant's age is blank or under 18.  A patient and the
    residential location resources are created from the enrollment instance;
    then every enrollment/encounter instance contributes an encounter plus
    whatever questionnaire, specimen and follow-up entries can be built for
    it.

    Returns None (after logging) when the record as a whole cannot be
    processed; individual instances that cannot be processed are skipped.
    """
    if redcap_record_instances is None or len(redcap_record_instances) == 0:
        LOG.warning(f"There are no record instances. Skipping record.")
        return None

    enrollments = [record for record in redcap_record_instances if record.event_name == ENROLLMENT_EVENT_NAME]

    # Exactly one enrollment instance is required (zero or several both skip).
    if not len(enrollments) == 1:
        LOG.warning(f"There are {len(enrollments)} enrollment instances for record: {redcap_record_instances[0].get('record_id')}. Skipping record.")
        return None

    enrollment = enrollments[0]

    incomplete_enrollment_instruments = {
        instrument
        for instrument
        in REQUIRED_ENROLLMENT_INSTRUMENTS
        if not is_complete(instrument, enrollment)
    }

    if incomplete_enrollment_instruments:
        LOG.debug(f"The following required enrollment instruments «{incomplete_enrollment_instruments}» are not yet marked complete.")
        return None

    # If the participant's age < 18 ensure we have parental consent.
    # A blank age is treated like a minor.  Consent is considered missing
    # when the form is explicitly not complete (False) or the parent's
    # signature is blank.
    if (enrollment['core_age_years'] == "" or int(enrollment['core_age_years']) < 18) and \
        (is_complete('parental_consent_form', enrollment) == False or enrollment['signature_parent'] == ''):
        LOG.debug("The participant is < 18 years old and we do not have parental consent. Skipping record.")
        return None

    # Create the participant resource entry and reference.
    # Assumes that the project language is the participant's preferred language.
    # A NetID, when present, identifies the patient; otherwise demographics
    # are used.
    netid = normalize_net_id(enrollment.get('netid'))

    if netid:
        patient_entry, patient_reference = create_patient_using_unique_identifier(
            sex = enrollment['core_sex'],
            preferred_language = LANGUAGE_CODE[enrollment.project.id],
            unique_identifier = netid,
            record = enrollment,
            system_identifier = INTERNAL_SYSTEM)
    else:
        patient_entry, patient_reference = create_patient_using_demographics(
            sex = enrollment['core_sex'],
            preferred_language = LANGUAGE_CODE[enrollment.project.id],
            first_name = enrollment['core_participant_first_name'],
            last_name = enrollment['core_participant_last_name'],
            birth_date = enrollment['core_birthdate'],
            zipcode = enrollment['core_zipcode'],
            record = enrollment,
            system_identifier = INTERNAL_SYSTEM)

    if not patient_entry:
        LOG.warning(f"Skipping record {enrollment.get('record_id')} with insufficient information to construct patient")
        return None

    # A bad birthdate is only logged; processing continues and the (falsy)
    # value is passed on to the computed questionnaire responses below.
    birthdate = parse_date_from_string(enrollment.get('core_birthdate'))
    if not birthdate:
        LOG.warning(f"Record {enrollment.get('record_id')} has an invalid or missing `core_birthdate` value")

    location_resource_entries = build_residential_location_resources(
        db = db,
        cache = cache,
        housing_type = enrollment.get('core_housing_type'),
        primary_street_address = enrollment['core_home_street'],
        secondary_street_address = enrollment['core_apartment_number'],
        city = enrollment['core_home_city'],
        state = enrollment['core_home_state'],
        zipcode = enrollment['core_zipcode'],
        system_identifier = INTERNAL_SYSTEM)

    # Entries shared by the whole record; per-instance entries are appended
    # inside the loop below.
    persisted_resource_entries = [patient_entry, *location_resource_entries]

    for redcap_record_instance in redcap_record_instances:

        event_type = None
        collection_method = None

        if redcap_record_instance.event_name == ENROLLMENT_EVENT_NAME:
            event_type = EventType.ENROLLMENT
            check_enrollment_data_quality(redcap_record_instance)
        elif redcap_record_instance.event_name == ENCOUNTER_EVENT_NAME:
            event_type = EventType.ENCOUNTER
            # Kiosk registration takes precedence over a swab-and-send order.
            if is_complete('kiosk_registration_4c7f', redcap_record_instance):
                collection_method = CollectionMethod.KIOSK
            elif is_complete('test_order_survey', redcap_record_instance):
                collection_method = CollectionMethod.SWAB_AND_SEND
        else:
            LOG.info(f"Skipping event: {redcap_record_instance.event_name!r} for record "
                f"{redcap_record_instance.get('record_id')} because the event is not one "
                "that we process")
            continue

        # Skip an ENCOUNTER instance if we don't have the data we need to
        # create an encounter.
        if event_type == EventType.ENCOUNTER \
            and not is_complete('daily_attestation', redcap_record_instance) \
            and not collection_method \
            and not redcap_record_instance['testing_date']: # from the 'Testing Determination - Internal' instrument
            LOG.debug("Skipping record instance with insufficient information to construct the initial encounter")
            continue

        # site_reference refers to where the sample was collected
        # Only kiosk collections carry a location; everything else leaves
        # record_location as None.
        record_location = None
        if collection_method == CollectionMethod.KIOSK:
            record_location = redcap_record_instance.get('location_type')

        location_site_map = {
            'bothell': 'UWBothell',
            'odegaard': 'UWOdegaardLibrary',
            'slu': 'UWSouthLakeUnion',
            'tacoma': 'UWTacoma',
            'uw_club': 'UWClub'
        }

        site_reference = create_site_reference(
            location = record_location,
            site_map = location_site_map,
            default_site = SWAB_AND_SEND_SITE,
            system_identifier = INTERNAL_SYSTEM)

        # Handle various symptoms.
        contained: List[dict] = []
        diagnosis: List[dict] = []

        # Map the various symptoms variables to their onset date.
        # For daily_symptoms_covid_like we don't know the actual onset date. The question asks
        # "in the past 24 hours"
        if event_type == EventType.ENCOUNTER:
            symptom_onset_map = {
                'daily_symptoms_covid_like': None,
                'symptoms': redcap_record_instance['symptom_onset'],
                'symptoms_kiosk': redcap_record_instance['symptom_duration_kiosk'],
                'symptoms_swabsend': redcap_record_instance['symptom_duration_swabsend']
            }
        elif event_type == EventType.ENROLLMENT:
            symptom_onset_map = {'symptoms_base': redcap_record_instance['symptom_onset_base']}

        contained, diagnosis = build_contained_and_diagnosis(
            patient_reference = patient_reference,
            record = redcap_record_instance,
            symptom_onset_map = symptom_onset_map,
            system_identifier = INTERNAL_SYSTEM)

        # Enrollment and swab-and-send use HOME_HEALTH, kiosk uses FIELD;
        # an encounter with neither collection method keeps None.
        collection_code = None
        if event_type == EventType.ENROLLMENT or collection_method == CollectionMethod.SWAB_AND_SEND:
            collection_code = CollectionCode.HOME_HEALTH
        elif collection_method == CollectionMethod.KIOSK:
            collection_code = CollectionCode.FIELD

        encounter_date = get_encounter_date(redcap_record_instance, event_type)

        initial_encounter_entry, initial_encounter_reference = create_encounter(
            encounter_id = create_encounter_id(redcap_record_instance, False),
            encounter_date = encounter_date,
            patient_reference = patient_reference,
            site_reference = site_reference,
            locations = location_resource_entries,
            diagnosis = diagnosis,
            contained = contained,
            collection_code = collection_code,
            system_identifier = INTERNAL_SYSTEM,
            record = redcap_record_instance)

        # Skip the entire record if we can't create the enrollment encounter.
        # Otherwise, just skip the record instance.
        if not initial_encounter_entry:
            if event_type == EventType.ENROLLMENT:
                LOG.warning("Skipping record because we could not create the enrollment encounter for record: "
                    f"{redcap_record_instance.get('record_id')}")
                return None
            else:
                LOG.warning("Skipping record instance with insufficient information to construct the initial encounter "
                    f"for record: {redcap_record_instance.get('record_id')}, instance: "
                    f"{redcap_record_instance.get('redcap_repeat_instance')}")
                continue

        specimen_entry = None
        specimen_observation_entry = None

        # A specimen is expected for swab-and-send once the post-collection
        # QC instrument is complete, and for kiosk once kiosk registration is
        # complete.
        specimen_received = (collection_method == CollectionMethod.SWAB_AND_SEND and \
            is_complete('post_collection_data_entry_qc', redcap_record_instance)) or \
            (collection_method == CollectionMethod.KIOSK and \
            is_complete('kiosk_registration_4c7f', redcap_record_instance))

        if specimen_received:
            # Use barcode fields in this order.
            prioritized_barcodes = [
                redcap_record_instance["collect_barcode_kiosk"],
                redcap_record_instance["return_utm_barcode"],
                redcap_record_instance["pre_scan_barcode"]]

            specimen_entry, specimen_reference = create_specimen(
                prioritized_barcodes = prioritized_barcodes,
                patient_reference = patient_reference,
                collection_date = get_collection_date(redcap_record_instance, collection_method),
                sample_received_time = redcap_record_instance['samp_process_date'],
                able_to_test = redcap_record_instance['able_to_test'],
                system_identifier = INTERNAL_SYSTEM)

            specimen_observation_entry = create_specimen_observation_entry(
                specimen_reference = specimen_reference,
                patient_reference = patient_reference,
                encounter_reference = initial_encounter_reference)
        else:
            LOG.info("Creating encounter for record instance without sample")

        if specimen_received and not specimen_entry:
            LOG.warning("Skipping record instance. We think the specimen was received, "
                "but we're unable to create the specimen_entry for record: "
                f"{redcap_record_instance.get('record_id')}, instance: {redcap_record_instance.get('redcap_repeat_instance')}"
                )
            continue

        # Reset every per-instance entry so nothing leaks over from a
        # previous loop iteration.
        computed_questionnaire_entry = None
        enrollment_questionnaire_entry = None
        daily_questionnaire_entry = None
        testing_determination_internal_questionnaire_entry = None
        follow_up_encounter_entry = None
        follow_up_questionnaire_entry = None
        follow_up_computed_questionnaire_entry = None

        # The computed response is given the birthdate and the encounter's
        # start date.
        computed_questionnaire_entry = create_computed_questionnaire_response(
            redcap_record_instance, patient_reference, initial_encounter_reference,
            birthdate, parse_date_from_string(initial_encounter_entry['resource']['period']['start']))

        if event_type == EventType.ENROLLMENT:
            enrollment_questionnaire_entry = create_enrollment_questionnaire_response(
                enrollment, patient_reference, initial_encounter_reference)
        else:
            testing_determination_internal_questionnaire_entry = \
                create_testing_determination_internal_questionnaire_response(
                redcap_record_instance, patient_reference, initial_encounter_reference)

            daily_questionnaire_entry = \
                create_daily_questionnaire_response(
                redcap_record_instance, patient_reference, initial_encounter_reference)

            # NOTE(review): the follow-up handling is kept inside this
            # non-enrollment branch — confirm against the original
            # indentation.
            if is_complete('week_followup', redcap_record_instance):
                # Don't set locations because the f/u survey doesn't ask for home address.
                # NOTE(review): if the survey timestamp is unavailable and
                # `fu_timestamp` is missing or blank, strptime() raises here;
                # the result of create_encounter() is also not checked before
                # being indexed below — confirm neither can happen.
                follow_up_encounter_entry, follow_up_encounter_reference = create_encounter(
                    encounter_id = create_encounter_id(redcap_record_instance, True),
                    encounter_date = extract_date_from_survey_timestamp(redcap_record_instance, 'week_followup') \
                        or datetime.strptime(redcap_record_instance.get('fu_timestamp'), '%Y-%m-%d %H:%M:%S').strftime('%Y-%m-%d'),
                    patient_reference = patient_reference,
                    site_reference = site_reference,
                    collection_code = CollectionCode.HOME_HEALTH,
                    parent_encounter_reference = initial_encounter_reference,
                    encounter_reason_code = follow_up_encounter_reason_code(),
                    encounter_identifier_suffix = "_follow_up",
                    system_identifier = INTERNAL_SYSTEM,
                    record = redcap_record_instance)

                follow_up_questionnaire_entry = create_follow_up_questionnaire_response(
                    redcap_record_instance, patient_reference, follow_up_encounter_reference)
                follow_up_computed_questionnaire_entry = create_computed_questionnaire_response(
                    redcap_record_instance, patient_reference, follow_up_encounter_reference,
                    birthdate, parse_date_from_string(follow_up_encounter_entry['resource']['period']['start']))

        current_instance_entries = [
            initial_encounter_entry,
            computed_questionnaire_entry,
            enrollment_questionnaire_entry,
            testing_determination_internal_questionnaire_entry,
            daily_questionnaire_entry,
            specimen_entry,
            specimen_observation_entry,
            follow_up_encounter_entry,
            follow_up_questionnaire_entry,
            follow_up_computed_questionnaire_entry
        ]

        # Entries that were never created for this instance are left out.
        persisted_resource_entries.extend(list(filter(None, current_instance_entries)))

    return create_bundle_resource(
        bundle_id = str(uuid4()),
        timestamp = datetime.now().astimezone().isoformat(),
        source = f"{REDCAP_URL}{enrollment.project.id}/{enrollment.id}",
        entries = list(filter(None, persisted_resource_entries))
    )
def max_instance(instrument: str, redcap_record: List[dict], since: Optional[int],
    complete: bool=True) -> Optional[int]:
    """
    Returns the most recent instance number in a *redcap_record* on or after the
    given filter instance *since*. Filters also by events with an *instrument*
    marked according to the given variable *complete* (True filters for only
    completed instances, and False filters only for incomplete or unverified
    instances). The default value for *complete* is True.

    If *since* is None, no lower bound is applied to the instance number.

    Instances whose ``<instrument>_complete`` field is blank or absent are
    skipped for both settings of *complete*.

    Returns None if no matching instance is found.

    >>> max_instance('kiosk_registration_4c7f', [ \
        {'redcap_repeat_instance': '1', 'kiosk_registration_4c7f_complete': '2'}], \
        since=0)
    1

    >>> max_instance('kiosk_registration_4c7f', [ \
        {'redcap_repeat_instance': '1', 'kiosk_registration_4c7f_complete': ''}, \
        {'redcap_repeat_instance': '2', 'kiosk_registration_4c7f_complete': '1'}, \
        {'redcap_repeat_instance': '3', 'kiosk_registration_4c7f_complete': '0'}], \
        since=0)

    >>> max_instance('kiosk_registration_4c7f', [ \
        {'redcap_repeat_instance': '1', 'kiosk_registration_4c7f_complete': ''}, \
        {'redcap_repeat_instance': '2', 'kiosk_registration_4c7f_complete': '1'}, \
        {'redcap_repeat_instance': '3', 'kiosk_registration_4c7f_complete': '0'}], \
        since=0, complete=False)
    3

    >>> max_instance('kiosk_registration_4c7f', [ \
        {'redcap_repeat_instance': '1', 'kiosk_registration_4c7f_complete': '2'}, \
        {'redcap_repeat_instance': '2', 'kiosk_registration_4c7f_complete': '2'}, \
        {'redcap_repeat_instance': '3', 'kiosk_registration_4c7f_complete': '0'}], \
        since=2)
    2

    >>> max_instance('kiosk_registration_4c7f', [ \
        {'redcap_repeat_instance': '1', 'kiosk_registration_4c7f_complete': '0'}, \
        {'redcap_repeat_instance': '2', 'kiosk_registration_4c7f_complete': '0'}, \
        {'redcap_repeat_instance': '3', 'kiosk_registration_4c7f_complete': '2'}], \
        since=2, complete=False)
    2

    >>> max_instance('kiosk_registration_4c7f', [ \
        {'redcap_repeat_instance': '1', 'kiosk_registration_4c7f_complete': '2'}, \
        {'redcap_repeat_instance': '2', 'kiosk_registration_4c7f_complete': '2'}, \
        {'redcap_repeat_instance': '3', 'kiosk_registration_4c7f_complete': '0'}], \
        since=3)

    >>> max_instance('test_order_survey', [ \
        {'redcap_repeat_instance': '1', 'test_order_survey_complete': '1', \
            'kiosk_registration_4c7f_complete': ''}, \
        {'redcap_repeat_instance': '2', 'test_order_survey_complete': '', \
            'kiosk_registration_4c7f_complete': '2'}], \
        since=0)

    An instance that does not carry the instrument's completion field at all
    is ignored instead of raising a KeyError.

    >>> max_instance('kiosk_registration_4c7f', [ \
        {'redcap_repeat_instance': '1'}], \
        since=0, complete=False)
    """
    status_field = f"{instrument}_complete"

    # A blank or missing status is neither "complete" nor "incomplete", so it
    # is rejected before consulting is_complete(); otherwise a blank value
    # could be counted as incomplete when *complete* is False.
    #
    # The *since* bound filters since the latest instance where testing was
    # triggered. If no instance exists (since is None), do not filter.
    # Note: at this point in the code, we already are only considering
    # instances in the past week.
    matching_instances = [
        instance
        for instance in redcap_record
        if instance.get(status_field) not in (None, '')
        and is_complete(instrument, instance) == complete
        and (since is None or int(instance['redcap_repeat_instance']) >= since)
    ]

    if not matching_instances:
        return None

    return _max_instance(matching_instances)
def decorated(*args, db: DatabaseSession, log_output: bool, **kwargs):
    """
    Run the wrapped ETL *routine* over every pending REDCap DET.

    Selects (``for update``) the rows of ``receiving.redcap_det`` whose
    processing log does not yet contain this ETL's id and whose document
    contains *det_contains*, then handles each one inside its own database
    savepoint.  A DET is marked skipped when its instrument is incomplete
    (unless *include_incomplete* is set), when no REDCap record could be
    fetched, when a required instrument is incomplete, or when the routine
    produces no bundle.  Otherwise the bundle is inserted and the DET is
    marked loaded.

    *name*, *revision*, *etl_id*, *det_contains*, *include_incomplete*,
    *raw_coded_values*, *required_instruments* and *routine* come from the
    enclosing scope, which is not part of this block.  Positional *args* and
    extra *kwargs* are accepted but not used here.

    If *log_output* is true, each generated bundle is printed as JSON.
    """
    LOG.debug(f"Starting the REDCap DET ETL routine {name}, revision {revision}")

    def process(det, cache) -> None:
        """Handle a single DET row: skip it or load the bundle built from it."""
        LOG.info(f"Processing REDCap DET {det.id}")

        det_instrument = det.document['instrument']

        # Unless incomplete DETs were explicitly requested, the instrument
        # that triggered this DET has to be marked complete.
        if not (include_incomplete or is_complete(det_instrument, det.document)):
            LOG.debug(f"Skipping incomplete or unverified REDCap DET {det.id}")
            mark_skipped(db, det.id, etl_id)
            return

        redcap_record = get_redcap_record_from_det(det.document, raw_coded_values)

        if not redcap_record:
            LOG.debug(f"REDCap record is missing or invalid. Skipping REDCap DET {det.id}")
            mark_skipped(db, det.id, etl_id)
            return

        # Every required instrument must be complete on the fetched record.
        incomplete_instruments = {
            required
            for required in required_instruments
            if not is_complete(required, redcap_record)
        }

        if incomplete_instruments:
            LOG.debug(
                f"The following required instruments «{incomplete_instruments}» are not yet marked complete. "
                f"Skipping REDCap DET {det.id}")
            mark_skipped(db, det.id, etl_id)
            return

        bundle = routine(db=db, cache=cache, det=det, redcap_record=redcap_record)

        if not bundle:
            mark_skipped(db, det.id, etl_id)
            return

        if log_output:
            print(as_json(bundle))

        insert_fhir_bundle(db, bundle)
        mark_loaded(db, det.id, etl_id, bundle['id'])

    pending_dets = db.cursor(f"redcap-det {name}")
    query_params = (Json([etl_id]), Json(det_contains))
    pending_dets.execute(
        """ select redcap_det_id as id, document from receiving.redcap_det where not processing_log @> %s and document::jsonb @> %s order by id for update """,
        query_params)

    with pickled_cache(CACHE_FILE) as cache:
        for det in pending_dets:
            # One savepoint per DET, named after the DET's id.
            with db.savepoint(f"redcap_det {det.id}"):
                process(det, cache)
def redcap_det_childcare(*, db: DatabaseSession, cache: TTLCache, det: dict,
    redcap_record_instances: List[REDCapRecord]) -> Optional[dict]:
    """
    Build a FHIR bundle from all *redcap_record_instances* of one REDCap
    record in the childcare study.

    The single enrollment instance supplies the patient and residential
    location resources.  Every instance (enrollment and encounters) then
    contributes an encounter plus, where available, questionnaire responses,
    a specimen and a specimen observation.

    *db* and *cache* are passed on to the residential location builder.
    *det* is accepted for interface compatibility and is not read here.

    Returns None (i.e. the whole record is skipped) when:

    * a required enrollment instrument is not complete,
    * the participant is under 18 (or the age is blank) and there is no
      parental signature,
    * the patient resource cannot be constructed, or
    * the enrollment encounter cannot be constructed.

    Individual encounter instances that lack the needed data are skipped
    without aborting the record.

    Raises AssertionError if *redcap_record_instances* is empty or does not
    contain exactly one enrollment instance.
    """
    assert redcap_record_instances is not None and len(redcap_record_instances) > 0, \
        'The redcap_record_instances list was not populated.'

    enrollments = [record for record in redcap_record_instances if \
        record.event_name.startswith(ENROLLMENT_EVENT_NAME_PREFIX)]
    assert len(enrollments) == 1, \
        f'Record had {len(enrollments)} enrollments.'

    enrollment = enrollments[0]

    incomplete_enrollment_instruments = {
        instrument
        for instrument in REQUIRED_ENROLLMENT_INSTRUMENTS
        if not is_complete(instrument, enrollment)
    }

    if incomplete_enrollment_instruments:
        LOG.debug(
            f'The following required enrollment instruments «{incomplete_enrollment_instruments}» are not yet marked complete.'
        )
        return None

    # If the participant's age < 18 ensure we have parental consent.
    # A blank age is treated the same as being under 18.
    if (enrollment['core_age_years'] == "" or int(enrollment['core_age_years']) < 18) and \
        enrollment['parent_signature'] == '':
        LOG.debug(
            "The participant is < 18 years old and we do not have parental consent. Skipping record."
        )
        return None

    # Create the participant resource entry and reference.
    patient_entry, patient_reference = create_patient_using_demographics(
        sex='unknown',  # Set to unknown so that we don't ingest identifiers
        preferred_language=enrollment.get('language'),
        first_name=enrollment['core_participant_first_name'],
        last_name=enrollment['core_participant_last_name'],
        birth_date=enrollment['core_birthdate'],
        zipcode=enrollment['core_zipcode'],
        record=enrollment,
        system_identifier=INTERNAL_SYSTEM)

    if not patient_entry:
        LOG.warning(
            'Skipping record with insufficient information to construct patient'
        )
        return None

    location_resource_entries = build_residential_location_resources(
        db=db,
        cache=cache,
        housing_type=enrollment.get('core_housing_type'),
        primary_street_address=enrollment['core_home_street'],
        secondary_street_address=enrollment['core_apartment_number'],
        city=enrollment['core_home_city'],
        state=enrollment['core_home_state'],
        zipcode=enrollment['core_zipcode'],
        system_identifier=INTERNAL_SYSTEM)

    persisted_resource_entries = [patient_entry, *location_resource_entries]

    childcare_center = enrollment['childcare_center']

    # Maps the enrollment's `childcare_center` value to a site.  It is the
    # same for every record instance, so build it once instead of once per
    # loop iteration.
    site_map = {
        'childcare_room_70th': SANDPOINT_SITE,
        'childcare_room_radford': RADFORD_SITE,
        'childcare_room_portage': PORTAGE_BAY_SITE,
        'childcare_room_minor': MINOR_SITE,
        'childcare_room_maintinytots': MAINTINYTOTS_SITE,
        'childcare_room_easttinytots': EASTTINYTOTS_SITE,
        'childcare_room_dlbeacon': DLBEACON_SITE,
        'childcare_room_dlmag': DLMAG_SITE,
        'childcare_room_mighty': MIGHTY_SITE,
        'childcare_room_birch': BIRCH_SITE,
        'childcare_room_mothers': MOTHERS_SITE,
        'childcare_room_wcampus': UWCHILDRENS_WEST_SITE,
        'childcare_room_laurel': UWCHILDRENS_LAUREL_SITE
    }

    for redcap_record_instance in redcap_record_instances:
        event_type = None
        study_arm = None

        # Classify the instance by its event name: first the kind of event...
        if redcap_record_instance.event_name.startswith(ENROLLMENT_EVENT_NAME_PREFIX):
            event_type = EventType.ENROLLMENT
        elif redcap_record_instance.event_name.startswith(ENCOUNTER_EVENT_NAME_PREFIX) \
            or redcap_record_instance.event_name == UNSCHEDULED_ENCOUNTER_EVENT_NAME:
            event_type = EventType.ENCOUNTER
        else:
            LOG.error(
                f'The record instance has an unexpected event name: {redcap_record_instance.event_name}'
            )
            continue

        # ...then the study arm.
        if '_arm_1' in redcap_record_instance.event_name:
            study_arm = StudyArm.PRIMARY
        elif '_arm_2' in redcap_record_instance.event_name:
            study_arm = StudyArm.SECONDARY
        else:
            LOG.error(
                f'The record instance has an unexpected study arm in the event name: {redcap_record_instance.event_name}'
            )
            continue

        # Skip an ENCOUNTER instance if we don't have the data we need to
        # create an encounter. Require the participant to have provided
        # survey data or a sample.
        if event_type == EventType.ENCOUNTER \
            and not is_complete('symptom_check', redcap_record_instance) \
            and not is_complete('swab_kit_reg', redcap_record_instance) \
            and not is_complete('post_collection_data_entry_qc', redcap_record_instance):
            LOG.debug(
                'Skipping record instance with insufficient information to construct the encounter'
            )
            continue

        # From this point on, log at the `warning` level if we have to skip the encounter.
        # That situation would be one we'd need to dig into.

        # Create the site reference for the encounter. For primary participants, use
        # a completed `return_pickup` survey to indicate that they are having their
        # sample picked up from home instead of returning it to a dropbox.
        location = None  # No location will cause `create_site_reference` to use the `default_site` value.

        if study_arm == StudyArm.PRIMARY and event_type == EventType.ENCOUNTER and \
            not is_complete('return_pickup', redcap_record_instance):
            location = childcare_center

        site_reference = create_site_reference(
            location=location,
            site_map=site_map,
            default_site=SWAB_AND_SEND_SITE,
            system_identifier=INTERNAL_SYSTEM)

        # Handle various symptoms.
        contained: List[dict] = []
        diagnosis: List[dict] = []

        # Map the various symptoms variables to their onset date.
        # The PRIMARY arm does not get the symptom survey at enrollment,
        # but the SECONDARY arm does.
        if event_type == EventType.ENCOUNTER or study_arm == StudyArm.SECONDARY:
            symptom_onset_map = {
                'symptoms_check': redcap_record_instance['symptom_duration'],
            }
            contained, diagnosis = build_contained_and_diagnosis(
                patient_reference=patient_reference,
                record=redcap_record_instance,
                symptom_onset_map=symptom_onset_map,
                system_identifier=INTERNAL_SYSTEM)

        encounter_date = get_encounter_date(redcap_record_instance, event_type)
        if not encounter_date:
            LOG.warning(
                'Skipping record instance because we could not create an encounter_date'
            )
            continue

        encounter_entry, encounter_reference = create_encounter(
            encounter_id=create_encounter_id(redcap_record_instance),
            encounter_date=encounter_date,
            patient_reference=patient_reference,
            site_reference=site_reference,
            locations=location_resource_entries,
            diagnosis=diagnosis,
            contained=contained,
            collection_code=COLLECTION_CODE,
            system_identifier=INTERNAL_SYSTEM,
            record=redcap_record_instance)

        # Skip the entire record if we can't create the enrollment encounter.
        # Otherwise, just skip the record instance.
        if not encounter_entry:
            if event_type == EventType.ENROLLMENT:
                LOG.warning(
                    'Skipping record because we could not create the enrollment encounter'
                )
                return None
            else:
                LOG.warning(
                    'Skipping record instance with insufficient information to construct the encounter'
                )
                continue

        specimen_entry = None
        specimen_observation_entry = None
        # A completed post-collection QC instrument is taken to mean that the
        # lab received a specimen for this instance.
        specimen_received = is_complete('post_collection_data_entry_qc', redcap_record_instance)

        if specimen_received:
            # Use barcode fields in this order.
            prioritized_barcodes = [
                redcap_record_instance['return_utm_barcode'],  # Post Collection Data Entry Qc
                redcap_record_instance['utm_tube_barcode'],  # Scan Kit Reg
                redcap_record_instance['pre_scan_barcode']  # Back End Mail Scans
            ]

            specimen_entry, specimen_reference = create_specimen(
                prioritized_barcodes=prioritized_barcodes,
                patient_reference=patient_reference,
                collection_date=get_collection_date(redcap_record_instance),
                sample_received_time=redcap_record_instance['samp_process_date'],
                able_to_test=redcap_record_instance['able_to_test'],
                system_identifier=INTERNAL_SYSTEM)

            # NOTE(review): the result of create_specimen is not checked here.
            # If it can come back empty (e.g. no usable barcode), the
            # observation below would be built from an empty reference —
            # confirm against create_specimen.
            specimen_observation_entry = create_specimen_observation_entry(
                specimen_reference=specimen_reference,
                patient_reference=patient_reference,
                encounter_reference=encounter_reference)
        else:
            LOG.info('Creating encounter for record instance without sample')

        enrollment_questionnaire_entry = None
        encounter_questionnaire_entry = None
        operational_questionnaire_entry = None

        if event_type == EventType.ENROLLMENT:
            enrollment_questionnaire_entry = create_enrollment_questionnaire_response(
                enrollment, study_arm, patient_reference, encounter_reference)

        # The SECONDARY arm gets "encounter" surveys in the ENROLLMENT event.
        if event_type == EventType.ENCOUNTER or study_arm == StudyArm.SECONDARY:
            encounter_questionnaire_entry = create_encounter_questionnaire_response(
                redcap_record_instance, patient_reference, encounter_reference)

        operational_questionnaire_entry = create_operational_questionnaire_response(
            redcap_record_instance, patient_reference, encounter_reference)

        current_instance_entries = [
            encounter_entry,
            enrollment_questionnaire_entry,
            encounter_questionnaire_entry,
            operational_questionnaire_entry,
            specimen_entry,
            specimen_observation_entry,
        ]

        # Keep only the entries that were actually created for this instance.
        persisted_resource_entries.extend(filter(None, current_instance_entries))

    return create_bundle_resource(
        bundle_id=str(uuid4()),
        timestamp=datetime.now().astimezone().isoformat(),
        source=f'{REDCAP_URL}{enrollment.project.id}/{enrollment.id}',
        entries=list(filter(None, persisted_resource_entries)))