Esempio n. 1
0
def redcap_registration_complete(redcap_record: dict) -> bool:
    """
    Returns True if a given *redcap_record* shows a participant has completed
    the enrollment surveys. Otherwise, returns False.

    The enrollment surveys checked are the ``eligibility_screening``,
    ``consent_form`` and ``enrollment_questionnaire`` instruments.  The result
    is always a real ``bool``: any falsy answer from ``is_complete`` (``False``,
    or ``None`` if it reports a missing instrument that way) becomes ``False``.

    >>> redcap_registration_complete(None)
    False

    >>> redcap_registration_complete({})
    False

    >>> redcap_registration_complete({ \
        'eligibility_screening_complete': '1', \
        'consent_form_complete': '2', \
        'enrollment_questionnaire_complete': '0'})
    False

    >>> redcap_registration_complete({ \
        'eligibility_screening_complete': '2', \
        'consent_form_complete': '2', \
        'enrollment_questionnaire_complete': '1'})
    False

    >>> redcap_registration_complete({ \
        'eligibility_screening_complete': '2', \
        'consent_form_complete': '2', \
        'enrollment_questionnaire_complete': '2'})
    True
    """
    # A missing or empty record cannot show any completed instrument.
    if not redcap_record:
        return False

    required_instruments = (
        'eligibility_screening',
        'consent_form',
        'enrollment_questionnaire',
    )

    # all() short-circuits like the `and` chain it replaces, but always yields
    # a bool instead of leaking a falsy non-bool value from is_complete().
    return all(
        is_complete(instrument, redcap_record)
        for instrument in required_instruments)
Esempio n. 2
0
def collection_date(record: dict) -> Optional[str]:
    """
    Work out the sample/specimen collection date for a REDCap *record*.

    Returns the value stored in the selected field, which is falsy (``None``
    or empty) when nothing has been recorded there.
    """
    # The logistics team fills out back_end_mail_scans for shipping, so it is
    # only used for mail-in samples.  A completion status of None is treated
    # as an in-person/kiosk record.
    if is_complete('back_end_mail_scans', record) is None:
        return record.get("nasal_swab_q")

    # Everything below handles mail-in records.

    # Records older than the scan_kit_reg instrument may carry a value in the
    # collection_date field from PCDEQC.  That field stopped being used on
    # 22 July 2020.
    legacy_date = record.get("collection_date")
    if legacy_date:
        return legacy_date

    # The date on the tube is preferred; the fallback field depends on whether
    # the kit registration instrument is complete.
    if is_complete('scan_kit_reg', record):
        fallback_field = "kit_reg_date"
    else:
        fallback_field = "back_end_scan_date"

    return record.get("date_on_tube") or record.get(fallback_field)
def get_encounter_date(record: REDCapRecord, event_type: EventType) -> Optional[str]:
    """
    Return the encounter date (``YYYY-MM-DD``) for *record* according to
    *event_type*, or a falsy value when no date can be determined.

    For ``EventType.ENCOUNTER`` the sources are tried in this order: the daily
    attestation survey, the kiosk registration, the swab-and-send test order
    and finally the internal testing determination.  For every instrument the
    survey ``_timestamp`` field (which is in Pacific time) is tried before the
    custom field, because the custom fields aren't always populated and, when
    they are, they use the browser's time zone.
    ``testing_determination_internal`` is not enabled as a survey, but its
    timestamp is tried anyway in case it ever is.

    For ``EventType.ENROLLMENT`` the enrollment questionnaire survey timestamp
    is tried before the ``enrollment_date`` field.

    Any other *event_type* yields ``None``.
    """
    def custom_timestamp_date(field_name: str) -> Optional[str]:
        # Date part of a custom '%Y-%m-%d %H:%M:%S' field; None when blank.
        value = record.get(field_name)
        if not value:
            return None
        return datetime.strptime(value, '%Y-%m-%d %H:%M:%S').strftime('%Y-%m-%d')

    encounter_date = None

    if event_type == EventType.ENCOUNTER:
        # `or` keeps the lookups lazy: later sources are only consulted (and
        # parsed) when every earlier one came back empty.
        encounter_date = (
            extract_date_from_survey_timestamp(record, 'daily_attestation')
            or record.get('attestation_date')
            or extract_date_from_survey_timestamp(record, 'kiosk_registration_4c7f')
            or custom_timestamp_date('nasal_swab_timestamp')
            or extract_date_from_survey_timestamp(record, 'test_order_survey')
            or custom_timestamp_date('time_test_order')
            or extract_date_from_survey_timestamp(record, 'testing_determination_internal')
            or record.get('testing_date')
        )

        # We have seen cases when the `attestation_date` is not getting set
        # by REDCap in the `daily_attestation` instrument. Here, we get the date
        # based on the instance ID of the `daily_attestation` instrument. It's safe
        # to do this because the Musher computes the instance from the date.
        #
        # The check is on falsiness rather than `is None`: an unset field can
        # come back as an empty string, which would otherwise end the chain
        # above with '' and silently skip this fallback.
        if not encounter_date and is_complete('daily_attestation', record) and record.repeat_instance:
            encounter_date = get_date_from_repeat_instance(record.repeat_instance)

    elif event_type == EventType.ENROLLMENT:
        encounter_date = (
            extract_date_from_survey_timestamp(record, 'enrollment_questionnaire')
            or record.get('enrollment_date')
        )

    return encounter_date
Esempio n. 4
0
def extract_date_from_survey_timestamp(record: REDCapRecord,
                                       survey_name: str) -> Optional[str]:
    """
    Return the date part (``YYYY-MM-DD``) of the REDCap survey timestamp for
    *survey_name*, or ``None`` when it is not available.

    The survey timestamp is the system timestamp that REDCap captures
    automatically, so it does not depend on the client, and it is in local
    (Pacific) time. It is populated only if the instrument was filled out as a
    survey, and it cannot be set via a REDCap data import.

    ``None`` is returned when *record* or *survey_name* is empty, when the
    instrument is not complete, or when the timestamp field is blank.
    """
    if not record or not survey_name:
        return None

    if not is_complete(survey_name, record):
        return None

    raw_timestamp = record.get(f'{survey_name}_timestamp')
    if not raw_timestamp:
        return None

    parsed = datetime.strptime(raw_timestamp, '%Y-%m-%d %H:%M:%S')
    return parsed.strftime('%Y-%m-%d')
Esempio n. 5
0
        def decorated(*args, **kwargs):
            # Gate the wrapped routine: it only runs once every required
            # instrument is complete in the record passed as the
            # `redcap_record` keyword argument.
            record = kwargs["redcap_record"]

            incomplete_instruments = {
                name for name in required_instruments
                if not is_complete(name, record)
            }

            # Nothing outstanding: hand over to the routine unchanged.
            if not incomplete_instruments:
                return routine(*args, **kwargs)

            LOG.debug(
                f"The following required instruments «{incomplete_instruments}» are not yet marked complete."
            )
            return None
Esempio n. 6
0
def redcap_det_scan(*, db: DatabaseSession, cache: TTLCache, det: dict,
                    redcap_record: REDCapRecord) -> Optional[dict]:
    """
    Build a bundle resource for a SCAN enrollment from *redcap_record*.

    *db* and *cache* are passed on to ``locations()``.  *det* is accepted but
    not used in this function body.

    Returns the result of ``create_bundle_resource()`` containing the patient,
    initial encounter, initial questionnaire response, location entries and,
    when available, the specimen, specimen observation and day-7 follow-up
    entries.  Returns ``None`` when the record is skipped: the enrollment is
    incomplete, ``location_type`` is blank, or there is insufficient
    information to construct the patient, the initial encounter or the
    specimen.
    """
    # Add check for `enrollment_questionnaire` is complete because we cannot
    # include it in the top list of REQUIRED_INSTRUMENTS since the new
    # SCAN In-Person Enrollment project does not have this instrument.
    #   -Jover, 17 July 2020
    # NOTE: the comparison is `== False` on purpose so that a None result
    # (presumably meaning the instrument is absent from the project) does not
    # cause the record to be skipped.
    if is_complete('enrollment_questionnaire', redcap_record) == False:
        LOG.debug(
            "Skipping enrollment with incomplete `enrollment_questionnaire` instrument"
        )
        return None

    # If the `location_type` field exists, but is not filled in (empty string)
    # then skip record. Added check because the new SCAN Husky Project
    # has a different survey flow where participants are asked to fill in their
    # consent & illness questionnaire online before doing the in-person
    # swab. We don't know the site of the swab until this field is completed in
    # person, so `location_type` is recorded in the nasal_swab_collection
    # instrument which is completed at the time of the swab.
    # We cannot check nasal_swab_collection is complete because it exists in
    # other projects and would delay ingestion of data from them.
    #   -Jover, 04 September 2020
    if redcap_record.get('location_type') == '':
        LOG.debug("Skipping enrollment without completed `location_type`")
        return None

    # Skip record if the illness_questionnaire is not complete, because this is
    # a "false" enrollment where the participant was not mailed a swab kit.
    # We must verify illness_questionnaire with the `illness_q_date` field
    # since there is a bug in REDCap that sometimes leaves the questionnaire marked incomplete/unverified.
    # We must have another check of the back_end_mail_scans because sometimes
    # the `illness_q_date` field is not filled in due to a bug in REDCap.
    # By verifying illness_questionnaire is complete first, we minimize the
    # delay in data ingestion since the back_end_mail_scans is completed the day after enrollment.
    #   -Jover, 29 June 2020

    # Add check for `illness_questionnaire` is complete because the new
    # SCAN In-Person Enrollment project does not have the `illness_q_date` field
    # and it does not have the `back_end_mail_scans` instrument.
    #   -Jover, 16 July 2020

    # Add check for `nasal_swab_collection` is complete because the new
    # SCAN Husky Test project does not have the `illness_questionnaire` instrument
    # nor the `back_end_mail_scans` instrument.
    if not (redcap_record.get('illness_q_date')
            or is_complete('illness_questionnaire', redcap_record)
            or is_complete('back_end_mail_scans', redcap_record)
            or is_complete('nasal_swab_collection', redcap_record)):
        LOG.debug("Skipping incomplete enrollment")
        return None

    site_reference = create_site_reference(redcap_record)
    location_resource_entries = locations(db, cache, redcap_record)
    patient_entry, patient_reference = create_patient(redcap_record)

    if not patient_entry:
        LOG.warning(
            "Skipping enrollment with insufficient information to construct patient"
        )
        return None

    initial_encounter_entry, initial_encounter_reference = create_initial_encounter(
        redcap_record, patient_reference, site_reference,
        location_resource_entries)

    if not initial_encounter_entry:
        LOG.warning(
            "Skipping enrollment with insufficient information to construct a initial encounter"
        )
        return None

    initial_questionnaire_entry = create_initial_questionnaire_response(
        redcap_record, patient_reference, initial_encounter_reference)

    specimen_entry = None
    specimen_observation_entry = None
    specimen_received = is_complete('post_collection_data_entry_qc',
                                    redcap_record)

    # Mail in SCAN projects have `post_collection_data_entry_qc` instrument to
    # indicate a specimen is received. The SCAN In-Person Enrollment project
    # and SCAN Husky project only uses this instrument to mark "never-tested".
    # So we rely on `nasal_swab_collection` instrument to know that we have
    # sample data to ingest.
    # Only rely on `nasal_swab_collection` if the `back_end_mail_scans` instrument
    # does not exist in the record, i.e. the record is from a kiosk project.
    #   -Jover, 09 September 2020
    if not specimen_received and is_complete('back_end_mail_scans',
                                             redcap_record) is None:
        specimen_received = is_complete('nasal_swab_collection', redcap_record)

    if specimen_received:
        specimen_entry, specimen_reference = create_specimen(
            redcap_record, patient_reference)
        # NOTE(review): the observation entry is created before
        # `specimen_entry` is checked below, so this call may receive an empty
        # specimen reference; the result is discarded in that case.
        specimen_observation_entry = create_specimen_observation_entry(
            specimen_reference, patient_reference, initial_encounter_reference)
    else:
        LOG.info("Creating encounter for record without sample")

    if specimen_received and not specimen_entry:
        LOG.warning(
            "Skipping enrollment with insufficent information to construct a specimen"
        )
        return None

    follow_up_encounter_entry = None
    follow_up_questionnaire_entry = None

    if is_complete('day_7_follow_up', redcap_record):
        # Follow-up encounter for 7 day follow-up survey
        follow_up_encounter_entry, follow_up_encounter_reference = create_follow_up_encounter(
            redcap_record, patient_reference, site_reference,
            initial_encounter_reference)
        follow_up_questionnaire_entry = create_follow_up_questionnaire_response(
            redcap_record, patient_reference, follow_up_encounter_reference)

    # Optional entries may still be None here; they are filtered out when the
    # bundle is created below.
    resource_entries = [
        patient_entry, initial_encounter_entry, initial_questionnaire_entry,
        specimen_entry, *location_resource_entries, specimen_observation_entry,
        follow_up_encounter_entry, follow_up_questionnaire_entry
    ]

    return create_bundle_resource(
        bundle_id=str(uuid4()),
        timestamp=datetime.now().astimezone().isoformat(),
        source=
        f"{REDCAP_URL}{redcap_record.project.id}/{redcap_record['record_id']}",
        entries=list(filter(None, resource_entries)))
Esempio n. 7
0
def generate(record_ids: List[str], project_id: int, token_name: str,
             since_date: str, until_date: str, instruments: List[str],
             include_incomplete: bool):
    """
    Generate DET notifications for REDCap records.

    Specify one or more record ids to only consider those records.  If no
    record ids are given, then all records (or all records matching the date
    filters) are considered.  The REDCap API does not support combining a list
    of specific record ids with date filters, so this command does not either.

    Requires environmental variables REDCAP_API_URL and REDCAP_API_TOKEN (or
    whatever you passed to --token-name).

    DET notifications are output for all completed instruments for each record
    by default.  Pass --include-incomplete to output DET notifications for
    incomplete and unverified instruments too.  Pass one or more --instrument
    options to limit output to specific instrument names.

    All DET notifications are output to stdout as newline-delimited JSON
    records.  You will likely want to redirect stdout to a file.
    """
    # Both lookups raise KeyError when the environment variable is not set.
    api_token = os.environ[token_name]
    api_url = os.environ['REDCAP_API_URL']

    project = Project(api_url, api_token, project_id)

    LOG.info(f"REDCap project #{project.id}: {project.title}")

    if bool(since_date or until_date) and bool(record_ids):
        raise click.UsageError(
            "The REDCap API does not support fetching records filtered by id *and* date."
        )

    if since_date and until_date:
        LOG.debug(
            f"Getting all records that have been created/modified between {since_date} and {until_date}"
        )
    elif since_date:
        LOG.debug(
            f"Getting all records that have been created/modified since {since_date}"
        )
    elif until_date:
        LOG.debug(
            f"Getting all records that have been created/modified before {until_date}"
        )
    elif record_ids:
        LOG.debug(f"Getting specified records: {record_ids}")
    else:
        LOG.debug(f"Getting all records")

    if instruments:
        LOG.debug(
            f"Producing DET notifications for the following {'instruments' if include_incomplete else 'complete instruments'}: {instruments}"
        )
    else:
        LOG.debug(
            f"Producing DET notifications for all {'instruments' if include_incomplete else 'complete instruments'} ({project.instruments})"
        )
        instruments = project.instruments

    # Validate the requested instrument names before fetching any records, and
    # report problems as a usage error rather than with `assert`, which is
    # stripped when Python runs with -O.
    unknown_instruments = set(instruments) - set(project.instruments)

    if unknown_instruments:
        raise click.UsageError(
            f"The following --instrument names aren't in the REDCap project: {unknown_instruments}"
        )

    records = project.records(since_date=since_date,
                              until_date=until_date,
                              ids=record_ids or None,
                              raw=True)

    # One JSON document per (record, instrument) pair, newline-delimited on
    # stdout.  Incomplete instruments are emitted only on request.
    for record in records:
        for instrument in instruments:
            if include_incomplete or is_complete(instrument, record):
                print(as_json(create_det_records(project, record, instrument)))
Esempio n. 8
0
def generate(record_ids: List[str], api_url: str, project_id: int, token: str,
             since_date: str, until_date: str, instruments: List[str],
             events: List[str], include_incomplete: bool):
    """
    Generate DET notifications for REDCap records.

    Specify one or more record ids to only consider those records.  If no
    record ids are given, then all records (or all records matching the date
    filters) are considered.  The REDCap API does not support combining a list
    of specific record ids with date filters, so this command does not either.

    DET notifications are output for all completed instruments for each record
    by default.  Pass --include-incomplete to output DET notifications for
    incomplete and unverified instruments too.  Pass one or more --instrument
    options to limit output to specific instrument names.  Pass one or more
    --event options to limit output to specific event names.

    All DET notifications are output to stdout as newline-delimited JSON
    records.  You will likely want to redirect stdout to a file.
    """
    # *token* names an environment variable holding the API token; when it is
    # not given, no explicit token is handed to the project.
    api_token = None
    if token:
        api_token = os.environ[token]

    project = Project(api_url, project_id, token=api_token)

    LOG.info(f"REDCap project #{project.id}: {project.title}")

    filtering_by_date = bool(since_date or until_date)

    if filtering_by_date and record_ids:
        raise click.UsageError(
            "The REDCap API does not support fetching records filtered by id *and* date."
        )

    # Describe which records are about to be fetched.
    if not filtering_by_date:
        if record_ids:
            LOG.debug(f"Getting specified records: {record_ids}")
        else:
            LOG.debug(f"Getting all records")
    elif not until_date:
        LOG.debug(
            f"Getting all records that have been created/modified since {since_date}"
        )
    elif not since_date:
        LOG.debug(
            f"Getting all records that have been created/modified before {until_date}"
        )
    else:
        LOG.debug(
            f"Getting all records that have been created/modified between {since_date} and {until_date}"
        )

    # Default to every event in the project; validate explicit choices.
    if not events:
        LOG.debug(
            f"Producing DET notifications for all events ({project.events})")
        events = project.events
    else:
        LOG.debug(
            f"Producing DET notifications for the following events: {events}")
        assert_known_attribute_value(project, 'events', events, 'event')

    # Same treatment for instruments.
    if not instruments:
        LOG.debug(
            f"Producing DET notifications for all {'instruments' if include_incomplete else 'complete instruments'} ({project.instruments})"
        )
        instruments = project.instruments
    else:
        LOG.debug(
            f"Producing DET notifications for the following {'instruments' if include_incomplete else 'complete instruments'}: {instruments}"
        )
        assert_known_attribute_value(project, 'instruments', instruments,
                                     'instrument')

    # Only the record id and the per-instrument completion status fields are
    # requested from REDCap.
    requested_fields = [project.record_id_field]
    requested_fields.extend(
        completion_status_field(name) for name in instruments)

    fetched_records = project.records(since_date=since_date,
                                      until_date=until_date,
                                      ids=record_ids or None,
                                      fields=requested_fields,
                                      events=events,
                                      raw=True)

    # Lazily pair each record with each instrument, in record-major order, and
    # keep incomplete instruments only on request.
    notifications = (
        det(project, fetched, name)
        for fetched in fetched_records
        for name in instruments
        if include_incomplete or is_complete(name, fetched)
    )

    for notification in notifications:
        print(as_json(notification))
Esempio n. 9
0
        def decorated(*args,
                      db: DatabaseSession,
                      log_output: bool,
                      det_limit: int = None,
                      redcap_api_batch_size: int,
                      geocoding_cache: str = None,
                      **kwargs):
            """
            Run the wrapped REDCap DET ETL routine over pending DETs.

            Pending DETs are rows of ``receiving.redcap_det`` whose
            ``processing_log`` does not contain this ETL's id and whose
            document contains ``det_contains``.  The work is done in two
            passes: the first decides, per DET, whether to load or skip it;
            the second processes the DETs in cursor order, calling the routine
            for each one to load and inserting the bundle it returns.

            *db* is the database session used for the cursor, savepoints and
            bookkeeping.  When *log_output* is true each bundle is printed as
            JSON.  *det_limit*, when truthy, caps the number of DETs selected.
            *redcap_api_batch_size* is the number of record ids requested from
            REDCap per call.  *geocoding_cache* is handed to
            ``pickled_cache()``.  Extra positional and keyword arguments are
            accepted but not used in this body.
            """
            LOG.debug(
                f"Starting the REDCap DET ETL routine {name}, revision {revision}"
            )

            project = Project(redcap_url, project_id)

            if det_limit:
                LOG.debug(f"Processing up to {det_limit:,} pending DETs")
                limit = sql.Literal(det_limit)
            else:
                LOG.debug(f"Processing all pending DETs")
                limit = sql.SQL("all")

            # Only the limit is formatted into the statement; the ETL id and
            # the DET filter are passed as query parameters.
            redcap_det = db.cursor(f"redcap-det {name}")
            redcap_det.execute(
                sql.SQL("""
                select redcap_det_id as id, document
                  from receiving.redcap_det
                 where not processing_log @> %s
                   and document::jsonb @> %s
                 order by id
                 limit {}
                   for update
                """).format(limit), (Json([etl_id]), Json(det_contains)))

            # First loop of the DETs to determine how to process each one.
            # Uses `first_complete_dets` to keep track of which DET to
            # use to process a unique REDCap record.
            # Uses `all_dets` to keep track of the status for each DET record
            # so that they can be processed in order of `redcap_det_id` later.
            #   --Jover, 21 May 2020
            first_complete_dets: Dict[str, Any] = {}
            all_dets: List[Dict[str, str]] = []
            for det in redcap_det:
                instrument = det.document['instrument']
                record_id = det.document['record']
                # Assume we are loading all DETs
                # Status will be updated to "skip" if DET does not need to be processed
                det_record = {"id": det.id, "status": "load"}

                # Only pull REDCap record if
                # `include_incomplete` flag was not included and
                # the current instrument is complete
                if not include_incomplete and not is_complete(
                        instrument, det.document):
                    det_record.update({
                        "status": "skip",
                        "reason": "incomplete/unverified DET"
                    })

                # Check if this record has an older DET
                # Skip latest DET in favor of the first DET
                # This is done to continue our first-in-first-out
                # semantics of our receiving tables
                elif first_complete_dets.get(record_id):
                    det_record.update({
                        "status": "skip",
                        "reason": "repeat REDCap record"
                    })

                else:
                    first_complete_dets[record_id] = det
                    det_record["record_id"] = record_id

                all_dets.append(det_record)

            # NOTE: `redcap_records` is only bound in the else branch below.
            # That is sufficient because every DET with status "load" also has
            # an entry in `first_complete_dets`, so the lookup in the second
            # loop is never reached when this dict is empty.
            if not first_complete_dets:
                LOG.info("No new complete DETs found.")
            else:
                # Batch request records from REDCap
                LOG.info(f"Fetching REDCap project {project_id}")
                record_ids = list(first_complete_dets.keys())

                LOG.info(
                    f"Fetching {len(record_ids):,} REDCap records from project {project.id}"
                )

                # Convert list of REDCap records to a dict so that
                # records can be looked up by record id.
                # Records with repeating instruments or longitudinal
                # events will have multiple entries in the list.
                redcap_records: DefaultDict[str,
                                            List[dict]] = defaultdict(list)

                batches = list(chunked(record_ids, redcap_api_batch_size))

                for i, batch in enumerate(batches, 1):
                    LOG.info(
                        f"Fetching REDCap record batch {i:,}/{len(batches):,} of size {len(batch):,}"
                    )

                    for record in project.records(ids=batch,
                                                  raw=raw_coded_values):
                        redcap_records[record.id].append(record)

            # Process all DETs in order of redcap_det_id
            with pickled_cache(geocoding_cache) as cache:
                for det in all_dets:
                    # Each DET is handled inside its own savepoint.
                    with db.savepoint(f"redcap_det {det['id']}"):
                        LOG.info(f"Processing REDCap DET {det['id']}")

                        if det["status"] == "skip":
                            LOG.debug(
                                f"Skipping REDCap DET {det['id']} due to {det['reason']}"
                            )
                            mark_skipped(db, det["id"], etl_id, det["reason"])
                            continue

                        # Retrieve the full DET row remembered in the first
                        # loop and look up the record instances fetched for it.
                        received_det = first_complete_dets.pop(
                            det["record_id"])
                        redcap_record_instances = redcap_records.get(
                            received_det.document["record"])

                        if not redcap_record_instances:
                            LOG.debug(
                                f"REDCap record is missing or invalid.  Skipping REDCap DET {received_det.id}"
                            )
                            mark_skipped(db, received_det.id, etl_id,
                                         "invalid REDCap record")
                            continue

                        bundle = routine(
                            db=db,
                            cache=cache,
                            det=received_det,
                            redcap_record_instances=redcap_record_instances)

                        # A falsy bundle means the routine could not produce
                        # anything to load for this record.
                        if not bundle:
                            LOG.debug(
                                f"Skipping REDCap DET {received_det.id} due to insufficient data in REDCap record."
                            )
                            mark_skipped(db, received_det.id, etl_id,
                                         "insufficient data in record")
                            continue

                        if log_output:
                            print(as_json(bundle))

                        insert_fhir_bundle(db, bundle)
                        mark_loaded(db, received_det.id, etl_id, bundle['id'])
def redcap_det_uw_reopening(*, db: DatabaseSession, cache: TTLCache, det: dict,
    redcap_record_instances: List[REDCapRecord]) -> Optional[dict]:
    """
    Build a bundle resource from all REDCap record instances of one
    participant.

    *db* and *cache* are passed through to the residential location builder.
    *det* is accepted for the ETL routine interface but is not read here.
    *redcap_record_instances* holds every event/instance of the record; exactly
    one of them must be the enrollment event.

    Returns None (i.e. the whole record is skipped) when:

    * there are no record instances,
    * the number of enrollment instances is not exactly one,
    * any required enrollment instrument is not complete,
    * the participant's age is blank or under 18 and parental consent is
      missing,
    * the patient resource cannot be constructed, or
    * the enrollment encounter cannot be created.

    Individual instances (not the whole record) are skipped when their event
    is not one we process, when an encounter instance lacks the data needed to
    build an encounter, or when a specimen is believed received but cannot be
    constructed.  A follow-up encounter that cannot be created only drops the
    follow-up resources; the instance's other resources are still kept.
    """
    if not redcap_record_instances:
        LOG.warning("There are no record instances. Skipping record.")
        return None

    enrollments = [record for record in redcap_record_instances if record.event_name == ENROLLMENT_EVENT_NAME]

    if len(enrollments) != 1:
        LOG.warning(f"There are {len(enrollments)} enrollment instances for record: {redcap_record_instances[0].get('record_id')}. Skipping record.")
        return None

    enrollment = enrollments[0]

    incomplete_enrollment_instruments = {
        instrument
        for instrument in REQUIRED_ENROLLMENT_INSTRUMENTS
        if not is_complete(instrument, enrollment)
    }

    if incomplete_enrollment_instruments:
        LOG.debug(f"The following required enrollment instruments «{incomplete_enrollment_instruments}» are not yet marked complete.")
        return None

    # If the participant's age < 18 ensure we have parental consent.
    # A blank age is treated the same as a minor.
    # NOTE(review): `== False` does not match a `None` result from
    # is_complete(); kept as-is so behavior is unchanged -- confirm intent.
    if (enrollment['core_age_years'] == "" or int(enrollment['core_age_years']) < 18) and \
            (is_complete('parental_consent_form', enrollment) == False or enrollment['signature_parent'] == ''):
        LOG.debug("The participant is < 18 years old and we do not have parental consent. Skipping record.")
        return None

    # Create the participant resource entry and reference.
    # Assumes that the project language is the participant's preferred language.
    # A usable NetID takes precedence over demographics for identifying the
    # patient.
    netid = normalize_net_id(enrollment.get('netid'))

    if netid:
        patient_entry, patient_reference = create_patient_using_unique_identifier(
            sex = enrollment['core_sex'],
            preferred_language = LANGUAGE_CODE[enrollment.project.id],
            unique_identifier = netid,
            record = enrollment,
            system_identifier = INTERNAL_SYSTEM)
    else:
        patient_entry, patient_reference = create_patient_using_demographics(
            sex = enrollment['core_sex'],
            preferred_language = LANGUAGE_CODE[enrollment.project.id],
            first_name = enrollment['core_participant_first_name'],
            last_name = enrollment['core_participant_last_name'],
            birth_date = enrollment['core_birthdate'],
            zipcode = enrollment['core_zipcode'],
            record = enrollment,
            system_identifier = INTERNAL_SYSTEM)

    if not patient_entry:
        LOG.warning(f"Skipping record {enrollment.get('record_id')} with insufficient information to construct patient")
        return None

    # A missing/invalid birthdate is only logged; processing continues and the
    # (falsy) value is handed to the computed questionnaire builder below.
    birthdate = parse_date_from_string(enrollment.get('core_birthdate'))
    if not birthdate:
        LOG.warning(f"Record {enrollment.get('record_id')} has an invalid or missing `core_birthdate` value")

    location_resource_entries = build_residential_location_resources(
        db = db,
        cache = cache,
        housing_type = enrollment.get('core_housing_type'),
        primary_street_address = enrollment['core_home_street'],
        secondary_street_address = enrollment['core_apartment_number'],
        city = enrollment['core_home_city'],
        state = enrollment['core_home_state'],
        zipcode = enrollment['core_zipcode'],
        system_identifier = INTERNAL_SYSTEM)

    # Resources shared by the whole record; per-instance resources are
    # appended as each instance is processed.
    persisted_resource_entries = [patient_entry, *location_resource_entries]

    for redcap_record_instance in redcap_record_instances:

        event_type = None
        collection_method = None

        if redcap_record_instance.event_name == ENROLLMENT_EVENT_NAME:
            event_type = EventType.ENROLLMENT
            check_enrollment_data_quality(redcap_record_instance)
        elif redcap_record_instance.event_name == ENCOUNTER_EVENT_NAME:
            event_type = EventType.ENCOUNTER
            # Kiosk registration takes precedence over the test order survey
            # when both are complete.
            if is_complete('kiosk_registration_4c7f', redcap_record_instance):
                collection_method = CollectionMethod.KIOSK
            elif is_complete('test_order_survey', redcap_record_instance):
                collection_method = CollectionMethod.SWAB_AND_SEND
        else:
            LOG.info(f"Skipping event: {redcap_record_instance.event_name!r} for record "
            f"{redcap_record_instance.get('record_id')} because the event is not one "
            "that we process")
            continue

        # Skip an ENCOUNTER instance if we don't have the data we need to
        # create an encounter.
        if event_type == EventType.ENCOUNTER \
                and not is_complete('daily_attestation', redcap_record_instance) \
                and not collection_method \
                and not redcap_record_instance['testing_date']: # from the 'Testing Determination - Internal' instrument
            LOG.debug("Skipping record instance with insufficient information to construct the initial encounter")
            continue

        # site_reference refers to where the sample was collected
        record_location = None
        if collection_method == CollectionMethod.KIOSK:
            record_location = redcap_record_instance.get('location_type')

        location_site_map = {
            'bothell':  'UWBothell',
            'odegaard': 'UWOdegaardLibrary',
            'slu':      'UWSouthLakeUnion',
            'tacoma':   'UWTacoma',
            'uw_club':  'UWClub'
            }

        site_reference = create_site_reference(
            location = record_location,
            site_map = location_site_map,
            default_site = SWAB_AND_SEND_SITE,
            system_identifier = INTERNAL_SYSTEM)

        # Handle various symptoms.
        contained: List[dict] = []
        diagnosis: List[dict] = []

        # Map the various symptoms variables to their onset date.
        # For daily_symptoms_covid_like we don't know the actual onset date. The questions asks
        # "in the past 24 hours"
        # event_type is always ENCOUNTER or ENROLLMENT here because every
        # other event was skipped above, so symptom_onset_map is always bound.
        if event_type == EventType.ENCOUNTER:
            symptom_onset_map = {
                'daily_symptoms_covid_like': None,
                'symptoms': redcap_record_instance['symptom_onset'],
                'symptoms_kiosk': redcap_record_instance['symptom_duration_kiosk'],
                'symptoms_swabsend': redcap_record_instance['symptom_duration_swabsend']
            }
        elif event_type == EventType.ENROLLMENT:
            symptom_onset_map = {'symptoms_base': redcap_record_instance['symptom_onset_base']}

        contained, diagnosis = build_contained_and_diagnosis(
            patient_reference = patient_reference,
            record = redcap_record_instance,
            symptom_onset_map = symptom_onset_map,
            system_identifier = INTERNAL_SYSTEM)

        # Encounters with neither an enrollment event nor a collection method
        # keep a collection_code of None.
        collection_code = None
        if event_type == EventType.ENROLLMENT or collection_method == CollectionMethod.SWAB_AND_SEND:
            collection_code = CollectionCode.HOME_HEALTH
        elif collection_method == CollectionMethod.KIOSK:
            collection_code = CollectionCode.FIELD

        encounter_date = get_encounter_date(redcap_record_instance, event_type)

        initial_encounter_entry, initial_encounter_reference = create_encounter(
            encounter_id = create_encounter_id(redcap_record_instance, False),
            encounter_date = encounter_date,
            patient_reference = patient_reference,
            site_reference = site_reference,
            locations = location_resource_entries,
            diagnosis = diagnosis,
            contained = contained,
            collection_code = collection_code,
            system_identifier = INTERNAL_SYSTEM,
            record = redcap_record_instance)

        # Skip the entire record if we can't create the enrollment encounter.
        # Otherwise, just skip the record instance.
        if not initial_encounter_entry:
            if event_type == EventType.ENROLLMENT:
                LOG.warning("Skipping record because we could not create the enrollment encounter for record: "
                    f"{redcap_record_instance.get('record_id')}")
                return None
            else:
                LOG.warning("Skipping record instance with insufficient information to construct the initial encounter "
                    f"for record: {redcap_record_instance.get('record_id')}, instance: "
                    f"{redcap_record_instance.get('redcap_repeat_instance')}")
                continue

        specimen_entry = None
        specimen_observation_entry = None
        # A specimen counts as received once the instrument matching the
        # collection method has been completed.
        specimen_received = (collection_method == CollectionMethod.SWAB_AND_SEND and \
            is_complete('post_collection_data_entry_qc', redcap_record_instance)) or \
            (collection_method == CollectionMethod.KIOSK and \
            is_complete('kiosk_registration_4c7f', redcap_record_instance))

        if specimen_received:
            # Use barcode fields in this order.
            prioritized_barcodes = [
                redcap_record_instance["collect_barcode_kiosk"],
                redcap_record_instance["return_utm_barcode"],
                redcap_record_instance["pre_scan_barcode"]]

            specimen_entry, specimen_reference = create_specimen(
                prioritized_barcodes = prioritized_barcodes,
                patient_reference = patient_reference,
                collection_date = get_collection_date(redcap_record_instance, collection_method),
                sample_received_time = redcap_record_instance['samp_process_date'],
                able_to_test = redcap_record_instance['able_to_test'],
                system_identifier = INTERNAL_SYSTEM)

            specimen_observation_entry = create_specimen_observation_entry(
                specimen_reference = specimen_reference,
                patient_reference = patient_reference,
                encounter_reference = initial_encounter_reference)
        else:
            LOG.info("Creating encounter for record instance without sample")

        if specimen_received and not specimen_entry:
            LOG.warning("Skipping record instance. We think the specimen was received, "
                 "but we're unable to create the specimen_entry for record: "
                 f"{redcap_record_instance.get('record_id')}, instance: {redcap_record_instance.get('redcap_repeat_instance')}"
                 )
            continue

        computed_questionnaire_entry = None
        enrollment_questionnaire_entry = None
        daily_questionnaire_entry = None
        testing_determination_internal_questionnaire_entry = None
        follow_up_encounter_entry = None
        follow_up_questionnaire_entry = None
        follow_up_computed_questionnaire_entry = None

        computed_questionnaire_entry = create_computed_questionnaire_response(
            redcap_record_instance, patient_reference, initial_encounter_reference,
            birthdate, parse_date_from_string(initial_encounter_entry['resource']['period']['start']))

        if event_type == EventType.ENROLLMENT:
            enrollment_questionnaire_entry = create_enrollment_questionnaire_response(
            enrollment, patient_reference, initial_encounter_reference)
        else:
            testing_determination_internal_questionnaire_entry = \
                create_testing_determination_internal_questionnaire_response(
                redcap_record_instance, patient_reference, initial_encounter_reference)

            daily_questionnaire_entry = \
                create_daily_questionnaire_response(
                redcap_record_instance, patient_reference, initial_encounter_reference)

            if is_complete('week_followup', redcap_record_instance):
                # Prefer the survey timestamp and fall back to `fu_timestamp`.
                # Only parse the fallback when it has a value: strptime()
                # raises TypeError on None, which would abort the whole record.
                follow_up_date = extract_date_from_survey_timestamp(redcap_record_instance, 'week_followup')
                fu_timestamp = redcap_record_instance.get('fu_timestamp')
                if not follow_up_date and fu_timestamp:
                    follow_up_date = datetime.strptime(fu_timestamp,
                        '%Y-%m-%d %H:%M:%S').strftime('%Y-%m-%d')

                # Don't set locations because the f/u survey doesn't ask for home address.
                # NOTE(review): assumes create_encounter() returns a falsy
                # entry for a missing date, as it is relied upon to do for the
                # initial encounter above -- confirm.
                follow_up_encounter_entry, follow_up_encounter_reference = create_encounter(
                    encounter_id = create_encounter_id(redcap_record_instance, True),
                    encounter_date = follow_up_date,
                    patient_reference = patient_reference,
                    site_reference = site_reference,
                    collection_code = CollectionCode.HOME_HEALTH,
                    parent_encounter_reference = initial_encounter_reference,
                    encounter_reason_code = follow_up_encounter_reason_code(),
                    encounter_identifier_suffix = "_follow_up",
                    system_identifier = INTERNAL_SYSTEM,
                    record = redcap_record_instance)

                if follow_up_encounter_entry:
                    follow_up_questionnaire_entry = create_follow_up_questionnaire_response(
                        redcap_record_instance, patient_reference, follow_up_encounter_reference)
                    follow_up_computed_questionnaire_entry = create_computed_questionnaire_response(
                        redcap_record_instance, patient_reference, follow_up_encounter_reference,
                        birthdate, parse_date_from_string(follow_up_encounter_entry['resource']['period']['start']))
                else:
                    # Without an encounter there is nothing for the follow-up
                    # questionnaires to reference, so drop only the follow-up
                    # resources and keep the rest of this instance.
                    LOG.warning("Skipping follow-up encounter with insufficient information "
                        f"for record: {redcap_record_instance.get('record_id')}, instance: "
                        f"{redcap_record_instance.get('redcap_repeat_instance')}")


        current_instance_entries = [
            initial_encounter_entry,
            computed_questionnaire_entry,
            enrollment_questionnaire_entry,
            testing_determination_internal_questionnaire_entry,
            daily_questionnaire_entry,
            specimen_entry,
            specimen_observation_entry,
            follow_up_encounter_entry,
            follow_up_questionnaire_entry,
            follow_up_computed_questionnaire_entry
        ]

        # Entries that were never created are still None; drop them.
        persisted_resource_entries.extend(list(filter(None, current_instance_entries)))


    return create_bundle_resource(
        bundle_id = str(uuid4()),
        timestamp = datetime.now().astimezone().isoformat(),
        source = f"{REDCAP_URL}{enrollment.project.id}/{enrollment.id}",
        entries = list(filter(None, persisted_resource_entries))
    )
Esempio n. 11
0
def max_instance(instrument: str, redcap_record: List[dict], since: int,
    complete: bool=True) -> Optional[int]:
    """
    Find the highest instance number in *redcap_record* whose *instrument*
    completion state matches *complete*, looking only at instances numbered
    *since* or later.

    With *complete* set to True (the default) only instances where the
    instrument is marked complete are considered; with False only incomplete
    or unverified ones are.  Instances whose completion field is blank are
    never considered, whatever the value of *complete*.  Passing None for
    *since* disables the instance-number filter.

    Returns None if no matching instrument instance is found.

    >>> max_instance('kiosk_registration_4c7f', [ \
        {'redcap_repeat_instance': '1', 'kiosk_registration_4c7f_complete': '2'}], \
        since=0)
    1

    >>> max_instance('kiosk_registration_4c7f', [ \
        {'redcap_repeat_instance': '1', 'kiosk_registration_4c7f_complete': ''}, \
        {'redcap_repeat_instance': '2', 'kiosk_registration_4c7f_complete': '1'}, \
        {'redcap_repeat_instance': '3', 'kiosk_registration_4c7f_complete': '0'}], \
        since=0)

    >>> max_instance('kiosk_registration_4c7f', [ \
        {'redcap_repeat_instance': '1', 'kiosk_registration_4c7f_complete': ''}, \
        {'redcap_repeat_instance': '2', 'kiosk_registration_4c7f_complete': '1'}, \
        {'redcap_repeat_instance': '3', 'kiosk_registration_4c7f_complete': '0'}], \
        since=0, complete=False)
    3

    >>> max_instance('kiosk_registration_4c7f', [ \
        {'redcap_repeat_instance': '1', 'kiosk_registration_4c7f_complete': '2'}, \
        {'redcap_repeat_instance': '2', 'kiosk_registration_4c7f_complete': '2'}, \
        {'redcap_repeat_instance': '3', 'kiosk_registration_4c7f_complete': '0'}], \
        since=2)
    2

    >>> max_instance('kiosk_registration_4c7f', [ \
        {'redcap_repeat_instance': '1', 'kiosk_registration_4c7f_complete': '0'}, \
        {'redcap_repeat_instance': '2', 'kiosk_registration_4c7f_complete': '0'}, \
        {'redcap_repeat_instance': '3', 'kiosk_registration_4c7f_complete': '2'}], \
        since=2, complete=False)
    2

    >>> max_instance('kiosk_registration_4c7f', [ \
        {'redcap_repeat_instance': '1', 'kiosk_registration_4c7f_complete': '2'}, \
        {'redcap_repeat_instance': '2', 'kiosk_registration_4c7f_complete': '2'}, \
        {'redcap_repeat_instance': '3', 'kiosk_registration_4c7f_complete': '0'}], \
        since=3)

    >>> max_instance('test_order_survey', [ \
        {'redcap_repeat_instance': '1', 'test_order_survey_complete': '1', \
            'kiosk_registration_4c7f_complete': ''}, \
        {'redcap_repeat_instance': '2', 'test_order_survey_complete': '', \
            'kiosk_registration_4c7f_complete': '2'}], \
        since=0)
    """
    completion_field = f"{instrument}_complete"

    # First pass: keep the events whose completion state is what was asked
    # for, ignoring any event with a blank completion field.
    matching_events = []
    for event in redcap_record:
        if event[completion_field] == '':
            continue
        if is_complete(instrument, event) == complete:
            matching_events.append(event)

    # Second pass: keep only instances on or after the latest instance where
    # testing was triggered. If no such instance exists, do not filter. Note:
    # at this point in the code, we already are only considering instances in
    # the past week.
    if since is not None:
        matching_events = [
            event
            for event in matching_events
            if int(event['redcap_repeat_instance']) >= since
        ]

    return _max_instance(matching_events) if matching_events else None
Esempio n. 12
0
        def decorated(*args, db: DatabaseSession, log_output: bool, **kwargs):
            """
            Run the wrapped ETL *routine* over every pending REDCap DET.

            Selects (and row-locks) the rows of ``receiving.redcap_det`` whose
            processing log does not yet contain this ETL's id and whose
            document contains ``det_contains``, in id order.  Each DET is
            handled inside its own savepoint on *db*: it is marked skipped when
            its instrument is incomplete (unless ``include_incomplete`` is
            set), when no REDCap record can be fetched for it, when a required
            instrument is incomplete, or when the routine produces no bundle.
            Otherwise the bundle is inserted and the DET is marked loaded.

            When *log_output* is true, each bundle is also printed as JSON.
            Positional and extra keyword arguments are accepted but not used
            here.
            """
            LOG.debug(
                f"Starting the REDCap DET ETL routine {name}, revision {revision}"
            )

            pending_dets = db.cursor(f"redcap-det {name}")
            pending_dets.execute(
                """
                select redcap_det_id as id, document
                  from receiving.redcap_det
                 where not processing_log @> %s
                   and document::jsonb @> %s
                 order by id
                   for update
                """, (Json([etl_id]), Json(det_contains)))

            with pickled_cache(CACHE_FILE) as cache:
                for det in pending_dets:
                    with db.savepoint(f"redcap_det {det.id}"):
                        LOG.info(f"Processing REDCap DET {det.id}")

                        triggering_instrument = det.document['instrument']

                        # The REDCap record is only pulled when incomplete
                        # DETs were explicitly requested or the instrument
                        # that fired this DET is complete.
                        if not (include_incomplete
                                or is_complete(triggering_instrument, det.document)):
                            LOG.debug(
                                f"Skipping incomplete or unverified REDCap DET {det.id}"
                            )
                            mark_skipped(db, det.id, etl_id)
                            continue

                        redcap_record = get_redcap_record_from_det(
                            det.document, raw_coded_values)

                        if not redcap_record:
                            LOG.debug(
                                f"REDCap record is missing or invalid.  Skipping REDCap DET {det.id}"
                            )
                            mark_skipped(db, det.id, etl_id)
                            continue

                        # Every required instrument must be complete before
                        # the record is handed to the routine.
                        pending_instruments = {
                            required
                            for required in required_instruments
                            if not is_complete(required, redcap_record)
                        }

                        if pending_instruments:
                            LOG.debug(
                                f"The following required instruments «{pending_instruments}» are not yet marked complete. "
                                f"Skipping REDCap DET {det.id}")
                            mark_skipped(db, det.id, etl_id)
                            continue

                        bundle = routine(
                            db=db,
                            cache=cache,
                            det=det,
                            redcap_record=redcap_record)

                        # A falsy bundle means the routine chose not to
                        # produce anything for this DET.
                        if not bundle:
                            mark_skipped(db, det.id, etl_id)
                            continue

                        if log_output:
                            print(as_json(bundle))

                        insert_fhir_bundle(db, bundle)
                        mark_loaded(db, det.id, etl_id, bundle['id'])
def redcap_det_childcare(
        *, db: DatabaseSession, cache: TTLCache, det: dict,
        redcap_record_instances: List[REDCapRecord]) -> Optional[dict]:
    """
    Build a bundle resource from all REDCap record instances of one
    participant.

    *db* and *cache* are passed through to the residential location builder.
    *det* is accepted for the ETL routine interface but is not read here.
    *redcap_record_instances* must be non-empty and contain exactly one
    instance whose event name starts with the enrollment prefix; both
    conditions are enforced with ``assert`` and so raise AssertionError.

    Returns None (i.e. the whole record is skipped) when a required enrollment
    instrument is incomplete, when the participant's age is blank or under 18
    and there is no parent signature, when the patient resource cannot be
    constructed, or when the enrollment encounter cannot be created.

    Individual instances are skipped when their event name or study arm is
    unexpected, when an encounter instance has no survey or sample data, when
    no encounter date can be determined, or when the encounter itself cannot
    be created.
    """

    # NOTE(review): asserts are stripped under `python -O`; with them removed
    # an empty enrollment list would surface as an IndexError below.
    assert redcap_record_instances is not None and len(redcap_record_instances) > 0, \
        'The redcap_record_instances list was not populated.'

    enrollments = [record for record in redcap_record_instances if \
        record.event_name.startswith(ENROLLMENT_EVENT_NAME_PREFIX)]
    assert len(enrollments) == 1, \
        f'Record had {len(enrollments)} enrollments.'

    enrollment = enrollments[0]

    incomplete_enrollment_instruments = {
        instrument
        for instrument in REQUIRED_ENROLLMENT_INSTRUMENTS
        if not is_complete(instrument, enrollment)
    }

    if incomplete_enrollment_instruments:
        LOG.debug(
            f'The following required enrollment instruments «{incomplete_enrollment_instruments}» are not yet marked complete.'
        )
        return None

    # If the participant's age < 18 ensure we have parental consent.
    # A blank age is treated the same as a minor.
    if (enrollment['core_age_years'] == "" or int(enrollment['core_age_years']) < 18) and \
        enrollment['parent_signature'] == '':
        LOG.debug(
            "The participant is < 18 years old and we do not have parental consent. Skipping record."
        )
        return None

    # Create the participant resource entry and reference.
    patient_entry, patient_reference = create_patient_using_demographics(
        sex='unknown',  # Set to unknown so that we don't ingest identifiers
        preferred_language=enrollment.get('language'),
        first_name=enrollment['core_participant_first_name'],
        last_name=enrollment['core_participant_last_name'],
        birth_date=enrollment['core_birthdate'],
        zipcode=enrollment['core_zipcode'],
        record=enrollment,
        system_identifier=INTERNAL_SYSTEM)

    if not patient_entry:
        LOG.warning(
            'Skipping record with insufficient information to construct patient'
        )
        return None

    location_resource_entries = build_residential_location_resources(
        db=db,
        cache=cache,
        housing_type=enrollment.get('core_housing_type'),
        primary_street_address=enrollment['core_home_street'],
        secondary_street_address=enrollment['core_apartment_number'],
        city=enrollment['core_home_city'],
        state=enrollment['core_home_state'],
        zipcode=enrollment['core_zipcode'],
        system_identifier=INTERNAL_SYSTEM)

    # Resources shared by the whole record; per-instance resources are
    # appended as each instance is processed.
    persisted_resource_entries = [patient_entry, *location_resource_entries]

    # Read once from the enrollment instance; used below as the site location
    # for primary-arm encounters.
    childcare_center = enrollment['childcare_center']

    for redcap_record_instance in redcap_record_instances:

        event_type = None
        study_arm = None

        # Classify the instance by its event name; anything that is neither
        # an enrollment nor an encounter event is logged and skipped.
        if redcap_record_instance.event_name.startswith(
                ENROLLMENT_EVENT_NAME_PREFIX):
            event_type = EventType.ENROLLMENT
        elif redcap_record_instance.event_name.startswith(ENCOUNTER_EVENT_NAME_PREFIX) \
            or redcap_record_instance.event_name == UNSCHEDULED_ENCOUNTER_EVENT_NAME:
            event_type = EventType.ENCOUNTER
        else:
            LOG.error(
                f'The record instance has an unexpected event name: {redcap_record_instance.event_name}'
            )
            continue

        # The study arm is taken from the `_arm_N` marker in the event name.
        if '_arm_1' in redcap_record_instance.event_name:
            study_arm = StudyArm.PRIMARY
        elif '_arm_2' in redcap_record_instance.event_name:
            study_arm = StudyArm.SECONDARY
        else:
            LOG.error(
                f'The record instance has an unexpected study arm in the event name: {redcap_record_instance.event_name}'
            )
            continue

        # Skip an ENCOUNTER instance if we don't have the data we need to
        # create an encounter. Require the participant to have provided
        # survey data or a sample.
        if event_type == EventType.ENCOUNTER \
            and not is_complete('symptom_check', redcap_record_instance) \
            and not is_complete('swab_kit_reg', redcap_record_instance) \
            and not is_complete('post_collection_data_entry_qc', redcap_record_instance):
            LOG.debug(
                'Skipping record instance with insufficient information to construct the encounter'
            )
            continue

        # From this point on, log at the `warning` level if we have to skip the encounter.
        # That situation would be one we'd need to dig into.

        # Create the site reference for the encounter. For primary participants, use
        # a completed `return_pickup` survey to indicate that they are having their
        # sample picked up from home instead of returning it to a dropbox.
        site_map = {
            'childcare_room_70th': SANDPOINT_SITE,
            'childcare_room_radford': RADFORD_SITE,
            'childcare_room_portage': PORTAGE_BAY_SITE,
            'childcare_room_minor': MINOR_SITE,
            'childcare_room_maintinytots': MAINTINYTOTS_SITE,
            'childcare_room_easttinytots': EASTTINYTOTS_SITE,
            'childcare_room_dlbeacon': DLBEACON_SITE,
            'childcare_room_dlmag': DLMAG_SITE,
            'childcare_room_mighty': MIGHTY_SITE,
            'childcare_room_birch': BIRCH_SITE,
            'childcare_room_mothers': MOTHERS_SITE,
            'childcare_room_wcampus': UWCHILDRENS_WEST_SITE,
            'childcare_room_laurel': UWCHILDRENS_LAUREL_SITE
        }

        location = None  # No location will cause `create_site_reference` to use the `default_site` value.

        # Only primary-arm encounters without a completed `return_pickup`
        # survey are attributed to the childcare center.
        if study_arm == StudyArm.PRIMARY and event_type == EventType.ENCOUNTER and \
            not is_complete('return_pickup', redcap_record_instance):
            location = childcare_center

        site_reference = create_site_reference(
            location=location,
            site_map=site_map,
            default_site=SWAB_AND_SEND_SITE,
            system_identifier=INTERNAL_SYSTEM)

        # Handle various symptoms.
        # These stay empty for primary-arm enrollment instances, which skip
        # the branch below.
        contained: List[dict] = []
        diagnosis: List[dict] = []

        # Map the various symptoms variables to their onset date.
        # The PRIMARY arm does not get the symptom survey at enrollment,
        # but the SECONDARY arm does.
        if event_type == EventType.ENCOUNTER or study_arm == StudyArm.SECONDARY:
            symptom_onset_map = {
                'symptoms_check': redcap_record_instance['symptom_duration'],
            }
            contained, diagnosis = build_contained_and_diagnosis(
                patient_reference=patient_reference,
                record=redcap_record_instance,
                symptom_onset_map=symptom_onset_map,
                system_identifier=INTERNAL_SYSTEM)

        # Without a date there is nothing to anchor the encounter to, so the
        # instance is skipped (this applies to enrollment instances too).
        encounter_date = get_encounter_date(redcap_record_instance, event_type)
        if not encounter_date:
            LOG.warning(
                'Skipping record instance because we could not create an encounter_date'
            )
            continue

        encounter_entry, encounter_reference = create_encounter(
            encounter_id=create_encounter_id(redcap_record_instance),
            encounter_date=encounter_date,
            patient_reference=patient_reference,
            site_reference=site_reference,
            locations=location_resource_entries,
            diagnosis=diagnosis,
            contained=contained,
            collection_code=COLLECTION_CODE,
            system_identifier=INTERNAL_SYSTEM,
            record=redcap_record_instance)

        # Skip the entire record if we can't create the enrollment encounter.
        # Otherwise, just skip the record instance.
        if not encounter_entry:
            if event_type == EventType.ENROLLMENT:
                LOG.warning(
                    'Skipping record because we could not create the enrollment encounter'
                )
                return None
            else:
                LOG.warning(
                    'Skipping record instance with insufficient information to construct the encounter'
                )
                continue

        specimen_entry = None
        specimen_observation_entry = None
        # A specimen counts as received once the post-collection QC
        # instrument has been completed.
        specimen_received = is_complete('post_collection_data_entry_qc',
                                        redcap_record_instance)

        if specimen_received:
            # Use barcode fields in this order.
            prioritized_barcodes = [
                redcap_record_instance[
                    'return_utm_barcode'],  # Post Collection Data Entry Qc
                redcap_record_instance['utm_tube_barcode'],  # Scan Kit Reg
                redcap_record_instance[
                    'pre_scan_barcode']  # Back End Mail Scans
            ]

            # NOTE(review): unlike the encounter above, a falsy specimen_entry
            # is not checked here; it is simply filtered out of the entries
            # below -- confirm that is intended.
            specimen_entry, specimen_reference = create_specimen(
                prioritized_barcodes=prioritized_barcodes,
                patient_reference=patient_reference,
                collection_date=get_collection_date(redcap_record_instance),
                sample_received_time=redcap_record_instance[
                    'samp_process_date'],
                able_to_test=redcap_record_instance['able_to_test'],
                system_identifier=INTERNAL_SYSTEM)

            specimen_observation_entry = create_specimen_observation_entry(
                specimen_reference=specimen_reference,
                patient_reference=patient_reference,
                encounter_reference=encounter_reference)
        else:
            LOG.info('Creating encounter for record instance without sample')

        enrollment_questionnaire_entry = None
        encounter_questionnaire_entry = None
        operational_questionnaire_entry = None

        if event_type == EventType.ENROLLMENT:
            enrollment_questionnaire_entry = create_enrollment_questionnaire_response(
                enrollment, study_arm, patient_reference, encounter_reference)

        # The SECONDARY arm gets "encounter" surveys in the ENROLLMENT event.
        if event_type == EventType.ENCOUNTER or study_arm == StudyArm.SECONDARY:
            encounter_questionnaire_entry = create_encounter_questionnaire_response(
                redcap_record_instance, patient_reference, encounter_reference)

            operational_questionnaire_entry = create_operational_questionnaire_response(
                redcap_record_instance, patient_reference, encounter_reference)

        current_instance_entries = [
            encounter_entry,
            enrollment_questionnaire_entry,
            encounter_questionnaire_entry,
            operational_questionnaire_entry,
            specimen_entry,
            specimen_observation_entry,
        ]

        # Entries that were never created are still None; drop them.
        persisted_resource_entries.extend(
            list(filter(None, current_instance_entries)))

    return create_bundle_resource(
        bundle_id=str(uuid4()),
        timestamp=datetime.now().astimezone().isoformat(),
        source=f'{REDCAP_URL}{enrollment.project.id}/{enrollment.id}',
        entries=list(filter(None, persisted_resource_entries)))