예제 #1
0
파일: kit.py 프로젝트: sonali-mhihim/id3c
def update_test_strip(db: DatabaseSession, document: dict):
    """
    Resolve the test strip barcode stored in *document* to an identifier.

    The ``"test_strip"`` entry of *document* is replaced in place by a dict
    with ``"uuid"`` and ``"barcode"`` keys.  The UUID is only filled in when
    the barcode matches an identifier from one of the expected
    ``"test-strips"`` identifier sets; otherwise it stays ``None`` and a
    warning is logged.
    """
    strip_barcode = document["test_strip"]
    strip_identifier = find_identifier(db, strip_barcode)

    # Start from the "unresolved" shape; it is only replaced further down
    # when the barcode fully checks out.
    document["test_strip"] = {"uuid": None, "barcode": strip_barcode}

    if not strip_identifier:
        LOG.warning(f"Test strip has unknown barcode «{strip_barcode}»")
        return

    if strip_identifier.set_name not in expected_identifier_sets["test-strips"]:
        LOG.warning(
            f"Test strip barcode found in unexpected identifier set «{strip_identifier.set_name}»"
        )
        return

    document["test_strip"] = {
        "uuid": strip_identifier.uuid,
        "barcode": strip_identifier.barcode,
    }
예제 #2
0
def process_samples(db: DatabaseSession, encounter_id: int, document: dict):
    """
    Link the collected samples of an enrollment *document* to an encounter.

    Each entry of ``document["sampleCodes"]`` whose barcode resolves to a
    known identifier is upserted as a skeletal sample record that carries only
    the collection identifier and a link back to *encounter_id*.  Sample
    manifests generated by the processing lab will usually be loaded later and
    fill in the rest of the sample record.

    Entries without a barcode, with a type other than ``ClinicSwab``, or with
    an unknown barcode are logged and skipped.
    """
    def known_identifier(sample: dict):
        """
        Return the identifier for *sample*'s barcode, or ``None`` (after
        logging the reason) when the sample has to be skipped.
        """
        barcode = sample.get("code")

        if not barcode:
            LOG.warning(f"Skipping collected sample with no barcode")
            return None

        # XXX TODO: Stop hardcoding this and handle other types.
        # ScannedSelfSwab and ManualSelfSwabbed are kit barcodes,
        # not collection barcodes.  TestStrip is an identifier
        # UUID, not barcode.
        #   - trs, 17 May 2019
        if sample["type"] != "ClinicSwab":
            LOG.warning(
                f"Skipping collected sample with unknown type {sample['type']}"
            )
            return None

        LOG.debug(f"Looking up collected sample code «{barcode}»")
        identifier = find_identifier(db, barcode)

        if not identifier:
            LOG.warning(
                f"Skipping collected {sample['type']} sample with unknown barcode «{barcode}»"
            )
            return None

        assert identifier.set_name in EXPECTED_COLLECTION_IDENTIFIER_SETS, \
            f"{sample['type']} sample with unexpected «{identifier.set_name}» barcode «{barcode}»"

        return identifier

    for sample in document["sampleCodes"]:
        identifier = known_identifier(sample)

        if identifier is None:
            continue

        # XXX TODO: Relationally model sample type after we choose
        # a standard vocabulary (LOINC or SNOMED or whatever FHIR
        # normalizes?)
        #   -trs, 8 May 2019
        details = {"type": sample["type"]}

        upsert_sample(
            db,
            update_identifiers=False,
            overwrite_collection_date=False,
            identifier=None,
            collection_identifier=identifier.uuid,
            collection_date=None,
            encounter_id=encounter_id,
            additional_details=details,
        )
예제 #3
0
def sample_identifier(db: DatabaseSession, barcode: str) -> Optional[str]:
    """
    Look up the scanned sample *barcode* within warehouse.identifier.

    Returns the UUID of the matching identifier, or ``None`` when the barcode
    is unknown.  A matching identifier must belong to the «samples» set.
    """
    identifier = find_identifier(db, barcode)

    if not identifier:
        return None

    assert identifier.set_name == "samples", \
        f"Identifier found in set «{identifier.set_name}», not «samples»"

    return identifier.uuid
예제 #4
0
def sample_identifier(db: DatabaseSession, barcode: str) -> Optional[str]:
    """
    Look up a scanned sample or collection *barcode* within
    warehouse.identifier.

    Will be sample barcode if from UW and collection barcode if from SCH.

    Returns the UUID of the matching identifier, or ``None`` when the barcode
    is unknown.
    """
    identifier = find_identifier(db, barcode)

    if not identifier:
        return None

    assert identifier.set_name in ("samples", "collections-seattleflu.org"), \
        f"Identifier found in set «{identifier.set_name}», not «samples»"

    return identifier.uuid
def sample_identifier(db: DatabaseSession, document: dict) -> Optional[str]:
    """
    Resolve the ``barcode`` entry of *document* to the UUID of the matching
    identifier within warehouse.identifier.

    Returns ``None`` when *document* has no (or an empty) barcode, or when the
    barcode is unknown.  A matching identifier must belong to the
    «collections-seattleflu.org» set.
    """
    set_name = 'collections-seattleflu.org'

    barcode = document.get('barcode')
    identifier = find_identifier(db, barcode) if barcode else None

    if not identifier:
        return None

    assert identifier.set_name == set_name, \
        f"Identifier found in set «{identifier.set_name}», not «{set_name}»"

    return identifier.uuid
예제 #6
0
def etl_manifest(*, db: DatabaseSession):
    """
    Load unprocessed manifest records into the warehouse as samples.

    Every record of ``receiving.manifest`` whose processing log lacks an entry
    for this ETL name and revision is handled inside its own savepoint.  The
    record's sample barcode (required) and collection barcode (optional) are
    resolved to identifiers and checked against the expected identifier sets;
    the sample is then upserted and the record marked as loaded.  Records with
    an unknown sample or collection barcode are marked as skipped.

    Raises :class:`AssertionError` when a barcode resolves to an identifier
    from an unexpected set.
    """
    LOG.debug(f"Starting the manifest ETL routine, revision {REVISION}")

    # XXX TODO: Stop hardcoding valid identifier sets.  Instead, accept them as
    # an option or config (and validate option choices against what's actually
    # in the database).  We won't want to validate using click.option(),
    # because that would necessitate a database connection simply to run
    # bin/id3c at all.
    #   -trs, 13 May 2019
    expected_identifier_sets = {
        "samples": {"samples"},
        "collections": {
            "collections-environmental",
            "collections-fluathome.org",
            "collections-household-intervention",
            "collections-household-intervention-asymptomatic",
            "collections-household-observation",
            "collections-household-observation-asymptomatic",
            "collections-kiosks",
            "collections-kiosks-asymptomatic",
            "collections-seattleflu.org",
            "collections-swab&send",
            "collections-swab&send-asymptomatic",
            "collections-self-test",
            "collections-scan",
            "collections-haarvi",
            "samples-haarvi",
        },
        "rdt": {"collections-fluathome.org"}
    }

    # Fetch and iterate over samples that aren't processed
    #
    # Rows we fetch are locked for update so that two instances of this
    # command don't try to process the same samples.
    LOG.debug("Fetching unprocessed manifest records")

    manifest = db.cursor("manifest")
    already_processed = Json([{ "etl": ETL_NAME, "revision": REVISION }])
    manifest.execute("""
        select manifest_id as id, document
          from receiving.manifest
         where not processing_log @> %s
         order by id
           for update
        """, (already_processed,))

    for manifest_record in manifest:
        with db.savepoint(f"manifest record {manifest_record.id}"):
            LOG.info(f"Processing record {manifest_record.id}")

            # The barcodes are pop()ed off this very dict on purpose; see the
            # note above the upsert below.
            document = manifest_record.document

            # Convert sample barcode to full identifier, ensuring it's
            # known and from the correct identifier set.
            sample_barcode = document.pop("sample")
            sample_identifier = find_identifier(db, sample_barcode)

            if not sample_identifier:
                LOG.warning(f"Skipping sample with unknown sample barcode «{sample_barcode}»")
                mark_skipped(db, manifest_record.id)
                continue

            if document.get("sample_type") == "rdt":
                assert sample_identifier.set_name in expected_identifier_sets["rdt"], \
                    (f"Sample identifier found in set «{sample_identifier.set_name}», " +
                    f"not {expected_identifier_sets['rdt']}")
            else:
                assert sample_identifier.set_name in expected_identifier_sets["samples"], \
                    (f"Sample identifier found in set «{sample_identifier.set_name}», " +
                    f"not {expected_identifier_sets['samples']}")

            # Optionally, convert the collection barcode to full
            # identifier, ensuring it's known and from the correct
            # identifier set.
            collection_barcode = document.pop("collection", None)
            collection_identifier = None
            collection_uuid = None

            if collection_barcode:
                collection_identifier = find_identifier(db, collection_barcode)

                if not collection_identifier:
                    LOG.warning(f"Skipping sample with unknown collection barcode «{collection_barcode}»")
                    mark_skipped(db, manifest_record.id)
                    continue

                assert collection_identifier.set_name in expected_identifier_sets["collections"], \
                    f"Collection identifier found in set «{collection_identifier.set_name}», not {expected_identifier_sets['collections']}"

                collection_uuid = collection_identifier.uuid

            # Upsert sample cooperatively with enrollments ETL routine
            #
            # The details document was intentionally modified by two pop()s
            # earlier to remove barcodes that were looked up.  The
            # rationale is that we want just one clear place in the
            # warehouse for each piece of information.
            sample, status = upsert_sample(db,
                identifier            = sample_identifier.uuid,
                collection_identifier = collection_uuid,
                collection_date       = document.get("date"),
                additional_details    = document)

            mark_loaded(db, manifest_record.id,
                status = status,
                sample_id = sample.id)

            LOG.info(f"Finished processing manifest record {manifest_record.id}")
예제 #7
0
파일: fhir.py 프로젝트: andysodt/id3c
def process_encounter_samples(db: DatabaseSession, encounter: Encounter, encounter_id: int,
    related_resources: Dict[str, List[DomainResource]]):
    """
    Given a dict of *related_resources*, finds Specimens linked to the given
    *encounter*.  Linked Specimens are attached to the given *encounter_id*
    via newly upserted samples in ID3C.

    Specimens whose barcode is unknown are logged and skipped.

    Raises :class:`Exception` when a specimen has no usable barcode, and
    :class:`AssertionError` when a barcode resolves to an identifier that is
    in neither the expected collection sets nor the expected sample sets.
    """
    def is_related_specimen(observation: Observation, encounter: Encounter) -> bool:
        return bool(observation.encounter) and observation.encounter.resolved(Encounter) == encounter

    def related_specimens(encounter: Encounter, resources: Dict[str, List[DomainResource]]) -> Optional[List[Specimen]]:
        """
        Given a dict of FHIR *resources*, returns a list of Specimens linked to a given *encounter*.
        """
        observations = resources.get('Observation')
        if not observations:
            return None

        specimens = [
            observation.specimen.resolved(Specimen)
            for observation in observations
            if is_related_specimen(observation, encounter)
        ]

        if not specimens:
            LOG.warning("Encounter specimen not found.")
            return None

        return specimens

    specimens = related_specimens(encounter, related_resources)
    if not specimens:
        return

    for specimen in specimens:
        # NOTE(review): presumably identifier() yields None when the specimen
        # has no identifier for this system — confirm.  Falling back to ""
        # lets the explicit error below fire instead of an AttributeError
        # from calling .strip() on None.
        barcode = (identifier(specimen, f"{INTERNAL_SYSTEM}/sample") or "").strip()

        if not barcode:
            raise Exception("No barcode detectable. Either the barcode identification system is "
                            f"not «{INTERNAL_SYSTEM}/sample», or the barcode value is empty, which "
                            "violates the FHIR docs.")

        LOG.debug(f"Looking up collected specimen barcode «{barcode}»")
        specimen_identifier = find_identifier(db, barcode)

        if not specimen_identifier:
            LOG.warning(f"Skipping collected specimen with unknown barcode «{barcode}»")
            continue

        # Classify the identifier by its set.  The failure is raised
        # explicitly (rather than via an assert statement) so the check still
        # runs when Python is started with -O.
        sample_identifier: Optional[str] = None
        collection_identifier: Optional[str] = None

        if specimen_identifier.set_name in EXPECTED_COLLECTION_IDENTIFIER_SETS:
            collection_identifier = specimen_identifier.uuid
        elif specimen_identifier.set_name in EXPECTED_SAMPLE_IDENTIFIER_SETS:
            sample_identifier = specimen_identifier.uuid
        else:
            raise AssertionError(
                f"Specimen with unexpected «{specimen_identifier.set_name}» barcode «{barcode}»")

        sample_details = {}
        if specimen.note:
            # Only the first note is carried over.
            sample_details['note'] = specimen.note[0].text

        additional_details = { **specimen.type.as_json(), **sample_details}

        # XXX TODO: Improve details object here; the current approach produces
        # an object like {"coding": [{…}]} which isn't very useful.
        upsert_sample(db,
            identifier              = sample_identifier,
            collection_identifier   = collection_identifier,
            encounter_id            = encounter_id,
            additional_details      = additional_details)
예제 #8
0
파일: fhir.py 프로젝트: andysodt/id3c
def process_diagnostic_report_bundle_entry(db: DatabaseSession, bundle: Bundle, entry: BundleEntry):
    """
    Given a DiagnosticReport resource *entry* from a given *bundle*, processes
    the relevant information into the database.

    Entries of any other resource type are ignored.  For every specimen
    reference of the report, the specimen barcode is resolved to an
    identifier, the matching sample is looked up (or created, for collection
    identifiers only), and the report's presence/absence tests are processed
    against that sample.  References without a usable barcode, or with an
    unknown barcode, are skipped.

    Raises :class:`AssertionError` when a barcode resolves to an identifier
    that is in neither the expected collection sets nor the expected sample
    sets, and :class:`SampleNotFoundError` when a sample identifier has no
    existing sample.
    """
    resource, resource_type = resource_and_resource_type(entry)

    if resource_type != 'DiagnosticReport':
        return

    LOG.debug(f"Processing DiagnosticReport Resource «{entry.fullUrl}».")

    for reference in resource.specimen:
        barcode = None

        # NOTE(review): the `or ""` fallbacks presume that a missing
        # identifier/value surfaces as None — confirm.  They turn that case
        # into a skip instead of an AttributeError on .strip().
        if not reference.identifier:
            specimen = reference.resolved(Specimen)
            barcode = (identifier(specimen, f"{INTERNAL_SYSTEM}/sample") or "").strip()

        elif matching_system(reference.identifier, INTERNAL_SYSTEM):
            barcode = (reference.identifier.value or "").strip()

        if not barcode:
            continue

        LOG.debug(f"Looking up collected specimen barcode «{barcode}»")
        specimen_identifier = find_identifier(db, barcode)

        if not specimen_identifier:
            LOG.warning(f"Skipping collected specimen with unknown barcode «{barcode}»")
            continue

        # Decide whether the barcode is a collection or a sample identifier.
        # Plain membership tests are used (rather than catching the
        # AssertionError of an assert statement) so the classification still
        # works when Python is started with -O.
        if specimen_identifier.set_name in EXPECTED_COLLECTION_IDENTIFIER_SETS:
            is_collection_identifier = True
        elif specimen_identifier.set_name in EXPECTED_SAMPLE_IDENTIFIER_SETS:
            is_collection_identifier = False
        else:
            raise AssertionError(
                f"Specimen with unexpected «{specimen_identifier.set_name}» barcode «{barcode}»")

        sample = find_sample(db, specimen_identifier.uuid)

        if not sample:
            if not is_collection_identifier:
                raise SampleNotFoundError(
                    f"No sample with identifier «{specimen_identifier.uuid}» found.")

            # Sometimes the Ellume samples come in faster than the specimen
            # manifest is updated. In this case, create a new sample holding
            # just the collection identifier; it will be filled in later.
            LOG.debug(f"Creating sample with collection identifier «{specimen_identifier.uuid}»")

            sample = db.fetch_row("""
                insert into warehouse.sample (collection_identifier)
                    values (%s)
                returning sample_id as id, collection_identifier
                """, (str(specimen_identifier.uuid),))

            LOG.info(f"Created sample {sample.id} with collection identifier «{sample.collection_identifier}»")

        process_presence_absence_tests(db, resource, sample.id, barcode)
예제 #9
0
def etl_presence_absence(*, db: DatabaseSession):
    """
    Load unprocessed presence/absence test results into the warehouse.

    Every record of ``receiving.presence_absence`` whose processing log lacks
    an entry for this revision is handled inside its own savepoint.  For each
    sample of a record's ``"samples"`` list, the barcode is resolved to an
    identifier, the sample is updated with the received details, and each
    target result is upserted as a presence/absence test.  The record is then
    marked as processed.

    Samples are skipped (with a log message) when they have no barcode, are
    marked as failed, have no results, are not the current result, or have an
    unknown barcode.  Records in the old document format are marked as
    processed without being ingested.

    Raises :class:`KeyError` for a record that has no ``"samples"`` key and is
    not in the old format, and :class:`AssertionError` for an empty chip value
    or an identifier from an invalid set.
    """
    LOG.debug(f"Starting the presence_absence ETL routine, revision {REVISION}")

    # Fetch and iterate over presence-absence tests that aren't processed
    #
    # Rows we fetch are locked for update so that two instances of this
    # command don't try to process the same presence-absence tests.
    LOG.debug("Fetching unprocessed presence-absence tests")

    presence_absence = db.cursor("presence_absence")
    presence_absence.itersize = 1
    presence_absence.execute("""
        select presence_absence_id as id, document,
               received::date as received_date
          from receiving.presence_absence
         where not processing_log @> %s
         order by id
           for update
        """, (Json([{ "revision": REVISION }]),))

    for group in presence_absence:
        with db.savepoint(f"presence_absence group {group.id}"):
            LOG.info(f"Processing presence_absence group {group.id}")

            # Samplify will now send documents with a top level key
            # "samples". The new format also includes a "chip" key for each
            # sample which is then included in the unique identifier for
            # each presence/absence result
            #   -Jover, 14 Nov 2019
            try:
                received_samples = group.document["samples"]
            except KeyError as error:
                # Skip documents in the old format because they do not
                # include the "chip" key which is needed for the
                # unique identifier for each result.
                #   -Jover, 14 Nov 2019
                # Also skip old format to avoid ingesting wrong data from
                # plate swapped data! This will lead to 188 samples with the
                # wrong nwgc_id associated with them.
                #   -Jover, 06 Dec 2019
                if (group.document.get("store") is not None or
                    group.document.get("Update") is not None):

                    LOG.info("Skipping presence_absence record that is in old format")
                    mark_processed(db, group.id)
                    continue

                else:
                    raise error from None

            for received_sample in received_samples:
                received_sample_barcode = received_sample.get("investigatorId")
                if not received_sample_barcode:
                    LOG.info(f"Skipping sample «{received_sample['sampleId']}» without SFS barcode")
                    continue

                # Don't go any further if the sample is marked as Failed
                sample_failed = received_sample.get("sampleFailed")
                if sample_failed is True:
                    LOG.info(f"Skipping sample «{received_sample_barcode}» that has been failed")
                    continue

                # Don't go any further if there are no results to import.
                test_results = received_sample["targetResults"]

                if not test_results:
                    LOG.warning(f"Skipping sample «{received_sample_barcode}» without any results")
                    continue

                received_sample_id = str(received_sample["sampleId"])
                chip = received_sample.get("chip")
                extraction_date = received_sample.get("extractionDate")
                assay_name = received_sample.get("assayName")
                assay_date = received_sample.get("assayDate")
                # The assayType field will be removed after Samplify starts
                # sending us OpenArray results with target.clinicalStatus.
                #
                # kfay, 28 Dec 2020
                assay_type = received_sample.get("assayType")

                # Guard against empty chip values
                assert chip or "chip" not in received_sample, "Received bogus chip id"

                # Must be current results
                LOG.info(f"Processing sample «{received_sample_barcode}»")

                if not received_sample.get("isCurrentExpressionResult"):
                    LOG.warning(f"Skipping out-of-date results for sample «{received_sample_barcode}»")
                    continue

                # Barcode must match a known identifier
                db_identifier = find_identifier(db, received_sample_barcode)

                if not db_identifier:
                    LOG.warning(f"Skipping results for sample without a known identifier «{received_sample_barcode}»")
                    continue

                assert db_identifier.set_name in valid_identifiers, \
                    f"Identifier found in invalid set «{db_identifier.set_name}»"

                # Identifiers from a "tiny-swab" set are matched against the
                # sample's collection identifier rather than its sample
                # identifier (see the update_sample() calls below).
                tiny_swab = 'tiny-swab' in db_identifier.set_name

                received_sample_identifier = db_identifier.uuid

                # Track Samplify's internal ids for our samples, which is
                # unfortunately necessary for linking genomic data NWGC also
                # sends.
                if tiny_swab:
                    sample = update_sample(db,
                        collection_identifier = received_sample_identifier,
                        additional_details = sample_details(received_sample))
                else:
                    sample = update_sample(db,
                        identifier = received_sample_identifier,
                        additional_details = sample_details(received_sample))

                # Finally, process all results.
                for test_result in test_results:
                    test_result_target_id = test_result["geneTarget"]
                    # Two adjacent literals instead of a backslash
                    # continuation inside the string, which would embed the
                    # source indentation in the logged message.
                    LOG.debug(f"Processing target «{test_result_target_id}» for "
                              f"sample «{received_sample_barcode}»")

                    # Skip this result if it's actually a non-result
                    present = target_present(test_result)

                    if present is ...:
                        LOG.debug(f"No test result for «{test_result_target_id}», skipping")
                        continue

                    # Most of the time we expect to see existing targets so a
                    # select-first approach makes the most sense to avoid useless
                    # updates.
                    target = find_or_create_target(db,
                        identifier = test_result_target_id,
                        control = target_control(test_result["controlStatus"]))

                    # The unique identifier for each result.  If chip is
                    # applicable, then it's included to differentiate the same
                    # sample being run on multiple chips (uncommon, but it
                    # happens).
                    if chip:
                        identifier = f"NWGC/{received_sample_id}/{target.identifier}/{chip}"
                    else:
                        identifier = f"NWGC/{received_sample_id}/{target.identifier}"

                    # Most of the time we expect to see new samples and new
                    # presence_absence tests, so an insert-first approach makes more sense.
                    # Presence-absence tests we see more than once are presumed to be
                    # corrections.
                    upsert_presence_absence(db,
                        identifier = identifier,
                        sample_id  = sample.id,
                        target_id  = target.id,
                        present    = present,
                        details    = presence_absence_details(test_result,
                                                              group.received_date,
                                                              chip,
                                                              extraction_date,
                                                              assay_name,
                                                              assay_date,
                                                              assay_type))

            mark_processed(db, group.id)

            LOG.info(f"Finished processing presence_absence group {group.id}")
예제 #10
0
파일: kit.py 프로젝트: sonali-mhihim/id3c
def kit_enrollments(*, db: DatabaseSession):
    """
    Link kits to the encounters created from unprocessed enrollment records.

    Every record of ``receiving.enrollment`` whose processing log lacks an
    entry for this ETL name and enrollments revision is handled inside its own
    savepoint.  For self-test encounters, each kit barcode of the enrollment's
    ``"sampleCodes"`` is resolved to an identifier and upserted as a kit
    linked to the encounter; the kit's samples are updated when an existing
    kit was updated.  The enrollment is then marked as processed.

    Enrollments whose encounter is linked to any other site are marked as
    processed without further work.  Sample codes without a barcode, of an
    unexpected type, with an unknown barcode, or from an unexpected identifier
    set are logged and skipped.

    Raises :class:`EncounterNotFoundError` when no encounter exists yet for an
    enrollment.
    """
    LOG.debug(
        f"Starting the kit enrollments ETL routine, revision {ENROLLMENTS_REVISION}"
    )

    expected_barcode_types = {"ScannedSelfSwab", "ManualSelfSwab"}

    LOG.debug("Fetching unprocessed enrollments")
    enrollments = db.cursor("enrollments")
    enrollments.execute(
        """
        select enrollment_id as id, document
          from receiving.enrollment
         where not processing_log @> %s
         order by id
          for update
        """, (Json([{
            "etl": ETL_NAME,
            "revision": ENROLLMENTS_REVISION
        }]), ))

    for enrollment in enrollments:
        with db.savepoint(f"enrollment {enrollment.id}"):
            LOG.info(f"Processing enrollment {enrollment.id}")

            # Find encounter that should have been created
            # from this enrollment record through etl enrollments
            encounter = find_encounter(db, enrollment.document["id"])

            # Error out the kit etl process if no encounter found
            # The kit etl process can try again starting with this record
            # next time with the idea that the encounter will be
            # created by then.
            if not encounter:
                raise EncounterNotFoundError(
                    f"No encounter with identifier «{enrollment.document['id']}» found"
                )

            # Skip and mark the enrollment document as processed if the
            # encounter found is linked to a site that is not self-test
            if encounter.site != "self-test":
                # The two parts are joined with a separating space so the
                # message does not read "…»linked to site…".
                LOG.debug(
                    f"Found encounter {encounter.id} «{encounter.identifier}» "
                    f"linked to site «{encounter.site}», not 'self-test'")
                mark_enrollment_processed(db, enrollment.id)
                continue

            for code in enrollment.document["sampleCodes"]:
                barcode = code.get("code")

                # Kit must have a barcode
                if not barcode:
                    LOG.warning(f"No barcode found in sampleCodes {code}")
                    continue

                # Barcode must be of expected barcode type
                if code["type"] not in expected_barcode_types:
                    LOG.debug(f"Skipping barcode with type {code['type']}")
                    continue

                # Convert kit barcode to full identifier
                kit_identifier = find_identifier(db, barcode)

                if not kit_identifier:
                    LOG.warning(
                        f"Skipping kit with unknown barcode «{barcode}»")
                    continue

                if kit_identifier.set_name not in expected_identifier_sets[
                        "kits"]:
                    LOG.warning(
                        f"Skipping kit with identifier found in " +
                        f"set «{kit_identifier.set_name}» not {expected_identifier_sets['kits']}"
                    )
                    continue

                details = {"type": code["type"]}

                kit, status = upsert_kit_with_encounter(
                    db,
                    identifier=kit_identifier.uuid,
                    encounter_id=encounter.id,
                    additional_details=details)

                # An updated (pre-existing) kit gets its samples updated too.
                if status == "updated":
                    update_kit_samples(db, kit)

            mark_enrollment_processed(db, enrollment.id)

            LOG.info(f"Finished processing enrollment {enrollment.id}")
예제 #11
0
파일: kit.py 프로젝트: sonali-mhihim/id3c
def kit_manifests(*, db: DatabaseSession):
    """
    Link kits to the samples created from unprocessed manifest records.

    Every record of ``receiving.manifest`` whose processing log lacks an entry
    for this ETL name and manifest revision is handled inside its own
    savepoint.  The record's sample and kit barcodes are resolved to
    identifiers, the kit is upserted together with the existing sample, and
    the record is marked as loaded.

    Records are marked as skipped when they carry no kit data, when the
    sample or kit barcode is unknown or from an unexpected identifier set, or
    when the sample's type is neither ``utm`` nor ``rdt``.

    Raises :class:`SampleNotFoundError` when no sample exists yet for a
    record's sample identifier.
    """
    LOG.debug(
        f"Starting the kits manifests ETL routine, revision {MANIFEST_REVISION}"
    )

    LOG.debug("Fetching unprocessed manifest records")

    # Keys of the manifest document that are not needed on the kit record.
    extra_data = (
        "collection", "sample_type", "aliquot_date", "aliquots", "racks",
    )

    manifest = db.cursor("manifest")
    already_processed = Json([{
        "etl": ETL_NAME,
        "revision": MANIFEST_REVISION
    }])
    manifest.execute(
        """
        select manifest_id as id, document
          from receiving.manifest
         where not processing_log @> %s
         order by id
           for update
        """, (already_processed, ))

    for manifest_record in manifest:
        with db.savepoint(f"manifest record {manifest_record.id}"):
            LOG.info(f"Processing record {manifest_record.id}")

            # Barcodes and extra keys are pop()ed off this very dict before
            # it is stored as the kit's details.
            document = manifest_record.document

            # Mark record as skipped
            # if it does not contain a kit related sample
            if "kit" not in document:
                LOG.info(
                    f"Skipping manifest record {manifest_record.id} without kit data"
                )
                mark_skipped(db, manifest_record.id)
                continue

            sample_barcode = document.pop("sample")
            sample_identifier = find_identifier(db, sample_barcode)

            # Mark record as skipped
            # if it has an unknown sample barcode
            if not sample_identifier:
                LOG.warning(
                    f"Skipping manifest record with unknown sample barcode «{sample_barcode}»"
                )
                mark_skipped(db, manifest_record.id)
                continue

            # Mark record as skipped if the sample identifier set is
            # unexpected
            if sample_identifier.set_name not in expected_identifier_sets["samples"]:
                LOG.warning(
                    f"Skipping manifest record with sample identifier found in "
                    +
                    f"set «{sample_identifier.set_name}», not {expected_identifier_sets['samples']}"
                )
                mark_skipped(db, manifest_record.id)
                continue

            # Find sample that should have been created from this
            # manifest record via etl manifest
            sample = find_sample(db, sample_identifier.uuid)

            # Error out the kit etl process if no sample found
            # The kit etl process can try again starting with this record
            # next time with the idea that the sample will be
            # created by then.
            if not sample:
                raise SampleNotFoundError(
                    f"No sample with «{sample_identifier.uuid}» found")

            # Mark record as skipped if the sample does not have a
            # sample type (utm or rdt)
            if sample.type not in {"utm", "rdt"}:
                LOG.info(f"Skipping manifest record {manifest_record.id} " +
                         f"with unknown sample type {sample.type}")
                mark_skipped(db, manifest_record.id)
                continue

            kit_barcode = document.pop("kit")
            kit_identifier = find_identifier(db, kit_barcode)

            # Mark record as skipped if it has an unknown kit barcode
            if not kit_identifier:
                LOG.warning(
                    f"Skipping kit with unknown barcode «{kit_barcode}»")
                mark_skipped(db, manifest_record.id)
                continue

            # Mark record as skipped if kit identifier set is unexpected
            if kit_identifier.set_name not in expected_identifier_sets["kits"]:
                LOG.warning(
                    f"Skipping kit with identifier found in " +
                    f"set «{kit_identifier.set_name}» not {expected_identifier_sets['kits']}"
                )
                mark_skipped(db, manifest_record.id)
                continue

            # Drop the extra data before the manifest document is added to
            # the kit details
            for key in extra_data:
                document.pop(key, None)

            # Try to find identifier for the test-strip barcode for rdt samples
            if sample.type == "rdt":
                update_test_strip(db, document)

            kit, status = upsert_kit_with_sample(
                db,
                identifier=kit_identifier.uuid,
                sample=sample,
                additional_details=document)

            if status == "updated":
                update_sample(db, sample, kit.encounter_id)

            mark_loaded(db, manifest_record.id, status, kit.id)
예제 #12
0
def etl_manifest(*, db: DatabaseSession) -> None:
    """
    Process unprocessed ``receiving.manifest`` records into sample records.

    Selects every manifest row whose ``processing_log`` does not yet contain
    an entry for this ETL name and revision, and handles each row inside its
    own savepoint:

    * The ``sample`` and ``collection`` barcodes are popped off the record's
      document and looked up with ``find_identifier``.
    * A record is marked as skipped (via ``mark_skipped``) when it has
      neither barcode, when a barcode it does have matches no identifier, or
      when its collection identifier belongs to an unexpected identifier set.
    * Otherwise the sample is upserted with ``upsert_sample`` and the record
      is marked as loaded (via ``mark_loaded``).

    :param db: database session used for the cursor, the savepoints and all
        helper calls.
    :raises AssertionError: if a sample identifier belongs to an identifier
        set other than the one expected for the record's ``sample_type``.
    """
    LOG.debug(f"Starting the manifest ETL routine, revision {REVISION}")

    # XXX TODO: Stop hardcoding valid identifier sets.  Instead, accept them as
    # an option or config (and validate option choices against what's actually
    # in the database).  We won't want to validate using click.option(),
    # because that would necessitate a database connection simply to run
    # bin/id3c at all.
    #   -trs, 13 May 2019
    expected_identifier_sets = {
        "samples": {"samples"},
        "collections": {
            "collections-environmental",
            "collections-fluathome.org",
            "collections-household-intervention",
            "collections-household-intervention-asymptomatic",
            "collections-household-observation",
            "collections-household-observation-asymptomatic",
            "collections-kiosks",
            "collections-kiosks-asymptomatic",
            "collections-seattleflu.org",
            "collections-swab&send",
            "collections-swab&send-asymptomatic",
            "collections-self-test",
            "collections-scan",
            "collections-scan-kiosks",
            "collections-haarvi",
            "samples-haarvi",
            "collections-validation",
            "collections-uw-home",
            "collections-uw-observed",
            "collections-household-general",
            "collections-childcare",
            "collections-school-testing-home",
            "collections-school-testing-observed",
            "collections-apple-respiratory",
            "collections-apple-respiratory-serial",
            "collections-adult-family-home-outbreak",
            "collections-workplace-outbreak",
        },
        "rdt": {"collections-fluathome.org"}
    }

    # Fetch and iterate over samples that aren't processed
    #
    # Rows we fetch are locked for update so that two instances of this
    # command don't try to process the same samples.
    LOG.debug("Fetching unprocessed manifest records")

    manifest = db.cursor("manifest")
    manifest.execute(
        """
        select manifest_id as id, document
          from receiving.manifest
         where not processing_log @> %s
         order by id
           for update
        """, (Json([{
            "etl": ETL_NAME,
            "revision": REVISION
        }]), ))

    for manifest_record in manifest:
        with db.savepoint(f"manifest record {manifest_record.id}"):
            LOG.info(f"Processing record {manifest_record.id}")

            # Same dict object as manifest_record.document; the pop()s below
            # intentionally mutate it before it is stored as sample details.
            document = manifest_record.document

            # When updating an existing row, update the identifiers
            # only if the record has both the 'sample' and
            # 'collection' keys.
            should_update_identifiers = (
                "sample" in document and "collection" in document)

            # Sample collection date
            # Don't pop this entry off the document. For backwards
            # compatibility reasons, keep it in the document so that 'date'
            # also gets written to the 'details' column in warehouse.sample.
            collected_date = document.get("date")

            # Attempt to find barcodes and their related identifiers
            sample_barcode = document.pop("sample", None)
            sample_identifier = (
                find_identifier(db, sample_barcode)
                if sample_barcode else None)
            collection_barcode = document.pop("collection", None)
            collection_identifier = (
                find_identifier(db, collection_barcode)
                if collection_barcode else None)

            # Skip a record if it has no associated barcodes
            if not sample_barcode and not collection_barcode:
                LOG.warning(
                    f"Skipping record «{manifest_record.id}» because it has neither a sample "
                    "barcode nor a collection barcode")
                mark_skipped(db, manifest_record.id)
                continue

            # Skip a record if it has a sample barcode but the barcode
            # doesn't match an identifier
            if sample_barcode and not sample_identifier:
                LOG.warning(
                    f"Skipping sample with unknown sample barcode «{sample_barcode}»"
                )
                mark_skipped(db, manifest_record.id)
                continue

            # Skip a record if it has a collection barcode but the barcode
            # doesn't match an identifier
            if collection_barcode and not collection_identifier:
                LOG.warning(
                    f"Skipping sample with unknown collection barcode «{collection_barcode}»"
                )
                mark_skipped(db, manifest_record.id)
                continue

            # Skip a record if the collection identifier is from an
            # unexpected set
            if (collection_identifier
                    and collection_identifier.set_name
                    not in expected_identifier_sets["collections"]):
                # Implicit string concatenation (rather than a backslash
                # continuation inside the literal) keeps source indentation
                # out of the emitted message.
                LOG.warning(
                    "Skipping sample because collection identifier found in "
                    f"set «{collection_identifier.set_name}», not "
                    f"{expected_identifier_sets['collections']}")
                mark_skipped(db, manifest_record.id)
                continue

            # Validate the sample identifier and abort if a record fails.
            #
            # Raised explicitly instead of using an `assert` statement so the
            # check still runs when Python is started with -O; the exception
            # type is unchanged.
            if sample_identifier:
                if document.get("sample_type") == "rdt":
                    expected_sets = expected_identifier_sets["rdt"]
                else:
                    expected_sets = expected_identifier_sets["samples"]

                if sample_identifier.set_name not in expected_sets:
                    raise AssertionError(
                        f"Sample identifier found in set «{sample_identifier.set_name}», "
                        f"not {expected_sets}")

            # Upsert sample cooperatively with enrollments ETL routine
            #
            # The details document was intentionally modified by two pop()s
            # earlier to remove barcodes that were looked up.
            # The rationale is that we want just one clear place in the
            # warehouse for each piece of information.
            sample, status = upsert_sample(
                db,
                update_identifiers=should_update_identifiers,
                identifier=(
                    sample_identifier.uuid if sample_identifier else None),
                collection_identifier=(
                    collection_identifier.uuid
                    if collection_identifier else None),
                collection_date=collected_date,
                additional_details=document)

            mark_loaded(db,
                        manifest_record.id,
                        status=status,
                        sample_id=sample.id)

            LOG.info(
                f"Finished processing manifest record {manifest_record.id}")