コード例 #1
0
def test_export_to_mongo_logs_error_correctly_on_samples_exception(
        subject, logger, mongo_database):
    """A failing ``insert_many`` is reported as a ``TransientRabbitError``.

    ``Collection.insert_many`` is patched to raise a ``TimeoutError`` while the
    export runs. The test then checks the raised error message, the critical
    and exception log calls, and that both collections are still empty.
    """
    _, db = mongo_database
    simulated_failure = TimeoutError()

    # Every bulk insert raises for the duration of the export.
    with patch.object(Collection, "insert_many", side_effect=simulated_failure), \
            pytest.raises(TransientRabbitError) as raised:
        subject.export_to_mongo()

    expected_message = (
        "There was an error updating MongoDB while exporting samples for message UUID 'CREATE_PLATE_UUID'."
    )
    assert raised.value.message == expected_message

    # One critical log line that names the message UUID and the error text.
    logger.critical.assert_called_once()
    critical_text = logger.critical.call_args.args[0]
    assert "CREATE_PLATE_UUID" in critical_text
    assert str(simulated_failure) in critical_text

    # Neither collection may contain any documents afterwards.
    for collection_name in (COLLECTION_SOURCE_PLATES, COLLECTION_SAMPLES):
        collection = get_mongo_collection(db, collection_name)
        assert collection.count_documents({}) == 0

    logger.exception.assert_called_once_with(simulated_failure)
コード例 #2
0
ファイル: test_main.py プロジェクト: sanger/crawler
def test_error_run(mongo_database, testing_files_for_process, pyodbc_conn):
    """Run the crawler twice and compare the state after the second run.

    After re-copying the good and malformed test files and running again, the
    centre, source plate and sample counts must match the expected constants,
    while the TEST errors directory and the imports collection each grow by one.
    """
    _, db = mongo_database
    settings_module = "crawler.config.integration"
    errors_directory = "tmp/backups/TEST/errors"

    def error_file_count():
        # Only the plain files directly inside the errors directory are counted.
        _, _, file_names = next(os.walk(errors_directory))
        return len(file_names)

    # First run, with the MLWH insert patched out.
    with patch("crawler.file_processing.CentreFile.insert_samples_from_docs_into_mlwh"):
        run(False, False, False, settings_module)

    # The four collections we expect to exist following the import.
    centres = get_mongo_collection(db, COLLECTION_CENTRES)
    import_records = get_mongo_collection(db, COLLECTION_IMPORTS)
    samples = get_mongo_collection(db, COLLECTION_SAMPLES)
    source_plates = get_mongo_collection(db, COLLECTION_SOURCE_PLATES)

    # Files are expected in the errors directory after the first run.
    assert 2 == error_file_count()

    # Put the input files back in place before running a second time.
    for fixture_directory in ("tests/test_files/good", "tests/test_files/malformed"):
        shutil.copytree(fixture_directory, "tmp/files", dirs_exist_ok=True)

    run(False, False, False, settings_module)

    # Centres, source plates and samples are all unchanged by the second run.
    assert centres.count_documents({}) == NUMBER_CENTRES
    assert source_plates.count_documents({}) == NUMBER_ACCEPTED_SOURCE_PLATES
    assert samples.count_documents({}) == NUMBER_VALID_SAMPLES

    # The second run adds one more file to the errors directory...
    assert 3 == error_file_count()

    # ...and one more import record.
    assert import_records.count_documents({}) == NUMBER_OF_FILES_PROCESSED + 1
コード例 #3
0
def test_export_to_mongo_logs_error_correctly_on_source_plate_exception(
        subject, logger, mongo_database):
    """A failure fetching a collection is reported as a ``TransientRabbitError``.

    The exporter module's ``get_mongo_collection`` is patched to raise a
    ``TimeoutError``. The test then checks the raised error message, the
    critical and exception log calls, and that both collections are still empty.
    """
    _, db = mongo_database
    simulated_failure = TimeoutError()

    # The exporter's collection lookup raises for the duration of the export.
    with patch("crawler.processing.create_plate_exporter.get_mongo_collection",
               side_effect=simulated_failure), \
            pytest.raises(TransientRabbitError) as raised:
        subject.export_to_mongo()

    expected_message = "There was an error updating MongoDB while exporting plate with barcode 'PLATE-001'."
    assert raised.value.message == expected_message

    # One critical log line that names the plate barcode and the error text.
    logger.critical.assert_called_once()
    critical_text = logger.critical.call_args.args[0]
    assert "PLATE-001" in critical_text
    assert str(simulated_failure) in critical_text

    # Neither collection may contain any documents afterwards.
    for collection_name in (COLLECTION_SOURCE_PLATES, COLLECTION_SAMPLES):
        collection = get_mongo_collection(db, collection_name)
        assert collection.count_documents({}) == 0

    logger.exception.assert_called_once_with(simulated_failure)
コード例 #4
0
ファイル: test_main.py プロジェクト: sanger/crawler
def test_run(mongo_database, testing_files_for_process, pyodbc_conn):
    """Run the crawler once and check the Mongo collections and backup folders.

    Verifies document counts for centres, source plates, samples and imports,
    the number of files in the ALDP and RAND success/error backup folders, and
    that no subfolders are left under ``tmp/files/``.
    """
    _, db = mongo_database

    def count_files(directory):
        # Number of plain files directly inside ``directory``.
        _, _, file_names = next(os.walk(directory))
        return len(file_names)

    with patch("crawler.file_processing.CentreFile.insert_samples_from_docs_into_mlwh"):
        run(False, False, False, "crawler.config.integration")

    # The four collections we expect to exist following the import.
    centres = get_mongo_collection(db, COLLECTION_CENTRES)
    import_records = get_mongo_collection(db, COLLECTION_IMPORTS)
    samples = get_mongo_collection(db, COLLECTION_SAMPLES)
    source_plates = get_mongo_collection(db, COLLECTION_SOURCE_PLATES)

    # Test centres are recorded.
    assert centres.count_documents({}) == NUMBER_CENTRES
    assert centres.count_documents({FIELD_CENTRE_NAME: "Test Centre"}) == 1

    # All source plates are recorded.
    assert source_plates.count_documents({}) == NUMBER_ACCEPTED_SOURCE_PLATES
    # Centres that we don't process unconsolidated files for.
    for skipped_barcode in ("AP123", "MK123", "MK456", "GLS123", "GLS789"):
        assert source_plates.count_documents({"barcode": skipped_barcode}) == 0
    # Centres that process all files.
    for recorded_barcode in ("CB123", "TS789"):
        assert source_plates.count_documents({"barcode": recorded_barcode}) == 1

    # *All* samples are recorded.
    assert samples.count_documents({}) == NUMBER_VALID_SAMPLES, (
        f"Wrong number of samples inserted. Expected: {NUMBER_VALID_SAMPLES}, Actual: "
        f"{samples.count_documents({})}"
    )
    assert samples.count_documents({"RNA ID": "CB123_A09", "source": "Cambridge-az"}) == 1
    assert samples.count_documents({"RNA ID": "23JAN21-0001Q_A11", "source": "Randox"}) == 1

    # One import per centre.
    assert import_records.count_documents({}) == NUMBER_OF_FILES_PROCESSED, (
        f"Wrong number of imports inserted. Expected: {NUMBER_OF_FILES_PROCESSED}, Actual: "
        f"{import_records.count_documents({})}"
    )

    # Success/error file counts for Alderley.
    aldp_successes = count_files("tmp/backups/ALDP/successes")
    assert aldp_successes == 0, f"Wrong number of success files. Expected: 0, Actual: {aldp_successes}"
    aldp_errors = count_files("tmp/backups/ALDP/errors")
    assert aldp_errors == 3, f"Wrong number of error files. Expected: 3, Actual: {aldp_errors}"

    # Success/error file counts for Randox.
    rand_successes = count_files("tmp/backups/RAND/successes")
    assert rand_successes == 1, f"Wrong number of success files. Expected: 1, Actual: {rand_successes}"
    rand_errors = count_files("tmp/backups/RAND/errors")
    assert rand_errors == 0, f"Wrong number of error files. Expected: 0, Actual: {rand_errors}"

    # The temporary files must have been cleaned up.
    _, leftover_subfolders, _ = next(os.walk("tmp/files/"))
    assert 0 == len(leftover_subfolders), (
        f"Wrong number of subfolders. Expected: 0, Actual: {len(leftover_subfolders)}"
    )
コード例 #5
0
def process(run_id: str, config: Config = None) -> List[List[str]]:
    """Generate cherrypicker test data for Crawler and process it via the usual runner.

    The specification of the plates to be generated should be in Mongo. Each
    plate will contain an exact number of positive results between 0 and 96 as
    specified. Up to 200 plates can be generated at a time.

    Arguments:
        run_id: str - The ID of the run.  If this is not found in Mongo an
            exception will be thrown.
        config: Config - The application config to use.  When None, the
            config returned by get_config() is used instead.

    Returns:
        Metadata about the plates generated, as:
        [ [ "barcode1", "description1" ], [ "barcode2", "description2" ] ]
    """
    logger.info("Begin generating data.")

    # Fall back to the default configuration when the caller supplied none.
    if config is None:
        config, _unused = get_config()

    with create_mongo_client(config) as mongo_client:
        test_data_collection = get_mongo_collection(
            get_mongo_db(config, mongo_client),
            COLLECTION_CHERRYPICK_TEST_DATA,
        )
        plates_metadata = process_run(config, test_data_collection, run_id)

    return plates_metadata
コード例 #6
0
ファイル: test_main.py プロジェクト: BenTopping/crawler
def test_error_run_duplicates_in_imports_message(mongo_database, testing_files_for_process, pyodbc_conn):
    """A file containing duplicate samples yields the TYPE 5 errors on the import record.

    An extra fixture directory is copied into ``tmp/files`` before the run; the
    "Test Centre" import record must then hold exactly two error entries, the
    aggregate count followed by the warning message.
    """
    _, db = mongo_database

    # Add the additional file that contains duplicates.
    shutil.copytree("tests/files_with_duplicate_samples", "tmp/files", dirs_exist_ok=True)

    with patch("crawler.file_processing.CentreFile.insert_samples_from_docs_into_mlwh"):
        run(False, False, False, "crawler.config.integration")

    # The imports collection gains one record for the additional duplicate error file.
    import_records = get_mongo_collection(db, COLLECTION_IMPORTS)
    assert import_records.count_documents({}) == NUMBER_OF_FILES_PROCESSED + 1

    # Look at the errors stored on the Test centre record.
    centre_record = import_records.find_one({"centre_name": "Test Centre"})
    recorded_errors = centre_record["errors"]

    # Two entries for the type 5 (duplicates) errors: 1 aggregate count and 1 message.
    assert len(recorded_errors) == 2

    aggregate_line = "Total number of Duplicates within file errors (TYPE 5): 1"
    assert aggregate_line in recorded_errors[0]

    message_line = (
        "WARNING: Duplicates detected within the file. (TYPE 5) (e.g. Duplicated, line: 3, root_sample_id: 16)"
    )
    assert message_line in recorded_errors[1]
コード例 #7
0
def test_export_to_mongo_reverts_the_transaction_when_duplicate_samples_inserted(
        subject, mongo_database):
    """Exporting a message holding the same sample twice leaves both collections empty."""
    _, db = mongo_database

    # Overwrite the first sample with the second so the message contains a duplicate.
    message_samples = subject._message._body[FIELD_PLATE][FIELD_SAMPLES]
    message_samples[0] = message_samples[1]

    subject.export_to_mongo()

    # No documents were inserted in either collection.
    for collection_name in (COLLECTION_SAMPLES, COLLECTION_SOURCE_PLATES):
        collection = get_mongo_collection(db, collection_name)
        assert collection.count_documents({}) == 0
コード例 #8
0
def test_get_mongo_collection(mongo_database):
    """``get_mongo_collection`` returns a ``Collection`` carrying the requested name."""
    _, mongo_database = mongo_database
    collection_name = "test_collection"
    test_collection = get_mongo_collection(mongo_database, collection_name)

    # isinstance is the idiomatic type check; unlike ``type(x) == Collection``
    # it also accepts subclasses of Collection.
    assert isinstance(test_collection, Collection)
    assert test_collection.name == collection_name
コード例 #9
0
def positive_result_samples_from_mongo(
        config: Config,
        plate_barcodes: Optional[List[str]] = None) -> List[SampleDoc]:
    """Fetch the positive samples from Mongo, optionally restricted to given plates.

    Arguments:
        config {Config} -- application config specifying database details
        plate_barcodes {Optional[List[str]]} -- barcodes of the plates to restrict the search to;
            when None, no plate filter is added

    Returns:
        List[SampleDoc] -- every document produced by the aggregation, loaded into a list
    """
    with create_mongo_client(config) as client:
        samples_collection = get_mongo_collection(get_mongo_db(config, client), COLLECTION_SAMPLES)

        # First stage: keep only samples with a positive result.
        positive_stage = {"$match": {FIELD_RESULT: {"$eq": POSITIVE_RESULT_VALUE}}}
        pipeline = [positive_stage]

        # Optional second stage: keep only samples on the requested plates.
        if plate_barcodes is not None:
            barcode_stage = {"$match": {FIELD_PLATE_BARCODE: {"$in": plate_barcodes}}}
            pipeline.append(barcode_stage)  # type: ignore

        # Draining the cursor loads every matching document into memory at once.
        # Open question carried over from the original: should the documents be
        # projected down to fewer fields?
        positive_samples = list(samples_collection.aggregate(pipeline))

    return positive_samples
コード例 #10
0
def query_any_unprocessed_samples(db: Database) -> List[SampleDoc]:
    """
    Return the unprocessed priority samples (priority_samples mongo collection) that have
    at least one related sample (samples mongo collection), merged with that sample's fields.

    Arguments:
        db {Database} -- mongo db instance
    """
    # All unprocessed priority samples
    only_unprocessed: Mapping[str, Any] = {"$match": {FIELD_PROCESSED: False}}

    # Joins priority_samples and samples
    join_with_samples: Mapping[str, Any] = {
        "$lookup": {
            "from": "samples",
            "localField": FIELD_SAMPLE_ID,
            "foreignField": FIELD_MONGODB_ID,
            "as": "related_samples",
        }
    }

    # Keep only priority samples with a sample associated with them; this match is
    # required so "Exception: Cannot unpad coordinate" isn't thrown
    only_with_related_sample: Mapping[str, Any] = {"$match": {"related_samples": {"$ne": []}}}

    # Copy all sample attributes into the root of the object (merge sample+priority_sample)
    merge_sample_into_root: Mapping[str, Any] = {
        "$replaceRoot": {"newRoot": {"$mergeObjects": [{"$arrayElemAt": ["$related_samples", 0]}, "$$ROOT"]}}
    }

    # Prune the branch for related samples as all that info is now in the root of the object
    drop_related_samples: Mapping[str, Any] = {"$project": {"related_samples": 0}}

    IMPORTANT_UNPROCESSED_SAMPLES_MONGO_QUERY: Final[List[Mapping[str, Any]]] = [
        only_unprocessed,
        join_with_samples,
        only_with_related_sample,
        merge_sample_into_root,
        drop_related_samples,
    ]

    priority_samples_collection = get_mongo_collection(db, COLLECTION_PRIORITY_SAMPLES)
    return list(priority_samples_collection.aggregate(IMPORTANT_UNPROCESSED_SAMPLES_MONGO_QUERY))
コード例 #11
0
def filtered_positive_fields_set(config: Config, start_datetime: datetime,
                                 end_datetime: datetime) -> bool:
    """Report whether any sample created in the date range has the filtered positive field.

    A sample counts when its creation date is at or after ``start_datetime``, before
    ``end_datetime``, and the filtered positive field exists on the document. Per the
    original documentation, this would indicate that the migration has already been run
    on those samples.

    Args:
        config {Config} -- application config specifying database details
        start_datetime {datetime} -- lower limit (inclusive) of sample creation date
        end_datetime {datetime} -- upper limit (exclusive) of sample creation date

    Returns:
        {bool} -- True when at least one such sample exists
    """
    created_in_range = {"$gte": start_datetime, "$lt": end_datetime}
    has_filtered_positive = {"$exists": True}

    with create_mongo_client(config) as client:
        samples_collection = get_mongo_collection(get_mongo_db(config, client), COLLECTION_SAMPLES)

        versioned_sample_count: int = samples_collection.count_documents({
            FIELD_CREATED_AT: created_in_range,
            FIELD_FILTERED_POSITIVE: has_filtered_positive,
        })

    return versioned_sample_count > 0
コード例 #12
0
ファイル: test_main.py プロジェクト: sanger/crawler
def test_error_run_duplicates_plate_barcodes_from_different_labs_message(
    mongo_database, testing_files_for_process, pyodbc_conn
):
    """Duplicate plate barcodes from different labs yield the TYPE 25 errors on the import record.

    An extra fixture directory is copied into ``tmp/files`` before the run; the
    "Test Centre" import record must then hold exactly two error entries, the
    aggregate count followed by the error message.
    """
    _, db = mongo_database

    # Add the additional file that contains duplicates.
    shutil.copytree("tests/test_files/duplicate_barcodes", "tmp/files", dirs_exist_ok=True)

    with patch("crawler.file_processing.CentreFile.insert_samples_from_docs_into_mlwh"):
        run(False, False, False, "crawler.config.integration")

    # The imports collection gains one record for the additional duplicate error file.
    import_records = get_mongo_collection(db, COLLECTION_IMPORTS)
    assert import_records.count_documents({}) == NUMBER_OF_FILES_PROCESSED + 1

    # Look at the Test centre record.
    centre_record = import_records.find_one({"centre_name": "Test Centre"})
    assert centre_record is not None

    # Two entries are expected for this file: 1 aggregate count and 1 message.
    recorded_errors = centre_record["errors"]
    assert len(recorded_errors) == 2

    # Both entries refer to the TYPE 25 duplicate source plate barcode error.
    aggregate_line = "Total number of 'Duplicate source plate barcodes from different labs' errors (TYPE 25): 2"
    assert aggregate_line in recorded_errors[0]

    message_line = "ERROR: Found duplicate source plate barcodes from different labs (TYPE 25)"
    assert message_line in recorded_errors[1]
コード例 #13
0
def test_export_to_mongo_puts_samples_in_mongo(subject, mongo_database):
    """Exporting writes three samples with the expected identifying fields."""
    _, db = mongo_database
    samples_collection = get_mongo_collection(db, COLLECTION_SAMPLES)

    # The collection starts out empty.
    assert samples_collection.count_documents({}) == 0

    subject.export_to_mongo()

    # Three samples in total, all sharing message UUID, lab ID and source.
    assert samples_collection.count_documents({}) == 3
    shared_fields = {
        FIELD_MONGO_MESSAGE_UUID: "CREATE_PLATE_UUID",
        FIELD_MONGO_LAB_ID: "CPTD",
        FIELD_SOURCE: "Alderley",
    }
    assert samples_collection.count_documents(shared_fields) == 3

    # Each sample is present exactly once with its own index, UUID and coordinate.
    expected_samples = (
        (1, "UUID_001", "A01"),
        (2, "UUID_002", "E06"),
        (3, "UUID_003", "H12"),
    )
    for sample_index, sample_uuid, coordinate in expected_samples:
        sample_query = {
            FIELD_MONGO_SAMPLE_INDEX: sample_index,
            FIELD_LH_SAMPLE_UUID: sample_uuid,
            FIELD_COORDINATE: coordinate,
        }
        assert samples_collection.count_documents(sample_query) == 1
コード例 #14
0
def update_dart(config: Config, start_datetime: datetime,
                end_datetime: datetime) -> None:
    """Fetch samples from Mongo for a time range and pass them to ``update_dart_fields``.

    Any exception raised along the way is logged and not re-raised.

    Arguments:
        config {Config} -- application config specifying database details
        start_datetime {datetime} -- start of the time range passed to ``get_samples``
        end_datetime {datetime} -- end of the time range passed to ``get_samples``
    """
    try:
        with create_mongo_client(config) as client:
            samples_collection = get_mongo_collection(get_mongo_db(config, client), COLLECTION_SAMPLES)

            # get samples from mongo between these time ranges and with updated UUIDs
            samples = get_samples(samples_collection, start_datetime, end_datetime)

        if samples:
            logger.debug(f"{len(samples)} samples to process")

            # Only the plate barcodes are used here, and only for the log line below.
            _, plate_barcodes = extract_required_cp_info(samples)
            logger.debug(f"{len(plate_barcodes)} unique plate barcodes")

            update_dart_fields(config, samples)
        else:
            logger.info("No samples in this time range and with updated UUIDs")
    except Exception as e:
        logger.error("Error while attempting to migrate all DBs")
        logger.exception(e)
コード例 #15
0
def test_export_to_mongo_sets_the_source_plate_uuid(subject, mongo_database):
    """After exporting, the exporter's plate UUID equals the one stored for PLATE-001."""
    _, db = mongo_database

    subject.export_to_mongo()

    stored_plate = get_mongo_collection(db, COLLECTION_SOURCE_PLATES).find_one({"barcode": "PLATE-001"})

    # When no plate was found the lookup result itself is used for the comparison.
    if stored_plate:
        stored_uuid = stored_plate[FIELD_LH_SOURCE_PLATE_UUID]
    else:
        stored_uuid = stored_plate

    assert subject._plate_uuid == stored_uuid
コード例 #16
0
def test_export_to_mongo_puts_a_source_plate_in_mongo(subject, mongo_database):
    """Exporting adds exactly one source plate document with barcode PLATE-001."""
    _, db = mongo_database
    source_plates = get_mongo_collection(db, COLLECTION_SOURCE_PLATES)
    plate_query = {"barcode": "PLATE-001"}

    # The plate is absent before the export...
    assert source_plates.count_documents(plate_query) == 0

    subject.export_to_mongo()

    # ...and present exactly once afterwards.
    assert source_plates.count_documents(plate_query) == 1
コード例 #17
0
    def _record_source_plate_in_mongo_db(
            self, session: ClientSession) -> ExportResult:
        """Find an existing plate in MongoDB or add a new one for the plate in the message.

        On success the plate's UUID is stored in ``self._plate_uuid``.

        Arguments:
            session {ClientSession} -- the MongoDB session used for the lookup and the insert

        Returns:
            ExportResult -- ``success=True`` with no errors when the plate already existed with
            a matching lab ID or was newly inserted; ``success=False`` with an
            ``ExportingPlateAlreadyExists`` error when the stored plate has a different lab ID.

        Raises:
            TransientRabbitError -- when any exception is raised while accessing MongoDB. The
            original exception is attached as the cause.
        """
        # Read the message fields before entering the try block. The error handler below
        # interpolates the barcode, so it must always be bound by the time the handler runs.
        plate_barcode = self._message.plate_barcode.value
        lab_id_field = self._message.lab_id

        try:
            session_database = get_mongo_db(self._config, session.client)
            source_plates_collection = get_mongo_collection(
                session_database, COLLECTION_SOURCE_PLATES)
            mongo_plate = source_plates_collection.find_one(
                filter={FIELD_BARCODE: plate_barcode}, session=session)

            if mongo_plate is not None:
                # There was a plate in Mongo DB for this field barcode so check that the lab ID matches then return.
                self._plate_uuid = mongo_plate[FIELD_LH_SOURCE_PLATE_UUID]

                if mongo_plate[FIELD_MONGO_LAB_ID] != lab_id_field.value:
                    return ExportResult(
                        success=False,
                        create_plate_errors=[
                            CreatePlateError(
                                type=ErrorType.ExportingPlateAlreadyExists,
                                origin=RABBITMQ_CREATE_FEEDBACK_ORIGIN_PLATE,
                                description=
                                (f"Plate barcode '{plate_barcode}' already exists "
                                 f"with a different lab ID: '{mongo_plate[FIELD_MONGO_LAB_ID]}'"
                                 ),
                                field=lab_id_field.name,
                            )
                        ],
                    )

                return ExportResult(success=True, create_plate_errors=[])

            # Create a new plate for this message.
            mongo_plate = create_source_plate_doc(plate_barcode,
                                                  lab_id_field.value)
            source_plates_collection.insert_one(mongo_plate, session=session)
            self._plate_uuid = mongo_plate[FIELD_LH_SOURCE_PLATE_UUID]

            return ExportResult(success=True, create_plate_errors=[])
        except Exception as ex:
            LOGGER.critical(
                f"Error accessing MongoDB during export of source plate '{plate_barcode}': {ex}"
            )
            LOGGER.exception(ex)

            # Chain the original exception so the root cause stays visible in tracebacks.
            raise TransientRabbitError(
                f"There was an error updating MongoDB while exporting plate with barcode '{plate_barcode}'."
            ) from ex
コード例 #18
0
def samples_updated_with_source_plate_uuids(
        mongo_db: Database, samples: List[SampleDoc]) -> List[SampleDoc]:
    """Assign a source plate UUID to each sample, creating source plates where none exist.

    For each sample the source plates collection is searched by plate barcode. A found
    plate supplies the UUID only when its lab ID equals the sample's; otherwise an error
    is logged and the sample is left out of the result. When no plate is found, a new
    plate document is built for the sample and its UUID is used without a lab check.
    All new plates are inserted in one unordered bulk write at the end.

    Arguments:
        mongo_db {Database} -- the Mongo database holding the source plates collection
        samples {List[SampleDoc]} -- the samples to update (modified in place)

    Returns:
        List[SampleDoc] -- the samples that were given a source plate UUID

    Raises:
        Exception -- any error raised during processing is logged and re-raised
    """
    logger.debug("Attempting to update docs with source plate UUIDs")

    updated_samples: List[SampleDoc] = []

    def assign_plate_uuid(sample: SampleDoc,
                          plate: SourcePlateDoc,
                          skip_lab_check: bool = False) -> None:
        labs_agree = skip_lab_check or sample[FIELD_LAB_ID] == plate[FIELD_LAB_ID]
        if not labs_agree:
            logger.error(
                f"ERROR: Source plate barcode {sample[FIELD_PLATE_BARCODE]} already exists with different lab_id "
                f"{plate[FIELD_LAB_ID]}", )
            return

        sample[FIELD_LH_SOURCE_PLATE_UUID] = plate[FIELD_LH_SOURCE_PLATE_UUID]
        updated_samples.append(sample)

    try:
        plates_to_insert: List[SourcePlateDoc] = []
        source_plates_collection = get_mongo_collection(
            mongo_db, COLLECTION_SOURCE_PLATES)

        # NOTE(review): plates created in this loop are only inserted after it ends, so two
        # samples sharing a barcode that is not yet in Mongo each get their own new plate.
        # Confirm whether callers guarantee this cannot happen.
        for sample in samples:
            barcode = sample[FIELD_PLATE_BARCODE]
            plate_in_mongo = source_plates_collection.find_one(
                {FIELD_BARCODE: barcode})

            if plate_in_mongo is None:
                # No plate stored for this barcode: build one from the sample itself.
                created_plate = new_mongo_source_plate(str(barcode),
                                                       str(sample[FIELD_LAB_ID]))
                plates_to_insert.append(created_plate)
                assign_plate_uuid(sample, created_plate, True)
            else:
                # A stored plate exists: use it, subject to the lab ID check.
                assign_plate_uuid(sample, plate_in_mongo)

        logger.debug(
            f"Attempting to insert {len(plates_to_insert)} new source plates")
        if plates_to_insert:
            source_plates_collection.insert_many(plates_to_insert, ordered=False)

    except Exception:
        logger.error("Failed assigning source plate UUIDs to samples.")
        raise

    return updated_samples
コード例 #19
0
    def test_when_for_one_priority_sample_doesnt_exist_the_related_sample(
        self, mongo_database, config, mlwh_connection, with_different_scenarios
    ):
        """``update_priority_samples`` must not raise when a priority sample's sample_id is rewritten.

        The first priority sample is given a different ``sample_id`` (presumably one with no
        matching document in the samples collection) before the update runs.
        """
        _, db = mongo_database
        priority_samples = get_mongo_collection(db, COLLECTION_PRIORITY_SAMPLES)

        # Creates one error sample priority by overwriting the first document's sample_id.
        first_priority_sample = priority_samples.find({})[0]
        priority_samples.find_one_and_update(
            {"_id": first_priority_sample["_id"]},
            {"$set": {"sample_id": "aaaaaaaxxxaaaaaaaaaaaaa1"}},
        )

        try:
            update_priority_samples(db, config, True)
        except Exception:
            # Testing the match in IMPORTANT_UNPROCESSED_SAMPLES_MONGO_QUERY
            # so if there isnt a match, an Exception isn't thrown but handled
            pytest.fail("Unexpected error ..")
コード例 #20
0
def test_export_to_mongo_logs_error_correctly_on_bulk_write_error_with_mix_of_errors(
        subject, mongo_database):
    """A ``BulkWriteError`` mixing error codes 11000 and 999 leaves both collections empty."""
    _, db = mongo_database

    # Two write errors with different codes; only the first carries an "op" entry.
    first_write_error = {"code": 11000, "op": MagicMock()}
    second_write_error = {"code": 999}
    simulated_failure = BulkWriteError({
        "errorLabels": [],
        "writeErrors": [first_write_error, second_write_error],
    })

    with patch.object(Collection, "insert_many", side_effect=simulated_failure):
        subject.export_to_mongo()

    # No documents were inserted in either collection.
    for collection_name in (COLLECTION_SAMPLES, COLLECTION_SOURCE_PLATES):
        collection = get_mongo_collection(db, collection_name)
        assert collection.count_documents({}) == 0
コード例 #21
0
def update_unprocessed_priority_samples_to_processed(db: Database, samples: List[SampleDoc]) -> None:
    """
    Set the processed field to true in the priority samples collection for the given samples.

    One ``update_one`` call is issued per sample, matching on the sample's sample ID.

    Arguments:
       db {Database} -- mongo db instance
       samples {list} -- a list of samples to update
    """
    logger.info("Updating Mongodb priority samples to processed")

    # Collect the identifiers up front; they select the priority samples to update.
    sample_ids = [sample[FIELD_SAMPLE_ID] for sample in samples]

    priority_samples_collection = get_mongo_collection(db, COLLECTION_PRIORITY_SAMPLES)
    mark_processed = {"$set": {FIELD_PROCESSED: True}}
    for sample_id in sample_ids:
        priority_samples_collection.update_one({FIELD_SAMPLE_ID: sample_id}, mark_processed)

    logger.info("Mongo update of processed for priority samples successful")
コード例 #22
0
def update_mongo(config: Config, updated_at: datetime) -> None:
    """Copy sample UUIDs from recently updated MLWH rows onto the matching Mongo samples.

    Rows are read from ``lighthouse_sample`` where ``updated_at`` is later than the given
    datetime (formatted to minute precision). For each row that has a sample UUID, the
    Mongo sample with the row's Mongo ID is updated when its stored UUID differs: the
    UUID is replaced, the UUID-updated flag is set, and the updated-at time is refreshed.

    Arguments:
        config {Config} -- application config specifying database details
        updated_at {datetime} -- only MLWH rows updated after this time are considered
    """
    with create_mongo_client(config) as client:
        mongo_db = get_mongo_db(config, client)

        samples_collection = get_mongo_collection(mongo_db, COLLECTION_SAMPLES)

        # The interpolated value comes from a datetime via strftime, not from free text.
        query = f"SELECT * FROM lighthouse_sample WHERE updated_at > '{updated_at.strftime('%Y-%m-%d %H:%M')}'"

        counter = 0
        for mysql_sample in mysql_sample_generator(config=config, query=query):
            mlwh_sample_uuid = mysql_sample.get(MLWH_LH_SAMPLE_UUID)

            if mlwh_sample_uuid is None:
                continue

            # Match on the Mongo ID and only when the stored UUID differs, so samples
            # that are already in sync are left untouched.
            mongo_sample = samples_collection.find_one_and_update(
                filter={
                    FIELD_MONGODB_ID:
                    ObjectId(mysql_sample.get(MLWH_MONGODB_ID)),
                    FIELD_LH_SAMPLE_UUID: {
                        "$ne": mlwh_sample_uuid,
                    },
                },
                update={
                    "$set": {
                        FIELD_LH_SAMPLE_UUID: mlwh_sample_uuid,
                        UUID_UPDATED: True,
                        FIELD_UPDATED_AT: datetime.utcnow(),
                    }
                },
            )

            if mongo_sample is None:
                continue

            counter += 1

            # Log progress only right after the counter changes. Checking on every row
            # would repeat the same line for each non-updating row while the counter
            # sits at a multiple of 5000.
            if counter % 5000 == 0:
                logger.debug(f"{counter = }")

        logger.debug(f"{counter} samples updated in mongo")
コード例 #23
0
def mongo_samples_by_date(config: Config, start_datetime: datetime,
                          end_datetime: datetime) -> List[SampleDoc]:
    """Get all samples from Mongo whose creation date falls within the given range.

    Arguments:
        config {Config} -- application config specifying database details
        start_datetime {datetime} -- lower limit (inclusive) of sample creation date
        end_datetime {datetime} -- upper limit (exclusive) of sample creation date
    Returns:
        List[SampleDoc] -- the matching Mongo samples, loaded into a list
    """
    created_in_range = {
        FIELD_CREATED_AT: {
            "$gte": start_datetime,
            "$lt": end_datetime,
        },
    }

    with create_mongo_client(config) as client:
        samples_collection = get_mongo_collection(get_mongo_db(config, client), COLLECTION_SAMPLES)
        matching_samples = list(samples_collection.find(created_in_range))

    return matching_samples
コード例 #24
0
    def record_import(self):
        """Create an import record in MongoDB for the message being processed.

        Nothing is recorded when the message has no plate barcode; an error is logged
        instead. Any exception raised while creating the record is logged and swallowed.
        """
        message = self._message
        plate_barcode = message.plate_barcode.value

        # We don't record imports without a plate barcode available. They would be meaningless without the barcode.
        if not plate_barcode:
            LOGGER.error(
                f"Import record not created for message with UUID '{message.message_uuid.value}' "
                "because it doesn't have a plate barcode.")
            return

        try:
            create_mongo_import_record(
                get_mongo_collection(self._mongo_db, COLLECTION_IMPORTS),
                message.centre_config,
                self._samples_inserted,
                plate_barcode,
                message.textual_errors_summary,
            )
        except Exception as ex:
            LOGGER.exception(ex)
コード例 #25
0
def test_record_import_creates_a_valid_import_record(freezer, subject,
                                                     mongo_database):
    """``record_import`` writes exactly one import record with the expected fields."""
    _, db = mongo_database

    # Simulate inserting all the records.
    subject._samples_inserted = 3

    subject.record_import()

    imports_collection = get_mongo_collection(db, COLLECTION_IMPORTS)

    expected_record = {
        "date": datetime.utcnow(),  # Time has been frozen for this test.
        "centre_name": "Alderley",
        "csv_file_used": "PLATE-001",
        "number_of_records": 3,
        "errors": ["No errors were reported during processing."],
    }
    assert imports_collection.count_documents(expected_record) == 1
コード例 #26
0
    def test_mlwh_was_correctly_updated_in_update_priority_samples(
        self, mongo_database, config, mlwh_connection, with_different_scenarios
    ):
        """After ``update_priority_samples``, the MLWH rows match the expected priority samples.

        Rows are fetched by Mongo ID and compared position by position with
        ``self.expected_mlwh_samples``; nothing is checked when that list is empty.
        """
        _, db = mongo_database
        update_priority_samples(db, config, True)
        cursor = mlwh_connection.cursor(dictionary=True)
        samples_collection = get_mongo_collection(db, COLLECTION_SAMPLES)

        if len(self.expected_mlwh_samples) == 0:
            return

        # Quote each expected Mongo ID for use in the IN (...) clause.
        mongodb_ids = ",".join(f'"{expected[FIELD_MONGODB_ID]}"' for expected in self.expected_mlwh_samples)
        cursor.execute(
            f"SELECT * FROM {config.MLWH_DB_DBNAME}.{MLWH_TABLE_NAME} "
            f" WHERE {MLWH_MONGODB_ID} IN ({mongodb_ids})"
        )
        rows = cursor.fetchall()
        cursor.close()

        for pos, priority_sample in enumerate(self.expected_mlwh_samples):
            row = rows[pos]
            # The sample document the priority sample points at.
            related_sample = samples_collection.find({FIELD_MONGODB_ID: priority_sample[FIELD_SAMPLE_ID]})[0]

            assert ObjectId(row[MLWH_MONGODB_ID]) == priority_sample[FIELD_MONGODB_ID]
            assert row[MLWH_ROOT_SAMPLE_ID] == related_sample[FIELD_ROOT_SAMPLE_ID]
            assert row[MLWH_MUST_SEQUENCE] == priority_sample[FIELD_MUST_SEQUENCE]
            assert row[MLWH_PREFERENTIALLY_SEQUENCE] == priority_sample[FIELD_PREFERENTIALLY_SEQUENCE]
コード例 #27
0
def get_centres_config(config: Config, data_source: str = "") -> List[CentreConf]:
    """Read the centres config from MongoDB, seeding the centres collection from the app config if absent.

    When the centres collection does not yet exist, a unique index on the centre name is created and the
    collection is populated from config.CENTRES before being read back.

    Arguments:
        config {Config}: The configuration object for the whole application.
        data_source {str}: The data source to keep, compared case-insensitively. An empty value (the
            default) applies no filter.

    Return:
        List[CentreConf]: The centres from MongoDB, restricted to the given data source when one is given.
    """

    def has_matching_data_source(centre) -> bool:
        try:
            return centre.get(CENTRE_KEY_DATA_SOURCE).lower() == data_source.lower()
        except AttributeError:
            # Raised when a value here has no .lower(), e.g. the centre has no data source entry
            # (.get() returns None); such centres never match.
            return False

    with create_mongo_client(config) as client:
        db = get_mongo_db(config, client)

        # The existence check is made before the collection handle is fetched, as in the original flow.
        already_populated = collection_exists(db, COLLECTION_CENTRES)
        centres_collection = get_mongo_collection(db, COLLECTION_CENTRES)

        if not already_populated:
            # First use: seed the collection from the application config.
            create_index(centres_collection, FIELD_CENTRE_NAME, unique=True)
            populate_mongo_collection(centres_collection, config.CENTRES, FIELD_CENTRE_NAME)  # type: ignore

        # Read every centre document back from MongoDB.
        centres = [cast(CentreConf, document) for document in centres_collection.find()]

        if not data_source:
            return centres

        return [centre for centre in centres if has_matching_data_source(centre)]
コード例 #28
0
def update_mongo_fields(mongo_db: Database, samples: List[SampleDoc]) -> bool:
    """Bulk updates sample uuid fields in the Mongo database

    Each sample is matched on its Mongo ID and has its sample UUID and source plate UUID fields set to
    the values carried by the given sample document.

    Arguments:
        mongo_db {Database} -- the Mongo database holding the samples collection
        samples {List[SampleDoc]} -- the list of samples whose uuid fields should be updated

    Returns:
        bool -- True once the updates have been written, or immediately when there is nothing to update
    """
    samples_collection = get_mongo_collection(mongo_db, COLLECTION_SAMPLES)

    update_operations = [
        UpdateOne(
            {FIELD_MONGODB_ID: sample[FIELD_MONGODB_ID]},
            {
                "$set": {
                    FIELD_LH_SAMPLE_UUID: sample[FIELD_LH_SAMPLE_UUID],
                    FIELD_LH_SOURCE_PLATE_UUID: sample[FIELD_LH_SOURCE_PLATE_UUID],
                }
            },
        )
        for sample in samples
    ]

    # pymongo's bulk_write raises InvalidOperation when given no operations, so an empty sample list is
    # treated as a successful no-op rather than being sent to the database.
    if not update_operations:
        return True

    samples_collection.bulk_write(update_operations)
    return True
コード例 #29
0
def test_export_to_mongo_adds_an_error_when_source_plate_exists_for_another_lab_id(
        subject, mongo_database):
    """Exporting the same plate again under a different lab ID records an error and inserts no plate.

    The first export stores the source plate. The message's lab ID is then changed to "NULL" and the
    export repeated with add_error patched, so the reported error can be inspected.
    """
    _, mongo_db = mongo_database

    # First export: gets the source plate into Mongo.
    subject.export_to_mongo()

    # Second export: same plate, but the message now claims a different lab ID.
    subject._message._body[FIELD_PLATE][FIELD_LAB_ID] = "NULL"
    with patch.object(CreatePlateMessage, "add_error") as mocked_add_error:
        subject.export_to_mongo()

    expected_error = CreatePlateError(
        type=ErrorType.ExportingPlateAlreadyExists,
        origin=RABBITMQ_CREATE_FEEDBACK_ORIGIN_PLATE,
        description=ANY,
        field=FIELD_LAB_ID,
    )
    mocked_add_error.assert_called_once_with(expected_error)

    # No source plate was stored under the "NULL" lab ID.
    plates_collection = get_mongo_collection(mongo_db, COLLECTION_SOURCE_PLATES)
    null_lab_plate_count = plates_collection.count_documents({FIELD_MONGO_LAB_ID: "NULL"})
    assert null_lab_plate_count == 0
コード例 #30
0
def update_mongo_filtered_positive_fields(config: Config,
                                          samples: List[SampleDoc],
                                          version: str,
                                          update_timestamp: datetime) -> bool:
    """Write the filtered positive result, version and timestamp of each sample to Mongo in batches

    The samples are processed in slices of SAMPLES_PER_QUERY. Within a slice the Mongo IDs are split into
    those whose filtered positive value is exactly True and all the others, and each group is written
    with a single update_many call (positives first).

    Arguments:
        config {Config} -- application config specifying database details
        samples {List[Sample]} -- the list of samples whose filtered positive fields should be updated
        version {str} -- the filtered positive identifier version used
        update_timestamp {datetime} -- the timestamp at which the update was performed

    Returns:
        bool -- True once every batch has been submitted
    """
    with create_mongo_client(config) as client:
        mongo_db = get_mongo_db(config, client)
        samples_collection = get_mongo_collection(mongo_db, COLLECTION_SAMPLES)

        SAMPLES_PER_QUERY = 15000
        num_samples = len(samples)
        logger.debug(
            f"Attempting to update {num_samples} rows in Mongo in batches of {SAMPLES_PER_QUERY}"
        )

        for samples_index in range(0, num_samples, SAMPLES_PER_QUERY):
            logger.debug(
                f"Updating records between {samples_index} and {samples_index + SAMPLES_PER_QUERY}"
            )

            batch_end = samples_index + SAMPLES_PER_QUERY
            samples_batch = samples[samples_index:batch_end]

            # Group the batch's Mongo IDs by filtered positive outcome. Only an exact True counts as
            # positive; any other value lands in the False group. Insertion order keeps True first.
            ids_by_outcome = {True: [], False: []}
            for sample in samples_batch:
                is_positive = sample[FIELD_FILTERED_POSITIVE] is True
                ids_by_outcome[is_positive].append(sample[FIELD_MONGODB_ID])

            # One update per outcome; both are issued even when a group has no IDs.
            for outcome, mongo_ids in ids_by_outcome.items():
                id_filter = {FIELD_MONGODB_ID: {"$in": mongo_ids}}
                new_values = {
                    "$set": {
                        FIELD_FILTERED_POSITIVE: outcome,
                        FIELD_FILTERED_POSITIVE_VERSION: version,
                        FIELD_FILTERED_POSITIVE_TIMESTAMP: update_timestamp,
                    }
                }
                samples_collection.update_many(id_filter, new_values)

        return True