def test_export_to_mongo_logs_error_correctly_on_samples_exception(subject, logger, mongo_database):
    _, mongo_database = mongo_database
    timeout_error = TimeoutError()

    with patch.object(Collection, "insert_many", side_effect=timeout_error):
        with pytest.raises(TransientRabbitError) as ex_info:
            subject.export_to_mongo()

    assert ex_info.value.message == (
        "There was an error updating MongoDB while exporting samples for message UUID 'CREATE_PLATE_UUID'."
    )

    logger.critical.assert_called_once()
    log_message = logger.critical.call_args.args[0]
    assert "CREATE_PLATE_UUID" in log_message
    assert str(timeout_error) in log_message

    source_plates_collection = get_mongo_collection(mongo_database, COLLECTION_SOURCE_PLATES)
    assert source_plates_collection.count_documents({}) == 0
    samples_collection = get_mongo_collection(mongo_database, COLLECTION_SAMPLES)
    assert samples_collection.count_documents({}) == 0

    logger.exception.assert_called_once_with(timeout_error)
def test_error_run(mongo_database, testing_files_for_process, pyodbc_conn):
    _, mongo_database = mongo_database

    with patch("crawler.file_processing.CentreFile.insert_samples_from_docs_into_mlwh"):
        run(False, False, False, "crawler.config.integration")

        # We expect to have four collections following import
        centres_collection = get_mongo_collection(mongo_database, COLLECTION_CENTRES)
        imports_collection = get_mongo_collection(mongo_database, COLLECTION_IMPORTS)
        samples_collection = get_mongo_collection(mongo_database, COLLECTION_SAMPLES)
        source_plates_collection = get_mongo_collection(mongo_database, COLLECTION_SOURCE_PLATES)

        # We expect files in the errors directory after the first run
        (_, _, files) = next(os.walk("tmp/backups/TEST/errors"))
        assert 2 == len(files)

        _ = shutil.copytree("tests/test_files/good", "tmp/files", dirs_exist_ok=True)
        _ = shutil.copytree("tests/test_files/malformed", "tmp/files", dirs_exist_ok=True)

        run(False, False, False, "crawler.config.integration")

        # The number of centres should be the same as before
        assert centres_collection.count_documents({}) == NUMBER_CENTRES

        # The source plates count should be the same as before
        assert source_plates_collection.count_documents({}) == NUMBER_ACCEPTED_SOURCE_PLATES

        # The samples count should be the same as before
        assert samples_collection.count_documents({}) == NUMBER_VALID_SAMPLES

        # We expect an additional file in the errors directory after the second run
        (_, _, files) = next(os.walk("tmp/backups/TEST/errors"))
        assert 3 == len(files)

        # We get an additional import record
        assert imports_collection.count_documents({}) == NUMBER_OF_FILES_PROCESSED + 1
def test_export_to_mongo_logs_error_correctly_on_source_plate_exception(subject, logger, mongo_database):
    _, mongo_database = mongo_database
    timeout_error = TimeoutError()

    with patch("crawler.processing.create_plate_exporter.get_mongo_collection") as get_collection:
        get_collection.side_effect = timeout_error

        with pytest.raises(TransientRabbitError) as ex_info:
            subject.export_to_mongo()

    assert (
        ex_info.value.message == "There was an error updating MongoDB while exporting plate with barcode 'PLATE-001'."
    )

    logger.critical.assert_called_once()
    log_message = logger.critical.call_args.args[0]
    assert "PLATE-001" in log_message
    assert str(timeout_error) in log_message

    source_plates_collection = get_mongo_collection(mongo_database, COLLECTION_SOURCE_PLATES)
    assert source_plates_collection.count_documents({}) == 0
    samples_collection = get_mongo_collection(mongo_database, COLLECTION_SAMPLES)
    assert samples_collection.count_documents({}) == 0

    logger.exception.assert_called_once_with(timeout_error)
def test_run(mongo_database, testing_files_for_process, pyodbc_conn):
    _, mongo_database = mongo_database

    with patch("crawler.file_processing.CentreFile.insert_samples_from_docs_into_mlwh"):
        run(False, False, False, "crawler.config.integration")

    # We expect to have four collections following import
    centres_collection = get_mongo_collection(mongo_database, COLLECTION_CENTRES)
    imports_collection = get_mongo_collection(mongo_database, COLLECTION_IMPORTS)
    samples_collection = get_mongo_collection(mongo_database, COLLECTION_SAMPLES)
    source_plates_collection = get_mongo_collection(mongo_database, COLLECTION_SOURCE_PLATES)

    # We record our test centres
    assert centres_collection.count_documents({}) == NUMBER_CENTRES
    assert centres_collection.count_documents({FIELD_CENTRE_NAME: "Test Centre"}) == 1

    # We record all our source plates
    assert source_plates_collection.count_documents({}) == NUMBER_ACCEPTED_SOURCE_PLATES

    # Centres that we don't process unconsolidated files for
    assert source_plates_collection.count_documents({"barcode": "AP123"}) == 0
    assert source_plates_collection.count_documents({"barcode": "MK123"}) == 0
    assert source_plates_collection.count_documents({"barcode": "MK456"}) == 0
    assert source_plates_collection.count_documents({"barcode": "GLS123"}) == 0
    assert source_plates_collection.count_documents({"barcode": "GLS789"}) == 0

    # Centres that process all files
    assert source_plates_collection.count_documents({"barcode": "CB123"}) == 1
    assert source_plates_collection.count_documents({"barcode": "TS789"}) == 1

    # We record *all* our samples
    assert samples_collection.count_documents({}) == NUMBER_VALID_SAMPLES, (
        f"Wrong number of samples inserted. Expected: {NUMBER_VALID_SAMPLES}, Actual: "
        f"{samples_collection.count_documents({})}"
    )
    assert samples_collection.count_documents({"RNA ID": "CB123_A09", "source": "Cambridge-az"}) == 1
    assert samples_collection.count_documents({"RNA ID": "23JAN21-0001Q_A11", "source": "Randox"}) == 1

    # We get one import per centre
    assert imports_collection.count_documents({}) == NUMBER_OF_FILES_PROCESSED, (
        f"Wrong number of imports inserted. Expected: {NUMBER_OF_FILES_PROCESSED}, Actual: "
        f"{imports_collection.count_documents({})}"
    )

    # check number of success/error files for Alderley
    (_, _, files) = next(os.walk("tmp/backups/ALDP/successes"))
    assert len(files) == 0, f"Wrong number of success files. Expected: 0, Actual: {len(files)}"
    (_, _, files) = next(os.walk("tmp/backups/ALDP/errors"))
    assert len(files) == 3, f"Wrong number of error files. Expected: 3, Actual: {len(files)}"

    # check number of success/error files for Randox
    (_, _, files) = next(os.walk("tmp/backups/RAND/successes"))
    assert len(files) == 1, f"Wrong number of success files. Expected: 1, Actual: {len(files)}"
    (_, _, files) = next(os.walk("tmp/backups/RAND/errors"))
    assert len(files) == 0, f"Wrong number of error files. Expected: 0, Actual: {len(files)}"

    # check the code cleaned up the temporary files
    (_, subfolders, files) = next(os.walk("tmp/files/"))
    assert 0 == len(subfolders), f"Wrong number of subfolders. Expected: 0, Actual: {len(subfolders)}"
def process(run_id: str, config: Optional[Config] = None) -> List[List[str]]:
    """Generates cherrypicker test data for processing by Crawler and then processes it via the usual runner.

    The specification of the plates to be generated should be in Mongo. Each plate will contain an exact number of
    positive results between 0 and 96 as specified. Up to 200 plates can be generated at a time.

    Arguments:
        run_id: str - The ID of the run. If this is not found in Mongo an exception will be thrown.
        config: Config - Optional application config; when not provided it is loaded via get_config().

    Returns:
        Metadata about the plates generated, as:
        [ [ "barcode1", "description1" ], [ "barcode2", "description2" ] ]
    """
    logger.info("Begin generating data.")

    if config is None:
        config, _ = get_config()

    with create_mongo_client(config) as mongo_client:
        mongo_db = get_mongo_db(config, mongo_client)
        collection = get_mongo_collection(mongo_db, COLLECTION_CHERRYPICK_TEST_DATA)

        return process_run(config, collection, run_id)
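# A hedged usage sketch for process(). This helper and the run ID value are
# hypothetical: a real ID must already identify a document in the cherrypick
# test data collection in Mongo, otherwise process() raises as documented above.
def example_generate_run() -> None:
    metadata = process("000000000000000000000000")  # hypothetical 24-character run ID
    for barcode, description in metadata:
        logger.info(f"Generated plate {barcode}: {description}")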
def test_error_run_duplicates_in_imports_message(mongo_database, testing_files_for_process, pyodbc_conn):
    _, mongo_database = mongo_database

    # Copy an additional file with duplicates
    _ = shutil.copytree("tests/files_with_duplicate_samples", "tmp/files", dirs_exist_ok=True)

    with patch("crawler.file_processing.CentreFile.insert_samples_from_docs_into_mlwh"):
        run(False, False, False, "crawler.config.integration")

    # Fetch the imports collection; expect it to contain the additional duplicate error file record
    imports_collection = get_mongo_collection(mongo_database, COLLECTION_IMPORTS)
    assert imports_collection.count_documents({}) == NUMBER_OF_FILES_PROCESSED + 1

    # Fetch the Test Centre record
    test_centre_imports = imports_collection.find_one({"centre_name": "Test Centre"})

    # We expect 2 errors for this file: type 5 (duplicates) errors, comprising 1 message and 1 aggregate count
    assert len(test_centre_imports["errors"]) == 2

    # We expect the errors to contain messages for type 5 duplicates: an aggregate total and a message line
    assert "Total number of Duplicates within file errors (TYPE 5): 1" in test_centre_imports["errors"][0]
    assert (
        "WARNING: Duplicates detected within the file. (TYPE 5) (e.g. Duplicated, line: 3, root_sample_id: 16)"
    ) in test_centre_imports["errors"][1]
def test_export_to_mongo_reverts_the_transaction_when_duplicate_samples_inserted(subject, mongo_database):
    _, mongo_database = mongo_database
    samples = subject._message._body[FIELD_PLATE][FIELD_SAMPLES]
    samples[0] = samples[1]

    subject.export_to_mongo()

    # No documents were inserted in either collection
    samples_collection = get_mongo_collection(mongo_database, COLLECTION_SAMPLES)
    assert samples_collection.count_documents({}) == 0
    source_plates_collection = get_mongo_collection(mongo_database, COLLECTION_SOURCE_PLATES)
    assert source_plates_collection.count_documents({}) == 0
def test_get_mongo_collection(mongo_database):
    _, mongo_database = mongo_database
    collection_name = "test_collection"
    test_collection = get_mongo_collection(mongo_database, collection_name)

    assert type(test_collection) == Collection
    assert test_collection.name == collection_name
def positive_result_samples_from_mongo(config: Config, plate_barcodes: Optional[List[str]] = None) -> List[SampleDoc]:
    """Fetch positive samples from Mongo contained within specified plates.

    Arguments:
        config {Config} -- application config specifying database details
        plate_barcodes {Optional[List[str]]} -- barcodes of plates whose samples we are concerned with

    Returns:
        List[Dict[str, str]] -- List of positive samples contained within specified plates
    """
    with create_mongo_client(config) as client:
        mongo_db = get_mongo_db(config, client)
        samples_collection = get_mongo_collection(mongo_db, COLLECTION_SAMPLES)

        pipeline = [{"$match": {FIELD_RESULT: {"$eq": POSITIVE_RESULT_VALUE}}}]

        if plate_barcodes is not None:
            pipeline.append({"$match": {FIELD_PLATE_BARCODE: {"$in": plate_barcodes}}})  # type: ignore

        # This loads everything from the aggregation cursor into RAM (assuming there is enough memory).
        # Should we project to an object that has fewer fields?
        return list(samples_collection.aggregate(pipeline))
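# A hedged sketch of the projection idea raised in the comment above: a $project
# stage keeps only the fields the caller needs, shrinking the list held in RAM.
# This variant function is hypothetical and the projected fields are illustrative.
def positive_result_sample_summaries_from_mongo(
    config: Config, plate_barcodes: Optional[List[str]] = None
) -> List[SampleDoc]:
    with create_mongo_client(config) as client:
        mongo_db = get_mongo_db(config, client)
        samples_collection = get_mongo_collection(mongo_db, COLLECTION_SAMPLES)

        pipeline = [{"$match": {FIELD_RESULT: {"$eq": POSITIVE_RESULT_VALUE}}}]
        if plate_barcodes is not None:
            pipeline.append({"$match": {FIELD_PLATE_BARCODE: {"$in": plate_barcodes}}})  # type: ignore

        # Keep only identifying fields; everything else is dropped before reaching the client
        pipeline.append({"$project": {FIELD_PLATE_BARCODE: 1, FIELD_RESULT: 1}})  # type: ignore

        return list(samples_collection.aggregate(pipeline))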
def query_any_unprocessed_samples(db: Database) -> List[SampleDoc]:
    """Returns the list of unprocessed priority samples (from the priority_samples Mongo collection) that have at
    least one related sample (from the samples Mongo collection).

    Arguments:
        db {Database} -- mongo db instance
    """
    priority_samples_collection = get_mongo_collection(db, COLLECTION_PRIORITY_SAMPLES)

    IMPORTANT_UNPROCESSED_SAMPLES_MONGO_QUERY: Final[List[Mapping[str, Any]]] = [
        # All unprocessed priority samples
        {
            "$match": {FIELD_PROCESSED: False},
        },
        # Joins priority_samples and samples
        {
            "$lookup": {
                "from": "samples",
                "localField": FIELD_SAMPLE_ID,
                "foreignField": FIELD_MONGODB_ID,
                "as": "related_samples",
            }
        },
        # match is required so "Exception: Cannot unpad coordinate" isn't thrown
        # Only priority samples with a sample associated with them
        {"$match": {"related_samples": {"$ne": []}}},
        # Copy all sample attributes into the root of the object (merge sample+priority_sample)
        {"$replaceRoot": {"newRoot": {"$mergeObjects": [{"$arrayElemAt": ["$related_samples", 0]}, "$$ROOT"]}}},
        # Prune the branch for related samples as all that info is now in the root of the object
        {"$project": {"related_samples": 0}},
    ]

    value = priority_samples_collection.aggregate(IMPORTANT_UNPROCESSED_SAMPLES_MONGO_QUERY)
    return list(value)
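# A hedged usage sketch: each document returned above is the sample document
# merged with the priority sample's fields at the root. This helper is
# hypothetical and only illustrates consuming that merged shape.
def log_unprocessed_priority_samples(db: Database) -> None:
    for doc in query_any_unprocessed_samples(db):
        # FIELD_SAMPLE_ID and FIELD_PROCESSED are the priority fields; sample fields sit alongside them
        logger.info(f"Unprocessed priority sample referencing sample ID {doc[FIELD_SAMPLE_ID]}")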
def filtered_positive_fields_set(config: Config, start_datetime: datetime, end_datetime: datetime) -> bool:
    """Find out whether the filtered positive version field has been set on any of the samples in the date range.
    This would indicate that the migration has already been run on those samples.

    Args:
        config {Config} -- application config specifying database details
        start_datetime {datetime} -- lower limit of sample creation date
        end_datetime {datetime} -- upper limit of sample creation date

    Returns:
        {bool} -- whether the filtered positive version is set on any samples in the range
    """
    with create_mongo_client(config) as client:
        mongo_db = get_mongo_db(config, client)
        samples_collection = get_mongo_collection(mongo_db, COLLECTION_SAMPLES)

        num_versioned_samples: int = samples_collection.count_documents(
            {
                FIELD_CREATED_AT: {"$gte": start_datetime, "$lt": end_datetime},
                FIELD_FILTERED_POSITIVE: {"$exists": True},
            }
        )

        return num_versioned_samples > 0
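# A hedged usage sketch: a migration might use this check as a guard before
# writing filtered positive fields. The helper and its date range are
# illustrative only, not part of the migration code above.
def ensure_migration_not_already_run(config: Config) -> None:
    start = datetime(2020, 4, 1)
    end = datetime(2020, 5, 1)
    if filtered_positive_fields_set(config, start, end):
        raise RuntimeError("Filtered positive fields already set in this range; the migration may have already run.")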
def test_error_run_duplicates_plate_barcodes_from_different_labs_message(
    mongo_database, testing_files_for_process, pyodbc_conn
):
    _, mongo_database = mongo_database

    # Copy an additional file with duplicate barcodes
    _ = shutil.copytree("tests/test_files/duplicate_barcodes", "tmp/files", dirs_exist_ok=True)

    with patch("crawler.file_processing.CentreFile.insert_samples_from_docs_into_mlwh"):
        run(False, False, False, "crawler.config.integration")

    # Fetch the imports collection; expect it to contain the additional duplicate error file record
    imports_collection = get_mongo_collection(mongo_database, COLLECTION_IMPORTS)
    assert imports_collection.count_documents({}) == NUMBER_OF_FILES_PROCESSED + 1

    # Fetch the Test Centre record
    test_centre_imports = imports_collection.find_one({"centre_name": "Test Centre"})
    assert test_centre_imports is not None

    # We expect 2 errors for this file: type 25 (duplicate barcode) errors, comprising 1 message and 1 aggregate count
    assert len(test_centre_imports["errors"]) == 2

    # We expect the errors to contain messages for type 25 duplicates: an aggregate total and a message line
    assert (
        "Total number of 'Duplicate source plate barcodes from different labs' errors (TYPE 25): 2"
        in test_centre_imports["errors"][0]
    )
    assert ("ERROR: Found duplicate source plate barcodes from different labs (TYPE 25)") in test_centre_imports[
        "errors"
    ][1]
def test_export_to_mongo_puts_samples_in_mongo(subject, mongo_database):
    _, mongo_database = mongo_database
    samples_collection = get_mongo_collection(mongo_database, COLLECTION_SAMPLES)
    assert samples_collection.count_documents({}) == 0

    subject.export_to_mongo()

    assert samples_collection.count_documents({}) == 3
    assert (
        samples_collection.count_documents(
            {
                FIELD_MONGO_MESSAGE_UUID: "CREATE_PLATE_UUID",
                FIELD_MONGO_LAB_ID: "CPTD",
                FIELD_SOURCE: "Alderley",
            }
        )
        == 3
    )
    assert (
        samples_collection.count_documents(
            {FIELD_MONGO_SAMPLE_INDEX: 1, FIELD_LH_SAMPLE_UUID: "UUID_001", FIELD_COORDINATE: "A01"}
        )
        == 1
    )
    assert (
        samples_collection.count_documents(
            {FIELD_MONGO_SAMPLE_INDEX: 2, FIELD_LH_SAMPLE_UUID: "UUID_002", FIELD_COORDINATE: "E06"}
        )
        == 1
    )
    assert (
        samples_collection.count_documents(
            {FIELD_MONGO_SAMPLE_INDEX: 3, FIELD_LH_SAMPLE_UUID: "UUID_003", FIELD_COORDINATE: "H12"}
        )
        == 1
    )
def update_dart(config: Config, start_datetime: datetime, end_datetime: datetime) -> None:
    try:
        with create_mongo_client(config) as client:
            mongo_db = get_mongo_db(config, client)
            samples_collection = get_mongo_collection(mongo_db, COLLECTION_SAMPLES)

            # get samples from mongo between these time ranges and with updated UUIDs
            samples = get_samples(samples_collection, start_datetime, end_datetime)

            if not samples:
                logger.info("No samples in this time range and with updated UUIDs")
                return

            logger.debug(f"{len(samples)} samples to process")

            _, plate_barcodes = extract_required_cp_info(samples)

            logger.debug(f"{len(plate_barcodes)} unique plate barcodes")

            update_dart_fields(config, samples)
    except Exception as e:
        logger.error("Error while attempting to migrate all DBs")
        logger.exception(e)
def test_export_to_mongo_sets_the_source_plate_uuid(subject, mongo_database):
    _, mongo_database = mongo_database

    subject.export_to_mongo()

    source_plates_collection = get_mongo_collection(mongo_database, COLLECTION_SOURCE_PLATES)
    source_plate = source_plates_collection.find_one({"barcode": "PLATE-001"})
    plate_uuid = source_plate and source_plate[FIELD_LH_SOURCE_PLATE_UUID]

    assert subject._plate_uuid == plate_uuid
def test_export_to_mongo_puts_a_source_plate_in_mongo(subject, mongo_database):
    _, mongo_database = mongo_database
    source_plates_collection = get_mongo_collection(mongo_database, COLLECTION_SOURCE_PLATES)
    assert source_plates_collection.count_documents({"barcode": "PLATE-001"}) == 0

    subject.export_to_mongo()

    assert source_plates_collection.count_documents({"barcode": "PLATE-001"}) == 1
def _record_source_plate_in_mongo_db(self, session: ClientSession) -> ExportResult:
    """Find an existing plate in MongoDB or add a new one for the plate in the message."""
    try:
        plate_barcode = self._message.plate_barcode.value
        lab_id_field = self._message.lab_id

        session_database = get_mongo_db(self._config, session.client)
        source_plates_collection = get_mongo_collection(session_database, COLLECTION_SOURCE_PLATES)
        mongo_plate = source_plates_collection.find_one(filter={FIELD_BARCODE: plate_barcode}, session=session)

        if mongo_plate is not None:
            # There was a plate in Mongo DB for this barcode, so check that the lab ID matches, then return.
            self._plate_uuid = mongo_plate[FIELD_LH_SOURCE_PLATE_UUID]

            if mongo_plate[FIELD_MONGO_LAB_ID] != lab_id_field.value:
                return ExportResult(
                    success=False,
                    create_plate_errors=[
                        CreatePlateError(
                            type=ErrorType.ExportingPlateAlreadyExists,
                            origin=RABBITMQ_CREATE_FEEDBACK_ORIGIN_PLATE,
                            description=(
                                f"Plate barcode '{plate_barcode}' already exists "
                                f"with a different lab ID: '{mongo_plate[FIELD_MONGO_LAB_ID]}'"
                            ),
                            field=lab_id_field.name,
                        )
                    ],
                )

            return ExportResult(success=True, create_plate_errors=[])

        # Create a new plate for this message.
        mongo_plate = create_source_plate_doc(plate_barcode, lab_id_field.value)
        source_plates_collection.insert_one(mongo_plate, session=session)
        self._plate_uuid = mongo_plate[FIELD_LH_SOURCE_PLATE_UUID]

        return ExportResult(success=True, create_plate_errors=[])
    except Exception as ex:
        LOGGER.critical(f"Error accessing MongoDB during export of source plate '{plate_barcode}': {ex}")
        LOGGER.exception(ex)

        raise TransientRabbitError(
            f"There was an error updating MongoDB while exporting plate with barcode '{plate_barcode}'."
        )
def samples_updated_with_source_plate_uuids(mongo_db: Database, samples: List[SampleDoc]) -> List[SampleDoc]:
    """Update sample docs with the UUID of their source plate, creating new source plates in Mongo where they don't
    already exist."""
    logger.debug("Attempting to update docs with source plate UUIDs")

    updated_samples: List[SampleDoc] = []

    def update_doc_from_source_plate(
        sample: SampleDoc, existing_plate: SourcePlateDoc, skip_lab_check: bool = False
    ) -> None:
        if skip_lab_check or sample[FIELD_LAB_ID] == existing_plate[FIELD_LAB_ID]:
            sample[FIELD_LH_SOURCE_PLATE_UUID] = existing_plate[FIELD_LH_SOURCE_PLATE_UUID]
            updated_samples.append(sample)
        else:
            logger.error(
                f"ERROR: Source plate barcode {sample[FIELD_PLATE_BARCODE]} already exists with different lab_id "
                f"{existing_plate[FIELD_LAB_ID]}",
            )

    try:
        new_plates: List[SourcePlateDoc] = []
        source_plates_collection = get_mongo_collection(mongo_db, COLLECTION_SOURCE_PLATES)

        for sample in samples:
            plate_barcode = sample[FIELD_PLATE_BARCODE]

            # attempt an update from plates that exist in mongo
            existing_mongo_plate = source_plates_collection.find_one({FIELD_BARCODE: plate_barcode})
            if existing_mongo_plate is not None:
                update_doc_from_source_plate(sample, existing_mongo_plate)
                continue

            # then add a new plate
            new_plate = new_mongo_source_plate(str(plate_barcode), str(sample[FIELD_LAB_ID]))
            new_plates.append(new_plate)
            update_doc_from_source_plate(sample, new_plate, True)

        logger.debug(f"Attempting to insert {len(new_plates)} new source plates")
        if len(new_plates) > 0:
            source_plates_collection.insert_many(new_plates, ordered=False)
    except Exception:
        logger.error("Failed assigning source plate UUIDs to samples.")
        raise

    return updated_samples
def test_when_for_one_priority_sample_doesnt_exist_the_related_sample(
    self, mongo_database, config, mlwh_connection, with_different_scenarios
):
    # Create one priority sample whose sample_id doesn't match any existing sample
    _, mongo_database = mongo_database
    collection = get_mongo_collection(mongo_database, COLLECTION_PRIORITY_SAMPLES)
    _id = collection.find({})[0]["_id"]
    collection.find_one_and_update({"_id": _id}, {"$set": {"sample_id": "aaaaaaaxxxaaaaaaaaaaaaa1"}})

    try:
        update_priority_samples(mongo_database, config, True)
    except Exception:
        # Testing the match in IMPORTANT_UNPROCESSED_SAMPLES_MONGO_QUERY:
        # if there isn't a match, an exception shouldn't be thrown; it should be handled
        pytest.fail("Unexpected error ..")
def test_export_to_mongo_logs_error_correctly_on_bulk_write_error_with_mix_of_errors(subject, mongo_database):
    _, mongo_database = mongo_database
    bulk_write_error = BulkWriteError(
        {"errorLabels": [], "writeErrors": [{"code": 11000, "op": MagicMock()}, {"code": 999}]}
    )

    with patch.object(Collection, "insert_many", side_effect=bulk_write_error):
        subject.export_to_mongo()

    # No documents were inserted in either collection
    samples_collection = get_mongo_collection(mongo_database, COLLECTION_SAMPLES)
    assert samples_collection.count_documents({}) == 0
    source_plates_collection = get_mongo_collection(mongo_database, COLLECTION_SOURCE_PLATES)
    assert source_plates_collection.count_documents({}) == 0
def update_unprocessed_priority_samples_to_processed(db: Database, samples: List[SampleDoc]) -> None:
    """Update the given samples' processed field in Mongo to true.

    Arguments:
        samples {list} -- a list of samples to update
    """

    def extract_sample_id(sample: SampleDoc) -> ModifiedRowValue:
        return sample[FIELD_SAMPLE_ID]

    logger.info("Updating Mongodb priority samples to processed")

    # Use the stored identifiers to update the priority_samples collection to processed true
    sample_ids = list(map(extract_sample_id, samples))
    priority_samples_collection = get_mongo_collection(db, COLLECTION_PRIORITY_SAMPLES)
    for sample_id in sample_ids:
        priority_samples_collection.update_one({FIELD_SAMPLE_ID: sample_id}, {"$set": {FIELD_PROCESSED: True}})

    logger.info("Mongo update of processed for priority samples successful")
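# A hedged alternative sketch, not the code path used above: the same flags
# could be set with a single update_many and an $in filter, avoiding one round
# trip per sample. This variant function is hypothetical.
def update_unprocessed_priority_samples_to_processed_bulk(db: Database, samples: List[SampleDoc]) -> None:
    sample_ids = [sample[FIELD_SAMPLE_ID] for sample in samples]
    priority_samples_collection = get_mongo_collection(db, COLLECTION_PRIORITY_SAMPLES)

    # One round trip: match all of the given sample IDs and set processed in bulk
    priority_samples_collection.update_many({FIELD_SAMPLE_ID: {"$in": sample_ids}}, {"$set": {FIELD_PROCESSED: True}})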
def update_mongo(config: Config, updated_at: datetime) -> None:
    with create_mongo_client(config) as client:
        mongo_db = get_mongo_db(config, client)
        samples_collection = get_mongo_collection(mongo_db, COLLECTION_SAMPLES)

        counter = 0
        for mysql_sample in mysql_sample_generator(
            config=config,
            query=f"SELECT * FROM lighthouse_sample WHERE updated_at > '{updated_at.strftime('%Y-%m-%d %H:%M')}'",
        ):
            mlwh_sample_uuid = mysql_sample.get(MLWH_LH_SAMPLE_UUID)

            if mlwh_sample_uuid is None:
                continue

            mongo_sample = samples_collection.find_one_and_update(
                filter={
                    FIELD_MONGODB_ID: ObjectId(mysql_sample.get(MLWH_MONGODB_ID)),
                    FIELD_LH_SAMPLE_UUID: {
                        "$ne": mlwh_sample_uuid,
                    },
                },
                update={
                    "$set": {
                        FIELD_LH_SAMPLE_UUID: mlwh_sample_uuid,
                        UUID_UPDATED: True,
                        FIELD_UPDATED_AT: datetime.utcnow(),
                    }
                },
            )

            if mongo_sample is not None:
                counter += 1

            if counter > 0 and (counter % 5000) == 0:
                logger.debug(f"{counter = }")

        logger.debug(f"{counter} samples updated in mongo")
def mongo_samples_by_date(config: Config, start_datetime: datetime, end_datetime: datetime) -> List[SampleDoc]:
    """Gets all samples from Mongo created within the given date range, i.e. before Crawler started setting filtered
    positive fields.

    Arguments:
        config {Config} -- application config specifying database details
        start_datetime {datetime} -- lower limit of sample creation date
        end_datetime {datetime} -- upper limit of sample creation date

    Returns:
        List[Sample] -- List of Mongo samples created before the filtered positive Crawler changes
    """
    with create_mongo_client(config) as client:
        mongo_db = get_mongo_db(config, client)
        samples_collection = get_mongo_collection(mongo_db, COLLECTION_SAMPLES)
        return list(
            samples_collection.find(
                {
                    FIELD_CREATED_AT: {"$gte": start_datetime, "$lt": end_datetime},
                }
            )
        )
def record_import(self):
    plate_barcode = self._message.plate_barcode.value
    if not plate_barcode:
        # We don't record imports without a plate barcode available. They would be meaningless without the barcode.
        LOGGER.error(
            f"Import record not created for message with UUID '{self._message.message_uuid.value}' "
            "because it doesn't have a plate barcode."
        )
        return

    try:
        imports_collection = get_mongo_collection(self._mongo_db, COLLECTION_IMPORTS)

        create_mongo_import_record(
            imports_collection,
            self._message.centre_config,
            self._samples_inserted,
            plate_barcode,
            self._message.textual_errors_summary,
        )
    except Exception as ex:
        LOGGER.exception(ex)
def test_record_import_creates_a_valid_import_record(freezer, subject, mongo_database):
    _, mongo_database = mongo_database
    subject._samples_inserted = 3  # Simulate inserting all the records.

    subject.record_import()

    imports_collection = get_mongo_collection(mongo_database, COLLECTION_IMPORTS)
    assert (
        imports_collection.count_documents(
            {
                "date": datetime.utcnow(),  # Time has been frozen for this test.
                "centre_name": "Alderley",
                "csv_file_used": "PLATE-001",
                "number_of_records": 3,
                "errors": ["No errors were reported during processing."],
            }
        )
        == 1
    )
def test_mlwh_was_correctly_updated_in_update_priority_samples(
    self, mongo_database, config, mlwh_connection, with_different_scenarios
):
    _, mongo_database = mongo_database
    update_priority_samples(mongo_database, config, True)

    cursor = mlwh_connection.cursor(dictionary=True)
    samples_collection = get_mongo_collection(mongo_database, COLLECTION_SAMPLES)

    if len(self.expected_mlwh_samples) > 0:
        mongodb_ids = ",".join(map(lambda x: f'"{x[FIELD_MONGODB_ID]}"', self.expected_mlwh_samples))
        cursor.execute(
            f"SELECT * FROM {config.MLWH_DB_DBNAME}.{MLWH_TABLE_NAME} "
            f" WHERE {MLWH_MONGODB_ID} IN ({mongodb_ids})"
        )
        rows = cursor.fetchall()
        cursor.close()

        for pos, priority_sample in enumerate(self.expected_mlwh_samples):
            expected_sample = samples_collection.find({FIELD_MONGODB_ID: priority_sample[FIELD_SAMPLE_ID]})[0]
            assert ObjectId(rows[pos][MLWH_MONGODB_ID]) == priority_sample[FIELD_MONGODB_ID]
            assert rows[pos][MLWH_ROOT_SAMPLE_ID] == expected_sample[FIELD_ROOT_SAMPLE_ID]
            assert rows[pos][MLWH_MUST_SEQUENCE] == priority_sample[FIELD_MUST_SEQUENCE]
            assert rows[pos][MLWH_PREFERENTIALLY_SEQUENCE] == priority_sample[FIELD_PREFERENTIALLY_SEQUENCE]
def get_centres_config(config: Config, data_source: str = "") -> List[CentreConf]:
    """Get the centres config from MongoDB. If MongoDB does not contain any centres config, it will become populated
    with the values in the app config for centres.

    Arguments:
        config {Config}: The configuration object for the whole application.
        data_source {str}: The data source filter to apply to centre configs, or an empty string to apply no filter.

    Return:
        List[CentreConf]: A list of CentreConf from MongoDB matching the given data source.
    """
    with create_mongo_client(config) as client:
        db = get_mongo_db(config, client)

        centres_collection_exists = collection_exists(db, COLLECTION_CENTRES)
        centres_collection = get_mongo_collection(db, COLLECTION_CENTRES)

        if not centres_collection_exists:
            # Populate the centres collection from the config values
            create_index(centres_collection, FIELD_CENTRE_NAME, unique=True)
            populate_mongo_collection(centres_collection, config.CENTRES, FIELD_CENTRE_NAME)  # type: ignore

        # Get the centres collection from MongoDB
        cursor = centres_collection.find()
        centres = list(map(lambda x: cast(CentreConf, x), cursor))

        if data_source:

            def test_data_source(centre):
                try:
                    return centre.get(CENTRE_KEY_DATA_SOURCE).lower() == data_source.lower()
                except AttributeError:
                    return False

            centres = list(filter(test_data_source, centres))

        return centres
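# A hedged usage sketch: fetch only the centres configured for one data source.
# The "SFTP" value and this helper are illustrative; real data source values
# depend on the application's centre config.
def centre_names_for_data_source(config: Config, data_source: str = "SFTP") -> List[str]:
    centres = get_centres_config(config, data_source)
    return [str(centre.get(FIELD_CENTRE_NAME, "")) for centre in centres]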
def update_mongo_fields(mongo_db: Database, samples: List[SampleDoc]) -> bool:
    """Bulk updates sample uuid fields in the Mongo database

    Arguments:
        mongo_db {Database} -- the Mongo database containing the samples collection
        samples {List[Sample]} -- the list of samples whose uuid fields should be updated

    Returns:
        bool -- whether the updates completed successfully
    """
    samples_collection = get_mongo_collection(mongo_db, COLLECTION_SAMPLES)
    samples_collection.bulk_write(
        [
            UpdateOne(
                {FIELD_MONGODB_ID: sample[FIELD_MONGODB_ID]},
                {
                    "$set": {
                        FIELD_LH_SAMPLE_UUID: sample[FIELD_LH_SAMPLE_UUID],
                        FIELD_LH_SOURCE_PLATE_UUID: sample[FIELD_LH_SOURCE_PLATE_UUID],
                    }
                },
            )
            for sample in samples
        ]
    )
    return True
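# A hedged usage sketch: each sample passed in must already carry its Mongo _id
# and both UUID values. This helper and the document below are purely
# illustrative.
def example_uuid_backfill(mongo_db: Database) -> None:
    samples: List[SampleDoc] = [
        {
            FIELD_MONGODB_ID: ObjectId("0" * 24),  # hypothetical existing sample _id
            FIELD_LH_SAMPLE_UUID: "11111111-1111-1111-1111-111111111111",
            FIELD_LH_SOURCE_PLATE_UUID: "22222222-2222-2222-2222-222222222222",
        }
    ]
    update_mongo_fields(mongo_db, samples)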
def test_export_to_mongo_adds_an_error_when_source_plate_exists_for_another_lab_id(subject, mongo_database):
    _, mongo_database = mongo_database

    # Get the source plate added once
    subject.export_to_mongo()

    subject._message._body[FIELD_PLATE][FIELD_LAB_ID] = "NULL"
    with patch.object(CreatePlateMessage, "add_error") as add_error:
        subject.export_to_mongo()

    add_error.assert_called_once_with(
        CreatePlateError(
            type=ErrorType.ExportingPlateAlreadyExists,
            origin=RABBITMQ_CREATE_FEEDBACK_ORIGIN_PLATE,
            description=ANY,
            field=FIELD_LAB_ID,
        )
    )

    # NULL plate was not inserted
    source_plates_collection = get_mongo_collection(mongo_database, COLLECTION_SOURCE_PLATES)
    assert source_plates_collection.count_documents({FIELD_MONGO_LAB_ID: "NULL"}) == 0
def update_mongo_filtered_positive_fields(
    config: Config, samples: List[SampleDoc], version: str, update_timestamp: datetime
) -> bool:
    """Batch updates sample filtered positive fields in the Mongo database

    Arguments:
        config {Config} -- application config specifying database details
        samples {List[Sample]} -- the list of samples whose filtered positive fields should be updated
        version {str} -- the filtered positive identifier version used
        update_timestamp {datetime} -- the timestamp at which the update was performed

    Returns:
        bool -- whether the updates completed successfully
    """
    with create_mongo_client(config) as client:
        mongo_db = get_mongo_db(config, client)
        samples_collection = get_mongo_collection(mongo_db, COLLECTION_SAMPLES)

        num_samples = len(samples)
        SAMPLES_PER_QUERY = 15000
        samples_index = 0
        logger.debug(f"Attempting to update {num_samples} rows in Mongo in batches of {SAMPLES_PER_QUERY}")
        while samples_index < num_samples:
            logger.debug(f"Updating records between {samples_index} and {samples_index + SAMPLES_PER_QUERY}")

            samples_batch = samples[samples_index : (samples_index + SAMPLES_PER_QUERY)]  # noqa: E203

            # get ids of those that are filtered positive, and those that aren't
            filtered_positive_ids = []
            filtered_negative_ids = []
            for sample in samples_batch:
                if sample[FIELD_FILTERED_POSITIVE] is True:
                    filtered_positive_ids.append(sample[FIELD_MONGODB_ID])
                else:
                    filtered_negative_ids.append(sample[FIELD_MONGODB_ID])

            samples_collection.update_many(
                {FIELD_MONGODB_ID: {"$in": filtered_positive_ids}},
                {
                    "$set": {
                        FIELD_FILTERED_POSITIVE: True,
                        FIELD_FILTERED_POSITIVE_VERSION: version,
                        FIELD_FILTERED_POSITIVE_TIMESTAMP: update_timestamp,
                    }
                },
            )

            samples_collection.update_many(
                {FIELD_MONGODB_ID: {"$in": filtered_negative_ids}},
                {
                    "$set": {
                        FIELD_FILTERED_POSITIVE: False,
                        FIELD_FILTERED_POSITIVE_VERSION: version,
                        FIELD_FILTERED_POSITIVE_TIMESTAMP: update_timestamp,
                    }
                },
            )

            samples_index += SAMPLES_PER_QUERY

    return True
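# A hedged illustration of the batching loop above: slicing past the end of a
# list is safe in Python, so the final partial batch needs no special casing.
# This standalone helper is hypothetical and only demonstrates batch boundaries.
def batch_bounds(num_samples: int, batch_size: int = 15000) -> List[tuple]:
    bounds = []
    index = 0
    while index < num_samples:
        # The upper bound is clamped to num_samples purely for display purposes
        bounds.append((index, min(index + batch_size, num_samples)))
        index += batch_size
    return bounds

# For example, batch_bounds(32000) returns [(0, 15000), (15000, 30000), (30000, 32000)]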