def mongo_database(mongo_client):
    """Yield a ``(config, db)`` pair backed by a freshly reset Mongo database.

    The incoming fixture value is a ``(config, client)`` tuple. The target
    database is dropped before the test runs so no stale data leaks between
    tests, and the standard indexes are recreated — which also creates the
    empty ``source_plates`` and ``samples`` collections.
    """
    config, client = mongo_client
    database = get_mongo_db(config, client)

    # Start every test from a clean slate.
    client.drop_database(database)

    # Recreating the indexes also materialises the (empty) source_plates
    # and samples collections.
    ensure_mongo_collections_indexed(database)

    yield config, database
def run(sftp: bool, keep_files: bool, add_to_dart: bool, settings_module: str = "", centre_prefix: str = "") -> None:
    """Download (optionally) and process CSV files for the configured centres.

    Arguments:
        sftp: when True, download each centre's CSV files over SFTP before processing.
        keep_files: when False, clean up each centre's download directory afterwards.
        add_to_dart: forwarded to file processing and priority-sample updates.
        settings_module: optional settings module path passed to get_config.
        centre_prefix: when given, restrict the run to the single centre with this
            prefix; otherwise all centres flagged for scheduled runs are processed.

    Any exception raised by an individual centre is logged and does not stop the
    remaining centres; a top-level exception handler logs anything else.
    """
    try:
        start = time.time()

        config, settings_module = get_config(settings_module)

        logging.config.dictConfig(config.LOGGING)

        logger.info("-" * 80)
        logger.info("START")
        logger.info(f"Using settings from {settings_module}")

        # get or create the centres collection and filter down to only those with an SFTP data source
        centres = get_centres_config(config, CENTRE_DATA_SOURCE_SFTP)

        with create_mongo_client(config) as client:
            db = get_mongo_db(config, client)
            ensure_mongo_collections_indexed(db)

            # NOTE: the comprehension variable is deliberately NOT named `config`
            # (as the previous lambdas did) so it cannot shadow the app config above.
            if centre_prefix:
                # We are only interested in processing a single centre
                centres = [centre for centre in centres if centre.get(CENTRE_KEY_PREFIX) == centre_prefix]
            else:
                # We should only include centres that are to be batch processed
                centres = [centre for centre in centres if centre.get(CENTRE_KEY_INCLUDE_IN_SCHEDULED_RUNS, True)]

            centres_instances = [Centre(config, centre_config) for centre_config in centres]

            for centre_instance in centres_instances:
                logger.info("*" * 80)
                logger.info(f"Processing {centre_instance.centre_config[CENTRE_KEY_NAME]}")

                try:
                    if sftp:
                        centre_instance.download_csv_files()

                    centre_instance.process_files(add_to_dart)
                except Exception as e:
                    # A failure in one centre must not abort the others.
                    logger.error(f"Error in centre '{centre_instance.centre_config[CENTRE_KEY_NAME]}'")
                    logger.exception(e)
                finally:
                    if not keep_files and centre_instance.is_download_dir_walkable:
                        centre_instance.clean_up()

            # Prioritisation of samples
            update_priority_samples(db, config, add_to_dart)

        logger.info(f"Import complete in {round(time.time() - start, 2)}s")
        logger.info("=" * 80)
    except Exception as e:
        # Top-level boundary: log and swallow so a scheduled run exits cleanly.
        logger.exception(e)
def test_ensure_mongo_collections_indexed_adds_correct_indexes_to_source_plates(mongo_database):
    """Indexing the source_plates collection adds exactly the expected indexes."""
    _, db = mongo_database

    ensure_mongo_collections_indexed(db)

    index_info = db[COLLECTION_SOURCE_PLATES].index_information()

    # Default _id index plus the two created by ensure_mongo_collections_indexed.
    expected = [
        "_id_",
        f"{FIELD_BARCODE}_1",
        f"{FIELD_LH_SOURCE_PLATE_UUID}_1",
    ]
    assert len(index_info) == len(expected)
    assert list(index_info) == expected
def test_ensure_mongo_collections_indexed_adds_correct_indexes_to_samples(mongo_database):
    """Indexing the samples collection adds exactly the expected indexes."""
    _, db = mongo_database

    ensure_mongo_collections_indexed(db)

    index_info = db[COLLECTION_SAMPLES].index_information()

    # Default _id index plus the five created by ensure_mongo_collections_indexed,
    # including the compound root-sample/RNA/result/lab index.
    expected = [
        "_id_",
        f"{FIELD_PLATE_BARCODE}_1",
        f"{FIELD_MONGO_RESULT}_1",
        f"{FIELD_LH_SAMPLE_UUID}_1",
        f"{FIELD_MONGO_ROOT_SAMPLE_ID}_1_{FIELD_MONGO_RNA_ID}_1_{FIELD_MONGO_RESULT}_1_{FIELD_MONGO_LAB_ID}_1",
        f"{FIELD_LH_SOURCE_PLATE_UUID}_1",
    ]
    assert len(index_info) == len(expected)
    assert list(index_info) == expected
def setup_mongo_indexes(config):
    """Open a short-lived Mongo connection and ensure the standard indexes exist.

    The client is created, used to resolve the database, indexed, and closed
    again by the context manager — no connection is left open.
    """
    with create_mongo_client(config) as mongo_client:
        mongo_db = get_mongo_db(config, mongo_client)
        ensure_mongo_collections_indexed(mongo_db)