def run(sftp: bool, keep_files: bool, add_to_dart: bool, settings_module: str = "", centre_prefix: str = "") -> None:
    """Run the crawler: fetch, process and store sample files for the configured centres.

    Arguments:
        sftp -- when True, download CSV files from each centre's SFTP server before processing
        keep_files -- when True, keep the downloaded files instead of cleaning them up afterwards
        add_to_dart -- when True, also push processed data to DART
        settings_module -- dotted path of the settings module to use (defaults to the environment's)
        centre_prefix -- when given, process only the centre whose prefix matches; otherwise all
            centres flagged for scheduled runs are processed

    Any exception raised by an individual centre is logged and does not stop the other centres;
    a top-level exception is logged and swallowed so the scheduler does not crash.
    """
    try:
        start = time.time()
        config, settings_module = get_config(settings_module)
        logging.config.dictConfig(config.LOGGING)

        logger.info("-" * 80)
        logger.info("START")
        logger.info(f"Using settings from {settings_module}")

        # get or create the centres collection and filter down to only those with an SFTP data source
        centres = get_centres_config(config, CENTRE_DATA_SOURCE_SFTP)

        with create_mongo_client(config) as client:
            db = get_mongo_db(config, client)
            ensure_mongo_collections_indexed(db)

            if centre_prefix:
                # We are only interested in processing a single centre.
                # Note: the lambda parameter is named `centre` (not `config`) so it does not
                # shadow the settings object bound above.
                centres = list(filter(lambda centre: centre.get(CENTRE_KEY_PREFIX) == centre_prefix, centres))
            else:
                # We should only include centres that are to be batch processed
                centres = list(filter(lambda centre: centre.get(CENTRE_KEY_INCLUDE_IN_SCHEDULED_RUNS, True), centres))

            centres_instances = [Centre(config, centre_config) for centre_config in centres]

            for centre_instance in centres_instances:
                logger.info("*" * 80)
                logger.info(f"Processing {centre_instance.centre_config[CENTRE_KEY_NAME]}")

                try:
                    if sftp:
                        centre_instance.download_csv_files()

                    centre_instance.process_files(add_to_dart)
                except Exception as e:
                    # a failing centre must not prevent the remaining centres from running
                    logger.error(f"Error in centre '{centre_instance.centre_config[CENTRE_KEY_NAME]}'")
                    logger.exception(e)
                finally:
                    # clean up the download directory unless asked to keep files (and only if it exists)
                    if not keep_files and centre_instance.is_download_dir_walkable:
                        centre_instance.clean_up()

            # Prioritisation of samples
            update_priority_samples(db, config, add_to_dart)

        logger.info(f"Import complete in {round(time.time() - start, 2)}s")
        logger.info("=" * 80)
    except Exception as e:
        logger.exception(e)
def test_set_state_for_file_when_in_error_folder(config, tmpdir, testing_centres):
    """The file state is FILE_PROCESSED_WITH_ERROR when an errored backup of the file exists."""
    # point the centre's backups folder at the temporary directory for this test
    with patch.dict(config.CENTRES[0], {CENTRE_KEY_BACKUPS_FOLDER: tmpdir.realpath()}):
        errors_folder = tmpdir.mkdir(ERRORS_DIR)
        centre = Centre(config, config.CENTRES[0])

        # simulate a previous failed run: log an error then back the file up,
        # which lands it in the errors directory
        centre_file = CentreFile("AP_sanger_report_200518_2132.csv", centre)
        centre_file.logging_collection.add_error("TYPE 4", "Some error happened")
        centre_file.backup_file()

        assert len(errors_folder.listdir()) == 1

        # re-evaluating the state should now detect the errored backup
        centre_file.set_state_for_file()

        assert centre_file.file_state == CentreFileState.FILE_PROCESSED_WITH_ERROR
def test_set_state_for_file_when_file_in_black_list(config, blacklist_for_centre, testing_centres):
    """A blacklisted filename should resolve to the FILE_IN_BLACKLIST state."""
    blacklisted_filename = "AP_sanger_report_200503_2338.csv"
    centre_file = CentreFile(blacklisted_filename, Centre(config, config.CENTRES[0]))

    centre_file.set_state_for_file()

    assert centre_file.file_state == CentreFileState.FILE_IN_BLACKLIST
def test_set_state_for_file_when_never_seen_before(config, testing_centres):
    """A file with no prior record should resolve to the FILE_NOT_PROCESSED_YET state."""
    unseen_filename = "CB_sanger_report_200503_2338.csv"
    centre_file = CentreFile(unseen_filename, Centre(config, config.CENTRES[3]))

    centre_file.set_state_for_file()

    assert centre_file.file_state == CentreFileState.FILE_NOT_PROCESSED_YET
def test_set_state_for_file_when_should_not_be_processed(config, testing_centres):
    """An already-handled file should resolve to the FILE_SHOULD_NOT_BE_PROCESSED state."""
    skipped_filename = "AP_sanger_report_200503_2338.csv"
    centre_file = CentreFile(skipped_filename, Centre(config, config.CENTRES[0]))

    centre_file.set_state_for_file()

    assert centre_file.file_state == CentreFileState.FILE_SHOULD_NOT_BE_PROCESSED
def run(sftp: bool, keep_files: bool, add_to_dart: bool, settings_module: str = "") -> None:
    """Run the crawler: set up the mongo collections/indexes then process every centre's files.

    Arguments:
        sftp -- when True, download CSV files from each centre's SFTP server before processing
        keep_files -- when True, keep the downloaded files instead of cleaning them up afterwards
        add_to_dart -- when True, also push processed data to DART
        settings_module -- dotted path of the settings module to use (defaults to the environment's)

    Any exception raised while processing one centre is logged and does not stop the other
    centres; a top-level exception is logged and swallowed so the scheduler does not crash.
    """
    try:
        start = time.time()
        config, settings_module = get_config(settings_module)
        logging.config.dictConfig(config.LOGGING)
        logger.info("-" * 80)
        logger.info("START")
        logger.info(f"Using settings from {settings_module}")

        centres = config.CENTRES

        with create_mongo_client(config) as client:
            db = get_mongo_db(config, client)

            # get or create the centres collection
            centres_collection = get_mongo_collection(db, COLLECTION_CENTRES)

            logger.debug(f"Creating index '{FIELD_CENTRE_NAME}' on '{centres_collection.full_name}'")
            centres_collection.create_index(FIELD_CENTRE_NAME, unique=True)
            populate_collection(centres_collection, centres, FIELD_CENTRE_NAME)

            # get or create the source plates collection
            source_plates_collection = get_mongo_collection(db, COLLECTION_SOURCE_PLATES)

            logger.debug(f"Creating index '{FIELD_BARCODE}' on '{source_plates_collection.full_name}'")
            source_plates_collection.create_index(FIELD_BARCODE, unique=True)

            logger.debug(f"Creating index '{FIELD_LH_SOURCE_PLATE_UUID}' on '{source_plates_collection.full_name}'")
            source_plates_collection.create_index(FIELD_LH_SOURCE_PLATE_UUID, unique=True)

            with samples_collection_accessor(db, COLLECTION_SAMPLES) as samples_collection:
                # Index on plate barcode to make it easier to select based on plate barcode
                logger.debug(f"Creating index '{FIELD_PLATE_BARCODE}' on '{samples_collection.full_name}'")
                samples_collection.create_index(FIELD_PLATE_BARCODE)

                # Index on result column to make it easier to select the positives
                logger.debug(f"Creating index '{FIELD_RESULT}' on '{samples_collection.full_name}'")
                samples_collection.create_index(FIELD_RESULT)

                # Index on unique combination of columns
                logger.debug(f"Creating compound index on '{samples_collection.full_name}'")
                # create compound index on 'Root Sample ID', 'RNA ID', 'Result', 'Lab ID' - some
                # data had the same plate tested at another time so ignore the data if it is exactly
                # the same
                samples_collection.create_index(
                    [
                        (FIELD_ROOT_SAMPLE_ID, pymongo.ASCENDING),
                        (FIELD_RNA_ID, pymongo.ASCENDING),
                        (FIELD_RESULT, pymongo.ASCENDING),
                        (FIELD_LAB_ID, pymongo.ASCENDING),
                    ],
                    unique=True,
                )

                # Index on lh_source_plate_uuid column
                # Added to make lighthouse API source completion event call query more efficient
                logger.debug(f"Creating index '{FIELD_LH_SOURCE_PLATE_UUID}' on '{samples_collection.full_name}'")
                samples_collection.create_index(FIELD_LH_SOURCE_PLATE_UUID)

                centres_instances = [Centre(config, centre_config) for centre_config in centres]
                for centre_instance in centres_instances:
                    logger.info("*" * 80)
                    logger.info(f"Processing {centre_instance.centre_config['name']}")

                    try:
                        if sftp:
                            centre_instance.download_csv_files()

                        centre_instance.process_files(add_to_dart)
                    except Exception as e:
                        # a failing centre must not prevent the remaining centres from running
                        # (fixed typo: "occured" -> "occurred")
                        logger.error("An exception occurred")
                        logger.error(f"Error in centre {centre_instance.centre_config['name']}")
                        logger.exception(e)
                    finally:
                        # clean up the download directory unless asked to keep files (and only if it exists)
                        if not keep_files and centre_instance.is_download_dir_walkable:
                            centre_instance.clean_up()

        logger.info(f"Import complete in {round(time.time() - start, 2)}s")
        logger.info("=" * 80)
    except Exception as e:
        logger.exception(e)
def centre(config):
    """Fixture yielding a Centre built from the first configured centre."""
    centre_instance = Centre(config, config.CENTRES[0])
    yield centre_instance