Exemple #1
0
def run(sftp: bool, keep_files: bool, add_to_dart: bool, settings_module: str = "", centre_prefix: str = "") -> None:
    """Import sample files for the configured SFTP centres.

    Args:
        sftp: when true, each centre's CSV files are downloaded before processing.
        keep_files: when false, a centre's download directory is cleaned up after the
            centre has been handled (only if the directory is walkable).
        add_to_dart: forwarded to each centre's ``process_files`` and to
            ``update_priority_samples``.
        settings_module: handed to ``get_config``; the module actually used is logged.
        centre_prefix: when non-empty, only centres whose prefix equals this value are
            processed; otherwise centres are kept unless their config opts out of
            scheduled runs.

    Exceptions are logged rather than propagated to the caller.
    """
    try:
        started_at = time.time()
        config, settings_module = get_config(settings_module)

        logging.config.dictConfig(config.LOGGING)

        logger.info("-" * 80)
        logger.info("START")
        logger.info(f"Using settings from {settings_module}")

        # Centre configurations, restricted to those with an SFTP data source
        centres = get_centres_config(config, CENTRE_DATA_SOURCE_SFTP)

        with create_mongo_client(config) as mongo_client:
            db = get_mongo_db(config, mongo_client)
            ensure_mongo_collections_indexed(db)

            if centre_prefix:
                # A single centre was requested: keep only the one with a matching prefix
                centres = [entry for entry in centres if entry.get(CENTRE_KEY_PREFIX) == centre_prefix]
            else:
                # Batch run: keep centres unless they are flagged as excluded from scheduled runs
                centres = [entry for entry in centres if entry.get(CENTRE_KEY_INCLUDE_IN_SCHEDULED_RUNS, True)]

            # Build every Centre up front, before any of them is processed
            centres_instances = [Centre(config, entry) for entry in centres]

            for current_centre in centres_instances:
                logger.info("*" * 80)
                logger.info(f"Processing {current_centre.centre_config[CENTRE_KEY_NAME]}")

                try:
                    if sftp:
                        current_centre.download_csv_files()

                    current_centre.process_files(add_to_dart)
                except Exception as centre_error:
                    # A failing centre must not stop the remaining centres from being processed
                    logger.error(f"Error in centre '{current_centre.centre_config[CENTRE_KEY_NAME]}'")
                    logger.exception(centre_error)
                finally:
                    can_clean_up = not keep_files and current_centre.is_download_dir_walkable
                    if can_clean_up:
                        current_centre.clean_up()

                # Prioritisation of samples (performed after each centre)
                update_priority_samples(db, config, add_to_dart)

        elapsed_seconds = round(time.time() - started_at, 2)
        logger.info(f"Import complete in {elapsed_seconds}s")
        logger.info("=" * 80)
    except Exception as run_error:
        logger.exception(run_error)
Exemple #2
0
def test_set_state_for_file_when_in_error_folder(config, tmpdir, testing_centres):
    """A file whose backup sits in the errors folder is reported as processed with error."""
    first_centre_config = config.CENTRES[0]
    # configure the centre to use the temporary directory as its backups folder for this test
    backups_override = {CENTRE_KEY_BACKUPS_FOLDER: tmpdir.realpath()}

    with patch.dict(first_centre_config, backups_override):
        errors_dir = tmpdir.mkdir(ERRORS_DIR)

        centre_under_test = Centre(config, config.CENTRES[0])

        # record an error against the file and back it up, as if it had been processed before;
        # the backup is expected to end up inside the errors directory
        file_under_test = CentreFile("AP_sanger_report_200518_2132.csv", centre_under_test)
        file_under_test.logging_collection.add_error("TYPE 4", "Some error happened")
        file_under_test.backup_file()

        assert len(errors_dir.listdir()) == 1

        # re-evaluate the file state now that the error version exists
        file_under_test.set_state_for_file()

        assert file_under_test.file_state == CentreFileState.FILE_PROCESSED_WITH_ERROR
Exemple #3
0
def test_set_state_for_file_when_file_in_black_list(config, blacklist_for_centre, testing_centres):
    """With the ``blacklist_for_centre`` fixture active, the file is reported as blacklisted.

    NOTE(review): presumably the fixture blacklists this filename for the first
    centre; confirm against the fixture definition.
    """
    first_centre = Centre(config, config.CENTRES[0])
    blacklisted_file = CentreFile("AP_sanger_report_200503_2338.csv", first_centre)

    blacklisted_file.set_state_for_file()

    assert blacklisted_file.file_state == CentreFileState.FILE_IN_BLACKLIST
Exemple #4
0
def test_set_state_for_file_when_never_seen_before(config, testing_centres):
    """A file for the fourth configured centre is reported as not processed yet."""
    fourth_centre = Centre(config, config.CENTRES[3])
    unseen_file = CentreFile("CB_sanger_report_200503_2338.csv", fourth_centre)

    unseen_file.set_state_for_file()

    assert unseen_file.file_state == CentreFileState.FILE_NOT_PROCESSED_YET
Exemple #5
0
def test_set_state_for_file_when_should_not_be_processed(config, testing_centres):
    """Without any blacklist fixture, this file for the first centre should not be processed.

    NOTE(review): the rule that excludes this filename is not visible here;
    confirm against ``CentreFile.set_state_for_file``.
    """
    first_centre = Centre(config, config.CENTRES[0])
    skipped_file = CentreFile("AP_sanger_report_200503_2338.csv", first_centre)

    skipped_file.set_state_for_file()

    assert skipped_file.file_state == CentreFileState.FILE_SHOULD_NOT_BE_PROCESSED
Exemple #6
0
def _create_field_index(collection, field, **index_options):
    """Log and create a single-field index on ``collection``; options go to ``create_index``."""
    logger.debug(f"Creating index '{field}' on '{collection.full_name}'")
    collection.create_index(field, **index_options)


def _prepare_centres_and_source_plates(db, centres):
    """Index the centres and source plates collections and populate the centres collection."""
    # get or create the centres collection
    centres_collection = get_mongo_collection(db, COLLECTION_CENTRES)
    _create_field_index(centres_collection, FIELD_CENTRE_NAME, unique=True)
    populate_collection(centres_collection, centres, FIELD_CENTRE_NAME)

    # get or create the source plates collection
    source_plates_collection = get_mongo_collection(db, COLLECTION_SOURCE_PLATES)
    _create_field_index(source_plates_collection, FIELD_BARCODE, unique=True)
    _create_field_index(source_plates_collection, FIELD_LH_SOURCE_PLATE_UUID, unique=True)


def _index_samples_collection(samples_collection):
    """Create the indexes used on the samples collection."""
    # Index on plate barcode to make it easier to select based on plate barcode
    _create_field_index(samples_collection, FIELD_PLATE_BARCODE)

    # Index on result column to make it easier to select the positives
    _create_field_index(samples_collection, FIELD_RESULT)

    # Unique compound index on 'Root Sample ID', 'RNA ID', 'Result', 'Lab ID' - some data had
    # the same plate tested at another time so ignore the data if it is exactly the same
    logger.debug(f"Creating compound index on '{samples_collection.full_name}'")
    samples_collection.create_index(
        [
            (FIELD_ROOT_SAMPLE_ID, pymongo.ASCENDING),
            (FIELD_RNA_ID, pymongo.ASCENDING),
            (FIELD_RESULT, pymongo.ASCENDING),
            (FIELD_LAB_ID, pymongo.ASCENDING),
        ],
        unique=True,
    )

    # Index on lh_source_plate_uuid column
    # Added to make lighthouse API source completion event call query more efficient
    _create_field_index(samples_collection, FIELD_LH_SOURCE_PLATE_UUID)


def _process_centre(centre_instance, sftp, keep_files, add_to_dart):
    """Download (optionally) and process one centre's files, logging any failure."""
    logger.info("*" * 80)
    logger.info(f"Processing {centre_instance.centre_config['name']}")

    try:
        if sftp:
            centre_instance.download_csv_files()

        centre_instance.process_files(add_to_dart)
    except Exception as e:
        # Log and carry on so that one failing centre does not abort the others
        logger.error("An exception occured")
        logger.error(f"Error in centre {centre_instance.centre_config['name']}")
        logger.exception(e)
    finally:
        if not keep_files and centre_instance.is_download_dir_walkable:
            centre_instance.clean_up()


def run(sftp: bool,
        keep_files: bool,
        add_to_dart: bool,
        settings_module: str = "") -> None:
    """Set up the Mongo collections and indexes, then process every configured centre.

    Args:
        sftp: when true, each centre's CSV files are downloaded before processing.
        keep_files: when false, a centre's download directory is cleaned up after the
            centre has been handled (only if the directory is walkable).
        add_to_dart: forwarded to each centre's ``process_files``.
        settings_module: handed to ``get_config``; the module actually used is logged.

    Exceptions are logged rather than propagated to the caller.
    """
    try:
        start = time.time()
        config, settings_module = get_config(settings_module)

        logging.config.dictConfig(config.LOGGING)

        logger.info("-" * 80)
        logger.info("START")
        logger.info(f"Using settings from {settings_module}")

        centres = config.CENTRES

        with create_mongo_client(config) as client:
            db = get_mongo_db(config, client)
            _prepare_centres_and_source_plates(db, centres)

            # Centres are processed while the samples collection accessor is still open
            with samples_collection_accessor(db, COLLECTION_SAMPLES) as samples_collection:
                _index_samples_collection(samples_collection)

                # Build every Centre up front, before any of them is processed
                centres_instances = [Centre(config, centre_config) for centre_config in centres]
                for centre_instance in centres_instances:
                    _process_centre(centre_instance, sftp, keep_files, add_to_dart)

        logger.info(f"Import complete in {round(time.time() - start, 2)}s")
        logger.info("=" * 80)
    except Exception as e:
        logger.exception(e)
Exemple #7
0
def centre(config):
    """Yield a ``Centre`` built from the first entry of ``config.CENTRES``.

    NOTE(review): presumably a pytest fixture whose decorator is not visible here.
    """
    first_centre_config = config.CENTRES[0]
    centre_instance = Centre(config, first_centre_config)
    yield centre_instance