예제 #1
0
def test_get_centres_config_with_data_source(test_data_source_centres, requested, actual_data_source):
    """Asking for a specific data source returns a single centre tagged with the expected source."""
    _, settings = test_data_source_centres

    matching_centres = get_centres_config(settings, requested)

    # Exactly one centre should survive the data source filter ...
    assert len(matching_centres) == 1

    # ... and it must carry the data source the test case expects.
    only_centre = matching_centres[0]
    assert only_centre[CENTRE_KEY_DATA_SOURCE] == actual_data_source
예제 #2
0
    def centres(self):
        """Return the centres configuration for the RabbitMQ data source, loading it on first use.

        The value returned by ``get_centres_config`` is stored on ``self._centres`` so that
        subsequent calls return the stored value without performing another lookup.

        Returns:
            The result of ``get_centres_config`` for ``CENTRE_DATA_SOURCE_RABBITMQ``.

        Raises:
            TransientRabbitError: if the lookup raises any exception. The original exception
                is attached as the cause so the underlying failure is kept in the traceback.
        """
        if self._centres is None:
            try:
                self._centres = get_centres_config(
                    self._config, CENTRE_DATA_SOURCE_RABBITMQ)
            except Exception as ex:
                # Chain explicitly so the root failure is reported as the direct cause.
                raise TransientRabbitError(
                    "Unable to reach MongoDB while getting centres config.") from ex

        return self._centres
예제 #3
0
파일: main.py 프로젝트: sanger/crawler
def run(sftp: bool, keep_files: bool, add_to_dart: bool, settings_module: str = "", centre_prefix: str = "") -> None:
    """Process the files of the selected SFTP-sourced centres.

    Any exception that escapes the run is logged rather than propagated to the caller.

    Arguments:
        sftp {bool} -- when true, download each centre's CSV files before processing them
        keep_files {bool} -- when true, skip the clean up step after each centre
        add_to_dart {bool} -- forwarded to file processing and to the priority samples update
        settings_module {str} -- forwarded to get_config to select the settings (default: {""})
        centre_prefix {str} -- when non-empty, restrict the run to centres with this prefix;
            otherwise only centres flagged for scheduled runs are processed (default: {""})
    """
    try:
        start = time.time()
        config, settings_module = get_config(settings_module)

        logging.config.dictConfig(config.LOGGING)

        logger.info("-" * 80)
        logger.info("START")
        logger.info(f"Using settings from {settings_module}")

        # Start from the centres whose data source is SFTP.
        centres = get_centres_config(config, CENTRE_DATA_SOURCE_SFTP)

        with create_mongo_client(config) as client:
            db = get_mongo_db(config, client)
            ensure_mongo_collections_indexed(db)

            if centre_prefix:
                # A prefix was given: keep only the centre(s) carrying it.
                centres = [entry for entry in centres if entry.get(CENTRE_KEY_PREFIX) == centre_prefix]
            else:
                # No prefix: keep centres flagged for scheduled runs (a missing flag counts as True).
                centres = [entry for entry in centres if entry.get(CENTRE_KEY_INCLUDE_IN_SCHEDULED_RUNS, True)]

            # Build every Centre up front, before any of them is processed.
            centres_instances = [Centre(config, centre_config) for centre_config in centres]

            for centre_instance in centres_instances:
                logger.info("*" * 80)
                logger.info(f"Processing {centre_instance.centre_config[CENTRE_KEY_NAME]}")

                try:
                    if sftp:
                        centre_instance.download_csv_files()

                    centre_instance.process_files(add_to_dart)
                except Exception as e:
                    # A failing centre is logged and the run moves on to the next one.
                    logger.error(f"Error in centre '{centre_instance.centre_config[CENTRE_KEY_NAME]}'")
                    logger.exception(e)
                finally:
                    can_clean_up = (not keep_files) and centre_instance.is_download_dir_walkable
                    if can_clean_up:
                        centre_instance.clean_up()

                # Prioritisation of samples, performed after each centre.
                update_priority_samples(db, config, add_to_dart)

        logger.info(f"Import complete in {round(time.time() - start, 2)}s")
        logger.info("=" * 80)
    except Exception as e:
        logger.exception(e)
예제 #4
0
def test_get_centres_config_only_populates_database_once(centres_collection_accessor, config):
    """The centres collection is populated only when it does not exist, never topped back up."""

    def stored_centres():
        # Number of documents currently held in the centres collection.
        return centres_collection_accessor.count_documents({})

    # Nothing is stored before the first call.
    assert stored_centres() == 0

    # The first call populates the collection with the 12 centre documents.
    get_centres_config(config)
    assert stored_centres() == 12

    # Removing a single document: a further call must not restore it.
    centres_collection_accessor.delete_one({CENTRE_KEY_NAME: "Alderley"})
    get_centres_config(config)
    assert stored_centres() == 11

    # Removing every document: a further call must still not restore them.
    centres_collection_accessor.delete_many({})
    get_centres_config(config)
    assert stored_centres() == 0

    # Dropping the collection itself: the next call populates it again.
    centres_collection_accessor.drop()
    get_centres_config(config)
    assert stored_centres() == 12
예제 #5
0
def test_get_centres_config_no_data_source_specified(test_data_source_centres):
    """Without a data source argument, both centres from the fixture are returned."""
    _, settings = test_data_source_centres

    all_centres = get_centres_config(settings)

    assert len(all_centres) == 2
예제 #6
0
파일: runner.py 프로젝트: sanger/crawler
def centre_prefix_choices():
    """Return the prefix of every centre configured with the "SFTP" data source.

    The configuration is obtained from ``get_config("")``.
    """
    settings, _ = get_config("")
    return [entry[CENTRE_KEY_PREFIX] for entry in get_centres_config(settings, "SFTP")]
예제 #7
0
def update_dart_fields(config: Config, samples: List[SampleDoc]) -> bool:
    """Updates DART plates and wells following updates to the filtered positive fields

    Samples are handled plate by plate: each plate is committed on its own, and a failure
    while handling a plate rolls back that plate only and marks the overall result as failed.

    Arguments:
        config {Config} -- application config specifying database details
        samples {List[SampleDoc]} -- the list of samples to update in DART

    Returns:
        bool -- whether the updates completed successfully for every plate

    Raises:
        ValueError -- if the DART SQL Server connection could not be established
    """
    sql_server_connection = create_dart_sql_server_conn(config)
    if sql_server_connection is None:
        raise ValueError("Unable to establish DART SQL Server connection")

    dart_updated_successfully = True
    try:
        # These lookups run under the finally below so the connection is closed even if they
        # raise; they sit outside the inner try so their exceptions still reach the caller.
        centres = get_centres_config(config)
        labclass_by_centre_name = biomek_labclass_by_centre_name(centres)

        try:
            logger.info("Writing to DART")

            cursor = sql_server_connection.cursor()

            # NOTE(review): groupby_transform groups consecutive items only; presumably the
            # samples arrive ordered by plate barcode -- confirm against callers.
            for plate_barcode, samples_in_plate in groupby_transform(
                    iterable=samples,
                    keyfunc=lambda x: x[FIELD_PLATE_BARCODE],
                    valuefunc=None,
                    reducefunc=list):
                try:
                    # The labware class is looked up from the source of the plate's first sample.
                    labware_class = labclass_by_centre_name[str(
                        samples_in_plate[0][FIELD_SOURCE])]
                    plate_state = add_dart_plate_if_doesnt_exist(
                        cursor,
                        plate_barcode,
                        labware_class  # type:ignore
                    )
                    # Wells are only written while the plate is still pending.
                    if plate_state == DART_STATE_PENDING:
                        for sample in samples_in_plate:
                            if sample[FIELD_RESULT] == RESULT_VALUE_POSITIVE:
                                well_index = get_dart_well_index(
                                    str(sample.get(FIELD_COORDINATE, None)))
                                if well_index is not None:
                                    well_props = map_mongo_doc_to_dart_well_props(
                                        sample)
                                    set_dart_well_properties(
                                        cursor,
                                        plate_barcode,
                                        well_props,
                                        well_index  # type:ignore
                                    )
                                else:
                                    raise ValueError(
                                        "Unable to determine DART well index for sample "
                                        f"{sample[FIELD_ROOT_SAMPLE_ID]} in plate {plate_barcode}"
                                    )
                    # Commit per plate so a later failure cannot undo this plate's changes.
                    cursor.commit()
                except Exception as e:
                    logger.error(
                        f"Failed updating DART for samples in plate {plate_barcode}"
                    )
                    logger.exception(e)
                    cursor.rollback()
                    dart_updated_successfully = False

            logger.info("Updating DART completed")
        except Exception as e:
            logger.error("Failed updating DART")
            logger.exception(e)
            dart_updated_successfully = False
    finally:
        sql_server_connection.close()

    return dart_updated_successfully