Example #1
def insert_or_update_samples_in_mlwh(
    samples: List[ModifiedRow],
    config: Config,
    logging_collection: LoggingCollection,
    logging_messages: Dict[str, Dict[str, Any]],
) -> bool:
    """Insert or update sample records into the MLWH database from the given samples, including the corresponding
    mongodb `_id`, `must_sequence` and `preferentially_sequence` for priority samples.

    Arguments:
        samples {List[ModifiedRow]} -- List of sample information
        config {Config} -- Config object
        logging_collection {LoggingCollection} -- the logging collection to use for logging
        logging_messages {Dict} -- a dictionary containing the logging messages to use for logging

    Returns:
        {bool} -- True if the insert was successful; otherwise False
    """
    mysql_samples = map(map_mongo_sample_to_mysql, samples)
    parsed_samples = set_is_current_on_mysql_samples(mysql_samples)
    mysql_conn = create_mysql_connection(config=config, readonly=False)

    if mysql_conn is not None and mysql_conn.is_connected():
        try:
            run_mysql_executemany_query(mysql_conn=mysql_conn,
                                        sql_query=SQL_MLWH_MULTIPLE_INSERT,
                                        values=parsed_samples)

            logger.debug(logging_messages["success"]["msg"])
            return True
        except Exception as e:
            logging_collection.add_error(
                logging_messages["insert_failure"]["error_type"],
                logging_messages["insert_failure"]["msg"])
            logger.critical(
                f"{logging_messages['insert_failure']['critical_msg']}: {e}")
            logger.exception(e)
    else:
        logging_collection.add_error(
            logging_messages["connection_failure"]["error_type"],
            logging_messages["connection_failure"]["msg"],
        )
        logger.critical(logging_messages["connection_failure"]["critical_msg"])

    return False
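
For reference, a minimal call sketch. The logging_messages keys mirror exactly what the function reads above; the error type codes and message texts here are made up:

logging_messages = {
    "success": {"msg": "MLWH insert/update completed"},  # hypothetical wording
    "insert_failure": {
        "error_type": "TYPE X",  # hypothetical error code
        "msg": "Failed to insert/update samples in MLWH",
        "critical_msg": "Critical error while inserting samples into MLWH",
    },
    "connection_failure": {
        "error_type": "TYPE Y",  # hypothetical error code
        "msg": "Failed to connect to MLWH",
        "critical_msg": "Error connecting to MySQL",
    },
}

ok = insert_or_update_samples_in_mlwh(samples, config, LoggingCollection(), logging_messages)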
Example #2
def test_update_samples_in_mlwh_sets_is_current_correctly(config, mlwh_rw_db, logging_messages):
    _, cursor = mlwh_rw_db

    # Run two insert_or_updates back to back for the same document.
    # This may seem like a redundant test, but because the second call is an update rather than an insert,
    # it is processed differently. It was observed that samples being updated to become priority samples
    # were losing their is_current flag: the flag was set explicitly to False as part of the insert
    # preparation, and the update was then not pushing the value back to True.
    with patch("crawler.db.mysql.map_mongo_sample_to_mysql"):
        with patch("crawler.db.mysql.set_is_current_on_mysql_samples"
                   ) as make_mysql_samples:
            make_mysql_samples.return_value = [MLWH_SAMPLE_COMPLETE]
            insert_or_update_samples_in_mlwh([{
                "pseudo": "sample"
            }], config, LoggingCollection(), logging_messages)
            insert_or_update_samples_in_mlwh([{
                "pseudo": "sample"
            }], config, LoggingCollection(), logging_messages)

    cursor.execute(f"SELECT {MLWH_IS_CURRENT} FROM lighthouse_sample;")
    rows = cursor.fetchall()
    assert len(rows) == 1
    assert rows[0][0] == 1
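
The comment above only makes sense if SQL_MLWH_MULTIPLE_INSERT is an INSERT ... ON DUPLICATE KEY UPDATE statement: whether a column appears in the UPDATE clause decides whether the second call refreshes it. A hypothetical, heavily trimmed illustration of that shape (not the actual statement):

# Hypothetical sketch of an insert-or-update statement; not the real SQL_MLWH_MULTIPLE_INSERT.
SQL_SKETCH = """
    INSERT INTO lighthouse_sample (mongodb_id, root_sample_id, is_current)
    VALUES (%(mongodb_id)s, %(root_sample_id)s, %(is_current)s)
    ON DUPLICATE KEY UPDATE
        root_sample_id = VALUES(root_sample_id),
        is_current = VALUES(is_current)  -- omitting this line reproduces the stale is_current bug
"""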
Example #3
def test_logging_collection_with_a_single_error():
    logging = LoggingCollection()
    logging.add_error("TYPE 3", "This is a testing message")
    aggregator = logging.aggregator_types["TYPE 3"]
    assert aggregator.count_errors == 1
    assert aggregator.max_errors == 5
    assert aggregator.get_report_message() == "Total number of 'Only root sample id' errors (TYPE 3): 1"
    exptd_msgs = (
        "WARNING: Sample rows that have Root Sample ID value but no other information. (TYPE 3) "
        "(e.g. This is a testing message)"
    )
    assert aggregator.get_message() == exptd_msgs
    assert logging.get_aggregate_messages() == [exptd_msgs]
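    # TYPE 3 aggregates at WARNING level, so it does not count towards errors and criticals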
    assert logging.get_count_of_all_errors_and_criticals() == 0
    assert logging.get_aggregate_total_messages() == ["Total number of 'Only root sample id' errors (TYPE 3): 1"]
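
Taken together, the assertions pin down the aggregator surface: a running error count, a cap of five stored examples, a long per-type message and a one-line report. A minimal sketch of that shape, reconstructed from the observed behaviour rather than taken from the crawler code:

class AggregatorSketch:
    """Hypothetical reconstruction of the aggregator behaviour exercised above."""

    def __init__(self, base_msg, report_name, error_type, echo_examples=True, max_errors=5):
        self.base_msg = base_msg        # e.g. "WARNING: Sample rows that have Root Sample ID value but no other information."
        self.report_name = report_name  # e.g. "Only root sample id"
        self.error_type = error_type    # e.g. "TYPE 3"
        # TYPE 1 and TYPE 2 in the multiple-errors test below show no "(e.g. ...)" suffixes,
        # so echoing example messages is evidently configured per type.
        self.echo_examples = echo_examples
        self.max_errors = max_errors
        self.count_errors = 0
        self._examples = []

    def add_error(self, message):
        self.count_errors += 1
        if self.echo_examples and len(self._examples) < self.max_errors:
            self._examples.append(message)

    def get_message(self):
        examples = "".join(f" (e.g. {m})" for m in self._examples)
        return f"{self.base_msg} ({self.error_type}){examples}"

    def get_report_message(self):
        return f"Total number of '{self.report_name}' errors ({self.error_type}): {self.count_errors}"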
Example #4
def test_create_import_record(freezer, mongo_database):
    config, mongo_database = mongo_database
    import_collection = mongo_database["imports"]

    docs = [{"x": 1}, {"y": 2}, {"z": 3}]
    error_collection = LoggingCollection()
    error_collection.add_error("TYPE 4", "error1")
    error_collection.add_error("TYPE 5", "error2")

    for centre in config.CENTRES:
        now = datetime.utcnow()
        result = create_import_record(
            import_collection, centre, len(docs), "test",
            error_collection.get_messages_for_import())
        import_doc = import_collection.find_one({"_id": result.inserted_id})

        assert import_doc["date"].replace(microsecond=0) == now.replace(
            microsecond=0)
        assert import_doc["centre_name"] == centre["name"]
        assert import_doc["csv_file_used"] == "test"
        assert import_doc["number_of_records"] == len(docs)
        assert import_doc["errors"] == error_collection.get_messages_for_import()
Example #5
import logging

from pymongo.database import Database

from crawler.constants import (
    DART_STATE_PENDING,
    FIELD_MONGODB_ID,
    FIELD_PLATE_BARCODE,
    FIELD_PROCESSED,
    FIELD_SAMPLE_ID,
    FIELD_SOURCE,
)
from crawler.db.dart import add_dart_plate_if_doesnt_exist, add_dart_well_properties, create_dart_sql_server_conn
from crawler.db.mongo import get_mongo_collection
from crawler.db.mysql import insert_or_update_samples_in_mlwh
from crawler.helpers.logging_helpers import LoggingCollection
from crawler.types import Config, ModifiedRowValue, SampleDoc

logger = logging.getLogger(__name__)

logging_collection = LoggingCollection()


def update_priority_samples(db: Database, config: Config, add_to_dart: bool) -> None:
    """
    Update any unprocessed priority samples in MLWH and DART with up-to-date
    values for the priority attributes (must_sequence and preferentially_sequence).
    Afterwards, every correctly processed document from the priority_samples
    collection is flagged with processed: True.

    Arguments:
        db {Database} -- mongo db instance
        config {Config} -- config for mysql and dart connections
        add_to_dart {bool} -- whether to add the samples to DART
    """
    logging_collection.reset()
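    # (The example is truncated here. Judging from the docstring and the imports above, the
    # remaining body presumably: fetches unprocessed documents from the priority_samples
    # collection via get_mongo_collection, pushes them to MLWH with
    # insert_or_update_samples_in_mlwh, optionally registers plates and wells in DART
    # (create_dart_sql_server_conn, add_dart_plate_if_doesnt_exist, add_dart_well_properties),
    # and finally flags the processed documents with processed: True.)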
Example #6
def test_insert_samples_in_mlwh_inserts_one_complete_sample_correctly(config, mlwh_rw_db, logging_messages):
    _, cursor = mlwh_rw_db

    with patch("crawler.db.mysql.map_mongo_sample_to_mysql"):
        with patch("crawler.db.mysql.set_is_current_on_mysql_samples"
                   ) as make_mysql_samples:
            make_mysql_samples.return_value = [MLWH_SAMPLE_COMPLETE]
            insert_or_update_samples_in_mlwh([{
                "pseudo": "sample"
            }], config, LoggingCollection(), logging_messages)

    fields = [
        "ch1_cq",
        "ch1_result",
        "ch1_target",
        "ch2_cq",
        "ch2_result",
        "ch2_target",
        "ch3_cq",
        "ch3_result",
        "ch3_target",
        "ch4_cq",
        "ch4_result",
        "ch4_target",
        "coordinate",
        "date_tested",
        "filtered_positive",
        "filtered_positive_timestamp",
        "filtered_positive_version",
        "is_current",
        "lab_id",
        "lh_sample_uuid",
        "lh_source_plate_uuid",
        "mongodb_id",
        "must_sequence",
        "plate_barcode",
        "preferentially_sequence",
        "result",
        "rna_id",
        "root_sample_id",
        "source",
    ]
    cursor.execute(f"SELECT {','.join(fields)} FROM lighthouse_sample;")
    rows = cursor.fetchall()
    assert len(rows) == 1

    row = rows[0]
    assert row == (
        Decimal("24.67"),
        "Positive",
        "A gene",
        Decimal("23.92"),
        "Negative",
        "B gene",
        Decimal("25.12"),
        "Positive",
        "C gene",
        Decimal("22.86"),
        "Negative",
        "D gene",
        "C3",
        datetime(2021, 2, 3, 4, 5, 6),
        True,
        datetime(2021, 2, 3, 5, 6, 7),
        "v3",
        True,
        "BB",
        "233223d5-9015-4646-add0-f358ff2688c7",
        "c6410270-5cbf-4233-a8d1-b08445bbac5e",
        "6140f388800f8fe309689124",
        True,
        "95123456789012345",
        False,
        "Positive",
        "95123456789012345_C03",
        "BAA94123456",
        "Bob's Biotech",
    )
Example #7
def test_print_summary_writes_info_line_if_successful(self):
    with patch("crawler.priority_samples_process.logger") as mock_logger:
        logging_collection = LoggingCollection()
        with patch("crawler.priority_samples_process.logging_collection", logging_collection):
            print_summary()
            assert mock_logger.info.called is True
Example #8
def test_logging_collection_with_multiple_errors():
    logging = LoggingCollection()
    logging.add_error("TYPE 3", "This is the first type 3 message")
    logging.add_error("TYPE 1", "This is the first type 1 message")
    logging.add_error("TYPE 2", "This is the first type 2 message")
    logging.add_error("TYPE 3", "This is the second type 3 message")
    logging.add_error("TYPE 2", "This is the second type 2 message")
    logging.add_error("TYPE 4", "This is the first type 4 message")
    logging.add_error("TYPE 1", "This is the first type 1 message")
    logging.add_error("TYPE 3", "This is the third type 3 message")

    aggregator_type_1 = logging.aggregator_types["TYPE 1"]
    aggregator_type_2 = logging.aggregator_types["TYPE 2"]
    aggregator_type_3 = logging.aggregator_types["TYPE 3"]
    aggregator_type_4 = logging.aggregator_types["TYPE 4"]

    assert aggregator_type_1.count_errors == 2
    assert aggregator_type_2.count_errors == 2
    assert aggregator_type_3.count_errors == 3
    assert aggregator_type_4.count_errors == 1

    exptd_msgs = [
        "DEBUG: Blank rows in files. (TYPE 1)",
        (
            "CRITICAL: Files where we do not have the expected main column headers of Root Sample "
            "ID, RNA ID and Result. (TYPE 2)"
        ),
        (
            "WARNING: Sample rows that have Root Sample ID value but no other information. "
            "(TYPE 3) (e.g. This is the first type 3 message) (e.g. This is the second type 3 "
            "message) (e.g. This is the third type 3 message)"
        ),
        (
            "ERROR: Sample rows that have Root Sample ID and Result values but no RNA ID (no plate "
            "barcode). (TYPE 4) (e.g. This is the first type 4 message)"
        ),
    ]
    assert logging.get_aggregate_messages() == exptd_msgs
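    # Only ERROR and CRITICAL aggregators are counted: 2 x TYPE 2 (CRITICAL) + 1 x TYPE 4 (ERROR)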
    assert logging.get_count_of_all_errors_and_criticals() == 3

    exptd_report_msgs = [
        "Total number of 'Blank row' errors (TYPE 1): 2",
        "Total number of 'Missing header column' errors (TYPE 2): 2",
        "Total number of 'Only root sample id' errors (TYPE 3): 3",
        "Total number of 'No plate barcode' errors (TYPE 4): 1",
    ]
    assert logging.get_aggregate_total_messages() == exptd_report_msgs