Beispiel #1
0
def main(run_args):
    """
    Run the match engine with the options parsed from the command line.

    Matches are computed for all trials, written back unless the ``dry``
    option is set, and optionally exported as CSV.

    :param run_args: argument namespace; every option is read from this
        object (``plugin_dir``, ``samples``, ``trials``, ``workers``, ``dry``,
        ``csv_output``, ...). ``workers`` is indexed, so it must be a sequence
        whose first element is the worker count.
    """
    with MatchEngine(
            plugin_dir=run_args.plugin_dir,
            sample_ids=run_args.samples,
            protocol_nos=run_args.trials,
            match_on_closed=run_args.match_on_closed,
            match_on_deceased=run_args.match_on_deceased,
            debug=run_args.debug,
            num_workers=run_args.workers[0],
            config=run_args.config_path,
            db_name=run_args.db_name,
            match_document_creator_class=run_args.match_document_creator_class,
            db_secrets_class=run_args.db_secrets_class,
            report_all_clinical_reasons=run_args.report_all_clinical_reasons,
            ignore_run_log=run_args.ignore_run_log,
            skip_run_log_entry=run_args.skip_run_log_entry,
            trial_match_collection=run_args.trial_match_collection,
            # drop_and_exit implies drop
            drop=run_args.drop or run_args.drop_and_exit,
            drop_accept=run_args.confirm_drop,
            exit_after_drop=run_args.drop_and_exit,
            resource_dirs=run_args.extra_resource_dirs,
            bypass_warnings=run_args.bypass_warnings) as me:
        me.get_matches_for_all_trials()

        # Read the flag from the namespace passed in, not from a module-level
        # ``args`` global, so main() also works when called from other code.
        if not run_args.dry:
            me.update_all_matches()

        if run_args.csv_output:
            me.create_output_csv()
Beispiel #2
0
def rerun_filters(filters=None, do_update=True, datapush_id=None):
    """
    Re-run every filter, or only the filters whose ids are given.

    :param filters: list of filter ids, or None to run all filters
    :param do_update: when False the computed matches are not written via
        ``update_all_matches`` (used for temporary filters)
    :param datapush_id: id of the oncopanel data push that triggered the run;
        when set, the run's matches are tagged with it and flagged as 'new'
        instead of 'pending'
    :return: tuple of (matches computed by the engine, hex id of the run)
    """
    engine_options = dict(
        plugin_dir='./filters_config/plugins',
        protocol_nos=filters,
        match_on_closed=False,
        config='./filters_config/filters_config.json',
        db_name=settings.MONGO_DBNAME,
        match_document_creator_class="DFCIFilterMatchDocumentCreator",
        report_all_clinical_reasons=True,
        trial_match_collection="match",
        chunk_size=5000,
    )

    with MatchEngine(**engine_options) as me:
        me.get_matches_for_all_trials()
        if do_update:
            me.update_all_matches()

        run_id = me.run_id.hex

        # The data push id is always written (possibly as None); the status is
        # only reset to "new" (0) when the run belongs to a data ingestion.
        match_changes = {"data_push_id": datapush_id}
        if datapush_id:
            match_changes["MATCH_STATUS"] = 0

        match_collection = database.get_collection("match")
        match_collection.update_many({"_me_id": run_id}, {"$set": match_changes})

    return me.matches, run_id
Beispiel #3
0
async def run_check_indices_task(matchengine: MatchEngine, task, worker_id):
    """
    Ensure indexes exist on collections so queries are performant.

    For every collection listed under ``config['indices']`` the existing
    indexes are compared with the desired ones and an ``IndexUpdateTask`` is
    queued for each missing index.

    :param matchengine: engine providing config, read-only db handle and task queue
    :param task: the task being processed; re-queued on a transient DB error
    :param worker_id: id of the worker, used in log messages only
    :raises Exception: any error other than the exact classes AutoReconnect,
        CursorNotFound or ServerSelectionTimeoutError is re-raised after the
        engine has been shut down and its loop stopped
    """
    if matchengine.debug:
        log.info(
            f"Worker: {worker_id}, got new CheckIndicesTask")
    try:
        for collection, desired_indices in matchengine.config['indices'].items():
            # "trial_match" is a placeholder for the configured match collection
            if collection == "trial_match":
                collection = matchengine.trial_match_collection
            # Only the first key of each existing index is compared
            existing_indices = {
                list(index['key'].to_dict().keys())[0]
                for index in matchengine.db_ro[collection].list_indexes()
            }
            for index in set(desired_indices) - existing_indices:
                matchengine.task_q.put_nowait(IndexUpdateTask(collection, index))
        matchengine.task_q.task_done()
    except Exception as e:
        # traceback.print_tb() returns None (it writes to stderr), which made
        # the log line read "TRACEBACK: None"; format the traceback instead.
        formatted_tb = ''.join(traceback.format_tb(e.__traceback__))
        log.error(f"ERROR: Worker: {worker_id}, error: {e}")
        log.error(f"TRACEBACK: {formatted_tb}")
        # Exact class comparison is kept on purpose: subclasses of these
        # errors are still treated as fatal, as before.
        if e.__class__ is AutoReconnect:
            await matchengine.task_q.put(task)
            matchengine.task_q.task_done()
        elif e.__class__ in (CursorNotFound, ServerSelectionTimeoutError):
            matchengine.task_q.put_nowait(task)
            matchengine.task_q.task_done()
        else:
            # Fatal: the error and traceback are already logged above, so shut
            # the engine down and propagate.
            matchengine.__exit__(None, None, None)
            matchengine.loop.stop()
            raise
    def setUp(self) -> None:
        """Create a MatchEngine without running ``__init__`` so each test can set the values it needs itself."""
        engine = MatchEngine.__new__(MatchEngine)
        self.me = engine

        # Checked before any attribute is assigned on the bare instance
        assert engine.create_trial_matches({}, {}).__class__ is dict

        initial_attributes = (
            ('plugin_dir', 'matchengine/tests/plugins'),
            ('match_document_creator_class', 'TestTrialMatchDocumentCreator'),
            ('visualize_match_paths', False),
            ('debug', False),
        )
        for attribute_name, attribute_value in initial_attributes:
            setattr(engine, attribute_name, attribute_value)

        with open('matchengine/tests/config.json') as config_fh:
            self.config = json.load(config_fh)

        resource_dir = os.path.join(os.path.dirname(__file__), 'data')
        engine.match_criteria_transform = MatchCriteriaTransform(self.config, [resource_dir])
Beispiel #5
0
 def test_context_handler(self):
     """Check that the engine's loop is open inside the ``with`` block, closed after it, and that later use fails."""
     self._reset(do_reset_trial_matches=True,
                 do_reset_trials=True,
                 reset_run_log=True,
                 trials_to_load=['all_closed'])
     assert self.me.db_rw.name == 'integration'

     protocol_no = '10-001'
     engine_options = dict(
         sample_ids={'5d2799cb6756630d8dd0621d'},
         protocol_nos={protocol_no},
         match_on_closed=True,
         match_on_deceased=True,
         config='matchengine/config/dfci_config.json',
         plugin_dir='matchengine/plugins/',
         match_document_creator_class='DFCITrialMatchDocumentCreator',
         num_workers=1,
     )
     with MatchEngine(**engine_options) as me:
         me.get_matches_for_trial(protocol_no)
         assert not me._loop.is_closed()
     assert me._loop.is_closed()

     # stderr is silenced while the exited engine is used again
     with open(os.devnull, 'w') as _f, redirect_stderr(_f):
         try:
             me.get_matches_for_trial(protocol_no)
         except RuntimeError as e:
             print(f"Found expected RuntimeError {e}")
         else:
             raise AssertionError("MatchEngine should have failed")
Beispiel #6
0
async def run_query_task(matchengine: MatchEngine, task, worker_id):
    """
    Run one query task and convert its results into trial match documents.

    The query is executed through ``matchengine.run_query``. Every returned
    match reason is turned into a match document (with ``sort_order``,
    ``hash`` and ``_me_id`` added) and stored in ``matchengine.matches`` under
    the trial's protocol number and the document's sample id.

    :param matchengine: engine providing the query runner, config, task queue and match store
    :param task: query task carrying ``trial``, ``query``, ``clinical_ids``,
        ``match_clause_data`` and ``match_path``
    :param worker_id: id of the worker, used in log messages only
    :raises Exception: errors raised while building match documents are
        re-raised after the engine's loop has been stopped
    """
    if matchengine.debug:
        log.info((f"Worker: {worker_id}, protocol_no: {task.trial['protocol_no']} got new QueryTask, "
                  f"{matchengine._task_q.qsize()} tasks left in queue"))
    try:
        results: Dict[ClinicalID, List[MatchReason]] = await matchengine.run_query(task.query,
                                                                                   task.clinical_ids)
    except Exception as e:
        results = dict()
        # traceback.print_tb() returns None, which made this log line read
        # "TRACEBACK: None"; format the traceback into the message instead.
        formatted_tb = ''.join(traceback.format_tb(e.__traceback__))
        log.error(f"ERROR: Worker: {worker_id}, error: {e}")
        log.error(f"TRACEBACK: {formatted_tb}")
        # Exact class comparison is kept: subclasses are not treated as transient.
        if e.__class__ in (AutoReconnect, CursorNotFound, ServerSelectionTimeoutError):
            matchengine.task_q.put_nowait(task)
            matchengine.task_q.task_done()
            # The task was handed back to the queue and this attempt is already
            # marked done. Falling through would call task_done() a second
            # time and under-count the queue's unfinished tasks.
            return
        # Any other error: stop the loop, then fall through with empty results
        # so the task is still marked done exactly once below.
        matchengine.loop.stop()

    try:
        matchengine.results_transformer(results)
        if not results:
            # Record the trial even when nothing matched
            matchengine.matches.setdefault(task.match_clause_data.protocol_no, dict())
        for sample_results in results.values():
            for result in sample_results:
                matchengine.queue_task_count += 1
                if matchengine.queue_task_count % 1000 == 0 and matchengine.debug:
                    log.info(f"Trial match count: {matchengine.queue_task_count}")
                match_context_data = TrialMatch(task.trial,
                                                task.match_clause_data,
                                                task.match_path,
                                                task.query,
                                                result,
                                                matchengine.starttime)

                # allow user to extend trial_match objects in plugin functions
                # generate required fields on trial match doc before
                # generate sort_order and hash fields after all fields are added
                new_match_proto = matchengine.pre_process_trial_matches(match_context_data)
                match_document = matchengine.create_trial_matches(match_context_data, new_match_proto)
                sort_order = get_sort_order(matchengine.config['trial_match_sorting'], match_document)
                match_document['sort_order'] = sort_order
                # 'hash' and 'is_disabled' are left out of the hashed content
                to_hash = {key: match_document[key] for key in match_document if key not in {'hash', 'is_disabled'}}
                match_document['hash'] = nested_object_hash(to_hash)
                match_document['_me_id'] = matchengine.run_id.hex

                matchengine.matches.setdefault(task.trial['protocol_no'],
                                               dict()).setdefault(match_document['sample_id'],
                                                                  list()).append(match_document)

    except Exception as e:
        matchengine.loop.stop()
        formatted_tb = ''.join(traceback.format_tb(e.__traceback__))
        log.error(f"ERROR: Worker: {worker_id}, error: {e}")
        log.error(f"TRACEBACK: {formatted_tb}")
        raise

    matchengine.task_q.task_done()
Beispiel #7
0
async def execute_clinical_queries(
    matchengine: MatchEngine, multi_collection_query: MultiCollectionQuery,
    clinical_ids: Set[ClinicalID]
) -> Tuple[Set[ObjectId], Dict[ClinicalID, List[ClinicalMatchReason]]]:
    """
    Take in a list of queries and only execute the clinical ones. Take the resulting clinical ids, and pass that
    to the next clinical query. Repeat for all clinical queries, continuously subsetting the returned ids.
    Finally, return all clinical IDs which matched every query, and match reasons.

    Match Reasons are not used by default, but are composed of QueryNode objects and a clinical ID.

    Query results are memoised in ``matchengine.cache.ids`` per query-part hash, and
    ``matchengine.cache.in_process`` records the ids currently being fetched for each hash.

    :param matchengine: engine providing the caches, the async read-only db handle and the id subsetter
    :param multi_collection_query: query container; only its ``clinical`` part is used here
    :param clinical_ids: candidate clinical ids
    :return: tuple of (clinical ids that satisfied every rendered query part, reasons keyed by clinical id)
    """
    reasons = defaultdict(list)
    reasons_cache = set()
    query_parts_by_hash = dict()
    for _clinical in multi_collection_query.clinical:
        for query_node in _clinical.query_nodes:
            query_level_mappings = matchengine.match_criteria_transform.ctml_collection_mappings[
                query_node.query_level]
            collection = query_level_mappings["query_collection"]
            join_field = query_level_mappings["join_field"]
            id_field = query_level_mappings["id_field"]
            show_in_ui, clinical_ids = matchengine.clinical_query_node_clinical_ids_subsetter(
                query_node, clinical_ids)
            for query_part in query_node.query_parts:
                if not query_part.render:
                    continue

                # hash the inner query once; it keys both the id cache and the reasons
                query_hash = query_part.hash()
                query_parts_by_hash[query_hash] = query_part
                if query_hash not in matchengine.cache.ids:
                    matchengine.cache.ids[query_hash] = dict()

                # create a nested id_cache where the key is the clinical ID being queried and the vals
                # are the clinical IDs returned
                id_cache = matchengine.cache.ids[query_hash]
                queried_ids = set(id_cache.keys())
                # setdefault returns the set stored in the cache, so this name
                # aliases matchengine.cache.in_process[query_hash]
                still_waiting_for = matchengine.cache.in_process.setdefault(
                    query_hash, set())
                need_new = clinical_ids - queried_ids - still_waiting_for
                matchengine.cache.in_process[query_hash].update(need_new)

                if need_new:
                    new_query = {
                        '$and': [{
                            join_field: {
                                '$in': list(need_new)
                            }
                        }, query_part.query]
                    }
                    if matchengine.debug:
                        log.info(f"{query_part.query}")
                    projection = {id_field: 1, join_field: 1}
                    docs = await matchengine.async_db_ro[collection].find(
                        new_query, projection).to_list(None)

                    # save returned ids
                    # NOTE(review): the cache is keyed by id_field while the ids were queried via
                    # join_field; this only lines up when both hold the same value - confirm the mapping
                    for doc in docs:
                        id_cache[doc[id_field]] = doc[join_field]

                    # save IDs NOT returned as None so if a query is run in the future which is the same, it will skip
                    for unfound in need_new - set(id_cache.keys()):
                        id_cache[unfound] = None
                    matchengine.cache.in_process[query_hash].difference_update(
                        need_new)

                # wait until no ids are in flight for this query hash
                while True:
                    still_waiting_for.intersection_update(
                        matchengine.cache.in_process[query_hash])
                    if not still_waiting_for:
                        break
                    await asyncio.sleep(0.01)

                for clinical_id in list(clinical_ids):
                    doc_returned = id_cache[clinical_id] is not None
                    reason_key = (show_in_ui, query_hash, clinical_id,
                                  query_node.query_depth)

                    # inclusion criteria met (doc returned), or exclusion criteria
                    # fulfilled (no doc returned): keep the id and record the reason
                    if (doc_returned and not query_part.negate) or (
                            not doc_returned and query_part.negate):
                        reasons_cache.add(reason_key)

                    # an exclusion criteria returned a doc, or an inclusion criteria returned
                    # none: the id is not a match, so remove it from future queries
                    else:
                        clinical_ids.remove(clinical_id)
                        # The previous membership tests used 3- and 2-element tuples against a
                        # set of 4-element tuples, so a stale reason was never removed; discard
                        # the exact key that add() uses.
                        reasons_cache.discard(reason_key)

    for show_in_ui, query_part_hash, clinical_id, depth in reasons_cache:
        reasons[clinical_id].append(
            ClinicalMatchReason(query_parts_by_hash[query_part_hash],
                                clinical_id, depth, show_in_ui))
    return clinical_ids, reasons
Beispiel #8
0
async def execute_extended_queries(
    matchengine: MatchEngine, multi_collection_query: MultiCollectionQuery,
    initial_clinical_ids: Set[ClinicalID], reasons: Dict[ClinicalID,
                                                         List[MatchReason]]
) -> Tuple[Set[ObjectId], Dict[str, Set[ObjectId]], Dict[ClinicalID,
                                                         List[MatchReason]]]:
    """
    Filter patients on extended clinical/genomic attributes.

    Each query node container in ``multi_collection_query.extended_attributes`` is processed in turn.
    Within a container, every query node narrows its own working set of clinical ids; a clinical id
    survives the container when it remains in the working set of at least one node.

    Query results are memoised in ``matchengine.cache.ids`` per raw query hash, and
    ``matchengine.cache.in_process`` records the ids currently being fetched for each hash.

    :param matchengine: engine providing the caches, the async read-only db handle and the id subsetter
    :param multi_collection_query: query container; only ``extended_attributes`` is iterated here
    :param initial_clinical_ids: candidate clinical ids to filter
    :param reasons: existing reasons, passed through to ``get_reasons``
    :return: tuple of (remaining clinical ids, ``all_extended`` and ``reasons`` as returned by ``get_reasons``)
    """
    # clinical id -> set of (container index, node index) pairs the id currently satisfies
    clinical_ids = {clinical_id: set() for clinical_id in initial_clinical_ids}
    # (container index, node index) -> (show_in_ui, working clinical ids) for that node
    qnc_qn_tracker = dict()
    for qnc_idx, query_node_container in enumerate(
            multi_collection_query.extended_attributes):
        query_node_container_clinical_ids = list()
        # TODO: add test for this - duplicate criteria causing empty qnc
        if not query_node_container.query_nodes:
            continue
        for qn_idx, query_node in enumerate(query_node_container.query_nodes):
            query_level_mappings = matchengine.match_criteria_transform.ctml_collection_mappings[
                query_node.query_level]
            collection = query_level_mappings["query_collection"]
            join_field = query_level_mappings["join_field"]
            id_field = query_level_mappings["id_field"]
            # The subsetter result is stored per node and unpacked as (show_in_ui, working ids)
            query_node_container_clinical_ids.append(
                matchengine.extended_query_node_clinical_ids_subsetter(
                    query_node, clinical_ids.keys()))
            show_in_ui, working_clinical_ids = query_node_container_clinical_ids[
                qn_idx]
            if not working_clinical_ids:
                continue

            # Create a nested id_cache where the key is the clinical ID being queried and the vals
            # are the extended_attributes IDs returned
            query_hash = query_node.raw_query_hash()
            if query_hash not in matchengine.cache.ids:
                matchengine.cache.ids[query_hash] = dict()
            id_cache = matchengine.cache.ids[query_hash]
            queried_ids = set(id_cache.keys())
            # setdefault returns the set stored in the cache, so still_waiting_for aliases
            # matchengine.cache.in_process[query_hash]
            still_waiting_for = matchengine.cache.in_process.setdefault(
                query_hash, set())
            # Only ids that are neither cached nor currently being fetched need a new query
            need_new = working_clinical_ids - queried_ids - still_waiting_for
            matchengine.cache.in_process.setdefault(query_hash,
                                                    set()).update(need_new)
            query = query_node.extract_raw_query()

            if need_new:
                # new_query is the same dict object as query, so the '$and' clause is added in place
                # NOTE(review): assumes extract_raw_query() returns a fresh dict on each call - confirm
                new_query = query
                new_query['$and'] = new_query.get('$and', list())
                new_query['$and'].insert(0,
                                         {join_field: {
                                             '$in': list(need_new)
                                         }})

                projection = {id_field: 1, join_field: 1}
                genomic_docs = await matchengine.async_db_ro[collection].find(
                    new_query, projection).to_list(None)
                if matchengine.debug:
                    log.info(f"{new_query} returned {genomic_docs}")

                for genomic_doc in genomic_docs:
                    # If the clinical id of a returned extended_attributes doc is not present in the cache, add it.
                    if genomic_doc[join_field] not in id_cache:
                        id_cache[genomic_doc[join_field]] = set()
                    id_cache[genomic_doc[join_field]].add(
                        genomic_doc[id_field])

                # Clinical IDs which do not return extended_attributes docs need to be recorded to cache exclusions
                for unfound in need_new - set(id_cache.keys()):
                    id_cache[unfound] = None
                matchengine.cache.in_process[query_hash].difference_update(
                    need_new)
            # Poll every 10 ms until no ids are in flight for this query hash
            while True:
                still_waiting_for.intersection_update(
                    matchengine.cache.in_process[query_hash])
                if not still_waiting_for:
                    break
                await asyncio.sleep(0.01)
            # Every cached clinical id with at least one returned doc (drawn from the whole cache,
            # then intersected with this node's working set below)
            returned_clinical_ids = {
                clinical_id
                for clinical_id, genomic_docs in id_cache.items()
                if genomic_docs is not None
            }
            not_returned_clinical_ids = working_clinical_ids - returned_clinical_ids
            # Exclusion nodes keep ids that returned nothing; inclusion nodes keep ids that returned docs.
            # The working set is narrowed in place, so the tuple stored above sees the change.
            working_clinical_ids.intersection_update(
                (not_returned_clinical_ids
                 if query_node.exclusion else returned_clinical_ids))
        current_clinical_ids = set(clinical_ids.keys())
        # Union of the working sets of all nodes in this container
        qnc_clinical_ids = {
            clinical_id
            for clinical_id in reduce(
                operator.or_,
                map(operator.itemgetter(1), query_node_container_clinical_ids),
                set())
        }
        # Ids that satisfied no node in this container are dropped, and also removed from the
        # working sets recorded for earlier containers
        for invalid_clinical in current_clinical_ids - qnc_clinical_ids:
            all_qnc_qn_to_remove = clinical_ids.pop(invalid_clinical)
            for qnc_qn_to_remove in all_qnc_qn_to_remove:
                qnc_qn_tracker[qnc_qn_to_remove][1].remove(invalid_clinical)
        # Record which nodes each surviving id satisfied, and remember each node's result
        for qn_idx, qn_results in enumerate(query_node_container_clinical_ids):
            for valid_clinical_id in qn_results[1] & qnc_clinical_ids:
                clinical_ids[valid_clinical_id].add((qnc_idx, qn_idx))
            qnc_qn_tracker[(qnc_idx, qn_idx)] = qn_results

    reasons, all_extended = get_reasons(qnc_qn_tracker, multi_collection_query,
                                        matchengine.cache, reasons)
    return set(clinical_ids.keys()), all_extended, reasons
Beispiel #9
0
    def _reset(self, **kwargs):
        """
        Put the 'integration' database into a known state and build a fresh ``self.me`` engine.

        All options are optional keyword arguments: reset switches (``skip_sample_id_reset``,
        ``skip_vital_status_reset``, ``do_reset_trial_matches``, ``reset_run_log``, ``do_reset_trials``,
        ``trials_to_load``, ``do_rm_clinical_run_history``), MatchEngine options (``match_on_deceased``,
        ``match_on_closed``, ``num_workers``, ``visualize_match_paths``, ``config``, ``plugin_dir``,
        ``match_document_creator_class``, ``fig_dir``, ``protocol_nos``, ``sample_ids``,
        ``report_all_clinical``) and time control (``do_reset_time``, ``date_args``, ``unreplace_dt``).
        """
        with MongoDBConnection(read_only=False,
                               db='integration',
                               async_init=False) as setup_db:
            # First call only: optionally install the static date/time
            if not self.first_run_done:
                if kwargs.get('do_reset_time', True):
                    set_static_date_time()
                self.first_run_done = True

            assert setup_db.name == 'integration'

            # (option that skips the reset, sample id, fields restored on that clinical doc)
            clinical_resets = (
                ("skip_sample_id_reset", "5d2799d86756630d8dd065b8",
                 {"ONCOTREE_PRIMARY_DIAGNOSIS_NAME": "Non-Small Cell Lung Cancer"}),
                ("skip_vital_status_reset", "5d2799da6756630d8dd066a6",
                 {"VITAL_STATUS": "alive"}),
            )
            for skip_option, sample_id, restored_fields in clinical_resets:
                if kwargs.get(skip_option, False):
                    continue
                restored_fields["_updated"] = datetime.datetime(2001, 1, 1, 1, 1, 1, 1)
                # NOTE(review): if setup_db.clinical is a PyMongo Collection, update() is a legacy call - confirm
                setup_db.clinical.update({"SAMPLE_ID": sample_id},
                                         {"$set": restored_fields})

            if kwargs.get('do_reset_trial_matches', False):
                setup_db.trial_match.drop()

            if kwargs.get('reset_run_log', False):
                setup_db.run_log_trial_match.drop()

            if kwargs.get('do_reset_trials', False):
                setup_db.trial.drop()
                for trial_name in kwargs.get('trials_to_load', list()):
                    trial_path = os.path.join('matchengine', 'tests', 'data',
                                              'integration_trials',
                                              trial_name + '.json')
                    with open(trial_path) as trial_file_handle:
                        trial = json.load(trial_file_handle)
                    setup_db.trial.insert(trial)

            if kwargs.get('do_rm_clinical_run_history', False):
                setup_db.clinical_run_history_trial_match.drop()

        # Shut down the engine left over from a previous call before replacing it
        if hasattr(self, 'me'):
            self.me.__exit__(None, None, None)

        engine_options = dict(
            match_on_deceased=kwargs.get('match_on_deceased', True),
            match_on_closed=kwargs.get('match_on_closed', True),
            num_workers=kwargs.get('num_workers', 1),
            visualize_match_paths=kwargs.get('visualize_match_paths', False),
            config=kwargs.get('config', 'matchengine/config/dfci_config.json'),
            plugin_dir=kwargs.get('plugin_dir', 'matchengine/plugins/'),
            match_document_creator_class=kwargs.get(
                'match_document_creator_class',
                "DFCITrialMatchDocumentCreator"),
            fig_dir=kwargs.get('fig_dir', '/tmp/'),
            protocol_nos=kwargs.get('protocol_nos', None),
            sample_ids=kwargs.get('sample_ids', None),
            report_all_clinical_reasons=kwargs.get("report_all_clinical", True),
        )
        self.me = MatchEngine(**engine_options)

        assert self.me.db_rw.name == 'integration'

        # Ages are relative, so the test data would stop matching as time passes. To avoid that,
        # datetime.datetime.now() and datetime.date.today() must always return the same value.
        # This is done with override classes for datetime.datetime and datetime.date that implement
        # static versions of now() and today(); the override logic is kept generic so more classes
        # can be added later (at the time of writing: StaticDatetime and StaticDate).
        if kwargs.get("do_reset_time", True):
            date_args = kwargs.get('date_args', False)
            if date_args:
                set_static_date_time(**date_args)
            else:
                set_static_date_time()

        if kwargs.get("unreplace_dt", False):
            unoverride_datetime()
Beispiel #10
0
class IntegrationTestMatchengine(TestCase):
    def __init__(self, *args, **kwargs):
        """Initialise the test case and record that `_reset` has not run yet."""
        super().__init__(*args, **kwargs)
        # Read by `_reset` to decide whether its first-run clock setup is still pending.
        self.first_run_done = False

    def _reset(self, **kwargs):
        """
        Bring the 'integration' database to a known state and build a fresh MatchEngine in `self.me`.

        All keyword arguments are optional:
        :param do_reset_time: call set_static_date_time() (default True)
        :param date_args: keyword arguments forwarded to set_static_date_time()
        :param unreplace_dt: call unoverride_datetime() after the engine is built (default False)
        :param skip_sample_id_reset: do not restore the diagnosis of sample 5d2799d86756630d8dd065b8
        :param skip_vital_status_reset: do not restore the vital status of sample 5d2799da6756630d8dd066a6
        :param do_reset_trial_matches: drop the trial_match collection
        :param reset_run_log: drop the run_log_trial_match collection
        :param do_reset_trials: drop the trial collection and load `trials_to_load` into it
        :param trials_to_load: names (without '.json') of files in matchengine/tests/data/integration_trials
        :param do_rm_clinical_run_history: drop the clinical_run_history_trial_match collection
        :param report_all_clinical: passed to MatchEngine as `report_all_clinical_reasons` (default True)
        :param match_on_deceased, match_on_closed, num_workers, visualize_match_paths, config, plugin_dir,
            match_document_creator_class, fig_dir, protocol_nos, sample_ids: passed to MatchEngine
        """
        with MongoDBConnection(read_only=False,
                               db='integration',
                               async_init=False) as setup_db:
            if not self.first_run_done:
                if kwargs.get('do_reset_time', True):
                    set_static_date_time()

                self.first_run_done = True

            # Guard against ever touching a non-test database.
            assert setup_db.name == 'integration'

            # Timestamp written to the two fixture samples that individual tests modify.
            baseline_updated = datetime.datetime(2001, 1, 1, 1, 1, 1, 1)

            # update_one replaces the deprecated Collection.update (single-document update, no upsert).
            if not kwargs.get("skip_sample_id_reset", False):
                setup_db.clinical.update_one(
                    {"SAMPLE_ID": "5d2799d86756630d8dd065b8"},
                    {
                        "$set": {
                            "ONCOTREE_PRIMARY_DIAGNOSIS_NAME": "Non-Small Cell Lung Cancer",
                            "_updated": baseline_updated
                        }
                    })

            if not kwargs.get("skip_vital_status_reset", False):
                setup_db.clinical.update_one(
                    {"SAMPLE_ID": "5d2799da6756630d8dd066a6"},
                    {
                        "$set": {
                            "VITAL_STATUS": "alive",
                            "_updated": baseline_updated
                        }
                    })

            if kwargs.get('do_reset_trial_matches', False):
                setup_db.trial_match.drop()

            if kwargs.get('reset_run_log', False):
                setup_db.run_log_trial_match.drop()

            if kwargs.get('do_reset_trials', False):
                setup_db.trial.drop()
                for trial_name in kwargs.get('trials_to_load', []):
                    trial_path = os.path.join('matchengine', 'tests', 'data',
                                              'integration_trials',
                                              trial_name + '.json')
                    with open(trial_path) as trial_file_handle:
                        trial = json.load(trial_file_handle)
                    # The deprecated Collection.insert accepted one document or a list of them;
                    # keep both cases working with the supported API.
                    if isinstance(trial, list):
                        setup_db.trial.insert_many(trial)
                    else:
                        setup_db.trial.insert_one(trial)

            if kwargs.get('do_rm_clinical_run_history', False):
                setup_db.clinical_run_history_trial_match.drop()

        # Exit the engine left over from a previous _reset before replacing it.
        if hasattr(self, 'me'):
            self.me.__exit__(None, None, None)

        self.me = MatchEngine(
            match_on_deceased=kwargs.get('match_on_deceased', True),
            match_on_closed=kwargs.get('match_on_closed', True),
            num_workers=kwargs.get('num_workers', 1),
            visualize_match_paths=kwargs.get('visualize_match_paths', False),
            config=kwargs.get('config', 'matchengine/config/dfci_config.json'),
            plugin_dir=kwargs.get('plugin_dir', 'matchengine/plugins/'),
            match_document_creator_class=kwargs.get(
                'match_document_creator_class',
                "DFCITrialMatchDocumentCreator"),
            fig_dir=kwargs.get('fig_dir', '/tmp/'),
            protocol_nos=kwargs.get('protocol_nos', None),
            sample_ids=kwargs.get('sample_ids', None),
            report_all_clinical_reasons=kwargs.get("report_all_clinical",
                                                   True))

        assert self.me.db_rw.name == 'integration'

        # Because ages are relative (people get older with the passage of time), the test data would stop
        # matching as real time advances. To negate this, datetime.datetime.now() and datetime.date.today()
        # must always return the same value. set_static_date_time() is presumably what installs the static
        # datetime/date overrides (StaticDatetime, StaticDate) - confirm against its definition.
        if kwargs.get("do_reset_time", True):
            date_args = kwargs.get('date_args', False)
            if date_args:
                set_static_date_time(**date_args)
            else:
                set_static_date_time()
        if kwargs.get("unreplace_dt", False):
            unoverride_datetime()

    def test__match_on_deceased_match_on_closed(self):
        """With `_reset`'s default flags, all four protocols are matched with five entries each."""
        trial_names = ['all_closed', 'all_open', 'closed_dose', 'closed_step_arm']
        self._reset(trials_to_load=trial_names,
                    reset_run_log=True,
                    do_reset_trials=True)
        assert self.me.db_rw.name == 'integration'
        self.me.get_matches_for_all_trials()

        expected_protocols = ('10-001', '10-002', '10-003', '10-004')
        matched_protocols = set(self.me._matches.keys())
        assert len(matched_protocols.intersection(expected_protocols)) == 4
        for protocol_no in expected_protocols:
            assert len(self.me._matches[protocol_no]) == 5

    def test__match_on_deceased(self):
        """Match with match_on_deceased=True and match_on_closed=False; three protocols are expected."""
        trial_names = ['all_closed', 'all_open', 'closed_dose', 'closed_step_arm']
        self._reset(trials_to_load=trial_names,
                    do_reset_trials=True,
                    skip_sample_id_reset=False,
                    reset_run_log=True,
                    match_on_closed=False,
                    match_on_deceased=True)
        assert self.me.db_rw.name == 'integration'
        self.me.get_matches_for_all_trials()

        matched_protocols = set(self.me._matches.keys())
        assert len(matched_protocols & {'10-002', '10-003', '10-004'}) == 3
        # NOTE(review): 10-004 is only checked for presence, not for its number of entries -
        # confirm that this is intentional.
        for protocol_no in ('10-002', '10-003'):
            assert len(self.me._matches[protocol_no]) == 5

    def test__match_on_closed(self):
        """Match with match_on_closed=True and match_on_deceased=False; four entries per protocol."""
        trial_names = ['all_closed', 'all_open', 'closed_dose', 'closed_step_arm']
        self._reset(trials_to_load=trial_names,
                    do_reset_trials=True,
                    reset_run_log=True,
                    match_on_closed=True,
                    match_on_deceased=False)
        assert self.me.db_rw.name == 'integration'
        self.me.get_matches_for_all_trials()

        expected_protocols = ('10-001', '10-002', '10-003', '10-004')
        matched_protocols = set(self.me._matches.keys())
        assert len(matched_protocols.intersection(expected_protocols)) == 4
        for protocol_no in expected_protocols:
            assert len(self.me._matches[protocol_no]) == 4

    def test_update_trial_matches(self):
        """Write the matches of every loaded protocol to the database and expect 48 trial_match documents."""
        self._reset(do_reset_trial_matches=True,
                    do_reset_trials=True,
                    trials_to_load=[
                        'all_closed', 'all_open', 'closed_dose',
                        'closed_step_arm'
                    ],
                    report_all_clinical=False)
        assert self.me.db_rw.name == 'integration'
        self.me.get_matches_for_all_trials()
        for protocol_no in self.me.trials.keys():
            self.me.update_matches_for_protocol_number(protocol_no)
        # Collection.count() is deprecated; count by materialising the cursor, as the
        # other tests in this file already do.
        stored_matches = list(self.me.db_ro.trial_match.find())
        assert len(stored_matches) == 48

    def test_wildcard_protein_change(self):
        """Load the two wildcard-protein trials and expect 64 entries for protocol 10-005."""
        trial_names = ['wildcard_protein_found', 'wildcard_protein_not_found']
        self._reset(trials_to_load=trial_names,
                    reset_run_log=True,
                    do_reset_trials=True,
                    do_reset_trial_matches=True)
        assert self.me.db_rw.name == 'integration'
        self.me.get_matches_for_all_trials()
        wildcard_matches = self.me._matches['10-005']
        assert len(wildcard_matches) == 64

    def test_match_on_individual_protocol_no(self):
        """Restrict the engine to protocol 10-006 and expect exactly one protocol key in the matches."""
        self._reset(protocol_nos={'10-006'},
                    trials_to_load=['wildcard_protein_not_found'],
                    reset_run_log=True,
                    do_reset_trials=True,
                    do_reset_trial_matches=True)
        assert self.me.db_rw.name == 'integration'
        self.me.get_matches_for_all_trials()
        matched_protocols = self.me._matches.keys()
        assert len(matched_protocols) == 1

    def test_match_on_individual_sample(self):
        """Restrict the engine to a single sample id and expect one entry under each of the four protocols."""
        trial_names = ['all_closed', 'all_open', 'closed_dose', 'closed_step_arm']
        self._reset(sample_ids={'5d2799cb6756630d8dd0621d'},
                    trials_to_load=trial_names,
                    reset_run_log=True,
                    do_reset_trials=True,
                    do_reset_trial_matches=True)
        assert self.me.db_rw.name == 'integration'
        self.me.get_matches_for_all_trials()
        for protocol_no in ('10-001', '10-002', '10-003', '10-004'):
            assert len(self.me._matches[protocol_no]) == 1

    def test_output_csv(self):
        """
        Write the matches to CSV and check the file's header and row count.

        The CSV is expected in the current working directory under a name derived from
        datetime.datetime.now(). It is removed again whether or not the assertions pass.
        """
        from matchengine.internals.utilities.output import create_output_csv
        from matchengine.internals.utilities.output import get_all_match_fieldnames

        self._reset(do_reset_trial_matches=True,
                    do_reset_trials=True,
                    reset_run_log=True,
                    trials_to_load=[
                        'all_closed', 'all_open', 'closed_dose',
                        'closed_step_arm'
                    ],
                    report_all_clinical=False)
        assert self.me.db_rw.name == 'integration'
        self.me.get_matches_for_all_trials()
        filename = f'trial_matches_{datetime.datetime.now().strftime("%b_%d_%Y_%H:%M")}.csv'
        try:
            create_output_csv(self.me)
            assert os.path.exists(filename)
            assert os.path.isfile(filename)
            with open(filename) as csv_file_handle:
                csv_reader = csv.DictReader(csv_file_handle)
                fieldnames = set(csv_reader.fieldnames)
                rows = list(csv_reader)
            # Every CSV column must be a known match field name.
            assert fieldnames.issubset(get_all_match_fieldnames(self.me))
            match_count = sum(
                1 for protocol_matches in self.me._matches.values()
                for sample_matches in protocol_matches.values()
                for _ in sample_matches)
            assert match_count == 48
            assert len(rows) == 48
        finally:
            # One cleanup path for success and failure; the original traceback is left untouched.
            if os.path.exists(filename):
                os.unlink(filename)

    def test_visualize_match_paths(self):
        """
        Run the engine with visualize_match_paths=True and expect four PNG files in a fresh directory.

        The test is reported as skipped (not passed) when pygraphviz, matplotlib or the
        "dot" executable is unavailable.
        """
        # pygraphviz doesn't install easily on macOS, so probe the optional dependencies first.
        for module_name in ('pygraphviz', 'matplotlib'):
            try:
                __import__(module_name)
            except ImportError:
                self.skipTest(f'{module_name} is not installed')
        if not which('dot'):
            self.skipTest('executable "dot" not found')

        fig_dir = f"/tmp/{os.urandom(10).hex()}"
        os.makedirs(fig_dir, exist_ok=True)
        unoverride_datetime()
        self._reset(do_reset_trial_matches=True,
                    do_reset_trials=True,
                    reset_run_log=True,
                    trials_to_load=['all_closed'],
                    sample_ids={'5d2799cb6756630d8dd0621d'},
                    visualize_match_paths=True,
                    fig_dir=fig_dir,
                    do_reset_time=False)
        assert self.me.db_rw.name == 'integration'
        self.me.get_matches_for_trial('10-001')
        for file_name in [
                '10-001-arm-212.png', '10-001-arm-222.png',
                '10-001-dose-312.png', '10-001-step-112.png'
        ]:
            figure_path = os.path.join(fig_dir, file_name)
            assert os.path.exists(figure_path)
            assert os.path.isfile(figure_path)
            os.unlink(figure_path)
        # rmdir only succeeds on an empty directory, i.e. when no other files were produced.
        os.rmdir(fig_dir)

    def test_massive_match_clause(self):
        """Run the engine on the 'massive_match_clause' trial and print the size of the 11-113 matches."""
        self._reset(num_workers=1,
                    reset_run_log=True,
                    match_on_closed=True,
                    match_on_deceased=True,
                    trials_to_load=['massive_match_clause'],
                    do_reset_trials=True)
        assert self.me.db_rw.name == 'integration'
        self.me.get_matches_for_all_trials()
        # NOTE(review): the count is only printed, never asserted; the test passes whenever
        # matching completes and the 11-113 key can be read.
        match_count = len(self.me._matches["11-113"])
        print(match_count)

    def test_context_handler(self):
        """
        Use MatchEngine as a context manager: its loop is open inside the block and closed after it,
        and matching with the exited engine must raise RuntimeError.
        """
        self._reset(trials_to_load=['all_closed'],
                    reset_run_log=True,
                    do_reset_trials=True,
                    do_reset_trial_matches=True)
        assert self.me.db_rw.name == 'integration'

        engine_kwargs = dict(
            sample_ids={'5d2799cb6756630d8dd0621d'},
            protocol_nos={'10-001'},
            match_on_closed=True,
            match_on_deceased=True,
            config='matchengine/config/dfci_config.json',
            plugin_dir='matchengine/plugins/',
            match_document_creator_class='DFCITrialMatchDocumentCreator',
            num_workers=1)
        with MatchEngine(**engine_kwargs) as me:
            me.get_matches_for_trial('10-001')
            assert not me._loop.is_closed()
        assert me._loop.is_closed()

        # stderr is discarded while the exited engine is exercised.
        with open(os.devnull, 'w') as _f, redirect_stderr(_f):
            try:
                me.get_matches_for_trial('10-001')
            except RuntimeError as e:
                print(f"Found expected RuntimeError {e}")
            else:
                raise AssertionError("MatchEngine should have failed")

    def test_signatures(self):
        """Load the 'signatures' trial and expect five matches for sample 5d2799df6756630d8dd068ca."""
        self._reset(trials_to_load=['signatures'],
                    reset_run_log=True,
                    do_reset_trials=True)
        assert self.me.db_rw.name == 'integration'
        self.me.get_matches_for_all_trials()
        sample_matches = self.me._matches['99-9999']['5d2799df6756630d8dd068ca']
        assert len(sample_matches) == 5

    def test_tmb(self):
        """Load the 'tmb' trial and check the number of matches for three samples of protocol 99-9999."""
        self._reset(report_all_clinical=False,
                    reset_run_log=True,
                    trials_to_load=['tmb'],
                    do_reset_trials=True)
        assert self.me.db_rw.name == 'integration'
        self.me.get_matches_for_all_trials()

        expected_counts = (
            ('1d2799df4446699a8ddeeee', 4),
            ('4d2799df4446630a8dd068dd', 3),
            ('1d2799df4446699a8dd068ee', 4),
        )
        for sample_id, expected in expected_counts:
            assert len(self.me._matches['99-9999'][sample_id]) == expected

    def test_unstructured_sv(self):
        """Load the 'unstructured_sv' trial and expect a single 'EGFR Structural Variation' match."""
        self._reset(trials_to_load=['unstructured_sv'],
                    reset_run_log=True,
                    do_reset_trials=True)
        assert self.me.db_rw.name == 'integration'
        self.me.get_matches_for_all_trials()
        sv_matches = self.me._matches['10-005']['1d2799df4446699a8ddeeee']
        first_match = sv_matches[0]
        assert first_match['genomic_alteration'] == 'EGFR Structural Variation'
        assert len(sv_matches) == 1

    def test_structured_sv(self):
        """
        Load the 'structured_sv' trial and verify how often each structural-variation alteration
        is matched for sample 5d2799df6756630d8dd068c6.
        """
        sample_id = '5d2799df6756630d8dd068c6'
        self._reset(report_all_clinical=False,
                    trials_to_load=['structured_sv'],
                    reset_run_log=True,
                    do_reset_trials=True)
        assert self.me.db_rw.name == 'integration'
        self.me.get_matches_for_all_trials()
        assert sample_id in self.me.matches['99-9999']
        assert len(self.me.matches['99-9999'][sample_id]) == 44

        # Alterations that must never be reported for internal_id 1234566.
        forbidden_for_1234566 = {
            "CLIP4-ALK Structural Variation",
            "ALK-CLIP4 Structural Variation",
            "EML4-EML4 Structural Variation"
        }
        caught_matches = defaultdict(int)
        for match in self.me.matches['99-9999'][sample_id]:
            alteration = match.get('genomic_alteration')
            if match['reason_type'] != 'genomic':
                continue
            if match['internal_id'] != 1234566:
                caught_matches[alteration] += 1
                continue
            assert (alteration not in forbidden_for_1234566)

        check_against = {
            '!TP53 Structural Variation': 12,
            'TFG-ALK Structural Variation': 2,
            'ALK-TFG Structural Variation': 2,
            'STRN-intergenic Structural Variation': 2,
            'RANDB2-ALK Structural Variation': 2,
            'ALK-RANDB2 Structural Variation': 2,
            'NPM1-intergenic Structural Variation': 6,
            'KIF5B-ALK Structural Variation': 2,
            'ALK-KIF5B Structural Variation': 2,
            'CLIP4-ALK Structural Variation': 1,
            'this should only match to any_gene-KRAS Structural Variation': 3,
            'KRAS-this should only match to any_gene Structural Variation': 3,
            'EML4-EML4 Structural Variation': 3,
            'this should only match to any_gene-this should only match to any gene Structural Variation': 1,
            'ALK-CLIP4 Structural Variation': 1
        }
        # Only alterations that were actually caught are compared against the table.
        for alteration, count in caught_matches.items():
            assert check_against[alteration] == count

    def changed_deceased_flag_fail(self):
        """
        The matchengine should always run with the same deceased flag in order
        to guarantee data integrity.

        A first run is stored with match_on_deceased=False; building a second engine with
        match_on_deceased=True is then expected to exit with code 1.
        """
        # NOTE(review): the method name lacks the `test` prefix, so unittest's default
        # discovery will not run it - confirm whether that is deliberate.
        first_run = dict(
            do_reset_trial_matches=True,
            do_reset_trials=True,
            do_rm_clinical_run_history=True,
            trials_to_load=['run_log_arm_closed'],
            reset_run_log=True,
            match_on_closed=False,
            match_on_deceased=False,
            report_all_clinical=False,
            skip_vital_status_reset=False,
        )
        self._reset(**first_run)
        assert self.me.db_rw.name == 'integration'
        self.me.get_matches_for_all_trials()
        self.me.update_all_matches()

        second_run = dict(
            do_reset_trial_matches=False,
            do_reset_trials=True,
            trials_to_load=["run_log_arm_open"],
            reset_run_log=False,
            match_on_closed=False,
            match_on_deceased=True,
            do_rm_clinical_run_history=False,
            do_reset_time=False,
            report_all_clinical=False,
            skip_sample_id_reset=False,
        )
        with self.assertRaises(SystemExit) as cm:
            self._reset(**second_run)
        self.assertEqual(cm.exception.code, 1)
        self.me.__exit__(None, None, None)

    def changed_match_on_closed_to_open_fail(self):
        """
        The matchengine should always run with the same trial open/closed flag
        in order to guarantee data integrity.

        A first run is stored with match_on_closed=False; building a second engine with
        match_on_closed=True is then expected to exit with code 1.
        """
        # NOTE(review): the method name lacks the `test` prefix, so unittest's default
        # discovery will not run it - confirm whether that is deliberate.
        first_run = dict(
            do_reset_trial_matches=True,
            do_reset_trials=True,
            do_rm_clinical_run_history=True,
            trials_to_load=['run_log_arm_closed'],
            reset_run_log=False,
            match_on_closed=False,
            match_on_deceased=False,
            report_all_clinical=False,
            skip_vital_status_reset=False,
        )
        self._reset(**first_run)
        assert self.me.db_rw.name == 'integration'
        self.me.get_matches_for_all_trials()
        self.me.update_all_matches()

        second_run = dict(
            do_reset_trial_matches=False,
            do_reset_trials=True,
            trials_to_load=["run_log_arm_open"],
            reset_run_log=False,
            match_on_closed=True,
            match_on_deceased=False,
            do_rm_clinical_run_history=False,
            do_reset_time=False,
            report_all_clinical=False,
            skip_sample_id_reset=False,
        )
        with self.assertRaises(SystemExit) as cm:
            self._reset(**second_run)
        self.assertEqual(cm.exception.code, 1)
        self.me.__exit__(None, None, None)

    def tearDown(self) -> None:
        """Exit the MatchEngine context held in `self.me`, if a test created one."""
        if not hasattr(self, 'me'):
            return
        self.me.__exit__(None, None, None)
Beispiel #11
0
class RunLogTest(TestCase):
    """
    The run_log is a log which keeps track of protocols and sample ID's used by the engine
    in previous runs, by protocol. There are four sources used to determine if any trial and/or
    sample should be updated during any given matchengine run. Those sources are the:
        (1) run_log,
        (2) trial _updated fields
        (3) clinical _updated fields,
        (4) clinical_run_history_trial_match

    Running and updating only the necessary trials and patients is the default behavior of the
    matchengine unless otherwise specified through a CLI flag. These tests enumerate many
    possible combinations of trial and/or patient data changes, and the subsequent expected states
    of the trial_match collection as the matchengine is run on changing and updated data.

    It is assumed that if a patient's extended_attributes document is updated or added, the corresponding
    clinical document's _updated date is updated as well.
    """

    def __init__(self, *args, **kwargs):
        """Initialise the test case and record that `_reset` has not run yet."""
        super().__init__(*args, **kwargs)
        # Read by `_reset` to decide whether its first-run clock setup is still pending.
        self.first_run_done = False

    def _reset(self, **kwargs):
        """
        Bring the 'integration' database to a known state and build a fresh MatchEngine in `self.me`.

        All keyword arguments are optional:
        :param do_reset_time: call set_static_date_time() (default True)
        :param date_args: keyword arguments forwarded to set_static_date_time()
        :param unreplace_dt: call unoverride_datetime() after the engine is built; when False (default)
            the `_created` values of the engine's run log history are truncated to whole seconds instead
        :param skip_sample_id_reset: do not restore the diagnosis of sample 5d2799d86756630d8dd065b8
        :param skip_vital_status_reset: do not restore the vital status of sample 5d2799da6756630d8dd066a6
        :param do_reset_trial_matches: drop the trial_match collection
        :param reset_run_log: drop the run_log_trial_match collection
        :param do_reset_trials: drop the trial collection and load `trials_to_load` into it
        :param trials_to_load: names (without '.json') of files in matchengine/tests/data/integration_trials
        :param do_rm_clinical_run_history: drop the clinical_run_history_trial_match collection
        :param report_all_clinical: passed to MatchEngine as `report_all_clinical_reasons` (default True)
        :param match_on_deceased, match_on_closed, num_workers, visualize_match_paths, config, plugin_dir,
            match_document_creator_class, fig_dir, protocol_nos, sample_ids: passed to MatchEngine
        """
        if not self.first_run_done:
            if kwargs.get('do_reset_time', True):
                set_static_date_time()
            self.first_run_done = True

        with MongoDBConnection(read_only=False, db='integration', async_init=False) as setup_db:
            # Guard against ever touching a non-test database.
            assert setup_db.name == 'integration'

            # Timestamp written to the two fixture samples that individual tests modify.
            baseline_updated = datetime.datetime(2001, 1, 1, 1, 1, 1, 1)

            # update_one replaces the deprecated Collection.update (single-document update, no upsert).
            if not kwargs.get("skip_sample_id_reset", False):
                setup_db.clinical.update_one({"SAMPLE_ID": "5d2799d86756630d8dd065b8"},
                                             {"$set": {"ONCOTREE_PRIMARY_DIAGNOSIS_NAME": "Non-Small Cell Lung Cancer",
                                                       "_updated": baseline_updated}})

            if not kwargs.get("skip_vital_status_reset", False):
                setup_db.clinical.update_one({"SAMPLE_ID": "5d2799da6756630d8dd066a6"},
                                             {"$set": {"VITAL_STATUS": "alive",
                                                       "_updated": baseline_updated}})

            if kwargs.get('do_reset_trial_matches', False):
                setup_db.trial_match.drop()

            if kwargs.get('reset_run_log', False):
                setup_db.run_log_trial_match.drop()

            if kwargs.get('do_reset_trials', False):
                setup_db.trial.drop()
                for trial_name in kwargs.get('trials_to_load', []):
                    trial_path = os.path.join('matchengine', 'tests', 'data', 'integration_trials',
                                              trial_name + '.json')
                    with open(trial_path) as trial_file_handle:
                        trial = json.load(trial_file_handle)
                    # The deprecated Collection.insert accepted one document or a list of them;
                    # keep both cases working with the supported API.
                    if isinstance(trial, list):
                        setup_db.trial.insert_many(trial)
                    else:
                        setup_db.trial.insert_one(trial)

            if kwargs.get('do_rm_clinical_run_history', False):
                setup_db.clinical_run_history_trial_match.drop()

        # Exit the engine left over from a previous _reset before replacing it.
        if hasattr(self, 'me'):
            self.me.__exit__(None, None, None)

        self.me = MatchEngine(
            match_on_deceased=kwargs.get('match_on_deceased', True),
            match_on_closed=kwargs.get('match_on_closed', True),
            num_workers=kwargs.get('num_workers', 1),
            visualize_match_paths=kwargs.get('visualize_match_paths', False),
            config=kwargs.get('config', 'matchengine/config/dfci_config.json'),
            plugin_dir=kwargs.get('plugin_dir', 'matchengine/plugins/'),
            match_document_creator_class=kwargs.get('match_document_creator_class', "DFCITrialMatchDocumentCreator"),
            fig_dir=kwargs.get('fig_dir', '/tmp/'),
            protocol_nos=kwargs.get('protocol_nos', None),
            sample_ids=kwargs.get('sample_ids', None),
            report_all_clinical_reasons=kwargs.get("report_all_clinical", True)
        )

        assert self.me.db_rw.name == 'integration'

        # Because ages are relative (people get older with the passage of time), the test data would stop
        # matching as real time advances. To negate this, datetime.datetime.now() and datetime.date.today()
        # must always return the same value. set_static_date_time() is presumably what installs the static
        # datetime/date overrides (StaticDatetime, StaticDate) - confirm against its definition.
        if kwargs.get("do_reset_time", True):
            date_args = kwargs.get('date_args', False)
            if date_args:
                set_static_date_time(**date_args)
            else:
                set_static_date_time()
        if kwargs.get("unreplace_dt", False):
            unoverride_datetime()
        else:
            # Rebuild every run log entry's `_created` from its year..second fields only,
            # which drops the sub-second part of the timestamp.
            for run_log_entries in self.me._run_log_history.values():
                for run_log_entry in run_log_entries:
                    dt = run_log_entry['_created']
                    run_log_entry['_created'] = datetime.datetime(*dt.timetuple()[0:6])

    def test_run_log_1(self):
        """
        Updated sample, updated curation, trial matches before, trial matches after, but different hashes

        Three engine runs are performed; after each one the trial_match, run_log_trial_match and
        clinical_run_history_trial_match collections are checked.
        :return:
        """

        # Run 1: closed arm, sample diagnosis set to a value that cannot match.
        self._reset(
            do_reset_trial_matches=True,
            do_reset_trials=True,
            do_rm_clinical_run_history=True,
            trials_to_load=['run_log_arm_closed'],
            reset_run_log=True,
            match_on_closed=False,
            match_on_deceased=False,
            report_all_clinical=False,
            skip_vital_status_reset=False
        )
        assert self.me.db_rw.name == 'integration'
        # update_one replaces the deprecated Collection.update (single-document update).
        self.me.db_rw.clinical.update_one({"SAMPLE_ID": "5d2799d86756630d8dd065b8"},
                                          {"$set": {"ONCOTREE_PRIMARY_DIAGNOSIS_NAME": "Gibberish",
                                                    "_updated": datetime.datetime.now()}})
        self.me.get_matches_for_all_trials()
        self.me.update_all_matches()
        trial_matches = list(self.me.db_ro.trial_match.find())
        run_log_trial_match = list(self.me.db_ro.run_log_trial_match.find())
        clinical_run_history_trial_match = list(self.me.db_ro.clinical_run_history_trial_match.find())
        assert len(list(self.me.db_ro.trial_match.find({"clinical_id": ObjectId("5d3778bf4fbf195d68cdf4d5")}))) == 0
        assert len(trial_matches) == 0
        assert len(run_log_trial_match) == 1
        assert len(clinical_run_history_trial_match) == 1392

        # Run 2: open arm; `_reset` restores the sample's original diagnosis.
        self._reset(
            do_reset_trial_matches=False,
            do_reset_trials=True,
            trials_to_load=["run_log_arm_open"],
            reset_run_log=False,
            match_on_closed=False,
            match_on_deceased=False,
            do_rm_clinical_run_history=False,
            do_reset_time=False,
            report_all_clinical=False,
            skip_sample_id_reset=False
        )

        self.me.get_matches_for_all_trials()
        self.me.update_all_matches()
        trial_matches = list(self.me.db_ro.trial_match.find())
        run_log_trial_match = list(self.me.db_ro.run_log_trial_match.find({}))
        clinical_run_history_trial_match = list(
            self.me.db_ro.clinical_run_history_trial_match.find({'clinical_id': ObjectId("5d2799d86756630d8dd065b8")})
        )[0]
        assert len(trial_matches) == 3
        assert len(run_log_trial_match) == 2
        assert len(clinical_run_history_trial_match['run_history']) == 2
        assert len(list(self.me.db_ro.trial_match.find({"clinical_id": ObjectId("5d3778bf4fbf195d68cdf4d5")}))) == 0
        self.me.db_rw.clinical.update_one({"SAMPLE_ID": "5d2799d86756630d8dd065b8"},
                                          {"$set": {"ONCOTREE_PRIMARY_DIAGNOSIS_NAME": "Lung Adenocarcinoma",
                                                    "_updated": datetime.datetime(2002, 1, 1, 1, 1, 1, 1)}})

        # Run 3: trials are not reloaded and the sample keeps the diagnosis set just above.
        self._reset(
            do_reset_trial_matches=False,
            do_reset_trials=False,
            reset_run_log=False,
            trials_to_load=["run_log_arm_open_criteria_change"],
            match_on_closed=False,
            match_on_deceased=False,
            do_rm_clinical_run_history=False,
            do_reset_time=False,
            report_all_clinical=False,
            skip_sample_id_reset=True
        )

        self.me.get_matches_for_all_trials()
        self.me.update_all_matches()
        trial_matches = list(self.me.db_ro.trial_match.find())
        disabled_trial_matches = list(self.me.db_ro.trial_match.find({"is_disabled": True}))
        run_log_trial_match = list(self.me.db_ro.run_log_trial_match.find({}))
        clinical_run_history_trial_match = list(
            self.me.db_ro.clinical_run_history_trial_match.find({'clinical_id': ObjectId("5d2799d86756630d8dd065b8")})
        )[0]
        assert len(trial_matches) == 4
        assert len(disabled_trial_matches) == 1
        assert len(run_log_trial_match) == 3
        assert len(list(self.me.db_ro.trial_match.find({"clinical_id": ObjectId("5d3778bf4fbf195d68cdf4d5")}))) == 0
        assert len(clinical_run_history_trial_match['run_history']) == 3

    def test_run_log_2(self):
        """
        Updated sample, updated curation, trial matches after, but not before

        Three engine runs are performed; after each one the trial_match, run_log_trial_match and
        clinical_run_history_trial_match collections are checked.
        :return:
        """

        # Run 1: closed arm, sample diagnosis set to a value that cannot match.
        self._reset(
            do_reset_trial_matches=True,
            do_reset_trials=True,
            trials_to_load=['run_log_arm_closed'],
            reset_run_log=True,
            match_on_closed=False,
            match_on_deceased=False,
            do_rm_clinical_run_history=True,
            report_all_clinical=False
        )
        assert self.me.db_rw.name == 'integration'
        # update_one replaces the deprecated Collection.update (single-document update).
        self.me.db_rw.clinical.update_one({"SAMPLE_ID": "5d2799d86756630d8dd065b8"},
                                          {"$set": {"ONCOTREE_PRIMARY_DIAGNOSIS_NAME": "Gibberish",
                                                    "_updated": datetime.datetime.now()}})
        self.me.get_matches_for_all_trials()
        self.me.update_all_matches()
        trial_matches = list(self.me.db_ro.trial_match.find())
        run_log_trial_match = list(self.me.db_ro.run_log_trial_match.find())
        clinical_run_history_trial_match = list(self.me.db_ro.clinical_run_history_trial_match.find())
        assert len(list(self.me.db_ro.trial_match.find({"clinical_id": ObjectId("5d3778bf4fbf195d68cdf4d5")}))) == 0
        assert len(trial_matches) == 0
        assert len(run_log_trial_match) == 1
        assert len(clinical_run_history_trial_match) == 1392
        self.me.db_rw.clinical.update_one({"SAMPLE_ID": "5d2799d86756630d8dd065b8"},
                                          {"$set": {"ONCOTREE_PRIMARY_DIAGNOSIS_NAME": "Medullary Carcinoma of the Colon",
                                                    "_updated": datetime.datetime(2002, 1, 1, 1, 1, 1, 1)}})

        # Run 2: open arm; the sample keeps the diagnosis set just above.
        self._reset(
            do_reset_trial_matches=False,
            do_reset_trials=True,
            trials_to_load=["run_log_arm_open"],
            reset_run_log=False,
            match_on_closed=False,
            match_on_deceased=False,
            do_rm_clinical_run_history=False,
            do_reset_time=False,
            report_all_clinical=False,
            skip_sample_id_reset=True
        )

        self.me.get_matches_for_all_trials()
        self.me.update_all_matches()
        trial_matches = list(self.me.db_ro.trial_match.find())
        run_log_trial_match = list(self.me.db_ro.run_log_trial_match.find({}))
        clinical_run_history_trial_match = list(
            self.me.db_ro.clinical_run_history_trial_match.find({'clinical_id': ObjectId("5d2799d86756630d8dd065b8")})
        )[0]
        assert len(trial_matches) == 2
        assert len(run_log_trial_match) == 2
        assert len(clinical_run_history_trial_match['run_history']) == 2
        assert len(list(self.me.db_ro.trial_match.find({"clinical_id": ObjectId("5d3778bf4fbf195d68cdf4d5")}))) == 0

        # Run 3: trials are not reloaded; `_reset` restores the sample's original diagnosis.
        self._reset(
            do_reset_trial_matches=False,
            do_reset_trials=False,
            reset_run_log=False,
            trials_to_load=["run_log_arm_open_criteria_change"],
            match_on_closed=False,
            match_on_deceased=False,
            do_rm_clinical_run_history=False,
            do_reset_time=False,
            report_all_clinical=False,
            skip_sample_id_reset=False
        )

        self.me.get_matches_for_all_trials()
        self.me.update_all_matches()
        trial_matches = list(self.me.db_ro.trial_match.find())
        disabled_trial_matches = list(self.me.db_ro.trial_match.find({"is_disabled": True}))
        run_log_trial_match = list(self.me.db_ro.run_log_trial_match.find({}))
        clinical_run_history_trial_match = list(
            self.me.db_ro.clinical_run_history_trial_match.find({'clinical_id': ObjectId("5d2799d86756630d8dd065b8")})
        )[0]
        assert len(trial_matches) == 3
        assert len(disabled_trial_matches) == 0
        assert len(run_log_trial_match) == 3
        assert len(list(self.me.db_ro.trial_match.find({"clinical_id": ObjectId("5d3778bf4fbf195d68cdf4d5")}))) == 0
        assert len(clinical_run_history_trial_match['run_history']) == 3

    def test_run_log_3(self) -> None:
        """
        Updated sample leads to new trial match
        Existing sample not updated does not cause new trial matches
        Sample that doesn't match never matches

        Runs the engine three times against the 'run_log_arm_closed' trial with
        match_on_closed=True and inspects the trial_match, run_log_trial_match and
        clinical_run_history_trial_match collections after each run:

        1. Sample 5d2799d86756630d8dd065b8 gets diagnosis "Gibberish" before the run.
        2. Partial reset only, no explicit data changes.
        3. The same sample gets diagnosis "Gibberish" again with a backdated `_updated`.

        Clinical id 5d3778bf4fbf195d68cdf4d5 is expected to have no matches after any run.
        """

        # --- Run 1: full reset, then change the sample's diagnosis ---
        self._reset(
            do_reset_trial_matches=True,
            do_reset_trials=True,
            trials_to_load=['run_log_arm_closed'],
            reset_run_log=True,
            match_on_closed=True,
            match_on_deceased=False,
            do_rm_clinical_run_history=True,
            report_all_clinical=False
        )
        # The writes below must only ever hit the 'integration' database.
        assert self.me.db_rw.name == 'integration'
        # update_one replaces the deprecated Collection.update (single document, no upsert).
        self.me.db_rw.clinical.update_one(
            {"SAMPLE_ID": "5d2799d86756630d8dd065b8"},
            {"$set": {"ONCOTREE_PRIMARY_DIAGNOSIS_NAME": "Gibberish",
                      "_updated": datetime.datetime.now()}}
        )
        self.me.get_matches_for_all_trials()
        self.me.update_all_matches()
        trial_matches = list(self.me.db_ro.trial_match.find())
        run_log_trial_match = list(self.me.db_ro.run_log_trial_match.find())
        clinical_run_history_trial_match = list(self.me.db_ro.clinical_run_history_trial_match.find())
        assert len(list(self.me.db_ro.trial_match.find({"clinical_id": ObjectId("5d3778bf4fbf195d68cdf4d5")}))) == 0
        assert len(trial_matches) == 2
        assert len(run_log_trial_match) == 1
        assert len(clinical_run_history_trial_match) == 1392

        # --- Run 2: keep matches, trials, run log and run history; no explicit data changes ---
        # NOTE(review): the match count rising from 2 to 3 suggests _reset restores the
        # sample's original diagnosis when skip_sample_id_reset=False - confirm against _reset.
        self._reset(
            do_reset_trial_matches=False,
            do_reset_trials=False,
            reset_run_log=False,
            match_on_closed=True,
            match_on_deceased=False,
            do_rm_clinical_run_history=False,
            do_reset_time=False,
            report_all_clinical=False,
            skip_sample_id_reset=False
        )

        self.me.get_matches_for_all_trials()
        self.me.update_all_matches()
        trial_matches = list(self.me.db_ro.trial_match.find())
        run_log_trial_match = list(self.me.db_ro.run_log_trial_match.find({}))
        # Only the first run-history document for this clinical id is inspected.
        clinical_run_history_trial_match = list(
            self.me.db_ro.clinical_run_history_trial_match.find({'clinical_id': ObjectId("5d2799d86756630d8dd065b8")})
        )[0]
        assert len(trial_matches) == 3
        # One more run_log_trial_match document than after the previous run.
        assert len(run_log_trial_match) == 2
        assert len(clinical_run_history_trial_match['run_history']) == 2
        assert len(list(self.me.db_ro.trial_match.find({"clinical_id": ObjectId("5d3778bf4fbf195d68cdf4d5")}))) == 0

        # --- Run 3: same partial reset, then set the diagnosis to "Gibberish" with a backdated `_updated` ---
        self._reset(
            do_reset_trial_matches=False,
            do_reset_trials=False,
            reset_run_log=False,
            match_on_closed=True,
            match_on_deceased=False,
            do_rm_clinical_run_history=False,
            do_reset_time=False,
            report_all_clinical=False,
            skip_sample_id_reset=False
        )
        self.me.db_rw.clinical.update_one(
            {"SAMPLE_ID": "5d2799d86756630d8dd065b8"},
            {"$set": {"ONCOTREE_PRIMARY_DIAGNOSIS_NAME": "Gibberish",
                      "_updated": datetime.datetime(2002, 1, 1, 1, 1, 1, 1)}}
        )

        self.me.get_matches_for_all_trials()
        self.me.update_all_matches()
        trial_matches = list(self.me.db_ro.trial_match.find())
        disabled_trial_matches = list(self.me.db_ro.trial_match.find({"is_disabled": True}))
        run_log_trial_match = list(self.me.db_ro.run_log_trial_match.find({}))
        clinical_run_history_trial_match = list(
            self.me.db_ro.clinical_run_history_trial_match.find({'clinical_id': ObjectId("5d2799d86756630d8dd065b8")})
        )[0]
        # The total stays at 3, but one of the documents is now flagged as disabled.
        assert len(trial_matches) == 3
        assert len(disabled_trial_matches) == 1
        assert len(run_log_trial_match) == 3
        assert len(list(self.me.db_ro.trial_match.find({"clinical_id": ObjectId("5d3778bf4fbf195d68cdf4d5")}))) == 0
        assert len(clinical_run_history_trial_match['run_history']) == 3

    def test_run_log_4(self) -> None:
        """
        Update a trial field not used in matching.
        Samples who have matches should continue to have matches.
        Samples without matches should still not have matches.

        Two runs against the 'run_log_arm_open' trial with match_on_closed=True.
        Between the runs, trial 10-007 gets an `unused_field` value and a backdated
        `_updated`. The match counts are expected to be identical after both runs;
        only the number of run_log_trial_match documents grows.
        """
        # --- Run 1: full reset, baseline counts ---
        self._reset(
            do_reset_trial_matches=True,
            do_reset_trials=True,
            trials_to_load=['run_log_arm_open'],
            reset_run_log=True,
            match_on_closed=True,
            match_on_deceased=False,
            do_rm_clinical_run_history=True,
            report_all_clinical=False
        )
        # The writes below must only ever hit the 'integration' database.
        assert self.me.db_rw.name == 'integration'

        self.me.get_matches_for_all_trials()
        self.me.update_all_matches()
        trial_matches = list(self.me.db_ro.trial_match.find())
        disabled_trial_matches = list(self.me.db_ro.trial_match.find({"is_disabled": True}))
        run_log_trial_match = list(self.me.db_ro.run_log_trial_match.find({}))
        # NOTE(review): other tests in this class compare trial_match `sample_id` against
        # plain strings; if it is stored as a string, this ObjectId filter can never match
        # and the `non_match` assertion is vacuous - confirm the stored type.
        non_match = list(self.me.db_rw.trial_match.find({"sample_id": ObjectId("5d2799df6756630d8dd068bc")}))
        assert len(trial_matches) == 3
        assert len(disabled_trial_matches) == 0
        assert len(run_log_trial_match) == 1
        assert len(non_match) == 0

        # --- Run 2: keep all previous state, touch a trial field unrelated to matching ---
        self._reset(
            do_reset_trial_matches=False,
            do_reset_trials=False,
            reset_run_log=False,
            match_on_closed=True,
            match_on_deceased=False,
            do_rm_clinical_run_history=False,
            do_reset_time=False,
            report_all_clinical=False,
            skip_sample_id_reset=False
        )

        # update_one replaces the deprecated Collection.update (single document, no upsert).
        self.me.db_rw.trial.update_one(
            {"protocol_no": "10-007"},
            {"$set": {"unused_field": "ricky_bobby",
                      "_updated": datetime.datetime(2002, 1, 1, 1, 1, 1, 1)}}
        )
        self.me.get_matches_for_all_trials()
        self.me.update_all_matches()
        trial_matches = list(self.me.db_ro.trial_match.find())
        disabled_trial_matches = list(self.me.db_ro.trial_match.find({"is_disabled": True}))
        run_log_trial_match = list(self.me.db_ro.run_log_trial_match.find({}))
        non_match = list(self.me.db_rw.trial_match.find({"sample_id": ObjectId("5d2799df6756630d8dd068bc")}))
        # Same match counts as run 1; one more run_log_trial_match document.
        assert len(trial_matches) == 3
        assert len(disabled_trial_matches) == 0
        assert len(run_log_trial_match) == 2
        assert len(non_match) == 0

    def test_run_log_5(self) -> None:
        """
        Update a trial arm status field. Update a sample.
        After update sample with matches should continue to have matches.
        After update sample without matches should still not have matches.

        Two runs against the 'run_log_two_arms' trial. Between the runs the second
        arm of trial 10-007 gets arm_suspended="N", one clinical document gets a new
        diagnosis, and a temporary genomic document is inserted. The temporary
        genomic document is always removed again, even when an assertion fails.
        """
        # --- Run 1: full reset, baseline counts ---
        self._reset(
            do_reset_trial_matches=True,
            do_reset_trials=True,
            trials_to_load=['run_log_two_arms'],
            reset_run_log=True,
            match_on_closed=False,
            match_on_deceased=False,
            do_rm_clinical_run_history=True,
            report_all_clinical=False
        )
        # The writes below must only ever hit the 'integration' database.
        assert self.me.db_rw.name == 'integration'

        self.me.get_matches_for_all_trials()
        self.me.update_all_matches()
        trial_matches = list(self.me.db_ro.trial_match.find())
        disabled_trial_matches = list(self.me.db_ro.trial_match.find({"is_disabled": True}))
        run_log_trial_match = list(self.me.db_ro.run_log_trial_match.find({}))
        # NOTE(review): other tests in this class compare trial_match `sample_id` against
        # plain strings; if it is stored as a string, this ObjectId filter can never match
        # and the `non_match` assertion is vacuous - confirm the stored type.
        non_match = list(self.me.db_rw.trial_match.find({"sample_id": ObjectId("5d2799df6756630d8dd068bc")}))
        assert len(trial_matches) == 3
        for match in trial_matches:
            assert match['internal_id'] == 101
            assert match['is_disabled'] is False
        assert len(disabled_trial_matches) == 0
        assert len(run_log_trial_match) == 1
        assert len(non_match) == 0

        # --- Run 2: keep all previous state, then change trial, clinical and genomic data ---
        self._reset(
            do_reset_trial_matches=False,
            do_reset_trials=False,
            reset_run_log=False,
            match_on_closed=False,
            match_on_deceased=False,
            do_rm_clinical_run_history=False,
            do_reset_time=False,
            report_all_clinical=False,
            skip_sample_id_reset=False
        )

        # update_one / insert_one / delete_many replace the deprecated
        # Collection.update / insert / remove calls.
        self.me.db_rw.trial.update_one(
            {"protocol_no": "10-007"},
            {"$set": {"treatment_list.step.0.arm.1.arm_suspended": "N",
                      "_updated": datetime.datetime(2002, 1, 1, 1, 1, 1, 1)}}
        )
        # update non-match
        self.me.db_rw.clinical.update_one(
            {"SAMPLE_ID": "5d2799df6756630d8dd068bb"},
            {"$set": {"ONCOTREE_PRIMARY_DIAGNOSIS_NAME": "Gibberish",
                      "_updated": datetime.datetime.now()}}
        )

        # update matching
        self.me.db_rw.genomic.insert_one({
            "SAMPLE_ID": "5d2799da6756630d8dd066a6",
            "clinical_id": ObjectId("5d2799da6756630d8dd066a6"),
            "_updated": datetime.datetime(2002, 1, 1, 1, 1, 1, 1),
            "TRUE_HUGO_SYMBOL": "sonic_the_hedgehog"
        })

        try:
            self.me.get_matches_for_all_trials()
            self.me.update_all_matches()
            trial_matches = list(self.me.db_ro.trial_match.find())
            disabled_trial_matches = list(self.me.db_ro.trial_match.find({"is_disabled": True}))
            run_log_trial_match = list(self.me.db_ro.run_log_trial_match.find({}))
            non_match = list(self.me.db_rw.trial_match.find({"sample_id": ObjectId("5d2799df6756630d8dd068bc")}))
            # Same match counts as run 1; one more run_log_trial_match document.
            assert len(trial_matches) == 3
            for match in trial_matches:
                assert match['internal_id'] == 101
                assert match['is_disabled'] is False
            assert len(disabled_trial_matches) == 0
            assert len(run_log_trial_match) == 2
            assert len(non_match) == 0
        finally:
            # Remove the temporary genomic document even if an assertion above failed,
            # so it cannot leak into later tests.
            self.me.db_rw.genomic.delete_many({"TRUE_HUGO_SYMBOL": "sonic_the_hedgehog"})

    def test_run_log_6(self) -> None:
        """
        Update a trial arm status field.
        Update a sample's vital_status to deceased.
        Sample should no longer have matches.

        Three runs against the 'run_log_two_arms' trial:

        1. Baseline after a full reset.
        2. Second arm of trial 10-007 gets arm_suspended="N" and sample
           5d2799da6756630d8dd066a6 is marked deceased; every disabled match is
           expected to belong to that sample.
        3. Trial 10-007 gets an `unused_field` value; counts are expected to be
           the same as after run 2 apart from the run log.
        """
        # --- Run 1: full reset, baseline counts ---
        self._reset(
            do_reset_trial_matches=True,
            do_reset_trials=True,
            trials_to_load=['run_log_two_arms'],
            reset_run_log=True,
            match_on_closed=False,
            match_on_deceased=False,
            do_rm_clinical_run_history=True,
            report_all_clinical=False
        )
        # The writes below must only ever hit the 'integration' database.
        assert self.me.db_rw.name == 'integration'

        self.me.get_matches_for_all_trials()
        self.me.update_all_matches()
        # Unfiltered query: this is every trial_match document, not only the enabled ones.
        trial_matches = list(self.me.db_ro.trial_match.find())
        disabled_trial_matches = list(self.me.db_ro.trial_match.find({"is_disabled": True}))
        run_log_trial_match = list(self.me.db_ro.run_log_trial_match.find({}))
        assert len(trial_matches) == 3
        assert len(disabled_trial_matches) == 0
        assert len(run_log_trial_match) == 1

        # --- Run 2: keep previous state, change arm status and mark the sample deceased ---
        self._reset(
            do_reset_trial_matches=False,
            do_reset_trials=False,
            reset_run_log=False,
            match_on_closed=False,
            match_on_deceased=False,
            do_rm_clinical_run_history=False,
            do_reset_time=False,
            report_all_clinical=False,
            skip_sample_id_reset=False
        )

        # update_one replaces the deprecated Collection.update (single document, no upsert).
        self.me.db_rw.trial.update_one(
            {"protocol_no": "10-007"},
            {"$set": {"treatment_list.step.0.arm.1.arm_suspended": "N",
                      "_updated": datetime.datetime(2002, 1, 1, 1, 1, 1, 1)}}
        )

        self.me.db_rw.clinical.update_one(
            {"SAMPLE_ID": "5d2799da6756630d8dd066a6"},
            {"$set": {"VITAL_STATUS": "deceased",
                      "_updated": datetime.datetime(2002, 1, 1, 1, 1, 1, 1)}}
        )

        self.me.get_matches_for_all_trials()
        self.me.update_all_matches()
        enabled_trial_matches = list(self.me.db_ro.trial_match.find({"is_disabled": False}))
        disabled_trial_matches = list(self.me.db_ro.trial_match.find({"is_disabled": True}))
        run_log_trial_match = list(self.me.db_ro.run_log_trial_match.find({}))
        assert len(enabled_trial_matches) == 3
        # Only the deceased sample's matches may be disabled.
        for match in disabled_trial_matches:
            assert match['sample_id'] == "5d2799da6756630d8dd066a6"
        assert len(disabled_trial_matches) == 2
        assert len(run_log_trial_match) == 2

        # --- Run 3: keep previous state, touch a trial field unrelated to matching ---
        self._reset(
            do_reset_trial_matches=False,
            do_reset_trials=False,
            reset_run_log=False,
            match_on_closed=False,
            match_on_deceased=False,
            do_rm_clinical_run_history=False,
            do_reset_time=False,
            report_all_clinical=False,
            skip_sample_id_reset=False
        )

        self.me.db_rw.trial.update_one(
            {"protocol_no": "10-007"},
            {"$set": {"unused_field": "ricky_bobby",
                      "_updated": datetime.datetime(2002, 2, 1, 1, 1, 1, 1)}}
        )

        self.me.get_matches_for_all_trials()
        self.me.update_all_matches()
        enabled_trial_matches = list(self.me.db_ro.trial_match.find({"is_disabled": False}))
        disabled_trial_matches = list(self.me.db_ro.trial_match.find({"is_disabled": True}))
        run_log_trial_match = list(self.me.db_ro.run_log_trial_match.find({}))
        # Same enabled/disabled split as run 2; one more run_log_trial_match document.
        assert len(enabled_trial_matches) == 3
        for match in disabled_trial_matches:
            assert match['sample_id'] == "5d2799da6756630d8dd066a6"
        assert len(disabled_trial_matches) == 2
        assert len(run_log_trial_match) == 3

    def test_run_log_7(self) -> None:
        """
        Update a trial curation.
        Update a sample's vital_status to deceased.
        Sample should no longer have matches.

        Two runs against the 'run_log_two_arms' trial. Between the runs the first
        match criterion of the first arm of trial 10-007 gets hugo_symbol "BRAF" and
        sample 5d2799da6756630d8dd066a6 is marked deceased. After the second run
        every disabled match is expected to belong to that sample.
        """
        # --- Run 1: full reset, baseline counts ---
        self._reset(
            do_reset_trial_matches=True,
            do_reset_trials=True,
            trials_to_load=['run_log_two_arms'],
            reset_run_log=True,
            match_on_closed=False,
            match_on_deceased=False,
            do_rm_clinical_run_history=True,
            report_all_clinical=False
        )
        # The writes below must only ever hit the 'integration' database.
        assert self.me.db_rw.name == 'integration'

        self.me.get_matches_for_all_trials()
        self.me.update_all_matches()
        trial_matches = list(self.me.db_ro.trial_match.find())
        disabled_trial_matches = list(self.me.db_ro.trial_match.find({"is_disabled": True}))
        run_log_trial_match = list(self.me.db_ro.run_log_trial_match.find({}))
        assert len(trial_matches) == 3
        assert len(disabled_trial_matches) == 0
        assert len(run_log_trial_match) == 1

        # --- Run 2: keep previous state, change the curation and mark the sample deceased ---
        self._reset(
            do_reset_trial_matches=False,
            do_reset_trials=False,
            reset_run_log=False,
            match_on_closed=False,
            match_on_deceased=False,
            do_rm_clinical_run_history=False,
            do_reset_time=False,
            report_all_clinical=False,
            skip_sample_id_reset=False
        )

        # update_one replaces the deprecated Collection.update (single document, no upsert).
        self.me.db_rw.trial.update_one(
            {"protocol_no": "10-007"},
            {"$set": {"treatment_list.step.0.arm.0.match.0.and.0.hugo_symbol": "BRAF",
                      "_updated": datetime.datetime(2002, 1, 1, 1, 1, 1, 1)}}
        )

        self.me.db_rw.clinical.update_one(
            {"SAMPLE_ID": "5d2799da6756630d8dd066a6"},
            {"$set": {"VITAL_STATUS": "deceased",
                      "_updated": datetime.datetime(2002, 1, 1, 1, 1, 1, 1)}}
        )

        self.me.get_matches_for_all_trials()
        self.me.update_all_matches()
        # Unfiltered query: the total includes both enabled and disabled documents.
        trial_matches = list(self.me.db_ro.trial_match.find())
        disabled_trial_matches = list(self.me.db_ro.trial_match.find({"is_disabled": True}))
        run_log_trial_match = list(self.me.db_ro.run_log_trial_match.find({}))
        assert len(trial_matches) == 5
        # Only the deceased sample's matches may be disabled.
        for match in disabled_trial_matches:
            assert match['sample_id'] == "5d2799da6756630d8dd066a6"
        assert len(disabled_trial_matches) == 2
        assert len(run_log_trial_match) == 2

    def test_run_log_8(self) -> None:
        """
        Update a sample's vital_status to deceased.
        Sample should have matches before run and not after.

        Two runs against the 'run_log_arm_open' trial. Sample
        5d2799da6756630d8dd066a6 has two enabled matches after the first run; it is
        then marked deceased, and after the second run both of its matches are
        expected to be disabled. The sample's VITAL_STATUS is always set back to
        "alive" afterwards, even when an assertion fails.
        """
        # --- Run 1: full reset, baseline counts ---
        self._reset(
            do_reset_trial_matches=True,
            do_reset_trials=True,
            trials_to_load=['run_log_arm_open'],
            reset_run_log=True,
            match_on_closed=False,
            match_on_deceased=False,
            do_rm_clinical_run_history=True,
            report_all_clinical=False
        )
        # The writes below must only ever hit the 'integration' database.
        assert self.me.db_rw.name == 'integration'

        self.me.get_matches_for_all_trials()
        self.me.update_all_matches()
        enabled_trial_matches = list(self.me.db_ro.trial_match.find({"is_disabled": False}))
        disabled_trial_matches = list(self.me.db_ro.trial_match.find({"is_disabled": True}))
        run_log_trial_match = list(self.me.db_ro.run_log_trial_match.find({}))
        assert len(enabled_trial_matches) == 3
        # Two of the three enabled matches belong to the sample that is about to be marked deceased.
        sample_count = 0
        for match in enabled_trial_matches:
            if match['sample_id'] == "5d2799da6756630d8dd066a6":
                sample_count += 1
        assert sample_count == 2
        assert len(disabled_trial_matches) == 0
        assert len(run_log_trial_match) == 1

        # update_one replaces the deprecated Collection.update (single document, no upsert).
        self.me.db_rw.clinical.update_one(
            {"SAMPLE_ID": "5d2799da6756630d8dd066a6"},
            {"$set": {"VITAL_STATUS": "deceased",
                      "_updated": datetime.datetime(2002, 2, 1, 1, 1, 1, 1)}}
        )

        try:
            # --- Run 2: keep previous state; skip_vital_status_reset=True is passed so the
            # "deceased" value written above is presumably left untouched by _reset ---
            self._reset(
                do_reset_trial_matches=False,
                do_reset_trials=False,
                reset_run_log=False,
                match_on_closed=False,
                match_on_deceased=False,
                do_rm_clinical_run_history=False,
                do_reset_time=False,
                report_all_clinical=False,
                skip_sample_id_reset=False,
                skip_vital_status_reset=True
            )

            self.me.get_matches_for_all_trials()
            self.me.update_all_matches()
            enabled_trial_matches = list(self.me.db_ro.trial_match.find({"is_disabled": False}))
            disabled_trial_matches = list(self.me.db_ro.trial_match.find({"is_disabled": True}))
            run_log_trial_match = list(self.me.db_ro.run_log_trial_match.find({}))
            assert len(enabled_trial_matches) == 1
            # The deceased sample owns every disabled match and none of the enabled ones.
            for match in enabled_trial_matches:
                assert match['sample_id'] != "5d2799da6756630d8dd066a6"
            for match in disabled_trial_matches:
                assert match['sample_id'] == "5d2799da6756630d8dd066a6"
            assert len(disabled_trial_matches) == 2
            assert len(run_log_trial_match) == 2
        finally:
            # Restore the sample's vital status even if an assertion above failed,
            # so the "deceased" value cannot leak into later tests.
            self.me.db_rw.clinical.update_one(
                {"SAMPLE_ID": "5d2799da6756630d8dd066a6"},
                {"$set": {"VITAL_STATUS": "alive",
                          "_updated": datetime.datetime(2002, 2, 1, 1, 1, 1, 1)}}
            )

    def test_run_log_9(self) -> None:
        """
        Update a trial arm status to open.
        Run on a new sample.
        Sample should have matches.
        Sample which doesn't match should still not match

        Two runs against the 'all_closed' trial with match_on_closed=False. The
        first run is expected to produce no matches at all. The first arm of trial
        10-001 then gets arm_suspended="N", and the second run is expected to
        produce 8 enabled matches, one of them for sample 5d2799cc6756630d8dd06265
        and none for sample 5d2799df6756630d8dd068ba.
        """
        # --- Run 1: full reset; nothing is expected to match ---
        self._reset(
            do_reset_trial_matches=True,
            do_reset_trials=True,
            trials_to_load=['all_closed'],
            reset_run_log=True,
            match_on_closed=False,
            match_on_deceased=False,
            do_rm_clinical_run_history=True,
            report_all_clinical=False
        )
        # The writes below must only ever hit the 'integration' database.
        assert self.me.db_rw.name == 'integration'

        self.me.get_matches_for_all_trials()
        self.me.update_all_matches()
        enabled_trial_matches = list(self.me.db_ro.trial_match.find({"is_disabled": False}))
        disabled_trial_matches = list(self.me.db_ro.trial_match.find({"is_disabled": True}))
        run_log_trial_match = list(self.me.db_ro.run_log_trial_match.find({}))
        no_match = list(self.me.db_ro.trial_match.find({"sample_id": "5d2799df6756630d8dd068ba"}))
        known_match = list(self.me.db_ro.trial_match.find({"sample_id": "5d2799cc6756630d8dd06265"}))
        assert len(enabled_trial_matches) == 0
        assert len(disabled_trial_matches) == 0
        assert len(run_log_trial_match) == 1
        assert len(no_match) == 0
        assert len(known_match) == 0

        # Change the first arm's status before the partial reset below.
        # update_one replaces the deprecated Collection.update (single document, no upsert).
        self.me.db_rw.trial.update_one(
            {"protocol_no": "10-001"},
            {"$set": {"treatment_list.step.0.arm.0.arm_suspended": "N",
                      "_updated": datetime.datetime(2002, 1, 1, 1, 1, 1, 1)}}
        )

        # --- Run 2: keep previous state ---
        self._reset(
            do_reset_trial_matches=False,
            do_reset_trials=False,
            reset_run_log=False,
            match_on_closed=False,
            match_on_deceased=False,
            do_rm_clinical_run_history=False,
            do_reset_time=False,
            report_all_clinical=False,
            skip_sample_id_reset=False
        )

        self.me.get_matches_for_all_trials()
        self.me.update_all_matches()
        enabled_trial_matches = list(self.me.db_ro.trial_match.find({"is_disabled": False}))
        disabled_trial_matches = list(self.me.db_ro.trial_match.find({"is_disabled": True}))
        run_log_trial_match = list(self.me.db_ro.run_log_trial_match.find({}))
        no_match = list(self.me.db_ro.trial_match.find({"sample_id": "5d2799df6756630d8dd068ba"}))
        known_match = list(self.me.db_ro.trial_match.find({"sample_id": "5d2799cc6756630d8dd06265"}))
        assert len(enabled_trial_matches) == 8
        assert len(disabled_trial_matches) == 0
        # One more run_log_trial_match document than after run 1.
        assert len(run_log_trial_match) == 2
        assert len(no_match) == 0
        assert len(known_match) == 1

    def test_run_log_10(self) -> None:
        """
        Update a trial field not used in matching.
        Run on a new sample.
        Sample should have matches.
        Sample which doesn't match should still not match.

        Two runs against the 'all_open' trial. Between the runs trial 10-001 gets an
        `unused_field` value and a backdated `_updated`. Both runs are expected to
        yield 8 enabled matches, no disabled matches, and no match for sample
        5d2799df6756630d8dd068ba.
        """
        # --- Run 1: full reset, baseline counts ---
        self._reset(
            do_reset_trial_matches=True,
            do_reset_trials=True,
            trials_to_load=['all_open'],
            reset_run_log=True,
            match_on_closed=False,
            match_on_deceased=False,
            do_rm_clinical_run_history=True,
            report_all_clinical=False
        )
        # The writes below must only ever hit the 'integration' database.
        assert self.me.db_rw.name == 'integration'

        self.me.get_matches_for_all_trials()
        self.me.update_all_matches()
        enabled_trial_matches = list(self.me.db_ro.trial_match.find({"is_disabled": False}))
        disabled_trial_matches = list(self.me.db_ro.trial_match.find({"is_disabled": True}))
        run_log_trial_match = list(self.me.db_ro.run_log_trial_match.find({}))
        no_match = list(self.me.db_ro.trial_match.find({"sample_id": "5d2799df6756630d8dd068ba"}))
        assert len(enabled_trial_matches) == 8
        assert len(disabled_trial_matches) == 0
        assert len(run_log_trial_match) == 1
        assert len(no_match) == 0

        # Touch a trial field unrelated to matching before the partial reset below.
        # update_one replaces the deprecated Collection.update (single document, no upsert).
        self.me.db_rw.trial.update_one(
            {"protocol_no": "10-001"},
            {"$set": {"unused_field": "ricky_bobby",
                      "_updated": datetime.datetime(2002, 1, 1, 1, 1, 1, 1)}}
        )

        # --- Run 2: keep previous state ---
        self._reset(
            do_reset_trial_matches=False,
            do_reset_trials=False,
            reset_run_log=False,
            match_on_closed=False,
            match_on_deceased=False,
            do_rm_clinical_run_history=False,
            do_reset_time=False,
            report_all_clinical=False,
            skip_sample_id_reset=False
        )

        self.me.get_matches_for_all_trials()
        self.me.update_all_matches()
        enabled_trial_matches = list(self.me.db_ro.trial_match.find({"is_disabled": False}))
        disabled_trial_matches = list(self.me.db_ro.trial_match.find({"is_disabled": True}))
        run_log_trial_match = list(self.me.db_ro.run_log_trial_match.find({}))
        no_match = list(self.me.db_ro.trial_match.find({"sample_id": "5d2799df6756630d8dd068ba"}))
        # Same match counts as run 1; one more run_log_trial_match document.
        assert len(enabled_trial_matches) == 8
        assert len(disabled_trial_matches) == 0
        assert len(run_log_trial_match) == 2
        assert len(no_match) == 0

    def test_run_log_11(self) -> None:
        """
        Update a sample's vital_status to deceased.
        Sample should not have matches before or after run.
        A third run with no trial changes should not produce matches.

        Three runs against the 'all_closed' trial with match_on_closed=False. Sample
        5d2799da6756630d8dd066a6 is marked deceased between run 1 and run 2; it is
        expected to have no trial_match documents after any of the runs, while the
        number of run_log_trial_match documents grows by one per run.
        """
        # --- Run 1: full reset ---
        self._reset(
            do_reset_trial_matches=True,
            do_reset_trials=True,
            trials_to_load=['all_closed'],
            reset_run_log=True,
            match_on_closed=False,
            match_on_deceased=False,
            do_rm_clinical_run_history=True,
            report_all_clinical=False
        )
        # The writes below must only ever hit the 'integration' database.
        assert self.me.db_rw.name == 'integration'

        self.me.get_matches_for_all_trials()
        self.me.update_all_matches()
        run_log_trial_match = list(self.me.db_ro.run_log_trial_match.find({}))
        no_match = list(self.me.db_ro.trial_match.find({"sample_id": "5d2799da6756630d8dd066a6"}))
        assert len(no_match) == 0
        assert len(run_log_trial_match) == 1

        # update_one replaces the deprecated Collection.update (single document, no upsert).
        # NOTE(review): unlike test_run_log_8, VITAL_STATUS is never set back to "alive"
        # in this test - presumably a later _reset restores it; confirm.
        self.me.db_rw.clinical.update_one(
            {"SAMPLE_ID": "5d2799da6756630d8dd066a6"},
            {"$set": {"VITAL_STATUS": "deceased",
                      "_updated": datetime.datetime(2002, 2, 1, 1, 1, 1, 1)}}
        )

        # --- Run 2: keep previous state; skip_vital_status_reset=True is passed so the
        # "deceased" value written above is presumably left untouched by _reset ---
        self._reset(
            do_reset_trial_matches=False,
            do_reset_trials=False,
            reset_run_log=False,
            match_on_closed=False,
            match_on_deceased=False,
            do_rm_clinical_run_history=False,
            do_reset_time=False,
            report_all_clinical=False,
            skip_sample_id_reset=False,
            skip_vital_status_reset=True
        )

        self.me.get_matches_for_all_trials()
        self.me.update_all_matches()
        run_log_trial_match = list(self.me.db_ro.run_log_trial_match.find({}))
        no_match = list(self.me.db_ro.trial_match.find({"sample_id": "5d2799da6756630d8dd066a6"}))
        assert len(no_match) == 0
        assert len(run_log_trial_match) == 2

        # --- Run 3: identical partial reset, no data changes in between ---
        self._reset(
            do_reset_trial_matches=False,
            do_reset_trials=False,
            reset_run_log=False,
            match_on_closed=False,
            match_on_deceased=False,
            do_rm_clinical_run_history=False,
            do_reset_time=False,
            report_all_clinical=False,
            skip_sample_id_reset=False,
            skip_vital_status_reset=True
        )

        self.me.get_matches_for_all_trials()
        self.me.update_all_matches()
        run_log_trial_match = list(self.me.db_ro.run_log_trial_match.find({}))
        no_match = list(self.me.db_ro.trial_match.find({"sample_id": "5d2799da6756630d8dd066a6"}))
        assert len(no_match) == 0
        assert len(run_log_trial_match) == 3

    def tearDown(self) -> None:
        """Exit the `self.me` context manager, if this test case has one."""
        # Nothing to release when no `me` attribute was attached to the test case.
        if not hasattr(self, 'me'):
            return
        # Called with no exception details, i.e. as a normal (non-error) exit.
        self.me.__exit__(None, None, None)