예제 #1
0
 def test_find_plugins(self):
     """Verify functions inside external config files are reachable within the Matchengine class.

     After ``find_plugins`` runs, ``self.me.create_trial_matches`` must exist,
     must be a different object from the one captured beforehand, and must
     return an empty ``dict`` when called with an empty mapping.
     """
     old_create_trial_matches = self.me.create_trial_matches
     find_plugins(self.me)
     assert hasattr(self.me, 'create_trial_matches')
     # Compare the two objects directly. Comparing an ``id()`` integer against
     # the saved method object (int vs. method) is always unequal, so that
     # form of the assertion could never fail.
     assert self.me.create_trial_matches is not old_create_trial_matches
     blank_trial_match = self.me.create_trial_matches({})
     assert blank_trial_match.__class__ is dict and not blank_trial_match
예제 #2
0
    def test_query_transform(self):
        """Exercise ``is_negate`` and the query transformers after ``find_plugins``.

        Checks, in order:
        * ``transform.is_negate`` strips a leading ``!`` and reports negation.
        * ``nomap`` returns a single-key query with the value unchanged.
        * ``external_file_mapping`` returns an ``$in`` list for ``'test'`` and a
          single negated string for ``'!test2'`` (mapping file
          ``external_file_mapping_test.json``).
        * ``to_upper`` upper-cases the value.
        """
        find_plugins(self.me)

        transform = self.me.match_criteria_transform.transform
        assert hasattr(transform, 'is_negate')
        is_negate = transform.is_negate
        assert is_negate('this') == ('this', False)
        assert is_negate('!this') == ('this', True)
        assert is_negate('!') == ('', True)
        assert is_negate('') == ('', False)

        transform_args = {
            'trial_path': 'test',
            'trial_key': 'test',
            'trial_value': 'test',
            'sample_key': 'test',
            'file': 'external_file_mapping_test.json',
        }
        query_transformers = self.me.match_criteria_transform.query_transformers

        # --- nomap -----------------------------------------------------------
        assert hasattr(query_transformers, 'nomap')
        nomap_result = query_transformers.nomap(**transform_args).results[0]
        nomap_ret, nomap_negate = nomap_result.query, nomap_result.negate
        assert len(nomap_ret) == 1 and nomap_ret['test'] == 'test' and not nomap_negate

        # --- external_file_mapping, multi-valued mapping ----------------------
        assert hasattr(query_transformers, 'external_file_mapping')
        ext_result = query_transformers.external_file_mapping(**transform_args).results[0]
        ext_f_map_ret, ext_f_map_negate = ext_result.query, ext_result.negate
        assert len(ext_f_map_ret) == 1 and not ext_f_map_negate
        assert 'test' in ext_f_map_ret and '$in' in ext_f_map_ret['test']
        # NOTE: zip() stops at the shorter sequence, so this only compares the
        # common prefix of the returned options and the expected ones.
        expected_options = ['option_1', 'option_2', 'option_3']
        assert all(actual == expected
                   for actual, expected in zip(ext_f_map_ret['test']['$in'], expected_options))

        # --- external_file_mapping, negated single-valued mapping -------------
        negated_args = dict(transform_args, trial_value='!test2')
        single_result = query_transformers.external_file_mapping(**negated_args).results[0]
        ext_f_map_ret_single, ext_f_map_negate_single = single_result.query, single_result.negate
        # Check the *single* result here; the earlier multi-valued result was
        # already verified above.
        assert len(ext_f_map_ret_single) == 1 and ext_f_map_negate_single
        assert 'test' in ext_f_map_ret_single and ext_f_map_ret_single['test'].__class__ is str
        assert ext_f_map_ret_single['test'] == 'option_4'

        # --- to_upper ---------------------------------------------------------
        assert hasattr(query_transformers, 'to_upper')
        to_upper_result = query_transformers.to_upper(**transform_args).results[0]
        to_upper_ret, to_upper_negate = to_upper_result.query, to_upper_result.negate
        assert len(to_upper_ret) == 1 and not to_upper_negate
        # Membership must be checked on the to_upper result itself.
        assert 'test' in to_upper_ret and to_upper_ret['test'] == 'TEST'
예제 #3
0
 def test_translate_match_path(self):
     """Translate an empty match criterion and expect empty result collections.

     Builds a ``MatchClauseData`` around an empty clause and a criterion
     holding one empty ``MatchCriteria``, then checks that both ``clinical``
     and ``extended_attributes`` of the translated paths have length zero.
     """
     self.me.trials = {}
     find_plugins(self.me)

     # Keyword arguments for the clause under test, in declaration order.
     clause_kwargs = {
         'match_clause': MatchClause([{}]),
         'internal_id': '123',
         'code': '456',
         'coordinating_center': 'The Death Star',
         'status': 'Open to Accrual',
         'parent_path': ParentPath(()),
         'match_clause_level': MatchClauseLevel('arm'),
         'match_clause_additional_attributes': {},
         'protocol_no': '12-345',
         'is_suspended': True,
     }
     clause_data = MatchClauseData(**clause_kwargs)
     empty_criterion = MatchCriterion([MatchCriteria({}, 0, 0)])

     translated = translate_match_path(
         self.me,
         match_clause_data=clause_data,
         match_criterion=empty_criterion,
     )

     assert len(translated.clinical) == 0
     assert len(translated.extended_attributes) == 0
예제 #4
0
    def __init__(
            self,
            cache: Cache = None,
            sample_ids: Set[str] = None,
            protocol_nos: Set[str] = None,
            match_on_deceased: bool = False,
            match_on_closed: bool = False,
            debug: bool = False,
            num_workers: int = cpu_count() * 5,
            visualize_match_paths: bool = False,
            fig_dir: str = None,
            config: Union[str, dict] = os.path.join(
                os.path.dirname(os.path.dirname(__file__)),
                'config',
                'dfci_config.json'),
            plugin_dir: str = os.path.join(os.path.dirname(os.path.dirname(__file__)), 'plugins'),
            db_init: bool = True,
            db_name: str = None,
            match_document_creator_class: str = "DFCITrialMatchDocumentCreator",
            query_node_transformer_class: str = "DFCIQueryNodeTransformer",
            query_node_subsetter_class: str = "DFCIQueryNodeClinicalIDSubsetter",
            query_node_container_transformer_class: str = "DFCIQueryContainerTransformer",
            db_secrets_class: str = None,
            report_all_clinical_reasons: bool = False,
            ignore_run_log: bool = False,
            skip_run_log_entry: bool = False,
            trial_match_collection: str = "trial_match",
            drop: bool = False,
            exit_after_drop: bool = False,
            drop_accept: bool = False,
            resource_dirs: List = None,
            chunk_size: int = 1000,
            bypass_warnings: bool = False
    ):
        """Configure the engine, load plugins, connect to the database and preload data.

        Args:
            cache: Object stored as ``self.cache``; a fresh ``Cache()`` is
                created when ``None``.
            sample_ids: Optional sample ids. When ``None``, ``self.sample_ids``
                is filled from the values of ``self.clinical_mapping``.
            protocol_nos: Optional protocol numbers. When ``None``,
                ``self.protocol_nos`` is filled from the keys of ``self.trials``.
            match_on_deceased: Stored on the instance and forwarded to
                ``check_run_log_flags``.
            match_on_closed: Stored on the instance and forwarded to
                ``check_run_log_flags``.
            debug: Stored as ``self.debug``.
            num_workers: Stored as ``self.num_workers``. The default is
                computed once, when the function is defined.
            visualize_match_paths: Stored as ``self.visualize_match_paths``.
            fig_dir: Stored as ``self.fig_dir``.
            config: Either a path to a JSON file, which is loaded, or an
                already-built config object, which is used as-is.
            plugin_dir: Stored as ``self.plugin_dir`` before
                ``find_plugins(self)`` runs.
            db_init: When true, a read-only and a read-write
                ``MongoDBConnection`` are opened; otherwise all four
                connection attributes are ``None``.
            db_name: Forwarded to ``MongoDBConnection`` and ``_async_init``.
            match_document_creator_class: Stored on the instance before
                ``find_plugins(self)`` runs (presumably a plugin class name;
                confirm against ``find_plugins``).
            query_node_transformer_class: Same handling as above.
            query_node_subsetter_class: Same handling as above.
            query_node_container_transformer_class: Same handling as above.
            db_secrets_class: Same handling as above.
            report_all_clinical_reasons: Stored on the instance.
            ignore_run_log: When true, ``check_run_log_flags`` is skipped and
                ``self.clinical_update_mapping`` is an empty dict.
            skip_run_log_entry: Stored on the instance.
            trial_match_collection: Stored on the instance and forwarded to
                ``check_run_log_flags``.
            drop: When true, existing matches are dropped via
                ``drop_existing_matches`` after confirmation.
            exit_after_drop: When true (and ``drop`` is true), the process
                exits with status 0 right after dropping.
            drop_accept: When true, the interactive "YES" confirmation is
                skipped.
            resource_dirs: Extra directories appended after the built-in
                ``ref`` directory.
            chunk_size: Stored as ``self.chunk_size``.
            bypass_warnings: Forwarded to ``check_run_log_flags``.

        Raises:
            SystemExit: With status 1 when a drop is requested but not
                confirmed; with status 0 after a drop when
                ``exit_after_drop`` is true.
        """
        # The bundled 'ref' directory always comes first; caller dirs follow.
        self.resource_dirs = [os.path.join(os.path.dirname(os.path.dirname(__file__)), 'ref')]
        if resource_dirs is not None:
            self.resource_dirs.extend(resource_dirs)
        self.trial_match_collection = trial_match_collection
        self.starttime = datetime.datetime.now()
        self.run_id = uuid.uuid4()
        self.run_log_entries = dict()
        self.ignore_run_log = ignore_run_log
        self.skip_run_log_entry = skip_run_log_entry
        self.clinical_run_log_entries = dict()
        # Keep list copies of the caller-supplied filters (or None).
        self._protocol_nos_param = list(protocol_nos) if protocol_nos is not None else protocol_nos
        self._sample_ids_param = list(sample_ids) if sample_ids is not None else sample_ids
        self.chunk_size = chunk_size
        self.debug = debug

        # A string config is a path to a JSON file; anything else is used directly.
        if isinstance(config, str):
            with open(config) as config_file_handle:
                self.config = json.load(config_file_handle)
        else:
            self.config = config

        self.match_criteria_transform = MatchCriteriaTransform(self.config, self.resource_dirs)

        # These attributes are set before find_plugins(self) is invoked.
        self.plugin_dir = plugin_dir
        self.match_document_creator_class = match_document_creator_class
        self.query_node_transformer_class = query_node_transformer_class
        self.query_node_container_transformer_class = query_node_container_transformer_class
        self.query_node_subsetter_class = query_node_subsetter_class
        self.db_secrets_class = db_secrets_class
        find_plugins(self)

        self.db_init = db_init
        if self.db_init:
            self._db_ro = MongoDBConnection(read_only=True, async_init=False, db=db_name)
            self.db_ro = self._db_ro.__enter__()
            self._db_rw = MongoDBConnection(read_only=False, async_init=False, db=db_name)
            self.db_rw = self._db_rw.__enter__()
            # Only log the database name when a connection actually exists;
            # with db_init=False, self.db_ro is None and has no ``name``.
            log.info(f"Connected to database {self.db_ro.name}")
        else:
            self._db_ro = self.db_ro = None
            self._db_rw = self.db_rw = None

        # TODO: check how this flag works with run log
        self._drop = drop
        if self._drop:
            trials_desc = f'for trials: {protocol_nos}' if protocol_nos is not None else 'all trials'
            samples_desc = f'for samples: {sample_ids}' if sample_ids is not None else 'all samples'
            next_step = 'and then exiting' if exit_after_drop else 'and then continuing'
            log.info(f"Dropping all matches\n\t{trials_desc}\n\t{samples_desc}\n{next_step}")
            # Explicit check instead of ``assert``: asserts are stripped under
            # ``python -O``, which would let the destructive drop run without
            # confirmation.
            confirmed = drop_accept or input(
                'Type "yes" without quotes in all caps to confirm: ') == "YES"
            if not confirmed:
                log.error("Your response was not 'YES'; exiting")
                raise SystemExit(1)
            # Kept outside any handler so errors raised by the drop itself are
            # not misreported as a failed confirmation.
            self.drop_existing_matches(protocol_nos, sample_ids)
            if exit_after_drop:
                raise SystemExit(0)

        if not ignore_run_log:
            self.check_run_log_flags(trial_match_collection, match_on_deceased, match_on_closed, bypass_warnings)

        # A cache-like object used to accumulate query results
        self.cache = Cache() if cache is None else cache
        self.sample_ids = sample_ids
        self.protocol_nos = protocol_nos
        self.match_on_closed = match_on_closed
        self.match_on_deceased = match_on_deceased
        self.report_all_clinical_reasons = report_all_clinical_reasons
        self.num_workers = num_workers
        self.visualize_match_paths = visualize_match_paths
        self.fig_dir = fig_dir
        self._queue_task_count = int()
        self._matches: Dict[str, Dict[str, List[Dict]]] = dict()

        self.trials = self.get_trials()
        self._trials_to_match_on = self._get_trials_to_match_on(self.trials)
        if self.protocol_nos is None:
            self.protocol_nos = list(self.trials.keys())
        self._run_log_history = self._populate_run_log_history()
        self._clinical_data = self._get_clinical_data()
        self.clinical_mapping = self.get_clinical_ids_from_sample_ids()
        self.clinical_deceased = self.get_clinical_deceased()
        self.clinical_birth_dates = self.get_clinical_birth_dates()
        self.clinical_update_mapping = dict() if self.ignore_run_log else self.get_clinical_updated_mapping()
        self.clinical_extra_field_lookup = self.get_extra_field_lookup(self._clinical_data,
                                                                       "clinical")
        self._clinical_ids_for_protocol_cache = dict()
        # Inverse of clinical_mapping: sample id -> clinical id.
        self.sample_mapping = {sample_id: clinical_id for clinical_id, sample_id in
                               self.clinical_mapping.items()}
        self.clinical_ids = set(self.clinical_mapping.keys())
        # NOTE(review): this condition re-runs get_clinical_ids_from_sample_ids()
        # and yields an empty dict whenever that result is truthy. The parallel
        # clinical_update_mapping line above gates on self.ignore_run_log, so
        # this may have been meant to do the same - confirm before changing.
        self.clinical_run_log_mapping = (dict()
                                         if self.get_clinical_ids_from_sample_ids()
                                         else self.get_clinical_run_log_mapping())
        if self.sample_ids is None:
            self.sample_ids = list(self.clinical_mapping.values())

        # instantiate a new async event loop to allow class to be used as if it is synchronous
        try:
            if asyncio.get_event_loop().is_closed() or not hasattr(self, '_loop'):
                asyncio.set_event_loop(asyncio.new_event_loop())
            self._loop = asyncio.get_event_loop()
        except RuntimeError as e:
            logging.error(e)
            self._loop = asyncio.new_event_loop()
            asyncio.set_event_loop(self._loop)

        self._loop.run_until_complete(self._async_init(db_name))