def test_find_plugins(self):
    """Verify functions inside external config files are reachable within the Matchengine class."""
    old_create_trial_matches = self.me.create_trial_matches
    find_plugins(self.me)
    assert hasattr(self.me, 'create_trial_matches')
    # BUG FIX: the original asserted `id(new) != old_create_trial_matches`,
    # comparing an int from id() against the saved function object itself —
    # always true, so a plugin that failed to replace the method would not be
    # caught. Compare the objects' identities instead.
    assert self.me.create_trial_matches is not old_create_trial_matches
    # The plugin-supplied implementation returns an empty dict for empty input.
    blank_trial_match = self.me.create_trial_matches({})
    assert blank_trial_match.__class__ is dict and not blank_trial_match
def test_query_transform(self):
    """Verify query-transformer plugins loaded via find_plugins build correct queries.

    Exercises the `is_negate` helper plus the 'nomap', 'external_file_mapping',
    and 'to_upper' transformers registered on match_criteria_transform.
    """
    find_plugins(self.me)

    # is_negate strips a leading '!' and reports whether it was present.
    assert hasattr(self.me.match_criteria_transform.transform, 'is_negate')
    assert getattr(self.me.match_criteria_transform.transform, 'is_negate')('this') == ('this', False)
    assert getattr(self.me.match_criteria_transform.transform, 'is_negate')('!this') == ('this', True)
    assert getattr(self.me.match_criteria_transform.transform, 'is_negate')('!') == (str(), True)
    assert getattr(self.me.match_criteria_transform.transform, 'is_negate')('') == (str(), False)

    transform_args = {
        'trial_path': 'test',
        'trial_key': 'test',
        'trial_value': 'test',
        'sample_key': 'test',
        'file': 'external_file_mapping_test.json'
    }

    # 'nomap' passes the trial value through unchanged, without negation.
    assert hasattr(self.me.match_criteria_transform.query_transformers, 'nomap')
    query_transform_result = getattr(self.me.match_criteria_transform.query_transformers,
                                     'nomap')(**transform_args).results[0]
    nomap_ret, nomap_no_negate = query_transform_result.query, query_transform_result.negate
    assert len(nomap_ret) == 1 and nomap_ret['test'] == 'test' and not nomap_no_negate

    # 'external_file_mapping' expands a value into the $in options from the mapping file.
    assert hasattr(self.me.match_criteria_transform.query_transformers, 'external_file_mapping')
    query_transform_result = getattr(self.me.match_criteria_transform.query_transformers,
                                     'external_file_mapping')(**transform_args).results[0]
    ext_f_map_ret, ext_f_map_no_negate = query_transform_result.query, query_transform_result.negate
    assert len(ext_f_map_ret) == 1 and not ext_f_map_no_negate
    assert 'test' in ext_f_map_ret and '$in' in ext_f_map_ret['test']
    assert all(map(lambda x: x[0] == x[1],
                   zip(ext_f_map_ret['test']['$in'], ['option_1', 'option_2', 'option_3'])))

    # A '!'-prefixed value maps to a single scalar option and sets the negate flag.
    query_transform_result = getattr(
        self.me.match_criteria_transform.query_transformers,
        'external_file_mapping')(**dict(transform_args, **{'trial_value': '!test2'})).results[0]
    ext_f_map_ret_single, ext_f_map_no_negate_single = query_transform_result.query, query_transform_result.negate
    # BUG FIX: the original asserted len(ext_f_map_ret) — the previous,
    # non-negated result — instead of the single-value result under test.
    assert len(ext_f_map_ret_single) == 1 and ext_f_map_no_negate_single
    assert 'test' in ext_f_map_ret_single and ext_f_map_ret_single['test'].__class__ is str
    assert ext_f_map_ret_single['test'] == 'option_4'

    # 'to_upper' upper-cases the trial value.
    assert hasattr(self.me.match_criteria_transform.query_transformers, 'to_upper')
    query_transform_result = getattr(self.me.match_criteria_transform.query_transformers,
                                     'to_upper')(**transform_args).results[0]
    to_upper_ret, to_upper_no_negate = query_transform_result.query, query_transform_result.negate
    assert len(to_upper_ret) == 1 and not to_upper_no_negate
    # BUG FIX: the original membership check inspected ext_f_map_ret (a
    # copy-paste from the block above) rather than the to_upper result.
    assert 'test' in to_upper_ret and to_upper_ret['test'] == 'TEST'
def test_translate_match_path(self):
    """A trivial match clause and empty criterion should produce no match paths."""
    self.me.trials = dict()
    find_plugins(self.me)

    # Build a minimal, syntactically complete match clause to feed through the
    # path translator; the clause body itself is a single empty criterion.
    clause_kwargs = dict(
        match_clause=MatchClause([{}]),
        internal_id='123',
        code='456',
        coordinating_center='The Death Star',
        status='Open to Accrual',
        parent_path=ParentPath(()),
        match_clause_level=MatchClauseLevel('arm'),
        match_clause_additional_attributes={},
        protocol_no='12-345',
        is_suspended=True,
    )
    empty_criterion = MatchCriterion([MatchCriteria({}, 0, 0)])

    paths = translate_match_path(
        self.me,
        match_clause_data=MatchClauseData(**clause_kwargs),
        match_criterion=empty_criterion,
    )

    # Nothing to match on, so neither clinical nor extended-attribute paths exist.
    assert len(paths.clinical) == 0
    assert len(paths.extended_attributes) == 0
def __init__(
        self,
        cache: Cache = None,
        sample_ids: Set[str] = None,
        protocol_nos: Set[str] = None,
        match_on_deceased: bool = False,
        match_on_closed: bool = False,
        debug: bool = False,
        num_workers: int = cpu_count() * 5,
        visualize_match_paths: bool = False,
        fig_dir: str = None,
        config: Union[str, dict] = os.path.join(
            os.path.dirname(os.path.dirname(__file__)), 'config', 'dfci_config.json'),
        plugin_dir: str = os.path.join(os.path.dirname(os.path.dirname(__file__)), 'plugins'),
        db_init: bool = True,
        db_name: str = None,
        match_document_creator_class: str = "DFCITrialMatchDocumentCreator",
        query_node_transformer_class: str = "DFCIQueryNodeTransformer",
        query_node_subsetter_class: str = "DFCIQueryNodeClinicalIDSubsetter",
        query_node_container_transformer_class: str = "DFCIQueryContainerTransformer",
        db_secrets_class: str = None,
        report_all_clinical_reasons: bool = False,
        ignore_run_log: bool = False,
        skip_run_log_entry: bool = False,
        trial_match_collection: str = "trial_match",
        drop: bool = False,
        exit_after_drop: bool = False,
        drop_accept: bool = False,
        resource_dirs: List = None,
        chunk_size: int = 1000,
        bypass_warnings: bool = False
):
    """Initialize the matchengine: load config and plugins, connect to MongoDB,
    optionally drop existing matches, load trials and clinical data, and start
    the private asyncio event loop used to drive async queries synchronously.

    Parameters (selected):
        cache: query-result cache; a fresh Cache() is created when None.
        sample_ids / protocol_nos: restrict matching to these samples/trials;
            None means all.
        config: path to a JSON config file, or an already-parsed config dict.
        db_init: when False, no MongoDB connections are opened.
        drop / exit_after_drop / drop_accept: interactively (or silently, with
            drop_accept) drop existing trial matches before running.
        ignore_run_log / skip_run_log_entry: disable run-log bookkeeping.
    """
    # Bundled 'ref' resources always come first; caller-supplied dirs follow.
    self.resource_dirs = list()
    self.resource_dirs.append(os.path.join(os.path.dirname(os.path.dirname(__file__)), 'ref'))
    if resource_dirs is not None:
        self.resource_dirs.extend(resource_dirs)
    self.trial_match_collection = trial_match_collection
    self.starttime = datetime.datetime.now()
    # Unique id tying every run-log entry of this run together.
    self.run_id = uuid.uuid4()
    self.run_log_entries = dict()
    self.ignore_run_log = ignore_run_log
    self.skip_run_log_entry = skip_run_log_entry
    self.clinical_run_log_entries = dict()
    # Preserve the raw parameters (as lists) for the run log, independent of
    # the resolved self.protocol_nos / self.sample_ids set below.
    self._protocol_nos_param = list(protocol_nos) if protocol_nos is not None else protocol_nos
    self._sample_ids_param = list(sample_ids) if sample_ids is not None else sample_ids
    self.chunk_size = chunk_size
    self.debug = debug

    # `config` may be a filesystem path or an already-parsed dict.
    if config.__class__ is str:
        with open(config) as config_file_handle:
            self.config = json.load(config_file_handle)
    else:
        self.config = config

    self.match_criteria_transform = MatchCriteriaTransform(self.config, self.resource_dirs)

    # Plugin classes are referenced by name and resolved by find_plugins().
    self.plugin_dir = plugin_dir
    self.match_document_creator_class = match_document_creator_class
    self.query_node_transformer_class = query_node_transformer_class
    self.query_node_container_transformer_class = query_node_container_transformer_class
    self.query_node_subsetter_class = query_node_subsetter_class
    self.db_secrets_class = db_secrets_class
    find_plugins(self)

    # Separate read-only and read-write connections; both are entered eagerly
    # (synchronous init) and remain None when db_init is False.
    self.db_init = db_init
    self._db_ro = MongoDBConnection(read_only=True, async_init=False, db=db_name) if self.db_init else None
    self.db_ro = self._db_ro.__enter__() if self.db_init else None
    self._db_rw = MongoDBConnection(read_only=False, async_init=False, db=db_name) if self.db_init else None
    self.db_rw = self._db_rw.__enter__() if self.db_init else None
    log.info(f"Connected to database {self.db_ro.name}")

    # TODO: check how this flag works with run log
    self._drop = drop
    if self._drop:
        log.info((f"Dropping all matches"
                  "\n\t"
                  f"{f'for trials: {protocol_nos}' if protocol_nos is not None else 'all trials'}"
                  "\n\t"
                  f"{f'for samples: {sample_ids}' if sample_ids is not None else 'all samples'}"
                  "\n"
                  f"{'and then exiting' if exit_after_drop else 'and then continuing'}"))
        try:
            # drop_accept bypasses the interactive confirmation prompt.
            assert drop_accept or input(
                'Type "yes" without quotes in all caps to confirm: ') == "YES"
            self.drop_existing_matches(protocol_nos, sample_ids)
        except AssertionError:
            log.error("Your response was not 'YES'; exiting")
            exit(1)
        if exit_after_drop:
            exit(0)

    if not ignore_run_log:
        self.check_run_log_flags(trial_match_collection, match_on_deceased,
                                 match_on_closed, bypass_warnings)

    # A cache-like object used to accumulate query results
    self.cache = Cache() if cache is None else cache
    self.sample_ids = sample_ids
    self.protocol_nos = protocol_nos
    self.match_on_closed = match_on_closed
    self.match_on_deceased = match_on_deceased
    self.report_all_clinical_reasons = report_all_clinical_reasons
    self.num_workers = num_workers
    self.visualize_match_paths = visualize_match_paths
    self.fig_dir = fig_dir
    self._queue_task_count = int()
    self._matches: Dict[str, Dict[str, List[Dict]]] = dict()

    # Load trials, then resolve "all protocols" to the concrete list.
    self.trials = self.get_trials()
    self._trials_to_match_on = self._get_trials_to_match_on(self.trials)
    if self.protocol_nos is None:
        self.protocol_nos = list(self.trials.keys())
    self._run_log_history = self._populate_run_log_history()

    # Clinical data and the various lookups derived from it.
    self._clinical_data = self._get_clinical_data()
    self.clinical_mapping = self.get_clinical_ids_from_sample_ids()
    self.clinical_deceased = self.get_clinical_deceased()
    self.clinical_birth_dates = self.get_clinical_birth_dates()
    self.clinical_update_mapping = dict() if self.ignore_run_log else self.get_clinical_updated_mapping()
    self.clinical_extra_field_lookup = self.get_extra_field_lookup(self._clinical_data, "clinical")
    self._clinical_ids_for_protocol_cache = dict()
    self.sample_mapping = {sample_id: clinical_id
                           for clinical_id, sample_id in self.clinical_mapping.items()}
    self.clinical_ids = set(self.clinical_mapping.keys())
    # BUG FIX: mirror clinical_update_mapping above — the run-log mapping is
    # skipped only when the run log is ignored. The original gated this on a
    # re-computed get_clinical_ids_from_sample_ids() being truthy, which
    # emptied the run-log mapping whenever any clinical mappings existed.
    self.clinical_run_log_mapping = (dict()
                                     if self.ignore_run_log
                                     else self.get_clinical_run_log_mapping())
    if self.sample_ids is None:
        self.sample_ids = list(self.clinical_mapping.values())

    # instantiate a new async event loop to allow class to be used as if it is synchronous
    try:
        if asyncio.get_event_loop().is_closed() or not hasattr(self, '_loop'):
            asyncio.set_event_loop(asyncio.new_event_loop())
        self._loop = asyncio.get_event_loop()
    except RuntimeError as e:
        logging.error(e)
        self._loop = asyncio.new_event_loop()
        asyncio.set_event_loop(self._loop)

    # Finish any async-only setup (e.g. async DB handles) before returning.
    self._loop.run_until_complete(self._async_init(db_name))