def test_filter_indra_stmts():
    """Check metadata filtering of INDRA statements under both policies.

    Nine statements are built whose evidences carry an ``internal`` flag of
    True, False, or no flag at all. They are filtered on the condition
    ``{'internal': True}`` once with the 'any' and once with the 'all'
    evidence policy, and the surviving statements are checked.
    """

    def build_evidence(flag):
        # One evidence whose annotations are empty (flag is None) or hold
        # the given True/False value under emmaa -> metadata -> internal.
        evid = Evidence(text='BRAF activates MAP2K1.',
                        source_api='assertion',
                        text_refs={'TRID': '1234'})
        if internal_is_unknown(flag):
            evid.annotations = {}
        else:
            evid.annotations = {'emmaa': {'metadata': {'internal': flag}}}
        return evid

    def internal_is_unknown(flag):
        return flag is None

    def stmt_with_flags(flags):
        # Copy the shared statement and give it one evidence per flag.
        clone = deepcopy(stmt)
        clone.evidence = [build_evidence(flag) for flag in flags]
        return clone

    stmt1 = stmt_with_flags([None, None])    # unknown only: kept
    stmt2 = stmt_with_flags([True])          # true only
    stmt3 = stmt_with_flags([True, True])    # true only
    stmt4 = stmt_with_flags([None, True])    # true or unknown
    stmt5 = stmt_with_flags([False, True])   # mixed true and false
    stmt6 = stmt_with_flags([False])         # false only
    stmt7 = stmt_with_flags([None, False])   # false or unknown
    stmt8 = stmt_with_flags([False, False])  # false only
    # The metadata dict exists but the "internal" key itself is missing
    stmt9 = stmt_with_flags([False])
    del stmt9.evidence[0].annotations["emmaa"]["metadata"]["internal"]

    conditions = {'internal': True}
    stmts = [stmt1, stmt2, stmt3, stmt4, stmt5, stmt6, stmt7, stmt8, stmt9]
    always_removed = (stmt6, stmt7, stmt8, stmt9)

    # Policy 'any'
    filtered_any = filter_indra_stmts_by_metadata(stmts, conditions, 'any')
    assert len(filtered_any) == 5
    for removed in always_removed:
        assert removed not in filtered_any
    # The mixed statement survives under 'any'
    assert stmt5 in filtered_any

    # Policy 'all'
    filtered_all = filter_indra_stmts_by_metadata(stmts, conditions, 'all')
    assert len(filtered_all) == 4
    for removed in always_removed:
        assert removed not in filtered_all
    # The mixed statement is removed as well under 'all'
    assert stmt5 not in filtered_all
def load_from_s3_key(cls, key, bucket=EMMAA_BUCKET_NAME, load_estmts=False):
    """Build an instance from a model manager stored under an S3 key.

    Parameters
    ----------
    key :
        Passed to ``load_model_manager_from_s3`` to locate the manager.
    bucket :
        Bucket to load from; defaults to ``EMMAA_BUCKET_NAME``.
    load_estmts : bool
        If True, additionally load statements with ``load_stmts_from_s3``
        for the model's name and pass them to the constructor.

    Returns
    -------
    An instance of ``cls`` built from the assembled statements, date
    string, paper IDs, paper ID type and the optionally loaded statements,
    or None if no model manager could be loaded.
    """
    manager = load_model_manager_from_s3(key=key, bucket=bucket)
    if not manager:
        return None

    model = manager.model
    statements = model.assembled_stmts
    date_str = manager.date_str

    # A model without a ``paper_ids`` attribute yields None here
    try:
        paper_ids = list(model.paper_ids)
    except AttributeError:
        paper_ids = None
    paper_id_type = model.reading_config.get('main_id_type', 'TRID')

    if load_estmts:
        estmts, _ = load_stmts_from_s3(model.name, bucket)
    else:
        estmts = None

    # Apply the metadata filter from the reading config, when one is set
    filter_config = model.reading_config.get('filter')
    if filter_config:
        conditions = filter_config['conditions']
        evid_policy = filter_config['evid_policy']
        statements = filter_indra_stmts_by_metadata(
            statements, conditions, evid_policy)
        if estmts:
            estmts = filter_emmaa_stmts_by_metadata(estmts, conditions)

    return cls(statements, date_str, paper_ids, paper_id_type, estmts)
def model_to_tests(model_name, upload=True, bucket=EMMAA_BUCKET_NAME):
    """Turn a model's assembled statements into StatementCheckingTests.

    Parameters
    ----------
    model_name : str
        Name used to fetch the assembled statements and the config, and to
        build the S3 key of the uploaded tests.
    upload : bool
        If True, save the resulting dictionary with ``save_tests_to_s3``.
    bucket :
        Bucket to read from and write to; defaults to ``EMMAA_BUCKET_NAME``.

    Returns
    -------
    dict
        ``{'test_data': {'description': ..., 'name': ...}, 'tests': [...]}``
    """
    stmts, _ = get_assembled_statements(model_name, bucket=bucket)
    config = load_config_from_s3(model_name, bucket=bucket)

    # A dict-valued 'make_tests' entry carries the statement filter settings
    make_tests_config = config.get('make_tests')
    if isinstance(make_tests_config, dict):
        filter_config = make_tests_config['filter']
        conditions = filter_config['conditions']
        evid_policy = filter_config['evid_policy']
        stmts = filter_indra_stmts_by_metadata(stmts, conditions, evid_policy)

    # Only statements whose agent list has no falsy entry become tests
    tests = []
    for stmt in stmts:
        if all(stmt.agent_list()):
            tests.append(StatementCheckingTest(stmt))

    date_str = make_date_str()
    readable_name = config.get("human_readable_name")
    day = date_str[:10]
    test_data = {
        'description':
            f'These tests were generated from the {readable_name} on {day}',
        'name': f'{readable_name} model test corpus',
    }
    test_dict = {'test_data': test_data, 'tests': tests}

    if upload:
        s3_key = f'tests/{model_name}_tests_{date_str}.pkl'
        save_tests_to_s3(test_dict, bucket, s3_key, 'pkl')
    return test_dict