예제 #1
0
def test_filter_indra_stmts():
    """Exercise filter_indra_stmts_by_metadata with 'any' and 'all' policies.

    Nine statements are built from a shared template, each carrying a
    different mix of evidence-level "internal" annotations, and are then
    filtered with the condition ``{'internal': True}``.
    """
    def build_stmt(internal_flags):
        """Copy the template statement with one evidence per given flag."""
        copied = deepcopy(stmt)
        copied.evidence = []
        for flag in internal_flags:
            evid = Evidence(text='BRAF activates MAP2K1.',
                            source_api='assertion',
                            text_refs={'TRID': '1234'})
            # None -> evidence has no annotations at all; True/False -> the
            # value is stored under emmaa -> metadata -> internal.
            evid.annotations = (
                {} if flag is None
                else {'emmaa': {'metadata': {'internal': flag}}})
            copied.evidence.append(evid)
        return copied

    # One row of evidence flags per statement, in the order they are filtered.
    evid_flags = [
        [None, None],    # only unknown annotations
        [True],          # only true annotations
        [True, True],    # only true annotations
        [None, True],    # true or unknown annotations
        [False, True],   # mixed true and false
        [False],         # only false annotations
        [None, False],   # false or unknown annotations
        [False, False],  # only false annotations
        [False],         # "internal" key is removed below
    ]
    stmts = [build_stmt(flags) for flags in evid_flags]
    (unknown_only, single_true, double_true, unknown_and_true, mixed,
     single_false, unknown_and_false, double_false, missing_key) = stmts
    # Case in which the "internal" key is missing from the metadata.
    del missing_key.evidence[0].annotations["emmaa"]["metadata"]["internal"]

    conditions = {'internal': True}
    # These are expected to be removed under either evidence policy.
    always_dropped = (single_false, unknown_and_false, double_false,
                      missing_key)

    filtered_any = filter_indra_stmts_by_metadata(stmts, conditions, 'any')
    assert len(filtered_any) == 5
    for dropped in always_dropped:
        assert dropped not in filtered_any
    # The mixed statement survives the 'any' policy.
    assert mixed in filtered_any

    filtered_all = filter_indra_stmts_by_metadata(stmts, conditions, 'all')
    assert len(filtered_all) == 4
    for dropped in always_dropped:
        assert dropped not in filtered_all
    # The mixed statement is removed under the 'all' policy.
    assert mixed not in filtered_all
예제 #2
0
 def load_from_s3_key(cls,
                      key,
                      bucket=EMMAA_BUCKET_NAME,
                      load_estmts=False):
     """Build an instance from the model manager stored under an S3 key.

     Parameters
     ----------
     key :
         Passed to load_model_manager_from_s3 as ``key``.
     bucket :
         Passed to load_model_manager_from_s3 and, when ``load_estmts`` is
         set, to load_stmts_from_s3.
     load_estmts : bool
         If True, statements are additionally loaded with
         load_stmts_from_s3 for the manager's model name.

     Returns
     -------
     An instance of ``cls``, or None if no model manager could be loaded.
     """
     manager = load_model_manager_from_s3(key=key, bucket=bucket)
     if not manager:
         return None

     model = manager.model
     statements = model.assembled_stmts
     date_str = manager.date_str
     # Some models may not carry paper IDs; fall back to None in that case.
     try:
         paper_ids = list(model.paper_ids)
     except AttributeError:
         paper_ids = None
     reading_config = model.reading_config
     paper_id_type = reading_config.get('main_id_type', 'TRID')

     estmts = None
     if load_estmts:
         estmts, _ = load_stmts_from_s3(model.name, bucket)

     # Apply the metadata filter only when the reading config defines one.
     filter_config = reading_config.get('filter')
     if filter_config:
         conditions = filter_config['conditions']
         evid_policy = filter_config['evid_policy']
         statements = filter_indra_stmts_by_metadata(
             statements, conditions, evid_policy)
         if estmts:
             estmts = filter_emmaa_stmts_by_metadata(estmts, conditions)

     return cls(statements, date_str, paper_ids, paper_id_type, estmts)
예제 #3
0
def model_to_tests(model_name, upload=True, bucket=EMMAA_BUCKET_NAME):
    """Turn a model's assembled statements into a StatementCheckingTest corpus.

    Parameters
    ----------
    model_name :
        Name used to fetch the assembled statements and the config, and
        embedded in the S3 key of the uploaded tests.
    upload : bool
        If True, the resulting test dictionary is saved with
        save_tests_to_s3 as a pickle.
    bucket :
        Bucket passed to the statement/config loaders and to the uploader.

    Returns
    -------
    dict
        A dictionary with a 'test_data' entry (description and name) and a
        'tests' entry holding the list of generated tests.
    """
    stmts, _ = get_assembled_statements(model_name, bucket=bucket)
    config = load_config_from_s3(model_name, bucket=bucket)

    # Filter statements only when 'make_tests' is given as a dictionary.
    make_tests_config = config.get('make_tests')
    if isinstance(make_tests_config, dict):
        filter_config = make_tests_config['filter']
        stmts = filter_indra_stmts_by_metadata(
            stmts, filter_config['conditions'], filter_config['evid_policy'])

    # Only statements whose agent list has no falsy entries become tests.
    tests = []
    for candidate in stmts:
        if all(candidate.agent_list()):
            tests.append(StatementCheckingTest(candidate))

    date_str = make_date_str()
    readable_name = config.get("human_readable_name")
    test_description = (
        f'These tests were generated from the '
        f'{readable_name} on {date_str[:10]}')
    test_name = f'{readable_name} model test corpus'
    test_data = {'description': test_description, 'name': test_name}
    test_dict = {'test_data': test_data, 'tests': tests}

    if upload:
        s3_key = f'tests/{model_name}_tests_{date_str}.pkl'
        save_tests_to_s3(test_dict, bucket, s3_key, 'pkl')
    return test_dict