Esempio n. 1
0
def test_save_load_update_model_manager():
    """Round-trip a ModelManager through the test S3 bucket.

    The test walks through three stages and checks, after each one, how many
    ``results/test/model_manager_*.pkl`` files exist in ``TEST_BUCKET_NAME``:

    1. Bucket holds only the model: no manager file, loading returns None.
    2. A manager is saved: one file, loading returns a truthy object.
    3. The manager is updated at least one second later: two files.
    """
    # Local imports are recommended when using moto
    from emmaa.model_tests import ModelManager, save_model_manager_to_s3, \
        load_model_manager_from_s3, update_model_manager_on_s3
    from emmaa.util import find_number_of_files_on_s3

    def count_manager_files():
        # Number of stored model manager pickles for the 'test' model.
        return find_number_of_files_on_s3(TEST_BUCKET_NAME,
                                          'results/test/model_manager_',
                                          '.pkl')

    # Only the bucket set-up matters here; the returned client is not needed.
    setup_bucket(add_model=True)
    # Should be None if no model manager
    assert count_manager_files() == 0
    loaded_mm = load_model_manager_from_s3(model_name='test',
                                           bucket=TEST_BUCKET_NAME)
    assert loaded_mm is None
    # Save a model manager and load it back
    model = create_model()
    mm = ModelManager(model)
    save_model_manager_to_s3('test', mm, bucket=TEST_BUCKET_NAME)
    loaded_mm = load_model_manager_from_s3(model_name='test',
                                           bucket=TEST_BUCKET_NAME)
    assert loaded_mm
    assert count_manager_files() == 1
    # Update should create a new file if there's at least one second difference
    time.sleep(1)
    update_model_manager_on_s3('test', TEST_BUCKET_NAME)
    assert count_manager_files() == 2
Esempio n. 2
0
def test_run_tests():
    """Run one statement-checking test and verify results for each model type.

    A single BRAF -> MAPK1 activation test is connected to the model with
    ScopeTestConnector and executed; afterwards the manager must hold exactly
    one applicable test and one result per model checker type.
    """
    emmaa_model = create_model()
    braf = Agent('BRAF', db_refs={'HGNC': '1097'})
    mapk1 = Agent('MAPK1', db_refs={'UP': 'P28482'})
    checking_tests = [StatementCheckingTest(Activation(braf, mapk1))]
    manager = ModelManager(emmaa_model)
    test_manager = TestManager([manager], checking_tests)
    test_manager.make_tests(ScopeTestConnector())
    test_manager.run_tests()
    # Exactly one test was found applicable, and it is of the expected class
    applicable = manager.applicable_tests
    assert len(applicable) == 1
    assert isinstance(applicable[0], StatementCheckingTest)
    # Every model type stores exactly one result
    for mc_type in ('pysb', 'pybel', 'signed_graph', 'unsigned_graph'):
        assert len(manager.mc_types[mc_type]['test_results']) == 1
    pysb_result = manager.mc_types['pysb']['test_results'][0]
    assert isinstance(pysb_result, PathResult)
Esempio n. 3
0
def test_model_manager_structure():
    """Verify the attributes of a freshly constructed ModelManager.

    Checks the wrapped model, the four model checker entries stored in
    ``mc_types``, the type of the first entity and the type of ``date_str``.
    """
    manager = ModelManager(create_model())
    assert isinstance(manager, ModelManager)
    assert isinstance(manager.model, EmmaaModel)
    assert manager.model.name == 'test'
    mc_types = manager.mc_types
    assert len(mc_types) == 4, len(mc_types)
    assert len(mc_types['pysb']) == 3
    # Each model type must be backed by its dedicated model checker class
    expected_checkers = (
        ('pysb', PysbModelChecker),
        ('pybel', PybelModelChecker),
        ('signed_graph', SignedGraphModelChecker),
        ('unsigned_graph', UnsignedGraphModelChecker),
    )
    for mc_type, checker_class in expected_checkers:
        assert isinstance(mc_types[mc_type]['model_checker'], checker_class)
    assert isinstance(manager.entities[0], Agent)
    assert isinstance(manager.date_str, str)
Esempio n. 4
0
def test_applicability():
    """Compare which tests each connector considers applicable to the model.

    Two activation tests share the BRAF subject; one targets MAPK1 and the
    other targets the ERK family. ScopeTestConnector is expected to accept
    only the first, RefinementTestConnector both.
    """
    emmaa_model = create_model()
    braf_activates_mapk1 = Activation(
        Agent('BRAF', db_refs={'HGNC': '1097'}),
        Agent('MAPK1', db_refs={'UP': 'P28482'}))
    braf_activates_erk = Activation(
        Agent('BRAF', db_refs={'HGNC': '1097'}),
        Agent('ERK', db_refs={'FPLX': 'ERK'}))
    candidate_tests = [StatementCheckingTest(braf_activates_mapk1),
                       StatementCheckingTest(braf_activates_erk)]
    manager = ModelManager(emmaa_model)
    test_manager = TestManager([manager], candidate_tests)
    # Only first test is applicable with ScopeTestConnector
    test_manager.make_tests(ScopeTestConnector())
    assert len(manager.applicable_tests) == 1
    assert manager.applicable_tests[0] == candidate_tests[0]
    # Reset before re-connecting; both tests are applicable with
    # RefinementTestConnector
    manager.applicable_tests = []
    test_manager.make_tests(RefinementTestConnector())
    assert len(manager.applicable_tests) == 2
Esempio n. 5
0
def test_direct_path_tests():
    """Check path lengths with and without direct paths allowed.

    The tested BRAF -> MAPK1 activation is appended to the model itself, so
    with ``allow_direct=True`` the first path has two nodes (one edge), and
    with ``allow_direct=False`` the first path has three nodes (two edges).
    """
    emmaa_model = create_model()
    direct_stmt = Activation(Agent('BRAF', db_refs={'HGNC': '1097'}),
                             Agent('MAPK1', db_refs={'UP': 'P28482'}))
    stmt_metadata = {'internal': True, 'curated': False}
    emmaa_model.stmts.append(
        EmmaaStatement(direct_stmt, datetime.datetime.now(), [],
                       stmt_metadata))
    manager = ModelManager(emmaa_model)
    test_manager = TestManager([manager],
                               [StatementCheckingTest(direct_stmt)])
    test_manager.make_tests(ScopeTestConnector())
    graph_types = ['pysb', 'signed_graph', 'unsigned_graph']

    test_manager.run_tests(allow_direct=True)
    for graph_type in graph_types:
        direct_result = manager.mc_types[graph_type]['test_results'][0]
        print(direct_result.paths)
        first_path = direct_result.paths[0]
        assert len(first_path) == 2, (graph_type, first_path)  # 1 edge

    test_manager.run_tests(allow_direct=False)
    for graph_type in graph_types:
        # Look at the second test result here (index 1, i.e. the result read
        # after the second run_tests call)
        indirect_result = manager.mc_types[graph_type]['test_results'][1]
        first_path = indirect_result.paths[0]
        assert len(first_path) == 3, (graph_type, first_path)  # 2 edges
Esempio n. 6
0
def setup_bucket(add_model=False,
                 add_mm=False,
                 add_tests=False,
                 add_results=False,
                 add_model_stats=False,
                 add_test_stats=False):
    """
    This function creates a new (local) bucket mocking S3 bucket at each call.
    Then all calls to S3 are calling this bucket instead of real S3 bucket.
    Depending on the test we might or might not need the bucket to contain
    different files. For faster computation, only required files for the test
    are generated and stored in the bucket. Files can be added by setting
    corresponding arguments to True when calling this function.

    Parameters
    ----------
    add_model : bool
        Save the 'test' model config and a model built by ``create_model()``
        to the bucket.
    add_mm : bool
        Save a ModelManager (and its assembled statements, without uploading
        to the database) to the bucket. Reuses the model created for
        ``add_model`` when there is one.
    add_tests : bool
        Store a pickled test dictionary under ``tests/simple_tests.pkl``.
    add_results : bool
        Store ``previous_results`` as JSON under ``results/test/``.
    add_model_stats : bool
        Store ``previous_model_stats`` as JSON under ``model_stats/test/``.
    add_test_stats : bool
        Store ``previous_test_stats`` as JSON under ``stats/test/``.

    Returns
    -------
    client
        The S3 client returned by ``get_s3_client()`` that was used to create
        and fill the bucket.
    """
    # Local imports are recommended when using moto
    from emmaa.util import get_s3_client
    from emmaa.model import save_config_to_s3
    from emmaa.model_tests import ModelManager, save_model_manager_to_s3, \
        StatementCheckingTest
    # Create a mock s3 bucket
    client = get_s3_client()
    client.create_bucket(Bucket=TEST_BUCKET_NAME, ACL='public-read')
    # One timestamp is shared by every dated key written below
    date_str = make_date_str()

    def put_json(payload, key):
        # Serialize payload as indented JSON and store it under key.
        client.put_object(Body=json.dumps(payload, indent=1),
                          Bucket=TEST_BUCKET_NAME,
                          Key=key)

    emmaa_model = None
    if add_model:
        # Put config and model files into empty bucket
        config_dict = {
            'ndex': {
                'network': 'a08479d1-24ce-11e9-bb6a-0ac135e8bacf'
            },
            'search_terms': [{
                'db_refs': {
                    'HGNC': '20974'
                },
                'name': 'MAPK1',
                'search_term': 'MAPK1',
                'type': 'gene'
            }],
            'test': {
                'test_corpus': 'simple_tests',
                'default_test_corpus': 'simple_tests'
            },
            'human_readable_name':
            'Test Model',
            'assembly': [{
                'function': 'filter_no_hypothesis'
            }, {
                'function': 'map_grounding'
            }, {
                'function': 'filter_grounded_only'
            }, {
                'function': 'filter_human_only'
            }, {
                'function': 'map_sequence'
            }, {
                'function': 'run_preassembly',
                'kwargs': {
                    'return_toplevel': False
                }
            }, {
                'function': 'filter_top_level'
            }]
        }
        save_config_to_s3('test', config_dict, bucket=TEST_BUCKET_NAME)
        emmaa_model = create_model()
        emmaa_model.save_to_s3(bucket=TEST_BUCKET_NAME)
    if add_mm:
        # Add a ModelManager to bucket
        if not emmaa_model:
            emmaa_model = create_model()
        mm = ModelManager(emmaa_model)
        # Align the manager's date with the other files written by this call
        mm.date_str = date_str
        mm.save_assembled_statements(upload_to_db=False,
                                     bucket=TEST_BUCKET_NAME)
        save_model_manager_to_s3('test', mm, bucket=TEST_BUCKET_NAME)
    if add_tests:
        tests = [
            StatementCheckingTest(Activation(Agent('BRAF'), Agent('MAPK1')))
        ]
        test_dict = {
            'test_data': {
                'description': 'Tests for functionality testing'
            },
            'tests': tests
        }
        client.put_object(Body=pickle.dumps(test_dict),
                          Bucket=TEST_BUCKET_NAME,
                          Key='tests/simple_tests.pkl')
    if add_results:
        put_json(previous_results,
                 f'results/test/results_simple_tests_{date_str}.json')
    if add_model_stats:
        put_json(previous_model_stats,
                 f'model_stats/test/model_stats_{date_str}.json')
    if add_test_stats:
        put_json(previous_test_stats,
                 f'stats/test/test_stats_simple_tests_{date_str}.json')
    return client
Esempio n. 7
0
            'MAP2K1 → MAPK1',
            'stmts': [[
                '/evidence?stmt_hash=-34603994586320440&source='
                'model_statement&model=test&date=2020-01-01',
                'Active MAP2K1 activates MAPK1.', ''
            ]]
        }]
    }
}
# Response fixture carrying the "not applicable" message.
# NOTE(review): the numeric string key is presumably a hash — confirm against
# the code that consumes this fixture.
query_not_appl = {'2413475507': 'Query is not applicable for this model'}
# Response fixture carrying the "no path found" message (used as the
# signed_graph result in test_format_results).
# NOTE(review): the numeric string key is presumably a hash — confirm.
fail_response = {
    '521653329': 'No path found that satisfies the test statement'
}
# Create a new EmmaaModel and ModelManager for tests instead of depending
# on S3 version
test_model = create_model()
test_mm = ModelManager(test_model)
# Overwrite the manager's date string with a fixed value.
test_mm.date_str = '2020-01-01-00-00-00'
# NOTE(review): this value looks like a redacted placeholder rather than a
# usable address — confirm before relying on it.
test_email = '*****@*****.**'


def test_format_results():
    """Check that format_results returns a single entry for one query.

    Three result tuples for the same model name ('test') and the same
    query_object, differing in model type and response, are passed to
    format_results, and the output is expected to have length 1.

    NOTE(review): `qh` is computed on the last line but never used in the
    visible code, so this function may be truncated — confirm against the
    full source.
    """
    date = datetime.now()
    # NOTE(review): the fifth element is a set in the first tuple but `{}`
    # (an empty dict) in the other two — confirm which type format_results
    # expects.
    results = [
        ('test', query_object, 'pysb', test_response, {'3801854542'}, date),
        ('test', query_object, 'signed_graph', fail_response, {}, date),
        ('test', query_object, 'unsigned_graph', test_response, {}, date)
    ]
    formatted_results = format_results(results)
    assert len(formatted_results) == 1
    qh = query_object.get_hash_with_model('test')
Esempio n. 8
0
def test_results_json():
    """Validate the JSON and JSONL output of ModelManager.results_to_json.

    Extra MAP2K1 -> MAPK1 statements of several types are added to the
    assembled model so that the supporting statements of the second path edge
    differ between model types. Two tests are run with
    RefinementTestConnector, after which both the nested JSON and the
    per-path JSON lines are checked.
    """
    emmaa_model = create_model()
    emmaa_model.run_assembly()
    # Add statements with similar subject and object to test grouping
    reference_stmt = emmaa_model.assembled_stmts[1]
    map2k1 = reference_stmt.subj
    mapk1 = reference_stmt.obj
    extra_stmts = [
        Phosphorylation(map2k1, mapk1),
        Phosphorylation(map2k1, mapk1, 'T', '185'),
        Phosphorylation(map2k1, mapk1, 'Y', '187'),
        IncreaseAmount(map2k1, mapk1),
        Inhibition(map2k1, mapk1),
    ]
    emmaa_model.assembled_stmts += extra_stmts
    manager = ModelManager(emmaa_model)
    braf_test = StatementCheckingTest(
        Activation(Agent('BRAF', db_refs={'HGNC': '1097'}),
                   Agent('MAPK1', db_refs={'UP': 'P28482'})))
    family_test = StatementCheckingTest(
        Phosphorylation(
            Agent('MEK', db_refs={'TEXT': 'MEK', 'FPLX': 'MEK'}),
            Agent('ERK', db_refs={'TEXT': 'ERK', 'FPLX': 'ERK'})))
    test_manager = TestManager([manager], [braf_test, family_test])
    test_manager.make_tests(RefinementTestConnector())
    test_manager.run_tests()
    result_json, json_lines = manager.results_to_json()
    assert len(result_json) == 3
    # Looking at the first result (stored at index 1)
    assert len(result_json[1]) == 6, len(result_json[1])
    braf_result = result_json[1]
    expected_path = 'BRAF → MAP2K1 → MAPK1'

    # The second edge will be supported differently in different model types
    pysb_path = braf_result['pysb']['path_json'][0]
    assert pysb_path['path'] == expected_path
    pysb_stmts = pysb_path['edge_list'][1]['stmts']
    # Only Activation statement will be in the edge in PySB
    assert len(pysb_stmts) == 1
    assert pysb_stmts[0][1] == 'Active MAP2K1 activates MAPK1.'
    assert pysb_stmts[0][0].count('stmt_hash') == 1

    # Positive (Activation and IncreaseAmount) in SignedGraph edge
    signed_path = braf_result['signed_graph']['path_json'][0]
    assert signed_path['path'] == expected_path
    signed_stmts = signed_path['edge_list'][1]['stmts']
    assert len(signed_stmts) == 2
    assert signed_stmts[0][1] == 'MAP2K1 activates MAPK1.'
    assert signed_stmts[0][0].count('stmt_hash') == 1
    assert signed_stmts[1][1] == 'MAP2K1 increases the amount of MAPK1.'
    assert signed_stmts[1][0].count('stmt_hash') == 1

    # All statement types support unsigned graph edge, but different statements
    # of the same type are grouped together
    unsigned_path = braf_result['unsigned_graph']['path_json'][0]
    assert unsigned_path['path'] == expected_path
    unsigned_stmts = unsigned_path['edge_list'][1]['stmts']
    assert len(unsigned_stmts) == 4
    # Map each English sentence to the number of statement hashes in its link
    sentence_counts = {}
    for row in unsigned_stmts:
        sentence_counts[row[1]] = row[0].count('stmt_hash')
    expected_counts = (
        ('MAP2K1 activates MAPK1.', 1),
        ('MAP2K1 phosphorylates MAPK1.', 3),
        ('MAP2K1 inhibits MAPK1.', 1),
        ('MAP2K1 increases the amount of MAPK1.', 1),
    )
    for sentence, hash_count in expected_counts:
        assert sentence in sentence_counts
        assert sentence_counts[sentence] == hash_count

    # Test JSONL representation
    assert len(json_lines) == 6, len(json_lines)
    # Expected number of hashes on the statement-backed edge, per graph type.
    # Graph types missing from a table get no further checks for that test.
    first_test_hashes = {'pysb': 1, 'signed_graph': 2, 'unsigned_graph': 6}
    second_test_hashes = {'pysb': 1, 'pybel': 1, 'unsigned_graph': 6}
    for line in json_lines:
        # First test
        if line['test'] == 13165736649758742:
            assert len(line['edges']) == 2
            assert line['edges'][0]['type'] == 'statements'
            assert len(line['edges'][0]['hashes']) == 1
            expected_hashes = first_test_hashes.get(line['graph_type'])
            if expected_hashes is not None:
                assert line['edges'][1]['type'] == 'statements'
                assert len(line['edges'][1]['hashes']) == expected_hashes
            continue
        # Second test
        expected_hashes = second_test_hashes.get(line['graph_type'])
        if expected_hashes is None:
            continue
        # The statement edge is wrapped by two refinement edges without hashes
        assert len(line['edges']) == 3
        assert line['edges'][0]['type'] == 'RefEdge'
        assert 'hashes' not in line['edges'][0]
        assert line['edges'][1]['type'] == 'statements'
        assert len(line['edges'][1]['hashes']) == expected_hashes
        assert line['edges'][2]['type'] == 'RefEdge'
        assert 'hashes' not in line['edges'][2]