Ejemplo n.º 1
0
def test_agent_text_storage():
    """Verify that raw agent text is recorded per evidence when deduplicating.

    Two passes are made:

    1. A corpus of Complex, Phosphorylation and Conversion statements, whose
       agents share names but differ in their 'TEXT' db_refs, is run through
       ``combine_duplicates``. Five unique statements are expected, and the
       'agents' annotation stored under each original statement's uuid must
       hold the raw text of that statement's (deep-sorted) agent list.
    2. The deduplicated output plus one new Complex is run through
       ``combine_duplicates`` again. Evidence carried over from the first
       pass must then list two prior uuids, while the evidence of the newly
       added statement lists one.
    """
    def _ev(text):
        # Shorthand for an Evidence that carries nothing but its text.
        return Evidence(text=text)

    a_plain = Agent('A', db_refs={'TEXT': 'A'})
    a_alpha = Agent('A', db_refs={'TEXT': 'alpha'})
    b_bag = Agent('B', db_refs={'TEXT': 'bag'})
    b_bug = Agent('B', db_refs={'TEXT': 'bug'})
    c_bare = Agent('C')
    d_bare = Agent('D')
    corpus = [
        Complex([a_plain, b_bag], evidence=_ev('A complex bag.')),
        Complex([b_bug, a_alpha], evidence=_ev('bug complex alpha once.')),
        Complex([b_bug, a_alpha], evidence=_ev('bug complex alpha again.')),
        Complex([a_plain, c_bare, b_bug], evidence=_ev('A complex C bug.')),
        Phosphorylation(a_plain, b_bag, evidence=_ev('A phospo bags.')),
        Phosphorylation(a_alpha, b_bug, evidence=_ev('alpha phospho bugs.')),
        Conversion(d_bare, [a_plain, b_bag], [c_bare, d_bare],
                   evidence=_ev('D: A bag -> C D')),
        Conversion(d_bare, [b_bag, a_alpha], [c_bare, d_bare],
                   evidence=_ev('D: bag a -> C D')),
        Conversion(d_bare, [b_bug, a_alpha], [d_bare, c_bare],
                   evidence=_ev('D: bug a -> D C')),
        Conversion(d_bare, [b_bag, a_plain], [c_bare, d_bare],
                   evidence=_ev('D: bag A -> C D')),
        Conversion(d_bare, [a_plain], [a_plain, c_bare],
                   evidence=_ev('D: A -> A C')),
    ]
    first_pa = Preassembler(hierarchies, corpus)
    first_pass = first_pa.combine_duplicates()
    assert len(first_pass) == 5, len(first_pass)

    # Only statements that merged several evidences are inspected here.
    prior_counts = [
        len(evid.annotations['prior_uuids'])
        for stmt in first_pass if len(stmt.evidence) > 1
        for evid in stmt.evidence
    ]
    assert all([count == 1 for count in prior_counts]),\
        'There can only be one prior evidence per uuid at this stage.'

    # Map each original statement uuid to the agent annotations recorded on
    # the evidence that originated from it.
    agents_by_uuid = {}
    for stmt in first_pass:
        for evid in stmt.evidence:
            prior_uuid = evid.annotations['prior_uuids'][0]
            agents_by_uuid[prior_uuid] = evid.annotations['agents']

    for stmt in corpus:
        expected_text = [
            agent.db_refs.get('TEXT')
            for agent in stmt.agent_list(deep_sorted=True)
        ]
        stored_text = agents_by_uuid[stmt.uuid]['raw_text']
        assert expected_text == stored_text,\
            '!='.join([str(expected_text), str(stored_text)])

    # Now run pa on the above corpus plus another statement.
    second_input = first_pass + [
        Complex([a_plain, c_bare, b_bag], evidence=_ev('A complex C bag.'))
    ]
    second_pa = Preassembler(hierarchies, second_input)
    second_pass = second_pa.combine_duplicates()
    assert len(second_pass) == 5, len(second_pass)

    # Separate the evidence of the newly added statement (matched by its
    # text) from all of the evidence carried over from the first pass.
    added_text = second_input[-1].evidence[0].text
    carried_over = []
    added_ev = None
    for stmt in second_pass:
        for evid in stmt.evidence:
            if evid.text != added_text:
                carried_over.append(evid)
                continue
            added_ev = evid
    assert all([len(evid.annotations['prior_uuids']) == 2
                for evid in carried_over])
    assert added_ev
    assert len(added_ev.annotations['prior_uuids']) == 1
Ejemplo n.º 2
0
def test_conversion_refinement():
    """Check how many Conversion statements remain top-level after relating.

    Four Conversions are built: one with a subject grounded to FPLX 'RAS',
    and three with a subject grounded to HGNC '5173' (HRAS). Of the latter,
    two take both GTP and GDP as inputs, in opposite order. After
    ``combine_related`` exactly two top-level statements are expected.
    """
    ras_family = Agent('RAS', db_refs={'FPLX': 'RAS'})
    hras_gene = Agent('HRAS', db_refs={'HGNC': '5173'})
    gtp_agent = Agent('GTP')
    gdp_agent = Agent('GDP')
    conversions = [
        Conversion(ras_family, gtp_agent, gdp_agent),
        Conversion(hras_gene, gtp_agent, gdp_agent),
        Conversion(hras_gene, [gtp_agent, gdp_agent], gdp_agent),
        Conversion(hras_gene, [gdp_agent, gtp_agent], gdp_agent),
    ]
    preassembler = Preassembler(hierarchies, stmts=conversions)
    top_level = preassembler.combine_related()
    assert len(top_level) == 2
Ejemplo n.º 3
0
def test_dump_build():
    """Test the dump pipeline end to end.

    Method
    ------
    CREATE CONTEXT:
    - Assert that the configuration reports database testing mode.
    - Create the S3 bucket named by the configured dump location (the
      original author notes this is a fake bucket provided by moto; the
      mocking itself is not visible in this function).
    - Create a local principal database with a small amount of content:
      text refs, mesh annotations, text content, readings, database info,
      and raw statements from both readings and databases.
      Aim for representation of stmt motifs and sources.
    - Run preassembly on the principal database.
    - Create a local readonly database.

    RUN THE DUMP

    CHECK THE RESULTS:
    - Exactly one dump is listed, and each dump component (Start, Readonly,
      Belief, Sif, StatementHashMeshId, FullPaStmts, End) is found among
      its objects.
    - Temporary readonly tables are not active; all other tables are.
    - The principal database no longer has a 'readonly' schema.
    - The readonly raw-to-pa link count matches the principal database.
    - A simple agent query returns the expected number of statements.
    - Every belief value is in the interval (0, 1].
    - The recorded test calls match the expected `_set_lambda_env` calls.
    """
    # Refuse to go any further unless the config says we are in test mode.
    assert config.is_db_testing()

    # Create the dump locale.
    s3 = boto3.client('s3')
    dump_head = config.get_s3_dump()
    s3.create_bucket(Bucket=dump_head.bucket)
    assert dump_head.bucket == S3_DATA_LOC['bucket']

    # Create the principal database.
    db = get_temp_db(clear=True)

    # The trailing comments number the rows in insertion order; presumably
    # these match the ids the database assigns (trid) - confirm if changed.
    db.copy('text_ref', [        # trid
        ('1', 1, 'PMC1', 1),     # 1
        ('2', 2, 'PMC2', 2),     # 2
        ('3', 3, None, None),    # 3
        (None, None, 'PMC4', 4)  # 4
    ], ('pmid', 'pmid_num', 'pmcid', 'pmcid_num'))

    # Mesh annotations keyed by pmid_num; a mix of concept and non-concept
    # entries.
    db.copy('mesh_ref_annotations', [
        (1, 11, False),
        (1, 13, False),
        (1, 12, True),
        (2, 12, True),
        (3, 13, False),
        (3, 33, True)
    ], ('pmid_num', 'mesh_num', 'is_concept'))

    # Text content rows, each pointing at a text ref by its row number above.
    db.copy('text_content', [              # tcid
        (1, 'pubmed', 'txt', 'abstract'),  # 1
        (1, 'pmc', 'xml', 'fulltext'),     # 2
        (2, 'pubmed', 'txt', 'title'),     # 3
        (3, 'pubmed', 'txt', 'abstract'),  # 4
        (3, 'pmc', 'xml', 'fulltext'),     # 5
        (4, 'pmc', 'xml', 'fulltext')      # 6
    ], ('text_ref_id', 'source', 'format', 'text_type'))

    # One reading row per (text_content_id, reader) pair below; batch_id is
    # always 1 and the reader version is the last entry listed for that
    # reader in `reader_versions`.
    # NOTE(review): 'emtpy' looks like a typo of 'empty'; it is stored data,
    # so it is deliberately left unchanged here.
    db.copy('reading', [(tcid, rdr, 1, reader_versions[rdr][-1], 'emtpy')
                        for tcid, rdr in [
        # 1             2             3
        (1, 'reach'), (1, 'eidos'), (1, 'isi'),

        # 4
        (2, 'reach'),

        # 5             6            7
        (3, 'reach'), (3, 'eidos'), (3, 'trips'),

        # 8
        (4, 'reach'),

        # 9
        (5, 'reach'),

        # 10
        (6, 'reach')
    ]], ('text_content_id', 'reader', 'batch_id', 'reader_version', 'format'))

    db.copy('db_info', [
        ('signor', 'signor', 'Signor'),       # 1
        ('pc', 'biopax', 'Pathway Commons'),  # 2
        ('medscan', 'medscan', 'MedScan')     # 3
    ], ('db_name', 'source_api', 'db_full_name'))

    # Raw statements to insert, grouped by origin ('reading' vs 'databases').
    # NOTE(review): the integer keys presumably refer to the reading rows and
    # db_info rows numbered in the comments above - confirm against
    # `simple_insert_stmts`.
    raw_stmts = {
        'reading': {
            2: [
                Inhibition(
                    Agent('Fever', db_refs={'TEXT': 'fever', 'MESH': 'D005334'}),
                    Agent('Cough', db_refs={'TEXT': 'cough', 'MESH': 'D003371'}),
                    evidence=Evidence(text="We found fever inhibits cough.")
                )
            ],
            4: [
                Phosphorylation(
                    Agent('MEK', db_refs={'FPLX': 'MEK', 'TEXT': 'mek'}),
                    # NOTE(review): this agent is named 'ERK' but grounded to
                    # FPLX 'MEK' - confirm whether that is intentional.
                    Agent('ERK', db_refs={'FPLX': 'MEK', 'TEXT': 'erk'}),
                    evidence=Evidence(text="mek phosphorylates erk, so say I.")
                ),
                Activation(
                    Agent('MAP2K1', db_refs={'HGNC': '6840', 'TEXT': 'MEK1'}),
                    Agent('MAPK1', db_refs={'HGNC': '6871', 'TEXT': 'ERK1'}),
                    evidence=Evidence(text="MEK1 activates ERK1, or os I'm told.")
                ),
                Activation(
                    Agent('ERK', db_refs={'FPLX': 'ERK', 'TEXT': 'ERK'}),
                    Agent('JNK', db_refs={'FPLX': 'JNK', 'TEXT': 'JNK'}),
                    evidence=Evidence(text="ERK activates JNK, maybe.")
                ),
                Complex([
                    Agent('MEK', db_refs={'FPLX': 'MEK', 'TEXT': 'MAP2K'}),
                    Agent('ERK', db_refs={'FPLX': 'ERK', 'TEXT': 'MAPK'}),
                    Agent('RAF', db_refs={'FPLX': 'RAF', 'TEXT': 'RAF'})
                ], evidence=Evidence(text="MAP2K, MAPK, and RAF form a complex."))
            ],
            # Same agents and evidence text as the ERK -> JNK Activation
            # under key 4, but attached to a different key.
            7: [
                Activation(
                    Agent('ERK', db_refs={'FPLX': 'ERK', 'TEXT': 'ERK'}),
                    Agent('JNK', db_refs={'FPLX': 'JNK', 'TEXT': 'JNK'}),
                    evidence=Evidence(text='ERK activates JNK, maybe.')
                )
            ],
            8: [
                Complex([
                    Agent('MEK', db_refs={'FPLX': 'MEK', 'TEXT': 'mek'}),
                    Agent('ERK', db_refs={'FPLX': 'ERK', 'TEXT': 'erk'})
                ], evidence=Evidence(text="...in the mek-erk complex."))
            ],
        },
        'databases': {
            2: [
                Conversion(
                    Agent('FRK', db_refs={'HGNC': '3955'}),
                    [Agent('ATP', db_refs={'MESH': 'D000255'})],
                    [Agent('hydron', db_refs={'CHEBI': 'CHEBI:15378'})]
                )
            ],
            3: [
                Phosphorylation(
                    Agent('MEK', db_refs={'FPLX': 'MEK', 'TEXT': 'MEK'}),
                    Agent('ERK', db_refs={'FPLX': 'ERK', 'TEXT': 'ERK'}),
                    evidence=Evidence(text="...MEK phosphorylates ERK medscan.")
                )
            ]
        }
    }
    simple_insert_stmts(db, raw_stmts)

    # Run preassembly.
    prass.create_corpus(db)

    # Do the dump procedure.
    ro = get_temp_ro(clear=True)
    dump(db, ro)

    # Check that the s3 dump exists.
    all_dumps = dm.list_dumps()
    assert len(all_dumps) == 1

    # Check to make sure all the dump files are present.
    dump_path = all_dumps[0]
    file_list = dump_path.list_objects(s3)
    assert dm.Start.from_list(file_list)
    assert dm.Readonly.from_list(file_list)
    assert dm.Belief.from_list(file_list)
    assert dm.Sif.from_list(file_list)
    assert dm.StatementHashMeshId.from_list(file_list)
    assert dm.FullPaStmts.from_list(file_list)
    assert dm.End.from_list(file_list)

    # Check what tables are active in the readonly database.
    active_tables = ro.get_active_tables()
    for tbl in ro.get_tables():
        if ro.tables[tbl]._temp:
            # If it was temp, it should be gone.
            assert tbl not in active_tables
        else:
            # Otherwise, it should be there.
            assert tbl in active_tables

    # Check that the principal db has no more ro schema.
    assert 'readonly' not in db.get_schemas()

    # Check contents of the readonly database.
    assert len(ro.select_all(ro.FastRawPaLink)) \
           == len(db.select_all(db.RawUniqueLinks))

    # Check that a query basically works.
    from indra_db.client.readonly import HasAgent
    res = HasAgent('MEK').get_statements(ro)
    assert len(res.statements()) == 2, len(res.statements())

    # Check that belief is represented in the table.
    # NOTE(review): `all` is vacuously true when no rows are returned, so
    # this does not by itself prove that any belief rows exist.
    bdict = {h: b for h, b in ro.select_all([ro.SourceMeta.mk_hash,
                                             ro.SourceMeta.belief])}
    assert all(1 >= b > 0 for b in bdict.values())

    # Check to make sure lambda was diverted correctly: two recorded calls to
    # `_set_lambda_env`, the first passing a dict with the readonly override
    # set to this database's URL, the second passing an empty dict.
    call_records = config.get_test_call_records()
    assert len(call_records) == 2
    assert all(rec.func_name == '_set_lambda_env' for rec in call_records)
    assert all(isinstance(rec.args[1], dict) for rec in call_records)
    assert 'INDRAROOVERRIDE' in call_records[0].args[1]
    assert call_records[0].args[1]['INDRAROOVERRIDE'] == str(db.url)
    assert not call_records[1].args[1]