def test_agent_text_storage():
    """Check the agent-text and prior-uuid annotations set by deduplication.

    After ``combine_duplicates`` every evidence is expected to carry
    ``annotations['agents']['raw_text']`` matching the TEXT db_refs of the
    raw statement it came from, and ``annotations['prior_uuids']`` listing
    the uuids it has passed through (one per preassembly round).
    """
    def _ev(text):
        return Evidence(text=text)

    a_plain = Agent('A', db_refs={'TEXT': 'A'})
    a_alpha = Agent('A', db_refs={'TEXT': 'alpha'})
    b_bag = Agent('B', db_refs={'TEXT': 'bag'})
    b_bug = Agent('B', db_refs={'TEXT': 'bug'})
    c_agent = Agent('C')
    d_agent = Agent('D')

    raw_stmts = [
        Complex([a_plain, b_bag], evidence=_ev('A complex bag.')),
        Complex([b_bug, a_alpha], evidence=_ev('bug complex alpha once.')),
        Complex([b_bug, a_alpha], evidence=_ev('bug complex alpha again.')),
        Complex([a_plain, c_agent, b_bug], evidence=_ev('A complex C bug.')),
        Phosphorylation(a_plain, b_bag, evidence=_ev('A phospo bags.')),
        Phosphorylation(a_alpha, b_bug, evidence=_ev('alpha phospho bugs.')),
        Conversion(d_agent, [a_plain, b_bag], [c_agent, d_agent],
                   evidence=_ev('D: A bag -> C D')),
        Conversion(d_agent, [b_bag, a_alpha], [c_agent, d_agent],
                   evidence=_ev('D: bag a -> C D')),
        Conversion(d_agent, [b_bug, a_alpha], [d_agent, c_agent],
                   evidence=_ev('D: bug a -> D C')),
        Conversion(d_agent, [b_bag, a_plain], [c_agent, d_agent],
                   evidence=_ev('D: bag A -> C D')),
        Conversion(d_agent, [a_plain], [a_plain, c_agent],
                   evidence=_ev('D: A -> A C')),
    ]

    # First round: deduplicate the raw corpus.
    first_pass = Preassembler(hierarchies, raw_stmts).combine_duplicates()
    assert len(first_pass) == 5, len(first_pass)

    merged_evidence = [
        evid
        for stmt in first_pass if len(stmt.evidence) > 1
        for evid in stmt.evidence
    ]
    assert all(len(evid.annotations['prior_uuids']) == 1
               for evid in merged_evidence), \
        'There can only be one prior evidence per uuid at this stage.'

    # Map each raw statement uuid to the agent annotations of its evidence.
    agents_by_uuid = {}
    for stmt in first_pass:
        for evid in stmt.evidence:
            prior_uuid = evid.annotations['prior_uuids'][0]
            agents_by_uuid[prior_uuid] = evid.annotations['agents']

    for stmt in raw_stmts:
        expected = [
            agent.db_refs.get('TEXT')
            for agent in stmt.agent_list(deep_sorted=True)
        ]
        stored = agents_by_uuid[stmt.uuid]['raw_text']
        assert expected == stored, str(expected) + '!=' + str(stored)

    # Second round: the deduplicated corpus plus one brand-new statement.
    second_input = first_pass + [
        Complex([a_plain, c_agent, b_bag], evidence=_ev('A complex C bag.'))
    ]
    second_pass = Preassembler(hierarchies, second_input).combine_duplicates()
    assert len(second_pass) == 5, len(second_pass)

    added_text = second_input[-1].evidence[0].text
    new_ev = None
    old_ev_list = []
    for stmt in second_pass:
        for evid in stmt.evidence:
            if evid.text == added_text:
                new_ev = evid
            else:
                old_ev_list.append(evid)

    # Old evidence has been through two rounds, the new one through one.
    assert all(len(evid.annotations['prior_uuids']) == 2
               for evid in old_ev_list)
    assert new_ev
    assert len(new_ev.annotations['prior_uuids']) == 1
def test_complexes_from_hierarchy():
    """The AMPK alpha/beta/gamma Complex must be produced by the Expander."""
    expander = ef.Expander(hierarchies)
    known_keys = [
        stmt.matches_key() for stmt in expander.complexes_from_hierarchy()
    ]
    subunit_names = ('AMPK_alpha', 'AMPK_beta', 'AMPK_gamma')
    ampk = Complex([Agent(name, db_refs={'FPLX': name})
                    for name in subunit_names])
    assert ampk.matches_key() in known_keys
def test_complex_refinement():
    """A two-member and a three-member Complex both survive combine_related."""
    ras, raf, mek = (Agent(name) for name in ('RAS', 'RAF', 'MEK'))
    dimer = Complex([ras, raf])
    trimer = Complex([mek, ras, raf])
    assembler = Preassembler(hierarchies, stmts=[dimer, trimer])
    assembler.combine_related()
    for collected in (assembler.unique_stmts, assembler.related_stmts):
        assert len(collected) == 2
def test_complex_refinement_order():
    """Member order must not stop a modified Complex refining a plain one."""
    plain = Complex([Agent('MED23'), Agent('ELK1')])
    phospho_elk1 = Agent('ELK1', mods=[ModCondition('phosphorylation')])
    # Same members in reverse order, with ELK1 additionally phosphorylated.
    modified = Complex([phospho_elk1, Agent('MED23')])
    assembler = Preassembler(hierarchies, stmts=[plain, modified])
    assembler.combine_duplicates()
    assembler.combine_related()
    assert len(assembler.related_stmts) == 1
def test_homodimer_refinement():
    """An ERBB2 homodimer and an ERBB2-EGFR heterodimer stay separate."""
    erbb2 = Agent('ERBB2')
    egfr = Agent('EGFR')
    homodimer = Complex([erbb2, erbb2])
    heterodimer = Complex([erbb2, egfr])
    assembler = Preassembler(hierarchies, stmts=[homodimer, heterodimer])

    assembler.combine_duplicates()
    assert len(assembler.unique_stmts) == 2

    assembler.combine_related()
    assert len(assembler.related_stmts) == 2
def test_grounding_aggregation_complex():
    """Complexes whose BRAF member differs in db_refs are not merged."""
    mek = Agent('MEK')
    braf_hgnc = Agent('BRAF', db_refs={'TEXT': 'braf', 'HGNC': '1097'})
    braf_dummy = Agent('BRAF', db_refs={'TEXT': 'BRAF', 'dummy': 'dummy'})
    braf_up = Agent('BRAF', db_refs={'TEXT': 'Braf', 'UP': 'P15056'})
    stmts = [
        Complex([mek, braf_hgnc]),
        Complex([braf_dummy, mek]),
        Complex([mek, braf_up]),
    ]
    deduplicated = Preassembler(hierarchies, stmts=stmts).combine_duplicates()
    assert len(deduplicated) == 3
def test_complex_agent_refinement():
    """Ubiquitinated vs. not-ubiquitinated RAF complexes do not refine."""
    ras = Agent('RAS')
    # Two RAF agents that differ only in the is_modified flag of the mod.
    raf_ubiq, raf_not_ubiq = (
        Agent('RAF', mods=[ModCondition('ubiquitination', None, None, flag)])
        for flag in (True, False)
    )
    stmts = [Complex([ras, raf_ubiq]), Complex([ras, raf_not_ubiq])]
    assembler = Preassembler(hierarchies, stmts=stmts)
    assembler.combine_related()
    for collected in (assembler.unique_stmts, assembler.related_stmts):
        assert len(collected) == 2
def test_preassemble_related_complex():
    """Four complexes collapse to two unique and one top-level statement."""
    ras = Agent('RAS', db_refs={'FPLX': 'RAS'})
    kras = Agent('KRAS', db_refs={'HGNC': '6407'})
    hras = Agent('HRAS', db_refs={'HGNC': '5173'})
    member_orders = [(kras, hras), (kras, ras), (hras, kras), (ras, kras)]
    stmts = [Complex(list(members)) for members in member_orders]

    assembler = Preassembler(hierarchies, stmts)
    assert len(assembler.combine_duplicates()) == 2
    assert len(assembler.combine_related()) == 1
Exemple #9
0
    def process_binding_statements(self):
        """Looks for Binding events in the graph and extracts them into INDRA
        statements.

        In particular, looks for a Binding event node with outgoing edges
        with relations Theme and Theme2 - the entities these edges point to
        are the two constituents of the Complex INDRA statement. The
        evidence is built from the node reached via the Theme relation,
        with ``is_direct=True``.

        Returns
        -------
        statements : list
            One Complex statement per matching Binding event node; empty if
            no such node is found.
        """
        statements = []

        binding_nodes = self.find_event_with_outgoing_edges(
            'Binding', ['Theme', 'Theme2'])

        for node in binding_nodes:
            theme1 = self.get_entity_text_for_relation(node, 'Theme')
            theme1_node = self.get_related_node(node, 'Theme')
            theme2 = self.get_entity_text_for_relation(node, 'Theme2')

            # Both themes are expected to resolve, since the node was
            # selected for having both outgoing relations.
            assert (theme1 is not None)
            assert (theme2 is not None)

            evidence = self.node_to_evidence(theme1_node, is_direct=True)
            statements.append(
                Complex([s2a(theme1), s2a(theme2)], evidence=evidence))

        return statements
Exemple #10
0
def geneways_action_to_indra_statement_type(actiontype, plo):
    """Map a Geneways action type onto an INDRA Statement constructor.

    Parameters
    ----------
    actiontype : str
        The verb extracted by the Geneways processor; matched
        case-insensitively.
    plo : str
        A one character string designating whether Geneways classifies
        this verb as a physical, logical, or other interaction

    Returns
    -------
    statement_generator :
        None if there is no mapping to INDRA statements from this action
        type. Otherwise a function that takes in the subject agent, object
        agent, and evidence, in that order, and returns an INDRA statement
        object.
    is_direct : bool
        Always True for mapped action types; for unmapped ones, True
        exactly when ``plo`` equals 'P'.
    """
    def _bind(substance1, substance2, evidence):
        return Complex([substance1, substance2], evidence=evidence)

    def _phosphorylate(substance1, substance2, evidence):
        return Phosphorylation(substance1, substance2, evidence=evidence)

    known_generators = {
        'bind': _bind,
        'phosphorylate': _phosphorylate,
    }

    verb = actiontype.lower()
    if verb in known_generators:
        # Every mapped verb is treated as direct, whatever plo says.
        return (known_generators[verb], True)
    return (None, plo == 'P')
def test_map_agent():
    """'p-ERK' text must be mapped to the ERK agent with FPLX grounding."""
    members = [
        Agent('ERK1', db_refs={'TEXT': 'ERK1'}),
        Agent('P-ERK', db_refs={'TEXT': 'p-ERK'}),
    ]
    mapped = gm.map_stmts([Complex(members)])
    second_member = mapped[0].members[1]
    assert second_member.name == 'ERK'
    assert second_member.db_refs.get('FPLX') == 'ERK'
Exemple #12
0
def _get_db_no_pa_stmts():
    """Build a test database with raw statements but no pa statements.

    Starts from ``get_temp_db(clear=True)`` and uses a ``DbBuilder`` to add
    text refs, text content, readings, raw reading statements, source
    databases and raw database statements. ``add_pa_statements`` is never
    called, so no preassembled statements are added by this function.

    The agents used below (``mek``, ``erk``, ``ras``, ``raf``, ``map2k1``,
    ``mapk1``, ``simvastatin`` and the ``_ng``/``_mg`` variants) are not
    defined in this function and must already exist in the enclosing scope.

    Returns the populated database handle.
    """
    db = get_temp_db(clear=True)

    db_builder = DbBuilder(db)
    db_builder.add_text_refs([('12345', 'PMC54321'), ('24680', 'PMC08642'),
                              ('97531', )])
    db_builder.add_text_content([['pubmed-ttl', 'pubmed-abs', 'pmc_oa'],
                                 ['pubmed-abs', 'manuscripts'],
                                 ['pubmed-ttl', 'pubmed-abs']])
    db_builder.add_readings([['REACH', 'TRIPS'], ['REACH', 'SPARSER'],
                             ['REACH', 'ISI'],
                             ['SPARSER'], ['REACH', 'SPARSER'],
                             ['SPARSER', 'TRIPS', 'REACH'], ['REACH',
                                                             'EIDOS']])
    # NOTE(review): the 14 inner lists below appear to line up one-to-one,
    # in order, with the 14 reader entries passed to add_readings above --
    # confirm against DbBuilder before reordering anything.
    db_builder.add_raw_reading_statements([
        [Phosphorylation(mek, erk)],  # reach pubmed title
        [Phosphorylation(mek, erk, 'T', '124')],  # trips pubmed title
        [
            Phosphorylation(mek, erk),
            Inhibition(erk, ras), (Phosphorylation(mek, erk), 'in the body')
        ],  # reach pubmed-abs
        [
            Complex([mek, erk]),
            Complex([erk, ras]), (Phosphorylation(None, erk), 'In the body')
        ],  # sparser pubmed-abs
        [],  # reach pmc_oa
        [],  # ISI pmc_oa
        [Phosphorylation(map2k1, mapk1)],  # sparser pubmed-abs
        [],  # reach manuscripts
        [],  # sparser manuscripts
        [Inhibition(simvastatin_ng, raf),
         Activation(map2k1_mg, erk)],  # sparser pubmed title
        [],  # TRIPS pubmed title
        [],  # reach pubmed title
        [],  # reach pubmed abs
        [],  # eidos pubmed abs
    ])
    # Three source databases, followed by one statement list per database.
    db_builder.add_databases(['biopax', 'tas', 'bel'])
    db_builder.add_raw_database_statements([[
        Activation(mek, raf),
        Inhibition(erk, ras),
        Phosphorylation(mek, erk)
    ], [Inhibition(simvastatin, raf)], [Phosphorylation(mek, erk, 'T',
                                                        '124')]])
    return db
Exemple #13
0
def test_map_agent():
    """'p-ERK' text must be mapped to the ERK agent with BE grounding."""
    members = [
        Agent('ERK1', db_refs={'TEXT': 'ERK1'}),
        Agent('P-ERK', db_refs={'TEXT': 'p-ERK'}),
    ]
    stmt = Complex(members)
    mapper = GroundingMapper(default_grounding_map, default_agent_map)
    mapped = mapper.map_agents([stmt])
    second_member = mapped[0].members[1]
    assert second_member.name == 'ERK'
    assert second_member.db_refs.get('BE') == 'ERK'
Exemple #14
0
def process_row(row, up_web_fallback=False):
    """Turn one row of the DataFrame into an INDRA Complex statement.

    The host and virus groundings become the two members of the Complex;
    experimental method, interaction type, taxonomy IDs, score and source
    IDs are stored as annotations on a single 'virhostnet' Evidence.
    """
    def _after_prefix(column, prefix):
        # Validate the expected prefix, then keep what follows the colon.
        raw = row[column]
        assert raw.startswith(prefix), raw
        _, value = raw.split(':')
        return value

    members = [
        get_agent_from_grounding(row[column],
                                 up_web_fallback=up_web_fallback)
        for column in ('host_grounding', 'vir_grounding')
    ]

    # There's a column that is always a - character
    assert row['dash'] == '-', row['dash']

    method_id, method_name = parse_psi_mi(row['exp_method'])
    type_id, type_name = parse_psi_mi(row['int_type'])

    host_tax = _after_prefix('host_tax', 'taxid:')
    vir_tax = _after_prefix('vir_tax', 'taxid:')
    score = float(_after_prefix('score', 'virhostnet-miscore:'))

    source_ids = parse_source_ids(row['source_id'])

    # source_ids is unpacked last, so its keys win on any name clash.
    annotations = {
        'exp_method': {'id': method_id, 'name': method_name},
        'int_type': {'id': type_id, 'name': type_name},
        'host_tax': host_tax,
        'vir_tax': vir_tax,
        'score': score,
        **source_ids,
    }

    text_refs = parse_text_refs(row['publication'])
    evidence = Evidence(source_api='virhostnet',
                        annotations=annotations,
                        text_refs=text_refs,
                        pmid=text_refs.get('PMID'),
                        source_id=source_ids.get('virhostnet-rid'))
    return Complex(members, evidence=[evidence])
Exemple #15
0
def get_statements(gene_list):
    """Return Complex statements for BioGRID interactions of the given genes.

    Parameters
    ----------
    gene_list : list
        Passed through to ``_send_request`` together with
        ``include_interactors=True``.

    Returns
    -------
    statements : list
        One Complex statement per interaction in the response, each with a
        single 'biogrid' Evidence carrying the interaction ID, the PubMed
        ID and the full interaction record as annotations. Empty if the
        request produced no result.
    """
    res_dict = _send_request(gene_list, include_interactors=True)
    statements = []
    # Without this guard a None response would raise AttributeError on
    # .items(); presumably None signals a failed request -- confirm against
    # _send_request.
    if res_dict is None:
        return statements
    for int_id, interaction in res_dict.items():
        agent_a_name = interaction['OFFICIAL_SYMBOL_A']
        agent_b_name = interaction['OFFICIAL_SYMBOL_B']
        # NOTE(review): the gene symbol is stored under the 'HGNC' key here;
        # verify whether downstream code expects an HGNC ID instead.
        agent_a = Agent(agent_a_name, db_refs={'HGNC': agent_a_name})
        agent_b = Agent(agent_b_name, db_refs={'HGNC': agent_b_name})
        ev = Evidence(source_api='biogrid',
                      source_id=int_id,
                      pmid=interaction['PUBMED_ID'],
                      text=None,
                      annotations=interaction)
        stmt = Complex([agent_a, agent_b], evidence=ev)
        statements.append(stmt)
    return statements
Exemple #16
0
    def __init__(self, biogrid_file=None, physical_only=True):
        """Read BioGRID rows and turn them into Complex statements.

        Rows come from ``biogrid_file`` (tab-separated, header skipped) if
        given, otherwise from a download of ``biogrid_file_url``. The
        resulting statements are collected in ``self.statements``. When
        ``physical_only`` is true, rows whose experimental system type is
        not 'physical' are skipped.
        """
        self.statements = []
        self.physical_only = physical_only

        if not biogrid_file:
            # No local file was given, so fall back to the web download.
            logger.info('No data file specified, downloading from BioGrid '
                        'at %s' % biogrid_file_url)
            rows = _download_biogrid_data(biogrid_file_url)
        else:
            # A local file was given: parse it, skipping the header row.
            rows = read_unicode_csv(biogrid_file, '\t', skiprows=1)

        for raw_row in tqdm.tqdm(rows, desc='Processing BioGRID rows'):
            # '-' marks a missing value; columns beyond the known ones are
            # dropped to save space in the annotations.
            cleaned = [item if item != '-' else None for item in raw_row]
            record = _BiogridRow(*cleaned[:len(columns)])

            if self.physical_only and record.exp_system_type != 'physical':
                continue

            first = self._make_agent(record.symbol_a, record.entrez_a,
                                     record.swissprot_a, record.trembl_a)
            second = self._make_agent(record.symbol_b, record.entrez_b,
                                      record.swissprot_b, record.trembl_b)
            # Both interactors must have been turned into agents.
            if first is None or second is None:
                continue

            # The publication field yields a PMID or, failing that, a DOI.
            pmid_match = re.match(r'PUBMED:(\d+)', record.publication)
            doi_match = re.match(r'DOI:(.*)', record.publication)
            if pmid_match:
                text_refs = {'PMID': pmid_match.groups()[0]}
            elif doi_match:
                text_refs = {'DOI': doi_match.groups()[0]}
            else:
                text_refs = {}

            evidence = Evidence(source_api='biogrid',
                                source_id=record.biogrid_int_id,
                                pmid=text_refs.get('PMID'),
                                text_refs=text_refs,
                                annotations=dict(record._asdict()))
            self.statements.append(
                Complex([first, second], evidence=evidence))
Exemple #17
0
 def complexes_from_hierarchy(self):
     """Build one Complex statement per complex in the partof closure.

     Each (subunit, complex) pair of ``self.entities.partof_closure`` is
     grouped by complex; every group becomes a Complex statement whose
     'famplex' Evidence has the complex identifier as its source_id.
     """
     members_by_parent = {}
     for member, parent in self.entities.partof_closure:
         members_by_parent.setdefault(parent, []).append(member)

     def _to_statement(parent, members):
         # The complex identifier itself serves as the evidence source_id.
         evidence = Evidence(source_api='famplex', source_id=parent)
         agents = [_agent_from_uri(member) for member in members]
         return Complex(agents, evidence=[evidence])

     return [_to_statement(parent, members)
             for parent, members in members_by_parent.items()]
Exemple #18
0
def get_statements(gene_list):
    """Return Complex statements for physical BioGRID interactions.

    Interactions are requested for ``gene_list`` including interactors.
    An interaction is skipped when either gene lacks an HGNC ID, HGNC name
    or UniProt ID, or when its experimental system type is not 'physical'.
    An empty list is returned if the request yields None.
    """
    res_dict = _send_request(gene_list, include_interactors=True)
    if res_dict is None:
        return []

    def _ground(egid):
        # Resolve an Entrez gene ID to (HGNC name, db_refs), or to
        # (None, {}) as soon as any lookup step fails.
        ungrounded = (None, {})
        hgnc_id = hgnc_client.get_hgnc_from_entrez(egid)
        if not hgnc_id:
            logger.info("No HGNC ID for Entrez ID: %s" % egid)
            return ungrounded
        hgnc_name = hgnc_client.get_hgnc_name(hgnc_id)
        if not hgnc_name:
            logger.info("No HGNC name for HGNC ID: %s" % hgnc_id)
            return ungrounded
        up_id = hgnc_client.get_uniprot_id(hgnc_id)
        if not up_id:
            logger.info("No Uniprot ID for EGID / HGNC ID / Symbol "
                        "%s / %s / %s" % (egid, hgnc_id, hgnc_name))
            return ungrounded
        return (hgnc_name, {'HGNC': hgnc_id, 'UP': up_id})

    statements = []
    for int_id, interaction in res_dict.items():
        egid_a = interaction['ENTREZ_GENE_A']
        egid_b = interaction['ENTREZ_GENE_B']
        name_a, refs_a = _ground(egid_a)
        name_b, refs_b = _ground(egid_b)
        if name_a is None or name_b is None:
            continue
        if interaction['EXPERIMENTAL_SYSTEM_TYPE'] != 'physical':
            logger.info("Skipping non-physical interaction: %s" %
                        str(interaction))
            continue
        members = [Agent(name_a, db_refs=refs_a),
                   Agent(name_b, db_refs=refs_b)]
        evidence = Evidence(source_api='biogrid',
                            source_id=int_id,
                            pmid=interaction['PUBMED_ID'],
                            text=None,
                            annotations=interaction)
        statements.append(Complex(members, evidence=evidence))
    return statements
    def complexes_from_hierarchy(self):
        """Build Complex statements from 'partof' relations in the ontology.

        For every FamPlex entity, the nodes returned by
        ``self.ontology.ancestors_rel`` for the 'partof' relation are
        collected; each entity with at least one such node becomes a
        Complex statement with a 'famplex' Evidence whose source_id is the
        ('FPLX', entry) node.
        """
        nodes = [('FPLX', entry) for entry in get_famplex_entities()]

        parts_by_node = {}
        for node in nodes:
            parts = self.ontology.ancestors_rel(*node, {'partof'})
            if not parts:
                continue
            parts_by_node.setdefault(node, []).extend(parts)

        def _to_statement(node, parts):
            # The node tuple itself serves as the evidence source_id.
            evidence = Evidence(source_api='famplex', source_id=node)
            agents = [_agent_from_ns_id(*part) for part in parts]
            return Complex(agents, evidence=[evidence])

        return [_to_statement(node, parts)
                for node, parts in parts_by_node.items()]
def _construct_database():
    """Build a small test database with raw and preassembled statements.

    Uses a ``DbBuilder`` on ``get_temp_db(clear=True)`` to add two text
    refs, their content, REACH/SPARSER readings, raw reading statements,
    one 'signor' database statement, and four pa statements.
    """
    db = get_temp_db(clear=True)
    builder = DbBuilder(db)
    builder.add_text_refs([('12345', 'PMC54321'), ('24680', 'PMC08642')])
    builder.add_text_content([['pubmed-abs', 'pmc_oa'], ['pubmed-abs']])
    builder.add_readings([['REACH'], ['REACH'], ['REACH', 'SPARSER']])

    mek, erk, raf = (Agent(name, db_refs={'FPLX': name})
                     for name in ('MEK', 'ERK', 'RAF'))

    # One inner list per reading, in the order given to add_readings.
    reading_stmts = [
        [Phosphorylation(mek, erk), Complex([mek, erk])],
        [Phosphorylation(mek, erk)],
        [Activation(mek, erk)],
        [Complex([mek, erk]), Complex([raf, erk])],
    ]
    builder.add_raw_reading_statements(reading_stmts)

    builder.add_databases(['signor'])
    builder.add_raw_database_statements([[Complex([raf, erk])]])

    # Each pa statement is paired with a list of integer indices
    # (presumably positions of the raw statements added above; verify
    # against DbBuilder.add_pa_statements).
    pa_stmts = [
        (Phosphorylation(mek, erk), [0, 2]),
        (Complex([mek, erk]), [1, 4]),
        (Activation(mek, erk), [3]),
        (Complex([raf, erk]), [5, 6]),
    ]
    builder.add_pa_statements(pa_stmts)
    return db
Exemple #21
0
def _get_db_with_pa_stmts():
    """Build a test database with raw statements and some pa statements.

    Starts from ``get_temp_db(clear=True)`` and uses a ``DbBuilder`` to add
    four text refs with their content and readings, raw reading statements,
    three source databases with raw database statements, and a set of
    preassembled (pa) statements. Finally a ``PreassemblyUpdates`` row with
    ``corpus_init=True`` is added and committed.

    The agents used below (``mek``, ``erk``, ``ras``, ``raf``, ``kras``,
    ``braf``, ``map2k1``, ``mapk1``, ``simvastatin`` and the ``_ng``/``_mg``
    variants) are not defined in this function and must already exist in
    the enclosing scope.

    Returns the populated database handle.
    """
    db = get_temp_db(clear=True)

    db_builder = DbBuilder(db)
    db_builder.add_text_refs([('12345', 'PMC54321'), ('24680', 'PMC08642'),
                              ('97531', ), ('87687', )])
    db_builder.add_text_content([['pubmed-ttl', 'pubmed-abs', 'pmc_oa'],
                                 ['pubmed-ttl', 'pubmed-abs', 'manuscripts'],
                                 ['pubmed-ttl', 'pubmed-abs', 'pmc_oa'],
                                 ['pubmed-ttl', 'pubmed-abs']])
    db_builder.add_readings([
        # Ref 1
        ['REACH', 'TRIPS'],  # pubmed ttl
        ['REACH', 'SPARSER'],  # pubmed abs
        ['REACH', 'ISI'],  # pmc_oa
        # Ref 2
        ['REACH', 'TRIPS'],  # pubmed ttl (new)
        ['SPARSER'],  # pubmed abs
        ['REACH', 'SPARSER'],  # manuscripts
        # Ref 3
        ['SPARSER', 'TRIPS', 'REACH'],  # pubmed ttl
        ['REACH', 'EIDOS', 'SPARSER'],  # pubmed abs
        ['SPARSER'],  # pmc oa (new)
        # Ref 4
        ['TRIPS', 'REACH', 'SPARSER'],  # pubmed ttl (new)
        ['REACH', 'SPARSER'],  # pubmed abs (new)
    ])

    # NOTE(review): the 23 inner lists below appear to line up one-to-one,
    # in order, with the 23 reader entries passed to add_readings above --
    # confirm against DbBuilder before reordering anything.
    db_builder.add_raw_reading_statements([
        # Ref 1
        # pubmed ttl
        [Phosphorylation(mek, erk)],  # reach
        [Phosphorylation(mek, erk, 'T', '124')],  # trips
        # pubmed abs
        [
            Phosphorylation(mek, erk),
            Inhibition(erk, ras), (Phosphorylation(mek, erk), 'in the body')
        ],  # reach
        [
            Complex([mek, erk]),
            Complex([erk, ras]), (Phosphorylation(None, erk), 'In the body')
        ],  # sparser
        # pmc OA
        [],  # reach
        [],  # ISI

        # Ref 2
        # pubmed ttl
        [Phosphorylation(map2k1, mapk1)],  # reach (new)
        [Phosphorylation(map2k1, mapk1, 'T', '124')],  # trips (new)
        # pubmed abs
        [Phosphorylation(map2k1, mapk1)],  # sparser
        # manuscript
        [],  # reach
        [],  # sparser

        # Ref 3
        # pubmed ttl
        [],  # sparser
        [Inhibition(simvastatin, raf)],  # TRIPS
        [],  # reach
        # pubmed abs
        [],  # reach
        [],  # eidos
        [Activation(map2k1_mg, erk),
         Inhibition(simvastatin_ng, raf)],  # sparser (new)
        # pmc oa
        [
            Inhibition(simvastatin_ng, raf),
            Inhibition(erk, ras),
            Activation(ras, raf)
        ],  # sparser (new)

        # Ref 4
        # pubmed ttl
        [],  # trips (new)
        [],  # reach (new)
        [],  # sparser (new)
        # pubmed abstract
        [
            Activation(kras, braf),
            Complex([map2k1, mapk1]),
            Complex([kras, braf])
        ],  # reach (new)
        [
            Complex([kras, braf]),
            Complex([mek, erk]),
            IncreaseAmount(kras, braf)
        ],  # sparser (new)
    ])
    db_builder.add_databases(['biopax', 'tas', 'bel'])
    db_builder.add_raw_database_statements([[
        Activation(mek, raf),
        Inhibition(erk, ras),
        Phosphorylation(mek, erk),
        Activation(ras, raf)
    ], [Inhibition(simvastatin, raf)], [Phosphorylation(mek, erk, 'T',
                                                        '124')]])
    # Each entry is (statement, index list) or (statement, index list,
    # second index list). Presumably the first list holds raw statement
    # positions and the optional second one holds supporting pa statements
    # -- verify against DbBuilder.add_pa_statements.
    db_builder.add_pa_statements([(Phosphorylation(mek, erk), [0, 2, 4,
                                                               25], [1, 8]),
                                  (Phosphorylation(mek, erk, 'T',
                                                   '124'), [1, 28]),
                                  (Phosphorylation(None, erk), [7], [0, 1, 8]),
                                  (Activation(mek, raf), [23]),
                                  (Inhibition(simvastatin, raf), [11, 27]),
                                  (Complex([mek, erk]), [5]),
                                  (Complex([erk, ras]), [6]),
                                  (Inhibition(erk, ras), [3, 24]),
                                  (Phosphorylation(map2k1, mapk1), [10])])

    # Add the preassembly update.
    pu = db.PreassemblyUpdates(corpus_init=True)
    db.session.add(pu)
    db.session.commit()

    return db
Exemple #22
0
def test_dump_build():
    """Test the dump pipeline.

    Method
    ------
    CREATE CONTEXT:
    - Create a local principal database with a small amount of content.
      Aim for representation of stmt motifs and sources.
    - Create a local readonly database.
    - Create a fake bucket (moto)

    RUN THE DUMP

    CHECK THE RESULTS
    """
    # Refuse to run unless the config reports database testing mode.
    assert config.is_db_testing()

    # Create the dump locale.
    s3 = boto3.client('s3')
    dump_head = config.get_s3_dump()
    s3.create_bucket(Bucket=dump_head.bucket)
    assert dump_head.bucket == S3_DATA_LOC['bucket']

    # Create the principal database.
    db = get_temp_db(clear=True)

    db.copy('text_ref', [        # trid
        ('1', 1, 'PMC1', 1),     # 1
        ('2', 2, 'PMC2', 2),     # 2
        ('3', 3, None, None),    # 3
        (None, None, 'PMC4', 4)  # 4
    ], ('pmid', 'pmid_num', 'pmcid', 'pmcid_num'))

    db.copy('mesh_ref_annotations', [
        (1, 11, False),
        (1, 13, False),
        (1, 12, True),
        (2, 12, True),
        (3, 13, False),
        (3, 33, True)
    ], ('pmid_num', 'mesh_num', 'is_concept'))

    db.copy('text_content', [              # tcid
        (1, 'pubmed', 'txt', 'abstract'),  # 1
        (1, 'pmc', 'xml', 'fulltext'),     # 2
        (2, 'pubmed', 'txt', 'title'),     # 3
        (3, 'pubmed', 'txt', 'abstract'),  # 4
        (3, 'pmc', 'xml', 'fulltext'),     # 5
        (4, 'pmc', 'xml', 'fulltext')      # 6
    ], ('text_ref_id', 'source', 'format', 'text_type'))

    # Every reading row uses batch_id 1 and the latest version listed for
    # its reader. The numbered comments give the intended reading ids.
    # NOTE(review): 'emtpy' looks like a typo for 'empty' in the format
    # column; it is a runtime value and is left unchanged here.
    db.copy('reading', [(tcid, rdr, 1, reader_versions[rdr][-1], 'emtpy')
                        for tcid, rdr in [
        # 1             2             3
        (1, 'reach'), (1, 'eidos'), (1, 'isi'),

        # 4
        (2, 'reach'),

        # 5             6            7
        (3, 'reach'), (3, 'eidos'), (3, 'trips'),

        # 8
        (4, 'reach'),

        # 9
        (5, 'reach'),

        # 10
        (6, 'reach')
    ]], ('text_content_id', 'reader', 'batch_id', 'reader_version', 'format'))

    db.copy('db_info', [
        ('signor', 'signor', 'Signor'),       # 1
        ('pc', 'biopax', 'Pathway Commons'),  # 2
        ('medscan', 'medscan', 'MedScan')     # 3
    ], ('db_name', 'source_api', 'db_full_name'))

    # Raw statements keyed by source kind and then by an integer id
    # (presumably the reading id / db_info id numbered in the comments
    # above -- confirm against simple_insert_stmts).
    raw_stmts = {
        'reading': {
            2: [
                Inhibition(
                    Agent('Fever', db_refs={'TEXT': 'fever', 'MESH': 'D005334'}),
                    Agent('Cough', db_refs={'TEXT': 'cough', 'MESH': 'D003371'}),
                    evidence=Evidence(text="We found fever inhibits cough.")
                )
            ],
            4: [
                Phosphorylation(
                    Agent('MEK', db_refs={'FPLX': 'MEK', 'TEXT': 'mek'}),
                    # NOTE(review): ERK is grounded to FPLX 'MEK' here,
                    # unlike every other ERK agent in this test. Possibly a
                    # typo; the assertions below may depend on it, so
                    # confirm before changing.
                    Agent('ERK', db_refs={'FPLX': 'MEK', 'TEXT': 'erk'}),
                    evidence=Evidence(text="mek phosphorylates erk, so say I.")
                ),
                Activation(
                    Agent('MAP2K1', db_refs={'HGNC': '6840', 'TEXT': 'MEK1'}),
                    Agent('MAPK1', db_refs={'HGNC': '6871', 'TEXT': 'ERK1'}),
                    evidence=Evidence(text="MEK1 activates ERK1, or os I'm told.")
                ),
                Activation(
                    Agent('ERK', db_refs={'FPLX': 'ERK', 'TEXT': 'ERK'}),
                    Agent('JNK', db_refs={'FPLX': 'JNK', 'TEXT': 'JNK'}),
                    evidence=Evidence(text="ERK activates JNK, maybe.")
                ),
                Complex([
                    Agent('MEK', db_refs={'FPLX': 'MEK', 'TEXT': 'MAP2K'}),
                    Agent('ERK', db_refs={'FPLX': 'ERK', 'TEXT': 'MAPK'}),
                    Agent('RAF', db_refs={'FPLX': 'RAF', 'TEXT': 'RAF'})
                ], evidence=Evidence(text="MAP2K, MAPK, and RAF form a complex."))
            ],
            7: [
                Activation(
                    Agent('ERK', db_refs={'FPLX': 'ERK', 'TEXT': 'ERK'}),
                    Agent('JNK', db_refs={'FPLX': 'JNK', 'TEXT': 'JNK'}),
                    evidence=Evidence(text='ERK activates JNK, maybe.')
                )
            ],
            8: [
                Complex([
                    Agent('MEK', db_refs={'FPLX': 'MEK', 'TEXT': 'mek'}),
                    Agent('ERK', db_refs={'FPLX': 'ERK', 'TEXT': 'erk'})
                ], evidence=Evidence(text="...in the mek-erk complex."))
            ],
        },
        'databases': {
            2: [
                Conversion(
                    Agent('FRK', db_refs={'HGNC': '3955'}),
                    [Agent('ATP', db_refs={'MESH': 'D000255'})],
                    [Agent('hydron', db_refs={'CHEBI': 'CHEBI:15378'})]
                )
            ],
            3: [
                Phosphorylation(
                    Agent('MEK', db_refs={'FPLX': 'MEK', 'TEXT': 'MEK'}),
                    Agent('ERK', db_refs={'FPLX': 'ERK', 'TEXT': 'ERK'}),
                    evidence=Evidence(text="...MEK phosphorylates ERK medscan.")
                )
            ]
        }
    }
    simple_insert_stmts(db, raw_stmts)

    # Run preassembly.
    prass.create_corpus(db)

    # Do the dump procedure.
    ro = get_temp_ro(clear=True)
    dump(db, ro)

    # Check that the s3 dump exists.
    all_dumps = dm.list_dumps()
    assert len(all_dumps) == 1

    # Check to make sure all the dump files are present.
    dump_path = all_dumps[0]
    file_list = dump_path.list_objects(s3)
    assert dm.Start.from_list(file_list)
    assert dm.Readonly.from_list(file_list)
    assert dm.Belief.from_list(file_list)
    assert dm.Sif.from_list(file_list)
    assert dm.StatementHashMeshId.from_list(file_list)
    assert dm.FullPaStmts.from_list(file_list)
    assert dm.End.from_list(file_list)

    # Check what tables are active in the readonly database.
    active_tables = ro.get_active_tables()
    for tbl in ro.get_tables():
        if ro.tables[tbl]._temp:
            # If it was temp, it should be gone.
            assert tbl not in active_tables
        else:
            # Otherwise, it should be there.
            assert tbl in active_tables

    # Check that the principal db has no more ro schema.
    assert 'readonly' not in db.get_schemas()

    # Check contents of the readonly database.
    assert len(ro.select_all(ro.FastRawPaLink)) \
           == len(db.select_all(db.RawUniqueLinks))

    # Check that a query basically works.
    from indra_db.client.readonly import HasAgent
    res = HasAgent('MEK').get_statements(ro)
    assert len(res.statements()) == 2, len(res.statements())

    # Check that belief is represented in the table.
    bdict = {h: b for h, b in ro.select_all([ro.SourceMeta.mk_hash,
                                             ro.SourceMeta.belief])}
    assert all(1 >= b > 0 for b in bdict.values())

    # Check to make sure lambda was diverted correctly.
    # Two recorded calls are expected: the first carries the
    # INDRAROOVERRIDE entry pointing at the principal db url, the second
    # carries an empty mapping.
    call_records = config.get_test_call_records()
    assert len(call_records) == 2
    assert all(rec.func_name == '_set_lambda_env' for rec in call_records)
    assert all(isinstance(rec.args[1], dict) for rec in call_records)
    assert 'INDRAROOVERRIDE' in call_records[0].args[1]
    assert call_records[0].args[1]['INDRAROOVERRIDE'] == str(db.url)
    assert not call_records[1].args[1]
Exemple #23
0
 def _get_op_complex(self, source, target, evidence_list):
     """Return a Complex whose members come from both source and target.

     The agents extracted from ``source`` are followed by those extracted
     from ``target``; ``evidence_list`` is passed on as the evidence.
     """
     source_agents = self._complex_agents_from_op_complex(source)
     target_agents = self._complex_agents_from_op_complex(target)
     return Complex(members=source_agents + target_agents,
                    evidence=evidence_list)