def __init__(self, a1_text, a2_text, sentence_segmentations, pmid):
    """Parse TEES a1/a2 output into a graph and extract grounded Statements."""
    self.pmid = pmid

    # Locate and load the grounding map shipped in the resources folder
    here = os.path.dirname(os.path.abspath(__file__))
    gm_fname = os.path.join(here, '../../resources/',
                            'extracted_reach_grounding_map.csv')
    try:
        gm = load_grounding_map(gm_fname)
    except BaseException:
        raise Exception('Could not load the grounding map from ' + gm_fname)
    mapper = GroundingMapper(gm)

    # Parse the TEES output into a networkx graph
    self.G = parse_output(a1_text, a2_text, sentence_segmentations)

    # Extract each supported statement type from the TEES graph
    self.statements = []
    extractors = (self.process_phosphorylation_statements,
                  self.process_binding_statements,
                  self.process_increase_expression_amount,
                  self.process_decrease_expression_amount)
    for extract in extractors:
        self.statements.extend(extract())

    # Ground the extracted statements
    self.statements = mapper.map_agents(self.statements)
def map_grounding(stmts_in, **kwargs):
    """Map grounding using the GroundingMapper.

    Parameters
    ----------
    stmts_in : list[indra.statements.Statement]
        A list of statements to map.
    do_rename : Optional[bool]
        If True, Agents are renamed based on their mapped grounding.
    save : Optional[str]
        The name of a pickle file to save the results (stmts_out) into.

    Returns
    -------
    stmts_out : list[indra.statements.Statement]
        A list of mapped statements.
    """
    logger.info('Mapping grounding on %d statements...' % len(stmts_in))
    # An absent or explicit-None do_rename both default to True
    do_rename = kwargs.get('do_rename')
    do_rename = True if do_rename is None else do_rename
    mapper = GroundingMapper(grounding_map)
    stmts_out = mapper.map_agents(stmts_in, do_rename=do_rename)
    # Optionally pickle the mapped statements
    pkl_name = kwargs.get('save')
    if pkl_name:
        dump_statements(stmts_out, pkl_name)
    return stmts_out
def __init__(self, text, pmid, tees_path, python2_path):
    """Run TEES on raw text, parse it into a graph and extract Statements."""
    self.pmid = pmid

    # Locate and load the grounding map shipped in the resources folder
    here = os.path.dirname(os.path.abspath(__file__))
    gm_fname = os.path.join(here, '../../resources/',
                            'extracted_reach_grounding_map.csv')
    try:
        gm = load_grounding_map(gm_fname)
    except BaseException:
        raise Exception('Could not load the grounding map from ' + gm_fname)
    mapper = GroundingMapper(gm)

    # Run TEES on the text and parse its output into a networkx graph
    self.G = run_and_parse_tees(text, tees_path, python2_path)

    # Extract each supported statement type from the TEES graph
    self.statements = []
    extractors = (self.process_phosphorylation_statements,
                  self.process_binding_statements,
                  self.process_increase_expression_amount,
                  self.process_decrease_expression_amount)
    for extract in extractors:
        self.statements.extend(extract())

    # Ground the extracted statements
    self.statements = mapper.map_agents(self.statements)
def _agent_from_ns_id(ag_ns, ag_id):
    """Return an Agent grounded to the given namespace/ID pair."""
    # The ID doubles as a placeholder name; standardization replaces it below
    ag = Agent(ag_id)
    if ag_id is not None:
        # Record the grounding only when an actual ID is available
        ag.db_refs[ag_ns] = ag_id
    # Standardize db_refs and derive the canonical agent name
    GroundingMapper.standardize_agent_name(ag, standardize_refs=True)
    # The (standardized) name is stored as the TEXT reference
    ag.db_refs['TEXT'] = ag.name
    return ag
def add_grounding(self):
    """Ground self.statements using the extracted REACH grounding map."""
    here = os.path.dirname(os.path.abspath(__file__))
    gm_fname = os.path.join(here, '../../resources/',
                            'extracted_reach_grounding_map.csv')
    try:
        grounding_map = load_grounding_map(gm_fname)
    except BaseException:
        raise Exception('Could not load the grounding map from ' + gm_fname)
    mapper = GroundingMapper(grounding_map)
    self.statements = mapper.map_agents(self.statements)
def filter_grounded(stmts):
    """Map groundings, then keep only statements whose agents all have some
    grounding beyond a bare TEXT reference."""
    mapper = GroundingMapper(grounding_map)
    stmts_mapped = mapper.map_agents(stmts, do_rename=True)
    stmts_grounded = []
    for stmt in stmts_mapped:
        # None agents (e.g. missing enzymes) do not count against grounding
        agents = [a for a in stmt.agent_list() if a is not None]
        if all(set(a.db_refs.keys()) != set(['TEXT']) for a in agents):
            stmts_grounded.append(stmt)
    return stmts_grounded
def get_agent(name):
    """Ground a name via the INDRA grounding service and return an Agent.

    Falls back to a TEXT-only Agent when the service request fails or
    returns no grounding results.
    """
    opts = {'text': name}
    indra_url = read_from_config('INDRA_GROUND_URL')
    res = requests.post(indra_url, json=opts)
    # Fall back if the request failed OR the result list is empty. The
    # original used `and`, which let non-200 responses through to js[0]
    # (garbage/crash) and let an empty 200 result raise IndexError. The
    # short-circuit also avoids calling .json() on failed responses whose
    # body may not be valid JSON.
    if res.status_code != 200 or not res.json():
        return Agent(name, db_refs={'TEXT': name})
    js = res.json()
    # Use the top-scoring grounding term returned by the service
    top_term = js[0]['term']
    agent = Agent(name, db_refs={'TEXT': name, top_term['db']: top_term['id']})
    GroundingMapper.standardize_agent_name(agent, standardize_refs=True)
    return agent
def test_up_with_no_gene_name_with_hgnc_sym():
    agent = Agent('ERK1', db_refs={'TEXT': 'ERK1'})
    stmt = Phosphorylation(None, agent)
    gmap = {'ERK1': {'TEXT': 'ERK1', 'UP': 'A0K5Q6', 'HGNC': '6871'}}
    mapped = GroundingMapper(gmap).map_stmts([stmt])
    refs = mapped[0].sub.db_refs
    # The HGNC grounding is kept and the UP ID corrected to match it
    assert refs['HGNC'] == '6871', refs
    assert refs['UP'] == 'P28482', refs
def test_in_place_overwrite_of_gm():
    """Make sure HGNC lookups don't modify the original grounding map by
    adding keys."""
    agent = Agent('ERK1', db_refs={'TEXT': 'ERK1'})
    stmt = Phosphorylation(None, agent)
    gmap = {'ERK1': {'TEXT': 'ERK1', 'UP': 'P28482'}}
    gm = GroundingMapper(gmap)
    gm.map_stmts([stmt])
    # The mapper's grounding map must still contain exactly the original keys
    assert set(gm.grounding_map['ERK1'].keys()) == {'TEXT', 'UP'}
def test_up_and_mismatched_hgnc():
    agent = Agent('ERK1', db_refs={'TEXT': 'ERK1'})
    stmt = Phosphorylation(None, agent)
    gmap = {'ERK1': {'TEXT': 'ERK1', 'UP': 'P28482', 'HGNC': '6877'}}
    mapped = GroundingMapper(gmap).map_stmts([stmt])
    refs = mapped[0].sub.db_refs
    # When UP and HGNC disagree, HGNC wins and UP is updated to match
    assert refs['HGNC'] == '6877', refs
    assert refs['UP'] == 'P27361', refs
def test_hgnc_but_not_up():
    agent = Agent('ERK1', db_refs={'TEXT': 'ERK1'})
    stmt = Phosphorylation(None, agent)
    gmap = {'ERK1': {'TEXT': 'ERK1', 'HGNC': '6871'}}
    mapped = GroundingMapper(gmap).map_stmts([stmt])
    assert len(mapped) == 1
    erk = mapped[0].sub
    # The UP ID should get filled in from the HGNC grounding
    assert erk.name == 'MAPK1'
    assert erk.db_refs['TEXT'] == 'ERK1'
    assert erk.db_refs['HGNC'] == '6871'
    assert erk.db_refs['UP'] == 'P28482'
def test_up_id_with_no_gene_name():
    """Expect no HGNC entry; no error raised."""
    agent = Agent('NoGNname', db_refs={'TEXT': 'NoGN'})
    stmt = Phosphorylation(None, agent)
    gmap = {'NoGN': {'TEXT': 'NoGN', 'UP': 'A0K5Q6'}}
    mapped = GroundingMapper(gmap).map_stmts([stmt])
    assert len(mapped) == 1
    sub = mapped[0].sub
    assert sub.name == 'NoGNname'
    assert sub.db_refs['TEXT'] == 'NoGN'
    # No HGNC mapping should appear for this UP entry
    assert sub.db_refs.get('HGNC') is None
    assert sub.db_refs['UP'] == 'A0K5Q6'
def test_up_id_with_no_hgnc_id():
    """Non human protein"""
    agent = Agent('Gag', db_refs={'TEXT': 'Gag'})
    stmt = Phosphorylation(None, agent)
    gmap = {'Gag': {'TEXT': 'Gag', 'UP': 'P04585'}}
    mapped = GroundingMapper(gmap).map_stmts([stmt])
    assert len(mapped) == 1
    gag = mapped[0].sub
    # A non-human UP entry gets no HGNC ref; the name comes from UP
    assert gag.name == 'gag-pol'
    assert gag.db_refs['TEXT'] == 'Gag'
    assert gag.db_refs.get('HGNC') is None
    assert gag.db_refs['UP'] == 'P04585'
def test_map_entry_hgnc_and_up():
    """Make sure that HGNC symbol is replaced with HGNC ID when grounding map
    includes both UP ID and HGNC symbol."""
    rela = Agent('NF-kappaB p65', db_refs={'TEXT': 'NF-kappaB p65'})
    erk = Agent('ERK1', db_refs={'TEXT': 'ERK1'})
    stmt = Phosphorylation(erk, rela)
    gmap = {'NF-kappaB p65': {'TEXT': 'NF-kappaB p65', 'UP': 'Q04206',
                              'HGNC': '9955'}}
    mapped = GroundingMapper(gmap).map_stmts([stmt])
    assert len(mapped) == 1
    assert mapped[0].sub.db_refs == {'TEXT': 'NF-kappaB p65',
                                     'UP': 'Q04206', 'HGNC': '9955'}
def test_name_standardize_hgnc_up():
    # An HGNC grounding standardizes to the HGNC gene symbol
    ag = Agent('x', db_refs={'HGNC': '9387'})
    GroundingMapper.standardize_agent_name(ag, True)
    assert ag.name == 'PRKAG3'
    # A human UP ID resolves to the same gene symbol
    ag = Agent('x', db_refs={'UP': 'Q9UGI9'})
    GroundingMapper.standardize_agent_name(ag, True)
    assert ag.name == 'PRKAG3'
    # A non-human UP ID yields the species-specific name (note the casing)
    ag = Agent('x', db_refs={'UP': 'Q8BGM7'})
    GroundingMapper.standardize_agent_name(ag, True)
    assert ag.name == 'Prkag3'
def run_assembly(stmts, folder, pmcid, background_assertions=None):
    '''Run assembly on a list of statements, for a given PMCID.

    Grounds and preassembles the statements, runs belief calculation and
    mechanism linking, pickles the top-level statements and writes index
    cards, an English model printout, a statement graph PDF and TSV
    diagnostics under the given folder.

    Parameters
    ----------
    stmts : list
        Statements to assemble.
    folder : str
        Output folder containing index_cards/ and other_outputs/.
    pmcid : str
        PMCID used as the output file prefix.
    background_assertions : Optional[list]
        Statements that came only from the prior; excluded from outputs.
    '''
    # Folder for index card output (scored submission)
    indexcard_prefix = folder + '/index_cards/' + pmcid
    # Folder for other outputs (for analysis, debugging)
    otherout_prefix = folder + '/other_outputs/' + pmcid

    # Do grounding mapping here
    # Load the TRIPS-specific grounding map and add to the default
    # (REACH-oriented) grounding map:
    trips_gm = load_grounding_map('trips_grounding_map.csv')
    default_grounding_map.update(trips_gm)
    gm = GroundingMapper(default_grounding_map)
    mapped_agent_stmts = gm.map_agents(stmts)
    renamed_agent_stmts = gm.rename_agents(mapped_agent_stmts)

    # Filter for grounding: keep only statements whose agents are all
    # proteins or chemicals
    grounded_stmts = []
    for st in renamed_agent_stmts:
        if all([is_protein_or_chemical(a) for a in st.agent_list()]):
            grounded_stmts.append(st)

    # Instantiate the Preassembler
    pa = Preassembler(hierarchies)
    pa.add_statements(grounded_stmts)
    print('== %s ====================' % pmcid)
    print('%d statements collected in total.' % len(pa.stmts))

    # Combine duplicates
    unique_stmts = pa.combine_duplicates()
    print('%d statements after combining duplicates.' % len(unique_stmts))

    # Run BeliefEngine on unique statements
    epe = BeliefEngine()
    epe.set_prior_probs(pa.unique_stmts)

    # Build statement hierarchy
    related_stmts = pa.combine_related()
    # Run BeliefEngine on hierarchy
    epe.set_hierarchy_probs(related_stmts)
    print('%d statements after combining related.' % len(related_stmts))

    # Instantiate the mechanism linker
    ml = MechLinker(related_stmts)
    # Link statements
    linked_stmts = ml.link_statements()
    # Run BeliefEngine on linked statements
    epe.set_linked_probs(linked_stmts)
    # Print linked statements for debugging purposes
    print('Linked\n=====')
    for ls in linked_stmts:
        print(ls.inferred_stmt.belief, ls.inferred_stmt)
    print('=============')

    # Combine all statements including linked ones
    all_statements = ml.statements + [ls.inferred_stmt for ls in linked_stmts]
    # Instantiate a new preassembler
    pa = Preassembler(hierarchies, all_statements)
    # Build hierarchy again
    pa.combine_duplicates()
    # Choose the top-level statements
    related_stmts = pa.combine_related()

    # Remove top-level statements that came only from the prior
    if background_assertions is not None:
        nonbg_stmts = [
            stmt for stmt in related_stmts
            if stmt not in background_assertions
        ]
    else:
        nonbg_stmts = related_stmts

    # Dump top-level statements in a pickle
    with open(otherout_prefix + '.pkl', 'wb') as fh:
        pickle.dump(nonbg_stmts, fh, protocol=2)

    # Flatten evidence for statements
    flattened_evidence_stmts = flatten_evidence(nonbg_stmts)

    # Start a card counter
    card_counter = 1
    # We don't limit the number of cards reported in this round
    card_lim = float('inf')
    top_stmts = []
    ###############################################
    # The belief cutoff for statements
    belief_cutoff = 0.3
    ###############################################
    # Sort by amount of evidence
    for st in sorted(flattened_evidence_stmts,
                     key=lambda x: x.belief, reverse=True):
        if st.belief >= belief_cutoff:
            print(st.belief, st)
        if st.belief < belief_cutoff:
            # NOTE(review): sub-cutoff statements are printed as SKIP but
            # NOT actually skipped (no continue) — they still get index
            # cards below. Confirm whether that is intended.
            print('SKIP', st.belief, st)
        # If it's background knowledge, we skip the statement
        if is_background_knowledge(st):
            print('This statement is background knowledge - skipping.')
            continue
        # Assemble IndexCards
        ia = IndexCardAssembler([st], pmc_override=pmcid)
        ia.make_model()
        # If the index card was actually made
        # (not all statements can be assembled into index cards, so
        # this is often not the case)
        if ia.cards:
            # Save the index card json
            ia.save_model(indexcard_prefix + '-%d.json' % card_counter)
            card_counter += 1
            top_stmts.append(st)
        if card_counter > card_lim:
            break

    # Print the English-assembled model for debugging purposes
    ea = EnglishAssembler(top_stmts)
    print('=======================')
    print(ea.make_model())
    print('=======================')
    # Print the statement graph
    graph = render_stmt_graph(nonbg_stmts)
    graph.draw(otherout_prefix + '_graph.pdf', prog='dot')
    # Print statement diagnostics
    print_stmts(pa.stmts, otherout_prefix + '_statements.tsv')
    print_stmts(related_stmts, otherout_prefix + '_related_statements.tsv')
def get_channel_agent(channel):
    """Return a standardized Agent for the given channel gene symbol."""
    hgnc_id = hgnc_client.get_hgnc_id(channel)
    agent = Agent(channel, db_refs={'HGNC': hgnc_id})
    GroundingMapper.standardize_agent_name(agent, standardize_refs=True)
    return agent
def test_name_standardize_chebi():
    # A CHEBI grounding standardizes to the chemical's name
    ag = Agent('x', db_refs={'CHEBI': '15996'})
    GroundingMapper.standardize_agent_name(ag, False)
    assert ag.name == 'GTP'
def test_name_standardize_mesh():
    # A MESH grounding standardizes to the MeSH heading
    ag = Agent('x', db_refs={'MESH': 'D008545'})
    GroundingMapper.standardize_agent_name(ag, False)
    assert ag.name == 'Melanoma', ag.name
def test_up_and_invalid_hgnc_sym():
    """Mapping with an invalid HGNC symbol should not raise an error."""
    erk = Agent('ERK1', db_refs={'TEXT': 'ERK1'})
    stmt = Phosphorylation(None, erk)
    g_map = {'ERK1': {'TEXT': 'ERK1', 'UP': 'P28482', 'HGNC': 'foobar'}}
    gm = GroundingMapper(g_map)
    # Actually run the mapping: the original built the mapper but never
    # called it, so the test exercised nothing (cf. test_hgnc_sym_with_no_id)
    mapped_stmts = gm.map_stmts([stmt])
def get_statements():
    """Build the hand-asserted EGF->ERK pathway prior Statements.

    Each statement gets belief 1 and an 'assertion' Evidence, then all
    agents are grounded via the default grounding map.

    Returns
    -------
    list
        The grounded, renamed statements.
    """
    statements = []

    # EGF binds EGFR
    egf = Agent('EGF')
    egfr = Agent('EGFR')
    st = Complex([egf, egfr])
    statements.append(st)

    # EGF-bound EGFR dimerizes (removed an accidental duplicate assignment
    # of egfre that was present in the original)
    egfre = Agent('EGFR', bound_conditions=[BoundCondition(egf, True)])
    st = Complex([egfre, egfre])
    statements.append(st)

    # Dimerized EGFR transphosphorylates itself on Y
    egfrdimer = Agent('EGFR', bound_conditions=[BoundCondition(egfr, True)])
    st = Transphosphorylation(egfrdimer, 'Y')
    statements.append(st)

    # Phospho-EGFR binds GRB2
    egfrpY = Agent('EGFR', mods=[ModCondition('phosphorylation', 'Y')])
    grb2 = Agent('GRB2')
    st = Complex([egfrpY, grb2])
    statements.append(st)

    # EGFR-bound GRB2 binds SOS1
    grb2bound = Agent('GRB2', bound_conditions=[BoundCondition(egfr, True)])
    sos1 = Agent('SOS1')
    st = Complex([grb2bound, sos1])
    statements.append(st)

    # Each RAS binds GDP
    hras = Agent('HRAS')
    kras = Agent('KRAS')
    nras = Agent('NRAS')
    gdp = Agent('GDP')
    for ras in [hras, kras, nras]:
        st = Complex([ras, gdp])
        statements.append(st)

    # GRB2-bound SOS1 binds GDP-bound RAS; GDP-bound RAS is inactive
    sos1bound = Agent('SOS1', bound_conditions=[BoundCondition(grb2, True)])
    hras_gdp = Agent('HRAS', bound_conditions=[BoundCondition(gdp, True)])
    kras_gdp = Agent('KRAS', bound_conditions=[BoundCondition(gdp, True)])
    nras_gdp = Agent('NRAS', bound_conditions=[BoundCondition(gdp, True)])
    for ras_gdp in [hras_gdp, kras_gdp, nras_gdp]:
        st = Complex([sos1bound, ras_gdp])
        statements.append(st)
        st = ActiveForm(ras_gdp, 'activity', False)
        statements.append(st)

    # GRB2-bound SOS1 binds SOS1-bound RAS
    hras_bound = Agent('HRAS', bound_conditions=[BoundCondition(sos1, True)])
    kras_bound = Agent('KRAS', bound_conditions=[BoundCondition(sos1, True)])
    nras_bound = Agent('NRAS', bound_conditions=[BoundCondition(sos1, True)])
    sos1bound = Agent('SOS1', bound_conditions=[BoundCondition(grb2, True)])
    for ras_bound in [hras_bound, kras_bound, nras_bound]:
        st = Complex([sos1bound, ras_bound])
        statements.append(st)

    # GTP-bound RAS binds BRAF and is active
    gtp = Agent('GTP')
    hras_gtp = Agent('HRAS', bound_conditions=[BoundCondition(gtp, True)])
    kras_gtp = Agent('KRAS', bound_conditions=[BoundCondition(gtp, True)])
    nras_gtp = Agent('NRAS', bound_conditions=[BoundCondition(gtp, True)])
    braf = Agent('BRAF')
    for ras_gtp in [hras_gtp, kras_gtp, nras_gtp]:
        st = Complex([ras_gtp, braf])
        statements.append(st)
        st = ActiveForm(ras_gtp, 'activity', True)
        statements.append(st)

    # RAS-bound BRAF dimerizes in all combinations
    hras_braf = Agent('BRAF', bound_conditions=[BoundCondition(hras, True)])
    kras_braf = Agent('BRAF', bound_conditions=[BoundCondition(kras, True)])
    nras_braf = Agent('BRAF', bound_conditions=[BoundCondition(nras, True)])
    for braf1 in [hras_braf, kras_braf, nras_braf]:
        for braf2 in [hras_braf, kras_braf, nras_braf]:
            st = Complex([braf1, braf2])
            statements.append(st)

    # BRAF-bound BRAF transphosphorylates
    braf_bound = Agent('BRAF', bound_conditions=[BoundCondition(braf, True)])
    st = Transphosphorylation(braf_bound)
    statements.append(st)

    # Phospho-BRAF is an active kinase and phosphorylates MEK1/2
    braf_phos = Agent('BRAF', mods=[ModCondition('phosphorylation')])
    mek1 = Agent('MAP2K1')
    mek2 = Agent('MAP2K2')
    st = ActiveForm(braf_phos, 'kinase', True)
    statements.append(st)
    st = Phosphorylation(braf_phos, mek1)
    statements.append(st)
    st = Phosphorylation(braf_phos, mek2)
    statements.append(st)

    # Phospho-MEK1/2 are active kinases
    mek1_phos = Agent('MAP2K1', mods=[ModCondition('phosphorylation')])
    mek2_phos = Agent('MAP2K2', mods=[ModCondition('phosphorylation')])
    mapk1 = Agent('MAPK1')
    mapk3 = Agent('MAPK3')
    st = ActiveForm(mek1_phos, 'kinase', True)
    statements.append(st)
    st = ActiveForm(mek2_phos, 'kinase', True)
    statements.append(st)
    # NOTE(review): these two BRAF->MEK phosphorylations repeat the ones
    # added above; kept as-is pending confirmation that the duplication
    # is intentional
    st = Phosphorylation(braf_phos, mek1)
    statements.append(st)
    st = Phosphorylation(braf_phos, mek2)
    statements.append(st)

    # Phospho-MEK1/2 phosphorylate ERK1/2
    for mek in [mek1_phos, mek2_phos]:
        for erk in [mapk1, mapk3]:
            st = Phosphorylation(mek, erk)
            # FIX: the original never appended these statements, so the
            # MEK->ERK phosphorylations were silently dropped
            statements.append(st)

    # Mark every statement as asserted with full belief
    for st in statements:
        st.belief = 1
        st.evidence.append(Evidence(source_api='assertion'))

    # Update the statements with grounding info. To do this, we set the "text"
    # field of the db_refs to copy from the agent name, then run the grounding
    # mapper
    for st in statements:
        for ag in st.agent_list():
            if ag is None:
                continue
            else:
                ag.db_refs = {'TEXT': ag.name}
    # Now load the grounding map and run
    gm = GroundingMapper(default_grounding_map)
    mapped_stmts = gm.map_agents(statements)
    # This shouldn't change anything, but just in case...
    renamed_stmts = gm.rename_agents(mapped_stmts)
    return renamed_stmts
def run_assembly(stmts, folder, pmcid, background_assertions=None):
    '''Run assembly on a list of statements, for a given PMCID.

    Grounds and preassembles the statements, runs belief calculation and
    static mechanism-linking inference, pickles the top-level statements and
    writes index cards, an English model printout, a statement graph PDF and
    TSV diagnostics under the given folder.

    Parameters
    ----------
    stmts : list
        Statements to assemble.
    folder : str
        Output folder containing index_cards/ and other_outputs/.
    pmcid : str
        PMCID used as the output file prefix.
    background_assertions : Optional[list]
        Statements that came only from the prior; excluded from outputs.
    '''
    # Folder for index card output (scored submission)
    indexcard_prefix = folder + '/index_cards/' + pmcid
    # Folder for other outputs (for analysis, debugging)
    otherout_prefix = folder + '/other_outputs/' + pmcid

    # Do grounding mapping here
    # Load the TRIPS-specific grounding map and add to the default
    # (REACH-oriented) grounding map:
    trips_gm = load_grounding_map('trips_grounding_map.csv')
    default_grounding_map.update(trips_gm)
    gm = GroundingMapper(default_grounding_map)
    mapped_agent_stmts = gm.map_agents(stmts)
    renamed_agent_stmts = gm.rename_agents(mapped_agent_stmts)

    # Filter for grounding: keep only statements whose agents are all
    # proteins or chemicals
    grounded_stmts = []
    for st in renamed_agent_stmts:
        if all([is_protein_or_chemical(a) for a in st.agent_list()]):
            grounded_stmts.append(st)

    # Instantiate the Preassembler
    pa = Preassembler(hierarchies)
    pa.add_statements(grounded_stmts)
    print('== %s ====================' % pmcid)
    print('%d statements collected in total.' % len(pa.stmts))

    # Combine duplicates
    unique_stmts = pa.combine_duplicates()
    print('%d statements after combining duplicates.' % len(unique_stmts))

    # Run BeliefEngine on unique statements
    epe = BeliefEngine()
    epe.set_prior_probs(pa.unique_stmts)

    # Build statement hierarchy
    related_stmts = pa.combine_related()
    # Run BeliefEngine on hierarchy
    epe.set_hierarchy_probs(related_stmts)
    print('%d statements after combining related.' % len(related_stmts))

    # Instantiate the mechanism linker
    # Link statements via the static inference methods
    linked_stmts = MechLinker.infer_active_forms(related_stmts)
    linked_stmts += MechLinker.infer_modifications(related_stmts)
    linked_stmts += MechLinker.infer_activations(related_stmts)
    # Run BeliefEngine on linked statements
    epe.set_linked_probs(linked_stmts)
    # Print linked statements for debugging purposes
    print('Linked\n=====')
    for ls in linked_stmts:
        print(ls.inferred_stmt.belief, ls.inferred_stmt)
    print('=============')

    # Combine all statements including linked ones
    all_statements = related_stmts + [ls.inferred_stmt for ls in linked_stmts]
    # Instantiate a new preassembler
    pa = Preassembler(hierarchies, all_statements)
    # Build hierarchy again
    pa.combine_duplicates()
    # Choose the top-level statements
    related_stmts = pa.combine_related()

    # Remove top-level statements that came only from the prior
    if background_assertions is not None:
        nonbg_stmts = [stmt for stmt in related_stmts
                       if stmt not in background_assertions]
    else:
        nonbg_stmts = related_stmts

    # Dump top-level statements in a pickle
    with open(otherout_prefix + '.pkl', 'wb') as fh:
        pickle.dump(nonbg_stmts, fh)

    # Flatten evidence for statements
    flattened_evidence_stmts = flatten_evidence(nonbg_stmts)

    # Start a card counter
    card_counter = 1
    # We don't limit the number of cards reported in this round
    card_lim = float('inf')
    top_stmts = []
    ###############################################
    # The belief cutoff for statements
    belief_cutoff = 0.3
    ###############################################
    # Sort by amount of evidence
    for st in sorted(flattened_evidence_stmts,
                     key=lambda x: x.belief, reverse=True):
        if st.belief >= belief_cutoff:
            print(st.belief, st)
        if st.belief < belief_cutoff:
            # NOTE(review): sub-cutoff statements are printed as SKIP but
            # NOT actually skipped (no continue) — they still get index
            # cards below. Confirm whether that is intended.
            print('SKIP', st.belief, st)
        # If it's background knowledge, we skip the statement
        if is_background_knowledge(st):
            print('This statement is background knowledge - skipping.')
            continue
        # Assemble IndexCards
        ia = IndexCardAssembler([st], pmc_override=pmcid)
        ia.make_model()
        # If the index card was actually made
        # (not all statements can be assembled into index cards, so
        # this is often not the case)
        if ia.cards:
            # Save the index card json
            ia.save_model(indexcard_prefix + '-%d.json' % card_counter)
            card_counter += 1
            top_stmts.append(st)
        if card_counter > card_lim:
            break

    # Print the English-assembled model for debugging purposes
    ea = EnglishAssembler(top_stmts)
    print('=======================')
    print(ea.make_model().encode('utf-8'))
    print('=======================')
    # Print the statement graph
    graph = render_stmt_graph(nonbg_stmts)
    graph.draw(otherout_prefix + '_graph.pdf', prog='dot')
    # Print statement diagnostics
    print_stmts(pa.stmts, otherout_prefix + '_statements.tsv')
    print_stmts(related_stmts, otherout_prefix + '_related_statements.tsv')
def test_hgnc_sym_with_no_id():
    # Mapping with an unresolvable HGNC symbol should not raise
    agent = Agent('ERK1', db_refs={'TEXT': 'ERK1'})
    stmt = Phosphorylation(None, agent)
    gmap = {'ERK1': {'TEXT': 'ERK1', 'HGNC': 'foobar'}}
    mapped_stmts = GroundingMapper(gmap).map_stmts([stmt])
def test_name_standardize_go():
    # A GO grounding standardizes to the GO term label
    ag = Agent('x', db_refs={'GO': 'GO:0006915'})
    GroundingMapper.standardize_agent_name(ag, False)
    assert ag.name == 'apoptotic process'