def get_drug_statements(groundings):
    all_stmts = {}
    for db_ns, db_id in groundings:
        print('Searching for %s@%s' % (db_id, db_ns))
        idp = indra_db_rest.get_statements(subject='%s@%s' % (db_id, db_ns),
                                           ev_limit=100)
        stmts = idp.statements
        stmts = ac.filter_by_type(stmts, Inhibition) + \
                ac.filter_by_type(stmts, Complex)
        new_stmts = []
        for stmt in stmts:
            new_ev = []
            for ev in stmt.evidence:
                if ev.source_api != 'medscan':
                    new_ev.append(ev)
            if not new_ev:
                continue
            stmt.evidence = new_ev
            new_stmts.append(stmt)
        for stmt in new_stmts:
            all_stmts[stmt.get_hash()] = stmt

    stmts = list(all_stmts.values())
    stmts = filter_db_support(stmts)
    stmts = fix_invalid(stmts)
    return stmts
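A minimal usage sketch for the function above; the grounding tuples are illustrative placeholders, not values from the original script:

# Usage sketch with hypothetical groundings.
groundings = {('CHEBI', 'CHEBI:63637'), ('TEXT', 'vemurafenib')}
drug_stmts = get_drug_statements(groundings)
print('%d unique drug statements' % len(drug_stmts))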
Example #2
def get_omnipath_stmts():
    stmts = omnipath_client.get_all_modifications()
    phos_stmts = ac.filter_by_type(stmts, Phosphorylation)
    dephos_stmts = ac.filter_by_type(stmts, Dephosphorylation)
    stmts = phos_stmts + dephos_stmts
    stmts = ac.map_sequence(stmts)
    stmts = ac.filter_human_only(stmts)
    #stmts = ac.filter_genes_only(stmts, specific_only=True)
    return stmts
Example #3
def normalize_active_forms(stmts):
    af_stmts = ac.filter_by_type(stmts, ActiveForm)
    relevant_af_stmts = []
    for stmt in af_stmts:
        if (not stmt.agent.mods) and (not stmt.agent.mutations):
            continue
        relevant_af_stmts.append(stmt)
    print('%d relevant ActiveForms' % len(relevant_af_stmts))
    non_af_stmts = ac.filter_by_type(stmts, ActiveForm, invert=True)
    af_stmts = ac.run_preassembly(relevant_af_stmts)
    stmts = af_stmts + non_af_stmts
    return stmts
Example #4
def get_curation_texts():
    """Return activity/amount evidence texts based on curations."""
    # FIXME: get_statements_from_hashes will get the right statements but we
    # collect _all_ evidences of these statements, not just the ones that
    # were specifically curated. It might make more sense to filter down
    # the set of evidence to the specific evidence hashes to which each
    # curation corresponds.

    curations = get_curations(tag='act_vs_amt')
    stmts = get_statements_from_hashes([cur.pa_hash for cur in curations])
    # Note that these are flipped because an act_vs_amt curation implies
    # the opposite statement type
    amt_txts = get_ev_texts(ac.filter_by_type(stmts, RegulateActivity))
    act_txts = get_ev_texts(ac.filter_by_type(stmts, RegulateAmount))

    curations = get_curations(tag='correct')
    stmts = get_statements_from_hashes([cur.pa_hash for cur in curations])
    amt_txts += get_ev_texts(ac.filter_by_type(stmts, RegulateAmount))
    act_txts += get_ev_texts(ac.filter_by_type(stmts, RegulateActivity))

    return act_txts, amt_txts
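The get_ev_texts helper is not shown in this snippet; a minimal sketch of what it might look like, assuming it collects the evidence sentences from each statement:

def get_ev_texts(stmts):
    # Hypothetical helper, not part of the original snippet: collect
    # non-empty evidence texts from a list of statements.
    texts = []
    for stmt in stmts:
        for ev in stmt.evidence:
            if ev.text:
                texts.append(ev.text)
    return texts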
Example #5
def preprocess_stmts(stmts, data_genes):
    # Filter the INDRA Statements to be put into the model
    stmts = ac.filter_mutation_status(stmts,
                                      {'BRAF': [('V', '600', 'E')]}, ['PTEN'])
    stmts = ac.filter_by_type(stmts, Complex, invert=True)
    stmts = ac.filter_direct(stmts)
    stmts = ac.filter_belief(stmts, 0.95)
    stmts = ac.filter_top_level(stmts)
    stmts = ac.filter_gene_list(stmts, data_genes, 'all')
    stmts = ac.filter_enzyme_kinase(stmts)
    stmts = ac.filter_mod_nokinase(stmts)
    stmts = ac.filter_transcription_factor(stmts)
    # Simplify activity types
    ml = MechLinker(stmts)
    ml.gather_explicit_activities()
    ml.reduce_activities()
    ml.gather_modifications()
    ml.reduce_modifications()
    af_stmts = ac.filter_by_type(ml.statements, ActiveForm)
    non_af_stmts = ac.filter_by_type(ml.statements, ActiveForm, invert=True)
    af_stmts = ac.run_preassembly(af_stmts)
    stmts = af_stmts + non_af_stmts
    # Replace activations when possible
    ml = MechLinker(stmts)
    ml.gather_explicit_activities()
    ml.replace_activations()
    # Require active forms
    ml.require_active_forms()
    num_stmts = len(ml.statements)
    while True:
        # Remove inconsequential PTMs
        ml.statements = ac.filter_inconsequential_mods(ml.statements,
                                                       get_mod_whitelist())
        ml.statements = ac.filter_inconsequential_acts(ml.statements,
                                                       get_mod_whitelist())
        if num_stmts <= len(ml.statements):
            break
        num_stmts = len(ml.statements)
    stmts = ml.statements
    return stmts
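The get_mod_whitelist helper is not defined in this snippet; a hypothetical sketch of the kind of mapping it could return, following the whitelist format that ac.filter_inconsequential_mods expects (gene name mapped to a list of (mod_type, residue, position) tuples):

def get_mod_whitelist():
    # Hypothetical sketch: modifications to preserve even if no other
    # statement depends on them, keyed by gene name.
    return {'MAPK1': [('phosphorylation', 'T', '185'),
                      ('phosphorylation', 'Y', '187')]}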
Example #6
def filter_by_type():
    """Filter to a given INDRA Statement type."""
    if request.method == 'OPTIONS':
        return {}
    response = request.body.read().decode('utf-8')
    body = json.loads(response)
    stmts_json = body.get('statements')
    stmt_type_str = body.get('type')
    stmt_type_str = stmt_type_str.capitalize()
    stmt_type = getattr(sys.modules[__name__], stmt_type_str)
    stmts = stmts_from_json(stmts_json)
    stmts_out = ac.filter_by_type(stmts, stmt_type)
    return _return_stmts(stmts_out)
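A client-side sketch for calling such an endpoint; the URL is a placeholder and the payload shape follows the handler above:

import requests

# stmts_json is assumed to hold a list of INDRA Statement JSONs.
url = 'http://localhost:8080/statements/filter_by_type'  # hypothetical route
payload = {'statements': stmts_json, 'type': 'phosphorylation'}
resp = requests.post(url, json=payload)
filtered = resp.json().get('statements', [])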
Example #7
def filter_by_type():
    """Filter to a given INDRA Statement type."""
    if request.method == 'OPTIONS':
        return {}
    response = request.body.read().decode('utf-8')
    body = json.loads(response)
    stmts_json = body.get('statements')
    stmt_type_str = body.get('type')
    stmt_type_str = stmt_type_str.capitalize()
    stmt_type = getattr(sys.modules[__name__], stmt_type_str)
    stmts = stmts_from_json(stmts_json)
    stmts_out = ac.filter_by_type(stmts, stmt_type)
    return _return_stmts(stmts_out)
Example #8
def filter_by_type():
    """Filter to a given INDRA Statement type."""
    response = request.body.read().decode('utf-8')
    body = json.loads(response)
    stmts_json = body.get('statements')
    stmt_type_str = body.get('type')
    stmt_type_str = stmt_type_str.capitalize()
    stmt_type = getattr(sys.modules[__name__], stmt_type_str)
    stmts = stmts_from_json(stmts_json)
    stmts_out = ac.filter_by_type(stmts, stmt_type)
    stmts_json = stmts_to_json(stmts_out) if stmts_out else []
    return {'statements': stmts_json}
Example #9
def assemble_sif(stmts, data, out_file):
    """Return an assembled SIF."""
    # Filter for high-belief statements
    stmts = ac.filter_belief(stmts, 0.99)
    stmts = ac.filter_top_level(stmts)
    # Filter for Activation / Inhibition
    stmts_act = ac.filter_by_type(stmts, Activation)
    stmts_inact = ac.filter_by_type(stmts, Inhibition)
    stmts = stmts_act + stmts_inact
    # Get the Ras 227 gene list and filter statements
    ras_genes = process_data.get_ras227_genes()
    ras_genes = [x for x in ras_genes if x not in ['YAP1']]
    stmts = ac.filter_gene_list(stmts, ras_genes, 'all')
    # Get the drugs inhibiting their targets as INDRA
    # statements
    def get_drug_statements():
        drug_targets = process_data.get_drug_targets()
        drug_stmts = []
        for dn, tns in drug_targets.items():
            da = Agent(dn + ':Drugs')
            for tn in tns:
                ta = Agent(tn)
                drug_stmt = Inhibition(da, ta)
                drug_stmts.append(drug_stmt)
        return drug_stmts
    drug_stmts = get_drug_statements()
    stmts = stmts + drug_stmts
    # Because of a bug in CNO, node names containing AND
    # need to be replaced
    def rename_and_nodes(st):
        for s in st:
            for a in s.agent_list():
                if a is not None:
                    if a.name.find('AND') != -1:
                        a.name = a.name.replace('AND', 'A_ND')
    rename_and_nodes(stmts)
    # Rewrite statements to replace genes with their corresponding
    # antibodies when possible
    stmts = rewrite_ab_stmts(stmts, data)
    def filter_ab_edges(st, policy='all'):
        st_out = []
        for s in st:
            if policy == 'all':
                all_ab = True
                for a in s.agent_list():
                    if a is not None:
                        if a.name.find('_p') == -1 and \
                           a.name.find('Drugs') == -1:
                            all_ab = False
                            break
                if all_ab:
                    st_out.append(s)
            elif policy == 'one':
                any_ab = False
                for a in s.agent_list():
                    if a is not None and a.name.find('_p') != -1:
                        any_ab = True
                        break
                if any_ab:
                    st_out.append(s)
        return st_out
    stmts = filter_ab_edges(stmts, 'all')
    # Get a list of the antibody (AB) names that end up being covered in the
    # prior network. This is important because other ABs will need to be
    # removed from the MIDAS file for it to work.
    def get_ab_names(st):
        prior_abs = set()
        for s in st:
            for a in s.agent_list():
                if a is not None:
                    if a.name.find('_p') != -1:
                        prior_abs.add(a.name)
        return sorted(list(prior_abs))
    pkn_abs = get_ab_names(stmts)
    print('Boolean PKN contains these antibodies: %s' % ', '.join(pkn_abs))
    # Make the SIF model
    sa = SifAssembler(stmts)
    sa.make_model(use_name_as_key=True)
    sif_str = sa.print_model()
    with open(out_file, 'wb') as fh:
        fh.write(sif_str.encode('utf-8'))
    # Make the MIDAS data file used for training the model
    midas_data = process_data.get_midas_data(data, pkn_abs)
    return sif_str
Example #10
def test_filter_by_type():
    st_out = ac.filter_by_type([st1, st14], Phosphorylation)
    assert (len(st_out) == 1)
Example #11
groundings = set()
for agent in drug_agents:
    db_ns, db_id = agent.get_grounding()
    if db_ns is None:
        print('No grounding for %s (%s)' % (agent, str(agent.db_refs)))
        db_ns, db_id = ('TEXT', agent.db_refs['TEXT'])
    groundings.add((db_ns, db_id))

all_stmts = []
for db_ns, db_id in groundings:
    print('Searching for %s@%s' % (db_id, db_ns))
    idp = indra_db_rest.get_statements(subject='%s@%s' % (db_id, db_ns),
                                       ev_limit=100)
    stmts = idp.statements
    stmts = ac.filter_by_type(stmts, Inhibition) + \
        ac.filter_by_type(stmts, Complex)
    new_stmts = []
    for stmt in stmts:
        new_ev = []
        for ev in stmt.evidence:
            if ev.source_api != 'medscan':
                new_ev.append(ev)
        if not new_ev:
            continue
        stmt.evidence = new_ev
        new_stmts.append(stmt)
    all_stmts += new_stmts

with open('../stmts/drug_stmts.pkl', 'wb') as fh:
    pickle.dump(all_stmts, fh)
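Reading the pickle back is symmetric:

import pickle

with open('../stmts/drug_stmts.pkl', 'rb') as fh:
    all_stmts = pickle.load(fh)
print('%d drug statements loaded' % len(all_stmts))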
Example #12
    def make_model_by_preassembly(self,
                                  exclude_stmts=None,
                                  complex_members=3,
                                  graph_type='multi_graph',
                                  sign_dict=None,
                                  belief_scorer=None,
                                  weight_flattening=None,
                                  extra_columns=None):
        """Assemble an IndraNet graph object by preassembling the statements
        according to selected graph type.

        Parameters
        ----------
        exclude_stmts : list[str]
            A list of statement type names to not include in the graph.
        complex_members : int
            Maximum allowed size of a complex to be included in the graph.
            All complexes larger than complex_members will be rejected. For
            accepted complexes, all permutations of their members will be added
            as edges. Default is `3`.
        graph_type : str
            Specify the type of graph to assemble. Choose from 'multi_graph'
            (default), 'digraph', or 'signed'.
        sign_dict : dict
            A dictionary mapping a Statement type to a sign to be used for
            the edge. This parameter is only used with the 'signed' option.
            See IndraNet.to_signed_graph for more info.
        belief_scorer : Optional[indra.belief.BeliefScorer]
            Instance of BeliefScorer class to use in calculating edge
            probabilities. If None is provided (default), then the default
            scorer is used.
        weight_flattening : function(networkx.DiGraph)
            A function taking at least the graph G as an argument and
            returning G after adding edge weights as an edge attribute to the
            flattened edges using the reserved keyword 'weight'.

            Example:

            >>> def weight_flattening(G):
            ...     # Sets the flattened weight to the average of the
            ...     # inverse source count
            ...     for edge in G.edges:
            ...         w = [1/s['evidence_count']
            ...             for s in G.edges[edge]['statements']]
            ...         G.edges[edge]['weight'] = sum(w)/len(w)
            ...     return G

        Returns
        -------
        model : IndraNet
            IndraNet graph object.
        """
        # Filter out statements with one agent or with None subject
        stmts = [
            stmt for stmt in self.statements if len(stmt.real_agent_list()) > 1
        ]
        if exclude_stmts:
            exclude_types = tuple(
                get_statement_by_name(st_type) for st_type in exclude_stmts)
            stmts = [
                stmt for stmt in stmts if not isinstance(stmt, exclude_types)
            ]
        # Store edge data in statement annotations
        stmts = _store_edge_data(stmts, extra_columns)
        if graph_type == 'signed':
            if not sign_dict:
                sign_dict = default_sign_dict
            graph_stmts = []
            # Only keep statements with explicit signs
            for stmt_type in sign_dict:
                graph_stmts += ac.filter_by_type(stmts, stmt_type)
            graph_stmts += ac.filter_by_type(stmts, Influence)
            # Conversion statements can also be turned into two types of
            # signed statements
            conv_stmts = ac.filter_by_type(stmts, Conversion)
            for stmt in conv_stmts:
                if stmt.subj:
                    for obj in stmt.obj_from:
                        graph_stmts.append(
                            DecreaseAmount(stmt.subj, obj, stmt.evidence))
                    for obj in stmt.obj_to:
                        graph_stmts.append(
                            IncreaseAmount(stmt.subj, obj, stmt.evidence))
            # Merge statements by agent name and polarity
            graph_stmts = ac.run_preassembly(graph_stmts,
                                             return_toplevel=False,
                                             belief_scorer=belief_scorer,
                                             matches_fun=partial(
                                                 agent_name_polarity_matches,
                                                 sign_dict=sign_dict),
                                             run_refinement=False)
            G = nx.MultiDiGraph()
        elif graph_type in ['digraph', 'multi_graph']:
            # Keep Complex and Conversion aside
            complex_stmts = ac.filter_by_type(stmts, Complex)
            conv_stmts = ac.filter_by_type(stmts, Conversion)
            graph_stmts = [
                stmt for stmt in stmts
                if stmt not in complex_stmts and stmt not in conv_stmts
            ]
            for stmt in complex_stmts:
                agents = stmt.real_agent_list()
                if len(agents) > complex_members:
                    continue
                for a, b in permutations(agents, 2):
                    graph_stmts.append(IncreaseAmount(a, b, stmt.evidence))
            for stmt in conv_stmts:
                if stmt.subj:
                    for obj in stmt.obj_from:
                        graph_stmts.append(
                            DecreaseAmount(stmt.subj, obj, stmt.evidence))
                    for obj in stmt.obj_to:
                        graph_stmts.append(
                            IncreaseAmount(stmt.subj, obj, stmt.evidence))
            if graph_type == 'digraph':
                # Merge statements by agent names
                graph_stmts = ac.run_preassembly(
                    graph_stmts,
                    return_toplevel=False,
                    belief_scorer=belief_scorer,
                    matches_fun=agent_name_stmt_matches,
                    run_refinement=False)
                G = nx.DiGraph()
            else:
                G = nx.MultiGraph()
        for stmt in graph_stmts:
            agents = stmt.agent_list()
            for ag in agents:
                ag_ns, ag_id = get_ag_ns_id(ag)
                G.add_node(ag.name, ns=ag_ns, id=ag_id)
            # Since different statements may have been merged by agent name
            # and polarity, we retrieve the original statement data from the
            # evidence annotations
            unique_stmts = {}
            for evid in stmt.evidence:
                edge_data = evid.annotations['indranet_edge']
                if edge_data['stmt_hash'] not in unique_stmts:
                    unique_stmts[edge_data['stmt_hash']] = edge_data
            statement_data = list(unique_stmts.values())
            if graph_type == 'signed':
                if isinstance(stmt, Influence):
                    stmt_pol = stmt.overall_polarity()
                    if stmt_pol == 1:
                        sign = 0
                    elif stmt_pol == -1:
                        sign = 1
                    else:
                        continue
                else:
                    sign = sign_dict[type(stmt).__name__]
                G.add_edge(agents[0].name,
                           agents[1].name,
                           sign,
                           statements=statement_data,
                           belief=stmt.belief,
                           sign=sign)
            elif graph_type == 'digraph':
                G.add_edge(agents[0].name,
                           agents[1].name,
                           statements=statement_data,
                           belief=stmt.belief)
            else:
                if statement_data:
                    edge_data = statement_data[0]
                else:
                    edge_data = _get_edge_data(stmt, extra_columns)
                G.add_edge(agents[0].name, agents[1].name, **edge_data)
        if weight_flattening:
            G = weight_flattening(G)
        return G
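A usage sketch for this method, assuming it is defined on an assembler class constructed from a list of INDRA Statements (the class name below is an assumption):

# Hypothetical construction; the enclosing class is not shown above.
assembler = IndraNetAssembler(stmts)
model = assembler.make_model_by_preassembly(
    exclude_stmts=['Complex'],
    graph_type='signed',
    sign_dict={'Activation': 0, 'Inhibition': 1})
print('%d signed edges' % model.number_of_edges())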
Example #13
def filter_neg(stmts):
    inhib_stmts = ac.filter_by_type(stmts, Inhibition)
    decamt_stmts = ac.filter_by_type(stmts, DecreaseAmount)
    return inhib_stmts + decamt_stmts
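For symmetry, a positive-regulation counterpart could look like this (a sketch, not part of the original):

def filter_pos(stmts):
    # Hypothetical counterpart to filter_neg above.
    act_stmts = ac.filter_by_type(stmts, Activation)
    incamt_stmts = ac.filter_by_type(stmts, IncreaseAmount)
    return act_stmts + incamt_stmts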
Example #14
def assemble_pysb(stmts, data_genes, contextualize=False):
    # Filter the INDRA Statements to be put into the model
    stmts = ac.filter_by_type(stmts, Complex, invert=True)
    stmts = ac.filter_direct(stmts)
    stmts = ac.filter_belief(stmts, 0.95)
    stmts = ac.filter_top_level(stmts)
    # Strip the extraneous supports/supported by here
    strip_supports(stmts)
    stmts = ac.filter_gene_list(stmts, data_genes, 'all')
    stmts = ac.filter_enzyme_kinase(stmts)
    stmts = ac.filter_mod_nokinase(stmts)
    stmts = ac.filter_transcription_factor(stmts)
    # Simplify activity types
    ml = MechLinker(stmts)
    ml.gather_explicit_activities()
    ml.reduce_activities()
    ml.gather_modifications()
    ml.reduce_modifications()
    stmts = normalize_active_forms(ml.statements)
    # Replace activations when possible
    ml = MechLinker(stmts)
    ml.gather_explicit_activities()
    ml.replace_activations()
    # Require active forms
    ml.require_active_forms()
    num_stmts = len(ml.statements)
    while True:
        # Remove inconsequential PTMs
        ml.statements = ac.filter_inconsequential_mods(ml.statements,
                                                       get_mod_whitelist())
        ml.statements = ac.filter_inconsequential_acts(ml.statements,
                                                       get_mod_whitelist())
        if num_stmts <= len(ml.statements):
            break
        num_stmts = len(ml.statements)
    stmts = ml.statements
    # Save the Statements here
    ac.dump_statements(stmts, prefixed_pkl('pysb_stmts'))


    # Add drug target Statements
    drug_target_stmts = get_drug_target_statements()
    stmts += drug_target_stmts

    # Just generate the generic model
    pa = PysbAssembler()
    pa.add_statements(stmts)
    model = pa.make_model()
    with open(prefixed_pkl('pysb_model'), 'wb') as f:
        pickle.dump(model, f)

    # Run this extra part only if contextualize is set to True
    if not contextualize:
        return

    cell_lines_no_data = ['COLO858', 'K2', 'MMACSF', 'MZ7MEL', 'WM1552C']
    for cell_line in cell_lines:
        if cell_line not in cell_lines_no_data:
            stmtsc = contextualize_stmts(stmts, cell_line, data_genes)
        else:
            stmtsc = stmts
        pa = PysbAssembler()
        pa.add_statements(stmtsc)
        model = pa.make_model()
        if cell_line not in cell_lines_no_data:
            contextualize_model(model, cell_line, data_genes)
        ac.dump_statements(stmtsc, prefixed_pkl('pysb_stmts_%s' % cell_line))
        with open(prefixed_pkl('pysb_model_%s' % cell_line), 'wb') as f:
            pickle.dump(model, f)
Example #15
def filter_text_too_long(stmts, k):
    # NOTE: the original function header was lost in extraction; this
    # signature is reconstructed from the names used in the body below.
    new_stmts = []
    for stmt in stmts:
        st = get_text(stmt.subj)
        ot = get_text(stmt.obj)
        if text_too_long(st, k) or text_too_long(ot, k):
            continue
        new_stmts.append(stmt)
    logger.info(f'{len(new_stmts)} statements after filter.')
    return new_stmts


if __name__ == '__main__':
    wm_ont = load_world_ontology(wm_ont_url)

    # Load all raw statements
    eidos_stmts = load_eidos()
    eidos_stmts = ac.filter_by_type(eidos_stmts, Influence)
    hume_stmts = load_hume()
    hume_stmts = ac.filter_by_type(hume_stmts, Influence)
    hume_stmts = remove_hume_redundant(hume_stmts, None)
    #sofia_stmts = load_sofia()
    #cwms_stmts = load_cwms()

    # Reground where needed
    # sofia_stmts = reground_stmts(sofia_stmts, wm_ont, 'WM')
    # cwms_stmts = reground_stmts(cwms_stmts, wm_ont, 'WM')

    # Put statements together and filter to influence
    #stmts = eidos_stmts + hume_stmts + sofia_stmts + cwms_stmts
    stmts = eidos_stmts + hume_stmts
    # Remove namespaces that aren't needed in CauseMos
    remove_namespaces(stmts, ['WHO', 'MITRE12', 'UN'])
Example #16
def test_filter_by_type():
    st_out = ac.filter_by_type([st1, st14], Phosphorylation)
    assert len(st_out) == 1
Example #17
def get_signor_stmts():
    """Return a list of activity and a list of amount regulation stmts."""
    sp = signor.process_from_web()
    return ac.filter_by_type(sp.statements, RegulateActivity), \
        ac.filter_by_type(sp.statements, RegulateAmount)
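Since the function returns a 2-tuple, callers unpack it directly:

act_stmts, amt_stmts = get_signor_stmts()
print('%d activity and %d amount statements' % (len(act_stmts), len(amt_stmts)))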
Example #18
def get_agent(concept):
    # NOTE: the opening lines of this function were lost in extraction; the
    # header and grounding lookup are reconstructed, assuming `gr` holds the
    # concept's (namespace, id) grounding and that ungrounded concepts map
    # to None.
    gr = concept.get_grounding()
    if gr[0] is None:
        return None
    agent = Agent(concept.name, db_refs={gr[0]: gr[1], 'TEXT': concept.name})
    standardize_agent_name(agent, standardize_refs=True)
    return agent


def get_regulate_activity(stmt):
    subj = get_agent(stmt.subj.concept)
    obj = get_agent(stmt.obj.concept)
    if not subj or not obj:
        return None
    pol = stmt.overall_polarity()
    stmt_type = Activation if pol == 1 or not pol else Inhibition
    bio_stmt = stmt_type(subj, obj, evidence=stmt.evidence)
    return bio_stmt


if __name__ == '__main__':
    root = os.path.join(os.path.dirname(os.path.abspath(__file__)), os.pardir,
                        os.pardir)
    with open(os.path.join(root, 'stmts', 'eidos_statements.pkl'), 'rb') as fh:
        stmts = pickle.load(fh)
    stmts = ac.filter_by_type(stmts, Influence)
    bio_stmts = []
    for stmt in tqdm.tqdm(stmts):
        bio_stmt = get_regulate_activity(stmt)
        if bio_stmt:
            bio_stmts.append(bio_stmt)
    with open(os.path.join(root, 'stmts', 'eidos_bio_statements.pkl'),
              'wb') as fh:
        pickle.dump(bio_stmts, fh)
Example #19
if __name__ == "__main__":
    stmts = "../work/phospho_stmts.pkl"
    prize_outpath = "../work/pybel_prize.tsv"
    interactome_path = "../work/big_pybel_interactome2.tsv"
    site_file = "../work/gsea_sites.rnk"
    # Load the statements linking kinases/regulators to phospho sites
    # in the data
    stmts = ac.load_statements(stmts)

    # Employ filters to reduce network size
    stmts = ac.filter_grounded_only(stmts)
    stmts = ac.filter_human_only(stmts)
    stmts = ac.filter_genes_only(stmts)
    # In this data, statements of these two types will not act on
    # a short enough timescale to play a meaningful role
    stmts = ac.filter_by_type(stmts, DecreaseAmount, invert=True)
    stmts = ac.filter_by_type(stmts, IncreaseAmount, invert=True)
    stmts = ac.filter_by_type(stmts, Complex, invert=True)
    stmts = ac.filter_enzyme_kinase(stmts)

    # Assemble a pybel graph from statements
    pba = PybelAssembler(stmts)
    pb_graph = make_model(pba)

    signed_graph = to_signed_nodes(pb_graph)
    gn_dict = get_gene_node_dict(signed_graph)
    # Next we have to load the data file and assign values to the nodes

    site_data = read_site_file(site_file)

    dump_steiner_files(signed_graph, site_data, prize_outpath,
                       interactome_path)