Python filter_human_onlyの例、indra.tools.assemble_corpus.filter_human_only Pythonの例

コード例 #1

0

ファイルを表示

ファイル: indra_gene_dump.py プロジェクト: kkaris/Network_Evaluation_Tools

def main(args):
    """Filter an INDRA statement corpus and dump binary statements as a SIF.

    Parameters
    ----------
    args : object
        Options object with the attributes ``infile``, ``outfile``,
        ``human_only`` and ``filter_direct``. Falsy ``infile``/``outfile``
        values are replaced in place with the default paths below.

    The output is a tab-separated file with one row per two-agent
    statement, holding the names of the two agents.
    """
    # This file takes about 32 GB to load
    if not args.infile:
        args.infile = './Data/indra_raw/bioexp_all_raw.pkl'
    if not args.outfile:
        args.outfile = './filtered_indra_network.sif'

    # Load statements from file
    stmts_raw = assemble_corpus.load_statements(args.infile)

    # Expand families, fix grounding errors and run preassembly
    stmts_fixed = assemble_corpus.run_preassembly(
        assemble_corpus.map_grounding(
            assemble_corpus.expand_families(stmts_raw)))

    # Default filtering: specific (unique) genes that are grounded.
    stmts_filtered = assemble_corpus.filter_grounded_only(
        assemble_corpus.filter_genes_only(stmts_fixed, specific_only=True))
    # Custom filters
    if args.human_only:
        stmts_filtered = assemble_corpus.filter_human_only(stmts_filtered)
    if args.filter_direct:
        stmts_filtered = assemble_corpus.filter_direct(stmts_filtered)

    rows = []
    for stmt in stmts_filtered:
        agents = stmt.agent_list()
        # Both agents must be present: a row needs a name for each column,
        # and a None in either position would fail on the `.name` access.
        if len(agents) != 2 or any(ag is None for ag in agents):
            continue
        rows.append([ag.name for ag in agents])

    # Write rows to .sif file
    with open(args.outfile, 'w', newline='') as csvfile:
        csv.writer(csvfile, delimiter='\t').writerows(rows)

コード例 #2

0

ファイルを表示

    def run_assembly(self):
        """Run INDRA's assembly pipeline on the Statements.

        Each optional step is switched on or off by a key in
        ``self.assembly_config``; the final list of statements is stored
        in ``self.assembled_stmts``.
        """
        def setting(key):
            # Read the option from the config at the moment it is needed.
            return self.assembly_config.get(key)

        self.eliminate_copies()
        stmts = self.filter_event_association(self.get_indra_stmts())
        stmts = ac.filter_no_hypothesis(stmts)

        # Grounding-related steps
        if not setting('skip_map_grounding'):
            stmts = ac.map_grounding(stmts)
        if setting('standardize_names'):
            # The return value is not used here
            ac.standardize_names_groundings(stmts)
        if setting('filter_ungrounded'):
            stmts = ac.filter_grounded_only(
                stmts, score_threshold=setting('score_threshold'))
        if setting('merge_groundings'):
            stmts = ac.merge_groundings(stmts)
        if setting('merge_deltas'):
            stmts = ac.merge_deltas(stmts)

        relevance_policy = setting('filter_relevance')
        if relevance_policy:
            stmts = self.filter_relevance(stmts, relevance_policy)
        if not setting('skip_filter_human'):
            stmts = ac.filter_human_only(stmts)
        if not setting('skip_map_sequence'):
            stmts = ac.map_sequence(stmts)

        # Use WM hierarchies and belief scorer for WM preassembly
        preassembly_kwargs = {'return_toplevel': False}
        if setting('preassembly_mode') == 'wm':
            hierarchies = get_wm_hierarchies()
            preassembly_kwargs['belief_scorer'] = get_eidos_scorer()
            preassembly_kwargs['hierarchies'] = hierarchies
        stmts = ac.run_preassembly(stmts, **preassembly_kwargs)

        belief_cutoff = setting('belief_cutoff')
        if belief_cutoff is not None:
            stmts = ac.filter_belief(stmts, belief_cutoff)
        stmts = ac.filter_top_level(stmts)

        if setting('filter_direct'):
            for direct_filter in (ac.filter_direct,
                                  ac.filter_enzyme_kinase,
                                  ac.filter_mod_nokinase,
                                  ac.filter_transcription_factor):
                stmts = direct_filter(stmts)

        if setting('mechanism_linking'):
            linker = MechLinker(stmts)
            linker.gather_explicit_activities()
            linker.reduce_activities()
            linker.gather_modifications()
            linker.reduce_modifications()
            linker.gather_explicit_activities()
            linker.replace_activations()
            linker.require_active_forms()
            stmts = linker.statements

        self.assembled_stmts = stmts

コード例 #3

0

ファイルを表示

ファイル: phospho_prior.py プロジェクト: kkaris/sitemapper

def get_omnipath_stmts():
    """Return site-mapped, human-only (de)phosphorylations from OmniPath.

    Takes the modification statements returned by ``omnipath_client``,
    keeps the Phosphorylation and Dephosphorylation ones (in that order),
    maps their sequence positions and filters to human-only statements.
    """
    all_mods = omnipath_client.get_all_modifications()
    selected = (ac.filter_by_type(all_mods, Phosphorylation) +
                ac.filter_by_type(all_mods, Dephosphorylation))
    site_mapped = ac.map_sequence(selected)
    # Note: ac.filter_genes_only(..., specific_only=True) is left disabled
    return ac.filter_human_only(site_mapped)

コード例 #4

0

ファイルを表示

def run_assembly(stmts, filename):
    """Assemble statements, dump them to ``filename`` and return them.

    Steps: grounding mapping, grounded-only and human-only filters, a
    gene-list filter against the module-level ``gene_names`` (policy
    'one', families allowed), sequence mapping and preassembly.
    """
    grounded = ac.filter_grounded_only(ac.map_grounding(stmts))
    human = ac.filter_human_only(grounded)
    # Family expansion (ac.expand_families) is currently disabled
    relevant = ac.filter_gene_list(human, gene_names, 'one',
                                   allow_families=True)
    assembled = ac.run_preassembly(ac.map_sequence(relevant),
                                   return_toplevel=False, poolsize=4)
    ac.dump_statements(assembled, filename)
    return assembled

コード例 #5

0

ファイルを表示

def test_readme_pipeline():
    """Check that the README assembly pipeline yields a non-empty result."""
    stmts = gn_stmts  # Added only here, not in docs
    from indra.tools import assemble_corpus as ac
    # (step, extra positional args, keyword args), applied in order
    pipeline = [
        (ac.filter_no_hypothesis, (), {}),
        (ac.map_grounding, (), {}),
        (ac.filter_grounded_only, (), {}),
        (ac.filter_human_only, (), {}),
        (ac.map_sequence, (), {}),
        (ac.run_preassembly, (), {'return_toplevel': False}),
        (ac.filter_belief, (0.8,), {}),
    ]
    for step, extra_args, extra_kwargs in pipeline:
        stmts = step(stmts, *extra_args, **extra_kwargs)
    assert stmts, 'Update example to yield statements list of non-zero length'

コード例 #6

0

ファイルを表示

ファイル: test_assemble_corpus.py プロジェクト: adarshp/indra

def test_filter_human_only():
    """Exercise filter_human_only with and without bound-condition removal."""
    assert len(ac.filter_human_only([st1, st5])) == 2
    assert len(ac.filter_human_only([st8, st9])) == 0

    # remove_bound=False: a statement with a bound condition grounded to a
    # non-human gene is filtered out as a whole...
    assert len(ac.filter_human_only([st20], remove_bound=False)) == 0

    # ...while a statement bound to a human gene is kept.
    assert len(ac.filter_human_only([st21], remove_bound=False)) == 1

    # remove_bound=True: the statement survives but the bound condition
    # grounded to a non-human gene is stripped.
    stripped = ac.filter_human_only([st20], remove_bound=True)
    assert len(stripped) == 1
    assert len(stripped[0].sub.bound_conditions) == 0

    # Bound conditions that are not grounded to non-human genes are
    # left in place.
    untouched = ac.filter_human_only([st21], remove_bound=True)
    assert len(untouched) == 1
    assert len(untouched[0].sub.bound_conditions) == 1

コード例 #7

0

ファイルを表示

ファイル: incremental_model.py プロジェクト: johnbachman/indra

    def preassemble(self, filters=None, grounding_map=None):
        """Preassemble the model's Statements into ``self.assembled_stmts``.

        Hypotheses are removed, grounding and sequence positions are
        mapped, preassembly is run (``return_toplevel=False``) and the
        result is passed through ``self._relevance_filter``.

        Filter options described for this method:
        - grounding: require that all Agents in statements are grounded
        - human_only: require that all proteins are human proteins
        - prior_one: require that at least one Agent is in the prior model
        - prior_all: require that all Agents are in the prior model

        Parameters
        ----------
        filters : Optional[list[str]]
            Filter options to apply when choosing the statements; it is
            also handed unchanged to ``self._relevance_filter``.
            Default: None
        grounding_map : Optional[dict]
            A user supplied grounding map which maps a string to a
            dictionary of database IDs (in the format used by Agents'
            db_refs). Only forwarded to the grounding step when not None.
        """
        requested = filters or ()

        # Drop hypotheses first
        stmts = ac.filter_no_hypothesis(self.get_statements())

        # Fix grounding; pass the custom map along only if one was given
        mapping_kwargs = {}
        if grounding_map is not None:
            mapping_kwargs['grounding_map'] = grounding_map
        stmts = ac.map_grounding(stmts, **mapping_kwargs)

        if 'grounding' in requested:
            stmts = ac.filter_grounded_only(stmts)

        # Fix sites
        stmts = ac.map_sequence(stmts)

        if 'human_only' in requested:
            stmts = ac.filter_human_only(stmts)

        # Preassembly followed by the relevance filter; keep the result
        stmts = ac.run_preassembly(stmts, return_toplevel=False)
        self.assembled_stmts = self._relevance_filter(stmts, filters)

コード例 #8

0

ファイルを表示

    def preassemble(self, filters=None, grounding_map=None):
        """Preassemble the model's Statements into ``self.assembled_stmts``.

        Hypotheses are removed, grounding and sequence positions are
        mapped, preassembly is run (``return_toplevel=False``) and the
        result is passed through ``self._relevance_filter``.

        Filter options described for this method:
        - grounding: require that all Agents in statements are grounded
        - human_only: require that all proteins are human proteins
        - prior_one: require that at least one Agent is in the prior model
        - prior_all: require that all Agents are in the prior model

        Parameters
        ----------
        filters : Optional[list[str]]
            Filter options to apply when choosing the statements; it is
            also handed unchanged to ``self._relevance_filter``.
            Default: None
        grounding_map : Optional[dict]
            A user supplied grounding map which maps a string to a
            dictionary of database IDs (in the format used by Agents'
            db_refs). Only forwarded to the grounding step when not None.
        """
        requested = filters or ()

        # Drop hypotheses first
        stmts = ac.filter_no_hypothesis(self.get_statements())

        # Fix grounding; pass the custom map along only if one was given
        mapping_kwargs = {}
        if grounding_map is not None:
            mapping_kwargs['grounding_map'] = grounding_map
        stmts = ac.map_grounding(stmts, **mapping_kwargs)

        if 'grounding' in requested:
            stmts = ac.filter_grounded_only(stmts)

        # Fix sites
        stmts = ac.map_sequence(stmts)

        if 'human_only' in requested:
            stmts = ac.filter_human_only(stmts)

        # Preassembly followed by the relevance filter; keep the result
        stmts = ac.run_preassembly(stmts, return_toplevel=False)
        self.assembled_stmts = self._relevance_filter(stmts, filters)

コード例 #9

0

ファイルを表示

ファイル: assemble_model.py プロジェクト: steppi/indra_apps

def run_assembly(stmts, save_file):
    """Assemble statements, dump them to ``save_file`` and return them.

    Steps: grounding mapping, grounded-only and human-only filters, family
    expansion, a gene-list filter against the module-level ``gene_names``
    (policy 'one'), sequence mapping, preassembly, a 0.95 belief cutoff,
    then the top-level, direct and enzyme-kinase filters.
    """
    grounded = ac.filter_grounded_only(ac.map_grounding(stmts))
    expanded = ac.expand_families(ac.filter_human_only(grounded))
    relevant = ac.filter_gene_list(expanded, gene_names, 'one')
    assembled = ac.run_preassembly(ac.map_sequence(relevant),
                                   return_toplevel=False)
    assembled = ac.filter_belief(assembled, 0.95)
    for keep in (ac.filter_top_level, ac.filter_direct,
                 ac.filter_enzyme_kinase):
        assembled = keep(assembled)
    ac.dump_statements(assembled, save_file)
    return assembled

コード例 #10

0

ファイルを表示

ファイル: model.py プロジェクト: cmluria/emmaa

    def run_assembly(self):
        """Run INDRA's assembly pipeline on the Statements.

        The statements are stripped of hypotheses, grounding- and
        sequence-mapped, restricted to human-only statements and then
        preassembled with ``return_toplevel=False``.

        Returns
        -------
        stmts : list[indra.statements.Statement]
            The list of assembled INDRA Statements.
        """
        raw = self.get_indra_smts()
        mapped = ac.map_sequence(
            ac.map_grounding(ac.filter_no_hypothesis(raw)))
        human = ac.filter_human_only(mapped)
        return ac.run_preassembly(human, return_toplevel=False)

コード例 #11

0

ファイルを表示

ファイル: phospho_prior.py プロジェクト: kkaris/sitemapper

def get_indra_phos_stmts():
    """Collect, assemble and cache (de)phosphorylation statements.

    Intermediate and final results are written to pickle files under
    ``sources/``; the final human-only, specific-gene statements are
    returned.
    """
    collected = by_gene_role_type(stmt_type='Phosphorylation')
    collected += by_gene_role_type(stmt_type='Dephosphorylation')

    # Expand families before site mapping
    expanded = ac.expand_families(ac.map_grounding(collected))
    site_mapped = ac.map_sequence(ac.filter_grounded_only(expanded))
    ac.dump_statements(site_mapped, 'sources/indra_phos_sitemap.pkl')

    preassembled = ac.run_preassembly(
        site_mapped, poolsize=4, save='sources/indra_phos_stmts_pre.pkl')
    final = ac.filter_genes_only(ac.filter_human_only(preassembled),
                                 specific_only=True)
    ac.dump_statements(final, 'sources/indra_phos_stmts.pkl')
    return final

コード例 #12

0

ファイルを表示

ファイル: expression_prior.py プロジェクト: steppi/indra_apps

def get_indra_expression():
    """Load cached amount-regulation statements and filter them.

    Reads ``indra_regulate_amount_pre.pkl``, keeps human-only, gene-only
    statements and drops those whose first agent is None.
    """
    # NOTE(review): the code that used to sit here, now disabled, suggests
    # the pickle was built from IncreaseAmount/DecreaseAmount statements via
    # map_grounding -> expand_families -> filter_grounded_only ->
    # map_sequence -> run_preassembly(poolsize=4,
    # save='indra_regulate_amount_pre.pkl') -- confirm before relying on it.
    preassembled = ac.load_statements('indra_regulate_amount_pre.pkl')
    gene_stmts = ac.filter_genes_only(ac.filter_human_only(preassembled))
    with_first_agent = []
    for stmt in gene_stmts:
        if stmt.agent_list()[0] is not None:
            with_first_agent.append(stmt)
    return with_first_agent

コード例 #13

0

ファイルを表示

ファイル: get_relevant_stmts.py プロジェクト: sorgerlab/indra_apps

def regulons_from_stmts(stmts, filename):
    """Write kinase regulons derived from statements to a tab-separated file.

    Statements are first restricted to gene-only, human-only ones. Each
    remaining statement with an enzyme, a residue and a position adds a
    ``<substrate>_<residue><position>`` site to its enzyme's set.

    Parameters
    ----------
    stmts : list
        Statements exposing ``enz``, ``sub``, ``residue`` and ``position``.
    filename : str
        Path of the output file. Each row is the kinase name, the literal
        ``'Description'`` and then that kinase's sites.
    """
    regulons = defaultdict(set)
    stmts = ac.filter_genes_only(stmts)
    stmts = ac.filter_human_only(stmts)
    for stmt in stmts:
        # A regulon is keyed by its kinase, so skip enzyme-less statements
        # instead of failing on the `.name` access below.
        if stmt.enz is None:
            continue
        kinase = stmt.enz.name
        # Blacklist annoying stmts from NCI-PID
        if kinase in ('BRAF', 'RAF1') and \
           stmt.sub.name in ('MAPK1', 'MAPK3'):
            continue
        if stmt.residue and stmt.position:
            site = '%s_%s%s' % (stmt.sub.name, stmt.residue, stmt.position)
            regulons[kinase].add(site)
    rows = [[kinase, 'Description'] + list(sites)
            for kinase, sites in regulons.items()]
    # newline='' leaves line-ending handling to the csv writer, as the csv
    # module documentation requires for files passed to csv.writer.
    with open(filename, 'wt', newline='') as f:
        csv.writer(f, delimiter='\t').writerows(rows)

コード例 #14

0

ファイルを表示

ファイル: phospho_prior.py プロジェクト: kkaris/sitemapper

def get_indra_reg_act_stmts():
    """Return Activation/Inhibition/ActiveForm statements, using a cache.

    If ``sources/indra_reg_act_stmts.pkl`` can be loaded, its content is
    returned directly. Otherwise the statements are fetched per type,
    grounding-mapped, filtered, preassembled, restricted to human-only
    specific genes, dumped to that same pickle file and returned.
    """
    try:
        return ac.load_statements('sources/indra_reg_act_stmts.pkl')
    except Exception:
        # Best-effort cache read: any ordinary load failure falls through to
        # a rebuild. KeyboardInterrupt/SystemExit are no longer swallowed.
        pass
    stmts = []
    for stmt_type in ('Activation', 'Inhibition', 'ActiveForm'):
        print("Getting %s statements from INDRA DB" % stmt_type)
        stmts += by_gene_role_type(stmt_type=stmt_type)
    stmts = ac.map_grounding(stmts, save='sources/indra_reg_act_gmap.pkl')
    stmts = ac.filter_grounded_only(stmts)
    stmts = ac.run_preassembly(stmts,
                               poolsize=4,
                               save='sources/indra_reg_act_pre.pkl')
    stmts = ac.filter_human_only(stmts)
    stmts = ac.filter_genes_only(stmts, specific_only=True)
    ac.dump_statements(stmts, 'sources/indra_reg_act_stmts.pkl')
    return stmts

コード例 #15

0

ファイルを表示

ファイル: phospho_prior.py プロジェクト: kkaris/sitemapper

def load_statements_from_synapse(synapse_id='syn11273504'):
    """Build Phosphorylation statements from a Synapse-hosted table.

    Each row is read as ``<substrate>:<residue><position>`` in column 0
    and a comma-separated list of enzyme names in column 1; one statement
    is created per enzyme. The result is sequence-mapped and filtered to
    human-only, specific genes.
    """
    client = synapseclient.Synapse()
    client.login()
    # Obtain a pointer and download the data
    entity = client.get(synapse_id)

    stmts = []
    for row in read_unicode_csv(entity.path, delimiter='\t'):
        sub_name, site_info = row[0].split(':')
        residue, position = site_info[0], site_info[1:]
        for enz_name in row[1].split(','):
            enz = Agent(enz_name, db_refs=get_ids(enz_name))
            sub = Agent(sub_name, db_refs=get_ids(sub_name))
            stmts.append(Phosphorylation(enz, sub, residue, position))

    human = ac.filter_human_only(ac.map_sequence(stmts))
    return ac.filter_genes_only(human, specific_only=True)

コード例 #16

0

ファイルを表示

def assemble_statements(kinase, stmts, curs):
    """Run assembly steps on statements.

    The assembled list is pickled to ``data/assembled/<kinase>.pkl`` and
    also returned.
    """
    # Keep only statements with two or three real agents
    stmts = [s for s in stmts if len(s.real_agent_list()) in (2, 3)]
    stmts = replace_ctd(stmts, ctd_stmts_by_gene.get(kinase, []))

    # Record the original DB hash on every evidence now, before the
    # statements are modified, so it can be looked up later
    for stmt in stmts:
        for ev in stmt.evidence:
            ev.annotations['prior_hash'] = stmt.get_hash()

    stmts = fix_invalidities(stmts)
    stmts = ac.filter_human_only(ac.filter_grounded_only(stmts))
    stmts = ac.filter_by_curation(stmts, curations=curs)
    stmts = remove_contradictions(unify_lspci(stmts))

    # Rename chemicals
    logger.info('Renaming chemicals')
    for stmt in stmts:
        for agent in stmt.real_agent_list():
            if not agent.db_refs.get('CHEBI'):
                continue
            if len(agent.name) > 25:
                rename_chemical(agent)

    # Remove long names
    logger.info('Removing statements with long names')
    stmts = [
        s for s in stmts
        if not any(len(ag.name) >= 20 for ag in s.real_agent_list())
    ]
    logger.info('%d statements remaining' % len(stmts))

    # Remove microRNAs
    logger.info('Removing microRNA statements')
    stmts = [
        s for s in stmts
        if all('miR' not in ag.name for ag in s.real_agent_list())
    ]
    logger.info('%d statements remaining' % len(stmts))

    stmts = add_source_urls(stmts)
    with open('data/assembled/%s.pkl' % kinase, 'wb') as fh:
        pickle.dump(stmts, fh)
    return stmts

コード例 #17

0

ファイルを表示

ファイル: family_analysis.py プロジェクト: IDG-Kinase/indra_analysis

def get_fplx_stmts(fplx_id):
    """Return human-only, MedScan-filtered statements for a FPLX agent.

    Queries ``indra_db_rest`` for the agent ``<fplx_id>@FPLX`` with an
    evidence limit of 10000.
    """
    agent_key = '%s@FPLX' % fplx_id
    processor = indra_db_rest.get_statements(agents=[agent_key],
                                             ev_limit=10000)
    without_medscan = filter_out_medscan(processor.statements)
    return ac.filter_human_only(without_medscan)

コード例 #18

0

ファイルを表示

ファイル: get_indra_tf_stmts.py プロジェクト: samuelbunga/panacea_indra

    return pd.DataFrame(tf_df)


# Script section: collect the hashes of amount-regulation rows from the
# pickled SIF table, then filter a set of statements and export them as CSV.
wd = __file__

# Input table, located at ../input/sif.pkl relative to the working directory.
INDRA_SIF = os.path.join(os.pardir, 'input', 'sif.pkl')
# NOTE(review): pickle.load runs arbitrary code from the file; only use it
# on a trusted sif.pkl.
with open(INDRA_SIF, 'rb') as fh:
    SIF = pickle.load(fh)

# Column positions of the statement type and hash.
# presumably SIF is a pandas DataFrame (.columns / .iterrows()) -- confirm
n_stmt_type = list(SIF.columns).index('stmt_type')
n_stmt_hash = list(SIF.columns).index('stmt_hash')
# Hashes of all rows whose type is IncreaseAmount or DecreaseAmount
hash_set = set()
for r, c in SIF.iterrows():
    if c[n_stmt_type] == 'IncreaseAmount' or c[n_stmt_type] == 'DecreaseAmount':
        hash_set.add(c[n_stmt_hash])

# NOTE(review): the only visible assignment of `stmts` is the disabled line
# below, yet `stmts.values()` is used right after it. Unless `stmts` is
# defined earlier in the file, this raises NameError -- confirm.
#stmts = download_statements(hash_set)
indra_stmts = list(stmts.values())
# Keep an unfiltered copy of the statements on disk
with open('../output/all_stmts.pkl', 'wb') as fh:
    pickle.dump(indra_stmts, fh)

# Successive filters; the DB-only subset is derived from the filtered list
indra_stmts = filter_human_only(indra_stmts)
indra_stmts = filter_genes_only(indra_stmts)
indra_stmts = filter_transcription_factor(indra_stmts)
indra_stmts_db_only = filter_db_only(indra_stmts)

# Export both the full filtered set and the DB-only subset as CSV
indra_stmts_df = make_dataframe(indra_stmts)
indra_stmts_df.to_csv('../output/indra_all_tf.csv')

indra_stmts_db_only_df = make_dataframe(indra_stmts_db_only)
indra_stmts_db_only_df.to_csv('../output/indra_db_only_tf.csv')

コード例 #19

0

ファイルを表示

ファイル: test_assemble_corpus.py プロジェクト: jmuhlich/indra

def test_filter_human_only():
    """filter_human_only keeps both of st1/st5 and neither of st8/st9."""
    kept = ac.filter_human_only([st1, st5])
    dropped = ac.filter_human_only([st8, st9])
    assert len(kept) == 2
    assert len(dropped) == 0

コード例 #20

0

ファイルを表示

ファイル: phospho_prior.py プロジェクト: kkaris/sitemapper

def get_phosphosite_stmts():
    """Load the cached PhosphoSite statements, human-only and gene-specific.

    Reads ``sources/phosphosite_stmts.pkl`` and applies the human-only
    filter followed by the genes-only filter with ``specific_only=True``.
    """
    cached = ac.load_statements('sources/phosphosite_stmts.pkl')
    human = ac.filter_human_only(cached)
    return ac.filter_genes_only(human, specific_only=True)

コード例 #21

0

ファイルを表示

ファイル: assemble_models.py プロジェクト: jmuhlich/indra

    if not reassemble:
        stmts = ac.load_statements(pjoin(outf, 'preassembled.pkl'))
        #stmts = ac.load_statements(pjoin(outf, 'prior.pkl'))
    else:
        #prior_stmts = build_prior(data_genes, pjoin(outf, 'prior.pkl'))
        prior_stmts = ac.load_statements(pjoin(outf, 'prior.pkl'))
        prior_stmts = ac.map_grounding(prior_stmts,
                                       save=pjoin(outf, 'gmapped_prior.pkl'))
        reading_stmts = ac.load_statements(pjoin(outf, 'phase3_stmts.pkl'))
        reading_stmts = ac.map_grounding(reading_stmts,
                                    save=pjoin(outf, 'gmapped_reading.pkl'))
        stmts = prior_stmts + reading_stmts

        stmts = ac.filter_grounded_only(stmts)
        stmts = ac.filter_genes_only(stmts, specific_only=False)
        stmts = ac.filter_human_only(stmts)
        stmts = ac.expand_families(stmts)
        stmts = ac.filter_gene_list(stmts, data_genes, 'one')
        stmts = ac.map_sequence(stmts, save=pjoin(outf, 'smapped.pkl'))
        stmts = ac.run_preassembly(stmts, return_toplevel=False,
                                   save=pjoin(outf, 'preassembled.pkl'))

    assemble_models = []
    assemble_models.append('sif')
    assemble_models.append('pysb')
    assemble_models.append('cx')

    ### PySB assembly
    if 'pysb' in assemble_models:
        pysb_model = assemble_pysb(stmts, data_genes,
                                   pjoin(outf, 'korkut_model_pysb.py'))

コード例 #22

0

ファイルを表示

        prior_stmts = ac.load_statements(pjoin(outf, 'prior.pkl'))
        prior_stmts = ac.map_grounding(prior_stmts,
                                       save=pjoin(outf, 'gmapped_prior.pkl'))
        reach_stmts = ac.load_statements(pjoin(outf, 'phase3_stmts.pkl'))
        reach_stmts = ac.filter_no_hypothesis(reach_stmts)
        #extra_stmts = ac.load_statements(pjoin(outf, 'extra_stmts.pkl'))
        extra_stmts = read_extra_sources(pjoin(outf, 'extra_stmts.pkl'))
        reading_stmts = reach_stmts + extra_stmts
        reading_stmts = ac.map_grounding(reading_stmts,
                                         save=pjoin(outf,
                                                    'gmapped_reading.pkl'))
        stmts = prior_stmts + reading_stmts + extra_stmts

        stmts = ac.filter_grounded_only(stmts)
        stmts = ac.filter_genes_only(stmts, specific_only=False)
        stmts = ac.filter_human_only(stmts)
        stmts = ac.expand_families(stmts)
        stmts = ac.filter_gene_list(stmts, data_genes, 'one')
        stmts = ac.map_sequence(stmts, save=pjoin(outf, 'smapped.pkl'))
        #stmts = ac.load_statements(pjoin(outf, 'smapped.pkl'))
        stmts = ac.run_preassembly(stmts,
                                   return_toplevel=False,
                                   save=pjoin(outf, 'preassembled.pkl'),
                                   poolsize=4)

    ### PySB assembly
    if 'pysb' in assemble_models:
        pysb_model = assemble_pysb(stmts, data_genes,
                                   pjoin(outf, 'korkut_model_pysb.py'))
    ### SIF assembly
    if 'sif' in assemble_models: