def main(args):
    # This file takes about 32 GB to load
    if not args.infile:
        args.infile = './Data/indra_raw/bioexp_all_raw.pkl'
    if not args.outfile:
        args.outfile = './filtered_indra_network.sif'

    # Load statements from file
    stmts_raw = assemble_corpus.load_statements(args.infile)

    # Expand families, fix grounding errors and run run preassembly
    stmts_fixed = assemble_corpus.run_preassembly(
                    assemble_corpus.map_grounding(
                        assemble_corpus.expand_families(stmts_raw)))

    # Default filtering: specific (unique) genes that are grounded.
    stmts_filtered = assemble_corpus.filter_grounded_only(
                         assemble_corpus.filter_genes_only(stmts_fixed, specific_only=True))
    # Custom filters
    if args.human_only:
        stmts_filtered = assemble_corpus.filter_human_only(stmts_filtered)
    if args.filter_direct:
        stmts_filtered = assemble_corpus.filter_direct(stmts_filtered)

    binary_stmts = [s for s in stmts_filtered if len(s.agent_list()) == 2 and s.agent_list()[0] is not None]
    rows = []
    for s in binary_stmts:
        rows.append([ag.name for ag in s.agent_list()])

    # Write rows to .sif file
    with open(args.outfile, 'w', newline='') as csvfile:
        wrtr = csv.writer(csvfile, delimiter='\t')
        for row in rows:
            wrtr.writerow(row)
Example #2
0
def filter(stmts, cutoff, filename):
    stmts = ac.filter_belief(stmts, cutoff)
    stmts = ac.filter_top_level(stmts)
    stmts = ac.filter_direct(stmts)
    #stmts = ac.filter_enzyme_kinase(stmts)
    ac.dump_statements(stmts, filename)
    return stmts
Example #3
0
    def run_assembly(self):
        """Run INDRA's assembly pipeline on the Statements."""
        self.eliminate_copies()
        stmts = self.get_indra_stmts()
        stmts = self.filter_event_association(stmts)
        stmts = ac.filter_no_hypothesis(stmts)
        if not self.assembly_config.get('skip_map_grounding'):
            stmts = ac.map_grounding(stmts)
        if self.assembly_config.get('standardize_names'):
            ac.standardize_names_groundings(stmts)
        if self.assembly_config.get('filter_ungrounded'):
            score_threshold = self.assembly_config.get('score_threshold')
            stmts = ac.filter_grounded_only(stmts,
                                            score_threshold=score_threshold)
        if self.assembly_config.get('merge_groundings'):
            stmts = ac.merge_groundings(stmts)
        if self.assembly_config.get('merge_deltas'):
            stmts = ac.merge_deltas(stmts)
        relevance_policy = self.assembly_config.get('filter_relevance')
        if relevance_policy:
            stmts = self.filter_relevance(stmts, relevance_policy)
        if not self.assembly_config.get('skip_filter_human'):
            stmts = ac.filter_human_only(stmts)
        if not self.assembly_config.get('skip_map_sequence'):
            stmts = ac.map_sequence(stmts)
        # Use WM hierarchies and belief scorer for WM preassembly
        preassembly_mode = self.assembly_config.get('preassembly_mode')
        if preassembly_mode == 'wm':
            hierarchies = get_wm_hierarchies()
            belief_scorer = get_eidos_scorer()
            stmts = ac.run_preassembly(stmts,
                                       return_toplevel=False,
                                       belief_scorer=belief_scorer,
                                       hierarchies=hierarchies)
        else:
            stmts = ac.run_preassembly(stmts, return_toplevel=False)
        belief_cutoff = self.assembly_config.get('belief_cutoff')
        if belief_cutoff is not None:
            stmts = ac.filter_belief(stmts, belief_cutoff)
        stmts = ac.filter_top_level(stmts)

        if self.assembly_config.get('filter_direct'):
            stmts = ac.filter_direct(stmts)
            stmts = ac.filter_enzyme_kinase(stmts)
            stmts = ac.filter_mod_nokinase(stmts)
            stmts = ac.filter_transcription_factor(stmts)

        if self.assembly_config.get('mechanism_linking'):
            ml = MechLinker(stmts)
            ml.gather_explicit_activities()
            ml.reduce_activities()
            ml.gather_modifications()
            ml.reduce_modifications()
            ml.gather_explicit_activities()
            ml.replace_activations()
            ml.require_active_forms()
            stmts = ml.statements

        self.assembled_stmts = stmts
Example #4
0
def assemble_cx(stmts, out_file_prefix, network_type):
    """Return a CX assembler."""
    stmts = ac.filter_belief(stmts, 0.95)
    stmts = ac.filter_top_level(stmts)
    if network_type == 'direct':
        stmts = ac.filter_direct(stmts)

    out_file = '%s_%s.cx' % (out_file_prefix, network_type)

    ca = CxAssembler()
    ca.add_statements(stmts)
    model = ca.make_model()
    ca.save_model(out_file)
    return ca
Example #5
0
def run_assembly(stmts, save_file):
    stmts = ac.map_grounding(stmts)
    stmts = ac.filter_grounded_only(stmts)
    stmts = ac.filter_human_only(stmts)
    stmts = ac.expand_families(stmts)
    stmts = ac.filter_gene_list(stmts, gene_names, 'one')
    stmts = ac.map_sequence(stmts)
    stmts = ac.run_preassembly(stmts, return_toplevel=False)
    stmts = ac.filter_belief(stmts, 0.95)
    stmts = ac.filter_top_level(stmts)
    stmts = ac.filter_direct(stmts)
    stmts = ac.filter_enzyme_kinase(stmts)
    ac.dump_statements(stmts, save_file)
    return stmts
Example #6
0
def assemble_pysb(stmts, data_genes, out_file):
    """Return an assembled PySB model."""
    stmts = ac.filter_direct(stmts)
    stmts = ac.filter_belief(stmts, 0.95)
    stmts = ac.filter_top_level(stmts)
    stmts = ac.filter_gene_list(stmts, data_genes, 'all')
    stmts = ac.reduce_activities(stmts)
    pa = PysbAssembler()
    pa.add_statements(stmts)
    model = pa.make_model()
    # Add observables
    o = Observable('MAPK1p', model.monomers['MAPK1'](T185='p', Y187='p'))
    model.add_component(o)
    o = Observable('MAPK3p', model.monomers['MAPK3'](T202='p', Y204='p'))
    model.add_component(o)
    o = Observable('GSK3Ap', model.monomers['GSK3A'](S21='p'))
    model.add_component(o)
    o = Observable('GSK3Bp', model.monomers['GSK3B'](S9='p'))
    model.add_component(o)
    o = Observable('RPS6p', model.monomers['RPS6'](S235='p'))
    model.add_component(o)
    o = Observable('EIF4EBP1p', model.monomers['EIF4EBP1'](S65='p'))
    model.add_component(o)
    o = Observable('JUNp', model.monomers['JUN'](S73='p'))
    model.add_component(o)
    o = Observable('FOXO3p', model.monomers['FOXO3'](S315='p'))
    model.add_component(o)
    o = Observable('AKT1p', model.monomers['AKT1'](S473='p'))
    model.add_component(o)
    o = Observable('AKT2p', model.monomers['AKT2'](S474='p'))
    model.add_component(o)
    o = Observable('AKT3p', model.monomers['AKT3'](S='p'))
    model.add_component(o)
    o = Observable('ELK1', model.monomers['ELK1'](S383='p'))
    model.add_component(o)
    # Set context
    pa.set_context('SKMEL28_SKIN')
    pa.save_model(out_file)

    ke = KappaExporter(model)
    with open('%s.ka' % base_file, 'wb') as fh:
        base_file, _ = os.path.splitext(out_file)
        fh.write(ke.export().encode('utf-8'))

    return model
Example #7
0
def preprocess_stmts(stmts, data_genes):
    # Filter the INDRA Statements to be put into the model
    stmts = ac.filter_mutation_status(stmts,
                                      {'BRAF': [('V', '600', 'E')]}, ['PTEN'])
    stmts = ac.filter_by_type(stmts, Complex, invert=True)
    stmts = ac.filter_direct(stmts)
    stmts = ac.filter_belief(stmts, 0.95)
    stmts = ac.filter_top_level(stmts)
    stmts = ac.filter_gene_list(stmts, data_genes, 'all')
    stmts = ac.filter_enzyme_kinase(stmts)
    stmts = ac.filter_mod_nokinase(stmts)
    stmts = ac.filter_transcription_factor(stmts)
    # Simplify activity types
    ml = MechLinker(stmts)
    ml.gather_explicit_activities()
    ml.reduce_activities()
    ml.gather_modifications()
    ml.reduce_modifications()
    af_stmts = ac.filter_by_type(ml.statements, ActiveForm)
    non_af_stmts = ac.filter_by_type(ml.statements, ActiveForm, invert=True)
    af_stmts = ac.run_preassembly(af_stmts)
    stmts = af_stmts + non_af_stmts
    # Replace activations when possible
    ml = MechLinker(stmts)
    ml.gather_explicit_activities()
    ml.replace_activations()
    # Require active forms
    ml.require_active_forms()
    num_stmts = len(ml.statements)
    while True:
        # Remove inconsequential PTMs
        ml.statements = ac.filter_inconsequential_mods(ml.statements,
                                                       get_mod_whitelist())
        ml.statements = ac.filter_inconsequential_acts(ml.statements,
                                                       get_mod_whitelist())
        if num_stmts <= len(ml.statements):
            break
        num_stmts = len(ml.statements)
    stmts = ml.statements
    return stmts
Example #8
0
def test_filter_direct():
    st_out = ac.filter_direct([st12])
    assert (len(st_out) == 1)
    st_out = ac.filter_direct([st13])
    assert (len(st_out) == 0)
from indra.util import _require_python3
from indra.assemblers.sif import SifAssembler
import indra.tools.assemble_corpus as ac

stmts = ac.load_statements('output/preassembled.pkl')
stmts = ac.filter_belief(stmts, 0.95)
stmts = ac.filter_direct(stmts)
sa = SifAssembler(stmts)
sa.make_model(True, True, False)
sa.set_edge_weights('support_all')
fname = 'model_high_belief_v2.sif'
with open(fname, 'wt') as fh:
    for s, t, d in sa.graph.edges(data=True):
        source = sa.graph.nodes[s]['name']
        target = sa.graph.nodes[t]['name']
        fh.write('%s %f %s\n' % (source, d['weight'], target))
Example #10
0
        with open(LIGANDS_IN_DATA[count], 'rb') as fh:
            ligands_in_data = pickle.load(fh)

        with open(NATURE_HASHES[count], 'rb') as fh:
            hashes = pickle.load(fh)

        # get the union of all the statement hashes
        all_hashes = set.union(*hashes.values())

        # Download the statements by hashes
        stmts_by_hash = download_statements(all_hashes)

        # get only the list of all the available statemtns
        indra_db_stmts = list(stmts_by_hash.values())
        # Filtering out the indirect INDRA statements
        indra_db_stmts = ac.filter_direct(indra_db_stmts)
        # Filter statements which are not ligands/receptors from
        # OmniPath database and filter op statemeents which are not
        # in 2015 paper
        op_filtered = filter_op_stmts(op.statements, ligands_in_data.values(),
                                      receptors_in_data, lg_rg)

        # Merge omnipath/INDRA statements and run assembly
        indra_op_stmts = ac.run_preassembly(indra_db_stmts + op_filtered,
                                            run_refinement=False)

        # Filter incorrect curations
        indra_op_filtered = filter_incorrect_curations(indra_op_stmts)

        # Filter complex statements
        indra_op_filtered = filter_complex_statements(indra_op_filtered,
Example #11
0
def assemble_pysb(stmts, data_genes, contextualize=False):
    # Filter the INDRA Statements to be put into the model
    stmts = ac.filter_by_type(stmts, Complex, invert=True)
    stmts = ac.filter_direct(stmts)
    stmts = ac.filter_belief(stmts, 0.95)
    stmts = ac.filter_top_level(stmts)
    # Strip the extraneous supports/supported by here
    strip_supports(stmts)
    stmts = ac.filter_gene_list(stmts, data_genes, 'all')
    stmts = ac.filter_enzyme_kinase(stmts)
    stmts = ac.filter_mod_nokinase(stmts)
    stmts = ac.filter_transcription_factor(stmts)
    # Simplify activity types
    ml = MechLinker(stmts)
    ml.gather_explicit_activities()
    ml.reduce_activities()
    ml.gather_modifications()
    ml.reduce_modifications()
    stmts = normalize_active_forms(ml.statements)
    # Replace activations when possible
    ml = MechLinker(stmts)
    ml.gather_explicit_activities()
    ml.replace_activations()
    # Require active forms
    ml.require_active_forms()
    num_stmts = len(ml.statements)
    while True:
        # Remove inconsequential PTMs
        ml.statements = ac.filter_inconsequential_mods(ml.statements,
                                                       get_mod_whitelist())
        ml.statements = ac.filter_inconsequential_acts(ml.statements,
                                                       get_mod_whitelist())
        if num_stmts <= len(ml.statements):
            break
        num_stmts = len(ml.statements)
    stmts = ml.statements
    # Save the Statements here
    ac.dump_statements(stmts, prefixed_pkl('pysb_stmts'))


    # Add drug target Statements
    drug_target_stmts = get_drug_target_statements()
    stmts += drug_target_stmts

    # Just generate the generic model
    pa = PysbAssembler()
    pa.add_statements(stmts)
    model = pa.make_model()
    with open(prefixed_pkl('pysb_model'), 'wb') as f:
        pickle.dump(model, f)

    # Run this extra part only if contextualize is set to True
    if not contextualize:
        return

    cell_lines_no_data = ['COLO858', 'K2', 'MMACSF', 'MZ7MEL', 'WM1552C']
    for cell_line in cell_lines:
        if cell_line not in cell_lines_no_data:
            stmtsc = contextualize_stmts(stmts, cell_line, data_genes)
        else:
            stmtsc = stmts
        pa = PysbAssembler()
        pa.add_statements(stmtsc)
        model = pa.make_model()
        if cell_line not in cell_lines_no_data:
            contextualize_model(model, cell_line, data_genes)
        ac.dump_statements(stmtsc, prefixed_pkl('pysb_stmts_%s' % cell_line))
        with open(prefixed_pkl('pysb_model_%s' % cell_line), 'wb') as f:
            pickle.dump(model, f)
Example #12
0
def test_filter_direct():
    st_out = ac.filter_direct([st12])
    assert len(st_out) == 1
    st_out = ac.filter_direct([st13])
    assert len(st_out) == 0
    # remove all the receptors from the surface_protein_set
    full_ligand_set = get_ligands() - receptor_genes_go

    # Now get INDRA DB Statements for the receptor-ligand pairs
    hashes_by_gene_pair = get_hashes_by_gene_pair(indra_df, full_ligand_set,
                                                  receptor_genes_go)

    # get the union of all the statement hashes
    all_hashes = set.union(*hashes_by_gene_pair.values())
    # Download the statements by hashes
    stmts_by_hash = download_statements(all_hashes)
    # get only the list of all the available statemtns
    indra_db_stmts = list(stmts_by_hash.values())

    # Filtering out the indirect INDRA statements
    indra_db_stmts = ac.filter_direct(indra_db_stmts)

    # Fetch omnipath database biomolecular interactions and
    # process them into INDRA statements
    op = process_from_web()

    # Filter statements which are not ligands/receptors from
    # OmniPath database
    op_filtered = filter_op_stmts(op.statements, full_ligand_set,
                                  receptor_genes_go)
    op_filtered = ac.filter_direct(op_filtered)

    op_filtered = ac.filter_by_curation(op_filtered, curations=db_curations)

    # Merge omnipath/INDRA statements and run assembly
    indra_op_stmts = ac.run_preassembly(indra_db_stmts + op_filtered,