Exemple #1
0
def filter(stmts, cutoff, filename):
    """Run a short filtering pipeline on Statements and dump the result.

    The Statements are passed, in order, through ``ac.filter_belief`` (using
    ``cutoff``), ``ac.filter_top_level`` and ``ac.filter_direct``. The
    outcome is written with ``ac.dump_statements`` to ``filename`` and is
    also returned to the caller.
    """
    believed = ac.filter_belief(stmts, cutoff)
    top_level = ac.filter_top_level(believed)
    direct = ac.filter_direct(top_level)
    # NOTE: ac.filter_enzyme_kinase is currently not part of this pipeline.
    ac.dump_statements(direct, filename)
    return direct
Exemple #2
0
    def run_assembly(self):
        """Assemble the model's Statements and store them on the instance.

        Most steps are switched on or off by keys of
        ``self.assembly_config``. The resulting list of Statements is saved
        as ``self.assembled_stmts``; nothing is returned.
        """
        self.eliminate_copies()
        raw_stmts = self.get_indra_stmts()
        stmts = self.filter_event_association(raw_stmts)
        stmts = ac.filter_no_hypothesis(stmts)

        config = self.assembly_config

        # Grounding-related steps
        if not config.get('skip_map_grounding'):
            stmts = ac.map_grounding(stmts)
        if config.get('standardize_names'):
            # The return value is not used here; ``stmts`` stays bound to
            # the same list.
            ac.standardize_names_groundings(stmts)
        if config.get('filter_ungrounded'):
            stmts = ac.filter_grounded_only(
                stmts, score_threshold=config.get('score_threshold'))
        if config.get('merge_groundings'):
            stmts = ac.merge_groundings(stmts)
        if config.get('merge_deltas'):
            stmts = ac.merge_deltas(stmts)

        # Relevance, organism and sequence steps
        relevance_policy = config.get('filter_relevance')
        if relevance_policy:
            stmts = self.filter_relevance(stmts, relevance_policy)
        if not config.get('skip_filter_human'):
            stmts = ac.filter_human_only(stmts)
        if not config.get('skip_map_sequence'):
            stmts = ac.map_sequence(stmts)

        # Preassembly: the 'wm' mode additionally passes the WM hierarchies
        # and the Eidos belief scorer.
        preassembly_kwargs = {'return_toplevel': False}
        if config.get('preassembly_mode') == 'wm':
            preassembly_kwargs['hierarchies'] = get_wm_hierarchies()
            preassembly_kwargs['belief_scorer'] = get_eidos_scorer()
        stmts = ac.run_preassembly(stmts, **preassembly_kwargs)

        # Belief cutoff (only if configured), then top-level filtering
        belief_cutoff = config.get('belief_cutoff')
        if belief_cutoff is not None:
            stmts = ac.filter_belief(stmts, belief_cutoff)
        stmts = ac.filter_top_level(stmts)

        if config.get('filter_direct'):
            direct_filters = (
                ac.filter_direct,
                ac.filter_enzyme_kinase,
                ac.filter_mod_nokinase,
                ac.filter_transcription_factor,
            )
            for direct_filter in direct_filters:
                stmts = direct_filter(stmts)

        if config.get('mechanism_linking'):
            linker = MechLinker(stmts)
            linker.gather_explicit_activities()
            linker.reduce_activities()
            linker.gather_modifications()
            linker.reduce_modifications()
            # Activities are gathered a second time before the replacement
            # and active-form steps.
            linker.gather_explicit_activities()
            linker.replace_activations()
            linker.require_active_forms()
            stmts = linker.statements

        self.assembled_stmts = stmts
def test_belief_cut_plus_filter_top():
    """Belief cutoff followed by top-level filtering leaves one Statement."""
    stmt_no_enzyme = Phosphorylation(None, Agent('a'))
    stmt_with_enzyme = Phosphorylation(Agent('b'), Agent('a'))
    # Link the two Statements through their supports/supported_by lists
    stmt_no_enzyme.supports = [stmt_with_enzyme]
    stmt_with_enzyme.supported_by = [stmt_no_enzyme]
    # Only the first Statement is above the 0.5 cutoff used below
    stmt_no_enzyme.belief = 0.9
    stmt_with_enzyme.belief = 0.1
    above_cutoff = ac.filter_belief([stmt_no_enzyme, stmt_with_enzyme], 0.5)
    remaining = ac.filter_top_level(above_cutoff)
    assert len(remaining) == 1
Exemple #4
0
def assemble_cx(stmts, out_file):
    """Build a CX model from filtered Statements and save it.

    The Statements go through ``ac.filter_belief`` (cutoff 0.95),
    ``ac.filter_top_level`` and ``ac.strip_agent_context`` before they are
    added to a ``CxAssembler``. The model is saved to ``out_file`` and the
    assembler is returned.
    """
    high_belief = ac.filter_belief(stmts, 0.95)
    top_level = ac.filter_top_level(high_belief)
    stripped = ac.strip_agent_context(top_level)

    assembler = CxAssembler()
    assembler.add_statements(stripped)
    # The model is kept on the assembler; the return value is not needed.
    assembler.make_model()
    assembler.save_model(out_file)
    return assembler
Exemple #5
0
def assemble_cx(stmts, out_file):
    """Assemble filtered Statements into CX, save the model, return the assembler.

    Filtering consists of ``ac.filter_belief`` with a 0.95 cutoff,
    ``ac.filter_top_level`` and ``ac.strip_agent_context``, applied in that
    order. The CX model is written to ``out_file``.
    """
    for apply_filter, extra_args in ((ac.filter_belief, (0.95,)),
                                     (ac.filter_top_level, ()),
                                     (ac.strip_agent_context, ())):
        stmts = apply_filter(stmts, *extra_args)

    cx_assembler = CxAssembler()
    cx_assembler.add_statements(stmts)
    cx_assembler.make_model()
    cx_assembler.save_model(out_file)
    return cx_assembler
Exemple #6
0
def test_readme_pipeline():
    """Run the README assembly pipeline and check that Statements remain."""
    stmts = gn_stmts  # Added only here, not in docs
    from indra.tools import assemble_corpus as ac

    # Steps that take only the Statements, applied in README order
    single_argument_steps = (
        ac.filter_no_hypothesis,
        ac.map_grounding,
        ac.filter_grounded_only,
        ac.filter_human_only,
        ac.map_sequence,
    )
    for step in single_argument_steps:
        stmts = step(stmts)

    stmts = ac.run_preassembly(stmts, return_toplevel=False)
    stmts = ac.filter_belief(stmts, 0.8)
    assert stmts, 'Update example to yield statements list of non-zero length'
def print_statements(
    statements: List[Statement],
    file: Union[None, str, TextIO] = None,
    sep: Optional[str] = None,
    limit: Optional[int] = None,
    allow_duplicates: bool = False,
    keep_only_pmids: Union[None, str, Collection[str]] = None,
    sort_attrs: Iterable[str] = ('uuid', 'pmid'),
    allow_ungrounded: bool = True,
    minimum_belief: Optional[float] = None,
    extra_columns: Optional[List[str]] = None,
) -> None:
    """Write statements as delimited rows for curation.

    The statements are run through preassembly, optionally filtered,
    converted to rows, sorted on ``sort_attrs`` and printed below a header
    line. If no rows remain, a warning is logged and nothing is written.

    :param statements: The statements to write.
    :param file: A path (opened for writing), a file-like object, or None.
        When it is not a string it is handed to ``print`` as its ``file``
        argument unchanged.
    :param sep: The column separator. A tab is used when it is not given
        or empty.
    :param limit: If not None, only the first ``limit`` sorted rows are
        written.
    :param allow_duplicates: Passed on to ``get_rows_from_statements``.
    :param keep_only_pmids: Passed on to ``get_rows_from_statements``.
    :param sort_attrs: Names of the row attributes used as the sort key.
    :param allow_ungrounded: If False, ``filter_grounded_only`` is applied.
    :param minimum_belief: If not None, ``filter_belief`` is applied with
        this cutoff.
    :param extra_columns: Names of additional header columns; the
        corresponding cells are left empty in every row.
    """
    if not sep:
        sep = '\t'
    if not extra_columns:
        extra_columns = []
    # One empty cell per extra column, reused for every row
    empty_cells = [''] * len(extra_columns)

    statements = run_preassembly(statements)
    if not allow_ungrounded:
        statements = filter_grounded_only(statements)
    if minimum_belief is not None:
        statements = filter_belief(statements, minimum_belief)

    unsorted_rows = get_rows_from_statements(
        statements,
        allow_duplicates=allow_duplicates,
        keep_only_pmids=keep_only_pmids,
    )
    rows = sorted(unsorted_rows, key=attrgetter(*sort_attrs))
    if limit is not None:
        rows = rows[:limit]

    if not rows:
        logger.warning('no rows written')
        return

    def _emit(stream):
        """Print the header line followed by one line per row."""
        print(*start_header, *extra_columns, *end_header, sep=sep, file=stream)
        for row in rows:
            print(*row.start_tuple, *empty_cells, *row.end_tuple,
                  sep=sep, file=stream)

    if not isinstance(file, str):
        _emit(file)
        return

    # A string is treated as a path to be opened for writing
    with open(file, 'w') as handle:
        _emit(handle)
def test_readme_wm_pipeline():
    """Run the README WM pipeline and check that Statements remain."""
    # Grounded-only filtering is left out: it does not work on test stmts.
    preassembled = ac.run_preassembly(
        wm_raw_stmts,
        return_toplevel=False,
        belief_scorer=get_eidos_scorer(),
        ontology=world_ontology,
        normalize_opposites=True,
        normalize_ns='WM',
    )
    # Apply belief cutoff of e.g., 0.8
    stmts = ac.filter_belief(preassembled, 0.8)
    assert stmts, 'Update example to yield statements list of non-zero length'
Exemple #9
0
def filter_belief():
    """Handle a request to filter Statements by belief.

    The JSON request body provides ``statements`` and ``belief_cutoff``.
    The cutoff is converted to float when present, and the Statements
    returned by ``ac.filter_belief`` are sent back via ``_return_stmts``.
    An OPTIONS request gets an empty dict.
    """
    if request.method == 'OPTIONS':
        return {}

    payload = json.loads(request.body.read().decode('utf-8'))
    raw_cutoff = payload.get('belief_cutoff')
    cutoff = None if raw_cutoff is None else float(raw_cutoff)

    stmts = stmts_from_json(payload.get('statements'))
    filtered = ac.filter_belief(stmts, cutoff)
    return _return_stmts(filtered)
Exemple #10
0
def filter_belief():
    """Filter the posted Statements with ``ac.filter_belief``.

    Reads ``statements`` and ``belief_cutoff`` from the JSON request body;
    a cutoff that is present is cast to float first. OPTIONS requests are
    answered with an empty dict.
    """
    if request.method == 'OPTIONS':
        return {}

    raw_body = request.body.read()
    body = json.loads(raw_body.decode('utf-8'))
    stmts_json = body.get('statements')

    belief_cutoff = body.get('belief_cutoff')
    if belief_cutoff is not None:
        belief_cutoff = float(belief_cutoff)

    return _return_stmts(
        ac.filter_belief(stmts_from_json(stmts_json), belief_cutoff))
Exemple #11
0
def run_assembly(stmts, save_file):
    """Run the assembly pipeline, dump the result and return it.

    The Statements are passed through a fixed sequence of
    ``assemble_corpus`` steps (see the body for the exact order). The final
    list is written to ``save_file`` with ``ac.dump_statements`` and is
    returned.
    """
    # Grounding, organism and gene-list steps
    grounded = ac.filter_grounded_only(ac.map_grounding(stmts))
    human_only = ac.filter_human_only(grounded)
    expanded = ac.expand_families(human_only)
    in_gene_list = ac.filter_gene_list(expanded, gene_names, 'one')
    sequence_mapped = ac.map_sequence(in_gene_list)

    # Preassembly followed by belief / top-level / directness filters
    preassembled = ac.run_preassembly(sequence_mapped, return_toplevel=False)
    high_belief = ac.filter_belief(preassembled, 0.95)
    top_level = ac.filter_top_level(high_belief)
    direct = ac.filter_direct(top_level)
    final_stmts = ac.filter_enzyme_kinase(direct)

    ac.dump_statements(final_stmts, save_file)
    return final_stmts
Exemple #12
0
def assemble_cx(stmts, out_file_prefix, network_type):
    """Assemble a CX network, save it and return the assembler.

    Statements are filtered with ``ac.filter_belief`` (cutoff 0.95) and
    ``ac.filter_top_level``; ``ac.filter_direct`` is applied as well when
    ``network_type`` equals ``'direct'``. The model is saved to
    ``<out_file_prefix>_<network_type>.cx``.
    """
    selected = ac.filter_top_level(ac.filter_belief(stmts, 0.95))
    if network_type == 'direct':
        selected = ac.filter_direct(selected)

    out_file = '%s_%s.cx' % (out_file_prefix, network_type)

    assembler = CxAssembler()
    assembler.add_statements(selected)
    # The model is kept on the assembler; the return value is not needed.
    assembler.make_model()
    assembler.save_model(out_file)
    return assembler
Exemple #13
0
def assemble_pybel(stmts, out_file_prefix):
    """Assemble a PyBEL model, write it to disk and post it to a web service.

    Statements are filtered with ``ac.filter_belief`` (cutoff 0.95) and
    ``ac.filter_top_level``. The model is written as BEL to
    ``out_file_prefix + '.bel'`` and as JSON to the path ``out_file_prefix``
    itself, then sent as JSON in a POST request. Nothing is returned.
    """
    selected = ac.filter_top_level(ac.filter_belief(stmts, 0.95))

    assembler = PybelAssembler(
        selected,
        name='INDRA/REACH Korkut Model',
        description='Automatically assembled model of '
                    'cancer signaling.',
        version='0.0.10',
    )
    assembler.make_model()

    # Local copies of the model: BEL script and JSON
    pybel.to_bel_path(assembler.model, out_file_prefix + '.bel')
    with open(out_file_prefix, 'wt') as json_file:
        pybel.to_json_file(assembler.model, json_file)

    # Send the model to the remote receiver
    url = 'https://pybel.scai.fraunhofer.de/api/receive'
    headers = {'content-type': 'application/json'}
    requests.post(url, json=pybel.to_json(assembler.model), headers=headers)
Exemple #14
0
def assemble_pysb(stmts, data_genes, out_file):
    """Return an assembled PySB model.

    The Statements are filtered, assembled with a ``PysbAssembler`` and the
    resulting model is extended with a fixed set of observables. The model
    is saved to ``out_file`` and additionally exported to Kappa in a file
    with the same base name and a ``.ka`` extension.

    Parameters
    ----------
    stmts : list
        The Statements to assemble.
    data_genes : list
        Gene names passed to ``ac.filter_gene_list`` with the 'all' policy.
    out_file : str
        Path the assembled model is saved to.

    Returns
    -------
    model
        The model returned by ``PysbAssembler.make_model``, with the
        observables added.
    """
    stmts = ac.filter_direct(stmts)
    stmts = ac.filter_belief(stmts, 0.95)
    stmts = ac.filter_top_level(stmts)
    stmts = ac.filter_gene_list(stmts, data_genes, 'all')
    stmts = ac.reduce_activities(stmts)
    pa = PysbAssembler()
    pa.add_statements(stmts)
    model = pa.make_model()

    # Add observables: (observable name, monomer name, site conditions)
    observables = (
        ('MAPK1p', 'MAPK1', {'T185': 'p', 'Y187': 'p'}),
        ('MAPK3p', 'MAPK3', {'T202': 'p', 'Y204': 'p'}),
        ('GSK3Ap', 'GSK3A', {'S21': 'p'}),
        ('GSK3Bp', 'GSK3B', {'S9': 'p'}),
        ('RPS6p', 'RPS6', {'S235': 'p'}),
        ('EIF4EBP1p', 'EIF4EBP1', {'S65': 'p'}),
        ('JUNp', 'JUN', {'S73': 'p'}),
        ('FOXO3p', 'FOXO3', {'S315': 'p'}),
        ('AKT1p', 'AKT1', {'S473': 'p'}),
        ('AKT2p', 'AKT2', {'S474': 'p'}),
        ('AKT3p', 'AKT3', {'S': 'p'}),
        # NOTE(review): unlike the others, this name has no 'p' suffix; it
        # is kept as-is because other code may look it up by name.
        ('ELK1', 'ELK1', {'S383': 'p'}),
    )
    for obs_name, monomer_name, site_conditions in observables:
        pattern = model.monomers[monomer_name](**site_conditions)
        model.add_component(Observable(obs_name, pattern))

    # Set context
    pa.set_context('SKMEL28_SKIN')
    pa.save_model(out_file)

    # The base name has to be computed before it is used to build the
    # Kappa file name (it was previously assigned only inside the ``with``
    # block, after its first use).
    base_file, _ = os.path.splitext(out_file)
    ke = KappaExporter(model)
    with open('%s.ka' % base_file, 'wb') as fh:
        fh.write(ke.export().encode('utf-8'))

    return model
Exemple #15
0
def preprocess_stmts(stmts, data_genes):
    """Filter and simplify Statements before model assembly.

    The Statements are filtered, passed through two ``MechLinker`` rounds
    and then repeatedly stripped of inconsequential modifications and
    activities until the number of Statements no longer decreases. The
    remaining Statements are returned.
    """
    # Filter the INDRA Statements to be put into the model
    mutations = {'BRAF': [('V', '600', 'E')]}
    stmts = ac.filter_mutation_status(stmts, mutations, ['PTEN'])
    stmts = ac.filter_by_type(stmts, Complex, invert=True)
    stmts = ac.filter_direct(stmts)
    stmts = ac.filter_belief(stmts, 0.95)
    stmts = ac.filter_top_level(stmts)
    stmts = ac.filter_gene_list(stmts, data_genes, 'all')
    stmts = ac.filter_enzyme_kinase(stmts)
    stmts = ac.filter_mod_nokinase(stmts)
    stmts = ac.filter_transcription_factor(stmts)

    # Simplify activity types
    simplifier = MechLinker(stmts)
    simplifier.gather_explicit_activities()
    simplifier.reduce_activities()
    simplifier.gather_modifications()
    simplifier.reduce_modifications()

    # Only the ActiveForm Statements are run through preassembly again
    active_forms = ac.filter_by_type(simplifier.statements, ActiveForm)
    other_stmts = ac.filter_by_type(simplifier.statements, ActiveForm,
                                    invert=True)
    active_forms = ac.run_preassembly(active_forms)

    # Replace activations when possible and require active forms
    linker = MechLinker(active_forms + other_stmts)
    linker.gather_explicit_activities()
    linker.replace_activations()
    linker.require_active_forms()

    # Remove inconsequential PTMs and activities until a pass removes
    # nothing
    previous_count = len(linker.statements)
    while True:
        linker.statements = ac.filter_inconsequential_mods(
            linker.statements, get_mod_whitelist())
        linker.statements = ac.filter_inconsequential_acts(
            linker.statements, get_mod_whitelist())
        current_count = len(linker.statements)
        if current_count >= previous_count:
            break
        previous_count = current_count
    return linker.statements
Exemple #16
0
def assemble_pysb(stmts, data_genes, contextualize=False):
    """Assemble and pickle a generic PySB model, optionally per cell line.

    The Statements are filtered and simplified, dumped to the
    ``pysb_stmts`` pickle, extended with drug-target Statements and
    assembled into a model that is pickled as ``pysb_model``. When
    ``contextualize`` is true, a further Statement dump and model pickle
    are produced for every entry of ``cell_lines``. Nothing is returned.
    """
    # Filter the INDRA Statements to be put into the model
    stmts = ac.filter_by_type(stmts, Complex, invert=True)
    stmts = ac.filter_direct(stmts)
    stmts = ac.filter_belief(stmts, 0.95)
    stmts = ac.filter_top_level(stmts)
    # Strip the extraneous supports/supported by here
    strip_supports(stmts)
    stmts = ac.filter_gene_list(stmts, data_genes, 'all')
    stmts = ac.filter_enzyme_kinase(stmts)
    stmts = ac.filter_mod_nokinase(stmts)
    stmts = ac.filter_transcription_factor(stmts)

    # Simplify activity types
    simplifier = MechLinker(stmts)
    simplifier.gather_explicit_activities()
    simplifier.reduce_activities()
    simplifier.gather_modifications()
    simplifier.reduce_modifications()
    stmts = normalize_active_forms(simplifier.statements)

    # Replace activations when possible and require active forms
    linker = MechLinker(stmts)
    linker.gather_explicit_activities()
    linker.replace_activations()
    linker.require_active_forms()

    # Remove inconsequential PTMs and activities until a pass removes
    # nothing
    previous_count = len(linker.statements)
    while True:
        linker.statements = ac.filter_inconsequential_mods(
            linker.statements, get_mod_whitelist())
        linker.statements = ac.filter_inconsequential_acts(
            linker.statements, get_mod_whitelist())
        current_count = len(linker.statements)
        if current_count >= previous_count:
            break
        previous_count = current_count
    stmts = linker.statements

    # Save the Statements here, before the drug-target ones are added
    ac.dump_statements(stmts, prefixed_pkl('pysb_stmts'))

    # Add drug target Statements
    stmts += get_drug_target_statements()

    # Just generate the generic model
    generic_assembler = PysbAssembler()
    generic_assembler.add_statements(stmts)
    generic_model = generic_assembler.make_model()
    with open(prefixed_pkl('pysb_model'), 'wb') as fh:
        pickle.dump(generic_model, fh)

    # Run this extra part only if contextualize is set to True
    if not contextualize:
        return

    cell_lines_no_data = ['COLO858', 'K2', 'MMACSF', 'MZ7MEL', 'WM1552C']
    for cell_line in cell_lines:
        has_data = cell_line not in cell_lines_no_data
        # Cell lines listed as having no data use the generic Statements
        if has_data:
            line_stmts = contextualize_stmts(stmts, cell_line, data_genes)
        else:
            line_stmts = stmts

        line_assembler = PysbAssembler()
        line_assembler.add_statements(line_stmts)
        line_model = line_assembler.make_model()
        if has_data:
            contextualize_model(line_model, cell_line, data_genes)

        ac.dump_statements(line_stmts,
                           prefixed_pkl('pysb_stmts_%s' % cell_line))
        with open(prefixed_pkl('pysb_model_%s' % cell_line), 'wb') as fh:
            pickle.dump(line_model, fh)
Exemple #17
0
def assemble_sif(stmts, data, out_file):
    """Assemble a SIF network, write it to ``out_file`` and return it.

    The network is built from filtered Activation/Inhibition Statements
    plus drug Inhibition Statements, after gene names are rewritten to
    antibody names where possible. The SIF text is written UTF-8 encoded
    to ``out_file`` and returned as a string.
    """
    def _drug_statements():
        """Return one Inhibition per (drug, target) pair."""
        collected = []
        for drug_name, target_names in \
                process_data.get_drug_targets().items():
            # A single Agent is shared by all Statements of one drug
            drug_agent = Agent(drug_name + ':Drugs')
            for target_name in target_names:
                collected.append(Inhibition(drug_agent, Agent(target_name)))
        return collected

    def _present_agents(stmt):
        """Return the agents of a Statement that are not None."""
        return [agent for agent in stmt.agent_list() if agent is not None]

    def _rename_and_nodes(statements):
        """Rewrite 'AND' inside agent names to 'A_ND', in place."""
        for stmt in statements:
            for agent in _present_agents(stmt):
                if 'AND' in agent.name:
                    agent.name = agent.name.replace('AND', 'A_ND')

    def _filter_ab_edges(statements, policy='all'):
        """Keep Statements whose agent names contain '_p'.

        With 'all', every agent name must contain '_p' or 'Drugs'; with
        'one', a single agent name containing '_p' is enough. Any other
        policy keeps nothing.
        """
        if policy == 'all':
            return [stmt for stmt in statements
                    if all('_p' in agent.name or 'Drugs' in agent.name
                           for agent in _present_agents(stmt))]
        if policy == 'one':
            return [stmt for stmt in statements
                    if any('_p' in agent.name
                           for agent in _present_agents(stmt))]
        return []

    def _antibody_names(statements):
        """Return the sorted, unique agent names that contain '_p'."""
        return sorted({agent.name
                       for stmt in statements
                       for agent in _present_agents(stmt)
                       if '_p' in agent.name})

    # Filter for high-belief statements
    stmts = ac.filter_top_level(ac.filter_belief(stmts, 0.99))
    # Filter for Activation / Inhibition
    activations = ac.filter_by_type(stmts, Activation)
    inhibitions = ac.filter_by_type(stmts, Inhibition)
    stmts = activations + inhibitions
    # Get Ras227 (without YAP1) and filter statements
    ras_genes = [gene for gene in process_data.get_ras227_genes()
                 if gene != 'YAP1']
    stmts = ac.filter_gene_list(stmts, ras_genes, 'all')
    # Add the drugs inhibiting their targets as INDRA statements
    stmts = stmts + _drug_statements()
    # Because of a bug in CNO, node names containing AND
    # need to be replaced
    _rename_and_nodes(stmts)
    # Rewrite statements to replace genes with their corresponding
    # antibodies when possible
    stmts = rewrite_ab_stmts(stmts, data)
    stmts = _filter_ab_edges(stmts, 'all')
    # Get a list of the AB names that end up being covered in the prior
    # network. This is important because other ABs will need to be taken
    # out of the MIDAS file to work.
    pkn_abs = _antibody_names(stmts)
    print('Boolean PKN contains these antibodies: %s' % ', '.join(pkn_abs))
    # Make the SIF model
    assembler = SifAssembler(stmts)
    assembler.make_model(use_name_as_key=True)
    sif_str = assembler.print_model()
    with open(out_file, 'wb') as fh:
        fh.write(sif_str.encode('utf-8'))
    # Make the MIDAS data file used for training the model; the return
    # value is not used here.
    process_data.get_midas_data(data, pkn_abs)
    return sif_str
Exemple #18
0
# For every (sample size, number of trials) pair, repeatedly draw a random
# set of PMIDs, preassemble the Statements belonging to them and record the
# Statement counts at each stage. The mean and standard deviation over the
# trials are appended to the results* lists, which are expected to exist
# before this loop runs.
for pmid_sample_size, num_trials in sample_sizes_trials:
    print("\n\nSample size: %d\n\n" % pmid_sample_size)
    # Per-trial counts: all, unique, top-level and belief-filtered Statements
    trial_results = []
    trial_results_uniq = []
    trial_results_top = []
    trial_results_filt = []
    for i in range(num_trials):
        # Draw the PMIDs without replacement and collect their Statements
        sample_pmids = np.random.choice(pmids, pmid_sample_size, replace=False)
        trial_stmts = [s for pmid in sample_pmids for s in stmts_by_pmid[pmid]]
        trial_results.append(len(trial_stmts))
        # NOTE(review): `be` is created here but is not used anywhere in the
        # visible code - confirm whether beliefs are expected to be set
        # before the filter_belief call below.
        be = BeliefEngine()
        pa = Preassembler(hierarchies, trial_stmts)
        trial_stmts_top = pa.combine_related(poolsize=16, return_toplevel=True)
        trial_stmts_uniq = pa.unique_stmts
        trial_stmts_filt = ac.filter_belief(trial_stmts_top, 0.90)
        #trial_stmts_uniq = ac.run_preassembly_duplicate(pa, be)
        trial_results_uniq.append(len(trial_stmts_uniq))
        trial_results_top.append(len(trial_stmts_top))
        trial_results_filt.append(len(trial_stmts_filt))

    # One (mean, standard deviation) tuple per sample size
    results.append((np.mean(trial_results), np.std(trial_results)))
    results_uniq.append((np.mean(trial_results_uniq), np.std(trial_results_uniq)))
    results_top.append((np.mean(trial_results_top), np.std(trial_results_top)))
    results_filt.append((np.mean(trial_results_filt), np.std(trial_results_filt)))

# Convert the collected (mean, std) tuples to arrays
results = np.array(results)
results_uniq = np.array(results_uniq)
results_top = np.array(results_top)
results_filt = np.array(results_filt)
def test_filter_belief():
    """A 0.75 belief cutoff keeps two of the three module-level Statements."""
    candidates = [st1, st2, st3]
    kept = ac.filter_belief(candidates, 0.75)
    assert len(kept) == 2
Exemple #20
0
    stats = {}
    logger.info(time.strftime('%c'))
    logger.info('Preassembling original model.')
    model.preassemble(filters=global_filters)
    logger.info(time.strftime('%c'))

    # Original statistics
    stats['orig_stmts'] = len(model.get_statements())
    stats['orig_assembled'] = len(model.assembled_stmts)
    db_stmts = ac.filter_evidence_source(model.assembled_stmts,
                                         ['biopax', 'bel'],
                                         policy='one')
    no_db_stmts = ac.filter_evidence_source(model.assembled_stmts,
                                            ['biopax', 'bel'],
                                            policy='none')
    no_db_stmts = ac.filter_belief(no_db_stmts, belief_threshold)
    orig_stmts = db_stmts + no_db_stmts
    stats['orig_final'] = len(orig_stmts)
    logger.info('%d final statements' % len(orig_stmts))

    # Extend the model with PMIDs
    logger.info('----------------')
    logger.info(time.strftime('%c'))
    logger.info('Extending model.')
    stats['new_papers'], stats['new_abstracts'], stats['existing'] = \
                            extend_model(model_name, model, pmids)
    # Having added new statements, we preassemble the model
    model.preassemble(filters=global_filters)

    # New statistics
    stats['new_stmts'] = len(model.get_statements())
def test_filter_belief():
    """Filtering st1, st2 and st3 at a cutoff of 0.75 leaves two Statements."""
    remaining_count = len(ac.filter_belief([st1, st2, st3], 0.75))
    assert remaining_count == 2
from indra.util import _require_python3
from indra.assemblers.sif import SifAssembler
import indra.tools.assemble_corpus as ac

# Load the preassembled Statements and apply the belief (0.95) and direct
# filters
stmts = ac.filter_belief(ac.load_statements('output/preassembled.pkl'), 0.95)
stmts = ac.filter_direct(stmts)

# Build the SIF graph and set its edge weights
sa = SifAssembler(stmts)
sa.make_model(True, True, False)
sa.set_edge_weights('support_all')

# Write one "<source name> <weight> <target name>" line per edge
fname = 'model_high_belief_v2.sif'
with open(fname, 'wt') as fh:
    for s, t, d in sa.graph.edges(data=True):
        source, target = sa.graph.nodes[s]['name'], sa.graph.nodes[t]['name']
        line = '%s %f %s\n' % (source, d['weight'], target)
        fh.write(line)
from indra.tools import assemble_corpus as ac
from indra.statements import stmts_to_json_file
from indra.assemblers.html import HtmlAssembler
from indra.sources import reach
# Process the paper with REACH, using the URL in reach.local_nxml_url
tp = reach.process_pmc('PMC4455820', url=reach.local_nxml_url)
if tp:
    stmts = tp.statements
    print(stmts)
    # Filter out ungrounded agents
    stmts = ac.filter_grounded_only(stmts)
    # Run preassembly
    stmts = ac.run_preassembly(
        stmts,
        return_toplevel=False,
        # Optional: rewrite equivalent groundings to one standard
        normalize_equivalences=True,
        # Optional: rewrite opposite groundings to one standard
        normalize_opposites=True,
        # Use 'WM' namespace to normalize equivalences and opposites
        normalize_ns='WM',
    )
    # Apply belief cutoff of e.g., 0.8
    stmts = ac.filter_belief(stmts, 0.8)
    # Save the Statements as JSON and as an HTML page
    stmts_to_json_file(stmts, 'PMC4455820.json')
    ha = HtmlAssembler(stmts)
    ha.save_model('PMC4455820.html')

#
#
#
#