import csv

from indra.tools import assemble_corpus


def main(args):
    # Note: the default input pickle takes about 32 GB of memory to load
    if not args.infile:
        args.infile = './Data/indra_raw/bioexp_all_raw.pkl'
    if not args.outfile:
        args.outfile = './filtered_indra_network.sif'

    # Load statements from file
    stmts_raw = assemble_corpus.load_statements(args.infile)

    # Expand families, fix grounding errors and run preassembly
    stmts_fixed = assemble_corpus.run_preassembly(
                    assemble_corpus.map_grounding(
                        assemble_corpus.expand_families(stmts_raw)))

    # Default filtering: grounded statements with specific genes only (no families)
    stmts_filtered = assemble_corpus.filter_grounded_only(
                         assemble_corpus.filter_genes_only(stmts_fixed, specific_only=True))
    # Custom filters
    if args.human_only:
        stmts_filtered = assemble_corpus.filter_human_only(stmts_filtered)
    if args.filter_direct:
        stmts_filtered = assemble_corpus.filter_direct(stmts_filtered)

    # Keep binary statements with a defined subject agent
    binary_stmts = [s for s in stmts_filtered
                    if len(s.agent_list()) == 2
                    and s.agent_list()[0] is not None]
    rows = []
    for s in binary_stmts:
        rows.append([ag.name for ag in s.agent_list()])

    # Write rows to .sif file
    with open(args.outfile, 'w', newline='') as csvfile:
        wrtr = csv.writer(csvfile, delimiter='\t')
        for row in rows:
            wrtr.writerow(row)
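For reference, a minimal sketch of the command-line wiring this main() appears to expect; the flag names below are assumptions inferred from the attributes accessed on args (infile, outfile, human_only, filter_direct):

# Sketch only: flag names are inferred, not taken from the original script.
import argparse

if __name__ == '__main__':
    parser = argparse.ArgumentParser(
        description='Filter an INDRA statement corpus into a .sif network')
    parser.add_argument('--infile', help='Input statement pickle')
    parser.add_argument('--outfile', help='Output .sif file')
    parser.add_argument('--human-only', dest='human_only',
                        action='store_true',
                        help='Keep only human proteins')
    parser.add_argument('--filter-direct', dest='filter_direct',
                        action='store_true',
                        help='Keep only direct interactions')
    main(parser.parse_args())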
Example #2
def make_model_stmts(old_mm_stmts, other_stmts, new_cord_stmts=None):
    """Process and combine statements from different resources.

    Parameters
    ----------
    old_mm_stmts : list[indra.statements.Statement]
        A list of statements currently in the model.
    other_stmts : list[indra.statements.Statement]
        A list of statements that do not need additional processing
        (e.g. drug, gordon, virhostnet statements).
    new_cord_stmts : Optional[list[indra.statements.Statement]]
        A list of newly extracted statements from CORD19 corpus. If not
        provided, the statements are pulled from the database and filtered
        to those not in old_mm_stmts.
    
    Returns
    -------
    combined_stmts : list[indra.statements.Statement]
        A list of statements to make a new model from.
    paper_ids : list[str]
        A list of TRIDs associated with statements.
    """
    # If new cord statements are not provided, load from database
    if not new_cord_stmts:
        # Get text refs from metadata
        tr_dicts, multiple_tr_ids = get_tr_dicts_and_ids()
        # Filter to text refs that are not part of old model
        new_tr_dicts = {}
        old_tr_ids = set()
        for stmt in old_mm_stmts:
            for evid in stmt.evidence:
                if evid.text_refs.get('TRID'):
                    old_tr_ids.add(evid.text_refs['TRID'])
        for tr_id in tr_dicts:
            if tr_id not in old_tr_ids:
                new_tr_dicts[tr_id] = tr_dicts[tr_id]
        logger.info('Found %d TextRefs, %d of which are not in old model' %
                    (len(tr_dicts), len(new_tr_dicts)))
        # Get statements for the new text refs
        new_cord_stmts = get_raw_stmts(new_tr_dicts, date_limit=5)

    logger.info('Processing the statements')
    # Filter out ungrounded statements
    new_cord_grounded = ac.filter_grounded_only(new_cord_stmts)

    # Group statements by TextRef
    old_mm_by_tr, old_mm_no_tr = stmts_by_text_refs(old_mm_stmts)
    new_cord_by_tr, new_cord_no_tr = stmts_by_text_refs(new_cord_grounded)

    # Add any EMMAA statements from non-Cord19 publications
    updated_mm_stmts_by_tr = combine_stmts(new_cord_by_tr, old_mm_by_tr)
    updated_mm_stmts = [
        s for stmt_list in updated_mm_stmts_by_tr.values() for s in stmt_list
    ]

    # Now, add back in all other statements
    combined_stmts = updated_mm_stmts + other_stmts
    logger.info('Got %d total statements.' % len(combined_stmts))
    logger.info('Processed %d papers.' % len(updated_mm_stmts_by_tr))
    return combined_stmts, list(updated_mm_stmts_by_tr)
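A hedged usage sketch; the input statement lists are placeholders assumed to be loaded elsewhere:

# Sketch only: old_stmts, drug_stmts, gordon_stmts and virhostnet_stmts are
# assumed to be lists of indra.statements.Statement loaded elsewhere.
combined_stmts, paper_ids = make_model_stmts(
    old_mm_stmts=old_stmts,
    other_stmts=drug_stmts + gordon_stmts + virhostnet_stmts)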
Example #3
    def run_assembly(self):
        """Run INDRA's assembly pipeline on the Statements."""
        self.eliminate_copies()
        stmts = self.get_indra_stmts()
        stmts = self.filter_event_association(stmts)
        stmts = ac.filter_no_hypothesis(stmts)
        if not self.assembly_config.get('skip_map_grounding'):
            stmts = ac.map_grounding(stmts)
        if self.assembly_config.get('standardize_names'):
            ac.standardize_names_groundings(stmts)
        if self.assembly_config.get('filter_ungrounded'):
            score_threshold = self.assembly_config.get('score_threshold')
            stmts = ac.filter_grounded_only(stmts,
                                            score_threshold=score_threshold)
        if self.assembly_config.get('merge_groundings'):
            stmts = ac.merge_groundings(stmts)
        if self.assembly_config.get('merge_deltas'):
            stmts = ac.merge_deltas(stmts)
        relevance_policy = self.assembly_config.get('filter_relevance')
        if relevance_policy:
            stmts = self.filter_relevance(stmts, relevance_policy)
        if not self.assembly_config.get('skip_filter_human'):
            stmts = ac.filter_human_only(stmts)
        if not self.assembly_config.get('skip_map_sequence'):
            stmts = ac.map_sequence(stmts)
        # Use WM hierarchies and belief scorer for WM preassembly
        preassembly_mode = self.assembly_config.get('preassembly_mode')
        if preassembly_mode == 'wm':
            hierarchies = get_wm_hierarchies()
            belief_scorer = get_eidos_scorer()
            stmts = ac.run_preassembly(stmts,
                                       return_toplevel=False,
                                       belief_scorer=belief_scorer,
                                       hierarchies=hierarchies)
        else:
            stmts = ac.run_preassembly(stmts, return_toplevel=False)
        belief_cutoff = self.assembly_config.get('belief_cutoff')
        if belief_cutoff is not None:
            stmts = ac.filter_belief(stmts, belief_cutoff)
        stmts = ac.filter_top_level(stmts)

        if self.assembly_config.get('filter_direct'):
            stmts = ac.filter_direct(stmts)
            stmts = ac.filter_enzyme_kinase(stmts)
            stmts = ac.filter_mod_nokinase(stmts)
            stmts = ac.filter_transcription_factor(stmts)

        if self.assembly_config.get('mechanism_linking'):
            ml = MechLinker(stmts)
            ml.gather_explicit_activities()
            ml.reduce_activities()
            ml.gather_modifications()
            ml.reduce_modifications()
            ml.gather_explicit_activities()
            ml.replace_activations()
            ml.require_active_forms()
            stmts = ml.statements

        self.assembled_stmts = stmts
Example #4
def run_assembly(stmts, filename):
    stmts = ac.map_grounding(stmts)
    stmts = ac.filter_grounded_only(stmts)
    stmts = ac.filter_human_only(stmts)
    #stmts = ac.expand_families(stmts)
    stmts = ac.filter_gene_list(stmts, gene_names, 'one', allow_families=True)
    stmts = ac.map_sequence(stmts)
    stmts = ac.run_preassembly(stmts, return_toplevel=False, poolsize=4)
    ac.dump_statements(stmts, filename)
    return stmts
Example #5
def test_readme_pipeline():
    stmts = gn_stmts  # Added only here, not in docs
    from indra.tools import assemble_corpus as ac
    stmts = ac.filter_no_hypothesis(stmts)
    stmts = ac.map_grounding(stmts)
    stmts = ac.filter_grounded_only(stmts)
    stmts = ac.filter_human_only(stmts)
    stmts = ac.map_sequence(stmts)
    stmts = ac.run_preassembly(stmts, return_toplevel=False)
    stmts = ac.filter_belief(stmts, 0.8)
    assert stmts, 'Update example to yield statements list of non-zero length'
def print_statements(
    statements: List[Statement],
    file: Union[None, str, TextIO] = None,
    sep: Optional[str] = None,
    limit: Optional[int] = None,
    allow_duplicates: bool = False,
    keep_only_pmids: Union[None, str, Collection[str]] = None,
    sort_attrs: Iterable[str] = ('uuid', 'pmid'),
    allow_ungrounded: bool = True,
    minimum_belief: Optional[float] = None,
    extra_columns: Optional[List[str]] = None,
) -> None:
    """Write statements to a CSV for curation.

    This one is similar to the other one, but sorts by the BEL string and only keeps the first for each group.
    """
    sep = sep or '\t'
    extra_columns = extra_columns or []
    extra_columns_placeholders = [''] * len(extra_columns)

    statements = run_preassembly(statements)

    if not allow_ungrounded:
        statements = filter_grounded_only(statements)

    if minimum_belief is not None:
        statements = filter_belief(statements, minimum_belief)

    rows = get_rows_from_statements(statements,
                                    allow_duplicates=allow_duplicates,
                                    keep_only_pmids=keep_only_pmids)
    rows = sorted(rows, key=attrgetter(*sort_attrs))

    if limit is not None:
        rows = rows[:limit]

    if not rows:
        logger.warning('no rows written')
        return

    def _write(_file):
        print(*start_header, *extra_columns, *end_header, sep=sep, file=_file)
        for row in rows:
            print(*row.start_tuple,
                  *extra_columns_placeholders,
                  *row.end_tuple,
                  sep=sep,
                  file=_file)

    if isinstance(file, str):
        with open(file, 'w') as _file:
            _write(_file)
    else:
        _write(file)
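A hedged usage sketch; the statement list and output path are placeholders:

# Sketch only: stmts is assumed to be a list of INDRA Statements.
print_statements(
    stmts,
    file='curation_sheet.tsv',
    limit=100,                # keep at most 100 rows
    allow_ungrounded=False,   # drop ungrounded statements first
    minimum_belief=0.8,       # then apply a belief cutoff
)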
def test_filter_grounded_only():
    # st18 has an agent with an ungrounded bound condition
    st_out = ac.filter_grounded_only([st1, st4])
    assert len(st_out) == 2
    st_out = ac.filter_grounded_only([st3])
    assert len(st_out) == 0

    # Do we filter out a statement with an ungrounded bound condition?
    st_out = ac.filter_grounded_only([st18])
    assert len(st_out) == 0

    # When we request to remove ungrounded bound conditions, do we?
    st18_copy = deepcopy(st18)
    assert len(st18_copy.sub.bound_conditions) == 1
    st_out = ac.filter_grounded_only([st18_copy], remove_bound=True)
    assert len(st_out[0].sub.bound_conditions) == 0

    # When we request to remove ungrounded bound conditions, do we leave
    # grounded bound conditions in place?
    st19_copy = deepcopy(st19)
    assert len(st19_copy.sub.bound_conditions) == 1
    st_out = ac.filter_grounded_only([st19_copy], remove_bound=True)
    assert len(st_out[0].sub.bound_conditions) == 1

    # Do we keep a statement with a grounded bound condition?
    st_out = ac.filter_grounded_only([st19])
    assert len(st_out) == 1
Example #8
def filter_grounded_only():
    """Filter to grounded Statements only."""
    if request.method == 'OPTIONS':
        return {}
    response = request.body.read().decode('utf-8')
    body = json.loads(response)
    stmts_json = body.get('statements')
    score_threshold = body.get('score_threshold')
    if score_threshold is not None:
        score_threshold = float(score_threshold)
    stmts = stmts_from_json(stmts_json)
    stmts_out = ac.filter_grounded_only(stmts, score_threshold=score_threshold)
    return _return_stmts(stmts_out)
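A hedged client-side sketch for calling an endpoint like this one; the host and route are assumptions that depend on how the service is deployed:

# Sketch only: the URL is an assumption. The endpoint reads a JSON body
# with 'statements' and an optional 'score_threshold', as handled above.
import requests
from indra.statements import stmts_to_json

resp = requests.post(
    'http://localhost:8080/preassembly/filter_grounded_only',
    json={'statements': stmts_to_json(stmts), 'score_threshold': 0.7})
filtered_json = resp.json()['statements']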
Example #10
    def preassemble(self, filters=None, grounding_map=None):
        """Preassemble the Statements collected in the model.

        Use INDRA's GroundingMapper, Preassembler and BeliefEngine
        on the IncrementalModel and save the unique statements and
        the top level statements in class attributes.

        Currently the following filter options are implemented:
        - grounding: require that all Agents in statements are grounded
        - human_only: require that all proteins are human proteins
        - prior_one: require that at least one Agent is in the prior model
        - prior_all: require that all Agents are in the prior model

        Parameters
        ----------
        filters : Optional[list[str]]
            A list of filter options to apply when choosing the statements.
            See description above for more details. Default: None
        grounding_map : Optional[dict]
            A user supplied grounding map which maps a string to a
            dictionary of database IDs (in the format used by Agents'
            db_refs).
        """
        stmts = self.get_statements()

        # Filter out hypotheses
        stmts = ac.filter_no_hypothesis(stmts)

        # Fix grounding
        if grounding_map is not None:
            stmts = ac.map_grounding(stmts, grounding_map=grounding_map)
        else:
            stmts = ac.map_grounding(stmts)

        if filters and ('grounding' in filters):
            stmts = ac.filter_grounded_only(stmts)

        # Fix sites
        stmts = ac.map_sequence(stmts)

        if filters and 'human_only' in filters:
            stmts = ac.filter_human_only(stmts)

        # Run preassembly
        stmts = ac.run_preassembly(stmts, return_toplevel=False)

        # Run relevance filter
        stmts = self._relevance_filter(stmts, filters)

        # Save Statements
        self.assembled_stmts = stmts
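A hedged usage sketch, assuming model is an instance of the class defining this method:

# Sketch only: model is assumed to expose the preassemble() method above.
model.preassemble(filters=['grounding', 'human_only'])
stmts = model.assembled_stmts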
Example #12
def filter_grounded_only():
    """Filter to grounded Statements only."""
    response = request.body.read().decode('utf-8')
    body = json.loads(response)
    stmts_json = body.get('statements')
    stmts = stmts_from_json(stmts_json)
    stmts_out = ac.filter_grounded_only(stmts)
    if stmts_out:
        stmts_json = stmts_to_json(stmts_out)
    else:
        stmts_json = []
    return {'statements': stmts_json}
Example #13
def test_eidos_ungrounded():
    a = Agent('x', db_refs={'TEXT': 'x', 'TEXT_NORM': 'y'})
    b = Agent('x', db_refs={
        'TEXT': 'x',
    })
    c = Agent('x', db_refs={'TEXT': 'x', 'GO': 'GO:1234'})
    stmts = [
        Activation(a, b),
        Activation(a, c),
        Activation(b, c),
        Activation(c, c)
    ]
    stmts_out = ac.filter_grounded_only(stmts)
    assert len(stmts_out) == 1
Example #14
def run_assembly(stmts, save_file):
    stmts = ac.map_grounding(stmts)
    stmts = ac.filter_grounded_only(stmts)
    stmts = ac.filter_human_only(stmts)
    stmts = ac.expand_families(stmts)
    stmts = ac.filter_gene_list(stmts, gene_names, 'one')
    stmts = ac.map_sequence(stmts)
    stmts = ac.run_preassembly(stmts, return_toplevel=False)
    stmts = ac.filter_belief(stmts, 0.95)
    stmts = ac.filter_top_level(stmts)
    stmts = ac.filter_direct(stmts)
    stmts = ac.filter_enzyme_kinase(stmts)
    ac.dump_statements(stmts, save_file)
    return stmts
Example #15
def filter_eidos_ungrounded(stmts):
    """Filter out statements from Eidos with ungrounded agents."""
    logger.info(
        'Filtering out ungrounded Eidos statements from %d statements...' %
        len(stmts))
    stmts_out = []
    eidos_stmts = []
    for stmt in stmts:
        if stmt.evidence[0].source_api == 'eidos':
            eidos_stmts.append(stmt)
        else:
            stmts_out.append(stmt)
    eidos_grounded = filter_grounded_only(eidos_stmts)
    stmts_out += eidos_grounded
    logger.info('%d statements after filter...' % len(stmts_out))
    return stmts_out
Example #16
def get_indra_phos_stmts():
    stmts = by_gene_role_type(stmt_type='Phosphorylation')
    stmts += by_gene_role_type(stmt_type='Dephosphorylation')
    stmts = ac.map_grounding(stmts)
    # Expand families before site mapping
    stmts = ac.expand_families(stmts)
    stmts = ac.filter_grounded_only(stmts)
    stmts = ac.map_sequence(stmts)
    ac.dump_statements(stmts, 'sources/indra_phos_sitemap.pkl')
    stmts = ac.run_preassembly(stmts,
                               poolsize=4,
                               save='sources/indra_phos_stmts_pre.pkl')
    stmts = ac.filter_human_only(stmts)
    stmts = ac.filter_genes_only(stmts, specific_only=True)
    ac.dump_statements(stmts, 'sources/indra_phos_stmts.pkl')
    return stmts
Example #17
def test_filter_grounded_only_score():
    c1 = Event(Concept('x', db_refs={'a': [('x', 0.5), ('y', 0.8)]}))
    c2 = Event(Concept('x', db_refs={'a': [('x', 0.7), ('y', 0.9)]}))
    st1 = Influence(c1, c2)
    assert len(ac.filter_grounded_only([st1])) == 1
    assert len(ac.filter_grounded_only([st1], score_threshold=0.4)) == 1
    assert len(ac.filter_grounded_only([st1], score_threshold=0.6)) == 1
    assert len(ac.filter_grounded_only([st1], score_threshold=0.85)) == 0
    assert len(ac.filter_grounded_only([st1], score_threshold=0.95)) == 0
    c3 = Event(Concept('x', db_refs={'a': []}))
    st2 = Influence(c1, c3)
    assert len(ac.filter_grounded_only([st2])) == 0
Example #18
def run_preassembly(statements, hierarchies):
    print('%d total statements' % len(statements))
    # Filter to grounded only
    statements = ac.filter_grounded_only(statements, score_threshold=0.4)
    # Make a Preassembler with the Eidos and TRIPS ontology
    pa = Preassembler(hierarchies, statements)
    # Make a BeliefEngine and run combine duplicates
    be = BeliefEngine()
    unique_stmts = pa.combine_duplicates()
    print('%d unique statements' % len(unique_stmts))
    be.set_prior_probs(unique_stmts)
    # Run combine related
    related_stmts = pa.combine_related(return_toplevel=False)
    be.set_hierarchy_probs(related_stmts)
    # Filter to top-level Statements
    top_stmts = ac.filter_top_level(related_stmts)
    print('%d top-level statements' % len(top_stmts))
    return top_stmts
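For comparison, the explicit Preassembler/BeliefEngine steps above can be approximated with a single assemble_corpus call; a hedged near-equivalent, modulo the custom belief wiring:

# Roughly equivalent one-liner; exact belief handling may differ.
top_stmts = ac.run_preassembly(statements, return_toplevel=True)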
Example #19
def run_preassembly(statements, hierarchies):
    print('%d total statements' % len(statements))
    # Map groundings onto the ontology, then filter to grounded only
    statements = map_onto(statements)
    ac.dump_statements(statements, 'pi_mtg_demo_unfiltered.pkl')
    statements = ac.filter_grounded_only(statements, score_threshold=0.7)

    #statements = ac.filter_by_db_refs(statements, 'UN',
    #    ['conflict', 'food_security', 'precipitation'], policy='one',
    #    match_suffix=True)
    statements = ac.filter_by_db_refs(
        statements,
        'UN', [
            'conflict', 'food_security', 'flooding', 'food_production',
            'human_migration', 'drought', 'food_availability', 'market',
            'food_insecurity'
        ],
        policy='all',
        match_suffix=True)
    assume_polarity(statements)
    statements = filter_has_polarity(statements)

    # Make a Preassembler with the Eidos and TRIPS ontology
    pa = Preassembler(hierarchies, statements)
    # Make a BeliefEngine and run combine duplicates
    be = BeliefEngine()
    unique_stmts = pa.combine_duplicates()
    print('%d unique statements' % len(unique_stmts))
    be.set_prior_probs(unique_stmts)
    # Run combine related
    related_stmts = pa.combine_related(return_toplevel=False)
    be.set_hierarchy_probs(related_stmts)
    #related_stmts = ac.filter_belief(related_stmts, 0.8)
    # Filter to top-level Statements
    top_stmts = ac.filter_top_level(related_stmts)

    pa.stmts = top_stmts
    print('%d top-level statements' % len(top_stmts))
    conflicts = pa.find_contradicts()
    top_stmts = remove_contradicts(top_stmts, conflicts)

    ac.dump_statements(top_stmts, 'pi_mtg_demo.pkl')

    return top_stmts
Example #20
def get_indra_reg_act_stmts():
    try:
        stmts = ac.load_statements('sources/indra_reg_act_stmts.pkl')
        return stmts
    except Exception:
        # No cached pickle available; rebuild from the database below
        pass
    stmts = []
    for stmt_type in ('Activation', 'Inhibition', 'ActiveForm'):
        print("Getting %s statements from INDRA DB" % stmt_type)
        stmts += by_gene_role_type(stmt_type=stmt_type)
    stmts = ac.map_grounding(stmts, save='sources/indra_reg_act_gmap.pkl')
    stmts = ac.filter_grounded_only(stmts)
    stmts = ac.run_preassembly(stmts,
                               poolsize=4,
                               save='sources/indra_reg_act_pre.pkl')
    stmts = ac.filter_human_only(stmts)
    stmts = ac.filter_genes_only(stmts, specific_only=True)
    ac.dump_statements(stmts, 'sources/indra_reg_act_stmts.pkl')
    return stmts
Example #21
def assemble_statements(kinase, stmts, curs):
    """Run assembly steps on statements."""
    # Remove unary statements and ones with many agents
    stmts = [stmt for stmt in stmts if (1 < len(stmt.real_agent_list()) < 4)]
    stmts = replace_ctd(stmts, ctd_stmts_by_gene.get(kinase, []))
    # We do this at this point to make sure we capture the original DB
    # hashes before modifying statements to allow lookup
    for stmt in stmts:
        for ev in stmt.evidence:
            ev.annotations['prior_hash'] = stmt.get_hash()
    stmts = fix_invalidities(stmts)
    stmts = ac.filter_grounded_only(stmts)
    stmts = ac.filter_human_only(stmts)
    stmts = ac.filter_by_curation(stmts, curations=curs)
    stmts = unify_lspci(stmts)
    stmts = remove_contradictions(stmts)
    # Rename chemicals
    logger.info('Renaming chemicals')
    for stmt in stmts:
        for agent in stmt.real_agent_list():
            if agent.db_refs.get('CHEBI') and len(agent.name) > 25:
                rename_chemical(agent)
    # Remove long names
    logger.info('Removing statements with long names')
    stmts = [
        stmt for stmt in stmts if all(
            len(a.name) < 20 for a in stmt.real_agent_list())
    ]
    logger.info('%d statements remaining' % len(stmts))
    # Remove microRNAs
    logger.info('Removing microRNA statements')
    stmts = [
        stmt for stmt in stmts
        if not any('miR' in a.name for a in stmt.real_agent_list())
    ]
    logger.info('%d statements remaining' % len(stmts))
    stmts = add_source_urls(stmts)
    with open('data/assembled/%s.pkl' % kinase, 'wb') as fh:
        pickle.dump(stmts, fh)
    return stmts
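A hedged usage sketch; the kinase name and inputs are placeholders:

# Sketch only: raw_stmts and curations are assumed to be loaded elsewhere;
# 'MAPK1' stands in for any kinase gene name.
assembled = assemble_statements('MAPK1', raw_stmts, curations)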
Example #22
    data_genes = process_data.get_all_gene_names(data)
    reassemble = False
    if not reassemble:
        stmts = ac.load_statements(pjoin(outf, 'preassembled.pkl'))
        #stmts = ac.load_statements(pjoin(outf, 'prior.pkl'))
    else:
        #prior_stmts = build_prior(data_genes, pjoin(outf, 'prior.pkl'))
        prior_stmts = ac.load_statements(pjoin(outf, 'prior.pkl'))
        prior_stmts = ac.map_grounding(prior_stmts,
                                       save=pjoin(outf, 'gmapped_prior.pkl'))
        reading_stmts = ac.load_statements(pjoin(outf, 'phase3_stmts.pkl'))
        reading_stmts = ac.map_grounding(reading_stmts,
                                    save=pjoin(outf, 'gmapped_reading.pkl'))
        stmts = prior_stmts + reading_stmts

        stmts = ac.filter_grounded_only(stmts)
        stmts = ac.filter_genes_only(stmts, specific_only=False)
        stmts = ac.filter_human_only(stmts)
        stmts = ac.expand_families(stmts)
        stmts = ac.filter_gene_list(stmts, data_genes, 'one')
        stmts = ac.map_sequence(stmts, save=pjoin(outf, 'smapped.pkl'))
        stmts = ac.run_preassembly(stmts, return_toplevel=False,
                                   save=pjoin(outf, 'preassembled.pkl'))

    assemble_models = []
    assemble_models.append('sif')
    assemble_models.append('pysb')
    assemble_models.append('cx')

    ### PySB assembly
    if 'pysb' in assemble_models:
Example #23
def parse_results(content):
    from indra.tools.assemble_corpus import filter_grounded_only
    ep = eidos.process_json_bio(content)
    ep.statements = filter_grounded_only(ep.statements)
    return ep
def test_filter_grounded_only():
    st_out = ac.filter_grounded_only([st1, st4])
    assert len(st_out) == 2
    st_out = ac.filter_grounded_only([st3])
    assert len(st_out) == 0
Example #25
    sofia_stmts = load_sofia()
    cwms_stmts = load_cwms()

    # Reground where needed
    # sofia_stmts = reground_stmts(sofia_stmts, world_ontology,
    #                              'WM')
    # cwms_stmts = reground_stmts(cwms_stmts, world_ontology,
    #                             'WM')

    # Put statements together and filter to influence
    stmts = eidos_stmts + hume_stmts + sofia_stmts + cwms_stmts
    stmts = ac.filter_by_type(stmts, Influence)
    # Remove name spaces that aren't needed in CauseMos
    remove_namespaces(stmts, ['WHO', 'MITRE12', 'UN'])

    stmts = ac.filter_grounded_only(stmts, score_threshold=0.7)
    stmts = filter_to_hume_interventions_only(stmts)
    # Filter again to remove any new top level groundings after
    # previous step.
    stmts = ac.filter_grounded_only(stmts, score_threshold=0.7)
    stmts = filter_out_long_words(stmts, 10)
    stmts = filter_groundings(stmts)
    # Make sure we don't include context before 1900
    stmts = filter_context_date(stmts, from_date=datetime(1900, 1, 1))
    stmts = set_positive_polarities(stmts)

    scorer = get_eidos_scorer()

    funs = {
        'grounding': (None, None),
        'location': (location_matches, location_refinement),
Example #26
import os
import csv
import pickle
import indra
from indra.tools.gene_network import GeneNetwork
from indra.tools import assemble_corpus as ac

# STEP 0: Get gene list
gene_list = []
# Get gene list from ras_pathway_proteins.csv
fname = os.path.join(indra.__path__[0], 'resources',
                     'ras_pathway_proteins.csv')
with open(fname, 'r') as f:
    csvreader = csv.reader(f, delimiter='\t')
    for row in csvreader:
        gene_list.append(row[0].strip())

gn = GeneNetwork(gene_list, 'ras_genes')
stmts = gn.get_statements(filter=True)
grounded_stmts = ac.filter_grounded_only(stmts)
results = ac.run_preassembly(grounded_stmts)
with open('ras_220_gn_stmts.pkl', 'wb') as f:
    pickle.dump(results, f)

from indra.tools import assemble_corpus as ac
from indra.statements import stmts_to_json_file
from indra.assemblers.html import HtmlAssembler
from indra.sources import reach
tp = reach.process_pmc('PMC4455820', url=reach.local_nxml_url)
if tp:
    stmts = tp.statements
    print(stmts)
    stmts = ac.filter_grounded_only(stmts)  # Filter out ungrounded agents
    # Run preassembly
    stmts = ac.run_preassembly(
        stmts,
        return_toplevel=False,
        # Optional: rewrite equivalent groundings to one standard
        normalize_equivalences=True,
        # Optional: rewrite opposite groundings to one standard
        normalize_opposites=True,
        # Use 'WM' namespace to normalize equivalences and opposites
        normalize_ns='WM')
    stmts = ac.filter_belief(stmts, 0.8)  # Apply belief cutoff of e.g., 0.8
    stmts_to_json_file(stmts, 'PMC4455820.json')
    ha = HtmlAssembler(stmts)
    ha.save_model('PMC4455820.html')
