def main(args):
    # This file takes about 32 GB to load
    if not args.infile:
        args.infile = './Data/indra_raw/bioexp_all_raw.pkl'
    if not args.outfile:
        args.outfile = './filtered_indra_network.sif'

    # Load statements from file
    stmts_raw = assemble_corpus.load_statements(args.infile)

    # Expand families, fix grounding errors and run preassembly
    stmts_fixed = assemble_corpus.run_preassembly(
        assemble_corpus.map_grounding(
            assemble_corpus.expand_families(stmts_raw)))

    # Default filtering: specific (unique) genes that are grounded.
    stmts_filtered = assemble_corpus.filter_grounded_only(
        assemble_corpus.filter_genes_only(stmts_fixed, specific_only=True))

    # Custom filters
    if args.human_only:
        stmts_filtered = assemble_corpus.filter_human_only(stmts_filtered)
    if args.filter_direct:
        stmts_filtered = assemble_corpus.filter_direct(stmts_filtered)

    binary_stmts = [s for s in stmts_filtered
                    if len(s.agent_list()) == 2
                    and s.agent_list()[0] is not None]
    rows = []
    for s in binary_stmts:
        rows.append([ag.name for ag in s.agent_list()])

    # Write rows to .sif file
    with open(args.outfile, 'w', newline='') as csvfile:
        wrtr = csv.writer(csvfile, delimiter='\t')
        for row in rows:
            wrtr.writerow(row)

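# A minimal, hypothetical CLI wrapper for main() above: the flag names mirror
# the attributes the function reads (infile, outfile, human_only,
# filter_direct); the argparse wiring itself is an assumption, not part of
# the original script.
import argparse

if __name__ == '__main__':
    parser = argparse.ArgumentParser(
        description='Filter INDRA statements into a .sif network')
    parser.add_argument('--infile', help='Pickle of raw INDRA statements')
    parser.add_argument('--outfile', help='Path of the output .sif file')
    parser.add_argument('--human-only', dest='human_only',
                        action='store_true',
                        help='Keep only human proteins')
    parser.add_argument('--filter-direct', dest='filter_direct',
                        action='store_true',
                        help='Keep only direct interactions')
    main(parser.parse_args())
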
def make_model_stmts(old_mm_stmts, other_stmts, new_cord_stmts=None):
    """Process and combine statements from different resources.

    Parameters
    ----------
    old_mm_stmts : list[indra.statements.Statement]
        A list of statements currently in the model.
    other_stmts : list[indra.statements.Statement]
        A list of statements that do not need additional processing
        (e.g. drug, gordon, virhostnet statements).
    new_cord_stmts : Optional[list[indra.statements.Statement]]
        A list of newly extracted statements from the CORD-19 corpus.
        If not provided, the statements are pulled from the database and
        filtered to those not in old_mm_stmts.

    Returns
    -------
    combined_stmts : list[indra.statements.Statement]
        A list of statements to make a new model from.
    paper_ids : list[str]
        A list of TRIDs associated with the statements.
    """
    # If new cord statements are not provided, load from database
    if not new_cord_stmts:
        # Get text refs from metadata
        tr_dicts, multiple_tr_ids = get_tr_dicts_and_ids()
        # Filter to text refs that are not part of old model
        new_tr_dicts = {}
        old_tr_ids = set()
        for stmt in old_mm_stmts:
            for evid in stmt.evidence:
                if evid.text_refs.get('TRID'):
                    old_tr_ids.add(evid.text_refs['TRID'])
        for tr_id in tr_dicts:
            if tr_id not in old_tr_ids:
                new_tr_dicts[tr_id] = tr_dicts[tr_id]
        logger.info('Found %d TextRefs, %d of which are not in old model'
                    % (len(tr_dicts), len(new_tr_dicts)))
        # Get statements for new text refs
        new_cord_stmts = get_raw_stmts(new_tr_dicts, date_limit=5)
    logger.info('Processing the statements')
    # Filter out ungrounded statements
    new_cord_grounded = ac.filter_grounded_only(new_cord_stmts)
    # Group statements by TextRef
    old_mm_by_tr, old_mm_no_tr = stmts_by_text_refs(old_mm_stmts)
    new_cord_by_tr, new_cord_no_tr = stmts_by_text_refs(new_cord_grounded)
    # Add any EMMAA statements from non-CORD-19 publications
    updated_mm_stmts_by_tr = combine_stmts(new_cord_by_tr, old_mm_by_tr)
    updated_mm_stmts = [
        s for stmt_list in updated_mm_stmts_by_tr.values()
        for s in stmt_list
    ]
    # Now, add back in all other statements
    combined_stmts = updated_mm_stmts + other_stmts
    logger.info('Got %d total statements.' % len(combined_stmts))
    logger.info('Processed %d papers.' % len(updated_mm_stmts_by_tr))
    return combined_stmts, list(updated_mm_stmts_by_tr.keys())

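# A usage sketch for make_model_stmts() above; the pickle file names are
# placeholders and only the call signature comes from the function itself.
import pickle

with open('old_mm_stmts.pkl', 'rb') as fh:
    old_mm_stmts = pickle.load(fh)
with open('other_stmts.pkl', 'rb') as fh:
    other_stmts = pickle.load(fh)

# Omitting new_cord_stmts triggers the database lookup branch above
combined_stmts, paper_ids = make_model_stmts(old_mm_stmts, other_stmts)
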
def run_assembly(self):
    """Run INDRA's assembly pipeline on the Statements."""
    self.eliminate_copies()
    stmts = self.get_indra_stmts()
    stmts = self.filter_event_association(stmts)
    stmts = ac.filter_no_hypothesis(stmts)
    if not self.assembly_config.get('skip_map_grounding'):
        stmts = ac.map_grounding(stmts)
    if self.assembly_config.get('standardize_names'):
        ac.standardize_names_groundings(stmts)
    if self.assembly_config.get('filter_ungrounded'):
        score_threshold = self.assembly_config.get('score_threshold')
        stmts = ac.filter_grounded_only(stmts,
                                        score_threshold=score_threshold)
    if self.assembly_config.get('merge_groundings'):
        stmts = ac.merge_groundings(stmts)
    if self.assembly_config.get('merge_deltas'):
        stmts = ac.merge_deltas(stmts)
    relevance_policy = self.assembly_config.get('filter_relevance')
    if relevance_policy:
        stmts = self.filter_relevance(stmts, relevance_policy)
    if not self.assembly_config.get('skip_filter_human'):
        stmts = ac.filter_human_only(stmts)
    if not self.assembly_config.get('skip_map_sequence'):
        stmts = ac.map_sequence(stmts)
    # Use WM hierarchies and belief scorer for WM preassembly
    preassembly_mode = self.assembly_config.get('preassembly_mode')
    if preassembly_mode == 'wm':
        hierarchies = get_wm_hierarchies()
        belief_scorer = get_eidos_scorer()
        stmts = ac.run_preassembly(stmts, return_toplevel=False,
                                   belief_scorer=belief_scorer,
                                   hierarchies=hierarchies)
    else:
        stmts = ac.run_preassembly(stmts, return_toplevel=False)
    belief_cutoff = self.assembly_config.get('belief_cutoff')
    if belief_cutoff is not None:
        stmts = ac.filter_belief(stmts, belief_cutoff)
        stmts = ac.filter_top_level(stmts)
    if self.assembly_config.get('filter_direct'):
        stmts = ac.filter_direct(stmts)
        stmts = ac.filter_enzyme_kinase(stmts)
        stmts = ac.filter_mod_nokinase(stmts)
        stmts = ac.filter_transcription_factor(stmts)
    if self.assembly_config.get('mechanism_linking'):
        ml = MechLinker(stmts)
        ml.gather_explicit_activities()
        ml.reduce_activities()
        ml.gather_modifications()
        ml.reduce_modifications()
        ml.gather_explicit_activities()
        ml.replace_activations()
        ml.require_active_forms()
        stmts = ml.statements
    self.assembled_stmts = stmts

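# An illustrative assembly_config for run_assembly() above. The key names are
# exactly those read via .get() in the method body; the values shown are
# assumptions for the sake of example.
assembly_config = {
    'skip_map_grounding': False,
    'standardize_names': True,
    'filter_ungrounded': True,
    'score_threshold': 0.7,
    'merge_groundings': False,
    'merge_deltas': False,
    'filter_relevance': None,   # or a policy string handled by filter_relevance()
    'skip_filter_human': False,
    'skip_map_sequence': False,
    'preassembly_mode': 'wm',   # use WM hierarchies and the Eidos scorer
    'belief_cutoff': 0.8,
    'filter_direct': False,
    'mechanism_linking': False,
}
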
def run_assembly(stmts, filename):
    stmts = ac.map_grounding(stmts)
    stmts = ac.filter_grounded_only(stmts)
    stmts = ac.filter_human_only(stmts)
    #stmts = ac.expand_families(stmts)
    stmts = ac.filter_gene_list(stmts, gene_names, 'one',
                                allow_families=True)
    stmts = ac.map_sequence(stmts)
    stmts = ac.run_preassembly(stmts, return_toplevel=False, poolsize=4)
    ac.dump_statements(stmts, filename)
    return stmts

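# A usage sketch for this run_assembly() variant; the pickle paths are
# placeholders, and gene_names is assumed to be defined at module scope
# (the function body references it as a global).
stmts = ac.load_statements('raw_stmts.pkl')
assembled = run_assembly(stmts, 'assembled_stmts.pkl')
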
def test_readme_pipeline():
    stmts = gn_stmts  # Added only here, not in docs
    from indra.tools import assemble_corpus as ac
    stmts = ac.filter_no_hypothesis(stmts)
    stmts = ac.map_grounding(stmts)
    stmts = ac.filter_grounded_only(stmts)
    stmts = ac.filter_human_only(stmts)
    stmts = ac.map_sequence(stmts)
    stmts = ac.run_preassembly(stmts, return_toplevel=False)
    stmts = ac.filter_belief(stmts, 0.8)
    assert stmts, 'Update example to yield statements list of non-zero length'

def print_statements(
    statements: List[Statement],
    file: Union[None, str, TextIO] = None,
    sep: Optional[str] = None,
    limit: Optional[int] = None,
    allow_duplicates: bool = False,
    keep_only_pmids: Union[None, str, Collection[str]] = None,
    sort_attrs: Iterable[str] = ('uuid', 'pmid'),
    allow_ungrounded: bool = True,
    minimum_belief: Optional[float] = None,
    extra_columns: Optional[List[str]] = None,
) -> None:
    """Write statements to a CSV for curation.

    Rows are grouped by their BEL string and only the first row of each
    group is kept (unless allow_duplicates is set), then sorted by
    sort_attrs.
    """
    sep = sep or '\t'
    extra_columns = extra_columns or []
    extra_columns_placeholders = [''] * len(extra_columns)

    statements = run_preassembly(statements)
    if not allow_ungrounded:
        statements = filter_grounded_only(statements)
    if minimum_belief is not None:
        statements = filter_belief(statements, minimum_belief)

    rows = get_rows_from_statements(statements,
                                    allow_duplicates=allow_duplicates,
                                    keep_only_pmids=keep_only_pmids)
    rows = sorted(rows, key=attrgetter(*sort_attrs))
    if limit is not None:
        rows = rows[:limit]
    if not rows:
        logger.warning('no rows written')
        return

    def _write(_file):
        print(*start_header, *extra_columns, *end_header, sep=sep,
              file=_file)
        for row in rows:
            print(*row.start_tuple, *extra_columns_placeholders,
                  *row.end_tuple, sep=sep, file=_file)

    if isinstance(file, str):
        with open(file, 'w') as _file:
            _write(_file)
    else:
        _write(file)

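# A usage sketch for print_statements() above; the statements list and
# output path are placeholders, and the keyword arguments match the
# signature.
print_statements(
    statements,
    file='curation_sheet.tsv',
    allow_ungrounded=False,   # drop ungrounded statements before writing
    minimum_belief=0.9,       # keep only high-belief statements
    limit=100,                # cap the number of rows
)
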
def test_filter_grounded_only():
    st_out = ac.filter_grounded_only([st1, st4])
    assert len(st_out) == 2
    st_out = ac.filter_grounded_only([st3])
    assert len(st_out) == 0
    # Do we filter out a statement with an ungrounded bound condition?
    # (st18 has an agent with an ungrounded bound condition)
    st_out = ac.filter_grounded_only([st18])
    assert len(st_out) == 0
    # When we request to remove ungrounded bound conditions, do we?
    st18_copy = deepcopy(st18)
    assert len(st18_copy.sub.bound_conditions) == 1
    st_out = ac.filter_grounded_only([st18_copy], remove_bound=True)
    assert len(st_out[0].sub.bound_conditions) == 0
    # When we request to remove ungrounded bound conditions, do we leave
    # grounded bound conditions in place?
    st19_copy = deepcopy(st19)
    assert len(st19_copy.sub.bound_conditions) == 1
    st_out = ac.filter_grounded_only([st19_copy], remove_bound=True)
    assert len(st_out[0].sub.bound_conditions) == 1
    # Do we keep a statement with a grounded bound condition?
    st_out = ac.filter_grounded_only([st19])
    assert len(st_out) == 1

def filter_grounded_only():
    """Filter to grounded Statements only."""
    if request.method == 'OPTIONS':
        return {}
    response = request.body.read().decode('utf-8')
    body = json.loads(response)
    stmts_json = body.get('statements')
    score_threshold = body.get('score_threshold')
    if score_threshold is not None:
        score_threshold = float(score_threshold)
    stmts = stmts_from_json(stmts_json)
    stmts_out = ac.filter_grounded_only(stmts,
                                        score_threshold=score_threshold)
    return _return_stmts(stmts_out)

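# A hypothetical client-side call against the endpoint above. The URL is an
# assumption; the request body keys ('statements', 'score_threshold') come
# from the handler, and statements are serialized with INDRA's stmts_to_json.
import requests
from indra.statements import stmts_to_json

resp = requests.post(
    'http://localhost:8080/preassembly/filter_grounded_only',
    json={'statements': stmts_to_json(stmts), 'score_threshold': 0.7})
grounded_json = resp.json()['statements']
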
def preassemble(self, filters=None, grounding_map=None):
    """Preassemble the Statements collected in the model.

    Use INDRA's GroundingMapper, Preassembler and BeliefEngine on the
    IncrementalModel and save the unique statements and the top level
    statements in class attributes.

    Currently the following filter options are implemented:
    - grounding: require that all Agents in statements are grounded
    - human_only: require that all proteins are human proteins
    - prior_one: require that at least one Agent is in the prior model
    - prior_all: require that all Agents are in the prior model

    Parameters
    ----------
    filters : Optional[list[str]]
        A list of filter options to apply when choosing the statements.
        See description above for more details. Default: None
    grounding_map : Optional[dict]
        A user supplied grounding map which maps a string to a
        dictionary of database IDs (in the format used by Agents'
        db_refs).
    """
    stmts = self.get_statements()
    # Filter out hypotheses
    stmts = ac.filter_no_hypothesis(stmts)
    # Fix grounding
    if grounding_map is not None:
        stmts = ac.map_grounding(stmts, grounding_map=grounding_map)
    else:
        stmts = ac.map_grounding(stmts)
    if filters and ('grounding' in filters):
        stmts = ac.filter_grounded_only(stmts)
    # Fix sites
    stmts = ac.map_sequence(stmts)
    if filters and 'human_only' in filters:
        stmts = ac.filter_human_only(stmts)
    # Run preassembly
    stmts = ac.run_preassembly(stmts, return_toplevel=False)
    # Run relevance filter
    stmts = self._relevance_filter(stmts, filters)
    # Save Statements
    self.assembled_stmts = stmts

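# A usage sketch for preassemble() above, assuming 'model' is an
# IncrementalModel instance; the grounding-map entry is illustrative.
grounding_map = {'ERK': {'FPLX': 'ERK'}}
model.preassemble(filters=['grounding', 'human_only'],
                  grounding_map=grounding_map)
print('%d assembled statements' % len(model.assembled_stmts))
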
def filter_grounded_only():
    """Filter to grounded Statements only."""
    response = request.body.read().decode('utf-8')
    body = json.loads(response)
    stmts_json = body.get('statements')
    stmts = stmts_from_json(stmts_json)
    stmts_out = ac.filter_grounded_only(stmts)
    if stmts_out:
        stmts_json = stmts_to_json(stmts_out)
        res = {'statements': stmts_json}
        return res
    else:
        res = {'statements': []}
        return res

def test_eidos_ungrounded():
    a = Agent('x', db_refs={'TEXT': 'x', 'TEXT_NORM': 'y'})
    b = Agent('x', db_refs={'TEXT': 'x'})
    c = Agent('x', db_refs={'TEXT': 'x', 'GO': 'GO:1234'})
    stmts = [
        Activation(a, b),
        Activation(a, c),
        Activation(b, c),
        Activation(c, c)
    ]
    stmts_out = ac.filter_grounded_only(stmts)
    # Only Activation(c, c) survives: TEXT/TEXT_NORM entries alone do not
    # count as grounding
    assert len(stmts_out) == 1

def run_assembly(stmts, save_file):
    stmts = ac.map_grounding(stmts)
    stmts = ac.filter_grounded_only(stmts)
    stmts = ac.filter_human_only(stmts)
    stmts = ac.expand_families(stmts)
    stmts = ac.filter_gene_list(stmts, gene_names, 'one')
    stmts = ac.map_sequence(stmts)
    stmts = ac.run_preassembly(stmts, return_toplevel=False)
    stmts = ac.filter_belief(stmts, 0.95)
    stmts = ac.filter_top_level(stmts)
    stmts = ac.filter_direct(stmts)
    stmts = ac.filter_enzyme_kinase(stmts)
    ac.dump_statements(stmts, save_file)
    return stmts

def filter_eidos_ungrounded(stmts):
    """Filter out statements from Eidos with ungrounded agents."""
    logger.info(
        'Filtering out ungrounded Eidos statements from %d statements...'
        % len(stmts))
    stmts_out = []
    eidos_stmts = []
    for stmt in stmts:
        if stmt.evidence[0].source_api == 'eidos':
            eidos_stmts.append(stmt)
        else:
            stmts_out.append(stmt)
    eidos_grounded = filter_grounded_only(eidos_stmts)
    stmts_out += eidos_grounded
    logger.info('%d statements after filter...' % len(stmts_out))
    return stmts_out

def get_indra_phos_stmts():
    stmts = by_gene_role_type(stmt_type='Phosphorylation')
    stmts += by_gene_role_type(stmt_type='Dephosphorylation')
    stmts = ac.map_grounding(stmts)
    # Expand families before site mapping
    stmts = ac.expand_families(stmts)
    stmts = ac.filter_grounded_only(stmts)
    stmts = ac.map_sequence(stmts)
    ac.dump_statements(stmts, 'sources/indra_phos_sitemap.pkl')
    stmts = ac.run_preassembly(stmts, poolsize=4,
                               save='sources/indra_phos_stmts_pre.pkl')
    stmts = ac.filter_human_only(stmts)
    stmts = ac.filter_genes_only(stmts, specific_only=True)
    ac.dump_statements(stmts, 'sources/indra_phos_stmts.pkl')
    return stmts

def test_filter_grounded_only_score():
    c1 = Event(Concept('x', db_refs={'a': [('x', 0.5), ('y', 0.8)]}))
    c2 = Event(Concept('x', db_refs={'a': [('x', 0.7), ('y', 0.9)]}))
    st1 = Influence(c1, c2)
    assert len(ac.filter_grounded_only([st1])) == 1
    assert len(ac.filter_grounded_only([st1], score_threshold=0.4)) == 1
    assert len(ac.filter_grounded_only([st1], score_threshold=0.6)) == 1
    assert len(ac.filter_grounded_only([st1], score_threshold=0.85)) == 0
    assert len(ac.filter_grounded_only([st1], score_threshold=0.95)) == 0
    c3 = Event(Concept('x', db_refs={'a': []}))
    st2 = Influence(c1, c3)
    assert len(ac.filter_grounded_only([st2])) == 0

def run_preassembly(statements, hierarchies):
    print('%d total statements' % len(statements))
    # Filter to grounded only
    statements = ac.filter_grounded_only(statements, score_threshold=0.4)
    # Make a Preassembler with the Eidos and TRIPS ontology
    pa = Preassembler(hierarchies, statements)
    # Make a BeliefEngine and run combine duplicates
    be = BeliefEngine()
    unique_stmts = pa.combine_duplicates()
    print('%d unique statements' % len(unique_stmts))
    be.set_prior_probs(unique_stmts)
    # Run combine related
    related_stmts = pa.combine_related(return_toplevel=False)
    be.set_hierarchy_probs(related_stmts)
    # Filter to top-level Statements
    top_stmts = ac.filter_top_level(related_stmts)
    print('%d top-level statements' % len(top_stmts))
    return top_stmts

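# A usage sketch for this run_preassembly() helper. The hierarchies import
# matches older INDRA releases where Preassembler took a hierarchies
# argument (as this snippet does); treat it as an assumption on newer
# versions of the library.
from indra.preassembler.hierarchy_manager import hierarchies

top_stmts = run_preassembly(statements, hierarchies)
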
def run_preassembly(statements, hierarchies):
    print('%d total statements' % len(statements))
    # Reground, then filter to grounded only
    statements = map_onto(statements)
    ac.dump_statements(statements, 'pi_mtg_demo_unfiltered.pkl')
    statements = ac.filter_grounded_only(statements, score_threshold=0.7)
    #statements = ac.filter_by_db_refs(statements, 'UN',
    #    ['conflict', 'food_security', 'precipitation'], policy='one',
    #    match_suffix=True)
    statements = ac.filter_by_db_refs(
        statements, 'UN',
        ['conflict', 'food_security', 'flooding', 'food_production',
         'human_migration', 'drought', 'food_availability', 'market',
         'food_insecurity'],
        policy='all', match_suffix=True)
    assume_polarity(statements)
    statements = filter_has_polarity(statements)
    # Make a Preassembler with the Eidos and TRIPS ontology
    pa = Preassembler(hierarchies, statements)
    # Make a BeliefEngine and run combine duplicates
    be = BeliefEngine()
    unique_stmts = pa.combine_duplicates()
    print('%d unique statements' % len(unique_stmts))
    be.set_prior_probs(unique_stmts)
    # Run combine related
    related_stmts = pa.combine_related(return_toplevel=False)
    be.set_hierarchy_probs(related_stmts)
    #related_stmts = ac.filter_belief(related_stmts, 0.8)
    # Filter to top-level Statements
    top_stmts = ac.filter_top_level(related_stmts)
    pa.stmts = top_stmts
    print('%d top-level statements' % len(top_stmts))
    conflicts = pa.find_contradicts()
    top_stmts = remove_contradicts(top_stmts, conflicts)
    ac.dump_statements(top_stmts, 'pi_mtg_demo.pkl')
    return top_stmts

def get_indra_reg_act_stmts():
    try:
        stmts = ac.load_statements('sources/indra_reg_act_stmts.pkl')
        return stmts
    except Exception:
        pass
    stmts = []
    for stmt_type in ('Activation', 'Inhibition', 'ActiveForm'):
        print("Getting %s statements from INDRA DB" % stmt_type)
        stmts += by_gene_role_type(stmt_type=stmt_type)
    stmts = ac.map_grounding(stmts, save='sources/indra_reg_act_gmap.pkl')
    stmts = ac.filter_grounded_only(stmts)
    stmts = ac.run_preassembly(stmts, poolsize=4,
                               save='sources/indra_reg_act_pre.pkl')
    stmts = ac.filter_human_only(stmts)
    stmts = ac.filter_genes_only(stmts, specific_only=True)
    ac.dump_statements(stmts, 'sources/indra_reg_act_stmts.pkl')
    return stmts

def assemble_statements(kinase, stmts, curs):
    """Run assembly steps on statements."""
    # Remove unary statements and ones with many agents
    stmts = [stmt for stmt in stmts
             if (1 < len(stmt.real_agent_list()) < 4)]
    stmts = replace_ctd(stmts, ctd_stmts_by_gene.get(kinase, []))
    # We do this at this point to make sure we capture the original DB
    # hashes before modifying statements to allow lookup
    for stmt in stmts:
        for ev in stmt.evidence:
            ev.annotations['prior_hash'] = stmt.get_hash()
    stmts = fix_invalidities(stmts)
    stmts = ac.filter_grounded_only(stmts)
    stmts = ac.filter_human_only(stmts)
    stmts = ac.filter_by_curation(stmts, curations=curs)
    stmts = unify_lspci(stmts)
    stmts = remove_contradictions(stmts)
    # Rename chemicals
    logger.info('Renaming chemicals')
    for stmt in stmts:
        for agent in stmt.real_agent_list():
            if agent.db_refs.get('CHEBI') and len(agent.name) > 25:
                rename_chemical(agent)
    # Remove long names
    logger.info('Removing statements with long names')
    stmts = [stmt for stmt in stmts
             if all(len(a.name) < 20 for a in stmt.real_agent_list())]
    logger.info('%d statements remaining' % len(stmts))
    # Remove microRNAs
    logger.info('Removing microRNA statements')
    stmts = [stmt for stmt in stmts
             if not any('miR' in a.name for a in stmt.real_agent_list())]
    logger.info('%d statements remaining' % len(stmts))
    stmts = add_source_urls(stmts)
    with open('data/assembled/%s.pkl' % kinase, 'wb') as fh:
        pickle.dump(stmts, fh)
    return stmts

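# A usage sketch for assemble_statements() above; the kinase name is an
# example and an empty curation list stands in for curations fetched
# elsewhere.
assembled = assemble_statements('MAPK1', stmts, curs=[])
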
data_genes = process_data.get_all_gene_names(data)

reassemble = False
if not reassemble:
    stmts = ac.load_statements(pjoin(outf, 'preassembled.pkl'))
    #stmts = ac.load_statements(pjoin(outf, 'prior.pkl'))
else:
    #prior_stmts = build_prior(data_genes, pjoin(outf, 'prior.pkl'))
    prior_stmts = ac.load_statements(pjoin(outf, 'prior.pkl'))
    prior_stmts = ac.map_grounding(prior_stmts,
                                   save=pjoin(outf, 'gmapped_prior.pkl'))
    reading_stmts = ac.load_statements(pjoin(outf, 'phase3_stmts.pkl'))
    reading_stmts = ac.map_grounding(reading_stmts,
                                     save=pjoin(outf, 'gmapped_reading.pkl'))
    stmts = prior_stmts + reading_stmts

    stmts = ac.filter_grounded_only(stmts)
    stmts = ac.filter_genes_only(stmts, specific_only=False)
    stmts = ac.filter_human_only(stmts)
    stmts = ac.expand_families(stmts)
    stmts = ac.filter_gene_list(stmts, data_genes, 'one')
    stmts = ac.map_sequence(stmts, save=pjoin(outf, 'smapped.pkl'))
    stmts = ac.run_preassembly(stmts, return_toplevel=False,
                               save=pjoin(outf, 'preassembled.pkl'))

assemble_models = []
assemble_models.append('sif')
assemble_models.append('pysb')
assemble_models.append('cx')

### PySB assembly
if 'pysb' in assemble_models:

def parse_results(content):
    from indra.tools.assemble_corpus import filter_grounded_only
    ep = eidos.process_json_bio(content)
    ep.statements = filter_grounded_only(ep.statements)
    return ep

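# A usage sketch for parse_results() above; 'content' is assumed to be the
# Eidos JSON output read from a placeholder path.
with open('eidos_output.json', 'r') as fh:
    ep = parse_results(fh.read())
print('%d grounded statements' % len(ep.statements))
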
def test_filter_grounded_only():
    st_out = ac.filter_grounded_only([st1, st4])
    assert len(st_out) == 2
    st_out = ac.filter_grounded_only([st3])
    assert len(st_out) == 0

sofia_stmts = load_sofia()
cwms_stmts = load_cwms()
# Reground where needed
# sofia_stmts = reground_stmts(sofia_stmts, world_ontology, 'WM')
# cwms_stmts = reground_stmts(cwms_stmts, world_ontology, 'WM')

# Put statements together and filter to Influence
stmts = eidos_stmts + hume_stmts + sofia_stmts + cwms_stmts
stmts = ac.filter_by_type(stmts, Influence)
# Remove name spaces that aren't needed in CauseMos
remove_namespaces(stmts, ['WHO', 'MITRE12', 'UN'])
stmts = ac.filter_grounded_only(stmts, score_threshold=0.7)
stmts = filter_to_hume_interventions_only(stmts)
# Filter again to remove any new top level groundings after
# the previous step.
stmts = ac.filter_grounded_only(stmts, score_threshold=0.7)
stmts = filter_out_long_words(stmts, 10)
stmts = filter_groundings(stmts)
# Make sure we don't include context before 1900
stmts = filter_context_date(stmts, from_date=datetime(1900, 1, 1))
stmts = set_positive_polarities(stmts)
scorer = get_eidos_scorer()
funs = {
    'grounding': (None, None),
    'location': (location_matches, location_refinement),

import os
import csv
import pickle

import indra
from indra.tools.gene_network import GeneNetwork
from indra.tools import assemble_corpus as ac

# STEP 0: Get gene list
gene_list = []
# Get gene list from ras_pathway_proteins.csv
fname = os.path.join(indra.__path__[0], 'resources',
                     'ras_pathway_proteins.csv')
with open(fname, 'r') as f:
    csvreader = csv.reader(f, delimiter='\t')
    for row in csvreader:
        gene_list.append(row[0].strip())

gn = GeneNetwork(gene_list, 'ras_genes')
stmts = gn.get_statements(filter=True)
grounded_stmts = ac.filter_grounded_only(stmts)
results = ac.run_preassembly(grounded_stmts)

with open('ras_220_gn_stmts.pkl', 'wb') as f:
    pickle.dump(results, f)

from indra.tools import assemble_corpus as ac
from indra.statements import stmts_to_json_file
from indra.assemblers.html import HtmlAssembler
from indra.sources import reach

tp = reach.process_pmc('PMC4455820', url=reach.local_nxml_url)
if tp:
    stmts = tp.statements
    print(stmts)
    stmts = ac.filter_grounded_only(stmts)  # Filter out ungrounded agents
    stmts = ac.run_preassembly(  # Run preassembly
        stmts,
        return_toplevel=False,
        normalize_equivalences=True,  # Optional: rewrite equivalent groundings to one standard
        normalize_opposites=True,     # Optional: rewrite opposite groundings to one standard
        normalize_ns='WM')            # Use 'WM' namespace to normalize equivalences and opposites
    stmts = ac.filter_belief(stmts, 0.8)  # Apply belief cutoff of e.g., 0.8
    stmts_to_json_file(stmts, 'PMC4455820.json')
    ha = HtmlAssembler(stmts)
    ha.save_model('PMC4455820.html')