def build_prior(gene_names):
    """Build a corpus of prior Statements from PC and BEL."""
    gn = GeneNetwork(gene_names, basen)
    # Read BEL Statements
    bel_stmts = gn.get_bel_stmts(filter=False)
    ac.dump_statements(bel_stmts, prefixed_pkl('bel'))
    # Read Pathway Commons Statements
    database_filter = ['reactome', 'kegg', 'pid']
    biopax_stmts = gn.get_biopax_stmts(database_filter=database_filter)
    # Eliminate blacklisted interactions
    tmp_stmts = []
    for stmt in biopax_stmts:
        source_ids = [ev.source_id for ev in stmt.evidence]
        if set(source_ids) & set(biopax_blacklist):
            continue
        tmp_stmts.append(stmt)
    biopax_stmts = tmp_stmts
    ac.dump_statements(biopax_stmts, prefixed_pkl('biopax'))
    # Read Phosphosite Statements
    phosphosite_stmts = read_phosphosite_owl(phosphosite_owl_file)
    ac.dump_statements(phosphosite_stmts, prefixed_pkl('phosphosite'))
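
# build_prior relies on module-level names defined elsewhere in this
# script: basen (a cache name passed to GeneNetwork), biopax_blacklist
# (evidence source_ids to drop), phosphosite_owl_file, and the
# prefixed_pkl path helper. A minimal sketch of what that setup might
# look like -- the prefix, blacklist contents, and file names below are
# assumptions, not the script's actual values:
#
#     import os
#     import indra.tools.assemble_corpus as ac
#     from indra.tools.gene_network import GeneNetwork
#
#     basen = 'prior_cache'                       # assumed cache name
#     biopax_blacklist = []                       # assumed: source_ids to skip
#     phosphosite_owl_file = 'Kinase_substrates.owl'  # assumed file name
#
#     def prefixed_pkl(name):
#         # Assumed layout for intermediate pickle outputs
#         return os.path.join('output', '%s.pkl' % name)
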
    path_str = ''
    for ix, (node, sign) in enumerate(path):
        if ix == 0:
            path_str += node
        else:
            if sign == last_sign:
                path_str += ' -> %s' % node
            else:
                path_str += ' -| %s' % node
        last_sign = sign
    print('%s : score %s' % (path_str, score))


if __name__ == '__main__':
    # Run run_task1.py before running this one
    with open(prefixed_pkl('pysb_stmts'), 'rb') as f:
        stmts = pickle.load(f)
    with open('scored_paths.pkl', 'rb') as f:
        (scored_paths, model) = pickle.load(f)
    all_groups = set()
    all_path_details = {}
    for cell_line, drug_dict in scored_paths.items():
        for drug, paths in drug_dict.items():
            groups, path_details = group_scored_paths(paths, model, stmts)
            for pg, path_list in path_details.items():
                if pg in all_path_details:
                    all_path_details[pg] |= path_list
                else:
                    all_path_details[pg] = path_list
            all_groups |= groups
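
# Note on the path-printing code above: activating steps are rendered
# with '->' and inhibiting steps with '-|', so a three-node path prints
# as something like "BRAF -> MAP2K1 -| MAPK1 : score 0.42" (names and
# score here are illustrative, not actual output).
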
def assemble_pysb(stmts, data_genes, contextualize=False):
    # Filter the INDRA Statements to be put into the model
    stmts = ac.filter_by_type(stmts, Complex, invert=True)
    stmts = ac.filter_direct(stmts)
    stmts = ac.filter_belief(stmts, 0.95)
    stmts = ac.filter_top_level(stmts)
    # Strip the extraneous supports/supported by here
    strip_supports(stmts)
    stmts = ac.filter_gene_list(stmts, data_genes, 'all')
    stmts = ac.filter_enzyme_kinase(stmts)
    stmts = ac.filter_mod_nokinase(stmts)
    stmts = ac.filter_transcription_factor(stmts)
    # Simplify activity types
    ml = MechLinker(stmts)
    ml.gather_explicit_activities()
    ml.reduce_activities()
    ml.gather_modifications()
    ml.reduce_modifications()
    stmts = normalize_active_forms(ml.statements)
    # Replace activations when possible
    ml = MechLinker(stmts)
    ml.gather_explicit_activities()
    ml.replace_activations()
    # Require active forms
    ml.require_active_forms()
    num_stmts = len(ml.statements)
    while True:
        # Remove inconsequential PTMs
        ml.statements = ac.filter_inconsequential_mods(ml.statements,
                                                       get_mod_whitelist())
        ml.statements = ac.filter_inconsequential_acts(ml.statements,
                                                       get_mod_whitelist())
        if num_stmts <= len(ml.statements):
            break
        num_stmts = len(ml.statements)
    stmts = ml.statements
    # Save the Statements here
    ac.dump_statements(stmts, prefixed_pkl('pysb_stmts'))
    # Add drug target Statements
    drug_target_stmts = get_drug_target_statements()
    stmts += drug_target_stmts
    # Just generate the generic model
    pa = PysbAssembler()
    pa.add_statements(stmts)
    model = pa.make_model()
    with open(prefixed_pkl('pysb_model'), 'wb') as f:
        pickle.dump(model, f)
    # Run this extra part only if contextualize is set to True
    if not contextualize:
        return
    cell_lines_no_data = ['COLO858', 'K2', 'MMACSF', 'MZ7MEL', 'WM1552C']
    for cell_line in cell_lines:
        if cell_line not in cell_lines_no_data:
            stmtsc = contextualize_stmts(stmts, cell_line, data_genes)
        else:
            stmtsc = stmts
        pa = PysbAssembler()
        pa.add_statements(stmtsc)
        model = pa.make_model()
        if cell_line not in cell_lines_no_data:
            contextualize_model(model, cell_line, data_genes)
        ac.dump_statements(stmtsc, prefixed_pkl('pysb_stmts_%s' % cell_line))
        with open(prefixed_pkl('pysb_model_%s' % cell_line), 'wb') as f:
            pickle.dump(model, f)
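
# A hypothetical driver for assemble_pysb, shown only as a sketch: the
# pickle name ('preassembled') and the gene list file are assumptions,
# not taken from the excerpt above.
#
#     with open(prefixed_pkl('preassembled'), 'rb') as fh:
#         prior_stmts = pickle.load(fh)
#     data_genes = [line.strip() for line in open('data_genes.txt')]
#     assemble_pysb(prior_stmts, data_genes, contextualize=True)
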
            stmts_by_ag[stmt.agent.name].append(stmt)
        except KeyError:
            stmts_by_ag[stmt.agent.name] = [stmt]
    unique_by_ag = {}
    for k, v in stmts_by_ag.items():
        for st in v:
            found = False
            try:
                uniques = unique_by_ag[k]
            except KeyError:
                unique_by_ag[k] = []
                uniques = []
            for stmt in uniques:
                if stmt.equals(st):
                    found = True
                    break
            if not found:
                unique_by_ag[k].append(st)
    new_stmts = []
    for k, v in unique_by_ag.items():
        new_stmts += v
    return new_stmts


if __name__ == '__main__':
    stmts = []
    # Accumulate Statements from each active forms file
    for aff in active_forms_files:
        stmts += read_stmts(aff)
    with open(prefixed_pkl('r3'), 'wb') as fh:
        pickle.dump(stmts, fh)
    fnames = glob.glob(os.path.join(base_dir, '*.ekb'))
    return fnames


def get_file_stmts(fname):
    with open(fname, 'rt') as fh:
        xml_str = fh.read()
    tp = trips.process_xml(xml_str)
    if tp is None:
        return []
    return tp.statements


def read_stmts(folder):
    fnames = get_file_names(folder)
    all_stmts = []
    for i, fname in enumerate(fnames):
        print('%d/%d' % (i, len(fnames)))
        print(fname)
        print('=' * len(fname))
        st = get_file_stmts(fname)
        all_stmts += st
    return all_stmts


if __name__ == '__main__':
    stmts = read_stmts(base_folder)
    print('Collected %d Statements from TRIPS' % len(stmts))
    with open(prefixed_pkl('trips'), 'wb') as fh:
        pickle.dump(stmts, fh)
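
# The TRIPS reader above assumes a handful of imports and a base_folder
# defined earlier in the script; a minimal sketch of that setup, where
# the directory name is a placeholder rather than the script's actual
# value:
#
#     import os
#     import glob
#     import pickle
#     from indra.sources import trips
#
#     base_folder = 'trips_ekbs'  # assumed directory of TRIPS EKB XML files
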