def assemble_pysb(stmts, data_genes, out_file): """Return an assembled PySB model.""" base_file, _ = os.path.splitext(out_file) #stmts = ac.load_statements('%s.pkl' % base_file) stmts = preprocess_stmts(stmts, data_genes) # Make a SIF model equivalent to the PySB model # Useful for making direct comparisons in pathfinding sa = SifAssembler(stmts) sa.make_model(use_name_as_key=True, include_mods=True, include_complexes=True) sif_str = sa.print_model(include_unsigned_edges=True) with open('%s_pysb.sif' % base_file, 'wt') as f: f.write(sif_str) # This is the "final" set of statements going into the assembler so it # makes sense to cache these. # This is also the point where index cards can be generated ac.dump_statements(stmts, '%s_before_pa.pkl' % base_file) assemble_index_cards(stmts, 'output/index_cards') # Save a version of statements with no evidence for faster loading for s in stmts: s.evidence = [] for ss in s.supports + s.supported_by: ss.evidence = [] ac.dump_statements(stmts, '%s_no_evidence.pkl' % base_file) # Assemble model pa = PysbAssembler() pa.add_statements(stmts) pa.make_model(reverse_effects=False) #ac.dump_statements(pa.statements, '%s_after_pa.pkl' % base_file) # Set context set_context(pa) # Add observables add_observables(pa.model) pa.save_model(out_file) with open('korkut_pysb.pkl', 'wb') as fh: pickle.dump(pa.model, fh) #pa.export_model('kappa', '%s.ka' % base_file) return pa.model
def assemble_sif(stmts, data, out_file): """Return an assembled SIF.""" # Filter for high-belief statements stmts = ac.filter_belief(stmts, 0.99) stmts = ac.filter_top_level(stmts) # Filter for Activation / Inhibition stmts_act = ac.filter_by_type(stmts, Activation) stmts_inact = ac.filter_by_type(stmts, Inhibition) stmts = stmts_act + stmts_inact # Get Ras227 and filter statments ras_genes = process_data.get_ras227_genes() #ras_genes = [x for x in ras_genes if x not in ['YAP1']] stmts = ac.filter_gene_list(stmts, ras_genes, 'all') # Get the drugs inhibiting their targets as INDRA # statements def get_drug_statements(): drug_targets = process_data.get_drug_targets() drug_stmts = [] for dn, tns in drug_targets.items(): da = Agent(dn + ':Drugs') for tn in tns: ta = Agent(tn) drug_stmt = Inhibition(da, ta) drug_stmts.append(drug_stmt) return drug_stmts drug_stmts = get_drug_statements() stmts = stmts + drug_stmts # Rewrite statements to replace genes with their corresponding # antibodies when possible stmts = rewrite_ab_stmts(stmts, data) def filter_ab_edges(st, policy='all'): st_out = [] for s in st: if policy == 'all': all_ab = True for a in s.agent_list(): if a is not None: if a.name.find('_p') == -1 and \ a.name.find('Drugs') == -1: all_ab = False break if all_ab: st_out.append(s) elif policy == 'one': any_ab = False for a in s.agent_list(): if a is not None and a.name.find('_p') != -1: any_ab = True break if any_ab: st_out.append(s) return st_out stmts = filter_ab_edges(stmts, 'all') # Get a list of the AB names that end up being covered in the prior network # This is important because other ABs will need to be taken out of the # MIDAS file to work. def get_ab_names(st): prior_abs = set() for s in st: for a in s.agent_list(): if a is not None: if a.name.find('_p') != -1: prior_abs.add(a.name) return sorted(list(prior_abs)) pkn_abs = get_ab_names(stmts) def get_drug_names(st): prior_drugs = set() for s in st: for a in s.agent_list(): if a is not None: if a.name.find('Drugs') != -1: prior_drugs.add(a.name.split(':')[0]) return sorted(list(prior_drugs)) pkn_drugs = get_drug_names(stmts) print('Boolean PKN contains these antibodies: %s' % ', '.join(pkn_abs)) # Because of a bug in CNO, # node names containing AND need to be replaced # node names containing - need to be replaced # node names starting in a digit need to be replaced # must happen before SIF assembly, but not sooner as that will drop # names from the MIDAS file def rename_nodes(st): for s in st: for a in s.agent_list(): if a is not None: if a.name.find('AND') != -1: a.name = a.name.replace('AND', 'A_ND') if a.name.find('-') != -1: a.name = a.name.replace('-', '_') if a.name[0].isdigit(): a.name = 'abc_' + a.name rename_nodes(stmts) # Make the SIF model sa = SifAssembler(stmts) sa.make_model(use_name_as_key=True) sif_str = sa.print_model() # assemble and dump a cx of the sif ca = CxAssembler() ca.add_statements(stmts) model = ca.make_model() ca.save_model('sif.cx') with open(out_file, 'wb') as fh: fh.write(sif_str.encode('utf-8')) # Make the MIDAS data file used for training the model midas_data = process_data.get_midas_data(data, pkn_abs, pkn_drugs) return sif_str