def get_reader_sites(input_file):
    input_stmts = ac.load_statements(input_file)
    readers = ('reach', 'sparser', 'rlimsp')
    pm = ProtMapper(use_cache=True, cache_path=CACHE_PATH)
    sites_by_reader = {}
    # For all readers
    for reader in readers:
        sites = []
        # Filter to stmts for this reader
        reader_stmts = [s for s in input_stmts
                        if s.evidence[0].source_api == reader]
        for s in reader_stmts:
            up_id = s.sub.db_refs.get('UP')
            # Filter to stmts with substrate UP ID, residue and position
            if up_id is None or s.residue is None or s.position is None:
                continue
            if s.residue not in ('S', 'T', 'Y'):
                continue
            site = (up_id, s.residue, s.position)
            # Get the mapped site for the residue
            ms = pm.map_to_human_ref(up_id, 'uniprot', s.residue, s.position)
            sites.append(ms)
        # Group, tabulate frequency
        site_ctr = Counter(sites)
        # Store in dict
        sites_by_reader[reader] = site_ctr
    # Save sites
    with open('output/reader_sites.pkl', 'wb') as f:
        pickle.dump(sites_by_reader, f)
    # Save cache
    pm.save_cache()
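# A minimal sketch (not part of the original) of reading back the pickle
# written by get_reader_sites() and inspecting it; the path and the
# {reader: Counter-of-mapped-sites} structure follow from the function above.
import pickle

with open('output/reader_sites.pkl', 'rb') as f:
    sites_by_reader = pickle.load(f)

for reader, site_ctr in sites_by_reader.items():
    print(reader)
    # Counter.most_common gives the most frequently mapped sites first
    for mapped_site, count in site_ctr.most_common(5):
        print('  %s: %d' % (mapped_site, count))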
def main(args):
    # This file takes about 32 GB to load
    if not args.infile:
        args.infile = './Data/indra_raw/bioexp_all_raw.pkl'
    if not args.outfile:
        args.outfile = './filtered_indra_network.sif'
    # Load statements from file
    stmts_raw = assemble_corpus.load_statements(args.infile)
    # Expand families, fix grounding errors and run preassembly
    stmts_fixed = assemble_corpus.run_preassembly(
        assemble_corpus.map_grounding(
            assemble_corpus.expand_families(stmts_raw)))
    # Default filtering: specific (unique) genes that are grounded.
    stmts_filtered = assemble_corpus.filter_grounded_only(
        assemble_corpus.filter_genes_only(stmts_fixed, specific_only=True))
    # Custom filters
    if args.human_only:
        stmts_filtered = assemble_corpus.filter_human_only(stmts_filtered)
    if args.filter_direct:
        stmts_filtered = assemble_corpus.filter_direct(stmts_filtered)
    binary_stmts = [s for s in stmts_filtered
                    if len(s.agent_list()) == 2
                    and s.agent_list()[0] is not None]
    rows = []
    for s in binary_stmts:
        rows.append([ag.name for ag in s.agent_list()])
    # Write rows to .sif file
    with open(args.outfile, 'w', newline='') as csvfile:
        wrtr = csv.writer(csvfile, delimiter='\t')
        for row in rows:
            wrtr.writerow(row)
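# A hypothetical sketch (not from the original script) of the argparse wiring
# this main() assumes; the attribute names (infile, outfile, human_only,
# filter_direct) are taken from the function above, but the exact flag
# spellings are assumptions.
if __name__ == '__main__':
    import argparse
    parser = argparse.ArgumentParser()
    parser.add_argument('--infile', default=None)
    parser.add_argument('--outfile', default=None)
    parser.add_argument('--human-only', dest='human_only',
                        action='store_true')
    parser.add_argument('--filter-direct', dest='filter_direct',
                        action='store_true')
    main(parser.parse_args())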
def combine_all_stmts(pkl_list, output_file):
    all_stmts = []
    for pkl_file in pkl_list:
        all_stmts.extend(ac.load_statements(pkl_file))
    ac.dump_statements(all_stmts, output_file)
    stmt_json = stmts_to_json(all_stmts)
    output_json = f"{output_file.rsplit('.', maxsplit=1)[0]}.json"
    with open(output_json, 'wt') as f:
        json.dump(stmt_json, f, indent=2)
    return all_stmts
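# Example usage sketch with hypothetical file names: the function combines
# the statements from the input pickles, writes them to the output pickle,
# and also writes a JSON dump with the same basename.
stmts = combine_all_stmts(['reach_stmts.pkl', 'sparser_stmts.pkl'],
                          'combined_stmts.pkl')
# Side effect: 'combined_stmts.json' is written next to the pickle
print('Combined %d statements' % len(stmts))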
def load_prior(self, prior_fname):
    """Load a set of prior statements from a pickle file.

    The prior statements have a special key in the stmts dictionary
    called "prior".

    Parameters
    ----------
    prior_fname : str
        The name of the pickle file containing the prior Statements.
    """
    self.stmts['prior'] = ac.load_statements(prior_fname)
def get_indra_expression():
    #inc_stmts = by_gene_role_type(stmt_type='IncreaseAmount')
    #dec_stmts = by_gene_role_type(stmt_type='DecreaseAmount')
    #stmts = inc_stmts + dec_stmts
    #ac.dump_statements(stmts, 'indra_regulate_amount_stmts.pkl')
    #stmts = ac.load_statements('indra_regulate_amount_stmts.pkl')
    #stmts = ac.map_grounding(stmts)
    # Expand families before site mapping
    #stmts = ac.expand_families(stmts)
    #stmts = ac.filter_grounded_only(stmts)
    #stmts = ac.map_sequence(stmts)
    #stmts = ac.run_preassembly(stmts, poolsize=4,
    #                           save='indra_regulate_amount_pre.pkl')
    stmts = ac.load_statements('indra_regulate_amount_pre.pkl')
    stmts = ac.filter_human_only(stmts)
    stmts = ac.filter_genes_only(stmts)
    stmts = [s for s in stmts if s.agent_list()[0] is not None]
    return stmts
def get_indra_reg_act_stmts():
    try:
        stmts = ac.load_statements('sources/indra_reg_act_stmts.pkl')
        return stmts
    except Exception:
        # No cached pickle available; rebuild from the INDRA DB below
        pass
    stmts = []
    for stmt_type in ('Activation', 'Inhibition', 'ActiveForm'):
        print("Getting %s statements from INDRA DB" % stmt_type)
        stmts += by_gene_role_type(stmt_type=stmt_type)
    stmts = ac.map_grounding(stmts, save='sources/indra_reg_act_gmap.pkl')
    stmts = ac.filter_grounded_only(stmts)
    stmts = ac.run_preassembly(stmts, poolsize=4,
                               save='sources/indra_reg_act_pre.pkl')
    stmts = ac.filter_human_only(stmts)
    stmts = ac.filter_genes_only(stmts, specific_only=True)
    ac.dump_statements(stmts, 'sources/indra_reg_act_stmts.pkl')
    return stmts
def test_dump_stmts():
    ac.dump_statements([st1], '_test.pkl')
    st_loaded = ac.load_statements('_test.pkl')
    assert len(st_loaded) == 1
    assert st_loaded[0].equals(st1)
def test_load_stmts():
    with open('_test.pkl', 'wb') as fh:
        pickle.dump([st1], fh)
    st_loaded = ac.load_statements('_test.pkl')
    assert len(st_loaded) == 1
    assert st_loaded[0].equals(st1)
                    '--ctd_stmts', help='Path to CTD statements pkl file',
                    required=True)
parser.add_argument('-f', '--output_file',
                    help='Output file for combined pkl', required=True)
args = parser.parse_args()

# Load everything
logger.info('Loading statements from pickle files')
with open(args.old_mm, 'rb') as f:
    old_mm_emmaa_stmts = pickle.load(f)
old_mm_stmts = [es.stmt for es in old_mm_emmaa_stmts]
if args.new_cord:
    new_cord_stmts = ac.load_statements(args.new_cord)
else:
    new_cord_stmts = None
drug_stmts = ac.load_statements(args.drug_stmts)
gordon_stmts = ac.load_statements(args.gordon_stmts)
virhostnet_stmts = ac.load_statements(args.virhostnet_stmts)
ctd_stmts = ac.load_statements(args.ctd_stmts)
other_stmts = drug_stmts + gordon_stmts + virhostnet_stmts + ctd_stmts

combined_stmts = make_model_stmts(old_mm_stmts, other_stmts, new_cord_stmts)

# Dump new pickle
ac.dump_statements(combined_stmts, args.output_file)
def get_phosphosite_stmts():
    stmts = ac.load_statements('sources/phosphosite_stmts.pkl')
    stmts = ac.filter_human_only(stmts)
    stmts = ac.filter_genes_only(stmts, specific_only=True)
    return stmts
def main(args):
    global any_expl, any_expl_not_sr, common_parent, ab_expl_count, \
        directed_im_expl_count, both_im_dir_expl_count, \
        any_axb_non_sr_expl_count, sr_expl_count, \
        shared_regulator_only_expl_count, explanations_of_pairs, \
        unexplained, explained_nested_dict, id1, id2, \
        nested_dict_statements, dataset_dict, avg_corr, dir_node_set, \
        nx_dir_graph, explained_set, part_of_explained, sr_explanations, \
        any_expl_ign_sr

    if args.cell_line_filter and not len(args.cell_line_filter) > 2:
        logger.info('Filtering to provided cell lines in correlation '
                    'calculations.')
        cell_lines = _parse_cell_filter(*args.cell_line_filter)
        assert len(cell_lines) > 0
    elif args.cell_line_filter and len(args.cell_line_filter) > 2:
        sys.exit('Argument --cell-line-filter only takes one or two '
                 'arguments')
    # No cell line dictionary and rnai data and filtering is requested
    elif args.cell_line_filter and len(args.cell_line_filter) == 1 and \
            args.rnai_data_file:
        sys.exit('Need a translation dictionary if RNAi data is provided '
                 'and filter is requested')
    else:
        # Should be empty only when --cell-line-filter is not provided
        logger.info('No cell line filter provided. Using all cell lines in '
                    'correlation calculations.')
        cell_lines = []

    # Parse "explained genes"
    if args.explained_set and len(args.explained_set) == 2:
        explained_set = _parse_explained_genes(
            gene_set_file=args.explained_set[0],
            check_column=args.explained_set[1])
        logger.info('Loading "explained pairs."')
    elif args.explained_set and len(args.explained_set) != 2:
        sys.exit('Argument --explained-set takes exactly two arguments: '
                 '--explained-set <file> <column name>')

    # Check if belief dict is provided
    if not args.belief_score_dict and not args.nested_dict_in:
        logger.error('Belief dict must be provided through the `-b ('
                     '--belief-score-dict)` argument if no nested dict '
                     'of statements with belief score is provided through '
                     'the `-ndi (--nested-dict-in)` argument.')
        raise FileNotFoundError

    # Get dict of {hash: belief score}
    belief_dict = None  # ToDo use api to query belief scores if not loaded
    if args.belief_score_dict:
        if args.belief_score_dict.endswith('.json'):
            belief_dict = _json_open(args.belief_score_dict)
        elif args.belief_score_dict.endswith('.pkl'):
            belief_dict = _pickle_open(args.belief_score_dict)

    args_dict = _arg_dict(args)
    npairs = 0

    filter_settings = {
        'gene_set_filter': args.gene_set_filter,
        'strict': args.strict,
        'cell_line_filter': cell_lines,
        'cell_line_translation_dict':
            _pickle_open(args.cell_line_filter[1])
            if args.cell_line_filter and len(args.cell_line_filter) == 2
            else None,
        'margin': args.margin,
        'filter_type': (args.filter_type if args.filter_type else None)
    }

    output_settings = {
        'dump_unique_pairs': args.dump_unique_pairs,
        'outbasename': args.outbasename
    }

    # Parse CRISPR and/or RNAi data
    if args_dict.get('crispr') or args_dict.get('rnai'):
        if not filter_settings['filter_type'] and \
                args.crispr_data_file and \
                args.rnai_data_file:
            logger.info('No merge filter set. Output will be intersection '
                        'of the two data sets.')
        elif filter_settings.get('filter_type'):
            logger.info('Using filter type "%s"' %
                        filter_settings['filter_type'])
        master_corr_dict, all_hgnc_ids, stats_dict = \
            dnf.get_combined_correlations(dict_of_data_sets=args_dict,
                                          filter_settings=filter_settings,
                                          output_settings=output_settings)

        # Count pairs in merged correlation dict and dump it
        npairs = dnf._dump_master_corr_dict_to_pairs_in_csv(
            fname=args.outbasename + '_merged_corr_pairs.csv',
            nest_dict=master_corr_dict)

        if args.gene_set_filter:
            gene_filter_list = None
            if args_dict.get('crispr') and not args_dict.get('rnai'):
                gene_filter_list = dnf._read_gene_set_file(
                    gf=filter_settings['gene_set_filter'],
                    data=pd.read_csv(args_dict['crispr']['data'],
                                     index_col=0, header=0))
            elif args_dict.get('rnai') and not args_dict.get('crispr'):
                gene_filter_list = dnf._read_gene_set_file(
                    gf=filter_settings['gene_set_filter'],
                    data=pd.read_csv(args_dict['rnai']['data'],
                                     index_col=0, header=0))
            elif args_dict.get('crispr') and args_dict.get('rnai'):
                gene_filter_list = \
                    set(dnf._read_gene_set_file(
                        gf=filter_settings['gene_set_filter'],
                        data=pd.read_csv(args_dict['crispr']['data'],
                                         index_col=0, header=0))) & \
                    set(dnf._read_gene_set_file(
                        gf=filter_settings['gene_set_filter'],
                        data=pd.read_csv(args_dict['rnai']['data'],
                                         index_col=0, header=0)))
            assert gene_filter_list is not None
        else:
            gene_filter_list = None
    else:
        stats_dict = None

    # LOADING INDRA STATEMENTS
    # Get statements from file or from database that contain any gene from
    # provided list as set unless you're already loading a pre-calculated
    # nested dict and/or precalculated directed graph.
    if not (args.light_weight_stmts or args.nested_dict_in):
        if args.statements_in:
            # Get statements from file
            stmts_all = set(ac.load_statements(args.statements_in))
        # Use api to get statements. _NOT_ the same as querying for each ID
        else:
            if args.gene_set_filter:
                stmts_all = dnf.dbc_load_statements(gene_filter_list)
            else:
                # if there is no gene set file, restrict to gene ids in
                # input data
                stmts_all = dnf.dbc_load_statements(list(all_hgnc_ids))

        # Dump statements to pickle file if output name has been given
        if args.statements_out:
            logger.info('Dumping read raw statements')
            ac.dump_statements(stmts=stmts_all, fname=args.statements_out)

    # Get nested dicts from statements
    if args.light_weight_stmts:
        hash_df = pd.read_csv(args.light_weight_stmts, delimiter='\t')
        nested_dict_statements = \
            dnf.nested_hash_dict_from_pd_dataframe(hash_df)
    elif args.nested_dict_in:
        nested_dict_statements = _pickle_open(args.nested_dict_in)
    else:
        nested_dict_statements = dnf.dedupl_nested_dict_gen(stmts_all,
                                                            belief_dict)
        if args.nested_dict_out:
            _dump_it_to_pickle(fname=args.nested_dict_out,
                               pyobj=nested_dict_statements)

    # Get directed simple graph
    if args.directed_graph_in:
        with open(args.directed_graph_in, 'rb') as rpkl:
            nx_dir_graph = pkl.load(rpkl)
    else:
        # Create directed graph from statement dict
        nx_dir_graph = dnf.nx_directed_graph_from_nested_dict_2layer(
            nest_d=nested_dict_statements, belief_dict=belief_dict)
        # Save as pickle file
        if args.directed_graph_out:
            _dump_it_to_pickle(fname=args.directed_graph_out,
                               pyobj=nx_dir_graph)
    dir_node_set = set(nx_dir_graph.nodes)

    # LOOP THROUGH THE UNIQUE CORRELATION PAIRS, MATCH WITH INDRA NETWORK
    any_expl = 0  # Count if any explanation per (A,B) correlation found
    any_expl_not_sr = 0  # Count any explanation, excluding when shared
    # regulator is the only explanation
    any_expl_ign_sr = 0  # Count any explanation, ignoring shared regulator
    # explanations
    common_parent = 0  # Count if common parent found per set(A,B)
    part_of_explained = 0  # Count pairs part of the "explained set"
    ab_expl_count = 0  # Count A-B/B-A as one per set(A,B)
    directed_im_expl_count = 0  # Count any A->X->B,B->X->A as one per
    # set(A,B)
    any_axb_non_sr_expl_count = 0  # Count if shared target found per
    # set(A,B)
    sr_expl_count = 0  # Count if shared regulator found per set(A,B)
    shared_regulator_only_expl_count = 0  # Count if only shared regulator
    # found
    explanations_of_pairs = []  # Saves all non shared regulator explanations
    sr_explanations = []  # Saves all shared regulator explanations
    unexplained = []  # Unexplained correlations
    skipped = 0

    # The explained nested dict: (1st key = subj, 2nd key = obj, 3rd key =
    # connection type or correlation).
    #
    # directed: any A->B or B->A
    # undirected: any of complex, selfmodification, parent
    # x_is_intermediary: A->X->B or B->X->A
    # x_is_downstream: A->X<-B
    # x_is_upstream: A<-X->B
    #
    # d[subj][obj] = {correlation: {gene_set1: corr, gene_set2: corr, ...},
    #                 directed: [(stmt/stmt hash, belief score)],
    #                 undirected: [(stmt/stmt hash, belief score)],
    #                 common_parents: [list of parents]
    #                 x_is_intermediary: [(X, belief rank)],
    #                 x_is_downstream: [(X, belief rank)],
    #                 x_is_upstream: [(X, belief rank)]}
    #
    # Then in javascript you can for example do:
    # if SUBJ_is_subj_dict.obj.direct.length <-- should return zero if []
    #
    # Used to get: directed graph
    # 1. all nodes of directed graph -> 1st dropdown
    # 2. dir -> undir graph -> jsons to check all corr neighbors -> 2nd
    #    dropdown
    # 3. jsons to check if connection is direct or intermediary

    # Using the following loop structure for counter variables:
    # a = 2
    # def for_loop_body():
    #     global a
    #     a += 1
    #
    # Then loop like:
    # if dict:
    #     for pairs in dict:
    #         for_loop_body(args)
    # elif random:
    #     for random pair:
    #         for_loop_body(args)

    explained_nested_dict = dnf.create_nested_dict()

    # Loop rnai and/or crispr only
    if (args_dict.get('rnai') or args_dict.get('crispr')) and \
            not args.brca_dependencies:
        logger.info('Gene pairs generated from DepMap knockout screening '
                    'data sets')
        logger.info('Looking for connections between %i pairs' %
                    (npairs if npairs > 0 else args.max_pairs))
        for outer_id, do in master_corr_dict.items():
            for inner_id, dataset_dict in do.items():
                if len(dataset_dict.keys()) == 0:
                    skipped += 1
                    if args.verbosity:
                        logger.info('Skipped outer_id=%s and inner_id=%s' %
                                    (outer_id, inner_id))
                    continue
                id1, id2 = outer_id, inner_id
                loop_body(args)

    # Loop rnai and/or crispr AND BRCA cell line dependencies
    elif (args_dict.get('rnai') or args_dict.get('crispr')) and \
            args.brca_dependencies:
        logger.info('Gene pairs generated from combined knockout screens. '
                    'Output data will include BRCA cell line dependency\n'
                    'data as well as correlation data from knockout '
                    'screens.')
        logger.info('Looking for connections between %i pairs' %
                    (npairs if npairs > 0 else args.max_pairs))

        # Load BRCA dependency data
        brca_data_set = pd.read_csv(args.brca_dependencies, header=0)
        depend_in_breast_genes = brca_data_set.drop(
            axis=1, labels=['Url Label', 'Type'])[
                brca_data_set['Type'] == 'gene']
        genes = set(depend_in_breast_genes['Gene/Compound'].values)

        for outer_id, do in master_corr_dict.items():
            for inner_id, knockout_dict in do.items():
                if len(knockout_dict.keys()) == 0:
                    skipped += 1
                    if args.verbosity:
                        logger.info('Skipped outer_id=%s and inner_id=%s' %
                                    (outer_id, inner_id))
                    continue
                id1, id2 = outer_id, inner_id

                dataset_dict = {}
                gene1_data = []
                gene2_data = []

                # Get BRCA dep data
                if id1 in genes:
                    for row in depend_in_breast_genes[
                            depend_in_breast_genes['Gene/Compound'] ==
                            id1].iterrows():
                        gene1_data.append((row[1]['Dataset'],
                                           row[1]['T-Statistic'],
                                           row[1]['P-Value']))
                if id2 in genes:
                    for row in depend_in_breast_genes[
                            depend_in_breast_genes['Gene/Compound'] ==
                            id2].iterrows():
                        gene2_data.append((row[1]['Dataset'],
                                           row[1]['T-Statistic'],
                                           row[1]['P-Value']))

                dataset_dict[id1] = gene1_data
                dataset_dict[id2] = gene2_data
                dataset_dict['crispr'] = (knockout_dict['crispr'] if
                                          knockout_dict.get('crispr')
                                          else None)
                dataset_dict['rnai'] = (knockout_dict['rnai'] if
                                        knockout_dict.get('rnai')
                                        else None)

                if id1 not in genes and id2 not in genes:
                    dataset_dict = knockout_dict

                # Run loop body
                loop_body(args)

    # loop brca dependency ONLY
    elif args.brca_dependencies and not \
            (args_dict.get('rnai') or args_dict.get('crispr')):
        logger.info(
            'Gene pairs generated from BRCA gene enrichment data only.')
        brca_data_set = pd.read_csv(args.brca_dependencies, header=0)
        depend_in_breast_genes = brca_data_set.drop(
            axis=1, labels=['Url Label', 'Type'])[
                brca_data_set['Type'] == 'gene']
        genes = set(depend_in_breast_genes['Gene/Compound'].values)
        npairs = len(list(itt.combinations(genes, 2)))

        logger.info('Looking for connections between %i pairs' %
                    (npairs if npairs > 0 else args.max_pairs))
        for id1, id2 in itt.combinations(genes, 2):
            gene1_data = []
            gene2_data = []

            # For each non-diagonal pair in file, insert in dataset_dict:
            # geneA, geneB,
            # dataset for A, dataset for B,
            # T-stat for A, T-stat for B,
            # P-value for A, P-value for B
            for row in depend_in_breast_genes[
                    depend_in_breast_genes['Gene/Compound'] ==
                    id1].iterrows():
                gene1_data.append((row[1]['Dataset'],
                                   row[1]['T-Statistic'],
                                   row[1]['P-Value']))

            for row in depend_in_breast_genes[
                    depend_in_breast_genes['Gene/Compound'] ==
                    id2].iterrows():
                gene2_data.append((row[1]['Dataset'],
                                   row[1]['T-Statistic'],
                                   row[1]['P-Value']))

            # dataset_dict = {id1:
            #                 [(dataset1, T-stat1, P-value1),
            #                  (dataset2, T-stat2, P-value2)],
            #                 id2:
            #                 [(..., ...)],
            #                 ...}
            dataset_dict = {id1: gene1_data, id2: gene2_data}
            loop_body(args)

    # loop random pairs from data set
    elif args_dict.get('sampling_gene_file'):
        logger.info('Gene pairs generated at random from %s' %
                    args_dict['sampling_gene_file'])
        with open(args_dict['sampling_gene_file'], 'r') as fi:
            rnd_gene_set = [l.strip() for l in fi.readlines()]
        npairs = args.max_pairs
        dataset_dict = None
        logger.info('Looking for connections between %i pairs' %
                    (npairs if npairs > 0 else args.max_pairs))
        for _ in range(npairs):
            id1, id2 = _rnd_pair_gen(rnd_gene_set)
            assert not isinstance(id1, list)
            loop_body(args)

    long_string = ''
    long_string += '-' * 63 + '\n'
    long_string += 'Summary for matching INDRA network to correlation ' \
                   'pairs:' + '\n\n'
    long_string += '> Total number of correlation pairs checked: %i' % \
                   npairs + '\n'
    if args.verbosity:
        long_string += '> Skipped %i empty doublets in corr dict\n' % skipped
    long_string += '> Total correlations unexplained: %i' % \
                   len(unexplained) + '\n'
    long_string += '> Total correlations explained: %i' % any_expl + '\n'
    long_string += '> Total correlations explained, ignoring shared ' \
                   'regulator: %i' % any_expl_ign_sr + '\n'
    long_string += '> Total correlations explained, excluding shared ' \
                   'regulator (total - shared only): %i' % \
                   (any_expl - shared_regulator_only_expl_count) + '\n'
    long_string += '> %i correlations have an explanation involving a ' \
                   'common parent' % common_parent + '\n'
    if args.explained_set:
        long_string += '> %i gene pairs were considered explained as part ' \
                       'of the "explained set"' % part_of_explained + '\n'
    long_string += '> %i explanations involving direct connection or ' \
                   'complex' % ab_expl_count + '\n'
    long_string += '> %i correlations have a directed explanation ' \
                   'involving an intermediate node (A->X->B/A<-X<-B)' \
                   % directed_im_expl_count + '\n'
    long_string += '> %i correlations have an explanation involving an ' \
                   'intermediate node excluding shared regulators' % \
                   any_axb_non_sr_expl_count + '\n'
    long_string += '> %i correlations have an explanation involving a ' \
                   'shared regulator (A<-X->B)' % sr_expl_count + '\n'
    long_string += '> %i correlations have shared regulator as only ' \
                   'explanation' % shared_regulator_only_expl_count + '\n\n'

    if stats_dict and (stats_dict.get('rnai') or stats_dict.get('crispr')):
        long_string += 'Statistics of input data:' + '\n\n'

    if stats_dict and stats_dict.get('rnai'):
        long_string += ' RNAi data ' + '\n'
        long_string += ' -----------' + '\n'
        long_string += '> mean: %f\n' % stats_dict['rnai']['mean']
        long_string += '> SD: %f\n' % stats_dict['rnai']['sigma']
        long_string += '> lower bound: %.3f*SD = %.4f\n' % (
            args_dict['rnai']['ll'],
            args_dict['rnai']['ll'] * stats_dict['rnai']['sigma'])
        if args_dict['rnai']['ul']:
            long_string += '> upper bound: %.3f*SD = %.4f\n\n' % (
                args_dict['rnai']['ul'],
                args_dict['rnai']['ul'] * stats_dict['rnai']['sigma'])

    if stats_dict and stats_dict.get('crispr'):
        long_string += ' CRISPR data ' + '\n'
        long_string += ' -------------' + '\n'
        long_string += '> mean: %f\n' % stats_dict['crispr']['mean']
        long_string += '> SD: %f\n' % stats_dict['crispr']['sigma']
        long_string += '> lower bound: %.3f*SD = %.4f\n' % (
            args_dict['crispr']['ll'],
            args_dict['crispr']['ll'] * stats_dict['crispr']['sigma'])
        if args_dict['crispr']['ul']:
            long_string += '> upper bound: %.3f*SD = %.4f\n\n' % (
                args_dict['crispr']['ul'],
                args_dict['crispr']['ul'] * stats_dict['crispr']['sigma'])
    long_string += '-' * 63 + '\n\n'

    logger.info('\n' + long_string)

    # Here create directed graph from explained nested dict
    nx_expl_dir_graph = dnf.nx_directed_graph_from_nested_dict_3layer(
        nest_d=explained_nested_dict)

    if not args.no_web_files:
        # 'explained_nodes' are used to produce first drop down
        explained_nodes = list(nx_expl_dir_graph.nodes)
        logger.info('Dumping json "explainable_ids.json" for first '
                    'dropdown.')
        _dump_it_to_json(args.outbasename + '_explainable_ids.json',
                         explained_nodes)

        # Get undir graph and save each neighbor lookup as json for 2nd
        # dropdown
        nx_expl_undir_graph = nx_expl_dir_graph.to_undirected()
        dnf.nx_undir_to_neighbor_lookup_json(
            expl_undir_graph=nx_expl_undir_graph,
            outbasename=args.outbasename)

    # Easiest way to check if pairs are explained or not is to loop explained
    # dict. Skip shared regulators.
    _dump_nest_dict_to_csv(
        fname=args.outbasename + '_explained_correlations.csv',
        nested_dict=explained_nested_dict,
        header=['gene1', 'gene2', 'meta_data'],
        excl_sr=True)

    _dump_it_to_pickle(fname=args.outbasename + '_explained_nest_dict.pkl',
                       pyobj=explained_nested_dict)
    headers = ['subj', 'obj', 'type', 'X', 'meta_data']
    _dump_it_to_csv(fname=args.outbasename + '_explanations_of_pairs.csv',
                    pyobj=explanations_of_pairs, header=headers)
    _dump_it_to_csv(fname=args.outbasename +
                    '_explanations_of_shared_regulators.csv',
                    pyobj=sr_explanations, header=headers)
    _dump_it_to_csv(fname=args.outbasename + '_unexpl_correlations.csv',
                    pyobj=unexplained, header=headers[:-2])
    with open(args.outbasename + '_script_summary.txt', 'w') as fo:
        fo.write(long_string)
    return 0
    username=ndex_cred['user'],
    password=ndex_cred['password'])
gene_names = [hgnc_client.get_hgnc_name(ag.db_refs['HGNC'])
              for ag in ncp.get_agents()]
"""
# Get PMIDs for reading
entrez_pmids = get_pmids(gene_names)
network_pmids = ncp.get_pmids()
pmids = list(set(entrez_pmids + network_pmids))
save_pmids_for_reading(pmids, 'dna_damage_pmids.txt')
"""
# Build the model
prior_stmts = build_prior(gene_names, 'prior_stmts.pkl')
reach_stmts = ac.load_statements('reach_stmts.pkl')
stmts = ncp.statements + reach_stmts + prior_stmts
stmts = run_assembly(stmts, 'unfiltered_assembled_stmts.pkl')
# Filter the statements at different levels
ids_cutoffs = (('4e26a4f0-9388-11e7-a10d-0ac135e8bacf', 0.90),
               ('527fecf7-9388-11e7-a10d-0ac135e8bacf', 0.95),
               ('2f0e17bc-9387-11e7-a10d-0ac135e8bacf', 0.99))
for net_id, cutoff in ids_cutoffs:
    stmts_filt = filter(stmts, cutoff, 'stmts_%.2f.pkl' % cutoff)
    cxa = assemble_cx(stmts_filt, 'dna_damage_%.2f.cx' % cutoff)
    cx_str = cxa.print_cx()
    ndex_client.update_network(cx_str, net_id, ndex_cred)
def get_reach_output(path):
    stmts = ac.load_statements(path)
    return stmts
model_types = sys.argv[1:]
if 'all' in model_types:
    assemble_models = ['pysb', 'sif', 'cx']
else:
    assemble_models = sys.argv[1:]
print('Assembling the following model types: %s' %
      ', '.join(assemble_models))
print('##############')

outf = 'output/'
data = process_data.read_data(process_data.data_file)
data_genes = process_data.get_all_gene_names(data)
reassemble = True
if not reassemble:
    stmts = ac.load_statements(pjoin(outf, 'preassembled_db.pkl'))
else:
    #prior_stmts = build_prior(data_genes, pjoin(outf, 'prior.pkl'))
    prior_stmts = ac.load_statements(pjoin(outf, 'prior.pkl'))
    prior_stmts = ac.map_grounding(prior_stmts,
                                   save=pjoin(outf, 'gmapped_prior.pkl'))
    #reach_stmts = ac.load_statements(pjoin(outf, 'phase3_stmts.pkl'))
    #reach_stmts = ac.filter_no_hypothesis(reach_stmts)
    extra_stmts = read_extra_sources(pjoin(outf, 'extra_stmts.pkl'))
    #reading_stmts = reach_stmts + extra_stmts
    #reading_stmts = ac.map_grounding(reading_stmts,
    #                                 save=pjoin(outf, 'gmapped_reading.pkl'))
    #stmts = prior_stmts + reading_stmts + extra_stmts
    stmts = prior_stmts + extra_stmts
    stmts = ac.filter_grounded_only(stmts)
gene_names = process_data.get_gene_names(data)

# If generic assembly needs to be done (instead of just loading the result)
# set this to True
reassemble = False

# The file in which the preassembled statements will be saved
pre_stmts_file = prefixed_pkl('preassembled')
if reassemble:
    # Load various files that were previously produced
    sources = ['indradb', 'trips', 'bel', 'biopax', 'phosphosite', 'r3',
               'sparser']
    stmts = []
    for source in sources:
        stmts += ac.load_statements(prefixed_pkl(source))
    stmts = ac.filter_no_hypothesis(stmts)
    # Fix grounding and filter to grounded entities and for proteins,
    # filter to the human ones
    stmts = ac.map_grounding(stmts)
    stmts = ac.filter_grounded_only(stmts)
    stmts = ac.filter_human_only(stmts)
    # Combinatorially expand protein families
    stmts = ac.expand_families(stmts)
    # Apply a strict filter to statements based on the gene names
    stmts = ac.filter_gene_list(stmts, gene_names, 'all')
    # Fix errors in references to protein sequences
    stmts = ac.map_sequence(stmts)
    # Run preassembly and save result
    stmts = ac.run_preassembly(stmts, return_toplevel=False)
    ac.dump_statements(stmts, pre_stmts_file)
    stmts = trips_stmts + sparser_stmts + r3_stmts
    return stmts


def get_prior_genes(fname):
    """Get the list of prior genes."""
    with open(fname, 'rt') as fh:
        genes = fh.read().strip().split('\n')
    return genes


if __name__ == '__main__':
    outf = 'output/'
    data = process_data.read_data(process_data.data_file)
    data_genes = process_data.get_all_gene_names(data)
    reassemble = False
    if not reassemble:
        stmts = ac.load_statements(pjoin(outf, 'preassembled.pkl'))
        #stmts = ac.load_statements(pjoin(outf, 'prior.pkl'))
    else:
        #prior_stmts = build_prior(data_genes, pjoin(outf, 'prior.pkl'))
        prior_stmts = ac.load_statements(pjoin(outf, 'prior.pkl'))
        prior_stmts = ac.map_grounding(prior_stmts,
                                       save=pjoin(outf, 'gmapped_prior.pkl'))
        reading_stmts = ac.load_statements(pjoin(outf, 'phase3_stmts.pkl'))
        reading_stmts = ac.map_grounding(
            reading_stmts, save=pjoin(outf, 'gmapped_reading.pkl'))
        stmts = prior_stmts + reading_stmts
        stmts = ac.filter_grounded_only(stmts)
        stmts = ac.filter_genes_only(stmts, specific_only=False)
        stmts = ac.filter_human_only(stmts)
        stmts = ac.expand_families(stmts)
statements' db_refs dictionary.
""".rstrip()

parser = argparse.ArgumentParser(description=doc)
parser.add_argument('--input', '-i', type=str, required=True,
                    help='Pickle file with a dictionary mapping each '
                         'pmid to a list of INDRA statements',
                    dest='input_file')
parser.add_argument('--output', '-o', type=str, required=True,
                    help='Output csv text file containing the extracted '
                         'grounding map',
                    dest='output_file')
args = parser.parse_args()

# Load the statements from the pickle
statement_list = ac.load_statements(args.input_file)

# Make a dictionary mapping the raw text mention to db_refs
logger.info('Extracting grounding information')
text_to_refs = {}
counter = 0
percent_done = 0
start_time = time.time()
for statement in statement_list:
    for a in statement.agent_list():
        db_refs = copy.copy(a.db_refs)
        text = db_refs.pop('TEXT', None)
        # Convert HGNC ids to names
        if 'HGNC' in db_refs and string_is_integer(db_refs['HGNC']):
            db_refs['HGNC'] = get_hgnc_name(db_refs['HGNC'])
def main(args):
    uniq_pairs, all_hgnc_ids, fsort_corrs = \
        get_correlations(args.ceres_file, args.geneset_file, args.corr_file,
                         args.strict, args.outbasename, args.recalc,
                         args.ll, args.ul)

    # Get statements from file or from database that contain any gene from
    # provided list as set
    if args.statements_in:
        # Get statements from file
        stmts_all = set(ac.load_statements(args.statements_in))
    else:
        # Use api to get statements. NOT the same as querying for each ID
        if args.geneset_file:
            stmts_all = dnf.dbc_load_statements(gene_filter_list)
        else:
            # if there is no gene set file, restrict to gene ids in
            # correlation data
            stmts_all = dnf.dbc_load_statements(list(all_hgnc_ids))

    # Dump statements to pickle file if output name has been given
    if args.statements_out:
        ac.dump_statements(stmts=stmts_all, fname=args.statements_out)

    # Get nested dicts from statements
    nested_dict_statements = dnf.nested_dict_gen(stmts_all)

    # Loop through the unique pairs
    dir_conn_pairs = []
    dir_neg_conn_pairs = []
    unexplained = []
    npairs = len(uniq_pairs)

    f_con = open(args.outbasename + '_connections_latex.tex', 'w')
    f_neg_c = open(args.outbasename + '_neg_conn_latex.tex', 'w')

    logger.info('Looking for connections between %i pairs' % npairs)
    for pair in uniq_pairs:
        pl = list(pair)
        for li in pl:
            if _is_float(li):
                correlation = li
                fmt_corr = '{0:.04}'.format(correlation)
                break
        pl.remove(correlation)
        id1, id2 = pl

        forward_fail = False
        backward_fail = False

        if (nested_dict_statements.get(id1) and
                nested_dict_statements.get(id1).get(id2)) or \
                (nested_dict_statements.get(id2) and
                 nested_dict_statements.get(id2).get(id1)):
            new_pair = \
                r'\section{{{}, {}: {}}}'.format(id1, id2, fmt_corr) + \
                '\n' + \
                r'See correlation plot \href{{' \
                r'https://depmap.org/portal/interactive/?xDataset=Avana' \
                r'&xFeature={}&yDataset=Avana&yFeature={}&colorDataset=' \
                r'lineage&colorFeature=all&filterDataset=context' \
                r'&filterFeature=&regressionLine=false&statisticsTable=' \
                r'false&associationTable=true&plotOnly=false}}{{here}}'.format(
                    id1, id2) + '\n\n'
            f_con.write(new_pair)
            if correlation < 0:
                f_neg_c.write(new_pair)

        # nested_dict_statements.get(id1).get(id2) raises AttributeError
        # if nested_dict_statements.get(id1) returns {}
        ev_fltr = 0

        # Checks subj=id1, obj=id2
        if nested_dict_statements.get(id1) and \
                nested_dict_statements.get(id1).get(id2):
            stmts = nested_dict_statements[id1][id2]
            logger.info('Found connection between %s and %s' % (id1, id2))
            dir_conn_pairs.append((id1, id2, correlation, stmts))
            output = dnf.latex_output(subj=id1,
                                      obj=id2,
                                      corr=correlation,
                                      ev_len_fltr=ev_fltr,
                                      stmts=stmts,
                                      ignore_str='parent')
            f_con.write(output)
            if correlation < 0:
                dir_neg_conn_pairs.append((id1, id2, correlation, stmts))
                f_neg_c.write(output)
        else:
            forward_fail = True

        # Checks subj=id2, obj=id1
        if nested_dict_statements.get(id2) and \
                nested_dict_statements.get(id2).get(id1):
            stmts = nested_dict_statements[id2][id1]
            logger.info('Found connection between %s and %s' % (id2, id1))
            dir_conn_pairs.append((id2, id1, correlation, stmts))
            output = dnf.latex_output(subj=id2,
                                      obj=id1,
                                      corr=correlation,
                                      ev_len_fltr=ev_fltr,
                                      stmts=stmts,
                                      ignore_str='parent')
            f_con.write(output)
            if correlation < 0:
                dir_neg_conn_pairs.append((id2, id1, correlation, stmts))
                f_neg_c.write(output)
        else:
            backward_fail = True

        # If both failed, count as unexplained
        if forward_fail and backward_fail:
            unexplained.append([id1, id2, correlation])

    with open(args.outbasename + '_connections.csv', 'w',
              newline='') as csvf:
        wrtr = csv.writer(csvf, delimiter=',')
        wrtr.writerows(dir_conn_pairs)

    with open(args.outbasename + '_neg_conn.csv', 'w', newline='') as csvf:
        wrtr = csv.writer(csvf, delimiter=',')
        wrtr.writerows(dir_neg_conn_pairs)

    with open(args.outbasename + '_unexplained.csv', 'w',
              newline='') as csvf:
        wrtr = csv.writer(csvf, delimiter=',')
        wrtr.writerows(unexplained)

    f_con.close()
    f_neg_c.close()
reg_stmts = act_stmts + inh_stmts
reg_stmts = [s for s in reg_stmts if s.subj is not None]
reg_stmts = ac.filter_genes_only(reg_stmts, specific_only=True)
"""
#indra_stmts = get_indra_phos_stmts()
"""
indra_stmts = ac.load_statements('sources/indra_phos_stmts.pkl')
syn_stmts = load_statements_from_synapse(synapse_id='syn10998244')
pc_stmts = load_pc_phos()
omni_stmts = get_omnipath_stmts()
phos_stmts = get_phosphosite_stmts()
all_stmts = syn_stmts + omni_stmts + phos_stmts + indra_stmts + pc_stmts
ac.dump_statements(all_stmts, 'sources/all_stmts.pkl')
"""
all_stmts = ac.load_statements('sources/all_stmts.pkl')
nsprior = to_nonspec_prior(all_stmts)
nsprior_filename = 'priors/indra_nkconf2_combined_prot_spec.txt'
save_gene_prior(nsprior, nsprior_filename)
syn = synapseclient.login()
syn_file = synapseclient.File(nsprior_filename, parent='syn11272284')
syn.store(syn_file)

all_kinases = [k for kin_list in nsprior.values() for k in kin_list]
kin_ctr = Counter(all_kinases)
kin_ctr = sorted([(k, v) for k, v in kin_ctr.items()],
                 key=lambda x: x[1], reverse=True)
default_prior_list = [t[0] for t in kin_ctr[0:200]]
default_prior_filename = 'priors/indra_nkconf2_combined_default200.txt'
    csvwriter.writerows(interactome_rows)
    with open(prize_outpath, 'wt') as f:
        csvwriter = csv.writer(f, delimiter='\t')
        csvwriter.writerows(prize_rows)
    return


if __name__ == "__main__":
    stmts_path = "../work/phospho_stmts.pkl"
    prize_outpath = "../work/pybel_prize.tsv"
    interactome_path = "../work/big_pybel_interactome2.tsv"
    site_file = "../work/gsea_sites.rnk"

    # Load the statements linking kinases/regulators to phospho sites
    # in the data
    stmts = ac.load_statements(stmts_path)

    # Employ filters to reduce network size
    stmts = ac.filter_grounded_only(stmts)
    stmts = ac.filter_human_only(stmts)
    stmts = ac.filter_genes_only(stmts)
    # In this data, statements of these two types will not act on
    # a short enough timescale to play a meaningful role
    stmts = ac.filter_by_type(stmts, DecreaseAmount, invert=True)
    stmts = ac.filter_by_type(stmts, IncreaseAmount, invert=True)
    stmts = ac.filter_by_type(stmts, Complex, invert=True)
    stmts = ac.filter_enzyme_kinase(stmts)

    # Assemble a pybel graph from statements
    pba = PybelAssembler(stmts)
    pb_graph = make_model(pba)
for drug, stmtd in data_stmts.items():
    print(drug)
    for ab in stmtd.keys():
        print('-' + ab)

agent_obs = list(itertools.chain.from_iterable(ab_map.values()))

# Here we need to cross-reference the antibody map with the data values
agent_data = {}
for drug_name, values in data_values.items():
    agent_data[drug_name] = {}
    for ab_name, value in values.items():
        agents = ab_map[ab_name]
        for agent in agents:
            agent_data[drug_name][agent] = value

base_stmts = ac.load_statements('output/korkut_model_pysb_before_pa.pkl')
for st in base_stmts:
    st.uuid = str(st.uuid)
"""
# Merge the sources of statements
# stmts = manual_stmts + base_stmts
stmts = base_stmts
#stmts = manual_stmts

# Assemble model
pa = PysbAssembler()
pa.add_statements(stmts)
model = pa.make_model()
with open('korkut_pysb.pkl', 'wb') as f:
if __name__ == '__main__':
    parser = argparse.ArgumentParser(
        description='Generate ranked lists of COVID docs for curation.')
    parser.add_argument('-i', '--input_file',
                        help='Name of stmt pkl file', required=True)
    parser.add_argument('-o', '--output_base',
                        help='Basename for output files.', required=True)
    args = parser.parse_args()

    # Load statements and filter to grounded only
    stmts = ac.load_statements(args.input_file)
    stmts = ac.filter_grounded_only(stmts)
    # Sort by TextRefs
    by_tr, no_tr = stmts_by_text_refs(stmts)
    # Combine duplicates in each statement list
    by_tr_pa = {}
    for tr, stmt_list in by_tr.items():
        pa = Preassembler(bio_ontology, stmt_list)
        uniq_stmts = pa.combine_duplicates()
        by_tr_pa[tr] = uniq_stmts
    # Filter to MESH term for "Coronavirus"
    mesh_id = 'D017934'
    mesh_children = get_mesh_children(mesh_id)
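# stmts_by_text_refs() is not defined in this excerpt; the following is a
# hedged sketch of what such a helper could look like, assuming each
# Evidence carries a text_refs dict (as INDRA Evidence objects do). The
# grouping key and the handling of missing refs are assumptions.
from collections import defaultdict

def stmts_by_text_refs(stmts):
    by_tr = defaultdict(list)
    no_tr = []
    for stmt in stmts:
        trs = stmt.evidence[0].text_refs if stmt.evidence else None
        if trs:
            # Use a hashable, order-independent key for the TextRef
            by_tr[tuple(sorted(trs.items()))].append(stmt)
        else:
            no_tr.append(stmt)
    return dict(by_tr), no_tr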
    else:
        on_nodes = on
    coll = boolean2.util.Collector()
    bn_str = boolean2.modify_states(bn_str, turnon=on, turnoff=off)
    model = boolean2.Model(text=bn_str, mode='async')
    for i in range(nsim):
        model.initialize()
        model.iterate(steps=nsteps)
        coll.collect(states=model.states, nodes=model.nodes)
    avgs = coll.get_averages(normalize=True)
    return avgs


if __name__ == '__main__':
    # Build Boolean net for basic pathway
    st = ac.load_statements('ras_pathway.pkl')
    sa = SifAssembler(st)
    sa.make_model(use_name_as_key=True)
    sa.save_model('ras_pathway.sif')
    bn_str = sa.print_boolean_net('ras_pathway_bn.txt')
    # Build Boolean net for extended pathway
    st_ext = ac.load_statements('ras_pathway_extension.pkl')
    sa = SifAssembler(st + st_ext)
    sa.make_model(use_name_as_key=True)
    sa.save_model('ras_pathway_extension.sif')
    bn_str = sa.print_boolean_net('ras_pathway_extension_bn.txt')
    # Condition 1
    off = []
    on = ['GROWTH-FACTOR']
from indra.util import _require_python3
from indra.assemblers.sif import SifAssembler
import indra.tools.assemble_corpus as ac

stmts = ac.load_statements('output/preassembled.pkl')
stmts = ac.filter_belief(stmts, 0.95)
stmts = ac.filter_direct(stmts)

sa = SifAssembler(stmts)
sa.make_model(True, True, False)
sa.set_edge_weights('support_all')
fname = 'model_high_belief_v2.sif'
with open(fname, 'wt') as fh:
    for s, t, d in sa.graph.edges(data=True):
        source = sa.graph.nodes[s]['name']
        target = sa.graph.nodes[t]['name']
        fh.write('%s %f %s\n' % (source, d['weight'], target))
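# A short sketch (not in the original) of reading back the weighted SIF
# written above; each line has the form "<source> <weight> <target>",
# assuming node names contain no spaces.
edges = []
with open('model_high_belief_v2.sif', 'rt') as fh:
    for line in fh:
        source, weight, target = line.split()
        edges.append((source, float(weight), target))
print('%d weighted edges' % len(edges))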
    # Create EMMAA model
    emmaa_model = EmmaaModel(model_name, config_dict)
    emmaa_model.add_statements(emmaa_stmts)

    # Upload model to S3 with config as YAML and JSON
    emmaa_model.save_to_s3()
    s3_client = boto3.client('s3')
    save_config_to_s3(model_name, config_dict)


if __name__ == '__main__':
    parser = argparse.ArgumentParser(
        description='Create and upload an EMMAA model from INDRA '
                    'Statements.')
    parser.add_argument('-m', '--model_name', help='Model name',
                        required=True)
    parser.add_argument('-s', '--stmt_pkl', help='Statement pickle file',
                        required=True)
    parser.add_argument('-n', '--ndex_id',
                        help='NDEx ID. If not given, a new NDEx network '
                             'will be created. If given, will update the '
                             'NDEx network.',
                        required=False)
    args = parser.parse_args()

    # Load the statements
    indra_stmts = ac.load_statements(args.stmt_pkl)
    # Create the model
    create_upload_model(args.model_name, indra_stmts, args.ndex_id)
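# Example invocations (the script name is hypothetical; the flags are the
# ones defined by the argparse setup above):
#   python create_upload_model.py -m covid19 -s covid19_stmts.pkl
#   python create_upload_model.py -m covid19 -s covid19_stmts.pkl -n <ndex_id>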
                        required=True)
    args = parser.parse_args()

    # Load model statements and tests
    model_stmts, _ = get_assembled_statements('covid19')
    curated_tests, _ = load_tests_from_s3('covid19_curated_tests')
    if isinstance(curated_tests, dict):  # if descriptions were added
        curated_tests = curated_tests['tests']
    mitre_tests, _ = load_tests_from_s3('covid19_mitre_tests')
    if isinstance(mitre_tests, dict):  # if descriptions were added
        mitre_tests = mitre_tests['tests']
    all_test_stmts = [test.stmt for test in curated_tests] + \
        [test.stmt for test in mitre_tests]
    # Load CTD statements
    chem_dis_stmts = ac.load_statements(args.chemical_disease)
    chem_gene_stmts = ac.load_statements(args.chemical_gene)
    gene_dis_stmts = ac.load_statements(args.gene_disease)
    all_ctd_stmts = chem_dis_stmts + chem_gene_stmts + gene_dis_stmts
    # Collect the most frequent gene groundings for model statements and
    # chemical groundings for test statements
    model_gene_groundings = get_groundings(model_stmts, 'HGNC', cutoff=100)
    chem_test_groundings = get_groundings(all_test_stmts, 'CHEBI', None)
    gene_chem_groundings = model_gene_groundings + chem_test_groundings
    gene_chem_groundings = set(gene_chem_groundings)
    # Filter ctd statements to those having matching genes and chemicals
    gene_chem_stmts = filter_by_groundings(all_ctd_stmts,
                                           gene_chem_groundings, 'all')
    # Filter ctd statements to those having matching diseases
    mesh_groundings = set([('MESH', dis) for dis in diseases])
model_types = sys.argv[1:]
if 'all' in model_types:
    assemble_models = ['pysb', 'sif', 'cx']
else:
    assemble_models = sys.argv[1:]
print('Assembling the following model types: %s' %
      ', '.join(assemble_models))
print('##############')

outf = 'output/'
data = process_data.read_data(process_data.data_file)
data_genes = process_data.get_all_gene_names(data)
reassemble = False
if not reassemble:
    stmts = ac.load_statements(pjoin(outf, 'preassembled.pkl'))
else:
    #prior_stmts = build_prior(data_genes, pjoin(outf, 'prior.pkl'))
    prior_stmts = ac.load_statements(pjoin(outf, 'prior.pkl'))
    prior_stmts = ac.map_grounding(prior_stmts,
                                   save=pjoin(outf, 'gmapped_prior.pkl'))
    reach_stmts = ac.load_statements(pjoin(outf, 'phase3_stmts.pkl'))
    reach_stmts = ac.filter_no_hypothesis(reach_stmts)
    #extra_stmts = ac.load_statements(pjoin(outf, 'extra_stmts.pkl'))
    extra_stmts = read_extra_sources(pjoin(outf, 'extra_stmts.pkl'))
    reading_stmts = reach_stmts + extra_stmts
    reading_stmts = ac.map_grounding(reading_stmts,
                                     save=pjoin(outf, 'gmapped_reading.pkl'))
    stmts = prior_stmts + reading_stmts + extra_stmts
    rows = []
    for kinase, sites in regulons.items():
        rows.append([kinase, 'Description'] + [s for s in sites])
    with open(filename, 'wt') as f:
        csvwriter = csv.writer(f, delimiter='\t')
        csvwriter.writerows(rows)


if __name__ == '__main__':
    reload = False
    if reload:
        phos_stmts = \
            get_phosphorylation_stmts('../work/gsea_sites.rnk')
        ac.dump_statements(phos_stmts, '../work/phospho_stmts.pkl')
    else:
        phos_stmts = ac.load_statements('../work/phospho_stmts.pkl')

    regulons_from_stmts(phos_stmts, '../work/kinase_regulons.gmt')
    #kinases = get_kinase_counts(phos_stmts)

    target_list = get_stmt_subject_object(phos_stmts, 'SUBJECT')
    # Get all Tubulin child nodes as the source list
    source_list = [('FPLX', 'Tubulin')]
    tubulin_ag = Agent('Tubulin', db_refs={'FPLX': 'Tubulin'})
    ex = Expander(bio_ontology)
    for ag_ns, ag_id in ex.get_children(tubulin_ag, ns_filter=None):
        #if ag_ns == 'HGNC':
        #    ag_id = hgnc_client.get_hgnc_id(ag_id)
        source_list.append((ag_ns, ag_id))
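# A minimal sketch (not part of the original) of reading back the .gmt file
# written by regulons_from_stmts(); each tab-separated row is
# <kinase> <description> <site1> <site2> ..., per the writer above.
regulons_read = {}
with open('../work/kinase_regulons.gmt', 'rt') as f:
    for line in f:
        fields = line.rstrip('\n').split('\t')
        kinase, _description, sites = fields[0], fields[1], fields[2:]
        regulons_read[kinase] = sites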
def test_load_stmts():
    with open('_test.pkl', 'wb') as fh:
        pickle.dump([st1], fh, protocol=2)
    st_loaded = ac.load_statements('_test.pkl')
    assert len(st_loaded) == 1
    assert st_loaded[0].equals(st1)
             norm_uuid_counts, color='orange', alpha=0.8,
             label='Statements')
    plt.plot(lengths, norm_node_counts, color='blue', alpha=0.8,
             label='Nodes')
    plt.legend(loc='upper left', fontsize=pf.fontsize, frameon=False)
    ax = plt.gca()
    pf.format_axis(ax)


if __name__ == '__main__':
    source = sys.argv[2]
    target = sys.argv[3]
    if len(sys.argv) > 4:
        max_depth = int(sys.argv[4])
    stmts = ac.load_statements(sys.argv[1])
    print(len(stmts))
    stmts = ac.filter_direct(stmts)
    stmts = ac.filter_belief(stmts, 0.95)
    stmts = ac.filter_top_level(stmts)
    stmts = [s for s in stmts if s.agent_list()[0]]
    print(len(stmts))

    from util import pkldump
    import ipdb
    ipdb.set_trace()
    #ppa = PysbPreassembler(stmts)
    #ppa.replace_activities()
    #stmts = ppa.statements
    #g = stmts_to_digraph(stmts)
print(" Mapped: %d (%0.1f)" % (n_map, pct(n_map, n))) print("%% Mapped: %0.1f" % pct(n_map, n_inv)) print() print("Total site occurrences: %d" % f) print(" Valid: %d (%0.1f)" % (f_val, pct(f_val, f))) print(" Invalid: %d (%0.1f)" % (f_inv, pct(f_inv, f))) print(" Mapped: %d (%0.1f)" % (f_map, pct(f_map, f))) print("Pct occurrences mapped: %0.1f" % pct(f_map, f_inv)) print() # Sample 100 invalid-unmapped (by unique sites) # Sample 100 invalid-mapped (by unique sites) if __name__ == '__main__': outf = '../phase3_eval/output' prior_stmts = ac.load_statements(pjoin(outf, 'prior.pkl')) site_info = map_statements(prior_stmts, source='prior', outfile='prior_sites.csv') #reach_stmts = ac.load_statements(pjoin(outf, 'phase3_stmts.pkl')) #stmts = prior_stmts #stmts = reach_stmts #stmts = ac.map_grounding(stmts, save=pjoin(outf, 'gmapped_stmts.pkl')) #stmts = ac.load_statements(pjoin(outf, 'gmapped_stmts.pkl')) sys.exit() """ valid, sites, sm = get_incorrect_sites(do_methionine_offset=True, do_orthology_mapping=True, do_isoform_mapping=True)