def main(args):
    """Evaluate flows for batches of source/destination modules and dump them to TSV.

    Reads the SPM data and the two module files named by ``args``, evaluates
    the batch flows with ``args.n_threads`` workers, and writes the resulting
    table to ``args.output_file``.
    """
    print("Loading SPM data...")
    spm = flower.load_spm(args.spm_file)
    nodelist = spm['nodelist']

    srcmodules = utils.load_nodeset_file(args.srcmodules_file, sep=args.sep)
    destmodules = utils.load_nodeset_file(args.destmodules_file, sep=args.sep)

    # When the SPM node list is numeric (not string-typed), cast every node ID
    # in the loaded modules to int so lookups match the SPM node list.
    if not isinstance(nodelist[0], str):
        srcmodules = [[int(node) for node in module] for module in srcmodules]
        destmodules = [[int(node) for node in module] for module in destmodules]

    print("Evaluating flows...")
    flows_batch = flower.evaluate_flows_batch_nx(
        srcmodules, destmodules, spm, n_threads=args.n_threads
    )
    print("Completed.")

    print("Writing to file...")
    flows_batch.to_csv(args.output_file, sep='\t')
    print("Completed.")
def main(args):
    """Evaluate flow centrality between a source and a destination node set.

    Loads the SPM data and the two node-set files named by ``args``, runs the
    flow-centrality evaluation, prints a preview, and writes the full table
    to ``args.output_file`` as TSV.
    """
    print("Loading SPM data...")
    spm_data = flower.load_spm(args.spm_file)

    srcnodes = utils.load_nodeset_file(args.srcnodes_file, sep=args.sep, as_column=args.as_col)
    destnodes = utils.load_nodeset_file(args.destnodes_file, sep=args.sep, as_column=args.as_col)
    print(len(srcnodes), len(destnodes))

    # Cast node IDs to int when the SPM node list is not string-typed.
    if not isinstance(spm_data['nodelist'][0], str):
        srcnodes = [int(node) for node in srcnodes]
        destnodes = [int(node) for node in destnodes]

    print("Evaluating flows...")
    flows = flower.eval_flow_centrality_nx(srcnodes, destnodes, spm=spm_data)
    print("Completed.")
    print(flows.iloc[:10])

    print("Writing to file...")
    flows.to_csv(args.output_file, sep='\t')
    print("Completed.")
def main(args):
    """Evaluate flow-centrality statistics against random module batches.

    The random baseline is either computed on the fly from the module files at
    ``args.rdm_src_path`` / ``args.rdm_dest_path``, or read pre-computed from
    ``args.rdm_fc_path``. Exactly one of the two modes must be available.

    Raises:
        ValueError: if neither ``rdm_fc_path`` nor the
            (``rdm_src_path``, ``rdm_dest_path``) pair is specified.
    """
    if args.rdm_fc_path is None and (args.rdm_src_path is None or args.rdm_dest_path is None):
        raise ValueError(
            'Either rdm_fc_path or (rdm_src_path, rdm_dest_path) have to be specified!'
        )

    print("Loading SPM data...")
    spm = flower.load_spm(args.spm_file)
    nodelist = spm['nodelist']

    srcnodes = utils.load_nodeset_file(args.srcnodes_file, sep=args.sep, as_column=args.as_col)
    destnodes = utils.load_nodeset_file(args.destnodes_file, sep=args.sep, as_column=args.as_col)

    # Cast node IDs to int when the SPM node list is not string-typed.
    if not isinstance(nodelist[0], str):
        srcnodes = list(map(int, srcnodes))
        destnodes = list(map(int, destnodes))

    print("Evaluating flows...")
    if args.rdm_fc_path is None:
        rdm_srcmodules = utils.load_nodeset_file(args.rdm_src_path, sep=args.sep)
        rdm_destmodules = utils.load_nodeset_file(args.rdm_dest_path, sep=args.sep)
        if not isinstance(nodelist[0], str):
            # BUG FIX: the original used bare map() here, which under Python 3
            # produces lazy single-pass iterators (the inner ones would be
            # exhausted on first traversal); materialize lists of lists, as the
            # sibling batch entry point does.
            rdm_srcmodules = [[int(node) for node in module] for module in rdm_srcmodules]
            rdm_destmodules = [[int(node) for node in module] for module in rdm_destmodules]
        fcs = flower.evaluate_fcs_nx(
            srcnodes, destnodes, rdm_srcmodules, rdm_destmodules, spm,
            fill_na=args.fill_na, n_threads=args.n_threads
        )
    else:
        # Reuse a pre-computed random-flow batch instead of re-evaluating it.
        rdm_fc = pd.read_csv(args.rdm_fc_path, sep='\t', index_col=0)
        fc = flower.eval_flow_centrality_nx(srcnodes, destnodes, spm)
        fcs = flower.evaluate_flows_statistics_from_batch_nx(rdm_fc, fc)
        fcs = flower.enrich_flows_table_nx(srcnodes, destnodes, fcs, spm, fill_na=args.fill_na)
    print("Completed.")

    print("Writing to file...")
    fcs.to_csv(args.output_file, sep=args.sep)
    print("Completed.")
def main(args):
    """Evaluate flow statistics for batches of source/destination modules.

    Reads the SPM data and the two module CSV files named by ``args``, runs
    the batch flow-statistics evaluation, and writes the result table to
    ``args.output_file`` as TSV.
    """
    print("Loading SPM data...")
    spm = flower.load_spm(args.spm_file)
    nodelist = spm['nodelist']

    srcmodules = pd.read_csv(args.srcmodules_file, sep=args.sep).values.tolist()
    destmodules = pd.read_csv(args.destmodules_file, sep=args.sep).values.tolist()

    # Consistency fix: the sibling entry points test `not isinstance(..., str)`
    # rather than `type(...) is int` (which misses numpy integer types).
    # Also materialize lists of lists — the original's bare map() would yield
    # lazy single-pass iterators under Python 3.
    if not isinstance(nodelist[0], str):
        srcmodules = [[int(node) for node in module] for module in srcmodules]
        destmodules = [[int(node) for node in module] for module in destmodules]

    print("Evaluating flows...")
    # BUG FIX: the original read `true_flows=,` which is a syntax error.
    # Defaulting to None here; TODO(review): confirm whether the true flows
    # were meant to be loaded from a dedicated CLI argument instead.
    flows_batch = flower.evaluate_flows_statistics_nx(
        srcmodules, destmodules, spm, true_flows=None, n_threads=args.n_threads
    )
    print("Completed.")

    print("Writing to file...")
    flows_batch.to_csv(args.output_file, sep='\t')
    print("Completed.")
# Compute the flow-centrality z-score for each node from the batch statistics.
flows['FCScore'] = (flows.Flow_value.values - flows.Flow_mean.values) / flows.Flow_std.values
# BUG FIX: the original did `flows[mask].FCScore = None`, which assigns an
# attribute on a temporary copy and never modifies `flows` (pandas chained
# assignment). Use .loc with a boolean mask to null out NaN/inf scores.
invalid_score = np.isnan(flows.FCScore) | np.isinf(flows.FCScore)
flows.loc[invalid_score, 'FCScore'] = None

print("Creating rich columns...")
# NOTE: this operation cannot be parallelized when gene symbols have to be
# retrieved on the mygene server; probably the socket connection to the
# mygene server doesn't work in parallel.
flows['Degree'] = flows.apply(lambda row: nx.degree(ppi, row.NodeID), axis=1)
flows['in_DestModule'] = flows.apply(lambda row: row.NodeID in destnodes, axis=1)
flows['in_SrcModule'] = flows.apply(lambda row: row.NodeID in srcnodes, axis=1)

print("Calculating distance statistics...")
spm = flower.load_spm(args.spm_file)
gm = utils.GIDMapper(spm['nodelist'])
destnodes_ids = gm.gid2id(destnodes)
srcnodes_ids = gm.gid2id(srcnodes)
ids = gm.gid2id(flows.NodeID.values)
# Min/avg shortest-path distance from every scored node to each module.
flows['min_dist_dest'] = flower.eval_min_distance(spm['path_lengths'], destnodes_ids, ids)
flows['min_dist_src'] = flower.eval_min_distance(spm['path_lengths'], srcnodes_ids, ids)
flows['avg_dist_dest'] = flower.eval_avg_distance(spm['path_lengths'], destnodes_ids, ids)
flows['avg_dist_src'] = flower.eval_avg_distance(spm['path_lengths'], srcnodes_ids, ids)

print("Sorting...")
# NOTE(review): this chunk starts mid-way through a parser.add_argument(...)
# call whose opening line is outside this view; the first code line below is
# the tail of that call and is left untouched.
default=1, help='Number of parallel processes')
parser.add_argument('--int_nodeid', action='store_true', help='Cast the node IDs to integer')
parser.add_argument('--dry', '--dry_run', action='store_true', help='Dry run')
args = parser.parse_args()

# Python 2 print statements below are kept as-is; this chunk predates the
# py3-style entry points elsewhere in the project.
print "Importing network..."
# Node IDs are parsed as int only when --int_nodeid was passed.
nodetype = int if args.int_nodeid else str
ppi = nx.read_edgelist(args.network_file, nodetype=nodetype)

print "Loading SPM data..."
spm_data = flower.load_spm(args.spm_file)
nodelist = spm_data['nodelist']

# Each input row is "src nodes|dest nodes"; zip(*...) splits the rows into
# parallel src/dest sequences.
with open(args.srcdestmodules_file, 'r') as f: srcrows, destrows = zip( *map(lambda x: x.rstrip().split('|'), f.readlines()) ) # this line is tricky but it just splits the rows in srcmodule and destmodule
# NOTE(review): under Python 3 these bare map() calls produce lazy,
# single-pass iterators — presumably written for Python 2; verify before
# porting.
srcmodules = map(lambda x: map(nodetype, x.split(' ')), srcrows)
destmodules = map(lambda x: map(nodetype, x.split(' ')), destrows)

gm = utils.GIDMapper(nodelist=nodelist)
nodes_dest_list = map(gm.gid2id, destmodules)
nodes_src_list = map(gm.gid2id, srcmodules)