# (Continuation of a truncated script: the imports, ArgumentParser construction,
# and the start of the add_argument call for the samples flag are not shown.)
                    help='Number of samples')
parser.add_argument('--N_per_bin', type=int, default=30,
                    help='Minimum number of nodes per degree bin')
parser.add_argument('--rdmseed', type=int, default=None, help='RNG seed')
args = parser.parse_args()

logging.basicConfig(level=logging.INFO,
                    format='%(module)s:%(levelname)s:%(asctime)s:%(message)s',
                    handlers=[
                        logging.FileHandler("../logs/report.log"),
                        logging.StreamHandler()
                    ])
logging.info(args)

files = listdir(args.in_genesets_dir)
net = utils.read_network(args.net_path)

# For each input gene set, draw degree-preserved random sets and write them out.
# def gen_samples(file):
for file in tqdm(files):
    genes = utils.read_gene_list(join(args.in_genesets_dir, file))
    samples = gen.gen_degree_preserved_sets(genes, net, args.N_samples,
                                            args.N_per_bin, rdmseed=args.rdmseed)
    utils.write_genesets_list(join(args.out_samples_dir, file), samples)

# utils.parallel_process(gen_samples, files, n_jobs=8)
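# gen.gen_degree_preserved_sets is project code that is not shown here. Below is a
# minimal, hypothetical sketch of the technique its name describes: bin the
# network's nodes by degree (roughly N_per_bin nodes per bin) and replace each
# input gene with a random node from the same degree bin. The function name,
# signature, and binning scheme are assumptions for illustration, not the
# project's actual implementation.
import random

def degree_preserved_sets_sketch(genes, net, n_samples, n_per_bin=30, rdmseed=None):
    rng = random.Random(rdmseed)
    # Sort nodes by degree and cut them into consecutive bins of ~n_per_bin nodes.
    nodes_by_degree = sorted(net.nodes(), key=net.degree)
    bins = [nodes_by_degree[i:i + n_per_bin]
            for i in range(0, len(nodes_by_degree), n_per_bin)]
    node2bin = {node: b for b in bins for node in b}
    samples = []
    for _ in range(n_samples):
        # Replace each gene with a random node of comparable degree;
        # genes absent from the network are skipped.
        samples.append([rng.choice(node2bin[g]) for g in genes if g in node2bin])
    return samples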
# (Continuation of a truncated script: the imports, ArgumentParser construction,
# and the start of the add_argument call for the samples flag are not shown.)
                    default=100, help='Number of samples')
parser.add_argument('--N_cores', type=int, default=1, help='Number of cores')
parser.add_argument('--seed', type=int, default=100, help='Random seed')
args = parser.parse_args()

logging.basicConfig(level=logging.INFO,
                    format='%(module)s:%(levelname)s:%(asctime)s:%(message)s',
                    handlers=[
                        logging.FileHandler("../logs/report.log"),
                        logging.StreamHandler()
                    ])
logging.info(args)

random.seed(args.seed)
net = utils.read_network(args.net_path)
srcnodes = utils.read_gene_list(args.srcnodes_file)
destnodes = utils.read_gene_list(args.destnodes_file)

# Degree-preserved random samples of the source and destination gene sets.
srcnodes_samples = gen.gen_degree_preserved_sets(srcnodes, net,
                                                 n_samples=args.N_samples,
                                                 rdmseed=args.seed)
destnodes_samples = gen.gen_degree_preserved_sets(destnodes, net,
                                                  n_samples=args.N_samples,
                                                  rdmseed=args.seed)

# Flow-central (FC) nodes: pass both the flow-score and path-count thresholds.
flows = utils.read_flows(args.flows_file)
fcnodes = flows[(flows.FCS >= args.fc_thresh) &
                (flows.N_paths >= args.npath_thresh)].index.tolist()
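# utils.read_flows is also project code that is not shown. The sketch below assumes
# the flows file is a tab-separated table indexed by gene identifier, with at least
# the FCS and N_paths columns used in the filter above; the file layout, separator,
# and function name are assumptions for illustration only.
import pandas as pd

def read_flows_sketch(path):
    # One row per gene; the gene identifier becomes the index so that
    # .index.tolist() yields gene names after thresholding.
    return pd.read_csv(path, sep='\t', index_col=0)

# Example of the same thresholding on the sketched table (threshold values made up):
# flows = read_flows_sketch('flows.tsv')
# fcnodes = flows[(flows.FCS >= 0.5) & (flows.N_paths >= 10)].index.tolist()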
def main(args):
    logging.basicConfig(level=logging.INFO,
                        format='%(module)s:%(levelname)s:%(asctime)s:%(message)s',
                        handlers=[
                            logging.FileHandler("../logs/report.log"),
                            logging.StreamHandler()
                        ])
    logging.info(args)

    random.seed(args.seed)
    net = utils.read_network(args.net_path)
    srcnodes = utils.read_gene_list(args.srcnodes_file)
    destnodes = utils.read_gene_list(args.destnodes_file)

    # Flow-central (FC) nodes: pass both the flow-score and path-count thresholds.
    flows = utils.read_flows(args.flows_file)
    fcnodes = flows[(flows.FCS >= args.fc_thresh) &
                    (flows.N_paths >= args.npath_thresh)].index.tolist()
    logging.info('Num of FC nodes: {}'.format(len(fcnodes)))

    # All shortest paths between every source-destination gene pair.
    all_paths = []
    for src_gene in tqdm(srcnodes):
        for dest_gene in tqdm(destnodes, leave=False):
            paths = list(nx.all_shortest_paths(net, src_gene, dest_gene))
            all_paths += paths
    logging.info('Num of all paths: {}'.format(len(all_paths)))

    # FC paths: shortest paths whose intermediate nodes are all FC nodes.
    fc_paths = []
    for i in trange(len(all_paths)):
        fullpath = all_paths[i]
        if len(fullpath) > 2:
            path = all_paths[i][1:-1]
            if np.all([node in fcnodes for node in path]):
                fc_paths.append(fullpath)
    logging.info('Num of FC paths: {}'.format(len(fc_paths)))

    # Control A: for each sample, pick an FC path and build a random self-avoiding
    # walk of the same length, retrying up to 100 times on dead ends.
    rdm_paths_A = []
    for i in trange(args.N_samples):
        path = random.choice(fc_paths)
        tries = 0
        newpath = []
        while tries < 100:
            newpath = [random.choice(list(net.nodes()))]
            while len(newpath) < len(path):
                possible_nodes = [gene for gene in net[newpath[-1]]
                                  if gene not in newpath]
                if len(possible_nodes) > 0:
                    newpath.append(random.choice(possible_nodes))
                else:
                    tries += 1
                    break
            if len(newpath) == len(path):
                break
        assert tries < 100
        rdm_paths_A.append(newpath)

    # Control B: a uniform random sample of the shortest paths.
    rdm_paths_B = random.sample(all_paths, args.N_samples)

    utils.write_paths(args.out_allpaths, all_paths)
    utils.write_paths(args.out_fcpaths, fc_paths)
    utils.write_paths(args.out_rdmpaths_A, rdm_paths_A)
    utils.write_paths(args.out_rdmpaths_B, rdm_paths_B)
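# utils.write_paths is project-specific and not shown. A minimal sketch, assuming
# each path is written as one tab-separated line of gene identifiers; the output
# format and function name are assumptions, not the project's actual file layout.
def write_paths_sketch(out_path, paths):
    with open(out_path, 'w') as f:
        for p in paths:
            # One path per line, nodes separated by tabs.
            f.write('\t'.join(str(node) for node in p) + '\n')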