def test_filter_correls(correls):
    correls_filt = filter_correls(correls, min_r=.3)
    assert len(correls_filt) == 2
    correls_filt = filter_correls(correls, min_r=.3, conet=True)
    assert len(correls_filt) == 1
    correls_filt = filter_correls(correls, min_p=.05)
    assert len(correls_filt) == 2
def build_correlation_network_r(correlation_table: pd.DataFrame, min_val: float = .75,
                                cooccur: bool = False) -> nx.Graph:
    correlation_table_filtered = filter_correls(correlation_table, min_r=min_val, conet=cooccur)
    net = correls_to_net(correlation_table_filtered)
    return net
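# Hedged usage sketch for build_correlation_network_r, illustration only. The feature
# ids and r values below are invented; real correlation tables come from SCNIC's
# correlation step, indexed by (feature1, feature2) pairs with an 'r' column.
def _example_build_correlation_network_r():
    correls = pd.DataFrame(
        {'r': [.9, -.8, .2]},
        index=pd.MultiIndex.from_tuples([('otu1', 'otu2'), ('otu1', 'otu3'), ('otu2', 'otu3')]))
    net = build_correlation_network_r(correls, min_val=.75)  # edges passing the .75 r cutoff
    conet = build_correlation_network_r(correls, min_val=.75, cooccur=True)  # positive correlations only
    return net, conet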
def make_modules_on_correlations(correlation_table: pd.DataFrame, feature_table: Table,
                                 min_r: float = .35) -> (Table, nx.Graph, pd.Series):
    modules = ma.make_modules_naive(correlation_table, min_r=min_r)
    # map each feature to its module; features outside any module map to None
    modules_rev = {asv: module for module, asvs in modules.items() for asv in asvs}
    for asv in feature_table.ids(axis='observation'):
        if asv not in modules_rev:
            modules_rev[asv] = None
    module_membership = pd.Series(modules_rev)
    coll_table = ma.collapse_modules(feature_table, modules)
    metadata = get_metadata_from_table(feature_table)
    metadata = ma.add_modules_to_metadata(modules, metadata)
    correlation_table_filtered = filter_correls(correlation_table, conet=True, min_r=min_r)
    net = correls_to_net(correlation_table_filtered, metadata=metadata)
    return coll_table, net, module_membership
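# Hedged usage sketch for make_modules_on_correlations on toy inputs, illustration
# only. The ids, counts, and r values are invented; real inputs are a SCNIC
# correlation table and a biom feature table (this assumes numpy is imported as np).
def _example_make_modules_on_correlations():
    correls = pd.DataFrame(
        {'r': [.9, .8, .1]},
        index=pd.MultiIndex.from_tuples([('a', 'b'), ('b', 'c'), ('c', 'd')]))
    table = Table(np.arange(8).reshape(4, 2), ['a', 'b', 'c', 'd'], ['s1', 's2'])
    coll_table, net, membership = make_modules_on_correlations(correls, table, min_r=.5)
    return coll_table, net, membership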
def make_modules_louvain(correls, min_r=None, max_p=None, gamma=.01, prefix="module"):
    import community as louvain
    correls_filt = general.filter_correls(correls, max_p=max_p, min_r=min_r, conet=True)
    net = general.correls_to_net(correls_filt)
    partition = louvain.best_partition(net, resolution=gamma)
    # group nodes by their partition label
    premodules = defaultdict(list)
    for otu, module in partition.items():
        premodules[module].append(otu)
    premodules = list(premodules.values())
    premodules.sort(key=len, reverse=True)
    # number modules largest-first, dropping singletons
    modules = dict()
    for i, otus in enumerate(premodules):
        if len(otus) > 1:
            modules['%s_%s' % (prefix, i)] = otus
    return modules
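# Hedged sketch of Louvain module detection on a toy correlation table, illustration
# only (requires the python-louvain package). The ids and r values are invented;
# gamma is the Louvain resolution parameter, where smaller values favor larger modules.
def _example_make_modules_louvain():
    correls = pd.DataFrame(
        {'r': [.9, .8, .85]},
        index=pd.MultiIndex.from_tuples([('a', 'b'), ('b', 'c'), ('d', 'e')]))
    # two connected components, so two modules of size > 1 are expected
    return make_modules_louvain(correls, min_r=.5, gamma=1.0)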
def make_modules_k_cliques(correls, min_r=None, max_p=None, k=3, prefix="module"):
    correls_filt = general.filter_correls(correls, max_p=max_p, min_r=min_r, conet=True)
    net = general.correls_to_net(correls_filt)
    premodules = list(nx.algorithms.community.k_clique_communities(net, k))
    # sort modules largest-first so overlapping observations are assigned to the largest module
    premodules.sort(key=len, reverse=True)
    modules = dict()
    seen = set()
    for i, module in enumerate(premodules):
        # drop observations already claimed by a larger module
        module = module - seen
        seen = seen | module
        modules[prefix + "_" + str(i)] = module
        for node in module:
            net.nodes[node][prefix] = i
    return modules
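# Hedged sketch of k-clique module detection on a toy graph, illustration only:
# a triangle (a, b, c) plus a pendant edge to d. With k=3 only the triangle forms
# a community, so d is left out of all modules. All ids and r values are invented.
def _example_make_modules_k_cliques():
    correls = pd.DataFrame(
        {'r': [.9, .8, .85, .7]},
        index=pd.MultiIndex.from_tuples([('a', 'b'), ('b', 'c'), ('a', 'c'), ('c', 'd')]))
    return make_modules_k_cliques(correls, min_r=.5, k=3)  # expect one module: {'a', 'b', 'c'}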
def module_maker(args):
    logger = general.Logger("SCNIC_module_log.txt")
    logger["SCNIC analysis type"] = "module"

    # read in correlations file
    correls = pd.read_table(args.input, index_col=(0, 1), sep='\t',
                            dtype={'feature1': str, 'feature2': str})
    logger["input correls"] = args.input
    if args.verbose:
        print("correls.txt read")

    # sanity check args
    if args.min_r is not None and args.min_p is not None:
        raise ValueError("arguments min_p and min_r may not be used concurrently")
    if args.min_r is None and args.min_p is None:
        raise ValueError("argument min_p or min_r must be used")

    # make distance matrix from the correlations
    if args.min_r is not None:
        min_dist = ma.cor_to_dist(args.min_r)
        logger["minimum r value"] = args.min_r
        cor, labels = ma.correls_to_cor(correls)
        dist = ma.cor_to_dist(cor)
    elif args.min_p is not None:
        # TODO: This
        raise NotImplementedError()
    else:
        raise ValueError("this is prevented above")

    # read in biom table if given
    if args.table is not None:
        table = load_table(args.table)
        logger["input uncollapsed table"] = args.table
        if args.verbose:
            print("otu table read")

    # make new output directory and change to it
    if args.output is not None:
        if not os.path.isdir(args.output):
            os.makedirs(args.output)
        os.chdir(args.output)
    logger["output directory"] = os.getcwd()

    # make modules
    modules = ma.make_modules(dist, min_dist, obs_ids=labels)
    logger["number of modules created"] = len(modules)
    if args.verbose:
        print("Modules Formed")
        print("number of modules: %s" % len(modules))
        print("number of observations in modules: %s" % np.sum([len(i) for i in modules]))
        print("")
    ma.write_modules_to_file(modules)

    # collapse modules
    if args.table is not None:
        coll_table = ma.collapse_modules(table, modules)
        ma.write_modules_to_dir(table, modules)
        logger["number of observations in output table"] = coll_table.shape[0]
        if args.verbose:
            print("Table Collapsed")
            print("collapsed Table Observations: " + str(coll_table.shape[0]))
            print("")
        with biom_open('collapsed.biom', 'w') as f:
            coll_table.to_hdf5(f, 'make_modules.py')

    # make network
    if args.table is not None:
        metadata = general.get_metadata_from_table(table)
    else:
        metadata = defaultdict(dict)
    metadata = ma.add_modules_to_metadata(modules, metadata)
    correls_filter = general.filter_correls(correls, conet=True, min_p=args.min_p, min_r=args.min_r)
    net = general.correls_to_net(correls_filter, metadata=metadata)
    nx.write_gml(net, 'correlation_network.gml')
    if args.verbose:
        print("Network Generated")
        print("number of nodes: %s" % str(net.number_of_nodes()))
        print("number of edges: %s" % str(net.number_of_edges()))
    logger["number of nodes"] = net.number_of_nodes()
    logger["number of edges"] = net.number_of_edges()

    logger.output_log()
def between_correls(args):
    """Tables must be sorted so that samples are in the same order."""
    logger = general.Logger("SCNIC_log.txt")
    logger["SCNIC analysis type"] = "between"

    # correlation and p-value adjustment methods
    correl_methods = {'spearman': spearmanr, 'pearson': pearsonr}
    correl_method = correl_methods[args.correl_method]

    # load tables
    table1 = load_table(args.table1)
    table2 = load_table(args.table2)
    logger["input table 1"] = args.table1
    logger["input table 2"] = args.table2
    table1 = table1.sort()
    table2 = table2.sort()

    # make new output directory and change to it
    if args.force and args.output is not None:
        shutil.rmtree(args.output, ignore_errors=True)
    if args.output is not None:
        os.makedirs(args.output)
        os.chdir(args.output)
    logger["output directory"] = args.output

    # filter tables
    if args.sparcc_filter is True:
        table1 = general.sparcc_paper_filter(table1)
        table2 = general.sparcc_paper_filter(table2)
        print("Table 1 filtered: %s observations" % str(table1.shape[0]))
        print("Table 2 filtered: %s observations" % str(table2.shape[0]))
        logger["sparcc paper filter"] = True
        logger["number of observations present in table 1 after filter"] = table1.shape[0]
        logger["number of observations present in table 2 after filter"] = table2.shape[0]
    if args.min_sample is not None:
        table1 = general.filter_table(table1, args.min_sample)
        table2 = general.filter_table(table2, args.min_sample)
    if not np.array_equal(table1.ids(), table2.ids()):
        raise ValueError("Tables have different sets of samples present")

    metadata = general.get_metadata_from_table(table1)
    metadata.update(general.get_metadata_from_table(table2))

    # make correlations
    logger["correlation metric"] = args.correl_method
    logger["p adjustment method"] = args.p_adjust
    correls = ca.between_correls_from_tables(table1, table2, correl_method, nprocs=args.procs)
    correls.sort_values(correls.columns[-1], inplace=True)
    correls['p_adj'] = general.p_adjust(correls['p'])
    correls.to_csv('correls.txt', sep='\t', index=True)

    # make network
    correls_filt = general.filter_correls(correls, min_p=args.min_p, min_r=args.min_r)
    net = general.correls_to_net(correls_filt, metadata=metadata)
    logger["number of nodes"] = net.number_of_nodes()
    logger["number of edges"] = net.number_of_edges()
    nx.write_gml(net, 'crossnet.gml')

    logger.output_log()
def build_correlation_network_p(correlation_table: pd.DataFrame, max_val: float = .05) -> nx.Graph:
    # note: filter_correls' min_p argument acts as an upper bound on the p-value
    correlation_table_filtered = filter_correls(correlation_table, min_p=max_val)
    net = correls_to_net(correlation_table_filtered)
    return net
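# Hedged usage sketch for build_correlation_network_p, illustration only. The exact
# p-value column checked by filter_correls is assumed, so both 'p' and 'p_adj' are
# included here; all ids and values are invented.
def _example_build_correlation_network_p():
    correls = pd.DataFrame(
        {'r': [.9, .2], 'p': [.001, .2], 'p_adj': [.002, .3]},
        index=pd.MultiIndex.from_tuples([('a', 'b'), ('b', 'c')]))
    return build_correlation_network_p(correls, max_val=.05)  # keeps only the significant pair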
def module_maker(input_loc, output_loc, min_p=None, min_r=None, method='naive', k_size=3,
                 gamma=.4, table_loc=None, prefix='module', verbose=False):
    logger = general.Logger(path.join(output_loc, "SCNIC_module_log.txt"))
    logger["SCNIC analysis type"] = "module"

    # read in correlations file
    correls = pd.read_csv(input_loc, index_col=(0, 1), sep='\t')
    correls.index = pd.MultiIndex.from_tuples([(str(id1), str(id2)) for id1, id2 in correls.index])
    logger["input correls"] = input_loc
    if verbose:
        print("correls.txt read")

    # sanity check args
    if min_r is not None and min_p is not None:
        raise ValueError("arguments min_p and min_r may not be used concurrently")
    if min_r is None and min_p is None:
        raise ValueError("argument min_p or min_r must be used")

    # make new output directory if needed
    if output_loc is not None:
        if not path.isdir(output_loc):
            os.makedirs(output_loc)
    logger["output directory"] = path.abspath(output_loc)

    # make modules
    if method == 'naive':
        modules = ma.make_modules_naive(correls, min_r, min_p, prefix=prefix)
    elif method == 'k_cliques':
        modules = ma.make_modules_k_cliques(correls, min_r, min_p, k_size, prefix=prefix)
    elif method == 'louvain':
        modules = ma.make_modules_louvain(correls, min_r, min_p, gamma, prefix=prefix)
    else:
        raise ValueError('%s is not a valid module picking method' % method)
    logger["number of modules created"] = len(modules)
    if verbose:
        print("Modules Formed")
        print("number of modules: %s" % len(modules))
        print("number of observations in modules: %s" % np.sum([len(i) for i in modules]))
        print("")
    ma.write_modules_to_file(modules, path_str=path.join(output_loc, 'modules.txt'))

    # collapse modules
    if table_loc is not None:
        table = load_table(table_loc)
        logger["input uncollapsed table"] = table_loc
        if verbose:
            print("otu table read")
        coll_table = ma.collapse_modules(table, modules)
        # ma.write_modules_to_dir(table, modules)
        logger["number of observations in output table"] = coll_table.shape[0]
        if verbose:
            print("Table Collapsed")
            print("collapsed Table Observations: " + str(coll_table.shape[0]))
            print("")
        with biom_open(path.join(output_loc, 'collapsed.biom'), 'w') as f:
            coll_table.to_hdf5(f, 'make_modules.py')
        metadata = general.get_metadata_from_table(table)
    else:
        metadata = defaultdict(dict)

    # make network
    metadata = ma.add_modules_to_metadata(modules, metadata)
    correls_filter = general.filter_correls(correls, conet=True, min_p=min_p, min_r=min_r)
    net = general.correls_to_net(correls_filter, metadata=metadata)
    nx.write_gml(net, path.join(output_loc, 'correlation_network.gml'))
    if verbose:
        print("Network Generated")
        print("number of nodes: %s" % str(net.number_of_nodes()))
        print("number of edges: %s" % str(net.number_of_edges()))
    logger["number of nodes"] = net.number_of_nodes()
    logger["number of edges"] = net.number_of_edges()

    logger.output_log()
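# Hedged usage sketch for the library-level module_maker, illustration only. The
# file names are hypothetical placeholders for real SCNIC outputs (a correls.txt
# from the within-table correlation step and, optionally, the matching biom table).
def _example_module_maker():
    module_maker('correls.txt', 'modules_out', min_r=.35, method='naive',
                 table_loc='features.biom', prefix='module', verbose=True)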