def generate_graph(input_data, dis=None, _eu_dm=None, eps_threshold=95, overlap=0.75, min_samples=3, r=40, filter='PCOA', verbose=1):
    """Build a TDA mapper graph from raw data and a precomputed distance matrix.

    Parameters
    ----------
    input_data : sample-by-feature data passed to ``tm.map`` and to the
        DBSCAN eps optimisation.
    dis : precomputed distance matrix projected by the chosen filter/lens.
    _eu_dm : optional precomputed (presumably euclidean — confirm against
        ``optimize_dbscan_eps``) distance matrix for eps optimisation.
    eps_threshold : percentile threshold forwarded to ``optimize_dbscan_eps``.
    overlap : cover overlap fraction.
    min_samples : DBSCAN ``min_samples``.
    r : cover resolution.
    filter : name of the projection filter; must be a key of ``_filter_dict``.
    verbose : verbosity flag forwarded to ``logger`` and ``mapper.Mapper``.

    Returns
    -------
    The constructed mapper graph, or ``None`` when ``filter`` is unknown.
    """
    if filter not in _filter_dict:
        # Fixed typos in the user-facing message ("fitler" -> "filters");
        # kept verbose=1 so the error is always surfaced regardless of the
        # caller's verbosity setting.
        logger("Wrong filter you provided, available filters are",
               ','.join(_filter_dict.keys()), verbose=1)
        return None
    # Bind the looked-up class to a new name instead of reassigning the
    # builtin-named parameter ``filter`` (kept in the signature for
    # backward compatibility with keyword callers).
    filter_cls = _filter_dict[filter]

    tm = mapper.Mapper(verbose=verbose)

    # TDA projection step: project the precomputed distances through the lens.
    t1 = time.time()
    metric = Metric(metric="precomputed")
    lens = [filter_cls(components=[0, 1], metric=metric, random_state=100)]
    projected_X = tm.filter(dis, lens=lens)
    logger("projection takes: ", time.time() - t1, verbose=verbose)

    # Covering, clustering and mapping step.
    t1 = time.time()
    eps = optimize_dbscan_eps(input_data, threshold=eps_threshold, dm=_eu_dm)
    clusterer = DBSCAN(eps=eps, min_samples=min_samples)
    cover = Cover(projected_data=MinMaxScaler().fit_transform(projected_X),
                  resolution=r, overlap=overlap)
    graph = tm.map(data=input_data, cover=cover, clusterer=clusterer)
    logger(graph.info(), verbose=verbose)
    logger("graph generator take: ", time.time() - t1, verbose=verbose)
    return graph
def generate_graph(input_data, dis, _eu_dm=None, eps_threshold=95, overlap_params=0.75, min_samples=3, resolution_params="auto", filter_=Filter.PCOA):
    """Construct a TDA mapper graph and a descriptive name for it.

    Projects the precomputed distance matrix ``dis`` through ``filter_``,
    then covers, clusters (DBSCAN with an optimised eps) and maps
    ``input_data`` into a graph.

    Returns a 3-tuple ``(graph, graph_name, projected_X)`` where
    ``graph_name`` encodes the eps threshold, overlap, resolution and
    filter class. NOTE(review): the default ``resolution_params="auto"``
    is handed straight to ``Cover`` as ``resolution`` — confirm Cover
    accepts the string "auto".
    """
    tm = mapper.Mapper(verbose=1)

    # TDA Step2. Projection
    started = time.time()
    precomputed_metric = Metric(metric="precomputed")
    lens = [filter_(components=[0, 1], metric=precomputed_metric, random_state=100)]
    projected_X = tm.filter(dis, lens=lens)
    if global_verbose:
        print("projection takes: ", time.time() - started)

    # Covering, clustering & mapping.
    started = time.time()
    eps = optimize_dbscan_eps(input_data, threshold=eps_threshold, dm=_eu_dm)
    clusterer = DBSCAN(eps=eps, min_samples=min_samples)
    r = resolution_params
    scaled_projection = MinMaxScaler().fit_transform(projected_X)
    cover = Cover(projected_data=scaled_projection, resolution=r, overlap=overlap_params)
    graph = tm.map(data=input_data, cover=cover, clusterer=clusterer)
    if global_verbose:
        print(graph.info())
        print("graph time: ", time.time() - started)

    # Encode the main hyper-parameters into a reproducible file-style name.
    graph_name = "{eps}_{overlap}_{r}_{filter}.graph".format(
        eps=eps_threshold,
        overlap=overlap_params,
        r=r,
        filter=lens[0].__class__.__name__)
    return graph, graph_name, projected_X
from tmap.tda.metric import Metric
from tmap.tda.utils import optimize_dbscan_eps
from tmap.test import load_data

# load taxa abundance data, sample metadata and precomputed distance matrix
# NOTE(review): squareform/pdist, mapper, Filter, DBSCAN, Cover and
# MinMaxScaler are not imported in this chunk — presumably imported earlier
# in the file; verify.
X = load_data.FGFP_genus_profile()
metadata = load_data.FGFP_metadata_ready()
# Pairwise Bray-Curtis distances, expanded to a square matrix.
dm = squareform(pdist(X, metric='braycurtis'))
############################################################
# Build the mapper graph: project the precomputed distances with a PCOA
# lens, then cover, cluster (DBSCAN) and map the samples into a graph.
tm = mapper.Mapper(verbose=1)
metric = Metric(metric="precomputed")
lens = [Filter.PCOA(components=[0, 1], metric=metric)]  # for quick
projected_X = tm.filter(dm, lens=lens)
eps = optimize_dbscan_eps(X, threshold=95)
clusterer = DBSCAN(eps=eps, min_samples=3)
cover = Cover(projected_data=MinMaxScaler().fit_transform(projected_X), resolution=50, overlap=0.75)
graph = tm.map(data=X, cover=cover, clusterer=clusterer)
# Per-node aggregation of the abundance table onto the graph.
node_data = graph.transform_sn(X)

from tmap.netx.SAFE import SAFE_batch, get_SAFE_summary

# SAFE enrichment analysis: run twice with different random seeds
# (100 vs 500), presumably to check result stability — confirm intent.
n_iter = 5000
safe_scores = SAFE_batch(graph, metadata=metadata, n_iter=n_iter, nr_threshold=0.5, _mode='both', random_seed=100)
# _mode='both' yields enrichment and decline score tables.
enriched_SAFE_metadata, declined_SAFE_metadata = safe_scores['enrich'], safe_scores['decline']
safe_summary_metadata = get_SAFE_summary(graph=graph, metadata=metadata, safe_scores=enriched_SAFE_metadata, n_iter=n_iter, p_value=0.05)
############################################################
# Second run with a different seed; only the enrichment table is summarised.
safe_scores = SAFE_batch(graph, metadata=metadata, n_iter=n_iter, nr_threshold=0.5, _mode='both', random_seed=500)
enriched_SAFE_metadata = safe_scores['enrich']
safe_summary_metadata2 = get_SAFE_summary(graph=graph, metadata=metadata, safe_scores=enriched_SAFE_metadata, n_iter=n_iter, p_value=0.05)
# prepare graph X, y = datasets.make_circles(n_samples=5000, noise=0.05, factor=0.3, random_state=100) X = pd.DataFrame(X, index=['t%s' % _ for _ in range(X.shape[0])]) # Step1. initiate a Mapper tm = mapper.Mapper(verbose=1) # Step2. Projection lens = [Filter.PCA(components=[0, 1])] projected_X = tm.filter(X, lens=lens) # Step3. Covering, clustering & mapping clusterer = DBSCAN(eps=0.1, min_samples=5) cover = Cover(projected_data=projected_X, resolution=20, overlap=0.1) graph = tm.map(data=X, cover=cover, clusterer=clusterer) ############################################################ # start test print(np.__version__) assert len(graph.node) == 183 assert graph.node[2]['size'] == 28 assert graph.get_sample_size(2) == 28 assert graph.get_sample_size(30) == 33 assert graph.cover_ratio() - 0.9954 <= 1e-6 assert set(graph.node2sample(2)) == { 't1592', 't4921', 't4109', 't3514', 't4193', 't2988', 't3263', 't2410', 't3763', 't1438', 't3267', 't2545', 't3434', 't4769', 't4100', 't3315',