Example #1
0
def generate_graph(input_data,
                   dis=None,
                   _eu_dm=None,
                   eps_threshold=95,
                   overlap=0.75,
                   min_samples=3,
                   r=40,
                   filter='PCOA',
                   verbose=1):
    """Build a TDA mapper graph from a precomputed distance matrix.

    Parameters
    ----------
    input_data : sample-by-feature data passed to the clustering/mapping step.
    dis : precomputed distance matrix fed to the projection (filter) step.
    _eu_dm : optional distance matrix forwarded to ``optimize_dbscan_eps``.
    eps_threshold : percentile threshold for the DBSCAN ``eps`` search.
    overlap : cover overlap fraction.
    min_samples : DBSCAN ``min_samples``.
    r : cover resolution.
    filter : key into ``_filter_dict`` selecting the projection filter
        (NOTE: shadows the ``filter`` builtin; name kept for caller
        compatibility).
    verbose : verbosity flag forwarded to the mapper and loggers.

    Returns
    -------
    The mapped graph, or ``None`` when ``filter`` is not a known key.
    """
    if filter not in _filter_dict:
        # Always logged (verbose=1) so the caller sees why None is returned.
        # Message typos fixed ("Wrong filter you provide, available fitler
        # are" -> readable English).
        logger("Unknown filter provided; available filters are",
               ','.join(_filter_dict.keys()),
               verbose=1)
        return None
    filter = _filter_dict[filter]

    tm = mapper.Mapper(verbose=verbose)

    # Projection step: run the selected filter over the precomputed distances.
    t1 = time.time()
    metric = Metric(metric="precomputed")
    lens = [filter(components=[0, 1], metric=metric, random_state=100)]
    projected_X = tm.filter(dis, lens=lens)
    logger("projection takes: ", time.time() - t1, verbose=verbose)

    # Covering, clustering & mapping step.
    t1 = time.time()
    eps = optimize_dbscan_eps(input_data, threshold=eps_threshold, dm=_eu_dm)
    clusterer = DBSCAN(eps=eps, min_samples=min_samples)
    cover = Cover(projected_data=MinMaxScaler().fit_transform(projected_X),
                  resolution=r,
                  overlap=overlap)
    graph = tm.map(data=input_data, cover=cover, clusterer=clusterer)
    logger(graph.info(), verbose=verbose)
    logger("graph generator take: ", time.time() - t1, verbose=verbose)
    return graph
Example #2
0
def generate_graph(input_data,
                   dis,
                   _eu_dm=None,
                   eps_threshold=95,
                   overlap_params=0.75,
                   min_samples=3,
                   resolution_params="auto",
                   filter_=Filter.PCOA):
    """Project a precomputed distance matrix, cluster it, and map a graph.

    Returns a ``(graph, graph_name, projected_X)`` tuple, where
    ``graph_name`` encodes the eps threshold, overlap, resolution and
    filter class used.
    """
    tm = mapper.Mapper(verbose=1)

    # TDA Step2. Projection
    start = time.time()
    lens = [filter_(components=[0, 1],
                    metric=Metric(metric="precomputed"),
                    random_state=100)]
    projected_X = tm.filter(dis, lens=lens)
    if global_verbose:
        print("projection takes: ", time.time() - start)

    # Covering, clustering & mapping.
    start = time.time()
    eps = optimize_dbscan_eps(input_data, threshold=eps_threshold, dm=_eu_dm)
    clusterer = DBSCAN(eps=eps, min_samples=min_samples)
    r = resolution_params
    scaled = MinMaxScaler().fit_transform(projected_X)
    cover = Cover(projected_data=scaled,
                  resolution=r,
                  overlap=overlap_params)
    graph = tm.map(data=input_data, cover=cover, clusterer=clusterer)
    if global_verbose:
        print(graph.info())
        print("graph time: ", time.time() - start)

    graph_name = "{eps}_{overlap}_{r}_{filter}.graph".format(
        eps=eps_threshold,
        overlap=overlap_params,
        r=r,
        filter=lens[0].__class__.__name__)
    return graph, graph_name, projected_X
Example #3
0
from tmap.tda.metric import Metric
from tmap.tda.utils import optimize_dbscan_eps
from tmap.test import load_data

# Load taxa abundance data, sample metadata and a Bray-Curtis
# distance matrix computed from the genus-level profile.
X = load_data.FGFP_genus_profile()
metadata = load_data.FGFP_metadata_ready()
dm = squareform(pdist(X, metric='braycurtis'))
############################################################
# Build the mapper graph: PCOA projection of the precomputed distances,
# then DBSCAN clustering under a min-max scaled cover.
tm = mapper.Mapper(verbose=1)
lens = [Filter.PCOA(components=[0, 1],
                    metric=Metric(metric="precomputed"))]  # for quick
projected_X = tm.filter(dm, lens=lens)
clusterer = DBSCAN(eps=optimize_dbscan_eps(X, threshold=95), min_samples=3)
cover = Cover(projected_data=MinMaxScaler().fit_transform(projected_X),
              resolution=50,
              overlap=0.75)
graph = tm.map(data=X, cover=cover, clusterer=clusterer)
node_data = graph.transform_sn(X)

from tmap.netx.SAFE import SAFE_batch, get_SAFE_summary

# SAFE enrichment analysis, run twice with different random seeds;
# only the enriched scores feed each summary.
n_iter = 5000
safe_scores = SAFE_batch(graph, metadata=metadata, n_iter=n_iter,
                         nr_threshold=0.5, _mode='both', random_seed=100)
enriched_SAFE_metadata, declined_SAFE_metadata = safe_scores['enrich'], safe_scores['decline']
safe_summary_metadata = get_SAFE_summary(graph=graph, metadata=metadata,
                                         safe_scores=enriched_SAFE_metadata,
                                         n_iter=n_iter, p_value=0.05)
############################################################
safe_scores = SAFE_batch(graph, metadata=metadata, n_iter=n_iter,
                         nr_threshold=0.5, _mode='both', random_seed=500)
enriched_SAFE_metadata = safe_scores['enrich']
safe_summary_metadata2 = get_SAFE_summary(graph=graph, metadata=metadata,
                                          safe_scores=enriched_SAFE_metadata,
                                          n_iter=n_iter, p_value=0.05)
Example #4
0
# prepare graph
# Synthetic two-ring dataset; rows are labeled 't0'..'t4999' through the
# DataFrame index so graph nodes can be mapped back to sample names.
X, y = datasets.make_circles(n_samples=5000,
                             noise=0.05,
                             factor=0.3,
                             random_state=100)
X = pd.DataFrame(X, index=['t%s' % _ for _ in range(X.shape[0])])
# Step1. initiate a Mapper
tm = mapper.Mapper(verbose=1)

# Step2. Projection
# First two principal components serve as the lens.
lens = [Filter.PCA(components=[0, 1])]
projected_X = tm.filter(X, lens=lens)

# Step3. Covering, clustering & mapping
clusterer = DBSCAN(eps=0.1, min_samples=5)
cover = Cover(projected_data=projected_X, resolution=20, overlap=0.1)
graph = tm.map(data=X, cover=cover, clusterer=clusterer)
############################################################
# start test
print(np.__version__)

# Regression checks: node count, per-node sample sizes and the cover
# ratio are pinned to the values produced with random_state=100 above.
assert len(graph.node) == 183
assert graph.node[2]['size'] == 28
assert graph.get_sample_size(2) == 28
assert graph.get_sample_size(30) == 33

assert graph.cover_ratio() - 0.9954 <= 1e-6
assert set(graph.node2sample(2)) == {
    't1592', 't4921', 't4109', 't3514', 't4193', 't2988', 't3263', 't2410',
    't3763', 't1438', 't3267', 't2545', 't3434', 't4769', 't4100', 't3315',