Example 1
import time

from sklearn.cluster import DBSCAN
from sklearn.preprocessing import MinMaxScaler
from tmap.tda import mapper
from tmap.tda.cover import Cover
from tmap.tda.metric import Metric
from tmap.tda.utils import optimize_dbscan_eps

# `_filter_dict` (filter name -> tmap Filter class) and `logger` are
# module-level helpers defined elsewhere in the original source file.


def generate_graph(input_data,
                   dis=None,
                   _eu_dm=None,
                   eps_threshold=95,
                   overlap=0.75,
                   min_samples=3,
                   r=40,
                   filter='PCOA',
                   verbose=1):
    if filter not in _filter_dict:
        logger("Unknown filter; available filters are: ",
               ','.join(_filter_dict.keys()),
               verbose=1)
        return
    filter = _filter_dict[filter]
    tm = mapper.Mapper(verbose=verbose)
    t1 = time.time()
    metric = Metric(metric="precomputed")
    lens = [filter(components=[0, 1], metric=metric, random_state=100)]
    projected_X = tm.filter(dis, lens=lens)
    logger("projection takes: ", time.time() - t1, verbose=verbose)
    # DBSCAN clustering and covering of the projected data
    t1 = time.time()
    eps = optimize_dbscan_eps(input_data, threshold=eps_threshold, dm=_eu_dm)
    clusterer = DBSCAN(eps=eps, min_samples=min_samples)
    cover = Cover(projected_data=MinMaxScaler().fit_transform(projected_X),
                  resolution=r,
                  overlap=overlap)
    graph = tm.map(data=input_data, cover=cover, clusterer=clusterer)
    logger(graph.info(), verbose=verbose)
    logger("graph generator take: ", time.time() - t1, verbose=verbose)
    return graph
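
A minimal usage sketch for this variant, assuming an abundance table X like the one loaded in Example 3 (the call below is illustrative, not part of the original snippet):

from scipy.spatial.distance import pdist, squareform

dm = squareform(pdist(X, metric='braycurtis'))  # square sample-by-sample distance matrix
graph = generate_graph(X, dis=dm, eps_threshold=95, overlap=0.75,
                       min_samples=3, r=40, filter='PCOA')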
Example 2
import time

from sklearn.cluster import DBSCAN
from sklearn.preprocessing import MinMaxScaler
from tmap.tda import mapper, Filter
from tmap.tda.cover import Cover
from tmap.tda.metric import Metric
from tmap.tda.utils import optimize_dbscan_eps

# `global_verbose` is a module-level flag in the original source file;
# defined here so the snippet runs standalone.
global_verbose = True


def generate_graph(input_data,
                   dis,
                   _eu_dm=None,
                   eps_threshold=95,
                   overlap_params=0.75,
                   min_samples=3,
                   resolution_params="auto",
                   filter_=Filter.PCOA):
    tm = mapper.Mapper(verbose=1)
    # TDA Step2. Projection
    t1 = time.time()
    metric = Metric(metric="precomputed")
    lens = [filter_(components=[0, 1], metric=metric, random_state=100)]
    projected_X = tm.filter(dis, lens=lens)
    if global_verbose:
        print("projection takes: ", time.time() - t1)
    # TDA Step3. Clustering & covering
    t1 = time.time()
    eps = optimize_dbscan_eps(input_data, threshold=eps_threshold, dm=_eu_dm)
    clusterer = DBSCAN(eps=eps, min_samples=min_samples)
    r = resolution_params
    cover = Cover(projected_data=MinMaxScaler().fit_transform(projected_X),
                  resolution=r,
                  overlap=overlap_params)
    graph = tm.map(data=input_data, cover=cover, clusterer=clusterer)
    if global_verbose:
        print(graph.info())
        print("graph time: ", time.time() - t1)

    graph_name = "{eps}_{overlap}_{r}_{filter}.graph".format(
        eps=eps_threshold,
        overlap=overlap_params,
        r=r,
        filter=lens[0].__class__.__name__)
    return graph, graph_name, projected_X
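
The returned graph_name encodes the parameter settings (e.g. "95_0.75_auto_PCOA.graph"). A hedged sketch of consuming the returned triple, using pickle for serialization (an assumption for illustration, not a tmap API):

import pickle

graph, graph_name, projected_X = generate_graph(X, dm)
# Persist the graph under its parameter-encoded name (illustrative choice).
with open(graph_name, 'wb') as f:
    pickle.dump(graph, f)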
Example 3
from scipy.spatial.distance import pdist, squareform
from sklearn.cluster import DBSCAN
from sklearn.preprocessing import MinMaxScaler

from tmap.tda import mapper, Filter
from tmap.tda.cover import Cover
from tmap.tda.metric import Metric
from tmap.tda.utils import optimize_dbscan_eps
from tmap.test import load_data

# load taxa abundance data, sample metadata and precomputed distance matrix
X = load_data.FGFP_genus_profile()
metadata = load_data.FGFP_metadata_ready()
dm = squareform(pdist(X, metric='braycurtis'))
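# dm is a square Bray-Curtis dissimilarity matrix (samples x samples), which is
# why the Metric below is declared as "precomputed".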
############################################################
tm = mapper.Mapper(verbose=1)
metric = Metric(metric="precomputed")
lens = [Filter.PCOA(components=[0, 1], metric=metric)]  # first two PCoA axes; quick on a precomputed matrix
projected_X = tm.filter(dm, lens=lens)
eps = optimize_dbscan_eps(X, threshold=95)
clusterer = DBSCAN(eps=eps, min_samples=3)
cover = Cover(projected_data=MinMaxScaler().fit_transform(projected_X), resolution=50, overlap=0.75)
graph = tm.map(data=X, cover=cover, clusterer=clusterer)
node_data = graph.transform_sn(X)
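# transform_sn aggregates the sample-level table X to one value per network
# node (node-level abundances of the member samples).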

from tmap.netx.SAFE import SAFE_batch, get_SAFE_summary

n_iter = 5000
safe_scores = SAFE_batch(graph, metadata=metadata, n_iter=n_iter, nr_threshold=0.5, _mode='both', random_seed=100)
enriched_SAFE_metadata, declined_SAFE_metadata = safe_scores['enrich'], safe_scores['decline']
safe_summary_metadata = get_SAFE_summary(graph=graph, metadata=metadata, safe_scores=enriched_SAFE_metadata,
                                         n_iter=n_iter, p_value=0.05)
############################################################
safe_scores = SAFE_batch(graph, metadata=metadata, n_iter=n_iter, nr_threshold=0.5, _mode='both', random_seed=500)
enriched_SAFE_metadata = safe_scores['enrich']
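
The second SAFE_batch call repeats the enrichment analysis with a different random seed. A quick, illustrative way to gauge seed-to-seed stability of the permutation scores, assuming (worth verifying) that the 'enrich' scores come back as a pandas DataFrame:

safe_100 = SAFE_batch(graph, metadata=metadata, n_iter=n_iter, nr_threshold=0.5,
                      _mode='both', random_seed=100)['enrich']
safe_500 = SAFE_batch(graph, metadata=metadata, n_iter=n_iter, nr_threshold=0.5,
                      _mode='both', random_seed=500)['enrich']
# With 5000 permutations the two runs should agree closely; one simple check
# (illustrative, not a tmap API) is the per-column score correlation:
print(safe_100.corrwith(safe_500).describe())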