def time_range(sample, start, end):
    # Mark samples from the given donor collected within [start, end] (1) vs. all others (0).
    target_vals = [1 if metadata.loc[_, "HOST_SUBJECT_ID"] == "2202:Donor%s" % sample
                   and metadata.loc[_, "COLLECTION_DAY"] in range(start, end + 1)
                   else 0
                   for _ in X.index]
    color = Color(target=target_vals, dtype="numerical", target_by="sample")
    graph.show(color=color, fig_size=(10, 10), node_size=15, notshow=True)
    title("Subject %s at %s to %s" % (sample, start, end))
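# A minimal usage sketch, assuming the surrounding script defines `metadata`,
# `X`, `graph`, `Color`, and matplotlib's `title` as above; the donor ID "A"
# and the day windows are placeholders, not values from the original script.
time_range("A", 0, 60)     # Subject A over the first two months
time_range("A", 61, 120)   # Subject A over the following two months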
def main(args):
    graph = Graph().read(args.graph)
    color = None
    if args.metadata:
        metadata = data_parser(args.metadata)
        col = args.column
        if not col:
            logger("No column assigned, so no color will be applied.", verbose=1)
        else:
            col_data = metadata.loc[:, col]
            color = Color(col_data, dtype=args.dtype, target_by='sample')
    vis_progressX(graph,
                  mode='file',
                  simple=not args.complex,
                  color=color,
                  filename=args.output,
                  auto_open=False)
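# A sketch of how main() might be wired to a command line. Only the attribute
# names (graph, metadata, column, dtype, complex, output) are taken from the
# function above; the flag spellings and help texts are assumptions.
import argparse

parser = argparse.ArgumentParser()
parser.add_argument("--graph", required=True, help="path to a saved Graph")
parser.add_argument("--metadata", help="optional metadata table used for coloring")
parser.add_argument("--column", help="metadata column to color by")
parser.add_argument("--dtype", default="numerical",
                    choices=["numerical", "categorical"])
parser.add_argument("--complex", action="store_true",
                    help="render the full (non-simple) view")
parser.add_argument("--output", required=True, help="output HTML file")

if __name__ == "__main__":
    main(parser.parse_args())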
              overlap=0.75)
graph = tm.map(data=X, cover=cover, clusterer=clusterer)
print(graph.info())

## Step 6. SAFE test for every feature.
# target_feature = 'Faecalibacterium'
# target_feature = 'Prevotella'
target_feature = 'Bacteroides'
n_iter = 1000
enriched_scores = SAFE_batch(graph, metadata=X, n_iter=n_iter, _mode='enrich')
target_safe_score = enriched_scores.loc[:, target_feature]

## Step 7. Visualization
# color by sample (target values given as a list)
color = Color(target=X.loc[:, target_feature], dtype="numerical", target_by="sample")
graph.show(color=color, fig_size=(10, 10), node_size=15, notshow=True)

# color by node (target values given as a per-node mapping)
color = Color(target=target_safe_score, dtype="numerical", target_by="node")
graph.show(color=color, fig_size=(10, 10), node_size=15, notshow=True)

safe_summary = get_SAFE_summary(graph=graph, metadata=X,
                                safe_scores=enriched_scores,
                                n_iter=n_iter, p_value=0.01)
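# A hedged follow-up: get_SAFE_summary returns a per-feature table, but the
# exact column names vary by tmap version, so this sketch sorts by the first
# column instead of naming one.
print(safe_summary.shape)
top10 = safe_summary.sort_values(by=safe_summary.columns[0], ascending=False).head(10)
print(top10)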
def show_samples(self, samples, **kwargs):
    # Highlight nodes containing any of the given samples (1) against all other nodes (0).
    nids = self.sample2nodes(samples)
    target = [1 if nid in nids else 0 for nid in self.nodes]
    color = Color(target, target_by='node', dtype='categorical')
    show(self, mode=None, color=color, **kwargs)
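# A hypothetical call, assuming show_samples is attached to the Graph class
# as a method; the sample IDs are placeholders.
graph.show_samples(["sample_03", "sample_17"], fig_size=(8, 8), node_size=15)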
from sklearn.preprocessing import MinMaxScaler, StandardScaler
from sklearn import datasets
from sklearn.cluster import DBSCAN
from tmap.tda import mapper, Filter
from tmap.tda.cover import Cover
from tmap.tda.plot import Color

iris = datasets.load_iris()
X = iris.data
y = iris.target

# Step1. initiate a Mapper
tm = mapper.Mapper(verbose=1)

# Step2. Projection
lens = [Filter.MDS(components=[0, 1], random_state=100)]
projected_X = tm.filter(X, lens=lens)

# Step3. Covering, clustering & mapping
clusterer = DBSCAN(eps=0.75, min_samples=1)
cover = Cover(projected_data=MinMaxScaler().fit_transform(projected_X),
              resolution=20, overlap=0.75)
graph = tm.map(data=StandardScaler().fit_transform(X), cover=cover, clusterer=clusterer)
print(graph.info())

# Step4. Visualization
color = Color(target=y, dtype="categorical")
graph.show(color=color, fig_size=(10, 10), node_size=15, notshow=True)
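# A small follow-up built only on the API shown above (an assumption, not part
# of the original example): recolor the same graph by a single numerical
# feature instead of the class labels.
color = Color(target=X[:, 2], dtype="numerical", target_by="sample")  # petal length
graph.show(color=color, fig_size=(10, 10), node_size=15, notshow=True)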
from collections import Counter

import numpy as np
import pandas as pd
import plotly
import plotly.graph_objs as go
from sklearn.preprocessing import MinMaxScaler

# Color, vis_progressX, logger and output_fig are assumed to be provided by
# the surrounding tmap codebase.


def draw_stratification(graph, SAFE_dict, cols, output, mode='html',
                        n_iter=1000, p_val=0.05, width=1000, height=1000,
                        allnodes=False):
    # Enterotyping-like stratification map based on SAFE scores.
    node_pos = graph.nodePos
    sizes = graph.size
    nodes = graph.nodes
    sizes = np.array([sizes[_] for _ in range(len(nodes))]).reshape(-1, 1)
    transformed_sizes = MinMaxScaler(
        feature_range=(10, 40)).fit_transform(sizes).ravel()

    # Edge segments of the ordination layout (None breaks the polyline).
    xs = []
    ys = []
    for edge in graph.edges:
        xs += [node_pos[edge[0], 0], node_pos[edge[1], 0], None]
        ys += [node_pos[edge[0], 1], node_pos[edge[1], 1], None]

    fig = plotly.tools.make_subplots(1, 1)
    node_line = go.Scatter(  # ordination lines
        visible=True,
        x=xs,
        y=ys,
        marker=dict(color="#8E9DA2", opacity=0.7),
        line=dict(width=1),
        showlegend=False,
        hoverinfo='skip',
        mode="lines")
    fig.append_trace(node_line, 1, 1)

    safe_score_df = pd.DataFrame.from_dict(SAFE_dict)  # rows: nodes, columns: features
    min_p_value = 1.0 / (n_iter + 1.0)
    SAFE_pvalue = np.log10(p_val) / np.log10(min_p_value)
    # For each node, keep its top-scoring feature if that score passes the threshold.
    tmp = [safe_score_df.columns[_]
           if safe_score_df.iloc[idx, _] >= SAFE_pvalue else np.nan
           for idx, _ in enumerate(np.argmax(safe_score_df.values, axis=1))]
    # Per-feature count of nodes where it has the biggest SAFE score and passes p_val.
    t = Counter(tmp)

    if cols:
        if any([_ not in safe_score_df.columns for _ in cols]):
            logger("Some of the provided cols \"%s\" are not in the SAFE summary table."
                   % ';'.join(cols), verbose=1)
        for fea in cols:
            if allnodes:
                color = Color(SAFE_dict[fea], target_by='node', dtype='numerical')
                subfig = vis_progressX(graph, simple=True, mode='obj', color=color)
                subfig.data[1]['name'] = fea
                fig.append_trace(subfig.data[1], 1, 1)
            else:
                # Boolean mask over nodes that pass the SAFE threshold for this feature.
                get_nodes_bool = safe_score_df.loc[:, fea] >= SAFE_pvalue
                if not get_nodes_bool.any():  # all False: nothing enriched
                    logger("fea: %s has no enriched nodes to show on the graph" % fea,
                           verbose=1)
                else:
                    node_position = go.Scatter(  # enriched node positions
                        visible=True,
                        x=node_pos[get_nodes_bool, 0],
                        y=node_pos[get_nodes_bool, 1],
                        hoverinfo="text",
                        marker=dict(
                            # color=node_colors,
                            size=[sizes[_, 0]
                                  for _ in np.arange(node_pos.shape[0])[get_nodes_bool]],
                            opacity=0.9),
                        showlegend=True,
                        name=str(fea) + ' (%s)' % str(t.get(fea, 0)),
                        mode="markers")
                    fig.append_trace(node_position, 1, 1)
    else:
        # No cols given: draw each feature whose SAFE score tops at least 10 nodes.
        for idx, fea in enumerate(
                [_ for _, v in sorted(t.items(), key=lambda x: x[1]) if v >= 10]):
            node_position = go.Scatter(  # node positions for this feature
                visible=True,
                x=node_pos[np.array(tmp) == fea, 0],
                y=node_pos[np.array(tmp) == fea, 1],
                hoverinfo="text",
                marker=dict(
                    # color=node_colors,
                    size=[transformed_sizes[_]
                          for _ in np.arange(node_pos.shape[0])[np.array(tmp) == fea]],
                    opacity=0.9),
                showlegend=True,
                name=str(fea) + ' (%s)' % str(t[fea]),
                mode="markers")
            fig.append_trace(node_position, 1, 1)

    fig.layout.width = width
    fig.layout.height = height
    fig.layout.font.size = 15
    fig.layout.hovermode = 'closest'
    output_fig(fig, output, mode)
    logger("Stratification graph has been written to", output, verbose=1)
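# A hypothetical invocation. enriched_scores is assumed to be a DataFrame of
# SAFE scores (rows: nodes, columns: features), e.g. from SAFE_batch as in the
# earlier excerpt; its to_dict() yields the {feature: {node: score}} mapping
# this function consumes. The file name and feature names are placeholders.
draw_stratification(graph,
                    SAFE_dict=enriched_scores.to_dict(),
                    cols=['Bacteroides', 'Prevotella'],
                    output='stratification.html',
                    mode='html',
                    n_iter=1000,
                    p_val=0.05)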
# Step3. Covering, clustering & mapping
clusterer = DBSCAN(eps=0.1, min_samples=5)
cover = Cover(projected_data=projected_X, resolution=20, overlap=0.1)
graph = tm.map(data=X, cover=cover, clusterer=clusterer)

############################################################
metadata = pd.get_dummies(metadata)
metadata.loc[:, 'circle'] = y
node_metadata = graph.transform_sn(metadata, type='s2n')
############################################################

color1 = Color(target=node_metadata.iloc[:, 0], dtype='numerical', target_by='sample')
# color1.get_colors(graph.nodes)
color2 = Color(target=node_metadata.iloc[:, 0], dtype='numerical', target_by='node')
color3 = Color(target=metadata.iloc[:, 0], dtype='numerical', target_by='sample')
assert np.all(color3.get_colors(graph.nodes)[1][1] == color2.get_colors(graph.nodes)[1][1])
color4 = Color(target=node_metadata.iloc[:, 1], dtype='categorical', target_by='node')
# Wrong example: a one-hot node-level column should not be used as a categorical target this way.
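# A sketch of the intended alternative (an assumption built on the API above):
# pass the raw categorical column at the sample level and let Color aggregate
# it onto nodes, instead of handing it a one-hot node-level column.
# `raw_metadata` (the table before pd.get_dummies) and the column name 'group'
# are hypothetical.
color_ok = Color(target=raw_metadata.loc[:, 'group'],
                 dtype='categorical', target_by='sample')
graph.show(color=color_ok, fig_size=(10, 10), node_size=15, notshow=True)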
metric = Metric(metric="precomputed")
lens = [Filter.MDS(components=[0, 1], metric=metric, random_state=100)]
projected_X = tm.filter(dm, lens=lens)

# Step4. Covering, clustering & mapping
eps = optimize_dbscan_eps(X, threshold=99)
clusterer = DBSCAN(eps=eps, min_samples=3)
cover = Cover(projected_data=MinMaxScaler().fit_transform(projected_X),
              resolution=50, overlap=0.85)
graph = tm.map(data=X, cover=cover, clusterer=clusterer)
print(graph.info())

target_feature = 'COLLECTION_DAY'
color = Color(target=metadata.loc[:, target_feature], dtype="numerical", target_by="sample")
graph.show(color=color, fig_size=(10, 10), node_size=15, notshow=True)

target_feature = 'HOST_SUBJECT_ID'
color = Color(target=metadata.loc[:, target_feature], dtype="categorical", target_by="sample")
graph.show(color=color, fig_size=(10, 10), node_size=15, notshow=True)

color = Color(target=metadata.loc[:, target_feature], dtype="numerical", target_by="sample")
graph.show(color=color, fig_size=(10, 10), node_size=15, notshow=True)
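# The snippet above filters a precomputed distance matrix `dm` that is not
# built in this excerpt. A minimal sketch of one plausible construction; the
# Bray-Curtis metric is an assumption (a common choice for compositional
# microbiome data), not taken from the original.
from scipy.spatial.distance import pdist, squareform

dm = squareform(pdist(X, metric="braycurtis"))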
lens = [Filter.MDS(components=[0, 1], metric=metric, random_state=100)]
projected_X = tm.filter(dm, lens=lens)

# Step4. Covering, clustering & mapping
eps = optimize_dbscan_eps(X, threshold=99)
clusterer = DBSCAN(eps=eps, min_samples=3)
cover = Cover(projected_data=MinMaxScaler().fit_transform(projected_X),
              resolution=35, overlap=0.9)
graph = tm.map(data=X, cover=cover, clusterer=clusterer)
print(graph.info())

target_feature = 'COLLECTION_DAY'
color = Color(target=metadata.loc[:, target_feature], dtype="numerical", target_by="sample")
graph.show(color=color, fig_size=(10, 10), node_size=15, strength=0.03, notshow=True)

target_feature = 'Bacteroides'
color = Color(target=X.loc[:, target_feature], dtype="numerical", target_by="sample")
graph.show(color=color, fig_size=(10, 10), node_size=15, strength=0.03, notshow=True)