def dimension_reduction(*x, algo='pca', **kwargs):
  algo = str(algo).lower()
  assert algo in ('pca', 'tsne', 'umap'), \
      "No support for algorithm: '%s'" % algo
  if x[0].shape[1] == 1:
    raise ValueError("No dimension reduction for input with shape: %s" %
                     str(x[0].shape))
  elif x[0].shape[1] == 2:
    pass
  elif algo == 'tsne':
    x = fast_tsne(*x,
                  n_components=2,
                  perplexity=30.0,
                  learning_rate=200,
                  n_iter=1000,
                  random_state=1234,
                  n_jobs=8,
                  **kwargs)
  elif algo == 'pca':
    x = fast_pca(*x, n_components=2, random_state=1234, **kwargs)
  else:
    x = fast_umap(*x, random_state=1234, **kwargs)
  if len(x) == 1:
    return x[0]
  return x
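# Usage sketch (an illustration, not from the original source): assumes
# `fast_pca`, `fast_tsne` and `fast_umap` from `odin.ml` are in scope.
# Multiple arrays are reduced together and returned as a tuple, in line with
# the train/test usage shown elsewhere in these snippets; a single input is
# returned as one array.
import numpy as np
from odin.ml import fast_pca, fast_tsne, fast_umap

X_train = np.random.randn(1000, 64)
X_test = np.random.randn(200, 64)
X_train_2d, X_test_2d = dimension_reduction(X_train, X_test, algo='pca')
X_2d = dimension_reduction(X_train, algo='umap')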
def evaluate_latent(fn, feeder, title):
  y_true = []
  Z = []
  for outputs in Progbar(feeder.set_batch(batch_mode='file'),
                         name=title,
                         print_report=True,
                         print_summary=False,
                         count_func=lambda x: x[-1].shape[0]):
    name = str(outputs[0])
    idx = int(outputs[1])
    data = outputs[2:]
    assert idx == 0
    y_true.append(name)
    Z.append(fn(*data))
  Z = np.concatenate(Z, axis=0)
  # ====== visualize spectrogram ====== #
  if Z.ndim >= 3:
    sample = np.random.choice(range(len(Z)), size=3, replace=False)
    spec = Z[sample.astype('int32')]
    y = [y_true[int(i)] for i in sample]
    plot_figure(nrow=6, ncol=6)
    for i, (s, tit) in enumerate(zip(spec, y)):
      s = s.reshape(len(s), -1)
      plot_spectrogram(s.T, ax=(1, 3, i + 1), title=tit)
  # ====== visualize each point ====== #
  # flatten to 2D
  Z = np.reshape(Z, newshape=(len(Z), -1))
  # t-SNE if necessary
  if Z.shape[-1] > 3:
    Z = fast_tsne(Z,
                  n_components=3,
                  n_jobs=8,
                  random_state=K.get_rng().randint(0, 10e8))
  # color and marker
  Z_color = [digit_color_map[i.split('_')[-1]] for i in y_true]
  Z_marker = [gender_marker_map[i.split('_')[1]] for i in y_true]
  plot_figure(nrow=6, ncol=20)
  for i, azim in enumerate((15, 60, 120)):
    plot_scatter(x=Z[:, 0], y=Z[:, 1], z=Z[:, 2],
                 ax=(1, 3, i + 1),
                 size=4,
                 color=Z_color,
                 marker=Z_marker,
                 azim=azim,
                 legend=legends if i == 1 else None,
                 legend_ncol=11,
                 fontsize=10,
                 title=title)
  plot_save(os.path.join(FIG_PATH, '%s.pdf' % title))
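# The color/marker lookups above assume utterance names of the form
# "<speaker>_<gender>_..._<digit>"; `digit_color_map`, `gender_marker_map` and
# `legends` are not defined in this snippet. A minimal sketch of how such maps
# could be built (an assumption, not the original definitions):
digit_colors = ['#1f77b4', '#ff7f0e', '#2ca02c', '#d62728', '#9467bd',
                '#8c564b', '#e377c2', '#7f7f7f', '#bcbd22', '#17becf']
digit_color_map = {str(i): c for i, c in enumerate(digit_colors)}
gender_marker_map = {'f': 'o', 'm': '^'}  # hypothetical gender codes
legends = sorted(digit_color_map)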
import os

# silence TensorFlow logging and enable memory growth; these must be set
# before TensorFlow is imported / the GPU is initialized
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
os.environ['TF_FORCE_GPU_ALLOW_GROWTH'] = 'true'

import numpy as np
import tensorflow as tf
from sklearn.datasets import load_digits
from sklearn.model_selection import train_test_split

from odin import ml
from odin import visual as vs

tf.random.set_seed(8)
np.random.seed(8)

X, y = load_digits(return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3)

X_umap = ml.fast_umap(X_train, X_test)
X_tsne = ml.fast_tsne(X_train, X_test)
X_pca = ml.fast_pca(X_train, X_test, n_components=2)

styles = dict(size=12, alpha=0.6, centroids=True)

vs.plot_figure(6, 12)
vs.plot_scatter(x=X_pca[0], color=y_train, ax=(1, 2, 1), **styles)
vs.plot_scatter(x=X_pca[1], color=y_test, ax=(1, 2, 2), **styles)

vs.plot_figure(6, 12)
vs.plot_scatter(x=X_tsne[0], color=y_train, ax=(1, 2, 1), **styles)
vs.plot_scatter(x=X_tsne[1], color=y_test, ax=(1, 2, 2), **styles)

vs.plot_figure(6, 12)
vs.plot_scatter(x=X_umap[0], color=y_train, ax=(1, 2, 1), **styles)
vs.plot_scatter(x=X_umap[1], color=y_test, ax=(1, 2, 2), **styles)
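# Each helper above reduces train and test together and returns a (train, test)
# pair. To persist the three comparison figures, the open figures can be
# written to one PDF with odin's plot_save (the same helper used elsewhere in
# these snippets); the path below is only an example:
vs.plot_save('/tmp/digits_pca_tsne_umap.pdf')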
def plot_imputation_scatter(self, test=True, pca=False, color_by_library=True):
  start_time = time.time()
  n_system = len(self) + 2  # add the original and the corrupted
  data_type = 'test' if test else 'train'
  if n_system <= 5:
    nrow = 1
    ncol = n_system
  else:
    nrow = 2
    ncol = int(np.ceil(n_system / 2))
  X_org = self.posteriors[0].X_test_org if test else self.posteriors[0].X_train_org
  X_crr = self.posteriors[0].X_test if test else self.posteriors[0].X_train
  y = self.posteriors[0].y_test if test else self.posteriors[0].y_train
  labels = self.posteriors[0].labels
  is_binary_classes = self.posteriors[0].is_binary_classes
  allV = [X_org, X_crr] + [
      pos.V_test if test else pos.V_train for pos in self.posteriors
  ]
  assert X_org.shape == X_crr.shape and all(
      v.shape == X_org.shape for v in allV)
  all_names = ["[%s]Original" % data_type, "[%s]Corrupted" % data_type
              ] + [i.short_id_lines for i in self.posteriors]
  # subsample to at most 5000 points for visualization
  if len(X_org) > 5000:
    np.random.seed(5218)
    ids = np.random.permutation(X_org.shape[0])[:5000]
    allV = [v[ids] for v in allV]
    y = y[ids]
  if is_binary_classes:
    y = np.argmax(y, axis=-1)
  else:
    y = ProbabilisticEmbedding().fit_transform(y)
    y = np.argmax(y, axis=-1)
  # log-normalize everything
  allV = [log_norm(v) for v in allV]
  fig = plt.figure(figsize=(min(20, 5 * ncol) + 2, nrow * 5))
  for idx, (name, v) in enumerate(zip(all_names, allV)):
    ax = plt.subplot(nrow, ncol, idx + 1)
    n = np.sum(v, axis=-1)
    v = fast_pca(v, n_components=2) if pca else fast_tsne(v, n_components=2)
    with catch_warnings_ignore(Warning):
      if color_by_library:
        plot_scatter(x=v,
                     val=n,
                     ax=ax,
                     size=8,
                     legend_enable=False,
                     grid=False,
                     title=name)
      else:
        plot_scatter(x=v,
                     color=[labels[i] for i in y],
                     marker=[labels[i] for i in y],
                     ax=ax,
                     size=8,
                     legend_enable=True if idx == 0 else False,
                     grid=False,
                     title=name)
  with catch_warnings_ignore(Warning):
    plt.tight_layout()
  self.add_figure(
      'imputation_scatter_%s_%s' %
      ('lib' if color_by_library else 'cell', data_type), fig)
  return self._log('plot_imputation_scatter[%s] %s(s)' %
                   (data_type, ctext(time.time() - start_time, 'lightyellow')))
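# `log_norm` is not defined in this snippet; a common choice for count data is
# library-size normalization followed by log1p, shown here only as an
# illustrative assumption of what it might do:
import numpy as np

def log_norm(x, scale_factor=10000):
  # scale each row (cell) to the same total count, then log-transform
  lib_size = np.sum(x, axis=-1, keepdims=True)
  return np.log1p(x / np.maximum(lib_size, 1e-8) * scale_factor)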
Z = []
U = []
Z_hat = []
Y = []
for x, y in tqdm(valid):
  qz_x, qu_z, qz_u = vae.encode_two_stages(x)
  Z.append(qz_x.mean())
  U.append(qu_z.mean())
  Z_hat.append(qz_u.mean())
  Y.append(np.argmax(y, axis=-1))
Z = np.concatenate(Z, 0)[:5000]
U = np.concatenate(U, 0)[:5000]
Z_hat = np.concatenate(Z_hat, 0)[:5000]
Y = np.concatenate(Y, 0)[:5000]

plt.figure(figsize=(15, 5), dpi=150)
vs.plot_scatter(fast_tsne(Z), color=Y, grid=False, ax=(1, 3, 1))
vs.plot_scatter(fast_tsne(U), color=Y, grid=False, ax=(1, 3, 2))
vs.plot_scatter(fast_tsne(Z_hat), color=Y, grid=False, ax=(1, 3, 3))
plt.tight_layout()

ids = np.argsort(np.mean(qz_x.stddev(), 0))
ids_u = np.argsort(np.mean(qu_z.stddev(), 0))
plt.figure(figsize=(10, 10), dpi=200)
plot_latent_stats(mean=np.mean(qz_x.mean(), 0)[ids],
                  stddev=np.mean(qz_x.stddev(), 0)[ids],
                  ax=(3, 1, 1),
                  name='q(z|x)')
plot_latent_stats(mean=np.mean(qu_z.mean(), 0)[ids_u],
                  stddev=np.mean(qu_z.stddev(), 0)[ids_u],
                  ax=(3, 1, 2),
                  name='q(u|z)')
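# The 3x1 figure above has one remaining slot; a plausible completion plotting
# the statistics of the reconstructed latent q(z|u) in that slot (an
# assumption, not from the original source):
ids_zhat = np.argsort(np.mean(qz_u.stddev(), 0))
plot_latent_stats(mean=np.mean(qz_u.mean(), 0)[ids_zhat],
                  stddev=np.mean(qz_u.stddev(), 0)[ids_zhat],
                  ax=(3, 1, 3),
                  name='q(z|u)')
plt.tight_layout()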
def compare_methods(X, y, dim, title, n_iter='auto', verbose=0, plda=False):
  print(title, ':', dim)
  # PCA
  pca = PCA(n_components=dim, random_state=random_state)
  pca.fit(X)
  X_pca = pca.transform(X)
  # PLDA (optional)
  if plda:
    plda = ml.PLDA(n_phi=dim, verbose=verbose)
    plda.fit(X=X, y=y)
    X_plda = plda.transform(X)
    n_col = 5
  else:
    plda = None
    X_plda = None
    n_col = 4
  # PPCA
  ppca = ml.PPCA(n_components=dim,
                 verbose=verbose,
                 n_iter=n_iter,
                 random_state=random_state)
  ppca.fit(X)
  X_ppca = ppca.transform(X)
  # supervised PPCA, supervised extractor
  sppca1 = ml.SupervisedPPCA(n_components=dim,
                             verbose=verbose,
                             extractor='supervised',
                             n_iter=n_iter,
                             random_state=random_state)
  sppca1.fit(X, y)
  X_sppca1 = sppca1.transform(X)
  # supervised PPCA, unsupervised extractor
  sppca2 = ml.SupervisedPPCA(n_components=dim,
                             verbose=verbose,
                             extractor='unsupervised',
                             n_iter=n_iter,
                             random_state=random_state)
  sppca2.fit(X, y)
  X_sppca2 = sppca2.transform(X)
  # t-SNE if necessary
  if dim > 2:
    X_pca = ml.fast_tsne(X_pca, n_components=2)
    X_ppca = ml.fast_tsne(X_ppca, n_components=2)
    X_sppca1 = ml.fast_tsne(X_sppca1, n_components=2)
    X_sppca2 = ml.fast_tsne(X_sppca2, n_components=2)
    if X_plda is not None:
      X_plda = ml.fast_tsne(X_plda, n_components=2)
  # plotting
  V.plot_figure(nrow=4, ncol=18)

  plt.subplot(1, n_col, 1)
  plt.scatter(x=X_pca[:, 0], y=X_pca[:, 1], c=y, marker='o', alpha=0.5, s=1)
  plt.xticks([], [])
  plt.yticks([], [])
  plt.title("PCA")

  plt.subplot(1, n_col, 2)
  plt.scatter(x=X_ppca[:, 0], y=X_ppca[:, 1], c=y, marker='o', alpha=0.5, s=1)
  plt.xticks([], [])
  plt.yticks([], [])
  plt.title("PPCA")

  plt.subplot(1, n_col, 3)
  plt.scatter(x=X_sppca1[:, 0], y=X_sppca1[:, 1], c=y, marker='o', alpha=0.5, s=1)
  plt.xticks([], [])
  plt.yticks([], [])
  plt.title("S-PPCA (supervised extractor)")

  plt.subplot(1, n_col, 4)
  plt.scatter(x=X_sppca2[:, 0], y=X_sppca2[:, 1], c=y, marker='o', alpha=0.5, s=1)
  plt.xticks([], [])
  plt.yticks([], [])
  plt.title("S-PPCA (unsupervised extractor)")

  if plda is not None:
    plt.subplot(1, n_col, 5)
    plt.scatter(x=X_plda[:, 0], y=X_plda[:, 1], c=y, marker='o', alpha=0.5, s=1)
    plt.xticks([], [])
    plt.yticks([], [])
    plt.title("PLDA")

  plt.suptitle('[%d]%s' % (dim, title))
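# Example invocation (a sketch, not from the original source): `random_state`
# is assumed to be a module-level constant used by the function above, `PCA` is
# assumed to be sklearn's, and odin's `ml` and `visual as V` modules plus
# matplotlib's `plt` are assumed imported; the data here is sklearn's iris set.
from sklearn.datasets import load_iris
from sklearn.decomposition import PCA

random_state = 1234
X_iris, y_iris = load_iris(return_X_y=True)
compare_methods(X_iris, y_iris, dim=2, title='iris', plda=True)
compare_methods(X_iris, y_iris, dim=3, title='iris', plda=True)  # exercises the t-SNE path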
    inc_labels=True).concatenate(
        sc.create_dataset(batch_size=batch_size,
                          partition='test',
                          inc_labels=True)):
  X_test.append(x)
  y_test.append(y)
X_test = tf.concat(X_test, axis=0)
y_test = tf.concat(y_test, axis=0)

# prefer UMAP for the 2-D embedding, fall back to t-SNE if UMAP is unavailable
try:
  from odin.ml import fast_umap
  x_ = fast_umap(X_test.numpy())
  algo = "umap"
except Exception:
  from odin.ml import fast_tsne
  x_ = fast_tsne(X_test.numpy())
  algo = "tsne"


# ===========================================================================
# Model
# ===========================================================================
def fig2image(fig: plt.Figure, dpi=180) -> tf.Tensor:
  r""" Return an image of shape `[1, h, w, 4]` """
  buf = io.BytesIO()
  fig.savefig(buf, format='png', dpi=dpi)
  buf.seek(0)
  image = tf.image.decode_png(buf.getvalue(), channels=4)
  # add batch dimension
  image = tf.expand_dims(image, 0)
  return image
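# Example use of fig2image (a sketch, not from the original source): render the
# 2-D embedding computed above and log it as a TensorBoard image summary; the
# log directory is only an illustration.
fig = plt.figure(figsize=(8, 8))
plt.scatter(x_[:, 0], x_[:, 1], s=4, alpha=0.6)
plt.title(algo)
image = fig2image(fig)
writer = tf.summary.create_file_writer('/tmp/logs')
with writer.as_default():
  tf.summary.image('latent_%s' % algo, image, step=0)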