def tsne_executor(X, y, logger, path_logs):
    """Compute one t-SNE embedding per task and cell line, then save them.

    For every task in ``config['general']['tasks']`` the inputs are filtered
    with ``filter_labels`` and a t-SNE embedding is computed for each cell
    line. Each embedding is stored with its labels appended as the last
    column, under the key ``"<task_name>_<cell_line>"``.

    Args:
        X: Input data, forwarded to ``filter_labels``.
        y: Labels, forwarded to ``filter_labels``.
        logger: Object exposing ``debug`` for progress messages.
        path_logs: Output location forwarded to ``save_tsne`` / ``plot_tsne``.
    """
    check_input_type(
        ['epi'],
        "t-SNE experiment work just with epigenetic data, {} found".format(
            config['general']['input_type']))

    cell_lines = config['general']['cell_lines']
    tasks_dict = config['general']['tasks']

    # Use only half of the available CPUs so the machine is not overloaded,
    # but never fewer than one: on a single-core host ``cpu_count() // 2``
    # would be 0. The value does not change between tasks, so compute it once.
    cpus = max(1, multiprocessing.cpu_count() // 2)

    results = {}
    for t in tasks_dict:
        task_name, X_filtered, y_filtered = filter_labels(X, y, t)
        logger.debug("TASK: {}".format(task_name))
        logger.debug("Using {} cpus".format(cpus))

        for cl, data, labels in zip(cell_lines, X_filtered, y_filtered):
            logger.debug("Computing t-SNE for {}".format(cl))
            # TODO: add parameters
            tsne = TSNE(perplexity=config['tsne']['perplexity'], n_jobs=cpus)
            tsne_results = tsne.fit_transform(data)
            assert len(tsne_results) == len(labels)
            # Append the labels as a last column so they are saved together
            # with the embedding.
            tsne_results = np.c_[tsne_results, labels]
            results["{}_{}".format(task_name, cl)] = tsne_results

    save_tsne(path_logs, "tsne_results", results)
    if config['tsne']['save_plots']:
        plot_tsne(results, path_logs, "tsne_plot")
def main(feats_path):
    """Reduce pickled feature vectors with t-SNE and pickle the projection.

    The input pickle is expected to hold a mapping of name -> vector; entries
    whose vector is ``None`` are skipped. The projection is written to
    ``<input stem>_tsne.pickle`` in the current working directory.

    Args:
        feats_path: Path of the pickle file holding the feature mapping.
    """
    # NOTE: unpickling can execute arbitrary code; only use trusted files.
    with open(feats_path, 'rb') as source:
        loaded = pickle.Unpickler(source).load()

    kept = {}
    for name, vector in loaded.items():
        if vector is not None:
            kept[name] = vector
    features = np.asarray(list(kept.values()))

    print('[INFO] Conducting t-SNE on ' + feats_path)
    reducer = TSNE(metric='braycurtis',
                   verbose=1,
                   n_iter=5000,
                   random_state=42,
                   n_jobs=-1)
    projection = reducer.fit_transform(features)

    # Derive the output file name from the input file's stem.
    stem, _extension = path.splitext(path.basename(feats_path))
    output = stem + '_tsne.pickle'
    print('[INFO] Saving reduced vectors to ' + output)
    with open(output, 'wb') as sink:
        pickle.dump(projection, sink)
def calcTSNEMulti(data, iterations, perplexity, learning_rate):
    """Embed *data* with t-SNE and return it with ``x`` / ``y`` columns added.

    Args:
        data: Input table; must support ``assign`` (presumably a pandas
            DataFrame -- confirm against callers).
        iterations: Value passed to the model as ``n_iter``.
        perplexity: t-SNE perplexity.
        learning_rate: t-SNE learning rate.

    Returns:
        The result of ``data.assign`` with the first embedding column as
        ``x`` and the second as ``y``.
    """
    model = TSNE(
        n_jobs=4,
        perplexity=perplexity,
        n_iter=iterations,
        learning_rate=learning_rate,
    )
    embedding = model.fit_transform(data)
    first_axis = embedding[:, 0]
    second_axis = embedding[:, 1]
    return data.assign(x=first_axis, y=second_axis)
def set_params(self,
               n_components=2,
               perplexity=30.0,
               early_exaggeration=12,
               learning_rate=200,
               n_iter=1000,
               n_iter_without_progress=30,
               min_grad_norm=1e-07,
               metric='euclidean',
               init='random',
               verbose=0,
               random_state=None,
               method='barnes_hut',
               angle=0.5,
               n_jobs=1,
               cheat_metric=True):
    """Build a ``MulticoreTSNE`` from the given settings and keep it on ``self.tsne``.

    Every argument is forwarded unchanged as the keyword argument of the same
    name. Any previously stored ``self.tsne`` is replaced.
    """
    options = {
        'n_components': n_components,
        'perplexity': perplexity,
        'early_exaggeration': early_exaggeration,
        'learning_rate': learning_rate,
        'n_iter': n_iter,
        'n_iter_without_progress': n_iter_without_progress,
        'min_grad_norm': min_grad_norm,
        'metric': metric,
        'init': init,
        'verbose': verbose,
        'random_state': random_state,
        'method': method,
        'angle': angle,
        'n_jobs': n_jobs,
        'cheat_metric': cheat_metric,
    }
    self.tsne = MulticoreTSNE(**options)
def draw(x, y):
    """Embed *x* with t-SNE and scatter-plot the points coloured by class *y*.

    Classes are assumed to be the integers ``0 .. n_class - 1`` (they are used
    as keys into a fixed table of 11 label names) -- confirm against callers.

    Args:
        x: Feature matrix passed to t-SNE.
        y: Class id per row of *x*.
    """
    from matplotlib.colors import ListedColormap
    from MulticoreTSNE import MulticoreTSNE as TSNE

    print("TSNE: fitting start...")
    tsne = TSNE(2, n_jobs=4, perplexity=30)
    Y = tsne.fit_transform(x)

    labels = [
        'c1', 'c2', 'c3', 'c4', 'c5', 'c6', 'c7', 'c8', 'c9', 'c10', 'open'
    ]
    id_to_label = dict(enumerate(labels))
    y_true = pd.Series(y)
    plt.style.use('ggplot')
    n_class = y_true.unique().shape[0]
    colors = ('gray', 'lightgreen', 'plum', 'DarkMagenta', 'SkyBlue',
              'PaleTurquoise', 'DeepPink', 'Gold', 'Orange', 'Brown',
              'DarkKhaki')
    _, ax = plt.subplots(figsize=(9, 6))
    cmap = ListedColormap(colors)

    # Classes are drawn from the highest id down to 0.
    for idx, label in enumerate(range(n_class - 1, -1, -1)):
        # A positional boolean mask selects the right rows even when *y* is a
        # Series with a non-default index.
        mask = (y_true == label).values
        # ``color=`` (not ``c=``): a single RGBA tuple given as ``c`` is
        # ambiguous and is read as four scalar values when the class happens
        # to contain exactly four points.
        ax.scatter(Y[mask, 0],
                   Y[mask, 1],
                   color=cmap(idx),
                   label=id_to_label[label],
                   alpha=0.5)

    ax.set_title('proto_loss')
    # Shrink the axis to 80% of its width so the legend fits on the right.
    box = ax.get_position()
    ax.set_position([box.x0, box.y0, box.width * 0.8, box.height])
    ax.legend(loc='center left', bbox_to_anchor=(1, 0.5))
def generate2dftEmb():
    """Load the fine-tuned embeddings and reduce them to 2-D with t-SNE.

    Reads a pickled ``word -> vector`` mapping from ``finetuned_path``, stacks
    the vectors into a matrix with 300 columns and runs t-SNE on it. The
    module-level names ``w2id``, ``w``, ``i``, ``word``, ``tsne`` and
    ``post_2d`` are (re)assigned as a side effect.

    Returns:
        Tuple ``(post_2d, w2id, w)``: the t-SNE output, the word -> row index
        mapping, and the embedding matrix.
    """
    global w2id, w, i, word, tsne, post_2d

    # NOTE: unpickling can execute arbitrary code; the file must be trusted.
    # The context manager guarantees the handle is closed.
    with open(finetuned_path, 'rb') as file:
        embedding_map = pickle.load(file)

    # One matrix row per word, plus the word -> row index lookup.
    w2id = {}
    w = np.zeros((len(embedding_map), 300))
    for i, (word, vector) in enumerate(embedding_map.items()):
        w2id[word] = i
        w[i] = vector

    # Reduce the 300-D embeddings to 2-D.
    tsne = TSNE(n_jobs=12)
    post_2d = tsne.fit_transform(w)
    return post_2d, w2id, w
def generate2dpre():
    """Load the pre-trained text embeddings and reduce them to 2-D with t-SNE.

    Each line of ``pretrained_path`` is split on single spaces; the first
    field is the word and the remaining fields are its vector components
    (300 per word). The module-level names ``word``, ``i``, ``pre_w2id``,
    ``tsne`` and ``pre_2d`` are (re)assigned as a side effect.

    Returns:
        Tuple ``(pre_2d, pre_w2id, pre_w)``: the t-SNE output, the
        word -> row index mapping, and the embedding matrix.
    """
    global word, i, pre_w2id, tsne, pre_2d

    # Read words and vectors in one pass. A second iteration over the same
    # open file would start at end-of-file and yield nothing, which would
    # leave the matrix filled with zeros.
    pre_vocab = []
    raw_vectors = []
    with open(pretrained_path, 'r') as pre:
        for line in pre:
            embeds = line.rstrip().split(" ")
            pre_vocab.append(embeds[0])
            raw_vectors.append(embeds[1:])

    pre_w = np.zeros((len(pre_vocab), 300))
    for i, values in enumerate(raw_vectors):
        pre_w[i, :] = np.asarray(values, dtype=float)

    # Word -> row index lookup; a repeated word keeps its last row.
    pre_w2id = {}
    for i, word in enumerate(pre_vocab):
        pre_w2id[word] = i

    # Reduce the 300-D embeddings to 2-D.
    tsne = TSNE(n_jobs=12)
    pre_2d = tsne.fit_transform(pre_w)
    return pre_2d, pre_w2id, pre_w
def run_tSNE(natural_embed, n_jobs, perplexity):
    '''
    Apply t-SNE to the input data using the multicore CPU implementation.

    The backend can be installed with
        pip install MulticoreTSNE
    Alternatives that were tried here are scikit-learn's
    ``sklearn.manifold.TSNE`` (CPU) and ``tsnecuda`` (GPU, requires CUDA 9.0;
    ``conda install tsnecuda -c cannylab``).

    INPUT:
        natural_embed: 2d numpy array with size [number of points, embedding length]
        n_jobs: forwarded to the t-SNE model as ``n_jobs``
        perplexity: forwarded to the t-SNE model as ``perplexity``

    OUTPUT:
        the array returned by ``fit_transform`` for ``natural_embed``
    '''
    # Imported here so the package is only needed when this function runs.
    from MulticoreTSNE import MulticoreTSNE as TSNE

    # NOTE(review): confirm that this backend honours metric='cosine'.
    settings = dict(
        n_jobs=n_jobs,
        perplexity=perplexity,
        n_iter=5000,
        n_iter_without_progress=800,
        learning_rate=20,
        metric='cosine',
    )
    reducer = TSNE(**settings)
    return reducer.fit_transform(natural_embed)
def ex3(wv):
    """Plot a 2-D t-SNE embedding of a fixed list of Polish phrases.

    Each phrase is normalised with ``sanitize``, looked up in *wv*, and the
    resulting vectors are embedded and drawn as an annotated scatter plot.

    Args:
        wv: Word-vector container indexable by an iterable of keys.
    """
    phrases = [
        "szkoda",
        "strata",
        "uszczerbek",
        "szkoda majątkowa",
        # "uszczerbek na zdrowiu",
        "krzywda",
        "niesprawiedliwość",
        "nieszczęście"
    ]
    tsne = MulticoreTSNE(n_components=2, n_jobs=os.cpu_count())

    # Only the phrase vectors are embedded. A prior ``tsne.fit(wv.vectors)``
    # would be wasted work: ``fit_transform`` fits from scratch and discards
    # whatever was fitted before.
    phrase_vectors = wv[(sanitize(phrase) for phrase in phrases)]
    vectors_embedded = tsne.fit_transform(phrase_vectors)

    fig, ax = plt.subplots()
    xs = vectors_embedded[:, 0]
    ys = vectors_embedded[:, 1]
    ax.scatter(xs, ys)
    for i, phrase in enumerate(phrases):
        ax.annotate(phrase, (xs[i], ys[i]))
    plt.show()
def plot_tsne(experience=None, latent_states=None, rewards=None):
    """Embed latent states with t-SNE and plot them coloured by reward.

    At most 10000 samples are used, chosen by a permutation seeded with 0.
    The raw embedding is also pickled to ``tsne_results.pkl``.

    Args:
        experience: Mapping with ``'agent_infos'`` and ``'reward'`` entries;
            only read when *latent_states* or *rewards* is missing.
        latent_states: Array of latent vectors, one row per sample.
        rewards: Array with one reward per sample.
    """
    if latent_states is None or rewards is None:
        latent_states = np.array([
            list(rssm_state.prev_state.stoch)
            for rssm_state in experience['agent_infos']
        ])
        rewards = np.array(experience['reward'])

    np.random.seed(0)
    # Cap the sample size at the data actually available; indexing with a
    # fixed permutation of 10000 raises IndexError on smaller inputs. With
    # 10000 or more samples the selection is unchanged.
    n_samples = min(10000, len(latent_states), len(rewards))
    perm = np.random.permutation(n_samples)
    latent_states = latent_states[perm]
    rewards = rewards[perm]

    feature_cols = ['axis_' + str(i) for i in range(latent_states.shape[1])]
    df = DataFrame(latent_states, columns=feature_cols)
    df['y'] = rewards

    time_start = time()
    tsne = TSNE(n_components=2,
                verbose=1,
                perplexity=1000,
                n_iter=1000,
                n_jobs=16)
    tsne_results = tsne.fit_transform(df[feature_cols].values)
    print('t-SNE done! Time elapsed: {} seconds'.format(time() - time_start))

    # Context manager so the output file is flushed and closed.
    with open('tsne_results.pkl', 'wb') as handle:
        pickle.dump(tsne_results, handle)

    df['tsne-2d-one'] = tsne_results[:, 0]
    df['tsne-2d-two'] = tsne_results[:, 1]
    sns.scatterplot(x="tsne-2d-one",
                    y="tsne-2d-two",
                    hue="y",
                    palette=sns.color_palette("flare", as_cmap=True),
                    data=df,
                    alpha=0.6,
                    s=5)
    plt.show()
def main():
    """Parse the command line, embed the loaded vectors with t-SNE and plot them.

    The plot is handed to ``show`` with the target ``graph/<file name>.svg``,
    where the file name is the last ``/``-separated component of ``--path``.
    ``--dump_dir`` is accepted but not used here.
    """
    parser = argparse.ArgumentParser(description='main function parser')
    arguments = (
        ('--path', dict(type=str, help='load file path', required=True)),
        ('--dump_dir', dict(type=str, help='dump directory', default=None)),
        ('--size', dict(type=int, default=1000,
                        help='embedding vector size')),
    )
    for flag, options in arguments:
        parser.add_argument(flag, **options)
    args = parser.parse_args()

    embeddings, labels = load(args.path, args.size)
    output = args.path.split('/')[-1]

    # t-SNE (a UMAP projection was an earlier alternative at this step).
    reducer = TSNE(n_components=2)
    weights = reducer.fit_transform(embeddings)
    show(weights, labels, f'graph/{output}.svg')
def plot_distribution(
        epoch,
        train,
        path,
        data_x,
        pred_y,
        learning_rate=100,
        n_jobs=-1):
    """Embed *data_x* with t-SNE and draw it via ``draw_plot``.

    Args:
        epoch: Forwarded to ``draw_plot``.
        train: Forwarded to ``draw_plot``.
        path: Output directory; created if missing and forwarded to
            ``draw_plot``.
        data_x: Samples to embed. Inputs with more than two dimensions are
            flattened to one row per sample.
        pred_y: Forwarded to ``draw_plot``.
        learning_rate: t-SNE learning rate.
        n_jobs: Number of parallel jobs given to the t-SNE model.
    """
    print("plotting image on " + path + "...")
    # ``exist_ok`` avoids the check-then-create race.
    os.makedirs(path, exist_ok=True)

    tsne_model = TSNE(n_components=2,
                      learning_rate=learning_rate,
                      n_jobs=n_jobs)

    data_x = np.array(data_x)
    if data_x.ndim > 2:
        # Flatten every sample to a vector: (n, a, b, ...) -> (n, a*b*...).
        data_x = data_x.reshape(len(data_x), -1)

    transformed = tsne_model.fit_transform(data_x)
    xs = transformed[:, 0]
    ys = transformed[:, 1]
    draw_plot(xs, ys, train, epoch, pred_y, path)
def display_closestwords_tsnescatterplot(arg_path_to_model, word):
    """Show, for each query word, a 2-D t-SNE plot of it and its nearest words.

    Args:
        arg_path_to_model: Path of a saved Word2Vec model.
        word: Sequence of query words; one figure is shown per entry.
    """
    model = word2vec.Word2Vec.load(arg_path_to_model)

    for query in word:
        # The query word comes first, followed by its closest words.
        neighbours = model.similar_by_word(query)
        word_labels = [query] + [entry[0] for entry in neighbours]

        # Stack the vectors onto an empty float32 base with 300 columns.
        pieces = [np.empty((0, 300), dtype='f')]
        pieces.extend(np.array([model[label]]) for label in word_labels)
        arr = np.concatenate(pieces, axis=0)

        # Find t-SNE coordinates in 2 dimensions.
        reducer = TSNE(n_components=2, random_state=0)
        np.set_printoptions(suppress=True)
        embedded = reducer.fit_transform(arr)
        x_coords, y_coords = embedded[:, 0], embedded[:, 1]

        # Scatter plot with one annotation per word.
        plt.scatter(x_coords, y_coords)
        for label, px, py in zip(word_labels, x_coords, y_coords):
            plt.annotate(label,
                         xy=(px, py),
                         xytext=(0, 0),
                         textcoords='offset points')

        # Changing the multiplier changes the plot's zoom
        # (smaller multiplier = closer zoom).
        x_low, x_high = x_coords.min()*1, x_coords.max()*1
        y_low, y_high = y_coords.min()*1, y_coords.max()*1
        plt.xlim(x_low, x_high)
        plt.ylim(y_low, y_high)
        plt.show()
def main():
    """Parse the command line, then plot UMAP and t-SNE projections of the data.

    One figure is produced per parameter combination, under
    ``graph/umap/<file name>/`` and ``graph/tsne/<file name>/``. The wall-clock
    time of every projection is printed. ``--dump_dir`` and ``--size`` are
    accepted but not used here.
    """
    parser = argparse.ArgumentParser(description='main function parser')
    parser.add_argument('--path', required=True, type=str,
                        help='load file path')
    parser.add_argument('--dump_dir', default=None, type=str,
                        help='dump directory')
    parser.add_argument('--size', default=1000, type=int,
                        help='embedding vector size')
    args = parser.parse_args()

    embeddings, labels = load(args.path)
    embeddings = np.array(embeddings)
    output = args.path.split('/')[-1]

    # UMAP parameter grid (further candidates: neighbours 35, 55, 75;
    # min_dist 0.001, 0.01).
    n_neighbors = [15]
    min_dists = [0.1]
    for min_dist in min_dists:
        for n_neighbor in n_neighbors:
            start = time.time()
            projector = umap.UMAP(n_neighbors=n_neighbor, min_dist=min_dist)
            weights = projector.fit_transform(embeddings)
            finish = time.time()
            print(f'time: {finish-start} s', flush=True)
            os.makedirs(f'graph/umap/{output}', exist_ok=True)
            show(
                weights, labels,
                f'graph/umap/{output}/min_dist:{min_dist}_neighbor:{n_neighbor}.svg'
            )

    # t-SNE parameter grid (further candidates: 10, 20, 40, 50).
    perplexities = [30]
    for perplexity in perplexities:
        start = time.time()
        tsne_model = TSNE(n_components=2, perplexity=perplexity, n_jobs=10)
        weights = tsne_model.fit_transform(embeddings)
        finish = time.time()
        print(f'time: {finish-start} s', flush=True)
        os.makedirs(f'graph/tsne/{output}', exist_ok=True)
        show(weights, labels,
             f'graph/tsne/{output}/perplexity:{perplexity}.svg')
def __async_tsne_embedding(x):
    """Return the 2-component t-SNE embedding of *x*.

    The input is cast to ``float64`` before it is handed to the model.
    """
    reducer = MulticoreTSNE(n_jobs=32, n_components=2)
    as_double = x.astype(np.float64)
    return reducer.fit_transform(as_double)
def tsne_vis(netZ, rn, img_size, real_imgs):
    """Embed the latent codes of classes below 11 with t-SNE and plot them.

    The embedding is first passed to ``plot_by_latent``; afterwards it is
    min-max normalised per axis and saved as a class-coloured scatter plot in
    ``runs/ims_<rn>/tsne_<rn>.jpg``.

    Args:
        netZ: Model exposing ``emb.weight`` (the latent codes) and
            ``idx2label`` (sample index -> class id).
        rn: Run name used in the output directory and file name.
        img_size: Forwarded to ``plot_by_latent``.
        real_imgs: Images; its length is the number of samples considered.
    """
    import matplotlib.pyplot as plt
    from MulticoreTSNE import MulticoreTSNE as TSNE

    Zs_real = netZ.emb.weight.data.detach().cpu().numpy()
    os.makedirs("runs/ims_%s" % rn, exist_ok=True)

    tsne = TSNE(n_components=2, perplexity=30, n_jobs=20)
    n_samples = len(real_imgs)
    targets = np.asarray([netZ.idx2label[x] for x in range(n_samples)])

    # Positions of the samples to keep. Indexing ``targets`` with the boolean
    # mask directly would yield label values rather than sample indices.
    filtered_indices = np.where(targets < 11)[0]
    targets = targets[filtered_indices]
    Z_filter = Zs_real[filtered_indices]
    print(len(Z_filter))

    reduced_data = tsne.fit_transform(np.asarray(Z_filter, dtype='float64'))
    plot_by_latent(reduced_data,
                   real_imgs,
                   indices=filtered_indices,
                   img_size=img_size,
                   rn=rn,
                   title="G2")

    # Min-max normalise each axis into [0, 1]; a degenerate axis (all values
    # equal) is left at 0 instead of dividing by zero.
    lows = reduced_data[:, :2].min(axis=0)
    spans = reduced_data[:, :2].max(axis=0) - lows
    spans[spans == 0] = 1
    Yn = (reduced_data[:, :2] - lows) / spans

    # Plot the class distribution.
    unique_classes = len(np.unique(targets))
    y_labels_colors = targets
    # ``plt.get_cmap`` replaces ``plt.cm.get_cmap``, which newer matplotlib
    # releases no longer provide.
    plt.scatter(Yn[:, 1],
                -Yn[:, 0],
                c=y_labels_colors,
                cmap=plt.get_cmap("tab20", unique_classes),
                s=10,
                edgecolors='k')

    mn = int(np.floor(y_labels_colors.min()))  # colorbar min value
    mx = int(np.ceil(y_labels_colors.max()))  # colorbar max value
    md = mn + (mx - mn) // 2  # midpoint between min and max
    cbar = plt.colorbar()
    cbar.set_ticks([mn, md, mx])
    cbar.set_ticklabels([mn, md, mx])

    plt.savefig(f"runs/ims_{rn}/tsne_{rn}.jpg")
def compute_tsne(X, y, n_class=2, savepath=None, xlim=(-50,50), ylim=(-50,50), cls_lbl=('Benign','Tumor'), title=' ',PCADIM=50):
    """Embed *X* with t-SNE and scatter-plot it, one colour per class.

    Args:
        X: Feature matrix passed to t-SNE.
        y: Integer class id per row of *X* (compared element-wise, so it
            should be a NumPy array).
        n_class: Number of classes; ids ``0 .. n_class - 1`` are drawn.
        savepath: If given, the figure is saved there and again with
            ``.png`` replaced by ``.pdf``; otherwise it is shown.
        xlim: ``(min, max)`` x-axis limits, or a falsy value to skip.
        ylim: ``(min, max)`` y-axis limits, or a falsy value to skip.
        cls_lbl: Legend labels, in class order.
        title: Figure title.
        PCADIM: Unused; kept for interface compatibility.
    """
    tsne = TSNE(n_jobs=4, random_state=1337)
    embs = tsne.fit_transform(X)

    plt.figure(figsize=(10,10))
    for i in range(n_class):
        inds = np.where(y == i)[0]
        plt.scatter(embs[inds, 0], embs[inds, 1], color=colors[i], marker='*', s=30)

    if xlim:
        plt.xlim(xlim[0], xlim[1])
    if ylim:
        plt.ylim(ylim[0], ylim[1])

    plt.legend(list(cls_lbl))
    # Passed positionally: the ``b`` keyword was renamed to ``visible`` and
    # later removed, while the positional form works across versions.
    plt.grid(None)
    plt.title(title)

    if savepath:
        plt.savefig(savepath, dpi=300, bbox_inches='tight')
        plt.savefig(savepath.replace('.png','.pdf'), dpi=300, bbox_inches='tight')
    else:
        plt.show()
    plt.clf()
def tsne_image(
        features,
        images,
        img_res=64,
        res=4000,
        background_color=255,
        max_feature_size=-1,
        labels=None,
        point_radius=20,
        n_threads=0
):
    """
    Draw images at the 2-D t-SNE positions of their features.

    Parameters
    ---------
    features: numpy array
        Features to visualize; must be two-dimensional.
    images: list or numpy array
        Images belonging to the features.
    img_res: int
        Resolution at which each image is embedded.
    res: int
        Size of the resulting image in pixels.
    background_color: float or numpy array
        Background color value.
    max_feature_size: int
        When positive and smaller than the feature length, the features are
        reduced to this size with PCA first.
    point_radius: int
        Size of the circle for the label image.
    n_threads: int
        Number of threads for t-SNE; values <= 0 select all CPUs.
    labels: List or numpy array
        Optional label per image, used for drawing the circle image.
    """
    features = np.asarray(features, dtype=np.float32)
    assert features.ndim == 2

    print("Starting TSNE")
    s_time = time.time()

    feature_length = features.shape[-1]
    if max_feature_size > 0 and feature_length > max_feature_size:
        features = PCA(n_components=max_feature_size).fit_transform(features)

    workers = n_threads if n_threads > 0 else multiprocessing.cpu_count()
    model = TSNE(n_components=2, verbose=1, random_state=0, n_jobs=workers)
    f2d = model.fit_transform(features)
    print("TSNE done.", (time.time() - s_time))

    print("Starting drawing.")
    return image_util.draw_images_at_locations(images, f2d[:, 0], f2d[:, 1],
                                               img_res, res,
                                               background_color, labels,
                                               point_radius)
def calc_tsne(
    X,
    n_jobs,
    n_components,
    perplexity,
    early_exaggeration,
    learning_rate,
    random_state,
    init="random",
    n_iter=1000,
    n_iter_early_exag=250,
):
    """Fit t-SNE on *X*, log the final KL divergence and return the embedding.

    All arguments other than *X* are forwarded to the model as keyword
    arguments of the same name; the model is always created with
    ``verbose=1``.

    TODO: Typing
    """
    settings = {
        "n_jobs": n_jobs,
        "n_components": n_components,
        "perplexity": perplexity,
        "early_exaggeration": early_exaggeration,
        "learning_rate": learning_rate,
        "random_state": random_state,
        "verbose": 1,
        "init": init,
        "n_iter": n_iter,
        "n_iter_early_exag": n_iter_early_exag,
    }
    model = TSNE(**settings)
    embedding = model.fit_transform(X)
    logger.info("Final error = {}".format(model.kl_divergence_))
    return embedding
def tsne_reduction(samples, perplexity, data=None, n_components=2, l_r=200, dim=2, ex=12, iterations=5000, verbosity=0):
    """Reduce *samples* with t-SNE and return the embedding and the model.

    Args:
        samples: Feature matrix. When ``None``, every column of *data* except
            the last one is used instead.
        perplexity: t-SNE perplexity.
        data: Optional matrix whose last column is not a feature (presumably
            the targets -- confirm against callers).
        n_components: Unused; the output dimensionality is set by *dim*.
        l_r: Learning rate.
        dim: Number of output dimensions.
        ex: Early exaggeration factor.
        iterations: Number of optimisation iterations.
        verbosity: Verbosity level passed to the model.

    Returns:
        Tuple ``(reduced_samples, tsne)``.

    Raises:
        ValueError: If both *samples* and *data* are ``None``.
    """
    if samples is None:
        if data is None:
            # Fail early instead of handing ``None`` to the t-SNE backend.
            raise ValueError("either samples or data must be provided")
        samples = data[:, :-1]

    tsne = TSNE(n_components=dim,
                n_jobs=-1,
                learning_rate=l_r,
                perplexity=perplexity,
                early_exaggeration=ex,
                n_iter=iterations,
                random_state=data_handling.RANDOM_SEED,
                verbose=verbosity)
    reduced_samples = tsne.fit_transform(samples)
    return reduced_samples, tsne
def _ranked_by_frequency(values):
    """Return the distinct entries of *values*, most frequent first."""
    items, counts = np.unique(values, return_counts=True)
    return items[np.argsort(counts)][::-1]


def get_data(n_cmd, n_spk, only_missed=False):
    """Embed the samples of the most frequent commands and speakers.

    The original and the maximised latent vectors of the selected samples are
    embedded together in a single t-SNE run and then split again.

    Args:
        n_cmd: Number of most frequent commands to keep.
        n_spk: Number of most frequent speakers to keep.
        only_missed: When true, frequencies are counted over, and the
            selection is restricted to, the indices listed in ``y_missed``.

    Returns:
        Tuple ``(t_org, t_max, y_cmd, y_spk)``: the embeddings of the
        original and maximised vectors, and the command and speaker labels
        of the selected samples.
    """
    if only_missed:
        # Rank by utterance count among the mis-classified samples only.
        missed_rows = y_missed.astype('int32')
        top_cmd = _ranked_by_frequency(y_command[missed_rows])
        top_spk = _ranked_by_frequency(y_speaker[missed_rows])
    else:
        top_spk = _ranked_by_frequency(y_speaker)
        top_cmd = _ranked_by_frequency(y_command)

    # ``np.unique(..., return_counts=True)`` replaces ``scipy.stats.itemfreq``
    # (no longer shipped with SciPy) and keeps the counts numeric, so both
    # rankings are sorted by value rather than by string representation.
    spk = top_spk[:n_spk]
    cmd = top_cmd[:n_cmd]

    ids = get_indices(speaker_set=spk, command_set=cmd)
    if only_missed:
        ids = np.array([i for i in ids if i in y_missed], dtype='int32')

    y_cmd = y_command[ids]
    y_spk = y_speaker[ids]
    z_org = Z_original[ids]
    z_max = Z_maximize[ids]

    # One joint embedding so both sets share the same 2-D space.
    tsne = TSNE(random_state=SEED)
    t = tsne.fit_transform(np.concatenate((z_org, z_max), axis=0))
    t_org = t[:z_org.shape[0]]
    t_max = t[z_org.shape[0]:]
    return t_org, t_max, y_cmd, y_spk
def __init__(self,
             container,
             perplexity=30.0,
             learning_rate=120.0,
             n_componenets=2,
             n_jobs=4,
             n_iter=1000,
             verbose=1000):
    """Store the container and create the t-SNE engine.

    Args:
        container: EmbeddingContainer
        perplexity: t-SNE perplexity.
        learning_rate: t-SNE learning rate.
        n_componenets: Passed to the engine as ``n_components``.
        n_jobs: Number of parallel jobs for the engine.
        n_iter: Number of optimisation iterations.
        verbose: Verbosity value passed to the engine.
    """
    engine_options = dict(
        perplexity=perplexity,
        learning_rate=learning_rate,
        n_components=n_componenets,
        n_jobs=n_jobs,
        n_iter=n_iter,
        verbose=verbose,
    )
    self._container = container
    self._engine = MulticoreTSNE(**engine_options)

    # Nothing has been computed yet.
    self._results = None
    self._ids = None
    self._label_ids = None
    self._label_names = None
def plot_conti_code_tsne():
    """Plot a t-SNE embedding of the last two code columns, coloured by label.

    The generated-code pickle for the current ``opt`` settings is loaded; its
    ``"z"`` entry supplies the codes and its ``"y"`` entry the labels.
    """
    code_path = (
        "/home/patrick/repositories/hyperspectral_phenotyping_gan/experiments_{}/generated_code_noise{}_disc{}_conti{}_epoch{}.p"
        .format(opt.dataset, opt.n_noise, opt.n_dis, opt.n_conti, opt.epoch))
    # Context manager so the file handle is closed after loading.
    with open(code_path, "rb") as handle:
        data = pickle.load(handle)

    labels = np.array(data["y"]).squeeze()
    labels_unique = np.unique(labels)
    # Only the last two columns of the code are embedded.
    code = np.array(data["z"])[:, -2:]

    tsne = TSNE(n_jobs=26, n_components=2, learning_rate=100)
    Y = tsne.fit_transform(code)

    colors = ["red", "green", "blue"]
    for idx, label in enumerate(labels_unique):
        data_tsne = Y[labels == label]
        plt.scatter(data_tsne[:, 0],
                    data_tsne[:, 1],
                    # Cycle through the palette so more than three labels do
                    # not raise IndexError.
                    c=colors[idx % len(colors)],
                    alpha=0.3,
                    label=str(label))
    plt.legend()
    plt.show()
def run(self, word_embedding):
    """
    Fit the t-SNE model configured on this object and return the embedding.
    :param word_embedding: Word embedding; expected to be only as long as self.num_words.
    :return: The t-SNE result, which is also stored in self.tsne_results.
    """
    self.word_embedding = word_embedding
    word_vector_data = numpy.stack(word_embedding['values'].values, axis=0)

    # Model settings are taken from this object's attributes.
    model_settings = dict(
        n_components=self.num_dimensions,
        perplexity=self.perplexity,
        early_exaggeration=self.early_exaggeration,
        learning_rate=self.learning_rate,
        n_iter=self.num_iterations,
        min_grad_norm=self.min_grad_norm,
        random_state=self.random_state,
        angle=self.angle,
        metric=self.metric,
        init=self.init_method,
        n_jobs=2,
    )
    model = MulticoreTSNE(**model_settings)

    # MulticoreTSNE does not support metrics other than Euclidean, so the
    # vectors are scaled to unit L2 norm first; Euclidean distance then gives
    # results/ordering more similar to cosine similarity.
    unit_vectors = sklearn.preprocessing.normalize(word_vector_data,
                                                   axis=1,
                                                   norm='l2')
    self.tsne_results = model.fit_transform(unit_vectors)
    return self.tsne_results
def train(self, parameters):
    """Fit t-SNE on ``self.x_train`` and save the embedding as a pickle.

    Args:
        parameters: Mapping of keyword arguments for the ``TSNE`` constructor.
    """
    model = TSNE(**parameters)
    embedded = model.fit_transform(self.x_train)
    destination = tsne_outputs_path + 'tsne_outputs.p'
    utils.save_data_to_pkl(embedded, destination)
def run_tSNE(data, n_pc, n_dim, p, verbose = 3, random_state = 0, n_jobs = 20):
    """Reduce *data* with PCA, then embed the leading components with t-SNE.

    Args:
        data: Input matrix.
        n_pc: Number of principal components to compute.
        n_dim: Number of leading principal components fed to t-SNE.
        p: t-SNE perplexity.
        verbose: Verbosity value passed to the t-SNE model.
        random_state: Seed passed to the t-SNE model.
        n_jobs: Number of parallel jobs for the t-SNE model.

    Returns:
        The t-SNE embedding of the first *n_dim* principal components.
    """
    principal_components = PCA(n_components = n_pc).fit_transform(data)
    leading = principal_components[:, :n_dim]
    model = MulticoreTSNE(perplexity = p,
                          verbose = verbose,
                          random_state = random_state,
                          n_jobs = n_jobs)
    return model.fit_transform(leading)
def visualize(self, indices=None, center_num=0, ref_labels=None, use_colors=True):
    """Scatter-plot the 2-D t-SNE embedding, one marker group per speaker.

    Args:
        indices: Rows of the embeddings to show; all rows when omitted or
            empty.
        center_num: Key into ``self.centers_`` selecting the set of cluster
            centres; ``0`` selects ``self.opt_speaker_num_``.
        ref_labels: Reference speaker label per shown row. When omitted or
            empty, labels are predicted by a single spherical k-means step
            initialised from the chosen centres.
        use_colors: Outline each speaker in its own colour when true,
            otherwise in black.

    Raises:
        RuntimeError: If ``self.speaker_labels_`` is empty.
    """
    # Default to all embeddings.
    if indices is None or len(indices) == 0:
        indices = np.arange(len(self.embeddings_))
    # Default to the stored optimal speaker number.
    if center_num == 0:
        center_num = self.opt_speaker_num_

    has_reference = ref_labels is not None and len(ref_labels) != 0
    if has_reference:
        speaker_labels = ref_labels
    else:
        # Allows visualisation of different centre-number configurations.
        spkmeans = SphericalKMeans(n_clusters=len(self.centers_[center_num]),
                                   init=self.centers_[center_num],
                                   max_iter=1,
                                   n_init=1,
                                   n_jobs=1).fit(self.embeddings_[indices])
        speaker_labels = spkmeans.labels_ + 1

    if len(self.speaker_labels_) == 0:
        raise RuntimeError("Clustering not performed.")

    # The t-SNE transform is computed once and cached on the instance.
    if len(self.emb_2d_) == 0:
        print("Computing TSNE transform...")
        tsne = TSNE(n_jobs=4)
        self.emb_2d_ = tsne.fit_transform(self.embeddings_)

    emb_2d = self.emb_2d_[indices]
    # ``int`` instead of the removed ``np.int`` alias; ``asarray`` so plain
    # lists of reference labels are accepted too.
    speaker_labels = np.asarray(speaker_labels).astype(int)
    speakers = np.unique(speaker_labels)
    colors = cm.rainbow(np.linspace(0, 1, len(speakers)))

    plt.figure(figsize=(7, 7))
    # Colours are paired with speakers by position, so label values that are
    # not contiguous or do not start at 1 cannot index out of range.
    for color, speaker in zip(colors, speakers):
        speak_ind = np.where(speaker_labels == speaker)[0]
        x, y = np.transpose(emb_2d[speak_ind])
        edge = color if use_colors else "k"
        plt.scatter(x, y, c="k", edgecolors=edge, s=2, label=speaker)

    plt.legend(title="Speakers", prop={'size': 10})
    if has_reference:
        plt.title("Reference speaker clusters")
    else:
        plt.title("Predicted speaker clusters")
    plt.show()
def get_2D_vector(vectors):
    """
    Use the t-SNE algorithm to map high-dimensional vectors to 2 dimensions.

    http://scikit-learn.org/stable/modules/generated/sklearn.manifold.TSNE.html
    https://distill.pub/2016/misread-tsne/
    """
    settings = dict(perplexity=25,
                    n_components=2,
                    init='random',
                    n_iter=1000,
                    n_jobs=-1)
    reducer = TSNE(**settings)
    projected = reducer.fit_transform(vectors)
    return projected
def run_tsne(path):
    """Embed the matrix stored at *path* and print one line per row.

    Each printed line holds the row's id followed by its first two embedding
    coordinates, separated by single spaces.
    """
    ids, X = load_matrix(path)
    # The default metric is used; metric='cosine' was an earlier alternative.
    embedding = TSNE(n_jobs=8).fit_transform(X)
    for position, point in enumerate(embedding):
        fields = (ids[position], str(point[0]), str(point[1]))
        print(' '.join(fields))
def generate_tsne(path, data, label):
    """Embed *data* with t-SNE and save a scatter plot coloured by *label*.

    Args:
        path: Destination of the saved figure.
        data: Feature matrix passed to t-SNE.
        label: Per-row values used to colour the points.
    """
    # Call syntax: the bare ``print`` statement is a SyntaxError on Python 3.
    print('\nGenerating t-SNE...')
    tsne = TSNE(n_jobs=-1)
    Y = tsne.fit_transform(data)

    plt.figure(figsize=(20, 20))
    plt.scatter(Y[:, 0], Y[:, 1], c=label, s=100, cmap='Set1', alpha=0.2)
    plt.colorbar()
    plt.savefig(path)