def get_2d_coordinates_tsne(multinet, output_format="json", verbose=True):
    """Project the network embedding to two dimensions with t-SNE.

    ``multinet.embedding`` is read as a pair: element 0 is the matrix that
    is handed to t-SNE, element 1 is stored as the ``node_codes`` column.
    Node names come from ``multinet.get_nodes()``.

    :param multinet: object exposing ``embedding``, ``get_nodes()`` and
        ``monitor()``.
    :param output_format: ``"json"`` (records-oriented JSON string),
        ``"dataframe"`` (the pandas DataFrame itself) or ``"pos_dict"``
        (``{node_name: (dim1, dim2)}``).
    :param verbose: when true, report progress through ``multinet.monitor``.
    :returns: the projection in the requested format, or ``None`` for any
        other ``output_format`` (the projection is still computed).
    """
    embedding = multinet.embedding
    matrix, node_codes = embedding[0], embedding[1]

    if verbose:
        multinet.monitor("Doing the TSNE reduction to 2 dimensions!")

    # ``parallel_tsne`` is a module-level switch; only the parallel
    # implementation is given an ``n_jobs`` argument.
    tsne_kwargs = {"n_components": 2}
    if parallel_tsne:
        tsne_kwargs["n_jobs"] = mp.cpu_count()
    coords = TSNE(**tsne_kwargs).fit_transform(matrix)

    frame = pd.DataFrame(coords, columns=['dim1', 'dim2'])
    frame['node_names'] = list(multinet.get_nodes())
    frame['node_codes'] = node_codes

    if output_format == "json":
        return frame.to_json(orient='records')
    if output_format == "dataframe":
        # pure pandas dataframe
        return frame
    if output_format == "pos_dict":
        return {
            row['node_names']: (row['dim1'], row['dim2'])
            for _, row in frame.iterrows()
        }
    return None
def compare_embeddings_in_group(config, data, n_samples, point_size=10, log=True, images_path=None):
    """Plot t-SNE maps of raw action chunks next to the DynE embeddings.

    ``n_samples`` episode indices are drawn at random from ``data``. For
    every sampled episode the actions (first step dropped, each remaining
    step repeated twice) are cut into chunks of ``config.traj_len`` steps
    and flattened; the matching rewards are halved, repeated the same way
    and summed per chunk. The same episodes are passed through
    ``data.transform_episode`` and the first returned value is used as the
    learned embedding. Both point sets are projected with t-SNE and drawn
    on a 2x2 grid (rows: colouring by time / by reward; columns: raw / DynE).

    :param config: object providing ``traj_len``, ``env`` and ``n_samples``.
    :param data: indexable dataset; ``data[k]`` unpacks to
        ``(obs, action, reward)`` and ``data.transform_episode(k)`` returns
        three values.
    :param n_samples: number of episodes to sample.
    :param point_size: marker size forwarded to ``tsne_plot_2d``.
    :param log: when true, upload the figure to Weights & Biases. Note that
        ``tsne_plot_2d`` is always called with ``log=False``.
    :param images_path: directory (path-like supporting ``/``) to save the
        PNG into. When ``None`` the figure is not written to disk
        (previously this raised ``TypeError``).
    """
    sample_idx = random.sample(range(len(data)), n_samples)

    # The first episode fixes the action width and the usable episode
    # length (truncated to a whole number of trajectories).
    _, first_action, _ = data[0]
    first_action = torch.repeat_interleave(first_action[1:], 2, dim=0)
    action_dim = first_action.size(1)
    initial_episode_size = first_action.size(0)
    actual_episode_size = initial_episode_size - (initial_episode_size % config.traj_len)
    n_chunks = actual_episode_size // config.traj_len

    def _project(vectors):
        # Both projections use the same fixed t-SNE configuration.
        model = TSNE(perplexity=30, n_components=2, init='random', n_iter=3500,
                     random_state=32, n_jobs=8)
        return model.fit_transform(np.array(vectors))

    rewards_ak = []
    embeddings_ak = []
    for k in sample_idx:
        _, raw_action, raw_reward = data[k]
        raw_embeddings = torch.repeat_interleave(raw_action[1:], 2, dim=0)[:actual_episode_size] \
            .reshape([n_chunks, config.traj_len * action_dim])
        rewards = torch.repeat_interleave(raw_reward[1:] / 2, 2, dim=0)[:actual_episode_size] \
            .reshape([n_chunks, config.traj_len]).sum(axis=1)
        for vector, reward in zip(raw_embeddings, rewards):
            embeddings_ak.append(vector.numpy())
            rewards_ak.append(reward)
    embeddings_ak_2d = _project(embeddings_ak)

    dyne_emb_ak = []
    for k in sample_idx:
        mu, _, _ = data.transform_episode(k)
        dyne_emb_ak.extend(vector.numpy() for vector in mu)
    embeddings_dyne_ak_2d = _project(dyne_emb_ak)

    fig, axes = plt.subplots(2, 2, figsize=(20, 20), constrained_layout=True)
    panels = (
        ('raw actions by time', embeddings_ak_2d, 'time', axes[0][0]),
        ('dyne actions by time', embeddings_dyne_ak_2d, 'time', axes[0][1]),
        ('raw actions by rewards', embeddings_ak_2d, 'rewards', axes[1][0]),
        ('dyne actions by rewards', embeddings_dyne_ak_2d, 'rewards', axes[1][1]),
    )
    for title, points, color_style, ax in panels:
        tsne_plot_2d(title, points, color_style=color_style, rewards=rewards_ak,
                     size=point_size, log=False, ax=ax, episodes_num=n_samples)

    fig.suptitle("{}_DynE-{}".format(config.env, config.traj_len), fontsize=16)
    if images_path is not None:
        # NOTE(review): the file name uses config.n_samples, not the
        # n_samples argument - confirm that this is intended.
        fig.savefig(images_path / "{}_emb_comparison_{}_samples.png".format(config.env, config.n_samples),
                    format='png', dpi=150, bbox_inches='tight')
    if log:
        wandb.log({'Embeddings Comparison': wandb.Image(fig)})
def create_compute_tsne_components_function(input_dim, target_dim, save_folder):
    """Build a function that runs t-SNE on one binary feature file.

    :param input_dim: number of columns the flat feature file is reshaped to.
    :param target_dim: accepted for interface compatibility but currently
        NOT forwarded to the model (see note below).
    :param save_folder: forwarded to ``create_plots`` when plotting is on.
    :returns: ``compute_tsne_components(in_feature_file_path,
        out_feature_file_path)``, which returns ``None``.
    """
    # NOTE(review): the model is built with n_jobs=4 and library defaults
    # only. A fully parameterised constructor (n_components=target_dim,
    # perplexity, learning_rate, ...) used to sit behind ``if False:`` and
    # referenced names that are not defined in this block; that unreachable
    # branch has been removed. Confirm whether target_dim should be honoured.
    tsne = TSNE(n_jobs=4)

    def compute_tsne_components(in_feature_file_path, out_feature_file_path):
        """Read features, embed them, then optionally save and plot."""
        xprint("Processing '{}'...".format(in_feature_file_path))

        # Features are stored as a flat array of ``feature_dtype`` values.
        with open(in_feature_file_path, "rb") as in_feature_file:
            features = np.reshape(
                np.fromfile(in_feature_file, dtype=feature_dtype),
                (-1, input_dim))

        components = tsne.fit_transform(features)
        xprint("features.shape:", features.shape)
        xprint("components.shape:", components.shape)

        # ``procuce_output_files`` (sic) is the module-level flag exactly as
        # this block has always referenced it.
        if procuce_output_files:
            xprint("Creating '{}'...".format(out_feature_file_path))
            with open(out_feature_file_path, "wb") as out_feature_file:
                # Write the t-SNE components to file
                components.tofile(out_feature_file)

        if produce_plots:
            create_plots(components,
                         title=os.path.basename(in_feature_file_path),
                         total_variance=np.sum(np.var(features, axis=0)),
                         save_folder=save_folder)

    return compute_tsne_components
def main():
    """Command-line entry point: project embeddings with UMAP and t-SNE.

    Loads ``(embeddings, labels)`` from ``--path``, runs every configured
    UMAP and t-SNE setting, prints the wall-clock time of each fit and
    writes one SVG per setting below ``graph/umap/<name>/`` and
    ``graph/tsne/<name>/``, where ``<name>`` is the last ``/``-separated
    component of ``--path``. ``--dump_dir`` and ``--size`` are parsed but
    not used here.
    """
    parser = argparse.ArgumentParser(description='main function parser')
    parser.add_argument('--path', type=str, help='load file path', required=True)
    parser.add_argument('--dump_dir', type=str, help='dump directory', default=None)
    parser.add_argument('--size', type=int, default=1000, help='embedding vector size')
    args = parser.parse_args()

    embeddings, labels = load(args.path)
    embeddings = np.array(embeddings)
    output = args.path.split('/')[-1]

    def _timed_fit(reducer):
        # Fit the reducer on the embeddings and report how long it took.
        started = time.time()
        projected = reducer.fit_transform(embeddings)
        finished = time.time()
        print(f'time: {finished-started} s', flush=True)
        return projected

    # UMAP (further candidates: n_neighbors 35, 55, 75; min_dist 0.001, 0.01)
    n_neighbors = [15]
    min_dists = [0.1]
    for min_dist in min_dists:
        for n_neighbor in n_neighbors:
            weights = _timed_fit(umap.UMAP(n_neighbors=n_neighbor, min_dist=min_dist))
            os.makedirs(f'graph/umap/{output}', exist_ok=True)
            show(weights, labels,
                 f'graph/umap/{output}/min_dist:{min_dist}_neighbor:{n_neighbor}.svg')

    # t-SNE (further candidates: perplexity 10, 20, 40, 50)
    perplexities = [30]
    for perplexity in perplexities:
        weights = _timed_fit(TSNE(n_components=2, perplexity=perplexity, n_jobs=10))
        os.makedirs(f'graph/tsne/{output}', exist_ok=True)
        show(weights, labels, f'graph/tsne/{output}/perplexity:{perplexity}.svg')
def main(path):
    """Plot a t-SNE map of pooled sentence embeddings coloured by a filter.

    The embeddings come from ``pool_sentence_embs(path)``; the colours come
    from the ``Coreference`` column of ``master_df_ALL.csv``. The figure is
    saved under the filter name.
    """
    embs = pool_sentence_embs(path)
    # Original author's note: number of sentences x BERT hidden dimension (768)
    print("Dimension", embs.shape)

    filter_name = 'Coreference'
    df = pd.read_csv('master_df_ALL.csv', encoding='utf - 8', index_col=False)
    target = df[filter_name]
    print(len(target))

    # t-SNE reduces the embedding dimension to 2-D for plotting
    projected = TSNE(n_jobs=4, random_state=1).fit_transform(embs)
    plt.scatter(projected[:, 0], projected[:, 1],
                c=target,
                cmap=ListedColormap(["blue", "red"]),
                marker='.',
                s=50)
    plt.title(filter_name + " filter (red=passed filter, blue=did not pass filter)")
    plt.ioff()
    plt.savefig(filter_name)
def run_tSNE(natural_embed, n_jobs, perplexity):
    '''
    Apply t-SNE to the input data using the multicore CPU implementation.

    Installation notes:
        The multicore CPU version is installed with
            pip install MulticoreTSNE
        A GPU alternative (not used here) requires CUDA 9.0 and
            conda install tsnecuda -c cannylab

    INPUT:
        natural_embed: 2d numpy array with size [number of points, embedding length]
        n_jobs: forwarded to the TSNE constructor as ``n_jobs``
        perplexity: forwarded to the TSNE constructor as ``perplexity``

    OUTPUT:
        The array returned by ``fit_transform`` for ``natural_embed``.
    '''
    # Imported here so the package is only required when this runs.
    from MulticoreTSNE import MulticoreTSNE as TSNE

    # Alternative back ends that were tried:
    #   sklearn.manifold.TSNE with the same arguments (minus n_jobs)
    #   tsnecuda.TSNE(n_components=2, perplexity=30, learning_rate=10)
    tsne_params = dict(n_jobs=n_jobs,
                       perplexity=perplexity,
                       n_iter=5000,
                       n_iter_without_progress=800,
                       learning_rate=20,
                       metric='cosine')
    return TSNE(**tsne_params).fit_transform(natural_embed)
def classifier_choice(method='tsne', neighbors=30, dimensions=2):
    """Return an unfitted embedding estimator selected by name.

    The name must match exactly. The previous ``method in "tsne"`` test was
    a substring check, so values such as ``""``, ``"e"`` or ``"s"``
    silently selected t-SNE.

    :param method: one of ``tsne``, ``pca`` (a ``TruncatedSVD``), ``isomap``,
        ``lle``, ``mlle``, ``hlle``, ``ltsa``, ``mds``, ``trees`` (random
        trees embedding followed by ``TruncatedSVD``) or ``spectral``.
    :param neighbors: neighbourhood size for Isomap and the LLE variants.
    :param dimensions: number of output components.
    :returns: the estimator, or ``None`` (after printing a hint) when
        ``method`` is not recognised.
    """
    def _lle(variant):
        # All LLE flavours differ only in the ``method`` argument.
        return manifold.LocallyLinearEmbedding(n_neighbors=neighbors,
                                               n_components=dimensions,
                                               method=variant)

    def _trees():
        trees = ensemble.RandomTreesEmbedding(n_estimators=200, max_depth=5)
        pca = decomposition.TruncatedSVD(n_components=dimensions)
        return Pipeline([('Random Tree Embedder', trees), ('PCA', pca)])

    # Factories are lazy so only the requested estimator is constructed.
    builders = {
        'tsne': lambda: TSNE(n_components=dimensions, perplexity=30, verbose=1),
        'pca': lambda: decomposition.TruncatedSVD(n_components=dimensions),
        'isomap': lambda: manifold.Isomap(n_neighbors=neighbors, n_components=dimensions),
        'lle': lambda: _lle('standard'),
        'mlle': lambda: _lle('modified'),
        'hlle': lambda: _lle('hessian'),
        'ltsa': lambda: _lle('ltsa'),
        'mds': lambda: manifold.MDS(n_components=dimensions, n_init=1, max_iter=100),
        'trees': _trees,
        'spectral': lambda: manifold.SpectralEmbedding(n_components=dimensions,
                                                       eigen_solver="arpack"),
    }

    builder = builders.get(method)
    if builder is None:
        print('Please use valid method')
        return None
    return builder()
def get_data(n_cmd, n_spk, only_missed=False):
    """Embed the most frequent commands/speakers with a joint t-SNE.

    Works on module-level arrays (``y_command``, ``y_speaker``,
    ``y_missed``, ``Z_original``, ``Z_maximize``). The ``n_cmd`` most
    frequent commands and ``n_spk`` most frequent speakers are selected,
    the matching rows of both latent matrices are stacked and projected
    together, and the result is split back into its two halves.

    :param n_cmd: number of top commands to keep.
    :param n_spk: number of top speakers to keep.
    :param only_missed: when true, frequencies are counted over the entries
        indexed by ``y_missed`` and the selected ids are restricted to
        those contained in ``y_missed``.
    :returns: ``(t_org, t_max, y_cmd, y_spk)``.
    """
    if only_missed:
        # most popular MIS-CLASSIFIED commands/speakers by utterance count
        missed_idx = y_missed.astype('int32')
        top_cmd = itemfreq(y_command[missed_idx])
        top_spk = itemfreq(y_speaker[missed_idx])
    else:
        top_spk = itemfreq(y_speaker)
        top_cmd = itemfreq(y_command)

    # Rank by the count column (descending) and keep the value column.
    cmd_order = np.argsort(top_cmd[:, 1])[::-1]
    top_cmd = top_cmd[cmd_order][:, 0]
    # Speaker counts are cast to int32 before ranking.
    spk_order = np.argsort(top_spk[:, 1].astype('int32'))[::-1]
    top_spk = top_spk[spk_order][:, 0]

    ids = get_indices(speaker_set=top_spk[:n_spk], command_set=top_cmd[:n_cmd])
    if only_missed:
        ids = np.array([i for i in ids if i in y_missed], dtype='int32')

    z_org = Z_original[ids]
    z_max = Z_maximize[ids]

    # One joint fit so both sets share the same 2-D space.
    joint = TSNE(random_state=SEED).fit_transform(
        np.concatenate((z_org, z_max), axis=0))
    n_org = z_org.shape[0]
    return joint[:n_org], joint[n_org:], y_command[ids], y_speaker[ids]
def tsne_reduction(samples, perplexity, data=None, n_components=2, l_r=200, dim=2, ex=12, iterations=5000, verbosity=0):
    """Fit t-SNE on the samples and return the embedding with the model.

    :param samples: matrix to embed. When ``None`` and ``data`` is given,
        every column of ``data`` except the last is used instead.
    :param perplexity: t-SNE perplexity.
    :param data: optional matrix whose last column is treated as the target
        and excluded from the embedding input.
    :param n_components: NOTE(review): accepted but not used; the output
        dimensionality is taken from ``dim``.
    :param l_r: learning rate.
    :param dim: number of output dimensions.
    :param ex: early exaggeration.
    :param iterations: number of optimisation iterations.
    :param verbosity: verbosity level passed to the model.
    :returns: ``(reduced_samples, tsne)``.
    """
    if samples is None and data is not None:
        samples = data[:, :-1]

    # Multicore variant; all cores are used and the seed comes from
    # data_handling.RANDOM_SEED.
    model = TSNE(n_components=dim,
                 n_jobs=-1,
                 learning_rate=l_r,
                 perplexity=perplexity,
                 early_exaggeration=ex,
                 n_iter=iterations,
                 random_state=data_handling.RANDOM_SEED,
                 verbose=verbosity)
    embedded = model.fit_transform(samples)
    return embedded, model
def calcTSNEMulti(data, iterations, perplexity, learning_rate):
    """Run t-SNE with four jobs and attach the coordinates to ``data``.

    :param data: object passed to ``fit_transform`` that also provides
        ``assign`` (presumably a pandas DataFrame - confirm with callers).
    :param iterations: forwarded as ``n_iter``.
    :param perplexity: forwarded as ``perplexity``.
    :param learning_rate: forwarded as ``learning_rate``.
    :returns: the result of ``data.assign`` with new ``x`` and ``y`` columns
        holding the first and second embedding coordinate.
    """
    model = TSNE(n_jobs=4,
                 perplexity=perplexity,
                 n_iter=iterations,
                 learning_rate=learning_rate)
    coords = model.fit_transform(data)
    new_columns = {'x': coords[:, 0], 'y': coords[:, 1]}
    return data.assign(**new_columns)
def train(self, parameters):
    """Fit t-SNE on ``self.x_train`` and pickle the resulting embedding.

    :param parameters: mapping of keyword arguments for the ``TSNE``
        constructor.

    The output is written to ``tsne_outputs_path + 'tsne_outputs.p'`` via
    ``utils.save_data_to_pkl``; nothing is returned.
    """
    embedded = TSNE(**parameters).fit_transform(self.x_train)
    utils.save_data_to_pkl(embedded, tsne_outputs_path + 'tsne_outputs.p')
def main(feats_path):
    """Reduce pickled feature vectors with t-SNE and pickle the projection.

    The input pickle must hold a mapping ``name -> vector``; entries whose
    vector is ``None`` are skipped. The projection is written to
    ``<input base name>_tsne.pickle`` in the current working directory.

    :param feats_path: path of the pickled feature mapping.
    """
    # NOTE: unpickling can execute arbitrary code - only use trusted files.
    with open(feats_path, 'rb') as handle:
        loaded = pickle.Unpickler(handle).load()

    features = np.asarray(
        [vector for vector in loaded.values() if vector is not None])

    print('[INFO] Conducting t-SNE on ' + feats_path)
    model = TSNE(metric='braycurtis',
                 verbose=1,
                 n_iter=5000,
                 random_state=42,
                 n_jobs=-1)
    projection = model.fit_transform(features)

    # save reduced vectors next to the working directory
    stem = path.splitext(path.basename(feats_path))[0]
    output = stem + '_tsne.pickle'
    print('[INFO] Saving reduced vectors to ' + output)
    with open(output, 'wb') as handle:
        pickle.dump(projection, handle)
def dim_red_plot(plt_type, emb, vocab, output_dir, n_components=2, random_state=42):
    """Reduce an embedding matrix and save a scatter plot of two components.

    :param plt_type: ``'tsne'`` or ``'umap'``.
    :param emb: matrix handed to the reducer's ``fit_transform``.
    :param vocab: currently unused (point annotation is disabled).
    :param output_dir: the figure is written to
        ``<output_dir>/viz/emb_<plt_type>.png``.
    :param n_components: number of components computed; only the first two
        are plotted.
    :param random_state: seed forwarded to the reducer.
    :raises ValueError: if ``plt_type`` is not supported. Previously an
        unknown type only failed later with an unbound-variable error.
    """
    if plt_type not in ('tsne', 'umap'):
        raise ValueError(
            f"unsupported plt_type {plt_type!r}; expected 'tsne' or 'umap'")

    print(f"-- Start {plt_type} --")
    if plt_type == 'tsne':
        reducer = TSNE(n_components=n_components,
                       random_state=random_state,
                       n_jobs=10,
                       verbose=2)
    else:
        reducer = umap.UMAP(n_components=n_components,
                            random_state=random_state)
    new_values = reducer.fit_transform(emb)
    x, y = new_values[:, 0], new_values[:, 1]

    print("-- Start ploting --")
    plt.figure(figsize=(16, 16))
    plt.scatter(x, y)
    plt.savefig(os.path.join(output_dir, f'viz/emb_{plt_type}.png'))
def draw(x, y):
    """Scatter a 2-D t-SNE projection of ``x`` coloured by class id ``y``.

    Class ids index into a fixed list of eleven names (``c1`` ... ``c10``,
    ``open``). Classes are drawn from the highest id down to 0, each with
    its own colour, and the legend is placed to the right of the axes.
    The figure is neither shown nor saved here.

    :param x: feature matrix passed to t-SNE.
    :param y: class id per row of ``x``.
    """
    from matplotlib.colors import ListedColormap
    from MulticoreTSNE import MulticoreTSNE as TSNE

    print("TSNE: fitting start...")
    projected = TSNE(2, n_jobs=4, perplexity=30).fit_transform(x)

    class_names = [
        'c1', 'c2', 'c3', 'c4', 'c5', 'c6', 'c7', 'c8', 'c9', 'c10', 'open'
    ]
    id_to_label = dict(enumerate(class_names))

    y_true = pd.Series(y)
    plt.style.use('ggplot')
    n_class = y_true.unique().shape[0]

    palette = ListedColormap(
        ('gray', 'lightgreen', 'plum', 'DarkMagenta', 'SkyBlue',
         'PaleTurquoise', 'DeepPink', 'Gold', 'Orange', 'Brown', 'DarkKhaki'))
    fig, ax = plt.subplots(figsize=(9, 6), )

    # Highest class id first; the colour follows the drawing order.
    for idx, label in enumerate(range(n_class - 1, -1, -1)):
        members = y_true[y_true == label].index
        ax.scatter(projected[:, 0][members],
                   projected[:, 1][members],
                   c=palette(idx),
                   label=id_to_label[label],
                   alpha=0.5)

    ax.set_title('proto_loss')
    # Shrink current axis by 20% to make room for the legend
    box = ax.get_position()
    ax.set_position([box.x0, box.y0, box.width * 0.8, box.height])
    ax.legend(loc='center left', bbox_to_anchor=(1, 0.5))
def generate2dftEmb():
    """Load the pickled embedding map and project it to 2-D with t-SNE.

    Reads a ``word -> vector`` mapping from ``finetuned_path``, stacks the
    vectors row by row into a ``(n_words, 300)`` matrix and runs t-SNE on
    it. The module-level names ``w2id``, ``w``, ``i``, ``word``, ``tsne``
    and ``post_2d`` are (re)bound as a side effect.

    :returns: ``(post_2d, w2id, w)`` - the 2-D projection, the
        word-to-row-index map and the embedding matrix.
    """
    global w2id, w, i, word, tsne, post_2d

    # NOTE: pickle.load can execute arbitrary code - only load trusted files.
    # The context manager closes the handle (it used to be left open).
    with open(finetuned_path, 'rb') as file:
        embedding_map = pickle.load(file)

    # Convert the embeddings to a matrix where each row holds one word's
    # vector, recording the row index of every word.
    w2id = {}
    w = np.zeros((len(embedding_map.keys()), 300))
    for i, word in enumerate(embedding_map.keys()):
        w2id[word] = i
        w[i] = embedding_map[word]

    # Reduce the 300-D embeddings to 2-D.
    tsne = TSNE(n_jobs=12)
    post_2d = tsne.fit_transform(w)
    return post_2d, w2id, w
def generate2dpre():
    """Load the pretrained text embeddings and project them to 2-D.

    ``pretrained_path`` is read as a text file with one entry per line: a
    word followed by its space-separated vector components. The
    module-level names ``word``, ``i``, ``pre_w2id``, ``tsne`` and
    ``pre_2d`` are (re)bound as a side effect.

    Fix: the file used to be iterated twice without rewinding, so the
    second pass saw no lines and the matrix stayed all zeros. The file is
    now rewound between the passes and closed afterwards.

    :returns: ``(pre_2d, pre_w2id, pre_w)`` - the 2-D projection, the
        word-to-row-index map and the ``(n_words, 300)`` embedding matrix.
    """
    global word, i, pre_w2id, tsne, pre_2d

    pre_vocab = []
    with open(pretrained_path, 'r') as pre:
        # First pass: collect the vocabulary to size the matrix.
        for line in pre:
            word = line.rstrip().split(" ")[0]
            pre_vocab.append(word)

        pre_w = np.zeros((len(pre_vocab), 300))

        # Second pass: rewind, then fill one row per line.
        pre.seek(0)
        for i, line in enumerate(pre):
            embeds = line.rstrip().split(" ")
            word = embeds[0]
            pre_w[i, :] = np.asarray(embeds[1:], dtype=float)

    # Word -> row index; for duplicated words the last row wins.
    pre_w2id = {token: idx for idx, token in enumerate(pre_vocab)}

    # Reduce the 300-D embeddings to 2-D.
    tsne = TSNE(n_jobs=12)
    pre_2d = tsne.fit_transform(pre_w)
    return pre_2d, pre_w2id, pre_w
def plot_distribution(epoch, train, path, data_x, pred_y, learning_rate=100, n_jobs=-1):
    """Project samples to 2-D with t-SNE and delegate drawing to ``draw_plot``.

    :param epoch: forwarded to ``draw_plot``.
    :param train: forwarded to ``draw_plot``.
    :param path: output directory; created if it does not exist.
    :param data_x: samples, one per entry along the first axis. Inputs with
        more than two dimensions are flattened per sample.
    :param pred_y: predicted labels, forwarded to ``draw_plot``.
    :param learning_rate: t-SNE learning rate.
    :param n_jobs: number of jobs for t-SNE.
    """
    print("plotting image on " + path + "...")
    os.makedirs(path, exist_ok=True)

    tsne_model = TSNE(n_components=2,
                      learning_rate=learning_rate,
                      n_jobs=n_jobs)

    data_x = np.array(data_x)
    if data_x.ndim > 2:
        # Flatten every sample to a vector. The old per-sample loop called
        # the misspelled ``rehsape`` and raised AttributeError.
        flat_width = int(np.prod(data_x.shape[1:]))
        data_x = data_x.reshape(data_x.shape[0], flat_width)

    transformed = tsne_model.fit_transform(data_x)
    xs = transformed[:, 0]
    ys = transformed[:, 1]
    draw_plot(xs, ys, train, epoch, pred_y, path)
def tsne_executor(X, y, logger, path_logs):
    """Run t-SNE for every task / cell-line pair and persist the results.

    Tasks and cell lines are read from the module-level ``config``. For
    each task the data are filtered with ``filter_labels`` and one t-SNE is
    fitted per cell line; the labels are appended as an extra column so
    they are stored together with the coordinates.

    :param X: input data, forwarded to ``filter_labels``.
    :param y: labels, forwarded to ``filter_labels``.
    :param logger: logger used for debug messages.
    :param path_logs: destination passed to ``save_tsne`` / ``plot_tsne``.
    """
    check_input_type(
        ['epi'],
        "t-SNE experiment work just with epigenetic data, {} found".format(
            config['general']['input_type']))

    cell_lines = config['general']['cell_lines']
    tasks_dict = config['general']['tasks']

    # Use only half of the available CPUs to avoid overloading the machine,
    # but never fewer than one (cpu_count() // 2 is 0 on a single core).
    cpus = max(1, multiprocessing.cpu_count() // 2)

    results = {}
    for t in tasks_dict:
        task_name, X_filtered, y_filtered = filter_labels(X, y, t)
        logger.debug("TASK: {}".format(task_name))
        logger.debug("Using {} cpus".format(cpus))

        for cl, data, labels in zip(cell_lines, X_filtered, y_filtered):
            logger.debug("Computing t-SNE for {}".format(cl))
            # TODO: add parameters
            tsne = TSNE(perplexity=config['tsne']['perplexity'], n_jobs=cpus)
            tsne_results = tsne.fit_transform(data)
            assert len(tsne_results) == len(labels)
            # keep the labels next to the coordinates
            results["{}_{}".format(task_name, cl)] = np.c_[tsne_results, labels]

    save_tsne(path_logs, "tsne_results", results)
    if config['tsne']['save_plots']:
        plot_tsne(results, path_logs, "tsne_plot")
def plot_tsne(experience=None, latent_states=None, rewards=None):
    """Show a t-SNE scatter of latent states coloured by reward.

    :param experience: mapping with ``'agent_infos'`` (each item exposing
        ``prev_state.stoch``) and ``'reward'``. Only read when
        ``latent_states`` or ``rewards`` is missing.
    :param latent_states: 2-D array of latent vectors, one per row.
    :param rewards: array with one reward per latent state.

    Side effects: seeds NumPy's global RNG with 0, writes the projection to
    ``tsne_results.pkl`` in the working directory and opens a plot window.
    At most 10000 points are used. The selection was a hard-coded
    ``permutation(10000)``, which raised ``IndexError`` for smaller inputs;
    it is now capped at the number of available states.
    """
    if latent_states is None or rewards is None:
        latent_states = np.array([
            list(rssm_state.prev_state.stoch)
            for rssm_state in experience['agent_infos']
        ])
        rewards = np.array(experience['reward'])

    np.random.seed(0)
    n_points = min(10000, len(latent_states))
    perm = np.random.permutation(n_points)
    latent_states = latent_states[perm]
    rewards = rewards[perm]

    feature_cols = ['axis_' + str(i) for i in range(latent_states.shape[1])]
    df = DataFrame(latent_states, columns=feature_cols)
    df['y'] = rewards

    time_start = time()
    # NOTE(review): perplexity=1000 is large for small point counts - confirm.
    tsne = TSNE(n_components=2,
                verbose=1,
                perplexity=1000,
                n_iter=1000,
                n_jobs=16)
    tsne_results = tsne.fit_transform(df[feature_cols].values)
    print('t-SNE done! Time elapsed: {} seconds'.format(time() - time_start))

    # Close the handle deterministically (it used to be leaked).
    with open('tsne_results.pkl', 'wb') as handle:
        pickle.dump(tsne_results, handle)

    df['tsne-2d-one'] = tsne_results[:, 0]
    df['tsne-2d-two'] = tsne_results[:, 1]
    sns.scatterplot(x="tsne-2d-one",
                    y="tsne-2d-two",
                    hue="y",
                    palette=sns.color_palette("flare", as_cmap=True),
                    data=df,
                    alpha=0.6,
                    s=5)
    plt.show()
def dimensionality_reduction(X, algorithm="PCA"):
    """Reduce the dimensionality of the AISNPs to three components.

    :param X: One-hot encoded 1kG AISNPs.
    :type X: pandas DataFrame
    :param algorithm: The type of dimensionality reduction to perform.
        One of {PCA, UMAP, t-SNE} (case-sensitive).
    :type algorithm: str
    :returns: A DataFrame with columns ``x``, ``y``, ``z`` indexed like
        ``X``, and the fitted reducer object. ``(None, None)`` when
        ``algorithm`` is not recognised.
    """
    if algorithm not in ("PCA", "t-SNE", "UMAP"):
        return None, None

    n_components = 3
    if algorithm == "PCA":
        reducer = PCA(n_components=n_components)
    elif algorithm == "t-SNE":
        reducer = TSNE(n_components=n_components, n_jobs=4)
    else:
        reducer = umap.UMAP(n_components=n_components,
                            min_dist=0.2,
                            metric="dice",
                            random_state=42)

    reduced = reducer.fit_transform(X.values)
    frame = pd.DataFrame(reduced, columns=["x", "y", "z"], index=X.index)
    return frame, reducer
def decompose(dimred, dim, nneigh):
    """Embed the module-level matrix ``An`` with the chosen method.

    :param dimred: one of ``'MDS'``, ``'ISOMAP'``, ``'LLE'``, ``'TSNE'``,
        ``'UMAP'`` or ``'PCA'``.
    :param dim: number of output dimensions.
    :param nneigh: neighbourhood size for the neighbour-based methods.
    :returns: the positions returned by ``fit_transform(An)``.
    :raises ValueError: if ``dimred`` is not recognised.

    Tuning values come from module-level settings (``__inits``,
    ``__iters``, ``__dis``, ``__lrate``, ``__perplexity``). The speed
    remarks below are the original author's.
    """
    # Lazy factories: only the selected estimator is ever constructed.
    factories = {
        # slowest!
        'MDS': lambda: MDS(n_components=dim, n_init=__inits, max_iter=__iters,
                           n_jobs=-1, dissimilarity=__dis),
        # slow
        'ISOMAP': lambda: Isomap(n_neighbors=nneigh, n_components=dim, n_jobs=-1),
        # slow-acceptable
        'LLE': lambda: LocallyLinearEmbedding(n_neighbors=nneigh,
                                              n_components=dim, n_jobs=-1),
        # acceptable; An is passed as a precomputed distance matrix
        'TSNE': lambda: TSNE(n_components=dim, n_iter=__iters,
                             metric='precomputed', learning_rate=__lrate,
                             perplexity=__perplexity),
        # fast (the metric=__dis variant is disabled)
        'UMAP': lambda: umap.UMAP(n_neighbors=nneigh, n_components=dim,
                                  min_dist=0.1),
        # fastest!
        'PCA': lambda: PCA(n_components=dim),
    }

    make_embedding = factories.get(dimred)
    if make_embedding is None:
        raise ValueError('dimension reduction method not recognized')

    return make_embedding().fit_transform(An)
def reduce_dim(df, algorithm='pca'):
    """One-hot encode the AISNPs and reduce them to three components.

    :param df: AISNP values; every column is one-hot encoded over the
        categories 0..3 before reduction.
    :type df: pandas DataFrame
    :param algorithm: The type of dimensionality reduction to perform.
        One of {pca, umap, tsne}.
    :type algorithm: str
    :returns: A DataFrame with columns ``x``, ``y``, ``z`` indexed like
        ``df``, or ``None`` when ``algorithm`` is not recognised.
    """
    n_components = 3
    encoder = OneHotEncoder(categories=[range(4)] * len(df.columns),
                            sparse=False)
    X = encoder.fit_transform(df.values)

    if algorithm == 'pca':
        reducer = PCA(n_components=n_components)
    elif algorithm == 'tsne':
        # TSNE, Barnes-Hut have dim <= 3
        if n_components > 3:
            print(
                'The Barnes-Hut method requires the dimensionaility to be <= 3'
            )
            return None
        reducer = TSNE(n_components=n_components, n_jobs=4)
    elif algorithm == 'umap':
        reducer = umap.UMAP(n_components=n_components)
    else:
        return None

    reduced = reducer.fit_transform(X)
    return pd.DataFrame(reduced, columns=['x', 'y', 'z'], index=df.index)
def display_closestwords_tsnescatterplot(arg_path_to_model, word):
    """Show, for each query word, a t-SNE scatter of it and its neighbours.

    :param arg_path_to_model: path of a saved Word2Vec model.
    :param word: sequence of query words; one figure is shown per entry.

    For every query the vector of the word itself and of each entry
    returned by ``model.similar_by_word`` is collected, projected to 2-D
    and drawn with its label.
    """
    model = word2vec.Word2Vec.load(arg_path_to_model)

    for query in word:
        # get close words
        close_words = model.similar_by_word(query)
        word_labels = [query] + [entry[0] for entry in close_words]

        # Stack the query vector followed by its neighbours, starting from
        # an empty float32 matrix with 300 columns.
        rows = [np.empty((0, 300), dtype='f')]
        rows.extend(np.array([model[label]]) for label in word_labels)
        arr = np.concatenate(rows, axis=0)

        # find tsne coords for 2 dimensions
        tsne = TSNE(n_components=2, random_state=0)
        np.set_printoptions(suppress=True)
        coords = tsne.fit_transform(arr)
        x_coords = coords[:, 0]
        y_coords = coords[:, 1]

        # display scatter plot
        plt.scatter(x_coords, y_coords)
        for label, x, y in zip(word_labels, x_coords, y_coords):
            plt.annotate(label, xy=(x, y), xytext=(0, 0), textcoords='offset points')

        # Changing the multiplier changes the plot's zoom
        # (smaller multiplier = closer zoom).
        plt.xlim(x_coords.min() * 1, x_coords.max() * 1)
        plt.ylim(y_coords.min() * 1, y_coords.max() * 1)
        plt.show()
def tsne_image(
        features, images, img_res=64, res=4000, background_color=255,
        max_feature_size=-1, labels=None, point_radius=20, n_threads=0
):
    """
    Embed images into a scatter image at their t-SNE coordinates.

    Parameters
    ---------
    features: numpy array
        Features to visualize; must be two-dimensional.
    images: list or numpy array
        Images corresponding to the rows of ``features``.
    img_res: int
        Resolution to embed images at.
    res: int
        Size of the embedding image in pixels.
    background_color: float or numpy array
        Background color value.
    max_feature_size: int
        If 0 < max_feature_size < feature width, the features are first
        reduced with PCA to that size.
    labels: List or numpy array
        Optional label per image, used for drawing the label circle.
    point_radius: int
        Size of the circle for the label image.
    n_threads: int
        Number of threads for t-SNE; values <= 0 use all CPUs.

    Returns
    -------
    The value returned by ``image_util.draw_images_at_locations``.
    """
    features = np.asarray(features, dtype=np.float32)
    assert features.ndim == 2

    print("Starting TSNE")
    s_time = time.time()

    feature_width = features.shape[-1]
    if 0 < max_feature_size < feature_width:
        features = PCA(n_components=max_feature_size).fit_transform(features)

    workers = n_threads if n_threads > 0 else multiprocessing.cpu_count()
    model = TSNE(n_components=2, verbose=1, random_state=0, n_jobs=workers)
    f2d = model.fit_transform(features)
    print("TSNE done.", (time.time() - s_time))

    print("Starting drawing.")
    return image_util.draw_images_at_locations(
        images, f2d[:, 0], f2d[:, 1], img_res, res, background_color,
        labels, point_radius)
def compute_tsne(X, y, n_class=2, savepath=None, xlim=(-50,50), ylim=(-50,50), cls_lbl=['Benign','Tumor'], title=' ',PCADIM=50):
    """Plot a 2-D t-SNE embedding of ``X`` with one colour per class.

    :param X: feature matrix passed to t-SNE.
    :param y: array of integer class ids, compared against ``range(n_class)``.
    :param n_class: number of classes to draw.
    :param savepath: when given, the figure is saved there and again with
        ``.png`` replaced by ``.pdf``; otherwise it is shown.
    :param xlim: x-axis limits, or a falsy value to leave them automatic.
    :param ylim: y-axis limits, or a falsy value to leave them automatic.
    :param cls_lbl: legend labels in class order (read only, never mutated).
    :param title: figure title.
    :param PCADIM: unused; the PCA pre-reduction step is disabled.

    Colours are taken from the module-level ``colors`` sequence.
    """
    tsne = TSNE(n_jobs=4, random_state=1337)
    embs = tsne.fit_transform(X)

    plt.figure(figsize=(10, 10))
    for i in range(n_class):
        inds = np.where(y == i)[0]
        plt.scatter(embs[inds, 0], embs[inds, 1], color=colors[i], marker='*', s=30)

    if xlim:
        plt.xlim(xlim[0], xlim[1])
    if ylim:
        plt.ylim(ylim[0], ylim[1])

    plt.legend(cls_lbl)
    # Positional form: the ``b`` keyword was renamed to ``visible`` in newer
    # Matplotlib releases, so ``grid(b=None)`` breaks there. Passing None
    # positionally means the same thing on old and new versions.
    plt.grid(None)
    plt.title(title)

    if savepath:
        plt.savefig(savepath, dpi=300, bbox_inches='tight')
        plt.savefig(savepath.replace('.png', '.pdf'), dpi=300, bbox_inches='tight')
    else:
        plt.show()
    plt.clf()
def tsne_main(args):
    """Run t-SNE on a stored proximity matrix and save the coordinates.

    :param args: namespace providing ``labels`` and ``proximity`` (paths of
        ``.npy`` inputs), ``tsne`` (output path) and ``plot`` (flag).

    When ``args.plot`` is set, the points of clusters 0..3 are plotted with
    one series per non-empty cluster before the coordinates are saved.
    """
    verbose_print(args, f'Loaded niche labels from {args.labels}')
    labels = np.load(args.labels)

    verbose_print(args, f'Running t-SNE based on {args.proximity}')
    proximities = np.load(args.proximity)
    model = TSNE(n_components=2, n_jobs=-1, perplexity=800, learning_rate=100)
    x_tsne = model.fit_transform(proximities)

    if args.plot:
        # Show tSNE, skipping clusters without members
        for cluster in range(4):
            members = np.where(labels == cluster)[0]
            if len(members) > 0:
                plt.plot(x_tsne[members, 0], x_tsne[members, 1], '.',
                         label=f'Cluster {cluster}')
        plt.legend()
        plt.show()

    # Save the t-SNE coordinates
    np.save(args.tsne, x_tsne)
    verbose_print(args, f't-SNE coordinates saved to {args.tsne}')
    verbose_print(args, 'Niche clustering done!')
def calc_tsne(
    X,
    n_jobs,
    n_components,
    perplexity,
    early_exaggeration,
    learning_rate,
    random_state,
    init="random",
    n_iter=1000,
    n_iter_early_exag=250,
):
    """
    Fit t-SNE on ``X`` and return the embedding.

    Every argument except ``X`` is forwarded unchanged to the ``TSNE``
    constructor under the same name; ``verbose`` is fixed to 1. After
    fitting, the model's ``kl_divergence_`` is logged as the final error.

    TODO: Typing
    """
    settings = dict(
        n_jobs=n_jobs,
        n_components=n_components,
        perplexity=perplexity,
        early_exaggeration=early_exaggeration,
        learning_rate=learning_rate,
        random_state=random_state,
        verbose=1,
        init=init,
        n_iter=n_iter,
        n_iter_early_exag=n_iter_early_exag,
    )
    model = TSNE(**settings)
    embedded = model.fit_transform(X)
    logger.info(f"Final error = {model.kl_divergence_}")
    return embedded
def plot_conti_code_tsne():
    """Show a t-SNE scatter of the last two code dimensions, by label.

    Loads the pickled generated-code dictionary selected by the
    module-level ``opt`` settings (``dataset``, ``n_noise``, ``n_dis``,
    ``n_conti``, ``epoch``). Its ``"z"`` entry supplies the codes and its
    ``"y"`` entry the labels. Only the last two columns of the codes are
    embedded.
    """
    code_path = (
        "/home/patrick/repositories/hyperspectral_phenotyping_gan/experiments_{}/generated_code_noise{}_disc{}_conti{}_epoch{}.p"
        .format(opt.dataset, opt.n_noise, opt.n_dis, opt.n_conti, opt.epoch))

    # NOTE: pickle.load can execute arbitrary code - only load trusted files.
    # The context manager closes the handle (it used to be leaked).
    with open(code_path, "rb") as handle:
        data = pickle.load(handle)

    labels = np.array(data["y"]).squeeze()
    labels_unique = np.unique(labels)

    # Keep only the last two code dimensions.
    code = np.array(data["z"])[:, -2:]

    tsne = TSNE(n_jobs=26, n_components=2, learning_rate=100)
    Y = tsne.fit_transform(code)

    colors = ["red", "green", "blue"]
    for idx, label in enumerate(labels_unique):
        data_tsne = Y[labels == label]
        # Cycle through the palette so more than three labels do not raise
        # IndexError; the first three labels keep their colours.
        plt.scatter(data_tsne[:, 0],
                    data_tsne[:, 1],
                    c=colors[idx % len(colors)],
                    alpha=0.3,
                    label=str(label))
    plt.legend()
    plt.show()
def main():
    """Command-line entry point: t-SNE plot of stored embeddings.

    Loads ``(embeddings, labels)`` with ``load(--path, --size)``, projects
    the embeddings to 2-D and writes ``graph/<name>.svg``, where ``<name>``
    is the last ``/``-separated component of ``--path``. ``--dump_dir`` is
    parsed but not used here; the UMAP variant is disabled.
    """
    parser = argparse.ArgumentParser(description='main function parser')
    parser.add_argument('--path', type=str, help='load file path', required=True)
    parser.add_argument('--dump_dir', type=str, help='dump directory', default=None)
    parser.add_argument('--size', type=int, default=1000, help='embedding vector size')
    args = parser.parse_args()

    embeddings, labels = load(args.path, args.size)
    output = args.path.split('/')[-1]

    # t-SNE
    projected = TSNE(n_components=2).fit_transform(embeddings)
    show(projected, labels, f'graph/{output}.svg')
def tsne(codewords, label, num_of_class):
    """plot the T-SNE based on codewords and label

    Params:
    ------------------
    codewords: (num_of_samples, dims_of_feature) numpy array
        codewords to be dimension reduction
    label: (num_of_samples,) numpy array
        data label, used as the colour value of each point
    num_of_class: int
        number of class; sets the number of colour bins, the colorbar
        ticks and the colour limits

    Returns:
    ------------------
    None
    """
    starter_time = time.time()
    embeddings = TSNE(n_components=2, perplexity=50,
                      n_jobs=4).fit_transform(codewords)
    vis_x = embeddings[:, 0]
    vis_y = embeddings[:, 1]
    plt.scatter(vis_x,
                vis_y,
                c=label,
                cmap=plt.cm.get_cmap("jet", num_of_class),
                marker='.',
                s=100)
    plt.colorbar(ticks=range(num_of_class))
    # Centre each integer label in its colour bin. The limits used to be
    # hard-coded to (-0.5, 9.5), which is only right for 10 classes.
    plt.clim(-0.5, num_of_class - 0.5)
    print('TSNE TIME: {} seconds'.format(time.time() - starter_time))