def tsne_plot(self, labels, colors, filename="tsne.png", show=False,
              n_components=2, perplexity=30.0, early_exaggeration=12.0,
              learning_rate=200.0, n_iter=1000, n_iter_without_progress=300,
              min_grad_norm=1e-07, metric='euclidean', init='random',
              verbose=0, random_state=None, method='barnes_hut', angle=0.5,
              n_jobs=None) -> None:
    """Embed ``self.embeddings`` with t-SNE and save a labelled scatter plot.

    Args:
        labels: Legend entries, one per colour.
        colors: Colours, indexed by the value of each entry of
            ``self.labels``; must have the same length as ``labels``.
        filename: File name of the PNG, created inside
            ``self.embeddings_dir``.
        show: If true, also display the figure with ``plt.show()``.
        n_components .. n_jobs: Forwarded unchanged to the ``tsne``
            estimator constructor.

    Raises:
        ValueError: If ``labels`` and ``colors`` differ in length.
    """
    if len(labels) != len(colors):
        raise ValueError(
            "The lists of labels and colours should have the same length!")
    filename_abs = os.path.join(self.embeddings_dir, filename)
    X_embedded = tsne(n_components=n_components,
                      perplexity=perplexity,
                      early_exaggeration=early_exaggeration,
                      learning_rate=learning_rate,
                      n_iter=n_iter,
                      n_iter_without_progress=n_iter_without_progress,
                      min_grad_norm=min_grad_norm,
                      metric=metric,
                      init=init,
                      verbose=verbose,
                      random_state=random_state,
                      method=method,
                      angle=angle,
                      n_jobs=n_jobs).fit_transform(self.embeddings)
    fig = plt.figure(figsize=(6, 5))
    # One scatter call per sample; each entry of self.labels must expose
    # .numpy() and its value is used as an index into `colors`.
    for i, label in enumerate(self.labels):
        label = label.numpy()
        plt.scatter(X_embedded[i, 0], X_embedded[i, 1],
                    c=colors[label], label=int(label))
    # Proxy handles so the legend shows exactly one entry per colour.
    lines = [
        Line2D(range(1), range(1), color="white", marker='o',
               markerfacecolor=color)
        for color in colors
    ]
    plt.legend(lines, labels, numpoints=1, loc=1)
    plt.savefig(fname=filename_abs, format='png')
    if show:
        plt.show()
    else:
        # Nothing else can reach the figure (the function returns None),
        # so release it instead of letting pyplot accumulate open figures.
        plt.close(fig)
def project(embeddings, tokens, selectedTokens):
    """Scatter-plot a t-SNE projection, colouring the selected tokens.

    Args:
        embeddings: Data passed to ``tsne().fit_transform``.
        tokens: One token per embedded row.
        selectedTokens: Container of tokens to highlight; each token is
            coloured by whether it is a member.

    The figure is saved to ``../results/embeddings/projection``.
    """
    # Parenthesised single-argument form works on both Python 2 and 3.
    print("Running tsne")
    projected = tsne().fit_transform(embeddings)
    colors = np.array([t in selectedTokens for t in tokens])
    plt.figure()
    plt.scatter(projected[:, 0], projected[:, 1], c=colors)
    plt.savefig("../results/embeddings/projection")
def project(embeddings, tokens):
    """Scatter-plot a t-SNE projection and annotate every point with its token.

    Args:
        embeddings: Data passed to ``tsne().fit_transform``.
        tokens: One label per embedded row, drawn next to its point.

    The figure is saved to ``../results/embeddings/projection``.
    """
    # Parenthesised single-argument form works on both Python 2 and 3.
    print("Running tsne")
    projected = tsne().fit_transform(embeddings)
    plt.figure()
    plt.scatter(projected[:, 0], projected[:, 1])
    for label, x, y in zip(tokens, projected[:, 0], projected[:, 1]):
        plt.annotate(label, xy=(x, y), xytext=(-10, -10),
                     textcoords='offset points', size='x-small')
    plt.savefig("../results/embeddings/projection")
def visualize_clusters(X, all_labels):
    '''Plot a 2-D t-SNE embedding of ``X`` with one marker series per label.

    The embedding is cached in ``c3d_feature_dir`` as ``Y_tsne_dog_cat.npy``
    (plus a text copy); when the ``.npy`` file exists it is loaded instead of
    recomputing. Samples labelled ``'both'`` are not plotted. The figure is
    saved as ``tsne_cat_dog_clusters.png`` and then shown.

    Args:
        X: Data passed to ``fit_transform``.
        all_labels: One label per row of ``X``.
    '''
    # sort of like "unique"
    uniq_labels = list(set(all_labels))
    # Element-wise comparison is needed below; comparing a plain list with a
    # scalar yields a single False and would select no samples at all.
    labels_arr = np.asarray(all_labels)

    tsne_output_file = os.path.join(c3d_feature_dir, 'Y_tsne_dog_cat.npy')
    tsne_output_txt_file = os.path.join(c3d_feature_dir, 'Y_tsne_dog_cat.txt')
    if os.path.isfile(tsne_output_file):
        Y = np.load(tsne_output_file)
    else:
        tsne_model = tsne(n_components=2, random_state=0)
        np.set_printoptions(suppress=True)
        Y = tsne_model.fit_transform(X)
        np.save(tsne_output_file, Y)
        np.savetxt(tsne_output_txt_file, Y)

    for count, label in enumerate(uniq_labels):
        # skip "both" label
        if label == 'both':
            continue
        this_label_ind = np.where(labels_arr == label)[0]
        print("label={}, len(this_label_ind)={}".format(
            label, len(this_label_ind)))
        marker = 'o' if count < 6 else 'v'
        if count == 1:
            color = 'g'
        elif count == 2:
            color = 'r'
        else:
            # Previously unbound (NameError) or stale from an earlier
            # iteration; None lets matplotlib pick from its colour cycle.
            color = None
        pylab.plot(Y[this_label_ind, 0], Y[this_label_ind, 1],
                   marker=marker, linestyle='', ms=6, label=label,
                   color=color)
    pylab.legend(numpoints=1, loc='upper left')
    pylab.xlim([-23, 17])
    pylab.ylim([-16, 16])
    pylab.grid()
    pylab.savefig('tsne_cat_dog_clusters.png')
    pylab.show()
def genrate_tsne(high_dim_repr, seed=4, perplexity=30):
    """Return a two-column DataFrame (``c1``, ``c2``) holding a 2-D t-SNE embedding.

    Args:
        high_dim_repr: Data passed to ``fit_transform``.
        seed: Value used as the estimator's ``random_state``.
        perplexity: t-SNE perplexity.
    """
    print('compute tsne with perplexity {} and seed {}'.format(
        perplexity, seed))
    embedding = tsne(
        n_components=2,
        perplexity=perplexity,
        random_state=seed,
    ).fit_transform(high_dim_repr)
    return pd.DataFrame({'c1': embedding[:, 0], 'c2': embedding[:, 1]})
def sk_tsne(data_path='examples/123/true2.npy'):
    """Load an array from ``data_path``, embed it with exact t-SNE and show it.

    Args:
        data_path: ``.npy`` file to load; defaults to the path that was
            previously hard-coded, so existing zero-argument calls behave
            as before.
    """
    from sklearn.manifold import TSNE as tsne

    X_true = np.load(data_path)
    # The pairwise distance matrix computed here previously was never used
    # and cost O(n^2) time and memory, so it has been dropped.
    X_embedded = tsne(n_components=2, verbose=2,
                      method='exact').fit_transform(X_true)
    plt.figure()
    plt.plot(X_embedded[:, 0], X_embedded[:, 1], 'o')
    plt.show()
def visualize_clusters(X, all_labels):
    """Plot a 2-D t-SNE embedding of ``X`` with one marker series per label.

    The embedding is cached in ``c3d_feature_dir`` as ``Y_tsne_dog_cat.npy``
    (plus a text copy); when the ``.npy`` file exists it is loaded instead of
    recomputing. Samples labelled ``"both"`` are not plotted. The figure is
    saved as ``tsne_cat_dog_clusters.png`` and then shown.

    Args:
        X: Data passed to ``fit_transform``.
        all_labels: One label per row of ``X``.
    """
    # sort of like "unique"
    uniq_labels = list(set(all_labels))
    # Element-wise comparison is needed below; comparing a plain list with a
    # scalar yields a single False and would select no samples at all.
    labels_arr = np.asarray(all_labels)

    tsne_output_file = os.path.join(c3d_feature_dir, "Y_tsne_dog_cat.npy")
    tsne_output_txt_file = os.path.join(c3d_feature_dir, "Y_tsne_dog_cat.txt")
    if os.path.isfile(tsne_output_file):
        Y = np.load(tsne_output_file)
    else:
        tsne_model = tsne(n_components=2, random_state=0)
        np.set_printoptions(suppress=True)
        Y = tsne_model.fit_transform(X)
        np.save(tsne_output_file, Y)
        np.savetxt(tsne_output_txt_file, Y)

    for count, label in enumerate(uniq_labels):
        # skip "both" label
        if label == "both":
            continue
        this_label_ind = np.where(labels_arr == label)[0]
        print("label={}, len(this_label_ind)={}".format(label, len(this_label_ind)))
        marker = "o" if count < 6 else "v"
        if count == 1:
            color = "g"
        elif count == 2:
            color = "r"
        else:
            # Previously unbound (NameError) or stale from an earlier
            # iteration; None lets matplotlib pick from its colour cycle.
            color = None
        pylab.plot(
            Y[this_label_ind, 0], Y[this_label_ind, 1], marker=marker, linestyle="", ms=6, label=label, color=color
        )
    pylab.legend(numpoints=1, loc="upper left")
    pylab.xlim([-23, 17])
    pylab.ylim([-16, 16])
    pylab.grid()
    pylab.savefig("tsne_cat_dog_clusters.png")
    pylab.show()
def highlight(embeddings, tokens, keyword):
    """Plot a t-SNE projection, highlighting tokens that contain ``keyword``.

    Points whose token contains ``keyword`` get a different colour and are
    annotated with the token text. Axis ticks are removed and the figure is
    saved as an SVG whose name embeds the keyword.

    Args:
        embeddings: Data passed to ``tsne().fit_transform``.
        tokens: One token per embedded row.
        keyword: Value tested with ``keyword in token``.
    """
    # Parenthesised single-argument form works on both Python 2 and 3.
    print("Running tsne")
    projected = tsne().fit_transform(embeddings)
    # Evaluate the membership test once and reuse it for colours and labels.
    matches = [keyword in t for t in tokens]
    colors = np.array(matches)
    plt.figure()
    plt.scatter(projected[:, 0], projected[:, 1], c=colors)
    for hit, label, x, y in zip(matches, tokens,
                                projected[:, 0], projected[:, 1]):
        if hit:
            plt.annotate(label, xy=(x, y), xytext=(-10, -10),
                         textcoords='offset points', size='x-small')
    plt.xticks([])
    plt.yticks([])
    plt.savefig("../results/embeddings/highlight_\"%s\".svg" %keyword)
def visualize_clusters(X, all_labels):
    '''Plot a 2-D t-SNE embedding of ``X`` with one marker series per label.

    The embedding is cached as text in ``c3d_feature_dir/Y_bhtsne.txt``;
    when that file exists it is loaded with ``np.loadtxt`` instead of
    recomputing. The figure is saved as ``tsne_video_clusters.png`` and
    then shown.

    Args:
        X: Data passed to ``fit_transform``.
        all_labels: One label per row of ``X``.
    '''
    # sort of like "unique"
    uniq_labels = list(set(all_labels))
    # Element-wise comparison is needed below; comparing a plain list with a
    # scalar yields a single False and would select no samples at all.
    labels_arr = np.asarray(all_labels)

    tsne_output_file = os.path.join(c3d_feature_dir, 'Y_bhtsne.txt')
    if os.path.isfile(tsne_output_file):
        Y = np.loadtxt(tsne_output_file)
    else:
        tsne_model = tsne(n_components=2, random_state=0)
        np.set_printoptions(suppress=True)
        Y = tsne_model.fit_transform(X)
        # np.save would append ".npy" to the name and write a binary file
        # that the np.loadtxt branch above could never find or parse.
        np.savetxt(tsne_output_file, Y)

    for count, label in enumerate(uniq_labels):
        this_label_ind = np.where(labels_arr == label)[0]
        print("count={}, label={}, len(this_label_ind)={}".format(
            count, label, len(this_label_ind)))
        marker = 'o' if count < 6 else 'v'
        pylab.plot(Y[this_label_ind, 0], Y[this_label_ind, 1],
                   marker=marker, linestyle='', ms=6, label=label)
    pylab.legend(numpoints=1, loc='upper left')
    pylab.xlim([-80, 60])
    pylab.savefig('tsne_video_clusters.png')
    pylab.show()
def add_embeddings(tags_lst, tag_dict, debug_dict, token, tag, dim_reduce=False, dims=300):
    """Record the embedding of ``token`` under ``tag`` the first time that tag is seen.

    Nothing happens when ``tag`` is not in ``tags_lst`` or is already a key
    of ``tag_dict``. Otherwise ``tag_dict[tag]`` receives a list and
    ``debug_dict[tag]`` receives ``token.text``:

    * ``dims <= 1``: a one-element list holding ``token.vector_norm``;
    * ``dim_reduce`` and ``dims < 300``: the first row of a t-SNE fit on two
      copies of ``token.vector`` with ``dims`` components;
    * otherwise: ``list(token.vector)``.
    """
    # Keep only the first occurrence of each wanted tag.
    if tag not in tags_lst or tag in tag_dict:
        return

    if dims <= 1:
        vec = [token.vector_norm]  # l2 norm
    elif dim_reduce and dims < 300:
        reducer = tsne(n_components=dims)
        vec = list(reducer.fit_transform([token.vector, token.vector])[0])
    else:
        vec = list(token.vector)

    tag_dict[tag] = vec
    debug_dict[tag] = token.text
import pandas as pd from sklearn.manifold import TSNE as tsne from matplotlib import pyplot as plt import seaborn as sns TRAIN_LOCATION = "/home/sanjeev/Documents/HeadMotionData/outputs/global_avg_output_train.csv" VALID_LOCATION = "/home/sanjeev/Documents/HeadMotionData/outputs/global_avg_output_valid.csv" OUT_TRAIN_LOCATION = "/home/sanjeev/Documents/HeadMotionData/outputs/global_avg_embed_train.csv" OUT_VALID_LOCATION = "/home/sanjeev/Documents/HeadMotionData/outputs/global_avg_embed_valid.csv" train_df = pd.read_csv(TRAIN_LOCATION) valid_df = pd.read_csv(VALID_LOCATION) train_df.iloc[:, -1] = train_df.iloc[:, -1].astype(int) valid_df.iloc[:, -1] = valid_df.iloc[:, -1].astype(int) embedder = tsne() embedder.fit(train_df.iloc[:, :-1]) train_embedded = embedder.transform(train_df.iloc[:, :-1]) valid_embedded = embedder.tranform(valid_df.iloc[:, -1]) train_df = pd.DataFrame(train_embedded, columns=["x", "y"]) valid_df = pd.DataFrame(valid_embedded, columns=["x", "y"]) train_df["target"] = train_df.iloc[:, -1] valid_df["target"] = valid_df.iloc[:, -1] train_df.to_csv(OUT_TRAIN_LOCATION, index=False) valid_df.to_csv(OUT_VALID_LOCATION, index=False) f, axes = plt.subplots(1, 2) sns.scatterplot(y="y",
all_firing_array = np.asarray(data.normal_off_firing)
X = data.all_normal_off_firing.swapaxes(1, 2)[:, 80:160, :]
#taste = 0
#X = data.normal_off_firing[taste][:,:,80:160].swapaxes(-1,-2)

# Reduce dimensions of every timepoint using TSNE
# Lay the slices X[:, :, k] side by side along the last axis.
X_long = X[:, :, 0]
for trial in range(1, X.shape[2]):
    X_long = np.concatenate((X_long, X[:, :, trial]), axis=-1)
# Shuffle the columns before embedding; `perm` is reused for colouring below.
perm = np.random.permutation(np.arange(X_long.shape[1]))
X_long_perm = X_long[:, perm]
X_embedded = tsne(n_components=2, perplexity=35).fit_transform(X_long_perm.T)

# np.int was removed in NumPy 1.24; the builtin int is the drop-in replacement.
n_time = int(X.shape[1])
time_idx = np.arange(n_time)
colors = time_idx / np.max(time_idx)
for trial_num in range(60):
    # NOTE(review): these rows are multiples of (trial_num + 1) taken from
    # the *permuted* embedding, not a contiguous per-trial slice - confirm
    # that this is the intended selection.
    rows = (trial_num + 1) * time_idx
    fig = plt.figure()
    plt.scatter(X_embedded[rows, 0], X_embedded[rows, 1], c=colors)
    plt.colorbar()
    plt.plot(X_embedded[rows, 0], X_embedded[rows, 1])
    plt.savefig(plot_dir + '/' + 'tsne_trial_%i.png' % trial_num)
    plt.close(fig)

# Colour every embedded point by its index within the second axis of X.
# np.tile gives the same values as np.matlib.repmat(...)[0, perm] without
# requiring an explicit `import numpy.matlib`.
colors = np.tile(time_idx, X.shape[2])[perm]
plt.scatter(X_embedded[:, 0], X_embedded[:, 1], c=colors.flatten())
def t_sne(samples, use_scikit=False, files_dir=None, results_filename='result.dat', data_filename='data.dat',
          no_dims=DEFAULT_NO_DIMS, perplexity=DEFAULT_PERPLEXITY, theta=DEFAULT_THETA, eta=DEFAULT_ETA,
          iterations=DEFAULT_ITERATIONS, seed=DEFAULT_SEED, early_exaggeration=DEFAULT_EARLY_EXAGGERATION,
          gpu_mem=DEFAULT_GPU_MEM, randseed=DEFAULT_RANDOM_SEED, verbose=3):
    """
    Run t-sne on the supplied samples (N_samples x D_features array). It either:
    1) Calls the t_sne_bhcuda.exe (which should be in the Path of the OS somehow - maybe in the Scripts folder)
    which then runs t-sne either on the CPU or the GPU, or
    2) Calls the sklearn t-sne module (which runs only on CPU).

    Parameters
    ----------
    samples -- The N_examples X D_features array to t-sne
    use_scikit -- If yes use the sklearn t-sne implementation. Otherwise use the t_sne_bhcuda.exe
    files_dir -- The folder in which the t_sne_bhcuda.exe should look for the data_filename.dat and save the
    results_filename.dat
    results_filename -- The name of the file that the t_sne_bhcuda.exe saves the t-sne results in
    data_filename -- The name of the file that the t_sne_bhcuda.exe looks into for data to t-sne. This data file
    also has a header with all the parameters that the t_sne_bhcuda.exe needs to run.
    no_dims -- Number of dimensions of the t-sne embedding
    perplexity -- Defines the amount of samples whose distances are compared to every sample (check sklearn and
    the van der Maaten paper)
    theta -- If > 0 then the algorithm runs the Barnes-Hut approximation (with angle = theta). If = 0 then it runs
    the exact version. Values smaller than 0.5 do not add too much error.
    eta -- The learning rate
    iterations -- The number of iterations (usually around 1000 should suffice)
    early_exaggeration -- The amount by which the samples are initially pushed apart. Used only in the
    scikit-learn version
    seed -- Set to a number > 0 if the amount of samples is too large to t-sne. Then the algorithm will t-sne the
    first seed number of samples. Then it will compare the euclidean distance between every other sample and these
    t-sned samples. For each non t-sned sample it will find the 5 closest t-sned samples and place the new sample
    on the point of the t-sne space that is given by the center of mass of those 5 closest samples. The mass of
    each closest sample is defined as the inverse of its euclidean distance to the new sample.
    gpu_mem -- If > 0 (and <= 1) then the t_sne_bhcuda.exe will run the euclidean distances calculations on the
    GPU (if possible) and will use (gpu_mem * 100) per cent of the available gpu memory to temporarily store
    results. If == 0 then the t_sne_bhcuda.exe will run only on the CPU. It has no effect if use_scikit = True
    randseed -- Set the random seed for the initialization of the samples on the no_dims plane.
    verbose -- Define verbosity (0 = No output, 1 = Basic output, 2 = Full output)

    Returns
    -------
    A N_examples X no_dims array of the embedded examples, or None when use_scikit is set but sklearn cannot be
    imported.

    Raises
    ------
    ValueError -- If use_scikit is set and theta is negative.
    AssertionError -- If the t_sne_bhcuda executable exits with a non-zero return code.
    """
    if use_scikit:  # using python's scikit tsne implementation
        try:
            from sklearn.manifold import TSNE as tsne
        except ImportError:
            # Trailing space added: the two literals used to join as "Falseand".
            print('You do not have sklearn installed. Try calling t_sne with use_scikit=False '
                  'and gpu_mem=0 if you do not want to run the code in GPU.')
            return None
        if theta > 0:
            method = 'barnes_hut'
        elif theta == 0:
            method = 'exact'
        else:
            # Previously fell through and failed later with an unbound `method`.
            raise ValueError('theta must be >= 0, got {}'.format(theta))
        # n_components follows no_dims so the result matches the documented N_examples X no_dims shape.
        model = tsne(n_components=no_dims, perplexity=perplexity, early_exaggeration=early_exaggeration,
                     learning_rate=eta, n_iter=iterations, n_iter_without_progress=iterations,
                     min_grad_norm=0, metric="euclidean", init="random", verbose=verbose,
                     random_state=None, method=method, angle=theta)
        t_sne_results = model.fit_transform(samples)

        return t_sne_results

    else:  # using the C++/cuda implementation
        save_data_for_tsne(samples, files_dir, data_filename, theta, perplexity,
                           eta, no_dims, iterations, seed, gpu_mem, randseed)
        # Call t_sne_bhcuda and let it do its thing, echoing its output line by line
        with Popen([_find_exe_dir(), ], cwd=files_dir, stdout=PIPE, bufsize=1, universal_newlines=True) \
                as t_sne_bhcuda_p:
            for line in t_sne_bhcuda_p.stdout:
                print(line, end='')
                sys.stdout.flush()
            t_sne_bhcuda_p.wait()
        assert not t_sne_bhcuda_p.returncode, ('ERROR: Call to t_sne_bhcuda exited '
                                               'with a non-zero return code exit status, please ' +
                                               ('enable verbose mode and ' if not verbose else '') +
                                               'refer to the t_sne output for further details')

        return load_tsne_result(files_dir, results_filename)
def t_sne(samples, use_scikit=False, files_dir=None, results_filename='result.dat', data_filename='data.dat',
          no_dims=DEFAULT_NO_DIMS, perplexity=DEFAULT_PERPLEXITY, theta=DEFAULT_THETA, eta=DEFAULT_ETA,
          iterations=DEFAULT_ITERATIONS, seed=DEFAULT_SEED, early_exaggeration=DEFAULT_EARLY_EXAGGERATION,
          gpu_mem=DEFAULT_GPU_MEM, randseed=DEFAULT_RANDOM_SEED, verbose=2):
    """
    Run t-sne on the supplied samples (N_samples x D_features array). It either:
    1) Calls the t_sne_bhcuda.exe (which should be in the Path of the OS somehow - maybe in the Scripts folder for
    Windows or the python/bin folder for Linux) which then runs t-sne either on the CPU or the GPU, or
    2) Calls the sklearn t-sne module (which runs only on CPU).

    Parameters
    ----------
    samples -- The N_examples X D_features array to t-sne
    use_scikit -- If yes use the sklearn t-sne implementation. Otherwise use the t_sne_bhcuda.exe
    files_dir -- The folder in which the t_sne_bhcuda.exe should look for the data_filename.dat and save the
    results_filename.dat
    results_filename -- The name of the file that the t_sne_bhcuda.exe saves the t-sne results in
    data_filename -- The name of the file that the t_sne_bhcuda.exe looks into for data to t-sne. This data file
    also has a header with all the parameters that the t_sne_bhcuda.exe needs to run.
    no_dims -- Number of dimensions of the t-sne embedding
    perplexity -- Defines the amount of samples whose distances are compared to every sample (check sklearn and
    the van der Maaten paper)
    theta -- If > 0 then the algorithm runs the Barnes-Hut approximation (with angle = theta). If = 0 then it runs
    the exact version. Values smaller than 0.2 do not add too much error.
    eta -- The learning rate
    iterations -- The number of iterations (usually around 1000 should suffice)
    early_exaggeration -- The amount by which the samples are initially pushed apart. Used only in the
    scikit-learn version
    seed -- Set to a number > 0 if the amount of samples is too large to t-sne. Then the algorithm will t-sne the
    first seed number of samples. Then it will compare the euclidean distance between every other sample and these
    t-sned samples. For each non t-sned sample it will find the 5 closest t-sned samples and place the new sample
    on the point of the t-sne space that is given by the center of mass of those 5 closest samples. The mass of
    each closest sample is defined as the inverse of its euclidean distance to the new sample.
    gpu_mem -- If > 0 (and <= 1) then the t_sne_bhcuda.exe will run the euclidean distances calculations on the
    GPU (if possible) and will use (gpu_mem * 100) per cent of the available gpu memory to temporarily store
    results. If == 0 then the t_sne_bhcuda.exe will run only on the CPU. It has no effect if use_scikit = True
    randseed -- Set the random seed for the initialization of the samples on the no_dims plane.
    verbose -- Define verbosity. 0 = No output, 1 = Basic output, 2 = Full output, 3 = Also save t-sne results in
    interim files after every iteration. Option 3 is used to save all steps of t-sne to explore the way the
    algorithm separates the data (good for movies).

    Returns
    -------
    A N_examples X no_dims array of the embedded examples, or None when use_scikit is set but sklearn cannot be
    imported.

    Raises
    ------
    ValueError -- If use_scikit is set and theta is negative.
    AssertionError -- If the t_sne_bhcuda executable exits with a non-zero return code.
    """
    if use_scikit:  # using python's scikit tsne implementation
        try:
            from sklearn.manifold import TSNE as tsne
        except ImportError:
            # Trailing space added: the two literals used to join as "Falseand".
            print('You do not have sklearn installed. Try calling t_sne with use_scikit=False '
                  'and gpu_mem=0 if you do not want to run the code in GPU.')
            return None
        if theta > 0:
            method = 'barnes_hut'
        elif theta == 0:
            method = 'exact'
        else:
            # Previously fell through and failed later with an unbound `method`.
            raise ValueError('theta must be >= 0, got {}'.format(theta))
        # n_components follows no_dims so the result matches the documented N_examples X no_dims shape.
        model = tsne(n_components=no_dims, perplexity=perplexity, early_exaggeration=early_exaggeration,
                     learning_rate=eta, n_iter=iterations, n_iter_without_progress=iterations,
                     min_grad_norm=0, metric="euclidean", init="random", verbose=verbose,
                     random_state=None, method=method, angle=theta)
        t_sne_results = model.fit_transform(samples)

        return t_sne_results

    else:  # using the C++/cuda implementation
        save_data_for_tsne(samples, files_dir, data_filename, theta, perplexity,
                           eta, no_dims, iterations, seed, gpu_mem, verbose, randseed)
        # The data now lives on disk; drop the local reference before the external run.
        del samples
        # Call t_sne_bhcuda and let it do its thing. Text mode with line buffering makes
        # the echoed output readable (bytes used to be printed as b'...' reprs).
        with Popen([_find_exe_dir(), ], cwd=files_dir, stdout=PIPE, bufsize=1, universal_newlines=True) \
                as t_sne_bhcuda_p:
            for line in t_sne_bhcuda_p.stdout:
                print(line, end='')
                sys.stdout.flush()
            t_sne_bhcuda_p.wait()
        assert not t_sne_bhcuda_p.returncode, ('ERROR: Call to t_sne_bhcuda exited '
                                               'with a non-zero return code exit status, please ' +
                                               ('enable verbose mode and ' if not verbose else '') +
                                               'refer to the t_sne output for further details')

        return load_tsne_result(files_dir, results_filename)
# NOTE(review): this loaded result is overwritten by the fit below.
t_tsne = np.load(r'D:\Data\George\Projects\SpikeSorting\Joana_Paired_128ch\2015-09-03\Analysis\klustakwik'+\
                 r'\threshold_6_5std\t_sne_results_final_allspikes.npy')

# T-SNE
# Python scikit-learn t-sne
t0 = time.time()
perplexity = 500.0
early_exaggeration = 100.0
learning_rate = 3000.0
theta = 0.0
# NOTE(review): method='barnes_hut' is combined with angle=0.0 here - confirm that is intended.
model = tsne(n_components=2, perplexity=perplexity, early_exaggeration=early_exaggeration,
             learning_rate=learning_rate, n_iter=1000, n_iter_without_progress=500,
             min_grad_norm=1e-7, metric="euclidean", init="random", verbose=3,
             random_state=None, method='barnes_hut', angle=theta)
t_tsne = model.fit_transform(data_for_tsne)
# Transpose so that row 0 / row 1 hold the two embedding coordinates.
t_tsne = t_tsne.T
t1 = time.time()
print("Scikit t-sne took {} seconds, ({} minutes), for {} spikes".format(t1-t0, (t1-t0)/60, up_to_extra_spike))

# save the python scikit generated t-sne results
threshold = 5.5
file_name = r'D:\Data\George\Projects\SpikeSorting\Joana_Paired_128ch\2015-09-03\Analysis\t_tsne_ivm_data_{}sp_{}per_{}ee_{}lr_{}tp_{}thres.pkl'\
    .format(len(indices_of_data_for_tsne), perplexity, early_exaggeration, learning_rate, number_of_time_points, threshold)
# The context manager closes the file even if pickling raises.
with open(file_name, 'bw') as file:
    pickle.dump((ivm_data_filtered, t_tsne, juxta_cluster_indices_grouped, perplexity, early_exaggeration, learning_rate), file)
# Paths of the images that were actually featurised, in the same order as
# the rows appended to vector_list by this loop.
kept_paths = []
for i in range(1000):
    img = imread(df.local_path.loc[i])
    if img.shape[0] < 200 or img.shape[1] < 200:
        # DataFrame.drop returns a new frame; the result used to be thrown
        # away, so too-small images were never actually removed from df.
        df = df.drop(i)
    else:
        img_gray = color.rgb2gray(img)
        fd = hog(img_gray, orientations=9, pixels_per_cell=(8, 8),cells_per_block=(4, 4))
        vector_list.append(fd)
        kept_paths.append(df.local_path.loc[i])
        # Single formatted string prints the same on Python 2 and 3.
        print("{} {} {}".format(i, len(fd), df.local_path.loc[i]))

X = np.vstack(vector_list)

from sklearn.manifold import TSNE as tsne
# NOTE: from here on `tsne` names the estimator instance, not the class.
tsne = tsne(n_components=2)
# A separate tsne.fit(X) used to run first; its result was discarded, so it
# only doubled the runtime. fit_transform fits and embeds in one pass.
subspace_tsne = pd.DataFrame(tsne.fit_transform(X),columns=["x","y"])

num_bins = 64

subspace_tsne['grid_x'] = pd.cut(subspace_tsne['x'],num_bins,labels=False)
subspace_tsne['grid_y'] = pd.cut(subspace_tsne['y'],num_bins,labels=False)

# Assign positionally. A Series would be aligned on index labels, which do
# not match the 0..n-1 index of subspace_tsne once images were skipped.
# NOTE(review): assumes vector_list was empty before this loop - confirm.
subspace_tsne['local_path'] = kept_paths[-len(subspace_tsne):] if len(kept_paths) >= len(subspace_tsne) else df.local_path[:len(subspace_tsne)].values
# I should save the dataframe here, so later maybe I can use full images

thumb_side = 128

from PIL import Image
test_dataset = tf.data.Dataset.from_tensor_slices(
    (test_images, test_class)).shuffle(TEST_BUF).batch(BATCH_SIZE)

from sklearn.manifold import TSNE as tsne

i = 0
ds = np.empty([0, 10])
cls = []
codes = []
# Encode every batch, collecting latent codes and their class labels.
for x, y in test_dataset:
    mean, logvar = model.encode(x)
    z = model.reparameterize(mean, logvar)
    z = z.numpy()
    codes.append(z)
    # Flatten each batch separately: stacking the raw batches fails when
    # the final batch is smaller than BATCH_SIZE.
    cls.append(y.numpy().ravel())

# One concatenate instead of re-copying the growing array on every batch.
ds = np.concatenate([ds] + codes)
label = np.concatenate(cls).astype(int) if cls else np.empty(0, dtype=int)

ts = tsne(2)
mds = ts.fit_transform(ds)
plt.scatter(mds[:, 0], mds[:, 1], s=0.3, c=label)

# Walk the dataset until index 5 and keep that batch.
for i, x in enumerate(test_dataset):
    img = x
    if (i == 5):
        break

# First 16 images of the kept batch, shown as a 4x4 grayscale grid.
predictions = img[0][:16]
fig = plt.figure(figsize=(4, 4))

for i in range(predictions.shape[0]):
    plt.subplot(4, 4, i + 1)
    plt.imshow(predictions[i, :, :, 0], cmap='gray')
    plt.axis('off')
#print(u.shape) #print(vt.shape) #print(u.shape[1]) #print(vt.shape[0]) #true_s = np.zeros((u.shape[1], vt.shape[0])) #true_s[:s.size, :s.size] = np.diag(s) #print(true_s) #print(true_s.shape) #np.save('../../reorganized_data/cluster1/sparse_matrix', true_s) pca = PCA(n_components=2000) pca.fit(sparsematrix) X_pca = pca.transform(sparsematrix) print("completed pca") tsne_results = tsne(n_components=2, verbose=1).fit_transform(X_pca) print("completed tsne") #arr = list(tsnedata.T) #x = tsne_results[:,0] #x = [number**3 for number in x] #y = tsne_results[:,1] #y = [number**3 for number in y] #fig = plt.figure() #ax = fig.gca(projection='3d') #fig, ax = plt.subplots() #ax.set(xlim = (0,20), ylim = (-500, 1000)) #ax.scatter(x, y) #for i in range(0, 3000):
def run_tsne(pool_arr,dims=1):
    """Embed ``pool_arr`` with t-SNE into ``dims`` components and return the result.

    The estimator is built with perplexity 30, 1500 iterations and a
    learning rate of 120; the input is used as given (no PCA step).
    """
    reducer = tsne(
        n_components=dims,
        perplexity=30,
        n_iter=1500,
        learning_rate=120,
    )
    return reducer.fit_transform(pool_arr)
# 2-D scatter of the columns of reduced_stim picked by best_sep[2] and
# best_sep[1], coloured by `tastes`.
plt.figure()
plt.scatter(reduced_stim[:,best_sep[2]],reduced_stim[:,best_sep[1]],c=tastes)
plt.colorbar()
# NOTE(review): the score is computed but not stored or printed.
clf.score(reduced_stim, tastes)

# 3-D scatter over all three columns listed in best_sep.
fig = plt.figure()
ax = Axes3D(fig)
p = ax.scatter(reduced_stim[:,best_sep[0]],reduced_stim[:,best_sep[1]],reduced_stim[:,best_sep[2]],
               c =tastes,s=20)
fig.colorbar(p)

#
# One t-SNE figure per perplexity in 10, 15, 20, 25, titled with the value.
for perp in np.arange(10,30,5):
    reduced_stim_tsne = tsne(perplexity = perp).fit_transform(total_this_off)
    plt.figure();plt.scatter(reduced_stim_tsne[:,0],reduced_stim_tsne[:,1],c=tastes)
    plt.title(perp)

## Palatability
clf = lda()
clf.fit(reduced_stim, pals)
# Indices of the three coefficients of the first coef_ row with the largest
# absolute value, in ascending order of magnitude.
fit_coefs = clf.coef_[0]
best_sep = np.argsort(np.abs(fit_coefs))[-3:]
plt.figure()
plt.scatter(reduced_stim[:,best_sep[2]],reduced_stim[:,best_sep[1]],c=pals)
plt.colorbar()
# NOTE(review): the score is computed but not stored or printed.
clf.score(reduced_stim, pals)

fig = plt.figure()
def learn_tSNE(x, params=PARAMS_LEARNING, version=VERSION, path=REDUCTED_DATA_PATH,
               reduction_size_factor=REDUCTION_SIZE_FACTOR, pca_variance_needed=0.9):
    """
    Learn tSNE representation.

    :param x: input data to project
    :param params: t-SNE parameters to learn
        it will learn every combination possible

    .. seealso:: sklearn.manifold.TSNE()

    :type params: {
        'perplexities':array(int),
        'learning_rates':array(int),
        'inits':array({'pca', 'random'})
        'n_iters':array(int),
        }

    :param version: version of data to load (e.g: _20170614)
    :param path: where to store 2D representation, absolute path;
        it is concatenated directly with the file name
    :param reduction_size_factor: factor by which we divide the number of
        samples (tsne is greedy); only used to build the file name here
    :param pca_variance_needed: if truthy, ``x`` is first passed through
        ``reduce_with_PCA`` with this value as ``variance_needed``

    :return: Embedded data keyed by parameter tuple, and a models dict.
        The fitted models are deliberately not kept (to save RAM), so the
        second dict is always empty.
    :rtype: dict{params:array}, dict
    """
    perplexities = params['perplexities']
    learning_rates = params['learning_rates']
    inits = params['inits']
    n_iters = params['n_iters']

    models, x_transformed = {}, {}

    concatenated_iterator = itertools.product(
        perplexities,
        learning_rates,
        inits,
        n_iters
    )

    if pca_variance_needed:
        x = reduce_with_PCA(x, variance_needed=pca_variance_needed)

    for perplexity, learning_rate, init, n_iter in concatenated_iterator:
        param = (
            perplexity,
            learning_rate,
            init,
            n_iter
        )

        # in a desperate move to save RAM: the estimator is not stored in
        # `models`, only the embedding is kept.
        logging.info("learning model %s %s %s %s", str(perplexity), str(learning_rate), str(init), str(n_iter))
        x_transformed[param] = tsne(
            perplexity=perplexity,
            learning_rate=learning_rate,
            # `init` is part of the result key and of the file name, so it
            # must reach the estimator; it used to be dropped silently,
            # making every 'inits' value run the same configuration.
            init=init,
            n_iter=n_iter,
            # only use with Multicore_tSNE: n_jobs=12,
        ).fit_transform(x)
        logging.info("done!")

        name = ''.join('_' + str(p) for p in param)
        full_path = ''.join([
            path,
            'embedded_x_1-',
            str(reduction_size_factor),
            name,
            version,
            '.npz',
        ])

        np.savez(
            full_path,
            x_2D=x_transformed[param],
        )  # model=models[param])

    return x_transformed, models
# Encode the test set of every task; collect the codes in `zs` and a
# matching vector filled with the task index in `ys`.
# NOTE(review): `input` shadows the Python builtin of the same name.
for i in range(no_tasks):
    input = torch.tensor(x_testsets[i]).float()
    code = model.encode(input, task_id = i, no_samples=1, const_mask=True, temp = model.min_temp)[0].mean(0)
    zs.append(code.detach().numpy())
    ys.append(torch.ones((code.shape[0]))*i)
Z = np.concatenate(zs, axis = 0)
Y = np.concatenate(ys)
# Per-task statistics over axis 0, stacked as one row per task.
# NOTE(review): vars_emp holds np.std values (standard deviations), despite its name.
means_emp = np.concatenate([np.mean(d, 0).reshape([1,-1]) for d in zs])
vars_emp = np.concatenate([np.std(d, 0).reshape([1,-1]) for d in zs])
# means_emp = np.concatenate([m.cpu().detach().numpy().reshape([1,-1]) for m in model.z_mus])
# vars_emp = np.concatenate([(m/2).exp().cpu().detach().numpy().reshape([1,-1]) for m in model.z_lvs])
# The per-task mean rows are appended after all codes, so they are embedded
# together with them and occupy the last rows of Z_embedded.
Z_with_means = np.concatenate([Z, means_emp], axis = 0)
manif = tsne(n_components = 2)
Z_embedded = manif.fit_transform(Z_with_means)
prev = 0
figure = plt.figure(figsize = [8,8])
till = no_tasks
tasks = np.arange(no_tasks)[:till]
# Plot each task's consecutive slice of the embedding; `prev` is the running
# row offset of the current task.
for t in tasks:
    now = zs[t].shape[0]
    plt.plot(Z_embedded[prev:now+prev,0],Z_embedded[prev:now+prev,1], '.', label = "class_" + str(t))
    prev = now+prev
# prev = Z.shape[0]
# for t in tasks:
#     plt.plot(Z_embedded[prev+t:prev+t+1,0],Z_embedded[prev+t:prev+t+1,1], 'o', markersize=12,
#              color = 'C'+str(t), markeredgecolor=(0,0,0,1), markeredgewidth=2)
bad = np.array(res_ts["indices"][:1000]) if first_run: all_bad = bad first_run = False else: all_bad = np.intersect1d(bad, all_bad) print(all_bad) gc.collect() all_bad = all_bad[:10] print("Bad indexes in HuaWei hand data: ", all_bad) np.savetxt("huawei_hand_bad_indexes.csv", all_bad, delimiter=",", fmt="%d") e = tsne(n_components=2, n_jobs=8).fit_transform(np.genfromtxt(feature_file, delimiter=',')) np.savetxt("src/datasets/huawei1_tsne.csv", e, delimiter=",") plt.figure(1) for i in range(8): x = np.where(labels == i) plt.scatter(e[x, 0], e[x, 1], c=grays[i], s=4, label=names[i]) plt.scatter(e[all_bad, 0], e[all_bad, 1], marker='x', s=200, c='red', label="Mislabeled") plt.title("Mislabeled Instances in Sussex-HuaWei Hand") plt.savefig('huawei_hand_bad_instances.pdf')
def applyTsne(images):
    """Return the two-component t-SNE embedding of ``images``."""
    reducer = tsne(n_components=2)
    embedding = reducer.fit_transform(images)
    return embedding
sel_words = random.sample(list(np.arange(vocab_size)),1500) # for epoch in range(16): # model_embeds = np.load(base_model_dir+'epoch_'+str(epoch)+'_embedding.npy') # # print(model_embeds.shape) # # model_embeds = model_embeds[sel_words,:] # # print(model_embeds.shape,sel_words[:10]) # img_save_name2d = base_model_dir+'epoch_'+str(epoch)+'_tsne.png' # img_save_name3d = base_model_dir+'epoch_'+str(epoch)+'_tsne3D.png' # # model_embeds = model_embeds[:1000,:] # tsne_ak_2d = TSNE(perplexity=30, n_components=2, init='random', n_iter=3500,verbose=True) # embeddings_ak_2d = tsne_ak_2d.fit_transform(model_embeds) # tsne_plot_2d('ABC Corpus',embeddings_ak_2d,img_save_name2d,a=0.1) for epoch in range(16): if epoch==5: model_embeds = np.load(base_model_dir+'epoch_'+str(epoch)+'_embedding.npy') print(model_embeds.shape) model_embeds = model_embeds[sel_words,:] img_save_name3d = base_model_dir+'epoch_'+str(epoch)+'_tsne3D.png' tsne_wp_3d = tsne(perplexity=30, n_components=3, init='pca', n_iter=3500, random_state=12, verbose=True,n_jobs=8) embeddings_wp_3d = tsne_wp_3d.fit_transform(model_embeds) tsne_plot_3d('Visualizing Embeddings using t-SNE ', 'ABC_Corpus', embeddings_wp_3d, a=0.5) else: continue
# compressing the tf-idf values row_sums = np.sum(output, axis=1) word_indices = np.array(np.argsort(row_sums)[-1:-101:-1]) output = output[word_indices, :] voc_list = [voc_list[i] for i in word_indices] dic = {} for i, word in enumerate(voc_list): dic[word] = i pd.DataFrame(data=output, index=voc_list, columns=docs) \ .to_csv('tf_idf_matrix.csv') # print(search_docs(output, dic, docs, 'veikk')) embedded_output = tsne().fit_transform(output.T) plt.scatter(embedded_output[:, 0], embedded_output[:, 1]) plt.show() for doc in [ 'button-mapping-journeys', 'veikk-linux-driver-v3-notes', 'on-developing-a-linux-driver', 'code-opinions' ]: print(embedded_output[docs.index('corpora/blog-posts/' + doc + '.txt'), :]) # print([docs[doc_index] for doc_index in np.argwhere(embedded_output[:, 0] > 300).flatten()]) # voc_list = [voc_list[i] for i in np.argsort(output[:, 1])] # output = output[np.argsort(output[:,1])] # for i,x in enumerate(output): # print(voc_list[i], end = ": ")
# Build per-cell [start, end) row ranges from the running total of
# cell_info['num_of_spikes']: each cell's range begins where the previous
# one ended.
start = [0]
end = []
total_spikes = 0
for i in np.arange(0, len(cell_info['num_of_spikes'])):
    # The last cell has no successor, so no further start index is added.
    if i < len(cell_info['num_of_spikes']) - 1:
        start.append(cell_info['num_of_spikes'][i] + total_spikes)
    end.append(cell_info['num_of_spikes'][i] + total_spikes)
    total_spikes += cell_info['num_of_spikes'][i]

# T-SNE
perplexity = 5
early_exaggeration = 1.0
learning_rate = 100
model = tsne(n_components=2, perplexity=perplexity, early_exaggeration=early_exaggeration,
             learning_rate=learning_rate, metric='euclidean',
             random_state=None, init='random', verbose=10)
t_tsne = model.fit_transform(data_for_tsne)
# Transpose so that t_tsne[0] and t_tsne[1] are the two coordinate rows.
t_tsne = t_tsne.T

# 2D plot
fig = plt.figure()
ax = fig.add_subplot(111)
#c = ['r', 'g', 'b', 'y', 'sienna', 'palegreen', 'm', 'pink', 'olivedrab', 'silver', 'forestgreen', 'palegoldenrod']
s = 20
# One scatter series per cell, using that cell's colour and label.
# NOTE(review): assumes the rows of data_for_tsne are ordered cell by cell - confirm.
for i in np.arange(0, len(cell_info['num_of_spikes'])):
    ax.scatter(t_tsne[0][start[i]:end[i]], t_tsne[1][start[i]:end[i]],
               color=cell_info['color'][i], s=s, label=cell_info['label'][i])

# Now add the legend with some customizations.
legend = ax.legend(loc='upper center', shadow=True)
DATASET_NUM) + "_indexes.csv" grays = ['#111111', '#555555', '#818181', '#a1a1a1', '#c1c1c1'] colors = [ 'blue', 'green', 'cyan', 'gray', 'olive', 'limegreen', 'gold', 'darkgreen' ] names = ["0", "1", "2", "3", "4"] X = np.genfromtxt(data_file, delimiter=',') labels = np.genfromtxt(label_file, delimiter=',', dtype='int') bad_indexes = np.genfromtxt(index_file, delimiter=',', dtype='int') print("Creating visualization of synthetic dataset #", DATASET_NUM) e = tsne(n_components=2, n_jobs=8).fit_transform(X) plt.figure(1) for i in range(max(labels) + 1): x = np.where(labels == i) plt.scatter(e[x, 0], e[x, 1], c=colors[i], s=4, label=names[i]) plt.scatter(e[bad_indexes, 0], e[bad_indexes, 1], marker='x', s=75, c='red') plt.title("Synthetic Set " + str(DATASET_NUM) + " t-SNE Visualization") plt.axis('off') plt.legend() plt.savefig('Synthetic_Set' + str(DATASET_NUM) + '.pdf')
# magerr = np.asarray(magerr)/1 array = [] stat_array = array.append( extract_features.extract_all(mag, magerr, convert=True)) array = np.array([i for i in array]) stat_array = pca_model.transform(array) except: continue x_data = np.append(x_data, stat_array, axis=0) #for CONS in [99309, 98402, 105836, 68914, 69516]: # pass #for ML in [33065, 107503]: # pass data = [np.asarray(i) for i in x_data] vis_data = tsne(n_components=2).fit_transform(data) vis_x = vis_data[:, 0] vis_y = vis_data[:, 1] #plt.scatter(vis_x, vis_y, c=y_data, cmap=plt.cm.get_cmap("jet",4)) #plt.title('Feature Space Distribution', fontsize=40) #plt.colorbar(ticks=range(4)) #plt. #plt.show() colors = ['red', 'green', 'blue', 'yellow', 'orange', 'purple', 'black'] fig, ax = plt.subplots() for i, object_class in enumerate(['Variable', 'Constant', 'CV', 'ML']): x = vis_x[i * 500:(i + 1) * 500] y = vis_y[i * 500:(i + 1) * 500] ax.scatter(x, y, c=colors[i], label=object_class)
# Fit CP tensor decomposition `repeats` times and keep the fit with the
# lowest objective value.
rank = 10
repeats = 10
all_models = []
all_obj = []
for repeat in tqdm(range(repeats)):
    U = tt.cp_als(all_data_long, rank=rank, verbose=False)
    all_models.append(U)
    all_obj.append(U.obj)

U = all_models[np.argmin(all_obj)]

## We should be able to see differences in tastes by using distance matrices on trial factors
trial_factors = U.factors.factors[-1]
trial_distances = dist_mat(trial_factors, trial_factors)
plt.figure()
plt.imshow(exposure.equalize_hist(trial_distances))

# pca on trial factors and plot by taste
# Labels 0..5, each repeated 32 times, in sorted order.
trial_labels = np.sort([0, 1, 2, 3, 4, 5] * 32)
reduced_trials_pca = pca(n_components=2).fit_transform(trial_factors)
# Open a fresh figure: without it the scatter is drawn on top of the
# distance-matrix image above (the t-SNE plot below already does this).
plt.figure()
plt.scatter(reduced_trials_pca[:, 0], reduced_trials_pca[:, 1], c=trial_labels)

# tsne on trial factors and plot by taste
X_embedded = tsne(n_components=2, perplexity=40).fit_transform(trial_factors)
plt.figure()
plt.scatter(X_embedded[:, 0], X_embedded[:, 1], c=trial_labels)

# Plot the low-dimensional factors of the selected fit.
fig, _, _ = tt.plot_factors(U.factors)
import scipy
import numpy as np
from sklearn.manifold import TSNE as tsne
import sys
import matplotlib.pyplot as plt

# Command-line arguments:
#   argv[1] - array file loaded with np.load
#   argv[2] - whitespace-separated text file holding the class of each sample
#   argv[3] - suffix of the output file name ("save_<suffix>")
data = np.load(sys.argv[1])
with open(sys.argv[2]) as f:
    lines = f.readlines()
classes = []
for line in lines:
    classes.append(line.split())
# Drop the first row and the first column; the reshape to len(classes)-1
# elements only succeeds when exactly one column remains.
# presumably the first row is a header and the first column an id - verify
classes = np.array(classes)[1:,1:].reshape([len(classes)-1])
print("Loaded data")
xsne = tsne(learning_rate=10).fit_transform(data)
np.save("save_%s"%(sys.argv[3]),xsne)
# Same marker size (3) for every point.
sizes = [3]*xsne.shape[0]
# NOTE(review): `classes` holds strings as parsed from the file; confirm
# matplotlib interprets them as intended when passed as `c`.
plt.scatter(xsne[:,0], xsne[:,1],c=classes,s=sizes)
plt.show()