_ = ax.text2D(0.8, 0.05, s="n_samples=1500", transform=ax.transAxes)

# %%
# Computing the LLE and t-SNE embeddings, we find that LLE seems to unroll the
# Swiss Roll pretty effectively. t-SNE, on the other hand, is able to preserve
# the general structure of the data but poorly represents the continuous
# nature of our original data. Instead, it seems to unnecessarily clump
# sections of points together.
sr_lle, sr_err = manifold.locally_linear_embedding(sr_points, n_neighbors=12,
                                                   n_components=2)

sr_tsne = manifold.TSNE(n_components=2, learning_rate="auto", perplexity=40,
                        init="pca", random_state=0).fit_transform(sr_points)

fig, axs = plt.subplots(figsize=(8, 8), nrows=2)
axs[0].scatter(sr_lle[:, 0], sr_lle[:, 1], c=sr_color)
axs[0].set_title("LLE Embedding of Swiss Roll")
axs[1].scatter(sr_tsne[:, 0], sr_tsne[:, 1], c=sr_color)
_ = axs[1].set_title("t-SNE Embedding of Swiss Roll")

# %%
# .. note::
#
#     LLE seems to be stretching the points from the center (purple)
#     of the swiss roll. However, we observe that this is simply a byproduct
#     of how the data was generated. There is a higher density of points near the
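# ---------------------------------------------------------------------------
# For context, a minimal sketch of the setup the excerpt above assumes. The
# names sr_points/sr_color and the 1500-sample count come from the snippet;
# the make_swiss_roll call itself is an assumption, since the original setup
# code is not shown here.
import matplotlib.pyplot as plt
from sklearn import datasets

sr_points, sr_color = datasets.make_swiss_roll(n_samples=1500, random_state=0)

fig = plt.figure(figsize=(8, 8))
ax = fig.add_subplot(111, projection="3d")
ax.scatter(sr_points[:, 0], sr_points[:, 1], sr_points[:, 2],
           c=sr_color, s=50, alpha=0.8)
ax.set_title("Swiss Roll in Ambient Space")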
assert np.isclose(y_1, y_2).all(), \
    "Invalid labels given: different labels for raw and scat data"
y = y_1
labels_uniq = np.unique(y)

fig, axs = plt.subplots(1, 2, figsize=(18, 6))
# fig, subplots = plt.subplots(3, 5, figsize=(15, 8))

if method == 'pca':
    pca = decomposition.PCA(n_components=2)
    X_1 = pca.fit_transform(X_1)
    # NOTE: refitting the same PCA object on X_2 discards the components
    # learned from X_1, so the two panels live in different projections.
    X_2 = pca.fit_transform(X_2)
else:
    # Fixing random_state makes the t-SNE output reproducible across runs.
    tsne = manifold.TSNE(n_components=2, init='random', random_state=0,
                         perplexity=perplexity)
    X_1 = tsne.fit_transform(X_1)
    X_2 = tsne.fit_transform(X_2)

for label in labels_uniq:
    X_label_1 = X_1[label == y, :]
    X_label_2 = X_2[label == y, :]
    axs[0].scatter(X_label_1[:, 0], X_label_1[:, 1], s=marker_size, label=label)
    axs[1].scatter(X_label_2[:, 0], X_label_2[:, 1], s=marker_size, label=label)
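# ---------------------------------------------------------------------------
# If the two panels are meant to be directly comparable, a sketch of the
# shared-projection variant (stand-in data; X_1/X_2 mirror the snippet's
# names). PCA can be fitted once and reused; t-SNE has no transform(), so
# the usual workaround is to embed the concatenation and split the result.
import numpy as np
from sklearn import decomposition, manifold

rng = np.random.RandomState(0)
X_1, X_2 = rng.rand(200, 10), rng.rand(150, 10)  # stand-ins for the real data

pca = decomposition.PCA(n_components=2)
X_1_proj = pca.fit_transform(X_1)   # fit on the first dataset
X_2_proj = pca.transform(X_2)       # reuse the same components

X_joint = manifold.TSNE(n_components=2, random_state=0).fit_transform(
    np.vstack([X_1, X_2]))
X_1_emb, X_2_emb = X_joint[:len(X_1)], X_joint[len(X_1):]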
if stuff_hrep_tr is None:
    stuff_hrep_tr = l[2]
else:
    stuff_hrep_tr = np.vstack((stuff_hrep_tr, l[2]))

pkl.dump(
    {
        "x_hint_repr_tst": stuff_hrep_tst,
        "y_tst": testy,
        "ximg_tst": testx.reshape((testx.shape[0], 28 * 28)),
        "x_hint_repr_tr": stuff_hrep_tr,
        "y_tr": trainy,
        "ximg_tr": trainx.reshape((trainx.shape[0], 28 * 28))
    }, fhr)

# plot t-SNE of the original images
tx0 = DT.datetime.now()
tsne_original = manifold.TSNE(n_components=2, init='pca', random_state=0)
X_tsne_original = tsne_original.fit_transform(
    testx.reshape((testx.shape[0], 28 * 28)))
fig_tsne_org = plot_representations(
    X_tsne_original, testy, "t-SNE embedding of mnist original images.")
fig_tsne_org.savefig(fold_exp + "/original_rep_test.eps", format='eps',
                     dpi=1200, bbox_inches='tight')
print("t-SNE of original images took:", DT.datetime.now() - tx0)

# plot t-SNE of the prediction
tx0 = DT.datetime.now()
tsne_lasthidden_rep = manifold.TSNE(n_components=2, init='pca', random_state=0)
X_tsne_lhrep = tsne_lasthidden_rep.fit_transform(stuff_hrep_tst)
def start_manifold_learning(input):
    # res = np.loadtxt('numpyData.csv', dtype=float, delimiter=';')
    # todo check mask
    # mask = np.any(np.not_equal(input, 0.), axis=0)
    # arr = input['numpyArr'][:, mask]
    # # arr = np.unique(arr, axis=0)
    # # arr = arr[:]
    n_points = 1000
    X = input['numpyArr']
    print(type(X))
    # make_s_curve returns an (X, color) tuple; keep only the color component
    # (the samples_generator module was removed from sklearn, so call
    # datasets.make_s_curve directly).
    _, color = datasets.make_s_curve(n_points, random_state=0)
    # X, color = datasets.make_s_curve(n_points, random_state=0)
    # print(X[0])
    # print(input[0])
    # print(len(X[0]))
    n_neighbors = 10
    n_components = 2

    fig = plt.figure(figsize=(15, 8))
    plt.suptitle("Manifold Learning with %i points, %i neighbors"
                 % (n_points, n_neighbors), fontsize=14)

    ax = fig.add_subplot(251, projection='3d')
    ax.scatter(X[:, 0], X[:, 1], X[:, 2], cmap=plt.cm.Spectral)
    ax.view_init(4, -72)

    methods = ['standard', 'ltsa', 'hessian', 'modified']
    labels = ['LLE', 'LTSA', 'Hessian LLE', 'Modified LLE']

    res = {}

    # try:
    #     for i, method in enumerate(methods):
    #         t0 = time()
    #         Y = manifold.LocallyLinearEmbedding(n_neighbors, n_components,
    #                                             eigen_solver='auto',
    #                                             method=method).fit_transform(X)
    #         t1 = time()
    #         print("%s: %.2g sec" % (methods[i], t1 - t0))
    #         ax = fig.add_subplot(252 + i)
    #         plt.scatter(Y[:, 0], Y[:, 1], cmap=plt.cm.Spectral)
    #         plt.title("%s (%.2g sec)" % (labels[i], t1 - t0))
    #         ax.xaxis.set_major_formatter(NullFormatter())
    #         ax.yaxis.set_major_formatter(NullFormatter())
    #         plt.axis('tight')
    #         res[method] = {
    #             'x': Y[:, 0].tolist(),
    #             'y': Y[:, 1].tolist()
    #         }
    # except:
    #     pass

    t0 = time()
    Y = manifold.Isomap(n_neighbors=n_neighbors,
                        n_components=n_components).fit_transform(X)
    t1 = time()
    print("Isomap: %.2g sec" % (t1 - t0))
    ax = fig.add_subplot(257)
    plt.scatter(Y[:, 0], Y[:, 1], cmap=plt.cm.Spectral)
    plt.title("Isomap (%.2g sec)" % (t1 - t0))
    ax.xaxis.set_major_formatter(NullFormatter())
    ax.yaxis.set_major_formatter(NullFormatter())
    plt.axis('tight')
    res['Isomap'] = {
        'x': Y[:, 0].tolist(),
        'y': Y[:, 1].tolist(),
        'ids': input['ids'],
        'matInfo': input['matInfo']
    }
    print('Learning data: ')
    print('x: ' + str(len(res['Isomap']['x'])))
    print('y: ' + str(len(res['Isomap']['y'])))
    print('ids: ' + str(len(res['Isomap']['ids'])))
    print('matInfo: ' + str(len(res['Isomap']['matInfo'])))

    t0 = time()
    mds = manifold.MDS(n_components, max_iter=100, n_init=1)
    Y = mds.fit_transform(X)
    t1 = time()
    print("MDS: %.2g sec" % (t1 - t0))
    ax = fig.add_subplot(258)
    plt.scatter(Y[:, 0], Y[:, 1], cmap=plt.cm.Spectral)
    plt.title("MDS (%.2g sec)" % (t1 - t0))
    ax.xaxis.set_major_formatter(NullFormatter())
    ax.yaxis.set_major_formatter(NullFormatter())
    plt.axis('tight')
    res['MDS'] = {
        'x': Y[:, 0].tolist(),
        'y': Y[:, 1].tolist(),
        'ids': input['ids'],
        'matInfo': input['matInfo']
    }

    t0 = time()
    se = manifold.SpectralEmbedding(n_components=n_components,
                                    n_neighbors=n_neighbors)
    Y = se.fit_transform(X)
    t1 = time()
    print("SpectralEmbedding: %.2g sec" % (t1 - t0))
    ax = fig.add_subplot(259)
    plt.scatter(Y[:, 0], Y[:, 1], cmap=plt.cm.Spectral)
    plt.title("SpectralEmbedding (%.2g sec)" % (t1 - t0))
    ax.xaxis.set_major_formatter(NullFormatter())
    ax.yaxis.set_major_formatter(NullFormatter())
    plt.axis('tight')
    res['Spectral Embedding'] = {
        'x': Y[:, 0].tolist(),
        'y': Y[:, 1].tolist(),
        'ids': input['ids'],
        'matInfo': input['matInfo']
    }

    t0 = time()
    tsne = manifold.TSNE(n_components=n_components, init='pca', random_state=0)
    Y = tsne.fit_transform(X)
    t1 = time()
    print("t-SNE: %.2g sec" % (t1 - t0))
    ax = fig.add_subplot(2, 5, 10)
    plt.scatter(Y[:, 0], Y[:, 1], cmap=plt.cm.Spectral)
    plt.title("t-SNE (%.2g sec)" % (t1 - t0))
    ax.xaxis.set_major_formatter(NullFormatter())
    ax.yaxis.set_major_formatter(NullFormatter())
    plt.axis('tight')
    res['TSNE'] = {
        'x': Y[:, 0].tolist(),
        'y': Y[:, 1].tolist(),
        'ids': input['ids'],
        'matInfo': input['matInfo']
    }

    # plt.show()
    # return plt
    # np.savetxt('X.csv', X)
    # np.savetxt('Y.csv', Y)
    # res = {
    #     'x': Y[:, 0],
    #     'y': Y[:, 1]
    # }
    return res
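# ---------------------------------------------------------------------------
# Hypothetical invocation of start_manifold_learning; the 'numpyArr', 'ids'
# and 'matInfo' keys mirror what the function reads, the payload values are
# made up.
import numpy as np

payload = {
    'numpyArr': np.random.rand(1000, 3),
    'ids': list(range(1000)),
    'matInfo': [{}] * 1000,
}
res = start_manifold_learning(payload)
print(sorted(res.keys()))  # ['Isomap', 'MDS', 'Spectral Embedding', 'TSNE']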
# feature selection
lasso_selector = linear_model.Lasso()
lasso_selector.fit(X_train, y_train)
print(lasso_selector.coef_)
utils.plot_feature_importances(lasso_selector, X_train, 40)
X_train1 = utils.select_features(lasso_selector, X_train)

# reduce features for visualization
utils.corr_heatmap(X_train1)
# n_components=0.95 keeps enough components to explain 95% of the variance
lpca = decomposition.PCA(n_components=0.95)
lpca.fit(X_train1)
print(np.cumsum(lpca.explained_variance_ratio_))
pca_data = lpca.transform(X_train1)

tsne = manifold.TSNE(n_components=3)
tsne_data = tsne.fit_transform(pca_data)
rutils.plot_data_3d_regression(tsne_data, y_train)

# build model with regression machine learning algorithms
scoring = metrics.make_scorer(log_rmse, greater_is_better=False)

knn_estimator = neighbors.KNeighborsRegressor()
knn_grid = {'n_neighbors': list(range(5, 15))}
final_model = utils.grid_search_best_model(knn_estimator, knn_grid, pca_data,
                                           y_train, scoring=scoring)

X_test = house3[house_train.shape[0]:]
    X[:, c] = n_enc

# 3. what does an embedding of all int values look like?
print('embedding int values..')
plt.figure(1)
X_int = X[:, np.array(int_idx)]
X_int = np.float64(X_int)
# replace NaN with 0 (NaN != NaN, so this mask selects exactly the NaNs)
X_int[X_int != X_int] = 0
# min-max scale each column; the 0.001 added to the max guards against
# division by zero for constant columns
X_int -= np.min(X_int, axis=0)
X_int /= (.001 + np.max(X_int, axis=0))

tsne = manifold.TSNE(n_components=2, init='pca')
Y_int = tsne.fit_transform(X_int)

plt.scatter(Y_int[len(X1):, 0], Y_int[len(X1):, 1], marker='.', label='test')
sp = plt.scatter(Y_int[:len(X1), 0], Y_int[:len(X1), 1], c=y1, label='train')
plt.legend(prop={'size': 6})
plt.colorbar(sp)
plt.title('t-SNE embedding of int variables')
plt.savefig('t-SNE_int.png')
plt.show()

# 4: what does an embedding of all string values look like?
print('embedding string values...')
plt.figure(2)
X_str = X[:, np.array(cat_idx)]
# replace nan
def plot_tsne_result(X, y, n_components):
    positions = []
    errors = []

    def _gradient_descent(objective, p0, it, n_iter, n_iter_check=1,
                          n_iter_without_progress=300, momentum=0.8,
                          learning_rate=200.0, min_gain=0.01,
                          min_grad_norm=1e-7, verbose=0, args=None,
                          kwargs=None):
        if args is None:
            args = []
        if kwargs is None:
            kwargs = {}

        p = p0.copy().ravel()
        update = np.zeros_like(p)
        gains = np.ones_like(p)
        # np.float is deprecated in NumPy >= 1.20; use the concrete dtype
        error = np.finfo(np.float64).max
        best_error = np.finfo(np.float64).max
        best_iter = i = it
        tic = time()
        for i in range(it, n_iter):
            positions.append(p.copy())  # record the embedding at every step

            error, grad = objective(p, *args, **kwargs)
            errors.append(error)
            grad_norm = linalg.norm(grad)

            inc = update * grad < 0.0
            dec = np.invert(inc)
            gains[inc] += 0.2
            gains[dec] *= 0.8
            np.clip(gains, min_gain, np.inf, out=gains)
            grad *= gains
            update = momentum * update - learning_rate * grad
            p += update

            if (i + 1) % n_iter_check == 0:
                toc = time()
                duration = toc - tic
                tic = toc
                if verbose >= 2:
                    print("[t-SNE] Iteration %d: error = %.7f,"
                          " gradient norm = %.7f"
                          " (%s iterations in %0.3fs)"
                          % (i + 1, error, grad_norm, n_iter_check, duration))
                if error < best_error:
                    best_error = error
                    best_iter = i
                elif i - best_iter > n_iter_without_progress:
                    if verbose >= 2:
                        print("[t-SNE] Iteration %d: did not make any progress "
                              "during the last %d episodes. Finished."
                              % (i + 1, n_iter_without_progress))
                    break
                if grad_norm <= min_grad_norm:
                    if verbose >= 2:
                        print("[t-SNE] Iteration %d: gradient norm %f. Finished."
                              % (i + 1, grad_norm))
                    break
        return p, error, i

    D = pairwise_distances(X, squared=True)
    P_binary = _joint_probabilities(D, 30., False)
    P_binary_s = squareform(P_binary)

    positions.clear()
    errors.clear()
    # Monkey-patch sklearn's private optimizer so it records intermediate
    # embeddings (in scikit-learn >= 0.22 this module is sklearn.manifold._t_sne)
    manifold.t_sne._gradient_descent = _gradient_descent
    manifold.TSNE(n_components=n_components, random_state=100).fit_transform(X)

    # np.dstack needs a sequence, not a generator, in recent NumPy versions
    if n_components == 3:
        X_iter = np.dstack([position.reshape(-1, 3) for position in positions])
    elif n_components == 2:
        X_iter = np.dstack([position.reshape(-1, 2) for position in positions])

    cmap = sns.light_palette("blue", as_cmap=True)
    fig = plt.figure(figsize=(12, 12))
    if X.shape[1] == 3:
        ax1 = fig.add_subplot(3, 4, 1, projection='3d')
        plot_data_3d_classification(X, y, ax=ax1, new_window=False,
                                    title="Original Data")
    elif X.shape[1] == 2:
        ax1 = fig.add_subplot(3, 4, 1)
        plot_data_2d_classification(X, y, ax=ax1, new_window=False,
                                    title="Original Data")

    ax2 = fig.add_subplot(3, 4, 2)
    plot_distance_matrix(P_binary_s, ax2, cmap, 'Pairwise Similarities')

    iter_size = int(len(positions) / 5)
    k = 2
    for i in range(5):
        iter_index = i * iter_size
        tmp = X_iter[..., iter_index]
        err = round(errors[iter_index], 2)
        title = "Iter: " + str(iter_index) + " Loss:" + str(err)
        k = k + 1
        if X_iter.shape[1] == 3:
            ax3 = fig.add_subplot(3, 4, k, projection='3d')
            plot_data_3d_classification(tmp, y, ax=ax3, new_window=False,
                                        title=title)
        elif X_iter.shape[1] == 2:
            ax3 = fig.add_subplot(3, 4, k)
            plot_data_2d_classification(tmp, y, ax=ax3, new_window=False,
                                        title=title)
        k = k + 1
        ax4 = fig.add_subplot(3, 4, k)
        n = 1. / (pdist(tmp, "sqeuclidean") + 1)
        Q = n / (2.0 * np.sum(n))
        Q = squareform(Q)
        plot_distance_matrix(Q, ax4, cmap, title=title)

    plt.subplots_adjust(wspace=0.1, hspace=0.5)
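# ---------------------------------------------------------------------------
# Hypothetical invocation of plot_tsne_result, assuming the helper plotters
# (plot_data_3d_classification etc.) are importable in scope; 3-D blobs
# exercise the 3-D "Original Data" branch.
from sklearn.datasets import make_blobs

X_demo, y_demo = make_blobs(n_samples=300, centers=4, n_features=3,
                            random_state=0)
plot_tsne_result(X_demo, y_demo, n_components=2)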
              title=title.format(e), show=False)

# -----------------------------------------------
# Transform using xdawn
data_1 = xdawn.transform(epochs_1)[:, :n_components]
data_2 = xdawn.transform(epochs_2)[:, :n_components]

# Get data
X, y, z = epochs_get_MVPA_data([epochs_1, epochs_2])
Xd = np.concatenate([data_1, data_2], axis=0)
X.shape, Xd.shape, y.shape, z.shape

# -----------------------------------------------
# Calculate TSNE manifold
vectorizer = mne.decoding.Vectorizer()
tsne = manifold.TSNE(n_components=2, n_jobs=n_jobs)
X2 = tsne.fit_transform(vectorizer.fit_transform(Xd))
X2.shape

# -----------------------------------------------
# Plot in TSNE manifold
plt.style.use('ggplot')
fig, ax = plt.subplots(1, 1, figsize=(10, 10))
yy = y + z * 10
for j in np.unique(yy):
    print(j)
    ax.scatter(X2[yy == j, 0], X2[yy == j, 1], alpha=0.5, label=j)
ax.legend()
drawer.fig = fig

# -----------------------------------------------
def __init__(self, vis=None):
    self.vis = vis
    self.tsne = manifold.TSNE(n_components=2, init='pca', random_state=0)
    X_all = o_data.df[o_config.free_parameter_names + o_config.qoi_names]
    # scale the combined parameter+QoI matrix, not just the parameters
    X_all_scaled = preprocessing.scale(X_all)

    mds_n_components = 2
    mds_all = MDS(n_components=2, max_iter=1000, n_init=1)
    Y_all = mds_all.fit_transform(X_all_scaled)
    ax[2].scatter(Y_all[:, 0], Y_all[:, 1], s=1, c='black')

elif manifold_learn_config['manifold_type'] == 'tsne':
    manifold_learn_config['config'] = OrderedDict()
    manifold_learn_config['config']['n_components'] = 2
    manifold_learn_config['config']['init'] = 'pca'
    manifold_learn_config['config']['random_state'] = 0

    print('parameter_analysis')
    X_param = o_data.df[o_config.free_parameter_names]
    Y_param = manifold.TSNE(**manifold_learn_config['config']).fit_transform(X_param)
    print(X_param.shape)
    print(Y_param.shape)
    ax[0].scatter(Y_param[:, 0], Y_param[:, 1], s=1, c='black')

    print('qoi_analysis')
    X_qoi = o_data.df[o_config.qoi_names]
    Y_qoi = manifold.TSNE(**manifold_learn_config['config']).fit_transform(X_qoi)
    print(X_qoi.shape)
    print(Y_qoi.shape)
    ax[1].scatter(Y_qoi[:, 0], Y_qoi[:, 1], s=1, c='black')

    print('parameter+qoi')
    X_all = o_data.df[o_config.free_parameter_names + o_config.qoi_names]
    Y_all = manifold.TSNE(**manifold_learn_config['config']).fit_transform(X_all)
    print(X_all.shape)
# Scale and visualize the embedding vectors
def plot_embedding(X, y, title=None):
    x_min, x_max = np.min(X, 0), np.max(X, 0)
    X = (X - x_min) / (x_max - x_min)

    plt.figure()
    ax = plt.subplot(111)
    for i in range(X.shape[0]):
        # `m` is assumed to be a label-to-color-index mapping defined elsewhere
        plt.text(X[i, 0], X[i, 1], y[i, 0],
                 color=plt.cm.Set1(m[y[i, 0]] / 21.),
                 fontdict={'weight': 'bold', 'size': 9})

    if title is not None:
        plt.title(title)

# t-SNE embedding of the digit utterances
print("Computing t-SNE embedding")
tsne = manifold.TSNE()
X_tsne = tsne.fit_transform(X)
plot_embedding(X_tsne, y,
               "t-SNE 2D embedding of English and Spanish digit utterances")
plt.savefig('tsne.png')
import matplotlib.colors as colors
import matplotlib.cm as cmx
import matplotlib as mpl

matrix1 = gensim.matutils.corpus2dense(p, num_terms=4)
matrix3 = matrix1.T
matrix3

from sklearn import manifold, datasets, decomposition, ensemble, \
    discriminant_analysis, random_projection

def norm(x):
    return (x - np.min(x)) / (np.max(x) - np.min(x))

X = norm(matrix3)

tsne = manifold.TSNE(n_components=2, init='pca', random_state=0,
                     perplexity=50, verbose=1, n_iter=1500)
X_tsne = tsne.fit_transform(X)

### WORK HERE - HOW DID I FIND OUT THERE WERE 3 CLUSTERS? SORT X_tsne
## DEFINE K-MEANS
plt.hist(X_tsne)

from sklearn.cluster import KMeans
model3 = KMeans(n_clusters=4, random_state=0)
model3.fit(X_tsne)
cc = model3.predict(X_tsne)

## ALSO TRY WITH X TO SEE WHICH TOPIC IT SELECTS
tokens2 = word_tokenize(str(sentences2))
def __init__(self, components=(0, 1)):  # tuple default avoids the mutable-default pitfall
    if components is None:
        raise Exception("Component error.")
    self.components = components
    self.tsne = manifold.TSNE(n_components=max(components) + 1, init='pca')
def train_kmeans_tsne(train_dataloader, test_dataloader, autoencoder, Maxepoch):
    # Criterion: MSE loss (L1 loss / mean absolute error is a possible alternative)
    criterion = nn.MSELoss()  # nn.L1Loss()
    optimizer = optim.Adam(autoencoder.parameters(), lr=0.001)
    # Now, we train 20 epochs.
    '''
    for epoch in range(Maxepoch):
        cumulate_loss = 0
        for x in train_dataloader:
            latent, reconstruct = autoencoder(x)
            loss = criterion(reconstruct, x)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            cumulate_loss = loss.item() * x.shape[0]
        print(f'Epoch { "%03d" % epoch }: Loss : { "%.5f" % (cumulate_loss / trainX.shape[0])}')
    '''
    autoencoder1 = torch.load('model_best.pkl')
    # autoencoder2 = torch.load('model_best2.pkl')

    # Collect the latents and standardize them.
    latents = []
    reconstructs = []
    for x in test_dataloader:
        latent1, reconstruct1 = autoencoder1(x)
        # latent2, reconstruct2 = autoencoder2(x)
        latent = latent1  # (latent1 + latent2) / 2
        reconstruct = reconstruct1  # (reconstruct1 + reconstruct2) / 2
        latents.append(latent.cpu().detach().numpy())
        reconstructs.append(reconstruct.cpu().detach().numpy())
    reconstructs = np.concatenate(reconstructs, axis=0)
    reconstructs = np.transpose(reconstructs, (0, 2, 3, 1))
    latents = np.concatenate(latents, axis=0).reshape([9000, -1])
    latents = (latents - np.mean(latents, axis=0)) / np.std(latents, axis=0)

    # Use PCA to lower the dimensionality of the latents, then cluster with K-means.
    # print(latents.shape)
    # latents = PCA(n_components=32).fit_transform(latents)
    # latents = RandomTreesEmbedding(n_jobs=-1).fit(latents).labels_
    print("TSNE")
    # t-SNE gives a non-linear 2-D embedding of the PCA-reduced latents
    latents = PCA(n_components=32).fit_transform(latents)
    tsne = manifold.TSNE(n_components=2, init='pca', random_state=8700)
    latents = tsne.fit_transform(latents)
    torch.save(autoencoder, 'model.pkl')  # save the entire network
    '''
    latents = TSNE(n_components=2).fit_transform(latents)  # random_state=8700
    print(latents.shape)
    '''
    result = KMeans(n_clusters=2).fit(latents).labels_
    print("KMeans")
    print(latents.shape)
    # result = MeanShift(bandwidth=2).fit(latents).labels_

    # We know the first 5 labels are zeros; this is a mechanism to check
    # whether the answers need to be flipped or not.
    if np.sum(result[:5]) >= 3:
        result = 1 - result
    return latents, result
def reduce_to_2D(X, Y):
    color = Y
    n_neighbors = 10
    n_components = 2

    fig = plt.figure(figsize=(15, 8))
    plt.suptitle('Manifold Learning with %i points, %i neighbors'
                 % (len(X), n_neighbors), fontsize=14)

    methods = ['standard', 'ltsa', 'hessian', 'modified']
    labels = ['LLE', 'LTSA', 'Hessian LLE', 'Modified LLE']

    for i, method in enumerate(methods):
        t0 = time()
        Y = manifold.LocallyLinearEmbedding(n_neighbors, n_components,
                                            eigen_solver='auto',
                                            method=method).fit_transform(X)
        t1 = time()
        print('%s: %.2g sec' % (methods[i], t1 - t0))
        ax = fig.add_subplot(252 + i)
        plt.scatter(Y[:, 0], Y[:, 1], c=color, cmap=plt.cm.Spectral)
        plt.title('%s (%.2g sec)' % (labels[i], t1 - t0))
        ax.xaxis.set_major_formatter(NullFormatter())
        ax.yaxis.set_major_formatter(NullFormatter())
        plt.axis('tight')

    t0 = time()
    Y = manifold.Isomap(n_neighbors, n_components).fit_transform(X)
    t1 = time()
    print('Isomap: %.2g sec' % (t1 - t0))
    ax = fig.add_subplot(257)
    plt.scatter(Y[:, 0], Y[:, 1], c=color, cmap=plt.cm.Spectral)
    plt.title('Isomap (%.2g sec)' % (t1 - t0))
    ax.xaxis.set_major_formatter(NullFormatter())
    ax.yaxis.set_major_formatter(NullFormatter())
    plt.axis('tight')

    t0 = time()
    mds = manifold.MDS(n_components, max_iter=100, n_init=1)
    Y = mds.fit_transform(X)
    t1 = time()
    print('MDS: %.2g sec' % (t1 - t0))
    ax = fig.add_subplot(258)
    plt.scatter(Y[:, 0], Y[:, 1], c=color, cmap=plt.cm.Spectral)
    plt.title('MDS (%.2g sec)' % (t1 - t0))
    ax.xaxis.set_major_formatter(NullFormatter())
    ax.yaxis.set_major_formatter(NullFormatter())
    plt.axis('tight')

    t0 = time()
    se = manifold.SpectralEmbedding(n_components=n_components,
                                    n_neighbors=n_neighbors)
    Y = se.fit_transform(X)
    t1 = time()
    print('SpectralEmbedding: %.2g sec' % (t1 - t0))
    ax = fig.add_subplot(259)
    plt.scatter(Y[:, 0], Y[:, 1], c=color, cmap=plt.cm.Spectral)
    plt.title('SpectralEmbedding (%.2g sec)' % (t1 - t0))
    ax.xaxis.set_major_formatter(NullFormatter())
    ax.yaxis.set_major_formatter(NullFormatter())
    plt.axis('tight')

    t0 = time()
    tsne = manifold.TSNE(n_components=n_components, init='pca', random_state=0)
    Y = tsne.fit_transform(X)
    t1 = time()
    print('t-SNE: %.2g sec' % (t1 - t0))
    ax = fig.add_subplot(2, 5, 10)
    plt.scatter(Y[:, 0], Y[:, 1], c=color, cmap=plt.cm.Spectral)
    plt.title('t-SNE (%.2g sec)' % (t1 - t0))
    ax.xaxis.set_major_formatter(NullFormatter())
    ax.yaxis.set_major_formatter(NullFormatter())
    plt.axis('tight')

    plt.show()
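# ---------------------------------------------------------------------------
# Example call on the classic S-curve; the second argument is used only to
# colour the points, matching the function's signature.
from sklearn import datasets

X_demo, color_demo = datasets.make_s_curve(1000, random_state=0)
reduce_to_2D(X_demo, color_demo)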
def getTSNE():
    return manifold.TSNE()
n_components = 3
perplexity = 50
NUM_COLORS = 60
cm = pylab.get_cmap('gist_rainbow')

if __name__ == "__main__":
    X, y = load_data()
    # print(X.shape)
    print(len(X))
    for n_components in range(2, 4):
        for perplexity in range(20, 50, 5):
            for random_state in range(0, 2):
                t0 = time()
                tsne = manifold.TSNE(n_components=n_components, init='pca',
                                     random_state=random_state,
                                     perplexity=perplexity)
                X_ = tsne.fit_transform(X)
                print(len(X_))
                print(len(X_[0]))
                # NOTE: this overwrites data/tsne.npy on every loop iteration
                np.save("data/tsne", X_)
                t1 = time()
                print("t-SNE: %.2g sec" % (t1 - t0))

                x_min, x_max = X_.min(0), X_.max(0)
                X_norm = (X_ - x_min) / (x_max - x_min)  # normalize to [0, 1]
                plt.figure(figsize=(8, 8))
                for i in range(X_norm.shape[0]):
                    # plt.text(X_norm[i, 0], X_norm[i, 1], str(y[i]),
                    #          color=plt.cm.Set1(y[i]),
                    #          fontdict={'weight': 'bold', 'size': 9})
                    plt.scatter(X_norm[i, 0], X_norm[i, 1],
def plot_tsne(model, df, words=None, vectors=None,
              target_subfeatures_mask=None, perplexity=5, title="TSNE",
              colour=False, n_components=2):
    if model is not None:
        words = []
        vectors = []
        # gensim < 4 vocabulary API (gensim 4 renamed this to wv.key_to_index)
        for word in model.wv.vocab.keys():
            words.append(word)
            vectors.append(model.wv.word_vec(word))

    if colour:
        cols = df.columns
        cmap = plt.get_cmap('viridis')
        clrs = cmap(np.linspace(0, 1, len(cols)))
        clr_dict = {}
        clrs_points = []
        for i, c in enumerate(cols):
            unique_col_vals = df[c].unique()
            for uv in unique_col_vals:
                clr_dict[uv] = clrs[i]
        for w in words:
            clrs_points.append(clr_dict[w])

    tsne = manifold.TSNE(n_components=n_components, init='pca',
                         random_state=10, method='exact',
                         perplexity=perplexity)
    Y = tsne.fit_transform(vectors)

    plt.figure(figsize=(18, 12))
    if target_subfeatures_mask is None:
        target_subfeatures_mask = np.array([False] * Y.shape[0])
    plt.rcParams.update({'font.size': 14})  # set everything to this font size
    marker_size = plt.rcParams['lines.markersize'] ** 2 * 5
    if colour:
        plt.scatter(Y[~target_subfeatures_mask, 0],
                    Y[~target_subfeatures_mask, 1],
                    c=clrs_points, s=marker_size)
    else:
        plt.scatter(Y[target_subfeatures_mask, 0],
                    Y[target_subfeatures_mask, 1],
                    c='red', s=marker_size)
        plt.scatter(Y[~target_subfeatures_mask, 0],
                    Y[~target_subfeatures_mask, 1],
                    c='blue', s=marker_size)

    for i, label in enumerate(words):
        plt.annotate(label, (Y[i, 0], Y[i, 1]), fontsize=22)

    plt.title(title, fontsize=26)
    plt.xlabel("Dimension 1", fontsize=26)
    plt.ylabel("Dimension 2", fontsize=26)
    plt.xticks(fontsize=24)
    plt.yticks(fontsize=24)
    plt.savefig("tsne_2_components")
    plt.show()
    # we will take the mean over the H and W dimensions
    image_features = torch.mean(torch.mean(image_features, dim=-1), dim=-1)
    # Finally, we can store the computed CNN features
    images_projected_cnn[i, :] = image_features.cpu()

#
# Applying t-SNE
#

# sklearn provides us with a nice t-SNE implementation and good documentation
# https://scikit-learn.org/stable/modules/generated/sklearn.manifold.TSNE.html
# https://scikit-learn.org/stable/auto_examples/manifold/plot_t_sne_perplexity.html#sphx-glr-auto-examples-manifold-plot-t-sne-perplexity-py
from sklearn import manifold

# Get t-SNE model
tsne = manifold.TSNE(random_state=1)
# Fit (=train) model on our data
images_projected_tsne = tsne.fit_transform(images_projected_cnn)
print(f"t-SNE projected our data to shape {images_projected_tsne.shape}")

# Plot the result
fig, ax = plt.subplots()
ax.scatter(x=images_projected_tsne[:, 0], y=images_projected_tsne[:, 1],
           c=point_colors, s=0.1, cmap='nipy_spectral')
ax.set_xlabel('x')
ax.set_ylabel('y')
ax.grid(True)
fig.savefig('tsne.png', dpi=250)
fig.savefig('tsne_large.png', dpi=500)
fig.savefig('tsne.svg')
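# ---------------------------------------------------------------------------
# t-SNE works on pairwise distances, so feature scale matters; a sketch of
# standardizing the CNN features first (the StandardScaler step is an
# addition, not part of the original pipeline; 'features' is a stand-in for
# images_projected_cnn).
import numpy as np
from sklearn import manifold, preprocessing

features = np.random.rand(500, 512)
scaled = preprocessing.StandardScaler().fit_transform(features)
embedded = manifold.TSNE(random_state=1).fit_transform(scaled)
print(embedded.shape)  # (500, 2)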
def run_expt(X, color, expt_name):
    n_neighbors = 10
    n_components = 2

    # Create figure
    fig = plt.figure(figsize=(15, 8))
    # fig = plt.figure()
    # fig.suptitle("Manifold Learning with %i points, %i neighbors"
    #              % (1000, n_neighbors), fontsize=14)

    # Add 3d scatter plot
    # ax = fig.add_subplot(251, projection='3d')
    ax = fig.add_subplot(111, projection='3d')
    ax.scatter(X[:, 0], X[:, 1], X[:, 2], c=color, cmap=plt.cm.Spectral)
    ax.view_init(4, -72)
    ax.xaxis.set_major_formatter(NullFormatter())
    ax.yaxis.set_major_formatter(NullFormatter())
    ax.zaxis.set_major_formatter(NullFormatter())
    ax.axis('tight')
    ttl = '{}-data'.format(expt_name)
    ax.set_title(ttl)
    save_fig('{}.pdf'.format(ttl))
    plt.show()

    # Set-up manifold methods
    LLE = partial(manifold.LocallyLinearEmbedding, n_neighbors, n_components,
                  eigen_solver='auto')
    methods = OrderedDict()
    methods['Isomap'] = manifold.Isomap(n_neighbors, n_components)
    # the 'PCA' slot actually uses TruncatedSVD (i.e. PCA without mean-centering)
    methods['PCA'] = decomposition.TruncatedSVD(n_components=n_components)
    methods['LLE'] = LLE(method='standard')
    # methods['LTSA'] = LLE(method='ltsa')
    # methods['Hessian LLE'] = LLE(method='hessian')
    # methods['Modified LLE'] = LLE(method='modified')
    methods['MDS'] = manifold.MDS(n_components, max_iter=100, n_init=1)
    methods['SE'] = manifold.SpectralEmbedding(n_components=n_components,
                                               n_neighbors=n_neighbors)
    methods['t-SNE'] = manifold.TSNE(n_components=n_components, init='pca',
                                     random_state=0)
    methods['kPCA'] = decomposition.KernelPCA(n_components=n_components,
                                              kernel='rbf')

    # Plot results
    for i, (label, method) in enumerate(methods.items()):
        t0 = time()
        Y = method.fit_transform(X)
        t1 = time()
        print("%s: %.2g sec" % (label, t1 - t0))
        fig = plt.figure()
        # ax = fig.add_subplot(2, 5, 2 + i + (i > 3))
        ax = fig.add_subplot(111)
        ax.scatter(Y[:, 0], Y[:, 1], c=color, cmap=plt.cm.Spectral)
        # ax.set_title("%s (%.2g sec)" % (label, t1 - t0))
        ttl = '{}-{}'.format(expt_name, label)
        ax.set_title(ttl)
        ax.xaxis.set_major_formatter(NullFormatter())
        ax.yaxis.set_major_formatter(NullFormatter())
        ax.axis('tight')
        save_fig('{}.pdf'.format(ttl))
        plt.show()
def testModelOnOneDataset(loaded_weights, test_data_dir):
    # loaded_weights = '/media/wz209/a29353b7-1090-433f-b452-b4ce827adb17/sugurs/PythonProjects/pem_multi/Expirements/1.ProposedModel/patch/backup/IncepResV2-Adam/1/weights-improvement-15-0.8322.hdf5'
    # test_data_dir = '/media/wz209/a29353b7-1090-433f-b452-b4ce827adb17/sugurs/PythonProjects/pem_multi/dataset/patch/1'
    base_model = load_model(loaded_weights)
    # print(base_model.summary())
    # Keras 2 functional API uses the inputs=/outputs= keywords
    partial_model = Model(inputs=base_model.input,
                          outputs=base_model.get_layer('avg_pool').output)
    # partial_model = Model(inputs=base_model.input,
    #                       outputs=base_model.get_layer('activation_208').output)

    # read the serial numbers of the TPC and LTPC patients
    with open('dict19.pkl', 'rb') as f:
        samples = cPickle.load(f)

    """Per-image evaluation."""
    img_list = []
    test_images = []
    test_labels = []
    test_features = []
    for fpathe, dirs, fs in os.walk(test_data_dir):
        for f in fs:
            file = os.path.join(fpathe, f)
            img_list.append(file)
    for file in tqdm(img_list):
        x = image.load_img(file, target_size=(img_width, img_height))
        x = image.img_to_array(x)
        x /= 255
        x = x.tolist()
        test_images.append(x)
        lch_in_file = file.split('/')[-1].split('-')[0][:-1]
        for each in samples:
            if lch_in_file == each['lch']:
                test_features.append(each['feature19'])
                test_labels.append(each['label_bl'])
                break
    test_labels = np.array(test_labels)
    y_tsne = []
    for yy in test_labels.tolist():
        if yy == 0:
            y_tsne.append([1, 0])
        elif yy == 1:
            y_tsne.append([0, 1])

    LTPC_lch = []
    TPC_lch = []
    all_patient_pred_list = []
    all_patient_truelabel_list = []
    all_patient_in_one_pred_list = []
    all_patient_in_one_truelabel_list = []
    for each in samples:
        if each['label_bl'] == 0:
            LTPC_lch.append(each['lch'])
        elif each['label_bl'] == 1:
            TPC_lch.append(each['lch'])

    # first check the clinical IDs of the LTPC patients
    all_cnt_ltpc = 0
    all_pics_ltpc = 0
    for each in LTPC_lch:
        for person in samples:
            if person['lch'] == each:
                features19 = person['feature19']
        one_patient_pic_list = []
        one_patient_test_res_list = []
        for fpathe, dirs, fs in os.walk(test_data_dir):
            for f in fs:
                file = os.path.join(fpathe, f)
                lch_in_file = file.split('/')[-1].split('-')[0][:-1]
                if each == lch_in_file:
                    one_patient_pic_list.append(file)
        if len(one_patient_pic_list) == 0:
            continue
        all_cnt_ltpc += 1
        for i in one_patient_pic_list:
            x = image.load_img(i, target_size=(img_width, img_height))
            x = image.img_to_array(x)
            x /= 255
            x = x.tolist()
            image_input = np.array([x])
            all_pics_ltpc += 1
            bingshi_input = np.array([features19])
            result = partial_model.predict(image_input).tolist()[0]
            one_patient_test_res_list.append(result)
        # per-image accumulation
        all_patient_pred_list += one_patient_test_res_list
        all_patient_truelabel_list += np.zeros(
            len(one_patient_test_res_list)).tolist()
        # per-patient accumulation
        # print(one_patient_test_res_list)
        myArray = np.array(one_patient_test_res_list)
        # print(myArray)
        # print(np.sum(myArray, axis=0))
        ret = (np.sum(myArray, axis=0) /
               len(one_patient_test_res_list)).tolist()
        all_patient_in_one_pred_list += [ret]
        all_patient_in_one_truelabel_list += [0]

    # then check the clinical IDs of the TPC patients
    all_cnt_tpc = 0
    all_pics_tpc = 0
    for each in TPC_lch:
        for person in samples:
            if person['lch'] == each:
                features19 = person['feature19']
        one_patient_pic_list = []
        one_patient_test_res_list = []
        for fpathe, dirs, fs in os.walk(test_data_dir):
            for f in fs:
                file = os.path.join(fpathe, f)
                lch_in_file = file.split('/')[-1].split('-')[0][:-1]
                if each == lch_in_file:
                    one_patient_pic_list.append(file)
        if len(one_patient_pic_list) == 0:
            continue
        all_cnt_tpc += 1
        for i in one_patient_pic_list:
            x = image.load_img(i, target_size=(img_width, img_height))
            x = image.img_to_array(x)
            x /= 255
            x = x.tolist()
            image_input = np.array([x])
            all_pics_tpc += 1
            bingshi_input = np.array([features19])
            result = partial_model.predict(image_input).tolist()[0]
            one_patient_test_res_list.append(result)
        # per-image accumulation
        all_patient_pred_list += one_patient_test_res_list
        all_patient_truelabel_list += np.ones(
            len(one_patient_test_res_list)).tolist()
        # per-patient accumulation
        myArray = np.array(one_patient_test_res_list)
        ret = (np.sum(myArray, axis=0) /
               len(one_patient_test_res_list)).tolist()
        all_patient_in_one_pred_list += [ret]
        all_patient_in_one_truelabel_list += [1]

    x_tsne = np.array(all_patient_pred_list)
    y_tsne = np.array(all_patient_truelabel_list)
    print(x_tsne.shape)
    print(len(y_tsne))
    print('num of class is %d' % len(set(y_tsne)))
    # tsne = manifold.TSNE(n_components=2, init='random', random_state=0, perplexity=100)
    tsne = manifold.TSNE(n_components=2, init='pca', random_state=0)
    x_tsne = tsne.fit_transform(x_tsne)
    colors = ['r', 'b']
    target_names = range(2)
    x_min, x_max = np.min(x_tsne, 0), np.max(x_tsne, 0)
    x_tsne = (x_tsne - x_min) / (x_max - x_min)
    plt.figure()
    for (i, color, target) in zip(target_names, colors, target_names):
        plt.scatter(x_tsne[y_tsne == i, 0], x_tsne[y_tsne == i, 1],
                    c=color, label=target, s=2, lw=1)
    # plt.show()
    plt.savefig('./1.png', dpi=330)

    # ------------------------------------------------------------------
    x_tsne = np.array(all_patient_in_one_pred_list)
    y_tsne = np.array(all_patient_in_one_truelabel_list)
    print(x_tsne.shape)
    print(len(y_tsne))
    print('num of class is %d' % len(set(y_tsne)))
    # tsne = manifold.TSNE(n_components=2, init='random', random_state=0, perplexity=100)
    tsne = manifold.TSNE(n_components=2, init='pca', random_state=0)
    x_tsne = tsne.fit_transform(x_tsne)
    colors = ['r', 'b']
    target_names = range(2)
    x_min, x_max = np.min(x_tsne, 0), np.max(x_tsne, 0)
    x_tsne = (x_tsne - x_min) / (x_max - x_min)
    plt.figure()
    for (i, color, target) in zip(target_names, colors, target_names):
        plt.scatter(x_tsne[y_tsne == i, 0], x_tsne[y_tsne == i, 1],
                    c=color, label=target, s=2, lw=1)
    # plt.show()
    plt.savefig('./2.png', dpi=330)
def tsne(X):
    # n_components is read from the enclosing scope
    return manifold.TSNE(n_components=n_components, init='pca',
                         random_state=0).fit_transform(X)
def do_evaluation(config, qualitative_analysis=True,
                  quantitative_analysis=True, verbose=0):
    os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
    tf.compat.v1.disable_eager_execution()

    Model_cls = getattr(sys.modules[__name__], config['model_cls'])
    Dataset_cls = getattr(sys.modules[__name__], config['dataset_cls'])

    batch_size = 1
    data_sequence_length = None
    # Load validation dataset to fetch statistics.
    if issubclass(Dataset_cls, HandWritingDatasetConditional):
        validation_dataset = Dataset_cls(
            config['validation_data'],
            var_len_seq=True,
            use_bow_labels=config['use_bow_labels'])
    elif issubclass(Dataset_cls, HandWritingDataset):
        validation_dataset = Dataset_cls(config['validation_data'],
                                         var_len_seq=True)
    else:
        raise Exception("Unknown dataset class.")

    strokes = tf.compat.v1.placeholder(
        tf.float32,
        shape=[batch_size, data_sequence_length,
               sum(validation_dataset.input_dims)])
    targets = tf.compat.v1.placeholder(
        tf.float32,
        shape=[batch_size, data_sequence_length,
               sum(validation_dataset.target_dims)])
    sequence_length = tf.compat.v1.placeholder(tf.int32, shape=[batch_size])

    # Create inference graph.
    with tf.name_scope("validation"):
        inference_model = Model_cls(config,
                                    reuse=False,
                                    input_op=strokes,
                                    target_op=targets,
                                    input_seq_length_op=sequence_length,
                                    input_dims=validation_dataset.input_dims,
                                    target_dims=validation_dataset.target_dims,
                                    batch_size=batch_size,
                                    mode="validation",
                                    data_processor=validation_dataset)
        inference_model.build_graph()
        inference_model.create_image_summary(
            validation_dataset.prepare_for_visualization)

    # Create sampling graph.
    with tf.name_scope("sampling"):
        model = Model_cls(config,
                          reuse=True,
                          input_op=strokes,
                          target_op=None,
                          input_seq_length_op=sequence_length,
                          input_dims=validation_dataset.input_dims,
                          target_dims=validation_dataset.target_dims,
                          batch_size=batch_size,
                          mode="sampling",
                          data_processor=validation_dataset)
        model.build_graph()

    # Create a session object and initialize parameters.
    sess = tf.compat.v1.Session()
    # Restore computation graph.
    try:
        saver = tf.compat.v1.train.Saver()
        # Restore variables.
        if config['checkpoint_id'] is None:
            checkpoint_path = tf.train.latest_checkpoint(config['model_dir'])
        else:
            checkpoint_path = os.path.join(config['model_dir'],
                                           config['checkpoint_id'])
        print("Loading model " + checkpoint_path)
        saver.restore(sess, checkpoint_path)
    except:
        raise Exception("Model is not found.")

    if run_gmm_eval:
        from sklearn import manifold
        import matplotlib.pyplot as plt
        from matplotlib.ticker import NullFormatter

        gmm_mus, gmm_sigmas = model.evaluate_gmm_latent_space(sess)
        # We have ~70 components. Select a subset of them manually.
        gmm_component_ids = [2, 3, 11, 12, 13, 14, 15, 39, 40]
        gmm_legend_labels = ["1", "2", "a", "b", "c", "d", "e", "C", "D"]
        num_components = len(gmm_component_ids)
        size_components = gmm_mus.shape[1]
        gmm_samples = np.zeros((num_components * gmm_num_samples,
                                size_components))
        gmm_sample_labels = np.zeros(num_components * gmm_num_samples)
        for comp_idx in range(num_components):
            epsilon = np.random.normal(0, 1,
                                       (gmm_num_samples, gmm_mus.shape[1]))
            gmm_samples[comp_idx * gmm_num_samples:
                        comp_idx * gmm_num_samples + gmm_num_samples, :] = \
                gmm_mus[comp_idx] + gmm_sigmas[comp_idx] * epsilon
            gmm_sample_labels[comp_idx * gmm_num_samples:
                              comp_idx * gmm_num_samples + gmm_num_samples] = \
                np.ones(gmm_num_samples) * comp_idx

        # Creating a discrete colorbar
        colors = plt.cm.jet(np.linspace(0, 1, num_components))

        Y = manifold.TSNE(n_components=2, init='pca',
                          random_state=0).fit_transform(gmm_samples)

        fig = plt.figure(figsize=(15, 8))
        ax = fig.add_subplot(1, 1, 1)
        current_plot_range = 0
        previous_plot_range = 0
        for i, c in enumerate(colors):
            previous_plot_range += current_plot_range
            current_plot_range = gmm_sample_labels[gmm_sample_labels == i].size
            plt.scatter(
                Y[previous_plot_range:previous_plot_range + current_plot_range, 0],
                Y[previous_plot_range:previous_plot_range + current_plot_range, 1],
                20,
                lw=.25,
                marker='o',
                color=c,
                label=gmm_legend_labels[i],
                alpha=0.9,
                antialiased=True,
                zorder=3)

        ax.xaxis.set_major_formatter(NullFormatter())
        ax.yaxis.set_major_formatter(NullFormatter())
        plt.legend()
        plt.axis('tight')
        plt.show()

    keyword_args = dict()
    keyword_args['conditional_inputs'] = None
    keyword_args['eoc_threshold'] = eoc_threshold
    keyword_args['cursive_threshold'] = cursive_threshold
    keyword_args['use_sample_mean'] = True

    if quantitative_analysis:
        pass

    if qualitative_analysis:
        print("Generating samples...")
        for real_img_idx in reference_sample_ids:
            _, stroke_model_input, _ = validation_dataset.fetch_sample(
                real_img_idx)
            stroke_sample = stroke_model_input[:, :, 0:3]
            if run_reconstruction or run_biased_sampling:
                inference_results = inference_model.reconstruct_given_sample(
                    session=sess, inputs=stroke_model_input)
            if run_original_sample:
                svg_path = os.path.join(
                    config['eval_dir'],
                    "real_image_" + str(real_img_idx) + '.svg')
                visualize.draw_stroke_svg(
                    validation_dataset.undo_normalization(
                        validation_dataset.samples[real_img_idx],
                        detrend_sample=False),
                    factor=0.001,
                    svg_filename=svg_path)
            if run_reconstruction:
                svg_path = os.path.join(
                    config['eval_dir'],
                    "reconstructed_image_" + str(real_img_idx) + '.svg')
                visualize.draw_stroke_svg(
                    validation_dataset.undo_normalization(
                        inference_results[0]['output_sample'][0],
                        detrend_sample=False),
                    factor=0.001,
                    svg_filename=svg_path)

            if concat_ref_and_synthetic_samples:
                reference_sample_in_img = stroke_sample
            else:
                reference_sample_in_img = None

            # Conditional handwriting synthesis.
            for text_id, text in enumerate(conditional_texts):
                keyword_args['conditional_inputs'] = text
                if config.get('use_real_pi_labels', False) and isinstance(
                        model, VRNNGMM):
                    if run_biased_sampling:
                        biased_sampling_results = model.sample_biased(
                            session=sess,
                            seq_len=seq_len,
                            prev_state=inference_results[0]['state'],
                            prev_sample=reference_sample_in_img,
                            **keyword_args)
                        save_name = 'synthetic_biased_ref(' + str(
                            real_img_idx) + ')_(' + str(text_id) + ')'
                        synthetic_sample = validation_dataset.undo_normalization(
                            biased_sampling_results[0]['output_sample'][0],
                            detrend_sample=False)
                        if save_plots:
                            plot_eval_details(biased_sampling_results[0],
                                              synthetic_sample,
                                              config['eval_dir'], save_name)
                        # use_sample_mean=True applies beautification
                        # (set False for raw, unbeautified samples)
                        keyword_args['use_sample_mean'] = True
                        biased_sampling_results = model.sample_biased(
                            session=sess,
                            seq_len=seq_len,
                            prev_state=inference_results[0]['state'],
                            prev_sample=reference_sample_in_img,
                            **keyword_args)
                        save_name = 'synthetic_biased_sampled_ref(' + str(
                            real_img_idx) + ')_(' + str(text_id) + ')'
                        synthetic_sample = validation_dataset.undo_normalization(
                            biased_sampling_results[0]['output_sample'][0],
                            detrend_sample=False)
                        if save_plots:
                            plot_eval_details(biased_sampling_results[0],
                                              synthetic_sample,
                                              config['eval_dir'], save_name)
                    if run_unbiased_sampling:
                        unbiased_sampling_results = model.sample_unbiased(
                            session=sess, seq_len=seq_len, **keyword_args)
                        save_name = 'synthetic_unbiased_(' + str(text_id) + ')'
                        synthetic_sample = validation_dataset.undo_normalization(
                            unbiased_sampling_results[0]['output_sample'][0],
                            detrend_sample=False)
                        if save_plots:
                            plot_eval_details(unbiased_sampling_results[0],
                                              synthetic_sample,
                                              config['eval_dir'], save_name)
                        # beautified variant of the unbiased sample
                        keyword_args['use_sample_mean'] = True
                        unbiased_sampling_results = model.sample_unbiased(
                            session=sess, seq_len=seq_len, **keyword_args)
                        save_name = 'synthetic_unbiased_sampled(' + str(
                            text_id) + ')'
                        synthetic_sample = validation_dataset.undo_normalization(
                            unbiased_sampling_results[0]['output_sample'][0],
                            detrend_sample=False)
                        if save_plots:
                            plot_eval_details(unbiased_sampling_results[0],
                                              synthetic_sample,
                                              config['eval_dir'], save_name)
                else:
                    if run_biased_sampling:
                        biased_sampling_results = model.sample_biased(
                            session=sess,
                            seq_len=seq_len,
                            prev_state=inference_results[0]['state'],
                            prev_sample=reference_sample_in_img)
                        save_name = 'synthetic_biased_ref(' + str(
                            real_img_idx) + ')_(' + str(text_id) + ')'
                        synthetic_sample = validation_dataset.undo_normalization(
                            biased_sampling_results[0]['output_sample'][0],
                            detrend_sample=False)
                        if save_plots:
                            plot_eval_details(biased_sampling_results[0],
                                              synthetic_sample,
                                              config['eval_dir'], save_name)
                    if run_unbiased_sampling:
                        unbiased_sampling_results = model.sample_unbiased(
                            session=sess, seq_len=seq_len)
                        save_name = 'synthetic_unbiased_(' + str(text_id) + ')'
                        synthetic_sample = validation_dataset.undo_normalization(
                            unbiased_sampling_results[0]['output_sample'][0],
                            detrend_sample=False)
                        if save_plots:
                            plot_eval_details(unbiased_sampling_results[0],
                                              synthetic_sample,
                                              config['eval_dir'], save_name)

    sess.close()
def compare_mds_tsne(stddev=0, metric='euclidean', tsne_init='pca',
                     random_state=0):
    cmap = plt.cm.viridis
    n_points = 1000
    # random_state = 0
    X, sample_order = datasets.make_s_curve(n_points, random_state=0)
    if stddev > 0:
        # add isotropic Gaussian noise with the requested standard deviation
        X = X + np.random.normal(size=np.prod(X.shape),
                                 scale=stddev).reshape(X.shape)

    fig = plt.figure(figsize=(12, 4))
    plt.suptitle("Manifold Learning with %i points" % (n_points))

    # Plot original data
    ax = fig.add_subplot(131, projection='3d')
    ax.scatter(X[:, 0], X[:, 1], X[:, 2], c=sample_order, cmap=cmap)
    ax.view_init(4, -72)

    n_components = 2
    mds_kws = dict(n_components=n_components, random_state=random_state)
    tsne_kws = dict(init=tsne_init)
    tsne_kws.update(mds_kws)

    if metric != 'euclidean':
        X = scipy.spatial.distance.squareform(
            scipy.spatial.distance.pdist(X, metric=metric))
        mds_kws['dissimilarity'] = 'precomputed'
        tsne_kws['metric'] = 'precomputed'
        print('FYI not initializing t-SNE with PCA since distances are '
              'precomputed')
        # PCA init is incompatible with precomputed distances, so fall back
        # to random initialization explicitly
        tsne_kws['init'] = 'random'

    # Perform MDS and plot it
    t0 = time()
    mds = manifold.MDS(max_iter=100, n_init=1, **mds_kws)
    Y = mds.fit_transform(X)
    t1 = time()
    print("MDS: %.2g sec" % (t1 - t0))
    ax = fig.add_subplot(1, 3, 2)
    plt.scatter(Y[:, 0], Y[:, 1], c=sample_order, cmap=cmap,
                linewidth=0.5, edgecolor='grey')
    plt.title("MDS (%.2g sec)" % (t1 - t0))
    ax.xaxis.set_major_formatter(NullFormatter())
    ax.yaxis.set_major_formatter(NullFormatter())
    plt.axis('tight')

    # Perform t-SNE and plot it
    t0 = time()
    tsne = manifold.TSNE(**tsne_kws)
    Y = tsne.fit_transform(X)
    t1 = time()
    print("t-SNE: %.2g sec" % (t1 - t0))
    ax = fig.add_subplot(1, 3, 3)
    plt.scatter(Y[:, 0], Y[:, 1], c=sample_order, cmap=cmap,
                linewidth=0.5, edgecolor='grey')
    plt.title("t-SNE (%.2g sec)" % (t1 - t0))
    ax.xaxis.set_major_formatter(NullFormatter())
    ax.yaxis.set_major_formatter(NullFormatter())
    plt.axis('tight')

    plt.show()
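# ---------------------------------------------------------------------------
# Example invocations of compare_mds_tsne; the cosine case exercises the
# precomputed-distance branch.
compare_mds_tsne()                  # clean S-curve, Euclidean distances
compare_mds_tsne(stddev=0.1)        # with Gaussian noise added
compare_mds_tsne(metric='cosine')   # precomputed distances, random t-SNE init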
def main(args):
    linearize = False
    if args.xtrans:
        period = 6
        data = dset.XtransDataset(args.data_dir, transform=None,
                                  augment=False, linearize=linearize)
    else:
        period = 2
        data = dset.BayerDataset(args.data_dir, transform=None,
                                 augment=False, linearize=linearize)
    loader = DataLoader(data, batch_size=args.batch_size, shuffle=True,
                        num_workers=8)

    mask_viz = viz.BatchVisualizer("mask", env="demosaic_inspect")
    mos_viz = viz.BatchVisualizer("mosaic", env="demosaic_inspect")
    diff_viz = viz.BatchVisualizer("diff", env="demosaic_inspect")
    target_viz = viz.BatchVisualizer("target", env="demosaic_inspect")
    input_hist = viz.HistogramVisualizer("color_hist", env="demosaic_inspect")

    for sample in loader:
        mosaic = sample["mosaic"]
        mask = sample["mask"]

        pad = args.ksize // 2
        dx = (pad - args.offset_x) % period
        dy = (pad - args.offset_y) % period
        print("dx {} dy {}".format(dx, dy))
        mosaic = mosaic[..., dy:, dx:]
        mask = mask[..., dy:, dx:]

        def to_patches(arr):
            patches = arr.unfold(2, args.ksize, period).unfold(
                3, args.ksize, period)
            bs, c, h, w, _, _ = patches.shape
            patches = patches.permute(0, 2, 3, 1, 4, 5).contiguous()
            patches = patches.view(bs * h * w, c, args.ksize, args.ksize)
            return patches

        patches = to_patches(mosaic)
        bs = patches.shape[0]
        means = patches.view(bs, -1).mean(-1).view(bs, 1, 1, 1)
        std = patches.view(bs, -1).std(-1).view(bs, 1, 1, 1)
        print(means.min().item(), means.max().item())
        patches -= means
        patches /= std + 1e-8

        new_bs = 1024
        idx = np.random.randint(0, patches.shape[0], (new_bs, ))
        patches = patches[idx]

        import torchlib.debug as D
        D.tensor(patches)

        flat = patches.view(new_bs, -1).cpu().numpy()
        nclusts = 16
        clst = cluster.MiniBatchKMeans(n_clusters=nclusts)
        # clst.fit(flat)
        clst_idx = clst.fit_predict(flat)
        colors = np.random.uniform(size=(nclusts, 3))

        manif = manifold.TSNE(n_components=2)
        new_coords = manif.fit_transform(flat)
        color = np.zeros((new_coords.shape[0], 3))
        color = (colors[clst_idx, :] * 255).astype(np.uint8)
        print(color.shape)
        D.scatter(th.from_numpy(new_coords[:, 0]),
                  th.from_numpy(new_coords[:, 1]),
                  color=color, key="tsne")

        centers = th.from_numpy(clst.cluster_centers_).view(
            nclusts, 3, args.ksize, args.ksize)
        D.tensor(centers, "centers")

        for cidx in range(nclusts):
            idx = clst_idx == cidx
            p = th.from_numpy(patches.numpy()[idx])
            D.tensor(p, key="cluster_{:02d}".format(cidx))

        import ipdb
        ipdb.set_trace()
labels = np.load("data_window_labels.npy") print(X.columns.values) print(labels) print(np.where(labels == 'flow=From-Botne')[0][0]) y_bin6 = y == np.where(labels == 'flow=From-Botne')[0][0] X_train, X_test, y_train, y_test = model_selection.train_test_split( X, y_bin6, test_size=0.33, random_state=123456) print("y", np.unique(y, return_counts=True)) print("y_train", np.unique(X_train, return_counts=True)) print("y_test", np.unique(y_test, return_counts=True)) print("t-SNE") # Beware: this is very time-consuming clf = manifold.TSNE(n_components=2, random_state=123456) clf.fit( X[['Dport_nunique', 'TotBytes_sum', 'Dur_sum', 'Dur_mean', 'TotBytes_std']]) print(clf.embedding_) y_plot = np.where(y_bin6 == True)[0] print(len(y_plot)) y_plot2 = np.random.choice(np.where(y_bin6 == False)[0], size=len(y_plot) * 100, replace=False) print(len(y_plot2)) index = list(y_plot) + list(y_plot2)
pca = decomposition.TruncatedSVD(n_components=2)
X_reduced = pca.fit_transform(X_transformed)
plot_embedding(
    X_reduced,
    "Random forest embedding of the digits (time %.2fs)" % (time() - t0))

# ----------------------------------------------------------------------
# Spectral embedding of the digits dataset
print("Computing Spectral embedding")
embedder = manifold.SpectralEmbedding(n_components=2, random_state=0,
                                      eigen_solver="arpack")
t0 = time()
X_se = embedder.fit_transform(X)
plot_embedding(X_se,
               "Spectral embedding of the digits (time %.2fs)" % (time() - t0))

# ----------------------------------------------------------------------
# t-SNE embedding of the digits dataset
print("Computing t-SNE embedding")
tsne = manifold.TSNE(n_components=2, init='pca', random_state=0)
t0 = time()
X_tsne = tsne.fit_transform(X)
plot_embedding(X_tsne,
               "t-SNE embedding of the digits (time %.2fs)" % (time() - t0))

plt.show()
                                      ('scaler', preprocessing.StandardScaler())])

# build preprocessing pipeline for all features
cat_features = utils.get_non_continuous_features(house_train1)
num_features = utils.get_continuous_features(house_train1)

preprocess_pipeline = compose.ColumnTransformer([
    ('cat', categorical_pipeline, cat_features),
    ('num', numerical_pipeline, num_features)
])

# n_components=0.95 keeps enough principal components to explain 95% of the
# variance; t-SNE then maps those components down to 2-D for plotting
viz_pipeline = pipeline.Pipeline([('preprocess', preprocess_pipeline),
                                  ('pca', decomposition.PCA(n_components=0.95)),
                                  ('tsne', manifold.TSNE(n_components=2))])
tsne_data = viz_pipeline.fit_transform(house_train1)
rutils.plot_data_3d_regression(tsne_data, house_train['SalePrice'])

# build feature selection pipeline
features_pipeline = pipeline.FeatureUnion([
    ('pca_selector', decomposition.PCA()),
    ('et_selector',
     feature_selection.SelectFromModel(ensemble.ExtraTreesClassifier()))
])

regressor = linear_model.Lasso()
# build complete pipeline with feature selection and ml algorithms
complete_pipeline = pipeline.Pipeline([
    ('preprocess', preprocess_pipeline),
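# ---------------------------------------------------------------------------
# Caveat: TSNE has no transform(), so viz_pipeline only works via
# fit_transform and cannot project new rows. A sketch of splitting the
# pipeline when test data must be projected: the preprocess+PCA part is
# reusable, while t-SNE is refit per dataset.
prep = pipeline.Pipeline([('preprocess', preprocess_pipeline),
                          ('pca', decomposition.PCA(n_components=0.95))])
train_reduced = prep.fit_transform(house_train1)
train_2d = manifold.TSNE(n_components=2).fit_transform(train_reduced)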
def test_with_tsne(model_path):
    load_state(model_path, model)
    # evaluation mode only affects Dropout and BatchNorm modules
    model.eval()
    test_loss = 0
    correct = 0
    data_all = None  # initialized on the first batch
    first = True
    print('start test:')
    for data, target in test_loader:
        if args.cuda:
            data, target = data.cuda(), target.cuda()
        if first:
            data_all, target_all = Variable(data, volatile=True), Variable(target)
            data_all = data_all.view(-1, 784)
        data, target = Variable(data, volatile=True), Variable(target)
        data = data.view(-1, 784)
        data_all = torch.cat((data, data_all), 0)
        # print(data_all.size())
        output, layer1_out, layer2_out, layer3_out, layer4_out = model(data)
        test_loss += F.nll_loss(output, target).data[0]  # Variable.data
        if first:
            pred_all = output.data.max(1)[1]
            layer1_out_all = layer1_out
            layer2_out_all = layer2_out
            layer3_out_all = layer3_out
            layer4_out_all = layer4_out
            first = False
        pred = output.data.max(1)[1]  # get the index of the max log-probability
        pred_all = torch.cat((pred, pred_all), 0)
        layer1_out_all = torch.cat((layer1_out, layer1_out_all), 0)
        layer2_out_all = torch.cat((layer2_out, layer2_out_all), 0)
        layer3_out_all = torch.cat((layer3_out, layer3_out_all), 0)
        layer4_out_all = torch.cat((layer4_out, layer4_out_all), 0)
        # print(pred_all.size())
        correct += pred.eq(target.data).cpu().sum()
        # print(data_all.size())
        # print(pred_all.size())

    test_loss /= len(test_loader)  # loss function already averages over batch size
    print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
        test_loss, correct, len(test_loader.dataset),
        100. * correct / len(test_loader.dataset)))

    print("Computing t-SNE embedding")
    # tsne = manifold.TSNE(n_components=2, init='random', random_state=0)
    layer1_out_all = layer1_out_all.data.numpy()
    layer2_out_all = layer2_out_all.data.numpy()
    layer3_out_all = layer3_out_all.data.numpy()
    layer4_out_all = layer4_out_all.data.numpy()
    # data_all = pd.DataFrame(data_all, index=data_all[:, 0]),
    tsne = manifold.TSNE(n_components=2, init='pca', random_state=0)
    layer1_out_all_tsne = np.array(tsne.fit_transform(layer1_out_all))[:, np.newaxis, :]
    layer2_out_all_tsne = np.array(tsne.fit_transform(layer2_out_all))[:, np.newaxis, :]
    layer3_out_all_tsne = np.array(tsne.fit_transform(layer3_out_all))[:, np.newaxis, :]
    layer4_out_all_tsne = np.array(tsne.fit_transform(layer4_out_all))[:, np.newaxis, :]
    layerout_tsne = layer1_out_all_tsne
    layerout_tsne = np.concatenate((layerout_tsne, layer2_out_all_tsne), axis=1)
    layerout_tsne = np.concatenate((layerout_tsne, layer3_out_all_tsne), axis=1)
    layerout_tsne = np.concatenate((layerout_tsne, layer4_out_all_tsne), axis=1)
    np.save('layerout_tsne.npy', layerout_tsne)
    # layerout_tsne = np.load('layerout_tsne.npy')
    # print(layerout_tsne.shape)
    # tsne = pd.DataFrame(tsne.embedding_, index=data_all.index)  # convert data format

    colors = ['red', 'm', 'cyan', 'blue', 'lime', 'lawngreen', 'lightcoral',
              'lightyellow', 'mediumorchid', 'mediumpurple']
    plt.figure(figsize=(10, 6))
    print('start plot:')
    for i in range(len(colors)):
        px = []
        py = []
        px2 = []
        py2 = []
        for j in range(1000):
            if pred_all[j] == i:
                plt.plot(layerout_tsne[j, :, 0], layerout_tsne[j, :, 1])
                # px.append(layerout_tsne[j, 0])
                # py.append(layerout_tsne[j, 1])
        # plt.scatter(px, py, s=20, c=colors[i], marker='o')
        # plt.scatter(px2, py2, s=20, c=colors[i], marker='v')
    # plt.legend(np.arange(0, 5).astype(str))
    plt.xticks([])
    plt.yticks([])
    # plt.savefig('C:/Users/Day/Desktop/PPT_report/Galaxy pic/Visualization/2/cnn1_train.png',
    #             dpi=300, bbox_inches='tight')
    plt.savefig('1.png', dpi=300,
                bbox_inches='tight')
    plt.show()
def __init__(self, source):
    min_max_scaler = preprocessing.MinMaxScaler()
    data_source = min_max_scaler.fit_transform(source)
    tsne = manifold.TSNE(n_components=2, init='pca', random_state=0)
    self.return_data = tsne.fit_transform(data_source)
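# ---------------------------------------------------------------------------
# Hypothetical usage, assuming the class that owns this __init__ is named
# TsneView (the real class name is not shown in the snippet).
import numpy as np

X = np.random.rand(200, 16)
view = TsneView(X)
print(view.return_data.shape)  # (200, 2)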