# Assumed imports for this excerpt; x, y, and seed are defined earlier in the script
from sklearn.preprocessing import MinMaxScaler, StandardScaler
from sklearn.manifold import MDS
import matplotlib.pyplot as plt

# Scale every feature using min-max normalization and z-score standardization
# Note that y is not scaled because it is categorical; if it weren't, it would have to be scaled too
x_norm = MinMaxScaler().fit_transform(x)
x_std = StandardScaler().fit_transform(x)

'''
-------------------------------------------------------------------------------
-------------------------Multi-Dimensional Scaling-----------------------------
-------------------------------------------------------------------------------
'''

# Apply metric MDS, keeping n_components < the number of original features
mds_model = MDS(n_components=2, metric=True, random_state=seed)
mds_model.fit_transform(x_std)
print(mds_model.get_params())

mds_dim = mds_model.embedding_
print(mds_dim.shape)  # There should be 2 latent variables represented
print('Stress:', mds_model.stress_)

# Plot the 2 extracted features colored by observation class
plt.figure(figsize=(10, 5))
plt.xlabel('Latent Variable 1')
plt.ylabel('Latent Variable 2')
plt.title('Metric MDS 2-Dimension Plot with Observation Class')
plt.scatter(mds_dim[:, 0], mds_dim[:, 1], c=y)
plt.colorbar()
plt.show()

# Apply non-metric MDS, keeping n_components < the number of original features
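# The excerpt ends at the comment above; a minimal sketch of how the non-metric block
# could continue, mirroring the metric block (the names nmds_model/nmds_dim are
# illustrative, not from the original script). Non-metric MDS preserves only the rank
# order of the dissimilarities, not their actual values.
nmds_model = MDS(n_components=2, metric=False, random_state=seed)
nmds_dim = nmds_model.fit_transform(x_std)
print(nmds_model.get_params())
print(nmds_dim.shape)  # Again 2 latent variables
print('Stress:', nmds_model.stress_)

plt.figure(figsize=(10, 5))
plt.xlabel('Latent Variable 1')
plt.ylabel('Latent Variable 2')
plt.title('Non-Metric MDS 2-Dimension Plot with Observation Class')
plt.scatter(nmds_dim[:, 0], nmds_dim[:, 1], c=y)
plt.colorbar()
plt.show()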
import numpy as np
from sklearn.manifold import MDS

np.random.seed(0)
x = np.random.rand(4, 3)  # the sample output below corresponds to this 4x3 input
#x = np.array([[0, 1], [2, 3]])

model = MDS(n_components=2, metric=False)
z = model.fit_transform(x)
print(z)
print(model.get_params())
#[[ 0.06135802 -0.01593326]
# [-0.00745756 -0.2913253 ]
# [-0.25541641  0.2224335 ]
# [ 0.20151595  0.08482507]]
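# A small sketch (not part of the original snippet) showing the same fit driven by a
# precomputed dissimilarity matrix, which is how MDS is typically used when only
# pairwise distances are available; euclidean_distances is just one possible choice.
from sklearn.metrics import euclidean_distances

d = euclidean_distances(x)  # 4x4 symmetric dissimilarity matrix
model_pre = MDS(n_components=2, metric=False, dissimilarity='precomputed')
z_pre = model_pre.fit_transform(d)
print(z_pre.shape)          # (4, 2)
print('stress:', model_pre.stress_)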
def main(args):
    outputdir = os.path.dirname(args.vectors)
    #winidx_path = os.path.join(outputdir,
    #                           'cos-distance_' + os.path.basename(args.weights))
    point_path = os.path.splitext(args.vectors)[0] + \
        '_{0}_{1}d-points_it{2}_s{3}.txt'.format(
            args.algorithm, args.components, args.iteration, args.samples)
    fig_path = os.path.splitext(args.vectors)[0] + \
        '_{0}_it{1}_s{2}.eps'.format(args.algorithm, args.iteration, args.samples)

    print('loading val...')
    val = utils.io.load_image_list(args.val)
    categories = utils.io.load_categories(args.categories)
    v = np.load(args.vectors)
    N = v.shape[0]
    d = v.shape[1]
    C = len(categories)
    NperC = N // C
    samples_per_c = args.samples

    # Draw the same random subset of observations from every category
    random_order = np.random.permutation(NperC)
    selected_vectors = []
    selected_images = []
    Ys = []
    for i in range(C):
        selected_vectors.extend(
            [v[i * NperC + ii] for ii in random_order[:samples_per_c]])
        selected_images.extend(
            [val[i * NperC + ii][0] for ii in random_order[:samples_per_c]])
        Ys.extend(
            [val[i * NperC + ii][1] for ii in random_order[:samples_per_c]])
    #print(selected_vectors)
    #print(Ys)

    if args.algorithm == 'tsne':
        model = utils.TSNE(n_components=args.components, n_iter=args.iteration,
                           n_iter_without_progress=args.preprocessdim,
                           angle=args.angle, metric=args.metric)
    elif args.algorithm == 'mds':
        model = MDS(n_components=args.components, n_jobs=-1)
    elif args.algorithm == 'lle':
        model = LLE(n_components=args.components, n_neighbors=args.neighbors,
                    n_jobs=-1)
    elif args.algorithm == 'isomap':
        model = Isomap(n_components=args.components, n_neighbors=args.neighbors,
                       n_jobs=-1)
    elif args.algorithm == 'pca':
        model = PCA(n_components=args.components)

    #X = model.fit_transform(v[:23*10])
    print('fitting...')
    X = model.fit_transform(np.array(selected_vectors))
    Y = np.asarray([x[1] for x in val])

    if args.algorithm == 'pca':
        pca = PCA(n_components=100)
        pca.fit(np.array(selected_vectors))
        E = pca.explained_variance_ratio_
        print("explained", E)
        print("cumsum E", np.cumsum(E))

    print('drawing...')
    markers = ['o', 'x', 'v', '+']
    if args.components == 2:
        plt.figure(2, figsize=(8, 6))
        plt.clf()
        #plt.scatter(X[:, 0], X[:, 1], c=Y[:23*10], cmap=plt.cm.jet)
        #plt.scatter(X[:, 0], X[:, 1], c=np.array(Ys), cmap=plt.cm.jet, label=categories)
        for i in range(C):
            plt.scatter(X[samples_per_c * i:samples_per_c * (i + 1), 0],
                        X[samples_per_c * i:samples_per_c * (i + 1), 1],
                        marker=markers[i % len(markers)], s=10,
                        color=plt.cm.jet(float(i) / (C - 1)),
                        label=categories[i])
        plt.xlabel(args.algorithm + '1')
        plt.ylabel(args.algorithm + '2')
        plt.legend(fontsize=10.25, scatterpoints=1,
                   bbox_to_anchor=(1.05, 1.01), loc='upper left')
        plt.subplots_adjust(right=0.7)
        #plt.show()
        plt.savefig(fig_path)
    elif args.components == 3:
        from mpl_toolkits.mplot3d import Axes3D
        fig = plt.figure()
        ax = Axes3D(fig)
        ax.set_xlabel("X-axis")
        ax.set_ylabel("Y-axis")
        ax.set_zlabel("Z-axis")
        for i in range(C):
            ax.scatter(X[samples_per_c * i:samples_per_c * (i + 1), 0],
                       X[samples_per_c * i:samples_per_c * (i + 1), 1],
                       X[samples_per_c * i:samples_per_c * (i + 1), 2],
                       marker=markers[i % len(markers)], s=10,
                       c=plt.cm.jet(float(i) / (C - 1)),
                       label=categories[i])
        plt.show()

    print(model.get_params())

    # save points
    with open(point_path, 'w') as fp:
        for path, t, p in zip(selected_images, Ys, X):
            fp.write("{0}\t{1}\t{2}\n".format(path, t, '\t'.join(map(str, p))))
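# Illustrative only: a possible argparse entry point for main() above, inferred from
# the attributes it reads (args.vectors, args.val, args.categories, ...). The flag
# names and defaults are assumptions, not taken from the original project, and the
# project-specific utils module plus the sklearn model imports are assumed elsewhere.
if __name__ == '__main__':
    import argparse
    parser = argparse.ArgumentParser(description='Embed feature vectors in 2D/3D')
    parser.add_argument('vectors', help='path to a .npy array of feature vectors')
    parser.add_argument('val', help='image list file with paths and labels')
    parser.add_argument('categories', help='category names file')
    parser.add_argument('--algorithm', default='tsne',
                        choices=['tsne', 'mds', 'lle', 'isomap', 'pca'])
    parser.add_argument('--components', type=int, default=2)
    parser.add_argument('--iteration', type=int, default=1000)
    parser.add_argument('--samples', type=int, default=10)
    parser.add_argument('--neighbors', type=int, default=5)
    parser.add_argument('--angle', type=float, default=0.5)
    parser.add_argument('--metric', default='euclidean')
    parser.add_argument('--preprocessdim', type=int, default=300)
    main(parser.parse_args())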
            corr_times_pctiles.append(
                permutation_test(time_corrmat, tails, seed=seed))
        corr_times = np.array(corr_times)
        corr_times_pctiles = np.array(corr_times_pctiles)
    else:
        corr_times = None
        corr_times_pctiles = None

    # Use MDS to reduce the dimensionality of the correlation matrix
    seed = sum(map(ord, subj + "_mds"))
    mds = MDS(n_init=50, random_state=seed, dissimilarity="precomputed")
    mds.fit(1 - corrmat)
    mds_varexp = mds_variance_explained(corrmat, mds.embedding_)

    # Save out the results
    res = moss.Results(tails=tails, prefs=prefs, corrmat=corrmat,
                       tail_corrs=tail_corrs, corr_pctile=corr_pctile,
                       distance_thresh=distance_thresholds,
                       corr_distance=corr_distance,
                       corr_distance_pctiles=corr_distance_pctiles,
                       corr_times=corr_times,
                       corr_times_pctiles=corr_times_pctiles,
                       mds_params=mds.get_params(),
                       mds_coords=mds.embedding_,
                       mds_varexp=mds_varexp)

    fname = "correlation_analysis/{}_{}_{}.pkz".format(subj, exp, roi)
    moss.save_pkl(fname, res)
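# The helper mds_variance_explained() is defined elsewhere in this project; a plausible
# stand-in under the common definition (squared correlation between the original
# dissimilarities and the pairwise distances of the embedding, over the upper triangle)
# would look roughly like the sketch below. Treat it as an assumption, not the
# project's actual implementation.
import numpy as np
from scipy.spatial.distance import pdist, squareform
from scipy.stats import pearsonr


def mds_variance_explained_sketch(corrmat, coords):
    """Fraction of dissimilarity variance captured by the MDS configuration."""
    triu = np.triu_indices_from(corrmat, k=1)
    dissim = (1 - corrmat)[triu]                # original dissimilarities
    embedded = squareform(pdist(coords))[triu]  # distances in the embedding
    r, _ = pearsonr(dissim, embedded)
    return r ** 2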