#Scale every feature using min-max normalization and z-score standardization
#Note that y is not scaled because it is categorical; a non-categorical y
#would have to be scaled too
x_norm = MinMaxScaler().fit_transform(x)
x_std = StandardScaler().fit_transform(x)
'''
-------------------------------------------------------------------------------
-------------------------Multi-Dimensional Scaling-----------------------------
-------------------------------------------------------------------------------
'''

#Apply metric MDS, keeping n components < the number of original features
#metric=True requests metric MDS; random_state=seed is passed so the fit can be
#repeated with the same seed
mds_model = MDS(n_components=2, metric=True, random_state=seed)
#fit_transform returns the embedded coordinates, which are also kept on the
#fitted model as embedding_
mds_dim = mds_model.fit_transform(x_std)
print(mds_model.get_params())
print(mds_dim.shape)  #There should be 2 latent variables represented
print('Stress:', mds_model.stress_)

#Plot first 2 extracted features and the observation class
plt.figure(figsize=(10, 5))
plt.xlabel('Latent Variable 1')
plt.ylabel('Latent Variable 2')
plt.title('Metric MDS 2-Dimension Plot with Observation Class')
#Colour each observation by its class label y
plt.scatter(mds_dim[:, 0], mds_dim[:, 1], c=y)
plt.colorbar()
plt.show()

#Apply non-metric MDS, keeping n components < the number of original features
#(non-metric MDS is requested by passing metric=False to MDS)
# Example #2
0
import numpy as np
from sklearn.manifold import MDS

# Seed NumPy's global random generator before fitting (presumably so the MDS
# initialisation is repeatable between runs -- confirm).
np.random.seed(0)
# Alternative input that was tried earlier: x = np.random.rand(4,3)
x = np.arange(4).reshape(2, 2)  # two points: [[0, 1], [2, 3]]

# Non-metric MDS (metric=False) into two dimensions.
model = MDS(metric=False, n_components=2)
# fit() returns the fitted estimator; its embedding_ holds the coordinates.
z = model.fit(x).embedding_
print(z)

print(model.get_params())

# NOTE(review): the sample output below has four rows, so it was presumably
# produced with the commented-out 4x3 random input rather than the two-point
# array used above -- confirm before relying on it.
#[[ 0.06135802 -0.01593326]
# [-0.00745756 -0.2913253 ]
# [-0.25541641  0.2224335 ]
# [ 0.20151595  0.08482507]]
# Example #3
0
def _build_model(args):
    """Create the dimensionality-reduction estimator named by ``args.algorithm``.

    Supported names are 'tsne', 'mds', 'lle', 'isomap' and 'pca'.

    Raises:
        ValueError: if ``args.algorithm`` is none of the supported names.
    """
    if args.algorithm == 'tsne':
        # NOTE(review): n_iter_without_progress is fed from args.preprocessdim;
        # the two names do not obviously match -- confirm this is intended.
        return utils.TSNE(n_components=args.components,
                          n_iter=args.iteration,
                          n_iter_without_progress=args.preprocessdim,
                          angle=args.angle,
                          metric=args.metric)
    if args.algorithm == 'mds':
        return MDS(n_components=args.components, n_jobs=-1)
    if args.algorithm == 'lle':
        return LLE(n_components=args.components,
                   n_neighbors=args.neighbors,
                   n_jobs=-1)
    if args.algorithm == 'isomap':
        return Isomap(n_components=args.components,
                      n_neighbors=args.neighbors,
                      n_jobs=-1)
    if args.algorithm == 'pca':
        return PCA(n_components=args.components)
    # Fail with a clear message instead of a NameError further down.
    raise ValueError('unknown algorithm: {0!r}'.format(args.algorithm))


def main(args):
    """Embed a per-class sample of feature vectors and plot/save the result.

    Loads the vectors from ``args.vectors`` (via ``np.load``), the image list
    from ``args.val`` and the category names from ``args.categories``, picks
    up to ``args.samples`` rows per category, reduces them to
    ``args.components`` dimensions with the estimator named by
    ``args.algorithm``, draws a scatter plot (saved as EPS for 2 components,
    shown interactively for 3) and writes the embedded points to a
    tab-separated text file next to ``args.vectors``.

    Args:
        args: namespace providing ``vectors``, ``val``, ``categories``,
            ``algorithm``, ``components``, ``iteration``, ``samples`` and,
            depending on the algorithm, ``preprocessdim``, ``angle``,
            ``metric`` and ``neighbors``.

    Raises:
        ValueError: if ``args.algorithm`` is not supported.
    """
    base_path = os.path.splitext(args.vectors)[0]
    point_path = base_path + \
        '_{0}_{1}d-points_it{2}_s{3}.txt'.format(
        args.algorithm, args.components, args.iteration, args.samples)
    fig_path = base_path + \
        '_{0}_it{1}_s{2}.eps'.format(args.algorithm, args.iteration, args.samples)

    print('loading val...')
    val = utils.io.load_image_list(args.val)
    categories = utils.io.load_categories(args.categories)

    v = np.load(args.vectors)
    N = v.shape[0]
    C = len(categories)
    # Assumes rows are grouped by category with the same number of rows per
    # category -- TODO confirm against the code that produced the vectors.
    NperC = N // C

    # Never ask for more rows than a category has; otherwise the per-class
    # slices used for plotting below would not line up with the selection.
    samples_per_c = min(args.samples, NperC)
    # The same within-category offsets are reused for every category.
    picked = np.random.permutation(NperC)[:samples_per_c]
    selected_vectors = []
    selected_images = []
    Ys = []
    for i in range(C):
        for ii in picked:
            row = i * NperC + ii
            selected_vectors.append(v[row])
            selected_images.append(val[row][0])
            Ys.append(val[row][1])

    model = _build_model(args)
    print('fitting...')
    selected = np.array(selected_vectors)
    X = model.fit_transform(selected)

    if args.algorithm == 'pca':
        # Report the explained-variance profile for up to 100 components;
        # PCA cannot extract more components than samples or features.
        n_report = min(100, selected.shape[0], selected.shape[1])
        pca = PCA(n_components=n_report)
        pca.fit(selected)
        E = pca.explained_variance_ratio_
        # Formatted strings print the same text on Python 2 and Python 3.
        print('explained {0}'.format(E))
        print('cumsum E {0}'.format(np.cumsum(E)))

    print('drawing...')

    markers = ['o', 'x', 'v', '+']
    # Spread the classes over the colormap; guard the single-class case.
    color_span = float(max(C - 1, 1))

    if args.components == 2:
        plt.figure(2, figsize=(8, 6))
        plt.clf()

        for i in range(C):
            rows = slice(samples_per_c * i, samples_per_c * (i + 1))
            plt.scatter(X[rows, 0],
                        X[rows, 1],
                        marker=markers[i % len(markers)],
                        s=10,
                        color=plt.cm.jet(i / color_span),
                        label=categories[i])
        plt.xlabel(args.algorithm + '1')
        plt.ylabel(args.algorithm + '2')
        # Put the legend outside the axes and leave room for it on the right.
        plt.legend(fontsize=10.25,
                   scatterpoints=1,
                   bbox_to_anchor=(1.05, 1.01),
                   loc='upper left')
        plt.subplots_adjust(right=0.7)
        plt.savefig(fig_path)
    elif args.components == 3:
        # Imported for its side effect: registers the '3d' projection on
        # older Matplotlib releases.
        from mpl_toolkits.mplot3d import Axes3D  # noqa: F401
        fig = plt.figure()
        # add_subplot attaches the axes to the figure; constructing
        # Axes3D(fig) directly no longer does so on current Matplotlib.
        ax = fig.add_subplot(111, projection='3d')
        ax.set_xlabel("X-axis")
        ax.set_ylabel("Y-axis")
        ax.set_zlabel("Z-axis")
        for i in range(C):
            rows = slice(samples_per_c * i, samples_per_c * (i + 1))
            ax.scatter(X[rows, 0],
                       X[rows, 1],
                       X[rows, 2],
                       marker=markers[i % len(markers)],
                       s=10,
                       color=plt.cm.jet(i / color_span),
                       label=categories[i])
        plt.show()

    print(model.get_params())
    # save points: one line per sample -- image entry, label, coordinates
    with open(point_path, 'w') as fp:
        for path, t, p in zip(selected_images, Ys, X):
            fp.write("{0}\t{1}\t{2}\n".format(path, t, '\t'.join(map(str, p))))
# Example #4
0
            corr_times_pctiles.append(
                permutation_test(time_corrmat, tails, seed=seed))
        corr_times = np.array(corr_times)
        corr_times_pctiles = np.array(corr_times_pctiles)
    else:
        corr_times = None
        corr_times_pctiles = None

    # Use MDS to reduce the dimensionality of the correlation matrix
    seed = sum(map(ord, subj + "_mds"))
    mds = MDS(n_init=50, random_state=seed, dissimilarity="precomputed")
    mds.fit(1 - corrmat)
    mds_varexp = mds_variance_explained(corrmat, mds.embedding_)

    # Save out the results
    res = moss.Results(tails=tails,
                       prefs=prefs,
                       corrmat=corrmat,
                       tail_corrs=tail_corrs,
                       corr_pctile=corr_pctile,
                       distance_thresh=distance_thresholds,
                       corr_distance=corr_distance,
                       corr_distance_pctiles=corr_distance_pctiles,
                       corr_times=corr_times,
                       corr_times_pctiles=corr_times_pctiles,
                       mds_params=mds.get_params(),
                       mds_coords=mds.embedding_,
                       mds_varexp=mds_varexp)
    fname = "correlation_analysis/{}_{}_{}.pkz".format(subj, exp, roi)
    moss.save_pkl(fname, res)