Example #1
 def get_embedding(self, points):
     n = self.wanted_dimensions
     try:
         # ARPACK is fast but can fail to converge on some inputs.
         embedding, errors = locally_linear_embedding(points,
                                                      n_neighbors=self.knn,
                                                      n_components=n,
                                                      eigen_solver='arpack',
                                                      n_jobs=-1)
     except Exception:
         # Fall back to the slower but more robust dense eigensolver.
         embedding, errors = locally_linear_embedding(points,
                                                      n_neighbors=self.knn,
                                                      n_components=n,
                                                      eigen_solver='dense',
                                                      n_jobs=-1)
     return (embedding, errors)
Example #2
def test_singular_matrix():
    M = np.ones((10, 3))
    f = ignore_warnings
    with pytest.raises(ValueError):
        f(manifold.locally_linear_embedding(M, 2, 1,
                                            method='standard',
                                            eigen_solver='arpack'))
Example #3
 def improve_lle_alg(train_df, test_df=None, n_k=3, n_c=2, e_s='dense'):
     """
     Input:
         train_df training set; test_df test set; both are feature data without labels.
         n_k number of LLE nearest neighbors, maps to n_neighbors;
         n_c number of embedding dimensions, maps to n_components;
         e_s eigenvalue solver; eigen_solver defaults to 'dense', which is
             guaranteed to find a solution. See help() for details.
     Output:
         W matrix, the transformation matrix;
         the reconstruction error from solving for W's linear representation.
     """
     transform_array, error_result = manifold.locally_linear_embedding(X=train_df, n_neighbors=n_k,
                                                                       n_components=n_c, eigen_solver=e_s)
     # type conversions
     train_data_transform_df = pd.DataFrame(transform_array)  # array to df
     reduce_dim_matrix = np.matrix(transform_array)
     train_data_matrix = np.matrix(train_df.astype('float'))  # does an inverse exist?
     train_data_matrix_inv = train_data_matrix.getI()  # pseudo-inverse (not a square matrix)
     # compute the W matrix
     w_transform_matrix = train_data_matrix_inv * reduce_dim_matrix
     # was a test set supplied?
     if test_df is None:  # return: LLE-transformed training set, W matrix, error term
         return train_data_transform_df, w_transform_matrix, error_result
     test_data_transform_matrix = np.matrix(test_df) * w_transform_matrix
     test_data_transform_df = pd.DataFrame(test_data_transform_matrix)
     # return 4 values: transformed train, transformed test, W matrix, and the error term
     return train_data_transform_df, test_data_transform_df, w_transform_matrix, error_result
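# A modern NumPy sketch of the same W-matrix idea (np.matrix is deprecated):
# np.linalg.pinv plays the role of .getI() above. The function name and
# argument names here are illustrative, not from the original repo.
import numpy as np
from sklearn import manifold

def lle_with_projection(train_X, test_X=None, n_k=3, n_c=2):
    Y, err = manifold.locally_linear_embedding(X=train_X, n_neighbors=n_k,
                                               n_components=n_c,
                                               eigen_solver='dense')
    # Least-squares linear map from the input space to the embedding.
    W = np.linalg.pinv(np.asarray(train_X, dtype=float)) @ Y
    if test_X is None:
        return Y, W, err
    return Y, np.asarray(test_X, dtype=float) @ W, W, err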
Example #4
 def test_locally_linear_embedding(self):
   np.random.seed(1234)
   pts = np.random.random((5, 3))
   expected = locally_linear_embedding(pts, 3, 1)[0]
   G = neighbor_graph(pts, k=3).barycenter_edge_weights(pts, copy=False)
   actual = G.locally_linear_embedding(num_dims=1)
   assert_signless_array_almost_equal(expected, actual)
Example #5
def perform_lle(X, d):
    X_r, err = locally_linear_embedding(X.toarray(),
                                        n_neighbors=12,
                                        n_components=d,
                                        random_state=32)
    print("Done. Reconstruction error: %g" % err)
    return X_r
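# A minimal usage sketch (assumption: X arrives as a SciPy sparse matrix,
# which is why perform_lle densifies it with .toarray()):
from scipy import sparse

X_sparse = sparse.random(200, 30, density=0.2, format='csr', random_state=0)
X_r = perform_lle(X_sparse, 2)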
Example #6
    def _train(self):
        x = self._train_features
        y = self._train_outputs

        pipe = pipeline.Pipeline([
            ('drop', transformers.ColumnDropper(
                columns=(0, 3, 5, 14, 26, 35, 40, 65, 72, 95, 99, 104, 124)
            )),
            ('scale', preprocessing.StandardScaler(
                with_mean=True,
                with_std=True
            )),
            # Step names must be unique within a Pipeline.
            ('select_pct', feature_selection.SelectPercentile(
                percentile=59,
                score_func=feature_selection.mutual_info_classif
            )),
            ('select_k', feature_selection.SelectKBest(
                k=101,
                score_func=feature_selection.f_classif
            )),
            # locally_linear_embedding is a plain function; the estimator
            # class is needed here so the step exposes fit/transform.
            ('estim', manifold.LocallyLinearEmbedding(
                n_neighbors=6,
                n_components=101,
                eigen_solver='auto',
                method='standard'
            )),
        ])

        # The feature-selection steps need the labels, so fit with y. The
        # final step is a transformer, so the pipeline has no predict();
        # expose its transform instead.
        pipe.fit(x, y)
        self._model = pipe.transform
Example #8
def lle(input, finaldim):
    from sklearn.manifold import locally_linear_embedding
    import numpy
    # Densify sparse input; locally_linear_embedding expects an array-like.
    if not isinstance(input, numpy.ndarray):
        input = input.todense()
    X_r, err = locally_linear_embedding(input, n_neighbors=12, n_components=finaldim)
    return [], X_r
Example #9
def network_to_es_df(network, labels):
	embedding = manifold.locally_linear_embedding(X=network, n_neighbors=5, n_components=2)
	embedded_x = embedding[0][:,0]
	embedded_y = embedding[0][:,1]
	embedded_df = pd.DataFrame()
	embedded_df['x'] = embedded_x
	embedded_df['y'] = embedded_y
	class_df = pd.DataFrame() 
	class_df['class'] = labels
	return embedded_df, class_df
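# A toy usage sketch (illustrative data only; assumes the pandas, numpy and
# sklearn.manifold imports the snippet relies on):
import numpy as np

net = np.random.rand(50, 50)
net = (net + net.T) / 2  # symmetrize so it resembles a network matrix
emb_df, cls_df = network_to_es_df(net, labels=np.random.randint(0, 2, 50))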
Example #10
    def dim_down(self, method='tsne', ndim=2, rand_seed=6):
        """
        :param method: selected method of dimension reduction.
        :param ndim: number of retained dimensions.
        :param rand_seed: seed used by the random number generator.
        :return: embedding space with N cells * d features.
        """

        X = self.data

        # http://scikit-learn.org/stable/modules/manifold.html
        if method.lower() == 'tsne':
            print(
                "Dimension reduction with t-stochastic neighbor embedding (tSNE).\n"
            )
            V = manifold.TSNE(n_components=ndim,
                              random_state=rand_seed,
                              init='pca').fit_transform(X)
        elif method.lower() == 'lle':
            print("Dimension reduction with locally_linear_embedding (LLE).\n")
            V, err = manifold.locally_linear_embedding(X,
                                                       n_neighbors=20,
                                                       n_components=ndim,
                                                       random_state=rand_seed,
                                                       method='modified')
        elif method.lower() == 'mds':
            print("Dimension reduction with multidimensional scaling (MDS).\n")
            V = manifold.MDS(n_components=ndim,
                             random_state=rand_seed,
                             max_iter=100,
                             n_init=1).fit_transform(X)
        elif method.lower() == 'se':
            print("Dimension reduction with spectral embedding (SE).\n")
            V = manifold.SpectralEmbedding(
                n_components=ndim, random_state=rand_seed).fit_transform(X)

        # http://scikit-learn.org/stable/modules/decomposition.html
        elif method.lower() == 'ica':
            print(
                "Matrix decomposition with independent component analysis (FastICA).\n"
            )
            V = decomposition.FastICA(n_components=ndim,
                                      random_state=rand_seed).fit_transform(X)
        elif method.lower() == 'pca':
            print(
                "Matrix decomposition with principal component analysis (PCA).\n"
            )
            V = decomposition.PCA(n_components=ndim,
                                  random_state=rand_seed).fit_transform(X)
        else:
            # Without this, an unknown method would leave V unbound.
            raise ValueError("Unknown method: %s" % method)

        return V
Example #11
def test_singular_matrix():
    M = np.ones((10, 3))
    f = ignore_warnings
    with pytest.raises(ValueError):
        f(
            manifold.locally_linear_embedding(
                M,
                n_neighbors=2,
                n_components=1,
                method="standard",
                eigen_solver="arpack",
            ))
Example #12
def preform_lle_on_dynamic_connectivity(input_path, output_path, brain_areas,
                                        pattern):
    """
    Computes the dynamic connectivity of brain areas, performs a locally
    linear embedding on it, and returns the resulting matrix.

    :param input_path: path to input dir
    :type input_path: str
    :param output_path: path to output directory
    :type output_path: str
    :param brain_areas: number of brain areas
    :type brain_areas: int
    :param pattern: pattern of input files
    :type pattern: str
    :return: LLE matrix, LLE matrix shape
    :rtype: np.ndarray, tuple
    """
    paths = return_paths_list(input_path, output_path, pattern=pattern)
    n_subjects = len(paths)
    array = np.genfromtxt(paths[0], delimiter=',')
    t_phases = array.shape[0]
    dFC = np.full((brain_areas, brain_areas), fill_value=0).astype(np.float64)
    lle_components = np.full((n_subjects, t_phases, (brain_areas * 2)),
                             fill_value=0).astype(np.float64)
    for n in tqdm(range(0, n_subjects)):
        phases = convert_to_phases(paths[n], output_path, brain_areas,
                                   t_phases, n)
        for t in range(0, t_phases):
            for i in range(0, brain_areas):
                for z in range(0, brain_areas):
                    if np.absolute(phases[i, t] - phases[z, t]) > np.pi:
                        dFC[i,
                            z] = np.cos(2 * np.pi - np.absolute(phases[i, t] -
                                                                phases[z, t]))
                    else:
                        dFC[i, z] = np.cos(
                            np.absolute(phases[i, t] - phases[z, t]))
            dfc_output = os.path.join(output_path, 'dFC')
            create_dir(dfc_output)
            np.savez(
                os.path.join(dfc_output, 'subject_{}_time_{}'.format(n, t)),
                dFC)
            lle, err = manifold.locally_linear_embedding(dFC,
                                                         n_neighbors=12,
                                                         n_components=2)
            with open(
                    os.path.join(output_path, 'LLE_error_{}_{}'.format(n, t)),
                    'w') as output:
                json.dump(err, output)
            lle_components[n, t, :] = np.squeeze(lle.flatten())
    # save the LLE matrix into a .npz file
    np.savez(os.path.join(output_path, 'components_matrix'), lle_components)
    return lle_components, lle_components.shape
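# A vectorized sketch of the inner dFC loops above: the update rule is
# cos(|Δφ|), wrapping differences larger than π, which numpy broadcasting
# handles in one shot. `phases` is (brain_areas, t_phases), as in the function.
import numpy as np

def dfc_at_time(phases, t):
    delta = np.abs(phases[:, t, None] - phases[None, :, t])
    return np.cos(np.where(delta > np.pi, 2 * np.pi - delta, delta))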
Example #13
def local_linear_embedding(data, n_components=2, n_neighbors=5):

    from sklearn.manifold import locally_linear_embedding

    lle, err = locally_linear_embedding(data,
                                        n_neighbors=n_neighbors,
                                        n_components=n_components,
                                        method='modified')

    return lle
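# An equivalent sketch using the LocallyLinearEmbedding estimator class in
# place of the function (note that method='modified' requires
# n_neighbors >= n_components):
from sklearn.manifold import LocallyLinearEmbedding

def local_linear_embedding_est(data, n_components=2, n_neighbors=5):
    est = LocallyLinearEmbedding(n_neighbors=n_neighbors,
                                 n_components=n_components,
                                 method='modified')
    return est.fit_transform(data)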
Example #14
    def test_locally_linear_embedding(self):
        iris = datasets.load_iris()
        df = pdml.ModelFrame(iris)

        result = df.manifold.locally_linear_embedding(3, 3)
        expected = manifold.locally_linear_embedding(iris.data, 3, 3)

        self.assertEqual(len(result), 2)
        self.assertIsInstance(result[0], pdml.ModelFrame)
        tm.assert_index_equal(result[0].index, df.index)
        tm.assert_numpy_array_equal(result[0].values, expected[0])

        self.assertEqual(result[1], expected[1])
Example #15
    def test_locally_linear_embedding(self):
        iris = datasets.load_iris()
        df = pdml.ModelFrame(iris)

        result = df.manifold.locally_linear_embedding(3, 3)
        expected = manifold.locally_linear_embedding(iris.data, 3, 3)

        self.assertEqual(len(result), 2)
        self.assertTrue(isinstance(result[0], pdml.ModelFrame))
        self.assert_index_equal(result[0].index, df.index)
        self.assert_numpy_array_equal(result[0].values, expected[0])

        self.assertEqual(result[1], expected[1])
Example #16
def plot_lle(pidata, nneigh=14):
    lle_data = np.array(pidata)
    n_components = 2  # two dimensions

    X_r, err = locally_linear_embedding(lle_data,
                                        n_neighbors=nneigh,
                                        n_components=n_components,
                                        eigen_solver='dense',
                                        method='standard')

    print("Done. Reconstruction error: %g" % err)
    plt.figure()
    plt.scatter(X_r[:, 0],
                X_r[:, 1],
                s=5,
                c=np.log10(np.array(pidata[['PI4']])))
    plt.title("Local linear embedding")
Example #17
def sampleBoundary(im, boundary, point, samples_x, samples_y):
    # Identify a connected boundary segment of size 'samples_y' via k-NN.
    # This can misbehave if two different boundaries are very close together.
    nbrs = NearestNeighbors(n_neighbors=samples_y,
                            algorithm='ball_tree').fit(boundary)
    dist, ind = nbrs.kneighbors([point])

    # Apply LLE to unravel the boundary manifold into a 1-D ordering.
    # Heavier machinery than strictly needed, but it orders the segment's points.
    X_r, err = manifold.locally_linear_embedding(boundary[ind[0]],
                                                 n_neighbors=2,
                                                 n_components=1)
    X_r = np.concatenate((X_r, ind.transpose()), axis=1)
    X_r_sorted = X_r[X_r[:, 0].argsort()]

    # Some plotting
    plt.figure()
    plt.imshow(im)

    samples = np.zeros((samples_y, samples_x * 2 + 1, 3), 'uint8')
    # Iterate over boundary segments of size 10
    for i in range(0, samples_y - 9, 10):
        boundarySegment = boundary[X_r_sorted[:, 1].astype(int)[i:i + 10]]
        plt.scatter(boundarySegment[:, 0],
                    boundarySegment[:, 1],
                    marker='.',
                    s=5)

        # Fit line to boundary point coordinates
        slope, intercept, _, _, _ = stats.linregress(boundarySegment[:, 0],
                                                     boundarySegment[:, 1])
        line_x1 = min(boundarySegment[:, 0])
        line_x2 = max(boundarySegment[:, 0])
        line_y1 = slope * line_x1 + intercept
        line_y2 = slope * line_x2 + intercept

        plt.plot([line_x1, line_x2], [line_y1, line_y2], marker='.')

        # Sample patch
        patch = bilinearInterpolation(slope, intercept, im, line_x1, line_x2,
                                      samples_x, 10)
        samples[i:i + 10, :] = patch
    return samples
Example #18
    def compute(self):
        matrix = self.getInputFromPort("X")
        Y, squared_error = manifold.locally_linear_embedding(
            X=matrix.values,
            n_neighbors=self.forceGetInputFromPort("n_neighbors", 10),
            n_components=self.forceGetInputFromPort("n_components", 2),
            reg=self.forceGetInputFromPort("reg", 0.001),
            eigen_solver=self.forceGetInputFromPort("eigen_solver", "auto"),
            tol=self.forceGetInputFromPort("tol", 1e-06),
            max_iter=self.forceGetInputFromPort("max_iter", 100),
            method=self.method,
            hessian_tol=self.hessian_tol,
            modified_tol=self.modified_tol,
            random_state=self.forceGetInputFromPort("random_state", None)
            #            out_dim      = self.forceGetInputFromPort('out_dim', None)
        )

        proj_matrix = copy.deepcopy(matrix)
        proj_matrix.values = Y

        self.setResult("proj_matrix", proj_matrix)
        self.setResult("squared_error", squared_error)
Example #19
    def compute(self):
        matrix = self.getInputFromPort('X')
        Y, squared_error = manifold.locally_linear_embedding(
            X=matrix.values,
            n_neighbors=self.forceGetInputFromPort('n_neighbors', 10),
            n_components=self.forceGetInputFromPort('n_components', 2),
            reg=self.forceGetInputFromPort('reg', 0.001),
            eigen_solver=self.forceGetInputFromPort('eigen_solver', 'auto'),
            tol=self.forceGetInputFromPort('tol', 1e-06),
            max_iter=self.forceGetInputFromPort('max_iter', 100),
            method=self.method,
            hessian_tol=self.hessian_tol,
            modified_tol=self.modified_tol,
            random_state=self.forceGetInputFromPort('random_state', None)
            #            out_dim      = self.forceGetInputFromPort('out_dim', None)
        )

        proj_matrix = copy.deepcopy(matrix)
        proj_matrix.values = Y

        self.setResult('proj_matrix', proj_matrix)
        self.setResult('squared_error', squared_error)
Example #20
def SwissRollTest01():
	import matplotlib.pyplot as plt

	from mpl_toolkits.mplot3d import Axes3D

	# Locally Linear Embedding of the swiss roll
	from sklearn import manifold, datasets
	# Generate the dataset: X is a 1500 x 3 matrix (locations); color is a
	# 1500 x 1 vector (labels used for coloring).
	X, color = datasets.make_swiss_roll(n_samples=1500)

	#print(X[1,:])
	#print(color[1])


	X_r, err = manifold.locally_linear_embedding(X, n_neighbors=12, n_components=2)
	# X_r is a 1500 x 2 matrix; err is a scalar.
	print(X_r.shape)
	#print(err)

	fig = plt.figure()

	try:
		ax = fig.add_subplot(211, projection='3d')
		ax.scatter(X[:,0], X[:,1], X[:,2], c=color, cmap=plt.cm.Spectral)
	except Exception:
		ax = fig.add_subplot(211)
		ax.scatter(X[:,0], X[:,2], c=color, cmap=plt.cm.Spectral)
	ax.set_title("Original data")
	ax = fig.add_subplot(212)

	# Plot the projected data.
	ax.scatter(X_r[:,0], X_r[:,1], c=color, cmap=plt.cm.Spectral)

	plt.axis('tight')
	plt.xticks([]), plt.yticks([])
	plt.title('Projected Data')
	plt.show()
Example #21
def main_LLE2():
    f3 = open('projection vectors 1', 'w')
    f4 = open('projection vectors 2', 'w')
    f5 = open('projection vectors 3', 'w')
    f6 = open('scatter matrix in 3D', 'w')
    k = 40
    images = []
    images, colors = load_images()
    matrix_images = matrix_build(images)
    newmatrix, squared_error = locally_linear_embedding(matrix_images,
                                                        k,
                                                        3,
                                                        eigen_solver='auto')
    print('squared_error=' + repr(squared_error))
    print(newmatrix.shape)
    M1 = np.dot(newmatrix.T, matrix_images)
    M2 = np.linalg.inv(np.dot(matrix_images.T, matrix_images))
    P_matrix = (np.dot(M1, M2)).T
    print(P_matrix.shape)
    for x in range(len(P_matrix)):
        f3.write(repr(P_matrix[x][0]) + '\n')
        f4.write(repr(P_matrix[x][1]) + '\n')
        f5.write(repr(P_matrix[x][2]) + '\n')
    xx = []
    yy = []
    zz = []
    for x in range(len(newmatrix)):
        xx.append(newmatrix[x][0])
        yy.append(newmatrix[x][1])
        zz.append(newmatrix[x][2])
        f6.write(
            repr(newmatrix[x][0]) + ',' + repr(newmatrix[x][1]) + ',' +
            repr(newmatrix[x][2]) + '\n')
    fig = plt.figure()
    ax = Axes3D(fig)
    ax.scatter(xx, yy, zz, c=colors)
    plt.show()
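# The P_matrix above is the least-squares solution of matrix_images @ P = newmatrix,
# i.e. P = (X^T X)^-1 X^T Y. np.linalg.lstsq computes the same projection more
# stably (a sketch using the variables defined above):
P_matrix_ls, *_ = np.linalg.lstsq(matrix_images, newmatrix, rcond=None)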
Example #22
import matplotlib.pyplot as plt
from sklearn import manifold

fig = plt.figure()
ax = fig.add_subplot(111, projection="3d")
ax.scatter(sr_points[:, 0], sr_points[:, 1], sr_points[:, 2],
           c=sr_color,
           s=50,
           alpha=0.8)
ax.set_title("Swiss Roll in Ambient Space")
ax.view_init(azim=-66, elev=12)
_ = ax.text2D(0.8, 0.05, s="n_samples=1500", transform=ax.transAxes)

# %%
# Computing the LLE and t-SNE embeddings, we find that LLE seems to unroll the
# Swiss Roll pretty effectively. t-SNE, on the other hand, preserves the
# general structure of the data but poorly represents the continuous nature of
# the original manifold, unnecessarily clumping sections of points together.

sr_lle, sr_err = manifold.locally_linear_embedding(sr_points,
                                                   n_neighbors=12,
                                                   n_components=2)

sr_tsne = manifold.TSNE(n_components=2,
                        learning_rate="auto",
                        perplexity=40,
                        init="pca",
                        random_state=0).fit_transform(sr_points)

fig, axs = plt.subplots(figsize=(8, 8), nrows=2)
axs[0].scatter(sr_lle[:, 0], sr_lle[:, 1], c=sr_color)
axs[0].set_title("LLE Embedding of Swiss Roll")
axs[1].scatter(sr_tsne[:, 0], sr_tsne[:, 1], c=sr_color)
_ = axs[1].set_title("t-SNE Embedding of Swiss Roll")

# %%
Example #23
# Load the STL files and add the vectors
your_mesh = mesh.Mesh.from_file('Left_Thalamus.stl')

# Convert from groups of vertices (for each triangle) to list of vertices
tri, points, dim = np.shape(your_mesh.vectors)
data_mesh = np.zeros((tri * points, dim))
for index_t in range(tri):
    for index_p in range(points):
        # flatten (triangle, vertex) to row 3 * index_t + index_p
        data_mesh[index_t * 3 + index_p, :] = your_mesh.vectors[index_t, index_p, :]
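# Equivalent vectorized form (a sketch): numpy-stl stores .vectors as a
# (tri, points, dim) array, so a C-order reshape produces the same row
# layout as the loop above.
data_mesh = your_mesh.vectors.reshape(-1, dim)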

# Isomap
#X_r = manifold.Isomap(n_neighbors=10,n_components=2).fit_transform(data_mesh)
# LLE
X_r, err = manifold.locally_linear_embedding(data_mesh,
                                             n_neighbors=37,
                                             n_components=2)
# T-sne
#tsne = manifold.TSNE(n_components=2, init='pca', random_state=0 )
#X_r = tsne.fit_transform(data_mesh)

# Plot original vertices
figure = pyplot.figure()
ax = figure.add_subplot(211, projection='3d')
ax.scatter(data_mesh[:, 0],
           data_mesh[:, 1],
           data_mesh[:, 2],
           c=data_mesh[:, 0],
           cmap=pyplot.cm.Spectral)
#pyplot.show()
ax.set_title("Original data")
Example #24
train_df = pd.read_csv(data_root+"train_feature_pca500.csv")
test_df = pd.read_csv(data_root+"test_feature_pca500.csv")

photo_ids = np.vstack((train_df['photo_id'].values.reshape(-1, 1),
                       test_df['photo_id'].values.reshape(-1, 1)))

train_df.drop('photo_id', axis=1, inplace=True)
test_df.drop('photo_id', axis=1, inplace=True)

X_std = np.vstack((train_df,test_df))
n_train = len(train_df)


from sklearn import manifold

X_lle, err = manifold.locally_linear_embedding(X_std, n_neighbors=10, n_components=300)

X_all = pd.DataFrame(X_lle)
X_all['photo_id'] = photo_ids

X_train = X_all[:n_train]
X_test = X_all[n_train:]
X_train.to_csv('train_feature_lle.csv', index=False) 
X_test.to_csv('test_feature_lle.csv', index=False)
Example #25
def lle(data):
	X_r, err = manifold.locally_linear_embedding(data, n_neighbors=5, n_components=2)
	return X_r
Example #26
# License: BSD, (C) INRIA 2011

print(__doc__)

import pylab as pl
# This import is needed to modify the way figure behaves
from mpl_toolkits.mplot3d import Axes3D

#----------------------------------------------------------------------
# Locally linear embedding of the swiss roll

from sklearn import manifold, datasets
X, color = datasets.make_swiss_roll(n_samples=1500)

print "Computing LLE embedding"
X_r, err = manifold.locally_linear_embedding(X, n_neighbors=12, out_dim=2)
print "Done. Reconstruction error: %g" % err

#----------------------------------------------------------------------
# Plot result

fig = pl.figure()
try:
    # compatibility matplotlib < 1.0
    ax = fig.add_subplot(211, projection='3d')
    ax.scatter(X[:, 0], X[:, 1], X[:, 2], c=color, cmap=pl.cm.Spectral)
except:
    ax = fig.add_subplot(211)
    ax.scatter(X[:, 0], X[:, 2], c=color, cmap=pl.cm.Spectral)

ax.set_title("Original data")
Example #28
n_components = 4
pca = PCA(n_components, whiten=False)
dataset_pca = pca.fit_transform(dataset_matrix)  # fit and project in one call
print(pca.explained_variance_ratio_)
print(np.sum(pca.explained_variance_ratio_))
outdata_inter = dataset_pca.T
outdata = np.reshape(outdata_inter, (n_components, rows, cols))

#LLE DR method

n_neighbors = 5
n_components = 4
dataset_lle, err = manifold.locally_linear_embedding(dataset_matrix,
                                                     n_neighbors,
                                                     n_components,
                                                     eigen_solver='auto',
                                                     method='standard')
print("Reconstruction error: %g" % err)
outdata_inter = dataset_lle.T  # transpose to (n_components, n_samples), as above
outdata = np.reshape(outdata_inter, (n_components, rows, cols))

#ISOMAP Method
from sklearn.manifold import Isomap
n_neighbors = 5
n_components = 4
dataset_isomap = manifold.Isomap(n_neighbors,
                                 n_components).fit_transform(dataset_matrix)
outdata_inter = dataset_isomap.T
outdata = np.reshape(outdata_inter, (n_components, rows, cols))
Example #29
# angles = np.linspace(0,2*np.pi,100)
angles = np.linspace(0, 2 * np.pi, 50)
# angles = [5.3]

newData = True

if newData:
    print("Integrating data")
    # data = sim.states(duration=500)
    data = sim.states(duration=2400, split=0.01)  # max modified 850
    data = data[1000:]
    data = sim.interpolateCurve()[1000:]
    print("Computing LLE embedding of data")
    manifoldData, err = manifold.locally_linear_embedding(
        data, n_neighbors=12, n_components=2, method='standard'
    )  # odd results: recognizably the Rossler attractor, but not as expected
    # 'modified' gives a smoother manifold, though the return map is still poor
    # 'hessian' yields a strange manifold, and its return maps are poor as well
    print("Done. Reconstruction error: %g" % err)
    print("Storing data")
    sim.storeData("TestRossler")
    np.savetxt("ManifoldRossler.txt", manifoldData)
else:
    print("Loading data")
    sim.loadData("TestRossler")
    data = sim.getData()
    manifoldData = np.loadtxt("ManifoldRossler.txt")

i = 0
Example #30
print(__doc__)

import pylab as pl

# This import is needed to modify the way figure behaves
from mpl_toolkits.mplot3d import Axes3D
Axes3D

#----------------------------------------------------------------------
# Locally linear embedding of the swiss roll

from sklearn import manifold, datasets
X, color = datasets.make_swiss_roll(n_samples=1500)

print("Computing LLE embedding")
X_r, err = manifold.locally_linear_embedding(X, n_neighbors=12, n_components=2)
print("Done. Reconstruction error: %g" % err)

#----------------------------------------------------------------------
# Plot result

fig = pl.figure()
try:
    # compatibility matplotlib < 1.0
    ax = fig.add_subplot(211, projection='3d')
    ax.scatter(X[:, 0], X[:, 1], X[:, 2], c=color, cmap=pl.cm.Spectral)
except:
    ax = fig.add_subplot(211)
    ax.scatter(X[:, 0], X[:, 2], c=color, cmap=pl.cm.Spectral)

ax.set_title("Original data")
Example #31
poincareSection2 = np.array(
    [point for point in poincareSection if (point[2] < 0.1)])

extremeEvents = []
index = []
for i, point in enumerate(poincareSection):
    if point[2] > 0.1:
        extremeEvents.append(i)
    else:
        index.append(i)
extremeEvents = np.array(extremeEvents)
print('Number of extreme events = %g' % len(extremeEvents))

print("Computing LLE embedding of return map")
manifoldOfPoincareSection, err = manifold.locally_linear_embedding(
    poincareSection2, n_neighbors=8, n_components=2,
    method='modified')  # return map then manifold
print("Done. Reconstruction error: %g" % err)

## Fix colors for Plot
col = cm.get_cmap('plasma')
normdata = [np.linalg.norm(point) for point in poincareSection2]
normalize = colors.Normalize(vmin=min(normdata), vmax=max(normdata))
dataColors = [col(normalize(value)) for value in normdata]
poincColor = [dataColors[i] for i in range(0, len(poincareSection2))]

print('Plotting data')
fig = plt.figure()

ax = fig.add_subplot(221)
ax.set_title('Oscillator 1')
Example #32
from sklearn import manifold,datasets
import pandas as pd
from sklearn import svm
from sklearn import cross_validation
from sklearn.metrics import confusion_matrix
from sklearn.cross_validation import  cross_val_score
from unbalanced_dataset import SMOTE
import numpy as np

train=pd.read_csv('./cmv.csv')
train['Defective']=train['Defective'].map({'Y':1,'N':0})
print(type(train.values))
train=train.values
print(train[0:1])
X_r,err=manifold.locally_linear_embedding(train[:,0:-1],n_neighbors=12,n_components=4)
print("Done. Reconstruction error: %g" % err)
data=X_r
label=train[:,-1]
#print label
x_train,x_test,y_train,y_test=cross_validation.train_test_split(data,label,test_size=0.3,random_state=0)
verbose = False
ratio = float(np.count_nonzero(y_train==0)) / float(np.count_nonzero(y_train==1))
smote = SMOTE(ratio=ratio, verbose=verbose, kind='regular')
smox, smoy = smote.fit_transform(x_train, y_train)
print(np.count_nonzero(smoy==1))
print(np.count_nonzero(smoy==0))
clf=svm.SVC(C=10000,gamma=0.0078125)
#print y_train.astype(int)
clf.fit(smox,smoy)
y_pred=clf.predict(x_test)
print(y_test)
Example #33
if not DBexists:
    logging.info("Starting to build index into DB file %s" % outputf)
    index.fit()
    logging.info("Index fitted!!")
    logging.info("Output database: {}".format(outputf))
else:
    logging.info("Index loaded from DB file %s" % outputf)
    
sparse_word_centroids = wordCentroids(db=index, vect=vectorizer)
# Maybe the sparse word_centroids matrix can be loaded into RAM and factored with NMF.
 
logging.info("Fitting Isomap for sparse coding ...")
X_s = Dict(sorted({w: v for w, v in sparse_word_centroids
                    if v is not None}.items(), key=lambda t: len(t[0])))

word_embeddings, err = locally_linear_embedding(vstack(list(X_s.values())), method='ltsa',
                                n_neighbors=5, n_components=args.dim, n_jobs=-1)
#word_embeddings = factorizer.fit_transform(csr_matrix(vstack(list(X_s.values()))))
logging.info("Recosntruction error %f ..." % err)
logging.info("DB Vocabulary size %d ..." % index.vocab_size)
logging.info("Vectorizer vocabulary size %d ..." % len(vectorizer.vocabulary_.keys()))
logging.info("Shape of resulting embedding matrix:")
#logging.info("({} {})".format(factorizer.embedding_.shape[0], factorizer.embedding_.shape[1]))

logging.info("Writing word vectors into file %s ..." % args.output)
write = partial(indexing.write_given_embedding, fname=args.output)

with open(args.output, "w") as f:
    f.write("{} {}\n".format(len(X_s.keys()), word_embeddings.shape[1]) )

Parallel(#backend='threading',
            n_jobs=20 
Example #34
N2=(Y==2)
N3=(Y==3)

pca=PCA(copy=True, n_components=2, whiten=False)
pcaX=pca.fit_transform(X)

lda = LinearDiscriminantAnalysis(n_components=2)
ldaX = lda.fit(X, Y).transform(X)

kpca = KernelPCA(kernel="rbf",n_components=2)
kpcaX = kpca.fit_transform(X)

isomap=Isomap(n_neighbors=15,n_components=2)
isomapX=isomap.fit_transform(X)

lleX, err = locally_linear_embedding(X, n_neighbors=12, n_components=2)
print("Done. Reconstruction error: %g" % err)


distX=np.zeros([len(Y),len(Y)])
nbrs = NearestNeighbors(n_neighbors=15, algorithm='ball_tree').fit(X)
distances, indices = nbrs.kneighbors(X)
for i in range(0,len(Y)):
    distX[indices[i,0],indices[i,1:15]]=distances[i,1:15]
print(distX)
leX=spectral_embedding(distX,n_components=2)

plt.figure()
ax=plt.subplot(projection='3d')
ax.scatter(X[N1, 0], X[N1, 1], X[N1, 2], c='b')
ax.scatter(X[N2, 0], X[N2, 1], X[N2, 2], c='g')
Example #35
components_ = pca.components_

print(components_)

data1 = data_pca[0:3000]
data2 = data_pca[3000:6000]
data3 = data_pca[6000:9000]

fig = plt.figure()
plt.scatter(data1[:, 0], np.zeros((3000, )), s=1, c="b", marker="1")
plt.scatter(data2[:, 0], np.zeros((3000, )), s=1, c="y", marker="1")
plt.scatter(data3[:, 0], np.zeros((3000, )), s=1, c="r", marker="1")
plt.show()

from sklearn.manifold import locally_linear_embedding

for neis in [5, 10, 20, 30, 40, 50]:
    (data_lle, _) = locally_linear_embedding(data,
                                             n_neighbors=neis,
                                             n_components=1)

    data1 = data_lle[0:3000]
    data2 = data_lle[3000:6000]
    data3 = data_lle[6000:9000]

    fig = plt.figure()
    plt.scatter(data1[:, 0], np.zeros((3000, )), s=1, c="b", marker="1")
    plt.scatter(data2[:, 0], np.zeros((3000, )), s=1, c="y", marker="1")
    plt.scatter(data3[:, 0], np.zeros((3000, )), s=1, c="r", marker="1")
    plt.show()
Example #36
#print(np.all(np.isfinite(K_train)))

# Initialise an SVM and fit data using random walk Kernel.
clf = svm.SVC(kernel='precomputed', C=1)
clf.fit(K_train, y_train)

# Predict and test.
y_pred = clf.predict(K_test)

# Calculate accuracy of classification.
acc = accuracy_score(y_test, y_pred)

print("Accuracy:", str(round(acc * 100, 2)))

# locally_linear_embedding returns (embedding, error): unpack the embeddings.
K_train, _ = manifold.locally_linear_embedding(K_train,
                                               n_neighbors=5,
                                               n_components=3)
K_test, _ = manifold.locally_linear_embedding(K_test,
                                              n_neighbors=5,
                                              n_components=3)

# The embedded data are plain feature vectors, so the precomputed-kernel SVM
# above no longer applies; refit with a standard kernel instead.
clf = svm.SVC(C=1)
clf.fit(K_train, y_train)

# Predict and test.
y_pred = clf.predict(K_test)

# Calculate accuracy of classification.
acc = accuracy_score(y_test, y_pred)
Example #37
import pylab as pl

# This import is needed to modify the way figure behaves
from mpl_toolkits.mplot3d import Axes3D

Axes3D

# ----------------------------------------------------------------------
# Locally linear embedding of the swiss roll

from sklearn import manifold, datasets

X, color = datasets.make_swiss_roll(n_samples=1500)

print "Computing LLE embedding"
X_r, err = manifold.locally_linear_embedding(X, n_neighbors=12, n_components=2)
print "Done. Reconstruction error: %g" % err

# ----------------------------------------------------------------------
# Plot result

fig = pl.figure()
try:
    # compatibility matplotlib < 1.0
    ax = fig.add_subplot(211, projection="3d")
    ax.scatter(X[:, 0], X[:, 1], X[:, 2], c=color, cmap=pl.cm.Spectral)
except:
    ax = fig.add_subplot(211)
    ax.scatter(X[:, 0], X[:, 2], c=color, cmap=pl.cm.Spectral)

ax.set_title("Original data")
Example #38
def def_lenses_dimred(df, fs, get_PCA, get_isomap, get_LLE, get_MDS,
                      get_spectral_embedding, get_SVD):

    scaler = MinMaxScaler()

    mapper = km.KeplerMapper()

    X = df[fs].values  # .as_matrix() was removed from pandas; use .values

    keys = []
    values = []

    minmax_scaler = MinMaxScaler()
    df_minmax = minmax_scaler.fit_transform(df[fs].values)

    # PCA
    if get_PCA:
        keys.append('lens_pca_0')
        keys.append('lens_pca_1')
        pca = mapper.fit_transform(df_minmax,
                                   projection=PCA(n_components=2),
                                   scaler=None)
        values.append(scaler.fit_transform(pca[:, 0].reshape(-1, 1)))
        values.append(scaler.fit_transform(pca[:, 1].reshape(-1, 1)))

    # Isomap
    if get_isomap:
        keys.append('lens_isomap_0')
        keys.append('lens_isomap_1')
        isomap = manifold.Isomap(n_components=2,
                                 n_neighbors=3).fit_transform(df_minmax)
        values.append(scaler.fit_transform(isomap[:, 0].reshape(-1, 1)))
        values.append(scaler.fit_transform(isomap[:, 1].reshape(-1, 1)))

    # Locally linear embedding
    if get_LLE:
        keys.append('lens_LLE_0')
        keys.append('lens_LLE_1')
        LLE = manifold.locally_linear_embedding(df_minmax,
                                                n_neighbors=3,
                                                n_components=2,
                                                random_state=0)[0]
        values.append(scaler.fit_transform(LLE[:, 0].reshape(-1, 1)))
        values.append(scaler.fit_transform(LLE[:, 1].reshape(-1, 1)))

    # Multi-dimensional scaling
    if get_MDS:
        keys.append('lens_MDS_0')
        keys.append('lens_MDS_1')
        MDS = manifold.MDS(n_components=2).fit_transform(df_minmax)
        values.append(scaler.fit_transform(MDS[:, 0].reshape(-1, 1)))
        values.append(scaler.fit_transform(MDS[:, 1].reshape(-1, 1)))

    # Spectral embedding
    if get_spectral_embedding:
        keys.append('lens_spectral_embedding_0')
        keys.append('lens_spectral_embedding_1')
        L = manifold.SpectralEmbedding(n_components=2,
                                       n_neighbors=1,
                                       random_state=0).fit_transform(df_minmax)
        values.append(scaler.fit_transform(L[:, 0].reshape(-1, 1)))
        values.append(scaler.fit_transform(L[:, 1].reshape(-1, 1)))

    # truncated SVD
    if get_SVD:
        keys.append('lens_SVD_0')
        keys.append('lens_SVD_1')
        svd = TruncatedSVD(n_components=2,
                           random_state=42).fit_transform(df_minmax)
        values.append(scaler.fit_transform(svd[:, 0].reshape(-1, 1)))
        values.append(scaler.fit_transform(svd[:, 1].reshape(-1, 1)))

    lenses_dimred = dict(zip(keys, values))

    return (lenses_dimred)
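# A minimal usage sketch on illustrative toy data (assumes the imports the
# function relies on: kmapper as km, sklearn's manifold/PCA/TruncatedSVD,
# MinMaxScaler, pandas, numpy):
import numpy as np
import pandas as pd

df_toy = pd.DataFrame(np.random.rand(100, 4), columns=list('abcd'))
lenses = def_lenses_dimred(df_toy, fs=list('abcd'),
                           get_PCA=True, get_isomap=False, get_LLE=True,
                           get_MDS=False, get_spectral_embedding=False,
                           get_SVD=True)
print(sorted(lenses.keys()))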