Example no. 1
class SemiSupervisedGradientBoosting:
    def __init__(self, max_depth=3, n_estimators=10, learning_rate=0.1,
                 min_samples_leaf=4, n_neighbors=5, n_components=2):
        self.GB = GradientBoosting.GradientBoosting(max_depth, n_estimators,
                                   learning_rate, min_samples_leaf)
        self.Transformator = LocallyLinearEmbedding(n_neighbors, n_components)
        
    def fit_predict(self, Xl, y, Xu):
        print('start collapsing space')
        delimiter = Xl.shape[0]
        X_all = np.vstack((Xl, Xu))
        X_all = self.Transformator.fit_transform(X_all)
        X_l_t = X_all[:delimiter]
        X_u_t = X_all[delimiter:]
        del X_all
        print('start computing similarity')
        Sim = GradientBoosting.Simalirity(X_l_t, X_u_t)
        print('end computing similarity')
        del X_l_t, X_u_t
        #Xl = X_all[:delimiter]
        #Xu = X_all[delimiter:]
        print('collapsed space successfully')
        return self.GB.fit_predict(Xl, y, Xu, Sim)
        
    def predict(self, X):
        return self.GB.predict(X)

    def score(self, X, y):
        return self.GB.score(X, y)
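# A minimal usage sketch (added for illustration, not from the original source):
# it assumes the custom GradientBoosting module above is importable and that
# Xl, y, Xu are NumPy arrays of labeled samples, their labels, and unlabeled samples.
import numpy as np

rng = np.random.RandomState(0)
Xl = rng.randn(100, 20)             # 100 labeled samples, 20 features
y = rng.randint(0, 2, size=100)     # binary labels
Xu = rng.randn(400, 20)             # 400 unlabeled samples

model = SemiSupervisedGradientBoosting(n_estimators=10, n_neighbors=5, n_components=2)
# Embeds Xl and Xu together with LLE, builds the similarity matrix,
# and hands everything to the wrapped gradient-boosting model.
predicted_unlabeled = model.fit_predict(Xl, y, Xu)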
Example no. 2
def pseudotimes_from_embedding(data_array, n_neighbors=None):
    if n_neighbors is None:
        n_neighbors = int(data_array.shape[0] * 0.5)
    embedding = LocallyLinearEmbedding(n_components=1, n_neighbors=n_neighbors)
    # Denoise the data with a rank-2 truncated SVD before embedding.
    u, s, v = np.linalg.svd(data_array, full_matrices=True)
    rank = 2
    denoised_data_array = np.dot(u[:, :rank], np.dot(np.diag(s[:rank]), v[:rank, :]))
    pseudotimes = embedding.fit_transform(denoised_data_array)

    pseudotimes -= pseudotimes.min()
    pseudotimes /= pseudotimes.max()
    return pseudotimes
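# A short usage sketch (added for illustration) on a toy matrix; the array and
# its shape are hypothetical, not from the original source.
import numpy as np

rng = np.random.RandomState(0)
toy_data = rng.rand(50, 10)                 # e.g. 50 cells x 10 features
pseudotimes = pseudotimes_from_embedding(toy_data)
print(pseudotimes.shape)                    # (50, 1), values rescaled to [0, 1]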
Example no. 3
def get_metastable_connections_from_gmm(data, gmm, 
                                        connection_estimation_method='max_path_distance_diff', 
                                        min_paths=3, distance='euclidean', 
                                        low_dimension_distances=True, 
                                        as_graph=False):
    means = gmm.means_
    memberships = gmm.predict(data)
    if connection_estimation_method in ['max_path_distance_diff', 'connecting_paths', 'mst']:
        if low_dimension_distances:
            pca = PCA(n_components=2)
            lle = LocallyLinearEmbedding(n_components=2, 
                                         n_neighbors=int(0.8*data.shape[0]))
            distance_matrix = squareform(pdist(lle.fit_transform(data), distance))
        else:
            distance_matrix = squareform(pdist(data, distance))
        weighted_graph = nx.Graph(distance_matrix)
    else:
        weighted_graph = None
    return get_metastable_connections(data, means, memberships,
                                      method=connection_estimation_method,
                                      weighted_graph=weighted_graph,
                                      min_paths=min_paths,
                                      as_graph=as_graph)
Example no. 4
def preprocess(x_train: np.ndarray, y_train: np.ndarray, x_test: np.ndarray) -> Tuple[np.ndarray, np.ndarray]:
    """
    Preprocesses data.

    :param x_train: the training data.
    :param y_train: the training labels.
    :param x_test: the test data.
    :return: Preprocessed x_train and x_test.
    """
    logger.log('Preprocessing...')

    # Scale data.
    logger.log('\tScaling data with params:')
    scaler = MinMaxScaler()
    logger.log('\t{}'.format(scaler.get_params()))
    x_train = scaler.fit_transform(x_train.astype(float))
    x_test = scaler.transform(x_test.astype(float))

    # Apply LLE.
    logger.log('\tApplying LLE with params:')
    embedding = LocallyLinearEmbedding(n_neighbors=100, n_jobs=-1, random_state=0)
    embedding_params = embedding.get_params()
    logger.log('\t' + str(embedding_params))
    x_train = embedding.fit_transform(x_train)
    x_test = embedding.transform(x_test)

    # Plot the graph embedding result.
    if PLOTTING_MODE != 'none':
        plotter.subfolder = 'graphs/LLE'
        plotter.filename = 'embedding'
        plotter.xlabel = 'first feature'
        plotter.ylabel = 'second feature'
        plotter.title = 'LLE'
        plotter.scatter(x_train, y_train, class_labels=helpers.datasets.get_voice_name)

    return x_train, x_test
Example no. 5
    def run_LLE(self, n_neighbors, low_dim_size):
        """
		Run LLE algorithm

		Parameters
		----------
		self : object
			EC_SCOP_Evaluate object setup for this analysis
		n_neighbors : int
			number of neighbors used for the LLE run
		low_dim_size : int
			resulting number of dimensions after LLE

		Returns
		-------
		None
		"""
        print("Run Locally Linear Embeddings")
        lle = LocallyLinearEmbedding(n_neighbors=n_neighbors,
                                     n_components=low_dim_size,
                                     method='modified')
        self.X_low = lle.fit_transform(self.get_x().values)
        print("Done. Reconstruction error: {:.3f}".format(
            lle.reconstruction_error_))
Example no. 6
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
from sklearn.datasets import make_s_curve
from sklearn.manifold import LocallyLinearEmbedding, TSNE

n_points = 1000
X, color = make_s_curve(n_points, random_state=0)
n_neighbors = 10
n_components = 2

fig = plt.figure(figsize=(12, 4))

ax = fig.add_subplot(131, projection='3d')
ax.scatter(X[:, 0], X[:, 1], X[:, 2], c=color, cmap=plt.cm.viridis)
ax.view_init(4, -72)
ax.set_title('Original Data')

lle = LocallyLinearEmbedding(n_neighbors=n_neighbors,
                             n_components=n_components,
                             method='standard')
Y1 = lle.fit_transform(X)
ax = fig.add_subplot(132)
ax.scatter(Y1[:, 0], Y1[:, 1], c=color, cmap=plt.cm.viridis, alpha=0.8)
ax.set_title('LLE')

tsne = TSNE(n_components=n_components, init='pca')
Y2 = tsne.fit_transform(X)
ax = fig.add_subplot(133)
ax.scatter(Y2[:, 0], Y2[:, 1], c=color, cmap=plt.cm.viridis, alpha=0.8)
ax.set_title('t-SNE')

plt.show()
Example no. 7
    parser.add_argument("--n-components", type=int, default=2)
    parser.add_argument("--n-neighbors", type=int, default=4)
    args = parser.parse_args()

    X, y = load_raw()

    name = f"components_{args.n_components}_neighbors_{args.n_neighbors}"
    data_save_folder = f"./data/LLE/{name}"
    fig_save_folder = f"./fig/LLE"
    makedirs(data_save_folder)
    makedirs(fig_save_folder)

    lle = LocallyLinearEmbedding(n_neighbors=args.n_neighbors,
                                 n_components=args.n_components)

    X_decomposed = lle.fit_transform(X)
    np.save(osp.join(data_save_folder, "feature.npy"), X_decomposed)
    np.save(osp.join(data_save_folder, "label.npy"), y)

    if args.n_components == 2:
        x_min, x_max = X_decomposed.min(0), X_decomposed.max(0)
        X_normalized = (X_decomposed - x_min) / (x_max - x_min)
        plt.figure(figsize=(8, 8))
        for i in range(X_normalized.shape[0]):
            plt.text(X_normalized[i, 0],
                     X_normalized[i, 1],
                     str(y[i]),
                     color=plt.cm.Set3(y[i] % 12),
                     fontdict={
                         'weight': 'bold',
                         'size': 9
df = pd.read_csv('../../Documents/ece657a/data/DataB.csv')
df = df.astype(float)
target = df['gnd']
data = df.values[:, 1: len(df.columns) - 1]
threes_df = df.loc[df['gnd'] == 3]
threes_data = threes_df.values[:, 1: len(df.columns) - 1]
threes_data = (threes_data - threes_data.min()) / \
    (threes_data.max() - threes_data.min())
n_neighbors = 5
n_components = 4

# 1. Apply LLE

lle = LocallyLinearEmbedding(n_neighbors=n_neighbors,
                             n_components=n_components)
lle_data = lle.fit_transform(threes_data)
lle_df = pd.DataFrame(lle_data)
plot_three("LLE", lle_df, 0, 1, threes_df, 0.45)

# 2. Apply ISOMAP
iso = Isomap(n_neighbors=n_neighbors, n_components=n_components)
iso_data = iso.fit_transform(threes_data)
iso_df = pd.DataFrame(iso_data)
plot_three("Isomap", iso_df, 0, 1, threes_df, 0.45)


# 3. Use the Naive Bayes classifier to classify the dataset based on the projected 4-dimensional representations from LLE and Isomap (see the sketch below).
df_data = df.values[:, 1: len(df.columns) - 1]
test_size = 0.3
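# The original snippet is cut off before this step; below is a hedged sketch of
# what the Naive Bayes classification on the 4-dimensional LLE and Isomap
# projections could look like (the split and random_state are assumptions).
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import accuracy_score

for name, reducer in [("LLE", LocallyLinearEmbedding(n_neighbors=n_neighbors,
                                                     n_components=n_components)),
                      ("Isomap", Isomap(n_neighbors=n_neighbors,
                                        n_components=n_components))]:
    projected = reducer.fit_transform(df_data)
    X_train, X_test, y_train, y_test = train_test_split(
        projected, target, test_size=test_size, random_state=0)
    nb = GaussianNB().fit(X_train, y_train)
    print(name, "accuracy:", accuracy_score(y_test, nb.predict(X_test)))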

X_train, _, _ = one_hot_dataframe(X_raw, [
    'job', 'marital', 'education', 'default', 'housing', 'loan', 'contact',
    'month', 'poutcome'
],
                                  replace=True)
y_train = [1 if i == 'yes' else 0 for i in df.y]

reductions = []
pca = PCA(n_components=2)
reductions.append(pca.fit_transform(X_train, y_train))
lda = LDA(n_components=2)
reductions.append(lda.fit_transform(X_train, y_train))
isomap = Isomap(n_components=2)
reductions.append(isomap.fit_transform(X_train, y_train))
lle = LocallyLinearEmbedding(n_components=2, method='standard')
reductions.append(lle.fit_transform(X_train, y_train))

for reduced_X in reductions:
    plt.figure()
    red_x = []
    red_y = []
    blue_x = []
    blue_y = []
    green_x = []
    green_y = []

    for i in range(len(reduced_X)):
        if y_train[i] == 0:
            red_x.append(reduced_X[i][0])
            red_y.append(reduced_X[i][1])
        elif y_train[i] == 1:
 elif sys.argv[1] == '-isomap':
     trimmedImages = []
     for i in range(len(images)):
         images[i] = np.reshape(images[i], (-1))
         images[i] = images[i][:minSize]
         trimmedImages.append(images[i])
     isomap = Isomap(n_components=136)
     reducedImages = isomap.fit_transform(trimmedImages)
 elif sys.argv[1] == '-lle':
     trimmedImages = []
     for i in range(len(images)):
         images[i] = np.reshape(images[i], (-1))
         images[i] = images[i][:minSize]
         trimmedImages.append(images[i])
     lle = LocallyLinearEmbedding(n_components=136)
     reducedImages = lle.fit_transform(trimmedImages)
 
 # Do cross-fold validation 
 kf = KFold(len(images), n_folds=2)
 minAreas = {}
 maxAreas = {}
 avgAreas = {}
 totals = {}
 for train_index, test_index in kf:        
     xTrain = reducedImages[train_index]
     yTrain = labels[train_index]
     clf = OneVsRestClassifier(LinearSVC(), 4)
     clf.fit(xTrain, yTrain)
     xTest = reducedImages[test_index]
     yTest = labels[test_index]
     areas = eval(classes, clf, xTest, yTest)
def get_LLE_image(data):
    LLE = LocallyLinearEmbedding(n_components=2, n_neighbors=10)
    X_LLE = LLE.fit_transform(data)
    return X_LLE
Example no. 12
def lle(X=None, W=None, num_vecs=None, k=None):
    embedder = LocallyLinearEmbedding(n_neighbors=k, n_components=num_vecs)
    return embedder.fit_transform(X)
plt.show()

#Use iso_model.transform(x_test) to apply the Isomap fitted on the training set to the test set
'''
-------------------------------------------------------------------------------
-------------------------------Modified LLE------------------------------------
-------------------------------------------------------------------------------
'''

#Apply modified LLE, keeping n components < the number of original features
#method = 'standard' for LLE, 'hessian' for HLLE, or 'modified' for modified LLE
mlle_model = LocallyLinearEmbedding(n_neighbors=5,
                                    n_components=2,
                                    method='modified',
                                    random_state=seed)
mlle_model.fit_transform(x_std)
print(mlle_model.get_params())
mlle_dim = mlle_model.embedding_
print(mlle_dim.shape)  #There should be 2 latent variables represented
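#As with the Isomap note earlier, a fitted LLE model can also project held-out
#rows via transform(); a hedged sketch that simply holds out the last 10 rows
#of x_std for illustration (not part of the original script):
held_out_mlle = mlle_model.transform(x_std[-10:])
print(held_out_mlle.shape)  #(10, 2)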

#Plot first 2 extracted features and the observation class
plt.figure(figsize=(10, 5))
plt.xlabel('Latent Variable 1 (explains most variance)')
plt.ylabel('Latent Variable 2 (explains second most variance)')
plt.title('Modified LLE 2-Dimension Plot with Observation Class, 5 neighbors')
plt.scatter(mlle_dim[:, 0], mlle_dim[:, 1], c=y)
plt.colorbar()
plt.show()

#Try a different number of neighbors
mlle_model = LocallyLinearEmbedding(n_neighbors=15,
Example no. 14
import numpy
from sklearn import svm
from sklearn.model_selection import KFold
from sklearn.utils import shuffle
from sklearn.naive_bayes import GaussianNB
from sklearn.manifold import LocallyLinearEmbedding

from data.preprocess import features_preprocess, features_test_preprocess, labels_preprocess, labels_preprocess_num
from data.preprocess_2nd import preprocess_ft_lbls_num
from data.preprocess import ft_lbls_num

scores = []

embedding = LocallyLinearEmbedding(n_components=10)

(features1, labels1) = ft_lbls_num()
(features2, labels2) = preprocess_ft_lbls_num()

features1 = embedding.fit_transform(features1, labels1)
features2 = embedding.fit_transform(features2, labels2)

K = 5
cv = KFold(n_splits=K, shuffle=True)

features = numpy.concatenate((features1, features2))
labels = numpy.concatenate((labels1, labels2))

clf = svm.SVC(kernel='rbf')

for i in range(100):

    features1, labels1 = shuffle(features1, labels1)

    for train, test in cv.split(features1):
def main():
    
    parser = argparse.ArgumentParser(description=
                                'Perform Dimensionality Reduction')
    parser.add_argument('--alg', type=str, default='MLLE',
        help='Algorithm to reduce dimensionality.')
    parser.add_argument('catalog', type=str,
        help='Specify the catalog on which to perform DimReduce.')
    args = parser.parse_args()

    #dat = Table.read('catalogs/ZEST_catalog_colors.fits')
    #training_sample = dat[0:10000]
    #testing_sample = dat[10001:20000]
    #zkeys = ['cc', 'aa', 'm20', 'gg']

    base = os.path.basename(args.catalog)
    filename = os.path.splitext(base)[0]

    dat = Table.read(args.catalog)
    mkeys = ['elipt', 'C', 'A_1a', 'G', 'M20']#

    #dat.remove_column('color')
    if 'color' not in dat.colnames:
        if 'kaggle' in sample:
            dat = prep_catalog.color_data2(dat, 'gz2class')
        if 'direct' in sample:
            dat = prep_catalog.color_data(dat, 'zclass')
        dat.write(args.catalog, overwrite=True)

    #dat = prep_catalog.adjust_asym(dat, mkeys[2])
    #train, traincols, targets = prep_catalog.whiten_data(dat, mkeys)

    n_neighbors = [10,12,15,20]
    #n_neighbors = [7]
    n_components = 3

    for i, n_neigh in enumerate(n_neighbors):
        
        if args.alg in ['MLLE', 'LLE', 'LTSA', 'HLLE']:
            if args.alg == 'MLLE':
                method = 'modified'
            elif args.alg == 'LLE':
                method = 'standard'
            elif args.alg == 'LTSA':
                method = 'ltsa'
            elif args.alg == 'HLLE':
                method = 'hessian'
                           
            #replace_panoptes(dat)
            #pdb.set_trace()
            #sample = 'directbig_panoptes'

            X, y = prep_catalog.whiten_data(dat, mkeys)

            (dat1, dat2),(thing1,thing2) = split_samples(dat, dat,[0.75, 0.35], 
                                                       random_state=0)
            
            (X_train, X_test), (y_train, y_test) = split_samples(X, y, 
                                                [0.75, 0.35], random_state=0)

            y_train = simplify_classlabels(y_train)
            y_test = simplify_classlabels(y_test)

            #filename = 'modified_7_directbig_new'

            X_train = X
            y_train = simplify_classlabels(y)

            #'''
            #sample ='direct_zcut'

            #Y_train, Y_test = open_previous_LLE(filename)

            #cut = np.where(X1['REDSHIFT'] <= 0.05)
            #X1_cut = X1[cut]
            #QC_plots(X1_cut)
            #Y_train = np.array(Y_train)[cut]
            #col_train = np.array(col_train)[cut]
            #X = Table(X)
            #cut_out_mixedup_region(X, np.array(Y_train))

            #'''
            print "performing "+method+" LLE with",n_neigh,\
                "nearest neighbors"
            print "on training sample of",len(X_train),"objects"

            t0 = time()
            A = LLE(n_neigh, n_components, eigen_solver='auto', method=method)
            error = A.fit(X_train).reconstruction_error_
            
            Y_train = A.fit_transform(X_train)
            Y_test = A.transform(X_train)
            t1 = time()
            #'''        

            metadata = {'method':method, 'N':n_neigh, 'd':n_components, 
                        'error':error, 'time':t1-t0, 'sample':filename+'_total'}
            save_dimreduce(dat, Y_train, y_train, metadata, filename+'_total')

            #metadata = {'method':method, 'N':n_neigh, 'd':n_components, 
            #            'error':error, 'time':t1-t0, 'sample':filename+'_test'}
            #save_dimreduce(X2, Y_test, y_test, metadata, filename+'_test')

            # plot in 3D
            plot_dimreduce_3D(Y_train, y_train[:,1], Y_test, y_test[:,1], 
                              method, n_neigh, error, t1-t0, filename, two=False)

        #====================================================================#

        elif args.alg == 'ISO':
            method='IsoMap'
                
            print "performing IsoMap with",n_neigh,"nearest neighbors"
            print "on training sample of",len(dat),"objects"
            
            t0 = time()
            A = Isomap(n_neigh, n_components, eigen_solver='dense')
            error = A.fit(train).reconstruction_error()
            
            Y = A.fit_transform(train)
            #Y2 = A.transform(test)
            
            t1 = time()
            print "%s: %.2g sec" %(args.alg, t1-t0)
            print "reconstruction error: ", error
            
            print "begin plotting"
            plot_dimreduce(Y, traincols, method, n_neigh, sample, axis=0)
            plot_dimreduce(Y, traincols, method, n_neigh, sample, axis=1)
            plot_dimreduce(Y, traincols, method, n_neigh, sample, axis=2)
            plot_dimreduce_3D(Y, traincols, Y, traincols, method, 
                              n_neigh, (t1-t0), error, sample)
            
        elif args.alg == 'LDA':
            
            print "performing LDA"
            
            X, Xc, y = prep_catalog.whiten_data(dat, mkeys)

            (X_train, X_test), (y_train, y_test) = split_samples(X, y, 
                                                [0.75, 0.25], random_state=0)

            DRclf = LDA(3, priors=None)
            #DRclf.fit(X_train, y_train)
            DRtrain = DRclf.fit(X_train, y_train).transform(X_train)
            DRtest = DRclf.fit(X_train, y_train).transform(X_test)

            classes = np.unique(y_train)
            colors = np.array(['darkred', 'red', 'lightsalmon', 
                               'darkgreen', 'lightgreen', 'lightseagreen', 
                               'indigo', 'darkviolet', 'plum'])
            plot_LDA_3D(DRtrain, y_train, classes, colors, sample)

            pdb.set_trace()

            #classifiers = []
            #predictions = []
            #Nparams = np.arange(1, X.shape[1]+1)
            #for nc in Nparams:
            clf = LDA()
            clf.fit(DRtrain, y_train)
            y_pred = clf.predict(DRtest)
            
            matchesLDA = (y_pred == y_test)
            print(np.sum(matchesLDA))

            pdb.set_trace()

            #------------------------------------------

            from sklearn.neighbors import KNeighborsClassifier
            knc = KNeighborsClassifier(5)
            knc.fit(DRtrain, y_train)
            y_pred = knc.predict(DRtest)

            matchesKNN = (y_pred == y_test)
            print(np.sum(matchesKNN))

            pdb.set_trace()
            #------------------------------------------

            from astroML.classification import GMMBayes
            gmmb = GMMBayes(9)
            gmmb.fit(DRtrain, y_train)
            y_pred = gmmb.predict(DRtest)

            matchesGMMB = (y_pred == y_test)
            print(np.sum(matchesGMMB))

            pdb.set_trace()
            #------------------------------------------

            # plot the results
            fig = plt.figure(figsize=(5, 2.5))
            fig.subplots_adjust(bottom=0.15, top=0.95, hspace=0.0,
                                left=0.1, right=0.95, wspace=0.2)

            # left plot: data and decision boundary
            ax = fig.add_subplot(121)
            pdb.set_trace()
            im = ax.scatter(X[:, 3], X[:, 4], color=Xc, cmap=plt.cm.Spectral, 
                            s=4, lw=0) #cmap=plt.cm.binary,, zorder=2
            im.set_clim(-0.5, 1)
            
            #im = ax.imshow(Z, origin='lower', aspect='auto',
            #               cmap=plt.cm.binary, zorder=1,
            #               extent=xlim + ylim)
            #im.set_clim(0, 1.5)
            
            #ax.contour(xx, yy, Z, [0.5], colors='k')
            
            #ax.set_xlim(xlim)
            #ax.set_ylim(ylim)
            
            ax.set_xlabel('$G$')
            ax.set_ylabel('$M20$')

            #pred, true = classification_loss(predictions, y_test)
            #completeness, contamination = completeness_contamination(pred, true)

            pdb.set_trace()


            #'''
            #t0 = time()
            #A = LDA(n_components, priors=None)
            #Y = A.fit_transform(train, targets)
            #Y2 = A.fit(train, targets).transform(train)
                
            #t1 = time()
            #print "%s: %.2g sec" %(args.alg, t1-t0)
            
            predict = A.predict(train)
            #print "Predicted classes:", predict
            #pdb.set_trace()
            

            #pdb.set_trace()
            #'''
            
            plot_LDA_3D(Y2, targets, classes, colors, sample)
            plot_LDA(Y2, targets, classes, colors, sample, axis=0)
            plot_LDA(Y2, targets, classes, colors, sample, axis=1)
            plot_LDA(Y2, targets, classes, colors, sample, axis=2)
            
            pdb.set_trace()
#----------------------------------------------------------------------
# Isomap projection 
print "Computing Isomap embedding"
t0 = time()
D_iso = Isomap(n_neighbors, n_components=2).fit_transform(D_scaled)
print "Done in time %.2fs " % (time() - t0)

#----------------------------------------------------------------------
# Locally linear embedding 
n_neighbors = 35
print "Computing LLE embedding"
clf = LocallyLinearEmbedding(n_neighbors, n_components=2,
                                      method='modified')
t0 = time()
D_lle = clf.fit_transform(D_scaled)
print "Done in time %.2fs " % (time() - t0)
print "Reconstruction error: %g" % clf.reconstruction_error_

#----------------------------------------------------------------------
# kernel PCA
print "Computing kPCA embedding"
kpca = KernelPCA(n_components=2, kernel="rbf", gamma=0.0028942661247167516)
t0 = time()
D_kpca = kpca.fit_transform(D_scaled)
print "Done in time %.2fs " % (time() - t0)

plot_embedding(D_pca, 1, rescale=None, title="PCA projection")
plot_embedding(D_iso, 2, rescale=None, title="Isomap projection")
plot_embedding(D_lle, 3, rescale=None, title="LLE projection", legend_loc="lower right")
plot_embedding(D_kpca, 4, rescale=None, title="kPCA projection")
Example no. 17
fdata = iso.fit_transform(digits["data"])
fig = plt.figure()
ax = fig.add_subplot(111, projection="3d")

plt.scatter(fdata[:, 0], fdata[:, 1], zs=fdata[:, 2], c=digits["target"], s=100)

plt.show()


# LLE

from sklearn.manifold import LocallyLinearEmbedding

lle = LocallyLinearEmbedding(n_neighbors=15, n_components=3, method="modified")
fig = plt.figure()
fdata = lle.fit_transform(digits["data"])
ax = fig.add_subplot(111, projection="3d")

plt.scatter(fdata[:, 0], fdata[:, 1], zs=fdata[:, 2], c=digits["target"], s=100)

plt.show()

# MDS

from sklearn.manifold import MDS

mds = MDS(n_components=3)
fig = plt.figure()
fdata = mds.fit_transform(digits["data"])
ax = fig.add_subplot(111, projection="3d")
Example no. 18
            ax.add_artist(ab)

            ax.plot(ica_x[j], ica_y[j], 'ro', markersize=2)

            now_image = np.append(now_image, j)

    plt.show()

# for LLE
for i in range(10):
    temp = np.argmax(y_data, axis=1)
    x = x_data[temp == i]
    y = y_data[temp == i]

    lle = LocallyLinearEmbedding(n_components=2)
    data_lle = lle.fit_transform(x)
    lle_x = data_lle[:, 0]
    lle_y = data_lle[:, 1]

    draw_image = np.reshape(x, (len(x), 28, 28))
    now_image = np.array([], dtype='int32')

    fig, ax = plt.subplots(figsize=(10, 9))
    s = 'LLE : ' + str(i)

    plt.title(s)
    plt.xlabel('First Principal Component')
    plt.ylabel('Second Principal Component')
    #plt.axis([-7,9,-7,8])

    ax.plot(lle_x, lle_y, 'b.', markersize=1)
Example no. 19
fig = plt.figure(1)
ax = fig.add_subplot(211, projection='3d')
ax.scatter(xs, ys, zs, c=labels)
ax.set_title("Manifold N")
ax_transform = fig.add_subplot(212)
ax_transform.scatter(iso_transformed[:, 0], iso_transformed[:, 1], c=labels)
ax_transform.set_title("Isomap embedded")

# 3
thetas1 = np.arange(0, np.pi, np.pi / 300)
thetas2 = np.arange(0, np.pi, np.pi / 300)
xs = np.hstack((100 * np.sin(thetas1), 100 * np.sin(thetas2)))
zs = np.hstack((300 + 100 * np.cos(thetas1), 100 + 100 * np.cos(thetas2)))
xs = xs + 5 * np.random.randn(600)
zs = zs + 5 * np.random.randn(600)
ys = 10 * np.random.randn(600)
embedding = LocallyLinearEmbedding(n_neighbors=5, reg=0.1)
#embedding = Isomap()
lle_transformed = embedding.fit_transform(np.vstack((xs, ys, zs)).T)

fig = plt.figure(2)
ax = fig.add_subplot(211, projection='3d')
ax.scatter(xs, ys, zs, c=labels)
ax.set_title("Manifold 3")
ax_transform = fig.add_subplot(212)
ax_transform.scatter(lle_transformed[:, 0], lle_transformed[:, 1], c=labels)
ax_transform.set_title("LLE embedded")

plt.show()
Example no. 20
def LLE(data, n_components=57):
    embedding = LocallyLinearEmbedding(n_components=n_components)
    X_transformed = embedding.fit_transform(data)
    return X_transformed
Example no. 21
'''
train PCA basis based on training.txt and output dimension-reduced coefficients for both training.txt and testing.txt
'''

from __future__ import division
import sys
from sklearn.decomposition import PCA
from sklearn.manifold import Isomap
from sklearn.manifold import LocallyLinearEmbedding
from sklearn import preprocessing
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.cm as cm
from mpl_toolkits.mplot3d import Axes3D
import random
from colorsys import hsv_to_rgb

final_dim = 30
data = np.genfromtxt("100examples.txt", delimiter=',')
pca = PCA(n_components=final_dim)
isomap = Isomap(n_components=final_dim)
lle = LocallyLinearEmbedding(n_components=final_dim)
data_xformed = lle.fit_transform(data)
np.savetxt("lle_data_30_dims.txt", data_xformed, delimiter=',')
Example no. 22
from sklearn.manifold import LocallyLinearEmbedding
import numpy as np
import matplotlib
matplotlib.use('Agg')
import matplotlib.pyplot as plt

tetra_freq = np.load('tetrafreq.npy')
phylum_index = np.load('phylumIndex.npy')
phylum_names = np.load('phylumNames.npy')

lle = LocallyLinearEmbedding(n_components=2)
lle_result = lle.fit_transform(tetra_freq)

plt.figure()
for c, i, name in zip ("bgrcmykw", list(range(7, -1, -1)), phylum_names):
    plt.scatter(lle_result[phylum_index == i, 0], lle_result[phylum_index == i, 1], c=c, label=name)
plt.title('LLE of tetranucleotide')
plt.legend(loc=3, fontsize=10)
plt.savefig('LLE.png')


Example no. 23
cells = opts.high // opts.step  # integer division so the array shape is an int
lle_gmm_results = np.zeros((cells, opts.iters))

D = scale(X)

n_samples, n_features = D.shape
# chosen by hyperparam search in a separate test.
n_neighbors = 35

# For the specified number of principal components, do the clustering
dimension_list = range(opts.low, opts.high + 1, opts.step)
data_files = []
for i in dimension_list:
    index = (i // opts.step) - 1
    lle = LocallyLinearEmbedding(n_neighbors, n_components=i, method='standard')
    X_lle = lle.fit_transform(D)
    
    for j in range(0,opts.iters,1):
        gaussmix = GMM(n_components=true_k, covariance_type='tied', n_init=10, n_iter=1000)
        gaussmix.fit(X_lle)
        gaussmix_labels = gaussmix.predict(X_lle)    
        homog = metrics.homogeneity_score(labels[:,0], gaussmix_labels)
        print "Gaussian mixture homogeneity: %0.3f" % homog
        test_result = {"Model": "LLE", "Dimension": i, "Homogeneity": homog}
        index = pd.Index([0], name='rows')
        data_files.append(pd.DataFrame(data=test_result,index=index))        

# Save the data to a file:

print "...Done"
print "...rbinding DataFrames"
Example no. 24
koor_x = ['false', 'true']
koor_y = besar
kelas_res = list(kelas_res)
valp = kelas_res.count(False)
valn = kelas_res.count(True)
new_y = []
new_y.append(valp)
new_y.append(valn)
plt.bar(koor_x,
        new_y,
        label='After SMOTE+TOMEK',
        color='b',
        width=0.3,
        align='center')
plt.bar(koor_x,
        koor_y,
        label='Before SMOTE+TOMEK',
        color='r',
        width=0.3,
        align='edge')
plt.xlabel('class')
plt.ylabel('value')
plt.legend()
plt.show()

embedding = LocallyLinearEmbedding(n_components=5,
                                   method='ltsa',
                                   eigen_solver='dense')
# method='hessian', eigen_solver='dense'
X_transformed = embedding.fit_transform(df_resm)
Example no. 25
class Model(nn.Module):
    def __init__(self, args):
        super(Model, self).__init__()
        self.temperature = args.temperature
        self.base = resnet12()
        self.nFeat = self.base.nFeat
        self.classifier = nn.Conv2d(self.nFeat, args.num_classes, kernel_size=1)
        self.args = args
        if (args.method in {'CBM', 'CBM_LLE'}):
            with open(osp.join(args.save_dir, 'base_proto.pickle'),
                      'rb') as fo:
                self.base_proto = pickle.load(fo)  # [64 512]
            if (args.method == 'CBM_LLE'):
                self.LLE = LocallyLinearEmbedding(n_neighbors=args.k,
                                                  n_components=args.dim)
                if (args.L2):
                    self.base_proto = F.normalize(self.base_proto, p=2, dim=-1)
                self.base_proto = torch.from_numpy(
                    self.LLE.fit_transform(
                        self.base_proto.cpu().numpy())).cuda()
            self.base_proto = self.base_proto.unsqueeze(0)
            if (self.args.similarityOnBase == 'cosine'):
                self.base_proto = F.normalize(self.base_proto, p=2, dim=-1)

    def test(self, ftrain, ftest, batch_size, num_way, num_test):
        ftrain = ftrain.mean((-1, -2))
        ftest = ftest.mean((-1, -2))
        phi = self.calPhi(ftrain, ftest, batch_size, num_way, num_test)
        if (self.args.method in {'CBM', 'CBM_LLE'}):
            varPhi = self.calVarPhi(ftrain, ftest, batch_size, num_way,
                                    num_test)
            return self.args.alpha * phi + (
                1 - self.args.alpha) * varPhi  # [4 30 5]
        else:
            return phi

    def calPhi(self, ftrain, ftest, batch_size, num_way, num_test):
        ftrain = ftrain.view(batch_size, 1, num_way, -1)
        ftest = ftest.view(batch_size, num_test, 1, -1)
        ftrain = F.normalize(ftrain, p=2, dim=-1)
        ftest = F.normalize(ftest, p=2, dim=-1)
        scores = torch.sum(ftest * ftrain, dim=-1)  # [4 30 5]
        return scores

    def calVarPhi(self, ftrain, ftest, batch_size, num_way, num_test):
        if (self.args.method == 'CBM_LLE'):
            if (self.args.L2):
                ftrain = F.normalize(ftrain, p=2, dim=-1)
                ftest = F.normalize(ftest, p=2, dim=-1)
            ftrain = torch.from_numpy(self.LLE.transform(
                ftrain.cpu().numpy())).cuda()
            ftest = torch.from_numpy(self.LLE.transform(
                ftest.cpu().numpy())).cuda()
        ftrain = ftrain.unsqueeze(1)
        ftest = ftest.unsqueeze(1)
        if (self.args.similarityOnBase == 'cosine'):
            ftrain = F.normalize(ftrain, p=2, dim=-1)
            ftrain = (ftrain * self.base_proto).sum(-1)
            ftest = F.normalize(ftest, p=2, dim=-1)
            ftest = (ftest * self.base_proto).sum(-1)
        else:  # Euclidean
            ftrain = -(ftrain - self.base_proto).norm(dim=-1)
            ftest = -(ftest - self.base_proto).norm(dim=-1)
        if (self.args.softmax):
            ftrain = F.softmax(ftrain, dim=-1)
            ftest = F.softmax(ftest, dim=-1)
        if (self.args.similarityOfDistribution == 'cosine'):
            ftrain = F.normalize(ftrain, p=2,
                                 dim=-1).view(batch_size, 1, num_way, -1)
            ftest = F.normalize(ftest, p=2,
                                dim=-1).view(batch_size, num_test, 1, -1)
            scores = (ftrain * ftest).sum(-1)
        elif (self.args.similarityOfDistribution == 'Euclidean'):
            ftrain = F.normalize(ftrain, p=2,
                                 dim=-1).view(batch_size, 1, num_way, -1)
            ftest = F.normalize(ftest, p=2,
                                dim=-1).view(batch_size, num_test, 1, -1)
            scores = -(ftrain - ftest).norm(dim=-1)
        else:  # KL
            ftrain = F.softmax(ftrain, dim=-1).view(batch_size, 1, num_way, -1)
            ftest = F.softmax(ftest, dim=-1).view(batch_size, num_test, 1,
                                                  -1).log()
            scores = -(ftrain * (ftrain.log() - ftest)).sum(dim=-1)
        return scores

    def forward(self, xtrain, xtest, ytrain, ytest):
        batch_size, num_train = xtrain.size(0), xtrain.size(1)
        num_test = xtest.size(1)
        num_way = ytrain.size(2)
        ytrain = ytrain.transpose(1, 2)
        xtrain = xtrain.view(-1, xtrain.size(2), xtrain.size(3),
                             xtrain.size(4))
        xtest = xtest.view(-1, xtest.size(2), xtest.size(3), xtest.size(4))
        x = torch.cat((xtrain, xtest), 0)
        f = self.base(x)
        ftrain = f[:batch_size * num_train]
        ftrain = ftrain.view(batch_size, num_train, -1)
        ftrain = torch.bmm(ytrain, ftrain)
        ftrain = ftrain.div(ytrain.sum(dim=2, keepdim=True).expand_as(ftrain))
        ftrain = ftrain.view(-1, *f.size()[1:])  # [4*5 512 6 6]
        ftest = f[batch_size * num_train:]
        ftest = ftest.view(-1, *f.size()[1:])  # [4*30 512 6 6]
        if not self.training:
            score = self.test(ftrain, ftest, batch_size, num_way, num_test)
            # score = score.view(batch_size*num_test, num_way)
            return score
        else:
            ytest = self.classifier(ftest) * self.temperature  # [4*30 64 6 6]
            return ytest
Example no. 26
from sklearn.decomposition import PCA
from sklearn.manifold import Isomap, LocallyLinearEmbedding
from sklearn import preprocessing
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.cm as cm
from mpl_toolkits.mplot3d import Axes3D
import random
from colorsys import hsv_to_rgb

pca = PCA(n_components=2)
isomap = Isomap(n_components=2)
lle = LocallyLinearEmbedding(n_components=2)
data = np.genfromtxt('data01_small.txt', delimiter=',')
pca_xform = pca.fit_transform(data)
isomap_xform = isomap.fit_transform(data)
lle_xform = lle.fit_transform(data)
label = [0]*100+[1]*100
rgbs = [(0.5,0,0), (0,0.5,0)]


plt.figure()
xs = pca_xform[:,0]
ys = pca_xform[:,1]
ax = plt.subplot(111)
for i in range(len(xs)):
	ax.text(xs[i], ys[i], str(label[i]), color=rgbs[label[i]], fontdict={'weight': 'bold', 'size': 9})
t = (max(xs)-min(xs))*0.1
ax.axis([min(xs)-t, max(xs)+t, min(ys)-t, max(ys)+t])
plt.xticks([]), plt.yticks([])
plt.title('PCA')
 def getLLE(self):
     lle = LocallyLinearEmbedding(n_neighbors=4)
     self.dataPCA = lle.fit_transform(self.data.values[0:768, 0:8])
     self.labels = np.array(self.data.values[:, 8], int)
Example no. 28
# ISOMAP

print('ISOMAP')
from sklearn.manifold import Isomap
iso = Isomap(n_components=3, n_neighbors=7)
fdata = iso.fit_transform(authors)

show_figure(fdata, labels, ulabs, 'ISOMAP')

# LLE
print('LLE')
from sklearn.manifold import LocallyLinearEmbedding
lle = LocallyLinearEmbedding(n_neighbors=7, n_components=3, method='standard')

fdata = lle.fit_transform(authors)

print(lle.reconstruction_error_)

show_figure(fdata, labels, ulabs, 'LLE')

# MDS
print('MDS')
from sklearn.manifold import MDS
mds = MDS(n_components=3)
fdata = mds.fit_transform(authors)
print(mds.stress_)

show_figure(fdata, labels, ulabs, 'MDS')

# Spectral Embedding
spherical_helicoid_1024 = spherical_helicoid(0.5, 3, 1024)
spherical_helicoid_2048 = spherical_helicoid(0.5, 3, 2048)
spherical_helicoid_4096 = spherical_helicoid(0.5, 3, 4096)

# a) Klein Bottle
# From the data below, we can see that the optimal number of neighbors depends on the size of the dataset.
from sklearn.manifold import LocallyLinearEmbedding

klein_bottle_data = [klein_bottle_1024, klein_bottle_2048, klein_bottle_4096]
optimal_neighbors = []
for data_set in klein_bottle_data:
    minimum_error = float("inf")
    optimal_k = 0
    for k in range(3, 10):
        embedding = LocallyLinearEmbedding(n_neighbors=k, n_components=2)
        X_transformed = embedding.fit_transform(data_set)
        reconstruction_error = embedding.reconstruction_error_
        if reconstruction_error < minimum_error:
            optimal_k = k
            minimum_error = reconstruction_error
    optimal_neighbors.append(optimal_k)

print("Optimal Number of Neighbors for N=1024: " + str(optimal_neighbors[0]))
print("Optimal Number of Neighbors for N=2048: " + str(optimal_neighbors[1]))
print("Optimal Number of Neighbors for N=4096: " + str(optimal_neighbors[2]))

# b) Circular Helicoid
# From the data below, we can see that the optimal number of neighbors depends on the size of the dataset.
from sklearn.manifold import LocallyLinearEmbedding

circular_helicoid_data = [
def plot2d(X, y, scale=True, normalize=False, embedding='pca', title=''):
	"""
	Plot data transformed into two dimensions by the chosen embedding
	(PCA by default). PCA transforms into a new embedding space such that
	the first dimension contains the maximal variance and each following
	dimension the maximal remaining variance. This should spread the
	observed n-dimensional data as much as possible. It is unsupervised
	and will not consider target values.
	"""
	if (scale): 
		scaler = StandardScaler()
		X = scaler.fit_transform(X)

	if (normalize): 
		normalizer = Normalizer(norm='l2')
		X = normalizer.fit_transform(X)
		
	if embedding == 'pca':
		pca = PCA(n_components=2)
		X_transformed = pca.fit_transform(X)
	elif embedding == 'isomap':
		isomap = Isomap(n_components=2, n_neighbors=20)
		X_transformed = isomap.fit_transform(X)
	elif embedding == 'lle':
		lle = LocallyLinearEmbedding(n_components=2, n_neighbors=5)
		X_transformed = lle.fit_transform(X)
	elif embedding == 'tsne':
		t_sne = TSNE(n_components=2)
		X_transformed = t_sne.fit_transform(X)
	elif embedding == 'spectral':
		se = SpectralEmbedding(n_components=2)
		X_transformed = se.fit_transform(X)
	elif embedding == 'mds':
		mds = MDS(n_components=2)
		X_transformed = mds.fit_transform(X)
	elif embedding == 'gallery':
		plt.figure(1)
		
		plt.subplot(231)
		plt.title('pca')
		X_t = PCA(n_components=2).fit_transform(X)
		plt.scatter(X_t[:,0 ], X_t[:, 1], c=y)

		plt.subplot(232)
		plt.title('isomap')
		X_t = Isomap(n_neighbors=20).fit_transform(X)
		plt.scatter(X_t[:,0 ], X_t[:, 1], c=y)

		plt.subplot(233)
		plt.title('lle')
		X_t = LocallyLinearEmbedding(n_neighbors=20).fit_transform(X)
		plt.scatter(X_t[:,0 ], X_t[:, 1], c=y)

		plt.subplot(234)
		plt.title('tsne')
		X_t = TSNE().fit_transform(X)
		plt.scatter(X_t[:,0 ], X_t[:, 1], c=y)

		plt.subplot(235)
		plt.title('spectral')
		X_t = SpectralEmbedding().fit_transform(X)
		plt.scatter(X_t[:,0 ], X_t[:, 1], c=y)

		plt.subplot(236)
		plt.title('mds')
		X_t = MDS().fit_transform(X)
		plt.scatter(X_t[:,0 ], X_t[:, 1], c=y)

		plt.suptitle('Gallery transforms ' + title)

		return plt
	else:
		raise ValueError("Choose between pca, isomap and tsne")

	plt.title(title + ' ' + embedding + ' plot')
	sc = plt.scatter(X_transformed[:, 0], X_transformed[:, 1], c=y)
	plt.colorbar(sc)
	return plt
Example no. 31
# Calculate the Locally Linear Embedding
# NOTE THAT YOUR DATA NEEDS TO BE NORMALIZED
from sklearn.manifold import LocallyLinearEmbedding
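# A hedged sketch of the normalization step the note above refers to; the scaler
# choice is an assumption, and `data` is assumed to already be a NumPy array.
from sklearn.preprocessing import StandardScaler
data = StandardScaler().fit_transform(data)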

# Calculate LLE embedding
lle = LocallyLinearEmbedding(n_neighbors=20, n_components=2)
x, y = np.array(lle.fit_transform(data)).T
Example no. 32
    def fit_transform(self, X):
        """
        计算降维结果
        :param X: 高维数据矩阵,每一行是一个高维数据点
        :return:
        """
        (n, m) = X.shape
        print(self.parameters)

        # Use a classical dimensionality-reduction method
        if self.affinity == 'PCA':  # return the PCA embedding directly
            print('Classical method: PCA...')
            pca = PCA(n_components=self.n_components)
            return pca.fit_transform(X)
        elif self.affinity == 'MDS':  # return the MDS embedding directly
            print('Classical method: MDS...')
            mds = MDS(n_components=self.n_components)
            return mds.fit_transform(X)
        elif self.affinity == 'Isomap':  # return the Isomap embedding directly
            print('Classical method: Isomap...')
            iso = Isomap(n_components=self.n_components,
                         n_neighbors=self.parameters['n_neighbors'])
            return iso.fit_transform(X)
        elif self.affinity == 't-SNE':  # return the t-SNE embedding directly
            print('Classical method: t-SNE...')
            tsne = TSNE(n_components=self.n_components,
                        perplexity=self.parameters['perplexity'])
            return tsne.fit_transform(X)
        elif self.affinity == 'cTSNE':  # use the plain (non-accelerated) t-SNE implementation
            print('Classical method: classical t-SNE...')
            from ArtDR import tsne
            return tsne.tsne(X,
                             perplexity=self.parameters['perplexity'],
                             path=self.path,
                             config_str='t-SNE ')
        elif self.affinity == 'LLE':  # return the LLE embedding directly
            print('Classical method: LLE...')
            lle = LocallyLinearEmbedding(
                n_components=self.n_components,
                n_neighbors=self.parameters['n_neighbors'])
            return lle.fit_transform(X)
        elif self.affinity == 'geo-t-SNE':  # t-SNE based on geodesic distances
            print('Geodesic t-SNE...')
            gtsne = geoTsne(n_neighbors=self.parameters['n_neighbors'],
                            perplexity=self.parameters['perplexity'])
            return gtsne.fit_transform(X, n_components=self.n_components)

        if self.parameters['use_skeleton']:  # use the skeleton-point approach
            return self.skeleton_fit_transform(X)

        # Use our own dimensionality-reduction method
        if self.parameters['neighborhood_type'] == 'iter':  # iterative neighborhood construction
            W = self.iter_affinity_matrix(X)
        else:
            W = self.affinity_matrix(X)  # our standard neighborhood construction
        if self.frame == 'MDS':
            print('Using MDS frame...')
            mds = MDS(n_components=self.n_components,
                      dissimilarity='precomputed')
            Y = mds.fit_transform(W)
            return Y
        elif self.frame == 't-SNE':
            print('Using t-SNE frame...')
            Y = tsneFrame.tsne_plus(W,
                                    self.parameters['perplexity'],
                                    path=self.path,
                                    config_str=self.config_str)
            return Y
        elif self.frame == 't-SNE+':
            print('Using t-SNE framework in sklearn...')
            tsne = tsneFramePlus.tsnePlus(
                n_components=self.n_components,
                perplexity=self.parameters['perplexity'])
            Y = tsne.fit_transform(W)
            return Y
        else:
            print("Wrong frame name!")
            return
Example no. 33
print(features.shape)
feats = fs.mutual_info_classif(features,
                               newlabels,
                               n_neighbors=5,
                               random_state=0)

max_indices = sorted(range(len(feats)),
                     key=lambda i: feats[i])[-64:]  #picking max 64 features
print(len(max_indices))

features = np.reshape(features, (len(features), -1))
newfeatures = []
for f in features:
    newfeatures.append(f[max_indices])

features = np.array(newfeatures)
features = np.reshape(features, (len(features), -1))
print(features.shape)

lle = LocallyLinearEmbedding(n_components=2,
                             max_iter=500,
                             method='ltsa',
                             n_jobs=7)

X_embedded = lle.fit_transform(features)

print(X_embedded.shape)

with open('../Manifold_features/ltsa', 'wb') as fp:
    pickle.dump(X_embedded, fp)
import numpy as np
from sklearn import datasets
import matplotlib.pyplot as plt
from sklearn.manifold import LocallyLinearEmbedding
from mpl_toolkits.mplot3d import Axes3D

np.random.seed(0)
X, color = datasets.make_swiss_roll(n_samples=1500)
model = LocallyLinearEmbedding(n_components=2, n_neighbors=15)
Z = model.fit_transform(X)

plt.figure(19)
ax = plt.axes(projection='3d')
ax.scatter(X[:, 0], X[:, 1], X[:, 2], c=color)
ax.view_init(4, -72)


plt.figure(20)
plt.scatter(Z[:, 0], Z[:, 1], c=color)
plt.show()










Example no. 35
    def dim_reduce(self,
                   method="tsne",
                   target_dim=2,
                   points=None,
                   metric="minkoswki"):

        try:
            if len(self.reduced_data) != 0:
                if self.reduced_data_method == method and method != "isomap":
                    return self.reduced_data
                elif method == "isomap" and self.reduced_data_method == method:
                    if self.reduced_data_method_metric == metric:
                        return self.reduced_data
        except:
            pass

        if method == "tsne":

            from sklearn.manifold import TSNE
            tsne = TSNE(n_components=target_dim, random_state=42)
            np.set_printoptions(suppress=True)

            self.reduced_data_method_metric = ""
            self.reduced_data_method = "tsne"
            if points is None:
                self.reduced_data = tsne.fit_transform(
                    self.word_vectors[:1000])
            else:
                self.reduced_data = tsne.fit_transform(points)

        elif method == "truncated_svd":
            from sklearn.decomposition import TruncatedSVD
            print("using TruncatedSVD...")
            svd = TruncatedSVD(n_components=target_dim,
                               n_iter=10,
                               random_state=42)
            self.reduced_data_method_metric = ""
            self.reduced_data_method = "truncated_svd"
            if points is None:
                self.reduced_data = svd.fit_transform(self.word_vectors[:1000])
            else:
                self.reduced_data = svd.fit_transform(points)
            print("sd-sum is:\t", svd.explained_variance_ratio_.sum())

        elif method == "spectral":
            from sklearn.manifold import SpectralEmbedding
            se = SpectralEmbedding(n_components=target_dim, random_state=42)
            self.reduced_data_method_metric = ""
            self.reduced_data_method = "spectral"
            if points is None:
                self.reduced_data = se.fit_transform(self.word_vectors[:1000])
            else:
                self.reduced_data = se.fit_transform(points)

        elif method == "isomap":
            from sklearn.manifold.isomap_mod import Isomap
            i = Isomap(n_components=target_dim,
                       max_iter=1000,
                       path_method='D',
                       neighbors_algorithm='auto')
            self.reduced_data_method_metric = metric
            self.reduced_data_method = "isomap"
            if points is None:
                self.reduced_data = i.fit_transform(self.word_vectors[:1000],
                                                    metric=metric)
            else:
                self.reduced_data = i.fit_transform(points, metric=metric)

        elif method == "lle":
            from sklearn.manifold import LocallyLinearEmbedding
            lle = LocallyLinearEmbedding(n_components=target_dim,
                                         max_iter=1000,
                                         neighbors_algorithm='auto')
            self.reduced_data_method_metric = ""
            self.reduced_data_method = "lle"
            if points is None:
                self.reduced_data = lle.fit_transform(self.word_vectors[:1000])
            else:
                self.reduced_data = lle.fit_transform(points)

        elif method == "kpca":
            from sklearn.decomposition import PCA, KernelPCA
            kpca = KernelPCA(kernel="rbf",
                             fit_inverse_transform=True,
                             gamma=10)
            self.reduced_data_method_metric = ""
            self.reduced_data_method = "kpca"
            if points is None:
                self.reduced_data = kpca.fit_transform(
                    self.word_vectors[:1000])
            else:
                self.reduced_data = kpca.fit_transform(points)

        return self.reduced_data
Example no. 36

if __name__ == '__main__':
    pth = './data.txt'
    data, label = load_data(pth)
    # print(data,label)
    # PCA
    pca = PCA(n_components=2)
    pca_ = pca.fit_transform(data)
    visual(pca_, label, "PCA")
    # LDA
    lda = LinearDiscriminantAnalysis()
    lda_ = lda.fit_transform(data, label)
    visual(lda_, label, "LDA")
    # KPCA
    kpca = KernelPCA(n_components=2, kernel='rbf')
    kpca_ = kpca.fit_transform(data)
    visual(kpca_, label, "KPCA")
    # Isomap
    iso = Isomap(n_components=2)
    iso_ = iso.fit_transform(data)
    visual(iso_, label, "Isomap")
    # LLE
    lle = LocallyLinearEmbedding(n_components=2)
    lle_ = lle.fit_transform(data)
    visual(lle_, label, "LLE")
    # Laplacian Eigenmaps
    le = SpectralEmbedding(n_components=2)
    le_ = le.fit_transform(data)
    visual(le_, label, "Laplacian Eigenmaps")
import matplotlib.pyplot as plt
import numpy as np

from sklearn.datasets import fetch_olivetti_faces
from sklearn.manifold import LocallyLinearEmbedding

# Set random seed for reproducibility
np.random.seed(1000)


if __name__ == '__main__':
    # Create the dataset
    faces = fetch_olivetti_faces()

    # Train LLE
    lle = LocallyLinearEmbedding(n_neighbors=15, n_components=2)
    X_lle = lle.fit_transform(faces['data'])

    # Plot the result
    fig, ax = plt.subplots(figsize=(18, 10))

    for i in range(100):
        ax.scatter(X_lle[i, 0], X_lle[i, 1], marker='o', s=100)
        ax.annotate('%d' % faces['target'][i], xy=(X_lle[i, 0] + 0.0015, X_lle[i, 1] + 0.0015))

    ax.set_xlabel(r'$x_0$')
    ax.set_ylabel(r'$x_1$')
    ax.grid()

    plt.show()
Example no. 38
#03-01.py
X, y = preprocess(data, shuffle=False, n_samples=1000, normalization=None)

from sklearn.manifold import LocallyLinearEmbedding
lle = LocallyLinearEmbedding(n_neighbors=15,
                             n_components=3, method='standard')
X_proj = lle.fit_transform(X)

three_component_plot(X_proj[:, 0], X_proj[:, 1], X_proj[:, 2], y, labels, trim_outliers=True)
Example no. 39
forest_test(X_lda, Y)
#####LDA can also be used as a classifier. Therefore, we can now test how an LDA Classifier can perform in this situation.
X_Reduced, X_Test_Reduced, Y_Reduced, Y_Test_Reduced = train_test_split(
    X_lda, Y, test_size=0.30, random_state=101)
start = time.process_time()
lda = LinearDiscriminantAnalysis().fit(X_Reduced, Y_Reduced)
print(time.process_time() - start)
predictionlda = lda.predict(X_Test_Reduced)
print(confusion_matrix(Y_Test_Reduced, predictionlda))
print(classification_report(Y_Test_Reduced, predictionlda))
#####Locally Linear Embedding is a dimensionality reduction technique based on Manifold
##Learning. A Manifold is a D-dimensional object embedded in a higher-dimensional space.
## Manifold Learning then aims to represent this object in its original D dimensions rather than in an unnecessarily larger space.
from sklearn.manifold import LocallyLinearEmbedding
embedding = LocallyLinearEmbedding(n_components=3)
X_lle = embedding.fit_transform(X)
forest_test(X_lle, Y)
####t-SNE is a non-linear dimensionality reduction technique which is typically used to visualize high-dimensional datasets.
#####t-SNE works by minimizing the divergence between a distribution built from the pairwise probability similarities
### of the input features in the original high-dimensional space and its equivalent in the reduced low-dimensional space.
##t-SNE then uses the Kullback-Leibler (KL) divergence to measure the dissimilarity of the two different
####distributions. The KL divergence is then minimized using gradient descent.
from sklearn.manifold import TSNE
start = time.process_time()
tsne = TSNE(n_components=3, verbose=1, perplexity=40, n_iter=300)
X_tsne = tsne.fit_transform(X)
print(time.process_time() - start)
forest_test(X_tsne, Y)
#####Autoencoders are a family of Machine Learning algorithms which can be used as a
###dimensionality reduction technique. The main difference between Autoencoders and
##other dimensionality reduction techniques is that Autoencoders use non-linear
Example no. 40
        ("kpca", KernelPCA(n_components=2)),
        ("log_reg", LogisticRegression())
    ])

param_grid = [{
        "kpca__gamma": np.linspace(0.03, 0.05, 10),
        "kpca__kernel": ["rbf", "sigmoid"]
    }]

grid_search = GridSearchCV(clf, param_grid, cv=3)
grid_search.fit(X, y)

# LLE
from sklearn.manifold import LocallyLinearEmbedding
lle = LocallyLinearEmbedding(n_components=2, n_neighbors=10)
X_reduced = lle.fit_transform(X)

#======================================================================================#
# K-Means
from sklearn.cluster import KMeans
k = 5
kmeans = KMeans(n_clusters=k)
y_pred = kmeans.fit_predict(X)
kmeans.cluster_centers_

# DBSCAN
from sklearn.cluster import DBSCAN
from sklearn.datasets import make_moons
X, y = make_moons(n_samples=1000, noise=0.05)
dbscan = DBSCAN(eps=0.05, min_samples=5)
dbscan.fit(X)
from sklearn.decomposition import PCA
pca = PCA(n_components=0.95)
X_reduced = pca.fit_transform(data[0])

np.sum(pca.explained_variance_ratio_)

dataset = keras.datasets.mnist.load_data()
images = dataset[1][0].reshape(10000, 28 * 28)
labels = dataset[1][1]

pca = PCA(n_components=154)
images_reduced = pca.fit_transform(images)

from sklearn.manifold import LocallyLinearEmbedding
lle = LocallyLinearEmbedding(n_components=2, n_neighbors=10)
X_lle = lle.fit_transform(data[0])

from sklearn.manifold import TSNE
tsne = TSNE(n_components=2)
x_clusters = tsne.fit_transform(images)

import matplotlib.pyplot as plt

for i in range(0, 10):
    indices = []
    for j in range(2 * 5000):
        if labels[j] == i:
            indices.append(j)

    plt.scatter(x_clusters[(indices), 0],
                x_clusters[(indices), 1],
Example no. 42
def lle(X=None, W=None, num_vecs=None, k=None):
    embedder = LocallyLinearEmbedding(n_neighbors=k, n_components=num_vecs)
    return embedder.fit_transform(X)
Example no. 43
#https://blog.csdn.net/u012162613/article/details/42192293

import numpy as np
from sklearn.decomposition import PCA
data = np.array([[1., 1.], [0.9, 0.95], [1.01, 1.03], [2., 2.], [2.03, 2.06],
                 [1.98, 1.89]])
data.shape  #(6, 2)

pca = PCA(n_components=1)
newData_shape = pca.fit_transform(data).shape  #(6, 1)

# 4. Multidimensional Scaling
#https://scikit-learn.org/stable/modules/generated/sklearn.manifold.MDS.html

from sklearn.datasets import load_digits
from sklearn.manifold import MDS
X, _ = load_digits(return_X_y=True)
X.shape  #(1797, 64)
mds = MDS(n_components=2)
X_transformed = mds.fit_transform(X[:100])  #(100, 2)

# 5. Locally Linear Embedding
#https://scikit-learn.org/stable/modules/generated/sklearn.manifold.LocallyLinearEmbedding.html

from sklearn.datasets import load_digits
from sklearn.manifold import LocallyLinearEmbedding
X, _ = load_digits(return_X_y=True)
X.shape  #(1797, 64)
lle = LocallyLinearEmbedding(n_components=2)
X_transformed = lle.fit_transform(X[:100])  #(100, 2)
Example no. 44
#03-02.py
X, y = preprocess(data, shuffle=False, n_samples=1000, normalization=None)

from sklearn.manifold import LocallyLinearEmbedding
lle = LocallyLinearEmbedding(n_neighbors=15,
                             n_components=3, method='modified')
X_proj = lle.fit_transform(X)

three_component_plot(X_proj[:, 0], X_proj[:, 1], X_proj[:, 2], y, labels, trim_outliers=True)
Example no. 45
f.close()


import numpy as np

N = len(dic_cl.items())
X = np.zeros((N, 7))
for i, (key, val) in enumerate(dic_cl.items()):
    X[i, :] = dic_cl[key]

from sklearn.manifold import LocallyLinearEmbedding
from sklearn.preprocessing import scale

lle = LocallyLinearEmbedding(n_components=3, n_neighbors=20)
print(X.max(axis=0))
Y3 = lle.fit_transform(scale(X))
Y3 -= Y3.min(axis=0)

print(len(dic_cl.items()))
lle = LocallyLinearEmbedding(n_components=1, n_neighbors=20)
Y1 = lle.fit_transform(X)
Y1 -= Y1.min()

o1 = open("1-d.csv", "w")
o3 = open("3-d.csv", "w")
for i, (key, val) in enumerate(dic_cl.items()):
    o1.write("%s,%f\n" % (key, Y1[i - 1]))
    o3.write("%s,%s\n" % (key, ",".join(map(str, Y3[i - 1, :]))))
o1.close()
o3.close()
import pylab