def plot_3d(dataset):
    """TODO: Docstring for plot_3d.
    :returns: TODO

    """
    from mpl_toolkits.mplot3d import Axes3D

    fig = plt.figure()
    ax = fig.add_subplot(111, projection='3d')

    iso = Isomap(n_components=3)
    projected = iso.fit_transform(dataset.data.toarray())

    print('projected: samples: %s, features: %s'
          % (projected.shape[0], projected.shape[1]))

    all_scatter = []
    colors = cm.rainbow(np.linspace(0, 1, len(dataset.target_names)), alpha=0.5)
    for i in range(len(dataset.target_names)):
        points = projected[dataset.target==i,:]
        # vmin/vmax dropped: they only apply when c= is given a value array
        cur = ax.scatter(points[:, 0], points[:, 1], points[:, 2],
                         color=colors[i], edgecolor='k', lw=0.1)
        all_scatter.append(cur)
    ax.legend(all_scatter, dataset.target_names,
               loc='lower left', scatterpoints=1)
    plt.savefig('isomap3d', dpi=500)
    plt.show()

    return True
def plotTrajectory(dfile):
    from scipy.spatial.distance import euclidean

    Vsteps = []
    with open(dfile) as fin:
        # the first line holds the target vector
        Vtarget = list(map(float, fin.readline().strip().split()))
        Vsteps.append(Vtarget)
        for l in fin:
            l = l.strip().split()
            if len(l) != 26:
                continue
            Vsteps.append(list(map(float, l)))

    distances = [euclidean(a, Vsteps[0]) for a in Vsteps[1:]]
    print(len(distances))

    _map = plt.get_cmap("winter")
    distcolors = _map(distances)


    dimred = Isomap(n_components=2)
    Vsteps = dimred.fit_transform(Vsteps)



    # objective vector
    plt.scatter(Vsteps[0, 0], Vsteps[0, 1], color='red', s=30, marker=(5, 1))
    # optimization steps
    plt.scatter(Vsteps[1:, 0], Vsteps[1:, 1], color=distcolors, alpha=0.5)

    plt.show()
Example #3
def dimension_reduce():
    ''' This compares a few different methods of
    dimensionality reduction on the current dataset.
    '''
    pca = PCA(n_components=2)                             # initialize a dimensionality reducer
    pca.fit(digits.data)                                  # fit it to our data
    X_pca = pca.transform(digits.data)                    # apply our data to the transformation
    plt.subplot(1, 3, 1)
    plt.scatter(X_pca[:, 0], X_pca[:, 1], c=digits.target)# plot the manifold
    
    se = SpectralEmbedding()
    X_se = se.fit_transform(digits.data)
    plt.subplot(1, 3, 2)
    plt.scatter(X_se[:, 0], X_se[:, 1], c=digits.target)
    
    isomap = Isomap(n_components=2, n_neighbors=20)
    isomap.fit(digits.data)
    X_iso = isomap.transform(digits.data)
    plt.subplot(1, 3, 3)
    plt.scatter(X_iso[:, 0], X_iso[:, 1], c=digits.target)
    plt.show()

    plt.matshow(pca.mean_.reshape(8, 8))                  # plot the mean components
    plt.matshow(pca.components_[0].reshape(8, 8))         # plot the first principal component
    plt.matshow(pca.components_[1].reshape(8, 8))         # plot the second principal component
    plt.show()
Example #4
def isomap(similarity, euclid=False):
    if not euclid:
        print('cheating')  # translated from the Czech 'podvod'
    model = Isomap(n_neighbors=15)
    result = model.fit_transform(similarity)

    return result.T
Example #5
def iso_map(data, target, target_names):
    iso = Isomap(n_components=2)
    data_projected = iso.fit_transform(data)
    formatter = plt.FuncFormatter(lambda i, *args: target_names[int(i)])
    plt.figure(figsize=(8, 8))
    plt.scatter(data_projected[:, 0], data_projected[:, 1], c=target,
                edgecolor='none', alpha=0.5,
                cmap=plt.cm.get_cmap('rainbow', len(target_names)))
    plt.colorbar(ticks=sorted(list(set(target))), format=formatter)
    #plt.clim(-200, 0)
    return iso, data_projected
def ISOMAP_transform(train_feature, test_feature, n_components, n_neighbors = 5):
    """ ISOMAP method
    """
    from sklearn.manifold import Isomap
    isomap = Isomap(n_neighbors, n_components).fit(train_feature)
    
    train_feature_transformed = isomap.transform(train_feature)
    test_feature_transformed = isomap.transform(test_feature)
    
    return train_feature_transformed, test_feature_transformed
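A minimal usage sketch for ISOMAP_transform, assuming scikit-learn is available; the digits data and the 80/20 split are illustrative, not from the original source.

from sklearn.datasets import load_digits
from sklearn.model_selection import train_test_split

digits = load_digits()
X_tr, X_te = train_test_split(digits.data, test_size=0.2, random_state=0)
train_2d, test_2d = ISOMAP_transform(X_tr, X_te, n_components=2, n_neighbors=10)
print(train_2d.shape, test_2d.shape)  # e.g. (1437, 2) (360, 2)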
Example #7
def embedDistanceMatrix(dmatDf, method='kpca', n_components=2, **kwargs):
    """Two-dimensional embedding of sequence distances in dmatDf,
    returning Nx2 x,y-coords: tsne, isomap, pca, mds, kpca, sklearn-tsne"""
    if isinstance(dmatDf, pd.DataFrame):
        dmat = dmatDf.values
    else:
        dmat = dmatDf

    if method == 'tsne':
        xy = tsne.run_tsne(dmat, no_dims=n_components, perplexity=kwargs['perplexity'])
    elif method == 'isomap':
        isoObj = Isomap(n_neighbors=10, n_components=n_components)
        xy = isoObj.fit_transform(dmat)
    elif method == 'mds':
        mds = MDS(n_components=n_components,
                  max_iter=3000,
                  eps=1e-9,
                  random_state=15,
                  dissimilarity="precomputed",
                  n_jobs=1)
        xy = mds.fit(dmat).embedding_
        rot = PCA(n_components=n_components)
        xy = rot.fit_transform(xy)
    elif method == 'pca':
        pcaObj = PCA(n_components=None)
        xy = pcaObj.fit_transform(dmat)[:, :n_components]
    elif method == 'kpca':
        pcaObj = KernelPCA(n_components=dmat.shape[0], kernel='precomputed', eigen_solver='dense')
        try:
            gram = dist2kernel(dmat)
        except Exception:
            print('Could not convert dmat to kernel for KernelPCA; using 1 - dmat/dmat.max() instead')
            gram = 1 - dmat / dmat.max()
        xy = pcaObj.fit_transform(gram)[:, :n_components]
    elif method == 'lle':
        lle = manifold.LocallyLinearEmbedding(n_neighbors=30, n_components=n_components, method='standard')
        xy = lle.fit_transform(dmat)  # fixed: 'dist' was undefined in this scope
    elif method == 'sklearn-tsne':
        tsneObj = TSNE(n_components=n_components, metric='precomputed', random_state=0, perplexity=kwargs['perplexity'])
        xy = tsneObj.fit_transform(dmat)
    elif method == 'umap':
        umapObj = umap.UMAP(n_components=n_components, metric='precomputed', **kwargs)
        xy = umapObj.fit_transform(dmat)
    else:
        print('Method unknown: %s' % method)
        return

    assert xy.shape[0] == dmatDf.shape[0]
    xyDf = pd.DataFrame(xy[:, :n_components], index=dmatDf.index, columns=np.arange(n_components))
    if method == 'kpca':
        """Not sure how negative eigenvalues should be handled here, but they are usually
        small so it shouldn't make a big difference"""
        setattr(xyDf, 'explained_variance_', pcaObj.lambdas_[:n_components]/pcaObj.lambdas_[pcaObj.lambdas_>0].sum())
    return xyDf
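A hedged usage sketch for embedDistanceMatrix: the 'tsne' and 'kpca' branches depend on project-local helpers (tsne.run_tsne, dist2kernel), so this sticks to the 'isomap' branch, which needs only the scikit-learn imports the function already assumes; the random points are illustrative.

import numpy as np
import pandas as pd
from scipy.spatial.distance import pdist, squareform

pts = np.random.RandomState(0).rand(50, 8)
dmatDf = pd.DataFrame(squareform(pdist(pts)))   # symmetric 50x50 distance matrix
xyDf = embedDistanceMatrix(dmatDf, method='isomap', n_components=2)
print(xyDf.shape)                               # (50, 2), indexed like dmatDf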
Example #8
    def ML(self):
        data = self.data.values[:, :-3]
        scaler = MinMaxScaler()
        #scaler = StandardScaler()
        X = scaler.fit_transform(data)
        #X = data

        isomap = Isomap(n_components=2)
        isomap.fit(X)
        #print pca.explained_variance_ratio_
        import pdb; pdb.set_trace()
class FloorplanEstimator:
    """
    Simple estimator for rough floorplans
    """
    def __init__(self):
        """
        Instantiate floorplan estimator
        """
        self.dimred = Isomap(n_neighbors=25, n_components=2)
        self._fingerprints = None
        self._label = None

    def fit(self, fingerprints, label):
        """
        Estimate floorplan from labeled fingerprints
        :param fingerprints: list of fingerprints
        :param label: list of corresponding labels
        """
        self.dimred.fit(fingerprints)
        self._fingerprints = fingerprints
        self._label = label

    def transform(self, fingerprints):
        """
        Get x,y coordinates of fingerprints on floorplan
        :param fingerprints: list of fingerprints
        :return: list of [x,y] coordinates
        """
        return self.dimred.transform(fingerprints)

    def draw(self):
        """
        Draw the estimated floorplan in the current figure
        """
        xy = self.dimred.transform(self._fingerprints)

        x_min, x_max = xy[:,0].min(), xy[:,0].max()
        y_min, y_max = xy[:,1].min(), xy[:,1].max()
        xx, yy = np.meshgrid(np.arange(x_min, x_max, 1.0),
                             np.arange(y_min, y_max, 1.0))
        clf = RadiusNeighborsClassifier(radius=3.0, outlier_label=0)
        clf.fit(xy, self._label)
        label = clf.predict(np.c_[xx.ravel(), yy.ravel()]).reshape(xx.shape)

        plt.pcolormesh(xx, yy, label)
        plt.scatter(xy[:,0], xy[:,1], c=self._label, vmin=0)
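A minimal sketch of how FloorplanEstimator might be driven, assuming numpy, matplotlib, Isomap and RadiusNeighborsClassifier are imported as the class expects; the synthetic fingerprints and room labels below are made up for illustration.

import numpy as np
import matplotlib.pyplot as plt

rng = np.random.RandomState(0)
fingerprints = rng.rand(200, 12)        # 200 fake signal-strength vectors
label = rng.randint(1, 4, size=200)     # three fake room labels (0 stays the outlier label)

est = FloorplanEstimator()
est.fit(fingerprints, label)
print(est.transform(fingerprints[:5]))  # floorplan coordinates of five fingerprints
est.draw()
plt.show()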
Example #11
def isomap(file_name, dimension, num_neighbors, label):
    balls = np.loadtxt(file_name)
    matrix = balls[:, 0:dimension]
    new_matrix = convert_angles_to_cos_sin(matrix)
    imap = Isomap(n_neighbors=num_neighbors, n_components=2, eigen_solver='auto', tol=0, max_iter=None,
                  path_method='auto', neighbors_algorithm='auto')
    transformed_matrix = imap.fit_transform(new_matrix)
    ball_coords = np.zeros((balls.shape[0], dimension+3))
    for i in range(balls.shape[0]):
        ball_coords[i, 0:dimension] = balls[i, 0:dimension].tolist()
        ball_coords[i, dimension:dimension+2] = transformed_matrix[i]
        if label == 'cluster':
            ball_coords[i, dimension+2] = balls[i, dimension].tolist()
        elif label == 'eq':
            ball_coords[i, dimension+2] = (-0.0019872041*300*np.log(abs(balls[i, dimension+1]))).tolist()
        elif label == 'committor':
            ball_coords[i, dimension+2] = (balls[i, dimension+2]/abs(balls[i, dimension+1])).tolist()
        print(' '.join([str(x) for x in ball_coords[i, :]]))
Example #12
    def isomap(self, data):
        print('Isomap neighbours:', self.parameters["n_neighbors"])
        print('Isomap components, i.e. final number of coordinates:', self.k)

        k_means_n_clusters = self.parameters['k_means_n_clusters']
        isomap_params = dict(self.parameters)
        del isomap_params["k_means_n_clusters"]
        m = Isomap(neighbors_algorithm='kd_tree', **isomap_params)
        x = m.fit_transform(data)

        # quality diagnostics: reconstruction error and residual variance
        error = m.reconstruction_error()
        geod_d = m.dist_matrix_.flatten()
        new_euclid_d = cdist(x, x, metric='euclidean').flatten()
        corr = 1 - pearsonr(geod_d, new_euclid_d)[0]**2

        new_data = x
        print(self.parameters)
        return self.batch_kmeans(new_data, parameters=dict(zip(params["mini-batchk-means"],
                                 [k_means_n_clusters, 1000, 500, 1000, 'k-means++', 5])))
Example #13
    def isomap(self, n_components=2, n_neighbors=3, show=False):
        """
        Calculates lower dimention coordinates using the isomap algorithm.

        :param n_components: dimentionality of the reduced space
        :type n_components: int, optional

        :param n_neighbors: Used by isomap to determine the number of neighbors
            for each point. Large neighbor size tends to produce a denser map.
        :type n_neighbors: int, optional

        :param show: Shows the calculated coordinates if true.
        :type show: boolean, optional
        """

        model = Isomap(n_components=n_components, n_neighbors=n_neighbors)
        self.pos  = model.fit(self.dismat).embedding_

        if show:
            return self.pos
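The method above embeds a precomputed dissimilarity matrix stored in self.dismat; here is a standalone sketch of the same pattern, under the assumption that the matrix rows are fed to Isomap as feature vectors (which is what fit(self.dismat) does). The synthetic matrix is illustrative.

import numpy as np
from scipy.spatial.distance import pdist, squareform
from sklearn.manifold import Isomap

rng = np.random.RandomState(0)
dismat = squareform(pdist(rng.rand(60, 5)))            # synthetic 60x60 dissimilarity matrix
pos = Isomap(n_components=2, n_neighbors=5).fit(dismat).embedding_
print(pos.shape)                                       # (60, 2)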
    def compute_iso_map(self, original_features):
        # drop the filename column and replace NaNs before embedding
        # (.as_matrix() was removed in pandas 1.0; .to_numpy() replaces it)
        feature_matrix = original_features.drop(columns='file').to_numpy()
        feature_matrix = np.nan_to_num(feature_matrix)

        dimen_reductor = Isomap(n_components=self.n_components)

        full_size = feature_matrix.shape[0]
        train_size = int(self.ratio * full_size)

        # note: np.random.choice samples with replacement by default
        row_indices = list(range(full_size))
        feature_training_indices = np.random.choice(row_indices, size=train_size)
        training_feature_matrix = feature_matrix[feature_training_indices, :]

        # fit on the subsample, then embed every row
        dimen_reductor.fit(training_feature_matrix)
        reduced_features = dimen_reductor.transform(feature_matrix)

        # min-max scale each embedded dimension to [0, 1]
        reduced_normalized_features = reduced_features - reduced_features.min(axis=0)
        reduced_normalized_features /= reduced_normalized_features.max(axis=0)

        return reduced_normalized_features
Example #15
def mult_scl(X, labels):
    print('labels:')
    for i, label in zip(range(1, len(labels) + 1), labels):
        print('{}: {}'.format(i, label))

    isomap = Isomap()
    points = isomap.fit(np.nan_to_num(X)).embedding_
    f, (ax1, ax2, ax3) = plt.subplots(1, 3)
    plot_location(labels, ax3)
    ax1.scatter(points[:, 0], points[:, 1], s=20, c='r')
    ax1.set_title('Isomap')
    add_labels(labels, points, ax1)

    mds = MDS()
    points = mds.fit(np.nan_to_num(X)).embedding_
    ax2.scatter(points[:, 0], points[:, 1], s=20, c='g')
    ax2.set_title('MDS')
    add_labels(labels, points, ax2)

    plt.show()
def outputBin(data, ctrlSize,nbPheno, lPheno, binSize, sigma, nbDim=2, nbNeighbours=20):
    m = Isomap(n_neighbors=nbNeighbours, n_components=nbDim, eigen_solver='auto', tol=0, max_iter=None, path_method='auto', neighbors_algorithm='kd_tree')
    D = m.fit_transform(data)
    ctrl = D[:ctrlSize]
    ctrlTree = KDTree(ctrl, leafsize=10)
    length = ctrlSize

    mini = np.amin(D, 0)
    maxi = np.amax(D, 0)
    nbPointsX = int((maxi[0]-mini[0])/float(binSize))+1
    nbPointsY = int((maxi[1]-mini[1])/float(binSize))+1
    
    result = np.zeros(shape=(nbPheno, nbPointsX, nbPointsY))
    denomCtrl = np.zeros(shape=(nbPointsX, nbPointsY))
    
    for pointX, pointY in product(range(nbPointsX), range(nbPointsY)):
        x = mini[0] + (pointX + 0.5) * binSize
        y = mini[1] + (pointY + 0.5) * binSize
        ctrldou, ctrli = ctrlTree.query((x, y), ctrlSize, distance_upper_bound=binSize / sqrt(2))
        if min(ctrldou) < 100:
            ctrlPoint = filter(lambda t: t[1] < ctrl.shape[0] and np.all(np.abs(ctrl[t[1]] - (x, y)) < (binSize / 2.0, binSize / 2.0)), zip(ctrldou, ctrli))
            for distance, cPoint in ctrlPoint:
                denomCtrl[pointX, pointY] += dist((x, y), ctrl[cPoint], sigma)
                
    for ifilm in range(nbPheno):
        print('film ', ifilm)
        pheno = D[length:length + lPheno[ifilm]]
        phenoTree = KDTree(pheno, leafsize=10)

        for pointX, pointY in product(range(nbPointsX), range(nbPointsY)):
            x = mini[0] + (pointX + 0.5) * binSize
            y = mini[1] + (pointY + 0.5) * binSize
            denom = denomCtrl[pointX, pointY]
            phenodou, phenoi = phenoTree.query((x, y), data.shape[0] - ctrlSize, distance_upper_bound=binSize / sqrt(2))
            if min(phenodou) < 100:
                phenoPoint = filter(lambda t: t[1] < pheno.shape[0] and np.all(np.abs(pheno[t[1]] - (x, y)) < (binSize / 2.0, binSize / 2.0)), zip(phenodou, phenoi))
                for distance, pPoint in phenoPoint:
                    local = dist((x, y), pheno[pPoint], sigma)
                    result[ifilm, pointX, pointY] += local
                    denom += local
            # normalise per bin; in the original this statement had dedented out
            # of the loop and was applied to the last bin only
            if denom > 0:
                result[ifilm, pointX, pointY] /= denom
        length += lPheno[ifilm]
    plotMovies('/media/lalil0u/New/workspace2/Tracking/images', result, 'pattern_b{}_s{}'.format(binSize, sigma))
    return result
def plot_2d(dataset):
    """TODO: Docstring for plot_2d.
    :returns: TODO

    """
    iso = Isomap(n_components=2)
    projected = iso.fit_transform(dataset.data.toarray())

    print('projected: samples: %s, features: %s'
          % (projected.shape[0], projected.shape[1]))

    all_scatter = []
    colors = cm.rainbow(np.linspace(0, 1, len(dataset.target_names)), alpha=0.5)
    for i in range(len(dataset.target_names)):
        points = projected[dataset.target==i,:]
        # vmin/vmax dropped: they only apply when c= is given a value array
        cur = plt.scatter(points[:, 0], points[:, 1], color=colors[i],
                          edgecolor='k', lw=0.6)
        all_scatter.append(cur)
    plt.legend(all_scatter, dataset.target_names,
               loc='lower left', scatterpoints=1)
    plt.savefig('isomap2d', dpi=500)
Example #18
def embedDistanceMatrix(dist, method='tsne'):
    """MDS embedding of sequence distances in dist, returning Nx2 x,y-coords: tsne, isomap, pca, mds, kpca"""
    if method == 'tsne':
        xy = tsne.run_tsne(dist, no_dims=2)
        #xy=pytsne.run_tsne(adist,no_dims=2)
    elif method == 'isomap':
        isoObj = Isomap(n_neighbors=10, n_components=2)
        xy = isoObj.fit_transform(dist)
    elif method == 'mds':
        mds = manifold.MDS(n_components=2, max_iter=3000, eps=1e-9, random_state=15,
                           dissimilarity="precomputed", n_jobs=1)
        xy = mds.fit(dist).embedding_
        rot = PCA(n_components=2)
        xy = rot.fit_transform(xy)
    elif method == 'pca':
        pcaObj = PCA(n_components=2)
        xy = pcaObj.fit_transform(1-dist)
    elif method == 'kpca':
        pcaObj = KernelPCA(n_components=2, kernel='precomputed')
        xy = pcaObj.fit_transform(1-dist)
    elif method == 'lle':
        lle = manifold.LocallyLinearEmbedding(n_neighbors=30, n_components=2, method='standard')
        xy = lle.fit_transform(dist)
    return xy
Example #19
#Set seed
np.random.seed(42)

#-------------------------------FEATURE EXTRACTION---------------------------------------------------------

#Transform the images in the images folder into a 2D numpy array with one image per row and one pixel per column
data = aux.images_as_matrix()

#Extract 6 features using Principal Component Analysis
PCA_features = PCA(n_components=6).fit_transform(data)

#Extract 6 features using t-Distributed Stochastic Neighbor Embedding
TSNE_features = TSNE(n_components=6, method="exact").fit_transform(data)

#Extract 6 features using Isometric mapping with Isomap
ISOMAP_features = Isomap(n_components=6).fit_transform(data)

#Save the 18 extracted features into one feature matrix
matrix = np.concatenate((PCA_features, TSNE_features, ISOMAP_features), axis=1)
np.savez('featureextraction.npz', matrix)

#-------------------------------FEATURE SELECTION---------------------------------------------------------


def scatter_plot(features):
    """ 
    Another method to check the correlation between features
    """
    plt.figure()
    scatter_matrix(features, alpha=0.5, figsize=(15, 10), diagonal='kde')
    plt.savefig("scatter_plot.png")
Example #20
    fa_projected_data = FactorAnalysis(
        n_components=PROJECTED_DIMENSIONS).fit_transform(neural_data)
    color_3D_projection(fa_projected_data, variable_data, 'FA; ' + Title)

    # ICA

    ICA_projected_data = FastICA(
        n_components=PROJECTED_DIMENSIONS).fit_transform(neural_data)
    color_3D_projection(ICA_projected_data, variable_data, 'ICA; ' + Title)

    # Isomap

    N_NEIGHBORS = 30
    Isomap_projected_data = Isomap(
        n_components=PROJECTED_DIMENSIONS,
        n_neighbors=N_NEIGHBORS).fit_transform(neural_data)
    color_3D_projection(Isomap_projected_data, variable_data,
                        'Isomap; ' + Title)

    # tSNE

    PERPLEXITY = 30  # normally ranges 5-50
    TSNE_projected_data = TSNE(
        n_components=PROJECTED_DIMENSIONS,
        perplexity=PERPLEXITY).fit_transform(neural_data)
    color_3D_projection(TSNE_projected_data, variable_data, 'tSNE; ' + Title)

    # Multidimensional scaling

    MDS_projected_data = MDS(
Example #21
#maxabsscaler = pp.MaxAbsScaler()
#maxabsscaler.fit(X)
#X = maxabsscaler.transform(X)
#print('MaxAbsScaler\n========')

#X = pp.normalize(X)
#print('normalizer\n========')

# TODO: Use PCA to reduce noise, n_components 4-14

nc = 5
#pca = PCA(n_components=nc)
#pca.fit(X)
#X = pca.transform(X)
#print('PCA: ', nc)

# Use Isomap to reduce noise, n_neighbors 2-5
nn = 4
im = Isomap(n_neighbors=nn, n_components=nc)
im.fit(X)
X = im.transform(X)
print('Isomap: ', nn, ' comp: ', nc)

# TODO: train_test_split 30% and random_state=7

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=7)

# TODO: Create an SVC, train and score against defaults
result = findMaxSVC()
print(result['score'])
Example #22
def makeRingManifold(spikes, ep, angle, bin_size=200):
    """
    spikes : dict of hd spikes
    ep : epoch to restrict
    angle : tsd of angular direction
    bin_size : in ms
    """
    neurons = np.sort(list(spikes.keys()))
    inputs = []
    angles = []
    sizes = []
    bins = np.arange(
        ep.as_units('ms').start.iloc[0],
        ep.as_units('ms').end.iloc[0] + bin_size, bin_size)
    spike_counts = pd.DataFrame(index=bins[0:-1] + np.diff(bins) / 2,
                                columns=neurons)

    for i in neurons:
        spks = spikes[i].as_units('ms').index.values
        spike_counts[i], _ = np.histogram(spks, bins)

    rates = np.sqrt(spike_counts / (bin_size))

    epi = nts.IntervalSet(ep.loc[0, 'start'], ep.loc[0, 'end'])
    angle2 = angle.restrict(epi)
    newangle = pd.Series(index=np.arange(len(bins) - 1))
    tmp = angle2.groupby(
        np.digitize(angle2.as_units('ms').index.values, bins) - 1).mean()
    tmp = tmp.loc[np.arange(len(bins) - 1)]
    newangle.loc[tmp.index] = tmp
    newangle.index = pd.Index(bins[0:-1] + np.diff(bins) / 2.)

    tmp = rates.rolling(window=200,
                        win_type='gaussian',
                        center=True,
                        min_periods=1,
                        axis=0).mean(std=2).values
    sizes.append(len(tmp))
    inputs.append(tmp)
    angles.append(newangle)

    inputs = np.vstack(inputs)

    imap = Isomap(n_neighbors=20, n_components=2,
                  n_jobs=-1).fit_transform(inputs)

    H = newangle.values / (2 * np.pi)
    HSV = np.vstack((H, np.ones_like(H), np.ones_like(H))).T
    RGB = hsv_to_rgb(HSV)

    fig, ax = subplots()  # subplots() already provides the axes
    ax.set_aspect(aspect=1)
    ax.scatter(imap[:, 0],
               imap[:, 1],
               c=RGB,
               marker='o',
               alpha=0.5,
               zorder=2,
               linewidth=0,
               s=40)

    # hsv
    display_axes = fig.add_axes([0.2, 0.25, 0.05, 0.1], projection='polar')
    colormap = plt.get_cmap('hsv')
    norm = mpl.colors.Normalize(0.0, 2 * np.pi)
    xval = np.arange(0, 2 * pi, 0.01)
    yval = np.ones_like(xval)
    display_axes.scatter(xval,
                         yval,
                         c=xval,
                         s=20,
                         cmap=colormap,
                         norm=norm,
                         linewidths=0,
                         alpha=0.8)
    display_axes.set_yticks([])
    display_axes.set_xticks(np.arange(0, 2 * np.pi, np.pi / 2))
    display_axes.grid(False)

    show()

    return imap, bins[0:-1] + np.diff(bins) / 2
Example #23
plt.ylabel("MLLE2")

# KERNEL PRINCIPAL COMPONENT ANALYSIS (KPCA)
print("Performing Kernel Principal Component Analysis (KPCA) ...")
plt.subplot(333)
kpca = KernelPCA(n_components=2, kernel='cosine').fit_transform(X)
plt.scatter(kpca[:, 0], kpca[:, 1], c=Y, cmap='viridis', s=1)
plt.title('Kernel PCA')
#plt.colorbar()
plt.xlabel("KPCA1")
plt.ylabel("KPCA2")

# ISOMAP
print("Performing Isomap Plotting ...")
plt.subplot(334)
model = Isomap(n_components=2)
isomap = model.fit_transform(X)
plt.scatter(isomap[:, 0], isomap[:, 1], c=Y, cmap='viridis', s=1)
plt.title('Isomap')
#plt.colorbar()
plt.xlabel("ISO1")
plt.ylabel("ISO2")

# LAPLACIAN EIGENMAP
print("Performing Laplacian Eigenmap (Spectral Embedding) ...")
plt.subplot(335)
model = SpectralEmbedding(n_components=2, n_neighbors=50)
se = model.fit_transform(X)
plt.scatter(se[:, 0], se[:, 1], c=Y, cmap='viridis', s=1)
plt.title('Laplacian Eigenmap')
#plt.colorbar()
Example #24
    print(x_train.shape)
    print(x_train)

    models = []
    emb_size = 64
    num_neighbors = 32

    for emb_size in (32,64):
        print("********************* emb_size="+str(emb_size)+" ***************")

        models=[]
        models.append(LocallyLinearEmbedding(n_neighbors=num_neighbors,n_components=emb_size,n_jobs=multiprocessing.cpu_count()))
        models.append(SpectralEmbedding(n_neighbors=num_neighbors,n_components=emb_size,n_jobs=multiprocessing.cpu_count()))
        models.append(PCA(n_components=emb_size))
        models.append(MDS(n_components=emb_size,n_jobs=multiprocessing.cpu_count()))
        models.append(Isomap(n_neighbors=num_neighbors, n_components=emb_size, n_jobs=multiprocessing.cpu_count()))
        models.append('matrix2vec')

        model_names = ['lle', 'le', 'pca', 'MDS', 'ISOMAP', 'matrix2vec']  # names corresponding to model


        for index, embedding in enumerate(models):
            print('Start running model '+model_names[index]+"...")
            start = datetime.datetime.now()
            X_transformed = np.zeros((x_train.shape[0], emb_size))
            if index <= 4:
                # the first five models share the sklearn fit_transform API
                X_transformed = embedding.fit_transform(x_train)
            else:
                X_transformed = matrix2vec.matrix2vec(x_train, emb_size, topk=5, num_iter=10)
Example #25
#03-03.py
X, y = preprocess(data, shuffle=False, n_samples=1000, normalization=None)

from sklearn.manifold import Isomap
iso = Isomap(n_neighbors=15, n_components=3)
X_proj = iso.fit_transform(X)

three_component_plot(X_proj[:, 0],
                     X_proj[:, 1],
                     X_proj[:, 2],
                     y,
                     labels,
                     trim_outliers=True)
Example #26
def show_batch(image_batch):
    plt.figure(figsize=(7, 5))
    for i in range(24):
        ax = plt.subplot(4, 6, i + 1)
        plt.imshow(image_batch[i], cmap='gray')
        plt.axis('off')


# %%
show_batch(face_data.images)
#plt.savefig('faces.png', dpi=72, bbox_inches='tight')


# %%
from sklearn.manifold import Isomap
model = Isomap(n_components=2)
isomap_face = model.fit_transform(face_data.data)
isomap_face.shape


# %%
data_num = isomap_face.shape[0]
data_count = list( range(data_num))
count_ind = data_count[0::2]
from matplotlib import offsetbox

def plotting_out(data, model, images=None, ax=None,cmap='gray'):
    ax = ax or plt.gca()
    model_pred = model.fit_transform(data)
    ax.plot(model_pred[:, 0], model_pred[:, 1], '.')
    for i in count_ind:
Example #27
def isomap(X):
    i = Isomap()
    return i.fit_transform(X)
Example #28
#%% comparing predictions with labels
Y_pred = kmeans_fit.predict(X_test)     #!
for p, l in zip(Y_pred, Y_test):
    print(p, l)

import pandas as pd
digdf = pd.DataFrame({'pred': Y_pred, 'label': Y_test})
sum(digdf['pred'] - digdf['label'] != 0)  # 396

#%%
# https://scikit-learn.org/stable/modules/generated/sklearn.manifold.Isomap.html#sklearn.manifold.Isomap
# https://scikit-learn.org/stable/modules/manifold.html#isomap
from sklearn.manifold import Isomap

X_iso = Isomap(n_neighbors=10).fit_transform(X_train)

# Compute cluster centers and predict cluster index for each sample
clusters = kmeans_fit.fit_predict(X_train)     #!

# Create a plot with subplots in a grid of 1X2
fig, ax = plt.subplots(1, 2, figsize=(8, 4))

# Adjust layout
fig.suptitle('Predicted Versus Training Labels', fontsize=14, fontweight='bold')
fig.subplots_adjust(top=0.85)

# Add scatterplots to the subplots
ax[0].scatter(X_iso[:, 0], X_iso[:, 1], c=clusters)
ax[0].set_title('Predicted Training Labels')
ax[1].scatter(X_iso[:, 0], X_iso[:, 1], c=Y_train)
def isomap(X=None, W=None, num_vecs=None, k=None):
    embedder = Isomap(n_neighbors=k, n_components=num_vecs)
    return embedder.fit_transform(X)
Example #30
for i, ax in enumerate(axes.flat):
    ax.imshow(digits.images[i], cmap='binary')
    ax.text(0.05,
            0.05,
            str(digits.target[i]),
            transform=ax.transAxes,
            color='green')
    ax.set_xticks([])
    ax.set_yticks([])
plt.show()

# Dimensionality Reduction
from sklearn.manifold import Isomap

iso = Isomap(n_components=2)
data_projected = iso.fit_transform(digits.data)
print(data_projected.shape)

# plot our new data in matplotlib
plt.scatter(data_projected[:, 0],
            data_projected[:, 1],
            c=digits.target,
            edgecolor='none',
            alpha=0.5,
            cmap=plt.cm.get_cmap('nipy_spectral', 10))
plt.clim(-0.5, 9.5)  # centre the 10 colour bands on the integer digit labels
plt.colorbar().set_ticks(np.arange(0, 10, 1))
plt.show()
Example #31
    model = PCA(n_components=2)
    model.fit(X_train)

else:
    print('Computing 2D Isomap Manifold')
    #
    # TODO: Implement Isomap here. save your model into the variable 'model'
    # Experiment with K values from 5-10.
    # You should reduce down to two dimensions.
    #
    # .. your code here ..

    from sklearn.manifold import Isomap

    model = Isomap(n_neighbors=5, n_components=2)
    model.fit(X_train)

#
# TODO: Train your model against data_train, then transform both
# data_train and data_test using your model. You can save the results right
# back into the variables themselves.
#
# .. your code here ..

X_train = model.transform(X_train)
X_test = model.transform(X_test)

#
# TODO: Implement and train KNeighborsClassifier on your projected 2D
# training data here. You can use any K value from 1 - 15, so play around
Example #32
def apply_ISOMap(proj_data, proj_weights=None):
    model = Isomap(n_neighbors=4, n_components=2)
    result = model.fit_transform(proj_data.T)
    return result
Example #33
from __future__ import division
import sys
from sklearn.decomposition import PCA
from sklearn.manifold import Isomap
from sklearn.manifold import LocallyLinearEmbedding
from sklearn import preprocessing
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.cm as cm
from mpl_toolkits.mplot3d import Axes3D
import random
from colorsys import hsv_to_rgb

pca = PCA(n_components=2)
isomap = Isomap(n_components=2)
lle = LocallyLinearEmbedding(n_components=2)
data = np.genfromtxt('data01_small.txt', delimiter=',')
pca_xform = pca.fit_transform(data)
isomap_xform = isomap.fit_transform(data)
lle_xform = lle.fit_transform(data)
label = [0]*100+[1]*100
rgbs = [(0.5,0,0), (0,0.5,0)]


plt.figure()
xs = pca_xform[:,0]
ys = pca_xform[:,1]
ax = plt.subplot(111)
for i in range(len(xs)):
	ax.text(xs[i], ys[i], str(label[i]), color=rgbs[label[i]], fontdict={'weight': 'bold', 'size': 9})
t = (max(xs)-min(xs))*0.1
Example #34
threes_data = threes_df.values[:, 1: len(df.columns) - 1]
threes_data = (threes_data - threes_data.min()) / \
    (threes_data.max() - threes_data.min())
n_neighbors = 5
n_components = 4

# 1. Apply LLE

lle = LocallyLinearEmbedding(n_neighbors=n_neighbors,
                             n_components=n_components)
lle_data = lle.fit_transform(threes_data)
lle_df = pd.DataFrame(lle_data)
plot_three("LLE", lle_df, 0, 1, threes_df, 0.45)

# 2. Apply ISOMAP
iso = Isomap(n_neighbors=n_neighbors, n_components=n_components)
iso_data = iso.fit_transform(threes_data)
iso_df = pd.DataFrame(iso_data)
plot_three("Isomap", iso_df, 0, 1, threes_df, 0.45)


# 3. Use the Naive Bayes classier to classify the dataset based on the projected 4-dimension representations of the LLE and ISOMAP.
df_data = df.values[:, 1: len(df.columns) - 1]
test_size = 0.3
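The helper below is truncated, so here is a minimal hedged sketch of the Naive Bayes step described in the comment above, using GaussianNB; proj and y are placeholders for one of the 4-dimensional projections (lle_data or iso_data) and its aligned class labels.

from sklearn.naive_bayes import GaussianNB
from sklearn.model_selection import train_test_split

def nb_accuracy(proj, y, test_size=0.3):
    # split the projected points, fit Gaussian Naive Bayes, report accuracy
    X_tr, X_te, y_tr, y_te = train_test_split(proj, y, test_size=test_size, random_state=0)
    return GaussianNB().fit(X_tr, y_tr).score(X_te, y_te)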


def calc_mean_accuracy(data, threshold=0.00015, miniter=500):
    print("Diff threshold {}".format(thresh))
    i = 0
    scores = []
    mean_accuracy = 0
def plot2d(X, y, scale=True, normalize=False, embedding='pca', title=''):
	"""
	Plot the data transformed into two dimensions by the chosen embedding.
	For PCA, the transform picks a new basis such that the first dimension
	contains the maximal variance and the following dimensions the maximal
	remaining variance. This should spread the observed n-dimensional data
	maximally. It is unsupervised and does not consider target values.
	"""
	if (scale): 
		scaler = StandardScaler()
		X = scaler.fit_transform(X)

	if (normalize): 
		normalizer = Normalizer(norm='l2')
		X = normalizer.fit_transform(X)
		
	if (embedding == 'pca'):
		pca = PCA(n_components=2)
		X_transformed = pca.fit_transform(X)
	elif (embedding == 'isomap'):
		isomap = Isomap(n_components=2, n_neighbors=20)
		X_transformed = isomap.fit_transform(X)
	elif (embedding == 'lle'):
		lle = LocallyLinearEmbedding(n_components=2, n_neighbors=5)
		X_transformed = lle.fit_transform(X)
	elif (embedding == 'tsne'):
		t_sne = TSNE(n_components=2)
		X_transformed = t_sne.fit_transform(X)
	elif (embedding == 'spectral'):
		se = SpectralEmbedding(n_components=2)
		X_transformed = se.fit_transform(X)
	elif (embedding == 'mds'):
		mds = MDS(n_components=2)
		X_transformed = mds.fit_transform(X)
	elif (embedding == 'gallery'):
		plt.figure(1)
		
		plt.subplot(231)
		plt.title('pca')
		X_t = PCA(n_components=2).fit_transform(X)
		plt.scatter(X_t[:,0 ], X_t[:, 1], c=y)

		plt.subplot(232)
		plt.title('isomap')
		X_t = Isomap(n_neighbors=20).fit_transform(X)
		plt.scatter(X_t[:,0 ], X_t[:, 1], c=y)

		plt.subplot(233)
		plt.title('lle')
		X_t = LocallyLinearEmbedding(n_neighbors=20).fit_transform(X)
		plt.scatter(X_t[:,0 ], X_t[:, 1], c=y)

		plt.subplot(234)
		plt.title('tsne')
		X_t = TSNE().fit_transform(X)
		plt.scatter(X_t[:,0 ], X_t[:, 1], c=y)

		plt.subplot(235)
		plt.title('spectral')
		X_t = SpectralEmbedding().fit_transform(X)
		plt.scatter(X_t[:,0 ], X_t[:, 1], c=y)

		plt.subplot(236)
		plt.title('mds')
		X_t = MDS().fit_transform(X)
		plt.scatter(X_t[:,0 ], X_t[:, 1], c=y)

		plt.suptitle('Gallery transforms ' + title)

		return plt
	else:
		raise ValueError("Choose between pca, isomap and tsne")

	plt.title(title + ' ' + embedding + ' plot')
	sc = plt.scatter(X_transformed[:, 0], X_transformed[:, 1], c=y)
	plt.colorbar(sc)
	return plt
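A hedged usage sketch for plot2d on scikit-learn's digits data (illustrative; the function itself assumes StandardScaler, Normalizer and the manifold classes are imported at module level). A subsample keeps the t-SNE/MDS panels of the 'gallery' mode reasonably fast.

from sklearn.datasets import load_digits

digits = load_digits()
X, y = digits.data[:500], digits.target[:500]
plot2d(X, y, embedding='isomap', title='digits').show()
plot2d(X, y, embedding='gallery', title='digits').show()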
Example #36
# title is your chart title
# x is the principal component you want displayed on the x-axis, Can be 0 or 1
# y is the principal component you want displayed on the y-axis, Can be 1 or 2
#
# .. your code here ..
from sklearn.decomposition import PCA
pca = PCA(n_components=3)
pca.fit(df)
T = pca.transform(df)
Plot2D(T, "PCA 1 2", 1, 2)

#
# TODO: Implement Isomap here. Reduce the dataframe df down
# to THREE components. Once you've done that, call Plot2D using
# the first two components.
#
# .. your code here ..
from sklearn.manifold import Isomap
imap = Isomap(n_neighbors=8, n_components=3)
imap.fit(df)
T2 = imap.transform(df)
Plot2D(T2, "Isomap", 1, 2)
#
# TODO: If you're up for a challenge, draw your dataframes in 3D
# Even if you're not, just do it anyway.
#
# .. your code here ..


plt.show()
    return data_n


def scatter_3d(X, y):
    fig = plt.figure(figsize=(6, 5))
    ax = fig.add_subplot(111, projection='3d')
    ax.scatter(X[:, 0], X[:, 1], X[:, 2], c=y, cmap=plt.cm.hot)
    ax.view_init(10, -70)
    ax.set_xlabel("$x_1$", fontsize=18)
    ax.set_ylabel("$x_2$", fontsize=18)
    ax.set_zlabel("$x_3$", fontsize=18)
    plt.show()


if __name__ == '__main__':
    X, Y = make_s_curve(n_samples=500, noise=0.1, random_state=42)

    data_1 = my_Isomap(X, 2, 10)

    data_2 = Isomap(n_neighbors=10, n_components=2).fit_transform(X)

    plt.figure(figsize=(8, 4))
    plt.subplot(121)
    plt.title("my_Isomap")
    plt.scatter(data_1[:, 0], data_1[:, 1], c=Y)

    plt.subplot(122)
    plt.title("sklearn_Isomap")
    plt.scatter(data_2[:, 0], data_2[:, 1], c=Y)
    plt.savefig("Isomap1.png")
    plt.show()
Example #38
# Build the output arrays
cells = opts.high // opts.step  # integer division: number of dimension settings
isomap_gmm_results = np.zeros((cells,opts.iters))

D = scale(X)

n_samples, n_features = D.shape
# chosen by hyperparam search in a separate test.
n_neighbors = 10

# For the specified number of principal components, do the clustering
dimension_list = range(opts.low, opts.high + 1, opts.step)
data_files = []
for i in dimension_list:
    index = (i // opts.step) - 1
    isomap = Isomap(n_neighbors=n_neighbors, n_components=i)
    X_iso = isomap.fit_transform(D)
     
    for j in range(0,opts.iters,1): 
        # sklearn's old GMM class was removed; GaussianMixture is its replacement
        gaussmix = GaussianMixture(n_components=true_k, covariance_type='tied', n_init=10, max_iter=1000)
        gaussmix.fit(X_iso)
        gaussmix_labels = gaussmix.predict(X_iso)
        homog = metrics.homogeneity_score(labels[:,0], gaussmix_labels)
        print "Homogeneity: %0.3f" % homog
        test_result = {"Model": 'Isomap', "Dimension": i, "Homogeneity": homog, "Trial": j}
        index = pd.Index([0], name='rows')
        data_files.append(pd.DataFrame(data=test_result,index=index))
        
print "...Done"
print "...rbinding DataFrames"
master_df = data_files[0]
Example #39
def embedDistanceMatrix(dmatDf, method='kpca', n_components=2, **kwargs):
    """Two-dimensional embedding of sequence distances in dmatDf,
    returning Nx2 x,y-coords: tsne, isomap, pca, mds, kpca, sklearn-tsne"""
    if isinstance(dmatDf, pd.DataFrame):
        dmat = dmatDf.values
    else:
        dmat = dmatDf

    if method == 'isomap':
        isoObj = Isomap(n_neighbors=10, n_components=n_components)
        xy = isoObj.fit_transform(dmat)
    elif method == 'mds':
        mds = MDS(n_components=n_components,
                  max_iter=3000,
                  eps=1e-9,
                  random_state=15,
                  dissimilarity="precomputed",
                  n_jobs=1)
        xy = mds.fit(dmat).embedding_
        rot = PCA(n_components=n_components)
        xy = rot.fit_transform(xy)
    elif method == 'pca':
        pcaObj = PCA(n_components=None)
        xy = pcaObj.fit_transform(dmat)[:, :n_components]
    elif method == 'kpca':
        pcaObj = KernelPCA(n_components=dmat.shape[0],
                           kernel='precomputed',
                           eigen_solver='dense')
        try:
            gram = dist2kernel(dmat)
        except Exception:
            print(
                'Could not convert dmat to kernel for KernelPCA; using 1 - dmat/dmat.max() instead'
            )
            gram = 1 - dmat / dmat.max()
        xy = pcaObj.fit_transform(gram)[:, :n_components]
    elif method == 'lle':
        lle = LocallyLinearEmbedding(n_neighbors=30,
                                     n_components=n_components,
                                     method='standard')
        xy = lle.fit_transform(dmat)  # fixed: 'dist' was undefined in this scope
    elif method == 'sklearn-tsne':
        tsneObj = TSNE(n_components=n_components,
                       metric='precomputed',
                       random_state=0,
                       perplexity=kwargs['perplexity'])
        xy = tsneObj.fit_transform(dmat)
    else:
        print('Method unknown: %s' % method)
        return

    assert xy.shape[0] == dmatDf.shape[0]
    xyDf = pd.DataFrame(xy[:, :n_components],
                        index=dmatDf.index,
                        columns=np.arange(n_components))
    if method == 'kpca':
        """Not sure how negative eigenvalues should be handled here, but they are usually
        small so it shouldn't make a big difference"""
        xyDf.explained_variance_ = pcaObj.lambdas_[:n_components] / pcaObj.lambdas_[
            pcaObj.lambdas_ > 0].sum()
    return xyDf
Example #40
def cluster_manifold_in_embedding(hl, y, n_clusters, save_dir, visualize):
    # find manifold on autoencoded embedding
    if args.manifold_learner == 'UMAP':
        md = float(args.umap_min_dist)
        hle = umap.UMAP(random_state=0,
                        metric=args.umap_metric,
                        n_components=args.umap_dim,
                        n_neighbors=args.umap_neighbors,
                        min_dist=md).fit_transform(hl)
    elif args.manifold_learner == 'LLE':
        hle = LocallyLinearEmbedding(
            n_components=args.umap_dim,
            n_neighbors=args.umap_neighbors).fit_transform(hl)
    elif args.manifold_learner == 'tSNE':
        hle = TSNE(n_components=args.umap_dim,
                   n_jobs=16,
                   random_state=0,
                   verbose=0).fit_transform(hl)
    elif args.manifold_learner == 'isomap':
        hle = Isomap(
            n_components=args.umap_dim,
            n_neighbors=5,
        ).fit_transform(hl)

    # clustering on new manifold of autoencoded embedding
    if args.cluster == 'GMM':
        gmm = mixture.GaussianMixture(covariance_type='full',
                                      n_components=n_clusters,
                                      random_state=0)
        gmm.fit(hle)
        y_pred_prob = gmm.predict_proba(hle)
        y_pred = y_pred_prob.argmax(1)
    elif args.cluster == 'KM':
        km = KMeans(init='k-means++',
                    n_clusters=n_clusters,
                    random_state=0,
                    n_init=20)
        y_pred = km.fit_predict(hle)
    elif args.cluster == 'SC':
        sc = SpectralClustering(n_clusters=n_clusters,
                                random_state=0,
                                affinity='nearest_neighbors')
        y_pred = sc.fit_predict(hle)

    y_pred = np.asarray(y_pred)
    y_pred = y_pred.reshape(len(y_pred), )
    y = np.asarray(y)
    y = y.reshape(len(y), )
    acc = np.round(cluster_acc(y, y_pred), 5)
    nmi = np.round(metrics.normalized_mutual_info_score(y, y_pred), 5)
    ari = np.round(metrics.adjusted_rand_score(y, y_pred), 5)
    print(args.dataset + " | " + args.manifold_learner +
          " on autoencoded embedding with " + args.cluster + " - N2D")
    print("======================")
    result = "{}\t{}\t{}".format(ari, nmi, acc)
    print(result)
    print("======================")

    if visualize:
        plt.scatter(*zip(*hle[:, :2]), c=y, label=y)

        plt.savefig(save_dir + '/' + args.dataset + '-n2d.png')
        plt.clf()

    return y_pred, acc, nmi, ari
Example #41
tmp3 = np.vstack(tmp3)

tmp2 = []
for rates in allrates['rnd']:
	tmp2.append(rates.rolling(window=100,win_type='gaussian',center=True,min_periods=1,axis=0).mean(std=2).values)
tmp2 = np.vstack(tmp2)



n = len(tmp1)

tmp = np.vstack((tmp1, tmp3))

sys.exit()

imap = Isomap(n_neighbors = 100, n_components = 2, n_jobs = -1).fit_transform(tmp)

iwak = imap[0:n]
isws = imap[n:]
iswr = imap[n:]

tokeep = np.where(np.logical_and(times>=-500,times<=500))[0]

iswr = iswr.reshape(len(rip_tsd),len(tokeep),2)

tmp = np.vstack((tmp1, tmp2))

imap2 = Isomap(n_neighbors = 100, n_components = 2, n_jobs = -1).fit_transform(tmp)

iwak2 = imap2[0:n]
irand = imap2[n:]
import seaborn as sns

"""Get the projection of the n-dimensional contextual embeddings into a lower dimensional space using differnt dimensionality reduction techniques.



NOTE: In the report, the results for TSNE, PCA, and multidimensional scaling is provided because the other techniques do not provide exciting results.
"""

matrix = np.array(avg_embs)
# tsne = TSNE(n_components=2, perplexity=5.0, early_exaggeration=12.0, metric='cosine',  init='pca').fit_transform(matrix)
tsne = TSNE(n_components=2).fit_transform(matrix)
pca = PCA(n_components=2).fit_transform(matrix)
lle = LocallyLinearEmbedding().fit_transform(matrix)
mds = MDS(dissimilarity='euclidean').fit_transform(matrix)
isomap = Isomap().fit_transform(matrix)
spectral = SpectralEmbedding().fit_transform(matrix)

import numpy
import matplotlib.pyplot as plt
import seaborn as sns


tsne_df = pd.DataFrame({'X': tsne[:, 0], 'Y': tsne[:, 1]})
pca_df = pd.DataFrame({'X': pca[:, 0], 'Y': pca[:, 1]})
lle_df = pd.DataFrame({'X': lle[:, 0], 'Y': lle[:, 1]})
mds_df = pd.DataFrame({'X': mds[:, 0], 'Y': mds[:, 1]})
isomap_df = pd.DataFrame({'X': isomap[:, 0], 'Y': isomap[:, 1]})
spectral_df = pd.DataFrame({'X': spectral[:, 0], 'Y': spectral[:, 1]})
Example #43
 def __init__(self, x_data):
     self._x_data = x_data
     self._x_iso = Isomap(n_neighbors=10).fit_transform(x_data)
save_fig("lle_unrolling_plot")
plt.show()

print(
    '------------------------------------------------------------------------------------------------------\n'
    '    8.6 MDS(multidimensional scaling), Isomap and t-SNE(t-distributed stochastic neighbor embedding)  \n'
    '------------------------------------------------------------------------------------------------------\n'
)

# MDS(multidimensional scaling)
mds = MDS(n_components=2, random_state=42)
X_reduced_mds = mds.fit_transform(X)

# Isomap
isomap = Isomap(n_components=2)
X_reduced_isomap = isomap.fit_transform(X)

# t-SNE(t-distributed stochastic neighbor embedding)
tsne = TSNE(n_components=2, random_state=42)
X_reduced_tsne = tsne.fit_transform(X)

# LinearDiscriminantAnalysis
lda = LinearDiscriminantAnalysis(n_components=2)
X_mnist = mnist["data"]
y_mnist = mnist["target"]
lda.fit(X_mnist, y_mnist)
X_reduced_lda = lda.transform(X_mnist)

#
titles = ["MDS", "Isomap", "t-SNE"]
Example #45
#scaler = preprocessing.KernelCenterer() #0.915254237288
scaler = preprocessing.StandardScaler() #0.966101694915

scaler.fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

#pcaComponent = 4
#pca = PCA(n_components=pcaComponent)
#pca.fit(X_train)
#X_train = pca.transform(X_train)
#X_test = pca.transform(X_test)

neighbors = 2
components = 4
isomap = Isomap(n_neighbors=neighbors, n_components=components)
isomap.fit(X_train)
X_train = isomap.transform(X_train)
X_test = isomap.transform(X_test)

#svc = SVC()
#svc.fit(X_train, y_train)
#print svc.score(X_test, y_test)

best_score = 0
best_C = 0
best_gamma = 0
for C in np.arange(0.05, 2.05, 0.05):
    for gamma in np.arange(0.001, 1.001, 0.001):
        svc = SVC(C = C, gamma = gamma)
        svc.fit(X_train, y_train)
Example #46
    def plot2D_classification(self, query=None, colors=None, markers=['*', 'v', 'o', '+', '-', '.', ',']):

        X, y = self.__check_data_available()
        n_row, n_col = X.shape

        import matplotlib.pyplot as plt
        import matplotlib as mpl

        c_map = plt.cm.get_cmap("hsv", self._nb_clazz + 1)
        colors = dict((self._clazz[idx], c_map(idx)) for idx in range(0, self._nb_clazz)) \
            if colors is None else colors
        markers = dict((self._clazz[idx], markers[idx]) for idx in range(0, self._nb_clazz))

        def plot_constraints(lower, upper, _linestyle="solid"):
            plt.plot([lower[0], lower[0], upper[0], upper[0], lower[0]],
                     [lower[1], upper[1], upper[1], lower[1], lower[1]],
                     linestyle=_linestyle)
            plt.grid()

        def plot2D_scatter(X, y):
            for row in range(0, len(y)):
                plt.scatter(X[row, 0], X[row, 1], marker=markers[y[row]], c=colors[y[row]])

        def plot_ellipse(splot, mean, cov, color):
            from scipy import linalg

            v, w = linalg.eigh(cov)
            u = w[0] / linalg.norm(w[0])
            angle = np.arctan(u[1] / u[0])
            angle = 180 * angle / np.pi
            ell = mpl.patches.Ellipse(mean, 2 * v[0] ** 0.5, 2 * v[1] ** 0.5,
                                      angle=180 + angle, facecolor="none",
                                      edgecolor=color,
                                      linewidth=2, zorder=2)
            ell.set_clip_box(splot.bbox)
            ell.set_alpha(0.9)
            splot.add_artist(ell)

        if n_col == 2:
            for clazz in self._clazz:
                post_mean_lower = self._mean_lower[clazz]
                post_mean_upper = self._mean_upper[clazz]
                plot_constraints(post_mean_lower, post_mean_upper)
                mean = self.get_mean_by_clazz(clazz)
                prior_mean_lower = mean - self.ell
                prior_mean_upper = mean + self.ell
                plot_constraints(prior_mean_lower, prior_mean_upper, _linestyle="dashed")

            if query is not None:
                ml_mean, ml_cov, ml_prob = self.fit_max_likelihood(query)
                plt.plot([query[0]], [query[1]], marker='h', markersize=5, color="black")
                _, _bounds = self.evaluate(query)
                for clazz in self._clazz:
                    plt.plot([ml_mean[clazz][0]], [ml_mean[clazz][1]], marker='o', markersize=5, color=colors[clazz])
                    _, est_mean_lower = _bounds[clazz]['inf']
                    _, est_mean_upper = _bounds[clazz]['sup']
                    plt.plot([est_mean_lower[0]], [est_mean_lower[1]], marker='x', markersize=4, color="black")
                    plt.plot([est_mean_upper[0]], [est_mean_upper[1]], marker='x', markersize=4, color="black")

            cov, inv, det = self.__cov_group_sample()
            s_plot = plt.subplot()
            for clazz in self._clazz:
                mean = self.get_mean_by_clazz(clazz)
                plot_ellipse(s_plot, mean, cov, colors[clazz])

        elif n_col > 2:
            if query is not None:
                inference, _ = self.evaluate(query)
                X = np.vstack([X, query])
                y = np.append(y, inference[0])

            from sklearn.manifold import Isomap
            iso = Isomap(n_components=2)
            projection = iso.fit_transform(X)
            X = np.c_[projection[:, 0], projection[:, 1]]

            if query is not None:
                color_instance = colors[inference[0]] if len(inference) == 1 else 'black'
                plt.plot([X[n_row, 0]], [X[n_row, 1]], color='red', marker='o', mfc=color_instance)
        else:
            raise Exception("Not implemented for one feature yet.")

        plot2D_scatter(X, y)
        plt.show()
Example #47
XX_train, yy_train = mnist.data / 255., mnist.target
X_train=[]
y_train=[]
for i, label in enumerate(yy_train):
  if label in mytargets:
    X_train.append(XX_train[i])
    y_train.append(yy_train[i])
num_samples_to_plot = 5000
X_train, y_train = shuffle(X_train, y_train)
X_train, y_train = X_train[:num_samples_to_plot], y_train[:num_samples_to_plot]  # lets subsample a bit for a first impression

for digit in mytargets:
  instances = [i for i in y_train if i == digit]
  print("Digit", digit, "appears", len(instances), "times")

transformer = Isomap(n_neighbors = 10, n_components = 2)
fig, plot = plt.subplots()
fig.set_size_inches(50, 50)
plt.prism()

X_transformed = transformer.fit_transform(X_train)
plot.scatter(X_transformed[:, 0], X_transformed[:, 1], c=y_train)
plot.set_xticks(())
plot.set_yticks(())

count = 0
plt.tight_layout()
plt.suptitle("Isomap for MNIST digits ")
for label , x, y in zip(y_train, X_transformed[:, 0], X_transformed[:, 1]):
#Let's annotate every 1 out of 200 samples, otherwise the graph will be cluttered with annotations
  if count % 200 == 0:
Example #48
    def apply_isomapEmbedding(self, X_train, X_test):
        """Returns the embedded points for Isomap."""
        embedding = Isomap(n_components=2, n_jobs=-1)
        X_train = embedding.fit_transform(X_train)
        X_test = embedding.transform(X_test)
        return X_train, X_test
Example #49
def eval_other_methods(x, y):
    gmm = mixture.GaussianMixture(covariance_type='full',
                                  n_components=args.n_clusters,
                                  random_state=0)
    gmm.fit(x)
    y_pred_prob = gmm.predict_proba(x)
    y_pred = y_pred_prob.argmax(1)
    acc = np.round(cluster_acc(y, y_pred), 5)
    nmi = np.round(metrics.normalized_mutual_info_score(y, y_pred), 5)
    ari = np.round(metrics.adjusted_rand_score(y, y_pred), 5)
    print(args.dataset + " | GMM clustering on raw data")
    print("======================")
    result = "{}\t{}\t{}".format(ari, nmi, acc)
    print(result)
    print("======================")

    y_pred = KMeans(n_clusters=args.n_clusters, random_state=0).fit_predict(x)
    acc = np.round(cluster_acc(y, y_pred), 5)
    nmi = np.round(metrics.normalized_mutual_info_score(y, y_pred), 5)
    ari = np.round(metrics.adjusted_rand_score(y, y_pred), 5)
    print(args.dataset + " | K-Means clustering on raw data")
    print("======================")
    result = "{}\t{}\t{}".format(ari, nmi, acc)
    print(result)
    print("======================")

    sc = SpectralClustering(n_clusters=args.n_clusters,
                            random_state=0,
                            affinity='nearest_neighbors')
    y_pred = sc.fit_predict(x)
    acc = np.round(cluster_acc(y, y_pred), 5)
    nmi = np.round(metrics.normalized_mutual_info_score(y, y_pred), 5)
    ari = np.round(metrics.adjusted_rand_score(y, y_pred), 5)
    print(args.dataset + " | Spectral Clustering on raw data")
    print("======================")
    result = "{}\t{}\t{}".format(ari, nmi, acc)
    print(result)
    print("======================")

    if args.manifold_learner == 'UMAP':
        md = float(args.umap_min_dist)
        hle = umap.UMAP(random_state=0,
                        metric=args.umap_metric,
                        n_components=args.umap_dim,
                        n_neighbors=args.umap_neighbors,
                        min_dist=md).fit_transform(x)
    elif args.manifold_learner == 'LLE':
        from sklearn.manifold import LocallyLinearEmbedding
        hle = LocallyLinearEmbedding(
            n_components=args.umap_dim,
            n_neighbors=args.umap_neighbors).fit_transform(x)
    elif args.manifold_learner == 'tSNE':
        method = 'exact'
        hle = TSNE(n_components=args.umap_dim,
                   n_jobs=16,
                   random_state=0,
                   verbose=0).fit_transform(x)
    elif args.manifold_learner == 'isomap':
        hle = Isomap(
            n_components=args.umap_dim,
            n_neighbors=5,
        ).fit_transform(x)

    gmm = mixture.GaussianMixture(covariance_type='full',
                                  n_components=args.n_clusters,
                                  random_state=0)
    gmm.fit(hle)
    y_pred_prob = gmm.predict_proba(hle)
    y_pred = y_pred_prob.argmax(1)
    acc = np.round(cluster_acc(y, y_pred), 5)
    nmi = np.round(metrics.normalized_mutual_info_score(y, y_pred), 5)
    ari = np.round(metrics.adjusted_rand_score(y, y_pred), 5)
    print(args.dataset + " | GMM clustering on " + str(args.manifold_learner) +
          " embedding")
    print("======================")
    result = "{}\t{}\t{}".format(ari, nmi, acc)
    print(result)
    print("======================")

    plt.scatter(*zip(*hle[:, :2]), c=y, label=y)

    plt.savefig(args.save_dir + '/' + args.dataset + '-' +
                str(args.manifold_learner) + '.png')
    plt.clf()

    y_pred = KMeans(n_clusters=args.n_clusters,
                    random_state=0).fit_predict(hle)
    acc = np.round(cluster_acc(y, y_pred), 5)
    nmi = np.round(metrics.normalized_mutual_info_score(y, y_pred), 5)
    ari = np.round(metrics.adjusted_rand_score(y, y_pred), 5)
    print(args.dataset + " | K-Means " + str(args.manifold_learner) +
          " embedding")
    print("======================")
    result = "{}\t{}\t{}".format(ari, nmi, acc)
    print(result)
    print("======================")

    sc = SpectralClustering(n_clusters=args.n_clusters,
                            random_state=0,
                            affinity='nearest_neighbors')
    y_pred = sc.fit_predict(hle)
    acc = np.round(cluster_acc(y, y_pred), 5)
    nmi = np.round(metrics.normalized_mutual_info_score(y, y_pred), 5)
    ari = np.round(metrics.adjusted_rand_score(y, y_pred), 5)
    print(args.dataset + " | Spectral Clustering on " +
          str(args.manifold_learner) + " embedding")
    print("======================")
    result = "{}\t{}\t{}".format(ari, nmi, acc)
    print(result)
    print("======================")
Example #50
#   Infer trajectory                                                        ####
# run topslam
from sklearn.manifold import TSNE, LocallyLinearEmbedding, SpectralEmbedding, Isomap
from sklearn.decomposition import FastICA, PCA

n_components = p["n_components"]

methods = {
    't-SNE':
    TSNE(n_components=n_components),
    'PCA':
    PCA(n_components=n_components),
    'Spectral':
    SpectralEmbedding(n_components=n_components, n_neighbors=p["n_neighbors"]),
    'Isomap':
    Isomap(n_components=n_components, n_neighbors=p["n_neighbors"]),
    'ICA':
    FastICA(n_components=n_components)
}
method_names = sorted(methods.keys())
method_names_selected = [
    method_names[i] for i, selected in enumerate(p["dimreds"]) if selected
]
methods = {
    method_name: method
    for method_name, method in methods.items()
    if method_name in method_names_selected
}

# dimensionality reduction
X_init, dims = run_methods(expression, methods)
# .as_matrix() was removed in pandas 1.0; .to_numpy() replaces it
X = df.iloc[:, 1:-1].to_numpy()
y = df.iloc[:, -1:].to_numpy()

####################################################################

# # # ###### randomized principal component analysis for dimensionality reduction of alt set ########
# # # The purpose is to find a way to effectively label our data, since labeling based solely on 
# # # perceptual criteria (meaning, just listening to the sounds and judging to which instrument they should
# # # be assigned) does not work well enough.
# # from sklearn.decomposition import RandomizedPCA as RandPCA

# # pca = RandPCA(n_components = 30)

# # X = pca.fit_transform(X)
from sklearn.manifold import Isomap
isomap = Isomap(n_components=30)
X = isomap.fit_transform(X)


####################################################################

############ cluster the alternative set into 17 clusters, using KMeans ##########
clusterer = KMeans(n_clusters=17)
clstr = clusterer.fit_predict(X)
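# quick sanity check on how evenly KMeans spread the 17 clusters (a usage sketch)
print(np.bincount(clstr))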


####################################################################

########### names will be filled with the wav files' filenames ################
pardir = '../database/all_recorded_and_downloaded_alt_sounds_processed'
names = np.array([])
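# A sketch of how `names` could be filled (an assumption; the original loop is
# cut off at the snippet boundary): collect the .wav filenames under pardir.
import os
names = np.array([f for f in sorted(os.listdir(pardir)) if f.endswith('.wav')])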
# plot a grid of digit images labeled in green (the enclosing loop was cut off
# here; a minimal reconstruction)
fig, axes = plt.subplots(8, 8, subplot_kw=dict(xticks=[], yticks=[]))
for i, ax in enumerate(axes.flat):
    ax.imshow(digits.images[i], cmap='binary')
    ax.text(0.05, 0.05, str(digits.target[i]),
            transform=ax.transAxes, color='green')

X = digits.data
print(X.shape)  # each image is represented as a 64-element pixel array
y = digits.target
print(y.shape)  # in total, 1797 samples with 64 features each

# 1. Unsupervised learning: dimensionality reduction

# Project the data down to two dimensions
from sklearn.manifold import Isomap  # a manifold learning algorithm
iso = Isomap(n_components=2)  # reduce the number of dimensions to 2
iso.fit(digits.data)
data_projected = iso.transform(digits.data)
print(data_projected.shape)
# Plot the projected data
plt.scatter(data_projected[:, 0],
            data_projected[:, 1],
            c=digits.target,
            edgecolors='none',
            alpha=0.5,
            cmap=plt.cm.get_cmap("Spectral", 10))
plt.colorbar(label='digit label', ticks=range(10))
plt.clim(-0.5, 9.5)

# 2. Classifying the digits
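# A minimal sketch of the classification step announced above (an assumption:
# the standard train/test split plus Gaussian naive Bayes commonly used with
# this dataset):
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import accuracy_score

Xtrain, Xtest, ytrain, ytest = train_test_split(X, y, random_state=0)
model = GaussianNB().fit(Xtrain, ytrain)
print(accuracy_score(ytest, model.predict(Xtest)))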
Example #53
def isoMap(X, y):
    im = Isomap(n_components=1, eigen_solver="dense", n_neighbors=20)
    im.fit(X)
    transformX = im.transform(X)
    return transformX
Example #54
# read in the data
X = pd.read_csv("X ansur.csv")

# min-max scale the data so each feature takes values between 0 and 1
X = (X - X.min()) / (X.max() - X.min())

# separate the data into training and testing
np.random.seed(1)
test_idx = np.random.choice(a=X.index.values,
                            size=int(X.shape[0] / 5),
                            replace=False)
train_idx = np.array(list(set(X.index.values) - set(test_idx)))

# train an Isomap model
n_comp = 1  # number of components
component = Isomap(n_components=n_comp, n_neighbors=5, n_jobs=1)
component.fit(X.iloc[train_idx, :])

# compute components for all the data, add cluster labels and train/test labels
components = pd.DataFrame(component.transform(X),
                          columns=["IC" + str(i + 1) for i in range(n_comp)])
components["Data"] = "Train"
for j in test_idx:
    components.loc[j, "Data"] = "Test"
# components.to_csv("isomap.csv", index=False)

# combine the data and components
data = pd.concat([X, components], axis=1)

# train a random forest to learn the clusters
model = RandomForestRegressor(n_estimators=50)
Example #55
def isomap(features, n_components=2):
    return Isomap(n_components=n_components, n_jobs=-1).fit_transform(features)
split = 45
X, y = np.float64(subimages), np.float64(subimages2)

del subimages, subimages2

#X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=42)

X_train, X_test = X[:split], X[split:]
y_train, y_test = y[:split], y[split:]

pca1 = PCA(n_components=2, svd_solver='auto', random_state=1)
ica1 = FastICA(n_components=20, random_state=1)
tsne1 = TSNE(n_components=2, random_state=1, method='exact')
fa1 = FactorAnalysis(n_components=5, random_state=1)
embedding = Isomap(n_components=20)
reducer = umap.UMAP(n_components=30, random_state=1)

#X_train_t =pca1.fit_transform(X_train)
#X_train_t =ica1.fit_transform(X_train)
#X_train_t = fa1.fit_transform(X_train)
#X_train_t=tsne1.fit_transform(X_train)
#X_train_t=embedding.fit_transform(X_train)
#X_train_t =reducer.fit_transform(X_train)

sc1 = MinMaxScaler()
#X_train_t =sc1.fit_transform(X_train_t)
X_train_t = X_train

## convert the training array into a DataFrame
df = pd.DataFrame(X_train)
Example #57
pca = PCA(n_components=3)
pca.fit(df)
T = pca.transform(df)

Plot2D(T, 'chart title', 1, 2)

#
# TODO: Implement Isomap here. Reduce the dataframe df down
# to THREE components. Once you've done that, call Plot2D using
# the first two components.
#
# .. your code here ..

from sklearn.manifold import Isomap
im = Isomap(n_components=3)
im.fit(df)
T = im.transform(df)

Plot2D(T, 'chart title', 1, 2)

#
# TODO: If you're up for a challenge, draw your dataframes in 3D
# Even if you're not, just do it anyway.
#
# .. your code here ..

fig = plt.figure()
ax = fig.add_subplot(111,projection="3d")
ax.set_xlabel('0')
ax.set_ylabel('1')
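# completing the 3D scatter the TODO asks for (a sketch; the z-axis label
# follows the '0'/'1' naming already used for the other axes)
ax.set_zlabel('2')
ax.scatter(T[:, 0], T[:, 1], T[:, 2], marker='.', alpha=0.7)
plt.show()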
Example #58
print(digits.data.shape, digits.images.shape,
      digits.target.shape)  # (1797, 64) (1797, 8, 8) (1797,)

fig, ax = plt.subplots(6,
                       6,
                       subplot_kw=dict(xticks=[], yticks=[]),
                       gridspec_kw=dict(hspace=0.1, wspace=0.1))
for i, axi in enumerate(ax.flat):
    axi.imshow(digits.images[i], cmap='binary')  # optionally interpolation='nearest'
    axi.text(0.05,
             0.05,
             str(digits.target[i]),
             color='g',
             transform=axi.transAxes)

# dimensionality reduction
iso = Isomap(n_components=2)
new = iso.fit_transform(digits.data)
print(new.shape)
sns.set(style='whitegrid')
plt.figure()
plt.scatter(new[:, 0],
            new[:, 1],
            c=digits.target,
            cmap=plt.cm.get_cmap('Spectral', 10),
            edgecolor='none',
            alpha=0.6)
plt.colorbar(label='Digits', ticks=range(10), extend='both')
plt.clim(-0.5, 9.5)

# classification
model = RFC(n_estimators=400)
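# the snippet cuts off here; a minimal sketch of the remaining steps, assuming
# RFC is sklearn's RandomForestClassifier and a standard train/test split:
from sklearn.model_selection import train_test_split
Xtrain, Xtest, ytrain, ytest = train_test_split(digits.data, digits.target,
                                                random_state=0)
model.fit(Xtrain, ytrain)
print(model.score(Xtest, ytest))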
Example #59
pca = PCA(n_components=2)
data_pca = pca.fit_transform(data)
plt.scatter(data_pca[:, 0], data_pca[:, 1], c=target, edgecolor='none', alpha=0.5, cmap=plt.cm.get_cmap('rainbow', 2))
plt.colorbar();

## PCA explained variance
sb.set()
pca_ = PCA().fit(data)
plt.plot(np.cumsum(pca_.explained_variance_ratio_))
plt.xlabel('number of components')
plt.ylabel('cumulative explained variance');
plt.xlim(0,5)
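# a usage sketch based on the cumulative curve above: pick the smallest number
# of components that retains, say, 95% of the variance
n_95 = np.argmax(np.cumsum(pca_.explained_variance_ratio_) >= 0.95) + 1
print(n_95, 'components retain 95% of the variance')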

## Isomap dimensionality reduction
from sklearn.manifold import Isomap
iso = Isomap(n_components=2)
data_projected = iso.fit_transform(data)
plt.scatter(data_projected[:, 0], data_projected[:, 1], c=target,edgecolor='none', alpha=0.5, cmap=plt.cm.get_cmap('rainbow', 2));
plt.colorbar(label='Cancer', ticks=range(2))
plt.clim(-0.5, 1.5)  # center the two class colors, as with the ten-digit case

### KNN
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import GridSearchCV  # sklearn.grid_search was removed

clf = KNeighborsClassifier()
n_neighbors = [1,2,3,5,8,10,15,20,25,30,35,40]
weights = ['uniform','distance']
param_grid = [{'n_neighbors': n_neighbors, 'weights': weights}]
grid_search = GridSearchCV(clf, param_grid=param_grid, cv=10)
grid_search.fit(data, target)
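# inspect the best hyper-parameters and cross-validated score found above
print(grid_search.best_params_)
print(grid_search.best_score_)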
Example #60
    ax.yaxis.set_major_formatter(plt.NullFormatter())
    ax.set_xlabel('feature 1', color='gray')
    ax.set_ylabel('feature 2', color='gray')
    ax.set_title(title, color='gray')


# make data
X, y = make_swiss_roll(200, noise=0.5, random_state=42)
X = X[:, [0, 2]]

# visualize data
fig, ax = plt.subplots()
ax.scatter(X[:, 0], X[:, 1], color='gray', s=30)

# format the plot
format_plot(ax, 'Input Data')

model = Isomap(n_neighbors=8, n_components=1)
y_fit = model.fit_transform(X).ravel()

# visualize data
fig, ax = plt.subplots()
pts = ax.scatter(X[:, 0], X[:, 1], c=y_fit, cmap='viridis', s=30)
cb = fig.colorbar(pts, ax=ax)

# format the plot
format_plot(ax, 'Learned Latent Parameter')
cb.set_ticks([])
cb.set_label('Latent Variable', color='gray')

plt.show()