def reduceDataset(self,nr=3,method='PCA'):
     '''Reduces the dimensionality of a given dataset using different techniques provided by the scikit-learn library.
      Methods available:
                         'PCA'
                         'FactorAnalysis'
                         'KPCArbf','KPCApoly'
                         'KPCAcosine','KPCAsigmoid'
                         'IPCA'
                         'FastICADeflation'
                         'FastICAParallel'
                         'Isomap'
                         'LLE'
                         'LLEmodified'
                         'LLEltsa'
     '''
     dataset=self.ModelInputs['Dataset']
     #dataset=self.dataset[Model.in_columns]
     #dataset=self.dataset[['Humidity','TemperatureF','Sea Level PressureIn','PrecipitationIn','Dew PointF','Value']]
     #PCA
     if method=='PCA':
         sklearn_pca = sklearnPCA(n_components=nr)
         reduced = sklearn_pca.fit_transform(dataset)
     #Factor Analysis
     elif method=='FactorAnalysis':
         fa=FactorAnalysis(n_components=nr)
         reduced=fa.fit_transform(dataset)
     #kernel pca with rbf kernel
     elif method=='KPCArbf':
         kpca=KernelPCA(nr,kernel='rbf')
         reduced=kpca.fit_transform(dataset)
     #kernel pca with poly kernel
     elif method=='KPCApoly':
         kpca=KernelPCA(nr,kernel='poly')
         reduced=kpca.fit_transform(dataset)
     #kernel pca with cosine kernel
     elif method=='KPCAcosine':
         kpca=KernelPCA(nr,kernel='cosine')
         reduced=kpca.fit_transform(dataset)
     #kernel pca with sigmoid kernel
     elif method=='KPCAsigmoid':
         kpca=KernelPCA(nr,kernel='sigmoid')
         reduced=kpca.fit_transform(dataset)
     #Incremental PCA
     elif method=='IPCA':
         ipca=IncrementalPCA(nr)
         reduced=ipca.fit_transform(dataset)
     #Fast ICA
     elif method=='FastICAParallel':
         fip=FastICA(nr,algorithm='parallel')
         reduced=fip.fit_transform(dataset)
     elif method=='FastICADeflation':
         fid=FastICA(nr,algorithm='deflation')
         reduced=fid.fit_transform(dataset)
     elif method == 'All':
         self.dimensionalityReduction(nr=nr)
         return self
     
     self.ModelInputs.update({method:reduced})
     self.datasetsAvailable.append(method)
     return self
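For reference, the 'KPCArbf' branch above boils down to the following standalone scikit-learn calls (a self-contained toy sketch, not the class's own code; the array X stands in for ModelInputs['Dataset']):

import numpy as np
from sklearn.decomposition import KernelPCA

X = np.random.rand(100, 8)                      # stand-in for ModelInputs['Dataset']
kpca = KernelPCA(n_components=3, kernel='rbf')  # same estimator the 'KPCArbf' branch builds
reduced = kpca.fit_transform(X)                 # what gets stored under ModelInputs['KPCArbf']
print(reduced.shape)                            # (100, 3)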
Example n. 2
def Kernel_PCA(HE_MI_train_test, kernel, invTran, degree):
    '''
    Overview
        - Applies Kernel PCA.
    '''

    MyDataSet = HE_MI_train_test
    my_HEtraining = MyDataSet[0]
    my_MItraining = MyDataSet[1]
    my_HEtest = MyDataSet[2]
    my_MItest = MyDataSet[3]

    kpca = KernelPCA(kernel=kernel, fit_inverse_transform=invTran, degree=degree)
    HE_training_kpca = kpca.fit_transform(my_HEtraining)
    MI_training_kpca = kpca.fit_transform(my_MItraining)
    HE_test_kpca = kpca.fit_transform(my_HEtest)
    MI_test_kpca = kpca.fit_transform(my_MItest)



    HE_training_KPCA_2dim = []
    MI_training_KPCA_2dim = []
    HE_test_KPCA_2dim = []
    MI_test_KPCA_2dim = []

    for pt in HE_training_kpca:
        HE_training_KPCA_2dim.append((pt[0], pt[1]))
    for pt in MI_training_kpca:
        MI_training_KPCA_2dim.append((pt[0], pt[1]))
    for pt in HE_test_kpca:
        HE_test_KPCA_2dim.append((pt[0], pt[1]))
    for pt in MI_test_kpca:
        MI_test_KPCA_2dim.append((pt[0], pt[1]))

    return [HE_training_KPCA_2dim, MI_training_KPCA_2dim, HE_test_KPCA_2dim, MI_test_KPCA_2dim]
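Note that the snippet above fits a separate KernelPCA on each of the four splits, so the projections do not share a common basis. A more conventional pattern (sketched below with made-up array names, not taken from this code) fits once on the training data and reuses the fitted model for the test data:

import numpy as np
from sklearn.decomposition import KernelPCA

X_train = np.random.rand(80, 10)   # stand-ins for my_HEtraining / my_MItraining
X_test = np.random.rand(20, 10)    # stand-ins for my_HEtest / my_MItest

kpca = KernelPCA(n_components=2, kernel='rbf')
X_train_kpca = kpca.fit_transform(X_train)   # learn the kernel principal axes on training data only
X_test_kpca = kpca.transform(X_test)         # project test data onto the same axes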
 def dimensionalityReduction(self,nr=5):
     '''It applies all the dimensionality reduction techniques available in this class:
     Techniques available:
                         'PCA'
                         'FactorAnalysis'
                         'KPCArbf','KPCApoly'
                         'KPCAcosine','KPCAsigmoid'
                         'IPCA'
                         'FastICADeflation'
                         'FastICAParallel'
                         'Isomap'
                         'LLE'
                         'LLEmodified'
                         'LLEltsa'
     '''
     dataset=self.ModelInputs['Dataset']
     sklearn_pca = sklearnPCA(n_components=nr)
     p_components = sklearn_pca.fit_transform(dataset)
     fa=FactorAnalysis(n_components=nr)
     factors=fa.fit_transform(dataset)
     kpca=KernelPCA(nr,kernel='rbf')
     rbf=kpca.fit_transform(dataset)
     kpca=KernelPCA(nr,kernel='poly')
     poly=kpca.fit_transform(dataset)
     kpca=KernelPCA(nr,kernel='cosine')
     cosine=kpca.fit_transform(dataset)
     kpca=KernelPCA(nr,kernel='sigmoid')
     sigmoid=kpca.fit_transform(dataset)
     ipca=IncrementalPCA(nr)
     i_components=ipca.fit_transform(dataset)
     fip=FastICA(nr,algorithm='parallel')
     fid=FastICA(nr,algorithm='deflation')
     ficaD=fid.fit_transform(dataset)
     ficaP=fip.fit_transform(dataset)
     '''isomap=Isomap(n_components=nr).fit_transform(dataset)
     try:
         lle1=LocallyLinearEmbedding(n_components=nr).fit_transform(dataset)
     except ValueError:
         lle1=LocallyLinearEmbedding(n_components=nr,eigen_solver='dense').fit_transform(dataset)
     try:
         
         lle2=LocallyLinearEmbedding(n_components=nr,method='modified').fit_transform(dataset)
     except ValueError:
         lle2=LocallyLinearEmbedding(n_components=nr,method='modified',eigen_solver='dense').fit_transform(dataset) 
     try:
         lle3=LocallyLinearEmbedding(n_components=nr,method='ltsa').fit_transform(dataset)
     except ValueError:
         lle3=LocallyLinearEmbedding(n_components=nr,method='ltsa',eigen_solver='dense').fit_transform(dataset)'''
     values=[p_components,factors,rbf,poly,cosine,sigmoid,i_components,ficaD,ficaP]#,isomap,lle1,lle2,lle3]
     keys=['PCA','FactorAnalysis','KPCArbf','KPCApoly','KPCAcosine','KPCAsigmoid','IPCA','FastICADeflation','FastICAParallel']#,'Isomap','LLE','LLEmodified','LLEltsa']
     self.ModelInputs.update(dict(zip(keys, values)))
     self.datasetsAvailable.extend(keys)
     
     #debug
     #dataset=pd.DataFrame(self.ModelInputs['Dataset'])
     #dataset['Output']=self.ModelOutput
     #self.debug['Dimensionalityreduction']=dataset
     ###
     return self
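The method above runs every technique eagerly and collects the results under fixed keys; a compact standalone sketch of the same collect-into-a-dict pattern, using toy data and only a subset of the techniques:

import numpy as np
from sklearn.decomposition import PCA, KernelPCA, FastICA

X = np.random.rand(100, 12)                     # stand-in for ModelInputs['Dataset']
reducers = {
    'PCA': PCA(n_components=5),
    'KPCArbf': KernelPCA(n_components=5, kernel='rbf'),
    'FastICAParallel': FastICA(n_components=5, algorithm='parallel'),
}
projections = {name: r.fit_transform(X) for name, r in reducers.items()}
print({name: p.shape for name, p in projections.items()})   # each projection is (100, 5)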
Example n. 4
def _dimReduce(df, method='pca', n_components=2, labels=None, standardize=False, smatFunc=None, ldaShrinkage='auto'):
    if method == 'kpca':
        """By using KernelPCA for dimensionality reduction we don't need to impute missing values"""
        if smatFunc is None:
            smatFunc = corrTSmatFunc
        pca = KernelPCA(kernel='precomputed', n_components=n_components)
        smat = smatFunc(df).values
        xy = pca.fit_transform(smat)
        pca.components_ = pca.alphas_
        pca.explained_variance_ratio_ = pca.lambdas_ / pca.lambdas_.sum()
        return xy, pca
    elif method == 'pca':
        if standardize:
            normed = df.apply(lambda vec: (vec - vec.mean())/vec.std(), axis=0)
        else:
            normed = df.apply(lambda vec: vec - vec.mean(), axis=0)
        pca = PCA(n_components=n_components)
        xy = pca.fit_transform(normed)
        return xy, pca
    elif method == 'lda':
        if labels is None:
            raise ValueError('labels needed to perform LDA')
        if standardize:
            normed = df.apply(lambda vec: (vec - vec.mean())/vec.std(), axis=0)
        else:
            normed = df.apply(lambda vec: vec - vec.mean(), axis=0)
        
        if df.shape[1] > df.shape[0]:
            """Pre-PCA step"""
            ppca = PCA(n_components=int(df.shape[0]/1.5))
            normed = ppca.fit_transform(df)

        lda = LinearDiscriminantAnalysis(solver='eigen', shrinkage=ldaShrinkage, n_components=n_components)
        lda.fit(normed, labels.values)
        lda.explained_variance_ratio_ = np.abs(lda.explained_variance_ratio_) / np.abs(lda.explained_variance_ratio_).sum()
        xy = lda.transform(normed)
        return xy, lda
    elif method == 'pls':
        if labels is None:
            raise ValueError('labels needed to perform PLS')
        if standardize:
            normed = df.apply(lambda vec: (vec - vec.mean())/vec.std(), axis=0)
        else:
            normed = df.apply(lambda vec: vec - vec.mean(), axis=0)
        
        pls = PLSRegression(n_components=n_components)
        pls.fit(normed, labels)
        
        pls.explained_variance_ratio_ = np.zeros(n_components)
        xy = pls.x_scores_
        return xy, pls
Example n. 5
def test_remove_zero_eig():
    X = np.array([[1 - 1e-30, 1], [1, 1], [1, 1 - 1e-20]])

    # n_components=None (default) => remove_zero_eig is True
    kpca = KernelPCA()
    Xt = kpca.fit_transform(X)
    assert_equal(Xt.shape, (3, 0))

    kpca = KernelPCA(n_components=2)
    Xt = kpca.fit_transform(X)
    assert_equal(Xt.shape, (3, 2))

    kpca = KernelPCA(n_components=2, remove_zero_eig=True)
    Xt = kpca.fit_transform(X)
    assert_equal(Xt.shape, (3, 0))
Example n. 6
 def MyPCA():
     X,y = circle_data()
     kpca = KernelPCA(kernel='rbf', fit_inverse_transform=True, gamma= 10)
     X_kpca = kpca.fit_transform(X)
     pca = PCA()
     x_pca = pca.fit_transform(X)
     return X_kpca
def kPCA_visualization1d(X, y):
   
    kpca = KernelPCA(kernel="linear", fit_inverse_transform=True, gamma=10, n_components=2)
    X_kpca = kpca.fit_transform(X)
    X_back = kpca.inverse_transform(X_kpca)
    pca = PCA(n_components=1)
    X_pca = pca.fit_transform(X)

    class_1 = []
    class_0 = []

    for i in range(0, len(y)):
        
        if y[i] == 1:
            class_1.append(  list( X_kpca[i] )[0] )
        else:
            class_0.append(  list( X_kpca[i] )[0] )
    print "check"
    print class_1[:10]
    import numpy
    from matplotlib import pyplot
    

    pyplot.hist(class_1, 50, alpha=0.5, label='class 1' )  
    pyplot.hist(class_0, 50, alpha=0.5, label='class 0')

    pyplot.legend(loc='upper right')
    pyplot.show()
Example n. 8
def test_kernel_pca():
    rng = np.random.RandomState(0)
    X_fit = rng.random_sample((5, 4))
    X_pred = rng.random_sample((2, 4))

    for eigen_solver in ("auto", "dense", "arpack"):
        for kernel in ("linear", "rbf", "poly"):
            # transform fit data
            kpca = KernelPCA(4, kernel=kernel, eigen_solver=eigen_solver,
                             fit_inverse_transform=True)
            X_fit_transformed = kpca.fit_transform(X_fit)
            X_fit_transformed2 = kpca.fit(X_fit).transform(X_fit)
            assert_array_almost_equal(np.abs(X_fit_transformed),
                                      np.abs(X_fit_transformed2))

            # non-regression test: previously, gamma would be 0 by default,
            # forcing all eigenvalues to 0 under the poly kernel
            assert_not_equal(X_fit_transformed.size, 0)

            # transform new data
            X_pred_transformed = kpca.transform(X_pred)
            assert_equal(X_pred_transformed.shape[1],
                         X_fit_transformed.shape[1])

            # inverse transform
            X_pred2 = kpca.inverse_transform(X_pred_transformed)
            assert_equal(X_pred2.shape, X_pred.shape)
def test_kernel_pca():
    rng = np.random.RandomState(0)
    X_fit = rng.random_sample((5, 4))
    X_pred = rng.random_sample((2, 4))

    def histogram(x, y, **kwargs):
        # Histogram kernel implemented as a callable.
        assert_equal(kwargs, {})  # no kernel_params that we didn't ask for
        return np.minimum(x, y).sum()

    for eigen_solver in ("auto", "dense", "arpack"):
        for kernel in ("linear", "rbf", "poly", histogram):
            # histogram kernel produces singular matrix inside linalg.solve
            # XXX use a least-squares approximation?
            inv = not callable(kernel)

            # transform fit data
            kpca = KernelPCA(4, kernel=kernel, eigen_solver=eigen_solver, fit_inverse_transform=inv)
            X_fit_transformed = kpca.fit_transform(X_fit)
            X_fit_transformed2 = kpca.fit(X_fit).transform(X_fit)
            assert_array_almost_equal(np.abs(X_fit_transformed), np.abs(X_fit_transformed2))

            # non-regression test: previously, gamma would be 0 by default,
            # forcing all eigenvalues to 0 under the poly kernel
            assert_not_equal(X_fit_transformed.size, 0)

            # transform new data
            X_pred_transformed = kpca.transform(X_pred)
            assert_equal(X_pred_transformed.shape[1], X_fit_transformed.shape[1])

            # inverse transform
            if inv:
                X_pred2 = kpca.inverse_transform(X_pred_transformed)
                assert_equal(X_pred2.shape, X_pred.shape)
    def test_compare_clinical_kernel(self):
        x_full, y, _, _ = load_arff_file(WHAS500_FILE, ['fstat', 'lenfol'], '1',
                                         standardize_numeric=False, to_numeric=False)

        trans = ClinicalKernelTransform()
        trans.fit(x_full)

        x = encode_categorical(standardize(x_full))

        kpca = KernelPCA(kernel=trans.pairwise_kernel)
        xt = kpca.fit_transform(x)

        nrsvm = FastSurvivalSVM(optimizer='rbtree', tol=1e-8, max_iter=1000, random_state=0)
        nrsvm.fit(xt, y)

        rsvm = FastKernelSurvivalSVM(optimizer='rbtree', kernel=trans.pairwise_kernel,
                                     tol=1e-8, max_iter=1000, random_state=0)
        rsvm.fit(x, y)

        pred_nrsvm = nrsvm.predict(kpca.transform(x))
        pred_rsvm = rsvm.predict(x)

        self.assertEqual(len(pred_nrsvm), len(pred_rsvm))

        c1 = concordance_index_censored(y['fstat'], y['lenfol'], pred_nrsvm)
        c2 = concordance_index_censored(y['fstat'], y['lenfol'], pred_rsvm)

        self.assertAlmostEqual(c1[0], c2[0])
        self.assertTupleEqual(c1[1:], c2[1:])
Example n. 11
class RegionSplitter_PCA_KMean():
    def __init__(self, data, label):

        data_dim_num = len(data[0])
        label_dim_num = len(label[0])

        self.n_comp = max(1, data_dim_num)

        self.pca = PCA(n_components=self.n_comp)

        data = self.pca.fit_transform(data)
        data_zipped = list(zip(*data))

        # k-mean cluster for the dimension
        self.clusterer = KMeans(n_clusters=2, init='k-means++')

        self.clusterer.fit(list(zip(*data_zipped)))


    def classify(self, data):
        if not isinstance(data, tuple):
            raise TypeError("data must be a tuple")

        data = tuple(self.pca.transform(data)[0])
        group = self.clusterer.predict(data)

        return group == 0
Example n. 12
File: test1.py Project: fferri/wir
def pca(X, gamma1):
    kpca = KernelPCA(kernel='rbf', fit_inverse_transform=False, gamma=gamma1)
    X_kpca = kpca.fit_transform(X)
    print('X', X.shape)
    print('alphas', kpca.alphas_.shape)
    print('lambdas', kpca.lambdas_.shape)
    #X_back = kpca.inverse_transform(X_kpca)
    return X_kpca
Example n. 13
  def isomap(self, num_dims=None, directed=None):
    '''Isomap embedding.

    num_dims : dimension of embedded coordinates, defaults to input dimension
    directed : used for .shortest_path() calculation
    '''
    W = -0.5 * self.shortest_path(directed=directed) ** 2
    kpca = KernelPCA(n_components=num_dims, kernel='precomputed')
    return kpca.fit_transform(W)
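A self-contained sketch of the same idea, building the geodesic distance matrix explicitly with scikit-learn and SciPy utilities as a toy stand-in for self.shortest_path (this is not the class's actual implementation):

import numpy as np
from sklearn.datasets import make_swiss_roll
from sklearn.neighbors import kneighbors_graph
from sklearn.decomposition import KernelPCA
from scipy.sparse.csgraph import shortest_path

X, _ = make_swiss_roll(n_samples=300, random_state=0)
graph = kneighbors_graph(X, n_neighbors=10, mode='distance')
D = shortest_path(graph, method='auto', directed=False)   # geodesic distance matrix

# Double-centering trick: treat -0.5 * D**2 as a precomputed kernel for KPCA.
K = -0.5 * D ** 2
embedding = KernelPCA(n_components=2, kernel='precomputed').fit_transform(K)
print(embedding.shape)   # (300, 2)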
Example n. 14
class StyloPCA(StyloClassifier):
	def __init__(self,corpus,n_components=2,kernel=None):
		StyloClassifier.__init__(self,corpus)
		data = self.data_frame[self.cols].values
		self.n_components = n_components
		self.kernel = kernel
		if not kernel:
			self.pca = PCA(n_components=self.n_components)
		else:
			self.pca = KernelPCA(kernel=kernel, gamma=10)
		self.pca_data = self.pca.fit_transform(StandardScaler().fit_transform(data))

	def plot_pca(self, out_file=None):
		self.create_plot_pca()
		plt.show()
		# if out_file:
		# 	plt.savefig(out_file)

	def create_plot_pca(self):
		plt.figure(1)
		plt.clf()
		all_authors = set(self.data_frame["Author"])
		for a in all_authors:
			rows = self.data_frame.loc[self.data_frame["Author"] == a]
			indices = self.data_frame.loc[self.data_frame["Author"] == a].index
			plt.plot(self.pca_data[indices,0],self.pca_data[indices,1], 'o', markersize=7,\
				color=(random.random(),random.random(),random.random()), alpha=0.5, label=rows["Author_Orig"][indices[0]])
		
		plt.xlabel(self.cols[0])
		plt.ylabel(self.cols[1])
		plt.legend()
		plt.title('Transformed stylometry data using PCA')

	def plot_explained_variance(self, out_file=None):
		self.create_plot_explained_variance()
		plt.show()

	def create_plot_explained_variance(self):
		if not self.kernel:
			evr = self.pca.explained_variance_
		else:
			evr = self.pca.lambdas_
		print(evr)
		fig = plt.figure()
		ax = fig.add_subplot(111)
		tot = sum(evr)
		var_exp = [(i / tot)*100 for i in sorted(evr, reverse=True)]
		cum_var_exp = np.cumsum(var_exp)
		plt.plot(range(1,len(cum_var_exp)+1),cum_var_exp, 'b*-')
		width = .8
		plt.bar(range(1,len(var_exp)+1), var_exp, width=width)
		# ax.set_xticklabels()
		plt.grid(True)
		ax.set_ylim((0,110))
		plt.xlabel('n_components')
		plt.ylabel('Percentage of variance explained')
		plt.title('Variance Explained vs. n_components')
Example n. 15
def Kernel_PCA(HE_MI_train_test, kernel, invTran, degree):
    '''
    Overview
        - Applies Kernel PCA.
    '''

    MyDataSet = HE_MI_train_test
    my_HEtraining = MyDataSet[0]
    my_MItraining = MyDataSet[1]
    my_HEtest = MyDataSet[2]
    my_MItest = MyDataSet[3]

    kpca = KernelPCA(kernel=kernel, fit_inverse_transform=invTran, degree=degree)
    HE_training_kpca = kpca.fit_transform(my_HEtraining)
    MI_training_kpca = kpca.fit_transform(my_MItraining)
    HE_test_kpca = kpca.fit_transform(my_HEtest)
    MI_test_kpca = kpca.fit_transform(my_MItest)

    return [HE_training_kpca, MI_training_kpca, HE_test_kpca, MI_test_kpca]
Example n. 16
def main():
    definition = load_definition()
    data = np.load(os.path.join(ROOT, definition.embedding))
    uuids = np.load(os.path.join(ROOT, definition.uuids))

    pca = KernelPCA(**definition.pca)
    tsne = TSNE(**definition.tsne)
    data = pca.fit_transform(data)
    data = tsne.fit_transform(data)

    plot_vectors(data, uuids, definition.sources, definition.output)
def kernelPCA(data, labels, new_dimension):
    print "start kernel pca..."

    if hasattr(data, "toarray"):
        data = data.toarray()

    start = time.time()
    pca = KernelPCA(fit_inverse_transform=True, gamma=10, n_components=new_dimension, alpha=2)

    reduced = pca.fit_transform(data)
    end = time.time()
    return (reduced, end-start)
Example n. 18
    def try_kpca(kernel, invTran, degree):
        '''
        Overview
            - Applies Kernel PCA.
        '''

        MyDataSet = training_test(24, 150)
        my_HEtraining = MyDataSet[0]
        my_MItraining = MyDataSet[1]
        my_HEtest = MyDataSet[2]
        my_MItest = MyDataSet[3]

        from sklearn.decomposition import PCA, KernelPCA

        kpca = KernelPCA(kernel=kernel, fit_inverse_transform=invTran, degree=degree)
        HE_training_kpca = kpca.fit_transform(my_HEtraining)
        MI_training_kpca = kpca.fit_transform(my_MItraining)
        HE_test_kpca = kpca.fit_transform(my_HEtest)
        MI_test_kpca = kpca.fit_transform(my_MItest)

        return [HE_training_kpca, MI_training_kpca, HE_test_kpca, MI_test_kpca]
Example n. 19
 def fit(self,X, num, method='dijkstra'):
     # Construct k-neigh. graph
     knn = KNN(num).fit(X)
     #Find shortest path
     if method == 'dijkstra':
         result = dijkstra(knn)
     else:
         result = shortest_path(knn, method=method)
     #Multidimensional scaling
     #Can be used Kernel PCA
     model = KernelPCA(n_components=num)
     return model.fit_transform(result)
Example n. 20
def test_kernel_pca_deterministic_output():
    rng = np.random.RandomState(0)
    X = rng.rand(10, 10)
    eigen_solver = ('arpack', 'dense')

    for solver in eigen_solver:
        transformed_X = np.zeros((20, 2))
        for i in range(20):
            kpca = KernelPCA(n_components=2, eigen_solver=solver,
                             random_state=rng)
            transformed_X[i, :] = kpca.fit_transform(X)[0]
        assert_allclose(
            transformed_X, np.tile(transformed_X[0, :], 20).reshape(20, 2))
Example n. 21
def isomap(X, n_neighbors, metric):
    """
        Based on sklearn,
        Author: Jake Vanderplas  -- <*****@*****.**>
        License: BSD, (C) 2011
    """    
    
    kng = kneighbors_graph(X, n_neighbors = n_neighbors, metric = metric)    
    dist_matrix_ = graph_shortest_path(kng, method='auto', directed=False)    
    kernel_pca_ = KernelPCA(n_components=2, kernel="precomputed", eigen_solver='auto')
    G = dist_matrix_ ** 2
    G *= -0.5
    return kernel_pca_.fit_transform(G)
Example n. 22
def reduce_kpca(X, kern, retall=False):
    """ reduce_kpca(X, components, kern, retall=False)
    Reduce dim by Kernel PCA
    """

    kpca = KernelPCA(kernel=kern, fit_inverse_transform=True)
    X_kpca = kpca.fit_transform(X)
    X_back = kpca.inverse_transform(X_kpca)

    if not retall:
        return X_kpca, X_back
    else:
        return X_kpca, X_back, kpca
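A brief usage sketch for the helper above (it assumes numpy and KernelPCA are imported in the same module, which this fragment does not show; the toy matrix is made up for illustration):

import numpy as np

X = np.random.rand(50, 5)                      # toy data
X_kpca, X_back = reduce_kpca(X, kern='rbf')    # projection and its approximate pre-image
print(X_kpca.shape, X_back.shape)              # X_back keeps the original dimensionality (50, 5)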
Example n. 23
    def RunKPCAScikit(q):
      totalTimer = Timer()

      # Load input dataset.
      Log.Info("Loading dataset", self.verbose)
      data = np.genfromtxt(self.dataset, delimiter=',')

      with totalTimer:
        # Get the new dimensionality, if it is necessary.
        dimension = re.search('-d (\d+)', options)
        if not dimension:
          d = data.shape[1]
        else:
          d = int(dimension.group(1))
          if (d > data.shape[1]):
            Log.Fatal("New dimensionality (" + str(d) + ") cannot be greater "
              + "than existing dimensionality (" + str(data.shape[1]) + ")!")
            q.put(-1)
            return -1

        # Get the kernel type and make sure it is valid.
        kernel = re.search("-k ([^\s]+)", options)
        try:
          if not kernel:
            Log.Fatal("Choose kernel type, valid choices are 'linear'," +
                  " 'hyptan' and 'polynomial'.")
            q.put(-1)
            return -1
          elif kernel.group(1) == "linear":
            model = KernelPCA(n_components=d, kernel="linear")
          elif kernel.group(1) == "hyptan":
            model = KernelPCA(n_components=d, kernel="sigmoid")
          elif kernel.group(1) == "polynomial":
            degree = re.search('-D (\d+)', options)
            degree = 1 if not degree else int(degree.group(1))

            model = KernelPCA(n_components=d, kernel="poly", degree=degree)
          else:
            Log.Fatal("Invalid kernel type (" + kernel.group(1) + "); valid " +
                "choices are 'linear', 'hyptan' and 'polynomial'.")
            q.put(-1)
            return -1

          out = model.fit_transform(data)
        except Exception as e:
          q.put(-1)
          return -1

      time = totalTimer.ElapsedTime()
      q.put(time)
      return time
Example n. 24
def project(X, kde = False, kernel = False, gamma = 10):
    if kernel:
        kpca = KernelPCA(kernel="rbf", fit_inverse_transform=True, gamma=gamma)
        reduced_data = kpca.fit_transform(X)
    else:
        pca = PCA(n_components=2).fit(X)
        print(pca.explained_variance_ratio_)
        print(pca.components_)
        reduced_data = pca.transform(X)
    if kde:
        with sns.axes_style("white"):
            sns.jointplot(reduced_data[:, 0], reduced_data[:, 1], kind="kde");
        plt.show()
    plt.plot(reduced_data[:, 0], reduced_data[:, 1], 'k.', markersize=2)
    return reduced_data
def test_kernel_pca_sparse():
    rng = np.random.RandomState(0)
    X_fit = sp.csr_matrix(rng.random_sample((5, 4)))
    X_pred = sp.csr_matrix(rng.random_sample((2, 4)))

    for eigen_solver in ("auto", "arpack"):
        for kernel in ("linear", "rbf", "poly"):
            # transform fit data
            kpca = KernelPCA(4, kernel=kernel, eigen_solver=eigen_solver, fit_inverse_transform=False)
            X_fit_transformed = kpca.fit_transform(X_fit)
            X_fit_transformed2 = kpca.fit(X_fit).transform(X_fit)
            assert_array_almost_equal(np.abs(X_fit_transformed), np.abs(X_fit_transformed2))

            # transform new data
            X_pred_transformed = kpca.transform(X_pred)
            assert_equal(X_pred_transformed.shape[1], X_fit_transformed.shape[1])
def kPCA_visualization2d(X, y):
   
    kpca = KernelPCA(kernel="linear", fit_inverse_transform=True, gamma=10, n_components=2)
    X_kpca = kpca.fit_transform(X)
    X_back = kpca.inverse_transform(X_kpca)
    pca = PCA(n_components=2)
    X_pca = pca.fit_transform(X)

    class_1 = []
    class_0 = []
     
    for i in range(0, len(y)):
        
        if y[i] == 1:
            class_1.append( X_kpca[i] )
        else:
            class_0.append( X_kpca[i]  )
    
    class_0_x = []
    class_0_y = []
    class_1_x = []
    class_1_y = []
    for x in class_0:
        class_0_x.append( x[0] )
        class_0_y.append( x[1] )
        
    for x in class_1:
        class_1_x.append( x[0] )
        class_1_y.append( x[1] )
        

    # Plot
    #print principle component

    plt.title("kPCA kernel = linear")
    plt.plot( class_0_x, class_0_y, "ro")
    plt.plot( class_1_x, class_1_y, "go")
    plt.title("Projection by PCA")
    plt.xlabel("1st principal component")
    plt.ylabel("2nd component")
    

    
    plt.show()
def test_nested_circles():
    # Test the linear separability of the first 2D KPCA transform
    X, y = make_circles(n_samples=400, factor=0.3, noise=0.05, random_state=0)

    # 2D nested circles are not linearly separable
    train_score = Perceptron().fit(X, y).score(X, y)
    assert_less(train_score, 0.8)

    # Project the circles data into the first 2 components of a RBF Kernel
    # PCA model.
    # Note that the gamma value is data dependent. If this test breaks
    # and the gamma value has to be updated, the Kernel PCA example will
    # have to be updated too.
    kpca = KernelPCA(kernel="rbf", n_components=2, fit_inverse_transform=True, gamma=2.0)
    X_kpca = kpca.fit_transform(X)

    # The data is perfectly linearly separable in that space
    train_score = Perceptron().fit(X_kpca, y).score(X_kpca, y)
    assert_equal(train_score, 1.0)
Example n. 28
def main():
    filename1 = "HE.csv"
    filename2 = "MI.csv"

    csv_data = read_csv(filename1=filename1, filename2=filename2)
    csv_data = np.array(csv_data)
    total_matrix = []
    matrix_key = []

    train_mat = []
    test_mat = []


    for x in csv_data:
        for idx in x:
            matrix_key.append(idx)
            total_matrix.append(x[idx])

    total_matrix = np.array(total_matrix)
    print(total_matrix.shape)
    y = [1] * 37 + [-1] * 208

    print(total_matrix[0])


    kpca = KernelPCA(n_components=2, kernel='rbf')
    A = kpca.fit_transform(total_matrix)

    '''
    for x in total_matrix[:37]:
        plt.plot(x,'b')
    for x in total_matrix[37:]:
        plt.plot(x,'r')
    '''

    for idx in A[:37]:
        plt.plot(idx[0], idx[1], 'bo')
    for idx in A[37:]:
        plt.plot(idx[0], idx[1], 'ro')


    plt.show()
	def dim_reduce(self, reduce_method = None, n_components = None):
		''' 
		Only dimensionality reduction. 
		
		:param reduce_method: 
			The method for dimensionality reduction. Can be ...
			
		:n_components:
			Dimension
		'''
		if reduce_method is None:
			return self.X_train
		elif reduce_method == 'pca':
			print('performing pca dimensionality reduction.')
			pca = PCA(n_components = n_components, whiten = False)
			self.X_train = pca.fit_transform(self.X_train)
		elif reduce_method =='kpca':
			print('performing kpca dimensionality reduction.')
			kpca = KernelPCA(n_components = n_components, kernel = 'rbf', eigen_solver = 'arpack')
			self.X_train = kpca.fit_transform(self.X_train)
Example n. 30
def plotModuleEmbedding(dmatDf, labels, dropped=None, method='kpca', plotLabels=True, plotDims=[0,1], weights=None, txtSize='large'):
    """Embed cytokine correlation matrix to visualize cytokine clusters"""
    uLabels = np.unique(labels).tolist()
    n_components = max(plotDims) + 1
    dmat = dmatDf.values
    
    if method == 'kpca':
        """By using KernelPCA for dimensionality reduction we don't need to impute missing values"""
        pca = KernelPCA(kernel='precomputed', n_components=n_components)
        gram = 1 - (dmat / dmat.max())
        xy = pca.fit_transform(gram)
    elif method == 'tsne':
        xy = tsne.run_tsne(dmat)
    elif method == 'sklearn-tsne':
        tsneObj = TSNE(n_components=n_components, metric='precomputed', random_state=0)
        xy = tsneObj.fit_transform(dmat)

    colors = palettable.colorbrewer.get_map('Set1', 'qualitative', len(uLabels)).mpl_colors
    figh = plt.gcf()
    figh.clf()
    axh = figh.add_axes([0.03,0.03,0.94,0.94])
    axh.axis('off')
    figh.set_facecolor('white')
    annotationParams = dict(xytext=(0,5), textcoords='offset points', size=txtSize)
    for cyi,cy in enumerate(dmatDf.columns):
        if not dropped is None and dropped[cy]:
            cyLab = '*' + cy
            alpha = 0.3
        else:
            cyLab = cy
            alpha = 0.8

        if plotLabels:
            axh.annotate(cyLab, xy=(xy[cyi,plotDims[0]], xy[cyi,plotDims[1]]), **annotationParams)
        col = colors[uLabels.index(labels[cyi])]
        if weights is None:
            s = 100
        else:
            s = weights[cy] * 200 + 10
        axh.scatter(xy[cyi,plotDims[0]], xy[cyi,plotDims[1]], marker='o', s=s, alpha=alpha, c=col)
    plt.draw()
Example n. 31
# print np.shape(data_train_poly)
# pca = PCA(n_components=2)
# data_proj = pca.fit_transform(np.transpose(data_train_poly))
# print np.shape(data_proj)
# ae_proj = data_proj[ae_index,:]
# ao_proj = data_proj[ao_index,:]
# uw_proj = data_proj[uw_index,:]
# iy_proj = data_proj[iy_index,:]
# aa_proj = data_proj[aa_index,:]
# eh_proj = data_proj[eh_index,:]

# In[34]:

kpca = KernelPCA(kernel="poly", coef0=2, degree=2, n_components=2)
data_train = np.transpose(data_train)
data_proj = kpca.fit_transform(data_train)
print(np.shape(data_proj))
ae_proj = data_proj[ae_index, :]
ao_proj = data_proj[ao_index, :]
uw_proj = data_proj[uw_index, :]
iy_proj = data_proj[iy_index, :]
aa_proj = data_proj[aa_index, :]
eh_proj = data_proj[eh_index, :]

# In[35]:

plt.scatter(ae_proj[:, 0], ae_proj[:, 1], c='r', marker='o')
plt.scatter(ao_proj[:, 0], ao_proj[:, 1], c='g', marker='o')
plt.scatter(uw_proj[:, 0], uw_proj[:, 1], c='b', marker='o')
plt.scatter(iy_proj[:, 0], iy_proj[:, 1], c='y', marker='o')
plt.scatter(aa_proj[:, 0], aa_proj[:, 1], c='c', marker='o')
Example n. 32
from sklearn.cross_validation import KFold
from sklearn import svm
from sklearn.feature_selection import SelectKBest
from sklearn.decomposition import PCA, KernelPCA
from sklearn import cross_validation
from sklearn.cross_validation import StratifiedKFold
import matplotlib.cm as cm

# Import test data and labels
import_test = sio.loadmat(file_loc + 'Test.mat')
import_train = sio.loadmat(file_loc + 'Train.mat')
X_train = import_train['Xtrain']
X_testing = import_test['Xtest']
Y_train = import_train['Ytrain']
pca = KernelPCA(kernel="rbf", degree=5, gamma=10)
pca.fit_transform(X_train)
#print(pca.explained_variance_ratio_)
X_train = pca.transform(X_train)
#k_fold = cross_validation.KFold(len(X_train), 5)
Y_kf = Y_train.ravel()
k_fold = StratifiedKFold(Y_kf, n_folds=5)
print(k_fold)
#X, X_test, Y, Y_test = cross_validation.train_test_split(X_train, Y_train, test_size=0.2, random_state=0)
#y = Y.ravel()

#X_test = X[401:,:]
#X = X[:400,:]
#X = X[:, :2]
#Y_test = Y[401:,:]
#Y = Y[:400,:]
Example n. 33
import numpy as np
import matplotlib.pyplot as plt
from sklearn.decomposition import KernelPCA
from sklearn import datasets
import pandas as pd

iris = datasets.load_iris()
X = iris.data
y = iris.target
target_names = iris.target_names

data1 = pd.DataFrame(data=np.c_[iris['data'], iris['target']],
                     columns=iris['feature_names'] + ['target'])

kpca = KernelPCA(n_components=2, kernel="rbf")
data1_kpca = kpca.fit_transform(data1)

fig = plt.figure()
ax = fig.add_subplot(1, 1, 1)
colors = (
    (1, 0, 0),
    (0, 1, 0),
    (0, 0, 1),
    (0.5, 0.5, 0),
    (0, 0.5, 0.5),
    (0.5, 0, 0.5),
    (0.4, 0.6, 0),
    (0.6, 0.4, 0),
    (0, 0.6, 0.4),
    (0.5, 0.3, 0.2),
)
Example n. 34
print('LDA_LR train/test accuracies %.3f/%.3f' %
      (lda_lr_train_score, lda_lr_test_score))

##########LDA&SVM##########################################
svm = SVC(kernel='linear', C=1.0, random_state=42)
svm.fit(X_train_lda, y_train)
lda_svm_train_pred = svm.predict(X_train_lda)
lda_svm_test_pred = svm.predict(X_test_lda)
lda_svm_train_score = accuracy_score(y_train, lda_svm_train_pred)
lda_svm_test_score = accuracy_score(y_test, lda_svm_test_pred)
print('LDA_SVM train/test accuracies %.3f/%.3f' %
      (lda_svm_train_score, lda_svm_test_score))

##########kPCA#############################################
kpca = KernelPCA(n_components=2, kernel='rbf', gamma=0.1)
X_train_kpca = kpca.fit_transform(X_train_std)
X_test_kpca = kpca.transform(X_test_std)

svm = SVC(kernel='linear', C=1.0, random_state=42)
svm.fit(X_train_kpca, y_train)

kpca_train_pred = svm.predict(X_train_kpca)
kpca_test_pred = svm.predict(X_test_kpca)
kpca_train_score = accuracy_score(y_train, kpca_train_pred)
kpca_test_score = accuracy_score(y_test, kpca_test_pred)
print('KPCA train/test accuracies %f/%f' % (kpca_train_score, kpca_test_score))

gammas = [0.001, 0.01, 0.1, 0.5, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 15, 20]
kpca_lr_train_scores = []
kpca_lr_test_scores = []
kpca_svm_train_scores = []
Example n. 35
    def dimRed(self,
               method="MDS",
               mapName=None,
               return_embedding=True,
               n_components=2,
               **kwargs):
        """
        Perform dimensionality reduction using the specified embedding method.
        Currently supported algorithms are "KPCA", "MDS", "TSNE", and "UMAP". The
        number of components (i.e. the desired number of dimensions to reduce the
        data to) is automatically set to 2, but other options may be chosen.

        For other keyword arguments (algorithm parameter settings), please
        refer to the documentation for each algorithm:
        
        Args
        ----
        method: str
            Which dimensionality reduction algorithm to use. "KPCA", "MDS", 
            "TSNE", or "UMAP".

        mapName: str
            Optional name for the particular embedding.

        return_embedding: bool
            Return the embedding information as an embedding dataclass object. It is
            also stored in the StructureMap.embeddings attribute. Useful for
            IPython environments (see example 2).

        n_components: int
            Number of components to reduce the data to. I.e. for a 2D map,
            n_components=2, for a 3D map, n_components=3.


        Documentation and helpful resources:
        ------------------------------------
        k-PCA:
            scikit-learn documentation:
            https://scikit-learn.org/stable/modules/generated/sklearn.decomposition.KernelPCA.html

        MDS:
            scikit-learn documentation:
            https://scikit-learn.org/stable/modules/generated/sklearn.manifold.MDS.html

        TSNE:
            scikit-learn documentation:
            https://scikit-learn.org/stable/modules/generated/sklearn.manifold.TSNE.html

            a useful resource advising on parameter settings and interpretation:
            https://distill.pub/2016/misread-tsne/

        UMAP:
            UMAP documentation:
            https://umap-learn.readthedocs.io/en/latest/parameters.html

        """

        assert self._k is not None, "Calculate kernel matrix first!"

        method = method.strip().upper()

        # initiate the embedding scheme algorithm.
        if method == "MDS":
            a = eval(f"{method}(dissimilarity='precomputed', **kwargs)")
        elif method == "KPCA":
            a = KernelPCA(n_components=n_components,
                          kernel="precomputed",
                          **kwargs)
        else:
            a = eval(f"{method}(metric='precomputed', **kwargs)")

        # extract coordinates.
        if method in ["MDS", "TSNE"]:
            c = a.fit(self.d).embedding_
        elif method == "UMAP":
            c = a.fit_transform(self.d)
        elif method == "KPCA":
            # note k-PCA takes the kernel form, not the distance matrix.
            c = a.fit_transform(self._k)

        # store coordinates in dataclass, along with information about the
        # scaling/soap_parameter/embedding_parameter settings.
        if mapName is not None and mapName in [m.name for m in self.maps]:
            raise ValueError(f"mapName '{mapName}' already used!")

        if mapName is None:
            mapName = f"map_{len(self.embeddings)+1:02d}"

        # create the dataclass.
        e = embedding(mapName, method, self._soap_parameters, self._scaling, c)

        # Store embedding in embeddings set.
        self.embeddings |= {e}

        # optionally return the embedding.
        if return_embedding:
            return e
    startInd = index * 100
    endInd = (index + 1) * 100
    allXData[startInd:endInd, :] = curX
    index = index + 1

for kk in range(len(validationIDs)):
    curFile = 'AlexNetFeatures2D/feats3D_conv2_' + validationIDs[kk] + '.npy'
    curX = np.load(curFile)
    startInd = index * 100
    endInd = (index + 1) * 100
    allXData[startInd:endInd, :] = curX
    index = index + 1

transformer = KernelPCA(n_components=20)
#transformer = random_projection.GaussianRandomProjection()
allXnew = transformer.fit_transform(allXData)
print(allXnew.shape)

allPats = np.zeros((1595, 20 * 100))
for kk in range(len(trainTestIDs) + len(validationIDs)):
    startInd = kk * 100
    endInd = (kk + 1) * 100
    curPat = allXnew[startInd:endInd, :]
    allPats[kk, :] = np.reshape(curPat, 20 * 100)

allPatsNew = transformer.fit_transform(allPats)
print(allPatsNew.shape)

Xdata = allPatsNew[0:numTrainTest, :]
Xvalid = allPatsNew[numTrainTest:(numTrainTest + numValid), :]
Example n. 37
                                                            random_state=0)

from sklearn.preprocessing import StandardScaler
scaler = StandardScaler()

# Fit on training set only.
scaler.fit(train_img)

# Apply transform to both the training set and the test set.
train_img = scaler.transform(train_img)
test_img = scaler.transform(test_img)

# Applying kernel PCA
from sklearn.decomposition import KernelPCA
kpca = KernelPCA(n_components=80, kernel='rbf')
train_img = kpca.fit_transform(train_img)
test_img = kpca.transform(test_img)

pca.fit(train_img)

pca.n_components_

train_img = pca.transform(train_img)
test_img = pca.transform(test_img)

from sklearn.linear_model import LogisticRegression

#all parameters not specified are set to their defaults
# default solver is incredibly slow which is why it was changed to 'lbfgs'
logisticRegr = LogisticRegression(solver='lbfgs')
Example n. 38
            s=500)
plt.legend(scatterpoints=1)

plt.tight_layout()
# plt.savefig('images/05_18.png', dpi=300)
plt.show()

# ## Kernel principal component analysis in scikit-learn

# In[51]:

from sklearn.decomposition import KernelPCA

X, y = make_moons(n_samples=100, random_state=123)
scikit_kpca = KernelPCA(n_components=2, kernel='rbf', gamma=15)
X_skernpca = scikit_kpca.fit_transform(X)

plt.scatter(X_skernpca[y == 0, 0],
            X_skernpca[y == 0, 1],
            color='red',
            marker='^',
            alpha=0.5)
plt.scatter(X_skernpca[y == 1, 0],
            X_skernpca[y == 1, 1],
            color='blue',
            marker='o',
            alpha=0.5)

plt.xlabel('PC1')
plt.ylabel('PC2')
plt.tight_layout()
Example n. 39
transformed_data = np.array(transformed_data)

label = df['class'].unique()
print(label)

with plt.style.context("seaborn-darkgrid"):
    for l in label:
        plt.scatter(transformed_data[y==l,0], transformed_data[y==l,1],
                    label=l)
    plt.xlabel("PC 1")
    plt.ylabel("PC 2")
    plt.legend()
    plt.show()
    
kpca0 = KernelPCA(n_components=2, kernel='poly')
Y = kpca0.fit_transform(X)

with plt.style.context("seaborn-darkgrid"):  
    for l in label:
        plt.scatter(Y[y==l,0], Y[y==l,1],label=l)
    plt.xlabel("PC 1")
    plt.ylabel("PC 2")
    plt.legend()
    plt.show()
    
from sklearn.decomposition import PCA
pca = PCA(n_components=2)
Y_ = pca.fit_transform(X)

with plt.style.context("seaborn-darkgrid"):  
    for l in label:
Example n. 40
print('svm_train_score_cv:', LDA_svm_train_score_cv)
print('svm_test_score_cv:', LDA_svm_test_score_cv)

#kPCA transformation (Test several different values for Gamma)
from sklearn.decomposition import KernelPCA

print('\n' + 'kPCA Trasformation:')

kPCA_df = pd.DataFrame()

for g in [
        0.018, 0.01899, 0.019, 0.0195, 0.02, 0.03, 0.05, 0.055, 0.06, 0.065,
        0.07, 0.08, 0.09, 0.1, 0.2
]:
    kpca = KernelPCA(n_components=2, kernel='rbf', gamma=g)
    kPCA_X_train = kpca.fit_transform(X_train_std)
    kPCA_X_test = kpca.transform(X_test_std)
    array = {}
    array['gamma'] = g

    #kPCA lr
    lr.fit(kPCA_X_train, y_train)
    kPCA_lr_train_score_cv = np.average(
        cross_val_score(lr, kPCA_X_train, y_train, cv=10))
    kPCA_lr_test_score_cv = np.average(
        cross_val_score(lr, kPCA_X_test, y_test, cv=10))
    array['lr_train_score_cv'] = kPCA_lr_train_score_cv
    array['lr_test_score_cv'] = kPCA_lr_test_score_cv

    #kPCA SVM
    svm.fit(kPCA_X_train, y_train)
x_train_pca = pca.fit_transform(x_train_sc)
#Applying what was learned
x_test_pca = pca.transform(x_test_sc)

#LDA - Principal components
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis as LDA
#Applying LDA
lda = LDA(n_components=1)
x_train_lda = lda.fit_transform(x_train_sc, y_train)
x_test_lda = lda.transform(x_test_sc)

#Kernel PCA - Principal components
from sklearn.decomposition import KernelPCA
kpca = KernelPCA(n_components=1, kernel='rbf')
x_train_kpca = kpca.fit_transform(x_train_sc)
x_test_kpca = kpca.transform(x_test_sc)


#Logistic regression
#############################################################
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import confusion_matrix
logistic=LogisticRegression(random_state=0)

# 1) Original variables
logistic.fit(x_train, y_train) #change x here
y_est_train=logistic.predict(x_train) #change x, y here
#Confusion matrix
cm=confusion_matrix(y_train, y_est_train) #change y here
prec_train=(cm[0,0]+cm[1,1])/np.sum(cm) #change the name here
Example n. 42
import numpy as np
import matplotlib.pyplot as plt

from sklearn.decomposition import PCA, KernelPCA
from sklearn.datasets import make_circles

np.random.seed(0)

X, y = make_circles(n_samples=400, factor=.3, noise=.05)

kpca = KernelPCA(kernel="rbf", fit_inverse_transform=True, gamma=10)
X_kpca = kpca.fit_transform(X)
X_back = kpca.inverse_transform(X_kpca)
pca = PCA()
X_pca = pca.fit_transform(X)
print(X_pca.shape)
print(X_back.shape)
# print X_kpca.shape

# Plot results

plt.figure()
plt.subplot(2, 2, 1, aspect='equal')
plt.title("Original space")
reds = y == 0
blues = y == 1

plt.scatter(X[reds, 0], X[reds, 1], c="red", s=20, edgecolor='k')
plt.scatter(X[blues, 0], X[blues, 1], c="blue", s=20, edgecolor='k')
plt.xlabel("$x_1$")
plt.ylabel("$x_2$")
Example n. 43
def kernelPCA(k, X):
    X = np.array(X)
    kpca = KernelPCA(n_components=k)
    kpcaresult = kpca.fit_transform(X)
    np.savetxt("KernelPCA_out.csv", kpcaresult, delimiter=",")
    return None
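A short usage sketch for the helper above (it assumes numpy and KernelPCA are imported as in the surrounding examples; the output file name is hard-coded inside the function):

import numpy as np

X = np.random.rand(30, 6)   # toy data
kernelPCA(2, X)             # writes the 2-component (default linear-kernel) projection to KernelPCA_out.csv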
Example n. 44
from sklearn.datasets import make_moons
import matplotlib.pyplot as plt
from sklearn.decomposition import KernelPCA
import numpy as np
from matplotlib.ticker import FormatStrFormatter
import rbf_kernel_pca as RKP

X, y = make_moons(n_samples=100, random_state=123)

skKpca = KernelPCA(n_components=2, kernel='rbf', gamma=15)
X_skernpca = skKpca.fit_transform(X)
plt.scatter(X_skernpca[y == 0, 0],
            X_skernpca[y == 0, 1],
            color='r',
            marker='^',
            alpha=0.5)
plt.scatter(X_skernpca[y == 1, 0],
            X_skernpca[y == 1, 1],
            color='b',
            marker='o',
            alpha=0.5)
plt.xlabel('PC1')
plt.ylabel('PC2')
plt.show()
y = dataset.iloc[:, 4]

# Splitting the dataset into the Training set and Test set
X_train, X_test, y_train, y_test = train_test_split(X,
                                                    y,
                                                    test_size=0.2,
                                                    random_state=42)

# Feature Scaling
sc = StandardScaler()
X_train = sc.fit_transform(X_train)
X_test = sc.transform(X_test)

# Applying Kernal PCA
kpca = KernelPCA(n_components=2, kernel='rbf')
X_train = kpca.fit_transform(X_train)
X_test = kpca.transform(X_test)

# Build logistic classifier
clf = LogisticRegression(random_state=42)
clf.fit(X_train, y_train)

# Applying Linear Discriminant Analysis
y_predict = clf.predict(X_test)

# Find confusion matrix and copy data in the x_set and y_set
cm = confusion_matrix(y_test, y_predict)

# Find accuracy score
print("Accuracy is: ", accuracy_score(y_test, y_predict) * 100, "%")
Example n. 46
y = dataset.iloc[:, 4].values

from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X,
                                                    y,
                                                    test_size=0.25,
                                                    random_state=0)

from sklearn.preprocessing import StandardScaler
sc = StandardScaler()
X_train = sc.fit_transform(X_train)
X_test = sc.transform(X_test)

from sklearn.decomposition import KernelPCA
kernel_pca = KernelPCA(n_components=2, kernel='rbf')
X_train = kernel_pca.fit_transform(X_train)
X_test = kernel_pca.transform(X_test)

from sklearn.linear_model import LogisticRegression
classifier = LogisticRegression(random_state=0)
classifier.fit(X_train, y_train)

y_pred = classifier.predict(X_test)

from sklearn.metrics import confusion_matrix
cm = confusion_matrix(y_test, y_pred)

from matplotlib.colors import ListedColormap
X_set, y_set = X_train, y_train
X1, X2 = np.meshgrid(
    np.arange(start=X_set[:, 0].min() - 1,
Example n. 47
import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import make_swiss_roll
from sklearn.decomposition import KernelPCA

from sklearn.model_selection import GridSearchCV
from sklearn.linear_model import LogisticRegression
from sklearn.pipeline import Pipeline

X, t = make_swiss_roll(n_samples=1000, noise=0.2, random_state=42)

if 0:
    rbf_pca = KernelPCA(n_components=2, kernel="rbf", gamma=0.04)
    X_reduced = rbf_pca.fit_transform(X)

    lin_pca = KernelPCA(n_components=2,
                        kernel="linear",
                        fit_inverse_transform=True)
    rbf_pca = KernelPCA(n_components=2,
                        kernel="rbf",
                        gamma=0.0433,
                        fit_inverse_transform=True)
    sig_pca = KernelPCA(n_components=2,
                        kernel="sigmoid",
                        gamma=0.001,
                        coef0=1,
                        fit_inverse_transform=True)

y = t > 6.9

#Importing the data set with pandas and taking the necessary variables
dataset = pd.read_csv('Social_Network_Ads.csv')
x = dataset.iloc[:, [2, 3]].values
y = dataset.iloc[:, 4].values
#Splitting the data into test and training set
from sklearn.cross_validation import train_test_split as tts
xTrain, xTest, yTrain, yTest = tts(x, y, test_size=0.25, random_state=0)
#Feature Scaling
from sklearn.preprocessing import StandardScaler
ss = StandardScaler()
xTrain = ss.fit_transform(xTrain)
xTest = ss.transform(xTest)
#Applying Kernel PCA
from sklearn.decomposition import KernelPCA
kpca = KernelPCA(n_components=2, kernel='rbf')
xTrain = kpca.fit_transform(xTrain)
xTest = kpca.transform(xTest)
#Checking for the most significant components by trial & error
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis as lda
ld = lda(n_components=2)
xTrain = ld.fit_transform(xTrain, yTrain)
xTest = ld.transform(xTest)
#Fitting logistic regression to the training set
from sklearn.linear_model import LogisticRegression
classifier = LogisticRegression(random_state=0)
classifier.fit(xTrain, yTrain)
#Predicting the test set results
yPred = classifier.predict(xTest)
#Evaluating model performance by using Confusion Matrix
from sklearn.metrics import confusion_matrix
cm = confusion_matrix(yTest, yPred)
Example n. 49
#%%

# convert to a sparse and compatible format for dimensionality reduction using sklearn
sparse_corpus_tfidf = matutils.corpus2csc(corpus_tfidf)
sparse_corpus_tfidf_transpose = sparse_corpus_tfidf.transpose()


train_tfidf, test_tfidf, train_category, test_category = train_test_split(sparse_corpus_tfidf_transpose, united.iloc[:, 1], test_size = 0.2, random_state = seed)

#%%

print('starting dimensionality reduction')
# reduce dimensions
from sklearn.decomposition import KernelPCA
reducer= KernelPCA(n_components = 30 , kernel="cosine", random_state=seed)
corpus_train_tfidf_kpca = reducer.fit_transform(train_tfidf)
corpus_test_tfidf_kpca = reducer.transform(test_tfidf)

corpus_train_tfidf_reduced=corpus_train_tfidf_kpca
corpus_test_tfidf_reduced =corpus_test_tfidf_kpca

#print('starting tsne')
#from sklearn.manifold import TSNE
#reducer = TSNE(n_components = 30, learning_rate=1000.0, n_iter=1000, metric='cosine')
#corpus_train_tfidf_reduced = reducer.fit_transform(corpus_train_tfidf_kpca)
#corpus_test_tfidf_reduced = reducer.transform(corpus_test_tfidf_kpca)


#%%

import matplotlib.pyplot as plt
Example n. 50
import numpy as np
import matplotlib.pyplot as plt

from sklearn.decomposition import PCA, KernelPCA
from sklearn.datasets import make_circles

np.random.seed(7)

X, y = make_circles(n_samples=500, factor=0.2, noise=0.04)

pca = PCA()
X_pca = pca.fit_transform(X)

kernel_pca = KernelPCA(kernel="rbf", fit_inverse_transform=True, gamma=10)
X_kernel_pca = kernel_pca.fit_transform(X)
X_inverse = kernel_pca.inverse_transform(X_kernel_pca)

class_0 = np.where(y == 0)
class_1 = np.where(y == 1)
plt.figure()
plt.title("Original data")
plt.plot(X[class_0, 0], X[class_0, 1], "ko", mfc='none')
plt.plot(X[class_1, 0], X[class_1, 1], "kx")
plt.xlabel("1st dimension")
plt.ylabel("2nd dimension")


plt.figure()
plt.plot(X_pca[class_0, 0], X_pca[class_0, 1], "ko", mfc='none')
plt.plot(X_pca[class_1, 0], X_pca[class_1, 1], "kx")
plt.title("Data transformed using PCA")
for doc in corpus_tfidf:
    print(doc)
    break

# convert to a sparse and compatible format for dimensionality reduction using sklearn
sparse_corpus_tfidf = matutils.corpus2csc(corpus_tfidf)
sparse_corpus_tfidf_transpose = sparse_corpus_tfidf.transpose()

#%%

print('starting dimensionality reduction')
# reduce dimensions
from sklearn.decomposition import KernelPCA
import numpy as np
reducer = KernelPCA(n_components=700, kernel="cosine", random_state=seed)
corpus_tfidf_kpca = reducer.fit_transform(sparse_corpus_tfidf_transpose)

explained_variance = np.var(corpus_tfidf_kpca, axis=0)
#total_variance = np.var(train_tfidf, axis=0)
#explained_variance_ratio = explained_variance / np.sum(total_variance)
explained_variance_ratio = explained_variance / np.sum(explained_variance)

cum_explained_variance = np.cumsum(explained_variance_ratio)

#%%
import matplotlib.pyplot as plt
plt.figure()
plt.plot(explained_variance_ratio)

#plt.figure()
#plt.plot(cum_explained_variance)
Example n. 52
            value, counts = np.unique(y_train, return_counts=True)
            minority_class = value[np.argmin(counts)]
            majority_class = value[np.argmax(counts)]

            idx_min = np.where(y_train == minority_class)[0]
            idx_maj = np.where(y_train == majority_class)[0]
            
            full_X = np.concatenate((X_train, X_test))
            full_y = np.concatenate((y_train, y_test))
            
            number_of_clusters = len(idx_min)
                
            # Adding PCA Method
            transformer = KernelPCA(n_components=math.ceil(X_train.shape[1]/3), kernel='poly')
            X_transformed = transformer.fit_transform(full_X)    

            # Training the kmean model
            kmeans = KMeans(n_clusters=number_of_clusters)
            kmeans.fit(full_X)

            points_under_each_cluster = {i: np.where(kmeans.labels_ == i)[0] for i in range(kmeans.n_clusters)}
            centers = kmeans.cluster_centers_

            # From each cluster removing the test instances 
            for i in points_under_each_cluster.keys():
                temp = []
                for j in range(len(points_under_each_cluster[i])):
                    if points_under_each_cluster[i][j] not in test_index:
                        temp.append(points_under_each_cluster[i][j])   
                points_under_each_cluster[i] = np.array(temp)
Example n. 53
print(cm)
print('Train accuracy : ', round(accuracy_score(y_train, y_pred_t)*100, 2) , '%')
#Predicting using Test Data
y_pred = clf.predict(x_test)

#Visualizing Predicted data using confusion matrix
cm = confusion_matrix(y_test,y_pred)
print(cm)
print('Test accuracy : ', round(accuracy_score(y_test, y_pred)*100,2) , '%')

#Graphical Visulization of Data
#Plot Code Starts

from sklearn.decomposition import KernelPCA
pca = KernelPCA(n_components=2)
principalComponents = pca.fit_transform(x,y)
principalDf = pd.DataFrame(data = principalComponents
             , columns = ['principal component 1', 'principal component 2'])

finalDf = pd.concat([principalDf, dataset[['CLASS']]], axis = 1)

fig = plt.figure(figsize = (8,8))
ax = fig.add_subplot(1,1,1) 
ax.set_xlabel('Component 1', fontsize = 15)
ax.set_ylabel('Component 2', fontsize = 15)
ax.set_title('2 component PCA', fontsize = 20)
targets = [0, 1, 2, 3]
colors = ['r', 'g', 'b', 'y']
for target, color in zip(targets,colors):
    indicesToKeep = finalDf['CLASS'] == target
    ax.scatter(finalDf.loc[indicesToKeep, 'principal component 1']
# Load libraries
from sklearn.decomposition import PCA, KernelPCA
from sklearn.datasets import make_circles

# Create linearly inseparable data
features, _ = make_circles(n_samples=1000, random_state=1, noise=0.1, factor=0.1)

# Apply kernel PCA with radial basis function (RBF) kernel
kpca = KernelPCA(kernel="rbf", gamma=15, n_components=1)
features_kpca = kpca.fit_transform(features)

print("Original number of features:", features.shape[1])
print("Reduced number of features:", features_kpca.shape[1])
Example n. 55
        def RunKPCAScikit(q):
            totalTimer = Timer()

            # Load input dataset.
            Log.Info("Loading dataset", self.verbose)
            data = np.genfromtxt(self.dataset, delimiter=',')

            with totalTimer:
                # Get the new dimensionality, if it is necessary.
                dimension = re.search('-d (\d+)', options)
                if not dimension:
                    d = data.shape[1]
                else:
                    d = int(dimension.group(1))
                    if (d > data.shape[1]):
                        Log.Fatal("New dimensionality (" + str(d) +
                                  ") cannot be greater " +
                                  "than existing dimensionality (" +
                                  str(data.shape[1]) + ")!")
                        q.put(-1)
                        return -1

                # Get the kernel type and make sure it is valid.
                kernel = re.search("-k ([^\s]+)", options)
                try:
                    if not kernel:
                        Log.Fatal(
                            "Choose kernel type, valid choices are 'linear'," +
                            " 'hyptan' and 'polynomial'.")
                        q.put(-1)
                        return -1
                    elif kernel.group(1) == "linear":
                        model = KernelPCA(n_components=d, kernel="linear")
                    elif kernel.group(1) == "hyptan":
                        model = KernelPCA(n_components=d, kernel="sigmoid")
                    elif kernel.group(1) == "polynomial":
                        degree = re.search('-D (\d+)', options)
                        degree = 1 if not degree else int(degree.group(1))

                        model = KernelPCA(n_components=d,
                                          kernel="poly",
                                          degree=degree)
                    elif kernel.group(1) == "cosine":
                        model = KernelPCA(n_components=d,
                                          kernel="cosine",
                                          degree=degree)
                    elif kernel.group(1) == "gaussian":
                        model = KernelPCA(n_components=d,
                                          kernel="rbf",
                                          degree=degree)
                    else:
                        Log.Fatal(
                            "Invalid kernel type (" + kernel.group(1) +
                            "); valid " +
                            "choices are 'linear', 'hyptan' and 'polynomial'.")
                        q.put(-1)
                        return -1

                    out = model.fit_transform(data)
                except Exception as e:
                    q.put(-1)
                    return -1

            time = totalTimer.ElapsedTime()
            q.put(time)
            return time
Example n. 56
from sklearn.preprocessing import StandardScaler
sc_x = StandardScaler()
x_train = sc_x.fit_transform(x_train)
x_test = sc_x.transform(x_test)

#Dimension Reduction Technique (PCA):
'''
we are considering the 2 largest eigenvalues, which give us the 2 eigenvectors
capturing the most variance

our non-linear data is mapped to a higher dimension using the Gaussian kernel trick
and then PCA will reduce the dimension to 2 as requested
'''
from sklearn.decomposition import KernelPCA
kpca = KernelPCA(n_components=2, kernel='rbf')
x_train = kpca.fit_transform(x_train)
x_test = kpca.transform(x_test)

#Fitting the data to Logistic Reg Module:
from sklearn.linear_model import LogisticRegression
classifier = LogisticRegression(random_state=0)
classifier.fit(x_train, y_train)

#Prediction of Training Data:
y_predict_train = classifier.predict(x_train)

#Prediction of Testing Data:
y_predict_test = classifier.predict(x_test)

#Confusion Metrix to evaluate the prediction:
from sklearn.metrics import confusion_matrix
Example n. 57
def kpca_transformed(dataset_name, kernel_name, gamma):
    kpca = KernelPCA(n_components=2, kernel=kernel_name, gamma=gamma)

    X, y = datasets[dataset_name]
    X_transformed = kpca.fit_transform(X)
    return X, X_transformed, y
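The helper above reads from a module-level `datasets` mapping that is not shown in this fragment; a minimal sketch of what such a mapping might look like (the dict name and keys here are assumptions, and KernelPCA is assumed to be imported where the function is defined):

from sklearn.datasets import make_moons, make_circles

datasets = {
    'moons': make_moons(n_samples=200, noise=0.05, random_state=0),
    'circles': make_circles(n_samples=200, factor=0.3, noise=0.05, random_state=0),
}

X, X_transformed, y = kpca_transformed('moons', 'rbf', gamma=15)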
if (0):
    #%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% NONLINEAR METHODS %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%#
    #%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% NONLINEAR METHODS %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%#
    #%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% NONLINEAR METHODS %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%#
        
    d = pair.pairwise_distances(Xtrain,Xtrain)
    aux = np.triu(d)
    sigma = np.sqrt(np.mean(np.power(aux[aux!=0],2)*0.5))
    gamma = 1/(2*sigma**2)

if (0):
    #%% K-PCA
    # Calculate accumulated variance
    kpca = KernelPCA(kernel="rbf",gamma=gamma)
    kpca.fit_transform(Xtrain)
    eigenvals = kpca.lambdas_[0:220]

    
    # Calculate classifiation scores for each component
    nComponents = np.linspace(1, 500, 100, endpoint=True).astype(int)
    kpcaScores = np.zeros((5, len(nComponents)))
    
    kpca = KernelPCA(n_components = Ntrain,kernel="rbf",gamma=gamma)
    kpca.fit(Xtrain)
    XtrainT = kpca.transform(Xtrain)
    XtestT = kpca.transform(Xtest)
    

    for i in range(len(nComponents)):   
        kpcaScores[:,i] = util.classify(XtrainT[:,:nComponents[i]],XtestT[:,:nComponents[i]],labelsTrain,labelsTest)
Example n. 59
 def kernel_pca(data, dim=3):
     scikit_kpca = KernelPCA(n_components=dim, kernel='rbf', gamma=15)
     result = scikit_kpca.fit_transform(data)
     return result
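A quick usage sketch for the helper above (toy data; gamma=15 is hard-coded inside the function, and numpy plus KernelPCA are assumed to be imported in its module):

import numpy as np

data = np.random.rand(100, 10)
embedded = kernel_pca(data, dim=3)   # RBF kernel PCA down to 3 components
print(embedded.shape)                # (100, 3)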
Example n. 60
    print(X.shape, y.shape)
    print(X[:5])
    print(y[:5])

    fig = plt.figure()
    ax = fig.add_subplot(projection='3d')
    ax.scatter(X[:, 0], X[:, 1], X[:, 2], c=y, cmap=plt.cm.hot)
    ax.set_xlabel('x')
    ax.set_ylabel('y')
    ax.set_zlabel('z')
    # ax.view_init(180, 0)
    plt.show()

    # Kernel PCA: principal component analysis using the kernel trick
    lin_kpca = KernelPCA(n_components=2, kernel='linear', random_state=1)
    X_reduced = lin_kpca.fit_transform(X)
    print(X_reduced.shape)

    plt.scatter(X_reduced[:, 0], X_reduced[:, 1], c=y, cmap=plt.cm.hot)
    plt.show()

    rbf_kpca = KernelPCA(n_components=2,
                         kernel='rbf',
                         gamma=0.043,
                         random_state=1)
    X_reduced = rbf_kpca.fit_transform(X)
    plt.scatter(X_reduced[:, 0], X_reduced[:, 1], c=y, cmap=plt.cm.hot)
    plt.show()

    sigmoid_kpca = KernelPCA(n_components=2,
                             kernel='sigmoid',