Esempio n. 1
0
def fit_kernel_pca(x, n_components, kernel='rbf', *args, **kwargs):
    """Reduce `x` to `n_components` dimensions with kernel PCA.

    Args:
        x: array-like of shape (n_samples, n_features).
        n_components: number of components to keep.
        kernel: kernel name forwarded to KernelPCA (default 'rbf').
        **kwargs: extra KernelPCA constructor options (e.g. gamma=0.1).

    Returns:
        The transformed array of shape (n_samples, n_components).

    Raises:
        TypeError: if extra positional arguments are supplied.
    """
    if args:
        # KernelPCA's parameters after n_components are keyword-only in
        # modern scikit-learn, so positional extras can never be forwarded
        # correctly. The old `KernelPCA(n_components=..., *args)` call
        # raised a confusing "multiple values for n_components" TypeError;
        # fail fast with an explicit message instead.
        raise TypeError(
            "fit_kernel_pca accepts extra KernelPCA options as keyword "
            "arguments only, e.g. fit_kernel_pca(x, 2, gamma=0.1)")
    x_new = KernelPCA(n_components=n_components,
                      kernel=kernel,
                      **kwargs).fit_transform(x)
    return x_new
Esempio n. 2
0
from sklearn.model_selection import train_test_split

# Hold out 20% of the documents for evaluation.
# Bug fix: DataFrame.ix was deprecated and removed (pandas 1.0);
# positional selection of the label column now uses .iloc.
X_train, X_test, y_train, y_test = train_test_split(
    sparse_corpus_tfidf_transpose,
    df.iloc[:, 1],
    test_size=0.2,
    random_state=seed)

from sklearn.decomposition import KernelPCA
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import learning_curve
import matplotlib.pyplot as plt

# reduce dimensions with a cosine-kernel PCA (fit on the train split only)
print('Starting dimensionality reduction')
reducer = KernelPCA(n_components=1500, kernel="cosine", random_state=seed)
corpus_train_tfidf_kpca = reducer.fit_transform(X_train)
corpus_test_tfidf_kpca = reducer.transform(X_test)

print('Finished dimensionality reduction')

# Initialize Logistic Regression on the reduced features
log_reg = LogisticRegression(C=1.0)
log_reg.fit(corpus_train_tfidf_kpca, y_train)

a = log_reg.score(corpus_test_tfidf_kpca, y_test)

# Baseline: the same classifier on the raw tf-idf features
print('Starting logistic regression 2')
log_reg.fit(X_train, y_train)

b = log_reg.score(X_test, y_test)
# Build the symmetric pairwise similarity matrix between time series.
for i in range(0, n):
    for j in range(i, n):
        sim_mat[i][j] = M2(ts_list[i], ts_list[j], delta, eps)
        sim_mat[j][i] = sim_mat[i][j]

        count = count + 1
        if count % 1000 == 0:
            # Bug fix: `print count` is Python 2 syntax (SyntaxError on
            # Python 3, which the rest of this code targets).
            print(count)

target_dim = 10

from sklearn.decomposition import KernelPCA
kpca = KernelPCA(n_components=target_dim,
                 kernel="precomputed",
                 eigen_solver="auto",
                 tol=1e-9,
                 max_iter=3000,
                 n_jobs=-1)
# Classical-MDS-style transform of the squared "distances".
feature_coords = kpca.fit_transform((sim_mat**2) * -0.5)

from statsmodels.nonparametric.kernel_regression import KernelReg

landfalls = np.array([float(h.made_landfall) for h in hurricane_list])

inds = np.argsort(feature_coords[:, 0])

feature_coords_sorted = feature_coords[inds]
landfalls_sorted = landfalls[inds]

# One 'c' (continuous) variable-type flag per embedded dimension.
# (The original wrapped this in a redundant ''.join(...).)
vartypes = 'c' * target_dim
reg = KernelReg(landfalls_sorted, feature_coords_sorted, vartypes)
Esempio n. 4
0
def embedDistanceMatrix(dmatDf, method='kpca', n_components=2, **kwargs):
    """Two-dimensional embedding of sequence distances in dmatDf,
    returning Nx2 x,y-coords: tsne, isomap, pca, mds, kpca, sklearn-tsne

    Parameters
    ----------
    dmatDf : pd.DataFrame or ndarray
        Square pairwise distance matrix.
    method : str
        One of 'tsne', 'isomap', 'mds', 'pca', 'kpca', 'lle',
        'sklearn-tsne', 'umap'.
    n_components : int
        Number of embedding dimensions returned.
    **kwargs
        Method-specific options ('perplexity' is required for the t-SNE
        variants; everything is forwarded wholesale to UMAP).

    Returns
    -------
    pd.DataFrame indexed like dmatDf with n_components columns,
    or None for an unknown method.
    """
    if isinstance(dmatDf, pd.DataFrame):
        dmat = dmatDf.values
    else:
        dmat = dmatDf

    if method == 'tsne':
        xy = tsne.run_tsne(dmat,
                           no_dims=n_components,
                           perplexity=kwargs['perplexity'])
    elif method == 'isomap':
        isoObj = Isomap(n_neighbors=10, n_components=n_components)
        xy = isoObj.fit_transform(dmat)
    elif method == 'mds':
        mds = MDS(n_components=n_components,
                  max_iter=3000,
                  eps=1e-9,
                  random_state=15,
                  dissimilarity="precomputed",
                  n_jobs=1)
        xy = mds.fit(dmat).embedding_
        # Rotate the MDS solution onto its principal axes for a
        # reproducible orientation.
        rot = PCA(n_components=n_components)
        xy = rot.fit_transform(xy)
    elif method == 'pca':
        pcaObj = PCA(n_components=None)
        xy = pcaObj.fit_transform(dmat)[:, :n_components]
    elif method == 'kpca':
        pcaObj = KernelPCA(n_components=dmat.shape[0],
                           kernel='precomputed',
                           eigen_solver='dense')
        try:
            gram = dist2kernel(dmat)
        # Narrowed from a bare `except:`, which also swallowed
        # KeyboardInterrupt/SystemExit.
        except Exception:
            print(
                'Could not convert dmat to kernel for KernelPCA; using 1 - dmat/dmat.max() instead'
            )
            gram = 1 - dmat / dmat.max()
        xy = pcaObj.fit_transform(gram)[:, :n_components]
    elif method == 'lle':
        lle = manifold.LocallyLinearEmbedding(n_neighbors=30,
                                              n_components=n_components,
                                              method='standard')
        # Bug fix: this branch referenced an undefined name `dist`;
        # the distance matrix is `dmat`.
        xy = lle.fit_transform(dmat)
    elif method == 'sklearn-tsne':
        tsneObj = TSNE(n_components=n_components,
                       metric='precomputed',
                       random_state=0,
                       perplexity=kwargs['perplexity'])
        xy = tsneObj.fit_transform(dmat)
    elif method == 'umap':
        umapObj = umap.UMAP(n_components=n_components,
                            metric='precomputed',
                            random_state=110820,
                            **kwargs)
        xy = umapObj.fit_transform(dmat)
    else:
        print('Method unknown: %s' % method)
        return

    assert xy.shape[0] == dmatDf.shape[0]
    xyDf = pd.DataFrame(xy[:, :n_components],
                        index=dmatDf.index,
                        columns=np.arange(n_components))
    if method == 'kpca':
        """Not sure how negative eigenvalues should be handled here, but they are usually
        small so it shouldn't make a big difference"""
        # NOTE(review): `lambdas_` was renamed `eigenvalues_` in newer
        # scikit-learn releases — confirm against the pinned version.
        setattr(
            xyDf, 'explained_variance_', pcaObj.lambdas_[:n_components] /
            pcaObj.lambdas_[pcaObj.lambdas_ > 0].sum())
    return xyDf
# Splitting the dataset into the Training set and Test set
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X,
                                                    y,
                                                    test_size=0.25,
                                                    random_state=0)

# Feature Scaling: fit the scaler on the training data only, then apply
# the same transform to the test data so both share one scale.
from sklearn.preprocessing import StandardScaler
sc_X = StandardScaler()
X_train = sc_X.fit_transform(X_train)
X_test = sc_X.transform(X_test)

# Applying Kernel PCA: non-linear reduction to 2 components with an RBF
# kernel (fit on train, transform both splits).
from sklearn.decomposition import KernelPCA
kpca = KernelPCA(n_components=2, kernel="rbf")
X_train = kpca.fit_transform(X_train)
X_test = kpca.transform(X_test)

# Fitting logistic regression to the (reduced) training set
from sklearn.linear_model import LogisticRegression
classifier = LogisticRegression(random_state=0)
classifier.fit(X_train, y_train)

# Predicting the test set result
y_pred = classifier.predict(X_test)

# Making the confusion matrix to evaluate the classifier
from sklearn.metrics import confusion_matrix
cm = confusion_matrix(y_test, y_pred)
# Show the corrupted test images together with their mean squared error
# against the clean test set.
plot_digits(
    X_test_noisy,
    f"Noisy test images\nMSE: {np.mean((X_test - X_test_noisy) ** 2):.2f}")

# %%
# Learn the `PCA` basis
# ---------------------
#
# We can now learn our PCA basis using both a linear PCA and a kernel PCA that
# uses a radial basis function (RBF) kernel.
from sklearn.decomposition import PCA, KernelPCA

pca = PCA(n_components=32)
# fit_inverse_transform=True is needed so the kernel PCA can map the
# reduced representation back to pixel space later.
kernel_pca = KernelPCA(n_components=400,
                       kernel="rbf",
                       gamma=1e-3,
                       fit_inverse_transform=True,
                       alpha=5e-3)

pca.fit(X_train_noisy)
# `_ =` just suppresses the fitted-estimator notebook output.
_ = kernel_pca.fit(X_train_noisy)

# %%
# Reconstruct and denoise test images
# -----------------------------------
#
# Now, we can transform and reconstruct the noisy test set. Since we used less
# components than the number of original features, we will get an approximation
# of the original set. Indeed, by dropping the components explaining variance
# in PCA the least, we hope to remove noise. Similar thinking happens in kernel
# PCA; however, we expect a better reconstruction because we use a non-linear
Esempio n. 7
0
# Compare a linear-kernel KernelPCA against plain PCA (compare_KernelPCA
# is a helper defined elsewhere in this notebook).
kernel_pca_linear_lambdas_, kernel_pca_full_linear_lambdas_, explained_variance = compare_KernelPCA(
)

print('Is equal singular values KernelPCA with linear kernel and PCA')
# Singular values are the square roots of the linear-kernel eigenvalues,
# so the rounded comparison should print element-wise True.
print(
    np.sqrt(kernel_pca_linear_lambdas_).round(3) ==
    pca_85.singular_values_.round(3))

kernel_pca_linear_lambdas_, kernel_pca_full_linear_lambdas_, explained_variance

# Repeat the comparison for the other kernels.
res_poly = compare_KernelPCA(kernel='poly')
res_rbf = compare_KernelPCA(kernel='rbf')
res_sigmoid = compare_KernelPCA(kernel='sigmoid')
res_cosine = compare_KernelPCA(kernel='cosine')

# A precomputed kernel expects the Gram matrix X @ X.T instead of the data.
kernel_pca_precomputed = KernelPCA(n_components=kernel_pca_n_comp,
                                   kernel='precomputed')
kernel_pca_precomputed_data = kernel_pca_precomputed.fit_transform(
    data.dot(data.T))
# NOTE(review): `lambdas_` is renamed `eigenvalues_` in newer scikit-learn.
kernel_pca_precomputed.lambdas_.round(3)

# ---
# ## Variations on principal component analysis
# ### SparsePCA

sparse_pca_lars = SparsePCA(2, method='lars')
sparse_pca_lars_data = sparse_pca_lars.fit_transform(data)

print("Sparse PCA with lars method components")
print(sparse_pca_lars.components_)

sparse_pca_cd = SparsePCA(2, method='cd')
    def __init__(self,
                 feature_extractor='tsne',
                 perplexity=30,
                 pixels=100,
                 random_state=None,
                 n_jobs=None):
        """Generate an ImageTransformer instance

        Args:
            feature_extractor: string of value ('tsne', 'pca', 'kpca') or a
                class instance with method `fit_transform` that returns a
                2-dimensional array of extracted features.
            perplexity: perplexity forwarded to the t-SNE extractors.
            pixels: int (square matrix) or tuple of ints (height, width) that
                defines the size of the image matrix.
            random_state: int or RandomState. Determines the random number
                generator, if present, of a string defined feature_extractor.
            n_jobs: The number of parallel jobs to run for a string defined
                feature_extractor.
        """
        self.random_state = random_state
        self.n_jobs = n_jobs

        if isinstance(feature_extractor, str):
            # Case-insensitive match on the extractor name. Only the user
            # input needs folding: the candidate literals are already
            # lowercase, so the original `'tsne'.casefold()` calls were
            # no-ops.
            name = feature_extractor.casefold()
            if name == 'tsne_exact':
                extractor = TSNE(n_components=2,
                                 metric='cosine',
                                 perplexity=perplexity,
                                 n_iter=1000,
                                 method='exact',
                                 random_state=self.random_state,
                                 n_jobs=self.n_jobs)
            elif name == 'tsne':
                extractor = TSNE(n_components=2,
                                 metric='cosine',
                                 perplexity=perplexity,
                                 n_iter=1000,
                                 method='barnes_hut',
                                 random_state=self.random_state,
                                 n_jobs=self.n_jobs)
            elif name == 'pca':
                extractor = PCA(n_components=2,
                                random_state=self.random_state)
            elif name == 'kpca':
                extractor = KernelPCA(n_components=2,
                                      kernel='rbf',
                                      random_state=self.random_state,
                                      n_jobs=self.n_jobs)
            else:
                raise ValueError(("Feature extraction method '{}' not accepted"
                                  ).format(feature_extractor))
            self._fe = extractor
        elif hasattr(feature_extractor, 'fit_transform') and inspect.ismethod(
                feature_extractor.fit_transform):
            # Duck-typed extractor object supplied by the caller.
            self._fe = feature_extractor
        else:
            raise TypeError('Parameter feature_extractor is not a '
                            'string nor has method "fit_transform"')

        # Normalize an int into a (height, width) pair.
        if isinstance(pixels, int):
            pixels = (pixels, pixels)

        # The resolution of transformed image
        self._pixels = pixels
        self._xrot = None
Esempio n. 9
0
    def pca_transform(self,
                      nb_PC=4,
                      remove_mean0=False,
                      remove_mean1=False,
                      standard=False,
                      sklearn=False,
                      sklearn_kernel=False,
                      cov=True):
        """Perform a principal component analysis of ``self.df``.

        parameters:
            nb_PC: default = 4, number of principal components to be used
            remove_mean0: subtract the column means before the analysis
            remove_mean1: subtract the row means before the analysis
            standard: default = False, standardize the dataframe
            sklearn: if True (default=False) use SVD by sklearn
            sklearn_kernel: with sklearn, use an RBF KernelPCA instead of PCA
            cov: if True (by default) use the covariance matrix to perform
                the PCA analysis; when both `sklearn` and `cov` are True the
                covariance results overwrite the sklearn ones (original
                behaviour preserved)

        Stored on the object:
            scores, eigenvalues, eigenvectors, var_exp (explained variance
            in percent) and, for the sklearn branch only, eigpairs.

        NOTE:
            By default sklearn removes the mean from the dataset, so it
            cannot be used to perform the downscaling.

        References:
            http://sebastianraschka.com/Articles/2015_pca_in_3_steps.html#projection-onto-the-new-feature-space
        """
        df = self.df
        self.nb_PC = nb_PC

        if remove_mean0:
            print('remove_mean0')
            df = df.subtract(df.mean(axis=0), axis='columns')

        if remove_mean1:
            print('remove_mean1')
            df = df.subtract(df.mean(axis=1), axis='index')
            print(df)

        if standard:
            # standardize
            self.standard = True
            df = (df - df.mean(axis=0)) / df.std(
                axis=0)  # another way to standardise

        #=======================================================================
        # Sklearn
        #=======================================================================
        if sklearn:
            print("o" * 80)
            print("SVD sklearn used")
            print("o" * 80)

            if sklearn_kernel:
                print('sklearn_kernel')
                # NOTE(review): KernelPCA exposes neither
                # explained_variance_ nor components_, so the attribute
                # reads below only work for the plain-PCA branch —
                # confirm before enabling sklearn_kernel.
                pca = KernelPCA(nb_PC,
                                kernel="rbf",
                                fit_inverse_transform=True,
                                gamma=10)

            #Create a PCA model with nb_PC principal components
            else:
                pca = PCA(nb_PC)
            # fit data
            pca.fit(df)

            # Bug fix: wrap the scores in a DataFrame so the .iloc
            # slicing at the end of the method also works when cov=False
            # (it used to fail on the raw ndarray).
            scores = pd.DataFrame(pca.transform(df),
                                  index=df.index,
                                  columns=np.arange(1, nb_PC + 1))
            eigenvalues = pca.explained_variance_
            eigenvectors = pca.components_  # or loadings

            # Make a list of (eigenvalue, eigenvector) tuples.
            # Bug fix: the original read self.eigenvalues and
            # self.eigenvector, which are not defined until the end of
            # the method (and the latter attribute never exists at all).
            self.eigpairs = [(np.abs(eigenvalues[i]), eigenvectors[i, :])
                             for i in range(len(eigenvalues))]

        #=======================================================================
        # Covariance Matrix
        #=======================================================================
        if cov:
            print("o" * 80)
            print("Covariance used")
            print("o" * 80)

            X = df.values
            cov_mat = np.cov(X.T)
            eigenvalues, eigenvectors = np.linalg.eig(cov_mat)

            scores = X.dot(eigenvectors)
            scores = pd.DataFrame(scores,
                                  columns=np.arange(1,
                                                    len(df.columns) + 1),
                                  index=df.index)
            eigenvalues = pd.Series(eigenvalues,
                                    index=np.arange(1,
                                                    len(df.columns) + 1))
            eigenvectors = pd.DataFrame(eigenvectors.T,
                                        columns=df.columns,
                                        index=np.arange(
                                            1,
                                            len(df.columns) + 1))

        self.scores = scores.iloc[:, 0:nb_PC]
        self.eigenvalues = eigenvalues  #[0:nb_PC]
        self.eigenvectors = eigenvectors[0:nb_PC]

        tot = sum(eigenvalues)
        self.var_exp = [(i / tot) * 100
                        for i in sorted(eigenvalues, reverse=True)]
Esempio n. 10
0
y = dataset.iloc[:, -1].values #all rows and only the last column

# Splitting the dataset into Training set and Test set
from sklearn.model_selection import train_test_split
X_train, X_test , y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=0)

# Feature scaling
from sklearn.preprocessing import StandardScaler
sc_X = StandardScaler()
X_train = sc_X.fit_transform(X_train)
X_test = sc_X.transform(X_test) #transform both but only fit to X_train so that X_train and X_test have same scale

# Applying Kernel PCA: non-linear reduction to two RBF-kernel components
from sklearn.decomposition import KernelPCA
kpca = KernelPCA(n_components=2, kernel='rbf') # None changed to 2
X_train = kpca.fit_transform(X_train)
X_test = kpca.transform(X_test)

# Fitting classifier to dataset
from sklearn.linear_model import LogisticRegression
clf = LogisticRegression(random_state=0)
clf.fit(X_train, y_train)

# Predicting the test set results
y_pred = clf.predict(X_test)

# Testing model w/ confusion matrix
from sklearn.metrics import confusion_matrix
cm = confusion_matrix(y_test, y_pred)        #90 correct predictions; 10 incorrect
Esempio n. 11
0
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split

if __name__ == "__main__":
    # Load the heart dataset; 'target' is the label column.
    dt_heart = pd.read_csv('./data/heart.csv')

    print(dt_heart.head(5))

    dt_features = dt_heart.drop(['target'], axis=1)
    dt_target = dt_heart['target']

    # Standardize the features before the polynomial-kernel PCA.
    dt_features = StandardScaler().fit_transform(dt_features)

    X_train, X_test, y_train, y_test = train_test_split(dt_features,
                                                        dt_target,
                                                        test_size=0.3,
                                                        random_state=42)

    # Reduce to 4 components; fit on the training split only, then apply
    # the same projection to both splits.
    kpca = KernelPCA(n_components=4, kernel='poly')
    kpca.fit(X_train)

    dt_train = kpca.transform(X_train)
    dt_test = kpca.transform(X_test)

    logistic = LogisticRegression(solver='lbfgs')

    # Classify on the reduced features and report test accuracy.
    logistic.fit(dt_train, y_train)
    print("SCORE KPCA: ", logistic.score(dt_test, y_test))

    #kernels_y_kpca
# saving the model to disk for future use
corpora.MmCorpus.serialize('train_tfidf.mm', train_tfidf)
# convert to a sparse and compatible format for dimensionality reduction using sklearn
sparse_train_corpus_tfidf = matutils.corpus2csc(train_tfidf)
# corpus2csc yields a terms x documents matrix; transpose it so rows are
# documents, as sklearn expects.
sparse_train_corpus_tfidf_transpose = sparse_train_corpus_tfidf.transpose()

#%%
# visualize the tf-idf corpus using kernel PCA
import numpy as np
import matplotlib.pyplot as plt
from sklearn.decomposition import KernelPCA
from mpl_toolkits.mplot3d import Axes3D
from sklearn.manifold import TSNE

# Cosine-kernel PCA down to 3 components for a 3-D scatter plot.
kpca2 = KernelPCA(n_components=3, kernel="cosine", random_state=seed)
corpus_train_tfidf_kpca2 = kpca2.fit_transform(
    sparse_train_corpus_tfidf_transpose)
#RENAMED FOR EASE
X = corpus_train_tfidf_kpca2

#kpca = KernelPCA(n_components = 1000 , kernel="cosine", random_state=seed)
#corpus_train_tfidf_kpca = kpca.fit_transform(sparse_train_corpus_tfidf_transpose)

#reducer = TSNE(n_components = 3, perplexity=.0, early_exaggeration=4.0, learning_rate=30.0, n_iter=10000, metric='cosine')
# visualize the tf-idf corpus using kernel PCA

#CREATE DICTIONARY TO ASSIGN COLORS
categories = train_category.unique()

#REINDEX OUTPUT TO COMPARE WITH LABELS
Esempio n. 13
0
fpr, tpr, thresholds = roc_curve(y_train_5, y_scores)
score = roc_auc_score(y_test, y_hat)

### dimensionality reduction

from sklearn.decomposition import PCA, KernelPCA

pca = PCA(n_components=20)
# Bug fix: np.concantenate -> np.concatenate (the typo raised
# AttributeError at runtime).
X_all = np.concatenate([X_train, X_test])
pca.fit(X_all)
X_train_pca = pca.transform(X_train)
X_test_pca = pca.transform(X_test)

# Bug fix: bare `components_` was an undefined name; it is an attribute
# of the fitted PCA estimator.
pca.explained_variance_ratio_, pca.components_

rbf_pca = KernelPCA(n_components = 2, kernel="rbf", gamma=0.04)
X_reduced = rbf_pca.fit_transform(X)

### clustering

from sklearn.cluster import KMeans, MeanShift, DBSCAN

k_means = KMeans(init = "k-means++", n_clusters = 4, n_init = 12)
k_means.fit(X)
k_means.predict(X)

# Bug fix: bare `cluster_centers_` was an undefined name; it belongs to
# the fitted KMeans estimator.
k_means.labels_, k_means.cluster_centers_

dbscan = DBSCAN(eps=0.05, min_samples=5)

### search for hyperparameters
Esempio n. 14
0
    X_errors_image,
    (X_errors_image.shape[0], ds.train.num_rows, ds.train.num_columns))
ocr_utils.montage(
    X_errors2D,
    title='PCA Error Characters, components={}'.format(n_components))

title = 'inverse transform errors'
# Map the error samples from PCA space back to pixel space for display.
X_inverse = pca.inverse_transform(X_errors_pca)
X2D = np.reshape(X_inverse,
                 (X_inverse.shape[0], ds.train.num_rows, ds.train.num_columns))
# Shift to a non-negative range so the montage renders correctly.
X2D = X2D - np.min(X2D)
ocr_utils.montage(X2D, title=title)

########################################################################################
# Kernel PCA on the raw images, then logistic regression on the embedding.
kernel = 'rbf'  # really slow
pca = KernelPCA(n_components=2, kernel=kernel, gamma=15)

X_train_pca = pca.fit_transform(X_train_image)
X_test_pca = pca.transform(X_test_image)

lr = LogisticRegression()
logistic_fitted = lr.fit(X_train_pca, y_train)
y_train_pred = logistic_fitted.predict(X_train_pca)
y_test_pred = logistic_fitted.predict(X_test_pca)

print(
    '\nKernel PCA Train Accuracy: {:4.6f}, n_components={}, kernel={}'.format(
        accuracy_score(y_train, y_train_pred), pca.n_components, kernel))
print('Kernel PCA Test Accuracy: {:4.6f}, n_components={}, kernel={}'.format(
    accuracy_score(y_test, y_test_pred), pca.n_components, kernel))
Esempio n. 15
0
    kmeans = KMeans(n_clusters = k).fit(delta_noname)
    kmeans.fit(delta_noname)
    distortion.append(sum(numpy.min(cdist(delta_noname, kmeans.cluster_centers_, 'euclidean'), axis=1)) / delta_noname.shape[0])
    
# Elbow plot of the per-k distortions computed above.
plt.plot(K, distortion, 'bx-')
plt.title('The Elbow Method showing the optimal k')
plt.show()


# In[277]:


#PCA with RBF kernel; fit_inverse_transform allows reconstruction later.
from sklearn.decomposition import PCA, KernelPCA

kpca = KernelPCA(kernel="rbf", fit_inverse_transform=True, gamma=10,n_components=4)
kpca.fit(delta_noname)
delta_noname_components_rbf = kpca.transform(delta_noname)


# In[278]:


#to get the variance being explained by the components
#(KernelPCA exposes no explained_variance_ratio_, so it is estimated
# from the variance of the transformed components)
import numpy
explained_variance = numpy.var(delta_noname_components_rbf, axis=0)
explained_variance_ratio = explained_variance / numpy.sum(explained_variance)
print(explained_variance_ratio)


# In[279]:
Esempio n. 16
0
#X_reduced = inc_pca.transform(X_train)

#You can also use the memmap class to do this same problem:
#look at page 217.

##Randomized PCA:
rnd_pca = PCA(n_components = 153, svd_solver = "randomized")
X_reduced = rnd_pca.fit_transform(X_train)

##Kernel PCA:
#It seems that this instance has the same characteristics as the support
#vector machine methods from the earlier chapters.
from sklearn.decomposition import KernelPCA

rbf_pca = KernelPCA(n_components = 2, kernel = "rbf", gamma = 0.04)
# Bug fix: use fit_transform, not fit — `fit` returns the estimator
# itself, so X_reduced did not hold the projected data (the original
# comment even records the resulting confusion).
X_reduced = rbf_pca.fit_transform(X)

##Selecting a kernel and tuning hyperparameters.
#To find the best hyperparameters for this method (which is an unsupervised
#statistical training method) you can use the function GridSearchCV().

from sklearn.model_selection import GridSearchCV
from sklearn.linear_model import LogisticRegression
from sklearn.pipeline import Pipeline
from sklearn.datasets import make_moons

moons = make_moons(n_samples = 1000, shuffle = True, noise = 0.2, random_state = 42)
X = moons[0]
Esempio n. 17
0
def kpcaModel():
    """Build the kernel-PCA estimator used by this pipeline.

    Returns a KernelPCA configured for 5 components with a degree-2
    polynomial kernel.
    """
    poly_kpca = KernelPCA(n_components=5, kernel="poly", degree=2)
    return poly_kpca
Esempio n. 18
0
#For predicting on new data
data_pred = pd.read_csv('data/pred_data.csv')
data_for_pred = data_pred[["contr", "energ", "maxpr"]].values
data_pred["pred_class"] = ''
y_pred = kmeans.predict(data_for_pred)
# Bug fix: the chained indexing data_pred["pred_class"][i] = ... assigns
# through a potential intermediate copy (SettingWithCopyWarning, and
# silently lost writes in modern pandas). .at writes the scalar in place;
# data_pred comes straight from read_csv, so its RangeIndex label i
# matches position i.
for i, cluster in enumerate(y_pred):
    data_pred.at[i, "pred_class"] = label_encoder.classes_[
        original_labels[cluster]]
data_pred.to_csv("data/pred_data.csv", index=False)

#Graphical Visualization of Data
#Plot Code Starts

from sklearn.decomposition import KernelPCA
pca = KernelPCA(n_components=2)
# Note: y is accepted but ignored by KernelPCA.fit_transform.
principalComponents = pca.fit_transform(x, y)
principalDf = pd.DataFrame(
    data=principalComponents,
    columns=['principal component 1', 'principal component 2'])

finalDf = pd.concat([principalDf, dataset[['CLASS']]], axis=1)

# 2-D scatter plot of the embedding, one colour per class label.
fig = plt.figure(figsize=(8, 8))
ax = fig.add_subplot(1, 1, 1)
ax.set_xlabel('Component 1', fontsize=15)
ax.set_ylabel('Component 2', fontsize=15)
ax.set_title('2 component PCA', fontsize=20)
targets = [0, 1, 2, 3]
colors = ['r', 'g', 'b', 'y']
for target, color in zip(targets, colors):
Esempio n. 19
0
            # Split this fold's rows into train/test partitions.
            X_train, X_test = X[train_index], X[test_index]
            y_train, y_test = y[train_index], y[test_index]

            # Identify the minority/majority classes from the training labels.
            value, counts = np.unique(y_train, return_counts=True)
            minority_class = value[np.argmin(counts)]
            majority_class = value[np.argmax(counts)]

            idx_min = np.where(y_train == minority_class)[0]
            idx_maj = np.where(y_train == majority_class)[0]

            # Pool the majority-class training rows with the test rows.
            full_X = np.concatenate((X_train[idx_maj], X_test))
            full_y = np.concatenate((y_train[idx_maj], y_test))

            # Adding PCA Method: project the pool onto ~1/3 of the
            # original feature count with a polynomial-kernel PCA.
            transformer = KernelPCA(n_components=math.ceil(X_train.shape[1] /
                                                           3),
                                    kernel='poly')
            X_transformed = transformer.fit_transform(full_X)

            # Training the kmean model on the embedded pool.
            kmeans = KMeans(n_clusters=number_of_clusters)
            kmeans.fit(X_transformed)

            # Map each cluster label to the row indices it contains.
            points_under_each_cluster = {
                i: np.where(kmeans.labels_ == i)[0]
                for i in range(kmeans.n_clusters)
            }

            # From each cluster removing the test instances
            for i in points_under_each_cluster.keys():
                temp = []
Esempio n. 20
0
plt.legend(loc='lower left')
plt.show()
print('LDA transform_support vector machines_training score: ',
      svm.score(X_train_lda, y_train))
print('LDA transform_support vector machines_testing score: ',
      svm.score(X_test_lda, y_test))

#kPCA: sweep the RBF gamma and score LR / linear SVM on each embedding.

gamma_space = np.logspace(-2, 0, 10)
lr_train = []
lr_test = []
svm_train = []
svm_test = []
for gamma in gamma_space:
    kPCA = KernelPCA(n_components=2, kernel='rbf', gamma=gamma)
    # KernelPCA is unsupervised: the y_train argument the original passed
    # to fit_transform was ignored, so it is dropped for clarity.
    X_train_kpca = kPCA.fit_transform(X_train_std)
    X_test_kpca = kPCA.transform(X_test_std)
    lr = LogisticRegression()
    lr = lr.fit(X_train_kpca, y_train)
    lr_train.append(lr.score(X_train_kpca, y_train))
    lr_test.append(lr.score(X_test_kpca, y_test))
    svm = SVC(kernel='linear', C=1.0, random_state=1)
    svm.fit(X_train_kpca, y_train)
    svm_train.append(svm.score(X_train_kpca, y_train))
    svm_test.append(svm.score(X_test_kpca, y_test))

print("gamma  lr_train  lr_test  svm_train  svm_test")
for i in range(10):
    print('%.3f,   %.3f,   %.3f,    %.3f,    %.3f' % (
        gamma_space[i],
Esempio n. 21
0
X_train, X_test, y_train, y_test = train_test_split(X,
                                                    y,
                                                    test_size=0.2,
                                                    random_state=0)

#Feature Scaling
from sklearn.preprocessing import StandardScaler

sc_X = StandardScaler()
X_train = sc_X.fit_transform(X_train)
# Bug fix: the test set must be transformed with the scaler fitted on
# the training set; re-fitting on X_test put train and test on different
# scales (inconsistent preprocessing / leakage of test statistics).
X_test = sc_X.transform(X_test)

#Applying Kernel PCA
from sklearn.decomposition import KernelPCA

kpca = KernelPCA(n_components=2, kernel='rbf')
X_train = kpca.fit_transform(X_train)
X_test = kpca.transform(X_test)

#Fitting the logistic regression to the training set
from sklearn.linear_model import LogisticRegression

classifier = LogisticRegression(random_state=0)
classifier.fit(X_train, y_train)

#Predicting the test set results
y_pred = classifier.predict(X_test)

#Making the confusion matrix
from sklearn.metrics import confusion_matrix
# Third feature is a noisy linear combination of the first two, so the data
# lies near a 2-D plane — a good sanity check for the projections below.
X[:, 2] = X[:, 0] * w1 + X[:, 1] * w2 + noise * np.random.randn(m)

# Manual PCA via SVD: center the data, take the top-2 right singular
# vectors as the projection basis.
X_centered = X - X.mean(axis=0)
U, s, Vt = np.linalg.svd(X_centered)
W2 = Vt.T[:, :2]
X2D = X_centered.dot(W2)

# Same projection through sklearn's PCA (exact and randomized solvers).
pca = PCA(n_components=2)
x2D = pca.fit_transform(X)
print(x2D[0])

pca1 = PCA(n_components=2, svd_solver="randomized")
x2D1 = pca1.fit_transform(X)
print(x2D1[0])

# Kernel PCA with three different kernels, then LLE, for comparison.
pca2 = KernelPCA(n_components=2, kernel="sigmoid", gamma=0.04)
x2D2 = pca2.fit_transform(X)
print(x2D2[0])

pca3 = KernelPCA(n_components=2, kernel="rbf", gamma=0.04)
# BUG FIX: the original called pca2.fit_transform here, so the RBF model
# was never used and x2D3 just duplicated the sigmoid projection.
x2D3 = pca3.fit_transform(X)
print(x2D3[0])

pca4 = KernelPCA(n_components=2, kernel="linear")
x2D4 = pca4.fit_transform(X)
print(x2D4[0])

lle = LocallyLinearEmbedding(n_components=2, n_neighbors=10, random_state=40)
x2D5 = lle.fit_transform(X)
print(x2D5[0])
Esempio n. 23
0
    def btnConvert_click(self):
        msgBox = QMessageBox()

        # Batch
        try:
            Batch = np.int32(ui.txtBatch.text())
        except:
            msgBox.setText("Size of batch is wrong!")
            msgBox.setIcon(QMessageBox.Critical)
            msgBox.setStandardButtons(QMessageBox.Ok)
            msgBox.exec_()
            return False

        if Batch == 0:
            Batch = None

        # Kernel
        Kernel = ui.cbKernel.currentText()
        # Method
        Method = ui.cbMethod.currentText()

        # Gamma
        try:
            Gamma = np.float(ui.txtGamma.text())
        except:
            msgBox.setText("Gamma is wrong!")
            msgBox.setIcon(QMessageBox.Critical)
            msgBox.setStandardButtons(QMessageBox.Ok)
            msgBox.exec_()
            return False

        # Degree
        try:
            Degree = np.int32(ui.txtDegree.text())
        except:
            msgBox.setText("Degree is wrong!")
            msgBox.setIcon(QMessageBox.Critical)
            msgBox.setStandardButtons(QMessageBox.Ok)
            msgBox.exec_()
            return False

        # Coef0
        try:
            Coef0 = np.float(ui.txtCoef0.text())
        except:
            msgBox.setText("Coef0 is wrong!")
            msgBox.setIcon(QMessageBox.Critical)
            msgBox.setStandardButtons(QMessageBox.Ok)
            msgBox.exec_()
            return False

        # Alpha
        try:
            Alpha = np.int32(ui.txtAlpha.text())
        except:
            msgBox.setText("Alpha is wrong!")
            msgBox.setIcon(QMessageBox.Critical)
            msgBox.setStandardButtons(QMessageBox.Ok)
            msgBox.exec_()
            return False

        # Tol
        try:
            Tol = np.float(ui.txtTole.text())
        except:
            msgBox.setText("Tolerance is wrong!")
            msgBox.setIcon(QMessageBox.Critical)
            msgBox.setStandardButtons(QMessageBox.Ok)
            msgBox.exec_()
            return False

        # MaxIte
        try:
            MaxIter = np.int32(ui.txtMaxIter.text())
        except:
            msgBox.setText("Maximum number of iterations is wrong!")
            msgBox.setIcon(QMessageBox.Critical)
            msgBox.setStandardButtons(QMessageBox.Ok)
            msgBox.exec_()
            return False

        if MaxIter <= 0:
            MaxIter = None

        # Number of Job
        try:
            NJob = np.int32(ui.txtJobs.text())
        except:
            msgBox.setText("The number of parallel jobs is wrong!")
            msgBox.setIcon(QMessageBox.Critical)
            msgBox.setStandardButtons(QMessageBox.Ok)
            msgBox.exec_()
            return False

        if NJob < -1 or NJob == 0:
            msgBox.setText(
                "The number of parallel jobs must be -1 or greater than 0!")
            msgBox.setIcon(QMessageBox.Critical)
            msgBox.setStandardButtons(QMessageBox.Ok)
            msgBox.exec_()
            return False

        TrFoldErr = list()
        TeFoldErr = list()

        try:
            FoldFrom = np.int32(ui.txtFoldFrom.text())
            FoldTo = np.int32(ui.txtFoldTo.text())
        except:
            print("Please check fold parameters!")
            return

        if FoldTo < FoldFrom:
            print("Please check fold parameters!")
            return

        for fold_all in range(FoldFrom, FoldTo + 1):
            # Regularization
            try:
                Regularization = np.float(ui.txtRegularization.text())
            except:
                msgBox.setText("Regularization value is wrong!")
                msgBox.setIcon(QMessageBox.Critical)
                msgBox.setStandardButtons(QMessageBox.Ok)
                msgBox.exec_()
                return False

            # OutFile
            OutFile = ui.txtOutFile.text()
            OutFile = OutFile.replace("$FOLD$", str(fold_all))
            if not len(OutFile):
                msgBox.setText("Please enter out file!")
                msgBox.setIcon(QMessageBox.Critical)
                msgBox.setStandardButtons(QMessageBox.Ok)
                msgBox.exec_()
                return False

            # InFile
            InFile = ui.txtInFile.text()
            InFile = InFile.replace("$FOLD$", str(fold_all))
            if not len(InFile):
                msgBox.setText("Please enter input file!")
                msgBox.setIcon(QMessageBox.Critical)
                msgBox.setStandardButtons(QMessageBox.Ok)
                msgBox.exec_()
                return False
            if not os.path.isfile(InFile):
                msgBox.setText("Input file not found!")
                msgBox.setIcon(QMessageBox.Critical)
                msgBox.setStandardButtons(QMessageBox.Ok)
                msgBox.exec_()
                return False

            InData = io.loadmat(InFile)
            OutData = dict()
            OutData["imgShape"] = InData["imgShape"]

            # Data
            if not len(ui.txtITrData.currentText()):
                msgBox.setText("Please enter Input Train Data variable name!")
                msgBox.setIcon(QMessageBox.Critical)
                msgBox.setStandardButtons(QMessageBox.Ok)
                msgBox.exec_()
                return False
            if not len(ui.txtITeData.currentText()):
                msgBox.setText("Please enter Input Test Data variable name!")
                msgBox.setIcon(QMessageBox.Critical)
                msgBox.setStandardButtons(QMessageBox.Ok)
                msgBox.exec_()
                return False
            if not len(ui.txtOTrData.text()):
                msgBox.setText("Please enter Output Train Data variable name!")
                msgBox.setIcon(QMessageBox.Critical)
                msgBox.setStandardButtons(QMessageBox.Ok)
                msgBox.exec_()
                return False
            if not len(ui.txtOTeData.text()):
                msgBox.setText("Please enter Output Test Data variable name!")
                msgBox.setIcon(QMessageBox.Critical)
                msgBox.setStandardButtons(QMessageBox.Ok)
                msgBox.exec_()
                return False

            try:
                XTr = InData[ui.txtITrData.currentText()]
                XTe = InData[ui.txtITeData.currentText()]

                if ui.cbScale.isChecked() and not ui.rbScale.isChecked():
                    XTr = preprocessing.scale(XTr)
                    XTe = preprocessing.scale(XTe)
                    print("Whole of data is scaled X~N(0,1).")
            except:
                print("Cannot load data")
                return

            # NComponent
            try:
                NumFea = np.int32(ui.txtNumFea.text())
            except:
                msgBox.setText("Number of features is wrong!")
                msgBox.setIcon(QMessageBox.Critical)
                msgBox.setStandardButtons(QMessageBox.Ok)
                msgBox.exec_()
                return False
            if NumFea < 1:
                msgBox.setText("Number of features must be greater than zero!")
                msgBox.setIcon(QMessageBox.Critical)
                msgBox.setStandardButtons(QMessageBox.Ok)
                msgBox.exec_()
                return False
            if NumFea > np.shape(XTr)[1]:
                msgBox.setText("Number of features is wrong!")
                msgBox.setIcon(QMessageBox.Critical)
                msgBox.setStandardButtons(QMessageBox.Ok)
                msgBox.exec_()
                return False

            # Label
            if not len(ui.txtITrLabel.currentText()):
                msgBox.setText("Please enter Train Input Label variable name!")
                msgBox.setIcon(QMessageBox.Critical)
                msgBox.setStandardButtons(QMessageBox.Ok)
                msgBox.exec_()
                return False
            if not len(ui.txtITeLabel.currentText()):
                msgBox.setText("Please enter Test Input Label variable name!")
                msgBox.setIcon(QMessageBox.Critical)
                msgBox.setStandardButtons(QMessageBox.Ok)
                msgBox.exec_()
                return False
            if not len(ui.txtOTrLabel.text()):
                msgBox.setText(
                    "Please enter Train Output Label variable name!")
                msgBox.setIcon(QMessageBox.Critical)
                msgBox.setStandardButtons(QMessageBox.Ok)
                msgBox.exec_()
                return False
            if not len(ui.txtOTeLabel.text()):
                msgBox.setText("Please enter Test Output Label variable name!")
                msgBox.setIcon(QMessageBox.Critical)
                msgBox.setStandardButtons(QMessageBox.Ok)
                msgBox.exec_()
                return False
            try:
                OutData[ui.txtOTrLabel.text()] = InData[
                    ui.txtITrLabel.currentText()]
                OutData[ui.txtOTeLabel.text()] = InData[
                    ui.txtITeLabel.currentText()]
            except:
                print("Cannot load labels!")

            # Subject
            if not len(ui.txtITrSubject.currentText()):
                msgBox.setText(
                    "Please enter Train Input Subject variable name!")
                msgBox.setIcon(QMessageBox.Critical)
                msgBox.setStandardButtons(QMessageBox.Ok)
                msgBox.exec_()
                return False
            if not len(ui.txtITeSubject.currentText()):
                msgBox.setText(
                    "Please enter Test Input Subject variable name!")
                msgBox.setIcon(QMessageBox.Critical)
                msgBox.setStandardButtons(QMessageBox.Ok)
                msgBox.exec_()
                return False
            if not len(ui.txtOTrSubject.text()):
                msgBox.setText(
                    "Please enter Train Output Subject variable name!")
                msgBox.setIcon(QMessageBox.Critical)
                msgBox.setStandardButtons(QMessageBox.Ok)
                msgBox.exec_()
                return False
            if not len(ui.txtOTeSubject.text()):
                msgBox.setText(
                    "Please enter Test Output Subject variable name!")
                msgBox.setIcon(QMessageBox.Critical)
                msgBox.setStandardButtons(QMessageBox.Ok)
                msgBox.exec_()
                return False
            try:
                TrSubject = InData[ui.txtITrSubject.currentText()]
                OutData[ui.txtOTrSubject.text()] = TrSubject
                TeSubject = InData[ui.txtITeSubject.currentText()]
                OutData[ui.txtOTeSubject.text()] = TeSubject
            except:
                print("Cannot load Subject IDs")
                return

            # Task
            if ui.cbTask.isChecked():
                if not len(ui.txtITrTask.currentText()):
                    msgBox.setText(
                        "Please enter Input Train Task variable name!")
                    msgBox.setIcon(QMessageBox.Critical)
                    msgBox.setStandardButtons(QMessageBox.Ok)
                    msgBox.exec_()
                    return False
                if not len(ui.txtITeTask.currentText()):
                    msgBox.setText(
                        "Please enter Input Test Task variable name!")
                    msgBox.setIcon(QMessageBox.Critical)
                    msgBox.setStandardButtons(QMessageBox.Ok)
                    msgBox.exec_()
                    return False
                if not len(ui.txtOTrTask.text()):
                    msgBox.setText(
                        "Please enter Output Train Task variable name!")
                    msgBox.setIcon(QMessageBox.Critical)
                    msgBox.setStandardButtons(QMessageBox.Ok)
                    msgBox.exec_()
                    return False
                if not len(ui.txtOTeTask.text()):
                    msgBox.setText(
                        "Please enter Output Test Task variable name!")
                    msgBox.setIcon(QMessageBox.Critical)
                    msgBox.setStandardButtons(QMessageBox.Ok)
                    msgBox.exec_()
                    return False
                try:
                    TrTask = InData[ui.txtITrTask.currentText()]
                    OutData[ui.txtOTrTask.text()] = TrTask
                    TeTask = InData[ui.txtITeTask.currentText()]
                    OutData[ui.txtOTeTask.text()] = TeTask
                    TrTaskIndex = TrTask.copy()
                    for tasindx, tas in enumerate(np.unique(TrTask)):
                        TrTaskIndex[TrTask == tas] = tasindx + 1
                    TeTaskIndex = TeTask.copy()
                    for tasindx, tas in enumerate(np.unique(TeTask)):
                        TeTaskIndex[TeTask == tas] = tasindx + 1
                except:
                    print("Cannot load Tasks!")
                    return

            # Run
            if ui.cbRun.isChecked():
                if not len(ui.txtITrRun.currentText()):
                    msgBox.setText(
                        "Please enter Train Input Run variable name!")
                    msgBox.setIcon(QMessageBox.Critical)
                    msgBox.setStandardButtons(QMessageBox.Ok)
                    msgBox.exec_()
                    return False
                if not len(ui.txtITeRun.currentText()):
                    msgBox.setText(
                        "Please enter Test Input Run variable name!")
                    msgBox.setIcon(QMessageBox.Critical)
                    msgBox.setStandardButtons(QMessageBox.Ok)
                    msgBox.exec_()
                    return False
                if not len(ui.txtOTrRun.text()):
                    msgBox.setText(
                        "Please enter Train Output Run variable name!")
                    msgBox.setIcon(QMessageBox.Critical)
                    msgBox.setStandardButtons(QMessageBox.Ok)
                    msgBox.exec_()
                    return False
                if not len(ui.txtOTeRun.text()):
                    msgBox.setText(
                        "Please enter Test Output Run variable name!")
                    msgBox.setIcon(QMessageBox.Critical)
                    msgBox.setStandardButtons(QMessageBox.Ok)
                    msgBox.exec_()
                    return False
                try:
                    TrRun = InData[ui.txtITrRun.currentText()]
                    OutData[ui.txtOTrRun.text()] = TrRun
                    TeRun = InData[ui.txtITeRun.currentText()]
                    OutData[ui.txtOTeRun.text()] = TeRun
                except:
                    print("Cannot load Runs!")
                    return

            # Counter
            if ui.cbCounter.isChecked():
                if not len(ui.txtITrCounter.currentText()):
                    msgBox.setText(
                        "Please enter Train Input Counter variable name!")
                    msgBox.setIcon(QMessageBox.Critical)
                    msgBox.setStandardButtons(QMessageBox.Ok)
                    msgBox.exec_()
                    return False
                if not len(ui.txtITeCounter.currentText()):
                    msgBox.setText(
                        "Please enter Test Input Counter variable name!")
                    msgBox.setIcon(QMessageBox.Critical)
                    msgBox.setStandardButtons(QMessageBox.Ok)
                    msgBox.exec_()
                    return False
                if not len(ui.txtOTrCounter.text()):
                    msgBox.setText(
                        "Please enter Train Output Counter variable name!")
                    msgBox.setIcon(QMessageBox.Critical)
                    msgBox.setStandardButtons(QMessageBox.Ok)
                    msgBox.exec_()
                    return False
                if not len(ui.txtOTeCounter.text()):
                    msgBox.setText(
                        "Please enter Test Output Counter variable name!")
                    msgBox.setIcon(QMessageBox.Critical)
                    msgBox.setStandardButtons(QMessageBox.Ok)
                    msgBox.exec_()
                    return False
                try:
                    TrCounter = InData[ui.txtITrCounter.currentText()]
                    OutData[ui.txtOTrCounter.text()] = TrCounter
                    TeCounter = InData[ui.txtITeCounter.currentText()]
                    OutData[ui.txtOTeCounter.text()] = TeCounter
                except:
                    print("Cannot load Counters!")
                    return

            # Matrix Label
            if ui.cbmLabel.isChecked():
                if not len(ui.txtITrmLabel.currentText()):
                    msgBox.setText(
                        "Please enter Train Input Matrix Label variable name!")
                    msgBox.setIcon(QMessageBox.Critical)
                    msgBox.setStandardButtons(QMessageBox.Ok)
                    msgBox.exec_()
                    return False
                if not len(ui.txtITemLabel.currentText()):
                    msgBox.setText(
                        "Please enter Test Input Matrix Label variable name!")
                    msgBox.setIcon(QMessageBox.Critical)
                    msgBox.setStandardButtons(QMessageBox.Ok)
                    msgBox.exec_()
                    return False
                if not len(ui.txtOTrmLabel.text()):
                    msgBox.setText(
                        "Please enter Train Output Matrix Label variable name!"
                    )
                    msgBox.setIcon(QMessageBox.Critical)
                    msgBox.setStandardButtons(QMessageBox.Ok)
                    msgBox.exec_()
                    return False
                if not len(ui.txtOTemLabel.text()):
                    msgBox.setText(
                        "Please enter Test Output Matrix Label variable name!")
                    msgBox.setIcon(QMessageBox.Critical)
                    msgBox.setStandardButtons(QMessageBox.Ok)
                    msgBox.exec_()
                    return False
                try:
                    OutData[ui.txtOTrmLabel.text()] = InData[
                        ui.txtITrmLabel.currentText()]
                    OutData[ui.txtOTemLabel.text()] = InData[
                        ui.txtITemLabel.currentText()]
                except:
                    print("Cannot load matrix lables!")
                    return

            # Design
            if ui.cbDM.isChecked():
                if not len(ui.txtITrDM.currentText()):
                    msgBox.setText(
                        "Please enter Train Input Design Matrix variable name!"
                    )
                    msgBox.setIcon(QMessageBox.Critical)
                    msgBox.setStandardButtons(QMessageBox.Ok)
                    msgBox.exec_()
                    return False
                if not len(ui.txtITeDM.currentText()):
                    msgBox.setText(
                        "Please enter Test Input Design Matrix variable name!")
                    msgBox.setIcon(QMessageBox.Critical)
                    msgBox.setStandardButtons(QMessageBox.Ok)
                    msgBox.exec_()
                    return False
                if not len(ui.txtOTrDM.text()):
                    msgBox.setText(
                        "Please enter Train Output Design Matrix variable name!"
                    )
                    msgBox.setIcon(QMessageBox.Critical)
                    msgBox.setStandardButtons(QMessageBox.Ok)
                    msgBox.exec_()
                    return False
                if not len(ui.txtOTeDM.text()):
                    msgBox.setText(
                        "Please enter Test Output Design Matrix variable name!"
                    )
                    msgBox.setIcon(QMessageBox.Critical)
                    msgBox.setStandardButtons(QMessageBox.Ok)
                    msgBox.exec_()
                    return False
                try:
                    OutData[ui.txtOTrDM.text()] = InData[
                        ui.txtITrDM.currentText()]
                    OutData[ui.txtOTeDM.text()] = InData[
                        ui.txtITeDM.currentText()]
                except:
                    print("Cannot load design matrices!")
                    return

            # Coordinate
            if ui.cbCol.isChecked():
                if not len(ui.txtCol.currentText()):
                    msgBox.setText("Please enter Coordinator variable name!")
                    msgBox.setIcon(QMessageBox.Critical)
                    msgBox.setStandardButtons(QMessageBox.Ok)
                    msgBox.exec_()
                    return False
                if not len(ui.txtOCol.text()):
                    msgBox.setText("Please enter Coordinator variable name!")
                    msgBox.setIcon(QMessageBox.Critical)
                    msgBox.setStandardButtons(QMessageBox.Ok)
                    msgBox.exec_()
                    return False
                try:
                    OutData[ui.txtOCol.text()] = InData[
                        ui.txtCol.currentText()]
                except:
                    print("Cannot load coordinator!")
                    return

            # Condition
            if ui.cbCond.isChecked():
                if not len(ui.txtCond.currentText()):
                    msgBox.setText("Please enter Condition variable name!")
                    msgBox.setIcon(QMessageBox.Critical)
                    msgBox.setStandardButtons(QMessageBox.Ok)
                    msgBox.exec_()
                    return False
                if not len(ui.txtOCond.text()):
                    msgBox.setText("Please enter Condition variable name!")
                    msgBox.setIcon(QMessageBox.Critical)
                    msgBox.setStandardButtons(QMessageBox.Ok)
                    msgBox.exec_()
                    return False
                try:
                    OutData[ui.txtOCond.text()] = InData[
                        ui.txtCond.currentText()]
                except:
                    print("Cannot load conditions!")
                    return

            # FoldID
            if ui.cbFoldID.isChecked():
                if not len(ui.txtFoldID.currentText()):
                    msgBox.setText("Please enter FoldID variable name!")
                    msgBox.setIcon(QMessageBox.Critical)
                    msgBox.setStandardButtons(QMessageBox.Ok)
                    msgBox.exec_()
                    return False
                if not len(ui.txtOFoldID.text()):
                    msgBox.setText("Please enter FoldID variable name!")
                    msgBox.setIcon(QMessageBox.Critical)
                    msgBox.setStandardButtons(QMessageBox.Ok)
                    msgBox.exec_()
                    return False
                try:
                    OutData[ui.txtOFoldID.text()] = InData[
                        ui.txtFoldID.currentText()]
                except:
                    print("Cannot load Fold ID!")
                    return

            # FoldInfo
            if ui.cbFoldInfo.isChecked():
                if not len(ui.txtFoldInfo.currentText()):
                    msgBox.setText("Please enter FoldInfo variable name!")
                    msgBox.setIcon(QMessageBox.Critical)
                    msgBox.setStandardButtons(QMessageBox.Ok)
                    msgBox.exec_()
                    return False
                if not len(ui.txtOFoldInfo.text()):
                    msgBox.setText("Please enter FoldInfo variable name!")
                    msgBox.setIcon(QMessageBox.Critical)
                    msgBox.setStandardButtons(QMessageBox.Ok)
                    msgBox.exec_()
                    return False
                try:
                    OutData[ui.txtOFoldInfo.text()] = InData[
                        ui.txtFoldInfo.currentText()]
                except:
                    print("Cannot load Fold Info!")
                    return
                pass

            # Number of Scan
            if ui.cbNScan.isChecked():
                if not len(ui.txtITrScan.currentText()):
                    msgBox.setText(
                        "Please enter Number of Scan variable name for Input Train!"
                    )
                    msgBox.setIcon(QMessageBox.Critical)
                    msgBox.setStandardButtons(QMessageBox.Ok)
                    msgBox.exec_()
                    return False
                if not len(ui.txtITeScan.currentText()):
                    msgBox.setText(
                        "Please enter Number of Scan variable name for Input Test!"
                    )
                    msgBox.setIcon(QMessageBox.Critical)
                    msgBox.setStandardButtons(QMessageBox.Ok)
                    msgBox.exec_()
                    return False
                if not len(ui.txtOTrScan.text()):
                    msgBox.setText(
                        "Please enter Number of Scan variable name for Output Train!"
                    )
                    msgBox.setIcon(QMessageBox.Critical)
                    msgBox.setStandardButtons(QMessageBox.Ok)
                    msgBox.exec_()
                    return False
                if not len(ui.txtOTeScan.text()):
                    msgBox.setText(
                        "Please enter Number of Scan variable name for Output Test!"
                    )
                    msgBox.setIcon(QMessageBox.Critical)
                    msgBox.setStandardButtons(QMessageBox.Ok)
                    msgBox.exec_()
                    return False
                try:
                    OutData[ui.txtOTrScan.text()] = InData[
                        ui.txtITrScan.currentText()]
                    OutData[ui.txtOTeScan.text()] = InData[
                        ui.txtITeScan.currentText()]
                except:
                    print("Cannot load NScan!")
                    return

            # Train Analysis Level
            print("Calculating Analysis Level for Training Set ...")
            TrGroupFold = None
            FoldStr = ""
            if ui.cbFSubject.isChecked():
                if not ui.rbFRun.isChecked():
                    TrGroupFold = TrSubject
                    FoldStr = "Subject"
                else:
                    TrGroupFold = np.concatenate((TrSubject, TrRun))
                    FoldStr = "Subject+Run"

            if ui.cbFTask.isChecked():
                TrGroupFold = np.concatenate(
                    (TrGroupFold,
                     TrTaskIndex)) if TrGroupFold is not None else TrTaskIndex
                FoldStr = FoldStr + "+Task"

            if ui.cbFCounter.isChecked():
                TrGroupFold = np.concatenate(
                    (TrGroupFold,
                     TrCounter)) if TrGroupFold is not None else TrCounter
                FoldStr = FoldStr + "+Counter"

            TrGroupFold = np.transpose(TrGroupFold)

            TrUniqFold = np.array(
                list(set(tuple(i) for i in TrGroupFold.tolist())))

            TrFoldIDs = np.arange(len(TrUniqFold)) + 1

            TrListFold = list()
            for gfold in TrGroupFold:
                for ufoldindx, ufold in enumerate(TrUniqFold):
                    if (ufold == gfold).all():
                        currentID = TrFoldIDs[ufoldindx]
                        break
                TrListFold.append(currentID)
            TrListFold = np.int32(TrListFold)
            TrListFoldUniq = np.unique(TrListFold)

            # Test Analysis Level
            print("Calculating Analysis Level for Testing Set ...")
            TeGroupFold = None
            if ui.cbFSubject.isChecked():
                if not ui.rbFRun.isChecked():
                    TeGroupFold = TeSubject
                else:
                    TeGroupFold = np.concatenate((TeSubject, TeRun))

            if ui.cbFTask.isChecked():
                TeGroupFold = np.concatenate(
                    (TeGroupFold,
                     TeTaskIndex)) if TeGroupFold is not None else TeTaskIndex

            if ui.cbFCounter.isChecked():
                TeGroupFold = np.concatenate(
                    (TeGroupFold,
                     TeCounter)) if TeGroupFold is not None else TeCounter

            TeGroupFold = np.transpose(TeGroupFold)

            TeUniqFold = np.array(
                list(set(tuple(i) for i in TeGroupFold.tolist())))

            TeFoldIDs = np.arange(len(TeUniqFold)) + 1

            TeListFold = list()
            for gfold in TeGroupFold:
                for ufoldindx, ufold in enumerate(TeUniqFold):
                    if (ufold == gfold).all():
                        currentID = TeFoldIDs[ufoldindx]
                        break
                TeListFold.append(currentID)
            TeListFold = np.int32(TeListFold)
            TeListFoldUniq = np.unique(TeListFold)

            # Train Partition
            print("Partitioning Training Data ...")
            TrX = list()
            TrShape = None

            # Select the per-view feature extractor for this fold.
            if Method == "PCA":
                svdmodel = PCA(n_components=NumFea, copy=False, tol=Tol)
            elif Method == "Kernel PCA":
                svdmodel = KernelPCA(n_components=NumFea,kernel=Kernel,gamma=Gamma,degree=Degree,\
                              coef0=Coef0, alpha=Alpha, tol=Tol, max_iter=MaxIter, n_jobs=NJob,copy_X=False)
            else:
                svdmodel = IncrementalPCA(n_components=NumFea,
                                          copy=False,
                                          batch_size=Batch)

            # Extract NumFea features from each training view (fold) independently.
            for foldindx, fold in enumerate(TrListFoldUniq):
                dat = XTr[np.where(TrListFold == fold)]
                if ui.cbScale.isChecked() and ui.rbScale.isChecked():
                    dat = preprocessing.scale(dat)
                    print("Data belong to View " + str(foldindx + 1) +
                          " is scaled X~N(0,1).")

                # The reducer is refit on every view (fit_transform, not transform).
                dat = svdmodel.fit_transform(dat)
                TrX.append(dat)
                # All views must end up with identical shapes for hyperalignment.
                if TrShape is None:
                    TrShape = np.shape(dat)
                else:
                    if not (TrShape == np.shape(dat)):
                        print("ERROR: Train, Reshape problem for Fold " +
                              str(foldindx + 1) + ", Shape: " +
                              str(np.shape(dat)))
                        return
                print("Train: View " + str(foldindx + 1) +
                      " is extracted. Shape: " + str(np.shape(dat)))

            print("Training Shape: " + str(np.shape(TrX)))

            # Test Partition
            print("Partitioning Testing Data ...")
            TeX = list()
            TeShape = None
            for foldindx, fold in enumerate(TeListFoldUniq):
                dat = XTe[np.where(TeListFold == fold)]
                if ui.cbScale.isChecked() and ui.rbScale.isChecked():
                    dat = preprocessing.scale(dat)
                    print("Data belong to View " + str(foldindx + 1) +
                          " is scaled X~N(0,1).")

                # NOTE(review): fit_transform is also used on the TEST views,
                # i.e. the reducer is refit per test view instead of reusing
                # the training fit — confirm this is intended.
                dat = svdmodel.fit_transform(dat)
                TeX.append(dat)
                if TeShape is None:
                    TeShape = np.shape(dat)
                else:
                    if not (TeShape == np.shape(dat)):
                        print("Test: Reshape problem for Fold " +
                              str(foldindx + 1))
                        return
                print("Test: View " + str(foldindx + 1) + " is extracted.")

            print("Testing Shape: " + str(np.shape(TeX)))

            # Regularized hyperalignment: learns per-view mappings plus the
            # shared space G from the training views.
            model = RHA(Dim=NumFea, regularization=Regularization)

            print("Running Hyperalignment on Training Data ...")
            MappedXtr, G = model.train(TrX)

            print("Running Hyperalignment on Testing Data ...")
            MappedXte = model.test(TeX)

            # Train Dot Product
            print("Producting Training Data ...")
            TrHX = None
            TrErr = None
            # Accumulate the residual (G - mapped view) and stack the aligned
            # training views row-wise into one matrix.
            for foldindx, fold in enumerate(TrListFoldUniq):
                TrErr = TrErr + (
                    G - MappedXtr[foldindx]
                ) if TrErr is not None else G - MappedXtr[foldindx]
                TrHX = np.concatenate(
                    (TrHX, MappedXtr[foldindx]
                     )) if TrHX is not None else MappedXtr[foldindx]
            OutData[ui.txtOTrData.text()] = TrHX
            # foldindx holds the last loop index; +1 turns it into the view count.
            foldindx = foldindx + 1
            TrErr = TrErr / foldindx
            print("Train: alignment error ", np.linalg.norm(TrErr))
            TrFoldErr.append(np.linalg.norm(TrErr))

            # Test Dot Product (same accumulation for the aligned test views)
            print("Producting Testing Data ...")
            TeHX = None
            TeErr = None
            for foldindx, fold in enumerate(TeListFoldUniq):
                TeErr = TeErr + (
                    G - MappedXte[foldindx]
                ) if TeErr is not None else G - MappedXte[foldindx]
                TeHX = np.concatenate(
                    (TeHX, MappedXte[foldindx]
                     )) if TeHX is not None else MappedXte[foldindx]
            OutData[ui.txtOTeData.text()] = TeHX
            foldindx = foldindx + 1
            TeErr = TeErr / foldindx
            print("Test: alignment error ", np.linalg.norm(TeErr))
            TeFoldErr.append(np.linalg.norm(TeErr))

            # Persist the alignment parameters next to the aligned data.
            HAParam = dict()
            HAParam["Method"] = Method
            HAParam["Kernel"] = Kernel
            HAParam["Share"] = G
            HAParam["Level"] = FoldStr
            OutData["FunctionalAlignment"] = HAParam

            print("Saving ...")
            io.savemat(OutFile, mdict=OutData)
            print("Fold " + str(fold_all) + " is DONE: " + OutFile)

        print("Training -> Alignment Error: mean " + str(np.mean(TrFoldErr)) +
              " std " + str(np.std(TrFoldErr)))
        print("Testing  -> Alignment Error: mean " + str(np.mean(TeFoldErr)) +
              " std " + str(np.std(TeFoldErr)))
        print("Kernel/SVD Hyperalignment is done.")
        msgBox.setText("Kernel/SVD Hyperalignment is done.")
        msgBox.setIcon(QMessageBox.Information)
        msgBox.setStandardButtons(QMessageBox.Ok)
        msgBox.exec_()
# Hyperparameter-search spec: fault-detection KNN with linear PCA preprocessing.
# 'variables' / 'distributions' / 'arguments' are parallel lists, and
# 'variable_type' routes each variable to the estimator or the preprocessor.
fd_knn_linear_pca = dict(
    classifier=FaultDetectionKNN(),
    preprocessing_method=PCA(),
    model_name='Fault_Detection_KNN_linear_PCA',
    sampling_method=None,
    log_normalize=False,
    variables=['k', 'alpha', 'n_components'],
    distributions=['quniform', 'uniform', 'quniform'],
    arguments=[(2, 200, 1), (0, 0.01), (1, 139, 1)],
    variable_type={'k': 'estimator', 'alpha': 'estimator',
                   'n_components': 'preprocessor'},
)

# Hyperparameter-search spec: fault-detection KNN with RBF ("radial") kernel
# PCA preprocessing; 'gamma' is searched log-uniformly for the RBF kernel.
fd_knn_radial_pca = dict(
    classifier=FaultDetectionKNN(),
    preprocessing_method=KernelPCA(kernel="rbf", eigen_solver="arpack"),
    model_name='Fault_Detection_KNN_Radial_PCA',
    sampling_method=None,
    log_normalize=False,
    variables=['k', 'alpha', 'n_components', 'gamma'],
    distributions=['quniform', 'uniform', 'quniform', 'loguniform'],
    arguments=[(2, 200, 1), (0, 0.01), (1, 100, 1), (1e-6, 300)],
    variable_type={'k': 'estimator', 'alpha': 'estimator',
                   'n_components': 'preprocessor', 'gamma': 'preprocessor'},
)

# Hyperparameter-search spec: adaptive Mahalanobis-distance KNN, no
# preprocessing step.
# NOTE(review): this literal is truncated in this excerpt; the remaining
# keys ('variables', 'distributions', ...) continue past the visible lines.
mad_knn = {'classifier': MahalanobisDistanceKNN(),
          'preprocessing_method': None,
          'model_name': 'Mahalanobis_Distance_KNN', 
          'sampling_method': None,
          'log_normalize': False, 
Esempio n. 25
0
# _*_ coding: utf-8 _*_
import numpy as np
import matplotlib.pyplot as plt

from sklearn.decomposition import PCA, KernelPCA
from sklearn.datasets import make_circles

# Reproducible synthetic data: two concentric circles.
np.random.seed(0)

X, y = make_circles(n_samples=400, factor=0.3, noise=0.05)

# Kernel PCA with an RBF kernel (and an inverse map back to input space),
# plus ordinary PCA on the same data for comparison.
kpca = KernelPCA(kernel='rbf', fit_inverse_transform=True, gamma=10)
X_kpca = kpca.fit_transform(X)
X_back = kpca.inverse_transform(X_kpca)
pca = PCA()
X_pca = pca.fit_transform(X)

# plot results
plt.figure()
plt.subplot(2, 2, 1, aspect='equal')
plt.title('Original space')

# Boolean class masks (kept at top level for reuse by later subplots).
reds = y == 0
blues = y == 1

# One scatter per class, same colors and draw order as before.
for mask, colour in ((reds, 'red'), (blues, 'blue')):
    plt.scatter(X[mask, 0], X[mask, 1], c=colour, s=20, edgecolor='k')
plt.xlabel('$x_1$')
plt.ylabel('$x_2$')
Esempio n. 26
0
   # NOTE(review): fragment of a larger function — `audio`, `phi`, `audio_fs`,
   # `eeg_fs`, `meta_data`, `read_audio`, `os`, and `cwd` are defined before
   # this excerpt.  Trims the audio to a fixed-length window.
   time = 6.8710
   audio = audio[phi:int(phi+audio_fs*time)]
   fs = eeg_fs
 
   # Iterate over all songs for one patient (P01) rather than all patients
   # for one song, and train on that per-song ordering.
   indices = []
   songs = [] 
   for i in range(len(meta_data)):
       if meta_data[i]['subject'] == 'P01' and meta_data[i]['trial_type']=='perception' :
           indices.append(i)
           songs.append(meta_data[i]['stimulus_id'])
       #%%
   # Dimensionality reduction: project each trial onto k linear components.
   k = 1
   bumble = KernelPCA(n_components = k, kernel='linear') 
   
   ######################################################################
   #                            NN Training
   ######################################################################
   test_audio = 24
   nets = []
   print('Total Trials = around ' + str(len(indices)))
   # Iterate over each perception trial of this patient:
   for i, trial in enumerate(indices):
       # Read in the stimulus audio and keep only the first channel.
       audio_fs, audio = read_audio(os.path.join(cwd, 'Audio', str(songs[i]) + ".wav"))
       audio = audio[:,0]
       # Same fixed window as above; `phi` is the start sample offset.
       phi = 82500
       time = 6.8710
       audio = audio[phi:int(phi+audio_fs*time)]
# Scatter class 1 on the 1-D strip plot, slightly below the axis line.
ax[1].scatter(x_kpca[y == 1, 0],
              np.zeros((500, 1)) - 0.02,
              marker='^',
              alpha=0.5)

ax[0].set_xlabel('PC 1')
# BUG FIX: the second label belongs on the y-axis; the original called
# set_xlabel twice, so 'PC 2' overwrote 'PC 1' and the y-axis stayed blank.
ax[0].set_ylabel('PC 2')
ax[1].set_ylim([-1, 1])
ax[1].set_yticks([])
ax[1].set_xlabel('PC 1')
plt.show()

from sklearn.decomposition import KernelPCA

# Kernel PCA with an RBF kernel linearly separates the two half-moons.
X, y = make_moons(n_samples=100, random_state=123)
scikit_kpca = KernelPCA(n_components=2, kernel='rbf', gamma=15)
X_skernpca = scikit_kpca.fit_transform(X)

# One scatter call per class, same colors/markers and draw order as before.
for _cls, _color, _marker in ((0, 'red', '^'), (1, 'blue', 'o')):
    plt.scatter(X_skernpca[y == _cls, 0],
                X_skernpca[y == _cls, 1],
                color=_color,
                marker=_marker,
                alpha=0.5)

plt.xlabel('PC1')
plt.ylabel('PC2')
Esempio n. 28
0
    path='../../figs/out/%s/%s/scaled.png' % (scriptname, dataset))

# Initialize kernel-PCA variants (plus plain PCA) for comparison.

# NOTE(review): an earlier comment claimed gamma matches the 1/n_features
# default (0.002 for 500 variables) and was only stated explicitly for
# display on graphs, yet gamma is set to 10 here — confirm which value is
# actually intended.
gamma = 10

# (label, filename prefix, estimator) triples consumed by the loop below.
kpcas = []

# Use standard PCA as the baseline for comparison.
kpcas.append(('standard ', 'std_', PCA(n_components=2)))

# The linear kernel has no use for gamma.
kpcas.append(('Linear K', 'lin_k', KernelPCA(n_components=2, kernel='linear')))
kpcas.append(
    ('RBF K', 'rbf_k', KernelPCA(n_components=2, kernel='rbf', gamma=gamma)))
kpcas.append(('Polynomial K', 'ply_k',
              KernelPCA(n_components=2, kernel='poly', gamma=gamma)))
kpcas.append(('Sigmoid K', 'sig_k',
              KernelPCA(n_components=2, kernel='sigmoid', gamma=gamma)))
kpcas.append(('Cosine K', 'cos_k',
              KernelPCA(n_components=2, kernel='cosine', gamma=gamma)))

# Project the scaled data with each variant and plot the 2-D embedding.
for kernel, abbreviation, kpca in kpcas:

    X_kpca = kpca.fit_transform(X_scaled)

    plot_scatter(X_kpca,
                 y,
Esempio n. 29
0
# Stack the remaining per-category similarity blocks row-wise onto `sim`
# (presumably initialized to sim_n[0] before this excerpt — confirm upstream).
for j in range(1, category):
    sim = np.vstack((sim, sim_n[j]))
# Dimensionality reduction: plain PCA keeps enough components to explain the
# requested variance fraction; Kernel PCA (RBF) is the alternative branch.
components = 0.99  # explained-variance target ("canshu" = parameter)
if PCAflag == 1:
    # A float n_components with svd_solver='full' selects the smallest
    # component count reaching that explained-variance fraction.
    pca = PCA(n_components=components, svd_solver='full')
    pca.fit(train)
    train_new = pca.transform(train)
    sim_new = pca.transform(sim)
    print('pca.explained_variance_ratio_', pca.explained_variance_ratio_)
    print('sum(pca.explained_variance_ratio_)',
          sum(pca.explained_variance_ratio_))
    print(pca.singular_values_)
else:
    # BUG FIX: KernelPCA only accepts an integer n_components (a variance
    # fraction such as 0.99 is valid for PCA alone), so keep all non-zero
    # components when a fraction is configured.
    kpca = KernelPCA(n_components=components if isinstance(components, int) else None,
                     kernel="rbf",
                     fit_inverse_transform=True)
    kpca.fit(train)
    train_new = kpca.transform(train)
    sim_new = kpca.transform(sim)
    # BUG FIX: the original printed attributes of the undefined `pca` here
    # (copy-paste from the branch above), raising NameError; KernelPCA has
    # no explained_variance_ratio_/singular_values_, so report its kernel
    # eigenvalues instead (eigenvalues_ in sklearn >= 1.0, lambdas_ before).
    print('kpca eigenvalues',
          getattr(kpca, 'eigenvalues_', getattr(kpca, 'lambdas_', None)))
# Report the dimensionality before/after reduction.
print('train.shape', train.shape)
print('train_new.shape', train_new.shape)

# Optional visualisation of the reduced features per category
# (loop body continues past this excerpt).
if plotflag == 1:
    plt.figure(figsize=(10, 8))
    for i in range(0, category):
        plt.subplot(1, 2, 1)
Esempio n. 30
0
graphList = []
label = []

# Build a network graph for every SBML model in the library folder; the
# third underscore-separated token of each filename is its class label.
for model_name in os.listdir(library_folder):
    print('Loading', model_name)
    label.append(model_name.split('_')[2])
    graphList.append(modelNet(cobra.io.read_sbml_model(library_folder + model_name)))
print('Done')

# Normalized Weisfeiler-Lehman graph kernel -> pairwise similarity matrix K.
kernel = gk.WeisfeilerLehman(base_kernel=gk.VertexHistogram, normalize=True)
K = pd.DataFrame(kernel.fit_transform(graphList))

# 2-D embedding of the precomputed kernel matrix
kpca = KernelPCA(kernel="precomputed", n_components=2, n_jobs=-1)
X_kpca = kpca.fit_transform(K)
sns.scatterplot(x=X_kpca[:, 0], y=X_kpca[:, 1], hue=label)

# 3-D embedding of the same kernel matrix
kpca = KernelPCA(kernel="precomputed", n_components=3, n_jobs=-1)
X_kpca = kpca.fit_transform(K)

fig = pyplot.figure(figsize=(8, 8))
ax = Axes3D(fig)

# map each label suffix to a plotting colour
td = {'old.xml': 'red', 'young.xml': 'blue'}
hue = [td[age] for age in label]