Example #1
def whitening(data, dim_reduction='',
              npc=None, explainedVar=1.0):

    """
    routine to perform whitening prior to Infomax ICA application
    (whitening is based on Principal Component Analysis using the
    RandomizedPCA class from sklearn.decomposition)

        Parameters
        ----------
        data : data array [ntsl, nchan] for decomposition.
        dim_reduction : {'', 'AIC', 'BIC', 'GAP', 'MDL', 'MIBS', 'explVar'}
            Method for dimension selection. For further information about
            the methods please check the script 'dimension_selection.py'.
            default: dim_reduction='' --> no dimension reduction is performed
                                          unless the parameter 'npc' is set.
        npc : int | None
            The number of components used for PCA decomposition. If None, no
            dimension reduction will be applied and the number of components
            will equal the number of channels of the data to decompose. Only
            of interest when dim_reduction=''.
            default: npc = None
        explainedVar : float | None
            Must be between 0 and 1. If float, the number of components
            selected matches the number of components with a cumulative
            explained variance of 'explainedVar'
            default: explainedVar = 1.0

        Returns
        -------
        whitened_data : data array [ntsl, npc] of the whitened sources
        pca : instance of RandomizedPCA
            The fitted PCA object, with the mean and standard deviation of the
            original data stored in 'mean_' and 'stddev_'.
    """

    # -------------------------------------------
    # import necessary modules
    # -------------------------------------------
    import numpy as np
    from sklearn.decomposition import RandomizedPCA
    import dimension_selection as dim_sel


    # -------------------------------------------
    # check input data
    # -------------------------------------------
    ntsl, nchan = data.shape

    if (nchan < 2) or (ntsl < nchan):
        raise ValueError('Data size too small!')


    # -------------------------------------------
    # perform PCA decomposition
    # -------------------------------------------
    X = data.copy()
    whiten = False
    dmean = X.mean(axis=0)
    stddev = np.std(X, axis=0)
    X = (X - dmean[np.newaxis, :]) / stddev[np.newaxis, :]

    pca = RandomizedPCA(n_components=None, whiten=whiten,
                        copy=True)

    # -------------------------------------------
    # perform whitening
    # -------------------------------------------
    whitened_data = pca.fit_transform(X)


    # -------------------------------------------
    # update PCA structure
    # -------------------------------------------
    pca.mean_ = dmean
    pca.stddev_ = stddev

    # -------------------------------------------
    # check dimension selection
    # -------------------------------------------
    if dim_reduction == 'AIC':
        npc, _ = dim_sel.aic_mdl(pca.explained_variance_)
    elif dim_reduction == 'BIC':
        npc = dim_sel.mibs(pca.explained_variance_, ntsl,
                           use_bic=True)
    elif dim_reduction == 'GAP':
        npc = dim_sel.gap(pca.explained_variance_)
    elif dim_reduction == 'MDL':
        _, npc = dim_sel.aic_mdl(pca.explained_variance_)
    elif dim_reduction == 'MIBS':
        npc = dim_sel.mibs(pca.explained_variance_, ntsl,
                           use_bic=False)
    elif dim_reduction == 'explVar':
        # compute explained variance manually
        # copy so that pca.explained_variance_ is not modified in place
        explained_variance_ratio_ = pca.explained_variance_.copy()
        explained_variance_ratio_ /= explained_variance_ratio_.sum()
        npc = np.sum(explained_variance_ratio_.cumsum() <= explainedVar)
    elif npc is None:
        npc = nchan

    # return results
    return whitened_data[:, :(npc+1)], pca
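A minimal usage sketch of the routine above, assuming 'dimension_selection.py' is importable and substituting random numbers for real channel data:

import numpy as np

rng = np.random.RandomState(42)
fake_data = rng.randn(10000, 32)           # 10000 time slices, 32 channels

# keep the components explaining 95% of the cumulative variance
whitened, pca = whitening(fake_data, dim_reduction='explVar', explainedVar=0.95)
print(whitened.shape)                      # (10000, number of selected components)
print(pca.mean_.shape, pca.stddev_.shape)  # per-channel mean / std stored by the routine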
Example #2
            color='w',
            zorder=10)
plt.title('Kmeans clustering on Pima dataset after ICA\n'
          'Centroids are marked with white cross')
plt.xlim(x_min, x_max)
plt.ylim(y_min, y_max)
plt.xticks(())
plt.yticks(())
file_name = 'Plots/Kmeans Pima after ICA ' + str(n_components) + '.png'
fig.savefig(file_name)
plt.close()

##############################################################################
# Visualize the results on RandomizedPCA-reduced data

reduced_data = RandomizedPCA(n_components=2).fit_transform(data)
kmeans = KMeans(init="random", n_clusters=n_components, n_init=10)
kmeans.fit(reduced_data)

# Step size of the mesh. Decrease to increase the quality of the VQ.
h = .02  # point in the mesh [x_min, x_max]x[y_min, y_max].

# Plot the decision boundary. For that, we will assign a color to each point in the mesh
x_min, x_max = reduced_data[:, 0].min(), reduced_data[:, 0].max()
y_min, y_max = reduced_data[:, 1].min(), reduced_data[:, 1].max()
print(x_min, x_max, y_min, y_max)
xx, yy = np.meshgrid(np.arange(x_min, x_max, h), np.arange(y_min, y_max, h))
Z = kmeans.predict(np.c_[xx.ravel(), yy.ravel()])
Z = Z.reshape(xx.shape)
fig = plt.figure()
plt.clf()
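The listing stops right after the figure is cleared; a plausible continuation, mirroring the centroid plot at the top of this example (the title and output filename are guesses), would be:

plt.imshow(Z, interpolation='nearest',
           extent=(xx.min(), xx.max(), yy.min(), yy.max()),
           cmap=plt.cm.Paired, aspect='auto', origin='lower')
plt.plot(reduced_data[:, 0], reduced_data[:, 1], 'k.', markersize=2)
# mark the cluster centroids with a white cross
centroids = kmeans.cluster_centers_
plt.scatter(centroids[:, 0], centroids[:, 1],
            marker='x', s=169, linewidths=3, color='w', zorder=10)
plt.title('Kmeans clustering on Pima dataset after PCA\n'
          'Centroids are marked with white cross')
plt.xlim(x_min, x_max)
plt.ylim(y_min, y_max)
plt.xticks(())
plt.yticks(())
fig.savefig('Plots/Kmeans Pima after PCA ' + str(n_components) + '.png')
plt.close()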
Example #3
                cells_per_block=self.cells_per_block,
            )
            result.append(features)
        return np.array(result)


MODELS = {
    'linearsvc': (
        LinearSVC(),
        {
            'C': [0.01, 0.1, 1.0]
        },
    ),
    'linearsvc-pca': (
        Pipeline([
            ('pca', RandomizedPCA(n_components=100, whiten=True)),
            ('clf', LinearSVC(C=1.0)),
        ]),
        {
            'pca__n_components': [10, 30, 100],
            'clf__C': [0.01, 0.1, 1.0]
        },
    ),
    'linearsvc-hog': (
        Pipeline([
            ('hog',
             HOGFeatures(
                 orientations=8,
                 pixels_per_cell=(4, 4),
                 cells_per_block=(3, 3),
             )),
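# (the MODELS dict above is cut off mid-entry and its remaining values are not
#  reconstructed here; for context, a hedged sketch of how such a mapping is
#  typically consumed with a grid search)
def train_model(name, X, y):
    from sklearn.model_selection import GridSearchCV  # sklearn.grid_search on older releases
    estimator, param_grid = MODELS[name]
    grid = GridSearchCV(estimator, param_grid, cv=3)
    grid.fit(X, y)
    print(name, grid.best_params_, grid.best_score_)
    return grid.best_estimator_

# e.g. best_clf = train_model('linearsvc-pca', X_train, y_train)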
Example #4
def fit_deprecated(X):
    global Y
    rpca = RandomizedPCA(random_state=0)
    Y = rpca.fit_transform(X)
Example #5
def build_SVC(face_profile_data, face_profile_name_index, face_dim):
    """
    Build the SVM classification model using the face_profile_data matrix
    (numOfFace X numOfPixel) and the face_profile_name_index array.
    face_dim is a tuple (h, w) giving the dimension of each image.
    Returns the trained SVM classification model.
    Parameters
    ----------
    face_profile_data : ndarray (number_of_images_in_face_profiles, width * height of the image)
        The face profile data matrix (one flattened image per row)

    face_profile_name_index : ndarray
        The name corresponding to the face profile is encoded in its index

    face_dim : tuple (int, int)
        The dimension of the face data is reshaped to

    Returns
    -------
    clf : sklearn SVC object
        The trained SVM classification model

    pca : sklearn PCA object
        The pca that contains the top n_components eigenvectors extracted using approximated Singular Value Decomposition of the data

    """

    X = face_profile_data
    y = face_profile_name_index

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=42)

    # Compute a PCA (eigenfaces) on the face dataset (treated as unlabeled
    # dataset): unsupervised feature extraction / dimensionality reduction
    n_components = 10 # maximum number of components to keep

    print("\nExtracting the top %d eigenfaces from %d faces" % (n_components, X_train.shape[0]))

    pca = RandomizedPCA(n_components=n_components, whiten=True).fit(X_train)
    eigenfaces = pca.components_.reshape((n_components, face_dim[0], face_dim[1]))

    # This portion of the code is used if the data is scarce; it uses the
    # number of inputs as the number of features
    # pca = RandomizedPCA(n_components=None, whiten=True).fit(X_train)
    # eigenfaces = pca.components_.reshape((pca.components_.shape[0], face_dim[0], face_dim[1]))

    print("\nProjecting the input data on the eigenfaces orthonormal basis")
    X_train_pca = pca.transform(X_train)
    X_test_pca = pca.transform(X_test) 

    # Train a SVM classification model

    print("\nFitting the classifier to the training set")
    param_grid = {'C': [1e3, 5e3, 1e4, 5e4, 1e5],
                  'gamma': [0.0001, 0.0005, 0.001, 0.005, 0.01, 0.1], }
    # clf = GridSearchCV(SVC(kernel='rbf', class_weight='balanced'), param_grid)

    # Best Estimator found using Radial Basis Function Kernel:
    clf = SVC(C=1000.0, cache_size=200, class_weight='balanced', coef0=0.0,
  decision_function_shape=None, degree=3, gamma=0.0001, kernel='rbf',
  max_iter=-1, probability=False, random_state=None, shrinking=True,
  tol=0.001, verbose=False)
    # Train_pca with Alex Test Error Rate:  0.088424437299
    # Train_pca with Alex Test Recognition Rate:  0.911575562701

    clf = clf.fit(X_train_pca, y_train)
    # print("\nBest estimator found by grid search:")
    # print(clf.best_estimator_)

    ###############################################################################
    # Quantitative evaluation of the model quality on the test set
    print("\nPredicting people's names on the test set")
    t0 = time()
    y_pred = clf.predict(X_test_pca)
    print("\nPrediction took %s per sample on average" % ((time() - t0)/y_pred.shape[0]*1.0))

    # print "predicated names: ", y_pred
    # print "actual names: ", y_test
    error_rate = errorRate(y_pred, y_test)
    print ("\nTest Error Rate: %0.4f %%" % (error_rate * 100))
    print ("Test Recognition Rate: %0.4f %%" % ((1.0 - error_rate) * 100))

    return clf, pca
Example #6
    X_meg = preprocessing.scale(good_data['meg'][g][idx, :])
    X_fmri = preprocessing.scale(good_data['fmri'][g][idx, :])
    kernel = kernels.LinearKernel()
    cca = kcca.KCCA(kernel, kernel, regularization=1e-5, decomp='full',
                    method='kettering_method', scaler1=lambda x: x,
                    scaler2=lambda x: x)
    cca = cca.fit(X_fmri, X_meg)
    X_fmri2, X_meg2 = cca.transform(X_fmri, X_meg)

    score, yf, ym = [], [], []
    cnt = 1
    pl.figure()
    for c in ncomps:
        # X = np.hstack([X_fmri2[:, :c], X_meg2[:, :c]])
        X = np.hstack([X_fmri, X_meg])
        X = RandomizedPCA(n_components=c, whiten=True).fit_transform(X)

        Y = lda.LDA(n_components=2).fit_transform(X, y)
        score.append(get_score(Y, colors))

        pl.subplot(nrows, ncols, cnt)
        pl.scatter(Y[:, 0], Y[:, 1], color=list(colors))
        pl.xticks([])
        pl.yticks([])
        pl.title('%d: %f' % (c, score[-1]))
        cnt += 1

        X = RandomizedPCA(n_components=c, whiten=True).fit_transform(X_fmri)
        Y = lda.LDA(n_components=2).fit_transform(X, y)
        yf.append(get_score(Y, colors))
Example #7
import numpy as np
from sklearn.decomposition import RandomizedPCA
import nimfa
import argparse

import CP_APR
import sptensor
import sptenmat

# Load the original data
filename = 'data/hf-tensor-level1-data.dat'
X = sptensor.loadTensor(filename)
R = 40
iters = 70
samples = 10

pcaModel = RandomizedPCA(n_components=R)
stats = np.zeros((1, 6))

parser = argparse.ArgumentParser()
parser.add_argument("pat", type=int, help="number of patients")
args = parser.parse_args()
pn = args.pat

patList = np.arange(pn)
ix = np.in1d(X.subs[:, 0].ravel(), patList)
idx = np.where(ix)[0]
xprime = sptensor.sptensor(X.subs[idx, :], X.vals[idx],
                           [pn, X.shape[1], X.shape[2]])
flatX = sptenmat.sptenmat(xprime,
                          [0]).tocsrmat()  # matricize along the first mode
stats = np.zeros((1, 6))
Example #8
def recognize_faces(n_components=150, min_faces_per_person=70,
                    svm_c=[1e3, 5e3, 1e4, 5e4, 1e5],
                    svm_gamma=[0.0001, 0.0005, 0.001, 0.005, 0.01, 0.1],
                    resize=0.4, plot_histogram=True, show_face_gallery=False):

    # Display progress logs on stdout
    logging.basicConfig(level=logging.INFO, format='%(asctime)s %(message)s')


    ###############################################################################
    # Download the data, if not already on disk and load it as numpy arrays

    lfw_people = fetch_lfw_people(min_faces_per_person=min_faces_per_person, resize=resize)

    # introspect the images arrays to find the shapes (for plotting)
    n_samples, h, w = lfw_people.images.shape

    # for machine learning we use the 2D data directly (as relative pixel
    # positions info is ignored by this model)
    X = lfw_people.data
    n_features = X.shape[1]

    # the label to predict is the id of the person
    y = lfw_people.target
    target_names = lfw_people.target_names
    n_classes = target_names.shape[0]

    print("Total dataset size:")
    print("n_samples: %d" % n_samples)
    print("n_features: %d" % n_features)
    print("n_classes: %d" % n_classes)


    ###############################################################################
    # Split into a training set and a test set using a stratified k fold

    # split into a training and testing set
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.25)


    ###############################################################################
    # Compute a PCA (eigenfaces) on the face dataset (treated as unlabeled
    # dataset): unsupervised feature extraction / dimensionality reduction

    print("Extracting the top %d eigenfaces from %d faces"
        % (n_components, X_train.shape[0]))
    t0 = time()
    pca = RandomizedPCA(n_components=n_components, whiten=True).fit(X_train)

    cov_matrix = np.dot(X.T, X) / n_samples
    for eigenvector in pca.components_:
        print(np.dot(eigenvector.T, np.dot(cov_matrix, eigenvector)))

    print("done in %0.3fs" % (time() - t0))

    eigenfaces = pca.components_.reshape((n_components, h, w))
    print(eigenfaces)
    print("Projecting the input data on the eigenfaces orthonormal basis")
    t0 = time()
    X_train_pca = pca.transform(X_train)
    X_test_pca = pca.transform(X_test)
    print("done in %0.3fs" % (time() - t0))


    ###############################################################################
    # Train a SVM classification model

    print("Fitting the classifier to the training set")
    t0 = time()
    param_grid = {'C': svm_c,
                'gamma': svm_gamma, }
    clf = GridSearchCV(SVC(kernel='rbf', class_weight='balanced'), param_grid)
    clf = clf.fit(X_train_pca, y_train)
    print("done in %0.3fs" % (time() - t0))
    print("Best estimator found by grid search:")
    print(clf.best_estimator_)


    ###############################################################################
    # Quantitative evaluation of the model quality on the test set

    print("Predicting people's names on the test set")
    t0 = time()
    y_pred = clf.predict(X_test_pca)
    end_time = time() - t0
    print("done in {}s".format(end_time))

    print(classification_report(y_test, y_pred, target_names=target_names))
    print(confusion_matrix(y_test, y_pred, labels=range(n_classes)))

    if show_face_gallery:
        prediction_titles = [title(y_pred, y_test, target_names, i)
                             for i in range(y_pred.shape[0])]
        plot_gallery(X_test, prediction_titles, h, w)

        # plot the gallery of the most significant eigenfaces
        eigenface_titles = ["eigenface %d" % i for i in range(eigenfaces.shape[0])]
        plot_gallery(eigenfaces, eigenface_titles, h, w)

    # Return errors
    return mean_squared_error(y_test, y_pred, multioutput='uniform_average'), r2_score(y_test, y_pred)
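A hedged call example, assuming the module-level imports this function relies on (fetch_lfw_people, train_test_split, GridSearchCV, SVC, the metric functions, time and logging) are in place:

mse, r2 = recognize_faces(n_components=150, min_faces_per_person=70, resize=0.4)
print("MSE: %.4f, R^2: %.4f" % (mse, r2))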
Example #9
    def pca_linear_initialization(self, data):
        """
        We initialize the map by using the first two eigenvalues and
        eigenvectors of the data.
        Further, we create a linear combination of them in the new map by
        giving values from -1 to 1 in each direction.

        X = U * Sigma * W^T
        X^T X = W * Sigma^2 * W^T
        T = X * W = U * Sigma

        T is X transformed by the eigenvector matrix W; it can also be
        obtained by multiplying the PC matrix by the eigenvalues.
        Lower-rank approximations follow from using just a few of the
        eigenvectors:

        T(2) = U(2) * Sigma(2) = X * W(2)  ---> 2 is the number of selected
        eigenvectors

        (*) Note that X^T X is proportional to the covariance matrix of the
        mean-centered original data

        :param data: data to use for the initialization
        :returns: initialized matrix with same dimension as input data
        """
        cols = self.mapsize[1]
        coord = None
        pca_components = None

        if np.min(self.mapsize) > 1:
            coord = np.zeros((self.nnodes, 2))
            pca_components = 2

            for i in range(0, self.nnodes):
                coord[i, 0] = int(i / cols)  # x
                coord[i, 1] = int(i % cols)  # y

        elif np.min(self.mapsize) == 1:
            coord = np.zeros((self.nnodes, 1))
            pca_components = 1

            for i in range(0, self.nnodes):
                coord[i, 0] = int(i % cols)  # y

        mx = np.max(coord, axis=0)
        mn = np.min(coord, axis=0)
        coord = (coord - mn)/(mx-mn)
        coord = (coord - .5)*2
        me = np.mean(data, 0)
        data = (data - me)
        tmp_matrix = np.tile(me, (self.nnodes, 1))

        # Randomized PCA is scalable
        pca = RandomizedPCA(n_components=pca_components)
        pca.fit(data)
        eigvec = pca.components_
        eigval = pca.explained_variance_
        norms = np.sqrt(np.einsum('ij,ij->i', eigvec, eigvec))
        eigvec = ((eigvec.T/norms)*eigval).T

        for j in range(self.nnodes):
            for i in range(eigvec.shape[0]):
                tmp_matrix[j, :] = tmp_matrix[j, :] + coord[j, i]*eigvec[i, :]

        self.matrix = np.around(tmp_matrix, decimals=6)
        self.initialized = True
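For illustration, the same initialization can be written as a stand-alone helper. A sketch under the assumption that a current scikit-learn is available (RandomizedPCA has since been folded into PCA(svd_solver='randomized')); the map and data sizes are made up:

import numpy as np
from sklearn.decomposition import PCA

def pca_linear_init(data, rows, cols):
    nnodes = rows * cols
    # node coordinates on the grid, rescaled to [-1, 1] in each direction
    coord = np.array([(i // cols, i % cols) for i in range(nnodes)], dtype=float)
    coord = (coord - coord.min(axis=0)) / (coord.max(axis=0) - coord.min(axis=0))
    coord = (coord - .5) * 2

    me = data.mean(axis=0)
    pca = PCA(n_components=2, svd_solver='randomized')
    pca.fit(data - me)
    eigvec = pca.components_
    norms = np.sqrt(np.einsum('ij,ij->i', eigvec, eigvec))
    eigvec = ((eigvec.T / norms) * pca.explained_variance_).T

    # each node is the data mean plus a linear combination of the two scaled
    # eigenvectors, weighted by the node's grid coordinates
    codebook = np.tile(me, (nnodes, 1)) + coord @ eigvec
    return np.around(codebook, decimals=6)

codebook = pca_linear_init(np.random.randn(500, 10), rows=6, cols=8)
print(codebook.shape)  # (48, 10)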
Example #10
def do_RandomizedPCA(armadillo):
    pca = RandomizedPCA(n_components=2)
    pca.fit(armadillo)
    return pca.transform(armadillo)
Example #11
def reduce_dems(X_train):
    rpca = RandomizedPCA(n_components=2)
    return rpca.fit_transform(X_train)
Example #12
def pca(embeddings, n=2):
    trained_pca = RandomizedPCA(n_components=n, random_state=2101991)
    return trained_pca.fit_transform(embeddings)
Example #13
from sklearn.datasets import fetch_lfw_people
from sklearn.decomposition import PCA as RandomizedPCA
import matplotlib.pyplot as plt
import numpy as np

faces = fetch_lfw_people(min_faces_per_person=60)

print(faces.target_names)
print(faces.images.shape)  ##1348 images of 62 x 47 pixels each
n_samples, h, w = faces.images.shape
print(n_samples)

n_components = 150
pca = RandomizedPCA(
    n_components=n_components,
    svd_solver='randomized')  ##Randomized PCA for the first 150 components
x_proj = pca.fit_transform(faces.data)

#Reconstruction
x_inv_proj = pca.inverse_transform(x_proj)
x_proj_img = np.reshape(x_inv_proj, (1348, 62, 47))

#The first 24 reconstructed images
fig, axes = plt.subplots(3,
                         8,
                         figsize=(9, 4),
                         subplot_kw={
                             'xticks': [],
                             'yticks': []
                         },
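                         # (continuation sketch: the call is truncated here; by
                         # analogy with the gallery code in a later example it
                         # presumably closes with gridspec_kw and a loop that
                         # draws the reconstructed faces)
                         gridspec_kw=dict(hspace=0.1, wspace=0.1))
for i, ax in enumerate(axes.flat):
    ax.imshow(x_proj_img[i], cmap='bone')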
Example #14
import matplotlib.image as mpimg
import matplotlib.pyplot as plt
import numpy as np
from sklearn.decomposition import PCA
from data_preprocess import *
# from sklearn.decomposition import RandomizedPCA
from sklearn.decomposition import PCA as RandomizedPCA
img = mpimg.imread('./train/train_1.bmp')
# print(img.shape)
# plt.axis('off')
# plt.imshow(img)

img_r = np.reshape(img, (1, 1213650))
print(img_r.shape)

ipca = RandomizedPCA(1).fit(img_r)
img_c = ipca.transform(img_r)
print(img_c.shape)
print(np.sum(ipca.explained_variance_ratio_))
temp = ipca.inverse_transform(img_c)
print(temp.shape)

plt.axis('off')
plt.imshow(temp)
Example #15
plt.title('Pre-stimulus and first blue light stimulus')
# we'd need about 15 components to retain 90% of the variance

pca_2comp= PCA(n_components=2)
pca_transform= pca_2comp.fit_transform(feat_merge_scaled)
pca_2comp.explained_variance_ratio_ # array([0.54912074, 0.06706042, 0.05229013, 0.04254121, 0.03191877])

plt.plot(pca_transform[0:19,0],pca_transform[0:19,1], 'o', markersize=7, color='tab:brown', alpha=0.8, label='pre-sti')
plt.plot(pca_transform[19:38,0], pca_transform[19:38,1], '^', markersize=7, color='tab:blue', alpha=0.8, label='blue-light')
plt.xlabel('PC1 (54.9%)')
plt.ylabel('PC2 (6.7%)')
plt.legend(loc='upper left')
plt.title('PCA(significant features)')

#array([0.53962999, 0.07110877]) so ~40% of the information will be lost by projecting the data onto a 2-dimensional space

# use a distinct name so that the PCA class is not shadowed (it is called again below)
pca_rand = RandomizedPCA(n_components=15, svd_solver='randomized', whiten=True).fit(feat_merge_scaled)
components = pca_rand.transform(feat_merge_scaled)
projected_pca = pca_rand.inverse_transform(components)
pca_rand.explained_variance_ratio_
 


#%% for pre-stimulus, bluelight and post-stimulus
feat_merge_scaled_all= StandardScaler().fit_transform(feat_merge_all_1)
#Check the normalized data  #shape= (38, 479)

pca_all=PCA().fit(feat_merge_scaled_all)
plt.plot(np.cumsum(pca_all.explained_variance_ratio_))
plt.xlabel('number of components')
plt.ylabel('cumulative explained variance')
plt.title('Pre-stimulus, first blue light stimulus and post-stimulus') 
Example #16
import numpy as np

from sklearn.cross_validation import train_test_split
from sklearn.decomposition import RandomizedPCA
from sklearn.ensemble import VotingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.pipeline import make_pipeline, make_union
from sklearn.preprocessing import FunctionTransformer, PolynomialFeatures

# NOTE: Make sure that the class is labeled 'class' in the data file
tpot_data = np.recfromcsv('PATH/TO/DATA/FILE', delimiter='COLUMN_SEPARATOR', dtype=np.float64)
features = np.delete(tpot_data.view(np.float64).reshape(tpot_data.size, -1), tpot_data.dtype.names.index('class'), axis=1)
training_features, testing_features, training_classes, testing_classes = \
    train_test_split(features, tpot_data['class'], random_state=42)

exported_pipeline = make_pipeline(
    PolynomialFeatures(degree=2, include_bias=False, interaction_only=False),
    RandomizedPCA(iterated_power=10),
    LogisticRegression(C=0.49, dual=False, penalty="l1")
)

exported_pipeline.fit(training_features, training_classes)
results = exported_pipeline.predict(testing_features)
Example #17
    ax_i.imshow(faces.images[i], cmap="bone")
    ax_i.set(xticks=[], yticks=[], xlabel=faces.target_names[faces.target[i]])

# In[11]:

62 * 47

# In[12]:

from sklearn.svm import SVC
from sklearn.decomposition import RandomizedPCA
from sklearn.pipeline import make_pipeline

# In[13]:

pca = RandomizedPCA(n_components=150, whiten=True, random_state=42)
svc = SVC(kernel="rbf", class_weight="balanced")
model = make_pipeline(pca, svc)

# In[14]:

from sklearn.cross_validation import train_test_split

# In[15]:

Xtrain, Xtest, Ytrain, Ytest = train_test_split(faces.data,
                                                faces.target,
                                                random_state=42)

# In[16]:
Example #18
train_data, answer_data = read_csv(r'/train_data1_256_128_zoomed.csv')

#print train_data[0]

#Limit
#train_data = train_data[0:10]
#answer_data = answer_data[0:10]

#------------------------------------------------------------------------------
#Get the eigenfaces
n_components = 20  #How many eigenfaces - Higher number gives better result? n_components <= w*h (size of the image) or samples?
h = 18
w = 8

#Extracting the top n_components eigenfaces from total number of faces, maximum 10 in this case because the image is not a square?
pca = RandomizedPCA(n_components=n_components, whiten=True)

pca.fit(train_data)

#print len(pca.components_),len(pca.components_[0]) #From n_components up to 10,h*w

eigenfaces = pca.components_.reshape(
    (len(pca.components_), h, w))  #Get the components with maximum variance

print 'Eigenfaces finished!'

#------------------------------------------------------------------------------
#Plot
eigenface = eigenfaces[0]
#Reverse
eigenface = eigenface[::-1]
Example #19
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
sns.set()

from sklearn.datasets import fetch_lfw_people
faces = fetch_lfw_people(min_faces_per_person=60)
print(faces.target_names)
print(faces.images.shape)

from sklearn.decomposition import RandomizedPCA
pca = RandomizedPCA(150)
pca.fit(faces.data)

fig, axes = plt.subplots(3,
                         8,
                         figsize=(9, 4),
                         subplot_kw={
                             'xticks': [],
                             'yticks': []
                         },
                         gridspec_kw=dict(hspace=0.1, wspace=0.1))
for i, ax in enumerate(axes.flat):
    ax.imshow(pca.components_[i].reshape(62, 47), cmap='bone')

plt.plot(np.cumsum(pca.explained_variance_ratio_))
plt.xlabel('number of components')
plt.ylabel('cumulative explained variance')

# Compute the components and projected faces
pca = RandomizedPCA(150).fit(faces.data)
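The example breaks off right after re-fitting; in the usual continuation of this eigenfaces walkthrough the faces are projected onto the 150 components and reconstructed for comparison, roughly:

components = pca.transform(faces.data)
projected = pca.inverse_transform(components)

# plot ten originals next to their 150-dimensional reconstructions
fig, ax = plt.subplots(2, 10, figsize=(10, 2.5),
                       subplot_kw={'xticks': [], 'yticks': []},
                       gridspec_kw=dict(hspace=0.1, wspace=0.1))
for i in range(10):
    ax[0, i].imshow(faces.data[i].reshape(62, 47), cmap='binary_r')
    ax[1, i].imshow(projected[i].reshape(62, 47), cmap='binary_r')
ax[0, 0].set_ylabel('full-dim\ninput')
ax[1, 0].set_ylabel('150-dim\nreconstruction')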
Example #20
def lininit(self):
    #X = UsigmaWT
    #XTX = Wsigma^2WT
    #T = XW = Usigma #Transformed by W EigenVector, can be calculated by
    #multiplication PC matrix by eigenval too
    #Further, we can get lower ranks by using just a few of the eigenvectors
    #T(2) = U(2)sigma(2) = XW(2) ---> 2 is the number of selected eigenvectors
    # This is how we initialize the map, just by using the first two eigenvalues and eigenvectors
    # Further, we create a linear combination of them in the new map by giving values from -1 to 1 in each
    #Direction of SOM map
    # it should be noted that here, X^T X is proportional to the covariance matrix of the original data

    msize = getattr(self, 'mapsize')
    rows = msize[0]
    cols = msize[1]
    nnodes = getattr(self, 'nnodes')

    if np.min(msize) > 1:
        coord = np.zeros((nnodes, 2))
        for i in range(0, nnodes):
            coord[i, 0] = int(i / cols)  #x
            coord[i, 1] = int(i % cols)  #y
        mx = np.max(coord, axis=0)
        mn = np.min(coord, axis=0)
        coord = (coord - mn) / (mx - mn)
        coord = (coord - .5) * 2
        data = getattr(self, 'data')
        me = np.mean(data, 0)
        data = (data - me)
        codebook = np.tile(me, (nnodes, 1))
        pca = RandomizedPCA(n_components=2)  #Randomized PCA is scalable
        #pca = PCA(n_components=2)
        pca.fit(data)
        eigvec = pca.components_
        eigval = pca.explained_variance_
        norms = np.sqrt(np.einsum('ij,ij->i', eigvec, eigvec))
        eigvec = ((eigvec.T / norms) * eigval).T
        eigvec.shape

        for j in range(nnodes):
            for i in range(eigvec.shape[0]):
                codebook[j, :] = codebook[j, :] + coord[j, i] * eigvec[i, :]
        return np.around(codebook, decimals=6)
    elif np.min(msize) == 1:
        coord = np.zeros((nnodes, 1))
        for i in range(0, nnodes):
            #coord[i,0] = int(i/cols) #x
            coord[i, 0] = int(i % cols)  #y
        mx = np.max(coord, axis=0)
        mn = np.min(coord, axis=0)
        #print coord

        coord = (coord - mn) / (mx - mn)
        coord = (coord - .5) * 2
        #print coord
        data = getattr(self, 'data')
        me = np.mean(data, 0)
        data = (data - me)
        codebook = np.tile(me, (nnodes, 1))
        pca = RandomizedPCA(n_components=1)  #Randomized PCA is scalable
        #pca = PCA(n_components=2)
        pca.fit(data)
        eigvec = pca.components_
        eigval = pca.explained_variance_
        norms = np.sqrt(np.einsum('ij,ij->i', eigvec, eigvec))
        eigvec = ((eigvec.T / norms) * eigval).T
        eigvec.shape

        for j in range(nnodes):
            for i in range(eigvec.shape[0]):
                codebook[j, :] = codebook[j, :] + coord[j, i] * eigvec[i, :]
        return np.around(codebook, decimals=6)
Example #21
def SVM(X, y):

    X_train2, X_test, y_train2, y_test = cross_validation.train_test_split(X, y, test_size=TRAIN_TEST_SPLIT_RATIO)


    # X_train = np.asarray(X_train)
    # classifier = svm.SVC(kernel='precomputed')
    # kernel_train = np.dot(X_train, X_train.T)  # linear kernel
    # classifier.fit(kernel_train, y_train)
    # print("-----------")

    # #Testing
    # from sklearn.metrics import accuracy_score
    # from sklearn.metrics import confusion_matrix
    # kernel_test = np.dot(X_test, X_train.T)
    # y_pred = classifier.predict(kernel_test)
    # print("t_pred", y_pred)
    # print("t_test", y_test)
    # print accuracy_score(y_test, y_pred)
    # print("======",1,"========")

    # X_train2 = X_train
    # y_train2 = y_train
    # for image in range(len(X_train)):
    #     X_reverse = np.fliplr(X_train[image].reshape(32, 32)).ravel()
    #     X_train = np.append(X_train, [X_reverse], axis=0)

    # y_train = np.append(y_train, y_train)

    # classifier1 = svm.SVC(kernel='poly', degree = 2)
    # classifier1.fit(X_train, y_train)
    # print("====== poly 2 large ========")
    # print('TRAIN SCORE', classifier1.score(X_train, y_train))
    # print('TEST SCORE', classifier1.score(X_test, y_test))


    # classifier2 = svm.SVC(kernel='poly', degree = 3)
    # classifier2.fit(X_train, y_train)
    # print("====== poly 3 large ========")
    # print('TRAIN SCORE', classifier2.score(X_train, y_train))
    # print('TEST SCORE', classifier2.score(X_test, y_test))

    # classifier3 = svm.SVC(kernel='poly', degree = 3)
    # classifier3.fit(X_train2, y_train2)
    # print("====== poly 3 small ========")
    # print('TRAIN SCORE', classifier3.score(X_train2, y_train2))
    # print('TEST SCORE', classifier3.score(X_test, y_test))

    # X_train2 = equalize_hist(X_train2)

    # classifier1 = svm.SVC(kernel='poly', degree = 2)
    # classifier1.fit(X_train2, y_train2)
    # print("====== poly 2 small round 1 ========")
    # print('TRAIN SCORE', classifier1.score(X_train2, y_train2))
    # print('TEST SCORE', classifier1.score(X_test, y_test))


    # robust_scale / normalize return new arrays, so keep the results
    X_train2 = preprocessing.robust_scale(X_train2, axis=1, with_centering=True)
    X_test = preprocessing.robust_scale(X_test, axis=1, with_centering=True)
    X_train2 = preprocessing.normalize(X_train2)
    X_test = preprocessing.normalize(X_test)



    classifier2 = svm.SVC(kernel='poly', degree = 2)
    classifier2.fit(X_train2, y_train2)
    print("====== poly 2 small 2========")
    print('TRAIN SCORE', classifier2.score(X_train2, y_train2))
    print('TEST SCORE', classifier2.score(X_test, y_test))

    # X_train2 -= np.mean(X_train2, axis=1)[:, np.newaxis];
    # X_train2 /= np.sqrt(np.var(X_train2, axis=1) + 0.01)[:, np.newaxis];


    # classifier4 = svm.SVC(kernel='poly', degree = 2)
    # classifier4.fit(X_train2, y_train2)
    # print("====== poly 2 small 3 ========")
    # print('TRAIN SCORE', classifier4.score(X_train2, y_train2))
    # print('TEST SCORE', classifier4.score(X_test, y_test))


    # n_components = 10

    # print("Extracting the top %d eigenfaces from %d faces"
    #       % (n_components, X_train2.shape[0]))
    # pca = RandomizedPCA(n_components=n_components, whiten=True).fit(X_train2)


    # print("Projecting the input data on the eigenfaces orthonormal basis")
    # X_train_pca = pca.transform(X_train2)
    # X_test_pca = pca.transform(X_test)
    # print("done ")


    # param_grid = {'C': [1e3, 5e3, 1e4, 5e4, 1e5],
    #           'gamma': [0.0001, 0.0005, 0.001, 0.005, 0.01, 0.1], }
    # classifier11 = GridSearchCV(SVC(kernel='rbf', class_weight='balanced'), param_grid)
    # classifier11.fit(X_train_pca, y_train2)

    # print("====== PCA 10 ========")
    # print('TRAIN SCORE', classifier11.score(X_train_pca, y_train2))
    # print('TEST SCORE', classifier11.score(X_test_pca, y_test))

    

    # n_components = 50

    # print("Extracting the top %d eigenfaces from %d faces"
    #       % (n_components, X_train2.shape[0]))
    # pca = RandomizedPCA(n_components=n_components, whiten=True).fit(X_train2)


    # print("Projecting the input data on the eigenfaces orthonormal basis")
    # X_train_pca = pca.transform(X_train2)
    # X_test_pca = pca.transform(X_test)
    # print("done ")


    # param_grid = {'C': [1e3, 5e3, 1e4, 5e4, 1e5],
    #           'gamma': [0.0001, 0.0005, 0.001, 0.005, 0.01, 0.1], }
    # classifier12 = GridSearchCV(SVC(kernel='rbf', class_weight='balanced'), param_grid)
    # classifier12.fit(X_train_pca, y_train2)

    # print("====== PCA 50 ========")
    # print('TRAIN SCORE', classifier12.score(X_train_pca, y_train2))
    # print('TEST SCORE', classifier12.score(X_test_pca, y_test))




    

    # n_components = 100

    # print("Extracting the top %d eigenfaces from %d faces"
    #       % (n_components, X_train2.shape[0]))
    # pca = RandomizedPCA(n_components=n_components, whiten=True).fit(X_train2)


    # print("Projecting the input data on the eigenfaces orthonormal basis")
    # X_train_pca = pca.transform(X_train2)
    # X_test_pca = pca.transform(X_test)
    # print("done ")


    # param_grid = {'C': [1e3, 5e3, 1e4, 5e4, 1e5],
    #           'gamma': [0.0001, 0.0005, 0.001, 0.005, 0.01, 0.1], }
    # classifier13 = GridSearchCV(SVC(kernel='rbf', class_weight='balanced'), param_grid)
    # classifier13.fit(X_train_pca, y_train2)

    # print("====== PCA 100 ========")
    # print('TRAIN SCORE', classifier13.score(X_train_pca, y_train2))
    # print('TEST SCORE', classifier13.score(X_test_pca, y_test))


    n_components = 120

    print("Extracting the top %d eigenfaces from %d faces"
          % (n_components, X_train2.shape[0]))
    pca = RandomizedPCA(n_components=n_components, whiten=True).fit(X_train2)


    print("Projecting the input data on the eigenfaces orthonormal basis")
    X_train_pca = pca.transform(X_train2)
    X_test_pca = pca.transform(X_test)
    print("done ")


    param_grid = {'C': [1e3, 5e3, 1e4, 5e4, 1e5],
              'gamma': [0.0001, 0.0005, 0.001, 0.005, 0.01, 0.1], }
    classifier13 = GridSearchCV(SVC(kernel='rbf', class_weight='balanced'), param_grid)
    classifier13.fit(X_train_pca, y_train2)

    print("====== PCA 120 ========")
    print('TRAIN SCORE', classifier13.score(X_train_pca, y_train2))
    print('TEST SCORE', classifier13.score(X_test_pca, y_test))

    n_components = 122

    print("Extracting the top %d eigenfaces from %d faces"
          % (n_components, X_train2.shape[0]))
    pca = RandomizedPCA(n_components=n_components, whiten=True).fit(X_train2)


    print("Projecting the input data on the eigenfaces orthonormal basis")
    X_train_pca = pca.transform(X_train2)
    X_test_pca = pca.transform(X_test)
    print("done ")


    param_grid = {'C': [1e3, 5e3, 1e4, 5e4, 1e5],
              'gamma': [0.0001, 0.0005, 0.001, 0.005, 0.01, 0.1], }
    classifier13 = GridSearchCV(SVC(kernel='rbf', class_weight='balanced'), param_grid)
    classifier13.fit(X_train_pca, y_train2)

    print("====== PCA 122 ========")
    print('TRAIN SCORE', classifier13.score(X_train_pca, y_train2))
    print('TEST SCORE', classifier13.score(X_test_pca, y_test))

    n_components = 130

    print("Extracting the top %d eigenfaces from %d faces"
          % (n_components, X_train2.shape[0]))
    pca = RandomizedPCA(n_components=n_components, whiten=True).fit(X_train2)


    print("Projecting the input data on the eigenfaces orthonormal basis")
    X_train_pca = pca.transform(X_train2)
    X_test_pca = pca.transform(X_test)
    print("done ")


    param_grid = {'C': [1e3, 5e3, 1e4, 5e4, 1e5],
              'gamma': [0.0001, 0.0005, 0.001, 0.005, 0.01, 0.1], }
    classifier13 = GridSearchCV(SVC(kernel='rbf', class_weight='balanced'), param_grid)
    classifier13.fit(X_train_pca, y_train2)

    print("====== PCA 130 ========")
    print('TRAIN SCORE', classifier13.score(X_train_pca, y_train2))
    print('TEST SCORE', classifier13.score(X_test_pca, y_test))

    # n_components = 135

    # print("Extracting the top %d eigenfaces from %d faces"
    #       % (n_components, X_train2.shape[0]))
    # pca = RandomizedPCA(n_components=n_components, whiten=True).fit(X_train2)


    # print("Projecting the input data on the eigenfaces orthonormal basis")
    # X_train_pca = pca.transform(X_train2)
    # X_test_pca = pca.transform(X_test)
    # print("done ")


    # param_grid = {'C': [1e3, 5e3, 1e4, 5e4, 1e5],
    #           'gamma': [0.0001, 0.0005, 0.001, 0.005, 0.01, 0.1], }
    # classifier13 = GridSearchCV(SVC(kernel='rbf', class_weight='balanced'), param_grid)
    # classifier13.fit(X_train_pca, y_train2)

    # print("====== PCA 135 ========")
    # print('TRAIN SCORE', classifier13.score(X_train_pca, y_train2))
    # print('TEST SCORE', classifier13.score(X_test_pca, y_test))


    n_components = 147

    print("Extracting the top %d eigenfaces from %d faces"
          % (n_components, X_train2.shape[0]))
    pca = RandomizedPCA(n_components=n_components, whiten=True).fit(X_train2)


    print("Projecting the input data on the eigenfaces orthonormal basis")
    X_train_pca = pca.transform(X_train2)
    X_test_pca = pca.transform(X_test)
    print("done ")


    param_grid = {'C': [1e3, 5e3, 1e4, 5e4, 1e5],
              'gamma': [0.0001, 0.0005, 0.001, 0.005, 0.01, 0.1], }
    classifier13 = GridSearchCV(SVC(kernel='rbf', class_weight='balanced'), param_grid)
    classifier13.fit(X_train_pca, y_train2)

    print("====== PCA 147 ========")
    print('TRAIN SCORE', classifier13.score(X_train_pca, y_train2))
    print('TEST SCORE', classifier13.score(X_test_pca, y_test))

    n_components = 150

    print("Extracting the top %d eigenfaces from %d faces"
          % (n_components, X_train2.shape[0]))
    pca = RandomizedPCA(n_components=n_components, whiten=True).fit(X_train2)


    print("Projecting the input data on the eigenfaces orthonormal basis")
    X_train_pca = pca.transform(X_train2)
    X_test_pca = pca.transform(X_test)
    print("done ")



    param_grid = {'C': [1e3, 5e3, 1e4, 5e4, 1e5],
              'gamma': [0.0001, 0.0005, 0.001, 0.005, 0.01, 0.1], }
    classifier13 = GridSearchCV(SVC(kernel='rbf', class_weight='balanced'), param_grid)
    classifier13.fit(X_train_pca, y_train2)

    print("====== PCA 150 ========")
    print('TRAIN SCORE', classifier13.score(X_train_pca, y_train2))
    print('TEST SCORE', classifier13.score(X_test_pca, y_test))



    n_components = 160

    print("Extracting the top %d eigenfaces from %d faces"
          % (n_components, X_train2.shape[0]))
    pca = RandomizedPCA(n_components=n_components, whiten=True).fit(X_train2)


    print("Projecting the input data on the eigenfaces orthonormal basis")
    X_train_pca = pca.transform(X_train2)
    X_test_pca = pca.transform(X_test)
    print("done ")


    param_grid = {'C': [1e3, 5e3, 1e4, 5e4, 1e5],
              'gamma': [0.0001, 0.0005, 0.001, 0.005, 0.01, 0.1], }
    classifier13 = GridSearchCV(SVC(kernel='rbf', class_weight='balanced'), param_grid)
    classifier13.fit(X_train_pca, y_train2)

    print("====== PCA 160 ========")
    print('TRAIN SCORE', classifier13.score(X_train_pca, y_train2))
    print('TEST SCORE', classifier13.score(X_test_pca, y_test))
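    # A compact, hedged rewrite of the six active PCA blocks above (they differ
    # only in n_components); kept commented out like the other variants in this
    # function so behaviour is unchanged:
    # for n_components in [120, 122, 130, 147, 150, 160]:
    #     pca = RandomizedPCA(n_components=n_components, whiten=True).fit(X_train2)
    #     X_train_pca = pca.transform(X_train2)
    #     X_test_pca = pca.transform(X_test)
    #     param_grid = {'C': [1e3, 5e3, 1e4, 5e4, 1e5],
    #                   'gamma': [0.0001, 0.0005, 0.001, 0.005, 0.01, 0.1]}
    #     clf = GridSearchCV(SVC(kernel='rbf', class_weight='balanced'), param_grid)
    #     clf.fit(X_train_pca, y_train2)
    #     print("====== PCA %d ========" % n_components)
    #     print('TRAIN SCORE', clf.score(X_train_pca, y_train2))
    #     print('TEST SCORE', clf.score(X_test_pca, y_test))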
Example #22
    e3 /= max_iter
    return e0, e1, e2, e3

print "Computing eigenfaces"
_, gt, _ = mem.cache(svd)(data)
gt.sort()
gt = gt[::-1]

print "computing ..."
e0, e1, e2, e3 = mem.cache(compute_all)(data, max_iter=1, verbose=True)

pca = PCA(n_components=100)
pca.fit(data)
p0 = pca.explained_variance_

rpca = RandomizedPCA(n_components=100, iterated_power=1)
rpca.fit(data)
p1 = rpca.explained_variance_

rpca = RandomizedPCA(n_components=100, iterated_power=2)
rpca.fit(data)
p2 = rpca.explained_variance_

rpca = RandomizedPCA(n_components=100, iterated_power=3)
rpca.fit(data)
p3 = rpca.explained_variance_

draw_plots([gt[:100], e0[:100], e1[:100], e2[:100], e3[:100]],
           legend=('groundtruth', 'q=0', 'q=1', 'q=2', 'q=3'))
draw_plots([error(gt[:100], e0[:100]),
           error(gt[:100], e1[:100]),
Example #23
def doPCA(data, dimensions=2):
    from sklearn.decomposition import RandomizedPCA
    model = RandomizedPCA(n_components=dimensions)
    model.fit(data)
    return model
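A quick usage sketch for doPCA; the input here is synthetic:

import numpy as np

data = np.random.randn(200, 6)
model = doPCA(data, dimensions=2)
print(model.explained_variance_ratio_)  # variance captured by each of the 2 components
reduced = model.transform(data)         # shape (200, 6) -> (200, 2)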
Example #24
# Create an array with flattened images X
# and an array with ID of the people on each image y
X = np.zeros([NUM_TRAINIMAGES, IMG_RES], dtype='int8')
y = []

# Populate training array with flattened images from subfolders of train_faces and names
c = 0
for x, folder in enumerate(folders):
    train_faces = glob.glob(folder + '/*')
    for i, face in enumerate(train_faces):
        X[c, :] = prepare_image(face)
        y.append(ID_from_filename(face))
        c = c + 1

# perform principal component analysis on the images
pca = RandomizedPCA(n_components=NUM_EIGENFACES, whiten=True).fit(X)
X_pca = pca.transform(X)

# load test faces (usually one), located in folder test_faces
test_faces = glob.glob('test_faces/*')

# Create an array with flattened images X
X = np.zeros([len(test_faces), IMG_RES], dtype='int8')

# Populate test array with flattened images from subfolders of train_faces
for i, face in enumerate(test_faces):
    X[i, :] = prepare_image(face)

# run through test images (usually one)
for j, ref_pca in enumerate(pca.transform(X)):
    distances = []
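    # (continuation sketch, not part of the original listing: compare the
    #  projected test face to every projected training face and report the
    #  closest match)
    for i, train_pca in enumerate(X_pca):
        distances.append((np.linalg.norm(ref_pca - train_pca), y[i]))
    nearest_dist, nearest_id = min(distances)
    print('closest match: %s (distance %.2f)' % (nearest_id, nearest_dist))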
Example #25
def test_SVM(face_profile_data, face_profile_name_index, face_dim, face_profile_names):
    """
    Testing: Build the SVM classification model using the face_profile_data matrix
    (numOfFace X numOfPixel) and the face_profile_name_index array.
    face_dim is a tuple (h, w) giving the dimension of each image.
    Returns the trained SVM classification model.
    Parameters
    ----------
    face_profile_data : ndarray (number_of_images_in_face_profiles, width * height of the image)
        The face profile data matrix (one flattened image per row)

    face_profile_name_index : ndarray
        The name corresponding to the face profile is encoded in its index

    face_dim : tuple (int, int)
        The dimension of the face data is reshaped to

    face_profile_names: ndarray
        The names corresponding to the face profiles
    Returns
    -------
    clf : sklearn SVC object
        The trained SVM classification model

    pca : sklearn PCA object
        The pca that contains the top 150 eigenvectors extracted using approximated Singular Value Decomposition of the data

    """
    X = face_profile_data
    y = face_profile_name_index

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=42)

    # Compute a PCA (eigenfaces) on the face dataset (treated as unlabeled
    # dataset): unsupervised feature extraction / dimensionality reduction
    n_components = 150 # maximum number of components to keep

    print("\nExtracting the top %d eigenfaces from %d faces" % (n_components, X_train.shape[0]))

    pca = RandomizedPCA(n_components=n_components, whiten=True).fit(X_train)
    eigenfaces = pca.components_.reshape((n_components, face_dim[0], face_dim[1]))

    # This portion of the code is used if the data is scarce; it uses the
    # number of inputs as the number of features
    # pca = RandomizedPCA(n_components=None, whiten=True).fit(X_train)
    # eigenfaces = pca.components_.reshape((pca.components_.shape[0], face_dim[0], face_dim[1]))

    print("\nProjecting the input data on the eigenfaces orthonormal basis")
    X_train_pca = pca.transform(X_train)
    X_test_pca = pca.transform(X_test) 

    # Train a SVM classification model

    print("\nFitting the classifier to the training set")
    param_grid = {'C': [1e3, 5e3, 1e4, 5e4, 1e5],
                  'gamma': [0.0001, 0.0005, 0.001, 0.005, 0.01, 0.1], }
    # clf = GridSearchCV(SVC(kernel='rbf', class_weight='balanced'), param_grid)
    # Train_pca Test Error Rate:  0.0670016750419
    # Train_pca Test Recognition Rate:  0.932998324958



    # clf = SVC(kernel='linear', C=1)
    # 2452  samples from  38  people are loaded
    # Extracting the top 150 eigenfaces from 1839 faces
    # Extracting the top 150 eigenfaces from 1790 faces
    # Train_pca Test Error Rate:  0.0904522613065
    # Train_pca Test Recognition Rate:  0.909547738693

    # clf = SVC(kernel='poly')
    # Train_pca Test Error Rate:  0.201005025126
    # Train_pca Test Recognition Rate:  0.798994974874

    # clf = SVC(kernel='sigmoid')
    # Train_pca Test Error Rate:  0.985318107667
    # Train_pca Test Recognition Rate:  0.0146818923328
    

    # clf = SVC(kernel='rbf').fit(X_train, y_train)
    # Train_pca Test Error Rate:  0.0619765494137
    # Train_pca Test Recognition Rate:  0.938023450586



    # Best Estimator found using Radial Basis Function Kernel:
    clf = SVC(C=1000.0, cache_size=200, class_weight='balanced', coef0=0.0,
  decision_function_shape=None, degree=3, gamma=0.0001, kernel='rbf',
  max_iter=-1, probability=False, random_state=None, shrinking=True,
  tol=0.001, verbose=False)
    # Train_pca with Alex Test Error Rate:  0.088424437299
    # Train_pca with Alex Test Recognition Rate:  0.911575562701

    clf = clf.fit(X_train_pca, y_train)
    # print("\nBest estimator found by grid search:")
    # print(clf.best_estimator_)

    ###############################################################################
    # Quantitative evaluation of the model quality on the test set
    print("\nPredicting people's names on the test set")
    t0 = time()
    y_pred = clf.predict(X_test_pca)
    print("\nPrediction took %0.8f second per sample on average" % ((time() - t0)/y_pred.shape[0]*1.0))

    # print "predicated names: ", y_pred
    # print "actual names: ", y_test
    error_rate = errorRate(y_pred, y_test)
    print ("\nTest Error Rate: %0.4f %%" % (error_rate * 100))
    print ("Test Recognition Rate: %0.4f %%" % ((1.0 - error_rate) * 100))

    ###############################################################################
    # Testing

    # X_test_pic1 = X_test[0]
    # X_test_pic1_for_display = np.reshape(X_test_pic1, face_dim)

    # t0 = time()
    # pic1_pred_name = predict(clf, pca, X_test_pic1, face_profile_names)
    # print("\nPrediction took %0.3fs" % (time() - t0))
    # print "\nPredicated result for picture_1 name: ", pic1_pred_name
    # for i in range(1,3): print ("\n")

    # Display the picture
    # plt.figure(1)
    # plt.title(pic1_pred_name)
    # plt.subplot(111)
    # plt.imshow(X_test_pic1_for_display)
    # plt.show()


    ###############################################################################
    # Qualitative evaluation of the predictions using matplotlib
    # import matplotlib.pyplot as plt

    # def plot_gallery(images, titles, face_dim, n_row=3, n_col=4):
    #     """Helper function to plot a gallery of portraits"""
    #     plt.figure(figsize=(1.8 * n_col, 2.4 * n_row))
    #     plt.subplots_adjust(bottom=0, left=.01, right=.99, top=.90, hspace=.35)
    #     for i in range(n_row * n_col):
    #         plt.subplot(n_row, n_col, i + 1)
    #         plt.imshow(images[i].reshape(face_dim), cmap=plt.cm.gray)
    #         plt.title(titles[i], size=12)
    #         plt.xticks(())
    #         plt.yticks(())


    # # plot the result of the prediction on a portion of the test set

    # def title(y_pred, y_test, face_profile_names, i):
    #     pred_name = face_profile_names[y_pred[i]].rsplit(' ', 1)[-1]
    #     true_name = face_profile_names[y_test[i]].rsplit(' ', 1)[-1]
    #     return 'predicted: %s\ntrue:      %s' % (pred_name, true_name)

    # prediction_titles = [title(y_pred, y_test, face_profile_names, i)
    #                      for i in range(y_pred.shape[0])]

    # plot_gallery(X_test, prediction_titles, face_dim)

    # # plot the gallery of the most significative eigenfaces

    # eigenface_titles = ["eigenface %d" % i for i in range(eigenfaces.shape[0])]
    # plot_gallery(eigenfaces, eigenface_titles, face_dim)

    # plt.show()


    return clf, pca
Example #26
import numpy as np

from sklearn.cross_validation import train_test_split
from sklearn.decomposition import RandomizedPCA
from sklearn.ensemble import AdaBoostClassifier, VotingClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.pipeline import make_pipeline, make_union
from sklearn.preprocessing import FunctionTransformer, RobustScaler

# NOTE: Make sure that the class is labeled 'class' in the data file
tpot_data = np.recfromcsv('PATH/TO/DATA/FILE',
                          delimiter='COLUMN_SEPARATOR',
                          dtype=np.float64)
features = np.delete(tpot_data.view(np.float64).reshape(tpot_data.size, -1),
                     tpot_data.dtype.names.index('class'),
                     axis=1)
training_features, testing_features, training_classes, testing_classes = \
    train_test_split(features, tpot_data['class'], random_state=42)

exported_pipeline = make_pipeline(
    RobustScaler(), RandomizedPCA(iterated_power=10),
    make_union(VotingClassifier([("est", GaussianNB())]),
               FunctionTransformer(lambda X: X)),
    AdaBoostClassifier(learning_rate=0.0001, n_estimators=500))

exported_pipeline.fit(training_features, training_classes)
results = exported_pipeline.predict(testing_features)
Example #27
 mapping_targets = np.hstack(np.array(mapping_targets))

 first_half = np.hstack(Data[0:no_mappings/2,:,:,:])
 first_half = np.vstack(first_half)
 second_half = np.hstack(Data[no_mappings/2:no_mappings,:,:,:])
 second_half = np.vstack(second_half)
 Data = np.vstack([first_half,second_half])
 # for true targets uncomment next line
 targets = np.hstack([targets,targets])

 #for random targets uncomment next line
 #targets = np.random.randint(1,no_locations+1,no_mappings*no_locations*no_thwacks)


 lda = LDA(n_components=14)
 pca = RandomizedPCA(n_components = 125)
 classifier =  KNeighborsClassifier(8)
 proj = pca.fit_transform(Data)
 proj = lda.fit_transform(proj,targets)
 proj1 = pca.fit_transform(Data)
 proj1 = lda.fit_transform(proj1,mapping_targets)
 print(file)
 plt.clf()
 plt.scatter(proj[0:proj.shape[0]/2,0],proj[0:proj.shape[0]/2,1],c=targets[0:targets.shape[0]/2])
 plt.title(file.rsplit('_')[0]+'_'+file.rsplit('_')[1]+" Before "+file.rsplit('_')[2]+" injection")
 plt.colorbar()
 plt.ylabel("LD1")
 plt.xlabel("LD2")
 plt.savefig(file.rsplit('_')[0]+'_'+file.rsplit('_')[1]+" Before "+file.rsplit('_')[2]+file[-11:-4]+" injection.svg")
 plt.show()
 plt.clf()
Example #28
	fig = plt.figure()
	ax = p3.Axes3D(fig)
	ax.view_init(7, -80)
	for l in np.unique(label):
	    ax.plot3D(X[label == l, 0], X[label == l, 1], X[label == l, 2],
	              'o', color=plt.cm.jet(np.float(l) / np.max(label + 1)))
	plt.title('Without connectivity constraints (time %.2fs)' % elapsed_time)
	plt.savefig("dendogram.png")

if __name__ == '__main__':
	articles, y = load_data(br_ts, mo_ts, pt_ts, True)
	# n_labels = len(np.unique(y))
	# reduced_data, svd =  extract_features(articles,1, False, True, n_labels)
	# data =  extract_features(articles, 1, False, False)

	# bench_k_means(KMeans(init='k-means++', n_clusters=n_labels, n_init=10), name="k-means++", data=data, labels=y)
	# bench_k_means(KMeans(init='random', n_clusters=n_labels, n_init=10), name="random", data=data, labels=y)
	# visualize_kclusters(data, n_labels, y)

	# svd = TruncatedSVD(2)
	# normalizer = Normalizer(copy=False)
	# lsa = make_pipeline(svd, normalizer)
	# lsa = Pipeline([('svd',svd),('normalizer',normalizer)])
	# reduced_data = lsa.fit_transform(data)
	plot_projection(RandomizedPCA(n_components=2), articles, "PCA projection of articles")
	# plot_projection(lsa, articles, "LSA projection of articles", 2)

	# plot_hierarchical(data, n_labels)

	# affinity(articles[:2000], y) 
Example #29
# Split into a training and testing set
X_train, X_test, y_train, y_test = train_test_split(X,
                                                    y,
                                                    test_size=0.25,
                                                    random_state=42)

###############################################################################
# Compute a PCA (eigenfaces) on the face dataset (treated as unlabeled
# dataset): unsupervised feature extraction / dimensionality reduction
n_components = 150
#n_components = 250

print "Extracting the top %d eigenfaces from %d faces" % (n_components,
                                                          X_train.shape[0])
t0 = time()
pca = RandomizedPCA(n_components=n_components, whiten=True).fit(X_train)
print "done in %0.3fs" % (time() - t0)

print "PCA component 1 weight:", pca.explained_variance_ratio_[0]
print "PCA component 2 weight:", pca.explained_variance_ratio_[1]
eigenfaces = pca.components_.reshape((n_components, h, w))

print "Projecting the input data on the eigenfaces orthonormal basis"
t0 = time()
X_train_pca = pca.transform(X_train)
X_test_pca = pca.transform(X_test)
print "done in %0.3fs" % (time() - t0)

###############################################################################
# Train a SVM classification model
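The snippet ends at the section header; in the scikit-learn LFW example this script follows, the next step is a grid-searched RBF-kernel SVM, roughly (GridSearchCV and SVC are assumed to be imported at the top of the file):

print "Fitting the classifier to the training set"
t0 = time()
param_grid = {'C': [1e3, 5e3, 1e4, 5e4, 1e5],
              'gamma': [0.0001, 0.0005, 0.001, 0.005, 0.01, 0.1]}
clf = GridSearchCV(SVC(kernel='rbf', class_weight='balanced'), param_grid)
clf = clf.fit(X_train_pca, y_train)
print "done in %0.3fs" % (time() - t0)
print "Best estimator found by grid search:"
print clf.best_estimator_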
Example #30
print('processing images...')
print('(this takes a long time if you have a lot of images)')
raw_data = [(process_file(filename),'like',filename) for filename in like_files] + \
           [(process_file(filename),'dislike',filename) for filename in dislike_files]

# randomly order the data
#seed(0)
shuffle(raw_data)

# pull out the features and the labels
data = np.array([cd for (cd, _y, f) in raw_data])
labels = np.array([_y for (cd, _y, f) in raw_data])

print('finding principal components...')
pca = RandomizedPCA(n_components=N_COMPONENTS, random_state=0)
X = pca.fit_transform(data)
y = [1 if label == 'dislike' else 0 for label in labels]

zipped = list(zip(X, raw_data))  # materialize so it can be iterated over twice below
likes = [x[0] for x in zipped if x[1][1] == "like"]
dislikes = [x[0] for x in zipped if x[1][1] == "dislike"]

likesByComponent = zip(*likes)
dislikesByComponent = zip(*dislikes)
allByComponent = zip(*X)

printComponentStatistics()

createEigendressPictures()