Example #1
0
def test_sparse_randomized_pca_inverse():
    """Check that RandomizedPCA can reconstruct sparse input via its inverse."""

    def fit_expecting_deprecation(**extra_params):
        # Fit on the sparse matrix and require that exactly one warning is
        # recorded, of category DeprecationWarning.
        with warnings.catch_warnings(record=True) as caught:
            warnings.simplefilter("always", DeprecationWarning)
            estimator = RandomizedPCA(
                n_components=2, random_state=0, **extra_params
            ).fit(X)
            assert_equal(len(caught), 1)
            assert_equal(caught[0].category, DeprecationWarning)
        return estimator

    random_gen = np.random.RandomState(0)
    n_samples, n_features = 50, 3
    dense = random_gen.randn(n_samples, n_features)  # spherical data
    dense[:, 1] *= 0.00001  # shrink the middle component
    # The means are kept small: the sparse code path of randomized PCA does
    # not center the data, so that sparsity is preserved.
    X = csr_matrix(dense)

    # The data is almost of rank n_components, so projecting and mapping back
    # should recover the original values.
    plain_pca = fit_expecting_deprecation()
    projected = plain_pca.transform(X)
    restored = plain_pca.inverse_transform(projected)
    assert_almost_equal(X.todense(), restored, decimal=2)

    # Same round trip with whitening enabled (approximate reconstruction).
    whitened_pca = fit_expecting_deprecation(whiten=True)
    projected = whitened_pca.transform(X)
    restored = whitened_pca.inverse_transform(projected)
    abs_error = np.abs(X.todense() - restored)
    relative_max_delta = (abs_error / np.abs(X).mean()).max()
    # XXX: this does not seem to work as expected:
    assert_almost_equal(relative_max_delta, 0.91, decimal=2)
Example #2
0
def test_sparse_randomized_pca_inverse():
    """Verify the transform / inverse_transform round trip on sparse data."""
    state = np.random.RandomState(0)
    n_rows, n_cols = 50, 3
    dense = state.randn(n_rows, n_cols)  # spherical data
    dense[:, 1] *= .00001  # make the middle component relatively small
    # Means stay small on purpose: the sparse variant of randomized PCA skips
    # centering so that the input remains sparse.
    X = csr_matrix(dense)

    # Because the data is almost of rank n_components, the original values
    # should be recoverable from the transformed signal.
    plain_pca = RandomizedPCA(n_components=2, random_state=0)
    assert_warns(DeprecationWarning, plain_pca.fit, X)
    reduced = plain_pca.transform(X)
    recovered = plain_pca.inverse_transform(reduced)
    assert_almost_equal(X.toarray(), recovered, decimal=2)

    # Whitened variant: the reconstruction is only approximate.
    whitening_pca = RandomizedPCA(n_components=2, whiten=True, random_state=0)
    # assert_warns hands back the return value of the wrapped call, which is
    # then used as the fitted estimator.
    fitted = assert_warns(DeprecationWarning, whitening_pca.fit, X)
    reduced = fitted.transform(X)
    recovered = fitted.inverse_transform(reduced)
    abs_error = np.abs(X.toarray() - recovered)
    mean_magnitude = np.abs(X.toarray()).mean()
    relative_max_delta = (abs_error / mean_magnitude).max()
    # XXX: this does not seem to work as expected:
    assert_almost_equal(relative_max_delta, 0.91, decimal=2)
Example #3
0
def open_img():
    """Let the user pick an image, preview it, and run a PCA round trip on it.

    Steps performed:
      1. Ask for one or more files; only the first selection is used.
      2. Show a 250x250 preview of it in a label placed on ``root``.
      3. Write a grayscale copy to ``grey.jpeg``.
      4. Fit ``RandomizedPCA`` on the grayscale image, project it, map it back
         and write the reconstruction to ``pca1.jpg``.
      5. Plot the cumulative explained variance and save it as ``graph.jpg``.

    Returns ``None``. If the dialog is cancelled nothing is done.
    """
    selection = filedialog.askopenfilenames(
        parent=root,
        initialdir='/',
        initialfile='tmp',
        filetypes=[("All files", "*")])

    # A cancelled dialog yields an empty selection; indexing it would raise.
    if not selection:
        return
    path = selection[0]

    # Preview of the chosen file.
    preview = Image.open(path)
    preview = preview.resize((250, 250), Image.ANTIALIAS)
    photo = ImageTk.PhotoImage(preview)
    panel = tk.Label(root, image=photo)
    # Keep a reference on the widget so the PhotoImage is not
    # garbage-collected while it is displayed.
    panel.image = photo
    panel.grid(row=70, column=1)

    # Grayscale copy, written to disk and read back through matplotlib.
    image = cv2.imread(path)
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    cv2.imwrite("grey.jpeg", gray)
    img = mpimg.imread("grey.jpeg")

    # compo() is defined elsewhere; its result is passed as the first
    # argument of RandomizedPCA (presumably the number of components).
    f = compo()
    ipca = RandomizedPCA(f)
    ipca.fit(img)
    img_c = ipca.transform(img)
    print(img_c.shape)
    temp = ipca.inverse_transform(img_c)

    print(temp.shape)
    cv2.imwrite("pca1.jpg", temp)
    print(np.sum(ipca.explained_variance_ratio_))
    plt.plot(np.cumsum(ipca.explained_variance_ratio_))
    plt.xlabel('number of components')
    plt.ylabel('cumulative explained variance')
    plt.savefig("graph.jpg")
Example #4
0
def test_randomized_pca_inverse():
    """Check that RandomizedPCA round-trips dense data through its inverse."""
    random_gen = np.random.RandomState(0)
    n_samples, n_features = 50, 3
    X = random_gen.randn(n_samples, n_features)  # spherical data
    X[:, 1] *= 0.00001  # shrink the middle component
    X += [5, 4, 3]  # give the data a large mean

    # The data is almost of rank n_components, so projecting onto two
    # components and mapping back should recover the original values.
    plain = RandomizedPCA(n_components=2, random_state=0).fit(X)
    restored = plain.inverse_transform(plain.transform(X))
    assert_almost_equal(X, restored, decimal=2)

    # With whitening the reconstruction is only approximate; compare the
    # largest absolute error against the mean magnitude of the data.
    whitened = RandomizedPCA(n_components=2, whiten=True, random_state=0).fit(X)
    restored = whitened.inverse_transform(whitened.transform(X))
    abs_error = np.abs(X - restored)
    relative_max_delta = (abs_error / np.abs(X).mean()).max()
    assert_almost_equal(relative_max_delta, 0.11, decimal=2)
Example #5
0
def test_randomized_pca_inverse():
    """Verify the transform / inverse_transform round trip on dense data."""

    def round_trip(estimator, samples):
        # Project the samples and immediately map them back.
        reduced = estimator.transform(samples)
        return estimator.inverse_transform(reduced)

    state = np.random.RandomState(0)
    n_rows, n_cols = 50, 3
    X = state.randn(n_rows, n_cols)  # spherical data
    X[:, 1] *= .00001  # make the middle component relatively small
    X += [5, 4, 3]  # make a large mean

    # Since the data is almost of rank n_components, the original data should
    # be recoverable from the transformed signal.
    model = RandomizedPCA(n_components=2, random_state=0).fit(X)
    assert_almost_equal(X, round_trip(model, X), decimal=2)

    # Same check with whitening (approximate reconstruction).
    model = RandomizedPCA(n_components=2, whiten=True, random_state=0).fit(X)
    deviation = np.abs(X - round_trip(model, X))
    relative_max_delta = (deviation / np.abs(X).mean()).max()
    assert_almost_equal(relative_max_delta, 0.11, decimal=2)
Example #6
0
def gap_statistic(x, random_datasets=64):
    """
    Estimate the number of clusters in ``x`` with the gap statistic.

    For each candidate cluster count (1 up to the number of rows of ``x``) a
    MiniBatchKMeans model is fitted and its dispersion on ``x`` is compared
    with its average dispersion on ``random_datasets`` reference data sets.
    The references are built once by passing the PCA-transformed data through
    ``generate_random_dataset`` and mapping the result back with the PCA.

    The search stops early when the current gap falls below the best gap times
    ``SETTINGS.GAP_STATISTIC.MAXIMUM_DECLINE``, or when more than
    ``SETTINGS.GAP_STATISTIC.NUM_CLUSTERS_WITHOUT_IMPROVEMENT`` cluster counts
    have passed since the best one.

    http://blog.echen.me/2011/03/19/counting-clusters/

    :param x: 2-D ``np.ndarray`` of samples.
    :param random_datasets: number of reference data sets to generate.
    :return: the cluster count with the largest gap statistic seen.
    """
    assert isinstance(x, np.ndarray)
    assert len(x.shape) == 2

    # The threshold is also used as n_components below, so it must be compared
    # against a dimension, not against the whole shape tuple (tuple > int is a
    # TypeError on Python 3 and always True on Python 2).
    # NOTE(review): min(x.shape) bounds the number of components a full PCA
    # can produce; confirm this is the dimension the threshold was meant for.
    pca_threshold = SETTINGS.GAP_STATISTIC.RANDOMIZED_PCA_THRESHOLD
    if min(x.shape) > pca_threshold:
        pca = RandomizedPCA(pca_threshold)
    else:
        pca = PCA()

    pca.fit(x)
    transformed = pca.transform(x)

    reference_datasets = [
        pca.inverse_transform(generate_random_dataset(transformed))
        for _ in range(random_datasets)
    ]

    max_gap_statistic = -1
    best_num_clusters = 1

    for num_clusters in range(1, x.shape[0] + 1):
        kmeans = MiniBatchKMeans(num_clusters)
        kmeans.fit(x)

        trained_dispersion = dispersion(kmeans, x)

        random_dispersions = [
            dispersion(kmeans, data) for data in reference_datasets
        ]

        # Named `gap` so the local does not shadow this function's own name.
        gap = (np.log(sum(random_dispersions) / random_datasets)
               - np.log(trained_dispersion))

        if gap > max_gap_statistic:
            max_gap_statistic = gap
            best_num_clusters = num_clusters

        if gap < max_gap_statistic * SETTINGS.GAP_STATISTIC.MAXIMUM_DECLINE:
            break
        if num_clusters > best_num_clusters + SETTINGS.GAP_STATISTIC.NUM_CLUSTERS_WITHOUT_IMPROVEMENT:
            break

    return best_num_clusters
def callRandomizedPCA(X, n, type):
    """Fit RandomizedPCA with ``n`` components on ``X`` and report the shapes.

    Prints the shape of the input, of the projection and of the
    reconstruction. When ``type`` equals 2, the first two columns of the
    projection and of the reconstruction are each passed to ``myplot`` along
    with the transposed first two components, and shown.

    Returns the projected data.
    """
    # type = 1 for Energy data to avoid 1D plot, 2 for others
    model = RandomizedPCA(n_components=n)
    model.fit(X)
    transformed = model.transform(X)
    print("original shape:   ", X.shape)
    print("transformed shape after Randomized PCA:", transformed.shape)
    reconstructed = model.inverse_transform(transformed)
    print("reconstruct shape after Randomized PCA:", reconstructed.shape)

    if type != 2:
        return transformed

    # Gstore data: plot the projection first, then the reconstruction.
    for points in (transformed, reconstructed):
        myplot(points[:, 0:2], np.transpose(model.components_[0:2, :]))
        plt.show()

    return transformed
Example #8
0
def gap_statistic(x, random_datasets=64):
    """
    Pick a cluster count for ``x`` using the gap statistic.

    Candidate cluster counts run from 1 to the number of rows of ``x``. For
    each, a MiniBatchKMeans model is fitted on ``x`` and the log of its mean
    dispersion over ``random_datasets`` reference data sets is compared with
    the log of its dispersion on ``x``. The reference data sets come from
    ``generate_random_dataset`` applied to the PCA-transformed data and mapped
    back through ``pca.inverse_transform``.

    The loop ends early once the current gap drops below the best gap times
    ``GAP_STATISTIC.MAXIMUM_DECLINE``, or once more than
    ``GAP_STATISTIC.NUM_CLUSTERS_WITHOUT_IMPROVEMENT`` candidates have passed
    since the best one.

    http://blog.echen.me/2011/03/19/counting-clusters/

    :param x: 2-D ``np.ndarray`` of samples.
    :param random_datasets: number of reference data sets to generate.
    :return: the cluster count with the largest gap statistic seen.
    """
    assert isinstance(x, np.ndarray)
    assert len(x.shape) == 2

    # Compare a dimension, not the shape tuple, with the threshold: the value
    # is reused as n_components, and tuple > int raises TypeError on Python 3
    # (and is always True on Python 2).
    # NOTE(review): min(x.shape) is the largest number of components a full
    # PCA can yield; confirm that is the quantity the threshold refers to.
    n_components_limit = GAP_STATISTIC.RANDOMIZED_PCA_THRESHOLD
    if min(x.shape) > n_components_limit:
        pca = RandomizedPCA(n_components_limit)
    else:
        pca = PCA()

    pca.fit(x)
    transformed = pca.transform(x)

    reference_datasets = [pca.inverse_transform(generate_random_dataset(transformed)) for _ in range(random_datasets)]

    max_gap_statistic = -1
    best_num_clusters = 1

    for num_clusters in range(1, x.shape[0] + 1):
        kmeans = MiniBatchKMeans(num_clusters)
        kmeans.fit(x)

        trained_dispersion = dispersion(kmeans, x)

        random_dispersions = [dispersion(kmeans, data) for data in reference_datasets]

        # `current_gap` avoids rebinding the function's own name locally.
        current_gap = np.log(sum(random_dispersions) / random_datasets) - np.log(trained_dispersion)

        if current_gap > max_gap_statistic:
            max_gap_statistic = current_gap
            best_num_clusters = num_clusters

        if current_gap < max_gap_statistic * GAP_STATISTIC.MAXIMUM_DECLINE:
            break
        if num_clusters > best_num_clusters + GAP_STATISTIC.NUM_CLUSTERS_WITHOUT_IMPROVEMENT:
            break

    return best_num_clusters
Example #9
0
def PCA():
    """Compress 'imagep.png' with RandomizedPCA and display the reconstruction.

    The image is read, its second and third axes are merged into one, and a
    RandomizedPCA with 1000 components is fitted on the resulting 2-D array.
    The projection is mapped back, reshaped to the original three axes and
    shown without axis decorations. Shapes and the summed explained variance
    ratio are printed along the way.
    """
    img = mpimg.imread('imagep.png')
    print("Real Image Shape:", img.shape)
    height, width, channels = img.shape[0], img.shape[1], img.shape[2]

    # Merge the last two axes so that PCA receives a 2-D array.
    flat = np.reshape(img, (height, width * channels))
    print("Reshaped Image Shape:", flat.shape)

    model = RandomizedPCA(1000).fit(flat)
    reduced = model.transform(flat)
    print(reduced.shape)
    print(np.sum(model.explained_variance_ratio_))

    # Map back and restore the original three-axis layout.
    restored = model.inverse_transform(reduced)
    restored = np.reshape(restored, (height, width, channels))
    print(restored.shape, height, width, channels)

    plt.axis('off')
    plt.imshow(restored)
    plt.show()
# Script fragment (Python 2 print statements): train PCA + KMeans unless a
# saved model exists, then optionally visualise filters.
# `pca`, `X_tr`, `X_tst`, `F`, `Visualise` and the other free names are defined
# outside this fragment.
kmeans = KMeans(k=49, n_init=1)
if not os.path.exists(MODEL_NAME):

    print "Training on ", train_set_x.shape
    print "Fitting PCA"

    # First PCA fit: X_tr is replaced by its projection.
    X_tr = pca.fit_transform(X_tr)
    X_tst = pca.transform(X_tst)

    print "Fitted PCA"
    # Train KMeans on whitened data
    print "Transforming data"
    # NOTE(review): this refits `pca` on data that was already projected a few
    # lines above, so X_tst was transformed with a different fit than the one
    # kept in `pca` -- looks unintended; confirm.
    X_tr_white = pca.fit_transform(X_tr)
    print "Fitting KMEANS"
    kmeans.fit(X_tr_white)
    # Map the cluster centres back through the (second) PCA fit.
    filters_kmeans = pca.inverse_transform(kmeans.cluster_centers_)
else:
    # NOTE(review): the file handle is never closed and is opened in text
    # mode; unpickling is only safe for trusted files.
    pca, kmeans = cPickle.load(open(MODEL_NAME, "r"))

import matplotlib.pylab as plt

if Visualise:
    # NOTE(review): `F` is not defined in this fragment (`filters_kmeans` is
    # computed above but not used here) -- confirm which one is intended.
    for i, f in enumerate(F):
        # One cell of a 7x7 grid per filter.
        plt.subplot(7, 7, i + 1)
        plt.imshow(f.reshape(ImageSideFinal, ImageSideFinal), cmap="gray")
        plt.axis("off")
        # NOTE(review): show() is called on every iteration rather than once
        # after the grid is complete.
        plt.show()


    # Cap the number of plotted test points at 1000 and pre-allocate buffers.
    N=min(1000, test_set_x.shape[0])
    x_plt, y_plt, clr_plt = [0]*int(N), [0]*int(N), [0]*int(N)
# Columns 2..73 hold the input features, column 0 holds the target.
df_input_data = df_input[list(range(2, 74))]
df_input_target = df_input[list(range(0, 1))]

# One random colour value per row, shared by both scatter plots.
colors = numpy.random.rand(len(df_input_target))

# Randomized PCA
from sklearn.decomposition import RandomizedPCA
pca = RandomizedPCA(n_components=6) #from optimal pca components chart n_components=6
proj1 = pca.fit_transform(df_input_data)

# Relative weights on features
# (single-argument print calls behave the same on Python 2 and Python 3)
print(pca.explained_variance_ratio_)
print(pca.components_)

# Plotting: first two projected components
mpyplot.figure(1)
p1 = mpyplot.scatter(proj1[:, 0], proj1[:, 1], c=colors)
mpyplot.colorbar(p1)
# show() does not take an artist; it displays all open figures.
mpyplot.show()

# Randomized PCA using inverse transform - to make it linear
proj2 = pca.inverse_transform(proj1)

# Plotting: first two columns of the reconstructed data
mpyplot.figure(2)
p2 = mpyplot.scatter(proj2[:, 0], proj2[:, 1], c=colors, alpha=0.8)
# The colorbar must describe the scatter drawn in this figure (p2), not the
# one from figure 1.
mpyplot.colorbar(p2)
mpyplot.show()
# Compress an image with RandomizedPCA, reconstruct it, rescale the result to
# [0, 1] and display it. `img` is defined outside this fragment and is indexed
# here with three axes.
print('reshaping image into 2 dimensions for PCA')
img_r = np.reshape(img, (img.shape[0], img.shape[1] * img.shape[2]))
print(img_r.shape)

number_of_components = 64
print('transforming image with ' + str(number_of_components) + ' number of components')
ipca = RandomizedPCA(number_of_components).fit(img_r)
img_c = ipca.transform(img_r)
print('new shape of image after transformation')
print(img_c.shape)
print('Randomized PCA with ' + str(number_of_components) + ' components:')
print(np.sum(ipca.explained_variance_ratio_))

print('inversing the transformation back to image')
temp = ipca.inverse_transform(img_c)
print('reshaping back to three dimension')
temp = np.reshape(temp, (img.shape[0], img.shape[1], img.shape[2]))
print(temp.shape)

# Linear map sending the smallest reconstructed value to 0 and the largest to 1.
m = interp1d([temp.min(), temp.max()], [0, 1])
print('rescaling image this may take some time....')
# interp1d accepts whole arrays, so the image is rescaled in one call instead
# of one Python-level call per pixel value. Assigning through [...] keeps the
# update in place, as the element-wise loop did.
temp[...] = m(temp)


fig = plt.figure()
plt.axis('off')
plt.imshow(temp)
Example #13
0
# Script fragment: show sample patches, whiten them with RandomizedPCA, show
# the whitened patches mapped back, then cluster with KMeans and save the
# centroids. `X`, `sample_patches_ind`, `patch_size`, `seed` and
# `plotImageGrid` are defined outside this fragment.
plotImageGrid(X[sample_patches_ind, ...],
              image_size=(patch_size, patch_size, 3), nrow=6, ncol=6)
#plt.savefig('patches16.png')
plt.show()

#perform whitening
# 590 components = 99% explained variance
pca = RandomizedPCA(n_components=590, whiten=True, random_state=seed)
w_X = pca.fit_transform(X)

print("==== PCA fitted =====")
print("variance explained:")
print(pca.explained_variance_ratio_)

#plot whitened patches after inverse transform
orig_X = pca.inverse_transform(w_X[sample_patches_ind, ...])
plotImageGrid(orig_X, image_size=(patch_size, patch_size, 3), nrow=6, ncol=6)
#plt.savefig('patches_whitened16.png')
plt.show()

###KMEANS
# NOTE(review): KMeans is fitted on the raw X, not on the whitened w_X; the
# 'centroids_nw' file name suggests this is a deliberate non-whitened run --
# confirm.
k_means = cluster.KMeans(n_clusters=50, n_jobs=3)
k_means.fit(X)
print("==== K-Means fitted ====")


# get centroids and transform them to original space
# (the inverse transform is disabled because the centroids already live in the
# space KMeans was fitted on)
#tmp = pca.inverse_transform(k_means.cluster_centers_.copy())
tmp = k_means.cluster_centers_.copy()
# NOTE(review): the patch size is hard-coded to 16 here, whereas the calls
# above use `patch_size`.
plotImageGrid(tmp, image_size=(16, 16, 3), nrow=5, ncol=10 )
plt.savefig('centroids_nw.png')
Example #14
0
              cmap='bone')
plt.show()

# In the image above, you can see that the first principal components capture the basic face structure, while later ones pick up facial features such as the nose, eyes, and mouth

# Let's plot the cumulative sum of the explained variance to determine how many PCs are suitable for our case
plt.plot(np.cumsum(pca.explained_variance_ratio_))
plt.xlabel('Number of components: ')
plt.ylabel('Cumulative explained variance: ')
plt.show()

# From the plot above, we can safely assume that 150 PCs already retrieve ~90% of the variance
# Let's check this by comparing the original images with their reconstructions from only 150 PCs
pca = RandomizedPCA(150).fit(faces.data)
components = pca.transform(faces.data)
pca_faces = pca.inverse_transform(components)

# Plot the results
fig, ax = plt.subplots(2,
                       10,
                       figsize=(10, 2.5),
                       subplot_kw={
                           'xticks': [],
                           'yticks': []
                       },
                       gridspec_kw=dict(hspace=0.1, wspace=0.1))

for i in range(10):
    dimensionH = faces.images.shape[1]
    dimensionW = faces.images.shape[2]
    ax[0, i].imshow(faces.data[i].reshape(dimensionH, dimensionW),
# Draw each principal component as a 62x47 image, one per axes in `axes`.
for i, ax in enumerate(axes.flat):
    ax.imshow(pca.components_[i].reshape(62, 47), cmap='bone')
plt.show()

# Cumulative explained variance ratio as components are added.
plt.plot(np.cumsum(pca.explained_variance_ratio_))
plt.xlabel('number of components')
plt.ylabel('cumulative explained variance')
plt.show()

# Project the faces onto 150 components, then map them back to pixel space.
pca = RandomizedPCA(150).fit(faces.data)
components = pca.transform(faces.data)
projected = pca.inverse_transform(components)

# Two rows of ten tick-less panels: inputs on top, reconstructions below.
fig, ax = plt.subplots(
    2, 10,
    figsize=(10, 2.5),
    subplot_kw=dict(xticks=[], yticks=[]),
    gridspec_kw={'hspace': 0.1, 'wspace': 0.1},
)

for i in range(10):
    ax[0, i].imshow(faces.data[i].reshape(62, 47), cmap='binary_r')
    ax[1, i].imshow(projected[i].reshape(62, 47), cmap='binary_r')

ax[0, 0].set_ylabel('full-dim\ninput')
ax[1, 0].set_ylabel('150-dim\nreconstruction')

plt.show()
Example #16
0
# Script fragment (Python 2: uses xrange). Evaluates `run_net` on a regular
# grid in the 2-D PCA space around `path`, and along `path` itself.
# `path`, `pca`, `net`, `ds`, `gradient_postproc`, `epochs` and `run_net` are
# defined outside this fragment.
resolution = 50

# Grid covering the first two columns of `path`, padded by 1.0 on each side.
alpha_X, beta_Y = np.meshgrid(
    np.linspace(
        np.min(path[:, 0]) - 1.0,
        np.max(path[:, 0]) + 1.0, resolution),
    np.linspace(
        np.min(path[:, 1]) - 1.0,
        np.max(path[:, 1]) + 1.0, resolution))

# One run_net result per grid point.
Esurface = np.zeros((resolution, resolution))

for alpha_idx in xrange(resolution):
    for beta_idx in xrange(resolution):
        alpha, beta = alpha_X[alpha_idx, beta_idx], beta_Y[alpha_idx, beta_idx]
        # Map the 2-D grid point back through the PCA before evaluating it.
        sigma = pca.inverse_transform(np.asarray((alpha, beta)))
        # NOTE(review): unlike the call below, `learn=False` is not passed
        # here -- confirm run_net's default does not modify `net`.
        Esurface[alpha_idx, beta_idx] = run_net(net, sigma, ds,
                                                gradient_postproc)

# run_net result for each epoch's point on `path`, mapped back through the PCA.
errors = [
    run_net(net,
            pca.inverse_transform(path[epoch]),
            ds,
            gradient_postproc,
            learn=False) for epoch in xrange(epochs)
]

fig = plt.figure()
ax = fig.gca(projection='3d')
ax.plot_surface(alpha_X,
                beta_Y,
    # Body of a per-file loop (the loop header is outside this fragment):
    # load one file, smooth its sub-band data through a PCA round trip and
    # save the result as an image.
    pfd_data = pfddata(os.path.join(input_dir, fn))
    nbins = 64
    data = pfd_data.getdata(subbands=nbins)
    print("processing %s" % fn)

    fig = plt.figure()
    fig.set_size_inches(5, 5)
    # NOTE(review): this Axes object is discarded by the next assignment.
    ax = plt.Axes(fig, [0., 0., 1., 1.])
    ax = plt.gca()
    ax.set_axis_off()
    fig.add_axes(ax)

    # Keep 24 components, then map back to the original space.
    pca = PCA(n_components=24)
    # Integer division: reshape needs integer dimensions, and true division
    # yields a float on Python 3.
    rd = data.reshape(nbins, data.shape[0] // nbins)
    pca.fit(rd)
    data = pca.inverse_transform(pca.transform(rd)).flatten()
    data = data.reshape((nbins, data.shape[0] // nbins))
    plt.imshow(data,
               origin='lower',
               interpolation='bilinear',
               cmap=plt.cm.gray_r
               )  #aspect='auto'plt.cm.Greys) #interpolation='bilinear'
    plt.savefig("%s_subbands.png" % os.path.join(output_dir, fn))

    # Second figure: same set-up for the intervals data.
    intervals_data = pfd_data.getdata(intervals=64)

    fig = plt.figure()
    fig.set_size_inches(5, 5)
    # NOTE(review): as above, this Axes object is immediately replaced.
    ax = plt.Axes(fig, [0., 0., 1., 1.])
    ax = plt.gca()
    ax.set_axis_off()
Example #18
0
        betweenss[kIdx] / totss * 100,
        marker='o',
        markersize=12,
        markeredgewidth=2,
        markeredgecolor='r',
        markerfacecolor='None')
# Finish the elbow plot started above this fragment: percentage axis, grid
# and labels.
ax.set_ylim((0, 100))
plt.grid(True)
plt.xlabel('Number of clusters')
plt.ylabel('Percentage of variance explained (%)')
plt.title('Elbow for KMeans clustering')

# show centroids for K=10 clusters
plt.figure()
for i in range(kIdx + 1):
    # Map the centroid back through the PCA and view it as an 8x8 image.
    img = pca.inverse_transform(centroids[kIdx][i]).reshape(8, 8)
    # One cell of a 3x4 grid per centroid, with ticks removed.
    ax = plt.subplot(3, 4, i + 1)
    ax.set_xticks([])
    ax.set_yticks([])
    plt.imshow(img, cmap=cm.gray)
    plt.title('Cluster %d' % i)

# compare K=10 clustering vs. actual digits (PCA projections)
fig = plt.figure()
ax = fig.add_subplot(121)
for i in range(10):
    ind = (t == i)
    ax.scatter(X[ind, 0],
               X[ind, 1],
               s=35,
               c=clr[i],