Code Example #1
    def run_factorization(self, N, S, X, Z, I, K, k, n):
        # Initialize the factor matrices with probabilistic PCA on the
        # scaled allelic ratios (PPCA tolerates the NaN entries in rat)
        rat = k / n
        nans = np.isnan(rat)  # mask of missing ratios (zero-coverage sites)
        scaled_rat = scale_allelic_ratios(rat)
        ppca = PPCA()
        ppca.fit(data=np.transpose(scaled_rat), d=K, verbose=True)
        U = ppca.C
        V = ppca.transform()
        # Persist the fitted model and the two factor matrices
        with open(self.output_root + '_model', 'wb') as f:
            pickle.dump(ppca, f)
        np.savetxt(self.output_root + '_temper_U.txt',
                   U,
                   fmt="%s",
                   delimiter='\t')
        np.savetxt(self.output_root + '_temper_V.txt',
                   V.T,
                   fmt="%s",
                   delimiter='\t')
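
The saved artifacts can be loaded back in a later session. A minimal sketch, assuming the same output_root prefix was used when saving (the prefix below is hypothetical):

import pickle
import numpy as np

output_root = 'results/run1'  # hypothetical: must match self.output_root above
with open(output_root + '_model', 'rb') as f:
    ppca = pickle.load(f)  # the fitted PPCA object
U = np.loadtxt(output_root + '_temper_U.txt', delimiter='\t')
V = np.loadtxt(output_root + '_temper_V.txt', delimiter='\t').T  # undo the V.T applied at save time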
Code Example #2
    def test_remove_nan(self):
        # unittest only discovers methods whose names start with 'test_'
        N = 101
        k = 23
        p_nan = 0.02
        n_components = 3

        # Random data with roughly 2% of entries replaced by NaN
        data = np.random.random((N, k))
        for n in range(N):
            for _k in range(k):
                if random.random() < p_nan:
                    data[n, _k] = np.nan

        pca = PPCA()
        pca.fit(data, n_components)

        # Fitting should leave no NaNs in the stored data, and the
        # loadings/scores should have the expected shapes
        self.assertEqual(pca.data[np.isnan(pca.data)].shape, (0, ))
        self.assertEqual(pca.C.shape, (k, n_components))
        self.assertEqual(pca.transform().shape, (N, n_components))
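
Running this test standalone needs a TestCase wrapper and imports; a minimal sketch (the class name TestPPCA is an assumption, since the snippet shows only the method):

import random
import unittest

import numpy as np
from ppca import PPCA

class TestPPCA(unittest.TestCase):
    # place test_remove_nan from above here
    pass

if __name__ == '__main__':
    unittest.main()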
Code Example #3

E = []  # parsed 48x48 images; X is assumed to hold space-separated pixel strings
for i in range(len(X)):
    Y = np.fromstring(X[i], dtype=int, sep=' ')
    Y = np.reshape(Y, (48, 48))
    E.append(Y)

X_inp = np.array(E)
#X_train = X_inp.reshape(-1, X_inp.shape[1], X_inp.shape[2], 1)
X_train = X_inp.astype('float32')
print(X_inp)

# Fit PPCA on the first image to initialize the stack of component matrices
inp_img = X_train[0, :, :]
ppca = PPCA(inp_img)
ppca.fit(d=20, verbose=False)
component_mat = ppca.transform()
E_y = component_mat.reshape(1, component_mat.shape[0], component_mat.shape[1])

for i in range(1, len(X_train)):
    print(i)
    inp_img = X_train[i, :, :]
    ppca = PPCA(inp_img)
    try:
        ppca.fit(d=20, verbose=False)
        component_mat = ppca.transform()
        shape = component_mat.shape
        component_mat = component_mat.reshape(1, component_mat.shape[0], component_mat.shape[1])
        if shape[1] == 20:  # keep only fits that yielded all 20 components
            E_y = np.concatenate((E_y, component_mat))
    except np.linalg.LinAlgError:
        print("Numpy Error")
Code Example #4
SelectedImage = showImagesRandomImages(
    3)  # select an image at random from the MNIST dataset
missingPercentage = 0.2  # fraction of pixels to mark as missing
missingImage = generateMissingFig(
    SelectedImage,
    missingPercentage)  # insert missing values into the original image

imputer = KNNImputer(n_neighbors=2, weights="uniform")
imputed_by_KNN = imputer.fit_transform(missingImage)
# mean_squared_error returns the MSE, so take the square root for RMSE
KNNImputed_RMSE = np.sqrt(mean_squared_error(SelectedImage, imputed_by_KNN))
#plt.imshow(imputed_by_KNN, cmap='gray', vmin=0, vmax=1)
#plt.show()

imputer = MissForest()
MissForest_imputed = imputer.fit_transform(missingImage)
MissForest_RMSE = np.sqrt(mean_squared_error(SelectedImage, MissForest_imputed))
#plt.imshow(MissForest_imputed, cmap='gray', vmin=0, vmax=1)
#plt.show()

imputer = IterativeImputer()
MICE_imputed = imputer.fit_transform(missingImage)
MICE_RMSE = np.sqrt(mean_squared_error(SelectedImage, MICE_imputed))
#plt.imshow(MICE_imputed, cmap='gray', vmin=0, vmax=1)
#plt.show()

ppca = PPCA()
ppca.fit(data=SelectedImage, d=100, verbose=True)
PPCA_imputed = ppca.transform(missingImage)
PPCA_RMSE = np.sqrt(mean_squared_error(SelectedImage, PPCA_imputed))
#plt.imshow(PPCA_imputed, cmap='gray', vmin=0, vmax=1)
#plt.show()
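
To compare the four imputers at a glance, the scores can be printed side by side; a minimal sketch using the variables computed above:

for name, rmse in [('KNN', KNNImputed_RMSE),
                   ('MissForest', MissForest_RMSE),
                   ('MICE', MICE_RMSE),
                   ('PPCA', PPCA_RMSE)]:
    print(f'{name:<12} RMSE = {rmse:.4f}')  # lower is better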
Code Example #5
File: merge.py  Project: ashayaan/recommender_system
# @Date:   2020-07-07 11:22:28
# @Last Modified by:   ashayaan
# @Last Modified time: 2020-07-07 12:44:33

import torch
import pandas as pd 
import numpy as np
import json
from ppca import PPCA
import pickle

if __name__ == '__main__':
    # Load the three per-film feature sets, all indexed by film id
    roberta_features = pd.read_csv('../data/roberta.csv')
    roberta_features = roberta_features.set_index('idx')

    mca_features = pd.read_csv('../data/mca.csv')
    mca_features = mca_features.set_index('idx')

    pca_features = pd.read_csv('../data/pca.csv')
    pca_features = pca_features.set_index('idx')

    links = pd.read_csv('../data/ml-20m/links.csv')

    # Concatenate the feature sets column-wise and reduce to 128 dimensions with PPCA
    df = pd.concat([roberta_features, mca_features, pca_features], axis=1)
    ppca = PPCA()
    ppca.fit(data=df.values.astype(float), d=128, verbose=True)
    print(ppca.var_exp)

    # Map each film id to its 128-dimensional embedding and pickle the result
    transformed = ppca.transform()
    films_dict = {k: torch.tensor(transformed[i]).float()
                  for i, k in enumerate(df.index)}
    with open('../data/ml20_pca128.pkl', 'wb') as f:
        pickle.dump(films_dict, f)
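
Downstream code can then load the pickled embeddings and look films up by id; a minimal sketch (the lookup of the first key is only for illustration):

import pickle

with open('../data/ml20_pca128.pkl', 'rb') as f:
    films_dict = pickle.load(f)

film_id = next(iter(films_dict))   # any film id present in the dict
embedding = films_dict[film_id]    # a 128-dimensional torch tensor
print(film_id, embedding.shape)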
Code Example #6
    plt.show()

    print('\n\n\n\n**TEST CALCULATING LIKELIHOOD**')
    # keep_loglikes=True makes fit() return the log-likelihood trace of EM
    ppca1 = PPCA(n_dimension=2)
    loglikelihoods = ppca1.fit(data, method='EM', keep_loglikes=True)
    plt.plot(loglikelihoods)
    plt.show()

    print('\n\n\n\n**TEST DIMENSION REDUCTION AND RECOVERING**')
    plt.matshow(data)
    print('\n\noriginal data')
    plt.show()

    # Project to 2 components and reconstruct
    ppca3 = PPCA(n_dimension=2)
    ppca3.fit(data, method='EM')
    plt.matshow(ppca3.inverse_transform(ppca3.transform(data)))
    print('\n\nrecovered data: 2-component')
    plt.show()

    # Same, but fitting with mini-batch EM
    ppca4 = PPCA(n_dimension=2)
    ppca4.fit(data, batchsize=16, n_iteration=2000, method='EM')
    plt.matshow(ppca4.inverse_transform(ppca4.transform(data)))
    print('\n\nrecovered data: 2-component (mini-batch)')
    plt.show()

    # With 63 components the reconstruction should be near-perfect
    ppca5 = PPCA(n_dimension=63)
    ppca5.fit(data, method='EM')
    plt.matshow(ppca5.inverse_transform(ppca5.transform(data)))
    print('\n\nrecovered data: 63-component')
    plt.show()
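
Reconstruction quality can also be quantified rather than eyeballed; a minimal sketch, assuming the fitted models and data array from above:

import numpy as np

for model, label in [(ppca3, '2-component'),
                     (ppca4, '2-component (mini-batch)'),
                     (ppca5, '63-component')]:
    recovered = model.inverse_transform(model.transform(data))
    err = np.mean((data - recovered) ** 2)  # mean squared reconstruction error
    print(f'{label}: MSE = {err:.4f}')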