"""Impute missing values in the train/test CSVs with fancyimpute's IterativeSVD.

Reads ``Data/train.csv`` and ``Data/test.csv``, fills in missing entries with
an iterative low-rank SVD solver, and writes the completed matrices to
``Data/train_isvdimp.csv`` and ``Data/test_isvdimp.csv``.
"""
import sklearn
import numpy as np
import pandas as pd
import matplotlib as plt
from fancyimpute import IterativeSVD


def _complete(solver, matrix):
    """Return ``matrix`` with missing entries filled in by ``solver``.

    Older fancyimpute releases expose ``solver.complete``; newer ones renamed
    it to ``fit_transform``. The original method is preferred when present so
    behaviour on the old API is unchanged.
    """
    if hasattr(solver, "complete"):
        return solver.complete(matrix)
    return solver.fit_transform(matrix)


print("reading data...")
# read_csv consumes the first row as the header (labels); the slice then
# drops the first and last columns.
# `.values` replaces `DataFrame.as_matrix()`, which was removed in pandas 1.0.
X = pd.read_csv("Data/train.csv").iloc[:, 1:-1].values
# The test file only has its first column dropped.
test = pd.read_csv("Data/test.csv").iloc[:, 1:].values

# ind = np.genfromtxt('Class_change_ind.csv', delimiter = ',', dtype = 'int32')
# test_incomplete = test.copy()

print(X.shape)

print("setting svd object...")
svd = IterativeSVD(rank=1000, convergence_threshold=0.0001)
X_svd = _complete(svd, X)
# print(X_svd[:, 0])

print("saving data...")
np.savetxt("Data/train_isvdimp.csv", X_svd, delimiter=",")

print("imputing test...")
test_svd = _complete(svd, test)
np.savetxt("Data/test_isvdimp.csv", test_svd, delimiter=",")

# Emit the ASCII BEL character (audible terminal bell) once the run is done.
print('\a')
#reader = Reader(line_format='user item rating', sep='\t') # A reader is still needed but only the rating_scale param is requiered. data.split(n_folds=10) # data can now be used normally data_full = data.build_full_trainset() # obj = IterativeSVD(rank=20, max_iters=700, min_value=limits[0], max_value=limits[1], verbose=True) datamat_filled_SVD_fancy = obj.complete(datamat_missing) obj = SoftImpute(shrinkage_value=None, max_iters=700, max_rank=20, n_power_iterations=1, init_fill_method="zero", min_value=limits[0], max_value=limits[1], normalizer=None, verbose=True) datamat_filled_SOFT_fancy = obj.complete(datamat_missing) obj = NuclearNormMinimization(require_symmetric_solution=False, min_value=limits[0],