import matplotlib as plt
import numpy as np
import pandas as pd
import sklearn
from fancyimpute import IterativeSVD

# Fill in the missing entries of the train and test feature matrices with
# fancyimpute's IterativeSVD and write the completed matrices to CSV files.
print("reading data...")

# read_csv consumes the first row as the header; iloc then drops the first
# and last columns of the training file. `.values` replaces the removed
# `DataFrame.as_matrix()` and works on both old and new pandas releases.
X = pd.read_csv("Data/train.csv").iloc[:, 1:-1].values
# Only the first column is dropped from the test file.
test = pd.read_csv("Data/test.csv").iloc[:, 1:].values

print(X.shape)

print("setting svd object...")
svd = IterativeSVD(rank=1000, convergence_threshold=0.0001)

# NOTE(review): newer fancyimpute releases appear to expose this operation as
# `fit_transform` instead of `complete` -- confirm against the installed
# version before upgrading the dependency.
X_svd = svd.complete(X)

print("saving data...")
np.savetxt("Data/train_isvdimp.csv", X_svd, delimiter=",")

print("imputing test...")
test_svd = svd.complete(test)

np.savetxt("Data/test_isvdimp.csv", test_svd, delimiter=",")
# "\a" is the ASCII BEL character; presumably used as an audible
# "job finished" signal on terminals that honour it.
print("\a")
# Example #2
# 0
#reader = Reader(line_format='user item rating', sep='\t')

# A reader is still needed, but only the rating_scale param is required.

# Split `data` into 10 folds; after this call it can be used normally.
data.split(n_folds=10)

# Additionally keep a trainset built from the full dataset.
data_full = data.build_full_trainset()

# Both solvers below receive the same min_value / max_value bounds, taken
# from the first two entries of `limits`.
value_floor, value_ceiling = limits[0], limits[1]

# Complete `datamat_missing` with IterativeSVD (rank 20, at most 700 iterations).
obj = IterativeSVD(
    rank=20,
    max_iters=700,
    min_value=value_floor,
    max_value=value_ceiling,
    verbose=True,
)
datamat_filled_SVD_fancy = obj.complete(datamat_missing)

# Complete the same matrix with SoftImpute (max rank 20, zero initial fill,
# no normalizer, shrinkage value left as None).
obj = SoftImpute(
    shrinkage_value=None,
    max_iters=700,
    max_rank=20,
    n_power_iterations=1,
    init_fill_method="zero",
    min_value=value_floor,
    max_value=value_ceiling,
    normalizer=None,
    verbose=True,
)
datamat_filled_SOFT_fancy = obj.complete(datamat_missing)

obj = NuclearNormMinimization(require_symmetric_solution=False,
                              min_value=limits[0],