def GetImputedDataframe(self, df, impute_type='mean'):
    """Return a copy of ``df`` with its missing values imputed.

    Both real nulls and the literal string ``'?'`` are treated as missing.

    Args:
        df: Input DataFrame. It is copied first, so the caller's frame is
            never modified.
        impute_type: ``'mean'`` replaces every column that has missing
            entries and whose declared type in ``self.col_types`` is not
            ``'str'`` with the result of ``mean_impute``. ``'nnm'``
            completes the whole frame with ``NuclearNormMinimization`` and
            rebuilds it with ``self.col_names`` as the column labels.

    Returns:
        The imputed DataFrame.

    Raises:
        ValueError: If ``impute_type`` is neither ``'mean'`` nor ``'nnm'``.
    """
    df = df.copy()

    if impute_type == 'mean':
        # Count missing cells per column, with '?' counted as missing.
        null_sum = df.replace('?', np.nan).isnull().sum()
        # Series.items() is used because Series.iteritems() was removed in
        # pandas 2.0; items() is also available on older releases.
        null_col = {k for k, v in null_sum.items() if v != 0}
        for each_col_ind, each_col in enumerate(self.col_names):
            if (each_col in null_col
                    and self.col_types[each_col_ind] != 'str'):
                # NOTE(review): mean_impute receives the frame with '?'
                # placeholders still in place; presumably it handles them
                # itself -- confirm against its definition.
                df[each_col] = mean_impute(
                    df, each_col, data_type=self.col_types[each_col_ind])
    elif impute_type == 'nnm':
        df = df.replace('?', np.nan)
        # The solver output is wrapped in a fresh DataFrame, so the
        # original index is replaced by a default one.
        df = pd.DataFrame(NuclearNormMinimization().complete(df),
                          columns=self.col_names)
    else:
        # ValueError subclasses Exception, so existing handlers still work.
        raise ValueError(
            "unsupported impute_type %r; expected 'mean' or 'nnm'"
            % (impute_type,))
    return df
def netPred(self, method='mf', dim=100, alpha=0.1):
    """Compute a dense prediction matrix from ``self.mat`` into ``self.pred``.

    Supported methods: mf, cf, mnmf, fancy_nnm, fancy_soft.

    Args:
        method: Which completion/factorisation routine to run.
            ``'mf'`` multiplies the two factors of an ``NMF`` fit;
            ``'cf'`` multiplies the item and user factors of an
            ``implicit`` ALS model; ``'mnmf'`` delegates to ``mnmf``;
            any name containing ``'fancy'`` densifies ``self.mat``, marks
            zeros as missing and completes it with
            ``NuclearNormMinimization`` (name contains ``'nnm'``) or
            ``SoftImpute`` (name contains ``'soft'``).
        dim: Number of latent components/factors (``mf``, ``cf``), also
            forwarded to ``mnmf``. Not used by the ``fancy`` methods.
        alpha: Regularisation strength (``mf``, ``cf``), also forwarded to
            ``mnmf``. Not used by the ``fancy`` methods.

    Raises:
        ValueError: If ``method`` is not one of the supported methods.
            Previously such a call returned silently and left
            ``self.pred`` unset or stale.
    """
    if method == 'mf':
        # NOTE(review): recent scikit-learn releases replaced NMF's
        # ``alpha`` with ``alpha_W``/``alpha_H`` -- confirm pinned version.
        model = NMF(n_components=dim, alpha=alpha, l1_ratio=0.2)
        W = model.fit_transform(self.mat)
        H = model.components_
        self.pred = np.matmul(W, H)
    elif method == 'cf':
        model = implicit.als.AlternatingLeastSquares(
            factors=dim, regularization=alpha)
        model.fit(self.mat)
        self.pred = np.matmul(model.item_factors, model.user_factors.T)
    elif method == 'mnmf':
        self.pred = mnmf(self.mat, dim, alpha)
    elif 'fancy' in method:
        use_nnm = 'nnm' in method
        if not use_nnm and 'soft' not in method:
            raise ValueError(
                "unsupported fancy method %r; expected 'fancy_nnm' or "
                "'fancy_soft'" % (method,))
        # The builtin float is the dtype the removed ``np.float`` alias
        # pointed at (alias dropped in NumPy 1.24).
        X = self.mat.toarray().astype(float)
        # Zeros are treated as unobserved entries for the solver to fill.
        X[X == 0] = np.nan
        # NOTE(review): newer fancyimpute releases expose ``fit_transform``
        # instead of ``complete`` -- confirm pinned version.
        if use_nnm:
            self.pred = NuclearNormMinimization(
                error_tolerance=0.01).complete(X)
        else:
            self.pred = SoftImpute().complete(X)
    else:
        raise ValueError(
            "unsupported method %r; expected one of 'mf', 'cf', 'mnmf', "
            "'fancy_nnm', 'fancy_soft'" % (method,))
"f_2551", "f_2552", "f_2553", "f_2554", "f_2555", "f_2556", "f_2557", "f_2558", "f_2559", "f_2560", "f_2561", "f_2562", "f_2563", "f_2564", "f_2565", "f_2566", "f_2567", "f_2568", "f_2569", "f_2570", "f_2571", "f_2572", "f_2573", "f_2574", "f_2575", "f_2576", "f_2577", "f_2578", "f_2579", "f_2580", "f_2581", "f_2582", "f_2583", "f_2584", "f_2585", "f_2586", "f_2587", "f_2588", "f_2589", "f_2590", "f_2591", "f_2592", "f_2593", "f_2594", "f_2595", "f_2596", "f_2597", "f_2598", "f_2599", "label" ] if True: data = read_csv(open('train.csv', 'r'), na_values='').as_matrix() X1 = data[:, 1:-1] # input features Y1 = data[:, -1].astype('int') # input features X1 = NuclearNormMinimization(min_value=0.0, max_value=1.0).complete(X1) train = np.concatenate((X1, np.reshape(Y1, (-1, 1))), axis=1) pd.DataFrame(train).to_csv('train_nnm.csv', header=lst) print('Train done:', train.shape, data.shape) data = read_csv(open('test.csv', 'r'), na_values='').as_matrix() X2 = data[:, 1:] # features train = X1.shape[0] X = np.concatenate((X1, X2)) del X1, X2 X_net = NuclearNormMinimization(min_value=0.0, max_value=1.0).complete(X)