Code example #1
 # Assumes numpy as np, pandas as pd, and fancyimpute's NuclearNormMinimization are
 # imported; mean_impute is a project-local helper.
 def GetImputedDataframe(self, df, impute_type='mean'):
     df = df.copy()
     # impute missing values ('?' marks a missing entry)
     if impute_type == 'mean':
         null_sum = df.replace('?', np.nan).isnull().sum()
         null_col = [k for k, v in null_sum.items() if v != 0]
         # mean-impute every non-string column that has missing values
         for each_col_ind, each_col in enumerate(self.col_names):
             if each_col in null_col and self.col_types[
                     each_col_ind] != 'str':
                 df[each_col] = mean_impute(
                     df, each_col, data_type=self.col_types[each_col_ind])
     elif impute_type == 'nnm':
         # complete the whole matrix via nuclear-norm minimization
         df = df.replace('?', np.nan)
         df = pd.DataFrame(NuclearNormMinimization().complete(df),
                           columns=self.col_names)
     else:
         raise ValueError('unsupported impute_type: {}'.format(impute_type))
     return df
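
For reference, a minimal standalone sketch of the 'nnm' branch above, assuming fancyimpute is installed; the toy column names and values are made up, and newer fancyimpute releases rename complete() to fit_transform():

    import numpy as np
    import pandas as pd
    from fancyimpute import NuclearNormMinimization

    # toy frame where '?' stands in for missing entries
    df = pd.DataFrame({'a': [1.0, '?', 3.0], 'b': [4.0, 5.0, '?']})
    X = df.replace('?', np.nan).astype(float).values

    # older fancyimpute API; on newer releases call NuclearNormMinimization().fit_transform(X)
    completed = NuclearNormMinimization().complete(X)
    df_imputed = pd.DataFrame(completed, columns=df.columns)
    print(df_imputed)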
Code example #2
File: utils.py  Project: yangji9181/cube2net_yelp
    # Assumes numpy as np, implicit, sklearn.decomposition.NMF, and fancyimpute's
    # NuclearNormMinimization / SoftImpute are imported; mnmf is a project-local helper.
    def netPred(self, method='mf', dim=100, alpha=0.1):
        '''
        Predict the full interaction matrix from self.mat.
        Supported methods: mf, cf, mnmf, fancy_nnm, fancy_soft.
        '''
        if method == 'mf':
            # non-negative matrix factorization (scikit-learn)
            model = NMF(n_components=dim, alpha=alpha, l1_ratio=0.2)
            W = model.fit_transform(self.mat)
            H = model.components_
            self.pred = np.matmul(W, H)
        elif method == 'cf':
            # implicit-feedback ALS collaborative filtering (implicit package)
            model = implicit.als.AlternatingLeastSquares(factors=dim,
                                                         regularization=alpha)
            model.fit(self.mat)
            self.pred = np.matmul(model.item_factors, model.user_factors.T)
        elif method == 'mnmf':
            self.pred = mnmf(self.mat, dim, alpha)
        elif 'fancy' in method:
            # matrix completion via fancyimpute: explicit zeros are treated as missing
            X = self.mat.toarray().astype(float)
            X[X == 0] = np.nan
            if 'nnm' in method:
                self.pred = NuclearNormMinimization(
                    error_tolerance=0.01).complete(X)
            elif 'soft' in method:
                self.pred = SoftImpute().complete(X)
        else:
            raise ValueError('unsupported method: {}'.format(method))
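
A self-contained sketch of the 'mf' branch above on a toy interaction matrix, assuming scikit-learn is available; newer scikit-learn versions replace NMF's alpha argument with alpha_W / alpha_H, so the regularization argument is omitted here:

    import numpy as np
    from scipy.sparse import csr_matrix
    from sklearn.decomposition import NMF

    # toy user-item matrix; explicit zeros mean "no observed interaction"
    mat = csr_matrix(np.array([[1.0, 0.0, 2.0],
                               [0.0, 3.0, 0.0],
                               [4.0, 0.0, 0.0]]))

    model = NMF(n_components=2, init='random', random_state=0)
    W = model.fit_transform(mat)   # row factors
    H = model.components_          # column factors
    pred = np.matmul(W, H)         # dense reconstruction used as the prediction
    print(pred.round(2))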
Code example #3
    "f_2551", "f_2552", "f_2553", "f_2554", "f_2555", "f_2556", "f_2557",
    "f_2558", "f_2559", "f_2560", "f_2561", "f_2562", "f_2563", "f_2564",
    "f_2565", "f_2566", "f_2567", "f_2568", "f_2569", "f_2570", "f_2571",
    "f_2572", "f_2573", "f_2574", "f_2575", "f_2576", "f_2577", "f_2578",
    "f_2579", "f_2580", "f_2581", "f_2582", "f_2583", "f_2584", "f_2585",
    "f_2586", "f_2587", "f_2588", "f_2589", "f_2590", "f_2591", "f_2592",
    "f_2593", "f_2594", "f_2595", "f_2596", "f_2597", "f_2598", "f_2599",
    "label"
]

# Assumes numpy as np, pandas as pd, read_csv from pandas, and fancyimpute's
# NuclearNormMinimization are imported; `lst` is presumably the column-name list above.
if True:
    data = read_csv('train.csv', na_values='').to_numpy()
    X1 = data[:, 1:-1]  # input features
    Y1 = data[:, -1].astype('int')  # labels

    # complete missing training features, clipping imputed values to [0, 1]
    X1 = NuclearNormMinimization(min_value=0.0, max_value=1.0).complete(X1)

    train = np.concatenate((X1, np.reshape(Y1, (-1, 1))), axis=1)
    pd.DataFrame(train).to_csv('train_nnm.csv', header=lst)

    print('Train done:', train.shape, data.shape)

    data = read_csv('test.csv', na_values='').to_numpy()
    X2 = data[:, 1:]  # features

    train = X1.shape[0]  # number of training rows (the name 'train' is reused here)

    # complete train and test features jointly so they share one low-rank structure
    X = np.concatenate((X1, X2))
    del X1, X2

    X_net = NuclearNormMinimization(min_value=0.0, max_value=1.0).complete(X)
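
The completed matrix would then typically be split back into train and test blocks using the saved row count; a toy sketch of the same bounded completion-and-split pattern, with made-up array contents (complete() becomes fit_transform() on newer fancyimpute):

    import numpy as np
    from fancyimpute import NuclearNormMinimization

    # toy train/test feature blocks with NaN marking missing entries, values in [0, 1]
    X_tr = np.array([[0.1, np.nan], [0.4, 0.5]])
    X_te = np.array([[np.nan, 0.9]])

    n_train = X_tr.shape[0]
    X_all = np.concatenate((X_tr, X_te))

    # complete both blocks jointly, clipping imputed values to [0, 1]
    X_done = NuclearNormMinimization(min_value=0.0, max_value=1.0).complete(X_all)
    X_tr_done, X_te_done = X_done[:n_train], X_done[n_train:]
    print(X_tr_done.shape, X_te_done.shape)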