Esempio n. 1
0
    def fit(self, X, y=None):
        """Create the SoftImpute estimator(s) used by this transformer.

        When neither ``y`` nor ``self.groupby`` is given, a single
        ``SoftImpute`` is stored in ``self.imputer``.  Otherwise
        ``self.imputer`` becomes a dict keyed by class value, where every
        entry holds an ``"impute"`` estimator and the boolean ``"mask"``
        selecting the rows of that class.  ``y`` and ``self.groupby`` are
        mutually exclusive.

        :param X: input data; must be a ``pandas.DataFrame``
        :param y: optional labels used to split the rows into classes
        :return: ``self``
        """
        assert isinstance(X, pd.DataFrame)
        has_target = y is not None
        has_groupby = self.groupby is not None
        self.imputer = []

        # Simple case first: one estimator for the whole frame.
        if not (has_target or has_groupby):
            self.imputer = SoftImpute(max_iters=self.max_iters, **self.kwargs)
            logger.info("""Building Soft Imputation Transformer""")
            return self

        assert not (has_groupby and has_target)
        if has_target:
            labels = y
            classes = np.unique(y)
        else:
            labels = X[self.groupby]
            classes = labels.unique()

        per_class = {}
        for label in classes:
            per_class[label] = {
                "impute": SoftImpute(max_iters=self.max_iters, **self.kwargs),
                "mask": labels == label,
            }
        self.imputer = per_class

        logger.info(
            """Building Soft Imputation Transformers for {} classes""".format(
                len(classes)))
        return self
Esempio n. 2
0
def test_soft_impute_with_low_rank_random_matrix():
    """Check that default SoftImpute keeps the missing-entry error below 0.1.

    Uses the module-level fixtures ``XY``, ``XY_incomplete`` and
    ``missing_mask``; the second value returned by ``reconstruction_error``
    is the one compared against the threshold.
    """
    completed = SoftImpute().fit_transform(XY_incomplete)
    _unused, missing_mae = reconstruction_error(
        XY, completed, missing_mask, name="SoftImpute")
    assert missing_mae < 0.1, "Error too high!"
Esempio n. 3
0
    def __init__(self):
        """
        Create the wrapper around a ``SoftImpute`` estimator built with its
        default settings.

        Takes no parameters; the estimator is stored on ``self._imputer``.
        """

        self._imputer = SoftImpute()
Esempio n. 4
0
def softimpute_used(X, X_incomplete, missing_mask, count_miss):
    """Complete ``X_incomplete`` with SoftImpute (no BiScale normalisation).

    ``X``, ``missing_mask`` and ``count_miss`` are accepted but not used by
    this function.

    :param X_incomplete: matrix passed to ``SoftImpute.complete``
    :return: the completed matrix
    """
    imputer = SoftImpute(convergence_threshold=0.0001, max_iters=300)
    return imputer.complete(X_incomplete)
Esempio n. 5
0
 def fun(lambd_val):
     """Return the mean squared error on the masked entries for one lambda.

     ``X``, ``X_incomplete``, ``missing_mask`` and the solver settings
     (``max_rank``, ``verbose``, ``max_iters``, ``convergence_threshold``)
     are taken from the enclosing scope.
     """
     solver = SoftImpute(
         shrinkage_value=lambd_val,
         init_fill_method='mean',
         max_rank=max_rank,
         verbose=verbose,
         max_iters=max_iters,
         convergence_threshold=convergence_threshold,
     )
     # Fit on a copy so the shared incomplete matrix is left untouched.
     completed = solver.fit_transform(X_incomplete.copy())
     residual = completed[missing_mask] - X[missing_mask]
     return (residual ** 2).mean()
Esempio n. 6
0
def softimpute_used_for_cv(X, X_incomplete, missing_mask, count_miss,
                           defined_missing_percent, limit1, limit2,
                           percentile):
    """Complete ``X_incomplete`` and report the RMSE per magnitude bucket.

    The masked entries of ``X`` are compared with the imputed values in
    three buckets, stored under the keys 10, 5 and 2:

    * 10: ``-percentile[10] < value < percentile[10]``
    * 5:  ``percentile[10] < abs(value) < percentile[5]``
    * 2:  ``percentile[5] < abs(value) < percentile[2]``

    All bounds are strict.  ``count_miss``, ``defined_missing_percent``,
    ``limit1`` and ``limit2`` are accepted but not used.

    :return: ``(completed matrix, defaultdict mapping bucket key -> RMSE)``
    """
    imputer = SoftImpute(convergence_threshold=0.0001, max_iters=300)
    X_filled = imputer.complete(X_incomplete)

    actual = X[missing_mask]
    predicted = X_filled[missing_mask]

    rmse_percentile = defaultdict(float)
    # (bucket key, key of the inner bound); the innermost bucket has no
    # inner bound and is tested on the signed value instead.
    for key, inner_key in ((10, None), (5, 10), (2, 5)):
        kept_actual = []
        kept_predicted = []
        for truth, guess in zip(actual, predicted):
            if inner_key is None:
                selected = (truth < percentile[key]
                            and truth > percentile[key] * (-1))
            else:
                selected = (abs(truth) < percentile[key]
                            and abs(truth) > percentile[inner_key])
            if selected:
                kept_actual.append(truth)
                kept_predicted.append(guess)

        truth_arr = np.asarray(kept_actual)
        guess_arr = np.asarray(kept_predicted)
        squared_error = float(((guess_arr - truth_arr)**2).sum())
        rmse_percentile[key] = np.sqrt(squared_error / len(guess_arr))

    return (X_filled, rmse_percentile)
Esempio n. 7
0
def Initialize_X_incomplete(X_incomplete, test_filename, train_filename):
    """Complete ``X_incomplete`` with SoftImpute and locate its missing cells.

    ``test_filename`` and ``train_filename`` are accepted but not used.

    :param X_incomplete: 2-D array in which missing entries are NaN
    :return: ``(X, missing_mask, count_miss)`` -- the completed matrix, a
        boolean array that is True where ``X_incomplete`` is NaN, and the
        number of such entries as a Python ``int``
    """
    # Unpacking keeps the original requirement that the input is 2-D.
    n_rows, n_cols = X_incomplete.shape

    imputer = SoftImpute(convergence_threshold=0.0001, max_iters=300)
    X = imputer.complete(X_incomplete)

    # One vectorised pass instead of an element-by-element Python loop.
    missing_mask = np.isnan(np.asarray(X_incomplete))
    count_miss = int(missing_mask.sum())

    return (X, missing_mask, count_miss)
Esempio n. 8
0
    def forward(ctx, input):
        """Randomly mask each image, re-estimate it with SoftImpute, normalise.

        Every image is mapped from [0, 1] to [-1, 1], a random subset of its
        entries is replaced by NaN, the matrix is completed with SoftImpute,
        clipped to [-1, 1] and mapped back to [0, 1].  The result is then
        normalised with ``mean`` / ``std`` and moved to ``device``.

        ``args``, ``globe_train``, ``mask_train_cnt``, ``mean``, ``std`` and
        ``device`` are resolved outside this function.  ``ctx`` is not used.
        NOTE(review): presumably the forward pass of a ``torch.autograd``
        function -- confirm against the enclosing class.

        :param input: tensor of shape (batch, channels, height, width)
        :return: float tensor of the same shape on ``device``
        """
        batch_num, c, h, w = input.size()
        output = torch.zeros_like(input).cpu().numpy()

        concat_channels = args.me_channel == 'concat'
        # 'concat' completes one wide (h, c * w) matrix per image; otherwise
        # a single (h, w) mask is shared by all channels of the image.
        mask_shape = (h, w * c) if concat_channels else (h, w)

        def estimate(observed):
            # Complete the matrix, clamp to [-1, 1] and map back to [0, 1].
            W = SoftImpute(verbose=False).fit_transform(observed)
            return (np.clip(W, -1, 1) + 1) / 2

        for i in range(batch_num):
            img = (input[i] * 2 - 1).cpu().numpy()

            # Probability that an entry stays observed.
            if globe_train:
                keep_prob = (args.startp + mask_train_cnt *
                             (args.endp - args.startp) / args.mask_num)
            else:
                keep_prob = random.uniform(args.startp, args.endp)

            mask = np.random.binomial(
                1, keep_prob,
                mask_shape[0] * mask_shape[1]).reshape(mask_shape).astype(float)
            mask[mask < 1] = np.nan

            if concat_channels:
                # Lay the channels side by side along the width axis.
                wide = np.concatenate(list(img), axis=1)
                est_matrix = estimate(mask * wide)
                for channel in range(c):
                    # Channels are w columns wide (the original sliced with
                    # h, which only works for square images).
                    output[i, channel] = est_matrix[:, channel * w:
                                                    (channel + 1) * w]
            else:
                for channel in range(c):
                    output[i, channel] = estimate(img[channel] * mask)

        output = output - mean
        output /= std
        output = torch.from_numpy(output).float().to(device)
        return output
def preprocessingData_pialadunia2018(dataset):
    """Impute, round and split the data set into features and target.

    Columns from position 2 onwards are completed with SoftImpute and
    written back into ``dataset`` (the argument is modified in place).

    :param dataset: DataFrame containing a ``'Hasil'`` column
    :return: ``(features, target)`` -- the rounded frame without ``'Hasil'``
        and the ``'Hasil'`` column of ``dataset``
    """
    # solve missing value
    dataset.iloc[:, 2:] = SoftImpute().complete(dataset.iloc[:, 2:])

    # Columns rounded to 0 decimals after imputation.
    whole_number_columns = (
        'HTRF', 'THFP', 'PHPAR', 'PHSPAR', 'PHPSAR', 'PHDAR', 'PHTSA',
        'PHDBAR', 'PHAAR',
        'ATRF', 'TAFP', 'PAPAR', 'PASPAR', 'PAPSAR', 'PADAR', 'PATSA',
        'PADBAR', 'PAAAR',
    )
    rounded = dataset.round(dict.fromkeys(whole_number_columns, 0))

    dataset_independent = rounded.drop('Hasil', axis=1)
    dataset_dependent = dataset['Hasil']
    return dataset_independent, dataset_dependent
def preprocessingData(dataset):
    """Impute, round and split the data set; label-encode the target.

    Columns from position 5 onwards are completed with SoftImpute and
    written back into ``dataset`` (the argument is modified in place).  The
    target is read from column position 4 and encoded with ``LabelEncoder``.

    :param dataset: DataFrame containing a ``'Hasil'`` column
    :return: ``(features, target)`` -- the rounded frame without ``'Hasil'``
        and a one-column DataFrame named ``'Hasil'`` with the encoded labels
    """
    # solve missing value
    dataset.iloc[:, 5:] = SoftImpute().complete(dataset.iloc[:, 5:])

    # Columns rounded to 0 decimals after imputation.
    whole_number_columns = (
        'HTRF', 'THFP', 'PHPAR', 'PHSPAR', 'PHPSAR', 'PHDAR', 'PHTSA',
        'PHDBAR', 'PHAAR',
        'ATRF', 'TAFP', 'PAPAR', 'PASPAR', 'PAPSAR', 'PADAR', 'PATSA',
        'PADBAR', 'PAAAR',
    )
    rounded = dataset.round(dict.fromkeys(whole_number_columns, 0))
    dataset_independent = rounded.drop('Hasil', axis=1)

    # label encoder
    raw_labels = dataset.iloc[:, [4]].values
    encoded_labels = LabelEncoder().fit_transform(raw_labels)
    dataset_dependent_baru = pd.DataFrame(encoded_labels, columns=['Hasil'])
    return dataset_independent, dataset_dependent_baru
def cmd(in_mat_file, dims, suffix, i_loo, j_loo, loo_output, loo_only, verbose,
        seed):
    """Read M_partial from IN_MAT_FILE and complete the matrix using soft-impute method."""

    # Parameters: `dims` is passed to SoftImpute as max_rank and appears in
    # the output file name; (`i_loo`, `j_loo`) select the held-out entry and
    # both must be > -1 to enable leave-one-out mode; `loo_output` overrides
    # the path of the .loo file; `loo_only` skips writing the completed
    # matrix; `seed` only appears in the file name; `suffix` and `verbose`
    # are not used.

    # Work on a float copy: missing entries are marked with NaN below, and
    # NaN cannot be stored in an integer matrix.
    M = io.loadmat(in_mat_file)['M_partial'].astype(float)
    rank = dims

    LOO_mode = i_loo > -1 and j_loo > -1
    if not LOO_mode:
        # Only leave-one-out mode is implemented; fail before running the
        # expensive completion instead of after it.
        # np.savetxt(in_mat_file + '.' + suffix + '.c_vecs', U)
        # np.savetxt(in_mat_file + '.' + suffix + '.v_vecs', V)
        raise NotImplementedError('Use randomized_svd here.')

    # Remember the true value, then hide it from the solver.
    LOO = M[i_loo, j_loo]
    M[i_loo, j_loo] = 0

    # Zero entries are treated as unobserved.
    M[M == 0] = np.nan
    M_complete = SoftImpute(max_rank=dims).complete(M)

    file_tmpl = f'{in_mat_file}.r{rank}.s{seed}.i{i_loo}.j{j_loo}.soft-impute.out'

    if not loo_only:
        op_mat_file = file_tmpl + '.mat'
        io.savemat(op_mat_file, {'Mhat': M_complete})

    op_loo_file = loo_output if loo_output is not None else file_tmpl + '.loo'
    loo_pred = M_complete[i_loo, j_loo]
    with open(op_loo_file, 'wt') as f:
        # "<true value>, <predicted value>"
        f.write('{}, {}'.format(LOO, loo_pred))

    print('Done at', datetime.now())
Esempio n. 12
0
def baseline_inpute(X_incomplete, method='mean', level=0):
    """Fill ``X_incomplete`` with one of the baseline imputers.

    :param X_incomplete: matrix handed to the imputer's ``fit_transform``
    :param method: ``'mean'``, ``'knn'``, ``'svd'``, ``'mice'`` or
        ``'spectral'``
    :param level: index (0-2) selecting one of three preset strengths for
        every method except ``'mean'``
    :return: the completed matrix
    :raises NotImplementedError: for any other ``method``
    """
    if method == 'mean':
        return SimpleFill().fit_transform(X_incomplete)

    if method == 'knn':
        neighbours = (3, 10, 50)[level]
        return KNN(k=neighbours, verbose=False).fit_transform(X_incomplete)

    if method == 'svd':
        max_rank = X_incomplete.shape[1] - 1
        candidates = (np.ceil(max_rank / 10), np.ceil(max_rank / 5), max_rank)
        rank = candidates[level]
        solver = IterativeSVD(rank=int(rank), verbose=False)
        return solver.fit_transform(X_incomplete)

    if method == 'mice':
        iterations = (3, 10, 50)[level]
        return IterativeImputer(max_iter=iterations).fit_transform(
            X_incomplete)

    if method == 'spectral':
        # default value for the sparsity level is with respect to the
        # maximum singular value, this is now done in a heuristic way
        shrinkage = (0.5, None, 3)[level]
        return SoftImpute(shrinkage_value=shrinkage).fit_transform(
            X_incomplete)

    raise NotImplementedError
Esempio n. 13
0
def fancyimpute_matrix_completion(function, gram_drop,
                                  seqs=None, sigma=None, triangular=None,
                                  num_process=4,
                                  drop_flag_matrix=None):
    """Complete a partially observed Gram matrix with a fancyimpute solver.

    The entries that ``gak.gram_gak`` was able to compute are first copied
    into ``gram_drop`` (which is modified in place and must be NaN at those
    positions), the diagonal is set to 1, and the remaining NaN entries are
    filled by the selected solver.

    :param function: ``"SoftImpute"``, ``"KNN"`` or ``"IterativeSVD"``; any
        other value prints a message and exits the process with status -1
    :param gram_drop: square matrix with NaN at the unknown positions
    :return: the completed matrix
    """
    gak_gram = gak.gram_gak(seqs,
                            sigma=sigma,
                            triangular=triangular,
                            num_process=num_process,
                            drop_flag_matrix=drop_flag_matrix)

    for row in range(len(gram_drop)):
        gram_drop[row, row] = 1
        for col in range(len(gram_drop[0])):
            known = gak_gram[row, col]
            if not np.isnan(known):
                # A freshly computed value must land on a missing cell.
                assert np.isnan(gram_drop[row, col])
                gram_drop[row, col] = known

    if function == "SoftImpute":
        solver = SoftImpute()
    elif function == "KNN":
        solver = KNN()
    elif function == "IterativeSVD":
        solver = IterativeSVD()
    else:
        print("unsupported fancyimpute functin")
        exit(-1)
    return solver.complete(gram_drop)
Esempio n. 14
0
    def fit(self, trainset):
        """Fit the algorithm by completing the user x item rating matrix.

        The known ratings from ``trainset.all_ratings()`` are placed into a
        ``(n_users, n_items)`` matrix that is NaN elsewhere; SoftImpute
        fills the rest and the result is stored on ``self.predictions``.

        :return: ``self``
        """
        AlgoBase.fit(self, trainset)

        ratings = np.full((trainset.n_users, trainset.n_items), np.nan)
        for user, item, rating in trainset.all_ratings():
            ratings[user, item] = rating

        solver = SoftImpute(
            shrinkage_value=self.lmbda,
            max_iters=self.max_iter,
            max_rank=self.max_rank,
            min_value=self.min_value,
            max_value=self.max_value,
            verbose=self.verbose,
        )
        self.predictions = solver.fit_transform(ratings)
        return self
Esempio n. 15
0
def impute(data):
    """Fill the NaN entries of ``data`` in place; returns nothing.

    Rows and columns that are entirely NaN are left out of the SoftImpute
    fit.  In the rows that were fitted, all-NaN columns become 0; rows that
    were entirely NaN are set to 0 as well.
    """
    missing = np.isnan(data)
    row_bool = ~np.all(missing, axis=1)
    col_bool = ~np.all(missing, axis=0)

    observed = data[row_bool, :][:, col_bool]
    completed = SoftImpute().fit_transform(observed)

    # Widen the result back to the full column count (dropped columns = 0).
    widened = np.zeros([observed.shape[0], data.shape[1]])
    widened[:, col_bool] = completed

    data[row_bool, :] = widened
    data[np.isnan(data)] = 0
Esempio n. 16
0
def filtering(food_list, food_a, food_b):
    """Append two new rating rows and complete the table with SoftImpute.

    Reads ``./resource/meal_problem/final_rating_data.csv``, drops its first
    column, appends the rows built by ``build_new_row`` for ``food_a`` and
    ``food_b`` and fills the float columns with SoftImpute.

    :return: DataFrame holding the completed float columns, labelled with
        the names of those columns
    """
    df = pd.read_csv('./resource/meal_problem/final_rating_data.csv')
    df = df.iloc[:, 1:]
    # NOTE(review): DataFrame.append was removed in pandas 2.0; migrating to
    # pd.concat depends on what build_new_row returns -- confirm and update.
    df = df.append(build_new_row(food_list, food_a), ignore_index=True)
    df = df.append(build_new_row(food_list, food_b), ignore_index=True)

    # `np.float` was only an alias of the builtin `float` and no longer
    # exists in current NumPy releases.
    numeric = df.select_dtypes(include=[float])
    df_new = pd.DataFrame(SoftImpute().fit_transform(numeric.to_numpy()))
    # Label with the columns that were actually imputed, so a dropped
    # non-float column cannot cause a length mismatch.
    df_new.columns = numeric.columns
    return df_new
Esempio n. 17
0
 def impute(self, trained_model, input):
     """
     Impute the given table with BiScaler normalisation plus SoftImpute.

     :param trained_model: trained model returned by the train function;
         not used here
     :param input: input table which needs to be imputed
     :return: the imputed table, mapped back to the original scale with
         the scaler's ``inverse_transform``
     """
     solver = SoftImpute()
     scaler = BiScaler()
     normalized = scaler.fit_transform(input)
     filled_normalized = solver.fit_transform(normalized)
     return scaler.inverse_transform(filled_normalized)
Esempio n. 18
0
def softimp(img, maskp):
    """Preprocessing with Soft-Impute approach.

    Data matrix is scaled between [-1, 1] before matrix estimation (and
    rescaled back to [0, 255] after ME).
    [Mazumder, R. et al. Spectral regularization algorithms for learning
    large incomplete matrices. 2010.]

    With ``args.me_channel == 'concat'`` the three channels are placed side
    by side and estimated as one matrix; otherwise each channel is estimated
    separately using one shared mask.

    :param img: original image of shape (height, width, channels)
    :param maskp: observation probability of each entry in mask matrix
    :return: preprocessed image as a float array of the same shape
    """
    h, w, c = img.shape
    img = img.astype('float64') * 2 / 255 - 1

    def estimate(observed):
        # Complete, clamp to [-1, 1] and map back to the [0, 255] range.
        W = SoftImpute(verbose=False).fit_transform(observed)
        return (np.clip(W, -1, 1) + 1) * 255 / 2

    outputs = np.zeros((h, w, c))

    if args.me_channel == 'concat':
        planes = img.transpose(2, 0, 1)
        wide = np.concatenate((planes[0], planes[1], planes[2]), axis=1)
        mask = np.random.binomial(1, maskp,
                                  h * w * c).reshape(h, w * c).astype(float)
        mask[mask < 1] = np.nan

        est_matrix = estimate(mask * wide)
        for channel in range(c):
            first = channel * w
            outputs[:, :, channel] = est_matrix[:, first:first + w]
    else:
        mask = np.random.binomial(1, maskp, h * w).reshape(h, w).astype(float)
        mask[mask < 1] = np.nan

        for channel in range(c):
            outputs[:, :, channel] = estimate(img[:, :, channel] * mask)

    return outputs
Esempio n. 19
0
def fancy_predict(train,
                  test_data_points,
                  max_rank=8,
                  shrinkage_value=0.02,
                  max_iters=50):
    """Predict test entries with fancyimpute's dense SoftImpute.

    ``train`` is bi-scaled with ``fancy_biscale``, its zero entries are
    treated as missing, and the completed values at the requested positions
    are mapped back with ``fancy_remove_biscale``.

    :param test_data_points: pair ``(row indices, column indices)``
    :return: whatever ``fancy_remove_biscale`` returns for the list of
        ``(value, row, column)`` tuples
    """
    train, rowscale, colscale, rowcenter, colcenter = fancy_biscale(train)
    train[train == 0] = np.nan

    solver = SoftImpute(shrinkage_value=shrinkage_value,
                        max_rank=max_rank,
                        max_iters=max_iters,
                        init_fill_method='zero',
                        verbose=False)
    completed = solver.complete(train)

    rows, cols = test_data_points[0], test_data_points[1]
    predictions = [(completed[r, c], r, c) for r, c in zip(rows, cols)]
    return fancy_remove_biscale(predictions, rowscale, colscale, rowcenter,
                                colcenter)
Esempio n. 20
0
def construct_low_rank_imputer(method, k):
    """Build the imputer selected by ``method``.

    :param method: ``"SoftImpute"``, ``"KNN"`` or ``'II'``
    :param k: ``max_rank`` for SoftImpute, number of neighbours for KNN;
        ignored for ``'II'``
    :return: the configured, unfitted imputer
    :raises NotImplementedError: for any other ``method``
    """
    if method == "SoftImpute":
        return SoftImpute(max_rank=k, verbose=False)
    if method == "KNN":
        return KNN(k=k, verbose=False)
    if method == 'II':
        return IterativeImputer(min_value=0)
    # Raising a plain string is itself a TypeError; raise a real exception.
    raise NotImplementedError("Not implemented: {!r}".format(method))
Esempio n. 21
0
def test_estimators(X, y, dum_enc, classification=True):
    """Score and time every imputation estimator on ``(X, y)``.

    :param dum_enc: passed to ``create_mode_mean_imputer`` together with X
    :param classification: score with ``imputation_score_classification``
        when true, otherwise with ``imputation_score_regression``
    :return: ``(imp_scores, times, imputer_dict)`` -- three dicts keyed by
        estimator name holding the score, the elapsed seconds and the
        estimator object
    """
    ModeMeanImputer = create_mode_mean_imputer(X, dum_enc)

    # All imputation algorithms to test, as (name, estimator, inductive).
    impute_estimators = [
        ("ModeMeanImputer", ModeMeanImputer, True),
        ("KNNImputer", KNNImputer(), True),
        ("Iter_BayesianRidge",
         IterativeImputer(estimator=BayesianRidge(), random_state=0), True),
        ("Iter_DecisionTree",
         IterativeImputer(
             estimator=DecisionTreeRegressor(max_features='sqrt',
                                             random_state=0),
             random_state=0), True),
        ("Iter_RF",
         IterativeImputer(
             estimator=RandomForestRegressor(n_estimators=100,
                                             random_state=0),
             random_state=0), True),
        ("Iter_ExtraTrees",
         IterativeImputer(
             estimator=ExtraTreesRegressor(n_estimators=100, random_state=0),
             random_state=0), True),
        ("Iter_KNRegr",
         IterativeImputer(estimator=KNeighborsRegressor(n_neighbors=15),
                          random_state=0), True),
        ("Iter_SVD",
         IterativeSVD(rank=min(min(X.shape) - 1, 10), verbose=False), False),
        ("SoftImpute", SoftImpute(verbose=False), False),
    ]

    if classification:
        score = imputation_score_classification
    else:
        score = imputation_score_regression

    imp_scores = {}
    times = {}
    for name, estimator, inductive in impute_estimators:
        started = time.time()
        imp_scores[name] = score(X, y, name, estimator, inductive)
        times[name] = time.time() - started

    imputer_dict = {name: estimator
                    for name, estimator, _inductive in impute_estimators}

    return imp_scores, times, imputer_dict
Esempio n. 22
0
 def fi_complete(self, X, method='mf', **params):
     """Complete ``X`` with a fancyimpute solver; store it in ``self.X_filled``.

     :param method: ``'mf'`` (needs ``params['rank']``), ``'knn'`` (needs
         ``params['k']``) or ``'soft'``; any other value does nothing
     """
     if method == 'mf':
         solver = MatrixFactorization(params['rank'])
     elif method == 'knn':
         # Use the k nearest rows which have a feature to fill in each
         # row's missing features
         solver = KNN(params['k'])
     elif method == 'soft':
         # Instead of solving the nuclear norm objective directly, induce
         # sparsity using singular value thresholding
         solver = SoftImpute()
     else:
         return
     self.X_filled = solver.complete(X)
def fancyImputeAttempts(data, dataframe):
    """Complete ``data`` with KNN, SoftImpute and IterativeSVD and report each.

    Every completed matrix is handed to ``doiteration`` together with
    ``dataframe``, preceded by a printed heading.

    :param data: array-like converted to a float NumPy array
    """
    # `np.float` was only an alias of the builtin `float` and no longer
    # exists in current NumPy releases.
    data = np.array(data, float)
    # use fancy impute package
    filled_knn = KNN(k=3, verbose=False).complete(data)
    filled_softimpute = SoftImpute(verbose=False).complete(data)
    filled_svd = IterativeSVD(verbose=False).complete(data)

    # print() with one argument behaves the same on Python 2 and Python 3.
    print("\nKNN computations\n")
    doiteration(filled_knn, dataframe)
    print("\n SOFTIMPUTE computations\n")
    doiteration(filled_softimpute, dataframe)
    print("\n SVD computations\n")
    doiteration(filled_svd, dataframe)
Esempio n. 24
0
    def isvt(self):
        """
        Complete the rating matrix with fancyimpute's SoftImpute.

        :return: ``(rating, filled)`` -- a copy of ``self.sparse`` with its
            zeros replaced by NaN, and the completed matrix
        """
        # fancyimpute expects unknown entries to be NaN, so the zeros of
        # the copy are converted accordingly.
        rating = self.sparse.copy()
        rating[rating == 0] = np.nan

        solver = SoftImpute(max_iters=100)
        return (rating, solver.fit_transform(rating))
Esempio n. 25
0
 def __init__(self, method, **kwargs):
     """Create the imputer selected by ``method`` and store it in ``self.clf``.

     :param method: ``"SoftImpute"`` or ``"KNN"`` (both receive ``kwargs``)
         or ``"Naive"`` (``SimpleFill`` with its defaults)
     :param kwargs: keyword arguments forwarded to the selected imputer
     :raises NotImplementedError: for ``'II'``, which is not tested yet,
         and for any unknown ``method``
     """
     self.clf = None
     self.method = method
     if method == "SoftImpute":
         self.clf = SoftImpute(**kwargs)
     elif method == "KNN":
         self.clf = KNN(**kwargs)
     elif method == "Naive":
         self.clf = SimpleFill()
     elif method == 'II':
         # Intended estimator once validated: IterativeImputer(min_value=0).
         # Raising a plain string is itself a TypeError, so raise a real
         # exception instead.
         raise NotImplementedError('NOT TESTED')
     else:
         raise NotImplementedError(
             "Not Implemented method: {!r}".format(method))
Esempio n. 26
0
    def netPred(self, method='mf', dim=100, alpha=0.1):
        """Compute a dense prediction matrix and store it in ``self.pred``.

        Supported methods: ``mf``, ``cf``, ``mnmf``, ``fancy_nnm``,
        ``fancy_soft``.  Any name containing ``'fancy'`` converts
        ``self.mat`` to a dense float array, treats its zeros as missing and
        completes it with nuclear norm minimisation (name contains
        ``'nnm'``) or SoftImpute (name contains ``'soft'``).  Any other
        method leaves ``self.pred`` untouched.

        :param dim: number of factors/components for ``mf``, ``cf`` and
            ``mnmf``
        :param alpha: regularisation strength for ``mf``, ``cf`` and ``mnmf``
        """
        if method == 'mf':
            model = NMF(n_components=dim, alpha=alpha, l1_ratio=0.2)
            W = model.fit_transform(self.mat)
            H = model.components_
            self.pred = np.matmul(W, H)
        elif method == 'cf':
            model = implicit.als.AlternatingLeastSquares(factors=dim,
                                                         regularization=alpha)
            model.fit(self.mat)
            self.pred = np.matmul(model.item_factors, model.user_factors.T)
        elif method == 'mnmf':
            self.pred = mnmf(self.mat, dim, alpha)
        elif 'fancy' in method:
            # `np.float` was only an alias of the builtin `float` and no
            # longer exists in current NumPy releases.
            X = self.mat.toarray().astype(float)
            X[X == 0] = np.nan
            if 'nnm' in method:
                self.pred = NuclearNormMinimization(
                    error_tolerance=0.01).complete(X)
            elif 'soft' in method:
                self.pred = SoftImpute().complete(X)
Esempio n. 27
0
    def train_models(self, minibatch_size=32):
        """Fit ``self.tmodel`` on the stored memory, imputing it if needed.

        The memory is saved to ``memoryBW.npy``.  When
        ``self.partial_obs_rate`` is positive, ``make_mem_partial_obs`` is
        applied, the array is saved again, its NaN entries are filled with
        SoftImpute and the imputed array is saved too.

        :param minibatch_size: upper bound for the batch size passed to
            ``self.tmodel.fit``; capped at the number of memory rows
        """
        memory_arr = np.array(self.memory)
        file = "memoryBW.npy"
        np.save(file, memory_arr)

        if self.partial_obs_rate > 0:
            # presumably replaces part of memory_arr with NaN in place --
            # TODO confirm against make_mem_partial_obs
            self.make_mem_partial_obs(memory_arr)
            file1 = "memoryBWcorrupted.npy"
            np.save(file1, memory_arr)
            print("Memory size:")
            print(memory_arr.size)
            print("Proportion of missing values:")
            print(np.isnan(memory_arr).sum() / memory_arr.size)
            #memory_train = np.array([exp for exp in memory_arr if not np.isnan(exp[-self.state_size - 1:-1]).any()])
            #imputer = Imputer()
            #memory_final = imputer.fit_transform(memory_train)
            # Fill the missing entries with SoftImpute and keep a copy on disk.
            memory_final = SoftImpute().complete(memory_arr)
            file2 = "memoryBWimputedSoft.npy"
            np.save(file2, memory_final)
        else:
            # Nothing was masked, so train on the memory as recorded.
            memory_final = memory_arr
        if self.useRNN:
            batch_size = len(memory_final)
            minibatch_size = min(minibatch_size, batch_size)
            t_x, t_y = self.setup_batch_for_RNN(memory_final)
            self.tmodel.fit(t_x,
                            t_y,
                            batch_size=minibatch_size,
                            epochs=self.net_train_epochs,
                            validation_split=0.1,
                            callbacks=self.Ttensorboard,
                            verbose=1)
        else:
            batch_size = len(memory_arr)
            minibatch_size = min(minibatch_size, batch_size)
            # batch = random.sample(list(memory_final), minibatch_size)
            # batch = np.array(batch)
            # batch = memory_arr
            # Inputs: the first state_size + action_size columns.
            t_x = memory_final[:, :self.state_size + self.action_size]
            # Targets: the state_size columns that precede the last column.
            t_y = memory_final[:, -self.state_size - 1:-1]
            self.tmodel.fit(t_x,
                            t_y,
                            batch_size=minibatch_size,
                            epochs=self.net_train_epochs,
                            validation_split=0.1,
                            callbacks=self.Ttensorboard,
                            verbose=1)
            '''
Esempio n. 28
0
def get_imputer(imputer_name, **add_params):
    """Build the imputer registered under ``imputer_name``.

    :param imputer_name: case-insensitive name: ``'knn'``, ``'nnm'``,
        ``'soft'``, ``'iterative'`` or ``'biscaler'``
    :param add_params: keyword arguments forwarded to the constructor
    :return: the new object, or ``None`` (after printing a hint) when the
        name is unknown
    """
    # Factories are wrapped in lambdas so that only the selected class is
    # looked up and instantiated.
    factories = {
        'knn': lambda: KNN(**add_params),
        'nnm': lambda: NuclearNormMinimization(**add_params),
        'soft': lambda: SoftImpute(**add_params),
        'iterative': lambda: IterativeImputer(**add_params),
        'biscaler': lambda: BiScaler(**add_params),
    }

    build = factories.get(imputer_name.lower())
    if build is None:
        print('Choose one of predefined imputers')
        return None
    return build()
Esempio n. 29
0
def fill_row(data):
    """Fill a single-row sample using BiScaler + SoftImpute.

    NaN entries of row 0 are first replaced (in place, on the caller's
    array) by the module-level ``averages``.  An all-NaN row is then
    appended, the values at odd column indices are swapped between rows 0
    and 1, the two rows are normalised and completed, and row 1 is removed
    from the result.

    :param data: 2-D float array
    :return: the completed array without its second row
    """
    n_cols = data.shape[1]
    for col in range(n_cols):
        if np.isnan(data[0, col]):
            data[0, col] = averages["avg_" + str(col)]

    blank_row = np.full((1, n_cols), np.nan)
    data = np.concatenate((data, blank_row))

    # Exchange rows 0 and 1 at every odd column.
    for col in range(1, data.shape[1], 2):
        data[0, col], data[1, col] = data[1, col], data[0, col]

    normalized = BiScaler(verbose=False).fit_transform(data)
    filled = SoftImpute(verbose=False).fit_transform(normalized)
    return np.delete(filled, 1, 0)
Esempio n. 30
0
def clean_input(data):
    """Prepare ``data`` for prediction by normalising and imputing it.

    Columns that are entirely NaN get the module-level average written into
    their first row (in place).  A single-row input is divided by its norm
    (or returned unchanged when the norm is 0); larger inputs are normalised
    with BiScaler and completed with SoftImpute.

    :param data: 2-D float array
    :return: the normalised / completed array
    """
    for col in range(data.shape[1]):
        if np.isnan(data[:, col]).all():
            data[0, col] = averages["avg_" + str(col)]

    if data.shape[0] == 1:
        norm = np.linalg.norm(data)
        return data if norm == 0 else data / norm

    normalized = BiScaler(verbose=False).fit_transform(data)
    return SoftImpute(verbose=False).fit_transform(normalized)
Esempio n. 31
0
# Hide the entries selected by missing_mask; the imputers below fill them in.
X_incomplete[missing_mask] = np.nan

# Baseline: SimpleFill configured with "mean".
meanFill = SimpleFill("mean")
X_filled_mean = meanFill.complete(X_incomplete)

# Use 3 nearest rows which have a feature to fill in each row's missing features
knnImpute = KNN(k=3)
X_filled_knn = knnImpute.complete(X_incomplete)

# matrix completion using convex optimization to find low-rank solution
# that still matches observed values. Slow!
X_filled_nnm = NuclearNormMinimization().complete(X_incomplete)

# Instead of solving the nuclear norm objective directly,
# induce sparsity using singular value thresholding
softImpute = SoftImpute()

# simultaneously normalizes the rows and columns of your observed data,
# sometimes useful for low-rank imputation methods
biscaler = BiScaler()

# rescale both rows and columns to have zero mean and unit variance
X_incomplete_normalized = biscaler.fit_transform(X_incomplete)

# Complete the normalized matrix, then map the result back to the original scale.
X_filled_softimpute_normalized = softImpute.complete(X_incomplete_normalized)
X_filled_softimpute = biscaler.inverse_transform(X_filled_softimpute_normalized)

# Same SoftImpute instance, applied to the raw matrix without BiScaler normalization.
X_filled_softimpute_no_biscale = softImpute.complete(X_incomplete)

# Mean squared error of the mean-fill baseline on the hidden entries only.
meanfill_mse = ((X_filled_mean[missing_mask] - X[missing_mask]) ** 2).mean()
print("meanFill MSE: %f" % meanfill_mse)