Example #1
0
def test_rank1_symmetric_convex_solver():
    """A rank-1 symmetric matrix should be recoverable by the convex solver
    when a symmetric solution is required."""
    expected, incomplete = create_rank1_data(symmetric=True)
    recovered = NuclearNormMinimization(
        require_symmetric_solution=True).complete(incomplete)
    # Spot-check a single imputed entry against the ground-truth matrix.
    assert abs(recovered[1, 2] - expected[1, 2]) < 0.01, \
        "Expected %0.4f but got %0.4f" % (
            expected[1, 2], recovered[1, 2])
Example #2
0
def test_rank1_convex_solver():
    """A rank-1 (non-symmetric) matrix should be recoverable by the convex
    nuclear-norm solver given enough iterations."""
    expected, incomplete = create_rank1_data(symmetric=False)
    recovered = NuclearNormMinimization(max_iters=50000).complete(incomplete)
    # Spot-check a single imputed entry against the ground-truth matrix.
    assert abs(recovered[1, 2] - expected[1, 2]) < 0.01, \
        "Expected %0.4f but got %0.4f" % (
            expected[1, 2], recovered[1, 2])
def test_rank1_symmetric_convex_solver():
    """Symmetric rank-1 completion variant with a tighter (0.001) tolerance."""
    expected, incomplete = create_rank1_data(symmetric=True)
    solver = NuclearNormMinimization(require_symmetric_solution=True)
    recovered = solver.complete(incomplete)
    error = abs(recovered[1, 2] - expected[1, 2])
    # Spot-check one imputed entry; this variant demands 10x tighter accuracy.
    assert error < 0.001, \
        "Expected %0.4f but got %0.4f" % (
            expected[1, 2], recovered[1, 2])
Example #4
0
def test_nuclear_norm_minimization_with_low_rank_random_matrix():
    """Nuclear-norm completion of the first 100 rows of the shared low-rank
    random matrix should keep the missing-entry MAE below 0.1."""
    completed = NuclearNormMinimization(
        max_iters=2000).complete(XY_incomplete[:100])
    _, missing_mae = reconstruction_error(
        XY[:100],
        completed,
        missing_mask[:100],
        name="NuclearNorm")
    assert missing_mae < 0.1, "Error too high!"
Example #5
0
from fancyimpute import NuclearNormMinimization

# Configure a convex matrix-completion solver whose imputed entries are
# clamped to [0, 1]; error_tolerance controls convergence of the optimizer.
solver = NuclearNormMinimization(
    min_value=0.0,
    max_value=1.0,
    error_tolerance=0.0005)

# X_incomplete has missing data which is represented with NaN values
# (X_incomplete is assumed to be defined by the surrounding context).
X_filled = solver.complete(X_incomplete)
Example #6
0
# NOTE(review): sklearn.preprocessing.Imputer is deprecated/removed in modern
# scikit-learn (replaced by sklearn.impute.SimpleImputer) — confirm the
# installed version before reusing this snippet.
from sklearn.preprocessing import Imputer
# Mean-impute column-wise (axis=0); missing cells are encoded as NaN.
imp = Imputer(missing_values='NaN', strategy="mean", axis=0)
#strategy: "mean" or "median" or "most_frequent"
# Impute a single column, reshaped to the (n_samples, 1) 2-D form sklearn expects.
train['N30_missing_imputed'] = imp.fit_transform(train['N30'].values.reshape(
    -1, 1))
imp.fit_transform(
    train.iloc[:, 1:])  #Removing first column as it is a text variable

#Reference: https://pypi.python.org/pypi/fancyimpute/0.0.4
#pip3 install fancyimpute
#ONLY NUMERIC VALUES
from fancyimpute import NuclearNormMinimization, KNN, MICE
# Convex nuclear-norm completion with imputed values clamped to [0, 1].
solver = NuclearNormMinimization(min_value=0.0,
                                 max_value=1.0,
                                 error_tolerance=0.0005)
X_filled = solver.complete(train['N30'].values.reshape(-1, 1))
# NOTE(review): this overwrites the single-column result above with a
# whole-frame completion — presumably intentional, but verify.
X_filled = solver.complete(train)
# k-nearest-neighbours imputation using the 3 closest rows.
X_filled_knn = KNN(k=3).complete(train)
#https://github.com/hammerlab/fancyimpute/blob/master/fancyimpute/mice.py
# NOTE(review): DataFrame.as_matrix() was removed in pandas 1.0; newer code
# uses .values or .to_numpy() — confirm the pandas version in use.
X_filled_mice = MICE().complete(train.as_matrix())
# MICE returns a bare array; restore the original column labels and index.
X_filled_mice_df = pd.DataFrame(X_filled_mice)
X_filled_mice_df.columns = train.columns
X_filled_mice_df.index = train.index
#Other methods: SimpleFill, SoftImpute, IterativeSVD, MICE, MatrixFactorization, NuclearNormMinimization, KNN, BiScaler
#SimpleFill: uses mean or median; SoftImpute: Matrix completion;

###Smote
#Only numeric/boolean and non_null values as input to TSNE model :: BETTER TRY THIS AFTER MISSING VALUE IMPUTATION AND ENCODING
from imblearn.over_sampling import SMOTE
# Fixed random_state makes the synthetic oversampling reproducible.
sm = SMOTE(random_state=42)
X_train_new, y_train_new = sm.fit_sample(train.dropna().iloc[:, 1:44],
def test_nuclear_norm_minimization_with_low_rank_random_matrix():
    """Without requiring a symmetric solution, nuclear-norm completion of the
    first 100 rows should keep the missing-entry MAE below 0.1."""
    solver = NuclearNormMinimization(require_symmetric_solution=False)
    completed = solver.complete(XY_incomplete[:100])
    _, missing_mae = reconstruction_error(XY[:100],
                                          completed,
                                          missing_mask[:100],
                                          name="NuclearNorm")
    assert missing_mae < 0.1, "Error too high!"