예제 #1
0
# replace missing values via imp.replace (logged as "random replacement")
# NOTE: print is written as a single-argument call so this fragment parses
# on Python 3 while printing the exact same text on Python 2.
print('imputing with random replacement')
data_replace = imp.replace(x, missing_data_cond)


# replace missing values with feature summary
def summ_func(values):
    """Return the first element of ``mode(values)``.

    Presumably ``mode`` is ``scipy.stats.mode`` and element 0 holds the
    modal value(s) -- confirm against the file's imports.
    """
    return mode(values)[0]


print('imputing with feature summarization (mode)')
data_mode = imp.summarize(x, summ_func, missing_data_cond)

# replace categorical features with one hot row
print('imputing with one-hot')
data_onehot = imp.binarize_data(x, cat_cols)

# replace missing data with predictions using random forest
print('imputing with predicted values from random forest')
clf = RandomForestClassifier(n_estimators=100, criterion='gini')
data_rf = imp.predict(x, cat_cols, missing_data_cond, clf)

# replace missing data with predictions using SVM
# `clf` is rebound here; the random-forest result is already in data_rf.
# NOTE(review): these keywords look like scikit-learn LinearSVC parameters;
# confirm what `SVM` is aliased to in this file's imports.
print('imputing with predicted values usng SVM')
clf = SVM(penalty='l2',
          loss='squared_hinge',
          dual=True,
          tol=0.0001,
          C=1.0,
          multi_class='ovr',
          fit_intercept=True,
          intercept_scaling=1,
          class_weight=None,
          verbose=0,
          random_state=None,
          max_iter=1000)
예제 #2
0
    # drop observations with missing variables
    print 'imputing with drop'
    data_dict['Drop'] = imp.drop(pert_data, miss_data_cond)

    # replace missing values with random existing values
    print 'imputing with random replacement'
    data_dict['RandomReplace'] = imp.replace(pert_data, miss_data_cond)

    # replace missing values with feature summary
    print 'imputing with feature summarization (mode)'
    summ_func = lambda x: mode(x)[0]
    data_dict['Mode'] = imp.summarize(pert_data, summ_func, miss_data_cond)

    # replace missing data with predictions using random forest
    print 'imputing with Random Forest'
    data_dict['RandomForest'] = imp.predict(pert_data, cat_cols, miss_data_cond)

    # replace missing data with values obtained after factor analysis
    print 'imputing with PCA'
    data_dict['PCA'] = imp.factor_analysis(pert_data, cat_cols, miss_data_cond)

    # replace missing data with knn
    print 'imputing with K-Nearest Neighbors'
    data_dict['KNN'] = imp.knn(pert_data, n_neighbors, np.mean, miss_data_cond,
                               cat_cols)

    conf_methods = ['RandomReplace', 'Mode', 'RandomForest', 'PCA', 'KNN']
    methods = ['RawData', 'Drop', 'RandomReplace', 'Mode', 'RandomForest',
               'PCA', 'KNN']

    color_mapping = {}
예제 #3
0
# replace missing values via imp.replace (logged as "random replacement")
# NOTE: print is written as a single-argument call so this fragment parses
# on Python 3 while printing the exact same text on Python 2.
print("imputing with random replacement")
data_replace = imp.replace(x, missing_data_cond)


# replace missing values with feature summary
def summ_func(values):
    """Return the first element of ``mode(values)``.

    Presumably ``mode`` is ``scipy.stats.mode`` and element 0 holds the
    modal value(s) -- confirm against the file's imports.
    """
    return mode(values)[0]


print("imputing with feature summarization (mode)")
data_mode = imp.summarize(x, summ_func, missing_data_cond)

# replace categorical features with one hot row
print("imputing with one-hot")
data_onehot = imp.binarize_data(x, cat_cols)

# replace missing data with predictions using random forest
print("imputing with predicted values from random forest")
clf = RandomForestClassifier(n_estimators=100, criterion="gini")
data_rf = imp.predict(x, cat_cols, missing_data_cond, clf)

# replace missing data with predictions using SVM
print("imputing with predicted values usng SVM")
clf = clf = SVM(
    penalty="l2",
    loss="squared_hinge",
    dual=True,
    tol=0.0001,
    C=1.0,
    multi_class="ovr",
    fit_intercept=True,
    intercept_scaling=1,
    class_weight=None,
    verbose=0,
    random_state=None,
예제 #4
0
    # drop observations with missing variables
    print 'imputing with drop'
    data_dict['Drop'] = imp.drop(pert_data, miss_data_cond)

    # replace missing values with random existing values
    print 'imputing with random replacement'
    data_dict['RandomReplace'] = imp.replace(pert_data, miss_data_cond)

    # replace missing values with feature summary
    print 'imputing with feature summarization (mode)'
    summ_func = lambda x: mode(x)[0]
    data_dict['Mode'] = imp.summarize(pert_data, summ_func, miss_data_cond)

    # replace missing data with predictions using random forest
    print 'imputing with Random Forest'
    data_dict['RandomForest'] = imp.predict(pert_data, cat_cols,
                                            miss_data_cond)

    # replace missing data with values obtained after factor analysis
    print 'imputing with PCA'
    data_dict['PCA'] = imp.factor_analysis(pert_data, cat_cols, miss_data_cond)

    # replace missing data with knn
    print 'imputing with K-Nearest Neighbors'
    data_dict['KNN'] = imp.knn(pert_data, n_neighbors, np.mean, miss_data_cond,
                               cat_cols)

    conf_methods = ['RandomReplace', 'Mode', 'RandomForest', 'PCA', 'KNN']
    methods = [
        'RawData', 'Drop', 'RandomReplace', 'Mode', 'RandomForest', 'PCA',
        'KNN'
    ]