Example #1
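The three snippets below look like successive versions of scikit-learn's own RFE feature-importance test; they all assume roughly the following imports (a minimal set inferred from the calls they make):

import numpy as np
from numpy.testing import assert_array_equal
from sklearn.datasets import load_iris
from sklearn.ensemble import RandomForestClassifier
from sklearn.feature_selection import RFE
from sklearn.svm import SVC
from sklearn.utils import check_random_state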
def test_rfe_features_importance():
    generator = check_random_state(0)
    iris = load_iris()
    X = np.c_[iris.data, generator.normal(size=(len(iris.data), 6))]
    y = iris.target

    clf = RandomForestClassifier(n_estimators=20,
                                 random_state=generator, max_depth=2)
    rfe = RFE(estimator=clf, n_features_to_select=4, step=0.1)
    rfe.fit(X, y)
    assert len(rfe.ranking_) == X.shape[1]

    clf_svc = SVC(kernel="linear")
    rfe_svc = RFE(estimator=clf_svc, n_features_to_select=4, step=0.1)
    rfe_svc.fit(X, y)

    # Check that the supports are equal
    assert_array_equal(rfe.get_support(), rfe_svc.get_support())
Example #2
def test_rfe_features_importance():
    generator = check_random_state(0)
    iris = load_iris()
    X = np.c_[iris.data, generator.normal(size=(len(iris.data), 6))]
    y = iris.target

    clf = RandomForestClassifier(n_estimators=10, n_jobs=1,
                                 random_state=generator)  # seed so the selection is reproducible
    rfe = RFE(estimator=clf, n_features_to_select=4, step=0.1)
    rfe.fit(X, y)
    assert len(rfe.ranking_) == X.shape[1]

    clf_svc = SVC(kernel="linear")
    rfe_svc = RFE(estimator=clf_svc, n_features_to_select=4, step=0.1)
    rfe_svc.fit(X, y)

    # Check that the supports are equal
    assert_array_equal(rfe.get_support(), rfe_svc.get_support())
Example #3
def test_rfe_features_importance():
    generator = check_random_state(0)
    iris = load_iris()
    X = np.c_[iris.data, generator.normal(size=(len(iris.data), 6))]
    y = iris.target

    clf = RandomForestClassifier(n_estimators=20,
                                 random_state=generator, max_depth=2)
    rfe = RFE(estimator=clf, n_features_to_select=4, step=0.1)
    rfe.fit(X, y)
    assert len(rfe.ranking_) == X.shape[1]

    clf_svc = SVC(kernel="linear")
    rfe_svc = RFE(estimator=clf_svc, n_features_to_select=4, step=0.1)
    rfe_svc.fit(X, y)

    # Check that the supports are equal
    assert_array_equal(rfe.get_support(), rfe_svc.get_support())
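For orientation, here is the same pattern outside a test suite: a minimal runnable sketch (the noise-column count and the printouts are illustrative, not from the original):

import numpy as np
from sklearn.datasets import load_iris
from sklearn.feature_selection import RFE
from sklearn.svm import SVC

iris = load_iris()
rng = np.random.RandomState(0)
# pad the 4 real iris features with 6 noise columns, as the tests do
X = np.c_[iris.data, rng.normal(size=(len(iris.data), 6))]

rfe = RFE(estimator=SVC(kernel="linear"), n_features_to_select=4, step=0.1)
rfe.fit(X, iris.target)
print(rfe.ranking_)       # rank 1 marks the four selected features
print(rfe.get_support())  # boolean mask over all 10 columns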
# imports assumed from the top of the original script:
import math
import pandas as pd
from sklearn import ensemble, linear_model
from sklearn.feature_selection import RFE
from sklearn.svm import SVC

k = 10   # fold count; the original mixed range(10) with % 9, so k equal slices are assumed
RMSE = 0  # accumulator, initialised earlier in the original script
for i in range(k):
    # train on the other k-1 slices, validate on slice i
    x = pd.concat([slices[(i + j) % k] for j in range(1, k)])
    y = pd.concat([slicesOfc7[(i + j) % k] for j in range(1, k)])
    model.fit(x, y)
    pr = model.predict(slices[i])
    RMSE += math.sqrt(((pr - slicesOfc7[i]) ** 2).sum() / len(slicesOfc7[i]))
print("RMSE for validation is:")
print(RMSE / k)
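
# The hand-rolled modulo rotation above can also be expressed with
# scikit-learn's KFold. A sketch (not called here; assumes `data` and
# `target` are the unsliced frame and series that `slices`/`slicesOfc7`
# were cut from):
def kfold_rmse(model, data, target, k):
    from sklearn.model_selection import KFold
    total = 0.0
    for train_idx, test_idx in KFold(n_splits=k).split(data):
        model.fit(data.iloc[train_idx], target.iloc[train_idx])
        pr = model.predict(data.iloc[test_idx])
        total += math.sqrt(((pr - target.iloc[test_idx]) ** 2).sum() / len(test_idx))
    return total / k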

# run SVM for extra features
svc = SVC(kernel="linear", C=1)
sel = RFE(estimator=svc, n_features_to_select=20, step=0.5, verbose=5)
sel.fit(training, c7)
training = training.iloc[:, sel.get_support(indices=True)]  # keep only the selected columns (assumes a DataFrame)
# after SVM-based feature selection
print("after SVM:")
modelLR = linear_model.LinearRegression()
modelL = linear_model.Lasso(normalize=True)
modelGBR = ensemble.GradientBoostingRegressor(n_estimators=500, max_depth=4,
                                              min_samples_split=2,
                                              learning_rate=0.01, loss='ls')
modelR = linear_model.Ridge(alpha=0.5)
RMSELR = 0
RMSEL = 0
RMSEGBR = 0
RMSER = 0
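
# A tidier alternative to the four parallel accumulators above: keep the
# models in one mapping so the (truncated) fold loop below could fit and
# score them uniformly. A sketch with illustrative names:
models = {'LR': modelLR, 'Lasso': modelL, 'GBR': modelGBR, 'Ridge': modelR}
rmse_by_model = {name: 0.0 for name in models}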
for i in range(k):
    # same rotation as the loop above: train on the other k-1 slices,
    # validate on slice i
    x = pd.concat([slices[(i + j) % k] for j in range(1, k)])
    y = pd.concat([slicesOfc7[(i + j) % k] for j in range(1, k)])