def test_f_regression_input_dtype():
    """
    Check that f_regression produces identical F-scores and p-values
    whether the target vector is given as integers or as floats
    """
    features = np.random.rand(10, 20)
    int_target = np.arange(10).astype(int)

    int_scores, int_pvalues = f_regression(features, int_target)
    # The float copy of the target is built only now, after the integer run.
    float_scores, float_pvalues = f_regression(
        features, int_target.astype(float)
    )

    assert_array_equal(int_scores, float_scores)
    assert_array_equal(int_pvalues, float_pvalues)
def test_f_regression_input_dtype():
    """
    Test whether f_regression returns the same value
    for any numeric data_type

    The target is passed once as integers and once as floats; the
    resulting F-scores and p-values must agree to 5 decimal places.
    """

    X = np.random.rand(10, 20)
    # Use the builtin ``int``/``float``: the ``np.int``/``np.float`` aliases
    # were deprecated in NumPy 1.20 and removed in 1.24.
    y = np.arange(10).astype(int)

    F1, pv1 = f_regression(X, y)
    F2, pv2 = f_regression(X, y.astype(float))
    assert_array_almost_equal(F1, F2, 5)
    assert_array_almost_equal(pv1, pv2, 5)
def test_f_regression():
    """
    Sanity-check the F test on a simple simulated regression problem:
    the first five p-values must be significant, the rest must not be
    vanishingly small
    """
    features, target = make_regression(
        n_samples=200,
        n_features=20,
        n_informative=5,
        shuffle=False,
        random_state=0,
    )

    f_scores, p_values = f_regression(features, target)

    # All scores are strictly positive and all p-values lie strictly in (0, 1).
    assert (f_scores > 0).all()
    assert (p_values > 0).all()
    assert (p_values < 1).all()

    # First five features: significant at the 5% level.
    assert (p_values[:5] < 0.05).all()
    # Remaining features: p-values stay above 1e-4.
    assert (p_values[5:] > 1.0e-4).all()
Example #4
0
def test_f_regression():
    """
    Verify that the F test gives sensible scores and p-values
    on a small simulated regression data set
    """
    simulated = make_regression(n_samples=200, n_features=20, n_informative=5,
                                shuffle=False, random_state=0)
    X, Y = simulated

    scores, pvalues = f_regression(X, Y)

    # Range checks on every feature.
    assert (scores > 0).all()
    assert (pvalues > 0).all()
    assert (pvalues < 1).all()

    # The first five p-values are checked against 0.05, the rest against 1e-4.
    leading, trailing = pvalues[:5], pvalues[5:]
    assert (leading < 0.05).all()
    assert (trailing > 1.e-4).all()
Example #5
0
# Store data in a consistent place
DATA_DIR = '/users/ansonau/DS_HK_2/data/'

cars = read_csv(DATA_DIR + 'cars93.csv')

# Clean data: keep only the numeric columns and replace missing values
# with the mean of their column.
car_x = cars._get_numeric_data()
car_x = car_x.fillna(car_x.mean())
# Drop the target (y) columns from the *cleaned* frame. Dropping from `cars`
# instead would discard the cleaning above and reintroduce the non-numeric
# columns and NaNs. `axis` is passed by keyword because pandas 2.0 no longer
# accepts it positionally.
car_x = car_x.drop(['MPG.highway', 'MPG.city'], axis=1)
# Alternative: select the feature columns explicitly.
#car_x = cars[['EngineSize','Horsepower','RPM','Rev.per.mile','Fuel.tank.capacity','Passengers','Length','Wheelbase','Width','Turn.circle','Weight']]
car_y = cars['MPG.highway']

# Univariate F-test of every remaining feature against highway MPG;
# print one [F-score, p-value] pair per feature.
f, p = f_regression(car_x, car_y.values)
# Parenthesised call form works on both Python 2 and Python 3.
print([[x, y] for x, y in zip(f, p)])

# Regression on Weight

# Polynomial regression: add the 2nd, 3rd and 4th powers of Weight as columns.
# NOTE: the last column name is spelled 'Weigh_fourth'; kept as-is.
for column, power in (('Weight_squared', 2),
                      ('Weight_cubed', 3),
                      ('Weigh_fourth', 4)):
    cars[column] = cars['Weight']**power

MPG = cars['MPG.city']

# One [w, w^2, w^3, w^4] row per car.
Weight_poly = [
    list(row)
    for row in zip(cars['Weight'].values, cars['Weight_squared'].values,
                   cars['Weight_cubed'], cars['Weigh_fourth'])
]

ridge = linear_model.Ridge()