Ejemplo n.º 1
0
                         ('std_scalar', StandardScaler())])

num_pipeline = Pipeline([('std_scalar', StandardScaler())])
housing_num_tr = num_pipeline.fit_transform(housing_tr)
'''
#%%
'''
housing['total_bedrooms'] = housing_tr['total_bedrooms']
housing['rooms_per_household'] = housing_tr['rooms_per_household']
housing['population_per_household'] = housing_tr['population_per_household']
housing['bedrooms_per_room'] = housing_tr['bedrooms_per_room']
'''
# Build the attribute lists: every column of `housing` is numeric except
# 'ocean_proximity', which is the single categorical column.
num_attribs = list(housing)
num_attribs.remove('ocean_proximity')
cat_attribs = ['ocean_proximity']
# Route the numeric columns through num_pipeline (defined in an earlier cell)
# and one-hot encode the categorical column; ColumnTransformer concatenates
# both outputs into a single feature matrix.
full_pipeline = ColumnTransformer([('num', num_pipeline, num_attribs),
                                   ('cat', OneHotEncoder(), cat_attribs)])
housing_prepared = full_pipeline.fit_transform(housing)

#%% Select and train a model
# Baseline model: ordinary linear regression on the prepared features.
lin_reg = LinearRegression()
lin_reg.fit(housing_prepared, housing_labels)

#%%
# Evaluate on the same data the model was fitted on (no hold-out set here),
# so these error figures are optimistic estimates of generalization error.
housing_predictions = lin_reg.predict(housing_prepared)
lin_mse = mean_squared_error(housing_labels, housing_predictions)
lin_rmse = np.sqrt(lin_mse)  # RMSE, in the same units as the labels

#%%
# Mean absolute error on the training predictions, for comparison with RMSE.
lin_mae = mean_absolute_error(housing_labels, housing_predictions)

#%%
Ejemplo n.º 2
0
# Display the numeric-pipeline output produced in an earlier cell.
housing_num_tr

# **Warning**: earlier versions of the book applied different transformations to different columns using a solution based on a `DataFrameSelector` transformer and a `FeatureUnion` (see below). It is now preferable to use the `ColumnTransformer` class that will be introduced in Scikit-Learn 0.20. For now we import it from `future_encoders.py`, but when Scikit-Learn 0.20 is released, you can import it from `sklearn.compose` instead:

# In[71]:

# Backport of ColumnTransformer for scikit-learn < 0.20 (see warning above).
from future_encoders import ColumnTransformer

# In[72]:

# All columns of the numeric frame are numeric attributes; the single
# categorical attribute 'ocean_proximity' is handled separately.
num_attribs = list(housing_num)
cat_attribs = ["ocean_proximity"]

# Scale the numeric columns with num_pipeline (defined in an earlier cell)
# and one-hot encode the categorical column, concatenating both results.
full_pipeline = ColumnTransformer([
    ("num", num_pipeline, num_attribs),
    ("cat", OneHotEncoder(), cat_attribs),
])

housing_prepared = full_pipeline.fit_transform(housing)

# In[73]:

# Display the combined, fully preprocessed feature matrix.
housing_prepared

# In[74]:

# Inspect the shape of the prepared matrix (rows x transformed features).
housing_prepared.shape

# For reference, here is the old solution based on a `DataFrameSelector` transformer (to just select a subset of the Pandas `DataFrame` columns), and a `FeatureUnion`:

# In[75]:
Ejemplo n.º 3
0

if __name__ == "__main__":
    # sklearn.preprocessing.Imputer was removed in scikit-learn 0.22;
    # SimpleImputer (available since 0.20, same release as ColumnTransformer
    # used below) is the drop-in replacement.
    from sklearn.impute import SimpleImputer

    # Load the dataset (JSON records with numeric columns, the categorical
    # columns 'mood'/'weather'/'activity', and a 'score' target).
    df = load_data("Data/data.json")

    # Separate features from the regression target.
    features = df.drop("score", axis=1)
    y = df["score"].copy()

    # Numeric attribute names: everything except the categorical columns.
    # DataFrame.drop returns a copy, so `features` itself is untouched.
    numeric_values = features.drop(["mood", "weather", "activity"], axis=1)
    num_attribs = list(numeric_values)

    cat_attribs = ["mood", "weather", "activity"]

    # Numeric preprocessing: fill missing values with the column median,
    # then standardize.
    # NOTE: the original pipeline began with DataFrameSelector(num_attribs),
    # but ColumnTransformer already hands this pipeline exactly the
    # num_attribs columns, so re-selecting them was redundant and is removed.
    num_pipeline = Pipeline([
        ('imputer', SimpleImputer(strategy="median")),
        ('std_scaler', StandardScaler()),
    ])

    # Numeric columns go through num_pipeline; categorical columns are
    # one-hot encoded. (Transformer names also fix the 'pipline' typos.)
    full_pipeline = ColumnTransformer([
        ("num_pipeline", num_pipeline, num_attribs),
        ("cat_pipeline", OneHotEncoder(), cat_attribs),
    ])

    X = full_pipeline.fit_transform(features)

    # Fix the seed so the stochastic forest fit is reproducible run-to-run.
    forest_reg = RandomForestRegressor(random_state=42)
    forest_reg.fit(X, y)