Example #1
import pytest
from dask_ml.datasets import make_regression
from dask_ml.linear_model import LinearRegression

# Decorator assumed (not in the original snippet): supplies fit_intercept to the test.
@pytest.mark.parametrize("fit_intercept", [True, False])
def test_lm(fit_intercept):
    X, y = make_regression(n_samples=100, n_features=5, chunks=50)
    lr = LinearRegression(fit_intercept=fit_intercept)
    lr.fit(X, y)
    lr.predict(X)
    if fit_intercept:
        assert lr.intercept_ is not None
Example #2
# Imports assumed for this snippet (not shown in the original).
from dask_ml.preprocessing import DummyEncoder
from dask_ml.linear_model import LinearRegression

# One-hot encode the categorical columns of the training set.
de = DummyEncoder(
    ["year", "hour", "is_holiday", "weekday", "is_workingday", "weathersit"]
)
X_traintrial = de.fit_transform(X_traintrial)

# Reuse the fitted encoder on the test set so it gets the same dummy columns.
X_testtrial = de.transform(X_testtrial)

# Fit a linear model on the encoded training data.
lr = LinearRegression()
lr.fit(X_traintrial.values, y_traintrial.values)

# Drop the raw "season" column from the test set before predicting.
X_testtrial = X_testtrial.drop("season", axis=1)

# Lazy predictions on the training and test sets.
y_predtrial = lr.predict(X_traintrial.values)
y_predtest = lr.predict(X_testtrial.values)

import dask.array as da

# Materialise the lazy test-set predictions.
y_predtest.compute()
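
A minimal sketch of scoring these predictions, assuming the held-out targets
live in a dask series named y_testtrial (that name is an assumption; it does
not appear above):

from sklearn.metrics import mean_squared_error

# Bring targets and predictions into memory, then score with scikit-learn.
mean_squared_error(y_testtrial.values.compute(), y_predtest.compute())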

# Random forest (assumed to be scikit-learn's RandomForestRegressor,
# since dask-ml does not provide one).
from sklearn.ensemble import RandomForestRegressor

rf = RandomForestRegressor(n_estimators=1000, max_depth=10)
rf.fit(X_traintrial, y_traintrial)
rf_pred = rf.predict(X_testtrial)
Example #3

import dask.dataframe as dd  # needed for dd.get_dummies below

# Select the categorical feature columns and the target column.
categorical_variables = df[[
    'Gender', 'Age', 'Occupation', 'City_Category',
    'Stay_In_Current_City_Years', 'Marital_Status'
]]
target = df['Purchase']

# Create dummy variables for the categorical columns; compute() brings the
# result into memory as a pandas DataFrame.
data = dd.get_dummies(categorical_variables.categorize()).compute()

#converting dataframe to array
datanew = data.values

#fit the model
from dask_ml.linear_model import LinearRegression
lr = LinearRegression()
lr.fit(datanew, target)

#preparing the test data
test_categorical = test[[
    'Gender', 'Age', 'Occupation', 'City_Category',
    'Stay_In_Current_City_Years', 'Marital_Status'
]]
test_dummy = dd.get_dummies(test_categorical.categorize()).compute()
testnew = test_dummy.values

#predict on test and upload
pred = lr.predict(testnew)
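
A possible way to save the predictions for upload (the file name and layout
are assumptions, not part of the original):

import numpy as np
import pandas as pd

# Materialise the predictions (in case they are a lazy dask array) and
# write them to a CSV file that can be uploaded.
submission = pd.DataFrame({'Purchase': np.asarray(pred)})
submission.to_csv('submission.csv', index=False)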

# Clustering / K-Means
from dask_ml.cluster import KMeans
model = KMeans()
# K-Means is unsupervised, so only the feature matrix is passed to fit().
model.fit(datanew)
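
After fitting, the cluster assignments and centroids can be read off the usual
scikit-learn-style attributes (a brief sketch):

labels = model.labels_            # cluster label for each row
centers = model.cluster_centers_  # learned cluster centres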

Example #4

# #### Linear Regression

# In[44]:

from scikitplot.metrics import plot_calibration_curve
from scikitplot.plotters import plot_learning_curve
from scikitplot.estimators import plot_feature_importances
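
# The plotting helpers imported above are not called in the cells shown here.
# A rough sketch of how plot_feature_importances might be used on a fitted
# tree-based model (`rf_model` is a placeholder name, not from the notebook):
#
#     import matplotlib.pyplot as plt
#     plot_feature_importances(rf_model, feature_names=X_train.columns)
#     plt.show()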

# In[45]:

# Assumes `import joblib` and a dask.distributed Client from an earlier cell.
lr = LinearRegression()
with joblib.parallel_backend('dask'):
    lr_model = lr.fit(X_train.values, y_train.values)
    y_pred_lr = lr.predict(X_test.values)

# In[46]:

# `mse` is presumably mean_squared_error, imported in an earlier cell.
mse(y_test.values, y_pred_lr)

# In[47]:

r2_score(y_test.values.compute(), y_pred_lr.compute())

# ### Non-Linear Models

# #### Random Forest Regressor

# In[48]: