def test_lm(fit_intercept):
    """Smoke-test fitting and predicting with ``LinearRegression``.

    Builds a 100-sample, 5-feature regression problem with
    ``make_regression`` (``chunks=50``), fits the estimator on it and calls
    ``predict`` on the same inputs.

    Parameters
    ----------
    fit_intercept
        Forwarded unchanged to ``LinearRegression``. When truthy, the fitted
        ``intercept_`` attribute is asserted to be something other than
        ``None``.
    """
    features, targets = make_regression(n_samples=100, n_features=5, chunks=50)
    estimator = LinearRegression(fit_intercept=fit_intercept)
    estimator.fit(features, targets)
    # The prediction itself is discarded; the call only has to complete.
    estimator.predict(features)
    if not fit_intercept:
        return
    assert estimator.intercept_ is not None
) X_traintrial = de.fit_transform(X_traintrial) de = DummyEncoder( ["year", "hour", "is_holiday", "weekday", "is_workingday", "weathersit"] ) X_testtrial = de.fit_transform(X_testtrial) ) lr = LinearRegression() lr.fit(X_traintrial.values, y_traintrial.values) X_testtrial = X_testtrial.drop("season", axis=1) y_predtrial = lr.predict(X_traintrial.values) y_predtest = lr.predict(X_testtrial.values) import dask.array as da y_predtest.compute() # random forest rf = RandomForestRegressor(n_estimators=1000, max_depth=10) rf.fit(X_traintrial, y_traintrial) rf_pred = rf.predict(X_testtrial)
'Stay_In_Current_City_Years', 'Marital_Status' ]] target = df['Purchase'] #creating dummies for the categorical variables data = dd.get_dummies(categorical_variables.categorize()).compute() #converting dataframe to array datanew = data.values #fit the model from dask_ml.linear_model import LinearRegression lr = LinearRegression() lr.fit(datanew, target) #preparing the test data test_categorical = test[[ 'Gender', 'Age', 'Occupation', 'City_Category', 'Stay_In_Current_City_Years', 'Marital_Status' ]] test_dummy = dd.get_dummies(test_categorical.categorize()).compute() testnew = test_dummy.values #predict on test and upload pred = lr.predict(testnew) #Clustering/K-Means from dask_ml.cluster import KMeans model = KMeans() model.fit(datanew, target)
# Notebook-export fragment (cells In[44]-In[48]) flattened onto one line.
# Read statement by statement, it: evaluates `client`; imports
# plot_calibration_curve, plot_learning_curve and plot_feature_importances
# from scikitplot; creates a LinearRegression and fits it on
# X_train.values / y_train.values under joblib.parallel_backend('dask');
# predicts on X_test.values into y_pred_lr; then calls mse(...) and
# r2_score(...) on the test targets and the predictions.
# NOTE(review): as written, everything after the first '#' on the line below
# parses as a comment, so only `client` would execute -- the original line
# breaks need restoring.
# NOTE(review): indentation was lost, so it cannot be told from here whether
# `y_pred_lr = lr.predict(...)` belongs inside the `with` block -- confirm
# against the source notebook.
# NOTE(review): mse() receives y_pred_lr without .compute() while r2_score()
# receives .compute()'d inputs; presumably mse accepts lazy arrays -- verify.
client # #### Linear Regression # In[44]: from scikitplot.metrics import plot_calibration_curve from scikitplot.plotters import plot_learning_curve from scikitplot.estimators import plot_feature_importances # In[45]: lr = LinearRegression() with joblib.parallel_backend('dask'): lr_model = lr.fit(X_train.values, y_train.values) y_pred_lr = lr.predict(X_test.values) # In[46]: mse(y_test.values, y_pred_lr) # In[47]: r2_score(y_test.values.compute(), y_pred_lr.compute()) # ### Non Linear Models # #### Random Forest Regressor # In[48]: