def test_save_model_joblib():
    """Check that ``save_model`` with ``method='jb'`` produces a ``.jbl`` file.

    A fresh ``RandomForestClassifier`` is saved under ``tests/randomforest``;
    the test passes when ``randomforest.jbl`` shows up in the ``tests/``
    directory listing. The artifact is removed afterwards.
    """
    rf = RandomForestClassifier()
    try:
        save_model(model=rf, name='tests/randomforest', method='jb')
        expected = 'randomforest.jbl'
        output = os.listdir('tests/')
        assert expected in output
    finally:
        # Clean up even when the assertion (or the save) fails: a stale file
        # left behind by a failed run would make later runs pass spuriously.
        # The existence check keeps a missing file from masking the real error.
        if os.path.exists('tests/randomforest.jbl'):
            os.remove('tests/randomforest.jbl')
def test_get_model():
    """Round-trip a model through ``save_model`` and ``get_model``.

    A ``RandomForestClassifier`` is saved with ``method='jb'`` and loaded back
    from the resulting ``.jbl`` path; the loaded object must still expose a
    ``fit`` attribute.
    """
    rf = RandomForestClassifier()
    save_model(rf, name='tests/sampletest/outputs/models/rf_model', method='jb')
    output = get_model(path='tests/sampletest/outputs/models/rf_model.jbl')
    # Check the object returned by get_model, not the in-memory original;
    # asserting on ``rf`` would pass regardless of what was loaded.
    assert hasattr(output, 'fit')
def test_get_output():
    """Round-trip a DataFrame through ``save_model`` and ``get_output``.

    A small single-column ``DataFrame`` is saved with ``method='jb'`` and
    loaded back from the resulting ``.jbl`` path; the loaded object must
    expose the ``sample`` attribute that the original DataFrame has.
    """
    temp = pd.DataFrame([1, 2, 3, 4, 5, 6])
    save_model(temp, name='tests/sampletest/outputs/submit', method='jb')
    output = get_output(path='tests/sampletest/outputs/submit.jbl')
    # Check the object returned by get_output, not the in-memory original;
    # asserting on ``temp`` would pass regardless of what was loaded.
    assert hasattr(output, 'sample')
# +
import datasist.project as dp
import numpy as np
from sklearn.metrics import mean_squared_error
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import cross_val_score

# Retrieve the data and labels from the processed folder.
data = dp.get_data("train_proc.csv", method='csv')
label = dp.get_data("train_labels.csv", method='csv')

# Base model: a random forest evaluated with 5-fold cross-validation.
rf = RandomForestRegressor(n_estimators=10, random_state=2)
score = cross_val_score(estimator=rf, X=data, y=label.Rating, cv=5,
                        scoring="neg_mean_squared_error", n_jobs=-1)
# The scorer yields *negated* MSE per fold. Negate the mean to get MSE, then
# take the square root so the reported and stored value really is an RMSE.
score = np.sqrt(-1 * np.mean(score))
print("RMSE is {}".format(score))

# cross_val_score fits clones of the estimator and leaves `rf` itself
# unfitted, so fit it on the full training data before persisting it;
# otherwise the saved model cannot be used for prediction.
rf.fit(data, label.Rating)

# Save the model.
dp.save_model(rf, name='rf_model_n10')

# Save the result.
result = {"rmse_rf_model_n10": score}
dp.save_outputs(result, name='rmse_rf_model_n10')
# -