def train():
    """Accept an uploaded CSV file and render a preview of its contents.

    The upload is read from the ``inputFiles`` field of a POST request. Its
    sanitised filename is recorded as a ``FileContents`` row, the file is
    saved under ``folder``, and the parsed data is summarised for
    ``train.html`` (first ten rows, ``info`` method, size, shape and column
    names).

    Aborts with 400 when the request carries no usable upload and with 403
    when the filename does not end in ``.csv``.
    """
    # Without an upload there is nothing to preview; reject the request
    # instead of rendering with missing data.
    if request.method != 'POST' or 'inputFiles' not in request.files:
        abort(400)

    file = request.files['inputFiles']
    filename = secure_filename(file.filename)
    if not filename:
        # An empty name would make the save target the upload folder itself.
        abort(400)

    # Validate the extension before anything is written to the database or
    # to disk, so rejected uploads leave no trace behind.
    if filename.split('.')[-1] != 'csv':
        # Forbidden, No Access
        abort(403)

    data_reload = FileContents(name=filename)
    db.create_all()
    db.session.add(data_reload)
    db.session.commit()

    # os.path.join is used so that paths work in every operating system
    csv_path = os.path.join(folder, filename)
    file.save(csv_path)

    # Data review: read back the file that was just saved rather than
    # looking it up through the table, which depends on row ordering.
    new_data = pd.read_csv(csv_path)

    flash('Just a moment, app is thinking!')
    return render_template(
        'train.html',
        new_dataplot=new_data.head(10),
        data_reload=data_reload,
        # Passed uncalled: the template receives the bound ``info`` method.
        new_data_info=new_data.info,
        new_data_size=new_data.size,
        new_data_shape=new_data.shape,
        dropdown_list=list(new_data.columns),
    )
def plot_2():
    """Accept an uploaded file and render ``plot_2.html`` with its columns.

    The upload is read from the ``inputFiles`` field of a POST request. Its
    sanitised filename is recorded as a ``FileContents`` row, the file is
    saved under ``folder`` and parsed with ``pd.read_csv``; the column names
    are handed to the template as ``dropdown_list``.

    Aborts with 400 when the request carries no usable upload.
    """
    # Without an upload there are no columns to offer; reject the request
    # instead of rendering with an undefined dropdown list.
    if request.method != 'POST' or 'inputFiles' not in request.files:
        abort(400)

    file = request.files['inputFiles']
    filename = secure_filename(file.filename)
    if not filename:
        # An empty name would make the save target the upload folder itself.
        abort(400)

    data_reload = FileContents(name=filename)
    db.create_all()
    db.session.add(data_reload)
    db.session.commit()

    csv_path = os.path.join(folder, filename)
    file.save(csv_path)

    # Read back the file that was just saved rather than looking it up
    # through the table, which depends on row ordering.
    new_data = pd.read_csv(csv_path)
    dropdown_list = list(new_data.columns)
    return render_template('plot_2.html', dropdown_list=dropdown_list)
def fit():
    """Fit a fixed set of models on the latest upload and render the scores.

    Reads the target column (``y_var``), the feature columns (``x_vars``)
    and the prediction type (``rd_pred_type``) from the submitted form,
    stores the selection as ``ModelType`` and ``ListXY`` rows, then loads
    the file named by the last ``FileContents`` row, drops rows containing
    missing values and evaluates either the classifiers or the regressors.

    ``fit.html`` receives the selections plus one summary string per model
    (``model_results``) and the model names (``model_names``). Both lists
    are empty when the prediction type is neither "Classification" nor
    "Regression".

    Aborts with 400 when no target/feature columns were submitted or when
    no file has been recorded yet.
    """
    # X and Y vars
    y_var_select = request.form.get('y_var')
    multiselect = request.form.getlist('x_vars')
    # Type of forecast
    pred_type_select = request.form.get('rd_pred_type')

    # Column selection is required to slice the data below; fail early
    # rather than persisting an unusable selection.
    if not y_var_select or not multiselect:
        abort(400)

    # commit the prediction type and the X and Y vars
    db.create_all()
    db.session.add(ModelType(pred_type_select))
    db.session.add(ListXY(y_var_select, multiselect))
    db.session.commit()

    # select vars
    data_reloaded = FileContents.query.all()
    if not data_reloaded:
        # Nothing has been uploaded yet, so there is no file to train on.
        abort(400)
    new_data = pd.read_csv(os.path.join(folder, str(data_reloaded[-1])))
    new_data = new_data.dropna()  # deletes Na and NaN
    X = new_data[multiselect]
    Y = new_data[y_var_select]

    # Defaults keep the template call valid for an unrecognised type.
    names = []
    allmodels = []
    if pred_type_select == "Classification":
        # Refactor columns with text to integer before modelling
        names, allmodels = _evaluate_classifiers(factorise_data(X), Y)
    elif pred_type_select == "Regression":
        # Refactor columns with text to integer before modelling
        names, allmodels = _evaluate_regressors(factorise_data(X), Y)

    return render_template(
        'fit.html',
        y_var_select=y_var_select,
        pred_type_select=pred_type_select,
        multiselect=multiselect,
        model_results=allmodels,
        model_names=names,
    )


def _evaluate_classifiers(X, Y, seed=7):
    """Cross-validate each classifier; return ``(names, summaries)``.

    Every summary is formatted as ``"<name> - <mean accuracy> | <std>"``
    over a 10-fold split.
    """
    models = [
        ('RandomForestClassifier', RandomForestClassifier()),
        ('GradientBoostingClassifier', GradientBoostingClassifier()),
        ('LogisticRegression', LogisticRegression()),
        ('LinearDiscriminantAnalysis', LinearDiscriminantAnalysis()),
        ('KNeighborsClassifier', KNeighborsClassifier()),
        ('GaussianNB', GaussianNB()),
        ('SVC', SVC()),
    ]
    # random_state only has an effect when shuffling is enabled, and
    # scikit-learn rejects a seed passed together with shuffle=False.
    kfold = model_selection.KFold(n_splits=10, shuffle=True,
                                  random_state=seed)
    names = []
    summaries = []
    for name, model in models:
        cv_results = model_selection.cross_val_score(
            model, X, Y, cv=kfold, scoring='accuracy')
        names.append(name)
        summaries.append(
            "%s - %f | %f" % (name, cv_results.mean(), cv_results.std()))
    return names, summaries


def _evaluate_regressors(X, Y):
    """Fit each regressor on a 70/30 split; return ``(names, summaries)``.

    Every summary is formatted as ``"<name> - <RMSE> | -"`` where the RMSE
    is computed on the held-out 30% of the rows.
    """
    models = [
        ('RandomForestRegressor', RandomForestRegressor(n_estimators=200)),
        ('GradientBoostingRegressor',
         GradientBoostingRegressor(n_estimators=200)),
        ('Ridge', Ridge()),
        ('ElasticNet', ElasticNet()),
        ('Lasso', Lasso()),
        ('SVR', SVR()),
    ]
    # The split is seeded and the scaler depends only on the split, so both
    # are identical for every model and are computed once.
    X_train, X_test, y_train, y_test = train_test_split(
        X, Y, test_size=0.3, random_state=7)
    # standard scaler: standardises the feature variables
    sc = StandardScaler()
    X_train = sc.fit_transform(X_train)
    X_test = sc.transform(X_test)

    names = []
    summaries = []
    for name, model in models:
        model.fit(X_train, y_train)
        predictions = model.predict(X_test)
        mse = mean_squared_error(y_test, predictions)
        names.append(name)
        summaries.append("%s - %.2f | %s" % (name, np.sqrt(mse), "-"))
    return names, summaries