def linear():
    """Linear-regression endpoint: fit on the train split, predict the
    test split, and return the requested slice of results as JSON."""
    params = get_params(request.args)
    X_train, X_test, y_train, y_test = regressionData(params[2], params[3])
    start, end = params[0], params[1]

    regressor = linear_model.LinearRegression()
    regressor.fit(X_train, y_train)
    predictions = regressor.predict(X_test)

    # Build the result frame and keep only the requested row window.
    res = result(X_test, y_test, predictions)[start:end]
    return res.to_json(orient='index')
def ridge():
    """Ridge-regression endpoint: fit with the caller-supplied alpha,
    predict the test split, and return the requested slice as JSON."""
    params = get_params(request.args)
    start, end = params[0], params[1]
    X_train, X_test, y_train, y_test = regressionData(params[2], params[3])

    # NOTE(review): Ridge(normalize=...) was deprecated in scikit-learn 1.0
    # and removed in 1.2 — confirm the pinned sklearn version still accepts it.
    estimator = linear_model.Ridge(alpha=params[10], normalize=True)
    estimator.fit(X_train, y_train)
    predictions = estimator.predict(X_test)

    res = result(X_test, y_test, predictions)[start:end]
    print("ridge")
    print(params)
    return res.to_json(orient='index')
def svm():
    """SVC classification endpoint: train with the requested kernel,
    predict the test split, and return the requested slice as JSON."""
    params = get_params(request.args)
    X_train, X_test, y_train, y_test = createData(params[2], params[3], params[4])
    start, end = params[0], params[1]

    classifier = SVC(kernel=params[6])
    # np.ravel flattens the target column to the 1-D shape sklearn expects.
    classifier.fit(X_train, np.ravel(y_train))
    predictions = classifier.predict(X_test)

    res = result(X_test, y_test, predictions)[start:end]
    print("svm")
    print(params)
    return res.to_json(orient='index')
def naive():
    """Gaussian naive-Bayes endpoint: train, predict the test split,
    and return the requested slice of results as JSON."""
    params = get_params(request.args)
    X_train, X_test, y_train, y_test = createData(params[2], params[3], params[4])
    start, end = params[0], params[1]

    nb = GaussianNB()
    # np.ravel flattens the target column to the 1-D shape sklearn expects.
    nb.fit(X_train, np.ravel(y_train))
    predictions = nb.predict(X_test)

    res = result(X_test, y_test, predictions)[start:end]
    print("naive")
    print(params)
    return res.to_json(orient='index')
def logistic():
    """Logistic-regression endpoint: train with a fixed random seed,
    predict the test split, and return the requested slice as JSON."""
    params = get_params(request.args)
    X_train, X_test, y_train, y_test = createData(params[2], params[3], params[4])
    start, end = params[0], params[1]

    lr = linear_model.LogisticRegression(random_state=0)
    # np.ravel flattens the target column to the 1-D shape sklearn expects.
    lr.fit(X_train, np.ravel(y_train))
    predictions = lr.predict(X_test)

    res = result(X_test, y_test, predictions)[start:end]
    print("log")
    print(params)
    return res.to_json(orient='index')
def dtree():
    """Decision-tree endpoint: train with the caller-supplied max depth,
    predict the test split, and return the requested slice as JSON."""
    params = get_params(request.args)
    X_train, X_test, y_train, y_test = createData(params[2], params[3], params[4])
    start, end = params[0], params[1]

    # fit() returns self, so constructing and fitting can be chained.
    tree = DecisionTreeClassifier(max_depth=params[7]).fit(X_train, np.ravel(y_train))
    predictions = tree.predict(X_test)

    res = result(X_test, y_test, predictions)[start:end]
    print("dtree")
    print(params)
    return res.to_json(orient='index')
def rtree():
    """Random-forest endpoint: train with the caller-supplied tree count,
    predict the test split, and return the requested slice as JSON."""
    params = get_params(request.args)
    X_train, X_test, y_train, y_test = createData(params[2], params[3], params[4])
    start, end = params[0], params[1]

    forest = RandomForestClassifier(n_estimators=params[8])
    # np.ravel flattens the target column to the 1-D shape sklearn expects.
    forest.fit(X_train, np.ravel(y_train))
    predictions = forest.predict(X_test)

    res = result(X_test, y_test, predictions)[start:end]
    print("rtree")
    print(params)
    return res.to_json(orient='index')
def knear():
    """K-nearest-neighbours endpoint: train with the caller-supplied k,
    predict the test split, and return the requested slice as JSON."""
    params = get_params(request.args)
    X_train, X_test, y_train, y_test = createData(params[2], params[3], params[4])
    start, end = params[0], params[1]

    knn = KNeighborsClassifier(n_neighbors=params[5])
    # np.ravel flattens the target column to the 1-D shape sklearn expects.
    knn.fit(X_train, np.ravel(y_train))
    predictions = knn.predict(X_test)

    res = result(X_test, y_test, predictions)[start:end]
    print("knear")
    print(params)
    return res.to_json(orient='index')
# NOTE(review): this chunk begins mid-call — the keyword arguments below are
# the tail of an XGBRegressor(...) constructor whose opening line is outside
# this view (it presumably assigns `model_xgb`, which is used in the loop).
                             learning_rate=0.03,
                             max_depth=5,
                             min_child_weight=80,
                             n_estimators=1100,
                             reg_alpha=0.5,
                             reg_lambda=0.7,
                             subsample=0.9,
                             silent=True,
                             nthread=8,
                             early_stopping_rounds=100)
# The triple-quoted string below is commented-out code: an alternative set of
# XGBoost hyper-parameters kept for reference. It is a no-op at runtime.
''' model_xgb = xgb.XGBRegressor(colsample_bytree=0.4603, gamma=0.0468, learning_rate=0.05, max_depth=3, min_child_weight=1.7817, n_estimators=2200, reg_alpha=0.4640, reg_lambda=0.8571, subsample=0.5213, silent=True, nthread = 8)'''
print('\n\nStart...')
t0 = time.time()
# Out-of-fold predictions for the train set, per-fold predictions for the
# test set, and per-fold (half) MSE scores.
train_preds, test_preds, mses = np.zeros(train.shape[0]), np.zeros((test.shape[0], 5)), []
# All feature columns except the target ('血糖' = blood glucose).
predictors = [f for f in test.columns if f not in ['血糖']]
kf = KFold(n_splits=5, shuffle=True, random_state=520)
for i, (train_index, test_index) in enumerate(kf.split(train)):
    print(' .{}/5.'.format(i + 1))
    train_feat1, train_feat2 = train.iloc[train_index], train.iloc[test_index]
    gbm = model_xgb.fit(train_feat1[predictors], train_feat1['血糖'])
    predict = gbm.predict(train_feat2[predictors])
    train_preds[test_index] += predict
    # expm1 undoes the log1p applied to the target before scoring.
    mses.append(.5 * mean_squared_error(np.expm1(train_feat2['血糖']), np.expm1(predict)))
    test_preds[:, i] = gbm.predict(test[predictors])
# Overall CV score on the out-of-fold predictions (same 0.5 * MSE metric).
cv = mean_squared_error(np.expm1(train['血糖']), np.expm1(train_preds)) * 0.5
print('\nFinished.\n\nSeconds -> %s\n' % str(time.time() - t0)[:8])
function.result('XGBoost', mses, cv, test_preds)
import time

import numpy as np
from sklearn.model_selection import KFold
from sklearn.metrics import mean_squared_error

import function

# Load the data, log-transform the target ('血糖' = blood glucose), and
# run the project feature-engineering steps.
train, test = function.read_file(path='a')
train["血糖"] = np.log1p(train["血糖"])
train, test = function.add_column(train, test)
train, test = function.transform(train, test)

print('\n\nStart...')
t0, mses = time.time(), []
train_preds = np.zeros(train.shape[0])          # out-of-fold train predictions
test_preds = np.zeros((test.shape[0], 5))       # one test-prediction column per fold
predictors = [col for col in test.columns if col not in ['血糖']]

folds = KFold(n_splits=5, shuffle=True, random_state=520)
for fold, (fit_idx, val_idx) in enumerate(folds.split(train)):
    print(' .{}/5.'.format(fold + 1))
    fit_part, val_part = train.iloc[fit_idx], train.iloc[val_idx]
    booster = function.settings.model_lgb.fit(
        fit_part[predictors], fit_part['血糖'],
        categorical_feature=['性别', '体检日期'])
    val_pred = booster.predict(val_part[predictors])
    train_preds[val_idx] += val_pred
    # expm1 undoes the log1p target transform before scoring.
    mses.append(.5 * mean_squared_error(np.expm1(val_part['血糖']), np.expm1(val_pred)))
    test_preds[:, fold] = booster.predict(test[predictors])

cv = .5 * mean_squared_error(np.expm1(train['血糖']), np.expm1(train_preds))
print('\nFinished.\n\nSeconds -> %s\n' % str(time.time() - t0)[:8])
function.result('LGBM', mses, cv, test_preds)
# Kernel-ridge-regression 5-fold CV script (imports live earlier in the file).
train, test = function.read_file()
train["血糖"] = np.log1p(train["血糖"])
train, test = function.transform(train, test, fill_na=True)

# NOTE(review): `pred` is never used below — possibly consumed by a later
# chunk of this file; kept as-is to preserve behavior.
pred = test.copy()
pred.drop('血糖', axis=1, inplace=True)

KRR = KernelRidge(kernel='polynomial', coef0=0, alpha=0.1, degree=2.1)

print('\n\nStart...')
t0 = time.time()
train_preds = np.zeros(train.shape[0])          # out-of-fold train predictions
test_preds = np.zeros((test.shape[0], 5))       # one test-prediction column per fold
mses = []
predictors = [col for col in test.columns if col not in ['血糖']]

kf = KFold(n_splits=5, shuffle=True, random_state=520)
for fold, (fit_idx, val_idx) in enumerate(kf.split(train)):
    print(' .{}/5.'.format(fold + 1))
    fit_part, val_part = train.iloc[fit_idx], train.iloc[val_idx]
    model = KRR.fit(fit_part[predictors], fit_part['血糖'])
    val_pred = model.predict(val_part[predictors])
    train_preds[val_idx] += val_pred
    # expm1 undoes the log1p target transform before scoring.
    mses.append(.5 * mean_squared_error(np.expm1(val_part['血糖']), np.expm1(val_pred)))
    test_preds[:, fold] = model.predict(test[predictors])

cv = mean_squared_error(np.expm1(train['血糖']), np.expm1(train_preds)) * 0.5
print('\nFinished.\n\nSeconds -> %s\n' % str(time.time() - t0)[:8])
function.result('KRR', mses, cv, test_preds)
import time

import numpy as np
from sklearn.model_selection import KFold
from sklearn.metrics import mean_squared_error

import function

# Load the data, log-transform the target ('血糖' = blood glucose), and run
# the project feature transforms. Missing values are filled per fold below.
train, test = function.read_file()
train["血糖"] = np.log1p(train["血糖"])
train, test = function.transform(train, test)

print('\n\nStart...')
t0 = time.time()
train_preds = np.zeros(train.shape[0])          # out-of-fold train predictions
test_preds = np.zeros((test.shape[0], 5))       # one test-prediction column per fold
mses = []
predictors = [col for col in test.columns if col not in ['血糖']]

folds = KFold(n_splits=5, shuffle=True, random_state=520)
for fold, (fit_idx, val_idx) in enumerate(folds.split(train)):
    print(' .{}/5.'.format(fold + 1))
    fit_part, val_part = train.iloc[fit_idx], train.iloc[val_idx]
    # Fill NAs separately per split so the fold statistics don't leak.
    [fit_part, val_part] = function.fill_data([fit_part, val_part])
    booster = function.settings.model_xgb.fit(fit_part[predictors], fit_part['血糖'])
    val_pred = booster.predict(val_part[predictors])
    train_preds[val_idx] += val_pred
    # expm1 undoes the log1p target transform before scoring.
    mses.append(.5 * mean_squared_error(np.expm1(val_part['血糖']), np.expm1(val_pred)))
    test_preds[:, fold] = booster.predict(test[predictors])

cv = mean_squared_error(np.expm1(train['血糖']), np.expm1(train_preds)) * 0.5
print('\nFinished.\n\nSeconds -> %s\n' % str(time.time() - t0)[:8])
function.result('XGB_f', mses, cv, test_preds)
# NOTE(review): this chunk begins mid-method — the lines below are the tail of
# StackingAveragedModels.predict; the method's opening and the class header
# are outside this view.
        ])
        return self.meta_model_.predict(meta_features)


# Stack ENet / GBoost / KRR base models under a lasso meta-model.
# NOTE(review): ENet, GBoost, KRR, lasso and StackingAveragedModels are
# defined earlier in the file, outside this view.
stacked_averaged_models = StackingAveragedModels(base_models=(ENet, GBoost, KRR),
                                                 meta_model=lasso)
print('\n\nStart...')
t0 = time.time()
# Out-of-fold train predictions, per-fold test predictions, per-fold scores.
train_preds, test_preds, mses = np.zeros(train.shape[0]), np.zeros(
    (test.shape[0], 5)), []
predictors = [f for f in test.columns if f not in ['血糖']]
# NOTE(review): this is the legacy pre-0.18 sklearn.cross_validation.KFold
# API (n_folds, iterable object); the other scripts in this file use the
# modern model_selection KFold(n_splits=...).split(train) — confirm which
# sklearn version this script is pinned to.
kf = KFold(len(train), n_folds=5, shuffle=True, random_state=520)
for i, (train_index, test_index) in enumerate(kf):
    print(' .{}/5.'.format(i + 1))
    train_feat1, train_feat2 = train.iloc[train_index], train.iloc[test_index]
    gbm = stacked_averaged_models.fit(train_feat1[predictors].values,
                                      train_feat1['血糖'].values)
    predict = gbm.predict(train_feat2[predictors])
    train_preds[test_index] += predict
    # expm1 undoes the log1p applied to the target before scoring.
    mses.append(
        .5 * mean_squared_error(np.expm1(train_feat2['血糖']), np.expm1(predict)))
    test_preds[:, i] = gbm.predict(test[predictors])
cv = mean_squared_error(np.expm1(train['血糖']), np.expm1(train_preds)) * 0.5
print('\nFinished.\n\nSeconds -> %s\n' % str(time.time() - t0)[:8])
function.result('Stack', mses, cv, test_preds)
import function as f

# Run every symptom-screening prompt in order, then report the result.
_steps = (
    f.febre, f.mancha, f.dor_musc, f.dor_art, f.intens_art,
    f.edema_articulacao, f.conjuntivite, f.dor_cabeca, f.coceira,
    f.hiper_gang, f.disc_hemorr, f.neuro, f.result,
)
for _step in _steps:
    _step()
# Random-forest 5-fold CV fragment.
# NOTE(review): train / test / train_preds / test_preds / mses / t0 are
# defined earlier in the file, outside this chunk.
predictors = [f for f in test.columns if f not in ['血糖']]
kf = KFold(n_splits=5, shuffle=True, random_state=520)

# Instantiate a RandomForestRegressor.
MAXDEPTH = 40
regr = RandomForestRegressor(
    n_estimators=1000,        # number of trees in the forest
    criterion="mse",          # NOTE(review): renamed "squared_error" in sklearn >= 1.0 — confirm pinned version
    max_features="sqrt",      # features considered for the best split
    max_depth=MAXDEPTH,       # maximum depth of each tree
    min_samples_split=2,      # minimum samples to split an internal node
    min_impurity_decrease=0,  # split only if impurity decreases more than this
    oob_score=True,           # estimate error on out-of-bag samples
    n_jobs=-1,                # use all cores
    random_state=0,
    verbose=0,                # controls verbosity of the fitting process
)

for i, (train_index, test_index) in enumerate(kf.split(train)):
    print(' .{}/5.'.format(i + 1))
    train_feat1, train_feat2 = train.iloc[train_index], train.iloc[test_index]
    regr.fit(train_feat1[predictors], train_feat1['血糖'])
    # Fix: dropped the dead `predictions` alias (`predict = predictions = ...`)
    # — it was never read anywhere.
    predict = regr.predict(train_feat2[predictors])
    train_preds[test_index] += predict
    # expm1 undoes the log1p applied to the target before scoring.
    mses.append(.5 * mean_squared_error(np.expm1(train_feat2['血糖']), np.expm1(predict)))
    test_preds[:, i] = regr.predict(test[predictors])

cv = .5 * mean_squared_error(np.expm1(train['血糖']), np.expm1(train_preds))
print('\nFinished.\n\nSeconds -> %s\n' % str(time.time() - t0)[:8])
function.result('R.F.', mses, cv, test_preds)