regressors = [
    GaussianProcessRegressor(normalize_y=True),
    ARDRegression(),
    # HuberRegressor(),  # epsilon: greater than 1.0, default 1.35
    LinearRegression(n_jobs=5),
    PassiveAggressiveRegressor(random_state=randomstate),  # C: 0.25, 0.5, 1, 5, 10
    SGDRegressor(random_state=randomstate),
    TheilSenRegressor(n_jobs=5, random_state=randomstate),
    RANSACRegressor(random_state=randomstate),
    KNeighborsRegressor(weights='distance'),  # n_neighbors: 3, 6, 9, 12, 15, 20
    RadiusNeighborsRegressor(weights='distance'),  # radius: 1, 2, 5, 10, 15
    MLPRegressor(max_iter=10000000, random_state=randomstate),
    DecisionTreeRegressor(random_state=randomstate),  # max_depth: 2, 3, 4, 6, 8
    ExtraTreeRegressor(random_state=randomstate),  # max_depth: 2, 3, 4, 6, 8
    SVR()  # C: 0.25, 0.5, 1, 5, 10
]

selectors = [
    reliefF.reliefF,
    fisher_score.fisher_score,
    # chi_square.chi_square,
    JMI.jmi,
    CIFE.cife,
    DISR.disr,
    MIM.mim,
    CMIM.cmim,
    ICAP.icap,
    MRMR.mrmr,
    MIFS.mifs
]
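
# Hedged usage sketch (not part of the original listing): one plausible way
# to combine the two lists above -- rank features with a score-based
# selector, then cross-validate each regressor on the top-k columns. X, y,
# and k are assumed to be defined, X as a numpy array; the name `regressors`
# for the first list is itself reconstructed. Note that skfeature's
# information-theoretic selectors (JMI, CIFE, ...) return selected indices
# directly rather than per-feature scores, so only reliefF is shown here.
import numpy as np
from sklearn.model_selection import cross_val_score

def rank_and_score(X, y, k=10):
    scores = reliefF.reliefF(X, y)        # one relevance score per feature
    top_k = np.argsort(scores)[::-1][:k]  # indices of the k best features
    for reg in regressors:
        cv = cross_val_score(reg, X[:, top_k], y, cv=5,
                             scoring='neg_mean_squared_error')
        print(type(reg).__name__, -cv.mean())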
def decode(cls, obj):
    # Rebuild an ExtraTreeRegressor from its serialized __getstate__ dict
    # without running __init__. sklearn.tree is the public import path;
    # it replaces the deprecated private module sklearn.tree.tree.
    from sklearn.tree import ExtraTreeRegressor
    state = obj['state']
    t = ExtraTreeRegressor.__new__(ExtraTreeRegressor)
    t.__setstate__(state)
    return t
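
# Hedged counterpart sketch (not from the original source): the matching
# `encode` half is assumed to store the estimator's __getstate__ dict under
# a 'state' key, mirroring what decode() above reads back.
def encode(cls, obj):
    return {'state': obj.__getstate__()}

# Round trip, assuming a fitted tree `t` (cls is unused in these bare
# classmethod bodies, so None is passed purely for illustration):
#   restored = decode(None, encode(None, t))
#   np.allclose(t.predict(X), restored.predict(X))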
'BernoulliNB': BernoulliNB(),
'BernoulliRBM': BernoulliRBM(),
'Binarizer': Binarizer(),
'Birch': Birch(),
'CCA': CCA(),
'CalibratedClassifierCV': CalibratedClassifierCV(),
'DBSCAN': DBSCAN(),
'DPGMM': DPGMM(),
'DecisionTreeClassifier': DecisionTreeClassifier(),
'DecisionTreeRegressor': DecisionTreeRegressor(),
'DictionaryLearning': DictionaryLearning(),
'ElasticNet': ElasticNet(),
'ElasticNetCV': ElasticNetCV(),
'EmpiricalCovariance': EmpiricalCovariance(),
'ExtraTreeClassifier': ExtraTreeClassifier(),
'ExtraTreeRegressor': ExtraTreeRegressor(),
'ExtraTreesClassifier': ExtraTreesClassifier(),
'ExtraTreesRegressor': ExtraTreesRegressor(),
'FactorAnalysis': FactorAnalysis(),
'FastICA': FastICA(),
'FeatureAgglomeration': FeatureAgglomeration(),
'FunctionTransformer': FunctionTransformer(),
'GMM': GMM(),
'GaussianMixture': GaussianMixture(),
'GaussianNB': GaussianNB(),
'GaussianProcess': GaussianProcess(),
'GaussianProcessClassifier': GaussianProcessClassifier(),
'GaussianProcessRegressor': GaussianProcessRegressor(),
'GaussianRandomProjection': GaussianRandomProjection(),
'GenericUnivariateSelect': GenericUnivariateSelect(),
'GradientBoostingClassifier': GradientBoostingClassifier(),
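
# Hedged usage sketch (not part of the original listing): assuming the
# entries above sit in a dict bound to a name like `estimators`, an instance
# can be looked up by class name and cloned, so each caller gets a fresh,
# unfitted copy instead of mutating the shared prototype.
from sklearn.base import clone

def make_estimator(name):
    return clone(estimators[name])

# e.g. model = make_estimator('DecisionTreeRegressor').fit(X_train, y_train)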
import numpy as np
import pandas as pd
import xgboost as xgb
from sklearn import ensemble, linear_model
from sklearn.ensemble import BaggingRegressor
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split
from sklearn.neural_network import MLPRegressor
from sklearn.svm import SVR
from sklearn.tree import DecisionTreeRegressor, ExtraTreeRegressor


def model_select(X, test_A, y, model_choice, threshold=False, rate=False):
    '''
    Fit the chosen regressor on five random train/test splits, predict
    test_A with each fitted model, and blend the per-split predictions,
    giving the split with the smallest test MSE the largest weight.

    X            : training data
    y            : training labels
    test_A       : data to predict
    model_choice : which model to use
                   1  XGBRegressor
                   2  ensemble.RandomForestRegressor (25 trees)
                   3  linear_model.Lasso
                   4  LinearRegression
                   5  linear_model.BayesianRidge
                   6  DecisionTreeRegressor
                   7  ensemble.RandomForestRegressor (1000 trees)
                   8  ensemble.GradientBoostingRegressor
                   9  ensemble.AdaBoostRegressor
                   10 BaggingRegressor
                   11 ExtraTreeRegressor
                   12 SVR
                   13 MLPRegressor (adam)
                   other: MLPRegressor (lbfgs)
    threshold    : if True, keep a split's predictions only when its
                   test MSE is <= 0.03
    rate         : unused; kept for the commented-out alternative weighting
    '''
    mse = []
    kept_cols = []  # loop indices whose predictions were actually stored
    sum_mse = 0.0
    predict_A = pd.DataFrame(np.zeros((100, 10)))
    for index in range(5):
        X_train, X_test, y_train, y_test = train_test_split(X, y)
        if model_choice == 1:
            # eta is an alias of learning_rate in xgboost
            model = xgb.XGBRegressor(max_depth=17, min_child_weight=5,
                                     gamma=0.06, subsample=1,
                                     learning_rate=0.1, n_estimators=100,
                                     silent=0, n_jobs=-1,
                                     objective='reg:linear')
        elif model_choice == 2:
            model = ensemble.RandomForestRegressor(
                n_estimators=25, criterion='mse', max_depth=14,
                min_samples_split=0.1, min_samples_leaf=2,
                min_weight_fraction_leaf=0.0, max_features=0.95,
                max_leaf_nodes=None, min_impurity_split=1e-07,
                bootstrap=True, oob_score=False, n_jobs=-1,
                random_state=None, verbose=0, warm_start=False)
        elif model_choice == 3:
            model = linear_model.Lasso(alpha=0.1, max_iter=1000,
                                       normalize=False)
        elif model_choice == 4:
            model = LinearRegression(fit_intercept=False, n_jobs=1,
                                     normalize=False)
        elif model_choice == 5:
            model = linear_model.BayesianRidge(
                alpha_1=1e-06, alpha_2=1e-06, compute_score=False,
                copy_X=True, fit_intercept=True, lambda_1=1e-06,
                lambda_2=1e-06, n_iter=500, normalize=False,
                tol=10,  # very loose convergence tolerance
                verbose=False)
        elif model_choice == 6:
            model = DecisionTreeRegressor(
                criterion='mse', splitter='best', max_depth=3,
                min_samples_split=0.1, min_samples_leaf=0.1,
                min_weight_fraction_leaf=0.1, max_features=None,
                random_state=None, max_leaf_nodes=None, presort=False)
        elif model_choice == 7:
            model = ensemble.RandomForestRegressor(
                n_estimators=1000, criterion='mse', max_depth=14,
                min_samples_split=0.1, min_samples_leaf=2,
                min_weight_fraction_leaf=0.0, max_features='auto',
                max_leaf_nodes=None, min_impurity_split=1e-07,
                bootstrap=True, oob_score=False, n_jobs=-1,
                random_state=None, verbose=0, warm_start=False)
        elif model_choice == 8:
            model = ensemble.GradientBoostingRegressor(
                n_estimators=800, learning_rate=0.1, max_depth=4,
                random_state=0, loss='ls')
        elif model_choice == 9:
            model = ensemble.AdaBoostRegressor(
                base_estimator=None, n_estimators=120, learning_rate=1,
                loss='linear', random_state=None)
        elif model_choice == 10:
            model = BaggingRegressor(
                base_estimator=None, n_estimators=500, max_samples=1.0,
                max_features=1.0, bootstrap=True)
        elif model_choice == 11:
            model = ExtraTreeRegressor(
                criterion='mse', splitter='random', max_depth=3,
                min_samples_split=0.1, min_samples_leaf=1,
                min_weight_fraction_leaf=0.01, max_features='auto',
                random_state=None, max_leaf_nodes=None,
                min_impurity_split=1e-07)
        elif model_choice == 12:
            model = SVR(kernel='rbf', degree=3, gamma='auto', coef0=0.1,
                        tol=0.001, C=1, epsilon=0.1, shrinking=True,
                        cache_size=200, verbose=False, max_iter=-1)
        elif model_choice == 13:
            model = MLPRegressor(
                hidden_layer_sizes=(100,), activation='relu',
                solver='adam', alpha=0.0001, batch_size='auto',
                learning_rate='constant', learning_rate_init=0.001,
                power_t=0.5, max_iter=200, shuffle=True,
                random_state=None, tol=0.0001, verbose=False,
                warm_start=False, momentum=0.9, nesterovs_momentum=True,
                early_stopping=False, validation_fraction=0.1,
                beta_1=0.9, beta_2=0.999, epsilon=1e-08)
        else:
            model = MLPRegressor(activation='relu', alpha=0.001,
                                 solver='lbfgs', max_iter=90,
                                 hidden_layer_sizes=(11, 11, 11),
                                 random_state=1)

        model.fit(X_train, y_train)
        y_pred = model.predict(X_test)
        err = mean_squared_error(y_test, y_pred)
        print("index: ", index, err)
        sum_mse += err

        if not threshold:
            predict_A.iloc[:, index] = model.predict(test_A)
            mse.append(err)
            kept_cols.append(index)
        elif err <= 0.03:
            # keep this split only if it generalizes well enough
            predict_A.iloc[:, index] = model.predict(test_A)
            mse.append(err)
            kept_cols.append(index)

    # Disabled alternative: weight columns in loop order rather than by rank.
    # if not rate:
    #     mse_rate = mse / np.sum(mse)
    #     # predict_A = predict_A.iloc[:, ~(data == 0).all()]
    #     for index in range(len(mse_rate)):
    #         y += predict_A.iloc[:, index] * mse_rate[index]

    # Blend: normalize the MSEs to weights that sum to 1, then give the
    # largest weight to the split with the smallest MSE.
    y = 0.0
    mse = pd.Series(np.array(mse) / np.sum(mse))
    weights_desc = mse.sort_values(ascending=False).reset_index(drop=True)
    order_by_mse = list(mse.sort_values(ascending=True).index)
    for index in range(len(mse)):
        y += weights_desc.iloc[index] * \
             predict_A.iloc[:, kept_cols[order_by_mse[index]]]

    print("y_predict_mean: ", y.mean())
    print("y_predict_var: ", y.var())
    y = pd.DataFrame(y)
    y.to_excel("H:/java/python/src/machinelearning/test/predict.xlsx",
               index=False)
    predict_A.to_excel(
        "H:/java/python/src/machinelearning/test/predict_testA.xlsx",
        index=False)
    print("Average mse:", sum_mse / len(mse))
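
# Hedged usage sketch (not from the original source): driving model_select()
# end-to-end. The file paths and the 'label' column name are placeholders;
# the prediction frame above assumes test_A yields exactly 100 rows.
if __name__ == '__main__':
    train = pd.read_csv('train.csv')      # placeholder path
    test_A = pd.read_csv('test_A.csv')    # placeholder path, 100 rows
    X = train.drop('label', axis=1).values
    y = train['label']
    # 8 -> GradientBoostingRegressor per the docstring mapping;
    # threshold=True keeps only splits whose held-out MSE is <= 0.03
    model_select(X, test_A.values, y, model_choice=8, threshold=True)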