コード例 #1
0
    GaussianProcessRegressor(normalize_y=True),
    ARDRegression(),
    # HuberRegressor(),   # epsilon:  greater than 1.0, default 1.35
    LinearRegression(n_jobs=5),
    PassiveAggressiveRegressor(
        random_state=randomstate),  # C: 0.25, 0.5, 1, 5, 10
    SGDRegressor(random_state=randomstate),
    TheilSenRegressor(n_jobs=5, random_state=randomstate),
    RANSACRegressor(random_state=randomstate),
    KNeighborsRegressor(
        weights='distance'),  # n_neighbors: 3, 6, 9, 12, 15, 20
    RadiusNeighborsRegressor(weights='distance'),  # radius: 1, 2, 5, 10, 15
    MLPRegressor(max_iter=10000000, random_state=randomstate),
    DecisionTreeRegressor(
        random_state=randomstate),  # max_depth = 2, 3, 4, 6, 8
    ExtraTreeRegressor(random_state=randomstate),  # max_depth = 2, 3, 4, 6, 8
    SVR()  # C: 0.25, 0.5, 1, 5, 10
]

selectors = [
    reliefF.reliefF,
    fisher_score.fisher_score,
    # chi_square.chi_square,
    JMI.jmi,
    CIFE.cife,
    DISR.disr,
    MIM.mim,
    CMIM.cmim,
    ICAP.icap,
    MRMR.mrmr,
    MIFS.mifs
コード例 #2
0
 def decode(cls, obj):
     """Reconstruct an ExtraTreeRegressor from a serialized state dict.

     Parameters
     ----------
     obj : mapping with a 'state' key holding the tree's pickled
         ``__getstate__`` payload.

     Returns
     -------
     ExtraTreeRegressor restored via ``__new__`` + ``__setstate__``
     (bypasses ``__init__``, so no validation runs).
     """
     # FIX: `sklearn.tree.tree` was a private module removed in
     # scikit-learn 0.24; the supported public path is `sklearn.tree`.
     from sklearn.tree import ExtraTreeRegressor
     state = obj['state']
     t = ExtraTreeRegressor.__new__(ExtraTreeRegressor)
     t.__setstate__(state)
     return t
コード例 #3
0
			'BernoulliNB':BernoulliNB(),
			'BernoulliRBM':BernoulliRBM(),
			'Binarizer':Binarizer(),
			'Birch':Birch(),
			'CCA':CCA(),
			'CalibratedClassifierCV':CalibratedClassifierCV(),
			'DBSCAN':DBSCAN(),
			'DPGMM':DPGMM(),
			'DecisionTreeClassifier':DecisionTreeClassifier(),
			'DecisionTreeRegressor':DecisionTreeRegressor(),
			'DictionaryLearning':DictionaryLearning(),
			'ElasticNet':ElasticNet(),
			'ElasticNetCV':ElasticNetCV(),
			'EmpiricalCovariance':EmpiricalCovariance(),
			'ExtraTreeClassifier':ExtraTreeClassifier(),
			'ExtraTreeRegressor':ExtraTreeRegressor(),
			'ExtraTreesClassifier':ExtraTreesClassifier(),
			'ExtraTreesRegressor':ExtraTreesRegressor(),
			'FactorAnalysis':FactorAnalysis(),
			'FastICA':FastICA(),
			'FeatureAgglomeration':FeatureAgglomeration(),
			'FunctionTransformer':FunctionTransformer(),
			'GMM':GMM(),
			'GaussianMixture':GaussianMixture(),
			'GaussianNB':GaussianNB(),
			'GaussianProcess':GaussianProcess(),
			'GaussianProcessClassifier':GaussianProcessClassifier(),
			'GaussianProcessRegressor':GaussianProcessRegressor(),
			'GaussianRandomProjection':GaussianRandomProjection(),
			'GenericUnivariateSelect':GenericUnivariateSelect(),
			'GradientBoostingClassifier':GradientBoostingClassifier(),
コード例 #4
0
def moudle_select(X, test_A, y, moudelselect, threshold=False, Rate=False):
    """Train the selected regressor on 5 random splits and ensemble predictions.

    Parameters
    ----------
    X : training features.
    test_A : features to predict on.
    y : training labels.
    moudelselect : int selecting the model (name kept for backward
        compatibility with existing callers):
        1  xgb.XGBRegressor
        2  ensemble.RandomForestRegressor (25 trees)
        3  linear_model.Lasso
        4  LinearRegression
        5  linear_model.BayesianRidge
        6  DecisionTreeRegressor
        7  ensemble.RandomForestRegressor (1000 trees)
        8  ensemble.GradientBoostingRegressor
        9  ensemble.AdaBoostRegressor
        10 BaggingRegressor
        11 ExtraTreeRegressor
        12 SVR
        13 MLPRegressor
        other: MLPRegressor (lbfgs, 3x11 hidden units)
    threshold : if True, only keep a split's predictions when its
        validation MSE <= 0.03; otherwise keep every split.
    Rate : accepted but unused (kept for backward compatibility).

    Side effects
    ------------
    Prints per-split MSE and summary stats; writes the weighted ensemble
    to predict.xlsx and the per-split predictions to predict_testA.xlsx.
    """
    mse = []
    sum_mse = 0.0
    # NOTE(review): assumes test_A has at most 100 rows and 5 splits fit in
    # 10 columns — confirm against callers.
    predict_A = pd.DataFrame(np.zeros((100, 10)))

    for index in range(5):
        X_train, X_test, y_train, y_test = train_test_split(X, y)

        if moudelselect == 1:
            # BUG FIX: the original nested one XGBRegressor inside another
            # through a bogus `model=` keyword, so the intended
            # hyperparameters never reached the fitted estimator; also fixed
            # the `min_child_weigh` -> `min_child_weight` typo.
            model = xgb.XGBRegressor(max_depth=17,
                                     min_child_weight=5,
                                     eta=0.025,
                                     gamma=0.06,
                                     subsample=1,
                                     learning_rate=0.1,
                                     n_estimators=100,
                                     silent=0,
                                     n_jobs=-1,
                                     objective='reg:linear')

        elif moudelselect == 2:
            model = ensemble.RandomForestRegressor(
                n_estimators=25,
                criterion='mse',
                max_depth=14,
                min_samples_split=0.1,
                min_samples_leaf=2,
                min_weight_fraction_leaf=0.0,
                max_features=0.95,
                max_leaf_nodes=None,
                min_impurity_split=1e-07,
                bootstrap=True,
                oob_score=False,
                n_jobs=-1,
                random_state=None,
                verbose=0,
                warm_start=False)
        elif moudelselect == 3:
            model = linear_model.Lasso(alpha=0.1,
                                       max_iter=1000,
                                       normalize=False)

        elif moudelselect == 4:
            model = LinearRegression(fit_intercept=False,
                                     n_jobs=1,
                                     normalize=False)

        elif moudelselect == 5:
            model = linear_model.BayesianRidge(alpha_1=1e-06,
                                               alpha_2=1e-06,
                                               compute_score=False,
                                               copy_X=True,
                                               fit_intercept=True,
                                               lambda_1=1e-06,
                                               lambda_2=1e-06,
                                               n_iter=500,
                                               normalize=False,
                                               tol=10,
                                               verbose=False)

        elif moudelselect == 6:
            model = DecisionTreeRegressor(criterion='mse',
                                          splitter='best',
                                          max_depth=3,
                                          min_samples_split=0.1,
                                          min_samples_leaf=0.1,
                                          min_weight_fraction_leaf=0.1,
                                          max_features=None,
                                          random_state=None,
                                          max_leaf_nodes=None,
                                          presort=False)

        elif moudelselect == 7:
            model = ensemble.RandomForestRegressor(
                n_estimators=1000,
                criterion='mse',
                max_depth=14,
                min_samples_split=0.1,
                min_samples_leaf=2,
                min_weight_fraction_leaf=0.0,
                max_features='auto',
                max_leaf_nodes=None,
                min_impurity_split=1e-07,
                bootstrap=True,
                oob_score=False,
                n_jobs=-1,
                random_state=None,
                verbose=0,
                warm_start=False)
        elif moudelselect == 8:
            model = ensemble.GradientBoostingRegressor(n_estimators=800,
                                                       learning_rate=0.1,
                                                       max_depth=4,
                                                       random_state=0,
                                                       loss='ls')

        elif moudelselect == 9:
            model = ensemble.AdaBoostRegressor(base_estimator=None,
                                               n_estimators=120,
                                               learning_rate=1,
                                               loss='linear',
                                               random_state=None)

        elif moudelselect == 10:
            model = BaggingRegressor(base_estimator=None,
                                     n_estimators=500,
                                     max_samples=1.0,
                                     max_features=1.0,
                                     bootstrap=True)
        elif moudelselect == 11:
            model = ExtraTreeRegressor(criterion='mse',
                                       splitter='random',
                                       max_depth=3,
                                       min_samples_split=0.1,
                                       min_samples_leaf=1,
                                       min_weight_fraction_leaf=0.01,
                                       max_features='auto',
                                       random_state=None,
                                       max_leaf_nodes=None,
                                       min_impurity_split=1e-07)

        elif moudelselect == 12:
            model = SVR(kernel='rbf',
                        degree=3,
                        gamma='auto',
                        coef0=0.1,
                        tol=0.001,
                        C=1,
                        epsilon=0.1,
                        shrinking=True,
                        cache_size=200,
                        verbose=False,
                        max_iter=-1)

        elif moudelselect == 13:
            model = MLPRegressor(hidden_layer_sizes=(100, ),
                                 activation='relu',
                                 solver='adam',
                                 alpha=0.0001,
                                 batch_size='auto',
                                 learning_rate='constant',
                                 learning_rate_init=0.001,
                                 power_t=0.5,
                                 max_iter=200,
                                 shuffle=True,
                                 random_state=None,
                                 tol=0.0001,
                                 verbose=False,
                                 warm_start=False,
                                 momentum=0.9,
                                 nesterovs_momentum=True,
                                 early_stopping=False,
                                 validation_fraction=0.1,
                                 beta_1=0.9,
                                 beta_2=0.999,
                                 epsilon=1e-08)
        else:
            model = MLPRegressor(activation='relu',
                                 alpha=0.001,
                                 solver='lbfgs',
                                 max_iter=90,
                                 hidden_layer_sizes=(11, 11, 11),
                                 random_state=1)

        model.fit(X_train, y_train)
        y_pred = model.predict(X_test)
        print("index: ", index, mean_squared_error(y_test, y_pred))
        sum_mse += mean_squared_error(y_test, y_pred)

        # Keep this split's test-set predictions unless thresholding is on
        # and the split's validation MSE is too large.
        if not threshold or mean_squared_error(y_test, y_pred) <= 0.03000:
            y_predict = model.predict(test_A)
            # FIX: DataFrame.ix was removed in pandas 1.0; use .iloc.
            predict_A.iloc[:, index] = y_predict
            mse.append(mean_squared_error(y_test, y_pred))

    # Normalize the per-split MSEs into weights summing to 1.
    # FIX: convert the list explicitly instead of relying on numpy-scalar
    # broadcasting over a Python list.
    y = 0.0
    mse = np.asarray(mse) / np.sum(mse)
    mse = pd.Series(mse)
    mse_rate_asc = mse.sort_values(ascending=False)
    mse_rate_asc = mse_rate_asc.reset_index(drop=True)
    mse_rate_desc = mse.sort_values(ascending=True)
    indexs = list(mse_rate_desc.index)
    for index in range(len(mse)):
        # Pairs the largest normalized-MSE weight with the column of the
        # smallest-MSE split (weights and columns are sorted oppositely).
        y += mse_rate_asc.iloc[index] * predict_A.iloc[:, indexs[index]]

    print("y_predict_mean: ", y.mean())
    print("y_predict_var: ", y.var())
    y = pd.DataFrame(y)
    y.to_excel("H:/java/python/src/machinelearning/test/predict.xlsx",
               index=False)
    predict_A.to_excel(
        "H:/java/python/src/machinelearning/test/predict_testA.xlsx",
        index=False)
    # NOTE(review): divides by the number of KEPT splits, not the 5 splits
    # that contributed to sum_mse — behavior preserved; confirm intent.
    print("Averge mse:", sum_mse / len(mse))