Example #1
def getKernel(s, i, j, **kwargs):

    if s == 'custom':
        # Format: kernel_params = {'gamma' : j}
        return kr.KernelRidge(kernel=customKernelOption,
                              alpha=i,
                              kernel_params=kwargs['kernel_params'])
    else:
        return kr.KernelRidge(kernel=s, alpha=i, gamma=j)
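A hedged usage sketch of the factory above. `kr` and `customKernelOption` are referenced but not defined in the snippet; the stand-ins below are assumptions, not from the source (scikit-learn calls a custom callable kernel once per pair of sample rows):

import numpy as np
from sklearn import kernel_ridge as kr

def customKernelOption(x, y, gamma=0.1):
    # RBF-style kernel evaluated on a single pair of samples (assumption)
    return np.exp(-gamma * np.sum((x - y) ** 2))

model = getKernel('custom', 0.5, 0.1, kernel_params={'gamma': 0.1})
model.fit(np.random.rand(20, 3), np.random.rand(20))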
Example #2
def test_params(X, Y):
	test_param(linear_model.LinearRegression(), '', X, Y)

	for gamma in [0.05, 0.1, 0.2]:
		for alpha in [0.1, 0.01, 1e-3]:
			test_param(kernel_ridge.KernelRidge(alpha=alpha, kernel='rbf', gamma=gamma), 'rbf g=' + str(gamma) + ", a=" + str(alpha), X, Y)

	for degree in [2, 3, 4, 5]:
		test_param(kernel_ridge.KernelRidge(kernel='polynomial', degree=degree), degree, X, Y)

	test_param(tree.DecisionTreeRegressor(), '', X, Y)
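`test_param` is defined elsewhere in the source project; a plausible stand-in, offered purely as an assumption, would cross-validate the model and print a labelled score:

# Hypothetical stand-in for the external test_param (assumption):
from sklearn import model_selection

def test_param(model, label, X, Y):
    # Default scoring for regressors is R^2
    scores = model_selection.cross_val_score(model, X, Y, cv=5)
    print("{}: mean R^2 = {:.3f}".format(label, scores.mean()))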
Example #3
    def Unfair_Prediction(self, kernel, lmd, gamma, avails, use_S=False):
        if use_S:
            X = np.c_[self.trainX1, self.trainS]
        else:
            X = self.trainX1
        if not kernel:  # linear
            lr = linear_model.Ridge(alpha=lmd, fit_intercept=True)
            #lr = linear_model.LinearRegression(fit_intercept=True)
        else:
            lr = kernel_ridge.KernelRidge(alpha=lmd, kernel="rbf", gamma=gamma)
        lr.fit(X, self.trainY)
        validS, validX1, validX2, validY = self.getValidationData()
        testS, testX1, testX2, testY = self.getPredictData()
        if use_S:
            predictX_train = np.c_[self.trainX1, self.trainS]
            predictX_valid = np.c_[validX1, validS]
            predictX_test = np.c_[testX1, testS]
        else:
            predictX_train = self.trainX1
            predictX_valid = validX1
            predictX_test = testX1
        yhat_train = lr.predict(predictX_train).flatten()
        yhat_valid = lr.predict(predictX_valid).flatten()
        yhat_test = lr.predict(predictX_test).flatten()
        #print ("genvar=",np.mean([(testY[i])**2 for i in range(len(testY))])**0.5)
        #print ("unfair genavg=",np.mean([(testY[i]-yhat_test[i])**2 for i in range(len(testY))])**0.5)
        return (Result(self.trainY, yhat_train, self.trainS, avails),
                Result(validY, yhat_valid, validS, avails),
                Result(testY, yhat_test, testS, avails))
Example #4
def _get_base_ml_model(method):
    regressor = None
    if method == 'lr':
        regressor = linear_model.LinearRegression()
    if method == 'huber':
        regressor = linear_model.HuberRegressor(max_iter=50)
        regressor = multioutput.MultiOutputRegressor(regressor)
    if method == 'svr':
        regressor = svm.LinearSVR()
        regressor = multioutput.MultiOutputRegressor(regressor)
    if method == 'kr':
        regressor = kernel_ridge.KernelRidge(kernel='rbf')
    if method == 'rf':
        regressor = ensemble.RandomForestRegressor(n_estimators=50, n_jobs=8)
    if method == 'gbm':
        regressor = lgb.LGBMRegressor(max_depth=20,
                                      num_leaves=1000,
                                      n_estimators=100,
                                      min_child_samples=5,
                                      random_state=42)
        regressor = multioutput.MultiOutputRegressor(regressor)
    if method == 'nn':
        regressor = neural_network.MLPRegressor(hidden_layer_sizes=(25, 25),
                                                early_stopping=True,
                                                max_iter=1000000,
                                                alpha=0.01)

    return regressor
Example #5
def optimize_KernelRidge(X_train, y_train):
    opt = modelSel.GridSearchCV(skKR.KernelRidge(),
                                param_kernelRidge,
                                cv=5,
                                scoring=scoreFunction)
    opt.fit(X_train, y_train)
    return formatOptimal(opt.best_params_)
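`param_kernelRidge`, `scoreFunction`, and `formatOptimal` live elsewhere in the source project; a plausible shape for the grid, stated only as an assumption, is:

# Assumed shape of the external param_kernelRidge grid (not from the source):
param_kernelRidge = {
    'kernel': ['rbf'],
    'alpha': [1e-3, 1e-2, 1e-1, 1.0],
    'gamma': [1e-3, 1e-2, 1e-1, 1.0],
}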
Example #6
def GLS(X, y, model="ols", **kwargs):
    """
    model = "ols", "ridge", "lasso", "lar", "kernel", "xgb"
    """
    if model == "ols":
        md1 = linear_model.LinearRegression(fit_intercept=True).fit(X,y)
        md0 = linear_model.LinearRegression(fit_intercept=False).fit(X,y)
    if model == "ridge":
        alpha = get_alpha(kwargs,default=10**0.5)
        md1 = linear_model.Ridge(alpha=alpha,fit_intercept=True).fit(X,y)
        md0 = linear_model.Ridge(alpha=alpha, fit_intercept=False).fit(X, y)
    if model == 'lasso':
        alpha = get_alpha(kwargs, default=0.1)
        md1 = linear_model.Lasso(alpha=alpha,fit_intercept=True).fit(X,y)
        md0 = linear_model.Lasso(alpha=alpha, fit_intercept=False).fit(X, y)
    if model == 'lar':
        """
        TO DO
        """
        md1 = linear_model.Lars(fit_intercept=True).fit(X,y)
        md0 = linear_model.Lars(fit_intercept=False).fit(X,y)
    if model == 'kernel':
        alpha, kernel, gamma, degree, coef0 = get_kernel_coef(kwargs)
        md1 = kernel_ridge.KernelRidge(alpha=alpha, kernel=kernel, gamma=gamma,
                                       degree=degree, coef0=coef0).fit(X, y)
        md0 = md1
    if model == 'xgb':
        md1 = xgb.XGBRegressor().fit(X,y)
        md0 = md1
    return {"1":md1,
            "-1":md0,
            "type":'GLS'}
Example #7
def __regression(x, y, regressor_return=False):

    # TheilSen Regression
    ts_y = y.ravel()
    ts = sk_lm.TheilSenRegressor()
    ts.fit(x, ts_y)

    # r squared
    ts_y_pred = ts.predict(x)
    ts_y_mean = np.mean(ts_y)
    ts_ssr = np.sum((ts_y_pred - ts_y_mean)**2)
    ts_sst = np.sum((ts_y - ts_y_mean)**2)
    #ts_score = np.absolute(ts.score(index_norm, ts_y))
    ts_score = ts_ssr / ts_sst

    # Ridge Regression
    ridge = sk_lm.Ridge(alpha=0.01)
    ridge.fit(x, y)
    ridge_score = np.absolute(ridge.score(x, y))

    # Kernel Ridge Regression
    kernel_ridge = sk_kr.KernelRidge(kernel='rbf', alpha=0.01)
    kernel_ridge.fit(x, y)
    kr_score = np.absolute(kernel_ridge.score(x, y))

    if regressor_return:
        return (ts, ridge, kernel_ridge)
    else:
        return (ts_score, ridge_score, kr_score)
Example #8
def kernel_ridge_reg_fit(X,
                         y,
                         data_name,
                         binning_threshold=0.75,
                         n_splits=5,
                         log=True):
    """
    Build Ridge regression model
    Input: X - regression_X, original
           y - regression_y, original
           data_name - the name of the data
           binning_threshold - threshold used for vertical log binning
           log - whether take the log of the variables for preprocessing
    """
    # Scale the data
    X_scaled, y_scaled, X_scaler, y_scaler = scale_data(X, y)
    # Prepare for Stratified K Fold
    # Split y
    bin_result_dict, bin_edge, _ = vertical_log_binning(
        binning_threshold, dict(zip(range(len(list(y))), list(y))))
    split_y = list(bin_result_dict.values())
    # Cross Validation experiment
    cv = StratifiedKFold(n_splits=n_splits, shuffle=True, random_state=0)
    predictions = []
    for train, test in cv.split(X_scaled, split_y):
        y_test = y[test].flatten()
        ridge_reg = kernel_ridge.KernelRidge(alpha=10**(-5),
                                             gamma=5 * 10**(-5),
                                             kernel="rbf")
        ridge_reg.fit(X_scaled[train], y_scaled[train])
        predict_this_fold = ridge_reg.predict(X_scaled[test]).reshape(-1, 1)
        predict_this_fold_rescale = 10**(
            y_scaler.inverse_transform(predict_this_fold).ravel())
        predictions.append((y_test, predict_this_fold_rescale))
    return predictions
Example #9
def optimize_KernelRidge(X_train, y_train):
    opt = modelSel.RandomizedSearchCV(estimator=skKR.KernelRidge(),
                                      param_distributions=param_kernelRidge,
                                      cv=5,
                                      scoring=scoreFunction)
    opt.fit(X_train, y_train)
    return formatOptimal(opt.best_params_)
Example #10
def setup_KernelRidge(learner_settings):
    alpha = 1
    kernel = 'linear'
    gamma = None
    degree = 3
    coef0 = 1
    kernel_params = None

    for additional_setting in learner_settings:
        # split identifier=value, so you can identify value and the variable
        setting_value_pair = additional_setting.split("=")
        if setting_value_pair[0] == "alpha":
            alpha = float(setting_value_pair[1])
        if setting_value_pair[0] == "kernel":
            kernel = setting_value_pair[1]
        if setting_value_pair[0] == "gamma":
            gamma = float(setting_value_pair[1])
        if setting_value_pair[0] == "degree":
            degree = int(setting_value_pair[1])
        if setting_value_pair[0] == "coef0":
            coef0 = int(setting_value_pair[1])
        if setting_value_pair[0] == "kernel_params":
            kernel_params = setting_value_pair[1]

    return skKR.KernelRidge(alpha=alpha,
                            kernel=kernel,
                            gamma=gamma,
                            degree=degree,
                            coef0=coef0,
                            kernel_params=kernel_params)
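A quick usage sketch, assuming the settings arrive as "key=value" strings (the example list below is hypothetical):

learner = setup_KernelRidge(["alpha=0.5", "kernel=rbf", "gamma=0.1"])
print(learner)  # -> KernelRidge(alpha=0.5, gamma=0.1, kernel='rbf')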
Example #11
def get_all_regrs():
    regrs = {
        "Linear regression":
        linear_model.LinearRegression(),
        # "Perceptron": linear_model.Perceptron(),
        "Lars":
        linear_model.Lars(),
        "Lasso":
        linear_model.LassoCV(max_iter=5000),
        # "Passive Aggressive": linear_model.PassiveAggressiveRegressor(),
        "PLS":
        PLS(n_components=3),
        "Random Forest":
        ensemble.RandomForestRegressor(),
        "Gradient Boost":
        ensemble.GradientBoostingRegressor(),
        "Extra Trees":
        ensemble.ExtraTreesRegressor(max_depth=2),
        "Ada Boost":
        ensemble.AdaBoostRegressor(
            base_estimator=tree.DecisionTreeRegressor(max_depth=2),
            n_estimators=250),
        "Gaussian Process":
        gaussian_process.GaussianProcessRegressor(),
        # "Isotonic": isotonic.IsotonicRegression(),
        "Kernel Ridge":
        kernel_ridge.KernelRidge(),
        "Ridge CV":
        linear_model.RidgeCV(),
        # "Exp tranform": TransformedTargetRegressor(regressor=PLS(n_components=3),
        #                                            func=np.exp,
        #                                            inverse_func=np.log),
        # "Log tranform": TransformedTargetRegressor(regressor=PLS(n_components=3),
        #                                            func=np.log,
        #                                            inverse_func=np.exp),
        # "Inv tranform": TransformedTargetRegressor(regressor=PLS(n_components=3),
        #                                            func=invert,
        #                                            inverse_func=invert),
        # "Log regressor": linear_model.LogisticRegressionCV(),
        "ML Perceptron":
        neural_network.MLPRegressor(max_iter=50000, hidden_layer_sizes=(5, 5)),
        "Linear SVR":
        linear_svc,
        "RBF SVR":
        svm.SVR(kernel='rbf'),
        "Poly SVR":
        svm.SVR(kernel='poly'),
        # "Sigmoid SVR": svm.SVR(kernel='sigmoid'),
        "Bayesian Ridge":
        linear_model.BayesianRidge(),
        "Huber":
        linear_model.HuberRegressor(),
        # "Poisson": linear_model.PoissonRegressor(),
        "K-neighbors":
        neighbors.KNeighborsRegressor()
    }
    # "Radius Neighbors": neighbors.RadiusNeighborsRegressor()}
    return regrs
Example #12
def fit_vels(depths, vels_modeled, vels_train):
    y = vels_modeled - vels_train
    print(y.shape)
    print(depths.shape)
    regr = kr.KernelRidge(alpha=0.1, kernel=gp.kernels.RBF(1000))
    regr.fit(depths, y)
    #    gpr_pwave.fit(depths,y)
    return regr  #gpr_pwave
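The aliases `kr` and `gp` come from the surrounding module; given the calls to kr.KernelRidge and gp.kernels.RBF, the imports are presumably the following. Kernel instances from sklearn.gaussian_process are callable, which is what allows one to be passed as KernelRidge's kernel here.

# Presumed imports for the aliases above (assumption):
from sklearn import kernel_ridge as kr
from sklearn import gaussian_process as gp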
Example #13
def run_specific_combination(test_frame, reg_type, column_list):
    target_feature = test_frame['Endurance_Score']
    test_df = test_frame.filter(column_list, axis=1)
    X_train, X_test, y_train, y_test = train_test_split(
                test_df, target_feature.values.reshape(-1,1),
                test_size=0.20, random_state=0)
    if reg_type == 'dt':
        regr = DecisionTreeRegressor(max_depth=2)
    elif reg_type == 'lin':
        regr = linear_model.LinearRegression()
    elif reg_type == 'ridge':
        regr = linear_model.Ridge(alpha=1500.0)
    elif reg_type == 'lasso':
        regr = linear_model.Lasso(alpha=10.0)
    elif reg_type == 'bayridge':
        regr = linear_model.BayesianRidge()
    elif reg_type == 'sgd':
        regr = linear_model.SGDRegressor(loss='huber')
    elif reg_type == 'lars':
        regr = linear_model.Lars(n_nonzero_coefs=np.inf)
    elif reg_type == 'pasagv':
        regr = linear_model.PassiveAggressiveRegressor(random_state=0)
    elif reg_type == 'kernelridge':
        regr = kernel_ridge.KernelRidge()
    elif reg_type == 'svr':
        regr = svm.SVR()
    elif reg_type == 'kneigh':
        regr = neighbors.KNeighborsRegressor(algorithm='kd_tree')
    elif reg_type == 'gauss':
        regr = gaussian_process.GaussianProcessRegressor()
    elif reg_type == 'gbr':
        params = {'n_estimators': 760, 'max_depth': 4, 'min_samples_split': 3, 'learning_rate': 0.026, 'loss': 'huber'}
        regr = GradientBoostingRegressor(**params)
    elif reg_type == 'ran':
        regr = RandomForestRegressor(n_estimators=300, max_depth=8)
    elif reg_type == 'et':
        regr = ExtraTreesRegressor()
    else:
        return
    x_train_frame = X_train.copy()
    del x_train_frame['Title']
    del x_train_frame['Artist']
    regr.fit(x_train_frame, y_train.ravel())
    x_test_frame = X_test.copy()
    del x_test_frame['Title']
    del x_test_frame['Artist']
    y_pred = regr.predict(x_test_frame)
    mse = mean_squared_error(y_test, y_pred)
    score = r2_score(y_test, y_pred)
    print("R2-score: {}, RMSE: {}".format(score, math.sqrt(mse)))
    result_df = pd.DataFrame(columns=['Song', 'Artist', 'Endurance_Score', 'Predicted_Endurance_Score'])
    result_df['Song'] = X_test['Title']
    result_df['Artist'] = X_test['Artist']
    result_df['Endurance_Score'] = y_test.ravel()
    result_df['Predicted_Endurance_Score'] = y_pred
    result_df.to_csv('{0}/{1}.csv'.format(path_final_csv, 'predicted_midtermdata'), index=False)
Example #14
    def __call__(self, X, Y):
        nprobs = len(X)
        if len(Y) != nprobs:
            raise ValueError(
                "Number of problems for input ({}) and target ({}) do not match".format(nprobs, len(Y)))
        alpha = np.zeros((nprobs, X[0].shape[0]))
        for t in range(nprobs):
            clf = kernel_ridge.KernelRidge(alpha=self.mu, kernel=self.kernel)
            clf.fit(X[t], Y[t])
            alpha[t, :] = clf.dual_coef_.flatten().copy()
        return alpha, [partial(self.kernel, x) for x in X[0]]
Example #15
    def __init__(self, method, params, i=0):
        self.algorithm_list = [
            'PLS', 'GP', 'OLS', 'OMP', 'Lasso', 'Elastic Net', 'Ridge',
            'Bayesian Ridge', 'ARD', 'LARS', 'LASSO LARS', 'SVR', 'KRR', 'GBR'
        ]
        self.method = method
        self.outliers = None
        self.ransac = False

        #print(params)
        if self.method[i] == 'PLS':
            self.model = PLSRegression(**params[i])

        if self.method[i] == 'OLS':
            self.model = linear.LinearRegression(**params[i])

        if self.method[i] == 'OMP':
            # create a temporary set of parameters
            params_temp = copy.copy(params[i])
            self.model = linear.OrthogonalMatchingPursuit(**params_temp)

        if self.method[i] == 'LASSO':
            # create a temporary set of parameters
            params_temp = copy.copy(params[i])
            self.model = linear.Lasso(**params_temp)

        if self.method[i] == 'Elastic Net':
            params_temp = copy.copy(params[i])
            self.model = linear.ElasticNet(**params_temp)

        if self.method[i] == 'Ridge':
            # create a temporary set of parameters
            params_temp = copy.copy(params[i])
            self.model = linear.Ridge(**params_temp)

        if self.method[i] == 'BRR':
            self.model = linear.BayesianRidge(**params[i])

        if self.method[i] == 'ARD':
            self.model = linear.ARDRegression(**params[i])

        if self.method[i] == 'LARS':
            # create a temporary set of parameters
            params_temp = copy.copy(params[i])
            self.model = linear.Lars(**params_temp)

        if self.method[i] == 'SVR':
            self.model = svm.SVR(**params[i])

        if self.method[i] == 'KRR':
            self.model = kernel_ridge.KernelRidge(**params[i])
Example #16
def model_initialization(name, **kwargs):
    if name == 'linear_regression':
        model = linear_model.LinearRegression()
    elif 'ridge_regression' in name:
        alpha = kwargs['alpha']
        model = linear_model.Ridge(alpha)
    elif 'lasso_regression' in name:
        alpha = kwargs['alpha']
        model = linear_model.Lasso(alpha)
    elif 'elastic_net' in name:
        alpha = kwargs['alpha'] if 'alpha' in kwargs else 1
        r = kwargs['l1_ratio'] if 'l1_ratio' in kwargs else 0.5
        model = linear_model.ElasticNet(alpha=alpha, l1_ratio=r, max_iter=2000)
    elif 'SGD_regression' in name:
        l = kwargs['loss'] if 'loss' in kwargs else 'squared_error'
        p = kwargs['penalty'] if 'penalty' in kwargs else 'l2'
        alpha = kwargs['alpha'] if 'alpha' in kwargs else 0.0001
        r = kwargs['l1_ratio'] if 'l1_ratio' in kwargs else 0.15
        model = linear_model.SGDRegressor(loss=l,
                                          penalty=p,
                                          alpha=alpha,
                                          l1_ratio=r,
                                          max_iter=5)
    elif 'kernel_ridge' in name:
        kernel = kwargs['kernel']
        model = kernel_ridge.KernelRidge(kernel=kernel)
    elif 'support_vector_regression' in name:
        kernel = kwargs['kernel']
        c = kwargs['C'] if 'C' in kwargs else 1
        d = kwargs['degree'] if 'degree' in kwargs else 3
        g = kwargs['gamma'] if 'gamma' in kwargs else 'auto'
        model = svm.SVR(kernel=kernel,
                        C=c,
                        degree=d,
                        gamma=g,
                        cache_size=1000,
                        max_iter=2000)
    elif 'gradient_boost_regression' in name:
        l = kwargs['loss'] if 'loss' in kwargs else 'squared_error'
        r = kwargs['learning_rate'] if 'learning_rate' in kwargs else 0.1
        e = kwargs['n_estimators'] if 'n_estimators' in kwargs else 100
        model = ensemble.GradientBoostingRegressor(loss=l,
                                                   learning_rate=r,
                                                   n_estimators=e)
    elif 'xgboost_regression' in name:
        d = kwargs['max_depth'] if 'max_depth' in kwargs else 3
        ne = kwargs['n_estimators'] if 'n_estimators' in kwargs else 100
        model = xgb.XGBRegressor(max_depth=d, n_estimators=ne)
        print('max_depth:', d, 'n_e:', ne)
    return model
Example #17
    def __init__(self,
                 alpha=1.0,
                 kernel='linear',
                 gamma=None,
                 degree=3,
                 coef0=1,
                 kernel_params=None):
        ridge_reg = kernel_ridge.KernelRidge(alpha=alpha,
                                             kernel=kernel,
                                             gamma=gamma,
                                             degree=degree,
                                             coef0=coef0,
                                             kernel_params=kernel_params)
        super(KernelRidgeRegression, self).__init__(ridge_reg)
Example #18
def ridgeKernelCV_regression(params, X_tr, X_ts, y_tr, y_ts, kernel='linear'):
    # kernel = 'linear' | 'poly' | 'rbf' | 'sigmoid' | 'precomputed'

    kr = sk.KernelRidge(kernel=kernel)
    clf = sm.GridSearchCV(kr, params)
    
    clf.fit(X_tr, y_tr)
    kr = clf.best_estimator_ 
    
    y_pred = kr.predict(X_ts)

    mean_err = np.mean(np.abs(y_pred - y_ts))
    var_err = np.var(np.abs(y_pred - y_ts))

    return clf.best_params_['alpha'], mean_err, var_err, kr.dual_coef_, y_pred
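An invocation sketch, assuming `sk` aliases sklearn.kernel_ridge, `sm` aliases sklearn.model_selection, and the grid carries at least an 'alpha' entry (required, since best_params_['alpha'] is returned); the data here is synthetic:

import numpy as np
from sklearn import kernel_ridge as sk, model_selection as sm

rng = np.random.RandomState(0)
X_tr, y_tr = rng.rand(80, 4), rng.rand(80)
X_ts, y_ts = rng.rand(20, 4), rng.rand(20)
params = {'alpha': [1e-3, 1e-2, 1e-1, 1.0]}
best_alpha, mean_err, var_err, coefs, y_pred = ridgeKernelCV_regression(
    params, X_tr, X_ts, y_tr, y_ts, kernel='rbf')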
Example #19
def make_ada_model(train_df, synergy_score, n_tree=120, base_estimator="RF"):
    from sklearn import ensemble
    from sklearn import kernel_ridge
    if base_estimator == "RF":
        base_estimator_ = ensemble.RandomForestRegressor()
    elif base_estimator == "Kernel":
        base_estimator_ = kernel_ridge.KernelRidge(alpha=0.01,
                                                   kernel='poly',
                                                   degree=2)
    else:
        raise ValueError("Unknown base_estimator: " + str(base_estimator))

    model = ensemble.AdaBoostRegressor(n_estimators=n_tree,
                                       base_estimator=base_estimator_,
                                       learning_rate=0.9)
    model = fit(model, train_df, synergy_score)

    return model
Example #20
def train_kernel_regression(KNOWN_filepath: str,
                            QTABLE_filepath: str,
                            QUANTITIES_filepath: str,
                            writepath: str,
                            cutoff: int = 4):
    """
    Train a regression model using a database of observations and their Q values. Disregard data with less than "cutoff"
     updates.
    :param KNOWN_filepath: Database of observations
    :param QTABLE_filepath: Q Table for observations
    :param QUANTITIES_filepath: Counts updates in Q Table
    :param writepath: Where to dump the model parameters
    :param cutoff: Disregard data with fewer updates than this
    :return:
    """

    QTABLE = np.load(QTABLE_filepath)
    KNOWN = np.load(KNOWN_filepath)
    QUANTITIES = np.load(QUANTITIES_filepath)

    KNOWN_extended = np.tile(KNOWN, (6, 1))

    actions = []

    for i in range(6):
        actions.append(np.ones(KNOWN.shape[0]) * i)

    action_codes = np.concatenate(tuple(actions))
    qtable_cols = np.concatenate(tuple([QTABLE[:, i] for i in range(6)]))
    quantities_cols = np.concatenate(
        tuple([QUANTITIES[:, i] for i in range(6)]))

    KNOWN_extended = np.append(KNOWN_extended,
                               action_codes[:, np.newaxis],
                               axis=1)
    target_rows = np.where(quantities_cols >= cutoff)

    data = KNOWN_extended[target_rows]
    target = qtable_cols[target_rows]

    clf = kernel_ridge.KernelRidge(kernel="poly")
    clf.fit(data, target)

    with open(writepath, "wb") as f:
        pickle.dump(clf, f)

    return clf
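A hedged invocation sketch; the .npy and .pkl paths below are placeholders, not from the source:

clf = train_kernel_regression(KNOWN_filepath="KNOWN.npy",
                              QTABLE_filepath="QTABLE.npy",
                              QUANTITIES_filepath="QUANTITIES.npy",
                              writepath="kernel_model.pkl",
                              cutoff=4)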
Example #21
def getRegressor(regressorName, regressorParams):
    
    if regressorName == 'p':
        return PolynomialRegressor(**regressorParams)
    elif regressorName == 'ch':
        return ChebyshevRegressor(**regressorParams)
    elif regressorName == 'c':
        return ClusteredRegressor(**regressorParams)
    elif regressorName == 'a':
        return AveragedRegressor(**regressorParams)
    elif regressorName == 'rf':
        return ensemble.RandomForestRegressor(**regressorParams) #(max_depth=5)
    elif regressorName == 'cfr':
        return CapFloorRegionRegressor(**regressorParams)
    elif regressorName == 'svr':
        return svm.SVR(**regressorParams)
    elif regressorName == 'kr':
        return kernel_ridge.KernelRidge(**regressorParams)
    else:
        raise ValueError('Unhandled regression method: ' + regressorName)
Example #22
def kernel_ridge_version(X_tr,y_tr,X_te,y_te):
    # Create linear regression object

    tuned_parameters = [{'kernel':['linear'],'alpha': [0.001,0.05,0.01,0.1,1]},
                         {'kernel':['rbf'],'alpha':[0.001,0.05,0.01,0.1,1],'gamma':[0.1,1,10,100,500]}]
    print("# Tuning hyper-parameters")
    clf = model_selection.GridSearchCV(
        kernel_ridge.KernelRidge(alpha=1, coef0=1, degree=3, gamma=None,
                                 kernel='linear', kernel_params=None),
        tuned_parameters, cv=TimeSeriesCV(len(y_tr), fold=5))
    clf.fit(X_tr, y_tr)
    print("Best parameters set found on development set:")
    print(clf.best_params_)
    print("Grid scores on development set:")
    results = clf.cv_results_
    for mean_score, std_score, params in zip(results['mean_test_score'],
                                             results['std_test_score'],
                                             results['params']):
        print("%0.3f (+/-%0.03f) for %r" % (mean_score, std_score * 2, params))

    print("The model is trained on the full development set.")
    print("The scores are computed on the full evaluation set.")
    print('R^2: %.2f' % clf.score(X_te, y_te))
    mse = np.mean((clf.predict(X_te) - y_te) ** 2)
    print("Mean squared error: %.2f" % mse)
    return (clf.score(X_te, y_te), mse, clf.best_params_)
Example #23
def get_model(model_type, c=0, epsilon=0, gamma=0):

    if model_type == RBF:
        model = svm.SVR(kernel='rbf', C=c, epsilon=epsilon, gamma=gamma)
    elif model_type == POLY2:
        model = svm.SVR(kernel='poly', C=c, degree=2, epsilon=epsilon)
    elif model_type == POLY3:
        model = svm.SVR(kernel='poly', C=c, degree=3, epsilon=epsilon)
    elif model_type == POLY4:
        model = svm.SVR(kernel='poly', C=c, degree=4, epsilon=epsilon)
    elif model_type == LIN:
        model = svm.SVR(kernel='linear', C=c, epsilon=epsilon)
    elif model_type == Rand_F:
        model = ensemble.RandomForestRegressor()
    elif model_type == SGD:
        model = linear_model.SGDRegressor()
    elif model_type == KRR:
        model = kernel_ridge.KernelRidge(kernel='linear', alpha=1/(2*c))
    elif model_type == DT:
        model = DecisionTreeRegressor()
    else:
        raise ValueError('unknown model type: ' + str(model_type))
    return model
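The alpha=1/(2*c) mapping in the KRR branch follows the usual correspondence between ridge regularization and an SVM's C (scikit-learn's docs note that alpha corresponds to 1/(2C) in models like LinearSVC). A small sketch of two comparably regularized models, with synthetic data as an assumption:

import numpy as np
from sklearn import kernel_ridge, svm

rng = np.random.RandomState(0)
X, y = rng.rand(50, 2), rng.rand(50)
c = 10.0
# Same regularization strength; the losses still differ
# (squared loss for KRR, epsilon-insensitive loss for SVR).
krr = kernel_ridge.KernelRidge(kernel='linear', alpha=1 / (2 * c)).fit(X, y)
svr = svm.SVR(kernel='linear', C=c).fit(X, y)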
Example #24
def est_KR():
    hp = [{
        'kernel': (
            'poly',
            'rbf',
        ),
        'alpha': (
            1e-3,
            1e-2,
            1e-1,
            0.0,
            0.5,
            1.,
        ),
        #          'degree': (),
        #          'coef0': (),
        'gamma': (
            0.1,
            1,
            2,
        ),
    }]
    est = kernel_ridge.KernelRidge()
    return est, hp
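The returned (estimator, grid) pair plugs directly into a grid search; a minimal sketch with synthetic data (an assumption):

import numpy as np
from sklearn import model_selection

est, hp = est_KR()
X, y = np.random.rand(60, 3), np.random.rand(60)
gs = model_selection.GridSearchCV(est, hp, cv=3)
gs.fit(X, y)
print(gs.best_params_)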
Example #25
def evaluate_regression(scat,
                        target,
                        cross_val_folds,
                        kind='linear',
                        alphas=10.**(-np.arange(0, 10))):
    for alpha in alphas:
        if kind == 'linear':
            model = linear_model.Ridge(alpha=alpha)
        elif kind == 'bilinear':
            model = kernel_ridge.KernelRidge(alpha=alpha,
                                             kernel='poly',
                                             degree=2)
        else:
            raise ValueError('Invalid kind {}'.format(kind))
        regressor = pipeline.make_pipeline(preprocessing.StandardScaler(),
                                           model)
        scat_prediction = model_selection.cross_val_predict(regressor,
                                                            X=scat,
                                                            y=target,
                                                            cv=cross_val_folds)
        scat_MAE = np.mean(np.abs(scat_prediction - target))
        scat_RMSE = np.sqrt(np.mean((scat_prediction - target)**2))
        print('Ridge regression ({}), alpha: {}, MAE: {}, RMSE: {}'.format(
            kind, alpha, scat_MAE, scat_RMSE))
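A minimal invocation with synthetic arrays, purely to show the expected shapes (an assumption, not from the source):

import numpy as np

rng = np.random.RandomState(0)
scat = rng.randn(100, 8)   # feature matrix, e.g. scattering coefficients
target = rng.randn(100)    # regression target
evaluate_regression(scat, target, cross_val_folds=5, kind='bilinear')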
Example #26
def est_KR():
    hyper_params = [{
        'kernel': (
            'poly',
            'rbf',
        ),
        'alpha': (
            1e-3,
            1e-2,
            1e-1,
            0.0,
            0.5,
            1.,
        ),
        #                   'degree': (),
        #                   'coef0': (),
        'gamma': (
            0.1,
            1,
            2,
        ),
    }]

    est = kernel_ridge.KernelRidge()
    return est, hyper_params
Example #27
    def __init__(
        self,
        method,
        yrange,
        params,
        i=0
    ):  #TODO: yrange doesn't currently do anything. Remove or do something with it!
        self.algorithm_list = [
            'PLS',
            'GP',
            'OLS',
            'OMP',
            'Lasso',
            'Elastic Net',
            'Ridge',
            'Bayesian Ridge',
            'ARD',
            'LARS',
            'LASSO LARS',
            'SVR',
            'KRR',
        ]
        self.method = method
        self.outliers = None
        self.ransac = False

        print(params)
        if self.method[i] == 'PLS':
            self.model = PLSRegression(**params[i])

        if self.method[i] == 'OLS':
            self.model = linear.LinearRegression(**params[i])

        if self.method[i] == 'OMP':
            # check whether to do CV or not
            self.do_cv = params[i]['CV']
            # create a temporary set of parameters
            params_temp = copy.copy(params[i])
            # Remove CV parameter
            params_temp.pop('CV')
            if self.do_cv is False:
                self.model = linear.OrthogonalMatchingPursuit(**params_temp)
            else:
                params_temp.pop('precompute')
                self.model = linear.OrthogonalMatchingPursuitCV(**params_temp)

        if self.method[i] == 'LASSO':
            # create a temporary set of parameters
            params_temp = copy.copy(params[i])
            # check whether to do CV or not
            try:
                self.do_cv = params[i]['CV']
                # Remove CV parameter
                params_temp.pop('CV')
            except KeyError:
                self.do_cv = False

            if self.do_cv is False:
                self.model = linear.Lasso(**params_temp)
            else:
                params_temp.pop('alpha')
                self.model = linear.LassoCV(**params_temp)

        if self.method[i] == 'Elastic Net':
            params_temp = copy.copy(params[i])
            try:
                self.do_cv = params[i]['CV']
                params_temp.pop('CV')
            except KeyError:
                self.do_cv = False

            if self.do_cv is False:
                self.model = linear.ElasticNet(**params_temp)
            else:
                params_temp['l1_ratio'] = [.1, .5, .7, .9, .95, .99, 1]
                self.model = linear.ElasticNetCV(**params_temp)

        if self.method[i] == 'Ridge':
            # create a temporary set of parameters
            params_temp = copy.copy(params[i])
            try:
                # check whether to do CV or not
                self.do_cv = params[i]['CV']

                # Remove CV parameter
                params_temp.pop('CV')
            except KeyError:
                self.do_cv = False

            if self.do_cv:
                self.model = linear.RidgeCV(**params_temp)
            else:
                self.model = linear.Ridge(**params_temp)

        if self.method[i] == 'BRR':
            self.model = linear.BayesianRidge(**params[i])

        if self.method[i] == 'ARD':
            self.model = linear.ARDRegression(**params[i])

        if self.method[i] == 'LARS':
            # create a temporary set of parameters
            params_temp = copy.copy(params[i])
            try:
                # check whether to do CV or not
                self.do_cv = params[i]['CV']

                # Remove CV parameter
                params_temp.pop('CV')
            except KeyError:
                self.do_cv = False

            if self.do_cv is False:
                self.model = linear.Lars(**params_temp)
            else:
                self.model = linear.LarsCV(**params_temp)

        if self.method[i] == 'LASSO LARS':
            model = params[i]['model']
            params_temp = copy.copy(params[i])
            params_temp.pop('model')

            if model == 0:
                self.model = linear.LassoLars(**params_temp)
            elif model == 1:
                self.model = linear.LassoLarsCV(**params_temp)
            elif model == 2:
                self.model = linear.LassoLarsIC(**params_temp)
            else:
                print("Something went wrong, \'model\' should be 0, 1, or 2")

        if self.method[i] == 'SVR':
            self.model = svm.SVR(**params[i])

        if self.method[i] == 'KRR':
            self.model = kernel_ridge.KernelRidge(**params[i])

        if self.method[i] == 'GP':
            # get the method for dimensionality reduction and the number of components
            self.reduce_dim = params[i]['reduce_dim']
            self.n_components = params[i]['n_components']
            # create a temporary set of parameters
            params_temp = copy.copy(params[i])
            # Remove parameters not accepted by Gaussian Process
            params_temp.pop('reduce_dim')
            params_temp.pop('n_components')
            self.model = GaussianProcess(**params_temp)
Example #28
path = 'E://utils'
sys.path.append(path)

import common_utils as utils
import regression_utils as rutils
from sklearn import metrics, kernel_ridge, svm, model_selection

scoring = metrics.make_scorer(rutils.rmse, greater_is_better=False)

X, y = rutils.generate_nonlinear_synthetic_data_regression(n_samples=200,
                                                           n_features=1)
X_train, X_test, y_train, y_test = model_selection.train_test_split(
    X, y, test_size=0.1, random_state=1)
rutils.plot_data_2d_regression(X_train, y_train)

kernel_lr = kernel_ridge.KernelRidge(kernel="rbf")
kernel_lr_grid = {
    'alpha': [0.0001, 0.01, 0.05, 0.2, 0.5, 1],
    'gamma': [0.01, 0.1, 1, 2, 3, 4, 5, 10]
}
final_kernel_lr_model = utils.grid_search_best_model(kernel_lr,
                                                     kernel_lr_grid,
                                                     X_train,
                                                     y_train,
                                                     scoring=scoring)
rutils.plot_model_2d_regression(final_kernel_lr_model, X_train, y_train)
rutils.regression_performance(final_kernel_lr_model, X_test, y_test)

kernel_svm = svm.SVR(kernel="rbf")
kernel_svm_grid = {
    'C': [0.2, 0.5, 10, 20, 50, 100],
}
Example #29
LABELS = ["Curr_home_p", "Curr_away_p", "Curr_draw_p"]

data_set_path = 'train.csv'
df = pd.read_csv(data_set_path)
train, test = train_test_split(df, test_size=0.2)

x_train = train[FEATURES].to_numpy()
x_test = test[FEATURES].to_numpy()

z_train = train[LABELS].to_numpy()
z_test = test[LABELS].to_numpy()

model = sys.argv[1]

if model == 'linear':
    clf = kernel_ridge.KernelRidge(kernel="linear")
elif model == 'poly':
    clf = kernel_ridge.KernelRidge(kernel="poly",
                                   degree=2,
                                   alpha=0.6,
                                   gamma=0.9)
elif model == 'tree':
    clf = tree.DecisionTreeRegressor(max_depth=5, min_samples_leaf=4)
else:
    raise ValueError("unknown model: " + model)

clf.fit(x_train, z_train)

z_pred = clf.predict(x_test)

cv_scores = cross_val_score(clf, x_train, z_train, cv=5)
print("Accuracy: %0.2f (+/- %0.2f)" % (cv_scores.mean(), cv_scores.std() * 2))
Example #30
# Feature selection using RLR
from sklearn.linear_model import RandomizedLogisticRegression as RLR
rlr = RLR()
rlr.fit(train_data, probs)
rlr.get_support()

# Prepare the regressors
import sklearn
from sklearn import gaussian_process, kernel_ridge, isotonic
from sklearn.ensemble import ExtraTreesClassifier
Regressors = {
    #        'pls': cross_decomposition.PLSRegression(),  # raises an error
    'gradient boosting': ensemble.GradientBoostingRegressor(),
    #        'gaussian': gaussian_process.GaussianProcessRegressor(),  # raises an error
    #        'isotonic': isotonic.IsotonicRegression(),  # raises an error
    'kernelridge': kernel_ridge.KernelRidge(),
    'ARD': linear_model.ARDRegression(),
    'bayesianridge': linear_model.BayesianRidge(),
    #        'elasticnet': linear_model.ElasticNet(),  # raises an error
    'HuberRegressor': linear_model.HuberRegressor(),
    'LinearRegression': linear_model.LinearRegression(),
    #        'logistic': linear_model.LogisticRegression(),  # raises an error
    #        'linear_model.RidgeClassifier': linear_model.RidgeClassifier(),  # raises an error
    'k-neighbor': neighbors.KNeighborsRegressor(),
    'SVR': svm.LinearSVR(),
    'NUSVR': svm.NuSVR(),
    'extra tree': tree.ExtraTreeRegressor(),
    'decision tree': tree.DecisionTreeRegressor(),
    #        'random logistic': linear_model.RandomizedLogisticRegression(),  # raises an error
    #        'dummy': dummy.DummyRegressor(),  # raises an error
}