Example #1
def lasso_cv_coef(X_train, y_train, plotit=True, summarize=True):
    '''
    lasso_cv_coef(X_train, y_train, plotit=True, summarize=True)

    plotit    : produce a coefficient plot at runtime
    summarize : print a fitted-model summary
    RETURNS   : model, alpha, score, coef, yhat
    '''
    model = LassoCV().fit(X_train, y_train)
    alpha = model.alpha_
    score = model.score(X_train, y_train)
    coef = pd.Series(model.coef_, index=X_train.columns)
    yhat = model.predict(X_train)
    if summarize:
        vars_kept = sum(coef != 0)
        vars_elim = sum(coef == 0)
        print("Best alpha using built-in LassoCV: %f" % alpha)
        print("Best score using built-in LassoCV: %f" % score)
        print("Lasso picked " + str(vars_kept) +
              " variables and eliminated the other " + str(vars_elim) +
              " variables")
        print(pd.DataFrame(coef))
    if plotit:
        imp_coef = coef.sort_values()
        matplotlib.rcParams['figure.figsize'] = (4.0, 5.0)
        imp_coef.plot(kind="barh")
        plt.title("Feature importance using Lasso Model")
        plt.show()
    return model, alpha, score, coef, yhat
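A minimal call sketch for the helper above; the imports, synthetic data, and column names here are illustrative assumptions, not part of the original:

import numpy as np
import pandas as pd
from sklearn.linear_model import LassoCV

# Hypothetical toy data: 100 samples, 3 named features
rng = np.random.RandomState(0)
X_train = pd.DataFrame(rng.randn(100, 3), columns=["a", "b", "c"])
y_train = 2.0 * X_train["a"] - 0.5 * X_train["c"] + 0.1 * rng.randn(100)

model, alpha, score, coef, yhat = lasso_cv_coef(X_train, y_train, plotit=False)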
Example #2
def run_lasso_main(X_tr, y_tr, X_vl, y_val, output, main):
    os.chdir(output)

    if not os.path.exists("lasso"):
        os.makedirs("lasso")

    os.chdir(os.path.join(output, 'lasso/'))

    if not os.path.exists("figures"):
        os.makedirs("figures")

    os.chdir(os.path.join(output, 'lasso/figures/'))

    for i in range(0, y_tr.shape[1]):
        lasso = LassoCV(cv=5, random_state=0).fit(X_tr, y_tr[:,i])
        # lasso = linear_model.Lasso(alpha=0.01)
        # lasso.fit(X_train, y_train)
        lasso.score(X_tr, y_tr[:,i])
        y_hat = lasso.predict(X_vl)

        # correlation btw predicted and observed
        corr = pearsonr(y_hat, y_val[:,i])
        fig = plt.figure()
        # plot observed vs. predicted targets
        plt.title('Lasso: Observed vs Predicted Y_trait_' + str(i) + ' cor: ' + str(corr[0]))
        plt.ylabel('Predicted')
        plt.xlabel('Observed')
        plt.scatter(y_val[:,i], y_hat, marker='o')
        fig.savefig("Lasso_Out" + str(i) + '.png', dpi=300)
        plt.close(fig)
Example #3
def test_cross_val_criterion():
    alpha_min = alpha_max / 10
    log_alpha_max = np.log(alpha_max)
    log_alpha_min = np.log(alpha_min)
    max_iter = 10000
    n_alphas = 10
    kf = KFold(n_splits=5, shuffle=True, random_state=56)

    estimator = sklearn.linear_model.Lasso(fit_intercept=False,
                                           max_iter=1000,
                                           warm_start=True)
    monitor_grid = Monitor()
    criterion = CrossVal(X, y, Lasso, cv=kf, estimator=estimator)
    algo = Forward()
    grid_search(algo,
                criterion,
                log_alpha_min,
                log_alpha_max,
                monitor_grid,
                max_evals=n_alphas,
                tol=tol)

    reg = LassoCV(cv=kf,
                  verbose=True,
                  tol=tol,
                  fit_intercept=False,
                  alphas=np.geomspace(alpha_max, alpha_min, num=n_alphas),
                  max_iter=max_iter).fit(X, y)
    reg.score(X, y)
    objs_grid_sk = reg.mse_path_.mean(axis=1)
    # the grid-search objectives and sklearn's CV MSE path should match
    assert np.allclose(objs_grid_sk, monitor_grid.objs)
Example #4
class EPMNF_model(object):
    
    def __init__(self,train_path,test_path,pred_path):
        self.train_path = train_path
        self.test_path = test_path
        self.pred_path = pred_path
        self.lasso_model = LassoCV(alphas=[float(i) * 0.05 for i in range(1, 100)],
                                   cv=10, n_alphas=10, max_iter=10000000,
                                   random_state=0)
    
    #get X_train,y_train,X_test,y_test, and EPMNF expansion
    def preprocess_data(self):
        train_data = read_data(self.train_path)
        test_data = read_data(self.test_path)
        len_train = len(train_data)
        len_test = len(test_data)
        train_data = np.asarray(train_data)
        test_data = np.asarray(test_data)
        #print(train_data.shape,test_data.shape)


        X_train,y_train = train_data[:,:-1],train_data[:,-1]
        X_test,y_test = test_data[:,:-1],test_data[:,-1]
        #print(X_train.shape,y_train.shape,X_test.shape,y_test.shape)

        X_all = np.append(X_train,X_test,axis=0)
        X_all_EPMNF = []
        for row in X_all:
            line = []
            for p in row:
                line = line + PMNF_exp(p)
            X_all_EPMNF.append(line)
        X_all_EPMNF = np.asarray(X_all_EPMNF)
        #print(X_all_EPMNF.shape)
        
        scaler = StandardScaler()
        scaler.fit(X_all_EPMNF)
        X_all_EPMNF = scaler.transform(X_all_EPMNF)

        X_train_EPMNF = X_all_EPMNF[:len_train,:]
        X_test_EPMNF = X_all_EPMNF[len_train:,:]
        print(X_train_EPMNF.shape,X_test_EPMNF.shape)

        return train_data,test_data,X_train_EPMNF,X_test_EPMNF,y_train,y_test

    def train(self):

        train_data,test_data,X_train_EPMNF,X_test_EPMNF,y_train,y_test = self.preprocess_data()
        self.lasso_model.fit(X_train_EPMNF,y_train)
        y_pred = self.lasso_model.predict(X_test_EPMNF)

        with open(self.pred_path,"w",newline='') as f:
            csv_writer = csv.writer(f)
            for i in range(len(test_data)):
                row = np.append(test_data[i],y_pred[i])
                csv_writer.writerow(row)
        #print(pred_data)
        print("The alpha is : {}".format(self.lasso_model.alpha_))
        print("The train R^2 is : {}".format(self.lasso_model.score(X_train_EPMNF,y_train)))
        print("The test R^2 is : {}".format(self.lasso_model.score(X_test_EPMNF,y_test)))
        print("number of non-zero coefs is : {}".format(np.count_nonzero(self.lasso_model.coef_)))
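The class relies on two project helpers, read_data and PMNF_exp, that are not shown; hypothetical stand-ins with compatible shapes (guesses at the real signatures, not the original code) could look like:

import csv
import math

def read_data(path):
    # hypothetical stand-in: CSV of floats, last column is the target
    with open(path) as f:
        return [[float(v) for v in row] for row in csv.reader(f)]

def PMNF_exp(p):
    # hypothetical stand-in: expand one scalar feature into several terms,
    # returning a list so the per-feature expansions concatenate with `+`
    # (assumes non-negative inputs)
    return [p, math.sqrt(p), p ** 2, math.log(p + 1.0)]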
Example #5
def linear_reg_all(df):
    ## Split and clean Data
    X_train, X_test, y_train, y_test = split_data_multimeter(df)

    # Fit your model using the training set
    linear = LinearRegression()
    lasso_cv = LassoCV(cv=5, random_state=0)
    ridge_cv = RidgeCV(alphas=(0.1, 1.0, 10.0))
    linear.fit(X_train, y_train)
    lasso_cv.fit(X_train, y_train)
    ridge_cv.fit(X_train, y_train)
    print(
        'Linear regression score on train set with all parameters: {}'.format(
            linear.score(X_train, y_train)))
    print('Linear regression score on test set with all parameters: {}'.format(
        linear.score(X_test, y_test)))

    print(
        'LassoCV regression score on train set with all parameters: {}'.format(
            lasso_cv.score(X_train, y_train)))
    print(
        'LassoCV regression score on test set with all parameters: {}'.format(
            lasso_cv.score(X_test, y_test)))

    print(
        'RidgeCV regression score on train set with all parameters: {}'.format(
            ridge_cv.score(X_train, y_train)))
    print(
        'RidgeCV regression score on test set with all parameters: {}'.format(
            ridge_cv.score(X_test, y_test)))

    return ridge_cv, lasso_cv, linear, X_train, X_test, y_train, y_test
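Both variants of linear_reg_all (and linear_reg_single_meter further down) call two project helpers that are not shown, split_data_multimeter and vifs; rough stand-ins under assumed signatures might be:

from sklearn.model_selection import train_test_split
from statsmodels.stats.outliers_influence import variance_inflation_factor

def split_data_multimeter(df, drop_list=None, dummies=None, thresh=1):
    # hypothetical stand-in: treat the last column as the target
    X = df.drop(columns=df.columns[-1])
    y = df[df.columns[-1]]
    return train_test_split(X, y, test_size=0.25, random_state=0)

def vifs(X):
    # hypothetical stand-in: one variance inflation factor per column
    return [variance_inflation_factor(X.values, i) for i in range(X.shape[1])]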
Example #6
def linear_reg_all(df, drop_list, dummies, thresh=1):
    ## Split and clean Data
    X_train, X_test, y_train, y_test = split_data_multimeter(
        df, drop_list, dummies, thresh)

    X_scaler = StandardScaler()
    X_train = X_scaler.fit_transform(X_train)
    X_test_1 = X_scaler.transform(X_test)

    # Fit your model using the training set
    linear = LinearRegression()
    lasso_cv = LassoCV(cv=5, random_state=0)
    ridge_cv = RidgeCV(alphas=(0.1, 1.0, 10.0))
    linear.fit(X_train, y_train)
    lasso_cv.fit(X_train, y_train)
    ridge_cv.fit(X_train, y_train)
    print("Variance Inflation Factors")
    vif = vifs(X_test)
    print(vif)
    print('\n')
    print(list(zip(vif, list(X_test.columns))))
    print(
        'Linear regression score on train set with all parameters: {}'.format(
            linear.score(X_train, y_train)))
    print('Linear regression score on test set with all parameters: {}'.format(
        linear.score(X_test_1, y_test)))
    # print('Linear regression crossVal score on train set with all parameters: {}'.format(linear.score(X_train, y_train)))
    # print('Linear regression crossVal score on test set with all parameters: {}'.format(linear.score(X_test, y_test)))

    print(
        'LassoCV regression score on train set with all parameters: {}'.format(
            lasso_cv.score(X_train, y_train)))
    print(
        'LassoCV regression score on test set with all parameters: {}'.format(
            lasso_cv.score(X_test_1, y_test)))
    # print('LassoCV regression crossVal score on train set with all parameters: {}'.format(lasso_cv.score(X_train, y_train)))
    # print('LassoCV regression crossVal score on test set with all parameters: {}'.format(lasso_cv.score(X_test, y_test)))

    print(
        'RidgeCV regression score on train set with all parameters: {}'.format(
            ridge_cv.score(X_train, y_train)))
    print(
        'RidgeCV regression score on test set with all parameters: {}'.format(
            ridge_cv.score(X_test_1, y_test)))
    # print('RidgeCV regression crossVal score on train set with all parameters: {}'.format(ridge_cv.score(X_train, y_train)))
    # print('RidgeCV regression crossVal score on test set with all parameters: {}'.format(ridge_cv.score(X_test, y_test)))

    return ridge_cv, lasso_cv, linear, X_train, X_test, y_train, y_test
Example #7
def lasso(A, y, positive=True):
    A_scaler = StandardScaler().fit(A[:, 1:])
    y_scaler = StandardScaler().fit(y.reshape(-1, 1))
    A_new = A_scaler.transform(A[:, 1:])
    y_new = y_scaler.transform(y.reshape(-1, 1)).reshape(-1)
    clf = LassoCV(cv=5,
                  n_jobs=8,
                  fit_intercept=False,
                  positive=positive).fit(A_new, y_new)
    score = clf.score(A_new, y_new)
    df = np.count_nonzero(clf.coef_)
    logging.info("[LASSO] # iter: %d, alpha: %e, # of terms: %d, score: %f",
                 clf.n_iter_, clf.alpha_, df, score)
    logging.debug("[LASSO] alphas:")
    logging.debug(str(clf.alphas_))
    logging.debug("[LASSO] MSE path:")
    logging.debug(str(clf.mse_path_))
    nonzero = abs(clf.coef_) > 0.0
    coef = np.zeros_like(clf.coef_)
    # coef[nonzero] = ((y_scaler.var_ / A_scaler.var_[nonzero]) ** 0.5) * clf.coef_[nonzero]
    coef[nonzero] = (y_scaler.scale_ /
                     A_scaler.scale_[nonzero]) * clf.coef_[nonzero]
    intercept = y_scaler.mean_ - np.dot(A_scaler.mean_, coef)
    return np.append(intercept, coef), df
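A quick sanity check of the rescaling logic on synthetic data (the setup below is illustrative); note the function treats the first column of A as an all-ones intercept column and drops it before scaling:

import numpy as np

rng = np.random.RandomState(0)
n = 200
A = np.hstack([np.ones((n, 1)), rng.rand(n, 3)])  # intercept column + 3 features
true_w = np.array([1.5, 2.0, 0.0, 3.0])           # intercept, then coefficients
y = A @ true_w + 0.01 * rng.randn(n)

w_hat, df = lasso(A, y, positive=True)
print(w_hat)  # should land close to true_w; df counts the non-zero terms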
Example #8
def test_lasso_cv():
    X, y, X_test, y_test = build_dataset()
    max_iter = 150
    clf = LassoCV(n_alphas=10, eps=1e-3, max_iter=max_iter, cv=3).fit(X, y)
    assert_almost_equal(clf.alpha_, 0.056, 2)

    clf = LassoCV(n_alphas=10,
                  eps=1e-3,
                  max_iter=max_iter,
                  precompute=True,
                  cv=3)
    clf.fit(X, y)
    assert_almost_equal(clf.alpha_, 0.056, 2)

    # Check that the lars and the coordinate descent implementation
    # select a similar alpha
    lars = LassoLarsCV(max_iter=30, cv=3).fit(X, y)
    # for this we check that they don't fall in the grid of
    # clf.alphas further than 1
    assert np.abs(
        np.searchsorted(clf.alphas_[::-1], lars.alpha_) -
        np.searchsorted(clf.alphas_[::-1], clf.alpha_)) <= 1
    # check that they also give a similar MSE
    mse_lars = interpolate.interp1d(lars.cv_alphas_, lars.mse_path_.T)
    np.testing.assert_approx_equal(mse_lars(clf.alphas_[5]).mean(),
                                   clf.mse_path_[5].mean(),
                                   significant=2)

    # test set
    assert clf.score(X_test, y_test) > 0.99
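build_dataset comes from scikit-learn's own test utilities; to run the test standalone, a compatible stand-in is sketched below (it approximates, rather than reproduces, the real helper, so the hard-coded alpha assertions may not hold exactly):

import numpy as np

def build_dataset(n_samples=50, n_features=200, n_informative_features=10):
    # sparse ground truth: only the first few features carry signal
    random_state = np.random.RandomState(0)
    w = random_state.randn(n_features)
    w[n_informative_features:] = 0.0
    X = random_state.randn(n_samples, n_features)
    y = X @ w
    X_test = random_state.randn(n_samples, n_features)
    y_test = X_test @ w
    return X, y, X_test, y_test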
Example #9
    def run(self, trainingDataset, plotting):
        dataset = trainingDataset
        accuracy = 0
        y = dataset['int_rate']
        X = dataset.drop(columns=['int_rate',])
        if plotting:
            X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=1)
            lassoreg = LassoCV(cv=5, random_state=42)
            lassoreg.fit(X_train,y_train)
            print("###################################LassoRegression#############################")
            accuracy=lassoreg.score(X_test, y_test)
            pred = lassoreg.predict(X_test)
            #accuracy = np.sqrt(metrics.mean_squared_error( y_test,pred))
            print("score:"+str(accuracy))
        else:
            lassoreg = LassoCV(cv=5, random_state=42)
            lassoreg.fit(X,y)
            testData = pd.read_csv("./SiameseNeuralNetworkProject/MachineLearningAlgorithmSuite/CleanedData/SiameseTrainingData.csv")
            predictions = lassoreg.predict(testData)
            np.savetxt("./SiameseNeuralNetworkProject/MachineLearningAlgorithmSuite/OutputFiles/LassoCVRegressionPredictions.csv", predictions, delimiter=",")

            testData = pd.read_csv("./SiameseNeuralNetworkProject/MachineLearningAlgorithmSuite/CleanedData/OverallTestingData.csv")
            predictions = lassoreg.predict(testData)
            np.savetxt("./SiameseNeuralNetworkProject/MachineLearningAlgorithmSuite/OutputFiles/LassoCVRegressionPredictionsTestData.csv", predictions, delimiter=",")

        return accuracy
Example #10
    def train_and_test(self,
                       g,
                       m,
                       t,
                       approx,
                       I=100,
                       delta=0.025,
                       skip_variance=False):
        kernel = FastSK(
            g=g,
            m=m,
            t=t,
            approx=approx,
            max_iters=I,
            delta=delta,
            skip_variance=skip_variance,
        )

        kernel.compute_kernel(self.train_seq, self.test_seq)
        self.Xtest = kernel.get_test_kernel()
        self.Xtest = np.array(self.Xtest).reshape(len(self.Xtest), -1)
        self.Xtrain = kernel.get_train_kernel()
        self.Xtrain = np.array(self.Xtrain).reshape(len(self.Xtrain), -1)

        # Can replace Lasso with alternative regression approaches such as SVR
        model = LassoCV(cv=5, n_jobs=t,
                        random_state=293).fit(self.Xtrain, self.Ytrain)
        r2 = model.score(self.Xtest, self.Ytest)
        return r2
Example #11
def fit_grn_row(args):
    # unpack the job tuple (Python 3 dropped tuple parameters)
    i, x, y, eps, max_iter, verbose = args
    model = LassoCV(eps=eps, max_iter=max_iter).fit(x, y)
    if verbose:
        print('row:', i, 'nnz:', (~np.isclose(model.coef_, 0)).sum(),
              'score:', model.score(x, y), 'reg param:', model.alpha_)
    return model.coef_
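The tuple-style argument suggests the function was written for multiprocessing.Pool.map, one lasso fit per response column; an illustrative (assumed) driver:

import numpy as np
from multiprocessing import Pool

rng = np.random.RandomState(0)
x = rng.randn(100, 20)
Y = rng.randn(100, 5)  # one regression per column
jobs = [(i, x, Y[:, i], 1e-3, 1000, False) for i in range(Y.shape[1])]
# guard with `if __name__ == "__main__":` on spawn-based platforms
with Pool(2) as pool:
    coefs = pool.map(fit_grn_row, jobs)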
Example #12
def do_LASSO(cv=10):
    """
    Do LASSO on the data-set
    Params ::
    cv: int: folds of cross-validation to do. Default 10
    Returns ::
    None
    """
    x_scaler = StandardScaler()
    y_scaler = StandardScaler()
    X_train, X_test, y_train, y_test = train_test_split(X,
                                                        y,
                                                        train_size=0.60,
                                                        random_state=23)
    X_std_train = x_scaler.fit_transform(X_train)
    X_std_test = x_scaler.transform(X_test)
    # StandardScaler expects 2-D input, so reshape the 1-D targets
    y_std_train = y_scaler.fit_transform(np.asarray(y_train).reshape(-1, 1)).ravel()
    y_std_test = y_scaler.transform(np.asarray(y_test).reshape(-1, 1)).ravel()

    lasso = LassoCV(cv=cv)
    lasso.fit(X_std_train, y_std_train)
    # undo the target scaling on the predictions
    y_predict = y_scaler.inverse_transform(
        lasso.predict(X_std_test).reshape(-1, 1)).ravel()
    print('Mean Absolute Error: ', mean_absolute_error(y_true=y_test, \
        y_pred=y_predict))
    print('R2 of training data: ', lasso.score(X_std_train, y_std_train))
    plot_parity(x=y_test, y=y_predict, xlabel='True E/Z Ratio', \
        ylabel='Predicted E/Z Ratio')
Example #13
def getSortedTopKfeatures(train_features, train_label):

    reg = LassoCV()
    reg.fit(train_features, train_label)
    print("Best alpha using built-in LassoCV: %f" % reg.alpha_)
    print("Best score using built-in LassoCV: %f" %
          reg.score(train_features, train_label))
    coef = pd.Series(reg.coef_, index=train_features.columns)

    print("Lasso picked " + str(sum(coef != 0)) +
          " variables and eliminated the other " + str(sum(coef == 0)) +
          " variables")

    '''
    import matplotlib
    imp_coef = coef.sort_values()
    matplotlib.rcParams['figure.figsize'] = (8.0, 25.0)
    imp_coef.plot(kind = "barh")
    plt.title("Feature importance using Lasso ds_model")
    fig=plt.gcf()
    fig.set_size_inches(10,20)
    #plt.show()
    fig.savefig('Features_importance.png')
    '''

    # sort features by coefficient and return those with non-zero weights
    sorted_coef = sorted(coef.to_dict().items(), key=lambda kv: kv[1],
                         reverse=True)
    keys = [k for k, v in sorted_coef if v != 0]
    return keys
Example #14
def LASSO_cv(problem, **kwargs):
    r"""High level description.

    Parameters
    ----------
    problem : type
        Description
        kwargs['LASSO_reg_coefs'] must be a nonnegative float.  These are the
        multipliers for the penalty term in cross-validation of LASSO
        kwargs['coef_tolerance'] must be a nonnegative float

    Returns
    -------
    output : tuple
        (optimum, maximum)

    """
    data_list = [datum['data']['values'] for datum in problem.data]
    data = numpy.array(data_list)
    lasso = LassoCV(alphas=kwargs['LASSO_reg_coefs'])
    lasso.fit(data.T, problem.goal['data']['values'])
    lasso_coefficients = lasso.coef_
    optimum = [
        problem.data[index] for index, element in enumerate(lasso_coefficients)
        if abs(element) > kwargs['coef_tolerance']
    ]
    maximum = lasso.score(data.T, problem.goal['data']['values'])
    output = (optimum, maximum)
    return output
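LASSO_cv expects a problem object exposing .data (a list of {'data': {'values': ...}} records, one per candidate predictor) and a .goal of the same shape; a hypothetical minimal call, with made-up data, might look like:

import numpy
from types import SimpleNamespace

rng = numpy.random.RandomState(0)
series = [{'data': {'values': rng.rand(50)}} for _ in range(4)]
goal = {'data': {'values': 2.0 * series[0]['data']['values']}}
problem = SimpleNamespace(data=series, goal=goal)

optimum, maximum = LASSO_cv(problem,
                            LASSO_reg_coefs=[0.01, 0.1, 1.0],
                            coef_tolerance=1e-3)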
Example #15
def lasso_regression(X_train, y_train, X_test, y_test, plot):
    """
    Perform a lasso regression with built-in CV and plot the feature importance
    """
    # Fit the lasso regression
    reg = LassoCV()
    reg.fit(X_train, y_train)
    print("Best alpha using built-in LassoCV: %f" % reg.alpha_)
    print("Best score using built-in LassoCV: %f" % reg.score(X_train, y_train))
    coef = pd.Series(reg.coef_, index=X_train.columns)
    print(
        "Lasso picked "
        + str(sum(coef != 0))
        + " variables and eliminated the other "
        + str(sum(coef == 0))
        + " variables"
    )
    # Extract the feature importance
    imp_coef = coef.sort_values()
    # Plot the feature importance
    if plot:
        plt.rcParams["figure.figsize"] = (8.0, 10.0)
        imp_coef.plot(kind="barh")
        plt.title("Feature importance using Lasso Model")
        plt.show()

        # Plotting the prediction error
        visualizer = PredictionError(reg, size=(1080, 720))
        visualizer.fit(X_train, y_train)  # Fit the training data to the visualizer
        visualizer.score(X_test, y_test)  # Evaluate the model on the test data
        visualizer.show()                 # Finalize and render the figure
        # Visualizing the regression
        visualizer = ResidualsPlot(reg, size=(1080, 720))
        visualizer.fit(X_train, y_train)  # Fit the training data to the visualizer
        visualizer.score(X_test, y_test)  # Evaluate the model on the test data
        visualizer.show()                 # Finalize and render the figure
    # Using the test data to calculate a score
    y_pred = reg.predict(X_test)
    # Return metrics
    return {
        "name": "Lasso Regression",
        "R squared": reg.score(X_test, y_test),
        "RMSE": rmse(y_test, y_pred),
        "R squared training": reg.score(X_train, y_train),
        "MAE": mean_absolute_error(y_test, y_pred),
    }
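This example leans on yellowbrick's regressor visualizers plus an rmse helper defined elsewhere; the imports it needs are roughly as follows (rmse is a guess at the missing helper):

from math import sqrt

from sklearn.linear_model import LassoCV
from sklearn.metrics import mean_absolute_error, mean_squared_error
from yellowbrick.regressor import PredictionError, ResidualsPlot

def rmse(y_true, y_pred):
    # assumed helper: root mean squared error
    return sqrt(mean_squared_error(y_true, y_pred))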
Example #16
    def feature_lasso(self):
        model = LassoCV()
        model.fit(self.x, self.y)
        coefficients = pd.Series(model.coef_, index=self.x.columns)
        print("Beta weights/co-efficients (L1 regularisation)")
        print("-----------------------------------------")
        print(coefficients)
        print('\n')
        print('R2 score is {}'.format(model.score(self.x, self.y)))
Example #17
def Divergence_Plots_For_Single_Instrument(instrument_name, flag):

    data = pd.read_csv(instrument_name + ".csv")

    # Making a copy of data frame and dropping all the null values
    df_copy = data.copy()
    df_copy = df_copy.dropna(axis=1)
    df_copy = df_copy.dropna()

    print(len(df_copy))

    X = df_copy[[
        "CCI", "RSI", "MACD", "WPCTR", "pdi", "mdi", "adx",
        "Divergence Factor 1", "Divergence Factor 2", "Divergence Factor 3",
        "Divergence Factor 4"
    ]]
    y = df_copy["DF Avg Rank"]

    print(len(X), len(y))
    # Embedded method
    reg = LassoCV()
    reg.fit(X, y)
    print("Best alpha using built-in LassoCV: %f" % reg.alpha_)
    print("Best score using built-in LassoCV: %f" % reg.score(X, y))
    coef = pd.Series(reg.coef_, index=X.columns)

    print("Lasso picked " + str(sum(coef != 0)) +
          " variables and eliminated the other " + str(sum(coef == 0)) +
          " variables")

    fig, ax = plt.subplots(1)
    sns.set()
    imp_coef = coef.sort_values()
    matplotlib.rcParams['figure.figsize'] = (10, 14)
    matplotlib.rcParams['ytick.labelsize'] = 3
    matplotlib.rcParams['axes.labelsize'] = 3
    matplotlib.rcParams['legend.fontsize'] = 1

    plt.rc('axes', titlesize=12)
    plt.yticks(fontsize=9.5)
    my_colors = list(
        islice(cycle(['orange', 'b', 'r', 'g', 'y', 'k', 'm']), None,
               len(df_copy)))
    ax = imp_coef.plot(kind="barh",
                       stacked=True,
                       color=my_colors,
                       width=0.91,
                       align='edge')
    ax.yaxis.label.set_size(3)

    title = f"Feature importance of {instrument_name} using the Lasso Model"
    plt.title(title)
    fig_name = instrument_name + "_EmbeddedMethod" + ".png"
    if flag:
        plt.savefig("man_select_inst" + "\\" + fig_name)
    else:
        plt.savefig("rule_select_inst" + "\\" + fig_name)
Example #18
def lasso_reg(x, y):
    alpha = np.logspace(-2, 10, num=50)
    lassocv = LassoCV(alphas=alpha, cv=20)
    lassocv.fit(x, y)
    lassocv_score = lassocv.score(x, y)
    lassocv_alpha = lassocv.alpha_
    print('Lasso R square', lassocv_score)
    print('Lasso Alpha', lassocv_alpha)
    return lassocv.coef_
Example #19
def featureImportanceLasso(X,y):
    reg = LassoCV()
    reg.fit(X, y)
    print("Best alpha using built-in LassoCV: %f" % reg.alpha_)
    print("Best score using built-in LassoCV: %f" %reg.score(X,y))
    coef = pd.Series(reg.coef_, index = X.columns)
    imp_coef = coef.sort_values()
    imp_coef.plot(kind = "barh")
    plt.title("Feature importance usando Lasso Model")
Example #20
def k_fold(x_train, y_train):
    alphas = np.logspace(-4, -0.5, 30)
    lassoCV = LassoCV(random_state=0, alphas=alphas, max_iter=10000)
    k_fold = KFold(3)
    scores = []
    for k, (train, test) in enumerate(k_fold.split(x_train, y_train)):
        lassoCV.fit(x_train[train], y_train[train])
        scores.append(lassoCV.score(x_train[test], y_train[test]))
    return scores
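k_fold indexes x_train[train], so it expects NumPy arrays rather than DataFrames; an illustrative call on synthetic data:

import numpy as np

rng = np.random.RandomState(0)
x = rng.randn(90, 5)
y = x[:, 0] - 0.5 * x[:, 3] + 0.1 * rng.randn(90)
print(k_fold(x, y))  # one held-out R^2 per fold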
Example #21
def lassoReg(X, y, names):
    reg = LassoCV()
    reg.fit(X, y)
    print("Best alpha using built-in LassoCV: %f" % reg.alpha_)
    print("Best score using built-in LassoCV: %f" % reg.score(X, y))
    #coef = pd.Series(reg.coef_, index = X.columns)
    print("Lasso picked " + str(np.sum(reg.coef_ != 0)) +
          " variables and eliminated the other " +
          str(np.sum(reg.coef_ == 0)) + " variables")
Example #22
def get_regression(data: pd.DataFrame) -> typing.Tuple[LassoCV, float]:
    x_train, x_test, y_train, y_test = train_test_split(
        data[data.columns[:-1]],
        data[data.columns[-1]],
        test_size=0.1,
        random_state=42)

    reg = LassoCV(cv=5, random_state=42).fit(x_train, y_train)

    return reg, reg.score(x_test, y_test)
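get_regression only needs a DataFrame whose last column is the target; for instance (synthetic data for illustration):

import numpy as np
import pandas as pd

rng = np.random.RandomState(0)
df = pd.DataFrame(rng.randn(300, 4), columns=list("abcd"))
df["target"] = 3.0 * df["a"] - df["c"] + 0.1 * rng.randn(300)

reg, test_r2 = get_regression(df)
print(test_r2)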
Example #23
def lasso(x, y):
    # note: the 'normalize' option was removed in scikit-learn 1.2;
    # on newer versions, standardize x with a StandardScaler instead
    sv = LassoCV(normalize=True)
    sv.fit(x, y)
    print("Best alpha using LassoCV: %f" % sv.alpha_)
    print("Best score using LassoCV: %f" % sv.score(x, y))
    coef = pd.Series(sv.coef_, index=x.columns)
    imp_coef = coef.sort_values()
    matplotlib.rcParams['figure.figsize'] = (8.0, 10.0)
    imp_coef.plot(kind="barh")
    plt.title("Lasso model for feature selection")
    plt.show()
Example #24
def linear_reg_single_meter(X_train, X_test, y_train, y_test):
    # Fit your model using the training set
    linear = LinearRegression()
    lasso_cv = LassoCV(cv=5, random_state=0)
    ridge_cv = RidgeCV(alphas=(0.1, 1.0, 10.0))
    linear.fit(X_train, y_train)
    lasso_cv.fit(X_train, y_train)
    ridge_cv.fit(X_train, y_train)
    print("Variance Inflation Factors")
    print(vifs(X_test))
    print('\n')
    print('Features')
    print('\n')
    print(list(X_test.columns))
    print(
        'Linear regression score on train set with all parameters: {}'.format(
            linear.score(X_train, y_train)))
    print('Linear regression score on test set with all parameters: {}'.format(
        linear.score(X_test, y_test)))
    # print('Linear regression crossVal score on train set with all parameters: {}'.format(linear.score(X_train, y_train)))
    # print('Linear regression crossVal score on test set with all parameters: {}'.format(linear.score(X_test, y_test)))

    print(
        'LassoCV regression score on train set with all parameters: {}'.format(
            lasso_cv.score(X_train, y_train)))
    print(
        'LassoCV regression score on test set with all parameters: {}'.format(
            lasso_cv.score(X_test, y_test)))
    # print('LassoCV regression crossVal score on train set with all parameters: {}'.format(lasso_cv.score(X_train, y_train)))
    # print('LassoCV regression crossVal score on test set with all parameters: {}'.format(lasso_cv.score(X_test, y_test)))

    print(
        'RidgeCV regression score on train set with all parameters: {}'.format(
            ridge_cv.score(X_train, y_train)))
    print(
        'RidgeCV regression score on test set with all parameters: {}'.format(
            ridge_cv.score(X_test, y_test)))
    # print('RidgeCV regression crossVal score on train set with all parameters: {}'.format(ridge_cv.score(X_train, y_train)))
    # print('RidgeCV regression crossVal score on test set with all parameters: {}'.format(ridge_cv.score(X_test, y_test)))

    return ridge_cv, lasso_cv, linear
Example #25
def lasso_with_cv():
    lasso_cv = LassoCV(alphas=alphas)

    k_fold = KFold(5)

    for k, (train, test) in enumerate(k_fold.split(all_training_data,
                                                   train_labels)):
        lasso_cv.fit(all_training_data[train], train_labels[train])
        print("[fold {0}] alpha: {1:.5f}, score: {2:.5f}".format(
            k, lasso_cv.alpha_,
            lasso_cv.score(all_training_data[test], train_labels[test])))

    print()
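lasso_with_cv reads three module-level names, alphas, all_training_data, and train_labels; one plausible setup (values are illustrative):

import numpy as np

alphas = np.logspace(-4, -0.5, 30)
rng = np.random.RandomState(0)
all_training_data = rng.randn(150, 8)
train_labels = 2.0 * all_training_data[:, 0] + 0.1 * rng.randn(150)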
Example #26
def run():
    # Data preprocessing
    train = DataPrep.prep_data(headless_run)
    # Scale data: https://scikit-learn.org/stable/modules/svm.html#tips-on-practical-use

    target = train.SalePrice
    train = train.drop(columns='SalePrice')

    X_train, X_test, y_train, y_test = train_test_split(
        train, target, test_size=0.25, random_state=0)


    # Trying L1 regularization
    parameters = {"fit_intercept": (True, False)}  # "n_alphas":(1000,10000)
    clf = LassoCV(alphas=None,
                  cv=5)
    # clf = GridSearchCV(clf_plain, parameters, cv = 5)
    clf = clf.fit(X_train, y_train)

    # Lasso gives us an alpha of 0.1231, picks some coefficients and gives the rest a 0 value
    coef = pd.Series(clf.coef_, index=X_train.columns)

    # Metrics
    variance_score = clf.score(X_test, y_test)
    MSEscore = mean_squared_error(y_test, clf.predict(X_test))
    MAEscore = median_absolute_error(y_test, clf.predict(X_test))
    # note: r2_score is not symmetric, so y_true must come first
    R2score = r2_score(y_test, clf.predict(X_test))

    if not headless_run:
        print('Variance score: {}'.format(variance_score))
        # print("CLF best: {}".format(clf.best_score_)) grid search only
        print('MSE score: {}'.format(MSEscore))
        print('MAE score: {}'.format(MAEscore))
        print('R2 score: {}'.format(R2score))


        # Plotting Residuals

        plt.scatter(clf.predict(X_train), clf.predict(X_train) - y_train,
                    color="green", s=10, label='Train data')

        plt.scatter(clf.predict(X_test), clf.predict(X_test) - y_test,
                    color="blue", s=10, label='Test data')

        plt.hlines(y=0, xmin=10, xmax=14, linewidth=2)

        plt.legend(loc='upper right')
        plt.title("Residual errors")
        plt.show()
    else:
        return [variance_score,MSEscore,MAEscore,R2score]
Example #27
    def Embedded_Method(self, x, y, plot_matr='yes'):
        reg = LassoCV()
        reg.fit(x, y)
        print("Best alpha using built-in LassoCV: %f" % reg.alpha_)
        print("Best score using built-in LassoCV: %f" % reg.score(x, y))
        coef = pd.Series(reg.coef_, index=x.columns)
        print("Lasso picked " + str(sum(coef != 0)) +
              " variables and eliminated the other " + str(sum(coef == 0)) +
              " variables")
        imp_coef = coef.sort_values()
        if plot_matr == 'yes':
            matplotlib.rcParams['figure.figsize'] = (8.0, 10.0)
            imp_coef.plot(kind="barh")
            plt.title("Feature importance using Lasso Model")
            plt.show()
        return imp_coef
Example #28
def makeLassoCVPrediction(cv=3):
    global y_t_pred, result
    print("Prediction with cv = %s" % cv)
    prefix = "%s_LassoCV_FULL" % (name)
    lasso = LassoCV(cv=cv)
    y_t_pred = lasso.fit(x, y).predict(x_t)
    r = lasso.score(x, y)
    m_log_alphas = -np.log10(lasso.alphas_)
    plt.plot(m_log_alphas, lasso.mse_path_, ':')
    plt.show()
    print("score r = %s" % r)
    print("Intercept: %s" % lasso.intercept_)
    #print("Coefficients: %s" % lasso.coef_)
    return prefix, lasso
Example #29
    def cross_validation(self):
        """k-fold CV procedure to find the best (minimize deviance) complexity
        parameter of a lasso regression among a custom grid of points.

        Need to preprocess the Xtrain data each time? Checked the data, and
        the means and standard deviations are quite consistently 0 and 1."""
        alpha_no = 100
        alpha_array = np.logspace(0, -7, alpha_no)

        reg = LassoCV(cv=5, n_jobs=-1, alphas=alpha_array,
                      fit_intercept=False)  # 5-fold CV
        reg = reg.fit(self.Xtrain, self.ytrain)

        score = reg.score(self.Xtest, self.ytest)
        return score
Example #30
def scale_test_and_train_Lasso(X, y):
    """
    Run a lasso regression on the model
    """
    X, X_test, y, y_test = train_test_split(X,
                                            y,
                                            test_size=0.2,
                                            random_state=42)
    X_train, X_val, y_train, y_val = train_test_split(X,
                                                      y,
                                                      test_size=.25,
                                                      random_state=3)

    X_train_scale = X_train.values
    X_val_scale = X_val.values
    X_test_scale = X_test.values

    scale = StandardScaler()

    X_train_scale = scale.fit_transform(X_train_scale)
    X_test_scale = scale.transform(X_test_scale)
    X_val_scale = scale.transform(X_val_scale)

    lasso = LassoCV()
    lasso.fit(X_train_scale, y_train)

    y_pred = lasso.predict(X_val_scale)

    print(f'Lasso Regression val R^2: {lasso.score(X_val_scale, y_val):.3f}')
    print(
        f'Lasso Regression val RME: {sqrt(mean_squared_error(y_val,y_pred)):.3f}'
    )

    return lasso.coef_
Example #31
def Lasso_model(train_linear, test_linear):
    train_linear_fea=train_linear.drop(columns=['SalePrice'])
    train_linear_tar=train_linear.SalePrice
    real_train_tar=np.expm1(train_linear_tar)
    x_train, x_test, y_train, y_test = train_test_split(train_linear_fea, train_linear_tar,test_size=0.2, random_state=0)
    """
        . Lasso model
    """
    
    lassocv = LassoCV(alphas = np.logspace(-5, 4, 400), )
    lassocv.fit(train_linear_fea, train_linear_tar)
    lassocv_score = lassocv.score(train_linear_fea, train_linear_tar)
    lassocv_alpha = lassocv.alpha_
    print("Best alpha : ", lassocv_alpha, "Score: ",lassocv_score)
    
    start=time.time()
    lasso =Lasso(normalize = True)
    lasso.set_params(alpha=lassocv_alpha,max_iter = 10000)
    lasso.fit(x_train, y_train)
    end=time.time()
    mean_squared_error(y_test, lasso.predict(x_test))
    coef_lasso=pd.Series(lassocv.coef_, index=x_train.columns).sort_values(ascending =False)
    evaluate(lasso,x_test,y_test,x_train,y_train)
    print('Time elapsed: %.4f seconds' % (end-start))
    
    y_lasso_predict=lasso.predict(train_linear_fea)
    x_line = np.arange(700000)
    y_line=x_line
    plt.scatter(real_train_tar,np.expm1(y_lasso_predict))
    plt.plot(x_line, y_line, color='r')
    plt.xlabel('Actual Sale Price')
    plt.ylabel('Predicted Sale Price')
    
    test_prediction_lasso=np.expm1(lasso.predict(test_linear))
    write_pkl(lassocv_alpha, '/Users/vickywinter/Documents/NYC/Machine Learning Proj/Pickle/lasso_params.pkl')
    return test_prediction_lasso
Example #32
print(coef_path_forest_cv.feature_importances_)
forest_prediction = coef_path_forest_cv.predict(X)
forest_score = coef_path_forest_cv.score(X, y)
print("Forest_score:%.3g" % forest_score)
forest_cv_score = cross_validation.cross_val_score(coef_path_forest_cv, X, y, n_jobs=2, cv=5)
print(forest_cv_score)

print("########LASSO######")
coef_path_lasso_cv.fit(X, y)
print(coef_path_lasso_cv.get_params)
print("alphas:")
print(coef_path_lasso_cv.alphas_)
print("coef_:")
print(coef_path_lasso_cv.coef_)
lasso_prediction = coef_path_lasso_cv.predict(X)
lasso_score = coef_path_lasso_cv.score(X, y)
print("Lasso_score:%.3g" % lasso_score)
#print("Lasso precision:%.3g" % precision_score(y, lasso_predict))
#print("Lasso_confusion matrix:")
#print(confusion_matrix(y, lasso_prediction))
lasso_cv_score = cross_validation.cross_val_score(coef_path_lasso_cv, X, y, n_jobs=2, cv=5)
print(lasso_cv_score)
plt.figure()
plt.hist2d(y, lasso_prediction)
plt.ylabel("Predicted Values")
plt.xlabel("Truth Values")
plt.title("Lasso Linear Regression")
plt.savefig("figures/lasso_predicted_truth.png")
print("#######ELASTIC#####")
coef_path_elastic_cv.fit(X, y)
print(coef_path_elastic_cv.get_params)
Example #33
y_pred_lasso1.describe()
print(lasso1)
print('Lasso R^2 score:')
print(r2_score(y_test, y_pred_lasso1))
#0.2604
print('Lasso Mean Squared Error:')
print(mean_squared_error(y_test, y_pred_lasso1))
#24232
print('Lasso Root Mean Squared Log Error:')
print(rmsle(y_test, y_pred_lasso1))
#6.089

# Cross-validate the LASSO-penalized linear regression; cv specifies the
# number of cross-validation folds to run on each penalty-parameter value
lasso2 = LassoCV(cv=15)
lasso2_fit = lasso2.fit(X_train, y_train)
lasso2_path = lasso2.score(X_train, y_train)

plt.plot(-np.log(lasso2_fit.alphas_),
         np.sqrt(lasso2_fit.mse_path_).mean(axis=1))
plt.ylabel('RMSE (avg. across folds)')
plt.xlabel(r'$-\log(\lambda)$')
# Indicate the lasso parameter that minimizes the average MSE across folds
plt.axvline(-np.log(lasso2_fit.alpha_), color='red')

alpha = lasso2_fit.alpha_

lasso3 = Lasso(alpha=alpha)
Example #34
plt.axhline(np.max(scores), linestyle='--', color='.5')
plt.xlim([alphas[0], alphas[-1]])

# #############################################################################
# Bonus: how much can you trust the selection of alpha?

# To answer this question we use the LassoCV object that sets its alpha
# parameter automatically from the data by internal cross-validation (i.e. it
# performs cross-validation on the training data it receives).
# We use external cross-validation to see how much the automatically obtained
# alphas differ across different cross-validation folds.
lasso_cv = LassoCV(alphas=alphas, random_state=0)
k_fold = KFold(3)

print("Answer to the bonus question:",
      "how much can you trust the selection of alpha?")
print()
print("Alpha parameters maximising the generalization score on different")
print("subsets of the data:")
for k, (train, test) in enumerate(k_fold.split(X, y)):
    lasso_cv.fit(X[train], y[train])
    print("[fold {0}] alpha: {1:.5f}, score: {2:.5f}".
          format(k, lasso_cv.alpha_, lasso_cv.score(X[test], y[test])))
print()
print("Answer: Not very much since we obtained different alphas for different")
print("subsets of the data and moreover, the scores for these alphas differ")
print("quite substantially.")

# plt.show()
pltshow(plt)
Example #35
p = 180
K = 10  # K-fold CV
y = y.reshape(n)

alphas = np.exp(np.linspace(np.log(0.01),np.log(10),100))  # Using log-scale
N = len(alphas) # Number of lasso parameters

scores = np.zeros(N)
alpha = np.zeros(N)
from sklearn.linear_model import LassoCV
from sklearn.feature_selection import SelectFromModel
for i in range(N):
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25)
    clf = LassoCV(n_alphas = 100, cv = K)
    clf = clf.fit(X_train,y_train)
    scores[i] = clf.score(X_test,y_test)
    alpha[i] = clf.alpha_

scores = np.asarray(scores)
max_score_index = np.argmax(scores)
best_alpha = alpha[max_score_index]

print(best_alpha)

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25)
clf = Lasso(alpha=best_alpha)
#clf = LassoCV(n_alphas = 100, cv = K, precompute='auto', n_jobs=2, normalize='True')
clf = clf.fit(X_train,y_train)
scores = clf.score(X_test,y_test)
print(predictor_var[0])
print("clf.coef_",clf.coef_)
Example #36
def Model(train_linear, test_linear):
    train_linear_fea=train_linear.drop(columns=['SalePrice'])
    train_linear_tar=train_linear.SalePrice
    x_train, x_test, y_train, y_test = train_test_split(train_linear_fea, train_linear_tar,test_size=0.2, random_state=0)
    def evaluate(model, test_features, test_labels,train_features, train_labels):
        predictions = model.predict(test_features)
        errors = abs(predictions - test_labels)
        mape = 100 * np.mean(errors / test_labels)
        accuracy = 100 - mape
        print('Model Performance')
        print('Average Error: {:0.4f} degrees.'.format(np.mean(errors)))
        print('Accuracy = {:0.2f}%.'.format(accuracy))    
        print("MSE for train data is: %f" % mean_squared_error(y_train, model.predict(x_train)))
        print("MSE for validation data is: %f" % mean_squared_error(y_test, model.predict(x_test)))
        return accuracy
    real_train_tar=np.expm1(train_linear_tar)
    """
        . Lasso model
    """
    
    lassocv = LassoCV(alphas = np.logspace(-5, 4, 400), )
    lassocv.fit(train_linear_fea, train_linear_tar)
    lassocv_score = lassocv.score(train_linear_fea, train_linear_tar)
    lassocv_alpha = lassocv.alpha_
    print("Best alpha : ", lassocv_alpha, "Score: ",lassocv_score)
    
    start=time.time()
    lasso =Lasso(normalize = True)
    lasso.set_params(alpha=lassocv_alpha,max_iter = 10000)
    lasso.fit(x_train, y_train)
    end=time.time()
    mean_squared_error(y_test, lasso.predict(x_test))
    coef_lasso=pd.Series(lassocv.coef_, index=x_train.columns).sort_values(ascending =False)
    evaluate(lasso,x_test,y_test,x_train,y_train)
    print('Time elapsed: %.4f seconds' % (end-start))
    
    y_lasso_predict=lasso.predict(train_linear_fea)
    x_line = np.arange(700000)
    y_line=x_line
    plt.scatter(real_train_tar,np.expm1(y_lasso_predict))
    plt.plot(x_line, y_line, color='r')
    plt.xlabel('Actual Sale Price')
    plt.ylabel('Predicted Sale Price')
    
    test_prediction_lasso=np.expm1(lasso.predict(test_linear))
    
    
    """
        . Ridge model
    """
    
    ridgecv = RidgeCV(alphas = np.logspace(-5, 4, 400))
    ridgecv.fit(x_train, y_train)
    ridgecv_score = ridgecv.score(x_train, y_train)
    ridgecv_alpha = ridgecv.alpha_
    print("Best alpha : ", ridgecv_alpha, "Score: ",ridgecv_score)
    coef=pd.Series(ridgecv.coef_, index=x_train.columns).sort_values(ascending =False)
    
    start=time.time()
    ridge =Ridge(normalize = True)
    ridge.set_params(alpha=ridgecv_alpha,max_iter = 10000)
    ridge.fit(x_train, y_train)
    end=time.time()
    mean_squared_error(y_test, ridge.predict(x_test))
    coef_ridge=pd.Series(ridgecv.coef_, index=x_train.columns).sort_values(ascending =False)
    evaluate(ridge,x_test,y_test,x_train,y_train)
    print('Time elapsed: %.4f seconds' % (end-start))
    
    y_ridge_predict=ridge.predict(train_linear_fea)
    x_line = np.arange(700000)
    y_line=x_line
    plt.scatter(real_train_tar,np.expm1(y_ridge_predict))
    plt.plot(x_line, y_line, color='r')
    plt.xlabel('Actual Sale Price')
    plt.ylabel('Predicted Sale Price')
    
    test_prediction_ridge=np.expm1(ridge.predict(test_linear))
    
    
    """
        . Random Forest
    """
    #train=train.drop(columns=['DateSold'])
    #test=test.drop(columns=['DateSold'])
    #X_train=train.drop(columns=['SalePrice'])
    #Y_train=train['SalePrice']
    X_train=train_linear_fea
    Y_train=train_linear_tar
    x_train_rf, x_test_rf, y_train_rf, y_test_rf = train_test_split(X_train, Y_train,test_size=0.2, random_state=0)
    
    
    n_estimators = [int(x) for x in np.linspace(start = 100, stop = 2000, num = 20)]
    max_features = ['auto', 'sqrt']
    max_depth = [int(x) for x in np.linspace(10, 110, num = 11)]
    min_samples_split = [2, 5, 10]
    min_samples_leaf = [1, 2, 4]
    bootstrap = [True, False]
    random_grid = {'n_estimators': n_estimators,
                   'max_features': max_features,
                   'max_depth': max_depth,
                   'min_samples_split': min_samples_split,
                   'min_samples_leaf': min_samples_leaf,
                   'bootstrap': bootstrap}
    
    rf = RandomForestRegressor()
    # Random search of parameters, using 3 fold cross validation, 
    # search across 100 different combinations, and use all available cores
    #
    rf_random = RandomizedSearchCV(estimator = rf, param_distributions = random_grid, n_iter = 100, cv = 3, verbose=2, random_state=42, n_jobs = -1)
    rf_random.fit(X_train, Y_train)
    #rf_random.fit(x_train_rf, y_train_rf)
    rf_random.best_params_
    
    #Random search allowed us to narrow down the range for each hyperparameter. Now that we know where to concentrate our search,
    # we can explicitly specify every combination of settings to try. 
    param_grid = {
        'bootstrap': [False],
        'max_depth': [80, 90, 100, 110,120,130],
        'max_features': [2, 3],
        'min_samples_leaf': [1,2,3, 4],
        'min_samples_split': [2,4,6,8, 10, 12],
        'n_estimators': [600,700, 800, 900, 1000]
    }
    # Create a based model
    rf = RandomForestRegressor()
    # Instantiate the grid search model
    grid_search = GridSearchCV(estimator = rf, param_grid = param_grid, cv = 3, n_jobs = -1, verbose = 2)
    #grid_search.fit(x_train, y_train)
    grid_search.fit(X_train, Y_train)
    grid_search.best_params_
    
    best_random = grid_search.best_estimator_
    start=time.time()
    best_random.fit(x_train_rf,y_train_rf)
    end=time.time()
    evaluate(best_random, x_test_rf, y_test_rf,x_train_rf,y_train_rf)
    print('Time elapsed: %.4f seconds' % (end-start))
    
    y_rf_predict=best_random.predict(train_linear_fea)
    x_line = np.arange(700000)
    y_line=x_line
    plt.scatter(real_train_tar,np.expm1(y_rf_predict))
    plt.plot(x_line, y_line, color='r')
    plt.xlabel('Actual Sale Price')
    plt.ylabel('Predicted Sale Price')
    importance_rf = pd.DataFrame({'features':train_linear_fea.columns, 'imp':best_random.feature_importances_}).\
                            sort_values('imp',ascending=False)
    
    importance_top20_rf = importance_rf.iloc[:20,]
    
    plt.barh(importance_top20_rf.features, importance_top20_rf.imp)
    plt.xlabel('Feature Importance')
    
    test_prediction_rf=np.expm1(best_random.predict(test_linear))
    
    """
        . Xgboost
    """
    
    learning_rate = [round(float(x), 2) for x in np.linspace(start = .1, stop = .2, num = 11)]
        # Minimum for sum of weights for observations in a node
    min_child_weight = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12]
        # Maximum nodes in each tree
    max_depth = [int(x) for x in np.linspace(1, 10, num = 10)]
    n_estimators=[int(x) for x in np.linspace(start = 100, stop = 2000, num = 20)]
    subsample=[0.3, 0.4,0.5,0.6, 0.7]
    model = xgb.XGBRegressor()
    random_grid = {'learning_rate': learning_rate,
                    'max_depth': max_depth,
                    'min_child_weight': min_child_weight,
                    'subsample': subsample,
                    'n_estimators':n_estimators
                    }
    
        # Make a RandomizedSearchCV object with correct model and specified hyperparams
    xgb_random = RandomizedSearchCV(estimator=model, param_distributions=random_grid, n_iter=1000, cv=5, verbose=2, random_state=42, n_jobs=-1)
    start = time.time()
        # Fit models
    xgb_random.fit(X_train, Y_train)
    xgb_random.best_params_
    
    
    """
    best_params_={'learning_rate': 0.1,
     'max_depth': 2,
     'min_child_weight': 4,
     'n_estimators': 900,
     'subsample': 0.5}
    """
    model_xgb = XGBRegressor(**xgb_random.best_params_)
    #model_xgb = XGBRegressor(**best_params_)
    start=time.time()
    model_xgb.fit(x_train_rf,y_train_rf)
    end=time.time()
    evaluate(model_xgb, x_test_rf, y_test_rf,x_train_rf,y_train_rf)
    print('Time elapsed: %.4f seconds' % (end-start))
    
    
    
    y_xgb_predict=model_xgb.predict(train_linear_fea)
    x_line = np.arange(700000)
    y_line=x_line
    plt.scatter(real_train_tar,np.expm1(y_xgb_predict))
    plt.plot(x_line, y_line, color='r')
    plt.xlabel('Actual Sale Price')
    plt.ylabel('Predicted Sale Price')
    importance_xgb = pd.DataFrame({'features':train_linear_fea.columns, 'imp':model_xgb.feature_importances_}).\
                            sort_values('imp',ascending=False)
    
    importance_top20_xgb = importance_xgb.iloc[:20,]
    
    plt.barh(importance_top20_xgb.features, importance_top20_xgb.imp)
    plt.xlabel('Feature Importance')
    
    test_prediction_xgb=np.expm1(model_xgb.predict(test_linear))
    
    return(test_prediction_lasso, test_prediction_ridge, test_prediction_rf, test_prediction_xgb,y_lasso_predict, y_ridge_predict, y_rf_predict, y_xgb_predict)