def trainRandomForest(data, columns, targetColumn, parameters):
    """Fit a random forest regressor on column-oriented data.

    Args:
        data: mapping from column name to an indexable sequence of values;
            row ``i`` of the training matrix is built from ``data[column][i]``.
        columns: iterable of candidate column names.
        targetColumn: name of the column to predict. It is excluded from the
            feature columns and used as the regression target.
        parameters: dict holding ``"estimators"`` plus either ``"depth"``
            (passed as ``max_depth``) or ``"leaf"`` (passed as
            ``min_samples_leaf``). ``"depth"`` takes precedence when both
            are present.

    Returns:
        A ``RandomForestModel`` built from the fitted regressor and the list
        of feature column names, in the order they were used.

    Raises:
        ValueError: if ``parameters`` contains neither ``"depth"`` nor
            ``"leaf"``.
    """
    modelColumns = [column for column in columns if column != targetColumn]

    # Pivot the column-oriented input into one feature row per observation.
    modelData = [[data[column][i] for column in modelColumns]
                 for i in range(len(data[targetColumn]))]

    if "depth" in parameters:
        treeLimit = {"max_depth": parameters["depth"]}
    elif "leaf" in parameters:
        treeLimit = {"min_samples_leaf": parameters["leaf"]}
    else:
        # Fail with a clear message instead of an UnboundLocalError at fit time.
        raise ValueError(
            "parameters must contain either 'depth' or 'leaf'")

    model = RandomForestRegressor(n_estimators=parameters["estimators"],
                                  n_jobs=-1,
                                  random_state=42,
                                  **treeLimit)
    model.fit(modelData, data[targetColumn])

    return RandomForestModel(model, modelColumns)
def evalOne(parameters):
    """Cross-validate one random forest configuration over all locations.

    For every entry of the module-level ``locations`` the data is split with
    ``splitDataForXValidation``, a forest is fitted on the training part and
    its predictions for the test part are pooled with the observations.

    Args:
        parameters: dict holding ``"n_estimators"`` plus one of ``"depth"``
            (``max_depth``), ``"leaf"`` (``min_samples_leaf``) or
            ``"max_leaf"`` (``max_leaf_nodes``). They are checked in that
            order and the first one present is used.

    Returns:
        Element ``[1]`` of ``rmseEval(observations, predictions)`` computed
        on the pooled values (presumably the RMSE -- confirm against
        ``rmseEval``).

    Raises:
        ValueError: if none of the three tree-size keys is present.
    """
    # Resolve the tree-size constraint once; it is the same for every fold.
    if "depth" in parameters:
        treeLimit = {"max_depth": parameters["depth"]}
    elif "leaf" in parameters:
        treeLimit = {"min_samples_leaf": parameters["leaf"]}
    elif "max_leaf" in parameters:
        treeLimit = {"max_leaf_nodes": parameters["max_leaf"]}
    else:
        # Fail with a clear message instead of an UnboundLocalError at fit time.
        raise ValueError(
            "parameters must contain one of 'depth', 'leaf' or 'max_leaf'")

    all_obs = []
    all_pred = []
    for location in locations:
        trainX, testX, trainY, testY = splitDataForXValidation(
            location, "location", data, all_features, "target")
        model = RandomForestRegressor(
            random_state=42,
            n_estimators=parameters["n_estimators"],
            n_jobs=-1,
            **treeLimit)

        model.fit(trainX, trainY)
        prediction = model.predict(testX)
        all_obs.extend(testY)
        all_pred.extend(prediction)
    return rmseEval(all_obs, all_pred)[1]
Beispiel #3
0
def getModels():
    """Return the candidate regressors keyed by a short identifier."""
    return {
        'dt': DecisionTreeRegressor(max_depth=50),
        'rf1': RandomForestRegressor(),
        'rf2': RandomForestRegressor(n_estimators=128, max_depth=15),
        'gbr': GradientBoostingRegressor(n_estimators=128,
                                         max_depth=5,
                                         learning_rate=1.0),
        # Currently disabled:
        # 'abr': AdaBoostRegressor(n_estimators=128),
    }
    def test_boston_housing_no_fit_invalid(self):
        """predict() and score() must raise AssertionError before fit()."""
        (train_x, train_y), (test_x, test_y) = TestUtil.get_boston_housing()

        forest = RandomForestRegressor(n_estimators=64,
                                       max_depth=5,
                                       random_state=1)
        forest.fit(train_x, train_y)

        builder = MLPModelBuilder(num_layers=2,
                                  num_units=32,
                                  activation="relu",
                                  p_dropout=0.2,
                                  verbose=0,
                                  batch_size=32,
                                  learning_rate=0.001,
                                  num_epochs=2,
                                  early_stopping_patience=128)

        # Only the explained model is fitted; the explainer itself is not.
        unfitted = CXPlain(forest, builder, ZeroMasking(), mean_squared_error)

        for method_name in ("predict", "score"):
            with self.assertRaises(AssertionError):
                getattr(unfitted, method_name)(test_x, test_y)
    def test_boston_housing_valid(self):
        """Fit an explainer on a random forest and check its output shapes."""
        (train_x, train_y), (test_x, test_y) = TestUtil.get_boston_housing()

        forest = RandomForestRegressor(n_estimators=64,
                                       max_depth=5,
                                       random_state=1)
        forest.fit(train_x, train_y)

        builder = MLPModelBuilder(num_layers=2,
                                  num_units=32,
                                  activation="relu",
                                  p_dropout=0.2,
                                  verbose=0,
                                  batch_size=32,
                                  learning_rate=0.001,
                                  num_epochs=2,
                                  early_stopping_patience=128)
        explainer = CXPlain(forest, builder, ZeroMasking(), mean_squared_error)
        explainer.fit(train_x, train_y)

        # One output per input feature (all non-batch dimensions flattened).
        expected_output_shape = (None, np.prod(test_x.shape[1:]))
        self.assertEqual(explainer.prediction_model.output_shape,
                         expected_output_shape)

        # Called only to check they run on a fitted explainer; the returned
        # values are not inspected.
        explainer.score(test_x, test_y)
        explainer.get_last_fit_score()

        attributions = explainer.predict(test_x)
        self.assertTrue(attributions.shape == test_x.shape)
Beispiel #6
0
    def test_boston_housing_confidence_level_invalid(self):
        """predict() must reject confidence levels above 1 or below 0."""
        (train_x, train_y), (test_x, test_y) = TestUtil.get_boston_housing()

        forest = RandomForestRegressor(n_estimators=64,
                                       max_depth=5,
                                       random_state=1)
        forest.fit(train_x, train_y)

        builder = MLPModelBuilder(num_layers=2,
                                  num_units=32,
                                  activation="relu",
                                  p_dropout=0.2,
                                  verbose=0,
                                  batch_size=32,
                                  learning_rate=0.001,
                                  num_epochs=3,
                                  early_stopping_patience=128)

        # The explainer is built with two models for this test.
        explainer = CXPlain(forest,
                            builder,
                            ZeroMasking(),
                            mean_squared_error,
                            num_models=2)
        explainer.fit(train_x, train_y)

        for bad_level in (1.01, -0.5, -0.01):
            with self.assertRaises(ValueError):
                explainer.predict(test_x, confidence_level=bad_level)
Beispiel #7
0
def RF_ST(trainFileName, testFilename):
    """Train one random forest per store and predict that store's test rows.

    Both files are loaded with ``ld.LoadData_DATA_ST`` and indexed by the
    store ids '1'..'5'. The first two fields of every row are skipped, the
    rest is converted to float, and the first two converted values are
    skipped again. For training rows the last value is the target.

    Returns:
        A list of ``[row[0], row[1], prediction]`` entries, where the
        prediction is clipped at 0 and formatted with four decimals.
    """
    train_by_store = ld.LoadData_DATA_ST(trainFileName)
    test_by_store = ld.LoadData_DATA_ST(testFilename)

    results = []
    for store_id in ('1', '2', '3', '4', '5'):
        features, targets = [], []
        for row in train_by_store[store_id]:
            values = [float(x) for x in row[2:]]
            features.append(values[2:-1])
            targets.append(values[-1])

        row_keys, test_features = [], []
        for row in test_by_store[store_id]:
            row_keys.append((row[0], row[1]))
            values = [float(x) for x in row[2:]]
            test_features.append(values[2:])

        forest = RandomForestRegressor(n_estimators=100,
                                       criterion='mse',
                                       max_depth=None,
                                       max_features='auto')
        forest.fit(features, targets)
        predicted = forest.predict(test_features)

        # Negative predictions are reported as 0.
        for (first_key, second_key), value in zip(row_keys, predicted):
            results.append([first_key, second_key, '%.4f' % max(value, 0)])
    return results
    def __init__(self, config_file=''):
        """Set up the estimator and the input/output paths from ``constants``.

        Args:
            config_file: path passed to ``SafeConfigParser.read``; the parser
                is kept on ``self.parser``.
        """
        self.parser = SafeConfigParser()
        self.parser.read(config_file)

        # Machine-learning settings. Feature and target names are the same
        # for both tasks; only the task label and estimator class differ.
        self.classify = constants.DO_CLASSIFICATION
        self.vars_features = constants.fixed_vars
        self.vars_target = constants.ML_TARGETS
        self.var_target = constants.ML_TARGETS
        self.task = 'classification' if self.classify else 'regression'
        forest_cls = (RandomForestClassifier if self.classify
                      else RandomForestRegressor)
        self.model = forest_cls(n_estimators=2500,
                                n_jobs=constants.ncpu,
                                random_state=0)

        # Input file location.
        self.path_inp = os.sep.join([constants.base_dir,
                                     constants.name_inp_fl])

        # Output directory, created on demand.
        self.path_out_dir = constants.out_dir
        utils.make_dir_if_missing(self.path_out_dir)

        # Pickle locations for the model and the features.
        self.path_pickle_model = os.sep.join([self.path_out_dir,
                                              constants.model_pickle])
        self.path_pickle_features = os.sep.join([self.path_out_dir,
                                                 'pickled_features'])
Beispiel #9
0
def build_other_learners(train_x, train_y):
    """Build and fit the fixed set of baseline learners.

    Four ``SimpleLearner`` wrappers are created (random forest, gradient
    boosting, linear SVR and RBF SVR) and fitted on the training data.
    Learners named "linearSVR" or "pa" are fitted on inputs transformed by
    their own ``scaler``; all others are fitted on the raw inputs.

    Args:
        train_x: training features.
        train_y: training targets.

    Returns:
        The list of fitted ``SimpleLearner`` objects, in creation order.
    """
    simple_learners = [
        SimpleLearner(
            "rf",
            RandomForestRegressor(n_jobs=-1,
                                  max_features=0.6,
                                  n_estimators=2,
                                  max_depth=8)),
        SimpleLearner(
            "gb",
            GradientBoostingRegressor(n_estimators=10,
                                      loss='huber',
                                      learning_rate=0.5,
                                      max_depth=4)),
        SimpleLearner(
            "linearSVR",
            LinearSVR(intercept_scaling=64,
                      C=128,
                      max_iter=1000,
                      dual=False,
                      loss='squared_epsilon_insensitive')),
        SimpleLearner("svr", SVR(C=100, epsilon=0.001, gamma=0.00001)),
    ]
    for sl in simple_learners:
        if sl.name in ("linearSVR", "pa"):
            # These learners are trained on scaled inputs.
            sl.scaler.fit(train_x)
            s_train_x = sl.scaler.transform(train_x)
            sl.fit(s_train_x, train_y)
        else:
            sl.fit(train_x, train_y)
        # Call form of print: identical output on Python 2 and valid on 3.
        print("%s: finish to build the model" % sl.name)
    return simple_learners
Beispiel #10
0
    def test_boston_housing_load_save_valid(self):
        """Saving and re-loading a fitted explainer must reproduce its predictions."""
        (x_train, y_train), (x_test, y_test) = TestUtil.get_boston_housing()
        explained_model = RandomForestRegressor(n_estimators=64, max_depth=5, random_state=1)
        explained_model.fit(x_train, y_train)

        model_builder = MLPModelBuilder(num_layers=2, num_units=32, activation="relu", p_dropout=0.2, verbose=0,
                                        batch_size=32, learning_rate=0.001, num_epochs=2, early_stopping_patience=128)
        masking_operation = ZeroMasking()
        loss = mean_squared_error

        num_models_settings = [1, 2]
        for num_models in num_models_settings:
            explainer = CXPlain(explained_model, model_builder, masking_operation, loss,
                                num_models=num_models)

            explainer.fit(x_train, y_train)
            median_1 = explainer.predict(x_test)

            tmp_dir_name = tempfile.mkdtemp()
            try:
                explainer.save(tmp_dir_name)

                # Saving again into the same directory without overwrite
                # must be refused.
                with self.assertRaises(ValueError):
                    explainer.save(tmp_dir_name, overwrite=False)

                explainer.save(tmp_dir_name, overwrite=True)
                explainer.load(tmp_dir_name)
                median_2 = explainer.predict(x_test)

                self.assertTrue(np.array_equal(median_1, median_2))
            finally:
                # Remove the directory even when an assertion above fails.
                shutil.rmtree(tmp_dir_name)
Beispiel #11
0
def eval_one(step):
    """Evaluate one feature subset by cross-validation, with caching.

    Args:
        step: hashable sequence of flags, one per entry of the module-level
            ``all_features``; a truthy flag selects that feature.

    Returns:
        Element ``[1]`` of ``rmseEval(observations, predictions)`` pooled
        over all ``locations``. The value is stored in ``cached_results``
        and appended to ``CACHE_FILE`` as ``<rmse>;<flag>,<flag>,...``.
    """
    if step in cached_results:
        return cached_results[step]

    eval_features = [feature for i, feature in enumerate(all_features)
                     if step[i]]

    all_predictions = []
    all_observations = []

    for location in locations:
        trainX, testX, trainY, testY = splitDataForXValidation(location, "location", data, eval_features, "target")
        model = RandomForestRegressor(min_samples_leaf = 2, random_state=42, n_estimators=650, n_jobs=-1)
        model.fit(trainX, trainY)
        predictions = model.predict(testX)
        all_observations.extend(testY)
        all_predictions.extend(predictions)

    rmse = rmseEval(all_observations, all_predictions)[1]

    cached_results[step] = rmse

    # Persist the result; ``with`` closes the file even if the write fails.
    step_str = ",".join([str(s) for s in step])
    with open(CACHE_FILE, "a") as cache_output:
        cache_output.write(str(rmse) + ";" + step_str + "\n")

    return rmse
Beispiel #12
0
    def __init__(self, features=[], target=[], model='ols', tag='train'):
        """Store the data, fit a scaler on it and pick the regressor.

        Args:
            features: feature matrix, kept as ``self.X`` and used to fit
                ``self.scaler``. The list default is only stored, never
                mutated here.
            target: target values, kept as ``self.y``.
            model: one of 'ols', 'huber', 'tree' or 'forest'. Any other
                value leaves ``self.regr`` unset.
            tag: prefix for ``self.tag``, which becomes ``<tag>_<model>``.
        """
        import os

        self.tag = tag + '_' + model
        self.outdir = 'fig/results'
        self.model = model
        # Create the output directory (with parents) without spawning a shell.
        if not os.path.isdir(self.outdir):
            os.makedirs(self.outdir)

        # setup analysis
        self.X = features
        self.y = target

        # Scale
        self.scaler = StandardScaler(with_mean=True, with_std=True).fit(self.X)

        if model == 'ols':
            self.regr = skl_lm.LinearRegression()
        elif model == 'huber':
            self.regr = HuberRegressor(fit_intercept=True,
                                       alpha=0.0,
                                       max_iter=100,
                                       epsilon=1.35)
        elif model == 'tree':
            self.regr = DecisionTreeRegressor(max_depth=6)
        elif model == 'forest':
            self.regr = RandomForestRegressor(n_estimators=10,
                                              bootstrap=True,
                                              criterion='mae',
                                              max_depth=10,
                                              max_features='auto',
                                              min_samples_leaf=5,
                                              min_samples_split=10,
                                              random_state=0)
        # Call form of print: identical output on Python 2 and valid on 3.
        print(self)
Beispiel #13
0
def test_moving_average_smoothing_estimator():
    """Fit the calibrated hazard model on simulated data and check its accuracy."""
    np.random.seed(1)

    n_rows, n_cols = 10000, 10

    # Simulated events under constant hazard: hazard = exp(X . beta), with
    # iid exponentially distributed exposure times.
    X = np.random.normal(size=(n_rows, n_cols))
    beta = np.random.normal(size=(n_cols, 1))
    hazard = np.exp(np.dot(X, beta))
    exposure = np.random.exponential(size=(n_rows, 1))
    rate = np.random.poisson(hazard * exposure) / exposure

    calibrator = GLM(sm.families.Gaussian(sm.families.links.log),
                     add_constant=False)
    risk_estimator = ThresholdClassifier(
        HazardToRiskEstimator(
            MovingAverageSmoothingEstimator(RandomForestRegressor())))
    model = CalibratedEstimatorCV(calibrator, risk_estimator)

    model.fit(X, rate, exposure=exposure)
    y_pred = model.predict(X, exposure)

    # Total predicted risk must be within 10% of the number of rows that
    # had at least one event.
    observed = np.sum(rate > 0)
    assert np.abs((np.sum(y_pred) - observed) / observed) < .1

    # Recovered coefficients must be within 0.1 of the simulated ones.
    coefficient_error = np.abs(model.estimator_.coef_ - beta[:, 0])
    assert np.max(coefficient_error) < .1
 def __init__(self):
     """Initialise the base class and attach a random forest regressor as ``self.clf``."""
     super(ItemSetModel, self).__init__()
     # Alternatives that were previously tried here: DecisionTreeRegressor(),
     # Lasso(0.1), SVR(kernel='rbf'), ElasticNetCV().
     forest_options = dict(max_depth=7, n_estimators=10)
     self.clf = RandomForestRegressor(**forest_options)
Beispiel #15
0
def baggedModel(X_train, y_train, X_test, y_test, X_holdout, y_holdout):
    """
    Fit an AdaBoost ensemble of random forests and evaluate it on two sets.

    INPUT: training features/targets, plus features/targets of the test set
           and of the holdout set
    OUTPUT: (test predictions, holdout predictions, test score, holdout
            score), where each score is the value of the estimator's
            ``score`` method on that set
    """
    forest = RandomForestRegressor(n_estimators=100,
                                   max_depth=20,
                                   max_features='sqrt',
                                   min_samples_split=5,
                                   min_samples_leaf=4)
    booster = AdaBoostRegressor(base_estimator=forest,
                                n_estimators=10,
                                random_state=123)
    booster.fit(X_train, y_train)

    # Test set
    test_predictions = booster.predict(X_test)
    test_score = booster.score(X_test, y_test)

    # Holdout set
    holdout_predictions = booster.predict(X_holdout)
    holdout_score = booster.score(X_holdout, y_holdout)

    return test_predictions, holdout_predictions, test_score, holdout_score
Beispiel #16
0
    def post(self):
        """Handle an uploaded recording and render the prediction page.

        The uploaded "audio" file is stored under ``__UPLOADS__`` with a
        random UUID name, its attributes are extracted with
        ``getAttributes``, a random forest fitted on the rows from
        ``mongoTolist`` predicts a value for them, and that value is appended
        to the attributes before they are passed to ``octave.classify``. The
        result is rendered through ``output.html``.
        """
        # upload audio file in server
        voice = self.request.files["audio"][0]
        extn = os.path.splitext(voice['filename'])[1]
        cname = str(uuid.uuid4()) + extn
        # Audio data is binary: text mode would apply newline translation on
        # some platforms and rejects bytes on Python 3. ``with`` closes the
        # handle even if the write fails.
        with open(__UPLOADS__ + cname, 'wb') as fh:
            fh.write(voice['body'])

        # get features from the audio file
        attr = getAttributes(cname)
        fdf = mongoTolist(False)

        # All columns but the last are features; the last one is the target.
        train = fdf[:,:-1]
        target = fdf[:,-1]

        #RandomForest Regression
        rf = RandomForestRegressor(n_estimators = 506, n_jobs = -1)
        rf.fit(train, target)

        updrs_val = rf.predict([attr])
        attr.append(updrs_val[0])

        # get the theta from database
        theta = list(db.theta.find({}))
        theta1 = theta[0]["theta1"]
        theta2 = theta[1]["theta2"]

        # check is the person has Parkinson's Disease
        isParkinson = octave.classify(theta1, theta2, np.array(attr))

        self.render("output.html", ipk = isParkinson, updrs = updrs_val[0])
Beispiel #17
0
def eval_one(min_samples_leaf, n_estimators):
    """Group-wise cross-validation of one random forest configuration.

    Each entry of the module-level ``groups`` is held out once; the forest is
    trained on the stations of all other groups. The per-fold value and the
    pooled value of ``rmseEval(...)[1]`` are logged, and the pooled value is
    returned.
    """
    log("min_samples_leaf: " + str(min_samples_leaf) + ", n_estimators: " +
        str(n_estimators))

    pooled_observations = []
    pooled_predictions = []

    for held_out, held_out_stations in enumerate(groups):
        training_stations = []
        for position, members in enumerate(groups):
            if position != held_out:
                training_stations.extend(members)

        # Station ids are converted to float before being handed to the split.
        train_ids = {float(s) for s in training_stations}
        test_ids = {float(s) for s in held_out_stations}

        trainX, testX, trainY, testY = splitDataForXValidation(
            train_ids, test_ids, "location", data, all_features, "target")

        forest = RandomForestRegressor(random_state=42,
                                       n_jobs=-1,
                                       n_estimators=n_estimators,
                                       min_samples_leaf=min_samples_leaf)
        forest.fit(trainX, trainY)
        fold_predictions = forest.predict(testX)

        fold_rmse = rmseEval(testY, fold_predictions)[1]
        log("\tALL rmse: " + str(fold_rmse))

        pooled_observations.extend(testY)
        pooled_predictions.extend(fold_predictions)

    overall_rmse = rmseEval(pooled_observations, pooled_predictions)[1]
    log("\tALL rmse:" + str(overall_rmse))
    return overall_rmse
Beispiel #18
0
    def __init__(self,
                 param_grid,
                 n_evaluations=10,
                 random_state=None,
                 start_evaluations=3,
                 n_attempts=10,
                 regressor=None,
                 maximize=True):
        """
        Regression-based parameter generator.
        A regressor tries to predict the best point from the results already known for other parameters.

        :param OrderedDict param_grid: the grid with parameters to optimize on
        :param int n_evaluations: the number of evaluations to do
        :param random_state: random generator
        :type random_state: int or RandomState or None

        :param int start_evaluations: count of random points generated at the start
        :param int n_attempts: this number of points will be compared on each iteration.
            The regressor chooses the optimal one among them.
        :param regressor: regressor used to choose the next point with the potentially best score
            (the score is estimated by the regressor); if None, a RandomForest is used.
        :param maximize: forwarded unchanged to ``AbstractParameterGenerator.__init__``
        """
        AbstractParameterGenerator.__init__(self,
                                            param_grid=param_grid,
                                            n_evaluations=n_evaluations,
                                            random_state=random_state,
                                            maximize=maximize)
        self.regressor = (regressor if regressor is not None
                          else RandomForestRegressor(max_depth=3,
                                                     n_estimators=10,
                                                     max_features=0.7))
        self.n_attempts = n_attempts
        self.start_evaluations = start_evaluations
Beispiel #19
0
    def make_prediction(self, site_id, label):
        """Fit a random forest for ``label`` at one site and store its forecast.

        After preparing the data (``_create_X_Y_per_site``) and running
        ``xgbooster``, a random forest is fitted through ``do_classify`` on
        ``self.X_training`` / ``self.y``. Its predictions for ``self.X_test``
        are written into ``self.forecast_predictors[label]``, the feature
        importances are stored and printed, and the forecast table is
        exported as a ';'-separated CSV file.

        Args:
            site_id: site identifier passed to ``_create_X_Y_per_site``.
            label: name of the predicted quantity; used as column name and
                in the output file name.
        """
        self._create_X_Y_per_site(site_id, label)

        self.xgbooster(label)

        clf_RDM = {
            'params': {
                'n_estimators': [300],
                'bootstrap': [True],
                'criterion': ['mse']
            },
            'clf': RandomForestRegressor()
        }

        clf = clf_RDM['clf']
        params = clf_RDM['params']

        # Only the fitted estimator and the score are used from the result.
        clf_rdm, _, _, _, _, r2 = \
            do_classify(clf, params, self.X_training, self.y)

        prediction = clf_rdm.predict(self.X_test)
        self.r2 = r2

        self.forecast_predictors[label] = pd.Series(
            prediction, index=self.forecast_predictors.index)

        self.features_weighted = get_features_importance(
            clf_rdm, self._get_features())

        # Call form of print: identical output on Python 2 and valid on 3.
        print(self.features_weighted)

        self.forecast_predictors.to_csv(get_file_path(
            "data/store/" + self.name + "_predictions_" +
            self.datastore.period + "_" + label + ".csv", fileDir),
                                        sep=";")
Beispiel #20
0
    def randomforestregressor(self, testlen, ntrain, ntrees, nodes):
        """Walk-forward random forest prediction of ``closeratio``.

        The unique dates of ``self.hsmadata`` are cut into consecutive
        windows of ``testlen`` dates. For window ``i`` the forest is trained
        on the ``ntrain`` preceding windows, stopping ``self.day`` dates
        before the window starts, and then predicts the rows of window ``i``.

        Args:
            testlen: number of dates per test window.
            ntrain: number of windows used for training.
            ntrees: ``n_estimators`` of the forest.
            nodes: ``min_samples_leaf``; ``min_samples_split`` is twice this.

        Returns:
            DataFrame with the test rows of all windows plus a ``predratio``
            column; an empty DataFrame when no window can be evaluated.
        """
        hsmadata = self.hsmadata
        dates = pd.Series(hsmadata['date'].unique()).sort_values()
        dates.index = range(0, len(dates))
        ntest = len(dates) // testlen

        hsma = pd.DataFrame()
        for i in range(ntrain, ntest):
            traindata = hsmadata[
                (hsmadata['date'] >= dates[(i - ntrain) * testlen])
                & (hsmadata['date'] < dates[i * testlen - self.day])].copy()
            testdata = hsmadata[(hsmadata['date'] >= dates[i * testlen]) & (
                hsmadata['date'] < dates[(i + 1) * testlen])].copy()

            # The first two columns are not used as model inputs.
            traindata = traindata.iloc[:, 2:]
            # ``axis`` must be a keyword: pandas 2.0 no longer accepts it
            # positionally.
            traindatax = traindata.drop(['closeratio'], axis=1)
            traindatay = traindata['closeratio']
            testdatax = testdata[traindatax.columns]

            treemodel = RandomForestRegressor(
                n_estimators=ntrees,
                min_samples_split=nodes * 2,
                min_samples_leaf=nodes)
            treemodel.fit(traindatax, traindatay)
            testdata['predratio'] = treemodel.predict(testdatax)

            hsma = pd.concat([hsma, testdata], ignore_index=True)

        return hsma
 def train_model(self, X_train, Y_train):
     """Fit a random forest on the given data and dump it with joblib.

     The fitted estimator is kept on ``self.estimator`` and written to
     ``simple_reg_model/<frame_len>_<predict_dist>.pkl``.
     """
     window = (self.frame_len, self.predict_dist)
     print("training model %d_%d.pkl" % window)
     destination = "simple_reg_model/%d_%d.pkl" % window

     forest = RandomForestRegressor(n_jobs=-1, n_estimators=100, random_state=0)
     self.estimator = forest
     forest.fit(X_train, Y_train)
     print("finish training model")

     joblib.dump(forest, destination)
Beispiel #22
0
def calcRandomForest(channels_training, channels_testing, target_training,
                     target_testing):
    """Fit a 500-tree forest and pair its test predictions with the targets.

    ``max_features`` is set to the length of the first training row.

    Returns:
        ``(fitted_forest, [predictions, target_testing])``.
    """
    feature_count = len(channels_training[0])
    forest = RandomForestRegressor(max_features=feature_count,
                                   n_estimators=500)
    fitted = forest.fit(channels_training, target_training)
    return fitted, [fitted.predict(channels_testing), target_testing]
Beispiel #23
0
 def __init__(self):
     """Assemble the three-step pipeline stored on ``self.clf``."""
     from sklearn.pipeline import Pipeline
     # NOTE(review): the first two steps are regressors, while scikit-learn
     # pipelines expect every step but the last to implement ``transform``
     # -- confirm that this pipeline can actually be fitted.
     stages = [
         ('vect', DecisionTreeRegressor()),
         ('tfidf', RandomForestRegressor()),
         ('clf', BayesianRidge(compute_score=True)),
     ]
     self.clf = Pipeline(stages)
Beispiel #24
0
def randomForest(trainFeatures, trainResponses, testFeatures, maxFeatures = 'log2', nTree=100):
    """Fit a random forest regressor and return its predictions for ``testFeatures``.

    ``nTree`` is used as ``n_estimators`` and ``maxFeatures`` as
    ``max_features``.
    """
    forest = RandomForestRegressor(max_features=maxFeatures, n_estimators=nTree)
    forest.fit(trainFeatures, trainResponses)
    return forest.predict(testFeatures)
Beispiel #25
0
def evalTrainStationTestStation(trainStation, testStation, features):
    """Train on one station, evaluate on another, and return ``rmseEval(...)[1]``.

    Training data come from ``dataByStation[trainStation]`` and test data
    from ``dataByStation[testStation]``; the result is also printed.
    """
    # Each split call fills only the side it is given a station for.
    trainX, _unusedTestX, trainY, _unusedTestY = splitDataForXValidation(
        {trainStation}, set(), "location", dataByStation[trainStation],
        features, "target")
    _unusedTrainX, testX2, _unusedTrainY, testY2 = splitDataForXValidation(
        set(), {testStation}, "location", dataByStation[testStation],
        features, "target")

    forest = RandomForestRegressor(random_state=42, n_jobs = -1,
                                   n_estimators = 60, max_depth=10)
    forest.fit(trainX, trainY)

    rmse = rmseEval(testY2, forest.predict(testX2))[1]
    print("Training on station " + str(trainStation) + ", applying on station " + str(testStation) + ": rmse: " + str(rmse))
    return rmse
Beispiel #26
0
 def __init__(self, param_grid, n_evaluations=10, random_state=None,
              start_evaluations=3, n_attempts=5, regressor=None):
     """Set up the generator; a small random forest is used when no regressor is given.

     ``param_grid``, ``n_evaluations`` and ``random_state`` are forwarded to
     ``AbstractParameterGenerator.__init__``; the remaining arguments are
     stored on the instance.
     """
     AbstractParameterGenerator.__init__(self, param_grid=param_grid,
                                         n_evaluations=n_evaluations,
                                         random_state=random_state)
     self.regressor = (regressor if regressor is not None
                       else RandomForestRegressor(max_depth=3, n_estimators=10,
                                                  max_features=0.7))
     self.n_attempts = n_attempts
     self.start_evaluations = start_evaluations
Beispiel #27
0
 def trainModel(self, column):
     """Prepare the inputs for ``column`` and fit the matching forest.

     'Salary', 'DOJ' and 'DOL' get a regressor; every other column gets a
     classifier. The fitted estimator is kept on ``self.clf``.
     """
     self.prepareTrainingInputs(column)
     if column in ('Salary', 'DOJ', 'DOL'):
         self.clf = RandomForestRegressor(n_jobs=2, n_estimators=100)
         kind = 'Regressor'
     else:
         self.clf = RandomForestClassifier(n_jobs=2, n_estimators=100)
         kind = 'Classifier'
     print(kind)
     self.clf = self.clf.fit(self.X_train, self.y_train)
Beispiel #28
0
    def fitDurationEstimator(self, modelType="RF"):
        """Fit the duration model with the specified regressor type.

        Only ``modelType == "RF"`` (the default) is handled: a random forest
        is fitted on ``self.X`` / ``self.durationData`` and stored as
        ``self.durationEstimator``. Any other value only prints the message.
        """
        # Call form of print: identical output on Python 2 and valid on 3.
        print("fitting charging duration model...")

        if modelType == "RF":
            self.durationEstimator = RandomForestRegressor(random_state=0,
                                                           n_estimators=50,
                                                           max_depth=50)
            self.durationEstimator.fit(self.X, self.durationData)
Beispiel #29
0
def RF_ALL(trainFileName, testFileName):
    """Train one random forest on all training rows and predict the evaluation rows.

    Returns:
        A list of ``[item, 'all', prediction]`` entries, one per evaluation
        row, where the prediction is clipped at 0 and formatted with four
        decimals.
    """
    train_X, train_y, _ = ld.LoadData_DATA_LABEL_ITEM(trainFileName)
    Eval_X, items = ld.LoadData_DATA_ITEM(testFileName)

    forest = RandomForestRegressor(n_estimators=100,
                                   criterion='mse',
                                   max_depth=None,
                                   max_features='auto',
                                   bootstrap=True)
    forest.fit(train_X, train_y)
    pred_y = forest.predict(Eval_X)

    # Negative predictions are reported as 0.
    return [[items[i], 'all', '%.4f' % max(pred_y[i], 0)]
            for i in range(len(Eval_X))]
Beispiel #30
0
 def modeltrain(X_train, y_train, X_test, y_test):
     """Fit a random forest and return integer predictions for both sets.

     Args:
         X_train, y_train: training features and targets.
         X_test: test features.
         y_test: accepted but not used.

     Returns:
         ``(train_predictions, test_predictions, fitted_model)``; both
         prediction arrays are cast to ``int``.
     """
     # Public import path; the private ``sklearn.ensemble.forest`` module is
     # not available in newer scikit-learn releases.
     from sklearn.ensemble import RandomForestRegressor
     # Build the model
     RF_Model = RandomForestRegressor(n_estimators=100,max_features=1)
     # Fit the model on X_train and y_train
     rgr = RF_Model.fit(X_train, y_train)
     y_train_predict = (rgr.predict(X_train)).astype(int)
     y_test_predict = (rgr.predict(X_test)).astype(int)

     return y_train_predict ,  y_test_predict , rgr