Example no. 1
def uniRegression(p, xLabel, yLabel):
    global image_num
    # Randomly shuffle rows
    p = p.sample(frac=1).reset_index(drop=True)
    # Split train and test
    twentyPercent = -1 * round(p.shape[0] * 0.2)
    xCol = p[xLabel].values.reshape(-1, 1)
    X_train = xCol[:twentyPercent]
    X_test = xCol[twentyPercent:]
    y_train = p[yLabel][:twentyPercent].values.reshape(-1, 1)
    y_test = p[yLabel][twentyPercent:].values.reshape(-1, 1)
    # Fit linear regression model
    lr = linear_model.LinearRegression()
    lr.fit(X_train, y_train)
    # Make predictions
    predicted = lr.predict(X_test)
    r2 = r2_score(y_test, predicted)
    mse = mean_squared_error(y_test, predicted)
    # Plot expected vs. predicted
    plt.scatter(X_test, y_test, color='black')
    plt.plot(X_test, predicted, color='blue', linewidth=2)
    plt.xlabel(xLabel)
    plt.ylabel(yLabel)
    plt.savefig(image_path.format(image_num), bbox_inches='tight')  # save before show() clears the figure
    plt.show()
    image_num += 1
    print("R2 = ", r2)
    print("MSE = ", mse)
    visualizer = ResidualsPlot(lr)
    # Plot residuals
    visualizer.fit(X_train, y_train)  # Fit the training data to the visualizer
    visualizer.score(X_test, y_test)  # Evaluate the model on the test data
    visualizer.show()  # Finalize and render the figure
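A minimal driver sketch for uniRegression: the image_path template and the image_num counter are assumptions standing in for the globals the original module defines, and the DataFrame is synthetic. The function itself additionally expects matplotlib.pyplot as plt, sklearn's linear_model, r2_score, mean_squared_error and yellowbrick's ResidualsPlot to be imported.

import numpy as np
import pandas as pd

# Assumed globals: a format string consumed by image_path.format(image_num).
image_path = "plots/regression_{}.png"
image_num = 0

# Synthetic univariate data: price is a noisy linear function of sqft.
rng = np.random.default_rng(0)
demo = pd.DataFrame({"sqft": rng.uniform(500, 3000, 200)})
demo["price"] = 150 * demo["sqft"] + rng.normal(0, 20000, 200)

uniRegression(demo, "sqft", "price")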
Example no. 2
def residual_plot(model_properties=None, output_path=None):
    '''
    Method that shows the residual plot of the trained model
    '''
    if model_properties is None or output_path is None:
        raise ValueError('Need Model properties and Output path as arguments !')
    estimator = model_properties['estimator']
    X_train = model_properties['X_train']
    y_train = model_properties['y_train']
    X_validation = model_properties['X_validation']
    y_validation = model_properties['y_validation']
    config_map = model_properties['config_map']
    X_scaler = model_properties['X_scaler']
    y_scaler = model_properties['y_scaler']
    X_train[config_map['scale_columns']] = X_scaler.transform(
        X_train[config_map['scale_columns']])
    y_train[config_map['label']] = y_scaler.transform(
        y_train[config_map['label']])
    X_validation[config_map['scale_columns']] = X_scaler.transform(
        X_validation[config_map['scale_columns']])
    y_validation[config_map['label']] = y_scaler.transform(
        y_validation[config_map['label']])
    visualizer = ResidualsPlot(estimator)
    visualizer.fit(X_train.values, y_train.values)
    visualizer.score(X_validation.values, y_validation.values)
    visualizer.poof(outpath=os.path.join(output_path, 'residual_plot.png'))
    return None
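The key names in the sketch below mirror the lookups the function performs; everything concrete (the toy frame, the Ridge estimator, the StandardScaler objects) is an illustrative assumption. The function also needs os and ResidualsPlot in scope.

import pandas as pd
from sklearn.linear_model import Ridge
from sklearn.preprocessing import StandardScaler

df = pd.DataFrame({'area': [50, 80, 120, 65, 90, 140],
                   'rooms': [2, 3, 4, 2, 3, 5],
                   'price': [100, 160, 250, 120, 180, 300]})
X, y = df[['area', 'rooms']], df[['price']]

model_properties = {
    'estimator': Ridge(),
    'X_train': X.iloc[:4].copy(), 'y_train': y.iloc[:4].copy(),
    'X_validation': X.iloc[4:].copy(), 'y_validation': y.iloc[4:].copy(),
    'config_map': {'scale_columns': ['area', 'rooms'], 'label': ['price']},
    'X_scaler': StandardScaler().fit(X),
    'y_scaler': StandardScaler().fit(y),
}
residual_plot(model_properties=model_properties, output_path='.')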
Example no. 3
def train_model(rf, healed_data, target_string):
    # No explicit rf.fit() needed: the visualizer fits the estimator below
    visualizer = ResidualsPlot(rf)
    try:
        visualizer.fit(healed_data["train_features"],
                       healed_data["train_target"])
    except Exception as e:
        st.error("Fit error: " + str(e))

    try:
        visualizer.score(healed_data["test_features"],
                         healed_data["test_target"])
    except Exception as e:
        st.error("Score error: " + str(e))

    visualizer.show()
    # st.write(visualizer)
    plt.savefig("models/rf_reg_eval_" + target_string + ".png")  # savefig returns None, so it cannot be passed to st.pyplot
    st.pyplot(plt.gcf())
    # save model output
    model_output_loc = "models/rf_reg_" + target_string + "_rf_reg_model.pkl"
    with open(model_output_loc, "wb") as model_output:
        pickle.dump(rf, model_output)
    print("saving model to: " + model_output_loc)
    return
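A hedged way to exercise train_model outside the original app: the snippet clearly comes from a Streamlit script (st.error, st.pyplot), so the sketch below only stands in for what the app would pass. The diabetes dataset and the dict keys are assumptions taken from the lookups above, and the models/ directory must already exist.

from sklearn.datasets import load_diabetes
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split

X, y = load_diabetes(return_X_y=True)
X_tr, X_te, y_tr, y_te = train_test_split(X, y, test_size=0.2, random_state=0)

# healed_data mirrors the keys the function reads above.
healed_data = {
    "train_features": X_tr, "train_target": y_tr,
    "test_features": X_te, "test_target": y_te,
}
train_model(RandomForestRegressor(), healed_data, "demo")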
Example no. 4
def residuals():
    X, y = load_concrete()
    X_train, X_test, y_train, y_test = tts(X, y, test_size=0.2)
    oz = ResidualsPlot(Ridge(), ax=newfig())
    oz.fit(X_train, y_train)
    oz.score(X_test, y_test)
    savefig(oz, "residuals")
Example no. 5
def plot_residuals(X, y, model, outpath="images/residuals.png", **kwargs):
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

    _, ax = plt.subplots()

    visualizer = ResidualsPlot(model, ax=ax, **kwargs)
    visualizer.fit(X_train, y_train)
    visualizer.score(X_test, y_test)
    visualizer.poof(outpath=outpath)
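A usage sketch for plot_residuals, assuming train_test_split, matplotlib.pyplot as plt and ResidualsPlot are already imported in the module; the diabetes dataset and the Lasso estimator are stand-ins.

from sklearn.datasets import load_diabetes
from sklearn.linear_model import Lasso

X, y = load_diabetes(return_X_y=True)
# Extra keyword arguments are forwarded to ResidualsPlot, e.g. hist=False.
plot_residuals(X, y, Lasso(), outpath="residuals.png", hist=False)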
Example no. 6
def linregress(*args):
    #import dependencies
    import sklearn as sk
    from sklearn.linear_model import LogisticRegression
    # Note: despite the name, this function fits a logistic (logit) model
    model = LogisticRegression()
    from sklearn import feature_selection
    import statsmodels.api as sm
    from patsy import dmatrices
    import numpy as np

    #define arguments
    dataframe = args[0]
    y = args[1]
    xvars = []
    for i in range(2, len(args)):
        xvars.append(args[i])
    x = dataframe[xvars]
    y = dataframe[y]
    #fit the model
    model.fit(x, y)

    #Generate Fit Statistics
    ##prep data for patsy: build the right-hand side of the formula
    terms = [f' + {item}' for item in xvars]
    newstring = "".join(terms)[3:]  # drop the leading ' + '

    ind = args[1].strip('"')

    ##Fit the Model
    Y, X = dmatrices(f"{ind} ~ {newstring}",
                     data=dataframe,
                     return_type="dataframe")
    logit = sm.Logit(Y, X)
    logit_result = logit.fit()

    #Print Log Odds
    print("LOG ODDS")
    print(logit_result.summary())
    print(np.exp(logit_result.params))

    #Plot the Residuals
    print("\n Residual Plot")
    from sklearn.linear_model import Ridge
    from yellowbrick.datasets import load_concrete
    from yellowbrick.regressor import ResidualsPlot

    model = Ridge()

    visualizer = ResidualsPlot(model, hist=True)
    y2 = y.values.reshape(-1, 1)
    visualizer.fit(x, y2)    # Fit the visualizer on the full dataset
    visualizer.score(x, y2)  # Score on the same data (no train/test split here)
    visualizer.show()        # Finalize and render the figure
Example no. 8
def vis_residuals(model, features, target):
    '''
    Render a residuals plot for the given model and return the rendered figure
    '''
    visualizer = ResidualsPlot(model, size=(1080, 720))
    visualizer.fit(features, target)
    return visualizer.poof()
Example no. 9
    def residual_plot(lin_model, x_train, y_train, x_test, y_test):
        fig = plt.figure(figsize=(16, 12))
        ax = fig.add_subplot(111)
        visualizer = ResidualsPlot(lin_model, ax=ax)

        visualizer.fit(x_train, y_train)   # Fit the training data to the visualizer
        visualizer.score(x_test, y_test)   # Evaluate the model on the test data
        visualizer.show()
Example no. 10
def residuals_yellowbrick(predictors, target):
    """Returns a residuals vs. fitted graph with a histogram.

    Uses yellowbrick, which makes good graphs. The estimator must be an
    instance (LinearRegression()), not the class itself; passing the bare
    class was the source of the original missing-argument TypeError.
    """
    lm = LinearRegression()
    visualizer = ResidualsPlot(lm)
    visualizer.fit(predictors, target)
    return visualizer
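With the estimator instantiated, a quick hypothetical call (the diabetes data is a stand-in; LinearRegression and ResidualsPlot must be imported in the module):

from sklearn.datasets import load_diabetes

X, y = load_diabetes(return_X_y=True)
viz = residuals_yellowbrick(X, y)
viz.show()  # only training residuals: the function never calls score()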
Example no. 11
def log_residuals_chart(regressor,
                        X_train,
                        X_test,
                        y_train,
                        y_test,
                        experiment=None):
    """Log residuals chart.

    Make sure you created an experiment by using ``neptune.create_experiment()`` before you use this method.

    Tip:
        Check `Neptune documentation <https://docs.neptune.ai/integrations/scikit_learn.html>`_ for the full example.

    Args:
        regressor (:obj:`regressor`):
            | Fitted sklearn regressor object
        X_train (:obj:`ndarray`):
            | Training data matrix
        X_test (:obj:`ndarray`):
            | Testing data matrix
        y_train (:obj:`ndarray`):
            | The regression target for training
        y_test (:obj:`ndarray`):
            | The regression target for testing
        experiment (:obj:`neptune.experiments.Experiment`, optional, default is ``None``):
            | Neptune ``Experiment`` object to control to which experiment you log the data.
            | If ``None``, logs to the currently active, most recent experiment.

    Returns:
        ``None``

    Examples:
        .. code:: python3

            rfr = RandomForestRegressor()
            rfr.fit(X_train, y_train)

            neptune.init('my_workspace/my_project')
            exp = neptune.create_experiment()

            log_residuals_chart(rfr, X_train, X_test, y_train, y_test, experiment=exp)
    """
    assert is_regressor(regressor), 'regressor should be sklearn regressor.'
    exp = _validate_experiment(experiment)

    try:
        fig, ax = plt.subplots()
        visualizer = ResidualsPlot(regressor, is_fitted=True, ax=ax)
        visualizer.fit(X_train, y_train)
        visualizer.score(X_test, y_test)
        visualizer.finalize()
        exp.log_image('charts_sklearn', fig, image_name='Residuals Plot')
        plt.close(fig)
    except Exception as e:
        print('Did not log residuals chart. Error: {}'.format(e))
Example no. 12
def regression_sanity_check(model, X_train, X_test, y_train, y_test):
    fig, (ax1, ax2) = plt.subplots(nrows=1, ncols=2, figsize=(20, 10))
    plt.sca(ax1)
    visualizer = ResidualsPlot(model, ax=ax1)
    visualizer.fit(X_train, y_train)
    visualizer.score(X_test, y_test)
    plt.sca(ax2)
    visualizer2 = PredictionError(model, ax=ax2)
    visualizer2.fit(X_train, y_train)
    visualizer2.score(X_test, y_test)
    visualizer.finalize()
    visualizer2.poof()
Example no. 13
	def visualize_residuals_plot(self, model_info):
		model = model_info['model']	   
		X_train = model_info['X_train']
		X_test = model_info['X_test']
		Y_train = model_info['Y_train']
		Y_test = model_info['Y_test']

		visualizer = ResidualsPlot(model)

		visualizer.fit(X_train, Y_train)  # Fit the training data to the model
		visualizer.score(X_test, Y_test)  # Evaluate the model on the test data
		visualizer.poof()				  # Draw/show/poof the data
Example no. 14
def test_for_homoscedasticity(X_train, y_train, X_test, y_test):
    """ Plot the data and check for homoscedasticity.
    Arguments:
    X_train (dataframe): examples in the training set
    X_test (dataframe): examples in the test set
    y_train (dataframe): target in the training set
    y_test (dataframe): target in the test set
    """
    lr = LinearRegression()
    visualizer = ResidualsPlot(lr)
    visualizer.fit(X_train, y_train)
    visualizer.score(X_test, y_test)
    #there should be no clear pattern
    visualizer.poof()
Example no. 15
def testFunc7(savepath='Results/bikeshare_LinearRegression_ResidualsPlot.png'):
    '''
    Linear-regression prediction on the bike-share data
    '''
    data = pd.read_csv('fixtures/bikeshare/bikeshare.csv')
    X = data[[
        "season", "month", "hour", "holiday", "weekday", "workingday",
        "weather", "temp", "feelslike", "humidity", "windspeed"
    ]]
    Y = data["riders"]
    X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size=0.3)
    visualizer = ResidualsPlot(LinearRegression())
    visualizer.fit(X_train, y_train)   # fit on the training split
    visualizer.score(X_test, y_test)   # evaluate on the held-out split
    visualizer.poof(outpath=savepath)
Example no. 16
def create_residuals_chart(regressor, X_train, X_test, y_train, y_test):
    """Create residuals chart.

    Tip:
        Check Sklearn-Neptune integration
        `documentation <https://docs-beta.neptune.ai/essentials/integrations/machine-learning-frameworks/sklearn>`_
        for the full example.

    Args:
        regressor (:obj:`regressor`):
            | Fitted sklearn regressor object
        X_train (:obj:`ndarray`):
            | Training data matrix
        X_test (:obj:`ndarray`):
            | Testing data matrix
        y_train (:obj:`ndarray`):
            | The regression target for training
        y_test (:obj:`ndarray`):
            | The regression target for testing

    Returns:
        ``neptune.types.File`` object that you can assign to run's ``base_namespace``.

    Examples:
        .. code:: python3

            import neptune.new.integrations.sklearn as npt_utils

            rfr = RandomForestRegressor()
            rfr.fit(X_train, y_train)

            run = neptune.init(project='my_workspace/my_project')
            run['visuals/residuals'] = npt_utils.create_residuals_chart(rfr, X_train, X_test, y_train, y_test)
    """
    assert is_regressor(regressor), 'regressor should be sklearn regressor.'

    chart = None

    try:
        fig, ax = plt.subplots()
        visualizer = ResidualsPlot(regressor, is_fitted=True, ax=ax)
        visualizer.fit(X_train, y_train)
        visualizer.score(X_test, y_test)
        visualizer.finalize()
        chart = neptune.types.File.as_image(fig)
        plt.close(fig)
    except Exception as e:
        print('Did not log residuals chart. Error: {}'.format(e))

    return chart
Example no. 17
def plotResidualsAgainstHoldout(df, holdOut_df, task, seed, schema):
    X_train = df[COLUMNS.get(task)].values
    X_test = holdOut_df[COLUMNS.get(task)].values
    y_train = df[TARGETS.get(task)].values
    y_test = holdOut_df[TARGETS.get(task)].values

    # Instantiate the linear model and visualizer
    wrapped_model = LinearRegression()
    visualizer = ResidualsPlot(wrapped_model, title="Residuals for schema {}".format(schema))

    visualizer.fit(X_train, y_train)
    visualizer.score(X_test, y_test)  # Evaluate the model on the test data
    visualizer.show(outpath="figs/residuals_{}_seed{}_{}.png".format(task, seed, schema))
    plt.close()
Example no. 18
def residuals(ax):
    from sklearn.linear_model import RidgeCV
    from yellowbrick.regressor import ResidualsPlot

    features = [
        'cement', 'slag', 'ash', 'water', 'splast', 'coarse', 'fine', 'age'
    ]

    splits = load_data('concrete', cols=features, target='strength', tts=True)
    X_train, X_test, y_train, y_test = splits

    estimator = RidgeCV()
    visualizer = ResidualsPlot(estimator, ax=ax)
    visualizer.fit(X_train, y_train)
    visualizer.score(X_test, y_test)
    return visualizer
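A sketch of a driver for residuals(); it assumes the load_data helper above resolves the concrete fixture and returns a train/test split, so only the Axes handling is shown.

import matplotlib.pyplot as plt

fig, ax = plt.subplots(figsize=(9, 6))
oz = residuals(ax)   # draws train/test residuals onto the supplied Axes
oz.finalize()        # add title and legend without opening a window
fig.savefig("residuals_concrete.png", bbox_inches="tight")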
Example no. 19
    def residuals_plot(self) -> None:
        """Plot the difference between the observed value of the target variable (y)
        and the predicted value (ŷ), i.e. the error of the prediction"""

        visualizer = ResidualsPlot(self.trained_model)
        visualizer.fit(self.X_train,
                       self.y_train)  # Fit the training data to the visualizer
        visualizer.score(self.X_test,
                         self.y_test)  # Evaluate the model on the test data
        save_dir = f"{self.plots_dir}/residuals_plot_{self.model_id}.png"
        visualizer.show(outpath=save_dir)
        if not LOCAL:
            upload_to_s3(save_dir,
                         f'plots/residuals_plot_{self.model_id}.png',
                         bucket=S3_BUCKET_NAME)
        plt.clf()
Example no. 20
def visualize_pred_residuals(X_train, X_test, y_train, y_test):
    model = linear_model.Ridge(alpha=0.05)
    fitted = model.fit(X_train, y_train)
    visualizer = ResidualsPlot(fitted, size=(1080, 720))
    pred = fitted.predict(X_test)
    r = stats.linregress(pred, y_test)
    print(r[2])  # r-value: correlation between predictions and actuals
    visualizer.fit(X_train, y_train)
    visualizer.score(X_test, y_test)
    visualizer.poof()
    cvr = model_selection.cross_validate(model,
                                         X_test,
                                         y_test,
                                         cv=10,
                                         return_train_score=True)
    print('Training scores:', cvr['train_score'], '\n')
    print('Testing scores:', cvr['test_score'])
Example no. 21
def my_residual_plot(X_train, y_train, X_test, y_test):
    plt.figure(figsize=(20, 5))
    plt.grid(True)

    visualizer = ResidualsPlot(LinearRegression(), hist=False)

    visualizer.fit(X_train, y_train)  # Fit the training data to the visualizer
    visualizer.score(X_test, y_test)  # Evaluate the model on the test data

    ticks = np.arange(1000, max(y_test.values) + 1, 500)

    plt.title("Residuals plot", fontsize=25)
    plt.xlabel("Apartment prices", fontsize=15)
    plt.ylabel("Residuals", fontsize=15)

    plt.plot(ticks, np.zeros(len(ticks)), "r")
    plt.legend()
    plt.show()
Example no. 22
def generate_ordinal_diagnostics(x, y, current_best_model, label_type,
                                 diagnostic_image_path):
    x = np.array(x)
    y = np.array(y)
    kf = KFold(n_splits=10, shuffle=True)
    guesses = []
    for train_index, test_index in kf.split(x):
        X_train, X_test = x[train_index], x[test_index]
        y_train, y_test = np.array(y)[train_index], np.array(y)[test_index]
        model = current_best_model[0].fit(X_train, y_train)
        for guess in zip(y_test.tolist(), model.predict(X_test).tolist()):
            guesses.append(guess)
    X_train, X_test, y_train, y_test = train_test_split(x, y, test_size=0.2)
    if "VotingClassifier" not in str(current_best_model[0].__class__):
        visualizer = ResidualsPlot(current_best_model[0])
        visualizer.fit(X_train, y_train)
        visualizer.score(X_test, y_test)
        visualizer.poof(outpath=diagnostic_image_path + "/residuals_plot.png")
        plt.clf()
        visualizer = PredictionError(current_best_model[0])
        visualizer.fit(X_train, y_train)
        visualizer.score(X_test, y_test)
        visualizer.poof(outpath=diagnostic_image_path +
                        "/prediction_error.png")
        plt.clf()
    visualizer = PCADecomposition(scale=True, center=False, col=y, proj_dim=2)
    visualizer.fit_transform(x, y)
    print(diagnostic_image_path + "/pca_2.png")
    visualizer.poof(outpath=diagnostic_image_path + "/pca_2.png")
    plt.clf()
    visualizer = PCADecomposition(scale=True, center=False, col=y, proj_dim=3)
    visualizer.fit_transform(x, y)
    visualizer.poof(outpath=diagnostic_image_path + "/pca_3.png")
    plt.clf()
    return {
        "mse": mean_squared_error(*np.array(guesses).transpose()),
        "r2": r2_score(*np.array(guesses).transpose()),
        "mae": median_absolute_error(*np.array(guesses).transpose()),
        "evs": explained_variance_score(*np.array(guesses).transpose()),
        "rmse": np.sqrt(mean_squared_error(*np.array(guesses).transpose()))
    }
Example no. 23
def showResiduals():
    # Load the data
    df = load_data('concrete')
    feature_names = [
        'cement', 'slag', 'ash', 'water', 'splast', 'coarse', 'fine', 'age'
    ]
    target_name = 'strength'

    # Get the X and y data from the DataFrame (as_matrix() was removed from pandas)
    X = df[feature_names].values
    y = df[target_name].values

    # Create the train and test data
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)
    # Instantiate the linear model and visualizer
    ridge = Ridge()
    visualizer = ResidualsPlot(ridge)

    visualizer.fit(X_train, y_train)  # Fit the training data to the visualizer
    visualizer.score(X_test, y_test)  # Evaluate the model on the test data
    g = visualizer.poof()  # Draw/show/poof the data
Example no. 24
def ridge_regression(X_train, y_train, X_test, y_test, plot):
    """
    Perform a ridge regression with built-in CV and plot the feature importance
    """
    # Fit the ridge regression
    reg = RidgeCV()
    reg.fit(X_train, y_train)
    print("Best alpha using built-in RidgeCV: %f" % reg.alpha_)
    print("Best score using built-in RidgeCV: %f" % reg.score(X_train, y_train))
    coef = pd.Series(reg.coef_, index=X_train.columns)
    print(
        "Ridge picked "
        + str(sum(coef != 0))
        + " variables and eliminated the other "
        + str(sum(coef == 0))
        + " variables"
    )
    # Extract the feature importance
    imp_coef = coef.sort_values()
    # Plot the feature importance
    if plot:
        plt.rcParams["figure.figsize"] = (8.0, 10.0)
        imp_coef.plot(kind="barh")
        plt.title("Feature importance using Ridge Model")
        plt.show()
        # Visualizing the regression
        visualizer = ResidualsPlot(reg, size=(1080, 720))
        visualizer.fit(X_train, y_train)  # Fit the training data to the visualizer
        visualizer.score(X_test, y_test)  # Evaluate the model on the test data
        visualizer.show()                 # Finalize and render the figure
    # Using the test data to calculate a score
    y_pred = reg.predict(X_test)
    # Return metrics
    return {
        "name": "Ridge Regression",
        "R squared": reg.score(X_test, y_test),
        "R squared training": reg.score(X_train, y_train),
        "RMSE": rmse(y_test, y_pred),
        "MAE": mean_absolute_error(y_test, y_pred),
    }
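Since ridge_regression indexes X_train.columns, it needs DataFrame inputs. A hedged driver follows; the diabetes frame is a stand-in, and the rmse/mean_absolute_error helpers are assumed to be imported, as the snippet does not show where they come from.

from sklearn.datasets import load_diabetes
from sklearn.model_selection import train_test_split

frame = load_diabetes(as_frame=True)
X_tr, X_te, y_tr, y_te = train_test_split(frame.data, frame.target, test_size=0.2)

metrics = ridge_regression(X_tr, y_tr, X_te, y_te, plot=False)
print(metrics["R squared"], metrics["RMSE"])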
Example no. 25
    def slr(self, iv, dv, plot_relationship=False, plot_residuals=True):

        # Create simple linear regression model
        self.slr_model = LinearRegression(fit_intercept=True)
        y = self.data[dv]
        x = self.data[iv]

        self.slr_model.fit(x.values[:, np.newaxis], y)  # a pandas Series does not support 2-D indexing

        xfit = np.linspace(-4, 4, 1000)
        yfit = self.slr_model.predict(xfit[:, np.newaxis])

        if plot_relationship:
            sns.lmplot(x=iv, y=dv, data=self.data, height=7, aspect=1.25)
            plt.plot(xfit, yfit)
            plt.ylabel(dv)
            plt.xlabel(iv)
            plt.title("{} = {} • {} + {}".format(dv, round(self.slr_model.coef_[0], 5), iv,
                                               round(self.slr_model.intercept_, 5)))
            plt.subplots_adjust(left=.095, right=.95, top=.9, bottom=.15)
            plt.xlim(-100, max(self.data["Counts"])*1.1)

        if plot_residuals:
            from yellowbrick.regressor import ResidualsPlot

            # Instantiate the linear model and visualizer
            visualizer = ResidualsPlot(model=self.slr_model)

            visualizer.fit(x.values[:, np.newaxis], y)  # Fit the training data to the model
            visualizer.poof()

        print("Simple Linear Regression\n{} = {} • {} + {}".format(dv, round(self.slr_model.coef_[0], 5), iv,
                                                                   round(self.slr_model.intercept_, 5)))

        # Predicts RMSE
        y_predict = self.slr_model.predict(x.values.reshape(-1, 1))
        rmse = sqrt(((y - y_predict) ** 2).values.mean())

        self.df_rmse.loc["Linear"] = round(rmse, 5)
        print("\n", self.df_rmse)
Example no. 26
class PrincipalComponentRegressor(Regressor):
    def __init__(self, n_components):
        super().__init__()
        self.n_components = n_components
        self.regressor = LinearRegression()
        self.pca = None

    def fit(self, x_train, y_train, standardize=False):
        self.standardize = standardize  # residual_plot() reads this flag
        self.pca = PCA(self.n_components)
        self.x_train = self.pca.fit_transform(x_train)
        self.y_train = y_train
        self.regressor.fit(self.x_train, self.y_train)
        self._inference()
        return self.regressor.intercept_, self.regressor.coef_, self.p, self.regressor.score(self.x_train, y_train)

    def predict(self, x_test):
        try:
            x_test_transform = self.pca.transform(x_test)
        except ValueError:
            x_test_transform = x_test
        prediction = self.regressor.predict(x_test_transform)
        return prediction

    def residual_plot(self, x_test=None, y_test=None):
        if self.standardize:
            x_test = self.standardizescaler.transform(x_test)
        try:
            self.residual_visualizer = ResidualsPlot(self.regressor)
        except yellowbrick.exceptions.YellowbrickTypeError:
            self.residual_visualizer = ResidualsPlot(self.regressor.regressor)

        self.residual_visualizer.fit(self.x_train, self.y_train)
        if x_test is not None and y_test is not None:
            try:
                self.residual_visualizer.score(x_test, y_test)
            except ValueError:
                x_test = self.pca.transform(x_test)
                self.residual_visualizer.score(x_test, y_test)
        self.residual_visualizer.poof()
Example no. 27
def main():
    data = pd.read_csv('plano-saude.csv')
    # .values converts to a numpy array
    x = data.iloc[:, 0].values
    y = data.iloc[:, 1].values
    corr_coef = np.corrcoef(x, y)
    # scikit-learn algorithms need the data in matrix format
    x = x.reshape(-1, 1)

    regression = LinearRegression()
    # perform the training
    regression.fit(x, y)
    # b0
    regression.intercept_
    # b1
    regression.coef_

    plt.scatter(x, y)
    plt.plot(x, regression.predict(x), color='red')
    plt.title('Simple linear regression')
    plt.xlabel('Age')
    plt.ylabel('Cost')

    value = [40]
    value = np.asarray(value)
    value = value.reshape(-1, 1)
    prevision1 = regression.predict(value)
    # y = b0 + b1 * x1
    prevision2 = regression.intercept_ + regression.coef_ * value
    # checking the score of the regression algorithm
    score = regression.score(x, y)
    # plotting a chart for a better view of the data
    visualizer = ResidualsPlot(regression)
    visualizer.fit(x, y)
    # Train R² is the same thing as regression.score
    visualizer.poof()
Example no. 28
class RandForestRegressor(Regressor):
    def __init__(self):
        super().__init__()
        self.regressor = RandomForestRegressor()

    def fit(self, x_train, y_train, standardize=False):
        self.standardize = standardize
        if self.standardize:
            self.standardizescaler.fit(x_train)
            x_train = self.standardizescaler.transform(x_train)

        self.x_train = x_train
        self.y_train = y_train
        self.regressor.fit(self.x_train, self.y_train.ravel())
        self._inference()
        return self.rsquared

    def residual_plot(self, x_test=None, y_test=None):
        if self.standardize:
            x_test = self.standardizescaler.transform(x_test)
        try:
            self.residual_visualizer = ResidualsPlot(self.regressor)
        except yellowbrick.exceptions.YellowbrickTypeError:
            self.residual_visualizer = ResidualsPlot(self.regressor.regressor)

        y_train = self.y_train.ravel()
        self.residual_visualizer.fit(self.x_train, y_train)
        if x_test is not None and y_test is not None:
            y_test = y_test.ravel()
            self.residual_visualizer.score(x_test, y_test)
        self.residual_visualizer.poof()

    def predict(self, x_test):
        if self.standardize:
            x_test = self.standardizescaler.transform(x_test)
        return self.regressor.predict(x_test).reshape(-1, 1)
Example no. 29
# calculating mse
mse = np.mean((pred - y_test)**2)
mse
## calculating score
ridgeReg.score(X_test, y_test)


from yellowbrick.regressor import ResidualsPlot

# Instantiate the linear model and visualizer
ridge = Ridge()
visualizer = ResidualsPlot(ridge)

visualizer.fit(X_train, y_train)  # Fit the training data to the model
visualizer.score(X_test, y_test)  # Evaluate the model on the test data
visualizer.poof()

## Apply different algorithms on X_train, X_test, y_train, y_test

# Fitting K-NN to the Training set
from sklearn.neighbors import KNeighborsClassifier
classifier = KNeighborsClassifier(n_neighbors = 5, metric = 'minkowski', p = 2)
classifier.fit(X_train, y_train)

# Predicting the Test set results
pred_y = classifier.predict(X_test)

# Making the Confusion Matrix
Example no. 30
#Predicting the price of a 150m2 living area
model.predict([[150]])

#Predicting the price of a 200m2 living area
model.predict([[200]])

from sklearn.linear_model import Ridge
from yellowbrick.regressor import ResidualsPlot

#Create a new model and plot the residuals for the regression
model = Ridge()
visualizer = ResidualsPlot(model)

#Fit the training data to the visualizer
visualizer.fit(LIVING_AREA, Selling_price)
visualizer.show()

#Creating a multiple variable regression
df1 = df[df['Land_size'].notna()]
df1 = df1[df1['Rooms'].notna()]
X = df1[['Living_area', 'Rooms', 'Land_size', 'Age']]
y = df1['Selling_price']
regr = LinearRegression()
regr.fit(X, y)

#The intercept and coefficients of the new model
print('Intercept: \n', regr.intercept_)
print('Coefficients: \n', regr.coef_)

#Plot the residuals for the model based on multiple variables
Example no. 31
# Intercept between x and y (start of the regression line)
print(modelo.intercept_)

# Coefficient
print(modelo.coef_)

#%%
# Generate the chart
# scatter - plots the data points
plt.scatter(X, Y)
# plot - draws the best-fit line through the points
plt.plot(X, modelo.predict(X), color='red')

# Note - run the two commands above together to build the scatter
# plot with the best-fit line

# Stopping distance of 22 feet (predicting what speed the car was going)
distancia = 22
modelo.intercept_ + modelo.coef_ * distancia
# or
modelo.predict(np.array(distancia).reshape(-1, 1))

# Residuals - distance of the points from the regression line
print(modelo._residues)
#%%
# Generate a new chart from the model for a better view of the residuals
visualizador = ResidualsPlot(modelo)
visualizador.fit(X, Y)
visualizador.poof()