def peplot():
    """
    Draw a prediction error plot for a Lasso model on the concrete dataset.

    The data is split with ``tts`` (``test_size=0.2``); the visualizer is fit
    on the training portion, scored on the test portion, and handed to
    ``savefig`` under the name "prediction_error".
    """
    features, target = load_concrete()
    split = tts(features, target, test_size=0.2)
    X_train, X_test, y_train, y_test = split

    visualizer = PredictionError(Lasso(), ax=newfig())
    visualizer.fit(X_train, y_train)
    visualizer.score(X_test, y_test)

    savefig(visualizer, "prediction_error")
Exemple #2
0
def alpha_selection(ax=None):
    """
    Build an AlphaSelection visualizer around LassoCV for the concrete data.

    Parameters
    ----------
    ax : optional
        Axes object forwarded unchanged to ``AlphaSelection``.

    Returns
    -------
    Whatever ``tts_plot`` returns for the visualizer and the pandas data.
    """
    X, y = load_concrete(return_dataset=True).to_pandas()

    # 400 candidate alphas, log-spaced between 1e-10 and 1e1
    candidate_alphas = np.logspace(-10, 1, 400)
    estimator = LassoCV(alphas=candidate_alphas)

    return tts_plot(AlphaSelection(estimator, ax=ax), X, y)
def residuals():
    """
    Draw a residuals plot for a Ridge model on the concrete dataset.

    The data is split with ``tts`` (``test_size=0.2``); the visualizer is fit
    on the training portion, scored on the test portion, and handed to
    ``savefig`` under the name "residuals".
    """
    features, target = load_concrete()
    split = tts(features, target, test_size=0.2)
    X_train, X_test, y_train, y_test = split

    visualizer = ResidualsPlot(Ridge(), ax=newfig())
    visualizer.fit(X_train, y_train)
    visualizer.score(X_test, y_test)

    savefig(visualizer, "residuals")
def manifold(dataset, manifold):
    """
    Fit a Manifold visualizer on a named dataset and save the figure.

    Parameters
    ----------
    dataset : str
        Either "concrete" or "occupancy".
    manifold : str
        Manifold identifier passed through to ``Manifold``.

    Raises
    ------
    ValueError
        If ``dataset`` is not one of the two supported names.
    """
    # Reject unsupported names up front
    if dataset not in ("concrete", "occupancy"):
        raise ValueError("unknown dataset")

    loader = load_concrete if dataset == "concrete" else load_occupancy
    X, y = loader()

    visualizer = Manifold(manifold=manifold, ax=newfig())
    visualizer.fit_transform(X, y)

    figure_name = "{}_{}_manifold".format(dataset, manifold)
    savefig(visualizer, figure_name)
Exemple #5
0
def dataset_example(dataset="occupancy",
                    manifold="all",
                    path="images/",
                    quick=False,
                    **kwargs):
    """
    Render one manifold example, or one example per known algorithm.

    Parameters
    ----------
    dataset : str, default: "occupancy"
        Name of the dataset to embed; "occupancy" or "concrete".
    manifold : str, default: "all"
        Manifold algorithm to use. The special value "all" loops over
        ``MANIFOLD_ALGORITHMS`` (minus those in ``SKIP``) and renders one
        figure per algorithm.
    path : str or None, default: "images/"
        In "all" mode, an existing directory that receives one
        ``{dataset}_{algorithm}_manifold.png`` file per algorithm. In single
        mode, the value passed as ``outpath`` to the visualizer's ``show``.
        ``None`` is forwarded as ``outpath=None``.
    quick : bool, default: False
        Use the ``manifold_embedding`` quick method instead of constructing
        a ``Manifold`` visualizer directly.
    kwargs : dict
        Extra keyword arguments forwarded to the visualizer.

    Raises
    ------
    ValueError
        If "all" mode is given a path that is not an existing directory, or
        if ``dataset`` is not a known dataset name.
    """
    if manifold == "all":
        # The original check was a bare string expression and never fired.
        if path is not None and not os.path.isdir(path):
            raise ValueError("please specify a directory to save examples to")

        for algorithm in MANIFOLD_ALGORITHMS:
            if algorithm in SKIP:
                continue

            print("generating {} {} manifold".format(dataset, algorithm))

            # os.path.join cannot take None; keep None so it reaches show()
            fpath = None
            if path is not None:
                fpath = os.path.join(
                    path, "{}_{}_manifold.png".format(dataset, algorithm))

            try:
                # Forward quick/kwargs so "all" mode honors the caller's options
                dataset_example(
                    dataset, algorithm, fpath, quick=quick, **kwargs)
            except Exception as e:
                # Best effort: one failing algorithm must not stop the batch
                print("could not visualize {} manifold on {} data: {}".format(
                    algorithm, dataset, e))

        # Break here!
        return

    # Create single example

    # Checks for the dataset which user wants to use; done before any figure
    # is created so an unknown name does not leave an unused figure behind
    if dataset == "occupancy":
        X, y = load_occupancy()
    elif dataset == "concrete":
        X, y = load_concrete()
    else:
        raise ValueError("unknown dataset '{}'".format(dataset))

    _, ax = plt.subplots(figsize=(9, 6))

    # Check if the quick method is called
    if quick:
        oz = manifold_embedding(X, y, manifold=manifold, show=False, **kwargs)
    else:
        oz = Manifold(ax=ax, manifold=manifold, **kwargs)
        oz.fit_transform(X, y)

    oz.show(outpath=path)
    def test_integration_coef(self):
        """
        Image-comparison integration test for a model exposing coefficients
        """
        # Concrete data as numpy arrays, with title-cased feature names
        # taken from the dataset metadata to use as labels
        concrete = load_concrete(return_dataset=True)
        X, y = concrete.to_numpy()
        labels = [name.title() for name in concrete.meta["features"]]

        figure = plt.figure()
        axes = figure.add_subplot()

        viz = FeatureImportances(
            Lasso(random_state=42), ax=axes, labels=labels, relative=False
        )
        viz.fit(X, y)
        viz.finalize()

        # Appveyor and Linux conda non-text-based differences
        self.assert_images_similar(viz, tol=16.2)
    def test_numpy_integration(self):
        """
        Fit CooksDistance on the concrete dataset supplied as numpy arrays
        """
        X, y = load_concrete(return_dataset=True).to_numpy()

        # Both halves of the dataset must really be numpy arrays
        assert isinstance(X, np.ndarray)
        assert isinstance(y, np.ndarray)

        _, axes = plt.subplots()
        visualizer = CooksDistance(ax=axes)
        viz = visualizer.fit(X, y)
        assert_fitted(viz)

        # Regression values for the learned attributes
        total_distance = viz.distance_.sum()
        total_p_values = viz.p_values_.sum()
        assert total_distance == pytest.approx(1.2911900571300652)
        assert total_p_values == pytest.approx(1029.9999525376425)
        assert viz.influence_threshold_ == pytest.approx(0.003883495145631068)
        assert viz.outlier_percentage_ == pytest.approx(7.3786407766990285)

        viz.finalize()
        self.assert_images_similar(viz)
    def test_with_fitted(self):
        """
        Check when the visualizer refits a model that has already been fit
        """
        dataset = load_concrete(return_dataset=True)
        X, y = dataset.to_numpy()

        model = Lasso().fit(X, y)

        # Default settings and is_fitted=True must both leave the model alone
        for options in ({}, {"is_fitted": True}):
            with mock.patch.object(model, "fit") as fit_spy:
                oz = FeatureImportances(model, **options)
                oz.fit(X, y)
                fit_spy.assert_not_called()

        # is_fitted=False must trigger exactly one refit on the given data
        with mock.patch.object(model, "fit") as fit_spy:
            oz = FeatureImportances(model, is_fitted=False)
            oz.fit(X, y)
            fit_spy.assert_called_once_with(X, y)
def dataset_example(dataset="occupancy",
                    manifold="all",
                    path="images/",
                    **kwargs):
    """
    Render one manifold example, or one example per known algorithm.

    Parameters
    ----------
    dataset : str, default: "occupancy"
        Name of the dataset to embed; "occupancy" or "concrete".
    manifold : str, default: "all"
        Manifold algorithm to use. The special value "all" loops over
        ``MANIFOLD_ALGORITHMS`` (minus those in ``SKIP``) and renders one
        figure per algorithm.
    path : str or None, default: "images/"
        In "all" mode, an existing directory that receives one
        ``{dataset}_{algorithm}_manifold.png`` file per algorithm. In single
        mode, the value passed as ``outpath`` to the visualizer's ``poof``.
        ``None`` is forwarded as ``outpath=None``.
    kwargs : dict
        Extra keyword arguments forwarded to ``Manifold``.

    Raises
    ------
    ValueError
        If "all" mode is given a path that is not an existing directory, or
        if ``dataset`` is not a known dataset name.
    """
    if manifold == "all":
        # The original check was a bare string expression and never fired.
        if path is not None and not os.path.isdir(path):
            raise ValueError("please specify a directory to save examples to")

        for algorithm in MANIFOLD_ALGORITHMS:
            if algorithm in SKIP:
                continue

            print("generating {} {} manifold".format(dataset, algorithm))

            # os.path.join cannot take None; keep None so it reaches poof()
            fpath = None
            if path is not None:
                fpath = os.path.join(
                    path, "{}_{}_manifold.png".format(dataset, algorithm))

            try:
                # Forward kwargs so "all" mode honors the caller's options
                dataset_example(dataset, algorithm, fpath, **kwargs)
            except Exception as e:
                # Best effort: one failing algorithm must not stop the batch
                print("could not visualize {} manifold on {} data: {}".format(
                    algorithm, dataset, e))

        # Break here!
        return

    # Create single example; the dataset name is validated before any figure
    # is created so an unknown name does not leave an unused figure behind
    if dataset == "occupancy":
        X, y = load_occupancy()
    elif dataset == "concrete":
        X, y = load_concrete()
    else:
        raise ValueError("unknown dataset '{}'".format(dataset))

    _, ax = plt.subplots(figsize=(9, 6))
    oz = Manifold(ax=ax, manifold=manifold, **kwargs)

    oz.fit(X, y)
    # NOTE(review): poof() appears to be the pre-1.0 Yellowbrick name for
    # show(); confirm the pinned library version before renaming.
    oz.poof(outpath=path)
Exemple #10
0
from yellowbrick.datasets import load_energy

# Load the demo energy dataset as features (X) and target (y)
X, y = load_energy() # TODO: replace this demo dataset with the project's own data, split into X (features) and y (target)
# Disabled draft of loading the project CSV by hand.
# NOTE(review): iterating a pandas DataFrame yields column labels rather than
# rows, so this draft likely needs iterrows()/itertuples() or plain column
# selection before it can be re-enabled -- confirm when reviving it.
# X = []                         # makes a list
# y = []                         # makes a list
# data = pd.read_csv(r'C:\Users\Jujin\Desktop\cs-4641-group-44\cleaned_encoded_COVID_Data_Copy.csv')
# for row in data:
#     X.append(row[1])     # selects data from the ith row
#     y.append(row[2])     # selects data from the ith row

# Use the quick method and immediately show the figure
alphas(LassoCV(random_state=0), X, y)

# Load a regression dataset
# NOTE(review): only load_energy is imported in the visible part of this
# script; confirm load_concrete (and alphas / LassoCV) are imported elsewhere.
X, y = load_concrete()  # TODO: likewise replace with the project's own data

# Disabled draft: read the pre-split training/test CSV files directly.
#X = pd.read_csv(r'C:\Users\Jujin\Desktop\cs-4641-group-44\training_data.csv')
#y = pd.read_csv(r'C:\Users\Jujin\Desktop\cs-4641-group-44\test_data.csv')

# Disabled draft: build the train/test lists by hand from the same files.
# X_train = []                         # makes a list
# y_train = []                         # makes a list
# d = pd.read_csv(r'C:\Users\Jujin\Desktop\cs-4641-group-44\training_data.csv')
# for row in d:
#     X_train.append(row[1])     # selects data from the ith row
#     y_train.append(row[2])     # selects data from the ith row
#
# X_test = []                         # makes a list
# y_test = []                         # makes a list
# da = pd.read_csv(r'C:\Users\Jujin\Desktop\cs-4641-group-44\test_data.csv')
# for row in da:
def binning():
    """
    Draw a balanced binning reference for the concrete dataset's target.

    Only the target values are used; the visualizer is fit on them and
    handed to ``savefig`` under the name "balanced_binning_reference".
    """
    _features, target = load_concrete()

    visualizer = BalancedBinningReference(ax=newfig())
    visualizer.fit(target)

    savefig(visualizer, "balanced_binning_reference")
def alphas():
    """
    Draw an alpha selection plot for LassoCV on the concrete dataset.

    The visualizer is fit on the full dataset and handed to ``savefig``
    under the name "alpha_selection".
    """
    X, y = load_concrete()

    # 400 candidate alphas, log-spaced between 1e-10 and 1e1
    alpha_grid = np.logspace(-10, 1, 400)
    estimator = LassoCV(alphas=alpha_grid)

    visualizer = AlphaSelection(estimator, ax=newfig())
    visualizer.fit(X, y)

    savefig(visualizer, "alpha_selection")
def jointplot():
    """
    Draw a joint plot of the "cement" and "splast" columns of the concrete data.

    The visualizer is run with ``fit_transform`` and handed to ``savefig``
    under the name "jointplot".
    """
    features, target = load_concrete()

    selected_columns = ["cement", "splast"]
    visualizer = JointPlot(columns=selected_columns, ax=newfig())
    visualizer.fit_transform(features, target)

    savefig(visualizer, "jointplot")
Exemple #14
0
fig.set_size_inches(7, 5)
fig.savefig("5._Autoregression_Model.png")

# Xenia: Showing all my plots as an output
plt.show()

# %%
# Residuals Plot (Trying new things)

# The residuals plot shows how the model is injecting error: the bold
# horizontal line at residuals = 0 is no error, and any point above or below
# that line indicates the magnitude of error.
# (https://www.scikit-yb.org/en/latest/quickstart.html#installation)

# Load a regression dataset
X, y = load_concrete()

# Create training and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1)

visualizer = ResidualsPlot(LinearRegression())
visualizer.fit(X_train, y_train)  # Fit the training data to the visualizer
visualizer.score(X_test, y_test)  # Evaluate the model on the test data

# Xenia: Saving my plots
# Finalize without displaying so the figure can be resized and written to
# disk first. Saving after show() writes an empty figure, and the old `fig`
# handle still points at the autoregression plot, so using it here would
# overwrite the residuals image with the wrong chart.
visualizer.finalize()
residuals_fig = visualizer.ax.figure
residuals_fig.set_size_inches(7, 5)
residuals_fig.savefig("6._Residuals_Plot.png")

# Display the finished residuals plot
plt.show()
Exemple #15
0
def residuals_plot(ax=None):
    """
    Build a ResidualsPlot visualizer around Ridge for the concrete dataset.

    Parameters
    ----------
    ax : optional
        Axes object forwarded unchanged to ``ResidualsPlot``.

    Returns
    -------
    Whatever ``tts_plot`` returns for the visualizer and the pandas data.
    """
    X, y = load_concrete(return_dataset=True).to_pandas()

    estimator = Ridge()
    return tts_plot(ResidualsPlot(estimator, ax=ax), X, y)
Exemple #16
0
def prediction_error(ax=None):
    """
    Build a PredictionError visualizer around Lasso for the concrete dataset.

    Parameters
    ----------
    ax : optional
        Axes object forwarded unchanged to ``PredictionError``.

    Returns
    -------
    Whatever ``tts_plot`` returns for the visualizer and the pandas data.
    """
    X, y = load_concrete(return_dataset=True).to_pandas()

    estimator = Lasso()
    return tts_plot(PredictionError(estimator, ax=ax), X, y)