def peplot():
    """
    Save a prediction error example built on the concrete dataset.

    The data is split with ``tts`` (``test_size=0.2``); a ``PredictionError``
    visualizer wrapping ``Lasso`` is fit on the training part, scored on the
    test part, and handed to ``savefig`` under the name "prediction_error".
    """
    features, target = load_concrete()
    train_X, test_X, train_y, test_y = tts(features, target, test_size=0.2)

    visualizer = PredictionError(Lasso(), ax=newfig())
    visualizer.fit(train_X, train_y)
    visualizer.score(test_X, test_y)

    savefig(visualizer, "prediction_error")
def alpha_selection(ax=None):
    """
    Build an ``AlphaSelection`` visualizer over ``LassoCV`` for the concrete
    dataset and pass it, with the data, to ``tts_plot``.

    Parameters
    ----------
    ax : optional
        Forwarded unchanged as the visualizer's ``ax`` argument.

    Returns
    -------
    Whatever ``tts_plot`` returns for the visualizer and data.
    """
    features, target = load_concrete(return_dataset=True).to_pandas()

    # 400 candidate alphas, log-spaced between 1e-10 and 1e1
    alpha_grid = np.logspace(-10, 1, 400)
    estimator = LassoCV(alphas=alpha_grid)

    visualizer = AlphaSelection(estimator, ax=ax)
    return tts_plot(visualizer, features, target)
def residuals():
    """
    Save a residuals plot example built on the concrete dataset.

    The data is split with ``tts`` (``test_size=0.2``); a ``ResidualsPlot``
    visualizer wrapping ``Ridge`` is fit on the training part, scored on the
    test part, and handed to ``savefig`` under the name "residuals".
    """
    features, target = load_concrete()
    train_X, test_X, train_y, test_y = tts(features, target, test_size=0.2)

    visualizer = ResidualsPlot(Ridge(), ax=newfig())
    visualizer.fit(train_X, train_y)
    visualizer.score(test_X, test_y)

    savefig(visualizer, "residuals")
def manifold(dataset, manifold):
    """
    Save a manifold embedding example for the named dataset.

    Parameters
    ----------
    dataset : str
        "concrete" or "occupancy"; selects which loader supplies the data.
    manifold : str
        Passed through as the ``manifold`` argument of ``Manifold``.

    Raises
    ------
    ValueError
        If ``dataset`` is neither "concrete" nor "occupancy".
    """
    # Pick the loader first so an unknown name fails before any figure exists
    if dataset == "concrete":
        loader = load_concrete
    elif dataset == "occupancy":
        loader = load_occupancy
    else:
        raise ValueError("unknown dataset")

    features, target = loader()

    visualizer = Manifold(manifold=manifold, ax=newfig())
    visualizer.fit_transform(features, target)

    image_name = "{}_{}_manifold".format(dataset, manifold)
    savefig(visualizer, image_name)
def dataset_example(
    dataset="occupancy", manifold="all", path="images/", quick=False, **kwargs
):
    """
    Render manifold embedding example(s) for the occupancy or concrete data.

    Parameters
    ----------
    dataset : str, default: "occupancy"
        Which data to load: "occupancy" or "concrete".

    manifold : str, default: "all"
        The manifold algorithm to visualize. With "all", one example is
        generated for every entry of ``MANIFOLD_ALGORITHMS`` that is not in
        ``SKIP``; each is attempted independently and failures are printed
        rather than raised.

    path : str or None, default: "images/"
        With ``manifold="all"``: an existing directory that receives one
        "<dataset>_<algorithm>_manifold.png" file per algorithm, or None to
        pass ``outpath=None`` for every example. For a single manifold the
        value is passed directly as ``outpath`` to ``show``.

    quick : bool, default: False
        If True the example is built with ``manifold_embedding`` instead of
        instantiating ``Manifold`` directly.

    kwargs : dict
        Forwarded to ``manifold_embedding`` / ``Manifold``.

    Raises
    ------
    ValueError
        If ``manifold="all"`` and ``path`` is given but is not an existing
        directory, or if ``dataset`` is not a known name.
    """
    if manifold == "all":
        # Previously this message was a bare string expression (a no-op), so
        # a bad directory only surfaced later as one failure per algorithm.
        if path is not None and not os.path.isdir(path):
            raise ValueError("please specify a directory to save examples to")

        for algorithm in MANIFOLD_ALGORITHMS:
            if algorithm in SKIP:
                continue

            print("generating {} {} manifold".format(dataset, algorithm))

            # path=None passes the guard above, so it must not reach join()
            fpath = None
            if path is not None:
                fpath = os.path.join(
                    path, "{}_{}_manifold.png".format(dataset, algorithm)
                )

            try:
                # Forward quick/kwargs so "all" honours the caller's options
                dataset_example(dataset, algorithm, fpath, quick=quick, **kwargs)
            except Exception as e:
                # Deliberately best-effort: report and move to the next one
                print(
                    "could not visualize {} manifold on {} data: {}".format(
                        algorithm, dataset, e
                    )
                )

        # Break here!
        return

    # Resolve the dataset before creating a figure so that an unknown name
    # does not leave an unused figure behind.
    if dataset == "occupancy":
        X, y = load_occupancy()
    elif dataset == "concrete":
        X, y = load_concrete()
    else:
        raise ValueError("unknown dataset '{}'".format(dataset))

    # Create single example; the figure is created in both modes, as before
    _, ax = plt.subplots(figsize=(9, 6))

    # Check if the quick method is called
    if quick:
        oz = manifold_embedding(X, y, manifold=manifold, show=False, **kwargs)
        oz.show(outpath=path)
    else:
        oz = Manifold(ax=ax, manifold=manifold, **kwargs)
        oz.fit_transform(X, y)
        oz.show(outpath=path)
def test_integration_coef(self):
    """
    Fit FeatureImportances around a Lasso model on the concrete data and
    check the finalized figure with assert_images_similar
    """
    concrete = load_concrete(return_dataset=True)
    X, y = concrete.to_numpy()

    # Title-case the feature names from the dataset metadata for the labels
    labels = [name.title() for name in concrete.meta["features"]]

    ax = plt.figure().add_subplot()
    model = Lasso(random_state=42)

    visualizer = FeatureImportances(model, ax=ax, labels=labels, relative=False)
    visualizer.fit(X, y)
    visualizer.finalize()

    # Appveyor and Linux conda non-text-based differences
    self.assert_images_similar(visualizer, tol=16.2)
def test_numpy_integration(self):
    """
    Fit CooksDistance on the concrete dataset loaded as numpy arrays and
    check its fitted statistics and rendered image
    """
    X, y = load_concrete(return_dataset=True).to_numpy()
    for array in (X, y):
        assert isinstance(array, np.ndarray)

    _, ax = plt.subplots()
    cooks = CooksDistance(ax=ax).fit(X, y)
    assert_fitted(cooks)

    # Summary statistics expected for this dataset
    total_distance = cooks.distance_.sum()
    assert total_distance == pytest.approx(1.2911900571300652)

    total_p_values = cooks.p_values_.sum()
    assert total_p_values == pytest.approx(1029.9999525376425)

    assert cooks.influence_threshold_ == pytest.approx(0.003883495145631068)
    assert cooks.outlier_percentage_ == pytest.approx(7.3786407766990285)

    cooks.finalize()
    self.assert_images_similar(cooks)
def test_with_fitted(self):
    """
    Check when FeatureImportances calls fit on a model that is already fitted
    """
    X, y = load_concrete(return_dataset=True).to_numpy()
    model = Lasso().fit(X, y)

    # (constructor options, whether the wrapped model's fit must be called)
    scenarios = (
        ({}, False),
        ({"is_fitted": True}, False),
        ({"is_fitted": False}, True),
    )

    for options, expect_refit in scenarios:
        with mock.patch.object(model, "fit") as mockfit:
            oz = FeatureImportances(model, **options)
            oz.fit(X, y)

            if expect_refit:
                mockfit.assert_called_once_with(X, y)
            else:
                mockfit.assert_not_called()
def dataset_example(dataset="occupancy", manifold="all", path="images/", **kwargs):
    """
    Render manifold embedding example(s) for the occupancy or concrete data.

    Parameters
    ----------
    dataset : str, default: "occupancy"
        Which data to load: "occupancy" or "concrete".

    manifold : str, default: "all"
        The manifold algorithm to visualize. With "all", one example is
        generated for every entry of ``MANIFOLD_ALGORITHMS`` that is not in
        ``SKIP``; each is attempted independently and failures are printed
        rather than raised.

    path : str or None, default: "images/"
        With ``manifold="all"``: an existing directory that receives one
        "<dataset>_<algorithm>_manifold.png" file per algorithm, or None to
        pass ``outpath=None`` for every example. For a single manifold the
        value is passed directly as ``outpath`` to ``poof``.

    kwargs : dict
        Forwarded to ``Manifold``.

    Raises
    ------
    ValueError
        If ``manifold="all"`` and ``path`` is given but is not an existing
        directory, or if ``dataset`` is not a known name.
    """
    if manifold == "all":
        # Previously this message was a bare string expression (a no-op), so
        # a bad directory only surfaced later as one failure per algorithm.
        if path is not None and not os.path.isdir(path):
            raise ValueError("please specify a directory to save examples to")

        for algorithm in MANIFOLD_ALGORITHMS:
            if algorithm in SKIP:
                continue

            print("generating {} {} manifold".format(dataset, algorithm))

            # path=None passes the guard above, so it must not reach join()
            fpath = None
            if path is not None:
                fpath = os.path.join(
                    path, "{}_{}_manifold.png".format(dataset, algorithm)
                )

            try:
                # Forward kwargs so "all" honours the caller's options
                dataset_example(dataset, algorithm, fpath, **kwargs)
            except Exception as e:
                # Deliberately best-effort: report and move to the next one
                print(
                    "could not visualize {} manifold on {} data: {}".format(
                        algorithm, dataset, e
                    )
                )

        # Break here!
        return

    # Resolve the dataset before creating a figure so that an unknown name
    # does not leave an unused figure behind.
    if dataset == "occupancy":
        X, y = load_occupancy()
    elif dataset == "concrete":
        X, y = load_concrete()
    else:
        raise ValueError("unknown dataset '{}'".format(dataset))

    # Create single example
    _, ax = plt.subplots(figsize=(9, 6))
    oz = Manifold(ax=ax, manifold=manifold, **kwargs)

    oz.fit(X, y)
    # NOTE(review): newer Yellowbrick releases name this method show();
    # poof is kept here to match the version this snippet was written for.
    oz.poof(outpath=path)
from yellowbrick.datasets import load_energy # Load dataset X, y = load_energy() # make our dataset read as x and y axis values somehow and replace this dataset with ours # X = [] # makes a list # y = [] # makes a list # data = pd.read_csv(r'C:\Users\Jujin\Desktop\cs-4641-group-44\cleaned_encoded_COVID_Data_Copy.csv') # for row in data: # X.append(row[1]) # selects data from the ith row # y.append(row[2]) # selects data from the ith row # Use the quick method and immediately show the figure alphas(LassoCV(random_state=0), X, y) # Load a regression dataset X, y = load_concrete() # same as above #X = pd.read_csv(r'C:\Users\Jujin\Desktop\cs-4641-group-44\training_data.csv') #y = pd.read_csv(r'C:\Users\Jujin\Desktop\cs-4641-group-44\test_data.csv') # X_train = [] # makes a list # y_train = [] # makes a list # d = pd.read_csv(r'C:\Users\Jujin\Desktop\cs-4641-group-44\training_data.csv') # for row in d: # X_train.append(row[1]) # selects data from the ith row # y_train.append(row[2]) # selects data from the ith row # # X_test = [] # makes a list # y_test = [] # makes a list # da = pd.read_csv(r'C:\Users\Jujin\Desktop\cs-4641-group-44\test_data.csv') # for row in da:
def binning():
    """
    Save a balanced binning reference example for the concrete target.

    Only the target returned by ``load_concrete`` is used: it is fit with
    ``BalancedBinningReference`` and the visualizer is handed to ``savefig``
    under the name "balanced_binning_reference".
    """
    _features, target = load_concrete()

    visualizer = BalancedBinningReference(ax=newfig())
    visualizer.fit(target)

    savefig(visualizer, "balanced_binning_reference")
def alphas():
    """
    Save an alpha selection example built on the concrete dataset.

    An ``AlphaSelection`` visualizer wrapping ``LassoCV`` is fit on the full
    data and handed to ``savefig`` under the name "alpha_selection".
    """
    features, target = load_concrete()

    # 400 candidate alphas, log-spaced between 1e-10 and 1e1
    alpha_grid = np.logspace(-10, 1, 400)
    estimator = LassoCV(alphas=alpha_grid)

    visualizer = AlphaSelection(estimator, ax=newfig())
    visualizer.fit(features, target)

    savefig(visualizer, "alpha_selection")
def jointplot():
    """
    Save a joint plot example of two concrete dataset columns.

    A ``JointPlot`` over the "cement" and "splast" columns is fit on the data
    and handed to ``savefig`` under the name "jointplot".
    """
    features, target = load_concrete()
    selected = ["cement", "splast"]

    visualizer = JointPlot(columns=selected, ax=newfig())
    visualizer.fit_transform(features, target)

    savefig(visualizer, "jointplot")
fig.set_size_inches(7, 5)
fig.savefig("5._Autoregression_Model.png")

# Xenia: Showing all my plots as an output
plt.show()

# %%
# Residuals Plot (Trying new things)
# The residuals plot shows how the model is injecting error: the bold
# horizontal line at residuals = 0 is no error, and any point above or below
# that line indicates the magnitude of error.
# (https://www.scikit-yb.org/en/latest/quickstart.html#installation)

# Load a regression dataset
X, y = load_concrete()

# Create training and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1)

visualizer = ResidualsPlot(LinearRegression())
visualizer.fit(X_train, y_train)  # Fit the training data to the visualizer
visualizer.score(X_test, y_test)  # Evaluate the model on the test data

# Xenia: Saving my plots
# `fig` still pointed at the autoregression figure saved above, so resizing
# and saving through it wrote the wrong plot to "6._Residuals_Plot.png".
# Rebind it to the figure the visualizer actually drew on, and save before
# displaying so the file is written from the rendered residuals figure.
fig = visualizer.ax.figure
fig.set_size_inches(7, 5)
visualizer.show(outpath="6._Residuals_Plot.png")  # Finalize and save the figure

# Xenia: Showing all my plots as an output
plt.show()
def residuals_plot(ax=None):
    """
    Build a ``ResidualsPlot`` visualizer over ``Ridge`` for the concrete
    dataset and pass it, with the data, to ``tts_plot``.

    Parameters
    ----------
    ax : optional
        Forwarded unchanged as the visualizer's ``ax`` argument.

    Returns
    -------
    Whatever ``tts_plot`` returns for the visualizer and data.
    """
    features, target = load_concrete(return_dataset=True).to_pandas()
    visualizer = ResidualsPlot(Ridge(), ax=ax)
    return tts_plot(visualizer, features, target)
def prediction_error(ax=None):
    """
    Build a ``PredictionError`` visualizer over ``Lasso`` for the concrete
    dataset and pass it, with the data, to ``tts_plot``.

    Parameters
    ----------
    ax : optional
        Forwarded unchanged as the visualizer's ``ax`` argument.

    Returns
    -------
    Whatever ``tts_plot`` returns for the visualizer and data.
    """
    features, target = load_concrete(return_dataset=True).to_pandas()
    visualizer = PredictionError(Lasso(), ax=ax)
    return tts_plot(visualizer, features, target)