def test_rank1d_shapiro(self):
    """
    Rank1D with algorithm="shapiro" returns the input unchanged from
    fit_transform, exposes the expected ranks_, and matches the baseline image
    """
    X, _ = load_energy(return_dataset=True).to_numpy()
    visualizer = Rank1D(algorithm="shapiro")

    # The transform step must hand the data back untouched
    transformed = visualizer.fit_transform(X)
    npt.assert_array_equal(transformed, X)

    # One rank per feature column
    expected_ranks = np.array(
        [
            0.93340671,
            0.94967198,
            0.92689574,
            0.7459445,
            0.63657606,
            0.85603625,
            0.84349269,
            0.91551381,
        ]
    )
    n_features = X.shape[1]
    assert hasattr(visualizer, "ranks_")
    assert visualizer.ranks_.shape == (n_features,)
    npt.assert_array_almost_equal(visualizer.ranks_, expected_ranks)

    # Compare the rendered figure with the stored baseline
    visualizer.finalize()
    self.assert_images_similar(visualizer)
def test_kendalltau(self):
    """
    Test that kendalltau returns the expected matrix for the energy dataset
    """
    X, _ = load_energy(return_dataset=True).to_numpy()

    expected = np.array(
        [
            [1.0, -1.0, -0.2724275, -0.7361443, 0.7385489, 0.0, 0.0, 0.0],
            [-1.0, 1.0, 0.2724275, 0.7361443, -0.7385489, 0.0, 0.0, 0.0],
            [-0.2724275, 0.2724275, 1.0, -0.15192004, 0.19528337, 0.0, 0.0, 0.0],
            [-0.73614431, 0.73614431, -0.15192004, 1.0, -0.87518995, 0.0, 0.0, 0.0],
            [0.73854895, -0.73854895, 0.19528337, -0.87518995, 1.0, 0.0, 0.0, 0.0],
            [0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0],
            [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.15430335],
            [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.15430335, 1.0],
        ]
    )

    actual = kendalltau(X)

    # numpy's signature is (actual, desired); passing them in that order keeps
    # the labels in a failure report pointing at the right array.
    npt.assert_almost_equal(actual, expected)
def test_rank2d_kendalltau(self):
    """
    Rank2D with algorithm="kendalltau" returns the input unchanged from
    fit_transform, exposes the expected ranks_, and matches the baseline image
    """
    X, _ = load_energy(return_dataset=True).to_numpy()
    visualizer = Rank2D(algorithm="kendalltau")

    # The transform step must hand the data back untouched
    transformed = visualizer.fit_transform(X)
    npt.assert_array_equal(transformed, X)

    # Pairwise ranks, one row and one column per feature
    expected_ranks = np.array(
        [
            [1.0, -1.0, -0.2724275, -0.73614431, 0.73854895, 0.0, 0.0, 0.0],
            [-1.0, 1.0, 0.2724275, 0.73614431, -0.73854895, 0.0, 0.0, 0.0],
            [-0.2724275, 0.2724275, 1.0, -0.15192004, 0.19528337, 0.0, 0.0, 0.0],
            [-0.73614431, 0.73614431, -0.15192004, 1.0, -0.87518995, 0.0, 0.0, 0.0],
            [0.73854895, -0.73854895, 0.19528337, -0.87518995, 1.0, 0.0, 0.0, 0.0],
            [0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0],
            [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.15430335],
            [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.15430335, 1.0],
        ]
    )
    n_features = X.shape[1]
    assert hasattr(visualizer, "ranks_")
    assert visualizer.ranks_.shape == (n_features, n_features)
    npt.assert_array_almost_equal(visualizer.ranks_, expected_ranks)

    # Image similarity comparison
    visualizer.finalize()
    self.assert_images_similar(visualizer, tol=0.1)
def test_rank2d_spearman(self):
    """
    Rank2D with algorithm="spearman" returns the input unchanged from
    fit_transform, exposes the expected ranks_, and matches the baseline image
    """
    X, _ = load_energy(return_dataset=True).to_numpy()
    visualizer = Rank2D(algorithm="spearman")

    # The transform step must hand the data back untouched
    transformed = visualizer.fit_transform(X)
    npt.assert_array_equal(transformed, X)

    # Pairwise ranks, one row and one column per feature
    expected_ranks = np.array(
        [
            [1.0, -1.0, -0.25580533, -0.8708862, 0.86904819, 0.0, 0.0, 0.0],
            [-1.0, 1.0, 0.25580533, 0.8708862, -0.86904819, 0.0, 0.0, 0.0],
            [-0.25580533, 0.25580533, 1.0, -0.19345677, 0.22076336, 0.0, 0.0, 0.0],
            [-0.8708862, 0.8708862, -0.19345677, 1.0, -0.93704257, 0.0, 0.0, 0.0],
            [0.86904819, -0.86904819, 0.22076336, -0.93704257, 1.0, 0.0, 0.0, 0.0],
            [0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0],
            [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.18759162],
            [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.18759162, 1.0],
        ]
    )
    n_features = X.shape[1]
    assert hasattr(visualizer, "ranks_")
    assert visualizer.ranks_.shape == (n_features, n_features)
    npt.assert_array_almost_equal(visualizer.ranks_, expected_ranks)

    # Image similarity comparison
    visualizer.finalize()
    self.assert_images_similar(visualizer, tol=0.1)
def test_rank2d_unknown_algorithm(self):
    """
    Rank2D raises YellowbrickValueError when given an unrecognized algorithm
    """
    X, _ = load_energy()

    # The visualizer is built with "Oscar"; the message is expected to carry
    # the lower-cased name.
    expected_message = "'oscar' is unrecognized ranking method"

    # Construction stays inside the context so an error raised at either
    # step is caught, exactly as before.
    with pytest.raises(YellowbrickValueError, match=expected_message):
        visualizer = Rank2D(algorithm="Oscar")
        visualizer.transform(X)
def test_rankdbase_unknown_algorithm(self):
    """
    Assert that unknown algorithms raise an exception

    RankDBase is built with algorithm="unknown" and fit_transform is called;
    a YellowbrickValueError with the exact message
    "'unknown' is unrecognized ranking method" is expected.
    """
    X, _ = load_energy(return_dataset=True).to_numpy()

    with pytest.raises(
        YellowbrickValueError, match=".* is unrecognized ranking method"
    ) as e:
        oz = RankDBase(algorithm="unknown")
        oz.fit_transform(X)

    # The exact-message check must live after the context manager: any
    # statement placed after the raising call inside the block never runs,
    # and e.value is only populated once the block has exited.
    assert str(e.value) == "'unknown' is unrecognized ranking method"
def test_kendalltau_shape(self):
    """
    The matrix returned by kendalltau is square and each entry approximately
    equals its mirror across the diagonal
    """
    X, _ = load_energy(return_dataset=True).to_numpy()
    matrix = kendalltau(X)

    # Square: as many rows as columns
    n_rows = matrix.shape[0]
    n_cols = matrix.shape[1]
    assert n_rows == n_cols

    # Symmetric: entry (row, col) matches entry (col, row)
    for (row, col), value in np.ndenumerate(matrix):
        mirrored = matrix[col][row]
        assert mirrored == pytest.approx(value)
def test_rank1d_horizontal(self):
    """
    Rank1D with orient="h" returns the input unchanged from fit_transform
    and matches the baseline image
    """
    X, _ = load_energy(return_dataset=True).to_numpy()
    visualizer = Rank1D(orient="h")

    # The transform step must hand the data back untouched
    transformed = visualizer.fit_transform(X)
    npt.assert_array_equal(transformed, X)

    # Compare the rendered figure with the stored baseline
    visualizer.finalize()
    self.assert_images_similar(visualizer)
def validation():
    """
    Fit a ValidationCurve for a DecisionTreeRegressor on the energy dataset,
    sweeping max_depth over 1..10 with 10-fold CV and r2 scoring, then pass
    the visualizer to savefig under the name "validation_curve".
    """
    X, y = load_energy()

    # Depths 1 through 10 inclusive
    depths = np.arange(1, 11)

    visualizer = ValidationCurve(
        DecisionTreeRegressor(),
        param_name="max_depth",
        param_range=depths,
        cv=10,
        scoring="r2",
        ax=newfig(),
    )
    visualizer.fit(X, y)
    savefig(visualizer, "validation_curve")
def test_quick_method(self):
    """
    The alphas quick method returns an AlphaSelection visualizer whose
    figure matches the baseline image
    """
    X, y = load_energy(return_dataset=True).to_numpy()
    estimator = LassoCV(random_state=0)

    result = alphas(estimator, X, y, is_fitted=False, show=False)

    assert isinstance(result, AlphaSelection)
    self.assert_images_similar(result)
def test_quick_method_manual(self):
    """
    The manual_alphas quick method returns a ManualAlphaSelection visualizer
    whose figure matches the baseline image
    """
    X, y = load_energy(return_dataset=True).to_numpy()
    estimator = ElasticNet(random_state=0)

    result = manual_alphas(estimator, X, y, cv=3, is_fitted=False, show=False)

    assert isinstance(result, ManualAlphaSelection)

    # Loose tolerance: Python 3.6 Travis images were not similar (RMS 0.024)
    self.assert_images_similar(result, tol=0.5)
def test_residuals_plot_numpy(self):
    """
    ResidualsPlot over Lasso on the energy dataset (NumPy arrays) matches
    the baseline image
    """
    _, axes = plt.subplots()

    # Load the energy dataset as NumPy arrays
    dataset = load_energy(return_dataset=True)
    X, y = dataset.to_numpy()

    # Hold out 20% of the rows for scoring
    X_train, X_test, y_train, y_test = tts(
        X, y, test_size=0.2, random_state=231
    )

    model = Lasso(random_state=44)
    plot = ResidualsPlot(model, ax=axes)
    plot.fit(X_train, y_train)
    plot.score(X_test, y_test)
    plot.finalize()

    self.assert_images_similar(plot, tol=1.5)
def test_prediction_error_numpy(self):
    """
    PredictionError over Ridge on the energy dataset (NumPy arrays) matches
    the baseline image
    """
    _, axes = plt.subplots()

    # Load the energy dataset as NumPy arrays
    dataset = load_energy(return_dataset=True)
    X, y = dataset.to_numpy()

    # Hold out 20% of the rows for scoring
    X_train, X_test, y_train, y_test = tts(
        X, y, test_size=0.2, random_state=8873
    )

    model = Ridge(random_state=22)
    plot = PredictionError(model, ax=axes)
    plot.fit(X_train, y_train)
    plot.score(X_test, y_test)
    plot.finalize()

    self.assert_images_similar(plot, tol=1, remove_legend=True)
def test_residuals_with_fitted(self):
    """
    ResidualsPlot only calls the wrapped model's fit when is_fitted=False;
    with the default setting or is_fitted=True an already-fitted model is
    left alone
    """
    X, y = load_energy(return_dataset=True).to_numpy()
    fitted_model = Ridge().fit(X, y)

    # (constructor kwargs, whether the underlying fit should be invoked)
    scenarios = (
        ({}, False),
        ({"is_fitted": True}, False),
        ({"is_fitted": False}, True),
    )

    for kwargs, expect_fit in scenarios:
        with mock.patch.object(fitted_model, "fit") as patched_fit:
            plot = ResidualsPlot(fitted_model, **kwargs)
            plot.fit(X, y)

        if expect_fit:
            patched_fit.assert_called_once_with(X, y)
        else:
            patched_fit.assert_not_called()
from sklearn.linear_model import Lasso
from sklearn.model_selection import train_test_split
import pandas as pd
from yellowbrick.datasets import load_concrete
from yellowbrick.regressor import prediction_error
from sklearn.linear_model import LassoCV
from yellowbrick.regressor.alphas import alphas
from yellowbrick.datasets import load_energy

# Load the energy dataset as the feature matrix X and the target y
X, y = load_energy()

# TODO: build X and y from our own data and use them in place of the
# dataset loaded above. Draft, currently disabled:
# X = []  # makes a list
# y = []  # makes a list
# data = pd.read_csv(r'C:\Users\Jujin\Desktop\cs-4641-group-44\cleaned_encoded_COVID_Data_Copy.csv')
# for row in data:
#     X.append(row[1])  # selects data from the ith row
#     y.append(row[2])  # selects data from the ith row
# NOTE(review): iterating a DataFrame directly yields column labels, not
# rows -- confirm the loop above does what is intended before enabling it.
# The absolute Windows path also only resolves on the author's machine.

# Use the quick method and immediately show the figure
alphas(LassoCV(random_state=0), X, y)

# Load a regression dataset (rebinds X and y to the concrete data)
X, y = load_concrete()

# TODO: same as above -- swap in our own data. Draft, currently disabled:
# X = pd.read_csv(r'C:\Users\Jujin\Desktop\cs-4641-group-44\training_data.csv')
# y = pd.read_csv(r'C:\Users\Jujin\Desktop\cs-4641-group-44\test_data.csv')
# X_train = []  # makes a list
def cvscores():
    """
    Fit a CVScores visualizer for Ridge on the energy dataset with 10-fold
    CV and r2 scoring, then pass it to savefig under the name "cv_scores".
    """
    X, y = load_energy()

    visualizer = CVScores(
        Ridge(),
        scoring="r2",
        cv=10,
        ax=newfig(),
    )
    visualizer.fit(X, y)
    savefig(visualizer, "cv_scores")
def learning():
    """
    Fit a LearningCurve for RidgeCV on the energy dataset with r2 scoring
    over ten training sizes from 0.3 to 1.0, then pass it to savefig under
    the name "learning_curve".
    """
    X, y = load_energy()

    # Ten evenly spaced training-set sizes between 0.3 and 1.0
    train_sizes = np.linspace(0.3, 1.0, 10)

    visualizer = LearningCurve(
        RidgeCV(),
        train_sizes=train_sizes,
        scoring="r2",
        ax=newfig(),
    )
    visualizer.fit(X, y)
    savefig(visualizer, "learning_curve")
def test_rank2d_pearson(self):
    """
    Rank2D with algorithm="pearson" returns the input unchanged from
    fit_transform, exposes the expected ranks_, and matches the baseline image
    """
    X, _ = load_energy(return_dataset=True).to_numpy()
    visualizer = Rank2D(algorithm="pearson")

    # The transform step must hand the data back untouched
    transformed = visualizer.fit_transform(X)
    npt.assert_array_equal(transformed, X)

    # Pairwise ranks, one row and one column per feature
    # fmt: off
    expected_ranks = np.array([
        [1.00000000e00, -9.91901462e-01, -2.03781680e-01, -8.68823408e-01,
         8.27747317e-01, 0.00000000e00, 1.11706815e-16, -1.12935670e-16],
        [-9.91901462e-01, 1.00000000e00, 1.95501633e-01, 8.80719517e-01,
         -8.58147673e-01, 0.00000000e00, -2.26567708e-16, -3.55861251e-16],
        [-2.03781680e-01, 1.95501633e-01, 1.00000000e00, -2.92316466e-01,
         2.80975743e-01, 0.00000000e00, 7.87010445e-18, 0.00000000e00],
        [-8.68823408e-01, 8.80719517e-01, -2.92316466e-01, 1.00000000e00,
         -9.72512237e-01, 0.00000000e00, -3.27553310e-16, 2.20057668e-16],
        [8.27747317e-01, -8.58147673e-01, 2.80975743e-01, -9.72512237e-01,
         1.00000000e00, 0.00000000e00, -1.24094525e-18, 0.00000000e00],
        [0.00000000e00, 0.00000000e00, 0.00000000e00, 0.00000000e00,
         0.00000000e00, 1.00000000e00, -2.42798319e-19, 0.00000000e00],
        [1.11706815e-16, -2.26567708e-16, 7.87010445e-18, -3.27553310e-16,
         -1.24094525e-18, -2.42798319e-19, 1.00000000e00, 2.12964221e-01],
        [-1.12935670e-16, -3.55861251e-16, 0.00000000e00, 2.20057668e-16,
         0.00000000e00, 0.00000000e00, 2.12964221e-01, 1.00000000e00],
    ])
    # fmt: on
    n_features = X.shape[1]
    assert hasattr(visualizer, "ranks_")
    assert visualizer.ranks_.shape == (n_features, n_features)
    npt.assert_array_almost_equal(visualizer.ranks_, expected_ranks)

    # Image similarity comparison
    visualizer.finalize()

    # Loose tolerance: Travis Python 3.6 images were not close (RMS 0.112)
    self.assert_images_similar(visualizer, tol=0.5)
def test_rank2d_covariance(self):
    """
    Rank2D with algorithm="covariance" returns the input unchanged from
    fit_transform, exposes the expected ranks_ (to 5 decimals), and matches
    the baseline image
    """
    X, _ = load_energy(return_dataset=True).to_numpy()
    visualizer = Rank2D(algorithm="covariance")

    # The transform step must hand the data back untouched
    transformed = visualizer.fit_transform(X)
    npt.assert_array_equal(transformed, X)

    # Pairwise ranks, one row and one column per feature
    # fmt: off
    expected_ranks = np.array([
        [1.11888744e-02, -9.24206867e00, -9.40391134e-01, -4.15083877e00,
         1.53324641e-01, 0.00000000e00, 1.57414282e-18, -1.85278419e-17],
        [-9.24206867e00, 7.75916384e03, 7.51290743e02, 3.50393655e03,
         -1.32370274e02, 0.00000000e00, -2.65874531e-15, -4.86170571e-14],
        [-9.40391134e-01, 7.51290743e02, 1.90326988e03, -5.75989570e02,
         2.14654498e01, 0.00000000e00, 4.57406096e-17, 0.00000000e00],
        [-4.15083877e00, 3.50393655e03, -5.75989570e02, 2.03996306e03,
         -7.69178618e01, 0.00000000e00, -1.97089918e-15, 1.54151644e-14],
        [1.53324641e-01, -1.32370274e02, 2.14654498e01, -7.69178618e01,
         3.06649283e00, 0.00000000e00, -2.89497529e-19, 0.00000000e00],
        [0.00000000e00, 0.00000000e00, 0.00000000e00, 0.00000000e00,
         0.00000000e00, 1.25162973e00, -3.61871912e-20, 0.00000000e00],
        [1.57414282e-18, -2.65874531e-15, 4.57406096e-17, -1.97089918e-15,
         -2.89497529e-19, -3.61871912e-20, 1.77477184e-02, 4.40026076e-02],
        [-1.85278419e-17, -4.86170571e-14, 0.00000000e00, 1.54151644e-14,
         0.00000000e00, 0.00000000e00, 4.40026076e-02, 2.40547588e00],
    ])
    # fmt: on
    n_features = X.shape[1]
    assert hasattr(visualizer, "ranks_")
    assert visualizer.ranks_.shape == (n_features, n_features)
    npt.assert_array_almost_equal(visualizer.ranks_, expected_ranks, decimal=5)

    # Image similarity comparison
    visualizer.finalize()
    self.assert_images_similar(visualizer, tol=0.1)