def load_iris_dataset(self):
    """Load Iris, split it 70/30 and store standardized train/test sets.

    Only feature columns 2 and 3 of ``iris.data`` are used. The raw
    features are plotted to a PNG file, then split with stratification
    on the labels. The scaler is fitted on the training split only and
    that same fitted scaler is applied to both splits, so train and test
    features share one scale and no test statistics leak into the
    preprocessing.

    Sets ``self.x_train``, ``self.y_train``, ``self.x_test`` and
    ``self.y_test``; returns nothing.
    """
    # Loading the Iris dataset from scikit-learn.
    # The classes are already converted to integer labels where
    # 0=Iris-Setosa, 1=Iris-Versicolor, 2=Iris-Virginica.
    iris = datasets.load_iris()
    x = iris.data[:, [2, 3]]
    y = iris.target
    print('Class labels:', np.unique(y))

    # Plot the data and save it to file.
    Plotter.plot_iris_data_set(
        x,
        FilesystemUtils.get_test_resources_plot_file_name(
            'ScikitLearn-Iris-Training-Set.png'))

    # Splitting data into 70% training and 30% test data; stratify keeps
    # the class proportions of y in both splits.
    x_train, x_test, y_train, y_test = train_test_split(
        x, y, test_size=0.3, random_state=1, stratify=y)
    print('Labels counts in y:', np.bincount(y))
    print('Labels counts in y_train:', np.bincount(y_train))
    print('Labels counts in y_test:', np.bincount(y_test))

    # Standardize features. The scaler is fitted on the training data
    # ONLY; re-fitting it on x_test would scale the test set with its own
    # mean/variance and make it incomparable with the training set.
    sc = StandardScaler()
    sc.fit(x_train)
    x_train_std = sc.transform(x_train)
    x_test_std = sc.transform(x_test)

    self.x_train = x_train_std
    self.y_train = y_train
    self.x_test = x_test_std
    self.y_test = y_test
def setUp(self):
    """Read the Iris subset from disk and plot it before each test.

    Stores the values returned by ``IrisDataReader.get_data()`` in
    ``self.x`` and ``self.y`` and writes a plot of ``self.x`` to a PNG
    file.
    """
    # Load the subset of Iris data from the bundled data file.
    data_path = FilesystemUtils.get_resources_data_file_name(
        'iris/iris.data')
    reader = IrisDataReader(data_path)
    self.x, self.y = reader.get_data()

    # Plot the data and save it to file.
    plot_path = FilesystemUtils.get_test_resources_plot_file_name(
        'adaline/Adaline-Training-Set.png')
    Plotter.plot_iris_data_set(self.x, plot_path)
def test_adaline_with_stochastic_update(self):
    """Train AdalineSGD on standardized features and save its plots.

    Standardizes columns 0 and 1 of ``self.x``, fits an ``AdalineSGD``
    model for 15 epochs, writes the training-set, learning-curve and
    decision-boundary plots to PNG files, and finally feeds the first
    sample to ``partial_fit``.
    """
    # Standardize the first two feature columns on a copy, leaving
    # self.x itself untouched.
    x_std = np.copy(self.x)
    for col in (0, 1):
        column = self.x[:, col]
        x_std[:, col] = (column - column.mean()) / column.std()

    # Plot the standardized data and save it to file.
    training_plot = FilesystemUtils.get_test_resources_plot_file_name(
        'adaline/AdalineSGD-Standardized-Training-Set.png')
    Plotter.plot_iris_data_set(x_std, training_plot)

    # Train adaline on standardized features with a small number of epochs.
    model = AdalineSGD(learning_rate=0.01, num_epochs=15)
    model.fit(x_std, self.y)

    # Plot the learning curve.
    learning_curve = dict(
        cost_length=len(model.cost),
        cost=model.cost,
        marker='o',
        x_label='Epochs',
        y_label='log(Sum-squared-error)',
        title='Adaline - Learning rate 0.01',
    )
    curve_plot = FilesystemUtils.get_test_resources_plot_file_name(
        'adaline/AdalineSGD-Learning-Curve-Standardized-Features.png')
    Plotter.plot_learning_curve(learning_curve, curve_plot)

    # Plot the decision boundary.
    boundary_options = dict(
        x_label='sepal length [cm]',
        y_label='petal length [cm]',
        legend='upper left',
    )
    boundary_plot = FilesystemUtils.get_test_resources_plot_file_name(
        'adaline/AdalineSGD-Decision-Boundary-Standardized-Features.png')
    Plotter.plot_decision_boundary(
        x_std,
        self.y,
        classifier=model,
        diagram_options=boundary_options,
        image_file_path=boundary_plot)

    # Update the already-fitted model with a single sample.
    model.partial_fit(x_std[0, :], self.y[0])
def setUp(self):
    """Prepare a standardized two-class Iris subset before each test.

    Takes feature columns 2 and 3 of ``iris.data``, keeps only the
    samples labelled 0 or 1, standardizes them with ``StandardScaler``
    and stores the result in ``self.x`` / ``self.y``. The standardized
    features are also plotted to a PNG file.
    """
    # Load the Iris data and select the two feature columns used here.
    iris = datasets.load_iris()
    features = iris.data[:, [2, 3]]
    labels = iris.target

    # Consider only samples with label 0 or 1.
    is_class_01 = (labels == 0) | (labels == 1)
    features_01 = features[is_class_01]
    labels_01 = labels[is_class_01]

    # Standardize features.
    scaler = StandardScaler()
    scaler.fit(features_01)
    self.x = scaler.transform(features_01)
    self.y = labels_01
    print('Class labels:', np.unique(self.y))

    # Plot the data and save it to file.
    plot_path = FilesystemUtils.get_test_resources_plot_file_name(
        'logistic_regression/LogisticRegressionBGD-Training-Set.png')
    Plotter.plot_iris_data_set(self.x, plot_path)