コード例 #1
0
    def load_iris_dataset(self):
        # Loading the Iris dataset from scikit-learn.
        # The classes are already converted to integer labels where 0=Iris-Setosa, 1=Iris-Versicolor, 2=Iris-Virginica.
        iris = datasets.load_iris()
        x = iris.data[:, [2, 3]]
        y = iris.target
        print('Class labels:', np.unique(y))

        # plotter data and save it to file
        Plotter.plot_iris_data_set(x, FilesystemUtils.get_test_resources_plot_file_name(
            'ScikitLearn-Iris-Training-Set.png'))

        # Splitting data into 70% training and 30% test data
        x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.3, random_state=1, stratify=y)
        print('Labels counts in y:', np.bincount(y))
        print('Labels counts in y_train:', np.bincount(y_train))
        print('Labels counts in y_test:', np.bincount(y_test))

        # Standardize features
        sc = StandardScaler()
        sc.fit(x_train)
        x_train_std = sc.transform(x_train)
        sc.fit(x_test)
        x_test_std = sc.transform(x_test)

        self.x_train = x_train_std
        self.y_train = y_train

        self.x_test = x_test_std
        self.y_test = y_test
コード例 #2
0
    def setUp(self):
        # load subset of Iris data
        iris_data_reader = IrisDataReader(
            FilesystemUtils.get_resources_data_file_name('iris/iris.data'))
        self.x, self.y = iris_data_reader.get_data()

        # plotter data and save it to file
        Plotter.plot_iris_data_set(
            self.x,
            FilesystemUtils.get_test_resources_plot_file_name(
                'adaline/Adaline-Training-Set.png'))
コード例 #3
0
    def test_adaline_with_stochastic_update(self):
        # standardize features
        x_std: np.matrix = np.copy(self.x)
        x_std[:, 0] = (self.x[:, 0] - self.x[:, 0].mean()) / self.x[:, 0].std()
        x_std[:, 1] = (self.x[:, 1] - self.x[:, 1].mean()) / self.x[:, 1].std()

        # plotter data and save it to file
        Plotter.plot_iris_data_set(
            x_std,
            FilesystemUtils.get_test_resources_plot_file_name(
                'adaline/AdalineSGD-Standardized-Training-Set.png'))

        # train adaline on standardized features with a small number of epochs
        adaline = AdalineSGD(learning_rate=0.01, num_epochs=15)
        adaline.fit(x_std, self.y)

        # plot learning curve
        curve = {
            'cost_length': len(adaline.cost),
            'cost': adaline.cost,
            'marker': 'o',
            'x_label': 'Epochs',
            'y_label': 'log(Sum-squared-error)',
            'title': 'Adaline - Learning rate 0.01'
        }
        Plotter.plot_learning_curve(
            curve,
            FilesystemUtils.get_test_resources_plot_file_name(
                'adaline/AdalineSGD-Learning-Curve-Standardized-Features.png'))

        # plot decision boundary
        Plotter.plot_decision_boundary(
            x_std,
            self.y,
            classifier=adaline,
            diagram_options={
                'x_label': 'sepal length [cm]',
                'y_label': 'petal length [cm]',
                'legend': 'upper left'
            },
            image_file_path=FilesystemUtils.get_test_resources_plot_file_name(
                'adaline/AdalineSGD-Decision-Boundary-Standardized-Features.png'
            ))

        adaline.partial_fit(x_std[0, :], self.y[0])
コード例 #4
0
    def setUp(self):
        # load subset of Iris data
        iris = datasets.load_iris()
        x_train = iris.data[:, [2, 3]]
        y_train = iris.target

        # consider only 0 and 1 labels
        x_train_01_subset = x_train[(y_train == 0) | (y_train == 1)]
        y_train_01_subset = y_train[(y_train == 0) | (y_train == 1)]

        # Standardize features
        sc = StandardScaler()
        sc.fit(x_train_01_subset)
        self.x = sc.transform(x_train_01_subset)

        self.y = y_train_01_subset
        print('Class labels:', np.unique(self.y))

        # plotter data and save it to file
        Plotter.plot_iris_data_set(
            self.x,
            FilesystemUtils.get_test_resources_plot_file_name(
                'logistic_regression/LogisticRegressionBGD-Training-Set.png'))