Example #1
def test_sgd_optimizer_trigger_stopping():
    params = [np.zeros(shape) for shape in shapes]
    lr = 2e-6
    optimizer = SGDOptimizer(params, lr, lr_schedule='adaptive')
    assert not optimizer.trigger_stopping('', False)
    assert lr / 5 == optimizer.learning_rate
    assert optimizer.trigger_stopping('', False)
def test_sgd_optimizer_trigger_stopping():
    params = [np.zeros(shape) for shape in shapes]
    lr = 2e-6
    optimizer = SGDOptimizer(params, lr, lr_schedule='adaptive')
    assert_false(optimizer.trigger_stopping('', False))
    assert_equal(lr / 5, optimizer.learning_rate)
    assert_true(optimizer.trigger_stopping('', False))
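Both variants above exercise the same behaviour of the 'adaptive' schedule: each call to trigger_stopping divides the learning rate by 5 and lets training continue, until the rate has dropped to 1e-6 or below, at which point the optimizer signals that training should stop. A minimal sketch of that rule, paraphrased from the behaviour the asserts check rather than copied from the library source:

def adaptive_trigger_stopping(optimizer, min_lr=1e-6):
    # Stop once the learning rate cannot usefully be reduced any further.
    if optimizer.learning_rate <= min_lr:
        return True
    # Otherwise shrink the learning rate and keep training.
    optimizer.learning_rate /= 5.0
    return False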
def test_neuralNetwork_sgd():
    from sklearn.neural_network._stochastic_optimizers import SGDOptimizer

    np.random.seed(2019)
    X = np.random.normal(size=(1, 500))
    target = 3.9285985 * X

    nn = NeuralNetwork(inputs=1,
                       neurons=3,
                       outputs=1,
                       activations='sigmoid',
                       silent=True)
    nn.addLayer()
    nn.addLayer()
    nn.addOutputLayer(activations='identity')
    learning_rate = 0.001

    yhat = nn.forward_pass(X)
    nn.backpropagation(yhat.T, target.T)
    nn.learning_rate = learning_rate
    initial_params = copy.deepcopy(nn.weights + nn.biases)
    nn.sgd()
    grad = nn.d_weights + nn.d_biases
    params = nn.weights + nn.biases
    change = [p - i_p for p, i_p in zip(params, initial_params)]

    skl_sgd = SGDOptimizer(params=initial_params,
                           learning_rate_init=learning_rate,
                           nesterov=False,
                           momentum=1.0)
    upd = skl_sgd._get_updates(grad)

    for update_nn, update_skl in zip(change, upd):
        assert update_nn == pytest.approx(update_skl)
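With freshly constructed (all-zero) velocities, the update returned by SGDOptimizer._get_updates reduces to -learning_rate * grad for every parameter, whatever the momentum value, which is what makes the element-wise comparison above meaningful. A small standalone check of that fact (illustrative; the shapes are arbitrary):

import numpy as np
from sklearn.neural_network._stochastic_optimizers import SGDOptimizer

shapes = [(4, 6), (6, 1)]            # arbitrary example shapes
params = [np.zeros(shape) for shape in shapes]
grads = [np.random.random(shape) for shape in shapes]

opt = SGDOptimizer(params=params, learning_rate_init=0.001,
                   nesterov=False, momentum=1.0)
for update, grad in zip(opt._get_updates(grads), grads):
    # zero initial velocity => the first step is a plain SGD step
    assert np.allclose(update, -0.001 * grad)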
Example #5
def test_sgd_optimizer_no_momentum():
    params = [np.zeros(shape) for shape in shapes]

    for lr in [10 ** i for i in range(-3, 4)]:
        optimizer = SGDOptimizer(params, lr, momentum=0, nesterov=False)
        grads = [np.random.random(shape) for shape in shapes]
        expected = [param - lr * grad for param, grad in zip(params, grads)]
        optimizer.update_params(grads)

        for exp, param in zip(expected, optimizer.params):
            assert_array_equal(exp, param)
Example #7
def test_sgd_optimizer_momentum():
    params = [np.zeros(shape) for shape in shapes]
    lr = 0.1

    for momentum in np.arange(0.5, 0.9, 0.1):
        optimizer = SGDOptimizer(params, lr, momentum=momentum, nesterov=False)
        velocities = [np.random.random(shape) for shape in shapes]
        optimizer.velocities = velocities
        grads = [np.random.random(shape) for shape in shapes]
        updates = [momentum * velocity - lr * grad
                   for velocity, grad in zip(velocities, grads)]
        expected = [param + update for param, update in zip(params, updates)]
        optimizer.update_params(grads)

        for exp, param in zip(expected, optimizer.params):
            assert_array_equal(exp, param)
Example #9
def test_sgd_optimizer_nesterovs_momentum():
    params = [np.zeros(shape) for shape in shapes]
    lr = 0.1
    rng = np.random.RandomState(0)

    for momentum in np.arange(0.5, 0.9, 0.1):
        optimizer = SGDOptimizer(params, lr, momentum=momentum, nesterov=True)
        velocities = [rng.random_sample(shape) for shape in shapes]
        optimizer.velocities = velocities
        grads = [rng.random_sample(shape) for shape in shapes]
        updates = [
            momentum * velocity - lr * grad
            for velocity, grad in zip(velocities, grads)
        ]
        updates = [
            momentum * update - lr * grad
            for update, grad in zip(updates, grads)
        ]
        expected = [param + update for param, update in zip(params, updates)]
        optimizer.update_params(params, grads)

        for exp, param in zip(expected, params):
            assert_array_equal(exp, param)
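The two list comprehensions above spell out the Nesterov step: the velocity is first updated exactly as in classic momentum, v_new = momentum * v - lr * grad, and the change actually applied to the parameter is momentum * v_new - lr * grad. A compact single-array sketch of the same arithmetic (illustrative only):

import numpy as np

def nesterov_step(param, velocity, grad, lr=0.1, momentum=0.9):
    # classic momentum velocity update
    velocity = momentum * velocity - lr * grad
    # Nesterov look-ahead: step with the momentum-extrapolated velocity
    param = param + momentum * velocity - lr * grad
    return param, velocity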
Example #10
    def _fit_stochastic(self, X, y, X_val, y_val, activations, deltas,
                        coef_grads, intercept_grads, layer_units, incremental):

        if not incremental or not hasattr(self, '_optimizer'):
            params = self.coefs_ + self.intercepts_

            if self.solver == 'sgd':
                self._optimizer = SGDOptimizer(params, self.learning_rate_init,
                                               self.learning_rate,
                                               self.momentum,
                                               self.nesterovs_momentum,
                                               self.power_t)
            elif self.solver == 'adam':
                self._optimizer = AdamOptimizer(params,
                                                self.learning_rate_init,
                                                self.beta_1, self.beta_2,
                                                self.epsilon)

        # early_stopping in partial_fit doesn't make sense
        early_stopping = self.early_stopping and not incremental

        n_samples = X.shape[0]

        if self.batch_size == 'auto':
            batch_size = min(200, n_samples)
        else:
            batch_size = np.clip(self.batch_size, 1, n_samples)

        try:
            for it in range(self.max_iter):
                X, y = shuffle(X, y, random_state=self._random_state)
                accumulated_loss = 0.0
                for batch_slice in gen_batches(n_samples, batch_size):
                    activations[0] = X[batch_slice]
                    batch_loss, coef_grads, intercept_grads = self._backprop(
                        X[batch_slice], y[batch_slice], activations, deltas,
                        coef_grads, intercept_grads)
                    accumulated_loss += batch_loss * (batch_slice.stop -
                                                      batch_slice.start)

                    # update weights
                    grads = coef_grads + intercept_grads
                    self._optimizer.update_params(grads)

                # Compute the validation loss and record it on the validation loss path
                activations[0] = X_val
                activations = self._forward_pass(activations)
                loss_val = LOSS_FUNCTIONS[self.loss](y_val, activations[-1])
                self.lpath['val'].append(loss_val)

                self.n_iter_ += 1
                self.loss_ = accumulated_loss / X.shape[0]

                self.t_ += n_samples
                self.loss_curve_.append(self.loss_)
                self.lpath['train'].append(self.loss_)
                if self.verbose:
                    print("Iteration %d, loss = %.8f" %
                          (self.n_iter_, self.loss_))

                # update no_improvement_count based on training loss or
                # validation score according to early_stopping
                self._update_no_improvement_count(early_stopping, X_val, y_val)

                # for learning rate that needs to be updated at iteration end
                self._optimizer.iteration_ends(self.t_)

                if self._no_improvement_count > self.n_iter_no_change:
                    # not better than last `n_iter_no_change` iterations by tol
                    # stop or decrease learning rate
                    if early_stopping:
                        msg = ("Validation score did not improve more than "
                               "tol=%f for %d consecutive epochs." %
                               (self.tol, self.n_iter_no_change))
                    else:
                        msg = ("Training loss did not improve more than tol=%f"
                               " for %d consecutive epochs." %
                               (self.tol, self.n_iter_no_change))

                    is_stopping = self._optimizer.trigger_stopping(
                        msg, self.verbose)
                    if is_stopping:
                        break
                    else:
                        self._no_improvement_count = 0

                if incremental:
                    break

                if self.n_iter_ == self.max_iter:
                    warnings.warn(
                        "Stochastic Optimizer: Maximum iterations (%d) "
                        "reached and the optimization hasn't converged yet." %
                        self.max_iter, ConvergenceWarning)
        except KeyboardInterrupt:
            warnings.warn("Training interrupted by user.")

        if early_stopping:
            # restore best weights
            self.coefs_ = self._best_coefs
            self.intercepts_ = self._best_intercepts
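This method is not called directly; it is reached through the public fit of the MLP estimators whenever solver is 'sgd' or 'adam'. A minimal usage sketch with the stock estimator (note that the lpath dictionary populated above is an addition of this modified class and does not exist on standard scikit-learn models):

import numpy as np
from sklearn.neural_network import MLPRegressor

rng = np.random.RandomState(0)
X = rng.normal(size=(200, 5))
y = X @ rng.normal(size=5)

# fit() dispatches to _fit_stochastic for the stochastic solvers
mlp = MLPRegressor(solver='sgd', learning_rate='adaptive', learning_rate_init=0.01,
                   max_iter=200, n_iter_no_change=10, tol=1e-4)
mlp.fit(X, y)
print(len(mlp.loss_curve_), mlp.loss_)   # one loss entry per completed epoch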
Example #11
# In[90]:

from sklearn.model_selection import train_test_split
X5_train, X5_test, y5_train, y5_test = train_test_split(X5, y5, test_size=0.20)
print("Sample in traiing set:::", X5_train.shape)
print("Sample in testing set:::", X5_test.shape)
print("Sample of target in training set::", y5_train.shape)
print("Sample of target in testing set::", y5_test.shape)

# In[122]:

from sklearn.neural_network import MLPClassifier
from sklearn.neural_network._stochastic_optimizers import SGDOptimizer

# In[177]:

# MLPClassifier with solver='sgd' performs its weight updates through
# SGDOptimizer internally.
gd = MLPClassifier(solver='sgd', learning_rate_init=0.1)

# In[184]:

gd.fit(X5_train, y5_train)
pred5 = gd.predict(X5_test)

# In[193]:

from sklearn.metrics import classification_report, confusion_matrix
print(confusion_matrix(y5_test, pred5.round()))
print(classification_report(y5_test, pred5.round()))

# In[194]:

from sklearn.metrics import accuracy_score
accuracy5 = accuracy_score(y5_test, pred5.round())
    def _fit_stochastic(self, X, y, activations, deltas, coef_grads,
                        intercept_grads, layer_units, incremental):
        
        if not incremental or not hasattr(self, '_optimizer'):
            params = self.coefs_ + self.intercepts_

            if self.solver == 'sgd':
                self._optimizer = SGDOptimizer(
                    params, self.learning_rate_init, self.learning_rate,
                    self.momentum, self.nesterovs_momentum, self.power_t)
            elif self.solver == 'adam':
                self._optimizer = AdamOptimizer(
                    params, self.learning_rate_init, self.beta_1, self.beta_2,
                    self.epsilon)

        # early_stopping in partial_fit doesn't make sense
        early_stopping = self.early_stopping and not incremental
        if early_stopping:
            X, X_val, y, y_val = train_test_split(
                X, y, random_state=self._random_state,
                test_size=self.validation_fraction)
            if isinstance(self, ClassifierMixin):
                y_val = self._label_binarizer.inverse_transform(y_val)
        else:
            X_val = None
            y_val = None

        n_samples = X.shape[0]

        if self.batch_size == 'auto':
            batch_size = min(200, n_samples)
        else:
            batch_size = np.clip(self.batch_size, 1, n_samples)
        
        # for explanation start
        activations_over_all_itr = []
        # for explanation end
        try:
            for it in range(self.max_iter):
                X, y = shuffle(X, y, random_state=self._random_state)
                accumulated_loss = 0.0
                for batch_slice in gen_batches(n_samples, batch_size):
                    activations[0] = X[batch_slice]
                    batch_loss, coef_grads, intercept_grads = self._backprop(
                        X[batch_slice], y[batch_slice], activations, deltas,
                        coef_grads, intercept_grads)
                    accumulated_loss += batch_loss * (batch_slice.stop - 
                                                      batch_slice.start)

                    # update weights
                    grads = coef_grads + intercept_grads
                    self._optimizer.update_params(grads)
                # for explanation start
                activations_over_all_itr.append(activations)
                # for explanation end
                self.n_iter_ += 1
                self.loss_ = accumulated_loss / X.shape[0]

                self.t_ += n_samples
                self.loss_curve_.append(self.loss_)
                if self.verbose:
                    print("Iteration %d, loss = %.8f" % (self.n_iter_,
                                                         self.loss_))

                # update no_improvement_count based on training loss or
                # validation score according to early_stopping
                self._update_no_improvement_count(early_stopping, X_val, y_val)

                # for learning rate that needs to be updated at iteration end
                self._optimizer.iteration_ends(self.t_)

                if self._no_improvement_count > 2:
                    # not better than last two iterations by tol.
                    # stop or decrease learning rate
                    if early_stopping:
                        msg = ("Validation score did not improve more than "
                               "tol=%f for two consecutive epochs." % self.tol)
                    else:
                        msg = ("Training loss did not improve more than tol=%f"
                               " for two consecutive epochs." % self.tol)

                    is_stopping = self._optimizer.trigger_stopping(
                        msg, self.verbose)
                    if is_stopping:
                        break
                    else:
                        self._no_improvement_count = 0

                if incremental:
                    break

                if self.n_iter_ == self.max_iter:
                    warnings.warn(
                        "Stochastic Optimizer: Maximum iterations (%d) "
                        "reached and the optimization hasn't converged yet."
                        % self.max_iter, ConvergenceWarning)
        except KeyboardInterrupt:
            warnings.warn("Training interrupted by user.")

        if early_stopping:
            # restore best weights
            self.coefs_ = self._best_coefs
            self.intercepts_ = self._best_intercepts
        
        # for explanation start
        return activations_over_all_itr
        # for explanation end
    def _fit_stochastic(
        self,
        X,
        y,
        activations,
        deltas,
        coef_grads,
        intercept_grads,
        layer_units,
        incremental,
    ):

        if not incremental or not hasattr(self, "_optimizer"):
            params = self.coefs_ + self.intercepts_

            if self.solver == "sgd":
                self._optimizer = SGDOptimizer(
                    params,
                    self.learning_rate_init,
                    self.learning_rate,
                    self.momentum,
                    self.nesterovs_momentum,
                    self.power_t,
                )
            elif self.solver == "adam":
                self._optimizer = AdamOptimizer(
                    params,
                    self.learning_rate_init,
                    self.beta_1,
                    self.beta_2,
                    self.epsilon,
                )

        # early_stopping in partial_fit doesn't make sense
        early_stopping = self.early_stopping and not incremental
        if early_stopping:
            # don't stratify in multilabel classification
            # should_stratify = is_classifier(self) and self.n_outputs_ == 1
            # stratify = y if should_stratify else None
            # X, X_val, y, y_val = train_test_split(
            #     X,
            #     y,
            #     random_state=self._random_state,
            #     test_size=self.validation_fraction,
            #     stratify=stratify,
            # )
            # if is_classifier(self):
            #     y_val = self._label_binarizer.inverse_transform(y_val)

            # --------------------------- #
            #    Custom validation set    #
            # --------------------------- #

            X_val = self.custom_validation_data[0]
            y_val = self.custom_validation_data[1]
            if type(X_val) is not np.ndarray:
                X_val = X_val.to_numpy()
            if type(y_val) is not np.ndarray:
                y_val = y_val.to_numpy()

            # --------------------------- #
            #       Custom code end       #
            # --------------------------- #

        else:
            X_val = None
            y_val = None

        n_samples = X.shape[0]

        if self.batch_size == "auto":
            batch_size = min(200, n_samples)
        else:
            batch_size = np.clip(self.batch_size, 1, n_samples)

        try:
            for it in tqdm(range(self.max_iter), unit="iteration"):
                if self.shuffle:
                    X, y = shuffle(X, y, random_state=self._random_state)
                accumulated_loss = 0.0
                for batch_slice in gen_batches(n_samples, batch_size):
                    activations[0] = X[batch_slice]
                    batch_loss, coef_grads, intercept_grads = self._backprop(
                        X[batch_slice],
                        y[batch_slice],
                        activations,
                        deltas,
                        coef_grads,
                        intercept_grads,
                    )
                    accumulated_loss += batch_loss * (batch_slice.stop -
                                                      batch_slice.start)

                    # update weights
                    grads = coef_grads + intercept_grads
                    self._optimizer.update_params(grads)

                self.n_iter_ += 1
                self.loss_ = accumulated_loss / X.shape[0]

                self.t_ += n_samples
                self.loss_curve_.append(self.loss_)

                if self.verbose:
                    print("Iteration %d, loss = %.8f" %
                          (self.n_iter_, self.loss_))

                # update no_improvement_count based on training loss or
                # validation score according to early_stopping
                self._update_no_improvement_count(early_stopping, X_val, y_val)

                # for learning rate that needs to be updated at iteration end
                self._optimizer.iteration_ends(self.t_)

                if self._no_improvement_count > self.n_iter_no_change:
                    # not better than last `n_iter_no_change` iterations by tol
                    # stop or decrease learning rate
                    if early_stopping:
                        msg = ("Validation score did not improve more than "
                               "tol=%f for %d consecutive epochs." %
                               (self.tol, self.n_iter_no_change))
                    else:
                        msg = ("Training loss did not improve more than tol=%f"
                               " for %d consecutive epochs." %
                               (self.tol, self.n_iter_no_change))

                    is_stopping = self._optimizer.trigger_stopping(
                        msg, self.verbose)
                    if is_stopping:
                        break
                    else:
                        self._no_improvement_count = 0

                if incremental:
                    break

                if self.n_iter_ == self.max_iter:
                    warnings.warn(
                        "Stochastic Optimizer: Maximum iterations (%d) "
                        "reached and the optimization hasn't converged yet." %
                        self.max_iter,
                        ConvergenceWarning,
                    )
        except KeyboardInterrupt:
            warnings.warn("Training interrupted by user.")

        if early_stopping:
            # restore best weights
            self.coefs_ = self._best_coefs
            self.intercepts_ = self._best_intercepts
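The custom_validation_data attribute read above is not part of scikit-learn; it is assumed to have been set on the estimator before fit is called. A hypothetical usage sketch, assuming the patched _fit_stochastic has been installed on an MLPClassifier subclass named CustomValMLPClassifier (name invented for illustration):

import numpy as np
from sklearn.model_selection import train_test_split

rng = np.random.RandomState(0)
X = rng.normal(size=(500, 10))
y = (X[:, 0] > 0).astype(int)
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=0)

# hypothetical subclass carrying the patched _fit_stochastic shown above
clf = CustomValMLPClassifier(solver='sgd', early_stopping=True, max_iter=300)
clf.custom_validation_data = (X_val, y_val)   # consumed inside _fit_stochastic
clf.fit(X_train, y_train)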