def test_sgd_optimizer_trigger_stopping():
    params = [np.zeros(shape) for shape in shapes]
    lr = 2e-6
    optimizer = SGDOptimizer(params, lr, lr_schedule='adaptive')
    assert not optimizer.trigger_stopping('', False)
    assert lr / 5 == optimizer.learning_rate
    assert optimizer.trigger_stopping('', False)
def test_sgd_optimizer_trigger_stopping():
    params = [np.zeros(shape) for shape in shapes]
    lr = 2e-6
    optimizer = SGDOptimizer(params, lr, lr_schedule='adaptive')
    assert_false(optimizer.trigger_stopping('', False))
    assert_equal(lr / 5, optimizer.learning_rate)
    assert_true(optimizer.trigger_stopping('', False))
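# A minimal standalone sketch (not part of the test suite above) of what the two
# tests assert: with lr_schedule='adaptive', SGDOptimizer.trigger_stopping divides
# the learning rate by 5 each time it is called, until the rate falls to 1e-6 or
# below, at which point it reports that training should stop. SGDOptimizer lives
# in a private scikit-learn module, so the import path and behaviour may change
# between versions.
import numpy as np
from sklearn.neural_network._stochastic_optimizers import SGDOptimizer

params = [np.zeros((3, 2))]
opt = SGDOptimizer(params, learning_rate_init=2e-6, lr_schedule='adaptive')

stop = opt.trigger_stopping('Loss stopped improving.', verbose=True)
print(stop, opt.learning_rate)   # False, 4e-07 (i.e. 2e-6 / 5)

stop = opt.trigger_stopping('Loss stopped improving.', verbose=True)
print(stop, opt.learning_rate)   # True: learning rate is now <= 1e-6, so stop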
def test_neuralNetwork_sgd():
    from sklearn.neural_network._stochastic_optimizers import SGDOptimizer

    np.random.seed(2019)
    X = np.random.normal(size=(1, 500))
    target = 3.9285985 * X

    nn = NeuralNetwork(inputs=1, neurons=3, outputs=1,
                       activations='sigmoid', silent=True)
    nn.addLayer()
    nn.addLayer()
    nn.addOutputLayer(activations='identity')

    learning_rate = 0.001
    yhat = nn.forward_pass(X)
    nn.backpropagation(yhat.T, target.T)
    nn.learning_rate = learning_rate
    initial_params = copy.deepcopy(nn.weights + nn.biases)
    nn.sgd()

    grad = nn.d_weights + nn.d_biases
    params = nn.weights + nn.biases
    change = [p - i_p for p, i_p in zip(params, initial_params)]

    skl_sgd = SGDOptimizer(params=initial_params,
                           learning_rate_init=learning_rate,
                           nesterov=False,
                           momentum=1.0)
    upd = skl_sgd._get_updates(grad)

    for update_nn, update_skl in zip(change, upd):
        assert update_nn == pytest.approx(update_skl)
def test_sgd_optimizer_no_momentum():
    params = [np.zeros(shape) for shape in shapes]

    for lr in [10 ** i for i in range(-3, 4)]:
        optimizer = SGDOptimizer(params, lr, momentum=0, nesterov=False)
        grads = [np.random.random(shape) for shape in shapes]
        expected = [param - lr * grad for param, grad in zip(params, grads)]
        optimizer.update_params(grads)

        for exp, param in zip(expected, optimizer.params):
            assert_array_equal(exp, param)
def test_sgd_optimizer_momentum():
    params = [np.zeros(shape) for shape in shapes]
    lr = 0.1

    for momentum in np.arange(0.5, 0.9, 0.1):
        optimizer = SGDOptimizer(params, lr, momentum=momentum, nesterov=False)
        velocities = [np.random.random(shape) for shape in shapes]
        optimizer.velocities = velocities
        grads = [np.random.random(shape) for shape in shapes]
        updates = [momentum * velocity - lr * grad
                   for velocity, grad in zip(velocities, grads)]
        expected = [param + update for param, update in zip(params, updates)]
        optimizer.update_params(grads)

        for exp, param in zip(expected, optimizer.params):
            assert_array_equal(exp, param)
def test_sgd_optimizer_nesterovs_momentum():
    params = [np.zeros(shape) for shape in shapes]
    lr = 0.1
    rng = np.random.RandomState(0)

    for momentum in np.arange(0.5, 0.9, 0.1):
        optimizer = SGDOptimizer(params, lr, momentum=momentum, nesterov=True)
        velocities = [rng.random_sample(shape) for shape in shapes]
        optimizer.velocities = velocities
        grads = [rng.random_sample(shape) for shape in shapes]
        updates = [momentum * velocity - lr * grad
                   for velocity, grad in zip(velocities, grads)]
        updates = [momentum * update - lr * grad
                   for update, grad in zip(updates, grads)]
        expected = [param + update for param, update in zip(params, updates)]
        optimizer.update_params(params, grads)

        for exp, param in zip(expected, params):
            assert_array_equal(exp, param)
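# A NumPy-only illustration (not scikit-learn code) of the update rules the three
# tests above check. With velocity v, learning rate lr, momentum m and gradient g:
# plain SGD applies param -= lr * g; classical momentum uses v = m * v - lr * g and
# param += v; Nesterov momentum applies the momentum step once more to the freshly
# updated velocity before changing the parameter. Function and variable names here
# are illustrative only.
import numpy as np

def sgd_step(param, grad, velocity, lr=0.1, momentum=0.9, nesterov=True):
    """One SGD update on a single parameter array; returns (new_param, new_velocity)."""
    velocity = momentum * velocity - lr * grad
    if nesterov:
        # Nesterov "look-ahead": momentum applied again to the updated velocity
        update = momentum * velocity - lr * grad
    else:
        update = velocity
    return param + update, velocity

rng = np.random.RandomState(0)
param, velocity = np.zeros((4, 6)), rng.random_sample((4, 6))
grad = rng.random_sample((4, 6))
param, velocity = sgd_step(param, grad, velocity)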
def _fit_stochastic(self, X, y, X_val, y_val, activations, deltas,
                    coef_grads, intercept_grads, layer_units, incremental):
    if not incremental or not hasattr(self, '_optimizer'):
        params = self.coefs_ + self.intercepts_

        if self.solver == 'sgd':
            self._optimizer = SGDOptimizer(
                params, self.learning_rate_init, self.learning_rate,
                self.momentum, self.nesterovs_momentum, self.power_t)
        elif self.solver == 'adam':
            self._optimizer = AdamOptimizer(
                params, self.learning_rate_init, self.beta_1, self.beta_2,
                self.epsilon)

    # early_stopping in partial_fit doesn't make sense
    early_stopping = self.early_stopping and not incremental

    n_samples = X.shape[0]

    if self.batch_size == 'auto':
        batch_size = min(200, n_samples)
    else:
        batch_size = np.clip(self.batch_size, 1, n_samples)

    try:
        for it in range(self.max_iter):
            X, y = shuffle(X, y, random_state=self._random_state)
            accumulated_loss = 0.0
            for batch_slice in gen_batches(n_samples, batch_size):
                activations[0] = X[batch_slice]
                batch_loss, coef_grads, intercept_grads = self._backprop(
                    X[batch_slice], y[batch_slice], activations, deltas,
                    coef_grads, intercept_grads)
                accumulated_loss += batch_loss * (batch_slice.stop -
                                                  batch_slice.start)

                # update weights
                grads = coef_grads + intercept_grads
                self._optimizer.update_params(grads)

            # Get val path
            activations[0] = X_val
            activations = self._forward_pass(activations)
            loss_val = LOSS_FUNCTIONS[self.loss](y_val, activations[-1])
            self.lpath['val'].append(loss_val)

            self.n_iter_ += 1
            self.loss_ = accumulated_loss / X.shape[0]

            self.t_ += n_samples
            self.loss_curve_.append(self.loss_)
            self.lpath['train'].append(self.loss_)
            if self.verbose:
                print("Iteration %d, loss = %.8f" % (self.n_iter_,
                                                     self.loss_))

            # update no_improvement_count based on training loss or
            # validation score according to early_stopping
            self._update_no_improvement_count(early_stopping, X_val, y_val)

            # for learning rate that needs to be updated at iteration end
            self._optimizer.iteration_ends(self.t_)

            if self._no_improvement_count > self.n_iter_no_change:
                # not better than last `n_iter_no_change` iterations by tol
                # stop or decrease learning rate
                if early_stopping:
                    msg = ("Validation score did not improve more than "
                           "tol=%f for %d consecutive epochs." % (
                               self.tol, self.n_iter_no_change))
                else:
                    msg = ("Training loss did not improve more than tol=%f"
                           " for %d consecutive epochs." % (
                               self.tol, self.n_iter_no_change))

                is_stopping = self._optimizer.trigger_stopping(
                    msg, self.verbose)
                if is_stopping:
                    break
                else:
                    self._no_improvement_count = 0

            if incremental:
                break

            if self.n_iter_ == self.max_iter:
                warnings.warn(
                    "Stochastic Optimizer: Maximum iterations (%d) "
                    "reached and the optimization hasn't converged yet."
                    % self.max_iter, ConvergenceWarning)
    except KeyboardInterrupt:
        warnings.warn("Training interrupted by user.")

    if early_stopping:
        # restore best weights
        self.coefs_ = self._best_coefs
        self.intercepts_ = self._best_intercepts
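# The modification above records a train/validation loss path (self.lpath) by
# editing scikit-learn internals. A hedged, public-API alternative sketch: train a
# stock MLPClassifier one epoch at a time with partial_fit and record the losses
# manually. The synthetic data and all names below are illustrative only.
import numpy as np
from sklearn.datasets import make_classification
from sklearn.metrics import log_loss
from sklearn.model_selection import train_test_split
from sklearn.neural_network import MLPClassifier

X, y = make_classification(n_samples=500, random_state=0)
X_tr, X_val, y_tr, y_val = train_test_split(X, y, test_size=0.2, random_state=0)

clf = MLPClassifier(hidden_layer_sizes=(32,), solver='sgd',
                    learning_rate_init=0.01, random_state=0)

lpath = {'train': [], 'val': []}
for epoch in range(50):
    # each partial_fit call runs one pass over the data with the SGD optimizer
    clf.partial_fit(X_tr, y_tr, classes=np.unique(y))
    lpath['train'].append(log_loss(y_tr, clf.predict_proba(X_tr)))
    lpath['val'].append(log_loss(y_val, clf.predict_proba(X_val)))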
# In[90]:

from sklearn.model_selection import train_test_split

X5_train, X5_test, y5_train, y5_test = train_test_split(X5, y5, test_size=0.20)
print("Samples in training set:", X5_train.shape)
print("Samples in testing set:", X5_test.shape)
print("Target samples in training set:", y5_train.shape)
print("Target samples in testing set:", y5_test.shape)


# In[122]:

from sklearn.neural_network._stochastic_optimizers import SGDOptimizer


# In[177]:

# The original cell instantiated the internal optimizer directly:
#     gd = SGDOptimizer(params='0.1', learning_rate_init=0.1)
# but SGDOptimizer expects a list of parameter arrays (not the string '0.1') and
# has no fit/predict methods, so it cannot be trained on data. An MLPClassifier
# with solver='sgd' uses SGDOptimizer internally and exposes the estimator API.
from sklearn.neural_network import MLPClassifier

gd = MLPClassifier(solver='sgd', learning_rate_init=0.1)


# In[184]:

gd.fit(X5_train, y5_train)
pred5 = gd.predict(X5_test)  # assumed: the original notebook defines pred5 elsewhere


# In[193]:

from sklearn.metrics import classification_report, confusion_matrix

print(confusion_matrix(y5_test, pred5.round()))
print(classification_report(y5_test, pred5.round()))


# In[194]:

from sklearn.metrics import accuracy_score

accuracy5 = accuracy_score(y5_test, pred5.round())
def _fit_stochastic(self, X, y, activations, deltas, coef_grads,
                    intercept_grads, layer_units, incremental):
    if not incremental or not hasattr(self, '_optimizer'):
        params = self.coefs_ + self.intercepts_

        if self.solver == 'sgd':
            self._optimizer = SGDOptimizer(
                params, self.learning_rate_init, self.learning_rate,
                self.momentum, self.nesterovs_momentum, self.power_t)
        elif self.solver == 'adam':
            self._optimizer = AdamOptimizer(
                params, self.learning_rate_init, self.beta_1, self.beta_2,
                self.epsilon)

    # early_stopping in partial_fit doesn't make sense
    early_stopping = self.early_stopping and not incremental
    if early_stopping:
        X, X_val, y, y_val = train_test_split(
            X, y, random_state=self._random_state,
            test_size=self.validation_fraction)
        if isinstance(self, ClassifierMixin):
            y_val = self._label_binarizer.inverse_transform(y_val)
    else:
        X_val = None
        y_val = None

    n_samples = X.shape[0]

    if self.batch_size == 'auto':
        batch_size = min(200, n_samples)
    else:
        batch_size = np.clip(self.batch_size, 1, n_samples)

    '''for explanation start'''
    activations_over_all_itr = []
    '''for explanation end'''

    try:
        for it in range(self.max_iter):
            X, y = shuffle(X, y, random_state=self._random_state)
            accumulated_loss = 0.0
            for batch_slice in gen_batches(n_samples, batch_size):
                activations[0] = X[batch_slice]
                batch_loss, coef_grads, intercept_grads = self._backprop(
                    X[batch_slice], y[batch_slice], activations, deltas,
                    coef_grads, intercept_grads)
                accumulated_loss += batch_loss * (batch_slice.stop -
                                                  batch_slice.start)

                # update weights
                grads = coef_grads + intercept_grads
                self._optimizer.update_params(grads)

                '''for explanation start'''
                # store a shallow copy: appending `activations` itself would save
                # references to one list that later batches overwrite
                activations_over_all_itr.append(list(activations))
                '''for explanation end'''

            self.n_iter_ += 1
            self.loss_ = accumulated_loss / X.shape[0]

            self.t_ += n_samples
            self.loss_curve_.append(self.loss_)
            if self.verbose:
                print("Iteration %d, loss = %.8f" % (self.n_iter_,
                                                     self.loss_))

            # update no_improvement_count based on training loss or
            # validation score according to early_stopping
            self._update_no_improvement_count(early_stopping, X_val, y_val)

            # for learning rate that needs to be updated at iteration end
            self._optimizer.iteration_ends(self.t_)

            if self._no_improvement_count > 2:
                # not better than last two iterations by tol.
                # stop or decrease learning rate
                if early_stopping:
                    msg = ("Validation score did not improve more than "
                           "tol=%f for two consecutive epochs." % self.tol)
                else:
                    msg = ("Training loss did not improve more than tol=%f"
                           " for two consecutive epochs." % self.tol)

                is_stopping = self._optimizer.trigger_stopping(
                    msg, self.verbose)
                if is_stopping:
                    break
                else:
                    self._no_improvement_count = 0

            if incremental:
                break

            if self.n_iter_ == self.max_iter:
                warnings.warn(
                    "Stochastic Optimizer: Maximum iterations (%d) "
                    "reached and the optimization hasn't converged yet."
                    % self.max_iter, ConvergenceWarning)
    except KeyboardInterrupt:
        warnings.warn("Training interrupted by user.")

    if early_stopping:
        # restore best weights
        self.coefs_ = self._best_coefs
        self.intercepts_ = self._best_intercepts

    '''for explanation start'''
    return activations_over_all_itr
    '''for explanation end'''
def _fit_stochastic(
    self,
    X,
    y,
    activations,
    deltas,
    coef_grads,
    intercept_grads,
    layer_units,
    incremental,
):
    if not incremental or not hasattr(self, "_optimizer"):
        params = self.coefs_ + self.intercepts_
        if self.solver == "sgd":
            self._optimizer = SGDOptimizer(
                params,
                self.learning_rate_init,
                self.learning_rate,
                self.momentum,
                self.nesterovs_momentum,
                self.power_t,
            )
        elif self.solver == "adam":
            self._optimizer = AdamOptimizer(
                params,
                self.learning_rate_init,
                self.beta_1,
                self.beta_2,
                self.epsilon,
            )

    # early_stopping in partial_fit doesn't make sense
    early_stopping = self.early_stopping and not incremental
    if early_stopping:
        # don't stratify in multilabel classification
        # should_stratify = is_classifier(self) and self.n_outputs_ == 1
        # stratify = y if should_stratify else None
        # X, X_val, y, y_val = train_test_split(
        #     X,
        #     y,
        #     random_state=self._random_state,
        #     test_size=self.validation_fraction,
        #     stratify=stratify,
        # )
        # if is_classifier(self):
        #     y_val = self._label_binarizer.inverse_transform(y_val)

        # --------------------------- #
        #    Custom validation set    #
        # --------------------------- #
        X_val = self.custom_validation_data[0]
        y_val = self.custom_validation_data[1]
        if type(X_val) is not np.ndarray:
            X_val = X_val.to_numpy()
        if type(y_val) is not np.ndarray:
            y_val = y_val.to_numpy()
        # --------------------------- #
        #       Custom code end       #
        # --------------------------- #
    else:
        X_val = None
        y_val = None

    n_samples = X.shape[0]

    if self.batch_size == "auto":
        batch_size = min(200, n_samples)
    else:
        batch_size = np.clip(self.batch_size, 1, n_samples)

    try:
        for it in tqdm(range(self.max_iter), unit="iteration"):
            if self.shuffle:
                X, y = shuffle(X, y, random_state=self._random_state)
            accumulated_loss = 0.0
            for batch_slice in gen_batches(n_samples, batch_size):
                activations[0] = X[batch_slice]
                batch_loss, coef_grads, intercept_grads = self._backprop(
                    X[batch_slice],
                    y[batch_slice],
                    activations,
                    deltas,
                    coef_grads,
                    intercept_grads,
                )
                accumulated_loss += batch_loss * (
                    batch_slice.stop - batch_slice.start
                )

                # update weights
                grads = coef_grads + intercept_grads
                self._optimizer.update_params(grads)

            self.n_iter_ += 1
            self.loss_ = accumulated_loss / X.shape[0]

            self.t_ += n_samples
            self.loss_curve_.append(self.loss_)
            if self.verbose:
                print("Iteration %d, loss = %.8f" % (self.n_iter_, self.loss_))

            # update no_improvement_count based on training loss or
            # validation score according to early_stopping
            self._update_no_improvement_count(early_stopping, X_val, y_val)

            # for learning rate that needs to be updated at iteration end
            self._optimizer.iteration_ends(self.t_)

            if self._no_improvement_count > self.n_iter_no_change:
                # not better than last `n_iter_no_change` iterations by tol
                # stop or decrease learning rate
                if early_stopping:
                    msg = (
                        "Validation score did not improve more than "
                        "tol=%f for %d consecutive epochs."
                        % (self.tol, self.n_iter_no_change)
                    )
                else:
                    msg = (
                        "Training loss did not improve more than tol=%f"
                        " for %d consecutive epochs."
                        % (self.tol, self.n_iter_no_change)
                    )

                is_stopping = self._optimizer.trigger_stopping(msg, self.verbose)
                if is_stopping:
                    break
                else:
                    self._no_improvement_count = 0

            if incremental:
                break

            if self.n_iter_ == self.max_iter:
                warnings.warn(
                    "Stochastic Optimizer: Maximum iterations (%d) "
                    "reached and the optimization hasn't converged yet."
                    % self.max_iter,
                    ConvergenceWarning,
                )
    except KeyboardInterrupt:
        warnings.warn("Training interrupted by user.")

    if early_stopping:
        # restore best weights
        self.coefs_ = self._best_coefs
        self.intercepts_ = self._best_intercepts
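# Hedged usage sketch for the custom-validation-set variant above. The class name
# PatchedMLPClassifier is hypothetical; it stands for an MLPClassifier subclass (or
# monkeypatched class) whose _fit_stochastic is the version shown. The
# custom_validation_data attribute it reads is not part of stock scikit-learn, and
# early_stopping must be enabled, since the validation data is only used on that
# branch. X_train/y_train and the validation arrays stand for any data split.
clf = PatchedMLPClassifier(solver='sgd', early_stopping=True,
                           n_iter_no_change=10, verbose=True)
clf.custom_validation_data = (X_val_custom, y_val_custom)  # arrays or DataFrames
clf.fit(X_train, y_train)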