def step(self, net: NeuralNet) -> None:
    """Gradient descent step with a backtracking line search: the step size
    is halved until the loss satisfies a sufficient-decrease condition."""
    for param, grad in net.params_and_grads():
        # Loss before the update, used as the reference for the line search.
        predicted = net.forward(net.curr_batch.inputs)
        loss_old = net.loss_f.loss(predicted, net.curr_batch.targets)

        old_param = copy.deepcopy(param)
        param -= self.lr * grad

        predicted = net.forward(net.curr_batch.inputs)
        loss = net.loss_f.loss(predicted, net.curr_batch.targets)

        # Backtrack: halve the step size until
        #   loss <= loss_old - alpha * ||param - old_param||^2.
        temp_lr = self.lr
        while not loss <= loss_old - self.alpha * np.linalg.norm(param - old_param) ** 2:
            temp_lr = temp_lr / 2.0
            # Assign in place so the network sees the retried parameter values.
            param[...] = old_param - temp_lr * grad
            predicted = net.forward(net.curr_batch.inputs)
            loss = net.loss_f.loss(predicted, net.curr_batch.targets)
            if temp_lr < 1e-10:
                print('Step size too small; stopping line search')
                break
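# The loop above is a backtracking line search: the step is halved until the new
# loss satisfies a sufficient-decrease condition of the form
#   loss <= loss_old - alpha * ||param_new - param_old||^2.
# Below is a standalone, runnable illustration of that condition on a simple
# quadratic, independent of NeuralNet; the function and variable names here are
# illustrative only and not part of the library.
import numpy as np

def backtracking_step(x, grad, loss_fn, lr=1.0, alpha=0.1, min_lr=1e-10):
    """Halve lr until loss_fn(x_new) <= loss_fn(x) - alpha * ||x_new - x||**2."""
    loss_old = loss_fn(x)
    while lr >= min_lr:
        x_new = x - lr * grad
        if loss_fn(x_new) <= loss_old - alpha * np.linalg.norm(x_new - x) ** 2:
            return x_new, lr
        lr /= 2.0
    return x, lr  # step became too small: keep the old point

# Example: one step on f(x) = ||x||^2 from x = [3, 4]; the full step overshoots,
# so the search settles on half the initial step size.
x = np.array([3.0, 4.0])
x_next, used_lr = backtracking_step(x, grad=2 * x, loss_fn=lambda v: float(np.sum(v ** 2)))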
def step(self, net: NeuralNet) -> None:
    """RMSProp update: scale each gradient by a running average of its
    squared magnitude."""
    for i, (param, grad) in enumerate(net.params_and_grads()):
        # Exponential moving average of the squared gradient.
        G_temp = self.gamma * self.G[i] + (1 - self.gamma) * grad ** 2
        param -= (self.lr / np.sqrt(G_temp + self.epsilon)) * grad
        self.G[i] = G_temp
def step(self, net: NeuralNet) -> None:
    """Adam update: bias-corrected moving averages of the gradient (first
    moment, m) and of the squared gradient (second moment, G)."""
    for i, (param, grad) in enumerate(net.params_and_grads()):
        # Raw exponential moving averages, stored without bias correction.
        m_temp = self.gamma1 * self.m[i] + (1.0 - self.gamma1) * grad
        G_temp = self.gamma2 * self.G[i] + (1.0 - self.gamma2) * grad ** 2
        # Bias-corrected estimates, used only for this step's update.
        m_hat = m_temp / (1.0 - np.power(self.gamma1, net.n_iter + 1.0))
        G_hat = G_temp / (1.0 - np.power(self.gamma2, net.n_iter + 1.0))
        param -= (self.lr * m_hat) / (np.sqrt(G_hat) + self.epsilon)
        self.m[i] = m_temp
        self.G[i] = G_temp
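# Both the RMSProp and Adam steps above rely on per-parameter state lists
# (self.G, plus self.m for Adam) that must exist before the first call to step().
# A minimal sketch of how that state could be allocated, assuming one accumulator
# per parameter tensor; the constructor signature, default hyperparameters, and
# the init_state helper name are assumptions, not the library's confirmed API.
import numpy as np

class Adam:
    def __init__(self, lr: float = 0.001, gamma1: float = 0.9,
                 gamma2: float = 0.999, epsilon: float = 1e-8) -> None:
        self.lr = lr
        self.gamma1 = gamma1    # decay rate of the first-moment average
        self.gamma2 = gamma2    # decay rate of the second-moment average
        self.epsilon = epsilon
        self.m = []             # first-moment accumulators, one per parameter
        self.G = []             # second-moment accumulators, one per parameter

    def init_state(self, net: "NeuralNet") -> None:
        # Hypothetical helper: one zero array per parameter, matching its shape.
        self.m = [np.zeros_like(param) for param, _ in net.params_and_grads()]
        self.G = [np.zeros_like(param) for param, _ in net.params_and_grads()]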
def step(self, net: NeuralNet) -> None:
    """Vanilla stochastic gradient descent: move each parameter against its
    gradient, scaled by the learning rate."""
    for param, grad in net.params_and_grads():
        param -= self.lr * grad
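# End-to-end check of the contract all the step() methods above assume:
# params_and_grads() must yield the actual parameter arrays, so that the
# in-place `param -= ...` updates are visible to the network. A runnable
# mini-demo with a mock net and a throwaway SGD-like optimizer; _MockNet and
# MiniSGD are hypothetical names used only for this illustration.
import numpy as np

class _MockNet:
    def __init__(self) -> None:
        self.w = np.array([1.0, -2.0])
        self.grad_w = np.array([0.5, 0.5])

    def params_and_grads(self):
        # Yield the stored arrays themselves, not copies, so in-place updates stick.
        yield self.w, self.grad_w

class MiniSGD:
    def __init__(self, lr: float = 0.1) -> None:
        self.lr = lr

    def step(self, net) -> None:
        for param, grad in net.params_and_grads():
            param -= self.lr * grad

net = _MockNet()
MiniSGD(lr=0.1).step(net)
print(net.w)   # [ 0.95 -2.05]: the mock net's weights were updated in place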