def step(self, net: NeuralNet) -> None:
    # Gradient step with a backtracking search on the learning rate:
    # halve the step until a sufficient-decrease condition holds.
    for param, grad in net.params_and_grads():
        predicted = net.forward(net.curr_batch.inputs)
        loss_old = net.loss_f.loss(predicted, net.curr_batch.targets)
        old_param = copy.deepcopy(param)
        param -= self.lr * grad
        count = 0  # number of backtracking halvings
        predicted = net.forward(net.curr_batch.inputs)
        loss = net.loss_f.loss(predicted, net.curr_batch.targets)
        temp_lr = self.lr
        # Sufficient-decrease test: loss <= loss_old - alpha * ||x_new - x_old||^2
        while not loss <= loss_old - self.alpha * (np.linalg.norm(param - old_param))**2:
            print(f'lr: {temp_lr}')
            temp_lr = temp_lr / 2.0
            # Update in place so the network's parameter array is actually modified.
            param[...] = old_param - temp_lr * grad
            predicted = net.forward(net.curr_batch.inputs)
            loss = net.loss_f.loss(predicted, net.curr_batch.targets)
            if temp_lr < 1e-10:
                print('Step size too small')
                break
            count = count + 1
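
# A minimal standalone sketch (not part of joelnet; the toy objective f(x) = x^2,
# alpha, and the starting point are illustrative assumptions) of the same
# backtracking rule used in step() above: halve the learning rate until
# f(x_new) <= f(x_old) - alpha * ||x_new - x_old||^2.
import numpy as np

def backtracking_step(x, grad, lr=1.0, alpha=0.5, min_lr=1e-10):
    f = lambda z: np.sum(z**2)          # toy objective, f(x) = x^2
    f_old = f(x)
    x_new = x - lr * grad
    while not f(x_new) <= f_old - alpha * np.linalg.norm(x_new - x)**2:
        lr = lr / 2.0
        x_new = x - lr * grad
        if lr < min_lr:
            break
    return x_new, lr

x_new, lr_used = backtracking_step(x=np.array([3.0]), grad=np.array([6.0]))
print(x_new, lr_used)  # the accepted point and the learning rate that passed the test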
def intermediate_step(self, net: NeuralNet, m, param) -> None:
    # Look-ahead step for Nesterov momentum: shift the parameter by gamma * m
    # (in place, so the network sees the shifted value) and recompute the
    # gradients at that look-ahead point.
    param += self.gamma * m
    predicted = net.forward(net.curr_batch.inputs)
    grad = net.loss_f.grad(predicted, net.curr_batch.targets)
    net.backward(grad)
def step(self, net: NeuralNet) -> None:
    if self.first:
        self.velocities = [np.zeros_like(param) for param in net.params()]
        self.first = False
    for velocity, param, grad in zip(self.velocities, net.params(), net.grads()):
        velocity *= self.momentum
        velocity += self.learning_rate * grad
        param -= velocity
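
# A small self-contained sketch (toy 1-D quadratic and made-up hyperparameters,
# not part of joelnet) of the momentum recursion implemented in step() above:
#     v <- momentum * v + lr * grad,   param <- param - v
import numpy as np

param = np.array([5.0])
velocity = np.zeros_like(param)
momentum, lr = 0.9, 0.1
for _ in range(200):
    grad = 2 * param          # gradient of f(x) = x^2
    velocity *= momentum
    velocity += lr * grad
    param -= velocity
print(param)  # approaches the minimizer at 0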
def step(self, net: NeuralNet) -> None:
    # Levenberg-Marquardt style step: solve (J^T J + lambda I) d = -grad and
    # increase the damping lambda until a sufficient-decrease condition holds.
    for param, grad, jac in net.params_and_grads_v3():
        predicted = net.forward(net.curr_batch.inputs)
        loss_old = net.loss_f.loss(predicted, net.curr_batch.targets)
        # alternative damping: lamb = min(max(np.linalg.norm(grad.flatten()), 1e-5), 1e5)
        lamb = 1e4
        JTJ = jac.T @ jac
        sh = grad.shape
        d = np.linalg.solve(JTJ + lamb * np.eye(len(JTJ)), -grad.flatten())
        d = d.reshape(sh)
        old_param = copy.deepcopy(param)
        param += d
        predicted = net.forward(net.curr_batch.inputs)
        loss = net.loss_f.loss(predicted, net.curr_batch.targets)
        loop_count = 0
        while not loss <= loss_old - self.alpha * (np.linalg.norm(param - old_param))**2:
            lamb = 2 * lamb
            d = np.linalg.solve(JTJ + lamb * np.eye(len(JTJ)), -grad.flatten())
            d = d.reshape(sh)
            # Retry the step from the original point with the larger damping.
            param[...] = old_param + d
            predicted = net.forward(net.curr_batch.inputs)
            loss = net.loss_f.loss(predicted, net.curr_batch.targets)
            loop_count = loop_count + 1
            if lamb > 1e20:
                break
        net.n_eval = net.n_eval + loop_count + 1
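
# Standalone sketch (the matrices A, b and the lambda values are illustrative
# assumptions, not part of joelnet) of the damped solve used above, on a tiny
# linear least-squares problem r(w) = A w - b: the step d solves
# (J^T J + lambda * I) d = -grad, and a larger lambda shrinks the step.
import numpy as np

A = np.array([[1.0, 2.0], [3.0, 4.0], [5.0, 6.0]])
b = np.array([1.0, 2.0, 3.0])
w = np.zeros(2)

residual = A @ w - b
jac = A                                # Jacobian of the residual w.r.t. w
grad = jac.T @ residual                # gradient of 0.5 * ||r||^2
for lamb in (1e-2, 1e2):
    d = np.linalg.solve(jac.T @ jac + lamb * np.eye(2), -grad)
    print(lamb, d, np.linalg.norm(d))  # the step shrinks as the damping grows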
def train(net: NeuralNet,
          inputs: Tensor,
          targets: Tensor,
          num_epochs: int = 5000,
          iterator: DataIterator = BatchIterator(),
          loss: Loss = MSE(),
          optimizer: Optimizer = SGD()) -> None:
    for epoch in range(num_epochs):
        epoch_loss = 0.0
        for batch in iterator(inputs, targets):
            predicted = net.forward(batch.inputs)
            epoch_loss += loss.loss(predicted, batch.targets)
            grad = loss.grad(predicted, batch.targets)
            net.backward(grad)
            optimizer.step(net)
        print(epoch, epoch_loss)
def step(self, net: NeuralNet) -> bool:
    # Barzilai-Borwein style step length computed from the last parameter and
    # gradient differences: s_k = x_k - x_{k-1}, y_k = grad(x_k) - grad(x_{k-1}).
    count = 0
    for param, grad, prev, grad_prev in net.params_and_grads_v4():
        s_k = param - prev
        y_k = grad - grad_prev
        count = count + 1
        if np.linalg.norm(s_k) < 1e-5 or np.linalg.norm(y_k) < 1e-5:
            break
        eta_k = np.linalg.norm(s_k.flatten()) / np.inner(s_k.flatten(), y_k.flatten())
        if eta_k < 0:
            eta_k = 1e-4
        print('step length: ', eta_k)
        param -= eta_k * grad
    return True
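
# Small sketch (toy quadratic and made-up iterates, not part of joelnet) of the
# Barzilai-Borwein idea behind step() above: a step length built from
# s_k = x_k - x_{k-1} and y_k = grad(x_k) - grad(x_{k-1}). The classical BB1
# length is <s, s> / <s, y>; the implementation above uses the related quotient
# ||s|| / <s, y>.
import numpy as np

def grad(x):
    return np.array([2.0, 10.0]) * x     # gradient of f(x) = x1^2 + 5 * x2^2

x_prev = np.array([1.0, 1.0])
x = x_prev - 0.05 * grad(x_prev)          # one plain gradient step to get two iterates
for _ in range(20):
    s = x - x_prev
    y = grad(x) - grad(x_prev)
    if np.linalg.norm(s) < 1e-12 or np.linalg.norm(y) < 1e-12:
        break
    eta = np.inner(s, s) / np.inner(s, y)
    x_prev = x
    x = x - eta * grad(x)
print(x)  # close to the minimizer at the origin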
def step(self, net: NeuralNet) -> None:
    # Single Levenberg-Marquardt step with damping proportional to the gradient norm.
    for param, grad, jac in net.params_and_grads_v3():
        lamb = max(np.linalg.norm(grad), 1e-6)
        JTJ = jac.T @ jac
        sh = grad.shape
        d = np.linalg.solve(JTJ + lamb * np.eye(len(JTJ)), -grad.flatten())
        d = d.reshape(sh)
        param += d
def test(net: NeuralNet, inputs: Tensor, targets: Tensor, labels: List,
         input_decoder: Callable) -> None:
    correct = 0
    for i in range(len(inputs)):
        predicted = net.forward(inputs[i])
        predicted_idx = np.argmax(predicted)
        actual_idx = np.argmax(targets[i])
        print(input_decoder(inputs[i]), inputs[i],
              labels[predicted_idx], labels[actual_idx])
        if predicted_idx == actual_idx:
            correct += 1
    print(correct / len(inputs))
def step(self, net: NeuralNet) -> None:
    i = 0
    for param, grad in net.params_and_grads():
        G_temp = self.gamma * self.G[i] + (1 - self.gamma) * (grad**2)
        param -= (self.lr / (np.sqrt(G_temp + self.epsilon))) * grad
        self.G[i] = G_temp
        i = i + 1
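
# Scalar sketch (made-up gradients and hyperparameters, not part of joelnet) of
# the RMSProp scaling in step() above: G is an exponential moving average of
# squared gradients, and the effective step is lr / sqrt(G + eps) per coordinate.
import numpy as np

gamma, lr, eps = 0.9, 0.01, 1e-8
G = 0.0
for grad in [4.0, 4.0, 0.5, 0.5, 0.5]:     # a gradient that suddenly shrinks
    G = gamma * G + (1 - gamma) * grad**2
    print(grad, lr / np.sqrt(G + eps))     # the effective step adapts to the gradient scale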
def step(self, net: NeuralNet) -> None:
    # Nesterov momentum: evaluate the gradient at a look-ahead point before stepping.
    for param, grad, prev in net.params_and_grads_v2():
        temp = copy.deepcopy(param)
        m = param - prev
        self.intermediate_step(net, m, param)
        param -= self.lr * grad
        # Store the pre-update parameter in place so the next step sees it as "prev".
        prev[...] = temp
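
# Standalone sketch (toy quadratic, illustrative gamma and lr, not part of joelnet)
# of the Nesterov look-ahead used by intermediate_step()/step() above: the gradient
# is evaluated at param + gamma * (param - prev) before the update is applied.
import numpy as np

def grad_f(x):
    return 2 * x                        # gradient of f(x) = x^2

gamma, lr = 0.9, 0.05
param = np.array([5.0])
prev = param.copy()
for _ in range(100):
    m = param - prev
    lookahead = param + gamma * m       # look-ahead point
    g = grad_f(lookahead)               # gradient evaluated at the look-ahead point
    prev = param.copy()
    param = param + gamma * m - lr * g  # momentum plus gradient step
print(param)  # approaches the minimizer at 0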
def train(net: NeuralNet,
          inputs: Tensor,
          targets: Tensor,
          num_epochs: int = 5000,
          iterator: DataIterator = BatchIterator(),
          loss: Loss = MSE(),
          optimizer: Optimizer = SGD(0.001),
          eps: float = -1) -> 'Tuple[List[float], List[int]]':
    loss_list = []
    eval_list = []
    net.n_eval = 0
    for epoch in range(num_epochs):
        n_iter = 0
        epoch_loss = 0.0
        print(f'================ EPOCH NUMBER {epoch + 1} ================')
        for batch in iterator(inputs, targets):
            net.n_iter = n_iter
            net.curr_batch = batch
            net.loss_f = loss
            predicted = net.forward(batch.inputs)
            curr_loss = loss.loss(predicted, batch.targets)
            epoch_loss += curr_loss
            grad = loss.grad(predicted, batch.targets)
            net.backward(grad)
            optimizer.step(net)
            n_iter = n_iter + 1
        eval_list.append(net.n_eval)
        print(epoch, epoch_loss)
        loss_list.append(epoch_loss)
        if eps > 0 and epoch_loss < eps:
            print('Target loss reached')
            break
    return loss_list, eval_list
def step(self, net: NeuralNet) -> None:
    # Adam: bias-corrected first and second moment estimates of the gradient.
    i = 0
    for param, grad in net.params_and_grads():
        G_temp = (self.gamma2 * self.G[i] + (1.0 - self.gamma2) * grad**2.0) / (
            1.0 - np.power(self.gamma2, net.n_iter + 1.0))
        m_temp = (self.gamma1 * self.m[i] + (1.0 - self.gamma1) * grad) / (
            1.0 - np.power(self.gamma1, net.n_iter + 1.0))
        # Use the freshly computed moments in the update (the stored values are
        # still the ones from the previous iteration at this point).
        param -= (self.lr * m_temp) / (np.sqrt(G_temp) + self.epsilon)
        self.G[i] = G_temp
        self.m[i] = m_temp
        i = i + 1
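
# Scalar sketch (made-up gradients and hyperparameters, not part of joelnet) of
# the standard Adam update behind step() above: exponential moving averages of
# the gradient and squared gradient, bias correction, then a step of
# lr * m_hat / (sqrt(G_hat) + eps). The implementation above folds the bias
# correction into the stored moments rather than keeping a separate m_hat/G_hat.
import numpy as np

gamma1, gamma2, lr, eps = 0.9, 0.999, 0.01, 1e-8
m, G = 0.0, 0.0
param = 1.0
for t, grad in enumerate([0.4, 0.3, 0.5, 0.2], start=1):
    m = gamma1 * m + (1 - gamma1) * grad
    G = gamma2 * G + (1 - gamma2) * grad**2
    m_hat = m / (1 - gamma1**t)        # bias-corrected first moment
    G_hat = G / (1 - gamma2**t)        # bias-corrected second moment
    param -= lr * m_hat / (np.sqrt(G_hat) + eps)
    print(t, param)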
import numpy as np

from joelnet.train import train
from joelnet.nn import NeuralNet
from joelnet.layers import Linear, Tanh
from joelnet.layers import Sigmoid  # tim def'ned, sept16

# logical xor is defined as:
# xor(bool1, bool2) := false if bool1 == bool2 else true
inputs = np.array([[0, 0], [1, 0], [0, 1], [1, 1]])
targets = np.array([[1, 0], [0, 1], [0, 1], [1, 0]])

# OR: use the Sigmoid activation layer I implemented
# NOTE: not quite as accurate, but pretty close...
# Sigmoid()  # instead of Tanh()

# instantiate the net (supply layers as an iterable, here a list)
net = NeuralNet([
    # layer 1: linear, takes the 2 input features
    Linear(input_size=2, output_size=2),
    # layer 2: activation layer, hyperbolic tangent
    Tanh(),
    # layer 3: linear output, returns the 2 output values
    Linear(input_size=2, output_size=2)
])

# check out attrs of the net + its class
# inspect.signature(net.backward)
# vars(NeuralNet)
#
# NeuralNet methods:
# - .forward(inputs): propagate inputs to the next layer
# - .backward(grad): propagate gradients to the previous layer
# - .params_and_grads(): generator yielding params and gradients

train(net, inputs, targets)

for x, y in zip(inputs, targets):
    predicted = net.forward(x)
    print(x, predicted, y)
def step(self, net: NeuralNet) -> None:
    # Verbose Levenberg-Marquardt step with a sufficient-decrease search on the
    # damping, printing diagnostics at every stage.
    count = 0
    for param, grad, jac in net.params_and_grads_v3():
        if count == 0:
            print('Taking the step for w1 and w2')
        else:
            print('Taking the step for w3 and w4')
        count = count + 1
        predicted = net.forward(net.curr_batch.inputs)
        loss_old = net.loss_f.loss(predicted, net.curr_batch.targets)
        lamb = self.lamb
        print('grad: ', grad)
        print('jac: ', jac)
        JTJ = jac.T @ jac
        print('jtj: ', JTJ)
        sh = grad.shape
        d = np.linalg.solve(JTJ + lamb * np.eye(len(JTJ)), -grad.flatten())
        d = d.reshape(sh)
        old_param = copy.deepcopy(param)
        param += d
        predicted = net.forward(net.curr_batch.inputs)
        loss = net.loss_f.loss(predicted, net.curr_batch.targets)
        print('param :', param)
        input('press Enter to continue')  # debug pause
        loop_count = 0
        print(f'loss: {loss} / '
              f'target: {loss_old - self.alpha * (np.linalg.norm(param - old_param))**2}')
        while not loss <= loss_old - self.alpha * (np.linalg.norm(param - old_param))**2:
            if loop_count == 0:
                print('entered the line search loop')
            lamb = 2 * lamb
            d = np.linalg.solve(JTJ + lamb * np.eye(len(JTJ)), -grad.flatten())
            d = d.reshape(sh)
            # Retry the step from the original point with the larger damping.
            param[...] = old_param + d
            predicted = net.forward(net.curr_batch.inputs)
            loss = net.loss_f.loss(predicted, net.curr_batch.targets)
            loop_count = loop_count + 1
            if lamb > 1e10:
                print('LAMBDA TOO LARGE')
                break
        if loop_count > 0:
            print(f'left the loop after {loop_count} iterations')
        else:
            print('did not enter the loop')
        net.n_eval = net.n_eval + loop_count + 1
eps = 5

inputs = []
for j in range(5):
    inputs.append([j])
inputs = np.array(inputs)
targets = inputs**2

np.random.seed(20)
net = NeuralNet([
    Linear(input_size=1, output_size=2,
           weights=np.random.randn(1, 2),
           biases=np.random.randn(2)),
    reLu(),
    Linear(input_size=2, output_size=2,
           weights=np.random.randn(2, 2),
           biases=np.random.randn(2)),
    reLu(),
    Linear(input_size=2, output_size=1,
           weights=np.random.randn(2, 1),
           biases=np.random.randn(1))
])

start_time = time.time()
try:
    loss_list, eval_list = train(net,
                                 inputs,
                                 targets,
                                 loss=MSE(),
                                 optimizer=LM_cond(1e15),
y_test = np.array(y_test)
inputs = X_train
targets = np.array(y_train)

np.random.seed(2)
net = NeuralNet([
    Linear(input_size=30, output_size=24,
           weights=np.random.randn(30, 24),
           biases=np.random.randn(24)),
    Tanh(),
    Linear(input_size=24, output_size=30,
           weights=np.random.randn(24, 30),
           biases=np.random.randn(30)),
    Tanh(),
    Linear(input_size=30, output_size=35,
           weights=np.random.randn(30, 35),
           biases=np.random.randn(35)),
    Tanh(),
    Linear(input_size=35, output_size=1,
           weights=np.random.randn(35, 1),
           biases=np.random.randn(1)),
    Sigmoid()
])

# net = NeuralNet([
#     Linear(input_size=30, output_size=2),
#     Tanh(),
#     Linear(input_size=2, output_size=1),
        return [0, 1, 0, 0]
    else:
        return [1, 0, 0, 0]


def binary_encode(x: int) -> List[int]:
    """ 10 digit binary encoding of x """
    return [x >> i & 1 for i in range(10)]


inputs = np.array([binary_encode(x) for x in range(101, 1024)])
targets = np.array([fizz_buzz_encode(x) for x in range(101, 1024)])

net = NeuralNet([
    Linear(input_size=10, output_size=50),
    Tanh(),
    Linear(input_size=50, output_size=4)
])

train(net, inputs, targets, num_epochs=5000, optimizer=SGD(lr=0.001))

for x in range(1, 101):
    predicted = net.forward(binary_encode(x))
    predicted_idx = np.argmax(predicted)
    actual_idx = np.argmax(fizz_buzz_encode(x))
    labels = [str(x), "fizz", "buzz", "fizzbuzz"]
    print(x, labels[predicted_idx], labels[actual_idx])
def step(self, net: NeuralNet) -> None:
    for param, grad in net.params_and_grads():
        param -= self.lr * grad
import numpy as np
import matplotlib.pyplot as plt

from joelnet.train import train
from joelnet.nn import NeuralNet
from joelnet.layers import Linear, Tanh, Sigmoid, reLu
from joelnet.data import BatchIterator
from joelnet.optim import SGD, RMSProp, SGD_Nesterov, Adam
from joelnet.loss import MSE, Log_loss
import random

inputs = np.array([[1], [2], [3], [4], [5]])
targets = np.array([[1], [4], [9], [16], [25]])

net = NeuralNet([
    Linear(input_size=1, output_size=2),
    reLu(),
    Linear(input_size=2, output_size=1)
])

n_epochs = 1
# Alternative:
# loss_list = train(net, inputs, targets,
#                   optimizer=Adam(lr=1e-2, gamma1=0.3, gamma2=0.3),
#                   iterator=BatchIterator(batch_size=5), num_epochs=1000)
loss_list = train(net,
                  inputs,
                  targets,
                  loss=MSE(),
                  optimizer=SGD(lr=1e-3),
                  iterator=BatchIterator(batch_size=5),
                  num_epochs=n_epochs)

for x, y in zip(inputs, targets):
    predicted = net.forward(x)
    print(x, predicted, y)
""" The canonical example of a function that can't be learned with a simple linear model is XOR """ import numpy as np from joelnet.train import train from joelnet.nn import NeuralNet from joelnet.layers import Linear, Tanh inputs = np.array([[0, 0], [1, 0], [0, 1], [1, 1]]) targets = np.array([[1, 0], [0, 1], [0, 1], [1, 0]]) net = NeuralNet([ Linear(input_size=2, output_size=2), Tanh(), Linear(input_size=2, output_size=2) ]) train(net, inputs, targets) for x, y in zip(inputs, targets): predicted = net.forward(x) print(x, predicted, y)
def binary_encode(x: int) -> List[int]:
    """ 10 digit binary encoding of x """
    return [x >> i & 1 for i in range(10)]


def binary_decode(bitlist: List) -> int:
    pass


inputs = np.array([binary_encode(x) for x in range(101, 1024)])
targets = np.array([fizz_buzz_encode(x) for x in range(101, 1024)])

net = NeuralNet([
    Linear(input_size=10, output_size=50),
    Tanh(),
    Linear(input_size=50, output_size=4)
])

train(net, inputs, targets, num_epochs=20, loss=MSE(), optimizer=SGD(lr=0.001))

inputs = np.array([binary_encode(x) for x in range(1, 101)])
targets = np.array([fizz_buzz_encode(x) for x in range(1, 101)])
labels = ["x", "fizz", "buzz", "fizzbuzz"]

test(net, inputs, targets, labels, binary_decode)
#     reLu(),
#     Linear(input_size=16, output_size=24),
#     reLu(),
#     Linear(input_size=24, output_size=20),
#     reLu(),
#     Linear(input_size=20, output_size=24),
#     reLu(),
#     Linear(input_size=24, output_size=1),
#     Sigmoid(),
#     Linear(input_size=1, output_size=1)
# ])

net = NeuralNet([
    Linear(input_size=30, output_size=24),
    Tanh(),
    Linear(input_size=24, output_size=30),
    Tanh(),
    Linear(input_size=30, output_size=35),
    Tanh(),
    Linear(input_size=35, output_size=1),
    Sigmoid()
])

n_epochs = 200
loss_list = train(net,
                  inputs,
                  targets,
                  optimizer=Adam(lr=1e-2, gamma1=0.3, gamma2=0.4),
                  iterator=BatchIterator(128),
                  num_epochs=n_epochs)

y_pred = []
for x in X_test[0:1000]:
    return [x >> i & 1 for i in range(num_bits)]


NUM_ENCODE_BITS = 10
NUM_EPOCHS = 10000

inputs = np.array([
    binary_encode(x, NUM_ENCODE_BITS) for x in range(101, 1024)
])
targets = np.array([
    fizz_buzz_encode(x) for x in range(101, 1024)
])

net = NeuralNet([
    Linear(input_size=NUM_ENCODE_BITS, output_size=50),
    Tanh(),
    Linear(input_size=50, output_size=4)
])

train(net=net,
      inputs=inputs,
      targets=targets,
      num_epochs=NUM_EPOCHS,
      optimizer=SGD(lr=0.001))

for x in range(1, 101):
    predicted = net.forward(inputs=binary_encode(x, NUM_ENCODE_BITS))
    predicted_idx = np.argmax(predicted)  # largest value is the predicted class
    actual_idx = np.argmax(fizz_buzz_encode(x))
    labels = [str(x), 'fizz', 'buzz', 'fizzbuzz']
    print(x, labels[predicted_idx], labels[actual_idx])
    inputs.append([i])
inputs = np.array(inputs)
# targets = np.array([[1], [4], [9], [16], [25]])
targets = inputs**2

net = NeuralNet([
    Linear(input_size=1, output_size=2,
           weights=np.array([[1.0, 2.0]]),
           biases=np.array([0.0, 0.0])),
    reLu(),
    Linear(input_size=2, output_size=1,
           weights=np.array([[3.0], [4.0]]),
           biases=np.array([0.0])),
    reLu()
])

n_epochs = 1000
# Alternative:
# loss_list = train(net, inputs, targets,
#                   optimizer=Adam(lr=1e-2, gamma1=0.3, gamma2=0.3),
#                   iterator=BatchIterator(batch_size=5), num_epochs=1000)
start_time = time.time()
loss_list = train(net,
                  inputs,
                  targets,
                  loss=MSE(),
                  optimizer=SGD(1e-5),
                  iterator=BatchIterator(batch_size=5),
                  num_epochs=n_epochs,
                  eps=2000)
end_time = time.time()
print(f'Time spent on training: {end_time - start_time}s')
import numpy as np
import matplotlib.pyplot as plt

from joelnet.train import train
from joelnet.nn import NeuralNet
from joelnet.layers import Linear, Tanh, Sigmoid, reLu
from joelnet.optim import Optimizer, SGD, Adam
from joelnet.data import BatchIterator
from joelnet.loss import Log_loss

inputs = np.array([[0, 0], [1, 0], [0, 1], [1, 1]])
targets = np.array([[0], [1], [1], [0]])

net = NeuralNet([
    Linear(input_size=2, output_size=4),
    Sigmoid(),
    Linear(input_size=4, output_size=4),
    Sigmoid(),
    Linear(input_size=4, output_size=1),
    Sigmoid()
])

n_epochs = 10000
loss_list = train(net,
                  inputs,
                  targets,
                  loss=Log_loss(),
                  optimizer=SGD(lr=1e-5),
                  iterator=BatchIterator(4),
                  num_epochs=n_epochs)

for x, y in zip(inputs, targets):
    predicted = net.forward(x)
    print(x, predicted, y)