def fit(model):
    # Train `model` with plain SGD; the dataset and batching globals
    # (train_input, train_target, test_input, test_target, num_batches,
    # batch_size) are assumed to be defined at module level.
    criterion = MSE()
    optimizer = SGD(model.parameters(), model.grads(), lr=0.1)
    losses = []
    print('Epoch | Loss')
    for epoch in range(500):
        epoch_loss = 0
        for b in range(num_batches):
            batch_input = train_input[b * batch_size:(b + 1) * batch_size]
            batch_target = train_target[b * batch_size:(b + 1) * batch_size]
            batch_output = model(batch_input)
            batch_loss = criterion(batch_output, batch_target)
            epoch_loss += batch_loss
            # Manual backward pass: loss gradient first, then through the model.
            output_grad = criterion.backward()
            model.backward(output_grad)
            optimizer.step()
            optimizer.zero_grad()
        losses.append(epoch_loss.item() / num_batches)
        print(f'{epoch+1:>5} | {epoch_loss.item() / num_batches:.5f}')
    train_output = model(train_input)
    print(f'\nTrain Error: {sum(train_output.argmax(1) != train_target.argmax(1)).item() / len(train_output)}')
    test_output = model(test_input)
    print(f'Test Error: {sum(test_output.argmax(1) != test_target.argmax(1)).item() / len(test_output)}')
    return losses
from time import time

def our_fit(model, epochs=500, verbose=False):
    # Same training loop as fit() above, but with a configurable epoch count,
    # optional per-epoch logging, and wall-clock timing of the whole run.
    criterion = MSE()
    optimizer = SGD(model.parameters(), model.grads(), lr=0.1)
    start = time()
    losses = []
    if verbose:
        print('Epoch | Loss')
    for epoch in range(epochs):
        epoch_loss = 0
        for b in range(num_batches):
            batch_input = train_input[b * batch_size:(b + 1) * batch_size]
            batch_target = train_target[b * batch_size:(b + 1) * batch_size]
            batch_output = model(batch_input)
            batch_loss = criterion(batch_output, batch_target)
            epoch_loss += batch_loss
            output_grad = criterion.backward()
            model.backward(output_grad)
            optimizer.step()
            optimizer.zero_grad()
        losses.append(epoch_loss.item() / num_batches)
        if verbose:
            print(f'{epoch+1:>5} | {epoch_loss.item() / num_batches:.5f}')
    end = time()
    train_output = model(train_input)
    print(f'\nTrain Error: {sum(train_output.argmax(1) != train_target.argmax(1)).item() / len(train_output)}')
    test_output = model(test_input)
    print(f'Test Error: {sum(test_output.argmax(1) != test_target.argmax(1)).item() / len(test_output)}')
    return losses, end - start
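# A minimal usage sketch, not part of the original code: `Sequential`,
# `Linear`, and `ReLU` are hypothetical stand-ins for whatever modules this
# mini-framework provides, and the 2-in/2-out shapes are assumptions.
model = Sequential(Linear(2, 25), ReLU(), Linear(25, 2))
losses, elapsed = our_fit(model, epochs=500, verbose=True)
print(f'Training took {elapsed:.1f}s')

# Plot the per-epoch average loss that our_fit returns.
import matplotlib.pyplot as plt
plt.plot(losses)
plt.xlabel('Epoch')
plt.ylabel('Average batch MSE')
plt.show()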
# Assumed imports: shuffle and accuracy_score look like scikit-learn's
# helpers; Tensor, cross_entropy_loss, my_net, optimizer, loss_history,
# epoch, and epoch_steps come from earlier in the notebook.
import numpy as np
from sklearn.metrics import accuracy_score
from sklearn.utils import shuffle
from tqdm import tqdm

avg_loss = avg_acc = 0
for e in range(epoch):
    # Decay the learning rate tenfold every third epoch.
    if e and e % 3 == 0:
        optimizer.lr *= 0.1
    train_loss = train_acc = 0
    e_data, e_labels = shuffle(train_data, train_labels)
    with tqdm(total=epoch_steps) as pbar:
        for x, t in zip(np.array_split(e_data, epoch_steps),
                        np.array_split(e_labels, epoch_steps)):
            x = Tensor(x[:, None])  # add a channel dimension
            t = Tensor(t)
            optimizer.zero_grad()
            logits = my_net(x)
            loss, grad = cross_entropy_loss(logits, t)
            acc = accuracy_score(t, logits.argmax(1))
            logits.backward(grad)
            optimizer.step()
            loss_history.append(loss)
            train_loss += loss
            train_acc += acc
            # Exponentially smoothed loss; the fragment was cut off after the
            # decay step, so the `+= 0.02 * loss` update is a reconstruction.
            if not avg_loss:
                avg_loss = loss
            else:
                avg_loss *= 0.98
                avg_loss += 0.02 * loss
            pbar.update(1)  # advance the progress bar (assumed, not in the fragment)
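    # A hedged sketch of per-epoch reporting: the fragment above ends before
    # train_loss and train_acc are used, so this continuation is an assumption
    # that simply averages the accumulated sums over the epoch's steps.
    print(f'epoch {e + 1}: loss={train_loss / epoch_steps:.4f} '
          f'acc={train_acc / epoch_steps:.4f}')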