def test(config, seq_size, n_examples):
    """Print a saved RNN's predictions for a few palindromes.

    Loads the model stored at ``./Results/RNN/<seq_size>_RNN_model``, runs it
    on one batch drawn from ``PalindromeDataset(seq_size + 1)`` and prints,
    for the first ``n_examples`` samples, the input and the predicted last
    digit (argmax over the model output).

    Args:
        config: Settings object; ``batch_size`` and ``device`` are read.
        seq_size: Selects the model file; the dataset is built with
            ``seq_size + 1``.
        n_examples: Number of samples to print. Values larger than the batch
            are capped at the batch size.
    """
    # Initialize the dataset and data loader
    dataset = PalindromeDataset(seq_size + 1)
    data_loader = DataLoader(dataset, config.batch_size, num_workers=1)

    # Get one batch to test; the targets are not needed for printing.
    batch_inputs, _ = next(iter(data_loader))
    # Move the batch instead of re-wrapping it with torch.tensor(), which
    # copy-constructs and warns when handed an existing tensor.
    x = batch_inputs.to(config.device)

    # Load the trained model.
    # NOTE: torch.load unpickles the file - only load checkpoints you trust.
    model = torch.load('./Results/RNN/' + str(seq_size) + '_RNN_model',
                       map_location=config.device)
    model.to(config.device)
    model.eval()

    # Get predictions for the batch
    with torch.no_grad():
        pred = model(x)

    print('\n----------------------\nSequence length: ', str(seq_size + 1),
          '\n----------------------')
    # Never index past the end of the batch.
    n_shown = min(n_examples, len(batch_inputs))
    for i in range(n_shown):
        print('\nTesting on palindrome', str(i + 1),
              ':\n---------------\n\nInput:', str(batch_inputs[i].tolist()),
              '\nPredicted last digit:', str(pred[i, :].argmax().item()), '\n')
def train(config, input_length):
    """Train a VanillaRNN on ``PalindromeDataset(input_length + 1)``.

    Args:
        config: Settings object; ``input_dim``, ``num_hidden``,
            ``num_classes``, ``batch_size``, ``learning_rate``, ``max_norm``
            and ``train_steps`` are read.
        input_length: Sequence length passed to the model; the dataset is
            built with ``input_length + 1``.

    Returns:
        Tuple ``(model, losses, accuracies)``: the trained model, the mean
        training loss of each logging window (floats) and the value returned
        by ``accuracy`` at each logging step.
    """
    # Initialize the model that we are going to use
    model = VanillaRNN(input_length, config.input_dim, config.num_hidden,
                       config.num_classes, config.batch_size)
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    model.to(device)

    # Initialize the dataset and data loader (leave the +1)
    dataset = PalindromeDataset(input_length + 1)
    data_loader = DataLoader(dataset, config.batch_size, num_workers=1)

    # Setup the loss and optimizer
    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=config.learning_rate)

    losses = []
    accuracies = []
    # Kept separate from the per-batch loss tensor so the running sum is not
    # overwritten on every iteration.
    running_loss = 0.0
    steps_in_window = 0

    for step, (batch_inputs, batch_targets) in enumerate(data_loader):
        batch_inputs = batch_inputs.to(device)
        batch_targets = batch_targets.to(device)

        optimizer.zero_grad()
        outputs = model(batch_inputs)
        loss = criterion(outputs, batch_targets)
        loss.backward()
        # Deal with exploding gradients. Clipping must happen between
        # backward() and step(), otherwise the update uses unclipped values.
        torch.nn.utils.clip_grad_norm_(model.parameters(),
                                       max_norm=config.max_norm)
        optimizer.step()

        running_loss += loss.item()
        steps_in_window += 1

        if step % 10 == 0:
            # Average over the steps actually accumulated (1 at step 0).
            mean_loss = running_loss / steps_in_window
            print('[step: %5d] loss: %.4f' % (step, mean_loss))
            losses.append(mean_loss)
            running_loss = 0.0
            steps_in_window = 0

            accu = accuracy(outputs, batch_targets)
            accuracies.append(accu)
            print('Accuracy on training dataset: %.3f %%' % (accu))

        if step == config.train_steps:
            # If you receive a PyTorch data-loader error, check this bug report:
            # https://github.com/pytorch/pytorch/pull/9655
            break

    print('Done training.')
    return model, losses, accuracies
def train(config):
    """Train a VanillaRNN on palindromes and plot accuracy and loss.

    Every 25 steps the step index, loss and batch accuracy (in percent) are
    appended to ``plot_step``, ``plot_loss`` and ``plot_accuracy``; after
    training both curves are shown with matplotlib.

    Args:
        config: Settings object; ``input_length``, ``input_dim``,
            ``num_hidden``, ``num_classes``, ``batch_size``,
            ``learning_rate``, ``max_norm`` and ``train_steps`` are read.
    """
    print('Vanilla RNN is WORKING...')
    model = VanillaRNN(config.input_length, config.input_dim,
                       config.num_hidden, config.num_classes,
                       config.batch_size)

    dataset = PalindromeDataset(config.input_length + 1)
    data_loader = DataLoader(dataset, config.batch_size, num_workers=1)

    # Setup the loss and optimizer
    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.RMSprop(model.parameters(), config.learning_rate,
                                    alpha=0.99, eps=1e-08, weight_decay=0,
                                    momentum=0, centered=False)

    # NOTE(review): plot_step, plot_loss and plot_accuracy are not defined in
    # this function - presumably module-level lists; confirm.
    for step, (batch_inputs, batch_targets) in enumerate(data_loader):
        optimizer.zero_grad()
        batch_inputs = batch_inputs.unsqueeze(0)
        output = model(batch_inputs)[0]
        loss = criterion(output, batch_targets)
        loss.backward()
        # Deal with exploding gradients. Clip after backward() and before
        # step(); clipping before zero_grad() only touches stale gradients.
        torch.nn.utils.clip_grad_norm_(model.parameters(),
                                       max_norm=config.max_norm)
        optimizer.step()

        # Fraction of samples whose argmax matches the target.
        pred = output.argmax(dim=1)
        accuracy = (pred == batch_targets).float().mean().item()

        if step % 25 == 0:
            plot_step.append(step)
            # Store a float so the autograd graph of the batch is released.
            plot_loss.append(loss.item())
            plot_accuracy.append(accuracy * 100)

        if step == config.train_steps:
            # If you receive a PyTorch data-loader error, check this bug report:
            # https://github.com/pytorch/pytorch/pull/9655
            break

    ax_accuracy = plt.subplot(2, 1, 1)
    ax_loss = plt.subplot(2, 1, 2)
    ax_accuracy.plot(plot_step, plot_accuracy, c='red', label='accuracy')
    ax_accuracy.legend()
    ax_loss.plot(plot_step, plot_loss, c='green', label='loss')
    ax_loss.legend()
    plt.show()
    print('Done training.')
def train(config):
    """Train a VanillaRNN on palindromes, printing loss and accuracy.

    Each batch is transposed (dims 0 and 1 swapped) before being fed to the
    model. Every 10 steps the batch loss and accuracy (percent) are printed.

    Args:
        config: Settings object; ``input_length``, ``input_dim``,
            ``num_hidden``, ``num_classes``, ``batch_size``,
            ``learning_rate``, ``max_norm`` and ``train_steps`` are read.
    """
    # Initialize the model that we are going to use
    model = VanillaRNN(config.input_length, config.input_dim,
                       config.num_hidden, config.num_classes,
                       config.batch_size)

    # Initialize the dataset and data loader (leave the +1)
    dataset = PalindromeDataset(config.input_length + 1)
    data_loader = DataLoader(dataset, config.batch_size, num_workers=1)

    # Setup the loss and optimizer
    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.RMSprop(model.parameters(),
                                    lr=config.learning_rate)

    for step, (batch_inputs, batch_targets) in enumerate(data_loader):
        n = batch_inputs.size(0)
        batch_inputs_T = torch.transpose(batch_inputs, 0, 1)
        y_hat_oh = model(batch_inputs_T)

        loss = criterion(y_hat_oh, batch_targets)
        optimizer.zero_grad()
        loss.backward()
        # Deal with exploding gradients. Gradients only exist after
        # backward(), so this is the only place where clipping has an effect.
        torch.nn.utils.clip_grad_norm_(model.parameters(),
                                       max_norm=config.max_norm)
        optimizer.step()

        # Count rows whose highest-scoring class equals the target.
        hit = (y_hat_oh.argmax(dim=1) == batch_targets).sum().item()
        accuracy = hit / n * 100

        if step % 10 == 0:
            print("loss: ", loss.item())
            print("accuracy: ", accuracy)

        if step == config.train_steps:
            # If you receive a PyTorch data-loader error, check this bug report:
            # https://github.com/pytorch/pytorch/pull/9655
            break

    print('Done training.')
def main_grads(config):
    """Write the mean absolute gradient of each hidden state to a file.

    Builds an untrained model for sequence length ``config.input_length - 1``,
    runs one batch forward and backward, and writes one ``index, value`` row
    per entry of ``model.hidden_states`` to
    ``results/<model_type>_grad_hidden_state_seq.txt`` (also printing each
    value).

    Args:
        config: Settings object; ``model_type`` ('RNN' or 'LSTM'), ``device``,
            ``input_length``, ``input_dim``, ``num_hidden``, ``num_classes``
            and ``batch_size`` are read.
    """
    assert config.model_type in ('RNN', 'LSTM')

    # Initialize the device which to run the model on
    device = torch.device(config.device)

    model_def = VanillaRNN if config.model_type == "RNN" else LSTM
    criterion = nn.CrossEntropyLoss()
    grads_file = "results/{}_grad_hidden_state_seq.txt".format(
        config.model_type)

    # The range holds the single value input_length - 1.
    for seq in range(config.input_length - 1, config.input_length):
        model = model_def(seq, config.input_dim, config.num_hidden,
                          config.num_classes, config.device).to(device)

        # Initialize the dataset and data loader (note the +1)
        dataset = PalindromeDataset(seq + 1)
        data_loader = DataLoader(dataset, config.batch_size, num_workers=1)

        batch_inputs, batch_targets = next(iter(data_loader))
        # The model lives on `device`, so the batch has to be moved there too.
        batch_inputs = batch_inputs.to(device).requires_grad_(True)
        batch_targets = batch_targets.to(device)

        # NOTE(review): x.grad below is only populated if the model retains
        # gradients on its hidden states during forward - confirm in the model.
        _, model_outputs = model.forward(batch_inputs)
        loss = criterion(model_outputs, batch_targets)
        loss.backward()

        # newline='' lets the csv module control line endings itself.
        with open(grads_file, 'w', newline='') as fd:
            writer = csv.writer(fd)
            for i, x in enumerate(model.hidden_states):
                mean_abs_grad = x.grad.abs().mean().item()
                print(mean_abs_grad)
                writer.writerow([i, mean_abs_grad])
def test(model, config, input_length, max_steps=2000):
    """Score ``model`` on batches from ``PalindromeDataset(input_length + 1)``.

    Every 10th batch is passed through the model and scored with the
    ``accuracy`` helper; the loop stops after step ``max_steps``.

    Args:
        model: Model to evaluate; called as ``model(batch_inputs)``.
        config: Settings object; ``batch_size`` is read.
        input_length: The dataset is built with ``input_length + 1``.
        max_steps: Index of the last step to run (default 2000).

    Returns:
        List of the values returned by ``accuracy`` for each scored batch.
    """
    # Initialize the dataset and data loader (leave the +1)
    dataset = PalindromeDataset(input_length + 1)
    data_loader = DataLoader(dataset, config.batch_size, num_workers=1)

    # Feed batches on whatever device the model's parameters live on.
    first_param = next(model.parameters(), None)
    device = first_param.device if first_param is not None else None

    accuracies = []
    # No gradients are needed for evaluation.
    with torch.no_grad():
        for step, (batch_inputs, batch_targets) in enumerate(data_loader):
            if step % 10 == 0:
                if device is not None:
                    batch_inputs = batch_inputs.to(device)
                    batch_targets = batch_targets.to(device)
                outputs = model(batch_inputs)
                accuracies.append(accuracy(outputs, batch_targets))

            if step == max_steps:
                # If you receive a PyTorch data-loader error, check this bug
                # report: https://github.com/pytorch/pytorch/pull/9655
                break

    print('Done testing.')
    return accuracies
def train(config):
    """Train an RNN or LSTM on palindromes and log metrics to a CSV file.

    Creates the ``results``, ``checkpoints`` and ``assets`` directories if
    missing. Every 10 steps (except step 0) it prints a progress line and
    appends ``step, loss, accuracy`` to a timestamped CSV file in ``results``.

    Args:
        config: Settings object; ``model_type`` ('RNN' or 'LSTM'), ``device``,
            ``input_length``, ``input_dim``, ``num_hidden``, ``num_classes``,
            ``batch_size``, ``learning_rate``, ``max_norm`` and
            ``train_steps`` are read.
    """
    assert config.model_type in ('RNN', 'LSTM')

    # Initialize the device which to run the model on
    device = torch.device(config.device)

    # Initialize the model that we are going to use
    model_def = VanillaRNN if config.model_type == "RNN" else LSTM
    model = model_def(config.input_length, config.input_dim,
                      config.num_hidden, config.num_classes,
                      config.device).to(device)

    # Initialize the dataset and data loader (note the +1)
    dataset = PalindromeDataset(config.input_length + 1)
    data_loader = DataLoader(dataset, config.batch_size, num_workers=1)

    # Setup the loss and optimizer
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.RMSprop(model.parameters(), config.learning_rate)

    # Init csv file
    for d in ["results", "checkpoints", "assets"]:
        os.makedirs(d, exist_ok=True)
    csv_file = 'results/w_grad_{}_inputlength_{}_hiddenunits_{}_lr_{}_batchsize_{}_{}.csv'.format(
        config.model_type, config.input_length, config.num_hidden,
        config.learning_rate, config.batch_size, int(time.time()))
    # NOTE(review): the header names four columns but each data row below has
    # three values ("avg_grad_w" is never written) - confirm what was intended.
    cols_data = ['step', 'train_loss', 'train_accuracy', "avg_grad_w"]
    with open(csv_file, 'a', newline='') as fd:
        csv.writer(fd).writerow(cols_data)

    for step, (batch_inputs, batch_targets) in enumerate(data_loader):
        # Only for time measurement of step through network
        t1 = time.time()

        # Move tensors to the device for GPU training
        batch_inputs = batch_inputs.to(device)
        batch_targets = batch_targets.to(device)

        # Without this reset, gradients accumulate across steps.
        optimizer.zero_grad()
        _, model_outputs = model.forward(batch_inputs)
        loss = criterion(model_outputs, batch_targets)
        loss.backward()

        # Rescale gradients so their total norm is at most max_norm before
        # the update (guards against exploding gradients).
        torch.nn.utils.clip_grad_norm_(model.parameters(),
                                       max_norm=config.max_norm)
        optimizer.step()

        loss_value = loss.item()
        accuracy = calc_accuracy(model_outputs, batch_targets)

        # Just for time measurement
        t2 = time.time()
        examples_per_second = config.batch_size / float(t2 - t1)

        if step % 10 == 0 and step > 0:
            print(
                "[{}] Train Step {:04d}/{:04d}, Batch Size = {}, Examples/Sec = {:.2f}, "
                "Accuracy = {:.2f}, Loss = {:.3f}".format(
                    datetime.now().strftime("%Y-%m-%d %H:%M"), step,
                    config.train_steps, config.batch_size,
                    examples_per_second, accuracy, loss_value))
            # Append per log step so partial results survive a crash.
            with open(csv_file, 'a', newline='') as fd:
                csv.writer(fd).writerow([step, loss_value, accuracy])

        if step == config.train_steps:
            # If you receive a PyTorch data-loader error, check this bug report:
            # https://github.com/pytorch/pytorch/pull/9655
            break

    print('Done training.')
def train(config):
    """Train an RNN or LSTM on palindromes with accuracy-based early stopping.

    Every 10 steps a progress line and the sum of the last 50 batch
    accuracies divided by 50 are printed; training stops early once that
    value equals 1.

    Args:
        config: Settings object; ``model_type`` ('RNN' or 'LSTM'), ``device``,
            ``input_length``, ``input_dim``, ``num_hidden``, ``num_classes``,
            ``batch_size``, ``learning_rate``, ``max_norm`` and
            ``train_steps`` are read.

    Returns:
        Tuple ``(best_accuracy, step)``: the highest batch accuracy seen (a
        float) and the index of the last executed step.
    """
    assert config.model_type in ('RNN', 'LSTM')

    # Initialize the device which to run the model on
    device = torch.device(config.device)

    # Initialize the model that we are going to use
    if config.model_type == "RNN":
        model = VanillaRNN(seq_length=config.input_length,
                           input_dim=config.input_dim,
                           num_hidden=config.num_hidden,
                           batch_size=config.batch_size,
                           num_classes=config.num_classes,
                           device=device)
    elif config.model_type == "LSTM":
        model = LSTM(seq_length=config.input_length,
                     input_dim=config.input_dim,
                     num_hidden=config.num_hidden,
                     num_classes=config.num_classes,
                     device=device,
                     batch_size=config.batch_size)
    model.to(device)

    # Initialize the dataset and data loader (note the +1)
    dataset = PalindromeDataset(config.input_length + 1)
    data_loader = DataLoader(dataset, config.batch_size, num_workers=1)

    # Setup the loss and optimizer
    criterion = torch.nn.CrossEntropyLoss()
    optimizer = torch.optim.RMSprop(model.parameters(),
                                    lr=config.learning_rate)

    # Training statistics, stored as plain floats so no autograd graph is
    # kept alive and numpy can consume them regardless of device.
    train_accuracies = []
    train_losses = []

    for step, (batch_inputs, batch_targets) in enumerate(data_loader):
        # Only for time measurement of step through network
        t1 = time.time()

        # Move and cast in one go; torch.tensor() on an existing tensor
        # copy-constructs and emits a warning.
        input_sequences = batch_inputs.to(device=device, dtype=torch.float)
        targets = batch_targets.to(device=device, dtype=torch.long)

        optimizer.zero_grad()
        predictions = model(input_sequences)

        accuracy = (predictions.argmax(dim=1) == targets).float().mean().item()

        loss = criterion(predictions, targets)
        loss.backward()

        # Gradients can grow very large when propagated through many time
        # steps; capping their norm keeps the update stable.
        torch.nn.utils.clip_grad_norm_(model.parameters(),
                                       max_norm=config.max_norm)
        optimizer.step()
        loss_value = loss.item()

        # Just for time measurement
        t2 = time.time()
        examples_per_second = config.batch_size / float(t2 - t1)

        train_accuracies.append(accuracy)
        train_losses.append(loss_value)

        if step % 10 == 0:
            print(
                "[{}] Train Step {:04d}/{:04d}, Batch Size = {}, Examples/Sec = {:.2f}, "
                "Accuracy = {:.2f}, Loss = {:.3f}".format(
                    datetime.now().strftime("%Y-%m-%d %H:%M"), step,
                    config.train_steps, config.batch_size,
                    examples_per_second, accuracy, loss_value))

            # Stop once the last 50 batches were all classified perfectly.
            # Dividing by 50 (not by the window length) means this cannot
            # trigger before 50 steps have been recorded.
            avg_accuracies = np.sum(train_accuracies[-50:]) / 50
            print(avg_accuracies)
            if avg_accuracies == 1:
                print(
                    "\nTraining finished for length: {} after {} steps".format(
                        config.input_length, step))
                print("Avg Accuracy : {:.3f}".format(avg_accuracies))
                break

        if step == config.train_steps:
            # If you receive a PyTorch data-loader error, check this bug report:
            # https://github.com/pytorch/pytorch/pull/9655
            break

    print('Done training.')
    return max(train_accuracies), step
def train(config):
    """Train an RNN or LSTM on palindromes and save logged accuracies.

    Every 10 steps a progress line is printed and the batch accuracy is
    recorded. After training, the recorded accuracies are written to
    ``results/<model_type><input_length + 1>.txt`` as one header line followed
    by the values, each followed by ``", "``.

    Args:
        config: Settings object; ``model_type`` ('RNN' or 'LSTM'), ``device``,
            ``input_length``, ``input_dim``, ``num_hidden``, ``num_classes``,
            ``batch_size``, ``learning_rate``, ``max_norm`` and
            ``train_steps`` are read.
    """
    import os  # local import: only needed to create the output directory

    assert config.model_type in ('RNN', 'LSTM')

    # Initialize the device which to run the model on
    device = torch.device(config.device)

    # Initialize the model that we are going to use
    model_cls = VanillaRNN if config.model_type == 'RNN' else LSTM
    model = model_cls(config.input_length, config.input_dim,
                      config.num_hidden, config.num_classes,
                      config.batch_size, device=config.device)
    # The batches are moved to `device`, so make sure the model is there too.
    model.to(device)

    # Initialize the dataset and data loader (note the +1)
    dataset = PalindromeDataset(config.input_length + 1)
    data_loader = DataLoader(dataset, config.batch_size, num_workers=1)

    # Setup the loss and optimizer
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.RMSprop(model.parameters(), lr=config.learning_rate)

    # Collected as a list and joined once at the end.
    logged_accuracies = []

    for step, (batch_inputs, batch_targets) in enumerate(data_loader):
        # Only for time measurement of step through network
        t1 = time.time()

        batch_inputs = batch_inputs.to(device)
        batch_targets = batch_targets.to(device)

        optimizer.zero_grad()
        nn_out = model(batch_inputs)
        loss = criterion(nn_out, batch_targets)
        loss.backward()

        # Rescale gradients so their total norm is at most max_norm before
        # the update (guards against exploding gradients).
        nn.utils.clip_grad_norm_(model.parameters(), max_norm=config.max_norm)
        optimizer.step()

        correct = torch.sum(nn_out.argmax(dim=1) == batch_targets)
        accuracy = (correct.to(torch.float) / config.batch_size).item()

        # Just for time measurement
        t2 = time.time()
        examples_per_second = config.batch_size / float(t2 - t1)

        if step % 10 == 0:
            # Adjacent literals instead of a backslash continuation inside
            # the string, which embedded source indentation in the output.
            print("[{}] Train Step {:04d}/{:04d}, Batch Size = {}, "
                  "Examples/Sec = {:.2f}, "
                  "Accuracy = {:.2f}, Loss = {:.3f}".format(
                      datetime.now().strftime("%Y-%m-%d %H:%M"), step,
                      config.train_steps, config.batch_size,
                      examples_per_second, accuracy, loss.item()))
            logged_accuracies.append(accuracy)

        if step == config.train_steps:
            # If you receive a PyTorch data-loader error, check this bug report:
            # https://github.com/pytorch/pytorch/pull/9655
            break

    print('Done training.')

    results = 'palindrome length:' + str(config.input_length + 1) + '\n'
    results += ''.join(str(value) + ", " for value in logged_accuracies)

    # Make sure a finished run cannot fail on a missing directory.
    os.makedirs('results', exist_ok=True)
    out_path = ('results/' + str(config.model_type)
                + str(config.input_length + 1) + '.txt')
    with open(out_path, 'w') as f:
        f.write(results)
def _evaluate(model, data_loader, device, num_batches):
    """Return the accuracy of ``model`` over ``num_batches`` loader batches."""
    correct = 0
    total = 0
    with torch.no_grad():
        for i, (X, y) in enumerate(data_loader):
            X, y = X.to(device).float(), y.to(device)
            predicted = model(X).argmax(dim=1)
            total += y.size(0)
            correct += (predicted == y).sum().item()
            if (i + 1) % num_batches == 0:
                break
    return round(correct / total, 4) if total else 0.0


def train(net, input_length, print_log):
    """Train ``net`` on palindromes and track periodic evaluation accuracy.

    Uses the module-level constants ``INPUT_DIM``, ``NUM_HIDDEN``,
    ``OUTPUT_DIM``, ``NUM_BATCH_SIZE``, ``LEARNING_RATE``, ``TRAIN_STEPS``,
    ``MAX_NORM`` and ``TEST_SIZE``. Every 10 steps the model is scored on
    ``TEST_SIZE`` batches drawn from the same data loader.

    Args:
        net: Model class (or factory) called as
            ``net(input_length, INPUT_DIM, NUM_HIDDEN, OUTPUT_DIM,
            NUM_BATCH_SIZE)``.
        input_length: Sequence length passed to the model; the dataset is
            built with ``input_length + 1``.
        print_log: If true, print progress lines.

    Returns:
        Tuple ``(record_epochs, accs, losses, acc)``: the logged step indices,
        the accuracy and rounded training loss at those steps, and the
        accuracy measured once more after training.
    """
    # Use the GPU when present instead of hard-failing on CPU-only machines.
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    # Initialize the model that we are going to use
    model = net(input_length, INPUT_DIM, NUM_HIDDEN, OUTPUT_DIM,
                NUM_BATCH_SIZE).to(device)

    # Initialize the dataset and data loader (leave the +1)
    dataset = PalindromeDataset(input_length + 1)
    data_loader = DataLoader(dataset, NUM_BATCH_SIZE, num_workers=1)

    # Setup the loss and optimizer
    cross_entropy = torch.nn.CrossEntropyLoss()
    optimizer = torch.optim.RMSprop(model.parameters(), lr=LEARNING_RATE)
    # Decay the learning rate to avoid fluctuation. StepLR expects an integer
    # step_size; a fractional one would never line up with the step counter.
    lr_scheduler = torch.optim.lr_scheduler.StepLR(
        optimizer, step_size=max(1, TRAIN_STEPS // 20), gamma=0.96)

    record_epochs, accs, losses = [], [], []

    for step, (X, y) in enumerate(data_loader):
        X, y = X.to(device).float(), y.to(device)

        y_pred = model(X)
        loss = cross_entropy(y_pred, y)

        optimizer.zero_grad()
        loss.backward()
        # Deal with exploding gradients. Clipping has to happen before the
        # optimizer consumes the gradients.
        torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=MAX_NORM)
        optimizer.step()
        lr_scheduler.step()

        if step % 10 == 0:
            acc = _evaluate(model, data_loader, device, TEST_SIZE)
            avg_loss = round(loss.item(), 6)
            record_epochs.append(step)
            accs.append(acc)
            losses.append(avg_loss)
            if print_log:
                print('step: {}, loss: {}, test acc: {}'.format(
                    step, avg_loss, acc))

        if step == TRAIN_STEPS:
            # If you receive a PyTorch data-loader error, check this bug report:
            # https://github.com/pytorch/pytorch/pull/9655
            break

    # Final evaluation of the trained model.
    acc = _evaluate(model, data_loader, device, TEST_SIZE)

    if print_log:
        print('Done training.')
    return record_epochs, accs, losses, acc
def train(config):
    """Train an RNN or LSTM on palindromes with plateau-based early stopping.

    Batch accuracy and loss are averaged over each 10-step window. Training
    stops at ``config.train_steps`` or when, with more than 200 windows
    recorded, the mean accuracy of the last 100 windows is not higher than
    that of the 100 windows before. Progress lines are printed and, if
    ``config.log_path`` is non-empty, appended to that file.

    Args:
        config: Settings object; ``model_type`` ('RNN' or 'LSTM'), ``device``
            ('best' is replaced in place by 'cuda:0' or 'cpu'), ``embed_dim``,
            ``num_hidden``, ``num_classes``, ``input_length``, ``batch_size``,
            ``learning_rate``, ``max_norm``, ``train_steps`` and ``log_path``
            are read.

    Returns:
        Tuple ``(losses, accuracies)`` with one averaged value per window.
    """
    assert config.model_type in ('RNN', 'LSTM')

    # Initialize the device which to run the model on
    if config.device == 'best':
        config.device = 'cuda:0' if torch.cuda.is_available() else 'cpu'
    device = torch.device(config.device)

    # Initialize the model that we are going to use
    model_cls = VanillaRNN if config.model_type == 'RNN' else LSTM
    model = model_cls(config.embed_dim, config.num_hidden,
                      config.num_classes, device)

    # Initialize the dataset and data loader
    dataset = PalindromeDataset(config.input_length)
    data_loader = DataLoader(dataset, config.batch_size, num_workers=1)

    # Setup the loss and optimizer
    criterion = F.cross_entropy
    optimizer = optim.RMSprop(model.parameters(), lr=config.learning_rate)

    # Track metrics
    losses = []
    losses_last10 = []
    accuracies = []
    accuracies_last10 = []

    for step, (batch_inputs, batch_targets) in enumerate(data_loader):
        # Transpose the batch (dims 0 and 1 swapped) and cast to long.
        batch_inputs = batch_inputs.t().to(device=device, dtype=torch.long)
        batch_targets = batch_targets.to(device=device, dtype=torch.long)

        # Only for time measurement of step through network
        t1 = time.time()

        # Forward pass
        logits = model(batch_inputs)

        # Backprop
        optimizer.zero_grad()
        loss = criterion(logits, batch_targets)
        loss.backward()

        # Rescale gradients so their total norm is at most max_norm before
        # the update (guards against exploding gradients).
        torch.nn.utils.clip_grad_norm_(model.parameters(),
                                       max_norm=config.max_norm)
        optimizer.step()

        # Compute metrics
        accuracy = (logits.argmax(dim=1) == batch_targets).float().mean().item()
        loss_value = loss.item()

        # Just for time measurement
        t2 = time.time()
        examples_per_second = config.batch_size / float(t2 - t1)

        accuracies_last10.append(accuracy)
        losses_last10.append(loss_value)

        if step % 10 == 0:
            message = "[{}] Train Step {:04d}/{:04d}, Batch Size = {}, Examples/Sec = {:.2f}, Accuracy = {:.2f}, Loss = {:.3f}".format(
                datetime.now().strftime("%Y-%m-%d %H:%M"), step,
                config.train_steps, config.batch_size, examples_per_second,
                accuracy, loss_value)
            print(message)
            if config.log_path != "":
                with open(config.log_path, "a") as f:
                    f.write(message + "\n")

            accuracies.append(np.mean(accuracies_last10))
            losses.append(np.mean(losses_last10))
            accuracies_last10 = []
            losses_last10 = []

        # Early stopping: the last 100 windows (1000 iterations) were no
        # better on average than the 100 windows before them.
        stopping_criterion = (
            len(accuracies) > 200
            and np.mean(accuracies[-100:]) <= np.mean(accuracies[-200:-100]))

        if step == config.train_steps or stopping_criterion:
            # If you receive a PyTorch data-loader error, check this bug report:
            # https://github.com/pytorch/pytorch/pull/9655
            break

    # Also reached when the data loader runs out before train_steps, so the
    # function never returns None implicitly.
    print('Done training.')
    return losses, accuracies
def train():
    """Train an LSTM on palindromes and plot periodic evaluation results.

    Reads its settings from the ``config`` name in the enclosing scope. Every
    25 steps the model is scored on batches from the same data loader and the
    step, accuracy (percent) and current training loss are appended to
    ``plot_epoch``, ``plot_test_accuracy`` and ``plot_test_loss``; both curves
    are shown with matplotlib after training.
    """
    train_on_gpu = torch.cuda.is_available()
    device = torch.device('cuda' if train_on_gpu else 'cpu')

    # Initialize the model that we are going to use
    model = LSTM(config.input_length, config.input_dim, config.num_hidden,
                 config.num_classes, config.batch_size)
    model.to(device)

    # Initialize the dataset and data loader (leave the +1)
    dataset = PalindromeDataset(config.input_length + 1)
    data_loader = DataLoader(dataset, config.batch_size, num_workers=1)

    # Setup the loss and optimizer
    criterion = torch.nn.CrossEntropyLoss()
    optimizer = torch.optim.RMSprop(model.parameters(),
                                    lr=config.learning_rate)
    # Adjust learning rate
    lrs = torch.optim.lr_scheduler.StepLR(optimizer, step_size=30, gamma=0.96)

    # NOTE(review): plot_epoch, plot_test_accuracy and plot_test_loss are not
    # defined in this function - presumably module-level lists; confirm.
    for step, (batch_inputs, batch_targets) in enumerate(data_loader):
        # Same device/dtype handling for training and evaluation batches.
        batch_inputs = batch_inputs.to(device).float()
        batch_targets = batch_targets.to(device)

        prediction = model(batch_inputs)
        loss = criterion(prediction, batch_targets)

        optimizer.zero_grad()
        # NOTE(review): retain_graph=True is kept from the original; it is
        # only required if the model reuses graph state across batches.
        loss.backward(retain_graph=True)
        # Deal with exploding gradients. Clipping has to happen before the
        # optimizer consumes the gradients.
        torch.nn.utils.clip_grad_norm_(model.parameters(),
                                       max_norm=config.max_norm)
        optimizer.step()
        lrs.step()

        if step % 25 == 0:
            plot_epoch.append(step)
            correct = 0
            total = 0
            with torch.no_grad():
                for i, (test_input, test_target) in enumerate(data_loader):
                    test_input = test_input.to(device).float()
                    test_target = test_target.to(device)
                    eval_prediction = model(test_input)
                    total += len(test_target)
                    predicted = eval_prediction.argmax(dim=1)
                    correct += (predicted == test_target).sum().item()
                    # The number of evaluation batches equals the batch size
                    # (kept as in the original).
                    if i == len(test_target) - 1:
                        break
            accuracy = correct / total
            # The "test loss" recorded here is the current training loss.
            test_loss = loss.item()
            plot_test_accuracy.append(accuracy * 100)
            plot_test_loss.append(test_loss)

        if step == config.train_steps:
            break

    ax_accuracy = plt.subplot(2, 1, 1)
    ax_loss = plt.subplot(2, 1, 2)
    ax_accuracy.plot(plot_epoch, plot_test_accuracy, c='red', label='accuracy')
    ax_accuracy.legend()
    ax_loss.plot(plot_epoch, plot_test_loss, c='green', label='loss')
    ax_loss.legend()
    plt.show()
    print('Done training.')
def train(config):
    """Train an RNN or LSTM on palindromes, printing progress every 10 steps.

    Args:
        config: Settings object; ``model_type`` ('RNN' or 'LSTM'), ``device``,
            ``input_length``, ``input_dim``, ``num_hidden``, ``num_classes``,
            ``batch_size``, ``learning_rate``, ``max_norm`` and
            ``train_steps`` are read.
    """
    assert config.model_type in ('RNN', 'LSTM')

    # Initialize the device which to run the model on
    device = torch.device(config.device)

    model_cls = VanillaRNN if config.model_type == 'RNN' else LSTM
    model = model_cls(config.input_length, config.input_dim,
                      config.num_hidden, config.num_classes,
                      config.batch_size, device)
    model.to(device)

    # Initialize the dataset and data loader (note the +1)
    dataset = PalindromeDataset(config.input_length + 1)
    data_loader = DataLoader(dataset, config.batch_size, num_workers=1)

    # Setup the loss and optimizer
    criterion = torch.nn.CrossEntropyLoss()
    optimizer = torch.optim.RMSprop(model.parameters(),
                                    lr=config.learning_rate)

    for step, (batch_inputs, batch_targets) in enumerate(data_loader):
        # Only for time measurement of step through network
        t1 = time.time()

        # The loader yields CPU tensors; move them to where the model is.
        batch_inputs = batch_inputs.to(device)
        batch_targets = batch_targets.to(device)

        output = model(batch_inputs)
        loss = criterion(output, batch_targets)

        optimizer.zero_grad()
        loss.backward()
        # Cap the gradient norm so we don't get exploding gradients.
        torch.nn.utils.clip_grad_norm_(model.parameters(),
                                       max_norm=config.max_norm)
        optimizer.step()

        loss_value = loss.item()
        accuracy = (output.argmax(dim=1) == batch_targets).float().mean().item()

        # Just for time measurement
        t2 = time.time()
        examples_per_second = config.batch_size / float(t2 - t1)

        if step % 10 == 0:
            print(
                "[{}] Train Step {:04d}/{:04d}, Batch Size = {}, Examples/Sec = {:.2f}, "
                "Accuracy = {:.2f}, Loss = {:.3f}".format(
                    datetime.now().strftime("%Y-%m-%d %H:%M"), step,
                    config.train_steps, config.batch_size,
                    examples_per_second, accuracy, loss_value))

        if step == config.train_steps:
            # If you receive a PyTorch data-loader error, check this bug report:
            # https://github.com/pytorch/pytorch/pull/9655
            break

    print('Done training.')
def train(config):
    """Train an RNN or LSTM on one-hot palindromes and log test metrics.

    Inputs are one-hot encoded with ``config.input_dim`` classes. Every
    ``config.eval_freq`` steps the model is scored on one batch of
    ``config.test_size`` samples and ``step accuracy loss`` is written to
    ``config.out_file``. At step 0 the evaluation runs before the first
    parameter update so the untrained model is measured. Training stops at
    ``config.train_steps`` or once the rounded test accuracy equals 1.00.

    Args:
        config: Settings object; ``model_type`` ('RNN' or 'LSTM'), ``device``,
            ``input_length``, ``input_dim``, ``num_hidden``, ``num_classes``,
            ``batch_size``, ``test_size``, ``learning_rate``, ``train_steps``,
            ``max_norm``, ``eval_freq`` and ``out_file`` are read.
    """
    assert config.model_type in ('RNN', 'LSTM')

    # Initialize the device which to run the model on
    device = torch.device(config.device)

    # Initialize the model that we are going to use
    if config.model_type == "RNN":
        model = VanillaRNN(config.input_length, config.input_dim,
                           config.num_hidden, config.num_classes,
                           device=device)
    elif config.model_type == "LSTM":
        model = LSTM(config.input_length, config.input_dim,
                     config.num_hidden, config.num_classes, device=device)

    # Initialize the dataset and data loaders (note the +1)
    dataset = PalindromeDataset(config.input_length + 1)
    data_loader = DataLoader(dataset, config.batch_size, num_workers=1)
    test_loader = iter(DataLoader(dataset, config.test_size, num_workers=1))

    # Setup the loss and optimizer
    criterion = torch.nn.CrossEntropyLoss()
    optimizer = torch.optim.RMSprop(model.parameters(),
                                    lr=config.learning_rate)

    # The context manager closes the file even if training raises.
    with open(config.out_file, "w") as results:
        results.write(
            "#model_type : {}\n#input_length : {}\n#input_dim : {}\n#num_classes : {}\n#num_hidden : {}\n#batch_size : {}\n#learn_rate : {}\n#train_steps : {}\n#max_norm : {}\n"
            .format(config.model_type, config.input_length, config.input_dim,
                    config.num_classes, config.num_hidden, config.batch_size,
                    config.learning_rate, config.train_steps,
                    config.max_norm))
        results.write("#train_step accuracy loss\n")

        for step, (batch_inputs, batch_targets) in enumerate(data_loader):
            # Fix the number of classes explicitly; otherwise one_hot infers
            # it from the largest value present in this particular batch.
            batch_inputs = torch.nn.functional.one_hot(
                batch_inputs.type(torch.LongTensor),
                config.input_dim).type(torch.FloatTensor).to(device)
            batch_targets = batch_targets.to(device)

            optimizer.zero_grad()
            batch_y = model(batch_inputs)  # without softmax
            loss = criterion(batch_y, batch_targets)
            loss.backward()
            # Prevent gradients from exploding; they only exist after
            # backward(), so clipping earlier has no effect.
            torch.nn.utils.clip_grad_norm_(model.parameters(),
                                           max_norm=config.max_norm)

            # At step 0 the update is deferred until after the evaluation
            # below, so the initial model can be tested.
            if step > 0:
                optimizer.step()

            reached_target = False
            if step % config.eval_freq == 0:
                with torch.no_grad():
                    test_inputs, test_targets = next(test_loader)
                    test_inputs = torch.nn.functional.one_hot(
                        test_inputs.type(torch.LongTensor),
                        config.input_dim).type(torch.FloatTensor).to(device)
                    test_targets = test_targets.to(device)
                    test_y = model(test_inputs)
                    test_loss = criterion(test_y, test_targets).item()
                    test_predictions = torch.argmax(test_y, 1)
                    test_accuracy = (
                        (test_predictions == test_targets).sum().item()
                        / config.test_size)
                results.write("%d %.3f %.3f\n"
                              % (step, test_accuracy, test_loss))
                reached_target = np.round(test_accuracy, 2) == 1.00

            if step == 0:
                optimizer.step()

            if reached_target:
                print("Achieved >99.95% accuracy.")
                break

            if step == config.train_steps:
                # If you receive a PyTorch data-loader error, check this bug
                # report: https://github.com/pytorch/pytorch/pull/9655
                break

        print('Done training.')
def train(config):
    """Train an RNN or LSTM on palindromes with fixed random seeds.

    Seeds numpy and torch with 42, trains with Adam or RMSprop depending on
    ``config.optimizer`` and prints a progress line every 10 steps.

    Args:
        config: Settings object; ``model_type`` ('RNN' or 'LSTM'), ``device``,
            ``input_length``, ``input_dim``, ``num_hidden``, ``num_classes``,
            ``batch_size``, ``optimizer`` ("adam" selects Adam, anything else
            RMSprop), ``learning_rate``, ``max_norm`` and ``train_steps`` are
            read.

    Returns:
        Tuple ``(pl_loss, average_loss, acc)``: the per-step loss, the running
        mean of the last (up to) 100 losses, and the per-step batch accuracy.
        Earlier versions returned nothing; callers that ignore the return
        value are unaffected.
    """
    np.random.seed(42)
    torch.manual_seed(42)

    assert config.model_type in ('RNN', 'LSTM')

    # Initialize the device which to run the model on
    device = torch.device(config.device)
    print(device)

    # Initialize the model that we are going to use
    if config.model_type == "RNN":
        print("Training VanillaRNN")
        print()
        model_cls = VanillaRNN
    else:
        print("Training LSTM")
        print()
        model_cls = LSTM
    model = model_cls(config.input_length, config.input_dim,
                      config.num_hidden, config.num_classes,
                      config.batch_size, config.device)
    model = model.to(device)

    # Initialize the dataset and data loader (note the +1)
    dataset = PalindromeDataset(config.input_length + 1)
    data_loader = DataLoader(dataset, config.batch_size, num_workers=1)

    # Setup the loss and optimizer
    criterion = nn.CrossEntropyLoss()
    if config.optimizer == "adam":
        optimizer = optim.Adam(model.parameters(), lr=config.learning_rate)
    else:
        optimizer = optim.RMSprop(model.parameters(), lr=config.learning_rate)

    pl_loss = []
    average_loss = []
    acc = []

    for step, (batch_inputs, batch_targets) in enumerate(data_loader):
        # Only for time measurement of step through network
        t1 = time.time()

        batch_inputs = batch_inputs.to(device)
        # .long() replaces the legacy LongTensor(tensor) constructor.
        batch_targets = batch_targets.long().to(device)

        # Zero the parameter gradients
        model.zero_grad()

        output = model(batch_inputs)
        out_loss = criterion(output, batch_targets)
        out_loss.backward()

        # Helps prevent the exploding gradient problem in RNNs / LSTMs.
        torch.nn.utils.clip_grad_norm_(model.parameters(),
                                       max_norm=config.max_norm)
        optimizer.step()

        loss = out_loss.item()

        # Softmax is monotonic, so argmax over the raw outputs is the same.
        n_correct = (output.argmax(dim=1) == batch_targets).sum().item()
        accuracy = n_correct / config.batch_size

        # Just for time measurement
        t2 = time.time()
        examples_per_second = config.batch_size / float(t2 - t1)

        pl_loss.append(loss)
        # Mean over the last 100 losses (the reversed slice [:-100:-1] only
        # covered 99 of them).
        average_loss.append(np.mean(pl_loss[-100:]))
        acc.append(accuracy)

        if step % 10 == 0:
            print("[{}] Train Step {:04d}/{:04d}, Batch Size = {}, Examples/Sec = {:.2f}, "
                  "Accuracy = {:.2f}, Loss = {:.3f}".format(
                      datetime.now().strftime("%Y-%m-%d %H:%M"), step,
                      config.train_steps, config.batch_size,
                      examples_per_second, accuracy, loss))

        if step == config.train_steps:
            # If you receive a PyTorch data-loader error, check this bug report:
            # https://github.com/pytorch/pytorch/pull/9655
            break

    print('Done training.')
    return pl_loss, average_loss, acc
def train(config, pallindrome_length, m):
    """Train an RNN or LSTM on palindromes of the given length.

    Note that ``config`` is mutated: ``config.input_length`` and
    ``config.model_type`` are overwritten with ``pallindrome_length`` and
    ``m`` before anything else happens.

    Args:
        config: Run configuration (input_dim, num_hidden, num_classes,
            batch_size, learning_rate, max_norm, train_steps are read).
        pallindrome_length: Value stored into ``config.input_length``.
        m: Model type, 'RNN' or 'LSTM'.

    Returns:
        The highest accuracy among those sampled every 500 steps
        (0.0 if the data loader yielded no batch at all).
    """
    config.input_length = pallindrome_length
    config.model_type = m
    assert config.model_type in ('RNN', 'LSTM')

    # Initialize the device which to run the model on
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    # Initialize the model that we are going to use
    hyper_params = [
        config.input_length, config.input_dim, config.num_hidden,
        config.num_classes, config.batch_size, device
    ]
    model_cls = VanillaRNN if config.model_type == 'RNN' else LSTM
    model = model_cls(*hyper_params)

    # Initialize the dataset and data loader (note the +1)
    dataset = PalindromeDataset(config.input_length + 1)
    data_loader = DataLoader(dataset, config.batch_size, num_workers=1)

    # Setup the loss and optimizer
    criterion = torch.nn.CrossEntropyLoss()
    optimizer = torch.optim.RMSprop(model.parameters(),
                                    lr=config.learning_rate)

    accuracies = []

    for step, (batch_inputs, batch_targets) in enumerate(data_loader):
        batch_inputs = batch_inputs.to(device)
        batch_targets = batch_targets.to(device)

        # Only for time measurement of step through network
        t1 = time.time()

        optimizer.zero_grad()

        # Forward pass
        y_pred = model(batch_inputs)
        loss = criterion(y_pred, batch_targets)

        # Backward pass.  The graph is rebuilt by every forward pass, so it
        # does not need to be retained.
        loss.backward()

        # Clipping must sit between backward() and step(): only then does it
        # rescale the gradients that the optimizer is about to apply.
        torch.nn.utils.clip_grad_norm_(model.parameters(),
                                       max_norm=config.max_norm)
        optimizer.step()

        accuracy = (y_pred.argmax(dim=1) == batch_targets).float().mean().item()
        loss_value = loss.item()

        # Just for time measurement
        t2 = time.time()
        examples_per_second = config.batch_size / float(t2 - t1)

        if step % 500 == 0:
            print(
                "[{}] Train Step {:04d}/{:04d}, Batch Size = {}, Examples/Sec = {:.2f}, "
                "Accuracy = {:.2f}, Loss = {:.3f}".format(
                    datetime.now().strftime("%Y-%m-%d %H:%M"), step,
                    config.train_steps, config.batch_size,
                    examples_per_second, accuracy, loss_value))
            accuracies.append(accuracy)
            # Early stop once the sampled batch is (almost) perfectly fitted.
            if loss_value < 0.01 or accuracy == 1:
                break

        if step == config.train_steps:
            # If you receive a PyTorch data-loader error, check this bug report:
            # https://github.com/pytorch/pytorch/pull/9655
            break

    print('Done training.')
    return max(accuracies, default=0.0)
def train(config):
    """Train an RNN/LSTM and plot the gradient norms of its hidden states.

    The model is built with ``store_hidden = True``; after every backward
    pass the norm of ``t.grad`` is taken for each tensor in
    ``model.hiddenActivity``.  The norms from the last executed step are
    handed to ``drawPlotMagn`` once training finishes.  Progress is printed
    every 10 steps.  Nothing is returned.

    An unknown ``config.model_type`` prints a message and exits the process.
    """
    # Initialize the device which to run the model on
    device = torch.device(config.device)

    # Initialize the model that we are going to use
    if config.model_type == 'RNN':
        model = VanillaRNN(config.input_length + 1, config.input_dim,
                           config.num_hidden, config.num_classes, device)
    elif config.model_type == 'LSTM':
        model = LSTM(config.input_length + 1, config.input_dim,
                     config.num_hidden, config.num_classes, device)
    else:
        print("Unknown model type, please use RNN or LSTM")
        exit()
    model.store_hidden = True

    # Initialize the dataset and data loader (note the +1)
    dataset = PalindromeDataset(config.input_length + 1)
    data_loader = DataLoader(dataset, config.batch_size, num_workers=1)

    # Setup the loss and optimizer
    criterion = torch.nn.CrossEntropyLoss()
    optimizer = optim.RMSprop(model.parameters(), lr=config.learning_rate)

    accuracies = []
    for step, (batch_inputs, batch_targets) in enumerate(data_loader):
        # Only for time measurement of step through network
        t1 = time.time()

        batch_inputs = batch_inputs.to(device)
        batch_targets = batch_targets.to(device)

        optimizer.zero_grad()
        outputs = model(batch_inputs)
        loss = criterion(outputs, batch_targets)
        loss.backward()

        # Clip between backward() and step() so the rescaling applies to the
        # gradients of this step; this bounds the update size and guards
        # against exploding gradients.
        torch.nn.utils.clip_grad_norm_(model.parameters(),
                                       max_norm=config.max_norm)
        optimizer.step()

        loss = loss.item()
        outputs = outputs.cpu().detach().numpy()
        acc = accuracy(outputs, batch_targets.cpu().detach().numpy())
        accuracies.append(acc)

        # Gradient norm of each stored hidden state for the current step.
        grads = [
            torch.norm(t.grad).cpu().detach() for t in model.hiddenActivity
        ]

        # Just for time measurement
        t2 = time.time()
        examples_per_second = config.batch_size / float(t2 - t1)

        if step % 10 == 0:
            print(
                "[{}] Train Step {:04d}/{:04d}, Batch Size = {}, Examples/Sec = {:.2f}, "
                "Accuracy = {:.2f}, Loss = {:.3f}".format(
                    datetime.now().strftime("%Y-%m-%d %H:%M"), step,
                    config.train_steps, config.batch_size,
                    examples_per_second, acc, loss))

        if step == config.train_steps:
            # If you receive a PyTorch data-loader error, check this bug report:
            # https://github.com/pytorch/pytorch/pull/9655
            break

    print('Done training.')
    drawPlotMagn(
        grads, './' + str(config.model_type) + '_len:' +
        str(config.input_length) + '_lr:' + str(config.learning_rate) +
        '_grads_over_time.jpg',
        "Gradients over time steps with " + str(config.model_type), 1)
def train(config):
    """Train an RNN/LSTM, record accuracy/loss and plot them afterwards.

    Prints the configuration via ``print_flags()``, trains with RMSprop
    (learning rate, weight decay and momentum taken from ``config``) and
    samples step, accuracy and loss every 10 steps.  After training, the
    mean of the (up to) 50 best per-step accuracies is printed and appended
    to ``result/<model_type>_acc.txt``, and accuracy/loss curves are shown.
    Nothing is returned.
    """
    assert config.model_type in ('RNN', 'LSTM')

    # Print all configs to confirm parameter settings
    print_flags()

    # Initialize the device which to run the model on
    device = torch.device(config.device)

    # Initialize the model that we are going to use
    if config.model_type == 'RNN':
        model = VanillaRNN(config.input_length, config.input_dim,
                           config.num_hidden, config.num_classes,
                           config.batch_size, device)
    else:
        model = LSTM(config.input_length, config.input_dim,
                     config.num_hidden, config.num_classes,
                     config.batch_size, device)
    model.to(device)

    # Initialize the dataset and data loader (note the +1)
    dataset = PalindromeDataset(config.input_length + 1)
    data_loader = DataLoader(dataset, config.batch_size, num_workers=1)

    # Setup the loss and optimizer
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.RMSprop(model.parameters(),
                              lr=config.learning_rate,
                              weight_decay=config.weight_decay,
                              momentum=config.momentum)

    # Store some measures
    los = []
    iteration = []
    tmp_acc = []
    acc = []

    # Defined up front so the first log line cannot hit an unbound name when
    # the timer reports zero elapsed time for the first step.
    examples_per_second = 0.0

    for step, (batch_inputs, batch_targets) in enumerate(data_loader):
        # Only for time measurement of step through network
        t1 = time.time()

        optimizer.zero_grad()
        batch_inputs = batch_inputs.to(device)
        batch_targets = batch_targets.to(device)

        pred = model(batch_inputs)
        accuracy = compute_accuracy(pred, batch_targets)
        tmp_acc.append(accuracy)

        loss = criterion(pred, batch_targets)
        loss.backward()

        # Rescale the gradients to a norm of at most config.max_norm before
        # the update, which guards against exploding gradients.
        torch.nn.utils.clip_grad_norm_(model.parameters(),
                                       max_norm=config.max_norm)
        optimizer.step()

        # Just for time measurement; keep the previous rate if the timer
        # resolution is too coarse to measure this step.
        t2 = time.time()
        elapsed = float(t2 - t1)
        if elapsed != 0:
            examples_per_second = config.batch_size / elapsed

        if step % 10 == 0:
            # Store a plain float: keeping the tensor would retain its
            # autograd graph and cannot be plotted.
            loss_value = loss.item()
            print(
                "[{}] Train Step {:04d}/{:04d}, Batch Size = {}, Examples/Sec = {:.2f}, "
                "Accuracy = {:.2f}, Loss = {:.3f}".format(
                    datetime.now().strftime("%Y-%m-%d %H:%M"), step,
                    config.train_steps, config.batch_size,
                    examples_per_second, accuracy, loss_value))
            iteration.append(step)
            acc.append(accuracy)
            los.append(loss_value)

        if step == config.train_steps:
            # If you receive a PyTorch data-loader error, check this bug report:
            # https://github.com/pytorch/pytorch/pull/9655
            break

    print('Done training.')

    # Average over the best accuracies actually available (at most 50).
    tmp_acc.sort(reverse=True)
    best = tmp_acc[:50]
    avg_acc = sum(best) / len(best) if best else 0.0
    print('Average of 50 best accuracies: {}'.format(avg_acc))

    with open('result/{}_acc.txt'.format(config.model_type), 'a') as handle:
        handle.write('{} {}\n'.format(config.input_length, avg_acc))

    fig, axs = plt.subplots(1, 2, figsize=(10, 5))
    axs[0].plot(iteration, acc)
    axs[0].set_xlabel('Iteration')
    axs[0].set_ylabel('Accuracy')
    axs[1].plot(iteration, los)
    axs[1].set_xlabel('Iteration')
    axs[1].set_ylabel('Loss')
    fig.tight_layout()
    plt.show()
def train(config):
    """Train a double-precision RNN/LSTM and append the results to a log file.

    Uses CUDA when available.  Inputs are transposed (dims 0 and 1 swapped)
    and cast to double before being fed to the model.  Accuracy and loss of
    every step are recorded; training stops at ``config.train_steps`` or
    once more than 10 accuracies are recorded and the last three average
    exactly 1.0.  One summary line is appended to ``LSTMMMMM.txt``.
    Nothing is returned.
    """
    assert config.model_type in ('RNN', 'LSTM')

    # Initialize the device which to run the model on
    device = 'cuda' if torch.cuda.is_available() else 'cpu'
    print('Currently using: ', device)

    # Initialize the model that we are going to use
    input_length = config.input_length
    input_dim = config.input_dim
    num_classes = config.num_classes
    num_hidden = config.num_hidden
    batch_size = config.batch_size
    learning_rate = config.learning_rate

    if config.model_type == 'RNN':
        model = VanillaRNN(input_length, input_dim, num_hidden, num_classes,
                           batch_size, device).double()
    if config.model_type == 'LSTM':
        model = LSTM(input_length, input_dim, num_hidden, num_classes,
                     batch_size, device).double()
    model = model.to(device)

    # Initialize the dataset and data loader (note the +1)
    dataset = PalindromeDataset(input_length + 1)
    data_loader = DataLoader(dataset, config.batch_size, num_workers=1)

    # Setup the loss and optimizer
    criterion = torch.nn.CrossEntropyLoss()
    optimizer = torch.optim.RMSprop(model.parameters(), lr=learning_rate)

    accuracy_list = []
    loss_list = []

    for step, (batch_inputs, batch_targets) in enumerate(data_loader):
        # Only for time measurement of step through network
        t1 = time.time()

        batch_inputs = batch_inputs.to(device)
        batch_targets = batch_targets.to(device)

        output = model.forward(batch_inputs.transpose(0, 1).double())
        optimizer.zero_grad()

        # Predicted class = index of the largest score along dim 1.
        output_indices = torch.argmax(output, dim=1)

        loss_for_backward = criterion(output, batch_targets)
        loss_for_backward.backward()

        # Rescale the gradients to a norm of at most config.max_norm before
        # the update, which guards against exploding gradients.
        torch.nn.utils.clip_grad_norm_(model.parameters(),
                                       max_norm=config.max_norm)
        optimizer.step()

        correct_indices = output_indices == batch_targets
        accuracy = correct_indices.sum().item() / len(correct_indices)
        # Plain float: avoids retaining the autograd graph and keeps the
        # log file free of tensor reprs.
        loss_value = loss_for_backward.item()

        # Just for time measurement
        t2 = time.time()
        examples_per_second = config.batch_size / float(t2 - t1)

        if step % 10 == 0:
            print("[{}] Train Step {:04d}/{:04d}, Batch Size = {}, Examples/Sec = {:.2f}, "
                  "Accuracy = {:.2f}, Loss = {:.3f}".format(
                      datetime.now().strftime("%Y-%m-%d %H:%M"), step,
                      config.train_steps, config.batch_size,
                      examples_per_second, accuracy, loss_value
                  ))

        accuracy_list.append(accuracy)
        loss_list.append(loss_value)

        recent = accuracy_list[-3:]
        converged = (len(accuracy_list) > 10
                     and sum(recent) / len(recent) == 1.0)
        if step == config.train_steps or converged:
            # If you receive a PyTorch data-loader error, check this bug report:
            # https://github.com/pytorch/pytorch/pull/9655
            break

    print('Done training.')

    line = ' '.join((str(config.model_type), 'Palindrome length:',
                     str(input_length), 'Accuracy:', str(accuracy_list),
                     'Loss', str(loss_list)))
    with open('LSTMMMMM.txt', 'a') as file:
        file.write(line + '\n')
def train(config):
    """Train a DataParallel-wrapped RNN/LSTM and save sampled metrics.

    Falls back to the CPU when a non-CPU device is requested but CUDA is
    unavailable.  Loss and accuracy are sampled every 10 steps and printed
    every 100 steps.  When ``step == config.train_steps`` the samples are
    written to ``<model_type>_<input_length>.npz`` and training stops.
    Nothing is returned.
    """
    assert config.model_type in ('RNN', 'LSTM')

    # Initialize the device which to run the model on;
    # if GPU was chosen, check if CUDA is available
    if str(config.device) != "cpu":
        if not torch.cuda.is_available():
            print('\n* GPU was selected but CUDA is not available.\nTraining on CPU ...')
            device = torch.device("cpu")
        else:
            print('\nCUDA is available!  Training on GPU ...')
            device = torch.device(config.device)
    else:
        # This branch is only reached when the CPU was requested.
        print('\nTraining on CPU ...')
        device = torch.device(config.device)

    # Initialize the model that we are going to use
    if config.model_type == 'RNN':
        model = VanillaRNN(config.input_length, config.input_dim,
                           config.num_hidden, config.num_classes,
                           config.batch_size, device)
    else:
        model = LSTM(config.input_length, config.input_dim,
                     config.num_hidden, config.num_classes,
                     config.batch_size, device)

    # Print Configuration
    print("Model Type: {!s:5} Input Length: {!s:5} Learning Rate: {}\n"
          .format(config.model_type, config.input_length, config.learning_rate))

    # Initialize model
    model = torch.nn.DataParallel(model).to(device)

    # Initialize the dataset and data loader (note the +1)
    dataset = PalindromeDataset(config.input_length + 1)
    data_loader = DataLoader(dataset, config.batch_size, num_workers=1)

    # Setup the loss and optimizer
    criterion = torch.nn.CrossEntropyLoss()
    optimizer = torch.optim.RMSprop(model.parameters(), lr=config.learning_rate)

    train_loss, train_accuracy, train_steps = [], [], []

    # Enable train mode
    model.train()

    for step, (batch_inputs, batch_targets) in enumerate(data_loader):
        # Only for time measurement of step through network
        t1 = time.time()

        # Move tensors to the selected device
        batch_targets = batch_targets.long().to(device)
        batch_inputs = batch_inputs.to(device)

        # Forward pass, loss and back-propagation
        predictions = model(batch_inputs)
        loss = criterion(predictions, batch_targets)
        loss.backward()

        # `clip_grad_norm_` rescales the gradients to a norm of at most
        # config.max_norm, which helps prevent the exploding gradient
        # problem in RNNs / LSTMs.
        torch.nn.utils.clip_grad_norm_(model.parameters(),
                                       max_norm=config.max_norm)

        # Update weights, then clear the gradients for the next step
        optimizer.step()
        optimizer.zero_grad()

        # Just for time measurement
        t2 = time.time()
        examples_per_second = config.batch_size / float(t2 - t1)

        if step % 10 == 0:
            # Store accuracy and loss
            train_steps.append(step)
            train_loss.append(loss.item())
            train_accuracy.append(accuracy(predictions, batch_targets))

            if step % 100 == 0:
                print("[{}] Train Step {:04d}/{:04d}, Batch Size = {}, Examples/Sec = {:.2f}, "
                      "Accuracy = {:.2f}, Loss = {:.3f}".format(
                          datetime.now().strftime("%Y-%m-%d %H:%M"), step,
                          config.train_steps, config.batch_size,
                          examples_per_second,
                          train_accuracy[-1], train_loss[-1]))

        if step == config.train_steps:
            # If you receive a PyTorch data-loader error, check this bug report:
            # https://github.com/pytorch/pytorch/pull/9655

            # Save the sampled accuracies and losses; `train_loss` is an
            # additional key, the existing keys are unchanged.
            file_name = str(config.model_type) + '_' + str(config.input_length) + '.npz'
            np.savez(file_name,
                     train_steps=train_steps,
                     train_accuracy=train_accuracy,
                     train_loss=train_loss,
                     model_type=config.model_type,
                     input_length=config.input_length)
            break

    print('Done training.')
def train(config, n_run):
    """Train an RNN/LSTM on the first T-1 digits and save model and curves.

    ``config.input_length`` is decremented by one in place before the model
    is built.  Loss and accuracy are averaged over windows that are closed
    every 100 steps; training stops at ``config.train_steps`` or as soon as
    two consecutive window losses differ by less than 1e-6.  The model and
    both curves are saved under ``./Results/<model_type>/``.

    Args:
        config: Run configuration; mutated as described above.
        n_run: Run index; not used by the active code path.
    """
    import os

    assert config.model_type in ('RNN', 'LSTM')

    # Initialize the device which to run the model on
    device = torch.device(config.device)

    # Train on T-1 first digits
    config.input_length = config.input_length - 1

    # Initialize the model that we are going to use
    if config.model_type == 'RNN':
        model = VanillaRNN(config.input_length, config.input_dim,
                           config.num_hidden, config.num_classes,
                           config.batch_size, device=device)
    elif config.model_type == 'LSTM':
        model = LSTM(config.input_length, config.input_dim,
                     config.num_hidden, config.num_classes,
                     config.batch_size, device=device)

    # Initialize the dataset and data loader (note the +1)
    dataset = PalindromeDataset(config.input_length + 1)
    data_loader = DataLoader(dataset, config.batch_size, num_workers=1)

    # Setup the loss and optimizer
    criterion = torch.nn.CrossEntropyLoss()
    optimizer = torch.optim.RMSprop(model.parameters(), lr=config.learning_rate)

    model.to(device)

    train_loss = []
    train_acc = []
    t_loss = []
    t_acc = []

    # Convergence condition
    eps = 1e-6

    for step, (batch_inputs, batch_targets) in enumerate(data_loader):
        # Only for time measurement of step through network
        t1 = time.time()

        # The loader already yields tensors, so move them instead of
        # re-wrapping them with torch.tensor().
        x = batch_inputs.to(device)
        y = batch_targets.to(device)

        # Forward pass
        pred = model.forward(x)
        loss = criterion(pred, y)
        t_loss.append(loss.item())

        # Clear stored gradients, then backward pass
        optimizer.zero_grad()
        loss.backward()

        # Clip the norm of the gradients to config.max_norm.  This limits
        # the size of the parameter update and prevents exploding gradients.
        torch.nn.utils.clip_grad_norm_(model.parameters(),
                                       max_norm=config.max_norm)
        optimizer.step()

        accuracy = get_accuracy(pred, y, config.batch_size)
        t_acc.append(accuracy.item())

        # Just for time measurement
        t2 = time.time()
        examples_per_second = config.batch_size / float(t2 - t1)

        if step % 1000 == 0:
            print("[{}] Train Step {:04d}/{:04d}, Batch Size = {}, Examples/Sec = {:.2f}, "
                  "Accuracy = {:.2f}, Loss = {:.3f}".format(
                      datetime.now().strftime("%Y-%m-%d %H:%M"), step,
                      config.train_steps, config.batch_size,
                      examples_per_second, accuracy, loss.item()
                  ))

        if step % 100 == 0:
            # Close the current averaging window
            train_loss.append(np.mean(t_loss))
            train_acc.append(np.mean(t_acc))
            t_loss = []
            t_acc = []
            # Stop when the windowed loss no longer changes.
            if len(train_loss) >= 2 and abs(train_loss[-1] - train_loss[-2]) < eps:
                break

        if step == config.train_steps:
            # If you receive a PyTorch data-loader error, check this bug report:
            # https://github.com/pytorch/pytorch/pull/9655
            break

    print('\nDone training.\n')

    # Save trained model and results; both model types share one layout.
    # Create the directory first so a long run is not lost at save time.
    kind = config.model_type
    out_dir = "./Results/" + kind + "/"
    os.makedirs(out_dir, exist_ok=True)
    prefix = out_dir + str(config.input_length) + "_" + kind
    torch.save(model, prefix + "_model")
    np.save(prefix + "_accuracy", train_acc)
    np.save(prefix + "_loss", train_loss)
def train(config):
    """Train an RNN/LSTM on the palindrome task, printing progress.

    The model is printed once after construction.  Model outputs are
    transposed with ``torch.t`` before the loss is computed (presumably the
    model returns (classes, batch) -- confirm against the model code).
    Progress is printed every 10 steps.  Nothing is returned.
    """
    assert config.model_type in ('RNN', 'LSTM')

    # Initialize the device which to run the model on
    device = torch.device(config.device)

    # Initialize the model that we are going to use
    if config.model_type == 'RNN':
        model = VanillaRNN(config.input_length, config.input_dim,
                           config.num_hidden, config.num_classes,
                           config.batch_size, device)
    else:
        model = LSTM(config.input_length, config.input_dim,
                     config.num_hidden, config.num_classes,
                     config.batch_size, device)
    print(model)

    # Initialize the dataset and data loader (note the +1)
    dataset = PalindromeDataset(config.input_length + 1)
    data_loader = DataLoader(dataset, config.batch_size, num_workers=1)

    # Setup the loss and optimizer
    criterion = torch.nn.CrossEntropyLoss()
    optimizer = torch.optim.RMSprop(model.parameters(), config.learning_rate)

    for step, (batch_inputs, batch_targets) in enumerate(data_loader):
        # Only for time measurement of step through network
        t1 = time.time()

        model_outputs = model.forward(batch_inputs)

        loss = criterion(torch.t(model_outputs), batch_targets)
        accuracy = accuracy_(model_outputs, batch_targets)

        optimizer.zero_grad()
        loss.backward()

        # Clip the gradient norm to config.max_norm.  This has to happen
        # after backward() (so the fresh gradients exist) and before step()
        # (so the clipped gradients are the ones applied); it limits the
        # size of the parameter update.
        torch.nn.utils.clip_grad_norm_(model.parameters(),
                                       max_norm=config.max_norm)
        optimizer.step()

        # Just for time measurement
        t2 = time.time()
        examples_per_second = config.batch_size / float(t2 - t1)

        if step % 10 == 0:
            print(
                "[{}] Train Step {:04d}/{:04d}, Batch Size = {}, Examples/Sec = {:.2f}, "
                "Accuracy = {:.2f}, Loss = {:.3f}".format(
                    datetime.now().strftime("%Y-%m-%d %H:%M"), step,
                    config.train_steps, config.batch_size,
                    examples_per_second, accuracy, loss))

        if step == config.train_steps:
            # If you receive a PyTorch data-loader error, check this bug report:
            # https://github.com/pytorch/pytorch/pull/9655
            break

    print('Done training.')
def train(config):
    """Train a VanillaRNN on the palindrome task with an NLL loss.

    Inputs get a trailing dimension added before they are moved to the
    device; the model output is passed to ``NLLLoss`` as log-probabilities.
    Progress is printed every 10 steps.  Nothing is returned.
    """
    # Initialize the device which to run the model on
    device = torch.device(config.device)

    # Setup the model that we are going to use
    print("Initializing Vanilla RNN model...")
    model = VanillaRNN(
        seq_length=config.input_length,
        input_dim=config.input_dim,
        num_hidden=config.num_hidden,
        num_classes=config.num_classes,
        batch_size=config.batch_size,
        device=device
    )

    # Initialize the dataset and data loader (note the +1)
    dataset = PalindromeDataset(config.input_length + 1)
    data_loader = DataLoader(dataset, config.batch_size, num_workers=1)

    # Setup the loss and optimizer
    loss_function = torch.nn.NLLLoss()
    optimizer = optim.RMSprop(model.parameters(), lr=config.learning_rate)

    for step, (batch_inputs, batch_targets) in enumerate(data_loader):
        # Only for time measurement of step through network
        t1 = time.time()

        batch_inputs = batch_inputs.unsqueeze(-1)  # add input dimensionality
        batch_inputs = batch_inputs.to(device)     # [batch_size, seq_length, 1]
        batch_targets = batch_targets.to(device)   # [batch_size]

        # Reset gradients for this iteration
        model.zero_grad()

        # Forward pass
        log_probs = model(batch_inputs)

        # Compute the loss and gradients
        loss = loss_function(log_probs, batch_targets)
        loss.backward()

        # Rescale the gradients to a norm of at most config.max_norm before
        # the update, which guards against exploding gradients.  The
        # in-place variant replaces the deprecated `clip_grad_norm`.
        torch.nn.utils.clip_grad_norm_(model.parameters(),
                                       max_norm=config.max_norm)
        optimizer.step()

        predictions = torch.argmax(log_probs, dim=1)
        correct = (predictions == batch_targets).sum().item()
        accuracy = correct / log_probs.size(0)

        # Just for time measurement
        t2 = time.time()
        examples_per_second = config.batch_size / float(t2 - t1)

        if step % 10 == 0:
            print("[{}] Train Step {:04d}/{:04d}, Batch Size = {}, Examples/Sec = {:.2f}, "
                  "Accuracy = {:.2f}, Loss = {:.3f}".format(
                      datetime.now().strftime("%Y-%m-%d %H:%M"), step,
                      config.train_steps, config.batch_size,
                      examples_per_second, accuracy, loss.item()
                  ))

        # Check if training is finished
        if step == config.train_steps:
            # If you receive a PyTorch data-loader error, check this bug report:
            # https://github.com/pytorch/pytorch/pull/9655
            break

    print('Done training.')
def train(config):
    """Train an RNN/LSTM and write per-step metrics to a CSV file.

    A requested 'cuda' device falls back to the CPU when CUDA is not
    available.  Accuracy and loss of every step are collected; when
    ``step == config.train_steps`` they are written to
    ``results/<ModelClassName>.csv`` under the current working directory
    (created on first use, appended without header afterwards).  Progress
    is printed every 1000 steps.  Nothing is returned.
    """
    assert config.model_type in ('RNN', 'LSTM')

    # Initialize the device which to run the model on
    cuda_missing = config.device == 'cuda' and not torch.cuda.is_available()
    device = torch.device('cpu' if cuda_missing else config.device)

    # Initialize the model that we are going to use
    model_kwargs = dict(seq_length=config.input_length,
                        input_dim=config.input_dim,
                        num_hidden=config.num_hidden,
                        num_classes=config.num_classes,
                        batch_size=config.batch_size,
                        device=device)
    if config.model_type == 'RNN':
        model = VanillaRNN(**model_kwargs)
    elif config.model_type == 'LSTM':
        model = LSTM(**model_kwargs)

    # Make the results directory (if it doesn't exist)
    RESULTS_DIR = Path.cwd() / 'results'
    RESULTS_DIR.mkdir(parents=True, exist_ok=True)
    results_filepath = RESULTS_DIR / (model.__class__.__name__ + '.csv')

    # Initialize the dataset and data loader (note the +1)
    dataset = PalindromeDataset(config.input_length + 1)
    data_loader = DataLoader(dataset, config.batch_size, num_workers=1)

    # Setup the loss and optimizer
    criterion = torch.nn.CrossEntropyLoss()
    optimizer = torch.optim.RMSprop(model.parameters(),
                                    lr=config.learning_rate)

    results = {key: [] for key in ('T', 'step', 'accuracy', 'loss')}

    for step, (batch_inputs, batch_targets) in enumerate(data_loader):
        # Only for time measurement of step through network
        t1 = time.time()

        # Send the data to the device
        batch_inputs = batch_inputs.to(device)
        batch_targets = batch_targets.to(device)

        # Reset gradients, forward pass, loss, backward pass
        optimizer.zero_grad()
        pred_targets = model.forward(batch_inputs)
        loss = criterion.forward(pred_targets, batch_targets)
        loss.backward()

        # Rescale the gradients to a norm of at most config.max_norm
        # (clip_grad_norm is deprecated, clip_grad_norm_ replaces it).
        torch.nn.utils.clip_grad_norm_(model.parameters(),
                                       max_norm=config.max_norm)
        optimizer.step()

        hits = pred_targets.argmax(dim=1) == batch_targets
        accuracy = hits.float().mean()

        # Append the results of this step
        results['T'].append(config.input_length)
        results['step'].append(step)
        results['accuracy'].append(accuracy.item())
        results['loss'].append(loss.item())

        # Just for time measurement
        t2 = time.time()
        examples_per_second = config.batch_size / float(t2 - t1)

        if step % 1000 == 0:
            print(
                f'[{datetime.now().strftime("%Y-%m-%d %H:%M")}] Train Step {step:04d}/{config.train_steps:04d}, Batch Size = {config.batch_size}, Examples/Sec = {examples_per_second:.2f}, Accuracy = {accuracy:.2f}, Loss = {loss:.3f}'
            )

        if step == config.train_steps:
            results_df = df.from_dict(results)
            # Write a fresh file with header, or append rows to an existing one.
            already_there = results_filepath.exists()
            results_df.to_csv(results_filepath,
                              sep=';',
                              mode='a' if already_there else 'w',
                              header=not already_there,
                              encoding='utf-8',
                              index=False)
            # If you receive a PyTorch data-loader error, check this bug report:
            # https://github.com/pytorch/pytorch/pull/9655
            break

    print('Done training.')
def train(config):
    """Train an RNN/LSTM and plot accuracy and loss sampled every 100 steps.

    Every 100 steps the batch accuracy and loss are computed, stored and
    printed.  After the loop both series are plotted with matplotlib.
    Nothing is returned.
    """
    assert config.model_type in ('RNN', 'LSTM')

    # Initialize the device which to run the model on
    device = torch.device(config.device)

    # Initialize the model that we are going to use
    if config.model_type == 'RNN':
        model = VanillaRNN(config.input_length, config.input_dim,
                           config.num_hidden, config.num_classes,
                           config.batch_size, device=device)
    else:
        model = LSTM(config.input_length, config.input_dim,
                     config.num_hidden, config.num_classes,
                     config.batch_size, device=device)

    # Initialize the dataset and data loader (note the +1)
    dataset = PalindromeDataset(config.input_length + 1)
    data_loader = DataLoader(dataset, config.batch_size, num_workers=1)

    # Setup the loss and optimizer
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.RMSprop(model.parameters(), lr=config.learning_rate)

    accuracies = []
    losses = []

    for step, (batch_inputs, batch_targets) in enumerate(data_loader):
        # Only for time measurement of step through network
        t1 = time.time()

        batch_inputs = batch_inputs.to(device)
        batch_targets = batch_targets.to(device)

        out = model.forward(batch_inputs)
        loss = criterion(out, batch_targets)

        optimizer.zero_grad()
        loss.backward()

        # Clipping gradients helps prevent exploding gradients (hence
        # clipping).  It does nothing against vanishing gradients in RNNs;
        # for those, LSTMs are useful.
        torch.nn.utils.clip_grad_norm_(model.parameters(),
                                       max_norm=config.max_norm)
        optimizer.step()

        # Just for time measurement
        t2 = time.time()
        examples_per_second = config.batch_size / float(t2 - t1)

        if step % 100 == 0:
            compare = (out.argmax(dim=1) == batch_targets)
            accuracy = compare.sum().item() / compare.size()[0]
            # Plain float: a stored tensor would retain its autograd graph
            # and cannot be converted for plotting.
            loss_value = loss.item()
            accuracies.append(accuracy)
            losses.append(loss_value)
            print(
                "[{}] Train Step {:04d}/{:04d}, Batch Size = {}, Examples/Sec = {:.2f}, "
                "Accuracy = {:.2f}, Loss = {:.3f}".format(
                    datetime.now().strftime("%Y-%m-%d %H:%M"), step,
                    config.train_steps, config.batch_size,
                    examples_per_second, accuracy, loss_value))

        if step == config.train_steps:
            # If you receive a PyTorch data-loader error, check this bug report:
            # https://github.com/pytorch/pytorch/pull/9655
            break

    plt.plot(accuracies, label='accuracies')
    plt.plot(losses, label='losses')
    plt.tight_layout()
    plt.legend()
    plt.show()

    print('Done training.')
def train(config):
    """Train an RNN/LSTM with Adam and pickle the loss/accuracy history.

    The batches are fed to the model as produced by the data loader (plain
    palindrome integers).  Loss and accuracy of every step are recorded and
    finally pickled as ``[loss_list, accuracy_list]`` into
    ``Result_<model_type>_inputlen_<input_length>.p``.  Progress is printed
    every 10 steps.  Nothing is returned.

    Raises:
        AssertionError: If ``config.model_type`` is neither 'RNN' nor 'LSTM'.
    """
    assert config.model_type in ('RNN', 'LSTM')

    # Initialize the device which to run the model on
    device = torch.device(config.device)

    # Initialize the model that we are going to use
    if config.model_type == 'RNN':
        model = VanillaRNN(config.input_length, config.input_dim,
                           config.num_hidden, config.num_classes,
                           config.batch_size, config.device)
    elif config.model_type == 'LSTM':
        model = LSTM(config.input_length, config.input_dim,
                     config.num_hidden, config.num_classes,
                     config.batch_size, config.device)
    else:
        # Still reachable when assertions are disabled (python -O).
        raise AssertionError('Models available: RNN, LSTM')

    # Initialize the dataset and data loader (note the +1)
    dataset = PalindromeDataset(config.input_length + 1)
    data_loader = DataLoader(dataset, config.batch_size, num_workers=1)

    # Setup the loss and optimizer
    criterion = torch.nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(), config.learning_rate)

    # Keep track of variables
    accuracy_list = []
    loss_list = []

    for step, (batch_inputs, batch_targets) in enumerate(data_loader):
        # Only for time measurement of step through network
        t1 = time.time()

        # Get model predictions and the loss
        predictions = model(batch_inputs)
        loss = criterion(predictions, batch_targets)

        # Backward pass
        optimizer.zero_grad()
        loss.backward()

        # Rescale the gradients to a norm of at most config.max_norm.  This
        # guards against exploding gradients only (it does not help with
        # vanishing ones) and must run after backward() and before step()
        # to affect the update.
        torch.nn.utils.clip_grad_norm_(model.parameters(),
                                       max_norm=config.max_norm)
        optimizer.step()

        # detach().cpu() keeps the conversion valid for tensors that live
        # on another device or still require grad.
        targets = batch_targets.detach().cpu().numpy()
        predictions_np = predictions.detach().cpu().numpy()

        # Fraction of examples whose highest-scoring class is the target
        correct = int(np.sum(np.argmax(predictions_np, axis=1) == targets))
        accuracy = correct / len(targets)

        # Just for time measurement
        t2 = time.time()
        examples_per_second = config.batch_size / float(t2 - t1)

        if step % 10 == 0:
            print(
                "[{}] Train Step {:04d}/{:04d}, Batch Size = {}, Examples/Sec = {:.2f}, "
                "Accuracy = {:.2f}, Loss = {:.3f}".format(
                    datetime.now().strftime("%Y-%m-%d %H:%M"), step,
                    config.train_steps, config.batch_size,
                    examples_per_second, accuracy, loss.item()))

        # Keep track of losses and accuracies
        loss_list.append(loss.detach().cpu().numpy())
        accuracy_list.append(accuracy)

        if step == config.train_steps:
            # If you receive a PyTorch data-loader error, check this bug report:
            # https://github.com/pytorch/pytorch/pull/9655
            break

    # Save variables; the context manager closes the file deterministically.
    data = [loss_list, accuracy_list]
    filename = 'Result_' + config.model_type + '_inputlen_' + str(
        config.input_length) + '.p'
    with open(filename, 'wb') as handle:
        pickle.dump(data, handle)

    print('Done training.')
def train(config, device="cpu"):
    """Train a VanillaRNN or LSTM with RMSprop, logging to TensorBoard.

    Reads from ``config``: ``model_type`` ('RNN' or 'LSTM'), ``input_length``,
    ``input_dim``, ``num_hidden``, ``num_classes``, ``batch_size``,
    ``learning_rate``, ``max_norm`` and ``train_steps``.

    Training stops at ``config.train_steps`` or earlier, when the mean
    accuracy over the last 200 steps changes by less than 1e-4 between two
    consecutive checks.

    Args:
        config: run configuration (see attributes above).
        device: device string; 'cpu' and 'cuda:0' additionally set the default
            tensor type, any other value leaves the default untouched.

    Side effects: writes TensorBoard events below ``tensorboard/<model>/``,
    prints progress every 100 steps and calls ``save_results``.

    Raises:
        AssertionError: if ``config.model_type`` is not 'RNN' or 'LSTM'.
    """
    assert config.model_type in ('RNN', 'LSTM')

    # Tensorboard summary writer
    run_id = datetime.now().strftime("%Y-%m-%d_%H-%M-%S_" +
                                     config.model_type.lower() + '_' +
                                     str(config.input_length))
    log_dir = 'tensorboard/' + config.model_type.lower() + '/' + run_id
    writer = SummaryWriter(log_dir=log_dir)

    # Accuracy and loss to be saved
    accuracies = []
    losses = []

    # Useful for convergence check
    avg_range = 200
    last_accuracy = 0
    convergence_threshold = 1e-4
    # Pre-bound so the final report works even when training ends before the
    # first convergence check (fewer than `avg_range` steps).
    avg_accuracy = last_accuracy

    try:
        # Torch settings
        if device == 'cpu':
            torch.set_default_tensor_type(torch.FloatTensor)
        elif device == 'cuda:0':
            torch.set_default_tensor_type(torch.cuda.FloatTensor)

        # Initialize the model that we are going to use
        if config.model_type == 'RNN':
            model = VanillaRNN(config.input_length,
                               config.input_dim,
                               config.num_hidden,
                               config.num_classes,
                               config.batch_size,
                               device=device).to(device)
        elif config.model_type == 'LSTM':
            model = LSTM(config.input_length,
                         config.input_dim,
                         config.num_hidden,
                         config.num_classes,
                         config.batch_size,
                         device=device).to(device)

        # Initialize the dataset and data loader (note the +1)
        dataset = PalindromeDataset(config.input_length + 1)
        data_loader = DataLoader(dataset, config.batch_size, num_workers=1)

        # Setup the loss and optimizer
        criterion = torch.nn.CrossEntropyLoss()
        optimizer = torch.optim.RMSprop(model.parameters(),
                                        lr=config.learning_rate)

        model.train()
        for step, (batch_inputs, batch_targets) in enumerate(data_loader):

            # Only for time measurement of step through network
            t1 = time.time()

            # Move the batch to the training device
            batch_inputs = batch_inputs.to(device=device)
            batch_targets = batch_targets.to(device=device)

            # Forward pass
            predictions = model.forward(batch_inputs)

            # Compute loss
            loss = criterion(predictions, batch_targets)

            # Reset gradients before backwards pass
            optimizer.zero_grad()

            # Backward pass
            loss.backward()

            # Clipping gradients to avoid exploding gradient problem
            torch.nn.utils.clip_grad_norm_(model.parameters(),
                                           max_norm=config.max_norm)

            # Update weights
            optimizer.step()

            # Compute accuracy
            # NOTE(review): get_accuracy is defined elsewhere; np.mean below
            # presumably needs it to return a CPU scalar -- confirm.
            accuracy = get_accuracy(predictions, batch_targets)

            # Just for time measurement
            t2 = time.time()
            examples_per_second = config.batch_size / float(t2 - t1)

            # Plain Python float for logging and printing.
            loss_value = loss.item()

            # Add accuracy and loss to the writer
            writer.add_scalars('accuracy_and_loss', {
                'acc': accuracy,
                'loss': loss_value
            }, step)

            # Store accuracy and loss. The loss is detached so the list does
            # not keep every step's autograd graph alive; it stays a tensor.
            accuracies.append(accuracy)
            losses.append(loss.detach())

            # Print information
            if step % 100 == 0:
                print(
                    "[{}] Train Step {:04d}/{:04d}, Batch Size = {}, Examples/Sec = {:.2f}, "
                    "Accuracy = {:.2f}, Loss = {:.3f}".format(
                        datetime.now().strftime("%Y-%m-%d %H:%M"), step,
                        config.train_steps, config.batch_size,
                        examples_per_second, accuracy, loss_value))

            # Check for convergence
            if step % avg_range == 0 and step != 0:
                avg_accuracy = np.mean(accuracies[-avg_range:])
                if np.abs(avg_accuracy - last_accuracy) < convergence_threshold:
                    print(
                        "The model has converged with accuracy", avg_accuracy,
                        "(" + ("+" if avg_accuracy > last_accuracy else "-") +
                        str(np.abs(avg_accuracy - last_accuracy)) + ")")
                    break
                last_accuracy = avg_accuracy

            if step == config.train_steps:
                # If you receive a PyTorch data-loader error, check this bug report:
                # https://github.com/pytorch/pytorch/pull/9655
                break

        save_results(accuracies, losses, run_id, config.model_type,
                     config.input_length, last_accuracy)
    finally:
        # Flush and release the event file even when training raises.
        writer.close()

    print('Done training. Accuracy:', avg_accuracy)
def train(config):
    """Train a fresh model for each of 30 consecutive palindrome lengths.

    For ``i`` in ``0..29`` a new VanillaRNN or LSTM is trained with RMSprop on
    palindromes of input length ``config.input_length + i``. Training for one
    length stops at ``config.train_steps``, or earlier once the last 100
    recorded steps all reached 100% accuracy.

    Reads from ``config``: ``model_type`` ('RNN' or 'LSTM'), ``device``,
    ``input_length``, ``input_dim``, ``num_hidden``, ``num_classes``,
    ``batch_size``, ``learning_rate``, ``max_norm`` and ``train_steps``.

    Returns:
        ``(seq_list, final_accuracy)``: the input lengths that were trained
        and, for each, the most recent windowed average accuracy in percent.
        (Previously these lists were built and then discarded.)

    Raises:
        AssertionError: if ``config.model_type`` is not 'RNN' or 'LSTM'.
    """
    assert config.model_type in ('RNN', 'LSTM')

    # Initialize the device which to run the model on
    device = torch.device(config.device)

    window_size = 100  # number of recent steps averaged for the stop check
    final_accuracy = []
    seq_list = []

    for i in range(30):
        input_length = config.input_length + i

        # Initialize the model that we are going to use
        if config.model_type == 'RNN':
            model = VanillaRNN(input_length,
                               config.input_dim,
                               config.num_hidden,
                               config.num_classes,
                               device=device)
        elif config.model_type == 'LSTM':
            model = LSTM(input_length,
                         config.input_dim,
                         config.num_hidden,
                         config.num_classes,
                         device=device)

        # Initialize the dataset and data loader (note the +1)
        dataset = PalindromeDataset(input_length + 1)
        data_loader = DataLoader(dataset, config.batch_size, num_workers=1)

        # Setup the loss and optimizer
        optimizer = optim.RMSprop(model.parameters(), config.learning_rate)
        criterion = nn.CrossEntropyLoss()

        accuracies = []
        losses = []
        accuracy_average = 0.0

        for step, (batch_inputs, batch_targets) in enumerate(data_loader):

            # Only for time measurement of step through network
            t1 = time.time()

            # Move the batch once instead of once per use.
            batch_inputs = batch_inputs.to(device)
            batch_targets = batch_targets.to(device)

            optimizer.zero_grad()
            prediction = model(batch_inputs)

            loss = criterion(prediction, batch_targets)
            loss.backward()

            ############################################################################
            # QUESTION: what happens here and why?
            # The total gradient norm is rescaled to at most config.max_norm
            # so that the gradients do not explode.
            ############################################################################
            torch.nn.utils.clip_grad_norm_(model.parameters(),
                                           max_norm=config.max_norm)
            ############################################################################

            optimizer.step()

            _, predicted = torch.max(prediction, 1)
            accuracy = (predicted == batch_targets).sum().item() / len(predicted)

            # Just for time measurement
            t2 = time.time()
            examples_per_second = config.batch_size / float(t2 - t1)

            loss_value = loss.item()
            accuracies.append(accuracy * 100)
            # Store a float, not the tensor, so no autograd graph is retained.
            losses.append(loss_value)

            if step % 10 == 0:
                print(
                    "[{}] Train Step {:04d}/{:04d}, Batch Size = {}, Examples/Sec = {:.2f}, "
                    "Accuracy = {:.2f}, Loss = {:.3f}".format(
                        datetime.now().strftime("%Y-%m-%d %H:%M"), step,
                        config.train_steps, config.batch_size,
                        examples_per_second, accuracy, loss_value))

            if step % 100 == 0:
                # Stop training this length early once the model predicted
                # with 100% accuracy over a full window of recent steps.
                window = accuracies[-window_size:]
                # Divide by the real window length so the average is correct
                # while fewer than `window_size` steps have been recorded.
                accuracy_average = sum(window) / len(window)
                if len(window) == window_size and accuracy_average == 100:
                    break

            if step == config.train_steps:
                # If you receive a PyTorch data-loader error, check this bug report:
                # https://github.com/pytorch/pytorch/pull/9655
                break

        final_accuracy.append(accuracy_average)
        seq_list.append(input_length)

    # NOTE(review): the original indentation was lost; the message is assumed
    # to be printed once after all lengths rather than once per length.
    print('Done training.')
    return seq_list, final_accuracy
def train(config):
    """Train a VanillaRNN or LSTM with RMSprop and return sampled metrics.

    Reads from ``config``: ``model_type`` ('RNN' or 'LSTM'), ``input_length``,
    ``input_dim``, ``num_hidden``, ``num_classes``, ``batch_size``,
    ``learning_rate``, ``max_norm``, ``eval_freq`` and ``train_steps``.
    The device is CUDA when available, otherwise CPU.

    Returns:
        A list of ``[step, accuracy, loss]`` entries (Python numbers), one
        per evaluated step.

    Raises:
        AssertionError: if ``config.model_type`` is not 'RNN' or 'LSTM'.
    """
    assert config.model_type in ('RNN', 'LSTM')

    # Initialize the device which to run the model on
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    # Initialize the model that we are going to use
    if config.model_type == 'RNN':
        model = VanillaRNN(seq_length=config.input_length,
                           input_dim=config.input_dim,
                           num_hidden=config.num_hidden,
                           num_classes=config.num_classes,
                           batch_size=config.batch_size,
                           device=device)
    elif config.model_type == 'LSTM':
        model = LSTM(seq_length=config.input_length,
                     input_dim=config.input_dim,
                     num_hidden=config.num_hidden,
                     num_classes=config.num_classes,
                     batch_size=config.batch_size,
                     device=device)

    model.to(device)

    # Initialize the dataset and data loader (note the +1)
    dataset = PalindromeDataset(config.input_length + 1)
    data_loader = DataLoader(dataset, config.batch_size, num_workers=1)

    # Setup the loss and optimizer
    criterion = torch.nn.CrossEntropyLoss()
    optimizer = torch.optim.RMSprop(params=model.parameters(),
                                    lr=config.learning_rate)

    # evaluation metrics
    results = []

    print_setting(config)

    for step, (batch_inputs, batch_targets) in enumerate(data_loader):

        # Only for time measurement of step through network
        t1 = time.time()

        batch_inputs = batch_inputs.to(device)
        batch_targets = batch_targets.to(device)

        # forward pass
        predictions = model.forward(batch_inputs)

        # compute loss
        loss = criterion(predictions, batch_targets)

        # backward pass & updates
        # set gradients to zero
        optimizer.zero_grad()
        loss.backward()

        ############################################################################
        # QUESTION: what happens here and why?
        # Prevents exploding gradients by rescaling to a limit specified by config.max_norm
        # Forcing gradients to be within a certain norm to ensure reasonable updates
        ############################################################################
        torch.nn.utils.clip_grad_norm_(model.parameters(),
                                       max_norm=config.max_norm)
        ############################################################################

        optimizer.step()

        # Divide by the number of samples actually in this batch, so a batch
        # shorter than config.batch_size is not under-reported.
        hits = (predictions.argmax(dim=1) == batch_targets).sum().float()
        accuracy = hits / batch_targets.size(0)

        # Just for time measurement
        t2 = time.time()
        examples_per_second = config.batch_size / float(t2 - t1)

        if step % config.eval_freq == 0:
            accuracy_value = accuracy.item()
            loss_value = loss.float().item()

            print(
                "[{}] Train Step {:04d}/{:04d}, Batch Size = {}, Examples/Sec = {:.2f}, "
                "Accuracy = {:.2f}, Loss = {:.3f}".format(
                    datetime.now().strftime("%Y-%m-%d %H:%M"), step,
                    config.train_steps, config.batch_size,
                    examples_per_second, accuracy_value, loss_value))

            # NOTE(review): the original indentation was lost; results are
            # assumed to be sampled only on evaluated steps (each entry
            # carries its step index) -- confirm.
            results.append([step, accuracy_value, loss_value])

        if step == config.train_steps:
            # If you receive a PyTorch data-loader error, check this bug report:
            # https://github.com/pytorch/pytorch/pull/9655
            break

    print('Done training. \n')

    return results
X[:, t, :].view(1, -1, 1), X[:, t, :].view(1, -1, 1), X[:, t, :].view(1, -1, 1), X[:, t, :].view(1, -1, 1) ], dim=0) # term1 = torch.einsum('kij,bij->kji', X_in, self.W_x) # kij,kij->kij , kij,kjl->kil tmm1 = torch.einsum('kij,kjl->kil', X_t, self.W_x) tmm2 = torch.einsum('ij,kjl->kil', h_t, self.W_h) gates_t = self.activation_gates(tmm1 + tmm2 + self.bias_gates) c_t = gates_t[0, :, :] * gates_t[1, :, :] + c_t * gates_t[2, :, :] h_t = self.activation_hidden(c_t) * gates_t[3, :, :] return torch.matmul(h_t, self.W_p) + self.bias_p if __name__ == '__main__': palindrom_generator = PalindromeDataset(3) pali = palindrom_generator.generate_palindrome() print(pali) ############################### Comment out Before Submission ####################### # defaults config = { 'model_type': 'LSTM', 'seq_length': 5, 'input_length': 10, 'input_dim': 1, 'num_classes': 10, 'num_hidden': 100, 'batch_size': 128, 'learning_rate': 0.001, 'train_steps': 10000,