def train(config):

    # Initialize the model that we are going to use
    model = VanillaRNN(config.input_length, config.input_dim, config.num_hidden,
                       config.num_classes, config.batch_size)

    # Initialize the dataset and data loader (leave the +1)
    dataset = PalindromeDataset(config.input_length + 1)
    data_loader = DataLoader(dataset, config.batch_size, num_workers=1)

    # Setup the loss and optimizer
    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.RMSprop(model.parameters(), lr=config.learning_rate)

    for step, (batch_inputs, batch_targets) in enumerate(data_loader):

        # Count correct predictions over the batch
        hit = 0
        n, dim = batch_inputs.size()
        batch_inputs_T = torch.transpose(batch_inputs, 0, 1)
        y_hat_oh = model.forward(batch_inputs_T)
        for i in range(n):
            # requires: from operator import itemgetter
            y_pre, _ = max(enumerate(y_hat_oh[i]), key=itemgetter(1))
            y = batch_targets[i].item()
            if y_pre == y:
                hit += 1

        loss = criterion(y_hat_oh, batch_targets)
        accuracy = hit / n * 100

        optimizer.zero_grad()
        loss.backward()
        # Clip the gradient norm to deal with exploding gradients. Clipping must
        # happen after backward() (so the gradients exist) and before step();
        # clip_grad_norm is deprecated, so the in-place clip_grad_norm_ is used.
        torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=config.max_norm)
        optimizer.step()

        if step % 10 == 0:
            print("loss: ", loss.item())
            print("accuracy: ", accuracy)

        if step == config.train_steps:
            # If you receive a PyTorch data-loader error, check this bug report:
            # https://github.com/pytorch/pytorch/pull/9655
            break

    print('Done training.')
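# For intuition, here is a minimal sketch of what the clipping call above
# computes, written against torch only. This is NOT the library source, just
# the semantics: take the total L2 norm over all parameter gradients and
# rescale them in place when that norm exceeds max_norm.
import torch

def clip_grad_norm_sketch(parameters, max_norm):
    grads = [p.grad for p in parameters if p.grad is not None]
    # Total norm of the concatenation of all gradients
    total_norm = torch.norm(torch.stack([g.norm(2) for g in grads]), 2)
    clip_coef = max_norm / (total_norm + 1e-6)
    if clip_coef < 1:
        for g in grads:
            g.mul_(clip_coef)  # rescale in place, as the trailing underscore implies
    return total_norm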
def train(config):
    assert config.model_type in ('RNN', 'LSTM')

    # Initialize the device to run the model on
    device = torch.device(config.device)

    # Initialize the model that we are going to use
    if config.model_type == 'RNN':
        model = VanillaRNN(config.input_length, config.input_dim, config.num_hidden,
                           config.num_classes, config.batch_size, device=device)
    else:
        model = LSTM(config.input_length, config.input_dim, config.num_hidden,
                     config.num_classes, config.batch_size, device=device)

    # Initialize the dataset and data loader (note the +1)
    dataset = PalindromeDataset(config.input_length + 1)
    data_loader = DataLoader(dataset, config.batch_size, num_workers=1)

    # Setup the loss and optimizer
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.RMSprop(model.parameters(), lr=config.learning_rate)
    # Alternatives tried:
    # optimizer = optim.Adam(model.parameters(), lr=config.learning_rate)
    # optimizer = torch.optim.SGD(model.parameters(), lr=config.learning_rate)

    accuracies = []
    losses = []

    for step, (batch_inputs, batch_targets) in enumerate(data_loader):

        # Only for time measurement of step through network
        t1 = time.time()

        ########################################################################
        # QUESTION: what happens here and why?
        # Clipping gradients helps prevent exploding gradients (hence "clipping").
        # However, it does nothing against vanishing gradients in RNNs; for
        # vanishing gradients, LSTMs are useful.
        ########################################################################

        batch_inputs = batch_inputs.to(device)
        batch_targets = batch_targets.to(device)

        out = model.forward(batch_inputs)
        loss = criterion(out, batch_targets)

        optimizer.zero_grad()
        loss.backward()
        # clip_grad_norm is deprecated; clip_grad_norm_ is the in-place version
        torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=config.max_norm)
        optimizer.step()

        # Just for time measurement
        t2 = time.time()
        examples_per_second = config.batch_size / float(t2 - t1)

        if step % 100 == 0:
            compare = (out.argmax(dim=1) == batch_targets)
            accuracy = compare.sum().item() / compare.size(0)
            accuracies.append(accuracy)
            losses.append(loss.item())

            print("[{}] Train Step {:04d}/{:04d}, Batch Size = {}, Examples/Sec = {:.2f}, "
                  "Accuracy = {:.2f}, Loss = {:.3f}".format(
                      datetime.now().strftime("%Y-%m-%d %H:%M"), step,
                      config.train_steps, config.batch_size,
                      examples_per_second, accuracy, loss))

        if step == config.train_steps:
            # If you receive a PyTorch data-loader error, check this bug report:
            # https://github.com/pytorch/pytorch/pull/9655
            break

    plt.plot(accuracies, label='accuracies')
    plt.plot(losses, label='losses')
    plt.tight_layout()
    plt.legend()
    plt.show()

    print('Done training.')
def train(config):
    assert config.model_type in ('RNN', 'LSTM')

    # Initialize the device to run the model on
    device = 'cuda' if torch.cuda.is_available() else 'cpu'
    print('Currently using:', device)

    # Initialize the model that we are going to use
    input_length = config.input_length
    input_dim = config.input_dim
    num_classes = config.num_classes
    num_hidden = config.num_hidden
    batch_size = config.batch_size
    learning_rate = config.learning_rate

    if config.model_type == 'RNN':
        model = VanillaRNN(input_length, input_dim, num_hidden, num_classes,
                           batch_size, device).double()
    if config.model_type == 'LSTM':
        model = LSTM(input_length, input_dim, num_hidden, num_classes,
                     batch_size, device).double()
    model = model.to(device)

    # Initialize the dataset and data loader (note the +1)
    dataset = PalindromeDataset(input_length + 1)
    data_loader = DataLoader(dataset, config.batch_size, num_workers=1)

    # Setup the loss and optimizer
    criterion = torch.nn.CrossEntropyLoss()
    optimizer = torch.optim.RMSprop(model.parameters(), lr=learning_rate)

    accuracy_list = []
    loss_list = []

    for step, (batch_inputs, batch_targets) in enumerate(data_loader):

        # Only for time measurement of step through network
        t1 = time.time()

        batch_inputs = batch_inputs.to(device)
        batch_targets = batch_targets.to(device)

        output = model.forward(batch_inputs.transpose(0, 1).double())

        optimizer.zero_grad()
        output_indices = torch.argmax(output, dim=1)
        loss_for_backward = criterion(output, batch_targets)
        loss_for_backward.backward()

        ############################################################################
        # QUESTION: what happens here and why?
        # The gradient norm is clipped to max_norm to prevent exploding gradients.
        ############################################################################
        torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=config.max_norm)
        ############################################################################

        optimizer.step()

        correct_indices = output_indices == batch_targets
        accuracy = int(sum(correct_indices)) / int(len(correct_indices))

        # Just for time measurement
        t2 = time.time()
        examples_per_second = config.batch_size / float(t2 - t1)

        if step % 10 == 0:
            print("[{}] Train Step {:04d}/{:04d}, Batch Size = {}, Examples/Sec = {:.2f}, "
                  "Accuracy = {:.2f}, Loss = {:.3f}".format(
                      datetime.now().strftime("%Y-%m-%d %H:%M"), step,
                      config.train_steps, config.batch_size,
                      examples_per_second, accuracy, loss_for_backward))

        accuracy_list.append(accuracy)
        loss_list.append(loss_for_backward.item())

        # Stop at the step limit, or once the last three accuracies average to 1.0
        if step == config.train_steps or \
                (len(accuracy_list) > 10 and sum(accuracy_list[-3:]) / 3 == 1.0):
            # If you receive a PyTorch data-loader error, check this bug report:
            # https://github.com/pytorch/pytorch/pull/9655
            break

    print('Done training.')

    line = ' '.join((str(config.model_type), 'Palindrome length:', str(input_length),
                     'Accuracy:', str(accuracy_list), 'Loss', str(loss_list)))
    with open('LSTMMMMM.txt', 'a') as file:
        file.write(line + '\n')
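# Dtype note on the .double() calls in the snippet above (a standalone sketch,
# independent of the models used there): model weights and inputs must share a
# dtype, otherwise PyTorch raises an "expected scalar type" RuntimeError.
import torch

lin = torch.nn.Linear(3, 2).double()   # float64 weights
x32 = torch.randn(1, 3)                # float32 input
try:
    lin(x32)
except RuntimeError as err:
    print('dtype mismatch:', err)
print(lin(x32.double()))               # works once the input is cast to float64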
def train(config):
    assert config.model_type in ('RNN', 'LSTM')

    # Initialize the device to run the model on
    device = torch.device(config.device)

    # Initialize the model that we are going to use
    if config.model_type == "RNN":
        model = VanillaRNN(seq_length=config.input_length,
                           input_dim=config.input_dim,
                           num_hidden=config.num_hidden,
                           batch_size=config.batch_size,
                           num_classes=config.num_classes,
                           device=device)
    elif config.model_type == "LSTM":
        model = LSTM(seq_length=config.input_length,
                     input_dim=config.input_dim,
                     num_hidden=config.num_hidden,
                     num_classes=config.num_classes,
                     device=device,
                     batch_size=config.batch_size)

    # Send model to device
    model.to(device)

    # Initialize the dataset and data loader (note the +1)
    dataset = PalindromeDataset(config.input_length + 1)
    data_loader = DataLoader(dataset, config.batch_size, num_workers=1)

    # Setup the loss and optimizer
    criterion = torch.nn.CrossEntropyLoss()
    optimizer = torch.optim.RMSprop(model.parameters(), lr=config.learning_rate)

    # Track training statistics
    train_accuracies = []
    train_losses = []

    for step, (batch_inputs, batch_targets) in enumerate(data_loader):

        # Only for time measurement of step through network
        t1 = time.time()

        # Move input batches to the device with the expected dtypes
        # (.to() suffices here; wrapping an existing tensor in torch.tensor()
        # is unnecessary and triggers a warning)
        input_sequences = batch_inputs.to(device=device, dtype=torch.float)
        targets = batch_targets.to(device=device, dtype=torch.long)

        # Reset gradients
        optimizer.zero_grad()

        # Forward pass: predict classes for the input sequences
        predictions = model.forward(input_sequences)

        # Accuracy over the batch
        accuracy = torch.div(
            torch.sum(targets == predictions.argmax(dim=1)).to(torch.float),
            config.batch_size)

        # Compute the loss per batch and backpropagate
        loss = criterion(predictions, targets)
        loss.backward()

        ############################################################################
        # QUESTION: what happens here and why?
        # ANSWER: Gradients can grow at each unrolled time step, so very large
        # gradients can appear, which leads to learning problems. Clipping the
        # gradient norm to a limit overcomes that issue.
        ############################################################################
        torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=config.max_norm)
        ############################################################################

        # Update weights according to the optimizer
        optimizer.step()

        # Just for time measurement
        t2 = time.time()
        examples_per_second = config.batch_size / float(t2 - t1)

        # Save stats for each step
        train_accuracies.append(accuracy.item())
        train_losses.append(loss.item())

        if step % 10 == 0:
            print("[{}] Train Step {:04d}/{:04d}, Batch Size = {}, Examples/Sec = {:.2f}, "
                  "Accuracy = {:.2f}, Loss = {:.3f}".format(
                      datetime.now().strftime("%Y-%m-%d %H:%M"), step,
                      config.train_steps, config.batch_size,
                      examples_per_second, accuracy, loss))

        # If the last 50 accuracies are already 1 (avg = 1), stop the training:
        # convergence is reached and unnecessary computations don't have to be done
        if len(train_accuracies) >= 50:
            avg_accuracy = np.mean(train_accuracies[-50:])
            if avg_accuracy == 1:
                print("\nTraining finished for length: {} after {} steps".format(
                    config.input_length, step))
                print("Avg Accuracy : {:.3f}".format(avg_accuracy))
                break

        if step == config.train_steps:
            # If you receive a PyTorch data-loader error, check this bug report:
            # https://github.com/pytorch/pytorch/pull/9655
            break

    print('Done training.')

    return max(train_accuracies), step
def train(config):
    assert config.model_type in ('RNN', 'LSTM')

    # Initialize the device to run the model on; fall back to CPU when CUDA
    # is requested but not available
    if config.device == 'cuda' and not torch.cuda.is_available():
        device = torch.device('cpu')
    else:
        device = torch.device(config.device)

    # Initialize the model that we are going to use
    if config.model_type == 'RNN':
        model = VanillaRNN(seq_length=config.input_length,
                           input_dim=config.input_dim,
                           num_hidden=config.num_hidden,
                           num_classes=config.num_classes,
                           batch_size=config.batch_size,
                           device=device)
    elif config.model_type == 'LSTM':
        model = LSTM(seq_length=config.input_length,
                     input_dim=config.input_dim,
                     num_hidden=config.num_hidden,
                     num_classes=config.num_classes,
                     batch_size=config.batch_size,
                     device=device)

    # Make the results directory (if it doesn't exist)
    RESULTS_DIR = Path.cwd() / 'results'
    RESULTS_DIR.mkdir(parents=True, exist_ok=True)
    results_filepath = RESULTS_DIR / (model.__class__.__name__ + '.csv')

    # Initialize the dataset and data loader (note the +1)
    dataset = PalindromeDataset(config.input_length + 1)
    data_loader = DataLoader(dataset, config.batch_size, num_workers=1)

    # Setup the loss and optimizer
    criterion = torch.nn.CrossEntropyLoss()
    optimizer = torch.optim.RMSprop(model.parameters(), lr=config.learning_rate)

    results = {
        'T': [],
        'step': [],
        'accuracy': [],
        'loss': [],
    }

    for step, (batch_inputs, batch_targets) in enumerate(data_loader):

        # Only for time measurement of step through network
        t1 = time.time()

        # Send the data to the device
        batch_inputs = batch_inputs.to(device)
        batch_targets = batch_targets.to(device)

        # (Re)set the optimizer gradient to 0
        optimizer.zero_grad()

        # Forward pass the mini-batch (calling the criterion directly is
        # preferred over criterion.forward())
        pred_targets = model.forward(batch_inputs)
        loss = criterion(pred_targets, batch_targets)

        # Backpropagate the loss
        loss.backward()

        ############################################################################
        # QUESTION: what happens here and why?
        # The gradient norm is clipped to max_norm to prevent exploding gradients.
        # Note: clip_grad_norm is deprecated, use clip_grad_norm_ instead.
        ############################################################################
        torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=config.max_norm)
        ############################################################################

        optimizer.step()

        accuracy = (pred_targets.argmax(dim=1) == batch_targets).float().mean()

        # Append the results
        results['T'].append(config.input_length)
        results['step'].append(step)
        results['accuracy'].append(accuracy.item())
        results['loss'].append(loss.item())

        # Just for time measurement
        t2 = time.time()
        examples_per_second = config.batch_size / float(t2 - t1)

        if step % 1000 == 0:
            print(f'[{datetime.now().strftime("%Y-%m-%d %H:%M")}] '
                  f'Train Step {step:04d}/{config.train_steps:04d}, '
                  f'Batch Size = {config.batch_size}, '
                  f'Examples/Sec = {examples_per_second:.2f}, '
                  f'Accuracy = {accuracy:.2f}, Loss = {loss:.3f}')

        if step == config.train_steps:
            results_df = df.from_dict(results)
            if not results_filepath.exists():
                results_df.to_csv(results_filepath, sep=';', mode='w',
                                  encoding='utf-8', index=False)
            else:
                results_df.to_csv(results_filepath, sep=';', mode='a',
                                  header=False, encoding='utf-8', index=False)
            # If you receive a PyTorch data-loader error, check this bug report:
            # https://github.com/pytorch/pytorch/pull/9655
            break

    print('Done training.')
def train(config, n_run):
    assert config.model_type in ('RNN', 'LSTM')

    # Initialize the device to run the model on
    device = torch.device(config.device)

    # Train on the T-1 first digits
    config.input_length = config.input_length - 1

    # Initialize the model that we are going to use
    if config.model_type == 'RNN':
        model = VanillaRNN(config.input_length, config.input_dim, config.num_hidden,
                           config.num_classes, config.batch_size, device=device)
    elif config.model_type == 'LSTM':
        model = LSTM(config.input_length, config.input_dim, config.num_hidden,
                     config.num_classes, config.batch_size, device=device)

    # Initialize the dataset and data loader (note the +1)
    dataset = PalindromeDataset(config.input_length + 1)
    data_loader = DataLoader(dataset, config.batch_size, num_workers=1)

    # Setup the loss and optimizer
    criterion = torch.nn.CrossEntropyLoss()
    optimizer = torch.optim.RMSprop(model.parameters(), lr=config.learning_rate)
    model.to(device)

    train_loss = []
    train_acc = []
    t_loss = []
    t_acc = []

    # Convergence condition
    eps = 1e-6

    for step, (batch_inputs, batch_targets) in enumerate(data_loader):

        # Only for time measurement of step through network
        t1 = time.time()

        # Move inputs and labels to the device (.to() suffices; wrapping an
        # existing tensor in torch.tensor() triggers a warning)
        x = batch_inputs.to(device)
        y = batch_targets.to(device)

        # Forward pass
        pred = model.forward(x)
        loss = criterion(pred, y)
        t_loss.append(loss.item())

        # Clear stored gradients, then backward pass
        optimizer.zero_grad()
        loss.backward()

        ############################################################################
        # QUESTION: what happens here and why?
        # ANSWER: torch.nn.utils.clip_grad_norm_() is used to prevent exploding
        # gradients by 'clipping' the norm of the gradients, restraining the
        # gradient values to a certain threshold. This essentially acts as a
        # limit on the size of the updates of the parameters of every layer,
        # ensuring that the parameter values don't change too much from their
        # previous values.
        ############################################################################
        torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=config.max_norm)
        ############################################################################

        optimizer.step()
        accuracy = get_accuracy(pred, y, config.batch_size)
        t_acc.append(accuracy.item())

        # Just for time measurement
        t2 = time.time()
        examples_per_second = config.batch_size / float(t2 - t1)

        if step % 1000 == 0:
            print("[{}] Train Step {:04d}/{:04d}, Batch Size = {}, Examples/Sec = {:.2f}, "
                  "Accuracy = {:.2f}, Loss = {:.3f}".format(
                      datetime.now().strftime("%Y-%m-%d %H:%M"), step,
                      config.train_steps, config.batch_size,
                      examples_per_second, accuracy, loss))

        if step % 100 == 0:
            # Get loss and accuracy averages over the last 100 steps
            train_loss.append(np.mean(t_loss))
            train_acc.append(np.mean(t_acc))
            t_loss = []
            t_acc = []
            if step > 0 and abs(train_loss[-1] - train_loss[-2]) < eps:
                break

        if step == config.train_steps:
            # If you receive a PyTorch data-loader error, check this bug report:
            # https://github.com/pytorch/pytorch/pull/9655
            break

    print('\nDone training.\n')

    # Save trained model and results
    if config.model_type == 'RNN':
        torch.save(model, "./Results/RNN/" + str(config.input_length) + "_RNN_model")
        np.save("./Results/RNN/" + str(config.input_length) + "_RNN_accuracy", train_acc)
        np.save("./Results/RNN/" + str(config.input_length) + "_RNN_loss", train_loss)
        # For SURFsara runs, files were saved to the working directory instead,
        # suffixed with the run number:
        # torch.save(model, str(config.input_length + 1) + "_RNN_model_" + str(n_run))
        # np.save(str(config.input_length + 1) + "_RNN_accuracy_" + str(n_run), train_acc)
        # np.save(str(config.input_length + 1) + "_RNN_loss_" + str(n_run), train_loss)
    elif config.model_type == 'LSTM':
        torch.save(model, "./Results/LSTM/" + str(config.input_length) + "_LSTM_model")
        np.save("./Results/LSTM/" + str(config.input_length) + "_LSTM_accuracy", train_acc)
        np.save("./Results/LSTM/" + str(config.input_length) + "_LSTM_loss", train_loss)
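# The get_accuracy helper used above is not shown in the snippet. A minimal
# sketch consistent with how it is called (its .item() is taken, so it should
# return a 0-dim tensor); the author's actual helper may differ:
def get_accuracy(predictions, targets, batch_size):
    # predictions: (batch_size, num_classes) logits; targets: (batch_size,) labels
    return (predictions.argmax(dim=1) == targets).sum().float() / batch_size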
def train(config):
    assert config.model_type in ('RNN', 'LSTM')

    # Initialize the device to run the model on
    device = torch.device(config.device)

    if config.model_type == 'RNN':
        model = VanillaRNN(config.input_length, config.input_dim, config.num_hidden,
                           config.num_classes, config.batch_size, device)
    else:
        model = LSTM(config.input_length, config.input_dim, config.num_hidden,
                     config.num_classes, config.batch_size, device)

    # Initialize the dataset and data loader (note the +1)
    dataset = PalindromeDataset(config.input_length + 1)
    data_loader = DataLoader(dataset, config.batch_size, num_workers=1)

    # Setup the loss and optimizer
    criterion = torch.nn.CrossEntropyLoss()
    optimizer = torch.optim.RMSprop(model.parameters(), lr=config.learning_rate)

    for step, (batch_inputs, batch_targets) in enumerate(data_loader):

        # Only for time measurement of step through network
        t1 = time.time()

        # Move the batch to the device the model runs on
        batch_inputs = batch_inputs.to(device)
        batch_targets = batch_targets.to(device)

        output = model.forward(batch_inputs)
        loss = criterion(output, batch_targets)

        optimizer.zero_grad()
        loss.backward()

        ############################################################################
        # QUESTION: what happens here and why?
        # It clips the gradient norm so we don't get exploding gradients.
        ############################################################################
        torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=config.max_norm)
        ############################################################################

        optimizer.step()

        loss = loss.item()
        accuracy = (torch.max(output, 1)[1] == batch_targets).float().mean()

        # Just for time measurement
        t2 = time.time()
        examples_per_second = config.batch_size / float(t2 - t1)

        if step % 10 == 0:
            print("[{}] Train Step {:04d}/{:04d}, Batch Size = {}, Examples/Sec = {:.2f}, "
                  "Accuracy = {:.2f}, Loss = {:.3f}".format(
                      datetime.now().strftime("%Y-%m-%d %H:%M"), step,
                      config.train_steps, config.batch_size,
                      examples_per_second, accuracy, loss))

        if step == config.train_steps:
            # If you receive a PyTorch data-loader error, check this bug report:
            # https://github.com/pytorch/pytorch/pull/9655
            break

    print('Done training.')
def train(config):
    assert config.model_type in ('RNN', 'LSTM')

    # Initialize the device to run the model on
    device = torch.device(config.device)

    # Initialize the model that we are going to use
    if config.model_type == 'RNN':
        model = VanillaRNN(config.input_length, config.input_dim, config.num_hidden,
                           config.num_classes, config.batch_size, device)
    else:
        model = LSTM(config.input_length, config.input_dim, config.num_hidden,
                     config.num_classes, config.batch_size, device)

    # Initialize the dataset and data loader (note the +1)
    dataset = PalindromeDataset(config.input_length + 1)
    data_loader = DataLoader(dataset, config.batch_size, num_workers=1)

    # Setup the loss and optimizer
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.RMSprop(model.parameters(), lr=config.learning_rate)
    # Defaults kept: alpha=0.99, eps=1e-8, weight_decay=0, momentum=0, centered=False

    accuracies = []
    losses = []
    old_loss = float('inf')

    for step, (batch_inputs, batch_targets) in enumerate(data_loader):

        # Only for time measurement of step through network
        t1 = time.time()

        # Add a trailing feature dimension, because each input is a scalar digit
        batch_inputs = batch_inputs[..., None]

        batch_inputs = batch_inputs.to(device)
        batch_targets = batch_targets.to(device)

        batch_predictions = model.forward(batch_inputs)
        loss = criterion(batch_predictions, batch_targets)
        losses.append(loss.item())

        # Zero the gradients before the backward pass (model.zero_grad() and
        # optimizer.zero_grad() are equivalent when the optimizer holds all
        # model parameters)
        model.zero_grad()
        loss.backward()

        # Clip the gradient norm to prevent exploding gradients; the call
        # belongs after backward() and before optimizer.step()
        torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=config.max_norm)
        optimizer.step()

        accuracy = accuracy_(batch_predictions, batch_targets)
        accuracies.append(accuracy)

        # Just for time measurement
        t2 = time.time()
        examples_per_second = config.batch_size / float(t2 - t1)

        if step % 10 == 0:
            with open(config.save_logs, 'a') as file:
                file.write("[{}] Train Step {:04d}/{:04d}, Batch Size = {}, "
                           "Examples/Sec = {:.2f}, Accuracy = {:.2f}, Loss = {:.3f}"
                           .format(datetime.now().strftime("%Y-%m-%d %H:%M"), step,
                                   config.train_steps, config.batch_size,
                                   examples_per_second, accuracy, loss) + '\n')

        # Stop at the step limit, or if two consecutive losses are identical
        if step == config.train_steps or old_loss == loss.item():
            # If you receive a PyTorch data-loader error, check this bug report:
            # https://github.com/pytorch/pytorch/pull/9655
            break

        old_loss = loss.item()

    print('Done training.')

    return losses, accuracies
def train(config):
    assert config.model_type in ('RNN', 'LSTM')

    # Initialize the device to run the model on
    device = torch.device(config.device)

    # Initialize the model that we are going to use
    if config.model_type == 'RNN':
        model = VanillaRNN(config.input_length, config.input_dim, config.num_hidden,
                           config.num_classes, config.batch_size, device)
    else:
        model = LSTM(config.input_length, config.input_dim, config.num_hidden,
                     config.num_classes, config.batch_size, device)
    print(model)

    # Initialize the dataset and data loader (note the +1)
    dataset = PalindromeDataset(config.input_length + 1)
    data_loader = DataLoader(dataset, config.batch_size, num_workers=1)

    # Setup the loss and optimizer
    criterion = torch.nn.CrossEntropyLoss()
    optimizer = torch.optim.RMSprop(model.parameters(), config.learning_rate)

    for step, (batch_inputs, batch_targets) in enumerate(data_loader):

        # Only for time measurement of step through network
        t1 = time.time()

        model_outputs = model.forward(batch_inputs)

        loss = criterion(torch.t(model_outputs), batch_targets)
        accuracy = accuracy_(model_outputs, batch_targets)

        optimizer.zero_grad()
        loss.backward()

        ############################################################################
        # QUESTION: what happens here and why?
        # This call clips the norm of the gradient to an acceptable level.
        # It effectively puts a limit on the size of the parameter updates.
        # Note that it must run after loss.backward() (so gradients exist)
        # and before optimizer.step().
        ############################################################################
        torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=config.max_norm)
        ############################################################################

        optimizer.step()

        # Just for time measurement
        t2 = time.time()
        examples_per_second = config.batch_size / float(t2 - t1)

        # Optional TensorBoard logging:
        # writer.add_scalar('accuracy', accuracy, step)
        # writer.add_scalar('loss', loss, step)

        if step % 10 == 0:
            print("[{}] Train Step {:04d}/{:04d}, Batch Size = {}, Examples/Sec = {:.2f}, "
                  "Accuracy = {:.2f}, Loss = {:.3f}".format(
                      datetime.now().strftime("%Y-%m-%d %H:%M"), step,
                      config.train_steps, config.batch_size,
                      examples_per_second, accuracy, loss))

        if step == config.train_steps:
            # If you receive a PyTorch data-loader error, check this bug report:
            # https://github.com/pytorch/pytorch/pull/9655
            break

    print('Done training.')
def train(config):

    # Print parameters
    print_config(config)

    config.model_type = config.model_type.lower()
    assert config.model_type in ('rnn', 'lstm', 'rrn')

    # Initialize the device to run the model on
    if config.device.lower() == 'cuda':
        # Check if CUDA is available
        device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    else:
        # CPU is the standard option
        device = torch.device('cpu')

    # Initialize the model that we are going to use
    if config.model_type == 'rnn':
        model = VanillaRNN(seq_length=config.input_length,
                           input_dim=config.input_dim,
                           num_hidden=config.num_hidden,
                           num_classes=config.num_classes,
                           batch_size=config.batch_size,
                           device=device)
    elif config.model_type == 'lstm':
        model = LSTM(seq_length=config.input_length,
                     input_dim=config.input_dim,
                     num_hidden=config.num_hidden,
                     num_classes=config.num_classes,
                     batch_size=config.batch_size,
                     device=device)
    elif config.model_type == 'rrn':
        model = RRN(seq_length=config.input_length,
                    input_dim=config.input_dim,
                    num_hidden=config.num_hidden,
                    num_classes=config.num_classes,
                    batch_size=config.batch_size,
                    device=device)

    # Initialize the dataset and data loader (note the +1)
    dataset = PalindromeDataset(config.input_length + 1)
    data_loader = DataLoader(dataset, config.batch_size, num_workers=0)

    # Setup the loss and optimizer
    criterion = torch.nn.CrossEntropyLoss()
    optimizer = torch.optim.RMSprop(model.parameters(), lr=config.learning_rate)

    # Keep stats
    train_acc = np.zeros(config.train_steps + 1)
    first_best_acc = 0
    acc_MA = 0

    for step, (batch_inputs, batch_targets) in enumerate(data_loader):

        # Only for time measurement of step through network
        t1 = time.time()

        # Move batches to the device with the expected dtypes (.to() suffices;
        # wrapping an existing tensor in torch.tensor() triggers a warning)
        x = batch_inputs.to(device=device, dtype=torch.float)
        y_true = batch_targets.to(device=device, dtype=torch.long)

        # Forward pass
        y_pred = model.forward(x)
        loss = criterion(y_pred, y_true)

        # Backward pass
        optimizer.zero_grad()
        loss.backward()

        ############################################################################
        # QUESTION: what happens here and why?
        # clip_grad_norm_() is a method to avoid exploding gradients. It clips
        # gradient norms above max_norm down to max_norm. (clip_grad_norm
        # without the trailing underscore is deprecated.)
        ############################################################################
        torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=config.max_norm)
        ############################################################################

        optimizer.step()

        train_acc[step] = accuracy(y_pred, y_true, config)

        # Just for time measurement
        t2 = time.time()
        examples_per_second = config.batch_size / (float(t2 - t1) + 1e-6)

        if step % config.print_every == 0:
            print("[{}] Train Step {:04d}/{:04d}, Batch Size = {}, Examples/Sec = {:.2f}, "
                  "Accuracy = {:.2f}, Loss = {:.3f}".format(
                      datetime.now().strftime("%Y-%m-%d %H:%M"), step,
                      config.train_steps, config.batch_size,
                      examples_per_second, train_acc[step], loss))
            print(f"x: {x[0, :]}, y_pred: {y_pred[0, :].argmax()}, y_true: {y_true[0]}")

        # Moving average of the accuracy over the last 5 steps
        acc_MA = train_acc[step - 4:step + 1].sum() / 5

        if step == config.train_steps or acc_MA == 1.0:
            # If you receive a PyTorch data-loader error, check this bug report:
            # https://github.com/pytorch/pytorch/pull/9655
            break

    print('Done training.')

    # Drop the unused tail of the stats array if training stopped early, so
    # "last acc" and "num steps" below reflect the actual run
    train_acc = train_acc[:step + 1]

    # Save the final model
    torch.save(model, config.model_type + "_model.pt")
    np.save("train_acc_" + config.model_type + str(config.input_length), train_acc)

    if config.experiment:
        stats = {}
        stats["last acc"] = train_acc[-1]
        first_best_acc = np.argmax(train_acc)
        stats["best acc"] = train_acc[first_best_acc]
        stats["step best acc"] = first_best_acc
        stats["num steps"] = len(train_acc)
        stats["accs"] = train_acc
        return stats
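# Note on the torch.save(model, ...) calls above: saving a whole module pickles
# the class and its import path, which breaks if the source files move. A more
# robust alternative (a sketch, not the snippet author's code) saves only the
# parameters and rebuilds the model before loading:
torch.save(model.state_dict(), config.model_type + "_model.pt")

# Later, to restore (the constructor arguments must match the saved model):
model = VanillaRNN(seq_length=config.input_length, input_dim=config.input_dim,
                   num_hidden=config.num_hidden, num_classes=config.num_classes,
                   batch_size=config.batch_size, device=device)
model.load_state_dict(torch.load(config.model_type + "_model.pt"))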
optimizer = torch.optim.RMSprop(model.parameters(),
                                lr=lr,
                                alpha=0.99,
                                eps=1e-08,
                                weight_decay=0,
                                momentum=0,
                                centered=False)

print('start training')

for step, (batch_inputs, batch_targets) in enumerate(data_loader):

    # Only for time measurement of step through network
    t1 = time.time()

    # Move the batch to the device the model runs on
    batch_inputs = batch_inputs.to(device)
    batch_targets = batch_targets.to(device)

    output = model.forward(batch_inputs)
    loss = criterion(output, batch_targets)
    accuracy = acc(output, batch_targets)

    optimizer.zero_grad()
    loss.backward()

    ############################################################################
    # QUESTION: what happens here and why?
    # ANSWER: It rescales the gradient. With each layer backpropagated through,
    # the gradient can get amplified, which can result in an exploding
    # gradient. To avoid this, the gradient norm is clipped to max_norm.
    ############################################################################
    torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=max_norm)
    ############################################################################
def train(config):
    assert config.model_type in ('RNN', 'LSTM')

    # Initialize the device to run the model on
    device = torch.device(config.device)

    # Initialize the model that we are going to use
    settings = [config.input_length, config.input_dim, config.num_hidden,
                config.num_classes, config.batch_size, device]
    model = VanillaRNN(*settings) if config.model_type == 'RNN' else LSTM(*settings)

    # Initialize the dataset and data loader (note the +1)
    dataset = PalindromeDataset(config.input_length + 1)
    data_loader = DataLoader(dataset, config.batch_size, num_workers=1)

    # Setup the loss and optimizer
    criterion = torch.nn.CrossEntropyLoss()
    optimizer = torch.optim.RMSprop(model.parameters(), lr=config.learning_rate)

    for step, (batch_inputs, batch_targets) in enumerate(data_loader):

        # Only for time measurement of step through network
        t1 = time.time()

        predictions = model.forward(batch_inputs)
        loss = criterion(predictions, batch_targets)
        accuracy = float((predictions.argmax(dim=1) == batch_targets.long()).sum()) \
            / float(batch_targets.shape[0])

        # Gradients must be zeroed before the backward pass, otherwise they
        # accumulate across steps
        optimizer.zero_grad()
        loss.backward()

        ############################################################################
        # QUESTION: what happens here and why?
        # - Gradients are clipped according to the given threshold to prevent
        #   exploding gradients. The call belongs after loss.backward() (so
        #   the gradients exist) and before optimizer.step().
        ############################################################################
        torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=config.max_norm)
        ############################################################################

        optimizer.step()

        # Just for time measurement
        t2 = time.time()
        examples_per_second = config.batch_size / float(t2 - t1)

        if step % 10 == 0:
            print("[{}] Train Step {:04d}/{:04d}, Batch Size = {}, Examples/Sec = {:.2f}, "
                  "Accuracy = {:.2f}, Loss = {:.3f}".format(
                      datetime.now().strftime("%Y-%m-%d %H:%M"), step,
                      config.train_steps, config.batch_size,
                      examples_per_second, accuracy, loss.item()))

        if step == config.train_steps:
            # If you receive a PyTorch data-loader error, check this bug report:
            # https://github.com/pytorch/pytorch/pull/9655
            break

    print('Done training.')
def train(config):
    assert config.model_type in ('RNN', 'LSTM')
    tol = 0.

    # Initialize the device to run the model on
    device = torch.device(config.device)

    # Initialize the model that we are going to use
    if config.model_type == 'RNN':
        model = VanillaRNN(config.input_length, config.input_dim, config.num_hidden,
                           config.num_classes, config.batch_size, device)
    else:
        model = LSTM(config.input_length, config.input_dim, config.num_hidden,
                     config.num_classes, config.batch_size, device)

    # Initialize the dataset and data loader (note the +1)
    dataset = PalindromeDataset(config.input_length + 1)
    data_loader = DataLoader(dataset, config.batch_size, num_workers=1)

    # Setup the loss and optimizer
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.RMSprop(model.parameters(), lr=config.learning_rate)

    # Sentinel values so the convergence check below always has two entries to
    # compare; the first two are dropped from the returned lists
    accuracies = [0, 1]
    losses = [0, 1]

    if config.quite:
        bar = tqdm(total=config.train_steps)

    for step, (batch_inputs, batch_targets) in enumerate(data_loader):

        # Only for time measurement of step through network
        t1 = time.time()

        # Add a trailing feature dimension and move the batch to the device
        # (.to() is not in-place, so the result must be assigned)
        batch_inputs = batch_inputs[..., None].to(device)
        batch_targets = batch_targets.to(device)

        # Forward, backward, and step
        out = model.forward(batch_inputs)
        model.zero_grad()
        loss = criterion(out, batch_targets)
        loss.backward()

        ############################################################################
        # QUESTION: what happens here and why?
        # The gradient norm is clipped to max_norm to prevent exploding gradients.
        ############################################################################
        torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=config.max_norm)
        ############################################################################

        optimizer.step()

        accuracy = (out.argmax(dim=1) == batch_targets.long()).float().mean()
        losses.append(loss.item())
        accuracies.append(accuracy.item())

        # Just for time measurement
        t2 = time.time()
        examples_per_second = config.batch_size / float(t2 - t1)

        if step % 10 == 0 and not config.quite:
            print("[{}] Train Step {:04d}/{:04d}, Batch Size = {}, Examples/Sec = {:.2f}, "
                  "Accuracy = {:.2f}, Loss = {:.3f}".format(
                      datetime.now().strftime("%Y-%m-%d %H:%M"), step,
                      config.train_steps, config.batch_size,
                      examples_per_second, accuracies[-1], losses[-1]))

        if config.quite:
            bar.update()

        # Stop at the step limit, or when the loss stops changing; the
        # tolerances must be passed by keyword, because the third positional
        # argument of np.isclose is rtol, not atol
        if step == config.train_steps or \
                np.isclose(losses[-1], losses[-2], rtol=0., atol=tol):
            # If you receive a PyTorch data-loader error, check this bug report:
            # https://github.com/pytorch/pytorch/pull/9655
            break

    print('Done training.')

    return accuracies[2:], losses[2:]
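# Why the tolerances are passed by keyword in the convergence check above
# (a standalone sketch): np.isclose(a, b, tol) binds tol to rtol, the
# *relative* tolerance, and silently keeps the default atol=1e-8, so the
# original positional call did not test what its variable name suggests.
import numpy as np

assert np.isclose(1.0, 1.0 + 5e-9)                        # True via default atol=1e-8
assert not np.isclose(1.0, 1.0 + 5e-9, rtol=0., atol=0.)  # strict equality check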
def train(config):
    assert config.model_type in ('RNN', 'LSTM')

    # Initialize the device to run the model on
    device = torch.device(config.device)

    # Initialize the model that we are going to use
    if config.model_type == 'RNN':
        model = VanillaRNN(seq_length=config.input_length,
                           input_dim=config.input_dim,
                           num_hidden=config.num_hidden,
                           num_classes=config.num_classes,
                           batch_size=config.batch_size,
                           device=config.device).to(config.device)
    elif config.model_type == 'LSTM':
        model = LSTM(seq_length=config.input_length,
                     input_dim=config.input_dim,
                     num_hidden=config.num_hidden,
                     num_classes=config.num_classes,
                     batch_size=config.batch_size,
                     device=config.device).to(config.device)

    # Initialize the dataset and data loader (note the +1)
    dataset = PalindromeDataset(config.input_length + 1)
    data_loader = DataLoader(dataset, config.batch_size, num_workers=1)

    # Setup the loss and optimizer
    criterion = torch.nn.CrossEntropyLoss()
    optimizer = torch.optim.RMSprop(model.parameters(), lr=config.learning_rate)

    for step, (batch_inputs, batch_targets) in enumerate(data_loader):

        # Only for time measurement of step through network
        t1 = time.time()

        ############################################################################
        # QUESTION: what happens here and why?
        # The clipping call below bounds the norm of the gradient to a certain
        # threshold (specified by the max_norm argument). This technique helps
        # with the problem of exploding gradients and makes training stable
        # (it avoids too-large steps when updating parameters).
        ############################################################################

        batch_inputs = batch_inputs.to(config.device)
        batch_targets = batch_targets.to(config.device)

        y_pred = model.forward(batch_inputs)
        loss = criterion(y_pred, batch_targets)

        optimizer.zero_grad()
        loss.backward()
        # Clipping must run after backward() and before step()
        torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=config.max_norm)
        optimizer.step()

        accuracy = acc(y_pred, batch_targets)

        # Just for time measurement
        t2 = time.time()
        examples_per_second = config.batch_size / float(t2 - t1)

        if step % 10 == 0:
            print("[{}] Train Step {:04d}/{:04d}, Batch Size = {}, Examples/Sec = {:.2f}, "
                  "Accuracy = {:.2f}, Loss = {:.3f}".format(
                      datetime.now().strftime("%Y-%m-%d %H:%M"), step,
                      config.train_steps, config.batch_size,
                      examples_per_second, accuracy, loss))

        if step == config.train_steps:
            # If you receive a PyTorch data-loader error, check this bug report:
            # https://github.com/pytorch/pytorch/pull/9655
            break

    # Rough final evaluation on 10 freshly drawn batches (the dataset generates
    # palindromes on the fly, so these are unseen samples)
    acc_test = []
    for i in range(10):
        tr, te = next(iter(data_loader))
        y_pred = model.forward(tr.to(config.device))
        acc_test.append(acc(y_pred, te.to(config.device)))
    print('FINAL TEST ACCURACY: ', np.mean(acc_test))

    print('Done training.')
def train(config, device="cpu"):
    assert config.model_type in ('RNN', 'LSTM')

    # Tensorboard summary writer
    run_id = datetime.now().strftime("%Y-%m-%d_%H-%M-%S_"
                                     + config.model_type.lower()
                                     + '_' + str(config.input_length))
    log_dir = 'tensorboard/' + config.model_type.lower() + '/' + run_id
    writer = SummaryWriter(log_dir=log_dir)

    # Torch settings
    if device == 'cpu':
        torch.set_default_tensor_type(torch.FloatTensor)
    elif device == 'cuda:0':
        torch.set_default_tensor_type(torch.cuda.FloatTensor)

    # Initialize the model that we are going to use
    if config.model_type == 'RNN':
        model = VanillaRNN(config.input_length, config.input_dim, config.num_hidden,
                           config.num_classes, config.batch_size,
                           device=device).to(device)
    elif config.model_type == 'LSTM':
        model = LSTM(config.input_length, config.input_dim, config.num_hidden,
                     config.num_classes, config.batch_size,
                     device=device).to(device)

    # Initialize the dataset and data loader (note the +1)
    dataset = PalindromeDataset(config.input_length + 1)
    data_loader = DataLoader(dataset, config.batch_size, num_workers=1)

    # Setup the loss and optimizer
    criterion = torch.nn.CrossEntropyLoss()
    optimizer = torch.optim.RMSprop(model.parameters(), lr=config.learning_rate)

    # Accuracy and loss to be saved
    accuracies = []
    losses = []

    # Useful for the convergence check
    avg_range = 200
    last_accuracy = 0
    convergence_threshold = 1e-4

    model.train()

    for step, (batch_inputs, batch_targets) in enumerate(data_loader):

        # Only for time measurement of step through network
        t1 = time.time()

        # Load batches on the device (GPU or CPU)
        batch_inputs = batch_inputs.to(device=device)
        batch_targets = batch_targets.to(device=device)

        # Forward pass
        predictions = model.forward(batch_inputs)

        # Compute loss
        loss = criterion(predictions, batch_targets)

        # Reset gradients before the backward pass
        optimizer.zero_grad()

        # Backward pass
        loss.backward()

        # Clip gradients to avoid the exploding-gradient problem
        torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=config.max_norm)

        # Update weights
        optimizer.step()

        # Compute accuracy
        accuracy = get_accuracy(predictions, batch_targets)

        # Just for time measurement
        t2 = time.time()
        examples_per_second = config.batch_size / float(t2 - t1)

        # Add accuracy and loss to the writer
        writer.add_scalars('accuracy_and_loss', {'acc': accuracy, 'loss': loss}, step)

        # Store accuracy and loss
        accuracies.append(accuracy)
        losses.append(loss.item())

        # Print information
        if step % 100 == 0:
            print("[{}] Train Step {:04d}/{:04d}, Batch Size = {}, Examples/Sec = {:.2f}, "
                  "Accuracy = {:.2f}, Loss = {:.3f}".format(
                      datetime.now().strftime("%Y-%m-%d %H:%M"), step,
                      config.train_steps, config.batch_size,
                      examples_per_second, accuracy, loss))

        # Check for convergence
        if step % avg_range == 0 and step != 0:
            avg_accuracy = np.mean(accuracies[-avg_range:])
            if np.abs(avg_accuracy - last_accuracy) < convergence_threshold:
                print("The model has converged with accuracy", avg_accuracy,
                      "(" + ("+" if avg_accuracy > last_accuracy else "-")
                      + str(np.abs(avg_accuracy - last_accuracy)) + ")")
                break
            last_accuracy = avg_accuracy

        if step == config.train_steps:
            # If you receive a PyTorch data-loader error, check this bug report:
            # https://github.com/pytorch/pytorch/pull/9655
            break

    save_results(accuracies, losses, run_id, config.model_type,
                 config.input_length, last_accuracy)
    writer.close()

    # last_accuracy is always defined here, unlike avg_accuracy, which only
    # exists once the convergence check has run at least once
    print('Done training. Accuracy:', last_accuracy)
def train(config):
    assert config.model_type in ('RNN', 'LSTM')

    # Initialize the device to run the model on
    if config.device == 'best':
        config.device = 'cuda:0' if torch.cuda.is_available() else 'cpu'
    device = torch.device(config.device)

    # Initialize the model that we are going to use
    if config.model_type == 'RNN':
        model = VanillaRNN(config.embed_dim, config.num_hidden,
                           config.num_classes, device)
    else:
        model = LSTM(config.embed_dim, config.num_hidden,
                     config.num_classes, device)

    # Initialize the dataset and data loader
    dataset = PalindromeDataset(config.input_length)
    data_loader = DataLoader(dataset, config.batch_size, num_workers=1)

    # Setup the loss and optimizer
    criterion = F.cross_entropy
    optimizer = optim.RMSprop(model.parameters(), lr=config.learning_rate)

    # Track metrics
    losses = []
    losses_last10 = []
    accuracies = []
    accuracies_last10 = []

    for step, (batch_inputs, batch_targets) in enumerate(data_loader):

        # Transform the input to RNN input format (sequence, batch)
        batch_inputs = batch_inputs.t().to(device=device, dtype=torch.long)
        batch_targets = batch_targets.to(device=device, dtype=torch.long)

        # Only for time measurement of step through network
        t1 = time.time()

        # Forward pass
        logits = model.forward(batch_inputs)

        # Backprop
        optimizer.zero_grad()
        loss = criterion(logits, batch_targets)
        loss.backward()

        ############################################################################
        # QUESTION: what happens here and why?
        # The gradient norm is clipped to max_norm to prevent exploding gradients.
        ############################################################################
        torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=config.max_norm)
        ############################################################################

        optimizer.step()

        # Compute metrics
        accuracy = (logits.cpu().argmax(dim=1) == batch_targets.cpu()).numpy().mean()

        # Just for time measurement
        t2 = time.time()
        examples_per_second = config.batch_size / float(t2 - t1)

        # Track metrics
        accuracies_last10.append(accuracy.tolist())
        losses_last10.append(loss.tolist())

        if step % 10 == 0:
            message = ("[{}] Train Step {:04d}/{:04d}, Batch Size = {}, "
                       "Examples/Sec = {:.2f}, Accuracy = {:.2f}, Loss = {:.3f}").format(
                datetime.now().strftime("%Y-%m-%d %H:%M"), step,
                config.train_steps, config.batch_size,
                examples_per_second, accuracy, loss)
            print(message)
            if config.log_path != "":
                with open(config.log_path, "a") as f:
                    f.write(message + "\n")

            accuracies.append(np.mean(accuracies_last10))
            losses.append(np.mean(losses_last10))
            accuracies_last10 = []
            losses_last10 = []

        # Early stopping criterion: each list entry averages 10 steps, so this
        # compares the mean accuracy over the last 1000 steps with the mean over
        # the 1000 steps before that, stopping when it has not improved
        stopping_criterion = len(accuracies) > 200 and \
            np.mean(accuracies[-100:]) <= np.mean(accuracies[-200:-100])

        if step == config.train_steps or stopping_criterion:
            # If you receive a PyTorch data-loader error, check this bug report:
            # https://github.com/pytorch/pytorch/pull/9655
            print('Done training.')
            return losses, accuracies
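# The snippet above uses the functional F.cross_entropy as its criterion while
# most of the others construct nn.CrossEntropyLoss(); the two are equivalent,
# as the module simply wraps the functional form. A tiny standalone check:
import torch
import torch.nn.functional as F

logits = torch.randn(4, 10)
targets = torch.tensor([1, 2, 3, 4])
assert torch.allclose(F.cross_entropy(logits, targets),
                      torch.nn.CrossEntropyLoss()(logits, targets))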
def train(config):
    assert config.model_type in ('RNN', 'LSTM')

    # Initialize the device to run the model on
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    # Initialize the model that we are going to use
    if config.model_type == 'RNN':
        model = VanillaRNN(seq_length=config.input_length,
                           input_dim=config.input_dim,
                           num_hidden=config.num_hidden,
                           num_classes=config.num_classes,
                           batch_size=config.batch_size,
                           device=device)
    elif config.model_type == 'LSTM':
        model = LSTM(seq_length=config.input_length,
                     input_dim=config.input_dim,
                     num_hidden=config.num_hidden,
                     num_classes=config.num_classes,
                     batch_size=config.batch_size,
                     device=device)
    model.to(device)

    # Initialize the dataset and data loader (note the +1)
    dataset = PalindromeDataset(config.input_length + 1)
    data_loader = DataLoader(dataset, config.batch_size, num_workers=1)

    # Setup the loss and optimizer
    criterion = torch.nn.CrossEntropyLoss()
    optimizer = torch.optim.RMSprop(params=model.parameters(), lr=config.learning_rate)

    # Evaluation metrics
    results = []

    print_setting(config)

    for step, (batch_inputs, batch_targets) in enumerate(data_loader):

        # Only for time measurement of step through network
        t1 = time.time()

        batch_inputs = batch_inputs.to(device)
        batch_targets = batch_targets.to(device)

        # Forward pass
        predictions = model.forward(batch_inputs)

        # Compute loss
        loss = criterion(predictions, batch_targets)

        # Backward pass and updates; set gradients to zero first
        optimizer.zero_grad()
        loss.backward()

        ############################################################################
        # QUESTION: what happens here and why?
        # Prevents exploding gradients by rescaling the gradient norm to the
        # limit specified by config.max_norm, forcing gradients within a
        # certain norm to ensure reasonable updates
        ############################################################################
        torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=config.max_norm)
        ############################################################################

        optimizer.step()

        accuracy = (predictions.argmax(dim=1) == batch_targets).sum().float() \
            / config.batch_size

        # Just for time measurement
        t2 = time.time()
        examples_per_second = config.batch_size / float(t2 - t1)

        if step % config.eval_freq == 0:
            print("[{}] Train Step {:04d}/{:04d}, Batch Size = {}, Examples/Sec = {:.2f}, "
                  "Accuracy = {:.2f}, Loss = {:.3f}".format(
                      datetime.now().strftime("%Y-%m-%d %H:%M"), step,
                      config.train_steps, config.batch_size,
                      examples_per_second, accuracy, loss))
            results.append([step, accuracy.item(), loss.float().item()])

        if step == config.train_steps:
            # If you receive a PyTorch data-loader error, check this bug report:
            # https://github.com/pytorch/pytorch/pull/9655
            break

    print('Done training. \n')

    return results
def train(config):
    assert config.model_type in ('RNN', 'LSTM')

    # Initialize the device to run the model on
    device = torch.device(config.device)

    # Initialize the model that we are going to use
    if config.model_type == 'RNN':
        model = VanillaRNN(config.input_length, config.input_dim, config.num_hidden,
                           config.num_classes, config.batch_size, device).to(device)
    else:
        model = LSTM(config.input_length, config.input_dim, config.num_hidden,
                     config.num_classes, config.batch_size, device).to(device)

    # Initialize the dataset and data loader (note the +1)
    dataset = PalindromeDataset(config.input_length + 1)
    data_loader = DataLoader(dataset, config.batch_size, num_workers=1)

    # Setup the loss and optimizer
    criterion = nn.CrossEntropyLoss().to(device)
    optimizer = optim.RMSprop(model.parameters(), lr=config.learning_rate)

    for step, (batch_inputs, batch_targets) in enumerate(data_loader):

        # Only for time measurement of step through network
        t1 = time.time()

        # Reshape the tensors to their correct shape, cast to the device
        # (cpu/gpu), and compute the model's output
        batch_inputs = batch_inputs.unsqueeze(-1).to(device)
        batch_targets = batch_targets.to(device)
        output = model.forward(batch_inputs)

        # Compute the loss and the gradients
        loss = criterion(output, batch_targets)
        loss.backward()

        ############################################################################
        # QUESTION: what happens here and why?
        # The gradient norm is clipped to max_norm to prevent exploding gradients.
        ############################################################################
        torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=config.max_norm)
        ############################################################################

        # Update the parameters, then reset the gradients for the next step
        optimizer.step()
        optimizer.zero_grad()

        loss = loss.item()
        accuracy = (output.argmax(1) == batch_targets).float().mean().item()

        # Just for time measurement
        t2 = time.time()
        examples_per_second = config.batch_size / float(t2 - t1)

        if step % 10 == 0:
            print("[{}] Train Step {:04d}/{:04d}, Batch Size = {}, Examples/Sec = {:.2f}, "
                  "Accuracy = {:.2f}, Loss = {:.3f}".format(
                      datetime.now().strftime("%Y-%m-%d %H:%M"), step,
                      config.train_steps, config.batch_size,
                      examples_per_second, accuracy, loss))

        # Stop at the step limit, or once the loss rounds to 0.000
        if step == config.train_steps or f"{loss:.3f}" == "0.000":
            with open("results.csv", 'a') as file:
                file.write(f"{config.input_length};{accuracy};{config.model_type}\n")
            # If you receive a PyTorch data-loader error, check this bug report:
            # https://github.com/pytorch/pytorch/pull/9655
            break

    print('Done training.')
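# How these train() variants are typically invoked (a sketch): config is an
# argparse-style namespace. The attribute names mirror the ones read in the
# snippets above; the values given here are illustrative assumptions only.
from argparse import Namespace

config = Namespace(
    model_type='RNN', input_length=10, input_dim=1, num_classes=10,
    num_hidden=128, batch_size=128, learning_rate=0.001,
    train_steps=10000, max_norm=10.0, device='cpu')

train(config)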