def load_nlu_model(self, model_path):
    """ Load the trained NLU model. """
    model_params = pickle.load(open(model_path, 'rb'))

    hidden_size = model_params['model']['Wd'].shape[0]
    output_size = model_params['model']['Wd'].shape[1]

    if model_params['params']['model'] == 'lstm':  # lstm
        input_size = model_params['model']['WLSTM'].shape[0] - hidden_size - 1
        rnnmodel = lstm(input_size, hidden_size, output_size)
    elif model_params['params']['model'] == 'bi_lstm':  # bi_lstm
        input_size = model_params['model']['WLSTM'].shape[0] - hidden_size - 1
        rnnmodel = biLSTM(input_size, hidden_size, output_size)

    rnnmodel.model = copy.deepcopy(model_params['model'])

    self.model = rnnmodel
    self.word_dict = copy.deepcopy(model_params['word_dict'])
    self.slot_dict = copy.deepcopy(model_params['slot_dict'])
    self.act_dict = copy.deepcopy(model_params['act_dict'])
    self.tag_set = copy.deepcopy(model_params['tag_set'])
    self.params = copy.deepcopy(model_params['params'])
    self.inverse_tag_dict = {self.tag_set[k]: k for k in self.tag_set.keys()}
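# ---------------------------------------------------------------------------
# Illustrative helper (an addition, not part of the original module): a quick
# sanity check of a saved NLU checkpoint before calling load_nlu_model. It
# only relies on the keys that load_nlu_model itself reads; the function name
# is an assumption.
def describe_nlu_checkpoint(model_path):
    import pickle

    with open(model_path, 'rb') as f:
        model_params = pickle.load(f)

    print('model type :', model_params['params']['model'])
    print('hidden size:', model_params['model']['Wd'].shape[0])
    print('output size:', model_params['model']['Wd'].shape[1])
    print('vocab size :', len(model_params['word_dict']))
    print('tag set    :', len(model_params['tag_set']))
# ---------------------------------------------------------------------------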
def train(config, seed=0, seq_length=0):
    np.random.seed(seed)
    torch.manual_seed(seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed(seed)
        torch.cuda.manual_seed_all(seed)
        torch.backends.cudnn.deterministic = True
        torch.backends.cudnn.benchmark = False

    if seq_length != 0:
        config.input_length = seq_length

    # Initialize tensorboard writer
    # writer = SummaryWriter()

    # Initialize the device which to run the model on
    device = torch.device(config.device)
    print(device)

    # Load dataset
    if config.dataset == 'randomcomb':
        print('Load random combinations dataset ...')
        # Initialize the dataset and data loader
        config.num_classes = config.input_length
        dataset = datasets.RandomCombinationsDataset(config.input_length)
        data_loader = DataLoader(dataset, config.batch_size, num_workers=1,
                                 drop_last=True)

    elif config.dataset == 'bss':
        print('Load bss dataset ...')
        # Initialize the dataset and data loader
        config.num_classes = 2
        config.input_dim = 3
        dataset = datasets.BaumSweetSequenceDataset(config.input_length)
        data_loader = DataLoader(dataset, config.batch_size, num_workers=1,
                                 drop_last=True)
        config.input_length = 4 * config.input_length

    elif config.dataset == 'bipalindrome':
        print('Load binary palindrome dataset ...')
        # Initialize the dataset and data loader
        config.num_classes = config.input_length
        dataset = datasets.BinaryPalindromeDataset(config.input_length)
        data_loader = DataLoader(dataset, config.batch_size, num_workers=1,
                                 drop_last=True)
        config.input_length = config.input_length * 4 + 2 - 1

    # Setup the model that we are going to use
    if config.model_type == 'LSTM':
        print("Initializing LSTM model ...")
        model = LSTM(config.input_length, config.input_dim,
                     config.num_hidden, config.num_classes,
                     config.batch_size, device).to(device)
    elif config.model_type == 'biLSTM':
        print("Initializing bidirectional LSTM model ...")
        model = biLSTM(config.input_length, config.input_dim,
                       config.num_hidden, config.num_classes,
                       config.batch_size, device).to(device)
    elif config.model_type == 'GRU':
        print("Initializing GRU model ...")
        model = GRU(config.input_length, config.input_dim,
                    config.num_hidden, config.num_classes,
                    config.batch_size, device).to(device)
    elif config.model_type == 'peepLSTM':
        print("Initializing peephole LSTM model ...")
        model = peepLSTM(config.input_length, config.input_dim,
                         config.num_hidden, config.num_classes,
                         config.batch_size, device).to(device)

    # Setup the loss and optimizer
    loss_function = torch.nn.NLLLoss()
    optimizer = optim.Adam(model.parameters(), lr=config.learning_rate)

    loss_history = []
    acc_history = []

    for step, (batch_inputs, batch_targets) in enumerate(data_loader):

        # Only for time measurement of step through network
        t1 = time.time()

        # Move to GPU
        batch_inputs = batch_inputs.to(device)    # [batch_size, seq_length, 1]
        batch_targets = batch_targets.to(device)  # [batch_size]

        # Reset for next iteration
        model.zero_grad()

        # Forward pass
        log_probs = model(batch_inputs)
        # print('log', log_probs.size())
        # print('batch', batch_targets.size)

        # Compute the loss, gradients and update network parameters
        loss = loss_function(log_probs, batch_targets)
        loss.backward()

        #######################################################################
        # Check for yourself: what happens here and why?
        #######################################################################
        torch.nn.utils.clip_grad_norm_(model.parameters(),
                                       max_norm=config.max_norm)
        #######################################################################

        optimizer.step()

        predictions = torch.argmax(log_probs, dim=1)
        correct = (predictions == batch_targets).sum().item()
        accuracy = correct / log_probs.size(0)

        loss_history.append(loss.item())
        acc_history.append(accuracy)

        if step % 200 == 0:
            print('\nLoss:', loss.item())
            print('Acc:', accuracy)
            # writer.add_scalar("Loss", loss, step)
            # writer.add_scalar("Accuracy", accuracy, step)
            # print(predictions[0, ...], batch_targets[0, ...])

        # Just for time measurement
        t2 = time.time()
        examples_per_second = config.batch_size / float(t2 - t1)

        if step % 60 == 0:
            print("[{}] Train Step {:04d}/{:04d}, Batch Size = {}, "
                  "Examples/Sec = {:.2f}, Accuracy = {:.2f}, "
                  "Loss = {:.3f}".format(
                      datetime.now().strftime("%Y-%m-%d %H:%M"), step,
                      config.train_steps, config.batch_size,
                      examples_per_second, accuracy, loss))

        # Check if training is finished
        if step == config.train_steps:
            # If you receive a PyTorch data-loader error, check this bug report
            # https://github.com/pytorch/pytorch/pull/9655
            break

    # writer.flush()
    # writer.close()

    print(f'Done training with seed {seed} and seq_length {seq_length}')
    print('Final loss:', loss_history[-1])
    print('Final acc:', acc_history[-1])

    return loss_history, acc_history
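# ---------------------------------------------------------------------------
# Illustrative driver (an addition, not part of the original script): collect
# the loss/accuracy histories returned by train() over several seeds and
# sequence lengths, e.g. to average the curves afterwards. Note that train()
# rewrites config.input_length for the 'bss' and 'bipalindrome' datasets, so
# each run gets its own deep copy of the config. The function name and the
# default seed/length tuples are assumptions.
def run_experiments(config, seeds=(0, 1, 2), seq_lengths=(10, 20)):
    from copy import deepcopy

    results = {}
    for seq_length in seq_lengths:
        for seed in seeds:
            loss_history, acc_history = train(deepcopy(config),
                                              seed=seed,
                                              seq_length=seq_length)
            results[(seq_length, seed)] = (loss_history, acc_history)
    return results
# ---------------------------------------------------------------------------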
def train(config):
    # np.random.seed(24)
    # torch.manual_seed(24)

    # Initialize the device which to run the model on
    device = torch.device(config.device)
    print(device)

    # Load dataset
    if config.dataset == 'randomcomb':
        print('Load random combinations dataset ...')
        # Initialize the dataset and data loader
        config.num_classes = config.input_length
        dataset = datasets.RandomCombinationsDataset(config.input_length)
        data_loader = DataLoader(dataset, config.batch_size, num_workers=1,
                                 drop_last=True)

    elif config.dataset == 'bss':
        print('Load bss dataset ...')
        # Initialize the dataset and data loader
        config.num_classes = 2
        config.input_dim = 3
        dataset = datasets.BaumSweetSequenceDataset(config.input_length)
        data_loader = DataLoader(dataset, config.batch_size, num_workers=1,
                                 drop_last=True)
        config.input_length = 4 * config.input_length

    elif config.dataset == 'bipalindrome':
        print('Load binary palindrome dataset ...')
        # Initialize the dataset and data loader
        config.num_classes = config.input_length
        dataset = datasets.BinaryPalindromeDataset(config.input_length)
        data_loader = DataLoader(dataset, config.batch_size, num_workers=1,
                                 drop_last=True)
        config.input_length = config.input_length * 4 + 2 - 1

    # Setup the model that we are going to use
    if config.model_type == 'LSTM':
        print("Initializing LSTM model ...")
        model = LSTM(config.input_length, config.input_dim,
                     config.num_hidden, config.num_classes,
                     config.batch_size, device).to(device)
    elif config.model_type == 'biLSTM':
        print("Initializing bidirectional LSTM model ...")
        model = biLSTM(config.input_length, config.input_dim,
                       config.num_hidden, config.num_classes,
                       config.batch_size, device).to(device)
    elif config.model_type == 'GRU':
        print("Initializing GRU model ...")
        model = GRU(config.input_length, config.input_dim,
                    config.num_hidden, config.num_classes,
                    config.batch_size, device).to(device)
    elif config.model_type == 'peepLSTM':
        print("Initializing peephole LSTM model ...")
        model = peepLSTM(config.input_length, config.input_dim,
                         config.num_hidden, config.num_classes,
                         config.batch_size, device).to(device)

    # Setup the loss and optimizer
    loss_function = torch.nn.NLLLoss()
    optimizer = optim.Adam(model.parameters(), lr=config.learning_rate)

    accuracy_list = []
    loss_list = []
    old_loss = 1.0

    for step, (batch_inputs, batch_targets) in enumerate(data_loader):

        # Only for time measurement of step through network
        t1 = time.time()

        # Move to GPU
        batch_inputs = batch_inputs.to(device)    # [batch_size, seq_length, 1]
        batch_targets = batch_targets.to(device)  # [batch_size]

        # print(batch_inputs[:, 0, :].shape)
        # embedding = nn.Embedding(3, config.input_dim)
        # print(embedding(batch_inputs[:, 0, :].long()).shape)

        # Reset for next iteration
        model.zero_grad()

        # Forward pass
        log_probs = model(batch_inputs)

        # Compute the loss, gradients and update network parameters
        loss = loss_function(log_probs, batch_targets)
        loss.backward()

        #######################################################################
        # Check for yourself: what happens here and why?
        #######################################################################
        torch.nn.utils.clip_grad_norm_(model.parameters(),
                                       max_norm=config.max_norm)
        #######################################################################

        optimizer.step()

        predictions = torch.argmax(log_probs, dim=1)
        correct = (predictions == batch_targets).sum().item()
        accuracy = correct / log_probs.size(0)

        accuracy_list.append(accuracy)
        loss_list.append(loss.item())

        # print(predictions[0, ...], batch_targets[0, ...])

        # Just for time measurement
        t2 = time.time()
        examples_per_second = config.batch_size / float(t2 - t1)

        if step % 60 == 0:
            print("[{}] Train Step {:04d}/{:04d}, Batch Size = {}, "
                  "Examples/Sec = {:.2f}, Accuracy = {:.2f}, "
                  "Loss = {:.3f}".format(
                      datetime.now().strftime("%Y-%m-%d %H:%M"), step,
                      config.train_steps, config.batch_size,
                      examples_per_second, accuracy, loss))

        # Check if training is finished, or stop if the loss has not changed
        # since the previous step
        if step == config.train_steps or old_loss == loss.item():
            # If you receive a PyTorch data-loader error, check this bug report
            # https://github.com/pytorch/pytorch/pull/9655
            break
        else:
            old_loss = loss.item()

    print('Done training.')

    ###########################################################################
    ###########################################################################

    print('Evaluating...')
    model.eval()
    acc = []
    for i in range(3):
        acc_sublist = []
        for step, (batch_inputs, batch_targets) in enumerate(data_loader):
            batch_inputs = batch_inputs.to(device)    # [batch_size, seq_length, 1]
            batch_targets = batch_targets.to(device)

            # No gradient tracking is needed during evaluation
            with torch.no_grad():
                pred = model(batch_inputs)

            predictions = torch.argmax(pred, dim=1)
            correct = (predictions == batch_targets).sum().item()
            accuracy = correct / pred.size(0)
            acc_sublist.append(accuracy)

            if step == 25:
                break
        acc.append(np.mean(acc_sublist))

    print('Mean accuracy is {} and standard deviation is {}'.format(
        np.mean(acc), np.std(acc)))

    return accuracy_list, loss_list
def train(config, seed):
    np.random.seed(seed)
    torch.manual_seed(seed)

    # Initialize the device which to run the model on
    device = torch.device(config.device)
    print(device)

    # Load dataset
    if config.dataset == 'randomcomb':
        print('Load random combinations dataset ...')
        # Initialize the dataset and data loader
        config.num_classes = config.input_length
        dataset = datasets.RandomCombinationsDataset(config.input_length)
        data_loader = DataLoader(dataset, config.batch_size, num_workers=1,
                                 drop_last=True)

    elif config.dataset == 'bss':
        print('Load bss dataset ...')
        # Initialize the dataset and data loader
        config.num_classes = 2
        config.input_dim = 3
        dataset = datasets.BaumSweetSequenceDataset(config.input_length)
        data_loader = DataLoader(dataset, config.batch_size, num_workers=1,
                                 drop_last=True)
        config.input_length = 4 * config.input_length

    elif config.dataset == 'bipalindrome':
        print('Load binary palindrome dataset ...')
        # Initialize the dataset and data loader
        config.num_classes = 2
        dataset = datasets.BinaryPalindromeDataset(config.input_length)
        data_loader = DataLoader(dataset, config.batch_size, num_workers=1,
                                 drop_last=True)
        config.input_length = config.input_length * 4 + 2 - 1

    # Setup the model that we are going to use
    if config.model_type == 'LSTM':
        print("Initializing LSTM model ...")
        model = LSTM(config.input_length, config.input_dim,
                     config.num_hidden, config.num_classes,
                     config.batch_size, device).to(device)
    elif config.model_type == 'biLSTM':
        print("Initializing bidirectional LSTM model ...")
        model = biLSTM(config.input_length, config.input_dim,
                       config.num_hidden, config.num_classes,
                       config.batch_size, device).to(device)
    elif config.model_type == 'GRU':
        print("Initializing GRU model ...")
        model = GRU(config.input_length, config.input_dim,
                    config.num_hidden, config.num_classes,
                    config.batch_size, device).to(device)
    elif config.model_type == 'peepLSTM':
        print("Initializing peephole LSTM model ...")
        model = peepLSTM(config.input_length, config.input_dim,
                         config.num_hidden, config.num_classes,
                         config.batch_size, device).to(device)

    # Setup the loss and optimizer
    loss_function = torch.nn.NLLLoss()
    optimizer = optim.Adam(model.parameters(), lr=config.learning_rate)

    losses = []
    train_accuracies = []

    for step, (batch_inputs, batch_targets) in enumerate(data_loader):

        # Only for time measurement of step through network
        t1 = time.time()

        # Move to GPU
        batch_inputs = batch_inputs.to(device)    # [batch_size, seq_length, 1]
        batch_targets = batch_targets.to(device)  # [batch_size]

        # Reset for next iteration
        model.zero_grad()

        # Forward pass
        log_probs = model(batch_inputs)

        # Compute the loss, gradients and update network parameters
        loss = loss_function(log_probs, batch_targets)
        loss.backward()
        losses.append(loss.item())

        #######################################################################
        # Check for yourself: what happens here and why?
        #######################################################################
        torch.nn.utils.clip_grad_norm_(model.parameters(),
                                       max_norm=config.max_norm)
        #######################################################################

        optimizer.step()

        predictions = torch.argmax(log_probs, dim=1)
        correct = (predictions == batch_targets).sum().item()
        accuracy = correct / log_probs.size(0)
        train_accuracies.append(accuracy)

        # print(predictions[0, ...], batch_targets[0, ...])

        # Just for time measurement
        t2 = time.time()
        examples_per_second = config.batch_size / float(t2 - t1)

        if step % 60 == 0:
            print("[{}] Train Step {:04d}/{:04d}, Batch Size = {}, "
                  "Examples/Sec = {:.2f}, Accuracy = {:.2f}, "
                  "Loss = {:.3f}".format(
                      datetime.now().strftime("%Y-%m-%d %H:%M"), step,
                      config.train_steps, config.batch_size,
                      examples_per_second, accuracy, loss))

        # Check if training is finished
        if step == config.train_steps:
            # If you receive a PyTorch data-loader error, check this bug report
            # https://github.com/pytorch/pytorch/pull/9655
            break

        # Stop early if the last 100 losses were all low enough
        # (only once at least 100 steps have been recorded)
        if len(losses) >= 100 and all(x < 0.001 for x in losses[-100:]):
            break

    print('Done training.')

    # Evaluate the model on new random data
    model.eval()
    test_accuracies = []
    for step, (batch_inputs, batch_targets) in enumerate(data_loader):

        # Move to GPU
        batch_inputs = batch_inputs.to(device)    # [batch_size, seq_length, 1]
        batch_targets = batch_targets.to(device)  # [batch_size]

        # Forward pass
        with torch.no_grad():
            log_probs = model(batch_inputs)

        predictions = torch.argmax(log_probs, dim=1)
        correct = (predictions == batch_targets).sum().item()
        accuracy = correct / log_probs.size(0)
        test_accuracies.append(accuracy)

        if step >= 5000 / config.batch_size:
            # If you receive a PyTorch data-loader error, check this bug report
            # https://github.com/pytorch/pytorch/pull/9655
            break

    return losses, train_accuracies, torch.tensor(test_accuracies).mean().item()
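# ---------------------------------------------------------------------------
# Illustrative aggregation sketch (an addition): report the mean and standard
# deviation of the held-out accuracy that this train() variant returns, over a
# few seeds. The config is deep-copied because train() rewrites
# config.input_length for some datasets; the function name is an assumption.
def test_accuracy_over_seeds(config, seeds=(0, 1, 2)):
    from copy import deepcopy

    test_accs = []
    for seed in seeds:
        _, _, test_acc = train(deepcopy(config), seed)
        test_accs.append(test_acc)
    return float(np.mean(test_accs)), float(np.std(test_accs))
# ---------------------------------------------------------------------------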
def train(config):
    seed = config.seed
    np.random.seed(seed)
    torch.manual_seed(seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed(seed)
        torch.cuda.manual_seed_all(seed)
        # torch.backends.cudnn.deterministic = True
        # torch.backends.cudnn.benchmark = False

    # Initialize the device which to run the model on
    device = torch.device(config.device)
    print(device)

    # Load dataset
    if config.dataset == 'randomcomb':
        print('Load random combinations dataset ...')
        # Initialize the dataset and data loader
        config.num_classes = config.input_length
        dataset = datasets.RandomCombinationsDataset(config.input_length)
        data_loader = DataLoader(dataset, config.batch_size, num_workers=1,
                                 drop_last=True)

    elif config.dataset == 'bss':
        print('Load bss dataset ...')
        # Initialize the dataset and data loader
        config.num_classes = 2
        config.input_dim = 3
        dataset = datasets.BaumSweetSequenceDataset(config.input_length)
        data_loader = DataLoader(dataset, config.batch_size, num_workers=1,
                                 drop_last=True)
        config.input_length = 4 * config.input_length

    elif config.dataset == 'bipalindrome':
        print('Load binary palindrome dataset ...')
        # Initialize the dataset and data loader
        config.num_classes = 2  # config.input_length
        dataset = datasets.BinaryPalindromeDataset(config.input_length)
        data_loader = DataLoader(dataset, config.batch_size, num_workers=1,
                                 drop_last=True)
        config.input_length = config.input_length * 4 + 2 - 1

    # Setup the model that we are going to use
    if config.model_type == 'LSTM':
        print("Initializing LSTM model ...")
        model = LSTM(config.input_length, config.input_dim,
                     config.num_hidden, config.num_classes,
                     config.batch_size, device).to(device)
    elif config.model_type == 'biLSTM':
        print("Initializing bidirectional LSTM model ...")
        model = biLSTM(config.input_length, config.input_dim,
                       config.num_hidden, config.num_classes,
                       config.batch_size, device).to(device)
    elif config.model_type == 'GRU':
        print("Initializing GRU model ...")
        model = GRU(config.input_length, config.input_dim,
                    config.num_hidden, config.num_classes,
                    config.batch_size, device).to(device)
    elif config.model_type == 'peepLSTM':
        print("Initializing peephole LSTM model ...")
        model = peepLSTM(config.input_length, config.input_dim,
                         config.num_hidden, config.num_classes,
                         config.batch_size, device).to(device)

    model.numTrainableParameters()

    # Setup the loss and optimizer
    loss_function = torch.nn.NLLLoss()
    optimizer = optim.Adam(model.parameters(), lr=config.learning_rate)

    # For plotting
    acc_plt = []
    loss_plt = []

    # To stop after consecutive accuracies of 1.0 on training
    convergenceCounter = 0

    for step, (batch_inputs, batch_targets) in enumerate(data_loader):

        # Only for time measurement of step through network
        t1 = time.time()

        # Move to GPU
        batch_inputs = batch_inputs.to(device)    # [batch_size, seq_length, 1]
        batch_targets = batch_targets.to(device)  # [batch_size]

        # Reset for next iteration
        model.zero_grad()

        # Forward pass
        log_probs = model(batch_inputs)

        # Compute the loss, gradients and update network parameters
        loss = loss_function(log_probs, batch_targets)
        loss.backward()

        #######################################################################
        # Check for yourself: what happens here and why?
        #######################################################################
        torch.nn.utils.clip_grad_norm_(model.parameters(),
                                       max_norm=config.max_norm)
        #######################################################################

        optimizer.step()

        predictions = torch.argmax(log_probs, dim=1)
        correct = (predictions == batch_targets).sum().item()
        accuracy = correct / log_probs.size(0)

        # print(predictions[0, ...], batch_targets[0, ...])

        acc_plt.append(accuracy)
        loss_plt.append(loss.item())  # store the scalar, not the graph-attached tensor

        # Just for time measurement
        t2 = time.time()
        examples_per_second = config.batch_size / float(t2 - t1)

        if step % 60 == 0:
            print("[{}] Train Step {:04d}/{:04d}, Batch Size = {}, "
                  "Examples/Sec = {:.2f}, Accuracy = {:.2f}, "
                  "Loss = {:.3f}".format(
                      datetime.now().strftime("%Y-%m-%d %H:%M"), step,
                      config.train_steps, config.batch_size,
                      examples_per_second, accuracy, loss))

        # Check if training is finished
        if accuracy > 0.999:
            convergenceCounter += 1
        else:
            convergenceCounter = 0  # only count consecutive near-perfect batches
        if step == config.train_steps or convergenceCounter > 100:
            # If you receive a PyTorch data-loader error, check this bug report
            # https://github.com/pytorch/pytorch/pull/9655
            break

    # Test on new data
    model.eval()
    with torch.no_grad():
        correct = 0
        total = 0
        test_loss = 0
        numBatchesTestEval = 10
        for step, (x, t) in enumerate(data_loader):
            if device.type == 'cuda':
                x = x.to(device)
                t = t.to(device)
            log_probs = model(x)
            predictions = torch.argmax(log_probs, dim=1)
            correct += (predictions == t).sum().item()
            total += log_probs.size(0)
            test_loss += loss_function(log_probs, t) / numBatchesTestEval
            if step == numBatchesTestEval:
                break
        test_accuracy = correct / total
    model.train()

    print('Done training.')
    print('Accuracy on testset of {} examples:'.format(total), test_accuracy)
    print('Avg. loss on testset:', test_loss)

    pltLossAcc(loss_plt, acc_plt, config)
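# ---------------------------------------------------------------------------
# Minimal sketch of the plotting helper called above. pltLossAcc is referenced
# but not shown in this excerpt, so this version is an assumption about its
# behaviour: plot the per-step loss and accuracy curves and save the figure.
def pltLossAcc(loss_plt, acc_plt, config):
    import matplotlib.pyplot as plt

    fig, (ax_loss, ax_acc) = plt.subplots(1, 2, figsize=(10, 4))
    ax_loss.plot(loss_plt)
    ax_loss.set_xlabel('train step')
    ax_loss.set_ylabel('NLL loss')
    ax_acc.plot(acc_plt)
    ax_acc.set_xlabel('train step')
    ax_acc.set_ylabel('train accuracy')
    fig.suptitle('{} on {}'.format(config.model_type, config.dataset))
    fig.tight_layout()
    fig.savefig('{}_{}_curves.png'.format(config.model_type, config.dataset))
    plt.close(fig)
# ---------------------------------------------------------------------------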