def train(config):
    """Train the text generation model for ``config.epochs`` passes.

    Each pass iterates the data loader and stops early once the per-epoch
    ``step`` index equals ``config.train_steps``; the next epoch then starts
    again from step 0.

    Args:
        config: Namespace-like object. Fields read here: ``tensorboard``,
            ``summary``, ``device``, ``txt_file``, ``seq_length``,
            ``batch_size``, ``lstm_num_hidden``, ``lstm_num_layers``,
            ``learning_rate``, ``epochs``, ``max_norm``, ``print_every``,
            ``sample_every`` and ``train_steps``.
    """
    writer = None
    if config.tensorboard:
        writer = SummaryWriter(
            config.summary + datetime.now().strftime("%Y%m%d-%H%M%S"))

    # Initialize the device which to run the model on
    device = torch.device(config.device)

    # Initialize the dataset and data loader
    dataset = TextDataset(config.txt_file, config.seq_length)
    data_loader = DataLoader(dataset, config.batch_size, num_workers=1)

    # Initialize the model that we are going to use
    model = TextGenerationModel(config.batch_size, config.seq_length,
                                dataset.vocab_size,
                                lstm_num_hidden=config.lstm_num_hidden,
                                lstm_num_layers=config.lstm_num_layers,
                                device=config.device)

    # Setup the loss and optimizer
    criterion = torch.nn.CrossEntropyLoss()
    optimizer = torch.optim.RMSprop(model.parameters(),
                                    lr=config.learning_rate)

    for epoch in range(config.epochs):
        for step, (batch_inputs, batch_targets) in enumerate(data_loader):
            # Only for time measurement of step through network
            t1 = time.time()

            optimizer.zero_grad()

            # one_hot yields an integer tensor; it is cast to float before
            # being fed to the model.
            batch_inputs = torch.nn.functional.one_hot(
                batch_inputs,
                num_classes=dataset.vocab_size).float().to(device)
            batch_targets = batch_targets.to(device)

            out, _ = model.forward(batch_inputs)

            # CrossEntropyLoss expects the class axis in position 1, so the
            # last two axes of the model output are swapped.
            loss = criterion(out.permute(0, 2, 1), batch_targets)
            loss.backward()

            # clip_grad_norm (no trailing underscore) is deprecated; the
            # in-place variant is the supported spelling.
            torch.nn.utils.clip_grad_norm_(model.parameters(),
                                           max_norm=config.max_norm)
            optimizer.step()

            predictions = out.argmax(dim=-1)
            accuracy = (predictions == batch_targets).float().mean()

            # Plain Python floats for logging, detached from the graph.
            loss_value = loss.item()
            accuracy_value = accuracy.item()

            # Just for time measurement
            t2 = time.time()
            examples_per_second = config.batch_size / float(t2 - t1)

            if step % config.print_every == 0:
                print(
                    "[{}] Train Step {:04d}/{:04d}, Epoch {:d} Batch Size = {}, Examples/Sec = {:.2f}, "
                    "Accuracy = {:.2f}, Loss = {:.3f}".format(
                        datetime.now().strftime("%Y-%m-%d %H:%M"), step,
                        int(config.train_steps), epoch, config.batch_size,
                        examples_per_second, accuracy_value, loss_value))
                if config.tensorboard:
                    writer.add_scalar('training_loss', loss_value, step)
                    writer.add_scalar('accuracy', accuracy_value, step)

            if step % config.sample_every == 0:
                # Generate some sentences by sampling from the model
                generate_sentence(step, model, config, dataset)

            if step == config.train_steps:
                # If you receive a PyTorch data-loader error, check this bug report:
                # https://github.com/pytorch/pytorch/pull/9655
                # NOTE(review): this only leaves the inner loop; the next
                # epoch still runs. Confirm that is the intended behaviour.
                print('Done training.')
                break

    # Flush pending TensorBoard events.
    if writer is not None:
        writer.close()
def train(config):
    """Train the text generation model and store checkpoints and metrics.

    Training runs until more than ``config.train_steps`` optimisation steps
    have been taken in total, across as many passes over the data as needed.
    Per-step accuracy and loss are collected and written with ``np.save``
    under ``results/``; the model is saved there as well, both at every
    sampling step and once at the end.

    Args:
        config: Namespace-like object. Fields read here: ``txt_file``,
            ``seq_length``, ``batch_size``, ``lstm_num_hidden``,
            ``lstm_num_layers``, ``learning_rate``, ``train_steps``,
            ``print_every`` and ``sample_every``.
    """
    import os

    # Initialize the device which to run the model on
    use_cuda = torch.cuda.is_available()
    device = torch.device("cuda:0" if use_cuda else "cpu")

    # Path to save the model; created up front so saving cannot fail on a
    # missing directory.
    path = "results/"
    os.makedirs(path, exist_ok=True)

    # File-name stem used for every saved model, taken from the second path
    # component of txt_file up to its first dot.
    book_name = config.txt_file.split('/')[1].split('.')[0]

    # Initialize the dataset and data loader
    dataset = TextDataset(config.txt_file, config.seq_length)
    data_loader = DataLoader(dataset, config.batch_size, num_workers=1)

    # Initialize the model that we are going to use
    model = TextGenerationModel(config.batch_size, config.seq_length, dataset,
                                config.lstm_num_hidden, config.lstm_num_layers,
                                device)

    # Setup the loss and optimizer
    criterion = torch.nn.CrossEntropyLoss()
    optimizer = torch.optim.RMSprop(model.parameters(),
                                    lr=config.learning_rate)

    # Store accuracy and losses
    results = {'accuracy': [], 'loss': []}

    separator = '__________________'

    total_steps = 0
    while total_steps <= config.train_steps:
        for step, (batch_inputs, batch_targets) in enumerate(data_loader):
            optimizer.zero_grad()

            # The loader yields sequences of tensors; stack them along dim 1.
            batch_inputs = torch.stack(batch_inputs, dim=1).to(device)
            batch_targets = torch.stack(batch_targets, dim=1).to(device)

            # Forward the one-hot encoded inputs through the model
            pred_targets, _ = model.forward(
                index_to_onehot(batch_inputs, dataset.vocab_size))

            # CrossEntropyLoss expects the class axis in position 1.
            loss = criterion(pred_targets.transpose(2, 1), batch_targets)

            # Backward pass
            # NOTE(review): retain_graph=True is only needed if the graph is
            # reused across steps; confirm against the model and drop it if
            # not, since it keeps the graph buffers alive.
            loss.backward(retain_graph=True)
            optimizer.step()

            # Accuracy: argmax along the vocabulary dimension
            accuracy = (pred_targets.argmax(dim=2) ==
                        batch_targets).float().mean().item()

            # Update the accuracy and losses for visualization
            results['accuracy'].append(accuracy)
            results['loss'].append(loss.item())

            total_steps += 1

            if step % config.print_every == 0:
                print("[{}] Train Step {:07d}/{:07d}, Batch Size = {}, "
                      "Accuracy = {:.2f}, Loss = {:.3f}".format(
                          datetime.now().strftime("%Y-%m-%d %H:%M"), step,
                          total_steps, config.batch_size,
                          results['accuracy'][-1], results['loss'][-1]))

            if step % config.sample_every == 0:
                # Generate some sentences by sampling from the model
                print('GENERATED NO TEMP:')
                print(model.generate_sentence(100))
                print(separator)
                print('GENERATED 0.5 TEMP:')
                print(model.generate_sentence(100, 0.5))
                print(separator)
                print('GENERATED 1 TEMP:')
                print(model.generate_sentence(100, 1))
                print(separator)
                print('GENERATED 2 TEMP:')
                print(model.generate_sentence(100, 2))
                # Save model for individual timesteps
                torch.save(model, path + book_name + str(step) + "_model.pt")

            # Stop on the total step count rather than the per-epoch index,
            # so multi-epoch runs do not overshoot by up to a whole epoch.
            # Within the first epoch this fires on the same iteration as the
            # former `step == config.train_steps` test.
            if total_steps > config.train_steps:
                # If you receive a PyTorch data-loader error, check this bug report:
                # https://github.com/pytorch/pytorch/pull/9655
                break

    print('Done training.')

    # Save the final model
    torch.save(model, path + book_name + "_final_model.pt")

    print("saving results in folder...")
    np.save(path + "loss_train", results['loss'])
    np.save(path + "accuracy_train", results['accuracy'])
def train(config):
    """Train the text generation model for up to ``config.train_steps`` steps.

    ``generated.txt`` is truncated at start-up. The loss and accuracy of a
    step are the averages of the per-time-step values.

    Args:
        config: Namespace-like object. Fields read here: ``device``,
            ``txt_file``, ``seq_length``, ``batch_size``, ``lstm_num_hidden``,
            ``lstm_num_layers``, ``learning_rate``, ``max_norm``,
            ``print_every``, ``sample_every`` and ``train_steps``.
    """
    # Empty file to write generated text to
    with open('generated.txt', 'w'):
        pass

    # Initialize the device which to run the model on
    device = torch.device(config.device)

    # Initialize the dataset and data loader
    dataset = TextDataset(config.txt_file, config.seq_length)
    data_loader = DataLoader(dataset, config.batch_size, num_workers=1)

    # Initialize the model that we are going to use
    model = TextGenerationModel(config.batch_size, config.seq_length,
                                dataset.vocab_size,
                                lstm_num_hidden=config.lstm_num_hidden,
                                device=device)

    # Setup the loss and optimizer
    criterion = torch.nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=config.learning_rate)

    for step, (batch_inputs, batch_targets) in enumerate(data_loader):
        # Only for time measurement of step through network
        t1 = time.time()

        # Stacking along dim 0 puts the time step on the first axis; the
        # batch is on the second (targets are indexed per step below).
        batch_inputs = torch.stack(batch_inputs)
        batch_targets = torch.stack(batch_targets).to(device)
        embedding = one_hot(batch_inputs, dataset._vocab_size).to(device)

        # Size the initial state from the actual batch: the last batch of an
        # epoch may hold fewer than config.batch_size examples.
        current_batch_size = batch_inputs.size(1)
        state_shape = (config.lstm_num_layers, current_batch_size,
                       config.lstm_num_hidden)
        h_0 = torch.zeros(*state_shape, device=device)
        c_0 = torch.zeros(*state_shape, device=device)

        output = model.forward(embedding, h_0, c_0)

        optimizer.zero_grad()

        # One loss / accuracy value per time step, averaged afterwards.
        losses, accuracies = [], []
        for i, out in enumerate(output):
            label = batch_targets[i, :]
            losses.append(criterion(out, label))
            accuracies.append(
                (torch.max(out, 1)[1] == label).float().mean())

        loss = sum(losses) / len(losses)
        accuracy = sum(accuracies) / len(accuracies)

        loss.backward()
        torch.nn.utils.clip_grad_norm_(model.parameters(),
                                       max_norm=config.max_norm)
        optimizer.step()

        # Just for time measurement
        t2 = time.time()
        examples_per_second = config.batch_size / float(t2 - t1)

        if step % config.print_every == 0:
            # Reported step is the fraction of train_steps done, times 1e6.
            train_step = float(step) / float(config.train_steps)
            print("[{}] Train Step {:.0f}, Batch Size = {}, Examples/Sec = {:.2f}, "
                  "Accuracy = {:.2f}, Loss = {:.3f}".format(
                      datetime.now().strftime("%Y-%m-%d %H:%M"),
                      train_step * 1000000, config.batch_size,
                      examples_per_second, accuracy, loss))

        if step % config.sample_every == 0:
            sample(dataset, model)

        if step == config.train_steps:
            # If you receive a PyTorch data-loader error, check this bug report:
            # https://github.com/pytorch/pytorch/pull/9655
            break

    print('Done training.')
offset = 2380 temperature = 1 policy = 'greedy' for e in range(epoch): #torch.save(model.state_dict(), str(e+1) + 'model.pt') for step, (batch_inputs, batch_targets) in enumerate(data_loader): # Only for time measurement of step through network #lr_optim.step() optimizer.zero_grad() t1 = time.time() inputs = torch.stack([*batch_inputs], dim=1) targets = torch.stack([*batch_targets], dim=1) inputs = inputs.to(device) targets = targets.to(device) out = model.forward(inputs)[0] out = out.permute(0, 2, 1) loss = criterion(out, targets) accuracy = acc(out, targets) torch.nn.utils.clip_grad_norm(model.parameters(), max_norm=config.max_norm) loss.backward() optimizer.step() # Just for time measurement t2 = time.time() examples_per_second = config.batch_size/float(t2-t1) if step % config.print_every == 0: print('accuracy, loss, step: \n',
temp_list = [0.5, 1., 2.]
policy_list = ['greedy', 'temp']
seq_length = 111
alice_string = list('Alice')

# Generate some sentences by sampling from the model: one sentence for every
# (policy, temperature) combination, each primed with the characters of
# alice_string.
for policy in policy_list:
    for temperature in temp_list:
        char_list = []
        hidden = None

        # Feed the priming characters one at a time, carrying the hidden
        # state along.
        for alice in alice_string:
            idx = dataset.convert_to_idx(alice)
            char_list.append(idx)
            generator = torch.tensor([idx]).unsqueeze(-1).to(device)
            generator, hidden = model.forward(generator, hidden)

        # Continue for seq_length more characters, feeding each pick back in.
        for _ in range(seq_length):
            if policy != 'greedy':
                # Temperature-scaled sampling from the softmax distribution
                scaled = generator.squeeze() / temperature
                idx = torch.multinomial(torch.softmax(scaled, dim=0),
                                        1).item()
            else:
                idx = torch.argmax(generator).item()
            generator = torch.tensor([idx]).unsqueeze(-1).to(device)
            generator, hidden = model.forward(generator, hidden)
            char_list.append(idx)

        char = dataset.convert_to_string(char_list)
        out_name = f"BonusTemp{int(np.floor(temperature))}Book.txt"
        with open(out_name, "w+") as text_file:
            print(policy + ': ', temperature, '\n Output: ', char,
                  file=text_file)
def train(config):
    """Train the text generation model, optionally resuming from ``model.pt``.

    At every sampling step the function prints generated text and saves the
    model's state dict to ``model.pt``.

    Args:
        config: Namespace-like object. Fields read here: ``txt_file``,
            ``seq_length``, ``batch_size``, ``load_model``, ``learning_rate``,
            ``temp``, ``print_every``, ``sample_every`` and ``train_steps``.
            Any ``load_model`` value other than ``"none"`` loads ``model.pt``.
    """
    # Initialize the device which to run the model on
    use_cuda = torch.cuda.is_available()
    if use_cuda:
        print("cuda")
        device = torch.device('cuda:0')
    else:
        print("no cuda")
        device = torch.device('cpu')

    # Text generation options
    generate_text = True
    generated_text_size = 1500
    fixed_output_samples = False
    fixed_random_samples = True

    # Initialize the dataset and data loader
    dataset = TextDataset(config.txt_file, config.seq_length)
    data_loader = DataLoader(dataset, config.batch_size, num_workers=1)

    # Initialize the model that we are going to use. Both the fresh and the
    # resumed case build the same model; only the weights differ.
    model = TextGenerationModel(config.batch_size, config.seq_length,
                                dataset.vocab_size).to(device)
    if config.load_model != "none":
        print("load model")
        # map_location makes the checkpoint loadable on CPU-only machines.
        model.load_state_dict(torch.load("model.pt", map_location=device))
    print(model)

    # Setup the loss and optimizer
    criterion = torch.nn.CrossEntropyLoss()
    optimizer = optim.RMSprop(model.parameters(), config.learning_rate)

    for step, (batch_inputs, batch_targets) in enumerate(data_loader):
        # Only for time measurement of step through network
        t1 = time.time()

        logits = model.forward(batch_inputs, use_cuda, config.temp)

        optimizer.zero_grad()

        # Flatten all leading axes so every position is one classification
        # example for the loss.
        flat_logits = logits.view(-1, logits.shape[2])
        flat_targets = torch.stack(batch_targets).view(-1).long().to(device)

        loss = criterion(flat_logits, flat_targets)
        accuracy = accuracy_(flat_logits, flat_targets)

        loss.backward()
        optimizer.step()

        # Just for time measurement
        t2 = time.time()
        examples_per_second = config.batch_size / float(t2 - t1)

        if step % config.print_every == 0:
            print(
                "[{}] Train Step {:04d}/{:04d}, Batch Size = {}, Examples/Sec = {:.2f}, "
                "Accuracy = {:.2f}, Loss = {:.3f}".format(
                    datetime.now().strftime("%Y-%m-%d %H:%M"), step,
                    int(config.train_steps), config.batch_size,
                    examples_per_second, accuracy, loss.item()))

        if step % config.sample_every == 0:
            # Keep the stacked inputs under their own name: the generated
            # text must not overwrite them, because the fixed-sample branch
            # below compares against the original batch.
            inputs = torch.stack(batch_inputs)

            if generate_text:
                # Seed generation with the first column of the batch.
                seed = inputs[:, 0].reshape(-1, 1)
                text = model.create_text(seed, generated_text_size, use_cuda,
                                         config.temp)
                print("Generated Text : ", dataset.convert_to_string(text),
                      " : end")

            if fixed_output_samples:
                # Show the first three batch columns next to the model's
                # argmax predictions for them.
                out_max = torch.argmax(logits, dim=2)
                for column in range(3):
                    original = inputs[:, column].reshape(-1, 1)
                    predicted = out_max[:, column].reshape(-1, 1)
                    print("Original Text : ",
                          dataset.convert_to_string(original),
                          " Generated Text : ",
                          dataset.convert_to_string(predicted))

            if fixed_random_samples:
                text = model.random_sampling(config.seq_length, use_cuda,
                                             config.temp)
                print("Generated Text : ", dataset.convert_to_string(text),
                      " : end")

            print("Saving model...")
            torch.save(model.state_dict(), "model.pt")

        if step == config.train_steps:
            # If you receive a PyTorch data-loader error, check this bug report:
            # https://github.com/pytorch/pytorch/pull/9655
            break

    print('Done training.')
def train():
    """Train the text generation model using the module-level ``config``.

    The function samples sentences at three temperatures, applies a
    step-wise learning-rate decay, optionally logs to TensorBoard, and saves
    the model every ``config.save_every`` steps. A ``KeyboardInterrupt`` or
    ``BrokenPipeError`` ends training with a quit message instead of a
    traceback.

    ``config.sample_num`` must be a multiple of 3: the sampled start
    characters are split evenly over the three temperatures.
    """
    # Torch settings
    device = torch.device(config.device)
    # NOTE(review): this compares a torch.device with a str, which looks like
    # it never matches, so neither branch would run. Left unchanged because
    # enabling it would alter default tensor placement; confirm the intent.
    if device == 'cpu':
        torch.set_default_tensor_type(torch.FloatTensor)
    elif device == 'cuda:0':
        torch.set_default_tensor_type(torch.cuda.FloatTensor)
    dtype = torch.float

    # Tensorboard summary writer
    writer = None
    if config.tensorboard:
        run_id = datetime.now().strftime("%Y-%m-%d_%H-%M-%S_" +
                                         config.model_type.lower() + '_' +
                                         str(config.input_length))
        log_dir = 'tensorboard/' + config.model_type.lower() + '/' + run_id
        writer = SummaryWriter(log_dir=log_dir)

    # Initialize the dataset and data loader
    dataset = TextDataset(config.txt_file, config.seq_length)
    data_loader = DataLoader(dataset, config.batch_size, num_workers=1)

    # Model parameters
    lr = config.learning_rate
    lr_decay = config.learning_rate_decay
    lr_step = config.learning_rate_step
    dropout = 1.0 - config.dropout_keep_prob
    temp = [0.5, 1., 2.]
    assert config.sample_num % 3 == 0
    # Number of sampled sentences per temperature value.
    samples_per_temp = config.sample_num // len(temp)

    # Initialize the model that we are going to use
    model = TextGenerationModel(config.batch_size, config.seq_length,
                                dataset.vocab_size, dropout,
                                device).to(device)

    # Setup the loss and optimizer
    criterion = torch.nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=lr)

    # Characters used to start sentences (closing characters such as ')', '.'
    # or others were removed)
    start_characters = ['1', '2', '3', '4', '5', '6', '7', '8', '9', 'A', 'B',
                        'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M',
                        'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X',
                        'Y', 'Z', 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i',
                        'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't',
                        'u', 'v', 'w', 'x', 'y', 'z', '(', '[', '*', '-', '‘',
                        '“']
    # Keep only the characters that actually occur in the vocabulary.
    start_characters = list(set(start_characters) & set(dataset.vocab))

    # Store all generated sentences
    sentences = {}

    # Load model, if there's any model to load
    model, optimizer, sentences, start_step = load_model(model, optimizer,
                                                         sentences, step=0)

    try:
        for step, (batch_inputs, batch_targets) in enumerate(data_loader):
            # If the model has been loaded, regulate step number accordingly
            step += start_step

            # Only for time measurement of step through network
            t1 = time.time()

            # Stack the per-position tensors and swap the two axes
            batch_inputs = torch.stack(batch_inputs).permute((1, 0))
            batch_targets = torch.stack(batch_targets).permute(
                (1, 0)).to(device)

            # Convert batches to one-hot representation
            batch_inputs = get_one_hot(batch_inputs, config.batch_size,
                                       config.seq_length,
                                       dataset.vocab_size).to(device)

            # Forward pass
            model.train()
            optimizer.zero_grad()
            predictions = model.forward(batch_inputs)

            # Compute loss (CrossEntropyLoss expects the class axis second)
            loss = criterion(predictions.permute(0, 2, 1), batch_targets)

            # Backward pass
            loss.backward()

            # Clipping gradients to avoid exploding gradient problem
            torch.nn.utils.clip_grad_norm_(model.parameters(),
                                           max_norm=config.max_norm)

            # Update weights
            optimizer.step()

            # Compute accuracy
            accuracy = get_accuracy(predictions, batch_targets)

            # Add accuracy and loss to the writer
            if config.tensorboard:
                writer.add_scalars('Accuracy_and_Loss',
                                   {'accuracy': accuracy, 'loss': loss}, step)
                writer.add_scalar('Learning_Rate', lr, step)

            # Update learning rate
            if (step % lr_step == 0) and step != 0:
                lr *= lr_decay
                for group in optimizer.param_groups:
                    group['lr'] = lr

            # Just for time measurement
            t2 = time.time()
            examples_per_second = config.batch_size / float(t2 - t1)

            if step % config.print_every == 0:
                print("[{}] Train Step {:04d}/{:04d}, Batch Size = {}, Examples/Sec = {:.2f}, "
                      "Accuracy = {:.2f}, Loss = {:.3f}".format(
                          datetime.now().strftime("%Y-%m-%d %H:%M:%S"), step,
                          int(config.train_steps), config.batch_size,
                          examples_per_second, accuracy, loss))

            if step % config.sample_every == 0:
                model.eval()

                # Store sentences for this step, keyed by temperature
                step_sentences = {t: [] for t in temp}

                # Draw config.sample_num random starter characters
                sample = random.sample(start_characters, config.sample_num)

                print()
                for idx, c in enumerate(sample):
                    # Spread the samples evenly over the temperatures. The
                    # former `int(idx / 2)` only worked for sample_num <= 6
                    # and ran past the list for larger values.
                    t = temp[idx // samples_per_temp]

                    # Character's one-hot representation
                    c_oh = torch.tensor(dataset.convert_to_one_hot(c),
                                        dtype=dtype).to(device)

                    sentence = dataset.convert_to_string(
                        model.generate(c_oh, t))

                    print("[t={:.1f}] {}".format(
                        t, sentence.replace('\n', '\\n ')))
                    step_sentences[t].append(sentence)
                print()

                sentences[step] = step_sentences

            if (step % config.save_every == 0) and step != 0:
                save_model(model, optimizer, sentences, step)

            if step == config.train_steps:
                # If you receive a PyTorch data-loader error, check this bug report:
                # https://github.com/pytorch/pytorch/pull/9655
                break

        print('Done training.')

    except (KeyboardInterrupt, BrokenPipeError):
        print("\n" + random.choice(quit_msgs))

    finally:
        # Close the writer on every exit path, including unexpected errors.
        if writer is not None:
            writer.close()
def train(config):
    """Run the training loop, resuming from the newest checkpoint if any.

    Metrics are recorded at every ``config.print_every`` step, sample
    sequences at every ``config.sample_every`` step, and a checkpoint, the
    results and the model are written at every ``config.checkpoint_every``
    step.

    Args:
        config: Namespace-like object. Fields read here: ``txt_file``,
            ``device``, ``seq_length``, ``batch_size``, ``lstm_num_hidden``,
            ``lstm_num_layers``, ``batch_first``, ``dropout_keep_prob``,
            ``learning_rate``, ``train_steps``, ``max_norm``,
            ``print_every``, ``sample_every`` and ``checkpoint_every``.
    """
    # Stem of the text file; used in checkpoint, result and model file names.
    filename = Path(config.txt_file).stem

    # Fall back to the CPU when CUDA was requested but is not available.
    cuda_missing = config.device == 'cuda' and not torch.cuda.is_available()
    device = torch.device('cpu' if cuda_missing else config.device)

    # Dataset and loader
    dataset = TextDataset(filename=config.txt_file,
                          seq_length=config.seq_length)
    data_loader = DataLoader(dataset, config.batch_size, num_workers=1)

    VOCAB_SIZE = dataset.vocab_size

    # Model, loss and optimizer
    model = TextGenerationModel(
        batch_size=config.batch_size,
        seq_length=config.seq_length,
        vocabulary_size=VOCAB_SIZE,
        lstm_num_hidden=config.lstm_num_hidden,
        lstm_num_layers=config.lstm_num_layers,
        device=device,
        batch_first=config.batch_first,
        dropout=1.0 - config.dropout_keep_prob,
    )
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), config.learning_rate)

    # Resume from the most recently created checkpoint, if there is one.
    pattern = f'{model.__class__.__name__}_{filename}_checkpoint_*.pt'
    checkpoints = list(CHECKPOINTS_DIR.glob(pattern))
    if checkpoints:
        newest = sorted(checkpoints, key=os.path.getctime)[-1]
        start_step, results, sequences = load_checkpoint(
            newest, model, optimizer)
    else:
        start_step = 0
        results = {'step': [], 'accuracy': [], 'loss': []}
        sequences = {'step': [], 't': [], 'temperature': [], 'sequence': []}

    sample_lengths = (20, 30, 60, 120)
    sample_temperatures = (0.0, 0.5, 1.0, 2.0)

    for step in range(start_step, int(config.train_steps)):
        # Start a fresh pass over the loader on the first iteration and on
        # every step that is a multiple of the number of mini-batches.
        if step % len(data_loader) == 0 or step == start_step:
            data_iter = iter(data_loader)

        batch_inputs, batch_targets = next(data_iter)

        # Only for time measurement of step through network
        t1 = time.time()

        model.train()
        optimizer.zero_grad()

        # Stack the per-position tensors along dim 1 and move them over.
        inputs = torch.stack(batch_inputs, dim=1).to(device)
        targets = torch.stack(batch_targets, dim=1).to(device)

        # Forward pass
        logits, _ = model.forward(inputs)
        predicted = logits.argmax(dim=-1)

        # Loss (class axis moved to position 1) and accuracy
        loss = criterion.forward(logits.transpose(2, 1), targets)
        accuracy = get_accuracy(predicted, targets)

        # Backward pass, gradient clipping, parameter update
        loss.backward()
        torch.nn.utils.clip_grad_norm_(model.parameters(),
                                       max_norm=config.max_norm)
        optimizer.step()

        # Just for time measurement
        t2 = time.time()
        examples_per_second = config.batch_size / float(t2 - t1)

        if step % config.print_every == 0:
            print(
                f'[{datetime.now().strftime("%Y-%m-%d %H:%M")}], '
                f'Train Step {step:04d}/{int(config.train_steps):04d}, '
                f'Batch Size = {config.batch_size}, '
                f'Examples/Sec = {examples_per_second:.2f}, '
                f'Accuracy = {accuracy:.2f}, Loss = {loss:.3f}'
            )
            # Record the metrics of this step
            results['step'].append(step)
            results['accuracy'].append(accuracy.item())
            results['loss'].append(loss.item())

        if step % config.sample_every == 0:
            # One sample per (length, temperature) combination
            for T in sample_lengths:
                for temperature in sample_temperatures:
                    sequence = sample_sequence(
                        model=model,
                        vocab_size=VOCAB_SIZE,
                        T=T,
                        char=None,
                        temperature=temperature,
                        device=device,
                    )
                    sequence_str = dataset.convert_to_string(sequence)
                    print(f'Generated sample sequence (T={T}, temp={temperature}): {sequence_str}')

                    sequences['step'].append(step)
                    sequences['t'].append(T)
                    sequences['temperature'].append(temperature)
                    sequences['sequence'].append(sequence_str)

        if step % config.checkpoint_every == 0:
            create_checkpoint(CHECKPOINTS_DIR, filename, step, model,
                              optimizer, results, sequences)
            save_results(RESULTS_DIR, filename, results, sequences, model)
            save_model(MODELS_DIR, filename, model)

        if step == config.train_steps:
            # If you receive a PyTorch data-loader error, check this bug report:
            # https://github.com/pytorch/pytorch/pull/9655
            break

    print('Done training.')
def train(config):
    """Train the text generation model for 50 passes and save it.

    The model is stored with ``torch.save`` next to the text file, replacing
    a trailing ``.txt`` by ``.pt``. Loss and accuracy are logged to
    TensorBoard at every ``config.print_every`` step.

    Side effects on ``config``: ``device`` and ``vocab_size`` are
    overwritten.

    Args:
        config: Namespace-like object. Fields read here: ``txt_file``,
            ``seq_length``, ``batch_size``, ``lstm_num_hidden``,
            ``lstm_num_layers``, ``learning_rate``, ``print_every``,
            ``sample_every`` and ``train_steps``.
    """
    # Prefer CUDA as before, but fall back to the CPU instead of crashing on
    # machines without a GPU.
    config.device = 'cuda' if torch.cuda.is_available() else 'cpu'
    device = torch.device(config.device)

    # Initialize the dataset and data loader
    dataset = TextDataset(config.txt_file, config.seq_length)
    data_loader = DataLoader(dataset, config.batch_size, num_workers=1)

    vocab_size = dataset.vocab_size
    config.vocab_size = vocab_size

    # Initialize the model that we are going to use
    model = TextGenerationModel(config.batch_size, config.seq_length,
                                vocab_size, config.lstm_num_hidden,
                                config.lstm_num_layers, config.device)
    model = model.to(device)

    # Setup the loss and optimizer
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.RMSprop(model.parameters(), lr=config.learning_rate)

    writer = SummaryWriter(comment=config.txt_file)
    writer_iteration = 0

    for epoch in range(50):
        print("\n\n\n EPOCH: {}".format(epoch))
        for step, (batch_inputs, batch_targets) in enumerate(data_loader):
            # Only for time measurement of step through network
            t1 = time.time()

            batch_inputs = torch.stack(batch_inputs).to(device)

            # Manual one-hot encoding: a zero tensor with a trailing
            # vocabulary axis, then a 1 written at each character index.
            one_hot = torch.zeros(batch_inputs.size(0), batch_inputs.size(1),
                                  vocab_size, device=device)
            one_hot.scatter_(2, batch_inputs.unsqueeze(-1), 1)

            # Targets are stacked along dim 1 (inputs above along dim 0).
            # NOTE(review): inputs and targets use different axis orders;
            # confirm the model reorders its output to match the targets.
            batch_targets = torch.stack(batch_targets, dim=1).to(device)

            out, _ = model.forward(one_hot)

            # CrossEntropyLoss expects the class axis in position 1.
            loss = criterion(out.transpose(2, 1), batch_targets)

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            # Just for time measurement
            t2 = time.time()
            examples_per_second = config.batch_size / float(t2 - t1)

            if step % config.print_every == 0:
                compare = (out.argmax(2) == batch_targets)
                accuracy = compare.sum().item() / compare.numel()
                loss_value = loss.item()

                writer.add_scalar('loss', loss_value, writer_iteration)
                writer.add_scalar('accuracy', accuracy, writer_iteration)
                writer_iteration += 1

                print("[{}] Train Step {:04d}/{:d}, Batch Size = {}, Examples/Sec = {:.2f}, "
                      "Accuracy = {:.2f}, Loss = {:.3f}".format(
                          datetime.now().strftime("%Y-%m-%d %H:%M"),
                          int(step), int(config.train_steps),
                          config.batch_size, examples_per_second, accuracy,
                          loss_value))

            if step % config.sample_every == 0:
                random_int_sentence(dataset, config, model)

            if step == config.train_steps:
                # If you receive a PyTorch data-loader error, check this bug report:
                # https://github.com/pytorch/pytorch/pull/9655
                break

    # str.strip('.txt') strips any of the characters '.', 't', 'x' from both
    # ends (e.g. "text.txt" -> "e"); remove only the literal suffix instead.
    base_name = config.txt_file
    if base_name.endswith('.txt'):
        base_name = base_name[:-len('.txt')]
    torch.save(model, base_name + ".pt")

    # Flush pending TensorBoard events.
    writer.close()

    print('Done training.')
def train(config):
    """Train the text generation model until ``config.train_steps`` is passed.

    The function loops over the data for up to 1000 passes and counts every
    fetched batch in ``realsteps``. Batches with fewer than
    ``config.batch_size`` examples are skipped. Loss and accuracy are
    averaged over the sequence length and logged through the module-level
    ``writer``; the whole model is saved every 10000 steps.

    Args:
        config: Namespace-like object. Fields read here: ``txt_file``,
            ``seq_length``, ``batch_size``, ``lstm_num_hidden``,
            ``lstm_num_layers``, ``learning_rate``, ``print_every`` and
            ``train_steps``.
    """
    # Initialize the device which to run the model on
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    # Initialize the dataset and data loader
    dataset = TextDataset(config.txt_file, config.seq_length)
    data_loader = DataLoader(dataset, config.batch_size, num_workers=1)

    model = TextGenerationModel(config.batch_size, config.seq_length,
                                dataset.vocab_size,
                                lstm_num_hidden=config.lstm_num_hidden,
                                lstm_num_layers=config.lstm_num_layers,
                                device=device)

    # Setup the loss and optimizer
    criterion = CrossEntropyLoss()
    optimizer = RMSprop(model.parameters(), lr=config.learning_rate)

    realsteps = 0
    for epoch in range(1000):
        for step, (batch_inputs, batch_targets) in enumerate(data_loader):
            # Use the global counter instead of the per-epoch index.
            realsteps += 1
            step = realsteps

            t1 = time.time()

            # Tensor.to() is not in-place: keep the returned tensor,
            # otherwise the targets stay on the CPU.
            batch_targets = torch.stack(batch_targets).to(device)

            optimizer.zero_grad()

            # Skip incomplete batches. The size is compared against the
            # configured batch size; a fixed 64 skipped every batch for
            # smaller batch sizes.
            if len(batch_inputs[0]) < config.batch_size:
                continue

            probs = model.forward(batch_inputs)

            # Accumulate loss and accuracy over the paired outputs/targets.
            loss = 0
            accuracy = 0
            for prob, target in zip(probs, batch_targets):
                loss += criterion.forward(prob, target)
                predictions = prob.argmax(dim=1).float()
                accuracy += float(torch.sum(
                    predictions == target.float())) / config.batch_size

            loss = loss / config.seq_length
            # Normalise before logging so TensorBoard shows a fraction
            # rather than a sum over time steps.
            accuracy = accuracy / config.seq_length

            loss.backward()
            writer.add_scalar('Train/Loss', loss.item(), realsteps)
            writer.add_scalar('Train/Accurac3y', accuracy, realsteps)
            optimizer.step()

            # Just for time measurement
            t2 = time.time()
            examples_per_second = config.batch_size / float(t2 - t1)

            if step % 10000 == 0:
                torch.save(model, './' + str(step))

            if step % config.print_every == 0:
                print(
                    "[{}] Train Step {:04d}/{:04f}, Batch Size = {}, Examples/Sec = {:.2f}, "
                    "Accuracy = {:.2f}, Loss = {:.3f}".format(
                        datetime.now().strftime("%Y-%m-%d %H:%M"), step,
                        config.train_steps, config.batch_size,
                        examples_per_second, accuracy, loss))

            if realsteps > config.train_steps:
                break

        # Propagate the stop to the epoch loop as well.
        if realsteps > config.train_steps:
            # If you receive a PyTorch data-loader error, check this bug report:
            # https://github.com/pytorch/pytorch/pull/9655
            break

    print('Done training.')
def train(config):
    """Train the text generation model over several passes of the data.

    The number of passes is derived from ``config.train_steps`` and the
    dataset size. State dicts are saved at a fixed set of global steps and at
    the end (``model_final.pickle``); the accuracy and loss recorded at each
    printing step are written to ``accuracy.txt`` and ``loss.txt``.

    Args:
        config: Namespace-like object. Fields read here: ``device``,
            ``txt_file``, ``seq_length``, ``batch_size``, ``lstm_num_hidden``,
            ``lstm_num_layers``, ``learning_rate``, ``train_steps``,
            ``max_norm``, ``print_every`` and ``sample_every``.
    """
    # Initialize the device which to run the model on
    device = torch.device(config.device)

    dataset = TextDataset(filename=config.txt_file,
                          seq_length=config.seq_length)

    # Initialize the model that we are going to use
    model = TextGenerationModel(batch_size=config.batch_size,
                                seq_length=config.seq_length,
                                vocabulary_size=dataset.vocab_size,
                                lstm_num_hidden=config.lstm_num_hidden,
                                lstm_num_layers=config.lstm_num_layers,
                                device=config.device).to(config.device)

    # Initialize the data loader
    data_loader = DataLoader(dataset, config.batch_size, num_workers=1)

    # Setup the loss and optimizer
    criterion = torch.nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=config.learning_rate)

    # If the requested steps exceed one pass over the data, more than one
    # epoch is needed; hence the outer loop.
    steps_in_epoch = int(len(dataset) / config.batch_size) + 1
    epochs = int(config.train_steps / steps_in_epoch) + 1
    print('EPOCHS ', epochs)
    print('STEPS IN EPOCH ', steps_in_epoch)
    print('TOTAL NUMBER OF STEPS ', config.train_steps)

    # Global steps at which the model is saved: every 20% of train_steps plus
    # a few fixed early steps.
    save_model = [int(h * 0.2 * config.train_steps) for h in range(5)]
    save_model1 = [100, 500, 1500]
    save_steps = set(save_model) | set(save_model1)

    accuracy_dict = {}
    loss_dict = {}

    for j in range(epochs):
        print('EPOCH ', j)
        for step, (batch_inputs, batch_targets) in enumerate(data_loader):
            # Only for time measurement of step through network
            t1 = time.time()

            # Step index counted across epochs.
            global_step = int(step + j * steps_in_epoch)

            batch_targets = torch.stack(batch_targets).to(config.device)
            y_pred = model.forward(batch_inputs).transpose(0, 2)
            batch_targets = batch_targets.transpose(0, 1)

            optimizer.zero_grad()
            loss = criterion(y_pred, batch_targets)
            loss.backward()
            # Clip after backward(): clipping before it acted on the freshly
            # zeroed gradients and therefore had no effect.
            torch.nn.utils.clip_grad_norm_(model.parameters(),
                                           max_norm=config.max_norm)
            optimizer.step()

            accuracy = acc(y_pred, batch_targets)

            # Just for time measurement
            t2 = time.time()
            examples_per_second = config.batch_size / float(t2 - t1)

            if global_step % config.print_every == 0:
                accuracy_dict[global_step] = accuracy
                loss_dict[global_step] = float(loss)
                print(
                    "[{}] Train Step {:04d}/{:04d}, Batch Size = {}, Examples/Sec = {:.2f}, "
                    "Accuracy = {:.2f}, Loss = {:.3f}".format(
                        datetime.now().strftime("%Y-%m-%d %H:%M"),
                        global_step, int(config.train_steps),
                        config.batch_size, examples_per_second, accuracy,
                        loss))

            if step == config.sample_every:
                # Generate some sentences by sampling from the model
                pass

            if step == config.train_steps:
                # If you receive a PyTorch data-loader error, check this bug report:
                # https://github.com/pytorch/pytorch/pull/9655
                # NOTE(review): this tests the per-epoch index and only
                # leaves the inner loop; confirm the intended stop condition.
                break

            if global_step in save_steps:
                name_model = 'model_' + str(global_step) + '.pickle'
                torch.save(model.state_dict(), name_model)

    torch.save(model.state_dict(), 'model_final.pickle')

    # Context managers close the files even if a write fails.
    with open("accuracy.txt", "w") as f1:
        f1.write(str(accuracy_dict))
    with open("loss.txt", "w") as f2:
        f2.write(str(loss_dict))

    print('Done training.')
def train(config):
    """Fit a ``TextGenerationModel`` on ``config.txt_file`` for ``config.train_steps`` steps.

    The data loader is re-iterated until the requested number of steps has
    been taken. The loss is the average of the per-timestep cross-entropy
    values, and accuracy is the fraction of correctly predicted characters in
    the batch.

    Args:
        config: namespace providing ``txt_file``, ``seq_length``,
            ``batch_size``, ``lstm_num_hidden``, ``lstm_num_layers``,
            ``learning_rate``, ``learning_rate_decay``, ``train_steps``,
            ``print_every`` and ``sample_every``.
    """
    # Pick the device the model runs on
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

    # Dataset and loader
    dataset = TextDataset(config.txt_file, config.seq_length)
    data_loader = DataLoader(dataset, config.batch_size, num_workers=1)

    # Model
    model = TextGenerationModel(config.batch_size, config.seq_length,
                                dataset.vocab_size, config.lstm_num_hidden,
                                config.lstm_num_layers, device).to(device)

    # Loss and optimizer
    # NOTE(review): learning_rate_decay is passed as Adam's weight_decay;
    # confirm this is intended.
    criterion = torch.nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(),
                                 lr=config.learning_rate,
                                 weight_decay=config.learning_rate_decay)

    # added for vscode debug functionality
    multiprocessing.set_start_method('spawn', True)

    total_steps = 0
    while config.train_steps > total_steps:
        for batch_inputs, batch_targets in data_loader:
            total_steps += 1
            if total_steps > config.train_steps:
                break

            batch_inputs = batch_inputs.to(device)
            batch_targets = batch_targets.to(device)

            # Only for time measurement of step through network
            t1 = time.time()

            batch_inputs = torch.nn.functional.one_hot(batch_inputs,
                                                       dataset.vocab_size)
            optimizer.zero_grad()
            output = model.forward(batch_inputs)

            # Average the cross-entropy over the timesteps (second axis).
            num_timesteps = len(output[0])
            loss = 0.0
            for t in range(num_timesteps):
                step_loss = criterion.forward(output[:, t, :],
                                              batch_targets[:, t])
                loss += step_loss / num_timesteps
            loss.backward()
            optimizer.step()

            # Character-level accuracy over the whole batch.
            with torch.no_grad():
                correct = 0
                total_size = 0
                for t in range(num_timesteps):
                    probs = torch.nn.functional.softmax(output[:, t, :],
                                                        dim=1)
                    guesses = torch.max(probs, 1)[1]
                    correct += guesses.eq(batch_targets[:, t]).sum().item()
                    total_size += len(guesses)
                accuracy = correct / total_size

            # Just for time measurement
            t2 = time.time()
            examples_per_second = config.batch_size / float(t2 - t1)

            if total_steps % config.print_every == 0:
                print(
                    "[{}] Train Step {:04d}/{:04d}, Batch Size = {}, Examples/Sec = {:.2f}, "
                    "Accuracy = {:.2f}, Loss = {:.3f}".format(
                        datetime.now().strftime("%Y-%m-%d %H:%M"),
                        total_steps, int(config.train_steps),
                        config.batch_size, examples_per_second, accuracy,
                        loss))

            # The comparison value below is kept from the original; with a
            # positive sample_every it never matches, so this sampling branch
            # does not run.
            if total_steps % config.sample_every == -1232342342152345236526:
                # Generate a sentence greedily, starting from a random
                # character and re-feeding the growing sequence every step.
                text = torch.zeros((1, 1)).long().random_(
                    0, dataset.vocab_size).to(device)
                text = torch.nn.functional.one_hot(text, dataset.vocab_size)
                for i in range(config.seq_length - 1):
                    prediction = model.forward(text)
                    char_probs = torch.nn.functional.softmax(
                        prediction[:, i, :], dim=1)
                    next_char = torch.max(char_probs, 1)[1]
                    next_char = torch.nn.functional.one_hot(
                        next_char, dataset.vocab_size)
                    next_char = next_char.unsqueeze(0)
                    text = torch.cat((text, next_char), 1)
                stuff = torch.argmax(text[0], 1)
                sentence = dataset.convert_to_string(stuff.tolist())
                print(sentence)

            if total_steps == config.train_steps:
                # If you receive a PyTorch data-loader error, check this bug report:
                # https://github.com/pytorch/pytorch/pull/9655
                break

    print('Done training.')
def train(config):
    """Train a ``TextGenerationModel`` with Adam and a step learning-rate schedule.

    The pickled dataset is written to ``<txt_file>.dataset``. Every
    ``config.sample_every`` steps the whole model is saved to
    ``<txt_file>.model`` and sampled text for several lengths and
    temperatures is appended to ``<txt_file>.generated``.

    Args:
        config: namespace providing ``device``, ``txt_file``, ``seq_length``,
            ``batch_size``, ``lstm_num_hidden``, ``lstm_num_layers``,
            ``dropout_keep_prob``, ``learning_rate``, ``learning_rate_step``,
            ``learning_rate_decay``, ``train_steps``, ``print_every`` and
            ``sample_every``.
    """
    # Device the tensors are moved to
    device = torch.device(config.device)

    # Dataset (persisted so that later sampling can reuse its vocabulary)
    dataset = TextDataset(config.txt_file, config.seq_length)
    torch.save(dataset, config.txt_file + '.dataset')

    # Model; the last argument is the dropout probability
    dropout_prob = 1. - config.dropout_keep_prob
    model = TextGenerationModel(dataset.vocab_size, config.lstm_num_hidden,
                                config.lstm_num_layers, config.device,
                                dropout_prob)

    data_loader = DataLoader(dataset, config.batch_size, num_workers=1)

    # Loss, optimizer and learning-rate schedule
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=config.learning_rate)
    lr_scheduler = optim.lr_scheduler.StepLR(
        optimizer,
        step_size=config.learning_rate_step,
        gamma=config.learning_rate_decay)

    # Histories (seeded with two placeholder entries, as before)
    accuracies = [0, 1]
    losses = [0, 1]

    total_steps = int(config.train_steps)
    for step in range(total_steps):
        # Restart the loader whenever a full pass over the data is finished.
        if step % len(data_loader) == 0:
            data_iter = iter(data_loader)
        batch_inputs, batch_targets = next(data_iter)

        # Only for time measurement of step through network
        t1 = time.time()

        device_inputs = torch.stack(batch_inputs, dim=0).to(device)
        device_targets = torch.stack(batch_targets, dim=1).to(device)

        out, _ = model.forward(device_inputs)
        # Same axis order as transpose(0, 1) followed by transpose(1, 2).
        scores = out.permute(1, 2, 0)

        optimizer.zero_grad()
        loss = criterion.forward(scores, device_targets)
        losses.append(loss.item())

        hits = scores.argmax(dim=1) == device_targets
        accuracies.append(hits.float().mean())

        loss.backward()
        optimizer.step()
        lr_scheduler.step()

        # Just for time measurement
        t2 = time.time()
        examples_per_second = config.batch_size / float(t2 - t1)

        if step % config.print_every == 0:
            timestamp = datetime.now().strftime("%Y-%m-%d %H:%M")
            current_lr = optimizer.param_groups[-1]['lr']
            print(
                "[{}] Train Step {:04d}/{:04d}, Batch Size = {}, Examples/Sec = {:.2f}, "
                "Accuracy = {:.2f}, Loss = {:.3f}, LR = {}".format(
                    timestamp, step, total_steps, config.batch_size,
                    examples_per_second, accuracies[-1], losses[-1],
                    current_lr))

        if step % config.sample_every == 0:
            torch.save(model, config.txt_file + '.model')
            lengths = [20, 30, 50, 120]
            temperatures = [0, 0.5, 1.0, 2.0]
            with torch.no_grad(), \
                    open(config.txt_file + '.generated', 'a') as fp:
                for length, temp in product(lengths, temperatures):
                    text = seq_sampling(model, dataset, length, temp, device)
                    fp.write("{};{};{};{}\n".format(step, length, temp, text))

    print('Done training.')
def train(config):
    """Train a ``TextGenerationModel`` with RMSprop and save samples and weights.

    The data loader is iterated repeatedly until ``config.train_steps`` steps
    have been taken. Generated sentences are appended to
    ``config.save_generated_text`` every ``config.sample_every`` steps and the
    model ``state_dict`` is written to ``config.save_model`` every
    ``config.save_every`` steps and at the final step.

    Args:
        config: namespace providing ``device``, ``txt_file``, ``seq_length``,
            ``batch_size``, ``lstm_num_hidden``, ``lstm_num_layers``,
            ``dropout_keep_prob``, ``learning_rate``, ``max_norm``,
            ``train_steps``, ``print_every``, ``sample_every``,
            ``save_every``, ``save_dataset``, ``save_generated_text`` and
            ``save_model``.
    """
    # Initialize the device which to run the model on
    device = torch.device(config.device)

    # Initialize the dataset and data loader; the dataset is pickled so the
    # vocabulary can be restored later.
    dataset = TextDataset(config.txt_file, config.seq_length)
    torch.save(dataset, config.save_dataset)
    data_loader = DataLoader(dataset, config.batch_size, num_workers=1)

    # Initialize the model that we are going to use
    model = TextGenerationModel(config.batch_size, config.seq_length,
                                dataset.vocab_size, config.lstm_num_hidden,
                                config.lstm_num_layers,
                                1 - config.dropout_keep_prob, device)

    criterion = nn.CrossEntropyLoss()
    optimizer = optim.RMSprop(model.parameters(), lr=config.learning_rate)

    losses = []
    accuracies = []

    # Run through the dataset several times until train_steps is reached.
    step = 0
    while step < config.train_steps:
        for batch_inputs, batch_targets in data_loader:
            step += 1

            # Only for time measurement of step through network
            t1 = time.time()

            batch_inputs = torch.stack(batch_inputs).to(device)
            # dim=1 stacks the targets batch-first, avoiding a transpose.
            batch_targets = torch.stack(batch_targets, dim=1).to(device)

            batch_predictions, _ = model.forward(batch_inputs)
            # Move the class axis to position 1, as CrossEntropyLoss expects.
            batch_predictions = batch_predictions.permute(1, 2, 0)

            loss = criterion(batch_predictions, batch_targets)
            losses.append(loss.item())

            model.zero_grad()
            loss.backward()
            # In-place variant; the un-suffixed clip_grad_norm is deprecated.
            torch.nn.utils.clip_grad_norm_(model.parameters(),
                                           max_norm=config.max_norm)
            optimizer.step()

            accuracy = accuracy_(batch_predictions, batch_targets)
            accuracies.append(accuracy)

            # Just for time measurement
            t2 = time.time()
            examples_per_second = config.batch_size / float(t2 - t1)

            if step % config.print_every == 0:
                print(
                    "[{}] Train Step {}/{}, Batch Size = {}, Examples/Sec = {:.2f}, "
                    "Accuracy = {:.2f}, Loss = {:.3f}".format(
                        datetime.now().strftime("%Y-%m-%d %H:%M"), int(step),
                        int(config.train_steps), config.batch_size,
                        examples_per_second, accuracy, loss))

            if step % config.sample_every == 0:
                # Open the output file once per sampling event instead of
                # once per generated sentence.
                with open(config.save_generated_text, 'a',
                          encoding='utf-8') as file:
                    for temperature in [0]:
                        for length in [30, 60, 90, 120]:
                            sentence = generate_sentence(
                                model, dataset, temperature, length, device)
                            file.write("{};{};{};{}\n".format(
                                step, temperature, length, sentence))

            is_last_step = step == config.train_steps
            # Save only the model parameters; a single write covers both the
            # periodic and the final checkpoint.
            if is_last_step or step % config.save_every == 0:
                torch.save(model.state_dict(), config.save_model)

            if is_last_step:
                # If you receive a PyTorch data-loader error, check this bug report:
                # https://github.com/pytorch/pytorch/pull/9655
                break

    print('Done training.')
def train(config):
    """Train a ``TextGenerationModel`` for up to 20 epochs with RMSprop.

    Every ``config.sample_every`` steps a text sample is generated and the
    loss/accuracy averaged since the previous sample are recorded. From the
    second epoch on, the model and the recorded curves are saved at the start
    of each epoch, and training stops once two consecutive averaged losses
    differ by less than ``1e-6``. The final model is written to
    ``final_model``.

    Args:
        config: namespace providing ``device``, ``txt_file``, ``seq_length``,
            ``batch_size``, ``dropout_prob``, ``lstm_num_hidden``,
            ``lstm_num_layers``, ``learning_rate``, ``sample_every`` and
            ``train_steps``.
    """
    # Initialize the device which to run the model on
    device = torch.device(config.device)

    # Initialize the dataset and data loader
    dataset = TextDataset(config.txt_file, config.seq_length)
    data_loader = DataLoader(dataset, config.batch_size, num_workers=1)

    # Initialize the model that we are going to use
    model = TextGenerationModel(config.batch_size,
                                config.seq_length,
                                dataset.vocab_size,
                                config.dropout_prob,
                                config.lstm_num_hidden,
                                config.lstm_num_layers,
                                device=device)

    # Setup the loss and optimizer
    criterion = torch.nn.CrossEntropyLoss()
    optimizer = torch.optim.RMSprop(model.parameters(),
                                    lr=config.learning_rate)
    model.to(device)

    train_loss = []  # per-step losses since the last sample
    train_acc = []   # per-step accuracies since the last sample
    t_loss = []      # averaged losses, one entry per sample
    t_acc = []       # averaged accuracies, one entry per sample
    texts = []       # generated samples

    # Convergence condition
    eps = 1e-6
    num_epochs = 20
    converged = False

    for epoch in range(num_epochs):
        for step, (batch_inputs, batch_targets) in enumerate(data_loader):
            # Only for time measurement of step through network
            t1 = time.time()

            # Transpose the batches so the sequence axis comes first.
            x = (batch_inputs.to(device)).t()
            y = (batch_targets.to(device)).t()

            # Convert input to one-hot vectors
            x = idx_2_onehot(x, dataset.vocab_size)

            # Forward pass
            pred, _ = model.forward(x)
            loss = criterion(pred, y)
            train_loss.append(loss.item())

            # Backward pass (a single zero_grad on the optimizer suffices)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            accuracy = get_accuracy(pred, y, config.batch_size,
                                    config.seq_length)
            train_acc.append(accuracy.item())

            # Just for time measurement
            t2 = time.time()
            examples_per_second = config.batch_size / float(t2 - t1)

            if step % config.sample_every == 0:
                # Generate a sentence by sampling from the model (integer
                # form first, then converted to a string).
                text = text_gen(model,
                                config.seq_length,
                                dataset.vocab_size,
                                temperature=None)
                text = dataset.convert_to_string(text)
                print(
                    '\nEpoch ', epoch + 1, '/ 20, Training Step ', step, '/',
                    int(config.train_steps), ', Training Accuracy = ',
                    accuracy.item(), ', Training Loss = ', loss.item(),
                    '\n-----------------------------------------------\nGenerated text: ',
                    text)

                # Average loss and accuracy since the previous sample.
                t_loss.append(np.mean(train_loss))
                t_acc.append(np.mean(train_acc))
                train_loss = []
                train_acc = []
                texts.append(text)

            # NOTE(review): `step` restarts every epoch, so this caps the
            # steps per epoch rather than the total; confirm the intent.
            if step == config.train_steps:
                # If you receive a PyTorch data-loader error, check this bug report:
                # https://github.com/pytorch/pytorch/pull/9655
                break

            if epoch != 0:
                if step == 0:
                    # Save current model at the start of every epoch
                    torch.save(model, "epoch_" + str(epoch - 1) + "_model")
                    # Save current train accuracy, loss and text
                    np.save("epoch_" + str(epoch + 1) + "_accuracy", t_acc)
                    np.save("epoch_" + str(epoch + 1) + "_loss", t_loss)
                    np.save("epoch_" + str(epoch + 1) + "_texts", texts)

                # Guard the look-back so a short history cannot raise
                # IndexError.
                if (step > 0 and len(t_loss) >= 2
                        and abs(t_loss[-1] - t_loss[-2]) < eps):
                    converged = True
                    break

        # Leave the epoch loop too; a bare inner break would let training
        # resume with the next epoch.
        if converged:
            break

    print('Done training.')

    # Save final model
    torch.save(model, "final_model")
def train(config):
    """Train a ``TextGenerationModel`` for 100 epochs and generate text samples.

    During training, metrics are collected every ``config.print_every`` steps
    and a greedy sample is appended to ``MyTunedBook.txt`` every
    ``config.sample_every`` steps. Afterwards the samples are written to
    ``FinalTunedBook.txt``, the metrics to ``GEN<seq_length>tunedlstm.csv``,
    and continuations of the prompt ``'Alice'`` are generated for every
    policy/temperature combination.

    Args:
        config: namespace providing ``txt_file``, ``seq_length``,
            ``batch_size``, ``lstm_num_hidden``, ``lstm_num_layers``,
            ``dropout_keep_prob``, ``learning_rate``, ``learning_rate_decay``,
            ``learning_rate_step``, ``max_norm``, ``print_every``,
            ``sample_every`` and ``train_steps``.
    """

    def acc(predictions, targets):
        """Return the fraction of positions whose arg-max class equals the target."""
        hotvec = predictions.argmax(-2) == targets
        return torch.mean(hotvec.float())

    def pick_index(logits, policy, temperature):
        """Choose the next character index from the model output ``logits``.

        The arg-max is used for the ``'greedy'`` policy and for a
        non-positive temperature (dividing by zero would produce NaN
        probabilities); otherwise the index is sampled from the
        temperature-scaled softmax.
        """
        if policy == 'greedy' or temperature <= 0:
            return torch.argmax(logits).item()
        scaled = logits.squeeze() / temperature
        probs = torch.softmax(scaled, dim=0)
        return torch.multinomial(probs, 1)[-1].item()

    def as_model_input(idx):
        """Wrap a character index as an integer tensor of shape (1, 1) on the device."""
        # torch.tensor keeps the integer dtype; torch.Tensor would yield floats.
        return torch.tensor([idx]).unsqueeze(-1).to(device)

    # Initialize the device which to run the model on
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    print(device)

    # Initialize the dataset and data loader
    dataset = TextDataset(config.txt_file, config.seq_length)
    data_loader = DataLoader(dataset, config.batch_size, num_workers=0)
    print('batch', config.batch_size)
    vocabulary_size = dataset.vocab_size
    print('vocab', vocabulary_size)

    # Initialize the model that we are going to use
    model = TextGenerationModel(config.batch_size,
                                config.seq_length,
                                vocabulary_size=vocabulary_size,
                                lstm_num_hidden=config.lstm_num_hidden,
                                lstm_num_layers=config.lstm_num_layers,
                                dropout=1 - config.dropout_keep_prob,
                                device=device)
    model = model.to(device)

    # Setup the loss and optimizer
    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(),
                                 lr=config.learning_rate,
                                 weight_decay=1e-5)
    gamma = 1 - config.learning_rate_decay
    lr_optim = torch.optim.lr_scheduler.StepLR(optimizer,
                                               config.learning_rate_step,
                                               gamma=gamma,
                                               last_epoch=-1)
    print('Hi')

    acc_list = []
    loss_list = []
    step_list = []
    text_list = []

    epoch = 100
    offset = 2380  # steps assumed per epoch when computing the global step
    temperature = 1
    policy = 'greedy'

    for e in range(epoch):
        torch.save(model.state_dict(), str(e + 1) + 'tunedmodel.pt')
        for step, (batch_inputs, batch_targets) in enumerate(data_loader):
            optimizer.zero_grad()

            # Only for time measurement of step through network
            t1 = time.time()

            inputs = torch.stack([*batch_inputs], dim=1).to(device)
            targets = torch.stack([*batch_targets], dim=1).to(device)

            out = model.forward(inputs)[0]
            out = out.permute(0, 2, 1)  # class axis second, as the loss expects
            loss = criterion(out, targets)
            accuracy = acc(out, targets)

            loss.backward()
            # Clip after backward() so the fresh gradients are the ones being
            # clipped; the un-suffixed clip_grad_norm is deprecated.
            torch.nn.utils.clip_grad_norm_(model.parameters(),
                                           max_norm=config.max_norm)
            optimizer.step()
            # The scheduler has to step after the optimizer.
            lr_optim.step()

            # Just for time measurement
            t2 = time.time()
            examples_per_second = config.batch_size / float(t2 - t1)

            if step % config.print_every == 0:
                print('accuracy, loss, step: \n',
                      np.around(accuracy.item(), 4),
                      np.around(loss.item(), 4), step, '\n')
                acc_list.append(accuracy.item())
                loss_list.append(loss.item())
                step_list.append(step + offset * e)

            if step % config.sample_every == 0:
                # Generate a sentence by sampling from the model, starting
                # from a random character.
                char_list = []
                with torch.no_grad():
                    generator = torch.randint(low=0,
                                              high=vocabulary_size,
                                              size=(1, 1)).to(device)
                    hidden = None
                    for _ in range(config.seq_length):
                        generator, hidden = model.forward(generator, hidden)
                        idx = pick_index(generator, policy, temperature)
                        generator = as_model_input(idx)
                        char_list.append(idx)
                char = dataset.convert_to_string(char_list)
                with open("MyTunedBook.txt", "a") as text_file:
                    print('Epoch. ', e, 'Stahp: ', step, '\n Output: ', char,
                          file=text_file)
                print('Epoch. ', e, 'Stahp: ', step, '\n Output: ', char)
                text_list.append((str((step + offset * e)) + '\n' + char))

            if step == config.train_steps:
                # If you receive a PyTorch data-loader error, check this bug report:
                # https://github.com/pytorch/pytorch/pull/9655
                break

    print('Done training.')

    with open('FinalTunedBook.txt', 'w+') as f:
        for item in text_list:
            f.write("%s\n" % item)

    # save with pandas
    header = ['accuracy', 'length', 'loss', 'step']
    savefiles = zip(acc_list, [config.seq_length] * len(acc_list), loss_list,
                    step_list)
    df = pd.DataFrame(list(savefiles), columns=header)
    df.to_csv('GEN' + str(config.seq_length) + 'tunedlstm.csv')
    print('I am Loaded')

    temp_list = [0., 0.5, 1., 2.]
    policy_list = ['greedy', 'temp']
    seq_length = 111
    alice_string = list('Alice')

    # Generate sentences that continue the prompt for every combination.
    for policy in policy_list:
        for temperature in temp_list:
            char_list = []
            hidden = None
            with torch.no_grad():
                # Feed the prompt first to warm up the hidden state.
                for alice in alice_string:
                    idx = dataset.convert_to_idx(alice)
                    char_list.append(idx)
                    generator, hidden = model.forward(as_model_input(idx),
                                                      hidden)
                for _ in range(seq_length):
                    idx = pick_index(generator, policy, temperature)
                    generator, hidden = model.forward(as_model_input(idx),
                                                      hidden)
                    char_list.append(idx)
            char = dataset.convert_to_string(char_list)
            # Several combinations share a file name (temperatures 0 and 0.5,
            # and both policies), so append instead of truncating.
            with open(
                    "BonusTemp" + str(int(np.floor(temperature))) +
                    "Book.txt", "a") as text_file:
                print(policy + ': ', temperature, '\n Output: ', char,
                      file=text_file)
            print(policy + ': ', temperature, '\n Output: ', char)

    print('Finito!')
def train(config):
    """Train a ``TextGenerationModel`` with RMSprop, logging through ``Writer``.

    Metrics, generated samples and model checkpoints are emitted through the
    writer at the intervals given in ``config``. Training stops once
    ``config.train_steps`` optimisation steps have been taken; if ``config``
    has no ``train_steps`` attribute (or it is ``None``) the loop runs until
    interrupted.

    Args:
        config: namespace providing ``summary_path``, ``txt_file``,
            ``seq_length``, ``batch_size``, ``lstm_num_embed``,
            ``lstm_num_hidden``, ``lstm_num_layers``, ``checkpoint_path``,
            ``learning_rate``, ``learning_rate_step``,
            ``learning_rate_decay``, ``max_norm``, ``print_every``,
            ``sample_every``, ``checkpoint_every`` and, optionally,
            ``train_steps``.
    """
    # Initialise custom summary writer
    writer = Writer(config.summary_path)

    # Initialize the dataset and data loader
    writer.log("Loading dataset from: " + config.txt_file)
    dataset = TextDataset(config.txt_file, config.seq_length)
    data_loader = DataLoader(dataset, config.batch_size, num_workers=1)

    # Initialize the device which to run the model on
    device = 'cuda:0' if torch.cuda.is_available() else 'cpu'
    writer.log("Device: " + device)

    # Initialize the model that we are going to use
    model = TextGenerationModel(config.lstm_num_embed, config.seq_length,
                                dataset.vocab_size, config.lstm_num_hidden,
                                config.lstm_num_layers, device)
    if config.checkpoint_path is not None:
        # NOTE: torch.load unpickles the file; only load trusted checkpoints.
        model = torch.load(config.checkpoint_path).to(device)
    writer.log("Model:\n" + str(model))

    # Setup the loss and optimizer
    criterion = F.cross_entropy
    learning_rate = config.learning_rate
    optimizer = optim.RMSprop(model.parameters(), lr=learning_rate)

    # Fixed prompts used to seed generation when sampling.
    prompts = (
        "1:1. In the beginning God created",
        "1:5. And he called the light Day, and the darkness",
        "7:1. And the Lord said to him:",
        "Genesis Chapter 7",
    )

    # Without a stop condition the loop below never ends and the final log
    # line is unreachable.
    max_steps = getattr(config, 'train_steps', None)

    epoch = 0
    total_step = 0
    finished = False
    while not finished:
        for step, (batch_inputs, batch_targets) in enumerate(data_loader):
            # Only for time measurement of step through network
            t1 = time.time()

            batch_inputs = torch.LongTensor(
                [x.tolist() for x in batch_inputs]).to(device)
            batch_targets = torch.LongTensor(
                [x.tolist() for x in batch_targets]).to(device)

            # Forward pass
            logits = model.forward(batch_inputs)

            # backprop
            optimizer.zero_grad()
            # cross_entropy expects the class axis in position 1.
            loss = criterion(logits.transpose(1, 2), batch_targets)
            loss.backward()
            torch.nn.utils.clip_grad_norm_(model.parameters(),
                                           max_norm=config.max_norm)
            optimizer.step()

            with torch.no_grad():
                # accuracy = fraction of characters predicted correctly
                accuracy = (logits.argmax(dim=2) == batch_targets).to(
                    dtype=torch.float).mean()

            # Just for time measurement
            t2 = time.time()
            examples_per_second = config.batch_size / float(t2 - t1)

            # Learning rate decay
            if step % config.learning_rate_step == 0 and step != 0:
                learning_rate *= config.learning_rate_decay
                writer.log("Reduced learning rate: {}".format(learning_rate))
                for g in optimizer.param_groups:
                    g['lr'] = learning_rate

            # Metrics and samples
            if step % config.print_every == 0:
                writer.log(
                    "[{}] Epoch {:02d}, Train Step {:04d}, Batch Size = {}, Examples/Sec = {:.2f}, "
                    "Accuracy = {:.2f}, Loss = {:.3f}".format(
                        datetime.now().strftime("%Y-%m-%d %H:%M"), epoch,
                        step, config.batch_size, examples_per_second,
                        accuracy, loss))
                writer.write(
                    'metrics',
                    '{},{},{},{}'.format(total_step, accuracy, loss,
                                         learning_rate))

            if step % config.sample_every == 0:
                writer.log("Generating sentences")
                writer.write('samples', 'ITER{}'.format(step))
                for temp in [0, .5, 1, 2]:
                    writer.log("\nTemperature: {}".format(temp))
                    writer.write('samples', 'T{}'.format(temp))
                    # Five samples seeded with a random single character.
                    for i in np.random.choice(dataset.vocab_size, size=5):
                        text = dataset.convert_to_string(
                            model.predict([i], 100,
                                          temp)).replace("\n", "<br>")
                        writer.log(text)
                        writer.write('samples', text)
                    # One sample per fixed prompt.
                    for string in prompts:
                        text = dataset.convert_to_string(
                            model.predict(dataset.convert_to_id(string), 100,
                                          temp)).replace("\n", "<br>")
                        writer.log(text)
                        writer.write('samples', text)

            if step % config.checkpoint_every == 0:
                writer.save_model(model, step)

            total_step += 1
            if max_steps is not None and total_step >= max_steps:
                # If you receive a PyTorch data-loader error, check this bug report:
                # https://github.com/pytorch/pytorch/pull/9655
                finished = True
                break

        epoch += 1

    writer.log('Done training.')
def _greedy_rollout(model, vocab_size, device, first_index, num_steps):
    """Extend ``first_index`` by ``num_steps`` greedily chosen character indices.

    Each step feeds a one-hot tensor of shape (1, 1, vocab_size) to the model
    together with the hidden state returned by the previous step, and picks
    the arg-max of the output as the next character.

    Returns:
        list of int: ``first_index`` followed by the generated indices.
    """
    indices = [first_index]
    current = first_index
    hidden = None
    with torch.no_grad():
        for i in range(num_steps):
            one_hot = torch.zeros([1, 1, vocab_size])
            one_hot[0, 0, current] = 1
            one_hot = one_hot.to(device)
            if i == 0:
                logits, hidden = model.forward(one_hot)
            else:
                logits, hidden = model.forward(one_hot, hidden)
            current = int(torch.argmax(logits, dim=2))
            indices.append(current)
    return indices


def train(config, lr):
    """Train a ``TextGenerationModel`` with RMSprop for three passes over the data.

    Batches whose step index is a non-zero multiple of ``config.print_every``
    are not trained on; they are used to measure accuracy and loss, and the
    model is saved to ``model.pth`` whenever that accuracy improves. Every
    ``config.sample_every`` steps the best-predicted sample of the batch is
    printed and a greedy generation is appended to ``GreedyGeneration.txt``.
    After training, the collected metrics are appended to
    ``textresults.txt`` and ``PrideAndPrejudice2.txt`` and a 400-character
    greedy generation is printed.

    Args:
        config: namespace providing ``txt_file``, ``seq_length``,
            ``batch_size``, ``lstm_num_hidden``, ``lstm_num_layers``,
            ``max_norm``, ``print_every``, ``sample_every`` and
            ``train_steps``.
        lr: learning rate for the RMSprop optimizer.
    """
    # Initialize the dataset and data loader
    dataset = TextDataset(config.txt_file, config.seq_length)
    data_loader = DataLoader(dataset, config.batch_size, num_workers=1)

    # Initialize the model that we are going to use
    model = TextGenerationModel(batch_size=config.batch_size,
                                seq_length=config.seq_length,
                                vocabulary_size=dataset.vocab_size,
                                lstm_num_hidden=config.lstm_num_hidden,
                                lstm_num_layers=config.lstm_num_layers)

    device = 'cuda' if torch.cuda.is_available() else 'cpu'
    print('Currently using: ', device)
    model = model.to(device)

    # Setup the loss
    criterion = torch.nn.CrossEntropyLoss()

    acc_list = []
    loss_list = []
    test_acc = []  # stays empty: the test pass that filled it is disabled
    best_accuracy = 0

    for runs in range(3):
        # A fresh optimizer (and so fresh RMSprop statistics) for every run.
        optimizer = torch.optim.RMSprop(model.parameters(), lr=lr)

        for step, (batch_inputs, batch_targets) in enumerate(data_loader):
            is_eval_step = step % config.print_every == 0 and step != 0

            if not is_eval_step:
                # ---- training step ----
                zerox = create_zerox(batch_inputs, dataset.vocab_size, device)
                output, _ = model.forward(zerox)
                targets = torch.stack(batch_targets).to(device)
                # Two transposes turn (d0, d1, d2) into (d1, d2, d0); the
                # third yields (d0, d2, d1), the layout the loss is given.
                output = output.transpose(0, 1).transpose(1, 2)
                loss_for_backward = criterion(output.transpose(0, 2),
                                              targets.t())
                optimizer.zero_grad()
                loss_for_backward.backward()
                # Clip after backward() so the fresh gradients are clipped;
                # the un-suffixed clip_grad_norm is deprecated.
                torch.nn.utils.clip_grad_norm_(model.parameters(),
                                               max_norm=config.max_norm)
                optimizer.step()
            else:
                # ---- evaluation on a batch that is not trained on ----
                with torch.no_grad():
                    zerox = create_zerox(batch_inputs, dataset.vocab_size,
                                         device)
                    output, _ = model.forward(zerox)
                    output_indices = torch.argmax(output, dim=2)
                    output = output.transpose(0, 1).transpose(1, 2)
                    targets = torch.stack(batch_targets).to(device)
                    loss_for_backward = criterion(output.transpose(0, 2),
                                                  targets.t())
                    correct_indices = output_indices == targets.transpose(
                        0, 1)
                accuracy = np.array(correct_indices.cpu()).mean()
                acc_list.append(accuracy)
                loss_list.append(float(loss_for_backward))

                if accuracy > best_accuracy:
                    # Remember the new best so only real improvements
                    # trigger a save.
                    best_accuracy = accuracy
                    torch.save(
                        {
                            'model_state_dict': model.state_dict(),
                            'optimizer_state_dict': optimizer.state_dict()
                        }, 'model.pth')

            if step % config.sample_every == 0:
                # Show the sample of this batch that the model predicts best.
                with torch.no_grad():
                    zerox = create_zerox(batch_inputs, dataset.vocab_size,
                                         device)
                    targets = torch.stack(batch_targets).to(device)
                    batch_input = torch.stack(batch_inputs).to(
                        device).transpose(1, 0)
                    output, _ = model.forward(zerox)
                    output_indices = torch.argmax(output, dim=2)
                    correct_indices = output_indices == targets.transpose(
                        0, 1)
                    # Number of correct characters per sample.
                    correct_per_sample = correct_indices.sum(dim=1).cpu()
                best_sample = np.argmax(np.asarray(correct_per_sample))
                print(
                    'Real: ',
                    dataset.convert_to_string(
                        np.asarray(batch_input[best_sample].cpu())))
                print(
                    'prediction: ',
                    dataset.convert_to_string(
                        np.asarray(output_indices[best_sample].cpu())))
                bc = int(correct_per_sample[best_sample]) / config.seq_length
                print('This sample had:', bc, 'characters right')

                greedy_start = np.random.randint(dataset.vocab_size)
                Temperature_time(runs, step, dataset, device, model)
                greedy_letters = _greedy_rollout(model, dataset.vocab_size,
                                                 device, greedy_start,
                                                 config.seq_length - 1)
                greedy_text = dataset.convert_to_string(greedy_letters)
                print('Greedy Generation ', greedy_text)

                abs_step = (runs * 10000) + step
                # Log the generated greedy text, not just its start character.
                line = ' '.join(('Step:', str(abs_step), greedy_text))
                with open('GreedyGeneration.txt', 'a') as file:
                    file.write(line + '\n')

            if step == config.train_steps:
                # If you receive a PyTorch data-loader error, check this bug report:
                # https://github.com/pytorch/pytorch/pull/9655
                break

    print('Done training.')

    line = ' '.join(
        ('Test accuracy:', str(test_acc), 'Learning rate:', str(lr),
         'Accuracy:', str(acc_list), 'Loss:', str(loss_list)))
    with open('textresults.txt', 'a') as file:
        file.write(line + '\n')

    # Final long greedy generation from a random start character.
    letters = _greedy_rollout(model, dataset.vocab_size, device,
                              np.random.randint(dataset.vocab_size), 400)
    print('Final generation: ', dataset.convert_to_string(letters))

    line = ' '.join(('Accuracy:', str(acc_list), 'Loss', str(loss_list)))
    with open('PrideAndPrejudice2.txt', 'a') as file:
        file.write(line + '\n')
output,h = string_generator(pre,zerol,model) soft = torch.nn.Softmax(dim=2) zerol = torch.zeros([1,1,dataset.vocab_size]) one_hot_letter = torch.tensor(output).unsqueeze(-1).unsqueeze(-1).unsqueeze(-1) zerol.scatter_(2,one_hot_letter,1) zerol = zerol.to(device) if i == 0: output, h = model.forward(zerol) else: output, h = model.forward(zerol, h) tempered = soft(output/temp_value) #print(tempered) output = int(torch.multinomial(tempered[0][0],1).detach()) #print(output) letters.append(output) the_string = dataset.convert_to_string(letters) abs_step = 1 line = ' '.join(('Step:',str(abs_step),'Temperature:' ,str(temp_value), 'Text:',the_string))