def setup(self):
    (txt_file, seq_length, gen_length, lstm_num_hidden, lstm_num_layers,
     batch_size, learning_rate, learning_rate_decay, learning_rate_step,
     dropout_keep_prob, train_steps, max_norm, summary_path, print_every,
     sample_every, wanted_device, temperature) = itemgetter(*flags)(vars(self.config))

    # Initialize the device which to run the model on
    self.device = torch.device(wanted_device)

    # Initialize the dataset and data loader (note the +1)
    self.dataset = TextDataset(txt_file, seq_length)
    self.data_loader = DataLoader(self.dataset, batch_size, num_workers=1)
    self.vocabulary_size = self.dataset.vocab_size

    # Initialize the model that we are going to use
    dropout = 1 - dropout_keep_prob
    model_path = txt_file + '.pt'
    self.model = \
        torch.load(model_path) \
        if os.path.exists(model_path) else \
        TextGenerationModel(batch_size, seq_length, self.vocabulary_size,
                            lstm_num_hidden, lstm_num_layers, self.device)  # , dropout
    self.model.to(self.device)

    # Setup the loss and optimizer
    # cross-entropy loss can be computed by averaging over all timesteps
    # using the target labels y(t)
    self.criterion = torch.nn.CrossEntropyLoss(reduction='mean')
    self.optimizer = optim.Adam(self.model.parameters(), lr=learning_rate)
    gamma = 1 - learning_rate_decay
    self.scheduler = optim.lr_scheduler.StepLR(self.optimizer,
                                               step_size=learning_rate_step,
                                               gamma=gamma)
def train(config):

    # Initialize the device which to run the model on
    device = torch.device(config.device)

    # Initialize the dataset and data loader (note the +1)
    dataset = TextDataset(...)  # fixme
    data_loader = DataLoader(dataset, config.batch_size)

    # Initialize the model that we are going to use
    model = TextGenerationModel(...)  # FIXME

    # Setup the loss and optimizer
    criterion = None  # FIXME
    optimizer = None  # FIXME

    for step, (batch_inputs, batch_targets) in enumerate(data_loader):

        # Only for time measurement of step through network
        t1 = time.time()

        #######################################################
        # Add more code here ...
        #######################################################

        loss = np.inf   # fixme
        accuracy = 0.0  # fixme

        # Just for time measurement
        t2 = time.time()
        examples_per_second = config.batch_size / float(t2 - t1)

        if (step + 1) % config.print_every == 0:
            print("[{}] Train Step {:04d}/{:04d}, Batch Size = {}, "
                  "Examples/Sec = {:.2f}, Accuracy = {:.2f}, Loss = {:.3f}".format(
                      datetime.now().strftime("%Y-%m-%d %H:%M"), step,
                      config.train_steps, config.batch_size,
                      examples_per_second, accuracy, loss))

        if (step + 1) % config.sample_every == 0:
            # Generate some sentences by sampling from the model
            pass

        if step == config.train_steps:
            # If you receive a PyTorch data-loader error,
            # check this bug report:
            # https://github.com/pytorch/pytorch/pull/9655
            break

    print('Done training.')
def infer():
    flags = parse_flags()
    flag_keys = ['checkpoint_path', 'txt_file', 'length', 'device', 'temperature']
    (checkpoint_path, txt_file, length, wanted_device, temperature) = \
        itemgetter(*flag_keys)(vars(flags))

    # init with dummy values to load in checkpoint
    dataset = TextDataset(txt_file, 1)
    device = torch.device(wanted_device)
    model = torch.load(checkpoint_path)

    while True:
        start = input('Please input your text to complete:\n')
        chars = [dataset._char_to_ix[char] for char in start]
        s = generate(model, dataset, length, device, chars, temperature)
        print(s)
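
# The `generate` helper called in infer() above is not defined in this snippet.
# Below is a minimal sketch of what such a sampler could look like; it assumes
# the model accepts a (1, T) tensor of character indices and returns per-step
# logits of shape (1, T, vocab_size), and that the temperature divides the
# logits. Both are assumptions about the model interface, not confirmed here.
import torch
import torch.nn.functional as F


def generate(model, dataset, length, device, chars, temperature):
    """Hypothetical sampler: extend the seed indices in `chars` to `length` characters."""
    model.eval()
    generated = list(chars)
    with torch.no_grad():
        while len(generated) < length:
            # Feed the running sequence as a batch of one.
            inputs = torch.tensor(generated, device=device).unsqueeze(0)
            logits = model(inputs)                        # assumed (1, T, vocab_size)
            scaled = logits[0, -1] / max(temperature, 1e-8)
            probs = F.softmax(scaled, dim=-1)
            next_ix = torch.multinomial(probs, 1).item()  # sample the next character
            generated.append(next_ix)
    return dataset.convert_to_string(generated)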
def train(config):

    # Initialize the device which to run the model on
    device = torch.device(config.device)

    # Initialize the dataset and data loader (note the +1)
    dataset = TextDataset(config.txt_file, config.seq_length)
    data_loader = DataLoader(dataset, config.batch_size, num_workers=1)

    # Initialize the model that we are going to use
    model = TextGenerationModel(config.batch_size, config.seq_length,
                                dataset.vocab_size, config.lstm_num_hidden,
                                config.lstm_num_layers, device)

    # Setup the loss and optimizer
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.RMSprop(model.parameters(), config.learning_rate)
    scheduler = optim.lr_scheduler.StepLR(optimizer,
                                          step_size=config.learning_rate_step,
                                          gamma=config.learning_rate_decay)

    accuracy_train = []
    loss_train = []

    if config.samples_out_file != "STDOUT":
        samples_out_file = open(config.samples_out_file, 'w')

    epochs = config.train_steps // len(data_loader) + 1
    print("Will train on {} batches in {} epochs, max {} batches/epoch.".format(
        config.train_steps, epochs, len(data_loader)))

    for epoch in range(epochs):
        data_loader_iter = iter(data_loader)

        if epoch == config.train_steps // len(data_loader):
            batches = config.train_steps % len(data_loader)
        else:
            batches = len(data_loader)

        for step in range(batches):
            batch_inputs, batch_targets = next(data_loader_iter)
            model.zero_grad()

            # Only for time measurement of step through network
            t1 = time.time()

            batch_inputs = F.one_hot(
                batch_inputs,
                num_classes=dataset.vocab_size,
            ).float().to(device)
            batch_targets = batch_targets.to(device)

            optimizer.zero_grad()
            pred, _ = model.forward(batch_inputs)
            loss = criterion(pred.transpose(2, 1), batch_targets)
            accuracy = acc(
                pred.transpose(2, 1),
                F.one_hot(batch_targets, num_classes=dataset.vocab_size).float(),
                dataset.vocab_size)
            loss.backward()
            torch.nn.utils.clip_grad_norm_(model.parameters(),
                                           max_norm=config.max_norm)
            optimizer.step()

            # Just for time measurement
            t2 = time.time()
            examples_per_second = config.batch_size / float(t2 - t1)

            scheduler.step()

            if (epoch * len(data_loader) + step + 1) % config.seval_every == 0:
                accuracy_train.append(accuracy)
                loss_train.append(loss.item())

            if (epoch * len(data_loader) + step + 1) % config.print_every == 0:
                print(
                    "[{}] Epoch: {:04d}/{:04d}, Train Step {:04d}/{:04d}, Batch Size = {}, "
                    "Examples/Sec = {:.2f}, Accuracy = {:.2f}, Loss = {:.3f}".format(
                        datetime.now().strftime("%Y-%m-%d %H:%M"), epoch + 1,
                        epochs, (epoch * len(data_loader) + step + 1),
                        config.train_steps, config.batch_size,
                        examples_per_second, accuracy, loss))

            if (epoch * len(data_loader) + step + 1) % config.sample_every == 0:
                # Generate a sample sequence one character at a time
                with torch.no_grad():
                    codes = []
                    input_tensor = torch.zeros((1, 1, dataset.vocab_size),
                                               device=device)
                    input_tensor[0, 0, np.random.randint(0, dataset.vocab_size)] = 1
                    for i in range(config.seq_length - 1):
                        response = model.step(input_tensor)
                        logits = F.log_softmax(config.temp * response, dim=1)
                        dist = torch.distributions.one_hot_categorical.OneHotCategorical(
                            logits=logits)
                        code = dist.sample().argmax().item()
                        input_tensor *= 0
                        input_tensor[0, 0, code] = 1
                        codes.append(code)
                    string = dataset.convert_to_string(codes)
                    model.reset_stepper()

                if config.samples_out_file != "STDOUT":
                    samples_out_file.write("Step {}: ".format(
                        epoch * len(data_loader) + step + 1) + string + "\n")
                else:
                    print(string)

    if config.samples_out_file != "STDOUT":
        samples_out_file.close()

    if config.model_out_file is not None:
        torch.save(model, config.model_out_file)

    if config.curves_out_file is not None:
        import matplotlib.pyplot as plt
        fig, ax = plt.subplots(1, 2, figsize=(10, 5))
        fig.suptitle(
            'Training curves for Pytorch 2-layer LSTM.\n'
            'Final loss: {:.4f}. Final accuracy: {:.4f}\n'
            'Sequence length: {}, Hidden units: {}, LSTM layers: {}, Learning rate: {:.4f}'
            .format(loss_train[-1], accuracy_train[-1], config.seq_length,
                    config.lstm_num_hidden, config.lstm_num_layers,
                    config.learning_rate))
        plt.subplots_adjust(top=0.8)

        ax[0].set_title('Loss')
        ax[0].set_ylabel('Loss value')
        ax[0].set_xlabel('No of batches seen x{}'.format(config.seval_every))
        ax[0].plot(loss_train, label='Train')
        ax[0].legend()

        ax[1].set_title('Accuracy')
        ax[1].set_ylabel('Accuracy value')
        ax[1].set_xlabel('No of batches seen x{}'.format(config.seval_every))
        ax[1].plot(accuracy_train, label='Train')
        ax[1].legend()

        plt.savefig(config.curves_out_file)

    print('Done training.')
def train(_run):
    config = argparse.Namespace(**_run.config)

    # Initialize the device
    device = torch.device(config.device)

    # Initialize the dataset and data loader (note the +1)
    dataset = TextDataset(config.txt_file, config.seq_length)
    total_samples = int(config.train_steps * config.batch_size)
    sampler = RandomSampler(dataset, replacement=True, num_samples=total_samples)
    data_sampler = BatchSampler(sampler, config.batch_size, drop_last=False)
    data_loader = DataLoader(dataset, num_workers=1, batch_sampler=data_sampler)

    # Initialize the model that we are going to use
    model = TextGenerationModel(dataset.vocab_size, config.lstm_num_hidden,
                                config.lstm_num_layers).to(device)

    # Setup the loss and optimizer
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=config.learning_rate)

    for step, (batch_inputs, batch_targets) in enumerate(data_loader):

        # Only for time measurement of step through network
        t1 = time.time()

        # Prepare data
        batch_inputs = torch.stack(batch_inputs).to(device)
        batch_targets = torch.stack(batch_targets).t().to(device)

        # Forward, backward, optimize
        optimizer.zero_grad()
        logits = model(batch_inputs)
        batch_loss = criterion(logits, batch_targets)
        batch_loss.backward()
        optimizer.step()

        # Just for time measurement
        t2 = time.time()
        examples_per_second = config.batch_size / float(t2 - t1)

        if step % config.print_every == 0:
            accuracy = eval_accuracy(logits, batch_targets)
            loss = batch_loss.item()
            log_str = ("[{}] Train Step {:04d}/{:04d}, "
                       "Batch Size = {}, Examples/Sec = {:.2f}, "
                       "Accuracy = {:.2f}, Loss = {:.3f}")
            print(log_str.format(datetime.now().strftime("%Y-%m-%d %H:%M"),
                                 step, config.train_steps, config.batch_size,
                                 examples_per_second, accuracy, loss))
            _run.log_scalar('loss', loss, step)
            _run.log_scalar('acc', accuracy, step)

        if step % config.sample_every == 0:
            # Generate some sentences by sampling from the model
            print('-' * (config.sample_length + 1))
            x0 = torch.randint(low=0, high=dataset.vocab_size, size=(1, 5))
            samples = model.sample(x0, config.sample_length).detach().cpu()
            samples = samples.numpy()
            for sample in samples:
                print(dataset.convert_to_string(sample))
            print('-' * (config.sample_length + 1))

        if step == config.train_steps:
            break

    print('Done training.')

    ckpt_path = os.path.join(SAVE_PATH, str(config.timestamp) + '.pt')
    torch.save(
        {
            'state_dict': model.state_dict(),
            'hparams': model.hparams,
            'ix_to_char': dataset.ix_to_char
        }, ckpt_path)
    print('Saved checkpoint to {}'.format(ckpt_path))
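
# `eval_accuracy` is used above but not shown. A minimal sketch, assuming the
# logits have shape (batch, vocab_size, seq_length) to match the
# CrossEntropyLoss call and the targets have shape (batch, seq_length); the
# exact shapes depend on TextGenerationModel, so treat this as illustrative.
def eval_accuracy(logits, targets):
    """Fraction of characters predicted correctly over the whole batch."""
    predictions = logits.argmax(dim=1)          # (batch, seq_length)
    return (predictions == targets).float().mean().item()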
def train(config):

    # Initialize the device which to run the model on
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    # Initialize the dataset and data loader (note the +1)
    dataset = TextDataset(config.txt_file, config.seq_length)
    data_loader = DataLoader(dataset, config.batch_size, num_workers=1)

    # Initialize the model that we are going to use
    model = TextGenerationModel(config.batch_size, config.seq_length,
                                dataset.vocab_size, config.lstm_num_hidden,
                                config.lstm_num_layers, device)

    # Setup the loss and optimizer
    criterion = torch.nn.CrossEntropyLoss()
    optimizer = torch.optim.RMSprop(model.parameters(), lr=config.learning_rate)

    for step, (batch_inputs, batch_targets) in enumerate(data_loader):

        # Only for time measurement of step through network
        t1 = time.time()

        #######################################################
        model_out = model.forward(batch_inputs)
        loss = criterion(model_out, batch_targets)
        optimizer.zero_grad()
        loss.backward()
        #######################################################

        # clip_grad_norm is deprecated; use the in-place clip_grad_norm_
        torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=config.max_norm)
        optimizer.step()

        loss = loss.item()
        # Compute accuracy on the tensors directly; np.average does not handle
        # (CUDA) tensors of comparison results.
        accuracy = (torch.max(model_out, 1)[1] == batch_targets).float().mean().item()

        # Just for time measurement
        t2 = time.time()
        examples_per_second = config.batch_size / float(t2 - t1)

        if step % config.print_every == 0:
            print(
                "[{}] Train Step {:04d}/{:04d}, Batch Size = {}, Examples/Sec = {:.2f}, "
                "Accuracy = {:.2f}, Loss = {:.3f}".format(
                    datetime.now().strftime("%Y-%m-%d %H:%M"), step,
                    config.train_steps, config.batch_size, examples_per_second,
                    accuracy, loss))

        if step % config.sample_every == 0:
            # Generate some sentences by sampling from the model
            pass

        if step == config.train_steps:
            # If you receive a PyTorch data-loader error, check this bug report:
            # https://github.com/pytorch/pytorch/pull/9655
            break

    print('Done training.')
def train(config):

    # Initialize the device which to run the model on
    if torch.cuda.is_available():
        dev = "cuda:0"
    else:
        dev = "cpu"
    device = torch.device(dev)

    # Initialize the dataset and data loader (note the +1)
    dataset = TextDataset(config.txt_file, config.seq_length)
    data_loader = DataLoader(dataset, config.batch_size, num_workers=1)

    # Initialize the model that we are going to use
    model = TextGenerationModel(config.batch_size, config.seq_length,
                                dataset.vocab_size, config.lstm_num_hidden,
                                config.lstm_num_layers, device).to(device)

    # Setup the loss and optimizer
    criterion = torch.nn.CrossEntropyLoss()
    optimizer = torch.optim.RMSprop(model.parameters(), lr=config.learning_rate)

    total_steps = 0
    training_summary = [['Total steps', 'Accuracy', 'Loss']]
    sampling_summary = [['Total steps', 'Sentence']]

    while config.train_steps > total_steps:
        for step, (batch_inputs, batch_targets) in enumerate(data_loader):
            total_steps += 1
            if total_steps > config.train_steps:
                break

            batch_inputs = batch_inputs.to(device)
            batch_targets = batch_targets.to(device)

            # Only for time measurement of step through network
            t1 = time.time()

            batch_inputs = torch.nn.functional.one_hot(batch_inputs,
                                                       dataset.vocab_size)

            optimizer.zero_grad()
            output = model.forward(batch_inputs)

            # Average the per-timestep cross-entropy over the sequence
            loss = 0.0
            for i in range(len(output[0])):
                pred = output[:, i, :]
                target = batch_targets[:, i]
                loss += criterion(pred, target) / len(output[0])

            loss.backward()
            # Clip the gradients after backward, once they exist
            torch.nn.utils.clip_grad_norm_(model.parameters(),
                                           max_norm=config.max_norm)
            optimizer.step()

            with torch.no_grad():
                total_size = 0
                correct = 0
                for i in range(len(output[0])):
                    pred = torch.nn.functional.softmax(output[:, i, :], dim=1)
                    pred = torch.max(pred, 1)[1]
                    correct += pred.eq(batch_targets[:, i]).sum().item()
                    total_size += len(pred)
                accuracy = correct / total_size

            # Just for time measurement
            t2 = time.time()
            examples_per_second = config.batch_size / float(t2 - t1)

            if total_steps % config.print_every == 0:
                training_summary.append([total_steps, accuracy, loss.item()])
                print(
                    "[{}] Train Step {:04d}/{:04d}, Batch Size = {}, Examples/Sec = {:.2f}, "
                    "Accuracy = {:.2f}, Loss = {:.3f}".format(
                        datetime.now().strftime("%Y-%m-%d %H:%M"), total_steps,
                        int(config.train_steps), config.batch_size,
                        examples_per_second, accuracy, loss))

            if total_steps % config.sample_every == 0:
                # Generate some sentences by sampling from the model
                text = torch.zeros((1, 1)).long().random_(0, dataset.vocab_size).to(device)
                text = torch.nn.functional.one_hot(text, dataset.vocab_size)
                temperature = config.temprature if config.temprature is not None else 1

                for i in range(config.seq_length - 1):
                    prediction = model.forward(text)
                    pred = torch.nn.functional.softmax(temperature * prediction[:, i, :],
                                                       dim=1)
                    if config.temprature is not None:
                        m = torch.distributions.categorical.Categorical(pred)
                        pred = m.sample()
                    else:
                        pred = torch.max(pred, 1)[1]
                    pred = torch.nn.functional.one_hot(pred, dataset.vocab_size)
                    pred = pred.unsqueeze(0)
                    text = torch.cat((text, pred), 1)

                stuff = torch.argmax(text[0], 1)
                sentence = dataset.convert_to_string(stuff.tolist())
                print(sentence)
                sampling_summary.append([total_steps, sentence])

            if total_steps == config.train_steps:
                # If you receive a PyTorch data-loader error, check this bug report:
                # https://github.com/pytorch/pytorch/pull/9655
                break

    print('Done training.')
    print('Storing data')

    if not os.path.exists(config.summary_path):
        os.makedirs(config.summary_path)

    training_summary = pd.DataFrame(training_summary)
    sampling_summary = pd.DataFrame(sampling_summary)
    training_summary.to_csv(config.summary_path + "training_summary.csv",
                            header=False, index=False, sep=';')
    sampling_summary.to_csv(config.summary_path + "sampling_summary.csv",
                            header=False, index=False, sep=';')
    print('Finished')
def train(config):

    # Initialize the device which to run the model on
    # device = torch.device(config.device)
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    dataset = TextDataset(filename=config.txt_file, seq_length=config.seq_length)
    data_loader = DataLoader(dataset, config.batch_size, num_workers=1)

    VOCAB_SIZE = dataset.vocab_size
    CHAR2IDX = dataset._char_to_ix
    IDX2CHAR = dataset._ix_to_char

    # Initialize the model that we are going to use
    model = TextGenerationModel(batch_size=config.batch_size,
                                seq_length=config.seq_length,
                                vocabulary_size=VOCAB_SIZE,
                                lstm_num_hidden=config.lstm_num_hidden,
                                lstm_num_layers=config.lstm_num_layers,
                                device=device)

    # Setup the loss and optimizer
    criterion = torch.nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=config.learning_rate)
    scheduler = scheduler_lib.StepLR(optimizer=optimizer,
                                     step_size=config.learning_rate_step,
                                     gamma=config.learning_rate_decay)

    # Resume from an intermediate checkpoint (hard-coded path)
    if True:
        model.load_state_dict(
            torch.load('grimm-results/intermediate-model-epoch-30-step-0.pth',
                       map_location='cpu'))
        optimizer.load_state_dict(
            torch.load("grimm-results/intermediate-optim-epoch-30-step-0.pth",
                       map_location='cpu'))
        print("Loaded it!")

    model = model.to(device)

    EPOCHS = 50

    for epoch in range(EPOCHS):
        # Initialization of the state that's given to the forward pass,
        # reset every epoch
        h, c = model.reset_lstm(config.batch_size)
        h = h.to(device)
        c = c.to(device)

        for step, (batch_inputs, batch_targets) in enumerate(data_loader):

            # Only for time measurement of step through network
            t1 = time.time()

            model.train()
            optimizer.zero_grad()

            x = torch.stack(batch_inputs, dim=1).to(device)
            if x.size()[0] != config.batch_size:
                print("We're breaking because something is wrong")
                print("Current batch is of size {}".format(x.size()[0]))
                print("Supposed batch size is {}".format(config.batch_size))
                break

            y = torch.stack(batch_targets, dim=1).to(device)
            x = one_hot_encode(x, VOCAB_SIZE)

            output, (h, c) = model(x=x, prev_state=(h, c))

            loss = criterion(output.transpose(1, 2), y)
            accuracy = calculate_accuracy(output, y)

            # Detach the state so the graph of the previous batch is not kept
            h = h.detach()
            c = c.detach()

            loss.backward()
            # Add clipping
            torch.nn.utils.clip_grad_norm_(model.parameters(),
                                           max_norm=config.max_norm)
            optimizer.step()
            scheduler.step()

            # Just for time measurement
            t2 = time.time()
            examples_per_second = config.batch_size / float(t2 - t1)

            if step % config.print_every == 0:
                print(f"Epoch {epoch} Train Step {step}/{config.train_steps}, "
                      f"Examples/Sec = {examples_per_second}, "
                      f"Accuracy = {accuracy}, Loss = {loss}")

            if step % config.sample_every == 0:
                # Generate some sentences by sampling from the model
                FIRST_CHAR = 'I'  # Is randomized within the prediction, actually
                predict(device, model, FIRST_CHAR, VOCAB_SIZE, IDX2CHAR, CHAR2IDX)

                # Save an intermediate model/optimizer checkpoint
                path_model = 'intermediate-model-epoch-{}-step-{}.pth'.format(epoch, step)
                path_optimizer = 'intermediate-optim-epoch-{}-step-{}.pth'.format(epoch, step)
                torch.save(model.state_dict(), path_model)
                torch.save(optimizer.state_dict(), path_optimizer)

            if step == config.train_steps:
                # If you receive a PyTorch data-loader error, check this bug report:
                # https://github.com/pytorch/pytorch/pull/9655
                break

    print('Done training.')
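
# `one_hot_encode` and `calculate_accuracy` are assumed helpers that the
# snippet above relies on but does not define. A minimal sketch under the
# assumption that the inputs are index tensors of shape (batch, seq_length)
# and the model output is (batch, seq_length, vocab_size):
import torch
import torch.nn.functional as F


def one_hot_encode(x, vocab_size):
    """Index tensor (batch, seq_length) -> float one-hot (batch, seq_length, vocab_size)."""
    return F.one_hot(x, num_classes=vocab_size).float()


def calculate_accuracy(output, targets):
    """Fraction of characters whose argmax prediction matches the target index."""
    predictions = output.argmax(dim=-1)         # (batch, seq_length)
    return (predictions == targets).float().mean().item()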
def train(config):
    seed = 42
    torch.manual_seed(seed)
    np.random.seed(seed)

    # Initialize the device which to run the model on
    device = torch.device(config.device)
    writer = SummaryWriter()

    seq_length = config.seq_length
    batch_size = config.batch_size
    lstm_num_hidden = config.lstm_num_hidden
    lstm_num_layers = config.lstm_num_layers
    dropout_keep_prob = config.dropout_keep_prob

    # Initialize the dataset and data loader (note the +1)
    dataset = TextDataset(config.txt_file, seq_length)
    data_loader = DataLoader(dataset, batch_size, num_workers=1)
    vocab_size = dataset.vocab_size

    # Initialize the model that we are going to use
    model = TextGenerationModel(batch_size, seq_length, vocab_size,
                                lstm_num_hidden, lstm_num_layers,
                                dropout_keep_prob, device)
    model.to(device)

    # Setup the loss and optimizer
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=config.learning_rate)
    lr_scheduler = optim.lr_scheduler.StepLR(optimizer,
                                             config.learning_rate_step,
                                             config.learning_rate_decay)

    for step, (batch_inputs, batch_targets) in enumerate(data_loader):

        # Only for time measurement of step through network
        t1 = time.time()

        # One-hot representation of input or embedding => decided for embedding
        # batch_inputs = F.one_hot(batch_inputs, vocab_size).type(torch.FloatTensor).to(device)
        batch_inputs = batch_inputs.to(device)
        batch_targets = batch_targets.to(device)

        train_output, _ = model.forward(batch_inputs)
        loss = criterion(train_output, batch_targets)
        accuracy = torch.sum(
            torch.eq(torch.argmax(train_output, dim=1),
                     batch_targets)).item() / (batch_targets.size(0) *
                                               batch_targets.size(1))

        writer.add_scalar('Loss/train', loss.item(), step)
        writer.add_scalar('Accuracy/train', accuracy, step)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        lr_scheduler.step(step)

        # Just for time measurement
        t2 = time.time()
        examples_per_second = config.batch_size / float(t2 - t1)

        if step % config.print_every == 0:
            print(
                "[{}] Train Step {:04d}/{:04d}, Batch Size = {}, Examples/Sec = {:.2f}, "
                "Accuracy = {:.2f}, Loss = {:.3f}".format(
                    datetime.now().strftime("%Y-%m-%d %H:%M"), step,
                    int(config.train_steps), config.batch_size,
                    examples_per_second, accuracy, loss))

        if step % config.sample_every == 0:
            # Generate some sentences by sampling from the model
            sample_from_model(config, step, model, dataset)

        if step == config.train_steps:
            # If you receive a PyTorch data-loader error, check this bug report:
            # https://github.com/pytorch/pytorch/pull/9655
            break

    print('Done training.')
    torch.save(model, "trained_model_part2.pth")
    writer.close()
def train(config):

    def acc(predictions, targets):
        accuracy = (predictions.argmax(dim=2) == targets).float().mean()
        return accuracy

    # Initialize the device which to run the model on
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    print("Device", device)
    print("book:", config.txt_file)

    # Initialize the dataset and data loader (note the +1)
    dataset = TextDataset(config.txt_file, config.seq_length)
    data_loader = DataLoader(dataset, config.batch_size, num_workers=1)

    # Initialize the model that we are going to use
    model = TextGenerationModel(config.batch_size, config.seq_length,
                                dataset._vocab_size, config.lstm_num_hidden,
                                config.lstm_num_layers, device).to(device)

    # Setup the loss and optimizer
    criterion = torch.nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=config.learning_rate)

    gen_lengths = [20, 30, 100, 200]
    print("temperature:", config.temperature_int)

    all_accuracies = []
    all_losses = []
    all_train_steps = []

    for step, (batch_inputs, batch_targets) in enumerate(data_loader):

        # Create one-hot input
        batch_inputs = (torch.arange(dataset._vocab_size) == batch_inputs[..., None])

        # Only for time measurement of step through network
        t1 = time.time()

        # Set the data to device
        batch_inputs = batch_inputs.float().to(device)
        batch_targets = batch_targets.to(device)

        out, _ = model.forward(batch_inputs)                   # forward pass
        loss = criterion(out.permute(0, 2, 1), batch_targets)  # calculate the loss
        accuracy = acc(out, batch_targets)                     # calculate the accuracy

        optimizer.zero_grad()  # throw away previous grads
        loss.backward()        # calculate new gradients
        torch.nn.utils.clip_grad_norm_(
            model.parameters(),
            max_norm=config.max_norm)  # make sure the gradients do not explode
        optimizer.step()       # update the weights

        # Just for time measurement
        t2 = time.time()
        examples_per_second = config.batch_size / float(t2 - t1)

        if step % config.print_every == 0:
            print("[{}] Train Step {:04d}/{:04f}, Batch Size = {}, Examples/Sec = {:.2f}, "
                  "Accuracy = {:.2f}, Loss = {:.3f}".format(
                      datetime.now().strftime("%Y-%m-%d %H:%M"), step,
                      config.train_steps, config.batch_size,
                      examples_per_second, accuracy, loss))
            all_accuracies.append(accuracy.item())
            all_losses.append(loss.item())
            all_train_steps.append(step)

        if step % config.sample_every == 0:
            for gen_length in gen_lengths:
                print("Generated sentence with length of {}".format(gen_length))

                # Get the first random letter
                previous = random.randint(0, dataset._vocab_size - 1)
                letters = [previous]
                cell = None

                for i in range(gen_length):
                    # Create input
                    input = torch.zeros(1, 1, dataset._vocab_size).to(device)
                    input[0, 0, previous] = 1

                    # Do a forward pass
                    out, cell = model.forward(input, cell)

                    # Get the next letter
                    out = out.squeeze()
                    if config.temperature is True:
                        out *= config.temperature_int
                        out = torch.softmax(out, dim=0)
                        previous = torch.multinomial(out, 1)[0].item()
                    else:
                        previous = out.argmax().item()
                    letters.append(previous)

                # Convert to sentence
                sentence = dataset.convert_to_string(letters)
                print(sentence)

        if step == config.train_steps:
            # If you receive a PyTorch data-loader error, check this bug report:
            # https://github.com/pytorch/pytorch/pull/9655
            break

    with open("acc_loss_T_{}.txt".format(config.temperature_int), "w") as output:
        output.write("accuracies \n")
        output.write(str(all_accuracies) + "\n")
        output.write("losses \n")
        output.write(str(all_losses) + "\n")
        output.write("train steps \n")
        output.write(str(all_train_steps) + "\n")

    print('Done training.')
def train(config):

    # Initialize the device which to run the model on
    # device = torch.device(config.device)

    # Initialize the dataset and data loader (note the +1)
    dataset = TextDataset(config.txt_file, config.seq_length)
    data_loader = DataLoader(dataset, config.batch_size, num_workers=1)
    # print(dataset._char_to_ix)  # vocabulary order changes, but batches are
    # the same sentence examples with the seeds set earlier

    # Initialize the model that we are going to use
    model = TextGenerationModel(config.batch_size, config.seq_length,
                                dataset.vocab_size, config.lstm_num_hidden,
                                config.lstm_num_layers, config.device)
    device = model.device
    model = model.to(device)

    # Setup the loss and optimizer
    criterion = torch.nn.CrossEntropyLoss()
    optimizer = torch.optim.RMSprop(model.parameters(), lr=config.learning_rate)

    print("Len dataset:", len(dataset))
    print("Amount of steps for dataset:", len(dataset) / config.batch_size)

    current_step = 0
    not_max = True

    list_train_acc = []
    list_train_loss = []
    acc_average = []
    loss_average = []

    file = open("sentences.txt", 'w', encoding='utf-8')
    '''
    file_greedy = open("sentences_greedy.txt", 'w', encoding='utf-8')
    file_tmp_05 = open("sentences_tmp_05.txt", 'w', encoding='utf-8')
    file_tmp_1 = open("sentences_tmp_1.txt", 'w', encoding='utf-8')
    file_tmp_2 = open("sentences_tmp_2.txt", 'w', encoding='utf-8')
    '''

    while not_max:
        for (batch_inputs, batch_targets) in data_loader:

            # Only for time measurement of step through network
            t1 = time.time()

            # Embedding lookup: list of indices from char to ID in the dataset
            embed = model.embed  # embedding of shape (dataset.vocab_size, config.lstm_num_hidden)

            # Preprocess the input into embeddings to give to the LSTM all at once
            all_embed = []
            for batch_letter in batch_inputs:
                batch_letter_to = batch_letter.to(device)
                embedding = embed(batch_letter_to)
                all_embed.append(embedding)
            all_embed = torch.stack(all_embed).to(device)

            # Output shape: (seq_length, batch_size, vocab_size);
            # 87 in the last dimension for the fairy-tales text
            outputs = model(all_embed)

            # For the loss: predictions must be (batch, vocab_size, seq_length)
            # and targets (batch, seq_length)
            batch_first_output = outputs.transpose(0, 1).transpose(1, 2)
            batch_targets = torch.stack(batch_targets).to(device)
            loss = criterion(batch_first_output, torch.t(batch_targets))

            # Backpropagate
            model.zero_grad()
            loss.backward()
            loss = loss.item()
            torch.nn.utils.clip_grad_norm_(model.parameters(),
                                           max_norm=config.max_norm)
            optimizer.step()

            # Accuracy
            number_predictions = torch.argmax(outputs, dim=2)
            result = number_predictions == batch_targets
            accuracy = result.sum().item() / (batch_targets.shape[0] *
                                              batch_targets.shape[1])

            if config.measure_type == 2:
                acc_average.append(accuracy)
                loss_average.append(loss)

            # Just for time measurement
            t2 = time.time()
            examples_per_second = config.batch_size / float(t2 - t1)

            if current_step % config.print_every == 0:
                # Average accuracy and loss over the last print_every steps (5 by default)
                if config.measure_type == 2:
                    accuracy = sum(acc_average) / config.print_every
                    loss = sum(loss_average) / config.print_every
                    acc_average = []
                    loss_average = []

                # Either accuracy and loss on the print_every interval,
                # or the average over that interval as stated above
                list_train_acc.append(accuracy)
                list_train_loss.append(loss)

                print(
                    "[{}] Train Step {:04d}/{:04d}, Batch Size = {}, Examples/Sec = {:.2f}, "
                    "Accuracy = {:.2f}, Loss = {:.3f}".format(
                        datetime.now().strftime("%Y-%m-%d %H:%M"), current_step,
                        config.train_steps, config.batch_size,
                        examples_per_second, accuracy, loss))
            elif config.measure_type == 0:
                # Track accuracy and loss for every step
                list_train_acc.append(accuracy)
                list_train_loss.append(loss)

            if current_step % config.sample_every == 0:
                # Generate a sentence
                sentence_id = model.generate_sentence(config.gsen_length,
                                                      config.temperature)
                sentence = dataset.convert_to_string(sentence_id)
                print(sentence)
                file.write((str(current_step) + ": " + sentence + "\n"))

            if current_step == config.train_steps:
                # If you receive a PyTorch data-loader error, check this bug report:
                # https://github.com/pytorch/pytorch/pull/9655
                not_max = False
                break

            current_step += 1

    # Close the file and make sure sentences and measures are saved
    file.close()
    pickle.dump((list_train_acc, list_train_loss),
                open("loss_and_train.p", "wb"))

    # Plot
    print(len(list_train_acc))
    if config.measure_type == 0:
        eval_steps = list(range(config.train_steps + 1))  # accuracy at every step
    else:
        eval_steps = list(range(0, config.train_steps + config.print_every,
                                config.print_every))

    if config.measure_type == 2:
        plt.plot(eval_steps[:-1], list_train_acc[1:], label="Train accuracy")
    else:
        plt.plot(eval_steps, list_train_acc, label="Train accuracy")
    plt.xlabel("Step")
    plt.ylabel("Accuracy")
    plt.title("Training accuracy LSTM", fontsize=18, fontweight="bold")
    plt.legend()
    # plt.savefig('accuracies.png', bbox_inches='tight')
    plt.show()

    if config.measure_type == 2:
        plt.plot(eval_steps[:-1], list_train_loss[1:], label="Train loss")
    else:
        plt.plot(eval_steps, list_train_loss, label="Train loss")
    plt.xlabel("Step")
    plt.ylabel("Loss")
    plt.title("Training loss LSTM", fontsize=18, fontweight="bold")
    plt.legend()
    # plt.savefig('loss.png', bbox_inches='tight')
    plt.show()

    print('Done training.')
                    type=str, default="./summaries/",
                    help='Output path for summaries')
parser.add_argument('--print_every', type=int, default=5,
                    help='How often to print training progress')
parser.add_argument('--sample_every', type=int, default=100,
                    help='How often to sample from the model')

config = parser.parse_args()

dataset = TextDataset(config.txt_file, config.seq_length)
# e.g. './part2/een_klein_heldendicht.txt', 10

# Get a couple of sequence examples from batches
data_loader = DataLoader(dataset, config.batch_size, num_workers=1)

# for step, (batch_inputs, batch_targets) in enumerate(data_loader):
X_iterable = enumerate(data_loader)
step, (X_transposed, y_transposed) = next(X_iterable)
X_batch = torch.stack(X_transposed).t()
Y_batch = torch.stack(y_transposed).t()

# One-hot encode
X = torch.zeros(len(X_batch), 30,
                dataset.vocab_size).scatter_(2, X_batch.unsqueeze(2), 1)
def sample(config):
    dataset = TextDataset(config.base_txt, config.seq_length)

    # Initialize the device which to run the model on
    device = torch.device(config.device)

    # Initialize the model that we are going to use
    model = torch.load(config.model_file, map_location=device)

    if config.samples_out_file != "STDOUT":
        samples_out_file = open(config.samples_out_file, 'w')

    if config.no_random is not None:
        # Sample `no_random` sequences, each starting from a random character
        with torch.no_grad():
            codes = []
            for k in range(config.no_random):
                input_tensor = torch.zeros((1, 1, dataset.vocab_size), device=device)
                input_tensor[0, 0, np.random.randint(0, dataset.vocab_size)] = 1

                for i in range(config.seq_length - 1):
                    response = model.step(input_tensor)
                    logits = F.log_softmax(config.temp * response, dim=1)
                    dist = torch.distributions.one_hot_categorical.OneHotCategorical(
                        logits=logits)
                    code = dist.sample().argmax().item()
                    input_tensor *= 0
                    input_tensor[0, 0, code] = 1
                    codes.append(code)

                string = dataset.convert_to_string(codes)
                model.reset_stepper()

                if config.samples_out_file != "STDOUT":
                    samples_out_file.write("Sample {}: ".format(k) + string + "\n")
                else:
                    print("Sample {}: ".format(k) + string)

                string = ''
                codes = []

    elif config.sentence is not None:
        # Complete a user-provided sentence
        with torch.no_grad():
            codes = []
            for char in config.sentence:
                codes.append(dataset._char_to_ix[char])

            input_tensor = torch.zeros((1, len(codes), dataset.vocab_size), device=device)
            input_tensor[0, np.arange(0, len(codes), 1), codes] = 1

            chars_to_gen = config.seq_length - len(codes)

            # Feed the given characters one at a time to warm up the stepper state
            for i in range(len(codes)):
                response = model.step(input_tensor[:, i, :].view(1, 1, dataset.vocab_size))

            input_tensor = torch.zeros((1, 1, dataset.vocab_size), device=device)
            for i in range(chars_to_gen):
                logits = F.log_softmax(config.temp * response, dim=1)
                dist = torch.distributions.one_hot_categorical.OneHotCategorical(
                    logits=logits)
                code = dist.sample().argmax().item()
                input_tensor *= 0
                input_tensor[0, 0, code] = 1
                codes.append(code)
                response = model.step(input_tensor)

            string = dataset.convert_to_string(codes)
            model.reset_stepper()

            if config.samples_out_file != "STDOUT":
                samples_out_file.write(string + "\n")
            else:
                print(string)

    else:
        # Beam search over sequences
        with torch.no_grad():
            import copy

            codes = []
            beams = []
            for k in range(config.beam_width):
                beam_dict = {}
                beam_dict['hidden_state'] = None
                beam_dict['logit'] = -np.log(config.beam_width)
                beam_dict['seq_codes'] = [np.random.randint(0, dataset.vocab_size)]
                beams.append(beam_dict)

            input_tensor = torch.zeros((1, 1, dataset.vocab_size), device=device)

            for i in range(config.seq_length):
                new_beams = []
                for element in beams:
                    input_tensor *= 0
                    input_tensor[0, 0, element['seq_codes'][-1]] = 1.0
                    response, hid = model.forward(input_tensor, element['hidden_state'])
                    logits = F.log_softmax(config.temp * response, dim=2)

                    for code, logit in enumerate(logits[0, 0, :]):
                        new_dict = copy.deepcopy(element)
                        new_dict['hidden_state'] = hid
                        new_dict['seq_codes'].append(code)
                        new_dict['logit'] += logit.item()
                        new_beams.append(new_dict)

                # Keep only the `beam_width` most likely sequences
                new_beams.sort(reverse=True, key=lambda dic: dic['logit'])
                beams = new_beams[:config.beam_width]

            for beam in beams:
                string = dataset.convert_to_string(beam['seq_codes'])
                if config.samples_out_file != "STDOUT":
                    samples_out_file.write(string + "\n")
                else:
                    print(string)
def train(config):
    print(config.train_steps)
    device = torch.device(config.device)

    dataset = TextDataset(config.txt_file, config.seq_length)
    data_loader = DataLoader(dataset, config.batch_size, num_workers=1)

    model = TextGenerationModel(config.batch_size, config.seq_length,
                                dataset.vocab_size, config.lstm_num_hidden,
                                config.lstm_num_layers, device)

    criterion = torch.nn.CrossEntropyLoss()
    optimizer = torch.optim.RMSprop(model.parameters(), lr=config.learning_rate)

    generated_text = []

    for epochs in range(10):
        for step, (batch_inputs, batch_targets) in enumerate(data_loader):

            # Only for time measurement of step through network
            t1 = time.time()

            x = torch.stack(batch_inputs, dim=1).to(device)

            # One-hot encode the inputs
            encodded_size = list(x.shape)
            encodded_size.append(dataset.vocab_size)
            one_hot = torch.zeros(encodded_size, device=x.device)
            one_hot.scatter_(2, x.unsqueeze(-1), 1)

            targets = torch.stack(batch_targets, dim=1).to(device)

            #######################################################
            predictions = model.forward(one_hot)
            loss = criterion(predictions.transpose(2, 1), targets)
            loss.backward()
            #######################################################

            optimizer.step()
            optimizer.zero_grad()

            loss = loss.item()
            size = targets.shape[0] * targets.shape[1]
            accuracy = torch.sum(predictions.argmax(dim=2) == targets).to(torch.float32) / size

            # Just for time measurement
            t2 = time.time()
            examples_per_second = config.batch_size / float(t2 - t1)

            if step % config.print_every == 0:
                print("examples per sec " + str(examples_per_second) +
                      " step " + str(step) +
                      " accuracy " + str(accuracy.item()) +
                      " loss " + str(loss))

                # Generate some sentences by sampling from the model
                random_seed = torch.randint(low=0, high=dataset.vocab_size,
                                            size=(1, 1), dtype=torch.long,
                                            device=device)
                text_fifteen, text, temp_nine, temp_five, temp_one = generator(
                    model=model, seed=random_seed, length=config.seq_length,
                    dataset=dataset)
                generated_text.append(text_fifteen)
                generated_text.append(text)
                generated_text.append(temp_nine)
                generated_text.append(temp_five)
                generated_text.append(temp_one)

                print("temp 1.5: " + generated_text[-5])
                print("temp 1: " + generated_text[-4])
                print("temp 0.9: " + generated_text[-3])
                print("temp 0.5: " + generated_text[-2])
                print("temp 0.2: " + generated_text[-1])
                print("")

                file = open("generated.txt", "a")
                file.write("beta 1.5: " + generated_text[-5] + "\n")
                file.write("beta 1: " + generated_text[-4] + "\n")
                file.write("beta 0.9: " + generated_text[-3] + "\n")
                file.write("beta 0.5: " + generated_text[-2] + "\n")
                file.write("beta 0.2: " + generated_text[-1] + "\n")
                file.write("")
                file.close()

            if step == config.sample_every:
                # Generate some sentences by sampling from the model
                pass

            if step == 30000:
                # If you receive a PyTorch data-loader error, check this bug report:
                # https://github.com/pytorch/pytorch/pull/9655
                break

    print('Done training.')
def train(config): """ """ # some additional vars learning_rate = config.learning_rate # TODO: Initialize the device which to run the model on device = 'cpu' device = torch.device(device) dataset = TextDataset(config.txt_file, config.seq_length) data_loader = DataLoader(dataset, config.batch_size, num_workers=1) # Initialize the model that we are going to use model = TextGenerationModel(vocabulary_size=dataset.vocab_size, device='cpu', **config.__dict__) criterion = nn.CrossEntropyLoss() optimizer = optim.RMSprop(model.parameters(), lr=learning_rate) # evaluation loss_list = list() accuracy_list = list() mean_loss_list = list() mean_accuracy_list = list() step = 0 epoch = 0 steps_total = 0 text_greedy_generated = dict() text_random_generated = dict() while steps_total < config.train_steps: epoch += 1 for step, (X_transposed, y_transposed) in enumerate(data_loader): steps_total = step * epoch # Only for time measurement of step through network t1 = time.time() X_batch = torch.stack(X_transposed).t() Y_batch = torch.stack(y_transposed).t() X = X_batch.to(device) y = Y_batch.to(device) X = torch.zeros(len(X), config.seq_length, dataset.vocab_size).scatter_(2, X.unsqueeze(2), 1) optimizer.zero_grad() outputs = model.forward(X).type(dtype) # Add more code here ... loss_current = criterion(outputs.transpose(2, 1), y) loss_current.backward(retain_graph=True) optimizer.step() # evaluation loss = loss_current.detach().item() accuracy = (outputs.argmax(dim=2) == y.long()).sum().float() / (float(y.shape[0]) * float(y.shape[1])) # Just for time measurement t2 = time.time() examples_per_second = config.batch_size/float(t2-t1) loss_list.append(loss) accuracy_list.append(accuracy) if step % config.print_every == 0: mean_loss_list.append(np.mean(loss_list[-50:])) mean_accuracy_list.append(np.mean(accuracy_list[-50:])) print("[{}] Train Step {}/{}, Batch Size = {}, Examples/Sec = {:.2f}, " "Accuracy = {:.2f}, Loss = {:.3f}".format( datetime.now().strftime("%Y-%m-%d %H:%M"), steps_total, config.train_steps, config.batch_size, examples_per_second, accuracy, loss )) # Text generation if step % config.sample_every == 0: # Generate some sentences by sampling from the model text_greedy, text_random = text_generator(model, config.seq_length, 0.2, dataset, device) text_greedy_generated[len(mean_accuracy_list)] = text_greedy text_random_generated[len(mean_accuracy_list)] = text_random print(text_greedy, len(text_greedy)) print(text_random, len(text_random)) # if step == config.train_steps: # # If you receive a PyTorch data-loader error, check this bug report: # # https://github.com/pytorch/pytorch/pull/9655 if step > config.train_steps: break print('Done training.') return mean_loss_list, mean_accuracy_list, text_greedy_generated, text_random_generated