Example #1
def eval(config):
    use_cuda = torch.cuda.is_available()
    if use_cuda:
        device = torch.device('cuda:0')
    else:
        device = torch.device('cpu')
    # Initialize the device which to run the model on
    device = torch.device(device)
    dtype = torch.cuda.LongTensor if use_cuda else torch.LongTensor
    # Load the pickled dataset that was saved during training
    dataset = pickle.load(open(config.dataset_path, 'rb'))
    # Initialize the model that we are going to use
    model = TextGenerationModel(config.batch_size, config.seq_length, dataset.vocab_size, \
                 config.lstm_num_hidden, config.dropout_keep_prob, config.lstm_num_layers).to(device)

    model.load_state_dict(torch.load(config.model_path))
    # Switch the model to evaluation mode
    model.eval()
    print('Evaluating: ')
    num_summaries = 5
    # get random initial chars
    rand_chars = [
        dataset._char_to_ix[random.choice(dataset._chars)]
        for i in range(num_summaries)
    ]
    # to tensor
    prev_pred = torch.Tensor(rand_chars).type(dtype)
    prev_pred_one_hot = to_one_hot(prev_pred, dataset.vocab_size, dtype)
    predictions = []
    for i in range(config.sample_length):
        # batch size 1
        prev_pred_one_hot = torch.unsqueeze(prev_pred_one_hot, 1)
        if i == 0:
            y_pred, hidden = model(prev_pred_one_hot.float())
        else:
            y_pred, hidden = model(prev_pred_one_hot.float(), hidden)
        # get argmax
        # Sample from the network as a multinomial distribution
        if config.sampling_method == 'temp':
            output_dist = y_pred.data.div(config.temperature).exp()
            y_pred_batch_idx = output_dist.squeeze(1).multinomial(1).type(
                dtype)
        else:
            y_pred_batch_idx = y_pred.argmax(2).type(dtype)

        # to one hot
        prev_pred_one_hot = to_one_hot(y_pred_batch_idx.flatten(),
                                       dataset.vocab_size, dtype)
        predictions.append(y_pred_batch_idx.flatten().cpu().detach().numpy())
    predictions = np.asarray(predictions).T
    summaries = [dataset.convert_to_string(pred) for pred in list(predictions)]
    print("{} \n".format('\n'.join(summaries)))
Example #2
def generate_sequence(config,
                      seed=0,
                      temp=0,
                      seq_length=30,
                      model_path='output_dir/kant_100_4.pt',
                      init_char='t'):

    np.random.seed(seed)
    torch.manual_seed(seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed(seed)
        torch.cuda.manual_seed_all(seed)

    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False

    # Initialize the device which to run the model on
    device = torch.device(config.device)
    print(device)

    # Initialize the dataset and data loader (note the +1)
    dataset = TextDataset(config.txt_file, config.seq_length)

    # Initialize the model that we are going to use
    model = TextGenerationModel(1, 1, dataset.vocab_size,
                                config.lstm_num_hidden, config.lstm_num_layers,
                                config.device).to(device)

    model.load_state_dict(torch.load(model_path, map_location=config.device))
    model.eval()

    # print(init_char)
    word_list = [dataset._char_to_ix[char] for char in init_char]
    state = model.init_state()

    for step in range(seq_length):
        last = torch.tensor([[word_list[step]]]).long().to(device)
        # print(last)
        output, state = model.predict(last, state, temp=temp)
        # print(output.squeeze())
        if step + 1 >= len(word_list):
            if temp > 0:
                word_list.append(torch.multinomial(output.squeeze(), 1).item())
            else:
                word_list.append(torch.argmax(output).item())

    # plt.hist(output.squeeze().numpy(), 100)
    # plt.show()

    sequence = ''.join([dataset._ix_to_char[ix] for ix in word_list])
    return sequence
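One possible way to drive generate_sequence is to sweep over a few temperatures; the snippet below is a hypothetical caller and assumes a config object with the fields used above:

for temp in (0.0, 0.5, 1.0, 2.0):
    # temp == 0 falls back to greedy (argmax) decoding in the function above
    text = generate_sequence(config, seed=42, temp=temp, seq_length=200,
                             model_path='output_dir/kant_100_4.pt', init_char='t')
    print('temperature={}: {}'.format(temp, text))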
Example #3
File: eval.py Project: frank/dl
def eval():
    # Torch settings
    torch.set_default_tensor_type(torch.FloatTensor)

    # Initialize the dataset
    dataset = TextDataset(config.txt_file, config.seq_length)

    # Get temperature
    temp = config.temperature

    # Initialize the model that we are going to use
    model = TextGenerationModel(config.batch_size, config.seq_length,
                                dataset.vocab_size)

    # Load model, if there's any model to load
    model, steps = load_model(model)
    print("Model trained for", steps, "steps")
    model.eval()

    try:
        while True:
            # Get input for the start of the sentence
            start = input("\nStart: ")

            # Convert input to one-hot representation (length x vocab_size)
            try:
                start_oh = get_one_hot(start, dataset)
            except KeyError:
                print("One or more characters were not recognized. Try again!")
                continue

            # Generate the rest of the sentence
            sentence = dataset.convert_to_string(
                model.cmd_generate(start_oh, temp, config.seq_length))

            print("Model says:\n")
            print(start + sentence)
    except KeyboardInterrupt:
        print("\n\n" + random.choice(quit_msgs))
Example #4
def train(config):

    # Initialize the device which to run the model on
    device = torch.device(config.device)

    # Initialize the dataset and data loader (note the +1)
    dataset = TextDataset(config.txt_file, config.seq_length)  # fixme
    data_loader = DataLoader(dataset, config.batch_size, num_workers=1)

    # Save the instantiated dataset.
    with open('model_ckpt/train.dataset', 'wb') as dataset_file:
        pickle.dump(dataset, dataset_file)

    # Initialize the model that we are going to use
    model = TextGenerationModel(config.batch_size, config.seq_length, dataset.vocab_size, config.lstm_num_hidden,
                                config.lstm_num_layers, device, config.dropout_keep_prob)  # fixme

    # Setup the loss and optimizer
    criterion = nn.CrossEntropyLoss()  # reduction='mean'(default) - average over all timesteps and all batches as they are merged.
    optimizer = optim.RMSprop(model.parameters(), config.learning_rate)  # fixme
    # optimizer = optim.Adam(model.parameters(), config.learning_rate)

    # Create a tensor to hold the one-hot encoding for the batch inputs.
    onehot_batch_inputs = torch.FloatTensor(config.seq_length, config.batch_size, dataset.vocab_size)
    onehot_batch_inputs = onehot_batch_inputs.to(device)

    h_init = torch.zeros(config.lstm_num_layers, config.batch_size, config.lstm_num_hidden, device=device)
    c_init = torch.zeros(config.lstm_num_layers, config.batch_size, config.lstm_num_hidden, device=device)

    # Record the learning rate steps individually for learning rate decay.
    lr_step = 0
    lr = 1
    for epoch in np.arange(config.epochs):
        losses = []
        accs = []
        for step, (batch_inputs, batch_targets) in enumerate(data_loader):

            # Only for time measurement of step through network
            t1 = time.time()

            #######################################################
            # Add more code here ...
            #######################################################
            model.train()
            # Convert the DataLoader output from list of tensors to tensors.
            batch_inputs = torch.stack(batch_inputs)
            batch_inputs = batch_inputs.to(device)

            # If the epoch is finished and there are not enough characters left for a full batch, break the loop
            if batch_inputs.shape[0] * batch_inputs.shape[1] != onehot_batch_inputs.shape[0] * onehot_batch_inputs.shape[1]:
                break

            # Zero the one-hot encoding and encode according to batch_inputs.
            onehot_batch_inputs.zero_()
            onehot_batch_inputs.scatter_(2, batch_inputs.unsqueeze_(-1), 1)

            # Convert the DataLoader output from list of tensors to tensors.
            batch_targets = torch.stack(batch_targets)
            batch_targets = batch_targets.to(device)

            # Learning rate decay.
            if lr_step % config.learning_rate_step == 0:
                optimizer = optim.RMSprop(model.parameters(), config.learning_rate * lr)
                lr *= config.learning_rate_decay

            optimizer.zero_grad()
            logits, _, _ = model(onehot_batch_inputs, h_init, c_init)
            # The seq_length dimension and batch_size dimension of the logits and batch_targets are merged together, and the mean is computed over this new dimension.
            loss = criterion(logits.view(-1, dataset.vocab_size), batch_targets.view(-1))   # fixme
            loss.backward()

            torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=config.max_norm)

            accuracy = accuracy_fn(logits.view(-1, dataset.vocab_size), batch_targets.view(-1))  # fixme
            optimizer.step()

            losses.append(loss.item())
            accs.append(accuracy)

            # Just for time measurement
            t2 = time.time()
            examples_per_second = config.batch_size/float(t2-t1)

            if step % config.print_every == 0:
                print("[{}] Epoch {}/{}, Train Step {:04d}/{:04d}, Batch Size = {}, Examples/Sec = {:.2f}, "
                      "Accuracy = {:.2f}, Loss = {:.3f}".format(
                          datetime.now().strftime("%Y-%m-%d %H:%M"), epoch + 1, config.epochs, step,
                          config.train_steps, config.batch_size, examples_per_second,
                          accuracy, loss
                      ))

            if step % config.sample_every == 0:
                # Generate some sentences by sampling from the model
                model.eval()
                # Create tensor to hold the generated samples.
                samples = torch.zeros((5, config.sample_length), dtype=torch.int, device=device)
                # Initialize the first characters for the samples.
                start_chars = torch.randint(dataset.vocab_size, size=(1, 5, 1), dtype=torch.long, device=device)
                samples[:, 0] = start_chars.squeeze()
                # Create a tensor to hold the one-hot encoding for the output characters of the LSTM network (one per each time step).
                onehot_chars = torch.zeros((1, 5, dataset.vocab_size), device=device)
                onehot_chars.scatter_(2, start_chars, 1)

                last_h = torch.zeros(config.lstm_num_layers, 5, config.lstm_num_hidden, device=device)
                last_c = torch.zeros(config.lstm_num_layers, 5, config.lstm_num_hidden, device=device)
                for t in np.arange(config.sample_length - 1):
                    logits, last_h, last_c = model(onehot_chars, last_h, last_c)
                    next_chars = logits.squeeze().argmax(-1)
                    onehot_chars.zero_()
                    onehot_chars.scatter_(2, next_chars.view(1, 5, 1), 1)
                    samples[:, t + 1] = next_chars

                samples = samples.tolist()
                samples = [dataset.convert_to_string(sample) for sample in samples]
                # Output the samples into a text file.
                with open(config.summary_path + 'samples.txt', 'a') as txt_file:
                    txt_file.write('Epoch: {}\nStep: {}\n'.format(epoch + 1, step))
                    txt_file.writelines(map(lambda x: x + '\n', samples))

            if step == config.train_steps:
                # If you receive a PyTorch data-loader error, check this bug report:
                # https://github.com/pytorch/pytorch/pull/9655
                break

            lr_step += 1
        # After each training epoch, save the model and the training loss and accuracy.
        model.train()
        torch.save(model.state_dict(), 'model_ckpt/lstm_gen_epoch{}.ckpt'.format(epoch + 1))
        with open(config.summary_path + 'train_epoch{}.csv'.format(epoch + 1), 'w', newline='') as csv_file:
            csv_writer = csv.writer(csv_file)
            csv_writer.writerow(losses)
            csv_writer.writerow(accs)

    print('Done training.')
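The training loop above uses an accuracy_fn(logits, targets) that is not shown; a plausible sketch, assuming logits of shape (N, vocab_size) and integer targets of shape (N,), is:

import torch

def accuracy_fn(logits, targets):
    # Fraction of positions where the argmax prediction matches the target character.
    return (logits.argmax(dim=-1) == targets).float().mean().item()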
Example #5
def train(config):
    
    
    # Initialize the device which to run the model on
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    
    # Initialize the dataset and data loader (note the +1)
    dataset = TextDataset(config.txt_file, config.seq_length)   # fixme
    data_loader = DataLoader(dataset, batch_size = config.batch_size, shuffle=True, num_workers=1)
    vocab_size = dataset.vocab_size
    # char2i = dataset._char_to_ix
    # i2char = dataset._ix_to_char
    # ----------------------------------------
    
    # Initialize the model that we are going to use
    model = TextGenerationModel(config.batch_size, config.seq_length, vocab_size, \
                                config.lstm_num_hidden, config.lstm_num_layers, device)  # fixme
    model.to(device)

    # Setup the loss and optimizer
    criterion = nn.NLLLoss()  # fixme
    optimizer = optim.RMSprop(model.parameters(), lr = config.learning_rate)  # fixme
    logSoftmax = nn.LogSoftmax(dim=2)
    
    # Learning rate scheduler
    lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer, \
                  step_size=config.learning_rate_step, gamma=config.learning_rate_decay)
    step = 1
    
    if config.resume:
        if os.path.isfile(config.resume):
            print("Loading checkpoint '{}'".format(config.resume))
            checkpoint = torch.load(config.resume)
            step = checkpoint['step']
            model.load_state_dict(checkpoint['state_dict'])
            optimizer.load_state_dict(checkpoint['optimizer'])
            lr_scheduler.load_state_dict(checkpoint['lr_scheduler'])
            print("Checkpoint loaded '{}', steps {}".format(config.resume, checkpoint['step']))

    if not os.path.isdir(config.summary_path):
        os.makedirs(config.summary_path)

    if config.sampling == "greedy":
        f = open(os.path.join(config.summary_path, "sampled_" + config.sampling + ".txt"), "w+")
    else:
        f = open(os.path.join(config.summary_path, "sampled_" + config.sampling + "_" + str(config.temp) + ".txt"), "w+")

    best_accuracy = 0.0
    pl_loss = []
    average_loss = []
    acc = []

    for epochs in range(30):

        if step == config.train_steps:
            print('Done training.')
            break

        for (batch_inputs, batch_targets) in data_loader:

            if config.batch_size!=batch_inputs.size()[0]:
                print("batch mismatch")
                break

            # Only for time measurement of step through network
            t1 = time.time()
            model.hidden = model.init_hidden(config.batch_size)

            model.zero_grad()
            #######################################################
            # Add more code here ...
            
            #convert batch inputs to one-hot vector
            batch_inputs= torch.zeros(config.batch_size, config.seq_length, vocab_size).scatter_(2,batch_inputs.unsqueeze(-1),1.0)
            
            batch_inputs, batch_targets = batch_inputs.to(device), batch_targets.to(device)

            predictions, _ = model(batch_inputs)
            if config.sampling=="greedy":
                predictions = logSoftmax(predictions)
            else:
                predictions = logSoftmax(predictions/config.temp)

            loss = criterion(predictions.transpose(2,1), batch_targets)   # fixme

            _, predictions = torch.max(predictions, dim=2, keepdim=True)
            predictions = (predictions.squeeze(-1) == batch_targets).float()
            accuracy = torch.mean(predictions)
            
            
            
            loss.backward()
            torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=config.max_norm)
            
            optimizer.step()
            lr_scheduler.step()

            #######################################################

            # Just for time measurement
            t2 = time.time()
            examples_per_second = config.batch_size/float(t2-t1)
            pl_loss.append(loss.item())
            average_loss.append(np.mean(pl_loss[:-100:-1]))
            acc.append(accuracy.item())


            if step % config.print_every == 0:

                print("[{}] Train Step {}/{}, Batch Size = {}, Examples/Sec = {:.2f}, "
                    "Accuracy = {:.2f}, Loss = {:.3f}".format(
                        datetime.now().strftime("%Y-%m-%d %H:%M"), step,
                        config.train_steps, config.batch_size, examples_per_second,
                        accuracy, loss.item()
                ))
                
                

            if step % config.sample_every == 0:

                model.eval()

                with torch.no_grad():
                    char_ix = generate_sample(model, vocab_size, config.seq_length, device, config)
                    sentence = dataset.convert_to_string(char_ix)

                f.write("--------------" + str(step) + "----------------\n")
                f.write(sentence + "\n")
                print(sentence)
                print()
                model.train()
                # ###########################################################################
                # save training loss
                plt.plot(pl_loss,'r-', label="Batch loss", alpha=0.5)
                plt.plot(average_loss,'g-', label="Average loss", alpha=0.5)
                plt.legend()
                plt.xlabel("Iterations")
                plt.ylabel("Loss")  
                plt.title("Training Loss")
                plt.grid(True)
                # plt.show()
                if config.sampling == "greedy":
                    plt.savefig("loss_"+config.sampling+".png")
                else:
                    plt.savefig("loss_"+config.sampling+"_"+str(config.temp)+".png")

                plt.close()
                ################################training##################################################
                plt.plot(acc,'g-', alpha=0.5)
                plt.xlabel("Iterations")
                plt.ylabel("Accuracy")
                plt.title("Train Accuracy")
                plt.grid(True)
                if config.sampling == "greedy":
                    plt.savefig("accuracy_"+config.sampling+".png")
                else:
                    plt.savefig("accuracy_"+config.sampling+"_"+str(config.temp)+".png")
                plt.close()

            if step == config.train_steps:
                # If you receive a PyTorch data-loader error, check this bug report:
                # https://github.com/pytorch/pytorch/pull/9655
                break
            
            step+=1
            
        save_checkpoint({
            'epoch': epochs + 1,
            'step': step,
            'state_dict': model.state_dict(),
            'optimizer': optimizer.state_dict(),
            'lr_scheduler':lr_scheduler.state_dict(),
            'accuracy': accuracy
                }, config)
        
    f.close()
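Example #5 samples text through a generate_sample(model, vocab_size, seq_length, device, config) function that is not included here. A rough sketch of what it could look like, assuming the model takes one-hot input of shape (batch, time, vocab_size) and returns (logits, hidden) as in the training loop above, is:

import torch

def generate_sample(model, vocab_size, seq_length, device, config):
    model.hidden = model.init_hidden(1)
    char_ix = [torch.randint(vocab_size, (1,)).item()]     # random start character
    for _ in range(seq_length - 1):
        x = torch.zeros(1, 1, vocab_size, device=device)
        x[0, 0, char_ix[-1]] = 1.0
        logits, _ = model(x)
        if config.sampling == "greedy":
            next_ix = logits[0, -1].argmax().item()
        else:
            probs = torch.softmax(logits[0, -1] / config.temp, dim=0)
            next_ix = torch.multinomial(probs, 1).item()
        char_ix.append(next_ix)
    return char_ix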
Example #6
def train(config):

    # Print all configs to confirm parameter settings
    print_flags()
    assert config.sampling_method in ('greedy', 'random')
    assert config.generate_mode in ('generate', 'finish')

    # Initialize the device which to run the model on
    device = torch.device(config.device)

    # Initialize the dataset and data loader (note the +1)
    dataset = TextDataset(filename=config.txt_file,
                          seq_length=config.seq_length)
    data_loader = DataLoader(dataset, config.batch_size, num_workers=1)

    # Initialize the model that we are going to use
    model = TextGenerationModel(batch_size=config.batch_size,
                                seq_length=config.seq_length,
                                vocabulary_size=dataset.vocab_size,
                                dropout=1-config.dropout_keep_prob,
                                lstm_num_hidden=config.lstm_num_hidden,
                                lstm_num_layers=config.lstm_num_layers,
                                device=device)
    model.to(device)

    # Setup the loss and optimizer
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=config.learning_rate)
    epoch = 10

    # Store some measures
    los = list()
    iteration = list()
    acc = list()
    max_step = 0

    for i in range(epoch):
      for step, (batch_inputs, batch_targets) in enumerate(data_loader):

          # Only for time measurement of step through network
          t1 = time.time()

          model.train()
          optimizer.zero_grad()

          batch_inputs = torch.stack(batch_inputs).to(device)
          batch_targets = torch.stack(batch_targets).to(device)

          h_0 = torch.zeros(config.lstm_num_layers, batch_inputs.shape[1], config.lstm_num_hidden).to(device)
          c_0 = torch.zeros(config.lstm_num_layers, batch_inputs.shape[1], config.lstm_num_hidden).to(device)

          pred, _, _ = model(batch_inputs, h_0, c_0)
          accuracy = compute_accuracy(pred, batch_targets)
          pred = pred.permute(1, 2, 0)
          batch_targets = batch_targets.permute(1, 0)
          loss = criterion(pred, batch_targets)
          loss.backward()
          torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=config.max_norm)
          optimizer.step()

          # Just for time measurement
          t2 = time.time()
          examples_per_second = config.batch_size/float(t2-t1)

          if (step + i * max_step) % config.print_every == 0:

              print("[{}] Train Step {:04d}/{:04d}, Batch Size = {}, Examples/Sec = {:.2f}, "
                    "Accuracy = {:.2f}, Loss = {:.3f}".format(
                      datetime.now().strftime("%Y-%m-%d %H:%M"), step + i * max_step,
                      int(config.train_steps), config.batch_size, examples_per_second,
                      accuracy, loss
              ))
              iteration.append(step + i * max_step)
              acc.append(accuracy)
              los.append(loss.item())
              if max_step < step:
                max_step = step

          if (step + i * max_step) % config.sample_every == 0:
              model.eval()
              batch_sample = 5
              if config.generate_mode == 'finish':
                generated = [dataset._char_to_ix[c] for c in config.input_seq]
                generated = torch.LongTensor(generated).view(-1, 1).to(device)
                for l in range(config.generate_length):
                  if l == 0:
                    h_s = torch.zeros(config.lstm_num_layers, 1, config.lstm_num_hidden).to(device)
                    c_s = torch.zeros(config.lstm_num_layers, 1, config.lstm_num_hidden).to(device)
                    gen, h_s, c_s = model(generated, h_s, c_s)
                    gen = torch.unsqueeze(gen[-1], 0)
                  else:
                    gen, h_s, c_s = model(gen, h_s, c_s)
                  if config.sampling_method == 'greedy':
                    gen = gen.argmax(dim=2)
                  else:
                    gen = nn.functional.softmax(gen/config.temperature, dim=2)
                    dist = torch.distributions.categorical.Categorical(gen)
                    gen = dist.sample()
                  generated = torch.cat((generated, gen))
              else:
                generated = [dataset._char_to_ix[random.choice(dataset._chars)] for c in range(batch_sample)]
                generated = torch.LongTensor(generated).view(-1, batch_sample).to(device)
                for l in range(config.generate_length - 1):
                  if l == 0:
                    h_s = torch.zeros(config.lstm_num_layers, batch_sample, config.lstm_num_hidden).to(device)
                    c_s = torch.zeros(config.lstm_num_layers, batch_sample, config.lstm_num_hidden).to(device)
                    gen, h_s, c_s = model(generated, h_s, c_s)
                  else:
                    gen, h_s, c_s = model(gen, h_s, c_s)
                  if config.sampling_method == 'greedy':
                    gen = gen.argmax(dim=2)
                  else:
                    gen = nn.functional.softmax(gen/config.temperature, dim=2)
                    dist = torch.distributions.categorical.Categorical(gen)
                    gen = dist.sample()
                  generated = torch.cat((generated, gen))
              generated = generated.t()
              sentence = [dataset.convert_to_string(idx) for idx in generated.tolist()]
              if config.sampling_method == 'random':
                with open('{}/{}_{}_{}_{}.txt'.format(config.summary_path, config.generate_mode, datetime.now().strftime("%Y-%m-%d"), config.sampling_method, config.temperature), 'a', encoding='utf-8') as file:
                  file.write('--------------\n')
                  file.write('Training Step: {}\n'.format(step + i * max_step))
                  file.write('--------------\n')
                  for sen in sentence:
                    file.write('{}\n'.format(sen))
                  file.write('\n')
              else:
                with open('{}/{}_{}_{}.txt'.format(config.summary_path, config.generate_mode, datetime.now().strftime("%Y-%m-%d"), config.sampling_method), 'a', encoding='utf-8') as file:
                  file.write('--------------\n')
                  file.write('Training Step: {}\n'.format(step + i * max_step))
                  file.write('--------------\n')
                  for sen in sentence:
                    file.write('{}\n'.format(sen))
                  file.write('\n')

          if (step + i * max_step) == config.train_steps:
              # If you receive a PyTorch data-loader error, check this bug report:
              # https://github.com/pytorch/pytorch/pull/9655
              break

      if (step + i * max_step) == config.train_steps:
        break

    print('Done training.')
    fig, axs = plt.subplots(1, 2, figsize=(10,5))
    axs[0].plot(iteration, acc)
    axs[0].set_xlabel('Iteration')
    axs[0].set_ylabel('Accuracy')
    axs[1].plot(iteration, los)
    axs[1].set_xlabel('Iteration')
    axs[1].set_ylabel('Loss')
    fig.tight_layout()
    plt.show()
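Example #6 measures accuracy with a compute_accuracy(pred, batch_targets) helper that is not shown. Assuming predictions of shape (seq_length, batch_size, vocab_size) and targets of shape (seq_length, batch_size), as the forward pass above suggests, a minimal sketch is:

import torch

def compute_accuracy(pred, targets):
    # Fraction of characters predicted correctly by the argmax over the vocabulary.
    return (pred.argmax(dim=2) == targets).float().mean().item()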
Example #7
def train(config, CHOICES):
    
    # Initialize the device which to run the model on
    #device = torch.device(config.device)# fix this!
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    print(device)
    
    # Initialize the model that we are going to use

    # Initialize the dataset and data loader (note the +1)
    dataset = TextDataset(config.txt_file, config.seq_length );  # fixme
    model = TextGenerationModel(config.batch_size, config.seq_length, dataset.vocab_size, config.temperature).to(device);
    if (CHOICES['LOAD_BEST_MODEL']):
        model.load_state_dict(torch.load('./model_parameter.txt'));
    #print(model.state_dict());
    
    data_loader = DataLoader(dataset, config.batch_size, num_workers=1)

    # Setup the loss and optimizer
    criterion = nn.CrossEntropyLoss();
    optimizer = torch.optim.RMSprop(model.parameters(),lr=config.learning_rate);
    if (CHOICES['LOAD_BEST_MODEL']):
        optimizer.load_state_dict(torch.load('./model_optimizer.txt'));
    accuracy_list = [];
    loss_list = [];
    string_list = [];
    tmp_accuracy = 0;
    
    a = 76;
    while (tmp_accuracy == 0) or (accuracy_list[-1] < 0.85):
        for step, (batch_inputs, batch_targets) in enumerate(data_loader):

            # Only for time measurement of step through network
            t1 = time.time()
            
            batch_inputs = torch.stack(batch_inputs)[:,:, None].view(config.seq_length, -1).to(device); # sequ_length * batch_size
            batch_targets = torch.stack(batch_targets)[:,:, None].view(config.seq_length, -1).to(device); # sequ_length * batch_size
            
            if not((int(batch_inputs.size()[1])) == config.batch_size):
                continue;
                
            #print(dataset.convert_to_string(batch_inputs[:, 0].cpu().numpy())); 
            
            batch_inputs_onehot = one_hot(batch_inputs, dataset.vocab_size); # seq_length * batch_size * vacab_size;
            optimizer.zero_grad();
            out = model(batch_inputs_onehot);

            loss_criterion = criterion(out, batch_targets);
            loss_criterion.backward();
            # Clip gradients after backward() so the clipping applies to the freshly computed gradients
            torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=config.max_norm);
            optimizer.step();

            loss = loss_criterion.item() / config.seq_length;
            values, indices = torch.max(out, 1);

            accuracy = ((indices[indices == batch_targets].size())[0]) / (config.batch_size * config.seq_length);

            # Just for time measurement
            t2 = time.time()
            examples_per_second = config.batch_size/float(t2-t1)
            if step % config.print_every == 0:
                print("[{}] Train Step {:04d}/{:04d}, Batch Size = {}, Examples/Sec = {:.2f}, "
                          "Accuracy = {:.2f}, Loss = {:.3f}".format(
                            datetime.now().strftime("%Y-%m-%d %H:%M"), step,
                            int(config.train_steps), config.batch_size, examples_per_second,
                            accuracy, loss))
                            
                # generate sentences
                if step % 50000 == 0 and CHOICES['GENERATE_FIVE_SENTENCES']:                            
                    model.eval();                    
                    test_input = (torch.Tensor(batch_inputs.size())).type(torch.LongTensor).to(device);
                    a = a + 1;
                    test_input = test_input.fill_(a);
                    output_string = generate_new_stings(model, test_input, dataset.vocab_size, config.seq_length);  
                    tmp = dataset.convert_to_string(output_string.cpu().numpy().tolist());
                    string_list += [tmp];
                    print(tmp);
                    print('---')     
                    
                    model.train();
                # save parameter
                torch.save(model.state_dict(), './model_parameter{:d}.txt'.format(step));
                torch.save(optimizer.state_dict(), './model_optimizer{:d}.txt'.format(step));                    
                
                
                if (CHOICES['DRAW_ACCURACY_PLOT']):
                    accuracy_list += [accuracy];  
                    loss_list += [loss]; 
                

            if step == config.sample_every:
                # Generate some sentences by sampling from the model
                pass
            
            if step == config.train_steps:
                # If you receive a PyTorch data-loader error, check this bug report:
                # https://github.com/pytorch/pytorch/pull/9655
                break
                
            if (CHOICES['GENERATE_FIVE_SENTENCES']) and (len(string_list) == 5):
                break;
        
        if (CHOICES['GENERATE_FIVE_SENTENCES']) and (len(string_list) == 5):
                break;
                        
        
        print("============ finish {} epoch ============ ".format(len(accuracy_list)));
        
    torch.save(model.state_dict(), './model_parameter.txt');
    torch.save(optimizer.state_dict(), './model_optimizer.txt');
    print('Done training.');
    
    if (CHOICES['GENERATE_FIVE_SENTENCES']):
    
        if (CHOICES['DRAW_ACCURACY_PLOT']):
            fig, ax = plt.subplots();
            ax.plot(np.arange(len(accuracy_list)), accuracy_list, 'r', label = 'accuracy');
            ax.plot(np.arange(len(accuracy_list)), loss_list, 'b', label = 'loss');
            legend = ax.legend(loc='upper center');      
            plt.xlabel('Steps');
            plt.title('loss and accuracy of LSTM in 2000 steps');
            plt.show();
        
        for idx in range(5):
            print('====')
            print(string_list[idx]);
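Example #7 one-hot encodes its batches with a one_hot(batch, vocab_size) helper that is not part of the listing. Based on the shape comment at the call site (seq_length * batch_size * vocab_size), a minimal sketch could be:

import torch

def one_hot(batch, vocab_size):
    # batch: (seq_length, batch_size) tensor of character indices.
    out = torch.zeros(batch.size(0), batch.size(1), vocab_size, device=batch.device)
    out.scatter_(2, batch.unsqueeze(-1).long(), 1.0)
    return out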
Example #8
def train(config):
    writer = torch.utils.tensorboard.SummaryWriter()

    # Initialize the device which to run the model on
    device = torch.device(config.device)

    # Initialize the dataset and data loader (note the +1)
    dataset = TextDataset(config.txt_file, config.seq_length)
    # Note: DataLoader's third positional argument is `shuffle`, so seq_length must not be passed there.
    data_loader = DataLoader(dataset, config.batch_size)

    # Initialize the model that we are going to use
    vocabulary_size = dataset.vocab_size
    model = TextGenerationModel(batch_size=config.batch_size,
                                seq_length=config.seq_length,
                                vocabulary_size=vocabulary_size)
    model.to(device)

    # Setup the loss and optimizer
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=config.learning_rate)

    accuracies = []
    losses = []

    for step, (batch_inputs, batch_targets) in enumerate(data_loader):

        # Only for time measurement of step through network
        t1 = time.time()

        #######################################################

        # Move to GPU
        batch_inputs = to_tensor_rep(batch_inputs).to(device)
        batch_targets = to_tensor_rep(batch_targets).to(device)

        # Reset for next iteration
        model.zero_grad()

        #######################################################
        model_output = model(batch_inputs,
                             c_0=torch.zeros(config.lstm_num_layers,
                                             batch_inputs.shape[1],
                                             config.lstm_num_hidden,
                                             device=device),
                             h_0=torch.zeros(config.lstm_num_layers,
                                             batch_inputs.shape[1],
                                             config.lstm_num_hidden,
                                             device=device))

        # for each timestep, the crossentropy loss is computed and subsequently averaged
        batch_losses = torch.zeros(config.seq_length, device=device)
        for i in range(config.seq_length):
            batch_losses[i] = criterion(model_output[i], batch_targets[i])

        loss = (1 / config.seq_length) * torch.sum(batch_losses)

        # compute the gradients, clip them to prevent exploding gradients and backpropagate
        loss.backward()
        torch.nn.utils.clip_grad_norm_(model.parameters(),
                                       max_norm=config.max_norm)
        optimizer.step()

        # calculate accuracy
        predictions = torch.argmax(model_output, dim=2)
        correct = (predictions == batch_targets).sum().item()
        accuracy = correct / (model_output.size(0) * model_output.size(1))

        # Just for time measurement
        t2 = time.time()
        examples_per_second = config.batch_size / float(t2 - t1)

        if (step + 1) % config.print_every == 0:
            print("[{}] Train Step {:04d}/{:04d}, Batch Size = {}, \
                    Examples/Sec = {:.2f}, "
                  "Accuracy = {:.2f}, Loss = {:.3f}".format(
                      datetime.now().strftime("%Y-%m-%d %H:%M"), step,
                      config.train_steps, config.batch_size,
                      examples_per_second, accuracy, loss))

            # save loss and accuracy
            accuracies.append(accuracy)
            losses.append(loss.item())
            writer.add_scalar("loss", loss.item(), step)
            writer.add_scalar("accuracy", accuracy, step)

        if (step + 1) % config.sample_every == 0:
            model.eval()
            generate_sequence(model, 62, dataset)
            model.train()

        if step == config.train_steps:
            break

    print('Done training.')

    # make loss and accuracy plots
    x = np.arange(len(accuracies)) * config.print_every
    plot_curve(x, accuracies, "Accuracy", "Training accuracy")
    plot_curve(x, losses, "Loss", "Training Loss")
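The to_tensor_rep call above is another helper that is not shown; since the DataLoader yields a list of per-timestep tensors (as in the other examples that use torch.stack), a plausible one-line sketch is:

import torch

def to_tensor_rep(batch):
    # Stack the per-timestep tensors into one (seq_length, batch_size) LongTensor.
    return torch.stack(batch).long()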
Example #9
def evaluate(config):

    # Initialize the device which to run the model on
    device = torch.device(config.device)

    # Load the dataset
    with open(config.dataset, 'rb') as dataset_file:
        dataset = pickle.load(dataset_file)

    # Initialize the model that we are going to use
    model = TextGenerationModel(config.batch_size, config.seq_length,
                                dataset.vocab_size, config.lstm_num_hidden,
                                config.lstm_num_layers, device,
                                config.dropout_keep_prob)  # fixme
    model.load_state_dict(torch.load(config.ckpt))

    # Generate some sentences by sampling from the model
    model.eval()
    # Create tensor to hold the generated samples.
    samples = torch.zeros((config.sample_batch_size, config.sample_length),
                          dtype=torch.int,
                          device=device,
                          requires_grad=False)

    last_h = torch.zeros(config.lstm_num_layers,
                         config.sample_batch_size,
                         config.lstm_num_hidden,
                         device=device,
                         requires_grad=False)
    last_c = torch.zeros(config.lstm_num_layers,
                         config.sample_batch_size,
                         config.lstm_num_hidden,
                         device=device,
                         requires_grad=False)

    if config.pre_text:
        pre_input = torch.tensor(
            [dataset._char_to_ix[ch] for ch in config.pre_text] * 10,
            device=device,
            requires_grad=False).view(config.sample_batch_size,
                                      -1).t().unsqueeze(-1)
        onehot_pre_input = torch.zeros(
            (pre_input.shape[0], pre_input.shape[1], dataset.vocab_size),
            device=device,
            requires_grad=False)
        onehot_pre_input.scatter_(2, pre_input, 1)
        logits, last_h, last_c = model(onehot_pre_input, last_h, last_c)
        logits = nn.functional.softmax(logits[-1, :, :].unsqueeze(-1) /
                                       config.temperature,
                                       dim=1)
        start_chars = logits.squeeze().argmax(-1)
        samples[:, 0] = start_chars
        onehot_chars = torch.zeros(
            (1, config.sample_batch_size, dataset.vocab_size),
            device=device,
            requires_grad=False)
        onehot_chars.scatter_(2,
                              start_chars.view(1, config.sample_batch_size, 1),
                              1)
    else:
        # Initialize the first characters for the samples.
        start_chars = torch.randint(dataset.vocab_size,
                                    size=(1, config.sample_batch_size, 1),
                                    dtype=torch.long,
                                    device=device,
                                    requires_grad=False)
        samples[:, 0] = start_chars.squeeze()
        # Create a tensor to hold the one-hot encoding for the output characters of the LSTM network (one per each time step).
        onehot_chars = torch.zeros(
            (1, config.sample_batch_size, dataset.vocab_size),
            device=device,
            requires_grad=False)
        onehot_chars.scatter_(2, start_chars, 1)

    for t in np.arange(config.sample_length - 1):
        logits, last_h, last_c = model(onehot_chars, last_h, last_c)
        logits = nn.functional.softmax(logits / config.temperature, dim=2)
        next_chars = logits.squeeze().argmax(-1)
        onehot_chars.zero_()
        onehot_chars.scatter_(2, next_chars.view(1, config.sample_batch_size,
                                                 1), 1)
        samples[:, t + 1] = next_chars

    samples = samples.tolist()
    samples = [dataset.convert_to_string(sample) for sample in samples]
    # Output the samples into a text file.
    with open(config.summary_path + 'samples.txt', 'a') as txt_file:
        txt_file.write('Temperature: {}\nSample length: {}\n'.format(
            config.temperature, config.sample_length))
        txt_file.writelines(map(lambda x: config.pre_text + x + '\n', samples))

    print('Done evaluation.')
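Note that this example divides the logits by config.temperature and then takes the argmax, so the temperature does not actually change which character is selected. For the temperature to have an effect, the next character would have to be drawn from the tempered distribution; a hypothetical variant of the inner sampling step would be:

probs = nn.functional.softmax(logits / config.temperature, dim=2)
next_chars = torch.multinomial(probs.squeeze(0), num_samples=1).squeeze(-1)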
Example #10
File: train.py Project: frank/dl
def train():
    # Torch settings
    device = torch.device(config.device)
    if device.type == 'cpu':
        torch.set_default_tensor_type(torch.FloatTensor)
    elif device.type == 'cuda':
        torch.set_default_tensor_type(torch.cuda.FloatTensor)
    dtype = torch.float

    # Tensorboard summary writer
    if config.tensorboard:
        run_id = datetime.now().strftime("%Y-%m-%d_%H-%M-%S_"
                                         + config.model_type.lower()
                                         + '_' + str(config.input_length))
        log_dir = 'tensorboard/' + config.model_type.lower() + '/' + run_id
        writer = SummaryWriter(log_dir=log_dir)

    # Initialize the dataset and data loader (note the +1)
    dataset = TextDataset(config.txt_file, config.seq_length)
    data_loader = DataLoader(dataset, config.batch_size, num_workers=1)

    # Model parameters
    lr = config.learning_rate
    lr_decay = config.learning_rate_decay
    lr_step = config.learning_rate_step
    dropout = 1.0 - config.dropout_keep_prob
    temp = [0.5, 1., 2.]
    assert config.sample_num % 3 == 0

    # Initialize the model that we are going to use
    model = TextGenerationModel(config.batch_size,
                                config.seq_length,
                                dataset.vocab_size,
                                dropout,
                                device).to(device)

    # Setup the loss and optimizer
    criterion = torch.nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=lr)

    # Characters used to start sentences (closing characters such as ')', '.' or others were removed)
    start_characters = ['1', '2', '3', '4', '5', '6', '7', '8', '9',
                        'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L',
                        'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X',
                        'Y', 'Z', 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j',
                        'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v',
                        'w', 'x', 'y', 'z',
                        '(', '[', '*', '-', '‘', '“']
    start_characters = list(set(start_characters) & set(dataset.vocab))

    # Store all generated sentences
    sentences = {}

    # Load model, if there's any model to load
    model, optimizer, sentences, start_step = load_model(model, optimizer, sentences, step=0)

    try:
        for step, (batch_inputs, batch_targets) in enumerate(data_loader):

            # If the model has been loaded, regulate step number accordingly
            step += start_step

            # Only for time measurement of step through network
            t1 = time.time()

            # Get batches as tensors of size (batch_size x seq_length)
            batch_inputs = torch.stack(batch_inputs).permute((1, 0))
            batch_targets = torch.stack(batch_targets).permute((1, 0)).to(device)

            # Convert batches to one-hot representation (batch_size x seq_length x vocab_size)
            batch_inputs = get_one_hot(batch_inputs,
                                       config.batch_size,
                                       config.seq_length,
                                       dataset.vocab_size).to(device)

            # Forward pass
            model.train()
            optimizer.zero_grad()
            predictions = model.forward(batch_inputs)

            # Compute loss
            loss = criterion(predictions.permute(0, 2, 1), batch_targets)

            # Backward pass
            loss.backward()

            # Clipping gradients to avoid exploding gradient problem
            torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=config.max_norm)

            # Update weights
            optimizer.step()

            # Compute accuracy
            accuracy = get_accuracy(predictions, batch_targets)

            # Add accuracy and loss to the writer
            if config.tensorboard:
                writer.add_scalars('Accuracy_and_Loss', {'accuracy': accuracy, 'loss': loss}, step)
                writer.add_scalar('Learning_Rate', lr, step)

            # Update learning rate
            if (step % lr_step == 0) and step != 0:
                lr *= lr_decay
                for group in optimizer.param_groups:
                    group['lr'] = lr

            # Just for time measurement
            t2 = time.time()
            examples_per_second = config.batch_size / float(t2 - t1)

            if step % config.print_every == 0:
                print("[{}] Train Step {:04d}/{:04d}, Batch Size = {}, Examples/Sec = {:.2f}, "
                      "Accuracy = {:.2f}, Loss = {:.3f}".format(datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
                                                                step,
                                                                int(config.train_steps),
                                                                config.batch_size,
                                                                examples_per_second,
                                                                accuracy,
                                                                loss))

            if step % config.sample_every == 0:
                model.eval()

                # Store sentences for this step
                step_sentences = {temp[0]: [], temp[1]: [], temp[2]: []}

                # Get 6 random starter characters
                sample = random.sample(start_characters, config.sample_num)

                print()
                for idx, c in enumerate(sample):
                    # Temperature parameter
                    t = temp[int(idx / 2)]

                    # Character's one-hot representation
                    c_oh = torch.tensor(dataset.convert_to_one_hot(c), dtype=dtype).to(device)

                    # Returns a sentence of indexes and length 30
                    sentence = dataset.convert_to_string(model.generate(c_oh, t))
                    print("[t={:.1f}] {}".format(t, sentence.replace('\n', '\\n ')))
                    step_sentences[t].append(sentence)
                print()
                sentences[step] = step_sentences

            if (step % config.save_every == 0) and step != 0:
                save_model(model, optimizer, sentences, step)

            if step == config.train_steps:
                # If you receive a PyTorch data-loader error, check this bug report:
                # https://github.com/pytorch/pytorch/pull/9655
                break

        if config.tensorboard:
            writer.close()

        print('Done training.')

    except (KeyboardInterrupt, BrokenPipeError):
        if config.tensorboard:
            writer.close()
        print("\n" + random.choice(quit_msgs))
Example #11
def train(config):
    # Create output generated images directory (if it does not already exists)
    os.makedirs('./generated_text/', exist_ok=True)
    os.makedirs('./models/', exist_ok=True)

    os.makedirs('./part2/generated_text/', exist_ok=True)
    os.makedirs('./part2/models/', exist_ok=True)

    # Initialize the device which to run the model on
    # if GPU was chosen, check if CUDA is available
    if str(config.device) != "cpu":
        if not torch.cuda.is_available():
            print(
                '\n* GPU was selected but CUDA is not available.\nTraining on CPU ...\n'
            )
            device = torch.device("cpu")
        else:
            print('\n* CUDA is available!  Training on GPU ...\n')
            device = torch.device(config.device)
    else:
        print('\n* Training on CPU ...\n')
        device = torch.device(config.device)

    # Initialize the dataset and data loader (note the +1)
    dataset = TextDataset(config.txt_file, config.seq_length)
    data_loader = DataLoader(dataset, config.batch_size, num_workers=1)

    model = TextGenerationModel(config.batch_size,
                                config.seq_length,
                                dataset.vocab_size,
                                lstm_num_hidden=config.lstm_num_hidden,
                                lstm_num_layers=config.lstm_num_layers,
                                drop_prob=1.0 - config.dropout_keep_prob,
                                device=device).to(device)

    # Setup the loss, optimizer and scheduler
    criterion = torch.nn.CrossEntropyLoss()
    optimizer = torch.optim.RMSprop(model.parameters(),
                                    lr=config.learning_rate)
    scheduler = torch.optim.lr_scheduler.StepLR(
        optimizer,
        step_size=config.learning_rate_step,
        gamma=config.learning_rate_decay)

    train_accuracy, train_loss = [], []
    eval_steps, eval_loss, eval_accuracy, = [], [], []

    for epoch in range(config.epochs):
        # Print current epoch
        print('\n',
              str('-') * (56), 'epoch: {}/{}'.format(epoch + 1, config.epochs),
              str('-') * (56))

        for step, (batch_inputs, batch_targets) in enumerate(data_loader):
            # Enable train mode
            model.train()

            # Only for time measurement of step through network
            t1 = time.time()

            ################################################################
            # batch_inputs.shape = batch_size x seq_length dimensions
            batch_inputs = torch.stack(batch_inputs, dim=1).to(device)
            batch_targets = torch.stack(batch_targets, dim=1).to(device)

            # Update batch size
            # -- in case that the last batch size is less than the confg. one
            config.batch_size = batch_inputs.shape[0]

            # Clear accumulated gradients
            optimizer.zero_grad()

            # Forward pass
            predictions = model(batch_inputs)

            # Calculate loss
            loss = criterion(
                predictions,
                batch_targets.view(config.batch_size * config.seq_length))

            # Store train accuracy and loss
            train_loss.append(loss.item())
            train_accuracy.append(accuracy(predictions, batch_targets))

            # Back-propagate
            loss.backward()

            # `clip_grad_norm_` helps prevent the exploding gradient problem in RNNs / LSTMs.
            torch.nn.utils.clip_grad_norm_(model.parameters(),
                                           max_norm=config.max_norm)

            # Update weights and scheduler
            optimizer.step()
            scheduler.step()
            ################################################################

            # Just for time measurement
            t2 = time.time()
            examples_per_second = config.batch_size / float(t2 - t1)

            if step % config.print_every == 0:

                print(
                    "[{}] Train Step {:04f}/{:04f}, Batch Size = {}, Examples/Sec = {:.2f}, "
                    "Accuracy = {:.2f}, Loss = {:.3f}".format(
                        datetime.now().strftime("%Y-%m-%d %H:%M"), step,
                        config.train_steps, config.batch_size,
                        examples_per_second, train_accuracy[-1],
                        train_loss[-1]))

            if step % config.sample_every == 0:
                # Generate sentences by sampling from the model
                print("\n* Sampling...\n")

                # Model into evaluation mode
                model.eval()

                # Whether generated samples are printed during training
                print_ = False

                # Tempering Sampling
                betas = [0.5, 1, 2]
                for beta in betas:
                    tempering_sample(model, dataset, beta, config, device,
                                     epoch, step, print_)

                # Greedy Sampling
                greedy_sample(model, dataset, config, device, epoch, step,
                              print_)

                # Bonus part: Generate sentence given a sentence
                sentence = 'They run into the train.'
                T = 2000
                sampling_methodes = ['top_k', 'beta']
                for sampling_meth in sampling_methodes:
                    gen_from_word(sentence, model, dataset, config, device,
                                  epoch, step, sampling_meth, T, print_)

                sentence = 'Anna'
                T = 2000
                sampling_methodes = ['top_k', 'beta']
                for sampling_meth in sampling_methodes:
                    gen_from_word(sentence, model, dataset, config, device,
                                  epoch, step, sampling_meth, T, print_)

                # Save the trained model -- Checkpoint
                # save_model(epoch, step, model)

                # Save loss and accuracy
                eval_steps.append(step)
                eval_loss.append(train_loss[-1])
                eval_accuracy.append(train_accuracy[-1])
                np.savez('lstm.npz',
                         eval_steps=eval_steps,
                         eval_loss=eval_loss,
                         eval_accuracy=eval_accuracy)

            if step == config.train_steps:
                # If you receive a PyTorch data-loader error, check this bug report:
                # https://github.com/pytorch/pytorch/pull/9655
                break

    print('Done training.')

    # Save the trained model -- Checkpoint
    save_model(epoch, step, model)
Example #12
def train(config):

    # Initialize the device which to run the model on
    device = torch.device(config.device)
    config.txt_file = './assets/book_EN_grimms_fairy_tails.txt'
    # Initialize the dataset and data loader (note the +1)
    dataset = TextDataset(config.txt_file, config.seq_length)  # fixme
    data_loader = DataLoader(dataset, config.batch_size)

    # Initialize the model that we are going to use
    model = TextGenerationModel(config.batch_size, config.seq_length,
                                dataset.vocab_size, 64,
                                config.dropout_keep_prob,
                                config.lstm_num_hidden, config.lstm_num_layers,
                                device)  # FIXME

    # Setup the loss and optimizer
    criterion = nn.CrossEntropyLoss()  # FIXME
    optimizer = optim.RMSprop(model.parameters(),
                              lr=config.learning_rate)  # FIXME
    step = 0
    loss_list = []
    accuracy_list = []
    while step < 33600:
        for (batch_inputs, batch_targets) in data_loader:
            model.train()
            step += 1
            # Only for time measurement of step through network
            t1 = time.time()

            #######################################################
            # Add more code here ...
            #######################################################
            batch_inputs = torch.stack(batch_inputs).to(device)
            batch_targets = torch.stack(batch_targets, dim=1).to(device)

            # loss = np.inf   # fixme
            # accuracy = 0.0  # fixme
            model.zero_grad()
            pred, _ = model(batch_inputs)
            pred = pred.view(-1, dataset.vocab_size)
            batch_targets = batch_targets.view(-1)
            loss = criterion(pred, batch_targets)
            loss.backward()
            torch.nn.utils.clip_grad_norm_(model.parameters(), 5.0)
            optimizer.step()

            predictions = torch.argmax(pred, dim=1)
            correct = (predictions == batch_targets).sum().item()

            accuracy = correct / pred.size(0)
            accuracy_list.append(accuracy)
            loss_list.append(loss.item())
            # Just for time measurement
            t2 = time.time()
            examples_per_second = 64 / float(t2 - t1)

            if (step + 1) % 60 == 0:
                print("[{}] Train Step {:04d}/{:04d}, Batch Size = {}, \
                           Examples/Sec = {:.2f}, "
                      "Accuracy = {:.2f}, Loss = {:.3f}".format(
                          datetime.now().strftime("%Y-%m-%d %H:%M"), step,
                          1000000, 64, examples_per_second, accuracy, loss))

            if step % 11200 == 0:
                # Generate some sentences by sampling from the model
                model.eval()
                for i in range(5):
                    for temperature in [0, 0.5, 1.0, 2.0]:
                        for length in [30, 40, 60]:
                            sentence = generate_sequence(
                                dataset, model, device, temperature, length)
                            with open('./summaries.txt', 'a',
                                      encoding='utf-8') as file:
                                file.write("{};{};{};{};{}\n".format(
                                    i, step, temperature, length, sentence))

            if step == 33600:
                # If you receive a PyTorch data-loader error,
                # check this bug report:
                # https://github.com/pytorch/pytorch/pull/9655
                break
    plt.subplot(2, 1, 1)
    plt.plot(np.arange(len(accuracy_list)), accuracy_list, 'o-')
    plt.xlabel('Step')
    plt.ylabel('Accuracy')

    plt.subplot(2, 1, 2)
    plt.plot(np.arange(len(loss_list)), loss_list)
    plt.xlabel('Step')
    plt.ylabel('Loss')
    plt.tight_layout()
    plt.show()
    print('Done training.')
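The generate_sequence(dataset, model, device, temperature, length) helper called in the sampling block above is not shown in this snippet. A minimal sketch, assuming the model takes index tensors of shape (seq_len, batch) and returns (logits, hidden) with logits of shape (seq_len, batch, vocab_size), and that the dataset exposes vocab_size and convert_to_string as in the other examples; a temperature of 0 is treated as greedy decoding so the [0, 0.5, 1.0, 2.0] grid above stays valid:

import random

import torch

def generate_sequence(dataset, model, device, temperature, length):
    # Sketch only: autoregressive character sampling under the shape assumptions above.
    with torch.no_grad():
        generated = [random.randrange(dataset.vocab_size)]  # random start character
        for _ in range(length):
            inp = torch.tensor(generated, device=device).unsqueeze(1)  # (seq_len, 1)
            logits, _ = model(inp)
            last = logits[-1, 0]  # logits for the next character
            if temperature == 0:
                nxt = int(last.argmax())
            else:
                probs = torch.softmax(last / temperature, dim=-1)
                nxt = int(torch.multinomial(probs, 1))
            generated.append(nxt)
    return dataset.convert_to_string(generated)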
Example No. 13
    parser = argparse.ArgumentParser()
    parser.add_argument(dest='path',
                        type=str,
                        help="Path to the trained model.")
    parser.add_argument('-d',
                        dest='data',
                        type=str,
                        default='assets/book_EN_grimms_fairy_tails.txt',
                        help="Path to the dataset.")
    parser.add_argument('-t',
                        dest='temperature',
                        type=float,
                        default=1,
                        help="Sampling temperature.")
    args = parser.parse_args()

    checkpoint = torch.load(args.path)

    dataset = TextDataset(args.data, 30)

    # Presumably: batch size 512, sequence length 30, vocabulary size 87
    model = TextGenerationModel(512, 30, 87, lstm_num_hidden=128).cuda()
    model.load_state_dict(checkpoint['state_dict'])
    model.eval()

    # Randomly sample sequences from the model.
    sample = model.sample(True, args.temperature)
    sample = sample_text(dataset, sample)

    for s in sample:
        print(s)
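The sample_text helper above is also not included in the snippet. A minimal sketch, assuming model.sample returns a tensor of character indices with one generated sequence per row and that the dataset provides convert_to_string as in the other examples:

def sample_text(dataset, sample):
    # Sketch only: turn each row of sampled indices back into a readable string.
    return [dataset.convert_to_string(seq.tolist()) for seq in sample.cpu()]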
Example No. 14
def train(config):
    device = torch.device(config.device)

    # Initialize the dataset and data loader (note the +1)
    dataset = TextDataset(config.txt_file, config.seq_length)
    data_loader = DataLoader(dataset, config.batch_size, num_workers=1)
    vocabulary_size = dataset.vocab_size

    # Initialize the model that we are going to use
    model = TextGenerationModel(config.batch_size,
                                config.seq_length,
                                vocabulary_size,
                                config.lstm_num_hidden,
                                config.lstm_num_layers,
                                device=device).to(device)

    # Setup the loss and optimizer
    optimizer = optim.RMSprop(model.parameters(), config.learning_rate)
    criterion = nn.CrossEntropyLoss()
    accuracies = []
    losses = []
    h0 = torch.zeros(config.lstm_num_layers, config.batch_size,
                     config.lstm_num_hidden).to(device)
    c0 = torch.zeros(config.lstm_num_layers, config.batch_size,
                     config.lstm_num_hidden).to(device)
    for step, (batch_inputs, batch_targets) in enumerate(data_loader):
        # Only for time measurement of step through network
        t1 = time.time()

        batch_inputs = batch_inputs.to(device)
        batch_targets = batch_targets.to(device)

        model.train()

        optimizer.zero_grad()

        prediction, _ = model(batch_inputs, h0, c0)

        loss = criterion(prediction.permute(1, 2, 0), batch_targets)

        loss.backward()

        #######################################################
        torch.nn.utils.clip_grad_norm_(model.parameters(),
                                       max_norm=config.max_norm)
        #######################################################

        optimizer.step()

        _, prediction = prediction.max(-1)

        accuracy = (prediction.t() == batch_targets).sum().item() / (
            prediction.shape[0] * prediction.shape[1])

        accuracies.append(accuracy * 100)
        losses.append(loss.item())

        # Just for time measurement
        t2 = time.time()
        examples_per_second = config.batch_size / float(t2 - t1)

        if step % config.print_every == 0:

            print(
                "[{}] Train Step {:04d}/{:04f}, Batch Size = {}, Examples/Sec = {:.2f}, "
                "Accuracy = {:.2f}, Loss = {:.3f}".format(
                    datetime.now().strftime("%Y-%m-%d %H:%M"), step,
                    config.train_steps, config.batch_size, examples_per_second,
                    accuracy, loss))

        if step % config.sample_every == 0:
            # temperature from [0.5, 1.0, 2.0]
            temp = 0.5

            model.eval()

            h1 = torch.zeros(config.lstm_num_layers, 1,
                             config.lstm_num_hidden).to(device)
            c1 = torch.zeros(config.lstm_num_layers, 1,
                             config.lstm_num_hidden).to(device)

            # set first character to be a random symbol from the vocabulary
            symbol = torch.randint(low=0, high=dataset.vocab_size,
                                   size=(1, 1)).long().to(device)

            # uppercase alphabet
            # alphabet = list(string.ascii_uppercase)

            # lowercase alphabet
            # alphabet = list(string.ascii_lowercase)

            # initializing with a random upper- or lowercase letter from the alphabet
            # symbol = torch.tensor([dataset.convert_to_idx(alphabet[randrange(26)])])

            # first character to be 'S'
            # symbol = torch.tensor([dataset.convert_to_idx('S')])

            generated_text = []
            generated_text.append(symbol.item())

            generated_seq_length = 60
            for i in range(generated_seq_length):
                pred_symbol, (h1, c1) = model(symbol, h1, c1)

                # without using temperature
                _, prediction_symbol = pred_symbol.max(-1)
                symbol = prediction_symbol

                # using temperature function
                # symbol = torch.tensor([[sample(pred_symbol, temperature=temp)]])

                generated_text.append(symbol.item())

            print(dataset.convert_to_string(generated_text))

        if step == config.train_steps:
            # If you receive a PyTorch data-loader error, check this bug report:
            # https://github.com/pytorch/pytorch/pull/9655
            break

    print('Done training.')
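The commented-out temperature branch above relies on a sample helper that is not shown. A minimal sketch, assuming pred_symbol holds the unnormalised logits for a single next character:

import torch

def sample(pred_symbol, temperature=1.0):
    # Sketch only: temperature-scaled softmax followed by one multinomial draw.
    logits = pred_symbol.view(-1) / temperature
    probs = torch.softmax(logits, dim=0)
    return int(torch.multinomial(probs, 1))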
Example No. 15
def train(config, seed=0):

    np.random.seed(seed)
    torch.manual_seed(seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed(seed)
        torch.cuda.manual_seed_all(seed)

    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False

    # Initialize the device on which to run the model
    device = torch.device(config.device)
    print(device)

    # Initialize the dataset and data loader (note the +1)
    dataset = TextDataset(config.txt_file, config.seq_length)
    data_loader = DataLoader(dataset, config.batch_size, drop_last=True)

    # Initialize the model that we are going to use
    model = TextGenerationModel(config.batch_size, config.seq_length,
                                dataset.vocab_size, config.lstm_num_hidden,
                                config.lstm_num_layers,
                                config.device).to(device)

    if config.load_model == 'load':
        model.load_state_dict(torch.load('output_dir/kant.pt'))
        model.eval()

    # Setup the loss and optimizer
    criterion = nn.NLLLoss()
    optimizer = optim.AdamW(model.parameters(), lr=config.learning_rate)

    loss_history = []
    acc_history = []
    count = 1

    for step, (batch_inputs, batch_targets) in enumerate(data_loader):

        # Only for time measurement of step through network
        t1 = time.time()

        # Stack the per-time-step tensors into (seq_len, batch) and move them to the device
        batch_inputs = torch.stack(batch_inputs).long().to(device)
        batch_targets = torch.stack(batch_targets).long().to(device)

        # Reset for next iteration
        model.zero_grad()

        # Forward pass
        log_probs = model(batch_inputs)

        loss = criterion(log_probs.transpose(1, 2), batch_targets)
        loss.backward()

        torch.nn.utils.clip_grad_norm_(model.parameters(),
                                       max_norm=config.max_norm)

        optimizer.step()

        predictions = torch.argmax(log_probs, dim=-1)
        correct = (predictions == batch_targets).sum().item()
        accuracy = correct / (log_probs.size(1) * log_probs.size(0))

        loss_history.append(loss.item())
        acc_history.append(accuracy)

        # Just for time measurement
        t2 = time.time()
        examples_per_second = config.batch_size / float(t2 - t1)

        if config.load_model == 'save' and step % 7000 == 0:
            torch.save(model.state_dict(),
                       f'output_dir/kant_{config.seq_length}_{count}.pt')
            count += 1

        if (step + 1) % config.print_every == 0:

            print("[{}] Train Step {:04d}/{:04d}, Batch Size = {}, \
                    Examples/Sec = {:.2f}, "
                  "Accuracy = {:.2f}, Loss = {:.3f}".format(
                      datetime.now().strftime("%Y-%m-%d %H:%M"), step,
                      config.train_steps, config.batch_size,
                      examples_per_second, accuracy, loss))

        if (step + 1) % config.sample_every == 0:
            # Generate some sentences by sampling from the model
            pass

        if step == config.train_steps:
            # If you receive a PyTorch data-loader error,
            # check this bug report:
            # https://github.com/pytorch/pytorch/pull/9655
            break

    if config.load_model == 'save':
        torch.save(model.state_dict(),
                   f'output_dir/kant_{config.seq_length}_{count}.pt')

    print('Done training.')
    print('Final loss:', loss_history[-1])
    print('Final acc:', acc_history[-1])
    return loss_history, acc_history
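Unlike the earlier variants, this train returns the loss and accuracy histories instead of plotting them. A small, hypothetical driver (config stands for whatever argparse Namespace the surrounding script builds):

import matplotlib.pyplot as plt

loss_history, acc_history = train(config)

plt.plot(loss_history, label='loss')
plt.plot(acc_history, label='accuracy')
plt.xlabel('Step')
plt.legend()
plt.show()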