Example #1
def speak(length):

    dataset = torch.load('./outputs/secondexperiment/saved_dataset.dataset')
    device = torch.device('cpu')
    model = TextGenerationModel(64, 90, dataset.vocab_size, 128, 2, 0, device)
    model.load_state_dict(
        torch.load('./outputs/secondexperiment/saved_model.pt',
                   map_location='cpu'))

    with open('sample_start.txt', 'r', encoding='utf-8') as f:
        _input = f.read()
    idxs = dataset.convert_from_string(_input)
    text = generate_sentence(idxs, model, dataset, 0.5, length)
    print(text)
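
The generate_sentence helper used above is not included in this snippet. A minimal sketch of what it could look like, assuming the model accepts a (seq_len, 1) tensor of character indices plus an optional hidden state and returns per-step logits over the vocabulary (this signature is an assumption, not the original implementation):

import torch
import torch.nn.functional as F

def generate_sentence(idxs, model, dataset, temperature, length):
    # Feed the seed indices, then sample `length` additional characters.
    model.eval()
    generated = list(idxs)
    with torch.no_grad():
        x = torch.tensor(generated).view(-1, 1)       # (seq_len, batch=1)
        logits, hidden = model(x, None)               # assumed model interface
        for _ in range(length):
            last = logits[-1].squeeze()
            if temperature > 0:
                probs = F.softmax(last / temperature, dim=-1)
                next_ix = torch.multinomial(probs, 1).item()
            else:
                next_ix = torch.argmax(last).item()
            generated.append(next_ix)
            logits, hidden = model(torch.tensor([[next_ix]]), hidden)
    return dataset.convert_to_string(generated)
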
Example #2
def load_model(model_name):
    """ Loads LSTM model
    """
    with open(model_name, 'rb') as f:
        checkpoint = torch.load(f)

    loaded_model = TextGenerationModel(
        checkpoint['batch_size'], checkpoint['seq_length'],
        checkpoint['vocabulary_size'], checkpoint['lstm_num_hidden'],
        checkpoint['lstm_num_layers'], checkpoint['drop_prob'],
        checkpoint['device']).to(checkpoint['device'])

    loaded_model.load_state_dict(checkpoint['state_dict'])
    return loaded_model
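
A checkpoint like the one read here could be written by a matching save routine. The sketch below is hypothetical; the keys simply mirror the ones load_model expects, and where the values come from (config, dataset) is an assumption:

import torch

def save_model(model, config, vocab_size, path):
    # Hypothetical counterpart to load_model: store hyperparameters next to the weights.
    checkpoint = {
        'batch_size': config.batch_size,
        'seq_length': config.seq_length,
        'vocabulary_size': vocab_size,
        'lstm_num_hidden': config.lstm_num_hidden,
        'lstm_num_layers': config.lstm_num_layers,
        'drop_prob': config.drop_prob,
        'device': config.device,
        'state_dict': model.state_dict(),
    }
    with open(path, 'wb') as f:
        torch.save(checkpoint, f)
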
Example #3
def generate_sequence(config,
                      seed=0,
                      temp=0,
                      seq_length=30,
                      model_path='output_dir/kant_100_4.pt',
                      init_char='t'):

    np.random.seed(seed)
    torch.manual_seed(seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed(seed)
        torch.cuda.manual_seed_all(seed)

    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False

    # Initialize the device which to run the model on
    device = torch.device(config.device)
    print(device)

    # Initialize the dataset and data loader (note the +1)
    dataset = TextDataset(config.txt_file, config.seq_length)

    # Initialize the model that we are going to use
    model = TextGenerationModel(1, 1, dataset.vocab_size,
                                config.lstm_num_hidden, config.lstm_num_layers,
                                config.device).to(device)

    model.load_state_dict(torch.load(model_path, map_location=config.device))
    model.eval()

    # print(init_char)
    word_list = [dataset._char_to_ix[char] for char in init_char]
    state = model.init_state()

    for step in range(seq_length):
        last = torch.tensor([[word_list[step]]]).long().to(device)
        # print(last)
        output, state = model.predict(last, state, temp=temp)
        # print(output.squeeze())
        if step + 1 >= len(word_list):
            if temp > 0:
                word_list.append(torch.multinomial(output.squeeze(), 1).item())
            else:
                word_list.append(torch.argmax(output).item())

    # plt.hist(output.squeeze().numpy(), 100)
    # plt.show()

    sequence = ''.join([dataset._ix_to_char[ix] for ix in word_list])
    return sequence
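
The predict method called in the loop above is not part of the snippet. A plausible sketch, assuming it runs a single forward step and returns a temperature-scaled probability distribution plus the updated state (the caller then applies multinomial or argmax sampling):

import torch.nn.functional as F

def predict(self, x, state, temp=0.0):
    # Hypothetical TextGenerationModel.predict: one step forward, softmax over the vocabulary.
    logits, state = self.forward(x, state)   # assumed one-step forward pass
    last = logits[-1]                        # logits for the final time step
    scale = temp if temp > 0 else 1.0        # temp == 0 is handled greedily by the caller
    return F.softmax(last / scale, dim=-1), state
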
Example #4
def train(config):

    # Initialize the text dataset
    dataset = TextDataset(config.txt_file)

    # Initialize the model
    model = TextGenerationModel(
        batch_size=config.batch_size,
        seq_length=config.seq_length,
        vocabulary_size=dataset.vocab_size,
        lstm_num_hidden=config.lstm_num_hidden,
        lstm_num_layers=config.lstm_num_layers
    )

    ###########################################################################
    # Implement code here.
    ###########################################################################

    # Define the optimizer and a global step counter
    global_step = tf.Variable(0, trainable=False, name='global_step')
    optimizer = tf.train.RMSPropOptimizer(config.learning_rate)

    # Compute the gradients for each variable, clip them by global norm and apply them
    grads_and_vars = optimizer.compute_gradients(model.loss)
    grads, variables = zip(*grads_and_vars)
    grads_clipped, _ = tf.clip_by_global_norm(grads, clip_norm=config.max_norm_gradient)
    apply_gradients_op = optimizer.apply_gradients(zip(grads_clipped, variables), global_step=global_step)

    ###########################################################################
    # Implement code here.
    ###########################################################################

    for train_step in range(int(config.train_steps)):

        # Only for time measurement of step through network
        t1 = time.time()

        #######################################################################
        # Implement code here.
        #######################################################################

        # sess.run ( .. )

        # Only for time measurement of step through network
        t2 = time.time()
        examples_per_second = config.batch_size/float(t2-t1)

        # Output the training progress
        if train_step % config.print_every == 0:
            print("[{}] Train Step {:04d}/{:04d}, Batch Size = {}, Examples/Sec = {:.2f}, Loss = XX".format(
                datetime.now().strftime("%Y-%m-%d %H:%M"), train_step+1,
                int(config.train_steps), config.batch_size, examples_per_second
            ))
Example #5
def train(config):

    # Initialize the device which to run the model on
    device = torch.device(config.device)

    # Initialize the dataset and data loader (note the +1)
    dataset = TextDataset(...)  # fixme
    data_loader = DataLoader(dataset, config.batch_size)

    # Initialize the model that we are going to use
    model = TextGenerationModel(...)  # FIXME

    # Setup the loss and optimizer
    criterion = None  # FIXME
    optimizer = None  # FIXME

    for step, (batch_inputs, batch_targets) in enumerate(data_loader):

        # Only for time measurement of step through network
        t1 = time.time()

        #######################################################
        # Add more code here ...
        #######################################################

        loss = np.inf  # fixme
        accuracy = 0.0  # fixme

        # Just for time measurement
        t2 = time.time()
        examples_per_second = config.batch_size / float(t2 - t1)

        if (step + 1) % config.print_every == 0:

            print("[{}] Train Step {:04d}/{:04d}, Batch Size = {}, \
                    Examples/Sec = {:.2f}, "
                  "Accuracy = {:.2f}, Loss = {:.3f}".format(
                      datetime.now().strftime("%Y-%m-%d %H:%M"), step,
                      config.train_steps, config.batch_size,
                      examples_per_second, accuracy, loss))

        if (step + 1) % config.sample_every == 0:
            # Generate some sentences by sampling from the model
            pass

        if step == config.train_steps:
            # If you receive a PyTorch data-loader error,
            # check this bug report:
            # https://github.com/pytorch/pytorch/pull/9655
            break

    print('Done training.')
Example #6
File: eval.py  Project: frank/dl
def eval():
    # Torch settings
    torch.set_default_tensor_type(torch.FloatTensor)

    # Initialize the dataset
    dataset = TextDataset(config.txt_file, config.seq_length)

    # Get temperature
    temp = config.temperature

    # Initialize the model that we are going to use
    model = TextGenerationModel(config.batch_size, config.seq_length,
                                dataset.vocab_size)

    # Load model, if there's any model to load
    model, steps = load_model(model)
    print("Model trained for", steps, "steps")
    model.eval()

    try:
        while True:
            # Get input for the start of the sentence
            start = input("\nStart: ")

            # Convert input to one-hot representation (length x vocab_size)
            try:
                start_oh = get_one_hot(start, dataset)
            except KeyError:
                print("One or more characters were not recognized. Try again!")
                continue

            # Generate the rest of the sentence
            sentence = dataset.convert_to_string(
                model.cmd_generate(start_oh, temp, config.seq_length))

            print("Model says:\n")
            print(start + sentence)
    except KeyboardInterrupt:
        print("\n\n" + random.choice(quit_msgs))
Example #7
def generate_sentences(config, sentence):
    state = torch.load('checkpoints/{}'.format(
        config.txt_file.split("/", 1)[1].replace('.txt', '')))
    device = torch.device(config.device)

    # Initialize the dataset and data loader (note the +1)
    dataset = TextDataset(config.txt_file, config.seq_length,
                          config.batch_size, config.train_steps)

    # Initialize the model that we are going to use
    model = TextGenerationModel(config.batch_size, config.seq_length,
                                dataset.vocab_size).to(device=device)

    model.load_state_dict(state['state_dict'])

    char_list = dataset.convert_to_ix(sentence)
    return_list = [[torch.tensor(char)] for char in char_list]

    for i in range(len(sentence) + 50):
        tensor = [torch.tensor([char_list[i]])]
        tensor = torch.unsqueeze(torch.unsqueeze(tensor[-1], 0),
                                 0).float().to(device=config.device)
        if i == 0:
            predictions = model(tensor, 1)
        else:
            predictions = model(tensor)

        out = torch.max(predictions, 1)[1]
        char_list.append(out)
        return_v = int(out.cpu().numpy()[0])
        return_list.append([torch.tensor(return_v)])

    indices = []
    for char in return_list:
        indices.append(int(char[0].numpy()))
    generated_sentence = dataset.convert_to_string(indices)

    return generated_sentence
Example #8
def generate(config):
    np.random.seed(config.seed)
    torch.manual_seed(config.seed)

    # Initialize the device which to run the model on
    device = torch.device(config.device)

    # Initialize the dataset and data loader (note the +1)
    dataset = TextDataset(config.txt_file, config.seq_length)  # fixme
    data_loader = DataLoader(dataset, config.batch_size, drop_last=True)

    # Initialize the model that we are going to use
    model = TextGenerationModel(batch_size=config.batch_size,
                                seq_length=config.seq_length,
                                vocabulary_size=86,
                                lstm_num_hidden=config.lstm_num_hidden,
                                lstm_num_layers=config.lstm_num_layers,
                                device=config.device).to(device)
    model.load_state_dict(torch.load(config.model))

    for l in ["In 1776 ", "Liberty is ", "Democracy is "]:
        char_id = torch.tensor([dataset._char_to_ix[ch]
                                for ch in l]).reshape(-1, 1).to(device)
        hidden = (torch.zeros(
            (config.lstm_num_layers, 1, config.lstm_num_hidden)).to(device),
                  torch.zeros((config.lstm_num_layers, 1,
                               config.lstm_num_hidden)).to(device))
        sequence = sample(model=model,
                          dataset=dataset,
                          init_seq=char_id,
                          init_hidden=hidden,
                          seq_length=200,
                          device=device,
                          temp=config.temp)
        print(
            dataset.convert_to_string(char_id.cpu().numpy().reshape(-1)) +
            sequence)
Example #9
def eval(config):
    # Initialize the device which to run the model on
    use_cuda = torch.cuda.is_available()
    device = torch.device('cuda:0' if use_cuda else 'cpu')
    dtype = torch.cuda.LongTensor if use_cuda else torch.LongTensor
    # Initialize the dataset and data loader (note the +1)
    dataset = pickle.load(open(config.dataset_path, 'rb'))
    # Initialize the model that we are going to use
    model = TextGenerationModel(config.batch_size, config.seq_length, dataset.vocab_size, \
                 config.lstm_num_hidden, config.dropout_keep_prob, config.lstm_num_layers).to(device)

    model.load_state_dict(torch.load(config.model_path))
    # Setup the loss and optimizer
    model.eval()
    print('Evaluating: ')
    num_summaries = 5
    # get random intial chars
    rand_chars = [
        dataset._char_to_ix[random.choice(dataset._chars)]
        for i in range(num_summaries)
    ]
    # to tensor
    prev_pred = torch.Tensor(rand_chars).type(dtype)
    prev_pred_one_hot = to_one_hot(prev_pred, dataset.vocab_size, dtype)
    predictions = []
    for i in range(config.sample_length):
        # batch size 1
        prev_pred_one_hot = torch.unsqueeze(prev_pred_one_hot, 1)
        if i == 0:
            y_pred, hidden = model(prev_pred_one_hot.float())
        else:
            y_pred, hidden = model(prev_pred_one_hot.float(), hidden)
        # get argmax
        # Sample from the network as a multinomial distribution
        if config.sampling_method == 'temp':
            output_dist = y_pred.data.div(config.temperature).exp()
            y_pred_batch_idx = output_dist.squeeze(1).multinomial(1).type(
                dtype)
        else:
            y_pred_batch_idx = y_pred.argmax(2).type(dtype)

        # to one hot
        prev_pred_one_hot = to_one_hot(y_pred_batch_idx.flatten(),
                                       dataset.vocab_size, dtype)
        predictions.append(y_pred_batch_idx.flatten().cpu().detach().numpy())
    predictions = np.asarray(predictions).T
    summaries = [dataset.convert_to_string(pred) for pred in list(predictions)]
    print("{} \n".format('\n'.join(summaries)))
Example #10
def train(config):
    def acc(predictions, targets):
        hotvec = predictions.argmax(-2) == targets
        accuracy = torch.mean(hotvec.float())
        return accuracy

    # Initialize the device which to run the model on
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    print(device)

    # Initialize the dataset and data loader (note the +1)
    dataset = TextDataset(config.txt_file, config.seq_length)
    data_loader = DataLoader(dataset, config.batch_size, num_workers=0)
    print('batch', config.batch_size)

    vocabulary_size = dataset.vocab_size
    print('vocab', vocabulary_size)
    # Initialize the model that we are going to use
    model = TextGenerationModel(config.batch_size,
                                config.seq_length,
                                vocabulary_size=vocabulary_size,
                                lstm_num_hidden=config.lstm_num_hidden,
                                lstm_num_layers=config.lstm_num_layers,
                                dropout=1 - config.dropout_keep_prob,
                                device=device)
    model = model.to(device)
    # Setup the loss and optimizer
    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(),
                                 lr=config.learning_rate,
                                 weight_decay=1e-5)
    gamma = 1 - config.learning_rate_decay
    lr_optim = torch.optim.lr_scheduler.StepLR(optimizer,
                                               config.learning_rate_step,
                                               gamma=gamma,
                                               last_epoch=-1)
    print('Hi')
    acc_list = []
    loss_list = []
    step_list = []
    text_list = []
    epoch = 100
    offset = 2380
    temperature = 1
    policy = 'greedy'
    for e in range(epoch):
        torch.save(model.state_dict(), str(e + 1) + 'tunedmodel.pt')
        for step, (batch_inputs, batch_targets) in enumerate(data_loader):

            # Only for time measurement of step through network
            t1 = time.time()
            optimizer.zero_grad()
            inputs = torch.stack([*batch_inputs], dim=1).to(device)
            targets = torch.stack([*batch_targets], dim=1).to(device)
            out = model(inputs)[0]
            out = out.permute(0, 2, 1)
            loss = criterion(out, targets)
            accuracy = acc(out, targets)

            # Backpropagate, clip the gradients, then update the parameters and the LR schedule
            loss.backward()
            torch.nn.utils.clip_grad_norm_(model.parameters(),
                                           max_norm=config.max_norm)
            optimizer.step()
            lr_optim.step()

            # Just for time measurement
            t2 = time.time()
            examples_per_second = config.batch_size / float(t2 - t1)

            if step % config.print_every == 0:

                print('accuracy, loss, step: \n',
                      np.around(accuracy.item(), 4), np.around(loss.item(),
                                                               4), step, '\n')
                acc_list.append(accuracy.item())
                loss_list.append(loss.item())

                step_list.append(step + offset * e)

            if step % config.sample_every == 0:
                # Generate some sentences by sampling from the model
                generator = torch.randint(low=0,
                                          high=vocabulary_size,
                                          size=(1, 1)).to(device)
                hidden = None
                char_list = []
                for _ in range(config.seq_length):
                    generator, hidden = model.forward(generator, hidden)
                    if policy == 'greedy':
                        idx = torch.argmax(generator).item()
                    else:
                        pass
                    generator = torch.tensor([idx]).unsqueeze(-1)
                    generator = generator.to(device)
                    char_list.append(idx)
                char = dataset.convert_to_string(char_list)
                with open("MyTunedBook.txt", "a") as text_file:
                    print('Epoch. ',
                          e,
                          'Stahp: ',
                          step,
                          '\n Output: ',
                          char,
                          file=text_file)

                print('Epoch. ', e, 'Stahp: ', step, '\n Output: ', char)
                text_list.append((str((step + offset * e)) + '\n' + char))

                pass

            if step == config.train_steps:
                # If you receive a PyTorch data-loader error, check this bug report:
                # https://github.com/pytorch/pytorch/pull/9655
                break

    print('Done training.')

    with open('FinalTunedBook.txt', 'w+') as f:
        for item in text_list:
            f.write("%s\n" % item)

    # save with pandas
    header = ['accuracy', 'length', 'loss', 'step']
    savefiles = zip(acc_list, [config.seq_length] * len(acc_list), loss_list,
                    step_list)
    df = pd.DataFrame(list(savefiles), columns=header)
    df.to_csv('GEN' + str(config.seq_length) + 'tunedlstm.csv')

    print('I am Loaded')

    temp_list = [0., 0.5, 1., 2.]
    policy_list = ['greedy', 'temp']
    seq_length = 111
    alice_string = list('Alice')

    # Generate some sentences by sampling from the model
    for policy in policy_list:
        for temperature in temp_list:
            char_list = []
            hidden = None
            for alice in alice_string:
                idx = dataset.convert_to_idx(alice)
                char_list.append(idx)
                generator = torch.tensor([idx]).unsqueeze(-1)
                generator = generator.to(device)
                generator, hidden = model.forward(generator, hidden)

            for _ in range(seq_length):
                if policy == 'greedy':
                    idx = torch.argmax(generator).item()
                else:
                    temp = generator.squeeze() / temperature
                    soft = torch.softmax(temp, dim=0)
                    idx = torch.multinomial(soft, 1)[-1].item()
                generator = torch.tensor([idx]).unsqueeze(-1)
                generator = generator.to(device)
                generator, hidden = model.forward(generator, hidden)
                char_list.append(idx)
            char = dataset.convert_to_string(char_list)
            with open(
                    "BonusTemp" + str(int(np.floor(temperature))) + "Book.txt",
                    "w+") as text_file:
                print(policy + ': ',
                      temperature,
                      '\n Output: ',
                      char,
                      file=text_file)

            print(policy + ': ', temperature, '\n Output: ', char)
    print('Finito!')
Example #11
def train(config, CHOICES):
    
    # Initialize the device which to run the model on
    #device = torch.device(config.device)# fix this!
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    print(device)
    
    # Initialize the model that we are going to use

    # Initialize the dataset and data loader (note the +1)
    dataset = TextDataset(config.txt_file, config.seq_length)  # fixme
    model = TextGenerationModel(config.batch_size, config.seq_length, dataset.vocab_size, config.temperature).to(device)
    if (CHOICES['LOAD_BEST_MODEL']):
        model.load_state_dict(torch.load('./model_parameter.txt'));
    #print(model.state_dict());
    
    data_loader = DataLoader(dataset, config.batch_size, num_workers=1)

    # Setup the loss and optimizer
    criterion = nn.CrossEntropyLoss();
    optimizer = torch.optim.RMSprop(model.parameters(),lr=config.learning_rate);
    if (CHOICES['LOAD_BEST_MODEL']):
        optimizer.load_state_dict(torch.load('./model_optimizer.txt'));
    accuracy_list = [];
    loss_list = [];
    string_list = [];
    tmp_accuracy = 0;
    
    a = 76;
    while (tmp_accuracy == 0) or (accuracy_list[-1] >0.85): 
        for step, (batch_inputs, batch_targets) in enumerate(data_loader):

            # Only for time measurement of step through network
            t1 = time.time()
            
            batch_inputs = torch.stack(batch_inputs)[:,:, None].view(config.seq_length, -1).to(device); # sequ_length * batch_size
            batch_targets = torch.stack(batch_targets)[:,:, None].view(config.seq_length, -1).to(device); # sequ_length * batch_size
            
            if not((int(batch_inputs.size()[1])) == config.batch_size):
                continue;
                
            #print(dataset.convert_to_string(batch_inputs[:, 0].cpu().numpy())); 
            
            batch_inputs_onehot = one_hot(batch_inputs, dataset.vocab_size); # seq_length * batch_size * vacab_size;
            optimizer.zero_grad()
            out = model(batch_inputs_onehot)

            loss_criterion = criterion(out, batch_targets)
            loss_criterion.backward()
            # Clip the gradients after backward(), before the optimizer update
            torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=config.max_norm)
            optimizer.step()
            
            loss = loss_criterion.item() / config.seq_length
            values, indices = torch.max(out, 1);
            
            accuracy = ((indices[indices == batch_targets].size())[0])/(config.batch_size*config.seq_length);

            # Just for time measurement
            t2 = time.time()
            examples_per_second = config.batch_size/float(t2-t1)
            if step % config.print_every == 0:
                print("[{}] Train Step {:04d}/{:04d}, Batch Size = {}, Examples/Sec = {:.2f}, "
                          "Accuracy = {:.2f}, Loss = {:.3f}".format(
                            datetime.now().strftime("%Y-%m-%d %H:%M"), step,
                            int(config.train_steps), config.batch_size, examples_per_second,
                            accuracy, loss))
                            
                # generate sentences
                if step % 50000 == 0 and CHOICES['GENERATE_FIVE_SENTENCES']:                            
                    model.eval();                    
                    test_input = (torch.Tensor(batch_inputs.size())).type(torch.LongTensor).to(device);
                    a = a + 1;
                    test_input = test_input.fill_(a);
                    output_string = generate_new_stings(model, test_input, dataset.vocab_size, config.seq_length);  
                    tmp = dataset.convert_to_string(output_string.cpu().numpy().tolist());
                    string_list += [tmp];
                    print(tmp);
                    print('---')     
                    
                    model.train();
                # save parameter
                torch.save(model.state_dict(), './model_parameter{:d}.txt'.format(step));
                torch.save(optimizer.state_dict(), './model_optimizer{:d}.txt'.format(step));                    
                
                
                if (CHOICES['DRAW_ACCURACY_PLOT']):
                    accuracy_list += [accuracy];  
                    loss_list += [loss]; 
                

            if step == config.sample_every:
                # Generate some sentences by sampling from the model
                pass
            
            if step == config.train_steps:
                # If you receive a PyTorch data-loader error, check this bug report:
                # https://github.com/pytorch/pytorch/pull/9655
                break
                
            if (CHOICES['GENERATE_FIVE_SENTENCES']) and (len(string_list) == 5):
                break;
        
        if (CHOICES['GENERATE_FIVE_SENTENCES']) and (len(string_list) == 5):
                break;
                        
        
        print("============ finish {} epoch ============ ".format(len(accuracy_list)));
        
    torch.save(model.state_dict(), './model_parameter.txt');
    torch.save(optimizer.state_dict(), './model_optimizer.txt');
    print('Done training.');
    
    if (CHOICES['GENERATE_FIVE_SENTENCES']):
    
        if (CHOICES['DRAW_ACCURACY_PLOT']):
            fig, ax = plt.subplots();
            ax.plot(np.arange(len(accuracy_list)), accuracy_list, 'r', label = 'accuracy');
            ax.plot(np.arange(len(accuracy_list)), loss_list, 'b', label = 'loss');
            legend = ax.legend(loc='upper center');      
            plt.xlabel('Steps');
            plt.title('loss and accuracy of LSTM in 2000 steps');
            plt.show();
        
        for idx in range(5):
            print('====')
            print(string_list[idx]);
Example #12
def train(config):

    # Initialize the device which to run the model on
    device = torch.device(config.device)

    # Initialize the dataset and data loader (note the +1)
    dataset = TextDataset(config.txt_file,
                          config.seq_length)  # should we do +1??
    torch.save(dataset, config.save_dataset)
    data_loader = DataLoader(dataset, config.batch_size, num_workers=1)

    # Initialize the model that we are going to use
    model = TextGenerationModel(config.batch_size, config.seq_length,
                                dataset.vocab_size, config.lstm_num_hidden,
                                config.lstm_num_layers,
                                1 - config.dropout_keep_prob, device).to(device)

    criterion = nn.CrossEntropyLoss()
    optimizer = optim.RMSprop(model.parameters(), lr=config.learning_rate)

    losses = []
    accuracies = []

    # run through the dataset several times till u reach max_steps
    step = 0
    while step < config.train_steps:
        for (batch_inputs, batch_targets) in data_loader:
            step += 1
            # Only for time measurement of step through network
            t1 = time.time()

            batch_inputs = torch.stack(batch_inputs).to(device)
            batch_targets = torch.stack(batch_targets, dim=1).to(
                device)  #dim=1 to avoid transposing

            batch_predictions, (_, _) = model.forward(batch_inputs)
            batch_predictions = batch_predictions.permute(1, 2, 0)
            loss = criterion(batch_predictions, batch_targets)
            losses.append(loss.item())
            model.zero_grad()  # should we do this??
            loss.backward()

            torch.nn.utils.clip_grad_norm_(
                model.parameters(),
                max_norm=config.max_norm)  # prevents exploding gradients

            optimizer.step()

            accuracy = accuracy_(batch_predictions, batch_targets)
            accuracies.append(accuracy)

            # Just for time measurement
            t2 = time.time()
            examples_per_second = config.batch_size / float(t2 - t1)

            if step % config.print_every == 0:

                print(
                    "[{}] Train Step {}/{}, Batch Size = {}, Examples/Sec = {:.2f}, "
                    "Accuracy = {:.2f}, Loss = {:.3f}".format(
                        datetime.now().strftime("%Y-%m-%d %H:%M"), int(step),
                        int(config.train_steps), config.batch_size,
                        examples_per_second, accuracy, loss))

            if step % config.sample_every == 0:

                for temperature in [0]:
                    for length in [30, 60, 90, 120]:
                        sentence = generate_sentence(model, dataset,
                                                     temperature, length,
                                                     device)
                        with open(config.save_generated_text,
                                  'a',
                                  encoding='utf-8') as file:
                            file.write("{};{};{};{}\n".format(
                                step, temperature, length, sentence))

            if step % config.save_every == 0:
                torch.save(model.state_dict(), config.save_model)

            if step == config.train_steps:
                # save only the model parameters
                torch.save(model.state_dict(), config.save_model)
                # If you receive a PyTorch data-loader error, check this bug report:
                # https://github.com/pytorch/pytorch/pull/9655
                break

    # revive the model
    # model = TextGenerationModel(config.batch_size, config.seq_length, dataset.vocab_size(),
    #                                 config.lstm_num_hidden, config.lstm_num_layers, device)
    # model.load_state_dict(torch.load(config.save_model))

    print('Done training.')
Example #13
def train(config):
    writer = torch.utils.tensorboard.SummaryWriter()

    # Initialize the device which to run the model on
    device = torch.device(config.device)

    # Initialize the dataset and data loader (note the +1)
    dataset = TextDataset(config.txt_file, config.seq_length)
    data_loader = DataLoader(dataset, config.batch_size)

    # Initialize the model that we are going to use
    vocabulary_size = dataset.vocab_size
    model = TextGenerationModel(batch_size=config.batch_size,
                                seq_length=config.seq_length,
                                vocabulary_size=vocabulary_size)
    model.to(device)

    # Setup the loss and optimizer
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=config.learning_rate)

    accuracies = []
    losses = []

    for step, (batch_inputs, batch_targets) in enumerate(data_loader):

        # Only for time measurement of step through network
        t1 = time.time()

        #######################################################

        # Move to GPU
        batch_inputs = to_tensor_rep(batch_inputs).to(device)
        batch_targets = to_tensor_rep(batch_targets).to(device)

        # Reset for next iteration
        model.zero_grad()

        #######################################################
        model_output = model(batch_inputs,
                             c_0=torch.zeros(config.lstm_num_layers,
                                             batch_inputs.shape[1],
                                             config.lstm_num_hidden,
                                             device=device),
                             h_0=torch.zeros(config.lstm_num_layers,
                                             batch_inputs.shape[1],
                                             config.lstm_num_hidden,
                                             device=device))

        # for each timestep, the crossentropy loss is computed and subsequently averaged
        batch_losses = torch.zeros(config.seq_length, device=device)
        for i in range(config.seq_length):
            batch_losses[i] = criterion(model_output[i], batch_targets[i])

        loss = (1 / config.seq_length) * torch.sum(batch_losses)

        # compute the gradients, clip them to prevent exploding gradients and backpropagate
        loss.backward()
        torch.nn.utils.clip_grad_norm_(model.parameters(),
                                       max_norm=config.max_norm)
        optimizer.step()

        # calculate accuracy
        predictions = torch.argmax(model_output, dim=2)
        correct = (predictions == batch_targets).sum().item()
        accuracy = correct / (model_output.size(0) * model_output.size(1))

        # Just for time measurement
        t2 = time.time()
        examples_per_second = config.batch_size / float(t2 - t1)

        if (step + 1) % config.print_every == 0:
            print("[{}] Train Step {:04d}/{:04d}, Batch Size = {}, \
                    Examples/Sec = {:.2f}, "
                  "Accuracy = {:.2f}, Loss = {:.3f}".format(
                      datetime.now().strftime("%Y-%m-%d %H:%M"), step,
                      config.train_steps, config.batch_size,
                      examples_per_second, accuracy, loss))

            # save loss and accuracy
            accuracies.append(accuracy)
            losses.append(loss.item())
            writer.add_scalar("loss", loss.item(), step)
            writer.add_scalar("accuracy", accuracy, step)

        if (step + 1) % config.sample_every == 0:
            model.eval()
            generate_sequence(model, 62, dataset)
            model.train()

        if step == config.train_steps:
            break

    print('Done training.')

    # make loss and accuracy plots
    x = np.arange(len(accuracies)) * config.print_every
    plot_curve(x, accuracies, "Accuracy", "Training accuracy")
    plot_curve(x, losses, "Loss", "Training Loss")
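
to_tensor_rep is not defined in this excerpt. Assuming the TextDataset loader yields a list of per-timestep index tensors of shape (batch_size,), a plausible sketch is simply:

import torch

def to_tensor_rep(batch):
    # Stack the list of (batch_size,) index tensors into one (seq_len, batch_size) LongTensor.
    return torch.stack(batch).long()
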
Example #14
def train(config):
    # determine the filename (to be used for saving results, checkpoints, models, etc.)
    filename = Path(config.txt_file).stem

    # Initialize the device which to run the model on
    if config.device == 'cuda' and not torch.cuda.is_available():
        device = torch.device('cpu')
    else:
        device = torch.device(config.device)

    # Initialize the dataset and data loader (note the +1)
    dataset = TextDataset(
        filename=config.txt_file,
        seq_length=config.seq_length
    )
    data_loader = DataLoader(dataset, config.batch_size, num_workers=1)

    # get the vocabulary size and int2char and char2int dictionaries for use later
    VOCAB_SIZE = dataset.vocab_size

    # Initialize the model that we are going to use
    model = TextGenerationModel(
        batch_size=config.batch_size,
        seq_length=config.seq_length,
        vocabulary_size=VOCAB_SIZE,
        lstm_num_hidden=config.lstm_num_hidden,
        lstm_num_layers=config.lstm_num_layers,
        device=device,
        batch_first=config.batch_first,
        dropout=1.0-config.dropout_keep_prob
    )

    # Setup the loss and optimizer and learning rate scheduler
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(
        model.parameters(),
        config.learning_rate
    )

    # Load the latest checkpoint, if any exist
    checkpoints = list(CHECKPOINTS_DIR.glob(f'{model.__class__.__name__}_{filename}_checkpoint_*.pt'))
    if len(checkpoints) > 0:
        # load the latest checkpoint
        checkpoints.sort(key=os.path.getctime)
        latest_checkpoint_path = checkpoints[-1]
        start_step, results, sequences = load_checkpoint(latest_checkpoint_path, model, optimizer)
    else:
         # initialize the epoch, results and best_accuracy
        start_step = 0
        results = {
            'step': [],
            'accuracy': [],
            'loss': [],
        }
        sequences = {
            'step': [],
            't': [],
            'temperature': [],
            'sequence': []
        }

    for step in range(start_step, int(config.train_steps)):
        # reinitialize the data_loader iterater if we have iterated over all available mini-batches
        if step % len(data_loader) == 0 or step == start_step:
            data_iter = iter(data_loader)
        
        # get the mini-batch
        batch_inputs, batch_targets = next(data_iter)

        # Only for time measurement of step through network
        t1 = time.time()

        #######################################################
        # Add more code here ...
        #######################################################

        # put the model in training mode
        model.train()

        # convert the data and send to device
        X = torch.stack(batch_inputs, dim=1)
        X = X.to(device)

        Y = torch.stack(batch_targets, dim=1)
        Y = Y.to(device)

        # forward pass the mini-batch
        Y_out, _ = model.forward(X)
        Y_pred = Y_out.argmax(dim=-1)

        # (re)set the optimizer gradient to 0
        optimizer.zero_grad()

        # compute the accuracy and the loss
        accuracy = get_accuracy(Y_pred, Y)
        loss = criterion(Y_out.transpose(2, 1), Y)

        # backpropagate the loss
        loss.backward()

        # clip the gradients (to prevent them from exploding)
        torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=config.max_norm)

        # tune the model parameters
        optimizer.step()

        # Just for time measurement
        t2 = time.time()
        examples_per_second = config.batch_size/float(t2-t1)

        if step % config.print_every == 0:
            print(f'[{datetime.now().strftime("%Y-%m-%d %H:%M")}], Train Step {step:04d}/{int(config.train_steps):04d}, Batch Size = {config.batch_size}, Examples/Sec = {examples_per_second:.2f}, Accuracy = {accuracy:.2f}, Loss = {loss:.3f}')

            # append the accuracy and loss to the results
            results['step'].append(step)
            results['accuracy'].append(accuracy.item())
            results['loss'].append(loss.item())

        if step % config.sample_every == 0:
            for T in [20, 30, 60, 120]:
                for temperature in [0.0, 0.5, 1.0, 2.0]:
                    # Generate some sentences by sampling from the model
                    sequence = sample_sequence(
                        model=model,
                        vocab_size=VOCAB_SIZE,
                        T=T,
                        char=None,
                        temperature=temperature,
                        device=device
                    )
                    sequence_str = dataset.convert_to_string(sequence)
                    print(f'Generated sample sequence (T={T}, temp={temperature}): {sequence_str}')

                    # append the generated sequence to the sequences
                    sequences['step'].append(step)
                    sequences['t'].append(T)
                    sequences['temperature'].append(temperature)
                    sequences['sequence'].append(sequence_str)

        if step % config.checkpoint_every == 0:
            # create a checkpoint
            create_checkpoint(CHECKPOINTS_DIR, filename, step, model, optimizer, results, sequences)

            # save the results
            save_results(RESULTS_DIR, filename, results, sequences, model)

            # save the model
            save_model(MODELS_DIR, filename, model)

        if step == config.train_steps:
            # If you receive a PyTorch data-loader error, check this bug report:
            # https://github.com/pytorch/pytorch/pull/9655
            break

    print('Done training.')
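
The get_accuracy helper referenced above is not included. Assuming it compares the argmax predictions with the targets character by character, it could be as small as:

def get_accuracy(y_pred, y_true):
    # Fraction of positions where the predicted character index equals the target index.
    return (y_pred == y_true).float().mean()
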
Example #15
    # Initialize the device which to run the model on
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    print(device)

    # Initialize the dataset and data loader (note the +1)
    dataset = TextDataset(config.txt_file, config.seq_length)
    data_loader = DataLoader(dataset, config.batch_size, num_workers=0)
    print('batch', config.batch_size)

    vocabulary_size = dataset.vocab_size
    print('vocab', vocabulary_size)
    # Initialize the model that we are going to use
    model = TextGenerationModel(config.batch_size,
                                config.seq_length,
                                vocabulary_size=vocabulary_size,
                                lstm_num_hidden=config.lstm_num_hidden,
                                lstm_num_layers=config.lstm_num_layers,
                                dropout=1-config.dropout_keep_prob,
                                device=device
                                )
    model = model.to(device)
    # Setup the loss and optimizer
    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(),
                                 lr=config.learning_rate
                                 )
    print('Hi')
    acc_list = []
    loss_list = []
    step_list = []
    text_list = []
    epoch = 50
Example #16
def train(config):
    def compute_accuracy(outputs, targets):
        """
        Compute the accuracy of the predictions.
        """
        outputs = torch.argmax(outputs, -1)

        return (outputs == targets).float().mean()

    # Initialize the device which to run the model on
    device = torch.device(config.device)

    # Initialize the dataset and data loader (note the +1)
    dataset = TextDataset(config.txt_file, config.seq_length)  # fixme
    data_loader = DataLoader(dataset, config.batch_size, num_workers=4)

    # Initialize the model that we are going to use
    model = TextGenerationModel(config.batch_size, config.seq_length,
                                dataset.vocab_size, config.lstm_num_hidden,
                                config.lstm_num_layers, device,
                                config.dropout_keep_prob).to(device)

    learning_rate = config.learning_rate

    # Setup the loss and optimizer
    criterion = nn.CrossEntropyLoss()  # fixme
    optimizer = optim.Adam(model.parameters(), learning_rate)  # fixme

    x_onehot = torch.FloatTensor(config.seq_length, config.batch_size,
                                 dataset.vocab_size).to(device)
    y_onehot = torch.FloatTensor(config.seq_length, config.batch_size,
                                 dataset.vocab_size).to(device)

    # HACK: config.train_steps seems to be of type 'float' instead of 'int'.
    config.train_steps = int(config.train_steps)

    step = 0

    loss_list = []
    accuracy_list = []

    while step < config.train_steps:
        for batch_inputs, batch_targets in data_loader:

            # Only for time measurement of step through network
            t1 = time.time()

            #######################################################
            # Add more code here ...
            #######################################################
            optimizer.zero_grad()

            batch_inputs = torch.stack(batch_inputs).to(device)
            batch_targets = torch.stack(batch_targets).to(device)
            # print(dataset.convert_to_string(batch_inputs.t()[0].cpu().numpy()))

            try:
                x_onehot.zero_()
                x_onehot.scatter_(2, batch_inputs.unsqueeze(-1), 1)
            except RuntimeError:
                continue

            y = model(x_onehot)

            loss = criterion(y.view(-1, dataset.vocab_size),
                             batch_targets.view(-1))

            loss.backward()
            optimizer.step()

            loss = loss.item()  # fixme
            accuracy = compute_accuracy(y, batch_targets).item()  # fixme

            # Just for time measurement
            t2 = time.time()
            examples_per_second = config.batch_size / float(t2 - t1)

            loss_list.append(loss)
            accuracy_list.append(accuracy)

            if step % config.learning_rate_step == 0:
                learning_rate = config.learning_rate_decay * learning_rate
                print(learning_rate)
                optimizer = optim.Adam(model.parameters(), learning_rate)

            if step % config.print_every == 0:

                print("[{}] Train Step {:04d}/{:04d}, Batch Size = {}, "
                      "Examples/Sec = {:.2f}, Accuracy = {:.2f}, "
                      "Loss = {:.3f}".format(
                          datetime.now().strftime("%Y-%m-%d %H:%M"), step,
                          config.train_steps, config.batch_size,
                          examples_per_second, accuracy, loss))

                # Save an image of loss and accuracy during training.
                plt.figure()
                plt.subplot(121)
                plt.plot(loss_list)
                plt.xlabel("Steps")
                plt.ylabel("Loss")
                plt.subplot(122)
                plt.plot(accuracy_list)
                plt.xlabel("Steps")
                plt.ylabel("Accuracy")
                plt.tight_layout()
                plt.savefig('loss.png')
                plt.close()

            if step % config.sample_every == 0:
                # Generate some sentences by sampling from the model
                inputs = sample_text(dataset, x_onehot)
                output = sample_text(dataset, y)
                sample = sample_text(dataset, model.sample())

                for idx in range(5):
                    print(f"{inputs[idx]} | {output[idx]} | {sample[idx]}")

                # Save some sampled sequences.
                with open('samples.csv', 'a') as file:
                    for line in sample[:5]:
                        file.write(f"{step};'{line}'\n")

                torch.save(
                    {
                        'step': step + 1,
                        'state_dict': model.state_dict(),
                        'optimizer': optimizer.state_dict()
                    },
                    os.path.join(config.summary_path, f"model_{step}.pth.tar"))

            if step == config.train_steps:
                # If you receive a PyTorch data-loader error, check this
                # bug report: https://github.com/pytorch/pytorch/pull/9655
                break
            else:
                step += 1

    print('Done training.')
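
sample_text is not shown in this example. A sketch consistent with how it is called above, assuming it receives a (seq_len, batch, vocab) tensor of one-hot inputs or logits and returns one decoded string per batch element:

def sample_text(dataset, tensor):
    # Argmax over the vocabulary dimension, then decode each batch column into a string.
    indices = tensor.argmax(dim=-1).t()      # (batch, seq_len)
    return [dataset.convert_to_string(row.cpu().numpy()) for row in indices]
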
Example #17
    y_pred, (h1, h2) = model.generate(chars, h1, h2)
    chars.append(torch.max(y_pred, 1)[1][y_pred.size(0) - 1])
    for i in range(sentence_len):
        y_pred, (h1, h2) = model.generate([chars[-1]], h1, h2)
        chars.append(torch.max(y_pred, 1)[1][y_pred.size(0) - 1])
    chars = [int(char.cpu().numpy()[0]) for char in chars]

    return dataset.convert_to_string(chars)


batch_size = 64
seq_length = 30
vocabulary_size = 87
lstm_num_hidden = 128
lstm_num_layers = 2
device = 'cpu'

model = TextGenerationModel(batch_size, seq_length, vocabulary_size,
                            lstm_num_hidden, lstm_num_layers,
                            device).to(device)
model.load_state_dict(
    torch.load('results_grim/model_final.pickle', map_location='cpu'))

dataset = TextDataset(filename='assets/book_EN_grimms_fairy_tails.txt',
                      seq_length=30)

print(generate_greedy(model, dataset, 30))
print(generate_temperature(model, dataset, 30, 2))
print(generate_greedy_given(model, dataset, 180))
print(generate_temperature_given(model, dataset, 180, T=0.5))
Example #18
def train(config):

    # Initialize the device which to run the model on
    device = torch.device(config.device)

    # Initialize the dataset and data loader (note the +1)
    dataset = TextDataset(config.txt_file, config.seq_length)
    data_loader = DataLoader(dataset,
                             config.batch_size,
                             num_workers=1,
                             drop_last=True)
    vocab_size = dataset.vocab_size

    # Initialize the model that we are going to use
    model = TextGenerationModel(config.batch_size, config.seq_length,
                                vocab_size, config.lstm_num_hidden,
                                config.lstm_num_layers, config.device)
    model = model.to(device)

    print(model)

    # Setup the loss and optimizer
    criterion = torch.nn.CrossEntropyLoss()

    # if pickle file is available, load steps and use index -1 to get last step + get lists of values, to continue training
    # where we left off
    if os.path.isfile("steps.p"):
        print('Pre-trained model available...')
        print('Resuming training...')

        # load lists
        step_intervals = pickle.load(open("steps.p", "rb"))
        all_sentences = pickle.load(open("sentences.p", "rb"))
        accuracy_list = pickle.load(open("accuracies.p", "rb"))
        loss_list = pickle.load(open("loss.p", "rb"))
        model_info = pickle.load(open("model_info.p", "rb"))

        # start where we left off
        all_steps = step_intervals[-1]

        # load model
        Modelname = 'TrainIntervalModel' + model_info[0] + 'acc:' + model_info[
            1] + '.pt'
        model = torch.load(Modelname)
        model = model.to(device)

    # otherwise start training from a clean slate
    else:
        print('No pre-trained model available...')
        print('Initializing training...')

        # create lists to keep track of data while training
        all_sentences = []
        step_intervals = []
        accuracy_list = []
        loss_list = []

        # initialize total step counter
        all_steps = 0

    # initialize the optimizer (needed for both fresh and resumed training) and the LR scheduler
    optimizer = torch.optim.RMSprop(model.parameters(),
                                    lr=config.learning_rate)
    scheduler = torch.optim.lr_scheduler.StepLR(
        optimizer,
        step_size=config.learning_rate_step,
        gamma=config.learning_rate_decay)

    # since the nested for loop stops looping after a complete iteration through the data_loader, add for loop for epochs
    for epoch in range(config.epochs):
        print(model)
        for step, (batch_inputs, batch_targets) in enumerate(data_loader):

            # Only for time measurement of step through network
            t1 = time.time()

            # create 2D tensor instead of list of 1D tensors
            #batch_inputs = torch.stack(batch_inputs)
            batch_inputs = batch_inputs.to(device)

            h, c = model.init_hidden()
            out, (h, c) = model(batch_inputs, h, c)

            # transpose to match cross entropy input dimensions
            out.transpose_(1, 2)

            batch_targets = batch_targets.to(device)

            #######################################################
            # Add more code here ...
            #######################################################

            loss = criterion(out, batch_targets)

            max = torch.argmax(out, dim=1)
            correct = (max == batch_targets)
            accuracy = torch.sum(
                correct).item() / correct.size()[0] / correct.size()[1]

            # Backward and optimize, then advance the learning-rate scheduler
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            scheduler.step()

            # Just for time measurement
            t2 = time.time()
            examples_per_second = config.batch_size / float(t2 - t1)

            if step % config.print_every == 0:

                print(
                    "[{}] Train Step {:04d}/{:04d}, Batch Size = {}, Examples/Sec = {:.2f}, "
                    "Accuracy = {:.2f}, Loss = {:.3f}".format(
                        datetime.now().strftime("%Y-%m-%d %H:%M"), step,
                        int(config.train_steps), config.batch_size,
                        examples_per_second, accuracy, loss))

            if all_steps % config.sample_every == 0:

                ###############################
                # Generate generated sequence #
                ###############################

                # do not keep track of gradients during model evaluation
                with torch.no_grad():

                    # create random character to start sentence with
                    random_input = torch.randint(0,
                                                 vocab_size,
                                                 (config.batch_size, ),
                                                 dtype=torch.long).view(-1, 1)
                    x_input = random_input.to(device)

                    # initialize hidden state and cell state
                    h, c = model.init_hidden()
                    h = h.to(device)
                    c = c.to(device)

                    sentences = x_input

                    # loop through sequence length to set generated output as input for next sequence
                    for i in range(config.seq_length):

                        # get randomly generated sentence
                        out, (h, c) = model(x_input, h, c)

                        ####################
                        # Temperature here #
                        ####################

                        # check whether user wants to apply temperature sampling
                        if config.temperature:

                            # apply temperature sampling
                            out = out / config.tempvalue
                            out = F.softmax(out, dim=2)

                            # create a torch distribution of the calculated softmax probabilities and sample from that distribution
                            distribution = torch.distributions.categorical.Categorical(
                                out.view(config.batch_size, vocab_size))
                            out = distribution.sample().view(-1, 1)

                        # check whether user wants to apply greedy sampling
                        else:
                            # load new datapoint by taking the predicted previous letter using greedy approach
                            out = torch.argmax(out, dim=2)

                        # append generated character to total sentence
                        sentences = torch.cat((sentences, out), 1)
                        x_input = out

                    # pick a random sentence (from the batch of created sentences)
                    index = np.random.randint(0, config.batch_size, 1)
                    sentence = sentences[index, :]

                    # squeeze sentence into 1D
                    sentence = sentence.view(-1).cpu()

                    # print sentence
                    print(dataset.convert_to_string(sentence.data.numpy()))

                    # save sentence
                    all_sentences.append(sentence.data.numpy())

                    ##########################
                    # Save loss and accuracy #
                    ##########################

                    # save loss value
                    loss = loss.cpu()
                    loss_list.append(loss.data.numpy())

                    # save accuracy value
                    accuracy_list.append(accuracy)

                    # save step interval
                    step_intervals.append(all_steps)

            if step == config.train_steps:
                # If you receive a PyTorch data-loader error, check this bug report:
                # https://github.com/pytorch/pytorch/pull/9655
                break

            # counter of total amounts of steps (keep track over multiple training sessions)
            all_steps += 1

        if config.savefiles:
            # pickle sentences and steps
            pickle.dump(all_sentences, open('sentences.p', 'wb'))
            pickle.dump(step_intervals, open('steps.p', 'wb'))

            # pickle accuracy and loss
            pickle.dump(accuracy_list, open('accuracies.p', 'wb'))
            pickle.dump(loss_list, open('loss.p', 'wb'))

            # save model

            Modelname = 'TrainIntervalModel' + str(epoch) + 'acc:' + str(
                accuracy) + '.pt'
            torch.save(model, Modelname)

            model_info = [str(epoch), str(accuracy)]
            pickle.dump(model_info, open('model_info.p', 'wb'))

    print('Done training.')
Example #19
def train(config):
    seed = config.seed
    np.random.seed(seed)
    torch.manual_seed(seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed(seed)
        torch.cuda.manual_seed_all(seed)
        torch.backends.cudnn.deterministic = True
        torch.backends.cudnn.benchmark = False

    # Initialize the device which to run the model on
    device = torch.device(config.device)
    #device = torch.device('cpu')

    # Initialize the dataset and data loader (note the +1)
    dataset = TextDataset(filename=config.txt_file,
                          seq_length=config.seq_length)
    data_loader = DataLoader(dataset, config.batch_size, num_workers=0)

    # Initialize the model that we are going to use
    model = TextGenerationModel(config, dataset._vocab_size, device).to(device)
    print('device:', device.type)
    print('Model defined. Number of trainable params:',
          model.numTrainableParameters())
    print(model)
    testLSTM(dataset, data_loader, model, config, device)

    # Setup the loss and optimizer
    criterion = torch.nn.NLLLoss()
    optimizer = optim.AdamW(model.parameters(), config.learning_rate)
    scheduler = torch.optim.lr_scheduler.StepLR(
        optimizer,
        step_size=config.learning_rate_step,
        gamma=config.learning_rate_decay)

    selfGenTHRES = 0
    maxTrainAcc = 0
    acc_plt = []
    loss_plt = []
    for step, (batch_inputs, batch_targets) in enumerate(data_loader):
        # Only for time measurement of step through network
        t1 = time.time()
        #######################################################
        # Add more code here ...
        #######################################################
        X = torch.stack(batch_inputs).to(
            device)  # (seq_len,bsize), input sequence
        T = torch.stack(batch_targets).to(
            device)  # (seq_len,bsize), ground truth sequence

        model.zero_grad()
        h, C = model.init_cell(config.batch_size)
        logprobs, _, _ = model(X, h, C)  # (seq_len,bsize,voc_size)

        loss = criterion(
            logprobs.reshape(config.seq_length * config.batch_size,
                             dataset.vocab_size), T.reshape(-1))
        loss.backward()
        torch.nn.utils.clip_grad_norm_(model.parameters(),
                                       max_norm=config.max_norm)
        optimizer.step()
        scheduler.step()

        predchar = torch.argmax(
            logprobs, dim=2
        )  # (seq_len,bsize) the predicted characters: selected highest logprob for each sequence and example in the mini batch
        accuracy = torch.sum(predchar == T).item() / (config.batch_size *
                                                      config.seq_length)
        loss_plt.append(loss.item())
        acc_plt.append(accuracy)

        # Save the model with the highest training accuracy so far (sufficient for this toy example
        # with batch_size*seq_len character predictions; ideally this would use a larger held-out test set).
        if accuracy > maxTrainAcc:
            maxTrainAcc = accuracy
            torch.save(
                {
                    'step': step,
                    'model_state_dict': model.state_dict(),
                    'optimizer_state_dict': optimizer.state_dict(),
                    'loss': loss,
                    'accuracy': accuracy
                }, "saved_model.tar")
            # If a new accuracy level (steps of 0.1) is reached, print five self-generated sentences
            if accuracy > selfGenTHRES:
                selfGenTHRES += 0.1
                print(
                    '\n#################################### SAMPLE SELF GENERATED SEQUENCES #######################################'
                )
                print('# Step:', step, ', loss:', loss, 'accuracy', accuracy)
                print('# ')
                print('# Greedy sampling [a...]:',
                      generateSequenceGreedy(dataset, model, device, 70, 'a'))
                print('# Greedy sampling [b...]:',
                      generateSequenceGreedy(dataset, model, device, 70, 'b'))
                print('# Greedy sampling [c...]:',
                      generateSequenceGreedy(dataset, model, device, 70, 'c'))
                print('# Greedy sampling [d...]:',
                      generateSequenceGreedy(dataset, model, device, 70, 'd'))
                print('# Greedy sampling [e...]:',
                      generateSequenceGreedy(dataset, model, device, 70, 'e'))
                print('#')
                print('# Output of last training example:')
                print('# INPUT....: ', end="")
                printSequence(X, 0, dataset)
                print('# TARGET...: ', end="")
                printSequence(T, 0, dataset)
                print('# PREDICTED: ', end="")
                printSequence(predchar, 0, dataset)
                print('#')
                print(
                    '############################################################################################################\n'
                )

        # Just for time measurement
        t2 = time.time()
        examples_per_second = config.batch_size / float(t2 - t1)

        if (step + 1) % config.print_every == 0:
            # Print training update
            print("[{}] Train Step {:04d}/{:04d}, Batch Size = {}, \
                    Examples/Sec = {:.2f}, "
                  "Accuracy = {:.2f}, Loss = {:.3f}".format(
                      datetime.now().strftime("%Y-%m-%d %H:%M"), step,
                      config.train_steps, config.batch_size,
                      examples_per_second, accuracy, loss))
            print('best training acc', maxTrainAcc)

        if (step + 1) % (config.train_steps // 3) == 0:
            # Generate some sentences by sampling from the model
            print(
                '\n#################################### SAMPLE SELF GENERATED SEQUENCES #######################################'
            )
            print('# Step:', step, ', loss:', loss, 'accuracy', accuracy)
            print('# Greedy sampling [a...]:',
                  generateSequenceGreedy(dataset, model, device, 30, 'a'))
            print(
                '############################################################################################################\n'
            )

        if step == config.train_steps:
            # If you receive a PyTorch data-loader error,
            # check this bug report:
            # https://github.com/pytorch/pytorch/pull/9655
            break
    print('Done training.')
    Testaccuracy = getTestAccuracy(dataset, data_loader, model, config, device,
                                   200)
    pltLossAcc(loss_plt, acc_plt, config)
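The snippet above computes its NLLLoss by flattening the (seq_len, batch) dimensions of the log-probabilities and targets into one. A small sketch of that reshape with toy tensors (the sizes are illustrative):

import torch
import torch.nn as nn

seq_len, batch_size, vocab_size = 30, 64, 87          # illustrative sizes
logprobs = torch.log_softmax(torch.randn(seq_len, batch_size, vocab_size), dim=-1)
targets = torch.randint(vocab_size, (seq_len, batch_size))

# NLLLoss expects (N, C) log-probabilities and (N,) class indices, so both
# tensors are flattened over the time and batch dimensions before the call.
criterion = nn.NLLLoss()
loss = criterion(logprobs.reshape(seq_len * batch_size, vocab_size), targets.reshape(-1))
print(loss.item())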
Example #20
0
def train(config):

    # Initialize the device which to run the model on
    device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

    # Initialize the dataset and data loader (note the +1)
    abs_path = os.path.abspath(config.txt_file)
    dataset = TextDataset(abs_path, config.seq_length)
    data_loader = DataLoader(dataset, config.batch_size, num_workers=1)

    # Initialize the model that we are going to use
    model = TextGenerationModel(batch_size=config.batch_size,
                                seq_length=config.seq_length,
                                vocabulary_size=dataset.vocab_size,
                                lstm_num_hidden=config.lstm_num_hidden,
                                lstm_num_layers=config.lstm_num_layers,
                                device=device)


    experiment_label = "{}_".format(datetime.now().strftime("%Y-%m-%d %H:%M"))
    for key, value in vars(config).items():
        experiment_label += "{}={}_".format(key, value)

    # Setup the loss and optimizer
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.RMSprop(model.parameters(), lr=config.learning_rate)

    # TODO: configure learning rate scheduler

    for epoch in range(1, config.epochs + 1):
        for step, (batch_inputs, batch_targets) in enumerate(data_loader):

            # Only for time measurement of step through network
            t1 = time.time()

            X = torch.stack(batch_inputs, dim=1)
            X = one_hot(X, dataset.vocab_size)
            Y = torch.stack(batch_targets, dim=1)
            X, Y = X.to(device), Y.to(device)

            # forward pass
            outputs, _ = model(X)

            # compute training metrics
            loss = criterion(outputs.transpose(2, 1), Y)
            predictions = get_predictions(outputs)
            accuracy = (Y == predictions).sum().item() / Y.numel()

            # backward pass
            model.zero_grad()
            loss.backward()

            # clip gradients to prevent them from exploding (before the optimizer step)
            torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=config.max_norm)
            optimizer.step()

            # Just for time measurement
            t2 = time.time()
            examples_per_second = config.batch_size/float(t2-t1)

            if step % config.print_every == 0:

                print("[{}] Train Step {:04d}/{:04d}, Batch Size = {}, Examples/Sec = {:.2f}, "
                      "Accuracy = {:.2f}, Loss = {:.3f}".format(
                        datetime.now().strftime("%Y-%m-%d %H:%M"), epoch*step,
                        config.train_steps, config.batch_size, examples_per_second,
                        accuracy, loss
                ))

        torch.save(model, 'grimm/grimm_epoch_{}.pt'.format(epoch))

    # _ = xp.to_zip(experiment_label + ".zip")
    print('Done training.')
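The snippet above one-hot encodes the input characters and measures accuracy over every predicted character in the batch. A minimal sketch of both steps with toy tensors (sizes are illustrative; Y.numel() is the total number of predicted characters):

import torch
import torch.nn.functional as F

batch_size, seq_length, vocab_size = 4, 10, 30                 # illustrative sizes
Y = torch.randint(vocab_size, (batch_size, seq_length))        # target character ids
X = F.one_hot(Y, num_classes=vocab_size).float()               # (batch, seq, vocab) model input

# Per-character accuracy: fraction of positions where the prediction matches the target.
predictions = torch.randint(vocab_size, Y.shape)               # stand-in for get_predictions(outputs)
accuracy = (Y == predictions).sum().item() / Y.numel()
print(accuracy)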
Example #21
0
def train(config):

    if config.tensorboard:
        writer = SummaryWriter(config.summary +
                               datetime.now().strftime("%Y%m%d-%H%M%S"))
    # Initialize the device which to run the model on
    device = torch.device(config.device)

    # Initialize the dataset and data loader (note the +1)
    dataset = TextDataset(config.txt_file, config.seq_length)  # fixme
    data_loader = DataLoader(dataset, config.batch_size, num_workers=1)

    # Initialize the model that we are going to use
    model = TextGenerationModel(config.batch_size,
                                config.seq_length,
                                dataset.vocab_size,
                                lstm_num_hidden=config.lstm_num_hidden,
                                lstm_num_layers=config.lstm_num_layers,
                                device=config.device)

    # Setup the loss and optimizer
    criterion = torch.nn.CrossEntropyLoss()
    optimizer = torch.optim.RMSprop(model.parameters(),
                                    lr=config.learning_rate)
    for epoch in range(config.epochs):
        for step, (batch_inputs, batch_targets) in enumerate(data_loader):

            # Only for time measurement of step through network
            t1 = time.time()

            #######################################################
            # Add more code here ...
            #######################################################
            optimizer.zero_grad()
            # one_hot returns a LongTensor; cast to float, otherwise the forward pass fails on the dtype
            batch_inputs = torch.nn.functional.one_hot(
                batch_inputs,
                num_classes=dataset.vocab_size).float().to(device)

            batch_targets = batch_targets.to(device)
            out, _ = model.forward(batch_inputs)

            # CrossEntropyLoss expects (batch, vocab, seq) = 64 x 87 x 30, but the model outputs 64 x 30 x 87, hence the permute
            loss = criterion(out.permute(0, 2, 1), batch_targets)
            loss.backward()

            torch.nn.utils.clip_grad_norm_(model.parameters(),
                                           max_norm=config.max_norm)
            optimizer.step()

            predictions = out.argmax(dim=-1)
            accuracy = (predictions == batch_targets).float().mean()

            # Just for time measurement
            t2 = time.time()
            examples_per_second = config.batch_size / float(t2 - t1)

            if step % config.print_every == 0:

                print(
                    "[{}] Train Step {:04d}/{:04d}, Epoch {:d} Batch Size = {}, Examples/Sec = {:.2f}, "
                    "Accuracy = {:.2f}, Loss = {:.3f}".format(
                        datetime.now().strftime("%Y-%m-%d %H:%M"), step,
                        int(config.train_steps), epoch, config.batch_size,
                        examples_per_second, accuracy, loss))
                if config.tensorboard:
                    writer.add_scalar('training_loss', loss, step)
                    writer.add_scalar('accuracy', accuracy, step)

            if step % config.sample_every == 0:
                # Generate some sentences by sampling from the model
                generate_sentence(step, model, config, dataset)

            if step == config.train_steps:
                # If you receive a PyTorch data-loader error, check this bug report:
                # https://github.com/pytorch/pytorch/pull/9655
                print('Done training.')
                break
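When the model returns logits of shape (batch, seq, vocab), CrossEntropyLoss needs the class dimension directly after the batch dimension, which is why the snippet above permutes to (batch, vocab, seq). A small sketch with toy tensors (sizes are illustrative):

import torch
import torch.nn as nn

batch_size, seq_length, vocab_size = 64, 30, 87                 # illustrative sizes
logits = torch.randn(batch_size, seq_length, vocab_size)        # model output, (B, T, C)
targets = torch.randint(vocab_size, (batch_size, seq_length))   # (B, T)

# For sequence targets of shape (B, T), CrossEntropyLoss expects logits of shape (B, C, T).
criterion = nn.CrossEntropyLoss()
loss = criterion(logits.permute(0, 2, 1), targets)
print(loss.item())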
Example #22
0
def train(config):
    tf.reset_default_graph()
    # Initialize the text dataset
    dataset = TextDataset(config.txt_file, config.clean_data)

    # Initialize the model
    model = TextGenerationModel(batch_size=config.batch_size,
                                seq_length=config.seq_length,
                                vocabulary_size=dataset.vocab_size,
                                lstm_num_hidden=config.lstm_num_hidden,
                                lstm_num_layers=config.lstm_num_layers,
                                embed_dim=config.embed_dim,
                                decoding_model=config.decoding_mode)

    ###########################################################################
    # Implement code here.
    ###########################################################################

    warmup_seq = tf.placeholder(dtype=tf.int32,
                                shape=(None, 1),
                                name='warmup_decoding_sequences')
    warmup_decodes = model.decode_warmup(warmup_seq, config.decode_length)

    init_decode_char = tf.placeholder(dtype=tf.int32,
                                      shape=(config.num_rand_samples),
                                      name='rand_init_decoding')
    random_decodes = model.decode(decode_batch_size=config.num_rand_samples,
                                  init_input=init_decode_char,
                                  decode_length=config.decode_length,
                                  init_state=None)

    # Reproducibility
    # tf.set_random_seed(42)
    # np.random.seed(42)

    # Utility vars and ops
    gpu_opts = tf.GPUOptions(
        per_process_gpu_memory_fraction=config.gpu_mem_frac, allow_growth=True)
    session = tf.Session(config=tf.ConfigProto(gpu_options=gpu_opts))
    global_step = tf.Variable(0, trainable=False, name='global_step')

    # logging
    train_logdir = os.path.join(config.summary_path,
                                '{}_train'.format(config.model_name))
    train_log_writer = init_summary_writer(session, train_logdir)

    # Define the optimizer
    if config.optimizer.lower() == 'rmsprop':
        optimizer = tf.train.RMSPropOptimizer(
            learning_rate=config.learning_rate,
            decay=config.learning_rate_decay)
    elif config.optimizer.lower() == 'adam':
        optimizer = tf.train.AdamOptimizer(config.learning_rate)

    # Compute the gradients for each variable
    grads_and_vars = optimizer.compute_gradients(model.loss)
    grads, variables = zip(*grads_and_vars)
    grads_clipped, _ = tf.clip_by_global_norm(
        grads, clip_norm=config.max_norm_gradient)
    apply_gradients_op = optimizer.apply_gradients(zip(grads_clipped,
                                                       variables),
                                                   global_step=global_step)
    saver = tf.train.Saver(max_to_keep=50)
    save_path = os.path.join(config.checkpoint_path,
                             '{}/model.ckpt'.format(config.model_name))
    _ensure_path_exists(save_path)

    # Summaries
    summary_op = tf.summary.merge_all()
    session.run(fetches=[
        tf.global_variables_initializer(),
        tf.local_variables_initializer()
    ])

    for train_step in range(int(config.train_steps)):

        # dim: [batch_size, time_step]
        batch_inputs, batch_labels = dataset.batch(
            batch_size=config.batch_size, seq_length=config.seq_length)

        # Time-major: [time_step, batch_size]
        batch_inputs = batch_inputs.T

        # Only for time measurement of step through network
        t1 = time.time()

        #######################################################################
        # Implement code here
        #######################################################################
        train_feed = {model.inputs: batch_inputs, model.labels: batch_labels}
        fetches = [model.loss, apply_gradients_op]
        if train_step % config.print_every == 0:
            fetches += [summary_op]
            loss, _, summary = session.run(feed_dict=train_feed,
                                           fetches=fetches)
            train_log_writer.add_summary(summary, train_step)
        else:
            loss, _ = session.run(feed_dict=train_feed, fetches=fetches)

        # Only for time measurement of step through network
        t2 = time.time()
        examples_per_second = config.batch_size / float(t2 - t1)

        # Output the training progress
        if train_step % config.print_every == 0:
            print(
                "[{}] Train Step {:04d}/{:04d}, Batch Size = {}, Examples/Sec = {:.2f}, Loss = {}"
                .format(datetime.now().strftime("%Y-%m-%d %H:%M"),
                        train_step + 1, int(config.train_steps),
                        config.batch_size, examples_per_second, loss))

        # Decode
        if train_step % config.sample_every == 0:

            # random character sampling
            print('Random character sampling')
            rand_chars = np.random.choice(a=dataset.vocab_size,
                                          size=(config.num_rand_samples))
            decode_feed = {init_decode_char: rand_chars}
            decoded_tokens = session.run(fetches=[random_decodes],
                                         feed_dict=decode_feed)[0]
            decoded_tokens = np.array(decoded_tokens).T
            for i in range(decoded_tokens.shape[0]):
                print('{}|{}'.format(
                    dataset._ix_to_char[rand_chars[i]],
                    dataset.convert_to_string(decoded_tokens[i, :])))

            print('Warmup sequence sampling')
            warmups = [
                'Welcome to the planet Earth ',
                'Human beings grew up in forests ', 'Satan said ',
                'God is not ', 'theory of evolution ',
                'whole groups of species '
            ]

            for warmup in warmups:
                warmup_tokens = np.array([
                    dataset._char_to_ix[x] for x in warmup.lower()
                    if x in dataset._char_to_ix
                ]).reshape((-1, 1))
                feed = {warmup_seq: warmup_tokens}
                decoded_tokens = session.run(fetches=[warmup_decodes],
                                             feed_dict=feed)[0]
                print('{}|{}'.format(
                    warmup,
                    dataset.convert_to_string(
                        decoded_tokens.squeeze().tolist())))

        if train_step % config.checkpoint_every == 0:
            saver.save(session, save_path=save_path)

    train_log_writer.close()
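The TensorFlow snippet above clips the gradients by their global norm with tf.clip_by_global_norm before applying them. In the PyTorch snippets of this collection the same idea is a single in-place call; a minimal sketch with a stand-in model (names and sizes are illustrative):

import torch

model = torch.nn.Linear(10, 10)                         # stand-in model
optimizer = torch.optim.RMSprop(model.parameters(), lr=1e-3)

loss = model(torch.randn(8, 10)).sum()
loss.backward()
# Rescale all gradients so that their combined (global) norm is at most max_norm,
# then take the optimizer step on the clipped gradients.
torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=5.0)
optimizer.step()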
Example #23
0
def train(config):

    # Print all configs to confirm parameter settings
    print_flags()
    assert config.sampling_method in ('greedy', 'random')
    assert config.generate_mode in ('generate', 'finish')

    # Initialize the device which to run the model on
    device = torch.device(config.device)

    # Initialize the dataset and data loader (note the +1)
    dataset = TextDataset(filename=config.txt_file,
                          seq_length=config.seq_length)
    data_loader = DataLoader(dataset, config.batch_size, num_workers=1)

    # Initialize the model that we are going to use
    model = TextGenerationModel(batch_size=config.batch_size,
                                seq_length=config.seq_length,
                                vocabulary_size=dataset.vocab_size,
                                dropout=1-config.dropout_keep_prob,
                                lstm_num_hidden=config.lstm_num_hidden,
                                lstm_num_layers=config.lstm_num_layers,
                                device=device)
    model.to(device)

    # Setup the loss and optimizer
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=config.learning_rate)
    epoch = 10

    # Store some measures
    los = list()
    iteration = list()
    acc = list()
    max_step = 0

    for i in range(epoch):
      for step, (batch_inputs, batch_targets) in enumerate(data_loader):

          # Only for time measurement of step through network
          t1 = time.time()

          model.train()
          optimizer.zero_grad()

          batch_inputs = torch.stack(batch_inputs).to(device)
          batch_targets = torch.stack(batch_targets).to(device)

          h_0 = torch.zeros(config.lstm_num_layers, batch_inputs.shape[1], config.lstm_num_hidden).to(device)
          c_0 = torch.zeros(config.lstm_num_layers, batch_inputs.shape[1], config.lstm_num_hidden).to(device)

          pred, _, _ = model(batch_inputs, h_0, c_0)
          accuracy = compute_accuracy(pred, batch_targets)
          pred = pred.permute(1, 2, 0)
          batch_targets = batch_targets.permute(1, 0)
          loss = criterion(pred, batch_targets)
          loss.backward()
          torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=config.max_norm)
          optimizer.step()

          # Just for time measurement
          t2 = time.time()
          examples_per_second = config.batch_size/float(t2-t1)

          if (step + i * max_step) % config.print_every == 0:

              print("[{}] Train Step {:04d}/{:04d}, Batch Size = {}, Examples/Sec = {:.2f}, "
                    "Accuracy = {:.2f}, Loss = {:.3f}".format(
                      datetime.now().strftime("%Y-%m-%d %H:%M"), step + i * max_step,
                      int(config.train_steps), config.batch_size, examples_per_second,
                      accuracy, loss
              ))
              iteration.append(step + i * max_step)
              acc.append(accuracy)
              los.append(loss.item())
              if max_step < step:
                max_step = step

          if (step + i * max_step) % config.sample_every == 0:
              model.eval()
              batch_sample = 5
              if config.generate_mode == 'finish':
                generated = [dataset._char_to_ix[c] for c in config.input_seq]
                generated = torch.LongTensor(generated).view(-1, 1).to(device)
                for l in range(config.generate_length):
                  if l == 0:
                    h_s = torch.zeros(config.lstm_num_layers, 1, config.lstm_num_hidden).to(device)
                    c_s = torch.zeros(config.lstm_num_layers, 1, config.lstm_num_hidden).to(device)
                    gen, h_s, c_s = model(generated, h_s, c_s)
                    gen = torch.unsqueeze(gen[-1], 0)
                  else:
                    gen, h_s, c_s = model(gen, h_s, c_s)
                  if config.sampling_method == 'greedy':
                    gen = gen.argmax(dim=2)
                  else:
                    gen = nn.functional.softmax(gen/config.temperature, dim=2)
                    dist = torch.distributions.categorical.Categorical(gen)
                    gen = dist.sample()
                  generated = torch.cat((generated, gen))
              else:
                generated = [dataset._char_to_ix[random.choice(dataset._chars)] for c in range(batch_sample)]
                generated = torch.LongTensor(generated).view(-1, batch_sample).to(device)
                for l in range(config.generate_length - 1):
                  if l == 0:
                    h_s = torch.zeros(config.lstm_num_layers, batch_sample, config.lstm_num_hidden).to(device)
                    c_s = torch.zeros(config.lstm_num_layers, batch_sample, config.lstm_num_hidden).to(device)
                    gen, h_s, c_s = model(generated, h_s, c_s)
                  else:
                    gen, h_s, c_s = model(gen, h_s, c_s)
                  if config.sampling_method == 'greedy':
                    gen = gen.argmax(dim=2)
                  else:
                    gen = nn.functional.softmax(gen/config.temperature, dim=2)
                    dist = torch.distributions.categorical.Categorical(gen)
                    gen = dist.sample()
                  generated = torch.cat((generated, gen))
              generated = generated.t()
              sentence = [dataset.convert_to_string(idx) for idx in generated.tolist()]
              # Build the output filename once; the temperature is only part of it for random sampling.
              if config.sampling_method == 'random':
                out_file = '{}/{}_{}_{}_{}.txt'.format(config.summary_path, config.generate_mode, datetime.now().strftime("%Y-%m-%d"), config.sampling_method, config.temperature)
              else:
                out_file = '{}/{}_{}_{}.txt'.format(config.summary_path, config.generate_mode, datetime.now().strftime("%Y-%m-%d"), config.sampling_method)
              with open(out_file, 'a', encoding='utf-8') as file:
                file.write('--------------\n')
                file.write('Training Step: {}\n'.format(step + i * max_step))
                file.write('--------------\n')
                for sen in sentence:
                  file.write('{}\n'.format(sen))
                file.write('\n')

          if (step + i * max_step) == config.train_steps:
              # If you receive a PyTorch data-loader error, check this bug report:
              # https://github.com/pytorch/pytorch/pull/9655
              break

      if (step + i * max_step) == config.train_steps:
        break

    print('Done training.')
    fig, axs = plt.subplots(1, 2, figsize=(10,5))
    axs[0].plot(iteration, acc)
    axs[0].set_xlabel('Iteration')
    axs[0].set_ylabel('Accuracy')
    axs[1].plot(iteration, los)
    axs[1].set_xlabel('Iteration')
    axs[1].set_ylabel('Loss')
    fig.tight_layout()
    plt.show()
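The sampling loops above feed each predicted character back into the model while carrying the LSTM hidden and cell state forward, switching between greedy argmax and temperature sampling. A self-contained sketch of that loop with a stand-in character LSTM (module names and sizes are illustrative, not the original TextGenerationModel):

import torch
import torch.nn as nn

vocab_size, hidden_size, num_layers = 87, 128, 2        # illustrative sizes
embed = nn.Embedding(vocab_size, hidden_size)
lstm = nn.LSTM(hidden_size, hidden_size, num_layers)
head = nn.Linear(hidden_size, vocab_size)

def generate(first_char_id, length, temperature=None):
    """Generate `length` character ids, feeding each prediction back in."""
    x = torch.tensor([[first_char_id]])                 # (seq_len=1, batch=1)
    state = None                                         # nn.LSTM starts from zeros when state is None
    out = [first_char_id]
    for _ in range(length - 1):
        hidden, state = lstm(embed(x), state)            # state carries (h, c) across steps
        logits = head(hidden[-1])                        # logits for the last time step, (1, vocab)
        if temperature is None:
            x = logits.argmax(dim=-1, keepdim=True)      # greedy
        else:
            probs = torch.softmax(logits / temperature, dim=-1)
            x = torch.multinomial(probs, 1)              # random sampling with temperature
        out.append(x.item())
    return out

print(generate(first_char_id=3, length=20, temperature=0.5))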
Example #24
0
def train(config):

    # Initialize the device which to run the model on
    device = torch.device(config.device)

    # Initialize the dataset and data loader (note the +1)
    dataset = TextDataset(config.txt_file, config.seq_length)  # fixme
    data_loader = DataLoader(dataset, config.batch_size, num_workers=1)
    test_loader = DataLoader(dataset, config.test_size, num_workers=1)

    results = open(config.out_file, "w+")
    results.write(
        "#model_type   : {}-layer LSTM\n#seq_length   : {}\n#input_dim    : {}\n#num_classes  : {}\n#num_hidden   :\
 {}\n#batch_size   : {}\n#learn_rate   : {}\n#train_steps  : {}\n#max_norm     : {}\n#lr_decay     : {}\n#lr_step      :\
 {}\n".format(config.lstm_num_layers, config.seq_length, dataset.vocab_size,
              dataset.vocab_size, config.lstm_num_hidden, config.batch_size,
              config.learning_rate, config.train_steps, config.max_norm,
              config.learning_rate_decay, config.learning_rate_step))
    results.write("#train_step accuracy loss\n")
    gen_text = open(config.out_file[:-4] + ".txt", 'w+', encoding="utf-8")

    # Initialize the model that we are going to use
    model = TextGenerationModel(config.batch_size,
                                config.seq_length,
                                dataset.vocab_size,
                                lstm_num_hidden=config.lstm_num_hidden,
                                lstm_num_layers=config.lstm_num_layers,
                                device=device).to(device)

    # Setup the loss and optimizer
    criterion = torch.nn.CrossEntropyLoss()

    #train
    optimizer = torch.optim.RMSprop(model.parameters(),
                                    lr=config.learning_rate)

    prevstep = 0
    while True:  #otherwise it stop after 1 epoch
        for step, (batch_inputs, batch_targets) in enumerate(data_loader):
            step = prevstep + step
            batch_inputs = torch.nn.functional.one_hot(
                batch_inputs.type(torch.LongTensor),
                dataset.vocab_size).type(torch.FloatTensor).to(device)
            batch_targets = batch_targets.to(device)

            optimizer.zero_grad()

            batch_y = model(batch_inputs)  #without softmax, dim: B x T x C

            # Only for time measurement of step through network
            t1 = time.time()

            loss = criterion(batch_y.transpose(1, 2), batch_targets)

            loss.backward()

            # prevent gradients from exploding: clip after backward and before the optimizer step
            torch.nn.utils.clip_grad_norm_(model.parameters(),
                                           max_norm=config.max_norm)

            # skip the very first update so the untrained model can be evaluated below
            if step > 0:
                optimizer.step()

            # Just for time measurement
            t2 = time.time()
            examples_per_second = config.batch_size / float(t2 - t1)

            if step % config.print_every == 0:

                predictions = torch.argmax(torch.softmax(batch_y, 2), 2)
                accuracy = torch.sum(predictions == batch_targets).type(
                    torch.FloatTensor) / config.batch_size / config.seq_length

                #            #uncomment for printing
                #            print("[{}] Train Step {:04d}/{:04d}, Batch Size = {}, Examples/Sec = {:.2f}, "
                #               "Accuracy = {:.2f}, Loss = {:.3f}".format(
                #                 datetime.now().strftime("%Y-%m-%d %H:%M"), step,
                #                 int(config.train_steps), config.batch_size, examples_per_second,
                #                 accuracy, loss))

                #writing results
                results.write("%d %.3f %.3f\n" % (step, accuracy, loss))

            if step == 0:
                optimizer.step()  # apply the deferred first update after evaluating the initial model

            if np.round(accuracy, 2) == 1.00:
                print("Achieved >99.95% accuracy.")
                break

            if step % config.sample_every == 0:

                gen_text.write("--- Step: {} ---\n".format(step))
                with torch.no_grad():
                    #get random char from alphabet
                    rnd_char = np.random.choice(list(map(chr,
                                                         range(97,
                                                               123)))).upper()
                    prev = torch.zeros(dataset.vocab_size).to(device)
                    prev[dataset._chars.index(rnd_char)] = 1
                    prev = prev.view(1, 1, -1)  #dim: B x T x D
                    #feed to network, maybe a bit redundant
                    for i in range(config.out_seq - 1):
                        gen_y = model(prev)  #dim: B x T x C
                        char = torch.zeros(dataset.vocab_size).to(device)
                        softm = torch.softmax(
                            config.temp * gen_y[0, -1, :],
                            0).squeeze()  #temperature included
                        #                       char[np.random.choice(np.arange(dataset.vocab_size),p=np.array(softm.cpu()))] = 1
                        char[torch.argmax(
                            softm
                        )] = 1  #greedy, uncomment prev line for random
                        prev = torch.cat([prev, char.view(1, 1, -1)], 1)
                    txt = dataset.convert_to_string(
                        torch.argmax(prev, 2).squeeze().cpu())
                    gen_text.write(txt + "\n\n")

            if step == config.train_steps:
                # If you receive a PyTorch data-loader error, check this bug report:
                # https://github.com/pytorch/pytorch/pull/9655
                break
        prevstep = step
        if np.round(accuracy, 2) == 1.00 or step == config.train_steps:
            break

    print('Done training.')

    # Saving the model did not work here, so the different temperatures are hard-coded below as a workaround.
    with torch.no_grad():
        length = 500
        gen_text.write("--- Greedy ---\n")
        #get random char from alphabet
        rnd_char = np.random.choice(list(map(chr, range(97, 123)))).upper()
        prev = torch.zeros(dataset.vocab_size).to(device)
        prev[dataset._chars.index(rnd_char)] = 1
        prev = prev.view(1, 1, -1)  #dim: B x T x D
        #feed to network, maybe a bit redundant
        for i in range(length - 1):
            gen_y = model(prev)  #dim: B x T x C
            char = torch.zeros(dataset.vocab_size).to(device)
            softm = torch.softmax(config.temp * gen_y[0, -1, :],
                                  0).squeeze()  #temperature included
            #             char[np.random.choice(np.arange(dataset.vocab_size),p=np.array(softm.cpu()))] = 1
            char[torch.argmax(softm)] = 1  #greedy
            prev = torch.cat([prev, char.view(1, 1, -1)], 1)
        txt = dataset.convert_to_string(torch.argmax(prev, 2).squeeze().cpu())
        gen_text.write(txt + "\n\n")
        for t in [0.5, 1.0, 2.0]:
            gen_text.write("--- Temperature: {} ---\n".format(t))
            #get random char from alphabet
            rnd_char = np.random.choice(list(map(chr, range(97, 123)))).upper()
            prev = torch.zeros(dataset.vocab_size).to(device)
            prev[dataset._chars.index(rnd_char)] = 1
            prev = prev.view(1, 1, -1)  #dim: B x T x D
            #feed to network, maybe a bit redundant
            for i in range(length - 1):
                gen_y = model(prev)  #dim: B x T x C
                char = torch.zeros(dataset.vocab_size).to(device)
                softm = torch.softmax(t * gen_y[0, -1, :],
                                      0).squeeze()  #temperature included
                char[np.random.choice(np.arange(dataset.vocab_size),
                                      p=np.array(softm.cpu()))] = 1
                #                 char[torch.argmax(softm)] = 1 #greedy
                prev = torch.cat([prev, char.view(1, 1, -1)], 1)
            txt = dataset.convert_to_string(
                torch.argmax(prev, 2).squeeze().cpu())
            gen_text.write(txt + "\n\n")

            gen_text.write("--- Temperature: {}. Finish ---\n".format(t))
            finish = "Sleeping beauty is "
            prev = torch.zeros(1, len(finish), dataset.vocab_size).to(device)
            for i, s in enumerate(finish):
                prev[0, i, dataset._chars.index(s)] = 1
            for i in range(length - len(finish)):
                gen_y = model(prev)  #dim: B x T x C
                char = torch.zeros(dataset.vocab_size).to(device)
                softm = torch.softmax(t * gen_y[0, -1, :],
                                      0).squeeze()  #temperature included
                char[np.random.choice(np.arange(dataset.vocab_size),
                                      p=np.array(softm.cpu()))] = 1
                #                 char[torch.argmax(softm)] = 1 #greedy
                prev = torch.cat([prev, char.view(1, 1, -1)], 1)
            txt = dataset.convert_to_string(
                torch.argmax(prev, 2).squeeze().cpu())
            gen_text.write(txt + "\n\n")

    results.close()
    gen_text.close()
Example #25
0
def train(config):
    
    
    # Initialize the device which to run the model on
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    
    # Initialize the dataset and data loader (note the +1)
    dataset = TextDataset(config.txt_file, config.seq_length)   # fixme
    data_loader = DataLoader(dataset, batch_size = config.batch_size, shuffle=True, num_workers=1)
    vocab_size = dataset.vocab_size
    # char2i = dataset._char_to_ix
    # i2char = dataset._ix_to_char
    # ----------------------------------------
    
    # Initialize the model that we are going to use
    model = TextGenerationModel(config.batch_size, config.seq_length, vocab_size, \
                                config.lstm_num_hidden, config.lstm_num_layers, device)  # fixme
    model.to(device)

    # Setup the loss and optimizer
    criterion = nn.NLLLoss()  # fixme
    optimizer = optim.RMSprop(model.parameters(), lr = config.learning_rate)  # fixme
    logSoftmax = nn.LogSoftmax(dim=2)
    
    # Learning rate scheduler
    lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer, \
                  step_size=config.learning_rate_step, gamma=config.learning_rate_decay)
    step = 1
    
    if config.resume:
        if os.path.isfile(config.resume):
            print("Loading checkpoint '{}'".format(config.resume))
            checkpoint = torch.load(config.resume)
            step = checkpoint['step']
            model.load_state_dict(checkpoint['state_dict'])
            optimizer.load_state_dict(checkpoint['optimizer'])
            lr_scheduler.load_state_dict(checkpoint['lr_scheduler'])
            print("Checkpoint loaded '{}', steps {}".format(config.resume, checkpoint['step']))

    if not os.path.isdir(config.summary_path):
            os.makedirs(config.summary_path)

    if config.sampling == "greedy":
        f = open(os.path.join(config.summary_path,"sampled_"+config.sampling+".txt"), "w+")
    else:
        f = open(os.path.join(config.summary_path,"sampled_"+config.sampling+"_"+str(config.temp)+".txt"), "w+")

    best_accuracy = 0.0
    pl_loss =[]
    average_loss =[]
    acc =[]

    for epochs in range(30):

        if step == config.train_steps:
            print('Done training.')
            break

        for (batch_inputs, batch_targets) in data_loader:

            if config.batch_size!=batch_inputs.size()[0]:
                print("batch mismatch")
                break

            # Only for time measurement of step through network
            t1 = time.time()
            model.hidden = model.init_hidden(config.batch_size)

            model.zero_grad()
            #######################################################
            # Add more code here ...
            
            #convert batch inputs to one-hot vector
            batch_inputs= torch.zeros(config.batch_size, config.seq_length, vocab_size).scatter_(2,batch_inputs.unsqueeze(-1),1.0)
            
            batch_inputs, batch_targets = batch_inputs.to(device), batch_targets.to(device)

            predictions, _ = model(batch_inputs)
            if config.sampling=="greedy":
                predictions = logSoftmax(predictions)
            else:
                predictions = logSoftmax(predictions/config.temp)

            loss = criterion(predictions.transpose(2,1), batch_targets)   # fixme

            _, predictions = torch.max(predictions, dim=2, keepdim=True)
            predictions = (predictions.squeeze(-1) == batch_targets).float()
            accuracy = torch.mean(predictions)
            
            
            
            loss.backward()
            torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=config.max_norm)
            
            optimizer.step()
            lr_scheduler.step()

            #######################################################

            # Just for time measurement
            t2 = time.time()
            examples_per_second = config.batch_size/float(t2-t1)
            pl_loss.append(loss.item())
            average_loss.append(np.mean(pl_loss[:-100:-1]))
            acc.append(accuracy)


            if step % config.print_every == 0:

                print("[{}] Train Step {}/{}, Batch Size = {}, Examples/Sec = {:.2f}, "
                    "Accuracy = {:.2f}, Loss = {:.3f}".format(
                        datetime.now().strftime("%Y-%m-%d %H:%M"), step,
                        config.train_steps, config.batch_size, examples_per_second,
                        accuracy, loss.item()
                ))
                
                

            if step % config.sample_every == 0:

                model.eval()

                with torch.no_grad():
                    char_ix = generate_sample(model, vocab_size, config.seq_length, device, config)
                    sentence = dataset.convert_to_string(char_ix)

                f.write("--------------"+str(step)+"----------------\n")
                f.write(sentence+"\n")
                print(sentence)
                print()
                model.train()
                # ###########################################################################
                # save training loss
                plt.plot(pl_loss,'r-', label="Batch loss", alpha=0.5)
                plt.plot(average_loss,'g-', label="Average loss", alpha=0.5)
                plt.legend()
                plt.xlabel("Iterations")
                plt.ylabel("Loss")  
                plt.title("Training Loss")
                plt.grid(True)
                # plt.show()
                if config.sampling == "greedy":
                    plt.savefig("loss_"+config.sampling+".png")
                else:
                    plt.savefig("loss_"+config.sampling+"_"+str(config.temp)+".png")

                plt.close()
                ################################training##################################################
                plt.plot(acc,'g-', alpha=0.5)
                plt.xlabel("Iterations")
                plt.ylabel("Accuracy")
                plt.title("Train Accuracy")
                plt.grid(True)
                if config.sampling == "greedy":
                    plt.savefig("accuracy_"+config.sampling+".png")
                else:
                    plt.savefig("accuracy_"+config.sampling+"_"+str(config.temp)+".png")
                plt.close()

            if step == config.train_steps:
                # If you receive a PyTorch data-loader error, check this bug report:
                # https://github.com/pytorch/pytorch/pull/9655
                break
            
            step+=1
            
        save_checkpoint({
            'epoch': epochs + 1,
            'step': step,
            'state_dict': model.state_dict(),
            'optimizer': optimizer.state_dict(),
            'lr_scheduler':lr_scheduler.state_dict(),
            'accuracy': accuracy
                }, config)
        
    f.close()
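The snippet above periodically writes a checkpoint containing model, optimizer and scheduler state, and restores all three when config.resume points to a file. A minimal round-trip sketch of that pattern (the path and hyperparameters are illustrative):

import torch

model = torch.nn.Linear(10, 10)                          # stand-in model
optimizer = torch.optim.RMSprop(model.parameters(), lr=1e-3)
scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=1000, gamma=0.96)

# Save everything needed to continue training later.
torch.save({
    'step': 500,
    'state_dict': model.state_dict(),
    'optimizer': optimizer.state_dict(),
    'lr_scheduler': scheduler.state_dict(),
}, 'checkpoint.pt')

# Restore the exact training state from the checkpoint.
checkpoint = torch.load('checkpoint.pt')
model.load_state_dict(checkpoint['state_dict'])
optimizer.load_state_dict(checkpoint['optimizer'])
scheduler.load_state_dict(checkpoint['lr_scheduler'])
step = checkpoint['step']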
Example #26
0
def train(config):

    if not os.path.isdir(CHECKPOINTS_FOLDER):
        os.mkdir(CHECKPOINTS_FOLDER)

    # Initialize the device which to run the model on
    device = torch.device(config.device)

    # Initialize the dataset and data loader (note the +1)
    dataset = TextDataset(config.txt_file, config.seq_length,
                          config.batch_size, config.train_steps)
    data_loader = DataLoader(dataset, config.batch_size, num_workers=1)

    # Initialize the model that we are going to use
    model = TextGenerationModel(config.batch_size, config.seq_length,
                                dataset.vocab_size).to(device=device)

    # Setup the loss and optimizer
    criterion = torch.nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=config.learning_rate)

    generated_sentences = []

    for step, (batch_inputs, batch_targets) in enumerate(data_loader):
        # Only for time measurement of step through network
        t1 = time.time()

        optimizer.zero_grad()

        batch_inputs = torch.unsqueeze(torch.stack(batch_inputs),
                                       2).float().to(device=device)
        batch_targets = torch.cat(batch_targets).to(device=device)

        predictions = model(batch_inputs, config.batch_size)

        loss = criterion(predictions, batch_targets)
        accuracy = get_accuracy(predictions, batch_targets)

        loss.backward()
        # clip gradients after the backward pass and before the optimizer step
        torch.nn.utils.clip_grad_norm_(model.parameters(),
                                       max_norm=config.max_norm)
        optimizer.step()

        # Just for time measurement
        t2 = time.time()
        examples_per_second = config.batch_size / float(t2 - t1)

        if step % config.print_every == 0:
            print(
                "[{}] Train Step {:04d}/{:04d}, Batch Size = {}, Examples/Sec = {:.2f}, "
                "Accuracy = {:.2f}, Loss = {:.3f}".format(
                    datetime.now().strftime("%Y-%m-%d %H:%M"), step,
                    int(config.train_steps), config.batch_size,
                    examples_per_second, accuracy, loss))

        if step % config.sample_every == 0:
            # Generate some sentences by sampling from the model
            sentence = generate_sentence(model, dataset, config)
            generated_sentences.append(sentence)

    state = {
        'state_dict': model.state_dict(),
        'optimizer': optimizer.state_dict(),
    }
    torch.save(
        state, 'checkpoints/{}'.format(
            config.txt_file.split("/", 1)[1].replace('.txt', '')))

    filename = config.txt_file.replace('.txt', '') + 'generated_sentences.txt'
    with open(filename, 'w') as f:
        f.write('\n'.join(generated_sentences))

    print('Done training.')
Example #27
0
def train(config):

    # Initialize the device which to run the model on
    device = torch.device(config.device)

    # Initialize the dataset and data loader (note the +1)
    dataset = TextDataset(config.txt_file, config.seq_length)  # fixme
    data_loader = DataLoader(dataset, config.batch_size, num_workers=1)

    # Save the instantiated dataset.
    with open('model_ckpt/train.dataset', 'wb') as dataset_file:
        pickle.dump(dataset, dataset_file)

    # Initialize the model that we are going to use
    model = TextGenerationModel(config.batch_size, config.seq_length, dataset.vocab_size, config.lstm_num_hidden,
                                config.lstm_num_layers, device, config.dropout_keep_prob)  # fixme

    # Setup the loss and optimizer
    criterion = nn.CrossEntropyLoss()  # reduction='mean'(default) - average over all timesteps and all batches as they are merged.
    optimizer = optim.RMSprop(model.parameters(), config.learning_rate)  # fixme
    # optimizer = optim.Adam(model.parameters(), config.learning_rate)

    # Create a tensor to hold the one-hot encoding for the batch inputs.
    onehot_batch_inputs = torch.FloatTensor(config.seq_length, config.batch_size, dataset.vocab_size)
    onehot_batch_inputs = onehot_batch_inputs.to(device)

    h_init = torch.zeros(config.lstm_num_layers, config.batch_size, config.lstm_num_hidden, device=device)
    c_init = torch.zeros(config.lstm_num_layers, config.batch_size, config.lstm_num_hidden, device=device)

    # Record the learning rate steps individually for learning rate decay.
    lr_step = 0
    lr = 1
    for epoch in np.arange(config.epochs):
        losses = []
        accs = []
        for step, (batch_inputs, batch_targets) in enumerate(data_loader):

            # Only for time measurement of step through network
            t1 = time.time()

            #######################################################
            # Add more code here ...
            #######################################################
            model.train()
            # Convert the DataLoader output from list of tensors to tensors.
            batch_inputs = torch.stack(batch_inputs)
            batch_inputs = batch_inputs.to(device)

            # If the epoch is finished and there is not enough character to extract, break the loop
            if batch_inputs.shape[0] * batch_inputs.shape[1] != onehot_batch_inputs.shape[0] * onehot_batch_inputs.shape[1]:
                break

            # Zero the one-hot encoding and encode according to batch_inputs.
            onehot_batch_inputs.zero_()
            onehot_batch_inputs.scatter_(2, batch_inputs.unsqueeze_(-1), 1)

            # Convert the DataLoader output from list of tensors to tensors.
            batch_targets = torch.stack(batch_targets)
            batch_targets = batch_targets.to(device)

            # Learning rate decay.
            if lr_step % config.learning_rate_step == 0:
                optimizer = optim.RMSprop(model.parameters(), config.learning_rate * lr)
                lr *= config.learning_rate_decay

            optimizer.zero_grad()
            logits, _, _ = model(onehot_batch_inputs, h_init, c_init)
            # The seq_length dimension and batch_size dimension of the logits and batch_targets are merged together, and the mean is computed over this new dimension.
            loss = criterion(logits.view(-1, dataset.vocab_size), batch_targets.view(-1))   # fixme
            loss.backward()

            torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=config.max_norm)

            accuracy = accuracy_fn(logits.view(-1, dataset.vocab_size), batch_targets.view(-1))  # fixme
            optimizer.step()

            losses.append(loss.item())
            accs.append(accuracy)

            # Just for time measurement
            t2 = time.time()
            examples_per_second = config.batch_size/float(t2-t1)

            if step % config.print_every == 0:
                print("[{}] Epoch {}/{}, Train Step {:04d}/{:04d}, Batch Size = {}, Examples/Sec = {:.2f}, "
                      "Accuracy = {:.2f}, Loss = {:.3f}".format(
                          datetime.now().strftime("%Y-%m-%d %H:%M"), epoch + 1, config.epochs, step,
                          config.train_steps, config.batch_size, examples_per_second,
                          accuracy, loss
                      ))

            if step % config.sample_every == 0:
                # Generate some sentences by sampling from the model
                model.eval()
                # Create tensor to hold the generated samples.
                samples = torch.zeros((5, config.sample_length), dtype=torch.int, device=device)
                # Initialize the first characters for the samples.
                start_chars = torch.randint(dataset.vocab_size, size=(1, 5, 1), dtype=torch.long, device=device)
                samples[:, 0] = start_chars.squeeze()
                # Create a tensor to hold the one-hot encoding for the output characters of the LSTM network (one per each time step).
                onehot_chars = torch.zeros((1, 5, dataset.vocab_size), device=device)
                onehot_chars.scatter_(2, start_chars, 1)

                last_h = torch.zeros(config.lstm_num_layers, 5, config.lstm_num_hidden, device=device)
                last_c = torch.zeros(config.lstm_num_layers, 5, config.lstm_num_hidden, device=device)
                for t in np.arange(config.sample_length - 1):
                    logits, last_h, last_c = model(onehot_chars, last_h, last_c)
                    next_chars = logits.squeeze().argmax(-1)
                    onehot_chars.zero_()
                    onehot_chars.scatter_(2, next_chars.view(1, 5, 1), 1)
                    samples[:, t + 1] = next_chars

                samples = samples.tolist()
                samples = [dataset.convert_to_string(sample) for sample in samples]
                # Output the samples into a text file.
                with open(config.summary_path + 'samples.txt', 'a') as txt_file:
                    txt_file.write('Epoch: {}\nStep: {}\n'.format(epoch + 1, step))
                    txt_file.writelines(map(lambda x: x + '\n', samples))

            if step == config.train_steps:
                # If you receive a PyTorch data-loader error, check this bug report:
                # https://github.com/pytorch/pytorch/pull/9655
                break

            lr_step += 1
        # After each training epoch, save the model and the training loss and accuracy.
        model.train()
        torch.save(model.state_dict(), 'model_ckpt/lstm_gen_epoch{}.ckpt'.format(epoch + 1))
        with open(config.summary_path + 'train_epoch{}.csv'.format(epoch + 1), 'w', newline='') as csv_file:
            csv_writer = csv.writer(csv_file)
            csv_writer.writerow(losses)
            csv_writer.writerow(accs)

    print('Done training.')
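The learning-rate decay above is implemented by re-creating the RMSprop optimizer with a smaller learning rate every learning_rate_step steps, which also resets the optimizer's internal statistics. A sketch of the same geometric decay with torch.optim.lr_scheduler.StepLR, which adjusts the rate in place (values are illustrative; forward and backward passes are elided):

import torch

model = torch.nn.Linear(10, 10)                          # stand-in model
optimizer = torch.optim.RMSprop(model.parameters(), lr=2e-3)
scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=5000, gamma=0.96)

for step in range(3):
    # ... forward pass, loss.backward() ...
    optimizer.step()
    scheduler.step()          # multiplies the learning rate by gamma every step_size steps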
Example #28
0
def evaluate(config):

    # Initialize the device which to run the model on
    device = torch.device(config.device)

    # Load the dataset
    with open(config.dataset, 'rb') as dataset_file:
        dataset = pickle.load(dataset_file)

    # Initialize the model that we are going to use
    model = TextGenerationModel(config.batch_size, config.seq_length,
                                dataset.vocab_size, config.lstm_num_hidden,
                                config.lstm_num_layers, device,
                                config.dropout_keep_prob)  # fixme
    model.load_state_dict(torch.load(config.ckpt))

    # Generate some sentences by sampling from the model
    model.eval()
    # Create tensor to hold the generated samples.
    samples = torch.zeros((config.sample_batch_size, config.sample_length),
                          dtype=torch.int,
                          device=device,
                          requires_grad=False)

    last_h = torch.zeros(config.lstm_num_layers,
                         config.sample_batch_size,
                         config.lstm_num_hidden,
                         device=device,
                         requires_grad=False)
    last_c = torch.zeros(config.lstm_num_layers,
                         config.sample_batch_size,
                         config.lstm_num_hidden,
                         device=device,
                         requires_grad=False)

    if config.pre_text:
        pre_input = torch.tensor(
            [dataset._char_to_ix[ch] for ch in config.pre_text] * 10,
            device=device,
            requires_grad=False).view(config.sample_batch_size,
                                      -1).t().unsqueeze(-1)
        onehot_pre_input = torch.zeros(
            (pre_input.shape[0], pre_input.shape[1], dataset.vocab_size),
            device=device,
            requires_grad=False)
        onehot_pre_input.scatter_(2, pre_input, 1)
        logits, last_h, last_c = model(onehot_pre_input, last_h, last_c)
        logits = nn.functional.softmax(logits[-1, :, :].unsqueeze(-1) /
                                       config.temperature,
                                       dim=1)
        start_chars = logits.squeeze().argmax(-1)
        samples[:, 0] = start_chars
        onehot_chars = torch.zeros(
            (1, config.sample_batch_size, dataset.vocab_size),
            device=device,
            requires_grad=False)
        onehot_chars.scatter_(2,
                              start_chars.view(1, config.sample_batch_size, 1),
                              1)
    else:
        # Initialize the first characters for the samples.
        start_chars = torch.randint(dataset.vocab_size,
                                    size=(1, config.sample_batch_size, 1),
                                    dtype=torch.long,
                                    device=device,
                                    requires_grad=False)
        samples[:, 0] = start_chars.squeeze()
        # Create a tensor to hold the one-hot encoding for the output characters of the LSTM network (one per each time step).
        onehot_chars = torch.zeros(
            (1, config.sample_batch_size, dataset.vocab_size),
            device=device,
            requires_grad=False)
        onehot_chars.scatter_(2, start_chars, 1)

    for t in range(config.sample_length - 1):
        logits, last_h, last_c = model(onehot_chars, last_h, last_c)
        # Greedy decoding: the argmax is unaffected by the temperature; sampling
        # (e.g. torch.multinomial) would be needed for the temperature to matter.
        probs = nn.functional.softmax(logits / config.temperature, dim=2)
        next_chars = probs.squeeze(0).argmax(-1)
        onehot_chars.zero_()
        onehot_chars.scatter_(2, next_chars.view(1, config.sample_batch_size,
                                                 1), 1)
        samples[:, t + 1] = next_chars

    samples = samples.tolist()
    samples = [dataset.convert_to_string(sample) for sample in samples]
    # Output the samples into a text file.
    with open(config.summary_path + 'samples.txt', 'a') as txt_file:
        txt_file.write('Temperature: {}\nSample length: {}\n'.format(
            config.temperature, config.sample_length))
        txt_file.writelines(map(lambda x: config.pre_text + x + '\n', samples))

    print('Done evaluation.')
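Note that the decoding above is greedy, so the temperature only rescales the logits and never changes which character is picked. A minimal sketch of temperature-based sampling that could replace the argmax step is shown below; it reuses the `logits`, `config.temperature`, and `sample_batch_size` names from `evaluate`, and the multinomial draw is an assumption about the intended behaviour rather than part of the original snippet.

import torch
import torch.nn as nn

def sample_next_chars(logits, temperature):
    # logits: (1, sample_batch_size, vocab_size), as produced inside the loop above.
    # Temperature < 1 sharpens the distribution, > 1 flattens it.
    probs = nn.functional.softmax(logits.squeeze(0) / temperature, dim=-1)
    # Draw one character index per sample instead of taking the argmax.
    return torch.multinomial(probs, num_samples=1).squeeze(-1)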
Example #29
0
def train(config, lr):

    # Initialize the device which to run the model on
    #device = torch.device(config.device)

    # Initialize the dataset and data loader (note the +1)
    dataset = TextDataset(config.txt_file, config.seq_length)
    data_loader = DataLoader(dataset, config.batch_size, num_workers=1)

    # Initialize the model that we are going to use
    model = TextGenerationModel(
        batch_size=config.batch_size,
        seq_length=config.seq_length,
        vocabulary_size=dataset.vocab_size,
        lstm_num_hidden=config.lstm_num_hidden,
        lstm_num_layers=config.lstm_num_layers)

    if torch.cuda.is_available():
        device = 'cuda'
    else:
        device = 'cpu'
    print('Currently using: ', device)

    model = model.to(device)
    # Setup the loss and optimizer
    criterion = torch.nn.CrossEntropyLoss()
    # The RMSprop optimizer is (re)created for each run inside the loop below.
    acc_list = []
    loss_list = []

    test_batches_in = []
    test_batches_ta = []

    test_acc = []

    best_accuracy = 0

    ### Flag for temperature
    temp = True
    temp_value = 2

    for runs in range(3):
        optimizer = torch.optim.RMSprop(model.parameters(), lr=lr)

        for step, (batch_inputs, batch_targets) in enumerate(data_loader):

            if step % config.print_every != 0 or step == 0:

                t1 = time.time()
                #print(type(step))

                #model.train()

                #######################################################
                # One-hot encode the input batch and run the forward pass.
                zerox = create_zerox(batch_inputs, dataset.vocab_size, device)

                output, _ = model(zerox)

                targets = torch.stack(batch_targets).to(device)

                output_indices = torch.argmax(output, dim=2).to(device)

                output = output.transpose(0, 1).transpose(1, 2).to(device)

                loss_for_backward = criterion(output.transpose(0, 2),
                                              targets.t()).to(device)

                optimizer.zero_grad()
                loss_for_backward.backward()
                # Clip gradients after the backward pass, just before the update.
                torch.nn.utils.clip_grad_norm_(model.parameters(),
                                               max_norm=config.max_norm)
                optimizer.step()

                correct_indices = output_indices == targets.transpose(
                    0, 1).to(device)

                #return correct_indices
                #######################################################

                #loss = criterion.forward(output, targets)

                #accuracy = int(sum(sum(correct_indices)))/int(correct_indices.shape[0]*
                #correct_indices.shape[1])
                #print(type(accuracy),type(loss))
                # Just for time measurement
                t2 = time.time()
                examples_per_second = config.batch_size / float(t2 - t1)

            if step % config.print_every == 0 and step != 0:
                #model.eval()

                zerox = create_zerox(batch_inputs, dataset.vocab_size, device)

                output, _ = model.forward(zerox)

                output_indices = torch.argmax(output, dim=2).to(device)

                output = output.transpose(0, 1).transpose(1, 2).to(device)
                targets = torch.stack(batch_targets).to(device)

                #loss_for_backward = criterion(output,targets).to(device)
                loss_for_backward = criterion(output.transpose(0, 2),
                                              targets.t()).to(device)
                correct_indices = output_indices == targets.transpose(
                    0, 1)  #.to(device)
                #return output_indices, targets.transpose(0,1)

                #print(correct_indices.shape)
                #accuracy = sum(acc_list) / len(acc_list)
                #accuracy = int(sum(sum(correct_indices)))/int(correct_indices.numel())
                accuracy = np.array(correct_indices.detach().cpu()).mean()

                #print("[{}] Train Step {:04d}/{:f}, Batch Size = {}, Examples/Sec = {:.2f}, "
                #      "Accuracy = {:.2f}, Loss = {:.3f}".format(
                #        datetime.now().strftime("%Y-%m-%d %H:%M"), step,
                #        config.train_steps, config.batch_size, examples_per_second,
                #        accuracy,
                #        loss_for_backward
                #))
                acc_list.append(accuracy)
                loss_list.append(float(loss_for_backward))

                if accuracy > best_accuracy:
                    best_accuracy = accuracy
                    torch.save(
                        {
                            'model_state_dict': model.state_dict(),
                            'optimizer_state_dict': optimizer.state_dict()
                        }, 'model.pth')

            if step % config.sample_every == 0:
                # Generate some sentences by sampling from the model
                ## Generate a good sample instead of the same one over and over again
                #model.eval()

                ### Append every modulo batch to a list of test batches and run
                ### over that list to test

                zerox = create_zerox(batch_inputs, dataset.vocab_size, device)

                test_batches_in.append(zerox)

                targets = torch.stack(batch_targets).to(device)

                test_batches_ta.append(targets)

                batch_inputz = torch.stack(batch_inputs).to(device)
                batch_input = batch_inputz.transpose(1, 0).to(device)

                output, _ = model.forward(zerox)  #.to(device)
                output_indices = torch.argmax(output, dim=2).to(device)
                output = output.transpose(0, 1).transpose(1, 2).to(device)

                loss_for_backward = criterion(output, targets).to(device)
                correct_indices = output_indices == targets.transpose(
                    0, 1).to(device)

                best_sample = np.argmax(
                    np.asarray(sum(correct_indices.t().detach().cpu())))
                print(
                    'Real: ',
                    dataset.convert_to_string(
                        np.asarray(batch_input[best_sample].cpu())))
                output, _ = model.forward(zerox)  #.to(device)
                output_indices = torch.argmax(output, dim=2).to(device)
                print(
                    'prediction: ',
                    dataset.convert_to_string(
                        np.asarray(output_indices[best_sample].cpu())))

                bc = int(sum(correct_indices.t().detach().cpu())
                         [best_sample]) / config.seq_length
                print('This sample had:', bc, 'characters right')

                output = np.random.randint(dataset.vocab_size)
                letters = [output]

                greedy_output = np.random.randint(dataset.vocab_size)
                greedy_letters = [greedy_output]

                Temperature_time(runs, step, dataset, device, model)
                for i in range(config.seq_length - 1):

                    #if temp:
                    # =============================================================================
                    #
                    #                         soft = torch.nn.Softmax(dim=2)
                    #
                    #
                    #
                    #
                    #                         zerol = torch.zeros([1,1,dataset.vocab_size])
                    #                         one_hot_letter = torch.tensor(output).unsqueeze(-1).unsqueeze(-1).unsqueeze(-1)
                    #                         zerol.scatter_(2,one_hot_letter,1)
                    #                         zerol = zerol.to(device)
                    #                         if i == 0:
                    #                             output, h = model.forward(zerol)
                    #
                    #                         else:
                    #                             output, h = model.forward(zerol, h)
                    #
                    #                         tempered = soft(output/temp_value)
                    #                         #print(tempered)
                    #                         output = int(torch.multinomial(tempered[0][0],1).detach().cpu())
                    #                         #print(output)
                    #                         letters.append(output)
                    # =============================================================================

                    greedy_zerol = torch.zeros([1, 1, dataset.vocab_size])
                    greedy_one_hot_letter = torch.tensor(
                        greedy_output).unsqueeze(-1).unsqueeze(-1).unsqueeze(
                            -1)
                    greedy_zerol.scatter_(2, greedy_one_hot_letter, 1)
                    greedy_zerol = greedy_zerol.to(device)

                    if i == 0:
                        greedy_output, greedy_h = model.forward(greedy_zerol)
                    else:
                        greedy_output, greedy_h = model.forward(
                            greedy_zerol, greedy_h)

                    greedy_output = int(
                        torch.argmax(greedy_output, dim=2).detach().cpu())
                    greedy_letters.append(greedy_output)

                print('Greedy Generation ',
                      dataset.convert_to_string(greedy_letters))
                abs_step = (runs * 10000) + step
                line = ' '.join(('Step:', str(abs_step),
                                 dataset.convert_to_string(greedy_letters)))

                with open('GreedyGeneration.txt', 'a') as file:
                    file.write(line + '\n')

    # =============================================================================
    #         if step % (config.sample_every*1000) ==0:
    #             avg = []
    #             print('Testing over ', len(test_batches_in), 'batches')
    #             for z in range(len(test_batches_in)):
    #                 ##OUTPUT
    #                 output,_ = model.forward(test_batches_in[z])
    #                 output_indices = torch.argmax(output, dim=2).to(device)
    #                 output =  output.transpose(0,1).transpose(1,2).to(device)
    #
    #                 ##LOSS AND ACCURACY
    #                 loss_for_backward = criterion(output,targets).to(device)
    #                 correct_indices = output_indices == test_batches_ta[z].transpose(0,1).to(device)
    #
    #                 accuracy = int(sum(sum(correct_indices)))/int(correct_indices.shape[0]*
    #                               correct_indices.shape[1])
    #
    #                 avg.append(accuracy)
    #
    #             this_test_acc = sum(avg)/len(avg)
    #             print('The test accuracy over ',len(test_batches_in), 'is: ', this_test_acc)
    #             test_acc.append(this_test_acc)
    #             #if bc > 0.8:
    #             #    print(bc)
    #             #    #return correct_indices
    #
    # =============================================================================
            if step == config.train_steps:
                # If you receive a PyTorch data-loader error, check this bug report:
                # https://github.com/pytorch/pytorch/pull/9655
                break
        print('Done training.')
        line = ' '.join(
            ('Test accuracy:', str(test_acc), 'Learning rate:', str(lr),
             'Accuracy:', str(acc_list), 'Loss:', str(loss_list)))
        with open('textresults.txt', 'a') as file:
            file.write(line + '\n')

        #hiddenstates = [None]*30
        output = np.random.randint(dataset.vocab_size)
        letters = [output]
        for i in range(400):
            zerol = torch.zeros([1, 1, dataset.vocab_size])
            one_hot_letter = torch.tensor(output).unsqueeze(-1).unsqueeze(
                -1).unsqueeze(-1)
            zerol.scatter_(2, one_hot_letter, 1)
            zerol = zerol.to(device)
            if i == 0:
                output, h = model.forward(zerol)

                output = int(torch.argmax(output, dim=2).detach().cpu())

                letters.append(output)
                #hiddenstates[i] = h
            else:
                output, h = model.forward(zerol, h)

                output = int(torch.argmax(output, dim=2).detach().cpu())

                letters.append(output)
                #hiddenstates[i % 30] = h
        print('Final generation: ', dataset.convert_to_string(letters))
    line = ' '.join(('Accuracy:', str(acc_list), 'Loss', str(loss_list)))
    with open('PrideAndPrejudice2.txt', 'a') as file:
        file.write(line + '\n')
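The training snippet above calls `create_zerox` and `Temperature_time`, neither of which is defined in this excerpt. Below is a minimal sketch of what `create_zerox` most plausibly does, namely one-hot encoding the list of per-time-step index tensors into a batch-first float tensor; the exact shape convention is an assumption inferred from how the model output is later compared against `targets.transpose(0, 1)`.

import torch

def create_zerox(batch_inputs, vocab_size, device):
    # batch_inputs: list of seq_length LongTensors, each of shape (batch_size,).
    # Stack to (seq_length, batch_size) and transpose to batch-first.
    indices = torch.stack(batch_inputs).t().to(device)
    zerox = torch.zeros(indices.shape[0], indices.shape[1], vocab_size,
                        device=device)
    # Place a 1 at the position of each character index.
    zerox.scatter_(2, indices.unsqueeze(-1), 1)
    return zerox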
Example #30
0
def train(config):

    # Initialize the device which to run the model on
    use_cuda = torch.cuda.is_available()
    device = torch.device("cuda:0" if use_cuda else "cpu")

    #path to save the model
    path = "results/"

    # Initialize the dataset and data loader (note the +1)
    dataset = TextDataset(config.txt_file, config.seq_length)
    # print("Data file:", dataset._data[0:5])
    data_loader = DataLoader(dataset, config.batch_size, num_workers=1)

    # Initialize the model that we are going to use
    model = TextGenerationModel(config.batch_size, config.seq_length, dataset,
                                config.lstm_num_hidden, config.lstm_num_layers,
                                device)
    # model = torch.load("results/book_EN_grimms_fairy_tails_final_model.pt")

    # Setup the loss and optimizer
    criterion = torch.nn.CrossEntropyLoss()
    optimizer = torch.optim.RMSprop(model.parameters(),
                                    lr=config.learning_rate)

    # Store Accuracy and losses:
    results = {'accuracy': [], 'loss': []}

    # Training:
    total_steps = 0
    while total_steps <= config.train_steps:

        for step, (batch_inputs, batch_targets) in enumerate(data_loader):

            # Only for time measurement of step through network
            t1 = time.time()
            optimizer.zero_grad()

            # Stacking and One-hot encoding:
            batch_inputs = torch.stack(batch_inputs, dim=1).to(device)
            batch_targets = torch.stack(batch_targets, dim=1).to(device)
            # print("Inputs and targets:", x_onehot.size(), batch_targets.size())

            # forward inputs to the model:
            pred_targets, _ = model.forward(
                index_to_onehot(batch_inputs, dataset.vocab_size))
            # print("pred_targets trans shape:", pred_targets.transpose(2,1).size())
            loss = criterion(pred_targets.transpose(2, 1), batch_targets)

            # Backward pass (the graph is rebuilt every step, so retain_graph is not needed)
            loss.backward()
            optimizer.step()

            #Accuracy
            # argmax along the vocab dimension
            accuracy = (pred_targets.argmax(
                dim=2) == batch_targets).float().mean().item()

            #Update the accuracy and losses for visualization:
            results['accuracy'].append(accuracy)
            results['loss'].append(loss.item())

            # Just for time measurement
            t2 = time.time()
            # examples_per_second = config.batch_size/float(t2-t1)
            total_steps += 1

            if step % config.print_every == 0:

                # print("[{}] Train Step {:04d}/{:04d}, Batch Size = {}, Examples/Sec = {:.2f}, "
                #       "Accuracy = {:.2f}, Loss = {:.3f}".format(
                #         datetime.now().strftime("%Y-%m-%d %H:%M"), step,
                #         config.train_steps, config.batch_size, examples_per_second,
                #         accuracy, loss
                # ))
                print("[{}] Train Step {:07d}/{:07d}, Batch Size = {}, "
                      "Accuracy = {:.2f}, Loss = {:.3f}".format(
                          datetime.now().strftime("%Y-%m-%d %H:%M"), step,
                          total_steps, config.batch_size,
                          results['accuracy'][-1], results['loss'][-1]))

            if step % config.sample_every == 0:
                # Generate some sentences by sampling from the model
                print('GENERATED NO TEMP:')
                print(model.generate_sentence(100))
                print('__________________')
                print('GENERATED 0.5 TEMP:')
                print(model.generate_sentence(100, 0.5))
                print('__________________')
                print('GENERATED 1 TEMP:')
                print(model.generate_sentence(100, 1))
                print('__________________')
                print('GENERATED 2 TEMP:')
                print(model.generate_sentence(100, 2))
                # save model for individual timesteps
                torch.save(
                    model, path + config.txt_file.split('/')[1].split('.')[0] +
                    str(step) + "_model.pt")

            if total_steps >= config.train_steps:
                # If you receive a PyTorch data-loader error, check this bug report:
                # https://github.com/pytorch/pytorch/pull/9655
                break

        print('Done training.')
        #Save the final model

        torch.save(
            model, path + config.txt_file.split('/')[1].split('.')[0] +
            "_final_model.pt")
        print("saving results in folder...")
        np.save(path + "loss_train", results['loss'])
        np.save(path + "accuracy_train", results['accuracy'])