def main():
    """ Main function

        Here, you should instantiate
        1) DataLoaders for training and validation.
           Try SubsetRandomSampler to create these DataLoaders.
        2) model
        3) optimizer
        4) cost function: use torch.nn.CrossEntropyLoss

    """

    parser = argparse.ArgumentParser()
    parser.add_argument('--val_ratio',
                        type=float,
                        default=.5,
                        help='Fraction of the dataset used for validation')
    parser.add_argument('--n_layers',
                        type=int,
                        default=4,
                        help='Number of stacked RNN layers')
    parser.add_argument('--n_hidden',
                        type=int,
                        default=512,
                        help='Number of hidden neurons of RNN cells')
    parser.add_argument('--drop_prob',
                        type=float,
                        default=0.1,
                        help='Dropout probability')
    parser.add_argument('--num_epochs',
                        type=int,
                        default=100,
                        help='The number of epochs')
    parser.add_argument('--lr',
                        type=float,
                        default=0.001,
                        help='Learning rate')
    parser.add_argument('--device',
                        type=str,
                        default='gpu',
                        help="For cpu: 'cpu', for gpu: 'gpu'")
    parser.add_argument('--batch_size',
                        type=int,
                        default=256,
                        help='Size of batches for training')
    parser.add_argument('--model_save_dir',
                        type=str,
                        default='../model',
                        help='Directory for saving model.')
    parser.add_argument('--results_save_dir',
                        type=str,
                        default='../results',
                        help='Directory for saving results.')
    # argparse's type=bool treats any non-empty string (even 'False') as True,
    # so parse the flag value explicitly.
    parser.add_argument('--rnn',
                        type=lambda s: s.lower() in ('true', '1', 'yes'),
                        default=True,
                        help='Train vanilla RNN model')
    parser.add_argument('--lstm',
                        type=lambda s: s.lower() in ('true', '1', 'yes'),
                        default=True,
                        help='Train LSTM model')
    parser.add_argument('--chunk_size',
                        type=int,
                        default=30,
                        help='Chunk size (sequence length)')
    parser.add_argument('--s_step', type=int, default=3, help='Sequence step')

    args = parser.parse_args()

    n_cpu = multiprocessing.cpu_count()

    if args.device == 'gpu':
        args.device = 'cuda' if torch.cuda.is_available() else 'cpu'
    device = torch.device(args.device)

    chunk_size = args.chunk_size
    s_step = args.s_step
    num_epochs = args.num_epochs
    batch_size = args.batch_size
    val_ratio = args.val_ratio
    shuffle_dataset = True
    random_seed = 42

    datasets = dataset.Shakespeare('shakespeare_train.txt', chunk_size, s_step)

    dataset_size = len(datasets)
    indices = list(range(dataset_size))
    split = int(np.floor(val_ratio * dataset_size))
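    # `split` is the number of samples reserved for the validation set.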

    if shuffle_dataset:
        np.random.seed(random_seed)
        np.random.shuffle(indices)

    train_indices, val_indices = indices[split:], indices[:split]

    train_sampler = SubsetRandomSampler(train_indices)
    valid_sampler = SubsetRandomSampler(val_indices)

    trn_loader = DataLoader(datasets,
                            batch_size=batch_size,
                            sampler=train_sampler,
                            num_workers=n_cpu)
    val_loader = DataLoader(datasets,
                            batch_size=batch_size,
                            sampler=valid_sampler,
                            num_workers=n_cpu)
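    # Both loaders share the same dataset object; each SubsetRandomSampler draws
    # only from its own index subset, so train and validation samples never overlap.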

    chars = datasets.chars

    if args.rnn:

        print('-----Train Vanilla RNN Model-----')

        model = CharRNN(chars, args).to(device)
        optimizer = Adam(model.parameters(), lr=args.lr)
        criterion = nn.CrossEntropyLoss()
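        # CrossEntropyLoss applies log-softmax internally, so the model should
        # output raw logits with integer class indices as targets.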

        rnn_trn_loss, rnn_val_loss = [], []
        best_val_loss = np.inf

        for epoch in range(args.num_epochs):

            epoch_time = time.time()

            trn_loss = train(model, trn_loader, device, criterion, optimizer)
            val_loss = validate(model, val_loader, device, criterion)

            rnn_trn_loss.append(trn_loss)
            rnn_val_loss.append(val_loss)

            print('Epoch: %3s/%s...' % (epoch + 1, num_epochs),
                  'Train Loss: %.4f...' % trn_loss,
                  'Val Loss: %.4f...' % val_loss,
                  'Time: %.4f' % (time.time() - epoch_time))

            if val_loss < best_val_loss:
                best_val_loss = val_loss
                torch.save(model.state_dict(),
                           '%s/rnn.pt' % args.model_save_dir)

        value, idx = np.min(rnn_val_loss), np.argmin(rnn_val_loss)
        plt.figure(figsize=(8, 6))
        plt.title('Vanilla RNN Model training and validation loss')
        plt.plot(np.arange(1, args.num_epochs + 1),
                 rnn_trn_loss,
                 'g',
                 label='Train Loss')
        plt.plot(np.arange(1, args.num_epochs + 1),
                 rnn_val_loss,
                 'r',
                 label='Val Loss')
        plt.grid(True)
        plt.legend(loc='upper right')
        plt.annotate('min epoch: %s\nmin valid loss: %.5f' % (idx + 1, value),
                     (idx + 1, value),
                     xytext=(-60, 20),
                     textcoords='offset points',
                     arrowprops={'arrowstyle': '->'})
        plt.savefig('%s/rnn_loss.png' % args.results_save_dir, dpi=300)

    if args.lstm:

        print('-----Train LSTM Model-----')

        model = CharLSTM(chars, args).to(device)
        optimizer = Adam(model.parameters(), lr=args.lr)
        criterion = nn.CrossEntropyLoss()

        lstm_trn_loss, lstm_val_loss = [], []
        best_val_loss = np.inf

        for epoch in range(args.num_epochs):

            epoch_time = time.time()

            trn_loss = train(model, trn_loader, device, criterion, optimizer)
            val_loss = validate(model, val_loader, device, criterion)

            lstm_trn_loss.append(trn_loss)
            lstm_val_loss.append(val_loss)

            print('Epoch: %3s/%s...' % (epoch + 1, num_epochs),
                  'Train Loss: %.4f...' % trn_loss,
                  'Val Loss: %.4f...' % val_loss,
                  'Time: %.4f' % (time.time() - epoch_time))

            if val_loss < best_val_loss:
                best_val_loss = val_loss
                torch.save(model.state_dict(),
                           '%s/lstm.pt' % args.model_save_dir)

        value, idx = np.min(lstm_val_loss), np.argmin(lstm_val_loss)
        plt.figure(figsize=(8, 6))
        plt.title('LSTM Model training and validation loss')
        plt.plot(np.arange(1, args.num_epochs + 1),
                 lstm_trn_loss,
                 'g',
                 label='Train Loss')
        plt.plot(np.arange(1, args.num_epochs + 1),
                 lstm_val_loss,
                 'r',
                 label='Val Loss')
        plt.grid(True)
        plt.legend(loc='upper right')
        plt.annotate('min epoch: %s\nmin valid loss: %.5f' % (idx + 1, value),
                     (idx + 1, value),
                     xytext=(-60, 20),
                     textcoords='offset points',
                     arrowprops={'arrowstyle': '->'})
        plt.savefig('%s/lstm_loss.png' % args.results_save_dir, dpi=300)
Example #2
def train(opt, x_train, x_val, dictionary_len):
    ''' Train a character-level network.

        Arguments
        ---------

        opt: parsed command-line options (batch_size, seq_length, lr, dropout,
             hidden_size, layers, scheduler, onlytrain, nologs, ...)
        x_train: training text
        x_val: validation text (ignored when opt.onlytrain is set)
        dictionary_len: number of distinct characters in the vocabulary

    '''
    torch.manual_seed(0)
    np.random.seed(0)
    random.seed(0)
    # Declaring the hyperparameters
    batch_size = opt.batch_size
    seq_length = int(opt.seq_length)
    epochs = 50

    if torch.cuda.is_available():
        device = "cuda"
        torch.cuda.manual_seed_all(0)
    else:
        device = "cpu"
    print(device)

    date = datetime.now().strftime('%y%m%d%H%M%S')
    if opt.nologs:
        writer = SummaryWriter(log_dir='logs/nologs/')
    else:
        writer = SummaryWriter(log_dir=f'logs/logs_{date}/')

    y_train = get_labels_text_prediction(x_train)

    train_dataset = TextDataset(x_train, y_train, max_len=seq_length)

    if not opt.onlytrain:
        y_val = get_labels_text_prediction(x_val)
        val_dataset = TextDataset(x_val, y_val, max_len=seq_length)
        val_loader = DataLoader(dataset=val_dataset,
                                pin_memory=device == 'cuda',
                                batch_size=batch_size,
                                shuffle=False)

    train_loader = DataLoader(dataset=train_dataset,
                              pin_memory=device == 'cuda',
                              batch_size=batch_size,
                              shuffle=True)
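    # pin_memory=True keeps batches in page-locked host memory, which speeds up
    # host-to-GPU transfers when training on CUDA.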

    model_params = {
        'dictionary_len': dictionary_len,
        'dropout': opt.dropout,
        'hidden_size': opt.hidden_size,
        'layers': opt.layers,
        'embedding_len': 32,
        'device': device,
        'lr': opt.lr
    }

    model = CharRNN(**model_params).to(device)
    print(model)
    # embed()
    # summary(model, input_size=(channels, H, W))
    # summary(model, input_size=(dictionary_len, 28, 28))

    optimizer = torch.optim.Adam(model.parameters(), lr=opt.lr)
    criterion = nn.CrossEntropyLoss()
    if opt.scheduler:
        scheduler = ReduceLROnPlateau(optimizer,
                                      'min',
                                      cooldown=3,
                                      factor=0.5,
                                      patience=10)
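        # ReduceLROnPlateau halves the learning rate (factor=0.5) once the
        # validation loss has not improved for `patience` epochs.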

    global_step = 0
    for j in trange(epochs, desc='Training LSTM...'):

        for i, (x, y) in enumerate(train_loader):
            if i == len(train_loader) - 1:
                # The last batch may be smaller than batch_size; it would need
                # padding, which is not implemented yet, so skip it.
                print("TODO: pad the last batch - not implemented yet")
                continue
            model.train()

            x = x.to(device)
            y = y.to(device)

            # state_h, state_c = model.zero_state(opt.batch_size)
            # # Transfer data to GPU
            # state_h = state_h.to(device)
            # state_c = state_c.to(device)

            # DELETE PAST GRADIENTS
            optimizer.zero_grad()
            # FORWARD PASS --> last state, (all states)  [ state_h[-1] == pred ]
            pred, (state_h, state_c) = model(x)
            # pred, (state_h, state_c) = model(x, (state_h, state_c))
            # CALCULATE LOSS
            # pred = pred.transpose(1, 2)
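            # Flatten predictions to (batch * seq_length, dictionary_len) and
            # targets to (batch * seq_length,) so the loss scores every
            # character position as an independent classification.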
            pred2 = pred.view(-1, dictionary_len)
            y2 = y.view(-1)
            loss = criterion(pred2, y2)
            loss_value = loss.item()

            # BACKWARD PASS
            loss.backward()
            # MINIMIZE LOSS
            optimizer.step()
            global_step += 1
            if i % 100 == 0:
                writer.add_scalar('train/loss', loss_value, global_step)
                print('[Training epoch {}: {}/{}] Loss: {}'.format(
                    j, i, len(train_loader), loss_value))

        if not opt.onlytrain:
            val_loss = []

            for i, (x, y) in enumerate(val_loader):

                if i == len(val_loader) - 1:
                    # Skip the last (possibly incomplete) batch, as in training.
                    continue

                model.eval()

                x = x.to(device)
                y = y.to(device)

                # state_h, state_c = model.zero_state(opt.batch_size)
                # state_h = state_h.to(device)
                # state_c = state_c.to(device)

                # NO BACKPROPAGATION
                # FORWARD PASS
                # pred, (state_h, state_c) = model(x, (state_h, state_c))
                pred, (state_h, state_c) = model(x)
                # CALCULATE LOSS
                # pred = pred.transpose(1, 2)

                # pred: [batch x seq_length x dictionary_len]
                # y:    [batch x seq_length]
                pred2 = pred.view(-1, dictionary_len)
                y2 = y.view(-1)
                loss = criterion(pred2, y2)
                # loss = criterion(pred, y)

                val_loss.append(loss.item())

                if i % 50 == 0:
                    print('[Validation epoch {}: {}/{}] Loss: {}'.format(
                        j, i, len(val_loader), loss.item()))

            writer.add_scalar('val/loss', np.mean(val_loss), j)

            if opt.scheduler:
                scheduler.step(np.mean(val_loss))
                writer.add_scalar("lr", optimizer.param_groups[0]["lr"], j)

            predicted_words = inference_prediction(model, device, 500)
            # output = pred[0].unsqueeze(0)  # [1,diccionary_len, 40]
            # predicted_words = do_inference_test(output, model, device)
            print(predicted_words)
            writer.add_text('val/Generated_Samples', predicted_words, j)

        checkpoint = {
            "state_dict": model.state_dict(),
            "optimizer": optimizer.state_dict(),
        }

        # if j % 5 == 0:
        os.makedirs("weights/{}".format(date), exist_ok=True)
        torch.save(checkpoint, "weights/{}/checkpoint_{}.pt".format(date, j))
Example #3
    n_hidden = args.n_hidden
    n_layers = args.n_layers
    net = CharRNN(chars, n_hidden, n_layers)

    # declaring the hyperparameters
    batch_size = args.batch_size
    seq_length = args.seq_length
    n_epochs = args.n_epochs

    # train the model
    train(net,
          encoded,
          epochs=n_epochs,
          batch_size=batch_size,
          seq_length=seq_length,
          lr=0.001,
          print_every=50)

    # Saving the model
    model_name = f'rnn_{n_epochs}_epoch.net'

    checkpoint = {
        'n_hidden': net.n_hidden,
        'n_layers': net.n_layers,
        'state_dict': net.state_dict(),
        'tokens': net.chars
    }

    with open(model_name, 'wb') as f:
        torch.save(checkpoint, f)
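    # A minimal reload sketch (assuming CharRNN accepts the tokens, hidden size
    # and layer count saved above; adjust to the actual constructor signature):
    #
    #   with open(model_name, 'rb') as f:
    #       ckpt = torch.load(f)
    #   net = CharRNN(ckpt['tokens'], ckpt['n_hidden'], ckpt['n_layers'])
    #   net.load_state_dict(ckpt['state_dict'])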
Example #4
def train(filename, rnn_type, num_layers, dropout, emb_size, hidden_size,
          num_epochs, batch_size, learning_rate, num_samples, seed_phrase,
          sample_every, checkpoint_path):
    """ Trains a character-level Recurrent Neural Network in PyTorch.

    Args: optional arguments [python train.py --help]
    """
    logging.info('reading `{}` for character sequences'.format(filename))
    inputs, token_to_idx, idx_to_token = load_dataset(file_name=filename)

    # Pin '~' to index 0 and '#' to the last index of the vocabulary.
    idx_to_token.remove('~')
    idx_to_token.remove('#')
    idx_to_token = ['~'] + idx_to_token + ['#']
    token_to_idx = {token: idx for idx, token in enumerate(idx_to_token)}

    logging.info(idx_to_token)
    logging.info(token_to_idx)

    n_tokens = len(idx_to_token)
    max_length = inputs.size(1)

    logging.debug('creating char-level RNN model')
    model = CharRNN(num_layers=num_layers,
                    rnn_type=rnn_type,
                    dropout=dropout,
                    n_tokens=n_tokens,
                    emb_size=emb_size,
                    hidden_size=hidden_size,
                    pad_id=token_to_idx[PAD_TOKEN])
    if torch.cuda.is_available():
        model = model.cuda()

    logging.debug('defining model training operations')
    # define training procedures and operations for training the model
    criterion = nn.NLLLoss(reduction='mean')
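    # NLLLoss expects log-probabilities, so the model is presumably applying
    # log_softmax to its outputs (unlike CrossEntropyLoss, which takes raw logits).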
    optimizer = optim.Adam(model.parameters(), lr=learning_rate)
    scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer,
                                                     mode='min',
                                                     min_lr=1e-6,
                                                     factor=0.1,
                                                     patience=7,
                                                     verbose=True)

    # train-val-test split of the dataset
    split_index = int(0.9 * inputs.size(0))
    train_tensors, inputs = inputs[:split_index], inputs[split_index:]
    split_index = int(0.5 * inputs.size(0))
    val_tensors, test_tensors = inputs[:split_index], inputs[split_index:]
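    # i.e. a 90% / 5% / 5% train / validation / test split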
    del inputs
    logging.info('train tensors: {}'.format(train_tensors.size()))
    logging.info('val tensors: {}'.format(val_tensors.size()))
    logging.info('test tensors: {}'.format(test_tensors.size()))

    logging.debug('training char-level RNN model')
    # loop over epochs
    for epoch in range(1, num_epochs + 1):
        epoch_loss, n_iter = 0.0, 0
        # loop over batches
        for tensors in tqdm(iterate_minibatches(train_tensors,
                                                batchsize=batch_size),
                            desc='Epoch[{}/{}]'.format(epoch, num_epochs),
                            leave=False,
                            total=train_tensors.size(0) // batch_size):
            # optimize model parameters
            epoch_loss += optimize(model, tensors, max_length, n_tokens,
                                   criterion, optimizer)
            n_iter += 1
        # evaluate model after every epoch
        val_loss = evaluate(model, val_tensors, max_length, n_tokens,
                            criterion)
        # lr_scheduler decreases lr when stuck at local minima
        scheduler.step(val_loss)
        # log epoch status info
        logging.info(
            'Epoch[{}/{}]: train_loss - {:.4f}   val_loss - {:.4f}'.format(
                epoch, num_epochs, epoch_loss / n_iter, val_loss))

        # sample from the model every few epochs
        if epoch % sample_every == 0:
            print(
                'Epoch[{}/{}]: train_loss - {:.4f}   val_loss - {:.4f}'.format(
                    epoch, num_epochs, epoch_loss / n_iter, val_loss))
            for _ in range(num_samples):
                sample = generate_sample(model,
                                         token_to_idx,
                                         idx_to_token,
                                         max_length,
                                         n_tokens,
                                         seed_phrase=seed_phrase)
                logging.debug(sample)

        checkpoint = {
            'epoch': epoch + 1,
            'valid_loss_min': val_loss,
            'state_dict': model.state_dict(),
            'optimizer': optimizer.state_dict(),
        }
        # save checkpoint
        best_model_path = checkpoint_path
        save_ckp(checkpoint, False, checkpoint_path, best_model_path)