Example #1
0
def train(train_loader,
          validate_data,
          device,
          gradient_clipping=1,
          hidden_state=10,
          lr=0.001,
          opt="adam",
          epochs=600,
          batch_size=32):
    """Train a toy-task EncoderDecoder autoencoder and track validation loss.

    Args:
        train_loader: iterable yielding training batches (tensors).
        validate_data: validation tensor; moved to `device` once up front.
        device: torch device to run on.
        gradient_clipping: max gradient norm; a falsy value disables clipping.
        hidden_state: hidden size of the encoder/decoder.
        lr: optimizer learning rate.
        opt: "adam" selects Adam; anything else selects RMSprop.
        epochs: epoch count (loop runs 1..epochs-1, preserving the original).
        batch_size: used only for plot labelling.
    """
    model = EncoderDecoder(1, hidden_state, 1, 50).to(device)
    validate_data = validate_data.to(device)
    if opt == "adam":
        optimizer = torch.optim.Adam(model.parameters(), lr=lr)
        optimizer_name = 'adam'
    else:
        optimizer = torch.optim.RMSprop(model.parameters(), lr=lr)
        # BUG FIX: this branch used to label the optimizer 'mse' (a loss
        # name, not an optimizer); the label feeds checkpoint file names
        # and plot titles.
        optimizer_name = 'rmsprop'

    mse = nn.MSELoss()
    min_loss = float("inf")          # best single-batch loss seen so far
    best_loss_global = float("inf")  # best epoch-average loss seen so far
    min_in, min_out = None, None
    validation_losses = []
    for epoch in range(1, epochs):
        total_loss = 0
        for batch_idx, data in enumerate(train_loader):
            data = data.to(device)
            optimizer.zero_grad()
            output = model(data)
            loss = mse(output, data)
            total_loss += loss.item()
            if loss.item() < min_loss:
                min_loss = loss.item()
                min_in, min_out = data, output
            loss.backward()
            if gradient_clipping:
                nn.utils.clip_grad_norm_(model.parameters(),
                                         max_norm=gradient_clipping)

            optimizer.step()

        epoch_loss = total_loss / len(train_loader)
        best_loss_global = min(best_loss_global, epoch_loss)
        print(f'Train Epoch: {epoch} \t loss: {epoch_loss}')

        # Periodic full-model checkpoint.
        if epoch % 50 == 0:
            file_name = f'ae_toy_{optimizer_name}_lr={lr}_hidden_size={hidden_state}_' \
                        f'_gradient_clipping={gradient_clipping}'
            path = os.path.join("saved_models", "toy_task", file_name)
            create_folders(path)
            torch.save(
                model,
                os.path.join(path,
                             f'epoch={epoch}_bestloss={best_loss_global}.pt'))
        # Validation every 10 epochs; `validation` appends into the list.
        if epoch % 10 == 0:
            validation(model, mse, validate_data, validation_losses)

    plot_validation_loss(epochs, gradient_clipping, lr, optimizer_name,
                         validation_losses, batch_size, hidden_state)
Example #2
0
def make_model(src_vocab,
               tgt_vocab,
               N=6,
               d_model=512,
               d_ff=2048,
               h=8,
               dropout=0.1):
    """Helper: build a full Transformer encoder-decoder from hyperparameters.

    Each shared building block is constructed once and deep-copied wherever
    an independent instance is needed, so no parameters are accidentally
    shared between layers.
    """
    attention = MultiHeadedAttention(h, d_model)
    feed_forward = PositionwiseFeedForward(d_model, d_ff, dropout)
    pos_encoding = PositionalEncoding(d_model, dropout)

    def clone(module):
        # Independent copy of a prototype sub-module.
        return copy.deepcopy(module)

    encoder = Encoder(
        EncoderLayer(d_model, clone(attention), clone(feed_forward), dropout),
        N)
    decoder = Decoder(
        DecoderLayer(d_model, clone(attention), clone(attention),
                     clone(feed_forward), dropout), N)
    src_embed = nn.Sequential(Embeddings(d_model, src_vocab),
                              clone(pos_encoding))
    tgt_embed = nn.Sequential(Embeddings(d_model, tgt_vocab),
                              clone(pos_encoding))
    model = EncoderDecoder(encoder, decoder, src_embed, tgt_embed,
                           Generator(d_model, tgt_vocab))

    # This was important from their code.
    # Initialize parameters with Glorot / fan_avg.
    for param in model.parameters():
        if param.dim() > 1:
            nn.init.xavier_uniform_(param)
    return model
Example #3
0
def main(args):
    """Train the KTH frame-prediction EncoderDecoder.

    For every ordered frame pair (i, j) with i < j within each 15-frame
    clip, the network predicts frame j from frame i given the elapsed time
    (j - i) * 40 ms. Checkpoints every 10 epochs; loss curves are saved
    as PNGs at the end.

    Args:
        args: namespace providing at least `lr`, `epochs`, `batch_size`,
            plus whatever `fetch_kth_data` / `EncoderDecoder` consume.
    """
    num_frames = 15
    ms_per_frame = 40

    network = EncoderDecoder(args).cuda()
    optimizer = torch.optim.Adam(network.parameters(), lr=args.lr, betas=(0.9, 0.99))
    criterion = nn.MSELoss()

    train_loader, dev_loader, test_loader = fetch_kth_data(args)

    train_loss = []
    dev_loss = []
    for epoch in range(args.epochs):
        epoch_loss = 0
        # BUG FIX: count frame pairs over the whole epoch; previously the
        # per-epoch "Scaled Loss" divided by the *last batch's* counter only
        # (and raised NameError if the loader was empty).
        epoch_frames = 0
        batch_num = 0
        for item in train_loader:
            item = item['instance'].cuda()

            frames_processed = 0
            batch_loss = 0

            # Fit a whole batch for all the different frame offsets.
            for i in range(num_frames - 1):
                for j in range(i + 1, num_frames):
                    network.zero_grad()
                    frame_diff = j - i
                    time_delta = torch.tensor(frame_diff * ms_per_frame).float().repeat(args.batch_size).cuda()
                    time_delta.requires_grad = True

                    seq = item[:, :, i, :, :]
                    seq.requires_grad = True

                    seq_targ = item[:, :, j, :, :]
                    seq_targ.requires_grad = False

                    assert seq.requires_grad and time_delta.requires_grad, 'No Gradients'

                    outputs = network(seq, time_delta)
                    error = criterion(outputs, seq_targ)
                    error.backward()
                    optimizer.step()

                    batch_loss += error.cpu().item()
                    frames_processed += 1

                    # Save reconstructions from the first source frame only.
                    if i == 0:
                        save_image(outputs, '/scratch/eecs-share/dinkinst/kth/img/train_output_{}_epoch_{}.png'.format(j, epoch))

            batch_num += 1
            epoch_loss += batch_loss
            epoch_frames += frames_processed
            print('Epoch {} Batch #{} Total Error {}'.format(epoch, batch_num, batch_loss))
        print('\nEpoch {} Total Loss {} Scaled Loss {}\n'.format(epoch, epoch_loss, epoch_loss / max(epoch_frames, 1)))
        train_loss.append(epoch_loss)
        if epoch % 10 == 0:
            torch.save(network.state_dict(), KTH_PATH+str('/model_new_{}.pth'.format(epoch)))
            torch.save(optimizer.state_dict(), KTH_PATH+str('/optim_new_{}.pth'.format(epoch)))

        dev_loss.append(eval_model(network, dev_loader, epoch))
        # NOTE(review): presumably eval_model flips the net to eval mode;
        # restore training mode for the next epoch.
        network.train()

    plt.plot(range(args.epochs), train_loss)
    plt.grid()
    plt.savefig('/scratch/eecs-share/dinkinst/kth/img/loss_train.png', dpi=64)
    plt.close('all')
    plt.plot(range(args.epochs), dev_loss)
    plt.grid()
    plt.savefig('/scratch/eecs-share/dinkinst/kth/img/loss_dev.png', dpi=64)
    plt.close('all')
Example #4
0
def main():
    """Top-level training driver for the NMT encoder-decoder.

    Loads train/eval data, builds the model and optimiser, optionally
    resumes from a checkpoint, then trains in "sub-epochs" (fixed-size
    slices of the training batches), evaluating and checkpointing after
    each sub-epoch.

    Relies on module-level configuration globals (train_data_dir, lang,
    hidden_size, resume, save_dir, num_epochs, subepoch_size, verbose,
    metric_store, ...).
    """
    torch.manual_seed(10)  # fix seed for reproducibility
    torch.cuda.manual_seed(10)

    train_data, train_source_text, train_target_text = create_data(
        os.path.join(train_data_dir, train_dataset), lang)

    eval_data, eval_source_text, eval_target_text = create_data(
        os.path.join(dev_data_dir, eval_dataset), lang)

    en_emb_lookup_matrix = train_source_text.vocab.vectors.to(device)
    target_emb_lookup_matrix = train_target_text.vocab.vectors.to(device)

    global en_vocab_size
    global target_vocab_size

    en_vocab_size = train_source_text.vocab.vectors.size(0)
    target_vocab_size = train_target_text.vocab.vectors.size(0)

    if verbose:
        print('English vocab size: ', en_vocab_size)
        print(lang, 'vocab size: ', target_vocab_size)
        print_runtime_metric('Vocabs loaded')

    model = EncoderDecoder(en_emb_lookup_matrix, target_emb_lookup_matrix,
                           hidden_size, bidirectional, attention,
                           attention_type, decoder_cell_type).to(device)

    model.encoder.device = device

    # ignore_index=1 comes from the target_data generation in the iterator.
    criterion = nn.CrossEntropyLoss(ignore_index=1)

    optimiser = torch.optim.Adam(model.parameters(), lr=lr)

    best_loss = 10e+10  # dummy variable
    best_bleu = 0
    epoch = 1  # initial epoch id

    if resume:
        print('\n ---------> Resuming training <----------')
        checkpoint_path = os.path.join(save_dir, 'checkpoint.pth')
        checkpoint = torch.load(checkpoint_path)
        epoch = checkpoint['epoch']
        subepoch, num_subepochs = checkpoint['subepoch_num']
        model.load_state_dict(checkpoint['state_dict'])
        best_loss = checkpoint['best_loss']
        optimiser.load_state_dict(checkpoint['optimiser'])
        is_best = checkpoint['is_best']
        metric_store.load(os.path.join(save_dir, 'checkpoint_metrics.pickle'))

        # Advance past the sub-epoch already completed at checkpoint time.
        if subepoch == num_subepochs:
            epoch += 1
            subepoch = 1
        else:
            subepoch += 1
    else:
        subepoch = 1

    if verbose:
        print_runtime_metric('Model initialised')

    while epoch <= num_epochs:
        is_best = False  # best loss or not

        # Initialise the training iterator (reshuffled per epoch via seed).
        train_iter = BatchIterator(train_data,
                                   batch_size,
                                   do_train=True,
                                   seed=epoch**2)

        num_subepochs = train_iter.num_batches // subepoch_size

        while subepoch <= num_subepochs:

            if verbose:
                print(' Running code on: ', device)

                print('------> Training epoch {}, sub-epoch {}/{} <------'.
                      format(epoch, subepoch, num_subepochs))

            mean_train_loss = train(model, criterion, optimiser, train_iter,
                                    train_source_text, train_target_text,
                                    subepoch, num_subepochs)

            if verbose:
                print_runtime_metric('Training sub-epoch complete')
                print(
                    '------> Evaluating sub-epoch {} <------'.format(subepoch))

            eval_iter = BatchIterator(eval_data,
                                      batch_size,
                                      do_train=False,
                                      seed=325632)

            mean_eval_loss, mean_eval_bleu, _, mean_eval_sent_bleu, _, _ = evaluate(
                model, criterion, eval_iter, eval_source_text.vocab,
                eval_target_text.vocab, train_source_text.vocab,
                train_target_text.vocab)  # here should be the eval data

            if verbose:
                print_runtime_metric('Evaluating sub-epoch complete')

            if mean_eval_loss < best_loss:
                best_loss = mean_eval_loss
                is_best = True

            if mean_eval_bleu > best_bleu:
                best_bleu = mean_eval_bleu
                is_best = True

            config_dict = {
                'train_dataset': train_dataset,
                'b_size': batch_size,
                'h_size': hidden_size,
                'bidirectional': bidirectional,
                'attention': attention,
                'attention_type': attention_type,
                'decoder_cell_type': decoder_cell_type
            }

            # Save the model and the optimiser state for resumption
            # (after each sub-epoch).
            checkpoint = {
                'epoch': epoch,
                'subepoch_num': (subepoch, num_subepochs),
                'state_dict': model.state_dict(),
                'config': config_dict,
                'best_loss': best_loss,
                'best_BLEU': best_bleu,
                'optimiser': optimiser.state_dict(),
                'is_best': is_best
            }
            torch.save(checkpoint, os.path.join(save_dir, 'checkpoint.pth'))
            metric_store.log(mean_train_loss, mean_eval_loss)
            metric_store.save(
                os.path.join(save_dir, 'checkpoint_metrics.pickle'))

            if verbose:
                print('Checkpoint.')

            # Save the best model so far
            if is_best:
                save_dict = {
                    'state_dict': model.state_dict(),
                    'config': config_dict,
                    'epoch': epoch
                }
                torch.save(save_dict, os.path.join(save_dir, 'best_model.pth'))
                metric_store.save(
                    os.path.join(save_dir, 'best_model_metrics.pickle'))

            if verbose:
                if is_best:
                    print('Best model saved!')
                print(
                    'Ep {} Sub-ep {}/{} Tr loss {} Eval loss {} Eval BLEU {} Eval sent BLEU {}'
                    .format(epoch, subepoch, num_subepochs,
                            round(mean_train_loss, 3),
                            round(mean_eval_loss, 3), round(mean_eval_bleu, 4),
                            round(mean_eval_sent_bleu, 4)))

            subepoch += 1
        # BUG FIX: reset the sub-epoch counter for the next epoch. It was
        # previously only initialised when `resume` was false, so after a
        # resumed run every subsequent epoch skipped training entirely.
        subepoch = 1
        epoch += 1
Example #5
0
def train(train_loader,
          test_loader,
          gradient_clipping=1,
          hidden_state_size=10,
          lr=0.001,
          epochs=100,
          classify=True):
    """Train the MNIST EncoderDecoder (reconstruction or joint classification).

    Args:
        train_loader: yields (data, target) batches of MNIST images.
        test_loader: validation loader, forwarded to `validation`.
        gradient_clipping: max gradient norm; a falsy value disables clipping.
        hidden_state_size: hidden size of the encoder/decoder.
        lr: Adam learning rate.
        epochs: epoch count (loop runs 1..epochs-1, preserving the original).
        classify: when True, the model additionally predicts the digit label.
    """
    model = EncoderDecoder(input_size=28, hidden_size=hidden_state_size, output_size=28, labels_num=10) if not classify \
        else EncoderDecoder(input_size=28, hidden_size=hidden_state_size, output_size=28, is_prediction=True,
                            labels_num=10)
    model = model.to(device)
    optimizer = torch.optim.Adam(model.parameters(), lr=lr)
    loss_name = "mse"
    min_loss = float("inf")  # best validation loss so far
    task_name = "classify" if classify else "reconstruct"
    validation_losses = []
    validation_accuracies = []
    tensorboard_writer = init_writer(results_path, lr, classify,
                                     hidden_state_size, epochs)
    for epoch in range(1, epochs):
        total_loss = 0
        total_batches = 0
        for batch_idx, (data, target) in enumerate(train_loader):
            data = data.to(device)
            target = target.to(device)
            # Treat each 28x28 image as a length-28 sequence of 28-dim rows.
            data_sequential = data.view(data.shape[0], 28, 28)
            optimizer.zero_grad()
            if classify:
                resconstucted_batch, batch_pred_probs = model(data_sequential)
                loss = model.loss(data_sequential, resconstucted_batch, target,
                                  batch_pred_probs)
            else:
                resconstucted_batch = model(data_sequential)
                loss = model.loss(data_sequential, resconstucted_batch)
            total_loss += loss.item()
            loss.backward()
            if gradient_clipping:
                nn.utils.clip_grad_norm_(model.parameters(),
                                         max_norm=gradient_clipping)
            optimizer.step()
            total_batches += 1

        epoch_loss = total_loss / total_batches

        tensorboard_writer.add_scalar('train_loss', epoch_loss, epoch)
        print(f'Train Epoch: {epoch} \t loss: {epoch_loss}')

        validation_loss = validation(model, test_loader, validation_losses,
                                     device, classify, validation_accuracies,
                                     tensorboard_writer, epoch)
        # `validation` presumably flips the model to eval mode; restore it.
        model.train()

        # Checkpoint periodically and whenever validation improves.
        if epoch % 5 == 0 or validation_loss < min_loss:
            file_name = f"ae_toy_{loss_name}_lr={lr}_hidden_size={hidden_state_size}_epoch={epoch}_gradient_clipping={gradient_clipping}.pt"
            save_dir = os.path.join(results_path, "saved_models", "MNIST_task",
                                    task_name)
            # BUG FIX: torch.save raises FileNotFoundError when the target
            # directory does not exist yet; create it first (the sibling toy
            # trainer calls create_folders for the same reason).
            os.makedirs(save_dir, exist_ok=True)
            torch.save(model, os.path.join(save_dir, file_name))

        min_loss = min(validation_loss, min_loss)

    plot_validation_loss(epochs, gradient_clipping, lr, loss_name,
                         validation_losses, hidden_state_size, task_name)
    if classify:
        plot_validation_acc(epochs, gradient_clipping, lr, loss_name,
                            validation_accuracies, hidden_state_size,
                            task_name)
Example #6
0
def train(train_loader,
          validate_data,
          device,
          gradient_clipping=1,
          hidden_state_size=10,
          lr=0.001,
          opt="adam",
          epochs=1000,
          batch_size=32):
    """Train a toy-task EncoderDecoder autoencoder with periodic validation.

    Args:
        train_loader: iterable yielding training batches (tensors).
        validate_data: validation tensor; moved to `device` once up front.
        device: torch device to run on.
        gradient_clipping: max gradient norm; a falsy value disables clipping.
        hidden_state_size: hidden size of the encoder/decoder.
        lr: optimizer learning rate.
        opt: "adam" selects Adam; anything else selects RMSprop.
        epochs: number of training epochs.
        batch_size: used only for plot labelling.
    """
    model = EncoderDecoder(1, hidden_state_size, 1, 50).to(device)
    validate_data = validate_data.to(device)
    if (opt == "adam"):
        optimizer = torch.optim.Adam(model.parameters(), lr=lr)
    else:
        optimizer = torch.optim.RMSprop(model.parameters(), lr=lr)
    optimizer_name = 'adam' if 'adam' in str(optimizer).lower() else 'mse'

    mse = nn.MSELoss()
    min_loss = float("inf")          # best single-batch loss seen so far
    best_loss_global = float("inf")  # best epoch-average loss seen so far
    min_in, min_out = None, None
    validation_losses = []
    for epoch in range(0, epochs):
        total_loss = 0
        for batch_idx, data in enumerate(train_loader):
            data = data.to(device)
            optimizer.zero_grad()
            output = model(data)
            loss = mse(output, data)
            total_loss += loss.item()
            if loss.item() < min_loss:
                min_loss = loss.item()
                min_in, min_out = data, output
            loss.backward()
            if gradient_clipping:
                nn.utils.clip_grad_norm_(model.parameters(),
                                         max_norm=gradient_clipping)

            optimizer.step()

        epoch_loss = total_loss / len(train_loader)
        best_loss_global = min(best_loss_global, epoch_loss)
        print(f'Train Epoch: {epoch} \t loss: {epoch_loss}')

        # Periodic full-model checkpoint.
        if epoch % 100 == 0:
            path = f'{results_path}saved_models/ae_toy_{optimizer_name}_lr={lr}_hidden_size={hidden_state_size}_' \
                   f'_gradient_clipping={gradient_clipping}_'
            create_folders(path)
            torch.save(model,
                       path + f"/epoch={epoch}_bestloss={best_loss_global}.pt")

        # BUG FIX: validation was accidentally nested inside the %100
        # checkpoint branch (the mangled inline comment
        # "# run validation        if epoch % 20 == 0:" shows the intended
        # schedule). Run it every 20 epochs, without gradients.
        if epoch % 20 == 0:
            model.eval()
            with torch.no_grad():
                output = model(validate_data)
                loss = mse(output, validate_data)
            validation_losses.append(loss.item())
            model.train()

    plot_sequence_examples(epochs, gradient_clipping, lr, min_in, min_out,
                           optimizer_name, batch_size)

    plot_validation_loss(epochs, gradient_clipping, lr, optimizer_name,
                         validation_losses, batch_size)
Example #7
0
def main():
    """Train and evaluate an ar→en transliteration EncoderDecoder.

    Parses CLI options, loads the ar2en dataset, trains for the requested
    number of epochs while tracking validation word/char accuracy, prints
    final test accuracy, and saves loss/accuracy plots.

    Returns:
        Final test word-level accuracy (float).
    """

    parser = argparse.ArgumentParser()

    parser.add_argument('-data',
                        help="Path to ar2en dataset.",
                        default='./ar2en_dataset')

    parser.add_argument('-embeddings_size', type=int, default=300)
    parser.add_argument('-layers', type=int, default=2)
    parser.add_argument('-hidden_sizes', type=int, default=300)
    parser.add_argument('-dropout', type=float, default=0.1)
    parser.add_argument('-epochs', type=int, default=20)
    parser.add_argument('-optimizer', choices=['sgd', 'adam'], default='adam')
    parser.add_argument('-learning_rate', type=float, default=0.001)
    parser.add_argument('-l2_decay', type=float, default=0.0)
    parser.add_argument('-batch_size', type=int, default=64)

    parser.add_argument(
        '-cuda',
        action='store_true',
        help=
        'Whether or not to use cuda for parallelization (if devices available)'
    )

    parser.add_argument('-name',
                        type=str,
                        required=False,
                        default=None,
                        help="Filename for the plot")

    parser.add_argument('-quiet',
                        action='store_true',
                        help='No execution output.')

    parser.add_argument(
        '-tqdm',
        action='store_true',
        help='Whether or not to use TQDM progress bar in training.')

    parser.add_argument(
        '-display_vocabularies',
        action="store_true",
        help="Only display the vocabularies (no further execution).")

    parser.add_argument(
        '-reverse_source_string',
        action="store_true",
        help="Whether or not to reverse the source arabic string.")

    parser.add_argument(
        '-bidirectional',
        action="store_true",
        help="Whether or not to use a bidirectional encoder LSTM.")

    parser.add_argument('-attention',
                        type=str,
                        choices=["dot", "general"],
                        required=False,
                        default=None,
                        help="Attention mechanism in the decoder.")

    opt = parser.parse_args()

    # ############# #
    # 1 - Load Data #
    # ############# #

    dataset = Ar2EnDataset(opt.data, opt.reverse_source_string)

    if opt.display_vocabularies:
        sys.exit(0)

    dataloader = DataLoader(dataset, batch_size=opt.batch_size, shuffle=True)
    X_dev, y_dev = dataset.X_dev, dataset.y_dev
    X_test, y_test = dataset.X_test, dataset.y_test

    # ################ #
    # 2 - Create Model #
    # ################ #

    device = torch.device(
        "cuda:0" if torch.cuda.is_available() and opt.cuda else "cpu")
    if not opt.quiet: print(f"Using device '{device}'", flush=True)

    model = EncoderDecoder(dataset.n_inputs, dataset.n_outputs,
                           opt.embeddings_size, opt.attention,
                           opt.bidirectional, opt.hidden_sizes, opt.layers,
                           opt.dropout, dataset.arabic_vocabulary,
                           dataset.english_vocabulary, device)

    # ############# #
    # 3 - Optimizer #
    # ############# #

    optimizer = {
        "adam": torch.optim.Adam,
        "sgd": torch.optim.SGD
    }[opt.optimizer](model.parameters(),
                     lr=opt.learning_rate,
                     weight_decay=opt.l2_decay)
    criterion = nn.CrossEntropyLoss(
        ignore_index=dataset.english_vocabulary["$PAD"])

    # ###################### #
    # 4 - Train and Evaluate #
    # ###################### #

    # Import once, up front, instead of re-importing every epoch.
    if opt.tqdm:
        from tqdm import tqdm

    epochs = torch.arange(1, opt.epochs + 1)
    train_mean_losses = []
    val_word_acc = []
    val_char_acc = []
    train_losses = []

    for epoch in epochs:

        if not opt.quiet:
            print('\nTraining epoch {}'.format(epoch), flush=True)

        # BUG FIX: the original did `dataloader = tqdm(dataloader)` inside
        # the epoch loop, re-wrapping the loader every epoch and nesting
        # progress bars; wrap a fresh per-epoch view instead.
        epoch_iterator = tqdm(dataloader) if opt.tqdm else dataloader

        for X_batch, y_batch in epoch_iterator:
            loss = train_batch(X_batch, y_batch, model, optimizer, criterion)
            train_losses.append(loss)

        # NOTE(review): train_losses accumulates across epochs, so this is a
        # running mean over *all* batches so far, not a per-epoch mean —
        # preserved as-is; confirm whether that smoothing is intentional.
        mean_loss = torch.tensor(train_losses).mean().item()
        word_acc, char_acc = evaluate(model, X_dev, y_dev)

        train_mean_losses.append(mean_loss)
        val_word_acc.append(word_acc)
        val_char_acc.append(char_acc)

        if not opt.quiet:
            print('Training loss: %.4f' % mean_loss, flush=True)
            print('Valid word acc: %.4f' % val_word_acc[-1], flush=True)
            print('Valid char acc: %.4f' % val_char_acc[-1], flush=True)

    final_test_accuracy_words, final_test_accuracy_chars = evaluate(
        model, X_test, y_test)
    if not opt.quiet:
        print('\nFinal Test Word Acc: %.4f' % final_test_accuracy_words,
              flush=True)
        print('Final Test Char Acc: %.4f' % final_test_accuracy_chars,
              flush=True)

    # ######## #
    # 5 - Plot #
    # ######## #

    name = opt.name if opt.name is not None else "encoder_decoder"
    plot(epochs,
         train_mean_losses,
         ylabel='Loss',
         name=name + "_loss",
         title="Training Loss")
    plot(
        epochs,
        val_word_acc,
        ylabel='Word Val Acc',
        name=name + "_acc",
        title=
        f"Word Validation Accuracy\n(Final Word Test Accuracy: {round(final_test_accuracy_words,3)})"
    )

    return final_test_accuracy_words
Example #8
0
    dataset = JIGSAWSegmentsDataset(dataset_path, dataset_tasks)
    dataloader = JIGSAWSegmentsDataloader(batch_size,
                                          input_length,
                                          output_length,
                                          dataset,
                                          scale=scale)
    model = EncoderDecoder(src_vocab,
                           tgt_vocab,
                           N=num_layers,
                           input_size=feature_dim,
                           hidden_layer=hidden_layer,
                           h=num_heads,
                           dropout=dropout,
                           task_dim=task_dim)
    optimizer = torch.optim.Adam(model.parameters(),
                                 lr=learning_rate,
                                 betas=betas,
                                 eps=eps)
    running_loss_plot = train_epochs(dataloader,
                                     train_split,
                                     model,
                                     loss_function,
                                     optimizer,
                                     n_epochs=num_epochs,
                                     use_gpu=use_gpu,
                                     use_task=use_task)
    visulize_results(
        dataloader,
        0,
        loss_function,
Example #9
0
def train(train_loader,
          test_loader,
          gradient_clipping=1,
          hidden_state_size=10,
          lr=0.001,
          epochs=3000,
          is_prediction=False):
    """Train the S&P-500 EncoderDecoder (reconstruction or joint prediction).

    Args:
        train_loader: yields (data, target) batches of price sequences.
        test_loader: validation loader, forwarded to `validation`.
        gradient_clipping: max gradient norm; a falsy value disables clipping.
        hidden_state_size: hidden size of the encoder/decoder.
        lr: Adam learning rate.
        epochs: epoch count (loop runs 1..epochs-1, preserving the original).
        is_prediction: when True, the model additionally predicts targets.
    """
    model = EncoderDecoder(input_size=1, hidden_size=hidden_state_size, output_size=1,
                           labels_num=1) if not is_prediction \
        else EncoderDecoder(input_size=1, hidden_size=hidden_state_size, output_size=1, is_prediction=True,
                            labels_num=1, is_snp=True)
    model = model.to(device)
    optimizer = torch.optim.Adam(model.parameters(), lr=lr)
    loss_name = "mse"
    min_loss = float("inf")  # best validation loss so far
    task_name = "classify" if is_prediction else "reconstruct"
    validation_losses = []
    tensorboard_writer = init_writer(lr, is_prediction, hidden_state_size,
                                     epochs, task_name)
    for epoch in range(1, epochs):
        total_loss = 0
        for batch_idx, (data, target) in enumerate(train_loader):

            # Reshape each batch to (batch, seq_len, 1) feature sequences.
            data_sequential = (data.view(data.shape[0], data.shape[1],
                                         1)).to(device)
            target = target.to(device)
            optimizer.zero_grad()
            if is_prediction:
                resconstucted_batch, batch_preds = model(data_sequential)
                batch_preds = batch_preds.view(batch_preds.shape[0],
                                               batch_preds.shape[1])
                loss = model.loss(data_sequential, resconstucted_batch, target,
                                  batch_preds)
            else:
                resconstucted_batch = model(data_sequential)
                loss = model.loss(data_sequential, resconstucted_batch)
            total_loss += loss.item()
            loss.backward()
            if gradient_clipping:
                nn.utils.clip_grad_norm_(model.parameters(),
                                         max_norm=gradient_clipping)
            optimizer.step()

        epoch_loss = total_loss / len(train_loader)
        tensorboard_writer.add_scalar('train_loss', epoch_loss, epoch)
        print(f'Train Epoch: {epoch} \t loss: {epoch_loss}')

        validation_loss = validation(model, test_loader, validation_losses,
                                     device, is_prediction, tensorboard_writer,
                                     epoch)

        # Checkpoint periodically and whenever validation improves.
        if epoch % 5 == 0 or validation_loss < min_loss:
            file_name = f"ae_s&p500_{loss_name}_lr={lr}_hidden_size={hidden_state_size}_epoch={epoch}_gradient_clipping={gradient_clipping}.pt"
            save_dir = os.path.join(results_path, "saved_models", "s&p500_task",
                                    task_name)
            # BUG FIX: torch.save raises FileNotFoundError when the target
            # directory does not exist yet; create it first.
            os.makedirs(save_dir, exist_ok=True)
            torch.save(model, os.path.join(save_dir, file_name))

        min_loss = min(validation_loss, min_loss)

    plot_validation_loss(epochs, gradient_clipping, lr, loss_name,
                         validation_losses, hidden_state_size, task_name)
Example #10
0
def main():
    """Train an MNIST autoencoder for 10 epochs and save the model.

    Downloads MNIST if missing, trains with Adam + MSE on flattened images,
    reports per-epoch train/validation loss, and saves the whole model to
    "model.pth".
    """
    # Check if cuda is available
    if torch.cuda.is_available():
        device = "cuda"
    else:
        device = "cpu"
    transform = transforms.Compose([transforms.ToTensor()])
    # Create the train test split
    # Automatically download if missing
    train_data = datasets.MNIST(root="data",
                                train=True,
                                transform=transform,
                                download=True)
    val_data = datasets.MNIST(root="data",
                              train=False,
                              transform=transform,
                              download=True)

    # Create dataloaders
    train_loader = DataLoader(train_data,
                              batch_size=32,
                              shuffle=True)
    val_loader = DataLoader(val_data,
                            batch_size=32,
                            shuffle=False)

    # Define model, loss function and optimizer
    net = EncoderDecoder()
    net.to(device)
    epochs = 10
    optimizer = Adam(net.parameters(), lr=0.001, weight_decay=1e-7)
    loss_fn = MSELoss(reduction="mean")

    # Training loop
    for i in range(epochs):
        print("Epoch {}/{}".format(i + 1, epochs))
        net.train()  # ensure training mode after the eval pass below
        epoch_loss = []
        counter = 0
        for imgs, labels in train_loader:
            imgs = imgs.to(device)
            labels = labels.to(device)
            # Flatten each image to a 784-dim vector for the autoencoder.
            imgs = imgs.reshape(imgs.shape[0], -1)
            counter += 1
            y_pred = net(imgs)
            loss = loss_fn(imgs, y_pred)
            epoch_loss.append(loss.item())
            print("{}/{}. Train Loss: {:.2f}".format(counter, len(train_data)//32, loss.item()), end="\r")
            loss.backward()
            optimizer.step()
            optimizer.zero_grad()

        epoch_loss = np.array(epoch_loss)
        print()
        print("Checking val loss")
        # BUG FIX: switch to eval mode for validation (disables dropout/
        # batch-norm updates if the model has them).
        net.eval()
        val_loss = []
        counter = 0
        for imgs, labels in val_loader:
            imgs = imgs.to(device)
            labels = labels.to(device)
            imgs = imgs.reshape(imgs.shape[0], -1)
            counter += 1
            with torch.no_grad():
                y_pred = net(imgs)
                loss = loss_fn(imgs, y_pred)
                val_loss.append(loss.item())
                # BUG FIX: this line previously said "Train Loss" while
                # reporting validation batches.
                print("{}/{}. Val Loss: {:.2f}".format(counter, len(val_data)//32, loss.item()), end="\r")
        print()
        val_loss = np.array(val_loss)
        print("Training loss epoch: {:.2f}\tValidation loss: {:.2f}".format(epoch_loss.mean(), val_loss.mean()))

    # Save model
    torch.save(net, "model.pth")
Example #11
0
def train(resume_training=True):
    """Train a Transformer NMT model with checkpoint resumption.

    Builds a TransformerEncoder/TransformerDecoder pair with fixed
    hyperparameters, optionally restores model/optimizer/epoch from the
    last checkpoint, and trains with masked softmax cross-entropy and
    gradient clipping, saving a checkpoint every epoch.

    Args:
        resume_training: when True and a non-empty checkpoint file exists,
            continue from the saved epoch; otherwise start fresh with
            Xavier-initialised weights.
    """
    EMBEDDING_SIZE = 32
    num_hiddens, num_layers, dropout, batch_size, num_steps = EMBEDDING_SIZE, 2, 0.1, 64, 10
    lr, num_epochs, device = 0.005, 1000, d2lt.try_gpu()
    ffn_num_input, ffn_num_hiddens, num_heads = EMBEDDING_SIZE, 64, 4
    key_size, query_size, value_size = EMBEDDING_SIZE, EMBEDDING_SIZE, EMBEDDING_SIZE
    norm_shape = [EMBEDDING_SIZE]

    ### Load data
    data_iter, src_vocab, tgt_vocab = load_data_nmt(batch_size, num_steps)
    encoder = TransformerEncoder(len(src_vocab), key_size, query_size,
                                 value_size, num_hiddens, norm_shape,
                                 ffn_num_input, ffn_num_hiddens, num_heads,
                                 num_layers, dropout)
    decoder = TransformerDecoder(len(tgt_vocab), key_size, query_size,
                                 value_size, num_hiddens, norm_shape,
                                 ffn_num_input, ffn_num_hiddens, num_heads,
                                 num_layers, dropout)

    ### Load model
    model = EncoderDecoder(encoder, decoder).to(device)
    optimizer = torch.optim.Adam(model.parameters(), lr=lr)

    ### Load checkpoint
    if resume_training and PATH_MODEL.exists(
    ) and os.path.getsize(PATH_MODEL) > 0:
        model, optimizer, last_epoch = load_checkpoint(model, optimizer)
        print("Continue training from last checkpoint...")
    else:
        if not os.path.exists(checkpoint_path):
            os.makedirs(checkpoint_path)
        # Create an empty placeholder file for future checkpoints.
        with open(PATH_MODEL, 'w') as fp:
            pass
        print(
            'No prior checkpoint existed, created new save files for checkpoint.'
        )
        model.apply(xavier_init_weights)
        last_epoch = 0

    ### Initialize Loss functions
    loss = MaskedSoftmaxCELoss()

    ### Train
    model.train()
    for epoch in range(last_epoch, num_epochs):
        timer = d2lt.Timer()
        metric = d2lt.Accumulator(2)  # Sum of training loss, no. of tokens
        for batch in data_iter:
            # BUG FIX: gradients were never cleared, so they accumulated
            # across batches and epochs.
            optimizer.zero_grad()
            X, X_valid_len, Y, Y_valid_len = [x.to(device) for x in batch]
            bos = torch.tensor([tgt_vocab['<bos>']] * Y.shape[0],
                               device=device).reshape(-1, 1)
            dec_input = torch.cat([bos, Y[:, :-1]], 1)  # Teacher forcing
            Y_hat, _ = model(X, dec_input, X_valid_len)
            l = loss(Y_hat, Y, Y_valid_len)
            l.sum().backward()  # Make the loss scalar for `backward`
            d2lt.grad_clipping(model, 1)
            num_tokens = Y_valid_len.sum()
            optimizer.step()
            with torch.no_grad():
                metric.add(l.sum(), num_tokens)
        if (epoch + 1) % 10 == 0:
            print(f'epoch {epoch + 1} - ' f'loss {metric[0] / metric[1]:.5f}')

        ### Save checkpoint
        save_checkpoint(epoch, model, optimizer)
    print(f'loss {metric[0] / metric[1]:.5f}, {metric[1] / timer.stop():.1f} '
          f'tokens/sec on {str(device)}')