Example #1
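These snippets show only each script's main(); the module-level imports are omitted. A plausible shared preamble, inferred from the names used in the examples below, is sketched here; utils and eval_utils are project-local modules (an assumption about this repository), and data and build_model are deliberately imported inside main() after CUDA_VISIBLE_DEVICES is set.

# Assumed imports for the examples in this section; the exact set differs per
# script, and the project-local modules are assumptions, not standard libraries.
import argparse
import os
import time

import numpy as np
import torch
import yaml
import youtokentome as yttm
from tqdm import tqdm

import utils       # translation examples: utils.eval_dataset (assumed)
import eval_utils  # speech examples: eval_dataset / get_error (assumed)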
def main():
    parser = argparse.ArgumentParser(description="Compute BLEU.")
    parser.add_argument('ckpt', type=str, help="Checkpoint to restore.")
    parser.add_argument('--dir',
                        type=str,
                        default="./wmt14",
                        help="Directory of dataset.")
    parser.add_argument('--split',
                        default='test',
                        type=str,
                        help="Specify which split of data to evaluate.")
    parser.add_argument(
        '--gpu_id',
        default=0,
        type=int,
        help="CUDA visible GPU ID. Currently only support single GPU.")
    parser.add_argument('--beams',
                        default=1,
                        type=int,
                        help="Beam Search width.")
    args = parser.parse_args()

    os.environ["CUDA_VISIBLE_DEVICES"] = str(args.gpu_id)
    assert torch.cuda.is_available()
    import data
    import build_model

    # Restore checkpoint
    info = torch.load(args.ckpt)
    cfg = info['cfg']

    # Build model
    bpe_model = yttm.BPE(model=cfg['bpe'])
    model = build_model.Seq2Seq(bpe_model.vocab_size(),
                                bpe_model.vocab_size(),
                                hidden_size=cfg['model']['hidden_size'],
                                encoder_layers=cfg['model']['encoder_layers'],
                                decoder_layers=cfg['model']['decoder_layers'],
                                use_bn=cfg['model']['use_bn'])
    model.load_state_dict(info['weights'])
    model.eval()
    model = model.cuda()

    # Create dataset
    if args.beams == 1:
        batch_size = cfg['train']['batch_size']
    else:
        batch_size = 1
    loader = data.load(args.dir,
                       split=args.split,
                       batch_size=batch_size,
                       bpe_model=bpe_model)

    # Evaluate
    _, bleu = utils.eval_dataset(loader, model, bpe_model, args.beams)
    print("BLEU on %s set = %.4f" % (args.split, error))
Example #2
def main():
    parser = argparse.ArgumentParser(description="Compute error rate.")
    parser.add_argument('ckpt', type=str, help="Checkpoint to restore.")
    parser.add_argument('--split',
                        default='test',
                        type=str,
                        help="Specify which split of data to evaluate.")
    parser.add_argument(
        '--gpu_id',
        default=0,
        type=int,
        help="CUDA visible GPU ID. Currently only support single GPU.")
    parser.add_argument('--beams',
                        default=1,
                        type=int,
                        help="Beam Search width.")
    parser.add_argument('--workers',
                        default=0,
                        type=int,
                        help="How many subprocesses to use for data loading.")
    args = parser.parse_args()

    os.environ["CUDA_VISIBLE_DEVICES"] = str(args.gpu_id)
    assert torch.cuda.is_available()
    import data
    import build_model

    # Restore checkpoint
    info = torch.load(args.ckpt)
    cfg = info['cfg']

    # Create dataset
    if args.beams == 1:
        batch_size = cfg['train']['batch_size']
    else:
        batch_size = 1
    loader = data.load(split=args.split,
                       batch_size=batch_size,
                       workers=args.workers)

    # Build model
    tokenizer = torch.load('tokenizer.pth')
    model = build_model.Seq2Seq(len(tokenizer.vocab),
                                hidden_size=cfg['model']['hidden_size'],
                                encoder_layers=cfg['model']['encoder_layers'],
                                decoder_layers=cfg['model']['decoder_layers'],
                                use_bn=cfg['model']['use_bn'])
    model.load_state_dict(info['weights'])
    model.eval()
    model = model.cuda()

    # Evaluate
    _, error = eval_utils.eval_dataset(loader, model, args.beams)
    print("Error rate on %s set = %.4f" % (args.split, error))
Example #3
def main():
    parser = argparse.ArgumentParser(description="Compute error rate.")
    parser.add_argument('ckpt', type=str, help="Checkpoint to restore.")
    parser.add_argument('--split',
                        default='test',
                        type=str,
                        help="Specify which split of data to evaluate.")
    parser.add_argument(
        '--gpu_id',
        default=0,
        type=int,
        help="CUDA visible GPU ID. Currently only support single GPU.")
    args = parser.parse_args()

    os.environ["CUDA_VISIBLE_DEVICES"] = str(args.gpu_id)
    assert torch.cuda.is_available()
    import data
    import build_model

    # Restore checkpoint
    info = torch.load(args.ckpt)
    print("Dev. error rate of checkpoint: %.4f @epoch: %d" %
          (info['dev_error'], info['epoch']))

    cfg = info['cfg']

    # Create dataset
    loader = data.load(split=args.split, batch_size=cfg['train']['batch_size'])

    # Build model
    tokenizer = torch.load('tokenizer.pth')
    model = build_model.Seq2Seq(len(tokenizer.vocab),
                                hidden_size=cfg['model']['hidden_size'],
                                encoder_layers=cfg['model']['encoder_layers'],
                                decoder_layers=cfg['model']['decoder_layers'])
    model.load_state_dict(info['weights'])
    model.eval()
    model = model.cuda()

    # Evaluate
    error = eval_utils.get_error(loader, model)
    print("Error rate on %s set = %.4f" % (args.split, error))
Example #4
def main():
    parser = argparse.ArgumentParser(description="Train the model.")
    parser.add_argument('cfg',
                        type=str,
                        help="Specify which experiment config file to use.")
    parser.add_argument('--dir',
                        type=str,
                        default="./wmt14",
                        help="Directory of dataset.")
    parser.add_argument(
        '--gpu_id',
        default=0,
        type=int,
        help="CUDA visible GPU ID. Currently only support single GPU.")
    parser.add_argument('--workers',
                        default=0,
                        type=int,
                        help="How many subprocesses to use for data loading.")
    args = parser.parse_args()

    os.environ["CUDA_VISIBLE_DEVICES"] = str(args.gpu_id)
    assert torch.cuda.is_available()
    import data
    import build_model

    with open(args.cfg) as f:
        cfg = yaml.load(f, Loader=yaml.FullLoader)

    save_path = os.path.splitext(args.cfg)[0]
    if not os.path.exists(save_path):
        os.mkdir(save_path)

    # Build model
    bpe_model = yttm.BPE(model=cfg['bpe'])
    model = build_model.Seq2Seq(bpe_model.vocab_size(),
                                bpe_model.vocab_size(),
                                hidden_size=cfg['model']['hidden_size'],
                                encoder_layers=cfg['model']['encoder_layers'],
                                decoder_layers=cfg['model']['decoder_layers'],
                                drop_p=cfg['model']['drop_p'],
                                use_bn=cfg['model']['use_bn'])
    model = model.cuda()

    # Create dataset
    train_loader = data.load(args.dir,
                             split='train',
                             batch_size=cfg['train']['batch_size'],
                             bpe_model=bpe_model,
                             workers=args.workers)
    dev_loader = data.load(args.dir,
                           split='dev',
                           batch_size=cfg['train']['batch_size'],
                           bpe_model=bpe_model)

    # Training criteria
    optimizer = torch.optim.Adam(model.parameters(),
                                 lr=cfg['train']['init_lr'])
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
        optimizer,
        mode='min',
        factor=cfg['train']['decay_factor'],
        patience=cfg['train']['patience'],
        threshold=0.01,
        min_lr=1e-6)
    assert cfg['train']['metric'] in ['loss', 'bleu']

    # Restore checkpoints
    if os.path.exists(os.path.join(save_path, 'last.pth')):
        info = torch.load(os.path.join(save_path, 'last.pth'))
        epoch = info['epoch']
        model.load_state_dict(info['weights'])
        optimizer.load_state_dict(info['optimizer'])
        scheduler.load_state_dict(info['scheduler'])
    else:
        epoch = 0

    if os.path.exists(os.path.join(save_path, 'best.pth')):
        info = torch.load(os.path.join(save_path, 'best.pth'))
        best_epoch = info['epoch']
        best_bleu = info['dev_bleu']
    else:
        best_epoch = 0
        best_bleu = 0

    while True:
        print("---")
        epoch += 1
        print("Epoch: %d" % (epoch))
        # Show learning rate
        lr = get_lr(optimizer)
        print("Learning rate: %f" % lr)

        # Training loop
        model.train()
        train_loss = []
        train_tqdm = tqdm(train_loader, desc="Training")
        for (xs, ys) in train_tqdm:
            loss = model(xs.cuda(), ys.cuda())
            train_loss.append(loss.item())
            train_tqdm.set_postfix(loss="%.3f" % np.mean(train_loss))

            optimizer.zero_grad()
            loss.backward()
            torch.nn.utils.clip_grad_norm_(model.parameters(),
                                           5.)  # Gradient clipping
            optimizer.step()

        # Validation loop
        model.eval()
        dev_loss, dev_bleu = utils.eval_dataset(dev_loader, model, bpe_model)
        print("Dev. loss: %.3f," % dev_loss, end=' ')
        print("dev. BLEU: %.4f" % dev_bleu)
        if dev_bleu > best_bleu:
            best_bleu = dev_bleu
            best_epoch = epoch
            # Save best model
            save_checkpoint("best.pth", save_path, best_epoch, best_bleu, cfg,
                            model, optimizer, scheduler)
        print("Best dev. BLEU: %.4f @epoch: %d" % (best_bleu, best_epoch))

        # Update learning rate scheduler
        if cfg['train']['metric'] == 'loss':
            scheduler.step(dev_loss)
        else:
            scheduler.step(1 - dev_bleu)

        # Save checkpoint
        save_checkpoint("last.pth", save_path, epoch, dev_bleu, cfg, model,
                        optimizer, scheduler)

        # Logging
        datetime = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())
        msg = "%s,%d,%f,%f,%f,%f" % (datetime, epoch, lr, np.mean(train_loss),
                                     dev_loss, dev_bleu)
        log_history(save_path, msg)
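The training loop above calls three helpers that are not part of these snippets. A sketch of what they might look like, matching this example's call sites and the checkpoint keys restored earlier (the bodies are assumptions; Example #6 below uses a save_checkpoint variant that receives a state dict instead of the live objects):

# Sketch only: helpers matching the call sites in this example.
def get_lr(optimizer):
    # Report the learning rate of the first parameter group.
    return optimizer.param_groups[0]['lr']


def save_checkpoint(filename, save_path, epoch, dev_bleu, cfg, model, optimizer, scheduler):
    # Bundle everything needed to resume training or to evaluate later.
    info = {'epoch': epoch,
            'dev_bleu': dev_bleu,
            'cfg': cfg,
            'weights': model.state_dict(),
            'optimizer': optimizer.state_dict(),
            'scheduler': scheduler.state_dict()}
    torch.save(info, os.path.join(save_path, filename))


def log_history(save_path, msg):
    # Append one CSV row per epoch: datetime, epoch, lr, train_loss, dev_loss, dev_bleu.
    with open(os.path.join(save_path, 'history.csv'), 'a') as f:
        f.write(msg + '\n')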
Example #5
def main():
    parser = argparse.ArgumentParser(
        description="Train the model on the DEVELOPMENT set to make sure it can overfit.")
    parser.add_argument('cfg',
                        type=str,
                        help="Specify which experiment config file to use.")
    parser.add_argument('--dir',
                        type=str,
                        default="./wmt14",
                        help="Directory of dataset.")
    parser.add_argument(
        '--gpu_id',
        default=0,
        type=int,
        help="CUDA visible GPU ID. Currently only support single GPU.")
    args = parser.parse_args()

    os.environ["CUDA_VISIBLE_DEVICES"] = str(args.gpu_id)
    assert torch.cuda.is_available()
    import data
    import build_model

    with open(args.cfg) as f:
        cfg = yaml.load(f, Loader=yaml.FullLoader)

    # Build model
    bpe_model = yttm.BPE(model=cfg['bpe'])
    model = build_model.Seq2Seq(bpe_model.vocab_size(),
                                bpe_model.vocab_size(),
                                hidden_size=cfg['model']['hidden_size'],
                                encoder_layers=cfg['model']['encoder_layers'],
                                decoder_layers=cfg['model']['decoder_layers'],
                                drop_p=cfg['model']['drop_p'],
                                use_bn=cfg['model']['use_bn'])
    model = model.cuda()

    # Create dataset
    dev_loader = data.load(args.dir,
                           split='dev',
                           batch_size=32,
                           bpe_model=bpe_model)

    # Training criteria
    optimizer = torch.optim.Adam(model.parameters(),
                                 lr=cfg['train']['init_lr'])

    epoch = 0
    best_epoch = 0
    best_bleu = 0
    while True:
        print("---")
        epoch += 1
        print("Epoch: %d" % (epoch))

        # Training loop
        model.train()
        train_loss = []
        train_tqdm = tqdm(dev_loader, desc="Training")
        for (xs, ys) in train_tqdm:
            loss = model(xs.cuda(), ys.cuda())
            train_loss.append(loss.item())
            train_tqdm.set_postfix(loss="%.3f" % np.mean(train_loss))

            optimizer.zero_grad()
            loss.backward()
            torch.nn.utils.clip_grad_norm_(model.parameters(),
                                           5.)  # Gradient clipping
            optimizer.step()

        # Validation loop
        model.eval()
        dev_loss, dev_bleu = utils.eval_dataset(dev_loader, model, bpe_model)
        print("Dev. loss: %.3f," % dev_loss, end=' ')
        print("dev. BLEU: %.4f" % dev_bleu)
        if dev_bleu > best_bleu:
            best_bleu = dev_bleu
            best_epoch = epoch
        print("Best dev. BLEU: %.4f @epoch: %d" % (best_bleu, best_epoch))
Example #6
def main():
    parser = argparse.ArgumentParser(description="Train the model.")
    parser.add_argument('cfg',
                        type=str,
                        help="Specify which experiment config file to use.")
    parser.add_argument(
        '--gpu_id',
        default=0,
        type=int,
        help="CUDA visible GPU ID. Currently only support single GPU.")
    parser.add_argument('--workers',
                        default=0,
                        type=int,
                        help="How many subprocesses to use for data loading.")
    parser.add_argument(
        '--ckpt_freq',
        default=10,
        type=int,
        help="Frequency (number of epochs) to save checkpoints.")
    args = parser.parse_args()

    os.environ["CUDA_VISIBLE_DEVICES"] = str(args.gpu_id)
    assert torch.cuda.is_available()
    import data
    import build_model

    with open(args.cfg) as f:
        cfg = yaml.load(f, Loader=yaml.FullLoader)

    if cfg['logdir']:
        save_path = cfg['logdir']
    else:
        save_path = os.path.splitext(args.cfg)[0]
    if not os.path.exists(save_path):
        os.mkdir(save_path)

    # Create dataset
    train_loader = data.load(split='train',
                             batch_size=cfg['train']['batch_size'],
                             workers=args.workers)
    dev_loader = data.load(split='dev', batch_size=cfg['train']['batch_size'])

    # Build model
    tokenizer = torch.load('tokenizer.pth')
    model = build_model.Seq2Seq(len(tokenizer.vocab),
                                hidden_size=cfg['model']['hidden_size'],
                                encoder_layers=cfg['model']['encoder_layers'],
                                decoder_layers=cfg['model']['decoder_layers'],
                                drop_p=cfg['model']['drop_p'])
    model = model.cuda()

    # Training criteria
    optimizer = torch.optim.Adam(model.parameters(),
                                 lr=cfg['train']['init_lr'])
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
        optimizer,
        mode='min',
        factor=cfg['train']['decay_factor'],
        patience=cfg['train']['patience'],
        min_lr=1e-6)

    best_epoch = 0
    best_error = float('inf')
    for epoch in range(cfg['train']['epochs'] + 1):
        print("---")
        # Show learning rate
        lr = get_lr(optimizer)
        print("Learning rate: %f" % lr)

        # Training loop
        model.train()
        train_loss = 0
        n_tokens = 0
        for step, (xs, xlens, ys) in enumerate(train_loader):
            loss = model(xs.cuda(), xlens, ys.cuda())
            train_loss += loss.item() * (ys[:, 1:] > 0).long().sum()
            n_tokens += (ys[:, 1:] > 0).long().sum()

            optimizer.zero_grad()
            loss.backward()
            torch.nn.utils.clip_grad_norm_(model.parameters(),
                                           5.)  # Gradient clipping
            optimizer.step()

            if not step % 10:
                print(time.strftime("%H:%M:%S", time.localtime()), end=' ')
                print("epoch: %d, step: %d, loss: %.3f" %
                      (epoch, step, loss.item()))
        train_loss = train_loss / n_tokens

        # Validation loop
        model.eval()
        # Compute dev loss
        dev_loss = 0
        n_tokens = 0
        with torch.no_grad():
            for (xs, xlens, ys) in dev_loader:
                dev_loss += model(xs.cuda(), xlens, ys.cuda()).item() * (
                    ys[:, 1:] > 0).long().sum()
                n_tokens += (ys[:, 1:] > 0).long().sum()
        dev_loss = dev_loss / n_tokens
        # Compute dev error rate
        error = eval_utils.get_error(dev_loader, model)
        print("Dev. loss: %.3f," % dev_loss, end=' ')
        print("dev. error rate: %.4f" % error)
        if error < best_error:
            best_error = error
            best_epoch = epoch
            # Save best model
            save_checkpoint("best.pth", save_path, best_epoch, best_error, cfg,
                            model.state_dict())
        print("Best dev. error rate: %.4f @epoch: %d" %
              (best_error, best_epoch))

        scheduler.step(error)

        # Save checkpoint
        if not epoch % args.ckpt_freq or epoch == cfg['train']['epochs']:
            save_checkpoint("checkpoint_%05d.pth" % epoch, save_path, epoch,
                            error, cfg, model.state_dict())

        # Logging
        datetime = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())
        msg = "%s,%d,%f,%f,%f,%f" % (datetime, epoch, lr, train_loss, dev_loss,
                                     error)
        log_history(save_path, msg)
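The training scripts load their hyperparameters from the YAML file passed as cfg. Collected from the keys referenced across Examples #4 to #6, that file is expected to deserialize into roughly the following structure, shown here as the equivalent Python dict; only the key names come from the code above, the values are placeholders.

# Placeholder values; only the key names are taken from the examples above.
cfg = {
    'bpe': 'bpe.model',      # path to the YouTokenToMe BPE model (translation scripts)
    'logdir': None,          # optional output directory (Example #6)
    'model': {
        'hidden_size': 512,
        'encoder_layers': 4,
        'decoder_layers': 2,
        'drop_p': 0.2,
        'use_bn': True,
    },
    'train': {
        'batch_size': 64,
        'init_lr': 3e-4,
        'decay_factor': 0.5,
        'patience': 3,
        'metric': 'bleu',    # 'loss' or 'bleu' (Example #4)
        'epochs': 100,       # fixed epoch budget (Example #6)
    },
}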
Example #7
def main():
    parser = argparse.ArgumentParser(
        description="Test on random audio from the dataset and visualize the attention matrix.")
    parser.add_argument('ckpt', type=str, help="Checkpoint to restore.")
    parser.add_argument('--split',
                        default='test',
                        type=str,
                        help="Specify which split of data to evaluate.")
    parser.add_argument(
        '--gpu_id',
        default=0,
        type=int,
        help="CUDA visible GPU ID. Currently only support single GPU.")
    parser.add_argument('--beams',
                        default=1,
                        type=int,
                        help="Beam Search width.")
    args = parser.parse_args()

    os.environ["CUDA_VISIBLE_DEVICES"] = str(args.gpu_id)
    assert torch.cuda.is_available()
    import data
    import build_model

    # Restore checkpoint
    info = torch.load(args.ckpt)
    cfg = info['cfg']

    # Create dataset
    loader = data.load(split=args.split, batch_size=1)

    # Build model
    tokenizer = torch.load('tokenizer.pth')
    model = build_model.Seq2Seq(len(tokenizer.vocab),
                                hidden_size=cfg['model']['hidden_size'],
                                encoder_layers=cfg['model']['encoder_layers'],
                                decoder_layers=cfg['model']['decoder_layers'],
                                use_bn=cfg['model']['use_bn'])
    model.load_state_dict(info['weights'])
    model.eval()
    model = model.cuda()

    # Inference
    with torch.no_grad():
        for (x, xlens, y) in loader:
            predictions, attentions = model(x.cuda(),
                                            xlens,
                                            beam_width=args.beams)
            predictions, attentions = predictions[0], attentions[0]
            predictions = tokenizer.decode(predictions)
            # attentions: (target_length, source_length)
            attentions = attentions[:len(predictions.split())].cpu().numpy()
            ground_truth = tokenizer.decode(y[0])
            print("Predict:")
            print(predictions)
            print("Ground-truth:")
            print(ground_truth)
            print()
            showAttention(predictions, attentions)
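showAttention is not included in these snippets. A minimal matplotlib sketch of such a helper, assuming the attention matrix has shape (target_length, source_length) as noted in the comment above:

# Hypothetical sketch of an attention-plotting helper.
import matplotlib.pyplot as plt


def showAttention(prediction, attention):
    # attention: (target_length, source_length); one row per predicted token.
    fig, ax = plt.subplots()
    im = ax.matshow(attention, aspect='auto')
    fig.colorbar(im)
    tokens = prediction.split()
    ax.set_yticks(range(len(tokens)))
    ax.set_yticklabels(tokens)
    ax.set_xlabel("Encoder time steps")
    ax.set_ylabel("Predicted tokens")
    plt.show()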