Example #1
File: train.py  Project: ZNLP/ATSum
def main():
    logging.info("Build dataset...")
    prob_and_idx = get_prob_idx(opt.strategy)
    train_dataset = build_dataset(opt,
                                  opt.train,
                                  opt.vocab,
                                  device,
                                  prob_and_idx,
                                  train=True)
    valid_dataset = build_dataset(opt,
                                  opt.valid,
                                  opt.vocab,
                                  device,
                                  prob_and_idx,
                                  train=False)
    fields = valid_dataset.fields = train_dataset.fields
    logging.info("Build model...")

    pad_ids = {"src": fields["src"].pad_id, "tgt": fields["tgt"].pad_id}
    vocab_sizes = {
        "src": len(fields["src"].vocab),
        "tgt": len(fields["tgt"].vocab)
    }

    model = NMTModel.load_model(opt, pad_ids, vocab_sizes).to(device)
    criterion = LabelSmoothingLoss(opt.label_smoothing, vocab_sizes["tgt"],
                                   pad_ids["tgt"]).to(device)

    n_step = int(opt.train_from.split("-")[-1]) if opt.train_from else 1
    optimizer = WarmAdam(model.parameters(), opt.lr, opt.hidden_size,
                         opt.warm_up, n_step)

    logging.info("start training...")
    train(model, criterion, optimizer, train_dataset, valid_dataset)
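
WarmAdam's implementation is not shown on this page. Judging from its arguments (base learning rate, hidden size, warm-up steps, starting step), it likely follows the standard Transformer warm-up schedule; the class below is a minimal sketch under that assumption, not the project's actual optimizer.

import torch

class NoamWarmAdam:
    """Sketch of an Adam wrapper with the Transformer ('Noam') warm-up schedule,
    mirroring WarmAdam(params, lr, hidden_size, warm_up, n_step).
    The real WarmAdam in ZNLP/ATSum may differ in detail."""

    def __init__(self, params, lr, hidden_size, warm_up, n_step):
        self.optimizer = torch.optim.Adam(params, lr=lr, betas=(0.9, 0.998), eps=1e-9)
        self.base_lr = lr
        self.hidden_size = hidden_size
        self.warm_up = warm_up
        self.n_step = n_step

    def step(self):
        self.n_step += 1
        # lr = base_lr * d_model^-0.5 * min(step^-0.5, step * warmup^-1.5)
        scale = self.hidden_size ** -0.5 * min(
            self.n_step ** -0.5, self.n_step * self.warm_up ** -1.5)
        for group in self.optimizer.param_groups:
            group["lr"] = self.base_lr * scale
        self.optimizer.step()

    def zero_grad(self):
        self.optimizer.zero_grad()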
Example #2
def main():
    logging.info("Build dataset...")
    valid_dataset = build_dataset(opt, opt.valid, opt.vocab, device, train=False)
    logging.info("Built dataset valid ...")
    train_dataset = build_dataset(opt, opt.train, opt.vocab, device, train=True)
    logging.info("Built dataset train ...")
    
    fields = valid_dataset.fields = train_dataset.fields
    logging.info("Build model...")

    pad_ids = {"source": fields["source"].pad_id,
               "summary_cn": fields["summary_cn"].pad_id,
               "summary_en": fields["summary_en"].pad_id}
    vocab_sizes = {"source": len(fields["source"].vocab),
                   "summary_cn": len(fields["summary_cn"].vocab),
                   "summary_en": len(fields["summary_en"].vocab)}
    print(vocab_sizes)

    model = NMTModel.load_model(opt, pad_ids, vocab_sizes).to(device)
    criterion_cn = LabelSmoothingLoss(opt.label_smoothing, vocab_sizes["summary_cn"], pad_ids["summary_cn"]).to(device)
    criterion_en = LabelSmoothingLoss(opt.label_smoothing, vocab_sizes["summary_en"], pad_ids["summary_en"]).to(device)

    n_step = int(opt.train_from.split("-")[-1]) if opt.train_from else 1
    optimizer = WarmAdam(model.parameters(), opt.lr, opt.hidden_size, opt.warm_up, n_step)

    logging.info("start training...")
    train(model, pad_ids, vocab_sizes, criterion_cn, criterion_en, optimizer, train_dataset, valid_dataset)
Example #3
def main():
    sim_model = load_sim_model(opt.sim_model_file)

    logging.info("Build dataset...")
    train_dataset = build_dataset(opt, opt.train, opt.vocab, device, train=True)
    valid_dataset = build_dataset(opt, opt.valid, opt.vocab, device, train=False)
    fields = valid_dataset.fields = train_dataset.fields
    logging.info("Build model...")

    pad_ids = {"src": fields["src"].pad_id,
               "task1_tgt": fields["task1_tgt"].pad_id,
               "task2_tgt": fields["task2_tgt"].pad_id}
    vocab_sizes = {"src": len(fields["src"].vocab),
                   "task1_tgt": len(fields["task1_tgt"].vocab),
                   "task2_tgt": len(fields["task2_tgt"].vocab)}

    model = NMTModel.load_model(opt, pad_ids, vocab_sizes).to(device)
    criterion_task1 = MyLabelSmoothingLoss(opt.label_smoothing, vocab_sizes["task1_tgt"], pad_ids["task1_tgt"]).to(device)
    criterion_task2 = MyLabelSmoothingLoss(opt.label_smoothing, vocab_sizes["task2_tgt"], pad_ids["task2_tgt"]).to(device)

    n_step = 1
    optimizer = WarmAdam(model.parameters(), opt.lr, opt.hidden_size, opt.warm_up, n_step)

    logging.info("start training...")
    train(sim_model, model, criterion_task1, criterion_task2, optimizer, train_dataset, valid_dataset, fields, opt.alpha, opt.beta)
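
The opt.alpha and opt.beta weights are only passed through to train() here; how they are applied is not shown on this page. A hypothetical sketch of the common way two task losses are combined with such weights:

def combined_loss(criterion_task1, criterion_task2,
                  scores_task1, target_task1,
                  scores_task2, target_task2,
                  alpha, beta):
    # Hypothetical weighting; the actual combination inside ATSum's train()
    # is not shown in this example.
    loss1 = criterion_task1(scores_task1, target_task1)
    loss2 = criterion_task2(scores_task2, target_task2)
    return alpha * loss1 + beta * loss2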
Example #4
def main():
    logging.info("Build dataset...")
    train_dataset = build_dataset(opt,
                                  opt.train,
                                  opt.vocab,
                                  device,
                                  train=True)
    valid_dataset = build_dataset(opt,
                                  opt.valid,
                                  opt.vocab,
                                  device,
                                  train=False)
    fields = valid_dataset.fields = train_dataset.fields
    logging.info("Build model...")

    pad_ids = {
        "src": fields["src"].pad_id,
        "task1_tgt": fields["task1_tgt"].pad_id,
        "task2_tgt": fields["task2_tgt"].pad_id,
        "task3_tgt": fields["task3_tgt"].pad_id
    }
    vocab_sizes = {
        "src": len(fields["src"].vocab),
        "task1_tgt": len(fields["task1_tgt"].vocab),
        "task2_tgt": len(fields["task2_tgt"].vocab),
        "task3_tgt": len(fields["task3_tgt"].vocab)
    }

    print(vocab_sizes)

    model = NMTModel.load_model(opt, pad_ids, vocab_sizes).to(device)
    # if torch.cuda.device_count() > 1:
    #     print("Let's use", torch.cuda.device_count(), "GPUs!")
    #     model = nn.DataParallel(model, device_ids=[0, 1])

    # for MT
    criterion_task1 = LabelSmoothingLoss(opt.label_smoothing,
                                         vocab_sizes["task1_tgt"],
                                         pad_ids["task1_tgt"]).to(device)
    # for MS
    criterion_task2 = LabelSmoothingLoss(opt.label_smoothing,
                                         vocab_sizes["task2_tgt"],
                                         pad_ids["task2_tgt"]).to(device)
    # for CLS
    criterion_task3 = LabelSmoothingLoss(opt.label_smoothing,
                                         vocab_sizes["task3_tgt"],
                                         pad_ids["task3_tgt"]).to(device)

    n_step = int(opt.train_from.split("-")[-1]) if opt.train_from else 1
    optimizer = WarmAdam(model.parameters(), opt.lr, opt.hidden_size,
                         opt.warm_up, n_step)

    logging.info("start training...")
    train(model, pad_ids, vocab_sizes, criterion_task1, criterion_task2,
          criterion_task3, optimizer, train_dataset, valid_dataset)
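
LabelSmoothingLoss itself is not shown on this page. The class below is a minimal sketch of a standard label-smoothing criterion with the same constructor arguments (smoothing factor, target vocabulary size, padding id), assuming the model emits log-probabilities and using the common KL-divergence formulation; the project's own implementation may differ.

import torch
import torch.nn as nn
import torch.nn.functional as F

class SimpleLabelSmoothingLoss(nn.Module):
    """Sketch of a standard label-smoothing loss; assumed, not the project's code."""

    def __init__(self, label_smoothing, tgt_vocab_size, pad_id):
        super().__init__()
        self.pad_id = pad_id
        self.confidence = 1.0 - label_smoothing
        # Spread the smoothing mass over all non-target, non-padding tokens.
        self.smoothing_value = label_smoothing / (tgt_vocab_size - 2)

    def forward(self, log_probs, target):
        # log_probs: (batch * len, vocab); target: (batch * len,)
        model_prob = log_probs.new_full(log_probs.size(), self.smoothing_value)
        model_prob.scatter_(1, target.unsqueeze(1), self.confidence)
        model_prob[:, self.pad_id] = 0.0
        # Zero out positions whose target is padding.
        model_prob.masked_fill_((target == self.pad_id).unsqueeze(1), 0.0)
        return F.kl_div(log_probs, model_prob, reduction="sum")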
Example #5
def main():
    logger.info("Build dataset...")
    train_dataset = build_dataset(opt, opt.train, opt.vocab, device, train=True)
    valid_dataset = build_dataset(opt, opt.valid, opt.vocab, device, train=False)
    fields = valid_dataset.fields = train_dataset.fields
    logger.info("Build model...")

    model = NMTModel.load_model(loader, fields)
    criterion = LabelSmoothingLoss(opt.label_smoothing, len(fields["tgt"].vocab), fields["tgt"].pad_id)
    model = nn.DataParallel(FullModel(model, criterion)).to(device)

    optimizer = WarmAdam(model.module.model.parameters(), opt.lr, opt.hidden_size, opt.warm_up, loader.step)

    logger.info("start training...")
    train(model, optimizer, train_dataset, valid_dataset)
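
FullModel is not defined on this page. A common pattern behind a wrapper with this signature is to compute the loss inside forward() so that nn.DataParallel scatters both the model forward pass and the criterion across GPUs; the sketch below is hypothetical and its forward signature is assumed.

import torch.nn as nn

class FullModelSketch(nn.Module):
    """Hypothetical sketch of a FullModel(model, criterion)-style wrapper."""

    def __init__(self, model, criterion):
        super().__init__()
        self.model = model
        self.criterion = criterion

    def forward(self, src, tgt):
        scores = self.model(src, tgt)       # assumed model signature
        loss = self.criterion(scores, tgt)  # assumed criterion signature
        return loss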
Example #6
def main():
    logging.info("Build dataset...")
    prob_and_idx = get_prob_idx()
    dataset = build_dataset(opt, [opt.input, opt.truth or opt.input],
                            opt.vocab,
                            device,
                            prob_and_idx,
                            train=False)

    fields = dataset.fields
    pad_ids = {"src": fields["src"].pad_id, "tgt": fields["tgt"].pad_id}
    vocab_sizes = {
        "src": len(fields["src"].vocab),
        "tgt": len(fields["tgt"].vocab)
    }

    # load checkpoint from model_path
    logging.info("Load checkpoint from %s." % opt.model_path)
    checkpoint = torch.load(opt.model_path,
                            map_location=lambda storage, loc: storage)

    logging.info("Build model...")
    model = NMTModel.load_model(checkpoint["opt"], pad_ids, vocab_sizes,
                                checkpoint["model"]).to(device).eval()

    logging.info("Start translation...")
    with torch.set_grad_enabled(False):
        translate(dataset, fields, model)
Example #7
def main():
    logger.info("Build dataset...")
    dataset = build_dataset(opt, [opt.input, opt.input], opt.vocab, device, train=False)

    logger.info("Build model...")
    model = NMTModel.load_model(loader, dataset.fields).to(device).eval()

    logger.info("Start translation...")
    with torch.set_grad_enabled(False):
        translate(dataset, dataset.fields, model)
Example #8
def main():
    logging.info("Build dataset...")
    valid_dataset = build_dataset(opt,
                                  opt.valid,
                                  opt.vocab,
                                  device,
                                  train=False)
    fields = valid_dataset.fields

    pad_ids = {
        "source": fields["source"].pad_id,
        "summary_cn": fields["summary_cn"].pad_id,
        "summary_en": fields["summary_en"].pad_id
    }
    vocab_sizes = {
        "source": len(fields["source"].vocab),
        "summary_cn": len(fields["summary_cn"].vocab),
        "summary_en": len(fields["summary_en"].vocab)
    }

    # load checkpoint from model_path
    for ckpt in range(1100000, 1345001, 5000):
        model_path = opt.model_path + '/checkpoint-step-' + str(ckpt)
        logging.info("Load checkpoint from %s." % model_path)
        checkpoint = torch.load(model_path,
                                map_location=lambda storage, loc: storage)

        logging.info("Build model...")
        model = NMTModel.load_model(checkpoint["opt"], pad_ids, vocab_sizes,
                                    checkpoint["model"]).to(device).eval()
        criterion_cn = LabelSmoothingLoss(opt.label_smoothing,
                                          vocab_sizes["summary_cn"],
                                          pad_ids["summary_cn"]).to(device)
        criterion_en = LabelSmoothingLoss(opt.label_smoothing,
                                          vocab_sizes["summary_en"],
                                          pad_ids["summary_en"]).to(device)

        n_step = ckpt

        logging.info("Start translation...")
        with torch.set_grad_enabled(False):
            valid(model, criterion_cn, criterion_en, valid_dataset, n_step)
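
The loop above hard-codes the checkpoint step range. As a small alternative sketch, the checkpoints could instead be discovered on disk, assuming only the 'checkpoint-step-<n>' naming convention shown above:

import glob
import os

def list_checkpoints(model_dir):
    """Return 'checkpoint-step-<n>' files under model_dir, sorted by step number."""
    paths = glob.glob(os.path.join(model_dir, "checkpoint-step-*"))
    return sorted(paths, key=lambda p: int(p.rsplit("-", 1)[-1]))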
Example #9
def main():
    # (preceding lines of this example are missing; the counter initialisation
    # and loop header below are an assumed reconstruction of the truncated part)
    total, encoder, task1, task2 = 0, 0, 0, 0
    for layer_tensor_name, tensor in model.state_dict().items():
        total += torch.numel(tensor)
        if 'encoder' in layer_tensor_name:
            encoder += torch.numel(tensor)
        if 'cn_decoder' in layer_tensor_name:
            task1 += torch.numel(tensor)
        if 'en_decoder' in layer_tensor_name:
            task2 += torch.numel(tensor)
    print('encoder: {}, task1: {}, task2: {}, Total parameters: {}'.format(
        encoder, task1, task2, total))
    pytorch_total_params = sum(p.numel() for p in model.parameters()
                               if p.requires_grad)
    print(pytorch_total_params)
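
As a compact, self-contained alternative to the per-component tallying above, the same breakdown can be obtained from model.named_parameters(); the sub-module names ('encoder', 'cn_decoder', 'en_decoder') are taken from the string checks in the example.

from collections import Counter

def parameter_counts(model):
    """Group trainable-parameter counts by top-level sub-module name,
    plus a grand total."""
    counts = Counter()
    for name, param in model.named_parameters():
        if param.requires_grad:
            counts[name.split(".")[0]] += param.numel()
            counts["total"] += param.numel()
    return dict(counts)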


if __name__ == '__main__':
    train_dataset = build_dataset(opt,
                                  opt.train,
                                  opt.vocab,
                                  device,
                                  train=True)
    fields = train_dataset.fields
    logging.info("Build model...")
    pad_ids = {
        "source": fields["source"].pad_id,
        "summary_cn": fields["summary_cn"].pad_id,
        "summary_en": fields["summary_en"].pad_id
    }
    vocab_sizes = {
        "source": len(fields["source"].vocab),
        "summary_cn": len(fields["summary_cn"].vocab),
        "summary_en": len(fields["summary_en"].vocab)
    }