Example 1
def train(args):
    batch_size = 16

    # base_output_path and the hyperparameters referenced below
    # (min_window_size, num_windows, embedding_size, hidden_size,
    # max_target_length, epoch, accumulation_steps, collate_fn) are
    # module-level settings rather than command-line arguments.
    output_path = base_output_path
    dataset = args.dataset
    data_path = args.data_path + dataset + '/' + dataset

    tokenizer, vocab2id, id2vocab = bert_tokenizer()
    detokenizer = bert_detokenizer()

    train_samples = torch.load(data_path + '.pkl')
    marco_train_size = len(train_samples)
    train_dataset = GLKSDataset(train_samples,
                                None,
                                None,
                                None,
                                None,
                                None,
                                None,
                                None,
                                None,
                                sample_tensor=torch.load(data_path +
                                                         '.GLKS.dataset.pkl'))

    model = GLKS(min_window_size,
                 num_windows,
                 embedding_size,
                 hidden_size,
                 vocab2id,
                 id2vocab,
                 max_dec_len=max_target_length,
                 beam_width=1,
                 emb_matrix=None)
    init_params(model)

    # Estimated number of parameter updates: epochs * samples divided by the
    # effective batch size; the hard-coded 4 is presumably the number of GPUs,
    # matching the 4 passed to CumulativeTrainer below.
    model_bp_count = (epoch * marco_train_size) / (4 * batch_size *
                                                   accumulation_steps)
    model_optimizer = optim.Adam(model.parameters(), lr=2.5e-4)
    # Cosine schedule with hard restarts: 2000 warmup steps, then decay over
    # the estimated number of updates (plus a small safety margin).
    model_scheduler = get_cosine_with_hard_restarts_schedule_with_warmup(
        model_optimizer, 2000,
        int(model_bp_count) + 100)
    model_trainer = CumulativeTrainer(model,
                                      tokenizer,
                                      detokenizer,
                                      args.local_rank,
                                      4,
                                      accumulation_steps=accumulation_steps)

    for i in range(epoch):
        model_trainer.train_epoch('ds_mle_mce_train', train_dataset,
                                  collate_fn, batch_size, i, model_optimizer,
                                  model_scheduler)
        model_trainer.serialize(i, output_path=output_path)
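
Only args.dataset, args.data_path, and args.local_rank are read from args here; everything else is a module-level global. A minimal, hypothetical driver for the function above might look like the following (flag names follow the attributes actually read, the default values are illustrative assumptions):

import argparse

if __name__ == '__main__':
    # Hypothetical wrapper around the GLKS train() defined above; defaults
    # are illustrative assumptions, not values from the original project.
    parser = argparse.ArgumentParser()
    parser.add_argument('--dataset', type=str, default='marco')       # assumed name
    parser.add_argument('--data_path', type=str, default='datasets/')
    parser.add_argument('--local_rank', type=int, default=-1)         # single-process default
    train(parser.parse_args())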
Example 2
def train(args):
    tokenizer, vocab2id, id2vocab = bert_tokenizer()
    detokenizer = bert_detokenizer()

    data_path = os.path.join(args.data_path, args.dataset + '/')

    train_samples = torch.load(
        os.path.join(data_path, args.dataset + '.train.pkl'))
    train_size = len(train_samples)
    train_dataset = CaSEDataset(
        train_samples,
        None,
        None,
        None,
        None,
        None,
        None,
        None,
        None,
        None,
        None,
        sample_tensor=torch.load(
            os.path.join(data_path, args.dataset + '.train.CaSE.dataset.pkl')))

    model = CaSE(args.max_span_size, args.max_target_length, id2vocab,
                 vocab2id, args.hidden_size)
    init_params(model)

    model_bp_count = (args.epoch * train_size) / (
        args.num_gpu * args.batch_size * args.accumulation_steps)
    model_optimizer = optim.Adam(model.parameters(), lr=2.5e-4)
    model_scheduler = get_cosine_with_hard_restarts_schedule_with_warmup(
        model_optimizer, 2000,
        int(model_bp_count) + 100)
    model_trainer = CumulativeTrainer(
        model,
        tokenizer,
        detokenizer,
        args.local_rank,
        args.num_gpu,
        accumulation_steps=args.accumulation_steps)

    for i in range(args.epoch):
        model_trainer.train_epoch('train', train_dataset, collate_fn,
                                  args.batch_size, i, model_optimizer,
                                  model_scheduler)
        model_trainer.serialize(i, output_path=args.output_path)
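
In this variant every hyperparameter comes from args, so the run is fully configurable from the command line. A hypothetical argparse wrapper covering the attributes that train(args) reads could look like this (all defaults are illustrative assumptions):

import argparse

if __name__ == '__main__':
    # Hypothetical wrapper around the CaSE train() defined above; flag names
    # mirror the attributes read from `args`, defaults are assumptions.
    parser = argparse.ArgumentParser()
    parser.add_argument('--data_path', type=str, default='datasets/')
    parser.add_argument('--dataset', type=str, default='mydata')
    parser.add_argument('--output_path', type=str, default='output/')
    parser.add_argument('--max_span_size', type=int, default=4)
    parser.add_argument('--max_target_length', type=int, default=80)
    parser.add_argument('--hidden_size', type=int, default=256)
    parser.add_argument('--epoch', type=int, default=20)
    parser.add_argument('--num_gpu', type=int, default=4)
    parser.add_argument('--batch_size', type=int, default=16)
    parser.add_argument('--accumulation_steps', type=int, default=1)
    parser.add_argument('--local_rank', type=int, default=-1)
    train(parser.parse_args())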
Example 3
def train(args):
    batch_size = args.train_batch_size
    ratio = args.profile_dropout_ratio
    policy = args.neighbor_policy
    # src, task, and the other bare names used below (_keys, cut_data_index,
    # output_model_path, hidden_size, epoches, accumulation_steps, collate_fn,
    # save_data_attributes) are module-level settings.
    task_dir = '%s/%s-%s' % (src, task, policy)
    drop_attr = ''
    if args.keep_attributes is not None:
        for k in _keys:
            if k not in args.keep_attributes:
                drop_attr += '_%s' % k

    _, _, _, kb_vocab = torch.load('%s/kbs.pkl' % task_dir)
    candidates = torch.load('%s/candidates.pkl' % task_dir)
    candidate_tensor = torch.load('%s/candidate.ctds.pkl' % task_dir)
    # candidate_tensor = candidate_tensor.cuda() if torch.cuda.is_available() else candidate_tensor
    train_samples = torch.load('%s/train.pkl' % task_dir)
    train_sample_tensor = torch.load('%s/train.ctds-%s%s.pkl' %
                                     (task_dir, ratio, drop_attr))
    meta_data = torch.load('%s/meta.pkl' % task_dir)
    vocab2id, id2vocab = torch.load('%s/vocab.pkl' % task_dir)
    tokenizer = babi_tokenizer

    print('Item size', len(vocab2id))

    train_dataset = CTDSDataset(
        train_samples[:cut_data_index],
        candidates,
        meta_data,
        tokenizer,
        vocab2id,
        id2vocab,
        sample_tensor=train_sample_tensor[:cut_data_index],
        train_sample_tensor=train_sample_tensor)

    if args.train_epoch_start > 0:  # load a model and continue to train
        file = os.path.join(output_model_path,
                            str(args.train_epoch_start) + '.pkl')

        if os.path.exists(file):
            model = CTDS(hidden_size, vocab2id, id2vocab, candidate_tensor,
                         meta_data)
            model.load_state_dict(torch.load(file, map_location='cpu'))
        else:
            # Bail out: otherwise `model` is left undefined below.
            print('ERR: checkpoint not found: %s' % file)
            return

    else:
        model = CTDS(hidden_size, vocab2id, id2vocab, candidate_tensor,
                     meta_data)
        init_params(model)

    train_size = len(train_dataset)
    # Estimated total number of parameter updates (the hard-coded 4 is
    # presumably the GPU count passed to CumulativeTrainer below).
    model_bp_count = (epoches * train_size) / (
        4 * batch_size * accumulation_steps)
    model_optimizer = optim.Adam(model.parameters(),
                                 lr=args.lr,
                                 weight_decay=args.weight_decay)
    # model_optimizer = optim.Adam(model.parameters(), lr=args.lr)
    # model_optimizer = optim.SGD(model.parameters(), lr=args.lr, momentum=0.9, nesterov=True)
    if args.warmup > 0:
        model_scheduler = get_cosine_with_hard_restarts_schedule_with_warmup(
            model_optimizer, round(args.warmup * model_bp_count),
            int(model_bp_count) + 100)
    else:
        model_scheduler = None
    model_trainer = CumulativeTrainer(
        model,
        tokenizer,
        None,
        args.local_rank,
        4,
        accumulation_steps=accumulation_steps,
        max_grad_norm=args.max_grad_norm,
        save_data_attributes=save_data_attributes)

    for i in range(args.train_epoch_start, epoches):
        model_trainer.train_epoch('train', train_dataset, collate_fn,
                                  batch_size, i, model_optimizer,
                                  model_scheduler)
        model_trainer.serialize(i, output_path=output_model_path)
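
This version mixes command-line arguments with module-level globals (src, task, epoches, output_model_path, and so on). A hypothetical invocation covering only the attributes read from args might look like this (defaults are illustrative assumptions):

import argparse

if __name__ == '__main__':
    # Hypothetical wrapper around the CTDS train() defined above; flag names
    # mirror the attributes read from `args`, defaults are assumptions.
    parser = argparse.ArgumentParser()
    parser.add_argument('--train_batch_size', type=int, default=32)
    parser.add_argument('--profile_dropout_ratio', type=float, default=0.0)
    parser.add_argument('--neighbor_policy', type=str, default='default')
    parser.add_argument('--keep_attributes', nargs='*', default=None)
    parser.add_argument('--train_epoch_start', type=int, default=0)   # 0 = train from scratch
    parser.add_argument('--lr', type=float, default=2.5e-4)
    parser.add_argument('--weight_decay', type=float, default=0.0)
    parser.add_argument('--warmup', type=float, default=0.1)          # fraction of total updates
    parser.add_argument('--max_grad_norm', type=float, default=1.0)
    parser.add_argument('--local_rank', type=int, default=-1)
    train(parser.parse_args())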