Example #1
def train(args):
    if not exists(args.path):
        os.makedirs(args.path)

    # make net
    agent, agent_vocab, abstractor, net_args = configure_net(
        args.abs_dir, args.ext_dir, args.cuda)

    # configure training setting
    assert args.stop > 0
    train_params = configure_training(
        'adam', args.lr, args.clip, args.decay, args.batch,
        args.gamma, args.reward, args.stop, 'rouge-1'
    )
    train_batcher, val_batcher = build_batchers(args.batch)
    # TODO different reward
    reward_fn = compute_rouge_l
    stop_reward_fn = compute_rouge_n(n=1)

    # save abstractor binary
    if args.abs_dir is not None:
        abs_ckpt = {}
        abs_ckpt['state_dict'] = load_best_ckpt(args.abs_dir)
        abs_vocab = pkl.load(open(join(args.abs_dir, 'vocab.pkl'), 'rb'))
        abs_dir = join(args.path, 'abstractor')
        os.makedirs(join(abs_dir, 'ckpt'))
        with open(join(abs_dir, 'meta.json'), 'w') as f:
            json.dump(net_args['abstractor'], f, indent=4)
        torch.save(abs_ckpt, join(abs_dir, 'ckpt/ckpt-0-0'))
        with open(join(abs_dir, 'vocab.pkl'), 'wb') as f:
            pkl.dump(abs_vocab, f)
    # save configuration
    meta = {}
    meta['net']           = 'rnn-ext_abs_rl'
    meta['net_args']      = net_args
    meta['train_params']  = train_params
    with open(join(args.path, 'meta.json'), 'w') as f:
        json.dump(meta, f, indent=4)
    with open(join(args.path, 'agent_vocab.pkl'), 'wb') as f:
        pkl.dump(agent_vocab, f)

    # prepare trainer
    grad_fn = get_grad_fn(agent, args.clip)
    optimizer = optim.Adam(agent.parameters(), **train_params['optimizer'][1])
    scheduler = ReduceLROnPlateau(optimizer, 'max', verbose=True,
                                  factor=args.decay, min_lr=0,
                                  patience=args.lr_p)

    pipeline = A2CPipeline(meta['net'], agent, abstractor,
                           train_batcher, val_batcher,
                           optimizer, grad_fn,
                           reward_fn, args.gamma,
                           stop_reward_fn, args.stop)
    trainer = BasicTrainer(pipeline, args.path,
                           args.ckpt_freq, args.patience, scheduler,
                           val_mode='score')

    print('start training with the following hyper-parameters:')
    print(meta)
    trainer.train()
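
The train() above assumes its argument already carries the hyper-parameters as attributes. A minimal, hypothetical driver for such a script might look like the sketch below; every flag name is inferred from the args.* accesses in the function and is an assumption, not the original command-line interface.

import argparse

if __name__ == '__main__':
    # Hypothetical argument parser: names mirror the args.* attributes used in
    # train() above; defaults are illustrative only.
    parser = argparse.ArgumentParser(
        description='RL training of the extractor/abstractor agent')
    parser.add_argument('--path', required=True, help='directory for checkpoints and meta.json')
    parser.add_argument('--abs_dir', default=None, help='pretrained abstractor directory')
    parser.add_argument('--ext_dir', required=True, help='pretrained extractor directory')
    parser.add_argument('--cuda', action='store_true')
    parser.add_argument('--lr', type=float, default=1e-4)
    parser.add_argument('--clip', type=float, default=2.0)
    parser.add_argument('--decay', type=float, default=0.5)
    parser.add_argument('--lr_p', type=int, default=0, help='patience before decaying the LR')
    parser.add_argument('--batch', type=int, default=32)
    parser.add_argument('--gamma', type=float, default=0.95)
    parser.add_argument('--reward', default='rouge-l')
    parser.add_argument('--stop', type=float, default=1.0)
    parser.add_argument('--ckpt_freq', type=int, default=3000)
    parser.add_argument('--patience', type=int, default=5)
    train(parser.parse_args())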
Example #2
def main(args):
    assert args.net_type in ['ff', 'rnn']
    # create data batcher, vocabulary
    # batcher
    with open(join(DATA_DIR, 'vocab_cnt.pkl'), 'rb') as f:
        wc = pkl.load(f)
    word2id = make_vocab(wc, args.vsize)
    train_batcher, val_batcher = build_batchers(args.net_type, word2id,
                                                args.cuda, args.debug)

    # make net
    net, net_args = configure_net(args.net_type, len(word2id), args.emb_dim,
                                  args.conv_hidden, args.lstm_hidden,
                                  args.lstm_layer, args.bi)
    if args.w2v:
        # NOTE: a pretrained embedding with the same dimension as
        #       args.emb_dim must already have been trained
        embedding, _ = make_embedding({i: w
                                       for w, i in word2id.items()}, args.w2v)
        net.set_embedding(embedding)

    # configure training setting
    criterion, train_params = configure_training(args.net_type, 'adam',
                                                 args.lr, args.clip,
                                                 args.decay, args.batch)

    # save experiment setting
    if not exists(args.path):
        os.makedirs(args.path)
    with open(join(args.path, 'vocab.pkl'), 'wb') as f:
        pkl.dump(word2id, f, pkl.HIGHEST_PROTOCOL)
    meta = {}
    meta['net'] = 'ml_{}_extractor'.format(args.net_type)
    meta['net_args'] = net_args
    meta['training_params'] = train_params
    with open(join(args.path, 'meta.json'), 'w') as f:
        json.dump(meta, f, indent=4)

    # prepare trainer
    val_fn = basic_validate(net, criterion)
    grad_fn = get_basic_grad_fn(net, args.clip)
    optimizer = optim.Adam(net.parameters(), **train_params['optimizer'][1])
    scheduler = ReduceLROnPlateau(optimizer,
                                  'min',
                                  verbose=True,
                                  factor=args.decay,
                                  min_lr=0,
                                  patience=args.lr_p)

    if args.cuda:
        net = net.cuda()
    pipeline = BasicPipeline(meta['net'], net, train_batcher, val_batcher,
                             args.batch, val_fn, criterion, optimizer, grad_fn)
    trainer = BasicTrainer(pipeline, args.path, args.ckpt_freq, args.patience,
                           scheduler)

    print('start training with the following hyper-parameters:')
    print(meta)
    trainer.train()
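
ReduceLROnPlateau, used in every example here, only reacts when it is stepped with the metric it monitors; with mode='min' it multiplies the learning rate by `factor` once the value has failed to improve for `patience` consecutive steps. A self-contained illustration with a toy model and fabricated validation losses (purely for demonstration, not part of the training scripts):

from torch import nn, optim
from torch.optim.lr_scheduler import ReduceLROnPlateau

toy_net = nn.Linear(4, 1)
toy_opt = optim.Adam(toy_net.parameters(), lr=1e-3)
toy_sched = ReduceLROnPlateau(toy_opt, 'min', factor=0.5, patience=2)

for val_loss in [1.0, 0.9, 0.9, 0.9, 0.9]:    # plateaus after the second step
    toy_sched.step(val_loss)                  # pass the monitored metric explicitly
    print(toy_opt.param_groups[0]['lr'])      # halves to 5e-4 once patience is exceeded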
Example #3
def main(args):
    # create data batcher, vocabulary
    # batcher

    with open(join(DATA_DIR, 'vocab_cnt.pkl'), 'rb') as f:
        wc = pkl.load(f)
    word2id = make_vocab(wc, args.vsize)

    train_batcher, val_batcher = build_batchers(word2id, args.cuda, args.debug)

    # make net
    print('vocab size:', len(word2id))
    ids = list(word2id.values())
    print(max(ids))
    print(min(ids))
    net, net_args = configure_net(len(word2id), args.emb_dim, args.n_hidden,
                                  args.bi, args.n_layer, args.load_from)

    # configure training setting
    criterion, train_params = configure_training('adam', args.lr, args.clip,
                                                 args.decay, args.batch)

    # save experiment setting
    if not exists(args.path):
        os.makedirs(args.path)
    with open(join(args.path, 'vocab.pkl'), 'wb') as f:
        pkl.dump(word2id, f, pkl.HIGHEST_PROTOCOL)
    meta = {}
    meta['net'] = 'base_abstractor'
    meta['net_args'] = net_args
    meta['training_params'] = train_params
    with open(join(args.path, 'meta.json'), 'w') as f:
        json.dump(meta, f, indent=4)

    # prepare trainer
    if args.cuda:
        net = net.cuda()

    val_fn = basic_validate(net, criterion)

    grad_fn = get_basic_grad_fn(net, args.clip)

    optimizer = optim.AdamW(net.parameters(), **train_params['optimizer'][1])

    #optimizer = optim.Adagrad(net.parameters(), **train_params['optimizer'][1])
    scheduler = ReduceLROnPlateau(optimizer,
                                  'min',
                                  verbose=True,
                                  factor=args.decay,
                                  min_lr=0,
                                  patience=args.lr_p)

    pipeline = BasicPipeline(meta['net'], net, train_batcher, val_batcher,
                             args.batch, val_fn, criterion, optimizer, grad_fn)
    trainer = BasicTrainer(pipeline, args.path, args.ckpt_freq, args.patience,
                           scheduler)

    print('start training with the following hyper-parameters:')
    print(meta)
    trainer.train()
Example #4
def main(args):
    assert args.net_type in ['ff', 'rnn']
    # create data batcher, vocabulary
    # batcher
    with open(join(DATA_DIR, 'vocab_cnt.pkl'), 'rb') as f:
        wc = pkl.load(f)
    word2id = make_vocab(wc, args.vsize)
    train_batcher, val_batcher = build_batchers(args.net_type, word2id,
                                                args.cuda, args.debug)

    # make net
    net, net_args = configure_net(args.net_type,
                                  len(word2id), args.emb_dim, args.conv_hidden,
                                  args.lstm_hidden, args.lstm_layer, args.bi)
    if args.w2v:
        # NOTE: a pretrained embedding with the same dimension as
        #       args.emb_dim must already have been trained
        embedding, _ = make_embedding(
            {i: w for w, i in word2id.items()}, args.w2v)
        net.set_embedding(embedding)

    # configure training setting
    criterion, train_params = configure_training(
        args.net_type, 'adam', args.lr, args.clip, args.decay, args.batch
    )

    # save experiment setting
    if not exists(args.path):
        os.makedirs(args.path)
    with open(join(args.path, 'vocab.pkl'), 'wb') as f:
        pkl.dump(word2id, f, pkl.HIGHEST_PROTOCOL)
    meta = {}
    meta['net']             = 'ml_{}_extractor'.format(args.net_type)
    meta['net_args']        = net_args
    meta['training_params'] = train_params
    with open(join(args.path, 'meta.json'), 'w') as f:
        json.dump(meta, f, indent=4)

    # prepare trainer
    val_fn = basic_validate(net, criterion)
    grad_fn = get_basic_grad_fn(net, args.clip)
    optimizer = optim.Adam(net.parameters(), **train_params['optimizer'][1])
    scheduler = ReduceLROnPlateau(optimizer, 'min', verbose=True,
                                  factor=args.decay, min_lr=0,
                                  patience=args.lr_p)

    if args.cuda:
        net = net.cuda()
    pipeline = BasicPipeline(meta['net'], net,
                             train_batcher, val_batcher, args.batch, val_fn,
                             criterion, optimizer, grad_fn)
    trainer = BasicTrainer(pipeline, args.path,
                           args.ckpt_freq, args.patience, scheduler)

    print('start training with the following hyper-parameters:')
    print(meta)
    trainer.train()
Example #5
def train(args):
    if not exists(args.path):
        os.makedirs(args.path)

    # make net
    if args.docgraph or args.paragraph:
        agent, agent_vocab, abstractor, net_args = configure_net_graph(
            args.abs_dir, args.ext_dir, args.cuda, args.docgraph,
            args.paragraph)
    else:
        agent, agent_vocab, abstractor, net_args = configure_net(
            args.abs_dir, args.ext_dir, args.cuda, True, False, args.rl_dir)

    if args.bert_stride > 0:
        assert args.bert_stride == agent._bert_stride
    # configure training setting
    assert args.stop > 0
    train_params = configure_training('adam', args.lr, args.clip, args.decay,
                                      args.batch, args.gamma, args.reward,
                                      args.stop, 'rouge-1')

    if args.docgraph or args.paragraph:
        if args.bert:
            train_batcher, val_batcher = build_batchers_graph_bert(
                args.batch, args.key, args.adj_type, args.max_bert_word,
                args.docgraph, args.paragraph)
        else:
            train_batcher, val_batcher = build_batchers_graph(
                args.batch, args.key, args.adj_type, args.gold_key,
                args.docgraph, args.paragraph)
    elif args.bert:
        train_batcher, val_batcher = build_batchers_bert(
            args.batch, args.bert_sent, args.bert_stride, args.max_bert_word)
    else:
        train_batcher, val_batcher = build_batchers(args.batch)
    # TODO different reward
    if args.reward == 'rouge-l':
        reward_fn = compute_rouge_l
    elif args.reward == 'rouge-1':
        reward_fn = compute_rouge_n(n=1)
    elif args.reward == 'rouge-2':
        reward_fn = compute_rouge_n(n=2)
    elif args.reward == 'rouge-l-s':
        reward_fn = compute_rouge_l_summ
    else:
        raise Exception('Not prepared reward')
    stop_reward_fn = compute_rouge_n(n=1)

    # save abstractor binary
    if args.abs_dir is not None:
        abs_ckpt = {}
        abs_ckpt['state_dict'] = load_best_ckpt(args.abs_dir, reverse=True)
        abs_vocab = pkl.load(open(join(args.abs_dir, 'vocab.pkl'), 'rb'))
        abs_dir = join(args.path, 'abstractor')
        os.makedirs(join(abs_dir, 'ckpt'))
        with open(join(abs_dir, 'meta.json'), 'w') as f:
            json.dump(net_args['abstractor'], f, indent=4)
        torch.save(abs_ckpt, join(abs_dir, 'ckpt/ckpt-0-0'))
        with open(join(abs_dir, 'vocab.pkl'), 'wb') as f:
            pkl.dump(abs_vocab, f)
    # save configuration
    meta = {}
    meta['net'] = 'rnn-ext_abs_rl'
    meta['net_args'] = net_args
    meta['train_params'] = train_params
    with open(join(args.path, 'meta.json'), 'w') as f:
        json.dump(meta, f, indent=4)
    with open(join(args.path, 'agent_vocab.pkl'), 'wb') as f:
        pkl.dump(agent_vocab, f)

    # prepare trainer
    grad_fn = get_grad_fn(agent, args.clip)
    optimizer = optim.Adam(agent.parameters(), **train_params['optimizer'][1])
    scheduler = ReduceLROnPlateau(optimizer,
                                  'max',
                                  verbose=True,
                                  factor=args.decay,
                                  min_lr=1e-5,
                                  patience=args.lr_p)

    if args.docgraph or args.paragraph:
        entity = True
    else:
        entity = False
    pipeline = SCPipeline(meta['net'], agent, abstractor, train_batcher,
                          val_batcher, optimizer, grad_fn, reward_fn, entity,
                          args.bert)

    trainer = BasicTrainer(pipeline,
                           args.path,
                           args.ckpt_freq,
                           args.patience,
                           scheduler,
                           val_mode='score')

    print('start training with the following hyper-parameters:')
    print(meta)
    trainer.train()
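
The reward selection above relies on two conventions: compute_rouge_l is itself a reward callable, while compute_rouge_n(n=...) is a factory that fixes the n-gram order and returns one. The repository's actual metric code is not shown in these examples; the following is only a hypothetical sketch of that factory pattern, with a simple n-gram overlap F1 standing in for real ROUGE.

from collections import Counter

def make_ngram_overlap_reward(n=1):
    """Hypothetical stand-in for compute_rouge_n: returns a reward callable."""
    def ngrams(tokens):
        return Counter(zip(*(tokens[i:] for i in range(n))))
    def reward(output_tokens, reference_tokens):
        out, ref = ngrams(output_tokens), ngrams(reference_tokens)
        overlap = sum((out & ref).values())
        if overlap == 0:
            return 0.
        precision = overlap / sum(out.values())
        recall = overlap / sum(ref.values())
        return 2 * precision * recall / (precision + recall)
    return reward

reward_fn = make_ngram_overlap_reward(n=1)
print(reward_fn('the cat sat'.split(), 'the cat ran'.split()))  # unigram-overlap F1 ~= 0.667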
Example #6
def main(args):
    # create data batcher, vocabulary
    # batcher
    word2id = pkl.load(open(join(args.abs_dir, 'vocab.pkl'), 'rb'))

    # reward func

    reward_func = None
    reward_weight = 0.

    # make net

    net, net_args = configure_net(args.abs_dir)

    bert = net._bert

    train_batcher, val_batcher = build_batchers(word2id, args.cuda, args.debug)

    # configure training setting
    criterion, train_params = configure_training('adam', args.lr, args.clip,
                                                 args.decay, args.batch)

    # save experiment setting
    if not exists(args.path):
        os.makedirs(args.path)
    with open(join(args.path, 'vocab.pkl'), 'wb') as f:
        pkl.dump(word2id, f, pkl.HIGHEST_PROTOCOL)
    meta = {}
    meta['net'] = 'base_abstractor'
    meta['net_args'] = net_args
    meta['training_params'] = train_params
    with open(join(args.path, 'meta.json'), 'w') as f:
        json.dump(meta, f, indent=4)

    # prepare trainer
    if args.cuda:
        net = net.cuda()

    val_fn = rl_validate(net,
                         reward_func=reward_func,
                         reward_coef=reward_weight,
                         _bleu=args.bleu,
                         f1=args.f1)
    grad_fn = get_basic_grad_fn(net, args.clip)

    optimizer = optim.AdamW(net.parameters(), **train_params['optimizer'][1])

    #optimizer = optim.Adagrad(net.parameters(), **train_params['optimizer'][1])

    scheduler = ReduceLROnPlateau(optimizer,
                                  'max',
                                  verbose=True,
                                  factor=args.decay,
                                  min_lr=0,
                                  patience=args.lr_p)
    pipeline = AbsSelfCriticalPipeline(meta['net'],
                                       net,
                                       train_batcher,
                                       val_batcher,
                                       args.batch,
                                       val_fn,
                                       optimizer,
                                       grad_fn,
                                       weights=[args.r1, args.r2, args.rl],
                                       _bleu=args.bleu,
                                       f1=args.f1)
    trainer = BasicTrainer(pipeline,
                           args.path,
                           args.ckpt_freq,
                           args.patience,
                           scheduler,
                           val_mode='score')

    print('start training with the following hyper-parameters:')
    print(meta)
    trainer.train()
Example #7
def main(args):
    # create data batcher, vocabulary
    # batcher
    with open(join(args.data_path, 'vocab_cnt.pkl'), 'rb') as f:
        wc = pkl.load(f)
    word2id = make_vocab(wc, args.vsize, args.max_target_sent)  # word-to-id vocabulary
    train_batcher, val_batcher = build_batchers(word2id, args.cuda, args.debug)

    # make net

    if args.w2v:
        # NOTE: a pretrained embedding with the same dimension as
        #       args.emb_dim must already have been trained
        embedding, _ = make_embedding({i: w
                                       for w, i in word2id.items()},
                                      args.w2v)  # provides a pretrained embedding matrix

        net, net_args = configure_net(len(word2id), args.emb_dim,
                                      args.n_hidden, args.bi, args.n_layer,
                                      args.sampling_teaching_force,
                                      args.self_attn, args.hi_encoder,
                                      embedding)
    else:
        print("please provide pretrain_w2v")
        return

    # configure training setting
    criterion, train_params = configure_training('adam', args.lr, args.clip,
                                                 args.decay, args.batch)

    # save experiment setting

    if not exists(args.path):
        os.makedirs(args.path)
    with open(join(args.path, 'vocab.pkl'), 'wb') as f:
        pkl.dump(word2id, f, pkl.HIGHEST_PROTOCOL)

    net_args_backup = net_args.copy()
    del net_args_backup["embedding"]

    meta = {}
    meta['net'] = 'base_abstractor'
    meta['net_args'] = net_args_backup
    meta['training_params'] = train_params

    with open(join(args.path, 'meta.json'), 'w') as f:
        json.dump(meta, f, indent=4)

    # prepare trainer
    val_fn = basic_validate(net, criterion)
    grad_fn = get_basic_grad_fn(net, args.clip)
    optimizer = optim.Adam(net.parameters(), **train_params['optimizer'][1])
    scheduler = ReduceLROnPlateau(optimizer,
                                  'min',
                                  verbose=True,
                                  factor=args.decay,
                                  min_lr=0,
                                  patience=args.lr_p)

    if args.cuda:
        net = net.cuda()
    pipeline = BasicPipeline(meta['net'], net, train_batcher, val_batcher,
                             args.batch, val_fn, criterion, optimizer, grad_fn)
    trainer = BasicTrainer(pipeline, args.path, args.ckpt_freq, args.patience,
                           scheduler)

    print('start training with the following hyper-parameters:')
    print(meta)
    trainer.train()
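
One detail worth noting in this example: net_args carries the embedding tensor, so a copy with that entry removed is written to meta.json, because tensors cannot be serialized by the json module. A small self-contained demonstration of that constraint (the dictionary below is illustrative, not the repository's actual net_args):

import json
import torch

net_args_demo = {'emb_dim': 128, 'embedding': torch.zeros(10, 128)}
serializable = {k: v for k, v in net_args_demo.items() if not torch.is_tensor(v)}
print(json.dumps(serializable))    # ok: {"emb_dim": 128}
# json.dumps(net_args_demo)        # would raise TypeError: Tensor is not JSON serializable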
Example #8
def main(args):
    abstractor = get_abstractor(args.abs_dir, args.beam_search, args.cuda)
    for split in ('train', 'val'):
        decode(args, split)

    embedding = abstractor._net._decoder._embedding
    word2id = abstractor._word2id
    id2words = {i: w for w, i in word2id.items()}

    elmo = None
    if args.elmo:
        elmo = get_elmo(dropout=args.elmo_dropout,
                        vocab_to_cache=[id2words[i] for i in range(len(id2words))],
                        cuda=args.cuda,
                        projection_dim=args.elmo_projection)
        args.emb_dim = elmo.get_output_dim()

    meta = {
        'net': '{}_discriminator'.format('cnn'),
        'net_args': {
            'vocab_size': len(abstractor._word2id),
            'emb_dim': embedding.embedding_dim,
            'kernel_num': args.kernel_num,
            'kernel_sizes': args.kernel_sizes,
            'class_num': 2,
            'dropout': args.dropout,
            'max_norm': args.max_norm,
            'static': args.static,
        },
        'training_params': {
            'optimizer': ('adam', {'lr': args.lr}),
            'batch_size': args.batch,
            'clip_grad_norm': args.clip,
            'lr_decay': args.decay,
        }
    }

    net = ConvNet(**meta['net_args'])

    if elmo:
        meta['net_args']['elmo'] = {
            'dropout': args.elmo_dropout,
            'projection': args.elmo_projection,
        }
        net.set_elmo_embedding(elmo)
    else:
        net.set_embedding(embedding.weight)

    train_batcher, val_batcher = build_batchers(args, word2id)

    def criterion(logit, target):
        # keep a per-example loss vector (the old reduce=False flag is deprecated)
        return F.cross_entropy(logit, target, reduction='none')

    val_fn = basic_validate(net, criterion)
    grad_fn = get_basic_grad_fn(net, args.clip)
    optimizer = torch.optim.Adam(
        filter(lambda p: p.requires_grad, net.parameters()), lr=args.lr)
    scheduler = ReduceLROnPlateau(optimizer, 'min', verbose=True,
                                  factor=args.decay, min_lr=0,
                                  patience=args.lr_p)

    if args.cuda:
        net = net.cuda()
    pipeline = BasicPipeline('discriminator', net,
                             train_batcher, val_batcher, args.batch, val_fn,
                             criterion, optimizer, grad_fn)
    trainer = BasicTrainer(pipeline, args.path,
                           args.ckpt_freq, args.patience, scheduler)

    print('start training with the following hyper-parameters:')
    print(meta)
    trainer.train()
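
The discriminator's criterion above deliberately keeps one loss value per example (reduction='none') rather than a scalar, which the pipeline presumably reduces itself. A self-contained check of what that produces:

import torch
import torch.nn.functional as F

logits = torch.tensor([[2.0, 0.5], [0.1, 1.5]])   # batch of 2, 2 classes
targets = torch.tensor([0, 1])
per_example = F.cross_entropy(logits, targets, reduction='none')
print(per_example.shape)    # torch.Size([2]): one loss per example
print(per_example.mean())   # equals the default mean-reduced cross entropy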
Example #9
def train(args):

    assert args.encoder  in ['BiLSTM', 'DeepLSTM', 'Transformer']
    assert args.decoder  in ['SL', 'PN']
    assert args.emb_type in ['W2V', 'BERT']

    # create data batcher, vocabulary
    # batcher
    with open(join(DATA_DIR, 'vocab_cnt.pkl'), 'rb') as f:
        wc = pkl.load(f)
    word2id = make_vocab(wc, args.vsize)
    train_batcher, val_batcher = build_batchers(args.decoder, args.emb_type, 
                                                word2id, args.cuda, args.debug)

    # make model
    model, model_args = configure_net(args.encoder, args.decoder, args.emb_type, len(word2id), 
                                      args.emb_dim, args.conv_hidden, args.encoder_hidden, 
                                      args.encoder_layer)
    
    if args.emb_type == 'W2V':
        # NOTE: a pretrained embedding with the same dimension as
        #       args.emb_dim must already have been trained
        w2v_path = './CNNDM/word2vec/word2vec.128d.226k.bin'
        embedding, _ = make_embedding(
            {i: w for w, i in word2id.items()}, w2v_path)
        model.set_embedding(embedding)

    # configure training setting
    criterion, train_params = configure_training(
        args.decoder, 'adam', args.lr, args.clip, args.decay, args.batch
    )

    # save experiment setting
    if not exists(args.path):
        os.makedirs(args.path)
    with open(join(args.path, 'vocab.pkl'), 'wb') as f:
        pkl.dump(word2id, f, pkl.HIGHEST_PROTOCOL)
    meta = {}
    meta['model_args']      = model_args
    meta['training_params'] = train_params
    with open(join(args.path, 'meta.json'), 'w') as f:
        json.dump(meta, f, indent=4)

    # prepare trainer
    val_fn = basic_validate(model, criterion, args.decoder)
    grad_fn = get_basic_grad_fn(model, args.clip)
    optimizer = optim.Adam(filter(lambda p: p.requires_grad, model.parameters()),
                           **train_params['optimizer'][1])
    scheduler = ReduceLROnPlateau(optimizer, 'min', verbose=True,
                                  factor=args.decay, min_lr=2e-5,
                                  patience=args.lr_p)

    if args.cuda:
        model = model.cuda()
    pipeline = BasicPipeline(model, args.decoder, 
                             train_batcher, val_batcher, args.batch, val_fn,
                             criterion, optimizer, grad_fn)
    trainer = BasicTrainer(pipeline, args.path,
                           args.ckpt_freq, args.patience, scheduler)
    
    # for name, para in net.named_parameters():
    #     if para.requires_grad:
    #         print(name)

    print('Start training with the following hyper-parameters:')
    print(meta)
    trainer.train()
Example #10
def main(args):
    # create data batcher, vocabulary
    # batcher
    word2id = pkl.load(open(join(args.abs_dir, 'vocab.pkl'), 'rb'))

    # reward func
    if args.reward_model_dir is not None:
        assert args.reward_data_dir is not None
        reward_func = cloze_reward(args.reward_model_dir, args.cloze_device)
        reward_weight = args.reward_weight
    else:
        reward_func = None
        reward_weight = 0.

    # make net
    if args.docgraph or args.paragraph:
        net, net_args = configure_net_graph(args.abs_dir, args.docgraph,
                                            args.paragraph)
    else:
        net, net_args = configure_net(args.abs_dir)

    bert = net._bert
    if bert:
        print('model use bert')
        import logging
        print('disable')
        logging.getLogger('transformers.tokenization_utils').setLevel(
            logging.ERROR)
        logging.getLogger('transformers.tokenization_utils').disabled = True

    if args.docgraph or args.paragraph:
        if bert:
            tokenizer = net._bert_model._tokenizer
            train_batcher, val_batcher = build_batchers_graph_bert(
                tokenizer, args.cuda, args.debug, args.key, net._adj_type,
                args.docgraph, args.reward_data_dir)
        else:
            train_batcher, val_batcher = build_batchers_graph(
                word2id, args.cuda, args.debug, args.key, net._adj_type,
                args.docgraph, args.reward_data_dir)
    else:
        if bert:
            tokenizer = net._bert_model._tokenizer
            train_batcher, val_batcher = build_batchers_bert(
                tokenizer, args.cuda, args.debug)
        else:
            train_batcher, val_batcher = build_batchers(
                word2id, args.cuda, args.debug)

    # configure training setting
    criterion, train_params = configure_training('adam', args.lr, args.clip,
                                                 args.decay, args.batch)

    # save experiment setting
    if not exists(args.path):
        os.makedirs(args.path)
    with open(join(args.path, 'vocab.pkl'), 'wb') as f:
        pkl.dump(word2id, f, pkl.HIGHEST_PROTOCOL)
    meta = {}
    meta['net'] = 'base_abstractor'
    meta['net_args'] = net_args
    meta['training_params'] = train_params
    with open(join(args.path, 'meta.json'), 'w') as f:
        json.dump(meta, f, indent=4)

    local_coh_fun = None

    # prepare trainer
    if args.cuda:
        net = net.cuda()

    multigpu = False

    val_fn = rl_validate(net,
                         reward_func=reward_func,
                         reward_coef=reward_weight,
                         bert=bert)
    grad_fn = get_basic_grad_fn(net, args.clip)

    optimizer = optim.AdamW(net.parameters(), **train_params['optimizer'][1])

    #optimizer = optim.Adagrad(net.parameters(), **train_params['optimizer'][1])

    scheduler = ReduceLROnPlateau(optimizer,
                                  'max',
                                  verbose=True,
                                  factor=args.decay,
                                  min_lr=0,
                                  patience=args.lr_p)
    print('rouge weights:', [args.r1, args.r2, args.rl])
    pipeline = AbsSelfCriticalPipeline(
        meta['net'],
        net,
        train_batcher,
        val_batcher,
        args.batch,
        val_fn,
        optimizer,
        grad_fn,
        reward_func,
        reward_weight,
        local_coh_fun,
        0.,
        accumulate_g_step=args.accumulate_g_step,
        weights=[args.r1, args.r2, args.rl],
        bert=bert,
        multigpu=multigpu,
        ml_loss=args.ml_loss)
    trainer = BasicTrainer(pipeline,
                           args.path,
                           args.ckpt_freq,
                           args.patience,
                           scheduler,
                           val_mode='score')

    print('start training with the following hyper-parameters:')
    print(meta)
    trainer.train()
Example #11
def main(args):
    # create data batcher, vocabulary
    # batcher
    with open(join(DATA_DIR, 'vocab_cnt.pkl'), 'rb') as f:
        wc = pkl.load(f)
    word2id = make_vocab(wc, args.vsize)
    train_batcher, val_batcher = build_batchers(word2id, args.cuda, args.debug)

    # make net
    net, net_args = configure_net(len(word2id), args.emb_dim, args.n_hidden,
                                  args.bi, args.n_layer)
    if args.w2v:
        # NOTE: a pretrained embedding with the same dimension as
        #       args.emb_dim must already have been trained
        embedding, oov = make_embedding({i: w
                                         for w, i in word2id.items()},
                                        args.w2v)
        net.set_embedding(embedding)

    # configure training setting
    criterion, train_params = configure_training('adam', args.lr, args.clip,
                                                 args.decay, args.batch)

    # save experiment setting
    if not exists(args.path):
        os.makedirs(args.path)
    with open(join(args.path, 'vocab.pkl'), 'wb') as f:
        pkl.dump(word2id, f, pkl.HIGHEST_PROTOCOL)
    meta = {}
    meta['net'] = 'base_abstractor'
    meta['net_args'] = net_args
    meta['training_params'] = train_params
    with open(join(args.path, 'meta.json'), 'w') as f:
        json.dump(meta, f, indent=4)

    # prepare trainer
    val_fn = basic_validate(net, criterion)
    grad_fn = get_basic_grad_fn(net, args.clip)
    optimizer = optim.Adam(net.parameters(), **train_params['optimizer'][1])
    scheduler = ReduceLROnPlateau(optimizer,
                                  'min',
                                  verbose=True,
                                  factor=args.decay,
                                  min_lr=0,
                                  patience=args.lr_p)

    if args.cuda:
        net = net.cuda()
    pipeline = BasicPipeline(meta['net'], net, train_batcher, val_batcher,
                             args.batch, val_fn, criterion, optimizer, grad_fn)
    trainer = BasicTrainer(pipeline, args.path, args.ckpt_freq, args.patience,
                           scheduler)

    print('start training with the following hyper-parameters:')
    print(meta)

    # # Print model's state_dict
    # print("Model's state_dict:")
    # for param_tensor in net.state_dict():
    #     print(param_tensor, "\t", net.state_dict()[param_tensor].size())
    #
    # # Print optimizer's state_dict
    # print("Optimizer's state_dict:")
    # for var_name in optimizer.state_dict():
    #     print(var_name, "\t", optimizer.state_dict()[var_name])

    # # IMPORT PRETRAINED MODEL PARAMETERS
    # net.load_state_dict(torch.load(
    #     'pretrained_eng_model/abstractor/ckpt/ckpt-0-0')['state_dict'])
    # net.eval()  # do I need that or not?

    # copy net
    # from copy import deepcopy
    # net_copy = deepcopy(net)
    # net_copy.load_state_dict(torch.load('pretrained_eng_model/abstractor/ckpt/ckpt-0-0', map_location='cpu')['state_dict'])

    # for key in net_copy.state_dict():
    #     print('key: ', key)
    #     param = net_copy.state_dict()[key]
    #     print('param.shape: ', param.shape)
    #     print('param.requires_grad: ', param.requires_grad)
    #     print('param.shape, param.requires_grad: ', param.shape, param.requires_grad)
    #     print('isinstance(param, nn.Module) ', isinstance(param, nn.Module))
    #     print('isinstance(param, nn.Parameter) ', isinstance(param, nn.Parameter))
    #     print('isinstance(param, torch.Tensor): ', isinstance(param, torch.Tensor))
    #     print('=====')

    # save current state dict
    model_dict = net.state_dict()

    # save some parameters for testing purposes if the dict was loaded successfully
    p1 = net._embedding.weight[0][0].detach().cpu().numpy()
    p2 = net._enc_lstm.weight_hh_l0[0][0].detach().cpu().numpy()
    p3 = net._attn_wm.data[0][0].detach().cpu().numpy()

    # print(p1)
    # print(p2)
    # print(p3)

    # load dict from pretrained net
    ABS_DIR = os.environ['ABS']
    print(ABS_DIR)

    # load the pretrained abstractor checkpoint
    pretrained_dict = torch.load(ABS_DIR)['state_dict']

    # skip embedding weights
    pretrained_dict = {
        k: v
        for k, v in pretrained_dict.items() if k != '_embedding.weight'
    }

    # overwrite entries in the existing state dict
    model_dict.update(pretrained_dict)

    print('Model will be trained on device:')
    print(model_dict['_embedding.weight'].device)

    # load the new state dict
    net.load_state_dict(model_dict)

    # check if the update was correct
    pn1 = net._embedding.weight[0][0].detach().cpu().numpy()
    pn2 = net._enc_lstm.weight_hh_l0[0][0].detach().cpu().numpy()
    pn3 = net._attn_wm.data[0][0].detach().cpu().numpy()

    # print(pn1)
    # print(pn2)
    # print(pn3)

    assert p1 == pn1  # embedding layer has to be the same
    assert p2 != pn2
    assert p3 != pn3

    print('Embedding layer has not been overwritten')

    # set updating of the parameters
    for name, param in net.named_parameters():
        #param.requires_grad = True
        print(name, param.requires_grad)

    trainer.train()
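
The manual state_dict surgery above (filter out '_embedding.weight', update the current dict, then load it back) can also be expressed with strict=False, which leaves missing keys at their current values. A self-contained sketch on a toy module, not the abstractor itself:

import torch
from torch import nn

class Toy(nn.Module):
    def __init__(self):
        super().__init__()
        self._embedding = nn.Embedding(10, 4)
        self._proj = nn.Linear(4, 4)

src, dst = Toy(), Toy()
pretrained = {k: v for k, v in src.state_dict().items() if k != '_embedding.weight'}
missing, unexpected = dst.load_state_dict(pretrained, strict=False)
print(missing)       # ['_embedding.weight'] -- kept at its current initialization
print(unexpected)    # []
assert torch.equal(dst._proj.weight, src._proj.weight)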
Example #12
def main(args):
    assert args.net_type in ['ff', 'rnn']
    # create data batcher, vocabulary
    # batcher
    with open(join(DATA_DIR, 'vocab_cnt.pkl'), 'rb') as f:
        wc = pkl.load(f)
    word2id = make_vocab(wc, args.vsize)
    id2words = {i: w for w, i in word2id.items()}

    elmo = None
    if args.elmo:
        elmo = get_elmo(
            dropout=args.elmo_dropout,
            vocab_to_cache=[id2words[i] for i in range(len(id2words))],
            cuda=args.cuda,
            projection_dim=args.elmo_projection)
        args.emb_dim = elmo.get_output_dim()

    train_batcher, val_batcher = build_batchers(args.net_type, word2id,
                                                args.cuda, args.debug)

    # make net
    net, net_args = configure_net(args.net_type, len(word2id), args.emb_dim,
                                  args.conv_hidden, args.lstm_hidden,
                                  args.lstm_layer, args.bi)

    if elmo:
        net_args['elmo'] = {
            'dropout': args.elmo_dropout,
            'projection': args.elmo_projection,
        }
        net.set_elmo_embedding(elmo)
    elif args.w2v:
        # NOTE: a pretrained embedding with the same dimension as
        #       args.emb_dim must already have been trained
        embedding, _ = make_embedding(id2words, args.w2v)
        net.set_embedding(embedding)

    # configure training setting
    criterion, train_params = configure_training(args.net_type, 'adam',
                                                 args.lr, args.clip,
                                                 args.decay, args.batch)

    # save experiment setting
    if not exists(args.path):
        os.makedirs(args.path)
    with open(join(args.path, 'vocab.pkl'), 'wb') as f:
        pkl.dump(word2id, f, pkl.HIGHEST_PROTOCOL)
    meta = {
        'net': 'ml_{}_extractor'.format(args.net_type),
        'net_args': net_args,
        'training_params': train_params
    }
    with open(join(args.path, 'meta.json'), 'w') as f:
        json.dump(meta, f, indent=4)

    # prepare trainer
    val_fn = basic_validate(net, criterion)
    grad_fn = get_basic_grad_fn(net, args.clip)
    optimizer = optim.Adam(filter(lambda p: p.requires_grad, net.parameters()),
                           **train_params['optimizer'][1])
    scheduler = ReduceLROnPlateau(optimizer,
                                  'min',
                                  verbose=True,
                                  factor=args.decay,
                                  min_lr=0,
                                  patience=args.lr_p)

    if args.cuda:
        net = net.cuda()
    pipeline = BasicPipeline(meta['net'], net, train_batcher, val_batcher,
                             args.batch, val_fn, criterion, optimizer, grad_fn)
    trainer = BasicTrainer(pipeline, args.path, args.ckpt_freq, args.patience,
                           scheduler)

    print('start training with the following hyper-parameters:')
    print(meta)
    trainer.train()
Example #13
def main(args):
    # create data batcher, vocabulary
    # batcher
    word2id = pkl.load(open(join(args.abs_dir, 'vocab.pkl'), 'rb'))
    train_batcher, val_batcher = build_batchers(word2id,
                                                args.cuda, args.debug)

    # make net
    net, net_args = configure_net(args.abs_dir)

    # configure training setting
    criterion, train_params = configure_training(
        'adam', args.lr, args.clip, args.decay, args.batch
    )

    # save experiment setting
    if not exists(args.path):
        os.makedirs(args.path)
    with open(join(args.path, 'vocab.pkl'), 'wb') as f:
        pkl.dump(word2id, f, pkl.HIGHEST_PROTOCOL)
    meta = {}
    meta['net']             = 'base_abstractor'
    meta['net_args']        = net_args
    meta['training_params'] = train_params
    with open(join(args.path, 'meta.json'), 'w') as f:
        json.dump(meta, f, indent=4)

    if args.coherence or args.all_local:
        print('use coref score')
        word_dict, model, sess = graph_init(model="cnndm")
        coh_func = (model, sess, word_dict)
        print('finish load coref model')
    else:
        coh_func = None
    assert not (args.anaphora and args.apposition)
    if args.anaphora:
        print('use anaphora')
        local_coh_fun = getAnaphoraReward
    elif args.apposition:
        print('use apposition')
        local_coh_fun = getAppositionReward
    elif args.all_local:
        local_coh_fun = getLocalReward
    else:
        local_coh_fun = None
    # prepare trainer
    if args.cuda:
        net = net.cuda()
    val_fn = rl_validate(net, coherence_func=coh_func, coh_coef=args.coh_coef,
                         local_coh_func=local_coh_fun,
                         local_coh_coef=args.local_coh_coef)
    grad_fn = get_basic_grad_fn(net, args.clip)
    optimizer = optim.Adam(net.parameters(), **train_params['optimizer'][1])
    #optimizer = optim.Adagrad(net.parameters(), **train_params['optimizer'][1])
    scheduler = ReduceLROnPlateau(optimizer, 'max', verbose=True,
                                  factor=args.decay, min_lr=0,
                                  patience=args.lr_p)

    pipeline = AbsSelfCriticalPipeline(meta['net'], net,
                                       train_batcher, val_batcher, args.batch,
                                       val_fn, optimizer, grad_fn,
                                       coh_func, args.coh_coef,
                                       local_coh_fun, args.local_coh_coef)
    trainer = BasicTrainer(pipeline, args.path,
                           args.ckpt_freq, args.patience, scheduler,
                           val_mode='score')

    print('start training with the following hyper-parameters:')
    print(meta)
    trainer.train()