def __init__(self, abs_dir, max_len=30, cuda=True):
    # load the trained abstractor's config, best checkpoint, and vocabulary
    abs_meta = json.load(open(join(abs_dir, 'meta.json')))
    assert abs_meta['net'] == 'base_abstractor'
    abs_args = abs_meta['net_args']
    abs_ckpt = load_best_ckpt(abs_dir)
    word2id = pkl.load(open(join(abs_dir, 'vocab.pkl'), 'rb'))
    elmo = None
    if 'elmo' in abs_args:
        elmo_args = abs_args['elmo']
        # cache ELMo representations for the whole vocabulary, ordered by id
        vocab_to_cache = [
            w for w, i in sorted(list(word2id.items()), key=itemgetter(1))
        ]
        elmo = get_elmo(dropout=elmo_args.get('dropout', 0),
                        vocab_to_cache=vocab_to_cache,
                        cuda=cuda,
                        projection_dim=elmo_args.get('projection_dim', None))
        # 'elmo' is not a CopySumm constructor argument, so remove it
        del abs_args['elmo']
    abstractor = CopySumm(**abs_args)
    if elmo is not None:
        abstractor.set_elmo_embedding(elmo)
    abstractor.load_state_dict(abs_ckpt)
    self._device = torch.device('cuda' if cuda else 'cpu')
    self._net = abstractor.to(self._device)
    self._word2id = word2id
    self._id2word = {i: w for w, i in word2id.items()}
    self._max_len = max_len
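# A minimal usage sketch (hypothetical paths; the wrapper class name and the
# call below are assumptions based on the 'base_abstractor' check above, not
# confirmed API). Assumes `abs_dir` contains meta.json, vocab.pkl, and the
# saved checkpoints the constructor loads:
#
#     abstractor = Abstractor('saved_model/abstractor', max_len=30, cuda=True)
#     summary_tokens = abstractor([['some', 'tokenized', 'sentence']])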
def __init__(self, ext_dir, cuda=True):
    # load the RL-trained extractor's config and agent vocabulary
    ext_meta = json.load(open(join(ext_dir, 'meta.json')))
    assert ext_meta['net'] == 'rnn-ext_abs_rl'
    ext_args = ext_meta['net_args']['extractor']['net_args']
    word2id = pkl.load(open(join(ext_dir, 'agent_vocab.pkl'), 'rb'))
    elmo = None
    if 'elmo' in ext_args:
        elmo_args = ext_args['elmo']
        # cache ELMo representations for the whole vocabulary, ordered by id
        vocab_to_cache = [
            w for w, i in sorted(list(word2id.items()), key=itemgetter(1))
        ]
        elmo = get_elmo(dropout=elmo_args.get('dropout', 0),
                        vocab_to_cache=vocab_to_cache,
                        cuda=cuda,
                        projection_dim=elmo_args.get('projection_dim', None))
        # 'elmo' is not a PtrExtractSumm constructor argument, so remove it
        del ext_args['elmo']
    extractor = PtrExtractSumm(**ext_args)
    if elmo is not None:
        extractor.set_elmo_embedding(elmo)
    # wrap the extractor's sub-modules in the actor-critic agent
    agent = ActorCritic(extractor._sent_enc,
                        extractor._art_enc,
                        extractor._extractor,
                        ArticleBatcher(word2id, cuda))
    ext_ckpt = load_best_ckpt(ext_dir, reverse=True)
    agent.load_state_dict(ext_ckpt)
    self._device = torch.device('cuda' if cuda else 'cpu')
    self._net = agent.to(self._device)
    self._word2id = word2id
    self._id2word = {i: w for w, i in word2id.items()}
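# Hypothetical usage sketch: the 'rnn-ext_abs_rl' check above suggests this
# constructor belongs to the RL extractor wrapper; the directory layout and
# the call below are assumptions, not confirmed API.
#
#     extractor = RLExtractor('saved_model/rl_extractor', cuda=True)
#     chosen_sentence_indices = extractor(tokenized_article_sentences)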
def load_ext_net(ext_dir):
    ext_meta = json.load(open(join(ext_dir, 'meta.json')))
    assert ext_meta['net'] == 'ml_rnn_extractor'
    ext_ckpt = load_best_ckpt(ext_dir)
    ext_args = ext_meta['net_args']
    vocab = pkl.load(open(join(ext_dir, 'vocab.pkl'), 'rb'))
    elmo = None
    if 'elmo' in ext_args:
        elmo_args = ext_args['elmo']
        vocab_to_cache = [
            w for w, i in sorted(list(vocab.items()), key=itemgetter(1))
        ]
        elmo = get_elmo(dropout=elmo_args.get('dropout', 0),
                        vocab_to_cache=vocab_to_cache)
        del ext_args['elmo']
    ext = PtrExtractSumm(**ext_args)
    if elmo is not None:
        ext.set_elmo_embedding(elmo)
    ext.load_state_dict(ext_ckpt)
    return ext, vocab
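# Example (hypothetical path): load the supervised extractor and its vocab,
# e.g. before handing PtrExtractSumm's sub-modules to the RL agent as the
# constructor above does.
#
#     ext_net, vocab = load_ext_net('saved_model/ml_extractor')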
def main(args):
    abstractor = get_abstractor(args.abs_dir, args.beam_search, args.cuda)
    # generate abstractor outputs on both splits as discriminator inputs
    for split in ('train', 'val'):
        decode(args, split)
    embedding = abstractor._net._decoder._embedding
    word2id = abstractor._word2id
    id2words = {i: w for w, i in word2id.items()}
    elmo = None
    if args.elmo:
        elmo = get_elmo(
            dropout=args.elmo_dropout,
            vocab_to_cache=[id2words[i] for i in range(len(id2words))],
            cuda=args.cuda,
            projection_dim=args.elmo_projection)
        args.emb_dim = elmo.get_output_dim()
    meta = {
        'net': 'cnn_discriminator',
        'net_args': {
            'vocab_size': len(abstractor._word2id),
            'emb_dim': embedding.embedding_dim,
            'kernel_num': args.kernel_num,
            'kernel_sizes': args.kernel_sizes,
            'class_num': 2,
            'dropout': args.dropout,
            'max_norm': args.max_norm,
            'static': args.static,
        },
        'training_params': {
            'optimizer': ('adam', {'lr': args.lr}),
            'batch_size': args.batch,
            'clip_grad_norm': args.clip,
            'lr_decay': args.decay,
        }
    }
    net = ConvNet(**meta['net_args'])
    if elmo:
        meta['net_args']['elmo'] = {
            'dropout': args.elmo_dropout,
            'projection': args.elmo_projection,
        }
        net.set_elmo_embedding(elmo)
    else:
        net.set_embedding(embedding.weight)
    train_batcher, val_batcher = build_batchers(args, word2id)

    def criterion(logit, target):
        # unreduced per-example cross-entropy
        return F.cross_entropy(logit, target, reduction='none')

    val_fn = basic_validate(net, criterion)
    grad_fn = get_basic_grad_fn(net, args.clip)
    optimizer = torch.optim.Adam(
        filter(lambda p: p.requires_grad, net.parameters()), lr=args.lr)
    scheduler = ReduceLROnPlateau(optimizer, 'min', verbose=True,
                                  factor=args.decay, min_lr=0,
                                  patience=args.lr_p)
    if args.cuda:
        net = net.cuda()
    pipeline = BasicPipeline('discriminator', net,
                             train_batcher, val_batcher, args.batch,
                             val_fn, criterion, optimizer, grad_fn)
    trainer = BasicTrainer(pipeline, args.path,
                           args.ckpt_freq, args.patience, scheduler)
    print('start training with the following hyper-parameters:')
    print(meta)
    trainer.train()
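# Sketch of a typical invocation of this training entry point; the script
# name and flag names are assumptions inferred from the attributes main()
# reads off `args` (abs_dir, path, batch, lr, cuda, ...), not a confirmed CLI.
#
#     python train_discriminator.py --abs_dir=saved_model/abstractor \
#         --path=saved_model/discriminator --batch=32 --lr=1e-3 --cuda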
def main(args):
    assert args.net_type in ['ff', 'rnn']
    # create data batcher, vocabulary
    # batcher
    with open(join(DATA_DIR, 'vocab_cnt.pkl'), 'rb') as f:
        wc = pkl.load(f)
    word2id = make_vocab(wc, args.vsize)
    id2words = {i: w for w, i in word2id.items()}
    elmo = None
    if args.elmo:
        elmo = get_elmo(
            dropout=args.elmo_dropout,
            vocab_to_cache=[id2words[i] for i in range(len(id2words))],
            cuda=args.cuda,
            projection_dim=args.elmo_projection)
        args.emb_dim = elmo.get_output_dim()
    train_batcher, val_batcher = build_batchers(args.net_type, word2id,
                                                args.cuda, args.debug)

    # make net
    net, net_args = configure_net(args.net_type, len(word2id), args.emb_dim,
                                  args.conv_hidden, args.lstm_hidden,
                                  args.lstm_layer, args.bi)
    if elmo:
        net_args['elmo'] = {
            'dropout': args.elmo_dropout,
            'projection': args.elmo_projection,
        }
        net.set_elmo_embedding(elmo)
    elif args.w2v:
        # NOTE: the pretrained embedding having the same dimension
        #       as args.emb_dim should already be trained
        embedding, _ = make_embedding(id2words, args.w2v)
        net.set_embedding(embedding)

    # configure training setting
    criterion, train_params = configure_training(
        args.net_type, 'adam', args.lr, args.clip, args.decay, args.batch)

    # save experiment setting
    if not exists(args.path):
        os.makedirs(args.path)
    with open(join(args.path, 'vocab.pkl'), 'wb') as f:
        pkl.dump(word2id, f, pkl.HIGHEST_PROTOCOL)
    meta = {
        'net': 'ml_{}_extractor'.format(args.net_type),
        'net_args': net_args,
        'training_params': train_params
    }
    with open(join(args.path, 'meta.json'), 'w') as f:
        json.dump(meta, f, indent=4)

    # prepare trainer
    val_fn = basic_validate(net, criterion)
    grad_fn = get_basic_grad_fn(net, args.clip)
    optimizer = optim.Adam(filter(lambda p: p.requires_grad, net.parameters()),
                           **train_params['optimizer'][1])
    scheduler = ReduceLROnPlateau(optimizer, 'min', verbose=True,
                                  factor=args.decay, min_lr=0,
                                  patience=args.lr_p)

    if args.cuda:
        net = net.cuda()
    pipeline = BasicPipeline(meta['net'], net,
                             train_batcher, val_batcher, args.batch,
                             val_fn, criterion, optimizer, grad_fn)
    trainer = BasicTrainer(pipeline, args.path,
                           args.ckpt_freq, args.patience, scheduler)

    print('start training with the following hyper-parameters:')
    print(meta)
    trainer.train()
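# Sketch of a typical invocation (hypothetical script name and flags, inferred
# from the attributes main() reads off `args`):
#
#     python train_extractor_ml.py --net-type=rnn --path=saved_model/extractor \
#         --vsize=30000 --emb_dim=128 --batch=32 --cuda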