def __init__(self, args, word_dict, feature_dict, state_dict=None, normalize_q=True, normalize_s=True):
    """Set up bookkeeping, build the reader network, and optionally restore a checkpoint.

    Args:
        args: config namespace; vocab_size / num_features are filled in here.
        word_dict: word -> index mapping.
        feature_dict: feature -> index mapping.
        state_dict: optional saved model state (pretraining / checkpoint).
        normalize_q, normalize_s: when False, scores are left unnormalized
            per paragraph (no softmax).
    """
    # Book-keeping.
    self.args = args
    self.word_dict = word_dict
    self.args.vocab_size = len(word_dict)
    self.feature_dict = feature_dict
    self.args.num_features = len(feature_dict)
    self.updates = 0
    self.use_cuda = False
    self.parallel = False
    self.normalize_q = normalize_q
    self.normalize_s = normalize_s

    # Build the network; only the 'rnn' model type is supported here.
    if args.model_type != 'rnn':
        raise RuntimeError('Unsupported model: %s' % args.model_type)
    if args.train_mode == 'string_match':
        self.network = RnnDocReader(args, normalize_s)
    else:
        self.network = RnnDocReader_Q(args, normalize_s)

    # Restore saved weights, handling the optional 'fixed_embedding'
    # buffer separately: it must be popped before load_state_dict and
    # re-registered as a buffer afterwards.
    if state_dict:
        if 'fixed_embedding' in state_dict:
            fixed = state_dict.pop('fixed_embedding')
            self.network.load_state_dict(state_dict)
            self.network.register_buffer('fixed_embedding', fixed)
        else:
            self.network.load_state_dict(state_dict)
def __init__(self, args, word_dict, char_dict, feature_dict, state_dict=None, normalize=True):
    """Record configuration, build the chosen reader network, and
    optionally restore weights from a saved state dict.

    When `normalize` is False, scores are not normalized 0-1 per
    paragraph (no softmax).
    """
    # Book-keeping.
    self.args = args
    self.word_dict = word_dict
    self.char_dict = char_dict
    self.args.vocab_size = len(word_dict)
    self.args.char_size = len(char_dict)
    self.feature_dict = feature_dict
    self.args.num_features = len(feature_dict)
    self.updates = 0
    self.use_cuda = False
    self.parallel = False

    # Select the network class by model type, then instantiate once.
    if args.model_type == 'rnn':
        net_cls = RnnDocReader
    elif args.model_type == 'r_net':
        net_cls = R_Net
    elif args.model_type == 'mnemonic':
        net_cls = MnemonicReader
    else:
        raise RuntimeError('Unsupported model: %s' % args.model_type)
    self.network = net_cls(args, normalize)

    # Restore saved state; the 'fixed_embedding' buffer must be popped
    # before load_state_dict and re-registered afterwards.
    if state_dict:
        if 'fixed_embedding' in state_dict:
            fixed = state_dict.pop('fixed_embedding')
            self.network.load_state_dict(state_dict)
            self.network.register_buffer('fixed_embedding', fixed)
        else:
            self.network.load_state_dict(state_dict)
def __init__(self, opt, embedding=None, state_dict=None):
    """Book-keeping, network construction, optional checkpoint restore,
    and optimizer setup.

    Args:
        opt: option dict; 'cuda' selects the device.
        embedding: optional pretrained embedding passed to the network.
        state_dict: optional checkpoint with 'updates', 'loss',
            'network', and 'optimizer' entries.
    """
    # Book-keeping.
    self.opt = opt
    self.device = torch.cuda.current_device() if opt['cuda'] else torch.device('cpu')
    self.updates = state_dict['updates'] if state_dict else 0
    self.train_loss = AverageMeter()
    if state_dict:
        self.train_loss.load(state_dict['loss'])

    # Build the network and load compatible checkpoint tensors.
    self.network = RnnDocReader(opt, embedding=embedding)
    if state_dict:
        saved = state_dict['network']
        # Drop saved entries with no matching key in the current network
        # (mutates the checkpoint dict in place, as before).
        current = set(self.network.state_dict().keys())
        for key in list(saved.keys()):
            if key not in current:
                del saved[key]
        self.network.load_state_dict(saved)
    self.network.to(self.device)

    # Build the optimizer, restoring its state when available.
    self.opt_state_dict = state_dict['optimizer'] if state_dict else None
    self.build_optimizer()
#!/usr/bin/env python
# coding: utf-8
"""Smoke-test script: build an RnnDocReader on the GPU and print its size."""
import argparse

import torch
from torch.nn import functional as F
from torch.autograd import Variable

from rnn_reader import RnnDocReader

parser = argparse.ArgumentParser()
args = parser.parse_args()
# Minimal config attributes the reader expects; values are arbitrary
# for a size/speed smoke test.
args.char_vocab_size = 100
args.vocab_size = 1000
args.embedding_dim = 300
reader = RnnDocReader(args).cuda()


def get_n_params(model):
    """Return the total number of scalar parameters in *model*."""
    # numel() already gives the element count per tensor; no need to
    # multiply the size dims by hand.
    return sum(p.numel() for p in model.parameters())


print(get_n_params(reader))

# NOTE(review): each iteration allocates an *uninitialized* LongTensor and
# never reads it — presumably the start of a truncated forward-pass
# benchmark; confirm intent. Variable() has been a no-op wrapper since
# torch 0.4 and can be dropped once behavior is confirmed.
for _ in range(1000):
    x1 = Variable(torch.LongTensor(10, 40).cuda())
class DocReaderModel(object):
    """High level model that handles intializing the underlying network
    architecture, saving, updating examples, and predicting examples.
    """

    def __init__(self, opt, embedding=None, state_dict=None):
        # Book-keeping.
        self.opt = opt
        self.device = torch.cuda.current_device() if opt['cuda'] else torch.device('cpu')
        self.updates = state_dict['updates'] if state_dict else 0
        self.train_loss = AverageMeter()
        if state_dict:
            self.train_loss.load(state_dict['loss'])

        # Building network.
        self.network = RnnDocReader(opt, embedding=embedding)
        if state_dict:
            # Drop checkpoint tensors with no matching key in the freshly
            # built network (e.g. after an architecture change); mutates
            # the checkpoint dict in place.
            new_state = set(self.network.state_dict().keys())
            for k in list(state_dict['network'].keys()):
                if k not in new_state:
                    del state_dict['network'][k]
            self.network.load_state_dict(state_dict['network'])
        self.network.to(self.device)

        # Building optimizer (restores its state if a checkpoint was given).
        self.opt_state_dict = state_dict['optimizer'] if state_dict else None
        self.build_optimizer()

    def build_optimizer(self):
        """Create self.optimizer over trainable parameters.

        Supports 'sgd' and 'adamax'; raises RuntimeError otherwise.
        Restores optimizer state captured in __init__, if any.
        """
        parameters = [p for p in self.network.parameters() if p.requires_grad]
        if self.opt['optimizer'] == 'sgd':
            self.optimizer = optim.SGD(parameters, self.opt['learning_rate'],
                                       momentum=self.opt['momentum'],
                                       weight_decay=self.opt['weight_decay'])
        elif self.opt['optimizer'] == 'adamax':
            self.optimizer = optim.Adamax(parameters,
                                          weight_decay=self.opt['weight_decay'])
        else:
            raise RuntimeError('Unsupported optimizer: %s' % self.opt['optimizer'])
        if self.opt_state_dict:
            self.optimizer.load_state_dict(self.opt_state_dict)

    def update(self, ex):
        """One training step on batch `ex`.

        Layout (assumed from the indexing here — confirm against the
        loader): ex[:7] are network inputs, ex[7]/ex[8] are start/end
        answer targets.
        """
        # Train mode
        self.network.train()

        # Transfer to the model's device.
        inputs = [e.to(self.device) for e in ex[:7]]
        target_s = ex[7].to(self.device)
        target_e = ex[8].to(self.device)

        # Forward: per-token log-probabilities for span start and end.
        score_s, score_e = self.network(*inputs)

        # Loss is the sum of the start and end NLL terms.
        loss = F.nll_loss(score_s, target_s) + F.nll_loss(score_e, target_e)
        self.train_loss.update(loss.item())

        # Backward with gradient clipping.
        self.optimizer.zero_grad()
        loss.backward()
        torch.nn.utils.clip_grad_norm_(self.network.parameters(),
                                       self.opt['grad_clipping'])
        self.optimizer.step()
        self.updates += 1

    def predict(self, ex):
        """Predict answer strings for batch `ex`.

        ex[-2] is the raw text per example; ex[-1] the token->char spans.
        """
        # Eval mode
        self.network.eval()

        # BUGFIX: move inputs with self.device, matching update() and
        # __init__. The old Variable(e.cuda()) path was deprecated and
        # inconsistent with where the network actually lives.
        inputs = [e.to(self.device) for e in ex[:7]]

        # Run forward without building a graph.
        with torch.no_grad():
            score_s, score_e = self.network(*inputs)

        # Transfer to CPU tensors for the numpy argmax below.
        score_s = score_s.cpu()
        score_e = score_e.cpu()

        # Get argmax text spans.
        text = ex[-2]
        spans = ex[-1]
        predictions = []
        max_len = self.opt['max_len'] or score_s.size(1)
        for i in range(score_s.size(0)):
            # Outer product of start/end scores; keep only spans with
            # 0 <= end - start < max_len.
            scores = torch.ger(score_s[i], score_e[i])
            scores.triu_().tril_(max_len - 1)
            scores = scores.numpy()
            s_idx, e_idx = np.unravel_index(np.argmax(scores), scores.shape)
            s_offset, e_offset = spans[i][s_idx][0], spans[i][e_idx][1]
            predictions.append(text[i][s_offset:e_offset])
        return predictions

    def save(self, filename, epoch, scores):
        """Checkpoint model, optimizer, RNG state, and eval scores to `filename`."""
        em, f1, best_eval = scores
        params = {
            'state_dict': {
                'network': self.network.state_dict(),
                'embeddings': self.network.embedding.state_dict(),
                'doc_encoder': self.network.doc_rnn.state_dict(),
                'q_encoder': self.network.question_rnn.state_dict(),
                'self_attn_layer': self.network.self_attn.state_dict(),
                'qemb_layer': self.network.qemb_match.state_dict(),
                'optimizer': self.optimizer.state_dict(),
                'updates': self.updates,
                'loss': self.train_loss.state_dict()
            },
            'config': self.opt,
            'epoch': epoch,
            'em': em,
            'f1': f1,
            'best_eval': best_eval,
            'random_state': random.getstate(),
            'torch_state': torch.random.get_rng_state(),
            # BUGFIX: only query the CUDA RNG when CUDA is in use.
            # torch.cuda.get_rng_state() raises on CPU-only hosts, and the
            # broad except below then silently dropped every checkpoint.
            'torch_cuda_state': torch.cuda.get_rng_state() if self.opt['cuda'] else None
        }
        try:
            torch.save(params, filename)
        except BaseException:
            # Deliberate best-effort save: a failed disk write must not
            # kill training. (Consider logging a warning here.)
            pass