def setUp(self):
    """Build a tiny LSTM seq2seq model over the eng-fra TSV fixture.

    Leaves ``self.dataset`` and ``self.seq2seq`` ready for the tests;
    all weights are re-initialized uniformly in [-0.08, 0.08].
    """
    fixture_dir = os.path.dirname(os.path.realpath(__file__))
    src_field = SourceField()
    tgt_field = TargetField()
    self.dataset = torchtext.data.TabularDataset(
        path=os.path.join(fixture_dir, 'data/eng-fra.txt'),
        format='tsv',
        fields=[('src', src_field), ('tgt', tgt_field)],
    )
    # vocabularies must exist before the embedding sizes below are queried
    src_field.build_vocab(self.dataset)
    tgt_field.build_vocab(self.dataset)
    enc = EncoderRNN(len(src_field.vocab), 10, 10, 10, rnn_cell='lstm')
    dec = DecoderRNN(
        len(tgt_field.vocab),
        10,
        10,
        tgt_field.sos_id,
        tgt_field.eos_id,
        rnn_cell='lstm',
    )
    self.seq2seq = Seq2seq(enc, dec)
    # small uniform init keeps the toy model numerically tame
    for weight in self.seq2seq.parameters():
        weight.data.uniform_(-0.08, 0.08)
def setUpClass(self):
    """Create ``self.predictor`` backed by a tiny untrained LSTM seq2seq model.

    NOTE(review): ``setUpClass`` is conventionally a ``@classmethod`` taking
    ``cls`` — confirm the decorator is present at the call site.
    """
    fixture_dir = os.path.dirname(os.path.realpath(__file__))
    source = SourceField()
    target = TargetField()
    corpus = torchtext.data.TabularDataset(
        path=os.path.join(fixture_dir, 'data/eng-fra.txt'),
        format='tsv',
        fields=[('src', source), ('trg', target)],
    )
    # build vocabularies so the embedding sizes below can be derived
    source.build_vocab(corpus)
    target.build_vocab(corpus)
    enc = EncoderRNN(len(source.vocab), 5, 10, 10, rnn_cell='lstm')
    dec = DecoderRNN(
        len(target.vocab),
        10,
        10,
        target.sos_id,
        target.eos_id,
        rnn_cell='lstm',
    )
    net = Seq2seq(enc, dec)
    self.predictor = Predictor(net, source.vocab, target.vocab)
def __init__(self, opt, shared=None):
    """Set up model if shared params not set, otherwise no work to do.

    :param opt: ParlAI options dict (model size, files, optimizer, etc.).
    :param shared: when given, this is a lightweight copy of a fully
        initialized agent (dict, special-token indices, answers, and —
        under hogwild — the model itself are reused instead of rebuilt).
    """
    super().__init__(opt, shared)
    opt = self.opt  # there is a deepcopy in the init

    # all instances may need some params
    self.truncate = opt['truncate'] if opt['truncate'] > 0 else None
    self.metrics = {'loss': 0, 'num_tokens': 0}
    self.history = {}
    # index of this batch worker; 0 when not sharded across workers
    self.batch_idx = shared and shared.get('batchindex') or 0
    self.states = {}

    # check for cuda
    self.use_cuda = not opt.get('no_cuda') and torch.cuda.is_available()

    if shared:
        # set up shared properties
        self.dict = shared['dict']
        self.START_IDX = shared['START_IDX']
        self.END_IDX = shared['END_IDX']
        self.NULL_IDX = shared['NULL_IDX']
        # answers contains a batch_size list of the last answer produced
        self.answers = shared['answers']

        if 'model' in shared:
            # model is shared during hogwild
            self.model = shared['model']
    else:
        # this is not a shared instance of this class, so do full init
        # answers contains a batch_size list of the last answer produced
        self.answers = [None] * opt['batchsize']

        if self.use_cuda:
            torch.cuda.set_device(opt['gpu'])

        # check first for 'init_model' for loading model from file
        if opt.get('init_model') and os.path.isfile(opt['init_model']):
            init_model = opt['init_model']
        # next check for 'model_file'
        elif opt.get('model_file') and os.path.isfile(opt['model_file']):
            init_model = opt['model_file']
        else:
            init_model = None

        if init_model is not None:
            # load model parameters if available
            print('Loading existing model params from ' + init_model)
            new_opt, self.states = self.load(init_model)
            # override model-specific options with stored ones
            opt = self.override_opt(new_opt)

        if opt['dict_file'] is None:
            if init_model is not None and os.path.isfile(init_model + '.dict'):
                # check first to see if a dictionary exists
                opt['dict_file'] = init_model + '.dict'
            elif opt.get('model_file'):
                # otherwise, set default dict-file if it is not set
                opt['dict_file'] = opt['model_file'] + '.dict'

        # load dictionary and basic tokens & vectors
        self.dict = DictionaryAgent(opt)
        self.id = 'Seq2Seq'
        # we use START markers to start our output
        self.START_IDX = self.dict[self.dict.start_token]
        # we use END markers to end our output
        self.END_IDX = self.dict[self.dict.end_token]
        # get index of null token from dictionary (probably 0)
        self.NULL_IDX = self.dict[self.dict.null_token]

        encoder = EncoderRNN(
            len(self.dict),
            opt['maxlength_in'],
            opt['hiddensize'],
            dropout_p=opt['dropout'],
            input_dropout_p=opt['dropout'],
            n_layers=opt['numlayers'],
            rnn_cell=opt['rnncell'],
            bidirectional=opt['bidirectional'],
            variable_lengths=True,
        )
        decoder = DecoderRNN(
            len(self.dict),
            opt['maxlength_out'],
            # decoder hidden size doubles when the encoder is bidirectional,
            # so the concatenated forward/backward states fit
            opt['hiddensize'] * 2 if opt['bidirectional'] else opt['hiddensize'],
            dropout_p=opt['dropout'],
            input_dropout_p=opt['dropout'],
            n_layers=opt['numlayers'],
            rnn_cell=opt['rnncell'],
            bidirectional=opt['bidirectional'],
            sos_id=self.START_IDX,
            eos_id=self.END_IDX,
            use_attention=opt['attention'],
        )
        self.model = Seq2seq(encoder, decoder)

        if self.states:
            # set loaded states if applicable
            self.model.load_state_dict(self.states['model'])

        if self.use_cuda:
            self.model.cuda()

    # set up criteria
    # NOTE(review): size_average=False is deprecated in newer torch in favor
    # of reduction='sum' — confirm the pinned torch version before changing.
    self.criterion = nn.NLLLoss(ignore_index=self.NULL_IDX, size_average=False)

    if self.use_cuda:
        self.criterion.cuda()

    if 'train' in opt.get('datatype', ''):
        # if model was built, do more setup
        self.clip = opt['gradient_clip']

        # set up tensors once
        self.START = torch.LongTensor([self.START_IDX])

        if self.use_cuda:
            # push to cuda
            self.START = self.START.cuda()

        # set up optimizer
        lr = opt['learningrate']
        optim_class = IbmSeq2seqAgent.OPTIM_OPTS[opt['optimizer']]
        kwargs = {'lr': lr}
        if opt['optimizer'] == 'sgd':
            kwargs['momentum'] = 0.95
            kwargs['nesterov'] = True
        # only optimize parameters that require gradients (frozen ones skipped)
        self.optimizer = optim_class(
            [p for p in self.model.parameters() if p.requires_grad], **kwargs
        )
        if self.states:
            if self.states['optimizer_type'] != opt['optimizer']:
                print(
                    'WARNING: not loading optim state since optim class '
                    'changed.'
                )
            else:
                self.optimizer.load_state_dict(self.states['optimizer'])
        # halve LR after 3 epochs without validation-loss improvement
        self.scheduler = optim.lr_scheduler.ReduceLROnPlateau(
            self.optimizer, 'min', factor=0.5, patience=3, verbose=True
        )

    self.reset()