def __init__(self, model, loss_function, trainData, validData, dataset, opt):
    """Set up the trainer and its multi-GPU execution backend.

    Delegates common setup to the parent trainer, freshly initializes the
    model parameters, and builds the multiprocessing runner that drives
    training across the configured GPUs.
    """
    super().__init__(model, loss_function, trainData, validData, dataset, opt)

    # Fresh parameter init; a later checkpoint restore may overwrite these.
    print('Initializing model parameters')
    init_model_parameters(model, opt)

    # create a multi-gpu runner here
    self.runner = MultiprocessingRunner(opt, model, loss_function,
                                        device_ids=opt.gpus)
def run(self, save_file=None):
    """Main training loop.

    Optionally resumes model/optimizer/batch-order state from a checkpoint
    file, then alternates training and validation for ``opt.epochs`` epochs,
    saving after each validation pass.

    Args:
        save_file: path to a checkpoint written by ``self.save``; when None
            (or falsy), model parameters are freshly initialized instead.
    """
    opt = self.opt
    model = self.model

    # Try to load the save_file (CPU-mapped so it loads regardless of the
    # device it was saved from).
    checkpoint = None
    if save_file:
        checkpoint = torch.load(save_file,
                                map_location=lambda storage, loc: storage)

    if checkpoint is not None:
        print('Loading model and optim from checkpoint at %s' % save_file)
        self.model.load_state_dict(checkpoint['model'])

        # Fixed: was `opt.reset_optim == False` — compare booleans idiomatically.
        if not opt.reset_optim:
            self.optim.load_state_dict(checkpoint['optim'])
            batchOrder = checkpoint['batchOrder']
            iteration = checkpoint['iteration'] + 1
            # Resume from the epoch after the checkpointed one.
            opt.start_epoch = int(
                math.floor(float(checkpoint['epoch'] + 1)))
            resume = True
        else:
            # Optimizer reset requested: start this run's schedule from scratch.
            batchOrder = None
            iteration = 0
            resume = False

        # Release checkpoint tensors before training to reduce peak memory.
        del checkpoint['model']
        del checkpoint['optim']
        del checkpoint
    else:
        batchOrder = None
        iteration = 0
        print('Initializing model parameters')
        init_model_parameters(model, opt)
        resume = False

    # Baseline validation before any training this run.
    valid_loss = self.eval(self.validData)
    valid_ppl = math.exp(min(valid_loss, 100))  # clamp to avoid overflow
    print('Validation perplexity: %g' % valid_ppl)

    self.start_time = time.time()

    for epoch in range(opt.start_epoch, opt.start_epoch + opt.epochs):
        print('')

        # (1) train for one epoch on the training set
        train_loss = self.train_epoch(epoch, resume=resume,
                                      batchOrder=batchOrder,
                                      iteration=iteration)
        train_ppl = math.exp(min(train_loss, 100))
        print('Train perplexity: %g' % train_ppl)

        # (2) evaluate on the validation set
        valid_loss = self.eval(self.validData)
        valid_ppl = math.exp(min(valid_loss, 100))
        print('Validation perplexity: %g' % valid_ppl)

        self.save(epoch, valid_ppl)

        # Resume state only applies to the first epoch after loading.
        batchOrder = None
        iteration = None
        resume = False
def run(self, checkpoint=None):
    """Main training loop (checkpoint-object variant).

    Optionally restores model/optimizer/batch-order state — including any
    auxiliary-dataset iteration state — from an already-loaded checkpoint
    dict, then alternates training and validation for ``opt.epochs`` epochs,
    saving after each validation pass.

    Args:
        checkpoint: checkpoint dict (as produced by ``self.save`` and loaded
            by the caller); when None, model parameters are freshly
            initialized instead.
    """
    opt = self.opt
    model = self.model
    # Removed: dead commented-out torch.load code — the caller now loads
    # the checkpoint and passes the dict in directly.

    if checkpoint is not None:
        self.model.load_state_dict(checkpoint['model'])

        if not opt.reset_optim:
            self.optim.load_state_dict(checkpoint['optim'])
            # Older checkpoints may not carry a stored batch order.
            if 'batch_order' in checkpoint:
                batch_order = checkpoint['batch_order']
                iteration = checkpoint['iteration'] + 1
            else:
                batch_order = None
                iteration = 0
            # Resume from the epoch after the checkpointed one.
            opt.start_epoch = int(
                math.floor(float(checkpoint['epoch'] + 1)))
            resume = True
            if len(self.additional_data) > 0:
                if 'additional_batch_order' in checkpoint:
                    self.additional_batch_order = checkpoint[
                        'additional_batch_order']
                    self.additional_data_iteration = checkpoint[
                        'additional_data_iteration']
                else:
                    self.init_additional_data()
        else:
            # Optimizer reset requested: start this run's schedule from scratch.
            batch_order = None
            iteration = 0
            resume = False
            self.init_additional_data()

        # Release checkpoint tensors before training to reduce peak memory.
        del checkpoint['model']
        del checkpoint['optim']
        del checkpoint
    else:
        batch_order = None
        iteration = 0
        print('Initializing model parameters')
        init_model_parameters(model, opt)
        resume = False
        self.init_additional_data()

    # Baseline validation before any training this run.
    valid_loss = self.eval(self.valid_data)
    valid_ppl = math.exp(min(valid_loss, 100))  # clamp to avoid overflow
    print('Validation perplexity: %g' % valid_ppl)

    self.start_time = time.time()

    for epoch in range(opt.start_epoch, opt.start_epoch + opt.epochs):
        print('')

        # (1) train for one epoch on the training set
        train_loss = self.train_epoch(epoch, resume=resume,
                                      batch_order=batch_order,
                                      iteration=iteration)
        train_ppl = math.exp(min(train_loss, 100))
        print('Train perplexity: %g' % train_ppl)

        # (2) evaluate on the validation set
        valid_loss = self.eval(self.valid_data)
        valid_ppl = math.exp(min(valid_loss, 100))
        print('Validation perplexity: %g' % valid_ppl)

        self.save(epoch, valid_ppl)

        # Resume state only applies to the first epoch after loading.
        batch_order = None
        iteration = None
        resume = False