import logging
import time

import torch

# Helpers such as `loss`, `unwrap`, `log_gpu_usage`, and `half_and_half` are
# assumed to be imported from elsewhere in the repo.


def validation(model, data_loader):
    t_valid = time.time()
    model.eval()

    valid_loss = 0.
    yy, yy_pred = [], []
    for i, (x, y) in enumerate(data_loader):
        y_pred = model(x)
        vl = loss(y_pred, y)
        valid_loss += float(unwrap(vl))
        yy.append(unwrap(y))
        yy_pred.append(unwrap(y_pred))

    valid_loss /= len(data_loader)

    logdict = dict(
        yy=yy,
        yy_pred=yy_pred,
        #mask=mask,
        w_valid=data_loader.dataset.weights,
        valid_loss=valid_loss,
        model=model,
        logtime=0,
    )
    #logdict.update(train_dict)

    model.train()
    logging.info("Validation took {:.1f} seconds".format(time.time() - t_valid))
    return logdict
def train_one_batch(model, batch, optimizer, administrator, epoch, batch_number, clip):
    logger = administrator.logger
    (x, y, y_mask, batch_mask) = batch

    # forward
    model.train()
    optimizer.zero_grad()
    y_pred = model(x, mask=batch_mask, logger=logger, epoch=epoch, iters=batch_number)
    l = loss(y_pred, y, y_mask, batch_mask)

    # backward
    l.backward()
    if clip is not None:
        # In-place gradient clipping (clip_grad_norm_ replaces the deprecated clip_grad_norm).
        torch.nn.utils.clip_grad_norm_(model.parameters(), clip)

    # Disabled parameter/gradient logging, kept for reference.
    #if batch_number == 0:
    #    old_params = torch.cat([p.view(-1) for p in model.parameters()], 0)
    #else:
    #    old_params = None; grads = None
    #if batch_number == 1:
    #    log_gpu_usage()

    optimizer.step()

    #if batch_number == 0:
    #    model_params = torch.cat([p.view(-1) for p in model.parameters()], 0)
    #    logdict = dict(
    #        grads=grads,
    #        old_params=old_params,
    #        model_params=model_params,
    #    )
    #    administrator.training_only_monitors(**logdict)
    #    administrator.training_only_monitors.visualize()
    #else:
    #    model_params = None

    # Free batch tensors before reporting GPU usage.
    del y, y_pred, y_mask, x, batch_mask, batch
    log_gpu_usage()

    return float(unwrap(l))
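# The functions in this file lean on a few small helpers defined elsewhere in
# the repo. The two sketches below are assumptions about what `unwrap` and
# `log_gpu_usage` likely do (detach a tensor to NumPy, and report allocated
# CUDA memory), not the actual implementations.

def unwrap(tensor):
    # Hypothetical sketch: detach from autograd and move to a CPU NumPy array.
    return tensor.detach().cpu().numpy()


def log_gpu_usage():
    # Hypothetical sketch: log currently allocated CUDA memory, if a GPU is present.
    if torch.cuda.is_available():
        logging.info("GPU memory allocated: {:.1f} MB".format(
            torch.cuda.memory_allocated() / 1e6))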
def test_one_model(self, model, data_loader, filename):
    model.eval()

    valid_loss = 0.
    yy, yy_pred = [], []
    for i, (x, y) in enumerate(data_loader):
        y_pred = model(x)
        vl = self.loss(y_pred, y)
        valid_loss += float(unwrap(vl))
        yy.append(unwrap(y))
        yy_pred.append(unwrap(y_pred))

    valid_loss /= len(data_loader)

    logdict = dict(
        yy=yy,
        yy_pred=yy_pred,
        test_loss=valid_loss,
        model=filename,
    )
    return logdict
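# Hypothetical usage sketch (assumed, not from the original code): evaluating a
# set of saved checkpoints with `test_one_model`. The `trainer` object owning
# the method, the checkpoint filenames, and whole-model `torch.load` checkpoints
# are all placeholders.

def evaluate_checkpoints(trainer, test_loader, filenames):
    # Run test_one_model on each checkpoint and return the best by test loss.
    results = []
    for filename in filenames:
        model = torch.load(filename)
        results.append(trainer.test_one_model(model, test_loader, filename))
    return min(results, key=lambda d: d["test_loss"])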
def validation(self, model, data_loader):
    t_valid = time.time()
    model.eval()

    valid_loss = 0.
    yy, yy_pred = [], []
    for i, (x, y) in enumerate(data_loader):
        y_pred = model(x)
        vl = self.loss(y_pred, y)
        valid_loss += float(unwrap(vl))
        yy.append(unwrap(y))
        yy_pred.append(unwrap(y_pred))

        #if epoch % admin_args.lf == 0:
        #    y_matrix_monitor(matrix=y)
        #    y_matrix_monitor.visualize('epoch-{}/{}'.format(epoch, 'y'), n=10)
        #    y_pred_matrix_monitor(matrix=y_pred)
        #    y_pred_matrix_monitor.visualize('epoch-{}/{}'.format(epoch, 'y_pred'), n=10)

    valid_loss /= len(data_loader)

    logdict = dict(
        yy=yy,
        yy_pred=yy_pred,
        #mask=mask,
        w_valid=data_loader.dataset.weights,
        valid_loss=valid_loss,
        model=model,
        logtime=0,
    )
    #logdict.update(train_dict)

    model.train()
    logging.info("Validation took {:.1f} seconds".format(time.time() - t_valid))
    return logdict
def train_one_batch(self, model, batch, optimizer, administrator, epoch, batch_number, clip):
    logger = administrator.logger
    (x, y) = batch

    #gc.collect()
    #logging.info("PRE-MODEL USAGE")
    #log_gpu_usage()
    #import ipdb; ipdb.set_trace()

    # forward
    model.train()
    optimizer.zero_grad()
    y_pred = model(x, logger=logger, epoch=epoch, iters=batch_number)
    l = self.loss(y_pred, y)

    # backward
    l.backward()
    if clip is not None:
        # In-place gradient clipping (clip_grad_norm_ replaces the deprecated clip_grad_norm).
        torch.nn.utils.clip_grad_norm_(model.parameters(), clip)

    # Snapshot parameters and gradients on the first batch for the grad monitors.
    if batch_number == 0:
        logging.info("COMPUTING GRADS FOR LOGGING")
        old_params = torch.cat([p.view(-1) for p in model.parameters()], 0)
        grads = torch.cat(
            [p.grad.view(-1) for p in model.parameters() if p.grad is not None], 0
        )

    logging.info("POST-MODEL, PRE-OPTIM USAGE")
    log_gpu_usage()

    optimizer.step()

    if batch_number == 0:
        model_params = torch.cat([p.view(-1) for p in model.parameters()], 0)
        for m in administrator.grad_monitors:
            m(model_params=model_params, old_params=old_params, grads=grads)

    logging.info("FINAL USAGE")
    log_gpu_usage()
    logging.info("\n")

    return float(unwrap(l))
def validation(model, data_loader):
    t_valid = time.time()
    model.eval()

    valid_loss = 0.
    yy, yy_pred = [], []
    half = []
    mask = []
    hard_pred = []
    for i, batch in enumerate(data_loader):
        (x, y, y_mask, batch_mask) = batch
        y_pred = model(x, mask=batch_mask)
        vl = loss(y_pred, y, y_mask, batch_mask)
        valid_loss = valid_loss + float(unwrap(vl))

        yy.append(unwrap(y))
        yy_pred.append(unwrap(y_pred))
        mask.append(unwrap(batch_mask))
        half.append(unwrap(half_and_half(y, y_pred)))
        hard_pred.append(unwrap(half_and_half(y, (y_pred > 0.5).float())))

        # Free batch tensors before the next iteration.
        del y, y_pred, y_mask, x, batch_mask, batch

    valid_loss /= len(data_loader)
    #grads = torch.cat([p.grad.view(-1) for p in model.parameters() if p.grad is not None], 0)

    logdict = dict(
        yy=yy,
        yy_pred=yy_pred,
        half=half,
        hard_pred=hard_pred,
        mask=mask,
        valid_loss=valid_loss,
        model=model,
        #grads=grads,
    )

    model.train()
    logging.info("Validation took {:.1f} seconds".format(time.time() - t_valid))
    return logdict
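# Hypothetical usage sketch (assumed, not from the original code): wiring the
# module-level `train_one_batch` and `validation` functions into a simple epoch
# loop. The `train_loader`, `valid_loader`, `optimizer`, and `administrator`
# objects are assumed to be constructed elsewhere in the repo.

def train(model, train_loader, valid_loader, optimizer, administrator,
          n_epochs, clip=None):
    for epoch in range(n_epochs):
        epoch_loss = 0.
        for batch_number, batch in enumerate(train_loader):
            epoch_loss += train_one_batch(
                model, batch, optimizer, administrator, epoch, batch_number, clip)
        epoch_loss /= len(train_loader)

        # Run validation at the end of every epoch and report both losses.
        logdict = validation(model, valid_loader)
        logging.info("Epoch {}: train loss {:.4f}, valid loss {:.4f}".format(
            epoch, epoch_loss, logdict["valid_loss"]))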