def evaluate(model, dataset_dl, loss_fn=None, metrics=None, params=None):
    """Evaluate `model` over every batch of `dataset_dl` under no_grad.

    Args:
        model: network whose forward returns a dict with an 'out' tensor.
        dataset_dl: iterable of (inputs, targets) batches.
        loss_fn: optional loss; when given, its running mean is returned.
        metrics: optional dict of name -> metric objects with
            reset()/add()/value().
        params: object providing `.device` for tensor placement.

    Returns:
        (mean_loss, metrics_results): mean_loss is None when `loss_fn` is
        None; metrics_results is {} when `metrics` is None.
    """
    model.eval()
    loss_meter = utils.RunningAverage() if loss_fn is not None else None
    _ = len(dataset_dl)  # batch count; computed (as in original) but unused
    if metrics is not None:
        for meter in metrics.values():
            meter.reset()
    with torch.no_grad():
        for inputs, targets in tqdm(dataset_dl):
            inputs = inputs.to(params.device)
            targets = targets.to(params.device)
            preds = model(inputs)['out']
            if loss_meter is not None:
                loss_meter.update(loss_fn(preds, targets).item())
            if metrics is not None:
                for meter in metrics.values():
                    meter.add(preds, targets)
    results = ({name: meter.value() for name, meter in metrics.items()}
               if metrics is not None else {})
    return (loss_meter() if loss_meter is not None else None), results
def train(model, dataloader, optimizer, loss_fns, scheduler, evaluator, writer, epoch, params):
    """Train `model` for one epoch, logging per-iteration loss to TensorBoard.

    Uses the 'CrossEntropy' entry of `loss_fns`; `scheduler` is called each
    step and returns the current learning rate. Predictions are folded into
    `evaluator` every `params.save_summary_steps`-th batch.
    """
    model.train()
    evaluator.reset()
    loss_meter = utils.RunningAverage()
    batches_per_epoch = len(dataloader)
    with tqdm(total=batches_per_epoch) as pbar:
        for step, sample in enumerate(dataloader):
            images, targets = sample['image'], sample['label']
            if params.cuda:
                images = images.cuda()
                targets = targets.cuda()
            current_lr = scheduler(optimizer, step, epoch)
            optimizer.zero_grad()
            logits = model(images)
            loss = loss_fns['CrossEntropy'](params, logits, targets)
            loss.backward()
            optimizer.step()
            # Periodically accumulate argmax predictions into the evaluator.
            if step % params.save_summary_steps == 0:
                preds = np.argmax(logits.data.cpu().numpy(), axis=1)
                evaluator.add_batch(targets.data.cpu().numpy(), preds)
            batch_loss = loss.item()
            loss_meter.update(batch_loss)
            writer.add_scalar('train/total_loss_iter', batch_loss,
                              step + batches_per_epoch * epoch)
            pbar.set_postfix(loss='{:05.3f}'.format(loss_meter()),
                             lr='{:05.3f}'.format(current_lr))
            pbar.update()
    writer.add_scalar('train/mean_loss_epoch', loss_meter(), epoch)
    metrics_mean = {
        'mIOU': evaluator.Mean_Intersection_over_Union(),
        'loss': loss_meter()
    }
    metrics_string = " ; ".join("{}: {:05.3f}".format(k, v)
                                for k, v in metrics_mean.items())
    logging.info("- Train metrics: " + metrics_string)
def train(model, dataloader, optimizer, loss_fn, params, autosave=True):
    """Train an autoencoder-style model for `params.num_epochs` epochs.

    The reconstruction loss compares the model output against the input
    batch itself (labels from the dataloader are ignored).

    Args:
        model: network called as model(x, k=4).
        dataloader: yields (x, label) pairs; labels are unused.
        optimizer: optimizer for `model`'s parameters.
        loss_fn: reconstruction loss taking (prediction, target).
        params: hyperparameters (num_epochs, cuda, batch_size).
        autosave: overwrite the latest checkpoint after each epoch.
    """
    model.train()
    for epoch in range(params.num_epochs):
        loss_avg = utils.RunningAverage()
        desc = "Epoch: {}".format(epoch)  # informational only, used in tqdm
        with tqdm(desc=desc, total=len(dataloader)) as t:
            for i, (x, _) in enumerate(dataloader):
                if params.cuda:
                    # BUG FIX: the original `x, _ = x.cuda(non_blocking=True)`
                    # tried to unpack a single tensor into two names, which
                    # fails (or silently corrupts x) at runtime.
                    x = x.cuda(non_blocking=True)
                y_pred = model(x, k=4)
                # Loss is computed against the input x (reconstruction).
                loss = loss_fn(y_pred, x)
                optimizer.zero_grad()
                loss.backward()
                optimizer.step()
                loss_avg.update(loss.item())
                t.set_postfix(loss="{:05.8f}".format(loss_avg()))
                t.update()
        if autosave:
            # Autosaves latest state after each epoch (overwrites previous).
            state = utils.get_save_state(epoch, model, optimizer)
            utils.save_checkpoint(state, model_path,
                                  name=f"pre_train_{params.batch_size}",
                                  silent=False)
def evaluate(model, dataloader, loss_fns, evaluator, writer, epoch, params):
    """Evaluate a segmentation model for one epoch and log metrics.

    Args:
        model: segmentation network.
        dataloader: yields dicts with 'image' and 'label' tensors.
        loss_fns: dict of losses; only 'CrossEntropy' is used here.
        evaluator: confusion-matrix accumulator (mIoU, pixel accuracy, ...).
        writer: TensorBoard SummaryWriter.
        epoch: current epoch index (for logging).
        params: hyperparameters (cuda flag).

    Returns:
        dict with 'mIOU' and mean 'loss'.
    """
    model.eval()
    evaluator.reset()
    loss_avg = utils.RunningAverage()
    with tqdm(total=len(dataloader)) as t:
        for sample in dataloader:
            data_batch, labels_batch = sample['image'], sample['label']
            if params.cuda:
                data_batch, labels_batch = data_batch.cuda(
                ), labels_batch.cuda()
            with torch.no_grad():
                output_batch = model(data_batch)
                loss = loss_fns['CrossEntropy'](params, output_batch,
                                                labels_batch)
            output_batch = output_batch.data.cpu().numpy()
            labels_batch = labels_batch.data.cpu().numpy()
            # FIX: removed `data_batch = data_batch.data.cpu().numpy()` —
            # the result was never used, so it only wasted a full GPU->CPU
            # copy of the input batch every iteration.
            output_batch = np.argmax(output_batch, axis=1)
            evaluator.add_batch(labels_batch, output_batch)
            loss_avg.update(loss.item())
            t.set_postfix(loss='{:05.3f}'.format(loss_avg()))
            t.update()
    writer.add_scalar('val/mean_loss_epoch', loss_avg(), epoch)
    writer.add_scalar('val/mIoU', evaluator.Mean_Intersection_over_Union(),
                      epoch)
    writer.add_scalar('val/Acc', evaluator.Pixel_Accuracy(), epoch)
    writer.add_scalar('val/Acc_class', evaluator.Pixel_Accuracy_Class(),
                      epoch)
    writer.add_scalar('val/fwIoU',
                      evaluator.Frequency_Weighted_Intersection_over_Union(),
                      epoch)
    metrics_mean = {
        'mIOU': evaluator.Mean_Intersection_over_Union(),
        'loss': loss_avg()
    }
    metrics_string = " ; ".join("{}: {:05.3f}".format(k, v)
                                for k, v in metrics_mean.items())
    logging.info("- Eval metrics : " + metrics_string)
    return metrics_mean
def train_epoch(model, loss_fn, dataset_dl, opt=None, lr_scheduler=None, metrics=None, params=None):
    """Run one pass over `dataset_dl`; steps the optimizer when `opt` is given.

    Returns:
        (mean_loss, metrics_results): metrics_results is an OrderedDict of
        metric values, or None when `metrics` is None.
    """
    loss_meter = utils.RunningAverage()
    _ = len(dataset_dl)  # batch count; computed (as in original) but unused
    if metrics is not None:
        for meter in metrics.values():
            meter.reset()
    for inputs, targets in tqdm(dataset_dl):
        inputs = inputs.to(params.device)
        targets = targets.to(params.device)
        preds = model(inputs)['out']
        batch_loss = loss_fn(preds, targets)
        if opt is not None:
            # Training mode: backprop, then advance the LR schedule.
            opt.zero_grad()
            batch_loss.backward()
            opt.step()
            if lr_scheduler is not None:
                lr_scheduler.step()
        loss_meter.update(batch_loss.item())
        if metrics is not None:
            for meter in metrics.values():
                meter.add(preds.detach(), targets)
    if metrics is None:
        return loss_meter(), None
    results = OrderedDict(
        (name, meter.value()) for name, meter in metrics.items())
    return loss_meter(), results
def train(model, ad_net, grl, ad_net_m, grl_m, optimizer, loss_fn, dataloader, metrics, params, logger):
    """Train one epoch with a JAN (joint adaptation) alignment loss.

    Each batch stacks paired source/target clips on the last axis; the
    source half supervises the classification loss while JAN aligns the
    source and target feature/softmax distributions.

    Args:
        model: (torch.nn.Module) network returning (logits, features)
        ad_net, grl, ad_net_m, grl_m: adversarial heads (unused in this body)
        optimizer: (torch.optim) optimizer for parameters of model
        loss_fn: classification loss returning a dict of named losses
        dataloader: yields (data_batch, labels_batch)
        metrics: (dict) name -> callable(output, labels)
        params: (Params) hyperparameters
        logger: logger for the epoch summary

    Returns:
        (metrics_mean, confusion_meter)
    """
    model.train()
    summ = []
    loss_avg = utils.RunningAverage()
    confusion_meter = torchnet.meter.ConfusionMeter(
        params.model_args["num_class"], normalized=True)
    confusion_meter.reset()
    trade_off = 0.1  # weight of the JAN alignment loss

    with tqdm(total=len(dataloader)) as t:
        for i, (data_batch, labels_batch) in enumerate(dataloader):
            if params.cuda:
                if params.data_parallel:
                    # FIX: `.cuda(async=True)` is a SyntaxError on Python
                    # 3.7+ (`async` became a keyword); PyTorch >= 0.4 spells
                    # this `non_blocking=True`.
                    data_batch, labels_batch = data_batch.cuda(
                        non_blocking=True), labels_batch.cuda(
                            non_blocking=True)
                else:
                    data_batch, labels_batch = data_batch.cuda(
                        params.gpu_id), labels_batch.cuda(params.gpu_id)
            data_batch, labels_batch = Variable(data_batch), Variable(
                labels_batch)
            batch_size = data_batch.shape[0]
            z = data_batch[:, :, :, :, :, 0]  # source clips
            y = data_batch[:, :, :, :, :, 1]  # target clips
            data_batch = torch.cat((z, y), dim=0)

            output_batch, feature = model(data_batch.float(),
                                          target=labels_batch)
            # FIX: give Softmax an explicit dim; the implicit-dim form is
            # deprecated and merely warns its way to dim=1 for 2-D input.
            softmax_layer = nn.Softmax(dim=1).cuda()
            # Domain labels (1 = source, 0 = target); built for the
            # adversarial heads, unused in this body (kept from original).
            ad_target = Variable(
                torch.from_numpy(
                    np.array([[1]] * batch_size +
                             [[0]] * batch_size)).float())
            ad_target = ad_target.cuda()
            out_labels = output_batch.clone()
            output_batch = output_batch[:batch_size]  # source half only
            feature = feature.view(feature.size(0), -1)
            softmax_out = softmax_layer(out_labels)
            loss_bag = loss_fn(output_batch, labels_batch,
                               current_epoch=params.current_epoch,
                               params=params)
            half = feature.size(0) // 2
            loss_ad = Loss.JAN([
                feature.narrow(0, 0, half),
                softmax_out.narrow(0, 0, softmax_out.size(0) // 2)
            ], [
                feature.narrow(0, half, half),
                softmax_out.narrow(0, half, softmax_out.size(0) // 2)
            ])
            loss = loss_bag['ls_CE'] + (loss_ad) * trade_off
            confusion_meter.add(output_batch.data, labels_batch.data)

            optimizer.zero_grad()
            loss.backward()
            # clip_grad_norm_ guards against exploding gradients.
            total_norm = torch.nn.utils.clip_grad_norm_(
                model.parameters(), params.clip * params.batch_size_train)
            optimizer.step()

            # Evaluate summaries only once in a while.
            if i % params.save_summary_steps == 0:
                output_batch = output_batch.data
                labels_batch = labels_batch.data
                summary_batch = {
                    metric: metrics[metric](output_batch, labels_batch)
                    for metric in metrics
                }
                summary_batch['loss'] = loss.data.item()
                summary_batch['ls_BCE'] = loss_ad.data.item()
                for l, v in loss_bag.items():
                    summary_batch[l] = v.data.item()
                summ.append(summary_batch)

            loss_avg.update(loss.data.item())
            t.set_postfix(loss_running='{:05.3f}'.format(loss_avg()))
            t.update()

    # Mean of all summary metrics over the epoch.
    AccTop5_list_train = [x['accuracytop5'][0] for x in summ]
    AccTop1_list_train = [x['accuracytop1'][0] for x in summ]
    AccTop5_mean_train = torch.mean(
        torch.stack(AccTop5_list_train)).cpu().numpy()
    AccTop1_mean_train = torch.mean(
        torch.stack(AccTop1_list_train)).cpu().numpy()
    loss_mean_train = np.mean([x['loss'] for x in summ])
    ls_all_mean_train = np.mean([x['ls_all'] for x in summ])
    ls_ce_mean_train = np.mean([x['ls_CE'] for x in summ])
    ls_bce_mean_train = np.mean([x['ls_BCE'] for x in summ])
    metrics_mean = {
        'accuracytop5': AccTop5_mean_train,
        'accuracytop1': AccTop1_mean_train,
        'loss': loss_mean_train,
        'ls_all': ls_all_mean_train,
        'ls_CE': ls_ce_mean_train,
        'ls_BCE': ls_bce_mean_train
    }
    print(metrics_mean)
    metrics_string = " ; ".join("{}: {:05.3f}".format(k, v)
                                for k, v in metrics_mean.items())
    logger.info("- Train metrics: " + metrics_string)
    return metrics_mean, confusion_meter
def train(model, epoch, ad_net, grl, ad_net_mp, grl_mp, optimizer, loss_fn, dataloader, metrics, params, logger):
    """Train one epoch with an auxiliary rotation-prediction loss.

    Each batch stacks paired source/target clips on the last axis; the
    source half supervises the classification loss while a self-supervised
    rotation-prediction loss on both halves acts as the adaptation signal.

    Args:
        model: (torch.nn.Module) returns (logits, rotation_logits, features)
        epoch: current epoch index (features are dumped at epoch 300)
        ad_net, grl, ad_net_mp, grl_mp: adversarial heads (unused here)
        optimizer: (torch.optim) optimizer for parameters of model
        loss_fn: loss returning a dict of named losses (uses 'ls_CE')
        dataloader: yields (data_batch, labels_batch, rotation_labels)
        metrics: (dict) name -> callable(output, labels)
        params: (Params) hyperparameters
        logger: logger for the epoch summary

    Returns:
        (metrics_mean, confusion_meter)
    """
    model.train()
    # FIX: `np.float` was removed in NumPy 1.24; `np.float64` is the exact
    # dtype the old alias resolved to, so behavior is unchanged.
    source_feature = np.empty([0, 512], dtype=np.float64)
    target_feature = np.empty([0, 512], dtype=np.float64)
    source_rotate_label = np.empty([0], dtype=np.float64)
    target_rotate_label = np.empty([0], dtype=np.float64)
    summ = []
    loss_avg = utils.RunningAverage()
    confusion_meter = torchnet.meter.ConfusionMeter(
        params.model_args["num_class"], normalized=True)
    confusion_meter.reset()
    with tqdm(total=len(dataloader)) as t:
        for i, (data_batch, labels_batch, rl_label) in enumerate(dataloader):
            if params.cuda:
                if params.data_parallel:
                    data_batch, labels_batch = data_batch.cuda(
                        non_blocking=True), labels_batch.cuda(
                            non_blocking=True)
                else:
                    data_batch, labels_batch = data_batch.cuda(
                        params.gpu_id), labels_batch.cuda(params.gpu_id)
            data_batch, labels_batch = Variable(data_batch), Variable(
                labels_batch)
            batch_size = data_batch.shape[0]
            z = data_batch[:, :, :, :, :, 0]  # source clips
            y = data_batch[:, :, :, :, :, 1]  # target clips
            rl_s = rl_label[:, 0]
            rl_t = rl_label[:, 1]
            # Rotation labels accumulated per-domain; NOTE(review): these are
            # never read again in this function — kept for parity/debugging.
            source_rotate_label = np.concatenate(
                (source_rotate_label, rl_s), axis=0)
            target_rotate_label = np.concatenate(
                (target_rotate_label, rl_t), axis=0)
            data_batch = torch.cat((z, y), dim=0)
            rl_batch = torch.cat((rl_s, rl_t), dim=0)  # rotation targets
            if params.cuda:
                if params.data_parallel:
                    rl_batch = rl_batch.cuda(non_blocking=True)
                else:
                    rl_batch = rl_batch.cuda(params.gpu_id)
            rl_batch = Variable(rl_batch)
            output_batch, output_rl, feature = model(data_batch.float(),
                                                     target=labels_batch)
            if epoch == 300:
                # Features captured for offline t-SNE visualisation;
                # accumulated locally and never returned.
                feature_cpu = feature.cpu()
                source_feature = np.concatenate(
                    (source_feature,
                     feature_cpu.detach().numpy()[:batch_size]), axis=0)
                target_feature = np.concatenate(
                    (target_feature,
                     feature_cpu.detach().numpy()[batch_size:]), axis=0)
            output_batch = output_batch[:batch_size]  # source half only
            loss_bag = loss_fn(output_batch, labels_batch,
                               current_epoch=params.current_epoch,
                               params=params)
            loss_rl = loss_fn(output_rl, rl_batch,
                              current_epoch=params.current_epoch,
                              params=params)
            trade_off = 0.1  # weight of the rotation-prediction loss
            loss = loss_bag['ls_CE'] + trade_off * loss_rl['ls_CE']
            confusion_meter.add(output_batch.data, labels_batch.data)
            optimizer.zero_grad()
            loss.backward()
            # clip_grad_norm_ guards against exploding gradients.
            total_norm = torch.nn.utils.clip_grad_norm_(
                model.parameters(), params.clip * params.batch_size_train)
            optimizer.step()
            # Evaluate summaries only once in a while.
            if i % params.save_summary_steps == 0:
                output_batch = output_batch.data
                labels_batch = labels_batch.data
                summary_batch = {
                    metric: metrics[metric](output_batch, labels_batch)
                    for metric in metrics
                }
                summary_batch['loss'] = loss.data.item()
                for l, v in loss_bag.items():
                    summary_batch[l] = v.data.item()
                summ.append(summary_batch)
            loss_avg.update(loss.data.item())
            t.set_postfix(loss_running='{:05.3f}'.format(loss_avg()))
            t.update()
    # Mean of all summary metrics over the epoch.
    AccTop5_list_train = [x['accuracytop5'][0] for x in summ]
    AccTop1_list_train = [x['accuracytop1'][0] for x in summ]
    AccTop5_mean_train = torch.mean(
        torch.stack(AccTop5_list_train)).cpu().numpy()
    AccTop1_mean_train = torch.mean(
        torch.stack(AccTop1_list_train)).cpu().numpy()
    loss_mean_train = np.mean([x['loss'] for x in summ])
    ls_all_mean_train = np.mean([x['ls_all'] for x in summ])
    ls_ce_mean_train = np.mean([x['ls_CE'] for x in summ])
    metrics_mean = {
        'accuracytop5': AccTop5_mean_train,
        'accuracytop1': AccTop1_mean_train,
        'loss': loss_mean_train,
        'ls_all': ls_all_mean_train,
        'ls_CE': ls_ce_mean_train
    }
    print(metrics_mean)
    metrics_string = " ; ".join("{}: {:05.3f}".format(k, v)
                                for k, v in metrics_mean.items())
    logger.info("- Train metrics: " + metrics_string)
    return metrics_mean, confusion_meter
def train(dataloader, ectoca3_optimizer, ca1_optimizer, ectoca3_loss_fn, ca1_loss_fn, params, autosave=False, train_mode=True, display=False):
    """Run the hippocampus-style EC -> DG -> CA3 -> CA1 pipeline.

    Operates on module-level globals (step1_ec, step2_dg, step3_ca3,
    step4_ectoca3, step5_ca1, modules, model_path) rather than passed-in
    models. With train_mode=False, stage weights are loaded from disk and
    the pipeline only reconstructs; with train_mode=True, the CA3 weights
    are fit directly and the EC->CA3 and CA1 stages are optimised with the
    given optimizers/losses.

    NOTE(review): the inner loop ends with an unconditional exit(), so at
    most one batch of one epoch is ever processed — confirm this is the
    intended debugging behaviour.

    Args:
        dataloader: training data. NOTE(review): it is also assigned
            wholesale to `x` below ("entire dataloader is train set"), so it
            is presumably tensor-like rather than a DataLoader — confirm.
        ectoca3_optimizer: optimizer for the EC->CA3 stage (also reused when
            saving the CA1 checkpoint below).
        ca1_optimizer: optimizer for the CA1 stage.
        ectoca3_loss_fn / ca1_loss_fn: losses for the two trained stages.
        params: hyperparameters (num_epochs, cuda, ectoca3_iters, ca1_iters).
        autosave: save stage checkpoints as they are updated.
        train_mode: train the stages when True, otherwise eval/reconstruct.
        display: show intermediate visualisations.
    """
    # Set model to train or eval.
    if not train_mode:
        print("Setting to eval mode.")
        step1_ec.eval()
        step4_ectoca3.eval()
        step5_ca1.eval()
        # Load weights
        utils.load_checkpoint(model_path, step4_ectoca3, name="ectoca3_weights")
        utils.load_checkpoint(model_path, step5_ca1, name="ca1_weights")
        # Custom loader for ca3: CA3 keeps a raw weight matrix, not a
        # state_dict, so it is loaded directly.
        ca3_weights_path = model_path / "ca3_weights.pth.tar"
        ca3_weights = torch.load(ca3_weights_path.as_posix())
        step3_ca3.W = ca3_weights
    else:
        step1_ec.train()
        step4_ectoca3.train()
        step5_ca1.train()
    for epoch in range(params.num_epochs):
        for i, x in enumerate(dataloader):
            if params.cuda:
                x = x.cuda(non_blocking=True)
            #=============RUN EC=============#
            # entire dataloader is train set for eval.
            # NOTE(review): this overwrites the per-batch x with the whole
            # dataloader object — confirm dataloader is a tensor here.
            x = dataloader
            if display:
                pass
                utils.animate_weights(x, nrow=5)
            # EC stage runs without gradients; only later stages are trained.
            with torch.no_grad():
                ec_maxpool_flat = step1_ec(x, k=4)
            if display:
                utils.animate_weights(step1_ec.encoder.weight.data, nrow=11)
                # exit()
                # for i, out in enumerate(ec_maxpool_flat):
                #     ec_grid = torchvision.utils.make_grid(out, nrow=11)
                #     utils.animate_weights(ec_grid, i, auto=True)
            #=====MONITORING=====#
            # ec_out_weight = step1_ec.encoder.weight.data
            ## DISPLAY
            # utils.animate_weights(ec_out_weight, auto=False)
            # for i, out in enumerate(ec_maxpool_flat):
            #     print(out.shape)
            #     ec_grid = torchvision.utils.make_grid(out, nrow=11)
            #     utils.animate_weights(ec_grid, i)
            #=====END MONIT.=====#
            #=============END EC=============#
            #=============RUN DENTATE GYRUS=============#
            with torch.no_grad():
                dg_sparse = step2_dg(ec_maxpool_flat, k=10)
            ## DISPLAY
            if display:
                utils.showme(dg_sparse, title="DG OUT")
                # exit()
            # Polarize output from (0, 1) to (-1, 1) for step3_ca3
            dg_sparse_dressed = modules.all_dressed(dg_sparse)
            ## DISPLAY
            if display:
                utils.showme(dg_sparse_dressed, title="DG CLEAN")
                # exit()
            #=============END DENTATE GYRUS=============#
            #=============RUN CA3 TRAINING==============#
            if not train_mode:
                pass
            else:
                # CA3 weights are fit in closed form (pseudo-inverse), not
                # by gradient descent.
                with torch.no_grad():
                    ca3_weights = step3_ca3.train(dg_sparse_dressed, "pinverse")
                if autosave:
                    ca3_state = step3_ca3.W
                    utils.save_checkpoint(ca3_state, model_path,
                                          name="ca3_weights", silent=False)
                    print("CA3 weights updated.")
            ## DISPLAY
            if display:
                utils.showme(ca3_weights, title="Weights")
                # exit()
            #=============END CA3 TRAINING==============#
            #=============RUN EC->CA3===================#
            if not train_mode:
                # Eval path: predict CA3 codes from EC output, then sparsify.
                trained_sparse = step4_ectoca3(ec_maxpool_flat)
                trained_sparse = modules.get_top_k(trained_sparse, k=10,
                                                   topk_dim=1, scatter_dim=1)
                # torch.set_printoptions(profile="full")
                # print(f"dg_sparse: {dg_sparse[3]}")
                # print(f"trained: {trained_sparse[3]}")
                # print(f"trained: {trained_sparse[3].max()}")
                # torch.set_printoptions(profile="default")
                ## DISPLAY
                if display:
                    utils.showme(trained_sparse.detach(), title="Trained Prediction")
                    # exit()
            else:
                # Run training: regress EC output onto the DG sparse code.
                loss_avg = utils.RunningAverage()
                with tqdm (desc="Updating EC->CA3", total=params.ectoca3_iters) as t1:
                    for i in range(params.ectoca3_iters):
                        trained_sparse = step4_ectoca3(ec_maxpool_flat)
                        ectoca3_loss = ectoca3_loss_fn(trained_sparse, dg_sparse)
                        ectoca3_optimizer.zero_grad()
                        # retain_graph=True: the same EC activations feed
                        # every iteration of this inner loop.
                        ectoca3_loss.backward(retain_graph=True)
                        # print(i, ectoca3_loss)
                        # NOTE: Learning rate has large impact on quality of output
                        ectoca3_optimizer.step()
                        loss_avg.update(ectoca3_loss.item())
                        t1.set_postfix(loss="{:05.3f}".format(loss_avg()))
                        t1.update()
                ## DISPLAY
                if display:
                    utils.animate_weights(trained_sparse.detach(), auto=False)
                if autosave:
                    ec_state = utils.get_save_state(epoch, step4_ectoca3,
                                                    ectoca3_optimizer)
                    utils.save_checkpoint(ec_state, model_path,
                                          name="ectoca3_weights", silent=False)
            # Polarize output from (0, 1) to (-1, 1) for step3_ca3
            # ectoca3_out_dressed = modules.center_me_zero(trained_sparse)
            ectoca3_out_dressed = modules.all_dressed(trained_sparse)
            ## DISPLAY
            if display:
                utils.showme(ectoca3_out_dressed.detach(), title="Cleaned-Trained")
                # exit()
            #=============END EC->CA3=================#
            #=============RUN CA3 RECALL==============#
            # Hopfield-style recall from the (predicted) CA3 code.
            ca3_out_recall = step3_ca3.update(ectoca3_out_dressed)
            # ca3_out_recall = step3_ca3.update(dg_sparse_dressed)
            ## DISPLAY
            if display:
                utils.showme(ca3_out_recall.detach(), title="Hopfield out")
                # exit()
            #=============END CA3 TRAINING==============#
            #=============RUN CA1 ======================#
            if not train_mode:
                ca1_reconstruction = step5_ca1(ca3_out_recall)
                utils.animate_weights(ca1_reconstruction.detach(), nrow=5,
                                      auto=False)
                exit()
            else:
                # NOTE(review): reuses loss_avg from the EC->CA3 branch; this
                # assumes train_mode was True there too (always the case,
                # since both branch on the same flag).
                loss_avg.reset()
                with tqdm (desc="Updating CA1", total=params.ca1_iters) as t2:
                    for i in range(params.ca1_iters):
                        ca1_reconstruction = step5_ca1(ca3_out_recall)
                        ca1_loss = ca1_loss_fn(ca1_reconstruction, x)
                        ca1_optimizer.zero_grad()
                        # Free the graph only on the final iteration.
                        if i == (params.ca1_iters - 1):
                            ca1_loss.backward(retain_graph=False)
                        else:
                            ca1_loss.backward(retain_graph=True)
                        # print(i, ca1_loss)
                        ca1_optimizer.step()
                        loss_avg.update(ca1_loss.item())
                        t2.set_postfix(loss="{:05.3f}".format(loss_avg()))
                        t2.update()
                ## DISPLAY
                if display:
                    utils.animate_weights(ca1_reconstruction.detach(), nrow=5,
                                          auto=False)
                if autosave:
                    # NOTE(review): saves with ectoca3_optimizer, not
                    # ca1_optimizer — possibly intentional, confirm.
                    ec_state = utils.get_save_state(epoch, step5_ca1,
                                                    ectoca3_optimizer)
                    utils.save_checkpoint(ec_state, model_path,
                                          name="ca1_weights", silent=False)
                print("Graph cleared.", end=" ")
                print("Weights successfully updated.\n")
            ## DISPLAY
            utils.animate_weights(ca1_reconstruction.detach(), nrow=5,
                                  auto=False)
            #=============END CA1 =============#
            # Optional exit to end after one batch
            exit()
def train_epoch(model_source, model_target, transfer, train_dl_all, opt1, opt2, opt3, loss_fn1, loss_fn2, params, lr_scheduler1, lr_scheduler2, lr_scheduler3):
    """One epoch of joint depth/segmentation training with feature transfer.

    Three sub-problems are optimised per batch, each with its own
    optimizer/scheduler pair:
      1. opt1: model_source on depth (Cityscapes batch via train_step, then
         the CARLA batch explicitly below).
      2. opt2: model_target on CARLA segmentation (via train_step).
      3. opt3: the `transfer` module, mapping source depth features into the
         target decoder for segmentation.

    Args:
        model_source: depth network with .backbone / .classifier.
        model_target: segmentation network with .classifier.
        transfer: feature-adaptation module trained by opt3.
        train_dl_all: yields (carla_img, carla_seg, carla_depth, cs_img,
            cs_depth) tuples.
        opt1/opt2/opt3, loss_fn1/loss_fn2, lr_scheduler1/2/3: see above.
        params: provides `.device`.

    Returns:
        (mean_carla_depth_loss, mean_carla_segmentation_loss)
    """
    running_loss_depth_carla = utils.RunningAverage()
    running_loss_segmentation_carla = utils.RunningAverage()
    # NOTE(review): source_encoder/source_decoder are captured but only used
    # in the commented-out code below; target_decoder is used for the
    # adapted prediction.
    source_encoder = model_source.backbone
    source_decoder = model_source.classifier
    target_decoder = model_target.classifier
    for (batch_images_carla, batch_segmentation_carla, batch_depth_carla,
         batch_images_cs, batch_depth_cs) in tqdm(train_dl_all):
        input_shape = batch_images_carla.shape[-2:]
        # Hook captures intermediate activations of model_source during its
        # forward pass (used below as the input to `transfer`).
        hook = LayerActivationHook(model_source)
        # try:
        #     batch_images_cs, batch_depth_cs = next(iter_cs_depth)
        # except StopIteration:
        #     iter_cs_depth = iter(train_dl_depth_target)
        #     batch_images_cs, batch_depth_cs = next(iter_cs_depth)
        # Step 1a: depth on the Cityscapes batch (full train step).
        loss_cs_depth = train_step(model_source,
                                   batch_images_cs.to(params.device),
                                   batch_depth_cs.to(params.device), opt1,
                                   loss_fn1)
        batch_images_carla = batch_images_carla.to(params.device)
        batch_segmentation_carla = batch_segmentation_carla.to(params.device)
        batch_depth_carla = batch_depth_carla.to(params.device)
        # depth_feature = source_encoder(batch_images_carla)['out']
        # depth_feature_copy = depth_feature.detach()
        # Reset before the forward so we capture this batch's activations.
        hook.features = None
        depth_prediction = model_source(batch_images_carla)
        depth_feature_copy = hook.features
        hook.remove_hook()
        depth_prediction = F.interpolate(depth_prediction['out'],
                                         size=input_shape,
                                         mode='bilinear',
                                         align_corners=False)
        # Step 1b: depth on the CARLA batch.
        loss_depth_carla = loss_fn1(depth_prediction, batch_depth_carla)
        if opt1 is not None:
            opt1.zero_grad()
            loss_depth_carla.backward()
            opt1.step()
        if lr_scheduler1 is not None:
            lr_scheduler1.step()
        # Step 2: segmentation on the CARLA batch.
        loss_segmentation_carla = train_step(model_target, batch_images_carla,
                                             batch_segmentation_carla, opt2,
                                             loss_fn2)
        if lr_scheduler2 is not None:
            lr_scheduler2.step()
        # Step 3: adapt the captured depth features and decode them with the
        # segmentation head; only `transfer` is updated here (opt3).
        adapted_feature = transfer(depth_feature_copy)
        adapted_carla_prediction = target_decoder(adapted_feature)
        adapted_carla_prediction = F.interpolate(adapted_carla_prediction,
                                                 size=input_shape,
                                                 mode='bilinear',
                                                 align_corners=False)
        loss_adapted_carla = loss_fn2(adapted_carla_prediction,
                                      batch_segmentation_carla)
        if opt3 is not None:
            opt3.zero_grad()
            loss_adapted_carla.backward()
            opt3.step()
        if lr_scheduler3 is not None:
            lr_scheduler3.step()
        running_loss_depth_carla.update(loss_depth_carla.item())
        running_loss_segmentation_carla.update(loss_segmentation_carla.item())
    return running_loss_depth_carla(), running_loss_segmentation_carla()
def train(model, optimizer, loss_fn, dataloader, metrics, params, logger):
    """Train the model for one epoch over `dataloader`.

    Args:
        model: (torch.nn.Module) the neural network
        optimizer: (torch.optim) optimizer for parameters of model
        loss_fn: loss returning a dict of named losses (uses 'ls_all')
        dataloader: yields (data_batch, labels_batch)
        metrics: (dict) name -> callable(output, labels)
        params: (Params) hyperparameters
        logger: logger for the epoch summary

    Returns:
        (metrics_mean, confusion_meter)
    """
    model.train()
    summ = []
    loss_avg = utils.RunningAverage()
    confusion_meter = torchnet.meter.ConfusionMeter(
        params.model_args["num_class"], normalized=True)
    confusion_meter.reset()
    with tqdm(total=len(dataloader)) as t:
        for i, (data_batch, labels_batch) in enumerate(dataloader):
            if params.cuda:
                if params.data_parallel:
                    data_batch, labels_batch = data_batch.cuda(
                        non_blocking=True), labels_batch.cuda(
                            non_blocking=True)
                else:
                    data_batch, labels_batch = data_batch.cuda(
                        params.gpu_id), labels_batch.cuda(params.gpu_id)
            data_batch, labels_batch = Variable(data_batch), Variable(
                labels_batch)
            output_batch = model(data_batch, target=labels_batch)
            loss_bag = loss_fn(output_batch, labels_batch,
                               current_epoch=params.current_epoch,
                               params=params)
            loss = loss_bag['ls_all']
            # FIX: removed the no-op `output_batch = output_batch`
            # self-assignment present in the original.
            confusion_meter.add(output_batch.data, labels_batch.data)
            optimizer.zero_grad()
            loss.backward()
            # clip_grad_norm_ guards against exploding gradients.
            total_norm = torch.nn.utils.clip_grad_norm_(
                model.parameters(), params.clip * params.batch_size_train)
            optimizer.step()
            # Evaluate summaries only once in a while.
            if i % params.save_summary_steps == 0:
                output_batch = output_batch.data
                labels_batch = labels_batch.data
                summary_batch = {
                    metric: metrics[metric](output_batch, labels_batch)
                    for metric in metrics
                }
                summary_batch['loss'] = loss.data.item()
                for l, v in loss_bag.items():
                    summary_batch[l] = v.data.item()
                summ.append(summary_batch)
            loss_running = loss.data.item()
            loss_avg.update(loss_running)
            t.set_postfix(loss_running='{:05.3f}'.format(loss_avg()))
            t.update()
    metrics_mean = {
        metric: np.mean([x[metric] for x in summ]) for metric in summ[0]
    }
    metrics_string = " ; ".join("{}: {:05.3f}".format(k, v)
                                for k, v in metrics_mean.items())
    logger.info("- Train metrics: " + metrics_string)
    return metrics_mean, confusion_meter
def train(model, train_loader, metrics_save, loss_func, optimizer, save_summary_steps, experiment, inception):
    """Train the model for one epoch, logging metrics to the experiment.

    Args:
        model: (torch.nn.Module) the neural network
        train_loader: (DataLoader) fetches training data
        metrics_save: (dict) metric functions keyed by
            'accuracy'/'AUC'/'fpr'/'tpr'
        loss_func: the loss function
        optimizer: the optimizer
        save_summary_steps: log metrics every this many batches
        experiment: experiment tracker (provides .train() context and
            .log_metrics)
        inception: whether the model is an Inception model (aux output,
            softmax applied before metric computation)
    """
    model.train()
    summ = []
    loss_avg = utils.RunningAverage()
    # FIX: wrap the model in DataParallel once, not on every batch — the
    # original rebuilt the wrapper inside the loop each iteration.
    parallelNet = torch.nn.DataParallel(model)
    with tqdm(total=len(train_loader)) as t:
        with experiment.train():
            for i, train_batch in enumerate(train_loader):
                inputs, labels = train_batch
                labels = labels.type(torch.LongTensor)
                optimizer.zero_grad()
                if inception:
                    # Inception returns (main, aux) outputs in train mode.
                    outputs, aux = parallelNet(
                        utils.tovar(inputs, requires_grad=False))
                else:
                    outputs = parallelNet(
                        utils.tovar(inputs, requires_grad=False))
                loss = loss_func(outputs, utils.tovar(labels))
                loss.backward()
                optimizer.step()
                # Evaluate summaries only once in a while.
                if i % save_summary_steps == 0:
                    if inception:
                        # FIX: explicit dim=1; the implicit-dim Softmax form
                        # is deprecated (warns its way to dim=1 for 2-D).
                        softmax = nn.Softmax(dim=1)
                        outputs = softmax(outputs)
                    output_batch = outputs.data.cpu().numpy()
                    labels_batch = labels.cpu().numpy()
                    # Per-sample probability of the true class (for AUC/fpr/tpr).
                    ypred = []
                    for j, x in enumerate(output_batch):
                        ypred.append(x[labels_batch[j]])
                    logging.info(output_batch)
                    # test
                    pred = [np.argmax(data) for data in output_batch]
                    logging.info(pred)
                    # compute all metrics on this batch
                    summary_batch = {
                        "train_accuracy":
                            metrics_save["accuracy"](output_batch, labels_batch),
                        # AUC is on binary
                        "train_AUC":
                            metrics_save["AUC"](ypred, labels_batch),
                        "train_mean_fpr":
                            metrics_save["fpr"](ypred, labels_batch),
                        "train_mean_tpr":
                            metrics_save["tpr"](ypred, labels_batch),
                        # FIX: `loss.data[0]` (0-dim indexing) was removed in
                        # PyTorch 0.4; `loss.item()` is the supported accessor.
                        "train_loss": loss.item()
                    }
                    summ.append(summary_batch)
                    experiment.log_metrics(summary_batch, step=i)
                loss_avg.update(loss.item())
                t.set_postfix(loss='{:05.3f}'.format(loss_avg()))
                t.update()
    metrics_mean = {
        metric: np.mean([x[metric] for x in summ]) for metric in summ[0]
    }
    metrics_string = " ; ".join("{}: {:05.3f}".format(k, v)
                                for k, v in metrics_mean.items())
    logging.info("- Train metrics: " + metrics_string)
def train(model, optimizer, loss_fn, dataloader, metrics, params):
    """Train the model for one epoch over `dataloader`.

    Args:
        model: (torch.nn.Module) the neural network
        optimizer: (torch.optim) optimizer for parameters of model
        loss_fn: takes batch_output and batch_labels and computes the loss
        dataloader: (DataLoader) fetches training data
        metrics: (dict) metric functions over (output, labels) numpy arrays
        params: (Params) hyperparameters
    """
    model.train()
    summ = []
    loss_avg = utils.RunningAverage()
    with tqdm(total=len(dataloader)) as t:
        for i, (train_batch, labels_batch) in enumerate(dataloader):
            if params.cuda:
                # FIX: `.cuda(async=True)` is a SyntaxError on Python 3.7+
                # (`async` became a keyword); PyTorch >= 0.4 spells this
                # `non_blocking=True`.
                train_batch, labels_batch = train_batch.cuda(
                    non_blocking=True), labels_batch.cuda(non_blocking=True)
            train_batch, labels_batch = Variable(train_batch), Variable(
                labels_batch)
            output_batch = model(train_batch)
            loss = loss_fn(output_batch, labels_batch)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            # Evaluate summaries only once in a while.
            if i % params.save_summary_steps == 0:
                output_batch = output_batch.data.cpu().numpy()
                labels_batch = labels_batch.data.cpu().numpy()
                summary_batch = {
                    metric: metrics[metric](output_batch, labels_batch)
                    for metric in metrics
                }
                # FIX: `loss.data[0]` (0-dim indexing) was removed in
                # PyTorch 0.4; `loss.item()` is the supported accessor.
                summary_batch['loss'] = loss.item()
                summ.append(summary_batch)
            loss_avg.update(loss.item())
            t.set_postfix(loss='{:05.3f}'.format(loss_avg()))
            t.update()
    metrics_mean = {
        metric: np.mean([x[metric] for x in summ]) for metric in summ[0]
    }
    metrics_string = " ; ".join("{}: {:05.3f}".format(k, v)
                                for k, v in metrics_mean.items())
    logging.info("- Train metrics: " + metrics_string)
def train(model, optimizer, loss_fn, dataloader, metrics, params):
    """Train a pairwise highlight-ranking model for one epoch.

    Each batch yields highlight / non-highlight features (plus distance
    features and shared text features); positive and negative inputs are
    concatenated feature vectors and scored by the model, with a pairwise
    ranking loss between the two scores.

    Args:
        model: scores a concatenated feature vector.
        optimizer: (torch.optim) optimizer for parameters of model.
        loss_fn: pairwise loss over (positive_out, negative_out, target).
        dataloader: yields the five feature tensors per batch.
        metrics: (dict) metric functions over (positive_out, negative_out).
        params: (Params) hyperparameters.

    Returns:
        np.ndarray of the recorded per-batch losses.
    """
    model.train()
    summary = []
    loss_over_batch = []
    loss_avg = utils.RunningAverage()
    for i, (highlight_batch, highlight_distance_batch, non_highlight_batch,
            non_highlight_distance_batch,
            text_feature_batch) in enumerate(dataloader):
        highlight_batch = highlight_batch.reshape(
            highlight_batch.shape[0], -1).float()
        # FIX: the original flattened the *feature* tensors here
        # (`highlight_batch.reshape(...)` / `non_highlight_batch.reshape(...)`),
        # a copy/paste bug that duplicated the features and silently
        # discarded the distance inputs.
        highlight_distance_batch = highlight_distance_batch.reshape(
            highlight_distance_batch.shape[0], -1).float()
        non_highlight_batch = non_highlight_batch.reshape(
            non_highlight_batch.shape[0], -1).float()
        non_highlight_distance_batch = non_highlight_distance_batch.reshape(
            non_highlight_distance_batch.shape[0], -1).float()
        text_feature_batch = text_feature_batch.reshape(
            text_feature_batch.shape[0], -1).float()
        positive_batch = torch.cat(
            (highlight_batch, highlight_distance_batch, text_feature_batch),
            dim=1)
        negative_batch = torch.cat(
            (non_highlight_batch, non_highlight_distance_batch,
             text_feature_batch), dim=1)
        if params.cuda:
            # FIX: `.cuda(async=True)` is a SyntaxError on Python 3.7+;
            # PyTorch >= 0.4 spells this `non_blocking=True`.
            positive_batch, negative_batch = positive_batch.cuda(
                non_blocking=True), negative_batch.cuda(non_blocking=True)
            device = torch.device("cuda")
        positive_batch, negative_batch = Variable(positive_batch), Variable(
            negative_batch)
        positive_batch_output = model(positive_batch)
        negative_batch_output = model(negative_batch)
        # Target of ones: the positive sample should outrank the negative.
        if params.cuda:
            loss = loss_fn(
                positive_batch_output, negative_batch_output,
                torch.ones(positive_batch.shape[0], 1, device=device))
        else:
            loss = loss_fn(positive_batch_output, negative_batch_output,
                           torch.ones(positive_batch.shape[0], 1))
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        # Evaluate summaries only once in a while.
        if i % params.save_summary_steps == 0:
            summary_batch = {
                metric: metrics[metric](positive_batch_output,
                                        negative_batch_output)
                for metric in metrics
            }
            summary_batch['loss'] = loss.item()
            # logging.info("- Batch loss: {}".format(summary_batch['loss']))
            summary.append(summary_batch)
        # NOTE(review): recorded every batch; the collapsed original was
        # ambiguous about whether this sat inside the summary-step branch.
        loss_over_batch.append(loss.item())
        loss_avg.update(loss.item())
    metrics_mean = {
        metric: np.mean([x[metric] for x in summary])
        for metric in summary[0]
    }
    metrics_string = " ; ".join("{}: {:05.3f}".format(k, v)
                                for k, v in metrics_mean.items())
    logging.info("- Train metrics: " + metrics_string)
    return np.array(loss_over_batch)