def train_epoch(train_dloader, model, optimizer, cur_epoch, cfg):
    """
    Train the model for one epoch over `train_dloader`, showing the running
    loss in a tqdm progress bar.

    Args:
        train_dloader (loader): training loader yielding
            (inputs, labels, _, extra_data) tuples.
        model (model): the video model to train (put into train mode here).
        optimizer (optim): optimizer updating the model's parameters.
        cur_epoch (int): current epoch index, used for LR scheduling.
        cfg (CfgNode): configs. Details can be found in
            slowfast/config/defaults.py
    """
    # Enable train mode.
    model.train()
    train_tqdm = tqdm(train_dloader, ncols=80)
    data_size = len(train_dloader)
    # The loss function depends only on the config, so construct it once
    # instead of re-instantiating it on every iteration.
    # Explicitly declare reduction to mean.
    loss_fun = losses.get_loss_func(cfg.MODEL.LOSS_FUNC)(reduction="mean")
    for cur_iter, (inputs, labels, _, extra_data) in enumerate(train_tqdm):
        # Transfer the data to the current GPU device.
        if cfg.NUM_GPUS:
            if isinstance(inputs, (list, )):
                for i in range(len(inputs)):
                    inputs[i] = inputs[i].cuda(non_blocking=True)
            else:
                inputs = inputs.cuda(non_blocking=True)
            labels = labels.cuda()
            for key, val in extra_data.items():
                if isinstance(val, (list, )):
                    for i in range(len(val)):
                        val[i] = val[i].cuda(non_blocking=True)
                else:
                    extra_data[key] = val.cuda(non_blocking=True)
        # Update the learning rate for the fractional epoch position.
        lr = optim.get_epoch_lr(cur_epoch + float(cur_iter) / data_size, cfg)
        optim.set_lr(optimizer, lr)
        if cfg.DETECTION.ENABLE:
            preds = model(inputs, extra_data["boxes"])
        else:
            preds = model(inputs)
        # Compute the loss.
        loss = loss_fun(preds, labels)
        # Perform the backward pass.
        optimizer.zero_grad()
        loss.backward()
        # Update the parameters.
        optimizer.step()
        # .item() already synchronizes and copies to host; .cpu() was redundant.
        train_tqdm.set_description("Train_loss: %.4f" % loss.item())
def train_epoch(train_loader, model, optimizer, train_meter, cur_epoch, cfg, cnt):
    """
    Perform the video training for one epoch.
    Args:
        train_loader (loader): video training loader yielding a dict per batch.
        model (model): the video model to train.
        optimizer (optim): the optimizer to perform optimization on the model's
            parameters.
        train_meter (TrainMeter): training meters to log the training performance.
        cur_epoch (int): current epoch of training.
        cfg (CfgNode): configs. Details can be found in
            slowfast/config/defaults.py
        cnt (int): running global iteration counter, incremented once per batch
            and returned so the caller can carry it across epochs.
    Returns:
        int: the updated iteration counter `cnt`.
    """
    # Enable train mode.
    model.train()
    if cfg.BN.FREEZE:
        # Keep BatchNorm running statistics frozen while training.
        model.freeze_fn('bn_statistics')
    train_meter.iter_tic()
    data_size = len(train_loader)
    #for cur_iter, (inputs, bboxs, masks, labels, _, meta) in enumerate(train_loader):
    for cur_iter, output_dict in enumerate(train_loader):
        # The loader yields a dict; boxes/masks are only present when
        # EPICKITCHENS.USE_BBOX is enabled.
        if cfg.EPICKITCHENS.USE_BBOX:
            inputs = output_dict['inputs']
            bboxs = output_dict['bboxs']
            masks = output_dict['masks']
            labels = output_dict['label']
            # output_dict['index']
            meta = output_dict['metadata']
        else:
            inputs = output_dict['inputs']
            labels = output_dict['label']
            meta = output_dict['metadata']
        # Transfer the data to the current GPU device.
        if isinstance(inputs, (list,)):
            for i in range(len(inputs)):
                inputs[i] = inputs[i].cuda(non_blocking=True)
        else:
            inputs = inputs.cuda(non_blocking=True)
        # EPIC-Kitchens-style labels are a dict (e.g. 'verb'/'noun' heads).
        if isinstance(labels, (dict,)):
            labels = {k: v.cuda() for k, v in labels.items()}
        else:
            labels = labels.cuda()
        # Update the learning rate.
        lr = optim.get_epoch_lr(cur_epoch + float(cur_iter) / data_size, cfg)
        optim.set_lr(optimizer, lr)
        if cfg.DETECTION.ENABLE:
            # Compute the predictions.
            preds = model(inputs, meta["boxes"])
        else:
            # Perform the forward pass.
            if cfg.EPICKITCHENS.USE_BBOX:
                # Boxes/masks are moved to GPU lazily, only on this path.
                if isinstance(bboxs, (list,)):
                    for i in range(len(bboxs)):
                        bboxs[i] = bboxs[i].cuda(non_blocking=True)
                        masks[i] = masks[i].cuda(non_blocking=True)
                else:
                    bboxs = bboxs.cuda(non_blocking=True)
                    masks = masks.cuda(non_blocking=True)
                preds = model(inputs, bboxes=bboxs, masks=masks)
            else:
                preds = model(inputs)
        if isinstance(labels, (dict,)):
            # Explicitly declare reduction to mean.
            loss_fun = losses.get_loss_func(cfg.MODEL.LOSS_FUNC)(reduction="mean")
            # Compute the loss as the mean of the verb and noun head losses.
            loss_verb = loss_fun(preds[0], labels['verb'])
            loss_noun = loss_fun(preds[1], labels['noun'])
            loss = 0.5 * (loss_verb + loss_noun)
            # check Nan Loss.
            misc.check_nan_losses(loss)
        else:
            # Explicitly declare reduction to mean.
            loss_fun = losses.get_loss_func(cfg.MODEL.LOSS_FUNC)(reduction="mean")
            # Compute the loss.
            loss = loss_fun(preds, labels)
            # check Nan Loss.
            misc.check_nan_losses(loss)
        # Perform the backward pass.
        optimizer.zero_grad()
        loss.backward()
        # Update the parameters.
        optimizer.step()
        if cfg.DETECTION.ENABLE:
            if cfg.NUM_GPUS > 1:
                loss = du.all_reduce([loss])[0]
            loss = loss.item()
            train_meter.iter_toc()
            # Update and log stats.
            train_meter.update_stats(None, None, None, loss, lr)
        else:
            if isinstance(labels, (dict,)):
                # Compute the verb accuracies.
                verb_top1_acc, verb_top5_acc = metrics.topk_accuracies(preds[0], labels['verb'], (1, 5))
                # predicted_answer_softmax = torch.nn.Softmax(dim=1)(preds[0])
                # predicted_answer_max = torch.max(predicted_answer_softmax.data, 1).indices
                # print(cnt, predicted_answer_max, labels['verb'])
                # Gather all the predictions across all the devices.
                if cfg.NUM_GPUS > 1:
                    loss_verb, verb_top1_acc, verb_top5_acc = du.all_reduce(
                        [loss_verb, verb_top1_acc, verb_top5_acc]
                    )
                # Copy the stats from GPU to CPU (sync point).
                loss_verb, verb_top1_acc, verb_top5_acc = (
                    loss_verb.item(),
                    verb_top1_acc.item(),
                    verb_top5_acc.item(),
                )
                # Compute the noun accuracies.
                noun_top1_acc, noun_top5_acc = metrics.topk_accuracies(preds[1], labels['noun'], (1, 5))
                # Gather all the predictions across all the devices.
                if cfg.NUM_GPUS > 1:
                    loss_noun, noun_top1_acc, noun_top5_acc = du.all_reduce(
                        [loss_noun, noun_top1_acc, noun_top5_acc]
                    )
                # Copy the stats from GPU to CPU (sync point).
                loss_noun, noun_top1_acc, noun_top5_acc = (
                    loss_noun.item(),
                    noun_top1_acc.item(),
                    noun_top5_acc.item(),
                )
                # Gather all the predictions across all the devices.
                if cfg.NUM_GPUS > 1:
                    loss = du.all_reduce(
                        [loss]
                    )
                # du.all_reduce returns a list; unwrap before .item().
                if isinstance(loss, (list,)):
                    loss = loss[0]
                # Copy the stats from GPU to CPU (sync point).
                loss = loss.item()
                train_meter.iter_toc()
                # Update and log stats.
                train_meter.update_stats(
                    (verb_top1_acc, noun_top1_acc),
                    (verb_top5_acc, noun_top5_acc),
                    (loss_verb, loss_noun, loss),
                    lr, inputs[0].size(0) * cfg.NUM_GPUS
                )
            else:
                # Compute the errors.
                num_topks_correct = metrics.topks_correct(preds, labels, (1, 5))
                top1_err, top5_err = [
                    (1.0 - x / preds.size(0)) * 100.0 for x in num_topks_correct
                ]
                # Gather all the predictions across all the devices.
                if cfg.NUM_GPUS > 1:
                    loss, top1_err, top5_err = du.all_reduce(
                        [loss, top1_err, top5_err]
                    )
                # Copy the stats from GPU to CPU (sync point).
                loss, top1_err, top5_err = (
                    loss.item(),
                    top1_err.item(),
                    top5_err.item(),
                )
                train_meter.iter_toc()
                # Update and log stats.
                train_meter.update_stats(
                    top1_err, top5_err, loss, lr, inputs[0].size(0) * cfg.NUM_GPUS
                )
        train_meter.log_iter_stats(cur_epoch, cur_iter, cnt)
        train_meter.iter_tic()
        cnt += 1
    # Log epoch stats.
    train_meter.log_epoch_stats(cur_epoch)
    train_meter.reset()
    return cnt
def train_epoch(train_loader, model, optimizer, scaler, train_meter, cur_epoch, cfg, writer=None):
    """
    Perform the video training for one epoch, with optional MixUp/CutMix
    augmentation and mixed-precision (AMP) training.
    Args:
        train_loader (loader): video training loader.
        model (model): the video model to train.
        optimizer (optim): the optimizer to perform optimization on the model's
            parameters.
        scaler (GradScaler): gradient scaler driving the AMP backward/step.
        train_meter (TrainMeter): training meters to log the training performance.
        cur_epoch (int): current epoch of training.
        cfg (CfgNode): configs. Details can be found in
            slowfast/config/defaults.py
        writer (TensorboardWriter, optional): TensorboardWriter object to writer
            Tensorboard log.
    """
    # Enable train mode.
    model.train()
    train_meter.iter_tic()
    data_size = len(train_loader)
    if cfg.MIXUP.ENABLE:
        # Build the MixUp/CutMix augmenter once for the whole epoch.
        mixup_fn = MixUp(
            mixup_alpha=cfg.MIXUP.ALPHA,
            cutmix_alpha=cfg.MIXUP.CUTMIX_ALPHA,
            mix_prob=cfg.MIXUP.PROB,
            switch_prob=cfg.MIXUP.SWITCH_PROB,
            label_smoothing=cfg.MIXUP.LABEL_SMOOTH_VALUE,
            num_classes=cfg.MODEL.NUM_CLASSES,
        )
    for cur_iter, (inputs, labels, _, meta) in enumerate(train_loader):
        # Transfer the data to the current GPU device.
        if cfg.NUM_GPUS:
            if isinstance(inputs, (list, )):
                for i in range(len(inputs)):
                    inputs[i] = inputs[i].cuda(non_blocking=True)
            else:
                inputs = inputs.cuda(non_blocking=True)
            labels = labels.cuda()
            for key, val in meta.items():
                if isinstance(val, (list, )):
                    for i in range(len(val)):
                        val[i] = val[i].cuda(non_blocking=True)
                else:
                    meta[key] = val.cuda(non_blocking=True)
        # Update the learning rate.
        lr = optim.get_epoch_lr(cur_epoch + float(cur_iter) / data_size, cfg)
        optim.set_lr(optimizer, lr)
        train_meter.data_toc()
        if cfg.MIXUP.ENABLE:
            # MixUp is applied to the first pathway's tensor only.
            samples, labels = mixup_fn(inputs[0], labels)
            inputs[0] = samples
        # Forward pass and loss run under autocast when AMP is enabled.
        with torch.cuda.amp.autocast(enabled=cfg.TRAIN.MIXED_PRECISION):
            if cfg.DETECTION.ENABLE:
                preds = model(inputs, meta["boxes"])
            else:
                preds = model(inputs)
            # Explicitly declare reduction to mean.
            loss_fun = losses.get_loss_func(
                cfg.MODEL.LOSS_FUNC)(reduction="mean")
            # Compute the loss.
            loss = loss_fun(preds, labels)
        # check Nan Loss.
        misc.check_nan_losses(loss)
        # Perform the backward pass.
        optimizer.zero_grad()
        scaler.scale(loss).backward()
        # Unscales the gradients of optimizer's assigned params in-place
        scaler.unscale_(optimizer)
        # Clip gradients if necessary (on the already-unscaled gradients).
        if cfg.SOLVER.CLIP_GRAD_VAL:
            torch.nn.utils.clip_grad_value_(model.parameters(), cfg.SOLVER.CLIP_GRAD_VAL)
        elif cfg.SOLVER.CLIP_GRAD_L2NORM:
            torch.nn.utils.clip_grad_norm_(model.parameters(), cfg.SOLVER.CLIP_GRAD_L2NORM)
        # Update the parameters.
        scaler.step(optimizer)
        scaler.update()
        if cfg.MIXUP.ENABLE:
            # Undo mixing for metrics: fold the second mixed class's score
            # into the dominant one and use the dominant class as hard label.
            _top_max_k_vals, top_max_k_inds = torch.topk(labels, 2, dim=1, largest=True, sorted=True)
            idx_top1 = torch.arange(labels.shape[0]), top_max_k_inds[:, 0]
            idx_top2 = torch.arange(labels.shape[0]), top_max_k_inds[:, 1]
            preds = preds.detach()
            preds[idx_top1] += preds[idx_top2]
            preds[idx_top2] = 0.0
            labels = top_max_k_inds[:, 0]
        if cfg.DETECTION.ENABLE:
            if cfg.NUM_GPUS > 1:
                loss = du.all_reduce([loss])[0]
            loss = loss.item()
            # Update and log stats.
            train_meter.update_stats(None, None, None, loss, lr)
            # write to tensorboard format if available.
            if writer is not None:
                writer.add_scalars(
                    {
                        "Train/loss": loss,
                        "Train/lr": lr
                    },
                    global_step=data_size * cur_epoch + cur_iter,
                )
        else:
            top1_err, top5_err = None, None
            if cfg.DATA.MULTI_LABEL:
                # Gather all the predictions across all the devices.
                if cfg.NUM_GPUS > 1:
                    [loss] = du.all_reduce([loss])
                loss = loss.item()
            else:
                # Compute the errors.
                num_topks_correct = metrics.topks_correct(
                    preds, labels, (1, 5))
                top1_err, top5_err = [(1.0 - x / preds.size(0)) * 100.0
                                      for x in num_topks_correct]
                # Gather all the predictions across all the devices.
                if cfg.NUM_GPUS > 1:
                    loss, top1_err, top5_err = du.all_reduce(
                        [loss, top1_err, top5_err])
                # Copy the stats from GPU to CPU (sync point).
                loss, top1_err, top5_err = (
                    loss.item(),
                    top1_err.item(),
                    top5_err.item(),
                )
            # Update and log stats.
            train_meter.update_stats(
                top1_err,
                top5_err,
                loss,
                lr,
                inputs[0].size(0) * max(
                    cfg.NUM_GPUS, 1
                ),  # If running on CPU (cfg.NUM_GPUS == 1), use 1 to represent 1 CPU.
            )
            # write to tensorboard format if available.
            if writer is not None:
                writer.add_scalars(
                    {
                        "Train/loss": loss,
                        "Train/lr": lr,
                        "Train/Top1_err": top1_err,
                        "Train/Top5_err": top5_err,
                    },
                    global_step=data_size * cur_epoch + cur_iter,
                )
        train_meter.iter_toc()  # measure allreduce for this meter
        train_meter.log_iter_stats(cur_epoch, cur_iter)
        train_meter.iter_tic()
    # Log epoch stats.
    train_meter.log_epoch_stats(cur_epoch)
    train_meter.reset()
def train_epoch(
    train_loader, model, optimizer, train_meter, cur_epoch, cfg, writer=None
):
    """
    Perform the video training for one epoch.
    Args:
        train_loader (loader): video training loader.
        model (model): the video model to train.
        optimizer (optim): the optimizer to perform optimization on the model's
            parameters.
        train_meter (TrainMeter): training meters to log the training performance.
        cur_epoch (int): current epoch of training.
        cfg (CfgNode): configs. Details can be found in
            slowfast/config/defaults.py
        writer (TensorboardWriter, optional): TensorboardWriter object to writer
            Tensorboard log.
    """
    # Enable train mode.
    model.train()
    # Check if the correct params are set to requires_grad = True
    assert_requires_grad_correctness(model, du.is_master_proc(), cfg)
    train_meter.iter_tic()
    data_size = len(train_loader)
    # Print numpy floats without scientific notation (used by the disabled
    # debug dump below).
    np.set_printoptions(suppress=True)
    for cur_iter, (inputs, labels, _, meta) in enumerate(train_loader):
        # Transfer the data to the current GPU device.
        if isinstance(inputs, (list,)):
            for i in range(len(inputs)):
                inputs[i] = inputs[i].cuda(non_blocking=True)
        else:
            inputs = inputs.cuda(non_blocking=True)
        labels = labels.cuda()
        for key, val in meta.items():
            if isinstance(val, (list,)):
                for i in range(len(val)):
                    val[i] = val[i].cuda(non_blocking=True)
            else:
                meta[key] = val.cuda(non_blocking=True)
        if cfg.MODEL.HEAD_ACT == "softmax" and cfg.TRAIN.DATASET == "custom":
            # We have to change our labels to long tensor
            labels = labels.type(torch.LongTensor)
            labels = labels.cuda()
        # Update the learning rate.
        lr = optim.get_epoch_lr(cur_epoch + float(cur_iter) / data_size, cfg)
        optim.set_lr(optimizer, lr, cfg)
        if cfg.DETECTION.ENABLE:
            # Compute the predictions.
            preds = model(inputs, meta["boxes"], is_train=True)
        else:
            # Perform the forward pass.
            preds = model(inputs)
        # Explicitly declare reduction to mean.
        loss_fun = losses.get_loss_func(cfg.MODEL.LOSS_FUNC)(reduction="mean")
        # Compute the loss.
        loss = loss_fun(preds, labels)
        # Disabled debugging block, kept as an inert string literal: it printed
        # each sample's softmax probability for its true label every 70 iters.
        """
        if cur_iter % 70 == 0:
            softmax = torch.nn.Softmax(dim=1)
            probabilities = softmax(preds)
            loss_prob = loss_fun(probabilities, labels)
            preds_numpy = probabilities.cpu().detach().numpy()
            preds_numpy = np.round(preds_numpy, 4)
            labels_numpy = labels.cpu().detach().numpy()
            print("--------------------------")
            for label, pred in zip (labels_numpy, preds_numpy):
                print(str(label) + "---->", end= "")
                print(pred[label])
        """
        # check Nan Loss.
        misc.check_nan_losses(loss)
        # Perform the backward pass.
        optimizer.zero_grad()
        loss.backward()
        # Update the parameters.
        optimizer.step()
        # Todo: adjust accordingly
        if cfg.DETECTION.ENABLE:  #and not (cfg.MODEL.HEAD_ACT == "softmax"):
            if cfg.NUM_GPUS > 1:
                loss = du.all_reduce([loss])[0]
            loss = loss.item()
            train_meter.iter_toc()
            # Update and log stats.
            train_meter.update_stats(None, None, None, loss, lr)
            # write to tensorboard format if available.
            if writer is not None:
                writer.add_scalars(
                    {"Train/loss": loss, "Train/lr": lr},
                    global_step=data_size * cur_epoch + cur_iter,
                )
        else:
            top1_err, top5_err = None, None
            if cfg.DATA.MULTI_LABEL:
                # Gather all the predictions across all the devices.
                if cfg.NUM_GPUS > 1:
                    [loss] = du.all_reduce([loss])
                loss = loss.item()
            else:
                # Compute the errors.
                num_topks_correct = metrics.topks_correct(preds, labels, (1, 5))
                top1_err, top5_err = [
                    (1.0 - x / preds.size(0)) * 100.0 for x in num_topks_correct
                ]
                # Gather all the predictions across all the devices.
                if cfg.NUM_GPUS > 1:
                    loss, top1_err, top5_err = du.all_reduce(
                        [loss, top1_err, top5_err]
                    )
                # Copy the stats from GPU to CPU (sync point).
                loss, top1_err, top5_err = (
                    loss.item(),
                    top1_err.item(),
                    top5_err.item(),
                )
            train_meter.iter_toc()
            # Update and log stats.
            train_meter.update_stats(
                top1_err, top5_err, loss, lr, inputs[0].size(0) * cfg.NUM_GPUS
            )
            # write to tensorboard format if available.
            if writer is not None:
                writer.add_scalars(
                    {
                        "Train/loss": loss,
                        "Train/lr": lr,
                        "Train/Top1_err": top1_err,
                        "Train/Top5_err": top5_err,
                    },
                    global_step=data_size * cur_epoch + cur_iter,
                )
        train_meter.log_iter_stats(cur_epoch, cur_iter)
        train_meter.iter_tic()
    # Log epoch stats.
    train_meter.log_epoch_stats(cur_epoch)
    train_meter.reset()
def train_epoch(train_loader, model, optimizer, train_meter, cur_epoch, cfg):
    """
    Perform the video training for one epoch.

    Besides the standard loss/step bookkeeping, this variant accumulates
    per-epoch regression diagnostics when cfg.DATA.LABELS_TYPE is
    'regression' or 'length'.

    Args:
        train_loader (loader): video training loader yielding
            (inputs, labels, _) tuples.
        model (model): the video model to train.
        optimizer (optim): the optimizer to perform optimization on the
            model's parameters.
        train_meter (TrainMeter): training meters to log the training
            performance.
        cur_epoch (int): current epoch of training.
        cfg (CfgNode): configs. Details can be found in
            slowfast/config/defaults.py
    """
    # Enable train mode.
    model.train()
    train_meter.iter_tic()
    data_size = len(train_loader)
    # Per-epoch accumulators for the regression/counting diagnostics.
    regr_list = []
    num_list = []
    top_list = []
    for cur_iter, (inputs, labels, _) in enumerate(train_loader):
        # Transfer the data to the current GPU device.
        if isinstance(inputs, (list, )):
            for i in range(len(inputs)):
                inputs[i] = inputs[i].cuda(non_blocking=True)
        else:
            inputs = inputs.cuda(non_blocking=True)
        if isinstance(labels, (list, )):
            for i in range(len(labels)):
                labels[i] = labels[i].cuda(non_blocking=True)
            labels = torch.stack(labels)
        else:
            labels = labels.cuda(non_blocking=True)
        if cfg.MODEL.LOSS_FUNC == 'mse':
            # MSE requires float targets.
            labels = labels.float()
        # Update the learning rate.
        lr = optim.get_epoch_lr(cur_epoch + float(cur_iter) / data_size, cfg)
        optim.set_lr(optimizer, lr)
        # Perform the forward pass.
        preds = model(inputs)
        # Explicitly declare reduction to mean.
        loss_fun = losses.get_loss_func(cfg.MODEL.LOSS_FUNC)(reduction="mean")
        # Compute the loss.
        loss = loss_fun(preds, labels)
        # check Nan Loss.
        misc.check_nan_losses(loss)
        # Perform the backward pass.
        optimizer.zero_grad()
        loss.backward()
        # Update the parameters.
        optimizer.step()
        top1_err = None
        # Compute the errors.
        if cfg.DATA.LABELS_TYPE == 'regression':
            # Assumed label layout: [size, counts..., class chunks of width 5]
            # — TODO confirm against the dataset that produces these labels.
            ln = (labels.size(1) - 1) // 2 + 1
            pr = preds[:, ln:].reshape(-1, 5)
            lb = labels[:, ln:].reshape(-1)
            num_topks_correct = metrics.topks_correct(pr, lb, (1, ))
            top1_err = (1.0 - num_topks_correct[0] / len(lb)) * 100.0
            regr = ((preds[:, 0] - labels[:, 0])**2).mean()
            numbers = ((preds[:, 1:ln] - labels[:, 1:ln])**2).mean()
            if cfg.NUM_GPUS > 1:
                regr, numbers = du.all_reduce([regr, numbers])
            regr_list.append(regr.item())
            num_list.append(numbers.item())
        elif cfg.DATA.LABELS_TYPE == 'length':
            regr = ((preds[:, 0] - labels[:, 0])**2).mean()
            numbers = ((preds[:, 1:] - labels[:, 1:])**2).mean()
            if cfg.NUM_GPUS > 1:
                regr, numbers = du.all_reduce([regr, numbers])
            regr_list.append(regr.item())
            num_list.append(numbers.item())
            num_topks_correct = metrics.topks_correct(preds, labels, (1, ))
            # Accuracy is not meaningful for this label type; keep a zero
            # tensor so the shared reduce/log path below still works.
            top1_err = num_topks_correct[0] * 0.0
        elif cfg.DATA.LABELS_TYPE == 'stend':
            top1_err = loss.clone()
            # sigm = torch.nn.Sigmoid()
            # start = sigm(preds[:, 0]).cpu().detach().numpy()
            # end = sigm(preds[:, 1]).cpu().detach().numpy()
        else:
            num_topks_correct = metrics.topks_correct(preds, labels, (1, ))
            # BUG FIX: this denominator was assigned to `preds_ix` but used as
            # the undefined name `preds_size`, raising NameError whenever this
            # branch ran. Use one consistent name.
            preds_size = preds.size(2) * preds.size(
                0) if cfg.DATA.LABELS_TYPE == 'mask' else preds.size(1)
            top1_err = (1.0 - num_topks_correct[0] / preds_size) * 100.0
        # Gather all the predictions across all the devices.
        if cfg.NUM_GPUS > 1:
            loss, top1_err = du.all_reduce([loss, top1_err])
        # Copy the stats from GPU to CPU (sync point).
        loss, top1_err = (loss.item(), top1_err.item())
        top_list.append(top1_err)
        train_meter.iter_toc()
        # Update and log stats.
        train_meter.update_stats(top1_err, loss, lr,
                                 inputs[0].size(0) * cfg.NUM_GPUS)
        train_meter.log_iter_stats(cur_epoch, cur_iter)
        train_meter.iter_tic()
    if cfg.DATA.LABELS_TYPE == 'regression' or cfg.DATA.LABELS_TYPE == 'length':
        print('---------------------')
        print(
            f'LOSS VALUES!!: SIZE_LOSS:{np.mean(regr_list)} NUM_LOSS:{np.mean(num_list)} CLASS_LOSS:{np.mean(top_list)}'
        )
        print('---------------------')
    # Log epoch stats.
    train_meter.log_epoch_stats(cur_epoch)
    train_meter.reset()
def train_epoch(train_loader, model, optimizer, train_meter, cur_epoch, cfg, writer=None):
    """
    Perform the video training for one epoch.

    NOTE(review): this variant carries two experiment-specific hacks —
    (1) it saves a checkpoint whenever `cur_iter % 100 == 1`, and (2) on the
    detection path it reshapes the clip tensors and the boxes before the
    forward pass (apparently to match a traced/export-friendly layout).
    Confirm both before reusing this loop for ordinary training.

    Args:
        train_loader (loader): video training loader.
        model (model): the video model to train.
        optimizer (optim): the optimizer to perform optimization on the model's
            parameters.
        train_meter (TrainMeter): training meters to log the training performance.
        cur_epoch (int): current epoch of training.
        cfg (CfgNode): configs. Details can be found in
            slowfast/config/defaults.py
        writer (TensorboardWriter, optional): TensorboardWriter object to writer
            Tensorboard log.
    """
    # Enable train mode.
    model.train()
    train_meter.iter_tic()
    data_size = len(train_loader)
    for cur_iter, (inputs, labels, _, meta) in enumerate(train_loader):
        # Transfer the data to the current GPU device.
        if cfg.NUM_GPUS:
            if isinstance(inputs, (list, )):
                for i in range(len(inputs)):
                    inputs[i] = inputs[i].cuda(non_blocking=True)
            else:
                inputs = inputs.cuda(non_blocking=True)
            labels = labels.cuda()
            for key, val in meta.items():
                if isinstance(val, (list, )):
                    for i in range(len(val)):
                        val[i] = val[i].cuda(non_blocking=True)
                else:
                    meta[key] = val.cuda(non_blocking=True)
        # Update the learning rate.
        lr = optim.get_epoch_lr(cur_epoch + float(cur_iter) / data_size, cfg)
        optim.set_lr(optimizer, lr)
        train_meter.data_toc()
        # _________________________ save model test ___________________________
        if cur_iter % 100 == 1:
            cu.save_checkpoint(cfg.OUTPUT_DIR, model, optimizer, cur_iter, cfg)  # cur_epoch
            print("----------------------- save done ")
            # exit(0)
        # _____________________________________________________________________
        if cfg.DETECTION.ENABLE:
            # inputs[4,3,8,224,224], preds[32,2048,7,7]
            # change {1,3,8,224,224] -> [8,3,224,224]
            # Drop the batch dim of each pathway, move time to the front, and
            # add two leading dims to the boxes.
            inputs0 = inputs[0].squeeze(0).permute(1, 0, 2, 3)
            inputs1 = inputs[1].squeeze(0).permute(1, 0, 2, 3)
            meta["boxes"] = meta["boxes"].unsqueeze(0).unsqueeze(0)
            inputs = [inputs0, inputs1]
            preds = model(inputs, meta["boxes"])
            # NOTE(review): a commented-out TorchScript export snippet lived
            # here. It loaded 'checkpoints/checkpoint_epoch_00007.pyth',
            # dropped checkpoint keys absent from the current state_dict
            # (multi-GPU training prefixes the stored keys), ran
            # torch.jit.trace on (inputs, meta["boxes"]) and saved
            # "weights/sf_pytorch.pt" before exiting.
        else:
            preds = model(inputs)
        # Explicitly declare reduction to mean.
        loss_fun = losses.get_loss_func(cfg.MODEL.LOSS_FUNC)(reduction="mean")
        # Compute the loss.
        loss = loss_fun(preds, labels)
        # check Nan Loss.
        misc.check_nan_losses(loss)
        # Perform the backward pass.
        optimizer.zero_grad()
        loss.backward()
        # Update the parameters.
        optimizer.step()
        if cfg.DETECTION.ENABLE:
            if cfg.NUM_GPUS > 1:
                loss = du.all_reduce([loss])[0]
            loss = loss.item()
            # Update and log stats.
            train_meter.update_stats(None, None, None, loss, lr)
            # write to tensorboard format if available.
            if writer is not None:
                writer.add_scalars(
                    {
                        "Train/loss": loss,
                        "Train/lr": lr
                    },
                    global_step=data_size * cur_epoch + cur_iter,
                )
        else:
            top1_err, top5_err = None, None
            if cfg.DATA.MULTI_LABEL:
                # Gather all the predictions across all the devices.
                if cfg.NUM_GPUS > 1:
                    [loss] = du.all_reduce([loss])
                loss = loss.item()
            else:
                # Compute the errors.
                num_topks_correct = metrics.topks_correct(
                    preds, labels, (1, 5))
                top1_err, top5_err = [(1.0 - x / preds.size(0)) * 100.0
                                      for x in num_topks_correct]
                # Gather all the predictions across all the devices.
                if cfg.NUM_GPUS > 1:
                    loss, top1_err, top5_err = du.all_reduce(
                        [loss, top1_err, top5_err])
                # Copy the stats from GPU to CPU (sync point).
                loss, top1_err, top5_err = (
                    loss.item(),
                    top1_err.item(),
                    top5_err.item(),
                )
            # Update and log stats.
            train_meter.update_stats(
                top1_err,
                top5_err,
                loss,
                lr,
                inputs[0].size(0) * max(
                    cfg.NUM_GPUS, 1
                ),  # If running on CPU (cfg.NUM_GPUS == 1), use 1 to represent 1 CPU.
            )
            # write to tensorboard format if available.
            if writer is not None:
                writer.add_scalars(
                    {
                        "Train/loss": loss,
                        "Train/lr": lr,
                        "Train/Top1_err": top1_err,
                        "Train/Top5_err": top5_err,
                    },
                    global_step=data_size * cur_epoch + cur_iter,
                )
        train_meter.iter_toc()  # measure allreduce for this meter
        train_meter.log_iter_stats(cur_epoch, cur_iter)
        train_meter.iter_tic()
    # Log epoch stats.
    train_meter.log_epoch_stats(cur_epoch)
    train_meter.reset()
def train_epoch(
    train_loader,
    model,
    optimizer,
    scaler,
    train_meter,
    cur_epoch,
    cfg,
    writer=None,
):
    """
    Profiling/experimental variant of the one-epoch training loop.

    NOTE(review): this version does NOT optimize a real training objective —
    the loss-function call is commented out and `loss = preds.norm()` is used
    instead, and the non-detection accuracy bookkeeping is commented out. It
    appears to exist for timing forward/backward passes (see the `profiler`
    calls) and for experimenting with a two-part "MViTHybrid" model split
    across GPUs — confirm before using it for actual training.

    Args:
        train_loader (loader): video training loader.
        model (model): the video model to train (the P1 half when the model
            name is "MViTHybridP1").
        optimizer (optim): the optimizer for the model's parameters.
        scaler (GradScaler): AMP gradient scaler.
        train_meter (TrainMeter): training meters to log performance.
        cur_epoch (int): current epoch of training.
        cfg (CfgNode): configs. Details can be found in
            slowfast/config/defaults.py
        writer (TensorboardWriter, optional): Tensorboard writer.
    """
    print(model)
    # Enable train mode.
    model.train()
    train_meter.iter_tic()
    data_size = len(train_loader)
    if cfg.MIXUP.ENABLE:
        mixup_fn = MixUp(
            mixup_alpha=cfg.MIXUP.ALPHA,
            cutmix_alpha=cfg.MIXUP.CUTMIX_ALPHA,
            mix_prob=cfg.MIXUP.PROB,
            switch_prob=cfg.MIXUP.SWITCH_PROB,
            label_smoothing=cfg.MIXUP.LABEL_SMOOTH_VALUE,
            num_classes=cfg.MODEL.NUM_CLASSES,
        )
    # NOTE(review): a commented-out manual model-parallel experiment lived
    # here (spreading model.blocks across GPUs when not using DDP), plus a
    # commented-out auxiliary Mlp(400, 1000000, 400) side model.
    profiler.log_tic("loop_time")
    if cfg.MODEL.MODEL_NAME == "MViTHybridP1":
        # Build the second half (P2) of the hybrid model on cuda:2 without
        # DDP, then restore the config fields so the rest of the loop still
        # refers to the P1 model.
        cfg.MODEL.MODEL_NAME = "MViTHybridP2"
        original_ddp = cfg.MODEL.DDP
        cfg.MODEL.DDP = False
        model_p2 = build_model(cfg)
        model_p2 = model_p2.to("cuda:2")  # cuda()
        # because the rest of the logic is about the P1 model
        cfg.MODEL.MODEL_NAME = "MViTHybridP1"
        cfg.MODEL.DDP = original_ddp
    # NOTE(review): the loop variable `time` shadows any module-level `time`
    # import inside this function.
    for cur_iter, (inputs, labels, index, time, meta) in enumerate(train_loader):
        print(f"Iteration: {cur_iter}, {inputs.shape}")
        # (A commented-out synthetic-batch generator was here.)
        # Transfer the data to the current GPU device.
        if cfg.MODEL.MODEL_NAME in ["MViT", "MViTHybridP1"] and cfg.NUM_GPUS:
            print("in MViT model if statement")
            # if cfg.NUM_GPUS:
            if isinstance(inputs, (list, )):
                for i in range(len(inputs)):
                    inputs[i] = inputs[i].cuda(non_blocking=True)
            else:
                inputs = inputs.cuda(non_blocking=True)
            labels = labels.cuda()
            # (The meta-to-GPU transfer is commented out in this variant.)
        # Update the learning rate.
        lr = optim.get_epoch_lr(cur_epoch + float(cur_iter) / data_size, cfg)
        optim.set_lr(optimizer, lr)
        train_meter.data_toc()
        if cfg.MIXUP.ENABLE:
            samples, labels = mixup_fn(inputs[0], labels)
            inputs[0] = samples
        with torch.cuda.amp.autocast(enabled=cfg.TRAIN.MIXED_PRECISION):
            # (The detection forward with meta["boxes"] is commented out.)
            profiler.log_tic("model_time")
            if cfg.MODEL.MODEL_NAME == "MViTHybridP1":
                # Run P1, ship intermediate activations to cuda:2, run P2.
                preds, thw = model(inputs)
                preds = preds.to("cuda:2")
                # import ipdb; ipdb.set_trace()
                preds = model_p2(preds, thw)
            else:
                preds = model(inputs)
            profiler.log_toc("model_time", shape=inputs.shape)
            # NOTE(review): placeholder objective for profiling only — the
            # real `loss_fun(preds, labels)` computation is commented out.
            loss = preds.norm()
        # check Nan Loss.
        misc.check_nan_losses(loss)
        # Perform the backward pass.
        optimizer.zero_grad()
        profiler.log_tic("backward_time")
        scaler.scale(loss).backward()
        # Unscales the gradients of optimizer's assigned params in-place
        scaler.unscale_(optimizer)
        # Clip gradients if necessary
        if cfg.SOLVER.CLIP_GRAD_VAL:
            torch.nn.utils.clip_grad_value_(model.parameters(), cfg.SOLVER.CLIP_GRAD_VAL)
        elif cfg.SOLVER.CLIP_GRAD_L2NORM:
            torch.nn.utils.clip_grad_norm_(model.parameters(), cfg.SOLVER.CLIP_GRAD_L2NORM)
        # Update the parameters.
        scaler.step(optimizer)
        scaler.update()
        profiler.log_toc("backward_time", shape=inputs.shape)
        if cfg.MIXUP.ENABLE:
            # Undo mixing for metrics: merge the two mixed classes and use the
            # dominant one as the hard label.
            _top_max_k_vals, top_max_k_inds = torch.topk(labels, 2, dim=1, largest=True, sorted=True)
            idx_top1 = torch.arange(labels.shape[0]), top_max_k_inds[:, 0]
            idx_top2 = torch.arange(labels.shape[0]), top_max_k_inds[:, 1]
            preds = preds.detach()
            preds[idx_top1] += preds[idx_top2]
            preds[idx_top2] = 0.0
            labels = top_max_k_inds[:, 0]
        if cfg.DETECTION.ENABLE:
            if cfg.NUM_GPUS > 1:
                loss = du.all_reduce([loss])[0]
            loss = loss.item()
            # Update and log stats.
            train_meter.update_stats(None, None, None, loss, lr)
            # write to tensorboard format if available.
            if writer is not None:
                writer.add_scalars(
                    {
                        "Train/loss": loss,
                        "Train/lr": lr
                    },
                    global_step=data_size * cur_epoch + cur_iter,
                )
        # NOTE(review): the standard (non-detection) top-1/top-5 error
        # computation and its meter/tensorboard logging are commented out in
        # this variant, so only detection-path stats are ever recorded.
        train_meter.iter_toc()  # measure allreduce for this meter
        train_meter.log_iter_stats(cur_epoch, cur_iter)
        train_meter.iter_tic()
        profiler.log_toc("loop_time", shape=inputs.shape)
        profiler.log_tic("loop_time")
        profiler.report(25)
    # Log epoch stats.
    train_meter.log_epoch_stats(cur_epoch)
    train_meter.reset()
def train_epoch(train_loader, model, optimizer, train_meter, cur_epoch, cfg):
    """
    Perform the video training for one epoch.

    Args:
        train_loader (loader): video training loader.
        model (model): the video model to train.
        optimizer (optim): the optimizer to perform optimization on the
            model's parameters.
        train_meter (TrainMeter): training meters to log the training
            performance.
        cur_epoch (int): current epoch of training.
        cfg (CfgNode): configs. Details can be found in
            slowfast/config/defaults.py
    """
    # Enable train mode.
    model.train()
    train_meter.iter_tic()
    data_size = len(train_loader)
    # Construct the loss once, outside the batch loop — the loss choice does
    # not change per iteration. Explicitly declare reduction to mean.
    loss_fun = losses.get_loss_func(cfg.MODEL.LOSS_FUNC)(reduction="mean")
    for cur_iter, (inputs, labels, _, meta) in enumerate(train_loader):
        # Transfer the data to the current GPU device.
        if isinstance(inputs, (list, )):
            for i in range(len(inputs)):
                inputs[i] = inputs[i].cuda(non_blocking=True)
        else:
            inputs = inputs.cuda(non_blocking=True)
        labels = labels.cuda()
        for key, val in meta.items():
            if isinstance(val, (list, )):
                for i in range(len(val)):
                    val[i] = val[i].cuda(non_blocking=True)
            else:
                meta[key] = val.cuda(non_blocking=True)
        # Update the learning rate (fractional-epoch schedule).
        lr = optim.get_epoch_lr(cur_epoch + float(cur_iter) / data_size, cfg)
        optim.set_lr(optimizer, lr)
        if cfg.DETECTION.ENABLE:
            # Compute the predictions over the provided RoI boxes.
            preds = model(inputs, meta["boxes"])
        else:
            # Perform the forward pass.
            preds = model(inputs)
        # Compute the loss.
        loss = loss_fun(preds, labels)
        # check Nan Loss.
        misc.check_nan_losses(loss)
        # Perform the backward pass.
        optimizer.zero_grad()
        loss.backward()
        # Update the parameters.
        optimizer.step()
        if cfg.DETECTION.ENABLE:
            # Detection logs loss only; no top-k error is defined.
            if cfg.NUM_GPUS > 1:
                loss = du.all_reduce([loss])[0]
            loss = loss.item()
            train_meter.iter_toc()
            # Update and log stats.
            train_meter.update_stats(None, None, None, loss, lr)
        else:
            # Compute the top-1 / top-5 errors.
            num_topks_correct = metrics.topks_correct(preds, labels, (1, 5))
            top1_err, top5_err = [(1.0 - x / preds.size(0)) * 100.0
                                  for x in num_topks_correct]
            # Gather all the predictions across all the devices.
            if cfg.NUM_GPUS > 1:
                loss, top1_err, top5_err = du.all_reduce(
                    [loss, top1_err, top5_err])
            # Copy the stats from GPU to CPU (sync point).
            loss, top1_err, top5_err = (
                loss.item(),
                top1_err.item(),
                top5_err.item(),
            )
            train_meter.iter_toc()
            # Update and log stats.
            train_meter.update_stats(top1_err, top5_err, loss, lr,
                                     inputs[0].size(0) * cfg.NUM_GPUS)
        train_meter.log_iter_stats(cur_epoch, cur_iter)
        train_meter.iter_tic()
    # Log epoch stats.
    train_meter.log_epoch_stats(cur_epoch)
    train_meter.reset()
def train_epoch(train_loader, model, optimizer, train_meter, cur_epoch, cfg,
                writer=None, wandb_log=False):
    """
    Perform the audio training for one epoch.

    Args:
        train_loader (loader): audio training loader.
        model (model): the audio model to train.
        optimizer (optim): the optimizer to perform optimization on the
            model's parameters.
        train_meter (TrainMeter): training meters to log the training
            performance.
        cur_epoch (int): current epoch of training.
        cfg (CfgNode): configs. Details can be found in
            slowfast/config/defaults.py
        writer (TensorboardWriter, optional): TensorboardWriter object
            to writer Tensorboard log.
        wandb_log (bool): if True, log to Weights & Biases instead of the
            Tensorboard writer.
    """
    # Enable train mode.
    model.train()
    if cfg.BN.FREEZE:
        # Under DDP the wrapped module holds freeze_fn.
        model.module.freeze_fn(
            'bn_statistics') if cfg.NUM_GPUS > 1 else model.freeze_fn(
                'bn_statistics')

    train_meter.iter_tic()
    data_size = len(train_loader)

    for cur_iter, (inputs, labels, _, meta) in enumerate(train_loader):
        # Transfer the data to the current GPU device.
        if cfg.NUM_GPUS:
            if isinstance(inputs, (list, )):
                for i in range(len(inputs)):
                    inputs[i] = inputs[i].cuda(non_blocking=True)
            else:
                inputs = inputs.cuda(non_blocking=True)
            # Dict labels carry separate 'verb'/'noun' targets.
            if isinstance(labels, (dict, )):
                labels = {k: v.cuda() for k, v in labels.items()}
            else:
                labels = labels.cuda()
            for key, val in meta.items():
                if isinstance(val, (list, )):
                    for i in range(len(val)):
                        val[i] = val[i].cuda(non_blocking=True)
                else:
                    meta[key] = val.cuda(non_blocking=True)

        # Update the learning rate.
        lr = optim.get_epoch_lr(cur_epoch + float(cur_iter) / data_size, cfg)
        optim.set_lr(optimizer, lr)

        train_meter.data_toc()

        # Single forward pass; the model returns (final-layer predictions,
        # penultimate linear-layer output). The previous code called
        # model(inputs) twice — once per element — doubling compute and,
        # with stochastic layers (dropout/BN updates), pairing outputs from
        # two *different* passes.
        outputs = model(inputs)
        preds = outputs[0]
        linear_layer_output = outputs[1]

        if isinstance(labels, (dict, )):
            # Explicitly declare reduction to mean.
            loss_fun = losses.get_loss_func(
                cfg.MODEL.LOSS_FUNC)(reduction="mean")
            # Compute the loss: equal-weighted verb and noun heads.
            loss_verb = loss_fun(preds[0], labels['verb'])
            loss_noun = loss_fun(preds[1], labels['noun'])
            loss = 0.5 * (loss_verb + loss_noun)
            # check Nan Loss.
            misc.check_nan_losses(loss)
        else:
            # Single-head path (e.g. VGG-Sound): class loss plus an
            # embedding loss on the penultimate layer.
            # Explicitly declare reduction to mean.
            loss_fun = losses.get_loss_func(
                cfg.MODEL.LOSS_FUNC)(reduction="mean")
            # Embedding loss function.
            emb_loss_fun = losses.get_loss_func(
                cfg.MODEL.EMB_LOSS_FUNC)(reduction="mean")
            # Compute the loss for the main model.
            loss = loss_fun(preds, labels)
            # Compute the loss for the embeddings.
            # NOTE(review): `word_embedding` is not defined in this function
            # or any visible scope — presumably a module-level target
            # embedding; confirm it exists before exercising this branch.
            emb_loss = emb_loss_fun(linear_layer_output, word_embedding)
            # Add the losses together - use embeddings to fine tune the
            # model's objective.
            loss = loss + emb_loss
            # check Nan Loss.
            misc.check_nan_losses(loss)

        # Perform the backward pass.
        optimizer.zero_grad()
        loss.backward()
        # Update the parameters.
        optimizer.step()

        if isinstance(labels, (dict, )):
            # Compute the verb accuracies.
            verb_top1_acc, verb_top5_acc = metrics.topk_accuracies(
                preds[0], labels['verb'], (1, 5))
            # Gather all the predictions across all the devices.
            if cfg.NUM_GPUS > 1:
                loss_verb, verb_top1_acc, verb_top5_acc = du.all_reduce(
                    [loss_verb, verb_top1_acc, verb_top5_acc])
            # Copy the stats from GPU to CPU (sync point).
            loss_verb, verb_top1_acc, verb_top5_acc = (
                loss_verb.item(),
                verb_top1_acc.item(),
                verb_top5_acc.item(),
            )
            # Compute the noun accuracies.
            noun_top1_acc, noun_top5_acc = metrics.topk_accuracies(
                preds[1], labels['noun'], (1, 5))
            # Gather all the predictions across all the devices.
            if cfg.NUM_GPUS > 1:
                loss_noun, noun_top1_acc, noun_top5_acc = du.all_reduce(
                    [loss_noun, noun_top1_acc, noun_top5_acc])
            # Copy the stats from GPU to CPU (sync point).
            loss_noun, noun_top1_acc, noun_top5_acc = (
                loss_noun.item(),
                noun_top1_acc.item(),
                noun_top5_acc.item(),
            )
            # Compute the action accuracies (verb and noun jointly correct).
            action_top1_acc, action_top5_acc = metrics.multitask_topk_accuracies(
                (preds[0], preds[1]), (labels['verb'], labels['noun']), (1, 5))
            # Gather all the predictions across all the devices.
            if cfg.NUM_GPUS > 1:
                loss, action_top1_acc, action_top5_acc = du.all_reduce(
                    [loss, action_top1_acc, action_top5_acc])
            # Copy the stats from GPU to CPU (sync point).
            loss, action_top1_acc, action_top5_acc = (
                loss.item(),
                action_top1_acc.item(),
                action_top5_acc.item(),
            )
            # Update and log stats.
            train_meter.update_stats(
                (verb_top1_acc, noun_top1_acc, action_top1_acc),
                (verb_top5_acc, noun_top5_acc, action_top5_acc),
                (loss_verb, loss_noun, loss),
                lr,
                inputs[0].size(0) * max(
                    cfg.NUM_GPUS, 1
                ),  # If running on CPU (cfg.NUM_GPUS == 1), use 1 to represent 1 CPU.
            )
            # write to tensorboard format if available.
            if writer is not None and not wandb_log:
                writer.add_scalars(
                    {
                        "Train/loss": loss,
                        "Train/lr": lr,
                        "Train/Top1_acc": action_top1_acc,
                        "Train/Top5_acc": action_top5_acc,
                        "Train/verb/loss": loss_verb,
                        "Train/noun/loss": loss_noun,
                        "Train/verb/Top1_acc": verb_top1_acc,
                        "Train/verb/Top5_acc": verb_top5_acc,
                        "Train/noun/Top1_acc": noun_top1_acc,
                        "Train/noun/Top5_acc": noun_top5_acc,
                    },
                    global_step=data_size * cur_epoch + cur_iter,
                )
            if wandb_log:
                wandb.log(
                    {
                        "Train/loss": loss,
                        "Train/lr": lr,
                        "Train/Top1_acc": action_top1_acc,
                        "Train/Top5_acc": action_top5_acc,
                        "Train/verb/loss": loss_verb,
                        "Train/noun/loss": loss_noun,
                        "Train/verb/Top1_acc": verb_top1_acc,
                        "Train/verb/Top5_acc": verb_top5_acc,
                        "Train/noun/Top1_acc": noun_top1_acc,
                        "Train/noun/Top5_acc": noun_top5_acc,
                        "train_step": data_size * cur_epoch + cur_iter,
                    },
                )
        else:
            top1_err, top5_err = None, None
            if cfg.DATA.MULTI_LABEL:
                # Gather all the predictions across all the devices.
                if cfg.NUM_GPUS > 1:
                    [loss] = du.all_reduce([loss])
                loss = loss.item()
            else:
                # Compute the errors.
                num_topks_correct = metrics.topks_correct(
                    preds, labels, (1, 5))
                top1_err, top5_err = [(1.0 - x / preds.size(0)) * 100.0
                                      for x in num_topks_correct]
                # Gather all the predictions across all the devices.
                if cfg.NUM_GPUS > 1:
                    loss, top1_err, top5_err = du.all_reduce(
                        [loss, top1_err, top5_err])
                # Copy the stats from GPU to CPU (sync point).
                loss, top1_err, top5_err = (
                    loss.item(),
                    top1_err.item(),
                    top5_err.item(),
                )
            # Update and log stats.
            train_meter.update_stats(
                top1_err,
                top5_err,
                loss,
                lr,
                inputs[0].size(0) * max(
                    cfg.NUM_GPUS, 1
                ),  # If running on CPU (cfg.NUM_GPUS == 1), use 1 to represent 1 CPU.
            )
            # write to tensorboard format if available.
            if writer is not None and not wandb_log:
                writer.add_scalars(
                    {
                        "Train/loss": loss,
                        "Train/lr": lr,
                        "Train/Top1_err": top1_err,
                        "Train/Top5_err": top5_err,
                    },
                    global_step=data_size * cur_epoch + cur_iter,
                )
            if wandb_log:
                wandb.log(
                    {
                        "Train/loss": loss,
                        "Train/lr": lr,
                        "Train/Top1_err": top1_err,
                        "Train/Top5_err": top5_err,
                        "train_step": data_size * cur_epoch + cur_iter,
                    },
                )
        train_meter.iter_toc()  # measure allreduce for this meter
        train_meter.log_iter_stats(cur_epoch, cur_iter)
        train_meter.iter_tic()

    # Log epoch stats.
    train_meter.log_epoch_stats(cur_epoch)
    train_meter.reset()
def train_epoch(train_loader, model, optimizer, train_meter, cur_epoch,
                writer, nep, cfg):
    """
    Perform the video training for one epoch (predictive/CPC variant).

    Args:
        train_loader (loader): video training loader.
        model (model): the video model to train.
        optimizer (optim): the optimizer to perform optimization on the
            model's parameters.
        train_meter (TrainMeter): training meters to log the training
            performance.
        cur_epoch (int): current epoch of training.
        writer: summary writer (TensorBoard-style) used for scalar/figure
            logging, or None.
        nep: Neptune experiment handle for metric logging, or None.
        cfg (CfgNode): configs. Details can be found in
            slowfast/config/defaults.py
    """
    # Enable train mode.
    model.train()
    train_meter.iter_tic()
    data_size = len(train_loader)
    # Global iteration counter driving the periodic logging/summary blocks.
    global_iters = data_size * cur_epoch
    for cur_iter, (inputs, labels, _, meta) in enumerate(train_loader):
        # Transfer the data to the current GPU device.
        if isinstance(inputs, (list, )):
            for i in range(len(inputs)):
                inputs[i] = inputs[i].cuda(non_blocking=True)
        else:
            inputs = inputs.cuda(non_blocking=True)
        # NOTE(review): `i` below is the leftover index from the loop above;
        # if `inputs` was NOT a list this raises NameError (or indexes a
        # tensor) — this path appears to assume `inputs` is always a list.
        # Confirm against the loader.
        # A clip tensor with more than 5 dims (extra clips axis) is folded
        # into the batch axis; labels are repeated to match.
        if len(inputs[i].shape) > 5:
            labels = torch.repeat_interleave(labels, inputs[i].size(1), 0)
        for i in range(len(inputs)):
            if len(inputs[i].shape) > 5:
                inputs[i] = inputs[i].view((-1, ) + inputs[i].shape[2:])
        labels = labels.cuda()
        for key, val in meta.items():
            if isinstance(val, (list, )):
                for i in range(len(val)):
                    val[i] = val[i].cuda(non_blocking=True)
            else:
                meta[key] = val.cuda(non_blocking=True)
        # Update the learning rate.
        lr = optim.get_epoch_lr(cur_epoch + float(cur_iter) / data_size,
                                global_iters, cfg)
        optim.set_lr(optimizer, lr)
        if cfg.DETECTION.ENABLE:
            # Compute the predictions.
            preds = model(inputs, meta["boxes"])
        else:
            # Perform the forward pass (with segmentation masks if supplied).
            if 'masks' in meta:
                preds = model((inputs, meta['masks']))
            else:
                preds = model(inputs)
        # (A large commented-out activation-visualization block — saving
        # bu_errors / mix_layer / H_inh / hidden grids to OUTPUT_DIR — was
        # removed here; recover it from VCS history if needed.)
        # `preds` is a dict of named outputs/losses from the model.
        out_keys = preds.keys()
        total_loss = 0
        if cfg.PREDICTIVE.ENABLE:
            # Frame-prediction errors (and optional diagnostics) emitted by
            # the model.
            errors = preds['pred_errors']
            if 'frame_errors' in preds:
                frame_errors = preds['frame_errors']
            if 'IoU' in preds:
                iou = preds['IoU']
            if 'Acc' in preds:
                acc = preds['Acc']
            pred_loss = errors.mean()
            total_loss += pred_loss
        if cfg.PREDICTIVE.CPC:
            # Contrastive-predictive-coding loss computed inside the model.
            cpc_loss = preds['cpc_loss']
            total_loss += cpc_loss
        if 'cbp_penalty' in preds:
            penalty = preds['cbp_penalty']
            total_loss += penalty
        if cfg.SUPERVISED:
            preds = preds['logits']
            if cfg.MODEL.LOSS_FUNC != '':
                # Explicitly declare reduction to mean.
                loss_fun = losses.get_loss_func(
                    cfg.MODEL.LOSS_FUNC)(reduction="mean")
                # Compute the loss.
                loss = loss_fun(preds, labels)
                total_loss += loss
        # check Nan Loss.
        misc.check_nan_losses(total_loss)
        # Perform the backward pass.
        optimizer.zero_grad()
        total_loss.backward()
        # Periodically plot gradient flow across layers for debugging.
        if writer is not None and global_iters % cfg.SUMMARY_PERIOD == 0:
            n_p = model.module.named_parameters() if hasattr(
                model, 'module') else model.named_parameters()
            fig = viz_helpers.plot_grad_flow_v2(n_p)
            writer.add_figure('grad_flow/grad_flow', fig, global_iters)
        # Update the parameters.
        optimizer.step()
        if cfg.DETECTION.ENABLE:
            if cfg.NUM_GPUS > 1:
                loss = du.all_reduce([loss])[0]
            loss = loss.item()
            train_meter.iter_toc()
            # Update and log stats.
            train_meter.update_stats(lr,
                                     inputs[0].size(0) * cfg.NUM_GPUS,
                                     loss=loss)
        else:
            if cfg.SUPERVISED:
                # Compute the errors.
                num_topks_correct = metrics.topks_correct(
                    preds, labels, (1, 5))
                top1_err, top5_err = [(1.0 - x / preds.size(0)) * 100.0
                                      for x in num_topks_correct]
            # Gather all the predictions across all the devices.
            if cfg.NUM_GPUS > 1:
                if cfg.PREDICTIVE.ENABLE:
                    pred_loss = du.all_reduce([pred_loss])
                    pred_loss = pred_loss[0]
                    if 'frame_errors' in out_keys:
                        frame_errors = du.all_reduce([frame_errors])[0]
                    if 'IoU' in preds:
                        iou = du.all_reduce([iou])[0]
                    if 'Acc' in preds:
                        acc = du.all_reduce([acc])[0]
                if cfg.PREDICTIVE.CPC:
                    cpc_loss = du.all_reduce([cpc_loss])
                    cpc_loss = cpc_loss[0]
                if cfg.SUPERVISED:
                    loss, top1_err, top5_err = du.all_reduce(
                        [loss, top1_err, top5_err])
                if 'cbp_penalty' in out_keys:
                    penalty = du.all_reduce([penalty])[0]
            # Copy stats from GPU to CPU (sync points) and collect them into
            # one dict for the meter and the loggers below.
            loss_logs = {}
            if cfg.PREDICTIVE.ENABLE:
                pred_loss = pred_loss.item()
                loss_logs['loss_pred'] = pred_loss
                if 'frame_errors' in out_keys:
                    frame_errors = frame_errors.item()
                    loss_logs['frame_errors'] = frame_errors
                if 'IoU' in preds:
                    loss_logs['IoU'] = iou.item()
                if 'Acc' in preds:
                    loss_logs['Acc'] = acc.item()
            if cfg.PREDICTIVE.CPC:
                cpc_loss = cpc_loss.item()
                loss_logs['loss_cpc'] = cpc_loss
            if cfg.SUPERVISED:
                loss, top1_err, top5_err = (
                    loss.item(),
                    top1_err.item(),
                    top5_err.item(),
                )
                loss_logs['loss_class'] = loss
                loss_logs['top5_err'] = top5_err
                loss_logs['top1_err'] = top1_err
            if 'cbp_penalty' in out_keys:
                loss_logs['cbp_penalty'] = penalty.item()
            train_meter.iter_toc()
            # Update and log stats.
            train_meter.update_stats(lr, inputs[0].size(0) * cfg.NUM_GPUS,
                                     **loss_logs)
            # NOTE(review): str.strip('loss_') strips any of the CHARACTERS
            # "l,o,s,_" from both ends, not the prefix "loss_" (e.g.
            # 'loss_class' -> 'cla'). Presumably a prefix-removal was
            # intended; confirm before relying on the logged metric names.
            if writer is not None and global_iters % cfg.LOG_PERIOD == 0:
                for k, v in loss_logs.items():
                    writer.add_scalar('loss/' + k.strip('loss_'),
                                      train_meter.stats[k].get_win_median(),
                                      global_iters)
            if nep is not None and global_iters % cfg.LOG_PERIOD == 0:
                for k, v in loss_logs.items():
                    nep.log_metric(k.strip('loss_'),
                                   train_meter.stats[k].get_win_median())
                nep.log_metric('global_iters', global_iters)
            # Periodically save a grid of (up to 3) input clips next to the
            # model's predicted frames — master process only.
            if global_iters % cfg.SUMMARY_PERIOD == 0 and du.get_rank(
            ) == 0 and du.is_master_proc(num_gpus=cfg.NUM_GPUS):
                with torch.no_grad():
                    inputs[0] = inputs[0][:min(3, len(inputs[0]))]
                    if 'masks' in meta:
                        frames = model(
                            (inputs, meta['masks'][:min(3, len(inputs[0]))]),
                            extra=['frames'])['frames']
                    else:
                        frames = model(inputs, extra=['frames'])['frames']
                    n_rows = inputs[0].size(2) - 1
                    inputs = inputs[0].transpose(1, 2)[:, -n_rows:]
                    frames = frames.transpose(1, 2)[:, -n_rows:]
                    # Un-normalize with the dataset mean/std for display.
                    inputs = inputs * inputs.new(
                        cfg.DATA.STD)[None, None, :, None, None] + inputs.new(
                            cfg.DATA.MEAN)[None, None, :, None, None]
                    frames = frames * frames.new(
                        cfg.DATA.STD)[None, None, :, None, None] + frames.new(
                            cfg.DATA.MEAN)[None, None, :, None, None]
                    images = torch.cat([inputs, frames],
                                       1).reshape((-1, ) + inputs.shape[2:])
                    tv.utils.save_image(
                        images,
                        os.path.join(cfg.OUTPUT_DIR,
                                     'preds_%d.jpg' % global_iters),
                        nrow=n_rows,
                        normalize=True)
        train_meter.log_iter_stats(cur_epoch, cur_iter)
        train_meter.iter_tic()
        global_iters += 1
    # Log epoch stats.
    train_meter.log_epoch_stats(cur_epoch)
    train_meter.reset()
def train_epoch(train_loader, model, optimizer, train_meter, cur_epoch, cfg,
                writer=None):
    """
    Run one epoch of self-supervised video training.

    The loader yields inputs only (no labels); the model computes its own
    loss and returns it under output['loss'].

    Args:
        train_loader (loader): video training loader.
        model (model): the video model to train.
        optimizer (optim): the optimizer to perform optimization on the
            model's parameters.
        train_meter (TrainMeter): training meters to log the training
            performance.
        cur_epoch (int): current epoch of training.
        cfg (CfgNode): configs. Details can be found in
            slowfast/config/defaults.py
        writer (TensorboardWriter, optional): TensorboardWriter object to
            writer Tensorboard log.
    """
    # Switch to train mode and start timing the first batch fetch.
    model.train()
    train_meter.iter_tic()
    num_batches = len(train_loader)

    for batch_idx, inputs in enumerate(train_loader):
        # Move the batch onto the GPU when one is configured.
        if cfg.NUM_GPUS:
            if isinstance(inputs, list):
                inputs = [clip.cuda(non_blocking=True) for clip in inputs]
            else:
                inputs = inputs.cuda(non_blocking=True)

        # Per-iteration learning-rate schedule (fractional epoch).
        lr = optim.get_epoch_lr(cur_epoch + float(batch_idx) / num_batches,
                                cfg)
        optim.set_lr(optimizer, lr)

        train_meter.data_toc()

        # The model returns its loss directly; average it across devices'
        # per-replica entries.
        output = model(inputs)
        loss = torch.mean(output['loss'])
        misc.check_nan_losses(loss)

        # Standard optimization step.
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        global_step = num_batches * cur_epoch + batch_idx
        if cfg.DETECTION.ENABLE:
            # Detection path: reduce across GPUs, then log loss only.
            if cfg.NUM_GPUS > 1:
                loss = du.all_reduce([loss])[0]
            loss = loss.item()
            train_meter.update_stats(None, None, None, loss, lr)
            if writer is not None:
                writer.add_scalars(
                    {
                        "Train/loss": loss,
                        "Train/lr": lr
                    },
                    global_step=global_step,
                )
        else:
            if cfg.NUM_GPUS > 1:
                loss = du.all_reduce([loss])[0]
            loss = loss.item()
            # No accuracy is defined for self-supervision; 1/1 placeholders.
            train_meter.update_stats(
                1,
                1,
                loss,
                lr,
                inputs[0].size(0) * max(
                    cfg.NUM_GPUS, 1
                ),  # If running on CPU (cfg.NUM_GPUS == 1), use 1 to represent 1 CPU.
            )
            if writer is not None:
                writer.add_scalars(
                    {
                        "Train/loss": loss,
                        "Train/lr": lr,
                    },
                    global_step=global_step,
                )

        train_meter.iter_toc()  # measure allreduce for this meter
        train_meter.log_iter_stats(cur_epoch, batch_idx)
        train_meter.iter_tic()

    # Log epoch stats.
    train_meter.log_epoch_stats(cur_epoch)
    train_meter.reset()
def train_epoch(
    train_loader, student_model, teacher_model, optimizer, train_meter, cur_epoch, cfg, writer=None
):
    """
    Perform knowledge-distillation video training for one epoch.

    Args:
        train_loader (loader): video training loader.
        student_model (model): the student model being trained.
        teacher_model (model): the (frozen) teacher model distilled from.
        optimizer (optim): the optimizer to perform optimization on the
            student model's parameters.
        train_meter (TrainMeter): training meters to log the training
            performance.
        cur_epoch (int): current epoch of training.
        cfg (CfgNode): configs. Details can be found in
            slowfast/config/defaults.py
        writer (TensorboardWriter, optional): TensorboardWriter object
            to writer Tensorboard log.
    """
    # Teacher stays in eval mode; only the student trains.
    teacher_model.eval()
    student_model.train()
    train_meter.iter_tic()
    data_size = len(train_loader)

    for cur_iter, (inputs, labels, _, meta, _) in enumerate(train_loader):
        # Transfer the data to the current GPU device.
        if cfg.NUM_GPUS:
            if isinstance(inputs, (list,)):
                for i in range(len(inputs)):
                    inputs[i] = inputs[i].cuda(non_blocking=True)
            else:
                inputs = inputs.cuda(non_blocking=True)
            labels = labels.cuda()
            for key, val in meta.items():
                if isinstance(val, (list,)):
                    for i in range(len(val)):
                        val[i] = val[i].cuda(non_blocking=True)
                else:
                    meta[key] = val.cuda(non_blocking=True)

        # Update the learning rate.
        lr = optim.get_epoch_lr(cur_epoch + float(cur_iter) / data_size, cfg)
        optim.set_lr(optimizer, lr)

        # Both models see the same batch; .copy() protects the shared input
        # list from in-place mutation by each forward. Teacher runs without
        # gradients.
        if cfg.DETECTION.ENABLE:
            # Compute the predictions.
            student_preds, student_features = student_model(inputs.copy(), meta["boxes"])
            with torch.no_grad():
                teacher_preds, teacher_features = teacher_model(inputs.copy(), meta["boxes"])
        else:
            # Perform the forward pass.
            student_preds, student_features = student_model(inputs.copy())
            with torch.no_grad():
                teacher_preds, teacher_features = teacher_model(inputs.copy())

        # Explicitly declare reduction to mean.
        # L2 loss for featuremap difference
        loss_mse_func = losses.get_loss_func('mse')(reduction="mean")
        # Cross entropy loss for prediction
        loss_pred_func = losses.get_loss_func('cross_entropy')(reduction="mean")
        # kl-divergence loss
        loss_kl_func = losses.get_loss_func('kl_divergence')(reduction="batchmean")

        # Distillation temperature and loss-mixing weight.
        T = cfg.KD.TEMPERATURE
        alpha = cfg.KD.ALPHA
        # Hard-label cross-entropy on the student, weighted by (1 - alpha).
        loss_pred = loss_pred_func(student_preds, labels) * (1. - alpha)

        loss_mse = []
        loss_kl = []
        # Distill each stage's feature pair (i indexes the two feature
        # tensors per stage — presumably the slow/fast pathways; confirm).
        for s_features, t_features in zip(student_features, teacher_features):
            for i in range(2):
                #mse loss
                loss_mse.append(loss_mse_func(s_features[i], t_features[i]) * (alpha * T * T))
                #kl divergence loss
                # Flatten (B, C, T, H, W) features to (B*T*H*W, C) vectors.
                b, c, t, h, w = s_features[i].shape
                s_feature = s_features[i].permute(0, 2, 3, 4, 1).contiguous().view(b*t*h*w, c)
                t_feature = t_features[i].permute(0, 2, 3, 4, 1).contiguous().view(b*t*h*w, c)
                # NOTE(review): softmax/log_softmax are taken over dim=0 (the
                # flattened batch axis) rather than dim=1 (channels). Standard
                # KD softens over the class/channel dimension — confirm this
                # is intentional before touching it.
                loss_kl.append(loss_kl_func(F.log_softmax(s_feature/T, dim = 0), F.softmax(t_feature/T, dim = 0)) * (alpha * T * T))

        #TOTAL LOSS = sum of all losses
        loss = loss_pred + sum(loss_mse) + sum(loss_kl)

        # check Nan Loss.
        misc.check_nan_losses(loss)

        # Perform the backward pass.
        optimizer.zero_grad()
        loss.backward()
        # Update the parameters.
        optimizer.step()

        if cfg.DETECTION.ENABLE:
            if cfg.NUM_GPUS > 1:
                loss = du.all_reduce([loss])[0]
            loss = loss.item()

            train_meter.iter_toc()
            # Update and log stats.
            train_meter.update_stats(None, None, None, loss, lr)
            # write to tensorboard format if available.
            if writer is not None:
                writer.add_scalars(
                    {"Train/loss": loss, "Train/lr": lr, "Train/mse": sum(loss_mse), "Train/loss_kl": sum(loss_kl), "Train/loss_pred": loss_pred},
                    global_step=data_size * cur_epoch + cur_iter,
                )
        else:
            top1_err, top5_err = None, None
            if cfg.DATA.MULTI_LABEL:
                # Gather all the predictions across all the devices.
                if cfg.NUM_GPUS > 1:
                    [loss] = du.all_reduce([loss])
                loss = loss.item()
            else:
                # Compute the errors.
                num_topks_correct = metrics.topks_correct(student_preds, labels, (1, 5))
                top1_err, top5_err = [
                    (1.0 - x / student_preds.size(0)) * 100.0 for x in num_topks_correct
                ]
                # Gather all the predictions across all the devices.
                if cfg.NUM_GPUS > 1:
                    loss, top1_err, top5_err = du.all_reduce(
                        [loss, top1_err, top5_err]
                    )
                # Copy the stats from GPU to CPU (sync point).
                loss, top1_err, top5_err = (
                    loss.item(),
                    top1_err.item(),
                    top5_err.item(),
                )
            train_meter.iter_toc()
            # Update and log stats.
            train_meter.update_stats(
                top1_err,
                top5_err,
                loss,
                lr,
                inputs[0].size(0) * max(
                    cfg.NUM_GPUS, 1
                ),  # If running on CPU (cfg.NUM_GPUS == 1), use 1 to represent 1 CPU.
            )
            # write to tensorboard format if available.
            if writer is not None:
                writer.add_scalars(
                    {
                        "Train/loss": loss,
                        "Train/lr": lr,
                        "Train/Top1_err": top1_err,
                        "Train/Top5_err": top5_err,
                        "Train/mse": sum(loss_mse),
                        "Train/loss_kl": sum(loss_kl),
                        "Train/loss_pred": loss_pred,
                    },
                    global_step=data_size * cur_epoch + cur_iter,
                )
        train_meter.log_iter_stats(cur_epoch, cur_iter)
        train_meter.iter_tic()

    # Log epoch stats.
    train_meter.log_epoch_stats(cur_epoch)
    train_meter.reset()
def train_epoch(
    train_loader,
    model,
    optimizer,
    scaler,
    train_meter,
    cur_epoch,
    cfg,
    writer=None,
):
    """
    Perform the video training for one epoch.

    Args:
        train_loader (loader): video training loader.
        model (model): the video model to train.
        optimizer (optim): the optimizer to perform optimization on the
            model's parameters.
        scaler (GradScaler): gradient scaler for mixed-precision training.
        train_meter (TrainMeter): training meters to log the training
            performance.
        cur_epoch (int): current epoch of training.
        cfg (CfgNode): configs. Details can be found in
            slowfast/config/defaults.py
        writer (TensorboardWriter, optional): TensorboardWriter object
            to writer Tensorboard log.
    """
    # Enable train mode.
    model.train()
    train_meter.iter_tic()
    data_size = len(train_loader)

    if cfg.MIXUP.ENABLE:
        mixup_fn = MixUp(
            mixup_alpha=cfg.MIXUP.ALPHA,
            cutmix_alpha=cfg.MIXUP.CUTMIX_ALPHA,
            mix_prob=cfg.MIXUP.PROB,
            switch_prob=cfg.MIXUP.SWITCH_PROB,
            label_smoothing=cfg.MIXUP.LABEL_SMOOTH_VALUE,
            num_classes=cfg.MODEL.NUM_CLASSES,
        )

    # For MoCo-style contrastive training, skip optimizer updates for the
    # first iterations until the negatives queue has been filled once.
    iters_noupdate = 0
    if cfg.MODEL.MODEL_NAME == "ContrastiveModel" and cfg.CONTRASTIVE.TYPE == "moco":
        assert cfg.CONTRASTIVE.QUEUE_LEN % (cfg.TRAIN.BATCH_SIZE *
                                            cfg.NUM_SHARDS) == 0
        iters_noupdate = (cfg.CONTRASTIVE.QUEUE_LEN // cfg.TRAIN.BATCH_SIZE //
                          cfg.NUM_SHARDS)
    if cfg.MODEL.FROZEN_BN:
        misc.frozen_bn_stats(model)
    # Explicitly declare reduction to mean.
    loss_fun = losses.get_loss_func(cfg.MODEL.LOSS_FUNC)(reduction="mean")

    profiler.log_tic("loop_time")
    # NOTE: the loop variable `time` (per-clip sampling times from the
    # loader) shadows the stdlib `time` module inside this function.
    for cur_iter, (inputs, labels, index, time, meta) in enumerate(train_loader):
        # Normalize to a list of pathway tensors.
        if not isinstance(inputs, list):
            inputs = [inputs]
        # Transfer the data to the current GPU device.
        # NOTE(review): tensors are pinned to "cuda:0" rather than the
        # process's current device (the original `.cuda(non_blocking=True)`
        # calls are left commented below) — this presumably breaks
        # NUM_GPUS > 1 runs where each rank owns a different device; confirm
        # before running distributed.
        if cfg.NUM_GPUS:
            if isinstance(inputs, (list, )):
                for i in range(len(inputs)):
                    if isinstance(inputs[i], (list, )):
                        for j in range(len(inputs[i])):
                            # inputs[i][j] = inputs[i][j].cuda(non_blocking=True)
                            inputs[i][j] = inputs[i][j].to("cuda:0")
                    else:
                        # inputs[i] = inputs[i].cuda(non_blocking=True)
                        inputs[i] = inputs[i].to("cuda:0")
            else:
                # inputs = inputs.cuda(non_blocking=True)
                inputs = inputs.to("cuda:0")
            # labels = labels.cuda()
            labels = labels.to("cuda:0")
            for key, val in meta.items():
                if isinstance(val, (list, )):
                    for i in range(len(val)):
                        val[i] = val[i].cuda(non_blocking=True)
                else:
                    meta[key] = val.cuda(non_blocking=True)
            index = index.to("cuda:0")
            time = time.to("cuda:0")
        batch_size = (inputs[0][0].size(0)
                      if isinstance(inputs[0], list) else inputs[0].size(0))
        # Update the learning rate.
        epoch_exact = cur_epoch + float(cur_iter) / data_size
        lr = optim.get_epoch_lr(epoch_exact, cfg)
        optim.set_lr(optimizer, lr)

        train_meter.data_toc()
        if cfg.MIXUP.ENABLE:
            samples, labels = mixup_fn(inputs[0], labels)
            inputs[0] = samples

        # Forward pass and loss under autocast for mixed precision.
        with torch.cuda.amp.autocast(enabled=cfg.TRAIN.MIXED_PRECISION):
            # Explicitly declare reduction to mean.
            perform_backward = True
            optimizer.zero_grad()

            if cfg.MODEL.MODEL_NAME == "ContrastiveModel":
                # The contrastive path may compute (part of) the loss itself
                # and may disable the backward pass below.
                (
                    model,
                    preds,
                    partial_loss,
                    perform_backward,
                ) = contrastive_forward(model, cfg, inputs, index, time,
                                        epoch_exact, scaler)
            elif cfg.DETECTION.ENABLE:
                # Compute the predictions.
                preds = model(inputs, meta["boxes"])
            else:
                profiler.log_tic("model_time")
                preds = model(inputs)
                profiler.log_toc("model_time", shape=inputs[0].shape)

            if cfg.TASK == "ssl" and cfg.MODEL.MODEL_NAME == "ContrastiveModel":
                # SSL targets are all-zero class indices by construction.
                labels = torch.zeros(preds.size(0),
                                     dtype=labels.dtype,
                                     device=labels.device)

            if cfg.MODEL.MODEL_NAME == "ContrastiveModel" and partial_loss:
                loss = partial_loss
            else:
                # Compute the loss.
                loss = loss_fun(preds, labels)

        # check Nan Loss.
        misc.check_nan_losses(loss)
        if perform_backward:
            scaler.scale(loss).backward()
        # Unscales the gradients of optimizer's assigned params in-place
        scaler.unscale_(optimizer)
        # Clip gradients if necessary
        if cfg.SOLVER.CLIP_GRAD_VAL:
            torch.nn.utils.clip_grad_value_(model.parameters(),
                                            cfg.SOLVER.CLIP_GRAD_VAL)
        elif cfg.SOLVER.CLIP_GRAD_L2NORM:
            torch.nn.utils.clip_grad_norm_(model.parameters(),
                                           cfg.SOLVER.CLIP_GRAD_L2NORM)
        model = cancel_swav_gradients(model, cfg, epoch_exact)
        if cur_iter < iters_noupdate and cur_epoch == 0:  # for e.g. MoCo
            logger.info("Not updating parameters {}/{}".format(
                cur_iter, iters_noupdate))
        else:
            # Update the parameters.
            scaler.step(optimizer)
            scaler.update()

        if cfg.MIXUP.ENABLE:
            # Recover hard labels/predictions from the soft mixup targets so
            # top-k error below remains meaningful: the two mixed classes'
            # prediction mass is merged onto the dominant class.
            _top_max_k_vals, top_max_k_inds = torch.topk(labels,
                                                         2,
                                                         dim=1,
                                                         largest=True,
                                                         sorted=True)
            idx_top1 = torch.arange(labels.shape[0]), top_max_k_inds[:, 0]
            idx_top2 = torch.arange(labels.shape[0]), top_max_k_inds[:, 1]
            preds = preds.detach()
            preds[idx_top1] += preds[idx_top2]
            preds[idx_top2] = 0.0
            labels = top_max_k_inds[:, 0]

        if cfg.DETECTION.ENABLE:
            if cfg.NUM_GPUS > 1:
                loss = du.all_reduce([loss])[0]
            loss = loss.item()
            # Update and log stats.
            train_meter.update_stats(None, None, None, loss, lr)
            # write to tensorboard format if available.
            if writer is not None:
                writer.add_scalars(
                    {
                        "Train/loss": loss,
                        "Train/lr": lr
                    },
                    global_step=data_size * cur_epoch + cur_iter,
                )
        else:
            top1_err, top5_err = None, None
            if cfg.DATA.MULTI_LABEL:
                # Gather all the predictions across all the devices.
                if cfg.NUM_GPUS > 1:
                    [loss] = du.all_reduce([loss])
                loss = loss.item()
            else:
                # Compute the errors.
                num_topks_correct = metrics.topks_correct(
                    preds, labels, (1, 5))
                top1_err, top5_err = [(1.0 - x / preds.size(0)) * 100.0
                                      for x in num_topks_correct]
                # Gather all the predictions across all the devices.
                if cfg.NUM_GPUS > 1:
                    loss, top1_err, top5_err = du.all_reduce(
                        [loss.detach(), top1_err, top5_err])
                # Copy the stats from GPU to CPU (sync point).
                loss, top1_err, top5_err = (
                    loss.item(),
                    top1_err.item(),
                    top5_err.item(),
                )
            # Update and log stats.
            train_meter.update_stats(
                top1_err,
                top5_err,
                loss,
                lr,
                batch_size * max(
                    cfg.NUM_GPUS, 1
                ),  # If running on CPU (cfg.NUM_GPUS == 1), use 1 to represent 1 CPU.
            )
            # write to tensorboard format if available.
            if writer is not None:
                writer.add_scalars(
                    {
                        "Train/loss": loss,
                        "Train/lr": lr,
                        "Train/Top1_err": top1_err,
                        "Train/Top5_err": top5_err,
                    },
                    global_step=data_size * cur_epoch + cur_iter,
                )
        torch.cuda.synchronize()
        train_meter.iter_toc()  # do measure allreduce for this meter
        train_meter.log_iter_stats(cur_epoch, cur_iter)
        torch.cuda.synchronize()
        train_meter.iter_tic()
        profiler.log_toc("loop_time", shape=inputs[0].shape)
        profiler.log_tic("loop_time")
        profiler.report(25)
        # Drop the batch reference before the next iteration's allocation.
        del inputs

    # Log epoch stats.
    train_meter.log_epoch_stats(cur_epoch)
    train_meter.reset()
def train_epoch(
    train_loader, model, optimizer, train_meter, cur_epoch, cfg, test_imp=False
):
    """
    Perform the video training for one epoch.
    Args:
        train_loader (loader): video training loader.
        model (model): the video model to train.
        optimizer (optim): the optimizer to perform optimization on the model's
            parameters.
        train_meter (ClevrerTrainMeter): training meters to log the training
            performance.
        cur_epoch (int): current epoch of training.
        cfg (CfgNode): configs. Details can be found in
            slowfast/config/defaults.py
        test_imp (bool): when True, print debug output for one batch and stop
            after the first iteration (implementation sanity check).
    """
    test_counter = 0
    # Enable train mode.
    model.train()
    train_meter.iter_tic()
    data_size = len(train_loader)
    for cur_iter, sampled_batch in enumerate(train_loader):
        frames = sampled_batch['frames']
        des_q = sampled_batch['question_dict']['question']
        des_ans = sampled_batch['question_dict']['ans']
        # des_len = sampled_batch['question_dict']['len']
        # Transfer the data to the current GPU device.
        if cfg.NUM_GPUS:
            if isinstance(frames, (list,)):
                for i in range(len(frames)):
                    frames[i] = frames[i].cuda(non_blocking=True)
            else:
                frames = frames.cuda(non_blocking=True)
            des_q = des_q.cuda(non_blocking=True)
            des_ans = des_ans.cuda()
            # des_len = des_len.cuda(non_blocking=True)
        # Update the learning rate.
        lr = optim.get_epoch_lr(cur_epoch + float(cur_iter) / data_size, cfg)
        optim.set_lr(optimizer, lr)
        train_meter.data_toc()
        #Separated batches
        #Des
        # Third argument selects a question mode on the model — presumably
        # True == descriptive-question head; confirm with the model definition.
        pred_des_ans = model(frames, des_q, True)
        des_loss_fun = losses.get_loss_func('cross_entropy')(reduction="mean")
        loss = des_loss_fun(pred_des_ans, des_ans)
        # check Nan Loss.
        misc.check_nan_losses(loss)
        #Backward pass
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        #Save for stats
        loss_des_val = loss
        top1_err, top5_err = None, None
        # Compute the errors.
        num_topks_correct = metrics.topks_correct(pred_des_ans, des_ans, (1, 5))
        top1_err, top5_err = [
            (1.0 - x / pred_des_ans.size(0)) * 100.0 for x in num_topks_correct
        ]
        # This variant trains descriptive questions only, so the
        # multiple-choice stats are reported as absent.
        mc_opt_err, mc_q_err = None, None
        mb_size_mc = None
        # Copy the stats from GPU to CPU (sync point).
        loss_des_val, top1_err, top5_err = (
            loss_des_val.item(), top1_err.item(), top5_err.item()
        )
        #top1_err, top5_err, mc_opt_err, mc_q_err, loss_des, loss_mc, lr, mb_size
        # Update and log stats.
        train_meter.update_stats(
            top1_err, top5_err, mc_opt_err, mc_q_err, loss_des_val, None, lr,
            des_q.size()[0], mb_size_mc
        )
        train_meter.iter_toc()  # measure allreduce for this meter
        train_meter.log_iter_stats(cur_epoch, cur_iter)
        train_meter.iter_tic()
        #For testing implementation
        if test_imp:
            print(" --- Descriptive questions results --- ")
            # print("Des_q")
            # print(des_q)
            print("Des_ans")
            print(des_ans)
            #print("Des_ans_pred")
            #print(pred_des_ans)
            print("Argmax => prediction")
            print(torch.argmax(pred_des_ans, dim=1, keepdim=False))
            print("Top1_err and Top5err")
            print(top1_err, top5_err)
            print("Loss_des_val = {}".format(loss_des_val))
            test_counter += 1
            # Stop after a single debug iteration.
            if test_counter == 1:
                break
    # Log epoch stats.
    train_meter.log_epoch_stats(cur_epoch)
    train_meter.reset()
def train_epoch(self, train_loader, model, optimizer, train_meter, cur_epoch,
                cfg, writer=None):
    """
    Perform the video training for one epoch.
    Args:
        train_loader (loader): video training loader.
        model (model): the video model to train.
        optimizer (optim): the optimizer to perform optimization on the model's
            parameters.
        train_meter (TrainMeter): training meters to log the training
            performance.
        cur_epoch (int): current epoch of training.
        cfg (CfgNode): configs. Details can be found in
            slowfast/config/defaults.py
        writer (TensorboardWriter, optional): TensorboardWriter object to writer
            Tensorboard log.
    Returns:
        float: samples-per-second throughput of the whole job (reduced across
        workers).
    """
    # Enable train mode.
    model.train()
    train_meter.iter_tic()
    data_size = len(train_loader)
    start = time.time()
    # NOTE(review): mixes `cfg` (argument) and `self.cfg` — verify they are the
    # same config object.
    btch = cfg.TRAIN.BATCH_SIZE * self.cfg.NUM_SHARDS
    rankE = os.environ.get("RANK", None)
    worldE = os.environ.get("WORLD_SIZE", None)
    dSize = data_size * btch
    self.logger.info(
        "Train Epoch {} dLen {} Batch {} dSize {} localRank {} rank {} {} world {} {}"
        .format(cur_epoch, data_size, btch, dSize, du.get_local_rank(),
                du.get_rank(), rankE, du.get_world_size(), worldE))
    tot = 0
    first = True
    # Accumulated per-sample predictions/labels for binary-classifier metrics.
    predsAll = []
    labelsAll = []
    for cur_iter, (inputs, labels, _, meta) in enumerate(train_loader):
        # Transfer the data to the current GPU device.
        tot += len(labels)
        if isinstance(inputs, (list, )):
            # Log the batch shapes once per epoch (first iteration only).
            if first:
                self.logger.info(
                    "rank {} LEN {} {} shape Slow {} Fast {} {} tot {}".
                    format(du.get_rank(), len(labels), len(inputs),
                           inputs[0].shape, inputs[1].shape, labels[0].shape,
                           tot))
                first = False
            for i in range(len(inputs)):
                inputs[i] = inputs[i].cuda(non_blocking=True)
        else:
            if first:
                self.logger.info(
                    "rank {} LEN {} shape {} {} tot {}".format(
                        du.get_rank(), len(labels), inputs.shape,
                        labels[0].shape, tot))
                first = False
            inputs = inputs.cuda(non_blocking=True)
        labels = labels.cuda()
        for key, val in meta.items():
            if isinstance(val, (list, )):
                for i in range(len(val)):
                    val[i] = val[i].cuda(non_blocking=True)
            else:
                meta[key] = val.cuda(non_blocking=True)
        # Update the learning rate.
        lr = optim.get_epoch_lr(cur_epoch + float(cur_iter) / data_size, cfg)
        optim.set_lr(optimizer, lr)
        if cfg.DETECTION.ENABLE:
            # Compute the predictions.
            preds = model(inputs, meta["boxes"])
        else:
            # Perform the forward pass.
            preds = model(inputs)
        # Explicitly declare reduction to mean.
        loss_fun = losses.get_loss_func(
            cfg.MODEL.LOSS_FUNC)(reduction="mean")
        # Compute the loss.
        loss = loss_fun(preds, labels)
        # check Nan Loss.
        misc.check_nan_losses(loss)
        # Perform the backward pass.
        optimizer.zero_grad()
        loss.backward()
        # Update the parameters.
        optimizer.step()
        if cfg.DETECTION.ENABLE:
            if cfg.NUM_GPUS > 1:
                loss = du.all_reduce([loss])[0]
            loss = loss.item()
            train_meter.iter_toc()
            # Update and log stats.
            train_meter.update_stats(None, None, None, loss, lr)
            # write to tensorboard format if available.
            if writer is not None:
                writer.add_scalars(
                    {
                        "Train/loss": loss,
                        "Train/lr": lr
                    },
                    global_step=data_size * cur_epoch + cur_iter,
                )
            ite = data_size * cur_epoch + cur_iter
            if du.is_master_proc():
                self.logger.log_row(name='TrainLoss', iter=ite, loss=loss,
                                    description="train loss")
                self.logger.log_row(name='TrainLr', iter=ite, lr=lr,
                                    description="train learn rate")
        else:
            top1_err, top5_err = None, None
            if cfg.DATA.MULTI_LABEL:
                # Gather all the predictions across all the devices.
                if cfg.NUM_GPUS > 1:
                    [loss] = du.all_reduce([loss])
                loss = loss.item()
            else:
                # Binary classifier - save preds / labels for metrics
                if cfg.MODEL.NUM_CLASSES == 2:
                    predsAll.extend(preds.detach().cpu().numpy()[:, -1])
                    labelsAll.extend(labels.detach().cpu().numpy())
                # Compute the errors. Cap top-k at the number of classes so
                # top-5 is valid for small label spaces.
                num_topks_correct = metrics.topks_correct(
                    preds, labels, (1, min(5, cfg.MODEL.NUM_CLASSES)))
                top1_err, top5_err = [(1.0 - x / preds.size(0)) * 100.0
                                      for x in num_topks_correct]
                # Gather all the predictions across all the devices.
                if cfg.NUM_GPUS > 1:
                    loss, top1_err, top5_err = du.all_reduce(
                        [loss, top1_err, top5_err])
                # Copy the stats from GPU to CPU (sync point).
                loss, top1_err, top5_err = (
                    loss.item(),
                    top1_err.item(),
                    top5_err.item(),
                )
            train_meter.iter_toc()
            # Update and log stats.
            # self.logger.info("UPDATING stat {} {} {}".format(inputs[0].size(0), cfg.NUM_GPUS, inputs[0].size(0) * cfg.NUM_GPUS))
            train_meter.update_stats(top1_err, top5_err, loss, lr,
                                     inputs[0].size(0) * cfg.NUM_GPUS)
            # write to tensorboard format if available.
            if writer is not None:
                writer.add_scalars(
                    {
                        "Train/loss": loss,
                        "Train/lr": lr,
                        "Train/Top1_err": top1_err,
                        "Train/Top5_err": top5_err,
                    },
                    global_step=data_size * cur_epoch + cur_iter,
                )
        stats = train_meter.log_iter_stats(cur_epoch, cur_iter, predsAll,
                                           labelsAll)
        ite = dSize * cur_epoch + btch * (cur_iter + 1)
        self.plotStats(stats, ite, 'TrainIter')
        train_meter.iter_tic()
        # NOTE(review): as written this logs model info every iteration when
        # LOG_MODEL_INFO is set — verify it isn't meant to run once per epoch.
        if du.is_master_proc() and cfg.LOG_MODEL_INFO:
            misc.log_model_info(model, cfg, use_train_input=True)
    # Log epoch stats. Gather the per-worker preds/labels onto every rank so
    # epoch-level metrics see the full dataset.
    gathered = du.all_gather([
        torch.tensor(predsAll).to(torch.device("cuda")),
        torch.tensor(labelsAll).to(torch.device("cuda"))
    ])
    stats = train_meter.log_epoch_stats(cur_epoch,
                                        gathered[0].detach().cpu().numpy(),
                                        gathered[1].detach().cpu().numpy())
    ite = (cur_epoch + 1) * dSize
    self.plotStats(stats, ite, 'TrainEpoch')
    train_meter.reset()
    end = time.time()
    el = end - start
    # Total samples across workers (sum) and slowest-worker wall time (max)
    # give the job-level samples/sec.
    totAll = du.all_reduce([torch.tensor(tot).cuda()], average=False)
    tSum = totAll[0].item()
    elT = torch.tensor(el).cuda()
    elMax = du.all_reduce([elT], op=dist.ReduceOp.MAX, average=False)[0].item()
    jobRate = tSum / elMax
    self.logger.info(
        "totSampCnt {} workerSampCnt {} eTimeMax {} eTimeWorker {} SampPerSecJob {:.1f} SampPerSecWorker {:.1f}"
        .format(tSum, tot, elMax, el, jobRate, tot / el))
    return jobRate
def train_epoch(train_loader, model, optimizer, train_meter, cur_epoch, writer,
                nep, cfg):
    """
    Perform the video training for one epoch (frame-prediction variant: the
    model returns a dict of named losses, including 'total_loss').
    Args:
        train_loader (loader): video training loader.
        model (model): the video model to train.
        optimizer (optim): the optimizer to perform optimization on the model's
            parameters.
        train_meter (TrainMeter): training meters to log the training
            performance.
        cur_epoch (int): current epoch of training.
        writer: tensorboard-style writer for scalar logging, or None.
        nep: Neptune experiment handle for metric logging, or None.
        cfg (CfgNode): configs. Details can be found in
            slowfast/config/defaults.py
    """

    def _short_name(key):
        # FIX: the previous code used key.strip('loss_'), but str.strip removes
        # the *character set* {'l','o','s','_'} from both ends, not the literal
        # prefix (e.g. 'total_loss' -> 'tota'). Drop only a leading 'loss_'.
        return key[len('loss_'):] if key.startswith('loss_') else key

    # Enable train mode.
    model.train()
    train_meter.iter_tic()
    data_size = len(train_loader)
    global_iters = data_size * cur_epoch
    for cur_iter, (inputs, labels, _, meta) in enumerate(train_loader):
        # Transfer the data to the current GPU device.
        inputs = inputs.cuda(non_blocking=True)
        # Update the learning rate.
        lr = optim.get_epoch_lr(cur_epoch + float(cur_iter) / data_size,
                                global_iters, cfg)
        optim.set_lr(optimizer, lr)
        # Forward pass: `preds` is a dict of named scalar losses.
        preds = model(inputs)
        out_keys = list(preds.keys())
        total_loss = preds['total_loss']
        # check Nan Loss.
        misc.check_nan_losses(total_loss)
        # Perform the backward pass.
        optimizer.zero_grad()
        total_loss.backward()
        # Update the parameters.
        optimizer.step()
        # Gather all the losses across all the devices.
        # (Renamed from `losses` to avoid shadowing the `losses` module.)
        loss_vals = [preds[k] for k in out_keys]
        if cfg.NUM_GPUS > 1:
            loss_vals = du.all_reduce(loss_vals)
        loss_vals = [l.item() for l in loss_vals]
        loss_logs = dict(zip(out_keys, loss_vals))
        train_meter.iter_toc()
        # Update and log stats.
        train_meter.update_stats(
            lr, inputs[0].size(0) * cfg.NUM_GPUS, **loss_logs
        )
        if writer is not None and global_iters % cfg.LOG_PERIOD == 0:
            # NOTE(review): raw weight/bias dumps look like debugging
            # leftovers — consider removing once training is stable.
            logger.info(model.conv0[2].weight)
            logger.info(model.conv0[2].bias)
            for k, v in loss_logs.items():
                writer.add_scalar('loss/' + _short_name(k),
                                  train_meter.stats[k].get_win_median(),
                                  global_iters)
        if nep is not None and global_iters % cfg.LOG_PERIOD == 0:
            for k, v in loss_logs.items():
                nep.log_metric(_short_name(k),
                               train_meter.stats[k].get_win_median())
            nep.log_metric('global_iters', global_iters)
        # Periodically dump a qualitative prediction grid (master only).
        if (global_iters % cfg.SUMMARY_PERIOD == 0 and du.get_rank() == 0
                and du.is_master_proc(num_gpus=cfg.NUM_GPUS)):
            with torch.no_grad():
                inputs = inputs[:min(3, len(inputs))]
                if 'masks' in meta:
                    frames = model(
                        (inputs, meta['masks'][:min(3, len(inputs))]),
                        extra=['frames'])['frames']
                else:
                    frames = model(inputs, extra=['frames'])['frames']
                n_rows = inputs.size(2) - 1
                inputs = inputs.transpose(1, 2)[:, -n_rows:]
                frames = frames.transpose(1, 2)[:, -n_rows:]
                # Builds a color-coded diff of prediction vs input
                # (mismatched pixels / matched pixels / zeros stacked on the
                # channel axis) — assumes (N, C, T, H, W) inputs; TODO confirm.
                frames = torch.cat([(frames != inputs) * frames,
                                    (frames == inputs) * inputs,
                                    torch.zeros_like(frames)], 2)
                inputs = torch.cat([inputs] * 3, 2)
                images = torch.cat([inputs, frames],
                                   1).reshape((-1,) + inputs.shape[2:])
                tv.utils.save_image(
                    images,
                    os.path.join(cfg.OUTPUT_DIR,
                                 'preds_%d.jpg' % global_iters),
                    nrow=n_rows, normalize=True)
        train_meter.log_iter_stats(cur_epoch, cur_iter)
        train_meter.iter_tic()
        global_iters += 1
    # Log epoch stats.
    train_meter.log_epoch_stats(cur_epoch)
    train_meter.reset()
def train_epoch(train_loader, model, optimizer, train_meter, cur_epoch, cfg,
                writer=None):
    """
    Perform the video training for one epoch.
    Args:
        train_loader (loader): video training loader.
        model (model): the video model to train.
        optimizer (optim): the optimizer to perform optimization on the model's
            parameters.
        train_meter (ClevrerTrainMeter): training meters to log the training
            performance.
        cur_epoch (int): current epoch of training.
        cfg (CfgNode): configs. Details can be found in
            slowfast/config/defaults.py
        writer (TensorboardWriter, optional): TensorboardWriter object to writer
            Tensorboard log.
    """
    # Enable train mode.
    model.train()
    train_meter.iter_tic()
    data_size = len(train_loader)
    for cur_iter, sampled_batch in enumerate(train_loader):
        # Each batch carries both a descriptive (des) and a multiple-choice
        # (mc) question set for the same clip.
        frames = sampled_batch['frames']
        des_q = sampled_batch['question_dict']['des_q']
        des_ans = sampled_batch['question_dict']['des_ans']
        mc_q = sampled_batch['question_dict']['mc_q']
        mc_ans = sampled_batch['question_dict']['mc_ans']
        # Transfer the data to the current GPU device.
        if cfg.NUM_GPUS:
            frames = frames.cuda(non_blocking=True)
            des_q = des_q.cuda(non_blocking=True)
            des_ans = des_ans.cuda()
            mc_q = mc_q.cuda(non_blocking=True)
            mc_ans = mc_ans.cuda()
        # Update the learning rate.
        lr = optim.get_epoch_lr(cur_epoch + float(cur_iter) / data_size, cfg)
        optim.set_lr(optimizer, lr)
        train_meter.data_toc()
        # Third argument presumably selects the question head
        # (True = descriptive, False = multiple-choice) — confirm with the
        # model definition.
        pred_des_ans = model(frames, des_q, True)
        pred_mc_ans = model(frames, mc_q, False)
        # Explicitly declare reduction to mean.
        des_loss_fun = losses.get_loss_func('cross_entropy')(reduction="mean")
        mc_loss_fun = losses.get_loss_func('bce_logit')(reduction="mean")
        # Compute the loss (sum of both question-type losses).
        loss = des_loss_fun(pred_des_ans, des_ans)
        loss += mc_loss_fun(pred_mc_ans, mc_ans)
        # check Nan Loss.
        misc.check_nan_losses(loss)
        #Check if plateau
        # Perform the backward pass.
        optimizer.zero_grad()
        loss.backward()
        # Update the parameters.
        optimizer.step()
        top1_err, top5_err = None, None
        # Compute the errors.
        num_topks_correct = metrics.topks_correct(pred_des_ans, des_ans, (1, 5))
        top1_err, top5_err = [(1.0 - x / pred_des_ans.size(0)) * 100.0
                              for x in num_topks_correct]
        # Per-option disagreement between thresholded sigmoid output and the
        # binary mc answer vector.
        diff_mc_ans = torch.abs(
            mc_ans - (torch.sigmoid(pred_mc_ans) >= 0.5).float())
        #Errors
        # The 4 assumes four options per multiple-choice question — TODO
        # confirm against the dataset.
        mc_opt_err = 100 * torch.true_divide(diff_mc_ans.sum(),
                                             (4 * des_q.size()[0]))
        # A question counts as wrong if any of its options is wrong.
        mc_q_err = 100 * torch.true_divide(
            (diff_mc_ans.sum(dim=1, keepdim=True) != 0).float().sum(),
            des_q.size()[0])
        # Gather all the predictions across all the devices.
        if cfg.NUM_GPUS > 1:
            loss, top1_err, top5_err, mc_opt_err, mc_q_err = du.all_reduce(
                [loss, top1_err, top5_err, mc_opt_err, mc_q_err])
        # Copy the stats from GPU to CPU (sync point).
        loss, top1_err, top5_err, mc_opt_err, mc_q_err = (loss.item(),
                                                          top1_err.item(),
                                                          top5_err.item(),
                                                          mc_opt_err.item(),
                                                          mc_q_err.item())
        # Update and log stats.
        train_meter.update_stats(
            top1_err,
            top5_err,
            mc_opt_err,
            mc_q_err,
            loss,
            lr,
            frames.size()[0] * max(
                cfg.NUM_GPUS, 1
            ),  # If running on CPU (cfg.NUM_GPUS == 1), use 1 to represent 1 CPU.
        )
        # write to tensorboard format if available.
        if writer is not None:
            writer.add_scalars(
                {
                    "Train/loss": loss,
                    "Train/lr": lr,
                    "Train/Top1_err": top1_err,
                    "Train/Top5_err": top5_err,
                    "Train/mc_opt_err": mc_opt_err,
                    "Train/mc_q_err": mc_q_err,
                },
                global_step=data_size * cur_epoch + cur_iter,
            )
        train_meter.iter_toc()  # measure allreduce for this meter
        train_meter.log_iter_stats(cur_epoch, cur_iter)
        train_meter.iter_tic()
    # Log epoch stats.
    train_meter.log_epoch_stats(cur_epoch)
    train_meter.reset()
def train_epoch(train_loader, model, optimizer, train_meter, cur_epoch, cfg,
                writer=None):
    """
    Perform the video training for one epoch.
    Args:
        train_loader (loader): video training loader.
        model (model): the video model to train.
        optimizer (optim): the optimizer to perform optimization on the model's
            parameters.
        train_meter (TrainMeter): training meters to log the training
            performance.
        cur_epoch (int): current epoch of training.
        cfg (CfgNode): configs. Details can be found in
            slowfast/config/defaults.py
        writer (TensorboardWriter, optional): TensorboardWriter object to writer
            Tensorboard log.
    """
    # Enable train mode.
    model.train()
    train_meter.iter_tic()
    data_size = len(train_loader)
    # NOTE(review): `boxes` and `b_indices` are unpacked from the loader but
    # never used here — verify the loader contract, or whether they were meant
    # to feed the detection branch.
    for cur_iter, (inputs, labels, _, meta, boxes, b_indices) in enumerate(train_loader):
        # Transfer the data to the current GPU device.
        if cfg.NUM_GPUS:
            if isinstance(inputs, (list, )):
                for i in range(len(inputs)):
                    inputs[i] = inputs[i].cuda(non_blocking=True)
            else:
                inputs = inputs.cuda(non_blocking=True)
            labels = labels.cuda()
            for key, val in meta.items():
                if isinstance(val, (list, )):
                    for i in range(len(val)):
                        val[i] = val[i].cuda(non_blocking=True)
                else:
                    meta[key] = val.cuda(non_blocking=True)
        # Update the learning rate.
        lr = optim.get_epoch_lr(cur_epoch + float(cur_iter) / data_size, cfg)
        optim.set_lr(optimizer, lr)
        train_meter.data_toc()
        if cfg.DETECTION.ENABLE:
            preds = model(inputs, meta["boxes"])
        else:
            preds = model(inputs)
        # Explicitly declare reduction to mean.
        loss_fun = losses.get_loss_func(cfg.MODEL.LOSS_FUNC)(reduction="mean")
        # Compute the loss.
        loss = loss_fun(preds, labels)
        # check Nan Loss.
        misc.check_nan_losses(loss)
        # Perform the backward pass.
        optimizer.zero_grad()
        loss.backward()
        # Update the parameters.
        optimizer.step()
        if cfg.DETECTION.ENABLE:
            if cfg.NUM_GPUS > 1:
                loss = du.all_reduce([loss])[0]
            loss = loss.item()
            # Update and log stats.
            train_meter.update_stats(None, None, None, loss, lr)
            # write to tensorboard format if available.
            if writer is not None:
                writer.add_scalars(
                    {
                        "Train/loss": loss,
                        "Train/lr": lr
                    },
                    global_step=data_size * cur_epoch + cur_iter,
                )
        else:
            top1_err, top5_err = None, None
            if cfg.DATA.MULTI_LABEL:
                # Gather all the predictions across all the devices.
                if cfg.NUM_GPUS > 1:
                    [loss] = du.all_reduce([loss])
                loss = loss.item()
            else:
                # Compute the errors.
                num_topks_correct = metrics.topks_correct(
                    preds, labels, (1, 5))
                top1_err, top5_err = [(1.0 - x / preds.size(0)) * 100.0
                                      for x in num_topks_correct]
                # Gather all the predictions across all the devices.
                if cfg.NUM_GPUS > 1:
                    loss, top1_err, top5_err = du.all_reduce(
                        [loss, top1_err, top5_err])
                # Copy the stats from GPU to CPU (sync point).
                loss, top1_err, top5_err = (
                    loss.item(),
                    top1_err.item(),
                    top5_err.item(),
                )
            # Update and log stats.
            train_meter.update_stats(
                top1_err,
                top5_err,
                loss,
                lr,
                inputs[0].size(0) * max(
                    cfg.NUM_GPUS, 1
                ),  # If running on CPU (cfg.NUM_GPUS == 1), use 1 to represent 1 CPU.
            )
            # write to tensorboard format if available.
            if writer is not None:
                writer.add_scalars(
                    {
                        "Train/loss": loss,
                        "Train/lr": lr,
                        "Train/Top1_err": top1_err,
                        "Train/Top5_err": top5_err,
                    },
                    global_step=data_size * cur_epoch + cur_iter,
                )
        train_meter.iter_toc()  # measure allreduce for this meter
        train_meter.log_iter_stats(cur_epoch, cur_iter)
        train_meter.iter_tic()
    # Log epoch stats.
    train_meter.log_epoch_stats(cur_epoch)
    train_meter.reset()
def train_epoch(train_loader, model, optimizer, train_meter, cur_epoch, cfg,
                test_imp=False):
    """
    Perform the video training for one epoch.
    Args:
        train_loader (loader): video training loader.
        model (model): the video model to train.
        optimizer (optim): the optimizer to perform optimization on the model's
            parameters.
        train_meter (ClevrerTrainMeter): training meters to log the training
            performance.
        cur_epoch (int): current epoch of training.
        cfg (CfgNode): configs. Details can be found in
            slowfast/config/defaults.py
        test_imp (bool): when True, print debug output and stop after a few
            iterations (implementation sanity check).
    """
    test_counter = 0
    # Enable train mode.
    model.train()
    train_meter.iter_tic()
    data_size = len(train_loader)
    for cur_iter, sampled_batch in enumerate(train_loader):
        #Samples 2 batches. One for des and one for mc
        #There are much more des, then some batches are only des
        des_batch = sampled_batch['des']
        des_q = des_batch['question_dict']['question']
        des_ans = des_batch['question_dict']['ans']
        des_len = des_batch['question_dict']['len']
        # Transfer the data to the current GPU device.
        if cfg.NUM_GPUS:
            des_q = des_q.cuda(non_blocking=True)
            des_ans = des_ans.cuda()
            des_len = des_len.cuda(non_blocking=True)
        has_mc = sampled_batch['has_mc'][0]
        if has_mc:
            mc_batch = sampled_batch['mc']
            mc_q = mc_batch['question_dict']['question']
            mc_ans = mc_batch['question_dict']['ans']
            mc_len = mc_batch['question_dict']['len']
            if cfg.NUM_GPUS:
                mc_q = mc_q.cuda(non_blocking=True)
                mc_ans = mc_ans.cuda()
                mc_len = mc_len.cuda(non_blocking=True)
        # Update the learning rate.
        lr = optim.get_epoch_lr(cur_epoch + float(cur_iter) / data_size, cfg)
        optim.set_lr(optimizer, lr)
        train_meter.data_toc()
        #Separated batches: one optimizer step for des, then (optionally) one
        #for mc.
        #Des
        pred_des_ans = model(des_q, True)
        des_loss_fun = losses.get_loss_func('cross_entropy')(reduction="mean")
        loss = des_loss_fun(pred_des_ans, des_ans)
        # check Nan Loss.
        misc.check_nan_losses(loss)
        #Backward pass
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        #Save for stats
        loss_des_val = loss
        #MC
        loss_mc_val = None
        if has_mc:
            pred_mc_ans = model(mc_q, False)
            mc_loss_fun = losses.get_loss_func('bce_logit')(reduction="mean")
            loss = mc_loss_fun(pred_mc_ans, mc_ans)
            #Multiply by 4
            # check Nan Loss.
            misc.check_nan_losses(loss)
            #Backward pass
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            #Save for stats
            loss_mc_val = loss
        # #Non separated Not updated for same batch 2 questions:
        # pred_des_ans = model(des_q, True)
        # pred_mc_ans = model(mc_q, False)
        # # Explicitly declare reduction to mean.
        # des_loss_fun = losses.get_loss_func('cross_entropy')(reduction="mean")
        # mc_loss_fun = losses.get_loss_func('bce_logit')(reduction="mean")
        # # Compute the loss.
        # loss_des_val = des_loss_fun(pred_des_ans, des_ans)
        # loss_mc_val = mc_loss_fun(pred_mc_ans, mc_ans)
        # loss = loss_mc_val + loss_des_val
        # # check Nan Loss.
        # misc.check_nan_losses(loss)
        # # Perform the backward pass.
        # optimizer.zero_grad()
        # loss.backward()
        # # Update the parameters.
        # optimizer.step()
        top1_err, top5_err = None, None
        # Compute the errors.
        num_topks_correct = metrics.topks_correct(pred_des_ans, des_ans, (1, 5))
        top1_err, top5_err = [(1.0 - x / pred_des_ans.size(0)) * 100.0
                              for x in num_topks_correct]
        if has_mc:
            # Per-option disagreement between thresholded sigmoid output and
            # the binary mc answer vector.
            diff_mc_ans = torch.abs(
                mc_ans - (torch.sigmoid(pred_mc_ans) >= 0.5).float())
            #Errors
            # The 4 assumes four options per multiple-choice question — TODO
            # confirm against the dataset.
            mc_opt_err = 100 * torch.true_divide(diff_mc_ans.sum(),
                                                 (4 * mc_q.size()[0]))
            # A question counts as wrong if any of its options is wrong.
            mc_q_err = 100 * torch.true_divide(
                (diff_mc_ans.sum(dim=1, keepdim=True) != 0).float().sum(),
                mc_q.size()[0])
            # Copy the stats from GPU to CPU (sync point).
            loss_des_val, loss_mc_val, top1_err, top5_err, mc_opt_err, mc_q_err = (
                loss_des_val.item(), loss_mc_val.item(), top1_err.item(),
                top5_err.item(), mc_opt_err.item(), mc_q_err.item())
            mb_size_mc = mc_q.size()[0]
        else:
            mc_opt_err, mc_q_err = None, None
            mb_size_mc = None
            loss_des_val, top1_err, top5_err = (loss_des_val.item(),
                                                top1_err.item(),
                                                top5_err.item())
        #top1_err, top5_err, mc_opt_err, mc_q_err, loss_des, loss_mc, lr, mb_size
        # Update and log stats.
        train_meter.update_stats(top1_err, top5_err, mc_opt_err, mc_q_err,
                                 loss_des_val, loss_mc_val, lr,
                                 des_q.size()[0], mb_size_mc)
        train_meter.iter_toc()  # measure allreduce for this meter
        train_meter.log_iter_stats(cur_epoch, cur_iter)
        train_meter.iter_tic()
        #For testing implementation
        if test_imp:
            print(" --- Descriptive questions results --- ")
            # print("Des_q")
            # print(des_q)
            print("Des_ans")
            print(des_ans)
            #print("Des_ans_pred")
            #print(pred_des_ans)
            print("Argmax => prediction")
            print(torch.argmax(pred_des_ans, dim=1, keepdim=False))
            print("Top1_err and Top5err")
            print(top1_err, top5_err)
            print("Loss_des_val = {}".format(loss_des_val))
            if has_mc:
                print(" --- Multiple Choice questions results --- ")
                # print("Mc_q")
                # print(mc_q)
                # print("Mc errors pred x ans")
                # print(torch.abs(mc_ans - (torch.sigmoid(pred_mc_ans) >= 0.5).float()))
                print("mc_opt_err = {} \nmc_q_err = {}".format(
                    mc_opt_err, mc_q_err))
                print("Loss_mc_val = {}".format(loss_mc_val))
            test_counter += 1
            # Stop after a few debug iterations.
            if test_counter == 4:
                break
    # Log epoch stats.
    train_meter.log_epoch_stats(cur_epoch)
    train_meter.reset()