def updateMetrics(args, model, allOutput, allTarget, precVidName, nbVideos, metrDict, outDict, targDict):
    ''' Store the outputs of one video and add its metrics to the running sums

    The outputs and targets are saved in outDict and targDict under the key
    precVidName. When args.compute_val_metrics is set, the metrics of the video
    are added in place to the values already stored in metrDict.

    Args:
    - args (Namespace): the namespace read for temp_model, val_l_temp, pool_temp_mod, compute_val_metrics and class_weight
    - model: the model, only forwarded to trainVal.computeScore
    - allOutput: the outputs of the video
    - allTarget: the targets of the video
    - precVidName: the key used to store the video in outDict and targDict
    - nbVideos (int): the value of the video counter before this call
    - metrDict (dict): the running sums of the metrics. Updated in place
    - outDict (dict): the outputs of each video. Updated in place
    - targDict (dict): the targets of each video. Updated in place
    Returns:
    - allOutput: the outputs stored for the video (replaced by the result of trainVal.computeScore when args.temp_model contains "net")
    - nbVideos (int): the video counter incremented by one
    '''

    # Models whose name contains "net" have their outputs post-processed by trainVal.computeScore
    if "net" in args.temp_model:
        allOutput = trainVal.computeScore(model, allOutput, allTarget, args.val_l_temp, args.pool_temp_mod, precVidName)

    outDict[precVidName] = allOutput
    targDict[precVidName] = allTarget

    if args.compute_val_metrics:
        weights = trainVal.getWeights(allTarget, args.class_weight)
        loss = F.binary_cross_entropy(allOutput, allTarget, weight=weights).item()
        metrDict["Loss"] += loss

        cov, overflow, iou = metrics.binaryToMetrics(allOutput > 0.5, allTarget)
        metrDict["Coverage"] += cov
        metrDict["Overflow"] += overflow

        # Harmonic mean of cov and (1 - overflow). Its denominator is zero when cov == 0 and
        # overflow == 1 : the F-score is then counted as 0 instead of dividing by zero.
        fscoreDenom = cov + 1 - overflow
        if fscoreDenom != 0:
            metrDict["True F-score"] += 2 * cov * (1 - overflow) / fscoreDenom

        metrDict["IoU"] += iou
        metrDict["AuC"] += roc_auc_score(allTarget.view(-1).cpu().numpy(), allOutput.view(-1).cpu().numpy())
        metrDict['DED'] += metrics.computeDED(allOutput.data > 0.5, allTarget.long())

    # NOTE(review): the source formatting was lost; the counter is assumed to be incremented
    # whether or not the metrics are computed - confirm against the original layout
    nbVideos += 1

    return allOutput, nbVideos
def epochSeqTr(model, optim, log_interval, loader, epoch, args, writer, **kwargs):
    ''' Train a model during one epoch

    NOTE(review): another function named epochSeqTr is defined further down in
    this file. If both live in the same module, the later definition replaces
    this one - confirm which one is meant to be used.

    Args:
    - model (torch.nn.Module): the model to be trained
    - optim (torch.optim): the optimiser
    - log_interval (int): the number of batches to wait before printing a log
    - loader (load_data.TrainLoader): the train data loader. Each batch is a (data, target, vidNames) tuple
    - epoch (int): the current epoch
    - args (Namespace): the namespace containing all the arguments required for training and building the network
    - writer (tensorboardX.SummaryWriter): the writer to use to log metrics evolution to tensorboardX
    - kwargs: must contain "discrModel", "discrIter" and "discrOptim", which are forwarded to lossTerms.addAdvTerm
    '''

    model.train()

    print("Epoch", epoch, " : train")

    metrDict = {"Loss": 0, "Coverage": 0, "Overflow": 0, "True F-score": 0, "AuC": 0,
                "IoU": 0, "Disc Accuracy": 0, "Dist Pos": 0, "Dist Neg": 0, "DED": 0}

    validBatch = 0

    # Outputs and targets of every processed batch. They are concatenated only once,
    # after the loop, instead of re-copying the whole history at each batch.
    outList, gtList = [], []

    for batch_idx, (data, target, vidNames) in enumerate(loader):

        # Batches without any positive target are skipped
        if not target.sum() > 0:
            continue

        if batch_idx % log_interval == 0:
            print("\t", batch_idx*len(data)*len(target[0]), "/", len(loader.dataset))

        # Putting tensors on cuda
        if args.cuda:
            data, target = data.cuda(), target.cuda()

        # Computing predictions
        if "net" in args.temp_model:
            output = model(data)
        else:
            output, _ = model(data)

        # Computing loss : args.train_step_to_ignore steps are removed at both ends of the second dimension
        stepToIgnore = args.train_step_to_ignore
        output = output[:, stepToIgnore:output.size(1) - stepToIgnore]
        target = target[:, stepToIgnore:target.size(1) - stepToIgnore]

        weights = getWeights(target, args.class_weight)
        loss = F.binary_cross_entropy(output, target, weight=weights)

        # Adding loss terms
        loss = lossTerms.addDistTerm(loss, args, output, target)
        loss, discMeanAcc = lossTerms.addAdvTerm(loss, args, model.features, model.featModel,
                                                 kwargs["discrModel"], kwargs["discrIter"], kwargs["discrOptim"])
        loss, distPos, distNeg = lossTerms.addSiamTerm(loss, args, model.features, target)

        loss.backward()
        optim.step()
        optim.zero_grad()

        # Metrics
        pred = output.data > 0.5
        cov, overflow, iou = metrics.binaryToMetrics(pred, target)

        metrDict["Coverage"] += cov
        metrDict["Overflow"] += overflow

        # Harmonic mean of cov and (1 - overflow). Its denominator is zero when cov == 0 and
        # overflow == 1 : the F-score is then counted as 0 instead of dividing by zero.
        fscoreDenom = cov + 1 - overflow
        if fscoreDenom != 0:
            metrDict["True F-score"] += 2 * cov * (1 - overflow) / fscoreDenom

        metrDict["IoU"] += iou
        metrDict["Disc Accuracy"] += discMeanAcc
        metrDict["Dist Pos"] += distPos
        metrDict["Dist Neg"] += distNeg
        metrDict["DED"] += metrics.computeDED(output.data > 0.5, target.long())

        outList.append(output.data)
        gtList.append(target)

        metrDict["Loss"] += loss.item()
        validBatch += 1

        # In debug mode only a few batches are processed
        if validBatch > 3 and args.debug:
            break

    # If the training set is empty (which we might want to just evaluate the model), no batch
    # has been collected : nothing is saved nor logged.
    # NOTE(review): the source formatting was lost; saving the weights and writing the summaries
    # are assumed to happen only when at least one batch was processed - confirm.
    if gtList:
        allOut = torch.cat(outList, dim=-1)
        allGT = torch.cat(gtList, dim=-1)

        metrDict["AuC"] = roc_auc_score(allGT.view(-1).cpu().numpy(), allOut.view(-1).cpu().numpy())

        torch.save(model.state_dict(), "../models/{}/model{}_epoch{}".format(args.exp_id, args.model_id, epoch))
        writeSummaries(metrDict, validBatch, writer, epoch, "train", args.model_id, args.exp_id)
def epochSeqTr(model, optim, log_interval, loader, epoch, args, writer, **kwargs):
    ''' Train a model during one epoch

    Args:
    - model (torch.nn.Module): the model to be trained
    - optim (torch.optim): the optimiser
    - log_interval (int): the number of batches to wait before printing a log
    - loader (load_data.TrainLoader): the train data loader. Each batch holds the data, the target and, when args.with_seg is set, a third element (seg)
    - epoch (int): the current epoch
    - args (Namespace): the namespace containing all the arguments required for training and building the network
    - writer (tensorboardX.SummaryWriter): the writer to use to log metrics evolution to tensorboardX
    - kwargs: must contain "master_net" when args.master_net is set
    '''

    start_time = time.time() if args.debug or args.benchmark else None

    model.train()

    print("Epoch", epoch, " : train")

    metrDict = None
    validBatch = 0
    totalImgNb = 0

    for batch_idx, batch in enumerate(loader):
        optim.zero_grad()

        if batch_idx % log_interval == 0:
            processedImgNb = batch_idx * len(batch[0])
            print("\t", processedImgNb, "/", len(loader.dataset))

        data, target = batch[0], batch[1]
        seg = batch[2] if args.with_seg else None

        if args.cuda:
            data, target = data.cuda(), target.cuda()
            if args.with_seg:
                seg = seg.cuda()

        if args.very_big_images:
            # NOTE(review): the source formatting was lost; subBatchTrain is assumed to run the
            # backward pass itself and to return the loss as a plain number - confirm.
            output, resDict, loss = subBatchTrain(args, data, target, model)
        else:
            resDict = model(data)
            output = resDict["pred"]

            if args.master_net:
                # The master net only provides predictions : no gradient is needed for it
                with torch.no_grad():
                    resDict["master_net_pred"] = kwargs["master_net"](data)["pred"]

            loss = computeLoss(args, output, target, resDict, data)
            loss.backward()
            # Only the value of the loss is needed from here on
            loss = loss.item()

        if args.distributed:
            average_gradients(model)

        optim.step()

        update.updateHardWareOccupation(args.debug, args.benchmark, args.cuda, epoch, "train",
                                        args.exp_id, args.model_id, batch_idx)

        # Metrics
        with torch.no_grad():
            metDictSample = metrics.binaryToMetrics(output, target, seg, resDict)
            metDictSample["Loss"] = loss
            metrDict = metrics.updateMetrDict(metrDict, metDictSample)

        validBatch += 1
        totalImgNb += target.size(0)

        # In debug mode only a few batches are processed
        if validBatch > 3 and args.debug:
            break

    # If the training set is empty (which we might want to just evaluate the model), there is
    # nothing to save nor to log.
    # NOTE(review): the source formatting was lost; writeSummaries is assumed to be skipped with
    # args.optuna, like the weight saving, and the timing is assumed to require a processed batch - confirm.
    if validBatch > 0:
        if not args.optuna:
            torch.save(model.state_dict(), "../models/{}/model{}_epoch{}".format(args.exp_id, args.model_id, epoch))
            writeSummaries(metrDict, totalImgNb, writer, epoch, "train", args.model_id, args.exp_id)

        if args.debug or args.benchmark:
            totalTime = time.time() - start_time
            update.updateTimeCSV(epoch, "train", args.exp_id, args.model_id, totalTime, batch_idx)
def epochImgEval(model, log_interval, loader, epoch, args, writer, metricEarlyStop, mode="val", **kwargs):
    ''' Evaluate a model during one epoch

    Args:
    - model (torch.nn.Module): the model to be evaluated
    - log_interval (int): the number of batches to wait before printing a log
    - loader: the data loader of the evaluated set. Each batch holds the data, the target and, when args.with_seg is set, a third element (seg)
    - epoch (int): the current epoch
    - args (Namespace): the namespace containing all the arguments required for evaluating and building the network
    - writer (tensorboardX.SummaryWriter): the writer to use to log metrics evolution to tensorboardX
    - metricEarlyStop: the key of the metric to return
    - mode (str): the name of the evaluated set. It selects the args.dataset_<mode> attribute. With "test", the intermediate variables are saved and, if args.compute_latency is set, the latency of each forward pass is written to a csv file
    - kwargs: must contain "master_net" when args.master_net is set
    Returns:
    - the value stored under metricEarlyStop in the metric dictionary built by metrics.updateMetrDict
    '''

    # Imported locally : torch itself is only needed here to synchronise CUDA around the latency measurement
    import torch

    if args.debug or args.benchmark:
        start_time = time.time()

    dataset = getattr(args, "dataset_{}".format(mode))

    model.eval()

    if args.bil_clu_deconv:
        model.firstModel.updateDeconv()

    print("Epoch", epoch, " : {}".format(mode))

    metrDict = None
    validBatch = 0
    totalImgNb = 0

    intermVarDict = {"fullAttMap": None, "fullFeatMapSeq": None, "fullAffTransSeq": None, "fullPointsSeq": None,
                     "fullPointsWeightSeq": None, "fullPointsSeq_pureText": None, "fullPointsWeightSeq_pureText": None,
                     "fullPointsSeqDropped": None, "fullPNReconstSeq": None, "fullProbMap": None,
                     "fullReconstSeq": None, "fullAttMap_glob": None, "fullFeatMapSeq_glob": None}

    # The latency is only measured on the test set
    compute_latency = args.compute_latency and mode == "test"

    if compute_latency:
        latency_list = []
        batchSize_list = []
    else:
        latency_list, batchSize_list = None, None

    for batch_idx, batch in enumerate(loader):
        data, target = batch[:2]

        if batch_idx % log_interval == 0:
            print("\t", batch_idx * len(data), "/", len(loader.dataset))

        seg = batch[2] if args.with_seg else None

        # Putting tensors on cuda
        if args.cuda:
            data, target = data.cuda(), target.cuda()
            if args.with_seg:
                seg = seg.cuda()

        # Computing predictions
        if compute_latency:
            # CUDA kernels run asynchronously : without synchronisation the timer would stop
            # before the forward pass has actually finished on the GPU
            if args.cuda:
                torch.cuda.synchronize()
            lat_start_time = time.time()
            resDict = model(data)
            if args.cuda:
                torch.cuda.synchronize()
            latency_list.append(time.time() - lat_start_time)
            batchSize_list.append(data.size(0))
        else:
            resDict = model(data)

        output = resDict["pred"]

        if args.master_net:
            resDict["master_net_pred"] = kwargs["master_net"](data)["pred"]

        # Loss
        loss = computeLoss(args, output, target, resDict, data, reduction="sum")

        # Other variables produced by the net. For datasets whose name contains "emb",
        # they are only collected while validBatch*data.size(0) is below 7000.
        if mode == "test" and ("emb" not in dataset or validBatch * data.size(0) < 7000):
            intermVarDict = update.catIntermediateVariables(resDict, intermVarDict, validBatch)

        # Hardware occupation
        update.updateHardWareOccupation(args.debug, args.benchmark, args.cuda, epoch, mode,
                                        args.exp_id, args.model_id, batch_idx)

        # Metrics
        metDictSample = metrics.binaryToMetrics(output, target, seg, resDict,
                                                comp_spars=(mode == "test") and args.with_seg)
        metDictSample["Loss"] = loss.item()
        metrDict = metrics.updateMetrDict(metrDict, metDictSample)

        writePreds(output, target, epoch, args.exp_id, args.model_id, args.class_nb, batch_idx, mode)

        validBatch += 1
        totalImgNb += target.size(0)

        # In debug mode only a few batches are processed
        if validBatch >= 4*(50.0/args.val_batch_size) and args.debug:
            break

    if mode == "test":
        intermVarDict = update.saveIntermediateVariables(intermVarDict, args.exp_id, args.model_id, epoch, mode)

    writeSummaries(metrDict, totalImgNb, writer, epoch, mode, args.model_id, args.exp_id)

    if compute_latency:
        # One row per batch : the latency followed by the batch size
        latencyCol = np.array(latency_list)[:, np.newaxis]
        batchSizeCol = np.array(batchSize_list)[:, np.newaxis]
        latencyTable = np.concatenate((latencyCol, batchSizeCol), axis=1)
        np.savetxt("../results/{}/latency_{}_epoch{}.csv".format(args.exp_id, args.model_id, epoch),
                   latencyTable, header="latency,batch_size", delimiter=",")

    if args.debug or args.benchmark:
        totalTime = time.time() - start_time
        update.updateTimeCSV(epoch, mode, args.exp_id, args.model_id, totalTime, batch_idx)

    return metrDict[metricEarlyStop]