def test(cfg, writer, logger):
    """Evaluation-only entry point: seed all RNGs, build the datasets and
    model, restore weights from the test checkpoint, then run validation.

    Args:
        cfg: config dict (needs 'seed', 'model', 'data', 'test' sections).
        writer: summary writer passed through to dataset/model/validation.
        logger: logger passed through to dataset/model/validation.
    """
    seed = cfg.get('seed', 1337)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    np.random.seed(seed)
    random.seed(seed)

    # Device selection: use the configured default GPU when CUDA is present.
    default_gpu = cfg['model']['default_gpu']
    device = torch.device(
        "cuda:{}".format(default_gpu) if torch.cuda.is_available() else 'cpu')

    # create_dataset yields source_train / target_train / source_valid /
    # target_valid plus their *_loader counterparts.
    datasets = create_dataset(cfg, writer, logger)

    model = CustomModel(cfg, writer, logger)

    # Metric trackers and loss meters for target and source validation.
    n_class = cfg['data']['target']['n_class']
    running_metrics_val = runningScore(n_class)
    source_running_metrics_val = runningScore(n_class)
    val_loss_meter = averageMeter()
    source_val_loss_meter = averageMeter()
    time_meter = averageMeter()

    loss_fn = get_loss_function(cfg)

    # Restore the DeepLab backbone weights from the test checkpoint.
    checkpoint = torch.load(cfg['test']['path'])
    model.adaptive_load_nets(model.BaseNet, checkpoint['DeepLab']['model_state'])

    validation(model, logger, writer, datasets, device,
               running_metrics_val, val_loss_meter, loss_fn,
               source_val_loss_meter, source_running_metrics_val,
               iters=model.iter)
def __init__(self, numbers=19):
    """Initialize per-class recall statistics and metric trackers.

    Args:
        numbers (int): number of semantic classes (default 19, Cityscapes).

    Fix: the statistics arrays and the threshold vector were hard-coded to
    19 entries, which silently broke any `numbers != 19`; they are now
    sized from `numbers` (backward-compatible — the default is still 19).
    """
    self.class_numbers = numbers
    # Recall accumulators for the threshold-based selection path:
    # [class, 3] counters plus a per-class sample count.
    self.classes_recall_thr = np.zeros([numbers, 3])
    self.classes_recall_thr_num = np.zeros([numbers])
    # Recall accumulators for the cluster-based selection path.
    self.classes_recall_clu = np.zeros([numbers, 3])
    self.classes_recall_clu_num = np.zeros([numbers])
    # Separate confusion-matrix trackers for the two selection strategies.
    self.running_metrics_val_threshold = runningScore(self.class_numbers)
    self.running_metrics_val_clusters = runningScore(self.class_numbers)
    # Per-class clustering distance threshold, initialised to 2.5.
    self.clu_threshold = np.full((numbers), 2.5)
def train(data_loader, model, epoch, num_batch, learning_rate):
    """Run one TensorFlow training epoch for the text/kernel segmentation model.

    Args:
        data_loader: source the batching helper `pre_tools.batch_data` reads from.
        model: Keras-style model; called as model(images), output NHWC.
        epoch (int): current epoch index (used for the global step offset).
        num_batch (int): number of batches in this epoch.
        learning_rate: learning rate for the Adam optimizer.
    """
    #### Model One
    #resnet = PM.ResNet(PM.BottleBlock(), FLAGS.kernal_num, True, 0.5)
    #logites = resnet(tf_image)  ## (batch, 7, size, size)
    #Loss = PM.Dec_Loss_2(logites=logites, gt_texts=tf_gt, gt_kernels=tf_kernal, training_masks=tf_mask)

    # Binary (text / non-text and kernel / non-kernel) score trackers.
    running_metric_text = runningScore(2)
    running_metric_kernel = runningScore(2)
    '''
    learning_rate = tf.train.exponential_decay( learning_rate=FLAGS.learning_rate, global_step= step, decay_steps=num_batch * 200, decay_rate=0.5, staircase=True)
    '''
    #optim = tf.train.AdamOptimizer(learning_rate=learning_rate)
    optim = tf.optimizers.Adam(learning_rate=learning_rate)
    step = epoch * num_batch  # global step carried across epochs
    for i in range(num_batch):
        images, gt_texts, train_masks, kernal_images = pre_tools.batch_data(data_loader, i, FLAGS.batch_size)
        images = tf.convert_to_tensor(images, dtype=tf.float32)
        gt_texts = tf.convert_to_tensor(gt_texts, dtype=tf.float32)
        train_masks = tf.convert_to_tensor(train_masks, dtype=tf.float32)
        kernal_images = tf.convert_to_tensor(kernal_images, dtype=tf.float32)
        with tf.GradientTape() as tape:
            logites = model(images)  ### (32,320,320,3) NHWC
            logites = tf.transpose(logites, (0, 3, 1, 2))  ### (32,3,320,320) NCHW
            Loss = PM.Dec_Loss_2(logites=logites, gt_texts=gt_texts, gt_kernels=kernal_images,
                                 training_masks=train_masks, kernal=FLAGS.kernal_num)
        # Compute gradients (tape mode keeps tracking) and apply the update.
        grads = tape.gradient(Loss, model.trainable_weights)
        optim.apply_gradients(zip(grads, model.trainable_weights))
        # Channel 0 is the text map, the rest are shrink-kernel maps.
        texts = logites[:, 0, :, :]
        kernels = logites[:, 1:, :, :]
        score_text = cal_text_score(texts, gt_texts, train_masks, running_metric_text)
        score_kernel = cal_kernel_score(kernels, kernal_images, gt_texts, train_masks, running_metric_kernel)
        acc = score_text['Mean Acc']
        iou_t = score_text['Mean IoU']
        iou_k = score_kernel['Mean IoU']
        step = step + 1
        if i % 20 == 0:
            information = '## Epoch:{:d} Step_Train / Total_Batch: {:d} / {:d} train_loss= {:5f} train_acc= {:5f} IOU_t={:5f} IOU_k={:5f}'. \
                format(epoch, step, num_batch, Loss, acc, iou_t, iou_k)
            print(information)  ### print progress to the console
def __init__(self, numbers=19, modal_num=3, model=None):
    """Initialize multi-modal per-class recall statistics and metric trackers.

    Args:
        numbers (int): number of semantic classes (default 19, Cityscapes).
        modal_num (int): number of modalities; one extra slot is added for
            the fused/combined modality (hence `modal_num + 1` below).
        model: model handle forwarded to the multimodal merger.

    Fix: the statistics arrays and `clu_threshold` hard-coded 19 classes,
    silently breaking any `numbers != 19`; they are now sized from
    `numbers` (backward-compatible — the default is still 19).
    """
    self.class_numbers = numbers
    # Recall accumulators for the threshold-based selection path.
    self.classes_recall_thr = np.zeros([numbers, 3])
    self.classes_recall_thr_num = np.zeros([numbers])
    # Recall accumulators for the cluster-based selection path.
    self.classes_recall_clu = np.zeros([numbers, 3])
    self.classes_recall_clu_num = np.zeros([numbers])
    # Separate confusion-matrix trackers for the two selection strategies.
    self.running_metrics_val_threshold = runningScore(self.class_numbers)
    self.running_metrics_val_clusters = runningScore(self.class_numbers)
    # Per-modality, per-class clustering threshold (modalities + fused).
    self.clu_threshold = torch.full((modal_num + 1, numbers), 3.0).cuda()
    self.multimodal_merger = CustomMetricsMultimodalMerger(
        modal_num=modal_num + 1, category_num=numbers, model=model
    )
def boxplotvis(cfg):
    """Gather per-class Dice scores of three pre-computed model predictions
    (VGG16-LSTM, VGG16-GRU, SegNet) on the validation split and assemble
    them into one long-format DataFrame `data` tagged by 'Method'.

    NOTE(review): the function ends after building `data` without returning
    or plotting it — presumably the box-plot code follows in the original
    file; confirm against the full source.
    """
    # device = torch.device("cuda:{}".format(cfg["other"]["gpu_idx"]) if torch.cuda.is_available() else "cpu")
    data_loader = get_loader(cfg["data"]["dataset"])
    data_path = cfg["data"]["path"]
    v_loader = data_loader(data_path, split='val')
    n_classes = v_loader.n_classes
    n_val = len(v_loader.files['val'])
    # Compare the different models' predictions against the ground truth.
    vgg16lstm_metric = runningScore(n_classes, n_val)
    vgg16gru_metric = runningScore(n_classes, n_val)
    segnet_metric = runningScore(n_classes, n_val)
    with torch.no_grad():
        for i_val, (images_val, labels_val, img_name_val) in tqdm(enumerate(v_loader)):
            gt = np.squeeze(labels_val.data.cpu().numpy())
            # Predictions were saved to disk as .bmp images by earlier runs.
            vgg16lstm_pred = m.imread(
                pjoin(cfg["data"]["pred_path"], 'vgg16_lstm_brainweb', img_name_val + '.bmp'))
            vgg16gru_pred = m.imread(
                pjoin(cfg["data"]["pred_path"], 'vgg16_gru_brainweb', img_name_val + '.bmp'))
            segnet_pred = m.imread(
                pjoin(cfg["data"]["pred_path"], 'segnet_brainweb', img_name_val + '.bmp'))
            # encode_segmap presumably converts the saved color predictions
            # back to class-index maps — TODO confirm against the loader.
            vgg16lstm_encode = v_loader.encode_segmap(vgg16lstm_pred)
            vgg16gru_encode = v_loader.encode_segmap(vgg16gru_pred)
            segnet_encode = v_loader.encode_segmap(segnet_pred)
            vgg16lstm_metric.update(gt, vgg16lstm_encode, i_val)
            vgg16gru_metric.update(gt, vgg16gru_encode, i_val)
            segnet_metric.update(gt, segnet_encode, i_val)
    vgg16lstm_acc_all, vgg16lstm_dsc_cls = vgg16lstm_metric.get_list()
    vgg16gru_acc_all, vgg16gru_dsc_cls = vgg16gru_metric.get_list()
    segnet_acc_all, segnet_dsc_cls = segnet_metric.get_list()
    # dsc_list = [vgg16lstm_dsc_cls.transpose(), vgg16gru_dsc_cls.transpose(), segnet_dsc_cls.transpose()]
    # Long-format table: one row per (image, class) Dice, labelled by method.
    data0 = array2dataframe(vgg16lstm_dsc_cls)
    data0['Method'] = 'VGG16-LSTM'
    data1 = array2dataframe(vgg16gru_dsc_cls)
    data1['Method'] = 'VGG16-GRU'
    data2 = array2dataframe(segnet_dsc_cls)
    data2['Method'] = 'SegNet'
    data = pd.concat([data0, data1, data2])
def val(args, model, dataloader):
    """Evaluate the segmentation and position heads of `model`.

    Args:
        args: namespace with `num_classes` (segmentation) and `num_char`
            (position) class counts.
        model: network returning a (seg_pred, pos_pred) pair of logits.
        dataloader: iterable of (data, seg, pos) batches.

    Returns:
        tuple: (segmiou, posmiou) score dicts from the two runningScore trackers.
    """
    seg_metric = runningScore(n_classes=args.num_classes)
    pos_metric = runningScore(n_classes=args.num_char)
    model.eval()
    with torch.no_grad():
        for batch_index, (data, seg, pos) in enumerate(dataloader):
            seg_logits, pos_logits = model(data)
            # Hard class assignment over the channel axis for each head.
            seg_hat = np.argmax(seg_logits.cpu().numpy(), axis=1)
            seg_metric.update(seg.numpy(), seg_hat)
            pos_hat = np.argmax(pos_logits.cpu().numpy(), axis=1)
            pos_metric.update(pos.numpy(), pos_hat)
    segmiou = seg_metric.get_scores()
    posmiou = pos_metric.get_scores()
    print('segmiou:{}'.format(segmiou))
    print('posmiou:{}'.format(posmiou))
    return segmiou, posmiou
def main(test_args):
    """Evaluate a saved SegNet checkpoint on the CamVid test split and print
    overall scores, per-class IoU/accuracy, and per-class pixel counts.

    Args:
        test_args: namespace with `load_param`, the checkpoint path to load.
    """
    testset = "/mnt/iusers01/eee01/mchiwml4/CamVid/test"
    transform = transforms.Compose(
        [transforms.ToTensor(), transforms.Normalize(mean, std)])
    test_dataset = DataLoader(
        Loaddata(testset, transform=transform, target_transform=MaskToTensor()),
        batch_size=1, shuffle=False, num_workers=8)

    label_num = 11
    model = segnetmodel.segnet(label_num)
    model = model.cuda()
    model.load_state_dict(torch.load(test_args.load_param))
    model.eval()

    # total[c] accumulates the ground-truth pixel count of class c (tp + fn).
    total = np.zeros((label_num, ))
    running_metrics = runningScore(label_num)
    for _, batch in enumerate(test_dataset):
        inputs, labels = batch
        inputs = Variable(inputs.cuda())
        outputs = model(inputs)
        pred = outputs.data.max(1)[1].cpu().numpy()
        gt = labels.numpy()
        running_metrics.update(gt, pred)
        for cls in range(label_num):
            total[cls] += np.sum(gt == cls)

    score, class_iou, class_acc = running_metrics.get_scores()
    for k, v in score.items():
        print(k, v)
    print('class iou: ')
    for i in range(label_num):
        print(i, class_iou[i])
    print('class acc: ')
    for i in range(label_num):
        print(i, class_acc[i])
    print('number of pixels:')
    print(total)
def test(opt, logger):
    """Seed all RNGs, restore a DeepLabV2 model from a checkpoint, and run
    validation on the target dataset.

    Args:
        opt: options namespace (`seed`, `model_name`, `resume_path`, `n_class`).
        logger: logger forwarded to dataset/model construction and validation.
    """
    # Seed every RNG family from the same value for reproducibility.
    for seeder in (torch.manual_seed, torch.cuda.manual_seed,
                   np.random.seed, random.seed):
        seeder(opt.seed)

    device = torch.device("cuda:0" if torch.cuda.is_available() else 'cpu')
    datasets = create_dataset(opt, logger)

    if opt.model_name == 'deeplabv2':
        # The checkpoint stores the backbone weights under ResNet101.
        state = torch.load(opt.resume_path)['ResNet101']["model_state"]
        model = adaptation_modelv2.CustomModel(opt, logger)
        model.BaseNet.load_state_dict(state)

    running_metrics_val = runningScore(opt.n_class)
    validation(model, logger, datasets, device, running_metrics_val)
def validate(val_loader, model, criterion):
    """Run one PSENet-style validation pass (no gradients).

    Computes the text loss under OHEM-selected masks, the mean of the six
    kernel losses under (sigmoid(text) > 0.5) & (mask > 0.5) selection, and
    tracks binary text/kernel scores.

    Args:
        val_loader: yields (imgs, gt_texts, gt_kernels, training_masks) batches.
        model: network whose channel 0 is the text map and 1..6 the kernels.
        criterion: dice-style loss taking (pred, gt, selected_masks).

    Returns:
        tuple of floats: (avg loss, text mean acc, kernel mean acc,
        text mean IoU, kernel mean IoU).
    """
    with torch.no_grad():
        model.eval()
        batch_time = AverageMeter()
        data_time = AverageMeter()
        losses = AverageMeter()
        running_metric_text = runningScore(2)
        running_metric_kernel = runningScore(2)
        end = time.time()
        for batch_idx, (imgs, gt_texts, gt_kernels, training_masks) in enumerate(val_loader):
            data_time.update(time.time() - end)
            imgs = Variable(imgs.cuda())
            gt_texts = Variable(gt_texts.cuda())
            gt_kernels = Variable(gt_kernels.cuda())
            training_masks = Variable(training_masks.cuda())
            outputs = model(imgs)
            # Channel 0 is the full-text map, channels 1.. are shrink kernels.
            texts = outputs[:, 0, :, :]
            kernels = outputs[:, 1:, :, :]
            # Text loss uses OHEM-selected pixels.
            selected_masks = ohem_batch(texts, gt_texts, training_masks)
            selected_masks = Variable(selected_masks.cuda())
            loss_text = criterion(texts, gt_texts, selected_masks)
            loss_kernels = []
            # Kernel losses are restricted to confident text pixels inside
            # the training mask.
            mask0 = torch.sigmoid(texts).data.cpu().numpy()
            mask1 = training_masks.data.cpu().numpy()
            selected_masks = ((mask0 > 0.5) & (mask1 > 0.5)).astype('float32')
            selected_masks = torch.from_numpy(selected_masks).float()
            selected_masks = Variable(selected_masks.cuda())
            for i in range(6):
                kernel_i = kernels[:, i, :, :]
                gt_kernel_i = gt_kernels[:, i, :, :]
                loss_kernel_i = criterion(kernel_i, gt_kernel_i, selected_masks)
                loss_kernels.append(loss_kernel_i)
            loss_kernel = sum(loss_kernels) / len(loss_kernels)
            # Fixed 0.7 / 0.3 weighting between text and kernel losses.
            loss = 0.7 * loss_text + 0.3 * loss_kernel
            losses.update(loss.item(), imgs.size(0))
            score_text = cal_text_score(texts, gt_texts, training_masks, running_metric_text)
            score_kernel = cal_kernel_score(kernels, gt_kernels, gt_texts, training_masks, running_metric_kernel)
            batch_time.update(time.time() - end)
            end = time.time()
            if batch_idx % 5 == 0:
                output_log = '({batch}/{size}) Batch: {bt:.3f}s | TOTAL: {total:.0f}min | ETA: {eta:.0f}min '.format(
                    batch=batch_idx + 1,
                    size=len(val_loader),
                    bt=batch_time.avg,
                    total=batch_time.avg * batch_idx / 60.0,
                    eta=batch_time.avg * (len(val_loader) - batch_idx) / 60.0)
                print(output_log)
                sys.stdout.flush()
    return (float(losses.avg), float(score_text['Mean Acc']),
            float(score_kernel['Mean Acc']), float(score_text['Mean IoU']),
            float(score_kernel['Mean IoU']))
def train(args):
    """Cross-validation training loop for the multi-modal fcn_mul model on
    MR brain data (T1 / IR / T2 inputs).

    Per epoch: train with 2D cross-entropy on the centre slice of each
    slice-stack, validate with both the full label set and a merged 4-class
    (Back/CSF/GM/WM) view, dump diagnostic figures, save the best checkpoint
    by mean Dice plus a periodic snapshot every 10 epochs, and plot
    loss/Dice curves.

    Args:
        args: namespace with gpu_id, data_path, val_num, num_forest,
            batch_size, lr, resume, n_epoch.

    NOTE(review): relies on module-level `t_begin` (a start timestamp) and
    the helpers MR18loader_CV / fcn_mul / adjust_learning_rate /
    cross_entropy2d defined elsewhere in the original file.
    """
    os.environ["CUDA_VISIBLE_DEVICES"] = str(args.gpu_id)
    #torch.manual_seed(1337)
    print(args)
    # setup dataloader: train split uses augmentation, val split does not.
    t_loader = MR18loader_CV(root=args.data_path, val_num=args.val_num, is_val=False, is_transform=True,
                             is_flip=True, is_rotate=True, is_crop=True, is_histeq=True, forest=args.num_forest)
    v_loader = MR18loader_CV(root=args.data_path, val_num=args.val_num, is_val=True, is_transform=True,
                             is_flip=False, is_rotate=False, is_crop=True, is_histeq=True, forest=args.num_forest)
    n_classes = t_loader.n_classes
    trainloader = data.DataLoader(t_loader, batch_size=args.batch_size, num_workers=1, shuffle=True)
    valloader = data.DataLoader(v_loader, batch_size=1, num_workers=1, shuffle=True)
    # setup Metrics: full label set plus the merged 4-class test view.
    running_metrics_single = runningScore(n_classes)
    running_metrics_single_test = runningScore(4)
    # setup Model: FCN initialised from ImageNet-pretrained VGG16 weights.
    model = fcn_mul(n_classes=n_classes)
    vgg16 = models.vgg16(pretrained=True)
    model.init_vgg16_params(vgg16)
    model.cuda()
    # setup optimizer and loss
    optimizer = torch.optim.SGD(model.parameters(), lr=args.lr, momentum=0.99, weight_decay=5e-4)
    loss_ce = cross_entropy2d
    #loss_ce_weight = weighted_loss
    #loss_dc = dice_loss
    #loss_hed= bce2d_hed
    # resume from checkpoint if requested
    best_iou = -100.0
    if args.resume is not None:
        if os.path.isfile(args.resume):
            print("Loading model and optimizer from checkpoint '{}'".format(args.resume))
            checkpoint = torch.load(args.resume)
            best_iou = checkpoint['best_iou']
            model.load_state_dict(checkpoint['model_state'])
            optimizer.load_state_dict(checkpoint['optimizer_state'])
            print("Loaded checkpoint '{}' (epoch {}), best_iou={}"
                  .format(args.resume, checkpoint['epoch'], best_iou))
        else:
            best_iou = -100.0
            print("No checkpoint found at '{}'".format(args.resume))
    # histories for the curve plot at the end of every epoch
    t = []
    loss_seg_list = []
    loss_hed_list = []
    Dice_mean = []
    Dice_CSF = []
    Dice_GM = []
    Dice_WM = []
    t_pre = time.time()
    print('training prepared, cost {} seconds\n\n'.format(t_pre - t_begin))
    for epoch in range(args.n_epoch):
        t.append(epoch + 1)
        model.train()
        adjust_learning_rate(optimizer, epoch)
        #loss_sum=0.0
        loss_epoch = 0.0
        t_epoch = time.time()
        for i_train, (regions, T1s, IRs, T2s, lbls) in enumerate(trainloader):
            T1s = Variable(T1s.cuda())
            IRs, T2s = Variable(IRs.cuda()), Variable(T2s.cuda())
            # Supervise only the centre slice of the num_forest-slice stack.
            lbls = Variable(lbls.cuda()[:, int(args.num_forest / 2), :, :].unsqueeze(1))
            #edges=Variable(edges.cuda()[:,int(args.num_forest/2),:,:].unsqueeze(1))
            optimizer.zero_grad()
            outputs = model(T1s, IRs, T2s)
            # Negative max log-softmax as a per-pixel uncertainty map
            # (visualisation only — detached from the graph).
            seg_out = F.log_softmax(outputs, dim=1)
            max_prob, _ = torch.max(seg_out, dim=1)
            max_prob = -max_prob.detach().unsqueeze(1)
            loss_seg_value = loss_ce(input=outputs, target=lbls)
            #+0.5*loss_dc(input=outputs,target=lbls)
            #+0.5*loss_ce_weight(input=outputs,target=lbls,weight=max_prob)\
            #+0.5*loss_ce_weight(input=outputs,target=lbls,weight=edges)\
            #loss_hed_value=loss_hed(input=outputs[1],target=edges)
            #+0.5*loss_hed(input=outputs[2],target=edges) \
            #+0.5*loss_hed(input=outputs[3],target=edges) \
            #+0.5*loss_hed(input=outputs[4],target=edges) \
            #+0.5*loss_hed(input=outputs[5],target=edges)
            loss = loss_seg_value
            #loss=loss_seg_value+loss_hed_value
            # loss average (disabled gradient-accumulation variant)
            #loss_sum+=loss
            #if (i_train+1)%args.loss_avg==0:
            #    loss_sum/=args.loss_avg
            #    loss_sum.backward()
            #    optimizer.step()
            #    loss_sum=0.0
            loss.backward()
            optimizer.step()
            loss_epoch += loss.item()
            # visualization: dump one fixed training example per epoch
            if i_train == 40:
                ax1 = plt.subplot(241)
                ax1.imshow(T1s[0, 1, :, :].data.cpu().numpy(), cmap='gray')
                ax1.set_title('train_img')
                ax1.axis('off')
                ax2 = plt.subplot(242)
                ax2.imshow(t_loader.decode_segmap(lbls[0, 0, :, :].data.cpu().numpy()).astype(np.uint8))
                ax2.set_title('train_label')
                ax2.axis('off')
                ax3 = plt.subplot(243)
                model.eval()
                train_show = model(T1s, IRs, T2s)
                ax3.imshow(t_loader.decode_segmap(train_show[0].data.max(0)[1].cpu().numpy()).astype(np.uint8))
                ax3.set_title('train_predict')
                ax3.axis('off')
                ax4 = plt.subplot(244)
                ax4.imshow(max_prob[0, 0].cpu().numpy())
                ax4.set_title('uncertainty')
                ax4.axis('off')
                model.train()
        loss_epoch /= i_train
        loss_seg_list.append(loss_epoch)
        loss_hed_list.append(0)  # HED loss disabled; keep curve aligned
        t_train = time.time()
        print('epoch: ', epoch + 1)
        print('--------------------------------Training--------------------------------')
        print('average loss in this epoch: ', loss_epoch)
        print('final loss in this epoch: ', loss.data.item())
        print('cost {} seconds up to now'.format(t_train - t_begin))
        print('cost {} seconds in this train epoch'.format(t_train - t_epoch))
        # ---- validation ----
        model.eval()
        for i_val, (regions_val, T1s_val, IRs_val, T2s_val, lbls_val) in enumerate(valloader):
            T1s_val = Variable(T1s_val.cuda())
            IRs_val, T2s_val = Variable(IRs_val.cuda()), Variable(T2s_val.cuda())
            with torch.no_grad():
                outputs_single = model(T1s_val, IRs_val, T2s_val)[0, :, :, :]
            # get predict
            pred_single = outputs_single.data.max(0)[1].cpu().numpy()
            # pad the crop back into the full 240x240 frame
            pred_pad = np.zeros((240, 240), np.uint8)
            pred_pad[regions_val[0]:regions_val[1], regions_val[2]:regions_val[3]] = \
                pred_single[0:regions_val[1] - regions_val[0], 0:regions_val[3] - regions_val[2]]
            # convert to the merged test classes (Back/CSF/GM/WM)
            pred_single_test = np.zeros((240, 240), np.uint8)
            pred_single_test = v_loader.lbl_totest(pred_pad)
            # get gt (centre slice), pad and convert the same way
            gt = lbls_val[0][int(args.num_forest / 2)].numpy()
            gt_pad = np.zeros((240, 240), np.uint8)
            gt_pad[regions_val[0]:regions_val[1], regions_val[2]:regions_val[3]] = \
                gt[0:regions_val[1] - regions_val[0], 0:regions_val[3] - regions_val[2]]
            gt_test = np.zeros((240, 240), np.uint8)
            gt_test = v_loader.lbl_totest(gt_pad)
            # metrics update
            running_metrics_single.update(gt_pad, pred_pad)
            running_metrics_single_test.update(gt_test, pred_single_test)
            # visualization: one fixed validation example per epoch
            if i_val == 40:
                ax5 = plt.subplot(245)
                ax5.imshow((T1s_val[0, int(args.num_forest / 2), :, :].data.cpu().numpy() * 255 + t_loader.T1mean).astype(np.uint8), cmap='gray')
                ax5.set_title('src_img')
                ax5.axis('off')
                ax6 = plt.subplot(246)
                ax6.imshow(t_loader.decode_segmap(gt).astype(np.uint8))
                ax6.set_title('gt')
                ax6.axis('off')
                ax7 = plt.subplot(247)
                ax7.imshow(t_loader.decode_segmap(pred_single).astype(np.uint8))
                ax7.set_title('pred_single')
                ax7.axis('off')
                ax8 = plt.subplot(248)
                ax8.imshow(pred_single_test[regions_val[0]:regions_val[1], regions_val[2]:regions_val[3]].astype(np.uint8))
                ax8.set_title('pred_single_test')
                ax8.axis('off')
                plt.tight_layout()
                plt.subplots_adjust(wspace=.1, hspace=.3)
                plt.savefig('./fig_out/val_{}_out_{}.png'.format(str(args.val_num), epoch + 1))
        # compute dice coefficients during validation
        score_single, class_iou_single = running_metrics_single.get_scores()
        score_single_test, class_iou_single_test = running_metrics_single_test.get_scores()
        Dice_mean.append(score_single['Mean Dice : \t'])
        Dice_CSF.append(score_single_test['Dice : \t'][1])
        Dice_GM.append(score_single_test['Dice : \t'][2])
        Dice_WM.append(score_single_test['Dice : \t'][3])
        print('--------------------------------All tissues--------------------------------')
        print('Back: Background,')
        print('GM: Cortical GM(red), Basal ganglia(green),')
        print('WM: WM(yellow), WM lesions(blue),')
        print('CSF: CSF(pink), Ventricles(light blue),')
        print('Back: Cerebellum(white), Brainstem(dark red)')
        print('single predict: ')
        for k, v in score_single.items():
            print(k, v)
        print('--------------------------------Only tests--------------------------------')
        print('tissue : Back , CSF , GM , WM')
        print('single predict: ')
        for k, v in score_single_test.items():
            print(k, v)
        t_test = time.time()
        print('cost {} seconds up to now'.format(t_test - t_begin))
        print('cost {} seconds in this validation epoch'.format(t_test - t_train))
        # save model at best validation metrics
        if score_single['Mean Dice : \t'] >= best_iou:
            best_iou = score_single['Mean Dice : \t']
            state = {'epoch': epoch + 1,
                     'model_state': model.state_dict(),
                     'optimizer_state': optimizer.state_dict(),
                     'best_iou': best_iou}
            torch.save(state, "val_{}_best.pkl".format(str(args.val_num)))
            print('model saved!!!')
        # save model every 10 epochs
        if (epoch + 1) % 10 == 0:
            state = {'epoch': epoch + 1,
                     'model_state': model.state_dict(),
                     'optimizer_state': optimizer.state_dict(),
                     'score': score_single}
            torch.save(state, "val_{}_e_{}.pkl".format(str(args.val_num), epoch + 1))
        # plot curve of training loss and validation Dice
        ax1 = plt.subplot(211)
        ax1.plot(t, loss_seg_list, 'g')
        ax1.plot(t, loss_hed_list, 'r')
        ax1.set_title('train loss')
        ax2 = plt.subplot(212)
        ax2.plot(t, Dice_mean, 'k')
        ax2.plot(t, Dice_CSF, 'r')
        ax2.plot(t, Dice_GM, 'g')
        ax2.plot(t, Dice_WM, 'b')
        ax2.set_title('validate Dice, R/G/B for CSF/GM/WM')
        plt.tight_layout()
        plt.subplots_adjust(wspace=0, hspace=.3)
        plt.savefig('./fig_out/val_{}_curve.png'.format(str(args.val_num)))
        # metric reset for the next epoch
        running_metrics_single.reset()
        running_metrics_single_test.reset()
        print('\n\n')
def train(train_loader, model, criterion, optimizer, epoch):
    """One TensorFlow training epoch for the PSENet-style text detector.

    Computes the OHEM-masked text loss plus the mean of six kernel losses
    restricted to confident text pixels, applies gradients via GradientTape,
    and tracks binary text/kernel scores.

    Args:
        train_loader: yields (imgs, gt_texts, gt_kernels, training_masks, data_length).
        model: Keras-style model; output is NHWC and transposed to NCHW here.
        criterion: dice-style loss taking (pred, gt, selected_masks).
        optimizer: tf optimizer used to apply the tape gradients.
        epoch (int): current epoch (unused inside the loop body).

    Returns:
        tuple: (avg loss, text mean acc, kernel mean acc, text mean IoU,
        kernel mean IoU).

    NOTE(review): reads module-level `args.batch_size` for the progress log.
    """
    #model.train()
    batch_time = AverageMeter()
    data_time = AverageMeter()
    losses = AverageMeter()
    running_metric_text = runningScore(2)
    running_metric_kernel = runningScore(2)
    end = time.time()
    for batch_idx, (imgs, gt_texts, gt_kernels, training_masks, data_length) in enumerate(train_loader):
        with tf.GradientTape() as tape:
            data_time.update(time.time() - end)
            outputs = model(imgs)
            outputs = tf.transpose(outputs, (0, 3, 1, 2))  # NHWC -> NCHW
            # Channel 0 is the text map, channels 1.. are the shrink kernels.
            texts = outputs[:, 0, :, :]
            kernels = outputs[:, 1:, :, :]
            # Text loss over OHEM-selected pixels.
            selected_masks = ohem_batch(texts, gt_texts, training_masks)
            loss_text = criterion(texts, gt_texts, selected_masks)
            loss_kernels = []
            # Kernel losses restricted to confident text pixels inside the mask.
            mask0 = tf.sigmoid(texts).numpy()
            mask1 = training_masks.numpy()
            selected_masks = ((mask0 > 0.5) & (mask1 > 0.5)).astype('float32')
            #selected_masks = torch.from_numpy(selected_masks).float()
            selected_masks = tf.convert_to_tensor(selected_masks, dtype=tf.float32)
            #selected_masks = Variable(selected_masks.cuda())
            for i in range(6):
                kernel_i = kernels[:, i, :, :]
                gt_kernel_i = gt_kernels[:, i, :, :]
                loss_kernel_i = criterion(kernel_i, gt_kernel_i, selected_masks)
                loss_kernels.append(loss_kernel_i)
            loss_kernel = sum(loss_kernels) / len(loss_kernels)
            # Fixed 0.7 / 0.3 weighting between text and kernel losses.
            loss = 0.7 * loss_text + 0.3 * loss_kernel
            losses.update(loss.numpy(), imgs.shape[0])
        # Compute gradients (tape mode keeps tracking) and apply the update.
        grads = tape.gradient(loss, model.trainable_weights)
        #
        optimizer.apply_gradients(zip(grads, model.trainable_weights))
        score_text = cal_text_score(texts, gt_texts, training_masks, running_metric_text)
        score_kernel = cal_kernel_score(kernels, gt_kernels, gt_texts, training_masks, running_metric_kernel)
        batch_time.update(time.time() - end)
        end = time.time()
        size = data_length / args.batch_size
        if batch_idx % 20 == 0:
            output_log = '({batch}/{size}) Batch: {bt:.3f}s | TOTAL: {total:.0f}min \
| ETA: {eta:.0f}min | Loss: {loss:.4f} | Acc_t: {acc: .4f} | IOU_t: {iou_t: .4f}\
| IOU_k: {iou_k: .4f}'.format(
                batch=batch_idx + 1,
                #size=len(train_loader),
                size=data_length / args.batch_size,
                bt=batch_time.avg,
                total=batch_time.avg * batch_idx / 60.0,
                #eta=batch_time.avg * (len(train_loader) - batch_idx) / 60.0,
                eta=batch_time.avg * (size - batch_idx) / 60.0,
                loss=losses.avg,
                acc=score_text['Mean Acc'],
                iou_t=score_text['Mean IoU'],
                iou_k=score_kernel['Mean IoU'])
            print(output_log)
            sys.stdout.flush()
    return (losses.avg, score_text['Mean Acc'], score_kernel['Mean Acc'],
            score_text['Mean IoU'], score_kernel['Mean IoU'])
def train(cfg):
    """Generic config-driven semantic-segmentation training loop.

    Seeds RNGs, builds train/val loaders, model (DataParallel), optimizer,
    scheduler and loss from `cfg`, optionally resumes, then iterates until
    `cfg['training']['train_iters']`, validating every `val_interval`
    iterations and checkpointing the best mean-IoU model.

    Args:
        cfg: nested config dict with 'data', 'model' and 'training' sections.
    """
    # Setup seeds
    torch.manual_seed(cfg.get('seed', 1337))
    torch.cuda.manual_seed(cfg.get('seed', 1337))
    np.random.seed(cfg.get('seed', 1337))
    random.seed(cfg.get('seed', 1337))
    # Setup device
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    # Setup Augmentations
    augmentations = cfg['training'].get('augmentations', None)
    data_aug = get_composed_augmentations(augmentations)
    # Setup Dataloader
    data_loader = get_loader(cfg['data']['dataset'])
    data_path = cfg['data']['path']
    t_loader = data_loader(
        data_path,
        is_transform=True,
        split=cfg['data']['train_split'],
        #img_size=(cfg['data']['img_rows'], cfg['data']['img_cols']),
        augmentations=data_aug)
    v_loader = data_loader(
        data_path,
        is_transform=True,
        split=cfg['data']['val_split'],
        #img_size=(cfg['data']['img_rows'], cfg['data']['img_cols']),
    )
    n_classes = t_loader.n_classes
    trainloader = data.DataLoader(t_loader,
                                  batch_size=cfg['training']['batch_size'],
                                  num_workers=cfg['training']['n_workers'],
                                  shuffle=True)
    valloader = data.DataLoader(v_loader,
                                batch_size=cfg['training']['batch_size'],
                                num_workers=cfg['training']['n_workers'])
    # Setup Metrics
    running_metrics_val = runningScore(n_classes)
    # Setup Model (wrapped in DataParallel across all visible GPUs)
    model = get_model(cfg['model'], n_classes).to(device)
    model = torch.nn.DataParallel(model, device_ids=range(torch.cuda.device_count()))
    # Setup optimizer, lr_scheduler and loss function from the config
    optimizer_cls = get_optimizer(cfg)
    optimizer_params = {k: v for k, v in cfg['training']['optimizer'].items() if k != 'name'}
    optimizer = optimizer_cls(model.parameters(), **optimizer_params)
    scheduler = get_scheduler(optimizer, cfg['training']['lr_schedule'])
    loss_fn = get_loss_function(cfg)
    start_iter = 0
    if cfg['training']['resume'] is not None:
        if os.path.isfile(cfg['training']['resume']):
            checkpoint = torch.load(cfg['training']['resume'])
            model.load_state_dict(checkpoint["model_state"])
            optimizer.load_state_dict(checkpoint["optimizer_state"])
            scheduler.load_state_dict(checkpoint["scheduler_state"])
            start_iter = checkpoint["epoch"]
            print("=====>",
                  "Loaded checkpoint '{}' (iter {})".format(
                      cfg['training']['resume'], checkpoint["epoch"]))
        else:
            print("=====>", "No checkpoint found at '{}'".format(cfg['training']['resume']))
    val_loss_meter = averageMeter()
    time_meter = averageMeter()
    best_iou = -100.0
    i = start_iter
    flag = True
    # Iteration-based loop: keep cycling the train loader until train_iters.
    while i <= cfg['training']['train_iters'] and flag:
        for (images, labels) in trainloader:
            i += 1
            start_ts = time.time()
            scheduler.step()
            model.train()
            images = images.to(device)
            labels = labels.to(device)
            optimizer.zero_grad()
            outputs = model(images)
            loss = loss_fn(input=outputs, target=labels)
            loss.backward()
            optimizer.step()
            time_meter.update(time.time() - start_ts)
            if (i + 1) % cfg['training']['print_interval'] == 0:
                fmt_str = "Iter [{:d}/{:d}] Loss: {:.4f} Time/Image: {:.4f}"
                print_str = fmt_str.format(i + 1,
                                           cfg['training']['train_iters'],
                                           loss.item(),
                                           time_meter.avg / cfg['training']['batch_size'])
                print(print_str)
                time_meter.reset()
            # Validate periodically and at the final iteration.
            if (i + 1) % cfg['training']['val_interval'] == 0 or \
               (i + 1) == cfg['training']['train_iters']:
                model.eval()
                with torch.no_grad():
                    for i_val, (images_val, labels_val) in tqdm(enumerate(valloader)):
                        images_val = images_val.to(device)
                        labels_val = labels_val.to(device)
                        outputs = model(images_val)
                        val_loss = loss_fn(input=outputs, target=labels_val)
                        pred = outputs.data.max(1)[1].cpu().numpy()
                        gt = labels_val.data.cpu().numpy()
                        running_metrics_val.update(gt, pred)
                        val_loss_meter.update(val_loss.item())
                print("Iter %d Loss: %.4f" % (i + 1, val_loss_meter.avg))
                score, class_iou = running_metrics_val.get_scores()
                for k, v in score.items():
                    print(k, ':', v)
                for k, v in class_iou.items():
                    print('{}: {}'.format(k, v))
                val_loss_meter.reset()
                running_metrics_val.reset()
                # Checkpoint whenever validation mean IoU improves.
                if score["Mean IoU : \t"] >= best_iou:
                    best_iou = score["Mean IoU : \t"]
                    state = {
                        "epoch": i + 1,
                        "model_state": model.state_dict(),
                        "optimizer_state": optimizer.state_dict(),
                        "scheduler_state": scheduler.state_dict(),
                        "best_iou": best_iou,
                    }
                    save_path = os.path.join(
                        './checkpoint',
                        "{}_{}_best_model.pkl".format(cfg['model']['arch'], cfg['data']['dataset']))
                    print("saving···")
                    torch.save(state, save_path)
            if (i + 1) == cfg['training']['train_iters']:
                flag = False
                break
input = input.transpose(1, 2).transpose(2, 3).contiguous().view(-1, c) target = target.view(-1) loss = F.cross_entropy(input, target, weight=weight, size_average=size_average, ignore_index=250) return loss # optimier optimizer = SGD(model.parameters(), lr=0.01, momentum=0.9) #optimizer = Adam(model.parameters(), lr = 0.01) # Setup Metrics running_metrics_val = runningScore(n_classes) val_loss_meter = averageMeter() num_epochs = 30 step = 0 epoch = 0 if load_model_file is not None: step = start_step epoch = start_epoch score_list = [] while epoch <= num_epochs: epoch += 1 print("Starting epoch %s" % epoch) for (images, labels) in trainloader: step += 1
def train(train_loader, model, criterion, optimizer, epoch, writer):
    """One PAN-style training epoch: text loss + single-kernel loss +
    aggregation/discrimination losses over the similarity vectors, with
    TensorBoard image/scalar logging.

    Args:
        train_loader: yields (imgs, gt_texts, gt_kernels, training_masks,
            gt_text_key, gt_kernels_key) batches.
        model: network; channel 0 = text map, channel 1 = kernel map,
            channels 2.. = similarity vectors.
        criterion: dice-style loss taking (pred, gt, selected_masks).
        optimizer: torch optimizer.
        epoch (int): current epoch (used for the TensorBoard global step).
        writer: TensorBoard SummaryWriter.

    Returns:
        tuple: (avg loss, text mean acc, kernel mean acc, text mean IoU,
        kernel mean IoU, avg Lagg, avg Ldis).
    """
    model.train()
    batch_time = AverageMeter()
    data_time = AverageMeter()
    losses = AverageMeter()
    losses_Lagg = AverageMeter()
    losses_Ldis = AverageMeter()
    running_metric_text = runningScore(2)
    running_metric_kernel = runningScore(2)
    end = time.time()
    for batch_idx, (imgs, gt_texts, gt_kernels, training_masks, gt_text_key, gt_kernels_key) in enumerate(train_loader):
        data_time.update(time.time() - end)
        # print(imgs.shape,gt_texts.shape,gt_kernels.shape,training_masks.shape,gt_text_key.shape,gt_kernels_key.shape)
        imgs = Variable(imgs.cuda())  # batch_size*channel*w*h
        gt_texts = Variable(gt_texts.cuda())  # batch_size*w*h
        gt_kernels = Variable(gt_kernels.cuda())  # batch_size*1*w*h
        gt_text_key = Variable(gt_text_key.cuda())  # batch_size*w*h
        gt_kernels_key = Variable(gt_kernels_key.cuda())  # batch_size*w*h
        training_masks = Variable(training_masks.cuda())  # batch_size*w*h
        outputs = model(imgs)
        # Debug dumps of the first sample's text/kernel maps to disk.
        ind = 'cat_34'
        cv2.imwrite('text' + str(ind) + '.jpg',
                    torch.sigmoid(outputs[0, 0, :, :]).data.cpu().numpy().astype(np.uint8) * 255)
        cv2.imwrite('kernel' + str(ind) + '.jpg',
                    torch.sigmoid(outputs[0, 1, :, :]).data.cpu().numpy().astype(np.uint8) * 255)
        # TensorBoard image grids every 20 batches.
        if batch_idx % 20 == 0:
            writer.add_image('/data/ori_image',
                             torchvision.utils.make_grid(imgs, nrow=8, padding=10, normalize=True).cpu(), 0)
            writer.add_image('/data/label_text',
                             torchvision.utils.make_grid(gt_texts.cpu().unsqueeze(1), nrow=8, padding=10, normalize=True), 0)
            writer.add_image('/data/predict_text',
                             torchvision.utils.make_grid(torch.sigmoid(outputs[:, 0, :, :]).unsqueeze(1), nrow=8, padding=10, normalize=True).cpu(), 0)
            writer.add_image('/data/label_kernel',
                             torchvision.utils.make_grid(gt_kernels, nrow=8, padding=10, normalize=True).cpu(), 0)
            writer.add_image('/data/predict_kernel',
                             torchvision.utils.make_grid(torch.sigmoid(outputs[:, 1, :, :]).unsqueeze(1), nrow=8, padding=10, normalize=True).cpu(), 0)
        # Split the network output into its three heads.
        texts = outputs[:, 0, :, :]
        kernels = outputs[:, 1:2, :, :]
        similarity_vector = outputs[:, 2:, :, :]  #torch.sigmoid(outputs[:,2:,:,:])
        # Text loss over OHEM-selected pixels.
        selected_masks = ohem_batch(texts, gt_texts, training_masks)
        selected_masks = Variable(selected_masks.cuda())
        loss_text = criterion(texts, gt_texts, selected_masks)
        # Kernel loss restricted to confident text pixels inside the mask.
        mask0 = torch.sigmoid(texts).data.cpu().numpy()
        mask1 = training_masks.data.cpu().numpy()
        selected_masks = ((mask0 > 0.5) & (mask1 > 0.5)).astype('float32')
        selected_masks = torch.from_numpy(selected_masks).float()
        selected_masks = Variable(selected_masks.cuda())
        loss_kernels = []
        for i in range(1):  # single kernel in this variant
            kernel_i = kernels[:, i, :, :]
            gt_kernel_i = gt_kernels[:, i, :, :]
            loss_kernel_i = criterion(kernel_i, gt_kernel_i, selected_masks)
            loss_kernels.append(loss_kernel_i)
        loss_kernel = sum(loss_kernels) / len(loss_kernels)
        # total_tag,total_label_kernel,total_label_text = get_batch_tag(outputs,outputs.shape[0],gt_texts,gt_kernels)
        # loss_agg = cal_Lagg(similarity_vector,total_tag,total_label_kernel,total_label_text)
        # loss_ldis = cal_Ldis(similarity_vector,total_tag,total_label_kernel)
        # Aggregation / discrimination losses computed from ground-truth keys.
        loss_agg = cal_Lagg_gt(similarity_vector, gt_kernels_key, gt_text_key, training_masks)
        loss_ldis = cal_Ldis_gt(similarity_vector, gt_kernels_key, training_masks)
        # loss_agg,loss_ldis = agg_dis_loss(outputs[:, 0, :, :], outputs[:, 1, :, :], gt_text_key, gt_kernels_key, similarity_vector)
        # loss_agg = loss_agg.mean()
        # loss_ldis = loss_ldis.mean()
        loss = loss_text + 0.5 * loss_kernel + 0.25 * (loss_agg + loss_ldis)
        # Scalar logging; global step assumes 1000 samples / batch size 8.
        writer.add_scalar('Loss/total_loss', loss, batch_idx + epoch * (1000 / 8))
        writer.add_scalar('Loss/loss_text', loss_text, batch_idx + epoch * (1000 / 8))
        writer.add_scalar('Loss/loss_kernel', loss_kernel, batch_idx + epoch * (1000 / 8))
        writer.add_scalar('Loss/loss_agg', loss_agg, batch_idx + epoch * (1000 / 8))
        writer.add_scalar('Loss/loss_ldis', loss_ldis, batch_idx + epoch * (1000 / 8))
        losses.update(loss.item(), imgs.size(0))
        losses_Lagg.update(loss_agg.item(), imgs.size(0))
        losses_Ldis.update(loss_ldis.item(), imgs.size(0))
        optimizer.zero_grad()
        loss.backward()
        # print('loss_text',loss_text.grad)
        # print('loss_kernel',loss_kernel.grad)
        # print('loss_agg',loss_agg.grad)
        # print('loss_ldis',loss_ldis.grad)
        # print('loss',loss.grad)
        optimizer.step()
        score_text = cal_text_score(texts, gt_texts, training_masks, running_metric_text)
        score_kernel = cal_kernel_score(kernels, gt_kernels, gt_texts, training_masks, running_metric_kernel)
        batch_time.update(time.time() - end)
        end = time.time()
        if batch_idx % 20 == 0:
            output_log = '({batch}/{size}) Batch: {bt:.3f}s | TOTAL: {total:.0f}min | ETA: {eta:.0f}min | Loss: {loss:.4f} | Acc_t: {acc: .4f} | IOU_t: {iou_t: .4f} | IOU_k: {iou_k: .4f} | Lagg: {lagg:.4f} | Ldis: {ldis:.4f}'.format(
                batch=batch_idx + 1,
                size=len(train_loader),
                bt=batch_time.avg,
                total=batch_time.avg * batch_idx / 60.0,
                eta=batch_time.avg * (len(train_loader) - batch_idx) / 60.0,
                loss=losses.avg,
                acc=score_text['Mean Acc'],
                iou_t=score_text['Mean IoU'],
                iou_k=score_kernel['Mean IoU'],
                lagg=losses_Lagg.avg,
                ldis=losses_Ldis.avg
            )
            print(output_log)
            sys.stdout.flush()
    return (losses.avg, score_text['Mean Acc'], score_kernel['Mean Acc'],
            score_text['Mean IoU'], score_kernel['Mean IoU'],
            losses_Lagg.avg, losses_Ldis.avg)
def train(data_path, models_path, backend, snapshot, crop_x, crop_y, batch_size, alpha, epochs, start_lr, milestones, gpu):
    """Train PSPNet on Pascal VOC.

    Args:
        data_path: root of the VOC dataset (expanded to an absolute path).
        models_path: directory where checkpoints are written.
        backend: feature-extractor name forwarded to build_network.
        snapshot: optional checkpoint to resume from.
        crop_x, crop_y: unused here; kept for caller compatibility.
        batch_size: training batch size.
        alpha: weight of the auxiliary classification loss.
        epochs: number of epochs to run (offset by the resumed epoch).
        start_lr: initial Adam learning rate.
        milestones: comma-separated epoch indices for LR decay (gamma=0.1).
        gpu: value for CUDA_VISIBLE_DEVICES.
    """
    os.environ["CUDA_VISIBLE_DEVICES"] = gpu
    net, starting_epoch = build_network(snapshot, backend)
    data_path = os.path.abspath(os.path.expanduser(data_path))
    models_path = os.path.abspath(os.path.expanduser(models_path))
    os.makedirs(models_path, exist_ok=True)

    '''
    To follow this training routine you need a DataLoader that yields the tuples
    of the following format:
    (Bx3xHxW FloatTensor x, BxHxW LongTensor y, BxN LongTensor y_cls) where
    x - batch of input images,
    y - batch of groung truth seg maps,
    y_cls - batch of 1D tensors of dimensionality N: N total number of classes,
    y_cls[i, T] = 1 if class T is present in image i, 0 otherwise
    '''
    voc_data = pascalVOCLoader(root=data_path, is_transform=True, augmentations=None)
    train_loader = DataLoader(voc_data, batch_size=batch_size, shuffle=True, num_workers=0)
    max_steps = len(voc_data)
    class_weights = None

    optimizer = optim.Adam(net.parameters(), lr=start_lr)
    scheduler = MultiStepLR(optimizer, milestones=[int(x) for x in milestones.split(',')], gamma=0.1)
    running_score = runningScore(21)

    # FIX: nn.NLLLoss2d was deprecated and removed from modern PyTorch;
    # nn.NLLLoss accepts the same 4D (B, C, H, W) input.  The criteria do not
    # depend on the epoch, so build them once instead of once per epoch.
    seg_criterion = nn.NLLLoss(weight=class_weights)
    cls_criterion = nn.BCEWithLogitsLoss(weight=class_weights)

    for epoch in range(starting_epoch, starting_epoch + epochs):
        epoch_losses = []
        net.train()
        print('------------epoch[{}]----------'.format(epoch + 1))
        for i, (x, y, y_cls) in enumerate(train_loader):
            optimizer.zero_grad()
            x, y, y_cls = Variable(x).cuda(), Variable(y).cuda(), Variable(y_cls).float().cuda()
            out, out_cls = net(x)
            pred = out.data.max(1)[1].cpu().numpy()
            seg_loss, cls_loss = seg_criterion(out, y), cls_criterion(out_cls, y_cls)
            # main segmentation loss plus alpha-weighted auxiliary classification loss
            loss = seg_loss + alpha * cls_loss
            epoch_losses.append(loss.item())
            running_score.update(y.data.cpu().numpy(), pred)
            if (i + 1) % 138 == 0:
                score, class_iou = running_score.get_scores()
                for k, v in score.items():
                    print(k, v)
                    logger.info('{}:{}'.format(k, v))
                running_score.reset()
                print_format_str = "Epoch[{}] batch[{}] loss = {:.4f} LR = {}"
                # FIX: get_lr() is deprecated and unreliable outside scheduler.step();
                # get_last_lr() reports the LR actually in effect.
                print_str = print_format_str.format(epoch + 1, i + 1, loss.item(), scheduler.get_last_lr()[0])
                print(print_str)
                logger.info(print_str)
            loss.backward()
            optimizer.step()
        scheduler.step()
        if epoch + 1 > 20:
            # checkpoint only after epoch 20, named by epoch and mean epoch loss
            train_loss = ('%.4f' % np.mean(epoch_losses))
            torch.save(net.state_dict(), os.path.join(models_path, '_'.join(["PSPNet", str(epoch + 1), train_loss]) + '.pth'))
def train(cycle_num, dirs, path_to_net, plotter, batch_size=12, test_split=0.3, random_state=666, epochs=100, learning_rate=0.0001, momentum=0.9, num_folds=5, num_slices=155, n_classes=4):
    """
    Applies training on the network
    Args:
        cycle_num (int): number of cycle in n-fold (num_folds) cross validation
        dirs (string): path to dataset subject directories
        path_to_net (string): path to directory where to save network
        plotter (callable): visdom plotter
        batch_size - default (int): batch size
        test_split - default (float): percentage of test split
        random_state - default (int): seed for k-fold cross validation
        epochs - default (int): number of epochs
        learning_rate - default (float): learning rate
        momentum - default (float): momentum
        num_folds - default (int): number of folds in cross validation
        num_slices - default (int): number of slices per volume
        n_classes - default (int): number of classes (regions)
    """
    print('Setting started', flush=True)
    # Creating data indices
    # arange len of list of subject dirs
    indices = np.arange(len(glob.glob(dirs + '*')))
    test_indices, trainset_indices = get_test_indices(indices, test_split)
    # kfold index generator
    for cv_num, (train_indices, val_indices) in enumerate(
            get_train_cv_indices(trainset_indices, num_folds, random_state)):
        # splitted the 5-fold CV in 5 jobs: each job handles exactly one fold
        if cv_num != int(cycle_num):
            continue
        net = U_Net()
        device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
        num_GPU = torch.cuda.device_count()
        if num_GPU > 1:
            print('Let us use {} GPUs!'.format(num_GPU), flush=True)
            net = nn.DataParallel(net)
        net.to(device)
        criterion = nn.CrossEntropyLoss()
        # alternate optimizer per fold: even folds use SGD, odd folds Adam
        if cycle_num % 2 == 0:
            optimizer = optim.SGD(net.parameters(), lr=learning_rate, momentum=momentum)
        else:
            optimizer = optim.Adam(net.parameters(), lr=learning_rate)
        # decay LR as soon as the validation loss stops improving (patience=0)
        scheduler = ReduceLROnPlateau(optimizer, threshold=1e-6, patience=0)
        print('cv cycle number: ', cycle_num, flush=True)
        start = time.time()
        print('Start Train and Val loading', flush=True)
        MRIDataset_train = dataset.MRIDataset(dirs, train_indices)
        MRIDataset_val = dataset.MRIDataset(dirs, val_indices)
        datalengths = {
            'train': len(MRIDataset_train),
            'val': len(MRIDataset_val)
        }
        dataloaders = {
            'train': get_dataloader(MRIDataset_train, batch_size, num_GPU),
            'val': get_dataloader(MRIDataset_val, batch_size, num_GPU)
        }
        print('Train and Val loading took: ', time.time() - start, flush=True)
        # make loss and acc history for train and val separatly
        # Setup Metrics
        running_metrics_val = runningScore(n_classes)
        running_metrics_train = runningScore(n_classes)
        val_loss_meter = averageMeter()
        train_loss_meter = averageMeter()
        itr = 0  # global plot step, shared between train and val phases
        iou_best = 0.
        for epoch in tqdm(range(epochs), desc='Epochs'):
            print('Epoch: ', epoch + 1, flush=True)
            phase = 'train'
            print('Phase: ', phase, flush=True)
            start = time.time()
            # Set model to training mode
            net.train()
            # Iterate over data.
            for i, data in tqdm(enumerate(dataloaders[phase]),
                                desc='Data Iteration ' + phase):
                if (i + 1) % 100 == 0:
                    print('Number of Iteration [{}/{}]'.format(
                        i + 1, int(datalengths[phase] / batch_size)),
                        flush=True)
                # get the inputs
                inputs = data['mri_data'].to(device)
                GT = data['seg'].to(device)
                subject_slice_path = data['subject_slice_path']
                # Clear all accumulated gradients
                optimizer.zero_grad()
                # Predict classes using inputs from the train set
                SR = net(inputs)
                # Compute the loss based on the predictions and
                # actual segmentation
                loss = criterion(SR, GT)
                # Backpropagate the loss
                loss.backward()
                # Adjust parameters according to the computed
                # gradients -- weight update
                optimizer.step()
                # Track and plot metrics and loss, and save network
                predictions = SR.data.max(1)[1].cpu().numpy()
                GT_cpu = GT.data.cpu().numpy()
                running_metrics_train.update(GT_cpu, predictions)
                train_loss_meter.update(loss.item(), n=1)
                # plot every 100th batch only
                if (i + 1) % 100 == 0:
                    itr += 1
                    score, class_iou = running_metrics_train.get_scores()
                    for k, v in score.items():
                        plotter.plot(k, 'itr', phase, k, itr, v)
                    for k, v in class_iou.items():
                        print('Class {} IoU: {}'.format(k, v), flush=True)
                        plotter.plot(
                            str(k) + ' Class IoU', 'itr', phase,
                            str(k) + ' Class IoU', itr, v)
                    print('Loss Train', train_loss_meter.avg, flush=True)
                    plotter.plot('Loss', 'itr', phase, 'Loss Train', itr,
                                 train_loss_meter.avg)
            print('Phase {} took {} s for whole {}set!'.format(
                phase, time.time() - start, phase), flush=True)
            # Validation Phase
            phase = 'val'
            print('Phase: ', phase, flush=True)
            start = time.time()
            # Set model to evaluation mode
            net.eval()
            start = time.time()
            with torch.no_grad():
                # Iterate over data.
                for i, data in tqdm(enumerate(dataloaders[phase]),
                                    desc='Data Iteration ' + phase):
                    if (i + 1) % 100 == 0:
                        print('Number of Iteration [{}/{}]'.format(
                            i + 1, int(datalengths[phase] / batch_size)),
                            flush=True)
                    # get the inputs
                    inputs = data['mri_data'].to(device)
                    GT = data['seg'].to(device)
                    subject_slice_path = data['subject_slice_path']
                    # Clear all accumulated gradients
                    # NOTE(review): zero_grad() inside torch.no_grad() has no
                    # effect on evaluation; presumably copied from the train loop.
                    optimizer.zero_grad()
                    # Predict classes using inputs from the train set
                    SR = net(inputs)
                    # Compute the loss based on the predictions and
                    # actual segmentation
                    loss = criterion(SR, GT)
                    # Track and plot metrics and loss
                    predictions = SR.data.max(1)[1].cpu().numpy()
                    GT_cpu = GT.data.cpu().numpy()
                    running_metrics_val.update(GT_cpu, predictions)
                    val_loss_meter.update(loss.item(), n=1)
                    if (i + 1) % 100 == 0:
                        itr += 1
                        score, class_iou = running_metrics_val.get_scores()
                        for k, v in score.items():
                            plotter.plot(k, 'itr', phase, k, itr, v)
                        for k, v in class_iou.items():
                            print('Class {} IoU: {}'.format(k, v), flush=True)
                            plotter.plot(
                                str(k) + ' Class IoU', 'itr', phase,
                                str(k) + ' Class IoU', itr, v)
                        print('Loss Val', val_loss_meter.avg, flush=True)
                        plotter.plot('Loss ', 'itr', phase, 'Loss Val', itr,
                                     val_loss_meter.avg)
                # checkpoint every 10th epoch if mean IoU improved
                # NOTE(review): `score` is only (re)bound inside the
                # `(i + 1) % 100 == 0` branches; with fewer than 100 val
                # batches this reads a stale train-phase score or raises
                # NameError — confirm against the dataset size.
                if (epoch + 1) % 10 == 0:
                    if score['Mean IoU'] > iou_best:
                        save_net(path_to_net, batch_size, epoch, cycle_num,
                                 train_indices, val_indices, test_indices,
                                 net, optimizer)
                        iou_best = score['Mean IoU']
                    save_output(epoch, path_to_net, subject_slice_path,
                                SR.data.cpu().numpy(), GT_cpu)
            print('Phase {} took {} s for whole {}set!'.format(
                phase, time.time() - start, phase), flush=True)
            # Call the learning rate adjustment function after every epoch
            scheduler.step(val_loss_meter.avg)
        # save network after training
        save_net(path_to_net, batch_size, epochs, cycle_num, train_indices,
                 val_indices, test_indices, net, optimizer, iter_num=None)
def train(cfg, logger):
    """Train a segmentation model described by *cfg*, logging to *logger*.

    Seeds all RNGs, builds the train/val dataloaders, optionally resumes from
    a checkpoint, then runs the iteration-based training loop with periodic
    validation and best-Dice checkpointing.
    """
    # Setup Seeds
    torch.manual_seed(cfg.get("seed", 1337))
    torch.cuda.manual_seed(cfg.get("seed", 1337))
    np.random.seed(cfg.get("seed", 1337))
    random.seed(cfg.get("seed", 1337))

    # Setup Device
    device = torch.device("cuda:{}".format(cfg["training"]["gpu_idx"])
                          if torch.cuda.is_available() else "cpu")

    # Setup Augmentations (read from config; not applied further here)
    augmentations = cfg["training"].get("augmentations", None)

    # Setup Dataloader
    data_loader = get_loader(cfg["data"]["dataset"])
    data_path = cfg["data"]["path"]
    t_loader = data_loader(
        data_path,
        split=cfg["data"]["train_split"],
    )
    v_loader = data_loader(
        data_path,
        split=cfg["data"]["val_split"],
    )
    n_classes = t_loader.n_classes
    n_val = len(v_loader.files['val'])
    trainloader = data.DataLoader(
        t_loader,
        batch_size=cfg["training"]["batch_size"],
        num_workers=cfg["training"]["n_workers"],
        shuffle=True,
    )
    valloader = data.DataLoader(v_loader,
                                batch_size=cfg["training"]["batch_size"],
                                num_workers=cfg["training"]["n_workers"])

    # Setup Metrics
    running_metrics_val = runningScore(n_classes, n_val)

    # Setup Model
    model = get_model(cfg["model"], n_classes).to(device)
    model = torch.nn.DataParallel(model, device_ids=[cfg["training"]["gpu_idx"]])

    # Setup Optimizer, lr_scheduler and Loss Function
    optimizer_cls = get_optimizer(cfg)
    optimizer_params = {
        k: v
        for k, v in cfg["training"]["optimizer"].items() if k != "name"
    }
    optimizer = optimizer_cls(model.parameters(), **optimizer_params)
    logger.info("Using optimizer {}".format(optimizer))
    scheduler = get_scheduler(optimizer, cfg["training"]["lr_schedule"])
    loss_fn = get_loss_function(cfg)
    logger.info("Using loss {}".format(loss_fn))

    # FIX: initialize start_iter BEFORE the resume block.  It used to be reset
    # to 0 unconditionally after resuming, silently discarding the iteration
    # counter loaded from the checkpoint.
    start_iter = 0

    # Resume Trained Model
    if cfg["training"]["resume"] is not None:
        if os.path.isfile(cfg["training"]["resume"]):
            logger.info(
                "Loading model and optimizer from checkpoint '{}'".format(
                    cfg["training"]["resume"]))
            checkpoint = torch.load(cfg["training"]["resume"])
            model.load_state_dict(checkpoint["model_state"])
            optimizer.load_state_dict(checkpoint["optimizer_state"])
            scheduler.load_state_dict(checkpoint["scheduler_state"])
            start_iter = checkpoint["epoch"]
            logger.info("Loaded checkpoint '{}' (iter {})".format(
                cfg["training"]["resume"], checkpoint["epoch"]))
        else:
            logger.info("No checkpoint found at '{}'".format(
                cfg["training"]["resume"]))

    # Start Training
    val_loss_meter = averageMeter()
    time_meter = averageMeter()
    best_dice = -100.0
    i = start_iter
    flag = True
    while i <= cfg["training"]["train_iters"] and flag:
        for (images, labels, img_name) in trainloader:
            i += 1
            start_ts = time.time()
            model.train()
            images = images.to(device)
            labels = labels.to(device)

            optimizer.zero_grad()
            outputs = model(images)
            loss = loss_fn(input=outputs, target=labels)
            loss.backward()
            optimizer.step()
            # FIX: step the scheduler AFTER optimizer.step(); since
            # PyTorch 1.1 the reverse order skips the initial LR value and
            # emits a warning.
            scheduler.step()

            time_meter.update(time.time() - start_ts)

            # print train loss
            if (i + 1) % cfg["training"]["print_interval"] == 0:
                fmt_str = "Iter [{:d}/{:d}] Loss: {:.4f} Time/Image: {:.4f}"
                print_str = fmt_str.format(
                    i + 1,
                    cfg["training"]["train_iters"],
                    loss.item(),
                    time_meter.avg / cfg["training"]["batch_size"],
                )
                print(print_str)
                logger.info(print_str)
                time_meter.reset()

            # validation
            if (i + 1) % cfg["training"]["val_interval"] == 0 or (
                    i + 1) == cfg["training"]["train_iters"]:
                model.eval()
                with torch.no_grad():
                    for i_val, (images_val, labels_val,
                                img_name_val) in tqdm(enumerate(valloader)):
                        images_val = images_val.to(device)
                        labels_val = labels_val.to(device)
                        outputs = model(images_val)
                        val_loss = loss_fn(input=outputs, target=labels_val)
                        pred = outputs.data.max(1)[1].cpu().numpy()
                        gt = labels_val.data.cpu().numpy()
                        running_metrics_val.update(gt, pred, i_val)
                        val_loss_meter.update(val_loss.item())
                logger.info("Iter %d Loss: %.4f" % (i + 1, val_loss_meter.avg))

                # print val metrics
                score, class_dice = running_metrics_val.get_scores()
                for k, v in score.items():
                    print(k, v)
                    logger.info("{}: {}".format(k, v))
                for k, v in class_dice.items():
                    logger.info("{}: {}".format(k, v))
                val_loss_meter.reset()
                running_metrics_val.reset()

                # save model whenever the mean Dice improves
                if score["Dice : \t"] >= best_dice:
                    best_dice = score["Dice : \t"]
                    state = {
                        "epoch": i + 1,
                        "model_state": model.state_dict(),
                        "optimizer_state": optimizer.state_dict(),
                        "scheduler_state": scheduler.state_dict(),
                        "best_dice": best_dice,
                    }
                    save_path = os.path.join(
                        cfg["training"]["model_dir"],
                        "{}_{}.pkl".format(cfg["model"]["arch"],
                                           cfg["data"]["dataset"]),
                    )
                    torch.save(state, save_path)

            if (i + 1) == cfg["training"]["train_iters"]:
                flag = False
                break
def train(train_loader, model, criterion, optimizer, epoch):
    """Run one training epoch of the PSE-style text detector.

    Returns (avg loss, text mean acc, kernel mean acc, text mean IoU,
    kernel mean IoU); also saves the model weights to 'training_model'.
    """
    model.train()

    batch_time = AverageMeter()
    data_time = AverageMeter()
    losses = AverageMeter()
    running_metric_text = runningScore(2)
    running_metric_kernel = runningScore(2)

    end = time.time()
    for batch_idx, (imgs, gt_texts, gt_kernels, training_masks) in enumerate(train_loader):
        data_time.update(time.time() - end)

        imgs = imgs.to(device)
        gt_texts = gt_texts.to(device)
        gt_kernels = gt_kernels.to(device)
        training_masks = training_masks.to(device)

        # channel 0 = full text map, channels 1.. = shrunk kernel maps
        outputs = model(imgs)
        texts = outputs[:, 0, :, :]
        kernels = outputs[:, 1:, :, :]

        # text loss only on OHEM-selected pixels
        selected_masks = ohem_batch(texts, gt_texts, training_masks)
        selected_masks = selected_masks.to(device)

        loss_text = criterion(texts, gt_texts, selected_masks)

        loss_kernels = []
        # kernel loss restricted to pixels where the text prediction is
        # confident (sigmoid > 0.5) AND the training mask allows it
        mask0 = torch.sigmoid(texts).data.cpu().numpy()
        mask1 = training_masks.data.cpu().numpy()
        selected_masks = ((mask0 > 0.5) & (mask1 > 0.5)).astype('float32')
        selected_masks = torch.from_numpy(selected_masks).float()
        selected_masks = selected_masks.to(device)
        for i in range(args.kernelnum - 1):
            kernel_i = kernels[:, i, :, :]
            gt_kernel_i = gt_kernels[:, i, :, :]
            loss_kernel_i = criterion(kernel_i, gt_kernel_i, selected_masks)
            loss_kernels.append(loss_kernel_i)
        loss_kernel = sum(loss_kernels) / len(loss_kernels)

        # fixed 0.7 / 0.3 weighting between text and kernel losses
        loss = 0.7 * loss_text + 0.3 * loss_kernel
        losses.update(loss.item(), imgs.size(0))

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        score_text = cal_text_score(texts, gt_texts, training_masks, running_metric_text)
        score_kernel = cal_kernel_score(kernels, gt_kernels, gt_texts, training_masks, running_metric_kernel)

        batch_time.update(time.time() - end)
        end = time.time()
        if batch_idx % 20 == 0:
            output_log = '[{epoch}/{allepoch}][{batch}/{size}] Batch: {bt:.3f}s | TOTAL: {total:.0f}min | ETA: {eta:.0f}min | Loss: {loss:.4f} | Acc_t: {acc: .4f} | IOU_t: {iou_t: .4f} | IOU_k: {iou_k: .4f}'.format(
                epoch=epoch,
                allepoch=args.n_epoch,
                batch=batch_idx + 1,
                size=len(train_loader),
                bt=batch_time.avg,
                total=batch_time.avg * batch_idx / 60.0,
                eta=batch_time.avg * (len(train_loader) - batch_idx) / 60.0,
                loss=losses.avg,
                acc=score_text['Mean Acc'],
                iou_t=score_text['Mean IoU'],
                iou_k=score_kernel['Mean IoU'])
            print(output_log)
            sys.stdout.flush()
    # overwrite the running checkpoint after every epoch
    torch.save(model.state_dict(), 'training_model')
    return (losses.avg, score_text['Mean Acc'], score_kernel['Mean Acc'],
            score_text['Mean IoU'], score_kernel['Mean IoU'])
def test(cfg):
    """Evaluate a trained model on the validation split described by *cfg*.

    Saves the decoded segmentation map of every validation image to
    cfg["testing"]["path"], prints the aggregate scores, and optionally
    renders a per-class Dice boxplot.
    """
    device = torch.device("cuda:{}".format(cfg["training"]["gpu_idx"])
                          if torch.cuda.is_available() else "cpu")

    data_loader = get_loader(cfg["data"]["dataset"])
    data_path = cfg["data"]["path"]
    v_loader = data_loader(data_path, split='val')
    n_classes = v_loader.n_classes
    n_val = len(v_loader.files['val'])
    valLoader = data.DataLoader(v_loader,
                                batch_size=1,
                                num_workers=cfg["training"]["n_workers"])

    # build model and restore trained weights
    model = get_model(cfg["model"], n_classes).to(device)
    state = convert_state_dict(
        torch.load(cfg["testing"]["trained_model"],
                   map_location=device)["model_state"])
    model.load_state_dict(state)
    model.eval()
    model.to(device)

    running_metrics_val = runningScore(n_classes, n_val)
    with torch.no_grad():
        for i_val, (images_val, labels_val,
                    img_name_val) in tqdm(enumerate(valLoader)):
            images_val = images_val.to(device)
            labels_val = labels_val.to(device)
            outputs = model(images_val)
            pred = np.squeeze(outputs.data.max(1)[1].cpu().numpy())
            gt = np.squeeze(labels_val.data.cpu().numpy())
            running_metrics_val.update(gt, pred, i_val)
            # persist the colorized prediction alongside the metrics
            decoded = v_loader.decode_segmap(pred, plot=False)
            m.imsave(
                pjoin(cfg["testing"]["path"],
                      '{}.bmp'.format(img_name_val[0])), decoded)

    score = running_metrics_val.get_scores()
    acc_all, dsc_cls = running_metrics_val.get_list()
    for k, v in score[0].items():
        print(k, v)

    # FIX: compare by truthiness instead of "== True" (PEP 8 E712)
    if cfg["testing"]["boxplot"]:
        sns.set_style("whitegrid")
        labels = ['CSF', 'Gray Matter', 'White Matter']
        fig1, ax1 = plt.subplots()
        ax1.set_title('Basic Plot')
        # column 0 is background, so plot classes 1..n_classes-1 only
        ax1 = sns.boxplot(data=dsc_cls.transpose()[:, 1:n_classes])
        ax1.set_xlabel('Three separate samples')
        ax1.set_ylabel('Dice Score')
        # path to save boxplot
        plt.savefig('/home/jwliu/disk/kxie/CNN_LSTM/test_results/box.pdf')
def train(cfg, writer, logger):
    """Multi-modal domain-adaptation training loop (GTA5 -> Cityscapes
    clear/foggy/rain).

    Builds the datasets and CustomModel, then alternates source/target
    batches, routing each image to its modality branch by substrings in the
    image path.  Periodically logs losses to *writer* and runs validation.
    """
    torch.manual_seed(cfg.get('seed', 1337))
    torch.cuda.manual_seed(cfg.get('seed', 1337))
    np.random.seed(cfg.get('seed', 1337))
    random.seed(cfg.get('seed', 1337))
    ## create dataset
    default_gpu = cfg['model']['default_gpu']
    device = torch.device(
        "cuda:{}".format(default_gpu) if torch.cuda.is_available() else 'cpu')
    datasets = create_dataset(cfg, writer, logger)

    use_pseudo_label = False
    model = CustomModel(cfg, writer, logger, use_pseudo_label, modal_num=3)

    # Setup Metrics
    running_metrics_val = runningScore(cfg['data']['target']['n_class'])
    source_running_metrics_val = runningScore(cfg['data']['target']['n_class'])
    val_loss_meter = averageMeter()
    source_val_loss_meter = averageMeter()
    time_meter = averageMeter()
    loss_fn = get_loss_function(cfg)
    flag_train = True

    epoches = cfg['training']['epoches']

    source_train_loader = datasets.source_train_loader
    target_train_loader = datasets.target_train_loader
    logger.info('source train batchsize is {}'.format(
        source_train_loader.args.get('batch_size')))
    print('source train batchsize is {}'.format(
        source_train_loader.args.get('batch_size')))
    logger.info('target train batchsize is {}'.format(
        target_train_loader.batch_size))
    print('target train batchsize is {}'.format(
        target_train_loader.batch_size))

    val_loader = None
    if cfg.get('valset') == 'gta5':
        val_loader = datasets.source_valid_loader
        logger.info('valset is gta5')
        print('valset is gta5')
    else:
        val_loader = datasets.target_valid_loader
        logger.info('valset is cityscapes')
        print('valset is cityscapes')
    logger.info('val batchsize is {}'.format(val_loader.batch_size))
    print('val batchsize is {}'.format(val_loader.batch_size))

    # load category anchors
    """
    objective_vectors = torch.load('category_anchors')
    model.objective_vectors = objective_vectors['objective_vectors']
    model.objective_vectors_num = objective_vectors['objective_num']
    """

    # begin training
    model.iter = 0
    for epoch in range(epoches):
        if not flag_train:
            break
        if model.iter > cfg['training']['train_iters']:
            break

        if use_pseudo_label:
            # monitoring the accuracy and recall of CAG-based PLA and
            # probability-based PLA
            score_cl, _ = model.metrics.running_metrics_val_clusters.get_scores()
            print('clus_IoU: {}'.format(score_cl["Mean IoU : \t"]))

            logger.info('clus_IoU: {}'.format(score_cl["Mean IoU : \t"]))
            logger.info('clus_Recall: {}'.format(
                model.metrics.calc_mean_Clu_recall()))
            logger.info(model.metrics.classes_recall_clu[:, 0] /
                        model.metrics.classes_recall_clu[:, 1])
            logger.info('clus_Acc: {}'.format(
                np.mean(model.metrics.classes_recall_clu[:, 0] /
                        model.metrics.classes_recall_clu[:, 1])))
            logger.info(model.metrics.classes_recall_clu[:, 0] /
                        model.metrics.classes_recall_clu[:, 2])

            score_cl, _ = model.metrics.running_metrics_val_threshold.get_scores()
            logger.info('thr_IoU: {}'.format(score_cl["Mean IoU : \t"]))
            logger.info('thr_Recall: {}'.format(
                model.metrics.calc_mean_Thr_recall()))
            logger.info(model.metrics.classes_recall_thr[:, 0] /
                        model.metrics.classes_recall_thr[:, 1])
            logger.info('thr_Acc: {}'.format(
                np.mean(model.metrics.classes_recall_thr[:, 0] /
                        model.metrics.classes_recall_thr[:, 1])))
            logger.info(model.metrics.classes_recall_thr[:, 0] /
                        model.metrics.classes_recall_thr[:, 2])
            model.metrics.reset()

        for (target_image, target_label,
             target_img_name) in datasets.target_train_loader:
            model.iter += 1
            i = model.iter
            if i > cfg['training']['train_iters']:
                break
            source_batchsize = cfg['data']['source']['batch_size']
            # load source data
            images, labels, source_img_name = datasets.source_train_loader.next()
            start_ts = time.time()
            images = images.to(device)
            labels = labels.to(device)
            # load target data
            target_image = target_image.to(device)
            target_label = target_label.to(device)

            model.train(logger=logger)
            # FIX: "is True" instead of "== True" (same semantics, E712)
            if cfg['training'].get('freeze_bn') is True:
                model.freeze_bn_apply()
            model.optimizer_zerograd()

            # Switch on modals: derive the modality id of every image from
            # substrings of its file path
            source_modal_ids = []
            for _img_name in source_img_name:
                if 'gtav2cityscapes' in _img_name:
                    source_modal_ids.append(0)
                elif 'gtav2cityfoggy' in _img_name:
                    source_modal_ids.append(1)
                elif 'gtav2cityrain' in _img_name:
                    source_modal_ids.append(2)
                else:
                    # FIX: raise instead of `assert False` — asserts are
                    # stripped under `python -O`
                    raise ValueError(
                        "[ERROR] unknown image source, neither gtav2cityscapes, gtav2cityfoggy!"
                    )

            target_modal_ids = []
            for _img_name in target_img_name:
                if 'Cityscapes_foggy' in _img_name:
                    target_modal_ids.append(1)
                elif 'Cityscapes_rain' in _img_name:
                    target_modal_ids.append(2)
                else:
                    target_modal_ids.append(0)

            loss, loss_cls_L2, loss_pseudo = model.step(
                images, labels, source_modal_ids, target_image, target_label,
                target_modal_ids, use_pseudo_label)
            # scheduler step
            model.scheduler_step()
            if loss_cls_L2 > 10:
                logger.info('loss_cls_l2 abnormal!!')
            time_meter.update(time.time() - start_ts)

            if (i + 1) % cfg['training']['print_interval'] == 0:
                unchanged_cls_num = 0
                if use_pseudo_label:
                    fmt_str = "Epoches [{:d}/{:d}] Iter [{:d}/{:d}] Loss: {:.4f} Loss_L2: {:.4f} Loss_pseudo: {:.4f} Time/Image: {:.4f}"
                else:
                    fmt_str = "Epoches [{:d}/{:d}] Iter [{:d}/{:d}] Loss_GTA: {:.4f} Loss_adv: {:.4f} Loss_D: {:.4f} Time/Image: {:.4f}"
                print_str = fmt_str.format(
                    epoch + 1, epoches, i + 1, cfg['training']['train_iters'],
                    loss.item(), loss_cls_L2.item(), loss_pseudo.item(),
                    time_meter.avg / cfg['data']['source']['batch_size'])
                print(print_str)
                logger.info(print_str)
                logger.info(
                    'unchanged number of objective class vector: {}'.format(
                        unchanged_cls_num))
                if use_pseudo_label:
                    loss_names = [
                        'train_loss', 'train_L2Loss', 'train_pseudoLoss'
                    ]
                else:
                    loss_names = [
                        'train_loss_GTA', 'train_loss_adv', 'train_loss_D'
                    ]
                writer.add_scalar('loss/{}'.format(loss_names[0]),
                                  loss.item(), i + 1)
                writer.add_scalar('loss/{}'.format(loss_names[1]),
                                  loss_cls_L2.item(), i + 1)
                writer.add_scalar('loss/{}'.format(loss_names[2]),
                                  loss_pseudo.item(), i + 1)
                time_meter.reset()

                if use_pseudo_label:
                    score_cl, _ = model.metrics.running_metrics_val_clusters.get_scores()
                    logger.info('clus_IoU: {}'.format(
                        score_cl["Mean IoU : \t"]))
                    logger.info('clus_Recall: {}'.format(
                        model.metrics.calc_mean_Clu_recall()))
                    logger.info('clus_Acc: {}'.format(
                        np.mean(model.metrics.classes_recall_clu[:, 0] /
                                model.metrics.classes_recall_clu[:, 2])))

                    score_cl, _ = model.metrics.running_metrics_val_threshold.get_scores()
                    logger.info('thr_IoU: {}'.format(
                        score_cl["Mean IoU : \t"]))
                    logger.info('thr_Recall: {}'.format(
                        model.metrics.calc_mean_Thr_recall()))
                    logger.info('thr_Acc: {}'.format(
                        np.mean(model.metrics.classes_recall_thr[:, 0] /
                                model.metrics.classes_recall_thr[:, 2])))

            # evaluation
            if (i + 1) % cfg['training']['val_interval'] == 0 or \
               (i + 1) == cfg['training']['train_iters']:
                validation(
                    model, logger, writer, datasets, device,
                    running_metrics_val, val_loss_meter, loss_fn,
                    source_val_loss_meter, source_running_metrics_val,
                    iters=model.iter)
                torch.cuda.empty_cache()
                logger.info('Best iou until now is {}'.format(model.best_iou))

            if (i + 1) == cfg['training']['train_iters']:
                # FIX: must clear flag_train (the variable tested by the
                # outer epoch loop); the original wrote to an unused local
                # `flag`, so the early-stop signal never propagated.
                flag_train = False
                break
def main(test_args):
    """Evaluate a trained DeepLab-v2 ResNet on the CamVid test split.

    Loads weights from test_args.load_param, optionally refines softmax maps
    with a dense CRF (test_args.crf), and prints overall scores, per-class
    IoU/accuracy, and per-class ground-truth pixel counts.
    """
    testset = "/mnt/iusers01/eee01/mchiwml4/CamVid/test"
    transform = transforms.Compose(
        [transforms.ToTensor(),
         transforms.Normalize(mean, std)])
    test_dataset = DataLoader(Loaddata(testset,
                                       transform=transform,
                                       target_transform=MaskToTensor()),
                              batch_size=1,
                              shuffle=False,
                              num_workers=8)

    label_num = 11  # CamVid classes evaluated here
    model = deeplab_v2.Deeplab_Resnet(label_num)
    model = model.cuda()
    model.load_state_dict(torch.load(test_args.load_param))
    model.eval()

    total = np.zeros((label_num, ))
    running_metrics = runningScore(label_num)

    for j, data in enumerate(test_dataset):
        inputs, labels = data
        inputs = Variable(inputs.cuda())
        outputs = model(inputs)

        H = inputs.size()[2]
        W = inputs.size()[3]
        # upsample the 4th model output back to input resolution
        # NOTE(review): nn.Upsample is built per batch and is deprecated in
        # favour of F.interpolate — consider hoisting/replacing.
        interp_resize = nn.Upsample(size=(int(H), int(W)), mode='bilinear')
        output = interp_resize(outputs[3])
        output = F.softmax(output, dim=1)
        output = output.data.cpu().numpy()

        if test_args.crf:
            # refine per-image probability maps with a dense CRF
            crf_output = np.zeros(output.shape)
            images = inputs.data.cpu().numpy().astype(np.uint8)
            for i, (image, prob_map) in enumerate(zip(images, output)):
                image = image.transpose(1, 2, 0)
                crf_output[i] = dense_crf(image, prob_map)
            output = crf_output

        pred = np.argmax(output, axis=1)
        gt = labels.numpy()

        running_metrics.update(gt, pred)

        for i in range(label_num):
            mask = gt == i  # ground truth mask of class i
            total[i] += np.sum(
                mask)  # total number of pixels of class i (tp+fn)

    score, class_iou, class_acc = running_metrics.get_scores()
    for k, v in score.items():
        print(k, v)
    print('class iou: ')
    for i in range(label_num):
        print(i, class_iou[i])
    print('class acc: ')
    for i in range(label_num):
        print(i, class_acc[i])
    print('number of pixels:')
    print(total)
def train(train_loader, model, criterion, optimizer, epoch):
    """One training epoch of the 6-kernel PSE text detector.

    Returns (avg loss, text mean acc, kernel mean acc, text mean IoU,
    kernel mean IoU) accumulated over the epoch.
    """
    model.train()

    timer_batch = AverageMeter()
    timer_data = AverageMeter()
    loss_meter = AverageMeter()
    metric_text = runningScore(2)
    metric_kernel = runningScore(2)

    tick = time.time()
    for step, batch in enumerate(train_loader):
        imgs, gt_texts, gt_kernels, training_masks = batch
        timer_data.update(time.time() - tick)

        imgs = Variable(imgs.cuda())
        gt_texts = Variable(gt_texts.cuda())
        gt_kernels = Variable(gt_kernels.cuda())
        training_masks = Variable(training_masks.cuda())

        # channel 0 is the full text map; the remaining 6 are shrunk kernels
        outputs = model(imgs)
        texts = outputs[:, 0, :, :]
        kernels = outputs[:, 1:, :, :]

        # dice loss for the text map, restricted to OHEM-selected pixels
        ohem_masks = Variable(ohem_batch(texts, gt_texts, training_masks).cuda())
        loss_text = criterion(texts, gt_texts, ohem_masks)

        # kernel losses count only pixels that are confidently text
        # (sigmoid > 0.5) and not masked out
        text_prob = torch.sigmoid(texts).data.cpu().numpy()
        valid = training_masks.data.cpu().numpy()
        kernel_masks = ((text_prob > 0.5) & (valid > 0.5)).astype('float32')
        kernel_masks = Variable(torch.from_numpy(kernel_masks).float().cuda())
        per_kernel = [
            criterion(kernels[:, k, :, :], gt_kernels[:, k, :, :], kernel_masks)
            for k in range(6)
        ]
        loss_kernel = sum(per_kernel) / len(per_kernel)

        # fixed 0.7/0.3 blend of text and kernel losses
        loss = 0.7 * loss_text + 0.3 * loss_kernel
        loss_meter.update(loss.item(), imgs.size(0))

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        score_text = cal_text_score(texts, gt_texts, training_masks, metric_text)
        score_kernel = cal_kernel_score(kernels, gt_kernels, gt_texts,
                                        training_masks, metric_kernel)

        timer_batch.update(time.time() - tick)
        tick = time.time()
        if step % 20 == 0:
            progress = '({batch}/{size}) Batch: {bt:.3f}s | TOTAL: {total:.0f}min | ETA: {eta:.0f}min | Loss: {loss:.4f} | Acc_t: {acc: .4f} | IOU_t: {iou_t: .4f} | IOU_k: {iou_k: .4f}'.format(
                batch=step + 1,
                size=len(train_loader),
                bt=timer_batch.avg,
                total=timer_batch.avg * step / 60.0,
                eta=timer_batch.avg * (len(train_loader) - step) / 60.0,
                loss=loss_meter.avg,
                acc=score_text['Mean Acc'],
                iou_t=score_text['Mean IoU'],
                iou_k=score_kernel['Mean IoU'])
            print(progress)
            sys.stdout.flush()

    return (loss_meter.avg, score_text['Mean Acc'], score_kernel['Mean Acc'],
            score_text['Mean IoU'], score_kernel['Mean IoU'])
# Evaluation script: run the trained FCN_Dilated road-segmentation net over
# the KITTI validation split and dump prediction/ground-truth images.
net = FCN_Dilated()
net.load_state_dict(torch.load('best_seg_dcgan.wts'))
net.to(device)
net.eval()
dst = kittiLoader(
    '/home-local/rohitrishabh/utilScripts/Segmentation/FCN_Dilated/data_road/',
    split="val")
valloader = data.DataLoader(dst,
                            batch_size=1,
                            shuffle=True,
                            num_workers=1,
                            pin_memory=True)
# binary road / non-road metric
# NOTE(review): `score.update(...)` is never called in this visible chunk,
# so the metric stays empty — confirm whether an update was dropped.
score = runningScore(2)
# NOTE(review): the loop variable `data` shadows the `torch.utils.data`
# module alias used above; safe here only because DataLoader is not
# re-created inside the loop.
for i, data in enumerate(valloader):
    imgs, labels = data
    imgs, labels = imgs.to(device), labels.to(device)
    with torch.no_grad():
        out = net(imgs)
    pred = out.data.max(1)[1].cpu().numpy()
    plt.imshow(pred[0])
    plt.savefig('output_test/' + str(i) + '_out.png')
    # NOTE(review): labels was moved to `device`; if that is a GPU,
    # plt.imshow on a CUDA tensor fails — presumably run on CPU; verify.
    plt.imshow(labels[0])
    plt.savefig('output_test/' + str(i) + '_orig.png')
    print(np.sum(pred))
    gt = labels.data.cpu().numpy()
def train(train_loader, model, criterion, optimizer, epoch, summary_writer):
    """One training epoch of a DB-style detector with probability,
    threshold and binarization heads.

    Logs images/scalars to *summary_writer* every 20 batches and returns
    (avg loss, prob mean acc, binary mean acc, prob mean IoU, binary mean IoU).
    """
    model.train()

    batch_time = AverageMeter()
    data_time = AverageMeter()
    losses = AverageMeter()
    # NOTE(review): the same runningScore instance accumulates both the
    # probability-map and binarization-map statistics below, so the two
    # scores are mixed — confirm this is intended.
    running_metric_text = runningScore(2)
    L1_loss = torch.nn.L1Loss()

    end = time.time()
    for batch_idx, (imgs, gt_texts, gt_kernels, training_masks, ori_imgs,
                    mask_kernels) in enumerate(train_loader):
        data_time.update(time.time() - end)

        imgs = Variable(imgs.cuda())
        gt_texts = Variable(gt_texts.cuda())
        gt_kernels = Variable(gt_kernels.cuda())
        training_masks = Variable(training_masks.cuda())

        outputs = model(imgs)
        probability_map, threshold_map, binarization_map = outputs

        # loss for probability_map (OHEM-selected pixels)
        selected_masks = ohem_batch(probability_map, gt_texts, training_masks)
        selected_masks = Variable(selected_masks.cuda())
        loss_probability = criterion(probability_map, gt_texts, selected_masks)

        # loss for binary_map (independent OHEM selection)
        binary_selected_masks = ohem_batch(binarization_map, gt_texts,
                                           training_masks)
        binary_selected_masks = Variable(binary_selected_masks.cuda())
        loss_binary = criterion(binarization_map, gt_texts,
                                binary_selected_masks)

        # loss for threshold_map: L1 restricted to the kernel mask region
        loss_thresh = L1_loss(threshold_map * mask_kernels,
                              gt_kernels * mask_kernels)

        # threshold loss weighted 10x, as in the DB formulation
        loss = loss_probability + loss_binary + 10 * loss_thresh
        losses.update(loss.item(), imgs.size(0))

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        score_prob = cal_text_score(probability_map, gt_texts, training_masks,
                                    running_metric_text)
        score_binary = cal_text_score(binarization_map, gt_texts,
                                      training_masks, running_metric_text)

        batch_time.update(time.time() - end)
        end = time.time()
        if batch_idx % 20 == 0:
            # visualization: first two samples of the batch
            global_step = epoch * len(train_loader) + batch_idx
            summary_writer.add_images('gt/img',
                                      ori_imgs[:2],
                                      global_step=global_step)
            summary_writer.add_images('gt/score_map',
                                      torch.unsqueeze(gt_texts[:2], 1),
                                      global_step=global_step)
            summary_writer.add_images('gt/thresh_map',
                                      torch.unsqueeze(gt_kernels[:2], 1),
                                      global_step=global_step)
            summary_writer.add_images('predicition/score_map',
                                      torch.sigmoid(probability_map[:2]),
                                      global_step=global_step)
            summary_writer.add_images('predicition/binary_map',
                                      torch.sigmoid(binarization_map[:2]),
                                      global_step=global_step)
            summary_writer.add_images('predicition/threshold_map',
                                      torch.sigmoid(threshold_map[:2]),
                                      global_step=global_step)
            summary_writer.add_scalar('loss/text_loss',
                                      loss_probability,
                                      global_step=global_step)
            summary_writer.add_scalar('loss/binary_loss',
                                      loss_binary,
                                      global_step=global_step)
            summary_writer.add_scalar('loss/thresh_loss',
                                      loss_thresh,
                                      global_step=global_step)
            summary_writer.add_scalar('metric/acc_t',
                                      score_prob['Mean Acc'],
                                      global_step=global_step)
            summary_writer.add_scalar('metric/acc_b',
                                      score_binary['Mean Acc'],
                                      global_step=global_step)
            summary_writer.add_scalar('metric/iou_t',
                                      score_prob['Mean IoU'],
                                      global_step=global_step)
            summary_writer.add_scalar('metric/iou_b',
                                      score_binary['Mean IoU'],
                                      global_step=global_step)
            output_log = '({batch}/{size}) Batch: {bt:.3f}s | TOTAL: {total:.0f}min | ETA: {eta:.0f}min | Loss: {loss:.4f} | Acc_t: {acc_t: .4f} | Acc_b: {acc_b: .4f} | IOU_t: {iou_t: .4f} | IOU_b: {iou_k: .4f}'.format(
                batch=batch_idx + 1,
                size=len(train_loader),
                bt=batch_time.avg,
                total=batch_time.avg * batch_idx / 60.0,
                eta=batch_time.avg * (len(train_loader) - batch_idx) / 60.0,
                loss=losses.avg,
                acc_t=score_prob['Mean Acc'],
                acc_b=score_binary['Mean Acc'],
                iou_t=score_prob['Mean IoU'],
                iou_k=score_binary['Mean IoU'])
            print(output_log)
            sys.stdout.flush()

    return (losses.avg, score_prob['Mean Acc'], score_binary['Mean Acc'],
            score_prob['Mean IoU'], score_binary['Mean IoU'])
def train(train_loader, model, criterion, optimizer, epoch, summary_writer):
    """Run one training epoch for the gaussian-map text detector.

    The model regresses a single gaussian map; the total loss combines the
    three terms from ``weighted_regression`` with a dice-style ``criterion``
    on the region-level ground truth.  Returns (avg loss, center acc,
    center IoU) from the last computed score.
    """
    model.train()
    batch_time = AverageMeter()
    data_time = AverageMeter()
    losses = AverageMeter()
    running_metric_text = runningScore(2)
    # NOTE(review): L1_loss is created but never used in this variant.
    L1_loss = torch.nn.L1Loss()
    end = time.time()
    for batch_idx, (imgs, gt_texts, training_masks, ori_imgs, border_map) in enumerate(train_loader):
        data_time.update(time.time() - end)
        imgs = Variable(imgs.cuda())
        # Targets are subsampled 4x in both spatial dims — presumably to
        # match the model's output stride; TODO confirm against the model.
        gt_texts = Variable(gt_texts[:, ::4, ::4].cuda())
        training_masks = Variable(training_masks[:, ::4, ::4].cuda())
        # border_map is moved to GPU but only used by commented-out losses.
        border_map = Variable(border_map.cuda())
        outputs = model(imgs)
        gaussian_map = outputs
        # gaussian_map, center_map, region_map = outputs
        # Three regression terms: whole-map weighted MSE, region-restricted
        # MSE, and the center-region term.
        weighted_mse_loss, mse_region_loss, loss_center = weighted_regression(
            gaussian_map, gt_texts, training_masks)
        # Center region = pixels whose gaussian gt exceeds 0.7.
        center_gt = torch.where(gt_texts > 0.7, gt_texts, torch.zeros_like(gt_texts))
        # center_mask = torch.where(gt_texts > 0.7, torch.ones_like(gt_texts), torch.zeros_like(gt_texts))
        # Text region = pixels whose gaussian gt exceeds 0.4.
        region_gt = torch.where(gt_texts > 0.4, gt_texts, torch.zeros_like(gt_texts))
        # region_mask = torch.where(gt_texts > 0.4, torch.ones_like(gt_texts), torch.zeros_like(gt_texts))
        # loss for center_map
        # loss_center_dice = criterion(gaussian_map, center_gt, training_masks)
        # loss for region_map
        loss_region_dice = criterion(gaussian_map, region_gt, training_masks)
        # loss for border_map
        # border_mask = 1. - (center_other - border_map)
        # loss_border = criterion(gaussian_map, gt_texts, training_masks)
        loss = loss_center + weighted_mse_loss + mse_region_loss + loss_region_dice
        # print("loss:", loss_center, "loss_region:", loss_region, "weighted_mse_loss:", weighted_mse_loss)
        losses.update(loss.item(), imgs.size(0))
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        # Score the center region only (thresholds 0 / 0.8 passed through).
        score_center = cal_text_score(gaussian_map, gt_texts, training_masks,
                                      running_metric_text, 0, 0.8)
        # score_region = cal_text_score(gaussian_map, gt_texts, training_masks * region_mask, running_metric_text, 0, 0.2)
        batch_time.update(time.time() - end)
        end = time.time()
        if batch_idx % 20 == 0:
            # visualization
            global_step = epoch * len(train_loader) + batch_idx
            maps = torch.sigmoid(gaussian_map[0:1])
            # Thresholded views of the predicted map for TensorBoard.
            center_map = torch.where(maps > 0.8, maps, torch.zeros_like(maps))
            text_map = torch.where(maps > 0.4, maps, torch.zeros_like(maps))
            summary_writer.add_images('gt/img', ori_imgs[0:1], global_step=global_step)
            summary_writer.add_images('gt/score_map', torch.unsqueeze(gt_texts[0:1], 1), global_step=global_step)
            summary_writer.add_images('gt/center_map', torch.unsqueeze(center_gt[0:1], 1), global_step=global_step)
            summary_writer.add_images('gt/region_map', torch.unsqueeze(region_gt[0:1], 1), global_step=global_step)
            # summary_writer.add_images('gt/border_map', torch.unsqueeze(border_mask[0:1], 1), global_step=global_step)
            summary_writer.add_images('predicition/score_map', torch.sigmoid(gaussian_map[0:1]), global_step=global_step)
            summary_writer.add_images('predicition/center_map', torch.sigmoid(center_map[0:1]), global_step=global_step)
            summary_writer.add_images('predicition/region_map', torch.sigmoid(text_map[0:1]), global_step=global_step)
            summary_writer.add_scalar('loss/reg_loss', weighted_mse_loss, global_step=global_step)
            summary_writer.add_scalar('loss/reg_center_loss', loss_center, global_step=global_step)
            # summary_writer.add_scalar('loss/center_dice_loss', loss_center_dice, global_step=global_step)
            summary_writer.add_scalar('loss/region_dice_loss', loss_region_dice, global_step=global_step)
            # summary_writer.add_scalar('loss/border_loss', loss_border, global_step=global_step)
            summary_writer.add_scalar('loss/text_region_loss', mse_region_loss, global_step=global_step)
            summary_writer.add_scalar('metric/acc_c', score_center['Mean Acc'], global_step=global_step)
            summary_writer.add_scalar('metric/iou_c', score_center['Mean IoU'], global_step=global_step)
            # summary_writer.add_scalar('metric/acc_t', score_region['Mean Acc'], global_step=global_step)
            # summary_writer.add_scalar('metric/iou_t', score_region['Mean IoU'], global_step=global_step)
            output_log = '({batch}/{size}) Batch: {bt:.3f}s | TOTAL: {total:.0f}min | ETA: {eta:.0f}min | Loss: {loss:.4f} | Acc_c: {acc_c: .4f} | IOU_c: {iou_c: .4f} '.format(
                batch=batch_idx + 1,
                size=len(train_loader),
                bt=batch_time.avg,
                total=batch_time.avg * batch_idx / 60.0,
                eta=batch_time.avg * (len(train_loader) - batch_idx) / 60.0,
                loss=losses.avg,
                acc_c=score_center['Mean Acc'],
                iou_c=score_center['Mean IoU'],
                # acc_t=score_region['Mean Acc'],
                # iou_t=score_region['Mean IoU'],
            )
            print(output_log)
            sys.stdout.flush()
    return (losses.avg, score_center['Mean Acc'], score_center['Mean IoU'])
def train(
    models_path,
    backend,
    snapshot,
    alpha,
    epochs,
    init_lr,
):
    """Train a PSPNet-style segmentation network with an auxiliary head.

    Builds the network (optionally resuming from ``snapshot``), trains for
    ``epochs`` epochs with NLL losses on both heads (aux head weighted by
    ``alpha``), validates each epoch with ``running_metrics``, and saves the
    best-mean-IoU checkpoint under ``models_path``.

    Args:
        models_path: directory to write checkpoints into (created if absent).
        backend: backbone identifier passed to ``build_network``.
        snapshot: optional checkpoint path to resume from.
        alpha: weight of the auxiliary classification-head loss.
        epochs: number of epochs to train beyond ``starting_epoch``.
        init_lr: base learning rate for Adam and the poly LR schedule.
    """
    # os.environ["CUDA_VISIBLE_DEVICES"] = gpu
    net, starting_epoch = build_network(snapshot, backend)
    # net.train()
    models_path = os.path.abspath(os.path.expanduser(models_path))
    os.makedirs(models_path, exist_ok=True)

    class_weights = torch.ones(num_classes).cuda()
    # BUG FIX: the optimizer previously used an undefined name ``start_lr``
    # (NameError unless it happened to exist as a module global); the
    # intended value is the ``init_lr`` parameter, which the poly LR
    # scheduler below also uses.
    optimizer = optim.Adam(net.parameters(), lr=init_lr, weight_decay=0.0001)
    # Setup Metrics
    running_metrics = runningScore(num_classes)
    best_iou = -100.0

    # Resume training from the checkpoint's epoch.
    for epoch in range(starting_epoch, starting_epoch + epochs):
        # nn.NLLLoss2d is a deprecated (and in recent PyTorch, removed)
        # alias of nn.NLLLoss, which handles 4-D inputs identically.
        seg_criterion = nn.NLLLoss(weight=class_weights)
        # cls_criterion = nn.BCEWithLogitsLoss(weight=class_weights)  # binary classification
        epoch_losses = []
        train_iterator = tqdm(train_loader, total=len(train_loader))
        net.train()
        for x, y, y_cls in train_iterator:
            optimizer.zero_grad()
            # y: torch.Size([16, 1, 256, 256])
            x, y, y_cls = Variable(x).cuda(), Variable(y).cuda(), Variable(
                y_cls).cuda()
            out, out_cls = net(x)
            # print('out_cls:', out_cls.size())  # 16,150,256,256
            seg_loss = seg_criterion(out, y.squeeze(1))
            cls_loss = seg_criterion(out_cls, y.squeeze(1))
            loss = seg_loss + alpha * cls_loss
            # BUG FIX: loss.data[0] raises on 0-dim tensors (PyTorch >= 0.5);
            # .item() is the supported scalar accessor.
            epoch_losses.append(loss.item())
            status = '[{0}] loss = {1:0.5f} avg = {2:0.5f}, '.format(
                epoch + 1, loss.item(), np.mean(epoch_losses))
            train_iterator.set_description(status)  # tqdm can display status text
            loss.backward()
            optimizer.step()

        net.eval()
        # volatile=True was removed from Variable; torch.no_grad() is the
        # equivalent way to disable autograd during validation.
        with torch.no_grad():
            for i_val, (images_val, labels_val, label_cls) in tqdm(enumerate(val_loader)):
                images_val = Variable(images_val.cuda())
                labels_val = Variable(labels_val.cuda())
                # outputs: (batch, num_classes, H, W)
                outputs, outputs_cls = net(images_val)
                pred = outputs.data.max(1)[1].cpu().numpy()
                gt = labels_val.data.cpu().numpy()
                running_metrics.update(gt, pred)

        score, class_iou = running_metrics.get_scores()
        running_metrics.reset()
        if score['Mean IoU : \t'] >= best_iou:
            best_iou = score['Mean IoU : \t']
            print("{}_{}_best_model.pkl".format(
                os.path.join(models_path, 'PSPNet'), 'ADEK'))
            torch.save(
                net.state_dict(),
                "{}_{}_best_model.pkl".format(
                    os.path.join(models_path, 'PSPNet'), 'ADEK'))
        poly_lr_scheduler(optimizer, init_lr, epoch, lr_decay_iter=10,
                          max_iter=100, power=0.9)
# --- Evaluation script setup: collect prediction/ground-truth image pairs ---
# NOTE(review): the Windows path literals are not raw strings; sequences like
# \d, \i, \g are currently passed through literally but this is fragile
# (DeprecationWarning on modern Python) — consider raw strings upstream.
IMG_Path = Path("C:\\Users\dell\Desktop\\tt\img")
# Natural sort so "img2" precedes "img10".
IMG_File = natsort.natsorted(list(IMG_Path.glob("*.png")), alg=natsort.PATH)
IMG_Str = []  # prediction image paths as plain strings
for i in IMG_File:
    IMG_Str.append(str(i))
GT_Path = Path("C:\\Users\dell\Desktop\\tt\gt")
# GT_Path = Path("I:\\DVS_dataset\scnn_result\\vgg_SCNN_merge\merge")
GT_File = natsort.natsorted(list(GT_Path.glob("*.png")), alg=natsort.PATH)
GT_Str = []  # ground-truth image paths as plain strings
for j in GT_File:
    GT_Str.append(str(j))
t = time.time()
# 5-class confusion-matrix accumulator; one entry per label colour below.
running_metrics_val = runningScore(5)
# Grayscale RGB triples that encode the 5 classes in the label images.
label_values = [[0, 0, 0], [100, 100, 100], [150, 150, 150], [200, 200, 200],
                [250, 250, 250]]


def compute_two(img_path, gt_path, img_path2, gt_path2):
    """Convert one prediction/gt image pair to per-pixel class indices.

    NOTE(review): img_path2/gt_path2 are unused in the visible body and no
    return statement is visible — the function appears truncated at this
    chunk boundary; confirm against the full file.
    """
    out = load_image(img_path)
    # gt = load_image(gt_path)
    # Do not use interpolation=cv.INTER_NEAREST here — it gives different
    # results (suspected OpenCV quirk per the original author).
    gt = cv.resize(load_image(gt_path), (512, 256), cv.INTER_NEAREST)
    # val_gt_erode paired with [0,0,0]label value
    # label order: R G B
    # num_classes = len(label_values)
    gt = util.reverse_one_hot(util.one_hot_it(gt, label_values))
    output_image = util.reverse_one_hot(util.one_hot_it(out, label_values))
def validate(self):
    """Evaluate the RGB+IR segmentation model on the validation set.

    Computes loss and segmentation metrics over ``self.val_loader``, saves a
    tile of up to 9 visualizations, appends a row to log.csv, checkpoints the
    model (copying to model_best when mean IoU improves), and prints per-class
    IoU.  Runs under ``torch.no_grad`` with the model in eval mode.
    """
    visualizations = []
    val_metrics = runningScore(self.n_classes)
    val_loss_meter = averageMeter()

    with torch.no_grad():
        self.model.eval()
        for rgb, ir, target in tqdm.tqdm(
                self.val_loader,
                total=len(self.val_loader),
                desc=f'Valid epoch={self.epoch}',
                ncols=80,
                leave=False):

            rgb, ir, target = rgb.to(self.device), ir.to(self.device), target.to(self.device)
            score = self.model(rgb, ir)
            # score = self.model(rgb)

            # Optional per-class weighting supplied by the dataset.
            weight = self.val_loader.dataset.class_weight
            if weight:
                weight = torch.Tensor(weight).to(self.device)

            # ignore_index=-1: pixels labelled -1 do not contribute to loss.
            loss = CrossEntropyLoss(score, target, weight=weight, reduction='mean', ignore_index=-1)
            loss_data = loss.data.item()
            if np.isnan(loss_data):
                raise ValueError('loss is nan while validating')
            val_loss_meter.update(loss_data)

            rgbs = rgb.data.cpu()
            irs = ir.data.cpu()
            # Models may return multiple heads; score the primary one.
            if isinstance(score, (tuple, list)):
                lbl_pred = score[0].data.max(1)[1].cpu().numpy()
            else:
                lbl_pred = score.data.max(1)[1].cpu().numpy()
            lbl_true = target.data.cpu()
            for rgb, ir, lt, lp in zip(rgbs, irs, lbl_true, lbl_pred):
                # Undo normalization for display, then accumulate metrics.
                rgb, ir, lt = self.val_loader.dataset.untransform(rgb, ir, lt)
                val_metrics.update(lt, lp)
                if len(visualizations) < 9:
                    viz = visualize_segmentation(
                        lbl_pred=lp,
                        lbl_true=lt,
                        img=rgb,
                        ir=ir,
                        n_classes=self.n_classes,
                        dataloader=self.train_loader)
                    visualizations.append(viz)

    acc, acc_cls, mean_iou, fwavacc, cls_iu = val_metrics.get_scores()
    metrics = [acc, acc_cls, mean_iou, fwavacc]
    print(f'\nEpoch: {self.epoch}', f'loss: {val_loss_meter.avg}, mIoU: {mean_iou}')

    # Save a tiled image of the collected visualizations.
    out = osp.join(self.out, 'visualization_viz')
    if not osp.exists(out):
        os.makedirs(out)
    out_file = osp.join(out, 'epoch{:0>5d}.jpg'.format(self.epoch))
    # NOTE(review): scipy.misc.imsave was removed in SciPy >= 1.2 — this
    # requires an old SciPy; verify the pinned version.
    scipy.misc.imsave(out_file, get_tile_image(visualizations))

    # Append a CSV row: epoch, 5 blank train columns, val loss, 4 metrics,
    # elapsed seconds (mirrors the train_epoch log layout).
    with open(osp.join(self.out, 'log.csv'), 'a') as f:
        elapsed_time = (
            datetime.datetime.now(pytz.timezone('UTC')) -
            self.timestamp_start).total_seconds()
        log = [self.epoch] + [''] * 5 + \
              [val_loss_meter.avg] + metrics + [elapsed_time]
        log = map(str, log)
        f.write(','.join(log) + '\n')

    # Checkpoint; keep a copy as model_best when mean IoU improves.
    mean_iu = metrics[2]
    is_best = mean_iu > self.best_mean_iu
    if is_best:
        self.best_mean_iu = mean_iu
    torch.save({
        'epoch': self.epoch,
        'arch': self.model.__class__.__name__,
        'optim_state_dict': self.optim.state_dict(),
        'model_state_dict': self.model.state_dict(),
        'best_mean_iu': self.best_mean_iu,
    }, osp.join(self.out, 'checkpoint.pth.tar'))
    if is_best:
        shutil.copy(osp.join(self.out, 'checkpoint.pth.tar'),
                    osp.join(self.out, 'model_best.pth.tar'))

    val_loss_meter.reset()
    val_metrics.reset()

    # Per-class IoU report, aligned by class name when available.
    class_name = self.val_loader.dataset.class_names
    if class_name is not None:
        for index, value in enumerate(cls_iu.values()):
            offset = 20 - len(class_name[index])
            print(class_name[index] + ' ' * offset + f'{value * 100:>.2f}')
    else:
        print("\nyou don't specify class_names, use number instead")
        for key, value in cls_iu.items():
            print(key, f'{value * 100:>.2f}')
def train(train_loader, model, criterion, optimizer, epoch, tflogger):
    """Run one PSENet-style training epoch (text map + 6 shrink kernels).

    The text map is trained with OHEM-selected pixels; each of the 6 kernel
    maps is trained only where the predicted text score exceeds 0.5 inside
    the training mask.  Logs parameter/gradient histograms every 100 batches
    via ``tflogger`` and returns epoch-average loss and metrics.
    """
    model.train()
    # taglist = ['module.conv1.weight','module.bn1.weight','module.bn1.bias','module.conv2.weight','module.conv2.bias','module.bn2.weight','module.bn2.bias','module.conv3.weight','module.conv3.bias','module.bn3.weight','module.bn3.bia','module.conv4.weight','module.conv4.bias']
    batch_time = AverageMeter()
    data_time = AverageMeter()
    losses = AverageMeter()
    running_metric_text = runningScore(2)
    running_metric_kernel = runningScore(2)
    # Module-level step counter shared across epochs for histogram logging.
    global globalcounter

    end = time.time()
    for batch_idx, (imgs, gt_texts, gt_kernels, training_masks) in enumerate(train_loader):
        data_time.update(time.time() - end)

        imgs = Variable(imgs.cuda())
        gt_texts = Variable(gt_texts.cuda())
        gt_kernels = Variable(gt_kernels.cuda())
        training_masks = Variable(training_masks.cuda())

        outputs = model(imgs)
        # Channel 0 is the full-text map; channels 1..6 are shrink kernels.
        texts = outputs[:, 0, :, :]
        kernels = outputs[:, 1:, :, :]

        # Text loss on OHEM-selected hard pixels.
        selected_masks = ohem_batch(texts, gt_texts, training_masks)
        selected_masks = Variable(selected_masks.cuda())
        loss_text = criterion(texts, gt_texts, selected_masks)

        loss_kernels = []
        # Kernel losses are only computed where the predicted text score
        # exceeds 0.5 AND the training mask is active.
        mask0 = torch.sigmoid(texts).data.cpu().numpy()
        mask1 = training_masks.data.cpu().numpy()
        selected_masks = ((mask0 > 0.5) & (mask1 > 0.5)).astype('float32')
        selected_masks = torch.from_numpy(selected_masks).float()
        selected_masks = Variable(selected_masks.cuda())
        for i in range(6):
            kernel_i = kernels[:, i, :, :]
            gt_kernel_i = gt_kernels[:, i, :, :]
            loss_kernel_i = criterion(kernel_i, gt_kernel_i, selected_masks)
            loss_kernels.append(loss_kernel_i)
        loss_kernel = sum(loss_kernels) / len(loss_kernels)

        # PSENet loss weighting: 0.7 text + 0.3 mean kernel.
        loss = 0.7 * loss_text + 0.3 * loss_kernel
        losses.update(loss.item(), imgs.size(0))

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        score_text = cal_text_score(texts, gt_texts, training_masks, running_metric_text)
        score_kernel = cal_kernel_score(kernels, gt_kernels, gt_texts, training_masks, running_metric_kernel)

        batch_time.update(time.time() - end)
        end = time.time()

        if batch_idx % 20 == 0:
            output_log = '({batch}/{size}) Batch: {bt:.3f}s | TOTAL: {total:.0f}min | ETA: {eta:.0f}min | Loss: {loss:.4f} | Acc_t: {acc: .4f} | IOU_t: {iou_t: .4f} | IOU_k: {iou_k: .4f}'.format(
                batch=batch_idx + 1,
                size=len(train_loader),
                bt=batch_time.avg,
                total=batch_time.avg * batch_idx / 60.0,
                eta=batch_time.avg * (len(train_loader) - batch_idx) / 60.0,
                loss=losses.avg,
                acc=score_text['Mean Acc'],
                iou_t=score_text['Mean IoU'],
                iou_k=score_kernel['Mean IoU'])
            print(output_log)
            sys.stdout.flush()
        if batch_idx % 100 == 0:
            # Histogram every parameter and its gradient; '/' replaces '.'
            # to group tags hierarchically in TensorBoard.
            for tag, value in model.named_parameters():
                tag = tag.replace('.', '/')
                tflogger.histo_summary(tag, value.data.detach().cpu().numpy(), globalcounter)
                tflogger.histo_summary(tag + '/grad', value.grad.data.detach().cpu().numpy(), globalcounter)
            globalcounter += 1

    return (float(losses.avg), float(score_text['Mean Acc']),
            float(score_kernel['Mean Acc']), float(score_text['Mean IoU']),
            float(score_kernel['Mean IoU']))
def train_epoch(self):
    """Train the RGB+IR segmentation model for one epoch.

    Validates first (on epochs matching ``self.val_epoch`` and epoch 1),
    then iterates ``self.train_loader`` accumulating loss/metrics, appends
    a row to log.csv, and steps the LR scheduler if configured.
    """
    # Validate before training on scheduled epochs (and the first epoch).
    if self.epoch % self.val_epoch == 0 or self.epoch == 1:
        self.validate()

    self.model.train()
    train_metrics = runningScore(self.n_classes)
    train_loss_meter = averageMeter()

    self.optim.zero_grad()
    for rgb, ir, target in tqdm.tqdm(
            self.train_loader,
            total=len(self.train_loader),
            desc=f'Train epoch={self.epoch}',
            ncols=80,
            leave=False):
        self.iter += 1
        assert self.model.training

        rgb, ir, target = rgb.to(self.device), ir.to(self.device), target.to(self.device)
        score = self.model(rgb, ir)
        # score = self.model(rgb)

        # Optional per-class weighting supplied by the dataset.
        weight = self.train_loader.dataset.class_weight
        if weight:
            weight = torch.Tensor(weight).to(self.device)

        # ignore_index=-1: pixels labelled -1 are excluded from the loss.
        loss = CrossEntropyLoss(score, target, weight=weight, ignore_index=-1, reduction='mean')
        loss_data = loss.data.item()
        train_loss_meter.update(loss_data)
        if np.isnan(loss_data):
            raise ValueError('loss is nan while training')
        # loss.backward(retain_graph=True)
        loss.backward()
        self.optim.step()
        self.optim.zero_grad()

        # Models may return multiple heads; score the primary one.
        if isinstance(score, (tuple, list)):
            lbl_pred = score[0].data.max(1)[1].cpu().numpy()
        else:
            lbl_pred = score.data.max(1)[1].cpu().numpy()
        lbl_true = target.data.cpu().numpy()
        train_metrics.update(lbl_true, lbl_pred)

    acc, acc_cls, mean_iou, fwavacc, _ = train_metrics.get_scores()
    metrics = [acc, acc_cls, mean_iou, fwavacc]

    # Append a CSV row: epoch, train loss, 4 metrics, 5 blank val columns,
    # elapsed seconds (mirrors the validate log layout).
    with open(osp.join(self.out, 'log.csv'), 'a') as f:
        elapsed_time = (
            datetime.datetime.now(pytz.timezone('UTC')) -
            self.timestamp_start).total_seconds()
        log = [self.epoch] + [train_loss_meter.avg] + \
              metrics + [''] * 5 + [elapsed_time]
        log = map(str, log)
        f.write(','.join(log) + '\n')

    if self.scheduler:
        self.scheduler.step()
    if self.epoch % self.val_epoch == 0 or self.epoch == 1:
        lr = self.optim.param_groups[0]['lr']
        print(f'\nCurrent base learning rate of epoch {self.epoch}: {lr:.7f}')

    train_loss_meter.reset()
    train_metrics.reset()