from typing import Optional

import torch


def mean_iou(input: torch.Tensor, target: torch.Tensor, num_classes: int,
             eps: Optional[float] = 1e-6) -> torch.Tensor:
    r"""Calculate mean Intersection-Over-Union (mIoU).

    The function internally computes the confusion matrix.

    Args:
        input (torch.Tensor): tensor with estimated targets returned by a
            classifier. The shape can be :math:`(B, *)` and must contain
            integer values between 0 and K-1.
        target (torch.Tensor): tensor with ground truth (correct) target
            values. The shape can be :math:`(B, *)` and must contain integer
            values between 0 and K-1, where targets are assumed to be
            provided as one-hot vectors.
        num_classes (int): total possible number of classes in target.

    Returns:
        torch.Tensor: a tensor representing the per-class intersection-over-union
        with shape :math:`(B, K)` where K is the number of classes.
    """
    if not torch.is_tensor(input) or input.dtype is not torch.int64:
        raise TypeError("Input input type is not a torch.Tensor with "
                        "torch.int64 dtype. Got {}".format(type(input)))
    if not torch.is_tensor(target) or target.dtype is not torch.int64:
        raise TypeError("Input target type is not a torch.Tensor with "
                        "torch.int64 dtype. Got {}".format(type(target)))
    if not input.shape == target.shape:
        raise ValueError("Inputs input and target must have the same shape. "
                         "Got: {} and {}".format(input.shape, target.shape))
    if not input.device == target.device:
        raise ValueError("Inputs must be in the same device. "
                         "Got: {} - {}".format(input.device, target.device))
    if not isinstance(num_classes, int) or num_classes < 2:
        raise ValueError("The number of classes must be an integer greater "
                         "than or equal to two. Got: {}".format(num_classes))
    # we first compute the confusion matrix
    conf_mat: torch.Tensor = confusion_matrix(input, target, num_classes)
    # compute the actual intersection over union
    sum_over_row = torch.sum(conf_mat, dim=1)
    sum_over_col = torch.sum(conf_mat, dim=2)
    conf_mat_diag = torch.diagonal(conf_mat, dim1=-2, dim2=-1)
    denominator = sum_over_row + sum_over_col - conf_mat_diag
    # NOTE: we add epsilon so that samples that are neither in the
    # prediction nor the ground truth are taken into account.
    ious = (conf_mat_diag + eps) / (denominator + eps)
    return ious
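# Minimal usage sketch (illustration only): mean_iou expects a batched
# confusion_matrix(input, target, num_classes) helper that returns a
# (B, K, K) tensor, provided elsewhere in this project (kornia-style).
# The toy helper below builds such a matrix with plain torch so the IoU
# math above can be checked in isolation; every name here is hypothetical.
def _toy_confusion_matrix(pred: torch.Tensor, target: torch.Tensor,
                          num_classes: int) -> torch.Tensor:
    # pred/target: (B, N) int64 tensors with values in [0, num_classes)
    batch = pred.shape[0]
    cm = torch.zeros(batch, num_classes, num_classes)
    for b in range(batch):
        idx = target[b] * num_classes + pred[b]
        cm[b] = torch.bincount(idx, minlength=num_classes ** 2).reshape(
            num_classes, num_classes).float()
    return cm


def _toy_iou_demo():
    pred = torch.tensor([[0, 0, 1, 1]], dtype=torch.int64)
    gt = torch.tensor([[0, 1, 1, 1]], dtype=torch.int64)
    cm = _toy_confusion_matrix(pred, gt, num_classes=2)
    diag = torch.diagonal(cm, dim1=-2, dim2=-1)
    denom = cm.sum(dim=1) + cm.sum(dim=2) - diag
    # per-class IoU: background 1/2, foreground 2/3
    print((diag + 1e-6) / (denom + 1e-6))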
def main():
    setSeed(10)
    opt = opt_global_inti()
    num_gpu = torch.cuda.device_count()
    assert num_gpu == opt.num_gpu, "opt.num_gpu NOT equals torch.cuda.device_count()"
    gpu_name_list = []
    for i in range(num_gpu):
        gpu_name_list.append(torch.cuda.get_device_name(i))
    opt.gpu_list = gpu_name_list

    if opt.load_pretrain != '':
        opt, model, f_loss, optimizer, scheduler, opt_deepgcn = load_pretrained(opt)
    else:
        opt, model, f_loss, optimizer, scheduler, opt_deepgcn = creating_new_model(opt)

    print('----------------------Load Dataset----------------------')
    print('Root of dataset: ', opt.dataset_root)
    print('Phase: ', opt.phase)
    print('debug: ', opt.debug)
    if opt.model != 'deepgcn':
        train_dataset = BigredDataSet(
            root=opt.dataset_root,
            is_train=True,
            is_validation=False,
            is_test=False,
            num_channel=opt.num_channel,
            test_code=opt.debug,
            including_ring=opt.including_ring)
        f_loss.load_weight(train_dataset.labelweights)
        train_loader = torch.utils.data.DataLoader(
            train_dataset,
            batch_size=opt.batch_size,
            shuffle=True,
            pin_memory=True,
            drop_last=True,
            num_workers=int(opt.num_workers))
        validation_dataset = BigredDataSet(
            root=opt.dataset_root,
            is_train=False,
            is_validation=True,
            is_test=False,
            num_channel=opt.num_channel,
            test_code=opt.debug,
            including_ring=opt.including_ring)
        validation_loader = torch.utils.data.DataLoader(
            validation_dataset,
            batch_size=opt.batch_size,
            shuffle=False,
            pin_memory=True,
            drop_last=True,
            num_workers=int(opt.num_workers))
    else:
        train_dataset = BigredDataSetPTG(
            root=opt.dataset_root,
            is_train=True,
            is_validation=False,
            is_test=False,
            num_channel=opt.num_channel,
            new_dataset=False,
            test_code=opt.debug,
            pre_transform=torch_geometric.transforms.NormalizeScale())
        train_loader = DenseDataLoader(train_dataset,
                                       batch_size=opt.batch_size,
                                       shuffle=True,
                                       num_workers=opt.num_workers)
        validation_dataset = BigredDataSetPTG(
            root=opt.dataset_root,
            is_train=False,
            is_validation=True,
            is_test=False,
            new_dataset=False,
            test_code=opt.debug,
            num_channel=opt.num_channel,
            pre_transform=torch_geometric.transforms.NormalizeScale())
        validation_loader = DenseDataLoader(validation_dataset,
                                            batch_size=opt.batch_size,
                                            shuffle=False,
                                            num_workers=opt.num_workers)
        # class weights: inverse label frequency smoothed with a cube root
        labelweights, _ = np.histogram(train_dataset.data.y.numpy(), range(3))
        labelweights = labelweights.astype(np.float32)
        labelweights = labelweights / np.sum(labelweights)
        labelweights = np.power(np.amax(labelweights) / labelweights, 1 / 3.0)
        weights = torch.Tensor(labelweights).cuda()
        f_loss.load_weight(weights)

    print('train dataset num_frame: ', len(train_dataset))
    print('num_batch: ', len(train_loader))
    print('validation dataset num_frame: ', len(validation_dataset))
    print('num_batch: ', len(validation_loader))
    print('Batch_size: ', opt.batch_size)

    print('----------------------Preparing Training----------------------')
    metrics_list = ['Miou', 'Biou', 'Fiou', 'loss', 'OA',
                    'time_complexicity', 'storage_complexicity']
    manager_test = metrics_manager(metrics_list)
    metrics_list_train = ['Miou', 'Biou', 'Fiou', 'loss',
                          'storage_complexicity', 'time_complexicity']
    manager_train = metrics_manager(metrics_list_train)

    wandb.init(project=opt.wd_project, name=opt.model_name, resume=False)
    if opt.wandb_history == False:
        best_value = 0
    else:
        temp = wandb.restore('best_model.pth', run_path=opt.wandb_id)
        best_value = torch.load(temp.name)['Miou_validation_ave']
    wandb.config.update(opt)

    if opt.epoch_ckpt == 0:
        opt.unsave_epoch = 0
    else:
        opt.epoch_ckpt = opt.epoch_ckpt + 1

    for epoch in range(opt.epoch_ckpt, opt.epoch_max):
        manager_train.reset()
        model.train()
        tic_epoch = time.perf_counter()
        print('---------------------Training----------------------')
        print("Epoch: ", epoch)
        for i, data in tqdm(enumerate(train_loader), total=len(train_loader), smoothing=0.9):
            if opt.model == 'deepgcn':
                points = torch.cat((data.pos.transpose(2, 1).unsqueeze(3),
                                    data.x.transpose(2, 1).unsqueeze(3)), 1)
                points = points[:, :opt.num_channel, :, :]
                target = data.y.cuda()
            else:
                points, target = data
            # target.shape [B, N], points.shape [B, N, C]
            points, target = points.cuda(non_blocking=True), target.cuda(non_blocking=True)

            # training step
            optimizer.zero_grad()
            tic = time.perf_counter()
            pred_mics = model(points)
            toc = time.perf_counter()

            # compute loss: target [B, N] -> [B*N], pred [B, N, 2] -> [B*N, 2]
            loss = f_loss(pred_mics, target)
            if opt.apex:
                with amp.scale_loss(loss, optimizer) as scaled_loss:
                    scaled_loss.backward()
            else:
                loss.backward()
            optimizer.step()

            # pred.shape [B, N, 2]: the model already applies F.log_softmax
            pred, target = pred_mics[0].cpu(), target.cpu()
            # pred: [B, N, 2] -> [B, N]
            pred = pred.data.max(dim=2)[1]

            # compute IoU
            Biou, Fiou = mean_iou(pred, target, num_classes=2).mean(dim=0)
            miou = (Biou + Fiou) / 2

            # training time complexity
            time_complexity = toc - tic

            # training storage complexity (mean GPU memory allocated)
            num_device = torch.cuda.device_count()
            assert num_device == opt.num_gpu, "opt.num_gpu NOT equals torch.cuda.device_count()"
            temp = []
            for k in range(num_device):
                temp.append(torch.cuda.memory_allocated(k))
            RAM_usagePeak = torch.tensor(temp).float().mean()

            # update logger
            manager_train.update('loss', loss.item())
            manager_train.update('Biou', Biou.item())
            manager_train.update('Fiou', Fiou.item())
            manager_train.update('Miou', miou.item())
            manager_train.update('time_complexicity', float(1 / time_complexity))
            manager_train.update('storage_complexicity', RAM_usagePeak.item())
            log_dict = {'loss_online': loss.item(),
                        'Biou_online': Biou.item(),
                        'Fiou_online': Fiou.item(),
                        'Miou_online': miou.item(),
                        'time_complexicity_online': float(1 / time_complexity),
                        'storage_complexicity_online': RAM_usagePeak.item()}
            if epoch - opt.unsave_epoch >= 0:
                wandb.log(log_dict)

        toc_epoch = time.perf_counter()
        time_tensor = toc_epoch - tic_epoch

        summery_dict = manager_train.summary()
        log_train_end = {}
        for key in summery_dict:
            log_train_end[key + '_train_ave'] = summery_dict[key]
            print(key + '_train_ave: ', summery_dict[key])
        log_train_end['Time_PerEpoch'] = time_tensor
        if epoch - opt.unsave_epoch >= 0:
            wandb.log(log_train_end)
        else:
            print('No data upload to wandb. Start upload: Epoch[%d] Current: Epoch[%d]'
                  % (opt.unsave_epoch, epoch))

        scheduler.step()

        if epoch % 10 == 1:
            print('---------------------Validation----------------------')
            manager_test.reset()
            model.eval()
            print("Epoch: ", epoch)
            with torch.no_grad():
                for j, data in tqdm(enumerate(validation_loader), total=len(validation_loader), smoothing=0.9):
                    if opt.model == 'deepgcn':
                        points = torch.cat((data.pos.transpose(2, 1).unsqueeze(3),
                                            data.x.transpose(2, 1).unsqueeze(3)), 1)
                        points = points[:, :opt.num_channel, :, :]
                        target = data.y.cuda()
                    else:
                        points, target = data
                    # target.shape [B, N], points.shape [B, N, C]
                    points, target = points.cuda(non_blocking=True), target.cuda(non_blocking=True)

                    tic = time.perf_counter()
                    pred_mics = model(points)
                    toc = time.perf_counter()

                    # pred.shape [B, N, 2]: the model already applies F.log_softmax
                    pred, target = pred_mics[0].cpu(), target.cpu()
                    # validation loss is not computed here
                    test_loss = 0
                    # pred: [B, N, 2] -> [B, N]
                    pred = pred.data.max(dim=2)[1]

                    # confusion matrix
                    cm = confusion_matrix(pred, target, num_classes=2).sum(dim=0)
                    # overall accuracy (OA)
                    overall_correct_site = torch.diag(cm).sum()
                    overall_reference_site = cm.sum()
                    oa = float(overall_correct_site / overall_reference_site)

                    # compute IoU
                    Biou, Fiou = mean_iou(pred, target, num_classes=2).mean(dim=0)
                    miou = (Biou + Fiou) / 2

                    # inference time complexity
                    time_complexity = toc - tic

                    # inference storage complexity
                    num_device = torch.cuda.device_count()
                    assert num_device == opt.num_gpu, "opt.num_gpu NOT equals torch.cuda.device_count()"
                    temp = []
                    for k in range(num_device):
                        temp.append(torch.cuda.memory_allocated(k))
                    RAM_usagePeak = torch.tensor(temp).float().mean()

                    # update logger
                    manager_test.update('loss', test_loss)
                    manager_test.update('OA', oa)
                    manager_test.update('Biou', Biou.item())
                    manager_test.update('Fiou', Fiou.item())
                    manager_test.update('Miou', miou.item())
                    manager_test.update('time_complexicity', float(1 / time_complexity))
                    manager_test.update('storage_complexicity', RAM_usagePeak.item())

            summery_dict = manager_test.summary()
            log_val_end = {}
            for key in summery_dict:
                log_val_end[key + '_validation_ave'] = summery_dict[key]
                print(key + '_validation_ave: ', summery_dict[key])

            # package the checkpoint: weights, optimizer state, options and metrics
            package = dict()
            package['state_dict'] = model.state_dict()
            package['scheduler'] = scheduler
            package['optimizer'] = optimizer
            package['epoch'] = epoch
            opt_temp = vars(opt)
            for k in opt_temp:
                package[k] = opt_temp[k]
            if opt_deepgcn is not None:
                opt_temp = vars(opt_deepgcn)
                for k in opt_temp:
                    package[k + '_opt2'] = opt_temp[k]
            for k in log_val_end:
                package[k] = log_val_end[k]

            save_root = opt.save_root + '/val_miou%.4f_Epoch%s.pth' % (
                package['Miou_validation_ave'], package['epoch'])
            torch.save(package, save_root)
            print('Is Best?: ', package['Miou_validation_ave'] > best_value)
            if package['Miou_validation_ave'] > best_value:
                best_value = package['Miou_validation_ave']
                save_root = opt.save_root + '/best_model.pth'
                torch.save(package, save_root)
            if epoch - opt.unsave_epoch >= 0:
                wandb.log(log_val_end)
            else:
                print('No data upload to wandb. Start upload: Epoch[%d] Current: Epoch[%d]'
                      % (opt.unsave_epoch, epoch))
            if opt.debug == True:
                pdb.set_trace()
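# Sketch (assumptions noted): each checkpoint written above is a flat dict
# ("package") holding 'state_dict', 'optimizer', 'scheduler', 'epoch', every
# field of opt, and the '*_validation_ave' metrics. Because the state dict
# may come from a DataParallel-wrapped model, its keys typically carry a
# 'module.' prefix, which the test scripts strip via convert_state_dict.
# A rough reload, with `build_model` standing in for the project's
# MODEL.get_model factory (hypothetical name):
import torch


def load_training_package(path, build_model):
    package = torch.load(path, map_location='cpu')
    state_dict = {k.replace('module.', '', 1): v
                  for k, v in package['state_dict'].items()}
    model = build_model(input_channel=package['num_channel'])
    model.load_state_dict(state_dict)
    print('epoch:', package['epoch'],
          'best validation mIoU:', package.get('Miou_validation_ave'))
    return model, package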
def main():
    setSeed(10)
    opt = opt_global_inti()

    print('----------------------Load ckpt----------------------')
    pretrained_model_path = os.path.join(opt.load_pretrain, 'saves/best_model.pth')
    package = torch.load(pretrained_model_path)
    para_state_dict = package['state_dict']
    opt.num_channel = package['num_channel']
    opt.time = package['time']
    opt.epoch_ckpt = package['epoch']
    try:
        state_dict = convert_state_dict(para_state_dict)
    except Exception:
        para_state_dict = para_state_dict.state_dict()
        state_dict = convert_state_dict(para_state_dict)

    ckpt_, ckpt_file_name = opt.load_pretrain.split("/")
    module_name = ckpt_ + '.' + ckpt_file_name + '.' + 'model'
    MODEL = importlib.import_module(module_name)
    opt_deepgcn = []
    print(opt.model)
    if opt.model == 'deepgcn':
        opt_deepgcn = OptInit_deepgcn().initialize()
        model = MODEL.get_model(opt2=opt_deepgcn, input_channel=opt.num_channel)
    else:
        model = MODEL.get_model(input_channel=opt.num_channel, is_synchoization='Instance')
    Model_Specification = MODEL.get_model_name(input_channel=opt.num_channel)

    print('----------------------Test Model----------------------')
    print('Root of pretrained model: ', pretrained_model_path)
    print('Model: ', opt.model)
    print('Pretrained model name: ', Model_Specification)
    print('Trained Date: ', opt.time)
    print('num_channel: ', opt.num_channel)
    name = input("Edit the name or press ENTER to skip: ")
    if name != '':
        opt.model_name = name
    else:
        opt.model_name = Model_Specification
    print('Pretrained model name: ', opt.model_name)
    package['name'] = opt.model_name
    try:
        package["Miou_validation_ave"] = package.pop("Validation_ave_miou")
    except KeyError:
        pass
    save_model(package, pretrained_model_path)

    model.load_state_dict(state_dict)
    model.cuda()

    print('----------------------Load Dataset----------------------')
    print('Root of dataset: ', opt.dataset_root)
    print('Phase: ', opt.phase)
    print('debug: ', opt.debug)
    print('opt.model', opt.model)
    print(opt.model == 'deepgcn')
    if opt.model != 'deepgcn':
        test_dataset = BigredDataSet(root=opt.dataset_root,
                                     is_train=False,
                                     is_validation=False,
                                     is_test=True,
                                     num_channel=opt.num_channel,
                                     test_code=opt.debug,
                                     including_ring=opt.including_ring,
                                     file_name=opt.file_name)
        result_sheet = test_dataset.result_sheet
        file_dict = test_dataset.file_dict
        tag_Getter = tag_getter(file_dict)
        testloader = torch.utils.data.DataLoader(test_dataset,
                                                 batch_size=1,
                                                 shuffle=False,
                                                 pin_memory=True,
                                                 drop_last=True,
                                                 num_workers=int(opt.num_workers))
    else:
        test_dataset = BigredDataSetPTG(
            root=opt.dataset_root,
            is_train=False,
            is_validation=False,
            is_test=True,
            num_channel=opt.num_channel,
            new_dataset=True,
            test_code=opt.debug,
            pre_transform=torch_geometric.transforms.NormalizeScale(),
            file_name=opt.file_name)
        result_sheet = test_dataset.result_sheet
        file_dict = test_dataset.file_dict
        print(file_dict)
        tag_Getter = tag_getter(file_dict)
        testloader = DenseDataLoader(test_dataset,
                                     batch_size=1,
                                     shuffle=False,
                                     num_workers=opt.num_workers)

    print('num_frame: ', len(test_dataset))
    print('batch_size: ', opt.batch_size)
    print('num_batch: ', len(testloader))

    print('----------------------Testing----------------------')
    metrics_list = ['Miou', 'Biou', 'Fiou', 'test_loss', 'OA',
                    'time_complexicity', 'storage_complexicity']
    print(result_sheet)
    for name in result_sheet:
        metrics_list.append(name)
    print(metrics_list)
    manager = metrics_manager(metrics_list)
    model.eval()
    wandb.init(project="Test", name=package['name'])
    wandb.config.update(opt)

    prediction_set = []
    with torch.no_grad():
        for j, data in tqdm(enumerate(testloader), total=len(testloader), smoothing=0.9):
            if opt.model == 'deepgcn':
                points = torch.cat((data.pos.transpose(2, 1).unsqueeze(3),
                                    data.x.transpose(2, 1).unsqueeze(3)), 1)
                points = points[:, :opt.num_channel, :, :].cuda()
                target = data.y.cuda()
            else:
                points, target = data
                # target.shape [B, N], points.shape [B, N, C]
                points, target = points.cuda(), target.cuda()

            torch.cuda.synchronize()
            since = int(round(time.time() * 1000))
            pred_mics = model(points)
            torch.cuda.synchronize()
            # inference time complexity (milliseconds)
            time_complexity = int(round(time.time() * 1000)) - since

            # pred_mics[0] is pred; pred_mics[1] is feat (only pointnet and pointnetpp return it)
            # test loss is not computed here
            test_loss = 0

            # pred.shape [B, N, 2]: the model already applies F.log_softmax
            pred, target, points = pred_mics[0].cpu(), target.cpu(), points.cpu()
            # pred: [B, N, 2] -> [B, N]
            pred = pred.data.max(dim=2)[1]
            prediction_set.append(pred)

            # confusion matrix
            cm = confusion_matrix(pred, target, num_classes=2).sum(dim=0)
            # overall accuracy (OA)
            overall_correct_site = torch.diag(cm).sum()
            overall_reference_site = cm.sum()
            assert overall_reference_site == opt.batch_size * opt.num_points, \
                "Confusion_matrix computing error"
            oa = float(overall_correct_site / overall_reference_site)

            # compute IoU
            Biou, Fiou = mean_iou(pred, target, num_classes=2).mean(dim=0)
            miou = (Biou + Fiou) / 2

            # inference storage complexity
            num_device = torch.cuda.device_count()
            assert num_device == opt.num_gpu, "opt.num_gpu NOT equals torch.cuda.device_count()"
            temp = []
            for k in range(num_device):
                temp.append(torch.cuda.memory_allocated(k))
            RAM_usagePeak = torch.tensor(temp).float().mean()

            # update logger
            manager.update('test_loss', test_loss)
            manager.update('OA', oa)
            manager.update('Biou', Biou.item())
            manager.update('Fiou', Fiou.item())
            manager.update('Miou', miou.item())
            manager.update('time_complexicity', time_complexity)
            manager.update('storage_complexicity', RAM_usagePeak.item())

            # get tags and record the mIoU for the corresponding category
            difficulty, location, isSingle, file_name = tag_Getter.get_difficulty_location_isSingle(j)
            manager.update(file_name, miou.item())
            manager.update(difficulty, miou.item())
            manager.update(isSingle, miou.item())

    prediction_set = np.concatenate(prediction_set, axis=0)
    point_set, label_set, ermgering_set = test_dataset.getVis(prediction_set)

    experiment_dir = Path('visulization_data/' + opt.model)
    experiment_dir.mkdir(parents=True, exist_ok=True)
    root = 'visulization_data/' + opt.model
    with open(root + '/point_set.npy', 'wb') as f:
        np.save(f, point_set)
    with open(root + '/label_set.npy', 'wb') as f:
        np.save(f, label_set)
    with open(root + '/ermgering_set.npy', 'wb') as f:
        np.save(f, ermgering_set)
    with open(root + '/prediction_set.npy', 'wb') as f:
        np.save(f, prediction_set)

    summery_dict = manager.summary()
    generate_report(summery_dict, package)
    wandb.log(summery_dict)
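# Sketch (assumption): metrics_manager is only used above through reset(),
# update(name, value) and summary(). A minimal stand-in with that interface,
# returning the running average of every metric, could look like this; the
# real project class may differ.
from collections import defaultdict


class MetricsManagerSketch:
    def __init__(self, names):
        self.names = list(names)
        self.reset()

    def reset(self):
        # per-metric running sum and update count
        self._sum = defaultdict(float)
        self._count = defaultdict(int)

    def update(self, name, value):
        self._sum[name] += float(value)
        self._count[name] += 1

    def summary(self):
        # average of every metric that received at least one update
        return {n: self._sum[n] / self._count[n]
                for n in self.names if self._count[n] > 0}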
def main():
    setSeed(10)
    opt = opt_global_inti()

    print('----------------------Load ckpt----------------------')
    pretrained_model_path = os.path.join(opt.load_pretrain, 'best_model.pth')
    package = torch.load(pretrained_model_path)
    para_state_dict = package['state_dict']
    opt.num_channel = package['num_channel']
    opt.time = package['time']
    opt.epoch_ckpt = package['epoch']
    state_dict = convert_state_dict(para_state_dict)

    ckpt_, ckpt_file_name = opt.load_pretrain.split("/")
    module_name = ckpt_ + '.' + ckpt_file_name + '.' + 'model'
    MODEL = importlib.import_module(module_name)
    model = MODEL.get_model(input_channel=opt.num_channel)
    Model_Specification = MODEL.get_model_name(input_channel=opt.num_channel)

    print('----------------------Test Model----------------------')
    print('Root of pretrained model: ', pretrained_model_path)
    print('Model: ', opt.model)
    print('Pretrained model name: ', Model_Specification)
    print('Trained Date: ', opt.time)
    print('num_channel: ', opt.num_channel)
    name = input("Edit the name or press ENTER to skip: ")
    if name != '':
        opt.model_name = name
    else:
        opt.model_name = Model_Specification
    print('Pretrained model name: ', opt.model_name)
    package['name'] = opt.model_name
    save_model(package, pretrained_model_path)

    model.load_state_dict(state_dict)
    model.cuda()

    print('----------------------Load Dataset----------------------')
    print('Root of dataset: ', opt.dataset_root)
    print('Phase: ', opt.phase)
    print('debug: ', opt.debug)
    test_dataset = BigredDataSet(root=opt.dataset_root,
                                 is_train=False,
                                 is_validation=False,
                                 is_test=True,
                                 num_channel=opt.num_channel,
                                 test_code=opt.debug,
                                 including_ring=opt.including_ring)
    result_sheet = test_dataset.result_sheet
    file_dict = test_dataset.file_dict
    tag_Getter = tag_getter(file_dict)
    testloader = torch.utils.data.DataLoader(test_dataset,
                                             batch_size=opt.batch_size,
                                             shuffle=False,
                                             pin_memory=True,
                                             drop_last=True,
                                             num_workers=int(opt.num_workers))
    print('num_frame: ', len(test_dataset))
    print('batch_size: ', opt.batch_size)
    print('num_batch: ', len(testloader))

    print('----------------------Testing----------------------')
    metrics_list = ['Miou', 'Biou', 'Fiou', 'test_loss', 'OA',
                    'time_complexicity', 'storage_complexicity']
    for name in result_sheet:
        metrics_list.append(name)
    manager = metrics_manager(metrics_list)
    model.eval()
    wandb.init(project="Test", name=package['name'])
    wandb.config.update(opt)

    points_gt_list = []
    points_pd_list = []
    points_important_list = []
    with torch.no_grad():
        for j, data in tqdm(enumerate(testloader), total=len(testloader), smoothing=0.9):
            points, target = data
            # target.shape [B, N], points.shape [B, N, C]
            points, target = points.cuda(), target.cuda()
            tic = time.perf_counter()
            pred_mics = model(points)
            toc = time.perf_counter()

            # pred_mics[0] is pred; pred_mics[1] is feat (only pointnet and pointnetpp return it)
            # test loss is not computed here
            test_loss = 0

            # pred.shape [B, N, 2]: the model already applies F.log_softmax
            pred, target, points = pred_mics[0].cpu(), target.cpu(), points.cpu()
            imp_glob = pred_mics[2].cpu()
            # pred: [B, N, 2] -> [B, N]
            pred = pred.data.max(dim=2)[1]

            # confusion matrix
            cm = confusion_matrix(pred, target, num_classes=2).sum(dim=0)
            # overall accuracy (OA)
            overall_correct_site = torch.diag(cm).sum()
            overall_reference_site = cm.sum()
            assert overall_reference_site == opt.batch_size * opt.num_points, \
                "Confusion_matrix computing error"
            oa = float(overall_correct_site / overall_reference_site)

            # compute IoU
            Biou, Fiou = mean_iou(pred, target, num_classes=2).mean(dim=0)
            miou = (Biou + Fiou) / 2

            # inference time complexity
            time_complexity = toc - tic

            # inference storage complexity
            num_device = torch.cuda.device_count()
            assert num_device == opt.num_gpu, "opt.num_gpu NOT equals torch.cuda.device_count()"
            temp = []
            for k in range(num_device):
                temp.append(torch.cuda.memory_allocated(k))
            RAM_usagePeak = torch.tensor(temp).float().mean()

            # update logger
            manager.update('test_loss', test_loss)
            manager.update('OA', oa)
            manager.update('Biou', Biou.item())
            manager.update('Fiou', Fiou.item())
            manager.update('Miou', miou.item())
            manager.update('time_complexicity', float(1 / time_complexity))
            manager.update('storage_complexicity', RAM_usagePeak.item())

            # get tags and record the mIoU for the corresponding category
            difficulty, location, isSingle, file_name = tag_Getter.get_difficulty_location_isSingle(j)
            manager.update(file_name, miou.item())
            manager.update(difficulty, miou.item())
            manager.update(isSingle, miou.item())

            # flatten the batch for visualization
            dim_num = points.shape[2]
            points = points.view(-1, dim_num).numpy()
            pred = pred.view(-1, 1).numpy()
            target = target.view(-1, 1).numpy()

            # count how often each global index was selected as important
            imp_glob = imp_glob.view(-1, )
            number_sheet, _, bin_sheet = torch.unique(imp_glob,
                                                      sorted=True,
                                                      return_inverse=True,
                                                      return_counts=True,
                                                      dim=None)
            temp_arr = np.zeros(len(target))
            temp_arr[number_sheet] = bin_sheet
            temp_arr = temp_arr.reshape(-1, 1)

            points_gt = np.concatenate((points[:, [0, 1, 2]], target), axis=1)
            points_pd = np.concatenate((points[:, [0, 1, 2]], pred), axis=1)
            points_important = np.concatenate((points[:, [0, 1, 2]], temp_arr), axis=1)

            if opt.including_ring:
                temp_arr2 = np.zeros(len(target))
                imp_ring = pred_mics[3].cpu()
                imp_ring = imp_ring.view(-1, )
                number_sheet, _, bin_sheet = torch.unique(imp_ring,
                                                          sorted=True,
                                                          return_inverse=True,
                                                          return_counts=True,
                                                          dim=None)
                temp_arr2[number_sheet] = bin_sheet
                temp_arr2 = temp_arr2.reshape(-1, 1)
                points_important = np.concatenate((points_important, temp_arr2), axis=1)

            points_gt_list.append(points_gt)
            points_pd_list.append(points_pd)
            points_important_list.append(points_important)

    f = h5py.File('resluts.h5', 'w')
    f.create_dataset('points_gt_list', data=np.array(points_gt_list))
    f.create_dataset('points_pd_list', data=np.array(points_pd_list))
    f.create_dataset('points_important_list', data=np.array(points_important_list))
    f.close()

    summery_dict = manager.summary()
    generate_report(summery_dict, package)
    wandb.log(summery_dict)
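# Sketch: the 'resluts.h5' file written above holds three datasets, each
# roughly (num_frames, B*N, 4+) with x, y, z in the first three columns.
# Assuming that layout, it can be read back for offline visualization:
def load_test_results(path='resluts.h5'):
    import h5py
    with h5py.File(path, 'r') as f:
        points_gt = f['points_gt_list'][:]          # x, y, z, ground-truth label
        points_pd = f['points_pd_list'][:]          # x, y, z, predicted label
        points_imp = f['points_important_list'][:]  # x, y, z, sampling count(s)
    return points_gt, points_pd, points_imp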
def main():
    setSeed(10)
    opt = opt_global_inti()

    print('----------------------Load ckpt----------------------')
    pretrained_model_path = os.path.join(opt.load_pretrain, 'best_model.pth')
    package = torch.load(pretrained_model_path)
    para_state_dict = package['state_dict']
    opt.num_channel = package['num_channel']
    opt.time = package['time']
    opt.epoch_ckpt = package['epoch']
    state_dict = convert_state_dict(para_state_dict)

    ckpt_, ckpt_file_name = opt.load_pretrain.split("/")
    module_name = ckpt_ + '.' + ckpt_file_name + '.' + 'model'
    MODEL = importlib.import_module(module_name)
    opt_deepgcn = []
    print(opt.model)
    if opt.model == 'deepgcn':
        opt_deepgcn = OptInit_deepgcn().initialize()
        model = MODEL.get_model(opt2=opt_deepgcn, input_channel=opt.num_channel)
    else:
        model = MODEL.get_model(input_channel=opt.num_channel)
    Model_Specification = MODEL.get_model_name(input_channel=opt.num_channel)
    f_loss = MODEL.get_loss(input_channel=opt.num_channel)

    print('----------------------Test Model----------------------')
    print('Root of pretrained model: ', pretrained_model_path)
    print('Model: ', opt.model)
    print('Pretrained model name: ', Model_Specification)
    print('Trained Date: ', opt.time)
    print('num_channel: ', opt.num_channel)
    name = input("Edit the name or press ENTER to skip: ")
    if name != '':
        opt.model_name = name
    else:
        opt.model_name = Model_Specification
    print('Pretrained model name: ', opt.model_name)
    package['name'] = opt.model_name
    save_model(package, pretrained_model_path)

    print('----------------------Configure optimizer and scheduler----------------------')
    experiment_dir = Path('ckpt/')
    experiment_dir.mkdir(exist_ok=True)
    experiment_dir = experiment_dir.joinpath(opt.model_name)
    experiment_dir.mkdir(exist_ok=True)
    experiment_dir = experiment_dir.joinpath('saves')
    experiment_dir.mkdir(exist_ok=True)
    opt.save_root = str(experiment_dir)

    # prepare the model for fine-tuning (project-specific hooks)
    model.ini_ft()
    model.frozen_ft()

    if opt.apex == True:
        model.cuda()
        f_loss.cuda()
        optimizer = optim.Adam(model.parameters(), lr=0.001, betas=(0.9, 0.999))
        scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=1, gamma=0.1)
        model, optimizer = amp.initialize(model, optimizer, opt_level="O2")
        model = torch.nn.DataParallel(model, device_ids=[0, 1])
    else:
        model = torch.nn.DataParallel(model)
        model.cuda()
        f_loss.cuda()
        optimizer = optim.Adam(model.parameters(), lr=0.001, betas=(0.9, 0.999))
        scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=1, gamma=0.1)

    print('----------------------Load Dataset----------------------')
    print('Root of dataset: ', opt.dataset_root)
    print('Phase: ', opt.phase)
    print('debug: ', opt.debug)
    if opt.model != 'deepgcn':
        train_dataset = BigredDataSet_finetune(
            root=opt.dataset_root,
            is_train=True,
            is_validation=False,
            is_test=False,
            num_channel=opt.num_channel,
            test_code=opt.debug,
            including_ring=opt.including_ring)
        f_loss.load_weight(train_dataset.labelweights)
        train_loader = torch.utils.data.DataLoader(
            train_dataset,
            batch_size=opt.batch_size,
            shuffle=True,
            pin_memory=True,
            drop_last=True,
            num_workers=int(opt.num_workers))
        validation_dataset = BigredDataSet_finetune(
            root=opt.dataset_root,
            is_train=False,
            is_validation=True,
            is_test=False,
            num_channel=opt.num_channel,
            test_code=opt.debug,
            including_ring=opt.including_ring)
        validation_loader = torch.utils.data.DataLoader(
            validation_dataset,
            batch_size=opt.batch_size,
            shuffle=False,
            pin_memory=True,
            drop_last=True,
            num_workers=int(opt.num_workers))
    else:
        train_dataset = BigredDataSetPTG(
            root=opt.dataset_root,
            is_train=True,
            is_validation=False,
            is_test=False,
            num_channel=opt.num_channel,
            new_dataset=False,
            test_code=opt.debug,
            pre_transform=torch_geometric.transforms.NormalizeScale())
        train_loader = DenseDataLoader(train_dataset,
                                       batch_size=opt.batch_size,
                                       shuffle=True,
                                       num_workers=opt.num_workers)
        validation_dataset = BigredDataSetPTG(
            root=opt.dataset_root,
            is_train=False,
            is_validation=True,
            is_test=False,
            new_dataset=False,
            test_code=opt.debug,
            num_channel=opt.num_channel,
            pre_transform=torch_geometric.transforms.NormalizeScale())
        validation_loader = DenseDataLoader(validation_dataset,
                                            batch_size=opt.batch_size,
                                            shuffle=False,
                                            num_workers=opt.num_workers)
        # class weights: inverse label frequency smoothed with a cube root
        labelweights, _ = np.histogram(train_dataset.data.y.numpy(), range(3))
        labelweights = labelweights.astype(np.float32)
        labelweights = labelweights / np.sum(labelweights)
        labelweights = np.power(np.amax(labelweights) / labelweights, 1 / 3.0)
        weights = torch.Tensor(labelweights).cuda()
        f_loss.load_weight(weights)

    print('train dataset num_frame: ', len(train_dataset))
    print('num_batch: ', len(train_loader))
    print('validation dataset num_frame: ', len(validation_dataset))
    print('num_batch: ', len(validation_loader))
    print('Batch_size: ', opt.batch_size)

    print('----------------------Preparing Training----------------------')
    metrics_list = ['Miou', 'Biou', 'Fiou', 'loss', 'OA',
                    'time_complexicity', 'storage_complexicity']
    manager_test = metrics_manager(metrics_list)
    metrics_list_train = ['Miou', 'Biou', 'Fiou', 'loss',
                          'storage_complexicity', 'time_complexicity']
    manager_train = metrics_manager(metrics_list_train)

    wandb.init(project=opt.wd_project, name=opt.model_name, resume=False)
    if opt.wandb_history == False:
        best_value = 0
    else:
        temp = wandb.restore('best_model.pth', run_path=opt.wandb_id)
        best_value = torch.load(temp.name)['Miou_validation_ave']
    # fine-tuning restarts the best-model tracking from zero
    best_value = 0
    wandb.config.update(opt)

    if opt.epoch_ckpt == 0:
        opt.unsave_epoch = 0
    else:
        opt.epoch_ckpt = opt.epoch_ckpt + 1

    for epoch in range(opt.epoch_ckpt, opt.epoch_max):
        manager_train.reset()
        model.train()
        tic_epoch = time.perf_counter()
        print('---------------------Training----------------------')
        print("Epoch: ", epoch)
        for i, data in tqdm(enumerate(train_loader), total=len(train_loader), smoothing=0.9):
            if opt.model == 'deepgcn':
                points = torch.cat((data.pos.transpose(2, 1).unsqueeze(3),
                                    data.x.transpose(2, 1).unsqueeze(3)), 1)
                points = points[:, :opt.num_channel, :, :]
                target = data.y.cuda()
            else:
                points, target = data
            # target.shape [B, N], points.shape [B, N, C]
            points, target = points.cuda(non_blocking=True), target.cuda(non_blocking=True)

            # training step
            optimizer.zero_grad()
            tic = time.perf_counter()
            pred_mics = model(points)
            toc = time.perf_counter()

            # compute loss: target [B, N] -> [B*N], pred [B, N, 2] -> [B*N, 2]
            loss = f_loss(pred_mics, target)
            if opt.apex:
                with amp.scale_loss(loss, optimizer) as scaled_loss:
                    scaled_loss.backward()
            else:
                loss.backward()
            optimizer.step()

            # pred.shape [B, N, 2]: the model already applies F.log_softmax
            pred, target = pred_mics[0].cpu(), target.cpu()
            # pred: [B, N, 2] -> [B, N]
            pred = pred.data.max(dim=2)[1]

            # compute IoU
            Biou, Fiou = mean_iou(pred, target, num_classes=2).mean(dim=0)
            miou = (Biou + Fiou) / 2

            # training time complexity
            time_complexity = toc - tic

            # training storage complexity (mean GPU memory allocated)
            num_device = torch.cuda.device_count()
            assert num_device == opt.num_gpu, "opt.num_gpu NOT equals torch.cuda.device_count()"
            temp = []
            for k in range(num_device):
                temp.append(torch.cuda.memory_allocated(k))
            RAM_usagePeak = torch.tensor(temp).float().mean()

            # update logger
            manager_train.update('loss', loss.item())
            manager_train.update('Biou', Biou.item())
            manager_train.update('Fiou', Fiou.item())
            manager_train.update('Miou', miou.item())
            manager_train.update('time_complexicity', float(1 / time_complexity))
            manager_train.update('storage_complexicity', RAM_usagePeak.item())
            log_dict = {'loss_online': loss.item(),
                        'Biou_online': Biou.item(),
                        'Fiou_online': Fiou.item(),
                        'Miou_online': miou.item(),
                        'time_complexicity_online': float(1 / time_complexity),
                        'storage_complexicity_online': RAM_usagePeak.item()}
            if epoch - opt.unsave_epoch >= 0:
                wandb.log(log_dict)

        toc_epoch = time.perf_counter()
        time_tensor = toc_epoch - tic_epoch

        summery_dict = manager_train.summary()
        log_train_end = {}
        for key in summery_dict:
            log_train_end[key + '_train_ave'] = summery_dict[key]
            print(key + '_train_ave: ', summery_dict[key])
        log_train_end['Time_PerEpoch'] = time_tensor
        if epoch - opt.unsave_epoch >= 0:
            wandb.log(log_train_end)
        else:
            print('No data upload to wandb. Start upload: Epoch[%d] Current: Epoch[%d]'
                  % (opt.unsave_epoch, epoch))

        scheduler.step()

        if epoch % 10 == 1:
            print('---------------------Validation----------------------')
            manager_test.reset()
            model.eval()
            print("Epoch: ", epoch)
            with torch.no_grad():
                for j, data in tqdm(enumerate(validation_loader), total=len(validation_loader), smoothing=0.9):
                    if opt.model == 'deepgcn':
                        points = torch.cat((data.pos.transpose(2, 1).unsqueeze(3),
                                            data.x.transpose(2, 1).unsqueeze(3)), 1)
                        points = points[:, :opt.num_channel, :, :]
                        target = data.y.cuda()
                    else:
                        points, target = data
                    # target.shape [B, N], points.shape [B, N, C]
                    points, target = points.cuda(non_blocking=True), target.cuda(non_blocking=True)

                    tic = time.perf_counter()
                    pred_mics = model(points)
                    toc = time.perf_counter()

                    # pred.shape [B, N, 2]: the model already applies F.log_softmax
                    pred, target = pred_mics[0].cpu(), target.cpu()
                    # validation loss is not computed here
                    test_loss = 0
                    # pred: [B, N, 2] -> [B, N]
                    pred = pred.data.max(dim=2)[1]

                    # confusion matrix
                    cm = confusion_matrix(pred, target, num_classes=2).sum(dim=0)
                    # overall accuracy (OA)
                    overall_correct_site = torch.diag(cm).sum()
                    overall_reference_site = cm.sum()
                    oa = float(overall_correct_site / overall_reference_site)

                    # compute IoU
                    Biou, Fiou = mean_iou(pred, target, num_classes=2).mean(dim=0)
                    miou = (Biou + Fiou) / 2

                    # inference time complexity
                    time_complexity = toc - tic

                    # inference storage complexity
                    num_device = torch.cuda.device_count()
                    assert num_device == opt.num_gpu, "opt.num_gpu NOT equals torch.cuda.device_count()"
                    temp = []
                    for k in range(num_device):
                        temp.append(torch.cuda.memory_allocated(k))
                    RAM_usagePeak = torch.tensor(temp).float().mean()

                    # update logger
                    manager_test.update('loss', test_loss)
                    manager_test.update('OA', oa)
                    manager_test.update('Biou', Biou.item())
                    manager_test.update('Fiou', Fiou.item())
                    manager_test.update('Miou', miou.item())
                    manager_test.update('time_complexicity', float(1 / time_complexity))
                    manager_test.update('storage_complexicity', RAM_usagePeak.item())

            summery_dict = manager_test.summary()
            log_val_end = {}
            for key in summery_dict:
                log_val_end[key + '_validation_ave'] = summery_dict[key]
                print(key + '_validation_ave: ', summery_dict[key])

            # package the checkpoint: weights, optimizer state, options and metrics
            package = dict()
            package['state_dict'] = model.state_dict()
            package['scheduler'] = scheduler
            package['optimizer'] = optimizer
            package['epoch'] = epoch
            opt_temp = vars(opt)
            for k in opt_temp:
                package[k] = opt_temp[k]
            if opt.model == 'deepgcn':
                opt_temp = vars(opt_deepgcn)
                for k in opt_temp:
                    package[k + '_opt2'] = opt_temp[k]
            for k in log_val_end:
                package[k] = log_val_end[k]

            save_root = opt.save_root + '/val_miou%.4f_Epoch%s.pth' % (
                package['Miou_validation_ave'], package['epoch'])
            torch.save(package, save_root)
            print('Is Best?: ', package['Miou_validation_ave'] > best_value)
            if package['Miou_validation_ave'] > best_value:
                best_value = package['Miou_validation_ave']
                save_root = opt.save_root + '/best_model.pth'
                torch.save(package, save_root)
            if epoch - opt.unsave_epoch >= 0:
                wandb.log(log_val_end)
            else:
                print('No data upload to wandb. Start upload: Epoch[%d] Current: Epoch[%d]'
                      % (opt.unsave_epoch, epoch))
            if opt.debug == True:
                pdb.set_trace()
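# Sketch: both training scripts weight the 2-class loss by inverse label
# frequency smoothed with a cube root, as in the deepgcn branches above.
# A standalone version of that weighting (pure numpy/torch) looks like this.
import numpy as np
import torch


def inverse_frequency_weights(labels: np.ndarray, num_classes: int = 2) -> torch.Tensor:
    # histogram of label occurrences per class
    counts, _ = np.histogram(labels, bins=range(num_classes + 1))
    freq = counts.astype(np.float32) / counts.sum()
    # rarer classes get larger weights; the 1/3 power softens the ratio
    weights = np.power(freq.max() / freq, 1.0 / 3.0)
    return torch.from_numpy(weights)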