def init():
    """Set up the module-level training state and prune empty checkpoint folders.

    Steps, in order:

    1. Create the TensorFlow session, capped at 80% of GPU memory per process.
    2. Load ``./config.yaml`` into the global ``config``.
    3. Fill ``class_names``, ``label_values`` and ``num_classes`` from
       ``utils.get_label_info()``; set ``resize_resolution`` and ``data_size``.
    4. Remove every ``<save_directory>/<model>/<run>`` folder that contains
       no checkpoint.

    ``min_width`` and ``min_height`` are declared global but are not assigned
    by this function.
    """
    global sess, config, class_names, label_values, num_classes
    global min_width, min_height, resize_resolution, data_size

    # training session settings
    config_proto = tf.ConfigProto()
    # config_proto.gpu_options.allow_growth = True
    config_proto.gpu_options.per_process_gpu_memory_fraction = 0.8
    sess = tf.Session(config=config_proto)

    # fetch config parameters
    # NOTE: yaml.load() without an explicit Loader can construct arbitrary
    # Python objects and is rejected by recent PyYAML releases; a plain
    # settings file only needs the safe loader.
    with open("./config.yaml") as config_file:
        config = yaml.safe_load(config_file)

    class_names, label_values = utils.get_label_info()
    num_classes = len(label_values)
    # resize_resolution = utils.calculate_optimal_resize_image_resolution()
    resize_resolution = [384, 512]
    data_size = config["data"]["data_size"]

    # clean up checkpoint directory
    save_root = config["training_setting"]["checkpoints"]["save_directory"]
    for model_dir in os.listdir(save_root):
        model_path = os.path.join(save_root, model_dir)
        if not os.path.isdir(model_path):
            # stray files next to the model folders are left alone
            continue
        for ckpt_dir in os.listdir(model_path):
            ckpt_path = os.path.join(model_path, ckpt_dir)
            if not os.path.isdir(ckpt_path):
                continue
            # A run folder is treated as holding no checkpoint when it is
            # empty or contains exactly three entries (presumably only
            # bookkeeping files written at start-up -- TODO confirm).
            if len(os.listdir(ckpt_path)) in (0, 3):
                shutil.rmtree(ckpt_path)
def val(args, model, val_img_path, val_label_path, csv_path):
    """Run validation over an ADE image/label folder pair.

    Args:
        args: namespace providing ``crop_height``, ``crop_width``,
            ``num_workers`` and ``use_gpu``.
        model: network to evaluate; it is switched to ``eval()`` mode.
        val_img_path: folder with validation images.
        val_label_path: folder with validation labels.
        csv_path: class/colour csv passed to ``get_label_info``.

    Returns:
        Mean of the per-image ``compute_global_accuracy`` values.
    """
    print('start val!')
    dataset_val = ADE(val_img_path, val_label_path,
                      scale=(args.crop_height, args.crop_width), mode='val')
    dataloader_val = DataLoader(
        dataset_val,
        # this has to be 1
        batch_size=1,
        shuffle=True,
        num_workers=args.num_workers
    )
    label_info = get_label_info(csv_path)
    use_cuda = torch.cuda.is_available() and args.use_gpu

    with torch.no_grad():
        model.eval()
        precision_record = []
        for data, label in dataloader_val:
            if use_cuda:
                data = data.cuda()
                label = label.cuda()

            # get RGB predict image
            predict = model(data).squeeze()
            predict = reverse_one_hot(predict)
            # .cpu() is required: numpy cannot read a CUDA tensor directly
            predict = colour_code_segmentation(np.array(predict.cpu()), label_info)

            # get RGB label image
            label = label.squeeze()
            label = reverse_one_hot(label)
            label = colour_code_segmentation(np.array(label.cpu()), label_info)

            # compute per pixel accuracy
            precision_record.append(compute_global_accuracy(predict, label))

    mean_precision = np.mean(precision_record)
    print('precision per pixel for validation: %.3f' % mean_precision)
    return mean_precision
def val(args, model, dataloader, csv_path):
    """Run validation over ``dataloader`` and report mean per-pixel accuracy.

    Args:
        args: namespace providing ``use_gpu``.
        model: network to evaluate; it is switched to ``eval()`` mode.
        dataloader: iterable yielding ``(data, label)`` batches.
        csv_path: class/colour csv passed to ``get_label_info``.

    Returns:
        Mean of the per-batch ``compute_global_accuracy`` values.
    """
    print('start val!')
    label_info = get_label_info(csv_path)
    use_cuda = torch.cuda.is_available() and args.use_gpu

    with torch.no_grad():
        model.eval()
        precision_record = []
        for data, label in dataloader:
            if use_cuda:
                data = data.cuda()
                label = label.cuda()

            # get RGB predict image
            predict = model(data).squeeze()
            predict = reverse_one_hot(predict)
            # .cpu() is required: numpy cannot read a CUDA tensor directly
            predict = colour_code_segmentation(np.array(predict.cpu()), label_info)

            # get RGB label image
            label = label.squeeze()
            label = reverse_one_hot(label)
            label = colour_code_segmentation(np.array(label.cpu()), label_info)

            # compute per pixel accuracy
            precision_record.append(compute_global_accuracy(predict, label))

    mean_precision = np.mean(precision_record)
    print('precision per pixel for validation: %.3f' % mean_precision)
    return mean_precision
def __init__(self, image_path, label_path, csv_path, scale, loss='dice', mode='train'):
    """Index the image/label PNG files and prepare the preprocessing helpers.

    Args:
        image_path: one folder, or a list of folders, searched for ``*.png`` images.
        label_path: one folder, or a list of folders, searched for ``*.png`` labels.
        csv_path: class/colour csv passed to ``get_label_info``.
        scale: stored as ``self.image_size``.
        loss: stored as ``self.loss``.
        mode: stored as ``self.mode``.
    """
    super().__init__()
    self.mode = mode

    # Accept a single folder as well as a list of folders.
    image_dirs = image_path if isinstance(image_path, list) else [image_path]
    label_dirs = label_path if isinstance(label_path, list) else [label_path]

    # Images and labels are paired by position, so both lists are sorted.
    self.image_list = sorted(
        found
        for folder in image_dirs
        for found in glob.glob(os.path.join(folder, '*.png'))
    )
    self.label_list = sorted(
        found
        for folder in label_dirs
        for found in glob.glob(os.path.join(folder, '*.png'))
    )

    # augmentation: horizontal flip with probability 0.5
    self.fliplr = iaa.Fliplr(0.5)
    self.label_info = get_label_info(csv_path)

    # normalization
    normalize = transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225))
    self.to_tensor = transforms.Compose([
        transforms.ToTensor(),
        normalize,
    ])

    self.image_size = scale
    # candidate rescaling factors
    self.scale = [0.5, 1, 1.25, 1.5, 1.75, 2]
    self.loss = loss
def predict_on_image(model, args, data, label_file, img_info):
    """Segment one image and write the prediction and its diff against the label.

    Args:
        model: network to run; it is switched to ``eval()`` mode.
        args: namespace providing ``csv_path``, ``crop_height`` and ``crop_width``.
        data: path of the input image.
        label_file: path of the ground-truth label image.
        img_info: tag inserted into the output filenames
            ``res/pred_<img_info>.png`` and ``res/diff_<img_info>.png``.
    """
    # read csv label path
    label_info = get_label_info(args.csv_path)

    # pre-processing on label: RGB label image -> per-pixel class index
    label = np.array(Image.open(label_file))
    label = one_hot_it_v11_dice(label, label_info).astype(np.uint8)
    label = np.transpose(label, [2, 0, 1]).astype(np.float32)
    label = label.squeeze()
    label = np.argmax(label, axis=0)

    # pre-processing on image
    image = cv2.imread(data, -1)
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    resize = iaa.Scale({'height': args.crop_height, 'width': args.crop_width})
    resize_det = resize.to_deterministic()
    image = resize_det.augment_image(image)
    image = Image.fromarray(image).convert('RGB')
    image = transforms.ToTensor()(image)
    image = transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225))(image).unsqueeze(0)

    # predict
    model.eval()
    predict = model(image).squeeze()
    predict = reverse_one_hot(predict)
    # .cpu() first: the output may live on the GPU, which numpy cannot read
    predict_np = np.array(predict.cpu())

    # The tag must come from the img_info argument; the literal string
    # 'img_info' made every call overwrite the same two files.
    tag = str(img_info)

    predict_ = colour_code_segmentation(predict_np, label_info)
    # output size is fixed at 512 x 512
    predict_ = cv2.resize(np.uint8(predict_), (512, 512))
    cv2.imwrite('res/pred_' + tag + '.png', cv2.cvtColor(np.uint8(predict_), cv2.COLOR_RGB2BGR))

    diff = plot_diff(predict_np, label)
    cv2.imwrite('res/diff_' + tag + '.png', cv2.cvtColor(np.uint8(diff), cv2.COLOR_RGB2BGR))
def __init__(self, images_path, labels_path, info_path, csv_path, scale, loss='dice'):
    """
    Index the image/label files and set up label translation and preprocessing.

    Args:
        images_path (string): path to images folder (``*.jpg`` files are collected)
        labels_path (string): path to labels folder (``*.png`` files are collected)
        info_path (string): path to info json file
        csv_path (string): path to CamVid csv file
        scale (int, int): desired shape of the image
        loss (string): type of loss to use - either 'dice' or 'crossentropy'
    """
    super().__init__()

    self.dataset_info = get_Idda_info(info_path)
    self.shape = scale
    self.scale = [0.5, 1, 1.25, 1.5, 1.75, 2]

    # Label translation table: ids 0..10 map to the colours below, in order,
    # and id 255 maps to black.
    palette = [
        [0, 128, 192],
        [128, 0, 0],
        [64, 0, 128],
        [192, 192, 128],
        [64, 64, 128],
        [64, 64, 0],
        [128, 64, 128],
        [0, 0, 192],
        [192, 128, 128],
        [128, 128, 128],
        [128, 128, 0],
    ]
    self.toCamVidDict = dict(enumerate(palette))
    self.toCamVidDict[255] = [0, 0, 0]

    self.label_info = get_label_info(csv_path)

    # Sorted so that images and labels line up by position.
    self.images = sorted(glob.glob(os.path.join(images_path, '*.jpg')))
    self.labels = sorted(glob.glob(os.path.join(labels_path, '*.png')))
    self.loss = loss

    # image -> tensor, then per-channel normalisation
    normalize = transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225))
    self.to_tensor = transforms.Compose([transforms.ToTensor(), normalize])
def __init__(self, image_path, depth_path, label_path, csv_path, scale, mode='train'):
    """Index RGB images and derive the matching depth and label file paths.

    Args:
        image_path: folder searched for ``*.jpg`` images.
        depth_path: folder expected to hold ``<stem>.png`` depth maps.
        label_path: folder expected to hold ``<stem>.png`` label images.
        csv_path: class/colour csv passed to ``get_label_info``.
        scale: target size handed to the three ``transforms.Resize`` helpers.
        mode: stored as ``self.mode``.
    """
    super().__init__()
    self.mode = mode
    self.image_list = glob.glob(os.path.join(image_path, '*.jpg'))
    # Take the stem with os.path helpers: splitting on '/' and '.' breaks on
    # other path separators and truncates names that contain extra dots.
    self.image_name = [os.path.splitext(os.path.basename(x))[0] for x in self.image_list]
    self.depth_list = [os.path.join(depth_path, x + '.png') for x in self.image_name]
    self.label_list = [os.path.join(label_path, x + '.png') for x in self.image_name]
    self.fliplr = iaa.Fliplr(0.5)
    self.label_info = get_label_info(csv_path)
    # resize: bilinear for the image, nearest for depth and label
    self.resize_img = transforms.Resize(scale, Image.BILINEAR)
    self.resize_depth = transforms.Resize(scale, Image.NEAREST)
    self.resize_label = transforms.Resize(scale, Image.NEAREST)
    # tensor conversion only; no Normalize step is applied here
    self.to_tensor = transforms.ToTensor()
def val(args, model, dataloader, csv_path):
    """Evaluate ``model`` on ``dataloader``; report pixel accuracy and mIoU.

    Args:
        args: namespace providing ``num_classes``, ``use_gpu`` and ``loss``.
        model: network to evaluate; it is switched to ``eval()`` mode.
        dataloader: iterable yielding ``(data, label)`` batches.
        csv_path: class csv passed to ``cal_miou`` for the per-class report.

    Returns:
        ``(precision, miou)``: mean per-batch global accuracy, and the mean of
        the per-class IoU values with the last class excluded.
    """
    print('start val!!')
    use_cuda = torch.cuda.is_available() and args.use_gpu

    with torch.no_grad():
        model.eval()
        precision_record = []
        hist = np.zeros((args.num_classes, args.num_classes))
        for data, label in dataloader:
            if use_cuda:
                data = data.cuda()
                label = label.cuda()

            # class-index prediction
            predict = model(data).squeeze()
            predict = reverse_one_hot(predict)
            predict = np.array(predict.cpu())

            # class-index label; only 'dice' labels are decoded with reverse_one_hot
            label = label.squeeze()
            if args.loss == 'dice':
                label = reverse_one_hot(label)
            label = np.array(label.cpu())

            # compute per pixel accuracy and accumulate the confusion matrix
            # (no need to convert to RGB colour images for either metric)
            precision_record.append(compute_global_accuracy(predict, label))
            hist += fast_hist(label.flatten(), predict.flatten(), args.num_classes)

    precision = np.mean(precision_record)
    # The last class is left out of the mIoU (presumably the void class --
    # TODO confirm against the csv ordering).
    miou_list = per_class_iu(hist)[:-1]
    # Only the per-class dictionary is needed from cal_miou; the reported
    # mIoU is the plain mean of miou_list, as before.
    miou_dict, _ = cal_miou(miou_list, csv_path)
    miou = np.mean(miou_list)

    print('precision per pixel for test: %.3f' % precision)
    print('mIoU for validation: %.3f' % miou)
    miou_str = ''.join('{}:{},\n'.format(key, miou_dict[key]) for key in miou_dict)
    print('mIoU for each class:')
    print(miou_str)
    return precision, miou
def __init__(self, model_path, csv_path):
    """Load a BiSeNet checkpoint and prepare the inference-time preprocessing.

    Args:
        model_path: path of the saved ``state_dict``.
        csv_path: class/colour csv passed to ``get_label_info``.
    """
    # retrieve label info
    self.label_info = get_label_info(csv_path)

    # build model and load weight (12 classes, resnet18 context path)
    self.model = BiSeNet(12, 'resnet18')
    self.device = torch.device(
        'cuda:0' if torch.cuda.is_available() else 'cpu')
    # map_location lets a checkpoint saved on a GPU load on a CPU-only host,
    # which is the fallback selected just above.
    self.model.load_state_dict(torch.load(model_path, map_location=self.device))
    self.model.to(self.device).eval()

    self.transform = transforms.Compose([
        transforms.ToPILImage(),
        transforms.ToTensor(),
        transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225))
    ])

    # retrieve person color: the 'Pedestrian' entry without its last element
    self.person_color = self.label_info['Pedestrian'][:-1]
def main(params):
    """Evaluate a trained BiSeNet checkpoint on the CamVid test split.

    Args:
        params: list of command-line style arguments (see the parser below);
            ``--checkpoint_path`` is required.
    """

    def _str2bool(value):
        # argparse's type=bool turns every non-empty string, including
        # "False", into True; parse the text explicitly instead.
        text = value.strip().lower()
        if text in ('true', 't', 'yes', 'y', '1'):
            return True
        if text in ('false', 'f', 'no', 'n', '0', ''):
            return False
        raise argparse.ArgumentTypeError('expected a boolean value, got %r' % value)

    # basic parameters
    parser = argparse.ArgumentParser()
    parser.add_argument('--checkpoint_path', type=str, default=None, required=True,
                        help='The path to the pretrained weights of model')
    parser.add_argument('--crop_height', type=int, default=640,
                        help='Height of cropped/resized input image to network')
    parser.add_argument('--crop_width', type=int, default=640,
                        help='Width of cropped/resized input image to network')
    parser.add_argument('--data', type=str, default='/path/to/data', help='Path of training data')
    parser.add_argument('--batch_size', type=int, default=1, help='Number of images in each batch')
    parser.add_argument('--context_path', type=str, default="resnet101",
                        help='The context path model you are using.')
    parser.add_argument('--cuda', type=str, default='0', help='GPU ids used for training')
    parser.add_argument('--use_gpu', type=_str2bool, default=True, help='Whether to user gpu for training')
    parser.add_argument('--num_classes', type=int, default=32, help='num of object classes (with void)')
    args = parser.parse_args(params)

    # create dataset and dataloader
    test_path = os.path.join(args.data, 'test')
    # test_path = os.path.join(args.data, 'train')
    test_label_path = os.path.join(args.data, 'test_labels')
    # test_label_path = os.path.join(args.data, 'train_labels')
    csv_path = os.path.join(args.data, 'class_dict.csv')
    dataset = CamVid(test_path, test_label_path, csv_path,
                     scale=(args.crop_height, args.crop_width), mode='test')
    dataloader = DataLoader(
        dataset,
        batch_size=1,
        shuffle=True,
        num_workers=4,
    )

    # build model
    os.environ['CUDA_VISIBLE_DEVICES'] = args.cuda
    model = BiSeNet(args.num_classes, args.context_path)
    use_cuda = torch.cuda.is_available() and args.use_gpu

    # load pretrained model if exists
    print('load model from %s ...' % args.checkpoint_path)
    if use_cuda:
        model = torch.nn.DataParallel(model).cuda()
        # DataParallel keeps the real network under .module
        model.module.load_state_dict(torch.load(args.checkpoint_path))
    else:
        # No DataParallel wrapper here, so there is no .module attribute;
        # map_location lets a GPU-saved checkpoint load on the CPU.
        model.load_state_dict(torch.load(args.checkpoint_path, map_location='cpu'))
    print('Done!')

    # get label info
    label_info = get_label_info(csv_path)

    # test
    # NOTE(review): `eval` is expected to be this project's evaluation routine
    # shadowing the builtin -- confirm it is defined/imported in this module.
    eval(model, dataloader, args, label_info)
def predict_on_image(model, args):
    """Segment the image at ``args.data`` and save the colour-coded result.

    Args:
        model: network to run; it is switched to ``eval()`` mode.
        args: namespace providing ``data`` (input image path), ``crop_height``,
            ``crop_width``, ``csv_path`` and ``save_path`` (output image path).
    """
    # pre-processing on image
    image = cv2.imread(args.data, -1)
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    resize = iaa.Scale({'height': args.crop_height, 'width': args.crop_width})
    resize_det = resize.to_deterministic()
    image = resize_det.augment_image(image)
    image = Image.fromarray(image).convert('RGB')
    # NOTE: only ToTensor is applied here, no Normalize step
    image = transforms.ToTensor()(image).unsqueeze(0)

    # read csv label path
    label_info = get_label_info(args.csv_path)

    # predict
    model.eval()
    predict = model(image).squeeze()
    predict = reverse_one_hot(predict)
    # .cpu() first: the output may live on the GPU, which numpy cannot read
    predict = colour_code_segmentation(np.array(predict.cpu()), label_info)
    # output size is fixed at 960 x 720 (width, height)
    predict = cv2.resize(np.uint8(predict), (960, 720))
    cv2.imwrite(args.save_path, cv2.cvtColor(np.uint8(predict), cv2.COLOR_RGB2BGR))
def predict_on_image(model, args, image):
    '''
    Run inference on one BGR frame and return the colour-coded segmentation.

    model: network to run; it is switched to eval() mode.
    args: namespace providing crop_height, crop_width and csv_path.
    image: input frame in BGR channel order.

    Returns the colour-coded prediction as a uint8 array resized to
    960 x 720 (width, height).
    '''
    # pre-processing on image: BGR -> RGB, resize to the network input size
    rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    resizer = iaa.Scale({'height': args.crop_height, 'width': args.crop_width}).to_deterministic()
    rgb = resizer.augment_image(rgb)
    pil_image = Image.fromarray(rgb).convert('RGB')

    # tensor conversion + normalisation, then add the batch dimension
    to_tensor = transforms.ToTensor()
    normalize = transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225))
    batch = normalize(to_tensor(pil_image)).unsqueeze(0)

    # read csv label path
    label_info = get_label_info(args.csv_path)

    # predict
    model.eval()
    class_map = reverse_one_hot(model(batch).squeeze())
    coloured = colour_code_segmentation(np.array(class_map.cpu()), label_info)
    return cv2.resize(np.uint8(coloured), (960, 720))
def _str2bool(value):
    """Parse a command-line boolean such as ``true``/``false``/``1``/``0``.

    argparse's ``type=bool`` turns every non-empty string, including
    ``"False"``, into ``True``; this parser reads the text instead.
    """
    text = value.strip().lower()
    if text in ('true', 't', 'yes', 'y', '1'):
        return True
    if text in ('false', 'f', 'no', 'n', '0', ''):
        return False
    raise argparse.ArgumentTypeError('expected a boolean value, got %r' % value)


parser = argparse.ArgumentParser()
parser.add_argument('--checkpoint_path', type=str, default='./checkpoints/epoch_90.pth',
                    help='The path to the pretrained weights of model')
parser.add_argument('--context_path', type=str, default="Xception",
                    help='The context path model you are using.')
parser.add_argument('--num_classes', type=int, default=151, help='num of object classes (with void)')
parser.add_argument('--crop_height', type=int, default=480,
                    help='Height of cropped/resized input image to network')
parser.add_argument('--crop_width', type=int, default=640,
                    help='Width of cropped/resized input image to network')
parser.add_argument('--cuda', type=str, default='1', help='GPU ids used for training')
parser.add_argument('--use_gpu', type=_str2bool, default=True, help='Whether to user gpu for training')
parser.add_argument('--csv_path', type=str, default='/home/disk1/xs/ADEChallengeData2016/ade150.csv',
                    help='Path to label info csv file')
args = parser.parse_args()

# read csv label path
label_info = get_label_info(args.csv_path)
scale = (args.crop_height, args.crop_width)

# build model
os.environ['CUDA_VISIBLE_DEVICES'] = args.cuda
model = BiSeNet(args.num_classes, args.context_path)

# load pretrained model if exists
print('load model from %s ...' % args.checkpoint_path)
if torch.cuda.is_available() and args.use_gpu:
    model = torch.nn.DataParallel(model).cuda()
    # DataParallel keeps the real network under .module
    model.module.load_state_dict(torch.load(args.checkpoint_path))  # GPU -> GPU
else:
    # keep every storage on the CPU regardless of where it was saved
    model.load_state_dict(torch.load(args.checkpoint_path,
                                     map_location=lambda storage, loc: storage))  # GPU -> CPU
label = torch.from_numpy(label) return img, label elif self.loss == 'crossentropy': label = one_hot_it_v11(label, self.label_info).astype(np.uint8) # label = label.astype(np.float32) label = torch.from_numpy(label).long() return img, label def __len__(self): return len(self.image_list) if __name__ == '__main__': # data = CamVid('/path/to/CamVid/train', '/path/to/CamVid/train_labels', '/path/to/CamVid/class_dict.csv', (640, 640)) data = CamVid( ['/data/sqy/CamVid/train', '/data/sqy/CamVid/val'], ['/data/sqy/CamVid/train_labels', '/data/sqy/CamVid/val_labels'], '/data/sqy/CamVid/class_dict.csv', (720, 960), loss='crossentropy', mode='val') from model.build_BiSeNet import BiSeNet from utils import reverse_one_hot, get_label_info, colour_code_segmentation, compute_global_accuracy label_info = get_label_info('/data/sqy/CamVid/class_dict.csv') for i, (img, label) in enumerate(data): print(label.size()) print(torch.max(label))
label = seq_det.augment_image(label) # resize image and label # resize_det = self.resize.to_deterministic() # img = resize_det.augment_image(img) # label = resize_det.augment_image(label) # image -> [C, H, W] img = Image.fromarray(img).convert('RGB') img = self.to_tensor(img).float() # label -> [num_classes, H, W] label = np.transpose(label, [2, 0, 1]).astype(np.float32) label = torch.from_numpy(label) return img, label def __len__(self): return len(self.image_list) if __name__ == '__main__': data = CamVid('/path/to/CamVid/train', '/path/to/CamVid/train_labels', '/path/to/CamVid/class_dict.csv', (640, 640)) from model.build_BiSeNet import BiSeNet from utils import reverse_one_hot, get_label_info, colour_code_segmentation, compute_global_accuracy label_info = get_label_info('/path/to/CamVid/class_dict.csv') for i, (img, label) in enumerate(data): print(img.shape)
# image -> to_tensor [3, H, W] img = Image.fromarray(img).convert('RGB') img = self.to_tensor(img).float() # depth -> to_tensor [1, H, W] depth = depth / 65535 depth = self.to_tensor(depth).float() # image + depth = RGBD rgbd = torch.cat((img, depth), 0) # label -> [num_classes, H, W] label = np.transpose(label, [2, 0, 1]).astype(np.float32) label = torch.from_numpy(label) return rgbd, label def __len__(self): return len(self.image_list) if __name__ == '__main__': data = SUN('/temp_disk/xs/sun/train/image', '/temp_disk/xs/sun/train/label_img', '/temp_disk/xs/sun/seg37_class_dict.csv', (480, 640)) from utils import reverse_one_hot, get_label_info, colour_code_segmentation, compute_global_accuracy label_info = get_label_info('/temp_disk/xs/sun/seg37_class_dict.csv') for i, (img, label) in enumerate(data): print(img.shape) print(label.shape) print()
# resize image and label # resize_det = self.resize.to_deterministic() # img = resize_det.augment_image(img) # label = resize_det.augment_image(label) # image -> [C, H, W] img = Image.fromarray(img).convert('RGB') img = self.to_tensor(img).float() # label -> [num_classes, H, W] label = np.transpose(label, [2, 0, 1]).astype(np.float32) label = torch.from_numpy(label) return img, label def __len__(self): return len(self.image_list) if __name__ == '__main__': data = CamVid('/temp_disk/xs/CamVid/train', '/temp_disk/xs/CamVid/train_labels', '/temp_disk/xs/CamVid/class_dict.csv', (640, 640)) from utils import reverse_one_hot, get_label_info, colour_code_segmentation, compute_global_accuracy label_info = get_label_info('/temp_disk/xs/CamVid/class_dict.csv') for i, (img, label) in enumerate(data): print(img.shape) print(label.shape) print()
label = torch.from_numpy(label) return img, label elif self.loss == 'crossentropy': label = one_hot_it_v11(label, self.label_info).astype(np.uint8) # label = label.astype(np.float32) label = torch.from_numpy(label).long() return img, label def __len__(self): return len(self.image_list) if __name__ == '__main__': # data = potsdam('/path/to/potsdam/train', '/path/to/potsdam/train_labels', '/path/to/potsdam/class_dict.csv', (640, 640)) data = potsdam( ['/data/sqy/potsdam/train', '/data/sqy/potsdam/val'], ['/data/sqy/potsdam/train_labels', '/data/sqy/potsdam/val_labels'], '/data/sqy/potsdam/class_dict.csv', (720, 960), loss='crossentropy', mode='val') from model.build_BiSeNet import BiSeNet from utils import reverse_one_hot, get_label_info, colour_code_segmentation, compute_global_accuracy label_info = get_label_info('/data/sqy/potsdam/class_dict.csv') for i, (img, label) in enumerate(data): print(label.size()) print(torch.max(label))