def run_epoch(self, epoch, iteration_loss=False):
    """Runs an epoch of validation.

    Keyword arguments:
    - epoch (``int``): The current epoch number.
    - iteration_loss (``bool``, optional): Prints loss at every step.

    Returns:
    - The epoch loss (float), and the values of the specified metrics.

    """
    self.model.eval()
    epoch_loss = 0.0
    self.metric.reset()
    for step, batch_data in enumerate(self.data_loader):
        if step > 0:
            break  # Debug shortcut: only the first batch is evaluated

        # Get the inputs and labels
        inputs = batch_data[0].to(self.device)
        labels = batch_data[1].to(self.device)

        with torch.no_grad():
            # Forward propagation
            outputs = self.model(inputs)
            _, preds = torch.max(outputs.data, 1)

            # Loss computation
            loss = self.criterion(outputs, labels)

        # Keep track of loss for current epoch
        epoch_loss += loss.item()

        # Keep track of the evaluation metric
        self.metric.add(outputs.detach(), labels.detach())

        if iteration_loss:
            print("[Step: %d] Iteration loss: %.4f" % (step, loss.item()))

        if epoch in (59, 99, 159, 199) and step % 500 == 0:
            print('Visualization of Val:')
            label_to_rgb = transforms.Compose([
                ext_transforms.LongTensorToRGBPIL(self.color_encoding),
                transforms.ToTensor()
            ])
            color_labels = utils.batch_transform(labels.cpu(), label_to_rgb)
            color_outputs = utils.batch_transform(preds.cpu(), label_to_rgb)
            utils.imshow_batch(color_outputs, color_labels)

    return epoch_loss / len(self.data_loader), self.metric.value()
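# --- A minimal driver sketch for the validation loop above. The Test class is
# assumed to wrap run_epoch, and model, val_loader, class_weights, num_classes
# and device are assumed to come from the surrounding script; none of these
# names are confirmed by this snippet alone.
import torch.nn as nn

criterion = nn.CrossEntropyLoss(weight=class_weights)
metric = IoU(num_classes)
val = Test(model, val_loader, criterion, metric, device)
for epoch in range(num_epochs):
    val_loss, (iou, miou) = val.run_epoch(epoch, iteration_loss=False)
    print(">>>> [Epoch: {0:d}] Avg. loss: {1:.4f} | Mean IoU: {2:.4f}".format(
        epoch, val_loss, miou))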
def displaybatch(loader, tag):
    """Helper function that displays 10 random images from the loader, their
    predicted segmentations under the victim model, their transformations
    under the malicious autoencoder, and the predicted segmentations of those
    transformations. It saves these pictures to the results directory.
    """
    for i in range(10):
        # Draw a fresh random batch each iteration and use its i-th sample
        images, labels = next(iter(loader))
        print("Image size:", images.size())
        print("Label size:", labels.size())
        print("Class-color encoding:", class_encoding)

        label_to_rgb = transforms.Compose([
            ext_transforms.LongTensorToRGBPIL(class_encoding),
            transforms.ToTensor()
        ])
        color_labels = utils.batch_transform(labels, label_to_rgb)

        imagescuda = images.to(device)
        with torch.no_grad():
            # no_grad so the tensors can be converted to numpy below
            vlabels = model.bboxmodel(imagescuda).to('cpu')
        vlabels = out2segs(vlabels)
        vlabels = utils.batch_transform(vlabels, label_to_rgb)

        print("Original...")
        img = images[i].numpy().transpose((1, 2, 0))
        v = vlabels[i].numpy().transpose((1, 2, 0))
        plt.imsave("results/orig" + tag + str(i) + ".png", img)
        plt.imsave("results/origseg" + tag + str(i) + ".png", v)

        with torch.no_grad():
            newimages = model.gettransformeddata(imagescuda)
            _, psegs = model(newimages)
        newimages = newimages.to('cpu')
        psegs = out2segs(psegs.to('cpu'))
        psegs = utils.batch_transform(psegs, label_to_rgb)
        img = newimages[i].numpy().transpose((1, 2, 0))
        v = psegs[i].numpy().transpose((1, 2, 0))
        plt.imsave("results/trans" + tag + str(i) + ".png", img)
        plt.imsave("results/transseg" + tag + str(i) + ".png", v)
def process_predict(predictions, class_encoding):
    # Predictions is one-hot encoded with "num_classes" channels.
    # Convert it to a single int using the indices where the maximum (1) occurs
    _, predictions = torch.max(predictions.data, 1)

    label_to_rgb = transforms.Compose([
        ext_transforms.LongTensorToRGBPIL(class_encoding),
        transforms.ToTensor()
    ])
    color_predictions = utils.batch_transform(predictions.cpu(), label_to_rgb)
    return color_predictions
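# --- utils.batch_transform is used throughout these snippets but not shown.
# A minimal sketch of what such a helper does, assuming the PyTorch-ENet-style
# signature: unbind the batch along dim 0, apply the (PIL-based) transform to
# each sample, and stack the results back into a single tensor.
import torch

def batch_transform(batch, transform):
    # Apply `transform` to every sample in `batch`, then re-stack
    transformed = [transform(tensor) for tensor in torch.unbind(batch)]
    return torch.stack(transformed)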
def forward(self, x, y):
    x_f, y_f = self.encoder(x), self.encoder(y)
    concat = torch.cat((x_f, y_f), dim=1)
    out = self.decoder(concat)
    # Split the regression head into a translation and a unit quaternion
    batch_t = out[:, :3]
    batch_quat = out[:, 3:] / torch.norm(out[:, 3:], dim=1, keepdim=True)
    batch_R = batch_quat2mat(batch_quat)
    transformed_x = batch_transform(
        x.permute(0, 2, 1).contiguous(), batch_R, batch_t)
    return batch_R, batch_t, transformed_x
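# --- batch_quat2mat is assumed above but not shown. A sketch of the standard
# unit-quaternion-to-rotation-matrix conversion it presumably implements,
# assuming quaternions ordered (w, x, y, z) and already normalized by the
# caller, as in the forward pass above.
import torch

def batch_quat2mat(quat):
    # quat: (B, 4) unit quaternions -> (B, 3, 3) rotation matrices
    w, x, y, z = quat[:, 0], quat[:, 1], quat[:, 2], quat[:, 3]
    row0 = torch.stack(
        [1 - 2 * (y * y + z * z), 2 * (x * y - w * z), 2 * (x * z + w * y)],
        dim=1)
    row1 = torch.stack(
        [2 * (x * y + w * z), 1 - 2 * (x * x + z * z), 2 * (y * z - w * x)],
        dim=1)
    row2 = torch.stack(
        [2 * (x * z - w * y), 2 * (y * z + w * x), 1 - 2 * (x * x + y * y)],
        dim=1)
    return torch.stack([row0, row1, row2], dim=1)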
def predict(model, images, gt_labels, class_encoding):
    images = images.to(device)

    # Make predictions!
    model.eval()
    with torch.no_grad():
        predictions = model(images)['out']

    # Predictions is one-hot encoded with "num_classes" channels.
    # Convert it to a single int using the indices where the maximum (1) occurs
    _, predictions = torch.max(predictions.data, 1)

    label_to_rgb = transforms.Compose([
        ext_transforms.LongTensorToRGBPIL(class_encoding),
        transforms.ToTensor()
    ])
    color_predictions = utils.batch_transform(predictions.cpu(), label_to_rgb)
    color_gt_labels = utils.batch_transform(gt_labels.cpu(), label_to_rgb)
    # utils.imshow_batch(images.data.cpu(), color_predictions)
    utils.imshow_batch_2(images.data.cpu(), color_predictions, color_gt_labels)
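# --- utils.imshow_batch_2 is not shown. A hypothetical sketch of such a
# side-by-side display helper built on torchvision's make_grid; the row
# layout and titles here are illustrative assumptions, not the library's API.
import matplotlib.pyplot as plt
from torchvision.utils import make_grid

def imshow_batch_2(images, predictions, gt_labels):
    # One grid per row so inputs, colorized predictions, and colorized
    # ground truth line up vertically
    fig, axes = plt.subplots(3, 1)
    for ax, batch, title in zip(axes,
                                (images, predictions, gt_labels),
                                ('input', 'prediction', 'ground truth')):
        grid = make_grid(batch).numpy().transpose((1, 2, 0))
        ax.imshow(grid)
        ax.set_title(title)
        ax.axis('off')
    plt.show()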
def forward(self, x, y):
    x_f, y_f = self.encoder(x), self.encoder(y)
    concat = torch.cat((x_f, y_f), dim=1)
    out = self.decoder(concat)
    batch_t = out[:, :3]
    batch_quat = out[:, 3:] / torch.norm(out[:, 3:], dim=1, keepdim=True)
    batch_R = batch_quat2mat(batch_quat)
    if self.in_dim1 == 3:
        # Points only: rotate and translate
        transformed_x = batch_transform(
            x.permute(0, 2, 1).contiguous(), batch_R, batch_t)
    elif self.in_dim1 == 6:
        # Points plus normals: normals are rotated but not translated
        transformed_pts = batch_transform(
            x.permute(0, 2, 1)[:, :, :3].contiguous(), batch_R, batch_t)
        transformed_nls = batch_transform(
            x.permute(0, 2, 1)[:, :, 3:].contiguous(), batch_R)
        transformed_x = torch.cat([transformed_pts, transformed_nls], dim=-1)
    else:
        raise ValueError("in_dim1 must be 3 (xyz) or 6 (xyz + normals), "
                         "got {}".format(self.in_dim1))
    return batch_R, batch_t, transformed_x
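# --- Note that batch_transform here is a point-cloud helper, not the PIL one
# used by the segmentation snippets. A sketch under the assumption that it
# applies R (and optionally t) to a (B, N, 3) batch of points, matching the
# two call patterns in the forward pass above.
import torch

def batch_transform(points, batch_R, batch_t=None):
    # points: (B, N, 3); batch_R: (B, 3, 3); batch_t: (B, 3) or None.
    # Rotate each cloud; translate only when a translation is given
    # (normals in the 6-channel branch are rotated only).
    transformed = torch.bmm(points, batch_R.transpose(1, 2))
    if batch_t is not None:
        transformed = transformed + batch_t.unsqueeze(1)
    return transformed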
def in_training_visualization(model, images, labels, class_encoding, writer,
                              epoch, data):
    images = images.to(device)
    labels = labels.to(device)

    # Make predictions!
    model.eval()
    with torch.no_grad():
        predictions = model(images)

    # Predictions is one-hot encoded with "num_classes" channels.
    # Convert it to a single int using the indices where the maximum (1) occurs
    _, predictions = torch.max(predictions.data, 1)

    label_to_rgb = transforms.Compose([
        ext_transforms.LongTensorToRGBPIL(class_encoding),
        transforms.ToTensor()
    ])
    color_predictions = utils.batch_transform(predictions.cpu(), label_to_rgb)
    color_train = utils.batch_transform(labels.data.cpu(), label_to_rgb)
    utils.write_summary_batch(images.data.cpu(), color_train,
                              color_predictions, writer, epoch, data)
def print_results(self, loss, miou, class_encoding, iou, aurocs, auprs, fprs,
                  args):
    # Print loss and IoU
    print(">>>> Avg. loss: {:.4f}".format(loss))
    print(">>>> Avg. IoU : {:.4f}".format(miou))
    print(4 * " " + 22 * "-")
    print(4 * " " + "|{0:11}| {1:6} |".format("classes", "IoU"))
    print(4 * " " + 22 * "-")
    for key, class_iou in zip(class_encoding.keys(), iou):
        print(4 * " " + "|{0:11}| {1:.4f} |".format(key, class_iou))
    print(4 * " " + 22 * "-")

    # Print AUROC, AUPR and FPR
    print("\n>>>> | mean(AUROC) | mean(AUPR) | mean(FPR) |")
    print(10 * " " + 40 * "-")
    print("max_logit | {0:.4f} | {1:.4f} | {2:.4f} |".format(
        np.mean(aurocs['max_logit']), np.mean(auprs['max_logit']),
        np.mean(fprs['max_logit'])))
    print("msp       | {0:.4f} | {1:.4f} | {2:.4f} |".format(
        np.mean(aurocs['msp']), np.mean(auprs['msp']), np.mean(fprs['msp'])))
    print("backg     | {0:.4f} | {1:.4f} | {2:.4f} |".format(
        np.mean(aurocs['backg']), np.mean(auprs['backg']),
        np.mean(fprs['backg'])))
    print(10 * " " + 40 * "-")

    # Display a batch of samples and labels
    if args.imshow_batch:
        print("\nA batch of predictions from the test set...")
        images, _ = next(iter(self.data_loader))
        images = images.to(args.device)

        # Make predictions
        self.model.eval()
        with torch.no_grad():
            predictions = self.model(images)

        max_logit, max_index = torch.max(predictions.data, 1)
        msp, _ = torch.max(nn.functional.softmax(predictions.data, dim=1),
                           dim=1)
        backg = predictions.data[:, 11]  # channel 11 holds the background logit

        label_to_rgb = transforms.Compose([
            ext_transforms.LongTensorToRGBPIL(class_encoding),
            transforms.ToTensor()
        ])
        color_predictions = utils.batch_transform(max_index.cpu(),
                                                  label_to_rgb)
        utils.imshow_metrics(images.data.cpu(), color_predictions, msp,
                             max_logit, backg, args.batch_size)
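# --- The AUROC/AUPR/FPR dictionaries printed above are computed elsewhere.
# A minimal sketch of how such out-of-distribution scores are commonly
# obtained with scikit-learn, assuming `scores` are flattened per-pixel
# anomaly scores (higher = more anomalous) and `ood_mask` is a flattened
# binary mask of out-of-distribution pixels; these names are illustrative.
import numpy as np
from sklearn.metrics import roc_auc_score, average_precision_score, roc_curve

def ood_metrics(scores, ood_mask):
    auroc = roc_auc_score(ood_mask, scores)
    aupr = average_precision_score(ood_mask, scores)
    fpr, tpr, _ = roc_curve(ood_mask, scores)
    # FPR at 95% TPR, the threshold convention used in OOD benchmarks
    fpr95 = fpr[np.argmax(tpr >= 0.95)]
    return auroc, aupr, fpr95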
def predict(model, images, class_encoding):
    images = Variable(images)
    if use_cuda:
        images = images.cuda()

    # Make predictions!
    predictions = model(images)

    # Predictions are one-hot encoded with "num_classes" channels; take the
    # argmax over the channel dimension to convert each pixel to a single int
    _, predictions = torch.max(predictions.data, 1)

    # Convert the labels to RGB images so they can be displayed
    label_to_rgb = transforms.Compose([
        ext_transforms.LongTensorToRGBPIL(class_encoding),
        transforms.ToTensor()
    ])
    color_predictions = utils.batch_transform(predictions.cpu(), label_to_rgb)
    utils.imshow_batch(images.data.cpu(), color_predictions)
def predict(model, images, class_encoding):
    images = Variable(images)
    if use_cuda:
        images = images.cuda()

    # Make predictions!
    predictions = model(images)

    # Predictions is one-hot encoded with "num_classes" channels.
    # Convert it to a single int using the indices where the maximum (1) occurs
    _, predictions = torch.max(predictions.data, 1)

    label_to_rgb = transforms.Compose([
        ext_transforms.LongTensorToRGBPIL(class_encoding),
        transforms.ToTensor()
    ])
    color_predictions = utils.batch_transform(predictions.cpu(), label_to_rgb)
    utils.imshow_batch(images.data.cpu(), color_predictions)
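# --- ext_transforms.LongTensorToRGBPIL is assumed throughout this section.
# A sketch of what it presumably does: map each integer class index in an
# (H, W) label tensor to its RGB color from the ordered class encoding, then
# convert the result to a PIL image.
import torch
from torchvision import transforms

class LongTensorToRGBPIL(object):
    def __init__(self, rgb_encoding):
        self.rgb_encoding = rgb_encoding

    def __call__(self, tensor):
        if len(tensor.size()) == 3:
            tensor = tensor.squeeze(0)
        color_tensor = torch.zeros(3, tensor.size(0), tensor.size(1),
                                   dtype=torch.uint8)
        for index, (class_name, color) in enumerate(
                self.rgb_encoding.items()):
            # Fill every channel of the pixels belonging to this class
            mask = torch.eq(tensor, index)
            for channel, color_value in enumerate(color):
                color_tensor[channel].masked_fill_(mask, color_value)
        return transforms.ToPILImage()(color_tensor)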
def single():
    print('Mode: Single')
    img = Image.open('test_content/example_01.png').convert('RGB')
    class_encoding = color_encoding = OrderedDict([
        ('unlabeled', (0, 0, 0)),
        ('road', (128, 64, 128)),
        ('sidewalk', (244, 35, 232)),
        ('building', (70, 70, 70)),
        ('wall', (102, 102, 156)),
        ('fence', (190, 153, 153)),
        ('pole', (153, 153, 153)),
        ('traffic_light', (250, 170, 30)),
        ('traffic_sign', (220, 220, 0)),
        ('vegetation', (107, 142, 35)),
        ('terrain', (152, 251, 152)),
        ('sky', (70, 130, 180)),
        ('person', (220, 20, 60)),
        ('rider', (255, 0, 0)),
        ('car', (0, 0, 142)),
        ('truck', (0, 0, 70)),
        ('bus', (0, 60, 100)),
        ('train', (0, 80, 100)),
        ('motorcycle', (0, 0, 230)),
        ('bicycle', (119, 11, 32))
    ])
    num_classes = len(class_encoding)

    model = ERFNet(num_classes)
    model_path = os.path.join(args.save_dir, args.name)
    print('Loading model at:', model_path)
    checkpoint = torch.load(model_path)
    # model = ENet(num_classes)
    model = model.cuda()
    model.load_state_dict(checkpoint['state_dict'])

    img = img.resize((args.width, args.height), Image.BILINEAR)
    start = time.time()
    images = transforms.ToTensor()(img)
    # Add the batch dimension: (3, H, W) -> (1, 3, H, W)
    images = images.unsqueeze(0)
    with torch.no_grad():
        images = images.cuda()
        predictions = model(images)
    end = time.time()
    print('model speed:', int(1 / (end - start)), "FPS")

    _, predictions = torch.max(predictions.data, 1)
    label_to_rgb = transforms.Compose(
        [utils.LongTensorToRGBPIL(class_encoding),
         transforms.ToTensor()])
    color_predictions = utils.batch_transform(predictions.cpu(), label_to_rgb)
    end = time.time()
    print('model+transform:', int(1 / (end - start)), "FPS")
    utils.imshow_batch(images.data.cpu(), color_predictions)
def predict(model, images, class_encoding):
    StartTime = time.time()
    images = images.to(device)

    # Make predictions!
    model.eval()
    with torch.no_grad():
        predictions = model(images)

    # Predictions is one-hot encoded with "num_classes" channels.
    # Convert it to a single int using the indices where the maximum (1) occurs
    _, predictions1 = torch.max(predictions.data, 1)

    label_to_rgb = transforms.Compose([
        ext_transforms.LongTensorToRGBPIL(class_encoding),
        transforms.ToTensor()
    ])
    color_predictions = utils.batch_transform(predictions1.cpu(), label_to_rgb)
    utils.imshow_batch(images.data.cpu(), color_predictions)

    EndTime = time.time()
    RunTime = EndTime - StartTime
    print("For each figure, the running time is %.4f s." % RunTime)

    if args.generate_images:
        # NOTE: this branch assumes names defined in the class-based variant
        # of this script (image, fileName, self.color_palette,
        # self.generate_image_dir); they are undefined in this function
        cur_rgb = image
        cur_output = torch.clone(predictions)
        _, cur_output = cur_output.max(0)
        cur_output = cur_output.detach().cpu().numpy()
        pred_label_image = create_label_image(cur_output, self.color_palette)
        rgb_image = image
        height = cur_output.shape[0]
        width = cur_output.shape[1]

        # Stack the input image on top of its predicted label image
        composite_image = np.zeros((2 * height, width, 3), dtype=np.uint8)
        composite_image[0:height, :, :] = rgb_image
        composite_image[height:2 * height, :, :] = pred_label_image
        imageio.imwrite(
            os.path.join(self.generate_image_dir, str(fileName) + '.png'),
            composite_image)
def load_dataset(dataset):
    print("\nLoading dataset...\n")
    print("Selected dataset:", args.dataset)
    print("Dataset directory:", args.dataset_dir)
    print("Save directory:", args.save_dir)

    # Data transforms and normalization
    image_transform = transforms.Compose(
        [transforms.Resize((args.height, args.width)),
         transforms.ToTensor()])

    # PILToLongTensor: labels must not be normalized
    label_transform = transforms.Compose([
        transforms.Resize((args.height, args.width)),
        ext_transforms.PILToLongTensor()  # (H x W x C) to (C x H x W)
    ])

    # Get selected dataset
    # Load the training set as tensors
    train_set = dataset(args.dataset_dir,
                        transform=image_transform,
                        label_transform=label_transform)
    train_loader = data.DataLoader(train_set,
                                   batch_size=args.batch_size,
                                   shuffle=True,
                                   num_workers=args.workers)

    # Load the validation set as tensors
    val_set = dataset(args.dataset_dir,
                      mode='val',
                      transform=image_transform,
                      label_transform=label_transform)
    val_loader = data.DataLoader(val_set,
                                 batch_size=3,
                                 shuffle=True,
                                 num_workers=args.workers)

    # Load the test set as tensors
    test_set = dataset(args.dataset_dir,
                       mode='test',
                       transform=image_transform,
                       label_transform=label_transform)
    test_loader = data.DataLoader(test_set,
                                  batch_size=3,
                                  shuffle=True,
                                  num_workers=args.workers)

    # Get the encoding between pixel values in label images and RGB colors
    class_encoding = train_set.color_encoding

    # Get number of classes to predict
    num_classes = len(class_encoding)

    # Print information for debugging
    print("Number of classes to predict:", num_classes)
    print("Train dataset size:", len(train_set))
    print("Validation dataset size:", len(val_set))

    # Get a batch of samples to display
    if args.mode.lower() == 'test':
        images, labels = next(iter(test_loader))
    else:
        images, labels = next(iter(train_loader))
    print("Image size:", images.size())
    print("Label size:", labels.size())
    print("Class-color encoding:", class_encoding)

    # Show a batch of samples and labels
    if args.imshow_batch:
        print("Close the figure window to continue...")
        label_to_rgb = transforms.Compose([
            ext_transforms.LongTensorToRGBPIL(class_encoding),
            transforms.ToTensor()
        ])
        color_labels = utils.batch_transform(labels, label_to_rgb)
        utils.imshow_batch(images, color_labels)

    # Get class weights from the selected weighing technique
    print("\nWeighing technique:", args.weighing)
    print("Computing class weights...")
    print("(this can take a while depending on the dataset size)")
    if args.weighing.lower() == 'enet':
        # Precomputed per-class ENet weights for this dataset;
        # enet_weighing would recompute them from the loader
        class_weights = np.array([
            1.44752114, 33.41317956, 43.89576605, 47.85765692, 48.3393951,
            47.18958997, 40.2809274, 46.61960781, 48.28854284
        ])
        # class_weights = enet_weighing(train_loader, num_classes)
    else:
        class_weights = None

    if class_weights is not None:
        class_weights = torch.Tensor(class_weights)
        # Set the weight of the unlabeled class to 0
        # if args.ignore_unlabeled:
        #     ignore_index = list(class_encoding).index('unlabeled')
        #     class_weights[ignore_index] = 0

    print("Class weights:", class_weights)

    return (train_loader, val_loader,
            test_loader), class_weights, class_encoding
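# --- The commented-out enet_weighing call above refers to the class-weighting
# scheme from the ENet paper, w_class = 1 / ln(c + p_class), with c typically
# 1.02. A minimal sketch, assuming labels are integer class-index tensors:
import numpy as np

def enet_weighing(dataloader, num_classes, c=1.02):
    # p_class is the pixel frequency of each class over the whole dataset
    class_count = np.zeros(num_classes)
    total = 0
    for _, label in dataloader:
        flat_label = label.cpu().numpy().flatten()
        class_count += np.bincount(flat_label,
                                   minlength=num_classes)[:num_classes]
        total += flat_label.size
    propensity = class_count / total
    return 1 / np.log(c + propensity)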
def video():
    print('testing from video')
    cameraWidth = 1920
    cameraHeight = 1080
    cameraMatrix = np.matrix([[1.3878727764994030e+03, 0, cameraWidth / 2],
                              [0, 1.7987055172413220e+03, cameraHeight / 2],
                              [0, 0, 1]])
    distCoeffs = np.matrix([
        -5.8881725390917083e-01, 5.8472404395779809e-01,
        -2.8299599929891900e-01, 0
    ])

    vidcap = cv2.VideoCapture('test_content/massachusetts.mp4')
    success = True
    i = 0
    while success:
        success, img = vidcap.read()
        if not success:
            break  # end of video
        if i % 1000 == 0:
            print("frame: ", i)

        if args.rmdistort:
            P = cv2.fisheye.estimateNewCameraMatrixForUndistortRectify(
                cameraMatrix, distCoeffs, (cameraWidth, cameraHeight), None)
            map1, map2 = cv2.fisheye.initUndistortRectifyMap(
                cameraMatrix, distCoeffs, np.eye(3), P, (1920, 1080),
                cv2.CV_16SC2)
            img = cv2.remap(img, map1, map2, cv2.INTER_LINEAR)

        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        img = Image.fromarray(img)
        # img = img.convert('RGB')
        # cv2.imshow('', img)
        # cv2.waitKey(0)
        # img2 = Image.open(filename).convert('RGB')

        class_encoding = color_encoding = OrderedDict([
            ('unlabeled', (0, 0, 0)),
            ('road', (128, 64, 128)),
            ('sidewalk', (244, 35, 232)),
            ('building', (70, 70, 70)),
            ('wall', (102, 102, 156)),
            ('fence', (190, 153, 153)),
            ('pole', (153, 153, 153)),
            ('traffic_light', (250, 170, 30)),
            ('traffic_sign', (220, 220, 0)),
            ('vegetation', (107, 142, 35)),
            ('terrain', (152, 251, 152)),
            ('sky', (70, 130, 180)),
            ('person', (220, 20, 60)),
            ('rider', (255, 0, 0)),
            ('car', (0, 0, 142)),
            ('truck', (0, 0, 70)),
            ('bus', (0, 60, 100)),
            ('train', (0, 80, 100)),
            ('motorcycle', (0, 0, 230)),
            ('bicycle', (119, 11, 32))
        ])
        num_classes = len(class_encoding)

        # NOTE: reloading the checkpoint on every frame is kept from the
        # original; hoisting this above the loop would be much faster
        model_path = os.path.join(args.save_dir, args.name)
        checkpoint = torch.load(model_path)
        model = ERFNet(num_classes)
        model = model.cuda()
        model.load_state_dict(checkpoint['state_dict'])

        img = img.resize((args.width, args.height), Image.BILINEAR)
        start = time.time()
        images = transforms.ToTensor()(img)
        # Add the batch dimension: (3, H, W) -> (1, 3, H, W)
        images = images.unsqueeze(0)
        with torch.no_grad():
            images = images.cuda()
            predictions = model(images)
        end = time.time()
        print('model speed:', int(1 / (end - start)), "FPS")

        _, predictions = torch.max(predictions.data, 1)
        label_to_rgb = transforms.Compose([
            utils.LongTensorToRGBPIL(class_encoding),
            transforms.ToTensor()
        ])
        color_predictions = utils.batch_transform(predictions.cpu(),
                                                  label_to_rgb)
        end = time.time()
        print('model+transform:', int(1 / (end - start)), "FPS")
        utils.imshow_batch(images.data.cpu(), color_predictions)
        i += 1
def predict(model, images, class_encoding):
    images = images.to(device)

    # Make predictions!
    model.eval()
    with torch.no_grad():
        predictions = model(images)

    # Predictions is one-hot encoded with "num_classes" channels.
    # Convert it to a single int using the indices where the maximum (1) occurs
    _, predictions = torch.max(predictions.data, 1)
    print(predictions.size())
    print(images.size())

    label_to_rgb = transforms.Compose([
        ext_transforms.LongTensorToRGBPIL(class_encoding),
        transforms.ToTensor()
    ])
    color_predictions = utils.batch_transform(predictions.cpu(), label_to_rgb)

    label_to_pil = transforms.Compose(
        [ext_transforms.LongTensorToRGBPIL(class_encoding)])
    color_predictions_pil = [
        label_to_pil(tensor) for tensor in torch.unbind(predictions.cpu())
    ]
    color_images_pil = [
        transforms.ToPILImage()(tensor)
        for tensor in torch.unbind(images.cpu())
    ]

    import numpy as np

    count = 0
    print(type(images))
    out_dir = '/media/nv/7174c323-375e-4334-b15e-019bd2c8af08/PyTorch-ENet-master/icome/icome_test_images/'
    for (pil, img) in zip(color_predictions_pil, color_images_pil):
        # Binarize the colorized prediction and blur it into a soft mask
        pil_cv = ext_transforms.RGBPILToCvMat(pil)
        kernel = np.ones((11, 11), np.uint8)
        # pil_open = cv2.morphologyEx(pil_cv, cv2.MORPH_OPEN, kernel)
        _, pil_open = cv2.threshold(pil_cv, 200, 255, cv2.THRESH_BINARY)
        pil_open = cv2.blur(pil_open, (11, 11))

        img_cv = ext_transforms.RGBPILToCvMat(img)
        img_filter = cv2.GaussianBlur(img_cv, (5, 5), 0)
        img_filter = cv2.GaussianBlur(img_filter, (5, 5), 0)

        # Plan 1 (commented out in the original) did the same blend with an
        # explicit per-pixel loop over rows, columns and channels.

        # Plan 2: vectorized alpha blend of the blurred and original images,
        # weighted by the soft mask
        img_filter_np = np.asarray(img_filter).astype(np.float32)
        img_cv_np = np.asarray(img_cv).astype(np.float32)
        pil_open_np = np.asarray(pil_open)
        img_filter2 = (img_filter_np * pil_open_np / 255 +
                       img_cv_np * (255 - pil_open_np) / 255)
        img_out = Image.fromarray(
            cv2.cvtColor(img_filter2.astype(np.uint8), cv2.COLOR_BGR2RGB))

        img.save(out_dir + str(count + 300) + '.png')
        img_out.save(out_dir + str(count + 400) + '.png')
        Image.fromarray(
            cv2.cvtColor(pil_open.astype(np.uint8),
                         cv2.COLOR_BGR2RGB)).save(
            out_dir + str(count + 500) + '.png')

        # Plan 3 (commented out in the original) composited with PIL
        # paste/GaussianBlur using fliterLabeltoBinary masks.

        count = count + 1

    utils.imshow_batch(images.data.cpu(), color_predictions)
def load_dataset(dataset):
    print("\nLoading dataset...\n")
    print("Selected dataset:", args.dataset)
    print("Dataset directory:", args.dataset_dir)
    print('Train file:', args.trainFile)
    print('Val file:', args.valFile)
    print('Test file:', args.testFile)
    print("Save directory:", args.save_dir)

    image_transform = transforms.Compose(
        [transforms.Resize((args.height, args.width)),
         transforms.ToTensor()])
    label_transform = transforms.Compose([
        transforms.Resize((args.height, args.width)),
        ext_transforms.PILToLongTensor()
    ])

    # Get selected dataset
    # Load the training set as tensors
    train_set = dataset(args.dataset_dir, args.trainFile, mode='train',
                        transform=image_transform,
                        label_transform=label_transform,
                        color_mean=color_mean, color_std=color_std)
    train_loader = data.DataLoader(train_set,
                                   batch_size=args.batch_size,
                                   shuffle=True,
                                   num_workers=args.workers)

    # Load the validation set as tensors
    val_set = dataset(args.dataset_dir, args.valFile, mode='val',
                      transform=image_transform,
                      label_transform=label_transform,
                      color_mean=color_mean, color_std=color_std)
    val_loader = data.DataLoader(val_set,
                                 batch_size=args.batch_size,
                                 shuffle=False,
                                 num_workers=args.workers)

    # Load the test set as tensors
    test_set = dataset(args.dataset_dir, args.testFile, mode='inference',
                       transform=image_transform,
                       label_transform=label_transform,
                       color_mean=color_mean, color_std=color_std)
    test_loader = data.DataLoader(test_set,
                                  batch_size=args.batch_size,
                                  shuffle=False,
                                  num_workers=args.workers)

    # Get encoding between pixel values in label images and RGB colors
    class_encoding = train_set.color_encoding

    # Get number of classes to predict
    num_classes = len(class_encoding)

    # Print information for debugging
    print("Number of classes to predict:", num_classes)
    print("Train dataset size:", len(train_set))
    print("Validation dataset size:", len(val_set))

    # Get a batch of samples to display
    if args.mode.lower() == 'test':
        images, labels = next(iter(test_loader))
    else:
        images, labels = next(iter(train_loader))
    print("Image size:", images.size())
    print("Label size:", labels.size())
    print("Class-color encoding:", class_encoding)

    # Show a batch of samples and labels
    if args.imshow_batch:
        print("Close the figure window to continue...")
        label_to_rgb = transforms.Compose([
            ext_transforms.LongTensorToRGBPIL(class_encoding),
            transforms.ToTensor()
        ])
        color_labels = utils.batch_transform(labels, label_to_rgb)
        utils.imshow_batch(images, color_labels)

    # Get class weights from the selected weighing technique
    print("Weighing technique:", args.weighing)

    # If a class weight file is provided, try loading the weights from it
    class_weights = None
    if args.class_weights_file:
        print('Trying to load class weights from file...')
        try:
            class_weights = np.loadtxt(args.class_weights_file)
        except Exception as e:
            raise e

    if class_weights is None:
        print("Computing class weights...")
        print("(this can take a while depending on the dataset size)")
        if args.weighing.lower() == 'enet':
            class_weights = enet_weighing(train_loader, num_classes)
        elif args.weighing.lower() == 'mfb':
            class_weights = median_freq_balancing(train_loader, num_classes)
        else:
            class_weights = None

    if class_weights is not None:
        class_weights = torch.from_numpy(class_weights).float().to(device)
        # Set the weight of the unlabeled class to 0
        print("Ignoring unlabeled class: ", args.ignore_unlabeled)
        if args.ignore_unlabeled:
            ignore_index = list(class_encoding).index('unlabeled')
            class_weights[ignore_index] = 0

    print("Class weights:", class_weights)

    return (train_loader, val_loader,
            test_loader), class_weights, class_encoding
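# --- median_freq_balancing is called above but not shown. A sketch of median
# frequency balancing (Eigen & Fergus), where freq(c) is the frequency of
# class-c pixels counted only over images in which class c appears; the
# per-batch counting here is a simplifying assumption.
import numpy as np

def median_freq_balancing(dataloader, num_classes):
    class_count = np.zeros(num_classes)
    total = np.zeros(num_classes)
    for _, label in dataloader:
        flat_label = label.cpu().numpy().flatten()
        bincount = np.bincount(flat_label,
                               minlength=num_classes)[:num_classes]
        # Accumulate the pixel total only for classes present in this batch
        mask = bincount > 0
        total[mask] += flat_label.size
        class_count += bincount
    freq = class_count / total
    return np.median(freq) / freq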
def run_epoch(self, epoch, iteration_loss=False):
    """Runs an epoch of training.

    Keyword arguments:
    - epoch (``int``): The current epoch number.
    - iteration_loss (``bool``, optional): Prints loss at every step.

    Returns:
    - The epoch loss (float), and the values of the specified metrics.

    """
    self.model.train()
    epoch_loss = 0.0
    self.metric.reset()
    for step, batch_data in enumerate(self.data_loader):
        if step > 0:
            break  # Debug shortcut: only the first batch is trained on

        # Get the inputs and labels
        inputs = batch_data[0].to(self.device)
        labels = batch_data[1].to(self.device)

        # Forward propagation
        outputs = self.model(inputs)
        _, preds = torch.max(outputs.data, 1)

        # Loss computation
        loss = self.criterion(outputs, labels)

        # Backpropagation
        self.optim.zero_grad()
        loss.backward()
        self.optim.step()

        # Keep track of loss for current epoch
        epoch_loss += loss.item()

        # Keep track of the evaluation metric
        self.metric.add(outputs.detach(), labels.detach())

        if iteration_loss:
            print("[Step: %d] Iteration loss: %.4f" % (step, loss.item()))

        if epoch in (59, 99, 159, 199) and step % 500 == 0:
            print('Visualization of Train:')
            label_to_rgb = transforms.Compose([
                ext_transforms.LongTensorToRGBPIL(self.color_encoding),
                transforms.ToTensor()
            ])
            color_labels = utils.batch_transform(labels.cpu(), label_to_rgb)
            color_outputs = utils.batch_transform(preds.cpu(), label_to_rgb)
            utils.imshow_batch(color_outputs, color_labels)

    # Save intermediate checkpoints at fixed epochs
    checkpoint_epochs = {
        80: 'model_80', 120: 'model_120', 140: 'model_140',
        160: 'model_160', 180: 'model_180', 199: 'model_200'
    }
    if epoch in checkpoint_epochs:
        torch.save(self.model.state_dict(),
                   '/home/wan/PyTorch-ENet/' + checkpoint_epochs[epoch])

    return epoch_loss / len(self.data_loader), self.metric.value()
unloader = transforms.ToPILImage()
for i in range(len(Dataset)):
    data_path = Dataset[i]
    inputs = Image.open(data_path)
    inputs = image_transform(inputs)
    imname = Dataset[i].split('\\')[-1]
    print(imname)
    imname = args.test_out + '/' + imname
    inputs = Variable(inputs.to(device))

    # Get the prediction outputs
    f_i = input_encoder(inputs.unsqueeze(0))
    outputs = decoder_t(f_i)

    # Convert the probability maps to color label maps
    _, predictions = torch.max(outputs, 1)
    label_to_rgb = transforms.Compose([
        ext_transforms.LongTensorToRGBPIL(class_encoding),
        transforms.ToTensor()
    ])
    color_predictions = utils.batch_transform(predictions.cpu(), label_to_rgb)

    # Save the outputs as color label maps
    image = color_predictions.cpu().clone()
    image = image.squeeze(0)
    image = unloader(image)
    image.save(imname)
def load_dataset(dataset):
    print("\nLoading dataset...\n")
    print("Selected dataset:", args.dataset)
    print("Dataset directory:", args.dataset_dir)
    print('Test file:', args.testFile)
    print("Save directory:", args.save_dir)

    image_transform = transforms.Compose(
        [transforms.Resize((args.height, args.width)),
         transforms.ToTensor()])
    label_transform = transforms.Compose([
        transforms.Resize((args.height, args.width)),
        ext_transforms.PILToLongTensor()
    ])

    # Load the test set as tensors
    test_set = dataset(args.dataset_dir, args.testFile, mode='inference',
                       transform=image_transform,
                       label_transform=label_transform,
                       color_mean=color_mean, color_std=color_std,
                       load_depth=(args.arch == 'rgbd'),
                       seg_classes=args.seg_classes)
    test_loader = data.DataLoader(test_set,
                                  batch_size=args.batch_size,
                                  shuffle=False,
                                  num_workers=args.workers)

    # Get encoding between pixel values in label images and RGB colors
    class_encoding = test_set.color_encoding

    # Get number of classes to predict
    num_classes = len(class_encoding)

    # Print information for debugging
    print("Number of classes to predict:", num_classes)
    print("Test dataset size:", len(test_set))

    # Get a batch of samples to display
    if args.arch == 'rgbd':
        images, labels, data_path, depth_path, label_path = next(
            iter(test_loader))
    else:
        images, labels, data_path, label_path = next(iter(test_loader))
    print("Image size:", images.size())
    print("Label size:", labels.size())
    print("Class-color encoding:", class_encoding)

    # Show a batch of samples and labels
    if args.imshow_batch:
        print("Close the figure window to continue...")
        label_to_rgb = transforms.Compose([
            ext_transforms.LongTensorToRGBPIL(class_encoding),
            transforms.ToTensor()
        ])
        color_labels = utils.batch_transform(labels, label_to_rgb)
        utils.imshow_batch(images, color_labels)

    # Get class weights
    # If a class weight file is provided, try loading the weights from it
    class_weights = None
    if args.class_weights_file:
        print('Trying to load class weights from file...')
        try:
            class_weights = np.loadtxt(args.class_weights_file)
        except Exception as e:
            raise e
    else:
        print('No class weights found...')

    if class_weights is not None:
        class_weights = torch.from_numpy(class_weights).float().to(device)
        # Set the weight of the unlabeled class to 0
        if args.ignore_unlabeled:
            ignore_index = list(class_encoding).index('unlabeled')
            class_weights[ignore_index] = 0

    print("Class weights:", class_weights)

    return test_loader, class_weights, class_encoding
def train(train_loader, val_loader, class_weights, class_encoding):
    print("\nTraining...\n")
    num_classes = len(class_encoding)

    # Initialize ENet
    model = ENet(num_classes).to(device)
    # Check that the network architecture is correct
    print(model)

    # We use CrossEntropyLoss since it's the criterion most frequently used
    # in multi-class classification problems, which fits this task. It
    # combines LogSoftMax and NLLLoss.
    criterion = nn.CrossEntropyLoss(weight=class_weights)

    # ENet authors used Adam as the optimizer
    optimizer = optim.Adam(model.parameters(),
                           lr=args.learning_rate,
                           weight_decay=args.weight_decay)

    # Learning rate decay scheduler
    lr_updater = lr_scheduler.StepLR(optimizer, args.lr_decay_epochs,
                                     args.lr_decay)

    # Evaluation metric
    if args.ignore_unlabeled:
        ignore_index = list(class_encoding).index('unlabeled')
    else:
        ignore_index = None
    metric = IoU(num_classes, ignore_index=ignore_index)

    # Optionally resume from a checkpoint
    if args.resume:
        model, optimizer, start_epoch, best_miou = utils.load_checkpoint(
            model, optimizer, args.save_dir, args.name)
        print("Resuming from model: Start epoch = {0} "
              "| Best mean IoU = {1:.4f}".format(start_epoch, best_miou))
    else:
        start_epoch = 0
        best_miou = 0

    # Start Training
    print()
    train = Train(model, train_loader, optimizer, criterion, metric, device)
    val = Test(model, val_loader, criterion, metric, device)
    for epoch in range(start_epoch, args.epochs):
        print(">>>> [Epoch: {0:d}] Training".format(epoch))

        lr_updater.step()
        epoch_loss, (iou, miou) = train.run_epoch(args.print_step)

        # Visualization with TensorBoardX
        writer.add_scalar('data/train/loss', epoch_loss, epoch)
        writer.add_scalar('data/train/mean_IoU', miou, epoch)

        print(">>>> [Epoch: {0:d}] Avg. loss: {1:.4f} | Mean IoU: {2:.4f}".
              format(epoch, epoch_loss, miou))

        # Validate after every epoch (the modulus is a placeholder interval)
        if (epoch + 1) % 1 == 0 or epoch + 1 == args.epochs:
            print(">>>> [Epoch: {0:d}] Validation".format(epoch))

            loss, (iou, miou) = val.run_epoch(args.print_step)

            # Visualization with TensorBoardX
            writer.add_scalar('data/val/loss', loss, epoch)
            writer.add_scalar('data/val/mean_IoU', miou, epoch)

            print(">>>> [Epoch: {0:d}] Avg. loss: {1:.4f} | Mean IoU: {2:.4f}".
                  format(epoch, loss, miou))

            # Print per-class IoU on the last epoch or when a new best is found
            if epoch + 1 == args.epochs or miou > best_miou:
                for key, class_iou in zip(class_encoding.keys(), iou):
                    print("{0}: {1:.4f}".format(key, class_iou))

            # Save the model if it's the best thus far
            if miou > best_miou:
                print("\nBest model thus far. Saving...\n")
                best_miou = miou
                utils.save_checkpoint(model, optimizer, epoch + 1, best_miou,
                                      args)

            # Visualization of a predicted batch in TensorBoard
            for i, batch in enumerate(val_loader):
                if i == 1:
                    break

                # Get the inputs and labels
                inputs = batch[0].to(device)
                labels = batch[1].to(device)

                # Forward propagation
                with torch.no_grad():
                    predictions = model(inputs)

                # Predictions is one-hot encoded with "num_classes" channels.
                # Convert it to a single int using the indices where the
                # maximum (1) occurs
                _, predictions = torch.max(predictions.data, 1)
                label_to_rgb = transforms.Compose([
                    ext_transforms.LongTensorToRGBPIL(class_encoding),
                    transforms.ToTensor()
                ])
                color_predictions = utils.batch_transform(
                    predictions.cpu(), label_to_rgb)
                in_training_visualization(model, inputs, labels,
                                          class_encoding, writer, epoch,
                                          'val')

    return model
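# --- The IoU metric class used above is not shown; it presumably accumulates
# a confusion matrix over batches. A sketch of how per-class IoU and mean IoU
# are derived from such a matrix (rows = ground truth, columns = predictions):
import numpy as np

def iou_from_confusion(conf):
    # IoU per class = TP / (TP + FP + FN)
    tp = np.diag(conf)
    fp = conf.sum(axis=0) - tp
    fn = conf.sum(axis=1) - tp
    with np.errstate(divide='ignore', invalid='ignore'):
        iou = tp / (tp + fp + fn)
    # Classes that never occur produce NaN and are skipped by nanmean
    return iou, np.nanmean(iou)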
def load_dataset(dataset):
    print("\nLoading dataset...\n")
    print("Selected dataset:", args.dataset)
    print("Dataset directory:", args.dataset_dir)
    print("Save directory:", args.save_dir)

    image_transform = transforms.Compose(
        [transforms.Resize((args.height, args.width)),
         transforms.ToTensor()])
    label_transform = transforms.Compose([
        # Nearest-neighbour interpolation so label indices are not mixed
        transforms.Resize((args.height, args.width),
                          transforms.InterpolationMode.NEAREST),
        ext_transforms.PILToLongTensor()
    ])

    # Get selected dataset
    # Load the training set as tensors
    train_set = dataset(args.dataset_dir,
                        transform=image_transform,
                        label_transform=label_transform)
    train_loader = data.DataLoader(train_set,
                                   batch_size=args.batch_size,
                                   shuffle=True,
                                   num_workers=args.workers)

    # Load the validation set as tensors
    val_set = dataset(args.dataset_dir,
                      mode='val',
                      transform=image_transform,
                      label_transform=label_transform)
    val_loader = data.DataLoader(val_set,
                                 batch_size=args.batch_size,
                                 shuffle=False,
                                 num_workers=args.workers)

    # Load the test set as tensors
    test_set = dataset(args.dataset_dir,
                       mode='test',
                       transform=image_transform,
                       label_transform=label_transform)
    test_loader = data.DataLoader(test_set,
                                  batch_size=args.batch_size,
                                  shuffle=False,
                                  num_workers=args.workers)

    # Get encoding between pixel values in label images and RGB colors
    class_encoding = train_set.color_encoding

    # Remove the road_marking class from the CamVid dataset as it's merged
    # with the road class
    if args.dataset.lower() == 'camvid':
        del class_encoding['road_marking']

    # Get number of classes to predict
    num_classes = len(class_encoding)

    # Print information for debugging
    print("Number of classes to predict:", num_classes)
    print("Train dataset size:", len(train_set))
    print("Validation dataset size:", len(val_set))

    # Get a batch of samples to display
    if args.mode.lower() == 'test':
        images, labels = next(iter(test_loader))
    else:
        images, labels = next(iter(train_loader))
    print("Image size:", images.size())
    print("Label size:", labels.size())
    print("Class-color encoding:", class_encoding)

    # Show a batch of samples and labels
    if args.imshow_batch:
        print("Close the figure window to continue...")
        label_to_rgb = transforms.Compose([
            ext_transforms.LongTensorToRGBPIL(class_encoding),
            transforms.ToTensor()
        ])
        color_labels = utils.batch_transform(labels, label_to_rgb)
        utils.imshow_batch(images, color_labels)

    # Get class weights from the selected weighing technique
    print("\nWeighing technique:", args.weighing)
    print("Computing class weights...")
    print("(this can take a while depending on the dataset size)")
    if args.weighing.lower() == 'enet':
        class_weights = enet_weighing(train_loader, num_classes)
    elif args.weighing.lower() == 'mfb':
        class_weights = median_freq_balancing(train_loader, num_classes)
    else:
        class_weights = None

    if class_weights is not None:
        class_weights = torch.from_numpy(class_weights).float().to(device)
        # Set the weight of the unlabeled class to 0
        if args.ignore_unlabeled:
            ignore_index = list(class_encoding).index('unlabeled')
            class_weights[ignore_index] = 0

    print("Class weights:", class_weights)

    return (train_loader, val_loader,
            test_loader), class_weights, class_encoding
def main():
    assert os.path.isdir(
        args.dataset_dir), "The directory \"{0}\" doesn't exist.".format(
            args.dataset_dir)

    # Fail fast if the saving directory doesn't exist
    assert os.path.isdir(
        args.save_dir), "The directory \"{0}\" doesn't exist.".format(
            args.save_dir)

    # Import the requested dataset
    if args.dataset.lower() == 'cityscapes':
        from data import Cityscapes as dataset
    else:
        # Should never happen...but just in case it does
        raise RuntimeError("\"{0}\" is not a supported dataset.".format(
            args.dataset))

    print("\nLoading dataset...\n")
    print("Selected dataset:", args.dataset)
    print("Dataset directory:", args.dataset_dir)
    print("Save directory:", args.save_dir)

    image_transform = transforms.Compose(
        [transforms.Resize((args.height, args.width)),
         transforms.ToTensor()])
    label_transform = transforms.Compose([
        transforms.Resize((args.height, args.width)),
        ext_transforms.PILToLongTensor()
    ])

    # Get selected dataset
    # Load the training set as tensors
    train_set = dataset(args.dataset_dir,
                        mode='train',
                        max_iters=args.max_iters,
                        transform=image_transform,
                        label_transform=label_transform)
    train_loader = data.DataLoader(train_set,
                                   batch_size=args.batch_size,
                                   shuffle=True,
                                   num_workers=args.workers)
    trainloader_iter = enumerate(train_loader)

    # Load the validation set as tensors
    val_set = dataset(args.dataset_dir,
                      mode='val',
                      max_iters=args.max_iters,
                      transform=image_transform,
                      label_transform=label_transform)
    val_loader = data.DataLoader(val_set,
                                 batch_size=args.batch_size,
                                 shuffle=False,
                                 num_workers=args.workers)

    # Load the test set as tensors
    test_set = dataset(args.dataset_dir,
                       mode='test',
                       max_iters=args.max_iters,
                       transform=image_transform,
                       label_transform=label_transform)
    test_loader = data.DataLoader(test_set,
                                  batch_size=args.batch_size,
                                  shuffle=False,
                                  num_workers=args.workers)

    # Get encoding between pixel values in label images and RGB colors
    class_encoding = train_set.color_encoding

    # Get number of classes to predict
    num_classes = len(class_encoding)

    # Print information for debugging
    print("Number of classes to predict:", num_classes)
    print("Train dataset size:", len(train_set))
    print("Validation dataset size:", len(val_set))

    # Get a batch of samples to display
    if args.mode.lower() == 'test':
        images, labels = next(iter(test_loader))
    else:
        images, labels = next(iter(train_loader))
    print("Image size:", images.size())
    print("Label size:", labels.size())
    print("Class-color encoding:", class_encoding)

    # Show a batch of samples and labels
    if args.imshow_batch:
        print("Close the figure window to continue...")
        label_to_rgb = transforms.Compose([
            ext_transforms.LongTensorToRGBPIL(class_encoding),
            transforms.ToTensor()
        ])
        color_labels = utils.batch_transform(labels, label_to_rgb)
        utils.imshow_batch(images, color_labels)

    print("\nTraining...\n")
    num_classes = len(class_encoding)

    # Define the model with the encoder and decoder from DeepLabv2
    input_encoder = Encoder().to(device)
    decoder_t = Decoder(num_classes).to(device)

    # Define the cross-entropy loss for the segmentation task
    criterion = CrossEntropy2d()

    # Set the optimizer for the model
    optimizer_g = optim.SGD(itertools.chain(input_encoder.parameters(),
                                            decoder_t.parameters()),
                            lr=args.learning_rate,
                            momentum=0.9,
                            weight_decay=1e-4)
    optimizer_g.zero_grad()

    # Evaluation metric
    if args.ignore_unlabeled:
        ignore_index = list(class_encoding).index('unlabeled')
    else:
        ignore_index = None
    metric = IoU(num_classes, ignore_index=ignore_index)

    # Optionally resume from a checkpoint
    if args.resume:
        input_encoder, decoder_t, optimizer_g, start_epoch, best_miou = \
            utils.load_checkpoint(input_encoder, decoder_t, optimizer_g,
                                  args.save_dir, args.name)
        print("Resuming from model: Start epoch = {0} "
              "| Best mean IoU = {1:.4f}".format(start_epoch, best_miou))
    else:
        start_epoch = 0
        best_miou = 0

    # Start Training
    print()
    metric.reset()
    val = Test(input_encoder, decoder_t, val_loader, criterion, metric,
               device)
    for i_iter in range(args.max_iters):
        optimizer_g.zero_grad()
        adjust_learning_rate(optimizer_g, i_iter)

        _, batch_data = next(trainloader_iter)
        inputs = batch_data[0].to(device)
        labels = batch_data[1].to(device)

        f_i = input_encoder(inputs)
        outputs_i = decoder_t(f_i)
        loss_seg = criterion(outputs_i, labels)
        loss_g = loss_seg
        loss_g.backward()
        optimizer_g.step()

        if i_iter % args.save_pred_every == 0 and i_iter != 0:
            print('iter = {0:8d}/{1:8d}, loss_seg = {2:.3f}'.format(
                i_iter, args.max_iters, loss_g.item()))
            print(">>>> [iter: {0:d}] Validation".format(i_iter))

            # Validate the trained model
            loss, (iou, miou) = val.run_epoch(args.print_step)
            print(">>>> [iter: {0:d}] Avg. loss: {1:.4f} | Mean IoU: {2:.4f}".
                  format(i_iter, loss, miou))

            if miou > best_miou:
                # Print per-class IoU and save, since this is the best model
                # thus far
                for key, class_iou in zip(class_encoding.keys(), iou):
                    print("{0}: {1:.4f}".format(key, class_iou))
                print("\nBest model thus far. Saving...\n")
                best_miou = miou
                utils.save_checkpoint(input_encoder, decoder_t, optimizer_g,
                                      i_iter + 1, best_miou, args)
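# --- adjust_learning_rate is called in the training loop above but not shown.
# DeepLab-style training commonly uses a polynomial ("poly") schedule,
# lr = base_lr * (1 - iter / max_iters) ** power. A sketch; the base_lr,
# max_iters and power defaults here are illustrative, the script would read
# them from args.
def adjust_learning_rate(optimizer, i_iter, base_lr=2.5e-4,
                         max_iters=250000, power=0.9):
    lr = base_lr * ((1 - float(i_iter) / max_iters) ** power)
    for param_group in optimizer.param_groups:
        param_group['lr'] = lr
    return lr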