def __init__(self, root, split="train_aug", is_transform=False, img_size=512): self.root = root self.split = split self.is_transform = is_transform self.ignore_index = 255 self.n_classes = 21 self.img_size = img_size if isinstance(img_size, tuple) else (img_size, img_size) self.files = collections.defaultdict(list) self.image_transform = Compose([ ToTensor(), Normalize([.485, .456, .406], [.229, .224, .225]), ]) self.filler = [0, 0, 0] # Reading pascal VOC dataset list self.voc_path = get_data_path('pascal') for split in ["train", "val", "trainval", "test"]: file_list = tuple( open( self.voc_path + '/ImageSets/Segmentation/' + split + '.txt', 'r')) file_list = [id_.rstrip() for id_ in file_list] self.files[split] = file_list # Reading SBD dataset list self.sbd_path = get_data_path('sbd') self.sbd_train_list = tuple( open(self.sbd_path + 'dataset/train_withValdata.txt', 'r')) self.sbd_train_list = [id_.rstrip() for id_ in self.sbd_train_list] self.sbd_val_list = tuple(open(self.sbd_path + 'dataset/val.txt', 'r')) self.sbd_val_list = [id_.rstrip() for id_ in self.sbd_val_list] # Augmenting pascal and SBD dataset list self.files[ 'trainval_aug'] = self.sbd_train_list + self.sbd_val_list + self.files[ 'train'] self.files['train_aug'] = list( set(self.files['trainval_aug']) - set(self.files['val'])) # needed for extracting GT of sbd and pascal dataset if not os.path.isdir(self.root + '/pre_encoded'): self.setup(pre_encode=True) else: self.setup(pre_encode=False) self.files = self.files[self.split]
def filtertraindata(self):
    # Build the COCO 'train_aug' and 'val' split lists: keep only images that
    # contain a meaningful amount of Pascal-class pixels.
    datapath = get_data_path('coco')
    with open(datapath + 'annotations/train2014.txt', 'r') as f:
        train_list = [id_.rstrip() for id_ in f]
    with open(datapath + 'annotations/val2014.txt', 'r') as f:
        val_list = [id_.rstrip() for id_ in f]
    total_list = ['/train2014/' + id_ for id_ in train_list] + \
                 ['/val2014/' + id_ for id_ in val_list]

    annotation_path = os.path.join(datapath, 'seg_mask')
    aug_list = []
    for filename in total_list:
        lbl_path = annotation_path + filename + '.png'
        lbl = np.array(Image.open(lbl_path).convert('P'), dtype=np.int32)
        # Keep the image if it has more than 1000 Pascal-class pixels and at
        # least one Pascal category appears in its label map.
        if np.sum(pascal_map[lbl] != 0) > 1000 and np.intersect1d(np.unique(lbl), pascal_classes).any():
            aug_list.append(filename)

    # Hold out 1500 randomly sampled images as the validation split.
    val_aug_list = random.sample(aug_list, 1500)
    train_aug_list = list(set(aug_list) - set(val_aug_list))

    with open(os.path.join(datapath, 'annotations', 'train_aug.txt'), 'w') as txtfile:
        for filename in train_aug_list:
            txtfile.write(filename + '\n')
    with open(os.path.join(datapath, 'annotations', 'val.txt'), 'w') as txtfile:
        for filename in val_aug_list:
            txtfile.write(filename + '\n')
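
# Worked sketch of the filtering rule above. Assumptions: `pascal_map` maps a
# COCO category id to a Pascal id (0 = background/non-Pascal) and
# `pascal_classes` lists the COCO ids of the 20 Pascal categories; the mask
# path argument is illustrative.
def _example_keep_decision(lbl_path):
    lbl = np.array(Image.open(lbl_path).convert('P'), dtype=np.int32)
    enough_pixels = np.sum(pascal_map[lbl] != 0) > 1000                # > 1000 Pascal-class pixels
    has_pascal = np.intersect1d(np.unique(lbl), pascal_classes).any()  # some Pascal class present
    return enough_pixels and has_pascal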
def __init__(self, root, split="train_aug", is_transform=False, img_size=512): self.root = root self.split = split self.is_transform = is_transform self.ignore_index = 91 self.n_classes = 21 self.img_size = img_size if isinstance(img_size, tuple) else (img_size, img_size) self.files = collections.defaultdict(list) self.image_transform = Compose([ ToTensor(), Normalize([.485, .456, .406], [.229, .224, .225]), ]) self.filler = [0, 0, 0] # Reading COCO dataset list - train2014, val2014, train_aug, val, test2014, test2015 self.data_path = get_data_path('coco') filepath = self.data_path + '/annotations/' + split + '.txt', 'r' if split is "train_aug" and not os.path.exists(filepath): self.filtertraindata() file_list = tuple(open(filepath)) file_list = [id_.rstrip() for id_ in file_list] self.files = file_list
def train(args):
    global n_classes

    # Seed all RNGs for reproducibility
    random.seed(args.manualSeed)
    np.random.seed(args.manualSeed)
    torch.manual_seed(args.manualSeed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed_all(args.manualSeed)
        cudnn.benchmark = True

    # Set up results folders
    if not os.path.exists('results/saved_val_images'):
        os.makedirs('results/saved_val_images')
    if not os.path.exists('results/saved_train_images'):
        os.makedirs('results/saved_train_images')

    # Setup Dataloader
    data_loader = get_loader(args.dataset)
    data_path = get_data_path(args.dataset)
    traindata = data_loader(data_path, split=args.split, is_transform=True,
                            img_size=(args.img_rows, args.img_cols))
    trainloader = data.DataLoader(traindata, batch_size=args.batch_size,
                                  num_workers=7, shuffle=True)
    valdata = data_loader(data_path, split="val", is_transform=False,
                          img_size=(args.img_rows, args.img_cols))
    valloader = data.DataLoader(valdata, batch_size=args.batch_size,
                                num_workers=7, shuffle=False)
    n_classes = traindata.n_classes
    n_trainsamples = len(traindata)
    n_iters_per_epoch = np.ceil(n_trainsamples / float(args.batch_size * args.iter_size))

    # Setup Model
    model = torch.nn.DataParallel(
        get_model(args.arch, n_classes, ignore_index=traindata.ignore_index,
                  output_stride=args.ost))
    if torch.cuda.is_available():
        model.cuda()

    epochs_done = 0
    X = []
    Y = []
    Y_test = []
    avg_pixel_acc = 0
    mean_class_acc = 0
    mIoU = 0
    avg_pixel_acc_test = 0
    mean_class_acc_test = 0
    mIoU_test = 0

    # Optionally resume from a checkpoint and restore the logged curves
    if args.model_path:
        model_name = args.model_path.split('.')
        checkpoint_name = model_name[0] + '_optimizer.pkl'
        checkpoint = torch.load(checkpoint_name)
        optm = checkpoint['optimizer']
        model.load_state_dict(checkpoint['state_dict'])
        split_str = model_name[0].split('_')
        epochs_done = int(split_str[-1])
        saved_loss = pickle.load(open("results/saved_loss.p", "rb"))
        saved_accuracy = pickle.load(open("results/saved_accuracy.p", "rb"))
        X = saved_loss["X"][:epochs_done]
        Y = saved_loss["Y"][:epochs_done]
        Y_test = saved_loss["Y_test"][:epochs_done]
        avg_pixel_acc = saved_accuracy["P"][:epochs_done, :]
        mean_class_acc = saved_accuracy["M"][:epochs_done, :]
        mIoU = saved_accuracy["I"][:epochs_done, :]
        avg_pixel_acc_test = saved_accuracy["P_test"][:epochs_done, :]
        mean_class_acc_test = saved_accuracy["M_test"][:epochs_done, :]
        mIoU_test = saved_accuracy["I_test"][:epochs_done, :]

    # Learning rates: newly initialised ('final') layers train faster than the
    # pretrained backbone -- 10x the base LR for their weights and 20x for their biases.
    named_params = list(model.named_parameters())
    bias_10x_params = [p for n, p in named_params
                       if 'bias' in n and 'final' in n and 'conv' in n]
    bias_params = [p for n, p in named_params
                   if 'bias' in n and 'final' not in n]
    nonbias_10x_params = [p for n, p in named_params
                          if ('bias' not in n or 'bn' in n) and 'final' in n]
    nonbias_params = [p for n, p in named_params
                      if 'bias' not in n and 'final' not in n]

    optimizer = torch.optim.SGD([
        {'params': bias_params, 'lr': args.l_rate},
        {'params': bias_10x_params, 'lr': 20 * args.l_rate},
        {'params': nonbias_10x_params, 'lr': 10 * args.l_rate},
        {'params': nonbias_params, 'lr': args.l_rate},
    ], lr=args.l_rate, momentum=args.momentum, weight_decay=args.wd,
        nesterov=(args.optim == 'Nesterov'))
    numgroups = 4
    # Set up the cosine LR scheduler
    if args.model_path and args.restore:
        # Restore the optimizer state and resume the schedule where it left off
        optimizer.load_state_dict(optm)
        total_iters = n_iters_per_epoch * args.n_epoch
        lambda1 = lambda step: 0.5 + 0.5 * math.cos(np.pi * step / total_iters)
        scheduler = lr_scheduler.LambdaLR(optimizer,
                                          lr_lambda=[lambda1] * numgroups,
                                          last_epoch=int(epochs_done * n_iters_per_epoch))
    else:
        # Otherwise start the schedule from scratch
        if args.T0:
            total_iters = args.T0 * n_iters_per_epoch
        else:
            total_iters = (args.n_epoch - epochs_done) * n_iters_per_epoch
        lambda1 = lambda step: 0.5 + 0.5 * math.cos(np.pi * step / total_iters)
        scheduler = lr_scheduler.LambdaLR(optimizer, lr_lambda=[lambda1] * numgroups)

    global l_avg, totalclasswise_pixel_acc, totalclasswise_gtpixels, totalclasswise_predpixels
    global l_avg_test, totalclasswise_pixel_acc_test, totalclasswise_gtpixels_test, totalclasswise_predpixels_test
    global steps, steps_test

    scheduler.step()  # initial step; `scheduler` is also passed to trainmodel() below
    for epoch in range(epochs_done, args.n_epoch):
        # Reset the running statistics every epoch
        l_avg = 0
        totalclasswise_pixel_acc = 0
        totalclasswise_gtpixels = 0
        totalclasswise_predpixels = 0
        l_avg_test = 0
        totalclasswise_pixel_acc_test = 0
        totalclasswise_gtpixels_test = 0
        totalclasswise_predpixels_test = 0
        steps = 0
        steps_test = 0

        trainmodel(model, optimizer, trainloader, epoch, scheduler, traindata)
        valmodel(model, valloader, epoch)

        # Save the model every 10 epochs and after the final epoch
        if (epoch + 1) % 10 == 0 or epoch == args.n_epoch - 1:
            torch.save(model, "results/{}_{}_{}.pkl".format(args.arch, args.dataset, epoch + 1))
            torch.save({'state_dict': model.state_dict(),
                        'optimizer': optimizer.state_dict()},
                       "results/{}_{}_{}_optimizer.pkl".format(args.arch, args.dataset, epoch + 1))

        if os.path.isfile("results/saved_loss.p"):
            os.remove("results/saved_loss.p")
        if os.path.isfile("results/saved_accuracy.p"):
            os.remove("results/saved_accuracy.p")

        # Log train and validation loss
        X.append(epoch + 1)
        Y.append(l_avg / steps)
        Y_test.append(l_avg_test / steps_test)
        saved_loss = {"X": X, "Y": Y, "Y_test": Y_test}
        pickle.dump(saved_loss, open("results/saved_loss.p", "wb"))

        # Reshape the class-wise pixel counts into (epoch, class) arrays
        totalclasswise_pixel_acc = totalclasswise_pixel_acc.reshape((-1, n_classes)).astype(np.float32)
        totalclasswise_gtpixels = totalclasswise_gtpixels.reshape((-1, n_classes))
        totalclasswise_predpixels = totalclasswise_predpixels.reshape((-1, n_classes))
        totalclasswise_pixel_acc_test = totalclasswise_pixel_acc_test.reshape((-1, n_classes)).astype(np.float32)
        totalclasswise_gtpixels_test = totalclasswise_gtpixels_test.reshape((-1, n_classes))
        totalclasswise_predpixels_test = totalclasswise_predpixels_test.reshape((-1, n_classes))

        # Append this epoch's accuracies; on the first epoch the accumulators
        # are still scalars, so the else-branch initialises them instead.
        if isinstance(avg_pixel_acc, np.ndarray):
            avg_pixel_acc = np.vstack((avg_pixel_acc,
                np.sum(totalclasswise_pixel_acc, axis=1) / np.sum(totalclasswise_gtpixels, axis=1)))
            mean_class_acc = np.vstack((mean_class_acc,
                np.mean(totalclasswise_pixel_acc / totalclasswise_gtpixels, axis=1)))
            mIoU = np.vstack((mIoU,
                np.mean(totalclasswise_pixel_acc /
                        (totalclasswise_gtpixels + totalclasswise_predpixels - totalclasswise_pixel_acc),
                        axis=1)))
            avg_pixel_acc_test = np.vstack((avg_pixel_acc_test,
                np.sum(totalclasswise_pixel_acc_test, axis=1) / np.sum(totalclasswise_gtpixels_test, axis=1)))
            mean_class_acc_test = np.vstack((mean_class_acc_test,
                np.mean(totalclasswise_pixel_acc_test / totalclasswise_gtpixels_test, axis=1)))
            mIoU_test = np.vstack((mIoU_test,
                np.mean(totalclasswise_pixel_acc_test /
                        (totalclasswise_gtpixels_test + totalclasswise_predpixels_test - totalclasswise_pixel_acc_test),
                        axis=1)))
        else:
            avg_pixel_acc = np.sum(totalclasswise_pixel_acc, axis=1) / np.sum(totalclasswise_gtpixels, axis=1)
            mean_class_acc = np.mean(totalclasswise_pixel_acc / totalclasswise_gtpixels, axis=1)
            mIoU = np.mean(totalclasswise_pixel_acc /
                           (totalclasswise_gtpixels + totalclasswise_predpixels - totalclasswise_pixel_acc),
                           axis=1)
            avg_pixel_acc_test = np.sum(totalclasswise_pixel_acc_test, axis=1) / np.sum(totalclasswise_gtpixels_test, axis=1)
            mean_class_acc_test = np.mean(totalclasswise_pixel_acc_test / totalclasswise_gtpixels_test, axis=1)
            mIoU_test = np.mean(totalclasswise_pixel_acc_test /
                                (totalclasswise_gtpixels_test + totalclasswise_predpixels_test - totalclasswise_pixel_acc_test),
                                axis=1)

        saved_accuracy = {"X": X,
                          "P": avg_pixel_acc, "M": mean_class_acc, "I": mIoU,
                          "P_test": avg_pixel_acc_test, "M_test": mean_class_acc_test,
                          "I_test": mIoU_test}
        pickle.dump(saved_accuracy, open("results/saved_accuracy.p", "wb"))
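
# Invocation sketch for train(). Flag names mirror the attributes read above
# (args.arch, args.dataset, args.split, ...); the values are illustrative
# assumptions, not recommended settings:
#
#     python train.py --arch <model name> --dataset pascal --split train_aug \
#         --img_rows 512 --img_cols 512 --batch_size 16 --iter_size 1 \
#         --l_rate 0.01 --momentum 0.9 --wd 1e-4 --n_epoch 100 --manualSeed 42
#
# The cosine schedule above scales every parameter group's LR by
#     0.5 + 0.5 * cos(pi * step / total_iters)
# i.e. a factor of 1.0 at step 0, 0.5 halfway through, and ~0 at total_iters.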
def test(args):
    data_loader = get_loader(args.dataset)
    data_path = get_data_path(args.dataset)
    testdata = data_loader(data_path, split="val", is_transform=False, img_size=(512, 512))
    n_classes = testdata.n_classes
    eps = 1e-10

    # (TODO): Choose the scales according to dataset requirements
    scales = [0.5, 0.75, 1.0, 1.25]
    base_size = min(testdata.img_size)
    crop_size = (args.img_rows, args.img_cols)
    # Tiles overlap by one third of the crop size
    stride = [int(np.ceil(float(crop_size[0]) * 2 / 3)),
              int(np.ceil(float(crop_size[1]) * 2 / 3))]
    size_transform_img = [Scale(int(base_size * i)) for i in scales]

    mask1_len = np.zeros(n_classes, dtype=float)    # GT pixels per class
    mask2_len = np.zeros(n_classes, dtype=float)    # predicted pixels per class
    correct_len = np.zeros(n_classes, dtype=float)  # correctly predicted pixels per class

    # Setup Model
    model = torch.nn.DataParallel(
        get_model(args.arch, n_classes, ignore_index=testdata.ignore_index))
    model_name = args.model_path.split('.')
    checkpoint_name = model_name[0] + '_optimizer.pkl'
    checkpoint = torch.load(checkpoint_name)
    model.load_state_dict(checkpoint['state_dict'])
    model.eval()

    soft = nn.Softmax2d()
    cm = np.zeros((n_classes, n_classes), dtype=np.float64)
    if torch.cuda.is_available():
        model.cuda()
        soft.cuda()

    for f_no, line in enumerate(testdata.files):
        imgr, lblr = testdata.readfile(line)
        lbl = np.array(lblr)
        origw, origh = imgr.size

        # Final prediction array for this image, accumulated over all scales
        pred = np.zeros((n_classes, origh, origw), dtype=np.float32)

        # Loop over all scales for a single image
        for i in range(len(scales)):
            img = size_transform_img[i](imgr)
            imsw, imsh = img.size
            imwstart, imhstart = 0, 0
            imw, imh = imsw, imsh

            # Zero-pad if either side is smaller than crop_size
            if imsw < crop_size[1] or imsh < crop_size[0]:
                padw, padh = max(crop_size[1] - imsw, 0), max(crop_size[0] - imsh, 0)
                imw += padw
                imh += padh
                im = Image.new(img.mode, (imw, imh), tuple(testdata.filler))
                im.paste(img, (int(padw / 2), int(padh / 2)))
                imwstart += int(padw / 2)
                imhstart += int(padh / 2)
                img = im

            # Tile the image into overlapping crop_size windows and loop over them
            h_grid = int(np.ceil(float(imh - crop_size[0]) / stride[0])) + 1
            w_grid = int(np.ceil(float(imw - crop_size[1]) / stride[1])) + 1

            # Accumulate prediction probabilities and tile counts for each pixel
            datascale = torch.zeros(n_classes, imh, imw).cuda()
            countscale = torch.zeros(n_classes, imh, imw).cuda()
            for w in range(w_grid):
                for h in range(h_grid):
                    # Crop a crop_size window, clamped to the image border
                    x1, y1 = w * stride[1], h * stride[0]
                    x2, y2 = int(min(x1 + crop_size[1], imw)), int(min(y1 + crop_size[0], imh))
                    x1, y1 = x2 - crop_size[1], y2 - crop_size[0]
                    img_cropped = img.crop((x1, y1, x2, y2))

                    # Batch the image together with its horizontally flipped version
                    img1 = testdata.image_transform(img_cropped)
                    img2 = testdata.image_transform(img_cropped.transpose(Image.FLIP_LEFT_RIGHT))
                    images = torch.stack((img1, img2), dim=0)
                    if torch.cuda.is_available():
                        images = Variable(images.cuda(), volatile=True)
                    else:
                        images = Variable(images, volatile=True)

                    # Forward pass for the image and its flipped version
                    outputs = model(images)

                    # Un-flip the second prediction, sum the two, and normalize
                    flip_idx = torch.arange(outputs.size(3) - 1, -1, -1).cuda().long()
                    prob = outputs[0] + outputs[1][:, :, flip_idx]
                    prob = soft(prob.view(-1, *prob.size()))

                    # Place the scores at the tile's position
                    datascale[:, y1:y2, x1:x2] += prob.data
                    countscale[:, y1:y2, x1:x2] += 1

            # After looping over all tiles, normalize the scores, undo the padding,
            # and bilinearly interpolate back to the original image size
            datascale /= (countscale + eps)
            datascale = datascale[:, imhstart:imhstart + imsh, imwstart:imwstart + imsw]
            datascale = datascale.cpu().numpy()
            datascale = np.transpose(datascale, (1, 2, 0))
            datascale = resize(datascale, (origh, origw), order=1,
                               preserve_range=True, mode='symmetric', clip=False)
            datascale = np.transpose(datascale, (2, 0, 1))

            # Add this scale's normalized score map to the running prediction
            pred += datascale / (np.sum(datascale, axis=0) + eps)

        pred = pred / len(scales)
        pred = pred.argmax(0)
        pred[lbl == testdata.ignore_index] = testdata.ignore_index

        # Update per-class pixel counts
        for m in range(n_classes):
            mask1 = lbl == m
            mask2 = pred == m
            diff = pred[mask1] - lbl[mask1]
            mask1_len[m] += float(np.sum(mask1))
            mask2_len[m] += float(np.sum(mask2))
            correct_len[m] += np.sum(diff == 0)

        cm += confusion_matrix(lbl.ravel(), pred.ravel(), labels=range(n_classes))

        # Report running metrics over the classes seen in the ground truth so far
        indexes_to_avg = mask1_len > 0
        print("pixel accuracy")
        print(np.sum(correct_len[indexes_to_avg]) / np.sum(mask1_len[indexes_to_avg]))
        print("Class_wise_IOU")
        print(correct_len[indexes_to_avg] /
              (mask1_len[indexes_to_avg] + mask2_len[indexes_to_avg] - correct_len[indexes_to_avg]))
        print("mean IOU")
        print(np.mean(correct_len[indexes_to_avg] /
                      (mask1_len[indexes_to_avg] + mask2_len[indexes_to_avg] - correct_len[indexes_to_avg])))
        print("mean accuracy")
        print(np.mean(correct_len[indexes_to_avg] / mask1_len[indexes_to_avg]))

        # Dump the input image, the colour-decoded prediction, and the target
        decoded = testdata.decode_segmap(pred)
        pickle.dump(np.transpose(np.array(imgr, dtype=np.uint8), [2, 0, 1]),
                    open("results/saved_test_images/" + str(f_no) + "_input.p", "wb"))
        pickle.dump(np.transpose(decoded, [2, 0, 1]),
                    open("results/saved_test_images/" + str(f_no) + "_output.p", "wb"))
        pickle.dump(np.transpose(testdata.decode_segmap(lbl), [2, 0, 1]),
                    open("results/saved_test_images/" + str(f_no) + "_target.p", "wb"))

    sio.savemat("results/cm.mat", {'cm': cm})
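
# The printed metrics follow the standard identities, per class m:
#     mask1_len[m]   = TP + FN   (ground-truth pixels of class m)
#     mask2_len[m]   = TP + FP   (pixels predicted as class m)
#     correct_len[m] = TP
# so IoU_m = TP / (TP + FP + FN)
#          = correct_len[m] / (mask1_len[m] + mask2_len[m] - correct_len[m])
# and "mean IOU" / "mean accuracy" average these over the classes that appear
# in the ground truth (indexes_to_avg).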
def test(args):
    data_loader = get_loader(args.dataset)
    data_path = get_data_path(args.dataset)
    testdata = data_loader(data_path, split=args.split, is_transform=False, img_size=(512, 512))
    n_classes = testdata.n_classes
    eps = 1e-10
    args.coco += 5  # offset reused below when naming the output directory

    scales = [0.5, 0.75, 1.0, 1.25]
    base_size = min(testdata.img_size)
    crop_size = (args.img_rows, args.img_cols)
    # Tiles overlap by one third of the crop size
    stride = [int(np.ceil(float(crop_size[0]) * 2 / 3)),
              int(np.ceil(float(crop_size[1]) * 2 / 3))]
    size_transform_img = [Scale(int(base_size * i)) for i in scales]

    # Setup Model
    model = torch.nn.DataParallel(
        get_model(args.arch, n_classes, ignore_index=testdata.ignore_index,
                  output_stride=args.ost))
    model_name = args.model_path.split('.')
    checkpoint_name = model_name[0] + '_optimizer.pkl'
    checkpoint = torch.load(checkpoint_name)
    model.load_state_dict(checkpoint['state_dict'])
    model.eval()

    soft = nn.Softmax2d()
    if torch.cuda.is_available():
        model.cuda()
        soft.cuda()

    for f_no, line in enumerate(testdata.files):
        imgr = readfile(args.img_path, line)
        origw, origh = imgr.size

        # Final prediction array for this image, accumulated over all scales
        pred = np.zeros((n_classes, origh, origw), dtype=np.float32)

        # Loop over all scales for a single image
        for i in range(len(scales)):
            img = size_transform_img[i](imgr)
            imsw, imsh = img.size
            imwstart, imhstart = 0, 0
            imw, imh = imsw, imsh

            # Zero-pad if either side is smaller than crop_size
            if imsw < crop_size[1] or imsh < crop_size[0]:
                padw, padh = max(crop_size[1] - imsw, 0), max(crop_size[0] - imsh, 0)
                imw += padw
                imh += padh
                im = Image.new(img.mode, (imw, imh), tuple(testdata.filler))
                im.paste(img, (int(padw / 2), int(padh / 2)))
                imwstart += int(padw / 2)
                imhstart += int(padh / 2)
                img = im

            # Tile the image into overlapping crop_size windows and loop over them
            h_grid = int(np.ceil(float(imh - crop_size[0]) / stride[0])) + 1
            w_grid = int(np.ceil(float(imw - crop_size[1]) / stride[1])) + 1

            # Accumulate prediction probabilities and tile counts for each pixel
            datascale = torch.zeros(n_classes, imh, imw).cuda()
            countscale = torch.zeros(n_classes, imh, imw).cuda()
            for w in range(w_grid):
                for h in range(h_grid):
                    # Crop a crop_size window, clamped to the image border
                    x1, y1 = w * stride[1], h * stride[0]
                    x2, y2 = int(min(x1 + crop_size[1], imw)), int(min(y1 + crop_size[0], imh))
                    x1, y1 = x2 - crop_size[1], y2 - crop_size[0]
                    img_cropped = img.crop((x1, y1, x2, y2))

                    # Batch the image together with its horizontally flipped version
                    img1 = testdata.image_transform(img_cropped)
                    img2 = testdata.image_transform(img_cropped.transpose(Image.FLIP_LEFT_RIGHT))
                    images = torch.stack((img1, img2), dim=0)
                    if torch.cuda.is_available():
                        images = Variable(images.cuda(), volatile=True)
                    else:
                        images = Variable(images, volatile=True)

                    # Forward pass for the image and its flipped version
                    outputs = model(images)

                    # Un-flip the second prediction, sum the two, and normalize
                    flip_idx = torch.arange(outputs.size(3) - 1, -1, -1).cuda().long()
                    prob = outputs[0] + outputs[1][:, :, flip_idx]
                    prob = soft(prob.view(-1, *prob.size()))

                    # Place the scores at the tile's position
                    datascale[:, y1:y2, x1:x2] += prob.data
                    countscale[:, y1:y2, x1:x2] += 1

            # After looping over all tiles, normalize the scores, undo the padding,
            # and bilinearly interpolate back to the original image size
            datascale /= (countscale + eps)
            datascale = datascale[:, imhstart:imhstart + imsh, imwstart:imwstart + imsw]
            datascale = datascale.cpu().numpy()
            datascale = np.transpose(datascale, (1, 2, 0))
            datascale = resize(datascale, (origh, origw), order=1,
                               preserve_range=True, mode='symmetric', clip=False)
            datascale = np.transpose(datascale, (2, 0, 1))

            # Add this scale's normalized score map to the running prediction
            pred += datascale / (np.sum(datascale, axis=0) + eps)

        pred = pred / len(scales)
        # Class ids fit in uint8 (n_classes = 21); PIL cannot build an image
        # from a uint32 array, so cast to uint8 before saving.
        pred = pred.argmax(0).astype(np.uint8)
        im = Image.fromarray(pred)
        im.save(os.path.join(args.outpath,
                             str(args.coco) + "_" + str(args.split) + "_cls/" + line + ".png"))
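
# Reading a saved prediction back (sketch; `_example_read_prediction` is a
# hypothetical helper, and the path follows the pattern used in im.save() above):
def _example_read_prediction(outpath, coco, split, line):
    mask_path = os.path.join(outpath, str(coco) + "_" + str(split) + "_cls/" + line + ".png")
    return np.array(Image.open(mask_path))  # per-pixel Pascal class ids, 0 = background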