def __getitem__(self, index):
    """Load one pre-composed 4-channel RGB-D image and build an (input, target) pair.

    Returns:
        depth:  2-channel tensor — the LR depth (downsampled then bicubically
                resized back to HR size) concatenated with the grayscale guide.
        target: 1-channel tensor — the ground-truth HR depth.
    """
    # The stored image is a pre-merged 4-channel RGB+D composite.
    img = load_img(self.image_filenames[index])
    # Data augmentation.
    if self.crop:
        img = RandomCrop(64)(img)           # take a random patch
    img = RandomHorizontalFlip()(img)       # horizontal flip
    img = RandomVerticalFlip()(img)         # vertical flip
    img = RandomRotation(180)(img)          # random rotation
    input_tensor = ToTensor()(img)
    # Split channels by slicing — same values as the original element-wise
    # copies into zero tensors, but simpler and faster.
    rgb_tensor = input_tensor[:3, :, :].clone()
    depth_tensor = input_tensor[3:4, :, :].clone()
    depth = ToPILImage()(depth_tensor)
    size = min(depth.size[0], depth.size[1])
    guide = ToPILImage()(rgb_tensor)
    target = depth.copy()
    guide = guide.convert('L')
    # Generate the LR depth: downsample, then bicubically resize back up.
    depth = downsampling(depth, self.upscale_factor)
    depth = Resize(size=size, interpolation=Image.BICUBIC)(depth)
    depth = ToTensor()(depth)
    guide = ToTensor()(guide)
    depth = torch.cat((depth, guide), 0)  # concatenate into the network input
    target = ToTensor()(target)
    return depth, target
def save_output(outputs, road_mask, images, filename, val_ct):
    """Colorize batch predictions and save the original image plus a blended
    prediction overlay under ./predicts/, mirroring the input file's path.

    NOTE(review): `val_ct += 1` rebinds only the local int — the caller's
    counter is NOT advanced; confirm callers track the count themselves.
    NOTE(review): `filename[0]` is used for every batch element, so all
    elements of a batch write to the same path — verify batch size is 1.
    """
    # Compatibility with criterion DataParallel: merge per-GPU tensors.
    if isinstance(outputs, list):
        outputs_cpu = outputs[0].cpu()
        for i in range(1, len(outputs)):
            outputs_cpu = torch.cat((outputs_cpu, outputs[i].cpu()), 0)
    else:
        outputs_cpu = outputs.cpu()
    # Debug preamble: GPU-side argmax/colorization of the first sample,
    # used only for the diagnostic prints below.
    outputs_gpu = outputs
    pred_img_gpu = outputs_gpu[0].max(0)[1].unsqueeze(0)
    roadMask_gpu = road_mask[0]
    pred_img_gpu[0][roadMask_gpu == 0] = 255  # 255 marks off-road pixels
    print("pred_img_gpu [0] =", pred_img_gpu.flatten().unique())
    print("outputs_cpu = ", outputs_cpu.shape)
    print("pred_img_gpu = ", pred_img_gpu.shape)
    col_img_gpu = Colorize()(pred_img_gpu)
    print("col_img = ", col_img_gpu.flatten()[0:5])
    for i in range(0, outputs_cpu.size(0)):  # args.batch_size
        val_ct += 1
        # Per-pixel class via argmax over the class dimension.
        pred_img = outputs_cpu[i].max(0)[1].data.unsqueeze(0)
        roadMask = road_mask[i].data.cpu()
        pred_img[0][roadMask == 0] = 255  # mask out non-road pixels
        col_img = Colorize()(pred_img.byte())
        print("col_img = ", col_img.flatten()[0:5])
        predictionClr = ToPILImage()(col_img)
        # Build the save path from the last three components of the input
        # file's path (the counter-based name was dead code and is removed).
        filename_break = str(filename[0]).split('/')
        filename_path = '/'.join(filename_break[-3:])
        filenameSave = "./predicts/" + str(filename_path)
        os.makedirs(os.path.dirname(filenameSave), exist_ok=True)
        # Save original and a 30% transparent prediction overlay.
        orig_img = Image.fromarray(tensor2im(images).astype(np.uint8))
        orig_file_save = filenameSave + 'orig.png'
        background = orig_img.convert("RGBA")
        overlay = predictionClr.convert("RGBA")
        new_img = Image.blend(background, overlay, 0.3)
        overlay_file_save = filenameSave + 'overlay.png'
        orig_img.save(orig_file_save)
        new_img.save(overlay_file_save)
def save_one_output(pred_img_gpu, images, filename, val_ct):
    """Colorize a single prediction map and save three images under
    ./predicts/: the colorized prediction, the original image, and a
    30%-blended overlay.

    NOTE(review): `val_ct` is no longer used for the save path (the
    counter-based name was dead code, overwritten before use, and has
    been removed); the parameter is kept for interface compatibility.
    """
    col_img = Colorize()(pred_img_gpu)
    predictionClr = ToPILImage()(col_img.cpu().byte())
    # Mirror the last three components of the input file's path.
    filename_break = str(filename[0]).split('/')
    filename_path = '/'.join(filename_break[-3:])
    filenameSave = "./predicts/" + str(filename_path)
    os.makedirs(os.path.dirname(filenameSave), exist_ok=True)
    # Save original and transparent prediction overlay.
    orig_img = Image.fromarray(tensor2im(images).astype(np.uint8))
    orig_file_save = filenameSave + 'orig.png'
    background = orig_img.convert("RGBA")
    overlay = predictionClr.convert("RGBA")
    new_img = Image.blend(background, overlay, 0.3)
    overlay_file_save = filenameSave + 'overlay.png'
    predictionClr.save(filenameSave)
    orig_img.save(orig_file_save)
    new_img.save(overlay_file_save)
def train_batch(b):
    """
    Run the detector on one batch and dump per-image box visualizations.

    NOTE(review): the entire loss/backprop section below is commented out,
    so this function currently only runs the forward pass and saves debug
    images; `return res` at the bottom references a name assigned only
    inside the commented block and will raise NameError — confirm the
    intended return before use.

    :param b: contains:
    :param imgs: the image, [batch_size, 3, IM_SIZE, IM_SIZE]
    :param all_anchors: [num_anchors, 4] the boxes of all anchors that we'll be using
    :param all_anchor_inds: [num_anchors, 2] array of the indices into the concatenated
                            RPN feature vector that give us all_anchors,
                            each one (img_ind, fpn_idx)
    :param im_sizes: a [batch_size, 4] numpy array of (h, w, scale, num_good_anchors) for each image.
    :param num_anchors_per_img: int, number of anchors in total over the feature pyramid per img

    Training parameters:
    :param train_anchor_inds: a [num_train, 5] array of indices for the anchors that will
                              be used to compute the training loss (img_ind, fpn_idx)
    :param gt_boxes: [num_gt, 4] GT boxes over the batch.
    :param gt_classes: [num_gt, 2] gt boxes where each one is (img_id, class)
    :return:
    """
    '''
    return Result(
        od_obj_dists=od_obj_dists,  # 1
        rm_obj_dists=obj_dists,  # 2
        obj_scores=nms_scores,  # 3
        obj_preds=nms_preds,  # 4
        obj_fmap=obj_fmap,  # 5 pick
        od_box_deltas=od_box_deltas,  # 6
        rm_box_deltas=box_deltas,  # 7
        od_box_targets=bbox_targets,  # 8
        rm_box_targets=bbox_targets,  # 9
        od_box_priors=od_box_priors,  # 10
        rm_box_priors=box_priors,  # 11 pick
        boxes_assigned=nms_boxes_assign,  # 12
        boxes_all=nms_boxes,  # 13
        od_obj_labels=obj_labels,  # 14
        rm_obj_labels=rm_obj_labels,  # 15
        rpn_scores=rpn_scores,  # 16
        rpn_box_deltas=rpn_box_deltas,  # 17
        rel_labels=rel_labels,  # 18
        im_inds=im_inds,  # 19 pick
        fmap=fmap if return_fmap else None,  # 20
    )
    '''
    # b.imgs = F.upsample(b.imgs, size=592, mode='bilinear')
    # b.im_sizes[0, :, :2] = 592
    # Forward pass through the detector (indexing syntax dispatches the batch).
    result = detector[b]
    # Diagnostic prints of the batch and detection shapes.
    print("imgs.shape", b.imgs.shape)
    print("im_sizes", b.im_sizes)
    print("boxes", result.rm_box_priors)
    print("im_inds", result.im_inds)
    print("rm_obj_dists.shape", result.rm_obj_dists.shape)
    # tform = [
    #     Normalize(mean=[0, 0, 0], std=[1 / 0.229, 1 / 0.224, 1 / 0.225]),
    #     Normalize(mean=[-0.485, -0.456, -0.406], std=[1, 1, 1]),
    #     ToPILImage()
    # ]
    for i in range(len(b.imgs)):
        # pil_img = transform_pipeline(b.imgs[i]).convert("RGB")
        img_tensor = b.imgs[i].data.cpu()
        print(img_tensor.shape, img_tensor.max(), img_tensor.min())
        # Undo the ImageNet normalization so the saved image is viewable
        # (inverse std first, then inverse mean).
        img_tensor = Normalize(mean=[0, 0, 0], std=[1 / 0.229, 1 / 0.224, 1 / 0.225])(img_tensor)
        img_tensor = Normalize(mean=[-0.485, -0.456, -0.406], std=[1, 1, 1])(img_tensor)
        pil_img = ToPILImage()(img_tensor)
        pil_img = pil_img.convert("RGB")
        draw = ImageDraw.Draw(pil_img)
        # Draw every predicted box belonging to image i with its class label.
        for j in range(len(result.rm_box_priors)):
            if result.im_inds.data[j] == i:
                # class_ind = int(result.rm_obj_dists.data[j].max(0)[1])
                class_ind = int(result.obj_preds[j])
                class_score = float(result.obj_scores[j])
                # if class_ind != 0:
                draw = draw_box(
                    draw, result.rm_box_priors.data[j],
                    "%s[%.3f]" % (train.ind_to_classes[class_ind], class_score))
        pil_img.save(
            "/newNAS/Workspaces/UCGroup/gslu/aws_ailab/code/neural-motifs/checkpoints/%d.png" % i)
    # scores = result.od_obj_dists
    # box_deltas = result.od_box_deltas
    # labels = result.od_obj_labels
    # roi_boxes = result.od_box_priors
    # bbox_targets = result.od_box_targets
    # rpn_scores = result.rpn_scores
    # rpn_box_deltas = result.rpn_box_deltas
    #
    # # detector loss
    # valid_inds = (labels.data != 0).nonzero().squeeze(1)
    # fg_cnt = valid_inds.size(0)
    # bg_cnt = labels.size(0) - fg_cnt
    # class_loss = F.cross_entropy(scores, labels)
    #
    # # No gather_nd in pytorch so instead convert first 2 dims of tensor to 1d
    # box_reg_mult = 2 * (1. / FG_FRACTION) * fg_cnt / (fg_cnt + bg_cnt + 1e-4)
    # twod_inds = valid_inds * box_deltas.size(1) + labels[valid_inds].data
    #
    # box_loss = bbox_loss(roi_boxes[valid_inds], box_deltas.view(-1, 4)[twod_inds],
    #                      bbox_targets[valid_inds]) * box_reg_mult
    #
    # loss = class_loss + box_loss
    #
    # # RPN loss
    # if not conf.use_proposals:
    #     train_anchor_labels = b.train_anchor_labels[:, -1]
    #     train_anchors = b.train_anchors[:, :4]
    #     train_anchor_targets = b.train_anchors[:, 4:]
    #
    #     train_valid_inds = (train_anchor_labels.data == 1).nonzero().squeeze(1)
    #     rpn_class_loss = F.cross_entropy(rpn_scores, train_anchor_labels)
    #
    #     # print("{} fg {} bg, ratio of {:.3f} vs {:.3f}. RPN {}fg {}bg ratio of {:.3f} vs {:.3f}".format(
    #     #     fg_cnt, bg_cnt, fg_cnt / (fg_cnt + bg_cnt + 1e-4), FG_FRACTION,
    #     #     train_valid_inds.size(0), train_anchor_labels.size(0)-train_valid_inds.size(0),
    #     #     train_valid_inds.size(0) / (train_anchor_labels.size(0) + 1e-4), RPN_FG_FRACTION), flush=True)
    #     rpn_box_mult = 2 * (1. / RPN_FG_FRACTION) * train_valid_inds.size(0) / (train_anchor_labels.size(0) + 1e-4)
    #     rpn_box_loss = bbox_loss(train_anchors[train_valid_inds],
    #                              rpn_box_deltas[train_valid_inds],
    #                              train_anchor_targets[train_valid_inds]) * rpn_box_mult
    #
    #     loss += rpn_class_loss + rpn_box_loss
    #     res = pd.Series([rpn_class_loss.data[0], rpn_box_loss.data[0],
    #                      class_loss.data[0], box_loss.data[0], loss.data[0]],
    #                     ['rpn_class_loss', 'rpn_box_loss', 'class_loss', 'box_loss', 'total'])
    # else:
    #     res = pd.Series([class_loss.data[0], box_loss.data[0], loss.data[0]],
    #                     ['class_loss', 'box_loss', 'total'])
    #
    # optimizer.zero_grad()
    # loss.backward()
    # clip_grad_norm(
    #     [(n, p) for n, p in detector.named_parameters() if p.grad is not None],
    #     max_norm=conf.clip, clip=True)
    # optimizer.step()
    # NOTE(review): `res` is only assigned inside the commented block above —
    # this line raises NameError as written.
    return res
def rgb2ycbcr(rgb):
    """Extract the Y (luma) plane from an RGB tensor of shape [N, C, H, W].

    The tensor is squeezed, converted to a PIL image, mapped to the YCbCr
    color space, and the Y channel is returned as a numpy array.
    """
    pil_rgb = ToPILImage()(rgb.squeeze())
    ycbcr = pil_rgb.convert('YCbCr')
    luma, _, _ = ycbcr.split()
    return np.asarray(luma)
def saveTensorToImage(tsr, path, mode='RGB'):
    """Convert a tensor to a PIL image in `mode`, coerce it to RGB, and write it to `path`."""
    pil = ToPILImage(mode)(tsr)
    rgb = pil if pil.mode == 'RGB' else pil.convert('RGB')
    rgb.save(path)