Example #1
    def __getitem__(self, index):
        input = load_img(self.image_filenames[index])  # input is a pre-composited 4-channel RGBD image
        # data augmentation
        if self.crop:
            input = RandomCrop(64)(input)  # take a random patch
            input = RandomHorizontalFlip()(input)  # horizontal flip
            input = RandomVerticalFlip()(input)  # vertical flip
            input = RandomRotation(180)(input)  # random rotation
        input_tensor = ToTensor()(input)
        # split the 4-channel RGBD tensor into RGB (channels 0-2) and depth (channel 3)
        rgb_tensor = input_tensor[:3, :, :].clone()
        depth_tensor = input_tensor[3:4, :, :].clone()
        depth = ToPILImage()(depth_tensor)
        size = min(depth.size[0], depth.size[1])
        guide = ToPILImage()(rgb_tensor)
        target = depth.copy()

        guide = guide.convert('L')  # grayscale version of the RGB image serves as the guide
        # generate the low-resolution (LR) depth map, then resize it back with bicubic interpolation
        depth = downsampling(depth, self.upscale_factor)
        depth = Resize(size=size, interpolation=Image.BICUBIC)(depth)

        depth = ToTensor()(depth)
        guide = ToTensor()(guide)
        depth = torch.cat((depth, guide), 0)  # concatenate LR depth and guide into the input tensor
        target = ToTensor()(target)

        return depth, target
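
The channel split above can be reproduced in isolation. A minimal sketch, not from the original source, with a random tensor standing in for ToTensor()(input):

import torch

rgbd = torch.rand(4, 64, 64)                 # stand-in for the 4-channel RGBD tensor
rgb_tensor = rgbd[:3]                        # channels 0-2: RGB
depth_tensor = rgbd[3:4]                     # channel 3: depth, kept as [1, H, W]
print(rgb_tensor.shape, depth_tensor.shape)  # torch.Size([3, 64, 64]) torch.Size([1, 64, 64])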
Example #2
def save_output(outputs, road_mask, images, filename, val_ct):
    # compatibility with criterion DataParallel: merge the per-GPU output tensors
    if isinstance(outputs, list):
        outputs_cpu = torch.cat([o.cpu() for o in outputs], 0)
    else:
        outputs_cpu = outputs.cpu()

    # debug: colorize the first prediction directly on the GPU
    outputs_gpu = outputs
    pred_img_gpu = outputs_gpu[0].max(0)[1].unsqueeze(0)
    roadMask_gpu = road_mask[0]
    pred_img_gpu[0][roadMask_gpu == 0] = 255
    print("pred_img_gpu [0] =", pred_img_gpu.flatten().unique())

    print("outputs_cpu = ", outputs_cpu.shape)
    print("pred_img_gpu = ", pred_img_gpu.shape)

    col_img_gpu = Colorize()(pred_img_gpu)
    print("col_img = ", col_img_gpu.flatten()[0:5])

    for i in range(outputs_cpu.size(0)):  # iterate over the batch
        val_ct += 1
        pred_img = outputs_cpu[i].max(0)[1].data.unsqueeze(0)
        #print(type(pred_img), pred_img.shape)
        roadMask = road_mask[i].data.cpu()

        pred_img[0][roadMask == 0] = 255  # mask out non-road pixels with label 255
        col_img = Colorize()(pred_img.byte())
        print("col_img = ", col_img.flatten()[0:5])

        predictionClr = ToPILImage()(col_img)
        #prediction = ToPILImage()(pred_img.byte())

        # note: this counter-based name is immediately overridden by the path-based name below
        filenameSave = "./predicts/" + str(val_ct).zfill(3) + '.png'
        filename_break = str(filename[0]).split('/')
        filename_path = '/'.join(filename_break[-3:])
        filenameSave = "./predicts/" + str(filename_path)
        os.makedirs(os.path.dirname(filenameSave), exist_ok=True)

        ## Save transparent overlay
        orig_img = Image.fromarray(tensor2im(images).astype(np.uint8))
        orig_file_save = filenameSave + 'orig.png'

        background = orig_img.convert("RGBA")
        overlay = predictionClr.convert("RGBA")
        new_img = Image.blend(background, overlay, 0.3)
        overlay_file_save = filenameSave + 'overlay.png'
        #predictionClr.save(filenameSave)
        orig_img.save(orig_file_save)
        new_img.save(overlay_file_save)
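
The transparent overlay at the end relies only on PIL. A minimal, self-contained sketch of the same Image.blend call, with solid-color placeholders standing in for orig_img and predictionClr:

from PIL import Image

background = Image.new("RGBA", (128, 128), (0, 0, 0, 255))  # stand-in for the original image
overlay = Image.new("RGBA", (128, 128), (255, 0, 0, 255))   # stand-in for the colorized prediction
blended = Image.blend(background, overlay, 0.3)             # 30% overlay over 70% background
blended.save("blend_demo.png")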
Example #3
def save_one_output(pred_img_gpu, images, filename, val_ct):
    #print("pred_img_gpu sum [0] =", pred_img_gpu.flatten().sum())

    col_img = Colorize()(pred_img_gpu)
    predictionClr = ToPILImage()(col_img.cpu().byte())

    # note: this counter-based name is immediately overridden by the path-based name below
    filenameSave = "./predicts/" + str(val_ct).zfill(3) + '.png'
    filename_break = str(filename[0]).split('/')
    filename_path = '/'.join(filename_break[-3:])
    filenameSave = "./predicts/" + str(filename_path)
    os.makedirs(os.path.dirname(filenameSave), exist_ok=True)

    ## Save transparent overlay
    orig_img = Image.fromarray(tensor2im(images).astype(np.uint8))
    orig_file_save = filenameSave + 'orig.png'

    background = orig_img.convert("RGBA")
    overlay = predictionClr.convert("RGBA")
    new_img = Image.blend(background, overlay, 0.3)
    overlay_file_save = filenameSave + 'overlay.png'
    predictionClr.save(filenameSave)
    orig_img.save(orig_file_save)
    new_img.save(overlay_file_save)
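
Colorize() is a helper from the surrounding codebase; judging from ToPILImage()(col_img.cpu().byte()) above, it yields a [3, H, W] tensor that converts directly to an RGB image. A minimal sketch with a random uint8 tensor standing in for col_img:

import torch
from torchvision.transforms import ToPILImage

col = torch.randint(0, 256, (3, 64, 64), dtype=torch.uint8)  # stand-in for the colorized prediction
pil = ToPILImage()(col)  # a 3-channel uint8 tensor becomes an RGB PIL image
pil.save("colorized_demo.png")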
Example #4
def train_batch(b):
    """
    :param b: contains:
          :param imgs: the image, [batch_size, 3, IM_SIZE, IM_SIZE]
          :param all_anchors: [num_anchors, 4] the boxes of all anchors that we'll be using
          :param all_anchor_inds: [num_anchors, 2] array of the indices into the concatenated
                                  RPN feature vector that give us all_anchors,
                                  each one (img_ind, fpn_idx)
          :param im_sizes: a [batch_size, 4] numpy array of (h, w, scale, num_good_anchors) for each image.

          :param num_anchors_per_img: int, number of anchors in total over the feature pyramid per img

          Training parameters:
          :param train_anchor_inds: a [num_train, 5] array of indices for the anchors that will
                                    be used to compute the training loss (img_ind, fpn_idx)
          :param gt_boxes: [num_gt, 4] GT boxes over the batch.
          :param gt_classes: [num_gt, 2] gt boxes where each one is (img_id, class)

    :return:
    """
    '''
    return Result(
            od_obj_dists=od_obj_dists, # 1
            rm_obj_dists=obj_dists, # 2
            obj_scores=nms_scores, # 3
            obj_preds=nms_preds, # 4
            obj_fmap=obj_fmap, # 5 pick
            od_box_deltas=od_box_deltas, # 6
            rm_box_deltas=box_deltas, # 7
            od_box_targets=bbox_targets, # 8
            rm_box_targets=bbox_targets, # 9
            od_box_priors=od_box_priors, # 10
            rm_box_priors=box_priors, # 11 pick
            boxes_assigned=nms_boxes_assign, # 12
            boxes_all=nms_boxes, # 13
            od_obj_labels=obj_labels, # 14
            rm_obj_labels=rm_obj_labels, # 15
            rpn_scores=rpn_scores, # 16
            rpn_box_deltas=rpn_box_deltas, # 17
            rel_labels=rel_labels, # 18
            im_inds=im_inds, # 19 pick
            fmap=fmap if return_fmap else None, # 20
        )
    '''
    # b.imgs = F.upsample(b.imgs, size=592, mode='bilinear')
    # b.im_sizes[0, :, :2] = 592
    result = detector[b]  # forward pass through the detector
    print("imgs.shape", b.imgs.shape)
    print("im_sizes", b.im_sizes)
    print("boxes", result.rm_box_priors)
    print("im_inds", result.im_inds)
    print("rm_obj_dists.shape", result.rm_obj_dists.shape)

    # tform = [
    #     Normalize(mean=[0, 0, 0], std=[1 / 0.229, 1 / 0.224, 1 / 0.225]),
    #     Normalize(mean=[-0.485, -0.456, -0.406], std=[1, 1, 1]),
    #     ToPILImage()
    # ]
    for i in range(len(b.imgs)):
        # pil_img = transform_pipeline(b.imgs[i]).convert("RGB")
        img_tensor = b.imgs[i].data.cpu()
        print(img_tensor.shape, img_tensor.max(), img_tensor.min())
        img_tensor = Normalize(mean=[0, 0, 0],
                               std=[1 / 0.229, 1 / 0.224,
                                    1 / 0.225])(img_tensor)
        img_tensor = Normalize(mean=[-0.485, -0.456, -0.406],
                               std=[1, 1, 1])(img_tensor)
        pil_img = ToPILImage()(img_tensor)
        pil_img = pil_img.convert("RGB")
        draw = ImageDraw.Draw(pil_img)
        for j in range(len(result.rm_box_priors)):
            if result.im_inds.data[j] == i:
                # class_ind = int(result.rm_obj_dists.data[j].max(0)[1])
                class_ind = int(result.obj_preds[j])
                class_score = float(result.obj_scores[j])
                # if class_ind != 0:
                draw = draw_box(
                    draw, result.rm_box_priors.data[j], "%s[%.3f]" %
                    (train.ind_to_classes[class_ind], class_score))
        pil_img.save(
            "/newNAS/Workspaces/UCGroup/gslu/aws_ailab/code/neural-motifs/checkpoints/%d.png"
            % i)

    # scores = result.od_obj_dists
    # box_deltas = result.od_box_deltas
    # labels = result.od_obj_labels
    # roi_boxes = result.od_box_priors
    # bbox_targets = result.od_box_targets
    # rpn_scores = result.rpn_scores
    # rpn_box_deltas = result.rpn_box_deltas
    #
    # # detector loss
    # valid_inds = (labels.data != 0).nonzero().squeeze(1)
    # fg_cnt = valid_inds.size(0)
    # bg_cnt = labels.size(0) - fg_cnt
    # class_loss = F.cross_entropy(scores, labels)
    #
    # # No gather_nd in pytorch so instead convert first 2 dims of tensor to 1d
    # box_reg_mult = 2 * (1. / FG_FRACTION) * fg_cnt / (fg_cnt + bg_cnt + 1e-4)
    # twod_inds = valid_inds * box_deltas.size(1) + labels[valid_inds].data
    #
    # box_loss = bbox_loss(roi_boxes[valid_inds], box_deltas.view(-1, 4)[twod_inds],
    #                      bbox_targets[valid_inds]) * box_reg_mult
    #
    # loss = class_loss + box_loss
    #
    # # RPN loss
    # if not conf.use_proposals:
    #     train_anchor_labels = b.train_anchor_labels[:, -1]
    #     train_anchors = b.train_anchors[:, :4]
    #     train_anchor_targets = b.train_anchors[:, 4:]
    #
    #     train_valid_inds = (train_anchor_labels.data == 1).nonzero().squeeze(1)
    #     rpn_class_loss = F.cross_entropy(rpn_scores, train_anchor_labels)
    #
    #     # print("{} fg {} bg, ratio of {:.3f} vs {:.3f}. RPN {}fg {}bg ratio of {:.3f} vs {:.3f}".format(
    #     #     fg_cnt, bg_cnt, fg_cnt / (fg_cnt + bg_cnt + 1e-4), FG_FRACTION,
    #     #     train_valid_inds.size(0), train_anchor_labels.size(0)-train_valid_inds.size(0),
    #     #     train_valid_inds.size(0) / (train_anchor_labels.size(0) + 1e-4), RPN_FG_FRACTION), flush=True)
    #     rpn_box_mult = 2 * (1. / RPN_FG_FRACTION) * train_valid_inds.size(0) / (train_anchor_labels.size(0) + 1e-4)
    #     rpn_box_loss = bbox_loss(train_anchors[train_valid_inds],
    #                              rpn_box_deltas[train_valid_inds],
    #                              train_anchor_targets[train_valid_inds]) * rpn_box_mult
    #
    #     loss += rpn_class_loss + rpn_box_loss
    #     res = pd.Series([rpn_class_loss.data[0], rpn_box_loss.data[0],
    #                      class_loss.data[0], box_loss.data[0], loss.data[0]],
    #                     ['rpn_class_loss', 'rpn_box_loss', 'class_loss', 'box_loss', 'total'])
    # else:
    #     res = pd.Series([class_loss.data[0], box_loss.data[0], loss.data[0]],
    #                     ['class_loss', 'box_loss', 'total'])
    #
    # optimizer.zero_grad()
    # loss.backward()
    # clip_grad_norm(
    #     [(n, p) for n, p in detector.named_parameters() if p.grad is not None],
    #     max_norm=conf.clip, clip=True)
    # optimizer.step()

    # NOTE: `res` is only produced by the commented-out loss computation above;
    # with that block disabled, return the detector output instead
    return result
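
The two chained Normalize calls in the visualization loop invert the standard ImageNet normalization: the first divides by 1/std (i.e. multiplies by std), the second subtracts -mean (i.e. adds the mean back). A minimal sketch of that de-normalization on a synthetic tensor:

import torch
from torchvision.transforms import Normalize, ToPILImage

img = torch.randn(3, 224, 224)  # stand-in for a normalized network input
img = Normalize(mean=[0, 0, 0], std=[1 / 0.229, 1 / 0.224, 1 / 0.225])(img)  # undo the division by std
img = Normalize(mean=[-0.485, -0.456, -0.406], std=[1, 1, 1])(img)           # undo the mean subtraction
pil_img = ToPILImage()(img.clamp(0, 1)).convert("RGB")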
Example #5
def rgb2ycbcr(rgb):  # expects a tensor of shape [N, C, H, W] with N == 1
    rgb = ToPILImage()(rgb.squeeze(0))  # drop the batch dimension only
    y, cb, cr = rgb.convert('YCbCr').split()
    # y = Variable(ToTensor()(y).unsqueeze(0))
    return np.asarray(y)  # luma (Y) channel as a numpy array
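
A quick usage sketch for the helper above, with random data; PIL performs the actual colorspace math inside convert('YCbCr'):

import torch

y = rgb2ycbcr(torch.rand(1, 3, 32, 32))  # [N, C, H, W] with N == 1
print(y.shape, y.dtype)                  # (32, 32) uint8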
Example #6
def saveTensorToImage(tsr, path, mode='RGB'):
    img = ToPILImage(mode)(tsr)
    if img.mode != 'RGB':
        img = img.convert('RGB')  # normalize to RGB before saving
    img.save(path)
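
A usage sketch, assuming a float tensor in [0, 1] (ToPILImage scales floating-point input to 0-255); the output path is a placeholder:

import torch

saveTensorToImage(torch.rand(3, 64, 64), "demo.png", mode='RGB')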