Code Example #1
    def __getitem__(self, idx):
        """
        """
        fname = self.fnames[idx]
        img = Image.open(os.path.join(self.root, fname))
        if img.mode != 'RGB':
            img = img.convert('RGB')

        boxes = self.boxes[idx].clone()
        # convert normalized [0, 1] box coordinates to absolute pixel coordinates
        w, h = img.size
        boxes = boxes * torch.Tensor([w, h, w, h])
        labels = self.labels[idx].clone()
        size = self.input_size

        # Data augmentation (training only)
        if self.train:
            img, boxes = random_flip(img, boxes)
            img, boxes = random_crop(img, boxes)
            img, boxes = resize(img, boxes, (size, size))
        else:
            img, boxes = resize(img, boxes, (size, size))
            #img, boxes = center_crop(img, boxes, (size,size))

        img = self.transform(img)
        return img, boxes, labels
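Because each sample returns a variable number of boxes and labels, the default DataLoader collate cannot stack the targets directly. Below is a minimal sketch of a custom collate function for such a dataset; the name detection_collate and the DataLoader settings are illustrative, not part of the original project.

import torch
from torch.utils.data import DataLoader

def detection_collate(batch):
    """Stack images into one tensor; keep boxes/labels as per-image lists
    because the number of objects differs between images."""
    imgs, boxes, labels = zip(*batch)
    return torch.stack(imgs, 0), list(boxes), list(labels)

# loader = DataLoader(dataset, batch_size=8, shuffle=True,
#                     collate_fn=detection_collate)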
Code Example #2
File: eval.py  Project: silkylove/ObjectDetection
def transform(img, boxes, labels):
    img, boxes = resize(img, boxes, size=(img_size, img_size))
    img = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225))
    ])(img)
    return img, boxes, labels
Code Example #3
def train_transform(img, boxes, labels):
    img, boxes = random_paste(img, boxes, max_ratio=4, fill=(123, 116, 103))
    img, boxes, labels = random_crop(img, boxes, labels)
    img, boxes = resize(img, boxes, size=600, random_interpolation=True)
    img, boxes = random_flip(img, boxes)
    print(img.size)  # debug: image size after augmentation
    img = transforms.ToTensor()(img)
    print(boxes)  # debug: transformed boxes
    return img, boxes, labels
Code Example #4
            def transform_test(img, boxes, labels):
                img, boxes = resize(img,
                                    boxes,
                                    size=(self.img_size, self.img_size))

                img = transforms.Compose([
                    transforms.ToTensor(),
                    transforms.Normalize((0.485, 0.456, 0.406),
                                         (0.229, 0.224, 0.225))
                ])(img)
                boxes, labels = self.box_coder.encode(boxes, labels)
                return img, boxes, labels
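The mean (0.485, 0.456, 0.406) and std (0.229, 0.224, 0.225) used throughout these transforms are the standard ImageNet statistics. A minimal sketch of inverting that normalization for visualization follows; the names IMAGENET_MEAN, IMAGENET_STD, and unnormalize are illustrative, not from the project.

import torch
from torchvision import transforms

IMAGENET_MEAN = torch.tensor([0.485, 0.456, 0.406])
IMAGENET_STD = torch.tensor([0.229, 0.224, 0.225])

def unnormalize(x):
    """Invert ToTensor + Normalize: normalized CxHxW tensor -> PIL image."""
    x = x * IMAGENET_STD[:, None, None] + IMAGENET_MEAN[:, None, None]
    return transforms.ToPILImage()(x.clamp(0, 1))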
Code Example #5
def transform_image_w_bbox(img, boxes, labels, img_size=224):

    assert torch.is_tensor(boxes), 'type(boxes) : {}'.format(type(boxes))

    img = random_distort(img)
    # This slows down learning too much...
    # if random.random() < 0.5:
    #     img, boxes = random_paste(img, boxes, max_ratio=4, fill=(123, 116, 103))
    img, boxes, labels = random_crop(img, boxes, labels)
    img, boxes = resize(img,
                        boxes,
                        size=(img_size, img_size),
                        random_interpolation=True)
    img, boxes = random_flip(img, boxes)
    img = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225))
    ])(img)
    return img, boxes, labels
Code Example #6
 def transform_train(img, boxes, labels):
     img = random_distort(img)
     if random.random() < 0.5:
         img, boxes = random_paste(img,
                                   boxes,
                                   max_ratio=4,
                                   fill=(123, 116, 103))
     img, boxes, labels = random_crop(img, boxes, labels)
     img, boxes = resize(img,
                         boxes,
                         size=(self.img_size, self.img_size),
                         random_interpolation=False)
     img, boxes = random_flip(img, boxes)
     img = transforms.Compose([
         transforms.ToTensor(),
         transforms.Normalize((0.485, 0.456, 0.406),
                              (0.229, 0.224, 0.225))
     ])(img)
     boxes, labels = self.box_coder.encode(boxes, labels)
     return img, boxes, labels
Code Example #7
File: infer.py  Project: silkylove/ObjectDetection
def main(args):
    print('Loading fpnssd model...')
    net = FPNSSD512(num_classes=21).cuda(3)
    box_coder = SSDBboxCoder(net)
    net = nn.DataParallel(net, [3])
    net.load_state_dict(torch.load('./checkpoint/ckpt.pt')['net'])
    # net.load_state_dict(torch.load('./fpnssd512_20_trained.pth'))
    # net.load_state_dict(torch.load(args[1])['net'])
    net.eval()

    start = time.time()
    print('Loading image...')
    # img = Image.open('/home/yhuangcc/data/voc(07+12)/JPEGImages/000002.jpg')
    # img=Image.open('/home/yhuangcc/data/coco/images/val2017/000000000285.jpg')
    img = Image.open(args[1])
    w, h = img.size
    img = img.resize((512, 512))

    print('Predicting...')
    transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225))
    ])
    x = transform(img).cuda(3)
    loc_preds, cls_preds = net(x.unsqueeze(0))

    print('Decoding...')
    loc_preds = loc_preds.squeeze().cpu()
    cls_preds = F.softmax(cls_preds.squeeze().cpu(), dim=1)
    boxes, labels, scores = box_coder.decode(loc_preds, cls_preds)
    print(f'Detection ends... Consuming time {time.time()-start:.4f}s')

    # load the VOC class names as strings (np.object is no longer available in recent NumPy)
    label_names = np.loadtxt(
        '/home/yhuangcc/ObjectDetection/datasets/voc/labels.txt',
        dtype=str).tolist()
    # label_names = np.loadtxt(args[3], np.object).tolist()
    img, boxes = resize(img, boxes.cpu(), (w, h))
    vis_image_bbox(img, boxes, [label_names[label] for label in labels],
                   scores)
    plt.close()
Code Example #8
def main(
    fast=False,
    batch_size=None,
    **kwargs,
):

    # CONFIG
    batch_size = batch_size or (4 if fast else 32)
    energy_loss = get_energy_loss(config="consistency_two_path",
                                  mode="standard",
                                  **kwargs)

    # LOGGING
    logger = VisdomLogger("train", env=JOB)

    # DATA LOADING
    video_dataset = ImageDataset(
        files=sorted(
            glob.glob(f"mount/taskonomy_house_tour/original/image*.png"),
            key=lambda x: int(os.path.basename(x)[5:-4])),
        return_tuple=True,
        resize=720,
    )
    video = RealityTask("video",
                        video_dataset, [
                            tasks.rgb,
                        ],
                        batch_size=batch_size,
                        shuffle=False)

    # GRAPHS
    graph_baseline = TaskGraph(tasks=energy_loss.tasks + [video],
                               finetuned=False)
    graph_baseline.compile(torch.optim.Adam,
                           lr=3e-5,
                           weight_decay=2e-6,
                           amsgrad=True)

    graph_finetuned = TaskGraph(tasks=energy_loss.tasks + [video],
                                finetuned=True)
    graph_finetuned.compile(torch.optim.Adam,
                            lr=3e-5,
                            weight_decay=2e-6,
                            amsgrad=True)

    graph_conservative = TaskGraph(tasks=energy_loss.tasks + [video],
                                   finetuned=True)
    graph_conservative.compile(torch.optim.Adam,
                               lr=3e-5,
                               weight_decay=2e-6,
                               amsgrad=True)
    graph_conservative.load_weights(
        f"{MODELS_DIR}/conservative/conservative.pth")

    graph_ood_conservative = TaskGraph(tasks=energy_loss.tasks + [video],
                                       finetuned=True)
    graph_ood_conservative.compile(torch.optim.Adam,
                                   lr=3e-5,
                                   weight_decay=2e-6,
                                   amsgrad=True)
    graph_ood_conservative.load_weights(
        f"{SHARED_DIR}/results_2F_grounded_1percent_gt_twopath_512_256_crop_7/graph_grounded_1percent_gt_twopath.pth"
    )

    graphs = {
        "baseline": graph_baseline,
        "finetuned": graph_finetuned,
        "conservative": graph_conservative,
        "ood_conservative": graph_ood_conservative,
    }

    inv_transform = transforms.ToPILImage()
    data = {key: {"losses": [], "zooms": []} for key in graphs}
    size = 256
    for batch in range(0, 700):

        if batch * batch_size > len(video_dataset.files): break

        frac = (batch * batch_size * 1.0) / len(video_dataset.files)
        if frac < 0.3:
            size = int(256.0 - 128 * frac / 0.3)
        elif frac < 0.5:
            size = int(128.0 + 128 * (frac - 0.3) / 0.2)
        else:
            size = int(256.0 + (720 - 256) * (frac - 0.5) / 0.5)
        print(size)
        # video.reload()
        size = (size // 32) * 32
        print(size)
        video.step()
        video.task_data[tasks.rgb] = resize(
            video.task_data[tasks.rgb].to(DEVICE), size).data
        print(video.task_data[tasks.rgb].shape)

        with torch.no_grad():

            for i, img in enumerate(video.task_data[tasks.rgb]):
                inv_transform(img.clamp(min=0, max=1.0).data.cpu()).save(
                    f"mount/taskonomy_house_tour/distorted/image{batch*batch_size + i}.png"
                )

            for name, graph in graphs.items():
                normals = graph.sample_path([tasks.rgb, tasks.normal],
                                            reality=video)
                normals2 = graph.sample_path(
                    [tasks.rgb, tasks.principal_curvature, tasks.normal],
                    reality=video)

                for i, img in enumerate(normals):
                    energy, _ = tasks.normal.norm(normals[i:(i + 1)],
                                                  normals2[i:(i + 1)])
                    data[name]["losses"] += [energy.data.cpu().numpy().mean()]
                    data[name]["zooms"] += [size]
                    inv_transform(img.clamp(min=0, max=1.0).data.cpu()).save(
                        f"mount/taskonomy_house_tour/normals_{name}/image{batch*batch_size + i}.png"
                    )

                for i, img in enumerate(normals2):
                    inv_transform(img.clamp(min=0, max=1.0).data.cpu()).save(
                        f"mount/taskonomy_house_tour/path2_{name}/image{batch*batch_size + i}.png"
                    )

    pickle.dump(data, open(f"mount/taskonomy_house_tour/data.pkl", 'wb'))
    os.system("bash ~/scaling/scripts/create_vids.sh")
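The size schedule in the loop above zooms from 256 down to 128 over the first 30% of the frames, back up to 256 by the 50% mark, then up to 720 by the end, rounding down to a multiple of 32. A minimal sketch of that schedule as a standalone function; the name zoom_size is illustrative.

def zoom_size(frac):
    """frac in [0, 1]: fraction of the video processed so far."""
    if frac < 0.3:
        size = 256.0 - 128 * frac / 0.3
    elif frac < 0.5:
        size = 128.0 + 128 * (frac - 0.3) / 0.2
    else:
        size = 256.0 + (720 - 256) * (frac - 0.5) / 0.5
    return (int(size) // 32) * 32  # keep the side length a multiple of 32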
Code Example #9
 def forward(self, x):
     return resize(x, val=dest_task.resize)
Code Example #10
# img = Image.open(args[2])
w, h = img.size
img = img.resize((512, 512))

print('Predicting...')
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225))
])
x = transform(img).cuda(3)
loc_preds, cls_preds = net(x.unsqueeze(0))

print('Decoding...')
box_coder = SSDBboxCoder(net)
box_coder.anchor_boxes = box_coder.anchor_boxes.cuda(3)
loc_preds = loc_preds.squeeze()
cls_preds = F.softmax(cls_preds.squeeze(), dim=1)
boxes, labels, scores = box_coder.decode(loc_preds, cls_preds)
print(f'Detection ends... Consuming time {time.time()-start:.4f}s')

# load the VOC class names as strings (np.object is no longer available in recent NumPy)
label_names = np.loadtxt(
    '/home/yhuangcc/ObjectDetection/datasets/voc/labels.txt',
    dtype=str).tolist()
# label_names = np.loadtxt(args[3], np.object).tolist()
img, boxes = resize(img, boxes.cpu(), (w, h))
vis_image_bbox(img, boxes, [label_names[label] for label in labels], scores)
plt.close()

# if __name__ == '__main__':
#     main(sys.argv)
Code Example #11
            ct = c.copy()
            dt = d.copy()
            ce = e.copy()
            ct, dt, ce = enchancement_transform(ct, dt, ce)
            #if len(dt.squeeze()) != 0:
            if dt.numel() != 0:
                dt[:, [0, 2]] *= ct.shape[1]
                dt[:, [1, 3]] *= ct.shape[0]
            show_detection(ct, dt)

    if True:
        print('test resize')
        for i in range(1):
            ct = c.copy()
            dt = d.copy()
            ct = resize(ct)
            show_detection(ct, dt)

    if True:
        print('test enchancement_transform(many image)')
        num_test = 10
        for c, d, e in dataset:
            ct = c.copy()
            dt = d.copy()
            ce = e.copy()
            ct, dt, ce = enchancement_transform(ct, dt, ce)

            print(dt)
            #if len(dt.squeeze()) != 0:
            if dt.numel() != 0:
                dt[:, [0, 2]] *= ct.shape[1]
                dt[:, [1, 3]] *= ct.shape[0]
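The box rescaling above (x coordinates multiplied by the image width, y coordinates by the height) mirrors the ratio-to-pixel conversion in Code Example #1. A minimal sketch of that step as a reusable helper; the name denormalize_boxes is illustrative, and it assumes boxes are [x1, y1, x2, y2] tensors in [0, 1] with the image stored as an HxWxC array.

import torch

def denormalize_boxes(boxes, img):
    """Scale normalized [x1, y1, x2, y2] boxes to pixel coordinates."""
    boxes = boxes.clone()
    if boxes.numel() != 0:
        boxes[:, [0, 2]] *= img.shape[1]  # x scales with image width
        boxes[:, [1, 3]] *= img.shape[0]  # y scales with image height
    return boxes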