Example #1
import numpy as np
from torch.utils.data import Subset, ConcatDataset
from torchvision import transforms

# GaussianNoise is a project-specific transform, assumed to be defined elsewhere in this module.


def get_voc_dataset(root):
    from dataset.voc_dataset import VOCDataset, custom_collate_fn
    dset_name = 'voc2012'

    transform_og = transforms.Compose([transforms.Resize((416, 416)),
                                       transforms.ToTensor()])
    transform_norm = transforms.Compose([transforms.Resize((416, 416)),
                                         transforms.ToTensor(),
                                         transforms.Normalize([.485, .456, .406], [.229, .224, .225])])
    transform_noise = transforms.Compose([transforms.Resize((416, 416)),
                                          transforms.ToTensor(),
                                          GaussianNoise(mean=0, std=.2)])
    transform_norm_noise = transforms.Compose([transforms.Resize((416, 416)),
                                               transforms.ToTensor(),
                                               transforms.Normalize([.485, .456, .406], [.229, .224, .225]),
                                               GaussianNoise(mean=0, std=.2)])
    transform_rotate = transforms.Compose([transforms.Resize((416, 416)),
                                           transforms.RandomRotation((-60, 60)),
                                           transforms.ToTensor(),
                                           transforms.Normalize([.485, .456, .406], [.229, .224, .225])])
    transform_vflip = transforms.Compose([transforms.Resize((416, 416)),
                                          transforms.RandomVerticalFlip(1),
                                          transforms.ToTensor(),
                                          transforms.Normalize([.485, .456, .406], [.229, .224, .225])])
    transform_hflip = transforms.Compose([transforms.Resize((416, 416)),
                                          transforms.RandomHorizontalFlip(1),
                                          transforms.ToTensor(),
                                          transforms.Normalize([.485, .456, .406], [.229, .224, .225])])

    dset_og = VOCDataset(root, img_size=(416, 416), transforms=transform_og, is_categorical=True)
    dset_norm = VOCDataset(root, img_size=(416, 416), transforms=transform_norm, is_categorical=True)
    dset_noise = VOCDataset(root, img_size=(416, 416), transforms=transform_noise, is_categorical=True)
    dset_norm_noise = VOCDataset(root, img_size=(416, 416), transforms=transform_norm_noise, is_categorical=True)
    # dset_rotate = VOCDataset(root, img_size=(416, 416), transforms=transform_rotate, is_categorical=True)
    # dset_vflip = VOCDataset(root, img_size=(416, 416), transforms=transform_vflip, is_categorical=True)
    # dset_hflip = VOCDataset(root, img_size=(416, 416), transforms=transform_hflip, is_categorical=True)

    num_classes = dset_og.num_classes

    n_data = len(dset_og)
    n_train_data = int(n_data * .7)
    indices = list(range(n_data))

    np.random.shuffle(indices)
    train_idx, val_idx = indices[:n_train_data], indices[n_train_data:]
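    # The same index split is reused for every transformed copy below, so each augmented
    # subset covers exactly the same training images as dset_og.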
    train_dset_og = Subset(dset_og, indices=train_idx)
    train_dset_norm = Subset(dset_norm, indices=train_idx)
    train_dset_noise = Subset(dset_noise, indices=train_idx)
    train_dset_norm_noise = Subset(dset_norm_noise, indices=train_idx)
    # train_dset_rotate = Subset(dset_rotate, indices=train_idx)
    # train_dset_vflip = Subset(dset_vflip, indices=train_idx)
    # train_dset_hflip = Subset(dset_hflip, indices=train_idx)

    # train_dset = ConcatDataset([dset_og, dset_norm, dset_noise, dset_norm_noise])
    train_dset = train_dset_og
    val_dset = Subset(dset_og, indices=val_idx)

    collate_fn = custom_collate_fn

    return dset_name, train_dset, val_dset, collate_fn
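For context, a minimal usage sketch of feeding the returned datasets into DataLoaders; the batch size is an arbitrary placeholder and the VOC root reuses the path that appears in Examples #5 and #6:

from torch.utils.data import DataLoader

dset_name, train_dset, val_dset, collate_fn = get_voc_dataset('C://DeepLearningData/VOC2012/')

# batch_size is a placeholder; the collate function comes from dataset.voc_dataset.
train_loader = DataLoader(train_dset, batch_size=8, shuffle=True, collate_fn=collate_fn)
val_loader = DataLoader(val_dset, batch_size=8, shuffle=False, collate_fn=collate_fn)

print(dset_name, len(train_dset), len(val_dset))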
Example #2
        feed_dict = {
            self.input_img: img,
            self.input_roi: roi,
            self.input_labels: labels
        }
        tests = session.run(self.bbox_logit, feed_dict=feed_dict)
        return tests


if __name__ == '__main__':
    import numpy as np

    from dataset.voc_dataset import VOCDataset, bbox_visualization, bbox_transform_inv, bbox_transform, bb_norm, bb_denorm
    from tool.tool import array_img_save

    dataset = VOCDataset()
    network = FastRCNN('FastRCNN', 21, 64)
    network.buind()
    network.complete()
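    # Restore pretrained VGG-16 weights from ./VGG/vgg_16.ckpt into variables in the `vgg_16` scope.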
    restorer_fc = tf.train.Saver(tf.trainable_variables(scope='vgg_16'))
    session = tf.get_default_session()
    restorer_fc.restore(session, "./VGG/vgg_16.ckpt")

    for step in range(100000):
        images, labels, rois = dataset.get_minbatch(1,
                                                    step % 3,
                                                    selectivesearch=True)
        labels = np.reshape(labels, (-1, 5))
        labels[:, 0:4] = bbox_transform_inv(rois[0], labels[:, 0:4])
        rois[0] = bb_norm(images[0].shape[1], images[0].shape[0], rois[0])
Example #3
    transform_original = transforms.Compose([transforms.Resize((224, 224)),
                                             transforms.ToTensor(),
                                             transforms.Normalize([.485, .456, .406], [.229, .224, .225])])
    transform_rotate = transforms.Compose([transforms.Resize((224, 224)),
                                           transforms.RandomRotation((-30, 30)),
                                           transforms.ToTensor(),
                                           transforms.Normalize([.485, .456, .406], [.229, .224, .225])])
    transform_vertical_flip = transforms.Compose([transforms.Resize((224, 224)),
                                                  transforms.RandomVerticalFlip(1),
                                                  transforms.ToTensor(),
                                                  transforms.Normalize([.485, .456, .406], [.229, .224, .225])])
    transform_horizontal_flip = transforms.Compose([transforms.Resize((224, 224)),
                                                    transforms.RandomHorizontalFlip(1),
                                                    transforms.ToTensor(),
                                                    transforms.Normalize([.485, .456, .406], [.229, .224, .225])])

    dset_original = VOCDataset(root, img_size=img_size, transforms=transform_original, is_categorical=True)
    dset_rotate = VOCDataset(root, img_size=img_size, transforms=transform_rotate, is_categorical=True)
    dset_vertical_flip = VOCDataset(root, img_size=img_size, transforms=transform_vertical_flip, is_categorical=True)
    dset_horizontal_flip = VOCDataset(root, img_size=img_size, transforms=transform_horizontal_flip, is_categorical=True)

    n_val_data = int(len(dset_original) * .3)
    train_val_ratio = [len(dset_original) - n_val_data, n_val_data]

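    # Each random_split below shuffles independently, so the augmented subsets are not
    # guaranteed to align with dset_original's train split (see the sketch after this example).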
    dset_original, dset_val = random_split(dset_original, train_val_ratio)
    dset_rotate, _ = random_split(dset_rotate, train_val_ratio)
    dset_vertical_flip, _ = random_split(dset_vertical_flip, train_val_ratio)
    dset_horizontal_flip, _ = random_split(dset_horizontal_flip, train_val_ratio)

    dset_train = ConcatDataset([dset_original, dset_rotate, dset_vertical_flip, dset_horizontal_flip])

    print('Train: {} Validation: {}'.format(len(dset_train), len(dset_val)))
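Because each random_split call above shuffles independently, the augmented train subsets are not guaranteed to contain the same images as dset_original's train split, which can leak validation images (in augmented form) into training. A minimal sketch, assuming a PyTorch version whose random_split accepts a generator argument (the seed value is arbitrary), of how the split calls could be kept aligned:

import torch
from torch.utils.data import random_split

def aligned_split(dset, lengths, seed=0):
    # A fresh generator seeded the same way yields the same permutation on every call,
    # so each dataset is partitioned into identical train/val index sets.
    return random_split(dset, lengths, generator=torch.Generator().manual_seed(seed))

dset_original, dset_val = aligned_split(dset_original, train_val_ratio)
dset_rotate, _ = aligned_split(dset_rotate, train_val_ratio)
dset_vertical_flip, _ = aligned_split(dset_vertical_flip, train_val_ratio)
dset_horizontal_flip, _ = aligned_split(dset_horizontal_flip, train_val_ratio)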
Example #4
    ])
    horizontal_flip_transforms = transforms.Compose([
        transforms.RandomCrop((224, 224), pad_if_needed=True),
        transforms.RandomHorizontalFlip(1),
        transforms.ToTensor(),
        transforms.Normalize([.485, .456, .406], [.229, .224, .225])
    ])
    vertical_flip_transforms = transforms.Compose([
        transforms.RandomCrop((224, 224), pad_if_needed=True),
        transforms.RandomVerticalFlip(1),
        transforms.ToTensor(),
        transforms.Normalize([.485, .456, .406], [.229, .224, .225])
    ])

    original_dset = VOCDataset(root,
                               img_size=(224, 224),
                               transforms=original_transforms,
                               is_categorical=True)
    rotate_dset = VOCDataset(root,
                             img_size=(224, 224),
                             transforms=rotate_transforms,
                             is_categorical=True)
    horizontal_flip_dset = VOCDataset(root,
                                      img_size=(224, 224),
                                      transforms=horizontal_flip_transforms,
                                      is_categorical=True)
    vertical_flip_dset = VOCDataset(root,
                                    img_size=(224, 224),
                                    transforms=vertical_flip_transforms,
                                    is_categorical=True)

    n_class = original_dset.num_classes
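A minimal sketch, following the ConcatDataset/DataLoader pattern of Examples #3 and #5 and assuming custom_collate_fn is importable as in Example #1, of how these four datasets might be combined and batched (the batch size is a placeholder):

from torch.utils.data import ConcatDataset, DataLoader
from dataset.voc_dataset import custom_collate_fn  # assumed available, as in Example #1

train_dset = ConcatDataset([original_dset, rotate_dset,
                            horizontal_flip_dset, vertical_flip_dset])
# batch_size is a placeholder value.
train_loader = DataLoader(train_dset, batch_size=16, shuffle=True, collate_fn=custom_collate_fn)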
Example #5
    # root_val = os.path.join(root, 'images', 'val')
    # ann_train = os.path.join(root, 'annotations', 'instances_train2017.json')
    # ann_val = os.path.join(root, 'annotations', 'instances_val2017.json')
    #
    # dset_train = COCODataset(root_train, ann_train, transforms.Compose([transforms.ToTensor()]))
    # dset_val = COCODataset(root_val, ann_val, transforms.Compose([transforms.ToTensor()]))

    #################### VOC Dataset ####################
    root = 'C://DeepLearningData/VOC2012/'
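    # Note: the assignment below rebinds the name `transforms`, shadowing the torchvision.transforms
    # module; this only works because the module is not needed again after this point.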
    transforms = transforms.Compose(
        [transforms.Resize((224, 224)),
         transforms.ToTensor()])

    dset_train = VOCDataset(root,
                            img_size=(224, 224),
                            is_validation=False,
                            transforms=transforms,
                            is_categorical=False)
    dset_val = VOCDataset(root,
                          img_size=(224, 224),
                          is_validation=True,
                          transforms=transforms,
                          is_categorical=False)

    train_data_loader = DataLoader(dset_train,
                                   batch_size,
                                   shuffle=True,
                                   collate_fn=collate_fn)
    val_data_loader = DataLoader(dset_val,
                                 batch_size,
                                 shuffle=True,
Example #6
    # ann_train = os.path.join(root, 'annotations', 'instances_train2017.json')
    # ann_val = os.path.join(root, 'annotations', 'instances_val2017.json')
    #
    # dset_train = COCODataset(root_train, ann_train, transforms.Compose([transforms.ToTensor()]))
    # dset_val = COCODataset(root_val, ann_val, transforms.Compose([transforms.ToTensor()]))

    #################### VOC Dataset ####################
    root = 'C://DeepLearningData/VOC2012/'
    transforms = transforms.Compose(
        [transforms.Resize((224, 224)),
         transforms.ToTensor()])

    # dset_train = VOCDataset(root, is_validation=False, transforms=transforms, is_categorical=True)
    # dset_val = VOCDataset(root, is_validation=True, transforms=transforms, is_categorical=True)
    dset = VOCDataset(root,
                      img_size,
                      transforms=transforms,
                      is_categorical=True)

    n_data = len(dset)
    n_train_data = int(n_data * .7)
    n_val_data = n_data - n_train_data

    dset_train, dset_val = random_split(dset, [n_train_data, n_val_data])

    train_data_loader = DataLoader(dset_train,
                                   batch_size,
                                   shuffle=True,
                                   collate_fn=collate_fn)
    val_data_loader = DataLoader(dset_val,
                                 batch_size,
                                 shuffle=True,