Beispiel #1
0
    def __init__(self,
                 root,
                 output_size,
                 imset='2017/train.txt',
                 clip_n=3,
                 max_obj_n=11):
        """Read the dataset names of an image set and build the augmentations.

        Args:
            root: Dataset root; the list file is read from
                ``<root>/ImageSets/<imset>``.
            output_size: Size handed to the random resized crop.
            imset: List file relative to ``<root>/ImageSets``; one name per
                line, blank lines are ignored.
            clip_n: Stored on the instance as ``clip_n``; not used here.
            max_obj_n: Stored on the instance and handed to the one-hot
                transform.
        """
        self.root = root
        self.clip_n = clip_n
        self.output_size = output_size
        self.max_obj_n = max_obj_n

        # Every non-blank line, stripped of surrounding whitespace, is a name.
        list_file = os.path.join(root, 'ImageSets', imset)
        with open(list_file, 'r') as handle:
            stripped_lines = (raw.strip() for raw in handle)
            self.dataset_list = [name for name in stripped_lines if name]

        # Augmentation / conversion callables, built once per dataset object.
        self.random_horizontal_flip = mytrans.RandomHorizontalFlip(0.3)
        self.color_jitter = TF.ColorJitter(0.1, 0.1, 0.1, 0.02)
        self.random_affine = mytrans.RandomAffine(
            degrees=15, translate=(0.1, 0.1), scale=(0.95, 1.05), shear=10)
        self.random_resize_crop = mytrans.RandomResizedCrop(
            output_size, (0.8, 1), (0.95, 1.05))
        self.to_tensor = TF.ToTensor()
        self.to_onehot = mytrans.ToOnehot(max_obj_n, shuffle=True)
Beispiel #2
0
    def __init__(self,
                 root,
                 output_size,
                 dataset_file='meta.json',
                 clip_n=3,
                 max_obj_n=11):
        """Read the video list from a JSON meta file and build the augmentations.

        Args:
            root: Dataset root; the meta file is read from
                ``<root>/<dataset_file>``.
            output_size: Size handed to the random resized crop.
            dataset_file: JSON file, relative to ``root``, that must contain
                a ``'videos'`` entry.
            clip_n: Stored on the instance as ``clip_n``; not used here.
            max_obj_n: Stored on the instance and handed to the one-hot
                transform.
        """
        self.root = root
        self.clip_n = clip_n
        self.output_size = output_size
        self.max_obj_n = max_obj_n

        meta_path = os.path.join(root, dataset_file)
        with open(meta_path, 'r') as handle:
            meta = json.load(handle)

        # list() over the 'videos' entry; for a mapping this keeps its keys.
        video_names = list(meta['videos'])
        self.dataset_list = video_names
        self.dataset_size = len(video_names)

        # Augmentation / conversion callables, built once per dataset object.
        self.random_horizontal_flip = mytrans.RandomHorizontalFlip(0.3)
        self.color_jitter = TF.ColorJitter(0.1, 0.1, 0.1, 0.02)
        self.random_affine = mytrans.RandomAffine(
            degrees=15, translate=(0.1, 0.1), scale=(0.95, 1.05), shear=10)
        self.random_resize_crop = mytrans.RandomResizedCrop(
            output_size, (0.3, 0.5), (0.95, 1.05))
        self.to_tensor = TF.ToTensor()
        self.to_onehot = mytrans.ToOnehot(max_obj_n, shuffle=True)
    def trans(is_training=True):
        """Compose the transform pipeline.

        The pipeline always starts with ``T.ToTensor()``; when
        ``is_training`` is truthy a ``T.RandomHorizontalFlip(0.5)`` step is
        appended after it.
        """
        pipeline = [T.ToTensor()]
        if is_training:
            pipeline.append(T.RandomHorizontalFlip(0.5))
        return T.Compose(pipeline)
Beispiel #4
0
def make_coco_transforms(image_set):
    """Return the transform pipeline for the given image set.

    Args:
        image_set: ``'train'`` (random horizontal flip, then tensor
            conversion) or ``'val'`` (tensor conversion only).

    Raises:
        ValueError: If ``image_set`` is neither ``'train'`` nor ``'val'``.
    """
    # Built up front, before the image_set check, as in the original flow.
    to_tensor = T.Compose([T.ToTensor()])

    if image_set == 'train':
        steps = [T.RandomHorizontalFlip(0.5), to_tensor]
    elif image_set == 'val':
        steps = [to_tensor]
    else:
        raise ValueError(f'unknown {image_set}')

    return T.Compose(steps)
Beispiel #5
0
    def __init__(self, root, output_size, dataset_file='./assets/pretrain.txt', clip_n=3, max_obj_n=11):
        """Index the image/mask pairs of the listed datasets and build the augmentations.

        Each non-blank line of ``dataset_file`` names a dataset. Its images
        are globbed from ``<root>/JPEGImages/<name>`` (``*.jpg`` then
        ``*.png``, each sorted) and its masks from
        ``<root>/Annotations/<name>`` (``*.png``, sorted). A dataset is used
        only when it has at least one image and as many masks as images;
        otherwise a message is printed and it is skipped.

        Args:
            root: Dataset root containing ``JPEGImages`` and ``Annotations``.
            output_size: Size handed to the random resized crop.
            dataset_file: Text file with one dataset name per line.
            clip_n: Stored on the instance as ``clip_n``; not used here.
            max_obj_n: Stored on the instance and handed to the one-hot
                transform.
        """
        self.root = root
        self.clip_n = clip_n
        self.output_size = output_size
        self.max_obj_n = max_obj_n

        self.img_list = list()
        self.mask_list = list()

        dataset_list = list()
        with open(dataset_file, 'r') as lines:
            for line in lines:
                dataset_name = line.strip()
                if not dataset_name:
                    # A blank line would make the joins below resolve to the
                    # JPEGImages/Annotations roots themselves, so skip it.
                    continue

                img_dir = os.path.join(root, 'JPEGImages', dataset_name)
                mask_dir = os.path.join(root, 'Annotations', dataset_name)

                img_list = sorted(glob(os.path.join(img_dir, '*.jpg'))) + sorted(glob(os.path.join(img_dir, '*.png')))
                mask_list = sorted(glob(os.path.join(mask_dir, '*.png')))

                if not img_list:
                    print(f'\tPreTrain dataset {dataset_name} doesn\'t exist. Skip.')
                    continue

                if len(img_list) != len(mask_list):
                    print(f'\tPreTrain dataset {dataset_name} has {len(img_list)} imgs and {len(mask_list)} annots. Not match! Skip.')
                    continue

                # Images and masks are paired by position in these two lists.
                dataset_list.append(dataset_name)
                self.img_list += img_list
                self.mask_list += mask_list
                print(f'\t{dataset_name}: {len(img_list)} imgs.')

        print(myutils.gct(), f'{len(self.img_list)} imgs are used for PreTrain. They are from {dataset_list}.')

        # Augmentation / conversion callables, built once per dataset object.
        self.random_horizontal_flip = mytrans.RandomHorizontalFlip(0.3)
        self.color_jitter = TF.ColorJitter(0.1, 0.1, 0.1, 0.03)
        self.random_affine = mytrans.RandomAffine(degrees=20, translate=(0.1, 0.1), scale=(0.9, 1.1), shear=10)
        self.random_resize_crop = mytrans.RandomResizedCrop(output_size, (0.8, 1))
        self.to_tensor = TF.ToTensor()
        self.to_onehot = mytrans.ToOnehot(max_obj_n, shuffle=True)
Beispiel #6
0
def get_transform(train):
    """Compose the transform pipeline.

    ``T.ToTensor()`` always comes first; when ``train`` is truthy a
    ``T.RandomHorizontalFlip(0.5)`` step follows it.
    """
    steps = [T.ToTensor()]
    if train:
        steps.append(T.RandomHorizontalFlip(0.5))
    return T.Compose(steps)
# trainloader = torch.utils.data.DataLoader(CSDataSet(args.data_dir, './dataset/list/cityscapes/train.lst', max_iters=args.num_steps*args.batch_size, crop_size=(h, w),
#                 scale=args.random_scale, mirror=args.random_mirror, mean=IMG_MEAN),
#                 batch_size=args.batch_size, shuffle=True, num_workers=4, pin_memory=True)
# valloader = torch.utils.data.DataLoader(CSDataSet(args.data_dir, './dataset/list/cityscapes/val.lst', crop_size=(1024, 2048), mean=IMG_MEAN, scale=False, mirror=False),
#                                 batch_size=2, shuffle=False, pin_memory=True)

# Per-channel mean/std are written on a 0-1 scale and rescaled to 0-255 here;
# the ToTensor steps below are annotated as not dividing by 255.
value_scale = 255
mean = [channel * value_scale for channel in (0.485, 0.456, 0.406)]
std = [channel * value_scale for channel in (0.229, 0.224, 0.225)]

# Training pipeline: random flip, tensor conversion, normalisation.
# The commented-out entries are disabled augmentation steps kept for reference.
train_transform = my_trans.Compose(
    [
        # my_trans.Resize((args.height, args.width)),
        # my_trans.RandScale([0.5, 2.0]),
        # my_trans.RandomGaussianBlur(),
        my_trans.RandomHorizontalFlip(),
        # my_trans.Crop([args.height, args.width],crop_type='rand', padding=mean, ignore_label=255),
        my_trans.ToTensor(),  # without div 255
        my_trans.Normalize(mean=mean, std=std),
    ]
)

# Validation pipeline: tensor conversion and normalisation only.
val_transform = my_trans.Compose(
    [
        # my_trans.Resize((args.height, args.width)),
        my_trans.ToTensor(),  # without div 255
        my_trans.Normalize(mean=mean, std=std),
    ]
)

data_dir = '/data/zzg/CamVid/'
train_dataset = CamVid(data_dir, mode='train', p=None, transform=train_transform)
Beispiel #8
0
                        default='cuda:0',
                        help='cpu or cuda:0 or cuda:1')

    args = parser.parse_args() if string is None else parser.parse_args(string)
    return args


if __name__ == '__main__':
    # Script entry point: parse CLI options, start the wandb run, build the
    # face datasets/loaders and hand everything to train().
    args = parse_args()

    wandb.init(config=args, project='dlcv_gan_face')

    # NOTE(review): the same pipeline, including the random flip, is applied
    # to the validation split as well - confirm this is intended.
    transform = transforms.Compose(
        [
            transforms.Resize(args.img_size),
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),
            transforms.Normalize([0.5] * 3, [0.5] * 3),
        ]
    )

    train_dataset = Face_Dataset('../hw3_data/face/train', transform)
    valid_dataset = Face_Dataset('../hw3_data/face/test', transform)

    # Both loaders share batch size and worker count; only training shuffles.
    loader_kwargs = dict(batch_size=args.batch, num_workers=args.num_workers)
    train_dataloader = DataLoader(train_dataset, shuffle=True, **loader_kwargs)
    valid_dataloader = DataLoader(valid_dataset, **loader_kwargs)

    train(args, train_dataloader, valid_dataloader)