Ejemplo n.º 1
0
    def __init__(self, root=None, batch_size=100,
                 train_shuffle=True, val_shuffle=False, test_shuffle=False,
                 train_aug=True, val_aug=False, test_aug=False,
                 size=32, color=True, val_ratio=0.5, aug_list=None, domain_id=None, sample_ratio=1.0, resize_pad=False, double_aug=False):
        """Build train/val/test loaders for the TFDS ``svhn_cropped`` dataset.

        The TFDS ``train`` split is used for training; the TFDS ``test`` split
        is divided by percentage into a validation part (first) and a test
        part (remainder).

        Args:
            root: Not used by this constructor.
            batch_size: Forwarded to ``_init_loader``; ``batch_size * 10`` is
                also passed as its second argument (presumably the shuffle
                buffer size -- confirm against ``_init_loader``).
            train_shuffle, val_shuffle, test_shuffle: Forwarded to
                ``_init_loader`` for the respective split.
            train_aug, val_aug, test_aug: Whether the augmentation ops are
                appended to the respective split's transforms.
            size: Images are resized to ``size x size`` unless ``size`` is 32.
            color: When false, ``RGB2Gray`` is applied.
            val_ratio: Fraction of the TFDS ``test`` split (rounded to a whole
                percent) that becomes the validation set.
            aug_list: Passed to ``compose_augment_seq``; ``None`` disables
                augmentation altogether.
            domain_id: When not ``None``, ``train_dom``/``val_dom``/``test_dom``
                loaders are created as well, with ``DomainLabel(domain_id)``
                appended to the transforms.
            sample_ratio: Must be 1.0 (asserted).
            resize_pad: Must be False (asserted).
            double_aug: When augmenting, start the augmentation ops with
                ``DuplicateX()`` instead of ``Identity()``.
        """
        # Sub-sampling and pad-based resizing are not supported here.
        assert sample_ratio == 1.0
        assert resize_pad == False

        ## default tforms
        base_ops = [
            Map2Tuple(),
            Identity() if size == 32 else Resize([size, size]),
            RGB2Gray() if not color else Identity(),
            Float(),
            Normalize(0.5, 0.5),
        ]

        ## data augmentation ops
        aug_ops = []
        if aug_list is not None:
            aug_ops.append(DuplicateX() if double_aug else Identity())
            aug_ops.extend(compose_augment_seq(aug_list, is_training=True))

        ## domain label
        with_domain = domain_id is not None
        dom_ops = [DomainLabel(domain_id)] if with_domain else []

        def _select(use_aug):
            # Without augmentation the base list itself is handed out.
            if use_aug:
                return base_ops + aug_ops
            return base_ops

        ## tforms
        ops_train = _select(train_aug)
        ops_val = _select(val_aug)
        ops_test = _select(test_aug)
        if with_domain:
            ops_train_dom = ops_train + dom_ops
            ops_val_dom = ops_val + dom_ops
            ops_test_dom = ops_test + dom_ops

        ## load data
        tf.random.set_seed(0)
        val_pct = round(val_ratio * 100.)
        ds_train = tfds.load('svhn_cropped', split='train', shuffle_files=True)
        ds_val = tfds.load('svhn_cropped', split='test[:%d' % val_pct + '%]', shuffle_files=True)
        ds_test = tfds.load('svhn_cropped', split='test[%d' % val_pct + '%:]', shuffle_files=True)

        ## construct data loaders
        buf = batch_size * 10
        self.train = self._init_loader(ds_train, buf, train_shuffle, batch_size, ops_train)
        self.val = self._init_loader(ds_val, buf, val_shuffle, batch_size, ops_val)
        self.test = self._init_loader(ds_test, buf, test_shuffle, batch_size, ops_test)
        if with_domain:
            self.train_dom = self._init_loader(ds_train, buf, train_shuffle, batch_size, ops_train_dom)
            self.val_dom = self._init_loader(ds_val, buf, val_shuffle, batch_size, ops_val_dom)
            self.test_dom = self._init_loader(ds_test, buf, test_shuffle, batch_size, ops_test_dom)
Ejemplo n.º 2
0
    def __init__(self,
                 root=None,
                 batch_size=100,
                 train_shuffle=True,
                 val_shuffle=False,
                 test_shuffle=False,
                 train_aug=True,
                 val_aug=False,
                 test_aug=False,
                 size=32,
                 color=True,
                 val_ratio=0.5,
                 aug_list=None,
                 domain_id=None,
                 sample_ratio=1.0,
                 resize_pad=False,
                 double_aug=False):
        """Build train/val/test loaders from ``tf.keras.datasets.mnist``.

        The Keras test set is divided into validation and test parts by
        ``self._split_data``; the training and validation sets are then
        shuffled and truncated to ``sample_ratio`` of their size.

        Args:
            root: Not used by this constructor.
            batch_size: Forwarded to ``_init_loader``.
            train_shuffle, val_shuffle, test_shuffle: Forwarded to
                ``_init_loader`` for the respective split.
            train_aug, val_aug, test_aug: Whether the augmentation ops are
                appended to the respective split's transforms.
            size: Target image side; must be at least 28 (asserted). With 28
                no resizing op is applied.
            color: Selects ``Mat2RGB`` (true) or ``Mat2Gray`` (false).
            val_ratio: Forwarded to ``self._split_data``.
            aug_list: Passed to ``compose_augment_seq``; ``None`` disables
                augmentation altogether.
            domain_id: When not ``None``, ``train_dom``/``val_dom``/``test_dom``
                loaders are created as well, with ``DomainLabel(domain_id)``
                appended to the transforms.
            sample_ratio: Fraction of the shuffled train and val examples to
                keep (the test set is not sub-sampled).
            resize_pad: For ``size != 28``, use ``Pad`` instead of ``Resize``.
            double_aug: When augmenting, start the augmentation ops with
                ``DuplicateX()`` instead of ``Identity()``.
        """
        assert size >= 28

        ## default tforms
        channel_op = Mat2RGB() if color else Mat2Gray()
        if size == 28:
            fit_op = Identity()
        elif resize_pad:
            fit_op = Pad([size - 28, size - 28])
        else:
            fit_op = Resize([size, size])
        base_ops = [
            channel_op,
            fit_op,
            Cast(tf.float32, tf.int32),
            Scaling(1.0 / 255.0),
            Normalize(0.5, 0.5),
        ]

        ## data augmentation ops
        aug_ops = []
        if aug_list is not None:
            aug_ops.append(DuplicateX() if double_aug else Identity())
            aug_ops.extend(compose_augment_seq(aug_list, is_training=True))

        ## domain label
        with_domain = domain_id is not None
        dom_ops = [DomainLabel(domain_id)] if with_domain else []

        def _select(use_aug):
            # Without augmentation the base list itself is handed out.
            if use_aug:
                return base_ops + aug_ops
            return base_ops

        ## tforms
        ops_train = _select(train_aug)
        ops_val = _select(val_aug)
        ops_test = _select(test_aug)
        if with_domain:
            ops_train_dom = ops_train + dom_ops
            ops_val_dom = ops_val + dom_ops
            ops_test_dom = ops_test + dom_ops

        ##TODO: tfds?
        ## load data
        train_pair, test_pair = tf.keras.datasets.mnist.load_data()
        x_train, y_train = train_pair
        x_test, y_test = test_pair
        (x_val, y_val), (x_test, y_test) = self._split_data(
            x_test, y_test, val_ratio, True)

        def _shuffled_subset(xs, ys):
            # Shuffle first so the kept prefix is a random sample.
            xs, ys = shuffle_labeled_examples(xs, ys)
            keep_x = round(xs.shape[0] * sample_ratio)
            keep_y = round(ys.shape[0] * sample_ratio)
            return xs[:keep_x], ys[:keep_y]

        ## reduce train/val samples (shuffle before sampling)
        x_train, y_train = _shuffled_subset(x_train, y_train)
        x_val, y_val = _shuffled_subset(x_val, y_val)

        ## init meta data
        self.n_train = y_train.shape[0]
        self.n_val = y_val.shape[0]
        self.n_test = y_test.shape[0]

        def _loader(xs, ys, count, shuffle, ops):
            # A fresh dataset object is created for every loader.
            dataset = tf.data.Dataset.from_tensor_slices((xs, ys))
            return self._init_loader(dataset, count, shuffle, batch_size, ops)

        ## construct data loaders
        self.train = _loader(x_train, y_train, self.n_train, train_shuffle,
                             ops_train)
        self.val = _loader(x_val, y_val, self.n_val, val_shuffle, ops_val)
        self.test = _loader(x_test, y_test, self.n_test, test_shuffle,
                            ops_test)
        if with_domain:
            self.train_dom = _loader(x_train, y_train, self.n_train,
                                     train_shuffle, ops_train_dom)
            self.val_dom = _loader(x_val, y_val, self.n_val, val_shuffle,
                                   ops_val_dom)
            self.test_dom = _loader(x_test, y_test, self.n_test, test_shuffle,
                                    ops_test_dom)
Ejemplo n.º 3
0
    def __init__(self,
                 root,
                 batch_size=100,
                 train_shuffle=True,
                 val_shuffle=False,
                 test_shuffle=False,
                 train_aug=True,
                 val_aug=False,
                 test_aug=False,
                 size=32,
                 val_ratio=0.5,
                 aug_list=None,
                 domain_id=None):
        """Build train/val/test loaders from JPEG files found under ``root``.

        Training files are taken from ``<root>/imgs_train/*/*.jpg``. Files from
        ``<root>/imgs_valid/*/*.jpg`` are shuffled with a fixed seed and divided
        into a validation part (first ``val_ratio`` of the files) and a test
        part (the remainder).

        Args:
            root: Directory containing the ``imgs_train`` and ``imgs_valid``
                folders.
            batch_size: Forwarded to ``_init_loader``; ``batch_size * 10`` is
                also passed as its second argument (presumably the shuffle
                buffer size -- confirm against ``_init_loader``).
            train_shuffle, val_shuffle, test_shuffle: Forwarded to
                ``_init_loader`` for the respective split.
            train_aug, val_aug, test_aug: Whether the augmentation ops are
                appended to the respective split's transforms.
            size: Images are resized to ``size x size``.
            val_ratio: Fraction of the ``imgs_valid`` files used for validation.
            aug_list: Passed to ``compose_augment_seq``; ``None`` disables
                augmentation altogether.
            domain_id: When not ``None``, ``train_dom``/``val_dom``/``test_dom``
                loaders are created as well, with ``DomainLabel(domain_id)``
                appended to the transforms.
        """

        ## default tforms
        tforms_dft = [
            File2Tensor(3),
            Resize([size, size]),
            Cast(tf.float32, tf.int32),
            Scaling(1.0 / 255.0),
            Normalize(0.5, 0.5),
        ]

        ## data augmentation ops
        if aug_list is not None:
            tforms_aug = list(compose_augment_seq(aug_list, is_training=True))
        else:
            tforms_aug = []

        ## domain label
        tforms_dom = [DomainLabel(domain_id)] if domain_id is not None else []

        ## tforms
        tforms_train = tforms_dft + tforms_aug if train_aug else tforms_dft
        tforms_val = tforms_dft + tforms_aug if val_aug else tforms_dft
        tforms_test = tforms_dft + tforms_aug if test_aug else tforms_dft
        if domain_id is not None:
            tforms_train_dom = tforms_train + tforms_dom
            tforms_val_dom = tforms_val + tforms_dom
            tforms_test_dom = tforms_test + tforms_dom

        ## load data
        np.random.seed(0)
        # glob returns files in filesystem-dependent order; sort first so the
        # seeded shuffle (and hence the val/test split) is reproducible.
        tr_fns = sorted(glob.glob(os.path.join(root, 'imgs_train', '*/*.jpg')))
        te_fns = sorted(glob.glob(os.path.join(root, 'imgs_valid', '*/*.jpg')))
        np.random.shuffle(tr_fns)
        np.random.shuffle(te_fns)
        n_val = round(len(te_fns) * val_ratio)
        val_fns = te_fns[:n_val]
        te_fns = te_fns[n_val:]

        ds_train = tf.data.Dataset.from_tensor_slices(tr_fns)
        ds_val = tf.data.Dataset.from_tensor_slices(val_fns)
        ds_test = tf.data.Dataset.from_tensor_slices(te_fns)

        ## construct data loaders
        self.train = self._init_loader(ds_train, batch_size * 10,
                                       train_shuffle, batch_size, tforms_train)
        self.val = self._init_loader(ds_val, batch_size * 10, val_shuffle,
                                     batch_size, tforms_val)
        self.test = self._init_loader(ds_test, batch_size * 10, test_shuffle,
                                      batch_size, tforms_test)
        if domain_id is not None:
            self.train_dom = self._init_loader(ds_train, batch_size * 10,
                                               train_shuffle, batch_size,
                                               tforms_train_dom)
            self.val_dom = self._init_loader(ds_val, batch_size * 10,
                                             val_shuffle, batch_size,
                                             tforms_val_dom)
            self.test_dom = self._init_loader(ds_test, batch_size * 10,
                                              test_shuffle, batch_size,
                                              tforms_test_dom)
Ejemplo n.º 4
0
    def __init__(self,
                 root,
                 batch_size=100,
                 train_shuffle=True,
                 val_shuffle=False,
                 test_shuffle=False,
                 train_aug=True,
                 val_aug=False,
                 test_aug=False,
                 size=32,
                 color=True,
                 val_ratio=0.5,
                 aug_list=None,
                 domain_id=None,
                 sample_ratio=1.0,
                 resize_pad=False,
                 double_aug=False):
        """Build train/val/test loaders from ``<root>/usps.h5``.

        The ``train`` and ``test`` groups of the HDF5 file are read, with their
        ``data`` arrays reshaped to ``(-1, 16, 16)``. The test set is divided
        into validation and test parts by ``self._split_data``; the training
        and validation sets are then shuffled and truncated to
        ``sample_ratio`` of their size.

        Args:
            root: Directory containing ``usps.h5``.
            batch_size: Forwarded to ``_init_loader``.
            train_shuffle, val_shuffle, test_shuffle: Forwarded to
                ``_init_loader`` for the respective split.
            train_aug, val_aug, test_aug: Whether the augmentation ops are
                appended to the respective split's transforms.
            size: Target image side; must be at least 28 (asserted).
            color: Selects ``Mat2RGB`` (true) or ``Mat2Gray`` (false).
            val_ratio: Forwarded to ``self._split_data``.
            aug_list: Passed to ``compose_augment_seq``; ``None`` disables
                augmentation altogether.
            domain_id: When not ``None``, ``train_dom``/``val_dom``/``test_dom``
                loaders are created as well, with ``DomainLabel(domain_id)``
                appended to the transforms.
            sample_ratio: Fraction of the shuffled train and val examples to
                keep (the test set is not sub-sampled).
            resize_pad: Must be False (asserted).
            double_aug: When augmenting, start the augmentation ops with
                ``DuplicateX()`` instead of ``Identity()``.
        """
        # NOTE(review): because size >= 28 is enforced, the ``size != 16``
        # Identity branch below can never be taken -- confirm which bound is
        # the intended one.
        assert size >= 28
        assert resize_pad == False

        ## default tforms
        channel_op = Mat2RGB() if color else Mat2Gray()
        if size != 16:
            fit_op = Resize([size, size])
        else:
            fit_op = Identity()
        base_ops = [
            channel_op,
            fit_op,
            Normalize(0.5, 0.5),
            Cast(None, tf.int32),
        ]

        ## data augmentation ops
        aug_ops = []
        if aug_list is not None:
            aug_ops.append(DuplicateX() if double_aug else Identity())
            aug_ops.extend(compose_augment_seq(aug_list, is_training=True))

        ## domain label
        with_domain = domain_id is not None
        dom_ops = [DomainLabel(domain_id)] if with_domain else []

        def _select(use_aug):
            # Without augmentation the base list itself is handed out.
            if use_aug:
                return base_ops + aug_ops
            return base_ops

        ## tforms
        ops_train = _select(train_aug)
        ops_val = _select(val_aug)
        ops_test = _select(test_aug)
        if with_domain:
            ops_train_dom = ops_train + dom_ops
            ops_val_dom = ops_val + dom_ops
            ops_test_dom = ops_test + dom_ops

        def _read_group(group):
            # Materialise both arrays while the file is still open.
            images = np.reshape(group.get('data')[:], (-1, 16, 16))
            labels = group.get('target')[:]
            return images, labels

        ## load data
        with h5py.File(os.path.join(root, 'usps.h5'), 'r') as hf:
            x_train, y_train = _read_group(hf.get('train'))
            x_test, y_test = _read_group(hf.get('test'))
        (x_val, y_val), (x_test, y_test) = self._split_data(
            x_test, y_test, val_ratio, True)

        def _shuffled_subset(xs, ys):
            # Shuffle first so the kept prefix is a random sample.
            xs, ys = shuffle_labeled_examples(xs, ys)
            keep_x = round(xs.shape[0] * sample_ratio)
            keep_y = round(ys.shape[0] * sample_ratio)
            return xs[:keep_x], ys[:keep_y]

        ## reduce train/val samples (shuffle before sampling)
        x_train, y_train = _shuffled_subset(x_train, y_train)
        x_val, y_val = _shuffled_subset(x_val, y_val)

        ## init meta data
        self.n_train = y_train.shape[0]
        self.n_val = y_val.shape[0]
        self.n_test = y_test.shape[0]

        def _loader(xs, ys, count, shuffle, ops):
            # A fresh dataset object is created for every loader.
            dataset = tf.data.Dataset.from_tensor_slices((xs, ys))
            return self._init_loader(dataset, count, shuffle, batch_size, ops)

        ## construct data loaders
        self.train = _loader(x_train, y_train, self.n_train, train_shuffle,
                             ops_train)
        self.val = _loader(x_val, y_val, self.n_val, val_shuffle, ops_val)
        self.test = _loader(x_test, y_test, self.n_test, test_shuffle,
                            ops_test)
        if with_domain:
            self.train_dom = _loader(x_train, y_train, self.n_train,
                                     train_shuffle, ops_train_dom)
            self.val_dom = _loader(x_val, y_val, self.n_val, val_shuffle,
                                   ops_val_dom)
            self.test_dom = _loader(x_test, y_test, self.n_test, test_shuffle,
                                    ops_test_dom)
Ejemplo n.º 5
0
    def __init__(self,
                 root,
                 batch_size=100,
                 train_shuffle=True,
                 val_shuffle=False,
                 test_shuffle=False,
                 train_aug=True,
                 val_aug=False,
                 test_aug=False,
                 color=True,
                 size=32,
                 aug_list=None,
                 domain_id=None):
        """Build train/val/test ``DataFolderLoader`` objects under ``root``.

        The loaders read from ``<root>/train``, ``<root>/val`` and
        ``<root>/test`` respectively.

        Args:
            root: Directory containing the ``train``, ``val`` and ``test``
                folders.
            batch_size: Forwarded to ``DataFolderLoader``.
            train_shuffle, val_shuffle, test_shuffle: Forwarded to
                ``DataFolderLoader``; a true value also appends
                ``RandomHorizontalFlip`` to that split's transforms.
            train_aug, val_aug, test_aug: Whether the augmentation ops are
                appended to the respective split's transforms.
            color: Not used by this constructor.
            size: Must be 32 (asserted).
            aug_list: Passed to ``compose_augment_seq``; ``None`` disables
                augmentation altogether.
            domain_id: When not ``None``, ``train_dom``/``val_dom``/``test_dom``
                loaders are created as well, with ``DomainLabel(domain_id)``
                appended to the transforms.
        """
        assert (size == 32)
        ## default tforms
        tforms_dft = [
            Cast(tf.float32, tf.int32),
            Scaling(1.0 / 255.0),
            Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
        ]

        ## default augment
        tforms_size_aug_rnd = [
            #RandomCrop((28, 28, 3)),
            RandomHorizontalFlip(),
        ]
        tforms_size_aug_no_rnd = [
            #CenterCrop(28, 32),
        ]

        ## data augmentation ops
        if aug_list is not None:
            tforms_aug = list(compose_augment_seq(aug_list, is_training=True))
        else:
            tforms_aug = []

        ## domain label
        tforms_dom = [DomainLabel(domain_id)] if domain_id is not None else []

        def _pipeline(use_aug, use_rnd):
            # Always start from a copy: extending ``tforms_dft`` itself would
            # leak one split's random ops into every split built after it.
            ops = list(tforms_dft)
            if use_aug:
                ops += tforms_aug
            ops += tforms_size_aug_rnd if use_rnd else tforms_size_aug_no_rnd
            return ops

        ## tforms
        tforms_train = _pipeline(train_aug, train_shuffle)
        tforms_val = _pipeline(val_aug, val_shuffle)
        tforms_test = _pipeline(test_aug, test_shuffle)
        if domain_id is not None:
            tforms_train_dom = tforms_train + tforms_dom
            tforms_val_dom = tforms_val + tforms_dom
            tforms_test_dom = tforms_test + tforms_dom

        ## construct data loaders
        self.train = DataFolderLoader(os.path.join(root, 'train'), batch_size,
                                      train_shuffle, tforms_train)
        self.val = DataFolderLoader(os.path.join(root, 'val'), batch_size,
                                    val_shuffle, tforms_val)
        self.test = DataFolderLoader(os.path.join(root, 'test'), batch_size,
                                     test_shuffle, tforms_test)
        if domain_id is not None:
            self.train_dom = DataFolderLoader(os.path.join(root, 'train'),
                                              batch_size, train_shuffle,
                                              tforms_train_dom)
            self.val_dom = DataFolderLoader(os.path.join(root, 'val'),
                                            batch_size, val_shuffle,
                                            tforms_val_dom)
            self.test_dom = DataFolderLoader(os.path.join(root, 'test'),
                                             batch_size, test_shuffle,
                                             tforms_test_dom)