def get_train_dataflow(datadir, args):
    """Build the training dataflow over paired ILSVRC12 file lists.

    Each datapoint yields a pair of images (read, occlusion-augmented,
    then transformed) stacked into one array, with the class label
    duplicated once per image.

    :param datadir: Directory laid out for ``ILSVRC12Files``.
    :param args: Config namespace; uses ``args.process_nums`` and is
        forwarded to ``ReadImageToCVMat`` / ``transform``.
    :return: A ``MultiThreadMapData``-wrapped dataflow.
    """
    # Cap worker threads at half the physical CPU count.
    n_workers = min(args.process_nums, multiprocessing.cpu_count() // 2)
    # NOTE(review): shuffle=False on a *training* dataflow — confirm this
    # is intentional (shuffling may happen elsewhere in the pipeline).
    files = ILSVRC12Files(datadir, shuffle=False)

    def load_pair(dp):
        pair_names, label = dp
        first = ReadImageToCVMat(pair_names[0], args)
        second = ReadImageToCVMat(pair_names[1], args)
        first = occ_augImage(first)
        second = occ_augImage(second)
        first = transform(first, args)
        second = transform(second, args)
        return np.array([first, second]), np.array([label, label])

    flow = MultiThreadMapData(
        files, n_workers, load_pair,
        buffer_size=min(2000, files.size()), strict=True)
    # ds = MultiProcessRunnerZMQ(ds, parallel)
    # ds = BatchData(ds, batch_size, remainder=True)  # do not fork() under MPI
    return flow
def get_test_dataflow(datadir, args):
    """Build the evaluation dataflow over ILSVRC12 file lists.

    Unlike the training flow, each datapoint carries a single filename;
    the image is read and transformed (no occlusion augmentation).

    :param datadir: Directory laid out for ``ILSVRC12Files``.
    :param args: Config namespace; uses ``args.process_nums`` and is
        forwarded to ``ReadImageToCVMat`` / ``transform``.
    :return: A ``MultiThreadMapData``-wrapped dataflow.
    """
    # Evaluation may use every core; no halving as in training.
    n_workers = min(args.process_nums, multiprocessing.cpu_count())
    files = ILSVRC12Files(datadir, shuffle=False)

    def load_single(dp):
        fname, label = dp
        img = ReadImageToCVMat(fname, args)
        img = transform(img, args)
        return np.array(img), np.array(label)

    flow = MultiThreadMapData(
        files, n_workers, load_single,
        buffer_size=min(2000, files.size()), strict=True)
    # ds = BatchData(ds, batch_size, remainder=True)  # do not fork() under MPI
    return flow
def process_s2b_data(df, batch_size, num_threads):
    """
    Perform preprocessing for the avatar synth data.

    :param df: An AvatarSynthDataFlow.
    :param batch_size: The minibatch size.
    :param num_threads: The number of threads to read and process data.
    :return: A dataflow with extra processing steps applied.
    """
    # Normalize pixel intensities into [-1, 1].
    normalizer = imgaug.AugmentorList([imgaug.MinMaxNormalize(min=-1, max=1)])

    def get_imgs(dp):
        """
        :param dp: A datapoint tuple, (path_to_face.jpg, path_to_bitmoji.jpg)
        """
        face_path, bitmoji_path = dp[0], dp[1]
        face = normalizer.augment(imread(face_path))
        bitmoji = normalizer.augment(imread(bitmoji_path))
        # Grayscale inputs are replicated to 3 channels so shapes match.
        if face.ndim == 2:
            face = np.stack([face] * 3, axis=-1)
        if bitmoji.ndim == 2:
            bitmoji = np.stack([bitmoji] * 3, axis=-1)
        return [face, bitmoji]

    df = MultiThreadMapData(df, nr_thread=num_threads, map_func=get_imgs,
                            buffer_size=min(df.size(), 200))
    df = PrefetchDataZMQ(df, nr_proc=num_threads)
    # TODO: switch back to remainder=True when s2b input batch size switched back to None
    df = BatchData(df, batch_size, remainder=False)
    # df = BatchData(df, batch_size, remainder=True)
    return df