Example #1

    def load_class_samples(self, d):
        if d['class'] not in self.data_cache:
            if d['class'] == '_silence_':
                samples = torch.zeros(self.silence_num_samples, 1,
                                      self.desired_samples)
                sample_ds = TransformDataset(
                    ListDataset(samples),
                    compose([
                        partial(convert_dict, 'data'),
                        partial(self.mix_background, True, 'data'),
                        partial(self.extract_features, 'data')
                    ]))

            else:
                samples = []

                if d['class'] == '_unknown_':
                    # Unknown words live in a shared _unknown_ folder one
                    # level above the keyword folders
                    sample_dir = os.path.join(self.data_dir, '..',
                                              '_unknown_')
                    split = os.path.basename(self.class_file)
                    unknown_wavs = os.path.join(sample_dir, split)
                    with open(unknown_wavs, 'r') as rf:
                        samples = [
                            os.path.join(sample_dir, wav_file.strip('\n'))
                            for wav_file in rf.readlines()
                        ]
                else:
                    sample_dir = os.path.join(self.data_dir, d['class'])
                    samples = glob.glob(os.path.join(sample_dir, '*.wav'))

                if len(samples) == 0:
                    raise Exception(
                        "No samples found for GoogleSpeechCommand {} at {}".format(
                            d['class'], sample_dir))

                sample_ds = TransformDataset(
                    ListDataset(samples),
                    compose([
                        partial(convert_dict, 'file_name'),
                        partial(self.load_audio, 'file_name', 'data'),
                        partial(self.adjust_volume, 'data'),
                        partial(self.shift_and_pad, 'data'),
                        partial(self.mix_background, self.use_background,
                                'data'),
                        partial(self.extract_features, 'data')
                    ]))

            loader = torch.utils.data.DataLoader(sample_ds,
                                                 batch_size=len(sample_ds),
                                                 shuffle=False)

            for sample in loader:
                self.data_cache[d['class']] = sample['data']
                break  # only need one sample because batch size equal to dataset length

        return {'class': d['class'], 'data': self.data_cache[d['class']]}
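Each example in this listing pushes raw samples through the same dict-based transform pipeline. Below is a minimal sketch of that pattern, assuming torchnet-style semantics: convert_dict(k, v) wraps a value as {k: v}, and compose applies its transforms left to right (the testCompose cases further down confirm the ordering). upper_field is a hypothetical stand-in for a per-field transform such as extract_features.

from functools import partial

def convert_dict(k, v):
    return {k: v}

def compose(transforms):
    def composed(z):
        for t in transforms:
            z = t(z)
        return z
    return composed

def upper_field(k, d):
    # Hypothetical per-field transform: uppercase the value stored at key k
    d[k] = d[k].upper()
    return d

pipeline = compose([
    partial(convert_dict, 'data'),  # 'hello' -> {'data': 'hello'}
    partial(upper_field, 'data')    # {'data': 'hello'} -> {'data': 'HELLO'}
])
assert pipeline('hello') == {'data': 'HELLO'}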
Example #2
def load_class_images(d):
    if d['class'] not in OMNIGLOT_CACHE:
        alphabet, character, rot = d['class'].split('/')
        image_dir = os.path.join(OMNIGLOT_DATA_DIR, 'data', alphabet,
                                 character)
        #print(">>>" + image_dir)
        image_ds = TransformDataset(
            ListDataset(sorted(glob.glob(os.path.join(image_dir, '*.png')))),
            compose([
                partial(convert_dict, 'file_name'),
                partial(load_image_path, 'file_name', 'data'),
                partial(rotate_image, 'data', float(rot[3:])),
                partial(scale_image, 'data', 28, 28),
                partial(convert_tensor, 'data')
            ]))
        #print(len(image_ds))

        loader = torch.utils.data.DataLoader(image_ds,
                                             batch_size=len(image_ds),
                                             shuffle=False)
        for sample in loader:
            #print(sample)
            OMNIGLOT_CACHE[d['class']] = sample['data']
            break  # only need one sample because batch size equal to dataset length

    return {'class': d['class'], 'data': OMNIGLOT_CACHE[d['class']]}
Example #3
def load_class_images(d):
    if d['class'] not in OMNIGLOT_CACHE:
        # Parse the class descriptor into alphabet, character, and rotation
        alphabet, character, rot = d['class'].split('/')
        image_dir = os.path.join(OMNIGLOT_DATA_DIR, 'data', alphabet, character)
        # Collect all images under that directory
        class_images = sorted(glob.glob(os.path.join(image_dir, '*.png')))
        if len(class_images) == 0:
            raise Exception("No images found for omniglot class {} at {}. Did you run download_omniglot.sh first?".format(d['class'], image_dir))

        # ListDataset serves the data from the image list; the transform
        # pipeline builds the dict, rotates, rescales, and converts to a tensor
        image_ds = TransformDataset(ListDataset(class_images),
                                    compose([partial(convert_dict, 'file_name'),
                                             partial(load_image_path, 'file_name', 'data'),
                                             partial(rotate_image, 'data', float(rot[3:])),
                                             partial(scale_image, 'data', 28, 28),
                                             partial(convert_tensor, 'data')]))

        # Put the whole class into a single batch
        loader = torch.utils.data.DataLoader(image_ds, batch_size=len(image_ds), shuffle=False)
        
        # Take the one batch
        for sample in loader:
            # Write the image data into OMNIGLOT_CACHE
            OMNIGLOT_CACHE[d['class']] = sample['data']
            break # only need one sample because batch size equal to dataset length
    
    # Return a dict with the class name and its cached data
    return { 'class': d['class'], 'data': OMNIGLOT_CACHE[d['class']] }
Example #4
def load_class_images(d):
    if d['class'] not in CIFAR100_CACHE:
        image_dir = os.path.join(CIFAR100_DATA_DIR, 'data', d['class'])

        class_images = sorted(glob.glob(os.path.join(image_dir, '*.jpg')))
        if len(class_images) == 0:
            raise Exception(
                "No images found for CIFAR100 class {} at {}.".format(
                    d['class'], image_dir))

        image_ds = TransformDataset(
            ListDataset(class_images),
            compose([
                partial(convert_dict, 'file_name'),
                partial(load_image_path, 'file_name', 'data'),
                partial(scale_image, 'data', 32, 32),
                partial(convert_tensor, 'data')
                # partial(normalize_image, 'data', {'mean': (0.50400572, 0.48892908, 0.44281732),
                #                                   'std': (0.26477088, 0.25454896, 0.27408391)})
            ]))

        loader = torch.utils.data.DataLoader(image_ds,
                                             batch_size=len(image_ds),
                                             shuffle=False)

        for sample in loader:
            CIFAR100_CACHE[d['class']] = sample['data']
            break  # only need one sample because batch size equal to dataset length

    return {'class': d['class'], 'data': CIFAR100_CACHE[d['class']]}
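All of these loaders share one trick: setting batch_size to the dataset length makes the DataLoader's default collate function stack the entire class into a single tensor, so the first (and only) batch is exactly what gets cached. A self-contained toy illustration:

import torch

# Toy stand-in for one class of twenty 1x28x28 images
imgs = [torch.randn(1, 28, 28) for _ in range(20)]

loader = torch.utils.data.DataLoader(imgs, batch_size=len(imgs), shuffle=False)
batch = next(iter(loader))  # the whole "class" stacked into one tensor
assert batch.shape == (20, 1, 28, 28)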
Example #5
def load_class_images(d):
    if d['class'] not in MINIIMAGENET_CACHE:
        image, classname = d['class'].split(',')
        image_dir = os.path.join(MINIIMAGENET_DATA_DIR, 'images')
        # Get all images with same class
        class_images = sorted(
            glob.glob(os.path.join(image_dir, '{}*'.format(classname))))
        if len(class_images) == 0:
            raise Exception(
                "No images found for miniImagenet class {} at {}.".format(
                    d['class'], image_dir))

        image_ds = TransformDataset(
            ListDataset(class_images),
            compose([
                partial(convert_dict, 'file_name'),
                partial(load_image_path, 'file_name', 'data'),
                #partial(rotate_image, 'data', float(rot[3:])),
                partial(scale_image, 'data', 84, 84),
                partial(convert_tensor, 'data')
            ]))

        loader = torch.utils.data.DataLoader(image_ds,
                                             batch_size=len(image_ds),
                                             shuffle=False)

        for sample in loader:
            print(sample['data'].shape)
            MINIIMAGENET_CACHE[d['class']] = sample['data']
            break  # only need one sample because batch size equal to dataset length

    return {'class': d['class'], 'data': MINIIMAGENET_CACHE[d['class']]}
Example #6

 def __init__(self, data_dir, class_file, n_support, n_query, cuda, args):
     self.sample_rate = args['sample_rate']
     self.clip_duration_ms = args['clip_duration']
     self.window_size_ms = args['window_size']
     self.window_stride_ms = args['window_stride']
     self.feature_bin_count = args['num_features']
     self.foreground_volume = args['foreground_volume']
     self.time_shift_ms = args['time_shift']
     self.use_background = args['include_background']
     self.background_volume = args['bg_volume']
     self.background_frequency = args['bg_frequency']
     self.desired_samples = int(self.sample_rate * self.clip_duration_ms /
                                1000)
     self.silence = args['include_silence']
     self.silence_num_samples = args['num_silence']
     self.unknown = args['include_unknown']
     self.data_cache = {}
     self.data_dir = data_dir
     self.class_file = class_file
     self.n_support = n_support
     self.n_query = n_query
     self.background_data = self.load_background_data()
     self.mfcc = self.build_mfcc_extractor()
     self.transforms = [
         partial(convert_dict, 'class'), self.load_class_samples,
         self.extract_episode
     ]
     if cuda:
         self.transforms.append(CudaTransform())
     self.class_names = self.read()
     transforms = compose(self.transforms)
     super().__init__(ListDataset(self.class_names), transforms)
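When cuda is set, a CudaTransform is appended to the transform list here and in the loaders below. A minimal sketch consistent with that usage, assuming it simply moves every tensor-valued field of the sample dict onto the GPU (the actual class may differ):

class CudaTransform(object):
    """Move all tensor-valued fields of a sample dict to the GPU."""

    def __call__(self, data):
        for k, v in data.items():
            if hasattr(v, 'cuda'):
                data[k] = v.cuda()
        return data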
Example #7

def load_class_images(d):
    if d['class'] not in OMNIGLOT_CACHE:
        alphabet, character, rot = d['class'].split('/')
        image_dir = os.path.join(OMNIGLOT_DATA_DIR, 'data', alphabet,
                                 character)

        class_images = sorted(glob.glob(os.path.join(image_dir, '*.png')))
        if len(class_images) == 0:
            raise Exception(
                "No images found for omniglot class {} at {}. Did you run download_omniglot.sh first?"
                .format(d['class'], image_dir))

        image_ds = TransformDataset(
            ListDataset(class_images),
            compose([
                partial(convert_dict, 'file_name'),
                partial(load_image_path, 'file_name', 'data'),
                partial(rotate_image, 'data', float(rot[3:])),
                partial(scale_image, 'data', 28, 28),
                partial(convert_tensor, 'data')
            ]))

        loader = torch.utils.data.DataLoader(image_ds,
                                             batch_size=len(image_ds),
                                             shuffle=False)

        for sample in loader:
            OMNIGLOT_CACHE[d['class']] = sample['data']
            break  # only need one sample because batch size equal to dataset length

    return {'class': d['class'], 'data': OMNIGLOT_CACHE[d['class']]}
Example #8
def load_class_images(d):
    if d['class'] not in IMAGENET_CACHE:
        image_dir = os.path.join(IMAGENET_DATA_DIR, 'data', d['class'])

        class_images = sorted(glob.glob(os.path.join(image_dir, '*.jpg')))
        if len(class_images) == 0:
            raise Exception(
                "No images found for miniImagenet class {} at {}.".format(
                    d['class'], image_dir))

        image_ds = TransformDataset(
            ListDataset(class_images),
            compose([
                partial(convert_dict, 'file_name'),
                partial(load_image_path, 'file_name', 'data'),
                partial(scale_image, 'data', 84, 84),
                partial(convert_tensor, 'data')
            ]))

        loader = torch.utils.data.DataLoader(image_ds,
                                             batch_size=len(image_ds),
                                             shuffle=False)

        for sample in loader:
            IMAGENET_CACHE[d['class']] = sample['data']
            break  # only need one sample because batch size equal to dataset length

    return {'class': d['class'], 'data': IMAGENET_CACHE[d['class']]}
Example #9
def load(opt, splits):
    split_dir = os.path.join(MINI_IMGNET_DATA_DIR, 'splits', opt['data.split'])

    ret = {}
    for split in splits:
        if split in ['val', 'test'] and opt['data.test_way'] != 0:
            n_way = opt['data.test_way']
        else:
            n_way = opt['data.way']

        if split in ['val', 'test'] and opt['data.test_shot'] != 0:
            n_support = opt['data.test_shot']
        else:
            n_support = opt['data.shot']

        if split in ['val', 'test'] and opt['data.test_query'] != 0:
            n_query = opt['data.test_query']
        else:
            n_query = opt['data.query']

        if split in ['val', 'test']:
            n_episodes = opt['data.test_episodes']
        else:
            n_episodes = opt['data.train_episodes']

        cache_path = get_cache_path(split)
        if os.path.exists(cache_path):
            with open(cache_path, "rb") as f:
                try:
                    # Python 3 reading a pickle written by Python 2
                    data = pkl.load(f, encoding='bytes')
                    img_data = data[b'image_data']
                    class_dict = data[b'class_dict']
                except (TypeError, KeyError):
                    f.seek(0)
                    data = pkl.load(f)
                    img_data = data['image_data']
                    class_dict = data['class_dict']
        else:
            raise FileNotFoundError(
                "No miniImagenet cache found at {}".format(cache_path))

        transforms = [
            partial(convert_dict, 'class'),
            partial(load_class_images, img_data, class_dict),
            partial(extract_episode, n_support, n_query)
        ]
        if opt['data.cuda']:
            transforms.append(CudaTransform())
        class_names = [key for key in class_dict]
        transforms = compose(transforms)
        ds = TransformDataset(ListDataset(class_names), transforms)

        if opt['data.sequential']:
            sampler = SequentialBatchSampler(len(ds))
        else:
            sampler = EpisodicBatchSampler(len(ds), n_way, n_episodes)

        # use num_workers=0, otherwise may receive duplicate episodes
        ret[split] = torch.utils.data.DataLoader(ds,
                                                 batch_sampler=sampler,
                                                 num_workers=0)

    return ret
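The batch_sampler is what turns the per-class dataset into episodes: every batch it yields is a set of class indices, which the DataLoader then maps through the per-class transforms. A plausible reconstruction consistent with the call EpisodicBatchSampler(len(ds), n_way, n_episodes) above, not necessarily the exact class:

import torch

class EpisodicBatchSampler(object):
    """Yield n_episodes batches, each a random draw of n_way class indices."""

    def __init__(self, n_classes, n_way, n_episodes):
        self.n_classes = n_classes
        self.n_way = n_way
        self.n_episodes = n_episodes

    def __len__(self):
        return self.n_episodes

    def __iter__(self):
        for _ in range(self.n_episodes):
            yield torch.randperm(self.n_classes)[:self.n_way]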
Example #10

def load(opt, splits):
    split_dir = os.path.join(MINIIMAGENET_DATA_DIR, 'splits',
                             opt['data.split'])

    ret = {}
    for split in splits:
        if split in ['val', 'test'] and opt['data.test_way'] != 0:
            n_way = opt['data.test_way']
        else:
            n_way = opt['data.way']

        if split in ['val', 'test'] and opt['data.test_shot'] != 0:
            n_support = opt['data.test_shot']
        else:
            n_support = opt['data.shot']

        if split in ['val', 'test'] and opt['data.test_query'] != 0:
            n_query = opt['data.test_query']
        else:
            n_query = opt['data.query']

        if split in ['val', 'test']:
            n_episodes = opt['data.test_episodes']
        else:
            n_episodes = opt['data.train_episodes']

        class_index = defaultdict(list)
        with open(os.path.join(split_dir, "{:s}.csv".format(split)), 'r') as f:
            f.readline()
            for image_class in f.readlines():
                image, class_name = image_class.split(',')
                class_name = class_name.rstrip('\n')
                class_index[class_name].append(image)
        class_names = list(class_index.keys())

        transforms = [
            partial(convert_dict, 'class'),
            partial(load_class_images, class_index),
            partial(extract_episode, n_support, n_query)
        ]
        if opt['data.cuda']:
            transforms.append(CudaTransform())

        transforms = compose(transforms)

        ds = TransformDataset(ListDataset(class_names), transforms)

        if opt['data.sequential']:
            sampler = SequentialBatchSampler(len(ds))
        else:
            sampler = EpisodicBatchSampler(len(ds), n_way, n_episodes)

        # use num_workers=0, otherwise may receive duplicate episodes
        ret[split] = torch.utils.data.DataLoader(ds,
                                                 batch_sampler=sampler,
                                                 num_workers=0)

    return ret
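extract_episode is partially applied with n_support and n_query in every transform list, so it must take the sample dict as its last argument. A plausible sketch following the prototypical-networks convention of returning support (xs) and query (xq) tensors; the field names are assumptions, and the real function may differ:

import torch

def extract_episode(n_support, n_query, d):
    # d['data'] holds all examples of one class, stacked along dim 0
    n_examples = d['data'].size(0)
    # Draw n_support + n_query distinct examples, then split them
    example_inds = torch.randperm(n_examples)[:(n_support + n_query)]
    support_inds = example_inds[:n_support]
    query_inds = example_inds[n_support:]
    return {
        'class': d['class'],
        'xs': d['data'][support_inds],  # support set
        'xq': d['data'][query_inds]     # query set
    }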
Example #11
def load(opt, splits):
    split_dir = os.path.join(OMNIGLOT_DATA_DIR, 'splits', opt['data.split'])

    ret = { }
    for split in splits:
        # Determine n_way
        if split in ['val', 'test'] and opt['data.test_way'] != 0:
            n_way = opt['data.test_way']
        else:
            n_way = opt['data.way']
        # Determine the number of support examples
        if split in ['val', 'test'] and opt['data.test_shot'] != 0:
            n_support = opt['data.test_shot']
        else:
            n_support = opt['data.shot']
        # Determine the number of query examples
        if split in ['val', 'test'] and opt['data.test_query'] != 0:
            n_query = opt['data.test_query']
        else:
            n_query = opt['data.query']
        # Determine the number of episodes
        if split in ['val', 'test']:
            n_episodes = opt['data.test_episodes']
        else:
            n_episodes = opt['data.train_episodes']
        # Three transforms: build the class dict, load one class's images,
        # and extract one episode of data
        transforms = [partial(convert_dict, 'class'), # wrap the class name in a dict keyed 'class'
                      load_class_images, # load the cached images for the class
                      partial(extract_episode, n_support, n_query)] # split into support and query sets

        if opt['data.cuda']:
            transforms.append(CudaTransform())

        transforms = compose(transforms)

        class_names = []
        # Collect all class names belonging to this split
        with open(os.path.join(split_dir, "{:s}.txt".format(split)), 'r') as f:
            for class_name in f.readlines():
                class_names.append(class_name.rstrip('\n'))
        
        # Dataset that maps each class to a support/query episode
        ds = TransformDataset(ListDataset(class_names), transforms)
        
        
        if opt['data.sequential']:
            sampler = SequentialBatchSampler(len(ds))
        else:
            # Each episode randomly samples n_way classes
            sampler = EpisodicBatchSampler(len(ds), n_way, n_episodes)
        
        # Wrap the data so it is served episode by episode
        # use num_workers=0, otherwise may receive duplicate episodes
        ret[split] = torch.utils.data.DataLoader(ds, batch_sampler=sampler, num_workers=0)

    return ret
Example #12
    def testCompose(self):
        def f1(x):
            return x + 1

        def f2(x):
            return x + 2

        def f3(x):
            return x / 2

        self.assertEqual(transform.compose([f1, f2, f3])(1), 2)
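The assertion holds because compose applies its transforms left to right: ((1 + 1) + 2) / 2 == 2.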
Example #13
def load(opt, splits):
    split_dir = os.path.join(OMNIGLOT_DATA_DIR, 'splits', opt['data.split'])

    ret = {}
    for split in splits:
        if split in ['val', 'test'] and opt['data.test_way'] != 0:
            n_way = opt['data.test_way']
        else:
            n_way = opt['data.way']

        if split in ['val', 'test'] and opt['data.test_shot'] != 0:
            n_support = opt['data.test_shot']
        else:
            n_support = opt['data.shot']

        if split in ['val', 'test'] and opt['data.test_query'] != 0:
            n_query = opt['data.test_query']
        else:
            n_query = opt['data.query']

        if split in ['val', 'test']:
            n_episodes = opt['data.test_episodes']
        else:
            n_episodes = opt['data.train_episodes']

        transforms = [
            partial(convert_dict, 'class'), load_class_images,
            partial(extract_episode, n_support, n_query)
        ]
        if opt['data.cuda']:
            transforms.append(CudaTransform())

        transforms = compose(transforms)

        class_names = []
        with open(os.path.join(split_dir, "{:s}.txt".format(split)), 'r') as f:
            for class_name in f.readlines():
                class_names.append(class_name.rstrip('\n'))

        ds = TransformDataset(ListDataset(class_names), transforms)

        if opt['data.sequential']:
            sampler = SequentialBatchSampler(len(ds))
        else:
            sampler = EpisodicBatchSampler(len(ds), n_way, n_episodes)

        # use num_workers=0, otherwise may receive duplicate episodes
        ret[split] = torch.utils.data.DataLoader(ds,
                                                 batch_sampler=sampler,
                                                 num_workers=0)
    print("Ret:", type(ret))
    for key, value in ret.items():
        print(key, type(value))
    return ret
Example #14
def load(opt, splits):
    ret = {}
    for split in splits:
        if split in ['val', 'test'] and opt['data.test_way'] != 0:
            n_way = opt['data.test_way']
        else:
            n_way = opt['data.way']

        if split in ['val', 'test'] and opt['data.test_shot'] != 0:
            n_support = opt['data.test_shot']
        else:
            n_support = opt['data.shot']

        if split in ['val', 'test'] and opt['data.test_query'] != 0:
            n_query = opt['data.test_query']
        else:
            n_query = opt['data.query']

        if split in ['val', 'test']:
            n_episodes = opt['data.test_episodes']
        else:
            n_episodes = opt['data.train_episodes']

        speaker_ids = dataset[split]['class']
        data_split = dataset[split]['data']

        transforms = [
            partial(convert_dict, 'class'),
            partial(extract_episode, 'class', data_split, opt['data.min_len'],
                    opt['data.max_len'], n_support, n_query),
            partial(convert_tensor,
                    ['xq_padded', 'xs_padded', 'xq_len', 'xs_len'])
        ]
        if opt['data.cuda']:
            transforms.append(CudaTransform())

        transforms = compose(transforms)

        ds = TransformDataset(ListDataset(speaker_ids), transforms)

        #sampler = SequencialEpisodicBatchSampler(len(ds), n_way)
        if opt['data.sequential']:
            sampler = SequentialBatchSampler(len(ds))
        else:
            sampler = EpisodicBatchSampler(len(ds), n_way, n_episodes)

        ret[split] = torch.utils.data.DataLoader(ds,
                                                 batch_sampler=sampler,
                                                 num_workers=0)

    return ret
Example #15
    def __iter__(self):
        if self.dataset is None:
            self.dataset = self.load_dataset(from_disk=True)[self.split]
            transforms = [partial(batch_from_index, self.dataset['data']), partial(convert_tensor, 'data')]
            if self.if_cuda:
                transforms.append(CudaTransform())
            self.transforms = compose(transforms)
        index_batches = self.shuffle_dataset()
        batches = TransformDataset(ListDataset(index_batches), self.transforms)

        print(f"\nSize of batches: {len(batches)}")
        for batch in batches:
            batch['n_way'] = self.n_way
            batch['n_support'] = self.n_support
            batch['n_query'] = self.n_query
            yield batch
Example #16

def loader(opt):

    split_dir = os.path.join(opt.split_dir, opt.split_name)
    if opt.state == 'train':
        splits = opt.train_split_mode
    else:
        splits = ['test']

    ret = { }
    for split in splits:
        if split in ['val', 'test']:
            n_way = opt.test_way
            n_support = opt.test_shot
            n_query = opt.test_query
            n_episodes = opt.test_episodes
        else:
            n_way = opt.train_way
            n_support = opt.train_shot
            n_query = opt.train_query
            n_episodes = opt.train_episodes

        transforms = [partial(convert_dict, 'class'),
                      partial(load_class_images, opt.dataset_dir),
                      partial(extract_episode, n_support, n_query)]

        if opt.cuda:
            transforms.append(CudaTransform())

        transforms = compose(transforms)
        class_names = []

        with open(os.path.join(split_dir, "{:s}.txt".format(split)), 'r') as f:
            for class_name in f.readlines():
                class_names.append(class_name.rstrip('\n'))
        ds = TransformDataset(ListDataset(class_names), transforms)

        if opt.sequential:
            sampler = SequentialBatchSampler(len(ds))
        else:
            sampler = EpisodicBatchSampler(len(ds), n_way, n_episodes)

        # use num_workers=0, otherwise may receive duplicate episodes
        ret[split] = torch.utils.data.DataLoader(ds, batch_sampler=sampler, num_workers=0)

    return ret
Example #17
def load_class_images(d):
    label, rot = d['class'], -1

    if 'rot' in d['class']:
        label, rot = d['class'].split('/rot')
        rot = int(rot)

    if label not in MINIIMAGENET_CACHE:
        image_dir = os.path.join(MINIIMAGENET_DATA_DIR, 'data', label)

        class_images = sorted(glob.glob(os.path.join(image_dir, '*.jpg')))
        if len(class_images) == 0:
            raise Exception(
                "No images found for miniimagenet class {} at {}.".format(
                    label, image_dir))

        image_ds = TransformDataset(
            ListDataset(class_images),
            compose([
                partial(convert_dict, 'file_name'),
                partial(load_image_path, 'file_name', 'data'),
                partial(scale_image, 'data', 84, 84),
                partial(convert_tensor, 'data')
                # partial(normalize_image, 'data', {'mean': (0.47234195, 0.45386744, 0.41036746),
                #                                   'std': (0.28678342, 0.27806091, 0.29304931)})
            ]))

        loader = torch.utils.data.DataLoader(image_ds,
                                             batch_size=len(image_ds),
                                             shuffle=False)

        for sample in loader:
            MINIIMAGENET_CACHE[label] = sample['data']
            break  # only need one sample because batch size equal to dataset length

    samples = MINIIMAGENET_CACHE[label]

    # Rotates images if needed
    if rot != -1:
        nRot = rot // 90
        samples = torch.rot90(samples.cuda(), nRot, dims=[2, 3]).cpu()

    return {'class': d['class'], 'data': samples}
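This variant defers rotation until after caching and rotates the whole cached batch at once. torch.rot90(x, k, dims=[2, 3]) rotates the spatial plane of an N x C x H x W tensor by k quarter turns, so rot // 90 reproduces the /rotXXX suffix; the .cuda()/.cpu() round trip above is presumably just for speed. A toy check:

import torch

batch = torch.arange(4.0).reshape(1, 1, 2, 2)  # one toy 2x2 "image"
rotated = torch.rot90(batch, 1, dims=[2, 3])   # one 90-degree turn in H/W
assert rotated.shape == (1, 1, 2, 2)
assert rotated[0, 0, 0, 0] == batch[0, 0, 0, 1]  # corner moved as expected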
Example #18
def load_class_images(dataset, index_set, d):
    if d['class'] not in MINI_IMGNET_CACHE:
        image_ds = TransformDataset(
            ListDataset(index_set[d['class']]),
            compose([
                partial(convert_dict, 'img_idx'),
                partial(load_image, dataset, 'img_idx', 'data'),
                #partial(rotate_image, 'data', float(rot[3:])),
                partial(scale_image, 'data', 84, 84),
                partial(convert_tensor, 'data')
            ]))

        loader = torch.utils.data.DataLoader(image_ds,
                                             batch_size=len(image_ds),
                                             shuffle=False)

        for sample in loader:
            MINI_IMGNET_CACHE[d['class']] = sample['data']
            break  # only need one sample because batch size equal to dataset length

    return {'class': d['class'], 'data': MINI_IMGNET_CACHE[d['class']]}
Example #19
def _setup_class_omniglot(split, d, cache, init_entry, crop_transforms,
                          target_size, root_dir, augm_opt):
    alphabet, character, rot = d['class'].split('/')
    image_dir = os.path.join(root_dir, 'omniglot', 'data', alphabet, character)

    if augm_opt['rotation']:
        rotation_f = partial(utils.rotate_image, 'data', float(rot[3:]))
    else:
        rotation_f = partial(utils.nop)
        print(
            'WARNING - rotation augmentation is the default protocol for Omniglot'
        )

    if augm_opt['crop']:
        crop_f = partial(utils.crop, 'data', crop_transforms,
                         augm_opt['max_crop_shrink'])
    else:
        crop_f = partial(utils.nop)

    image_ds = TransformDataset(
        ListDataset(sorted(glob.glob(os.path.join(image_dir, '*.png')))),
        compose([
            partial(base.convert_dict, 'file_name'),
            partial(utils.load_image_path, 'file_name', 'data'),
            rotation_f,
            crop_f,
            partial(utils.scale_image, 'data', target_size, target_size),
            partial(utils.convert_tensor, 'data'),
        ]))

    loader = torch.utils.data.DataLoader(image_ds,
                                         batch_size=len(image_ds),
                                         shuffle=False)

    for sample in loader:
        if init_entry:
            cache.data[d['class']] = []

        cache.data[d['class']].append(sample['data'])
        break  # only need one sample because batch size equal to dataset length
Example #20
def load_class_nlp(corpus, d):
    if d['class'] not in NLP_CACHE:
        class_corpus = list(filter(lambda x: x.domain == d["class"], corpus))

        image_ds = TransformDataset(
            ListDataset(class_corpus),
            compose([
                partial(convert_corpus, 'data'),
                partial(lookup, vocab, 'data'),
                partial(pad_text, 'data', 28),
                partial(convert_tensor, 'data')
            ]))

        loader = torch.utils.data.DataLoader(image_ds,
                                             batch_size=len(image_ds),
                                             shuffle=False)

        for sample in loader:
            NLP_CACHE[d['class']] = sample['data']
            break  # only need one sample because batch size equal to dataset length

    return {'class': d['class'], 'data': NLP_CACHE[d['class']]}
Example #21
def load_class_audio(split, d):
    class_audio = dataset[split][d['class']]

    if len(class_audio) == 0:
        raise Exception(f"No audio found for speaker {d['class']}")

    audio_ds = TransformDataset(
        ListDataset(class_audio),
        compose([
            partial(convert_dict, 'file_name'),
            partial(extract_audio_mfcc, d['class'], 'file_name', 'data')
        ]))

    loader = torch.utils.data.DataLoader(audio_ds,
                                         batch_size=len(audio_ds),
                                         shuffle=False)

    for sample in loader:
        data = sample
        break  # only need one sample because batch size equal to dataset length

    return {'class': d['class'], 'data': data}
Example #22
def _setup_class_miniimagenet(split, d, cache, init_entry, crop_transforms,
                              target_size, root_dir, augm_opt):
    image_dir = os.path.join(root_dir, 'miniimagenet', 'data', d['class'])

    if augm_opt['rotation']:
        raise ValueError(
            'Augmentation with rotation not implemented for miniimagenet')

    if augm_opt['crop']:
        crop_f = partial(utils.crop, 'data', crop_transforms,
                         augm_opt['max_crop_shrink'])
        scale_f = partial(utils.scale_image, 'data', target_size, target_size)
    else:
        crop_f = partial(utils.nop)
        scale_f = partial(utils.nop)

    image_ds = TransformDataset(
        ListDataset(sorted(glob.glob(os.path.join(image_dir, '*.jpg')))),
        compose([
            partial(base.convert_dict, 'file_name'),
            partial(utils.load_image_path, 'file_name', 'data'),
            crop_f,
            scale_f,
            partial(utils.to_tensor, 'data'),
            # partial(utils.normalize_mini_image, 'data')
        ]))

    loader = torch.utils.data.DataLoader(image_ds,
                                         batch_size=len(image_ds),
                                         shuffle=False)

    for sample in loader:
        if init_entry:
            cache.data[d['class']] = []

        cache.data[d['class']].append(sample['data'])
        break  # only need one sample because batch size equal to dataset length
Example #23

def load_class_images(class_index, d):
    if d['class'] not in MINIIMAGENET_CACHE:
        class_id = d['class']
        image_dir = os.path.join(MINIIMAGENET_DATA_DIR, 'data', class_id)

        class_images = [
            os.path.join(image_dir, 'images', img)
            for img in class_index[d['class']]
        ]

        if len(class_images) == 0:
            raise Exception("No images found for class %s." % d['class'])

        for image_path in class_images:
            if not os.path.exists(image_path):
                extract_images(class_index[d['class']], image_dir)
                break

        image_ds = TransformDataset(
            ListDataset(class_images),
            compose([
                partial(convert_dict, 'file_name'),
                partial(load_image_path, 'file_name', 'data'),
                #partial(rotate_image, 'data', float(rot[3:])),
                partial(scale_image, 'data', 84, 84),
                partial(convert_tensor, 'data')
            ]))

        loader = torch.utils.data.DataLoader(image_ds,
                                             batch_size=len(image_ds),
                                             shuffle=False)

        for sample in loader:
            MINIIMAGENET_CACHE[d['class']] = sample['data']
            break  # only need one sample because batch size equal to dataset length

    return {'class': d['class'], 'data': MINIIMAGENET_CACHE[d['class']]}
Example #24
 def testCompose(self):
     f1 = lambda x: x + 1
     f2 = lambda x: x + 2
     f3 = lambda x: x / 2
     self.assertEqual(transform.compose([f1, f2, f3])(1), 2)
Example #25
def load_data(opt, splits):
    global root_dir
    root_dir = opt['data.root_dir']
    augm_opt = filter_opt(opt, 'augm')
    dataset = opt['data.dataset']
    split_dir = os.path.join(opt['data.root_dir'], opt['data.dataset'],
                             'splits', opt['data.split'])

    ret = {}
    # cache = {}
    cache = Cache()

    for split in splits:
        if split in ['val1', 'val5', 'test']:
            n_way = opt['data.test_way']
        else:
            n_way = opt['data.way']

        if split in ['train', 'trainval']:
            # random shots
            SE = SetupEpisode(batch_size=opt['data.batch_size'],
                              shot_max=opt['data.shot_max'],
                              fixed_shot=opt['data.shot'],
                              way_min=opt['data.way_min'],
                              fixed_way=n_way)
        elif split == 'val1':
            SE = SetupEpisode(batch_size=opt['data.batch_size'],
                              shot_max=opt['data.shot_max'],
                              fixed_shot=1,
                              way_min=opt['data.way_min'],
                              fixed_way=n_way)
        elif split == 'val5':
            SE = SetupEpisode(batch_size=opt['data.batch_size'],
                              shot_max=opt['data.shot_max'],
                              fixed_shot=5,
                              way_min=opt['data.way_min'],
                              fixed_way=n_way)
        else:
            SE = SetupEpisode(batch_size=opt['data.batch_size'],
                              shot_max=opt['data.shot_max'],
                              fixed_shot=opt['data.test_shot'],
                              way_min=opt['data.way_min'],
                              fixed_way=n_way)

        if split in ['val1', 'val5', 'test']:
            n_episodes = opt['data.test_episodes']
        else:
            n_episodes = opt['data.train_episodes']

        transforms = [
            partial(convert_dict, 'class'),
            partial(load_class_images, split, dataset, cache, augm_opt),
            partial(extract_episode, SE, augm_opt)
        ]

        if opt['data.cuda']:
            transforms.append(CudaTransform())

        transforms = compose(transforms)

        class_names = []
        split_file = 'val.txt' if split in ['val1', 'val5'
                                            ] else "{:s}.txt".format(split)
        with open(os.path.join(split_dir, split_file), 'r') as f:
            for class_name in f.readlines():
                class_names.append(class_name.rstrip('\n'))
        ds = TransformDataset(ListDataset(class_names), transforms)

        sampler = EpisodicBatchSampler(SE, len(ds), n_episodes)

        # use num_workers=0, otherwise may receive duplicate episodes
        ret[split] = torch.utils.data.DataLoader(ds,
                                                 batch_sampler=sampler,
                                                 num_workers=0)

    return ret
Example #26
def load_kws(opt, splits):
    #split_dir = os.path.join(KWS_DATA_DIR, 'splits', opt['data.split'])
    dataset_self = {}
    if splits[0] == 'test':
        files = sorted(os.listdir(KWS_DATA_DIR_TEST))
        class_names = []
        for file in files:
            class_name = file.split('_')[0]
            if class_name not in class_names:
                class_names.append(class_name)
        dataset_self['test'] = class_names
        data_dir = KWS_DATA_DIR_TEST
    else:
        data_dir = KWS_DATA_DIR
        files = sorted(os.listdir(KWS_DATA_DIR))
        val_class_names = [
            'label01', 'label13', 'label03', 'label13', 'label03', 'label13',
            'label03', 'label03'
        ]
        class_names = []
        for file in files:
            class_name = file.split('_')[0]
            if (class_name not in class_names
                    and class_name not in val_class_names):
                class_names.append(class_name)
        train_data = {}
        for name in class_names:
            name_files = []
            for file in files:
                if name in file:
                    name_files.append(file)
            train_data[name] = name_files

        val_data = {}
        for name in val_class_names:
            name_files = []
            for file in files:
                if name in file:
                    name_files.append(file)
            val_data[name] = name_files

        dataset_self['train'] = class_names
        dataset_self['val'] = val_class_names
    ret = {}
    for split in splits:
        if split in ['val', 'test'] and opt['data.test_way'] != 0:
            n_way = opt['data.test_way']
        else:
            n_way = opt['data.way']

        if split in ['val', 'test'] and opt['data.test_shot'] != 0:
            n_support = opt['data.test_shot']
        else:
            n_support = opt['data.shot']

        if split in ['val', 'test'] and opt['data.test_query'] != 0:
            n_query = opt['data.test_query']
        else:
            n_query = opt['data.query']

        if split in ['val', 'test']:
            n_episodes = opt['data.test_episodes']
        else:
            n_episodes = opt['data.train_episodes']

        transforms = [
            partial(convert_dict, 'class'),
            partial(load_class_features, data_dir),
            partial(extract_episode, n_support, n_query)
        ]
        if opt['data.cuda']:
            transforms.append(CudaTransform())

        transforms = compose(transforms)
        ds = TransformDataset(ListDataset(dataset_self[split]), transforms)

        if opt['data.sequential']:
            sampler = SequentialBatchSampler(len(ds))
        else:
            sampler = EpisodicBatchSampler(len(ds), n_way, n_episodes)

        # use num_workers=0, otherwise may receive duplicate episodes
        ret[split] = torch.utils.data.DataLoader(ds,
                                                 batch_sampler=sampler,
                                                 num_workers=0)

    return ret
Example #27
 def testCompose(self):
     self.assertEqual(
         transform.compose(
             [lambda x: x + 1, lambda x: x + 2, lambda x: x / 2])(1), 2)
Example #28
def load(opt, splits):
    split_dir = os.path.join(MINIIMAGENET_DATA_DIR, 'splits',
                             opt['data.split'])

    ret = {}
    for split in splits:
        if split in ['val', 'test'] and opt['data.test_way'] != 0:
            n_way = opt['data.test_way']
        else:
            n_way = opt['data.way']

        if split in ['val', 'test'] and opt['data.test_shot'] != 0:
            n_support = opt['data.test_shot']
        else:
            n_support = opt['data.shot']

        if split in ['val', 'test'] and opt['data.test_query'] != 0:
            n_query = opt['data.test_query']
        else:
            n_query = opt['data.query']

        if split in ['val', 'test']:
            n_episodes = opt['data.test_episodes']
        else:
            n_episodes = opt['data.train_episodes']

        transforms = [
            partial(convert_dict, 'class'), load_class_images,
            partial(extract_episode, n_support, n_query)
        ]
        if opt['data.cuda']:
            transforms.append(CudaTransform())

        transforms = compose(transforms)

        class_names = []
        with open(os.path.join(split_dir, "{:s}.csv".format(split)), 'r') as f:
            for class_name in f.readlines():
                name = class_name.split(',')[1].rstrip('\n')

                if name == 'label':
                    continue

                if opt['data.augmented']:
                    class_names.extend([
                        name + '/rot000', name + '/rot090', name + '/rot180',
                        name + '/rot270'
                    ])
                else:
                    class_names.append(name)
        ds = TransformDataset(ListDataset(class_names), transforms)

        if opt['data.sequential']:
            sampler = SequentialBatchSampler(len(ds))
        else:
            sampler = EpisodicBatchSampler(len(ds), n_way, n_episodes)

        # use num_workers=0, otherwise may receive duplicate episodes
        ret[split] = torch.utils.data.DataLoader(ds,
                                                 batch_sampler=sampler,
                                                 num_workers=0)

    return ret
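Note the link to Example #17: with data.augmented set, every class name is expanded into four rotated variants (/rot000 through /rot270), and Example #17 shows the consumer side, splitting the /rotXXX suffix off the class name and applying the rotation to the cached tensor with torch.rot90.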