Example 1
        def get_list_dataset(pair_type):
            ele_list = dataset[pair_type][:len(dataset[pair_type]) //
                                          batch_size * batch_size]

            def load(idx):
                o = {
                    'input':
                    np.stack([(dataset['patches'][v].astype(np.float32) -
                               dataset['mean'][v]) / 256.0 for v in idx]),
                    'target':
                    1 if pair_type == 'matches' else -1
                }
                o['input'] = torch.from_numpy(o['input'])
                o['target'] = torch.LongTensor([o['target']])
                o['input'] = o['input'].float()
                o['target'] = o['target'].float()
                o['input'] = o['input'].cuda()
                o['target'] = o['target'].cuda()
                return o

            ele_list = list(map(load, ele_list))
            ds = ListDataset(elem_list=ele_list)
            # ds = ds.transform({'input': torch.from_numpy, 'target': lambda x: torch.LongTensor([x])})

            return ds.batch(policy='include-last', batchsize=batch_size // 2)
Example 2
    def get_list_dataset(pair_type):
        ds = ListDataset(elem_list=dataset[pair_type],
                         load=lambda idx: {'input': np.stack((dataset['patches'][v].astype(np.float32)
                                                              - dataset['mean'][v]) / 256.0 for v in idx),
                                           'target': 1 if pair_type == 'matches' else -1})
        ds = ds.transform({'input': torch.from_numpy, 'target': lambda x: torch.LongTensor([x])})

        return ds.batch(policy='include-last', batchsize=batch_size // 2)
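
The examples above and below share one pipeline: wrap raw elements in a ListDataset with a load function, map per-key transforms over the resulting dicts, then group them into batches. The following minimal, self-contained sketch of that pipeline uses dummy data; the torchnet import path and the transform/batch signatures mirror the calls in Example 2 but should be treated as assumptions to check against your installed tnt version.

# Minimal sketch of the ListDataset -> transform -> batch pipeline above,
# on dummy data (the patch array and index pairs here are placeholders).
import numpy as np
import torch
from torchnet.dataset import ListDataset  # assumed tnt import path

patches = np.random.rand(8, 1, 64, 64).astype(np.float32)
pairs = [(0, 1), (2, 3), (4, 5), (6, 7)]  # stands in for dataset['matches']

ds = ListDataset(elem_list=pairs,
                 load=lambda idx: {
                     # np.stack wants a sequence, hence the list comprehension
                     'input': np.stack([patches[v] for v in idx]),
                     'target': 1
                 })
ds = ds.transform({'input': torch.from_numpy,
                   'target': lambda x: torch.LongTensor([x])})
ds = ds.batch(policy='include-last', batchsize=2)

for batch in ds:
    print(batch['input'].shape, batch['target'])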
Example 3
    def load_class_samples(self, d):
        if d['class'] not in self.data_cache:
            if d['class'] == '_silence_':
                samples = torch.zeros(self.silence_num_samples, 1,
                                      self.desired_samples)
                sample_ds = TransformDataset(
                    ListDataset(samples),
                    compose([
                        partial(convert_dict, 'data'),
                        partial(self.mix_background, True, 'data'),
                        partial(self.extract_features, 'data')
                    ]))

            else:
                samples = []

                if d['class'] == '_unknown_':
                    sample_dir = os.path.join(self.data_dir, '..',
                                              '_unknown_')
                    split = os.path.basename(self.class_file)
                    unknown_wavs = os.path.join(sample_dir, split)
                    with open(unknown_wavs, 'r') as rf:
                        samples = [
                            os.path.join(sample_dir, wav_file.strip('\n'))
                            for wav_file in rf.readlines()
                        ]
                else:
                    sample_dir = os.path.join(self.data_dir, d['class'])
                    samples = glob.glob(os.path.join(sample_dir, '*.wav'))

                if len(samples) == 0:
                    # sample_dir is set on both branches, so the message is
                    # valid for '_unknown_' classes as well
                    raise Exception(
                        "No Samples found for GoogleSpeechCommand {} at {}".
                        format(d['class'], sample_dir))

                sample_ds = TransformDataset(
                    ListDataset(samples),
                    compose([
                        partial(convert_dict, 'file_name'),
                        partial(self.load_audio, 'file_name', 'data'),
                        partial(self.adjust_volume, 'data'),
                        partial(self.shift_and_pad, 'data'),
                        partial(self.mix_background, self.use_background,
                                'data'),
                        partial(self.extract_features, 'data')
                    ]))

            loader = torch.utils.data.DataLoader(sample_ds,
                                                 batch_size=len(sample_ds),
                                                 shuffle=False)

            for sample in loader:
                self.data_cache[d['class']] = sample['data']
                break  # only need one sample because batch size equal to dataset length

        return {'class': d['class'], 'data': self.data_cache[d['class']]}
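
Almost every loader in this listing caches a class with the same idiom: a DataLoader whose batch_size equals the dataset length yields the entire dataset as one batch, so the for loop body runs once and the break is a safeguard. Stripped of the dataset specifics, the idiom reduces to this sketch (names are illustrative):

# Generic sketch of the cache-warming idiom used throughout these examples.
import torch

CACHE = {}

def warm_cache(key, dataset):
    if key not in CACHE:
        loader = torch.utils.data.DataLoader(dataset,
                                             batch_size=len(dataset),
                                             shuffle=False)
        for sample in loader:
            CACHE[key] = sample  # one batch == the whole dataset
            break  # only one batch exists anyway
    return CACHE[key]

toy = torch.utils.data.TensorDataset(torch.randn(5, 3))
print(warm_cache('toy', toy)[0].shape)  # torch.Size([5, 3])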
Example 4
    def get_list_dataset(pair_type):
        ds = ListDataset(elem_list=dataset[pair_type],
                         load=lambda idx: {
                             'input':
                             np.stack([(dataset['patches'][v].astype(np.float32)
                                        - dataset['mean'][v]) / 256.0
                                       for v in idx]),
                             'target':
                             1 if pair_type == 'matches' else -1
                         })
        ds = ds.transform({
            'input': torch.from_numpy,
            'target': lambda x: torch.LongTensor([x])
        })

        return ds.batch(policy='include-last', batchsize=batch_size // 2)
Example 5
def load_class_images(d):
    if d['class'] not in CIFAR100_CACHE:
        image_dir = os.path.join(CIFAR100_DATA_DIR, 'data', d['class'])

        class_images = sorted(glob.glob(os.path.join(image_dir, '*.jpg')))
        if len(class_images) == 0:
            raise Exception(
                "No images found for CIFAR100 class {} at {}.".format(
                    d['class'], image_dir))

        image_ds = TransformDataset(
            ListDataset(class_images),
            compose([
                partial(convert_dict, 'file_name'),
                partial(load_image_path, 'file_name', 'data'),
                partial(scale_image, 'data', 32, 32),
                partial(convert_tensor, 'data')
                # partial(normalize_image, 'data', {'mean': (0.50400572, 0.48892908, 0.44281732),
                #                                   'std': (0.26477088, 0.25454896, 0.27408391)})
            ]))

        loader = torch.utils.data.DataLoader(image_ds,
                                             batch_size=len(image_ds),
                                             shuffle=False)

        for sample in loader:
            CIFAR100_CACHE[d['class']] = sample['data']
            break  # only need one sample because batch size equal to dataset length

    return {'class': d['class'], 'data': CIFAR100_CACHE[d['class']]}
Example 6
def load_class_images(d):
    if d['class'] not in OMNIGLOT_CACHE:
        alphabet, character, rot = d['class'].split('/')
        image_dir = os.path.join(OMNIGLOT_DATA_DIR, 'data', alphabet,
                                 character)

        class_images = sorted(glob.glob(os.path.join(image_dir, '*.png')))
        if len(class_images) == 0:
            raise Exception(
                "No images found for omniglot class {} at {}. Did you run download_omniglot.sh first?"
                .format(d['class'], image_dir))

        image_ds = TransformDataset(
            ListDataset(class_images),
            compose([
                partial(convert_dict, 'file_name'),
                partial(load_image_path, 'file_name', 'data'),
                partial(rotate_image, 'data', float(rot[3:])),
                partial(scale_image, 'data', 28, 28),
                partial(convert_tensor, 'data')
            ]))

        loader = torch.utils.data.DataLoader(image_ds,
                                             batch_size=len(image_ds),
                                             shuffle=False)

        for sample in loader:
            OMNIGLOT_CACHE[d['class']] = sample['data']
            break  # only need one sample because batch size equal to dataset length

    return {'class': d['class'], 'data': OMNIGLOT_CACHE[d['class']]}
Example 7
def load_class_images(d):
    if d['class'] not in OMNIGLOT_CACHE:
        alphabet, character, rot = d['class'].split('/')
        image_dir = os.path.join(OMNIGLOT_DATA_DIR, 'data', alphabet,
                                 character)
        #print(">>>" + image_dir)
        image_ds = TransformDataset(
            ListDataset(sorted(glob.glob(os.path.join(image_dir, '*.png')))),
            compose([
                partial(convert_dict, 'file_name'),
                partial(load_image_path, 'file_name', 'data'),
                partial(rotate_image, 'data', float(rot[3:])),
                partial(scale_image, 'data', 28, 28),
                partial(convert_tensor, 'data')
            ]))
        #print(len(image_ds))

        loader = torch.utils.data.DataLoader(image_ds,
                                             batch_size=len(image_ds),
                                             shuffle=False)
        for sample in loader:
            #print(sample)
            OMNIGLOT_CACHE[d['class']] = sample['data']
            break  # only need one sample because batch size equal to dataset length

    return {'class': d['class'], 'data': OMNIGLOT_CACHE[d['class']]}
Example 8
def load_class_images(d):
    if d['class'] not in OMNIGLOT_CACHE:
        # Get the path components
        alphabet, character, rot = d['class'].split('/')
        image_dir = os.path.join(OMNIGLOT_DATA_DIR, 'data', alphabet, character)
        # Collect all images under the class directory
        class_images = sorted(glob.glob(os.path.join(image_dir, '*.png')))
        if len(class_images) == 0:
            raise Exception("No images found for omniglot class {} at {}. Did you run download_omniglot.sh first?".format(d['class'], image_dir))

        # ListDataset loads the data from the image list.
        # Processing: wrap in a dict, load the image, rotate it, normalize
        # its size, and convert it to a tensor
        image_ds = TransformDataset(ListDataset(class_images),
                                    compose([partial(convert_dict, 'file_name'),
                                             partial(load_image_path, 'file_name', 'data'),
                                             partial(rotate_image, 'data', float(rot[3:])),
                                             partial(scale_image, 'data', 28, 28),
                                             partial(convert_tensor, 'data')]))

        # Put the whole dataset into a single batch
        loader = torch.utils.data.DataLoader(image_ds, batch_size=len(image_ds), shuffle=False)

        # Take one batch
        for sample in loader:
            # Write the image data into OMNIGLOT_CACHE
            OMNIGLOT_CACHE[d['class']] = sample['data']
            break  # only need one sample because batch size equal to dataset length

    # Return a dict of the class name and its cached data
    return { 'class': d['class'], 'data': OMNIGLOT_CACHE[d['class']] }
Example 9
    def load(self, config, splits):
        split_dir = os.path.join(self.split_dir, config.data.split)

        ret = {}
        for split in splits:
            which = split in ['val', 'test']
            n_way = config.data.test_way if which and config.data.test_way != 0 else config.data.way
            n_support = config.data.test_shot if which and config.data.test_shot != 0 else config.data.shot
            n_query = config.data.test_query if which and config.data.test_query != 0 else config.data.query
            n_episodes = config.data.test_episodes if which else config.data.train_episodes

            class_names = self.read_class_names(split_dir=split_dir,
                                                split=split)
            dataset = TransformDataset(
                ListDataset(class_names),
                TransformCompose([
                    self.load_class_images,
                    TransformExtractEpisode(n_support=n_support,
                                            n_query=n_query)
                ]))
            if config.data.sequential:
                sampler = SequentialBatchSampler(len(dataset))
            else:
                sampler = EpisodicBatchSampler(len(dataset), n_way, n_episodes)

            ret[split] = torch.utils.data.DataLoader(dataset,
                                                     batch_sampler=sampler,
                                                     num_workers=0)
        return ret
Example 10
def load_class_images(d):
    if d['class'] not in MINIIMAGENET_CACHE:
        image, classname = d['class'].split(',')
        image_dir = os.path.join(MINIIMAGENET_DATA_DIR, 'images')
        # Get all images with same class
        class_images = sorted(
            glob.glob(os.path.join(image_dir, '{}*'.format(classname))))
        if len(class_images) == 0:
            raise Exception(
                "No images found for miniImagenet class {} at {}.".format(
                    d['class'], image_dir))

        image_ds = TransformDataset(
            ListDataset(class_images),
            compose([
                partial(convert_dict, 'file_name'),
                partial(load_image_path, 'file_name', 'data'),
                #partial(rotate_image, 'data', float(rot[3:])),
                partial(scale_image, 'data', 84, 84),
                partial(convert_tensor, 'data')
            ]))

        loader = torch.utils.data.DataLoader(image_ds,
                                             batch_size=len(image_ds),
                                             shuffle=False)

        for sample in loader:
            print(sample['data'].shape)
            MINIIMAGENET_CACHE[d['class']] = sample['data']
            break  # only need one sample because batch size equal to dataset length

    return {'class': d['class'], 'data': MINIIMAGENET_CACHE[d['class']]}
Example 11
    def __init__(self, data_dir, class_file, n_support, n_query, cuda, args):
        self.sample_rate = args['sample_rate']
        self.clip_duration_ms = args['clip_duration']
        self.window_size_ms = args['window_size']
        self.window_stride_ms = args['window_stride']
        self.feature_bin_count = args['num_features']
        self.foreground_volume = args['foreground_volume']
        self.time_shift_ms = args['time_shift']
        self.use_background = args['include_background']
        self.background_volume = args['bg_volume']
        self.background_frequency = args['bg_frequency']
        self.desired_samples = int(self.sample_rate * self.clip_duration_ms /
                                   1000)
        self.silence = args['include_silence']
        self.silence_num_samples = args['num_silence']
        self.unknown = args['include_unknown']
        self.data_cache = {}
        self.data_dir = data_dir
        self.class_file = class_file
        self.n_support = n_support
        self.n_query = n_query
        self.background_data = self.load_background_data()
        self.mfcc = self.build_mfcc_extractor()
        self.transforms = [
            partial(convert_dict, 'class'), self.load_class_samples,
            self.extract_episode
        ]
        if cuda:
            self.transforms.append(CudaTransform())
        self.class_names = self.read()
        transforms = compose(self.transforms)
        super().__init__(ListDataset(self.class_names), transforms)
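
The transforms list built in this constructor relies on the protonets convention that every stage maps a dict to a dict, so compose can thread one episode dict through the whole pipeline. A small sketch of that convention follows; the one-line convert_dict and compose definitions are assumptions modeled on the helpers of the same name, not the originals.

# Sketch of the dict-to-dict transform convention behind these pipelines.
from functools import partial

def convert_dict(key, value):
    # Wrap a raw element into the dict format later stages expect.
    return {key: value}

def compose(transforms):
    # Apply the transforms left to right.
    def composed(x):
        for t in transforms:
            x = t(x)
        return x
    return composed

def add_length(d):
    d['length'] = len(d['class'])
    return d

pipeline = compose([partial(convert_dict, 'class'), add_length])
print(pipeline('Latin/character01'))
# {'class': 'Latin/character01', 'length': 17}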
Example 12
def load_class_images(d):
    if d['class'] not in IMAGENET_CACHE:
        image_dir = os.path.join(IMAGENET_DATA_DIR, 'data', d['class'])

        class_images = sorted(glob.glob(os.path.join(image_dir, '*.jpg')))
        if len(class_images) == 0:
            raise Exception(
                "No images found for miniImagenet class {} at {}.".format(
                    d['class'], image_dir))

        image_ds = TransformDataset(
            ListDataset(class_images),
            compose([
                partial(convert_dict, 'file_name'),
                partial(load_image_path, 'file_name', 'data'),
                partial(scale_image, 'data', 84, 84),
                partial(convert_tensor, 'data')
            ]))

        loader = torch.utils.data.DataLoader(image_ds,
                                             batch_size=len(image_ds),
                                             shuffle=False)

        for sample in loader:
            IMAGENET_CACHE[d['class']] = sample['data']
            break  # only need one sample because batch size equal to dataset length

    return {'class': d['class'], 'data': IMAGENET_CACHE[d['class']]}
Example 13
def load(opt, splits):
    split_dir = os.path.join(MINI_IMGNET_DATA_DIR, 'splits', opt['data.split'])

    ret = {}
    for split in splits:
        if split in ['val', 'test'] and opt['data.test_way'] != 0:
            n_way = opt['data.test_way']
        else:
            n_way = opt['data.way']

        if split in ['val', 'test'] and opt['data.test_shot'] != 0:
            n_support = opt['data.test_shot']
        else:
            n_support = opt['data.shot']

        if split in ['val', 'test'] and opt['data.test_query'] != 0:
            n_query = opt['data.test_query']
        else:
            n_query = opt['data.query']

        if split in ['val', 'test']:
            n_episodes = opt['data.test_episodes']
        else:
            n_episodes = opt['data.train_episodes']

        cache_path = get_cache_path(split)
        if os.path.exists(cache_path):
            with open(cache_path, "rb") as f:
                try:
                    data = pkl.load(f, encoding='bytes')
                    img_data = data[b'image_data']
                    class_dict = data[b'class_dict']
                except KeyError:
                    # the pickle uses str keys; rewind and reload without
                    # byte decoding
                    f.seek(0)
                    data = pkl.load(f)
                    img_data = data['image_data']
                    class_dict = data['class_dict']

        transforms = [
            partial(convert_dict, 'class'),
            partial(load_class_images, img_data, class_dict),
            partial(extract_episode, n_support, n_query)
        ]
        if opt['data.cuda']:
            transforms.append(CudaTransform())
        class_names = list(class_dict)
        transforms = compose(transforms)
        ds = TransformDataset(ListDataset(class_names), transforms)

        if opt['data.sequential']:
            sampler = SequentialBatchSampler(len(ds))
        else:
            sampler = EpisodicBatchSampler(len(ds), n_way, n_episodes)

        # use num_workers=0, otherwise may receive duplicate episodes
        ret[split] = torch.utils.data.DataLoader(ds,
                                                 batch_sampler=sampler,
                                                 num_workers=0)

    return ret
Example 14
def load(opt, splits):
    split_dir = os.path.join(MINIIMAGENET_DATA_DIR, 'splits',
                             opt['data.split'])

    ret = {}
    for split in splits:
        if split in ['val', 'test'] and opt['data.test_way'] != 0:
            n_way = opt['data.test_way']
        else:
            n_way = opt['data.way']

        if split in ['val', 'test'] and opt['data.test_shot'] != 0:
            n_support = opt['data.test_shot']
        else:
            n_support = opt['data.shot']

        if split in ['val', 'test'] and opt['data.test_query'] != 0:
            n_query = opt['data.test_query']
        else:
            n_query = opt['data.query']

        if split in ['val', 'test']:
            n_episodes = opt['data.test_episodes']
        else:
            n_episodes = opt['data.train_episodes']

        class_index = defaultdict(list)
        with open(os.path.join(split_dir, "{:s}.csv".format(split)), 'r') as f:
            f.readline()
            for image_class in f.readlines():
                image, class_name = image_class.split(',')
                class_name = class_name.rstrip('\n')
                class_index[class_name].append(image)
        class_names = list(class_index.keys())

        transforms = [
            partial(convert_dict, 'class'),
            partial(load_class_images, class_index),
            partial(extract_episode, n_support, n_query)
        ]
        if opt['data.cuda']:
            transforms.append(CudaTransform())

        transforms = compose(transforms)

        ds = TransformDataset(ListDataset(class_names), transforms)

        if opt['data.sequential']:
            sampler = SequentialBatchSampler(len(ds))
        else:
            sampler = EpisodicBatchSampler(len(ds), n_way, n_episodes)

        # use num_workers=0, otherwise may receive duplicate episodes
        ret[split] = torch.utils.data.DataLoader(ds,
                                                 batch_sampler=sampler,
                                                 num_workers=0)

    return ret
Example 15
def load(opt, splits):
    split_dir = os.path.join(OMNIGLOT_DATA_DIR, 'splits', opt['data.split'])

    ret = {}
    for split in splits:
        # Get n_way
        if split in ['val', 'test'] and opt['data.test_way'] != 0:
            n_way = opt['data.test_way']
        else:
            n_way = opt['data.way']
        # Get the number of support examples
        if split in ['val', 'test'] and opt['data.test_shot'] != 0:
            n_support = opt['data.test_shot']
        else:
            n_support = opt['data.shot']
        # Get the number of query examples
        if split in ['val', 'test'] and opt['data.test_query'] != 0:
            n_query = opt['data.test_query']
        else:
            n_query = opt['data.query']
        # Get the number of episodes
        if split in ['val', 'test']:
            n_episodes = opt['data.test_episodes']
        else:
            n_episodes = opt['data.train_episodes']
        # Three transforms: wrap the class name in a dict, load the class
        # images, and extract one episode of data
        transforms = [partial(convert_dict, 'class'), # wrap the class name in a dict keyed by 'class'
                      load_class_images, # load the images of one class
                      partial(extract_episode, n_support, n_query)] # take the support and query sets for each class

        if opt['data.cuda']:
            transforms.append(CudaTransform())

        transforms = compose(transforms)

        class_names = []
        # Read all class names belonging to this split
        with open(os.path.join(split_dir, "{:s}.txt".format(split)), 'r') as f:
            for class_name in f.readlines():
                class_names.append(class_name.rstrip('\n'))

        # Split every class into support and query sets
        ds = TransformDataset(ListDataset(class_names), transforms)

        if opt['data.sequential']:
            sampler = SequentialBatchSampler(len(ds))
        # Each episode randomly samples n_way classes
        else:
            sampler = EpisodicBatchSampler(len(ds), n_way, n_episodes)

        # Wrap the data; it is served as a sequence of episodes
        # use num_workers=0, otherwise may receive duplicate episodes
        ret[split] = torch.utils.data.DataLoader(ds, batch_sampler=sampler, num_workers=0)

    return ret
Example 16
def get_data_generator(path, params):
    # list.txt contains the list of data files, one per line
    list_dataset = ListDataset(os.listdir(path), get_data, path)
    concat_dataset = ConcatDataset(list_dataset)
    # Loads data only when needed, in parallel, on up to <num_workers> threads.
    generator = DataLoader(dataset=concat_dataset, **params)  # , collate_fn=batchify
    n_features = len(concat_dataset[0]) - 1
    return generator, n_features
Example 17
def load(opt, splits):
    split_dir = os.path.join(OMNIGLOT_DATA_DIR, 'splits', opt['data.split'])

    ret = {}
    for split in splits:
        if split in ['val', 'test'] and opt['data.test_way'] != 0:
            n_way = opt['data.test_way']
        else:
            n_way = opt['data.way']

        if split in ['val', 'test'] and opt['data.test_shot'] != 0:
            n_support = opt['data.test_shot']
        else:
            n_support = opt['data.shot']

        if split in ['val', 'test'] and opt['data.test_query'] != 0:
            n_query = opt['data.test_query']
        else:
            n_query = opt['data.query']

        if split in ['val', 'test']:
            n_episodes = opt['data.test_episodes']
        else:
            n_episodes = opt['data.train_episodes']

        transforms = [
            partial(convert_dict, 'class'), load_class_images,
            partial(extract_episode, n_support, n_query)
        ]
        if opt['data.cuda']:
            transforms.append(CudaTransform())

        transforms = compose(transforms)

        class_names = []
        with open(os.path.join(split_dir, "{:s}.txt".format(split)), 'r') as f:
            for class_name in f.readlines():
                class_names.append(class_name.rstrip('\n'))

        ds = TransformDataset(ListDataset(class_names), transforms)

        if opt['data.sequential']:
            sampler = SequentialBatchSampler(len(ds))
        else:
            sampler = EpisodicBatchSampler(len(ds), n_way, n_episodes)

        # use num_workers=0, otherwise may receive duplicate episodes
        ret[split] = torch.utils.data.DataLoader(ds,
                                                 batch_sampler=sampler,
                                                 num_workers=0)
    print("Ret:", type(ret))
    for key, value in ret.items():
        print(key, type(value))
    return ret
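
What makes these loaders episodic is the batch_sampler: each "batch" it yields is a set of n_way class indices, so every DataLoader batch is one few-shot episode drawn through the per-class transform pipeline. Below is a sketch of a sampler meeting that contract; the EpisodicBatchSampler referenced above is assumed to behave like this, following the prototypical-networks codebase.

# Sketch of an episodic sampler: every yielded "batch" is n_way random class
# indices, so each DataLoader batch becomes one few-shot episode.
import torch

class EpisodicBatchSamplerSketch(object):
    def __init__(self, n_classes, n_way, n_episodes):
        self.n_classes = n_classes
        self.n_way = n_way
        self.n_episodes = n_episodes

    def __len__(self):
        return self.n_episodes

    def __iter__(self):
        for _ in range(self.n_episodes):
            yield torch.randperm(self.n_classes)[:self.n_way]

sampler = EpisodicBatchSamplerSketch(n_classes=20, n_way=5, n_episodes=2)
for episode in sampler:
    print(episode)  # e.g. tensor([ 3, 17,  8,  0, 11])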
Example 18
def load(opt, splits):
    ret = {}
    for split in splits:
        if split in ['val', 'test'] and opt['data.test_way'] != 0:
            n_way = opt['data.test_way']
        else:
            n_way = opt['data.way']

        if split in ['val', 'test'] and opt['data.test_shot'] != 0:
            n_support = opt['data.test_shot']
        else:
            n_support = opt['data.shot']

        if split in ['val', 'test'] and opt['data.test_query'] != 0:
            n_query = opt['data.test_query']
        else:
            n_query = opt['data.query']

        if split in ['val', 'test']:
            n_episodes = opt['data.test_episodes']
        else:
            n_episodes = opt['data.train_episodes']

        speaker_ids = dataset[split]['class']
        data_split = dataset[split]['data']

        transforms = [
            partial(convert_dict, 'class'),
            partial(extract_episode, 'class', data_split, opt['data.min_len'],
                    opt['data.max_len'], n_support, n_query),
            partial(convert_tensor,
                    ['xq_padded', 'xs_padded', 'xq_len', 'xs_len'])
        ]
        if opt['data.cuda']:
            transforms.append(CudaTransform())

        transforms = compose(transforms)

        ds = TransformDataset(ListDataset(speaker_ids), transforms)

        #sampler = SequencialEpisodicBatchSampler(len(ds), n_way)
        if opt['data.sequential']:
            sampler = SequentialBatchSampler(len(ds))
        else:
            sampler = EpisodicBatchSampler(len(ds), n_way, n_episodes)

        ret[split] = torch.utils.data.DataLoader(ds,
                                                 batch_sampler=sampler,
                                                 num_workers=0)

    return ret
Example 19
    def __iter__(self):
        if self.dataset is None:
            self.dataset = self.load_dataset(from_disk=True)[self.split]
            transforms = [partial(batch_from_index, self.dataset['data']), partial(convert_tensor, 'data')]
            if self.if_cuda:
                transforms.append(CudaTransform())
            self.transforms = compose(transforms)
        index_batches = self.shuffle_dataset()
        batches = TransformDataset(ListDataset(index_batches), self.transforms)

        print(f"\nSize of batches: {len(batches)}")
        for batch in batches:
            batch['n_way'] = self.n_way
            batch['n_support'] = self.n_support
            batch['n_query'] = self.n_query
            yield batch
Example 20
def loader(opt):
    split_dir = os.path.join(opt.split_dir, opt.split_name)
    if opt.state == 'train':
        splits = opt.train_split_mode
    else:
        splits = ['test']

    ret = {}
    for split in splits:
        if split in ['val', 'test']:
            n_way = opt.test_way
            n_support = opt.test_shot
            n_query = opt.test_query
            n_episodes = opt.test_episodes
        else:
            n_way = opt.train_way
            n_support = opt.train_shot
            n_query = opt.train_query
            n_episodes = opt.train_episodes

        transforms = [partial(convert_dict, 'class'),
                      partial(load_class_images, opt.dataset_dir),
                      partial(extract_episode, n_support, n_query)]

        if opt.cuda:
            transforms.append(CudaTransform())

        transforms = compose(transforms)
        class_names = []

        with open(os.path.join(split_dir, "{:s}.txt".format(split)), 'r') as f:
            for class_name in f.readlines():
                class_names.append(class_name.rstrip('\n'))
        ds = TransformDataset(ListDataset(class_names), transforms)

        if opt.sequential:
            sampler = SequentialBatchSampler(len(ds))
        else:
            sampler = EpisodicBatchSampler(len(ds), n_way, n_episodes)

        # use num_workers=0, otherwise may receive duplicate episodes
        ret[split] = torch.utils.data.DataLoader(ds, batch_sampler=sampler, num_workers=0)

    return ret
Example 21
def load_class_images(d):
    label, rot = d['class'], -1

    if 'rot' in d['class']:
        label, rot = d['class'].split('/rot')
        rot = int(rot)

    if label not in MINIIMAGENET_CACHE:
        image_dir = os.path.join(MINIIMAGENET_DATA_DIR, 'data', label)

        class_images = sorted(glob.glob(os.path.join(image_dir, '*.jpg')))
        if len(class_images) == 0:
            raise Exception(
                "No images found for miniimagenet class {} at {}.".format(
                    label, image_dir))

        image_ds = TransformDataset(
            ListDataset(class_images),
            compose([
                partial(convert_dict, 'file_name'),
                partial(load_image_path, 'file_name', 'data'),
                partial(scale_image, 'data', 84, 84),
                partial(convert_tensor, 'data')
                # partial(normalize_image, 'data', {'mean': (0.47234195, 0.45386744, 0.41036746),
                #                                   'std': (0.28678342, 0.27806091, 0.29304931)})
            ]))

        loader = torch.utils.data.DataLoader(image_ds,
                                             batch_size=len(image_ds),
                                             shuffle=False)

        for sample in loader:
            MINIIMAGENET_CACHE[label] = sample['data']
            break  # only need one sample because batch size equal to dataset length

    samples = MINIIMAGENET_CACHE[label]

    # Rotates images if needed
    if rot != -1:
        nRot = rot // 90
        samples = torch.rot90(samples.cuda(), nRot, dims=[2, 3]).cpu()

    return {'class': d['class'], 'data': samples}
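
For reference, the rotation trick above leans on torch.rot90 semantics: with dims=[2, 3] each image in an NCHW batch is rotated counter-clockwise in the (H, W) plane by k quarter turns, hence nRot = rot // 90. A quick check:

# Quick check of the torch.rot90 semantics relied on above: k quarter turns
# in the (H, W) plane of an NCHW batch.
import torch

x = torch.arange(4.).reshape(1, 1, 2, 2)     # [[0, 1], [2, 3]]
print(torch.rot90(x, 1, dims=[2, 3])[0, 0])  # 90 deg CCW: [[1, 3], [0, 2]]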
Example 22
def load_class_images(dataset, index_set, d):
    if d['class'] not in MINI_IMGNET_CACHE:
        image_ds = TransformDataset(
            ListDataset(index_set[d['class']]),
            compose([
                partial(convert_dict, 'img_idx'),
                partial(load_image, dataset, 'img_idx', 'data'),
                #partial(rotate_image, 'data', float(rot[3:])),
                partial(scale_image, 'data', 84, 84),
                partial(convert_tensor, 'data')
            ]))

        loader = torch.utils.data.DataLoader(image_ds,
                                             batch_size=len(image_ds),
                                             shuffle=False)

        for sample in loader:
            MINI_IMGNET_CACHE[d['class']] = sample['data']
            break  # only need one sample because batch size equal to dataset length

    return {'class': d['class'], 'data': MINI_IMGNET_CACHE[d['class']]}
Example 23
def _setup_class_omniglot(split, d, cache, init_entry, crop_transforms,
                          target_size, root_dir, augm_opt):
    alphabet, character, rot = d['class'].split('/')
    image_dir = os.path.join(root_dir, 'omniglot', 'data', alphabet, character)

    if augm_opt['rotation']:
        rotation_f = partial(utils.rotate_image, 'data', float(rot[3:]))
    else:
        rotation_f = partial(utils.nop)
        print(
            'WARNING - rotation augmentation is the default protocol for Omniglot'
        )

    if augm_opt['crop']:
        crop_f = partial(utils.crop, 'data', crop_transforms,
                         augm_opt['max_crop_shrink'])
    else:
        crop_f = partial(utils.nop)

    image_ds = TransformDataset(
        ListDataset(sorted(glob.glob(os.path.join(image_dir, '*.png')))),
        compose([
            partial(base.convert_dict, 'file_name'),
            partial(utils.load_image_path, 'file_name', 'data'),
            rotation_f,
            crop_f,
            partial(utils.scale_image, 'data', target_size, target_size),
            partial(utils.convert_tensor, 'data'),
        ]))

    loader = torch.utils.data.DataLoader(image_ds,
                                         batch_size=len(image_ds),
                                         shuffle=False)

    for sample in loader:
        if init_entry:
            cache.data[d['class']] = []

        cache.data[d['class']].append(sample['data'])
        break  # only need one sample because batch size equal to dataset length
Example 24
def load_class_nlp(corpus, d):
    if d['class'] not in NLP_CACHE:
        class_corpus = list(filter(lambda x: x.domain == d["class"], corpus))

        image_ds = TransformDataset(
            ListDataset(class_corpus),
            compose([
                partial(convert_corpus, 'data'),
                partial(lookup, vocab, 'data'),
                partial(pad_text, 'data', 28),
                partial(convert_tensor, 'data')
            ]))

        loader = torch.utils.data.DataLoader(image_ds,
                                             batch_size=len(image_ds),
                                             shuffle=False)

        for sample in loader:
            NLP_CACHE[d['class']] = sample['data']
            break  # only need one sample because batch size equal to dataset length

    return {'class': d['class'], 'data': NLP_CACHE[d['class']]}
Example 25
def load_class_audio(split, d):
    class_audio = dataset[split][d['class']]

    if len(class_audio) == 0:
        raise Exception(f"No audio found for speaker {d['class']}")

    audio_ds = TransformDataset(
        ListDataset(class_audio),
        compose([
            partial(convert_dict, 'file_name'),
            partial(extract_audio_mfcc, d['class'], 'file_name', 'data')
        ]))

    loader = torch.utils.data.DataLoader(audio_ds,
                                         batch_size=len(audio_ds),
                                         shuffle=False)

    for sample in loader:
        data = sample
        break  # only need one sample because batch size equal to dataset length

    return {'class': d['class'], 'data': data}
Example 26
    def load_class_images(self, class_name):
        if class_name not in self.cache:
            alphabet, character, rot = class_name.split('/')
            image_dir = os.path.join(self.data_dir, 'data', alphabet,
                                     character)
            class_images = sorted(glob.glob(os.path.join(image_dir, '*.png')))
            assert len(class_images) > 0

            image_ds = TransformDataset(
                ListDataset(class_images),
                TransformCompose([
                    TransformLoadImage(),
                    TransformRotateImage(rot=float(rot[3:])),
                    TransformScaleImage(height=28, width=28),
                    TransformConvertTensor()
                ]))
            for sample in torch.utils.data.DataLoader(image_ds,
                                                      batch_size=len(image_ds),
                                                      shuffle=False):
                self.cache[class_name] = sample
                break  # only need one sample because batch size equal to dataset length
        return self.cache[class_name]
Example 27
def load_class_images(class_index, d):
    if d['class'] not in MINIIMAGENET_CACHE:
        class_id = d['class']
        image_dir = os.path.join(MINIIMAGENET_DATA_DIR, 'data', class_id)

        class_images = [
            os.path.join(image_dir, 'images', img)
            for img in class_index[d['class']]
        ]

        if len(class_images) == 0:
            raise Exception("No images found for class %s." % d['class'])

        for image_path in class_images:
            if not os.path.exists(image_path):
                extract_images(class_index[d['class']], image_dir)
                break

        image_ds = TransformDataset(
            ListDataset(class_images),
            compose([
                partial(convert_dict, 'file_name'),
                partial(load_image_path, 'file_name', 'data'),
                #partial(rotate_image, 'data', float(rot[3:])),
                partial(scale_image, 'data', 84, 84),
                partial(convert_tensor, 'data')
            ]))

        loader = torch.utils.data.DataLoader(image_ds,
                                             batch_size=len(image_ds),
                                             shuffle=False)

        for sample in loader:
            MINIIMAGENET_CACHE[d['class']] = sample['data']
            break  # only need one sample because batch size equal to dataset length

    return {'class': d['class'], 'data': MINIIMAGENET_CACHE[d['class']]}
Example 28
def _setup_class_miniimagenet(split, d, cache, init_entry, crop_transforms,
                              target_size, root_dir, augm_opt):
    image_dir = os.path.join(root_dir, 'miniimagenet', 'data', d['class'])

    if augm_opt['rotation']:
        raise ValueError(
            'Augmentation with rotation not implemented for miniimagenet')

    if augm_opt['crop']:
        crop_f = partial(utils.crop, 'data', crop_transforms,
                         augm_opt['max_crop_shrink'])
        scale_f = partial(utils.scale_image, 'data', target_size, target_size)
    else:
        crop_f = partial(utils.nop)
        scale_f = partial(utils.nop)

    image_ds = TransformDataset(
        ListDataset(sorted(glob.glob(os.path.join(image_dir, '*.jpg')))),
        compose([
            partial(base.convert_dict, 'file_name'),
            partial(utils.load_image_path, 'file_name', 'data'),
            crop_f,
            scale_f,
            partial(utils.to_tensor, 'data'),
            # partial(utils.normalize_mini_image, 'data')
        ]))

    loader = torch.utils.data.DataLoader(image_ds,
                                         batch_size=len(image_ds),
                                         shuffle=False)

    for sample in loader:
        if init_entry:
            cache.data[d['class']] = []

        cache.data[d['class']].append(sample['data'])
        break  # only need one sample because batch size equal to dataset length
Example 29
def load(opt, splits):
    split_dir = os.path.join(MINIIMAGENET_DATA_DIR, 'splits',
                             opt['data.split'])

    ret = {}
    for split in splits:
        if split in ['val', 'test'] and opt['data.test_way'] != 0:
            n_way = opt['data.test_way']
        else:
            n_way = opt['data.way']

        if split in ['val', 'test'] and opt['data.test_shot'] != 0:
            n_support = opt['data.test_shot']
        else:
            n_support = opt['data.shot']

        if split in ['val', 'test'] and opt['data.test_query'] != 0:
            n_query = opt['data.test_query']
        else:
            n_query = opt['data.query']

        if split in ['val', 'test']:
            n_episodes = opt['data.test_episodes']
        else:
            n_episodes = opt['data.train_episodes']

        transforms = [
            partial(convert_dict, 'class'), load_class_images,
            partial(extract_episode, n_support, n_query)
        ]
        if opt['data.cuda']:
            transforms.append(CudaTransform())

        transforms = compose(transforms)

        class_names = []
        with open(os.path.join(split_dir, "{:s}.csv".format(split)), 'r') as f:
            for class_name in f.readlines():
                name = class_name.split(',')[1].rstrip('\n')

                if name == 'label':
                    continue

                if opt['data.augmented']:
                    class_names.extend([
                        name + '/rot000', name + '/rot090', name + '/rot180',
                        name + '/rot270'
                    ])
                else:
                    class_names.append(name)
        ds = TransformDataset(ListDataset(class_names), transforms)

        if opt['data.sequential']:
            sampler = SequentialBatchSampler(len(ds))
        else:
            sampler = EpisodicBatchSampler(len(ds), n_way, n_episodes)

        # use num_workers=0, otherwise may receive duplicate episodes
        ret[split] = torch.utils.data.DataLoader(ds,
                                                 batch_sampler=sampler,
                                                 num_workers=0)

    return ret
Example 30
def load_kws(opt, splits):
    #split_dir = os.path.join(KWS_DATA_DIR, 'splits', opt['data.split'])
    dataset_self = {}
    if splits[0] == 'test':
        files = sorted(os.listdir(KWS_DATA_DIR_TEST))
        class_names = []
        for file in files:
            class_name = file.split('_')[0]
            if class_name not in class_names:
                class_names.append(class_name)
        dataset_self['test'] = class_names
        data_dir = KWS_DATA_DIR_TEST
    else:
        data_dir = KWS_DATA_DIR
        files = sorted(os.listdir(KWS_DATA_DIR))
        val_class_names = [
            'label01', 'label13', 'label03', 'label13', 'label03', 'label13',
            'label03', 'label03'
        ]
        class_names = []
        for file in files:
            class_name = file.split('_')[0]
            if class_name not in class_names and class_name not in val_class_names:
                class_names.append(class_name)
        train_data = {}
        for name in class_names:
            name_files = []
            for file in files:
                if name in file:
                    name_files.append(file)
            train_data[name] = name_files

        val_data = {}
        for name in val_class_names:
            name_files = []
            for file in files:
                if name in file:
                    name_files.append(file)
            val_data[name] = name_files

        dataset_self['train'] = class_names
        dataset_self['val'] = val_class_names
    ret = {}
    for split in splits:
        if split in ['val', 'test'] and opt['data.test_way'] != 0:
            n_way = opt['data.test_way']
        else:
            n_way = opt['data.way']

        if split in ['val', 'test'] and opt['data.test_shot'] != 0:
            n_support = opt['data.test_shot']
        else:
            n_support = opt['data.shot']

        if split in ['val', 'test'] and opt['data.test_query'] != 0:
            n_query = opt['data.test_query']
        else:
            n_query = opt['data.query']

        if split in ['val', 'test']:
            n_episodes = opt['data.test_episodes']
        else:
            n_episodes = opt['data.train_episodes']

        transforms = [
            partial(convert_dict, 'class'),
            partial(load_class_features, data_dir),
            partial(extract_episode, n_support, n_query)
        ]
        if opt['data.cuda']:
            transforms.append(CudaTransform())

        transforms = compose(transforms)
        ds = TransformDataset(ListDataset(dataset_self[split]), transforms)

        if opt['data.sequential']:
            sampler = SequentialBatchSampler(len(ds))
        else:
            sampler = EpisodicBatchSampler(len(ds), n_way, n_episodes)

        # use num_workers=0, otherwise may receive duplicate episodes
        ret[split] = torch.utils.data.DataLoader(ds,
                                                 batch_sampler=sampler,
                                                 num_workers=0)

    return ret
Example 31
def load_data(opt, splits):
    global root_dir
    root_dir = opt['data.root_dir']
    augm_opt = filter_opt(opt, 'augm')
    dataset = opt['data.dataset']
    split_dir = os.path.join(opt['data.root_dir'], opt['data.dataset'],
                             'splits', opt['data.split'])

    ret = {}
    # cache = {}
    cache = Cache()

    for split in splits:
        if split in ['val1', 'val5', 'test']:
            n_way = opt['data.test_way']
        else:
            n_way = opt['data.way']

        if split in ['train', 'trainval']:
            # random shots
            SE = SetupEpisode(batch_size=opt['data.batch_size'],
                              shot_max=opt['data.shot_max'],
                              fixed_shot=opt['data.shot'],
                              way_min=opt['data.way_min'],
                              fixed_way=n_way)
        elif split == 'val1':
            SE = SetupEpisode(batch_size=opt['data.batch_size'],
                              shot_max=opt['data.shot_max'],
                              fixed_shot=1,
                              way_min=opt['data.way_min'],
                              fixed_way=n_way)
        elif split == 'val5':
            SE = SetupEpisode(batch_size=opt['data.batch_size'],
                              shot_max=opt['data.shot_max'],
                              fixed_shot=5,
                              way_min=opt['data.way_min'],
                              fixed_way=n_way)
        else:
            SE = SetupEpisode(batch_size=opt['data.batch_size'],
                              shot_max=opt['data.shot_max'],
                              fixed_shot=opt['data.test_shot'],
                              way_min=opt['data.way_min'],
                              fixed_way=n_way)

        if split in ['val1', 'val5', 'test']:
            n_episodes = opt['data.test_episodes']
        else:
            n_episodes = opt['data.train_episodes']

        transforms = [
            partial(convert_dict, 'class'),
            partial(load_class_images, split, dataset, cache, augm_opt),
            partial(extract_episode, SE, augm_opt)
        ]

        if opt['data.cuda']:
            transforms.append(CudaTransform())

        transforms = compose(transforms)

        class_names = []
        split_file = 'val.txt' if split in ['val1', 'val5'] else "{:s}.txt".format(split)
        with open(os.path.join(split_dir, split_file), 'r') as f:
            for class_name in f.readlines():
                class_names.append(class_name.rstrip('\n'))
        ds = TransformDataset(ListDataset(class_names), transforms)

        sampler = EpisodicBatchSampler(SE, len(ds), n_episodes)

        # use num_workers=0, otherwise may receive duplicate episodes
        ret[split] = torch.utils.data.DataLoader(ds,
                                                 batch_sampler=sampler,
                                                 num_workers=0)

    return ret