def load_class_samples(self, d):
    if d['class'] not in self.data_cache:
        if d['class'] == '_silence_':
            samples = torch.zeros(self.silence_num_samples, 1,
                                  self.desired_samples)
            sample_ds = TransformDataset(
                ListDataset(samples),
                compose([
                    partial(convert_dict, 'data'),
                    partial(self.mix_background, True, 'data'),
                    partial(self.extract_features, 'data')
                ]))
        else:
            samples = []
            if d['class'] == '_unknown_':
                unknown_dir = os.path.join(self.data_dir, '..', '_unknown_')
                split = os.path.basename(self.class_file)
                unknown_wavs = os.path.join(unknown_dir, split)
                with open(unknown_wavs, 'r') as rf:
                    samples = [
                        os.path.join(unknown_dir, wav_file.strip('\n'))
                        for wav_file in rf.readlines()
                    ]
            else:
                keyword_dir = os.path.join(self.data_dir, d['class'])
                samples = glob.glob(os.path.join(keyword_dir, '*.wav'))

            if len(samples) == 0:
                # report the directory that was actually searched; keyword_dir
                # is unbound in the '_unknown_' branch
                searched_dir = unknown_dir if d['class'] == '_unknown_' else keyword_dir
                raise Exception(
                    "No samples found for GoogleSpeechCommand {} at {}".format(
                        d['class'], searched_dir))

            sample_ds = TransformDataset(
                ListDataset(samples),
                compose([
                    partial(convert_dict, 'file_name'),
                    partial(self.load_audio, 'file_name', 'data'),
                    partial(self.adjust_volume, 'data'),
                    partial(self.shift_and_pad, 'data'),
                    partial(self.mix_background, self.use_background, 'data'),
                    partial(self.extract_features, 'data')
                ]))

        loader = torch.utils.data.DataLoader(sample_ds,
                                             batch_size=len(sample_ds),
                                             shuffle=False)

        for sample in loader:
            self.data_cache[d['class']] = sample['data']
            break  # only need one sample because batch size equal to dataset length

    return {'class': d['class'], 'data': self.data_cache[d['class']]}

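# All of these loaders chain torchnet-style transforms via `partial`. The first
# link is always `convert_dict`, which wraps a raw item in a dict under a given
# key so later transforms can address fields by name. A minimal sketch of that
# helper, assuming the usual torchnet/protonets convention (shown for context,
# not taken from this codebase):
def convert_dict(k, v):
    # partial(convert_dict, 'file_name') maps each raw item to {'file_name': item}
    return {k: v}
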
def load_class_images(d):
    if d['class'] not in OMNIGLOT_CACHE:
        alphabet, character, rot = d['class'].split('/')
        image_dir = os.path.join(OMNIGLOT_DATA_DIR, 'data', alphabet, character)

        image_ds = TransformDataset(
            ListDataset(sorted(glob.glob(os.path.join(image_dir, '*.png')))),
            compose([
                partial(convert_dict, 'file_name'),
                partial(load_image_path, 'file_name', 'data'),
                partial(rotate_image, 'data', float(rot[3:])),
                partial(scale_image, 'data', 28, 28),
                partial(convert_tensor, 'data')
            ]))

        loader = torch.utils.data.DataLoader(image_ds,
                                             batch_size=len(image_ds),
                                             shuffle=False)

        for sample in loader:
            OMNIGLOT_CACHE[d['class']] = sample['data']
            break  # only need one sample because batch size equal to dataset length

    return {'class': d['class'], 'data': OMNIGLOT_CACHE[d['class']]}

def load_class_images(d):
    if d['class'] not in OMNIGLOT_CACHE:
        # Resolve the image directory from the class name
        alphabet, character, rot = d['class'].split('/')
        image_dir = os.path.join(OMNIGLOT_DATA_DIR, 'data', alphabet, character)

        # Collect every image under that directory
        class_images = sorted(glob.glob(os.path.join(image_dir, '*.png')))
        if len(class_images) == 0:
            raise Exception("No images found for omniglot class {} at {}. Did you run download_omniglot.sh first?".format(d['class'], image_dir))

        # ListDataset loads items from the image list; the composed transforms
        # build the dict, rotate, rescale, and convert each image to a tensor
        image_ds = TransformDataset(ListDataset(class_images),
                                    compose([partial(convert_dict, 'file_name'),
                                             partial(load_image_path, 'file_name', 'data'),
                                             partial(rotate_image, 'data', float(rot[3:])),
                                             partial(scale_image, 'data', 28, 28),
                                             partial(convert_tensor, 'data')]))

        # Put the whole class into a single batch
        loader = torch.utils.data.DataLoader(image_ds, batch_size=len(image_ds), shuffle=False)

        # Take that one batch and write it into the cache
        for sample in loader:
            OMNIGLOT_CACHE[d['class']] = sample['data']
            break  # only need one sample because batch size equal to dataset length

    # Return the class name together with its cached data
    return {'class': d['class'], 'data': OMNIGLOT_CACHE[d['class']]}

def load_class_images(d):
    if d['class'] not in CIFAR100_CACHE:
        image_dir = os.path.join(CIFAR100_DATA_DIR, 'data', d['class'])

        class_images = sorted(glob.glob(os.path.join(image_dir, '*.jpg')))
        if len(class_images) == 0:
            raise Exception(
                "No images found for CIFAR100 class {} at {}.".format(
                    d['class'], image_dir))

        image_ds = TransformDataset(
            ListDataset(class_images),
            compose([
                partial(convert_dict, 'file_name'),
                partial(load_image_path, 'file_name', 'data'),
                partial(scale_image, 'data', 32, 32),
                partial(convert_tensor, 'data')
                # partial(normalize_image, 'data',
                #         {'mean': (0.50400572, 0.48892908, 0.44281732),
                #          'std': (0.26477088, 0.25454896, 0.27408391)})
            ]))

        loader = torch.utils.data.DataLoader(image_ds,
                                             batch_size=len(image_ds),
                                             shuffle=False)

        for sample in loader:
            CIFAR100_CACHE[d['class']] = sample['data']
            break  # only need one sample because batch size equal to dataset length

    return {'class': d['class'], 'data': CIFAR100_CACHE[d['class']]}

def load_class_images(d):
    if d['class'] not in MINIIMAGENET_CACHE:
        image, classname = d['class'].split(',')
        image_dir = os.path.join(MINIIMAGENET_DATA_DIR, 'images')

        # Get all images with the same class
        class_images = sorted(
            glob.glob(os.path.join(image_dir, '{}*'.format(classname))))
        if len(class_images) == 0:
            raise Exception(
                "No images found for miniImagenet class {} at {}.".format(
                    d['class'], image_dir))

        image_ds = TransformDataset(
            ListDataset(class_images),
            compose([
                partial(convert_dict, 'file_name'),
                partial(load_image_path, 'file_name', 'data'),
                partial(scale_image, 'data', 84, 84),
                partial(convert_tensor, 'data')
            ]))

        loader = torch.utils.data.DataLoader(image_ds,
                                             batch_size=len(image_ds),
                                             shuffle=False)

        for sample in loader:
            MINIIMAGENET_CACHE[d['class']] = sample['data']
            break  # only need one sample because batch size equal to dataset length

    return {'class': d['class'], 'data': MINIIMAGENET_CACHE[d['class']]}

def __init__(self, data_dir, class_file, n_support, n_query, cuda, args):
    self.sample_rate = args['sample_rate']
    self.clip_duration_ms = args['clip_duration']
    self.window_size_ms = args['window_size']
    self.window_stride_ms = args['window_stride']
    self.feature_bin_count = args['num_features']
    self.foreground_volume = args['foreground_volume']
    self.time_shift_ms = args['time_shift']
    self.use_background = args['include_background']
    self.background_volume = args['bg_volume']
    self.background_frequency = args['bg_frequency']
    self.desired_samples = int(self.sample_rate * self.clip_duration_ms / 1000)
    self.silence = args['include_silence']
    self.silence_num_samples = args['num_silence']
    self.unknown = args['include_unknown']
    self.data_cache = {}
    self.data_dir = data_dir
    self.class_file = class_file
    self.n_support = n_support
    self.n_query = n_query

    self.background_data = self.load_background_data()
    self.mfcc = self.build_mfcc_extractor()

    self.transforms = [
        partial(convert_dict, 'class'),
        self.load_class_samples,
        self.extract_episode
    ]
    if cuda:
        self.transforms.append(CudaTransform())

    self.class_names = self.read()
    transforms = compose(self.transforms)
    super().__init__(ListDataset(self.class_names), transforms)

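# For reference, a sketch of the `args` dict this __init__ reads; the keys are
# exactly the ones accessed above, while the values are illustrative
# placeholders, not recommended settings:
example_args = {
    'sample_rate': 16000,        # Hz
    'clip_duration': 1000,       # ms
    'window_size': 30,           # ms
    'window_stride': 10,         # ms
    'num_features': 40,          # feature bins per frame
    'foreground_volume': 1.0,
    'time_shift': 100,           # ms
    'include_background': True,
    'bg_volume': 0.1,
    'bg_frequency': 0.8,
    'include_silence': True,
    'num_silence': 100,
    'include_unknown': True,
}
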
def load_class_images(d):
    if d['class'] not in OMNIGLOT_CACHE:
        alphabet, character, rot = d['class'].split('/')
        image_dir = os.path.join(OMNIGLOT_DATA_DIR, 'data', alphabet, character)

        class_images = sorted(glob.glob(os.path.join(image_dir, '*.png')))
        if len(class_images) == 0:
            raise Exception(
                "No images found for omniglot class {} at {}. Did you run download_omniglot.sh first?"
                .format(d['class'], image_dir))

        image_ds = TransformDataset(
            ListDataset(class_images),
            compose([
                partial(convert_dict, 'file_name'),
                partial(load_image_path, 'file_name', 'data'),
                partial(rotate_image, 'data', float(rot[3:])),
                partial(scale_image, 'data', 28, 28),
                partial(convert_tensor, 'data')
            ]))

        loader = torch.utils.data.DataLoader(image_ds,
                                             batch_size=len(image_ds),
                                             shuffle=False)

        for sample in loader:
            OMNIGLOT_CACHE[d['class']] = sample['data']
            break  # only need one sample because batch size equal to dataset length

    return {'class': d['class'], 'data': OMNIGLOT_CACHE[d['class']]}

def load_class_images(d):
    if d['class'] not in IMAGENET_CACHE:
        image_dir = os.path.join(IMAGENET_DATA_DIR, 'data', d['class'])

        class_images = sorted(glob.glob(os.path.join(image_dir, '*.jpg')))
        if len(class_images) == 0:
            raise Exception(
                "No images found for miniImagenet class {} at {}.".format(
                    d['class'], image_dir))

        image_ds = TransformDataset(
            ListDataset(class_images),
            compose([
                partial(convert_dict, 'file_name'),
                partial(load_image_path, 'file_name', 'data'),
                partial(scale_image, 'data', 84, 84),
                partial(convert_tensor, 'data')
            ]))

        loader = torch.utils.data.DataLoader(image_ds,
                                             batch_size=len(image_ds),
                                             shuffle=False)

        for sample in loader:
            IMAGENET_CACHE[d['class']] = sample['data']
            break  # only need one sample because batch size equal to dataset length

    return {'class': d['class'], 'data': IMAGENET_CACHE[d['class']]}

def load(opt, splits):
    split_dir = os.path.join(MINI_IMGNET_DATA_DIR, 'splits', opt['data.split'])

    ret = {}
    for split in splits:
        if split in ['val', 'test'] and opt['data.test_way'] != 0:
            n_way = opt['data.test_way']
        else:
            n_way = opt['data.way']

        if split in ['val', 'test'] and opt['data.test_shot'] != 0:
            n_support = opt['data.test_shot']
        else:
            n_support = opt['data.shot']

        if split in ['val', 'test'] and opt['data.test_query'] != 0:
            n_query = opt['data.test_query']
        else:
            n_query = opt['data.query']

        if split in ['val', 'test']:
            n_episodes = opt['data.test_episodes']
        else:
            n_episodes = opt['data.train_episodes']

        cache_path = get_cache_path(split)
        if os.path.exists(cache_path):
            with open(cache_path, "rb") as f:
                try:
                    data = pkl.load(f, encoding='bytes')
                    img_data = data[b'image_data']
                    class_dict = data[b'class_dict']
                except Exception:
                    # pickle was written with str keys; rewind before re-reading
                    f.seek(0)
                    data = pkl.load(f)
                    img_data = data['image_data']
                    class_dict = data['class_dict']

        transforms = [
            partial(convert_dict, 'class'),
            partial(load_class_images, img_data, class_dict),
            partial(extract_episode, n_support, n_query)
        ]
        if opt['data.cuda']:
            transforms.append(CudaTransform())

        class_names = [key for key in class_dict]
        transforms = compose(transforms)
        ds = TransformDataset(ListDataset(class_names), transforms)

        if opt['data.sequential']:
            sampler = SequentialBatchSampler(len(ds))
        else:
            sampler = EpisodicBatchSampler(len(ds), n_way, n_episodes)

        # use num_workers=0, otherwise may receive duplicate episodes
        ret[split] = torch.utils.data.DataLoader(ds,
                                                 batch_sampler=sampler,
                                                 num_workers=0)

    return ret

def load(opt, splits):
    split_dir = os.path.join(MINIIMAGENET_DATA_DIR, 'splits', opt['data.split'])

    ret = {}
    for split in splits:
        if split in ['val', 'test'] and opt['data.test_way'] != 0:
            n_way = opt['data.test_way']
        else:
            n_way = opt['data.way']

        if split in ['val', 'test'] and opt['data.test_shot'] != 0:
            n_support = opt['data.test_shot']
        else:
            n_support = opt['data.shot']

        if split in ['val', 'test'] and opt['data.test_query'] != 0:
            n_query = opt['data.test_query']
        else:
            n_query = opt['data.query']

        if split in ['val', 'test']:
            n_episodes = opt['data.test_episodes']
        else:
            n_episodes = opt['data.train_episodes']

        class_index = defaultdict(list)
        with open(os.path.join(split_dir, "{:s}.csv".format(split)), 'r') as f:
            f.readline()  # skip the csv header
            for image_class in f.readlines():
                image, class_name = image_class.split(',')
                class_name = class_name.rstrip('\n')
                class_index[class_name].append(image)
        class_names = list(class_index.keys())

        transforms = [
            partial(convert_dict, 'class'),
            partial(load_class_images, class_index),
            partial(extract_episode, n_support, n_query)
        ]
        if opt['data.cuda']:
            transforms.append(CudaTransform())
        transforms = compose(transforms)

        ds = TransformDataset(ListDataset(class_names), transforms)

        if opt['data.sequential']:
            sampler = SequentialBatchSampler(len(ds))
        else:
            sampler = EpisodicBatchSampler(len(ds), n_way, n_episodes)

        # use num_workers=0, otherwise may receive duplicate episodes
        ret[split] = torch.utils.data.DataLoader(ds,
                                                 batch_sampler=sampler,
                                                 num_workers=0)

    return ret

def load(opt, splits):
    split_dir = os.path.join(OMNIGLOT_DATA_DIR, 'splits', opt['data.split'])

    ret = {}
    for split in splits:
        # Number of classes per episode (n_way)
        if split in ['val', 'test'] and opt['data.test_way'] != 0:
            n_way = opt['data.test_way']
        else:
            n_way = opt['data.way']

        # Number of support examples per class
        if split in ['val', 'test'] and opt['data.test_shot'] != 0:
            n_support = opt['data.test_shot']
        else:
            n_support = opt['data.shot']

        # Number of query examples per class
        if split in ['val', 'test'] and opt['data.test_query'] != 0:
            n_query = opt['data.test_query']
        else:
            n_query = opt['data.query']

        # Number of episodes
        if split in ['val', 'test']:
            n_episodes = opt['data.test_episodes']
        else:
            n_episodes = opt['data.train_episodes']

        # Three transforms: wrap the class name in a dict, load that class's
        # images, then carve them into a support/query episode
        transforms = [partial(convert_dict, 'class'),
                      load_class_images,
                      partial(extract_episode, n_support, n_query)]
        if opt['data.cuda']:
            transforms.append(CudaTransform())
        transforms = compose(transforms)

        # Read every class name belonging to this split
        class_names = []
        with open(os.path.join(split_dir, "{:s}.txt".format(split)), 'r') as f:
            for class_name in f.readlines():
                class_names.append(class_name.rstrip('\n'))

        # Apply the support/query split to every class
        ds = TransformDataset(ListDataset(class_names), transforms)

        if opt['data.sequential']:
            sampler = SequentialBatchSampler(len(ds))
        else:
            # each episode draws n_way classes at random
            sampler = EpisodicBatchSampler(len(ds), n_way, n_episodes)

        # wrap the data so it is served as a sequence of episodes;
        # use num_workers=0, otherwise may receive duplicate episodes
        ret[split] = torch.utils.data.DataLoader(ds,
                                                 batch_sampler=sampler,
                                                 num_workers=0)

    return ret

def testCompose(self):
    def f1(x):
        return x + 1

    def f2(x):
        return x + 2

    def f3(x):
        return x / 2

    self.assertEqual(transform.compose([f1, f2, f3])(1), 2)

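# The test above pins down the order of application: compose runs its transforms
# left to right, so 1 -> f1 -> 2 -> f2 -> 4 -> f3 -> 2.0. A minimal reference
# sketch with those semantics (a stand-in, not transform.compose's actual source):
from functools import reduce

def compose_sketch(transforms):
    # fold the input through each transform in list order
    return lambda x: reduce(lambda acc, f: f(acc), transforms, x)

assert compose_sketch([lambda x: x + 1, lambda x: x + 2, lambda x: x / 2])(1) == 2
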
def load(opt, splits):
    split_dir = os.path.join(OMNIGLOT_DATA_DIR, 'splits', opt['data.split'])

    ret = {}
    for split in splits:
        if split in ['val', 'test'] and opt['data.test_way'] != 0:
            n_way = opt['data.test_way']
        else:
            n_way = opt['data.way']

        if split in ['val', 'test'] and opt['data.test_shot'] != 0:
            n_support = opt['data.test_shot']
        else:
            n_support = opt['data.shot']

        if split in ['val', 'test'] and opt['data.test_query'] != 0:
            n_query = opt['data.test_query']
        else:
            n_query = opt['data.query']

        if split in ['val', 'test']:
            n_episodes = opt['data.test_episodes']
        else:
            n_episodes = opt['data.train_episodes']

        transforms = [
            partial(convert_dict, 'class'),
            load_class_images,
            partial(extract_episode, n_support, n_query)
        ]
        if opt['data.cuda']:
            transforms.append(CudaTransform())
        transforms = compose(transforms)

        class_names = []
        with open(os.path.join(split_dir, "{:s}.txt".format(split)), 'r') as f:
            for class_name in f.readlines():
                class_names.append(class_name.rstrip('\n'))
        ds = TransformDataset(ListDataset(class_names), transforms)

        if opt['data.sequential']:
            sampler = SequentialBatchSampler(len(ds))
        else:
            sampler = EpisodicBatchSampler(len(ds), n_way, n_episodes)

        # use num_workers=0, otherwise may receive duplicate episodes
        ret[split] = torch.utils.data.DataLoader(ds,
                                                 batch_sampler=sampler,
                                                 num_workers=0)

    return ret

def load(opt, splits):
    ret = {}
    for split in splits:
        if split in ['val', 'test'] and opt['data.test_way'] != 0:
            n_way = opt['data.test_way']
        else:
            n_way = opt['data.way']

        if split in ['val', 'test'] and opt['data.test_shot'] != 0:
            n_support = opt['data.test_shot']
        else:
            n_support = opt['data.shot']

        if split in ['val', 'test'] and opt['data.test_query'] != 0:
            n_query = opt['data.test_query']
        else:
            n_query = opt['data.query']

        if split in ['val', 'test']:
            n_episodes = opt['data.test_episodes']
        else:
            n_episodes = opt['data.train_episodes']

        speaker_ids = dataset[split]['class']
        data_split = dataset[split]['data']

        transforms = [
            partial(convert_dict, 'class'),
            partial(extract_episode, 'class', data_split, opt['data.min_len'],
                    opt['data.max_len'], n_support, n_query),
            partial(convert_tensor,
                    ['xq_padded', 'xs_padded', 'xq_len', 'xs_len'])
        ]
        if opt['data.cuda']:
            transforms.append(CudaTransform())
        transforms = compose(transforms)

        ds = TransformDataset(ListDataset(speaker_ids), transforms)

        if opt['data.sequential']:
            sampler = SequentialBatchSampler(len(ds))
        else:
            sampler = EpisodicBatchSampler(len(ds), n_way, n_episodes)

        ret[split] = torch.utils.data.DataLoader(ds,
                                                 batch_sampler=sampler,
                                                 num_workers=0)

    return ret

def __iter__(self):
    if self.dataset is None:
        self.dataset = self.load_dataset(from_disk=True)[self.split]

    transforms = [partial(batch_from_index, self.dataset['data']),
                  partial(convert_tensor, 'data')]
    if self.if_cuda:
        transforms.append(CudaTransform())
    self.transforms = compose(transforms)

    index_batches = self.shuffle_dataset()
    batches = TransformDataset(ListDataset(index_batches), self.transforms)

    for batch in batches:
        batch['n_way'] = self.n_way
        batch['n_support'] = self.n_support
        batch['n_query'] = self.n_query
        yield batch

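# Typical consumption of the iterator above; `episodic_loader` is a hypothetical
# instance of the class that defines this __iter__, and the dict keys follow the
# fields set in the loop:
# for batch in episodic_loader:
#     x = batch['data']  # tensor built by batch_from_index
#     way, shot, query = batch['n_way'], batch['n_support'], batch['n_query']
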
def loader(opt):
    split_dir = os.path.join(opt.split_dir, opt.split_name)

    if opt.state == 'train':
        splits = opt.train_split_mode
    else:
        splits = ['test']

    ret = {}
    for split in splits:
        if split in ['val', 'test']:
            n_way = opt.test_way
            n_support = opt.test_shot
            n_query = opt.test_query
            n_episodes = opt.test_episodes
        else:
            n_way = opt.train_way
            n_support = opt.train_shot
            n_query = opt.train_query
            n_episodes = opt.train_episodes

        transforms = [partial(convert_dict, 'class'),
                      partial(load_class_images, opt.dataset_dir),
                      partial(extract_episode, n_support, n_query)]
        if opt.cuda:
            transforms.append(CudaTransform())
        transforms = compose(transforms)

        class_names = []
        with open(os.path.join(split_dir, "{:s}.txt".format(split)), 'r') as f:
            for class_name in f.readlines():
                class_names.append(class_name.rstrip('\n'))
        ds = TransformDataset(ListDataset(class_names), transforms)

        if opt.sequential:
            sampler = SequentialBatchSampler(len(ds))
        else:
            sampler = EpisodicBatchSampler(len(ds), n_way, n_episodes)

        # use num_workers=0, otherwise may receive duplicate episodes
        ret[split] = torch.utils.data.DataLoader(ds,
                                                 batch_sampler=sampler,
                                                 num_workers=0)

    return ret

def load_class_images(d):
    label, rot = d['class'], -1
    if 'rot' in d['class']:
        label, rot = d['class'].split('/rot')
        rot = int(rot)

    if label not in MINIIMAGENET_CACHE:
        image_dir = os.path.join(MINIIMAGENET_DATA_DIR, 'data', label)

        class_images = sorted(glob.glob(os.path.join(image_dir, '*.jpg')))
        if len(class_images) == 0:
            raise Exception(
                "No images found for miniimagenet class {} at {}.".format(
                    label, image_dir))

        image_ds = TransformDataset(
            ListDataset(class_images),
            compose([
                partial(convert_dict, 'file_name'),
                partial(load_image_path, 'file_name', 'data'),
                partial(scale_image, 'data', 84, 84),
                partial(convert_tensor, 'data')
                # partial(normalize_image, 'data',
                #         {'mean': (0.47234195, 0.45386744, 0.41036746),
                #          'std': (0.28678342, 0.27806091, 0.29304931)})
            ]))

        loader = torch.utils.data.DataLoader(image_ds,
                                             batch_size=len(image_ds),
                                             shuffle=False)

        for sample in loader:
            MINIIMAGENET_CACHE[label] = sample['data']
            break  # only need one sample because batch size equal to dataset length

    samples = MINIIMAGENET_CACHE[label]

    # Rotate images if needed
    if rot != -1:
        nRot = rot // 90
        samples = torch.rot90(samples.cuda(), nRot, dims=[2, 3]).cpu()

    return {'class': d['class'], 'data': samples}

def load_class_images(dataset, index_set, d):
    if d['class'] not in MINI_IMGNET_CACHE:
        image_ds = TransformDataset(
            ListDataset(index_set[d['class']]),
            compose([
                partial(convert_dict, 'img_idx'),
                partial(load_image, dataset, 'img_idx', 'data'),
                partial(scale_image, 'data', 84, 84),
                partial(convert_tensor, 'data')
            ]))

        loader = torch.utils.data.DataLoader(image_ds,
                                             batch_size=len(image_ds),
                                             shuffle=False)

        for sample in loader:
            MINI_IMGNET_CACHE[d['class']] = sample['data']
            break  # only need one sample because batch size equal to dataset length

    return {'class': d['class'], 'data': MINI_IMGNET_CACHE[d['class']]}

def _setup_class_omniglot(split, d, cache, init_entry, crop_transforms,
                          target_size, root_dir, augm_opt):
    alphabet, character, rot = d['class'].split('/')
    image_dir = os.path.join(root_dir, 'omniglot', 'data', alphabet, character)

    if augm_opt['rotation']:
        rotation_f = partial(utils.rotate_image, 'data', float(rot[3:]))
    else:
        rotation_f = partial(utils.nop)
        print('WARNING - rotation augmentation is the default protocol for Omniglot')

    if augm_opt['crop']:
        crop_f = partial(utils.crop, 'data', crop_transforms,
                         augm_opt['max_crop_shrink'])
    else:
        crop_f = partial(utils.nop)

    image_ds = TransformDataset(
        ListDataset(sorted(glob.glob(os.path.join(image_dir, '*.png')))),
        compose([
            partial(base.convert_dict, 'file_name'),
            partial(utils.load_image_path, 'file_name', 'data'),
            rotation_f,
            crop_f,
            partial(utils.scale_image, 'data', target_size, target_size),
            partial(utils.convert_tensor, 'data'),
        ]))

    loader = torch.utils.data.DataLoader(image_ds,
                                         batch_size=len(image_ds),
                                         shuffle=False)

    for sample in loader:
        if init_entry:
            cache.data[d['class']] = []
        cache.data[d['class']].append(sample['data'])
        break  # only need one sample because batch size equal to dataset length

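# When an augmentation is switched off above, its slot in the composed pipeline
# is filled with a no-op so the chain keeps its shape. A minimal sketch of such
# a pass-through (hypothetical; the actual utils.nop may differ):
def nop(d):
    # return the sample dict unchanged
    return d
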
def load_class_nlp(corpus, d):
    if d['class'] not in NLP_CACHE:
        class_corpus = list(filter(lambda x: x.domain == d["class"], corpus))

        image_ds = TransformDataset(
            ListDataset(class_corpus),
            compose([
                partial(convert_corpus, 'data'),
                partial(lookup, vocab, 'data'),
                partial(pad_text, 'data', 28),
                partial(convert_tensor, 'data')
            ]))

        loader = torch.utils.data.DataLoader(image_ds,
                                             batch_size=len(image_ds),
                                             shuffle=False)

        for sample in loader:
            NLP_CACHE[d['class']] = sample['data']
            break  # only need one sample because batch size equal to dataset length

    return {'class': d['class'], 'data': NLP_CACHE[d['class']]}

def load_class_audio(split, d):
    class_audio = dataset[split][d['class']]
    if len(class_audio) == 0:
        raise Exception(f"No audio found for speaker {d['class']}")

    audio_ds = TransformDataset(
        ListDataset(class_audio),
        compose([
            partial(convert_dict, 'file_name'),
            partial(extract_audio_mfcc, d['class'], 'file_name', 'data')
        ]))

    loader = torch.utils.data.DataLoader(audio_ds,
                                         batch_size=len(audio_ds),
                                         shuffle=False)

    for sample in loader:
        data = sample
        break  # only need one sample because batch size equal to dataset length

    return {'class': d['class'], 'data': data}

def _setup_class_miniimagenet(split, d, cache, init_entry, crop_transforms,
                              target_size, root_dir, augm_opt):
    image_dir = os.path.join(root_dir, 'miniimagenet', 'data', d['class'])

    if augm_opt['rotation']:
        raise ValueError(
            'Augmentation with rotation not implemented for miniimagenet')

    if augm_opt['crop']:
        crop_f = partial(utils.crop, 'data', crop_transforms,
                         augm_opt['max_crop_shrink'])
        scale_f = partial(utils.scale_image, 'data', target_size, target_size)
    else:
        crop_f = partial(utils.nop)
        scale_f = partial(utils.nop)

    image_ds = TransformDataset(
        ListDataset(sorted(glob.glob(os.path.join(image_dir, '*.jpg')))),
        compose([
            partial(base.convert_dict, 'file_name'),
            partial(utils.load_image_path, 'file_name', 'data'),
            crop_f,
            scale_f,
            partial(utils.to_tensor, 'data'),
            # partial(utils.normalize_mini_image, 'data')
        ]))

    loader = torch.utils.data.DataLoader(image_ds,
                                         batch_size=len(image_ds),
                                         shuffle=False)

    for sample in loader:
        if init_entry:
            cache.data[d['class']] = []
        cache.data[d['class']].append(sample['data'])
        break  # only need one sample because batch size equal to dataset length

def load_class_images(class_index, d):
    if d['class'] not in MINIIMAGENET_CACHE:
        class_id = d['class']
        image_dir = os.path.join(MINIIMAGENET_DATA_DIR, 'data', class_id)
        class_images = [
            os.path.join(image_dir, 'images', img)
            for img in class_index[d['class']]
        ]
        if len(class_images) == 0:
            raise Exception("No images found for class %s." % d['class'])

        # if any image is missing on disk, extract the whole class once
        for image_path in class_images:
            if not os.path.exists(image_path):
                extract_images(class_index[d['class']], image_dir)
                break

        image_ds = TransformDataset(
            ListDataset(class_images),
            compose([
                partial(convert_dict, 'file_name'),
                partial(load_image_path, 'file_name', 'data'),
                partial(scale_image, 'data', 84, 84),
                partial(convert_tensor, 'data')
            ]))

        loader = torch.utils.data.DataLoader(image_ds,
                                             batch_size=len(image_ds),
                                             shuffle=False)

        for sample in loader:
            MINIIMAGENET_CACHE[d['class']] = sample['data']
            break  # only need one sample because batch size equal to dataset length

    return {'class': d['class'], 'data': MINIIMAGENET_CACHE[d['class']]}

def testCompose(self):
    f1 = lambda x: x + 1
    f2 = lambda x: x + 2
    f3 = lambda x: x / 2
    self.assertEqual(transform.compose([f1, f2, f3])(1), 2)

def load_data(opt, splits):
    global root_dir
    root_dir = opt['data.root_dir']
    augm_opt = filter_opt(opt, 'augm')
    dataset = opt['data.dataset']
    split_dir = os.path.join(opt['data.root_dir'], opt['data.dataset'],
                             'splits', opt['data.split'])

    ret = {}
    cache = Cache()

    for split in splits:
        if split in ['val1', 'val5', 'test']:
            n_way = opt['data.test_way']
        else:
            n_way = opt['data.way']

        if split in ['train', 'trainval']:
            # random shots
            SE = SetupEpisode(batch_size=opt['data.batch_size'],
                              shot_max=opt['data.shot_max'],
                              fixed_shot=opt['data.shot'],
                              way_min=opt['data.way_min'],
                              fixed_way=n_way)
        elif split == 'val1':
            SE = SetupEpisode(batch_size=opt['data.batch_size'],
                              shot_max=opt['data.shot_max'],
                              fixed_shot=1,
                              way_min=opt['data.way_min'],
                              fixed_way=n_way)
        elif split == 'val5':
            SE = SetupEpisode(batch_size=opt['data.batch_size'],
                              shot_max=opt['data.shot_max'],
                              fixed_shot=5,
                              way_min=opt['data.way_min'],
                              fixed_way=n_way)
        else:
            SE = SetupEpisode(batch_size=opt['data.batch_size'],
                              shot_max=opt['data.shot_max'],
                              fixed_shot=opt['data.test_shot'],
                              way_min=opt['data.way_min'],
                              fixed_way=n_way)

        if split in ['val1', 'val5', 'test']:
            n_episodes = opt['data.test_episodes']
        else:
            n_episodes = opt['data.train_episodes']

        transforms = [
            partial(convert_dict, 'class'),
            partial(load_class_images, split, dataset, cache, augm_opt),
            partial(extract_episode, SE, augm_opt)
        ]
        if opt['data.cuda']:
            transforms.append(CudaTransform())
        transforms = compose(transforms)

        class_names = []
        split_file = 'val.txt' if split in ['val1', 'val5'] else "{:s}.txt".format(split)
        with open(os.path.join(split_dir, split_file), 'r') as f:
            for class_name in f.readlines():
                class_names.append(class_name.rstrip('\n'))
        ds = TransformDataset(ListDataset(class_names), transforms)

        sampler = EpisodicBatchSampler(SE, len(ds), n_episodes)

        # use num_workers=0, otherwise may receive duplicate episodes
        ret[split] = torch.utils.data.DataLoader(ds,
                                                 batch_sampler=sampler,
                                                 num_workers=0)

    return ret

def load_kws(opt, splits):
    dataset_self = {}

    if splits[0] == 'test':
        files = sorted(os.listdir(KWS_DATA_DIR_TEST))
        class_names = []
        for file in files:
            class_name = file.split('_')[0]
            if class_name not in class_names:
                class_names.append(class_name)
        dataset_self['test'] = class_names
        data_dir = KWS_DATA_DIR_TEST
    else:
        data_dir = KWS_DATA_DIR
        files = sorted(os.listdir(KWS_DATA_DIR))
        # note: as written, this list contains duplicate entries
        val_class_names = [
            'label01', 'label13', 'label03', 'label13', 'label03',
            'label13', 'label03', 'label03'
        ]
        class_names = []
        for file in files:
            class_name = file.split('_')[0]
            if class_name not in class_names and class_name not in val_class_names:
                class_names.append(class_name)

        train_data = {}
        for name in class_names:
            train_data[name] = [file for file in files if name in file]

        val_data = {}
        for name in val_class_names:
            val_data[name] = [file for file in files if name in file]

        dataset_self['train'] = class_names
        dataset_self['val'] = val_class_names

    ret = {}
    for split in splits:
        if split in ['val', 'test'] and opt['data.test_way'] != 0:
            n_way = opt['data.test_way']
        else:
            n_way = opt['data.way']

        if split in ['val', 'test'] and opt['data.test_shot'] != 0:
            n_support = opt['data.test_shot']
        else:
            n_support = opt['data.shot']

        if split in ['val', 'test'] and opt['data.test_query'] != 0:
            n_query = opt['data.test_query']
        else:
            n_query = opt['data.query']

        if split in ['val', 'test']:
            n_episodes = opt['data.test_episodes']
        else:
            n_episodes = opt['data.train_episodes']

        transforms = [
            partial(convert_dict, 'class'),
            partial(load_class_features, data_dir),
            partial(extract_episode, n_support, n_query)
        ]
        if opt['data.cuda']:
            transforms.append(CudaTransform())
        transforms = compose(transforms)

        ds = TransformDataset(ListDataset(dataset_self[split]), transforms)

        if opt['data.sequential']:
            sampler = SequentialBatchSampler(len(ds))
        else:
            sampler = EpisodicBatchSampler(len(ds), n_way, n_episodes)

        # use num_workers=0, otherwise may receive duplicate episodes
        ret[split] = torch.utils.data.DataLoader(ds,
                                                 batch_sampler=sampler,
                                                 num_workers=0)

    return ret

def testCompose(self):
    self.assertEqual(
        transform.compose([lambda x: x + 1,
                           lambda x: x + 2,
                           lambda x: x / 2])(1), 2)

def load(opt, splits):
    split_dir = os.path.join(MINIIMAGENET_DATA_DIR, 'splits', opt['data.split'])

    ret = {}
    for split in splits:
        if split in ['val', 'test'] and opt['data.test_way'] != 0:
            n_way = opt['data.test_way']
        else:
            n_way = opt['data.way']

        if split in ['val', 'test'] and opt['data.test_shot'] != 0:
            n_support = opt['data.test_shot']
        else:
            n_support = opt['data.shot']

        if split in ['val', 'test'] and opt['data.test_query'] != 0:
            n_query = opt['data.test_query']
        else:
            n_query = opt['data.query']

        if split in ['val', 'test']:
            n_episodes = opt['data.test_episodes']
        else:
            n_episodes = opt['data.train_episodes']

        transforms = [
            partial(convert_dict, 'class'),
            load_class_images,
            partial(extract_episode, n_support, n_query)
        ]
        if opt['data.cuda']:
            transforms.append(CudaTransform())
        transforms = compose(transforms)

        class_names = []
        with open(os.path.join(split_dir, "{:s}.csv".format(split)), 'r') as f:
            for class_name in f.readlines():
                name = class_name.split(',')[1].rstrip('\n')
                if name == 'label':  # skip the csv header row
                    continue
                if opt['data.augmented']:
                    class_names.extend([
                        name + '/rot000', name + '/rot090',
                        name + '/rot180', name + '/rot270'
                    ])
                else:
                    class_names.append(name)

        ds = TransformDataset(ListDataset(class_names), transforms)

        if opt['data.sequential']:
            sampler = SequentialBatchSampler(len(ds))
        else:
            sampler = EpisodicBatchSampler(len(ds), n_way, n_episodes)

        # use num_workers=0, otherwise may receive duplicate episodes
        ret[split] = torch.utils.data.DataLoader(ds,
                                                 batch_sampler=sampler,
                                                 num_workers=0)

    return ret

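# A minimal `opt` dict for loaders of this shape; the keys are the ones read in
# load() above, and the values are illustrative placeholders (the split name in
# particular is an assumption):
example_opt = {
    'data.split': 'ravi',
    'data.way': 5, 'data.shot': 1, 'data.query': 5,
    'data.test_way': 0, 'data.test_shot': 0, 'data.test_query': 0,
    'data.train_episodes': 100, 'data.test_episodes': 100,
    'data.augmented': False, 'data.cuda': False, 'data.sequential': False,
}
# loaders = load(example_opt, ['train', 'val'])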