def __init__(self, data_dir, class_file, n_support, n_query, cuda, args):
    """Store the configuration from ``args`` and build the transform pipeline.

    Args:
        data_dir: Stored unchanged as ``self.data_dir``.
        class_file: Stored unchanged as ``self.class_file``.
        n_support: Stored unchanged as ``self.n_support``.
        n_query: Stored unchanged as ``self.n_query``.
        cuda: When truthy, a ``CudaTransform`` is appended to the pipeline.
        args: Mapping that must provide the keys ``sample_rate``,
            ``clip_duration``, ``window_size``, ``window_stride``,
            ``num_features``, ``foreground_volume``, ``time_shift``,
            ``include_background``, ``bg_volume``, ``bg_frequency``,
            ``include_silence``, ``num_silence`` and ``include_unknown``.
    """
    # --- signal / feature settings -------------------------------------
    self.sample_rate = args['sample_rate']
    self.clip_duration_ms = args['clip_duration']
    self.window_size_ms = args['window_size']
    self.window_stride_ms = args['window_stride']
    self.feature_bin_count = args['num_features']
    self.foreground_volume = args['foreground_volume']
    self.time_shift_ms = args['time_shift']

    # --- background-noise settings -------------------------------------
    self.use_background = args['include_background']
    self.background_volume = args['bg_volume']
    self.background_frequency = args['bg_frequency']

    # Clip length in samples: sample rate times the clip duration in ms.
    self.desired_samples = int(
        self.sample_rate * self.clip_duration_ms / 1000
    )

    # --- extra-class settings ------------------------------------------
    self.silence = args['include_silence']
    self.silence_num_samples = args['num_silence']
    self.unknown = args['include_unknown']

    # --- bookkeeping ---------------------------------------------------
    self.data_cache = {}
    self.data_dir = data_dir
    self.class_file = class_file
    self.n_support = n_support
    self.n_query = n_query

    # These helpers run only after every attribute above has been set,
    # because they may read any of them.
    self.background_data = self.load_background_data()
    self.mfcc = self.build_mfcc_extractor()

    pipeline = [
        partial(convert_dict, 'class'),
        self.load_class_samples,
        self.extract_episode,
    ]
    if cuda:
        pipeline.append(CudaTransform())
    self.transforms = pipeline

    self.class_names = self.read()
    composed = compose(self.transforms)
    super().__init__(ListDataset(self.class_names), composed)
def _read_split_cache(cache_path):
    """Return ``(image_data, class_dict)`` read from a pickled split cache.

    The pickle is loaded once with ``encoding='bytes'``; the two entries are
    then looked up under bytes keys first and under str keys otherwise, so
    both key flavours are supported without re-reading the file.

    Raises:
        FileNotFoundError: If ``cache_path`` does not exist.
    """
    if not os.path.exists(cache_path):
        raise FileNotFoundError(
            "split cache not found: {}".format(cache_path))

    # NOTE(security): unpickling can execute arbitrary code; only load cache
    # files that come from a trusted source.
    with open(cache_path, "rb") as f:
        data = pkl.load(f, encoding='bytes')

    if b'image_data' in data:
        return data[b'image_data'], data[b'class_dict']
    return data['image_data'], data['class_dict']


def load(opt, splits):
    """Build one episodic ``DataLoader`` per requested split.

    For the ``'val'`` and ``'test'`` splits the ``data.test_way``,
    ``data.test_shot`` and ``data.test_query`` options override
    ``data.way``, ``data.shot`` and ``data.query`` whenever they are not 0,
    and ``data.test_episodes`` is used instead of ``data.train_episodes``.

    Args:
        opt: Mapping of options (``data.way``, ``data.shot``, ``data.query``,
            ``data.test_way``, ``data.test_shot``, ``data.test_query``,
            ``data.train_episodes``, ``data.test_episodes``, ``data.cuda``,
            ``data.sequential``).
        splits: Iterable of split names.

    Returns:
        Dict mapping each split name to its ``torch.utils.data.DataLoader``.

    Raises:
        FileNotFoundError: If the cache file of a split is missing.
    """
    ret = {}
    for split in splits:
        evaluating = split in ['val', 'test']

        if evaluating and opt['data.test_way'] != 0:
            n_way = opt['data.test_way']
        else:
            n_way = opt['data.way']

        if evaluating and opt['data.test_shot'] != 0:
            n_support = opt['data.test_shot']
        else:
            n_support = opt['data.shot']

        if evaluating and opt['data.test_query'] != 0:
            n_query = opt['data.test_query']
        else:
            n_query = opt['data.query']

        if evaluating:
            n_episodes = opt['data.test_episodes']
        else:
            n_episodes = opt['data.train_episodes']

        # Fail loudly on a missing cache instead of silently reusing the
        # data loaded for a previous split.
        img_data, class_dict = _read_split_cache(get_cache_path(split))

        transforms = [
            partial(convert_dict, 'class'),
            partial(load_class_images, img_data, class_dict),
            partial(extract_episode, n_support, n_query),
        ]
        if opt['data.cuda']:
            transforms.append(CudaTransform())
        transforms = compose(transforms)

        class_names = list(class_dict)
        ds = TransformDataset(ListDataset(class_names), transforms)

        if opt['data.sequential']:
            sampler = SequentialBatchSampler(len(ds))
        else:
            sampler = EpisodicBatchSampler(len(ds), n_way, n_episodes)

        # use num_workers=0, otherwise may receive duplicate episodes
        ret[split] = torch.utils.data.DataLoader(
            ds, batch_sampler=sampler, num_workers=0)

    return ret
def load(opt, splits):
    """Build one episodic ``DataLoader`` per split from the split CSV files.

    Each ``<split>.csv`` under ``<MINIIMAGENET_DATA_DIR>/splits/<data.split>``
    is read as a header line followed by ``image,class`` rows; the images are
    grouped by class and the class names become the dataset items.

    Args:
        opt: Mapping of options (``data.split``, ``data.way``, ``data.shot``,
            ``data.query``, ``data.test_way``, ``data.test_shot``,
            ``data.test_query``, ``data.train_episodes``,
            ``data.test_episodes``, ``data.cuda``, ``data.sequential``).
        splits: Iterable of split names.

    Returns:
        Dict mapping each split name to its ``torch.utils.data.DataLoader``.
    """
    split_dir = os.path.join(MINIIMAGENET_DATA_DIR, 'splits', opt['data.split'])

    loaders = {}
    for split in splits:
        # 'val' and 'test' may override way/shot/query with non-zero values.
        held_out = split in ['val', 'test']
        n_way = (opt['data.test_way']
                 if (held_out and opt['data.test_way'] != 0)
                 else opt['data.way'])
        n_support = (opt['data.test_shot']
                     if (held_out and opt['data.test_shot'] != 0)
                     else opt['data.shot'])
        n_query = (opt['data.test_query']
                   if (held_out and opt['data.test_query'] != 0)
                   else opt['data.query'])
        n_episodes = (opt['data.test_episodes'] if held_out
                      else opt['data.train_episodes'])

        # class name -> list of image entries, in file order
        class_index = defaultdict(list)
        csv_path = os.path.join(split_dir, "{:s}.csv".format(split))
        with open(csv_path, 'r') as f:
            f.readline()  # skip the header row
            for row in f:
                image, class_name = row.split(',')
                class_index[class_name.rstrip('\n')].append(image)
        class_names = list(class_index.keys())

        pipeline = [
            partial(convert_dict, 'class'),
            partial(load_class_images, class_index),
            partial(extract_episode, n_support, n_query),
        ]
        if opt['data.cuda']:
            pipeline.append(CudaTransform())

        ds = TransformDataset(ListDataset(class_names), compose(pipeline))

        if opt['data.sequential']:
            sampler = SequentialBatchSampler(len(ds))
        else:
            sampler = EpisodicBatchSampler(len(ds), n_way, n_episodes)

        # use num_workers=0, otherwise may receive duplicate episodes
        loaders[split] = torch.utils.data.DataLoader(
            ds, batch_sampler=sampler, num_workers=0)

    return loaders
def load(opt, splits):
    """Build one episodic ``DataLoader`` per split from the split text files.

    Class names are read, one per line, from ``<split>.txt`` under
    ``<OMNIGLOT_DATA_DIR>/splits/<data.split>``.

    Args:
        opt: Mapping of options (``data.split``, ``data.way``, ``data.shot``,
            ``data.query``, ``data.test_way``, ``data.test_shot``,
            ``data.test_query``, ``data.train_episodes``,
            ``data.test_episodes``, ``data.cuda``, ``data.sequential``).
        splits: Iterable of split names.

    Returns:
        Dict mapping each split name to its ``torch.utils.data.DataLoader``.
    """
    split_dir = os.path.join(OMNIGLOT_DATA_DIR, 'splits', opt['data.split'])

    loaders = {}
    for split in splits:
        held_out = split in ['val', 'test']

        # Number of classes per episode (n_way).
        n_way = (opt['data.test_way']
                 if (held_out and opt['data.test_way'] != 0)
                 else opt['data.way'])
        # Number of support examples.
        n_support = (opt['data.test_shot']
                     if (held_out and opt['data.test_shot'] != 0)
                     else opt['data.shot'])
        # Number of query examples.
        n_query = (opt['data.test_query']
                   if (held_out and opt['data.test_query'] != 0)
                   else opt['data.query'])
        # Number of episodes.
        n_episodes = (opt['data.test_episodes'] if held_out
                      else opt['data.train_episodes'])

        # Three-stage pipeline: wrap the item in a dict under the key
        # 'class', load the data of that class, then extract the support and
        # query sets for it (stage descriptions follow the original author's
        # notes; the helpers themselves are defined elsewhere).
        stages = [
            partial(convert_dict, 'class'),
            load_class_images,
            partial(extract_episode, n_support, n_query),
        ]
        if opt['data.cuda']:
            stages.append(CudaTransform())
        transforms = compose(stages)

        # Collect every class name listed for this split.
        listing = os.path.join(split_dir, "{:s}.txt".format(split))
        with open(listing, 'r') as f:
            class_names = [line.rstrip('\n') for line in f]

        # Each dataset item is one class run through the pipeline above.
        ds = TransformDataset(ListDataset(class_names), transforms)

        if opt['data.sequential']:
            sampler = SequentialBatchSampler(len(ds))
        else:
            # Per the original note: each episode draws n_way classes at
            # random (sampler behavior is defined elsewhere).
            sampler = EpisodicBatchSampler(len(ds), n_way, n_episodes)

        # Wrap the dataset so that it is served episode by episode.
        # use num_workers=0, otherwise may receive duplicate episodes
        loaders[split] = torch.utils.data.DataLoader(
            ds, batch_sampler=sampler, num_workers=0)

    return loaders
def load(opt, splits):
    """Build one episodic ``DataLoader`` per split and print the result types.

    Class names are read, one per line, from ``<split>.txt`` under
    ``<OMNIGLOT_DATA_DIR>/splits/<data.split>``. Before returning, the type
    of the result dict and of every loader in it is printed to stdout.

    Args:
        opt: Mapping of options (``data.split``, ``data.way``, ``data.shot``,
            ``data.query``, ``data.test_way``, ``data.test_shot``,
            ``data.test_query``, ``data.train_episodes``,
            ``data.test_episodes``, ``data.cuda``, ``data.sequential``).
        splits: Iterable of split names.

    Returns:
        Dict mapping each split name to its ``torch.utils.data.DataLoader``.
    """
    split_dir = os.path.join(OMNIGLOT_DATA_DIR, 'splits', opt['data.split'])

    ret = {}
    for split in splits:
        # 'val' and 'test' may override way/shot/query with non-zero values.
        held_out = split in ['val', 'test']
        n_way = (opt['data.test_way']
                 if (held_out and opt['data.test_way'] != 0)
                 else opt['data.way'])
        n_support = (opt['data.test_shot']
                     if (held_out and opt['data.test_shot'] != 0)
                     else opt['data.shot'])
        n_query = (opt['data.test_query']
                   if (held_out and opt['data.test_query'] != 0)
                   else opt['data.query'])
        n_episodes = (opt['data.test_episodes'] if held_out
                      else opt['data.train_episodes'])

        stages = [
            partial(convert_dict, 'class'),
            load_class_images,
            partial(extract_episode, n_support, n_query),
        ]
        if opt['data.cuda']:
            stages.append(CudaTransform())
        transforms = compose(stages)

        listing = os.path.join(split_dir, "{:s}.txt".format(split))
        with open(listing, 'r') as f:
            class_names = [line.rstrip('\n') for line in f]

        ds = TransformDataset(ListDataset(class_names), transforms)

        if opt['data.sequential']:
            sampler = SequentialBatchSampler(len(ds))
        else:
            sampler = EpisodicBatchSampler(len(ds), n_way, n_episodes)

        # use num_workers=0, otherwise may receive duplicate episodes
        ret[split] = torch.utils.data.DataLoader(
            ds, batch_sampler=sampler, num_workers=0)

    # Diagnostic output: container type, then one line per split.
    print("Ret:", type(ret))
    for key, value in ret.items():
        print(key, type(value))

    return ret
def load(opt, splits):
    """Build one episodic ``DataLoader`` per split from the global ``dataset``.

    For every split, ``dataset[split]['class']`` supplies the dataset items
    and ``dataset[split]['data']`` is handed to ``extract_episode`` together
    with ``data.min_len`` / ``data.max_len`` and the support/query counts.

    Args:
        opt: Mapping of options (``data.way``, ``data.shot``, ``data.query``,
            ``data.test_way``, ``data.test_shot``, ``data.test_query``,
            ``data.train_episodes``, ``data.test_episodes``,
            ``data.min_len``, ``data.max_len``, ``data.cuda``,
            ``data.sequential``).
        splits: Iterable of split names.

    Returns:
        Dict mapping each split name to its ``torch.utils.data.DataLoader``.
    """
    loaders = {}
    for split in splits:
        # 'val' and 'test' may override way/shot/query with non-zero values.
        held_out = split in ['val', 'test']
        n_way = (opt['data.test_way']
                 if (held_out and opt['data.test_way'] != 0)
                 else opt['data.way'])
        n_support = (opt['data.test_shot']
                     if (held_out and opt['data.test_shot'] != 0)
                     else opt['data.shot'])
        n_query = (opt['data.test_query']
                   if (held_out and opt['data.test_query'] != 0)
                   else opt['data.query'])
        n_episodes = (opt['data.test_episodes'] if held_out
                      else opt['data.train_episodes'])

        split_entry = dataset[split]
        speaker_ids = split_entry['class']
        data_split = dataset[split]['data']

        tensor_keys = ['xq_padded', 'xs_padded', 'xq_len', 'xs_len']
        stages = [
            partial(convert_dict, 'class'),
            partial(extract_episode, 'class', data_split,
                    opt['data.min_len'], opt['data.max_len'],
                    n_support, n_query),
            partial(convert_tensor, tensor_keys),
        ]
        if opt['data.cuda']:
            stages.append(CudaTransform())

        ds = TransformDataset(ListDataset(speaker_ids), compose(stages))

        if opt['data.sequential']:
            sampler = SequentialBatchSampler(len(ds))
        else:
            sampler = EpisodicBatchSampler(len(ds), n_way, n_episodes)

        loaders[split] = torch.utils.data.DataLoader(
            ds, batch_sampler=sampler, num_workers=0)

    return loaders
def __iter__(self):
    """Yield transformed batches tagged with the episode settings.

    The dataset for ``self.split`` is loaded from disk on first use. The
    transform pipeline is rebuilt and stored in ``self.transforms`` on every
    call, the number of batches is printed, and each yielded batch gets the
    keys ``'n_way'``, ``'n_support'`` and ``'n_query'`` set from the
    instance.
    """
    if self.dataset is None:
        # Lazy load: only the entry for this split is kept.
        self.dataset = self.load_dataset(from_disk=True)[self.split]

    stages = [
        partial(batch_from_index, self.dataset['data']),
        partial(convert_tensor, 'data'),
    ]
    if self.if_cuda:
        stages.append(CudaTransform())
    self.transforms = compose(stages)

    index_batches = self.shuffle_dataset()
    batches = TransformDataset(ListDataset(index_batches), self.transforms)
    print(f"\nSize of batches: {len(batches)}")

    for batch in batches:
        for field in ('n_way', 'n_support', 'n_query'):
            batch[field] = getattr(self, field)
        yield batch
def _kws_class_names(filenames, exclude=()):
    """Return the distinct class prefixes of ``filenames`` in first-seen order.

    The class prefix of a file is the text before its first ``'_'``.
    Prefixes listed in ``exclude`` are left out.
    """
    seen = set(exclude)
    names = []
    for filename in filenames:
        prefix = filename.split('_')[0]
        if prefix not in seen:
            seen.add(prefix)
            names.append(prefix)
    return names


def load_kws(opt, splits):
    """Build one episodic ``DataLoader`` per requested keyword-spotting split.

    The first entry of ``splits`` selects the mode:

    * ``'test'``: classes are derived from the file names found in
      ``KWS_DATA_DIR_TEST`` and only the ``'test'`` split is available.
    * anything else: classes are derived from ``KWS_DATA_DIR``; a fixed list
      of labels forms the ``'val'`` split and every other class prefix forms
      the ``'train'`` split.

    Requesting a split that is not available in the selected mode raises
    ``KeyError``.

    Args:
        opt: Mapping of options (``data.way``, ``data.shot``, ``data.query``,
            ``data.test_way``, ``data.test_shot``, ``data.test_query``,
            ``data.train_episodes``, ``data.test_episodes``, ``data.cuda``,
            ``data.sequential``).
        splits: Sequence of split names; an empty sequence yields ``{}``.

    Returns:
        Dict mapping each split name to its ``torch.utils.data.DataLoader``.
    """
    # Match the sibling loaders, which return an empty dict when no split is
    # requested, instead of failing on ``splits[0]``.
    if not splits:
        return {}

    dataset_self = {}
    if splits[0] == 'test':
        data_dir = KWS_DATA_DIR_TEST
        files = sorted(os.listdir(KWS_DATA_DIR_TEST))
        dataset_self['test'] = _kws_class_names(files)
    else:
        data_dir = KWS_DATA_DIR
        files = sorted(os.listdir(KWS_DATA_DIR))
        # NOTE(review): this list repeats labels; it is kept verbatim because
        # its length determines the size of the validation dataset. Confirm
        # whether the repetition is intentional.
        val_class_names = [
            'label01', 'label13', 'label03', 'label13', 'label03',
            'label13', 'label03', 'label03'
        ]
        dataset_self['train'] = _kws_class_names(files, exclude=val_class_names)
        dataset_self['val'] = val_class_names

    ret = {}
    for split in splits:
        # 'val' and 'test' may override way/shot/query with non-zero values.
        evaluating = split in ['val', 'test']

        if evaluating and opt['data.test_way'] != 0:
            n_way = opt['data.test_way']
        else:
            n_way = opt['data.way']

        if evaluating and opt['data.test_shot'] != 0:
            n_support = opt['data.test_shot']
        else:
            n_support = opt['data.shot']

        if evaluating and opt['data.test_query'] != 0:
            n_query = opt['data.test_query']
        else:
            n_query = opt['data.query']

        if evaluating:
            n_episodes = opt['data.test_episodes']
        else:
            n_episodes = opt['data.train_episodes']

        transforms = [
            partial(convert_dict, 'class'),
            partial(load_class_features, data_dir),
            partial(extract_episode, n_support, n_query),
        ]
        if opt['data.cuda']:
            transforms.append(CudaTransform())
        transforms = compose(transforms)

        ds = TransformDataset(ListDataset(dataset_self[split]), transforms)

        if opt['data.sequential']:
            sampler = SequentialBatchSampler(len(ds))
        else:
            sampler = EpisodicBatchSampler(len(ds), n_way, n_episodes)

        # use num_workers=0, otherwise may receive duplicate episodes
        ret[split] = torch.utils.data.DataLoader(
            ds, batch_sampler=sampler, num_workers=0)

    return ret
def load(opt, splits):
    """Build one episodic ``DataLoader`` per split from the split CSV files.

    The second comma-separated column of every row in ``<split>.csv`` (under
    ``<MINIIMAGENET_DATA_DIR>/splits/<data.split>``) is taken as a class
    name; the header value ``'label'`` and blank lines are skipped. Each
    class is listed once, in first-seen order, so that the dataset holds one
    item per class rather than one per CSV row. With ``data.augmented`` set,
    every class is expanded into four entries suffixed ``/rot000``,
    ``/rot090``, ``/rot180`` and ``/rot270``.

    Args:
        opt: Mapping of options (``data.split``, ``data.way``, ``data.shot``,
            ``data.query``, ``data.test_way``, ``data.test_shot``,
            ``data.test_query``, ``data.train_episodes``,
            ``data.test_episodes``, ``data.cuda``, ``data.sequential``,
            ``data.augmented``).
        splits: Iterable of split names.

    Returns:
        Dict mapping each split name to its ``torch.utils.data.DataLoader``.
    """
    split_dir = os.path.join(MINIIMAGENET_DATA_DIR, 'splits', opt['data.split'])

    ret = {}
    for split in splits:
        # 'val' and 'test' may override way/shot/query with non-zero values.
        evaluating = split in ['val', 'test']

        if evaluating and opt['data.test_way'] != 0:
            n_way = opt['data.test_way']
        else:
            n_way = opt['data.way']

        if evaluating and opt['data.test_shot'] != 0:
            n_support = opt['data.test_shot']
        else:
            n_support = opt['data.shot']

        if evaluating and opt['data.test_query'] != 0:
            n_query = opt['data.test_query']
        else:
            n_query = opt['data.query']

        if evaluating:
            n_episodes = opt['data.test_episodes']
        else:
            n_episodes = opt['data.train_episodes']

        transforms = [
            partial(convert_dict, 'class'),
            load_class_images,
            partial(extract_episode, n_support, n_query),
        ]
        if opt['data.cuda']:
            transforms.append(CudaTransform())
        transforms = compose(transforms)

        class_names = []
        seen = set()
        with open(os.path.join(split_dir, "{:s}.csv".format(split)), 'r') as f:
            for line in f:
                if not line.strip():
                    continue  # tolerate blank / trailing empty lines
                name = line.split(',')[1].rstrip('\n')
                # Skip the header row and classes that were already listed:
                # repeated rows of one class must not become separate
                # dataset items.
                if name == 'label' or name in seen:
                    continue
                seen.add(name)
                if opt['data.augmented']:
                    class_names.extend([
                        name + '/rot000', name + '/rot090',
                        name + '/rot180', name + '/rot270'
                    ])
                else:
                    class_names.append(name)

        ds = TransformDataset(ListDataset(class_names), transforms)

        if opt['data.sequential']:
            sampler = SequentialBatchSampler(len(ds))
        else:
            sampler = EpisodicBatchSampler(len(ds), n_way, n_episodes)

        # use num_workers=0, otherwise may receive duplicate episodes
        ret[split] = torch.utils.data.DataLoader(
            ds, batch_sampler=sampler, num_workers=0)

    return ret