# Shared imports for the snippets below; read_npys, shuffle_image_label,
# split_array, GenerateBatch, indices_to_one_hot and the patch-extraction
# helpers are project-local utilities.
import os
from multiprocessing import Pool

import numpy as np


def __init__(self, data_path, batch_size=100):
    self.data_dir = data_path
    self.batch_size = batch_size
    self.train_image, self.train_label, self.test_image, self.test_label = \
        read_npys(self.data_dir)
    self.train_image, self.train_label = shuffle_image_label(
        self.train_image, self.train_label)
    self.test_image, self.test_label = shuffle_image_label(
        self.test_image, self.test_label)
    # Hold out the last 20% of the shuffled training set for validation.
    split_rate = [0.8]
    split_images = split_array(self.train_image, num=2, rate=split_rate)
    split_labels = split_array(self.train_label, num=2, rate=split_rate)
    self.train_image = split_images[0]
    self.train_label = split_labels[0]
    self.val_image = split_images[1]
    self.val_label = split_labels[1]
    print(np.shape(self.train_image), np.shape(self.val_image),
          np.shape(self.test_image))
    print(np.shape(self.train_label), np.shape(self.val_label),
          np.shape(self.test_label))
    # Train/val generators cycle indefinitely (epoch_num=None); the test
    # generator stops after a single pass (epoch_num=1).
    self.train_generator = GenerateBatch(
        self.train_image, self.train_label, self.batch_size,
        epoch_num=None).generate_next_batch()
    self.val_generator = GenerateBatch(
        self.val_image, self.val_label, self.batch_size,
        epoch_num=None).generate_next_batch()
    self.test_generator = GenerateBatch(
        self.test_image, self.test_label, self.batch_size,
        epoch_num=1).generate_next_batch()
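# These loaders lean on a project-local `split_array(array, num, rate)`
# helper. A minimal sketch of its assumed behavior -- cut a sequence into
# `num` pieces, either at the cumulative fractions in `rate` or into equal
# chunks when `rate` is None (name and semantics here are assumptions, not
# the repository's actual implementation):
def split_array_sketch(array, num, rate=None):
    n = len(array)
    if rate is None:
        # Equal-sized chunks, as used by the multiprocess file grouping.
        bounds = [n * (i + 1) // num for i in range(num - 1)]
    else:
        # Cumulative fractional cut points, e.g. rate=[0.8] -> 80%/20%.
        bounds = [int(n * r) for r in rate]
    pieces, start = [], 0
    for end in bounds + [n]:
        pieces.append(array[start:end])
        start = end
    return pieces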
def extract_patches_singledir(data_dir, target_label, patch_size, patch_step,
                              save_dir, multiprocess=8):
    names = os.listdir(data_dir)
    patches = []
    labels = []
    coding_labels = []
    if multiprocess is None:
        # Single-process path: extract from every file in one call.
        patches, coding_labels, labels = extract_patches_multifiles(
            data_dir, names, target_label, patch_size, patch_step, None)
    else:
        # Split the file list into `multiprocess` groups and extract each
        # group's patches in a separate worker process.
        names_group = split_array(names, multiprocess)
        pool = Pool()
        results = []
        for i in range(multiprocess):
            result = pool.apply_async(
                extract_patches_multifiles,
                (data_dir, names_group[i], target_label, patch_size,
                 patch_step, None))
            results.append(result)
        pool.close()
        pool.join()
        for i in range(multiprocess):
            cur_patches, cur_coding_labels, cur_labels = results[i].get()
            patches.extend(cur_patches)
            labels.extend(cur_labels)
            coding_labels.extend(cur_coding_labels)
    return patches, coding_labels, labels
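# A hedged usage sketch for the extractor above; the directory path and
# patch parameters below are placeholders, not the project's settings.
if __name__ == '__main__':
    patches, coding_labels, labels = extract_patches_singledir(
        data_dir='./data/cases',   # hypothetical input directory
        target_label=1,
        patch_size=32,
        patch_step=16,
        save_dir=None,             # accepted but unused in this code path
        multiprocess=8)
    print(len(patches), len(coding_labels), len(labels))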
def extract_patches_singledir(
        data_dir, target_label, patch_size, patch_step, save_dir,
        multiprocess=8,
        extract_patches_multifiles_function=extract_patches_multifiles_interior):
    names = os.listdir(data_dir)
    patches = []
    labels = []
    if multiprocess is None:
        patches, labels = extract_patches_multifiles_function(
            data_dir, names, target_label, patch_size, patch_step, None)
    else:
        names_group = split_array(names, multiprocess)
        pool = Pool()
        results = []
        for i in range(multiprocess):
            result = pool.apply_async(
                extract_patches_multifiles_function,
                (data_dir, names_group[i], target_label, patch_size,
                 patch_step, None))
            results.append(result)
        pool.close()
        pool.join()
        for i in range(multiprocess):
            try:
                cur_patches, cur_labels = results[i].get()
                patches.extend(cur_patches)
                labels.extend(cur_labels)
            except ValueError:
                # A group that produced no patches can fail to unpack;
                # skip it instead of aborting the whole extraction.
                pass
    return patches, labels
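# The pluggable `extract_patches_multifiles_function` must accept
# (data_dir, names, target_label, patch_size, patch_step, save_dir) and
# return a (patches, labels) pair. A hypothetical stub illustrating that
# contract (not the repository's real extractor):
def extract_patches_multifiles_stub(data_dir, names, target_label,
                                    patch_size, patch_step, save_dir):
    patches, labels = [], []
    for name in names:
        # ... load os.path.join(data_dir, name), slide a patch_size window
        # with stride patch_step, keep patches annotated target_label ...
        pass
    return patches, labels

# Swapping it in:
# patches, labels = extract_patches_singledir(
#     data_dir, target_label=1, patch_size=32, patch_step=16, save_dir=None,
#     multiprocess=None,
#     extract_patches_multifiles_function=extract_patches_multifiles_stub)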
def __init__(self, data_dir, batch_size, reshape_flag, is_fine=True,
             one_hot=True):
    '''
    Load the CIFAR-100 dataset.
    :param data_dir: directory holding the CIFAR-100 files: meta, train and test
    :param batch_size: number of samples per returned batch
    :param reshape_flag: whether to reshape the data (it is stored as flat
        vectors by default)
    :param is_fine: CIFAR-100 has two label sets: 'fine' labels are the 100
        subclasses, 'coarse' labels are the 20 superclasses
    '''
    self.file_pathes = []
    self.batch_size = batch_size
    self.training_file_path = os.path.join(data_dir, 'train')
    self.testing_file_path = os.path.join(data_dir, 'test')
    if (not os.path.exists(self.testing_file_path)) or (
            not os.path.exists(self.training_file_path)):
        print('Please download the full dataset')
        return
    # coarse_labels -- a list of n values in 0-19, each image's superclass
    # fine_labels -- a list of n values in 0-99, each image's subclass
    (self.training_data, self.training_fine_labels,
     self.training_coarse_labels,
     self.training_filenames) = self.generate_training_dataset()
    (self.testing_data, self.testing_fine_labels,
     self.testing_coarse_labels,
     self.testing_filenames) = self.generate_testing_dataset()
    if one_hot:
        self.training_fine_labels = indices_to_one_hot(
            self.training_fine_labels, 100)
        self.training_coarse_labels = indices_to_one_hot(
            self.training_coarse_labels, 20)
        self.testing_fine_labels = indices_to_one_hot(
            self.testing_fine_labels, 100)
        self.testing_coarse_labels = indices_to_one_hot(
            self.testing_coarse_labels, 20)
    # Hold out the last 20% of the training set for validation.
    split_rate = [0.8]
    split_data = split_array(self.training_data, 2, split_rate)
    split_fine_labels = split_array(self.training_fine_labels, 2, split_rate)
    split_coarse_labels = split_array(self.training_coarse_labels, 2,
                                      split_rate)
    split_filenames = split_array(self.training_filenames, 2, split_rate)
    self.training_data = split_data[0]
    self.training_fine_labels = split_fine_labels[0]
    self.training_coarse_labels = split_coarse_labels[0]
    self.training_filenames = split_filenames[0]
    self.val_data = split_data[1]
    self.val_fine_labels = split_fine_labels[1]
    self.val_coarse_labels = split_coarse_labels[1]
    self.val_filenames = split_filenames[1]
    if reshape_flag:
        self.training_data = self.reshape(np.array(self.training_data))
        self.val_data = self.reshape(np.array(self.val_data))
        self.testing_data = self.reshape(np.array(self.testing_data))
    # Pick fine or coarse labels; train/val generators cycle indefinitely,
    # the test generator runs for a single epoch.
    if is_fine:
        self.train_generator = GenerateBatch(
            self.training_data, self.training_fine_labels, self.batch_size,
            epoch_num=None).generate_next_batch()
        self.val_generator = GenerateBatch(
            self.val_data, self.val_fine_labels, self.batch_size,
            epoch_num=None).generate_next_batch()
        self.test_generator = GenerateBatch(
            self.testing_data, self.testing_fine_labels, self.batch_size,
            epoch_num=1).generate_next_batch()
    else:
        self.train_generator = GenerateBatch(
            self.training_data, self.training_coarse_labels, self.batch_size,
            epoch_num=None).generate_next_batch()
        self.val_generator = GenerateBatch(
            self.val_data, self.val_coarse_labels, self.batch_size,
            epoch_num=None).generate_next_batch()
        self.test_generator = GenerateBatch(
            self.testing_data, self.testing_coarse_labels, self.batch_size,
            epoch_num=1).generate_next_batch()
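# Assumed usage of the CIFAR-100 wrapper above (the class name `Cifar100`
# is a guess; substitute the repository's actual class):
# dataset = Cifar100('./cifar-100-python', batch_size=128,
#                    reshape_flag=True, is_fine=True, one_hot=True)
# images, labels = next(dataset.train_generator)  # one training batch
# Iterate test_generator with a for-loop: it stops after one epoch
# (epoch_num=1), while the train/val generators cycle forever.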