Example #1
 # Requires numpy as np and the project helpers read_npys,
 # shuffle_image_label, split_array and GenerateBatch.
 def __init__(self, data_path, batch_size=100):
     self.data_dir = data_path
     self.batch_size = batch_size
     self.train_image, self.train_label, self.test_image, self.test_label = read_npys(
         self.data_dir)
     self.train_image, self.train_label = shuffle_image_label(
         self.train_image, self.train_label)
     self.test_image, self.test_label = shuffle_image_label(
         self.test_image, self.test_label)
     # Hold out 20% of the training data for validation (80/20 split).
     split_rate = [0.8]
     splited_images = split_array(self.train_image, num=2, rate=split_rate)
     splited_labels = split_array(self.train_label, num=2, rate=split_rate)
     self.train_image = splited_images[0]
     self.train_label = splited_labels[0]
     self.val_image = splited_images[1]
     self.val_label = splited_labels[1]
     # Report the shapes of the train/val/test splits.
     print(np.shape(self.train_image), np.shape(self.val_image),
           np.shape(self.test_image))
     print(np.shape(self.train_label), np.shape(self.val_label),
           np.shape(self.test_label))
     self.train_generator = GenerateBatch(
         self.train_image,
         self.train_label,
         self.batch_size,
         epoch_num=None).generate_next_batch()
     self.val_generator = GenerateBatch(
         self.val_image, self.val_label, self.batch_size,
         epoch_num=None).generate_next_batch()
     self.test_generator = GenerateBatch(self.test_image,
                                         self.test_label,
                                         self.batch_size,
                                         epoch_num=1).generate_next_batch()
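
For context, a minimal usage sketch. The class name is not visible in this snippet, so DataSet below is an assumed placeholder, as is the assumption that generate_next_batch() yields (images, labels) tuples:

# Hypothetical usage; DataSet is an assumed name for the class owning this __init__.
dataset = DataSet('/path/to/npy/data', batch_size=64)
images, labels = next(dataset.train_generator)  # one training batch
# The train/val generators cycle indefinitely (epoch_num=None); the test
# generator stops after one pass (epoch_num=1), so it can drive evaluation:
for test_images, test_labels in dataset.test_generator:
    pass  # evaluate on each test batch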
Example #2
import os
from multiprocessing import Pool


# split_array and extract_patches_multifiles are helpers defined elsewhere
# in the project.
def extract_patches_singledir(data_dir, target_label, patch_size, patch_step, save_dir, multiprocess=8):
    # NOTE: save_dir is accepted but never used in this snippet.
    names = os.listdir(data_dir)
    patches = []
    labeles = []
    coding_labeles = []
    if multiprocess is None:
        patches, coding_labeles, labeles = extract_patches_multifiles(data_dir, names, target_label, patch_size,
                                                                      patch_step, None)
    else:
        # Split the file list into `multiprocess` groups and hand each group
        # to a worker process; Pool() defaults to os.cpu_count() workers.
        names_group = split_array(names, multiprocess)
        pool = Pool()
        results = []
        for i in range(multiprocess):
            result = pool.apply_async(extract_patches_multifiles,
                                      (data_dir, names_group[i], target_label, patch_size, patch_step, None,))
            results.append(result)
        pool.close()
        pool.join()
        for i in range(multiprocess):
            cur_patches, cur_coding_labeles, cur_labeles = results[i].get()
            patches.extend(cur_patches)
            labeles.extend(cur_labeles)
            coding_labeles.extend(cur_coding_labeles)

    return patches, coding_labeles, labeles
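
A minimal call sketch with illustrative argument values (the real label, patch size and step depend on the dataset). With multiprocess=None everything runs in the calling process; otherwise the file list is split into multiprocess groups handled by a worker pool:

# Hypothetical values for target_label, patch_size and patch_step.
patches, coding_labeles, labeles = extract_patches_singledir(
    '/path/to/cases', target_label=1, patch_size=32, patch_step=16,
    save_dir=None, multiprocess=None)  # single-process path

patches, coding_labeles, labeles = extract_patches_singledir(
    '/path/to/cases', target_label=1, patch_size=32, patch_step=16,
    save_dir=None)  # default: work split across 8 worker tasks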
Example #3
import os
from multiprocessing import Pool


def extract_patches_singledir(
        data_dir,
        target_label,
        patch_size,
        patch_step,
        save_dir,
        multiprocess=8,
        extract_patches_multifiles_function=extract_patches_multifiles_interior
):
    names = os.listdir(data_dir)
    patches = []
    labeles = []
    if multiprocess is None:
        patches, labeles = extract_patches_multifiles_function(
            data_dir, names, target_label, patch_size, patch_step, None)
    else:
        names_group = split_array(names, multiprocess)
        pool = Pool()
        results = []
        for i in range(multiprocess):
            result = pool.apply_async(extract_patches_multifiles_function, (
                data_dir,
                names_group[i],
                target_label,
                patch_size,
                patch_step,
                None,
            ))
            results.append(result)
        pool.close()
        pool.join()
        for i in range(multiprocess):
            try:
                cur_patches, cur_labeles = results[i].get()
                patches.extend(cur_patches)
                labeles.extend(cur_labeles)
            except ValueError:
                # Skip a worker whose result cannot be unpacked into two
                # sequences (e.g. it produced nothing).
                pass
    return patches, labeles
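
This variant makes the extractor pluggable: any callable with the same six-parameter signature that returns (patches, labeles) can be passed in. Below is a minimal conforming stub, illustrative only; it must live at module level so the Pool workers can pickle it by reference:

def dummy_extractor(data_dir, names, target_label, patch_size, patch_step, save_dir):
    # Mirrors the assumed contract of extract_patches_multifiles_interior:
    # return a list of patches and a parallel list of labels.
    patches, labeles = [], []
    for name in names:
        pass  # load the file and cut patches of patch_size with stride patch_step
    return patches, labeles

patches, labeles = extract_patches_singledir(
    '/path/to/cases', target_label=1, patch_size=32, patch_step=16,
    save_dir=None, extract_patches_multifiles_function=dummy_extractor)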
Example #4
    def __init__(self,
                 data_dir,
                 batch_size,
                 reshape_flag,
                 is_fine=True,
                 one_hot=True):
        '''
        Load the CIFAR-100 data.
        :param data_dir: directory containing the CIFAR-100 data; it should hold the three files meta, train and test
        :param batch_size: number of samples in each returned batch
        :param reshape_flag: whether to reshape the data, which is stored as flat column vectors by default
        :param is_fine: CIFAR-100 carries two labels per image: fine (100 small classes) and coarse (20 superclasses); True selects the fine labels
        :param one_hot: whether to one-hot encode the labels
        '''
        self.file_pathes = []
        self.batch_size = batch_size
        self.training_file_path = os.path.join(data_dir, 'train')
        self.testing_file_path = os.path.join(data_dir, 'test')
        if (not os.path.exists(self.testing_file_path)) or (not os.path.exists(
                self.training_file_path)):
            print('Please download the full dataset')
            return
        # coarse_labels: a list of n values in the range 0-19, giving each image's superclass
        # fine_labels: a list of n values in the range 0-99, giving each image's fine class
        self.training_data, self.training_fine_labels, \
            self.training_coarse_labels, self.training_filenames = \
            self.generate_training_dataset()
        self.testing_data, self.testing_fine_labels, \
            self.testing_coarse_labels, self.testing_filenames = \
            self.generate_testing_dataset()
        if one_hot:
            self.training_fine_labels = indices_to_one_hot(
                self.training_fine_labels, 100)
            self.training_coarse_labels = indices_to_one_hot(
                self.training_coarse_labels, 20)
            self.testing_fine_labels = indices_to_one_hot(
                self.testing_fine_labels, 100)
            self.testing_coarse_labels = indices_to_one_hot(
                self.testing_coarse_labels, 20)

        # Hold out 20% of the training set for validation (80/20 split).
        split_rate = [0.8]
        data_splited = split_array(self.training_data, 2, split_rate)
        fine_labels_splited = split_array(self.training_fine_labels, 2,
                                          split_rate)
        coarse_labels_splited = split_array(self.training_coarse_labels, 2,
                                            split_rate)
        filenames_splited = split_array(self.training_filenames, 2, split_rate)
        self.training_data = data_splited[0]
        self.training_fine_labels = fine_labels_splited[0]
        self.training_coarse_labels = coarse_labels_splited[0]
        self.training_filenames = filenames_splited[0]
        self.val_data = data_splited[1]
        self.val_fine_labels = fine_labels_splited[1]
        self.val_coarse_labels = coarse_labels_splited[1]
        self.val_filenames = filenames_splited[1]
        if reshape_flag:
            self.training_data = self.reshape(np.array(self.training_data))
            self.val_data = self.reshape(np.array(self.val_data))
            self.testing_data = self.reshape(np.array(self.testing_data))
        if is_fine:
            self.train_generator = GenerateBatch(
                self.training_data,
                self.training_fine_labels,
                self.batch_size,
                epoch_num=None).generate_next_batch()
            self.val_generator = GenerateBatch(
                self.val_data,
                self.val_fine_labels,
                self.batch_size,
                epoch_num=None).generate_next_batch()
            self.test_generator = GenerateBatch(
                self.testing_data,
                self.testing_fine_labels,
                self.batch_size,
                epoch_num=1).generate_next_batch()
        else:
            self.train_generator = GenerateBatch(
                self.training_data,
                self.training_coarse_labels,
                self.batch_size,
                epoch_num=None).generate_next_batch()
            self.val_generator = GenerateBatch(
                self.val_data,
                self.val_coarse_labels,
                self.batch_size,
                epoch_num=None).generate_next_batch()
            self.test_generator = GenerateBatch(
                self.testing_data,
                self.testing_coarse_labels,
                self.batch_size,
                epoch_num=1).generate_next_batch()
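
A minimal usage sketch; Cifar100DataSet below is an assumed name for the class that owns this __init__:

# Hypothetical usage; Cifar100DataSet is an assumed class name.
loader = Cifar100DataSet('/path/to/cifar-100-python', batch_size=128,
                         reshape_flag=True, is_fine=True, one_hot=True)
images, fine_labels = next(loader.train_generator)  # one-hot fine labels, width 100
# With is_fine=False the generators yield the 20 coarse (superclass) labels instead.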