def __init__(self, num_images, category, lmdb_dir, batch_size, episode_len=20,
             episode_shift=10, shuffle=True, seed=None, save_to_dir=None,
             class_mode='categorical', save_prefix='', save_format='jpeg'):
    self.category = category
    self.batch_size = batch_size
    self.lmdb_dir = lmdb_dir
    self.episode_len = episode_len
    self.episode_shift = episode_shift
    self.class_mode = class_mode
    self.save_to_dir = save_to_dir
    self.save_prefix = save_prefix
    self.save_format = save_format
    print("Initializing Iterator " + category + " Number of images " + str(num_images))
    # print(category, lmdb_dir, batch_size, shuffle, seed)
    self.env = lmdb.open(lmdb_dir, readonly=True)
    Iterator.__init__(self, num_images, batch_size, shuffle, seed)
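# A minimal sketch, not part of the class above, of how a record could be read
# back from the LMDB environment opened in __init__. The key encoding and the
# assumption that values are raw encoded images (e.g. JPEG bytes) are
# illustrative only; adjust the decoding to match whatever wrote the database.
import io

import lmdb
import numpy as np
from PIL import Image


def read_image_from_lmdb(env, key, target_size=(256, 256)):
    # One read-only transaction per lookup; keys are assumed to be UTF-8 strings.
    with env.begin(write=False) as txn:
        buf = txn.get(key.encode('utf-8'))
    if buf is None:
        raise KeyError("no LMDB record for key {!r}".format(key))
    img = Image.open(io.BytesIO(buf)).convert('RGB').resize(target_size)
    return np.asarray(img, dtype=np.float32) / 255.0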
def __init__(self, bson_file, images_df, offsets_df, num_class, image_data_generator,
             lock, target_size=(180, 180), with_labels=True, batch_size=32,
             shuffle=False, seed=None):
    self.images_df = images_df
    self.offsets_df = offsets_df
    self.with_labels = with_labels
    self.samples = len(images_df)
    self.num_class = num_class
    self.file = bson_file
    self.image_data_generator = image_data_generator
    self.target_size = tuple(target_size)
    self.image_shape = self.target_size + (3,)
    # Initialize the base Keras Iterator.
    Iterator.__init__(self, self.samples, batch_size, shuffle, seed)
    # Shared lock: the BSON file handle is read from multiple worker threads.
    self.lock = lock
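# A minimal sketch, not the class's actual batch code, of how one product record
# might be pulled out of the shared BSON file using the offsets table and lock
# stored above. The 'offset'/'length' column names, the offsets_df index, and the
# 'imgs'/'picture' document layout are assumptions for illustration.
import io

import bson  # provided by the pymongo package
from PIL import Image


def load_product_image(bson_file, offsets_df, lock, product_id, target_size=(180, 180)):
    row = offsets_df.loc[product_id]
    # The file handle is shared between threads, so seek + read must be atomic.
    with lock:
        bson_file.seek(row['offset'])
        item_data = bson_file.read(row['length'])
    item = bson.BSON(item_data).decode()
    # Each product document is assumed to contain one or more encoded pictures.
    picture_bytes = item['imgs'][0]['picture']
    return Image.open(io.BytesIO(picture_bytes)).convert('RGB').resize(target_size)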
def flow_from_directory(self, batch_size=32, shuffle=True, seed=42):
    self.regressIter = Iterator(len(self.filenames), batch_size=batch_size,
                                shuffle=shuffle, seed=seed)
    if self.do_augmentation:
        factor = 3
    else:
        factor = 1
    self.steps = math.ceil(len(self.filenames) / batch_size) * factor
    return self
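# Example usage under assumed names: the generator class, its constructor
# arguments, and the compiled Keras `model` are placeholders; only
# flow_from_directory(), the returned iterator, and .steps come from the method
# above. With do_augmentation=True each epoch makes three passes over the files.
gen = RegressionImageGenerator(image_dir='data/train', do_augmentation=True)  # hypothetical class
gen = gen.flow_from_directory(batch_size=16, shuffle=True, seed=42)
model.fit(gen, steps_per_epoch=gen.steps, epochs=10)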
def __init__(self, memmap_path, memmap_shape, images_df, num_classes=None, batch_size=32,
             shuffle=True, seed=None, pool_workers=4, only_single=False,
             include_singles=True, max_images=2, use_side_input=True):
    if seed:
        np.random.seed(seed)
    # Precomputed per-image features stored on disk, opened read-only.
    self.x = np.memmap(memmap_path, dtype=np.float32, mode='r', shape=memmap_shape)
    self.images_df = images_df.sort_values('product_id')
    self.images_df_index = np.copy(self.images_df.index.values)
    self.images_df_num_imgs = np.copy(self.images_df.num_imgs.values)
    self.images_df_img_idx = np.copy(self.images_df.img_idx.values)
    self.has_y = 'category_idx' in images_df.columns
    if self.has_y:
        self.images_df_category_idx = np.copy(self.images_df.category_idx.values)
    self.num_classes = num_classes
    self.batch_size = batch_size
    self.shuffle = shuffle
    self.max_images = max_images
    self.use_side_input = use_side_input
    # Group consecutive rows that share a product_id into multi-image samples.
    # The trailing sentinel namedtuple flushes the final group. For example,
    # product_ids [101, 101, 202] with the defaults produce
    # self.smpls == [[0], [1], [0, 1], [2]].
    self.smpls = []
    cur_index = []
    prev_product_id = -1
    sentinel = namedtuple('Pandas', ['Index', 'product_id'])(0, 0)
    for i, row in enumerate(itertools.chain(self.images_df.itertuples(), [sentinel])):
        if prev_product_id != -1 and row.product_id != prev_product_id:
            if include_singles or len(cur_index) == 1:
                self.smpls.extend([[idx] for idx in cur_index])
            if len(cur_index) > 1 and not only_single:
                self.smpls.append(cur_index)
            cur_index = []
        prev_product_id = row.product_id
        cur_index.append(i)
    del self.images_df
    self.samples = len(self.smpls)
    self.rnd = np.random.RandomState(seed)
    self.it = Iterator(self.samples, self.batch_size, self.shuffle, seed)
    # Background workers fill a bounded queue with ready-made batches.
    self.queue = Queue(maxsize=40)
    self.stop_flag = False
    self.threads = []
    for i in range(pool_workers):
        thread = Thread(target=self.read_batches)
        thread.start()
        self.threads.append(thread)
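# A hypothetical sketch of how one entry of self.smpls could be turned into model
# inputs: gather up to max_images feature rows from the memmap and zero-pad the
# rest. It assumes the memmap has shape (num_images, feat_dim); the side-input
# encoding (a one-hot of the real image count) is an assumption as well.
import numpy as np


def assemble_sample(x, smpl, max_images, feat_dim, use_side_input=True):
    feats = np.zeros((max_images, feat_dim), dtype=np.float32)
    for k, row_idx in enumerate(smpl[:max_images]):
        feats[k] = x[row_idx]
    if not use_side_input:
        return feats
    # Side input telling the model how many of the max_images slots are real.
    side = np.zeros((max_images,), dtype=np.float32)
    side[min(len(smpl), max_images) - 1] = 1.0
    return feats, side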
def __init__(self, memmap_path, memmap_shape, images_df, num_classes=None, batch_size=32,
             shuffle=True, seed=None, pool_workers=4, use_side_input=False):
    if seed:
        np.random.seed(seed)
    # Precomputed per-image features stored on disk, opened read-only.
    self.x = np.memmap(memmap_path, dtype=np.float32, mode='r', shape=memmap_shape)
    self.images_df = images_df
    self.images_df_index = np.copy(self.images_df.index.values)
    self.images_df_num_imgs = np.copy(self.images_df.num_imgs.values)
    self.images_df_img_idx = np.copy(self.images_df.img_idx.values)
    self.has_y = 'category_idx' in images_df.columns
    if self.has_y:
        self.images_df_category_idx = np.copy(self.images_df.category_idx.values)
    del self.images_df
    self.num_classes = num_classes
    self.batch_size = batch_size
    self.shuffle = shuffle
    self.use_side_input = use_side_input
    self.samples = len(self.images_df_index)
    self.it = Iterator(self.samples, self.batch_size, self.shuffle, seed)
    # Background workers fill a bounded queue with ready-made batches.
    self.queue = Queue(maxsize=40)
    self.stop_flag = False
    self.threads = []
    for i in range(pool_workers):
        thread = Thread(target=self.read_batches)
        thread.start()
        self.threads.append(thread)
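# A minimal sketch of the worker loop the threads above could run; read_batches
# itself is not shown here. It illustrates the producer/consumer pattern: draw an
# index array from the wrapped Keras Iterator, slice the memmap, and push the
# finished batch onto the bounded queue until stop_flag is set. Depending on the
# Keras version, index_generator yields either an index array or a tuple
# (index_array, current_index, batch_size); the plain array form is assumed here.
import numpy as np


def read_batches(self):
    while not self.stop_flag:
        with self.it.lock:
            index_array = next(self.it.index_generator)
        batch_x = np.asarray(self.x[index_array], dtype=np.float32)
        if self.has_y:
            batch_y = np.zeros((len(index_array), self.num_classes), dtype=np.float32)
            batch_y[np.arange(len(index_array)),
                    self.images_df_category_idx[index_array]] = 1.0
            batch = (batch_x, batch_y)
        else:
            batch = batch_x
        # put() blocks while the queue is full, which throttles the workers.
        self.queue.put(batch)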
def get_chunk(self):
    # Advance to the next chunk, wrapping around at the end of the index list,
    # and remember which chunk should be preloaded after this one.
    self.chunk_idx += 1
    if CHUNK_SIZE * self.chunk_idx >= len(self.x_idxs):
        self.chunk_idx = 0
    self.next_idx = self.chunk_idx + 1
    if CHUNK_SIZE * self.next_idx >= len(self.x_idxs):
        self.next_idx = 0
    idxs = self.x_idxs[(CHUNK_SIZE * self.chunk_idx):(CHUNK_SIZE * self.chunk_idx + CHUNK_SIZE)]
    # Wait for the background preload of the current chunk, take over its buffers,
    # then immediately start preloading the next chunk (double buffering).
    self.thread.join()
    self.chunk_x = self.preload_x
    if self.side_x is not None:
        self.chunk_side_x = self.preload_side_x
    self.thread = threading.Thread(target=self.preload, args=(self.next_idx,))
    self.thread.start()
    if self.y is not None:
        self.chunk_y = self.y[(CHUNK_SIZE * self.chunk_idx):(CHUNK_SIZE * self.chunk_idx + CHUNK_SIZE)]
    self.chunk_seen = 0
    self.it = Iterator(len(idxs), self.batch_size, self.shuffle, None)
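# get_chunk() relies on a preload(chunk_idx) method running in a background
# thread. A minimal sketch of what it might do, assuming the underlying data
# lives in self.x (and optionally self.side_x): copy the requested chunk's rows
# into plain in-memory arrays so the training loop never blocks on disk I/O.
import numpy as np


def preload(self, chunk_idx):
    idxs = self.x_idxs[CHUNK_SIZE * chunk_idx:CHUNK_SIZE * chunk_idx + CHUNK_SIZE]
    # np.asarray materialises the (possibly memory-mapped) slice in RAM.
    self.preload_x = np.asarray(self.x[idxs], dtype=np.float32)
    if self.side_x is not None:
        self.preload_side_x = np.asarray(self.side_x[idxs], dtype=np.float32)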
def __init__(self, image_lists, image_data_generator, category, image_dir,
             target_size=(256, 256, 3), color_mode='rgb', class_mode='categorical',
             batch_size=32, episode_len=20, episode_shift=10, shuffle=True,
             seed=None, data_format=None, save_to_dir=None, save_prefix='',
             save_format='jpeg', dtype=K.floatx()):
    if data_format is None:
        data_format = K.image_data_format()
    classes = list(image_lists.keys())
    self.category = category
    self.num_class = len(classes)
    self.image_lists = image_lists
    self.image_dir = image_dir
    self.episode_len = episode_len
    self.episode_shift = episode_shift
    how_many_files = 0
    for label_name in classes:
        for _ in self.image_lists[label_name][category]:
            how_many_files += 1
    self.samples = how_many_files
    self.class2id = dict(zip(classes, range(len(classes))))
    self.id2class = dict((v, k) for k, v in self.class2id.items())
    self.classes = np.zeros((self.samples,), dtype='int32')
    self.image_data_generator = image_data_generator
    self.target_size = tuple(target_size)
    if color_mode not in {'rgb', 'grayscale'}:
        raise ValueError('Invalid color mode:', color_mode,
                         '; expected "rgb" or "grayscale".')
    self.color_mode = color_mode
    self.data_format = data_format
    self.image_shape = self.target_size
    if (class_mode not in {'categorical', 'binary', 'sparse', 'episode', None}) and \
            (not hasattr(class_mode, '__call__')):
        raise ValueError('Invalid class_mode:', class_mode,
                         '; expected one of "categorical", "binary", "sparse", '
                         '"episode", or None.')
    self.class_mode = class_mode
    self.dtype = dtype
    self.save_to_dir = save_to_dir
    self.save_prefix = save_prefix
    self.save_format = save_format
    i = 0
    self.filenames = []
    for label_name in classes:
        for j, _ in enumerate(self.image_lists[label_name][category]):
            self.classes[i] = self.class2id[label_name]
            img_path = get_file_path(self.image_lists, label_name, j,
                                     self.image_dir, self.category)
            self.filenames.append(img_path)
            i += 1
    log_message("Found {} {} files".format(len(self.filenames), category), logging.INFO)
    Iterator.__init__(self, self.samples, batch_size, shuffle, seed)
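# Example construction under assumed names. The nested {label: {category: [items]}}
# layout of image_lists is implied by the loops above, but the exact item type
# depends on get_file_path; the class name EpisodeImageIterator and the
# ImageDataGenerator settings are placeholders.
from keras.preprocessing.image import ImageDataGenerator

image_lists = {
    'cat': {'training': ['cat_001.jpg', 'cat_002.jpg'], 'validation': ['cat_101.jpg']},
    'dog': {'training': ['dog_001.jpg'], 'validation': ['dog_101.jpg']},
}

it = EpisodeImageIterator(image_lists,                        # hypothetical class name
                          ImageDataGenerator(rescale=1. / 255),
                          category='training',
                          image_dir='data/images',
                          target_size=(256, 256, 3),
                          class_mode='episode',
                          batch_size=8)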