def get(self, serialized):
    """Return image and labels from a serialized str.

    The serialized ``Datum`` is parsed, its image payload is decoded
    (or reshaped when stored raw), optionally channel-reversed, and
    finally passed through ``self.transform``.

    Parameters
    ----------
    serialized : str
        The protobuf serialized str.

    Returns
    -------
    tuple
        The tuple image and labels.

    """
    # Decode
    datum = pb.Datum()
    datum.ParseFromString(serialized)
    # Binary-mode ``np.fromstring`` is deprecated; ``np.frombuffer``
    # reads the same bytes without an intermediate copy.
    buf = np.frombuffer(datum.data, np.uint8)
    if datum.encoded:
        im = cv2.imdecode(buf, -1)
    else:
        # ``frombuffer`` returns a read-only view; copy so that the
        # image stays writable for downstream consumers.
        im = buf.reshape(
            (datum.height, datum.width, datum.channels)).copy()

    if datum.channels == 3 and self.color_space == 'RGB':
        # Reverse the channel axis (presumably BGR -> RGB).
        im = im[:, :, ::-1]

    # Labels: prefer the repeated ``labels`` field, otherwise fall
    # back to the single ``label`` field.
    if len(datum.labels) > 0:
        labels = list(datum.labels)
    else:
        labels = [datum.label]

    return self.transform(im), labels
def make_db(image_path, label_path, database_path):
    """Store the images named in a list file into a new database.

    Every line of ``label_path`` is split on whitespace: the first field
    is an image path relative to ``image_path`` and the second field is
    its integer label. Each image is read with OpenCV, re-encoded as
    JPEG (quality 95) and written as a serialized ``Datum`` under a
    zero-padded sequential key. Two extra entries, ``'size'`` and
    ``'zfill'``, are written at the end, and the list file is copied
    into the database directory as ``image_list.txt``.

    Parameters
    ----------
    image_path : str
        The root folder joined with each listed image path.
    label_path : str
        The path of the list file.
    database_path : str
        The path of the database to create. Must not exist yet.

    Raises
    ------
    ValueError
        If ``label_path`` is not a file, if ``database_path`` is an
        existing directory, or if a listed image cannot be read.

    """
    if not os.path.isfile(label_path):
        raise ValueError('input path is empty or wrong.')
    if os.path.isdir(database_path):
        raise ValueError('the database path is already exist.')

    print('start time: ',
          time.strftime("%a, %d %b %Y %H:%M:%S", time.gmtime()))

    db = LMDB(max_commit=10000)
    db.open(database_path, mode='w')

    # Count the records up front for progress reporting; the context
    # manager guarantees the handle is closed again.
    with open(label_path, 'r') as label_file:
        total_line = sum(1 for _ in label_file)

    count = 0
    zfill_flag = '{0:0%d}' % (ZFILL)
    encode_param = [int(cv2.IMWRITE_JPEG_QUALITY), 95]

    start_time = time.time()

    with open(label_path, 'r') as input_file:
        for record in input_file:
            count += 1
            # Report progress and flush pending writes periodically.
            if count % 10000 == 0:
                now_time = time.time()
                print('{0} / {1} in {2:.2f} sec'.format(
                    count, total_line, now_time - start_time))
                db.commit()

            fields = record.split()
            path = fields[0]
            label = fields[1]

            full_path = os.path.join(image_path, path)
            img = cv2.imread(full_path)
            if img is None:
                # ``cv2.imread`` signals failure by returning None.
                raise ValueError(
                    'failed to read image: {0}'.format(full_path))

            _, imgencode = cv2.imencode('.jpg', img, encode_param)

            datum = caffe_pb2.Datum()
            datum.height, datum.width, datum.channels = img.shape
            datum.label = int(label)
            datum.encoded = True
            # ``tostring`` is a deprecated alias of ``tobytes``.
            datum.data = imgencode.tobytes()
            db.put(zfill_flag.format(count - 1), datum.SerializeToString())

    now_time = time.time()
    print('{0} / {1} in {2:.2f} sec'.format(
        count, total_line, now_time - start_time))

    # Metadata entries: number of records and the key width.
    db.put('size', wrapper_str(str(count)))
    db.put('zfill', wrapper_str(str(ZFILL)))
    db.commit()
    db.close()

    shutil.copy(label_path, database_path + '/image_list.txt')

    end_time = time.time()
    print('{0} images have been stored in the database.'.format(total_line))
    print('This task finishes within {0:.2f} seconds.'.format(
        end_time - start_time))
    print('The size of database is {0} MB.'.format(
        float(os.path.getsize(database_path + '/data.mdb') / 1000 / 1000)))
def make_db(images_list, database_path, pad=0):
    """Store in-memory images into a new database without encoding.

    Each record of ``images_list`` is indexed as ``(image, label)``.
    The raw image bytes are written as a serialized ``Datum`` under a
    zero-padded sequential key, followed by the ``'size'`` and
    ``'zfill'`` entries.

    Parameters
    ----------
    images_list : sequence
        The records; ``record[0]`` is the image array and ``record[1]``
        the label. The image shape is unpacked into exactly three
        values (height, width, channels).
    database_path : str
        The path of the database to create. Must not exist yet.
    pad : int, optional
        The number of zero pixels added on every side of each image.
        Padding allocates a 3-channel ``uint8`` canvas, so it assumes
        3-channel input images.

    Raises
    ------
    ValueError
        If ``database_path`` is an existing directory.

    """
    if os.path.isdir(database_path):
        raise ValueError('the database path is already exist.')

    print('start time: ',
          time.strftime("%a, %d %b %Y %H:%M:%S", time.gmtime()))

    db = LMDB(max_commit=10000)
    db.open(database_path, mode='w')

    total_line = len(images_list)
    count = 0
    zfill_flag = '{0:0%d}' % (ZFILL)

    start_time = time.time()

    for record in images_list:
        count += 1
        # Report progress and flush pending writes periodically.
        if count % 10000 == 0:
            now_time = time.time()
            print('{0} / {1} in {2:.2f} sec'.format(
                count, total_line, now_time - start_time))
            db.commit()

        img = record[0]
        label = record[1]

        if pad > 0:
            # Paste the image into the center of a zero-filled canvas.
            height, width = img.shape[0], img.shape[1]
            pad_img = np.zeros(
                (height + 2 * pad, width + 2 * pad, 3), dtype=np.uint8)
            pad_img[pad:pad + height, pad:pad + width, :] = img
            img = pad_img

        datum = caffe_pb2.Datum()
        datum.height, datum.width, datum.channels = img.shape
        datum.label = int(label)
        datum.encoded = False
        # ``tostring`` is a deprecated alias of ``tobytes``.
        datum.data = img.tobytes()
        db.put(zfill_flag.format(count - 1), datum.SerializeToString())

    now_time = time.time()
    print('{0} / {1} in {2:.2f} sec'.format(
        count, total_line, now_time - start_time))

    # Metadata entries: number of records and the key width.
    db.put('size', wrapper_str(str(count)))
    db.put('zfill', wrapper_str(str(ZFILL)))
    db.commit()
    db.close()

    end_time = time.time()
    print('{0} images have been stored in the database.'.format(total_line))
    print('This task finishes within {0:.2f} seconds.'.format(
        end_time - start_time))
    print('The size of database is {0} MB.'.format(
        float(os.path.getsize(database_path + '/data.mdb') / 1000 / 1000)))
def transform_image_labels(self, serialized):
    """Get image and labels from a serialized str.

    The image is decoded and then processed in this order: random
    scale, crop, random mirror, gray-to-color duplication, color
    augmentation, padding, conversion to ``float32``, mean subtraction
    and numerical scaling.

    Parameters
    ----------
    serialized : str
        The protobuf serialized str.

    Returns
    -------
    tuple
        The tuple image and labels.

    """
    # Decode
    datum = pb.Datum()
    datum.ParseFromString(serialized)
    # Binary-mode ``np.fromstring`` is deprecated; use ``np.frombuffer``.
    buf = np.frombuffer(datum.data, np.uint8)
    if datum.encoded:
        im = cv2.imdecode(buf, -1)
    else:
        # ``frombuffer`` returns a read-only view; copy to stay writable.
        im = buf.reshape(
            (datum.height, datum.width, datum.channels)).copy()

    # Random scale, drawn uniformly from [min, max)
    scale_range = self._max_random_scale - self._min_random_scale
    random_scale = npr.uniform() * scale_range + self._min_random_scale
    if random_scale != 1.0:
        # PIL expects the target size as (width, height).
        new_shape = (int(im.shape[1] * random_scale),
                     int(im.shape[0] * random_scale))
        im = PIL.Image.fromarray(im)
        im = im.resize(new_shape, PIL.Image.BILINEAR)
        im = np.array(im)

    # Crop: random offsets in the TRAIN phase, centered otherwise
    if self._crop_size > 0:
        if self._phase == 'TRAIN':
            h_off = npr.randint(im.shape[0] - self._crop_size + 1)
            w_off = npr.randint(im.shape[1] - self._crop_size + 1)
        else:
            # Floor division keeps the offsets integral; true division
            # would yield floats, which are invalid slice indices.
            h_off = (im.shape[0] - self._crop_size) // 2
            w_off = (im.shape[1] - self._crop_size) // 2
        im = im[h_off:h_off + self._crop_size,
                w_off:w_off + self._crop_size, :]

    # Random mirror (flip along the width axis with probability 0.5)
    if self._mirror:
        if npr.randint(0, 2) > 0:
            im = im[:, ::-1, :]

    # Gray transformation
    if self._force_color:
        if im.shape[2] == 1:
            # Duplicate to 3 channels
            im = np.concatenate([im, im, im], axis=2)

    # Color augmentation: each factor is drawn from [0.6, 1.4)
    if self._color_aug:
        im = PIL.Image.fromarray(im)
        delta_brightness = npr.uniform(-0.4, 0.4) + 1.0
        delta_contrast = npr.uniform(-0.4, 0.4) + 1.0
        delta_saturation = npr.uniform(-0.4, 0.4) + 1.0
        im = PIL.ImageEnhance.Brightness(im)
        im = im.enhance(delta_brightness)
        im = PIL.ImageEnhance.Contrast(im)
        im = im.enhance(delta_contrast)
        im = PIL.ImageEnhance.Color(im)
        im = im.enhance(delta_saturation)
        im = np.array(im)

    # Padding: paste the image into a canvas filled with the fill value
    if self._padding > 0:
        pad_img = np.empty((im.shape[0] + 2 * self._padding,
                            im.shape[1] + 2 * self._padding,
                            im.shape[2]), dtype=im.dtype)
        pad_img.fill(self._fill_value)
        pad_img[self._padding:self._padding + im.shape[0],
                self._padding:self._padding + im.shape[1], :] = im
        im = pad_img

    im = im.astype(np.float32, copy=False)

    # Mean subtraction
    if len(self._mean_values) > 0:
        im = im - self._mean_values

    # Numerical scale
    if self._scale != 1.0:
        im = im * self._scale

    return im, [datum.label]
def get(self, serialized):
    """Return image and labels from a serialized str.

    The image is decoded and then processed in this order: random
    scale, crop, random mirror, gray-to-color duplication, color
    augmentation and padding.

    Parameters
    ----------
    serialized : str
        The protobuf serialized str.

    Returns
    -------
    tuple
        The tuple image and labels.

    """
    # Decode
    datum = pb.Datum()
    datum.ParseFromString(serialized)
    # Binary-mode ``np.fromstring`` is deprecated; use ``np.frombuffer``.
    buf = np.frombuffer(datum.data, np.uint8)
    if datum.encoded:
        im = cv2.imdecode(buf, -1)
    else:
        # ``frombuffer`` returns a read-only view; copy to stay writable.
        im = buf.reshape(
            (datum.height, datum.width, datum.channels)).copy()

    # Random scale, drawn uniformly from [min, max)
    scale_range = self._max_random_scale - self._min_random_scale
    random_scale = npr.uniform() * scale_range + self._min_random_scale
    if random_scale != 1.0:
        if sys.version_info >= (3, 0):
            im = cv2.resize(im, None, interpolation=cv2.INTER_LINEAR,
                            fx=random_scale, fy=random_scale)
        else:
            # Per the original author's note, OpenCV under Python 2 has
            # a bug that creates duplicate CUDA handles at gpu:0, so
            # PIL performs the resize there instead.
            new_shape = (int(np.ceil(im.shape[1] * random_scale)),
                         int(np.ceil(im.shape[0] * random_scale)))
            im = PIL.Image.fromarray(im)
            im = im.resize(new_shape, PIL.Image.BILINEAR)
            im = np.array(im)

    # Crop: random offsets in the TRAIN phase, centered otherwise
    if self._crop_size > 0:
        if self._phase == 'TRAIN':
            h_off = npr.randint(im.shape[0] - self._crop_size + 1)
            w_off = npr.randint(im.shape[1] - self._crop_size + 1)
        else:
            h_off = int((im.shape[0] - self._crop_size) / 2)
            w_off = int((im.shape[1] - self._crop_size) / 2)
        im = im[h_off:h_off + self._crop_size,
                w_off:w_off + self._crop_size, :]

    # Random mirror (flip along the width axis with probability 0.5)
    if self._mirror:
        if npr.randint(0, 2) > 0:
            im = im[:, ::-1, :]

    # Gray transformation
    if self._force_color:
        if im.shape[2] == 1:
            # Duplicate to 3 channels
            im = np.concatenate([im, im, im], axis=2)

    # Color augmentation: each factor is drawn from [0.6, 1.4)
    if self._color_aug:
        im = PIL.Image.fromarray(im)
        delta_brightness = npr.uniform(-0.4, 0.4) + 1.0
        delta_contrast = npr.uniform(-0.4, 0.4) + 1.0
        delta_saturation = npr.uniform(-0.4, 0.4) + 1.0
        im = PIL.ImageEnhance.Brightness(im)
        im = im.enhance(delta_brightness)
        im = PIL.ImageEnhance.Contrast(im)
        im = im.enhance(delta_contrast)
        im = PIL.ImageEnhance.Color(im)
        im = im.enhance(delta_saturation)
        im = np.array(im)

    # Padding: paste the image into a canvas filled with the fill value
    if self._padding > 0:
        pad_img = np.empty((im.shape[0] + 2 * self._padding,
                            im.shape[1] + 2 * self._padding,
                            im.shape[2]), dtype=im.dtype)
        pad_img.fill(self._fill_value)
        pad_img[self._padding:self._padding + im.shape[0],
                self._padding:self._padding + im.shape[1], :] = im
        im = pad_img

    # Labels: prefer the repeated ``labels`` field, otherwise fall
    # back to the single ``label`` field.
    if len(datum.labels) > 0:
        labels = list(datum.labels)
    else:
        labels = [datum.label]

    return im, labels
def get(self, serialized):
    """Return image and labels from a serialized str.

    The image is decoded and then processed in this order: random
    scale, padding, crop, CutOut, random mirror, gray-to-color
    duplication and color augmentation.

    Parameters
    ----------
    serialized : str
        The protobuf serialized str.

    Returns
    -------
    tuple
        The tuple image and labels.

    """
    # Decode
    datum = _proto_def.Datum()
    datum.ParseFromString(serialized)
    # Binary-mode ``numpy.fromstring`` is deprecated; use ``frombuffer``.
    buf = numpy.frombuffer(datum.data, numpy.uint8)
    if datum.encoded:
        im = cv2.imdecode(buf, -1)
    else:
        # ``frombuffer`` returns a read-only view; copy so the in-place
        # CutOut write below keeps working on raw images.
        im = buf.reshape(
            (datum.height, datum.width, datum.channels)).copy()

    # Random scale, drawn uniformly from [min, max)
    scale_range = self._max_rand_scale - self._min_rand_scale
    rand_scale = numpy.random.uniform() * scale_range + self._min_rand_scale
    if rand_scale != 1.0:
        im = cv2.resize(
            im, None,
            fx=rand_scale,
            fy=rand_scale,
            interpolation=cv2.INTER_LINEAR,
        )

    # Padding: paste the image into a canvas filled with the fill value
    if self._padding > 0:
        pad_im = numpy.empty(
            (im.shape[0] + 2 * self._padding,
             im.shape[1] + 2 * self._padding,
             im.shape[2]),
            dtype=im.dtype)
        pad_im[:] = self._fill_value
        pad_im[self._padding:self._padding + im.shape[0],
               self._padding:self._padding + im.shape[1], :] = im
        im = pad_im

    # Crop: random offsets in the TRAIN phase, centered otherwise
    if self._crop_size > 0:
        if self._phase == 'TRAIN':
            h_off = numpy.random.randint(im.shape[0] - self._crop_size + 1)
            w_off = numpy.random.randint(im.shape[1] - self._crop_size + 1)
        else:
            h_off = int((im.shape[0] - self._crop_size) / 2)
            w_off = int((im.shape[1] - self._crop_size) / 2)
        im = im[h_off:h_off + self._crop_size,
                w_off:w_off + self._crop_size, :]

    # CutOut: overwrite a window around a random center with the fill
    # value; the window is clipped to the image bounds.
    if self._cutout_size > 0:
        h, w = im.shape[:2]
        y = numpy.random.randint(h)
        x = numpy.random.randint(w)
        half = self._cutout_size // 2
        y1 = numpy.clip(y - half, 0, h)
        y2 = numpy.clip(y + half, 0, h)
        x1 = numpy.clip(x - half, 0, w)
        x2 = numpy.clip(x + half, 0, w)
        im[y1:y2, x1:x2] = self._fill_value

    # Random mirror (flip along the width axis with probability 0.5)
    if self._mirror:
        if numpy.random.randint(0, 2) > 0:
            im = im[:, ::-1, :]

    # Gray Transformation
    if self._force_color:
        if im.shape[2] == 1:
            # Duplicate to 3 channels
            im = numpy.concatenate([im, im, im], axis=2)

    # Color Augmentation: each factor is drawn from [0.6, 1.4)
    if self._color_aug:
        im = PIL.Image.fromarray(im)
        delta_brightness = numpy.random.uniform(-0.4, 0.4) + 1.0
        delta_contrast = numpy.random.uniform(-0.4, 0.4) + 1.0
        delta_saturation = numpy.random.uniform(-0.4, 0.4) + 1.0
        im = PIL.ImageEnhance.Brightness(im)
        im = im.enhance(delta_brightness)
        im = PIL.ImageEnhance.Contrast(im)
        im = im.enhance(delta_contrast)
        im = PIL.ImageEnhance.Color(im)
        im = im.enhance(delta_saturation)
        im = numpy.array(im)

    # Extract Labels: prefer the repeated ``labels`` field, otherwise
    # fall back to the single ``label`` field.
    if len(datum.labels) > 0:
        labels = list(datum.labels)
    else:
        labels = [datum.label]

    return im, labels
def transform_image_label(self, serialized):
    """Get image and label from a serialized str.

    The image is decoded and then processed in this order: random
    scale, gray-to-color duplication, crop, random mirror, HSV color
    augmentation, conversion to ``float32``, mean subtraction and
    range scaling.

    Parameters
    ----------
    serialized : str
        The protobuf serialized str.

    Returns
    -------
    tuple
        The tuple image and a one-element list holding the label.

    """
    datum = pb.Datum()
    datum.ParseFromString(serialized)
    # Binary-mode ``np.fromstring`` is deprecated; use ``np.frombuffer``.
    buf = np.frombuffer(datum.data, np.uint8)
    if datum.encoded:
        im = cv2.imdecode(buf, -1)
    else:
        # ``frombuffer`` returns a read-only view; copy to stay writable.
        im = buf.reshape(
            (datum.height, datum.width, datum.channels)).copy()

    # Handle scale, drawn uniformly from [min, max)
    scale_range = self._max_random_scale - self._min_random_scale
    random_scale = npr.uniform() * scale_range + self._min_random_scale
    if random_scale != 1.0:
        # PIL expects the target size as (width, height).
        new_shape = (int(im.shape[1] * random_scale),
                     int(im.shape[0] * random_scale))
        im = PIL.Image.fromarray(im)
        im = im.resize(new_shape, PIL.Image.BILINEAR)
        im = np.array(im)

    # Handle gray
    if not self._force_gray:
        if im.shape[2] == 1:
            # Copy to 3 channels
            im = np.concatenate([im, im, im], axis=2)

    # Handle crop: random offsets when the phase is 0, centered
    # otherwise. The offsets are reused below to crop the mean file.
    h_off = w_off = 0
    if self._crop_size > 0:
        if self._phase == 0:
            h_off = npr.randint(im.shape[0] - self._crop_size + 1)
            w_off = npr.randint(im.shape[1] - self._crop_size + 1)
        else:
            # Floor division keeps the offsets integral; true division
            # would yield floats, which are invalid slice indices.
            h_off = (im.shape[0] - self._crop_size) // 2
            w_off = (im.shape[1] - self._crop_size) // 2
        im = im[h_off:h_off + self._crop_size,
                w_off:w_off + self._crop_size, :]

    # Handle mirror (flip along the width axis with probability 0.5)
    if self._mirror:
        if npr.randint(0, 2) > 0:
            im = im[:, ::-1, :]

    # Handle color augmentation (applied with probability 0.5)
    if self._color_aug:
        if npr.randint(0, 2) > 0:
            im = im[:, :, ::-1]  # BGR -> RGB
            im = skimage.color.rgb2hsv(im)
            h, s, v = np.split(im, 3, 2)
            # Shift every HSV component by a value in [-0.1, 0.1).
            delta_h = npr.uniform() * 0.2 - 0.1
            delta_s = npr.uniform() * 0.2 - 0.1
            delta_v = npr.uniform() * 0.2 - 0.1
            h = np.clip(h + delta_h, 0, 1)
            s = np.clip(s + delta_s, 0, 1)
            v = np.clip(v + delta_v, 0, 1)
            im = np.concatenate([h, s, v], axis=2)
            im = skimage.color.hsv2rgb(im)
            # Reverse the channels again and rescale by 255.
            im = im[:, :, ::-1] * np.array([255])

    im = im.astype(np.float32, copy=False)

    # Handle mean subtraction
    if len(self._mean_value) > 0:
        if self._mean_file:
            if self._crop_size > 0:
                # Crop the mean image with the same window as the image.
                im = im - self._mean_value[h_off:h_off + self._crop_size,
                                           w_off:w_off + self._crop_size, :]
            else:
                im = im - self._mean_value[:, :, :]
        else:
            im = im - self._mean_value

    # Handle range scale
    if self._scale != 1.0:
        im = im * self._scale

    return im, [datum.label]
def make_db(args):
    """Store the images named in a list file into a new database.

    Every line of the list file is split on whitespace: the first field
    is an image path relative to ``args.root`` and the second field is
    its integer label. Each image is read with OpenCV, optionally
    resized and zero-padded, re-encoded as JPEG and written as a
    serialized ``Datum`` under a zero-padded sequential key. The
    entries ``'size'`` and ``'zfill'`` are written at the end, and the
    list file is copied into the database directory as
    ``image_list.txt``.

    Parameters
    ----------
    args : object
        The options, read through these attributes:
        ``database`` (path of the database to create), ``root`` (root
        folder of raw images), ``list`` (path of the list file),
        ``resize`` (passed to ``resize_image`` when positive), ``pad``
        (zero pixels added on every side when positive), ``zfill``
        (width of the zero-padded keys), ``quality`` (JPEG quality)
        and ``shuffle`` (whether to randomize the record order).

    Raises
    ------
    ValueError
        If ``args.list`` is not a file, if ``args.database`` is an
        existing directory, or if a listed image cannot be read.

    """
    if not os.path.isfile(args.list):
        raise ValueError('the path of image list is invalid.')
    if os.path.isdir(args.database):
        raise ValueError('the database is already exist or invalid.')

    print('start time: ',
          time.strftime("%a, %d %b %Y %H:%M:%S", time.gmtime()))

    db = LMDB(max_commit=10000)
    db.open(args.database, mode='w')

    # Read the list once; its length also serves as the progress total,
    # so no second (unclosed) handle is needed.
    with open(args.list, 'r') as input_file:
        records = input_file.readlines()
    total_line = len(records)

    count = 0
    zfill_flag = '{0:0%d}' % (args.zfill)
    encode_param = [int(cv2.IMWRITE_JPEG_QUALITY), args.quality]

    start_time = time.time()

    if args.shuffle:
        import random
        random.shuffle(records)

    for record in records:
        count += 1
        # Report progress and flush pending writes periodically.
        if count % 10000 == 0:
            now_time = time.time()
            print('{0} / {1} in {2:.2f} sec'.format(
                count, total_line, now_time - start_time))
            db.commit()

        fields = record.split()
        path = fields[0]
        label = fields[1]

        full_path = os.path.join(args.root, path)
        img = cv2.imread(full_path)
        if img is None:
            # ``cv2.imread`` signals failure by returning None.
            raise ValueError('failed to read image: {0}'.format(full_path))

        if args.resize > 0:
            img = resize_image(img, args.resize)

        if args.pad > 0:
            # Paste the image into the center of a zero-filled canvas.
            height, width = img.shape[0], img.shape[1]
            pad_img = np.zeros(
                (height + 2 * args.pad, width + 2 * args.pad, 3),
                dtype=img.dtype)
            pad_img[args.pad:args.pad + height,
                    args.pad:args.pad + width, :] = img
            img = pad_img

        _, imgencode = cv2.imencode('.jpg', img, encode_param)

        datum = caffe_pb2.Datum()
        datum.height, datum.width, datum.channels = img.shape
        datum.label = int(label)
        datum.encoded = True
        # ``tostring`` is a deprecated alias of ``tobytes``.
        datum.data = imgencode.tobytes()
        db.put(zfill_flag.format(count - 1), datum.SerializeToString())

    now_time = time.time()
    print('{0} / {1} in {2:.2f} sec'.format(
        count, total_line, now_time - start_time))

    # Metadata entries: number of records and the key width.
    db.put('size', str(count))
    db.put('zfill', str(args.zfill))
    db.commit()
    db.close()

    shutil.copy(args.list, args.database + '/image_list.txt')

    end_time = time.time()
    print('{0} images have been stored in the database.'.format(total_line))
    print('This task finishes within {0:.2f} seconds.'.format(
        end_time - start_time))
    print('The size of database is {0} MB.'.format(
        float(os.path.getsize(args.database + '/data.mdb') / 1000 / 1000)))
def make_db(args):
    """Make the sequential database for images.

    Every line of the list file is split on whitespace: the first field
    is an image path relative to ``args.root`` and the second field is
    its integer label. Each image is read with OpenCV, optionally
    resized, re-encoded as JPEG and written as a serialized ``Datum``
    under a zero-padded sequential key. The entries ``'size'`` and
    ``'zfill'`` are written at the end, and the list file is copied
    into the database directory as ``image_list.txt``.

    Parameters
    ----------
    args : object
        The options, read through the attributes listed below.
    args.database : str
        The path of database.
    args.root : str
        The root folder of raw images.
    args.list : str
        The path of image list file.
    args.resize : int
        The size of the shortest edge. Default is ``0`` (Disabled).
    args.zfill : int
        The number of zeros for encoding keys.
    args.quality : int
        JPEG quality for encoding, 1-100. Default is ``95``.
    args.shuffle : boolean
        Whether to randomize the order in list file.

    Raises
    ------
    ValueError
        If ``args.list`` is not a file, if ``args.database`` is an
        existing directory, or if a listed image cannot be read.

    """
    if not os.path.isfile(args.list):
        raise ValueError('the path of image list is invalid.')
    if os.path.isdir(args.database):
        raise ValueError('the database is already exist or invalid.')

    print('start time: ',
          time.strftime("%a, %d %b %Y %H:%M:%S", time.gmtime()))

    db = LMDB(max_commit=10000)
    db.open(args.database, mode='w')

    # Read the list once; its length also serves as the progress total,
    # so no second (unclosed) handle is needed.
    with open(args.list, 'r') as input_file:
        records = input_file.readlines()
    total_line = len(records)

    count = 0
    zfill_flag = '{0:0%d}' % (args.zfill)
    encode_param = [int(cv2.IMWRITE_JPEG_QUALITY), args.quality]

    start_time = time.time()

    if args.shuffle:
        import random
        random.shuffle(records)

    for record in records:
        count += 1
        # Report progress and flush pending writes periodically.
        if count % 10000 == 0:
            now_time = time.time()
            print('{0} / {1} in {2:.2f} sec'.format(
                count, total_line, now_time - start_time))
            db.commit()

        fields = record.split()
        path = fields[0]
        label = fields[1]

        full_path = os.path.join(args.root, path)
        img = cv2.imread(full_path)
        if img is None:
            # ``cv2.imread`` signals failure by returning None.
            raise ValueError('failed to read image: {0}'.format(full_path))

        if args.resize > 0:
            img = resize_image(img, args.resize)

        _, imgencode = cv2.imencode('.jpg', img, encode_param)

        datum = caffe_pb2.Datum()
        datum.height, datum.width, datum.channels = img.shape
        datum.label = int(label)
        datum.encoded = True
        # ``tostring`` is a deprecated alias of ``tobytes``.
        datum.data = imgencode.tobytes()
        db.put(zfill_flag.format(count - 1), datum.SerializeToString())

    now_time = time.time()
    print('{0} / {1} in {2:.2f} sec'.format(
        count, total_line, now_time - start_time))

    # Metadata entries: number of records and the key width.
    db.put('size', str(count))
    db.put('zfill', str(args.zfill))
    db.commit()
    db.close()

    shutil.copy(args.list, args.database + '/image_list.txt')

    end_time = time.time()
    print('{0} images have been stored in the database.'.format(total_line))
    print('This task finishes within {0:.2f} seconds.'.format(
        end_time - start_time))
    print('The size of database is {0} MB.'.format(
        float(os.path.getsize(args.database + '/data.mdb') / 1000 / 1000)))