def make_db(image_path, label_path, database_path):
    """Build a sequential LMDB database of JPEG-encoded images.

    Every non-blank line of the label file must hold an image path
    (relative to ``image_path``) followed by an integer label, separated
    by whitespace. Each image is re-encoded as JPEG (quality 95), wrapped
    in a ``caffe_pb2.Datum`` and stored under a zero-padded sequential key.
    The number of records and the key width are stored under the ``size``
    and ``zfill`` keys, and the label file is copied into the database
    folder as ``image_list.txt``.

    Parameters
    ----------
    image_path : str
        The root folder of raw images.
    label_path : str
        The path of the image list file.
    database_path : str
        The path of the database to create; must not be an existing folder.

    Raises
    ------
    ValueError
        If ``label_path`` is not a file, if ``database_path`` already
        exists, or if an image listed in the label file cannot be read.

    """
    if not os.path.isfile(label_path):
        raise ValueError('input path is empty or wrong.')
    if os.path.isdir(database_path):
        raise ValueError('the database path is already exist.')

    print('start time: ',
          time.strftime("%a, %d %b %Y %H:%M:%S", time.gmtime()))

    db = LMDB(max_commit=10000)
    db.open(database_path, mode='w')

    # Count the records up front (used for progress reporting only).
    # The context manager guarantees the handle is closed again.
    with open(label_path, 'r') as input_file:
        total_line = sum(1 for line in input_file if line.strip())

    count = 0
    zfill_flag = '{0:0%d}' % (ZFILL)
    encode_param = [int(cv2.IMWRITE_JPEG_QUALITY), 95]

    start_time = time.time()

    with open(label_path, 'r') as input_file:
        for record in input_file:
            fields = record.split()
            if not fields:
                # Tolerate blank lines (e.g. a trailing empty line).
                continue

            count += 1
            if count % 10000 == 0:
                now_time = time.time()
                print('{0} / {1} in {2:.2f} sec'.format(
                    count, total_line, now_time - start_time))
                db.commit()

            path, label = fields[0], fields[1]
            full_path = os.path.join(image_path, path)

            # cv2.imread signals failure by returning None instead of
            # raising, so report the offending file explicitly.
            img = cv2.imread(full_path)
            if img is None:
                raise ValueError(
                    'failed to read image: {0}'.format(full_path))

            _, imgencode = cv2.imencode('.jpg', img, encode_param)

            datum = caffe_pb2.Datum()
            datum.height, datum.width, datum.channels = img.shape
            datum.label = int(label)
            datum.encoded = True
            # ``tobytes`` replaces the deprecated ``tostring`` alias.
            datum.data = imgencode.tobytes()
            # Keys are zero-based: the first record is stored at index 0.
            db.put(zfill_flag.format(count - 1), datum.SerializeToString())

    now_time = time.time()
    print('{0} / {1} in {2:.2f} sec'.format(
        count, total_line, now_time - start_time))

    # Metadata entries stored next to the image records.
    db.put('size', wrapper_str(str(count)))
    db.put('zfill', wrapper_str(str(ZFILL)))
    db.commit()
    db.close()

    shutil.copy(label_path, os.path.join(database_path, 'image_list.txt'))

    end_time = time.time()
    print('{0} images have been stored in the database.'.format(count))
    print('This task finishes within {0:.2f} seconds.'.format(
        end_time - start_time))
    # Float divisors keep the fractional part under Python 2 as well.
    print('The size of database is {0} MB.'.format(
        os.path.getsize(os.path.join(database_path, 'data.mdb'))
        / 1000.0 / 1000.0))
def make_db(images_list, database_path, pad=0):
    """Build a sequential LMDB database from in-memory images.

    Each image is stored raw (not encoded) inside a ``caffe_pb2.Datum``
    under a zero-padded sequential key. The number of records and the key
    width are stored under the ``size`` and ``zfill`` keys.

    Parameters
    ----------
    images_list : sequence
        A sized sequence of records, where ``record[0]`` is the image as
        a 3-d array (height, width, channels) and ``record[1]`` is its
        label (converted with ``int``).
    database_path : str
        The path of the database to create; must not be an existing folder.
    pad : int, optional
        The number of zero pixels added on every side of each image.
        Default is ``0`` (Disabled). The padded buffer is always
        3-channel ``uint8``.

    Raises
    ------
    ValueError
        If ``database_path`` already exists.

    """
    if os.path.isdir(database_path):
        raise ValueError('the database path is already exist.')

    print('start time: ',
          time.strftime("%a, %d %b %Y %H:%M:%S", time.gmtime()))

    db = LMDB(max_commit=10000)
    db.open(database_path, mode='w')

    total_line = len(images_list)
    count = 0
    zfill_flag = '{0:0%d}' % (ZFILL)

    start_time = time.time()

    for record in images_list:
        count += 1
        if count % 10000 == 0:
            now_time = time.time()
            print('{0} / {1} in {2:.2f} sec'.format(
                count, total_line, now_time - start_time))
            db.commit()

        img, label = record[0], record[1]

        if pad > 0:
            # Paste the image into the centre of a zero-filled canvas.
            height, width = img.shape[0], img.shape[1]
            pad_img = np.zeros(
                (height + 2 * pad, width + 2 * pad, 3), dtype=np.uint8)
            pad_img[pad:pad + height, pad:pad + width, :] = img
            img = pad_img

        datum = caffe_pb2.Datum()
        datum.height, datum.width, datum.channels = img.shape
        datum.label = int(label)
        datum.encoded = False
        # ``tobytes`` replaces the deprecated ``tostring`` alias.
        datum.data = img.tobytes()
        # Keys are zero-based: the first record is stored at index 0.
        db.put(zfill_flag.format(count - 1), datum.SerializeToString())

    now_time = time.time()
    print('{0} / {1} in {2:.2f} sec'.format(
        count, total_line, now_time - start_time))

    # Metadata entries stored next to the image records.
    db.put('size', wrapper_str(str(count)))
    db.put('zfill', wrapper_str(str(ZFILL)))
    db.commit()
    db.close()

    end_time = time.time()
    print('{0} images have been stored in the database.'.format(total_line))
    print('This task finishes within {0:.2f} seconds.'.format(
        end_time - start_time))
    # Float divisors keep the fractional part under Python 2 as well.
    print('The size of database is {0} MB.'.format(
        os.path.getsize(os.path.join(database_path, 'data.mdb'))
        / 1000.0 / 1000.0))
def make_db(images_list, database_path):
    """Build a sequential LMDB database from in-memory images.

    Each image is stored raw (not encoded) inside a ``caffe_pb2.Datum``
    under a zero-padded sequential key. The number of records and the key
    width are stored under the ``size`` and ``zfill`` keys.

    Parameters
    ----------
    images_list : sequence
        A sized sequence of records, where ``record[0]`` is the image as
        a 3-d array (height, width, channels) and ``record[1]`` is its
        label (converted with ``int``).
    database_path : str
        The path of the database to create; must not be an existing folder.

    Raises
    ------
    ValueError
        If ``database_path`` already exists.

    """
    if os.path.isdir(database_path):
        raise ValueError('the database path is already exist.')

    print('start time: ',
          time.strftime("%a, %d %b %Y %H:%M:%S", time.gmtime()))

    db = LMDB(max_commit=10000)
    db.open(database_path, mode='w')

    total_line = len(images_list)
    count = 0
    zfill_flag = '{0:0%d}' % (ZFILL)

    start_time = time.time()

    for record in images_list:
        count += 1
        if count % 10000 == 0:
            now_time = time.time()
            print('{0} / {1} in {2:.2f} sec'.format(
                count, total_line, now_time - start_time))
            db.commit()

        img, label = record[0], record[1]

        datum = caffe_pb2.Datum()
        datum.height, datum.width, datum.channels = img.shape
        datum.label = int(label)
        datum.encoded = False
        # ``tobytes`` replaces the deprecated ``tostring`` alias.
        datum.data = img.tobytes()
        # Keys are zero-based: the first record is stored at index 0.
        db.put(zfill_flag.format(count - 1), datum.SerializeToString())

    now_time = time.time()
    print('{0} / {1} in {2:.2f} sec'.format(
        count, total_line, now_time - start_time))

    # Metadata entries stored next to the image records.
    db.put('size', wrapper_str(str(count)))
    db.put('zfill', wrapper_str(str(ZFILL)))
    db.commit()
    db.close()

    end_time = time.time()
    print('{0} images have been stored in the database.'.format(total_line))
    print('This task finishes within {0:.2f} seconds.'.format(
        end_time - start_time))
    # Float divisors keep the fractional part under Python 2 as well.
    print('The size of database is {0} MB.'.format(
        os.path.getsize(os.path.join(database_path, 'data.mdb'))
        / 1000.0 / 1000.0))
def make_db(args):
    """Make the sequential database for images.

    Every non-blank line of the list file must hold an image path
    (relative to ``args.root``) followed by an integer label, separated
    by whitespace. Each image is optionally resized and zero-padded,
    re-encoded as JPEG, wrapped in a ``caffe_pb2.Datum`` and stored under
    a zero-padded sequential key. The number of records and the key width
    are stored under the ``size`` and ``zfill`` keys, and the list file
    is copied into the database folder as ``image_list.txt``.

    Parameters
    ----------
    args : object
        An object exposing the following attributes:

        database : str
            The path of database; must not be an existing folder.
        root : str
            The root folder of raw images.
        list : str
            The path of image list file.
        resize : int
            The size passed to ``resize_image``; values ``<= 0``
            disable resizing.
        pad : int
            The number of zero pixels added on every side; values
            ``<= 0`` disable padding. The padded buffer is 3-channel.
        zfill : int
            The width of the zero-padded record keys.
        quality : int
            JPEG quality passed to the OpenCV encoder.
        shuffle : boolean
            Whether to randomize the order of the records.

    Raises
    ------
    ValueError
        If the list file does not exist, if the database folder already
        exists, or if a listed image cannot be read.

    """
    if not os.path.isfile(args.list):
        raise ValueError('the path of image list is invalid.')
    if os.path.isdir(args.database):
        raise ValueError('the database is already exist or invalid.')

    print('start time: ',
          time.strftime("%a, %d %b %Y %H:%M:%S", time.gmtime()))

    db = LMDB(max_commit=10000)
    db.open(args.database, mode='w')

    count = 0
    zfill_flag = '{0:0%d}' % (args.zfill)
    encode_param = [int(cv2.IMWRITE_JPEG_QUALITY), args.quality]

    start_time = time.time()

    # Read the list once; blank lines (e.g. a trailing empty line) carry
    # no record and are dropped so they cannot break the parsing below.
    with open(args.list, 'r') as input_file:
        records = [line for line in input_file if line.strip()]
    total_line = len(records)

    if args.shuffle:
        import random
        random.shuffle(records)

    for record in records:
        count += 1
        if count % 10000 == 0:
            now_time = time.time()
            print('{0} / {1} in {2:.2f} sec'.format(
                count, total_line, now_time - start_time))
            db.commit()

        fields = record.split()
        path, label = fields[0], fields[1]
        full_path = os.path.join(args.root, path)

        # cv2.imread signals failure by returning None instead of
        # raising, so report the offending file explicitly.
        img = cv2.imread(full_path)
        if img is None:
            raise ValueError('failed to read image: {0}'.format(full_path))

        if args.resize > 0:
            img = resize_image(img, args.resize)

        if args.pad > 0:
            # Paste the image into the centre of a zero-filled canvas.
            height, width = img.shape[0], img.shape[1]
            pad_img = np.zeros(
                (height + 2 * args.pad, width + 2 * args.pad, 3),
                dtype=img.dtype)
            pad_img[args.pad:args.pad + height,
                    args.pad:args.pad + width, :] = img
            img = pad_img

        _, imgencode = cv2.imencode('.jpg', img, encode_param)

        datum = caffe_pb2.Datum()
        datum.height, datum.width, datum.channels = img.shape
        datum.label = int(label)
        datum.encoded = True
        # ``tobytes`` replaces the deprecated ``tostring`` alias.
        datum.data = imgencode.tobytes()
        # Keys are zero-based: the first record is stored at index 0.
        db.put(zfill_flag.format(count - 1), datum.SerializeToString())

    now_time = time.time()
    print('{0} / {1} in {2:.2f} sec'.format(
        count, total_line, now_time - start_time))

    # Metadata entries stored next to the image records.
    db.put('size', str(count))
    db.put('zfill', str(args.zfill))
    db.commit()
    db.close()

    shutil.copy(args.list, os.path.join(args.database, 'image_list.txt'))

    end_time = time.time()
    print('{0} images have been stored in the database.'.format(count))
    print('This task finishes within {0:.2f} seconds.'.format(
        end_time - start_time))
    # Float divisors keep the fractional part under Python 2 as well.
    print('The size of database is {0} MB.'.format(
        os.path.getsize(os.path.join(args.database, 'data.mdb'))
        / 1000.0 / 1000.0))
def make_db(args):
    """Make the sequential database for images.

    Every non-blank line of the list file must hold an image path
    (relative to ``args.root``) followed by an integer label, separated
    by whitespace. Each image is optionally resized, re-encoded as JPEG,
    wrapped in a ``caffe_pb2.Datum`` and stored under a zero-padded
    sequential key. The number of records and the key width are stored
    under the ``size`` and ``zfill`` keys, and the list file is copied
    into the database folder as ``image_list.txt``.

    Parameters
    ----------
    args : object
        An object exposing the following attributes:

        database : str
            The path of database; must not be an existing folder.
        root : str
            The root folder of raw images.
        list : str
            The path of image list file.
        resize : int
            The size passed to ``resize_image``; values ``<= 0``
            disable resizing.
        zfill : int
            The width of the zero-padded record keys.
        quality : int
            JPEG quality passed to the OpenCV encoder.
        shuffle : boolean
            Whether to randomize the order of the records.

    Raises
    ------
    ValueError
        If the list file does not exist, if the database folder already
        exists, or if a listed image cannot be read.

    """
    if not os.path.isfile(args.list):
        raise ValueError('the path of image list is invalid.')
    if os.path.isdir(args.database):
        raise ValueError('the database is already exist or invalid.')

    print('start time: ',
          time.strftime("%a, %d %b %Y %H:%M:%S", time.gmtime()))

    db = LMDB(max_commit=10000)
    db.open(args.database, mode='w')

    count = 0
    zfill_flag = '{0:0%d}' % (args.zfill)
    encode_param = [int(cv2.IMWRITE_JPEG_QUALITY), args.quality]

    start_time = time.time()

    # Read the list once; blank lines (e.g. a trailing empty line) carry
    # no record and are dropped so they cannot break the parsing below.
    with open(args.list, 'r') as input_file:
        records = [line for line in input_file if line.strip()]
    total_line = len(records)

    if args.shuffle:
        import random
        random.shuffle(records)

    for record in records:
        count += 1
        if count % 10000 == 0:
            now_time = time.time()
            print('{0} / {1} in {2:.2f} sec'.format(
                count, total_line, now_time - start_time))
            db.commit()

        fields = record.split()
        path, label = fields[0], fields[1]
        full_path = os.path.join(args.root, path)

        # cv2.imread signals failure by returning None instead of
        # raising, so report the offending file explicitly.
        img = cv2.imread(full_path)
        if img is None:
            raise ValueError('failed to read image: {0}'.format(full_path))

        if args.resize > 0:
            img = resize_image(img, args.resize)

        _, imgencode = cv2.imencode('.jpg', img, encode_param)

        datum = caffe_pb2.Datum()
        datum.height, datum.width, datum.channels = img.shape
        datum.label = int(label)
        datum.encoded = True
        # ``tobytes`` replaces the deprecated ``tostring`` alias.
        datum.data = imgencode.tobytes()
        # Keys are zero-based: the first record is stored at index 0.
        db.put(zfill_flag.format(count - 1), datum.SerializeToString())

    now_time = time.time()
    print('{0} / {1} in {2:.2f} sec'.format(
        count, total_line, now_time - start_time))

    # Metadata entries stored next to the image records.
    db.put('size', str(count))
    db.put('zfill', str(args.zfill))
    db.commit()
    db.close()

    shutil.copy(args.list, os.path.join(args.database, 'image_list.txt'))

    end_time = time.time()
    print('{0} images have been stored in the database.'.format(count))
    print('This task finishes within {0:.2f} seconds.'.format(
        end_time - start_time))
    # Float divisors keep the fractional part under Python 2 as well.
    print('The size of database is {0} MB.'.format(
        os.path.getsize(os.path.join(args.database, 'data.mdb'))
        / 1000.0 / 1000.0))
def make_db(args):
    """Make the sequential database for images.

    Every non-blank line of the list file must hold an image path
    (relative to ``args.root``) followed by an integer label, separated
    by whitespace. Each image is optionally resized, re-encoded as JPEG,
    wrapped in a ``caffe_pb2.Datum`` and stored under a zero-padded
    sequential key. The number of records and the key width are stored
    under the ``size`` and ``zfill`` keys, and the list file is copied
    into the database folder as ``image_list.txt``.

    Parameters
    ----------
    args : object
        An object exposing the following attributes:

        database : str
            The path of database; must not be an existing folder.
        root : str
            The root folder of raw images.
        list : str
            The path of image list file.
        resize : int
            The size passed to ``resize_image``; values ``<= 0``
            disable resizing.
        zfill : int
            The width of the zero-padded record keys.
        quality : int
            JPEG quality passed to the OpenCV encoder.
        shuffle : boolean
            Whether to randomize the order of the records.

    Raises
    ------
    ValueError
        If the list file does not exist, if the database folder already
        exists, or if a listed image cannot be read.

    """
    if not os.path.isfile(args.list):
        raise ValueError('the path of image list is invalid.')
    if os.path.isdir(args.database):
        raise ValueError('the database is already exist or invalid.')

    print('start time: ',
          time.strftime("%a, %d %b %Y %H:%M:%S", time.gmtime()))

    db = LMDB(max_commit=10000)
    db.open(args.database, mode='w')

    count = 0
    zfill_flag = '{0:0%d}' % (args.zfill)
    encode_param = [int(cv2.IMWRITE_JPEG_QUALITY), args.quality]

    start_time = time.time()

    # Read the list once; blank lines (e.g. a trailing empty line) carry
    # no record and are dropped so they cannot break the parsing below.
    with open(args.list, 'r') as input_file:
        records = [line for line in input_file if line.strip()]
    total_line = len(records)

    if args.shuffle:
        import random
        random.shuffle(records)

    for record in records:
        count += 1
        if count % 10000 == 0:
            now_time = time.time()
            print('{0} / {1} in {2:.2f} sec'.format(
                count, total_line, now_time - start_time))
            db.commit()

        fields = record.split()
        path, label = fields[0], fields[1]
        full_path = os.path.join(args.root, path)

        # cv2.imread signals failure by returning None instead of
        # raising, so report the offending file explicitly.
        img = cv2.imread(full_path)
        if img is None:
            raise ValueError('failed to read image: {0}'.format(full_path))

        if args.resize > 0:
            img = resize_image(img, args.resize)

        _, imgencode = cv2.imencode('.jpg', img, encode_param)

        datum = caffe_pb2.Datum()
        datum.height, datum.width, datum.channels = img.shape
        datum.label = int(label)
        datum.encoded = True
        # ``tobytes`` replaces the deprecated ``tostring`` alias.
        datum.data = imgencode.tobytes()
        # Keys are zero-based: the first record is stored at index 0.
        db.put(zfill_flag.format(count - 1), datum.SerializeToString())

    now_time = time.time()
    print('{0} / {1} in {2:.2f} sec'.format(
        count, total_line, now_time - start_time))

    # Metadata entries stored next to the image records.
    db.put('size', str(count))
    db.put('zfill', str(args.zfill))
    db.commit()
    db.close()

    shutil.copy(args.list, os.path.join(args.database, 'image_list.txt'))

    end_time = time.time()
    print('{0} images have been stored in the database.'.format(count))
    print('This task finishes within {0:.2f} seconds.'.format(
        end_time - start_time))
    # Float divisors keep the fractional part under Python 2 as well.
    print('The size of database is {0} MB.'.format(
        os.path.getsize(os.path.join(args.database, 'data.mdb'))
        / 1000.0 / 1000.0))