Example 1
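These snippets are taken out of larger modules, so none of them runs on its own. A minimal preamble along the following lines is assumed throughout; the standard-library and third-party imports are straightforward, while LMDB, caffe_pb2, wrapper_str, ZFILL, and resize_image are project-specific names used by the examples and must come from the surrounding project.

import os
import time
import shutil

import cv2
import numpy as np

# Project-specific dependencies assumed by the examples below (not defined here):
# - LMDB: a small wrapper around an LMDB environment with open/put/commit/close
# - caffe_pb2: the compiled Caffe protobuf module that provides Datum
# - wrapper_str, ZFILL, resize_image: helpers and constants from the same module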
def make_db(image_path, label_path, database_path):
    if not os.path.isfile(label_path):
        raise ValueError('the label path is missing or invalid.')
    if os.path.isdir(database_path):
        raise ValueError('the database path already exists.')

    print('start time: ', time.strftime("%a, %d %b %Y %H:%M:%S",
                                        time.gmtime()))

    db = LMDB(max_commit=10000)
    db.open(database_path, mode='w')

    total_line = sum(1 for line in open(label_path))
    count = 0
    zfill_flag = '{0:0%d}' % (ZFILL)

    encode_param = [int(cv2.IMWRITE_JPEG_QUALITY), 95]

    start_time = time.time()

    with open(label_path, 'r') as input_file:
        for record in input_file:
            count += 1
            if count % 10000 == 0:
                now_time = time.time()
                print('{0} / {1} in {2:.2f} sec'.format(
                    count, total_line, now_time - start_time))
                db.commit()

            record = record.split()
            path = record[0]
            label = record[1]

            img = cv2.imread(os.path.join(image_path, path))
            result, imgencode = cv2.imencode('.jpg', img, encode_param)

            datum = caffe_pb2.Datum()
            datum.height, datum.width, datum.channels = img.shape
            datum.label = int(label)
            datum.encoded = True
            datum.data = imgencode.tostring()
            db.put(zfill_flag.format(count - 1), datum.SerializeToString())

    now_time = time.time()
    print('{0} / {1} in {2:.2f} sec'.format(count, total_line,
                                            now_time - start_time))
    db.put('size', wrapper_str(str(count)))
    db.put('zfill', wrapper_str(str(ZFILL)))
    db.commit()
    db.close()

    shutil.copy(label_path, database_path + '/image_list.txt')
    end_time = time.time()
    print('{0} images have been stored in the database.'.format(total_line))
    print('This task finished in {0:.2f} seconds.'.format(end_time - start_time))
    print('The size of the database is {0:.2f} MB.'.format(
        os.path.getsize(database_path + '/data.mdb') / 1e6))
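A hedged usage sketch for this variant: the paths below are placeholders, and the label file is expected to contain one relative-image-path/label pair per line, which is what the record.split() parsing above assumes.

# Hypothetical paths, for illustration only.
make_db(image_path='/data/images',
        label_path='/data/train_list.txt',
        database_path='/data/train_lmdb')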
Example 2
def make_db(images_list, database_path, pad=0):
    if os.path.isdir(database_path):
        raise ValueError('the database path already exists.')

    print('start time: ', time.strftime("%a, %d %b %Y %H:%M:%S",
                                        time.gmtime()))

    db = LMDB(max_commit=10000)
    db.open(database_path, mode='w')

    total_line = len(images_list)
    count = 0
    zfill_flag = '{0:0%d}' % (ZFILL)

    start_time = time.time()

    for record in images_list:
        count += 1
        if count % 10000 == 0:
            now_time = time.time()
            print('{0} / {1} in {2:.2f} sec'.format(count, total_line,
                                                    now_time - start_time))
            db.commit()

        img = record[0]
        label = record[1]
        if pad > 0:
            pad_img = np.zeros(
                (img.shape[0] + 2 * pad, img.shape[1] + 2 * pad, 3),
                dtype=np.uint8)
            pad_img[pad:pad + img.shape[0], pad:pad + img.shape[1], :] = img
            img = pad_img

        datum = caffe_pb2.Datum()
        datum.height, datum.width, datum.channels = img.shape
        datum.label = int(label)
        datum.encoded = False
        datum.data = img.tostring()
        db.put(zfill_flag.format(count - 1), datum.SerializeToString())

    now_time = time.time()
    print('{0} / {1} in {2:.2f} sec'.format(count, total_line,
                                            now_time - start_time))
    db.put('size', wrapper_str(str(count)))
    db.put('zfill', wrapper_str(str(ZFILL)))
    db.commit()
    db.close()

    end_time = time.time()
    print('{0} images have been stored in the database.'.format(total_line))
    print('This task finished in {0:.2f} seconds.'.format(end_time - start_time))
    print('The size of the database is {0:.2f} MB.'.format(
        os.path.getsize(database_path + '/data.mdb') / 1e6))
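This variant skips the list file and the JPEG encoding: each record is an (image, label) pair of already-decoded pixels, and pad adds a zero border of that many pixels on every side before the raw data is stored. A minimal usage sketch, assuming the images load as HxWx3 uint8 arrays (the paths and labels are made up):

# Build a small in-memory list of (image, label) records.
images_list = [(cv2.imread('/data/images/cat.jpg'), 0),
               (cv2.imread('/data/images/dog.jpg'), 1)]
make_db(images_list, '/data/toy_lmdb', pad=2)  # 2-pixel zero border on each side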
Example 3
def make_db(images_list, database_path):
    if os.path.isdir(database_path):
        raise ValueError('the database path already exists.')

    print('start time: ', time.strftime("%a, %d %b %Y %H:%M:%S", time.gmtime()))

    db = LMDB(max_commit=10000)
    db.open(database_path, mode='w')

    total_line = len(images_list)
    count = 0
    zfill_flag = '{0:0%d}' % (ZFILL)

    start_time = time.time()

    for record in images_list:
        count += 1
        if count % 10000 == 0:
            now_time = time.time()
            print('{0} / {1} in {2:.2f} sec'.format(
                count, total_line, now_time - start_time))
            db.commit()

        img = record[0]
        label = record[1]

        datum = caffe_pb2.Datum()
        datum.height, datum.width, datum.channels = img.shape
        datum.label = int(label)
        datum.encoded = False
        datum.data = img.tostring()
        db.put(zfill_flag.format(count - 1), datum.SerializeToString())

    now_time = time.time()
    print('{0} / {1} in {2:.2f} sec'.format(count, total_line, now_time - start_time))
    db.put('size', wrapper_str(str(count)))
    db.put('zfill', wrapper_str(str(ZFILL)))
    db.commit()
    db.close()

    end_time = time.time()
    print('{0} images have been stored in the database.'.format(total_line))
    print('This task finished in {0:.2f} seconds.'.format(end_time - start_time))
    print('The size of the database is {0:.2f} MB.'.format(
        os.path.getsize(database_path + '/data.mdb') / 1e6))
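For completeness, here is a sketch of how a record written by this raw (encoded=False) variant might be read back with the standard lmdb package and the Caffe protobuf. It assumes the project's LMDB wrapper writes a plain LMDB environment (the data.mdb file used above suggests it does) and that keys are the zero-padded strings produced by zfill_flag; the zfill=8 default is a guess, not the project's actual ZFILL value.

import lmdb
import numpy as np
from caffe.proto import caffe_pb2  # or however the compiled Caffe protos are exposed

def read_record(database_path, index, zfill=8):
    # Fetch the serialized Datum stored under the zero-padded string key.
    env = lmdb.open(database_path, readonly=True, lock=False)
    try:
        with env.begin() as txn:
            raw = txn.get(str(index).zfill(zfill).encode())
    finally:
        env.close()
    datum = caffe_pb2.Datum()
    datum.ParseFromString(raw)
    # encoded=False above, so datum.data holds raw HxWxC uint8 pixels.
    img = np.frombuffer(datum.data, dtype=np.uint8)
    return img.reshape(datum.height, datum.width, datum.channels), datum.label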
Example 4
def make_db(args):
    if not os.path.isfile(args.list):
        raise ValueError('the path of the image list is invalid.')
    if os.path.isdir(args.database):
        raise ValueError('the database already exists or the path is invalid.')

    print('start time: ', time.strftime("%a, %d %b %Y %H:%M:%S",
                                        time.gmtime()))

    db = LMDB(max_commit=10000)
    db.open(args.database, mode='w')

    total_line = sum(1 for line in open(args.list))
    count = 0
    zfill_flag = '{0:0%d}' % (args.zfill)

    encode_param = [int(cv2.IMWRITE_JPEG_QUALITY), args.quality]

    start_time = time.time()

    with open(args.list, 'r') as input_file:
        records = input_file.readlines()
        if args.shuffle:
            import random
            random.shuffle(records)

        for record in records:
            count += 1
            if count % 10000 == 0:
                now_time = time.time()
                print('{0} / {1} in {2:.2f} sec'.format(
                    count, total_line, now_time - start_time))
                db.commit()

            record = record.split()
            path = record[0]
            label = record[1]

            img = cv2.imread(os.path.join(args.root, path))
            if args.resize > 0:
                img = resize_image(img, args.resize)
            if args.pad > 0:
                pad_img = np.zeros((img.shape[0] + 2 * args.pad,
                                    img.shape[1] + 2 * args.pad, 3),
                                   dtype=img.dtype)
                pad_img[args.pad:args.pad + img.shape[0],
                        args.pad:args.pad + img.shape[1], :] = img
                img = pad_img
            result, imgencode = cv2.imencode('.jpg', img, encode_param)

            datum = caffe_pb2.Datum()
            datum.height, datum.width, datum.channels = img.shape
            datum.label = int(label)
            datum.encoded = True
            datum.data = imgencode.tostring()
            db.put(zfill_flag.format(count - 1), datum.SerializeToString())

    now_time = time.time()
    print('{0} / {1} in {2:.2f} sec'.format(count, total_line,
                                            now_time - start_time))
    db.put('size', str(count))
    db.put('zfill', str(args.zfill))
    db.commit()
    db.close()

    shutil.copy(args.list, args.database + '/image_list.txt')
    end_time = time.time()
    print('{0} images have been stored in the database.'.format(total_line))
    print('This task finished in {0:.2f} seconds.'.format(end_time - start_time))
    print('The size of the database is {0:.2f} MB.'.format(
        os.path.getsize(args.database + '/data.mdb') / 1e6))
Example 5
def make_db(args):
    """Make the sequential database for images.

    Parameters
    ----------
    database : str
        The path of the database.
    root : str
        The root folder of the raw images.
    list : str
        The path of the image list file.
    resize : int
        The size of the shortest edge. Default is ``0`` (disabled).
    zfill : int
        The number of digits used for zero-padded keys.
    quality : int
        The JPEG quality for encoding, 1-100. Default is ``95``.
    shuffle : boolean
        Whether to randomize the order of the list file.

    """
    if not os.path.isfile(args.list):
        raise ValueError('the path of the image list is invalid.')
    if os.path.isdir(args.database):
        raise ValueError('the database already exists or the path is invalid.')

    print('start time: ', time.strftime("%a, %d %b %Y %H:%M:%S",
                                        time.gmtime()))

    db = LMDB(max_commit=10000)
    db.open(args.database, mode='w')

    total_line = sum(1 for line in open(args.list))
    count = 0
    zfill_flag = '{0:0%d}' % (args.zfill)

    encode_param = [int(cv2.IMWRITE_JPEG_QUALITY), args.quality]

    start_time = time.time()

    with open(args.list, 'r') as input_file:
        records = input_file.readlines()
        if args.shuffle:
            import random
            random.shuffle(records)

        for record in records:
            count += 1
            if count % 10000 == 0:
                now_time = time.time()
                print('{0} / {1} in {2:.2f} sec'.format(
                    count, total_line, now_time - start_time))
                db.commit()

            record = record.split()
            path = record[0]
            label = record[1]

            img = cv2.imread(os.path.join(args.root, path))
            if args.resize > 0:
                img = resize_image(img, args.resize)
            result, imgencode = cv2.imencode('.jpg', img, encode_param)

            datum = caffe_pb2.Datum()
            datum.height, datum.width, datum.channels = img.shape
            datum.label = int(label)
            datum.encoded = True
            datum.data = imgencode.tostring()
            db.put(zfill_flag.format(count - 1), datum.SerializeToString())

    now_time = time.time()
    print('{0} / {1} in {2:.2f} sec'.format(count, total_line,
                                            now_time - start_time))
    db.put('size', str(count))
    db.put('zfill', str(args.zfill))
    db.commit()
    db.close()

    shutil.copy(args.list, args.database + '/image_list.txt')
    end_time = time.time()
    print('{0} images have been stored in the database.'.format(total_line))
    print('This task finished in {0:.2f} seconds.'.format(end_time - start_time))
    print('The size of the database is {0:.2f} MB.'.format(
        os.path.getsize(args.database + '/data.mdb') / 1e6))
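The args-based variants expect an object exposing the attributes documented in the docstring above. A minimal sketch of a command-line front end that would produce such an object with argparse; the flag names are illustrative, and the zfill default of 8 is a guess rather than the project's actual value.

import argparse

parser = argparse.ArgumentParser(description='Store an image list in an LMDB database.')
parser.add_argument('--database', required=True, help='path of the database to create')
parser.add_argument('--root', required=True, help='root folder of the raw images')
parser.add_argument('--list', required=True, help='path of the image list file')
parser.add_argument('--resize', type=int, default=0, help='size of the shortest edge, 0 disables resizing')
parser.add_argument('--zfill', type=int, default=8, help='number of digits for zero-padded keys')
parser.add_argument('--quality', type=int, default=95, help='JPEG quality for encoding, 1-100')
parser.add_argument('--shuffle', action='store_true', help='randomize the order of the list file')

if __name__ == '__main__':
    make_db(parser.parse_args())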
Example 6
def make_db(args):
    """Make the sequential database for images.

    Parameters
    ----------
    database : str
        The path of the database.
    root : str
        The root folder of the raw images.
    list : str
        The path of the image list file.
    resize : int
        The size of the shortest edge. Default is ``0`` (disabled).
    zfill : int
        The number of digits used for zero-padded keys.
    quality : int
        The JPEG quality for encoding, 1-100. Default is ``95``.
    shuffle : boolean
        Whether to randomize the order of the list file.

    """
    if not os.path.isfile(args.list):
        raise ValueError('the path of the image list is invalid.')
    if os.path.isdir(args.database):
        raise ValueError('the database already exists or the path is invalid.')

    print('start time: ', time.strftime("%a, %d %b %Y %H:%M:%S", time.gmtime()))

    db = LMDB(max_commit=10000)
    db.open(args.database, mode='w')

    total_line = sum(1 for line in open(args.list))
    count = 0
    zfill_flag = '{0:0%d}' % (args.zfill)

    encode_param = [int(cv2.IMWRITE_JPEG_QUALITY), args.quality]

    start_time = time.time()

    with open(args.list, 'r') as input_file:
        records = input_file.readlines()
        if args.shuffle:
            import random
            random.shuffle(records)

        for record in records:
            count += 1
            if count % 10000 == 0:
                now_time = time.time()
                print('{0} / {1} in {2:.2f} sec'.format(
                    count, total_line, now_time - start_time))
                db.commit()

            record = record.split()
            path = record[0]
            label = record[1]

            img = cv2.imread(os.path.join(args.root, path))
            if args.resize > 0:
                img = resize_image(img, args.resize)
            result, imgencode = cv2.imencode('.jpg', img, encode_param)

            datum = caffe_pb2.Datum()
            datum.height, datum.width, datum.channels = img.shape
            datum.label = int(label)
            datum.encoded = True
            datum.data = imgencode.tostring()
            db.put(zfill_flag.format(count - 1), datum.SerializeToString())

    now_time = time.time()
    print('{0} / {1} in {2:.2f} sec'.format(count, total_line, now_time - start_time))
    db.put('size', str(count))
    db.put('zfill', str(args.zfill))
    db.commit()
    db.close()

    shutil.copy(args.list, args.database + '/image_list.txt')
    end_time = time.time()
    print('{0} images have been stored in the database.'.format(total_line))
    print('This task finished in {0:.2f} seconds.'.format(end_time - start_time))
    print('The size of the database is {0:.2f} MB.'.format(
        os.path.getsize(args.database + '/data.mdb') / 1e6))