Beispiel #1
0
    def get(self, serialized):
        """Return image and labels from a serialized str.

        The serialized ``Datum`` is parsed, its image payload is decoded
        (via ``cv2.imdecode`` when ``datum.encoded`` is set, otherwise by
        reshaping the raw bytes), and the image is passed through
        ``self.transform`` before being returned.

        Parameters
        ----------
        serialized : str
            The protobuf serialized str.

        Returns
        -------
        tuple
            The tuple image and labels.

        """
        # Decode
        datum = pb.Datum()
        datum.ParseFromString(serialized)
        # ``np.fromstring`` is deprecated for binary input;
        # ``np.frombuffer`` is the replacement and returns a read-only view.
        buf = np.frombuffer(datum.data, np.uint8)
        if datum.encoded:
            im = cv2.imdecode(buf, -1)
        else:
            shape = (datum.height, datum.width, datum.channels)
            # Copy so that downstream code still gets a writable array,
            # as it did with ``np.fromstring``.
            im = buf.reshape(shape).copy()

        # Reverse the channel axis when RGB output is requested
        # (presumably the stored order is BGR -- confirm with the writer).
        if datum.channels == 3 and self.color_space == 'RGB':
            im = im[:, :, ::-1]

        # Labels: prefer the repeated ``labels`` field, and fall back
        # to the scalar ``label`` when it is empty.
        if len(datum.labels) > 0:
            labels = list(datum.labels)
        else:
            labels = [datum.label]
        return self.transform(im), labels
Beispiel #2
0
def make_db(image_path, label_path, database_path):
    """Make a database of JPEG-encoded images from a label list file.

    Each line of the label file is split on whitespace: the first field
    is an image path relative to ``image_path`` and the second field is
    an integer label. Records are stored under zero-padded sequential
    keys, followed by ``'size'`` and ``'zfill'`` entries. The label file
    is finally copied into the database folder as ``image_list.txt``.

    Parameters
    ----------
    image_path : str
        The root folder of raw images.
    label_path : str
        The path of the label list file.
    database_path : str
        The path of the database to create.

    Raises
    ------
    ValueError
        If ``label_path`` is not a file, ``database_path`` is an
        existing directory, or an image can not be read.

    """
    if not os.path.isfile(label_path):
        raise ValueError('input path is empty or wrong.')
    if os.path.isdir(database_path):
        raise ValueError('the database path is already exist.')

    print('start time: ', time.strftime("%a, %d %b %Y %H:%M:%S",
                                        time.gmtime()))

    db = LMDB(max_commit=10000)
    db.open(database_path, mode='w')

    # Count the lines inside a context manager so the handle is closed.
    with open(label_path) as count_file:
        total_line = sum(1 for _ in count_file)
    count = 0
    zfill_flag = '{0:0%d}' % (ZFILL)

    encode_param = [int(cv2.IMWRITE_JPEG_QUALITY), 95]

    start_time = time.time()

    with open(label_path, 'r') as input_file:
        for record in input_file:
            count += 1
            if count % 10000 == 0:
                now_time = time.time()
                print('{0} / {1} in {2:.2f} sec'.format(
                    count, total_line, now_time - start_time))
                db.commit()

            fields = record.split()
            path = fields[0]
            label = fields[1]

            full_path = os.path.join(image_path, path)
            img = cv2.imread(full_path)
            if img is None:
                # cv2.imread signals failure by returning None; fail
                # here with the offending path instead of inside imencode.
                raise ValueError(
                    'failed to read image: {0}'.format(full_path))
            result, imgencode = cv2.imencode('.jpg', img, encode_param)

            datum = caffe_pb2.Datum()
            datum.height, datum.width, datum.channels = img.shape
            datum.label = int(label)
            datum.encoded = True
            # ``tostring`` is a deprecated alias of ``tobytes``.
            datum.data = imgencode.tobytes()
            db.put(zfill_flag.format(count - 1), datum.SerializeToString())

    now_time = time.time()
    print('{0} / {1} in {2:.2f} sec'.format(count, total_line,
                                            now_time - start_time))
    db.put('size', wrapper_str(str(count)))
    db.put('zfill', wrapper_str(str(ZFILL)))
    db.commit()
    db.close()

    shutil.copy(label_path, os.path.join(database_path, 'image_list.txt'))
    end_time = time.time()
    print('{0} images have been stored in the database.'.format(total_line))
    print('This task finishes within {0:.2f} seconds.'.format(end_time -
                                                              start_time))
    # Divide by floats so the size is not truncated under Python 2.
    data_file = os.path.join(database_path, 'data.mdb')
    print('The size of database is {0} MB.'.format(
        os.path.getsize(data_file) / 1000.0 / 1000.0))
Beispiel #3
0
def make_db(images_list, database_path, pad=0):
    """Make a database of raw (un-encoded) images from in-memory records.

    Each record is indexed as ``(image, label)``. The image bytes are
    stored as-is under zero-padded sequential keys, followed by
    ``'size'`` and ``'zfill'`` entries.

    Parameters
    ----------
    images_list : sequence
        The records; ``record[0]`` is the image array whose shape unpacks
        to (height, width, channels) and ``record[1]`` is the label.
    database_path : str
        The path of the database to create.
    pad : int
        The number of zero pixels added on every side. Default is ``0``.
        The padded canvas always has 3 channels and uint8 dtype.

    Raises
    ------
    ValueError
        If ``database_path`` is an existing directory.

    """
    if os.path.isdir(database_path):
        raise ValueError('the database path is already exist.')

    print('start time: ', time.strftime("%a, %d %b %Y %H:%M:%S",
                                        time.gmtime()))

    db = LMDB(max_commit=10000)
    db.open(database_path, mode='w')

    total_line = len(images_list)
    count = 0
    zfill_flag = '{0:0%d}' % (ZFILL)

    start_time = time.time()

    for record in images_list:
        count += 1
        if count % 10000 == 0:
            now_time = time.time()
            print('{0} / {1} in {2:.2f} sec'.format(count, total_line,
                                                    now_time - start_time))
            db.commit()

        img = record[0]
        label = record[1]
        if pad > 0:
            height, width = img.shape[0], img.shape[1]
            pad_img = np.zeros(
                (height + 2 * pad, width + 2 * pad, 3), dtype=np.uint8)
            pad_img[pad:pad + height, pad:pad + width, :] = img
            img = pad_img

        datum = caffe_pb2.Datum()
        datum.height, datum.width, datum.channels = img.shape
        datum.label = int(label)
        datum.encoded = False
        # ``tostring`` is a deprecated alias of ``tobytes``.
        datum.data = img.tobytes()
        db.put(zfill_flag.format(count - 1), datum.SerializeToString())

    now_time = time.time()
    print('{0} / {1} in {2:.2f} sec'.format(count, total_line,
                                            now_time - start_time))
    db.put('size', wrapper_str(str(count)))
    db.put('zfill', wrapper_str(str(ZFILL)))
    db.commit()
    db.close()

    end_time = time.time()
    print('{0} images have been stored in the database.'.format(total_line))
    print('This task finishes within {0:.2f} seconds.'.format(end_time -
                                                              start_time))
    # Divide by floats so the size is not truncated under Python 2.
    data_file = os.path.join(database_path, 'data.mdb')
    print('The size of database is {0} MB.'.format(
        os.path.getsize(data_file) / 1000.0 / 1000.0))
Beispiel #4
0
    def transform_image_labels(self, serialized):
        """Get image and labels from a serialized str.

        The image is decoded and then, depending on the instance settings,
        randomly scaled, cropped, mirrored, expanded to 3 channels,
        color-augmented and padded. It is converted to float32 before the
        mean subtraction and the numerical scaling.

        Parameters
        ----------
        serialized : str
            The protobuf serialized str.

        Returns
        -------
        tuple
            The tuple image and labels.

        """
        # decode
        datum = pb.Datum()
        datum.ParseFromString(serialized)
        # ``np.fromstring`` is deprecated for binary input. The read-only
        # view is fine here: ``astype`` below always makes a new array.
        im = np.frombuffer(datum.data, np.uint8)
        if datum.encoded:
            im = cv2.imdecode(im, -1)
        else:
            im = im.reshape((datum.height, datum.width, datum.channels))

        # random scale
        scale_range = self._max_random_scale - self._min_random_scale
        random_scale = npr.uniform() * scale_range + self._min_random_scale
        if random_scale != 1.0:
            # PIL expects the size as (width, height)
            new_shape = (int(im.shape[1] * random_scale),
                         int(im.shape[0] * random_scale))
            im = PIL.Image.fromarray(im)
            im = im.resize(new_shape, PIL.Image.BILINEAR)
            im = np.array(im)

        # random crop
        if self._crop_size > 0:
            if self._phase == 'TRAIN':
                h_off = npr.randint(im.shape[0] - self._crop_size + 1)
                w_off = npr.randint(im.shape[1] - self._crop_size + 1)
            else:
                # Center crop. Floor division keeps the offsets integral;
                # ``/`` yields floats on Python 3 and breaks the slicing.
                h_off = (im.shape[0] - self._crop_size) // 2
                w_off = (im.shape[1] - self._crop_size) // 2
            im = im[h_off:h_off + self._crop_size,
                    w_off:w_off + self._crop_size, :]

        # random mirror
        if self._mirror:
            if npr.randint(0, 2) > 0:
                im = im[:, ::-1, :]

        # gray transformation
        if self._force_color:
            if im.shape[2] == 1:
                # duplicate to 3 channels
                im = np.concatenate([im, im, im], axis=2)

        # color augmentation: brightness, contrast and saturation are
        # each multiplied by a factor drawn from [0.6, 1.4)
        if self._color_aug:
            im = PIL.Image.fromarray(im)
            delta_brightness = npr.uniform(-0.4, 0.4) + 1.0
            delta_contrast = npr.uniform(-0.4, 0.4) + 1.0
            delta_saturation = npr.uniform(-0.4, 0.4) + 1.0
            im = PIL.ImageEnhance.Brightness(im)
            im = im.enhance(delta_brightness)
            im = PIL.ImageEnhance.Contrast(im)
            im = im.enhance(delta_contrast)
            im = PIL.ImageEnhance.Color(im)
            im = im.enhance(delta_saturation)
            im = np.array(im)

        # padding
        if self._padding > 0:
            pad_img = np.empty((im.shape[0] + 2 * self._padding,
                                im.shape[1] + 2 * self._padding, im.shape[2]),
                               dtype=im.dtype)
            pad_img.fill(self._fill_value)
            pad_img[self._padding:self._padding + im.shape[0],
                    self._padding:self._padding + im.shape[1], :] = im
            im = pad_img

        im = im.astype(np.float32, copy=False)

        # mean subtraction
        if len(self._mean_values) > 0:
            im = im - self._mean_values

        # numerical scale
        if self._scale != 1.0:
            im = im * self._scale

        return im, [datum.label]
Beispiel #5
0
    def get(self, serialized):
        """Return image and labels from a serialized str.

        The image is decoded and then, depending on the instance settings,
        randomly scaled, cropped, mirrored, expanded to 3 channels,
        color-augmented and padded.

        Parameters
        ----------
        serialized : str
            The protobuf serialized str.

        Returns
        -------
        tuple
            The tuple image and labels.

        """
        # decode
        datum = pb.Datum()
        datum.ParseFromString(serialized)
        # ``np.fromstring`` is deprecated for binary input;
        # ``np.frombuffer`` is the replacement and returns a read-only view.
        buf = np.frombuffer(datum.data, np.uint8)
        if datum.encoded:
            im = cv2.imdecode(buf, -1)
        else:
            shape = (datum.height, datum.width, datum.channels)
            # Copy so the returned image stays writable, as it was
            # with ``np.fromstring``.
            im = buf.reshape(shape).copy()

        # random scale
        scale_range = self._max_random_scale - self._min_random_scale
        random_scale = npr.uniform() * scale_range + self._min_random_scale
        if random_scale != 1.0:
            if sys.version_info >= (3, 0):
                im = cv2.resize(im,
                                None,
                                interpolation=cv2.INTER_LINEAR,
                                fx=random_scale,
                                fy=random_scale)
            else:
                # Work around an opencv-python bug on Python 2 that
                # creates duplicate CUDA handles on gpu:0: resize with
                # PIL instead. PIL expects the size as (width, height).
                new_shape = (int(np.ceil(im.shape[1] * random_scale)),
                             int(np.ceil(im.shape[0] * random_scale)))
                im = PIL.Image.fromarray(im)
                im = im.resize(new_shape, PIL.Image.BILINEAR)
                im = np.array(im)

        # random crop
        if self._crop_size > 0:
            if self._phase == 'TRAIN':
                h_off = npr.randint(im.shape[0] - self._crop_size + 1)
                w_off = npr.randint(im.shape[1] - self._crop_size + 1)
            else:
                # center crop
                h_off = int((im.shape[0] - self._crop_size) / 2)
                w_off = int((im.shape[1] - self._crop_size) / 2)
            im = im[h_off:h_off + self._crop_size,
                    w_off:w_off + self._crop_size, :]

        # random mirror
        if self._mirror:
            if npr.randint(0, 2) > 0:
                im = im[:, ::-1, :]

        # gray transformation
        if self._force_color:
            if im.shape[2] == 1:
                # duplicate to 3 channels
                im = np.concatenate([im, im, im], axis=2)

        # color augmentation: brightness, contrast and saturation are
        # each multiplied by a factor drawn from [0.6, 1.4)
        if self._color_aug:
            im = PIL.Image.fromarray(im)
            delta_brightness = npr.uniform(-0.4, 0.4) + 1.0
            delta_contrast = npr.uniform(-0.4, 0.4) + 1.0
            delta_saturation = npr.uniform(-0.4, 0.4) + 1.0
            im = PIL.ImageEnhance.Brightness(im)
            im = im.enhance(delta_brightness)
            im = PIL.ImageEnhance.Contrast(im)
            im = im.enhance(delta_contrast)
            im = PIL.ImageEnhance.Color(im)
            im = im.enhance(delta_saturation)
            im = np.array(im)

        # padding
        if self._padding > 0:
            pad_img = np.empty((im.shape[0] + 2 * self._padding,
                                im.shape[1] + 2 * self._padding, im.shape[2]),
                               dtype=im.dtype)
            pad_img.fill(self._fill_value)
            pad_img[self._padding:self._padding + im.shape[0],
                    self._padding:self._padding + im.shape[1], :] = im
            im = pad_img

        # labels: prefer the repeated ``labels`` field, and fall back
        # to the scalar ``label`` when it is empty
        if len(datum.labels) > 0:
            labels = list(datum.labels)
        else:
            labels = [datum.label]

        return im, labels
Beispiel #6
0
    def get(self, serialized):
        """Return image and labels from a serialized str.

        The image is decoded and then, depending on the instance settings,
        randomly scaled, padded, cropped, cut out, mirrored, expanded to
        3 channels and color-augmented.

        Parameters
        ----------
        serialized : str
            The protobuf serialized str.

        Returns
        -------
        tuple
            The tuple image and labels.

        """
        # Decode
        datum = _proto_def.Datum()
        datum.ParseFromString(serialized)
        # ``numpy.fromstring`` is deprecated for binary input;
        # ``numpy.frombuffer`` is the replacement and returns a
        # read-only view.
        buf = numpy.frombuffer(datum.data, numpy.uint8)
        if datum.encoded:
            im = cv2.imdecode(buf, -1)
        else:
            shape = (datum.height, datum.width, datum.channels)
            # The copy is required: CutOut below writes into the
            # image in place, which a read-only view would reject.
            im = buf.reshape(shape).copy()

        # Random scale
        scale_range = self._max_rand_scale - self._min_rand_scale
        rand_scale = numpy.random.uniform() * scale_range \
            + self._min_rand_scale
        if rand_scale != 1.0:
            im = cv2.resize(
                im,
                None,
                fx=rand_scale,
                fy=rand_scale,
                interpolation=cv2.INTER_LINEAR,
            )

        # Padding
        if self._padding > 0:
            pad_im = numpy.empty(
                (im.shape[0] + 2 * self._padding,
                 im.shape[1] + 2 * self._padding, im.shape[2]),
                dtype=im.dtype)
            pad_im[:] = self._fill_value
            pad_im[self._padding:self._padding + im.shape[0],
                   self._padding:self._padding + im.shape[1], :] = im
            im = pad_im

        # Random crop
        if self._crop_size > 0:
            if self._phase == 'TRAIN':
                h_off = numpy.random.randint(im.shape[0] - self._crop_size + 1)
                w_off = numpy.random.randint(im.shape[1] - self._crop_size + 1)
            else:
                # Center crop
                h_off = int((im.shape[0] - self._crop_size) / 2)
                w_off = int((im.shape[1] - self._crop_size) / 2)
            im = im[h_off:h_off + self._crop_size,
                    w_off:w_off + self._crop_size, :]

        # CutOut: fill a square around a random center, clipped to
        # the image bounds, with the fill value
        if self._cutout_size > 0:
            h, w = im.shape[:2]
            y = numpy.random.randint(h)
            x = numpy.random.randint(w)
            half = self._cutout_size // 2
            y1 = numpy.clip(y - half, 0, h)
            y2 = numpy.clip(y + half, 0, h)
            x1 = numpy.clip(x - half, 0, w)
            x2 = numpy.clip(x + half, 0, w)
            im[y1:y2, x1:x2] = self._fill_value

        # Random mirror
        if self._mirror:
            if numpy.random.randint(0, 2) > 0:
                im = im[:, ::-1, :]

        # Gray Transformation
        if self._force_color:
            if im.shape[2] == 1:
                # Duplicate to 3 channels
                im = numpy.concatenate([im, im, im], axis=2)

        # Color Augmentation: brightness, contrast and saturation are
        # each multiplied by a factor drawn from [0.6, 1.4)
        if self._color_aug:
            im = PIL.Image.fromarray(im)
            delta_brightness = numpy.random.uniform(-0.4, 0.4) + 1.0
            delta_contrast = numpy.random.uniform(-0.4, 0.4) + 1.0
            delta_saturation = numpy.random.uniform(-0.4, 0.4) + 1.0
            im = PIL.ImageEnhance.Brightness(im)
            im = im.enhance(delta_brightness)
            im = PIL.ImageEnhance.Contrast(im)
            im = im.enhance(delta_contrast)
            im = PIL.ImageEnhance.Color(im)
            im = im.enhance(delta_saturation)
            im = numpy.array(im)

        # Extract Labels: prefer the repeated ``labels`` field, and
        # fall back to the scalar ``label`` when it is empty
        if len(datum.labels) > 0:
            labels = list(datum.labels)
        else:
            labels = [datum.label]

        return im, labels
Beispiel #7
0
    def transform_image_label(self, serialized):
        """Get image and label from a serialized str.

        The image is decoded and then, depending on the instance settings,
        randomly scaled, expanded to 3 channels, cropped, mirrored and
        color-augmented in HSV space. It is converted to float32 before
        the mean subtraction and the range scaling.

        Parameters
        ----------
        serialized : str
            The protobuf serialized str.

        Returns
        -------
        tuple
            The tuple image and a one-element list holding the label.

        """
        datum = pb.Datum()
        datum.ParseFromString(serialized)
        # ``np.fromstring`` is deprecated for binary input. The read-only
        # view is fine here: ``astype`` below always makes a new array.
        im = np.frombuffer(datum.data, np.uint8)
        if datum.encoded:
            im = cv2.imdecode(im, -1)
        else:
            im = im.reshape((datum.height, datum.width, datum.channels))

        # handle scale
        scale_range = self._max_random_scale - self._min_random_scale
        random_scale = npr.uniform() * scale_range + self._min_random_scale
        if random_scale != 1.0:
            # PIL expects the size as (width, height)
            new_shape = (int(im.shape[1] * random_scale),
                         int(im.shape[0] * random_scale))
            im = PIL.Image.fromarray(im)
            im = im.resize(new_shape, PIL.Image.BILINEAR)
            im = np.array(im)

        # handle gray
        if not self._force_gray:
            if im.shape[2] == 1:
                im = np.concatenate([im, im, im], axis=2)  # copy to 3 channels

        # handle crop
        h_off = w_off = 0
        if self._crop_size > 0:
            # NOTE(review): phase 0 presumably means training -- confirm
            if self._phase == 0:
                h_off = npr.randint(im.shape[0] - self._crop_size + 1)
                w_off = npr.randint(im.shape[1] - self._crop_size + 1)
            else:
                # Center crop. Floor division keeps the offsets integral;
                # ``/`` yields floats on Python 3 and breaks the slicing
                # of both the image and the mean below.
                h_off = (im.shape[0] - self._crop_size) // 2
                w_off = (im.shape[1] - self._crop_size) // 2
            im = im[h_off:h_off + self._crop_size,
                    w_off:w_off + self._crop_size, :]

        # handle mirror
        if self._mirror:
            if npr.randint(0, 2) > 0:
                im = im[:, ::-1, :]

        # handle color augmentation: applied with probability 1/2, each
        # HSV channel is shifted by a value drawn from [-0.1, 0.1)
        if self._color_aug:
            if npr.randint(0, 2) > 0:
                im = im[:, :, ::-1]  # BGR -> RGB
                im = skimage.color.rgb2hsv(im)
                h, s, v = np.split(im, 3, 2)
                delta_h = npr.uniform() * 0.2 - 0.1
                delta_s = npr.uniform() * 0.2 - 0.1
                delta_v = npr.uniform() * 0.2 - 0.1
                h = np.clip(h + delta_h, 0, 1)
                s = np.clip(s + delta_s, 0, 1)
                v = np.clip(v + delta_v, 0, 1)
                im = np.concatenate([h, s, v], axis=2)
                im = skimage.color.hsv2rgb(im)
                # back to BGR, rescaled by 255
                im = im[:, :, ::-1] * np.array([255])

        im = im.astype(np.float32, copy=False)

        # handle mean subtraction
        if len(self._mean_value) > 0:
            if self._mean_file:
                if self._crop_size > 0:
                    # subtract the same window of the mean as was cropped
                    im = im - self._mean_value[h_off:h_off + self._crop_size,
                                               w_off:w_off +
                                               self._crop_size, :]
                else:
                    im = im - self._mean_value[:, :, :]
            else:
                im = im - self._mean_value

        # handle range scale
        if self._scale != 1.0:
            im = im * self._scale

        return im, [datum.label]
Beispiel #8
0
def make_db(args):
    """Make the sequential database for images.

    Each line of the list file is split on whitespace: the first field
    is an image path relative to ``args.root`` and the second field is
    an integer label. Images are JPEG-encoded and stored under
    zero-padded sequential keys, followed by ``'size'`` and ``'zfill'``
    entries. The list file is finally copied into the database folder
    as ``image_list.txt``.

    Parameters
    ----------
    args : object
        The options, read through the following attributes:
        ``database`` (path of the database), ``root`` (root folder of
        raw images), ``list`` (path of the image list file), ``resize``
        (passed to ``resize_image`` when ``> 0``), ``pad`` (zero pixels
        added on every side when ``> 0``), ``zfill`` (key width),
        ``quality`` (JPEG quality) and ``shuffle`` (whether to
        randomize the record order).

    Raises
    ------
    ValueError
        If ``args.list`` is not a file, ``args.database`` is an
        existing directory, or an image can not be read.

    """
    if not os.path.isfile(args.list):
        raise ValueError('the path of image list is invalid.')
    if os.path.isdir(args.database):
        raise ValueError('the database is already exist or invalid.')

    print('start time: ', time.strftime("%a, %d %b %Y %H:%M:%S",
                                        time.gmtime()))

    db = LMDB(max_commit=10000)
    db.open(args.database, mode='w')

    # Read the list once: this yields both the records and the line
    # count, and never leaves a file handle open.
    with open(args.list, 'r') as input_file:
        records = input_file.readlines()
    total_line = len(records)
    count = 0
    zfill_flag = '{0:0%d}' % (args.zfill)

    encode_param = [int(cv2.IMWRITE_JPEG_QUALITY), args.quality]

    start_time = time.time()

    if args.shuffle:
        import random
        random.shuffle(records)

    for record in records:
        count += 1
        if count % 10000 == 0:
            now_time = time.time()
            print('{0} / {1} in {2:.2f} sec'.format(
                count, total_line, now_time - start_time))
            db.commit()

        fields = record.split()
        path = fields[0]
        label = fields[1]

        full_path = os.path.join(args.root, path)
        img = cv2.imread(full_path)
        if img is None:
            # cv2.imread signals failure by returning None; fail here
            # with the offending path instead of further downstream.
            raise ValueError('failed to read image: {0}'.format(full_path))
        if args.resize > 0:
            img = resize_image(img, args.resize)
        if args.pad > 0:
            pad_img = np.zeros((img.shape[0] + 2 * args.pad,
                                img.shape[1] + 2 * args.pad, 3),
                               dtype=img.dtype)
            pad_img[args.pad:args.pad + img.shape[0],
                    args.pad:args.pad + img.shape[1], :] = img
            img = pad_img
        result, imgencode = cv2.imencode('.jpg', img, encode_param)

        datum = caffe_pb2.Datum()
        datum.height, datum.width, datum.channels = img.shape
        datum.label = int(label)
        datum.encoded = True
        # ``tostring`` is a deprecated alias of ``tobytes``.
        datum.data = imgencode.tobytes()
        db.put(zfill_flag.format(count - 1), datum.SerializeToString())

    now_time = time.time()
    print('{0} / {1} in {2:.2f} sec'.format(count, total_line,
                                            now_time - start_time))
    db.put('size', str(count))
    db.put('zfill', str(args.zfill))
    db.commit()
    db.close()

    shutil.copy(args.list, os.path.join(args.database, 'image_list.txt'))
    end_time = time.time()
    print('{0} images have been stored in the database.'.format(total_line))
    print('This task finishes within {0:.2f} seconds.'.format(end_time -
                                                              start_time))
    # Divide by floats so the size is not truncated under Python 2.
    data_file = os.path.join(args.database, 'data.mdb')
    print('The size of database is {0} MB.'.format(
        os.path.getsize(data_file) / 1000.0 / 1000.0))
Beispiel #9
0
def make_db(args):
    """Make the sequential database for images.

    Each line of the list file is split on whitespace: the first field
    is an image path relative to ``args.root`` and the second field is
    an integer label. Images are JPEG-encoded and stored under
    zero-padded sequential keys, followed by ``'size'`` and ``'zfill'``
    entries. The list file is finally copied into the database folder
    as ``image_list.txt``.

    Parameters
    ----------
    args : object
        The options, read through the following attributes.
    args.database : str
        The path of database.
    args.root : str
        The root folder of raw images.
    args.list : str
        The path of image list file.
    args.resize : int
        The size of the shortest edge. Default is ``0`` (Disabled).
    args.zfill : int
        The number of zeros for encoding keys.
    args.quality : int
        JPEG quality for encoding, 1-100. Default is ``95``.
    args.shuffle : boolean
        Whether to randomize the order in list file.

    Raises
    ------
    ValueError
        If ``args.list`` is not a file, ``args.database`` is an
        existing directory, or an image can not be read.

    """
    if not os.path.isfile(args.list):
        raise ValueError('the path of image list is invalid.')
    if os.path.isdir(args.database):
        raise ValueError('the database is already exist or invalid.')

    print('start time: ', time.strftime("%a, %d %b %Y %H:%M:%S",
                                        time.gmtime()))

    db = LMDB(max_commit=10000)
    db.open(args.database, mode='w')

    # Read the list once: this yields both the records and the line
    # count, and never leaves a file handle open.
    with open(args.list, 'r') as input_file:
        records = input_file.readlines()
    total_line = len(records)
    count = 0
    zfill_flag = '{0:0%d}' % (args.zfill)

    encode_param = [int(cv2.IMWRITE_JPEG_QUALITY), args.quality]

    start_time = time.time()

    if args.shuffle:
        import random
        random.shuffle(records)

    for record in records:
        count += 1
        if count % 10000 == 0:
            now_time = time.time()
            print('{0} / {1} in {2:.2f} sec'.format(
                count, total_line, now_time - start_time))
            db.commit()

        fields = record.split()
        path = fields[0]
        label = fields[1]

        full_path = os.path.join(args.root, path)
        img = cv2.imread(full_path)
        if img is None:
            # cv2.imread signals failure by returning None; fail here
            # with the offending path instead of further downstream.
            raise ValueError('failed to read image: {0}'.format(full_path))
        if args.resize > 0:
            img = resize_image(img, args.resize)
        result, imgencode = cv2.imencode('.jpg', img, encode_param)

        datum = caffe_pb2.Datum()
        datum.height, datum.width, datum.channels = img.shape
        datum.label = int(label)
        datum.encoded = True
        # ``tostring`` is a deprecated alias of ``tobytes``.
        datum.data = imgencode.tobytes()
        db.put(zfill_flag.format(count - 1), datum.SerializeToString())

    now_time = time.time()
    print('{0} / {1} in {2:.2f} sec'.format(count, total_line,
                                            now_time - start_time))
    db.put('size', str(count))
    db.put('zfill', str(args.zfill))
    db.commit()
    db.close()

    shutil.copy(args.list, os.path.join(args.database, 'image_list.txt'))
    end_time = time.time()
    print('{0} images have been stored in the database.'.format(total_line))
    print('This task finishes within {0:.2f} seconds.'.format(end_time -
                                                              start_time))
    # Divide by floats so the size is not truncated under Python 2.
    data_file = os.path.join(args.database, 'data.mdb')
    print('The size of database is {0} MB.'.format(
        os.path.getsize(data_file) / 1000.0 / 1000.0))