Example #1
import shutil
import time

import plyvel
from caffe.proto.caffe_pb2 import Datum

# `utils`, `_preprocess_data`, and `_load_numpy_image` are project-local
# helpers (see the sketch after this example).


def _generate_leveldb(file_path, image_paths, targets, width, height):
    """
    Caffe uses the LevelDB format to efficiently load its training and validation data; this method
    writes out images and their labels in an efficient way into this format.
    """
    print("\t\tGenerating LevelDB file at %s..." % file_path)
    shutil.rmtree(file_path, ignore_errors=True)
    db = plyvel.DB(file_path, create_if_missing=True)
    wb = db.write_batch()
    commit_every = 10000
    start_time = int(round(time.time() * 1000))
    for idx in range(len(image_paths)):
        # Each image is a top level key with a keyname like 00000000011, in increasing
        # order starting from 00000000000.
        key = utils.get_key(idx)

        # Do common normalization that might happen across both testing and validation.
        try:
            image = _preprocess_data(
                _load_numpy_image(image_paths[idx], width, height))
        except Exception:
            print("\t\t\tWarning: Unable to process leveldb image %s" % image_paths[idx])
            continue

        # Each entry in the leveldb is a Caffe protobuffer "Datum" object containing details.
        datum = Datum()
        datum.channels = 3  # RGB
        datum.height = height
        datum.width = width
        datum.data = image.tostring()
        datum.label = targets[idx]
        value = datum.SerializeToString()
        wb.put(key, value)

        # Periodically flush the write batch so memory use stays bounded.
        if (idx + 1) % commit_every == 0:
            wb.write()
            del wb
            wb = db.write_batch()
            end_time = int(round(time.time() * 1000))
            total_time = end_time - start_time
            print("\t\t\tWrote batch, key: %s, time for batch: %d ms" % (key, total_time))
            start_time = int(round(time.time() * 1000))

    # Flush whatever is left over in the final, partial batch.
    end_time = int(round(time.time() * 1000))
    total_time = end_time - start_time
    print("\t\t\tWriting final batch, time for batch: %d ms" % total_time)
    wb.write()
    db.close()
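
This example leans on helpers that are not shown: `utils.get_key` and `_load_numpy_image`. A minimal sketch of what they plausibly look like, assuming keys are the zero-padded indices described in the comments and images are loaded with PIL in the channel-first (channels, height, width) layout that the Datum fields describe; the bodies below are assumptions, not the original implementations:

import numpy as np
from PIL import Image


def get_key(idx):
    # Zero-pad the index to 11 digits, matching keys like 00000000011
    # mentioned in the comments above.
    return "%011d" % idx


def _load_numpy_image(image_path, width, height):
    # Hypothetical loader: force RGB, resize to the requested dimensions,
    # and transpose from (height, width, channels) to (channels, height,
    # width) so the bytes line up with datum.channels/height/width.
    image = Image.open(image_path).convert("RGB").resize((width, height))
    return np.asarray(image, dtype=np.uint8).transpose(2, 0, 1)

LevelDB stores entries sorted by key, so a fixed-width, zero-padded decimal index keeps the records in write order and lets Caffe stream them sequentially.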
Example #2
    def _generate_leveldb(self, file_path, pairs, target, single_data):
        """
        Caffe uses the LevelDB format to efficiently load its training and validation data; this method
        writes paired out faces in an efficient way into this format.
        """
        print("\tGenerating LevelDB file at %s..." % file_path)
        shutil.rmtree(file_path, ignore_errors=True)
        db = plyvel.DB(file_path, create_if_missing=True)
        wb = db.write_batch()
        commit_every = 250000
        start_time = int(round(time.time() * 1000))
        for idx in range(len(pairs)):
            # Each image pair is a top level key with a keyname like 00000000011, in increasing
            # order starting from 00000000000.
            key = siamese_utils.get_key(idx)

            # Actually expand our images now, taking the index reference and turning it into real
            # image pairs; we delay doing this until now for efficiency reasons, as we will probably
            # have more pairs of images than actual computer memory.
            image_1 = single_data[pairs[idx][0]]
            image_2 = single_data[pairs[idx][1]]
            paired_image = np.concatenate([image_1, image_2])

            # Do things like mean normalize, etc. that happen across both testing and validation.
            paired_image = self._preprocess_data(paired_image)

            # Each entry in the leveldb is a Caffe protobuffer "Datum" object containing details.
            datum = Datum()
            datum.channels = 2  # One channel for each image in the pair.
            datum.height = constants.HEIGHT
            datum.width = constants.WIDTH
            datum.data = paired_image.tostring()
            datum.label = target[idx]
            value = datum.SerializeToString()
            wb.put(key, value)

            # Periodically flush the write batch so memory use stays bounded.
            if (idx + 1) % commit_every == 0:
                wb.write()
                del wb
                wb = db.write_batch()
                end_time = int(round(time.time() * 1000))
                total_time = end_time - start_time
                print("Wrote batch, key: %s, time for batch: %d ms" % (key, total_time))
                start_time = int(round(time.time() * 1000))

        # Flush whatever is left over in the final, partial batch.
        wb.write()
        db.close()
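
To sanity-check a database written this way, the entries can be read back with plyvel and decoded through the same Datum protobuf. A minimal sketch, with a hypothetical database path, assuming the pixel data was still uint8 when it was serialized; adjust the dtype if `_preprocess_data` converts the arrays to floats:

import numpy as np
import plyvel
from caffe.proto.caffe_pb2 import Datum

db = plyvel.DB("train_pairs_leveldb")  # hypothetical path
for key, value in db.iterator():
    datum = Datum()
    datum.ParseFromString(value)
    # Recover the (channels, height, width) array from the raw bytes
    # written by tostring() at generation time.
    pair = np.frombuffer(datum.data, dtype=np.uint8).reshape(
        datum.channels, datum.height, datum.width)
    print("key=%s label=%d shape=%s" % (key, datum.label, pair.shape))
db.close()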
Example #3
def _generate_leveldb(self, file_path, image, target, single_data):
    """
    Caffe uses the LevelDB format to efficiently load its training and validation data; this method
    writes out images in an efficient way into this format.
    """
    print("\tGenerating LevelDB file at %s..." % file_path)
    shutil.rmtree(file_path, ignore_errors=True)
    db = plyvel.DB(file_path, create_if_missing=True)
    wb = db.write_batch()
    commit_every = 250000
    start_time = int(round(time.time() * 1000))
    for idx in range(len(image)):
        # Each image is a top level key with a keyname like 00000000011, in increasing
        # order starting from 00000000000.
        key = utils.get_key(idx)

        # Do things like mean normalize, etc. that happen across both testing and validation.
        preprocessed_image = self._preprocess_data(image[idx])

        # Each entry in the leveldb is a Caffe protobuffer "Datum" object containing details.
        datum = Datum()
        # TODO(neuberg): Confirm that this is the correct way to setup RGB images for
        # Caffe for our dataset.
        datum.channels = 3
        datum.height = constants.HEIGHT
        datum.width = constants.WIDTH
        datum.data = preprocessed_image.tostring()
        datum.label = target[idx]
        value = datum.SerializeToString()
        wb.put(key, value)

        # Periodically flush the write batch so memory use stays bounded.
        if (idx + 1) % commit_every == 0:
            wb.write()
            del wb
            wb = db.write_batch()
            end_time = int(round(time.time() * 1000))
            total_time = end_time - start_time
            print("Wrote batch, key: %s, time for batch: %d ms" % (key, total_time))
            start_time = int(round(time.time() * 1000))

    # Flush whatever is left over in the final, partial batch.
    wb.write()
    db.close()
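
None of the examples define `_preprocess_data`, but the comments describe it as the mean-normalization step shared between training and validation. A rough sketch under that assumption, shown as a free function for brevity; the `mean_image` parameter and the shift back into the 0..255 range are hypothetical details, kept only so the result still fits in the raw uint8 bytes that datum.data carries (float arrays would go in datum.float_data instead):

import numpy as np


def _preprocess_data(image, mean_image=None):
    # Hypothetical mean normalization, as hinted by the "mean normalize"
    # comments in the examples above.
    image = np.asarray(image, dtype=np.float32)
    if mean_image is not None:
        # Center around zero, then shift back into 0..255 so the result
        # still fits in the uint8 bytes that datum.data expects.
        image = image - mean_image + 128.0
    return np.clip(image, 0, 255).astype(np.uint8)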