import shutil
import time

import numpy as np
import plyvel
# Assumed import path for Caffe's generated protobuf classes; utils,
# siamese_utils, and constants referenced below are project-local modules.
from caffe.proto.caffe_pb2 import Datum


def _generate_leveldb(file_path, image_paths, targets, width, height):
    """
    Caffe uses the LevelDB format to efficiently load its training and
    validation data; this method writes out images in an efficient way into
    this format.
    """
    print "\t\tGenerating LevelDB file at %s..." % file_path
    shutil.rmtree(file_path, ignore_errors=True)
    db = plyvel.DB(file_path, create_if_missing=True)
    wb = db.write_batch()
    commit_every = 10000
    start_time = int(round(time.time() * 1000))
    for idx in range(len(image_paths)):
        # Each image is a top level key with a keyname like 00000000011, in
        # increasing order starting from 00000000000.
        key = utils.get_key(idx)

        # Do common normalization that might happen across both testing and
        # validation.
        try:
            image = _preprocess_data(
                _load_numpy_image(image_paths[idx], width, height))
        except Exception:
            print "\t\t\tWarning: Unable to process leveldb image %s" % image_paths[idx]
            continue

        # Each entry in the leveldb is a Caffe protobuffer "Datum" object
        # containing details.
        datum = Datum()
        datum.channels = 3  # RGB
        datum.height = height
        datum.width = width
        datum.data = image.tostring()
        datum.label = targets[idx]
        value = datum.SerializeToString()
        wb.put(key, value)

        # Flush the write batch periodically so memory use stays bounded.
        if (idx + 1) % commit_every == 0:
            wb.write()
            del wb
            wb = db.write_batch()
            end_time = int(round(time.time() * 1000))
            total_time = end_time - start_time
            print "\t\t\tWrote batch, key: %s, time for batch: %d ms" % (
                key, total_time)
            start_time = int(round(time.time() * 1000))

    end_time = int(round(time.time() * 1000))
    total_time = end_time - start_time
    print "\t\t\tWriting final batch, time for batch: %d ms" % total_time
    wb.write()
    db.close()
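# A minimal usage sketch for the writer above; the paths, labels, and
# dimensions are hypothetical placeholders rather than values from the
# original project.
def _example_generate_leveldb_usage():
    image_paths = ["data/faces/img_00001.jpg", "data/faces/img_00002.jpg"]
    targets = [0, 1]  # One integer class label per image.
    _generate_leveldb("data/leveldb/train_db", image_paths, targets,
                      width=256, height=256)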
def _generate_leveldb(self, file_path, pairs, target, single_data):
    """
    Caffe uses the LevelDB format to efficiently load its training and
    validation data; this method writes out paired faces in an efficient way
    into this format.
    """
    print "\tGenerating LevelDB file at %s..." % file_path
    shutil.rmtree(file_path, ignore_errors=True)
    db = plyvel.DB(file_path, create_if_missing=True)
    wb = db.write_batch()
    commit_every = 250000
    start_time = int(round(time.time() * 1000))
    for idx in range(len(pairs)):
        # Each image pair is a top level key with a keyname like 00000000011,
        # in increasing order starting from 00000000000.
        key = siamese_utils.get_key(idx)

        # Actually expand our images now, taking the index reference and
        # turning it into real image pairs; we delay doing this until now for
        # efficiency reasons, as we will probably have more pairs of images
        # than actual computer memory.
        image_1 = single_data[pairs[idx][0]]
        image_2 = single_data[pairs[idx][1]]
        paired_image = np.concatenate([image_1, image_2])

        # Do things like mean normalization that happen across both testing
        # and validation.
        paired_image = self._preprocess_data(paired_image)

        # Each entry in the leveldb is a Caffe protobuffer "Datum" object
        # containing details.
        datum = Datum()
        datum.channels = 2  # One channel for each image in the pair.
        datum.height = constants.HEIGHT
        datum.width = constants.WIDTH
        datum.data = paired_image.tostring()
        datum.label = target[idx]
        value = datum.SerializeToString()
        wb.put(key, value)

        if (idx + 1) % commit_every == 0:
            wb.write()
            del wb
            wb = db.write_batch()
            end_time = int(round(time.time() * 1000))
            total_time = end_time - start_time
            print "Wrote batch, key: %s, time for batch: %d ms" % (
                key, total_time)
            start_time = int(round(time.time() * 1000))

    wb.write()
    db.close()
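# The zero-padded key scheme described in the loop above keeps entries in
# insertion order, since LevelDB sorts its keys lexicographically. A sketch
# of what siamese_utils.get_key presumably returns, inferred only from the
# 11-digit key format shown in the comments ("00000000000", "00000000011"):
def _example_get_key(idx):
    # Zero-pad to 11 digits so lexicographic order matches numeric order.
    return "%011d" % idx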
def _generate_leveldb(self, file_path, images, target, single_data):
    """
    Caffe uses the LevelDB format to efficiently load its training and
    validation data; this method writes out images in an efficient way into
    this format.
    """
    print "\tGenerating LevelDB file at %s..." % file_path
    shutil.rmtree(file_path, ignore_errors=True)
    db = plyvel.DB(file_path, create_if_missing=True)
    wb = db.write_batch()
    commit_every = 250000
    start_time = int(round(time.time() * 1000))
    for idx in range(len(images)):
        # Each image is a top level key with a keyname like 00000000011, in
        # increasing order starting from 00000000000.
        key = utils.get_key(idx)

        # Do things like mean normalization that happen across both testing
        # and validation.
        image = self._preprocess_data(images[idx])

        # Each entry in the leveldb is a Caffe protobuffer "Datum" object
        # containing details.
        datum = Datum()
        # TODO(neuberg): Confirm that this is the correct way to set up RGB
        # images for Caffe for our dataset.
        datum.channels = 3
        datum.height = constants.HEIGHT
        datum.width = constants.WIDTH
        datum.data = image.tostring()
        datum.label = target[idx]
        value = datum.SerializeToString()
        wb.put(key, value)

        if (idx + 1) % commit_every == 0:
            wb.write()
            del wb
            wb = db.write_batch()
            end_time = int(round(time.time() * 1000))
            total_time = end_time - start_time
            print "Wrote batch, key: %s, time for batch: %d ms" % (
                key, total_time)
            start_time = int(round(time.time() * 1000))

    wb.write()
    db.close()
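# To sanity-check a generated database, its entries can be read back and
# deserialized with the same libraries. This is a hedged sketch rather than
# part of the original pipeline: the uint8 dtype only holds if preprocessing
# kept raw byte pixels, and the reshape assumes Caffe's channel-major
# (channels, height, width) layout for Datum.data.
def _example_verify_leveldb(file_path):
    db = plyvel.DB(file_path)
    datum = Datum()
    for key, value in db.iterator():
        datum.ParseFromString(value)
        flat = np.fromstring(datum.data, dtype=np.uint8)
        entry = flat.reshape(datum.channels, datum.height, datum.width)
        print "%s: label=%d, shape=%s" % (key, datum.label, entry.shape)
    db.close()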