def CreateMixedJpeg(jpeg_a, jpeg_b):
  image_a = Image.open(StringIO.StringIO(jpeg_a.data))
  image_b = Image.open(StringIO.StringIO(jpeg_b.data))
  width_a, height_a = image_a.size
  width_b, height_b = image_b.size
  # Find the scaling that makes image b as tall as image a.
  scaling_b = float(height_a) / height_b
  new_width_b, new_height_b = (int(scaling_b * width_b), height_a)
  image_b = image_b.resize((new_width_b, new_height_b))
  # Paste the two images side by side on a black canvas.
  out_image = Image.new("RGB", (width_a + new_width_b, height_a), "black")
  out_image.paste(image_a, (0, 0))
  out_image.paste(image_b, (width_a, 0))
  output = StringIO.StringIO()
  out_image.save(output, format="JPEG")
  new_jpeg = iw_pb2.JpegImage()
  new_jpeg.data = output.getvalue()
  new_jpeg.width, new_jpeg.height = out_image.size
  CHECK(new_jpeg.IsInitialized())
  return new_jpeg

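# A minimal usage sketch for CreateMixedJpeg. The file paths are hypothetical
# placeholders; only the names already used above (iw_pb2, CreateMixedJpeg)
# are assumed to be in scope.
def DemoCreateMixedJpeg(path_a='a.jpg', path_b='b.jpg', out_path='mixed.jpg'):
  jpeg_a = iw_pb2.JpegImage()
  jpeg_a.data = open(path_a, 'rb').read()
  jpeg_b = iw_pb2.JpegImage()
  jpeg_b.data = open(path_b, 'rb').read()
  # b is scaled to a's height and pasted to the right of a.
  mixed = CreateMixedJpeg(jpeg_a, jpeg_b)
  open(out_path, 'wb').write(mixed.data)
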
def GetNextImage(self):
  jpeg_image = iw_pb2.JpegImage()
  ok, k, v = self.reader.Next()
  CHECK(ok)
  jpeg_image.ParseFromString(v)
  CHECK(jpeg_image.IsInitialized())
  return jpeg_image

def GetImage(self, image_id):
  ok, value = self.image_reader.Find(py_base.Uint64ToKey(image_id))
  if not ok:
    return None
  jpeg = iw_pb2.JpegImage()
  jpeg.ParseFromString(value)
  CHECK(jpeg.IsInitialized())
  return jpeg

def GetNextImage(self):
  ok, key, value = self.image_reader.Next()
  CHECK(ok)
  image_id = iwutil.KeyToUint64(key)
  jpeg = iw_pb2.JpegImage()
  jpeg.ParseFromString(value)
  CHECK(jpeg.IsInitialized())
  return image_id, jpeg

def __ImportImageArchive(self, extract_dir, dataset_root):
  image_filenames = []
  print 'Searching for image files...'
  for root, dirnames, filenames in os.walk(extract_dir):
    for filename in filenames:
      if fnmatch.fnmatch(filename, '*.jpg') or fnmatch.fnmatch(filename,
                                                               '*.jpeg'):
        image_filenames.append(os.path.join(root, filename))
  if len(image_filenames) <= 2:
    return False
  fingerprinted_path = '%s/fingerprinted/' % (extract_dir)
  os.mkdir(fingerprinted_path)
  print 'Fingerprinting image files...'
  progress = util.MakeProgressBar(len(image_filenames))
  # Rename all files according to their fingerprint, zero-padded so that
  # lexical sort == numeric sort == key sort.
  for i, filename in enumerate(image_filenames):
    data = open(filename, 'rb').read()
    fp = py_base.FingerprintString(data)
    dst = '%s/%064d.jpg' % (fingerprinted_path, fp)
    os.rename(filename, dst)
    progress.update(i)
  filenames = glob.glob('%s/*.jpg' % fingerprinted_path)
  filenames.sort()
  output_uri = 'local://%s/photoid_to_image.pert' % (dataset_root)
  # Write to the pert in sorted key order.
  print 'Generating image PERT file...'
  writer = py_pert.StringTableWriter()
  CHECK(writer.Open(output_uri, 1))
  progress = util.MakeProgressBar(len(filenames))
  for i, filename in enumerate(filenames):
    data = open(filename, 'rb').read()
    key = py_base.Uint64ToKey(py_base.FingerprintString(data))
    try:
      im = Image.open(StringIO.StringIO(data))
    except IOError as e:
      LOG(INFO, 'Error opening %s - %s' % (filename, e))
      continue
    width, height = im.size
    jpeg = iw_pb2.JpegImage()
    jpeg.data = data
    jpeg.width = width
    jpeg.height = height
    CHECK(jpeg.IsInitialized())
    writer.Add(key, jpeg.SerializeToString())
    progress.update(i)
  writer.Close()
  return True

def GetImageSizes(uri):
  imageid_to_size = {}
  reader = py_pert.StringTableShardSetReader()
  CHECK(reader.Open(uri))
  jpeg_image = iw_pb2.JpegImage()
  progress = iwutil.MakeProgressBar(reader.Entries())
  for i, (k, v) in enumerate(reader):
    image_id = py_base.KeyToUint64(k)
    jpeg_image.ParseFromString(v)
    imageid_to_size[image_id] = (jpeg_image.width, jpeg_image.height)
    progress.update(i)
  return imageid_to_size

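# A minimal usage sketch for GetImageSizes; the pert uri is a hypothetical
# placeholder for a table of JpegImage protos keyed by image id.
def DemoGetImageSizes():
  imageid_to_size = GetImageSizes('local://path/to/photoid_to_image.pert')
  for image_id, (width, height) in imageid_to_size.iteritems():
    print '%d: %d x %d' % (image_id, width, height)
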
def BatchGetImages(self, image_ids):
  """Does an efficient batch lookup of images.

  Returns a dict mapping image_id to its JpegImage proto.
  """
  id_to_jpeg = {}
  image_ids.sort()  # sorted keys give sequential access to the pert
  for image_id in image_ids:
    ok, value = self.image_reader.Find(py_base.Uint64ToKey(image_id))
    CHECK(ok)
    jpeg = iw_pb2.JpegImage()
    jpeg.ParseFromString(value)
    CHECK(jpeg.IsInitialized())
    id_to_jpeg[image_id] = jpeg
  return id_to_jpeg

def BatchGetImagesAsDataUri(self, image_ids):
  """Does an efficient batch lookup of images.

  Returns a dict mapping image_id to a jpeg data URI.
  """
  id_to_datauri = {}
  image_ids.sort()  # sorted keys give sequential access to the pert
  self.image_reader.SeekToStart()
  for image_id in image_ids:
    ok, value = self.image_reader.Find(py_base.Uint64ToKey(image_id))
    CHECK(ok, 'failed to find image_id: %d' % (image_id))
    jpeg = iw_pb2.JpegImage()
    jpeg.ParseFromString(value)
    CHECK(jpeg.IsInitialized())
    id_to_datauri[image_id] = JpegToDataUrl(jpeg.data)
  return id_to_datauri

def ExtractFeatures(feature_extractor_params, images_uri, features_uri):
  extractor = py_featureextractor.CreateFeatureExtractorOrDie(
      feature_extractor_params)
  CHECK(extractor)
  reader = py_pert.StringTableReader()
  CHECK(reader.Open(images_uri), 'can not open file: %s' % (images_uri))
  writer = py_pert.ProtoTableWriter()
  writer.Open(iw_pb2.ImageFeatures(), features_uri, 1)
  image = iw_pb2.JpegImage()
  progress = MakeProgressBar(reader.Entries())
  for i, (k, v) in enumerate(reader):
    image.ParseFromString(v)
    ok, features = extractor.Run(image.data)
    if ok:
      writer.Add(k, features.SerializeToString())
    progress.update(i)
  writer.Close()
  return

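# A minimal usage sketch for ExtractFeatures; the uris are hypothetical
# placeholders, and feature_extractor_params is assumed to be a config proto
# built elsewhere and accepted by py_featureextractor.
def DemoExtractFeatures(feature_extractor_params):
  ExtractFeatures(feature_extractor_params,
                  'local://path/to/photoid_to_image.pert',
                  'local://path/to/imageid_to_features.pert')
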
def __ComputeExtractorFpm(self, extractor):
  # Extract features from the sampled images using the current settings.
  image = iw_pb2.JpegImage()
  total_image_area_megapixels = 0.0
  total_num_features = 0
  for sample_index in self.sample_indices:
    ok, k, v = self.reader.GetIndex(sample_index)
    CHECK(ok)
    image.ParseFromString(v)
    CHECK(image.IsInitialized())
    ok, features = extractor.Run(image.data)
    total_image_area_megapixels += image.width * image.height * 1e-6
    if ok:
      total_num_features += len(features.keypoints)
  # Report the mean number of features per megapixel.
  mean_fpm = total_num_features / total_image_area_megapixels
  LOG(INFO, 'mean_fpm: %f' % (mean_fpm))
  return mean_fpm

def main():
  base_uri = 'local://home/ubuntu/Desktop/datasets/tide_v12/'
  tide_uri = '%s/objectid_to_object.pert' % (base_uri)
  dataset = tide.TideDataset(tide_uri)
  pos_imageids = []
  imageid_to_objectname = {}
  for id, obj in dataset.objectid_to_object.iteritems():
    print obj.name
    pos_imageids.extend(obj.pos_image_ids)
    for image_id in obj.pos_image_ids:
      imageid_to_objectname[image_id] = obj.name
  # Sort for efficient sequential access to the pert.
  pos_imageids.sort()
  images_pert_uri = '%s/photoid_to_image.pert' % (base_uri)
  reader = py_pert.StringTableReader()
  CHECK(reader.Open(images_pert_uri))
  for image_id in pos_imageids:
    ok, data = reader.Find(py_base.Uint64ToKey(image_id))
    CHECK(ok)
    jpeg_image = iw_pb2.JpegImage()
    jpeg_image.ParseFromString(data)
    objectname = imageid_to_objectname[image_id]
    dirname = './extracted/%s' % (objectname)
    filename = '%s/%d.jpg' % (dirname, image_id)
    if not os.path.exists(dirname):
      os.makedirs(dirname)
    f = open(filename, 'wb')
    f.write(jpeg_image.data)
    f.close()
  return

def main():
  images_pert_uri = 'local:///media/ebs/4a4b34/tide_v13/photoid_to_image.pert'
  images_to_extract = [2071492, 2087400, 2112291, 2102113, 2080088, 2083122,
                       2107730]
  reader = py_pert.StringTableReader()
  CHECK(reader.Open(images_pert_uri))
  for image_id in images_to_extract:
    ok, data = reader.Find(py_strings.Uint64ToKey(image_id))
    CHECK(ok)
    jpeg_image = iw_pb2.JpegImage()
    jpeg_image.ParseFromString(data)
    filename = '%d.jpg' % (image_id)
    f = open(filename, 'wb')
    f.write(jpeg_image.data)
    f.close()
  return

def PackImagesDirectoryToPert(src_path, output_uri, randomize=True,
                              max_num_images=None):
  CHECK(os.path.isdir(src_path), 'expected dir: %s' % src_path)
  filenames = glob.glob('%s/*.jpg' % src_path)
  if randomize:
    # Shuffle so there is no accidental coherence between filename and order
    # in the pert: any prefix should be a random sample of images (otherwise
    # a prefix might be pulled from a single object type).
    random.shuffle(filenames)
  else:
    filenames.sort()
  if max_num_images:
    filenames = filenames[:max_num_images]
  writer = py_pert.StringTableWriter()
  CHECK(writer.Open(output_uri, 1))
  progress = MakeProgressBar(len(filenames))
  for i, filename in enumerate(filenames):
    data = open(filename, 'rb').read()
    key = os.path.basename(filename)
    try:
      im = Image.open(StringIO.StringIO(data))
    except IOError as e:
      LOG(FATAL, 'Error opening %s - %s' % (filename, e))
      continue
    width, height = im.size
    jpeg = iw_pb2.JpegImage()
    jpeg.data = data
    jpeg.width = width
    jpeg.height = height
    CHECK(jpeg.IsInitialized())
    writer.Add(key, jpeg.SerializeToString())
    progress.update(i)
  writer.Close()
  return

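# A minimal usage sketch for PackImagesDirectoryToPert; the paths are
# hypothetical placeholders.
def DemoPackImagesDirectoryToPert():
  # Pack at most 1000 shuffled jpegs from a directory into a pert keyed by
  # file basename.
  PackImagesDirectoryToPert('/path/to/images', 'local://path/to/images.pert',
                            randomize=True, max_num_images=1000)
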
def main():
  dataset_name = 'tide_v08'
  sizes = {}
  sizes['thumbnail'] = 100 * 100
  sizes['small'] = 640 * 480
  reset_bucket = False
  #dataset_base_uri = 'local://home/ubuntu/Desktop/vol-0449ca74/itergraph/%s/' % (dataset_name)
  #images_uri = '%s/cropped_scaled_photoid_to_image.pert' % (dataset_base_uri)
  images_uri = 'local://home/ubuntu/Desktop/vol-0449ca74/itergraph/tide_v14/cropped_scaled_photoid_to_image_randomaccess.pert'
  bucket_name = 'tide_image_cache'
  s3 = boto.connect_s3()
  bucket = s3.create_bucket(bucket_name)
  if reset_bucket:
    LOG(INFO, 'listing contents of bucket...')
    all_keys = [key.name for key in bucket.list()]
    LOG(INFO, 'deleting contents of bucket...')
    bucket.delete_keys(all_keys)
    s3.delete_bucket(bucket_name)
    bucket = s3.create_bucket(bucket_name)
  bucket.set_acl('public-read')
  reader = py_pert.StringTableReader()
  CHECK(reader.Open(images_uri))
  progress = iwutil.MakeProgressBar(reader.Entries())
  num_workers = 200
  max_queue_size = 200
  job_queue = JobQueue(num_workers, max_queue_size)
  for i, (key, value) in enumerate(reader):
    image_id = py_base.KeyToUint64(key)
    jpeg_image = iw_pb2.JpegImage()
    jpeg_image.ParseFromString(value)
    job_queue.AddJob(
        ResizeAndUploadImageJob(bucket, sizes, image_id, jpeg_image.data))
    progress.update(i)
  job_queue.WaitForJobsDone()
  return

def ExportImages(self):
  image_size_cache_filename = '%s/images/size_cache.pickle' % self.output_path
  if os.path.exists(image_size_cache_filename):
    self.imageid_to_size = iwutil.LoadObject(image_size_cache_filename)
    return
  base_path = '%s/images/' % (self.output_path)
  os.mkdir(base_path)
  LOG(INFO, 'exporting images...')
  reader = py_pert.StringTableReader()
  CHECK(reader.Open(self.images_uri))
  jpeg_image = iw_pb2.JpegImage()
  progress = iwutil.MakeProgressBar(reader.Entries())
  for i, (k, v) in enumerate(reader):
    image_id = py_base.KeyToUint64(k)
    jpeg_image.ParseFromString(v)
    filename = '%s/%s.jpg' % (base_path, JSPad(image_id))
    f = open(filename, 'wb')
    f.write(jpeg_image.data)
    f.close()
    self.imageid_to_size[image_id] = (jpeg_image.width, jpeg_image.height)
    progress.update(i)
  iwutil.SaveObject(self.imageid_to_size, image_size_cache_filename)
  return

def main():
  # Earlier dataset versions, kept for reference:
  #images_uri = 'local://media/vol-0449ca74/itergraph/tide_v14/cropped_scaled_photoid_to_image_randomaccess.pert'
  #tide_uri = 'local://media/vol-0449ca74/itergraph/tide_v14/objectid_to_object.pert'
  #distractor_images_uri = 'local://media/vol-0449ca74/oxc1_100k/photoid_to_image.pert'
  #output_base_uri = 'local://media/vol-0449ca74/itergraph/tide_v14_mixed_v2/'
  #images_uri = 'local://media/vol-0449ca74/itergraph/tide_v16/photoid_to_image.pert'
  #tide_uri = 'local://media/vol-0449ca74/itergraph/tide_v16/objectid_to_object.pert'
  #distractor_images_uri = 'local://media/vol-0449ca74/oxc1_100k/photoid_to_image.pert'
  #output_base_uri = 'local://media/vol-0449ca74/itergraph/tide_v16_mixed/'
  images_uri = 'local://media/vol-0449ca74/itergraph/tide_v18/photoid_to_image.pert'
  tide_uri = 'local://media/vol-0449ca74/itergraph/tide_v18/objectid_to_object.pert'
  distractor_images_uri = 'local://media/vol-0449ca74/oxc1_100k/photoid_to_image.pert'
  output_base_uri = 'local://media/vol-0449ca74/itergraph/tide_v18_mixed/'
  output_tide_uri = '%s/objectid_to_object.pert' % (output_base_uri)
  output_images_uri = '%s/photoid_to_image.pert' % (output_base_uri)
  image_loader = visutil.BatchImageLoader(images_uri)
  tide_objects = OpenTideDataset(tide_uri)
  distractor_image_loader = DistractorImageLoader(distractor_images_uri)
  CHECK_EQ(len(tide_objects), 2)
  object_a = tide_objects[0]
  object_b = tide_objects[1]
  new_object_a = InitNewObject(object_a)
  new_object_b = InitNewObject(object_b)
  a_none_image_ids = [photo.id for photo in object_a.photos
                      if photo.label == tide_pb2.NONE]
  b_none_image_ids = [photo.id for photo in object_b.photos
                      if photo.label == tide_pb2.NONE]
  # Pair up the NONE-labeled images from the two objects; each pair will be
  # composited into a single mixed image keyed by the first image's id.
  mixed_aux_images = {}
  for imageid_a, imageid_b in zip(a_none_image_ids, b_none_image_ids):
    mixed_aux_images[imageid_a] = (imageid_a, imageid_b)
  mixed_image_ids = mixed_aux_images.keys()
  # Split the mixed image ids evenly between the two new objects.
  n = len(mixed_image_ids) // 2
  InitNoneLabels(new_object_a, mixed_image_ids[:n])
  InitNoneLabels(new_object_b, mixed_image_ids[n:])
  new_objects = [new_object_a, new_object_b]
  image_ids = []
  for obj in new_objects:
    for photo in obj.photos:
      image_ids.append(photo.id)
  image_ids.sort()
  # Write the new tide pert.
  tide_writer = py_pert.ProtoTableWriter()
  tide_writer.Open(tide_pb2.Object(), output_tide_uri, 1)
  for obj in new_objects:
    tide_writer.Add(iwutil.Uint64ToKey(obj.id), obj.SerializeToString())
  tide_writer.Close()
  # Write the new image pert.
  try:
    image_writer = py_pert.ProtoTableWriter()
    image_writer.Open(iw_pb2.JpegImage(), output_images_uri, 1)
    used_image_ids = set()
    progress = iwutil.MakeProgressBar(len(image_ids))
    for i, image_id in enumerate(image_ids):
      jpeg = None
      if image_id in mixed_aux_images:
        # Composite the stored pair of NONE-labeled images.
        imageid_a, imageid_b = mixed_aux_images[image_id]
        jpeg_a = image_loader.GetImage(imageid_a)
        jpeg_b = image_loader.GetImage(imageid_b)
        if jpeg_a is None or jpeg_b is None:
          LOG(INFO, 'skipping missing jpeg')
          continue
        jpeg = CreateMixedJpeg(jpeg_a, jpeg_b)
      else:
        # Composite with the next landscape-orientation distractor image.
        distractor = None
        while True:
          distractor = distractor_image_loader.GetNextImage()
          if distractor.width > distractor.height:
            break
        CHECK(distractor)
        jpeg = CreateMixedJpeg(image_loader.GetImage(image_id), distractor)
      CHECK(image_id not in used_image_ids)
      CHECK(jpeg)
      image_writer.Add(iwutil.Uint64ToKey(image_id), jpeg.SerializeToString())
      used_image_ids.add(image_id)
      progress.update(i)
    image_writer.Close()
  except Exception as e:
    # Surface failures instead of silently swallowing them.
    LOG(INFO, 'failed while writing image pert: %s' % (e))
  return