def main():
  dataset_root = '/home/ubuntu/Desktop/vol-0449ca74/itergraph/'
  dataset_name = 'tide_v08'
  filename = 'local://%s/%s/objectid_to_object.pert' % (dataset_root, dataset_name)
  reader = py_pert.StringTableReader()
  CHECK(reader.Open(filename))
  obj = tide_pb2.Object()
  total = 0
  items = []
  for k, v in reader:
    obj.ParseFromString(v)
    area_mean, area_std = ComputeObjectAreaStats(obj)
    items.append((obj.name, area_mean))
    total += len(obj.photos)
  # sort objects by mean region area and report it as a fraction of a 640x480 frame
  items.sort(key=lambda i: i[1])
  for item in items:
    print '%0.3f %s' % (item[1] / (640 * 480), item[0])
  print 'total: %d' % total

def ExportMatchesJson(self):
  base_path = '%s/matches/' % (self.output_path)
  if os.path.exists(base_path):
    return
  LOG(INFO, 'exporting match json...')
  os.mkdir(base_path)
  reader = py_pert.StringTableReader()
  CHECK(reader.Open(self.matches_uri))
  progress = iwutil.MakeProgressBar(reader.Entries())
  match_result = iw_pb2.GeometricMatchResult()
  for i, (k, v) in enumerate(reader):
    image_a_id, image_b_id = iwutil.ParseUint64KeyPair(k)
    match_result.ParseFromString(v)
    if not match_result.matches:
      continue
    filename = '%s/%s_%s.json' % (base_path, JSPad(image_a_id), JSPad(image_b_id))
    f = open(filename, 'w')
    data = {}
    CHECK_LT(image_a_id, image_b_id)
    # use strings for the ids because js can't handle 64 bit ints
    data['image_a'] = JSPad(image_a_id)
    data['image_b'] = JSPad(image_b_id)
    data['image_a_size'] = self.imageid_to_size[image_a_id]
    data['image_b_size'] = self.imageid_to_size[image_b_id]
    matches = []
    for match in match_result.matches:
      for c in match.correspondences:
        match_info = [c.a.pos.x, c.a.pos.y, c.a.radius,
                      c.b.pos.x, c.b.pos.y, c.b.radius]
        matches.append(match_info)
    data['matches'] = matches
    f.write(json.dumps(data))
    f.close()
    progress.update(i)
  return

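# For reference, each exported file has the following shape (a sketch with
# illustrative values; JSPad returns the id as a zero-padded string and the
# sizes come from self.imageid_to_size):
#
#   {"image_a": "<zero-padded id a>",
#    "image_b": "<zero-padded id b>",
#    "image_a_size": [width_a, height_a],
#    "image_b_size": [width_b, height_b],
#    "matches": [[ax, ay, a_radius, bx, by, b_radius], ...]}
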
def __init__(self, tide_uri):
  self.objectid_to_object = {}
  self.imageid_to_label = {}
  LOG(INFO, 'starting to load tide dataset...')
  # load the list of images that belong to each tide object
  tide_reader = py_pert.StringTableReader()
  CHECK(tide_reader.Open(tide_uri))
  for index, (k, v) in enumerate(tide_reader):
    tide_object = tide_pb2.Object()
    tide_object.ParseFromString(v)
    CHECK(tide_object.IsInitialized())
    obj = TideObject()
    obj.LoadFromProto(index, tide_object)
    self.objectid_to_object[obj.id] = obj
  # build the reverse index from image id to its (object id, label) pair
  for obj in self.objectid_to_object.itervalues():
    for image_id in obj.pos_image_ids:
      self.imageid_to_label[image_id] = TideLabel(obj.id, 'pos')
    for image_id in obj.neg_image_ids:
      self.imageid_to_label[image_id] = TideLabel(obj.id, 'neg')
    for image_id in obj.none_image_ids:
      self.imageid_to_label[image_id] = TideLabel(obj.id, 'none')
  LOG(INFO, 'done loading tide dataset...')
  return

def __init__(self, param_data_uri):
  self.num_configs = None
  self.rows = []
  self.configs_per_page = 10
  self.num_pages = None
  self.exclude_rows = ['corresondence_filter', 'type', 'dataset_name',
                       'visual_vocabulary_uri', 'max_image_replication_factor',
                       'max_match_batch_size', 'max_vertex_degree',
                       'dataset name']
  reader = py_pert.StringTableReader()
  CHECK(reader.Open(param_data_uri))
  configs = []
  progress = iwutil.MakeProgressBar(reader.Entries())
  result = lpbench_pb2.ConfigurationResult()
  for i, (k, v) in enumerate(reader):
    result.ParseFromString(v)
    config = itergraph_pb2.IterGraphParams()
    config.CopyFrom(result.config.iter_graph)
    configs.append(config)
    progress.update(i)
  self.__BuildTable(configs)
  return

def test_CopyLocalToUri():
  local_uri = 'local://tmp/data/test_ufs.pert'
  remote_uri = 'maprfs://data/tmp/test_ufs.pert'
  CreateTestFile(local_uri)
  ok, scheme, path, error = py_pert.ParseUri(local_uri)
  CHECK(ok)
  py_pert.CopyLocalToUri(path, remote_uri)
  CHECK(py_pert.Exists(local_uri))
  CHECK(py_pert.Exists(remote_uri))
  # verify the copied table matches the generated test data
  reader = py_pert.StringTableReader()
  CHECK(reader.Open(remote_uri))
  expected_count = 1000
  count = 0
  for (key, value), (expected_key, expected_value) in zip(reader, GenerateTestData()):
    CHECK_EQ(key, expected_key)
    CHECK_EQ(value, expected_value)
    count += 1
  CHECK_EQ(count, expected_count)
  print py_pert.ListDirectory(local_uri)
  print py_pert.ListDirectory(remote_uri)
  return

def LoadImageRegionGraph(uri):
  irg = iw_pb2.ImageRegionGraph()
  reader = py_pert.StringTableReader()
  CHECK(reader.Open(uri))
  ok, tmp = reader.GetMetadata('num_edges')
  CHECK(ok, "this doesn't appear to be an irg uri: %s" % (uri))
  num_edges = long(tmp)
  ok, tmp = reader.GetMetadata('num_vertices')
  CHECK(ok, "this doesn't appear to be an irg uri: %s" % (uri))
  num_vertices = long(tmp)
  CHECK_EQ(reader.Entries(), num_edges + num_vertices)
  # load edges (keys prefixed 'e' sort before the vertex keys)
  progress = iwutil.MakeProgressBar(num_edges)
  for i in range(num_edges):
    ok, key, value = reader.Next()
    CHECK(ok)
    CHECK_EQ(key[0], 'e')
    irg.edge.add().ParseFromString(value)
    progress.update(i)
  # load vertices (keys prefixed 'v')
  progress = iwutil.MakeProgressBar(num_vertices)
  for i in range(num_vertices):
    ok, key, value = reader.Next()
    CHECK(ok)
    CHECK_EQ(key[0], 'v')
    irg.vertex.add().ParseFromString(value)
    progress.update(i)
  return irg

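# Minimal usage sketch for LoadImageRegionGraph (the URI below is a
# hypothetical placeholder, not a real dataset path):
def DemoLoadImageRegionGraph():
  irg = LoadImageRegionGraph('local://tmp/itergraph/image_region_graph.pert')
  print 'loaded irg with %d edges and %d vertices' % (len(irg.edge), len(irg.vertex))
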
def __init__(self, param_data_uri):
  self.rows = []
  self.rigid_objects = ['starbucks_logo', 'prius', 'nasa_spaceshuttle',
                        'starwars_r2d2', 'kfcsanders_logo',
                        'british_telephone_booth', 'csx_locomotive', 'thinker',
                        'kindle', 'superman', 'vw_bug', 'parking_meter',
                        'violin']
  self.nonrigid_objects = ['monarch_butterfly', 'peacock', 'pineapple',
                           'giraffe', 'mallard_duck', 'ladybug', 'pug',
                           'bull_terrier', 'elephant', 'artichoke']
  reader = py_pert.StringTableReader()
  CHECK(reader.Open(param_data_uri))
  CHECK_EQ(reader.Entries(), 1)
  result = eval2_pb2.Result()
  for k, v in reader:
    result.ParseFromString(v)
  for i in range(len(result.label_names)):
    name = result.label_names[i]
    # only rigid objects are reported
    if name not in self.rigid_objects:
      continue
    CHECK(name in self.rigid_objects or name in self.nonrigid_objects)
    self.rows.append(self.Row(name,
                              result.object_precision.mean[i],
                              result.object_recall.mean[i]))
  self.rows.sort(key=lambda r: r.f_score, reverse=True)
  return

def ReadPrecisionRecallEval2(base_uri):
  reader = py_pert.StringTableReader()
  CHECK(reader.Open(base_uri))
  CHECK_EQ(reader.Entries(), 1)
  result = eval2_pb2.Result()
  for k, v in reader:
    result.ParseFromString(v)
  return result.precision.mean, result.recall.mean

def ReadPrecisionRecallEval1(base_uri):
  reader = py_pert.StringTableReader()
  CHECK(reader.Open(base_uri))
  CHECK_EQ(reader.Entries(), 1)
  result = eval1_pb2.Result()
  for k, v in reader:
    result.ParseFromString(v)
  CHECK(result.IsInitialized())
  # report the precision/recall of the final phase
  return result.phases[-1].precision.mean, result.phases[-1].recall.mean

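# Minimal usage sketch for the two readers above (the URIs are hypothetical
# placeholders):
def DemoPrintPrecisionRecall():
  p1, r1 = ReadPrecisionRecallEval1('local://tmp/itergraph/eval1_result.pert')
  p2, r2 = ReadPrecisionRecallEval2('local://tmp/itergraph/eval2_result.pert')
  print 'eval1: precision %0.3f recall %0.3f' % (p1, r1)
  print 'eval2: precision %0.3f recall %0.3f' % (p2, r2)
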
def LoadProtoFromUriOrDie(template_proto, uri):
  template_proto.Clear()
  reader = py_pert.StringTableReader()
  CHECK(reader.Open(uri))
  # parse only the first entry in the table
  for k, v in reader:
    template_proto.ParseFromString(v)
    break
  reader.Close()
  CHECK(template_proto.IsInitialized(), 'Wrong proto type...')
  return

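# Minimal usage sketch for LoadProtoFromUriOrDie: the caller supplies a
# template proto matching the table's value type (the URI is a hypothetical
# placeholder):
def DemoLoadSingleProto():
  params = itergraph_pb2.IterGraphParams()
  LoadProtoFromUriOrDie(params, 'local://tmp/itergraph/params.pert')
  print params
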
def OpenTideDataset(tide_uri):
  LOG(INFO, 'starting to load tide dataset...')
  # load the list of tide objects (each holds the images that belong to it)
  tide_reader = py_pert.StringTableReader()
  CHECK(tide_reader.Open(tide_uri))
  tide_objects = []
  for k, v in tide_reader:
    tide_object = tide_pb2.Object()
    tide_object.ParseFromString(v)
    CHECK(tide_object.IsInitialized())
    tide_objects.append(tide_object)
  return tide_objects

def CreateTestData(uri):
  writer = py_pert.StringTableWriter()
  CHECK(writer.Open(uri, 4))
  num_entries = 10000
  for i in range(num_entries):
    d = '%05d' % i
    writer.Add(d, d)
  writer.Close()
  # verify the table can be read back with the expected number of entries
  reader = py_pert.StringTableReader()
  CHECK(reader.Open(uri))
  CHECK_EQ(reader.Entries(), num_entries)
  reader.Close()
  return

def ExtractFeatures(feature_extractor_params, images_uri, features_uri):
  extractor = py_featureextractor.CreateFeatureExtractorOrDie(feature_extractor_params)
  CHECK(extractor)
  reader = py_pert.StringTableReader()
  CHECK(reader.Open(images_uri), 'cannot open file: %s' % (images_uri))
  image = iw_pb2.JpegImage()
  writer = py_pert.ProtoTableWriter()
  features = iw_pb2.ImageFeatures()
  CHECK(writer.Open(features, features_uri, 1))
  progress = MakeProgressBar(reader.Entries())
  for i, (k, v) in enumerate(reader):
    image.ParseFromString(v)
    ok, features = extractor.Run(image.data)
    if ok:
      writer.Add(k, features.SerializeToString())
    progress.update(i)
  return

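# Minimal usage sketch for ExtractFeatures (the URIs are hypothetical
# placeholders; the real feature_extractor_params proto is configured
# elsewhere in the pipeline):
def DemoExtractFeatures(feature_extractor_params):
  ExtractFeatures(feature_extractor_params,
                  'local://tmp/itergraph/photoid_to_image.pert',
                  'local://tmp/itergraph/photoid_to_features.pert')
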
def main():
  crop_fraction = 0.05
  base_uri = 'local://home/ubuntu/Desktop/vol-7f209e0c/itergraph/tide_v08_distractors/'
  orig_image_uri = '%s/photoid_to_image.pert' % (base_uri)
  tide_uri = '%s/objectid_to_object.pert' % (base_uri)
  new_tide_uri = '%s/cropped_objectid_to_object.pert' % (base_uri)
  orig_sizes_dict = GetCachedImageSizes(orig_image_uri)
  reader = py_pert.StringTableReader()
  writer = py_pert.ProtoTableWriter()
  tide_object = tide_pb2.Object()
  CHECK(writer.Open(tide_object, new_tide_uri, 1))
  CHECK(reader.Open(tide_uri))
  progress = iwutil.MakeProgressBar(reader.Entries())
  for i, (k, v) in enumerate(reader):
    tide_object.ParseFromString(v)
    CHECK(tide_object.IsInitialized())
    # adjust the bounding boxes of all the photos
    for photo in tide_object.photos:
      CHECK(photo.id in orig_sizes_dict)
      width, height = orig_sizes_dict[photo.id]
      try:
        crop_rect = crop.CropRect(width, height, crop_fraction)
        for region in photo.regions:
          bb1_x, bb1_y = region.x1, region.y1
          bb1_w, bb1_h = region.x2 - region.x1, region.y2 - region.y1
          bb2_x, bb2_y, bb2_w, bb2_h = crop_rect.ApplyCropToRect(bb1_x, bb1_y, bb1_w, bb1_h)
          region.x1 = bb2_x
          region.y1 = bb2_y
          region.x2 = bb2_x + bb2_w
          region.y2 = bb2_y + bb2_h
      except ValueError:
        print 'crop failed, not adjusting bb'
    # write the adjusted proto to the output table
    writer.Add(k, tide_object.SerializeToString())
    progress.update(i)
  return

def main():
  dataset_name = 'tide_v08'
  sizes = {}
  sizes['thumbnail'] = 100 * 100
  sizes['small'] = 640 * 480
  reset_bucket = False
  #dataset_base_uri = 'local://home/ubuntu/Desktop/vol-0449ca74/itergraph/%s/' % (dataset_name)
  #images_uri = '%s/cropped_scaled_photoid_to_image.pert' % (dataset_base_uri)
  images_uri = 'local://home/ubuntu/Desktop/vol-0449ca74/itergraph/tide_v14/cropped_scaled_photoid_to_image_randomaccess.pert'
  bucket_name = 'tide_image_cache'
  s3 = boto.connect_s3()
  bucket = s3.create_bucket(bucket_name)
  if reset_bucket:
    LOG(INFO, 'listing contents of bucket...')
    all_keys = [key.name for key in bucket.list()]
    LOG(INFO, 'deleting contents of bucket...')
    bucket.delete_keys(all_keys)
    s3.delete_bucket(bucket_name)
    bucket = s3.create_bucket(bucket_name)
  bucket.set_acl('public-read')
  reader = py_pert.StringTableReader()
  CHECK(reader.Open(images_uri))
  progress = iwutil.MakeProgressBar(reader.Entries())
  # resize and upload the images concurrently
  num_workers = 200
  max_queue_size = 200
  job_queue = JobQueue(num_workers, max_queue_size)
  for i, (key, value) in enumerate(reader):
    image_id = py_base.KeyToUint64(key)
    jpeg_image = iw_pb2.JpegImage()
    jpeg_image.ParseFromString(value)
    job_queue.AddJob(ResizeAndUploadImageJob(bucket, sizes, image_id, jpeg_image.data))
    progress.update(i)
  job_queue.WaitForJobsDone()
  return

def ExportImages(self):
  image_size_cache_filename = '%s/images/size_cache.pickle' % self.output_path
  if os.path.exists(image_size_cache_filename):
    self.imageid_to_size = iwutil.LoadObject(image_size_cache_filename)
    return
  base_path = '%s/images/' % (self.output_path)
  os.mkdir(base_path)
  LOG(INFO, 'exporting images...')
  reader = py_pert.StringTableReader()
  CHECK(reader.Open(self.images_uri))
  jpeg_image = iw_pb2.JpegImage()
  progress = iwutil.MakeProgressBar(reader.Entries())
  for i, (k, v) in enumerate(reader):
    image_id = py_base.KeyToUint64(k)
    jpeg_image.ParseFromString(v)
    filename = '%s/%s.jpg' % (base_path, JSPad(image_id))
    f = open(filename, 'wb')
    f.write(jpeg_image.data)
    f.close()
    self.imageid_to_size[image_id] = (jpeg_image.width, jpeg_image.height)
    progress.update(i)
  iwutil.SaveObject(self.imageid_to_size, image_size_cache_filename)
  return

def __init__(self, images_uri):
  # open the images table
  LOG(INFO, 'opening image uri: %s' % (images_uri))
  self.image_reader = py_pert.StringTableReader()
  CHECK(self.image_reader.Open(images_uri))
  return

def __init__(self, images_uri):
  self.reader = py_pert.StringTableReader()
  CHECK(self.reader.Open(images_uri))