Example #1
0
def main():
    dataset_root = '/home/ubuntu/Desktop/vol-0449ca74/itergraph/'
    dataset_name = 'tide_v08'
    filename = 'local://%s/%s/objectid_to_object.pert' % (dataset_root,
                                                          dataset_name)

    reader = py_pert.StringTableReader()
    reader.Open(filename)

    obj = tide_pb2.Object()
    total = 0

    items = []
    for k, v in reader:
        obj.ParseFromString(v)
        #print obj.name
        #print obj.purity
        #print len(obj.photos)

        area_mean, area_std = ComputeObjectAreaStats(obj)
        items.append((obj.name, area_mean))
        total += len(obj.photos)

    items.sort(key=lambda i: i[1])
    for item in items:
        #print item[0], math.sqrt(item[1])
        print '%0.3f %s' % (item[1] / (640 * 480), item[0])
        #print "key %s obj %s" % (k, obj)

    print 'total: %d' % total
Example #2
0
 def ExportMatchesJson(self):
   """Write one JSON file per geometric match pair under <output_path>/matches/.

   Skips the export entirely if the matches directory already exists.
   Each file holds the image ids, image sizes, and the list of feature
   correspondences [ax, ay, a_radius, bx, by, b_radius].
   """
   base_path = '%s/matches/' % (self.output_path)
   if os.path.exists(base_path):
     return
   LOG(INFO, 'exporting match json...')
   os.mkdir(base_path)
   reader = py_pert.StringTableReader()
   CHECK(reader.Open(self.matches_uri))
   progress = iwutil.MakeProgressBar(reader.Entries())
   match_result = iw_pb2.GeometricMatchResult()
   for i, (k, v) in enumerate(reader):
     image_a_id, image_b_id = iwutil.ParseUint64KeyPair(k)
     match_result.ParseFromString(v)
     if not match_result.matches:
       continue
     filename = '%s/%s_%s.json' % (base_path, JSPad(image_a_id), JSPad(image_b_id))
     # Keys are expected to store the smaller image id first.
     CHECK_LT(image_a_id, image_b_id)
     data = {}
     data['image_a'] = JSPad(image_a_id) # use string because js can't handle 64bit int
     data['image_b'] = JSPad(image_b_id) # use string because js can't handle 64bit int
     data['image_a_size'] = self.imageid_to_size[image_a_id]
     data['image_b_size'] = self.imageid_to_size[image_b_id]
     matches = []
     for match in match_result.matches:
       for c in match.correspondences:
         match_info = [c.a.pos.x, c.a.pos.y, c.a.radius, c.b.pos.x, c.b.pos.y, c.b.radius]
         matches.append(match_info)
     data['matches'] = matches
     # BUG FIX: the file handle was previously opened but never closed,
     # leaking one descriptor per match pair.
     with open(filename, 'w') as f:
       f.write(json.dumps(data))
     progress.update(i)
   return
Example #3
0
File: tide.py Project: heathkh/iwct
    def __init__(self, tide_uri):
        """Load a tide dataset, indexing objects by id and images by label."""
        self.objectid_to_object = {}
        self.imageid_to_label = {}
        LOG(INFO, 'starting to load tide dataset...')
        # load list of images that belong to each tide object
        tide_reader = py_pert.StringTableReader()
        tide_reader.Open(tide_uri)
        for index, (key, value) in enumerate(tide_reader):
            proto = tide_pb2.Object()
            proto.ParseFromString(value)
            CHECK(proto.IsInitialized())
            tide_object = TideObject()
            tide_object.LoadFromProto(index, proto)
            self.objectid_to_object[tide_object.id] = tide_object

        # Invert the object -> image-id lists into an image -> label map,
        # covering the pos/neg/none label kinds in that order.
        for obj in self.objectid_to_object.itervalues():
            for label_kind, id_list in (('pos', obj.pos_image_ids),
                                        ('neg', obj.neg_image_ids),
                                        ('none', obj.none_image_ids)):
                for image_id in id_list:
                    self.imageid_to_label[image_id] = TideLabel(obj.id, label_kind)

        LOG(INFO, 'done loading tide dataset...')
        return
Example #4
0
    def __init__(self, param_data_uri):
        """Load IterGraphParams configs from a pert of ConfigurationResults and build the table."""
        self.num_configs = None
        self.rows = []
        self.configs_per_page = 10
        self.num_pages = None

        # Parameter rows hidden from the rendered table.
        self.exclude_rows = [
            'corresondence_filter', 'type', 'dataset_name',
            'visual_vocabulary_uri', 'max_image_replication_factor',
            'max_match_batch_size', 'max_vertex_degree', 'dataset name'
        ]
        reader = py_pert.StringTableReader()
        CHECK(reader.Open(param_data_uri))
        progress = iwutil.MakeProgressBar(reader.Entries())
        result = lpbench_pb2.ConfigurationResult()
        configs = []
        for index, (key, value) in enumerate(reader):
            result.ParseFromString(value)
            # Copy out the iter_graph sub-message so each config survives
            # the reuse of the shared `result` proto.
            config = itergraph_pb2.IterGraphParams()
            config.CopyFrom(result.config.iter_graph)
            configs.append(config)
            progress.update(index)

        self.__BuildTable(configs)
        return
Example #5
0
def test_CopyLocalToUri():
  local_uri = "local://tmp/data/test_ufs.pert";
  remote_uri = "maprfs://data/tmp/test_ufs.pert";
  
  CreateTestFile(local_uri)
  
  ok, scheme, path, error = py_pert.ParseUri(local_uri)
  CHECK(ok)
  
  py_pert.CopyLocalToUri(path, remote_uri)
  
  CHECK(py_pert.Exists(local_uri))
  CHECK(py_pert.Exists(remote_uri))
  
  reader = py_pert.StringTableReader()
  reader.Open(remote_uri)
  
  expected_count = 1000
  count = 0
  for (key, value), (expected_key, expected_value) in zip(reader, GenerateTestData()):
    CHECK_EQ(key, expected_key)
    CHECK_EQ(value, expected_value)
    count += 1
    
  CHECK_EQ(count, expected_count)
  
  
  print py_pert.ListDirectory(local_uri)
  print py_pert.ListDirectory(remote_uri)
  
  return
Example #6
0
def LoadImageRegionGraph(uri):
    """Load an ImageRegionGraph proto from the pert at uri.

    The table stores all edges (keys prefixed 'e') followed by all
    vertices (keys prefixed 'v'); the counts come from table metadata.
    """
    irg = iw_pb2.ImageRegionGraph()
    reader = py_pert.StringTableReader()
    CHECK(reader.Open(uri))
    ok, tmp = reader.GetMetadata("num_edges")
    # BUG FIX: the num_edges metadata lookup was previously unchecked
    # (only num_vertices was), so a malformed table could crash in long().
    CHECK(ok, "this doesn't appear to be a irg uri: %s" % (uri))
    num_edges = long(tmp)
    ok, tmp = reader.GetMetadata("num_vertices")
    CHECK(ok, "this doesn't appear to be a irg uri: %s" % (uri))
    num_vertices = long(tmp)
    CHECK_EQ(reader.Entries(), num_edges + num_vertices)

    # load edges
    progress = iwutil.MakeProgressBar(num_edges)
    for i in range(num_edges):
        ok, key, value = reader.Next()
        CHECK(ok)
        CHECK_EQ(key[0], 'e')
        irg.edge.add().ParseFromString(value)
        progress.update(i)

    # load vertices
    progress = iwutil.MakeProgressBar(num_vertices)
    for i in range(num_vertices):
        ok, key, value = reader.Next()
        CHECK(ok)
        CHECK_EQ(key[0], 'v')
        irg.vertex.add().ParseFromString(value)
        progress.update(i)

    return irg
Example #7
0
    def __init__(self, param_data_uri):
        """Build precision/recall rows for the rigid objects of an eval2 Result."""
        self.rows = []
        self.rigid_objects = [
            'starbucks_logo', 'prius', 'nasa_spaceshuttle', 'starwars_r2d2',
            'kfcsanders_logo', 'british_telephone_booth', 'csx_locomotive',
            'thinker', 'kindle', 'superman', 'vw_bug', 'parking_meter',
            'violin'
        ]
        self.nonrigid_objects = [
            'monarch_butterfly', 'peacock', 'pineapple', 'giraffe',
            'mallard_duck', 'ladybug', 'pug', 'bull_terrier', 'elephant',
            'artichoke'
        ]

        reader = py_pert.StringTableReader()
        CHECK(reader.Open(param_data_uri))
        CHECK_EQ(reader.Entries(), 1)
        result = eval2_pb2.Result()
        for k, v in reader:
            result.ParseFromString(v)

        # NOTE(review): only rigid objects are tabulated; non-rigid labels
        # are intentionally skipped.  The original code also ran
        # CHECK(name in rigid or name in nonrigid) AFTER the rigid-only
        # filter, which made it always true — removed as dead code.
        for i in range(len(result.label_names)):
            name = result.label_names[i]
            if name not in self.rigid_objects:
                continue
            self.rows.append(
                self.Row(result.label_names[i],
                         result.object_precision.mean[i],
                         result.object_recall.mean[i]))

        self.rows.sort(key=lambda r: r.f_score, reverse=True)

        return
Example #8
0
def ReadPrecisionRecallEval2(base_uri):
  """Return (mean precision, mean recall) from a single-entry eval2 Result pert."""
  reader = py_pert.StringTableReader()
  CHECK(reader.Open(base_uri))
  CHECK_EQ(reader.Entries(), 1)
  result = eval2_pb2.Result()
  for _, value in reader:
    result.ParseFromString(value)
  return result.precision.mean, result.recall.mean
Example #9
0
def ReadPrecisionRecallEval1(base_uri):
  """Return (mean precision, mean recall) of the final phase of an eval1 Result pert."""
  reader = py_pert.StringTableReader()
  CHECK(reader.Open(base_uri))
  CHECK_EQ(reader.Entries(), 1)
  result = eval1_pb2.Result()
  for _, value in reader:
    result.ParseFromString(value)
  CHECK(result.IsInitialized())
  final_phase = result.phases[-1]
  return final_phase.precision.mean, final_phase.recall.mean
Example #10
0
def LoadProtoFromUriOrDie(template_proto, uri):
    """Parse the first entry of the pert at uri into template_proto, in place."""
    template_proto.Clear()
    reader = py_pert.StringTableReader()
    CHECK(reader.Open(uri))
    for _, value in reader:
        # Only the first entry is consumed.
        template_proto.ParseFromString(value)
        break
    reader.Close()
    CHECK(template_proto.IsInitialized(), 'Wrong proto type...')
    return
Example #11
0
def OpenTideDataset(tide_uri):
  """Return the list of tide_pb2.Object entries stored in the pert at tide_uri."""
  LOG(INFO, 'starting to load tide dataset...')
  # load list of images that belong to each tide object
  tide_reader = py_pert.StringTableReader()
  # BUG FIX: the Open() result was previously ignored; CHECK it like the
  # rest of the codebase so a bad uri fails fast instead of yielding
  # an empty dataset.
  CHECK(tide_reader.Open(tide_uri))
  tide_objects = []
  for k, v in tide_reader:
    tide_object = tide_pb2.Object()
    tide_object.ParseFromString(v)
    CHECK(tide_object.IsInitialized())
    tide_objects.append(tide_object)
  return tide_objects
Example #12
0
def CreateTestData(uri):
  """Write 10000 sequential '%05d' key/value pairs to uri and verify the entry count."""
  writer = py_pert.StringTableWriter()
  # BUG FIX: the writer Open() result was previously ignored; CHECK it as
  # the rest of the codebase does so failures surface at the call site.
  CHECK(writer.Open(uri, 4))
  num_entries = 10000
  for i in range(num_entries):
    entry = '%05d' % i
    writer.Add(entry, entry)
  writer.Close()

  # Re-open for reading to confirm everything was written.
  reader = py_pert.StringTableReader()
  CHECK(reader.Open(uri))
  CHECK_EQ(reader.Entries(), num_entries)
  reader.Close()
  return
Example #13
0
def ExtractFeatures(feature_extractor_params, images_uri, features_uri):
    """Run a feature extractor over every jpeg in images_uri, writing to features_uri.

    Images the extractor fails on are silently skipped (no entry written).
    """
    extractor = py_featureextractor.CreateFeatureExtractorOrDie(
        feature_extractor_params)
    CHECK(extractor)
    reader = py_pert.StringTableReader()
    CHECK(reader.Open(images_uri), 'can not open file: %s' % (images_uri))
    image = iw_pb2.JpegImage()
    writer = py_pert.ProtoTableWriter()
    # BUG FIX: the writer Open() result was previously ignored.  Also pass
    # the template proto inline instead of binding it to `features`, which
    # the loop below rebinds to the extractor's output.
    CHECK(writer.Open(iw_pb2.ImageFeatures(), features_uri, 1))
    progress = MakeProgressBar(reader.Entries())
    for i, (k, v) in enumerate(reader):
        image.ParseFromString(v)
        ok, features = extractor.Run(image.data)
        if ok:
            writer.Add(k, features.SerializeToString())
        progress.update(i)
    return
Example #14
0
def main():
    """Shrink TIDE region bounding boxes to match images cropped by crop_fraction.

    Reads objectid_to_object.pert, applies the same crop transform that was
    applied to the images to every region rectangle, and writes the adjusted
    protos to cropped_objectid_to_object.pert.
    """
    # Fraction cropped from the image — assumed to match the transform used
    # on the images themselves; TODO confirm against crop.CropRect.
    crop_fraction = 0.05
    base_uri = 'local://home/ubuntu/Desktop/vol-7f209e0c/itergraph/tide_v08_distractors/'
    orig_image_uri = '%s/photoid_to_image.pert' % (base_uri)
    tide_uri = '%s/objectid_to_object.pert' % (base_uri)
    new_tide_uri = '%s/cropped_objectid_to_object.pert' % (base_uri)
    #cropped_image_uri = '%s/cropped_scaled_photoid_to_image.pert' % (base_uri)

    # Original (pre-crop) image dimensions, keyed by photo id.
    orig_sizes_dict = GetCachedImageSizes(orig_image_uri)
    reader = py_pert.StringTableReader()
    writer = py_pert.ProtoTableWriter()

    # Reused as both the writer's template proto and the parse target.
    tide_object = tide_pb2.Object()

    CHECK(writer.Open(tide_object, new_tide_uri, 1))
    CHECK(reader.Open(tide_uri))
    progress = iwutil.MakeProgressBar(reader.Entries())
    for i, (k, v) in enumerate(reader):
        tide_object.ParseFromString(v)
        CHECK(tide_object.IsInitialized())

        # adjust the bb of all the photos
        for photo in tide_object.photos:
            CHECK(photo.id in orig_sizes_dict)
            width, height = orig_sizes_dict[photo.id]
            try:
                crop_rect = crop.CropRect(width, height, crop_fraction)
                # Convert each region from corner form (x1,y1,x2,y2) to
                # (x, y, w, h), apply the crop, and convert back in place.
                for region in photo.regions:
                    bb1_x, bb1_y, bb1_w, bb1_h = region.x1, region.y1, region.x2 - region.x1, region.y2 - region.y1
                    bb2_x, bb2_y, bb2_w, bb2_h = crop_rect.ApplyCropToRect(
                        bb1_x, bb1_y, bb1_w, bb1_h)
                    region.x1 = bb2_x
                    region.y1 = bb2_y
                    region.x2 = bb2_x + bb2_w
                    region.y2 = bb2_y + bb2_h
            except ValueError:
                # NOTE(review): presumably CropRect/ApplyCropToRect raise
                # ValueError on degenerate geometry — the photo's regions
                # are then left unmodified on purpose.
                print 'crop failed, not adjusting bb'

        # write adjusted proto to output
        writer.Add(k, tide_object.SerializeToString())
        progress.update(i)

    return
Example #15
0
def main():
    """Upload resized copies of every dataset image to the S3 tide_image_cache bucket.

    Reads jpegs from a random-access pert and fans resize+upload work out to
    a local worker pool; set reset_bucket to wipe the bucket first.
    """
    dataset_name = 'tide_v08'
    # Target pixel areas for each generated size variant.
    sizes = {}
    sizes['thumbnail'] = 100 * 100
    sizes['small'] = 640 * 480
    reset_bucket = False

    #dataset_base_uri = 'local://home/ubuntu/Desktop/vol-0449ca74/itergraph/%s/' % (dataset_name)
    #images_uri = '%s/cropped_scaled_photoid_to_image.pert' % (dataset_base_uri)
    images_uri = 'local://home/ubuntu/Desktop/vol-0449ca74/itergraph/tide_v14/cropped_scaled_photoid_to_image_randomaccess.pert'
    bucket_name = 'tide_image_cache'
    s3 = boto.connect_s3()

    # create_bucket is idempotent — returns the existing bucket if present.
    bucket = s3.create_bucket(bucket_name)
    if reset_bucket:
        # Empty and delete the bucket, then recreate it world-readable.
        LOG(INFO, 'listing contents of bucket...')
        all_keys = [key.name for key in bucket.list()]
        LOG(INFO, 'deleting contents of bucket...')
        bucket.delete_keys(all_keys)
        s3.delete_bucket(bucket_name)
        bucket = s3.create_bucket(bucket_name)
        bucket.set_acl('public-read')

    reader = py_pert.StringTableReader()
    CHECK(reader.Open(images_uri))
    progress = iwutil.MakeProgressBar(reader.Entries())

    # Bounded queue keeps at most max_queue_size pending jobs in memory.
    num_workers = 200
    max_queue_size = 200
    job_queue = JobQueue(num_workers, max_queue_size)
    for i, (key, value) in enumerate(reader):
        image_id = py_base.KeyToUint64(key)
        jpeg_image = iw_pb2.JpegImage()
        jpeg_image.ParseFromString(value)
        job_queue.AddJob(
            ResizeAndUploadImageJob(bucket, sizes, image_id, jpeg_image.data))
        progress.update(i)

    # Block until the pool has drained every queued upload.
    job_queue.WaitForJobsDone()

    return
Example #16
0
 def ExportImages(self):
   """Dump every jpeg in images_uri to <output_path>/images/ and cache image sizes.

   If the pickled size cache already exists, load it and skip the export.
   """
   image_size_cache_filename = '%s/images/size_cache.pickle' % self.output_path
   if os.path.exists(image_size_cache_filename):
     self.imageid_to_size = iwutil.LoadObject(image_size_cache_filename)
     return
   base_path = '%s/images/' % (self.output_path)
   os.mkdir(base_path)
   LOG(INFO, 'exporting images...')
   reader = py_pert.StringTableReader()
   CHECK(reader.Open(self.images_uri))
   jpeg_image = iw_pb2.JpegImage()
   progress = iwutil.MakeProgressBar(reader.Entries())
   for index, (key, value) in enumerate(reader):
     image_id = py_base.KeyToUint64(key)
     jpeg_image.ParseFromString(value)
     filename = '%s/%s.jpg' % (base_path, JSPad(image_id))
     with open(filename, 'wb') as out_file:
       out_file.write(jpeg_image.data)
     self.imageid_to_size[image_id] = (jpeg_image.width, jpeg_image.height)
     progress.update(index)
   iwutil.SaveObject(self.imageid_to_size, image_size_cache_filename)
   return
Example #17
0
 def __init__(self, images_uri):
     """Open the image table at images_uri for reading, dying on failure."""
     LOG(INFO, 'opening image uri: %s' % (images_uri))
     self.image_reader = py_pert.StringTableReader()
     CHECK(self.image_reader.Open(images_uri))
     return
Example #18
0
 def __init__(self, images_uri):
   """Open a string-table reader over images_uri, dying on failure."""
   self.reader = py_pert.StringTableReader()
   CHECK(self.reader.Open(images_uri))