def UndirectedEdgeKey(image_index_a, image_index_b):
    """Build the key identifying the undirected edge between two images.

    Both indices must be strictly positive. The endpoints are ordered so the
    smaller index comes first, which makes the key independent of the order
    in which the two indices are passed.

    Returns:
      py_base.Uint64ToKey(smaller index) concatenated with
      py_base.Uint64ToKey(larger index).
    """
    CHECK_GT(image_index_a, 0)
    CHECK_GT(image_index_b, 0)
    # Order the endpoints so (a, b) and (b, a) map to the same key.
    low = min(image_index_a, image_index_b)
    high = max(image_index_a, image_index_b)
    CHECK_GE(high, low)
    return py_base.Uint64ToKey(low) + py_base.Uint64ToKey(high)
def Run(self):
    """Build the all-pairs match plan and write it as unsorted match batches.

    Reads the image ids (the keys of the 'images' input), then for every
    image creates batches pairing it, as the primary image, with every other
    image as a secondary image; the secondaries are split into chunks of at
    most self.max_batch_size. For each non-empty batch one
    MatchBatchMetadata record is written for the primary image and one for
    each secondary image, keyed by image id, to the 'unsorted_match_batches'
    output.

    The output is written unsorted and must later be sorted by key for the
    future join stage.
    """
    reader = py_pert.StringTableShardSetReader()
    reader.Open(self.GetInput('images').GetUri())
    image_ids = [ParseUint64Key(key) for key, _ in reader]

    LOG(INFO, 'creating match groups')
    match_groups = []  # a list of tuples (primary_id, secondary id list)
    widgets = [Percentage(), ' ', Bar(), ' ', ETA()]
    pbar = ProgressBar(widgets=widgets, maxval=len(image_ids)).start()
    for i, primary_id in enumerate(image_ids):
        # Every image except the primary itself is a secondary candidate.
        secondary_ids = list(image_ids)
        secondary_ids.remove(primary_id)
        for secondary_id_chunk in chunks(secondary_ids, self.max_batch_size):
            match_groups.append((primary_id, secondary_id_chunk))
        pbar.update(i)

    # write out the match plan (must be later sorted by key for future join
    # stage)
    writer = py_pert.StringTableWriter()
    options = py_pert.WriterOptions()
    options.SetUnsorted()
    LOG(INFO, 'writing match groups')
    CHECK(
        writer.Open(
            self.GetOutput('unsorted_match_batches').GetUri(), 1, options))
    pbar = ProgressBar(widgets=widgets, maxval=len(match_groups)).start()
    for batch_id, (batch_primary_image, batch_image_ids) in enumerate(
            match_groups):
        if not batch_image_ids:
            continue
        batch_name = py_base.Uint64ToKey(batch_id)
        metadata = iw_pb2.MatchBatchMetadata()
        # One record marks the primary image of the batch ...
        metadata.image_id = batch_primary_image
        metadata.batch_name = batch_name
        metadata.is_primary = True
        writer.Add(py_base.Uint64ToKey(metadata.image_id),
                   metadata.SerializeToString())
        # ... followed by one record per secondary image.
        for image_id in batch_image_ids:
            metadata.image_id = image_id
            metadata.batch_name = batch_name
            metadata.is_primary = False
            writer.Add(py_base.Uint64ToKey(metadata.image_id),
                       metadata.SerializeToString())
        pbar.update(batch_id)
    # Close explicitly so the output is finalized before Run returns.
    writer.Close()
def GetImage(self, image_id):
    """Fetch a single image record by id.

    Returns:
      The iw_pb2.JpegImage parsed from the value stored under image_id in
      self.image_reader, or None when the reader has no entry for that id.
    """
    lookup_key = py_base.Uint64ToKey(image_id)
    found, serialized = self.image_reader.Find(lookup_key)
    if found:
        image = iw_pb2.JpegImage()
        image.ParseFromString(serialized)
        CHECK(image.IsInitialized())
        return image
    return None
def __ImportImageArchive(self, extract_dir, dataset_root):
    """Import the JPEG files under extract_dir into a photoid_to_image PERT.

    Steps:
      1. Recursively collect every '*.jpg' / '*.jpeg' file below extract_dir.
      2. Rename each file to '<extract_dir>/fingerprinted/<fingerprint>.jpg',
         where the fingerprint of the file contents is zero-padded to 64
         digits.
      3. Write the files, in sorted filename order, to
         'local://<dataset_root>/photoid_to_image.pert' as iw_pb2.JpegImage
         records keyed by the fingerprint. Files that the image library
         cannot open are logged and skipped.

    Args:
      extract_dir: directory containing the extracted archive.
      dataset_root: directory in which the output PERT file is created.

    Returns:
      False if two or fewer image files were found (nothing is written),
      True otherwise.
    """
    image_filenames = []
    print('Searching for image files...')
    for root, dirnames, filenames in os.walk(extract_dir):
        for filename in filenames:
            if fnmatch.fnmatch(filename, '*.jpg') or fnmatch.fnmatch(
                    filename, '*.jpeg'):
                image_filenames.append(os.path.join(root, filename))

    if len(image_filenames) <= 2:
        return False

    fingerprinted_path = '%s/fingerprinted/' % (extract_dir)
    os.mkdir(fingerprinted_path)
    print('Fingerprinting image files...')
    progress = util.MakeProgressBar(len(image_filenames))
    # rename all files according to fingerprint
    for i, filename in enumerate(image_filenames):
        # JPEG data is binary: read it in binary mode and close the handle.
        with open(filename, 'rb') as image_file:
            data = image_file.read()
        fp = py_base.FingerprintString(data)
        # ensure lexical sort = numeric sort = key sort
        dst = '%s/%064d.jpg' % (fingerprinted_path, fp)
        os.rename(filename, dst)
        progress.update(i)

    filenames = glob.glob('%s/*.jpg' % fingerprinted_path)
    filenames.sort()
    output_uri = 'local://%s/photoid_to_image.pert' % (dataset_root)

    # write to pert in sorted order
    print('Generating image PERT file...')
    writer = py_pert.StringTableWriter()
    CHECK(writer.Open(output_uri, 1))
    progress = util.MakeProgressBar(len(filenames))
    for i, filename in enumerate(filenames):
        with open(filename, 'rb') as image_file:
            data = image_file.read()
        key = py_base.Uint64ToKey(py_base.FingerprintString(data))
        try:
            im = Image.open(StringIO.StringIO(data))
        except IOError as e:
            # Not a readable image: report it and leave it out of the PERT.
            LOG(INFO, 'Error opening %s - %s' % (filename, e))
            continue
        width, height = im.size
        jpeg = iw_pb2.JpegImage()
        jpeg.data = data
        jpeg.width = width
        jpeg.height = height
        CHECK(jpeg.IsInitialized())
        writer.Add(key, jpeg.SerializeToString())
        progress.update(i)
    writer.Close()
    return True
def BatchGetImages(self, image_ids):
    """Look up a batch of images by id.

    The ids are looked up in ascending order. The caller's sequence is not
    modified.

    Args:
      image_ids: iterable of image ids to fetch.

    Returns:
      A dict mapping each image_id to its parsed iw_pb2.JpegImage message.

    A CHECK fails if any id is missing from self.image_reader or if a stored
    message is not fully initialized.
    """
    id_to_jpeg = {}
    # sorted() makes a copy, so the caller's list keeps its order and any
    # iterable (tuple, set, ...) is accepted.
    for image_id in sorted(image_ids):
        ok, value = self.image_reader.Find(py_base.Uint64ToKey(image_id))
        CHECK(ok)
        jpeg = iw_pb2.JpegImage()
        jpeg.ParseFromString(value)
        CHECK(jpeg.IsInitialized())
        id_to_jpeg[image_id] = jpeg
    return id_to_jpeg
def BatchGetImagesAsDataUri(self, image_ids):
    """Look up a batch of images by id and return them as data URLs.

    The reader is rewound to its start and the ids are then looked up in
    ascending order. The caller's sequence is not modified.

    Args:
      image_ids: iterable of image ids to fetch.

    Returns:
      A dict mapping each image_id to the value JpegToDataUrl returns for
      that image's jpeg data.

    A CHECK fails if any id is missing from self.image_reader or if a stored
    message is not fully initialized.
    """
    id_to_datauri = {}
    self.image_reader.SeekToStart()
    # sorted() makes a copy, so the caller's list keeps its order and any
    # iterable (tuple, set, ...) is accepted.
    for image_id in sorted(image_ids):
        ok, value = self.image_reader.Find(py_base.Uint64ToKey(image_id))
        CHECK(ok, 'failed to find image_id: %d' % (image_id))
        jpeg = iw_pb2.JpegImage()
        jpeg.ParseFromString(value)
        CHECK(jpeg.IsInitialized())
        id_to_datauri[image_id] = JpegToDataUrl(jpeg.data)
    return id_to_datauri
def main():
    """Extract the positive images of every TIDE object to disk.

    Loads the TIDE dataset from the hard-coded base_uri, collects the
    positive image ids of every object, and writes each image's jpeg data to
    './extracted/<object name>/<image id>.jpg', creating directories as
    needed.
    """
    base_uri = 'local://home/ubuntu/Desktop/datasets/tide_v12/'
    tide_uri = '%s/objectid_to_object.pert' % (base_uri)
    dataset = tide.TideDataset(tide_uri)
    # NOTE(review): this prints the `tide` module object, not the dataset;
    # looks like leftover debugging output - confirm before removing.
    print(tide)
    pos_imageids = []
    imageid_to_objectname = {}
    for _object_id, obj in dataset.objectid_to_object.iteritems():
        print(obj.name)
        pos_imageids.extend(obj.pos_image_ids)
        for image_id in obj.pos_image_ids:
            imageid_to_objectname[image_id] = obj.name

    # sort for efficient access to pert
    pos_imageids.sort()
    images_pert_uri = '%s/photoid_to_image.pert' % (base_uri)
    reader = py_pert.StringTableReader()
    CHECK(reader.Open(images_pert_uri))
    for image_id in pos_imageids:
        ok, data = reader.Find(py_base.Uint64ToKey(image_id))
        CHECK(ok)
        jpeg_image = iw_pb2.JpegImage()
        jpeg_image.ParseFromString(data)
        objectname = imageid_to_objectname[image_id]
        dirname = './extracted/%s' % (objectname)
        filename = '%s/%d.jpg' % (dirname, image_id)
        if not os.path.exists(dirname):
            os.makedirs(dirname)
        # The context manager closes (and flushes) each output file instead
        # of leaking one open handle per image.
        with open(filename, 'wb') as f:
            f.write(jpeg_image.data)
    return
def Uint64ToKey(i):
    """Return the key for i; a thin wrapper around py_base.Uint64ToKey."""
    key = py_base.Uint64ToKey(i)
    return key
def GenerateTestData():
    """Yield 1000 (key, value) test pairs.

    For each n in 0..999 both the key and the value are
    py_base.Uint64ToKey(n).
    """
    to_key = py_base.Uint64ToKey
    for number in range(1000):
        entry_key = to_key(number)
        entry_value = to_key(number)
        yield (entry_key, entry_value)
def RenderSvg(self, image_a_id, image_b_id):
    """Render the geometric match between two images as interactive SVG.

    The two images are drawn side by side, the one with the smaller id on
    the left. For every correspondence of every match the markup contains:
      * a small filled dot at the feature position in each image,
      * a "support" circle of the feature's radius around each dot, whose
        opacity is raised while the pointer is over either dot,
      * a line joining the two dots, hidden except while the pointer is
        over either dot.

    Args:
      image_a_id: id of one image of the pair.
      image_b_id: id of the other image. The two ids must differ.

    Returns:
      A string with the SVG document followed by one '<pre>nfa: ...</pre>'
      element per match.

    A CHECK fails if the ids are equal or if no match result is stored for
    the pair in self.match_reader.
    """
    # The match key is built with the smaller image id first.
    if image_a_id > image_b_id:
        image_a_id, image_b_id = (image_b_id, image_a_id)
    CHECK_LT(image_a_id, image_b_id)
    match_result = iw_pb2.GeometricMatchResult()
    match_key = py_base.Uint64ToKey(image_a_id) + py_base.Uint64ToKey(
        image_b_id)
    ok, value = self.match_reader.Find(match_key)
    CHECK(ok, 'cant find key: %d %d' % (image_a_id, image_b_id))
    match_result.ParseFromString(value)

    # get image data
    id_to_jpeg = self._BatchGetImages([image_a_id, image_b_id])
    image_a = id_to_jpeg[image_a_id]
    image_b = id_to_jpeg[image_b_id]

    # render svg
    width = image_a.width + image_b.width
    height = max(image_a.height, image_b.height)

    image_template = ('<image x="%dpx" y="%dpx" width="%dpx" height="%dpx" '
                      'xlink:href="%s"> </image> \n')
    dot_template = ('<circle id="%s" cx="%f" cy="%f" r="3" stroke="black" '
                    'stroke-width="0" fill="%s"/>\n')
    support_template = ('<circle id="%s_support" cx="%f" cy="%f" r="%f" '
                        'stroke-width="5" fill="none" opacity="0.5" '
                        'stroke="%s" >\n')
    opacity_set_template = ('<set attributeName="opacity" from="0.5" '
                            'to="1.0" begin="%s.mouseover" '
                            'end="%s.mouseout"/>')
    line_template = ('<line x1="%f" y1="%f" x2="%f" y2="%f" '
                     'style="stroke:rgb(255,0,0);stroke-width:2" '
                     'visibility="hidden">')
    visibility_set_template = ('<set attributeName="visibility" '
                               'from="hidden" to="visible" '
                               'begin="%s.mouseover" end="%s.mouseout"/>')

    # Collect the fragments in a list and join once at the end rather than
    # growing one string with += inside the nested loops.
    parts = [
        """<?xml version="1.0" standalone="no"?><!DOCTYPE svg PUBLIC "-//W3C//DTD SVG 1.1//EN" "http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd">""",
        """<style> line { opacity: 0.25;} line:hover { opacity: 1.0;} </style>""",
        '<svg width="%d" height="%d" version="1.1" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink">'
        % (width, height),
        image_template % (0, 0, image_a.width, image_a.height,
                          JpegToDataUrl(image_a.data)),
        # Image b is placed immediately to the right of image a.
        image_template % (image_a.width, 0, image_b.width, image_b.height,
                          JpegToDataUrl(image_b.data)),
    ]

    colors = [
        'yellow',
        'red',
        'blue',
    ]
    match_info = []
    for match_index, match in enumerate(match_result.matches):
        match_info.append(match.nfa)
        color = colors[match_index % len(colors)]
        for correspondence_index, c in enumerate(match.correspondences):
            a_x = c.a.pos.x
            a_y = c.a.pos.y
            # Shift image-b coordinates right by the width of image a.
            b_x = c.b.pos.x + image_a.width
            b_y = c.b.pos.y
            # Ids include the match index: element ids must be unique in
            # the document, otherwise the begin="<id>.mouseover" references
            # below are ambiguous when there is more than one match.
            left_pt_id = "lp%d_%d" % (match_index, correspondence_index)
            right_pt_id = "rp%d_%d" % (match_index, correspondence_index)
            hover_sets = (
                opacity_set_template % (left_pt_id, left_pt_id) +
                opacity_set_template % (right_pt_id, right_pt_id))
            parts.extend([
                dot_template % (left_pt_id, a_x, a_y, color),
                dot_template % (right_pt_id, b_x, b_y, color),
                support_template % (left_pt_id, a_x, a_y, c.a.radius, color),
                hover_sets,
                "</circle>",
                support_template % (right_pt_id, b_x, b_y, c.b.radius,
                                    color),
                hover_sets,
                "</circle>",
                line_template % (a_x, a_y, b_x, b_y),
                visibility_set_template % (left_pt_id, left_pt_id),
                visibility_set_template % (right_pt_id, right_pt_id),
                "</line>",
            ])
    parts.append('</svg>')
    for info in match_info:
        parts.append('<pre>nfa: %s</pre>' % (info))
    return ''.join(parts)