Ejemplo n.º 1
0
def CreateMixedJpeg(jpeg_a, jpeg_b):
  """Build a side-by-side composite of two JPEG images.

  Image b is rescaled so that its height equals the height of image a and
  is pasted directly to the right of image a on a black RGB canvas.

  Args:
    jpeg_a: message whose `data` field holds the encoded left image.
    jpeg_b: message whose `data` field holds the encoded right image.

  Returns:
    A new iw_pb2.JpegImage holding the JPEG-encoded composite together
    with its width and height.
  """
  left = Image.open(StringIO.StringIO(jpeg_a.data))
  right = Image.open(StringIO.StringIO(jpeg_b.data))

  left_width, left_height = left.size
  right_width, right_height = right.size

  # Scale factor that makes the right image exactly as tall as the left one.
  scale = float(left_height)/right_height
  scaled_right_width = int(scale*right_width)
  right = right.resize((scaled_right_width, left_height))

  canvas_size = (left_width + scaled_right_width, left_height)
  canvas = Image.new("RGB", canvas_size, "black")
  canvas.paste(left, (0, 0))
  canvas.paste(right, (left_width, 0))

  # Encode the composite in memory and wrap it in a JpegImage message.
  encoded = StringIO.StringIO()
  canvas.save(encoded, format="JPEG")

  mixed = iw_pb2.JpegImage()
  mixed.data = encoded.getvalue()
  mixed.width, mixed.height = canvas.size
  CHECK(mixed.IsInitialized())
  return mixed
Ejemplo n.º 2
0
 def GetNextImage(self):
   """Read the next record from self.reader and return it as a JpegImage.

   CHECKs that the reader produced a record and that the parsed message
   is fully initialized. The record key is ignored.
   """
   ok, _key, serialized = self.reader.Next()
   CHECK(ok)
   image = iw_pb2.JpegImage()
   image.ParseFromString(serialized)
   CHECK(image.IsInitialized())
   return image
Ejemplo n.º 3
0
 def GetImage(self, image_id):
     """Look up a single image by id.

     Returns the parsed JpegImage, or None when image_reader finds no
     entry for the key derived from image_id.
     """
     key = py_base.Uint64ToKey(image_id)
     found, serialized = self.image_reader.Find(key)
     if found:
         image = iw_pb2.JpegImage()
         image.ParseFromString(serialized)
         CHECK(image.IsInitialized())
         return image
     return None
Ejemplo n.º 4
0
Archivo: tide.py Proyecto: heathkh/iwct
 def GetNextImage(self):
     """Advance image_reader by one record.

     Returns:
       A tuple (image_id, jpeg): image_id is decoded from the record key
       and jpeg is the JpegImage parsed from the record value.
     """
     ok, raw_key, raw_value = self.image_reader.Next()
     CHECK(ok)
     image_id = iwutil.KeyToUint64(raw_key)
     parsed = iw_pb2.JpegImage()
     parsed.ParseFromString(raw_value)
     CHECK(parsed.IsInitialized())
     return image_id, parsed
Ejemplo n.º 5
0
    def __ImportImageArchive(self, extract_dir, dataset_root):
        """Import the JPEG files found under extract_dir into an image PERT.

        Every *.jpg / *.jpeg file below extract_dir is renamed to a
        zero-padded fingerprint of its content inside
        <extract_dir>/fingerprinted/. The renamed files are then written, in
        sorted filename order, to local://<dataset_root>/photoid_to_image.pert
        as serialized iw_pb2.JpegImage records keyed by fingerprint.

        Args:
          extract_dir: directory tree that is searched for image files.
          dataset_root: path under which the output PERT is created.

        Returns:
          False if two or fewer image files were found (nothing is
          written), True otherwise.
        """
        image_filenames = []
        # print(...) with a single argument behaves the same as the
        # Python 2 print statement.
        print('Searching for image files...')
        for root, _dirnames, filenames in os.walk(extract_dir):
            for filename in filenames:
                if fnmatch.fnmatch(filename, '*.jpg') or fnmatch.fnmatch(
                        filename, '*.jpeg'):
                    image_filenames.append(os.path.join(root, filename))

        if len(image_filenames) <= 2:
            return False

        fingerprinted_path = '%s/fingerprinted/' % (extract_dir)
        os.mkdir(fingerprinted_path)

        print('Fingerprinting image files...')
        progress = util.MakeProgressBar(len(image_filenames))
        # rename all files according to fingerprint
        for i, filename in enumerate(image_filenames):
            # JPEG data is binary: read in 'rb' mode and close the handle
            # promptly instead of leaking one descriptor per image.
            with open(filename, 'rb') as f:
                data = f.read()
            fp = py_base.FingerprintString(data)
            dst = '%s/%064d.jpg' % (
                fingerprinted_path, fp
            )  # ensure lexical sort = numeric sort = key sort
            os.rename(filename, dst)
            progress.update(i)

        filenames = glob.glob('%s/*.jpg' % fingerprinted_path)
        filenames.sort()
        output_uri = 'local://%s/photoid_to_image.pert' % (dataset_root)

        # write to pert in sorted order
        print('Generating image PERT file...')
        writer = py_pert.StringTableWriter()
        CHECK(writer.Open(output_uri, 1))
        progress = util.MakeProgressBar(len(filenames))
        for i, filename in enumerate(filenames):
            with open(filename, 'rb') as f:
                data = f.read()
            key = py_base.Uint64ToKey(py_base.FingerprintString(data))
            try:
                im = Image.open(StringIO.StringIO(data))
            except IOError as e:
                # Unreadable images are logged and left out of the PERT.
                LOG(INFO, 'Error opening %s - %s' % (filename, e))
                continue
            width, height = im.size
            jpeg = iw_pb2.JpegImage()
            jpeg.data = data
            jpeg.width = width
            jpeg.height = height
            CHECK(jpeg.IsInitialized())
            writer.Add(key, jpeg.SerializeToString())
            progress.update(i)
        writer.Close()
        return True
Ejemplo n.º 6
0
def GetImageSizes(uri):
    """Collect the dimensions of every image stored in the table at `uri`.

    Returns:
      dict mapping image id (decoded from each record key) to a
      (width, height) tuple taken from the parsed JpegImage record.
    """
    table = py_pert.StringTableShardSetReader()
    CHECK(table.Open(uri))
    # A single message object is re-parsed for every record.
    message = iw_pb2.JpegImage()
    progress = iwutil.MakeProgressBar(table.Entries())
    sizes_by_id = {}
    for index, record in enumerate(table):
        key, value = record
        image_id = py_base.KeyToUint64(key)
        message.ParseFromString(value)
        sizes_by_id[image_id] = (message.width, message.height)
        progress.update(index)
    return sizes_by_id
Ejemplo n.º 7
0
    def BatchGetImages(self, image_ids):
        """Look up a batch of images by id.

        The ids are looked up in ascending order (the original note calls
        this an efficient batch lookup; presumably sorted access suits the
        underlying reader). The caller's sequence is not modified.

        Args:
          image_ids: iterable of integer image ids.

        Returns:
          dict mapping image_id to the parsed iw_pb2.JpegImage message.
        """
        id_to_jpeg = {}

        # sorted() iterates over a copy; an in-place sort() would silently
        # reorder the list owned by the caller.
        for image_id in sorted(image_ids):
            ok, value = self.image_reader.Find(py_base.Uint64ToKey(image_id))
            CHECK(ok)
            jpeg = iw_pb2.JpegImage()
            jpeg.ParseFromString(value)
            CHECK(jpeg.IsInitialized())
            id_to_jpeg[image_id] = jpeg

        return id_to_jpeg
Ejemplo n.º 8
0
    def BatchGetImagesAsDataUri(self, image_ids):
        """Look up a batch of images and return them as data URLs.

        The reader is rewound with SeekToStart() and the ids are then looked
        up in ascending order. The caller's sequence is not modified.

        Args:
          image_ids: iterable of integer image ids.

        Returns:
          dict mapping image_id to JpegToDataUrl(<jpeg data of that image>).
        """
        id_to_datauri = {}

        self.image_reader.SeekToStart()
        # sorted() iterates over a copy; an in-place sort() would silently
        # reorder the list owned by the caller.
        for image_id in sorted(image_ids):
            ok, value = self.image_reader.Find(py_base.Uint64ToKey(image_id))
            CHECK(ok, 'failed to find image_id: %d' % (image_id))
            jpeg = iw_pb2.JpegImage()
            jpeg.ParseFromString(value)
            CHECK(jpeg.IsInitialized())
            id_to_datauri[image_id] = JpegToDataUrl(jpeg.data)

        return id_to_datauri
Ejemplo n.º 9
0
def ExtractFeatures(feature_extractor_params, images_uri, features_uri):
    """Run a feature extractor over every image of a PERT table.

    Each record of the table at images_uri is parsed as an
    iw_pb2.JpegImage and passed to the extractor. When extraction
    succeeds, the resulting features are written to the table at
    features_uri under the same key; failed images are skipped.

    Args:
      feature_extractor_params: parameters handed to
        py_featureextractor.CreateFeatureExtractorOrDie.
      images_uri: uri of the input image table.
      features_uri: uri of the output features table.
    """
    extractor = py_featureextractor.CreateFeatureExtractorOrDie(
        feature_extractor_params)
    CHECK(extractor)
    reader = py_pert.StringTableReader()
    CHECK(reader.Open(images_uri), 'can not open file: %s' % (images_uri))
    image = iw_pb2.JpegImage()
    writer = py_pert.ProtoTableWriter()
    # The ImageFeatures instance only acts as the prototype for the table;
    # it is kept separate from the per-image extraction result below.
    writer.Open(iw_pb2.ImageFeatures(), features_uri, 1)
    progress = MakeProgressBar(reader.Entries())
    try:
        for i, (k, v) in enumerate(reader):
            image.ParseFromString(v)
            ok, features = extractor.Run(image.data)
            if ok:
                writer.Add(k, features.SerializeToString())
            progress.update(i)
    finally:
        # The writer was previously never closed; close it so the output
        # table is finalized, matching the other writers' usage.
        writer.Close()
    return
Ejemplo n.º 10
0
 def __ComputeExtractorFpm(self, extractor):
   """Measure the mean number of features per megapixel for `extractor`.

   Runs the extractor on the images at self.sample_indices, logs the
   ratio of total keypoints to total image area (in megapixels) and
   returns it.
   """
   image = iw_pb2.JpegImage()
   feature_count = 0
   area_megapixels = 0.0
   for index in self.sample_indices:
     ok, _key, value = self.reader.GetIndex(index)
     CHECK(ok)
     image.ParseFromString(value)
     CHECK(image.IsInitialized())
     extracted, features = extractor.Run(image.data)
     # The area is counted for every sample, even when extraction fails.
     area_megapixels += image.width*image.height*1e-6
     if extracted:
       feature_count += len(features.keypoints)

   # features per megapixel over the whole sample
   mean_fpm = feature_count/area_megapixels
   LOG(INFO, 'mean_fpm: %f' % (mean_fpm))
   return mean_fpm
Ejemplo n.º 11
0
def main():
    """Extract all positive TIDE images to ./extracted/<object name>/<id>.jpg.

    Loads the TIDE dataset, gathers the positive image ids of every object,
    looks each one up in the image PERT and writes its JPEG data to disk.
    """
    base_uri = 'local://home/ubuntu/Desktop/datasets/tide_v12/'
    tide_uri = '%s/objectid_to_object.pert' % (base_uri)

    dataset = tide.TideDataset(tide_uri)

    # NOTE(review): this prints the `tide` module, not `dataset` - confirm
    # that this is the intended output.
    print(tide)

    pos_imageids = []

    imageid_to_objectname = {}

    for _object_id, obj in dataset.objectid_to_object.iteritems():
        print(obj.name)
        pos_imageids.extend(obj.pos_image_ids)
        for image_id in obj.pos_image_ids:
            imageid_to_objectname[image_id] = obj.name

    # sort for efficient access to pert
    pos_imageids.sort()

    images_pert_uri = '%s/photoid_to_image.pert' % (base_uri)

    reader = py_pert.StringTableReader()
    CHECK(reader.Open(images_pert_uri))

    for image_id in pos_imageids:
        ok, data = reader.Find(py_base.Uint64ToKey(image_id))
        CHECK(ok)
        jpeg_image = iw_pb2.JpegImage()
        jpeg_image.ParseFromString(data)
        objectname = imageid_to_objectname[image_id]
        dirname = './extracted/%s' % (objectname)
        filename = '%s/%d.jpg' % (dirname, image_id)

        if not os.path.exists(dirname):
            os.makedirs(dirname)

        # Close every output file right away; the handles were previously
        # left open, one per extracted image.
        with open(filename, 'wb') as f:
            f.write(jpeg_image.data)

    return
Ejemplo n.º 12
0
def main():
    """Write a fixed list of images from the image PERT to <image id>.jpg.

    Each id is looked up in the table and its JPEG data is written to a
    file named after the id in the current directory.
    """
    images_pert_uri = 'local:///media/ebs/4a4b34/tide_v13/photoid_to_image.pert'
    images_to_extract = [
        2071492, 2087400, 2112291, 2102113, 2080088, 2083122, 2107730
    ]

    reader = py_pert.StringTableReader()
    CHECK(reader.Open(images_pert_uri))

    for image_id in images_to_extract:
        ok, data = reader.Find(py_strings.Uint64ToKey(image_id))
        CHECK(ok)
        jpeg_image = iw_pb2.JpegImage()
        jpeg_image.ParseFromString(data)
        filename = '%d.jpg' % (image_id)
        # Close every output file right away; the handles were previously
        # left open.
        with open(filename, 'wb') as f:
            f.write(jpeg_image.data)

    return
Ejemplo n.º 13
0
def PackImagesDirectoryToPert(src_path,
                              output_uri,
                              randomize=True,
                              max_num_images=None):
    """Pack the *.jpg files of a directory into a PERT string table.

    Each file becomes one serialized iw_pb2.JpegImage record (data, width,
    height) whose key is the file's basename.

    Args:
      src_path: existing directory; only files directly inside it that
        match *.jpg are packed.
      output_uri: uri of the table to create.
      randomize: if true, shuffle the file order; otherwise sort by name.
      max_num_images: if truthy, keep only that many files from the front
        of the (shuffled or sorted) list.
    """
    CHECK(os.path.isdir(src_path), 'expected dir: %s' % src_path)
    filenames = glob.glob('%s/*.jpg' % src_path)
    if randomize:
        # Shuffle so that filename order and table order are unrelated: any
        # prefix of the table should then be a random sample of images
        # rather than images that all come from the same object type.
        random.shuffle(filenames)
    else:
        filenames.sort()

    if max_num_images:
        filenames = filenames[:max_num_images]

    writer = py_pert.StringTableWriter()
    CHECK(writer.Open(output_uri, 1))
    progress = MakeProgressBar(len(filenames))
    for i, filename in enumerate(filenames):
        # JPEG data is binary: read in 'rb' mode and close the handle
        # promptly instead of leaking one descriptor per image.
        with open(filename, 'rb') as f:
            data = f.read()
        key = os.path.basename(filename)
        try:
            im = Image.open(StringIO.StringIO(data))
        except Exception:
            # `except Exception` rather than a bare except, so that
            # KeyboardInterrupt / SystemExit are no longer trapped here.
            # The `continue` only matters if LOG(FATAL, ...) returns -
            # presumably it aborts; verify.
            LOG(FATAL, 'Error opening %s' % (filename))
            continue
        width, height = im.size
        jpeg = iw_pb2.JpegImage()
        jpeg.data = data
        jpeg.width = width
        jpeg.height = height
        CHECK(jpeg.IsInitialized())
        writer.Add(key, jpeg.SerializeToString())
        progress.update(i)

    writer.Close()
    return
Ejemplo n.º 14
0
def main():
    """Queue a resize-and-upload job for every image of the image PERT.

    Connects to S3, creates (and, when reset_bucket is set, empties and
    recreates) the 'tide_image_cache' bucket, then hands each image's JPEG
    data to a ResizeAndUploadImageJob on a JobQueue and waits for all jobs.
    """
    # Named target sizes passed to every job (presumably pixel areas -
    # confirm against ResizeAndUploadImageJob).
    target_sizes = {}
    target_sizes['thumbnail'] = 100 * 100
    target_sizes['small'] = 640 * 480
    reset_bucket = False

    images_uri = 'local://home/ubuntu/Desktop/vol-0449ca74/itergraph/tide_v14/cropped_scaled_photoid_to_image_randomaccess.pert'
    bucket_name = 'tide_image_cache'

    connection = boto.connect_s3()
    bucket = connection.create_bucket(bucket_name)
    if reset_bucket:
        LOG(INFO, 'listing contents of bucket...')
        key_names = [entry.name for entry in bucket.list()]
        LOG(INFO, 'deleting contents of bucket...')
        bucket.delete_keys(key_names)
        connection.delete_bucket(bucket_name)
        bucket = connection.create_bucket(bucket_name)
        bucket.set_acl('public-read')

    reader = py_pert.StringTableReader()
    CHECK(reader.Open(images_uri))
    progress = iwutil.MakeProgressBar(reader.Entries())

    num_workers = 200
    max_queue_size = 200
    jobs = JobQueue(num_workers, max_queue_size)
    for index, record in enumerate(reader):
        key, value = record
        image_id = py_base.KeyToUint64(key)
        image = iw_pb2.JpegImage()
        image.ParseFromString(value)
        job = ResizeAndUploadImageJob(bucket, target_sizes, image_id,
                                      image.data)
        jobs.AddJob(job)
        progress.update(index)

    jobs.WaitForJobsDone()

    return
Ejemplo n.º 15
0
 def ExportImages(self):
   """Write every image to <output_path>/images/ and cache the image sizes.

   If <output_path>/images/size_cache.pickle already exists, the sizes are
   loaded from it into self.imageid_to_size and nothing is exported.
   Otherwise each record of the table at self.images_uri is written to
   '<JSPad(image_id)>.jpg', its (width, height) is recorded in
   self.imageid_to_size, and the mapping is saved to the cache file.
   """
   image_size_cache_filename = '%s/images/size_cache.pickle' % self.output_path
   if os.path.exists(image_size_cache_filename):
     self.imageid_to_size = iwutil.LoadObject(image_size_cache_filename)
     return
   base_path = '%s/images/' % (self.output_path)
   # The directory can exist without a cache file (e.g. after an export
   # that was interrupted before the cache was saved); an unconditional
   # mkdir would then raise and make every rerun fail.
   if not os.path.isdir(base_path):
     os.mkdir(base_path)
   LOG(INFO, 'exporting images...')
   reader = py_pert.StringTableReader()
   CHECK(reader.Open(self.images_uri))
   jpeg_image = iw_pb2.JpegImage()
   progress = iwutil.MakeProgressBar(reader.Entries())
   for i, (k,v) in enumerate(reader):
     image_id = py_base.KeyToUint64(k)
     jpeg_image.ParseFromString(v)
     filename = '%s/%s.jpg' % (base_path, JSPad(image_id))
     # `with` closes the file even if the write raises.
     with open(filename, 'wb') as f:
       f.write(jpeg_image.data)
     self.imageid_to_size[image_id] = (jpeg_image.width, jpeg_image.height)
     progress.update(i)
   iwutil.SaveObject(self.imageid_to_size, image_size_cache_filename)
   return
Ejemplo n.º 16
0
def main():
  """Build a "mixed" two-object TIDE dataset of side-by-side composites.

  Reads a TIDE dataset that must contain exactly two objects. The images
  labelled NONE of both objects are paired up; each pair becomes one
  composite image that is registered under the id of the first image of the
  pair, and the composite ids are split between the two new objects. Every
  other image of the new objects is combined with a landscape-oriented
  distractor image. The new objects and the new images are written to PERT
  tables below output_base_uri.
  """
#  images_uri = 'local://media/vol-0449ca74/itergraph/tide_v14/cropped_scaled_photoid_to_image_randomaccess.pert'
#  tide_uri = 'local://media/vol-0449ca74/itergraph/tide_v14/objectid_to_object.pert'
#  distractor_images_uri = 'local://media/vol-0449ca74/oxc1_100k/photoid_to_image.pert'
#  output_base_uri = 'local://media/vol-0449ca74/itergraph/tide_v14_mixed_v2/'

#  images_uri = 'local://media/vol-0449ca74/itergraph/tide_v16/photoid_to_image.pert'
#  tide_uri = 'local://media/vol-0449ca74/itergraph/tide_v16/objectid_to_object.pert'
#  distractor_images_uri = 'local://media/vol-0449ca74/oxc1_100k/photoid_to_image.pert'
#  output_base_uri = 'local://media/vol-0449ca74/itergraph/tide_v16_mixed/'

  images_uri = 'local://media/vol-0449ca74/itergraph/tide_v18/photoid_to_image.pert'
  tide_uri = 'local://media/vol-0449ca74/itergraph/tide_v18/objectid_to_object.pert'
  distractor_images_uri = 'local://media/vol-0449ca74/oxc1_100k/photoid_to_image.pert'
  output_base_uri = 'local://media/vol-0449ca74/itergraph/tide_v18_mixed/'

  output_tide_uri = '%s/objectid_to_object.pert' % (output_base_uri)
  output_images_uri = '%s/photoid_to_image.pert'  % (output_base_uri)

  image_loader = visutil.BatchImageLoader(images_uri)
  tide_objects = OpenTideDataset(tide_uri)
  distractor_image_loader = DistractorImageLoader(distractor_images_uri)
  CHECK_EQ(len(tide_objects), 2)
  object_a = tide_objects[0]
  object_b = tide_objects[1]

  new_object_a = InitNewObject(object_a)
  new_object_b = InitNewObject(object_b)

  a_none_image_ids = [photo.id for photo in object_a.photos if photo.label == tide_pb2.NONE ]
  b_none_image_ids = [photo.id for photo in object_b.photos if photo.label == tide_pb2.NONE ]

  # Each composite is registered under the id of its image from object a.
  mixed_aux_images = {}
  for imageid_a, imageid_b in zip(a_none_image_ids, b_none_image_ids):
    mixed_aux_images[imageid_a] = (imageid_a, imageid_b)

  mixed_image_ids = list(mixed_aux_images.keys())
  n = len(mixed_image_ids) // 2
  InitNoneLabels(new_object_a, mixed_image_ids[0:n])
  # [n:] takes the whole second half; the former [n:-1] silently dropped
  # the last composite id.
  InitNoneLabels(new_object_b, mixed_image_ids[n:])
  new_objects = [new_object_a, new_object_b]

  image_ids = []
  for obj in new_objects:
    for photo in obj.photos:
      image_ids.append(photo.id)

  image_ids.sort()

  # write new tide pert
  tide_writer = py_pert.ProtoTableWriter()
  tide_writer.Open(tide_pb2.Object(), output_tide_uri, 1)
  for obj in new_objects:
    tide_writer.Add(iwutil.Uint64ToKey(obj.id), obj.SerializeToString())
  tide_writer.Close()

  # write new image pert
  try:
    image_writer = py_pert.ProtoTableWriter()
    image_writer.Open(iw_pb2.JpegImage(), output_images_uri, 1)
    used_image_ids = set()
    progress = iwutil.MakeProgressBar(len(image_ids))
    for i, image_id in enumerate(image_ids):
      jpeg = None
      if image_id in mixed_aux_images:
        imageid_a, imageid_b = mixed_aux_images[image_id]
        jpeg_a = image_loader.GetImage(imageid_a)
        jpeg_b = image_loader.GetImage(imageid_b)
        if jpeg_a is None or jpeg_b is None:
          LOG(INFO, 'skipping missing jpeg')
          continue
        jpeg = CreateMixedJpeg(jpeg_a, jpeg_b)
      else:
        # Treat a missing base image like a missing image in the mixed
        # branch; passing None on would fail inside CreateMixedJpeg.
        base_jpeg = image_loader.GetImage(image_id)
        if base_jpeg is None:
          LOG(INFO, 'skipping missing jpeg')
          continue
        # Pair it with the next distractor that is wider than it is tall.
        distractor = None
        while True:
          distractor = distractor_image_loader.GetNextImage()
          if distractor.width > distractor.height:
            break
        CHECK(distractor)
        jpeg = CreateMixedJpeg(base_jpeg, distractor)

      CHECK(image_id not in used_image_ids)
      CHECK(jpeg)
      image_writer.Add(iwutil.Uint64ToKey(image_id), jpeg.SerializeToString())
      used_image_ids.add(image_id)
      progress.update(i)
    image_writer.Close()
  except Exception as e:
    # Still best-effort (the run is not aborted), but the failure is now
    # reported instead of being swallowed without a trace.
    LOG(INFO, 'error while writing mixed image pert: %s' % (e))

  return