コード例 #1
0
  def run(self):
    """Drain `self.q_in`, load each image and push its base64 payload to `self.q_out`.

    Each input item is a `(sha1, in_img, push_back)` tuple. `in_img` is loaded with
    `get_buffer_from_URL` when `self.url_input` is truthy, otherwise with
    `get_buffer_from_filepath`. If the URL download raises and `self.fallback_pattern`
    is set, one more download is attempted from `self.fallback_pattern.format(sha1)`.

    Output items are `(sha1, b64_buffer, push_back, None)` on success and
    `(sha1, None, push_back, exception)` when loading raised. Nothing is pushed when
    the loader returns a falsy buffer without raising.
    """
    from cufacesearch.imgio.imgio import get_buffer_from_URL, get_buffer_from_filepath, buffer_to_B64
    try:
      from queue import Empty  # Python 3
    except ImportError:
      from Queue import Empty  # Python 2

    while not self.q_in.empty():
      try:
        # The queue should already have items, no need to block
        (sha1, in_img, push_back) = self.q_in.get(False)
      except Empty:
        # Another worker took the last item between empty() and get();
        # any other error is left to propagate instead of being retried forever
        continue

      try:
        if self.url_input:
          try:
            img_buffer = get_buffer_from_URL(in_img)
          except Exception:
            if not self.fallback_pattern:
              # Bare raise keeps the traceback of the failed download
              raise
            # Adding fallback to Tellfinder images here
            # TODO: should we and how could we also update URL in DB?
            img_buffer = get_buffer_from_URL(self.fallback_pattern.format(sha1))
        else:
          img_buffer = get_buffer_from_filepath(in_img)
        if img_buffer:
          # Push
          self.q_out.put((sha1, buffer_to_B64(img_buffer), push_back, None))
      except Exception as inst:
        # Report the failure to the consumer rather than dropping the item
        self.q_out.put((sha1, None, push_back, inst))

      # Mark as done
      self.q_in.task_done()
コード例 #2
0
    def run(self):
        """Drain `self.q_in`, download each URL and push the outcome to `self.q_out`.

        Each input item is a `(url, obj_pos)` pair. For every item exactly one tuple
        `(url, obj_pos, img_buffer, img_info, start_process, end_process, inst)` is
        pushed, where:

        - `img_buffer` is the value returned by `get_buffer_from_URL`, or None if the
          download raised;
        - `img_info` is `(sha1, img_type, width, height)` when a truthy buffer was
          obtained, else None;
        - `start_process` / `end_process` are `time.time()` stamps around the work;
        - `inst` is the exception raised while downloading or inspecting, else None.
        """
        # Cannot use local import with main in this file
        from cufacesearch.imgio.imgio import get_SHA1_img_info_from_buffer, get_buffer_from_URL
        try:
            from queue import Empty  # Python 3
        except ImportError:
            from Queue import Empty  # Python 2

        while not self.q_in.empty():
            try:
                # The queue should already have items, no need to block
                url, obj_pos = self.q_in.get(False)
            except Empty:
                # Another worker took the last item between empty() and get()
                continue

            img_buffer = None
            img_info = None
            inst = None
            start_process = time.time()
            try:
                img_buffer = get_buffer_from_URL(url)
                if img_buffer:
                    sha1, img_type, width, height = get_SHA1_img_info_from_buffer(
                        img_buffer)
                    img_info = (sha1, img_type, width, height)
            except Exception as err:
                # Python 3 unbinds the `as` target when the handler exits, so the
                # error is copied to a name that is still defined for the push below
                inst = err
            end_process = time.time()

            # Push
            self.q_out.put((url, obj_pos, img_buffer, img_info, start_process,
                            end_process, inst))

            # Mark as done
            self.q_in.task_done()
コード例 #3
0
    def process(self):
        """Download every image yielded by `self.get_next_img()` and publish it.

        `get_next_img()` yields `(sha1, url)` pairs; only the URL is used here, the
        SHA1 being recomputed from the downloaded buffer. Each download is timed and
        recorded through `self.toc_process_ok` / `self.toc_process_failed`, and the
        messages produced by `self.build_image_msg` are sent to
        `self.images_out_topic`.
        """
        from cufacesearch.imgio.imgio import get_SHA1_img_info_from_buffer, get_buffer_from_URL

        # Get images data and infos
        for _, url in self.get_next_img():

            # Periodic statistics, every `display_count` processed images
            nb_seen = self.process_count + self.process_failed
            if nb_seen % self.display_count == 0:
                avg_process_time = self.process_time / max(1, nb_seen)
                print("[%s] dl count: %d, failed: %d, time: %f" %
                      (self.pp, self.process_count, self.process_failed,
                       avg_process_time))

            dict_imgs = {}
            # Could we multi-thread that?
            start_process = time.time()
            if self.verbose > 2:
                print("[{}.process_one: info] Downloading image from: {}".format(
                    self.pp, url))
            try:
                img_buffer = get_buffer_from_URL(url)
                if not img_buffer:
                    # Download returned nothing without raising
                    self.toc_process_failed(start_process)
                    if self.verbose > 1:
                        print("[{}.process_one: info] Could not download image from: {}".format(
                            self.pp, url))
                else:
                    sha1, img_type, width, height = get_SHA1_img_info_from_buffer(
                        img_buffer)
                    img_info = {
                        'format': img_type,
                        'width': width,
                        'height': height
                    }
                    dict_imgs[url] = {
                        'img_buffer': img_buffer,
                        'sha1': sha1,
                        'img_info': img_info
                    }
                    self.toc_process_ok(start_process)
            except Exception as inst:
                self.toc_process_failed(start_process)
                if self.verbose > 0:
                    print("[{}.process_one: error] Could not download image from: {} ({})".format(
                        self.pp, url, inst))

            # Push to images_out_topic
            # Beware, this pushes a LOT of data to the Kafka topic self.images_out_topic...
            for img_out_msg in self.build_image_msg(dict_imgs):
                self.producer.send(self.images_out_topic, img_out_msg)
コード例 #4
0
    # Fragment of a larger function (its header is outside this excerpt): for each
    # row, recompute the image data with two featurizers, save both as JPEG files
    # and print per-channel difference statistics.
    # NOTE(review): `diffs` is not used in the lines visible here.
    diffs = []

    rows = []
    # HBase is only queried when the first entry is truthy; this indexing raises
    # IndexError if `list_sha1s` is empty.
    if list_sha1s[0]:
        hbi = HBaseIndexerMinimal(conf, prefix="HBI_")
        rows = hbi.get_columns_from_sha1_rows(
            list_sha1s, columns=["info:featnorm_cu", "info:s3_url"])
    sbclif = SentiBankCmdLineImgFeaturizer(conf)
    sbpcif = SentiBankPyCaffeImgFeaturizer(pyconf)

    # Each row is indexed as row[0] (passed as `sha1` and used as file-name prefix)
    # and row[1] (a mapping keyed by the requested column names).
    for row in rows:
        # NOTE(review): feat_hbase_b64, start_extr, feat and fpydata are assigned
        # but never read in the lines visible here.
        feat_hbase_b64 = featB64decode(row[1]["info:featnorm_cu"])
        #print feat_hbase_b64.shape
        img_url = row[1]["info:s3_url"]
        start_extr = time.time()
        img_buffer = get_buffer_from_URL(img_url)
        feat, data = sbclif.featurize(img_buffer, sha1=row[0])
        # Rewind before handing the same buffer to the second featurizer
        # (presumably the first call consumed it -- confirm).
        img_buffer.seek(0)
        pydata = sbpcif.preprocess_img(img_buffer)
        fpydata = pydata.flatten()
        # View the command-line featurizer output as a 3 x 227 x 227 array
        idata = data.reshape((3, 227, 227))
        print img_url
        print idata.shape
        print pydata.shape
        # swapaxes(0, 2) exchanges the first and last axes, so the first axis of
        # size 3 ends up last; the two 227-sized axes are transposed as a result.
        # Assumes pydata has the same layout as idata -- TODO confirm.
        misc.imsave(row[0] + "_cmd.jpg", np.swapaxes(idata, 0, 2))
        misc.imsave(row[0] + "_py.jpg", np.swapaxes(pydata, 0, 2))
        # sqrt of the squared difference, i.e. the element-wise absolute
        # difference, computed separately for each of the 3 slices.
        sqdiff = [
            np.sqrt((idata[c, :, :] - pydata[c, :, :])**2) for c in range(3)
        ]
        # NOTE(review): the sum is always taken over sqdiff[0] while mean/max/min
        # use sqdiff[c]; this looks like a typo for sqdiff[c] -- confirm.
        print[(np.sum(sqdiff[0]), np.mean(sqdiff[c]), np.max(sqdiff[c]),
               np.min(sqdiff[c])) for c in range(3)]
コード例 #5
0
    def process_one(self, msg):
        """Download the images referenced by one message and publish the results.

        `msg.value` is parsed as JSON and `self.get_images_urls` returns the
        `(url, obj_pos)` pairs to fetch. Every successfully downloaded image is
        stored in a dict keyed by URL; each download is timed and recorded through
        `self.toc_process_ok` / `self.toc_process_failed`. The dict is then used to
        build one message for `self.cdr_out_topic` (when defined) and the messages
        sent to `self.images_out_topic`.
        """
        # Cannot use local import with main in this file
        from cufacesearch.imgio.imgio import get_SHA1_img_info_from_buffer, get_buffer_from_URL

        self.print_stats(msg)
        msg_value = json.loads(msg.value)

        # From msg value get list_urls for image objects only
        list_urls = self.get_images_urls(msg_value)
        if self.verbose > 3:
            print("[{}.process_one: info] Got {} image urls from ad id {}".format(
                self.pp, len(list_urls), msg_value['_id']))

        # Get images data and infos
        dict_imgs = {}
        # Could we multi-thread that?
        for url, obj_pos in list_urls:
            # process time is by image and not by msg...
            start_process = time.time()
            if self.verbose > 2:
                print("[{}.process_one: info] Downloading image from: {}".format(
                    self.pp, url))
            try:
                img_buffer = get_buffer_from_URL(url)
                if not img_buffer:
                    # Download returned nothing without raising
                    self.toc_process_failed(start_process)
                    if self.verbose > 1:
                        print("[{}.process_one: info] Could not download image from: {}".format(
                            self.pp, url))
                else:
                    sha1, img_type, width, height = get_SHA1_img_info_from_buffer(
                        img_buffer)
                    img_info = {
                        'format': img_type,
                        'width': width,
                        'height': height
                    }
                    dict_imgs[url] = {
                        'obj_pos': obj_pos,
                        'img_buffer': img_buffer,
                        'sha1': sha1,
                        'img_info': img_info
                    }
                    self.toc_process_ok(start_process)
            except Exception as inst:
                self.toc_process_failed(start_process)
                if self.verbose > 0:
                    print("[{}.process_one: error] Could not download image from: {} ({})".format(
                        self.pp, url, inst))
                    sys.stdout.flush()

        # Push to cdr_out_topic
        # check if self.cdr_out_topic is empty? as this is only for DIG ingestion...
        if not self.cdr_out_topic:
            print("[{}.process_one: warning] cdr_out_topic is not defined".format(
                self.pp))
        else:
            cdr_msg = self.build_cdr_msg(msg_value, dict_imgs)
            self.producer.send(self.cdr_out_topic, cdr_msg)

        # NB: we could have all extraction registered here,
        # and not pushing an image if it has been processed by all extractions.
        # But that violates the consumer design of Kafka...

        # Push to images_out_topic
        for img_out_msg in self.build_image_msg(dict_imgs):
            self.producer.send(self.images_out_topic, img_out_msg)
コード例 #6
0
def show_face_from_URL(img_url, bbox, close_after=None):
  """Download the image at `img_url`, open it with PIL and pass it to `show_face`.

  `bbox` and `close_after` are forwarded to `show_face` unchanged.
  """
  from cufacesearch.imgio.imgio import get_buffer_from_URL
  from PIL import Image
  show_face(Image.open(get_buffer_from_URL(img_url)), bbox, close_after)