コード例 #1
0
    def read_images(self):
        """Queue every JSON record read from ``self.fio_r`` for tagging.

        Each line of ``self.fio_r`` is stripped and parsed as JSON. Lines
        that fail to parse are skipped. Every parsed item is submitted to a
        ``Worker(50)`` through ``add_task(self.tag_image, item)``; the method
        returns after ``worker.join()`` returns.
        """
        worker = Worker(50)
        num_queued = 0

        for line in self.fio_r:
            try:
                item = json.loads(line.strip())
            except ValueError:
                # Malformed JSON line: skip it. json.JSONDecodeError is a
                # ValueError subclass, so this still covers every parse
                # failure without swallowing KeyboardInterrupt/SystemExit.
                continue
            worker.add_task(self.tag_image, item)
            num_queued += 1

        # The original logged len(items), but no name `items` exists in this
        # method, so it raised NameError before join() was ever reached.
        logging.debug('%d images have been read', num_queued)

        worker.join()
コード例 #2
0
  def run_page_crawler(self):
    ''' Dequeue pages from the crawler queue and hand them to a worker.

    Creates ``Worker(self.args.thread)`` and repeatedly calls
    ``self.queue.de_queue()``. Every truthy page bumps
    ``self.stats.crawl_in_progress``, gets its ``time_dequeue`` stamped with
    the current time, and is submitted via
    ``worker.add_task(self.call_crawl_page, page)``. Once the counter
    reaches ``self.max_num_pages`` the loop ends; the method then joins the
    worker, sets ``self.shutdown`` to True and puts
    ``self.end_page_log_item`` on ``self.log_queue``.
    '''

    worker = Worker(self.args.thread)
    while True:
      # NOTE(review): if de_queue() returns a falsy value without blocking
      # when the queue is empty, this loop spins -- confirm its semantics.
      page = self.queue.de_queue()
      if page:
        self.stats.crawl_in_progress += 1
        page.time_dequeue = time.time()
        worker.add_task(self.call_crawl_page, page)

      # Use >= rather than ==: an exact-equality test never fires if the
      # counter starts above or skips past the limit, leaving the loop
      # running forever.
      if self.stats.crawl_in_progress >= self.max_num_pages:
        break

    worker.join()
    self.shutdown = True
    self.log_queue.put(self.end_page_log_item)
コード例 #3
0
    def run_page_crawler(self):
        ''' Dequeue pages from the crawler queue and hand them to a worker.

        Creates ``Worker(self.args.thread)`` and repeatedly calls
        ``self.queue.de_queue()``. Every truthy page bumps
        ``self.stats.crawl_in_progress``, gets its ``time_dequeue`` stamped
        with the current time, and is submitted via
        ``worker.add_task(self.call_crawl_page, page)``. Once the counter
        reaches ``self.max_num_pages`` the loop ends; the method then joins
        the worker, sets ``self.shutdown`` to True and puts
        ``self.end_page_log_item`` on ``self.log_queue``.
        '''

        worker = Worker(self.args.thread)
        while True:
            # NOTE(review): if de_queue() returns a falsy value without
            # blocking when the queue is empty, this loop spins -- confirm
            # its semantics.
            page = self.queue.de_queue()
            if page:
                self.stats.crawl_in_progress += 1
                page.time_dequeue = time.time()
                worker.add_task(self.call_crawl_page, page)

            # Use >= rather than ==: an exact-equality test never fires if
            # the counter starts above or skips past the limit, leaving the
            # loop running forever.
            if self.stats.crawl_in_progress >= self.max_num_pages:
                break

        worker.join()
        self.shutdown = True
        self.log_queue.put(self.end_page_log_item)