Example 1
import importlib
import time

import webcam.metadata.manager


def scrape(source, identifiers):
  """Scrape webcam metadata.

  Args:
    source (str): The source to scrape from.
    identifiers (list (str)): A list of identifiers uniquely identifying
        webcams to scrape.
  """
  try:
    # Dynamically import the scraper module matching the requested source.
    scrapers = "webcam.metadata.scraper"
    scraper_module = importlib.import_module("%s.%s" % (scrapers, source))
  except ImportError:
    print("Could not import scraper %s." % source)
    return
  num_unsaved = 0
  with webcam.metadata.manager.Manager() as manager:
    manager.set_scraper(scraper_module.Scraper)
    for identifier in identifiers:
      manager.get(identifier)
      num_unsaved += 1
      # Pause between requests to avoid hammering the source.
      time.sleep(1)
      # Persist changes in batches of 50.
      if num_unsaved >= 50:
        manager.persist_changes()
        num_unsaved = 0
    # Flush whatever is left from the final partial batch.
    if num_unsaved > 0:
      manager.persist_changes()
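
A minimal usage sketch; the "example_source" name and the identifiers below are placeholders, not taken from the original code:

# Assumes a webcam.metadata.scraper.example_source module defining Scraper.
scrape("example_source", ["cam-001", "cam-002"])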
Example 2
import queue
import threading

import webcam.metadata.manager
import webcam.webcam


def scrape_frames(source, identifiers, period, duration, num_scrapers):
  """Scrapes frames in parallel.

  Args:
    source (str): The source to scrape from.
    identifiers (list (str)): A list of identifiers which uniquely identify a
        webcam for the source.
    period (datetime.timedelta): The period between frames.
    duration (datetime.timedelta): The duration to scrape for.
    num_scrapers (int): The number of scraper threads.
  """
  # Set up the manager.
  manager = webcam.metadata.manager.Manager()

  # Populate a list of live webcams to scrape.
  webcams = []
  for identifier in identifiers:
    cam = webcam.webcam.Webcam(manager.get(identifier, source))
    if cam.is_live():
      webcams.append(cam)

  # Delegate the work to a dispatcher (producer) and scrapers (consumers).
  webcam_queue = queue.Queue()
  dispatcher = threading.Thread(target=dispatcher_thread_fn, args=(webcams,
      webcam_queue, period, duration))
  scrapers = []
  for i in range(num_scrapers):
    scrapers.append(threading.Thread(target=scraper_thread_fn,
        args=(webcam_queue, duration)))

  # Start the threads.
  dispatcher.start()
  for scraper in scrapers:
    scraper.start()

  # Wait for the threads to finish.
  dispatcher.join()
  for scraper in scrapers:
    scraper.join()
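
The helpers dispatcher_thread_fn and scraper_thread_fn are referenced above but not shown. Below is a minimal sketch of the producer/consumer pair, assuming a time-bounded loop on both sides and a hypothetical Webcam.scrape_frame() method; none of these details are confirmed by the original:

import datetime
import queue
import time


def dispatcher_thread_fn(webcams, webcam_queue, period, duration):
  # Producer: enqueue every live webcam once per period until the duration
  # elapses.
  deadline = datetime.datetime.now() + duration
  while datetime.datetime.now() < deadline:
    for cam in webcams:
      webcam_queue.put(cam)
    time.sleep(period.total_seconds())


def scraper_thread_fn(webcam_queue, duration):
  # Consumer: pull webcams off the queue and grab one frame from each.
  deadline = datetime.datetime.now() + duration
  while datetime.datetime.now() < deadline:
    try:
      cam = webcam_queue.get(timeout=1)
    except queue.Empty:
      continue
    cam.scrape_frame()  # Hypothetical method; the real frame API is not shown.
    webcam_queue.task_done()

The timeout on get() keeps the consumers from blocking forever once the dispatcher stops producing, so every thread exits shortly after the duration elapses and the joins in scrape_frames return.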