def scrape(source, identifiers):
    """Scrape webcam metadata.

    Args:
        source (str): Name of the scraper module under
            ``webcam.metadata.scraper`` to import and use.
        identifiers (list (string)): A list of identifiers uniquely
            identifying webcams to scrape.
    """
    scrapers = "webcam.metadata.scraper"
    try:
        # Only the dynamic import can raise; keep the try body minimal.
        scraper_module = __import__("%s.%s" % (scrapers, source),
                                    fromlist=[scrapers])
    except ImportError:
        print("Could not import scraper %s." % source)
        return

    num_unsaved = 0
    with webcam.metadata.manager.Manager() as manager:
        manager.set_scraper(scraper_module.Scraper)
        for identifier in identifiers:
            manager.get(identifier)
            num_unsaved += 1
            # Throttle requests to avoid hammering the remote source.
            time.sleep(1)
            # Persist in batches so a crash loses at most 50 results.
            if num_unsaved >= 50:
                manager.persist_changes()
                num_unsaved = 0
        # Flush the final partial batch (< 50 results) before the manager
        # context exits. NOTE(review): redundant but harmless if
        # Manager.__exit__ already persists pending changes — confirm.
        if num_unsaved:
            manager.persist_changes()
def scrape_frames(source, identifiers, period, duration, num_scrapers):
    """Scrapes frames in parallel.

    Args:
        source (str): The source to scrape from.
        identifiers (list (str)): A list of identifiers which uniquely
            identify a webcam for the source.
        period (datetime.timedelta): The period between frames.
        duration (datetime.timedelta): The duration to scrape for.
        num_scrapers (int): The number of scraper threads.
    """
    manager = webcam.metadata.manager.Manager()

    # Fetch metadata for every identifier and keep only the live webcams.
    live_webcams = [
        cam
        for cam in (
            webcam.webcam.Webcam(manager.get(identifier, source))
            for identifier in identifiers
        )
        if cam.is_live()
    ]

    # Producer/consumer setup: one dispatcher feeds the queue, and
    # num_scrapers worker threads drain it.
    work_queue = queue.Queue()
    dispatcher = threading.Thread(
        target=dispatcher_thread_fn,
        args=(live_webcams, work_queue, period, duration))
    workers = [
        threading.Thread(target=scraper_thread_fn,
                         args=(work_queue, duration))
        for _ in range(num_scrapers)
    ]

    # Run all threads, then block until every one of them has finished.
    dispatcher.start()
    for worker in workers:
        worker.start()

    dispatcher.join()
    for worker in workers:
        worker.join()