def pull_events(options, settings):
    """Run the trawlers and send the resulting instructions to OxTalks.

    Every source returned by ``load_sources`` is run on a thread pool via
    ``_load_talks_from_source``. The collected instructions are filtered so
    that ``AddEvent`` instructions are kept only when ``is_worthy_talk``
    accepts their event, then either printed (dry run) or uploaded.

    Args:
        options: Object with a ``dry_run`` attribute and, optionally, a
            ``trawler`` attribute naming the single source to run.
        settings: Object providing ``oxtalks_hostname``, ``oxtalks_username``,
            ``oxtalks_password`` and ``sources_filename``.

    Raises:
        ValueError: If ``options.trawler`` is set but no source has that name.
    """
    aggregator_summary_logger.info("Starting pull_events")
    talks_api = OxTalksAPI(settings.oxtalks_hostname,
                           settings.oxtalks_username,
                           settings.oxtalks_password)
    # Only the list manager is used below; the loaded talks are ignored.
    _talks, list_manager = talks_api.load_talks()
    sources = load_sources(settings.sources_filename)

    single_trawler_to_run = getattr(options, "trawler", None)
    if single_trawler_to_run is not None:
        # A real list (not filter()) so the emptiness check also works on
        # Python 3, where filter() returns a lazy iterator without len().
        sources = [candidate for candidate in sources
                   if candidate.name == single_trawler_to_run]
        if not sources:
            raise ValueError("Could not find trawler named %s, use "
                             "--list_trawlers to see all those in the system"
                             % single_trawler_to_run)

    all_instructions = []
    failed_trawler = None
    # Make use of threadpoolexecutor to run all sources on different threads.
    with ThreadPoolExecutor(max_workers=10) as sources_executor:
        source_results = []
        for source in sources:
            future = sources_executor.submit(_load_talks_from_source, source, list_manager)
            source_results.append((source, future))
        # Collect in submission order; result() blocks until that source is done.
        for source, future in source_results:
            new_instructions, succeeded = future.result()
            all_instructions.extend(new_instructions)
            if not succeeded:
                # Only the most recent failure is remembered for the log below.
                failed_trawler = source

    # Remove dull events. Built as a list because DeleteOutstanding may be
    # appended below, which a Python 3 filter object would not support.
    all_instructions = [
        instruction for instruction in all_instructions
        if not isinstance(instruction, AddEvent) or is_worthy_talk(instruction.event)
    ]

    if single_trawler_to_run is None:
        if failed_trawler is None:
            all_instructions.append(DeleteOutstanding())
        else:
            logger.error("Since %s failed, we will not flush out stale "
                         "events, as we can't be sure it's not a temporary "
                         "failure" % failed_trawler.name)

    if options.dry_run:
        talks_api.print_dry_run_output(all_instructions)
    else:
        try:
            talks_api.upload(all_instructions)
        except Exception:
            # Upload failures are logged, not raised. Catching Exception
            # rather than everything lets KeyboardInterrupt/SystemExit through.
            log_exception(logger, "Failed to upload events to OxTalks")
def _load_talks_from_source(source, list_manager):
    """Call one source and wrap each event it yields in an ``AddEvent``.

    Args:
        source: Callable invoked as ``source(list_manager)``; its result is
            iterated to obtain events.
        list_manager: Passed through to ``source`` unchanged.

    Returns:
        A ``(instructions, ok)`` tuple. ``ok`` is False when an ``Exception``
        was raised while loading; ``instructions`` then still holds whatever
        was gathered before the failure.
    """
    start_time = time.time()
    instructions = []
    ok = True
    try:
        logger.info("Loading events from %s" % source)
        # Append one at a time so a mid-iteration failure keeps partial results.
        for talk in source(list_manager):
            instructions.append(AddEvent(talk))
        logger.info("Finished loading events from %s" % source)
    except Exception:
        log_exception(logger, "Failed to load from source %s" % source)
        # Return as many events as we were able to fetch. Maybe should return [] ?
        ok = False
    finally:
        # The summary line is written on every exit path.
        outcome = "successfully" if ok else "unsuccessfully"
        elapsed = time.time() - start_time
        aggregator_summary_logger.info(
            "Source %s finished %s. It took %i to return %i events"
            % (source, outcome, elapsed, len(instructions))
        )
    return instructions, ok