def process_venue_list(venue_id_list):
    pool = ThreadPool(5)
    m = ThreadManager()
    d = m.dict()
    # Mark every venue as pending so progress can be tracked via the shared dict.
    for venue_id in venue_id_list:
        d[venue_id] = 'none'
    print("[.] Processing %s venues" % len(venue_id_list))
    # Each worker receives (venue_id, shared dict) so it can record its own progress.
    result = pool.map_async(process_venue, zip(venue_id_list, itertools.repeat(d)))
    monitor_map_progress(result, d, len(venue_id_list))
    result.wait()
    _ = result.get()  # re-raises any exception thrown inside a worker
    print("[x] Done with %s venues" % len(venue_id_list))
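# monitor_map_progress is defined elsewhere in the project; the sketch below is a
# hypothetical stand-in, assuming it simply polls the shared dict until the async
# map finishes, counting entries that workers have flipped away from 'none'.
import time

def monitor_map_progress(result, d, total):
    # Poll until pool.map_async() reports completion, printing a rough progress line.
    while not result.ready():
        done = sum(1 for v in d.values() if v != 'none')
        print("[.] %s/%s venues processed" % (done, total))
        time.sleep(1)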
def get_init_params(self, manager: Manager):
    # Use a manager-backed dict when one is given so state can be shared across workers.
    d = {}
    if manager is not None:
        d = manager.dict()
    return None, self.zoom_offset, self.max_zoom_level, d
import json
import logging
from multiprocessing.dummy import Manager
import random
from urllib.parse import urlparse, parse_qs

from config.config import BASEPYCURLCONFIG
from core import MySpider
from core.Spider import CrawlJob

__author__ = 'Florian'

# Thread-shared dict for tracking what has already been crawled.
m = Manager()
crawled = m.dict()

# Scraped records (WARNING level) go to clubs.jsonl; debug output goes to the console.
logger = logging.getLogger("logger")
fh = logging.FileHandler("clubs.jsonl", 'a+')
simpleFormat = logging.Formatter("%(message)s")
fh.setFormatter(simpleFormat)
fh.setLevel(logging.WARNING)
ch = logging.StreamHandler()
ch.setFormatter(simpleFormat)
ch.setLevel(logging.DEBUG)
logger.addHandler(fh)
logger.addHandler(ch)
logger.setLevel(logging.DEBUG)


@MySpider.QueueInitializer.register()
def seeds():
    jobs = []
    logger = logging.getLogger("logger")
    for federation in [