def got_coll_params(params):
    """Apply collection parameters and start the initial fetches.

    ``params`` is merged straight into the instance ``__dict__``.  The
    names ``self``, ``pages``, ``template_exclusion_category`` and
    ``print_template_pattern`` are free variables here -- presumably bound
    by the enclosing scope, which is not visible from this function.
    """
    self.__dict__.update(params)

    # Explicitly supplied (truthy) values win over whatever params carried.
    if template_exclusion_category:
        self.template_exclusion_category = template_exclusion_category
    if print_template_pattern:
        self.print_template_pattern = print_template_pattern

    # A print-template maker only exists when a pattern is configured.
    if not self.print_template_pattern:
        self.make_print_template = None
    else:
        self.make_print_template = utils.get_print_template_maker(
            self.print_template_pattern)

    titles, revids = self._split_titles_revids(pages)

    # Rendered HTML first, then the "used" lookups, titles before revids.
    for kind, wanted in (("page", titles), ("oldid", revids)):
        self.fetch_html(kind, wanted)
    for kind, wanted in (("titles", titles), ("revids", revids)):
        self.fetch_used(kind, wanted)

    self.report()
    self.dispatch()
def __init__(self, api, fsout, pages, licenses,
             status=None,
             progress=None,
             print_template_pattern=None,
             template_exclusion_category=None,
             cover_image=None,
             imagesize=800,
             fetch_images=True):
    """Initialise the fetch state and schedule the initial fetches.

    Parameters:
        api: API object.  Must expose ``apiurl``; its ``report`` attribute
            is replaced with ``self.report``.  It is also handed to
            ``self.get_siteinfo_for`` and ``mwapi.get_collection_params``.
        fsout: output object; ``write_siteinfo(siteinfo)`` is called on it.
        pages: page specification, stored as ``self.pages`` and split via
            ``self._split_titles_revids`` into titles and revision ids.
        licenses: stored unchanged as ``self.licenses``.
        status: stored unchanged; also passed to ``shared_progress`` when
            no ``progress`` is given.
        progress: progress object; when falsy a new
            ``shared_progress(status=status)`` is created.
        print_template_pattern: when truthy, overrides the value obtained
            from the collection parameters.
        template_exclusion_category: when truthy, overrides the value
            obtained from the collection parameters.
        cover_image: stored unchanged as ``self.cover_image``.
        imagesize: stored unchanged as ``self.imagesize``.
        fetch_images: stored unchanged as ``self.fetch_images``.
    """
    self.dispatch_event = gevent.event.Event()
    self.api_semaphore = gevent.coros.Semaphore(20)

    # Defaults; the collection parameters fetched below may overwrite them.
    self.print_template_pattern = None
    self.template_exclusion_category = None
    self.template_blacklist = None
    self.cover_image = cover_image

    self.pages = pages

    self.image_download_pool = gevent.pool.Pool(10)

    self.fatal_error = "stopped by signal"

    self.api = api
    self.api.report = self.report
    self.api_cache = {self.api.apiurl: self.api}

    self.fsout = fsout
    self.licenses = licenses
    self.status = status
    self.progress = progress or shared_progress(status=status)

    self.imagesize = imagesize
    self.fetch_images = fetch_images

    self.scheduled = set()

    self.count_total = 0
    self.count_done = 0
    self.redirects = {}
    self.cat2members = {}

    self.img_max_retries = 2

    self.title2latest = {}

    self.pages_todo = []
    self.revids_todo = []
    self.imageinfo_todo = []
    self.imagedescription_todo = {}  # base path -> list
    self._nshandler = None

    siteinfo = self.get_siteinfo_for(self.api)
    self.fsout.write_siteinfo(siteinfo)
    self.nshandler = nshandling.nshandler(siteinfo)

    params = mwapi.get_collection_params(api)
    self.__dict__.update(params)

    # Explicit keyword arguments win over the collection parameters.
    if template_exclusion_category:
        self.template_exclusion_category = template_exclusion_category
    if print_template_pattern:
        self.print_template_pattern = print_template_pattern

    # Validate the category name only once its final value is known; any
    # earlier in this method the attribute is still the None default and
    # the check could never trigger.
    if self.template_exclusion_category:
        ns, _partial, _fqname = self.nshandler.splitname(
            self.template_exclusion_category, 14)
        if ns != 14:
            # Single-argument form: prints the same text whether ``print``
            # is a statement or a function.
            print("bad category name: " + repr(self.template_exclusion_category))

    if self.print_template_pattern:
        self.make_print_template = utils.get_print_template_maker(
            self.print_template_pattern)
    else:
        self.make_print_template = None

    titles, revids = self._split_titles_revids(pages)

    self.pool = gevent.pool.Pool()
    self.refcall_pool = gevent.pool.Pool(1024)

    # Initial work items, all routed through self._refcall.
    self._refcall(self.fetch_html, "page", titles)
    self._refcall(self.fetch_html, "oldid", revids)
    self._refcall(self.fetch_used, "titles", titles)
    self._refcall(self.fetch_used, "revids", revids)
def set_make_print_template(self):
    """Set ``self.make_print_template`` from ``self.nfo``.

    Looks up ``"print_template_pattern"`` in ``self.nfo``.  If the value is
    truthy and contains the ``"$1"`` placeholder, a maker is built with
    ``utils.get_print_template_maker``; otherwise the attribute is set to
    ``None``.
    """
    pattern = self.nfo.get("print_template_pattern")

    # Missing/empty pattern, or one without the placeholder: no maker.
    if not pattern or "$1" not in pattern:
        self.make_print_template = None
        return

    self.make_print_template = utils.get_print_template_maker(pattern)