def is_wikisource_author_page(self, title):
    """Tell whether title lies in the author namespace of a Wikisource site.

    @param title: Page title including its namespace prefix.
    @type title: string

    Returns True when self.site belongs to the 'wikisource' family, that
    family lists an author namespace for self.site.lang, and title starts
    with that namespace's name followed by ':'. Returns None otherwise.
    """
    if self.site.family.name != 'wikisource':
        return

    # A family without author namespaces for this language simply has no
    # author pages to detect; only lookup failures are treated that way,
    # anything else is allowed to propagate.
    try:
        author_ns = self.site.family.authornamespaces[self.site.lang][0]
    except (AttributeError, KeyError, IndexError, TypeError):
        return
    if not author_ns:
        # Bail out before the prefix is needed: without a namespace number
        # there is no prefix to compare the title against.
        return

    author_ns_prefix = self.site.namespace(author_ns)
    pywikibot.debug(u'Author ns: %d; name: %s'
                    % (author_ns, author_ns_prefix))

    if not title.startswith(author_ns_prefix + ':'):
        return

    # Report the match before returning so the message is actually reachable.
    if pywikibot.verbose:
        author_page_name = title[len(author_ns_prefix) + 1:]
        pywikibot.output(u'Found author %s' % author_page_name)
    return True
def is_wikisource_author_page(self, title):
    """Tell whether title lies in the author namespace of a Wikisource site.

    @param title: Page title including its namespace prefix.
    @type title: string

    Returns True when self.site belongs to the 'wikisource' family, that
    family lists an author namespace for self.site.lang, and title starts
    with that namespace's name followed by ':'. Returns None otherwise.
    """
    if self.site.family.name != 'wikisource':
        return

    # A family without author namespaces for this language simply has no
    # author pages to detect; only lookup failures are treated that way,
    # anything else is allowed to propagate.
    try:
        author_ns = self.site.family.authornamespaces[self.site.lang][0]
    except (AttributeError, KeyError, IndexError, TypeError):
        return
    if not author_ns:
        # Bail out before the prefix is needed: without a namespace number
        # there is no prefix to compare the title against.
        return

    author_ns_prefix = self.site.namespace(author_ns)
    pywikibot.debug(u'Author ns: %d; name: %s'
                    % (author_ns, author_ns_prefix))

    if not title.startswith(author_ns_prefix + ':'):
        return

    # Report the match before returning so the message is actually reachable.
    if pywikibot.verbose:
        author_page_name = title[len(author_ns_prefix) + 1:]
        pywikibot.output(u'Found author %s' % author_page_name)
    return True
def subTemplate(self, content, param):
    """Substitute the template tags in content according to param.

    @param content: Content with tags to substitute.
    @type content: string
    @param param: Param with data how to substitute tags. Keys read here:
        'simple', 'cron', 'url', 'zip', 'expandtemplates', 'xlsx', 'ods',
        'beautifulsoup', 'regex', 'value', 'postproc' and 'count'.
        The dict is modified in place: 'cron' may get a '* * ' prefix,
        'zip' and 'postproc' are replaced by their evaluated values, and
        in 'simple' mode further entries are merged in.
    @type param: dict

    Returns a tuple (content, substed_tags, metadata): the new content
    with tags substituted, the list of tags that were substituted, and a
    dict of metadata ('mw-signature', 'mw-timestamp', selected 'url-*'
    response headers for plain URL sources, and 'bot-timestamp' when the
    run got past the cron and url checks).

    NOTE: the local variable names in this method must not be renamed
    casually. The post-processing step hands locals() to exec() and the
    BeautifulSoup step evaluates 'BS.<expr>', so executed code can refer
    to these names.
    """
    substed_tags = []  # DRTRIGON-73
    metadata = {'mw-signature': u'~~~~',
                'mw-timestamp': u'~~~~~',}  # DRTRIGON-132

    # 0.2.) check for 'simple' mode and get additional params
    if param['simple']:
        p = self.site.getExpandedString(param['simple'])
        # merge the parameters of the first template found in the expansion
        param.update(pywikibot.extract_templates_and_params(p)[0][1])

    # 0.5.) check cron/date
    if param['cron']:
        # [min] [hour] [day of month] [month] [day of week]
        # (date supported only, thus [min] and [hour] dropped)
        if not (param['cron'][0] == '@'):
            param['cron'] = '* * ' + param['cron']
        entry = crontab.CronTab(param['cron'])
        # find the delay from midnight (does not return 0.0 - but next)
        delay = entry.next(
            datetime.datetime.now().replace(hour=0, minute=0, second=0,
                                            microsecond=0)
            - datetime.timedelta(microseconds=1))

        pywikibot.output(u'CRON delay for execution: %.3f (<= %i)'
                         % (delay, self._bot_config['CRONMaxDelay']))

        # not due yet: hand everything back untouched
        if not (delay <= self._bot_config['CRONMaxDelay']):
            return (content, substed_tags, metadata)

    # 1.) getUrl or wiki text
    # (security: check url not to point to a local file on the server,
    #  e.g. 'file://' - same as used in xsalt.py)
    secure = False
    for item in [u'http://', u'https://',
                 u'mail://', u'local://', u'wiki://']:
        secure = secure or (param['url'][:len(item)] == item)
    # 'zip' arrives as text and is converted here, before the scheme check
    param['zip'] = ast.literal_eval(param['zip'])
    if not secure:
        return (content, substed_tags, metadata)
    if (param['url'][:7] == u'wiki://'):
        url = param['url'][7:].strip('[]')  # enable wiki-links
        # DRTRIGON-93 (only with 'wiki://')
        if ast.literal_eval(param['expandtemplates']):
            external_buffer = pywikibot.Page(self.site,
                                             url).get(expandtemplates=True)
        else:
            external_buffer = self.load(pywikibot.Page(self.site, url))
    elif (param['url'][:7] == u'mail://'):  # DRTRIGON-101
        url = param['url'].replace(u'{{@}}', u'@')  # e.g. nlwiki
        mbox = SubsterMailbox(
            pywikibot.config.datafilepath(self._bot_config['data_path'],
                                          self._bot_config['mbox_file'], ''))
        # close the mailbox even when the lookup raises
        try:
            external_buffer = mbox.find_data(url)
        finally:
            mbox.close()
    elif (param['url'][:8] == u'local://'):  # DRTRIGON-131
        if (param['url'][8:] == u'cache/state_bots'):
            # filename hard-coded
            d = shelve.open(pywikibot.config.datafilepath('cache',
                                                          'state_bots'))
            # close the shelf even when formatting its content raises
            try:
                external_buffer = pprint.pformat(
                    ast.literal_eval(pprint.pformat(d)))
            finally:
                d.close()
        else:
            external_buffer = u'n/a'
    else:
        # consider using 'expires', 'last-modified', 'etag' in order to
        # make the updating data requests more efficient! use those stored
        # on page, if the user placed them, else use the conventional mode.
        # http://www.diveintopython.net/http_web_services/etags.html
        f_url, external_buffer = http.request(self.site, param['url'],
                                              no_hostname=True,
                                              back_response=True)
        headers = f_url.headers  # same like 'f_url.info()'
        #if param['zip']:
        if ('text/' not in headers['content-type']):
            pywikibot.output(u'Source is of non-text content-type, '
                             u'using raw data instead.')
            external_buffer = f_url.read()
        del f_url  # free some memory (no need to keep copy)

        # expose selected response headers to the caller
        for h in ['content-length', 'date', 'last-modified', 'expires']:
            if h in headers:
                metadata['url-%s' % h] = headers[h]

    # some intermediate processing (unzip, xlsx2csv, ...)
    if param['zip']:  # 'application/zip', ...
        # True selects the first archive member, a number N the N-th one
        fileno = 0 if (param['zip'] == True) else (param['zip']-1)
        external_buffer = self.unzip(external_buffer, fileno)
    if param['xlsx']:
        # 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet'
        external_buffer = self.xlsx2csv(external_buffer, param['xlsx'])
    if param['ods']:
        # 'application/vnd.oasis.opendocument.spreadsheet'
        external_buffer = self.ods2csv(external_buffer, param['ods'])

    if not ast.literal_eval(param['beautifulsoup']):  # DRTRIGON-88
        # 2.) regexp
        #for subitem in param['regex']:
        subitem = param['regex']
        regex = re.compile(subitem, re.S | re.I)

        # 3.) subst in content
        external_data = regex.search(external_buffer)

        # maps tag value -> data to substitute; stays empty without a match
        external_data_dict = {}
        if external_data:  # not None
            external_data = external_data.groups()

            pywikibot.output(u'Groups found by regex: %i'
                             % len(external_data))

            # DRTRIGON-114: Support for named groups in regexs
            if regex.groupindex:
                for item in regex.groupindex:
                    # groupindex is 1-based, groups() is 0-based
                    external_data_dict[u'%s-%s' % (param['value'], item)] = \
                        external_data[regex.groupindex[item]-1]
            elif (len(external_data) == 1):
                external_data_dict = {param['value']: external_data[0]}
            else:
                external_data_dict = {param['value']: str(external_data)}
        pywikibot.debug(str(external_data_dict))

        # NOTE(security): eval() runs whatever expression 'postproc' holds;
        # if that text can be edited by untrusted users this is arbitrary
        # code execution.
        param['postproc'] = eval(param['postproc'])
        # should be secured as given below, but needs code changes in wiki too
        #param['postproc'] = ast.literal_eval(param['postproc'])
        for value in external_data_dict:
            external_data = external_data_dict[value]

            # 4.) postprocessing
            func = param['postproc'][0]    # needed by exec call of self._code
            DATA = [external_data]         #
            args = param['postproc'][1:]   #
            scope = {}                     # (scope to run in)
            scope.update(locals())         # (add DATA, *args, ...)
            scope.update(globals())        # (add imports and else)
            if func:
                # the executed code communicates its result through DATA[0]
                exec(self._code + (self._bot_config['CodeTemplate'] % func),
                     scope, scope)
                external_data = DATA[0]
            pywikibot.debug(external_data)

            # 5.) subst content
            (content, tags) = self.subTag(content, value, external_data,
                                          int(param['count']))
            substed_tags += tags
    else:
        # DRTRIGON-105: Support for multiple BS template configurations
        value = param['value']
        if value:
            value += u'-'

        # DRTRIGON-88: Enable Beautiful Soup power for Subster
        BS_tags = self.get_BS_regex(value).findall(content)

        pywikibot.output(u'BeautifulSoup tags found by regex: %i'
                         % len(BS_tags))

        prev_content = content

        BS = BeautifulSoup.BeautifulSoup(external_buffer)
        for item in BS_tags:
            # NOTE(security): item[1] is taken from content and evaluated as
            # an expression on BS; the name 'BS' must stay as is for this.
            external_data = eval('BS.%s' % item[1])
            external_data = self._BS_regex_str % {
                'var1': value+'BS:'+item[1],
                'var2': value,
                'cont': external_data}
            content = content.replace(item[0], external_data, 1)

        # report a single tag if any of the BS tags changed the content
        if (content != prev_content):
            substed_tags.append(value+'BS')

    metadata['bot-timestamp'] = pywikibot.Timestamp.now().isoformat(' ')

    return (content, substed_tags, metadata)
def process_children(obj, current_user):
    """Run process_node over every child of obj, carrying a user along.

    Each child in obj.children is passed to process_node together with
    the current user. As long as no user is known (current_user is falsy),
    the first truthy result of process_node becomes the current user and
    is handed to all following children.

    No value is returned in the code visible here.
    """
    pywikibot.debug(u'parsing node: %s' % obj)
    for child in obj.children:
        found = process_node(child, current_user)
        # keep the user we already have; ignore empty results
        if not found or current_user:
            continue
        current_user = found