def refresh(site, sysop=False):
    """Fetch the watchlist for *site* via the API and pickle it to disk.

    @param site: the pywikibot site whose watchlist is retrieved
    @param sysop: if True, log in with sysop credentials and store the
        result in a separate '-sysop' dump file
    """
    # Make sure we are logged in with the right account first.
    if not site.logged_in(sysop=sysop):
        site.forceLogin(sysop=sysop)
    params = {
        'action': 'query',
        'list': 'watchlistraw',
        'site': site,
        'wrlimit': config.special_page_limit,
    }
    pywikibot.output(u'Retrieving watchlist for %s via API.' % str(site))
    # pywikibot.put_throttle()  # It actually is a get, but a heavy one.
    watchlist = []
    while True:
        req = pywikibot.data.api.Request(**params)
        data = req.submit()
        if 'error' in data:
            raise RuntimeError('ERROR: %s' % data)
        watchlist.extend([w['title'] for w in data['watchlistraw']])
        # Follow API continuation until the full list has been fetched.
        if 'query-continue' in data:
            params.update(data['query-continue']['watchlistraw'])
        else:
            break
    # Save the watchlist to disk.
    # The file is stored in the watchlists subdir. Create if necessary.
    with open(config.datafilepath('watchlists',
                                  'watchlist-%s-%s%s.dat'
                                  % (site.family.name, site.code,
                                     '-sysop' if sysop else '')),
              'wb') as f:
        pickle.dump(watchlist, f)
def __init__(self, site, mindelay=None, maxdelay=None, writedelay=None,
             multiplydelay=True):
    """Constructor.

    @param site: site this throttle is attached to (stored as str)
    @param mindelay: minimum delay in seconds; config.minthrottle when None
    @param maxdelay: maximum delay in seconds; config.maxthrottle when None
    @param writedelay: delay between writes; config.put_throttle when None
    @param multiplydelay: if True, call checkMultiplicity() and setDelays()
        at construction time
    """
    self.lock = threading.RLock()
    self.mysite = str(site)
    # Control file shared between concurrently running bot processes.
    self.ctrlfilename = config.datafilepath('throttle.ctrl')
    self.mindelay = mindelay
    if self.mindelay is None:
        self.mindelay = config.minthrottle
    self.maxdelay = maxdelay
    if self.maxdelay is None:
        self.maxdelay = config.maxthrottle
    self.writedelay = writedelay
    if self.writedelay is None:
        self.writedelay = config.put_throttle
    self.last_read = 0
    self.last_write = 0
    self.next_multiplicity = 1.0
    # Check logfile again after this many seconds:
    self.checkdelay = 300
    # Ignore processes that have not made a check in this many seconds:
    self.dropdelay = 600
    # Free the process id after this many seconds:
    self.releasepid = 1200
    self.lastwait = 0.0
    self.delay = 0
    self.checktime = 0
    self.multiplydelay = multiplydelay
    if self.multiplydelay:
        self.checkMultiplicity()
        self.setDelays()
def dump(self, filename=None):
    """Save the dictionaries to disk if not empty.

    Pickle the contents of the dictionaries superclassDB and catContentDB
    if at least one is not empty. If both are empty, removes the file from
    the disk.

    If the filename is None, it'll use the filename determined in __init__.
    """
    if filename is None:
        filename = self.filename
    elif not os.path.isabs(filename):
        # Relative names are resolved into the pywikibot data directory.
        filename = config.datafilepath(filename)
    if self.is_loaded and (self.catContentDB or self.superclassDB):
        pywikibot.output(u'Dumping to %s, please wait...'
                         % config.shortpath(filename))
        databases = {
            'catContentDB': self.catContentDB,
            'superclassDB': self.superclassDB
        }
        # store dump to disk in binary format
        with open(filename, 'wb') as f:
            try:
                pickle.dump(databases, f, protocol=config.pickle_protocol)
            except pickle.PicklingError as e:
                # Best effort: report the problem instead of aborting.
                pywikibot.output(e)
    else:
        try:
            os.remove(filename)
        except EnvironmentError as e:
            # A missing file (or similar) is fine; report and continue.
            pywikibot.output(e)
        else:
            pywikibot.output(u'Database is empty. %s removed'
                             % config.shortpath(filename))
def get(site=None):
    """Return the watchlist for *site*, loading or refreshing it on demand."""
    if site is None:
        site = pywikibot.Site()
    if site not in cache:
        dump_path = config.datafilepath(
            'watchlists',
            'watchlist-%s-%s.dat' % (site.family.name, site.code))
        try:
            # Age of the on-disk dump, in seconds.
            age_seconds = time.time() - os.path.getmtime(dump_path)
        except OSError:
            # No saved watchlist exists yet; retrieve a fresh one.
            refresh(site)
        else:
            # Reload dumps that are older than one month.
            if age_seconds > 30 * 24 * 60 * 60:
                pywikibot.output(
                    u'Copy of watchlist is one month old, reloading')
                refresh(site)
        with open(dump_path, 'rb') as f:
            cache[site] = pickle.load(f)
    return cache[site]
def get(site=None):
    """Load the watchlist for *site*, fetching it first if necessary."""
    if site is None:
        site = pywikibot.Site()
    # Serve the in-memory copy when one exists.
    if site in cache:
        return cache[site]
    fn = config.datafilepath('watchlists',
                             'watchlist-%s-%s.dat'
                             % (site.family.name, site.code))
    try:
        file_age = time.time() - os.path.getmtime(fn)
    except OSError:
        # No saved watchlist exists yet; retrieve one now.
        refresh(site)
    else:
        # Anything older than one month is considered stale.
        if file_age > 30 * 24 * 60 * 60:
            pywikibot.output(
                u'Copy of watchlist is one month old, reloading')
            refresh(site)
    with open(fn, 'rb') as f:
        watchlist = pickle.load(f)
    cache[site] = watchlist
    return watchlist
def __init__(self, rebuild=False, filename='category.dump.bz2'):
    """Initializer."""
    # Bare filenames are resolved into the pywikibot data directory.
    self.filename = (filename if os.path.isabs(filename)
                     else config.datafilepath(filename))
    if rebuild:
        self.rebuild()
def dump(self, filename='category.dump.bz2'):
    '''Saves the contents of the dictionaries superclassDB and
    catContentDB to disk.

    If both dictionaries are empty, the dump file is removed instead.
    '''
    if not os.path.isabs(filename):
        filename = config.datafilepath(filename)
    if self.catContentDB or self.superclassDB:
        pywikibot.output(u'Dumping to %s, please wait...'
                         % config.shortpath(filename))
        databases = {
            'catContentDB': self.catContentDB,
            'superclassDB': self.superclassDB
        }
        # store dump to disk in binary format; the context manager closes
        # the file even on exceptions (the old explicit close() leaked the
        # handle on any error other than PicklingError).
        with bz2.BZ2File(filename, 'w') as f:
            try:
                pickle.dump(databases, f, protocol=pickle.HIGHEST_PROTOCOL)
            except pickle.PicklingError:
                pass
    else:
        try:
            os.remove(filename)
        except EnvironmentError:
            pass
        else:
            pywikibot.output(u'Database is empty. %s removed'
                             % config.shortpath(filename))
def dump(self, filename=None):
    """Save the dictionaries to disk if not empty.

    Pickle the contents of the dictionaries superclassDB and catContentDB
    if at least one is not empty. If both are empty, removes the file from
    the disk.

    If the filename is None, it'll use the filename determined in __init__.
    """
    if filename is None:
        filename = self.filename
    elif not os.path.isabs(filename):
        filename = config.datafilepath(filename)
    if self.is_loaded and (self.catContentDB or self.superclassDB):
        pywikibot.output("Dumping to %s, please wait..."
                         % config.shortpath(filename))
        databases = {"catContentDB": self.catContentDB,
                     "superclassDB": self.superclassDB}
        # store dump to disk in binary format; 'with' guarantees the file
        # is closed even when pickling raises (the old explicit close()
        # leaked the handle on any error other than PicklingError).
        with bz2.BZ2File(filename, "w") as f:
            try:
                pickle.dump(databases, f, protocol=config.pickle_protocol)
            except pickle.PicklingError:
                pass
    else:
        try:
            os.remove(filename)
        except EnvironmentError:
            pass
        else:
            pywikibot.output("Database is empty. %s removed"
                             % config.shortpath(filename))
def __init__(self, rebuild=False, filename="category.dump.bz2"):
    """Constructor."""
    # Relative names live inside the pywikibot data directory.
    self.filename = (filename if os.path.isabs(filename)
                     else config.datafilepath(filename))
    if rebuild:
        self.rebuild()
def dump(self, filename='category.dump.bz2'):
    """Save the contents of the dictionaries superclassDB and catContentDB
    to disk.

    If both dictionaries are empty, the dump file is removed instead.
    """
    if not os.path.isabs(filename):
        filename = config.datafilepath(filename)
    if self.catContentDB or self.superclassDB:
        pywikibot.output(u'Dumping to %s, please wait...'
                         % config.shortpath(filename))
        databases = {
            'catContentDB': self.catContentDB,
            'superclassDB': self.superclassDB
        }
        # store dump to disk in binary format; the context manager closes
        # the file even on exceptions (the old explicit close() leaked the
        # handle on any error other than PicklingError).
        with bz2.BZ2File(filename, 'w') as f:
            try:
                pickle.dump(databases, f, protocol=pickle.HIGHEST_PROTOCOL)
            except pickle.PicklingError:
                pass
    else:
        try:
            os.remove(filename)
        except EnvironmentError:
            pass
        else:
            pywikibot.output(u'Database is empty. %s removed'
                             % config.shortpath(filename))
def __init__(self, site, *,
             mindelay: Optional[int] = None,
             maxdelay: Optional[int] = None,
             writedelay: Union[int, float, None] = None):
    """Initializer.

    @param site: site this throttle applies to (stored as str)
    @param mindelay: minimum delay in seconds; config.minthrottle when None
    @param maxdelay: maximum delay in seconds; config.maxthrottle when None
    @param writedelay: delay between writes; config.put_throttle when None
    """
    self.lock = threading.RLock()
    self.lock_write = threading.RLock()
    self.lock_read = threading.RLock()
    self.mysite = str(site)
    self.ctrlfilename = config.datafilepath('throttle.ctrl')
    # Explicit None checks so a caller-supplied 0 is honoured; the old
    # `value or default` expression silently replaced 0 with the default.
    self.mindelay = config.minthrottle if mindelay is None else mindelay
    self.maxdelay = config.maxthrottle if maxdelay is None else maxdelay
    self.writedelay = (config.put_throttle if writedelay is None
                       else writedelay)
    self.last_read = 0
    self.last_write = 0
    self.next_multiplicity = 1.0
    # Check logfile again after this many seconds:
    self.checkdelay = 300
    # Ignore processes that have not made a check in this many seconds:
    self.dropdelay = 600
    # Free the process id after this many seconds:
    self.releasepid = 1200
    self.retry_after = 0  # set by http.request
    self.delay = 0
    self.checktime = 0
    self.modules = Counter()
    self.checkMultiplicity()
    self.setDelays()
def refresh(site, sysop=False):
    """Fetch the watchlist via the API and pickle it to disk.

    @param site: the pywikibot site whose watchlist is retrieved
    @param sysop: if True, log in with sysop credentials and store the
        result in a separate '-sysop' dump file
    """
    if not site.logged_in(sysop=sysop):
        site.forceLogin(sysop=sysop)
    params = {
        'action': 'query',
        'list': 'watchlistraw',
        'site': site,
        'wrlimit': config.special_page_limit,
    }
    pywikibot.output(u'Retrieving watchlist for %s via API.' % str(site))
    # pywikibot.put_throttle()  # It actually is a get, but a heavy one.
    watchlist = []
    while True:
        req = pywikibot.data.api.Request(**params)
        data = req.submit()
        if 'error' in data:
            raise RuntimeError('ERROR: %s' % data)
        watchlist.extend([w['title'] for w in data['watchlistraw']])
        # Follow API continuation until everything has been fetched.
        if 'query-continue' in data:
            params.update(data['query-continue']['watchlistraw'])
        else:
            break
    # Save the watchlist to disk
    # The file is stored in the watchlists subdir. Create if necessary.
    with open(
        config.datafilepath(
            'watchlists',
            'watchlist-%s-%s%s.dat'
            % (site.family.name, site.code, '-sysop' if sysop else '')),
            'wb') as f:
        pickle.dump(watchlist, f)
def __init__(self, disambPage, enabled=False):
    """Remember the disambiguation page and load its ignore list, if any."""
    self.disambPage = disambPage
    self.enabled = enabled
    self.ignorelist = []
    ignore_dir = config.datafilepath('disambiguations')
    # Only read a saved ignore list when the data directory exists.
    if os.path.exists(ignore_dir):
        self._read_ignorelist(ignore_dir)
def __init__(self, catTitle, catDB, filename=None, maxDepth=10):
    """Store the tree parameters and resolve the output filename."""
    self.catTitle = catTitle
    self.catDB = catDB
    # Relative output names are placed in the pywikibot data directory.
    needs_resolving = bool(filename) and not os.path.isabs(filename)
    self.filename = config.datafilepath(filename) if needs_resolving else filename
    self.maxDepth = maxDepth
    self.site = pywikibot.Site()
def __init__(self, catTitle, catDB, filename=None, maxDepth=10):
    """Store the tree parameters and resolve the output filename."""
    self.catTitle = catTitle
    self.catDB = catDB
    resolved = filename
    # Relative output names are placed in the pywikibot data directory.
    if resolved and not os.path.isabs(resolved):
        resolved = config.datafilepath(resolved)
    self.filename = resolved
    # TODO: make maxDepth changeable with a parameter or config file entry
    self.maxDepth = maxDepth
    self.site = pywikibot.getSite()
def run(self) -> None:
    """Write graphs to the data directory."""
    for fmt in config.interwiki_graph_formats:
        target = config.datafilepath('interwiki-graphs/'
                                     + getFilename(self.origin, fmt))
        saved = self.graph.write(target, prog='dot', format=fmt)
        prefix = 'Graph saved as ' if saved else 'Graph could not be saved as '
        pywikibot.output(prefix + target)
def storecookiedata(self, data: str) -> None:
    """
    Store cookie data.

    @param data: The raw data as returned by getCookie()
    """
    # THIS IS OVERRIDDEN IN data/api.py
    lwp_path = config.datafilepath('pywikibot.lwp')
    pywikibot.debug('Storing cookies to {}'.format(lwp_path), _logger)
    with open(lwp_path, 'w') as cookie_file:
        cookie_file.write(data)
def ignore(self, refPage):
    """Append *refPage* to this disambiguation page's on-disk ignore list."""
    if self.enabled:
        # Skip this occurence next time.
        filename = config.datafilepath('disambiguations',
                                       self.disambPage.urlname() + '.txt')
        try:
            # Open file for appending. If none exists yet, create a new one.
            # The context manager closes the file even if write() raises
            # (the old explicit close() leaked the handle in that case).
            with codecs.open(filename, 'a', 'utf-8') as f:
                f.write(refPage.urlname() + '\n')
        except IOError:
            # Best effort: a failure to record the ignore is not fatal.
            pass
def ignore(self, refPage):
    """Append *refPage* to this disambiguation page's on-disk ignore list."""
    if self.enabled:
        # Skip this occurrence next time.
        filename = config.datafilepath(
            'disambiguations',
            self.disambPage.title(asUrl=True) + '.txt')
        try:
            # Open file for appending. If none exists yet, create a new one.
            # The context manager closes the file even if write() raises
            # (the old explicit close() leaked the handle in that case).
            with codecs.open(filename, 'a', 'utf-8') as f:
                f.write(refPage.title(asUrl=True) + '\n')
        except IOError:
            # Best effort: a failure to record the ignore is not fatal.
            pass
def storecookiedata(self, data):
    """
    Store cookie data.

    The argument data is the raw data, as returned by getCookie().

    Returns nothing.
    """
    # THIS IS OVERRIDDEN IN data/api.py
    lwp_path = config.datafilepath('pywikibot.lwp')
    pywikibot.debug('Storing cookies to %s' % lwp_path, _logger)
    with open(lwp_path, 'w') as cookie_file:
        cookie_file.write(data)
def storecookiedata(self, data):
    """
    Store cookie data.

    The argument data is the raw data, as returned by getCookie().

    Returns nothing.
    """
    # THIS IS OVERRIDDEN IN data/api.py
    filename = config.datafilepath("pywikibot.lwp")
    pywikibot.debug(u"Storing cookies to %s" % filename, _logger)
    # Use a context manager so the file is closed even if write() raises
    # (the old open()/close() pair leaked the handle on error).
    with open(filename, "w") as f:
        f.write(data)
def __init__(self, catTitle, catDB, filename=None, maxDepth=10, lang='en',
             source='wikipedia'):
    """Initializer."""
    self.catTitle = catTitle
    self.catDB = catDB
    # Relative output names are placed in the pywikibot data directory.
    if filename:
        self.filename = (filename if os.path.isabs(filename)
                         else config.datafilepath(filename))
    else:
        self.filename = filename
    self.maxDepth = maxDepth
    self.site = pywikibot.Site(lang, source)
def __init__(self, disambPage, enabled=False):
    """Initializer.

    @type disambPage: pywikibot.Page
    @type enabled: bool
    @rtype: None
    """
    self.disambPage = disambPage
    self.enabled = enabled
    self.ignorelist = []
    ignore_dir = config.datafilepath('disambiguations')
    # Read a previously saved ignore list only when the directory exists.
    if os.path.exists(ignore_dir):
        self._read_ignorelist(ignore_dir)
def __init__(self, disambPage, enabled=False):
    """Constructor.

    @type disambPage: pywikibot.Page
    @type enabled: bool
    @rtype: None
    """
    self.disambPage = disambPage
    self.enabled = enabled
    self.ignorelist = []
    data_dir = config.datafilepath('disambiguations')
    # A missing directory simply means there is no saved ignore list yet.
    if os.path.exists(data_dir):
        self._read_ignorelist(data_dir)
def ignore(self, refPage):
    """Write page to ignorelist.

    @type refPage: pywikibot.Page
    @rtype: None
    """
    if not self.enabled:
        return
    # Skip this occurrence next time.
    filename = config.datafilepath(
        'disambiguations', self.disambPage.title(asUrl=True) + '.txt')
    try:
        # Open file for appending. If none exists, create a new one.
        with codecs.open(filename, 'a', 'utf-8') as ignore_file:
            ignore_file.write(refPage.title(asUrl=True) + '\n')
    except IOError:
        pass
def __init__(self, rebuild=False, filename='category.dump.bz2'):
    """Load the category database from disk, rebuilding it on any failure.

    @param rebuild: if True, skip loading and rebuild from scratch
    @param filename: dump file; relative names are resolved into the
        pywikibot data directory
    """
    if rebuild:
        self.rebuild()
    else:
        try:
            if not os.path.isabs(filename):
                filename = config.datafilepath(filename)
            # 'with' guarantees the dump file is closed even on errors
            # (the old explicit close() leaked the handle in that case).
            with bz2.BZ2File(filename, 'r') as f:
                pywikibot.output(u'Reading dump from %s'
                                 % config.shortpath(filename))
                databases = pickle.load(f)
            # keys are categories, values are 2-tuples with lists as entries.
            self.catContentDB = databases['catContentDB']
            # like the above, but for supercategories
            self.superclassDB = databases['superclassDB']
            del databases
        except Exception:
            # If something goes wrong, just rebuild the database.
            # Narrowed from a bare 'except:' so KeyboardInterrupt and
            # SystemExit still propagate.
            self.rebuild()
def __init__(self, disambPage, enabled=False):
    """Load the on-disk ignore list for *disambPage*, if one exists."""
    self.disambPage = disambPage
    self.enabled = enabled
    self.ignorelist = []
    filename = config.datafilepath(
        'disambiguations',
        self.disambPage.title(as_filename=True) + '.txt')
    try:
        # The file is stored in the disambiguation/ subdir.
        # Create if necessary.
        with codecs.open(filename, 'r', 'utf-8') as f:
            for line in f:
                # Strip trailing newlines and carriage returns. rstrip also
                # fixes the IndexError the old character-by-character loop
                # raised on blank lines ('\n' -> '' -> ''[-1]).
                line = line.rstrip('\r\n')
                # skip empty lines
                if line != '':
                    self.ignorelist.append(line)
    except IOError:
        # No saved ignore list yet; start with an empty one.
        pass
def __init__(self, disambPage, enabled=False):
    """Load the on-disk ignore list for *disambPage*, if one exists."""
    self.disambPage = disambPage
    self.enabled = enabled
    self.ignorelist = []
    filename = config.datafilepath(
        'disambiguations',
        self.disambPage.title(as_filename=True) + '.txt')
    try:
        # The file is stored in the disambiguation/ subdir.
        # Create if necessary.
        with codecs.open(filename, 'r', 'utf-8') as f:
            for line in f:
                # Strip trailing newlines and carriage returns. rstrip also
                # fixes the IndexError the old character-by-character loop
                # raised on blank lines ('\n' -> '' -> ''[-1]).
                line = line.rstrip('\r\n')
                # skip empty lines
                if line != '':
                    self.ignorelist.append(line)
    except IOError:
        # No saved ignore list yet; start with an empty one.
        pass
def refresh_all(new=False, sysop=False):
    """Fetch and locally cache several watchlists.

    @param new: if True, download watchlists for every account configured
        in user-config.py; otherwise refresh the dumps already on disk
    @param sysop: log in with sysop credentials when fetching new lists
    """
    if new:
        pywikibot.output(
            'Downloading all watchlists for your accounts in user-config.py')
        for family in config.usernames:
            for lang in config.usernames[family]:
                refresh(pywikibot.Site(lang, family), sysop=sysop)
        for family in config.sysopnames:
            for lang in config.sysopnames[family]:
                refresh(pywikibot.Site(lang, family), sysop=sysop)
    else:
        import dircache
        filenames = dircache.listdir(config.datafilepath('watchlists'))
        # Raw string and escaped dot: the old non-raw pattern contained an
        # invalid '\-' escape sequence and let '.' match any character.
        watchlist_filenameR = re.compile(r'watchlist-([a-z\-:]+)\.dat')
        for filename in filenames:
            match = watchlist_filenameR.match(filename)
            if match:
                arr = match.group(1).split('-')
                family = arr[0]
                lang = '-'.join(arr[1:])
                refresh(pywikibot.Site(lang, family))
def refresh_all(new=False, sysop=False):
    """Fetch and locally cache several watchlists.

    @param new: if True, download watchlists for every account configured
        in user-config.py; otherwise refresh the dumps already on disk
    @param sysop: log in with sysop credentials when fetching new lists
    """
    if new:
        pywikibot.output(
            'Downloading all watchlists for your accounts in user-config.py')
        for family in config.usernames:
            for lang in config.usernames[family]:
                refresh(pywikibot.Site(lang, family), sysop=sysop)
        for family in config.sysopnames:
            for lang in config.sysopnames[family]:
                refresh(pywikibot.Site(lang, family), sysop=sysop)
    else:
        import dircache
        filenames = dircache.listdir(
            config.datafilepath('watchlists'))
        # Raw string and escaped dot: the old non-raw pattern contained an
        # invalid '\-' escape sequence and let '.' match any character.
        watchlist_filenameR = re.compile(r'watchlist-([a-z\-:]+)\.dat')
        for filename in filenames:
            match = watchlist_filenameR.match(filename)
            if match:
                arr = match.group(1).split('-')
                family = arr[0]
                lang = '-'.join(arr[1:])
                refresh(pywikibot.Site(lang, family))
# Find the {{self|author= selfMatch = selfRegex.search(pageText) # Check if both are found and are equal if (informationMatch and selfMatch): if informationMatch.group('author') == selfMatch.group('author'): # Replace |Author=Original uploader was ... with |Author= ... pageText = informationRegex.sub(r'|Author=\g<author>', pageText) return pageText # -label ok skip view # textarea archivo = config.datafilepath("Uploadbot.localskips.txt") if not path.exists(archivo): with open(archivo, 'w') as tocreate: tocreate.write('{{NowCommons') def getautoskip(): """Get a list of templates to skip.""" with codecs.open(archivo, 'r', 'utf-8') as f: txt = f.read() toreturn = txt.split('{{')[1:] return toreturn class TkdialogIC(Tkdialog):
from pywikibot.tools import deprecate_arg
import pywikibot.version

_logger = "comm.http"

# global variables
numthreads = 1
threads = []

connection_pool = threadedhttp.ConnectionPool()
http_queue = Queue.Queue()

# Shared cookie jar, persisted in the pywikibot data directory.
cookie_jar = threadedhttp.LockableCookieJar(
    config.datafilepath("pywikibot.lwp"))
try:
    cookie_jar.load()
except (IOError, cookielib.LoadError):
    pywikibot.debug(u"Loading cookies failed.", _logger)
else:
    pywikibot.debug(u"Loaded cookies from file.", _logger)

# Build up HttpProcessors: daemon worker threads that service http_queue
# using the shared cookie jar and connection pool.
pywikibot.log(u'Starting %(numthreads)i threads...' % locals())
for i in range(numthreads):
    proc = threadedhttp.HttpProcessor(http_queue, cookie_jar, connection_pool)
    proc.setDaemon(True)
    threads.append(proc)
    proc.start()
(u'www.mfa.gov.yu', u'www.mfa.gov.rs'), (u'www.drzavnauprava.sr.gov.yu', u'www.drzavnauprava.gov.rs'), ], }, # These replacements will convert HTML tag from FCK-editor to wiki syntax. # 'fckeditor': { 'regex': True, 'msg': { 'en': u'Robot: Fixing rich-editor html', 'fa': u'ربات: تصحیح اچتیامال ویرایشگر پیشرفته', }, 'replacements': [ # replace <br> with a new line (r'(?i)<br>', r'\n'), # replace with a space (r'(?i) ', r' '), ], }, } # # Load the user fixes file. from pywikibot import config try: execfile(config.datafilepath("user-fixes.py")) except IOError: pass
True, 'msg': 'pywikibot-fixes-fckeditor', 'replacements': [ # replace <br> with a new line (r'(?i)<br>', r'\n'), # replace with a space (r'(?i) ', r' '), ], }, } def _load_file(filename): """Load the fixes from the given filename.""" if os.path.exists(filename): # load binary, to let compile decode it according to the file header with open(filename, 'rb') as f: exec(compile(f.read(), filename, 'exec')) return True else: return False # # Load the user fixes file. if _load_file(config.datafilepath('user-fixes.py')): user_fixes_loaded = True else: user_fixes_loaded = False
def init_handlers(strm=None):
    """Initialize logging system for terminal-based bots.

    This function must be called before using pywikibot.output(); and must
    be called again if the destination stream is changed.

    @param strm: Output stream. If None, re-uses the last stream if one
        was defined, otherwise uses sys.stderr

    Note: this function is called by handleArgs(), so it should normally
    not need to be called explicitly

    All user output is routed through the logging module.
    Each type of output is handled by an appropriate handler object.
    This structure is used to permit eventual development of other
    user interfaces (GUIs) without modifying the core bot code.
    The following output levels are defined:
       DEBUG - only for file logging; debugging messages
       STDOUT - output that must be sent to sys.stdout (for bots that may
                have their output redirected to a file or other destination)
       VERBOSE - optional progress information for display to user
       INFO - normal (non-optional) progress information for display to user
       INPUT - prompts requiring user response
       WARN - user warning messages
       ERROR - user error messages
       CRITICAL - fatal error messages
    Accordingly, do ''not'' use print statements in bot code; instead,
    use pywikibot.output function.
    """
    global _handlers_initialized
    moduleName = calledModuleName()
    if not moduleName:
        moduleName = "terminal-interface"
    logging.addLevelName(VERBOSE, "VERBOSE")
    # for messages to be displayed on terminal at "verbose" setting
    # use INFO for messages to be displayed even on non-verbose setting
    logging.addLevelName(STDOUT, "STDOUT")
    # for messages to be displayed to stdout
    logging.addLevelName(INPUT, "INPUT")
    # for prompts requiring user response
    root_logger = logging.getLogger("pywiki")
    root_logger.setLevel(DEBUG + 1)  # all records except DEBUG go to logger
    if hasattr(root_logger, 'captureWarnings'):
        root_logger.captureWarnings(True)  # introduced in Python >= 2.7
    root_logger.handlers = []  # remove any old handlers
    # configure handler(s) for display to user interface
    ui.init_handlers(root_logger, **config.userinterface_init_kwargs)
    # if user has enabled file logging, configure file handler
    if moduleName in config.log or '*' in config.log:
        if config.logfilename:
            logfile = config.datafilepath("logs", config.logfilename)
        else:
            logfile = config.datafilepath("logs", "%s-bot.log" % moduleName)
        file_handler = RotatingFileHandler(filename=logfile,
                                           maxBytes=1024 * config.logfilesize,
                                           backupCount=config.logfilecount)
        file_handler.setLevel(DEBUG)
        form = LoggingFormatter(
            fmt="%(asctime)s %(caller_file)18s, %(caller_line)4s "
                "in %(caller_name)18s: %(levelname)-8s %(message)s",
            datefmt="%Y-%m-%d %H:%M:%S"
        )
        file_handler.setFormatter(form)
        root_logger.addHandler(file_handler)
        # Turn on debugging for each component requested by user
        # or for all components if nothing was specified
        for component in config.debug_log:
            if component:
                debuglogger = logging.getLogger("pywiki." + component)
            else:
                debuglogger = logging.getLogger("pywiki")
            debuglogger.setLevel(DEBUG)
            debuglogger.addHandler(file_handler)
    _handlers_initialized = True
    writelogheader()
class PywikibotCookieJar(cookielib.LWPCookieJar, object):

    """CookieJar which checks file permissions."""

    @mode_check_decorator
    def load(self, **kwargs):
        """Load cookies from file."""
        super(PywikibotCookieJar, self).load()

    @mode_check_decorator
    def save(self, **kwargs):
        """Save cookies to file."""
        super(PywikibotCookieJar, self).save()


# Module-level session setup: a shared, permission-checked cookie jar
# backed by pywikibot.lwp in the data directory.
cookie_jar = PywikibotCookieJar(config.datafilepath('pywikibot.lwp'))
try:
    cookie_jar.load()
except (IOError, cookielib.LoadError):
    debug('Loading cookies failed.', _logger)
else:
    debug('Loaded cookies from file.', _logger)

session = requests.Session()
session.cookies = cookie_jar


# Prepare flush on quit
def _flush():
    session.close()
    message = 'Closing network session.'
def main():
    """Build the per-project translation status pages on fr.wikipedia.

    Collects the pages of each 'Traduction du Projet X' category, groups
    them by translation status and writes a summary page per project.
    """
    botName=config.usernames['wikipedia']['fr']
    debug=0
    debugwrite=False
    uniqueproject=None
    allprojects = False
    force = False
    warningtext=u''
    # Command-line handling: -project: limits the run to one project.
    for arg in pywikibot.handleArgs():
        if arg.startswith('-project:'):
            parts=re.split(u':', arg)
            uniqueproject=parts[1]
        elif arg.startswith('-force'):
            force = True
        elif arg.startswith('-debugw'):
            debug = 2
        elif arg.startswith('-dbg'):
            debug = 1
        else:
            pywikibot.output(u'Syntax: translation_project.py -project:projet')
            exit()
    # Get category
    site = pywikibot.getSite()
    projectslist=list()
    if uniqueproject:
        projectslist.append(uniqueproject)
    else:
        # Discover every project from the master category's subcategories.
        maincat=catlib.Category(site, u'Traduction par projet')
        subcategorieslist=maincat.subcategories()
        for subcategory in subcategorieslist:
            found=re.search(r'.*Traduction du Projet (.*)',
                            subcategory.title())
            if found:
                project=found.group(1)
                projectslist.append(project)
    commandLogFilename = config.datafilepath('logs', 'translation_project.log')
    commandLogFile = codecs.open(commandLogFilename, 'w', 'utf-8')
    for project in projectslist:
        finalpagename=u'Projet:Traduction/*/Projet/%s' % project
        finalpage=pywikibot.Page(site, finalpagename)
        proceed=False
        # Only overwrite the summary page if the bot was its last editor
        # (unless -force or a debug mode is active).
        if debug==0 and not force:
            if (finalpage.exists()):
                if (finalpage.getVersionHistory(revCount=1)[0][2]==botName):
                    pywikibot.output(u'%s last edit by bot, we can proceed'
                                     % finalpagename)
                    proceed=True
                else:
                    pywikibot.output(u'%s modifed, skipping' % finalpagename)
                    pywikibot.output(botName)
                    pywikibot.output(
                        finalpage.getVersionHistory(revCount=1)[0][2])
                    commandLogFile.write(u'%s modifed, skipping\n'
                                         % finalpagename)
                    tmpCommandLogFilename = config.datafilepath(
                        'logs', 'translation_project_%s.log' % project)
                    tmpCommandLogFile = codecs.open(tmpCommandLogFilename,
                                                    'w', 'utf-8')
                    tmpCommandLogFile.write(u'%s modifed, skipping\n'
                                            % finalpagename)
                    tmpCommandLogFile.close()
            else:
                proceed=True
                pywikibot.output(u'%s does not exists, we can proceed'
                                 % finalpagename)
        else:
            proceed=True
        if proceed:
            if not debug==0:
                #artlist=list()
                #artlist.append(pywikibot.Page(site, u'Projet:Traduction/Boccace'))
                #artlist.append(pywikibot.Page(site, u'Projet:Traduction/Administrateur systèmes'))
                #artlist.append(pywikibot.Page(site, u'Projet:Traduction/Alexa Internet'))
                #artlist.append(pywikibot.Page(site, u'Projet:Traduction/Algèbre de Kleene'))
                category = catlib.Category(site,
                                           u'Traduction du Projet %s' % project)
                gen=category.articles(True)
            else:
                category = catlib.Category(site,
                                           u'Traduction du Projet %s' % project)
                gen=category.articles(True)
            pagesToProcess=pagegenerators.PreloadingGenerator(gen, 60)
            # One bucket per translation status.
            demande=list()
            encours=list()
            arelire=list()
            enrelecture=list()
            terminee=list()
            for page in pagesToProcess:
                title=page.title(withNamespace=False)
                #pywikibot.output(title)
                subtitle=re.sub(u'/Traduction', u'', title)
                if title!=u'Traduction/*/Projet/%s' % project:
                    text=page.get()
                    status=findStatus(text)
                    #pywikibot.output(status)
                    if status == u'Demande' or status == u'1':
                        demande.append(subtitle)
                    elif status == u'En cours' or status == u'2':
                        encours.append(subtitle)
                    elif status == u'A relire' or status == u'3':
                        arelire.append(subtitle)
                    elif status == u'En relecture' or status == u'4':
                        enrelecture.append(subtitle)
                    elif status == u'Terminée' or status == u'5':
                        terminee.append(subtitle)
                    else:
                        pywikibot.output(u'No match: %s (%s)'
                                         % (subtitle, status))
                        warningtext+=u'No match: %s (%s)\n' % (subtitle, status)
            # Sort lists
            demande.sort()
            encours.sort()
            arelire.sort()
            enrelecture.sort()
            terminee.sort()
            # Create page
            newtext=u'<noinclude>{{Projet:Traduction/EnteteProjet|%s}}\n</noinclude>\n== Demande de traduction ==\n'% project
            for title in demande:
                newtext+=u'{{Discussion:%s/Traduction}}\n' % title
            newtext+=u'== Traduction en cours ==\n'
            for title in encours:
                newtext+=u'{{Discussion:%s/Traduction}}\n' % title
            newtext+=u'== Traduction à relire ==\n'
            for title in arelire:
                newtext+=u'{{Discussion:%s/Traduction}}\n' % title
            newtext+=u'== Relecture en cours ==\n'
            for title in enrelecture:
                newtext+=u'{{Discussion:%s/Traduction}}\n' % title
            newtext+=u'<noinclude>\n== Traduction terminée ==\n'
            for title in terminee:
                newtext+=u'{{Discussion:%s/Traduction}}\n' % title
            newtext+=u'</noinclude>'
            # Only save when the generated text actually changed.
            if debug==0:
                if finalpage.exists():
                    text=finalpage.get()
                    if (text!=newtext):
                        finalpage.put(newtext,
                                      u'Traductions du projet %s' % project)
            else:
                page=pywikibot.Page(site, u'Utilisateur:'+botName+'/Test')
                text=page.get()
                if (text!=newtext):
                    page.put(newtext, u'Traductions du projet %s' % project)
    commandLogFile.close()
    # Append accumulated warnings to the bot's on-wiki log page.
    if len(warningtext)>0 :
        logpage=pywikibot.Page(site,
                               u'Utilisateur:'+botName+u'/Log/Avertissements')
        pagetext=logpage.get()
        pagetext+=u'\n== Traduction des projets =='
        pagetext+=warningtext
        logpage.put(pagetext, u'Traduction des projets',
                    watchArticle = None, minorEdit = False);
True, 'msg': 'pywikibot-fixes-fckeditor', 'replacements': [ # replace <br> with a new line (r'(?i)<br>', r'\n'), # replace with a space (r'(?i) ', r' '), ], }, } def _load_file(filename): """Load the fixes from the given filename.""" if os.path.exists(filename): # load binary, to let compile decode it according to the file header with open(filename, 'rb') as f: exec(compile(f.read(), filename, 'exec'), globals()) return True else: return False # Load the user fixes file. filename = config.datafilepath('user-fixes.py') if _load_file(filename): user_fixes_loaded = True else: user_fixes_loaded = False
'fckeditor': { 'regex': True, 'msg': 'pywikibot-fixes-fckeditor', 'replacements': [ # replace <br> with a new line (r'(?i)<br>', r'\n'), # replace with a space (r'(?i) ', r' '), ], }, } def _load_file(filename): """Load the fixes from the given filename.""" if os.path.exists(filename): # load binary, to let compile decode it according to the file header with open(filename, 'rb') as f: exec(compile(f.read(), filename, 'exec'), globals()) return True else: return False # Load the user fixes file. filename = config.datafilepath('user-fixes.py') if _load_file(filename): user_fixes_loaded = True else: user_fixes_loaded = False
# These replacements will convert HTML tag from FCK-editor to wiki syntax. # 'fckeditor': { 'regex': True, 'msg': { 'en': u'Robot: Fixing rich-editor html', 'fa': u'ربات: تصحیح اچتیامال ویرایشگر پیشرفته', }, 'replacements': [ # replace <br> with a new line (r'(?i)<br>', r'\n'), # replace with a space (r'(?i) ', r' '), ], }, } # # Load the user fixes file. from pywikibot import config try: exec( compile( open(config.datafilepath("user-fixes.py")).read(), config.datafilepath("user-fixes.py"), 'exec')) except IOError: pass
# certificate verify failed # cf. `openssl errstr 14090086` SSL_CERT_VERIFY_FAILED_MSG = ":14090086:" _logger = "comm.http" # global variables numthreads = 1 threads = [] connection_pool = threadedhttp.ConnectionPool() http_queue = Queue.Queue() cookie_jar = threadedhttp.LockableCookieJar( config.datafilepath("pywikibot.lwp")) try: cookie_jar.load() except (IOError, cookielib.LoadError): pywikibot.debug(u"Loading cookies failed.", _logger) else: pywikibot.debug(u"Loaded cookies from file.", _logger) # Build up HttpProcessors pywikibot.log(u'Starting %(numthreads)i threads...' % locals()) for i in range(numthreads): proc = threadedhttp.HttpProcessor(http_queue, cookie_jar, connection_pool) proc.setDaemon(True) threads.append(proc) proc.start()
def __init__(self, *args, **kwargs): """Initialize the class.""" super(PywikibotCookieJar, self).__init__(*args, **kwargs) @mode_check_decorator def load(self, **kwargs): """Load cookies from file.""" super(PywikibotCookieJar, self).load() @mode_check_decorator def save(self, **kwargs): """Save cookies to file.""" super(PywikibotCookieJar, self).save() cookie_file_path = config.datafilepath('pywikibot.lwp') file_mode_checker(cookie_file_path, create=True) cookie_jar = cookielib.LWPCookieJar(cookie_file_path) try: cookie_jar.load() except cookielib.LoadError: debug('Loading cookies failed.', _logger) else: debug('Loaded cookies from file.', _logger) session = requests.Session() session.cookies = cookie_jar # Prepare flush on quit def _flush():
# The error message for failed SSL certificate verification # 'certificate verify failed' is a commonly detectable string SSL_CERT_VERIFY_FAILED_MSG = 'certificate verify failed' _logger = "comm.http" if (isinstance(config.socket_timeout, tuple) and StrictVersion(requests.__version__) < StrictVersion('2.4.0')): warning('The configured timeout is a tuple but requests does not ' 'support a tuple as a timeout. It uses the lower of the ' 'two.') config.socket_timeout = min(config.socket_timeout) cookie_jar = cookielib.LWPCookieJar( config.datafilepath('pywikibot.lwp')) try: cookie_jar.load() except (IOError, cookielib.LoadError): debug('Loading cookies failed.', _logger) else: debug('Loaded cookies from file.', _logger) session = requests.Session() session.cookies = cookie_jar # Prepare flush on quit def _flush(): session.close() message = 'Closing network session.'
(u'www.mfa.gov.yu', u'www.mfa.gov.rs'), (u'www.drzavnauprava.sr.gov.yu', u'www.drzavnauprava.gov.rs'), ], }, # These replacements will convert HTML tag from FCK-editor to wiki syntax. # 'fckeditor': { 'regex': True, 'msg': { 'en': u'Robot: Fixing rich-editor html', 'fa': u'ربات: تصحیح اچتیامال ویرایشگر پیشرفته', }, 'replacements': [ # replace <br> with a new line (r'(?i)<br>', r'\n'), # replace with a space (r'(?i) ', r' '), ], }, } # # Load the user fixes file. from pywikibot import config try: exec(compile(open(config.datafilepath("user-fixes.py")).read(), config.datafilepath("user-fixes.py"), 'exec')) except IOError: pass
def main():
    """Categorise fr.wikipedia translation follow-up pages by project.

    Builds a page generator from command-line options (-categ[:lang],
    -debug*, -db, -checkcat), then for each translation follow-up page
    synchronises the 'projet=' parameters of the Traduction/Suivi template
    with the projects found on the linked article's talk page.
    """
    botName=config.usernames['wikipedia']['fr']
    # Option flags, populated from command-line arguments below.
    categ=False
    debug=0
    debugwrite=False
    lang=''
    checkcat=False
    startindex=None
    finishindex=None
    db=False
    for arg in pywikibot.handleArgs():
        if arg.startswith('-categ'):
            categ = True
            # -categ:lang[:start-finish] selects one language sub-category
            # and an optional slice of its article list.
            if arg.startswith('-categ:'):
                parts=re.split(u':', arg)
                lang=parts[1]
                if (len(parts)>2):
                    parts=re.split(u'-', parts[2])
                    startindex=int(parts[0])
                    if (len(parts)>1):
                        finishindex=int(parts[1])
        elif arg.startswith('-debugwt'):
            debug = 2
        elif arg.startswith('-debugw'):
            debug = 3
        elif arg.startswith('-debug'):
            debug = 1
        elif arg.startswith('-db'):
            db = True
        elif arg.startswith('-checkcat'):
            checkcat = True
        else:
            pywikibot.output(u'Syntax: translation.py [-categ[:lang]] [-debug]')
            exit()

    # Get category
    site = pywikibot.getSite()

    # Open logfile (append if it exists, otherwise create it).
    commandLogFilename = config.datafilepath('logs', 'translation.log')
    try:
        commandLogFile = codecs.open(commandLogFilename, 'a', 'utf-8')
    except IOError:
        commandLogFile = codecs.open(commandLogFilename, 'w', 'utf-8')

    if not debug==0:
        # SPECIFIC PAGES
        artlist=list()
        artlist.append(pywikibot.Page(site, u'Discussion:Psychologie/Traduction'))
        #artlist.append(pywikibot.Page(site, u'Utilisateur:'+botName+'/Tests'))
        #artlist.append(pywikibot.Page(site, u'Utilisateur:Almabot/Junior/Projet:Traduction/Carlos María de Alvear'))
        #artlist.append(pywikibot.Page(site, u'Discussion:Surtsey/Traduction'))
        gen = iter(artlist)
        # -CATEG
        #catname = u'Catégorie:Traduction par langue'
        #categ=catlib.Category(site, catname)
        #commandLogFile.write(u'== Traitement de [[:%s]] ==\r\n' % catname)
        #artlist=list(subcat.articles(True))
        #newlist=list()
        #for page in artlist:
        #    if not re.match (u'Projet:Traduction/\*/', page.title()):
        #        newlist.append(page)
        #pywikibot.output(u'Traitement de %d articles' % len(newlist))
        #gen = iter(newlist)
    elif categ:
        if lang=='0':
            # Special value '0': walk every small per-language sub-category
            # except the large /en and /de ones.
            catname=u'Catégorie:Page de suivi de traduction'
            categ=catlib.Category(site, catname)
            subcats=categ.subcategoriesList()
            artlist=list()
            for subcat in subcats:
                title=subcat.title()
                if re.match(u'Catégorie:Page de suivi de traduction', title) and (not re.search(u'/en', title)) and (not re.search(u'/de', title)):
                    sublist=list(subcat.articles(True))
                    artlist=artlist+sublist
            commandLogFile.write(u'== Traitement des petites catégories ==\r\n')
        elif lang:
            catname = u'Catégorie:Page de suivi de traduction/'+lang
            categ=catlib.Category(site, catname)
            commandLogFile.write(u'== Traitement de [[:%s]] ==\r\n' % catname)
            artlist=list(categ.articles(True))
            #pywikibot.output(u'index %d %d'% (startindex, finishindex))
            if startindex>=0 and finishindex:
                artlist=artlist[startindex:finishindex]
        else:
            catname = u'Catégorie:Traduction par langue'
            categ=catlib.Category(site, catname)
            commandLogFile.write(u'== Traitement de [[:%s]] ==\r\n' % catname)
            artlist=list(categ.articles(True))
        # Filter out the Projet:Traduction/*/ bookkeeping pages.
        newlist=list()
        for page in artlist:
            if not re.match (u'Projet:Traduction/\*/', page.title()):
                newlist.append(page)
        #pywikibot.output(u'Traitement de %d articles' % len(newlist))
        gen = iter(newlist)
    elif db:
        # Direct toolserver database query instead of the API.
        database=_mysql.connect(host='frwiki-p.db.toolserver.org', db='frwiki_p', read_default_file="/home/totoazero/.my.cnf")
        database.query('SELECT page_title FROM page WHERE page_title REGEXP "/Traduction" AND page_namespace=1')
        results=database.store_result()
        result=results.fetch_row(maxrows=0)
        #pywikibot.output(u'Traitement de %d articles' % len(result))
        artlist=list()
        for res in result:
            title=res[0].decode('utf-8')
            page=pywikibot.Page(site, u'Discussion:%s' % title)
            artlist.append(page)
        gen=iter(artlist)
    else:
        commandLogFile.write(u'== Traitement des sous-pages de Projet:Traduction/ ==\r\n')
        catname = u'Catégorie:Traduction par projet'
        categ=catlib.Category(site, catname)
        commandLogFile.write(u'== Traitement de [[:%s]] ==\r\n' % catname)
        gen=categ.articles(True)

    if checkcat:
        pagesToProcess=pagegenerators.PreloadingGenerator(gen, 60)
    else:
        pagesToProcess=translationiterator(gen, 60)
    allset=0
    processed=0
    total=0
    # addedprojects collects project names for batched category creation;
    # dictionary caches checkproject() lookups keyed by capitalised name.
    addedprojects=list()
    dictionary=dict()
    for tuple in pagesToProcess:
        total=total+1
        if checkcat:
            projectpage=tuple
        else:
            projectpage=tuple[0]
            #linkedpage=tuple[1]
            linkedpage=pywikibot.Page(site, re.sub(u'/Traduction', u'', projectpage.title()))
            #pywikibot.output(u'Processing %s and %s' % (projectpage.title(), linkedpage.title()))
        if checkcat:
            commandLogFile.write(u'Processing [[%s]]\n' % projectpage.title())
            # Known problematic page: skip it entirely.
            if projectpage.title() == u"Discussion:Interprétations politiques de Harry Potter/Traduction":
                commandLogFile.write(u'Escaping this page…')
                continue
        pywikibot.output(u'Processing [[%s]]\n' % projectpage.title())
        #if not checkcat:
        #    pywikibot.output('**** Traitement de %s (et %s)' % (projectpage.title(), linkedpage.title()))
        projectlist=list()
        # Templates broken
        #templates=page.templates()
        #for template in templates:
        #    templatetitle=template.title()
        #    #pywikibot.output(templatetitle)
        origtext=projectpage.get(False, True)
        if re.search(u'Traduction/Suivi', origtext):
            # Split the page around the Traduction/Suivi template and parse
            # its existing 'projet'/'projetN' parameters into existingprojects.
            parts=re.split(u'Traduction/Suivi', origtext, 1)
            newtext=parts[0]+u'Traduction/Suivi'
            parts=re.split(u'\|', parts[1])
            existingprojects=list()
            for part in parts:
                subparts=re.split(u'(.*)<!--', part)
                subpart=subparts[0]
                if subpart[0:6] == u'projet':
                    substrings=subpart.split(u'=', 1)
                    fstring=substrings[0].strip();
                    # 'projet' has index 1; 'projetN' carries its index digit.
                    if len(fstring)==6:
                        index=1
                    else:
                        index=int(fstring[6])
                    string=substrings[1].strip();
                    if len(string)>1:
                        existingprojects.append( (capitalizefirst(string), index, False) )
            if checkcat:
                # Category-check mode: only collect the declared projects.
                for existingproject in existingprojects:
                    addedprojects.append(existingproject[0])
                    commandLogFile.write(u' Adding [[%s]]\n' % existingproject[0])
                if (len(addedprojects)>=200):
                    createcategories(addedprojects, debug, commandLogFile)
                    addedprojects=list()
            else:
                # Collect project names from the linked talk page's templates.
                for (templatepage, args) in linkedpage.templatesWithParams():
                    #pywikibot.output(templatetitle)
                    if templatepage.namespace()==10:
                        templatetitle=templatepage.title(withNamespace=False)
                        if re.match(u'Wikiprojet ', templatetitle) or (re.match(u'Projet', templatetitle) and not re.match(u'Projet:Traduction', templatetitle)):
                            #pywikibot.output(templatetitle)
                            if templatepage.isRedirectPage():
                                targettemplate=templatepage.getRedirectTarget()
                                templatetitle=targettemplate.title(withNamespace=False)
                                #pywikibot.output(u'Template redirect: replacing with %s' % templatetitle)
                            # Strip the 'Wikiprojet '/'Projet ' prefix to get
                            # the bare project name.
                            locallen=0
                            if re.match(u'Wikiprojet', templatetitle):
                                locallen=11
                            elif re.match(u'Projet', templatetitle):
                                locallen=7
                            #pywikibot.output(u'%d' % locallen)
                            if not re.match(u'méta ', templatetitle[locallen:]):
                                string=templatetitle[locallen:]
                                key = capitalizefirst(string)
                                if key in dictionary:
                                    projectname=dictionary[key]
                                    #pywikibot.output(u'Found %s for %s in dictionary' % (projectname, key))
                                else:
                                    projectname=checkproject(key)
                                    dictionary[key]=projectname
                                if projectname:
                                    projectlist.append(projectname)
                                else:
                                    pywikibot.output('... while processing %s' % projectpage.title())
                        elif re.match(u'Évaluation multiprojet', templatetitle) or re.match(u'Wikiprojet', templatetitle):
                            # Multi-project assessment template: project names
                            # appear as positional arguments.
                            projectflag=True
                            for arg in args:
                                arg=re.sub(u'\r\n', u'', arg)
                                while len(arg)>0 and arg[0]==' ':
                                    arg=arg[1:]
                                if projectflag and re.match(u'[Aa]vancement=', arg):
                                    break
                                if projectflag and re.match(u'[Àà] faire=', arg):
                                    break
                                elif len(arg)==0:
                                    break
                                elif projectflag and not re.match(u'[Aa]vancement', arg) and not re.match(u'[Àà] faire', arg) and not re.match(u'[Rr]aison', arg) and not re.match(u'[Tt]odo', arg) and not re.match(u'WP1.0', arg) and not re.match(u'[Ll]umière', arg) and not re.match(u'[Ww]ikiconcours', arg):
                                    if re.search(u'=', arg):
                                        commandLogFile.write(u'::Potential issue %s in %s:\r\n' % (arg, projectpage.title()))
                                        _errorhandler.message(u'::Potential issue %s in %s:\n' % (arg, projectpage.title()))
                                    else:
                                        key = capitalizefirst(arg)
                                        if key in dictionary:
                                            projectname=dictionary[key]
                                            #pywikibot.output(u'Found %s for %s in dictionary' % (projectname, key))
                                        else:
                                            projectname=checkproject(key)
                                            dictionary[key]=projectname
                                        if projectname:
                                            projectlist.append(projectname)
                                        else:
                                            pywikibot.output('... while processing %s' % projectpage.title())
                                    projectflag=False
                                else:
                                    projectflag=True
                #pywikibot.output(u'LENS: %d %d' % (len(projectlist), len(existingprojects)))
                # Compare the sorted found projects with the declared ones to
                # decide whether the follow-up page needs updating.
                listLength=len(projectlist)
                projectlist.sort()
                if listLength==len(existingprojects):
                    if (listLength==0):
                        projectChanges=False
                    else:
                        existingprojects.sort()
                        projectChanges=False
                        index=0
                        while (index<listLength) and (not projectChanges):
                            #pywikibot.output(u'Compare: %s | %s' % (projectlist[index], existingprojects[index][0]))
                            if not (projectlist[index]==existingprojects[index][0]):
                                projectChanges=True
                            index=index+1
                else:
                    projectChanges=True
                #pywikibot.output(u'LENS: %d %d' % (len(projectlist), len(existingprojects)))
                if projectChanges:
                    #pywikibot.output(u'Mise à jour des projets')
                    # Rebuild the projet=/projetN= parameter text.
                    index = 1
                    projecttext = ''
                    for project in projectlist:
                        addedprojects.append(project)
                        if index==1:
                            projecttext+= 'projet=%s\n|' % project
                        else:
                            projecttext= projecttext + 'projet%d=%s\n|' % (index, project)
                        index = index+1
                    # Re-splice the template parameters, inserting the new
                    # projet block and dropping the old projet parameters.
                    inserted = False
                    comments=''
                    for part in parts:
                        if not inserted:
                            if (len(existingprojects)==0) and (re.search(u'=', part)):
                                newtext=newtext+projecttext+part+'|'
                                inserted=True
                            elif (len(existingprojects)>0) and (re.match(u'projet', part)):
                                newtext=newtext+projecttext
                                inserted=True
                                if (re.search(u'<!--', part)):
                                    subparts=re.split(u'<!--', part, 1)
                                    newtext=newtext[:-1]+u'<!--'+subparts[1]+u'|'
                            else:
                                newtext=newtext+part+'|'
                        else:
                            if not re.match(u'projet', part):
                                newtext=newtext+part+'|'
                            else:
                                if (re.search(u'<!--', part)):
                                    subparts=re.split(u'<!--', part, 1)
                                    newtext=newtext[:-1]+u'<!--'+subparts[1]+u'|'
                    # Drop the trailing '|' appended by the loop above.
                    finaltext=newtext[0:len(newtext)-1]
                    if not origtext==finaltext:
                        pywikibot.output('**** Traitement de %s (et %s)' % (projectpage.title(), linkedpage.title()))
                        commandLogFile.write(u'* Traitement de [[%s]]\r\n' % projectpage.title())
                        if (debug==0) or (debug==3):
                            projectpage.put(finaltext, u'Catégorisation en projet des pages de traduction')
                        elif debug==2:
                            # Debug write mode: publish both versions to a
                            # sandbox page instead of saving in place.
                            newpage=pywikibot.Page(site, u'Utilisateur:'+botName+'/Test')
                            newpage.put(origtext, u'Texte original de [[%s]]' % projectpage.title())
                            newpage.put(finaltext, u'Nouveau texte de [[%s]]' % projectpage.title())
                if (len(addedprojects)>=60):
                    createcategories(addedprojects, debug, commandLogFile)
                    addedprojects=list()
    # Flush any remaining collected projects.
    if (len(addedprojects)>0):
        createcategories(addedprojects, debug, commandLogFile)
informationMatch = informationRegex.search(pageText) # Find the {{self|author= selfMatch = selfRegex.search(pageText) # Check if both are found and are equal if (informationMatch and selfMatch): if(informationMatch.group('author') == selfMatch.group('author')): # Replace |Author=Original uploader was ... with |Author= ... pageText = informationRegex.sub(r'|Author=\g<author>', pageText) return pageText # -label ok skip view # textarea archivo = config.datafilepath("Uploadbot.localskips.txt") try: open(archivo, 'r') except IOError: tocreate = open(archivo, 'w') tocreate.write("{{NowCommons") tocreate.close() def getautoskip(): """Get a list of templates to skip.""" f = codecs.open(archivo, 'r', 'utf-8') txt = f.read() f.close() toreturn = txt.split('{{')[1:] return toreturn
# cf. `openssl errstr 14090086` SSL_CERT_VERIFY_FAILED = ":14090086:" # the User-agent: header. The default is # '<script>/<revision> Pywikipediabot/2.0', where '<script>' is the currently # executing script and version is the SVN revision of Pywikipediabot. USER_AGENT_FORMAT = '{script}/r{version[rev]} Pywikipediabot/2.0' useragent = USER_AGENT_FORMAT.format(script=pywikibot.calledModuleName(), version=pywikibot.version.getversiondict()) numthreads = 1 threads = [] connection_pool = threadedhttp.ConnectionPool() http_queue = Queue.Queue() cookie_jar = threadedhttp.LockableCookieJar(config.datafilepath("pywikibot.lwp")) try: cookie_jar.load() except (IOError, cookielib.LoadError): pywikibot.debug(u"Loading cookies failed.", _logger) else: pywikibot.debug(u"Loaded cookies from file.", _logger) # Build up HttpProcessors pywikibot.log('Starting %(numthreads)i threads...' % locals()) for i in range(numthreads): proc = threadedhttp.HttpProcessor(http_queue, cookie_jar, connection_pool) proc.setDaemon(True) threads.append(proc) proc.start()
# 'fckeditor': { 'regex': True, 'msg': 'pywikibot-fixes-fckeditor', 'replacements': [ # replace <br> with a new line (r'(?i)<br>', r'\n'), # replace with a space (r'(?i) ', r' '), ], }, } def _load_file(filename): """Load the fixes from the given filename.""" if os.path.exists(filename): # load binary, to let compile decode it according to the file header with open(filename, 'rb') as f: exec(compile(f.read(), filename, 'exec')) return True else: return False # # Load the user fixes file. if _load_file(config.datafilepath('user-fixes.py')): user_fixes_loaded = True else: user_fixes_loaded = False
def main():
    """Categorise fr.wikipedia translation follow-up pages by project.

    Variant of translation.py main() that additionally accumulates
    warnings in almalog2.logs. Builds a page generator from command-line
    options (-categ[:lang], -debug*, -db, -checkcat), then synchronises
    the 'projet=' parameters of each Traduction/Suivi template with the
    projects found on the linked article's talk page.
    """
    botName=config.usernames['wikipedia']['fr']
    # Option flags, populated from command-line arguments below.
    categ=False
    debug=0
    debugwrite=False
    lang=''
    checkcat=False
    startindex=None
    finishindex=None
    db=False
    for arg in pywikibot.handleArgs():
        if arg.startswith('-categ'):
            categ = True
            # -categ:lang[:start-finish] selects one language sub-category
            # and an optional slice of its article list.
            if arg.startswith('-categ:'):
                parts=re.split(u':', arg)
                lang=parts[1]
                if (len(parts)>2):
                    parts=re.split(u'-', parts[2])
                    startindex=int(parts[0])
                    if (len(parts)>1):
                        finishindex=int(parts[1])
        elif arg.startswith('-debugwt'):
            debug = 2
        elif arg.startswith('-debugw'):
            debug = 3
        elif arg.startswith('-debug'):
            debug = 1
        elif arg.startswith('-db'):
            db = True
        elif arg.startswith('-checkcat'):
            checkcat = True
        else:
            pywikibot.output(u'Syntax: translation.py [-categ[:lang]] [-debug]')
            exit()

    # Get category
    site = pywikibot.getSite()

    # Open logfile (append if it exists, otherwise create it).
    commandLogFilename = config.datafilepath('logs', 'translation.log')
    try:
        commandLogFile = codecs.open(commandLogFilename, 'a', 'utf-8')
    except IOError:
        commandLogFile = codecs.open(commandLogFilename, 'w', 'utf-8')

    if not debug==0:
        # SPECIFIC PAGES
        artlist=list()
        artlist.append(pywikibot.Page(site, u'Discussion:Psychologie/Traduction'))
        #artlist.append(pywikibot.Page(site, u'Utilisateur:'+botName+'/Tests'))
        #artlist.append(pywikibot.Page(site, u'Utilisateur:Almabot/Junior/Projet:Traduction/Carlos María de Alvear'))
        #artlist.append(pywikibot.Page(site, u'Discussion:Surtsey/Traduction'))
        gen = iter(artlist)
        # -CATEG
        #catname = u'Catégorie:Traduction par langue'
        #categ=catlib.Category(site, catname)
        #commandLogFile.write(u'== Traitement de [[:%s]] ==\r\n' % catname)
        #artlist=list(subcat.articles(True))
        #newlist=list()
        #for page in artlist:
        #    if not re.match (u'Projet:Traduction/\*/', page.title()):
        #        newlist.append(page)
        #pywikibot.output(u'Traitement de %d articles' % len(newlist))
        #gen = iter(newlist)
    elif categ:
        if lang=='0':
            # Special value '0': walk every small per-language sub-category
            # except the large /en and /de ones.
            catname=u'Catégorie:Page de suivi de traduction'
            categ=catlib.Category(site, catname)
            subcats=categ.subcategoriesList()
            artlist=list()
            for subcat in subcats:
                title=subcat.title()
                if re.match(u'Catégorie:Page de suivi de traduction', title) and (not re.search(u'/en', title)) and (not re.search(u'/de', title)):
                    sublist=list(subcat.articles(True))
                    artlist=artlist+sublist
            commandLogFile.write(u'== Traitement des petites catégories ==\r\n')
        elif lang:
            catname = u'Catégorie:Page de suivi de traduction/'+lang
            categ=catlib.Category(site, catname)
            commandLogFile.write(u'== Traitement de [[:%s]] ==\r\n' % catname)
            artlist=list(categ.articles(True))
            #pywikibot.output(u'index %d %d'% (startindex, finishindex))
            if startindex>=0 and finishindex:
                artlist=artlist[startindex:finishindex]
        else:
            catname = u'Catégorie:Traduction par langue'
            categ=catlib.Category(site, catname)
            commandLogFile.write(u'== Traitement de [[:%s]] ==\r\n' % catname)
            artlist=list(categ.articles(True))
        # Filter out the Projet:Traduction/*/ bookkeeping pages.
        newlist=list()
        for page in artlist:
            if not re.match (u'Projet:Traduction/\*/', page.title()):
                newlist.append(page)
        #pywikibot.output(u'Traitement de %d articles' % len(newlist))
        gen = iter(newlist)
    elif db:
        # Direct toolserver database query instead of the API.
        database=_mysql.connect(host='frwiki-p.db.toolserver.org', db='frwiki_p', read_default_file="/home/totoazero/.my.cnf")
        database.query('SELECT page_title FROM page WHERE page_title REGEXP "/Traduction" AND page_namespace=1')
        results=database.store_result()
        result=results.fetch_row(maxrows=0)
        #pywikibot.output(u'Traitement de %d articles' % len(result))
        artlist=list()
        for res in result:
            title=res[0].decode('utf-8')
            page=pywikibot.Page(site, u'Discussion:%s' % title)
            artlist.append(page)
        gen=iter(artlist)
    else:
        commandLogFile.write(u'== Traitement des sous-pages de Projet:Traduction/ ==\r\n')
        catname = u'Catégorie:Traduction par projet'
        categ=catlib.Category(site, catname)
        commandLogFile.write(u'== Traitement de [[:%s]] ==\r\n' % catname)
        gen=categ.articles(True)

    if checkcat:
        pagesToProcess=pagegenerators.PreloadingGenerator(gen, 60)
    else:
        pagesToProcess=translationiterator(gen, 60)
    allset=0
    processed=0
    total=0
    # addedprojects collects project names for batched category creation;
    # dictionary caches checkproject() lookups keyed by capitalised name.
    addedprojects=list()
    dictionary=dict()
    for tuple in pagesToProcess:
        total=total+1
        if checkcat:
            projectpage=tuple
        else:
            projectpage=tuple[0]
            #linkedpage=tuple[1]
            linkedpage=pywikibot.Page(site, re.sub(u'/Traduction', u'', projectpage.title()))
            #pywikibot.output(u'Processing %s and %s' % (projectpage.title(), linkedpage.title()))
        if checkcat:
            commandLogFile.write(u'Processing [[%s]]\n' % projectpage.title())
            # Known problematic page: skip it entirely.
            if projectpage.title() == u"Discussion:Interprétations politiques de Harry Potter/Traduction":
                commandLogFile.write(u'Escaping this page…')
                continue
        pywikibot.output(u'Processing [[%s]]\n' % projectpage.title())
        #if not checkcat:
        #    pywikibot.output('**** Traitement de %s (et %s)' % (projectpage.title(), linkedpage.title()))
        projectlist=list()
        # Templates broken
        #templates=page.templates()
        #for template in templates:
        #    templatetitle=template.title()
        #    #pywikibot.output(templatetitle)
        origtext=projectpage.get(False, True)
        if re.search(u'Traduction/Suivi', origtext):
            # Split the page around the Traduction/Suivi template and parse
            # its existing 'projet'/'projetN' parameters into existingprojects.
            parts=re.split(u'Traduction/Suivi', origtext, 1)
            newtext=parts[0]+u'Traduction/Suivi'
            parts=re.split(u'\|', parts[1])
            existingprojects=list()
            for part in parts:
                subparts=re.split(u'(.*)<!--', part)
                subpart=subparts[0]
                if subpart[0:6] == u'projet':
                    substrings=subpart.split(u'=', 1)
                    fstring=substrings[0].strip();
                    # 'projet' has index 1; 'projetN' carries its index digit.
                    if len(fstring)==6:
                        index=1
                    else:
                        index=int(fstring[6])
                    string=substrings[1].strip();
                    if len(string)>1:
                        existingprojects.append( (capitalizefirst(string), index, False) )
            if checkcat:
                # Category-check mode: only collect the declared projects.
                for existingproject in existingprojects:
                    addedprojects.append(existingproject[0])
                    commandLogFile.write(u' Adding [[%s]]\n' % existingproject[0])
                if (len(addedprojects)>=200):
                    createcategories(addedprojects, debug, commandLogFile)
                    addedprojects=list()
            else:
                # Collect project names from the linked talk page's templates.
                for (templatepage, args) in linkedpage.templatesWithParams():
                    #pywikibot.output(templatetitle)
                    if templatepage.namespace()==10:
                        templatetitle=templatepage.title(withNamespace=False)
                        if re.match(u'Wikiprojet ', templatetitle) or (re.match(u'Projet', templatetitle) and not re.match(u'Projet:Traduction', templatetitle)):
                            #pywikibot.output(templatetitle)
                            if templatepage.isRedirectPage():
                                targettemplate=templatepage.getRedirectTarget()
                                templatetitle=targettemplate.title(withNamespace=False)
                                #pywikibot.output(u'Template redirect: replacing with %s' % templatetitle)
                            # Strip the 'Wikiprojet '/'Projet ' prefix to get
                            # the bare project name.
                            locallen=0
                            if re.match(u'Wikiprojet', templatetitle):
                                locallen=11
                            elif re.match(u'Projet', templatetitle):
                                locallen=7
                            #pywikibot.output(u'%d' % locallen)
                            if not re.match(u'méta ', templatetitle[locallen:]):
                                string=templatetitle[locallen:]
                                key = capitalizefirst(string)
                                if key in dictionary:
                                    projectname=dictionary[key]
                                    #pywikibot.output(u'Found %s for %s in dictionary' % (projectname, key))
                                else:
                                    projectname=checkproject(key)
                                    dictionary[key]=projectname
                                if projectname:
                                    projectlist.append(projectname)
                                else:
                                    pywikibot.output('... while processing %s' % projectpage.title())
                                    almalog2.logs+=u'... while processing %s\n' % projectpage.title()
                        elif re.match(u'Évaluation multiprojet', templatetitle) or re.match(u'Wikiprojet', templatetitle):
                            # Multi-project assessment template: project names
                            # appear as positional arguments.
                            projectflag=True
                            for arg in args:
                                arg=re.sub(u'\r\n', u'', arg)
                                while len(arg)>0 and arg[0]==' ':
                                    arg=arg[1:]
                                if projectflag and re.match(u'[Aa]vancement=', arg):
                                    break
                                if projectflag and re.match(u'[Àà] faire=', arg):
                                    break
                                elif len(arg)==0:
                                    break
                                elif projectflag and not re.match(u'[Aa]vancement', arg) and not re.match(u'[Àà] faire', arg) and not re.match(u'[Rr]aison', arg) and not re.match(u'[Tt]odo', arg) and not re.match(u'WP1.0', arg) and not re.match(u'[Ll]umière', arg) and not re.match(u'[Ww]ikiconcours', arg):
                                    if re.search(u'=', arg):
                                        commandLogFile.write(u'::Potential issue %s in %s:\r\n' % (arg, projectpage.title()))
                                        almalog2.logs+=(u'::Potential issue %s in %s:\n' % (arg, projectpage.title()))
                                        pywikibot.output(u'::Potential issue %s in %s' % (arg, projectpage.title()))
                                    else:
                                        key = capitalizefirst(arg)
                                        if key in dictionary:
                                            projectname=dictionary[key]
                                            #pywikibot.output(u'Found %s for %s in dictionary' % (projectname, key))
                                        else:
                                            projectname=checkproject(key)
                                            dictionary[key]=projectname
                                        if projectname:
                                            projectlist.append(projectname)
                                        else:
                                            pywikibot.output('... while processing %s' % projectpage.title())
                                    projectflag=False
                                else:
                                    projectflag=True
                #pywikibot.output(u'LENS: %d %d' % (len(projectlist), len(existingprojects)))
                # Compare the sorted found projects with the declared ones to
                # decide whether the follow-up page needs updating.
                listLength=len(projectlist)
                projectlist.sort()
                if listLength==len(existingprojects):
                    if (listLength==0):
                        projectChanges=False
                    else:
                        existingprojects.sort()
                        projectChanges=False
                        index=0
                        while (index<listLength) and (not projectChanges):
                            #pywikibot.output(u'Compare: %s | %s' % (projectlist[index], existingprojects[index][0]))
                            if not (projectlist[index]==existingprojects[index][0]):
                                projectChanges=True
                            index=index+1
                else:
                    projectChanges=True
                #pywikibot.output(u'LENS: %d %d' % (len(projectlist), len(existingprojects)))
                if projectChanges:
                    #pywikibot.output(u'Mise à jour des projets')
                    # Rebuild the projet=/projetN= parameter text.
                    index = 1
                    projecttext = ''
                    for project in projectlist:
                        addedprojects.append(project)
                        if index==1:
                            projecttext+= 'projet=%s\n|' % project
                        else:
                            projecttext= projecttext + 'projet%d=%s\n|' % (index, project)
                        index = index+1
                    # Re-splice the template parameters, inserting the new
                    # projet block and dropping the old projet parameters.
                    inserted = False
                    comments=''
                    for part in parts:
                        if not inserted:
                            if (len(existingprojects)==0) and (re.search(u'=', part)):
                                newtext=newtext+projecttext+part+'|'
                                inserted=True
                            elif (len(existingprojects)>0) and (re.match(u'projet', part)):
                                newtext=newtext+projecttext
                                inserted=True
                                if (re.search(u'<!--', part)):
                                    subparts=re.split(u'<!--', part, 1)
                                    newtext=newtext[:-1]+u'<!--'+subparts[1]+u'|'
                            else:
                                newtext=newtext+part+'|'
                        else:
                            if not re.match(u'projet', part):
                                newtext=newtext+part+'|'
                            else:
                                if (re.search(u'<!--', part)):
                                    subparts=re.split(u'<!--', part, 1)
                                    newtext=newtext[:-1]+u'<!--'+subparts[1]+u'|'
                    # Drop the trailing '|' appended by the loop above.
                    finaltext=newtext[0:len(newtext)-1]
                    if not origtext==finaltext:
                        pywikibot.output('**** Traitement de %s (et %s)' % (projectpage.title(), linkedpage.title()))
                        commandLogFile.write(u'* Traitement de [[%s]]\r\n' % projectpage.title())
                        if (debug==0) or (debug==3):
                            projectpage.put(finaltext, u'Catégorisation en projet des pages de traduction')
                        elif debug==2:
                            # Debug write mode: publish both versions to a
                            # sandbox page instead of saving in place.
                            newpage=pywikibot.Page(site, u'Utilisateur:'+botName+'/Test')
                            newpage.put(origtext, u'Texte original de [[%s]]' % projectpage.title())
                            newpage.put(finaltext, u'Nouveau texte de [[%s]]' % projectpage.title())
                if (len(addedprojects)>=60):
                    createcategories(addedprojects, debug, commandLogFile)
                    addedprojects=list()
    # Flush any remaining collected projects.
    if (len(addedprojects)>0):
        createcategories(addedprojects, debug, commandLogFile)
def main():
    """Maintain the fr.wikipedia list of users waiting for a mentor.

    In fast mode, only users not already on the list page are added; in
    -full mode the whole list is rebuilt and activity is logged to
    logs/spa.log. With -debug, sandbox pages are used instead of the
    live list.
    """
    # Process command line arguments
    fullmode = False
    debug = 0
    for arg in pywikibot.handleArgs():
        if arg.startswith('-full'):
            fullmode = True
        elif arg.startswith('-debug'):
            debug = 1
        else:
            pywikibot.output(u'Syntax: spa.py [-full]')
            exit()

    # Get interesting pages and category
    site = pywikibot.Site()
    catname = u'Catégorie:Wikipédien recherchant un parrain'
    pagename = u'Wikipédia:Parrainage_des_nouveaux/Nouveaux_en_attente'
    #pagename = u'Utilisateur:ZéroBot/Test'
    if debug:
        mainpage=pywikibot.Page(site, u'Utilisateur:ZéroBot/Tests')
    else:
        mainpage=pywikibot.Page(site, pagename)
    if fullmode:
        #wikipedia.output(u'Liste des utilisateurs à parrainer : mode complet');
        # Open the log file (append if present, otherwise create it).
        commandLogFilename = config.datafilepath('logs', 'spa.log')
        try:
            commandLogFile = codecs.open(commandLogFilename, 'a', 'utf-8')
        except IOError:
            commandLogFile = codecs.open(commandLogFilename, 'w', 'utf-8')
        # add a timestamp in ISO 8601 formulation
        isoDate = time.strftime('%Y-%m-%d %H:%M:%S', time.localtime())
        commandLogFile.write("%s Starting ...\n" % isoDate)
    else:
        #wikipedia.output(u'Liste des utilisateurs à parrainer : mode rapide');
        # Fast mode needs the current list text to detect existing entries.
        oldpagetext=mainpage.get()

    #DEBUG
    if debug:
        # Hand-picked test users instead of the live category.
        artlist=list()
        #artlist.append(wikipedia.Page(site, u'Utilisateur:Amétisthe67'))
        artlist.append(pywikibot.Page(site, u'Utilisateur:Big 08'))
        artlist.append(pywikibot.Page(site, u'Utilisateur:Guillaume Dürr'))
        #artlist.append(wikipedia.Page(site, u'Utilisateur:Pouding'))
        #artlist.append(wikipedia.Page(site, u'Utilisateur:Grischkaja'))
        #artlist.append(wikipedia.Page(site, u'Utilisateur:Zeynab'))
        #artlist.append(wikipedia.Page(site, u'Utilisateur:Adieptel'))
        #categ=catlib.Category(site, catname)
        #artlist=categ.articlesList()
    else:
        categ=pywikibot.Category(site, catname)
        artlist=categ.articles()

    # Step one: remove invalid entries
    # NOTE(review): preloadingGen is created but artlist is iterated below;
    # the preloading generator appears unused -- confirm intent.
    preloadingGen=pagegenerators.PreloadingGenerator(artlist, 60)

    #step two: add new entries
    user_list = list()
    time_list = list()
    tuples = list()
    redirectuser = list()
    existingusers=''
    for page in artlist:
        userpage=page.title(withNamespace=False)
        # User name is the user page title up to the first '/'.
        username=re.split(u'/', userpage,1)[0]
        #wikipedia.output(u'Processing %s' % username)
        if fullmode:
            commandLogFile.write(" processing %s\n" % username)
        if not fullmode:
            existing=re.search(username, oldpagetext)
        if fullmode or not existing:
            #history = page.getVersionHistory()
            history = page.fullVersionHistory()
            #print history
            history.sort(key=tuple_sort)
            #history.reverse()
            spadate=history[0][1]
            #print page.title(withNamespace=False)
            #wikipedia.output(u'#### Original date: %s / %d' % (spadate, len(history)))
            # Walk the history to find the revision where the mentoring
            # request template first appears; that revision's date is used.
            for data in history:
                revisionid=data[0]
                #print data
                #wikipedia.output(revisionid)
                try:
                    usertext=page.getOldVersion(revisionid)
                except pywikibot.IsRedirectPage:
                    redirectuser.append(username)
                except KeyError:
                    usertext=''
                # We check for the spa model. If not present we do not save the date and keep the previous one when it was added
                spaok=re.search('[P|p]arrainez[-| ]moi', usertext)
                if spaok:
                    spadate=data[1]
                    #wikipedia.output(spadate)
                    break
            #wikipedia.output(u'#### Intermediate date: %s' % spadate)
            #wikipedia.output(username)
            #username2=username.encode('iso-8859-1')
            #wikipedia.output(username2)
            #urlusername=urllib.quote(username2)
            #wikipedia.output(urlusername)
            #user=userlib.User(site, urlusername)
            # Fetch the user's latest contribution timestamp.
            user=pywikibot.User(site,username)
            contribs=user.contributions(1)
            contribdate=u''
            for contrib in contribs:
                contribdate=contrib[2]
            # Collect (username, request date, last contribution date).
            tuple=list()
            tuple.append(username)
            tuple.append(spadate)
            tuple.append(contribdate)
            tuples.append(tuple)
            #wikipedia.output(u' .. processed')
        else:
            existingusers+=username+' '
            #wikipedia.output(u' .. skipped')
    tuples.sort(key=tuple_sort)
    #tuples.reverse()
    if fullmode:
        # Full rebuild: start from the static page header.
        newpagetext = u"<noinclude>" + u"""
{{Mise à jour bot|Toto Azéro|période=quotidiennement}}
{{confusion|Special:Newpages}}{{raccourci|WP:NEW}}
</noinclude>
La liste suivante regroupe les wikipédiens en quête d'un parrain. Elle est régulièrement mise à jour par un [[Wikipédia:Bot|robot]].
Suivez cette page si vous souhaitez être informé lorsqu'un nouveau contributeur demande un parrainage.
{| class="wikitable sortable" border="1" cellpadding="5"
|-----\n! Utilisateur\n! Depuis\n! Dernière contribution"""
    else:
        # Fast mode: keep the text before the first user row.
        mainsplit=oldpagetext.split(u'\n|{{u|', 1)
        newpagetext=mainsplit[0]+u'\n'
    if not fullmode:
        # Keep only the rows of users still waiting for a mentor.
        submainsplit=mainsplit[1].split('|{{u|')
        for subsplit in submainsplit:
            #wikipedia.output(u'subsplit:'+subsplit)
            usersplit=subsplit.split('}}')
            stillthere=re.search(usersplit[0], existingusers)
            if stillthere:
                newpagetext+=u'|{{u|'+subsplit
        # Drop the trailing category links; they are re-appended below.
        splits=newpagetext.split(u'\n\n[[Catégorie:')
        newpagetext=splits[0]
    #curlocale=locale.getlocale()
    #locale.setlocale(locale.LC_ALL, 'fr_FR.UTF-8')
    # Append one table row per collected user.
    for tuple in tuples:
        otime=time.strptime(unicode(tuple[1]), '%Y-%m-%dT%H:%M:%SZ')
        ttime1=time.strftime('%d %b %Y', otime)
        try:
            otime=time.strptime("%d" % tuple[2], '%Y%m%d%H%M%S')
            ttime2=time.strftime('%d %b %Y', otime)
            newpagetext+='\n|-----\n|{{u|' + tuple[0] + '}}\n|' + ttime1 + u'\n|' + ttime2
        except TypeError:
            # contribdate was empty/non-numeric: skip this row.
            pass
        except:
            raise
    #locale.setlocale(locale.LC_ALL, curlocale)
    newpagetext+=u'\n|}\n\n[[Catégorie:Wikipédia:Parrainage]]'
    if debug:
        newpage=pywikibot.Page(site, u'Utilisateur:ZéroBot/Test')
        botflag=True
    else:
        newpage=pywikibot.Page(site, u'Wikipédia:Parrainage_des_nouveaux/Nouveaux_en_attente')
        botflag=False
    #newpage=wikipedia.Page(site, u'Utilisateur:ZéroBot/Test')
    newpage.put(newpagetext, u'Mise à jour automatique de la liste', minorEdit = botflag, botflag = botflag);
    if fullmode and not debug:
        # Report users whose user page turned into a redirect.
        if len(redirectuser)>0:
            logpage=pywikibot.Page(site, u'Utilisateur:ZéroBot/Log/Avertissements')
            pagetext=logpage.get()
            pagetext+=u'\n==Utilisateur avec un redirect =='
            for user in redirectuser:
                pagetext+='\n* {{u|' + user + '}}'
            logpage.put(pagetext, u'Utilisateur en attente de parrainage et parrainés', minorEdit = False);
        isoDate = time.strftime('%Y-%m-%d %H:%M:%S', time.localtime())
        commandLogFile.write("%s Finished.\n" % isoDate)
        #commandLogFile.write(s + os.linesep)
        commandLogFile.close()