def main(args):
    """Grab a bunch of images and tag them if they are not categorized."""
    generator = None
    genFactory = pagegenerators.GeneratorFactory()
    site = pywikibot.getSite(u'commons', u'commons')
    pywikibot.setSite(site)
    for arg in pywikibot.handleArgs():
        if arg.startswith('-yesterday'):
            generator = uploadedYesterday(site)
        elif arg.startswith('-recentchanges'):
            generator = recentChanges(site=site, delay=120)
        else:
            genFactory.handleArg(arg)
    if not generator:
        generator = genFactory.getCombinedGenerator()
    if not generator:
        pywikibot.output(u'You have to specify the generator you want to use '
                         u'for the program!')
    else:
        pregenerator = pagegenerators.PreloadingGenerator(generator)
        for page in pregenerator:
            if page.exists() and page.namespace() == 6 \
                    and not page.isRedirectPage():
                if isUncat(page):
                    addUncat(page)
def main():
    global mysite, linktrail, page
    start = []
    for arg in pywikibot.handleArgs():
        start.append(arg)
    if start:
        start = " ".join(start)
    else:
        start = "!"
    mysite = pywikibot.getSite()
    linktrail = mysite.linktrail()
    try:
        generator = pagegenerators.CategorizedPageGenerator(
            mysite.disambcategory(), start=start)
    except pywikibot.NoPage:
        pywikibot.output(
            "The bot does not know the disambiguation category for your "
            "wiki.")
        raise
    # only work on articles
    generator = pagegenerators.NamespaceFilterPageGenerator(generator, [0])
    generator = pagegenerators.PreloadingGenerator(generator)
    pagestodo = []
    pagestoload = []
    for page in generator:
        if page.isRedirectPage():
            continue
        linked = page.linkedPages()
        pagestodo.append((page, linked))
        pagestoload += linked
        if len(pagestoload) > 49:
            pagestoload = pagegenerators.PreloadingGenerator(pagestoload)
            for page, links in pagestodo:
                workon(page, links)
            pagestoload = []
            pagestodo = []
def load_word_function(raw):
    """Load the badword list and the whitelist."""
    page = re.compile(r"(?:\"|\')(.*?)(?:\"|\')(?:, |\))", re.UNICODE)
    list_loaded = page.findall(raw)
    if len(list_loaded) == 0:
        pywikibot.output(u'There was no input on the real-time page.')
    return list_loaded
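# A minimal, self-contained sketch of what load_word_function() extracts:
# the regex pulls every quoted word out of a Python-style tuple listing,
# which is (as assumed here) how the real-time badword page is formatted.
# The sample input below is invented.
import re

raw = "('spamword', 'another one')"
pattern = re.compile(r"(?:\"|\')(.*?)(?:\"|\')(?:, |\))", re.UNICODE)
print(pattern.findall(raw))  # ['spamword', 'another one']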
def getPhotos(photoset=u'', start_id='', end_id='', interval=100):
    """Loop over a set of Panoramio photos."""
    i = 0
    has_more = True
    url = ('http://www.panoramio.com/map/get_panoramas.php?'
           'set=%s&from=%s&to=%s&size=original')
    while has_more:
        gotInfo = False
        maxtries = 10
        tries = 0
        while not gotInfo:
            try:
                if tries < maxtries:
                    tries += 1
                    panoramioApiPage = urlopen(url % (photoset, i,
                                                      i + interval))
                    contents = panoramioApiPage.read().decode('utf-8')
                    gotInfo = True
                    i += interval
                else:
                    break
            except IOError:
                pywikibot.output(u'Got an IOError, let\'s try again')
            except socket.timeout:
                pywikibot.output(u'Got a timeout, let\'s try again')
        metadata = json.loads(contents)
        photos = metadata.get(u'photos')
        for photo in photos:
            yield photo
        has_more = metadata.get(u'has_more')
    return
def isUncat(page):
    """
    Do we want to tag this page as uncategorized?

    If we find a category which is not in the ignore list, the page is
    already categorized, so skip it. If we find a template which is in
    the skip list, the page is already tagged, so skip it as well.
    Templates in the ignore list are irrelevant for categorization;
    any other template is taken as evidence of categorization.
    """
    pywikibot.output(u'Working on ' + page.title())
    for category in page.categories():
        if category not in ignoreCategories:
            pywikibot.output(u'Got category ' + category.title())
            return False
    for templateWithTrail in page.templates():
        # Strip off trailing garbage
        template = templateWithTrail.title().rstrip('\n').rstrip()
        if template in skipTemplates:
            # Already tagged with a template, skip it
            pywikibot.output(u'Already tagged, skip it')
            return False
        elif template in ignoreTemplates:
            # template not relevant for categorization
            pywikibot.output(u'Ignore ' + template)
        else:
            pywikibot.output(u'Not ignoring ' + template)
            return False
    return True
def convertAllHTMLTables(self, text):
    """
    Convert all HTML tables in text to wiki syntax.

    Returns the converted text, the number of converted tables and the
    number of warnings that occurred.
    """
    text = self.markActiveTables(text)
    convertedTables = 0
    warningSum = 0
    warningMessages = u''
    while True:
        table, start, end = self.findTable(text)
        if not table:
            # no more HTML tables left
            break
        # convert the current table
        newTable, warningsThisTable, warnMsgsThisTable = self.convertTable(
            table)
        warningSum += warningsThisTable
        for msg in warnMsgsThisTable:
            warningMessages += 'In table %i: %s' % (convertedTables + 1, msg)
        text = text[:start] + newTable + text[end:]
        convertedTables += 1
    pywikibot.output(warningMessages)
    return text, convertedTables, warningSum
def _do_insert(self, valuesdict):
    sqlreq = u"insert into `%(DB)s`.`%(table)s` (" % self.infos
    for i in valuesdict:
        sqlreq += u"`%s`," % self.connect.escape_string(i)
    sqlreq = sqlreq.strip(',')
    sqlreq += u") values ("
    for i in valuesdict:
        valuesdict[i] = valuesdict[i].replace("'", "\\'")
        sqlreq += u"'%s'," % valuesdict[i]
    sqlreq = sqlreq.strip(',')
    sqlreq += u")"
    try:
        self.cursor.execute(sqlreq)
    except UnicodeError:
        sqlreq = sqlreq.encode('utf8')
        self.cursor.execute(sqlreq)
    except Exception as e:
        if verbose:
            wikipedia.output(sqlreq)
        raise e
    self.querycount += 1
    if not self.querycount % 1000:
        qcstr = str(self.querycount)
        qcstr = qcstr + chr(8) * (len(qcstr) + 1)
        if verbose:
            print(qcstr, end=' ')
def withImage(self, institutionItem, invId=217, imageId=18, cacheMaxAge=0):
    """Query Wikidata to fill the cache of monuments we already have an
    object for."""
    result = {}
    collectionId = institutionItem.title().replace(u'Q', u'')
    query = u'CLAIM[195:%s] AND CLAIM[%s] AND CLAIM[%s]' % (collectionId,
                                                            invId, imageId)
    wd_queryset = wdquery.QuerySet(query)
    wd_query = wdquery.WikidataQuery(cacheMaxAge=cacheMaxAge)
    data = wd_query.query(wd_queryset, props=[str(imageId), ])
    if data.get('status').get('error') == 'OK':
        expectedItems = data.get('status').get('items')
        props = data.get('props').get(str(imageId))
        for prop in props:
            # FIXME: This will overwrite id's that are used more than once.
            # Use with care and clean up your dataset first
            result[prop[2]] = prop[0]
        if expectedItems == len(result):
            pywikibot.output('I now have %s items with an image in cache'
                             % expectedItems)
        else:
            pywikibot.output('I now have %s items with an image in cache, '
                             'but I expected %s'
                             % (len(result), expectedItems))
    return result
def test_archivebot(self, code=None):
    """Test archivebot for one site."""
    site = self.get_site(code)
    if code != 'de':  # bug T69663
        page = pywikibot.Page(site, 'user talk:xqt')
    else:
        page = pywikibot.Page(site, 'user talk:ladsgroup')
    talk = archivebot.DiscussionPage(page, None)
    self.assertIsInstance(talk.archives, dict)
    self.assertIsInstance(talk.archived_threads, int)
    self.assertTrue(talk.archiver is None)
    self.assertIsInstance(talk.header, basestring)
    self.assertIsInstance(talk.timestripper, TimeStripper)
    self.assertIsInstance(talk.threads, list)
    self.assertGreaterEqual(
        len(talk.threads), THREADS[code],
        u'{0:d} Threads found on {1!s},\n{2:d} or more expected'.format(
            len(talk.threads), talk, THREADS[code]))
    for thread in talk.threads:
        self.assertIsInstance(thread, archivebot.DiscussionThread)
        self.assertIsInstance(thread.title, basestring)
        self.assertIsInstance(thread.now, datetime)
        self.assertEqual(thread.now, talk.now)
        self.assertIsInstance(thread.ts, TimeStripper)
        self.assertEqual(thread.ts, talk.timestripper)
        self.assertIsInstance(thread.code, basestring)
        self.assertEqual(thread.code, talk.timestripper.site.code)
        self.assertIsInstance(thread.content, basestring)
        try:
            self.assertIsInstance(thread.timestamp, datetime)
        except AssertionError:
            if thread.code not in self.expected_failures:
                pywikibot.output('code {0!s}: {1!s}'.format(thread.code,
                                                            thread.content))
            raise
def load_page(self):
    """Load the page to be archived and break it up into threads."""
    self.header = ''
    self.threads = []
    self.archives = {}
    self.archived_threads = 0
    lines = self.get().split('\n')
    found = False  # Reading header
    cur_thread = None
    for line in lines:
        thread_header = re.search('^== *([^=].*?) *== *$', line)
        if thread_header:
            found = True  # Reading threads now
            if cur_thread:
                self.threads.append(cur_thread)
            cur_thread = DiscussionThread(thread_header.group(1), self.now,
                                          self.timestripper)
        else:
            if found:
                cur_thread.feed_line(line)
            else:
                self.header += line + '\n'
    if cur_thread:
        self.threads.append(cur_thread)
    # This extra info is not desirable when run under the unittest
    # framework, which may be run either directly or via setup.py
    if pywikibot.calledModuleName() not in ['archivebot_tests', 'setup']:
        pywikibot.output(u'%d Threads found on %s'
                         % (len(self.threads), self))
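# A quick, self-contained illustration of the thread-header regex used in
# load_page(): only level-2 headings start a new thread. The sample lines
# are invented.
import re

for line in ['== First topic ==', '=== Subsection ===', 'plain text']:
    m = re.search('^== *([^=].*?) *== *$', line)
    print(repr(line), '->', m.group(1) if m else None)
# Only '== First topic ==' matches; deeper headings and plain text are
# fed into the current thread instead.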
def run(self):
    """Start the robot."""
    for imagePage in self.generator:
        pywikibot.output(u'Working on %s' % imagePage.title())
        if imagePage.title(withNamespace=False) in self.withImage:
            pywikibot.output(
                u'Image is already in use in item %s'
                % self.withImage.get(imagePage.title(withNamespace=False)))
            continue
        text = imagePage.get()
        regex = r'\s*\|\s*accession number\s*=\s*([^\s]+)\s*'
        match = re.search(regex, text)
        if match:
            paintingId = match.group(1).strip()
            pywikibot.output(u'Found ID %s on the image' % paintingId)
            if paintingId in self.withoutImage:
                pywikibot.output(u'Found an item to add it to!')
                paintingItemTitle = u'Q%s' % self.withoutImage.get(paintingId)
                paintingItem = pywikibot.ItemPage(self.repo,
                                                  title=paintingItemTitle)
                paintingItem.get()
                if u'P18' not in paintingItem.claims:
                    newclaim = pywikibot.Claim(self.repo, u'P18')
                    newclaim.setTarget(imagePage)
                    pywikibot.output('Adding image claim to %s'
                                     % paintingItem)
                    summary = u'Adding image based on %s' % paintingId
                    paintingItem.addClaim(newclaim, summary=summary)
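# Sketch of the accession-number capture above, run on an invented file
# description snippet; the ID value is illustrative only.
import re

text = '|accession number = SK-A-2344\n'
match = re.search(r'\s*\|\s*accession number\s*=\s*([^\s]+)\s*', text)
print(match.group(1) if match else None)  # SK-A-2344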
def run(self):
    """Start the bot."""
    template_image = i18n.translate(self.site, template_to_the_image)
    template_user = i18n.translate(self.site, template_to_the_user)
    summary = i18n.translate(self.site, comment, fallback=True)
    if not all([template_image, template_user, summary]):
        raise pywikibot.Error(u'This script is not localized for %s site.'
                              % self.site)
    self.summary = summary
    generator = pagegenerators.UnusedFilesGenerator(site=self.site)
    generator = pagegenerators.PreloadingGenerator(generator)
    for image in generator:
        if not image.exists():
            pywikibot.output(u"File '%s' does not exist (see bug 69133)."
                             % image.title())
            continue
        # Use fileUrl() and fileIsShared() to confirm it is local media
        # rather than a local page with the same name as shared media.
        if (image.fileUrl() and not image.fileIsShared()
                and u'http://' not in image.text):
            if template_image in image.text:
                pywikibot.output(u'%s done already'
                                 % image.title(asLink=True))
                continue
            self.append_text(image, u'\n\n' + template_image)
            uploader = image.getFileVersionHistory().pop(0)['user']
            user = pywikibot.User(image.site, uploader)
            usertalkpage = user.getUserTalkPage()
            msg2uploader = template_user % {'title': image.title()}
            self.append_text(usertalkpage, msg2uploader)
def addReleased(self, item, imdbid):
    """Add the first airdate to the item based on the imdbid."""
    pywikibot.output(u'Trying to add date to %s based on %s'
                     % (item, imdbid))
    data = item.get()
    claims = data.get('claims')
    if u'P1191' in claims:
        return True
    if imdbid not in self.imdbcache:
        return False
    releasedate = self.imdbcache[imdbid].get('released')
    regex = r'^(\d\d\d\d)-(\d\d)-(\d\d)$'
    match = re.match(regex, releasedate)
    if not match:
        return False
    newdate = pywikibot.WbTime(year=int(match.group(1)),
                               month=int(match.group(2)),
                               day=int(match.group(3)))
    newclaim = pywikibot.Claim(self.repo, u'P1191')
    newclaim.setTarget(newdate)
    pywikibot.output('Adding release date claim %s to %s'
                     % (releasedate, item))
    item.addClaim(newclaim)
    refurl = pywikibot.Claim(self.repo, u'P854')
    refurl.setTarget(u'http://www.omdbapi.com/?i=%s' % imdbid)
    refdate = pywikibot.Claim(self.repo, u'P813')
    today = datetime.datetime.today()
    date = pywikibot.WbTime(year=today.year, month=today.month,
                            day=today.day)
    refdate.setTarget(date)
    newclaim.addSources([refurl, refdate])
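# Quick check of the release-date pattern used in addReleased() on a
# sample OMDb-style date string (the value is illustrative).
import re

match = re.match(r'^(\d\d\d\d)-(\d\d)-(\d\d)$', '2015-04-01')
print(match.groups() if match else None)  # ('2015', '04', '01')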
def login(self, retry=False):
    if not self.password:
        # As we don't want the password to appear on the screen, we set
        # password = True
        self.password = pywikibot.input(
            u'Password for user %(name)s on %(site)s (no characters will '
            u'be shown):' % {'name': self.username, 'site': self.site},
            password=True)
        # self.password = self.password.encode(self.site.encoding())
    pywikibot.output(u'Logging in to %(site)s as %(name)s'
                     % {'name': self.username, 'site': self.site})
    try:
        cookiedata = self.getCookie()
    except pywikibot.data.api.APIError as e:
        pywikibot.error(u'Login failed (%s).' % e.code)
        if retry:
            self.password = None
            return self.login(retry=True)
        else:
            return False
    self.storecookiedata(cookiedata)
    pywikibot.log(u'Should be logged in now')
    # Show a warning according to the local bot policy
    # FIXME: disabled due to recursion; need to move this to the Site
    # object after login
    # if not self.botAllowed():
    #     logger.error(
    #         u"Username '%(name)s' is not listed on [[%(page)s]]."
    #         % {'name': self.username,
    #            'page': botList[self.site.family.name][self.site.code]})
    #     logger.error(
    #         "Please make sure you are allowed to use the robot "
    #         "before actually using it!")
    #     return False
    return True
def main(*args):
    """
    Process command line arguments and invoke bot.

    If args is an empty list, sys.argv is used.

    @param args: command line arguments
    @type args: list of unicode
    """
    all = False
    new = False
    sysop = False
    for arg in pywikibot.handle_args(args):
        if arg in ('-all', '-update'):
            all = True
        elif arg == '-new':
            new = True
        elif arg == '-sysop':
            sysop = True
    if all:
        refresh_all(sysop=sysop)
    elif new:
        refresh_new(sysop=sysop)
    else:
        site = pywikibot.Site()
        watchlist = refresh(site, sysop=sysop)
        pywikibot.output(u'{0:d} pages in the watchlist.'
                         .format(len(watchlist)))
        for page in watchlist:
            try:
                pywikibot.stdout(page.title())
            except pywikibot.InvalidTitle:
                pywikibot.exception()
def main(*args):
    # Process global args and prepare generator args parser
    local_args = pywikibot.handle_args(args)
    googlecat = False
    collectionid = False
    for arg in local_args:
        if arg.startswith('-googlecat'):
            if len(arg) == 10:
                googlecat = pywikibot.input(
                    u'Please enter the category you want to work on:')
            else:
                googlecat = arg[11:]
        elif arg.startswith('-collectionid'):
            if len(arg) == 13:
                collectionid = pywikibot.input(
                    u'Please enter the collectionid you want to work on:')
            else:
                collectionid = arg[14:]
        # else:
        #     generator_factory.handleArg(arg)
    if googlecat and collectionid:
        imageFindBot = ImageFindBot(googlecat, collectionid)
        imageFindBot.run()
    else:
        pywikibot.output(u'Usage: pwb.py add_google_images.py '
                         u'-googlecat:<category name> -collectionid:Q<123>')
def fillCache(self, collectionqid, idProperty, queryoverride=u'',
              cacheMaxAge=0):
    """Query Wikidata to fill the cache of items we already have an
    object for."""
    result = {}
    if queryoverride:
        query = queryoverride
    else:
        query = u'CLAIM[195:%s] AND CLAIM[%s]' % (
            collectionqid.replace(u'Q', u''), idProperty)
    wd_queryset = wdquery.QuerySet(query)
    wd_query = wdquery.WikidataQuery(cacheMaxAge=cacheMaxAge)
    data = wd_query.query(wd_queryset, props=[str(idProperty), ])
    if data.get('status').get('error') == 'OK':
        expectedItems = data.get('status').get('items')
        props = data.get('props').get(str(idProperty))
        for prop in props:
            # FIXME: This will overwrite id's that are used more than once.
            # Use with care and clean up your dataset first
            result[prop[2]] = prop[0]
        if expectedItems == len(result):
            pywikibot.output('I now have %s items in cache' % expectedItems)
        else:
            pywikibot.output('I expected %s items, but I have %s items in '
                             'cache' % (expectedItems, len(result)))
    return result
def subcatquery(enlink, firstsite):
    if _cache.get((enlink, firstsite, "subcat_query")):
        return _cache[(enlink, firstsite, "subcat_query")]
    cats = []
    try:
        enlink = (unicode(str(enlink), "UTF-8")
                  .replace(u"[[", u"")
                  .replace(u"]]", u"")
                  .replace(u"en:", u"")
                  .replace(u"fa:", u""))
    except Exception:
        enlink = (enlink.replace(u"[[", u"").replace(u"]]", u"")
                  .replace(u"en:", u"").replace(u"fa:", u""))
    enlink = enlink.split(u"#")[0].strip()
    if enlink == u"":
        _cache[(enlink, firstsite, "subcat_query")] = False
        return False
    enlink = enlink.replace(u" ", u"_")
    site = pywikibot.Site(firstsite)
    params = {"action": "query",
              "list": "categorymembers",
              "cmtitle": enlink,
              "cmtype": "subcat",
              "cmlimit": 500}
    try:
        categoryname = pywikibot.data.api.Request(site=site,
                                                  **params).submit()
        for item in categoryname[u"query"][u"categorymembers"]:
            categoryha = item[u"title"]
            pywikibot.output(categoryha)
            cats.append(categoryha)
        if cats != []:
            _cache[(enlink, firstsite, "subcat_query")] = cats
            return cats
    except Exception:
        _cache[(enlink, firstsite, "subcat_query")] = False
        return False
def main(*args):
    """
    Process command line arguments and invoke bot.

    If args is an empty list, sys.argv is used.

    @param args: command line arguments
    @type args: list of unicode
    """
    parent = None
    basename = None
    options = {}
    # Process global args and prepare generator args parser
    local_args = pywikibot.handle_args(args)
    genFactory = pagegenerators.GeneratorFactory()
    for arg in local_args:
        if arg == '-always':
            options['always'] = True
        elif arg.startswith('-parent:'):
            parent = arg[len('-parent:'):].strip()
        elif arg.startswith('-basename'):
            basename = arg[len('-basename:'):].strip()
        else:
            genFactory.handleArg(arg)
    generator = genFactory.getCombinedGenerator()
    if generator and parent and basename:
        bot = CreateCategoriesBot(generator, parent, basename, **options)
        bot.run()
        pywikibot.output(u'All done')
    else:
        pywikibot.output(u'No pages to work on')
        pywikibot.showHelp()
def showImageList(self, imagelist):
    """Print image list."""
    for i in range(len(imagelist)):
        image = imagelist[i]
        print("-" * 60)
        pywikibot.output(u'%s. Found image: %s'
                         % (i, image.title(asLink=True)))
        try:
            # Show the image description page's contents
            pywikibot.output(image.get())
            # look if page already exists with this name.
            # TODO: consider removing this: a different image of the same
            # name may exist on the target wiki, and the bot user may want
            # to upload anyway, using another name.
            try:
                # Maybe the image is on the target site already
                targetTitle = '%s:%s' % (self.targetSite.namespaces.FILE,
                                         image.title().split(':', 1)[1])
                targetImage = pywikibot.Page(self.targetSite, targetTitle)
                targetImage.get()
                pywikibot.output(u'Image with this name is already on %s.'
                                 % self.targetSite)
                print("-" * 60)
                pywikibot.output(targetImage.get())
                sys.exit()
            except pywikibot.NoPage:
                # That's the normal case
                pass
            except pywikibot.IsRedirectPage:
                pywikibot.output(
                    u'Description page on target wiki is redirect?!')
        except pywikibot.NoPage:
            break
    print("=" * 60)
def pre(taskid=-1, lock=None, sites=[], continuous=False, main=None):
    """
    Return argument list, site object, and configuration of the script.

    This function also handles default arguments, generates a lockfile
    and halts the script if a lockfile already exists.
    """
    import imp
    global info
    info["main"] = main == "__main__"
    if continuous:
        lock = False
    pywikibot.handleArgs("-log")
    pywikibot.output("start task #%s at %s" % (taskid, getTime()))
    info["taskid"] = taskid
    info["lock"] = lock
    info["lockfile"] = simplifypath([os.environ["WPROBOT_DIR"], "tmp",
                                     info["basescript"] + ".lock"])
    info["continuous"] = continuous
    if os.path.exists(info["lockfile"]) and lock:
        error("lockfile found. unable to execute the script.")
        if info["main"]:
            pywikibot.stopme()
            sys.exit(ExitCode.LockFileError)
    open(info["lockfile"], "w").close()
    args = pywikibot.handleArgs()  # must be called before Site()
    site = pywikibot.Site()
    info["site"] = site
    confpath = simplifypath([os.environ["WPROBOT_DIR"], "conf",
                             info["basescript"]])
    module = imp.load_source("conf", confpath) \
        if os.path.exists(confpath) else None
    return args, site, module
def try_to_add(self):
    """Add the current page to the repo."""
    wd_data = set()
    for iw_page in self.iwlangs.values():
        try:
            wd_data.add(pywikibot.ItemPage.fromPage(iw_page))
        except pywikibot.NoPage:
            warning('Interwiki %s does not exist, skipping...'
                    % iw_page.title(asLink=True))
            continue
        except pywikibot.InvalidTitle:
            warning('Invalid title %s, skipping...'
                    % iw_page.title(asLink=True))
            continue
    if len(wd_data) != 1:
        warning('Interwiki conflict in %s, skipping...'
                % self.current_page.title(asLink=True))
        return False
    item = list(wd_data).pop()
    if self.current_page.site.dbName() in item.sitelinks:
        warning('Interwiki conflict in %s, skipping...'
                % item.title(asLink=True))
        return False
    output('Adding link to %s' % item.title())
    item.setSitelink(self.current_page)
    return item
def findCommonscatLink(self, page=None):
    """Find CommonsCat template on interwiki pages.

    In Pywikibot 2.0, page.interwiki() now returns Link objects,
    not Page objects.

    @rtype: unicode, name of a valid commons category
    """
    for ipageLink in page.langlinks():
        ipage = pywikibot.page.Page(ipageLink)
        pywikibot.log('Looking for template on %s' % ipage.title())
        try:
            if (not ipage.exists() or ipage.isRedirectPage()
                    or ipage.isDisambig()):
                continue
            commonscatLink = self.getCommonscatLink(ipage)
            if not commonscatLink:
                continue
            (currentTemplate, possibleCommonscat, linkText,
             Note) = commonscatLink
            checkedCommonscat = self.checkCommonscatLink(possibleCommonscat)
            if checkedCommonscat != u'':
                pywikibot.output(
                    u'Found link for %s at [[%s:%s]] to %s.'
                    % (page.title(), ipage.site.code, ipage.title(),
                       checkedCommonscat))
                return checkedCommonscat
        except pywikibot.BadTitle:
            # The interwiki was incorrect
            return u''
    return u''
def createGraph(self):
    """
    Create graph of the interwiki links.

    For more info see U{http://meta.wikimedia.org/wiki/Interwiki_graphs}
    """
    pywikibot.output(u'Preparing graph for %s'
                     % self.subject.originPage.title())
    # create empty graph
    self.graph = pydot.Dot()
    # self.graph.set('concentrate', 'true')
    self.octagon_sites = self._octagon_site_set()
    for page in self.subject.foundIn.keys():
        # a node for each found page
        self.addNode(page)
    # mark start node by pointing there from a black dot.
    firstLabel = self.getLabel(self.subject.originPage)
    self.graph.add_node(pydot.Node('start', shape='point'))
    self.graph.add_edge(pydot.Edge('start', firstLabel))
    for page, referrers in self.subject.foundIn.items():
        for refPage in referrers:
            self.addDirectedEdge(page, refPage)
    self.saveGraphFile()
def fillCaches(collectionqid):
    """
    Build an ID cache so we can quickly look up the ids for a property.

    Only return items in this ID cache for which we don't already have
    the Art UK artwork ID (P1679) link. Build a second art uk -> Qid
    cache for items we don't have to process.
    """
    invcache = {}
    artukcache = {}
    # FIXME: Do something with the collection qualifier
    query = (u'SELECT ?item ?inv ?artukid WHERE { '
             u'?item wdt:P195 wd:%s . '
             u'?item wdt:P217 ?inv . '
             u'OPTIONAL { ?item wdt:P1679 ?artukid } }' % collectionqid)
    sq = pywikibot.data.sparql.SparqlQuery()
    queryresult = sq.select(query)
    for resultitem in queryresult:
        qid = resultitem.get('item').replace(
            u'http://www.wikidata.org/entity/', u'')
        if resultitem.get('artukid'):
            artukcache[resultitem.get('artukid')] = qid
        else:
            invcache[resultitem.get('inv')] = qid
    pywikibot.output(u'The query "%s" returned %s items with and %s items '
                     u'without an ART UK work link'
                     % (query, len(artukcache), len(invcache)))
    return (invcache, artukcache)
def addQualifier(self, item, claim, qual):
    """
    Check if a qualifier is present at the given claim, otherwise add it.

    Known issue: This will qualify an already referenced claim; this
    must therefore be tested before.

    param item: itemPage to check
    param claim: Claim to check
    param qual: Qualifier to check
    """
    # check if already present
    if self.hasQualifier(qual, claim):
        return False
    qClaim = self.make_simple_claim(qual.prop, qual.itis)
    try:
        claim.addQualifier(qClaim)  # writes to database
        pywikibot.output('Adding qualifier %s to %s in %s'
                         % (qual.prop, claim.getID(), item))
        return True
    except pywikibot.data.api.APIError as e:
        if e.code == u'modification-failed':
            pywikibot.output(u'modification-failed error: '
                             u'qualifier to %s to %s in %s'
                             % (qual.prop, claim.getID(), item))
            return False
        else:
            raise pywikibot.Error(
                'Something went very wrong trying to add a qualifier: %s'
                % e)
def update_or_create_page(self, old_page, new_text):
    """
    Read the current text of page old_page, compare it with new_text,
    prompt the user, and upload the page.
    """
    # Read the original content
    old_text = old_page.get()
    # Give the user some context
    if old_text != new_text:
        pywikibot.output(new_text)
        pywikibot.showDiff(old_text, new_text)
    # Did anything change?
    if old_text == new_text:
        pywikibot.output(u'No changes necessary to %s' % old_page.title())
    else:
        if not self.acceptall:
            choice = pywikibot.input_choice(
                u'Do you want to accept these changes?',
                [('Yes', 'Y'), ('No', 'n'), ('All', 'a')], 'N')
            if choice == 'a':
                self.acceptall = True
        if self.acceptall or choice == 'y':
            # Write out the new version
            old_page.put(new_text, summary)
def run(self):
    # If the enable page is set to disable, turn off the bot
    # (useful when the bot is run on a server)
    if not self.enable_page():
        pywikibot.output('The bot is disabled')
        return
    super(LonelyPagesBot, self).run()
def addReference(self, item, claim, ref):
    """Add a reference if not already present.

    param item: the item on which all of this happens
    param claim: the pywikibot.Claim to be sourced
    param ref: the WD.Reference to add
    """
    # check if any of the sources are already present
    # note that this can be in any of its references
    if ref is None:
        return False
    if any(self.hasRef(source.getID(), source.getTarget(), claim)
           for source in ref.source_test):
        return False
    try:
        claim.addSources(ref.get_all_sources())  # writes to database
        pywikibot.output('Adding reference claim to %s in %s'
                         % (claim.getID(), item))
        return True
    except pywikibot.data.api.APIError as e:
        if e.code == u'modification-failed':
            pywikibot.output(u'modification-failed error: '
                             u'ref to %s in %s' % (claim.getID(), item))
            return False
        else:
            raise pywikibot.Error(
                'Something went very wrong trying to add a source: %s' % e)
def treat_page(self):
    """Check page."""
    if (self.current_page.namespace() not in namespaces
            and not self.getOption('ignore_ns')):
        output('{page} is not in allowed namespaces, skipping'
               .format(page=self.current_page.title(asLink=True)))
        return False
    self.iwlangs = pywikibot.textlib.getLanguageLinks(
        self.current_page.text, insite=self.current_page.site)
    if not self.iwlangs:
        output('No interlanguagelinks on {page}'
               .format(page=self.current_page.title(asLink=True)))
        return False
    try:
        item = pywikibot.ItemPage.fromPage(self.current_page)
    except pywikibot.NoPage:
        item = None
    if item is None:
        item = self.try_to_add()
        if self.getOption('create') and item is None:
            item = self.create_item()
    self.current_item = item
    if item and self.getOption('clean'):
        self.clean_page()
def main(*args):
    """Process command line arguments and invoke PatrolBot."""
    # This factory is responsible for processing command line arguments
    # that are also used by other scripts and that determine on which
    # pages to work on.
    usercontribs = None
    gen = None
    recentchanges = False
    newpages = False
    repeat = False
    options = {}

    # Parse command line arguments
    local_args = pywikibot.handle_args(args)
    site = pywikibot.Site()
    gen_factory = pagegenerators.GeneratorFactory(site)
    for arg in local_args:
        if arg.startswith('-ask'):
            options['ask'] = True
        elif arg.startswith('-autopatroluserns'):
            options['autopatroluserns'] = True
        elif arg.startswith('-repeat'):
            repeat = True
        elif arg.startswith('-newpages'):
            newpages = True
        elif arg.startswith('-recentchanges'):
            recentchanges = True
        elif arg.startswith('-usercontribs:'):
            usercontribs = arg[14:]
        elif arg.startswith('-versionchecktime:'):
            versionchecktime = arg[len('-versionchecktime:'):]
            options['versionchecktime'] = int(versionchecktime)
        elif arg.startswith('-whitelist:'):
            options['whitelist'] = arg[len('-whitelist:'):]
        else:
            generator = gen_factory.handleArg(arg)
            if not generator:
                if ':' in arg:
                    m = arg.split(':')
                    options[m[0]] = m[1]

    if usercontribs:
        user = pywikibot.User(site, usercontribs)
        if user.isAnonymous() or user.isRegistered():
            pywikibot.output('Processing user: {}'.format(usercontribs))
        else:
            pywikibot.warning('User {} does not exist on site {}.'.format(
                usercontribs, site))

    # default behaviour
    if not any((newpages, recentchanges, usercontribs)):
        if site.family.name == 'wikipedia':
            newpages = True
        else:
            recentchanges = True

    bot = PatrolBot(**options)

    if isinstance(mwparserfromhell, ImportError):
        suggest_help(missing_dependencies=('mwparserfromhell',))
        return

    if newpages or usercontribs:
        pywikibot.output('Newpages:')
        gen = site.newpages
        feed = api_feed_repeater(gen, delay=60, repeat=repeat,
                                 user=usercontribs,
                                 namespaces=gen_factory.namespaces,
                                 recent_new_gen=False)
        bot.run(feed)

    if recentchanges or usercontribs:
        pywikibot.output('Recentchanges:')
        gen = site.recentchanges
        feed = api_feed_repeater(gen, delay=60, repeat=repeat,
                                 namespaces=gen_factory.namespaces,
                                 user=usercontribs)
        bot.run(feed)

    pywikibot.output('{0}/{1} patrolled'.format(bot.patrol_counter,
                                                bot.rc_item_counter))
def transferImage(self, sourceImagePage):
    """
    Download image and its description, and upload it to another site.

    @return: the filename which was used to upload the image
    """
    sourceSite = sourceImagePage.site
    url = sourceImagePage.fileUrl().encode('utf-8')
    pywikibot.output(u'URL should be: %s' % url)
    # localize the text that should be printed on the image description
    # page
    try:
        description = sourceImagePage.get()
        # try to translate license templates
        if (sourceSite.sitename,
                self.targetSite.sitename) in licenseTemplates:
            for old, new in licenseTemplates[
                    (sourceSite.sitename,
                     self.targetSite.sitename)].items():
                new = '{{%s}}' % new
                old = re.compile('{{%s}}' % old)
                description = textlib.replaceExcept(
                    description, old, new,
                    ['comment', 'math', 'nowiki', 'pre'])
        description = i18n.twtranslate(self.targetSite,
                                       'imagetransfer-file_page_message',
                                       dict(site=sourceSite,
                                            description=description))
        description += '\n\n'
        description += sourceImagePage.getFileVersionHistoryTable()
        # add interwiki link
        if sourceSite.family == self.targetSite.family:
            description += u'\r\n\r\n{0}'.format(sourceImagePage)
    except pywikibot.NoPage:
        description = ''
        pywikibot.output(
            'Image does not exist or description page is empty.')
    except pywikibot.IsRedirectPage:
        description = ''
        pywikibot.output('Image description page is redirect.')
    else:
        bot = UploadRobot(url=url, description=description,
                          targetSite=self.targetSite,
                          urlEncoding=sourceSite.encoding(),
                          keepFilename=self.keep_name,
                          verifyDescription=not self.keep_name,
                          ignoreWarning=self.ignore_warning)
        # try to upload
        targetFilename = bot.run()
        if targetFilename and self.targetSite.family.name == 'commons' \
                and self.targetSite.code == 'commons':
            # upload to Commons was successful
            reason = i18n.twtranslate(sourceSite,
                                      'imagetransfer-nowcommons_notice')
            # try to delete the original image if we have a sysop account
            if sourceSite.family.name in config.sysopnames \
                    and sourceSite.lang in \
                    config.sysopnames[sourceSite.family.name]:
                if sourceImagePage.delete(reason):
                    return
            if sourceSite.lang in nowCommonsTemplate \
                    and sourceSite.family.name in config.usernames \
                    and sourceSite.lang in \
                    config.usernames[sourceSite.family.name]:
                # add the nowCommons template.
                pywikibot.output(u'Adding nowCommons template to %s'
                                 % sourceImagePage.title())
                sourceImagePage.put(
                    sourceImagePage.get() + '\n\n'
                    + nowCommonsTemplate[sourceSite.lang] % targetFilename,
                    summary=reason)
def main(*args):
    """
    Process command line arguments and perform task.

    If args is an empty list, sys.argv is used.

    @param args: command line arguments
    @type args: list of unicode
    """
    # Loading the comments
    global categoryToCheck, project_inserted
    # always, define a generator to understand if the user sets one,
    # defining what's genFactory
    always = False
    generator = False
    show = False
    moveBlockCheck = False
    protectedpages = False
    protectType = 'edit'
    namespace = 0

    # To prevent Infinite loops
    errorCount = 0

    # Process global args and prepare generator args parser
    local_args = pywikibot.handle_args(args)
    genFactory = pagegenerators.GeneratorFactory()

    # Process local args
    for arg in local_args:
        option, sep, value = arg.partition(':')
        if option == '-always':
            always = True
        elif option == '-move':
            moveBlockCheck = True
        elif option == '-show':
            show = True
        elif option in ('-protectedpages', '-moveprotected'):
            protectedpages = True
            if option == '-moveprotected':
                protectType = 'move'
            if value:
                namespace = int(value)
        else:
            genFactory.handleArg(arg)

    if config.mylang not in project_inserted:
        pywikibot.output(u'Your project is not supported by this script.\n'
                         u'You have to edit the script and add it!')
        return

    site = pywikibot.Site()

    if protectedpages:
        generator = site.protectedpages(namespace=namespace,
                                        type=protectType)
    # Take the right templates to use, the category and the comment
    TSP = i18n.translate(site, templateSemiProtection)
    TTP = i18n.translate(site, templateTotalProtection)
    TSMP = i18n.translate(site, templateSemiMoveProtection)
    TTMP = i18n.translate(site, templateTotalMoveProtection)
    TNR = i18n.translate(site, templateNoRegex)
    TU = i18n.translate(site, templateUnique)

    categories = i18n.translate(site, categoryToCheck)
    commentUsed = i18n.twtranslate(site, 'blockpageschecker-summary')
    if not generator:
        generator = genFactory.getCombinedGenerator()
    if not generator:
        generator = []
        pywikibot.output(u'Loading categories...')
        # Define the category if no other generator has been set
        for CAT in categories:
            cat = pywikibot.Category(site, CAT)
            # Define the generator
            gen = pagegenerators.CategorizedPageGenerator(cat)
            for pageCat in gen:
                generator.append(pageCat)
        pywikibot.output(u'Categories loaded, start!')
    # Main Loop
    if not genFactory.nopreload:
        generator = pagegenerators.PreloadingGenerator(generator,
                                                       groupsize=60)
    for page in generator:
        pagename = page.title(asLink=True)
        pywikibot.output('Loading %s...' % pagename)
        try:
            text = page.text
        except pywikibot.NoPage:
            pywikibot.output("%s doesn't exist! Skipping..." % pagename)
            continue
        except pywikibot.IsRedirectPage:
            pywikibot.output('%s is a redirect! Skipping...' % pagename)
            if show:
                showQuest(page)
            continue
        # FIXME: This check does not work:
        # PreloadingGenerator cannot set correctly page.editRestriction
        # (see bug T57322)
        # if not page.canBeEdited():
        #     pywikibot.output("%s is sysop-protected: this account can't "
        #                      "edit it! Skipping..." % pagename)
        #     continue
        restrictions = page.protection()
        try:
            editRestr = restrictions['edit']
        except KeyError:
            editRestr = None
        if not page.canBeEdited():
            pywikibot.output(u"%s is protected: "
                             u"this account can't edit it! Skipping..."
                             % pagename)
            continue

        # Understand, according to the template in the page, what should
        # be the protection and compare it with what there really is.
        TemplateInThePage = understandBlock(text, TTP, TSP, TSMP, TTMP, TU)
        # Only to see if the text is the same or not...
        oldtext = text
        # keep track of the changes for each step (edit then move)
        changes = -1

        if not editRestr:
            # page is not edit-protected
            # Deleting the template because the page doesn't need it.
            if not (TTP or TSP):
                raise pywikibot.Error(
                    'This script is not localized to use it on \n{0}. '
                    'Missing "templateSemiProtection" or '
                    '"templateTotalProtection"'.format(site.sitename))
            if TU:
                replaceToPerform = u'|'.join(TTP + TSP + TU)
            else:
                replaceToPerform = u'|'.join(TTP + TSP)
            text, changes = re.subn('<noinclude>(%s)</noinclude>'
                                    % replaceToPerform, '', text)
            if changes == 0:
                text, changes = re.subn('(%s)' % replaceToPerform, '', text)
            msg = u'The page is editable for all'
            if not moveBlockCheck:
                msg += u', deleting the template..'
            pywikibot.output(u'%s.' % msg)
        elif editRestr[0] == 'sysop':
            # total edit protection
            if (TemplateInThePage[0] == 'sysop-total' and TTP) or \
               (TemplateInThePage[0] == 'unique' and TU):
                msg = 'The page is protected to the sysop'
                if not moveBlockCheck:
                    msg += ', skipping...'
                pywikibot.output(msg)
            else:
                if not TNR or TU and not TNR[4] or not (TU or TNR[1]):
                    raise pywikibot.Error(
                        'This script is not localized to use it on \n{0}. '
                        'Missing "templateNoRegex"'.format(site.sitename))
                pywikibot.output(u'The page is protected to the sysop, but '
                                 u'the template seems not correct. '
                                 u'Fixing...')
                if TU:
                    text, changes = re.subn(TemplateInThePage[1], TNR[4],
                                            text)
                else:
                    text, changes = re.subn(TemplateInThePage[1], TNR[1],
                                            text)
        elif TSP or TU:
            # implicitly editRestr[0] = 'autoconfirmed',
            # edit-semi-protection
            if TemplateInThePage[0] == 'autoconfirmed-total' or \
               TemplateInThePage[0] == 'unique':
                msg = 'The page is editable only for the autoconfirmed users'
                if not moveBlockCheck:
                    msg += ', skipping...'
                pywikibot.output(msg)
            else:
                if not TNR or TU and not TNR[4] or not (TU or TNR[1]):
                    raise pywikibot.Error(
                        'This script is not localized to use it on \n{0}. '
                        'Missing "templateNoRegex"'.format(site.sitename))
                pywikibot.output(u'The page is editable only for the '
                                 u'autoconfirmed users, but the template '
                                 u'seems not correct. Fixing...')
                if TU:
                    text, changes = re.subn(TemplateInThePage[1], TNR[4],
                                            text)
                else:
                    text, changes = re.subn(TemplateInThePage[1], TNR[0],
                                            text)

        if changes == 0:
            # We tried to fix edit-protection templates, but it did not work.
            pywikibot.warning('No edit-protection template could be found')

        if moveBlockCheck and changes > -1:
            # checking move protection now
            try:
                moveRestr = restrictions['move']
            except KeyError:
                moveRestr = False
            changes = -1

            if not moveRestr:
                pywikibot.output(u'The page is movable for all, deleting '
                                 u'the template...')
                # Deleting the template because the page doesn't need it.
                if TU:
                    replaceToPerform = u'|'.join(TSMP + TTMP + TU)
                else:
                    replaceToPerform = u'|'.join(TSMP + TTMP)
                text, changes = re.subn('<noinclude>(%s)</noinclude>'
                                        % replaceToPerform, '', text)
                if changes == 0:
                    text, changes = re.subn('(%s)' % replaceToPerform, '',
                                            text)
            elif moveRestr[0] == 'sysop':
                # move-total-protection
                if (TemplateInThePage[0] == 'sysop-move' and TTMP) or \
                   (TemplateInThePage[0] == 'unique' and TU):
                    pywikibot.output(u'The page is protected from moving to '
                                     u'the sysop, skipping...')
                    if TU:
                        # no changes needed, better to revert the old text.
                        text = oldtext
                else:
                    pywikibot.output(u'The page is protected from moving to '
                                     u'the sysop, but the template seems '
                                     u'not correct. Fixing...')
                    if TU:
                        text, changes = re.subn(TemplateInThePage[1], TNR[4],
                                                text)
                    else:
                        text, changes = re.subn(TemplateInThePage[1], TNR[3],
                                                text)
            elif TSMP or TU:
                # implicitly moveRestr[0] = 'autoconfirmed',
                # move-semi-protection
                if TemplateInThePage[0] == 'autoconfirmed-move' or \
                   TemplateInThePage[0] == 'unique':
                    pywikibot.output(u'The page is movable only for the '
                                     u'autoconfirmed users, skipping...')
                    if TU:
                        # no changes needed, better to revert the old text.
                        text = oldtext
                else:
                    pywikibot.output(u'The page is movable only for the '
                                     u'autoconfirmed users, but the '
                                     u'template seems not correct. '
                                     u'Fixing...')
                    if TU:
                        text, changes = re.subn(TemplateInThePage[1], TNR[4],
                                                text)
                    else:
                        text, changes = re.subn(TemplateInThePage[1], TNR[2],
                                                text)

            if changes == 0:
                # We tried to fix move-protection templates,
                # but it did not work
                pywikibot.warning('No move-protection template could '
                                  'be found')

        if oldtext != text:
            # Ok, asking if the change has to be performed and do it if yes.
            pywikibot.output(color_format(
                '\n\n>>> {lightpurple}{0}{default} <<<', page.title()))
            pywikibot.showDiff(oldtext, text)
            if not always:
                choice = pywikibot.input_choice(u'Do you want to accept '
                                                u'these changes?',
                                                [('Yes', 'y'), ('No', 'n'),
                                                 ('All', 'a')], 'n')
                if choice == 'a':
                    always = True
            if always or choice == 'y':
                while True:
                    try:
                        page.put(text, commentUsed, force=True)
                    except pywikibot.EditConflict:
                        pywikibot.output(u'Edit conflict! skip!')
                        break
                    except pywikibot.ServerError:
                        # Sometimes there is this error that's quite
                        # annoying because it can block the whole process
                        # for nothing.
                        errorCount += 1
                        if errorCount < 5:
                            pywikibot.output(u'Server Error! Wait..')
                            time.sleep(3)
                            continue
                        else:
                            # Prevent Infinite Loops
                            raise pywikibot.ServerError(
                                u'Fifth Server Error!')
                    except pywikibot.SpamfilterError as e:
                        pywikibot.output(u'Cannot change %s because of '
                                         u'blacklist entry %s'
                                         % (page.title(), e.url))
                        break
                    except pywikibot.LockedPage:
                        pywikibot.output(u'The page is still protected. '
                                         u'Skipping...')
                        break
                    except pywikibot.PageNotSaved as error:
                        pywikibot.output(u'Error putting page: %s'
                                         % (error.args,))
                        break
                    else:
                        # Break only if the errors are one after the other
                        errorCount = 0
                        break
def move_to_category(self, article, original_cat, current_cat):
    """
    Given an article which is in category original_cat, ask the user if
    it should be moved to one of original_cat's subcategories.
    Recursively run through subcategories' subcategories.

    NOTE: current_cat is only used for internal recursion. You should
    always use current_cat = original_cat.
    """
    pywikibot.output(u'')
    # Show the title of the page where the link was found.
    # Highlight the title in purple.
    pywikibot.output(u'Treating page \03{lightpurple}%s\03{default}, '
                     u'currently in \03{lightpurple}%s\03{default}'
                     % (article.title(), current_cat.title()))

    # Determine a reasonable amount of context to print
    try:
        full_text = article.get(get_redirect=True)
    except pywikibot.NoPage:
        pywikibot.output(u'Page %s not found.' % article.title())
        return
    try:
        contextLength = full_text.index('\n\n')
    except ValueError:
        # substring not found
        contextLength = 500
    if full_text.startswith(u'[['):
        # probably an image
        # Add extra paragraph.
        contextLength = full_text.find('\n\n', contextLength + 2)
    if contextLength > 1000 or contextLength < 0:
        contextLength = 500
    pywikibot.output('\n' + full_text[:contextLength] + '\n')

    # we need list to index the choice
    subcatlist = list(self.catDB.getSubcats(current_cat))
    supercatlist = list(self.catDB.getSupercats(current_cat))

    if not subcatlist:
        pywikibot.output('This category has no subcategories.\n')
    if not supercatlist:
        pywikibot.output('This category has no supercategories.\n')
    # show subcategories as possible choices (with numbers)
    for i, supercat in enumerate(supercatlist):
        # layout: we don't expect a cat to have more than 10 supercats
        pywikibot.output(u'u%d - Move up to %s' % (i, supercat.title()))
    for i, subcat in enumerate(subcatlist):
        # layout: we don't expect a cat to have more than 100 subcats
        pywikibot.output(u'%2d - Move down to %s' % (i, subcat.title()))
    pywikibot.output(' j - Jump to another category')
    pywikibot.output(' s - Skip this article')
    pywikibot.output(' r - Remove this category tag')
    pywikibot.output(
        ' ? - Print first part of the page (longer and longer)')
    pywikibot.output(u'Enter - Save category as %s' % current_cat.title())

    flag = False
    while not flag:
        pywikibot.output('')
        choice = pywikibot.input(u'Choice:')
        if choice in ['s', 'S']:
            flag = True
        elif choice == '':
            pywikibot.output(u'Saving category as %s' % current_cat.title())
            if current_cat == original_cat:
                pywikibot.output('No changes necessary.')
            else:
                article.change_category(original_cat, current_cat,
                                        comment=self.editSummary)
            flag = True
        elif choice in ['j', 'J']:
            newCatTitle = pywikibot.input(u'Please enter the category the '
                                          u'article should be moved to:')
            newCat = pywikibot.Category(
                pywikibot.Link('Category:' + newCatTitle))
            # recurse into chosen category
            self.move_to_category(article, original_cat, newCat)
            flag = True
        elif choice in ['r', 'R']:
            # remove the category tag
            article.change_category(original_cat, None,
                                    comment=self.editSummary)
            flag = True
        elif choice == '?':
            contextLength += 500
            pywikibot.output('\n' + full_text[:contextLength] + '\n')
            # if categories possibly weren't visible, show them
            # additionally (maybe this should always be shown?)
            if len(full_text) > contextLength:
                pywikibot.output('')
                pywikibot.output('Original categories: ')
                for cat in article.categories():
                    pywikibot.output(u'* %s' % cat.title())
        elif choice[0] == 'u':
            try:
                choice = int(choice[1:])
            except ValueError:
                # user pressed an unknown command. Prompt him again.
                continue
            self.move_to_category(article, original_cat,
                                  supercatlist[choice])
            flag = True
        else:
            try:
                choice = int(choice)
            except ValueError:
                # user pressed an unknown command. Prompt him again.
                continue
            # recurse into subcategory
            self.move_to_category(article, original_cat, subcatlist[choice])
            flag = True
def treat(self, page):
    text = self.load(page)
    if text is None:
        return
    cats = [c for c in page.categories()]
    # Show the title of the page we're working on.
    # Highlight the title in purple.
    pywikibot.output(u'\n\n>>> \03{lightpurple}%s\03{default} <<<'
                     % page.title())
    pywikibot.output(u'Current categories:')
    for cat in cats:
        pywikibot.output(u'* %s' % cat.title())
    newcatTitle = self.newcatTitle
    if not page.site.nocapitalize:
        newcatTitle = newcatTitle[:1].upper() + newcatTitle[1:]
    catpl = pywikibot.Page(page.site, newcatTitle, ns=14)
    if catpl in cats:
        pywikibot.output(u'%s is already in %s.'
                         % (page.title(), catpl.title()))
    else:
        if self.sort:
            catpl = self.sorted_by_last_name(catpl, page)
        pywikibot.output(u'Adding %s' % catpl.title(asLink=True))
        cats.append(catpl)
        text = pywikibot.replaceCategoryLinks(text, cats)
        if not self.save(text, page, newcatTitle):
            pywikibot.output(u'Page %s not saved.'
                             % page.title(asLink=True))
def login(self, retry=False, autocreate=False):
    """
    Attempt to log into the server.

    @param retry: infinitely retry if the API returns an unknown error
    @type retry: bool
    @param autocreate: if true, allow auto-creation of the account
        using unified login
    @type autocreate: bool
    @raises NoUsername: Username is not recognised by the site.
    """
    if not self.password:
        # First check that the username exists,
        # to avoid asking for a password that will not work.
        if not autocreate:
            self.check_user_exists()
        # As we don't want the password to appear on the screen, we set
        # password = True
        self.password = pywikibot.input(
            u'Password for user %(name)s on %(site)s (no characters will '
            u'be shown):' % {'name': self.login_name, 'site': self.site},
            password=True)

    pywikibot.output(u'Logging in to %(site)s as %(name)s'
                     % {'name': self.login_name, 'site': self.site})
    try:
        cookiedata = self.getCookie()
    except pywikibot.data.api.APIError as e:
        pywikibot.error(u'Login failed (%s).' % e.code)
        if e.code == 'NotExists':
            raise NoUsername(u"Username '%s' does not exist on %s"
                             % (self.login_name, self.site))
        elif e.code == 'Illegal':
            raise NoUsername(u"Username '%s' is invalid on %s"
                             % (self.login_name, self.site))
        elif e.code == 'readapidenied':
            raise NoUsername(
                'Username "{0}" does not have read permissions on '
                '{1}'.format(self.login_name, self.site))
        elif e.code == 'Failed':
            raise NoUsername(
                'Username "{0}" does not have read permissions on '
                '{1}\n.{2}'.format(self.login_name, self.site, e.info))
        # TODO: investigate other unhandled API codes (bug T75539)
        if retry:
            self.password = None
            return self.login(retry=True)
        else:
            return False
    self.storecookiedata(cookiedata)
    pywikibot.log(u'Should be logged in now')
    # Show a warning according to the local bot policy
    # FIXME: disabled due to recursion; need to move this to the Site
    # object after login
    # if not self.botAllowed():
    #     logger.error(
    #         u"Username '%(name)s' is not listed on [[%(page)s]]."
    #         % {'name': self.username,
    #            'page': botList[self.site.family.name][self.site.code]})
    #     logger.error(
    #         "Please make sure you are allowed to use the robot "
    #         "before actually using it!")
    #     return False
    return True
def verbose_output(string):
    """Verbose output."""
    if pywikibot.config.verbose_output:
        pywikibot.output(string)
def main(*args) -> None:
    """
    Process command line arguments and invoke bot.

    If args is an empty list, sys.argv is used.

    :param args: command line arguments
    :type args: str
    """
    url = ''
    description = []
    summary = None
    keep_filename = False
    always = False
    use_filename = None
    filename_prefix = None
    verify_description = True
    aborts = set()
    ignorewarn = set()
    chunk_size = 0
    asynchronous = False
    recursive = False
    description_file = None

    # process all global bot args
    # returns a list of non-global args, i.e. args for upload.py
    local_args = pywikibot.handle_args(args)
    for option in local_args:
        arg, _, value = option.partition(':')
        if arg == '-always':
            keep_filename = True
            always = True
            verify_description = False
        elif arg == '-recursive':
            recursive = True
        elif arg == '-keep':
            keep_filename = True
        elif arg == '-filename':
            use_filename = value
        elif arg == '-prefix':
            filename_prefix = value
        elif arg == '-summary':
            summary = value
        elif arg == '-noverify':
            verify_description = False
        elif arg == '-abortonwarn':
            if value and aborts is not True:
                aborts.add(value)
            else:
                aborts = True
        elif arg == '-ignorewarn':
            if value and ignorewarn is not True:
                ignorewarn.add(value)
            else:
                ignorewarn = True
        elif arg == '-chunked':
            match = CHUNK_SIZE_REGEX.match(option)
            chunk_size = get_chunk_size(match)
        elif arg == '-async':
            asynchronous = True
        elif arg == '-descfile':
            description_file = value
        elif not url:
            url = option
        else:
            description.append(option)

    description = ' '.join(description)

    if description_file:
        if description:
            pywikibot.error('Both a description and a -descfile were '
                            'provided. Please specify only one of those.')
            return
        with codecs.open(description_file,
                         encoding=pywikibot.config.textfile_encoding) as f:
            description = f.read().replace('\r\n', '\n')

    while not ('://' in url or os.path.exists(url)):
        if not url:
            error = 'No input filename given.'
        else:
            error = 'Invalid input filename given.'
        if not always:
            error += ' Try again.'
        if always:
            url = None
            break
        pywikibot.output(error)
        url = pywikibot.input('URL, file or directory where files are now:')

    if always and (aborts is not True and ignorewarn is not True
                   or not description or url is None):
        additional = ''
        missing = []
        if url is None:
            missing += ['filename']
            additional = error + ' '
        if description is None:
            missing += ['description']
        if aborts is not True and ignorewarn is not True:
            additional += ('Either -ignorewarn or -abortonwarn must be '
                           'defined for all codes. ')
        additional += 'Unable to run in -always mode'
        suggest_help(missing_parameters=missing, additional_text=additional)
        return

    if os.path.isdir(url):
        file_list = []
        for directory_info in os.walk(url):
            if not recursive:
                # Do not visit any subdirectories
                directory_info[1][:] = []
            for dir_file in directory_info[2]:
                file_list.append(os.path.join(directory_info[0], dir_file))
        url = file_list
    else:
        url = [url]

    bot = UploadRobot(url, description=description,
                      use_filename=use_filename,
                      keep_filename=keep_filename,
                      verify_description=verify_description,
                      aborts=aborts, ignore_warning=ignorewarn,
                      chunk_size=chunk_size, asynchronous=asynchronous,
                      always=always, summary=summary,
                      filename_prefix=filename_prefix)
    bot.run()
def run(self):
    """Run thread."""
    while not self.killed:
        if len(self.queue) == 0:
            if self.finishing:
                break
            else:
                time.sleep(0.1)
        else:
            with self.semaphore:
                url, errorReport, containingPage, archiveURL = \
                    self.queue[0]
                self.queue = self.queue[1:]
                talkPage = containingPage.toggleTalkPage()
                pywikibot.output(color_format(
                    '{lightaqua}** Reporting dead link on '
                    '{0}...{default}', talkPage.title(as_link=True)))
                try:
                    content = talkPage.get() + '\n\n\n'
                    if url in content:
                        pywikibot.output(color_format(
                            '{lightaqua}** Dead link seems to have '
                            'already been reported on {0}{default}',
                            talkPage.title(as_link=True)))
                        continue
                except (pywikibot.NoPage, pywikibot.IsRedirectPage):
                    content = ''

                if archiveURL:
                    archiveMsg = '\n' + i18n.twtranslate(
                        containingPage.site,
                        'weblinkchecker-archive_msg',
                        {'URL': archiveURL})
                else:
                    archiveMsg = ''
                # The caption will default to "Dead link". But if there
                # is already such a caption, we'll use "Dead link 2",
                # "Dead link 3", etc.
                caption = i18n.twtranslate(containingPage.site,
                                           'weblinkchecker-caption')
                i = 1
                count = ''
                # Check if there is already such a caption on
                # the talk page.
                while re.search('= *{0}{1} *='.format(caption, count),
                                content) is not None:
                    i += 1
                    count = ' ' + str(i)
                caption += count
                content += '== {0} ==\n\n{1}\n\n{2}{3}\n--~~~~'.format(
                    caption,
                    i18n.twtranslate(containingPage.site,
                                     'weblinkchecker-report'),
                    errorReport, archiveMsg)

                comment = '[[{0}#{1}|→]] {2}'.format(
                    talkPage.title(), caption,
                    i18n.twtranslate(containingPage.site,
                                     'weblinkchecker-summary'))
                try:
                    talkPage.put(content, comment)
                except pywikibot.SpamfilterError as error:
                    pywikibot.output(color_format(
                        '{lightaqua}** SpamfilterError while trying to '
                        'change {0}: {1}{default}',
                        talkPage.title(as_link=True), error.url))
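# Minimal sketch of the caption-numbering loop above, on invented
# talk-page text: an existing '== Dead link ==' section bumps the new
# caption to 'Dead link 2', and so on.
import re

content = '== Dead link ==\nold report\n'
caption, i, count = 'Dead link', 1, ''
while re.search('= *{0}{1} *='.format(caption, count), content):
    i += 1
    count = ' ' + str(i)
print(caption + count)  # Dead link 2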
def treat(self, page):
    """Process one RC feed item: decide whether to patrol it and do so."""
    choice = False
    try:
        # page: title, date, username, comment, loginfo, rcid, token
        username = page['user']
        # when the feed isn't from the API, it used to contain
        # '(not yet written)' or '(page does not exist)' when it was
        # a redlink
        rcid = page['rcid']
        title = page['title']
        if not rcid:
            raise Exception('rcid not present')

        # check whether we have wrapped around to higher rcids
        # which indicates a new RC feed is being processed
        if rcid > self.last_rcid:
            # refresh the whitelist
            self.load_whitelist()
            self.repeat_start_ts = time.time()
        if pywikibot.config.verbose_output or self.getOption('ask'):
            pywikibot.output('User {0} has created or modified page {1}'
                             .format(username, title))
        if self.getOption('autopatroluserns') and page['ns'] in (2, 3):
            # simple rule to whitelist any user editing their own userspace
            if title.partition(':')[2].split('/')[0].startswith(username):
                verbose_output('{0} is whitelisted to modify {1}'
                               .format(username, title))
                choice = True
        if not choice and username in self.whitelist:
            if self.in_list(self.whitelist[username], title):
                verbose_output('{0} is whitelisted to modify {1}'
                               .format(username, title))
                choice = True
        if self.getOption('ask'):
            choice = pywikibot.input_yn(
                'Do you want to mark page as patrolled?')
        # Patrol the page
        if choice:
            # list() iterates over patrol() which returns a generator
            list(self.site.patrol(rcid))
            self.patrol_counter = self.patrol_counter + 1
            pywikibot.output('Patrolled {0} (rcid {1}) by user {2}'
                             .format(title, rcid, username))
        else:
            verbose_output('Skipped')
        if rcid > self.highest_rcid:
            self.highest_rcid = rcid
        self.last_rcid = rcid
        self.rc_item_counter = self.rc_item_counter + 1
    except pywikibot.NoPage:
        pywikibot.output('Page {0} does not exist; skipping.'.format(title))
    except pywikibot.IsRedirectPage:
        pywikibot.output('Page {0} is a redirect; skipping.'.format(title))
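# A minimal sketch of the autopatrol userspace rule in treat(): a page in
# the User or User talk namespace is whitelisted when the first path
# segment of its title starts with the editing user's name. Titles and
# user names here are invented.
username = 'Alice'
for title in ['User:Alice/sandbox', 'User talk:Alice', 'User:Bob/draft']:
    base = title.partition(':')[2].split('/')[0]
    print(title, '->', base.startswith(username))
# User:Alice/sandbox -> True, User talk:Alice -> True,
# User:Bob/draft -> False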
def run(self):
    """Run the bot."""
    commons = self.commons
    comment = self.summary

    for page in self.generator:
        self.current_page = page
        try:
            localImagePage = pywikibot.FilePage(self.site, page.title())
            if localImagePage.file_is_shared():
                pywikibot.output('File is already on Commons.')
                continue
            sha1 = localImagePage.latest_file_info.sha1
            filenameOnCommons = self.findFilenameOnCommons(localImagePage)
            if not filenameOnCommons:
                pywikibot.output('NowCommons template not found.')
                continue
            commonsImagePage = pywikibot.FilePage(
                commons, 'Image:' + filenameOnCommons)
            if (localImagePage.title(with_ns=False)
                    != commonsImagePage.title(with_ns=False)):
                usingPages = list(localImagePage.usingPages())
                if usingPages and usingPages != [localImagePage]:
                    pywikibot.output(color_format(
                        '"{lightred}{0}{default}" '
                        'is still used in {1} pages.',
                        localImagePage.title(with_ns=False),
                        len(usingPages)))
                    if self.opt.replace is True:
                        pywikibot.output(color_format(
                            'Replacing "{lightred}{0}{default}" by '
                            '"{lightgreen}{1}{default}".',
                            localImagePage.title(with_ns=False),
                            commonsImagePage.title(with_ns=False)))
                        bot = ImageBot(
                            pg.FileLinksGenerator(localImagePage),
                            localImagePage.title(with_ns=False),
                            commonsImagePage.title(with_ns=False),
                            '', self.opt.replacealways,
                            self.opt.replaceloose)
                        bot.run()
                        # If the image is used with the urlname the
                        # previous function won't work
                        is_used = bool(list(pywikibot.FilePage(
                            self.site,
                            page.title()).usingPages(total=1)))
                        if is_used and self.opt.replaceloose:
                            bot = ImageBot(
                                pg.FileLinksGenerator(localImagePage),
                                localImagePage.title(with_ns=False,
                                                     as_url=True),
                                commonsImagePage.title(with_ns=False),
                                '', self.opt.replacealways,
                                self.opt.replaceloose)
                            bot.run()
                        # refresh because we want the updated list
                        usingPages = len(list(pywikibot.FilePage(
                            self.site, page.title()).usingPages()))
                    else:
                        pywikibot.output('Please change them manually.')
                    continue
                pywikibot.output(color_format(
                    'No page is using "{lightgreen}{0}{default}" '
                    'anymore.',
                    localImagePage.title(with_ns=False)))
            commonsText = commonsImagePage.get()
            if self.opt.replaceonly is False:
                if sha1 == commonsImagePage.latest_file_info.sha1:
                    pywikibot.output(
                        'The image is identical to the one on Commons.')
                    if len(localImagePage.get_file_history()) > 1:
                        pywikibot.output(
                            'This image has a version history. Please '
                            'delete it manually after making sure that '
                            'the old versions are not worth keeping.')
                        continue
                    if self.opt.always is False:
                        format_str = color_format(
                            '\n\n>>>> Description on {lightpurple}%s'
                            '{default} <<<<\n')
                        pywikibot.output(format_str % page.title())
                        pywikibot.output(localImagePage.get())
                        pywikibot.output(format_str
                                         % commonsImagePage.title())
                        pywikibot.output(commonsText)
                        if pywikibot.input_yn(
                                'Does the description on Commons contain '
                                'all required source and license\n'
                                'information?',
                                default=False, automatic_quit=False):
                            localImagePage.delete(
                                '{0} [[:commons:Image:{1}]]'
                                .format(comment, filenameOnCommons),
                                prompt=False)
                    else:
                        localImagePage.delete(
                            comment + ' [[:commons:Image:{0}]]'
                            .format(filenameOnCommons),
                            prompt=False)
                else:
                    pywikibot.output('The image is not identical to '
                                     'the one on Commons.')
        except (pywikibot.NoPage, pywikibot.IsRedirectPage) as e:
            pywikibot.output(str(e))
            continue
        else:
            self._treat_counter += 1
    if not self._treat_counter:
        pywikibot.output('No transcluded files found for {0}.'
                         .format(self.ncTemplates()[0]))
    self.exit()
def main(*args):
    """
    Process command line arguments and invoke bot.

    If args is an empty list, sys.argv is used.

    @param args: command line arguments
    @type args: unicode
    """
    gen = None
    xmlFilename = None
    HTTPignore = []

    if isinstance(memento_client, ImportError):
        warn('memento_client not imported: {0}'.format(memento_client),
             ImportWarning)

    # Process global args and prepare generator args parser
    local_args = pywikibot.handle_args(args)
    genFactory = pagegenerators.GeneratorFactory()

    for arg in local_args:
        if arg == '-talk':
            config.report_dead_links_on_talk = True
        elif arg == '-notalk':
            config.report_dead_links_on_talk = False
        elif arg == '-repeat':
            gen = RepeatPageGenerator()
        elif arg.startswith('-ignore:'):
            HTTPignore.append(int(arg[8:]))
        elif arg.startswith('-day:'):
            config.weblink_dead_days = int(arg[5:])
        elif arg.startswith('-xmlstart'):
            if len(arg) == 9:
                xmlStart = pywikibot.input(
                    'Please enter the dumped article to start with:')
            else:
                xmlStart = arg[10:]
        elif arg.startswith('-xml'):
            if len(arg) == 4:
                xmlFilename = i18n.input('pywikibot-enter-xml-filename')
            else:
                xmlFilename = arg[5:]
        else:
            genFactory.handleArg(arg)

    if xmlFilename:
        try:
            xmlStart
        except NameError:
            xmlStart = None
        gen = XmlDumpPageGenerator(xmlFilename, xmlStart,
                                   genFactory.namespaces)

    if not gen:
        gen = genFactory.getCombinedGenerator()
    if gen:
        if not genFactory.nopreload:
            # fetch at least 240 pages simultaneously from the wiki, but
            # more if a high thread number is set
            pageNumber = max(240, config.max_external_links * 2)
            gen = pagegenerators.PreloadingGenerator(gen,
                                                     groupsize=pageNumber)
        gen = pagegenerators.RedirectFilterPageGenerator(gen)
        bot = WeblinkCheckerRobot(gen, HTTPignore,
                                  config.weblink_dead_days)
        try:
            bot.run()
        finally:
            waitTime = 0
            # Don't wait longer than 30 seconds for threads to finish.
            while countLinkCheckThreads() > 0 and waitTime < 30:
                try:
                    pywikibot.output('Waiting for remaining {0} threads '
                                     'to finish, please wait...'
                                     .format(countLinkCheckThreads()))
                    # wait 1 second
                    time.sleep(1)
                    waitTime += 1
                except KeyboardInterrupt:
                    pywikibot.output('Interrupted.')
                    break
            if countLinkCheckThreads() > 0:
                pywikibot.output('Remaining {0} threads will be killed.'
                                 .format(countLinkCheckThreads()))
                # Threads will die automatically because they are daemonic.
            if bot.history.reportThread:
                bot.history.reportThread.shutdown()
                # wait until the report thread is shut down; the user can
                # interrupt it by pressing CTRL-C.
                try:
                    while bot.history.reportThread.isAlive():
                        time.sleep(0.1)
                except KeyboardInterrupt:
                    pywikibot.output('Report thread interrupted.')
                    bot.history.reportThread.kill()
            pywikibot.output('Saving history...')
            bot.history.save()
        return True
    else:
        pywikibot.bot.suggest_help(missing_generator=True)
        return False
def process_entries(cache_path, func, use_accesstime=None, output_func=None,
                    action_func=None):
    """
    Check the contents of the cache.

    This program tries to use file access times to determine whether
    cache files are being used. However, file access times are not always
    usable. On many modern filesystems, they have been disabled. On Unix,
    check the filesystem mount options. You may need to remount with
    'strictatime'.

    @param cache_path: path of the cache directory or file; defaults to
        the 'apicache' directory in the pywikibot base directory
    @param func: filter function; an entry is processed when func is None
        or returns a true value for the entry
    @param use_accesstime: Whether access times should be used.
    @type use_accesstime: bool tristate:
        - None  = detect
        - False = don't use
        - True  = always use
    @param output_func: maps an entry to the object to output; the entry
        itself is output when None
    @param action_func: optional callable applied to each matching entry
    """
    if not cache_path:
        cache_path = os.path.join(pywikibot.config2.base_dir, 'apicache')

    if not os.path.exists(cache_path):
        pywikibot.error('%s: no such file or directory' % cache_path)
        return

    if os.path.isdir(cache_path):
        filenames = [os.path.join(cache_path, filename)
                     for filename in os.listdir(cache_path)]
    else:
        filenames = [cache_path]

    for filepath in filenames:
        filename = os.path.basename(filepath)
        cache_dir = os.path.dirname(filepath)
        if use_accesstime is not False:
            stinfo = os.stat(filepath)

        entry = CacheEntry(cache_dir, filename)
        try:
            entry._load_cache()
        except ValueError as e:
            pywikibot.error('Failed loading {0}'.format(
                entry._cachefile_path()))
            pywikibot.exception(e, tb=True)
            continue

        if use_accesstime is None:
            stinfo2 = os.stat(filepath)
            use_accesstime = stinfo.st_atime != stinfo2.st_atime

        if use_accesstime:
            # Reset access times to values before loading cache entry.
            os.utime(filepath, (stinfo.st_atime, stinfo.st_mtime))
            entry.stinfo = stinfo

        try:
            entry.parse_key()
        except ParseError:
            pywikibot.error(u'Problems parsing %s with key %s'
                            % (entry.filename, entry.key))
            pywikibot.exception()
            continue

        try:
            entry._rebuild()
        except Exception as e:
            pywikibot.error(u'Problems loading %s with key %s, %r'
                            % (entry.filename, entry.key,
                               entry._parsed_key))
            pywikibot.exception(e, tb=True)
            continue

        if func is None or func(entry):
            if output_func or action_func is None:
                if output_func is None:
                    output = entry
                else:
                    output = output_func(entry)
                if output is not None:
                    pywikibot.output(output)
            if action_func:
                action_func(entry)
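# Minimal standalone sketch of the access-time probe that process_entries()
# relies on: stat a file, read it, stat it again; if st_atime moved, the
# filesystem records access times. The result is only a heuristic: 'noatime'
# mounts report False and 'relatime' mounts can go either way, which is
# exactly what the docstring above warns about. The temporary file is a
# throwaway example, not part of the cache layout.
import os
import tempfile


def filesystem_tracks_atime(path):
    """Return True if reading path updates its access time."""
    before = os.stat(path)
    with open(path, 'rb') as f:
        f.read()
    after = os.stat(path)
    # Put the original times back so the probe leaves no trace, mirroring
    # the os.utime() reset in process_entries().
    os.utime(path, (before.st_atime, before.st_mtime))
    return before.st_atime != after.st_atime


if __name__ == '__main__':
    with tempfile.NamedTemporaryFile(delete=False) as tmp:
        tmp.write(b'probe')
    print(filesystem_tracks_atime(tmp.name))
    os.unlink(tmp.name)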
def check(self, useHEAD=False):
    """
    Return True and the server status message if the page is alive.

    @rtype: tuple of (bool, unicode)
    """
    try:
        wasRedirected = self.resolveRedirect(useHEAD=useHEAD)
    except UnicodeError as error:
        return False, 'Encoding Error: {0} ({1})'.format(
            error.__class__.__name__, error)
    except httplib.error as error:
        return False, 'HTTP Error: {}'.format(error.__class__.__name__)
    except socket.error as error:
        # https://docs.python.org/2/library/socket.html :
        # socket.error :
        # The accompanying value is either a string telling what went
        # wrong or a pair (errno, string) representing an error
        # returned by a system call, similar to the value
        # accompanying os.error
        if isinstance(error, basestring):
            msg = error
        else:
            try:
                msg = error[1]
            except IndexError:
                pywikibot.output('### DEBUG information for T57282')
                raise IndexError(type(error))
        # TODO: decode msg. On Linux, it's encoded in UTF-8.
        # How is it encoded in Windows? Or can we somehow just
        # get the English message?
        return False, 'Socket Error: {}'.format(repr(msg))
    if wasRedirected:
        if self.url in self.redirectChain:
            if useHEAD:
                # Some servers don't seem to handle HEAD requests properly,
                # which leads to a cyclic list of redirects.
                # We simply start from the beginning, but this time,
                # we don't use HEAD, but GET requests.
                redirChecker = LinkChecker(
                    self.redirectChain[0],
                    serverEncoding=self.serverEncoding,
                    HTTPignore=self.HTTPignore)
                return redirChecker.check(useHEAD=False)
            else:
                urlList = ['[{0}]'.format(url)
                           for url in self.redirectChain + [self.url]]
                return (False, 'HTTP Redirect Loop: {0}'.format(
                    ' -> '.join(urlList)))
        elif len(self.redirectChain) >= 19:
            if useHEAD:
                # Some servers don't seem to handle HEAD requests properly,
                # which leads to a long (or infinite) list of redirects.
                # We simply start from the beginning, but this time,
                # we don't use HEAD, but GET requests.
                redirChecker = LinkChecker(
                    self.redirectChain[0],
                    serverEncoding=self.serverEncoding,
                    HTTPignore=self.HTTPignore)
                return redirChecker.check(useHEAD=False)
            else:
                urlList = ['[{0}]'.format(url)
                           for url in self.redirectChain + [self.url]]
                return (False, 'Long Chain of Redirects: {0}'
                        .format(' -> '.join(urlList)))
        else:
            redirChecker = LinkChecker(self.url, self.redirectChain,
                                       self.serverEncoding,
                                       HTTPignore=self.HTTPignore)
            return redirChecker.check(useHEAD=useHEAD)
    else:
        try:
            conn = self.getConnection()
        except httplib.error as error:
            return False, 'HTTP Error: {0}'.format(
                error.__class__.__name__)
        try:
            conn.request('GET', '{0}{1}'.format(self.path, self.query),
                         None, self.header)
        except socket.error as error:
            return False, 'Socket Error: {0}'.format(repr(error[1]))
        try:
            self.response = conn.getresponse()
        except Exception as error:
            return False, 'Error: {0}'.format(error)
        # read the server's encoding, in case we need it later
        self.readEncodingFromResponse(self.response)
        # site down if the server status is between 400 and 499
        alive = self.response.status not in range(400, 500)
        if self.response.status in self.HTTPignore:
            alive = False
        return alive, '{0} {1}'.format(self.response.status,
                                       self.response.reason)
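# Standalone sketch of the two redirect guards in check() above: a URL seen
# twice in the chain is reported as a loop, and a chain of 19 or more hops
# is treated as unreasonably long. The redirect table is a made-up example.
def follow_redirects(redirects, start, max_chain=19):
    """Follow redirects (a dict url -> target); return (ok, message)."""
    chain = []
    url = start
    while url in redirects:
        if url in chain:
            urlList = ['[{0}]'.format(u) for u in chain + [url]]
            return False, 'HTTP Redirect Loop: ' + ' -> '.join(urlList)
        chain.append(url)
        if len(chain) >= max_chain:
            return False, 'Long Chain of Redirects'
        url = redirects[url]
    return True, url


print(follow_redirects({'http://a/': 'http://b/',
                        'http://b/': 'http://a/'}, 'http://a/'))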
def output_range(self, start_context, end_context):
    """Output a section from the text."""
    pywikibot.output(self.text[start_context:end_context])
def main():
    """Process command line arguments and invoke bot."""
    local_args = pywikibot.handleArgs()
    cache_paths = None
    delete = False
    command = None
    output = None

    for arg in local_args:
        if command == '':
            command = arg
        elif output == '':
            output = arg
        elif arg == '-delete':
            delete = True
        elif arg == '-password':
            command = 'has_password(entry)'
        elif arg == '-c':
            if command:
                pywikibot.error('Only one command may be executed.')
                exit(1)
            command = ''
        elif arg == '-o':
            if output:
                pywikibot.error('Only one output may be defined.')
                exit(1)
            output = ''
        else:
            if not cache_paths:
                cache_paths = [arg]
            else:
                cache_paths.append(arg)

    if not cache_paths:
        cache_paths = ['apicache', 'tests/apicache']
        # Also process the base directory, if it isn't the current directory
        if os.path.abspath(os.getcwd()) != pywikibot.config2.base_dir:
            cache_paths += [
                os.path.join(pywikibot.config2.base_dir, 'apicache')]
        # Also process the user home cache, if it isn't the config directory
        if os.path.expanduser('~/.pywikibot') != pywikibot.config2.base_dir:
            cache_paths += [
                os.path.join(os.path.expanduser('~/.pywikibot'),
                             'apicache')]

    if delete:
        action_func = CacheEntry._delete
    else:
        action_func = None

    if output:
        output_func = _parse_command(output, 'output')
        if output_func is None:
            return False
    else:
        output_func = None

    if command:
        filter_func = _parse_command(command, 'filter')
        if filter_func is None:
            return False
    else:
        filter_func = None

    for cache_path in cache_paths:
        if len(cache_paths) > 1:
            pywikibot.output(u'Processing %s' % cache_path)
        process_entries(cache_path, filter_func, output_func=output_func,
                        action_func=action_func)
def main(*args):
    """
    Process command line arguments and invoke bot.

    If args is an empty list, sys.argv is used.

    @param args: command line arguments
    @type args: str
    """
    add_cat = None
    gen = None
    # summary message
    edit_summary = ''
    # Array which will collect commandline parameters.
    # First element is original text, second element is replacement text.
    commandline_replacements = []
    # A list of 2-tuples of original text and replacement text.
    replacements = []
    # Don't edit pages which contain certain texts.
    exceptions = {
        'title': [],
        'text-contains': [],
        'inside': [],
        'inside-tags': [],
        'require-title': [],  # using a separate requirements dict needs some
    }                         # major refactoring of code.

    # Should the elements of 'replacements' and 'exceptions' be interpreted
    # as regular expressions?
    regex = False
    # Predefined fixes from dictionary 'fixes' (see above).
    fixes_set = []
    # the dump's path, either absolute or relative, which will be used
    # if -xml flag is present
    xmlFilename = None
    useSql = False
    sql_query = None
    # will become True when the user presses a ('yes to all') or uses the
    # -always flag.
    acceptall = False
    # Will become True if the user inputs the commandline parameter -nocase
    caseInsensitive = False
    # Will become True if the user inputs the commandline parameter -dotall
    dotall = False
    # Will become True if the user inputs the commandline parameter
    # -multiline
    multiline = False
    # Do all hits when they overlap
    allowoverlap = False
    # Do not recurse replacement
    recursive = False
    # Between a regex and another (using -fix) sleep some time (not to
    # waste too much CPU)
    sleep = None
    # Request manual replacements even if replacements are already defined
    manual_input = False
    # Replacements loaded from a file
    replacement_file = None
    replacement_file_arg_misplaced = False

    # Read commandline parameters.
    local_args = pywikibot.handle_args(args)
    genFactory = pagegenerators.GeneratorFactory()
    for arg in local_args:
        if genFactory.handleArg(arg):
            continue
        if arg == '-regex':
            regex = True
        elif arg.startswith('-xmlstart'):
            if len(arg) == 9:
                xmlStart = pywikibot.input(
                    'Please enter the dumped article to start with:')
            else:
                xmlStart = arg[10:]
        elif arg.startswith('-xml'):
            if len(arg) == 4:
                xmlFilename = i18n.input('pywikibot-enter-xml-filename')
            else:
                xmlFilename = arg[5:]
        elif arg.startswith(('-sql', '-mysqlquery')):
            if arg.startswith('-sql'):
                issue_deprecation_warning('The usage of "-sql"',
                                          '-mysqlquery', 1,
                                          ArgumentDeprecationWarning,
                                          since='20180617')
            useSql = True
            sql_query = arg.partition(':')[2]
        elif arg.startswith('-excepttitle:'):
            exceptions['title'].append(arg[13:])
        elif arg.startswith('-requiretitle:'):
            exceptions['require-title'].append(arg[14:])
        elif arg.startswith('-excepttext:'):
            exceptions['text-contains'].append(arg[12:])
        elif arg.startswith('-exceptinside:'):
            exceptions['inside'].append(arg[14:])
        elif arg.startswith('-exceptinsidetag:'):
            exceptions['inside-tags'].append(arg[17:])
        elif arg.startswith('-fix:'):
            fixes_set += [arg[5:]]
        elif arg.startswith('-sleep:'):
            sleep = float(arg[7:])
        elif arg == '-always':
            acceptall = True
        elif arg == '-recursive':
            recursive = True
        elif arg == '-nocase':
            caseInsensitive = True
        elif arg == '-dotall':
            dotall = True
        elif arg == '-multiline':
            multiline = True
        elif arg.startswith('-addcat:'):
            add_cat = arg[8:]
        elif arg.startswith('-summary:'):
            edit_summary = arg[9:]
        elif arg.startswith('-automaticsummary'):
            edit_summary = True
        elif arg.startswith('-allowoverlap'):
            allowoverlap = True
        elif arg.startswith('-manualinput'):
            manual_input = True
        elif arg.startswith('-replacementfile'):
            issue_deprecation_warning('-replacementfile', '-pairsfile', 2,
                                      ArgumentDeprecationWarning,
                                      since='20160304')
        elif arg.startswith('-pairsfile'):
            if len(commandline_replacements) % 2:
                replacement_file_arg_misplaced = True
            if arg == '-pairsfile':
                replacement_file = pywikibot.input(
                    'Please enter the filename to read replacements from:')
            else:
                replacement_file = arg[len('-pairsfile:'):]
        else:
            commandline_replacements.append(arg)

    site = pywikibot.Site()

    if len(commandline_replacements) % 2:
        pywikibot.error('Incomplete command line pattern replacement pair.')
        return False

    if replacement_file_arg_misplaced:
        pywikibot.error('-pairsfile used between a pattern replacement '
                        'pair.')
        return False

    if replacement_file:
        try:
            with codecs.open(replacement_file, 'r', 'utf-8') as f:
                # strip newlines, but not other characters
                file_replacements = f.read().splitlines()
        except (IOError, OSError) as e:
            pywikibot.error('Error loading {0}: {1}'.format(
                replacement_file, e))
            return False

        if len(file_replacements) % 2:
            pywikibot.error(
                '{0} contains an incomplete pattern replacement pair.'
                .format(replacement_file))
            return False

        # Strip BOM from first line; str.lstrip() returns a new string,
        # so the result must be assigned back.
        file_replacements[0] = file_replacements[0].lstrip('\uFEFF')
        commandline_replacements.extend(file_replacements)

    if not (commandline_replacements or fixes_set) or manual_input:
        old = pywikibot.input(
            'Please enter the text that should be replaced:')
        while old:
            new = pywikibot.input('Please enter the new text:')
            commandline_replacements += [old, new]
            old = pywikibot.input(
                'Please enter another text that should be replaced,'
                '\nor press Enter to start:')

    # The summary stored here won't be actually used but is only an example
    single_summary = None
    for i in range(0, len(commandline_replacements), 2):
        replacement = Replacement(commandline_replacements[i],
                                  commandline_replacements[i + 1])
        if not single_summary:
            single_summary = i18n.twtranslate(
                site, 'replace-replacing',
                {'description': ' (-{0} +{1})'.format(replacement.old,
                                                      replacement.new)})
        replacements.append(replacement)

    # Perform one of the predefined actions.
    missing_fixes_summaries = []  # fixes/replacements missing a summary
    generators_given = bool(genFactory.gens)
    for fix_name in fixes_set:
        try:
            fix = fixes.fixes[fix_name]
        except KeyError:
            pywikibot.output('Available predefined fixes are: {0}'.format(
                ', '.join(fixes.fixes.keys())))
            if not fixes.user_fixes_loaded:
                pywikibot.output('The user fixes file could not be found: '
                                 '{0}'.format(fixes.filename))
            return
        if not fix['replacements']:
            pywikibot.warning('No replacements defined for fix '
                              '"{0}"'.format(fix_name))
            continue
        if 'msg' in fix:
            if isinstance(fix['msg'], UnicodeType):
                set_summary = i18n.twtranslate(site, str(fix['msg']))
            else:
                set_summary = i18n.translate(site, fix['msg'],
                                             fallback=True)
        else:
            set_summary = None
        if not generators_given and 'generator' in fix:
            gen_args = fix['generator']
            if isinstance(gen_args, UnicodeType):
                gen_args = [gen_args]
            for gen_arg in gen_args:
                genFactory.handleArg(gen_arg)
        replacement_set = ReplacementList(fix.get('regex'),
                                          fix.get('exceptions'),
                                          fix.get('nocase'),
                                          set_summary,
                                          name=fix_name)
        # Whether some replacements have a summary, if so only show which
        # have none, otherwise just mention the complete fix
        missing_fix_summaries = []
        for index, replacement in enumerate(fix['replacements'], start=1):
            summary = None if len(replacement) < 3 else replacement[2]
            if not set_summary and not summary:
                missing_fix_summaries.append(
                    '"{0}" (replacement #{1})'.format(fix_name, index))
            if chars.contains_invisible(replacement[0]):
                pywikibot.warning(
                    'The old string "{0}" contains formatting '
                    'characters like U+200E'.format(
                        chars.replace_invisible(replacement[0])))
            if (not callable(replacement[1])
                    and chars.contains_invisible(replacement[1])):
                pywikibot.warning(
                    'The new string "{0}" contains formatting '
                    'characters like U+200E'.format(
                        chars.replace_invisible(replacement[1])))
            replacement_set.append(ReplacementListEntry(
                old=replacement[0],
                new=replacement[1],
                fix_set=replacement_set,
                edit_summary=summary,
            ))

        # Exceptions specified via 'fix' shall be merged to those via CLI.
        if replacement_set:
            replacements.extend(replacement_set)
            if replacement_set._exceptions is not None:
                for k, v in replacement_set._exceptions.items():
                    if k in exceptions:
                        exceptions[k] = list(set(exceptions[k]) | set(v))
                    else:
                        exceptions[k] = v

        if len(fix['replacements']) == len(missing_fix_summaries):
            missing_fixes_summaries.append(
                '"{0}" (all replacements)'.format(fix_name))
        else:
            missing_fixes_summaries += missing_fix_summaries

    if ((not edit_summary or edit_summary is True)
            and (missing_fixes_summaries or single_summary)):
        if single_summary:
            pywikibot.output('The summary message for the command line '
                             'replacements will be something like: '
                             + single_summary)
        if missing_fixes_summaries:
            pywikibot.output('The summary will not be used when the fix '
                             'has one defined but the following fix(es) '
                             'do(es) not have a summary defined: '
                             '{0}'.format(
                                 ', '.join(missing_fixes_summaries)))
        if edit_summary is not True:
            edit_summary = pywikibot.input(
                'Press Enter to use this automatic message, or enter a '
                'description of the\nchanges your bot will make:')
        else:
            edit_summary = ''

    # Set the regular expression flags
    flags = re.UNICODE
    if caseInsensitive:
        flags = flags | re.IGNORECASE
    if dotall:
        flags = flags | re.DOTALL
    if multiline:
        flags = flags | re.MULTILINE

    # Pre-compile all regular expressions here to save time later
    for replacement in replacements:
        replacement.compile(regex, flags)

    precompile_exceptions(exceptions, regex, flags)

    if xmlFilename:
        try:
            xmlStart
        except NameError:
            xmlStart = None
        gen = XmlDumpReplacePageGenerator(xmlFilename, xmlStart,
                                          replacements, exceptions, site)
    elif useSql:
        if not sql_query:
            whereClause = 'WHERE (%s)' % ' OR '.join(
                "old_text RLIKE '%s'"
                % prepareRegexForMySQL(old_regexp.pattern)
                for (old_regexp, new_text) in replacements)
            if exceptions:
                exceptClause = 'AND NOT (%s)' % ' OR '.join(
                    "old_text RLIKE '%s'"
                    % prepareRegexForMySQL(exc.pattern)
                    for exc in exceptions)
            else:
                exceptClause = ''
        query = sql_query or """
SELECT page_namespace, page_title
FROM page
JOIN text ON (page_id = old_id)
%s
%s
LIMIT 200""" % (whereClause, exceptClause)
        gen = pagegenerators.MySQLPageGenerator(query)

    gen = genFactory.getCombinedGenerator(gen, preload=True)

    if not gen:
        pywikibot.bot.suggest_help(missing_generator=True)
        return

    bot = ReplaceRobot(gen, replacements, exceptions, allowoverlap,
                       recursive, add_cat, sleep, edit_summary,
                       always=acceptall, site=site)
    site.login()
    bot.run()

    # Explicitly call pywikibot.stopme(). It will make sure the callback
    # is triggered before replace.py is unloaded.
    pywikibot.stopme()
    pywikibot.output('\n{0} pages changed.'.format(bot.changed_pages))
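# Quick standalone check of how the -nocase/-dotall/-multiline switches in
# main() above combine: each one ORs a single re flag into the compile
# flags, starting from re.UNICODE. The pattern and text are invented.
import re

flags = re.UNICODE
for flag in (re.IGNORECASE, re.DOTALL, re.MULTILINE):
    # pretend all three switches were given on the command line
    flags |= flag

pattern = re.compile(r'^foo.bar$', flags)
# IGNORECASE matches the capitals, DOTALL lets '.' cross the newline
assert pattern.search('FOO\nBAR')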
def output_range(self, start, end):
    """Show normal context with a red center region."""
    pywikibot.output(self.text[start:self.start]
                     + '\03{lightred}'
                     + self.text[self.start:self.end]
                     + '\03{default}'
                     + self.text[self.end:end])
def run(self):
    """Start the robot."""
    fhmuseum = pywikibot.ItemPage(self.repo, u'Q574961')
    for painting in self.generator:
        # Make sure it's the Frans Hals Museum
        if painting['object']['proxies'][0]['about'].startswith(
                u'/proxy/provider/92034/GVNRC_FHM01'):
            paintingId = painting['object']['proxies'][0]['dcIdentifier'][
                'def'][0].strip()
            uri = painting['object']['proxies'][0]['dcIdentifier']['def'][
                1].strip()
            europeanaUrl = u'http://europeana.eu/portal/record/%s.html' % (
                painting['object']['about'],)
            print(paintingId)
            print(uri)
            if painting['object']['proxies'][0].get('dcCreator'):
                dcCreator = painting['object']['proxies'][0]['dcCreator'][
                    'def'][0].strip()
            else:
                dcCreator = u'anoniem'
            paintingItem = None
            newclaims = []
            if paintingId in self.paintingIds:
                paintingItemTitle = u'Q%s' % (
                    self.paintingIds.get(paintingId),)
                print(paintingItemTitle)
                paintingItem = pywikibot.ItemPage(self.repo,
                                                  title=paintingItemTitle)
            else:
                # monumentItem = pywikibot.ItemPage(self.repo, title=u'')
                data = {'labels': {},
                        'descriptions': {},
                        }
                data['labels'][u'nl'] = {
                    'language': u'nl',
                    'value': painting['object']['title'][0]}
                if dcCreator:
                    data['descriptions']['en'] = {
                        'language': u'en',
                        'value': u'painting by %s' % (dcCreator,)}
                    data['descriptions']['nl'] = {
                        'language': u'nl',
                        'value': u'schilderij van %s' % (dcCreator,)}
                print(data)
                identification = {}
                summary = u'Creating new item with data from %s ' % (
                    europeanaUrl,)
                pywikibot.output(summary)
                # monumentItem.editEntity(data, summary=summary)
                result = self.repo.editEntity(identification, data,
                                              summary=summary)
                paintingItemTitle = result.get(u'entity').get('id')
                paintingItem = pywikibot.ItemPage(self.repo,
                                                  title=paintingItemTitle)

                newclaim = pywikibot.Claim(
                    self.repo, u'P%s' % (self.paintingIdProperty,))
                newclaim.setTarget(paintingId)
                pywikibot.output('Adding new id claim to %s'
                                 % paintingItem)
                paintingItem.addClaim(newclaim)

                newreference = pywikibot.Claim(
                    self.repo, u'P854')  # Add url, isReference=True
                newreference.setTarget(uri)
                pywikibot.output('Adding new reference claim to %s'
                                 % paintingItem)
                newclaim.addSource(newreference)

                newqualifier = pywikibot.Claim(
                    self.repo, u'P195')  # Add collection, isQualifier=True
                newqualifier.setTarget(fhmuseum)
                pywikibot.output('Adding new qualifier claim to %s'
                                 % paintingItem)
                newclaim.addQualifier(newqualifier)

                collectionclaim = pywikibot.Claim(self.repo, u'P195')
                collectionclaim.setTarget(fhmuseum)
                pywikibot.output('Adding collection claim to %s'
                                 % paintingItem)
                paintingItem.addClaim(collectionclaim)

                newreference = pywikibot.Claim(
                    self.repo, u'P854')  # Add url, isReference=True
                newreference.setTarget(europeanaUrl)
                pywikibot.output('Adding new reference claim to %s'
                                 % paintingItem)
                collectionclaim.addSource(newreference)

            if paintingItem and paintingItem.exists():
                data = paintingItem.get()
                claims = data.get('claims')

                # located in
                if u'P276' not in claims:
                    newclaim = pywikibot.Claim(self.repo, u'P276')
                    newclaim.setTarget(fhmuseum)
                    pywikibot.output('Adding located in claim to %s'
                                     % paintingItem)
                    paintingItem.addClaim(newclaim)

                    newreference = pywikibot.Claim(
                        self.repo, u'P854')  # Add url, isReference=True
                    newreference.setTarget(europeanaUrl)
                    pywikibot.output('Adding new reference claim to %s'
                                     % paintingItem)
                    newclaim.addSource(newreference)

                # instance of always painting while working on the
                # painting collection
                if u'P31' not in claims:
                    dcformatItem = pywikibot.ItemPage(self.repo,
                                                      title='Q3305213')

                    newclaim = pywikibot.Claim(self.repo,
                                               u'P31')
                    newclaim.setTarget(dcformatItem)
                    pywikibot.output('Adding instance claim to %s'
                                     % paintingItem)
                    paintingItem.addClaim(newclaim)

                    newreference = pywikibot.Claim(
                        self.repo, u'P854')  # Add url, isReference=True
                    newreference.setTarget(europeanaUrl)
                    pywikibot.output('Adding new reference claim to %s'
                                     % paintingItem)
                    newclaim.addSource(newreference)

                # creator
                if u'P170' not in claims and dcCreator:
                    creategen = pagegenerators.PreloadingItemGenerator(
                        pagegenerators.WikidataItemGenerator(
                            pagegenerators.SearchPageGenerator(
                                dcCreator, step=None, total=10,
                                namespaces=[0], site=self.repo)))

                    newcreator = None
                    for creatoritem in creategen:
                        print(creatoritem.title())
                        if (creatoritem.get().get('labels').get('en')
                                == dcCreator
                                or creatoritem.get().get('labels').get('nl')
                                == dcCreator):
                            print(creatoritem.get().get('labels').get('en'))
                            print(creatoritem.get().get('labels').get('nl'))
                            # Check occupation and country of citizenship
                            if (u'P106' in creatoritem.get().get('claims')
                                    and (u'P21' in creatoritem.get().get(
                                        'claims')
                                        or u'P800' in creatoritem.get().get(
                                            'claims'))):
                                newcreator = creatoritem
                                continue
                        elif ((creatoritem.get().get('aliases').get('en')
                               and dcCreator in creatoritem.get().get(
                                   'aliases').get('en'))
                              or (creatoritem.get().get('aliases').get('nl')
                                  and dcCreator in creatoritem.get().get(
                                      'aliases').get('nl'))):
                            if (u'P106' in creatoritem.get().get('claims')
                                    and (u'P21' in creatoritem.get().get(
                                        'claims')
                                        or u'P800' in creatoritem.get().get(
                                            'claims'))):
                                newcreator = creatoritem
                                continue

                    if newcreator:
                        pywikibot.output(newcreator.title())

                        newclaim = pywikibot.Claim(self.repo, u'P170')
                        newclaim.setTarget(newcreator)
                        pywikibot.output('Adding creator claim to %s'
                                         % paintingItem)
                        paintingItem.addClaim(newclaim)

                        newreference = pywikibot.Claim(
                            self.repo, u'P854')  # Add url, isReference=True
                        newreference.setTarget(europeanaUrl)
                        pywikibot.output('Adding new reference claim to %s'
                                         % paintingItem)
                        newclaim.addSource(newreference)

                        # creatoritem = pywikibot.ItemPage(self.repo,
                        #                                  creatorpage)
                        print(creatoritem.title())
                        print(creatoritem.get())
                    else:
                        pywikibot.output('No item found for %s'
                                         % (dcCreator,))

                # date of creation
                if u'P571' not in claims:
                    if painting['object']['proxies'][0].get(
                            'dctermsCreated'):
                        dccreated = painting['object']['proxies'][0][
                            'dctermsCreated']['def'][0].strip()
                        if len(dccreated) == 4:
                            # It's a year
                            newdate = pywikibot.WbTime(year=dccreated)
                            newclaim = pywikibot.Claim(self.repo, u'P571')
                            newclaim.setTarget(newdate)
                            pywikibot.output(
                                'Adding date of creation claim to %s'
                                % paintingItem)
                            paintingItem.addClaim(newclaim)

                            newreference = pywikibot.Claim(
                                self.repo,
                                u'P854')  # Add url, isReference=True
                            newreference.setTarget(europeanaUrl)
                            pywikibot.output(
                                'Adding new reference claim to %s'
                                % paintingItem)
                            newclaim.addSource(newreference)

                '''
                # material used
                if u'P186' not in claims:
                    dcFormats = {
                        u'http://vocab.getty.edu/aat/300014078':
                            u'Q4259259',  # Canvas
                        u'http://vocab.getty.edu/aat/300015050':
                            u'Q296955',  # Oil paint
                    }
                    if (painting['object']['proxies'][0].get('dcFormat')
                            and painting['object']['proxies'][0][
                                'dcFormat'].get('def')):
                        for dcFormat in painting['object']['proxies'][0][
                                'dcFormat']['def']:
                            if dcFormat in dcFormats:
                                dcformatItem = pywikibot.ItemPage(
                                    self.repo, title=dcFormats[dcFormat])

                                newclaim = pywikibot.Claim(self.repo,
                                                           u'P186')
                                newclaim.setTarget(dcformatItem)
                                pywikibot.output(
                                    'Adding material used claim to %s'
                                    % paintingItem)
                                paintingItem.addClaim(newclaim)

                                newreference = pywikibot.Claim(
                                    self.repo,
                                    u'P854')  # Add url, isReference=True
                                newreference.setTarget(europeanaUrl)
                                pywikibot.output(
                                    'Adding new reference claim to %s'
                                    % paintingItem)
                                newclaim.addSource(newreference)

                # Handle
                if u'P1184' not in claims:
                    handleUrl = painting['object']['proxies'][0][
                        'dcIdentifier']['def'][0]
                    handle = handleUrl.replace(
                        u'http://hdl.handle.net/', u'')

                    newclaim = pywikibot.Claim(self.repo, u'P1184')
                    newclaim.setTarget(handle)
                    pywikibot.output('Adding handle claim to %s'
                                     % paintingItem)
                    paintingItem.addClaim(newclaim)

                    newreference = pywikibot.Claim(
                        self.repo, u'P854')  # Add url, isReference=True
                    newreference.setTarget(europeanaUrl)
                    pywikibot.output('Adding new reference claim to %s'
                                     % paintingItem)
                    newclaim.addSource(newreference)
                '''

                # Europeana ID
                if u'P727' not in claims:
                    europeanaID = painting['object']['about'].lstrip('/')

                    newclaim = pywikibot.Claim(self.repo, u'P727')
                    newclaim.setTarget(europeanaID)
                    pywikibot.output('Adding Europeana ID claim to %s'
                                     % paintingItem)
                    paintingItem.addClaim(newclaim)

                    newreference = pywikibot.Claim(
                        self.repo, u'P854')  # Add url, isReference=True
                    newreference.setTarget(europeanaUrl)
                    pywikibot.output('Adding new reference claim to %s'
                                     % paintingItem)
                    newclaim.addSource(newreference)
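# Small check of the two derivations used in run() above: the record's
# 'about' value keeps its inner slashes in the portal URL (the leading
# slash even produces a double slash there) and only loses the leading one
# for the P727 id. The 'about' value is an invented example.
about = u'/92034/GVNRC_FHM01_OS-I-33'
europeanaUrl = u'http://europeana.eu/portal/record/%s.html' % (about,)
europeanaID = about.lstrip('/')
assert europeanaID == u'92034/GVNRC_FHM01_OS-I-33'
assert europeanaUrl.endswith(u'/record//92034/GVNRC_FHM01_OS-I-33.html')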
def output(self):
    """Output the suboptions."""
    pywikibot.output(self._output)
def traitement(self):
    """Process the undeletion request (DRP) pages."""
    pageTraitees = pywikibot.Page(
        self.site,
        u"Wikipédia:Demande de restauration de page/Traitées")
    pageRefusees = pywikibot.Page(
        self.site,
        u"Wikipédia:Demande de restauration de page/Refusées")
    couples = [(self.main_page, u'Requêtes à traiter'),
               (self.main_page, u'Requêtes en cours d\'examen'),
               (pageTraitees, None),
               (pageRefusees, None)]

    for couple in couples:
        resultat = self.analyse_une_section(page=couple[0],
                                            match_debut=couple[1])
        sections = resultat['sections']
        if not sections:
            continue

        for numero_section in sections:
            pywikibot.output('\n')
            titre_section = resultat['titres'][numero_section]
            section = sections[numero_section]
            templates = textlib.extract_templates_and_params(section)
            # templates looks like:
            # [(u'DRP début', {u'date': u'27 février 2010 à 14:56 (CEC)',
            #                  u'statut': u'oui'}), (u'DRP fin', {})]

            PaS = False
            found_full_template = False
            for template in templates:
                if template[0] == u'DRP début':
                    if 'statut' not in template[1]:
                        pywikibot.output(u"no 'statut' parameter found")
                        continue
                    elif 'date' not in template[1]:
                        pywikibot.output(u"no 'date' parameter found")
                        continue
                    found_full_template = True
                    statut_actuel = template[1]['statut']
                    date = template[1]['date']
                    if u'PàS' in template[1]:
                        pywikibot.output('phase try 0')
                        pywikibot.output(template[1][u'PàS'])
                        if template[1][u'PàS'] == 'oui':
                            pywikibot.output('phase try 1')
                            PaS = True
                            page_PaS = None
                        elif template[1][u'PàS'] != '':
                            pywikibot.output('phase try 2')
                            PaS = True
                            page_PaS = pywikibot.Page(
                                self.site,
                                u"%s/Suppression"
                                % template[1][u'PàS']).toggleTalkPage()

            pywikibot.output(u'found_full_template = %s'
                             % found_full_template)
            if not found_full_template:
                pywikibot.output('Fully fulfilled template was not found, '
                                 'skipping to next section.')
                continue

            pywikibot.output(u"PaS = %s" % PaS)
            if PaS:
                try:
                    pywikibot.output(u"page_PaS = %s" % page_PaS)
                except Exception:
                    pywikibot.output(u"no page_PaS")

            # Remove the == and any stray spaces from the section title,
            # then the [[…]] which MediaWiki strips from the URL.
            titre_section = titre_section[2:-2]
            titre_section = titre_section.strip()
            titre_section_SQL = titre_section
            titre_section_MediaWiki = titre_section
            titre_section_MediaWiki = titre_section_MediaWiki.replace(
                "[[", "")
            titre_section_MediaWiki = titre_section_MediaWiki.replace(
                "]]", "")

            pywikibot.output(u"=== %s ===" % titre_section)
            pywikibot.output(u"statut_actuel = %s" % statut_actuel)
            pywikibot.output(u"date = %s" % date)

            if statut_actuel not in self.status_knonw:
                # If the undeletion request has none of these statuses,
                # there is no point in going further: only those require
                # leaving a message to the requester.
                continue

            # Check whether the request has already been analysed by
            # the bot.
            self.database.query(
                'SELECT * FROM drp WHERE titre_section = "%s"'
                % titre_section_SQL.replace('"', '\\"').encode('utf-8'))
            results = self.database.store_result()
            result = results.fetch_row(maxrows=0)
            if result:
                # If so, and if the status is still the same, there is
                # nothing left to do.
                statut_traite = result[0][1]
                pywikibot.output(statut_traite)
                # Whether a technical PàS was started for the restoration
                # is only checked further below, so 'oui_PaS' cannot be
                # the statut_actuel yet, even if a PàS has been started!
                # Therefore replace a processed status of 'oui_PaS' with
                # a plain 'oui'.
                if statut_traite == 'oui_PaS':
                    statut_traite = 'oui'
                if statut_traite.decode('utf-8') == statut_actuel:
                    # If the current status is the same as the one already
                    # processed, there is nothing else to do: the requester
                    # has already been notified.
                    pywikibot.output(u'DRP already processed!')
                    continue
                else:
                    pywikibot.output(u'DRP already processed but with a '
                                     u'different status…')
                    # Delete the request from the SQL database so that it
                    # does not end up there twice with two different
                    # statuses.
                    self.database.query(
                        'DELETE FROM drp WHERE titre_section = "%s"'
                        % titre_section_SQL.replace('"', '\\"')
                        .encode('utf-8'))

            # If we get here, the requester has not yet been notified of
            # the current status.
            m1 = re.search(
                u"[dD]emandée? par .*\[ *\[ *([uU]tilisateur:|[uU]ser:"
                u"|[sS]p[eé]cial:[cC]ontributions/)"
                u"(?P<nom_demandeur>[^|\]]+)(\|| *\] *\])", section)
            m2 = re.search(
                u"[dD]emandée? par {{u'?\|(?P<nom_demandeur>[^|]+)}}",
                section)
            m3 = re.search(
                u"[dD]emandée? par (?P<nom_demandeur>"
                u"[0-9]+\.[0-9]+\.[0-9]+\.[0-9]+)", section)
            if m1:
                nom_demandeur = m1.group('nom_demandeur')
            elif m2:
                nom_demandeur = m2.group('nom_demandeur')
            elif m3:
                nom_demandeur = m3.group('nom_demandeur')
            else:
                pywikibot.output(u'requester name not found!')
                continue

            demandeur = pywikibot.User(self.site, nom_demandeur)
            if u'autopatrolled' in demandeur.groups():
                pywikibot.output(u'requester is autopatrolled: no need to '
                                 u'leave a message')
                continue
            elif demandeur in self.whitelist:
                pywikibot.output(u'the user is on the whitelist')
                continue

            page_discussion_demandeur = demandeur.getUserTalkPage()
            pywikibot.output(page_discussion_demandeur)

            m = re.search(u"\[ *\[ *(?P<titre_page>.*) *\] *\]",
                          titre_section)
            if not m:
                pywikibot.output(u'title of the page concerned not found!')
                continue

            titre_page_concernee = m.group('titre_page').strip()
            pywikibot.output(titre_page_concernee)

            # Check whether a technical PàS was started for the
            # restoration or not.
            if statut_actuel == 'oui':
                if PaS:
                    statut_actuel = 'oui_PaS'
                    pywikibot.output('oui_PaS')
                    if not page_PaS or page_PaS.exists():
                        try:
                            page_PaS = pywikibot.Page(
                                self.site,
                                titre_page_concernee
                                + "/Suppression").toggleTalkPage()
                            page_PaS.get()
                        except Exception:
                            pywikibot.output(
                                u'error: the technical PàS does not seem '
                                u'to exist or is not normalised!')
                            statut_actuel = 'oui_PaS_mais_introuvable'
                    if page_PaS:
                        # The PàS may have been renamed
                        if page_PaS.isRedirectPage():
                            page_PaS = page_PaS.getRedirectTarget()
                        if re.search(u"[pP]roposé *par.* ([0-9]{1,2}.*"
                                     u"20[01][0-9]) à [0-9]{2}:[0-9]{2}",
                                     page_PaS.get()):
                            date_debut_PaS = re.search(
                                u"[pP]roposé *par.* ([0-9]{1,2}.*"
                                u"20[01][0-9]) à [0-9]{2}:[0-9]{2}",
                                page_PaS.get()).group(1)
                        else:
                            # If the date is not formatted as expected on
                            # the PàS, the bot looks for its creation date
                            # by walking back through the history, then
                            # renders it in the expected form.
                            date_creation = page_PaS.getVersionHistory()[
                                -1][1]
                            date_debut_PaS = date_creation.strftime(
                                "%d %B %Y")

            message = self.messages[statut_actuel]
            # urllib.quote() percent-encodes a URL; only the section title
            # needs to be encoded here. MediaWiki, however, replaces
            # spaces with underscores ('_') and the '%' of the encoding
            # with dots ('.').
lien_drp = u"%s#%s" % (self.main_page.title(asLink = False), urllib.quote(titre_section_MediaWiki.encode('utf-8'), safe=" /").replace(" ", "_").replace("%", ".")) #pywikibot.output(u'lien_drp = %s' % lien_drp) if statut_actuel == 'non' or statut_actuel == 'oui' or statut_actuel == 'oui_PaS_mais_introuvable': message = message % {'titre_page':titre_page_concernee, 'lien_drp':lien_drp, 'date_debut_lien_valide':date} elif statut_actuel == 'oui_PaS': if not type(date_debut_PaS) == unicode: pywikibot.output(u"Formattage de date_debut_PaS") date_debut_PaS = date_debut_PaS.decode('utf-8') message = message % {'titre_page':titre_page_concernee, 'lien_drp':lien_drp, 'date_debut_lien_valide':date, 'titre_PaS':page_PaS.title(asLink = False), 'date_debut_PaS':date_debut_PaS} elif statut_actuel in ['attente', 'autre', 'autreavis']: message = message % {'titre_page':titre_page_concernee, 'lien_drp':lien_drp} else: pywikibot.output(u'statut inconnu : %s' % statut_actuel) continue # # Mauvaise gestion des IPv6 par pywikibot # Les caractères doivent être en majuscules # pattern_ipv6 = "Discussion utilisateur:(([0-9a-zA-Z]{,4}:){7}[0-9a-zA-Z]{,4})" if re.search(pattern_ipv6, page_discussion_demandeur.title()): ipv6 = re.search(pattern_ipv6, page_discussion_demandeur.title()).group(1) ipv6 = ipv6.upper() page_discussion_demandeur = pywikibot.Page(pywikibot.Site(), u"Discussion utilisateur:"+ipv6) # if page_discussion_demandeur.exists(): while page_discussion_demandeur.isRedirectPage(): page_discussion_demandeur = page_discussion_demandeur.getRedirectTarget() text = page_discussion_demandeur.get() newtext = text newtext += '\n\n' newtext += u"== %s ==" % self.titre_message % {'titre_page': titre_page_concernee} newtext += '\n' newtext += message # pwb_error pywikibot.showDiff(page_discussion_demandeur.get(), newtext) else: newtext = u"== %s ==" % self.titre_message % {'titre_page': titre_page_concernee} newtext += '\n' newtext += message pywikibot.output(newtext) comment = self.resume % {'titre_page': titre_page_concernee} pywikibot.output(comment) try: page_discussion_demandeur.put(newtext, comment=comment, minorEdit=False) except: pywikibot.output(u'erreur lors de la publication du message !') continue # Enregistrer la requête comme analysée par le bot self.database.query('INSERT INTO drp VALUES ("%s", "%s", CURRENT_TIMESTAMP)' % (titre_section_SQL.replace('"', '\\"').encode('utf-8'), statut_actuel.encode('utf-8')))
def run(self):
    """Start the bot."""
    # Run the generator which will yield Pages which might need to be
    # changed.
    for page in self.generator:
        if self.isTitleExcepted(page.title()):
            pywikibot.output(
                'Skipping {0} because the title is on the exceptions list.'
                .format(page.title(as_link=True)))
            continue
        try:
            # Load the page's text from the wiki
            original_text = page.get(get_redirect=True)
            if not page.has_permission():
                pywikibot.output("You can't edit page "
                                 + page.title(as_link=True))
                continue
        except pywikibot.NoPage:
            pywikibot.output('Page {0} not found'.format(
                page.title(as_link=True)))
            continue

        applied = set()
        new_text = original_text
        last_text = None
        context = 0
        while True:
            if self.isTextExcepted(new_text):
                pywikibot.output('Skipping {0} because it contains text '
                                 'that is on the exceptions list.'.format(
                                     page.title(as_link=True)))
                break
            while new_text != last_text:
                last_text = new_text
                new_text = self.apply_replacements(last_text, applied,
                                                   page)
                if not self.recursive:
                    break
            if new_text == original_text:
                pywikibot.output('No changes were necessary in '
                                 + page.title(as_link=True))
                break
            if hasattr(self, 'addedCat'):
                # Fetch only categories in wikitext, otherwise the others
                # will be explicitly added.
                cats = textlib.getCategoryLinks(new_text, site=page.site)
                if self.addedCat not in cats:
                    cats.append(self.addedCat)
                    new_text = textlib.replaceCategoryLinks(
                        new_text, cats, site=page.site)
            # Show the title of the page we're working on.
            # Highlight the title in purple.
            pywikibot.output(color_format(
                '\n\n>>> {lightpurple}{0}{default} <<<', page.title()))
            pywikibot.showDiff(original_text, new_text, context=context)
            if self.getOption('always'):
                break
            choice = pywikibot.input_choice(
                'Do you want to accept these changes?',
                [('Yes', 'y'), ('No', 'n'), ('Edit original', 'e'),
                 ('edit Latest', 'l'), ('open in Browser', 'b'),
                 ('More context', 'm'), ('All', 'a')],
                default='N')
            if choice == 'm':
                context = context * 3 if context else 3
                continue
            if choice == 'e':
                editor = editarticle.TextEditor()
                as_edited = editor.edit(original_text)
                # if user didn't press Cancel
                if as_edited and as_edited != new_text:
                    new_text = as_edited
                continue
            if choice == 'l':
                editor = editarticle.TextEditor()
                as_edited = editor.edit(new_text)
                # if user didn't press Cancel
                if as_edited and as_edited != new_text:
                    new_text = as_edited
                    # prevent changes from being applied again
                    last_text = new_text
                continue
            if choice == 'b':
                pywikibot.bot.open_webbrowser(page)
                try:
                    original_text = page.get(get_redirect=True, force=True)
                except pywikibot.NoPage:
                    pywikibot.output('Page {0} has been deleted.'.format(
                        page.title()))
                    break
                new_text = original_text
                last_text = None
                continue
            if choice == 'a':
                self.options['always'] = True
            if choice == 'y':
                page.text = new_text
                page.save(summary=self.generate_summary(applied),
                          asynchronous=True,
                          callback=self._replace_async_callback,
                          quiet=True)
                while not self._pending_processed_titles.empty():
                    proc_title, res = self._pending_processed_titles.get()
                    pywikibot.output('Page {0}{1} saved'.format(
                        proc_title, '' if res else ' not'))
            # choice must be 'N'
            break
        if self.getOption('always') and new_text != original_text:
            try:
                page.text = new_text
                page.save(summary=self.generate_summary(applied),
                          callback=self._replace_sync_callback, quiet=True)
            except pywikibot.EditConflict:
                pywikibot.output('Skipping {0} because of edit conflict'
                                 .format(page.title()))
            except pywikibot.SpamfilterError as e:
                pywikibot.output(
                    'Cannot change {0} because of blacklist entry {1}'
                    .format(page.title(), e.url))
            except pywikibot.LockedPage:
                pywikibot.output('Skipping {0} (locked page)'.format(
                    page.title()))
            except pywikibot.PageNotSaved as error:
                pywikibot.output('Error putting page: {0}'.format(
                    error.args))
            if self._pending_processed_titles.qsize() > 50:
                while not self._pending_processed_titles.empty():
                    proc_title, res = self._pending_processed_titles.get()
                    pywikibot.output('Page {0}{1} saved'.format(
                        proc_title, '' if res else ' not'))
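# The 'More context' choice in run() above triples the number of diff
# context lines each time it is picked, starting from 3; a quick
# standalone check:
context = 0
seen = []
for _ in range(4):
    context = context * 3 if context else 3
    seen.append(context)
assert seen == [3, 9, 27, 81]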
def addImdb(self, item):
    """Try to infer the IMDb id of item from its neighbouring episodes."""
    if item.title() in self.processeditems:
        pywikibot.output(u'Already processed %s, skipping it'
                         % (item.title(),))
        return
    self.processeditems.append(item.title())
    data = item.get()
    claims = data.get('claims')
    if u'P345' in claims:
        self.addReleased(item, claims.get(u'P345')[0].getTarget())
        return True

    langs = [u'en', u'es', u'fr', u'de', u'nl']
    label = u''
    for lang in langs:
        if data.get('labels').get(lang):
            label = data.get('labels').get(lang)
            break
    if not label:
        # FIXME: Implement
        # label = data.get('labels').get(u'en')
        pywikibot.output(u'Did not find a label for %s' % (item.title(),))

    previousitem = None
    nextitem = None
    if u'P155' in claims:
        previousitem = claims.get(u'P155')[0].getTarget()
    if u'P156' in claims:
        nextitem = claims.get(u'P156')[0].getTarget()

    imdbid_from_previous = u''
    imdbtitle_from_previous = u''
    if previousitem:
        previousclaims = previousitem.get().get('claims')
        if u'P345' in previousclaims:
            previousimdb = previousclaims.get(u'P345')[0].getTarget()
            if previousimdb in self.imdbcache:
                imdbid_from_previous = self.imdbcache[previousimdb].get(
                    u'next')
                imdbtitle_from_previous = self.imdbcache[previousimdb].get(
                    u'nexttitle')

    imdbid_from_next = u''
    imdbtitle_from_next = u''
    if nextitem:
        nextclaims = nextitem.get().get('claims')
        if u'P345' in nextclaims:
            nextimdb = nextclaims.get(u'P345')[0].getTarget()
            if nextimdb in self.imdbcache:
                imdbid_from_next = self.imdbcache[nextimdb].get(
                    u'previous')
                imdbtitle_from_next = self.imdbcache[nextimdb].get(
                    u'previoustitle')

    if imdbid_from_previous and imdbid_from_next:
        if imdbid_from_previous == imdbid_from_next:
            if label == imdbtitle_from_previous:
                newclaim = pywikibot.Claim(self.repo, u'P345')
                newclaim.setTarget(imdbid_from_previous)
                summary = (u'Adding link based on same label and link '
                           u'from [[%s|previous]] and [[%s|next item]]'
                           % (previousitem.title(), nextitem.title()))
                pywikibot.output(summary)
                item.addClaim(newclaim, summary=summary)
                self.addReleased(item, imdbid_from_previous)
                return True
            else:
                pywikibot.output(
                    u'The label "%s" is not the same as imdb "%s", '
                    u'skipping' % (label, imdbtitle_from_previous))
                return False
        else:
            pywikibot.output(
                u'We have a mix up, found "%s" & "%s", skipping'
                % (imdbid_from_previous, imdbid_from_next))
            return False
    elif imdbid_from_previous:
        if label == imdbtitle_from_previous:
            newclaim = pywikibot.Claim(self.repo, u'P345')
            newclaim.setTarget(imdbid_from_previous)
            summary = (u'Adding link based on same label and link from '
                       u'[[%s|previous item]]' % (previousitem.title(),))
            pywikibot.output(summary)
            item.addClaim(newclaim, summary=summary)
            self.addReleased(item, imdbid_from_previous)
            if nextitem:
                self.addImdb(nextitem)
            return True
        else:
            pywikibot.output(
                u'The label "%s" is not the same as imdb "%s", skipping'
                % (label, imdbtitle_from_previous))
            # This will make the bot iterate the linked list.
            if nextitem:
                self.addImdb(nextitem)
    elif imdbid_from_next:
        if label == imdbtitle_from_next:
            newclaim = pywikibot.Claim(self.repo, u'P345')
            newclaim.setTarget(imdbid_from_next)
            summary = (u'Adding link based on same label and link from '
                       u'[[%s|next item]]' % (nextitem.title(),))
            pywikibot.output(summary)
            item.addClaim(newclaim, summary=summary)
            self.addReleased(item, imdbid_from_next)
            if previousitem:
                self.addImdb(previousitem)
            return True
        else:
            pywikibot.output(
                u'The label "%s" is not the same as imdb "%s", skipping'
                % (label, imdbtitle_from_next))
            # This will make the bot iterate the linked list.
            if previousitem:
                self.addImdb(previousitem)
    pywikibot.output(u"Something went wrong. Couldn't add anything to %s"
                     % (item.title(),))
def treat(self, page):
    """Purge the given page."""
    pywikibot.output(u'Page %s%s purged'
                     % (page.title(as_link=True),
                        "" if page.purge() else " not"))
def findTranslated(self, page, oursite=None):
    """Yield the corresponding page on our site if it links back."""
    quiet = self.getOption('quiet')
    if not oursite:
        oursite = self.site
    if page.isRedirectPage():
        page = page.getRedirectTarget()

    ourpage = None
    for link in page.iterlanglinks():
        if link.site == oursite:
            ourpage = pywikibot.Page(link)
            break

    if not ourpage:
        if not quiet:
            pywikibot.output('%s -> no corresponding page in %s'
                             % (page.title(), oursite))
    elif ourpage.section():
        pywikibot.output('%s -> our page is a section link: %s'
                         % (page.title(), ourpage.title()))
    elif not ourpage.exists():
        pywikibot.output("%s -> our page doesn't exist: %s"
                         % (page.title(), ourpage.title()))
    else:
        if ourpage.isRedirectPage():
            ourpage = ourpage.getRedirectTarget()
        pywikibot.output('%s -> corresponding page is %s'
                         % (page.title(), ourpage.title()))
        if ourpage.namespace() != 0:
            pywikibot.output('%s -> not in the main namespace, skipping'
                             % page.title())
        elif ourpage.isRedirectPage():
            pywikibot.output('%s -> double redirect, skipping'
                             % page.title())
        elif not ourpage.exists():
            pywikibot.output("%s -> page doesn't exist, skipping"
                             % ourpage.title())
        else:
            backpage = None
            for link in ourpage.iterlanglinks():
                if link.site == page.site:
                    backpage = pywikibot.Page(link)
                    break
            if not backpage:
                pywikibot.output('%s -> no back interwiki ref'
                                 % page.title())
            elif backpage == page:
                # everything is ok
                yield ourpage
            elif backpage.isRedirectPage():
                backpage = backpage.getRedirectTarget()
                if backpage == page:
                    # everything is ok
                    yield ourpage
                else:
                    pywikibot.output(
                        '%s -> back interwiki ref target is redirect '
                        'to %s' % (page.title(), backpage.title()))
            else:
                pywikibot.output('%s -> back interwiki ref target is %s'
                                 % (page.title(), backpage.title()))
def buildImdbCache(self, series):
    """Build a linked-list cache of all IMDb episode ids of a series."""
    result = {}
    data = series.get()
    claims = data.get('claims')
    if u'P345' not in claims:
        pywikibot.output(u'Error: No IMDB id found')
        return result
    seriesimdb = claims.get(u'P345')[0].getTarget()
    mainurl = u'http://www.omdbapi.com/?i=%s'
    seasonurl = u'http://www.omdbapi.com/?i=%s&Season=%s'
    mainSeriesPage = requests.get(mainurl % (seriesimdb,))
    seasons = mainSeriesPage.json().get(u'totalSeasons')

    previous = u''
    previousTitle = u''
    previousReleased = u''
    current = u''
    currentTitle = u''
    currentReleased = u''
    next = u''
    nextTitle = u''
    nextReleased = u''

    try:
        for i in range(1, int(seasons) + 1):
            seasonpage = requests.get(seasonurl % (seriesimdb, i))
            episodes = seasonpage.json().get('Episodes')
            if episodes:
                for episode in episodes:
                    if not previous:
                        previous = episode.get('imdbID')
                        previousTitle = episode.get('Title')
                        previousReleased = episode.get('Released')
                    elif not current:
                        current = episode.get('imdbID')
                        currentTitle = episode.get('Title')
                        currentReleased = episode.get('Released')
                    else:
                        next = episode.get('imdbID')
                        nextTitle = episode.get('Title')
                        nextReleased = episode.get('Released')
                        if not result:
                            # Result is empty: seed the first episode,
                            # whose successor is the current one.
                            result[previous] = {
                                u'previous': u'',
                                u'previoustitle': u'',
                                u'previousReleased': u'',
                                u'title': previousTitle,
                                u'released': previousReleased,
                                u'next': current,
                                u'nexttitle': currentTitle,
                                u'nextReleased': currentReleased}
                        result[current] = {
                            u'previous': previous,
                            u'previoustitle': previousTitle,
                            u'previousReleased': previousReleased,
                            u'title': currentTitle,
                            u'released': currentReleased,
                            u'next': next,
                            u'nexttitle': nextTitle,
                            u'nextReleased': nextReleased}
                        previous = current
                        previousTitle = currentTitle
                        previousReleased = currentReleased
                        current = next
                        currentTitle = nextTitle
                        currentReleased = nextReleased
                        next = u''
                        nextTitle = u''
                        nextReleased = u''
            time.sleep(1)
        result[current] = {u'previous': previous,
                           u'previoustitle': previousTitle,
                           u'previousReleased': previousReleased,
                           u'title': currentTitle,
                           u'released': currentReleased,
                           u'next': next,
                           u'nexttitle': nextTitle,
                           u'nextReleased': nextReleased}
    except ValueError:
        pywikibot.output(u'Ran into a value error while working on %s'
                         % (mainurl % (seriesimdb,),))
    return result
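# The mapping built by buildImdbCache() above chains every episode id to
# its neighbours; a two-episode series would come out roughly like this
# (ids, titles and dates are made up):
example_cache = {
    'tt0000001': {u'previous': u'', u'previoustitle': u'',
                  u'previousReleased': u'',
                  u'title': u'Pilot', u'released': u'2001-01-01',
                  u'next': u'tt0000002', u'nexttitle': u'Episode Two',
                  u'nextReleased': u'2001-01-08'},
    'tt0000002': {u'previous': u'tt0000001', u'previoustitle': u'Pilot',
                  u'previousReleased': u'2001-01-01',
                  u'title': u'Episode Two', u'released': u'2001-01-08',
                  u'next': u'', u'nexttitle': u'', u'nextReleased': u''},
}
assert example_cache['tt0000001'][u'next'] == 'tt0000002'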
def listTemplates(cls, templates, namespaces):
    """List the pages transcluding the given templates."""
    templateDict = cls.template_dict(templates, namespaces)
    pywikibot.output(u'\nList of pages transcluding templates:',
                     toStdout=True)
    for key in templates:
        pywikibot.output(u'* %s' % key)
    pywikibot.output(u'-' * 36, toStdout=True)
    total = 0
    for key in templateDict:
        for page in templateDict[key]:
            pywikibot.output(page.title(), toStdout=True)
            total += 1
    pywikibot.output(u'Total page count: %d' % total)
    pywikibot.output(u'Report generated on %s'
                     % datetime.datetime.utcnow().isoformat(),
                     toStdout=True)
def add_template(self, source, dest, task, fromsite):
    """Place or remove the Link_GA/FA template on/from a page."""
    def compile_link(site, templates):
        """Compile one link template list."""
        findtemplate = '(%s)' % '|'.join(templates)
        return re.compile(
            r'\{\{%s\|%s\}\}' % (findtemplate.replace(' ', '[ _]'),
                                 site.code),
            re.IGNORECASE)

    tosite = dest.site
    add_tl, remove_tl = self.getTemplateList(tosite.code, task)
    re_link_add = compile_link(fromsite, add_tl)
    re_link_remove = compile_link(fromsite, remove_tl)

    text = dest.text
    m1 = add_tl and re_link_add.search(text)
    m2 = remove_tl and re_link_remove.search(text)
    changed = False
    interactive = self.getOption('interactive')
    if add_tl:
        if m1:
            pywikibot.output('(already added)')
        else:
            # insert just before interwiki
            if (not interactive
                    or pywikibot.input_yn('Connecting %s -> %s. Proceed?'
                                          % (source.title(),
                                             dest.title()),
                                          default=False,
                                          automatic_quit=False)):
                if self.getOption('side'):
                    # Placing {{Link FA|xx}} right next to the
                    # corresponding interwiki link; m1 cannot have
                    # matched in this branch, so locate that interwiki
                    # link first.
                    iw = re.search(r'\[\[%s:[^\]]+\]\]' % fromsite.code,
                                   text)
                    text = (text[:iw.end()]
                            + ' {{%s|%s}}' % (add_tl[0], fromsite.code)
                            + text[iw.end():])
                else:
                    # Moving {{Link FA|xx}} to top of interwikis
                    iw = textlib.getLanguageLinks(text, tosite)
                    text = textlib.removeLanguageLinks(text, tosite)
                    text += '%s{{%s|%s}}%s' % (config.LS, add_tl[0],
                                               fromsite.code, config.LS)
                    text = textlib.replaceLanguageLinks(text, iw, tosite)
                changed = True
    if remove_tl:
        if m2:
            if (changed  # Don't force the user to say "Y" twice
                    or not interactive
                    or pywikibot.input_yn('Connecting %s -> %s. Proceed?'
                                          % (source.title(),
                                             dest.title()),
                                          default=False,
                                          automatic_quit=False)):
                text = re.sub(re_link_remove, '', text)
                changed = True
        elif task == 'former':
            pywikibot.output('(already removed)')
    if changed:
        comment = i18n.twtranslate(tosite, 'featured-' + task,
                                   {'page': source})
        try:
            dest.put(text, comment)
            self._save_counter += 1
        except pywikibot.LockedPage:
            pywikibot.output('Page %s is locked!' % dest.title())
        except pywikibot.PageNotSaved:
            pywikibot.output('Page not saved')
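# Quick check of the pattern compile_link() above produces: spaces in the
# template names also match underscores, and the language code is fixed.
# 'de' and the template names are example inputs.
import re

templates = ['Link FA', 'Link GA']
findtemplate = '(%s)' % '|'.join(templates)
pattern = re.compile(r'\{\{%s\|%s\}\}'
                     % (findtemplate.replace(' ', '[ _]'), 'de'),
                     re.IGNORECASE)
assert pattern.search('{{link_fa|de}}')   # IGNORECASE and '[ _]' at work
assert not pattern.search('{{Link FA|fr}}')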
def __init__(self):
    """Constructor with arg parsing."""
    for arg in pywikibot.handle_args():
        arg, sep, value = arg.partition(':')
        if arg == '-from':
            self.apfrom = value or pywikibot.input(
                'Which page to start from: ')
        elif arg == '-reqsize':
            self.aplimit = int(value)
        elif arg == '-links':
            self.links = True
        elif arg == '-linksonly':
            self.links = True
            self.titles = False
        elif arg == '-replace':
            self.replace = True
        elif arg == '-redir':
            self.filterredir = 'all'
        elif arg == '-redironly':
            self.filterredir = 'redirects'
        elif arg == '-limit':
            self.stopAfter = int(value)
        elif arg in ('-autonomous', '-a'):
            self.autonomous = True
        elif arg == '-ns':
            self.namespaces.append(int(value))
        elif arg == '-wikilog':
            self.wikilogfile = value
        elif arg == '-failedlog':
            self.failedTitles = value
        elif arg == '-failed':
            self.doFailed = True
        else:
            pywikibot.output(u'Unknown argument %s.' % arg)
            pywikibot.showHelp()
            sys.exit()

    if self.namespaces == [] and not self.doFailed:
        if self.apfrom == u'':
            # 0 should be after templates ns
            self.namespaces = [14, 10, 12, 0]
        else:
            self.namespaces = [0]

    if self.aplimit is None:
        self.aplimit = 200 if self.links else 'max'

    if not self.doFailed:
        self.queryParams = {'action': 'query',
                            'generator': 'allpages',
                            'gaplimit': self.aplimit,
                            'gapfilterredir': self.filterredir}
    else:
        self.queryParams = {'action': 'query'}
        if self.apfrom != u'':
            pywikibot.output(u'Argument "-from" is ignored with "-failed"')

    propParam = 'info'
    if self.links:
        propParam += '|links|categories'
        self.queryParams['pllimit'] = 'max'
        self.queryParams['cllimit'] = 'max'
    self.queryParams['prop'] = propParam

    self.site = pywikibot.Site()

    if len(self.localSuspects) != len(self.latinSuspects):
        raise ValueError(u'Suspects must be the same size')
    if len(self.localKeyboard) != len(self.latinKeyboard):
        raise ValueError(u'Keyboard info must be the same size')

    if not os.path.isabs(self.wikilogfile):
        self.wikilogfile = pywikibot.config.datafilepath(self.wikilogfile)
    self.wikilog = self.OpenLogFile(self.wikilogfile)

    if not os.path.isabs(self.failedTitles):
        self.failedTitles = pywikibot.config.datafilepath(
            self.failedTitles)

    if self.doFailed:
        with codecs.open(self.failedTitles, 'r', 'utf-8') as f:
            self.titleList = [self.Page(t) for t in f]
        self.failedTitles += '.failed'

    self.lclToLatDict = dict(
        (ord(self.localSuspects[i]), self.latinSuspects[i])
        for i in xrange(len(self.localSuspects)))
    self.latToLclDict = dict(
        (ord(self.latinSuspects[i]), self.localSuspects[i])
        for i in xrange(len(self.localSuspects)))

    if self.localKeyboard is not None:
        self.lclToLatKeybDict = dict(
            (ord(self.localKeyboard[i]), self.latinKeyboard[i])
            for i in xrange(len(self.localKeyboard)))
        self.latToLclKeybDict = dict(
            (ord(self.latinKeyboard[i]), self.localKeyboard[i])
            for i in xrange(len(self.localKeyboard)))
    else:
        self.lclToLatKeybDict = {}
        self.latToLclKeybDict = {}

    badPtrnStr = u'([%s][%s]|[%s][%s])' \
                 % (ascii_letters, self.localLtr,
                    self.localLtr, ascii_letters)
    self.badWordPtrn = re.compile(u'[%s%s]*%s[%s%s]*'
                                  % (ascii_letters, self.localLtr,
                                     badPtrnStr, ascii_letters,
                                     self.localLtr))

    # Get whitelist
    self.knownWords = set()
    self.seenUnresolvedLinks = set()

    # TODO: handle "continue"
    if self.site.code in self.whitelists:
        wlpage = self.whitelists[self.site.code]
        pywikibot.output(u'Loading whitelist from %s' % wlpage)
        wlparams = {
            'action': 'query',
            'prop': 'links',
            'titles': wlpage,
            'redirects': '',
            'indexpageids': '',
            'pllimit': 'max',
        }
        req = api.Request(site=self.site, parameters=wlparams)
        data = req.submit()
        if len(data['query']['pageids']) == 1:
            pageid = data['query']['pageids'][0]
            links = data['query']['pages'][pageid]['links']
            allWords = [nn for n in links
                        for nn in self.FindBadWords(n['title'])]
            self.knownWords = set(allWords)
        else:
            raise ValueError(u'The number of pageids is not 1')

        pywikibot.output(u'Loaded whitelist with %i items'
                         % len(self.knownWords))
        if len(self.knownWords) > 0:
            pywikibot.log(u'Whitelist: %s'
                          % u', '.join([self.MakeLink(i, False)
                                        for i in self.knownWords]))
    else:
        pywikibot.output(u'Whitelist is not known for language %s'
                         % self.site.code)
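# Standalone sketch of the mixed-script pattern assembled above: a "bad
# word" is a run of letters in which a Latin letter sits directly next to
# a local-alphabet letter. Lower-case Cyrillic stands in for localLtr here;
# the test words are made-up examples.
import re
from string import ascii_letters

localLtr = u'абвгдежзийклмнопрстуфхцчшщъыьэюя'
badPtrnStr = u'([%s][%s]|[%s][%s])' % (ascii_letters, localLtr,
                                       localLtr, ascii_letters)
badWordPtrn = re.compile(u'[%s%s]*%s[%s%s]*'
                         % (ascii_letters, localLtr, badPtrnStr,
                            ascii_letters, localLtr))

assert badWordPtrn.search(u'Matematикa')      # Latin/Cyrillic mix is caught
assert not badWordPtrn.search(u'математика')  # pure Cyrillic passes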
def run(self):
    """Run all tasks and report the number of saved pages."""
    for task in self.tasks:
        self.run_task(task)
    pywikibot.output('%d pages written.' % self._save_counter)