def notify(user):
    """Append a sourcing notice to a user's talk page if all checks pass.

    The notice text comes from generate_subst() and is appended to the
    current talk page text.  Nothing is saved unless, in this order:

    * ``already_told(user)`` compares equal to ``False``,
    * ``bot.nobots(page=...)`` compares equal to ``True`` for the talk page,
    * ``notified_already(user)`` compares equal to ``True``.

    After a successful save the module-level counter ``total_done_now`` is
    incremented and the user name followed by the current UTC time are
    appended to the module-level list ``users_notified``.

    Errors raised while saving or book-keeping are reported and the user is
    skipped; interpreter-exit signals (Ctrl-C, ``sys.exit``) now propagate.

    :param user: user name without the ``User talk:`` prefix.
    """
    global total_done_now

    usertalk = 'User talk:' + user

    # The helpers' return types are not visible here, so the explicit
    # comparisons against True/False are kept instead of plain truthiness.
    if already_told(user) != False:
        print("User has already been notified in past 7 days.")
        return
    if bot.nobots(page=usertalk) != True:
        print("Bot denied!")
        return
    print("We're cleared to edit.")

    # NOTE(review): a True result from notified_already() is treated as
    # "not yet notified" -- confirm against that helper's definition.
    if notified_already(user) != True:
        print("User was notifed already.")
        return
    print("User hasn't already been notified!")

    page = site.Pages[usertalk]
    text = page.edit() + generate_subst(user)
    try:
        page.save(text, summary="Notifying user about file(s) with inadequate source information ([[WP:BOT|bot]] - [[User:Theo's Little Bot/disable/bsr|disable]])")
        total_done_now = total_done_now + 1
        # users_notified is only mutated, never rebound, so it needs no
        # ``global`` declaration.
        users_notified.append(user)
        users_notified.append(datetime.datetime.utcnow())
        print(user.encode('ascii', 'ignore') + " notified")
    except Exception:
        # Best effort: one failing talk page must not stop the whole run.
        print("Unknown error // skipping user.")
def notify(user):
    """Append a missing-description notice to a user's talk page.

    The notice text comes from generate_subst() and is appended to the
    current talk page text.  Nothing is saved unless, in this order:

    * ``were_they_told_in_three(user)`` compares equal to ``False``,
    * ``bot.nobots(page=...)`` compares equal to ``True`` for the talk page,
    * ``notified_already(user)`` compares equal to ``True``.

    After the save the module-level counter ``total_done_now`` is
    incremented and the user name followed by the current UTC time are
    appended to the module-level list ``users_notified``.  Exceptions raised
    by the save are not caught here.

    :param user: user name without the ``User talk:`` prefix.
    """
    global total_done_now

    usertalk = "User talk:" + user

    # The helpers' return types are not visible here, so the explicit
    # comparisons against True/False are kept instead of plain truthiness.
    if were_they_told_in_three(user) != False:
        print("User has already been notified in past 3 days.")
        return
    if bot.nobots(page=usertalk) != True:
        print("Bot denied!")
        return
    print("We're cleared to edit.")

    # NOTE(review): a True result from notified_already() is treated as
    # "not yet notified" -- confirm against that helper's definition.
    if notified_already(user) != True:
        print("User was notifed already.")
        return
    print("User hasn't already been notified!")

    page = site.Pages[usertalk]
    text = page.edit() + generate_subst(user)
    page.save(text, summary="Notifying user about missing file description(s) ([[WP:BOT|bot]] on trial)")

    total_done_now = total_done_now + 1
    # users_notified is only mutated, never rebound, so it needs no
    # ``global`` declaration.
    users_notified.append(user)
    users_notified.append(datetime.datetime.utcnow())
    print(user.encode("ascii", "ignore") + " notified")
def process_page(page):
    """Add a filled-in {{Information}} block to a file page and tell the uploader.

    Given an image object, this takes the author of the page's first
    revision as the uploader and uses ``get_exif_date(page)`` as the date,
    falling back to the first revision's timestamp.  The existing page text
    is cleaned up into a description, an {{Information}} block is put in
    front of the existing text, and the page is saved.  A notice is then
    appended to the uploader's talk page unless ``bot.nobots`` forbids it.

    Pages whose cleaned description still contains ``<nowiki`` are skipped
    without any edit.  If ``bot.donenow`` does not return ``True`` the
    process is terminated with ``sys.exit()``.

    :param page: file page object; must provide ``revisions()``, ``edit()``,
        ``save()`` and ``page_title``.
    """
    global donenow

    revision = page.revisions(dir='newer').next()
    user = revision['user']
    date = get_exif_date(page)
    if date is None:
        date = time.strftime("%d %B %Y", revision['timestamp'])

    contents = page.edit()
    if contents != "":
        description = contents.strip()
        desc_code = mwparserfromhell.parse(description)
        for bad_code in desc_code.ifilter_templates():  # Remove templates
            description = description.replace(unicode(bad_code), '')
        for bad_code in desc_code.ifilter_headings():  # Remove headers
            description = description.replace(unicode(bad_code), '')
        if description.find('<nowiki') != -1:
            return  # Skip complex descriptions
        # Remove excessive spaces
        description = re.sub(r"[ ]{2,}", " ", description, flags=re.U)
        # Turn images into links.  This has to run before the pipes are
        # escaped below, because the pattern looks for a literal "|".  The
        # title part excludes "|" and "]" so a match never spans two links.
        description = re.sub(r"\[\[(?:File|Image):([^|\]]*?)(?:\|.*?)\]\]", r"[[:File:\1]]", description, flags=re.U)
        # Remove signatures when possible
        description = re.sub(r"\[\[User:.*?\]\] \(\[\[User talk:J.*?\]\]\).*?\(UTC\)", '', description, flags=re.U)
        # Escape the remaining pipe symbols so they cannot break the
        # template parameter the description is placed in.
        description = description.replace('|', '{{!}}')
    else:
        description = ""

    contents = (
        u"{{Information | description = " + description
        + " | source = {{own}} | date = " + unicode(date)
        + " | author = {{subst:usernameexpand|" + user.replace(" ", "_")
        + "}} }}\n" + contents
    )

    if bot.donenow("User:Theo's Little Bot/disable/selfimages", donenow=donenow, donenow_div=5, shutdown=100) != True:
        sys.exit()

    # adding the template
    page.save(contents, "[[WP:BOT|Bot]]: Automatically adding {{[[Template:Information|Information]]}} to self-published work ([[User:Theo's Little Bot/disable/selfimages|disable]])")
    donenow += 1

    # notifying the uploader
    usertalktitle = "User talk:" + user
    if bot.nobots(usertalktitle, user="******", task='selfimages') == True:
        usertalk = site.Pages[usertalktitle]
        notification = "\n\n== Notification of automated file description generation ==\n{{subst:Un-botfill|file=" + page.page_title + "|sig=~~~~}}"
        # The summary is passed by keyword: a positional argument may not
        # follow the ``appendtext`` keyword argument.
        usertalk.save(
            appendtext=notification,
            summary="[[WP:BOT|Bot]]: Notifying user about autogenerated {{[[Template:Information|Information]]}} addition ([[User:Theo's Little Bot/disable/selfimages|disable]])",
            redirect=True
        )
def main():
    """Log in, list articles with ``utm_`` tracking links and process them.

    Sets the module-level ``site`` to a logged-in ``mwclient.Site`` for
    en.wikipedia.org, queries the ``enwiki_p`` replica for main-namespace
    pages that have an external link matching ``%&utm_%=%``, and calls
    ``process()`` on each page that ``bot.nobots`` allows.  The number of
    pages for which ``process()`` returned ``True`` is passed to
    ``bot.donenow`` on every iteration; if that check does not return
    ``True`` the script exits via ``sys.exit()``.
    """
    global site
    site = mwclient.Site('en.wikipedia.org')
    site.login(password.username, password.password)
    print("And we're live.")

    # The script runs in fixed-size increments: each run processes the
    # pages returned by the query below and then stops.
    # NOTE(review): the query is capped at 5000 rows, while earlier notes
    # here spoke of 500 articles per run -- confirm the intended size.
    # !todo figure out how long a run takes vs replag
    # and then optimize crontab
    query = """\
SELECT page_title
FROM externallinks
JOIN page
ON page_id = el_from
WHERE el_to LIKE "%&utm_%=%"
AND page_namespace = 0
LIMIT 5000;
"""

    connection = MySQLdb.connect(
        host='enwiki.labsdb',
        db='enwiki_p',
        read_default_file='~/replica.my.cnf'
    )
    # All rows are fetched up front, so the replica connection can be
    # released before the (much longer) editing loop starts.
    try:
        cursor = connection.cursor()
        try:
            cursor.execute(query)
            rows = cursor.fetchall()
        finally:
            cursor.close()
    finally:
        connection.close()

    donenow = 0
    for row in rows:
        title = row[0].decode("utf-8")  # since tuples are returned
        if bot.donenow("User:Theo's Little Bot/disable/tracking", donenow=donenow, donenow_div=5) != True:
            print("Bot was disabled...shutting down...")
            sys.exit()
        if bot.nobots(page=title, task='tracking') != True:
            print("Bot was denied, boo hoo.")
            continue
        if process(site.Pages[title]) == True:
            donenow += 1
        else:
            print("No changes to make.")
def notify(user):
    """Append a missing-description notice to a user's talk page.

    The notice text comes from generate_subst() and is appended to the
    current talk page text.  Nothing is saved unless, in this order:

    * ``already_told(user)`` compares equal to ``False``,
    * ``bot.nobots(page=...)`` compares equal to ``True`` for the talk page,
    * ``notified_already(user)`` compares equal to ``True``.

    After a successful save the module-level counter ``total_done_now`` is
    incremented and the user name followed by the current UTC time are
    appended to the module-level list ``users_notified``.

    Errors raised while saving or book-keeping are reported and the user is
    skipped; interpreter-exit signals (Ctrl-C, ``sys.exit``) now propagate.

    :param user: user name without the ``User talk:`` prefix.
    """
    global total_done_now

    usertalk = 'User talk:' + user

    # The helpers' return types are not visible here, so the explicit
    # comparisons against True/False are kept instead of plain truthiness.
    if already_told(user) != False:
        print("User has already been notified in past 3 days.")
        return
    if bot.nobots(page=usertalk) != True:
        print("Bot denied!")
        return
    print("We're cleared to edit.")

    # NOTE(review): a True result from notified_already() is treated as
    # "not yet notified" -- confirm against that helper's definition.
    if notified_already(user) != True:
        print("User was notifed already.")
        return
    print("User hasn't already been notified!")

    page = site.Pages[usertalk]
    text = page.edit() + generate_subst(user)
    try:
        page.save(
            text,
            summary="Notifying user about missing file description(s) ([[WP:BOT|bot]] - [[User:Theo's Little Bot/disable/desc_notifier|disable]])"
        )
        total_done_now = total_done_now + 1
        # users_notified is only mutated, never rebound, so it needs no
        # ``global`` declaration.
        users_notified.append(user)
        users_notified.append(datetime.datetime.utcnow())
        print(user.encode('ascii', 'ignore') + " notified")
    except Exception:
        # Best effort: one failing talk page must not stop the whole run.
        print("Unknown error // skipping user.")
def run(self):
    """Add Wayback Machine archive links to dead "cite web" references.

    Walks every page in 'Category:All articles with dead external links'.
    For each ``<ref>`` that contains one of ``self.deadlink_names`` as a
    template, every "cite web" template without an ``archiveurl`` is
    checked: if its URL is not already an archive link and does not answer
    with HTTP 200, the Wayback Machine is asked for a snapshot (near the
    ``accessdate`` when that can be parsed).  When a snapshot is found,
    ``archiveurl`` (and ``archivedate`` when it can be derived from the
    snapshot URL) are added, and the dead-link templates are removed from
    that reference.

    With ``self.DRYRUN`` false, changed pages are saved if ``bot.donenow``
    and ``bot.nobots`` allow it, and ``self.donenow`` is incremented per
    saved page; a failed ``bot.donenow`` check ends the process via
    ``sys.exit()``.  With ``self.DRYRUN`` and ``self.VERBOSE`` both true, a
    unified diff of the would-be change is printed instead.
    """
    category = mwclient.listing.Category(site, 'Category:All articles with dead external links')
    # category = [site.Pages['10 Hronia Mazi']] - debugging only

    # Built once; refs are lower-cased before the substring test, so the
    # names in self.deadlink_names are presumably lower case -- verify.
    deadlink_prefixes = ['{{' + name for name in self.deadlink_names]
    wayback_form = "http://web.archive.org/web/form-submit.jsp"

    for page in category:
        print(page.page_title)
        orig_contents = page.edit()
        contents = orig_contents  # one fetch is enough for both copies
        number_done = 0

        all_refs = re.findall(r"""<ref[^>]*>.*?</ref>""", contents, flags=re.UNICODE | re.IGNORECASE)
        dead_refs = []
        for ref in all_refs:
            ref_lower = ref.lower()
            if any(prefix in ref_lower for prefix in deadlink_prefixes):
                dead_refs.append(ref)

        for ref in dead_refs:
            ref_code = mwparserfromhell.parse(ref)
            updated = False
            for template in ref_code.filter_templates():
                # Template names are compared lower-cased, like the
                # dead-link names below, so "{{Cite web" is handled too.
                if "cite web" not in template.name.lower():
                    continue
                if template.has_param('archiveurl'):
                    continue
                if not template.has_param('url'):
                    continue  # nothing to look up without a URL
                url = unicode(template.get('url').value.strip())

                if url.find('web.archive.org') != -1:
                    okay_to_edit = False
                    print("The url is already an archive link!")
                else:
                    # A single request decides; a failed request counts as
                    # a dead link.
                    try:
                        link_works = requests.get(url, timeout=15).status_code == requests.codes.ok
                    except Exception:
                        link_works = False
                    if link_works:
                        okay_to_edit = False
                        print("No need to add an archive, since the citations's URL currently works!")
                    else:
                        okay_to_edit = True

                if not okay_to_edit:
                    print("Not adding archive link, per above.")
                    continue

                if template.has_param('accessdate'):
                    try:
                        # ``parser`` is presumably dateutil.parser -- verify.
                        accessdate = parser.parse(str(template.get('accessdate').value))
                        wayback_date = accessdate.strftime("%Y%m%d%H%M%S")
                        r = requests.get("http://web.archive.org/web/{date}/{url}".format(date=wayback_date, url=url))
                    except (ValueError, OverflowError):  # in case we can't parse the accessdate
                        r = requests.get(wayback_form, params={'url': url, 'type': 'replay'})
                else:
                    r = requests.get(wayback_form, params={'url': url, 'type': 'replay'})
                print(r.url)
                print(r.status_code)

                if r.status_code != requests.codes.ok:
                    # this url was not archived by the wayback machine; nothing we can do here.
                    print("{url} not archived in wayback machine.".format(url=url))
                    continue

                number_done += 1
                updated = True
                wayback_url = r.url
                try:
                    # The fifth "/"-separated piece of the snapshot URL is
                    # expected to be its 14-digit timestamp.
                    wayback_date_object = datetime.strptime(wayback_url.split('/')[4], "%Y%m%d%H%M%S")
                    wayback_date = wayback_date_object.strftime('%d %B %Y')
                    template.add('archivedate', wayback_date)
                except (ValueError, IndexError):
                    print("Unable to fetch date...no worries, we have exception handing!")
                template.add('archiveurl', wayback_url)

            if updated:
                for template in ref_code.filter_templates():
                    nameoftemp = template.name.lower()
                    if any(name in nameoftemp for name in self.deadlink_names):
                        ref_code.remove(template)
                # Plain replacement: with re.sub, backslashes in the new
                # reference text would be read as escapes/group references.
                contents = contents.replace(ref, unicode(ref_code))

        if self.DRYRUN == False and number_done > 0:
            if bot.donenow("User:Theo's Little Bot/disable/deadlinks", donenow=self.donenow, donenow_div=5) != True:
                print("Bot was disabled.")
                sys.exit()
            if bot.nobots(page=page.page_title) == True:
                try:
                    page.save(contents, summary="Adding archiveurl for {0} dead link{1} ([[WP:BOT|bot]] - [[User:Theo's Little Bot/disable/deadlinks|disable]])".format(number_done, 's' if number_done > 1 else ''))
                    print("{0} saved!".format(page.page_title))
                    self.donenow += 1
                except mwclient.errors.EditError as e:
                    print("ERROR - unable to save page:  {0}".format(e))
            else:
                print("Could not save page...bot not authorized.")
        elif self.DRYRUN == True and self.VERBOSE == True:
            diff = difflib.unified_diff(orig_contents.splitlines(), contents.splitlines())
            print('\n'.join(diff))