def replace_links(self, text, linkedPage, targetPage):
    """Replace all links to linkedPage found in text by links to targetPage.

    @param text: wikitext to scan for links
    @param linkedPage: page whose incoming links should be rewritten
    @param targetPage: page the rewritten links should point to
    @return: the text with every matching link rewritten
    """
    mysite = pywikibot.Site()
    linktrail = mysite.linktrail()
    # Both patterns are loop-invariant, so compile them once up front.
    trailR = re.compile(linktrail)
    linkR = re.compile(
        r'\[\[(?P<title>[^\]\|#]*)(?P<section>#[^\]\|]*)?'
        r'(\|(?P<label>[^\]]*))?\]\](?P<linktrail>' + linktrail + ')')
    curpos = 0
    # This loop will run until we have finished the current page
    while True:
        m = linkR.search(text, pos=curpos)
        if not m:
            break
        # Make sure that next time around we will not find this same hit.
        curpos = m.start() + 1
        page_title = m.group('title')
        # ignore interwiki links and links to sections of the same page
        if page_title.strip() == '' or mysite.isInterwikiLink(page_title):
            continue
        actualLinkPage = pywikibot.Page(targetPage.site, page_title)
        # Check whether the link found is to page.
        if actualLinkPage != linkedPage:
            continue

        # The link looks like this:
        # [[page_title|link_text]]trailing_chars
        # or like this: [[page_title]]trailing_chars
        link_text = m.group('label') or page_title
        section = m.group('section') or ''

        if section and targetPage.section():
            pywikibot.warning(
                'Source section {0} and target section {1} found. '
                'Skipping.'.format(section, targetPage))
            continue

        trailing_chars = m.group('linktrail')
        if trailing_chars:
            link_text += trailing_chars

        # Remove a leading ":" but never reduce the text to an empty string:
        # a label consisting only of ":" used to raise IndexError on the
        # first-character checks below.
        if link_text.startswith(':') and len(link_text) > 1:
            link_text = link_text[1:]
        if link_text[0].isupper() or link_text[0].isdigit():
            new_page_title = targetPage.title()
        else:
            new_page_title = first_lower(targetPage.title())

        # remove leading ":" from the target title as well
        if new_page_title.startswith(':'):
            new_page_title = new_page_title[1:]

        title_len = len(new_page_title)
        if new_page_title == link_text and not section:
            newlink = "[[%s]]" % new_page_title
        # check if we can create a link with trailing characters instead of
        # a pipelink
        elif (title_len <= len(link_text) and
              firstcap(link_text[:title_len]) == firstcap(new_page_title) and
              trailR.sub('', link_text[title_len:]) == '' and
              not section):
            newlink = "[[%s]]%s" % (link_text[:title_len],
                                    link_text[title_len:])
        else:
            newlink = "[[%s%s|%s]]" % (new_page_title, section, link_text)
        text = text[:m.start()] + newlink + text[m.end():]
    return text
def treat(self, refPage, disambPage):
    """Treat a page.

    Interactively resolve every link on refPage that points to disambPage
    and save the page when its text changed.

    @param disambPage: the disambiguation page or redirect we don't want
        anything to link to
    @type disambPage: pywikibot.Page
    @param refPage: a page linking to disambPage
    @type refPage: pywikibot.Page
    @return: True on every path of this method; the recursive caller below
        stops iterating when a false value is returned
    @rtype: bool
    """
    # TODO: break this function up into subroutines!

    self.current_page = refPage
    include = False
    unlink_counter = 0
    new_targets = []
    try:
        text = refPage.get()
        ignoreReason = self.checkContents(text)
        if ignoreReason:
            pywikibot.output('\n\nSkipping %s because it contains %s.\n\n'
                             % (refPage.title(), ignoreReason))
        else:
            include = True
    except pywikibot.IsRedirectPage:
        pywikibot.output(u'%s is a redirect to %s'
                         % (refPage.title(), disambPage.title()))
        if disambPage.isRedirectPage():
            target = self.alternatives[0]
            if pywikibot.input_yn(u'Do you want to make redirect %s point '
                                  'to %s?' % (refPage.title(), target),
                                  default=False, automatic_quit=False):
                redir_text = '#%s [[%s]]' \
                             % (self.mysite.redirect(), target)
                try:
                    refPage.put_async(redir_text, summary=self.comment)
                except pywikibot.PageNotSaved as error:
                    pywikibot.output(u'Page not saved: %s' % error.args)
        else:
            choice = pywikibot.input_choice(
                u'Do you want to work on pages linking to %s?'
                % refPage.title(),
                [('yes', 'y'), ('no', 'n'), ('change redirect', 'c')], 'n',
                automatic_quit=False)
            if choice == 'y':
                gen = ReferringPageGeneratorWithIgnore(
                    refPage, self.primary, main_only=self.main_only
                )
                preloadingGen = pagegenerators.PreloadingGenerator(gen)
                for refPage2 in preloadingGen:
                    # run until the user selected 'quit'
                    if not self.treat(refPage2, refPage):
                        break
            elif choice == 'c':
                text = refPage.get(get_redirect=True)
                include = "redirect"
    except pywikibot.NoPage:
        pywikibot.output(
            u'Page [[%s]] does not seem to exist?! Skipping.'
            % refPage.title())
        include = False

    if include not in (True, "redirect"):
        return True

    # make a backup of the original text so we can show the changes later
    original_text = text
    n = 0
    curpos = 0
    dn = False
    edited = False
    # how many characters should be displayed around the current link
    context = 60
    # This loop will run until we have finished the current page
    while True:
        m = self.linkR.search(text, pos=curpos)
        if not m:
            if n == 0:
                pywikibot.output(u"No changes necessary in %s"
                                 % refPage.title())
                return True
            # stop loop and save page
            break
        # Make sure that next time around we will not find this same hit.
        curpos = m.start() + 1
        try:
            foundlink = pywikibot.Link(m.group('title'), disambPage.site)
            foundlink.parse()
        except pywikibot.Error:
            continue
        # ignore interwiki links
        if foundlink.site != disambPage.site:
            continue
        # Check whether the link found is to disambPage.
        try:
            if foundlink.canonical_title() != disambPage.title():
                continue
        except pywikibot.Error:
            # must be a broken link
            pywikibot.log(u"Invalid link [[%s]] in page [[%s]]"
                          % (m.group('title'), refPage.title()))
            continue
        n += 1
        # check if there's a dn-template here already
        if (self.dnSkip and self.dn_template_str and
                self.dn_template_str[:-2] in
                text[m.end():m.end() + len(self.dn_template_str) + 8]):
            continue

        edit = EditOption('edit page', 'e', text, m.start(),
                          disambPage.title())
        context_option = HighlightContextOption(
            'more context', 'm', text, context,
            start=m.start(), end=m.end())
        context_option.before_question = True

        options = [ListOption(self.alternatives, ''),
                   ListOption(self.alternatives, 'r'),
                   StandardOption('skip link', 's'),
                   edit,
                   StandardOption('next page', 'n'),
                   StandardOption('unlink', 'u')]
        if self.dn_template_str:
            # '?', '/' for old choice
            options.append(
                AliasOption('tag template %s' % self.dn_template_str,
                            ['t', '?', '/']))
        options.append(context_option)
        if not edited:
            options.append(
                ShowPageOption('show disambiguation page', 'd',
                               m.start(), disambPage))
        options += [
            OutputProxyOption('list', 'l',
                              SequenceOutputter(self.alternatives)),
            AddAlternativeOption('add new', 'a',
                                 SequenceOutputter(self.alternatives))]
        if edited:
            options.append(StandardOption('save in this form', 'x'))

        # TODO: Output context on each question
        answer = pywikibot.input_choice('Option', options,
                                        default=self.always,
                                        force=bool(self.always))
        if answer == 'x':
            assert edited, 'invalid option before editing'
            break
        elif answer == 's':
            n -= 1  # TODO what's this for?
            continue
        elif answer == 'e':
            text = edit.new_text
            edited = True
            # rescan the edited text from the beginning
            curpos = 0
            continue
        elif answer == 'n':
            # skip this page
            if self.primary:
                # If run with the -primary argument, skip this
                # occurrence next time.
                self.primaryIgnoreManager.ignore(refPage)
            return True

        # The link looks like this:
        # [[page_title|link_text]]trailing_chars
        page_title = m.group('title')
        link_text = m.group('label')

        if not link_text:
            # or like this: [[page_title]]trailing_chars
            link_text = page_title
        section = m.group('section') or ''
        trailing_chars = m.group('linktrail')
        if trailing_chars:
            link_text += trailing_chars

        if answer == 't':
            assert self.dn_template_str
            # small chunk of text to search
            search_text = text[m.end():m.end() + context]
            # figure out where the link (and sentence) ends, put note
            # there
            end_of_word_match = re.search(r'\s', search_text)
            if end_of_word_match:
                position_split = end_of_word_match.start(0)
            else:
                position_split = 0
            # insert dab needed template
            text = (text[:m.end() + position_split] +
                    self.dn_template_str +
                    text[m.end() + position_split:])
            dn = True
            continue

        if answer == 'u':
            # unlink - we remove the section if there's any
            text = text[:m.start()] + link_text + text[m.end():]
            unlink_counter += 1
            continue

        # Check that no option from above was missed
        assert isinstance(answer, tuple), 'only tuple answer left.'
        assert answer[0] in ['r', ''], 'only valid tuple answers.'

        if answer[0] == 'r':
            # we want to throw away the original link text
            replaceit = link_text == page_title
        elif include == "redirect":
            replaceit = True
        else:
            replaceit = False

        chosen_title = answer[1]
        repPl = pywikibot.Page(pywikibot.Link(chosen_title,
                                              disambPage.site))
        # Use the page's own title; lower-case its first letter only when
        # neither the chosen alternative nor the link text is capitalised.
        new_page_title = repPl.title()
        if not (chosen_title[0].isupper() or link_text[0].isupper()):
            new_page_title = first_lower(new_page_title)
        if new_page_title not in new_targets:
            new_targets.append(new_page_title)

        title_len = len(new_page_title)
        if replaceit and trailing_chars:
            newlink = "[[%s%s]]%s" % (new_page_title,
                                      section,
                                      trailing_chars)
        elif replaceit or (new_page_title == link_text and
                           not section):
            newlink = "[[%s]]" % new_page_title
        # check if we can create a link with trailing characters
        # instead of a pipelink
        elif (
            title_len <= len(link_text) and
            firstcap(link_text[:title_len]) == firstcap(new_page_title) and
            re.sub(self.trailR, '', link_text[title_len:]) == '' and
            not section
        ):
            newlink = "[[%s]]%s" \
                      % (link_text[:title_len], link_text[title_len:])
        else:
            newlink = "[[%s%s|%s]]" \
                      % (new_page_title, section, link_text)
        text = text[:m.start()] + newlink + text[m.end():]

    if text == original_text:
        pywikibot.output(u'\nNo changes have been made:\n')
    else:
        pywikibot.output(u'\nThe following changes have been made:\n')
        pywikibot.showDiff(original_text, text)
        pywikibot.output(u'')
        # save the page (only when the text actually changed)
        self.setSummaryMessage(disambPage, new_targets, unlink_counter,
                               dn)
        try:
            refPage.put_async(text, summary=self.comment)
        except pywikibot.LockedPage:
            pywikibot.output(u'Page not saved: page is locked')
        except pywikibot.PageNotSaved as error:
            pywikibot.output(u'Page not saved: %s' % error.args)
    return True
def treat_disamb_only(self, refPage, disambPage):
    """Resolve the links to disambPage but don't look for its redirects.

    @param disambPage: the disambiguation page or redirect we don't want
        anything to link to
    @type disambPage: pywikibot.Page
    @param refPage: a page linking to disambPage
    @type refPage: pywikibot.Page
    @return: "nextpage" if the user enters "n" to skip this page,
        "nochange" if the page needs no change, and
        "done" if the page is processed successfully
    @rtype: str
    """
    # TODO: break this function up into subroutines!

    self.current_page = refPage
    include = False
    unlink_counter = 0
    new_targets = []
    try:
        text = refPage.get()
        ignoreReason = self.checkContents(text)
        if ignoreReason:
            pywikibot.output(
                '\n\nSkipping %s because it contains %s.\n\n'
                % (refPage.title(), ignoreReason))
        else:
            include = True
    except pywikibot.IsRedirectPage:
        pywikibot.output(u'%s is a redirect to %s'
                         % (refPage.title(), disambPage.title()))
        if disambPage.isRedirectPage():
            target = self.alternatives[0]
            if pywikibot.input_yn(u'Do you want to make redirect %s point '
                                  'to %s?' % (refPage.title(), target),
                                  default=False, automatic_quit=False):
                redir_text = '#%s [[%s]]' \
                             % (self.mysite.redirect(), target)
                try:
                    refPage.put(redir_text, summary=self.comment,
                                asynchronous=True)
                except pywikibot.PageNotSaved as error:
                    pywikibot.output(u'Page not saved: %s' % error.args)
        else:
            choice = pywikibot.input_choice(
                u'Do you want to work on pages linking to %s?'
                % refPage.title(),
                [('yes', 'y'), ('no', 'n'), ('change redirect', 'c')], 'n',
                automatic_quit=False)
            if choice == 'y':
                gen = ReferringPageGeneratorWithIgnore(
                    refPage, self.primary, main_only=self.main_only)
                preloadingGen = pagegenerators.PreloadingGenerator(gen)
                for refPage2 in preloadingGen:
                    # every referring page is treated; the return value of
                    # treat() is deliberately not inspected here
                    self.treat(refPage2, refPage)
            elif choice == 'c':
                text = refPage.get(get_redirect=True)
                include = "redirect"
    except pywikibot.NoPage:
        pywikibot.output(
            u'Page [[%s]] does not seem to exist?! Skipping.'
            % refPage.title())
        include = False

    if include not in (True, "redirect"):
        return 'done'

    # save the original text so we can show the changes later
    original_text = text
    n = 0
    curpos = 0
    dn = False
    edited = False
    # how many characters should be displayed around the current link
    context = 60
    # This loop will run until we have finished the current page
    while True:
        m = self.linkR.search(text, pos=curpos)
        if not m:
            if n == 0:
                # No changes necessary for this disambiguation title.
                return 'nochange'
            # stop loop and save page
            break
        # Ensure that next time around we will not find this same hit.
        curpos = m.start() + 1
        try:
            foundlink = pywikibot.Link(m.group('title'), disambPage.site)
            foundlink.parse()
        except pywikibot.Error:
            continue
        # ignore interwiki links
        if foundlink.site != disambPage.site:
            continue
        # Check whether the link found is to disambPage.
        try:
            if foundlink.canonical_title() != disambPage.title():
                continue
        except pywikibot.Error:
            # must be a broken link
            pywikibot.log(u"Invalid link [[%s]] in page [[%s]]"
                          % (m.group('title'), refPage.title()))
            continue
        n += 1
        # check if there's a dn-template here already
        if (self.dnSkip and self.dn_template_str and
                self.dn_template_str[:-2] in
                text[m.end():m.end() + len(self.dn_template_str) + 8]):
            continue

        edit = EditOption('edit page', 'e', text, m.start(),
                          disambPage.title())
        context_option = HighlightContextOption('more context', 'm', text,
                                                context, start=m.start(),
                                                end=m.end())
        context_option.before_question = True

        options = [
            ListOption(self.alternatives, ''),
            ListOption(self.alternatives, 'r'),
            StandardOption('skip link', 's'),
            edit,
            StandardOption('next page', 'n'),
            StandardOption('unlink', 'u')
        ]
        if self.dn_template_str:
            # '?', '/' for old choice
            options.append(
                AliasOption('tag template %s' % self.dn_template_str,
                            ['t', '?', '/']))
        options.append(context_option)
        if not edited:
            options.append(
                ShowPageOption('show disambiguation page', 'd',
                               m.start(), disambPage))
        options += [
            OutputProxyOption('list', 'l',
                              SequenceOutputter(self.alternatives)),
            AddAlternativeOption('add new', 'a',
                                 SequenceOutputter(self.alternatives))
        ]
        if edited:
            options.append(StandardOption('save in this form', 'x'))

        # TODO: Output context on each question
        answer = pywikibot.input_choice('Option', options,
                                        default=self.always,
                                        force=bool(self.always))
        if answer == 'x':
            assert edited, 'invalid option before editing'
            break
        elif answer == 's':
            n -= 1  # TODO what's this for?
            continue
        elif answer == 'e':
            text = edit.new_text
            edited = True
            # rescan the edited text from the beginning
            curpos = 0
            continue
        elif answer == 'n':
            # skip this page
            if self.primary:
                # If run with the -primary argument, skip this
                # occurrence next time.
                self.primaryIgnoreManager.ignore(refPage)
            return 'nextpage'

        # The link looks like this:
        # [[page_title|link_text]]trailing_chars
        page_title = m.group('title')
        link_text = m.group('label')

        if not link_text:
            # or like this: [[page_title]]trailing_chars
            link_text = page_title
        section = m.group('section') or ''
        trailing_chars = m.group('linktrail')
        if trailing_chars:
            link_text += trailing_chars

        if answer == 't':
            assert self.dn_template_str
            # small chunk of text to search
            search_text = text[m.end():m.end() + context]
            # figure out where the link (and sentence) ends, put note
            # there
            end_of_word_match = re.search(r'\s', search_text)
            if end_of_word_match:
                position_split = end_of_word_match.start(0)
            else:
                position_split = 0
            # insert dab needed template
            text = (text[:m.end() + position_split] +
                    self.dn_template_str +
                    text[m.end() + position_split:])
            dn = True
            continue

        if answer == 'u':
            # unlink - we remove the section if there's any
            text = text[:m.start()] + link_text + text[m.end():]
            unlink_counter += 1
            continue

        # Check that no option from above was missed
        assert isinstance(answer, tuple), 'only tuple answer left.'
        assert answer[0] in ['r', ''], 'only valid tuple answers.'

        if answer[0] == 'r':
            # we want to throw away the original link text
            replaceit = link_text == page_title
        elif include == "redirect":
            replaceit = True
        else:
            replaceit = False

        chosen_title = answer[1]
        repPl = pywikibot.Page(
            pywikibot.Link(chosen_title, disambPage.site))
        # Use the page's own title; lower-case its first letter only when
        # neither the chosen alternative nor the link text is capitalised.
        new_page_title = repPl.title()
        if not (chosen_title[0].isupper() or link_text[0].isupper()):
            new_page_title = first_lower(new_page_title)
        if new_page_title not in new_targets:
            new_targets.append(new_page_title)

        title_len = len(new_page_title)
        if replaceit and trailing_chars:
            newlink = "[[%s%s]]%s" % (new_page_title,
                                      section,
                                      trailing_chars)
        elif replaceit or (new_page_title == link_text and
                           not section):
            newlink = "[[%s]]" % new_page_title
        # check if we can create a link with trailing characters
        # instead of a pipelink
        elif (title_len <= len(link_text) and
              firstcap(link_text[:title_len]) == firstcap(new_page_title) and
              re.sub(self.trailR, '', link_text[title_len:]) == '' and
              not section):
            newlink = "[[%s]]%s" \
                      % (link_text[:title_len], link_text[title_len:])
        else:
            newlink = "[[%s%s|%s]]" \
                      % (new_page_title, section, link_text)
        text = text[:m.start()] + newlink + text[m.end():]

    if text == original_text:
        pywikibot.output(u'\nNo changes have been made:\n')
    else:
        pywikibot.output(u'\nThe following changes have been made:\n')
        pywikibot.showDiff(original_text, text)
        pywikibot.output(u'')
        # save the page (only when the text actually changed)
        self.setSummaryMessage(disambPage, new_targets, unlink_counter,
                               dn)
        try:
            refPage.put(text, summary=self.comment, asynchronous=True)
        except pywikibot.LockedPage:
            pywikibot.output(u'Page not saved: page is locked')
        except pywikibot.PageNotSaved as error:
            pywikibot.output(u'Page not saved: %s' % error.args)
    return 'done'
def treat(self, refPage, disambPage):
    """
    Treat a page.

    Prompts the user for every link on refPage that points to disambPage
    and saves the page when its text changed.

    Parameters:
        disambPage - The disambiguation page or redirect we don't want
            anything to link to
        refPage - A page linking to disambPage

    Returns True on every path of this method; the recursive caller below
    stops iterating when a false value is returned.
    """
    # TODO: break this function up into subroutines!

    include = False
    unlink_counter = 0
    new_targets = []
    try:
        text = refPage.get()
        ignoreReason = self.checkContents(text)
        if ignoreReason:
            pywikibot.output('\n\nSkipping %s because it contains %s.\n\n'
                             % (refPage.title(), ignoreReason))
        else:
            include = True
    except pywikibot.IsRedirectPage:
        pywikibot.output(u'%s is a redirect to %s'
                         % (refPage.title(), disambPage.title()))
        if disambPage.isRedirectPage():
            target = self.alternatives[0]
            if pywikibot.input_yn(u'Do you want to make redirect %s point '
                                  'to %s?' % (refPage.title(), target),
                                  default=False, automatic_quit=False):
                redir_text = '#%s [[%s]]' \
                             % (self.mysite.redirect(), target)
                try:
                    refPage.put_async(redir_text, summary=self.comment)
                except pywikibot.PageNotSaved as error:
                    pywikibot.output(u'Page not saved: %s' % error.args)
        else:
            choice = pywikibot.input_choice(
                u'Do you want to work on pages linking to %s?'
                % refPage.title(),
                [('yes', 'y'), ('no', 'n'), ('change redirect', 'c')], 'n',
                automatic_quit=False)
            if choice == 'y':
                gen = ReferringPageGeneratorWithIgnore(refPage,
                                                       self.primary)
                preloadingGen = pagegenerators.PreloadingGenerator(gen)
                for refPage2 in preloadingGen:
                    # run until the user selected 'quit'
                    if not self.treat(refPage2, refPage):
                        break
            elif choice == 'c':
                text = refPage.get(get_redirect=True)
                include = "redirect"
    except pywikibot.NoPage:
        pywikibot.output(
            u'Page [[%s]] does not seem to exist?! Skipping.'
            % refPage.title())
        include = False

    if include not in (True, "redirect"):
        return True

    # make a backup of the original text so we can show the changes later
    original_text = text
    n = 0
    curpos = 0
    dn = False
    edited = False
    # This loop will run until we have finished the current page
    while True:
        m = self.linkR.search(text, pos=curpos)
        if not m:
            if n == 0:
                pywikibot.output(u"No changes necessary in %s"
                                 % refPage.title())
                return True
            # stop loop and save page
            break
        # Make sure that next time around we will not find this same hit.
        curpos = m.start() + 1
        try:
            foundlink = pywikibot.Link(m.group('title'), disambPage.site)
            foundlink.parse()
        except pywikibot.Error:
            continue
        # ignore interwiki links
        if foundlink.site != disambPage.site:
            continue
        # Check whether the link found is to disambPage.
        try:
            if foundlink.canonical_title() != disambPage.title():
                continue
        except pywikibot.Error:
            # must be a broken link
            pywikibot.log(u"Invalid link [[%s]] in page [[%s]]"
                          % (m.group('title'), refPage.title()))
            continue
        n += 1
        # how many characters should be displayed around the current link;
        # reset for every link because 'm' doubles it below
        context = 60
        # check if there's a dn-template here already
        if (self.dnSkip and self.dn_template_str and
                self.dn_template_str[:-2] in
                text[m.end():m.end() + len(self.dn_template_str) + 8]):
            continue

        # This loop will run while the user doesn't choose an option
        # that will actually change the page
        while True:
            self.current_page = refPage

            if not self.always:
                # at the beginning of the link, start red color.
                # at the end of the link, reset the color to default
                pywikibot.output(
                    text[max(0, m.start() - context):m.start()] +
                    '\03{lightred}' + text[m.start():m.end()] +
                    '\03{default}' + text[m.end():m.end() + context])
                options = ['#', 'r#', '[s]kip link', '[e]dit page',
                           '[n]ext page', '[u]nlink', '[q]uit']
                if self.dn_template_str:
                    options.append(u'[t]ag template %s'
                                   % self.dn_template_str)
                options.append('[m]ore context')
                if not edited:
                    options.append('show [d]isambiguation page')
                options += ['[l]ist', '[a]dd new']
                if edited:
                    options += ['save in this form [x]']
                options = concat_options('Option', 72, options)
                choice = pywikibot.input(options)
            else:
                choice = self.always
            if choice in ['a', 'A']:
                newAlternative = pywikibot.input(u'New alternative:')
                self.alternatives.append(newAlternative)
                self.listAlternatives()
            elif choice in ['e', 'E']:
                editor = editarticle.TextEditor()
                newText = editor.edit(text, jumpIndex=m.start(),
                                      highlight=disambPage.title())
                # if user didn't press Cancel
                if newText and newText != text:
                    text = newText
                    break
            elif choice in ['d', 'D']:
                editor = editarticle.TextEditor()
                if disambPage.isRedirectPage():
                    disambredir = disambPage.getRedirectTarget()
                    editor.edit(
                        disambredir.get(),
                        jumpIndex=m.start(),
                        highlight=disambredir.title())
                else:
                    editor.edit(
                        disambPage.get(),
                        jumpIndex=m.start(),
                        highlight=disambPage.title())
            elif choice in ['l', 'L']:
                self.listAlternatives()
            elif choice in ['m', 'M']:
                # show more text around the link we're working on
                context *= 2
            else:
                break

        if choice in ['e', 'E']:
            # user has edited the page and then pressed 'OK'
            edited = True
            curpos = 0
            continue
        elif choice in ['n', 'N']:
            # skip this page
            if self.primary:
                # If run with the -primary argument, skip this
                # occurrence next time.
                self.primaryIgnoreManager.ignore(refPage)
            return True
        elif choice in ['q', 'Q']:
            # quit the program
            # NOTE(review): presumably quit() does not return; if it does,
            # execution falls through to the link handling below - confirm.
            self.quit()
        elif choice in ['s', 'S']:
            # Next link on this page
            n -= 1
            continue
        elif choice in ['x', 'X'] and edited:
            # Save the page as is
            break

        # The link looks like this:
        # [[page_title|link_text]]trailing_chars
        page_title = m.group('title')
        link_text = m.group('label')

        if not link_text:
            # or like this: [[page_title]]trailing_chars
            link_text = page_title
        section = m.group('section') or ''
        trailing_chars = m.group('linktrail')
        if trailing_chars:
            link_text += trailing_chars
        # '?', '/' for old choice
        if choice in ['t', 'T', '?', '/'] and self.dn_template_str:
            # small chunk of text to search
            search_text = text[m.end():m.end() + context]
            # figure out where the link (and sentence) ends, put note
            # there
            end_of_word_match = re.search(r'\s', search_text)
            if end_of_word_match:
                position_split = end_of_word_match.start(0)
            else:
                position_split = 0
            # insert dab needed template
            text = (text[:m.end() + position_split] +
                    self.dn_template_str +
                    text[m.end() + position_split:])
            dn = True
            continue

        if choice in ['u', 'U']:
            # unlink - we remove the section if there's any
            text = text[:m.start()] + link_text + text[m.end():]
            unlink_counter += 1
            continue

        if len(choice) > 0 and choice[0] == 'r':
            # we want to throw away the original link text
            replaceit = link_text == page_title
            choice = choice[1:]
        elif include == "redirect":
            replaceit = True
        else:
            replaceit = False

        try:
            choice = int(choice)
        except ValueError:
            pywikibot.output(u"Unknown option")
            # step back to ask the user again what to do with the
            # current link; undo the count so the link is not counted
            # twice when it is found again
            curpos -= 1
            n -= 1
            continue
        if choice >= len(self.alternatives) or choice < 0:
            pywikibot.output(
                u"Choice out of range. Please select a number "
                u"between 0 and %i." % (len(self.alternatives) - 1))
            # show list of possible choices
            self.listAlternatives()
            # step back to ask the user again what to do with the
            # current link; undo the count as above
            curpos -= 1
            n -= 1
            continue

        chosen_title = self.alternatives[choice]
        repPl = pywikibot.Page(pywikibot.Link(chosen_title,
                                              disambPage.site))
        # Use the page's own title; lower-case its first letter only when
        # neither the chosen alternative nor the link text is capitalised.
        new_page_title = repPl.title()
        if not (chosen_title[0].isupper() or link_text[0].isupper()):
            new_page_title = first_lower(new_page_title)
        if new_page_title not in new_targets:
            new_targets.append(new_page_title)

        title_len = len(new_page_title)
        if replaceit and trailing_chars:
            newlink = "[[%s%s]]%s" % (new_page_title,
                                      section,
                                      trailing_chars)
        elif replaceit or (new_page_title == link_text and
                           not section):
            newlink = "[[%s]]" % new_page_title
        # check if we can create a link with trailing characters
        # instead of a pipelink
        elif (
            title_len <= len(link_text) and
            firstcap(link_text[:title_len]) == firstcap(new_page_title) and
            re.sub(self.trailR, '', link_text[title_len:]) == '' and
            not section
        ):
            newlink = "[[%s]]%s" \
                      % (link_text[:title_len], link_text[title_len:])
        else:
            newlink = "[[%s%s|%s]]" \
                      % (new_page_title, section, link_text)
        text = text[:m.start()] + newlink + text[m.end():]

    if text == original_text:
        pywikibot.output(u'\nNo changes have been made:\n')
    else:
        pywikibot.output(u'\nThe following changes have been made:\n')
        pywikibot.showDiff(original_text, text)
        pywikibot.output(u'')
        # save the page (only when the text actually changed)
        self.setSummaryMessage(disambPage, new_targets, unlink_counter,
                               dn)
        try:
            refPage.put_async(text, summary=self.comment)
        except pywikibot.LockedPage:
            pywikibot.output(u'Page not saved: page is locked')
        except pywikibot.PageNotSaved as error:
            pywikibot.output(u'Page not saved: %s' % error.args)
    return True
def treat(text, linkedPage, targetPage):
    """Based on the method of the same name in solve_disambiguation.py.

    For every link in text that points to linkedPage, ask the user whether
    to keep it, retarget it to targetPage (optionally replacing the link
    text) or unlink it.

    @param text: wikitext to scan for links
    @param linkedPage: page whose incoming links are offered for change
    @param targetPage: page the changed links should point to
    @return: the text after all chosen changes were applied
    """
    mysite = pywikibot.Site()
    linktrail = mysite.linktrail()
    # Both patterns are loop-invariant, so compile them once up front.
    trailR = re.compile(linktrail)
    linkR = re.compile(
        r'\[\[(?P<title>[^\]\|#]*)(?P<section>#[^\]\|]*)?'
        r'(\|(?P<label>[^\]]*))?\]\](?P<linktrail>%s)' % linktrail)
    # how many characters should be displayed around the current link
    context = 30
    curpos = 0
    # This loop will run until we have finished the current page
    while True:
        m = linkR.search(text, pos=curpos)
        if not m:
            break
        # Make sure that next time around we will not find this same hit.
        curpos = m.start() + 1
        page_title = m.group('title')
        # ignore interwiki links and links to sections of the same page
        if page_title == '' or mysite.isInterwikiLink(page_title):
            continue
        actualLinkPage = pywikibot.Page(mysite, page_title)
        # Check whether the link found is to page.
        if actualLinkPage != linkedPage:
            continue

        # at the beginning of the link, start red color.
        # at the end of the link, reset the color to default
        pywikibot.output(text[max(0, m.start() - context): m.start()] +
                         '\03{lightred}' + text[m.start(): m.end()] +
                         '\03{default}' + text[m.end(): m.end() + context])
        choice = pywikibot.input_choice(
            'What should be done with the link?',
            (('Do not change', 'n'),
             ('Change link to \03{lightpurple}%s\03{default}'
              % targetPage.title(), 'y'),
             ('Change and replace text', 'r'), ('Unlink', 'u')),
            default='n', automatic_quit=False)

        if choice == 'n':
            continue

        # The link looks like this:
        # [[page_title|link_text]]trailing_chars
        # or like this: [[page_title]]trailing_chars
        link_text = m.group('label') or page_title
        section = m.group('section') or ''
        trailing_chars = m.group('linktrail')
        if trailing_chars:
            link_text += trailing_chars

        if choice == 'u':
            # unlink - we remove the section if there's any
            text = text[:m.start()] + link_text + text[m.end():]
            continue

        if link_text[0].isupper():
            new_page_title = targetPage.title()
        else:
            new_page_title = first_lower(targetPage.title())

        title_len = len(new_page_title)
        if choice == 'r' and trailing_chars:
            newlink = "[[%s%s]]%s" % (new_page_title, section,
                                      trailing_chars)
        elif choice == 'r' or (new_page_title == link_text and not section):
            newlink = "[[%s]]" % new_page_title
        # check if we can create a link with trailing characters instead of
        # a pipelink
        elif (title_len <= len(link_text) and
              firstcap(link_text[:title_len]) == firstcap(new_page_title) and
              trailR.sub('', link_text[title_len:]) == '' and
              not section):
            newlink = "[[%s]]%s" % (link_text[:title_len],
                                    link_text[title_len:])
        else:
            newlink = "[[%s%s|%s]]" % (new_page_title, section, link_text)
        text = text[:m.start()] + newlink + text[m.end():]
    return text
def treat(self, refPage, disambPage):
    """
    Treat a page.

    Walk through every link in refPage that points to disambPage and, for
    each one, ask the user (or use self.always) whether to retarget it to
    one of self.alternatives, unlink it, tag it with the dn template, edit
    the page by hand, or skip it. If the text changed, the diff is shown
    and the page is saved asynchronously.

    Parameters:
        disambPage - The disambiguation page or redirect we don't want
            anything to link to
        refPage - A page linking to disambPage

    Returns True. The recursive call below treats a false result as
    "stop processing", but no path in this method returns False: the 'q'
    answer is handed to self.quit() instead.
    """
    # TODO: break this function up into subroutines!

    include = False
    unlink_counter = 0
    new_targets = []
    try:
        text = refPage.get()
        ignoreReason = self.checkContents(text)
        if ignoreReason:
            pywikibot.output(
                '\n\nSkipping %s because it contains %s.\n\n'
                % (refPage.title(), ignoreReason))
        else:
            include = True
    except pywikibot.IsRedirectPage:
        pywikibot.output(u'%s is a redirect to %s'
                         % (refPage.title(), disambPage.title()))
        if disambPage.isRedirectPage():
            # Offer to point the redirect at the first alternative.
            target = self.alternatives[0]
            if pywikibot.input_yn(u'Do you want to make redirect %s point '
                                  'to %s?' % (refPage.title(), target),
                                  default=False, automatic_quit=False):
                redir_text = ('#%s [[%s]]'
                              % (self.mysite.redirect(), target))
                try:
                    refPage.put_async(redir_text, summary=self.comment)
                except pywikibot.PageNotSaved as error:
                    pywikibot.output(u'Page not saved: %s' % error.args)
        else:
            choice = pywikibot.input_choice(
                u'Do you want to work on pages linking to %s?'
                % refPage.title(),
                [('yes', 'y'), ('no', 'n'), ('change redirect', 'c')],
                'n', automatic_quit=False)
            if choice == 'y':
                gen = ReferringPageGeneratorWithIgnore(
                    refPage, self.primary)
                preloadingGen = pagegenerators.PreloadingGenerator(gen)
                for refPage2 in preloadingGen:
                    # run until the user selected 'quit'
                    if not self.treat(refPage2, refPage):
                        break
            elif choice == 'c':
                # Work on the redirect page's own wikitext.
                text = refPage.get(get_redirect=True)
                include = "redirect"
    except pywikibot.NoPage:
        pywikibot.output(
            u'Page [[%s]] does not seem to exist?! Skipping.'
            % refPage.title())
        include = False

    if include in (True, "redirect"):
        # make a backup of the original text so we can show the changes later
        original_text = text
        # n counts the links to disambPage that were found and not skipped
        n = 0
        curpos = 0
        dn = False
        edited = False
        # This loop will run until we have finished the current page
        while True:
            m = self.linkR.search(text, pos=curpos)
            if not m:
                if n == 0:
                    pywikibot.output(u"No changes necessary in %s"
                                     % refPage.title())
                    return True
                # stop loop and save page
                break
            # Make sure that next time around we will not find this same hit.
            curpos = m.start() + 1
            try:
                foundlink = pywikibot.Link(m.group('title'),
                                           disambPage.site)
                foundlink.parse()
            except pywikibot.Error:
                continue
            # ignore interwiki links
            if foundlink.site != disambPage.site:
                continue
            # Check whether the link found is to disambPage.
            try:
                if foundlink.canonical_title() != disambPage.title():
                    continue
            except pywikibot.Error:
                # must be a broken link
                pywikibot.log(u"Invalid link [[%s]] in page [[%s]]"
                              % (m.group('title'), refPage.title()))
                continue
            n += 1
            # how many characters should be displayed around the current
            # link
            context = 60
            # check if there's a dn-template here already
            if (self.dnSkip and self.dn_template_str and
                    self.dn_template_str[:-2] in
                    text[m.end():m.end() + len(self.dn_template_str) + 8]):
                continue

            # This loop will run while the user doesn't choose an option
            # that will actually change the page
            while True:
                self.current_page = refPage

                if not self.always:
                    # at the beginning of the link, start red color.
                    # at the end of the link, reset the color to default
                    pywikibot.output(
                        text[max(0, m.start() - context):m.start()] +
                        '\03{lightred}' +
                        text[m.start():m.end()] +
                        '\03{default}' +
                        text[m.end():m.end() + context])
                    options = [
                        '#', 'r#', '[s]kip link', '[e]dit page',
                        '[n]ext page', '[u]nlink', '[q]uit'
                    ]
                    if self.dn_template_str:
                        options.append(u'[t]ag template %s'
                                       % self.dn_template_str)
                    options.append('[m]ore context')
                    if not edited:
                        options.append('show [d]isambiguation page')
                    options += ['[l]ist', '[a]dd new']
                    if edited:
                        options += ['save in this form [x]']
                    options = concat_options('Option', 72, options)
                    choice = pywikibot.input(options)
                else:
                    # A preset answer replaces the prompt.
                    choice = self.always

                if choice in ['a', 'A']:
                    newAlternative = pywikibot.input(u'New alternative:')
                    self.alternatives.append(newAlternative)
                    self.listAlternatives()
                elif choice in ['e', 'E']:
                    editor = editarticle.TextEditor()
                    newText = editor.edit(text, jumpIndex=m.start(),
                                          highlight=disambPage.title())
                    # if user didn't press Cancel
                    if newText and newText != text:
                        text = newText
                        break
                elif choice in ['d', 'D']:
                    editor = editarticle.TextEditor()
                    if disambPage.isRedirectPage():
                        disambredir = disambPage.getRedirectTarget()
                        editor.edit(disambredir.get(),
                                    jumpIndex=m.start(),
                                    highlight=disambredir.title())
                    else:
                        editor.edit(disambPage.get(),
                                    jumpIndex=m.start(),
                                    highlight=disambPage.title())
                elif choice in ['l', 'L']:
                    self.listAlternatives()
                elif choice in ['m', 'M']:
                    # show more text around the link we're working on
                    context *= 2
                else:
                    break

            if choice in ['e', 'E']:
                # user has edited the page and then pressed 'OK';
                # rescan the edited text from the beginning
                edited = True
                curpos = 0
                continue
            elif choice in ['n', 'N']:
                # skip this page
                if self.primary:
                    # If run with the -primary argument, skip this
                    # occurrence next time.
                    self.primaryIgnoreManager.ignore(refPage)
                return True
            elif choice in ['q', 'Q']:
                # quit the program
                # NOTE(review): presumably self.quit() does not return
                # normally; if it did, 'q' would fall through to the
                # numeric parsing below and be reported as an unknown
                # option - confirm.
                self.quit()
            elif choice in ['s', 'S']:
                # Next link on this page
                n -= 1
                continue
            elif choice in ['x', 'X'] and edited:
                # Save the page as is
                break

            # The link looks like this:
            # [[page_title|link_text]]trailing_chars
            # or like this: [[page_title]]trailing_chars
            page_title = m.group('title')
            link_text = m.group('label') or page_title
            section = m.group('section') or ''
            trailing_chars = m.group('linktrail')
            if trailing_chars:
                link_text += trailing_chars

            # '?', '/' for old choice
            if choice in ['t', 'T', '?', '/'] and self.dn_template_str:
                # small chunk of text to search
                search_text = text[m.end():m.end() + context]
                # figure out where the link (and sentence) ends, put note
                # there
                end_of_word_match = re.search(r'\s', search_text)
                if end_of_word_match:
                    position_split = end_of_word_match.start(0)
                else:
                    position_split = 0
                # insert dab needed template
                text = (text[:m.end() + position_split] +
                        self.dn_template_str +
                        text[m.end() + position_split:])
                dn = True
                continue

            if choice in ['u', 'U']:
                # unlink - we remove the section if there's any
                text = text[:m.start()] + link_text + text[m.end():]
                unlink_counter += 1
                continue

            # Anything else must be "<number>" or "r<number>".
            if len(choice) > 0 and choice[0] == 'r':
                # 'r' prefix: drop the original link text, but only when
                # the displayed text (including any link trail) is
                # identical to the link title
                replaceit = link_text == page_title
                choice = choice[1:]
            else:
                replaceit = include == "redirect"

            try:
                choice = int(choice)
            except ValueError:
                pywikibot.output(u"Unknown option")
                # step back to ask the user again what to do with the
                # current link
                curpos -= 1
                continue
            if choice >= len(self.alternatives) or choice < 0:
                pywikibot.output(
                    u"Choice out of range. Please select a number "
                    u"between 0 and %i." % (len(self.alternatives) - 1))
                # show list of possible choices
                self.listAlternatives()
                # step back to ask the user again what to do with the
                # current link
                curpos -= 1
                continue

            new_page_title = self.alternatives[choice]
            repPl = pywikibot.Page(
                pywikibot.Link(new_page_title, disambPage.site))
            # Keep the capital first letter if either the chosen
            # alternative or the visible text starts with one.
            keep_upper = (new_page_title[0].isupper() or
                          link_text[0].isupper())
            new_page_title = repPl.title()
            if not keep_upper:
                new_page_title = first_lower(new_page_title)
            if new_page_title not in new_targets:
                new_targets.append(new_page_title)

            if replaceit and trailing_chars:
                newlink = "[[%s%s]]%s" % (new_page_title, section,
                                          trailing_chars)
            elif replaceit or (new_page_title == link_text and
                               not section):
                newlink = "[[%s]]" % new_page_title
            # check if we can create a link with trailing characters
            # instead of a pipelink
            elif ((len(new_page_title) <= len(link_text)) and
                  (firstcap(link_text[:len(new_page_title)]) ==
                   firstcap(new_page_title)) and
                  (re.sub(self.trailR, '',
                          link_text[len(new_page_title):]) == '') and
                  (not section)):
                newlink = "[[%s]]%s" % (link_text[:len(new_page_title)],
                                        link_text[len(new_page_title):])
            else:
                newlink = "[[%s%s|%s]]" % (new_page_title, section,
                                           link_text)
            text = text[:m.start()] + newlink + text[m.end():]
            continue

        if text == original_text:
            pywikibot.output(u'\nNo changes have been made:\n')
        else:
            pywikibot.output(u'\nThe following changes have been made:\n')
            pywikibot.showDiff(original_text, text)
            pywikibot.output(u'')
            # save the page
            self.setSummaryMessage(disambPage, new_targets, unlink_counter,
                                   dn)
            try:
                refPage.put_async(text, summary=self.comment)
            except pywikibot.LockedPage:
                pywikibot.output(u'Page not saved: page is locked')
            except pywikibot.PageNotSaved as error:
                pywikibot.output(u'Page not saved: %s' % error.args)
    return True
def treat(text, linkedPage, targetPage):
    """Retarget every link to linkedPage in text so it points at targetPage.

    Based on the method of the same name in solve_disambiguation.py, but
    non-interactive: every matching link is changed, its visible text is
    kept, and nothing is ever unlinked.

    :param text: wikitext to process
    :param linkedPage: page whose incoming links should be changed
    :param targetPage: page the links should point to instead
    :return: the wikitext with all matching links retargeted
    """
    mysite = pywikibot.Site()
    linktrail = mysite.linktrail()
    linkR = re.compile(
        r"\[\[(?P<title>[^\]\|#]*)(?P<section>#[^\]\|]*)?"
        r"(\|(?P<label>[^\]]*))?\]\](?P<linktrail>" + linktrail + ")"
    )
    # The link trail pattern is loop-invariant, so compile it only once.
    trailR = re.compile(linktrail)
    curpos = 0
    # This loop will run until we have finished the current page
    while True:
        m = linkR.search(text, pos=curpos)
        if not m:
            break
        # Make sure that next time around we will not find this same hit.
        curpos = m.start() + 1

        page_title = m.group("title")
        # ignore interwiki links and links to sections of the same page
        if page_title.strip() == "" or mysite.isInterwikiLink(page_title):
            continue
        # Check whether the link found is to linkedPage.
        if pywikibot.Page(targetPage.site, page_title) != linkedPage:
            continue

        # The link looks like this:
        # [[page_title|link_text]]trailing_chars
        # or like this: [[page_title]]trailing_chars
        link_text = m.group("label") or page_title
        section = m.group("section") or ""
        trailing_chars = m.group("linktrail")
        if trailing_chars:
            link_text += trailing_chars

        # remove preleading ":" - but only when something is left over;
        # a label that is just ":" is kept so that link_text never becomes
        # empty (indexing an empty string below would raise IndexError).
        if link_text[0] == ":" and len(link_text) > 1:
            link_text = link_text[1:]
        # Follow the capitalisation of the visible text.
        if link_text[0].isupper():
            new_page_title = targetPage.title()
        else:
            new_page_title = first_lower(targetPage.title())

        # remove preleading ":"
        if new_page_title.startswith(":"):
            new_page_title = new_page_title[1:]

        if new_page_title == link_text and not section:
            newlink = "[[%s]]" % new_page_title
        # check if we can create a link with trailing characters instead of a
        # pipelink
        elif (
            len(new_page_title) <= len(link_text)
            and firstcap(link_text[: len(new_page_title)])
            == firstcap(new_page_title)
            and trailR.sub("", link_text[len(new_page_title):]) == ""
            and not section
        ):
            newlink = "[[%s]]%s" % (
                link_text[: len(new_page_title)],
                link_text[len(new_page_title):],
            )
        else:
            newlink = "[[%s%s|%s]]" % (new_page_title, section, link_text)
        text = text[: m.start()] + newlink + text[m.end():]
    return text
def treat(text, linkedPage, targetPage):
    """Retarget every link to linkedPage in text so it points at targetPage.

    Based on the method of the same name in solve_disambiguation.py, but
    non-interactive: every matching link is changed, its visible text is
    kept, and nothing is ever unlinked.

    :param text: wikitext to process
    :param linkedPage: page whose incoming links should be changed
    :param targetPage: page the links should point to instead
    :return: the wikitext with all matching links retargeted
    """
    mysite = pywikibot.Site()
    linktrail = mysite.linktrail()
    linkR = re.compile(r'\[\[(?P<title>[^\]\|#]*)(?P<section>#[^\]\|]*)?'
                       r'(\|(?P<label>[^\]]*))?\]\](?P<linktrail>' +
                       linktrail + ')')
    # The link trail pattern is loop-invariant, so compile it only once.
    trailR = re.compile(linktrail)
    curpos = 0
    # This loop will run until we have finished the current page
    while True:
        m = linkR.search(text, pos=curpos)
        if not m:
            break
        # Make sure that next time around we will not find this same hit.
        curpos = m.start() + 1

        page_title = m.group('title')
        # ignore interwiki links and links to sections of the same page
        if page_title.strip() == '' or mysite.isInterwikiLink(page_title):
            continue
        # Check whether the link found is to linkedPage.
        if pywikibot.Page(targetPage.site, page_title) != linkedPage:
            continue

        # The link looks like this:
        # [[page_title|link_text]]trailing_chars
        # or like this: [[page_title]]trailing_chars
        link_text = m.group('label') or page_title
        section = m.group('section') or ''
        trailing_chars = m.group('linktrail')
        if trailing_chars:
            link_text += trailing_chars

        # remove preleading ":" - but only when something is left over;
        # a label that is just ":" is kept so that link_text never becomes
        # empty (indexing an empty string below would raise IndexError).
        if link_text[0] == ':' and len(link_text) > 1:
            link_text = link_text[1:]
        # Follow the capitalisation of the visible text.
        if link_text[0].isupper():
            new_page_title = targetPage.title()
        else:
            new_page_title = first_lower(targetPage.title())

        # remove preleading ":"
        if new_page_title.startswith(':'):
            new_page_title = new_page_title[1:]

        if new_page_title == link_text and not section:
            newlink = "[[%s]]" % new_page_title
        # check if we can create a link with trailing characters instead of a
        # pipelink
        elif (len(new_page_title) <= len(link_text) and
              firstcap(link_text[:len(new_page_title)]) ==
              firstcap(new_page_title) and
              trailR.sub('', link_text[len(new_page_title):]) == '' and
              not section):
            newlink = "[[%s]]%s" % (link_text[:len(new_page_title)],
                                    link_text[len(new_page_title):])
        else:
            newlink = "[[%s%s|%s]]" % (new_page_title, section, link_text)
        text = text[:m.start()] + newlink + text[m.end():]
    return text
def treat(text, linkedPage, targetPage):
    """Interactively retarget links to linkedPage so they point at targetPage.

    Based on the method of the same name in solve_disambiguation.py.

    Every wikilink in ``text`` whose title resolves to ``linkedPage`` is
    shown with some surrounding context and the user chooses what to do:

    * ``n`` - leave the link unchanged (the default)
    * ``y`` - point the link at ``targetPage`` and keep the visible text
    * ``r`` - point the link at ``targetPage`` and drop the visible text
    * ``u`` - remove the link markup and keep only the visible text

    :param text: wikitext to process
    :param linkedPage: page whose incoming links should be changed
    :param targetPage: page the links should point to instead
    :return: the wikitext with the chosen replacements applied
    """
    mysite = pywikibot.Site()
    linktrail = mysite.linktrail()
    linkR = re.compile(
        r'\[\[(?P<title>[^\]\|#]*)(?P<section>#[^\]\|]*)?'
        r'(\|(?P<label>[^\]]*))?\]\](?P<linktrail>%s)' % linktrail)
    # The link trail pattern is loop-invariant, so compile it only once.
    trailR = re.compile(linktrail)
    # how many characters should be displayed around the current link
    context = 30
    curpos = 0
    # This loop will run until we have finished the current page
    while True:
        m = linkR.search(text, pos=curpos)
        if not m:
            break
        # Make sure that next time around we will not find this same hit.
        curpos = m.start() + 1

        page_title = m.group('title')
        # ignore interwiki links and links to sections of the same page.
        # The title is stripped first so that a whitespace-only title is
        # skipped as well instead of being handed to pywikibot.Page.
        if page_title.strip() == '' or mysite.isInterwikiLink(page_title):
            continue
        # Check whether the link found is to linkedPage.
        if pywikibot.Page(mysite, page_title) != linkedPage:
            continue

        # at the beginning of the link, start red color.
        # at the end of the link, reset the color to default
        pywikibot.output(text[max(0, m.start() - context):m.start()] +
                         '\03{lightred}' + text[m.start():m.end()] +
                         '\03{default}' + text[m.end():m.end() + context])
        choice = pywikibot.input_choice(
            'What should be done with the link?',
            (('Do not change', 'n'),
             ('Change link to \03{lightpurple}%s\03{default}'
              % targetPage.title(), 'y'),
             ('Change and replace text', 'r'),
             ('Unlink', 'u')),
            default='n', automatic_quit=False)
        if choice == 'n':
            continue

        # The link looks like this:
        # [[page_title|link_text]]trailing_chars
        # or like this: [[page_title]]trailing_chars
        link_text = m.group('label') or page_title
        section = m.group('section') or ''
        trailing_chars = m.group('linktrail')
        if trailing_chars:
            link_text += trailing_chars

        if choice == 'u':
            # unlink - we remove the section if there's any
            text = text[:m.start()] + link_text + text[m.end():]
            continue

        # Follow the capitalisation of the visible text.
        if link_text[0].isupper():
            new_page_title = targetPage.title()
        else:
            new_page_title = first_lower(targetPage.title())

        if choice == 'r' and trailing_chars:
            newlink = "[[%s%s]]%s" % (new_page_title, section, trailing_chars)
        elif choice == 'r' or (new_page_title == link_text and not section):
            newlink = "[[%s]]" % new_page_title
        # check if we can create a link with trailing characters instead of a
        # pipelink
        elif (len(new_page_title) <= len(link_text) and
              firstcap(link_text[:len(new_page_title)]) ==
              firstcap(new_page_title) and
              trailR.sub('', link_text[len(new_page_title):]) == '' and
              not section):
            newlink = "[[%s]]%s" % (link_text[:len(new_page_title)],
                                    link_text[len(new_page_title):])
        else:
            newlink = "[[%s%s|%s]]" % (new_page_title, section, link_text)
        text = text[:m.start()] + newlink + text[m.end():]
    return text