Example #1
 def copyAndKeep(self, catname, cfdTemplates):
     """
     Returns true if copying was successful, false if target page already
     existed.
     """
     catname = self.site().category_namespace() + ':' + catname
     targetCat = wikipedia.Page(self.site(), catname)
     if targetCat.exists():
         wikipedia.output('Target page %s already exists!' % targetCat.title())
         return False
     else:
         wikipedia.output('Moving text from %s to %s.' % (self.title(), targetCat.title()))
         authors = ', '.join(self.contributingUsers())
         creationSummary = wikipedia.translate(wikipedia.getSite(), msg_created_for_renaming) % (self.title(), authors)
         newtext = self.get()
     for regexName in cfdTemplates:
         matchcfd = re.compile(r"{{%s.*?}}" % regexName, re.IGNORECASE)
         newtext = matchcfd.sub('', newtext)
     # The CFD comment and leading blank lines only need to be stripped once,
     # after all the templates have been removed.
     matchcomment = re.compile(r"<!--BEGIN CFD TEMPLATE-->.*<!--END CFD TEMPLATE-->",
                               re.IGNORECASE | re.DOTALL)
     newtext = matchcomment.sub('', newtext)
     newtext = newtext.lstrip('\n')
     targetCat.put(newtext, creationSummary)
     return True
 def listTemplates(self, templates, namespaces):
     mysite = pywikibot.getSite()
     count = 0
     # The names of the templates are the keys, and lists of pages
     # transcluding templates are the values.
     templateDict = {}
     finalText = [u'', u'List of pages transcluding templates:']
     for template in templates:
         finalText.append(u'* %s' % template)
     finalText.append(u'-' * 36)
     for template in templates:
         transcludingArray = []
         gen = pagegenerators.ReferringPageGenerator(
             pywikibot.Page(mysite,
                            mysite.template_namespace() + ':' + template),
             onlyTemplateInclusion=True)
         if namespaces:
             gen = pagegenerators.NamespaceFilterPageGenerator(gen,
                                                               namespaces)
         for page in gen:
             finalText.append(u'%s' % page.title())
             count += 1
             transcludingArray.append(page)
         templateDict[template] = transcludingArray
     finalText.append(u'Total page count: %d' % count)
     for line in finalText:
         pywikibot.output(line, toStdout=True)
     pywikibot.output(u'Report generated on %s'
                      % datetime.datetime.utcnow().isoformat(),
                      toStdout=True)
     return templateDict
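
A minimal usage sketch for the method above, assuming a configured compat-era pywikibot install; the owner class name and the template names are hypothetical:

# Hypothetical usage of listTemplates(); TemplateCountRobot stands in for
# whatever class defines the method, and the template names are examples.
bot = TemplateCountRobot()
templateDict = bot.listTemplates([u'Stub', u'Citation needed'],
                                 namespaces=[0])
for name, pages in templateDict.items():
    pywikibot.output(u'%s: %d transclusions' % (name, len(pages)))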
Example #3
 def process_children(obj,current_user):
     if pywikibot.debug:
         pywikibot.output(u'parsing node: %s' % obj)
     for c in obj.children:
         temp = process_node(c,current_user)
         if temp and not current_user:
             current_user = temp
Example #4
def main():  
    # If debug is True, don't edit pages, but only show what would have been
    # changed.
    debug = False
    # The AfD log that should be treated.
    date = None
    # Whether to confirm edits.
    always = False

    # Parse command line arguments
    for arg in wikipedia.handleArgs():
        if arg.startswith('-debug'):
            wikipedia.output(u'Debug mode.')
            debug = True
        elif arg.startswith('-date'):        
            if len(arg) == 5:
                date = wikipedia.input(u'Please enter the date of the log that should be treated (yyyymmdd):')
            else:
                date = arg[6:]
        elif arg.startswith('-always'):
            always = True
  
    if date:
        page_title = u'Wikipedia:Te verwijderen pagina\'s/Toegevoegd %s' % date
    else:
        page_title = u'Wikipedia:Te verwijderen pagina\'s/Toegevoegd %s' % time.strftime("%Y%m%d", time.localtime(time.time()-60*60*24))

    wikipedia.output(u'Checking: %s.' % page_title)
    page = wikipedia.Page(wikipedia.getSite(code = 'nl', fam = 'wikipedia'), page_title)
    bot = AfDBot(page, always, debug)
    bot.run()
    def countTemplates(self, templates, namespaces):
        mysite = pywikibot.getSite()
        total = 0
        # The names of the templates are the keys, and the numbers of
        # transclusions are the values.
        templateDict = {}
        pg = pagegenerators
        getall = templates
        mytpl  = mysite.template_namespace()+':'
        for template in getall:
            gen = pg.ReferringPageGenerator(pywikibot.Page(mysite,
                                                           mytpl + template),
                onlyTemplateInclusion = True)
            if namespaces:
                gen = pg.NamespaceFilterPageGenerator(gen, namespaces)
            count = 0
            for page in gen:
                count += 1
            templateDict[template] = count

            total += count
        pywikibot.output(u'\nNumber of transclusions per template',
                         toStdout=True)
        pywikibot.output(u'-' * 36, toStdout=True)
        for key in templateDict.keys():
            pywikibot.output(u'%-10s: %5d' % (key, templateDict[key]),
                             toStdout=True)
        pywikibot.output(u'TOTAL     : %5d' % total, toStdout=True)
        pywikibot.output(u'Report generated on %s'
                         % datetime.datetime.utcnow().isoformat(),
                         toStdout=True)
        return templateDict
Example #6
    def processImage(self, fields):
        '''
        Work on a single image
        '''
        if self.autonomous:
            # Check if the image already exists. Do nothing if the name is already taken.
            CommonsPage = pywikibot.Page(pywikibot.getSite('commons', 'commons'),
                                         u'File:' + fields.get('filename'))
            if CommonsPage.exists():
                return False
        else:
            while True:
                # Show the Tkdialog to accept/reject and change the name
                fields = Tkdialog(fields).getnewmetadata()

                if fields.get('skip'):
                    pywikibot.output(u'Skipping %s : User pressed skip.' % fields.get('imagepage').title())
                    return False

                # Check if the image already exists
                CommonsPage = pywikibot.Page(pywikibot.getSite('commons', 'commons'),
                                             u'File:' + fields.get('filename'))
                if not CommonsPage.exists():
                    break
                else:
                    pywikibot.output('Image already exists, pick another name or skip this image')
                    # We don't overwrite images; pick another name and go back to the start of the loop

        # Put the fields in the queue to be uploaded
        self.uploadQueue.put(fields)
Example #7
    def in_list(self, pagelist, title, lazyload=True):
        if pywikibot.verbose:
            pywikibot.output(u'Checking whitelist for: %s' % title)

        # quick check for exact match
        if title in pagelist:
            return title

        # quick check for wildcard
        if '' in pagelist:
            if pywikibot.verbose:
                pywikibot.output(u"wildcarded")
            return '.*'

        for item in pagelist:
            if pywikibot.verbose:
                pywikibot.output(u"checking against whitelist item = %s" % item)

            if isinstance(item, PatrolRule):
                if pywikibot.verbose:
                    pywikibot.output(u"invoking programmed rule")
                if item.match(title):
                    return item

            elif title_match(item, title):
                return item

        if pywikibot.verbose:
            pywikibot.output(u'not found')
    def showImageList(self, imagelist):
        for i in range(len(imagelist)):
            image = imagelist[i]
            #sourceSite = sourceImagePage.site()
            print "-" * 60
            pywikibot.output(u"%s. Found image: %s"
                             % (i, image.title(asLink=True)))
            try:
                # Show the image description page's contents
                pywikibot.output(image.get(throttle=False))
                # look if page already exists with this name.
                # TODO: consider removing this: a different image of the same
                # name may exist on the target wiki, and the bot user may want
                # to upload anyway, using another name.
                try:
                    # Maybe the image is on the target site already
                    targetTitle = '%s:%s' % (self.targetSite.image_namespace(),
                                             image.title().split(':', 1)[1])
                    targetImage = pywikibot.Page(self.targetSite, targetTitle)
                    targetImage.get(throttle=False)
                    pywikibot.output(u"Image with this name is already on %s."
                                     % self.targetSite)
                    print "-" * 60
                    pywikibot.output(targetImage.get(throttle=False))
                    sys.exit()
                except pywikibot.NoPage:
                    # That's the normal case
                    pass
                except pywikibot.IsRedirectPage:
                    pywikibot.output(
                        u"Description page on target wiki is redirect?!")

            except pywikibot.NoPage:
                break
        print "="*60
Example #9
    def processImage(self, page):
        '''
        Work on a single image
        '''
        if page.exists() and (page.namespace() == 6) and \
           (not page.isRedirectPage()):
            imagepage = pywikibot.ImagePage(page.site(), page.title())

            #First do autoskip.
            if self.doiskip(imagepage):
                pywikibot.output(
                    u'Skipping %s : Got a template on the skip list.'
                    % page.title())
                return False

            text = imagepage.get()
            foundMatch = False
            for (regex, replacement) in licenseTemplates[page.site().language()]:
                match = re.search(regex, text, flags=re.IGNORECASE)
                if match:
                    foundMatch = True
            if not foundMatch:
                pywikibot.output(
                    u'Skipping %s : No suitable license template was found.'
                    % page.title())
                return False
            self.prefetchQueue.put(self.getNewFields(imagepage))
Example #10
    def __iter__(self):
        try:
            # this array will contain up to pageNumber pages and will be flushed
            # after these pages have been preloaded and yielded.
            somePages = []
            for page in self.wrapped_gen:
##                if self.finished.isSet():
##                    return
                somePages.append(page)
                # We don't want to load too many pages at once using XML export.
                # We only get a maximum number at a time.
                if len(somePages) >= self.pageNumber:
                    for loaded_page in self.preload(somePages):
                        yield loaded_page
                    somePages = []
            if somePages:
                # wrapped generator is exhausted but some pages still unloaded
                # preload remaining pages
                for loaded_page in self.preload(somePages):
                    yield loaded_page
        except GeneratorExit:
            pass
        except Exception, e:
            traceback.print_exc()
            pywikibot.output(unicode(e))
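
The preloading loop above batches pages into chunks of pageNumber before yielding them. Stripped of the wiki-specific parts, the same pattern looks like this (a stand-alone sketch; the name batched is not from the framework):

def batched(iterable, size):
    """Yield lists of up to `size` items, flushing the remainder at the end."""
    chunk = []
    for item in iterable:
        chunk.append(item)
        if len(chunk) >= size:
            yield chunk
            chunk = []
    if chunk:
        # source exhausted, but some items are still buffered
        yield chunk

# list(batched(range(7), 3)) == [[0, 1, 2], [3, 4, 5], [6]]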
Example #11
def MySQLPageGenerator(query, site = None):
    import MySQLdb as mysqldb
    if site is None:
        site = pywikibot.getSite()
    conn = mysqldb.connect(config.db_hostname, db = site.dbName(),
                           user = config.db_username,
                           passwd = config.db_password)
    cursor = conn.cursor()
    pywikibot.output(u'Executing query:\n%s' % query)
    query = query.encode(site.encoding())
    cursor.execute(query)
    while True:
        try:
            namespaceNumber, pageName = cursor.fetchone()
            print namespaceNumber, pageName
        except TypeError:
            # Limit reached or no more results
            break
        #print pageName
        if pageName:
            namespace = site.namespace(namespaceNumber)
            pageName = unicode(pageName, site.encoding())
            if namespace:
                pageTitle = '%s:%s' % (namespace, pageName)
            else:
                pageTitle = pageName
            page = pywikibot.Page(site, pageTitle)
            yield page
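
A hedged usage sketch: the generator above expects every row to be a (namespace number, page title) pair, as the fetchone() unpacking shows. The query below is illustrative and assumes a standard MediaWiki page table:

# Hypothetical query: up to ten main-namespace redirects.
query = u"""SELECT page_namespace, page_title
FROM page
WHERE page_namespace = 0 AND page_is_redirect = 1
LIMIT 10"""
for page in MySQLPageGenerator(query):
    pywikibot.output(page.title())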
Example #12
    def save(self, text, page, comment, minorEdit=False, botflag=False):
        # only save if something was changed
        if text != page.get():
            # Show the title of the page we're working on.
            # Highlight the title in purple.
            pywikibot.output(u"\n\n>>> \03{lightpurple}%s\03{default} <<<"
                             % page.title())
            # show what was changed
            pywikibot.showDiff(page.get(), text)

            pywikibot.output(u'Comment: %s' % comment)
            choice = pywikibot.inputChoice(
                u'Do you want to accept these changes?',
                ['Yes', 'No'], ['y', 'N'], 'N')
            if choice == 'y':
                try:
                    # Save the page
                    page.put(text, comment=comment,
                             minorEdit=minorEdit, botflag=botflag)
                except pywikibot.LockedPage:
                    pywikibot.output(u"Page %s is locked; skipping."
                                     % page.title(asLink=True))
                except pywikibot.EditConflict:
                    pywikibot.output(
                        u'Skipping %s because of edit conflict'
                        % (page.title()))
                except pywikibot.SpamfilterError, error:
                    pywikibot.output(
                        u'Cannot change %s because of spam blacklist entry %s'
                        % (page.title(), error.url))
                else:
                    return True
Example #13
def PageTitleFilterPageGenerator(generator, ignoreList):
    """
    Wraps around another generator. Yields only those pages that are not
    listed in the ignore list.

    The ignoreList is a dictionary. Family names are mapped to
    dictionaries in which language codes are mapped to lists of
    page titles.
    """

    def isIgnored(page):
        fam = page.site().family.name
        lang = page.site().lang
        if fam not in ignoreList or lang not in ignoreList[fam]:
            return False

        for ig in ignoreList[fam][lang]:
            if re.match(ig, page.title()):
                return True
        return False

    for page in generator:
        if isIgnored(page):
            if pywikibot.verbose:
                pywikibot.output('Ignoring page %s' % page.title())
        else:
            yield page
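
For illustration, an ignoreList with the family -> language -> title-regex nesting the docstring describes; all entries here are hypothetical:

ignoreList = {
    'wikipedia': {                      # family name
        'en': [u'User:.*',              # title regexes to ignore
               u'Talk:Sandbox'],
        'de': [u'Benutzer:.*'],
    },
}
# gen can be any page generator:
# filtered = PageTitleFilterPageGenerator(gen, ignoreList)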
Example #14
def load_word_function(raw):
    """ This is a function used to load the badword and the whitelist."""
    page = re.compile(r"(?:\"|\')(.*?)(?:\"|\')(?:, |\))", re.UNICODE)
    list_loaded = page.findall(raw)
    if len(list_loaded) == 0:
        pywikibot.output(u'There was no input on the real-time page.')
    return list_loaded
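
A quick illustration of the regex above: it extracts quoted entries from a Python-literal-style line, as found on such real-time pages (the sample input is made up):

raw = u'badwords = ("spam", "scam", \'hoax\')'
print load_word_function(raw)
# prints [u'spam', u'scam', u'hoax']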
def _catlib_Category__parseCategory(self, recurse=False, purge=False, startFrom=None):
    if not startFrom:
        startFrom = 0
    ns = self.site().category_namespaces()
    catsdone = []
    catstodo = [(self, recurse)]

    # Get subcats and articles
    for (cat, recurselevel) in catstodo:
        if type(recurselevel) == type(1):
            newrecurselevel = recurselevel - 1
        else:
            newrecurselevel = recurselevel
        catsdone.append(cat)

        wikipedia.output("Getting [[%s]] from %s..." % (cat.title(), cat.site().dbName()))
        for page in toolserver.Generators.getCategoryMembers(cat, startFrom):
            if type(page) == catlib.Category:
                if recurselevel and page not in catsdone:
                    catstodo.append((page, newrecurselevel))
                yield catlib.SUBCATEGORY, page.title()
            else:
                yield catlib.ARTICLE, page.title()
    # Get supercats
    for supercat in toolserver.Generators.getCategories(self):
        yield catlib.SUPERCATEGORY, supercat.title()
Example #16
    def copyTo(self, catname):
        """Returns true if copying was successful, false if target page already
        existed.

        """
        catname = self.site().category_namespace() + ":" + catname
        targetCat = pywikibot.Page(self.site(), catname)
        if targetCat.exists():
            pywikibot.output("Target page %s already exists!" % targetCat.title())
            return False
        else:
            pywikibot.output("Moving text from %s to %s." % (self.title(), targetCat.title()))
            authors = ", ".join(self.contributingUsers())
            creationSummary = pywikibot.translate(pywikibot.getSite(), msg_created_for_renaming) % (
                self.title(),
                authors,
            )
            # The summary may exceed 200 characters, in which case it would not
            # be shown in full. To avoid a copyright violation, the bot must
            # list the authors in another place.
            if len(creationSummary) > 200:
                talkpage = targetCat.toggleTalkPage()
                try:
                    talktext = talkpage.get()
                except pywikibot.NoPage:
                    talkpage.put(u"==Authors==\n%s-~~~~" % authors, u"Bot:Listifying authors")
                else:
                    talkpage.put(talktext + u"\n==Authors==\n%s-~~~~" % authors, u"Bot:Listifying authors")
            targetCat.put(self.get(), creationSummary)
            return True
Example #17
def categoryAllElementsAPI(CatName, cmlimit=5000, categories_parsed=None, site=None):
    """ Load all the elements of a category using the API.
    Limit: 5000 elements.

    """
    if categories_parsed is None:
        # Avoid a shared mutable default argument between calls.
        categories_parsed = []
    pywikibot.output("Loading %s..." % CatName)

    # action=query&list=categorymembers&cmlimit=500&cmtitle=Category:License_tags
    params = {"action": "query", "list": "categorymembers", "cmlimit": cmlimit, "cmtitle": CatName}

    data = query.GetData(params, site)
    categories_parsed.append(CatName)
    try:
        members = data["query"]["categorymembers"]
    except KeyError:
        if int(cmlimit) != 500:
            pywikibot.output(u"An Error occured, trying to reload the category.")
            return categoryAllElementsAPI(CatName, cmlimit=500)
        else:
            raise pywikibot.Error(data)
    if len(members) == int(cmlimit):
        raise pywikibot.Error(u"The category selected has >= %s elements, limit reached." % cmlimit)
    allmembers = members
    results = list()
    for subcat in members:
        ns = subcat["ns"]
        title = subcat["title"]
        if ns == 14:
            if title not in categories_parsed:
                categories_parsed.append(title)
                (results_part, categories_parsed) = categoryAllElementsAPI(title, 5000, categories_parsed)
                allmembers.extend(results_part)
    for member in allmembers:
        results.append(member)
    return (results, categories_parsed)
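
A minimal usage sketch (the category name is hypothetical); the function returns the members found together with the list of categories it has already walked:

members, parsed = categoryAllElementsAPI(u'Category:License tags')
pywikibot.output(u'Loaded %d members from %d categories.'
                 % (len(members), len(parsed)))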
    def revert(self, item):
        predata = {
            'action': 'query',
            'titles': item['title'],
            'prop': 'revisions',
            'rvprop': 'ids|timestamp|user|content',
            'rvlimit': '2',
            'rvstart': item['timestamp'],
        }
        data = query.GetData(predata, self.site)

        if 'error' in data:
            raise RuntimeError(data['error'])

        pages = data['query'].get('pages', ())
        if not pages: return False
        page = pages.itervalues().next()
        if len(page.get('revisions', ())) != 2: return False
        rev = page['revisions'][1]

        comment = u'Reverted to revision %s by %s on %s' % (rev['revid'],
            rev['user'], rev['timestamp'])
        if self.comment: comment += ': ' + self.comment

        page = pywikibot.Page(self.site, item['title'])
        pywikibot.output(u"\n\n>>> \03{lightpurple}%s\03{default} <<<"
                         % page.aslink(True, True))
        old = page.get()
        new = rev['*']
        pywikibot.showDiff(old, new)
        page.put(new, comment)
        return comment
def main(args):
    '''
    Main loop. Get a generator and options. Work on all images in the generator.
    '''
    generator = None
    onlyFilter = False
    onlyUncat = False
    genFactory = pagegenerators.GeneratorFactory()

    global search_wikis
    global hint_wiki

    site = pywikibot.getSite(u'commons', u'commons')
    pywikibot.setSite(site)
    for arg in pywikibot.handleArgs():
        if arg == '-onlyfilter':
            onlyFilter = True
        elif arg == '-onlyuncat':
            onlyUncat = True
        elif arg.startswith('-hint:'):
            hint_wiki = arg[len('-hint:'):]
        elif arg.startswith('-onlyhint'):
            search_wikis = arg[len('-onlyhint:'):]
        else:
            genFactory.handleArg(arg)

    generator = genFactory.getCombinedGenerator()
    if not generator:
        generator = pagegenerators.CategorizedPageGenerator(
            catlib.Category(site, u'Category:Media needing categories'),
            recurse=True)
    initLists()
    categorizeImages(generator, onlyFilter, onlyUncat)
    pywikibot.output(u'All done')
Example #20
def UserEditFilterGenerator(generator, username, timestamp=None, skip=False):
    """
    Generator which will yield Pages depending of user:username is an Author of
    that page (only looks at the last 100 editors).
    If timestamp is set in MediaWiki format JJJJMMDDhhmmss, older edits are
    ignored
    If skip is set, pages edited by the given user are ignored otherwise only
    pages edited by this user are given back

    """
    if timestamp:
        ts = pywikibot.Timestamp.fromtimestampformat(timestamp)
    for page in generator:
        editors = page.getLatestEditors(limit=100)
        found = False
        for ed in editors:
            uts = pywikibot.Timestamp.fromISOformat(ed['timestamp'])
            if not timestamp or uts>=ts:
                if username == ed['user']:
                    found = True
                    break
            else:
                break
        if found and not skip or not found and skip:
            yield page
        else:
            pywikibot.output(u'Skipping %s' % page.title(asLink=True))
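
A hedged usage sketch: the generator, username, and timestamp below are hypothetical:

gen = pagegenerators.AllpagesPageGenerator()
# Yield only pages whose last 100 editors include "Example" with an edit
# after 1 Jan 2010; pass skip=True to invert the filter.
for page in UserEditFilterGenerator(gen, u'Example',
                                    timestamp='20100101000000'):
    pywikibot.output(page.title())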
def getPoisonedLinks(pl):
    """Returns a list of known corrupted links that should be removed if seen

    """
    result = []
    pywikibot.output(u'getting poisoned links for %s' % pl.title())
    dictName, value = date.getAutoFormat(pl.site().language(), pl.title())
    if dictName is not None:
        pywikibot.output(u'date found in %s' % dictName)
        # errors in year BC
        if dictName in date.bcFormats:
            for fmt in bcDateErrors:
                result.append(fmt % value)
        # I guess this is like Friday the 13th for the years
        if value == 398 and dictName == 'yearsBC':
            appendFormatedDates(result, dictName, 399)
        if dictName == 'yearsBC':
            appendFormatedDates(result, 'decadesBC', value)
            appendFormatedDates(result, 'yearsAD', value)
        if dictName == 'yearsAD':
            appendFormatedDates(result, 'decadesAD', value)
            appendFormatedDates(result, 'yearsBC', value)
        if dictName == 'centuriesBC':
            appendFormatedDates(result, 'decadesBC', value * 100 + 1)
        if dictName == 'centuriesAD':
            appendFormatedDates(result, 'decadesAD', value * 100 + 1)
    return result
Example #22
def getPhotos(photoset=u'', start_id='', end_id='', interval=100):
    ''' Loop over a set of Panoramio photos. '''
    i = 0
    has_more = True
    url = u'http://www.panoramio.com/map/get_panoramas.php?set=%s&from=%s&to=%s&size=original'
    while has_more:
        gotInfo = False
        maxtries = 10
        tries = 0
        while not gotInfo:
            try:
                if tries < maxtries:
                    tries += 1
                    panoramioApiPage = urllib2.urlopen(url % (photoset, i,
                                                              i + interval))
                    contents = panoramioApiPage.read().decode('utf-8')
                    gotInfo = True
                    i += interval
                else:
                    break
            except IOError:
                pywikibot.output(u'Got an IOError, let\'s try again')
            except socket.timeout:
                pywikibot.output(u'Got a timeout, let\'s try again')

        if not gotInfo:
            # All retries failed and contents was never assigned; stop.
            break
        metadata = json.loads(contents)
        count = metadata.get(u'count')  # Useless?
        photos = metadata.get(u'photos')
        for photo in photos:
            yield photo
        has_more = metadata.get(u'has_more')
    return
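
A usage sketch with a made-up set id; the photo_id key is assumed from the Panoramio API response and may differ:

for photo in getPhotos(photoset=u'12345'):
    # 'photo_id' is an assumed key in each photo dict.
    pywikibot.output(u'Got photo %s' % photo.get(u'photo_id'))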
Example #23
    def dump(self, filename = 'category.dump.bz2'):
        '''Saves the contents of the dictionaries superclassDB and catContentDB
        to disk.

        '''
        if not os.path.isabs(filename):
            filename = pywikibot.config.datafilepath(filename)
        if self.catContentDB or self.superclassDB:
            pywikibot.output(u'Dumping to %s, please wait...'
                             % pywikibot.config.shortpath(filename))
            f = bz2.BZ2File(filename, 'w')
            databases = {
                'catContentDB': self.catContentDB,
                'superclassDB': self.superclassDB
            }
            # store dump to disk in binary format
            try:
                pickle.dump(databases, f, protocol=pickle.HIGHEST_PROTOCOL)
            except pickle.PicklingError:
                pass
            f.close()
        else:
            try:
                os.remove(filename)
            except EnvironmentError:
                pass
            else:
                pywikibot.output(u'Database is empty. %s removed'
                                 % pywikibot.config.shortpath(filename))
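
Reading such a dump back needs only the same stdlib modules; a minimal sketch of the inverse operation (load_dump is not part of the original class):

import bz2
import pickle

def load_dump(filename='category.dump.bz2'):
    """Load the databases dict written by dump() above."""
    f = bz2.BZ2File(filename, 'r')
    try:
        databases = pickle.load(f)
    finally:
        f.close()
    return databases  # keys: 'catContentDB' and 'superclassDB'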
def getExtendedFindNearby(lat, lng):
    '''
    Get the result from http://ws.geonames.org/extendedFindNearby
    and put it in a list of dictionaries to play around with
    '''
    result = []
    gotInfo = False
    parameters = urllib.urlencode({'lat': lat, 'lng': lng})
    while not gotInfo:
        try:
            page = urllib.urlopen(
                "http://ws.geonames.org/extendedFindNearby?%s" % parameters)
            et = xml.etree.ElementTree.parse(page)
            gotInfo = True
        except IOError:
            wikipedia.output(u'Got an IOError, let\'s try again')
            time.sleep(30)
        except socket.timeout:
            wikipedia.output(u'Got a timeout, let\'s try again')
            time.sleep(30)

    for geoname in et.getroot().getchildren():
        geonamedict = {}
        if geoname.tag == 'geoname':
            for element in geoname.getchildren():
                geonamedict[element.tag] = element.text
            result.append(geonamedict)
    #print result
    return result
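
A usage sketch with hypothetical coordinates; the 'name' and 'countryName' keys are assumed from the geonames XML element tags:

for geoname in getExtendedFindNearby(52.37, 4.89):
    wikipedia.output(u'%s (%s)' % (geoname.get('name'),
                                   geoname.get('countryName')))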
def main():
    pywikibot.warning("this script should not be run manually/directly, but automatically by maintainer.py")
    if len(sys.argv) == 1:
        pywikibot.output("Usage: censure.py <article title>")
        sys.exit(1)
    del sys.argv[0]
    checkPage(" ".join(sys.argv).decode("utf-8"))
def _refreshOld(site, sysop=False):
    # get watchlist special page's URL
    path = site.watchlist_address()
    pywikibot.output(u'Retrieving watchlist for %s' % repr(site))
    #pywikibot.put_throttle() # It actually is a get, but a heavy one.
    watchlistHTML = site.getUrl(path, sysop=sysop)

    pywikibot.output(u'Parsing watchlist')
    watchlist = []
    for itemR in [re.compile(r'<li><input type="checkbox" name="id\[\]" value="(.+?)" />'),
                  re.compile(r'<li><input name="titles\[\]" type="checkbox" value="(.+?)" />')]:
        for m in itemR.finditer(watchlistHTML):
            pageName = m.group(1)
            watchlist.append(pageName)

    # Save the watchlist to disk
    # The file is stored in the watchlists subdir. Create if necessary.
    if sysop:
        f = open(pywikibot.config.datafilepath('watchlists',
                                               'watchlist-%s-%s-sysop.dat'
                                               % (site.family.name, site.lang)),
                 'w')
    else:
        f = open(pywikibot.config.datafilepath('watchlists',
                                               'watchlist-%s-%s.dat'
                                               % (site.family.name, site.lang)),
                 'w')
    pickle.dump(watchlist, f)
    f.close()
def get(site = None):
    if site is None:
        site = pywikibot.getSite()
    if site in cache:
        # Use cached copy if it exists.
        watchlist = cache[site]
    else:
        fn = pywikibot.config.datafilepath('watchlists',
                  'watchlist-%s-%s.dat' % (site.family.name, site.lang))
        try:
            # find out how old our saved dump is (in seconds)
            file_age = time.time() - os.path.getmtime(fn)
            # if it's older than 1 month, reload it
            if file_age > 30 * 24 * 60 * 60:
                pywikibot.output(
                    u'Copy of watchlist is one month old, reloading')
                refresh(site)
        except OSError:
            # no saved watchlist exists yet, retrieve one
            refresh(site)
        f = open(fn, 'r')
        watchlist = pickle.load(f)
        f.close()
        # create cached copy
        cache[site] = watchlist
    return watchlist
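
A minimal usage sketch: the cached watchlist is a plain list of page titles, so a membership test is enough (the title is hypothetical):

watchlist = get()
if u'Main Page' in watchlist:
    pywikibot.output(u'Page is being watched.')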
def writeMainFreeUploads(subpages):
    site = wikipedia.getSite(u'en', u'wikipedia')
    page = wikipedia.Page(site, u'User:Multichill/Free_uploads')
    oldtext = page.get()
    text = u'__TOC__\n'
    #text = text + u'== Links to day pages ==\n'
    #text = text + u'{{Special:PrefixIndex/User:Multichill/Free uploads/20}}\n'
    text = text + u'== This week ==\n'

    i = 0
    limit = 7

    # From new to old
    subpages.reverse()

    for subpage in subpages:
        date = subpage.replace(u'User:Multichill/Free uploads/', u'')
        if i < limit:
            text = text + u'===[[%s|%s]]===\n' % (subpage, date)
            text = text + u'{{%s}}\n' % (subpage,)
        elif i == limit:
            text = text + u'== Older ==\n'
            text = text + u'* [[%s|%s]]\n' % (subpage, date)
        else:
            text = text + u'* [[%s|%s]]\n' % (subpage, date)

        i = i + 1

    comment = u'Updating list, %d subpages contain images' % (len(subpages),)
    wikipedia.showDiff(oldtext, text)
    wikipedia.output(comment)
    page.put(text, comment)
def output(message, toStdout = True):
    message = time.strftime('[%Y-%m-%d %H:%M:%S] ') + message
    wikipedia.output(message, toStdout = toStdout)
    if toStdout:
        sys.stdout.flush()
    else:
        sys.stderr.flush()
def makeStatistics(mconfig, totals):
    text = u'{| class="wikitable sortable"\n'
    text = text + u'! country !! lang !! total !! page !! row template !! Commons template\n'
    
    totalImages = 0
    for ((countrycode, lang), countryconfig) in sorted(mconfig.countries.items()):
        if countryconfig.get('unusedImagesPage') and countryconfig.get('commonsTemplate'):
            text = text + u'|-\n'
            text = text + u'| %s ' % countrycode
            text = text + u'|| %s ' % lang
            text = text + u'|| %s ' % totals.get((countrycode, lang))
            totalImages = totalImages + totals.get((countrycode, lang))
            text = text + u'|| [[:%s:%s|%s]] ' % (lang, countryconfig.get('unusedImagesPage'), countryconfig.get('unusedImagesPage'))
            text = text + u'|| [[:%s:Template:%s|%s]] ' % (lang, countryconfig.get('rowTemplate'), countryconfig.get('rowTemplate'))
            text = text + u'|| {{tl|%s}}\n' % countryconfig.get('commonsTemplate')
    text = text + u'|-\n'
    text = text + u'| || || %s \n' % totalImages
    text = text + u'|}\n'
    
    site = wikipedia.getSite('commons', 'commons')
    page = wikipedia.Page(site, u'Commons:Monuments database/Unused images/Statistics')
    
    comment = u'Updating unused image statistics. Total unused images: %s' % totalImages
    wikipedia.output(text)
    page.put(newtext = text, comment = comment) 
Example #31
class AfDBot:
    # Edit summary message that should be used.
    msg = {
        'en':
        u'New section: /* [[Wikipedia:Articles for deletion|AfD]] nomination */ Notification',
    }

    def __init__(self, AfDlog, always, debug=False):
        """
        Constructor. Parameters:
            * AfDlog        - The AfD log to be treated.
            * always        - If True, the user won't be prompted before
                              changes are made.
            * debug         - If True, don't edit pages. Only show proposed
                             edits.
        """
        self.AfDlog = AfDlog
        self.always = always
        self.debug = debug
        self.site = AfDlog.site()
        self.db = None
        self.replag = None

        #locale.setlocale(locale.LC_ALL, 'nl_NL')
        os.environ['TZ'] = 'Europe/Amsterdam'

    def run(self):
        # Set up database access
        try:
            self.db = querier.querier(host="nlwiki.labsdb")
        except Exception, error:
            wikipedia.output(u'Could not connect to database: %s.' % error,
                             toStdout=False)

        # Dictionary mapping each user to a list of (page_title, nominator) tuples.
        self.contributors = {}

        if self.db:
            # Get replag
            sql = """
                    SELECT time_to_sec(timediff(now()+0,CAST(rev_timestamp AS int))) AS replag
                    FROM nlwiki_p.revision
                    ORDER BY rev_timestamp DESC
                    LIMIT 1;"""
            result = self.db.do(sql)

            if not result:
                wikipedia.output(
                    u'Could not get replag. Assuming it\'s infinite (= 1 month).'
                )
                self.replag = 30 * 24 * 3600
            else:
                self.replag = int(result[0]['replag'])
                wikipedia.output(u'Replag: %is.' % self.replag)

        wikipedia.setAction(wikipedia.translate(wikipedia.getSite(), self.msg))
        try:
            # Load the page
            text = self.AfDlog.get()
        except wikipedia.NoPage:
            wikipedia.output(u"Page %s does not exist; skipping." %
                             self.AfDlog.aslink())
            return
        except wikipedia.IsRedirectPage:
            wikipedia.output(u"Page %s is a redirect; skipping." %
                             self.AfDlog.aslink())
            return

        # Find AfD's
        pageR = re.compile(r'^\*[ ]*?\[\[(?P<page>.*?)(?:\|.*?\]\]|\]\])')
        timestampR = re.compile(r'(\d{1,2}) (.{3}) (\d{4}) (\d{2}):(\d{2})')
        userR = re.compile(
            r'\[\[(?:[Uu]ser|[Gg]ebruiker):(?P<user>.*?)(?:\|.*?\]\]|\]\])')
        strictTemplateR = re.compile(
            r'\{\{(?:[Uu]ser|[Gg]ebruiker):(?P<user>.*?)\/[Hh]andtekening\}\}')
        templateR = re.compile(
            r'\{\{(?:[Uu]ser|[Gg]ebruiker):(?P<user>.*?)\/.*?\}\}')
        pages = []
        lines = text.splitlines()
        for line in lines:
            mPage = pageR.search(line)
            mTimestamp = timestampR.search(line)
            if mTimestamp:
                t = time.strftime(
                    '%Y%m%d%H%M%S',
                    time.gmtime(
                        time.mktime(
                            time.strptime(mTimestamp.group(),
                                          '%d %b %Y %H:%M'))))
            else:
                t = None
            if mPage and userR.search(line):
                pages.append(
                    (mPage.group('page'), userR.search(line).group('user'), t))
                continue
            elif mPage and strictTemplateR.search(line):
                pages.append((mPage.group('page'),
                              strictTemplateR.search(line).group('user'), t))
                continue
            elif mPage and templateR.search(line):
                pages.append((mPage.group('page'),
                              templateR.search(line).group('user'), t))
                continue
            elif mPage:
                pages.append((mPage.group('page'), None, t))
                continue
        wikipedia.output(u'Found %i AfD\'s.' % len(pages))

        # Treat AfD's
        for p in pages:
            page = wikipedia.Page(self.site, p[0])
            nominator = p[1]
            timestamp = p[2]
            page_contributors = self.getcontributors(page, timestamp)

            for contributor in page_contributors:
                if contributor not in self.contributors:
                    self.contributors[contributor] = [(page.title(), nominator)]
                else:
                    self.contributors[contributor].append(
                        (page.title(), nominator))

        # Treat users
        wikipedia.output(u'\n\nFound %i unique users.' %
                         len(self.contributors))
        pages = []  # User talk pages
        for user in self.contributors.keys():
            pages.append(u'%s:%s' % (self.site.namespace(3), user))

        gen = pagegenerators.PagesFromTitlesGenerator(pages, site=self.site)
        gen = pagegenerators.PreloadingGenerator(gen)

        for page in gen:
            self.treatUser(page)
 def outputall(self):
     # Report the counts in alphabetical order of name.
     for name in sorted(self.dict.keys()):
         pywikibot.output("There are " + str(self.dict[name]) + " " + name)
Example #33
            for id in pageobjs['query']['badrevids']:
                if id == int(revid):
                    # print rv
                    pywikibot.output('* ' + revid)
                    return False
    return True


cat = catlib.Category(
    pywikibot.getSite(),
    'Category:%s' % pywikibot.translate(pywikibot.getSite(), reports_cat))
gen = pagegenerators.CategorizedPageGenerator(cat, recurse=True)

for page in gen:
    data = page.get()
    pywikibot.output(page.title(asLink=True))
    output = ''

    #
    # Preserve text before of the sections
    #

    m = re.search("(?m)^==\s*[^=]*?\s*==", data)
    if m:
        output = data[:m.end() + 1]
    else:
        m = re.search("(?m)^===\s*[^=]*?", data)
        if not m:
            continue
        output = data[:m.start()]
Example #34
savetext = u"{{#switch:{{{1|ur}}}"

# sql part
for lang in [
        "ur", "fa", "ar", "ro", "tr", "en", "fr", "de", "hi", "az", "id",
        "pnb", "hu", "he"
]:
    site = wikipedia.getSite(lang)
    query = "select /* SLOW_OK */ count(rc_title),0 from recentchanges join page on rc_cur_id=page_id where rc_new=1 and rc_namespace=0 and page_is_redirect=0 and page.page_len>70 and rc_deleted=0 and DATE_SUB(CURRENT_TIMESTAMP, INTERVAL 1 DAY)<rc_timestamp;"

    conn = mysqldb.connect(lang + "wiki.labsdb",
                           db=site.dbName(),
                           user=config.db_username,
                           passwd=config.db_password)
    cursor = conn.cursor()

    wikipedia.output(u'Executing query:\n%s' % query)
    query = query.encode(site.encoding())
    cursor.execute(query)

    wikinum, nunum = cursor.fetchone()
    if wikinum:
        savetext = savetext + u"|" + lang + u"=" + numbertopersian(wikinum)

# pywikipedia part
savetext = savetext + "}}"
wikipedia.output(savetext)
site = wikipedia.getSite()
page = wikipedia.Page(site, u"سانچہ:شماریات گذشتہ 24/شمار")
page.put(savetext, u"(روبالہ:تجديد شماريات")
Example #35
    def pages(self):
        for page in self.generator:
            try:
                pywikibot.output(u'\n>>>> %s <<<<' % page.title())
                commons = pywikibot.getSite('commons', 'commons')
                commonspage = pywikibot.Page(commons, page.title())
                try:
                    getcommons = commonspage.get(get_redirect=True)
                    if page.title() == commonspage.title():
                        oldText = page.get()
                        text = oldText

                        # for commons template
                        findTemplate = re.compile(ur'\{\{[Cc]ommonscat')
                        s = findTemplate.search(text)
                        findTemplate2 = re.compile(ur'\{\{[Ss]isterlinks')
                        s2 = findTemplate2.search(text)
                        if s or s2:
                            pywikibot.output(u'** Already done.')
                        else:
                            text = pywikibot.replaceCategoryLinks(
                                text + u'{{commons|%s}}' % commonspage.title(),
                                page.categories())
                            if oldText != text:
                                pywikibot.showDiff(oldText, text)
                                if not self.acceptall:
                                    choice = pywikibot.inputChoice(
                                        u'Do you want to accept these changes?',
                                        ['Yes', 'No', 'All'], ['y', 'N', 'a'],
                                        'N')
                                    if choice == 'a':
                                        self.acceptall = True
                                if self.acceptall or choice == 'y':
                                    try:
                                        msg = pywikibot.translate(
                                            pywikibot.getSite(), comment1)
                                        page.put(text, msg)
                                    except pywikibot.EditConflict:
                                        pywikibot.output(
                                            u'Skipping %s because of edit conflict'
                                            % (page.title()))

                except pywikibot.NoPage:
                    pywikibot.output(u'Page does not exist in Commons!')

            except pywikibot.NoPage:
                pywikibot.output(u'Page %s does not exist?!' % page.title())
            except pywikibot.IsRedirectPage:
                pywikibot.output(u'Page %s is a redirect; skipping.' %
                                 page.title())
            except pywikibot.LockedPage:
                pywikibot.output(u'Page %s is locked?!' % page.title())
Example #36
def asktoadd(pl):
    if pl.site() != mysite:
        return
    if pl.isRedirectPage():
        pl2 = pl.getRedirectTarget()
        if needcheck(pl2):
            tocheck.append(pl2)
            checked[pl2] = pl2
        return
    ctoshow = 500
    pywikibot.output(u'')
    pywikibot.output(u"==%s==" % pl.title())
    while 1:
        answer = raw_input("y(es)/n(o)/i(gnore)/(o)ther options? ")
        if answer == 'y':
            include(pl)
            break
        if answer == 'c':
            include(pl, realinclude=False)
            break
        if answer == 'z':
            if pl.exists():
                if not pl.isRedirectPage():
                    linkterm = pywikibot.input(
                        u"In what manner should it be alphabetized?")
                    include(pl, linkterm=linkterm)
                    break
            include(pl)
            break
        elif answer == 'n':
            exclude(pl)
            break
        elif answer == 'i':
            exclude(pl, real_exclude=False)
            break
        elif answer == 'o':
            pywikibot.output(u"t: Give the beginning of the text of the page")
            pywikibot.output(
                u"z: Add under another title (as [[Category|Title]])")
            pywikibot.output(
                u"x: Add the page, but do not check links to and from it")
            pywikibot.output(u"c: Do not add the page, but do check links")
            pywikibot.output(u"a: Add another page")
            pywikibot.output(u"l: Give a list of the pages to check")
        elif answer == 'a':
            pagetitle = raw_input("Specify page to add:")
            page = pywikibot.Page(pywikibot.getSite(), pagetitle)
            if page not in checked:
                include(page)
        elif answer == 'x':
            if pl.exists():
                if pl.isRedirectPage():
                    pywikibot.output(
                        u"Redirect page. Will be included normally.")
                    include(pl, realinclude=False)
                else:
                    include(pl, checklinks=False)
            else:
                pywikibot.output(u"Page does not exist; not added.")
                exclude(pl, real_exclude=False)
            break
        elif answer == 'l':
            pywikibot.output(u"Number of pages still to check: %s" %
                             len(tocheck))
            pywikibot.output(u"Pages to be checked:")
            pywikibot.output(u" - ".join(page.title() for page in tocheck))
            pywikibot.output(u"==%s==" % pl.title())
        elif answer == 't':
            pywikibot.output(u"==%s==" % pl.title())
            try:
                pywikibot.output(u'' + pl.get(get_redirect=True)[0:ctoshow])
            except pywikibot.NoPage:
                pywikibot.output(u"Page does not exist.")
            ctoshow += 500
        else:
            pywikibot.output(u"Not understood.")
Example #37
     pywikibot.getall(mysite, subcatlist)
     for cat in subcatlist:
         list = cat.articlesList()
         for page in list:
             exclude(page.title(), real_exclude=False)
             checked[page] = page
 list = workingcat.articlesList()
 if list:
     for pl in list:
         checked[pl] = pl
     pywikibot.getall(mysite, list)
     for pl in list:
         include(pl)
 else:
     pywikibot.output(
         u"Category %s does not exist or is empty. Which page to start with?"
         % workingcatname)
     answer = pywikibot.input(u"(Default is [[%s]]):" % workingcatname)
     if not answer:
         answer = workingcatname
     pywikibot.output(u'' + answer)
     pl = pywikibot.Page(mysite, answer)
     tocheck = []
     checked[pl] = pl
     include(pl)
 loaded = 0
 while tocheck:
     if loaded == 0:
         if len(tocheck) < 50:
             loaded = len(tocheck)
         else:
Example #38
    def getcontributors(self, page, timestamp):
        """
        Return a page's major contributors.
        """
        wikipedia.output(u'\n>>> %s <<<' % (page.title()))
        if page.isRedirectPage():
            wikipedia.output(u'Page is a redirect.')

            if self.db:
                sql = """
                        SELECT 1
                        FROM nlwiki_p.logging
                        WHERE log_namespace = %s
                        AND log_title = %s
                        AND log_timestamp > %s
                        AND log_type = 'move'
                        ORDER BY log_timestamp ASC
                        LIMIT 1;"""
                args = (page.namespace(),
                        self.sqltitle(page.titleWithoutNamespace()), timestamp)
                result = self.db.do(sql, args)

                if result:
                    page = page.getRedirectTarget()
                    wikipedia.output(
                        u'Page was moved after the nomination. Checking target: %s.'
                        % page.aslink())

        # Get first author of article
        if self.site.versionnumber() >= 12:
            #API Mode
            params = {
                'action': 'query',
                'titles': self.sqltitle(page.title()),
                'prop': 'revisions',
                'rvdir': 'newer',
                'rvlimit': 1,
                'rvprop': 'timestamp|user',
            }

            datas = query.GetData(params, self.site)
            try:
                users = [
                    datas['query']['pages'][page_id]['revisions'][0]['user']
                    for page_id in datas['query']['pages'].keys()
                ]
                creator = users[0]
            except:
                wikipedia.output(
                    u'Could not get first author from api for %s. The page has probably been deleted. Ignoring.'
                    % page.title(),
                    toStdout=True)
                return set()
        elif self.db:
            wikipedia.output(
                u'Can not use api for version history. Trying database.')
            sql = """
                    SELECT *
                    FROM nlwiki_p.revision
                    LEFT JOIN nlwiki_p.page
                    ON page_id = rev_page
                    WHERE page_namespace = %s
                    AND page_title = %s
                    ORDER BY rev_timestamp ASC
                    LIMIT 1;"""
            args = (page.namespace(), self.sqltitle(page.title()))
            result = self.db.do(sql, args)

            if result:
                creator = result[0]['rev_user_text']
            else:
                creator = None
        else:
            wikipedia.output(
                u'Both api and database are unavailable. Aborting.',
                toStdout=False)

        # Get authors with more than 5 major edits.
        # FIXME: It's actually faster to select * than rev_user_text. Don't know why.
        if self.db:
            sql = """
                    SELECT *
                    FROM nlwiki_p.revision
                    LEFT JOIN nlwiki_p.page
                    ON page_id = rev_page
                    WHERE page_namespace = %s
                    AND page_title = %s
                    AND rev_timestamp < %s
                    AND rev_minor_edit = 0
                    GROUP BY rev_user_text
                    HAVING COUNT(1) > 5;"""
            args = (page.namespace(), self.sqltitle(page.title()), timestamp)
            results = self.db.do(sql, args)

            try:
                contributors = set([
                    unicode(result['rev_user_text'], 'utf8')
                    for result in results
                ])
            except Exception, error:
                wikipedia.output(u'Could not get contributors.')
                print error
Example #39
    def run(self):
        """
        Starts the robot's action.
        """

        keepGoing = True
        startFromBeginning = True
        while keepGoing:
            if startFromBeginning:
                self.savedProgress = None
            self.refreshGenerator()
            count = 0
            for page in self.preloadingGen:
                try:
                    pageText = page.get(get_redirect=True).split("\n")
                    count += 1
                except pywikibot.NoPage:
                    pywikibot.output(u'Page %s does not exist or has already '
                                     u'been deleted, skipping.' %
                                     page.title(asLink=True))
                    continue
                # Show the title of the page we're working on.
                # Highlight the title in purple.
                pywikibot.output(
                    u"\n\n>>> \03{lightpurple}%s\03{default} <<<" %
                    page.title())
                pywikibot.output(u'-  -  -  -  -  -  -  -  -  ')
                if len(pageText) > 75:
                    pywikibot.output(u'The page is too long; only the first '
                                     u'50 lines are shown:')
                    pywikibot.output(u'-  ' * 9)
                    pywikibot.output(u'\n'.join(pageText[:50]))
                else:
                    pywikibot.output(u'\n'.join(pageText))
                pywikibot.output(u'-  -  -  -  -  -  -  -  -  ')
                choice = pywikibot.inputChoice(
                    u'Input action?', ['delete', 'skip', 'update', 'quit'],
                    ['d', 'S', 'u', 'q'], 'S')
                if choice == 'q':
                    keepGoing = False
                    break
                elif choice == 'u':
                    pywikibot.output(u'Updating from CSD category.')
                    self.savedProgress = page.title()
                    startFromBeginning = False
                    break
                elif choice == 'd':
                    reason = self.getReasonForDeletion(page)
                    pywikibot.output(
                        u'The chosen reason is: \03{lightred}%s\03{default}' %
                        reason)
                    page.delete(reason, prompt=False)
                else:
                    pywikibot.output(u'Skipping page %s' % page.title())
                startFromBeginning = True
            if count == 0:
                if startFromBeginning:
                    pywikibot.output(
                        u'There are no pages to delete.\n'
                        u'Waiting for 30 seconds or press Ctrl+C to quit...')
                    try:
                        time.sleep(30)
                    except KeyboardInterrupt:
                        keepGoing = False
                else:
                    startFromBeginning = True
        pywikibot.output(u'Quitting program.')
Example #40
    def treatUser(self, page):
        """
        Leave a message for the user.
        """
        wikipedia.output(u'\n>>> %s <<<' % (page.title()))
        user = page.titleWithoutNamespace()
        welcomeUser = False
        afds = []

        try:
            # Load the page
            original_text = page.get()
        except wikipedia.NoPage:
            wikipedia.output(u"Page %s does not exist." % page.aslink())
            original_text = ''
            welcomeUser = True
        except wikipedia.IsRedirectPage:
            wikipedia.output(u"Page %s is a redirect. Skipping." %
                             page.aslink())
            return

        if user not in self.contributors:
            wikipedia.output(
                u'Could not find AfD information for this user. Skipping.')
            return
        else:
            for page_title, nominator in self.contributors[user]:
                if nominator == page.title():
                    # Page was created and nominated for deletion by the same user.
                    wikipedia.output(
                        u'* [[%s]]: Article has been nominated for deletion by its author.'
                        % page_title)
                    continue
                # Try to find links to the page using the replicated database.
                if self.db and self.replag < 600:
                    # FIXME: pl_namespace should not be fixed at 0.
                    sql = """
                             SELECT 1
                             FROM nlwiki_p.page
                             LEFT JOIN nlwiki_p.pagelinks
                             ON pl_from = page_id
                             WHERE page_namespace = 3
                             AND page_title = %s
                             AND pl_namespace = 0
                             AND pl_title = %s
                             LIMIT 1;"""
                    args = (self.sqltitle(user), self.sqltitle(page_title))
                    result = self.db.do(sql, args)
                    if result:
                        wikipedia.output(u'* [[%s]]: Found link in database.' %
                                         page_title)
                        continue
                else:
                    if re.search(
                            r'\[\[\:{0,1}%s(?:.*?|)\]\]' %
                            re.escape(page_title).replace('\\ ', '[_ ]'),
                            original_text):
                        wikipedia.output(
                            u'* [[%s]]: Found a link in text. Ignoring.' %
                            page_title)
                        continue
                    elif re.search(
                            r'\{\{vvn\|%s.*?\}\}' %
                            re.escape(page_title).replace('\\ ', '[_ ]'),
                            original_text):
                        wikipedia.output(u'* [[%s]]: {{vvn}} found.' %
                                         page_title)
                        continue

                wikipedia.output(u'* [[%s]]: Leaving message.' % page_title)
                afds.append((page_title, nominator))

            if len(afds) == 0:
                wikipedia.output(u'User has been notified of all AfD\'s.')
                return

            if len(afds) == 1:
                header = u'Beoordelingsnominatie [[%s]]' % afds[0][0]
                if afds[0][1]:
                    titles = u'Het gaat om [[%s]] dat is genomineerd door [[Gebruiker:%s|%s]].' % (
                        afds[0][0], afds[0][1], afds[0][1])
                else:
                    titles = u'Het gaat om [[%s]].' % (afds[0][0])
            elif len(afds) > 1:
                header = u'Beoordelingsnominatie van o.a. [[%s]]' % afds[0][0]
                titles = u'De genomineerde artikelen zijn: '
                for page_title, nominator in afds:
                    if nominator:
                        titles += u'[[%s]] door [[Gebruiker:%s|%s]], ' % (
                            page_title, nominator, nominator)
                    else:
                        titles += u'[[%s]] door een onbekende gebruiker, ' % (
                            page_title)

                titles = u'%s.' % titles[:-2]

            comment = u'Nieuw onderwerp: /* %s */ Automatische melding van beoordelingsnominatie' % header
            AfDMessage = u'{{subst:Gebruiker:Erwin/Bot/Verwijderbericht/SPagina|%s|%s|%s}} --~~~~' % (
                header, titles, self.AfDlog.title())
            if welcomeUser:
                comment = u'Welkom op Wikipedia!; %s' % comment
                text = u'{{welkomstbericht}}' + u'\n\n' + AfDMessage
            else:
                text = original_text + u'\n\n' + AfDMessage
            text = text.strip()

        # only save if something was changed
        if text != original_text:
            # show what was changed
            if not self.always or self.debug:
                wikipedia.showDiff(original_text, text)
            if not self.debug:
                if not self.always:
                    choice = wikipedia.inputChoice(
                        u'Do you want to accept these changes?', ['Yes', 'No'],
                        ['y', 'N'], 'N')
                else:
                    choice = 'y'
                if choice == 'y':
                    try:
                        # Save the page
                        page.put(text, comment=comment, minorEdit=False)
                    except wikipedia.LockedPage:
                        wikipedia.output(u"Page %s is locked; skipping." %
                                         page.aslink())
                    except wikipedia.EditConflict:
                        wikipedia.output(
                            u'Skipping %s because of edit conflict' %
                            (page.title()))
                    except wikipedia.SpamfilterError, error:
                        wikipedia.output(
                            u'Cannot change %s because of spam blacklist entry %s'
                            % (page.title(), error.url))
                    except wikipedia.PageNotSaved:
                        wikipedia.output(
                            u'Page %s could not be saved; skipping.' %
                            page.aslink())
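The replica-database check in the method above amounts to a single LEFT JOIN against the pagelinks table. A minimal standalone sketch of that lookup, assuming the MySQLdb driver; the connection parameters and the function name are illustrative placeholders, not taken from the original:

import MySQLdb

def user_links_to_page(user, page_title):
    # True if the user talk page (namespace 3) already links to the
    # article (namespace 0); mirrors the SQL used above.
    # host and read_default_file are hypothetical connection settings.
    conn = MySQLdb.connect(host='sql-s3', db='nlwiki_p',
                           read_default_file='~/.my.cnf')
    try:
        cursor = conn.cursor()
        cursor.execute("""
            SELECT 1
            FROM nlwiki_p.page
            LEFT JOIN nlwiki_p.pagelinks ON pl_from = page_id
            WHERE page_namespace = 3
              AND page_title = %s
              AND pl_namespace = 0
              AND pl_title = %s
            LIMIT 1;""",
            (user.replace(' ', '_').encode('utf8'),
             page_title.replace(' ', '_').encode('utf8')))
        return cursor.fetchone() is not None
    finally:
        conn.close()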
Beispiel #41
0
    def put(self, title, contents):
        mysite = pywikibot.getSite()

        page = pywikibot.Page(mysite, title)
        # Show the title of the page we're working on.
        # Highlight the title in purple.
        pywikibot.output(u">>> \03{lightpurple}%s\03{default} <<<"
                         % page.title())

        if self.summary:
            comment = self.summary
        else:
            comment = pywikibot.translate(mysite, self.msg)

        comment_top = comment + " - " + pywikibot.translate(mysite,
                                                            self.msg_top)
        comment_bottom = comment + " - " + pywikibot.translate(mysite,
                                                               self.msg_bottom)
        comment_force = comment + " *** " + pywikibot.translate(mysite,
                                                                self.msg_force) + " ***"

        # Remove trailing newlines (cause troubles when creating redirects)
        contents = re.sub('^[\r\n]*','', contents)

        if page.exists():
            if self.append == "Top":
                pywikibot.output(u"Page %s already exists, appending on top!"
                                 % title)
                contents = contents + page.get()
                comment = comment_top
            elif self.append == "Bottom":
                pywikibot.output(u"Page %s already exists, appending on bottom!"
                                 % title)
                contents = page.get() + contents
                comment = comment_bottom
            elif self.force:
                pywikibot.output(u"Page %s already exists, ***overwriting!"
                                 % title)
                comment = comment_force
            else:
                pywikibot.output(u"Page %s already exists, not adding!" % title)
                return
        else:
            if self.autosummary:
                comment = ''
                pywikibot.setAction('')

        if self.dry:
            pywikibot.output("*** Dry mode ***\n" + \
                "\03{lightpurple}title\03{default}: " + title + "\n" + \
                "\03{lightpurple}contents\03{default}:\n" + contents + "\n" \
                "\03{lightpurple}comment\03{default}: " + comment + "\n")
            return

        try:
            page.put(contents, comment = comment, minorEdit = self.minor)
        except pywikibot.LockedPage:
            pywikibot.output(u"Page %s is locked; skipping." % title)
        except pywikibot.EditConflict:
            pywikibot.output(u'Skipping %s because of edit conflict' % title)
        except pywikibot.SpamfilterError, error:
            pywikibot.output(
                u'Cannot change %s because of spam blacklist entry %s'
                % (title, error.url))
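The append/overwrite decision in put() above can be isolated into a small pure function, which makes the four outcomes easier to test. A hedged sketch; the function name and the None-as-skip convention are illustrative, not from the original:

def merge_contents(existing, new, append=None, force=False):
    # Mirrors put(): prepend for "Top", append for "Bottom",
    # overwrite when force is set, otherwise leave the page alone.
    if existing is None:
        return new            # page does not exist yet: create it
    if append == 'Top':
        return new + existing
    if append == 'Bottom':
        return existing + new
    if force:
        return new
    return None               # page exists and no append/force: skip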
Beispiel #42
0
            try:
                contributors = set([
                    unicode(result['rev_user_text'], 'utf8')
                    for result in results
                ])
            except Exception, error:
                wikipedia.output(u'Could not get contributors.')
                print error
        else:
            contributors = set()

        if creator:
            contributors.add(creator)

        wikipedia.output(u'Found %i contributors: %s.' %
                         (len(contributors), u', '.join(contributors)))
        return contributors

    def sqltitle(self, page_title):
        """
        Return a MySQL style title.
        """
        return page_title.replace(' ', '_').encode('utf8')

    def treatUser(self, page):
        """
        Leave a message for the user.
        """
        wikipedia.output(u'\n>>> %s <<<' % (page.title()))
        user = page.titleWithoutNamespace()
        welcomeUser = False
Beispiel #43
0
    def badNameFilter(self, name, force=False):
        if not globalvar.filtBadName:
            return False

        #initialize blacklist
        if not hasattr(self, '_blacklist') or force:
            elenco = [
                ' ano',
                ' anus',
                'anal ',
                'babies',
                'baldracca',
                'balle',
                'bastardo',
                'bestiali',
                'bestiale',
                'bastarda',
                'b.i.t.c.h.',
                'bitch',
                'boobie',
                'bordello',
                'breast',
                'cacata',
                'cacca',
                'cachapera',
                'cagata',
                'cane',
                'cazz',
                'cazzo',
                'cazzata',
                'chiavare',
                'chiavata',
                'chick',
                'christ ',
                'cristo',
                'clitoride',
                'coione',
                'cojdioonear',
                'cojones',
                'cojo',
                'coglione',
                'coglioni',
                'cornuto',
                'cula',
                'culatone',
                'culattone',
                'culo',
                'deficiente',
                'deficente',
                'dio',
                'die ',
                'died ',
                'ditalino',
                'ejackulate',
                'enculer',
                'eroticunt',
                'fanculo',
                'f******o',
                'fica ',
                'ficken',
                'figa',
                'sfiga',
                'fottere',
                'fotter',
                'fottuto',
                'f**k',
                'f.u.c.k.',
                "funkyass",
                'gay',
                'hentai.com',
                'horne',
                'horney',
                'virgin',
                'hotties',
                'idiot',
                '@alice.it',
                'incest',
                'jesus',
                'gesu',
                'gesù',
                'kazzo',
                'kill',
                'leccaculo',
                'lesbian',
                'lesbica',
                'lesbo',
                'masturbazione',
                'masturbare',
                'masturbo',
                'merda',
                'merdata',
                'merdoso',
                'mignotta',
                'minchia',
                'minkia',
                'minchione',
                'mona',
                'nudo',
                'nuda',
                'nudi',
                'oral',
                'sex',
                'orgasmso',
                'porc',
                'pompa',
                'pompino',
                'porno',
                'puttana',
                'puzza',
                'puzzone',
                "racchia",
                'sborone',
                'sborrone',
                'sborata',
                'sborolata',
                'sboro',
                'scopata',
                'scopare',
                'scroto',
                'scrotum',
                'sega',
                'sesso',
                'shit',
                'shiz',
                's.h.i.t.',
                'sadomaso',
                'sodomist',
                'stronzata',
                'stronzo',
                'succhiamelo',
                'succhiacazzi',
                'testicol',
                'troia',
                'universetoday.net',
                'vaffanculo',
                'v****a',
                'vibrator',
                "vacca",
                'yiddiot',
                "zoccola",
            ]
            elenco_others = [
                '@',
                ".com",
                ".sex",
                ".org",
                ".uk",
                ".en",
                ".it",
                "admin",
                "administrator",
                "amministratore",
                '@yahoo.com',
                '@alice.com',
                "amministratrice",
                "burocrate",
                "checkuser",
                "developer",
                "http://",
                "jimbo",
                "mediawiki",
                "on wheals",
                "on wheal",
                "on wheel",
                "planante",
                "razinger",
                "sysop",
                "troll",
                "vandal",
                " v.f. ",
                "v. fighter",
                "vandal f.",
                "vandal fighter",
                'wales jimmy',
                "wheels",
                "wales",
                "www.",
            ]

            #blacklist from wikipage
            badword_page = pywikibot.Page(
                self.site, pywikibot.translate(self.site, bad_pag))
            list_loaded = list()
            if badword_page.exists():
                pywikibot.output(u'\nLoading the bad words list from %s...' %
                                 self.site)
                list_loaded = load_word_function(badword_page.get())
            else:
                showStatus(4)
                pywikibot.output(u'The bad word page doesn\'t exist!')
            self._blacklist = elenco + elenco_others + list_loaded
            del elenco, elenco_others, list_loaded

        if not hasattr(self, '_whitelist') or force:
            #initialize whitelist
            whitelist_default = ['emiliano']
            wtlpg = pywikibot.translate(self.site, whitelist_pg)
            list_white = list()
            if wtlpg:
                whitelist_page = pywikibot.Page(self.site, wtlpg)
                if whitelist_page.exists():
                    pywikibot.output(u'\nLoading the whitelist from %s...' %
                                     self.site)
                    list_white = load_word_function(whitelist_page.get())
                else:
                    showStatus(4)
                    pywikibot.output(u"The whitelist's page doesn't exist!")
            else:
                showStatus(4)
                pywikibot.output(u"WARNING: The whitelist hasn't been setted!")

            # Join the whitelist words.
            self._whitelist = list_white + whitelist_default
            del list_white, whitelist_default

        try:
            # Strip whitelisted words first, then check what remains
            # against the blacklist.
            for wname in self._whitelist:
                if wname.lower() in str(name).lower():
                    name = name.lower().replace(wname.lower(), '')
                    for bname in self._blacklist:
                        if bname.lower() in name.lower():
                            self.bname[name] = bname
                            return True
                    return False
        except UnicodeEncodeError:
            pass
        try:
            for bname in self._blacklist:
                if bname.lower() in str(name).lower():  #bad name positive
                    self.bname[name] = bname
                    return True
        except UnicodeEncodeError:
            pass
        return False
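The intent of badNameFilter reduces to: remove any whitelisted substring from the name, then flag the name if a blacklisted substring remains. A minimal sketch of that rule with hypothetical short lists (the real lists are the ones above):

def is_bad_name(name, blacklist, whitelist):
    # Return the first blacklisted substring found in name, or None.
    # Whitelisted substrings are stripped first, so a name such as
    # 'Emiliano' is not flagged for containing 'ano'.
    lowered = name.lower()
    for wname in whitelist:
        lowered = lowered.replace(wname.lower(), '')
    for bname in blacklist:
        if bname.lower() in lowered:
            return bname
    return None

# is_bad_name('Emiliano', ['ano'], ['emiliano'])  ->  None
# is_bad_name('TrollAccount', ['troll'], [])      ->  'troll'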
Beispiel #44
0
def askAlternative(word, context=None):
    correct = None
    wikipedia.output(u"=" * 60)
    wikipedia.output(u"Found unknown word '%s'" % word)
    if context:
        wikipedia.output(u"Context:")
        wikipedia.output(u"" + context)
        wikipedia.output(u"-" * 60)
    while not correct:
        alternatives = Word(word).getAlternatives()
        for i in xrange(len(alternatives)):
            wikipedia.output(
                u"%s: Replace by '%s'" %
                (i + 1, alternatives[i].replace('_', ' ')))
        wikipedia.output(u"a: Add '%s' as correct" % word)
        if word[0].isupper():
            wikipedia.output(u"c: Add '%s' as correct" % (uncap(word)))
        wikipedia.output(u"i: Ignore once (default)")
        wikipedia.output(u"p: Ignore on this page")
        wikipedia.output(u"r: Replace text")
        wikipedia.output(u"s: Replace text, but do not save as alternative")
        wikipedia.output(u"g: Guess (give me a list of similar words)")
        wikipedia.output(u"*: Edit by hand")
        wikipedia.output(u"x: Do not check the rest of this page")
        answer = wikipedia.input(u":")
        if answer == "": answer = "i"
        if answer in "aAiIpP":
            correct = word
            if answer in "aA":
                knownwords[word] = word
                newwords.append(word)
            elif answer in "pP":
                pageskip.append(word)
        elif answer in "rRsS":
            correct = wikipedia.input(u"What should I replace it by?")
            if answer in "rR":
                if correct_html_codes:
                    correct = removeHTML(correct)
                if correct != cap(word) and correct != uncap(
                        word) and correct != word:
                    try:
                        knownwords[word] += [correct.replace(' ', '_')]
                    except KeyError:
                        knownwords[word] = [correct.replace(' ', '_')]
                    newwords.append(word)
                knownwords[correct] = correct
                newwords.append(correct)
        elif answer in "cC" and word[0].isupper():
            correct = word
            knownwords[uncap(word)] = uncap(word)
            newwords.append(uncap(word))
        elif answer in "gG":
            possible = getalternatives(word)
            if possible:
                print "Found alternatives:"
                for pos in possible:
                    wikipedia.output("  %s" % pos)
            else:
                print "No similar words found."
        elif answer == "*":
            correct = edit
        elif answer == "x":
            correct = endpage
        else:
            for i in xrange(len(alternatives)):
                if answer == str(i + 1):
                    correct = alternatives[i].replace('_', ' ')
    return correct
Beispiel #45
0
    def addReferences(self, oldText):
        """
        Tries to add a references tag into an existing section where it fits
        into. If there is no such section, creates a new section containing
        the references tag.
        * Returns : The modified pagetext

        """
        # Is there an existing section where we can add the references tag?
        for section in pywikibot.translate(self.site, referencesSections):
            sectionR = re.compile(r'\r\n=+ *%s *=+ *\r\n' % section)
            index = 0
            while index < len(oldText):
                match = sectionR.search(oldText, index)
                if match:
                    if pywikibot.isDisabled(oldText, match.start()):
                        pywikibot.output(
                            'Existing %s section is commented out, skipping.'
                            % section)
                        index = match.end()
                    else:
                        pywikibot.output(
                            u'Adding references tag to existing %s section...\n'
                            % section)
                        newText = oldText[:match.end(
                        )] + u'\n' + self.referencesText + u'\n' + oldText[
                            match.end():]
                        return newText
                else:
                    break

        # Create a new section for the references tag
        for section in pywikibot.translate(self.site, placeBeforeSections):
            # Find out where to place the new section
            sectionR = re.compile(r'\r\n(?P<ident>=+) *%s *(?P=ident) *\r\n' %
                                  section)
            index = 0
            while index < len(oldText):
                match = sectionR.search(oldText, index)
                if match:
                    if pywikibot.isDisabled(oldText, match.start()):
                        pywikibot.output(
                            'Existing %s section is commented out, won\'t add the references in front of it.'
                            % section)
                        index = match.end()
                    else:
                        pywikibot.output(
                            u'Adding references section before %s section...\n'
                            % section)
                        index = match.start()
                        ident = match.group('ident')
                        return self.createReferenceSection(
                            oldText, index, ident)
                else:
                    break
        # This gets complicated: we want to place the new references
        # section over the interwiki links and categories, but also
        # over all navigation bars, persondata, and other templates
        # that are at the bottom of the page. So we need some advanced
        # regex magic.
        # The strategy is: create a temporary copy of the text. From that,
        # keep removing interwiki links, templates etc. from the bottom.
        # At the end, look at the length of the temp text. That's the position
        # where we'll insert the references section.
        catNamespaces = '|'.join(self.site.category_namespaces())
        categoryPattern = r'\[\[\s*(%s)\s*:[^\n]*\]\]\s*' % catNamespaces
        interwikiPattern = r'\[\[([a-zA-Z\-]+)\s?:([^\[\]\n]*)\]\]\s*'
        # won't work with nested templates
        # the negative lookahead assures that we'll match the last template
        # occurrence in the temp text.
        ### fix me:
        ### {{commons}} or {{commonscat}} are part of Weblinks section
        ### * {{template}} is mostly part of a section
        ### so templatePattern must be fixed
        templatePattern = r'\r\n{{((?!}}).)+?}}\s*'
        commentPattern = r'<!--((?!-->).)*?-->\s*'
        metadataR = re.compile(
            r'(\r\n)?(%s|%s|%s|%s)$' % (categoryPattern, interwikiPattern,
                                        templatePattern, commentPattern),
            re.DOTALL)
        tmpText = oldText
        while True:
            match = metadataR.search(tmpText)
            if match:
                tmpText = tmpText[:match.start()]
            else:
                break
        pywikibot.output(
            u'Found no section that can be preceded by a new references section.\nPlacing it before interwiki links, categories, and bottom templates.'
        )
        index = len(tmpText)
        return self.createReferenceSection(oldText, index)
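The bottom-matter peeling described in the comments above can be tried in isolation: keep deleting a trailing category link, interwiki link, template, or HTML comment until nothing more matches, and the remaining length is where the references section goes. A condensed sketch with the same patterns (category namespace hardcoded to 'Category' for the example):

import re

def find_insertion_index(text):
    # Hedged sketch of the strategy in addReferences: peel metadata
    # off the bottom of a working copy and return the cut position.
    categoryPattern = r'\[\[\s*(Category)\s*:[^\n]*\]\]\s*'
    interwikiPattern = r'\[\[([a-zA-Z\-]+)\s?:([^\[\]\n]*)\]\]\s*'
    templatePattern = r'\r\n{{((?!}}).)+?}}\s*'
    commentPattern = r'<!--((?!-->).)*?-->\s*'
    metadataR = re.compile(
        r'(\r\n)?(%s|%s|%s|%s)$' % (categoryPattern, interwikiPattern,
                                    templatePattern, commentPattern),
        re.DOTALL)
    while True:
        match = metadataR.search(text)
        if not match:
            return len(text)
        text = text[:match.start()]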
Beispiel #46
0
    def run(self):
        while True:
            welcomed_count = 0
            if globalvar.quick and self.site.has_api():
                us = [x for x in self.parseNewUserLog()]
                showStatus()
                try:
                    userlib.getall(self.site, us)
                except NotImplementedError:
                    globalvar.quick = False
                    us = self._parseNewUserLogOld()
            else:
                us = self.parseNewUserLog()
            for users in us:
                if users.isBlocked():
                    showStatus(3)
                    pywikibot.output(u'%s has been blocked!' % users.name())
                    continue
                if 'bot' in users.groups():
                    showStatus(3)
                    pywikibot.output(u'%s is a bot!' % users.name())
                    continue
                if 'bot' in users.name().lower():
                    showStatus(3)
                    pywikibot.output(u'%s might be a global bot!' %
                                     users.name())
                    continue
                #if globalvar.offset != 0 and time.strptime(users.registrationTime(), "%Y-%m-%dT%H:%M:%SZ") >= globalvar.offset:
                #
                if users.editCount() >= globalvar.attachEditCount:
                    showStatus(2)
                    pywikibot.output(u'%s has enough edits to be welcomed.' %
                                     users.name())
                    ustp = users.getUserTalkPage()
                    if ustp.exists():
                        showStatus(3)
                        pywikibot.output(u'%s has already been welcomed.' %
                                         users.name())
                        continue
                    else:
                        if self.badNameFilter(users.name()):
                            self.reportBadAccount(users.name())
                            continue
                        welcome_text = pywikibot.translate(self.site, netext)
                        if globalvar.randomSign:
                            if self.site.family != 'wikinews':
                                welcome_text = welcome_text % choice(
                                    self.defineSign())
                            if self.site.family == 'wiktionary' and self.site.lang == 'it':
                                pass
                            else:
                                welcome_text += timeselected
                        elif (self.site.family != 'wikinews'
                              and self.site.lang != 'it'):
                            welcome_text = welcome_text % globalvar.defaultSign
                        if self.site.lang in final_new_text_additions:
                            welcome_text += pywikibot.translate(
                                self.site, final_new_text_additions)
                        welcome_comment = i18n.twtranslate(
                            self.site, 'welcome-welcome')
                        try:
                            #append welcomed, welcome_count++
                            ustp.put(welcome_text,
                                     welcome_comment,
                                     minorEdit=False)
                            welcomed_count += 1
                            self._totallyCount += 1
                            self.welcomed_users.append(users)
                        except pywikibot.EditConflict:
                            showStatus(4)
                            pywikibot.output(
                                u'An edit conflict has occurred, skipping this user.'
                            )

                    if globalvar.makeWelcomeLog and pywikibot.translate(
                            self.site, logbook):
                        showStatus(5)
                        if welcomed_count == 1:
                            pywikibot.output(u'One user has been welcomed.')
                        elif welcomed_count == 0:
                            pywikibot.output(u'No users have been welcomed.')
                        else:
                            pywikibot.output(u'%s users have been welcomed.' %
                                             welcomed_count)
                        if welcomed_count >= globalvar.dumpToLog:
                            if self.makelogpage(self.welcomed_users):
                                self.welcomed_users = list()
                                welcomed_count = 0
                            else:
                                continue
                    # If there is nothing to report, do nothing.
                else:
                    if users.editCount() == 0:
                        if not globalvar.quiet:
                            showStatus(1)
                            pywikibot.output(u'%s has no contributions.' %
                                             users.name())
                    else:
                        showStatus(1)
                        pywikibot.output(u'%s has only %d contributions.' %
                                         (users.name(), users.editCount()))
                    # That user mustn't be welcomed.
                    continue
            if globalvar.makeWelcomeLog and pywikibot.translate(
                    self.site, logbook) and welcomed_count > 0:
                showStatus()
                if welcomed_count == 1:
                    pywikibot.output(u'Putting the log of the latest user...')
                else:
                    pywikibot.output(
                        u'Putting the log of the latest %d users...' %
                        welcomed_count)
                if self.makelogpage(self.welcomed_users):
                    self.welcomed_users = list()
                else:
                    continue
                self.welcomed_users = list()
            if hasattr(self, '_BAQueue'):
                showStatus()
                pywikibot.output("Putting bad name to report page....")
                self.reportBadAccount(None, final=True)
            try:
                if globalvar.recursive:
                    showStatus()
                    if locale.getlocale()[1]:
                        strfstr = unicode(
                            time.strftime(u"%d %b %Y %H:%M:%S (UTC)",
                                          time.gmtime()),
                            locale.getlocale()[1])
                    else:
                        strfstr = unicode(
                            time.strftime(u"%d %b %Y %H:%M:%S (UTC)",
                                          time.gmtime()))
                    pywikibot.output(u'Sleeping %d seconds before rerun. %s' %
                                     (globalvar.timeRecur, strfstr))
                    time.sleep(globalvar.timeRecur)
                else:
                    raise KeyboardInterrupt
            except KeyboardInterrupt:
                #if globalvar.makeWelcomeLog and len(self.welcomed_users) > 0:
                #    pywikibot.output("Update log before qutting script.")
                #    self.makelogpage(self.welcomed_users)
                #if hasattr(self, '_BAQueue') and len(self._BAQueue) > 0 and globalvar.filtBadName:
                #    self.reportBadAccount(None, final = True)
                break
Beispiel #47
0
 def showpageinfo(self):
     pywikibot.output(u'[[%s]] %s' % (self.page.title(), self.date))
     pywikibot.output(u'Length: %i bytes' % self.length)
     pywikibot.output(u'User  : %s' % self.user)
Beispiel #48
0
                        ))
                else:
                    globalvar.dumpToLog = int(arg[11:])
            elif arg == '-quiet':
                globalvar.quiet = True
            elif arg == '-quick':
                globalvar.quick = True

        # Filename and pywikipedia path
        # file where is stored the random signature index
        filename = pywikibot.config.datafilepath(
            'welcome-%s-%s.data' %
            (pywikibot.default_family, pywikibot.default_code))
        if globalvar.offset and globalvar.timeoffset:
            pywikibot.output(
                'WARNING: both -offset and -timeoffset were provided, ignoring -offset'
            )
            globalvar.offset = 0
        bot = WelcomeBot()
        try:
            bot.run()
        except KeyboardInterrupt:
            if bot.welcomed_users:
                showStatus()
                pywikibot.output("Put welcomed users before quit...")
                bot.makelogpage(bot.welcomed_users)
            pywikibot.output("\nQuitting...")
    finally:
        # If there is the savedata, the script must save the number_user.
        if globalvar.randomSign and globalvar.saveSignIndex and bot.welcomed_users:
            import cPickle
Beispiel #49
0
pre, noinclude, includeonly, tags1, tags2 = u'\n', u'\n', u'\n', u'\n', u'\n'
for entry in dump.new_parse():
    if entry.ns == '0':
        text = entry.text.replace(u' /', u'/').replace(u'/ ', u'/').replace(
            u'< ', u'<').replace(u' >', u'>')

        if u'<noinclude>' in text or u'</noinclude>' in text:
            noinclude += u"#[[%s]]\n" % entry.title
        elif u'<includeonly>' in text or u'</includeonly>' in text:
            includeonly += u"#[[%s]]\n" % entry.title
        elif u'<pre>' in text or u'</pre>' in text:
            pre += u"#[[%s]]\n" % entry.title
        elif u'__NOGALLERY__' in text:
            tags1 += u"#[[%s]]\n" % entry.title
        elif u'__NOEDITSECTION__' in text:
            tags2 += u"#[[%s]]\n" % entry.title
        else:
            continue
        wikipedia.output(entry.title)
my_text = (u'\n== pre ==\n' + pre +
           u'\n== noinclude ==\n' + noinclude +
           u'\n== includeonly ==\n' + includeonly +
           u'\n== NOGALLERY ==\n' + tags1 +
           u'\n== NOEDITSECTION ==\n' + tags2)
f = codecs.open(bot_adress + "zztages.txt", "w", "utf-8")
f.write(my_text)
f.close()
#os.system("rm "+bot_adress+"fawiki-%s-pages-meta-current.xml.bz2" %(TheDay))
site = wikipedia.getSite('fa')
page = wikipedia.Page(site,
                      u"ویکی‌پدیا:گزارش دیتابیس/مقالاتی که تگ الگو دارند")
my_text = u'مقالات زیر ممکن است الگو درون آنها به اشتباه استفاده شده‌باشد \n' + my_text
page.put(my_text, u"ربات: به‌روز رسانی آمار دیگر ویکی‌ها")
Beispiel #50
0
    def handlebadpage(self):
        try:
            self.content = self.page.get()
        except pywikibot.IsRedirectPage:
            pywikibot.output(u'Already redirected, skipping.')
            return
        except pywikibot.NoPage:
            pywikibot.output(u'Already deleted')
            return

        for d in pywikibot.translate(pywikibot.getSite(), done):
            if d in self.content:
                pywikibot.output(u'Found: "%s" in content, nothing necessary' %
                                 d)
                return
        print "---- Start content ----------------"
        pywikibot.output(u"%s" % self.content)
        print "---- End of content ---------------"

        # Loop until the user gives a valid answer.
        answered = False
        while not answered:
            answer = pywikibot.input(question)

            if answer == 'q':
                sys.exit("Exiting")
            if answer == 'd':
                pywikibot.output(u'Trying to delete page [[%s]].' %
                                 self.page.title())
                self.page.delete()
                return
            if answer == 'e':
                oldText = self.page.get()
                text = oldText
                editor = editarticle.TextEditor()
                text = editor.edit(self.page.get())
                if oldText != text:
                    pywikibot.showDiff(oldText, text)
                    msg = pywikibot.input(u'Summary message:')
                    self.page.put(text, msg)
                return
            if answer == 'b':
                pywikibot.output(u'Blanking page [[%s]].' % self.page.title())
                try:
                    self.page.put('',
                                  comment=pywikibot.translate(
                                      pywikibot.getSite(), blanking) %
                                  self.content)
                except pywikibot.EditConflict:
                    print "An edit conflict occurred! Automatically retrying"
                    self.handlebadpage()
                return
            if answer == '':
                print 'Page correct! Proceeding with next page.'
                return
            # Check user input:
            if answer[0] == 'u':
                # Answer entered as a utf8 string
                try:
                    choices = answer[1:].split(',')
                except ValueError:
                    # User entered wrong value
                    pywikibot.error(u'"%s" is not valid' % answer)
                    continue
            else:
                try:
                    choices = answer.split(',')
                except ValueError:
                    # User entered wrong value
                    pywikibot.error(u'"%s" is not valid' % answer)
                    continue
            #test input
            for choice in choices:
                try:
                    x = int(choice)
                except ValueError:
                    break
                else:
                    answered = x in range(1, len(questionlist) + 1)
            if not answered:
                pywikibot.error(u'"%s" is not valid' % answer)
                continue
        summary = u''
        for choice in choices:
            answer = int(choice)
            # grab the template parameters
            tpl = pywikibot.translate(pywikibot.getSite(),
                                      templates)[questionlist[answer]]
            if tpl['pos'] == 'top':
                pywikibot.output(u'prepending %s...' % questionlist[answer])
                self.content = questionlist[answer] + '\n' + self.content
            elif tpl['pos'] == 'bottom':
                pywikibot.output(u'appending %s...' % questionlist[answer])
                self.content += '\n' + questionlist[answer]
            else:
                pywikibot.error(
                    u'"pos" should be "top" or "bottom" for template '
                    u'%s. Contact a developer.' % questionlist[answer])
                sys.exit("Exiting")
            summary += tpl['msg'] + ' '
            pywikibot.output(u'Probably added %s' % questionlist[answer])


#        pywikibot.output(newcontent) bug #2986247
        self.page.put(self.content, comment=summary)
        pywikibot.output(u'with comment %s\n' % summary)
Beispiel #51
0
    def standardizePageFooter(self, text):
        """
        Makes sure that interwiki links, categories and star templates are
        put in the correct position and order. This combines the old
        instance methods standardizeInterwiki and standardizeCategories.
        The page footer has the following sections in this sequence:
        1. categories
        2. ## TODO: template beyond categories ##
        3. additional information depending on local site policy
        4. stars templates for featured and good articles
        5. interwiki links

        """
        starsList = [
            u'bueno',
            u'bom interwiki',
            u'cyswllt[ _]erthygl[ _]ddethol', u'dolen[ _]ed',
            u'destacado', u'destaca[tu]',
            u'enllaç[ _]ad',
            u'enllaz[ _]ad',
            u'leam[ _]vdc',
            u'legătură[ _]a[bcf]',
            u'liamm[ _]pub',
            u'lien[ _]adq',
            u'lien[ _]ba',
            u'liên[ _]kết[ _]bài[ _]chất[ _]lượng[ _]tốt',
            u'liên[ _]kết[ _]chọn[ _]lọc',
            u'ligam[ _]adq',
            u'ligoelstara',
            u'ligoleginda',
            u'link[ _][afgu]a', u'link[ _]adq', u'link[ _]f[lm]', u'link[ _]km',
            u'link[ _]sm', u'linkfa',
            u'na[ _]lotura',
            u'nasc[ _]ar',
            u'tengill[ _][úg]g',
            u'ua',
            u'yüm yg',
            u'רא',
            u'وصلة مقالة جيدة',
            u'وصلة مقالة مختارة',
        ]

        categories = None
        interwikiLinks = None
        allstars = []

        # The PyWikipediaBot is no longer allowed to touch categories on the
        # German Wikipedia. See
        # http://de.wikipedia.org/wiki/Hilfe_Diskussion:Personendaten/Archiv/1#Position_der_Personendaten_am_.22Artikelende.22
        # ignoring nn-wiki because of the comment line above the interwiki section
        if not self.template and not '{{Personendaten' in text and \
           not '{{SORTIERUNG' in text and not '{{DEFAULTSORT' in text and \
           not self.site.lang in ('et', 'it', 'bg', 'ru'):
            try:
                categories = pywikibot.getCategoryLinks(text, site=self.site)
            # there are categories like [[category:Foo {{#time:Y...}}]]
            except InvalidTitle:
                pass

        if not self.talkpage:  # and pywikibot.calledModuleName() <> 'interwiki':
            subpage = False
            if self.template:
                loc = None
                try:
                    tmpl, loc = moved_links[self.site.lang]
                    del tmpl
                except KeyError:
                    pass
                if loc is not None and loc in self.title:
                    subpage = True
            interwikiLinks = pywikibot.getLanguageLinks(
                text, insite=self.site, template_subpage=subpage)

            # Removing the interwiki
            text = pywikibot.removeLanguageLinks(text, site=self.site)
            # Removing the stars' issue
            starstext = pywikibot.removeDisabledParts(text)
            for star in starsList:
                regex = re.compile(r'(\{\{(?:template:|)%s\|.*?\}\}[\s]*)'
                                   % star, re.I)
                found = regex.findall(starstext)
                if found != []:
                    text = regex.sub('', text)
                    allstars += found

        # Adding categories
        if categories:
            ##Sorting categories in alphabetic order. beta test only on Persian Wikipedia, TODO fix bug for sorting
            #if self.site.language() == 'fa':
            #   categories.sort()
            ##Taking main cats to top
            #   for name in categories:
            #       if re.search(u"(.+?)\|(.{,1}?)",name.title()) or name.title()==name.title().split(":")[0]+title:
            #            categories.remove(name)
            #            categories.insert(0, name)
            text = pywikibot.replaceCategoryLinks(text, categories,
                                                  site=self.site)
        # Adding stars templates
        if allstars:
            text = text.strip() + self.site.family.interwiki_text_separator
            allstars.sort()
            for element in allstars:
                text += '%s\r\n' % element.strip()
                if pywikibot.verbose:
                    pywikibot.output(u'%s' % element.strip())

        # Adding the interwiki
        if interwikiLinks:
            text = pywikibot.replaceLanguageLinks(text, interwikiLinks,
                                                  site=self.site,
                                                  template=self.template,
                                                  template_subpage=subpage)
        return text
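Stripped of the per-wiki special cases, standardizePageFooter reorders the footer as categories, then star templates, then interwiki links, using the same compat helpers it calls above. A hedged sketch of just that core ordering (the function name is illustrative):

import re
import pywikibot

def normalize_footer(text, site, star_patterns):
    # Core of standardizePageFooter: extract, then re-append in order
    # categories -> star templates -> interwiki links.
    cats = pywikibot.getCategoryLinks(text, site=site)
    links = pywikibot.getLanguageLinks(text, insite=site)
    text = pywikibot.removeLanguageLinks(text, site=site)
    allstars = []
    for star in star_patterns:
        regex = re.compile(r'(\{\{(?:template:|)%s\|.*?\}\}[\s]*)' % star,
                           re.I)
        allstars += regex.findall(text)
        text = regex.sub('', text)
    text = pywikibot.replaceCategoryLinks(text, cats, site=site)
    if allstars:
        text = text.strip() + site.family.interwiki_text_separator
        for element in sorted(allstars):
            text += '%s\r\n' % element.strip()
    return pywikibot.replaceLanguageLinks(text, links, site=site)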
Beispiel #52
0
                                      ['y', 'n'], 'y') == 'y'

    if email:
        preferences.set_all(
            ['wpUserEmail', 'wpEmailFlag', 'wpOpenotifusertalkpages'],
            [email, True, False],
            verbose=True)

    if ssl:
        pop = poplib.POP3_SSL(host, port)
    else:
        pop = poplib.POP3(host, port)

    pop.user(username)
    pop.pass_(password)
    wikipedia.output(unicode(pop.getwelcome()))
    messages = [i.split(' ', 1)[0] for i in pop.list()[1]]
    for i in messages:
        msg = pop.retr(i)
        confirmed = False
        for line in msg[1]:
            if r_mail.search(line):
                confirmed = True
                link = r_mail.search(line).group(1)
                wikipedia.output(u'Confirming %s.' % link)
                confirm(link)

        if not confirmed:
            wikipedia.output(u'Unconfirmed mail!')
        elif do_delete:
            pop.dele(i)
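The snippet above depends on an r_mail pattern and a confirm() helper that are defined elsewhere in the script. A hedged guess at their shape; the confirmation-URL regex below is an assumption for illustration, not taken from the original:

import re
import urllib2

# Assumed: confirmation mails contain a Special:ConfirmEmail link.
r_mail = re.compile(r'(http://\S+/Special:ConfirmEmail/\S+)')

def confirm(link):
    # Fetching the link is enough to confirm the address (assumption).
    urllib2.urlopen(link).read()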
Beispiel #53
0
    def run(self):
        site = pywikibot.getSite()
        newCat = catlib.Category(site, self.newCatTitle)
        # set edit summary message
        if not self.editSummary:
            self.editSummary = i18n.twtranslate(site, 'category-changing') \
                               % {'oldcat':self.oldCat.title(),
                                  'newcat':newCat.title()}

        if self.useSummaryForDeletion and self.editSummary:
            reason = self.editSummary
        else:
            reason = i18n.twtranslate(site, deletion_reason_move) \
                     % {'newcat': self.newCatTitle, 'title': self.newCatTitle}

        # Copy the category contents to the new category page
        copied = False
        oldMovedTalk = None
        if self.oldCat.exists() and self.moveCatPage:
            copied = self.oldCat.copyAndKeep(
                self.newCatTitle, pywikibot.translate(site, cfd_templates))
            # Also move the talk page
            if copied:
                oldTalk = self.oldCat.toggleTalkPage()
                if oldTalk.exists():
                    newTalkTitle = newCat.toggleTalkPage().title()
                    try:
                        talkMoved = oldTalk.move(newTalkTitle, reason)
                    except (pywikibot.NoPage, pywikibot.PageNotSaved), e:
                        # In order: source talk does not exist, or
                        # target talk already exists.
                        pywikibot.output(e.message)
                    else:
                        if talkMoved:
                            oldMovedTalk = oldTalk

                if self.withHistory:
                    # Whether or not there was an old talk page, we write
                    # the page history to the new talk page
                    history = self.oldCat.getVersionHistoryTable()
                    # Set the section title for the old cat's history on the new
                    # cat's talk page.
                    sectionTitle = i18n.twtranslate(site,
                                                    'category-section-title') \
                                   % {'oldcat': self.oldCat.title()}
                    #Should be OK, we are within if self.oldCat.exists()
                    historySection = u'\n== %s ==\n%s' % (sectionTitle,
                                                          history)
                    try:
                        text = newCat.toggleTalkPage().get() + historySection
                    except pywikibot.NoPage:
                        text = historySection
                    try:
                        newCat.toggleTalkPage().put(
                            text,
                            i18n.twtranslate(site, 'category-version-history')
                            % {'oldcat': self.oldCat.title()})
                    except:
                        pywikibot.output(
                            'History of the category has not been saved to new talk page'
                        )
Beispiel #54
0
 def treat(self, page):
     try:
         # Show the title of the page we're working on.
         # Highlight the title in purple.
         pywikibot.output(u"\n\n>>> \03{lightpurple}%s\03{default} <<<"
                          % page.title())
         ccToolkit = CosmeticChangesToolkit(page.site, debug=True,
                                            namespace=page.namespace(),
                                            pageTitle=page.title())
         changedText = ccToolkit.change(page.get())
         if changedText.strip() != page.get().strip():
             if not self.acceptall:
                 choice = pywikibot.inputChoice(
                     u'Do you want to accept these changes?',
                     ['Yes', 'No', 'All', 'Quit'], ['y', 'n', 'a', 'q'], 'n')
                 if choice == 'a':
                     self.acceptall = True
                 elif choice == 'q':
                     self.done = True
                     return
             if self.acceptall or choice == 'y':
                 if self.async:
                     page.put_async(changedText, comment=self.comment)
                 else:
                     page.put(changedText, comment=self.comment)
         else:
             pywikibot.output('No changes were necessary in %s'
                              % page.title())
     except pywikibot.NoPage:
         pywikibot.output("Page %s does not exist?!"
                          % page.title(asLink=True))
     except pywikibot.IsRedirectPage:
         pywikibot.output("Page %s is a redirect; skipping."
                          % page.title(asLink=True))
     except pywikibot.LockedPage:
         pywikibot.output("Page %s is locked?!" % page.title(asLink=True))
     except pywikibot.EditConflict:
         pywikibot.output("An edit conflict has occured at %s."
                          % page.title(asLink=True))
Beispiel #55
0
 def treat(self, page):
     text = self.load(page)
     if text is None:
         return
     cats = page.categories()
     # Show the title of the page we're working on.
     # Highlight the title in purple.
     pywikibot.output(u"\n\n>>> \03{lightpurple}%s\03{default} <<<" %
                      page.title())
     pywikibot.output(u"Current categories:")
     for cat in cats:
         pywikibot.output(u"* %s" % cat.title())
     catpl = pywikibot.Page(self.site,
                            self.newcatTitle,
                            defaultNamespace=14)
     if catpl in cats:
         pywikibot.output(u"%s is already in %s." %
                          (page.title(), catpl.title()))
     else:
         if self.sort:
             catpl = self.sorted_by_last_name(catpl, page)
         pywikibot.output(u'Adding %s' % catpl.title(asLink=True))
         cats.append(catpl)
         text = pywikibot.replaceCategoryLinks(text, cats)
         if not self.save(text, page, self.editSummary):
             pywikibot.output(u'Page %s not saved.' %
                              page.title(asLink=True))
Beispiel #56
0
    def move_to_category(self, article, original_cat, current_cat):
        '''
        Given an article which is in category original_cat, ask the user if
        it should be moved to one of original_cat's subcategories.
        Recursively run through subcategories' subcategories.
        NOTE: current_cat is only used for internal recursion. You should
        always use current_cat = original_cat.
        '''
        pywikibot.output(u'')
        # Show the title of the page where the link was found.
        # Highlight the title in purple.
        pywikibot.output(
            u'Treating page \03{lightpurple}%s\03{default}, currently in \03{lightpurple}%s\03{default}'
            % (article.title(), current_cat.title()))

        # Determine a reasonable amount of context to print
        try:
            full_text = article.get(get_redirect=True)
        except pywikibot.NoPage:
            pywikibot.output(u'Page %s not found.' % article.title())
            return
        try:
            contextLength = full_text.index('\n\n')
        except ValueError:  # substring not found
            contextLength = 500
        if full_text.startswith(u'[['):  # probably an image
            # Add extra paragraph.
            contextLength = full_text.find('\n\n', contextLength + 2)
        if contextLength > 1000 or contextLength < 0:
            contextLength = 500
        print
        pywikibot.output(full_text[:contextLength])
        print

        subcatlist = self.catDB.getSubcats(current_cat)
        supercatlist = self.catDB.getSupercats(current_cat)
        alternatives = u'\n'
        if len(subcatlist) == 0:
            alternatives += u'This category has no subcategories.\n\n'
        if len(supercatlist) == 0:
            alternatives += u'This category has no supercategories.\n\n'
        # show subcategories as possible choices (with numbers)
        for i in range(len(supercatlist)):
            # layout: we don't expect a cat to have more than 10 supercats
            alternatives += (u"u%d - Move up to %s\n" %
                             (i, supercatlist[i].title()))
        for i in range(len(subcatlist)):
            # layout: we don't expect a cat to have more than 100 subcats
            alternatives += (u"%2d - Move down to %s\n" %
                             (i, subcatlist[i].title()))
        alternatives += u" j - Jump to another category\n"
        alternatives += u" s - Skip this article\n"
        alternatives += u" r - Remove this category tag\n"
        alternatives += u" l - list these options again\n"
        alternatives += u" m - more context\n"
        alternatives += (u"Enter - Save category as %s\n" %
                         current_cat.title())
        flag = False
        longchoice = True
        while not flag:
            if longchoice:
                longchoice = False
                pywikibot.output(alternatives)
                choice = pywikibot.input(u"Option:")
            else:
                choice = pywikibot.input(
                    u"Option (#, [j]ump, [s]kip, [r]emove, [l]ist, [m]ore context, [RETURN]):"
                )
            if choice in ['s', 'S']:
                flag = True
            elif choice == '':
                pywikibot.output(u'Saving category as %s' %
                                 current_cat.title())
                if current_cat == original_cat:
                    print 'No changes necessary.'
                else:
                    newcat = u'[[:%s|%s]]' % (current_cat.title(
                        savetitle=True), current_cat.title(
                            withNamespace=False))
                    editsum = i18n.twtranslate(
                        pywikibot.getSite(), 'category-replacing', {
                            'oldcat': original_cat.title(withNamespace=False),
                            'newcat': newcat
                        })
                    if pywikibot.getSite().family.name == "commons":
                        if original_cat.title(withNamespace=False).startswith(
                                "Media needing categories as of"):
                            parts = original_cat.title().split()
                            catstring = u"{{Uncategorized|year=%s|month=%s|day=%s}}" % (
                                parts[-1], parts[-2], parts[-3])
                            if catstring in article.get():
                                article.put(article.get().replace(
                                    catstring, u"[[%s]]" %
                                    current_cat.title(savetitle=True)),
                                            comment=editsum)
                                flag = True
                    if not flag:
                        catlib.change_category(article,
                                               original_cat,
                                               current_cat,
                                               comment=editsum)
                flag = True
            elif choice in ['j', 'J']:
                newCatTitle = pywikibot.input(
                    u'Please enter the category the article should be moved to:'
                )
                newCat = catlib.Category(pywikibot.getSite(),
                                         'Category:' + newCatTitle)
                # recurse into chosen category
                self.move_to_category(article, original_cat, newCat)
                flag = True
            elif choice in ['r', 'R']:
                # remove the category tag
                catlib.change_category(article,
                                       original_cat,
                                       None,
                                       comment=self.editSummary)
                flag = True
            elif choice in ['l', 'L']:
                longchoice = True
            elif choice in ['m', 'M', '?']:
                contextLength += 500
                print
                pywikibot.output(full_text[:contextLength])
                print

                # if categories possibly weren't visible, show them additionally
                # (maybe this should always be shown?)
                if len(full_text) > contextLength:
                    print ''
                    print 'Original categories: '
                    for cat in article.categories():
                        pywikibot.output(u'* %s' % cat.title())
            elif choice[0] == 'u':
                try:
                    choice = int(choice[1:])
                except ValueError:
                    # user entered an unknown command; prompt again
                    continue
                self.move_to_category(article, original_cat,
                                      supercatlist[choice])
                flag = True
            else:
                try:
                    choice = int(choice)
                except ValueError:
                    # user entered an unknown command; prompt again
                    continue
                # recurse into subcategory
                self.move_to_category(article, original_cat,
                                      subcatlist[choice])
                flag = True
Beispiel #57
0
 def getPDFTitle(self, ref, f):
     """
     Use pdfinfo to retrieve title from a PDF.
     Unix-only, I'm afraid.
     """
     pywikibot.output(u'PDF file.')
     fd, infile = tempfile.mkstemp()
     urlobj = os.fdopen(fd, 'r+w')
     urlobj.write(f.read())
     try:
         pdfinfo_out = subprocess.Popen([r"pdfinfo", "/dev/stdin"],
                                        stdin=urlobj,
                                        stdout=subprocess.PIPE,
                                        stderr=subprocess.PIPE,
                                        shell=False).communicate()[0]
         for aline in pdfinfo_out.splitlines():
             if aline.lower().startswith('title'):
                 ref.title = aline.split(None)[1:]
                 ref.title = ' '.join(ref.title)
                 if ref.title != '':
                     pywikibot.output(u'title: %s' % ref.title)
         pywikibot.output(u'PDF done.')
     except ValueError:
         pywikibot.output(u'pdfinfo value error.')
     except OSError:
         pywikibot.output(u'pdfinfo OS error.')
     except:  # Ignore errors
         pywikibot.output(u'PDF processing error.')
         pass
     finally:
         urlobj.close()
         os.unlink(infile)
Beispiel #58
0
class CategoryMoveRobot:
    """Robot to move pages from one category to another."""
    def __init__(self,
                 oldCatTitle,
                 newCatTitle,
                 batchMode=False,
                 editSummary='',
                 inPlace=False,
                 moveCatPage=True,
                 deleteEmptySourceCat=True,
                 titleRegex=None,
                 useSummaryForDeletion=True,
                 withHistory=False):
        site = pywikibot.getSite()
        self.editSummary = editSummary
        self.oldCat = catlib.Category(site, oldCatTitle)
        self.newCatTitle = newCatTitle
        self.inPlace = inPlace
        self.moveCatPage = moveCatPage
        self.batchMode = batchMode
        self.deleteEmptySourceCat = deleteEmptySourceCat
        self.titleRegex = titleRegex
        self.useSummaryForDeletion = useSummaryForDeletion
        self.withHistory = withHistory

    def run(self):
        site = pywikibot.getSite()
        newCat = catlib.Category(site, self.newCatTitle)
        # set edit summary message
        if not self.editSummary:
            self.editSummary = i18n.twtranslate(site, 'category-changing') \
                               % {'oldcat':self.oldCat.title(),
                                  'newcat':newCat.title()}

        if self.useSummaryForDeletion and self.editSummary:
            reason = self.editSummary
        else:
            reason = i18n.twtranslate(site, deletion_reason_move) \
                     % {'newcat': self.newCatTitle, 'title': self.newCatTitle}

        # Copy the category contents to the new category page
        copied = False
        oldMovedTalk = None
        if self.oldCat.exists() and self.moveCatPage:
            copied = self.oldCat.copyAndKeep(
                self.newCatTitle, pywikibot.translate(site, cfd_templates))
            # Also move the talk page
            if copied:
                oldTalk = self.oldCat.toggleTalkPage()
                if oldTalk.exists():
                    newTalkTitle = newCat.toggleTalkPage().title()
                    try:
                        talkMoved = oldTalk.move(newTalkTitle, reason)
                    except (pywikibot.NoPage, pywikibot.PageNotSaved), e:
                        # In order: source talk does not exist, or
                        # target talk already exists.
                        pywikibot.output(e.message)
                    else:
                        if talkMoved:
                            oldMovedTalk = oldTalk

                if self.withHistory:
                    # Whether or not there was an old talk page, we write
                    # the page history to the new talk page
                    history = self.oldCat.getVersionHistoryTable()
                    # Set the section title for the old cat's history on the new
                    # cat's talk page.
                    sectionTitle = i18n.twtranslate(site,
                                                    'category-section-title') \
                                   % {'oldcat': self.oldCat.title()}
                    # Should be OK: we are inside "if self.oldCat.exists()".
                    historySection = u'\n== %s ==\n%s' % (sectionTitle,
                                                          history)
                    try:
                        text = newCat.toggleTalkPage().get() + historySection
                    except pywikibot.NoPage:
                        text = historySection
                    try:
                        newCat.toggleTalkPage().put(
                            text,
                            i18n.twtranslate(site, 'category-version-history')
                            % {'oldcat': self.oldCat.title()})
                    except:
                        pywikibot.output(
                            'History of the category has not been saved to the new talk page'
                        )
                        # TODO: nicer exception handling (not too important);
                        #       first move the page, then tag the version history.

        # Move articles
        gen = pagegenerators.CategorizedPageGenerator(self.oldCat,
                                                      recurse=False)
        preloadingGen = pagegenerators.PreloadingGenerator(gen)
        for article in preloadingGen:
            if not self.titleRegex or re.search(self.titleRegex,
                                                article.title()):
                catlib.change_category(article,
                                       self.oldCat,
                                       newCat,
                                       comment=self.editSummary,
                                       inPlace=self.inPlace)

        # Move subcategories
        gen = pagegenerators.SubCategoriesPageGenerator(self.oldCat,
                                                        recurse=False)
        preloadingGen = pagegenerators.PreloadingGenerator(gen)
        for subcategory in preloadingGen:
            if not self.titleRegex or re.search(self.titleRegex,
                                                subcategory.title()):
                catlib.change_category(subcategory,
                                       self.oldCat,
                                       newCat,
                                       comment=self.editSummary,
                                       inPlace=self.inPlace)

        # Delete the old category and its moved talk page
        if copied and self.deleteEmptySourceCat:
            if self.oldCat.isEmptyCategory():
                confirm = not self.batchMode
                self.oldCat.delete(reason, confirm, mark=True)
                if oldMovedTalk is not None:
                    oldMovedTalk.delete(reason, confirm, mark=True)
            else:
                pywikibot.output('Couldn\'t delete %s - not empty.' %
                                 self.oldCat.title())
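
A typical invocation of this robot (the category titles and summary below are placeholder values) constructs it with the old and new titles and calls run():

# Hypothetical example: rename a category and clean up the old one.
bot = CategoryMoveRobot(oldCatTitle=u'Songs by artist',
                        newCatTitle=u'Songs by performer',
                        batchMode=True,   # delete without confirmation prompts
                        editSummary=u'Renaming per CFD discussion',
                        inPlace=True)     # edit category lines in place
bot.run()
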
Beispiel #59
0
 def httpError(self, err_num, link, pagetitleaslink):
     """Log HTTP Error"""
     pywikibot.output(u'HTTP error (%s) for %s on %s' %
                      (err_num, link, pagetitleaslink),
                      toStdout=True)
Beispiel #60
0
    def run(self):
        """
        Runs the Bot
        """
        pywikibot.setAction(pywikibot.translate(self.site, msg))
        try:
            deadLinks = codecs.open(listof404pages, 'r', 'latin_1').read()
        except IOError:
            pywikibot.output(
                'You need to download http://www.twoevils.org/files/wikipedia/404-links.txt.gz and ungzip it in the same directory'
            )
            raise
        socket.setdefaulttimeout(30)
        editedpages = 0
        for page in self.generator:
            try:
                # Load the page's text from the wiki
                new_text = page.get()
                if not page.canBeEdited():
                    pywikibot.output(u"You can't edit page %s" %
                                     page.title(asLink=True))
                    continue
            except pywikibot.NoPage:
                pywikibot.output(u'Page %s not found' %
                                 page.title(asLink=True))
                continue
            except pywikibot.IsRedirectPage:
                pywikibot.output(u'Page %s is a redirect' %
                                 page.title(asLink=True))
                continue

            for match in linksInRef.finditer(
                    pywikibot.removeDisabledParts(page.get())):
                # linksInRef (defined earlier in the script) matches <ref>
                # tags that contain a bare external link.
                link = match.group(u'url')
                if u'jstor.org' in link:
                    #TODO: Clean URL blacklist
                    continue

                ref = RefLink(link, match.group('name'))
                f = None
                try:
                    socket.setdefaulttimeout(20)
                    try:
                        f = urllib2.urlopen(ref.url.decode("utf8"))
                    except UnicodeError:
                        # Percent-encode non-ASCII bytes and retry;
                        # ':' and '/' are kept literal.
                        ref.url = urllib2.quote(ref.url.encode("utf8"), "://")
                        f = urllib2.urlopen(ref.url)
                    #Try to get Content-Type from server
                    headers = f.info()
                    contentType = headers.getheader('Content-Type')
                    if contentType and not self.MIME.search(contentType):
                        if ref.link.lower().endswith('.pdf') and \
                           not self.ignorepdf:
                            # If file has a PDF suffix
                            self.getPDFTitle(ref, f)
                        else:
                            pywikibot.output(
                                u'\03{lightyellow}WARNING\03{default} : media : %s '
                                % ref.link)
                        if ref.title:
                            if not re.match(
                                    '(?i) *microsoft (word|excel|visio)',
                                    ref.title):
                                ref.transform(ispdf=True)
                                repl = ref.refTitle()
                            else:
                                pywikibot.output(
                                    '\03{lightyellow}WARNING\03{default} : PDF title blacklisted : %s '
                                    % ref.title)
                                repl = ref.refLink()
                        else:
                            repl = ref.refLink()
                        new_text = new_text.replace(match.group(), repl)
                        continue
                    # Get the real URL where we end up (HTTP redirects!)
                    redir = f.geturl()
                    if redir != ref.link and \
                       domain.findall(redir) == domain.findall(link):
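                        # soft404 and dirIndex are module-level regexes in this
                        # script; they flag redirects that land on an error
                        # page or on a bare directory index.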
                        if soft404.search(redir) and \
                           not soft404.search(ref.link):
                            pywikibot.output(
                                u'\03{lightyellow}WARNING\03{default} : Redirect 404 : %s '
                                % ref.link)
                            continue
                        if dirIndex.match(redir) and \
                           not dirIndex.match(ref.link):
                            pywikibot.output(
                                u'\03{lightyellow}WARNING\03{default} : Redirect to root : %s '
                                % ref.link)
                            continue

                    # uncompress if necessary
                    if headers.get('Content-Encoding') in ('gzip', 'x-gzip'):
                        # XXX: small issue here: the whole page is downloaded
                        # through f.read(). It might fetch big files/pages.
                        # However, truncating an encoded gzipped stream is not
                        # an option, for unzipping will fail.
                        compressed = StringIO.StringIO(f.read())
                        f = gzip.GzipFile(fileobj=compressed)

                    # Read at most the first 1,000,000 bytes (about 0.95 MiB)
                    linkedpagetext = f.read(1000000)
                    socket.setdefaulttimeout(None)

                except UnicodeError:
                    #example : http://www.adminet.com/jo/20010615¦/ECOC0100037D.html
                    # in [[fr:Cyanure]]
                    pywikibot.output(
                        u'\03{lightred}Bad link\03{default} : %s in %s' %
                        (ref.url, page.title(asLink=True)))
                    continue
                except urllib2.HTTPError, e:
                    pywikibot.output(
                        u'HTTP error (%s) for %s on %s' %
                        (e.code, ref.url, page.title(asLink=True)),
                        toStdout=True)
                    # 410 Gone, indicates that the resource has been purposely
                    # removed
                    if e.code == 410 or \
                       (e.code == 404 and (u'\t%s\t' % ref.url in deadLinks)):
                        repl = ref.refDead()
                        new_text = new_text.replace(match.group(), repl)
                    continue
                except (urllib2.URLError, socket.error, IOError,
                        httplib.error), e:
                    pywikibot.output(u'Can\'t retrieve page %s : %s' %
                                     (ref.url, e))
                    continue
                except ValueError:
                    # Known httplib bug; search the web for
                    # "httplib raises ValueError reading chunked content".
                    continue