Example #1
0
def main(args):
    """ Grab a bunch of images and tag them if they are not categorized. """
    gen = None
    factory = pagegenerators.GeneratorFactory()

    site = pywikibot.getSite(u'commons', u'commons')
    pywikibot.setSite(site)
    for argument in pywikibot.handleArgs():
        if argument.startswith('-yesterday'):
            gen = uploadedYesterday(site)
        elif argument.startswith('-recentchanges'):
            gen = recentChanges(site=site, delay=120)
        else:
            factory.handleArg(argument)
    # Fall back to whatever the generator factory collected.
    gen = gen or factory.getCombinedGenerator()
    if not gen:
        pywikibot.output(u'You have to specify the generator you want to use '
                         u'for the program!')
        return
    # Only existing, non-redirect file pages (namespace 6) are considered.
    for page in pagegenerators.PreloadingGenerator(gen):
        if (page.exists() and page.namespace() == 6
                and not page.isRedirectPage() and isUncat(page)):
            addUncat(page)
Example #2
0
def main():
    """Walk the wiki's disambiguation category and process each page.

    Command-line arguments (joined by spaces) give the title to start
    from; "!" is used when none is given. Each non-redirect article in
    the disambiguation category is queued together with its outgoing
    links and handed to workon() in batches.
    """
    global mysite, linktrail, page
    start = []
    for arg in pywikibot.handleArgs():
        start.append(arg)
    if start:
        start = " ".join(start)
    else:
        start = "!"
    mysite = pywikibot.getSite()
    linktrail = mysite.linktrail()
    try:
        generator = pagegenerators.CategorizedPageGenerator(
            mysite.disambcategory(), start=start)
    except pywikibot.NoPage:
        pywikibot.output(
            "The bot does not know the disambiguation category for your wiki.")
        raise
    # only work on articles
    generator = pagegenerators.NamespaceFilterPageGenerator(generator, [0])
    generator = pagegenerators.PreloadingGenerator(generator)
    pagestodo = []
    pagestoload = []
    for page in generator:
        if page.isRedirectPage():
            continue
        linked = page.linkedPages()
        pagestodo.append((page, linked))
        pagestoload += linked
        if len(pagestoload) > 49:
            # Batch: preload the collected linked pages, then process the
            # queued disambiguation pages.
            # NOTE(review): the PreloadingGenerator assigned here is never
            # iterated, and pages still queued when the outer loop ends are
            # never processed -- confirm whether this is intentional.
            pagestoload = pagegenerators.PreloadingGenerator(pagestoload)
            for page, links in pagestodo:
                workon(page, links)
            pagestoload = []
            pagestodo = []
def load_word_function(raw):
    """Load the badword list and the whitelist.

    Extract every quoted string that is followed by ", " or ")" from the
    raw page text and return the matches.

    @param raw: text of the real-time page holding the word lists
    @return: list of extracted words (possibly empty)
    """
    # Renamed the pattern from the misleading 'page' to 'word_pattern'.
    word_pattern = re.compile(r"(?:\"|\')(.*?)(?:\"|\')(?:, |\))", re.UNICODE)
    list_loaded = word_pattern.findall(raw)
    if not list_loaded:
        pywikibot.output(u'There was no input on the real-time page.')
    return list_loaded
Example #4
0
def getPhotos(photoset=u'', start_id='', end_id='', interval=100):
    """Loop over a set of Panoramio photos.

    Yields one photo dict at a time, fetching `interval` photos per API
    request until the API reports no more results.

    @param photoset: Panoramio set identifier inserted into the API URL
    @param start_id: unused here -- TODO confirm it can be removed
    @param end_id: unused here -- TODO confirm it can be removed
    @param interval: number of photos requested per API call
    """
    i = 0
    has_more = True
    url = ('http://www.panoramio.com/map/get_panoramas.php?'
           'set=%s&from=%s&to=%s&size=original')
    while has_more:
        gotInfo = False
        maxtries = 10
        tries = 0
        while not gotInfo and tries < maxtries:
            try:
                tries += 1
                panoramioApiPage = urlopen(url % (photoset, i, i + interval))
                contents = panoramioApiPage.read().decode('utf-8')
                gotInfo = True
                i += interval
            except IOError:
                pywikibot.output(u'Got an IOError, let\'s try again')
            except socket.timeout:
                pywikibot.output(u'Got a timeout, let\'s try again')
        if not gotInfo:
            # Bug fix: the original broke out of the retry loop and then
            # called json.loads(contents) with 'contents' unbound (NameError)
            # or stale from the previous batch (endless re-parsing). Stop
            # the iteration instead.
            return
        metadata = json.loads(contents)
        photos = metadata.get(u'photos')
        for photo in photos:
            yield photo
        has_more = metadata.get(u'has_more')
    return
Example #5
0
def isUncat(page):
    """
    Do we want to skip this page.

    If we found a category which is not in the ignore list it means
    that the page is categorized so skip the page.
    If we found a template which is in the ignore list, skip the page.
    """
    pywikibot.output(u'Working on ' + page.title())

    # Any non-ignored category means the page is already categorized.
    for cat in page.categories():
        if cat in ignoreCategories:
            continue
        pywikibot.output(u'Got category ' + cat.title())
        return False

    for rawTemplate in page.templates():
        # Strip of trailing garbage
        name = rawTemplate.title().rstrip('\n').rstrip()
        if name in skipTemplates:
            # Already tagged with a template, skip it
            pywikibot.output(u'Already tagged, skip it')
            return False
        if name in ignoreTemplates:
            # template not relevant for categorization
            pywikibot.output(u'Ignore ' + name)
            continue
        pywikibot.output(u'Not ignoring ' + name)
        return False
    return True
Example #6
0
    def convertAllHTMLTables(self, text):
        """
        Convert all HTML tables in text to wiki syntax.

        Returns the converted text, the number of converted tables and the
        number of warnings that occured.
        """
        text = self.markActiveTables(text)

        converted = 0
        warningsTotal = 0
        warningText = u''

        # Convert tables one at a time until none are left.
        while True:
            table, start, end = self.findTable(text)
            if not table:
                # no more HTML tables left
                break

            # convert the current table
            newTable, tableWarnings, tableMessages = self.convertTable(table)
            warningsTotal += tableWarnings
            for message in tableMessages:
                warningText += 'In table %i: %s' % (converted + 1, message)
            text = text[:start] + newTable + text[end:]
            converted += 1

        pywikibot.output(warningText)
        return text, converted, warningsTotal
Example #7
0
    def _do_insert(self, valuesdict):
        """Build and run an INSERT of one row into `DB`.`table`.

        NOTE(review): values are escaped only by backslashing single
        quotes; backslashes themselves are not escaped, so this is not
        fully injection-safe -- confirm the inputs are trusted.
        (Python 2 code: uses the print statement below.)
        """
        # Column list; keys are escaped through the MySQL connection.
        sqlreq = u"insert into `%(DB)s`.`%(table)s` ("%self.infos
        for i in valuesdict:
            sqlreq += u"`%s`,"%self.connect.escape_string(i)
        # Drop the trailing comma left by the loop.
        sqlreq = sqlreq.strip(',')
        sqlreq += u") values ("
        for i in valuesdict:
            # Escapes the value in place (mutates the caller's dict).
            valuesdict[i]=valuesdict[i].replace("'","\\'")
            sqlreq += u"'%s',"%valuesdict[i]
        sqlreq = sqlreq.strip(',')
        sqlreq += u")"
        try:
            self.cursor.execute(sqlreq)
        except UnicodeError:
            # Retry once with an explicit UTF-8 byte string.
            sqlreq = sqlreq.encode('utf8')
            self.cursor.execute(sqlreq)
        except Exception as e:
            # Dump the failing statement (when verbose) and re-raise.
            if verbose: wikipedia.output(sqlreq)
            raise e

        self.querycount+=1
        if not self.querycount%1000:
            # Progress indicator every 1000 inserts: print the count, then
            # backspace (chr(8)) over it so the next update overwrites it.
            qcstr = str(self.querycount)
            qcstr = qcstr + chr(8)*(len(qcstr)+1)
            if verbose: print qcstr,
Example #8
0
    def withImage(self, institutionItem, invId=217, imageId=18, cacheMaxAge=0):
        '''
        Query Wikidata to fill the cache of monuments we already have an object for

        Returns a dict mapping the image-property value to the item id for
        every item in the institution's collection that has both properties.
        '''
        cache = {}
        collectionId = institutionItem.title().replace(u'Q', u'')
        query = u'CLAIM[195:%s] AND CLAIM[%s] AND CLAIM[%s]'% (collectionId, invId, imageId)

        data = wdquery.WikidataQuery(cacheMaxAge=cacheMaxAge).query(
            wdquery.QuerySet(query), props=[str(imageId)])

        if data.get('status').get('error') == 'OK':
            expectedItems = data.get('status').get('items')
            for prop in data.get('props').get(str(imageId)):
                # FIXME: This will overwrite id's that are used more than once.
                # Use with care and clean up your dataset first
                cache[prop[2]] = prop[0]

            if expectedItems == len(cache):
                pywikibot.output('I now have %s items with an image in cache' % expectedItems)
            else:
                pywikibot.output('I now have %s items with an image in cache, but I expected %s' % (len(cache), expectedItems))

        return cache
    def test_archivebot(self, code=None):
        """Test archivebot for one site.

        Loads a talk page for the given site code, wraps it in a
        DiscussionPage and checks the types and cross-consistency of its
        attributes and of every parsed DiscussionThread.
        """
        site = self.get_site(code)
        # 'de' gets a different talk page because of bug T69663.
        if code != 'de':  # bug T69663
            page = pywikibot.Page(site, 'user talk:xqt')
        else:
            page = pywikibot.Page(site, 'user talk:ladsgroup')
        talk = archivebot.DiscussionPage(page, None)
        self.assertIsInstance(talk.archives, dict)
        self.assertIsInstance(talk.archived_threads, int)
        self.assertTrue(talk.archiver is None)
        self.assertIsInstance(talk.header, basestring)
        self.assertIsInstance(talk.timestripper, TimeStripper)

        self.assertIsInstance(talk.threads, list)
        # THREADS maps a site code to the minimum thread count expected there.
        self.assertGreaterEqual(
            len(talk.threads), THREADS[code],
            u'{0:d} Threads found on {1!s},\n{2:d} or more expected'.format(len(talk.threads), talk, THREADS[code]))

        for thread in talk.threads:
            self.assertIsInstance(thread, archivebot.DiscussionThread)
            self.assertIsInstance(thread.title, basestring)
            self.assertIsInstance(thread.now, datetime)
            self.assertEqual(thread.now, talk.now)
            self.assertIsInstance(thread.ts, TimeStripper)
            self.assertEqual(thread.ts, talk.timestripper)
            self.assertIsInstance(thread.code, basestring)
            self.assertEqual(thread.code, talk.timestripper.site.code)
            self.assertIsInstance(thread.content, basestring)
            try:
                self.assertIsInstance(thread.timestamp, datetime)
            except AssertionError:
                # Dump the offending thread unless this code is a known
                # expected failure; always re-raise.
                if thread.code not in self.expected_failures:
                    pywikibot.output('code {0!s}: {1!s}'.format(thread.code, thread.content))
                raise
Example #10
0
 def load_page(self):
     """Load the page to be archived and break it up into threads."""
     self.header = ''
     self.threads = []
     self.archives = {}
     self.archived_threads = 0
     found = False  # Reading header
     cur_thread = None
     for line in self.get().split('\n'):
         match = re.search('^== *([^=].*?) *== *$', line)
         if match:
             found = True  # Reading threads now
             # A new section heading closes the previous thread.
             if cur_thread:
                 self.threads.append(cur_thread)
             cur_thread = DiscussionThread(match.group(1), self.now,
                                           self.timestripper)
         elif found:
             cur_thread.feed_line(line)
         else:
             self.header += line + '\n'
     if cur_thread:
         self.threads.append(cur_thread)
     # This extra info is not desirable when run under the unittest
     # framework, which may be run either directly or via setup.py
     if pywikibot.calledModuleName() not in ['archivebot_tests', 'setup']:
         pywikibot.output(u'%d Threads found on %s'
                          % (len(self.threads), self))
Example #11
0
    def run(self):
        """Run the robot over the image generator, adding P18 claims."""
        for imagePage in self.generator:
            pywikibot.output(u'Working on %s' % (imagePage.title(),))
            shortTitle = imagePage.title(withNamespace=False)
            if shortTitle in self.withImage:
                pywikibot.output(u'Image is already in use in item %s' % (self.withImage.get(shortTitle)))
                continue

            text = imagePage.get()
            regex = '\s*\|\s*accession number\s*=\s*([^\s]+)\s*'
            match = re.search(regex, text)
            if not match:
                continue
            paintingId = match.group(1).strip()
            pywikibot.output(u'Found ID %s on the image' % (paintingId,))

            if paintingId not in self.withoutImage:
                continue
            pywikibot.output(u'Found an item to add it to!')

            paintingItemTitle = u'Q%s' % (self.withoutImage.get(paintingId),)
            paintingItem = pywikibot.ItemPage(self.repo, title=paintingItemTitle)
            paintingItem.get()

            if u'P18' in paintingItem.claims:
                continue
            newclaim = pywikibot.Claim(self.repo, u'P18')
            newclaim.setTarget(imagePage)
            pywikibot.output('Adding image claim to %s' % paintingItem)
            summary = u'Adding image based on %s' % (paintingId,)
            paintingItem.addClaim(newclaim, summary=summary)
Example #12
0
 def run(self):
     """Start the bot.

     Tags every unused local file with a notification template and
     leaves a message on the uploader's talk page.
     """
     template_image = i18n.translate(self.site,
                                     template_to_the_image)
     template_user = i18n.translate(self.site,
                                    template_to_the_user)
     summary = i18n.translate(self.site, comment, fallback=True)
     # NOTE(review): this checks the untranslated module-level 'comment'
     # rather than the translated 'summary' -- confirm that is intended.
     if not all([template_image, template_user, comment]):
         raise pywikibot.Error(u'This script is not localized for %s site.'
                               % self.site)
     self.summary = summary
     generator = pagegenerators.UnusedFilesGenerator(site=self.site)
     generator = pagegenerators.PreloadingGenerator(generator)
     for image in generator:
         if not image.exists():
             pywikibot.output(u"File '%s' does not exist (see bug 69133)."
                              % image.title())
             continue
         # Use fileUrl() and fileIsShared() to confirm it is local media
         # rather than a local page with the same name as shared media.
         if (image.fileUrl() and not image.fileIsShared() and
                 u'http://' not in image.text):
             if template_image in image.text:
                 pywikibot.output(u"%s done already"
                                  % image.title(asLink=True))
                 continue
             self.append_text(image, u"\n\n" + template_image)
             # Notify the most recent uploader on their talk page.
             uploader = image.getFileVersionHistory().pop(0)['user']
             user = pywikibot.User(image.site, uploader)
             usertalkpage = user.getUserTalkPage()
             msg2uploader = template_user % {'title': image.title()}
             self.append_text(usertalkpage, msg2uploader)
Example #13
0
    def addReleased(self, item, imdbid):
        '''
        Add the first airdate to the item based on the imdbid

        @param item: Wikidata item to update
        @param imdbid: IMDb identifier used to look up the release date
        @return: True if a P1191 claim already exists, False if no usable
            date could be determined; implicitly None after adding a claim
            (unchanged from the original behaviour).
        '''
        pywikibot.output(u'Trying to add date to %s based on %s' % (item, imdbid))
        data = item.get()
        claims = data.get('claims')
        if u'P1191' in claims:
            # The item already has a first-airdate claim.
            return True
        if imdbid not in self.imdbcache:
            return False
        releasedate = self.imdbcache[imdbid].get('released')
        # Robustness fix: 'released' can be missing or empty, in which case
        # re.match(regex, None) would raise TypeError.
        if not releasedate:
            return False
        regex = u'^(\d\d\d\d)-(\d\d)-(\d\d)$'
        match = re.match(regex, releasedate)
        if not match:
            return False

        newdate = pywikibot.WbTime(year=int(match.group(1)),
                                   month=int(match.group(2)),
                                   day=int(match.group(3)),)

        newclaim = pywikibot.Claim(self.repo, u'P1191')
        newclaim.setTarget(newdate)
        pywikibot.output('Adding release date claim %s to %s' % (releasedate, item))
        item.addClaim(newclaim)
        # Source the new claim with the OMDb API URL and today's date.
        refurl = pywikibot.Claim(self.repo, u'P854')
        refurl.setTarget(u'http://www.omdbapi.com/?i=%s' % (imdbid,))
        refdate = pywikibot.Claim(self.repo, u'P813')
        today = datetime.datetime.today()
        date = pywikibot.WbTime(year=today.year, month=today.month, day=today.day)
        refdate.setTarget(date)
        newclaim.addSources([refurl, refdate])
Example #14
0
    def login(self, retry=False):
        """Attempt to log in to the wiki.

        Prompts for the password when none is stored, fetches the login
        cookie and stores it.

        @param retry: if True, discard the password and try again after an
            APIError; otherwise return False on failure.
        @return: True on success, False on failure.
        """
        if not self.password:
            # As we don't want the password to appear on the screen, we set
            # password = True
            self.password = pywikibot.input(
                u'Password for user %(name)s on %(site)s (no characters will '
                u'be shown):' % {'name': self.username, 'site': self.site},
                password=True)
#        self.password = self.password.encode(self.site.encoding())

        pywikibot.output(u"Logging in to %(site)s as %(name)s"
                         % {'name': self.username, 'site': self.site})
        try:
            cookiedata = self.getCookie()
        except pywikibot.data.api.APIError as e:
            pywikibot.error(u"Login failed (%s)." % e.code)
            if retry:
                # Discard the bad password so the next attempt prompts again.
                # NOTE(review): retry=True is propagated, so this keeps
                # retrying until a login succeeds -- confirm intended.
                self.password = None
                return self.login(retry=True)
            else:
                return False
        self.storecookiedata(cookiedata)
        pywikibot.log(u"Should be logged in now")
##        # Show a warning according to the local bot policy
##   FIXME: disabled due to recursion; need to move this to the Site object after
##   login
##        if not self.botAllowed():
##            logger.error(
##                u"Username '%(name)s' is not listed on [[%(page)s]]."
##                 % {'name': self.username,
##                    'page': botList[self.site.family.name][self.site.code]})
##            logger.error(
##"Please make sure you are allowed to use the robot before actually using it!")
##            return False
        return True
Example #15
0
def main(*args):
    """
    Process command line arguments and invoke bot.

    If args is an empty list, sys.argv is used.

    @param args: command line arguments
    @type args: list of unicode
    """
    # Renamed from 'all' to avoid shadowing the builtin all().
    update_all = False
    new = False
    sysop = False
    for arg in pywikibot.handle_args(args):
        if arg in ('-all', '-update'):
            update_all = True
        elif arg == '-new':
            new = True
        elif arg == '-sysop':
            sysop = True
    if update_all:
        refresh_all(sysop=sysop)
    elif new:
        refresh_new(sysop=sysop)
    else:
        # Default: refresh the current site's watchlist and print it.
        site = pywikibot.Site()
        watchlist = refresh(site, sysop=sysop)
        pywikibot.output(u'{0:d} pages in the watchlist.'.format(len(watchlist)))
        for page in watchlist:
            try:
                pywikibot.stdout(page.title())
            except pywikibot.InvalidTitle:
                pywikibot.exception()
Example #16
0
def main(*args):
    """Parse command line options and run ImageFindBot."""
    # Process global args and prepare generator args parser
    local_args = pywikibot.handle_args(args)
    googlecat = False
    collectionid = False
    for arg in local_args:
        if arg.startswith('-googlecat'):
            if len(arg) > 10:
                googlecat = arg[11:]
            else:
                googlecat = pywikibot.input(
                    u'Please enter the category you want to work on:')
        elif arg.startswith('-collectionid'):
            if len(arg) > 13:
                collectionid = arg[14:]
            else:
                collectionid = pywikibot.input(
                    u'Please enter the collectionid you want to work on:')

    if googlecat and collectionid:
        ImageFindBot(googlecat, collectionid).run()
    else:
        pywikibot.output(u'Usage: pwb.py add_google_images.py -googlecat:<category name> -collectionid:Q<123>')
Example #17
0
    def fillCache(self, collectionqid, idProperty, queryoverride=u'', cacheMaxAge=0):
        '''
        Query Wikidata to fill the cache of items we already have an object for

        Returns a dict mapping the id-property value to the item number for
        every item in the collection carrying that property.
        '''
        cache = {}
        if queryoverride:
            query = queryoverride
        else:
            query = u'CLAIM[195:%s] AND CLAIM[%s]' % (collectionqid.replace(u'Q', u''),
                                                      idProperty,)

        data = wdquery.WikidataQuery(cacheMaxAge=cacheMaxAge).query(
            wdquery.QuerySet(query), props=[str(idProperty)])

        if data.get('status').get('error') == 'OK':
            expectedItems = data.get('status').get('items')
            for prop in data.get('props').get(str(idProperty)):
                # FIXME: This will overwrite id's that are used more than once.
                # Use with care and clean up your dataset first
                cache[prop[2]] = prop[0]

            if expectedItems == len(cache):
                pywikibot.output('I now have %s items in cache' % expectedItems)
            else:
                pywikibot.output('I expected %s items, but I have %s items in cache' % (expectedItems, len(cache),))

        return cache
Example #18
0
def subcatquery(enlink, firstsite):
    """Return the subcategory titles of *enlink* on *firstsite*.

    Results are memoized in the module-level ``_cache``. Returns a list of
    titles, or False when the link is empty or the API request fails.

    Fixes: the cache was written under the *normalized* link but read back
    under the *raw* argument, so lookups almost never hit; the key is now
    computed once from the raw argument. Bare ``except:`` clauses were
    narrowed to ``except Exception`` so KeyboardInterrupt/SystemExit pass.
    """
    cache_key = (enlink, firstsite, "subcat_query")
    if _cache.get(cache_key):
        return _cache[cache_key]
    cats = []
    try:
        enlink = (
            unicode(str(enlink), "UTF-8")
            .replace(u"[[", u"")
            .replace(u"]]", u"")
            .replace(u"en:", u"")
            .replace(u"fa:", u"")
        )
    except Exception:
        enlink = enlink.replace(u"[[", u"").replace(u"]]", u"").replace(u"en:", u"").replace(u"fa:", u"")
    enlink = enlink.split(u"#")[0].strip()
    if enlink == u"":
        _cache[cache_key] = False
        return False
    enlink = enlink.replace(u" ", u"_")
    site = pywikibot.Site(firstsite)
    params = {"action": "query", "list": "categorymembers", "cmtitle": enlink, "cmtype": "subcat", "cmlimit": 500}
    try:
        categoryname = pywikibot.data.api.Request(site=site, **params).submit()
        for item in categoryname[u"query"][u"categorymembers"]:
            categoryha = item[u"title"]
            pywikibot.output(categoryha)
            cats.append(categoryha)
        if cats:
            _cache[cache_key] = cats
            return cats
        # NOTE(review): an empty (but successful) result is neither cached
        # nor returned explicitly (falls through to None) -- unchanged from
        # the original; confirm whether False should be cached here too.
    except Exception:
        _cache[cache_key] = False
        return False
def main(*args):
    """
    Process command line arguments and invoke bot.

    If args is an empty list, sys.argv is used.

    @param args: command line arguments
    @type args: list of unicode
    """
    parent = None
    basename = None
    options = {}

    # Process global args and prepare generator args parser
    factory = pagegenerators.GeneratorFactory()

    for arg in pywikibot.handle_args(args):
        if arg == '-always':
            options['always'] = True
        elif arg.startswith('-parent:'):
            parent = arg[len('-parent:'):].strip()
        elif arg.startswith('-basename'):
            basename = arg[len('-basename:'):].strip()
        else:
            factory.handleArg(arg)

    generator = factory.getCombinedGenerator()
    if not (generator and parent and basename):
        pywikibot.output(u'No pages to work on')
        pywikibot.showHelp()
        return
    bot = CreateCategoriesBot(generator, parent, basename, **options)
    bot.run()
    pywikibot.output(u'All done')
Example #20
0
    def showImageList(self, imagelist):
        """Print image list.

        For each image print its description page; if an image of the same
        name already exists on the target wiki, print its page and exit.

        @param imagelist: sequence of image pages to display
        """
        # Idiom fix: enumerate instead of range(len(...)).
        for i, image in enumerate(imagelist):
            print("-" * 60)
            pywikibot.output(u"%s. Found image: %s"
                             % (i, image.title(asLink=True)))
            try:
                # Show the image description page's contents
                pywikibot.output(image.get())
                # look if page already exists with this name.
                # TODO: consider removing this: a different image of the same
                # name may exist on the target wiki, and the bot user may want
                # to upload anyway, using another name.
                try:
                    # Maybe the image is on the target site already
                    targetTitle = '%s:%s' % (self.targetSite.namespaces.FILE,
                                             image.title().split(':', 1)[1])
                    targetImage = pywikibot.Page(self.targetSite, targetTitle)
                    targetImage.get()
                    pywikibot.output(u"Image with this name is already on %s."
                                     % self.targetSite)
                    print("-" * 60)
                    pywikibot.output(targetImage.get())
                    sys.exit()
                except pywikibot.NoPage:
                    # That's the normal case
                    pass
                except pywikibot.IsRedirectPage:
                    pywikibot.output(
                        u"Description page on target wiki is redirect?!")

            except pywikibot.NoPage:
                break
        print("=" * 60)
Example #21
0
def pre(taskid=-1, lock=None, sites=None, continuous=False, main=None):
    """
    Return argument list, site object, and configuration of the script.

    This function also handles default arguments, generates lockfile
    and halt the script if lockfile exists before.

    @param taskid: task identifier used in the start-up log line
    @param lock: whether to honour the lockfile (forced off when continuous)
    @param sites: unused in this function; kept for interface compatibility.
        Default changed from a shared mutable list ([]) to None.
    @param continuous: if True, locking is disabled
    @param main: pass the caller's __name__; "__main__" marks a script run
    """
    import imp

    global info
    if sites is None:
        # Bug fix: avoid the mutable default argument [].
        sites = []
    info["main"] = main == "__main__"
    if continuous:
        lock = False
    pywikibot.handleArgs("-log")
    pywikibot.output("start task #%s at %s" % (taskid, getTime()))
    info["taskid"] = taskid
    info["lock"] = lock
    info["lockfile"] = simplifypath([os.environ["WPROBOT_DIR"], "tmp", info["basescript"] + ".lock"])
    info["continuous"] = continuous
    if os.path.exists(info["lockfile"]) and lock:
        error("lockfile found. unable to execute the script.")
        if info["main"]:
            pywikibot.stopme()
        sys.exit(ExitCode.LockFileError)

    # Create/refresh the (empty) lockfile.
    open(info["lockfile"], "w").close()

    args = pywikibot.handleArgs()  # must be called before Site()
    site = pywikibot.Site()
    info["site"] = site

    confpath = simplifypath([os.environ["WPROBOT_DIR"], "conf", info["basescript"]])

    # Load the per-script configuration module when one exists.
    module = imp.load_source("conf", confpath) if os.path.exists(confpath) else None
    return args, site, module
 def try_to_add(self):
     """Add current page in repo."""
     items = set()
     for linked_page in self.iwlangs.values():
         try:
             items.add(pywikibot.ItemPage.fromPage(linked_page))
         except pywikibot.NoPage:
             warning('Interwiki %s does not exist, skipping...' %
                     linked_page.title(asLink=True))
         except pywikibot.InvalidTitle:
             warning('Invalid title %s, skipping...' %
                     linked_page.title(asLink=True))
     # All interwikis must resolve to exactly one repo item.
     if len(items) != 1:
         warning('Interwiki conflict in %s, skipping...' %
                 self.current_page.title(asLink=True))
         return False
     item = items.pop()
     if self.current_page.site.dbName() in item.sitelinks:
         warning('Interwiki conflict in %s, skipping...' %
                 item.title(asLink=True))
         return False
     output('Adding link to %s' % item.title())
     item.setSitelink(self.current_page)
     return item
Example #23
0
    def findCommonscatLink(self, page=None):
        """Find CommonsCat template on interwiki pages.

        In Pywikibot 2.0, page.interwiki() now returns Link objects,
        not Page objects

        @rtype: unicode, name of a valid commons category
        """
        for langLink in page.langlinks():
            ipage = pywikibot.page.Page(langLink)
            pywikibot.log("Looking for template on %s" % (ipage.title()))
            try:
                if (not ipage.exists() or ipage.isRedirectPage() or
                        ipage.isDisambig()):
                    continue
                found = self.getCommonscatLink(ipage)
                if not found:
                    continue
                (currentTemplate,
                 possibleCommonscat, linkText, Note) = found
                checked = self.checkCommonscatLink(possibleCommonscat)
                if checked != u'':
                    pywikibot.output(
                        u"Found link for %s at [[%s:%s]] to %s."
                        % (page.title(), ipage.site.code,
                           ipage.title(), checked))
                    return checked
            except pywikibot.BadTitle:
                # The interwiki was incorrect
                return u''
        return u''
    def createGraph(self):
        """
        Create graph of the interwiki links.

        For more info see U{http://meta.wikimedia.org/wiki/Interwiki_graphs}
        """
        pywikibot.output(u'Preparing graph for %s'
                         % self.subject.originPage.title())
        # create empty graph
        self.graph = pydot.Dot()
        # self.graph.set('concentrate', 'true')

        self.octagon_sites = self._octagon_site_set()

        # a node for each found page
        for foundPage in self.subject.foundIn:
            self.addNode(foundPage)
        # mark start node by pointing there from a black dot.
        startLabel = self.getLabel(self.subject.originPage)
        self.graph.add_node(pydot.Node('start', shape='point'))
        self.graph.add_edge(pydot.Edge('start', startLabel))
        for foundPage, referrers in self.subject.foundIn.items():
            for referrer in referrers:
                self.addDirectedEdge(foundPage, referrer)
        self.saveGraphFile()
Example #25
0
def fillCaches(collectionqid):
    """
    Build two lookup caches for artworks in a given collection (P195).

    The first cache maps inventory number (P217) -> Qid for items that do
    not yet have an Art UK artwork ID (P1679) link, i.e. the items that
    still need to be processed.  The second cache maps an existing Art UK
    id -> Qid for items that can be skipped.

    param collectionqid: Qid of the collection to query for
    return: tuple (invcache, artukcache)
    """
    invcache = {}
    artukcache = {}

    # FIXME: Do something with the collection qualifier
    query = u'SELECT ?item ?inv ?artukid WHERE { ?item wdt:P195 wd:%s . ?item wdt:P217 ?inv . OPTIONAL { ?item wdt:P1679 ?artukid } } ' % (collectionqid,)
    # Only one SparqlQuery instance is needed (the original code created a
    # second, redundant one right before use).
    sq = pywikibot.data.sparql.SparqlQuery()
    queryresult = sq.select(query)

    for resultitem in queryresult:
        # Strip the entity URI prefix to obtain the bare Qid.
        qid = resultitem.get('item').replace(u'http://www.wikidata.org/entity/', u'')
        if resultitem.get('artukid'):
            artukcache[resultitem.get('artukid')] = qid
        else:
            invcache[resultitem.get('inv')] = qid
    pywikibot.output(u'The query "%s" returned %s with and %s items without an ART UK work link' % (query,
                                                                                                    len(artukcache),
                                                                                                    len(invcache)))
    return (invcache, artukcache)
    def addQualifier(self, item, claim, qual):
        """
        Check if a qualifier is present at the given claim,
        otherwise add it.

        Known issue: This will qualify an already referenced claim,
            this must therefore be tested before.

        param item: itemPage to check
        param claim: Claim to check
        param qual: Qualifier to check
        return: True if the qualifier was added, False otherwise
        """
        # check if already present
        if self.hasQualifier(qual, claim):
            return False

        qClaim = self.make_simple_claim(qual.prop, qual.itis)

        try:
            claim.addQualifier(qClaim)  # writes to database
            pywikibot.output('Adding qualifier %s to %s in %s' %
                             (qual.prop, claim.getID(), item))
            return True
        # Python 3 'as' syntax (the original used the Python-2-only
        # 'except APIError, e' form; the rest of this file uses 'as e').
        except pywikibot.data.api.APIError as e:
            if e.code == u'modification-failed':
                pywikibot.output(u'modification-failed error: '
                                 u'qualifier to %s to %s in %s' %
                                 (qual.prop, claim.getID(), item))
                return False
            else:
                raise pywikibot.Error(
                    'Something went very wrong trying to add a qualifier: %s' %
                    e)
Example #27
0
 def update_or_create_page(self, old_page, new_text):
     """
     Read the current text of old_page, compare it with new_text,
     prompt the user (unless self.acceptall is set), and upload the page.

     param old_page: the Page whose content should be replaced
     param new_text: the proposed replacement text
     """
     # Read the original content
     old_text = old_page.get()
     # Give the user some context
     if old_text != new_text:
         pywikibot.output(new_text)
     pywikibot.showDiff(old_text, new_text)
     # Get a decision
     # NOTE(review): 'prompt' is assigned but never used below.
     prompt = u'Modify this page ?'
     # Did anything change ?
     if old_text == new_text:
         pywikibot.output(u'No changes necessary to %s' % old_page.title());
     else:
         if not self.acceptall:
             # NOTE(review): the listed shortcuts are 'Y'/'n'/'a' while the
             # comparisons below use lowercase — presumably input_choice
             # normalises case; verify against pywikibot.input_choice.
             choice = pywikibot.input_choice(u'Do you want to accept these changes?',  [('Yes', 'Y'), ('No', 'n'), ('All', 'a')], 'N')
             if choice == 'a':
                 self.acceptall = True
         # 'choice' is only unbound when self.acceptall was already True,
         # in which case the short-circuit avoids evaluating it.
         if self.acceptall or choice == 'y':
             # Write out the new version
             # NOTE(review): 'summary' is not defined in this method —
             # presumably a module-level edit summary; verify.
             old_page.put(new_text, summary)
Example #28
0
 def run(self):
     """Run the bot unless the on-wiki enable page switches it off."""
     # The enable page lets an operator disable a server-hosted bot
     # without touching the server itself.
     if self.enable_page():
         super(LonelyPagesBot, self).run()
     else:
         pywikibot.output('The bot is disabled')
    def addReference(self, item, claim, ref):
        """Add a reference if not already present.

        param item: the item on which all of this happens
        param claim: the pywikibot.Claim to be sourced
        param ref: the WD.Reference to add
        return: True if the reference was added, False otherwise
        """
        # check if any of the sources are already present
        # note that this can be in any of its references
        if ref is None:
            return False

        if any(self.hasRef(source.getID(), source.getTarget(), claim)
                for source in ref.source_test):
            return False

        try:
            claim.addSources(ref.get_all_sources())  # writes to database
            pywikibot.output('Adding reference claim to %s in %s' %
                             (claim.getID(), item))
            return True
        # Python 3 'as' syntax (the original used the Python-2-only
        # 'except APIError, e' form; the rest of this file uses 'as e').
        except pywikibot.data.api.APIError as e:
            if e.code == u'modification-failed':
                pywikibot.output(u'modification-failed error: '
                                 u'ref to %s in %s' % (claim.getID(), item))
                return False
            else:
                raise pywikibot.Error(
                    'Something went very wrong trying to add a source: %s' % e)
    def treat_page(self):
        """Check page."""
        # Skip pages outside the allowed namespaces unless overridden.
        in_allowed_ns = self.current_page.namespace() in namespaces
        if not (in_allowed_ns or self.getOption('ignore_ns')):
            output('{page} is not in allowed namespaces, skipping'
                   .format(page=self.current_page.title(asLink=True)))
            return False

        # Collect the interlanguage links present in the wikitext.
        self.iwlangs = pywikibot.textlib.getLanguageLinks(
            self.current_page.text, insite=self.current_page.site)
        if not self.iwlangs:
            output('No interlanguagelinks on {page}'.format(
                page=self.current_page.title(asLink=True)))
            return False

        # Look up the connected Wikidata item, if any.
        try:
            wd_item = pywikibot.ItemPage.fromPage(self.current_page)
        except pywikibot.NoPage:
            wd_item = None

        if wd_item is None:
            wd_item = self.try_to_add()
            if wd_item is None and self.getOption('create'):
                wd_item = self.create_item()

        self.current_item = wd_item
        if wd_item and self.getOption('clean'):
            self.clean_page()
Example #31
0
def main(*args):
    """Process command line arguments and invoke PatrolBot.

    param args: command line arguments; if empty, sys.argv is used
    """
    # This factory is responsible for processing command line arguments
    # that are also used by other scripts and that determine on which pages
    # to work on.
    usercontribs = None
    gen = None
    recentchanges = False
    newpages = False
    repeat = False
    options = {}

    # Parse command line arguments
    local_args = pywikibot.handle_args(args)
    site = pywikibot.Site()
    gen_factory = pagegenerators.GeneratorFactory(site)
    for arg in local_args:
        if arg.startswith('-ask'):
            options['ask'] = True
        elif arg.startswith('-autopatroluserns'):
            options['autopatroluserns'] = True
        elif arg.startswith('-repeat'):
            repeat = True
        elif arg.startswith('-newpages'):
            newpages = True
        elif arg.startswith('-recentchanges'):
            recentchanges = True
        elif arg.startswith('-usercontribs:'):
            usercontribs = arg[len('-usercontribs:'):]
        elif arg.startswith('-versionchecktime:'):
            versionchecktime = arg[len('-versionchecktime:'):]
            options['versionchecktime'] = int(versionchecktime)
        elif arg.startswith('-whitelist:'):
            options['whitelist'] = arg[len('-whitelist:'):]
        else:
            generator = gen_factory.handleArg(arg)
            if not generator and ':' in arg:
                # BUG FIX: str.split(':') discarded everything after a
                # second colon in the value; partition keeps it intact.
                opt, _, value = arg.partition(':')
                options[opt] = value

    if usercontribs:
        user = pywikibot.User(site, usercontribs)
        if user.isAnonymous() or user.isRegistered():
            pywikibot.output('Processing user: {}'.format(usercontribs))
        else:
            pywikibot.warning('User {} does not exist on site {}.'.format(
                usercontribs, site))

    # default behaviour
    if not any((newpages, recentchanges, usercontribs)):
        if site.family.name == 'wikipedia':
            newpages = True
        else:
            recentchanges = True

    bot = PatrolBot(**options)

    # mwparserfromhell is required; bail out with a help message if the
    # import (performed at module level) failed.
    if isinstance(mwparserfromhell, ImportError):
        suggest_help(missing_dependencies=('mwparserfromhell', ))
        return

    if newpages or usercontribs:
        pywikibot.output('Newpages:')
        gen = site.newpages
        feed = api_feed_repeater(gen,
                                 delay=60,
                                 repeat=repeat,
                                 user=usercontribs,
                                 namespaces=gen_factory.namespaces,
                                 recent_new_gen=False)
        bot.run(feed)

    if recentchanges or usercontribs:
        pywikibot.output('Recentchanges:')
        gen = site.recentchanges
        feed = api_feed_repeater(gen,
                                 delay=60,
                                 repeat=repeat,
                                 namespaces=gen_factory.namespaces,
                                 user=usercontribs)
        bot.run(feed)

    pywikibot.output('{0}/{1} patrolled'.format(bot.patrol_counter,
                                                bot.rc_item_counter))
Example #32
0
    def transferImage(self, sourceImagePage):
        """
        Download image and its description, and upload it to another site.

        param sourceImagePage: the FilePage on the source site to transfer
        @return: the filename which was used to upload the image
        """
        sourceSite = sourceImagePage.site
        # NOTE(review): .encode('utf-8') yields bytes — presumably a
        # Python-2-era artefact; on Python 3 the %s output below would
        # show a b'...' literal; verify.
        url = sourceImagePage.fileUrl().encode('utf-8')
        pywikibot.output(u"URL should be: %s" % url)
        # localize the text that should be printed on the image description page
        try:
            description = sourceImagePage.get()
            # try to translate license templates
            if (sourceSite.sitename,
                    self.targetSite.sitename) in licenseTemplates:
                for old, new in licenseTemplates[(
                        sourceSite.sitename,
                        self.targetSite.sitename)].items():
                    new = '{{%s}}' % new
                    old = re.compile('{{%s}}' % old)
                    # replace only outside comments/math/nowiki/pre blocks
                    description = textlib.replaceExcept(
                        description, old, new,
                        ['comment', 'math', 'nowiki', 'pre'])

            description = i18n.twtranslate(
                self.targetSite, 'imagetransfer-file_page_message',
                dict(site=sourceSite, description=description))
            description += '\n\n'
            description += sourceImagePage.getFileVersionHistoryTable()
            # add interwiki link
            if sourceSite.family == self.targetSite.family:
                description += u'\r\n\r\n{0}'.format(sourceImagePage)
        except pywikibot.NoPage:
            description = ''
            pywikibot.output(
                'Image does not exist or description page is empty.')
        except pywikibot.IsRedirectPage:
            description = ''
            pywikibot.output('Image description page is redirect.')
        else:
            # only upload when the description was fetched successfully
            bot = UploadRobot(url=url,
                              description=description,
                              targetSite=self.targetSite,
                              urlEncoding=sourceSite.encoding(),
                              keepFilename=self.keep_name,
                              verifyDescription=not self.keep_name,
                              ignoreWarning=self.ignore_warning)
            # try to upload
            targetFilename = bot.run()
            if targetFilename and self.targetSite.family.name == 'commons' and \
               self.targetSite.code == 'commons':
                # upload to Commons was successful
                reason = i18n.twtranslate(sourceSite,
                                          'imagetransfer-nowcommons_notice')
                # try to delete the original image if we have a sysop account
                if sourceSite.family.name in config.sysopnames and \
                   sourceSite.lang in config.sysopnames[sourceSite.family.name]:
                    if sourceImagePage.delete(reason):
                        return
                if sourceSite.lang in nowCommonsTemplate and \
                   sourceSite.family.name in config.usernames and \
                   sourceSite.lang in config.usernames[sourceSite.family.name]:
                    # add the nowCommons template.
                    pywikibot.output(u'Adding nowCommons template to %s' %
                                     sourceImagePage.title())
                    sourceImagePage.put(
                        sourceImagePage.get() + '\n\n' +
                        nowCommonsTemplate[sourceSite.lang] % targetFilename,
                        summary=reason)
def main(*args):
    """
    Process command line arguments and perform task.

    Checks protected pages for protection templates, adding, fixing or
    removing the templates so they match the page's actual protection
    level (edit and, with -move, move protection).

    If args is an empty list, sys.argv is used.

    @param args: command line arguments
    @type args: list of unicode
    """
    # Loading the comments
    global categoryToCheck, project_inserted
    # always, define a generator to understand if the user sets one,
    # defining what's genFactory
    always = False
    generator = False
    show = False
    moveBlockCheck = False
    protectedpages = False
    protectType = 'edit'
    namespace = 0

    # To prevent Infinite loops
    errorCount = 0

    # Process global args and prepare generator args parser
    local_args = pywikibot.handle_args(args)
    genFactory = pagegenerators.GeneratorFactory()

    # Process local args
    for arg in local_args:
        option, sep, value = arg.partition(':')
        if option == '-always':
            always = True
        elif option == '-move':
            moveBlockCheck = True
        elif option == '-show':
            show = True
        elif option in ('-protectedpages', '-moveprotected'):
            protectedpages = True
            if option == '-moveprotected':
                protectType = 'move'
            if value:
                namespace = int(value)
        else:
            genFactory.handleArg(arg)

    if config.mylang not in project_inserted:
        pywikibot.output(u"Your project is not supported by this script.\n"
                         u"You have to edit the script and add it!")
        return

    site = pywikibot.Site()

    if protectedpages:
        generator = site.protectedpages(namespace=namespace, type=protectType)
    # Take the right templates to use, the category and the comment
    TSP = i18n.translate(site, templateSemiProtection)
    TTP = i18n.translate(site, templateTotalProtection)
    TSMP = i18n.translate(site, templateSemiMoveProtection)
    TTMP = i18n.translate(site, templateTotalMoveProtection)
    TNR = i18n.translate(site, templateNoRegex)
    TU = i18n.translate(site, templateUnique)

    categories = i18n.translate(site, categoryToCheck)
    commentUsed = i18n.twtranslate(site, 'blockpageschecker-summary')
    if not generator:
        generator = genFactory.getCombinedGenerator()
    if not generator:
        generator = []
        pywikibot.output(u'Loading categories...')
        # Define the category if no other generator has been setted
        for CAT in categories:
            cat = pywikibot.Category(site, CAT)
            # Define the generator
            gen = pagegenerators.CategorizedPageGenerator(cat)
            for pageCat in gen:
                generator.append(pageCat)
        pywikibot.output(u'Categories loaded, start!')
    # Main Loop
    if not genFactory.nopreload:
        generator = pagegenerators.PreloadingGenerator(generator, groupsize=60)
    for page in generator:
        pagename = page.title(asLink=True)
        pywikibot.output('Loading %s...' % pagename)
        try:
            text = page.text
        except pywikibot.NoPage:
            pywikibot.output("%s doesn't exist! Skipping..." % pagename)
            continue
        except pywikibot.IsRedirectPage:
            pywikibot.output("%s is a redirect! Skipping..." % pagename)
            if show:
                showQuest(page)
            continue
        # FIXME: This check does not work :
        # PreloadingGenerator cannot set correctly page.editRestriction
        # (see bug T57322)
        # if not page.canBeEdited():
        #    pywikibot.output("%s is sysop-protected : this account can't edit "
        #                     "it! Skipping..." % pagename)
        #    continue
        restrictions = page.protection()
        try:
            editRestr = restrictions['edit']
        except KeyError:
            editRestr = None
        if not page.canBeEdited():
            pywikibot.output(u"%s is protected: "
                             u"this account can't edit it! Skipping..." %
                             pagename)
            continue

        # Understand, according to the template in the page, what should be the
        # protection and compare it with what there really is.
        TemplateInThePage = understandBlock(text, TTP, TSP, TSMP, TTMP, TU)
        # Only to see if the text is the same or not...
        oldtext = text
        # keep track of the changes for each step (edit then move)
        changes = -1

        if not editRestr:
            # page is not edit-protected
            # Deleting the template because the page doesn't need it.
            if not (TTP or TSP):
                raise pywikibot.Error(
                    'This script is not localized to use it on \n{0}. '
                    'Missing "templateSemiProtection" or'
                    '"templateTotalProtection"'.format(site.sitename))

            if TU:
                replaceToPerform = u'|'.join(TTP + TSP + TU)
            else:
                replaceToPerform = u'|'.join(TTP + TSP)
            # first try templates wrapped in <noinclude>, then bare ones
            text, changes = re.subn(
                '<noinclude>(%s)</noinclude>' % replaceToPerform, '', text)
            if changes == 0:
                text, changes = re.subn('(%s)' % replaceToPerform, '', text)
            msg = u'The page is editable for all'
            if not moveBlockCheck:
                msg += u', deleting the template..'
            pywikibot.output(u'%s.' % msg)

        elif editRestr[0] == 'sysop':
            # total edit protection
            if (TemplateInThePage[0] == 'sysop-total' and TTP) or \
               (TemplateInThePage[0] == 'unique' and TU):
                msg = 'The page is protected to the sysop'
                if not moveBlockCheck:
                    msg += ', skipping...'
                pywikibot.output(msg)
            else:
                if not TNR or TU and not TNR[4] or not (TU or TNR[1]):
                    raise pywikibot.Error(
                        'This script is not localized to use it on \n{0}. '
                        'Missing "templateNoRegex"'.format(site.sitename))

                pywikibot.output(
                    u'The page is protected to the sysop, but the '
                    u'template seems not correct. Fixing...')
                if TU:
                    text, changes = re.subn(TemplateInThePage[1], TNR[4], text)
                else:
                    text, changes = re.subn(TemplateInThePage[1], TNR[1], text)

        elif TSP or TU:
            # implicitely editRestr[0] = 'autoconfirmed', edit-Semi-protection
            if TemplateInThePage[0] == 'autoconfirmed-total' or \
               TemplateInThePage[0] == 'unique':
                msg = 'The page is editable only for the autoconfirmed users'
                if not moveBlockCheck:
                    msg += ', skipping...'
                pywikibot.output(msg)
            else:
                if not TNR or TU and not TNR[4] or not (TU or TNR[1]):
                    raise pywikibot.Error(
                        'This script is not localized to use it on \n{0}. '
                        'Missing "templateNoRegex"'.format(site.sitename))
                pywikibot.output(u'The page is editable only for the '
                                 u'autoconfirmed users, but the template '
                                 u'seems not correct. Fixing...')
                if TU:
                    text, changes = re.subn(TemplateInThePage[1], TNR[4], text)
                else:
                    text, changes = re.subn(TemplateInThePage[1], TNR[0], text)

        if changes == 0:
            # We tried to fix edit-protection templates, but it did not work.
            pywikibot.warning('No edit-protection template could be found')

        if moveBlockCheck and changes > -1:
            # checking move protection now
            try:
                moveRestr = restrictions['move']
            except KeyError:
                moveRestr = False
            changes = -1

            if not moveRestr:
                pywikibot.output(u'The page is movable for all, deleting the '
                                 u'template...')
                # Deleting the template because the page doesn't need it.
                if TU:
                    replaceToPerform = u'|'.join(TSMP + TTMP + TU)
                else:
                    replaceToPerform = u'|'.join(TSMP + TTMP)
                text, changes = re.subn(
                    '<noinclude>(%s)</noinclude>' % replaceToPerform, '', text)
                if changes == 0:
                    text, changes = re.subn('(%s)' % replaceToPerform, '',
                                            text)
            elif moveRestr[0] == 'sysop':
                # move-total-protection
                if (TemplateInThePage[0] == 'sysop-move' and TTMP) or \
                   (TemplateInThePage[0] == 'unique' and TU):
                    pywikibot.output(u'The page is protected from moving to '
                                     u'the sysop, skipping...')
                    if TU:
                        # no changes needed, better to revert the old text.
                        text = oldtext
                else:
                    pywikibot.output(u'The page is protected from moving to '
                                     u'the sysop, but the template seems not '
                                     u'correct. Fixing...')
                    if TU:
                        text, changes = re.subn(TemplateInThePage[1], TNR[4],
                                                text)
                    else:
                        text, changes = re.subn(TemplateInThePage[1], TNR[3],
                                                text)

            elif TSMP or TU:
                # implicitely moveRestr[0] = 'autoconfirmed',
                # move-semi-protection
                if TemplateInThePage[0] == 'autoconfirmed-move' or \
                   TemplateInThePage[0] == 'unique':
                    pywikibot.output(u'The page is movable only for the '
                                     u'autoconfirmed users, skipping...')
                    if TU:
                        # no changes needed, better to revert the old text.
                        text = oldtext
                else:
                    pywikibot.output(u'The page is movable only for the '
                                     u'autoconfirmed users, but the template '
                                     u'seems not correct. Fixing...')
                    if TU:
                        text, changes = re.subn(TemplateInThePage[1], TNR[4],
                                                text)
                    else:
                        text, changes = re.subn(TemplateInThePage[1], TNR[2],
                                                text)

            if changes == 0:
                # We tried to fix move-protection templates, but it did not work
                pywikibot.warning('No move-protection template could be found')

        if oldtext != text:
            # Ok, asking if the change has to be performed and do it if yes.
            pywikibot.output(
                color_format('\n\n>>> {lightpurple}{0}{default} <<<',
                             page.title()))
            pywikibot.showDiff(oldtext, text)
            if not always:
                choice = pywikibot.input_choice(
                    u'Do you want to accept these '
                    u'changes?', [('Yes', 'y'), ('No', 'n'), ('All', 'a')],
                    'n')
                if choice == 'a':
                    always = True
            # 'choice' is only unbound when 'always' was already True,
            # in which case the short-circuit avoids evaluating it.
            if always or choice == 'y':
                while True:
                    try:
                        page.put(text, commentUsed, force=True)
                    except pywikibot.EditConflict:
                        pywikibot.output(u'Edit conflict! skip!')
                        break
                    except pywikibot.ServerError:
                        # Sometimes there is this error that's quite annoying
                        # because can block the whole process for nothing.
                        errorCount += 1
                        if errorCount < 5:
                            pywikibot.output(u'Server Error! Wait..')
                            time.sleep(3)
                            continue
                        else:
                            # Prevent Infinite Loops
                            raise pywikibot.ServerError(u'Fifth Server Error!')
                    except pywikibot.SpamfilterError as e:
                        pywikibot.output(u'Cannot change %s because of '
                                         u'blacklist entry %s' %
                                         (page.title(), e.url))
                        break
                    except pywikibot.LockedPage:
                        pywikibot.output(u'The page is still protected. '
                                         u'Skipping...')
                        break
                    except pywikibot.PageNotSaved as error:
                        pywikibot.output(u'Error putting page: %s' %
                                         (error.args, ))
                        break
                    else:
                        # Break only if the errors are one after the other
                        errorCount = 0
                        break
Example #34
0
    def move_to_category(self, article, original_cat, current_cat):
        """
        Ask the user if the article should be moved to a subcategory.

        Given an article which is in category original_cat, interactively
        offer original_cat's subcategories (and supercategories) as the
        new location, recursing through subcategories' subcategories.

        NOTE: current_cat is only used for internal recursion. You should
        always use current_cat = original_cat.
        """
        pywikibot.output(u'')
        # Show the title of the page where the link was found.
        # Highlight the title in purple.
        pywikibot.output(u'Treating page \03{lightpurple}%s\03{default}, '
                         u'currently in \03{lightpurple}%s\03{default}' %
                         (article.title(), current_cat.title()))

        # Determine a reasonable amount of context to print
        try:
            full_text = article.get(get_redirect=True)
        except pywikibot.NoPage:
            pywikibot.output(u'Page %s not found.' % article.title())
            return
        try:
            # show text up to the first blank line (end of lead section)
            contextLength = full_text.index('\n\n')
        except ValueError:  # substring not found
            contextLength = 500
        if full_text.startswith(u'[['):  # probably an image
            # Add extra paragraph.
            contextLength = full_text.find('\n\n', contextLength + 2)
        if contextLength > 1000 or contextLength < 0:
            contextLength = 500

        pywikibot.output('\n' + full_text[:contextLength] + '\n')

        # we need list to index the choice
        subcatlist = list(self.catDB.getSubcats(current_cat))
        supercatlist = list(self.catDB.getSupercats(current_cat))

        if not subcatlist:
            pywikibot.output('This category has no subcategories.\n')
        if not supercatlist:
            pywikibot.output('This category has no supercategories.\n')
        # show subcategories as possible choices (with numbers)
        for i, supercat in enumerate(supercatlist):
            # layout: we don't expect a cat to have more than 10 supercats
            pywikibot.output(u'u%d - Move up to %s' % (i, supercat.title()))
        for i, subcat in enumerate(subcatlist):
            # layout: we don't expect a cat to have more than 100 subcats
            pywikibot.output(u'%2d - Move down to %s' % (i, subcat.title()))
        pywikibot.output(' j - Jump to another category')
        pywikibot.output(' s - Skip this article')
        pywikibot.output(' r - Remove this category tag')
        pywikibot.output(
            ' ? - Print first part of the page (longer and longer)')
        pywikibot.output(u'Enter - Save category as %s' % current_cat.title())

        # prompt until the user picks an action that ends this article
        flag = False
        while not flag:
            pywikibot.output('')
            choice = pywikibot.input(u'Choice:')
            if choice in ['s', 'S']:
                flag = True
            elif choice == '':
                pywikibot.output(u'Saving category as %s' %
                                 current_cat.title())
                if current_cat == original_cat:
                    pywikibot.output('No changes necessary.')
                else:
                    article.change_category(original_cat,
                                            current_cat,
                                            comment=self.editSummary)
                flag = True
            elif choice in ['j', 'J']:
                newCatTitle = pywikibot.input(u'Please enter the category the '
                                              u'article should be moved to:')
                newCat = pywikibot.Category(
                    pywikibot.Link('Category:' + newCatTitle))
                # recurse into chosen category
                self.move_to_category(article, original_cat, newCat)
                flag = True
            elif choice in ['r', 'R']:
                # remove the category tag
                article.change_category(original_cat,
                                        None,
                                        comment=self.editSummary)
                flag = True
            elif choice == '?':
                contextLength += 500
                pywikibot.output('\n' + full_text[:contextLength] + '\n')

                # if categories possibly weren't visible, show them additionally
                # (maybe this should always be shown?)
                if len(full_text) > contextLength:
                    pywikibot.output('')
                    pywikibot.output('Original categories: ')
                    for cat in article.categories():
                        pywikibot.output(u'* %s' % cat.title())
            elif choice[0] == 'u':
                # 'uN' moves up to supercategory number N
                try:
                    choice = int(choice[1:])
                except ValueError:
                    # user pressed an unknown command. Prompt him again.
                    continue
                self.move_to_category(article, original_cat,
                                      supercatlist[choice])
                flag = True
            else:
                # a bare number moves down to that subcategory
                try:
                    choice = int(choice)
                except ValueError:
                    # user pressed an unknown command. Prompt him again.
                    continue
                # recurse into subcategory
                self.move_to_category(article, original_cat,
                                      subcatlist[choice])
                flag = True
Example #35
0
 def treat(self, page):
     """Add the configured category to the page unless already present."""
     page_text = self.load(page)
     if page_text is None:
         return
     current_cats = list(page.categories())
     # Show the title of the page we're working on.
     # Highlight the title in purple.
     pywikibot.output(u"\n\n>>> \03{lightpurple}%s\03{default} <<<" %
                      page.title())
     pywikibot.output(u"Current categories:")
     for existing in current_cats:
         pywikibot.output(u"* %s" % existing.title())
     # Capitalise the first letter unless the wiki keeps titles lowercase.
     target_title = self.newcatTitle
     if not page.site.nocapitalize:
         target_title = target_title[:1].upper() + target_title[1:]
     target_cat = pywikibot.Page(page.site, target_title, ns=14)
     if target_cat in current_cats:
         pywikibot.output(u"%s is already in %s." %
                          (page.title(), target_cat.title()))
         return
     if self.sort:
         target_cat = self.sorted_by_last_name(target_cat, page)
     pywikibot.output(u'Adding %s' % target_cat.title(asLink=True))
     current_cats.append(target_cat)
     page_text = pywikibot.replaceCategoryLinks(page_text, current_cats)
     if not self.save(page_text, page, target_title):
         pywikibot.output(u'Page %s not saved.' %
                          page.title(asLink=True))
Example #36
0
    def login(self, retry=False, autocreate=False):
        """
        Attempt to log into the server.

        @param retry: infinitely retry if the API returns an unknown error
        @type retry: bool

        @param autocreate: if true, allow auto-creation of the account
                           using unified login
        @type autocreate: bool

        @raises NoUsername: Username is not recognised by the site.
        @return: True on success, False if login failed and retry is off.
        """
        if not self.password:
            # First check that the username exists,
            # to avoid asking for a password that will not work.
            if not autocreate:
                self.check_user_exists()

            # As we don't want the password to appear on the screen, we set
            # password = True
            self.password = pywikibot.input(
                u'Password for user %(name)s on %(site)s (no characters will '
                u'be shown):' % {
                    'name': self.login_name,
                    'site': self.site
                },
                password=True)

        pywikibot.output(u"Logging in to %(site)s as %(name)s" % {
            'name': self.login_name,
            'site': self.site
        })
        try:
            cookiedata = self.getCookie()
        except pywikibot.data.api.APIError as e:
            # Map the known API error codes to NoUsername exceptions.
            pywikibot.error(u"Login failed (%s)." % e.code)
            if e.code == 'NotExists':
                raise NoUsername(u"Username '%s' does not exist on %s" %
                                 (self.login_name, self.site))
            elif e.code == 'Illegal':
                raise NoUsername(u"Username '%s' is invalid on %s" %
                                 (self.login_name, self.site))
            elif e.code == 'readapidenied':
                raise NoUsername(
                    'Username "{0}" does not have read permissions on '
                    '{1}'.format(self.login_name, self.site))
            elif e.code == 'Failed':
                raise NoUsername(
                    'Username "{0}" does not have read permissions on '
                    '{1}\n.{2}'.format(self.login_name, self.site, e.info))
            # TODO: investigate other unhandled API codes (bug T75539)
            if retry:
                # Discard the stored password and ask again on the retry.
                self.password = None
                return self.login(retry=True)
            else:
                return False
        self.storecookiedata(cookiedata)
        pywikibot.log(u"Should be logged in now")
        #        # Show a warning according to the local bot policy
        #   FIXME: disabled due to recursion; need to move this to the Site object after
        #   login
        #        if not self.botAllowed():
        #            logger.error(
        #                u"Username '%(name)s' is not listed on [[%(page)s]]."
        #                 % {'name': self.username,
        #                    'page': botList[self.site.family.name][self.site.code]})
        #            logger.error(
        # "Please make sure you are allowed to use the robot before actually using it!")
        #            return False
        return True
Example #37
0
def verbose_output(string):
    """Print *string* only when global verbose output is enabled."""
    if not pywikibot.config.verbose_output:
        return
    pywikibot.output(string)
Example #38
0
def main(*args) -> None:
    """
    Process command line arguments and invoke bot.

    If args is an empty list, sys.argv is used.

    :param args: command line arguments
    :type args: str
    """
    url = ''
    description = []
    summary = None
    keep_filename = False
    always = False
    use_filename = None
    filename_prefix = None
    verify_description = True
    aborts = set()
    ignorewarn = set()
    chunk_size = 0
    asynchronous = False
    recursive = False
    description_file = None

    # process all global bot args
    # returns a list of non-global args, i.e. args for upload.py
    local_args = pywikibot.handle_args(args)
    for option in local_args:
        arg, _, value = option.partition(':')
        if arg == '-always':
            keep_filename = True
            always = True
            verify_description = False
        elif arg == '-recursive':
            recursive = True
        elif arg == '-keep':
            keep_filename = True
        elif arg == '-filename':
            use_filename = value
        elif arg == '-prefix':
            filename_prefix = value
        elif arg == '-summary':
            summary = value
        elif arg == '-noverify':
            verify_description = False
        elif arg == '-abortonwarn':
            # Collect warning codes to abort on; a bare -abortonwarn
            # means abort on every warning.
            if value and aborts is not True:
                aborts.add(value)
            else:
                aborts = True
        elif arg == '-ignorewarn':
            # Collect warning codes to ignore; a bare -ignorewarn
            # means ignore every warning.
            if value and ignorewarn is not True:
                ignorewarn.add(value)
            else:
                ignorewarn = True
        elif arg == '-chunked':
            match = CHUNK_SIZE_REGEX.match(option)
            chunk_size = get_chunk_size(match)
        elif arg == '-async':
            asynchronous = True
        elif arg == '-descfile':
            description_file = value
        elif not url:
            # First free-standing argument is the file/URL to upload.
            url = option
        else:
            # Remaining free-standing arguments form the description.
            description.append(option)

    description = ' '.join(description)

    if description_file:
        if description:
            pywikibot.error('Both a description and a -descfile were '
                            'provided. Please specify only one of those.')
            return
        with codecs.open(description_file,
                         encoding=pywikibot.config.textfile_encoding) as f:
            description = f.read().replace('\r\n', '\n')

    # Prompt until we have a URL or an existing local path; in -always
    # mode we cannot prompt, so fall through with url = None instead.
    while not ('://' in url or os.path.exists(url)):
        if not url:
            error = 'No input filename given.'
        else:
            error = 'Invalid input filename given.'
            if not always:
                error += ' Try again.'
        if always:
            url = None
            break
        pywikibot.output(error)
        url = pywikibot.input('URL, file or directory where files are now:')

    if always and (aborts is not True and ignorewarn is not True
                   or not description or url is None):
        additional = ''
        missing = []
        if url is None:
            missing += ['filename']
            additional = error + ' '
        # FIX: description is always a str here (possibly empty), never
        # None, so test truthiness instead of identity with None —
        # otherwise a missing description was never reported.
        if not description:
            missing += ['description']
        if aborts is not True and ignorewarn is not True:
            additional += ('Either -ignorewarn or -abortonwarn must be '
                           'defined for all codes. ')
        additional += 'Unable to run in -always mode'
        suggest_help(missing_parameters=missing, additional_text=additional)
        return

    if os.path.isdir(url):
        file_list = []
        for directory_info in os.walk(url):
            if not recursive:
                # Do not visit any subdirectories
                directory_info[1][:] = []
            for dir_file in directory_info[2]:
                file_list.append(os.path.join(directory_info[0], dir_file))
        url = file_list
    else:
        url = [url]

    bot = UploadRobot(url,
                      description=description,
                      use_filename=use_filename,
                      keep_filename=keep_filename,
                      verify_description=verify_description,
                      aborts=aborts,
                      ignore_warning=ignorewarn,
                      chunk_size=chunk_size,
                      asynchronous=asynchronous,
                      always=always,
                      summary=summary,
                      filename_prefix=filename_prefix)
    bot.run()
Example #39
0
    def run(self):
        """Run thread.

        Pop queued dead-link reports one at a time and post each report
        on the talk page of the page that contains the dead link.
        """
        # Poll the queue until killed, or until finishing is set and the
        # queue has been drained.
        while not self.killed:
            if len(self.queue) == 0:
                if self.finishing:
                    break
                else:
                    time.sleep(0.1)
            else:
                with self.semaphore:
                    # Each queue item: (url, errorReport, containingPage,
                    # archiveURL); take the oldest entry (FIFO).
                    url, errorReport, containingPage, archiveURL = \
                        self.queue[0]
                    self.queue = self.queue[1:]
                    talkPage = containingPage.toggleTalkPage()
                    pywikibot.output(color_format(
                        '{lightaqua}** Reporting dead link on '
                        '{0}...{default}',
                        talkPage.title(as_link=True)))
                    try:
                        content = talkPage.get() + '\n\n\n'
                        # Skip if the URL was already reported on the
                        # talk page.
                        if url in content:
                            pywikibot.output(color_format(
                                '{lightaqua}** Dead link seems to have '
                                'already been reported on {0}{default}',
                                talkPage.title(as_link=True)))
                            continue
                    except (pywikibot.NoPage, pywikibot.IsRedirectPage):
                        # No talk page yet (or it is a redirect): start
                        # from empty content.
                        content = ''

                    if archiveURL:
                        archiveMsg = '\n' + \
                                     i18n.twtranslate(
                                         containingPage.site,
                                         'weblinkchecker-archive_msg',
                                         {'URL': archiveURL})
                    else:
                        archiveMsg = ''
                    # The caption will default to "Dead link". But if there
                    # is already such a caption, we'll use "Dead link 2",
                    # "Dead link 3", etc.
                    caption = i18n.twtranslate(containingPage.site,
                                               'weblinkchecker-caption')
                    i = 1
                    count = ''
                    # Check if there is already such a caption on
                    # the talk page.
                    while re.search('= *{0}{1} *='.format(caption, count),
                                    content) is not None:
                        i += 1
                        count = ' ' + str(i)
                    caption += count
                    # Append a new talk-page section with the error report
                    # and the optional archive link, signed (--~~~~).
                    content += '== {0} ==\n\n{1}\n\n{2}{3}\n--~~~~'.format(
                        caption, i18n.twtranslate(containingPage.site,
                                                  'weblinkchecker-report'),
                        errorReport, archiveMsg)

                    comment = '[[{0}#{1}|→]] {2}'.format(
                        talkPage.title(), caption,
                        i18n.twtranslate(containingPage.site,
                                         'weblinkchecker-summary'))
                    try:
                        talkPage.put(content, comment)
                    except pywikibot.SpamfilterError as error:
                        pywikibot.output(color_format(
                            '{lightaqua}** SpamfilterError while trying to '
                            'change {0}: {1}{default}',
                            talkPage.title(as_link=True), error.url))
Example #40
0
    def treat(self, page):
        """It loads the given page, does some changes, and saves it."""
        choice = False
        try:
            # page: title, date, username, comment, loginfo, rcid, token
            username = page['user']
            # when the feed isn't from the API, it used to contain
            # '(not yet written)' or '(page does not exist)' when it was
            # a redlink
            rcid = page['rcid']
            title = page['title']
            if not rcid:
                raise Exception('rcid not present')

            # check whether we have wrapped around to higher rcids
            # which indicates a new RC feed is being processed
            if rcid > self.last_rcid:
                # refresh the whitelist
                self.load_whitelist()
                self.repeat_start_ts = time.time()

            if pywikibot.config.verbose_output or self.getOption('ask'):
                pywikibot.output(
                    'User {0} has created or modified page {1}'.format(
                        username, title))

            if (self.getOption('autopatroluserns') and page['ns'] in (2, 3)):
                # simple rule to whitelist any user editing their own userspace
                if title.partition(':')[2].split('/')[0].startswith(username):
                    verbose_output('{0} is whitelisted to modify {1}'.format(
                        username, title))
                    choice = True

            if not choice and username in self.whitelist:
                if self.in_list(self.whitelist[username], title):
                    verbose_output('{0} is whitelisted to modify {1}'.format(
                        username, title))
                    choice = True

            if self.getOption('ask'):
                choice = pywikibot.input_yn(
                    'Do you want to mark page as patrolled?')

            # Patrol the page
            if choice:
                # list() iterates over patrol() which returns a generator
                list(self.site.patrol(rcid))
                self.patrol_counter = self.patrol_counter + 1
                pywikibot.output('Patrolled {0} (rcid {1}) by user {2}'.format(
                    title, rcid, username))
            else:
                verbose_output('Skipped')

            if rcid > self.highest_rcid:
                self.highest_rcid = rcid
            self.last_rcid = rcid
            self.rc_item_counter = self.rc_item_counter + 1

        except pywikibot.NoPage:
            # FIX: 'title' is a plain string here, not a method;
            # calling title(as_link=True) raised TypeError while
            # reporting the skip.
            pywikibot.output('Page {0} does not exist; skipping.'.format(
                title))
        except pywikibot.IsRedirectPage:
            pywikibot.output('Page {0} is a redirect; skipping.'.format(
                title))
Example #41
0
    def run(self):
        """Run the bot.

        For each generated page, try to delete the local file that is
        duplicated on Commons (optionally replacing remaining usages
        first when the names differ).
        """
        commons = self.commons
        comment = self.summary

        for page in self.generator:
            self.current_page = page
            try:
                localImagePage = pywikibot.FilePage(self.site, page.title())
                if localImagePage.file_is_shared():
                    pywikibot.output('File is already on Commons.')
                    continue
                sha1 = localImagePage.latest_file_info.sha1
                filenameOnCommons = self.findFilenameOnCommons(localImagePage)
                if not filenameOnCommons:
                    pywikibot.output('NowCommons template not found.')
                    continue
                commonsImagePage = pywikibot.FilePage(commons, 'Image:'
                                                      + filenameOnCommons)
                # Local and Commons names differ: usages of the local
                # name may need to be replaced before deleting.
                if (localImagePage.title(with_ns=False)
                        != commonsImagePage.title(with_ns=False)):
                    usingPages = list(localImagePage.usingPages())
                    if usingPages and usingPages != [localImagePage]:
                        pywikibot.output(color_format(
                            '"{lightred}{0}{default}" '
                            'is still used in {1} pages.',
                            localImagePage.title(with_ns=False),
                            len(usingPages)))
                        if self.opt.replace is True:
                            pywikibot.output(color_format(
                                'Replacing "{lightred}{0}{default}" by '
                                '"{lightgreen}{1}{default}\".',
                                localImagePage.title(with_ns=False),
                                commonsImagePage.title(with_ns=False)))
                            bot = ImageBot(
                                pg.FileLinksGenerator(localImagePage),
                                localImagePage.title(with_ns=False),
                                commonsImagePage.title(with_ns=False),
                                '', self.opt.replacealways,
                                self.opt.replaceloose)
                            bot.run()
                            # If the image is used with the urlname the
                            # previous function won't work
                            is_used = bool(list(pywikibot.FilePage(
                                self.site,
                                page.title()).usingPages(total=1)))
                            if is_used and self.opt.replaceloose:
                                bot = ImageBot(
                                    pg.FileLinksGenerator(
                                        localImagePage),
                                    localImagePage.title(
                                        with_ns=False, as_url=True),
                                    commonsImagePage.title(with_ns=False),
                                    '', self.opt.replacealways,
                                    self.opt.replaceloose)
                                bot.run()
                            # refresh because we want the updated list
                            usingPages = len(list(pywikibot.FilePage(
                                self.site, page.title()).usingPages()))

                        else:
                            pywikibot.output('Please change them manually.')
                        continue
                    pywikibot.output(color_format(
                        'No page is using "{lightgreen}{0}{default}" '
                        'anymore.',
                        localImagePage.title(with_ns=False)))
                commonsText = commonsImagePage.get()
                if self.opt.replaceonly is False:
                    if sha1 == commonsImagePage.latest_file_info.sha1:
                        pywikibot.output(
                            'The image is identical to the one on Commons.')
                        if len(localImagePage.get_file_history()) > 1:
                            pywikibot.output(
                                'This image has a version history. Please '
                                'delete it manually after making sure that '
                                'the old versions are not worth keeping.')
                            continue
                        if self.opt.always is False:
                            format_str = color_format(
                                '\n\n>>>> Description on {lightpurple}%s'
                                '{default} <<<<\n')
                            pywikibot.output(format_str % page.title())
                            pywikibot.output(localImagePage.get())
                            pywikibot.output(format_str %
                                             commonsImagePage.title())
                            pywikibot.output(commonsText)
                            if pywikibot.input_yn(
                                    'Does the description on Commons contain '
                                    'all required source and license\n'
                                    'information?',
                                    default=False, automatic_quit=False):
                                localImagePage.delete(
                                    '{0} [[:commons:Image:{1}]]'
                                    .format(comment, filenameOnCommons),
                                    prompt=False)
                        else:
                            localImagePage.delete(
                                comment + ' [[:commons:Image:{0}]]'
                                          .format(filenameOnCommons),
                                          prompt=False)
                    else:
                        pywikibot.output('The image is not identical to '
                                         'the one on Commons.')
            except (pywikibot.NoPage, pywikibot.IsRedirectPage) as e:
                # FIX: exceptions are not subscriptable in Python 3;
                # format the exception itself instead of e[0].
                pywikibot.output('{0}'.format(e))
                continue
            else:
                self._treat_counter += 1
        if not self._treat_counter:
            pywikibot.output('No transcluded files found for {0}.'
                             .format(self.ncTemplates()[0]))
        self.exit()
Example #42
0
def main(*args):
    """
    Process command line arguments and invoke bot.

    If args is an empty list, sys.argv is used.

    @param args: command line arguments
    @type args: unicode
    """
    gen = None
    xmlFilename = None
    xmlStart = None
    HTTPignore = []

    if isinstance(memento_client, ImportError):
        warn('memento_client not imported: {0}'.format(memento_client),
             ImportWarning)

    # Process global args and prepare generator args parser
    local_args = pywikibot.handle_args(args)
    genFactory = pagegenerators.GeneratorFactory()

    for arg in local_args:
        if arg == '-talk':
            config.report_dead_links_on_talk = True
        elif arg == '-notalk':
            config.report_dead_links_on_talk = False
        elif arg == '-repeat':
            gen = RepeatPageGenerator()
        elif arg.startswith('-ignore:'):
            HTTPignore.append(int(arg[8:]))
        elif arg.startswith('-day:'):
            config.weblink_dead_days = int(arg[5:])
        elif arg.startswith('-xmlstart'):
            if len(arg) == 9:
                xmlStart = pywikibot.input(
                    'Please enter the dumped article to start with:')
            else:
                xmlStart = arg[10:]
        elif arg.startswith('-xml'):
            if len(arg) == 4:
                xmlFilename = i18n.input('pywikibot-enter-xml-filename')
            else:
                xmlFilename = arg[5:]
        else:
            genFactory.handleArg(arg)

    if xmlFilename:
        # xmlStart is pre-initialized to None, so no try/except NameError
        # dance is needed when -xmlstart was not supplied.
        gen = XmlDumpPageGenerator(xmlFilename, xmlStart,
                                   genFactory.namespaces)

    if not gen:
        gen = genFactory.getCombinedGenerator()
    if gen:
        if not genFactory.nopreload:
            # fetch at least 240 pages simultaneously from the wiki, but more
            # if a high thread number is set.
            pageNumber = max(240, config.max_external_links * 2)
            gen = pagegenerators.PreloadingGenerator(gen, groupsize=pageNumber)
        gen = pagegenerators.RedirectFilterPageGenerator(gen)
        bot = WeblinkCheckerRobot(gen, HTTPignore, config.weblink_dead_days)
        try:
            bot.run()
        finally:
            waitTime = 0
            # Don't wait longer than 30 seconds for threads to finish.
            while countLinkCheckThreads() > 0 and waitTime < 30:
                try:
                    pywikibot.output('Waiting for remaining {0} threads to '
                                     'finish, please wait...'
                                     .format(countLinkCheckThreads()))
                    # wait 1 second
                    time.sleep(1)
                    waitTime += 1
                except KeyboardInterrupt:
                    pywikibot.output('Interrupted.')
                    break
            if countLinkCheckThreads() > 0:
                pywikibot.output('Remaining {0} threads will be killed.'
                                 .format(countLinkCheckThreads()))
                # Threads will die automatically because they are daemonic.
            if bot.history.reportThread:
                bot.history.reportThread.shutdown()
                # wait until the report thread is shut down; the user can
                # interrupt it by pressing CTRL-C.
                try:
                    # FIX: Thread.isAlive() was removed in Python 3.9;
                    # use the is_alive() spelling.
                    while bot.history.reportThread.is_alive():
                        time.sleep(0.1)
                except KeyboardInterrupt:
                    pywikibot.output('Report thread interrupted.')
                    bot.history.reportThread.kill()
            pywikibot.output('Saving history...')
            bot.history.save()
        return True
    else:
        pywikibot.bot.suggest_help(missing_generator=True)
        return False
Example #43
0
def process_entries(cache_path,
                    func,
                    use_accesstime=None,
                    output_func=None,
                    action_func=None):
    """
    Check the contents of the cache.

    File access times are used, where available, to determine whether
    cache files are still being read.  Many modern filesystems disable
    access times; on unix, a remount with 'strictatime' may be needed.

    @param use_accesstime: Whether access times should be used.
    @type use_accesstime: bool tristate:
         - None  = detect
         - False = dont use
         - True  = always use
    """
    if not cache_path:
        cache_path = os.path.join(pywikibot.config2.base_dir, 'apicache')

    if not os.path.exists(cache_path):
        pywikibot.error('%s: no such file or directory' % cache_path)
        return

    # A directory expands to every entry directly inside it.
    if os.path.isdir(cache_path):
        paths = [os.path.join(cache_path, name)
                 for name in os.listdir(cache_path)]
    else:
        paths = [cache_path]

    for path in paths:
        basename = os.path.basename(path)
        dirname = os.path.dirname(path)
        if use_accesstime is not False:
            # Snapshot stat info before the load touches the file.
            stat_before = os.stat(path)

        entry = CacheEntry(dirname, basename)
        try:
            entry._load_cache()
        except ValueError as e:
            pywikibot.error('Failed loading {0}'.format(
                entry._cachefile_path()))
            pywikibot.exception(e, tb=True)
            continue

        if use_accesstime is None:
            # Detect whether reading the file changed its atime.
            stat_after = os.stat(path)
            use_accesstime = stat_before.st_atime != stat_after.st_atime

        if use_accesstime:
            # Reset access times to values before loading cache entry.
            os.utime(path, (stat_before.st_atime, stat_before.st_mtime))
            entry.stinfo = stat_before

        try:
            entry.parse_key()
        except ParseError:
            pywikibot.error(u'Problems parsing %s with key %s' %
                            (entry.filename, entry.key))
            pywikibot.exception()
            continue

        try:
            entry._rebuild()
        except Exception as e:
            pywikibot.error(u'Problems loading %s with key %s, %r' %
                            (entry.filename, entry.key, entry._parsed_key))
            pywikibot.exception(e, tb=True)
            continue

        # Entry passed the filter (or no filter given): emit and/or act.
        if func is None or func(entry):
            if output_func or action_func is None:
                result = entry if output_func is None else output_func(entry)
                if result is not None:
                    pywikibot.output(result)
            if action_func:
                action_func(entry)
Example #44
0
    def check(self, useHEAD=False):
        """
        Return True and the server status message if the page is alive.

        NOTE(review): this is Python 2 era code (httplib, basestring,
        subscripting socket.error) and will need porting to run on
        Python 3 — confirm the target interpreter.

        @param useHEAD: use HEAD instead of GET requests when resolving
            redirects.
        @rtype: tuple of (bool, unicode)
        """
        try:
            wasRedirected = self.resolveRedirect(useHEAD=useHEAD)
        except UnicodeError as error:
            return False, 'Encoding Error: {0} ({1})'.format(
                error.__class__.__name__, error)
        except httplib.error as error:
            return False, 'HTTP Error: {}'.format(error.__class__.__name__)
        except socket.error as error:
            # https://docs.python.org/2/library/socket.html :
            # socket.error :
            # The accompanying value is either a string telling what went
            # wrong or a pair (errno, string) representing an error
            # returned by a system call, similar to the value
            # accompanying os.error
            if isinstance(error, basestring):
                msg = error
            else:
                try:
                    msg = error[1]
                except IndexError:
                    pywikibot.output('### DEBUG information for T57282')
                    raise IndexError(type(error))
            # TODO: decode msg. On Linux, it's encoded in UTF-8.
            # How is it encoded in Windows? Or can we somehow just
            # get the English message?
            return False, 'Socket Error: {}'.format(repr(msg))
        if wasRedirected:
            # Cyclic redirect: the current URL was already visited.
            if self.url in self.redirectChain:
                if useHEAD:
                    # Some servers don't seem to handle HEAD requests properly,
                    # which leads to a cyclic list of redirects.
                    # We simply start from the beginning, but this time,
                    # we don't use HEAD, but GET requests.
                    redirChecker = LinkChecker(
                        self.redirectChain[0],
                        serverEncoding=self.serverEncoding,
                        HTTPignore=self.HTTPignore)
                    return redirChecker.check(useHEAD=False)
                else:
                    urlList = ['[{0}]'.format(url)
                               for url in self.redirectChain + [self.url]]
                    return (False,
                            'HTTP Redirect Loop: {0}'.format(
                                ' -> '.join(urlList)))
            elif len(self.redirectChain) >= 19:
                # Treat ~20 redirect hops as effectively endless.
                if useHEAD:
                    # Some servers don't seem to handle HEAD requests properly,
                    # which leads to a long (or infinite) list of redirects.
                    # We simply start from the beginning, but this time,
                    # we don't use HEAD, but GET requests.
                    redirChecker = LinkChecker(
                        self.redirectChain[0],
                        serverEncoding=self.serverEncoding,
                        HTTPignore=self.HTTPignore)
                    return redirChecker.check(useHEAD=False)
                else:
                    urlList = ['[{0}]'.format(url)
                               for url in self.redirectChain + [self.url]]
                    return (False,
                            'Long Chain of Redirects: {0}'
                            .format(' -> '.join(urlList)))
            else:
                # Follow the next redirect hop recursively.
                redirChecker = LinkChecker(self.url, self.redirectChain,
                                           self.serverEncoding,
                                           HTTPignore=self.HTTPignore)
                return redirChecker.check(useHEAD=useHEAD)
        else:
            try:
                conn = self.getConnection()
            except httplib.error as error:
                return False, 'HTTP Error: {0}'.format(
                    error.__class__.__name__)
            try:
                conn.request('GET', '{0}{1}'.format(self.path, self.query),
                             None, self.header)
            except socket.error as error:
                return False, 'Socket Error: {0}'.format(repr(error[1]))
            try:
                self.response = conn.getresponse()
            except Exception as error:
                return False, 'Error: {0}'.format(error)
            # read the server's encoding, in case we need it later
            self.readEncodingFromResponse(self.response)
            # site down if the server status is between 400 and 499
            alive = self.response.status not in range(400, 500)
            if self.response.status in self.HTTPignore:
                alive = False
            return alive, '{0} {1}'.format(self.response.status,
                                           self.response.reason)
Example #45
0
 def output_range(self, start_context, end_context):
     """Print the slice of the stored text between the given offsets."""
     snippet = self.text[start_context:end_context]
     pywikibot.output(snippet)
Example #46
0
def main():
    """Process command line arguments and invoke bot."""
    args = pywikibot.handleArgs()

    paths = None
    do_delete = False
    command = None
    output = None

    # An empty string marks that the next argument is the value for
    # -c (command) or -o (output).
    for arg in args:
        if command == '':
            command = arg
        elif output == '':
            output = arg
        elif arg == '-delete':
            do_delete = True
        elif arg == '-password':
            command = 'has_password(entry)'
        elif arg == '-c':
            if command:
                pywikibot.error('Only one command may be executed.')
                exit(1)
            command = ''
        elif arg == '-o':
            if output:
                pywikibot.error('Only one output may be defined.')
                exit(1)
            output = ''
        elif paths is None:
            paths = [arg]
        else:
            paths.append(arg)

    if not paths:
        paths = ['apicache', 'tests/apicache']

        # Also check the base directory, unless it is the current directory.
        if os.path.abspath(os.getcwd()) != pywikibot.config2.base_dir:
            paths.append(
                os.path.join(pywikibot.config2.base_dir, 'apicache'))

        # Also check the user home cache, unless it is the config directory.
        if os.path.expanduser('~/.pywikibot') != pywikibot.config2.base_dir:
            paths.append(
                os.path.join(os.path.expanduser('~/.pywikibot'), 'apicache'))

    action_func = CacheEntry._delete if do_delete else None

    output_func = None
    if output:
        output_func = _parse_command(output, 'output')
        if output_func is None:
            return False

    filter_func = None
    if command:
        filter_func = _parse_command(command, 'filter')
        if filter_func is None:
            return False

    announce = len(paths) > 1
    for cache_path in paths:
        if announce:
            pywikibot.output(u'Processing %s' % cache_path)
        process_entries(cache_path,
                        filter_func,
                        output_func=output_func,
                        action_func=action_func)
Example #47
0
def main(*args):
    """
    Process command line arguments and invoke bot.

    If args is an empty list, sys.argv is used.

    @param args: command line arguments
    @type args: str
    """
    add_cat = None
    gen = None
    # summary message
    edit_summary = ''
    # Array which will collect commandline parameters.
    # First element is original text, second element is replacement text.
    commandline_replacements = []
    # A list of 2-tuples of original text and replacement text.
    replacements = []
    # Don't edit pages which contain certain texts.
    exceptions = {
        'title': [],
        'text-contains': [],
        'inside': [],
        'inside-tags': [],
        'require-title': [],  # using a separate requirements dict needs some
    }  # major refactoring of code.

    # Should the elements of 'replacements' and 'exceptions' be interpreted
    # as regular expressions?
    regex = False
    # Predefined fixes from dictionary 'fixes' (see above).
    fixes_set = []
    # the dump's path, either absolute or relative, which will be used
    # if -xml flag is present
    xmlFilename = None
    useSql = False
    sql_query = None
    # will become True when the user presses a ('yes to all') or uses the
    # -always flag.
    acceptall = False
    # Will become True if the user inputs the commandline parameter -nocase
    caseInsensitive = False
    # Will become True if the user inputs the commandline parameter -dotall
    dotall = False
    # Will become True if the user inputs the commandline parameter -multiline
    multiline = False
    # Do all hits when they overlap
    allowoverlap = False
    # Do not recurse replacement
    recursive = False
    # Between a regex and another (using -fix) sleep some time (not to waste
    # too much CPU
    sleep = None
    # Request manual replacements even if replacements are already defined
    manual_input = False
    # Replacements loaded from a file
    replacement_file = None
    replacement_file_arg_misplaced = False

    # Read commandline parameters.

    local_args = pywikibot.handle_args(args)
    genFactory = pagegenerators.GeneratorFactory()

    for arg in local_args:
        if genFactory.handleArg(arg):
            continue
        if arg == '-regex':
            regex = True
        elif arg.startswith('-xmlstart'):
            # xmlStart stays unbound when -xmlstart is absent; that case is
            # resolved by the try/except NameError shortly before use below.
            if len(arg) == 9:
                xmlStart = pywikibot.input(
                    'Please enter the dumped article to start with:')
            else:
                xmlStart = arg[10:]
        elif arg.startswith('-xml'):
            if len(arg) == 4:
                xmlFilename = i18n.input('pywikibot-enter-xml-filename')
            else:
                xmlFilename = arg[5:]
        elif arg.startswith(('-sql', '-mysqlquery')):
            if arg.startswith('-sql'):
                issue_deprecation_warning('The usage of "-sql"',
                                          '-mysqlquery',
                                          1,
                                          ArgumentDeprecationWarning,
                                          since='20180617')
            useSql = True
            sql_query = arg.partition(':')[2]
        elif arg.startswith('-excepttitle:'):
            exceptions['title'].append(arg[13:])
        elif arg.startswith('-requiretitle:'):
            exceptions['require-title'].append(arg[14:])
        elif arg.startswith('-excepttext:'):
            exceptions['text-contains'].append(arg[12:])
        elif arg.startswith('-exceptinside:'):
            exceptions['inside'].append(arg[14:])
        elif arg.startswith('-exceptinsidetag:'):
            exceptions['inside-tags'].append(arg[17:])
        elif arg.startswith('-fix:'):
            fixes_set += [arg[5:]]
        elif arg.startswith('-sleep:'):
            sleep = float(arg[7:])
        elif arg == '-always':
            acceptall = True
        elif arg == '-recursive':
            recursive = True
        elif arg == '-nocase':
            caseInsensitive = True
        elif arg == '-dotall':
            dotall = True
        elif arg == '-multiline':
            multiline = True
        elif arg.startswith('-addcat:'):
            add_cat = arg[8:]
        elif arg.startswith('-summary:'):
            edit_summary = arg[9:]
        elif arg.startswith('-automaticsummary'):
            edit_summary = True
        elif arg.startswith('-allowoverlap'):
            allowoverlap = True
        elif arg.startswith('-manualinput'):
            manual_input = True
        elif arg.startswith('-replacementfile'):
            # Deprecated alias of -pairsfile; it only emits the warning and
            # is otherwise ignored (an attached filename is not processed).
            issue_deprecation_warning('-replacementfile',
                                      '-pairsfile',
                                      2,
                                      ArgumentDeprecationWarning,
                                      since='20160304')
        elif arg.startswith('-pairsfile'):
            # -pairsfile must not appear between an old/new pair; remember
            # the misplacement and report it after argument parsing is done.
            if len(commandline_replacements) % 2:
                replacement_file_arg_misplaced = True

            if arg == '-pairsfile':
                replacement_file = pywikibot.input(
                    'Please enter the filename to read replacements from:')
            else:
                replacement_file = arg[len('-pairsfile:'):]
        else:
            commandline_replacements.append(arg)

    site = pywikibot.Site()

    if len(commandline_replacements) % 2:
        pywikibot.error('Incomplete command line pattern replacement pair.')
        return False

    if replacement_file_arg_misplaced:
        pywikibot.error('-pairsfile used between a pattern replacement pair.')
        return False

    if replacement_file:
        try:
            with codecs.open(replacement_file, 'r', 'utf-8') as f:
                # strip newlines, but not other characters
                file_replacements = f.read().splitlines()
        except (IOError, OSError) as e:
            pywikibot.error('Error loading {0}: {1}'.format(
                replacement_file, e))
            return False

        if len(file_replacements) % 2:
            pywikibot.error(
                '{0} contains an incomplete pattern replacement pair.'.format(
                    replacement_file))
            return False

        # Strip BOM from first line.  str.lstrip() returns a *new* string,
        # so the result must be assigned back; the previous bare expression
        # discarded it and left the BOM in the first pattern.
        file_replacements[0] = file_replacements[0].lstrip('\uFEFF')
        commandline_replacements.extend(file_replacements)

    if not (commandline_replacements or fixes_set) or manual_input:
        old = pywikibot.input('Please enter the text that should be replaced:')
        while old:
            new = pywikibot.input('Please enter the new text:')
            commandline_replacements += [old, new]
            old = pywikibot.input(
                'Please enter another text that should be replaced,'
                '\nor press Enter to start:')

    # The summary stored here won't be actually used but is only an example
    single_summary = None
    for i in range(0, len(commandline_replacements), 2):
        replacement = Replacement(commandline_replacements[i],
                                  commandline_replacements[i + 1])
        if not single_summary:
            single_summary = i18n.twtranslate(
                site, 'replace-replacing', {
                    'description':
                    ' (-{0} +{1})'.format(replacement.old, replacement.new)
                })
        replacements.append(replacement)

    # Perform one of the predefined actions.
    missing_fixes_summaries = []  # which a fixes/replacements miss a summary
    generators_given = bool(genFactory.gens)
    for fix_name in fixes_set:
        try:
            fix = fixes.fixes[fix_name]
        except KeyError:
            pywikibot.output('Available predefined fixes are: {0}'.format(
                ', '.join(fixes.fixes.keys())))
            if not fixes.user_fixes_loaded:
                pywikibot.output('The user fixes file could not be found: '
                                 '{0}'.format(fixes.filename))
            return
        if not fix['replacements']:
            pywikibot.warning('No replacements defined for fix '
                              '"{0}"'.format(fix_name))
            continue
        if 'msg' in fix:
            if isinstance(fix['msg'], UnicodeType):
                set_summary = i18n.twtranslate(site, str(fix['msg']))
            else:
                set_summary = i18n.translate(site, fix['msg'], fallback=True)
        else:
            set_summary = None
        # A generator defined by the fix is only used when the user did not
        # supply one on the command line.
        if not generators_given and 'generator' in fix:
            gen_args = fix['generator']
            if isinstance(gen_args, UnicodeType):
                gen_args = [gen_args]
            for gen_arg in gen_args:
                genFactory.handleArg(gen_arg)
        replacement_set = ReplacementList(fix.get('regex'),
                                          fix.get('exceptions'),
                                          fix.get('nocase'),
                                          set_summary,
                                          name=fix_name)
        # Whether some replacements have a summary, if so only show which
        # have none, otherwise just mention the complete fix
        missing_fix_summaries = []
        for index, replacement in enumerate(fix['replacements'], start=1):
            summary = None if len(replacement) < 3 else replacement[2]
            if not set_summary and not summary:
                missing_fix_summaries.append('"{0}" (replacement #{1})'.format(
                    fix_name, index))
            if chars.contains_invisible(replacement[0]):
                pywikibot.warning('The old string "{0}" contains formatting '
                                  'characters like U+200E'.format(
                                      chars.replace_invisible(replacement[0])))
            if (not callable(replacement[1])
                    and chars.contains_invisible(replacement[1])):
                pywikibot.warning('The new string "{0}" contains formatting '
                                  'characters like U+200E'.format(
                                      chars.replace_invisible(replacement[1])))
            replacement_set.append(
                ReplacementListEntry(
                    old=replacement[0],
                    new=replacement[1],
                    fix_set=replacement_set,
                    edit_summary=summary,
                ))

        # Exceptions specified via 'fix' shall be merged to those via CLI.
        if replacement_set:
            replacements.extend(replacement_set)
            if replacement_set._exceptions is not None:
                for k, v in replacement_set._exceptions.items():
                    if k in exceptions:
                        exceptions[k] = list(set(exceptions[k]) | set(v))
                    else:
                        exceptions[k] = v

        if len(fix['replacements']) == len(missing_fix_summaries):
            missing_fixes_summaries.append(
                '"{0}" (all replacements)'.format(fix_name))
        else:
            missing_fixes_summaries += missing_fix_summaries

    # Ask the user for a summary when none (or only -automaticsummary) was
    # given and at least one replacement would need one.
    if ((not edit_summary or edit_summary is True)
            and (missing_fixes_summaries or single_summary)):
        if single_summary:
            pywikibot.output('The summary message for the command line '
                             'replacements will be something like: ' +
                             single_summary)
        if missing_fixes_summaries:
            pywikibot.output('The summary will not be used when the fix has '
                             'one defined but the following fix(es) do(es) '
                             'not have a summary defined: '
                             '{0}'.format(', '.join(missing_fixes_summaries)))
        if edit_summary is not True:
            edit_summary = pywikibot.input(
                'Press Enter to use this automatic message, or enter a '
                'description of the\nchanges your bot will make:')
        else:
            edit_summary = ''

    # Set the regular expression flags
    flags = re.UNICODE
    if caseInsensitive:
        flags = flags | re.IGNORECASE
    if dotall:
        flags = flags | re.DOTALL
    if multiline:
        flags = flags | re.MULTILINE

    # Pre-compile all regular expressions here to save time later
    for replacement in replacements:
        replacement.compile(regex, flags)

    precompile_exceptions(exceptions, regex, flags)

    if xmlFilename:
        try:
            xmlStart
        except NameError:
            xmlStart = None
        gen = XmlDumpReplacePageGenerator(xmlFilename, xmlStart, replacements,
                                          exceptions, site)
    elif useSql:
        if not sql_query:
            whereClause = 'WHERE (%s)' % ' OR '.join(
                "old_text RLIKE '%s'" %
                prepareRegexForMySQL(old_regexp.pattern)
                for (old_regexp, new_text) in replacements)
            if exceptions:
                # NOTE(review): iterating the `exceptions` dict yields its
                # keys (plain strings), so `exc.pattern` would raise
                # AttributeError here; this legacy no-query branch looks
                # broken -- confirm before relying on -sql/-mysqlquery
                # without an explicit query.
                exceptClause = 'AND NOT (%s)' % ' OR '.join(
                    "old_text RLIKE '%s'" % prepareRegexForMySQL(exc.pattern)
                    for exc in exceptions)
            else:
                exceptClause = ''
        # `or` short-circuits, so whereClause/exceptClause are only
        # evaluated (and only defined) when no explicit query was supplied.
        query = sql_query or """
SELECT page_namespace, page_title
FROM page
JOIN text ON (page_id = old_id)
%s
%s
LIMIT 200""" % (whereClause, exceptClause)
        gen = pagegenerators.MySQLPageGenerator(query)

    gen = genFactory.getCombinedGenerator(gen, preload=True)

    if not gen:
        pywikibot.bot.suggest_help(missing_generator=True)
        return

    bot = ReplaceRobot(gen,
                       replacements,
                       exceptions,
                       allowoverlap,
                       recursive,
                       add_cat,
                       sleep,
                       edit_summary,
                       always=acceptall,
                       site=site)
    site.login()
    bot.run()

    # Explicitly call pywikibot.stopme(). It will make sure the callback is
    # triggered before replace.py is unloaded.
    pywikibot.stopme()
    pywikibot.output('\n{0} pages changed.'.format(bot.changed_pages))
Example #48
0
 def output_range(self, start, end):
     """Show normal context with a red center region.

     The span [self.start, self.end) is wrapped in lightred color
     markers; the surrounding context up to *start*/*end* is printed
     unchanged.
     """
     text = self.text
     pieces = (text[start:self.start],
               '\03{lightred}',
               text[self.start:self.end],
               '\03{default}',
               text[self.end:end])
     pywikibot.output(''.join(pieces))
Example #49
0
    def run(self):
        """
        Start the robot.

        Iterates over self.generator, which yields record dicts (apparently
        Europeana API responses -- each has painting['object'] with
        'proxies', 'about' and 'title' entries).  For each Frans Hals
        Museum painting it either looks up the existing Wikidata item in
        self.paintingIds or creates a new item, then adds any missing
        claims (located in, instance of, creator, date of creation,
        Europeana ID), each sourced with the Europeana URL.
        """
        # Q574961 = Frans Hals Museum; used as target for several claims.
        fhmuseum = pywikibot.ItemPage(self.repo, u'Q574961')
        for painting in self.generator:
            # Make sure it's the Frans Hals Museum
            if painting['object']['proxies'][0]['about'].startswith(
                    u'/proxy/provider/92034/GVNRC_FHM01'):
                # First dcIdentifier entry is the collection's painting id,
                # the second one is a URI pointing at the record.
                paintingId = painting['object']['proxies'][0]['dcIdentifier'][
                    'def'][0].strip()
                uri = painting['object']['proxies'][0]['dcIdentifier']['def'][
                    1].strip()
                europeanaUrl = u'http://europeana.eu/portal/record/%s.html' % (
                    painting['object']['about'], )

                print paintingId
                print uri

                # Fall back to u'anoniem' (Dutch for 'anonymous') when the
                # record names no creator.
                if painting['object']['proxies'][0].get('dcCreator'):
                    dcCreator = painting['object']['proxies'][0]['dcCreator'][
                        'def'][0].strip()
                else:
                    dcCreator = u'anoniem'
                #print dcCreator

                paintingItem = None
                newclaims = []
                # Known painting: reuse the mapped Wikidata item.
                if paintingId in self.paintingIds:
                    paintingItemTitle = u'Q%s' % (
                        self.paintingIds.get(paintingId), )
                    print paintingItemTitle
                    paintingItem = pywikibot.ItemPage(self.repo,
                                                      title=paintingItemTitle)

                else:

                    #print 'bla'
                    #monumentItem = pywikibot.ItemPage(self.repo, title=u'')

                    #print dcCreatorName

                    # Unknown painting: create a fresh item with a Dutch
                    # label and, when a creator is known, en/nl descriptions.
                    data = {
                        'labels': {},
                        'descriptions': {},
                    }

                    data['labels'][u'nl'] = {
                        'language': u'nl',
                        'value': painting['object']['title'][0]
                    }

                    if dcCreator:
                        data['descriptions']['en'] = {
                            'language': u'en',
                            'value': u'painting by %s' % (dcCreator, )
                        }
                        data['descriptions']['nl'] = {
                            'language': u'nl',
                            'value': u'schilderij van %s' % (dcCreator, )
                        }

                    print data

                    identification = {}
                    summary = u'Creating new item with data from %s ' % (
                        europeanaUrl, )
                    pywikibot.output(summary)
                    #monumentItem.editEntity(data, summary=summary)
                    result = self.repo.editEntity(identification,
                                                  data,
                                                  summary=summary)
                    #print result
                    paintingItemTitle = result.get(u'entity').get('id')
                    paintingItem = pywikibot.ItemPage(self.repo,
                                                      title=paintingItemTitle)

                    # Record the collection's painting id on the new item,
                    # sourced with the record URI and qualified with the
                    # collection (P195).
                    newclaim = pywikibot.Claim(
                        self.repo, u'P%s' % (self.paintingIdProperty, ))
                    newclaim.setTarget(paintingId)
                    pywikibot.output('Adding new id claim to %s' %
                                     paintingItem)
                    paintingItem.addClaim(newclaim)

                    newreference = pywikibot.Claim(
                        self.repo, u'P854')  #Add url, isReference=True
                    newreference.setTarget(uri)
                    pywikibot.output('Adding new reference claim to %s' %
                                     paintingItem)
                    newclaim.addSource(newreference)

                    newqualifier = pywikibot.Claim(
                        self.repo, u'P195')  #Add collection, isQualifier=True
                    newqualifier.setTarget(fhmuseum)
                    pywikibot.output('Adding new qualifier claim to %s' %
                                     paintingItem)
                    newclaim.addQualifier(newqualifier)

                    # Separate top-level collection (P195) claim, sourced
                    # with the Europeana URL.
                    collectionclaim = pywikibot.Claim(self.repo, u'P195')
                    collectionclaim.setTarget(fhmuseum)
                    pywikibot.output('Adding collection claim to %s' %
                                     paintingItem)
                    paintingItem.addClaim(collectionclaim)

                    newreference = pywikibot.Claim(
                        self.repo, u'P854')  #Add url, isReference=True
                    newreference.setTarget(europeanaUrl)
                    pywikibot.output('Adding new reference claim to %s' %
                                     paintingItem)
                    collectionclaim.addSource(newreference)

                # Whether found or freshly created: fill in missing claims.
                if paintingItem and paintingItem.exists():

                    data = paintingItem.get()
                    claims = data.get('claims')
                    #print claims

                    # located in
                    if u'P276' not in claims:
                        newclaim = pywikibot.Claim(self.repo, u'P276')
                        newclaim.setTarget(fhmuseum)
                        pywikibot.output('Adding located in claim to %s' %
                                         paintingItem)
                        paintingItem.addClaim(newclaim)

                        newreference = pywikibot.Claim(
                            self.repo, u'P854')  #Add url, isReference=True
                        newreference.setTarget(europeanaUrl)
                        pywikibot.output('Adding new reference claim to %s' %
                                         paintingItem)
                        newclaim.addSource(newreference)

                    # instance of always painting while working on the painting collection
                    if u'P31' not in claims:

                        # Q3305213 = painting
                        dcformatItem = pywikibot.ItemPage(self.repo,
                                                          title='Q3305213')

                        newclaim = pywikibot.Claim(self.repo, u'P31')
                        newclaim.setTarget(dcformatItem)
                        pywikibot.output('Adding instance claim to %s' %
                                         paintingItem)
                        paintingItem.addClaim(newclaim)

                        newreference = pywikibot.Claim(
                            self.repo, u'P854')  #Add url, isReference=True
                        newreference.setTarget(europeanaUrl)
                        pywikibot.output('Adding new reference claim to %s' %
                                         paintingItem)
                        newclaim.addSource(newreference)

                    # creator
                    if u'P170' not in claims and dcCreator:
                        # Search Wikidata for the creator's name and accept a
                        # hit only when its en/nl label or alias matches and
                        # the item has an occupation (P106) plus either sex
                        # or gender (P21) or notable work (P800).
                        creategen = pagegenerators.PreloadingItemGenerator(
                            pagegenerators.WikidataItemGenerator(
                                pagegenerators.SearchPageGenerator(
                                    dcCreator,
                                    step=None,
                                    total=10,
                                    namespaces=[0],
                                    site=self.repo)))

                        newcreator = None

                        for creatoritem in creategen:
                            print creatoritem.title()
                            if creatoritem.get().get('labels').get(
                                    'en') == dcCreator or creatoritem.get(
                                    ).get('labels').get('nl') == dcCreator:
                                print creatoritem.get().get('labels').get('en')
                                print creatoritem.get().get('labels').get('nl')
                                # Check occupation and country of citizinship
                                if u'P106' in creatoritem.get(
                                ).get('claims') and (
                                        u'P21'
                                        in creatoritem.get().get('claims')
                                        or u'P800'
                                        in creatoritem.get().get('claims')):
                                    newcreator = creatoritem
                                    continue
                            elif (creatoritem.get().get('aliases').get('en')
                                  and dcCreator
                                  in creatoritem.get().get('aliases').get('en')
                                  ) or (creatoritem.get().get('aliases').get(
                                      'nl') and dcCreator in creatoritem.get().
                                        get('aliases').get('nl')):
                                if u'P106' in creatoritem.get(
                                ).get('claims') and (
                                        u'P21'
                                        in creatoritem.get().get('claims')
                                        or u'P800'
                                        in creatoritem.get().get('claims')):
                                    newcreator = creatoritem
                                    continue

                        if newcreator:
                            pywikibot.output(newcreator.title())

                            newclaim = pywikibot.Claim(self.repo, u'P170')
                            newclaim.setTarget(newcreator)
                            pywikibot.output('Adding creator claim to %s' %
                                             paintingItem)
                            paintingItem.addClaim(newclaim)

                            newreference = pywikibot.Claim(
                                self.repo, u'P854')  #Add url, isReference=True
                            newreference.setTarget(europeanaUrl)
                            pywikibot.output(
                                'Adding new reference claim to %s' %
                                paintingItem)
                            newclaim.addSource(newreference)

                            #creatoritem = pywikibot.ItemPage(self.repo, creatorpage)
                            print creatoritem.title()
                            print creatoritem.get()

                        else:
                            pywikibot.output('No item found for %s' %
                                             (dcCreator, ))

                    # date of creation
                    if u'P571' not in claims:
                        if painting['object']['proxies'][0].get(
                                'dctermsCreated'):
                            dccreated = painting['object']['proxies'][0][
                                'dctermsCreated']['def'][0].strip()
                            if len(dccreated) == 4:  # It's a year
                                newdate = pywikibot.WbTime(year=dccreated)
                                newclaim = pywikibot.Claim(self.repo, u'P571')
                                newclaim.setTarget(newdate)
                                pywikibot.output(
                                    'Adding date of creation claim to %s' %
                                    paintingItem)
                                paintingItem.addClaim(newclaim)

                                newreference = pywikibot.Claim(
                                    self.repo,
                                    u'P854')  #Add url, isReference=True
                                newreference.setTarget(europeanaUrl)
                                pywikibot.output(
                                    'Adding new reference claim to %s' %
                                    paintingItem)
                                newclaim.addSource(newreference)
                    # The following triple-quoted string is deliberately
                    # disabled legacy code (material used / handle claims);
                    # it is a no-op bare string expression, kept verbatim.
                    '''
                    # material used
                    if u'P186' not in claims:
                        dcFormats = { u'http://vocab.getty.edu/aat/300014078' : u'Q4259259', # Canvas
                                      u'http://vocab.getty.edu/aat/300015050' : u'Q296955', # Oil paint
                                      }
                        if painting['object']['proxies'][0].get('dcFormat') and painting['object']['proxies'][0]['dcFormat'].get('def'):
                            for dcFormat in painting['object']['proxies'][0]['dcFormat']['def']:
                                if dcFormat in dcFormats:
                                    dcformatItem = pywikibot.ItemPage(self.repo, title=dcFormats[dcFormat])

                                    newclaim = pywikibot.Claim(self.repo, u'P186')
                                    newclaim.setTarget(dcformatItem)
                                    pywikibot.output('Adding material used claim to %s' % paintingItem)
                                    paintingItem.addClaim(newclaim)

                                    newreference = pywikibot.Claim(self.repo, u'P854') #Add url, isReference=True
                                    newreference.setTarget(europeanaUrl)
                                    pywikibot.output('Adding new reference claim to %s' % paintingItem)
                                    newclaim.addSource(newreference)
                    
                    # Handle 
                    if u'P1184' not in claims:
                        handleUrl = painting['object']['proxies'][0]['dcIdentifier']['def'][0]
                        handle = handleUrl.replace(u'http://hdl.handle.net/', u'')
                        
                        newclaim = pywikibot.Claim(self.repo, u'P1184')
                        newclaim.setTarget(handle)
                        pywikibot.output('Adding handle claim to %s' % paintingItem)
                        paintingItem.addClaim(newclaim)

                        newreference = pywikibot.Claim(self.repo, u'P854') #Add url, isReference=True
                        newreference.setTarget(europeanaUrl)
                        pywikibot.output('Adding new reference claim to %s' % paintingItem)
                        newclaim.addSource(newreference)
                    '''
                    # Europeana ID
                    if u'P727' not in claims:
                        europeanaID = painting['object']['about'].lstrip('/')

                        newclaim = pywikibot.Claim(self.repo, u'P727')
                        newclaim.setTarget(europeanaID)
                        pywikibot.output('Adding Europeana ID claim to %s' %
                                         paintingItem)
                        paintingItem.addClaim(newclaim)

                        newreference = pywikibot.Claim(
                            self.repo, u'P854')  #Add url, isReference=True
                        newreference.setTarget(europeanaUrl)
                        pywikibot.output('Adding new reference claim to %s' %
                                         paintingItem)
                        newclaim.addSource(newreference)
Example #50
0
 def output(self):
     """Show the pre-formatted suboption text."""
     message = self._output
     pywikibot.output(message)
Example #51
0
	def traitement(self):
		pageTraitees = pywikibot.Page(self.site, u"Wikipédia:Demande de restauration de page/Traitées")
		pageRefusees = pywikibot.Page(self.site, u"Wikipédia:Demande de restauration de page/Refusées")
		list = [(self.main_page, u'Requêtes à traiter'), (self.main_page, u'Requêtes en cours d\'examen'), (pageTraitees, None), (pageRefusees, None)]
		
		for couple in list:
			dict = self.analyse_une_section(page = couple[0], match_debut = couple[1])
			sections = dict['sections']
			
			if not sections:
				continue
			
			for numero_section in sections:
				pywikibot.output('\n')
				titre_section = dict['titres'][numero_section]
				section = sections[numero_section]
				templates = textlib.extract_templates_and_params(section)
				# templates est du type :
				#     [(u'DRP début', {u'date': u'27 février 2010 à 14:56 (CEC)'
				#     , u'statut': u'oui'}), (u'DRP fin', {})]
				
				PaS = False
				found_full_template = False
				for template in templates:
					if template[0] == u'DRP début':
						if not ('statut' in template[1]):
							pywikibot.output(u"pas de paramètre 'statut' trouvé")
							continue
						elif not ('date' in template[1]):
							pywikibot.output(u"pas de paramètre 'date' trouvé")
							continue
						
						found_full_template = True
						statut_actuel = template[1]['statut']
						date = template[1]['date']
						if template[1].has_key(u'PàS'):
							pywikibot.output('phase try 0')
							pywikibot.output(template[1][u'PàS'])
							if template[1][u'PàS'] == 'oui':
								pywikibot.output('phase try 1')
								PaS = True
								page_PaS = None
							elif template[1][u'PàS'] != '':
								pywikibot.output('phase try 2')
								PaS = True
								page_PaS = pywikibot.Page(self.site, u"%s/Suppression" % template[1][u'PàS']).toggleTalkPage()
				
				pywikibot.output(u'found_full_template = %s' % found_full_template)
				
				if not found_full_template:
					pywikibot.output('Fully fulfilled template was not found, skipping to next section.')
					continue
				
				pywikibot.output(u"PaS = %s" % PaS)
				if PaS:
					try:
						pywikibot.output(u"page_PaS = %s" % page_PaS)
					except:
						pywikibot.output(u"no page_PaS")
				
				# Pour enlever les == et les éventuels espaces
				# du titre de la section puis les [[…]] qui sont
				# supprimés de l'URL par MediaWiki.
				titre_section = titre_section[2:-2]
				titre_section = titre_section.strip()
				
				titre_section_SQL = titre_section
				
				titre_section_MediaWiki = titre_section
				titre_section_MediaWiki = titre_section_MediaWiki.replace("[[", "")
				titre_section_MediaWiki = titre_section_MediaWiki.replace("]]", "")
				
				
				pywikibot.output(u"=== %s ===" % titre_section)
				pywikibot.output(u"statut_actuel = %s" % statut_actuel)
				pywikibot.output(u"date = %s" % date)

				
				if statut_actuel not in self.status_knonw:
					# Si le demande de restauration ne possède pas un de ces statuts,
					# il est inutile d'aller plus loin car seuls ceux-ci nécessitent
					# de laisser un message au demandeur.
					continue
				
				# Vérifier si la requête a déjà été analysée par le bot.
				self.database.query('SELECT * FROM drp WHERE titre_section = "%s"' % titre_section_SQL.replace('"', '\\"').encode('utf-8'))
				results=self.database.store_result()
				result=results.fetch_row(maxrows=0)
				if result:
					# Si oui, et si le statut est toujours le même, il n'y a rien à faire
					statut_traite = result[0][1]
					
					pywikibot.output(statut_traite)
					
					# La vérification d'un éventuel lancement d'une PàS technique
					# pour la restauration n'est faite que par la suite, le statut
					# 'oui_PaS' ne peut donc pas encore être le statut_actuel, 
					# même si une PàS a été lancée !
					# On remplace donc l'éventuel statut traité 'oui_PaS' par un
					# simple 'oui'.
					if statut_traite == 'oui_PaS':
						statut_traite = 'oui'
					
					if statut_traite.decode('utf-8') == statut_actuel:
						# Si le statut actuel est le même que celui qui a déjà été
						# traité, il n'y a rien d'autre à faire : le demandeur
						# a déjà été averti.
						pywikibot.output(u'DRP déjà traitée !')
						continue
					else:
						pywikibot.output(u'DRP déjà traitée mais statut différent…')
						# Supprimer la requête de la base de donnée SQL pour éviter
						# qu'elle ne se retrouve en double avec deux statuts
						# différents.
						self.database.query('DELETE FROM drp WHERE titre_section = "%s"' % titre_section_SQL.replace('"', '\\"').encode('utf-8'))
				
				#print section
				
				# Si on arrive ici, c'est que le demandeur n'a pas été averti du 
				# statut actuel
				m1 = re.search(u"[dD]emandée? par .*\[ *\[ *([uU]tilisateur:|[uU]ser:|[sS]p[eé]cial:[cC]ontributions/)(?P<nom_demandeur>[^|\]]+)(\|| *\] *\])", section)
				m2 = re.search(u"[dD]emandée? par {{u'?\|(?P<nom_demandeur>[^|]+)}}", section)
				m3 = re.search(u"[dD]emandée? par (?P<nom_demandeur>[0-9]+\.[0-9]+\.[0-9]+\.[0-9]+)", section)
				
				if m1:
					nom_demandeur = m1.group('nom_demandeur')
					#print 'm1'
				elif m2:
					nom_demandeur = m2.group('nom_demandeur')
					#print 'm2'
				elif m3:
					nom_demandeur = m3.group('nom_demandeur')
					#print 'm3'
				else:
					pywikibot.output(u'nom du demandeur introuvable !')
					continue
				
				#print nom_demandeur
				
				demandeur = pywikibot.User(self.site, nom_demandeur)
				if u'autopatrolled' in demandeur.groups():
					pywikibot.output(u'demandeur autopatrolled : inutile de laisser un message')
					continue
				elif demandeur in self.whitelist:
					pywikibot.output(u"l'utilisateur est sur la whitelist")
					continue

				
				page_discussion_demandeur = demandeur.getUserTalkPage()
				pywikibot.output(page_discussion_demandeur)
				
				m = re.search(u"\[ *\[ *(?P<titre_page>.*) *\] *\]", titre_section)
				if not m:
					pywikibot.output(u'Titre de la page concernée introuvable !')
					continue
					
				titre_page_concernee = m.group('titre_page').strip()
				pywikibot.output(titre_page_concernee)
					
				# Vérifier si une PàS technique pour la restauration a été
				# lancée ou non.
				if statut_actuel == 'oui':
					if PaS:
						statut_actuel = 'oui_PaS'
						pywikibot.output('oui_PaS')
						if not page_PaS or page_PaS.exists():
							try:
								page_PaS = pywikibot.Page(self.site, titre_page_concernee + "/Suppression").toggleTalkPage() #pywikibot.Page(self.site, u"Discussion:%s/Suppression" % titre_page_concernee)
								page_PaS.get()
							except:
								pywikibot.output(u'erreur : la PàS technique ne semble pas exister ou n\'est pas normalisée !')
								statut_actuel = 'oui_PaS_mais_introuvable'
						if page_PaS:
							# La PàS peut avoir été renommée
							if page_PaS.isRedirectPage():
								page_PaS = page_PaS.getRedirectTarget()
							
							if re.search(u"[pP]roposé *par.* ([0-9]{1,2}.*20[01][0-9]) à [0-9]{2}:[0-9]{2}", page_PaS.get()):
								date_debut_PaS = re.search(u"[pP]roposé *par.* ([0-9]{1,2}.*20[01][0-9]) à [0-9]{2}:[0-9]{2}", page_PaS.get()).group(1)
							else:
								# Si la date n'est pas formatée comme attendue sur la PàS, le bot
								# cherche sa date de création en remontant l'historique, puis l'exprime
								# sous la forme attendue.
								date_creation = page_PaS.getVersionHistory()[-1][1]
								date_debut_PaS = date_creation.strftime("%d %B %Y")
						
				message = self.messages[statut_actuel]
								
				# La fonction urllib.quote() permet d'encoder une URL.
				# Ici, seul le titre de la section a besoin d'être encodé.
				# Cependant, MediaWiki remplace les espaces par des tirets bas ('_')
				# et les % dans l'encodage par des points ('.').
				lien_drp = u"%s#%s" % (self.main_page.title(asLink = False), urllib.quote(titre_section_MediaWiki.encode('utf-8'), safe=" /").replace(" ", "_").replace("%", "."))
				
				#pywikibot.output(u'lien_drp = %s' % lien_drp)
				
				if statut_actuel == 'non' or statut_actuel == 'oui' or statut_actuel == 'oui_PaS_mais_introuvable':
					message = message % {'titre_page':titre_page_concernee, 'lien_drp':lien_drp, 'date_debut_lien_valide':date}
				elif statut_actuel == 'oui_PaS':
					if not type(date_debut_PaS) == unicode:
						pywikibot.output(u"Formattage de date_debut_PaS")
						date_debut_PaS = date_debut_PaS.decode('utf-8')
					message = message % {'titre_page':titre_page_concernee, 'lien_drp':lien_drp, 'date_debut_lien_valide':date, 'titre_PaS':page_PaS.title(asLink = False), 'date_debut_PaS':date_debut_PaS}
				elif statut_actuel in ['attente', 'autre', 'autreavis']:
					message = message % {'titre_page':titre_page_concernee, 'lien_drp':lien_drp}
				else:
					pywikibot.output(u'statut inconnu : %s' % statut_actuel)
					continue				
				
				#
				# Mauvaise gestion des IPv6 par pywikibot
				# Les caractères doivent être en majuscules
				#
				pattern_ipv6 = "Discussion utilisateur:(([0-9a-zA-Z]{,4}:){7}[0-9a-zA-Z]{,4})"
				if re.search(pattern_ipv6, page_discussion_demandeur.title()):
					ipv6 = re.search(pattern_ipv6, page_discussion_demandeur.title()).group(1)
					ipv6 = ipv6.upper()
					page_discussion_demandeur = pywikibot.Page(pywikibot.Site(), u"Discussion utilisateur:"+ipv6)
				#
				
				if page_discussion_demandeur.exists():
					while page_discussion_demandeur.isRedirectPage():
						page_discussion_demandeur = page_discussion_demandeur.getRedirectTarget()
					
					text = page_discussion_demandeur.get()
					newtext = text
					newtext += '\n\n'
					newtext += u"== %s ==" % self.titre_message % {'titre_page': titre_page_concernee}
					newtext += '\n'
					newtext += message
		# pwb_error			pywikibot.showDiff(page_discussion_demandeur.get(), newtext)
				else:
					newtext = u"== %s ==" % self.titre_message % {'titre_page': titre_page_concernee}
					newtext += '\n'
					newtext += message
					pywikibot.output(newtext)
				
				
				comment = self.resume % {'titre_page': titre_page_concernee}
				pywikibot.output(comment)
				
				try:
					page_discussion_demandeur.put(newtext, comment=comment, minorEdit=False)
				except:
					pywikibot.output(u'erreur lors de la publication du message !')
					continue
				
				# Enregistrer la requête comme analysée par le bot
				self.database.query('INSERT INTO drp VALUES ("%s", "%s", CURRENT_TIMESTAMP)' % (titre_section_SQL.replace('"', '\\"').encode('utf-8'), statut_actuel.encode('utf-8')))
Example #52
0
 def run(self):
     """Start the bot.

     Iterate over ``self.generator``, apply the configured replacements
     to each page, and either confirm the changes interactively or save
     them automatically when the ``always`` option is set.
     """
     # Run the generator which will yield Pages which might need to be
     # changed.
     for page in self.generator:
         # Skip pages whose title matches a configured title exception.
         if self.isTitleExcepted(page.title()):
             pywikibot.output(
                 'Skipping {0} because the title is on the exceptions list.'
                 .format(page.title(as_link=True)))
             continue
         try:
             # Load the page's text from the wiki
             original_text = page.get(get_redirect=True)
             if not page.has_permission():
                 pywikibot.output("You can't edit page " +
                                  page.title(as_link=True))
                 continue
         except pywikibot.NoPage:
             pywikibot.output('Page {0} not found'.format(
                 page.title(as_link=True)))
             continue
         # Replacement descriptors that were actually applied to this page.
         applied = set()
         new_text = original_text
         # Text as of the previous replacement pass (None forces one pass).
         last_text = None
         # Number of context lines to show in the diff (0 = minimal).
         context = 0
         # Interactive loop: re-entered after 'edit', 'browser' or
         # 'more context' choices until the page is accepted, rejected
         # or saved.
         while True:
             if self.isTextExcepted(new_text):
                 pywikibot.output('Skipping {0} because it contains text '
                                  'that is on the exceptions list.'.format(
                                      page.title(as_link=True)))
                 break
             # Apply the replacements; when self.recursive is set, keep
             # re-applying until the text stops changing.
             while new_text != last_text:
                 last_text = new_text
                 new_text = self.apply_replacements(last_text, applied,
                                                    page)
                 if not self.recursive:
                     break
             if new_text == original_text:
                 pywikibot.output('No changes were necessary in ' +
                                  page.title(as_link=True))
                 break
             if hasattr(self, 'addedCat'):
                 # Fetch only categories in wikitext, otherwise the others
                 # will be explicitly added.
                 cats = textlib.getCategoryLinks(new_text, site=page.site)
                 if self.addedCat not in cats:
                     cats.append(self.addedCat)
                     new_text = textlib.replaceCategoryLinks(new_text,
                                                             cats,
                                                             site=page.site)
             # Show the title of the page we're working on.
             # Highlight the title in purple.
             pywikibot.output(
                 color_format('\n\n>>> {lightpurple}{0}{default} <<<',
                              page.title()))
             pywikibot.showDiff(original_text, new_text, context=context)
             # In 'always' mode the non-interactive save path below the
             # loop takes over.
             if self.getOption('always'):
                 break
             choice = pywikibot.input_choice(
                 'Do you want to accept these changes?',
                 [('Yes', 'y'), ('No', 'n'), ('Edit original', 'e'),
                  ('edit Latest', 'l'), ('open in Browser', 'b'),
                  ('More context', 'm'), ('All', 'a')],
                 default='N')
             if choice == 'm':
                 # Widen the diff context and redisplay.
                 context = context * 3 if context else 3
                 continue
             if choice == 'e':
                 # Let the user edit the original text; replacements are
                 # re-applied on the next loop pass.
                 editor = editarticle.TextEditor()
                 as_edited = editor.edit(original_text)
                 # if user didn't press Cancel
                 if as_edited and as_edited != new_text:
                     new_text = as_edited
                 continue
             if choice == 'l':
                 # Let the user edit the latest (already replaced) text.
                 editor = editarticle.TextEditor()
                 as_edited = editor.edit(new_text)
                 # if user didn't press Cancel
                 if as_edited and as_edited != new_text:
                     new_text = as_edited
                     # prevent changes from being applied again
                     last_text = new_text
                 continue
             if choice == 'b':
                 # Show the page in a browser and reload its live text,
                 # discarding the current replacement state.
                 pywikibot.bot.open_webbrowser(page)
                 try:
                     original_text = page.get(get_redirect=True, force=True)
                 except pywikibot.NoPage:
                     pywikibot.output('Page {0} has been deleted.'.format(
                         page.title()))
                     break
                 new_text = original_text
                 last_text = None
                 continue
             if choice == 'a':
                 # Switch to non-interactive mode for all further pages.
                 self.options['always'] = True
             if choice == 'y':
                 # Save asynchronously; completions are reported from the
                 # queue drained just below.
                 page.text = new_text
                 page.save(summary=self.generate_summary(applied),
                           asynchronous=True,
                           callback=self._replace_async_callback,
                           quiet=True)
             # Report any asynchronous saves that have completed so far.
             while not self._pending_processed_titles.empty():
                 proc_title, res = self._pending_processed_titles.get()
                 pywikibot.output('Page {0}{1} saved'.format(
                     proc_title, '' if res else ' not'))
             # choice must be 'N'
             break
         # Non-interactive save path used when the 'always' option is set.
         if self.getOption('always') and new_text != original_text:
             try:
                 page.text = new_text
                 page.save(summary=self.generate_summary(applied),
                           callback=self._replace_sync_callback,
                           quiet=True)
             except pywikibot.EditConflict:
                 pywikibot.output(
                     'Skipping {0} because of edit conflict'.format(
                         page.title(), ))
             except pywikibot.SpamfilterError as e:
                 pywikibot.output(
                     'Cannot change {0} because of blacklist entry {1}'.
                     format(page.title(), e.url))
             except pywikibot.LockedPage:
                 pywikibot.output('Skipping {0} (locked page)'.format(
                     page.title(), ))
             except pywikibot.PageNotSaved as error:
                 pywikibot.output('Error putting page: {0}'.format(
                     error.args, ))
             # Periodically drain the async-save result queue so it does
             # not grow without bound.
             if self._pending_processed_titles.qsize() > 50:
                 while not self._pending_processed_titles.empty():
                     proc_title, res = self._pending_processed_titles.get()
                     pywikibot.output('Page {0}{1} saved'.format(
                         proc_title, '' if res else ' not'))
Example #53
0
    def addImdb(self, item):
        """Try to add an IMDB id (P345) claim to an episode item.

        Looks at the previous (P155) and next (P156) episode items: when a
        neighbour already carries an IMDB id and the episode cache
        (``self.imdbcache``) knows the adjoining episode's id and title,
        and that cached title equals this item's label, the id is added.
        The method recurses into neighbouring items so it can walk the
        whole linked list of episodes.

        @param item: the episode item to process
        @return: True when a claim was added (or already present),
            False on a detected mismatch, None otherwise
        """
        # Guard against processing the same item twice (and against
        # endless recursion through the P155/P156 chain).
        if item.title() in self.processeditems:
            pywikibot.output(u'Already processed %s, skipping it' %
                             (item.title(), ))
            return

        self.processeditems.append(item.title())
        data = item.get()
        claims = data.get('claims')
        # Nothing to add when the item already has an IMDB id.
        if u'P345' in claims:
            self.addReleased(item, claims.get(u'P345')[0].getTarget())
            return True

        # Pick a label in the first available language, preferring English.
        langs = [u'en', u'es', u'fr', u'de', u'nl']
        label = u''
        for lang in langs:
            if data.get('labels').get(lang):
                label = data.get('labels').get(lang)
                break

        if not label:
            #FIXME: Implement
            # label = data.get('labels').get(u'en')
            pywikibot.output(u'Did not find a label for %s' % (item.title(), ))

        # Neighbouring episode items: P155 = follows, P156 = followed by.
        previousitem = None
        nextitem = None

        if u'P155' in claims:
            previousitem = claims.get(u'P155')[0].getTarget()
        if u'P156' in claims:
            nextitem = claims.get(u'P156')[0].getTarget()

        # What the previous episode's cache entry says this episode is.
        imdbid_from_previous = u''
        imdbtitle_from_previous = u''

        if previousitem:
            previousclaims = previousitem.get().get('claims')
            if u'P345' in previousclaims:
                previousimdb = previousclaims.get(u'P345')[0].getTarget()
                if previousimdb in self.imdbcache:
                    imdbid_from_previous = self.imdbcache[previousimdb].get(
                        u'next')
                    imdbtitle_from_previous = self.imdbcache[previousimdb].get(
                        u'nexttitle')

        # What the next episode's cache entry says this episode is.
        imdbid_from_next = u''
        imdbtitle_from_next = u''

        if nextitem:
            nextclaims = nextitem.get().get('claims')
            if u'P345' in nextclaims:
                nextimdb = nextclaims.get(u'P345')[0].getTarget()
                if nextimdb in self.imdbcache:
                    imdbid_from_next = self.imdbcache[nextimdb].get(
                        u'previous')
                    imdbtitle_from_next = self.imdbcache[nextimdb].get(
                        u'previoustitle')

        if imdbid_from_previous and imdbid_from_next:
            # Both neighbours point at an id: they must agree with each
            # other, and the label must match the cached title.
            if imdbid_from_previous == imdbid_from_next:
                if label == imdbtitle_from_previous:
                    newclaim = pywikibot.Claim(self.repo, u'P345')
                    newclaim.setTarget(imdbid_from_previous)
                    summary = u'Adding link based on same label and link from [[%s|previous]] and [[%s|next item]]' % (
                        previousitem.title(), nextitem.title())
                    pywikibot.output(summary)
                    item.addClaim(newclaim, summary=summary)
                    self.addReleased(item, imdbid_from_previous)
                    return True
                else:
                    pywikibot.output(
                        u'The label "%s" is not the same as imdb "%s", skipping'
                        % (label, imdbtitle_from_previous))
                    return False
            else:
                pywikibot.output(
                    u'We have a mix up, found "%s" & "%s", skipping' %
                    (imdbid_from_previous, imdbid_from_next))
                return False
        elif imdbid_from_previous:
            # Only the previous neighbour knows this episode.
            if label == imdbtitle_from_previous:
                newclaim = pywikibot.Claim(self.repo, u'P345')
                newclaim.setTarget(imdbid_from_previous)
                summary = u'Adding link based on same label and link from [[%s|previous item]]' % (
                    previousitem.title(), )
                pywikibot.output(summary)
                item.addClaim(newclaim, summary=summary)
                self.addReleased(item, imdbid_from_previous)
                if nextitem:
                    self.addImdb(nextitem)
                return True
            else:
                pywikibot.output(
                    u'The label "%s" is not the same as imdb "%s", skipping' %
                    (label, imdbtitle_from_previous))
                # This will make the bot iterate the linked list.
                if nextitem:
                    self.addImdb(nextitem)
        elif imdbid_from_next:
            # Only the next neighbour knows this episode.
            if label == imdbtitle_from_next:
                newclaim = pywikibot.Claim(self.repo, u'P345')
                newclaim.setTarget(imdbid_from_next)
                summary = u'Adding link based on same label and link from [[%s|next item]]' % (
                    nextitem.title(), )
                pywikibot.output(summary)
                item.addClaim(newclaim, summary=summary)
                self.addReleased(item, imdbid_from_next)
                if previousitem:
                    self.addImdb(previousitem)
                return True
            else:
                pywikibot.output(
                    u'The label "%s" is not the same as imdb "%s", skipping' %
                    (label, imdbtitle_from_next))
                # This will make the bot iterate the linked list.
                if previousitem:
                    self.addImdb(previousitem)
        # Control reaches here whenever no branch above returned — i.e.
        # when no neighbour information allowed a decision, and also after
        # the label-mismatch branches just above (which do not return).
        pywikibot.output(
            u'Something went wrong. Couldn\'t add anything to %s' %
            (item.title(), ))
Example #54
0
 def treat(self, page):
     """Purge the given page and report whether the purge succeeded."""
     title = page.title(as_link=True)
     suffix = "" if page.purge() else " not"
     pywikibot.output(u'Page %s%s purged' % (title, suffix))
Example #55
0
    def findTranslated(self, page, oursite=None):
        """Yield the page on *oursite* corresponding to *page*, if verified.

        Follows the interlanguage links of *page* to find its counterpart
        on *oursite* (defaults to ``self.site``) and yields it only when
        that counterpart links back to *page*, either directly or through
        a single redirect. All problems are reported via
        ``pywikibot.output``; nothing is yielded in those cases.
        """
        quiet = self.getOption('quiet')
        if not oursite:
            oursite = self.site
        # Work on the redirect target, not the redirect itself.
        if page.isRedirectPage():
            page = page.getRedirectTarget()

        # Find the interlanguage link pointing to our site.
        ourpage = None
        for link in page.iterlanglinks():
            if link.site == oursite:
                ourpage = pywikibot.Page(link)
                break

        if not ourpage:
            if not quiet:
                pywikibot.output('%s -> no corresponding page in %s' %
                                 (page.title(), oursite))
        elif ourpage.section():
            pywikibot.output('%s -> our page is a section link: %s' %
                             (page.title(), ourpage.title()))
        elif not ourpage.exists():
            pywikibot.output("%s -> our page doesn't exist: %s" %
                             (page.title(), ourpage.title()))
        else:
            # Resolve a redirect on our side before checking further.
            if ourpage.isRedirectPage():
                ourpage = ourpage.getRedirectTarget()

            pywikibot.output('%s -> corresponding page is %s' %
                             (page.title(), ourpage.title()))
            if ourpage.namespace() != 0:
                pywikibot.output('%s -> not in the main namespace, skipping' %
                                 page.title())
            elif ourpage.isRedirectPage():
                # The redirect target was itself a redirect.
                pywikibot.output('%s -> double redirect, skipping' %
                                 page.title())
            elif not ourpage.exists():
                # The redirect target may not exist.
                pywikibot.output("%s -> page doesn't exist, skipping" %
                                 ourpage.title())
            else:
                # Verify the back interlanguage link points at *page*.
                backpage = None
                for link in ourpage.iterlanglinks():
                    if link.site == page.site:
                        backpage = pywikibot.Page(link)
                        break
                if not backpage:
                    pywikibot.output('%s -> no back interwiki ref' %
                                     page.title())
                elif backpage == page:
                    # everything is ok
                    yield ourpage
                elif backpage.isRedirectPage():
                    # Accept a back link that redirects to *page*.
                    backpage = backpage.getRedirectTarget()
                    if backpage == page:
                        # everything is ok
                        yield ourpage
                    else:
                        pywikibot.output(
                            '%s -> back interwiki ref target is redirect to %s'
                            % (page.title(), backpage.title()))
                else:
                    pywikibot.output('%s -> back interwiki ref target is %s' %
                                     (page.title(), backpage.title()))
Example #56
0
    def buildImdbCache(self, series):
        """Build a linked-list cache of IMDB episode data for *series*.

        Queries the OMDb API for every season of the series and returns a
        dict keyed by episode IMDB id. Each entry stores the episode's own
        title and release date plus the ids, titles and release dates of
        the previous and next episodes, so callers (see ``addImdb``) can
        walk the episode chain in both directions.

        Fixes over the previous version: the first episode's ``next``
        pointers now reference the second episode (they used to point at
        the third); short series (0–2 episodes) no longer produce a bogus
        ``u''`` key or drop the first episode; a missing ``totalSeasons``
        value (None) is handled instead of raising TypeError.

        @param series: Wikidata item of the series; must carry an IMDB id
            (P345) claim, otherwise an empty dict is returned.
        @return: dict mapping IMDB id -> neighbourhood info dict
        """
        def make_entry(prev_id, prev_title, prev_released,
                       title, released, next_id, next_title, next_released):
            # Assemble one cache record in the shape addImdb() expects.
            return {
                u'previous': prev_id,
                u'previoustitle': prev_title,
                u'previousReleased': prev_released,
                u'title': title,
                u'released': released,
                u'next': next_id,
                u'nexttitle': next_title,
                u'nextReleased': next_released,
            }

        result = {}
        claims = series.get().get('claims')
        # The series item itself must have an IMDB id to query OMDb with.
        if u'P345' not in claims:
            pywikibot.output(u'Error: No IMDB id found')
            return result

        seriesimdb = claims.get(u'P345')[0].getTarget()

        mainurl = u'http://www.omdbapi.com/?i=%s'
        seasonurl = u'http://www.omdbapi.com/?i=%s&Season=%s'

        mainSeriesPage = requests.get(mainurl % (seriesimdb, ))
        seasons = mainSeriesPage.json().get(u'totalSeasons')

        # Sliding window over three consecutive episodes. 'upcoming'
        # replaces a local previously called 'next', which shadowed the
        # builtin of that name.
        previous = previousTitle = previousReleased = u''
        current = currentTitle = currentReleased = u''
        upcoming = upcomingTitle = upcomingReleased = u''

        try:
            for season in range(1, int(seasons) + 1):
                seasonpage = requests.get(seasonurl % (seriesimdb, season))
                episodes = seasonpage.json().get('Episodes')
                if episodes:
                    for episode in episodes:
                        if not previous:
                            previous = episode.get('imdbID')
                            previousTitle = episode.get('Title')
                            previousReleased = episode.get('Released')
                        elif not current:
                            current = episode.get('imdbID')
                            currentTitle = episode.get('Title')
                            currentReleased = episode.get('Released')
                        else:
                            upcoming = episode.get('imdbID')
                            upcomingTitle = episode.get('Title')
                            upcomingReleased = episode.get('Released')

                            if not result:
                                # First flush: the very first episode has
                                # no predecessor; its successor is
                                # 'current' (second episode).
                                result[previous] = make_entry(
                                    u'', u'', u'',
                                    previousTitle, previousReleased,
                                    current, currentTitle, currentReleased)
                            result[current] = make_entry(
                                previous, previousTitle, previousReleased,
                                currentTitle, currentReleased,
                                upcoming, upcomingTitle, upcomingReleased)
                            # Shift the window forward by one episode.
                            previous = current
                            previousTitle = currentTitle
                            previousReleased = currentReleased
                            current = upcoming
                            currentTitle = upcomingTitle
                            currentReleased = upcomingReleased
                            upcoming = upcomingTitle = upcomingReleased = u''
                # Be gentle with the OMDb API.
                time.sleep(1)

            # Flush the tail of the sliding window.
            if current:
                if not result and previous:
                    # Exactly two episodes overall: the leading entry was
                    # never flushed inside the loop.
                    result[previous] = make_entry(
                        u'', u'', u'',
                        previousTitle, previousReleased,
                        current, currentTitle, currentReleased)
                result[current] = make_entry(
                    previous, previousTitle, previousReleased,
                    currentTitle, currentReleased,
                    upcoming, upcomingTitle, upcomingReleased)
            elif previous:
                # A single episode overall.
                result[previous] = make_entry(u'', u'', u'',
                                              previousTitle, previousReleased,
                                              u'', u'', u'')
        except (ValueError, TypeError):
            # ValueError: e.g. totalSeasons == 'N/A';
            # TypeError: totalSeasons missing (None). Return what we have.
            pywikibot.output(u'Ran into a value error while working on %s' %
                             (mainurl % (seriesimdb, ), ))

        return result
Example #57
0
 def listTemplates(cls, templates, namespaces):
     """Print a report of all pages transcluding the given templates."""
     transclusions = cls.template_dict(templates, namespaces)
     pywikibot.output(u'\nList of pages transcluding templates:',
                      toStdout=True)
     # Header: the templates that were searched for.
     for template_name in templates:
         pywikibot.output(u'* %s' % template_name)
     pywikibot.output(u'-' * 36, toStdout=True)
     # Body: every transcluding page, counted as we go.
     total = 0
     for pages in transclusions.values():
         for page in pages:
             pywikibot.output(page.title(), toStdout=True)
             total += 1
     pywikibot.output(u'Total page count: %d' % total)
     pywikibot.output(u'Report generated on %s' %
                      datetime.datetime.utcnow().isoformat(),
                      toStdout=True)
Example #58
0
 def add_template(self, source, dest, task, fromsite):
     """Place or remove the Link_GA/FA template on/from a page.

     @param source: featured/good page on the source wiki
     @param dest: page on the destination wiki to be edited
     @param task: featured task name (e.g. 'former') used to select the
         templates and the edit summary
     @param fromsite: site the featured status originates from
     """
     def compile_link(site, templates):
         """Compile one link template list."""
         findtemplate = '(%s)' % '|'.join(templates)
         return re.compile(
             r'\{\{%s\|%s\}\}' %
             (findtemplate.replace(' ', '[ _]'), site.code), re.IGNORECASE)

     tosite = dest.site
     # Templates to add and to remove for this task on the target wiki.
     add_tl, remove_tl = self.getTemplateList(tosite.code, task)
     re_link_add = compile_link(fromsite, add_tl)
     re_link_remove = compile_link(fromsite, remove_tl)

     text = dest.text
     # m1/m2 are falsy when the template list is empty or no match found.
     m1 = add_tl and re_link_add.search(text)
     m2 = remove_tl and re_link_remove.search(text)
     changed = False
     interactive = self.getOption('interactive')
     if add_tl:
         if m1:
             pywikibot.output('(already added)')
         else:
             # insert just before interwiki
             if (not interactive
                     or pywikibot.input_yn('Connecting %s -> %s. Proceed?' %
                                           (source.title(), dest.title()),
                                           default=False,
                                           automatic_quit=False)):
                 if self.getOption('side'):
                     # Placing {{Link FA|xx}} right next to
                     # corresponding interwiki
                     # NOTE(review): this branch is only reached when m1
                     # is falsy (the add-template was NOT found), so
                     # m1.end() raises AttributeError here — the 'side'
                     # option appears broken; verify the intended match
                     # (probably the interwiki link, not the template).
                     text = (text[:m1.end()] + ' {{%s|%s}}' %
                             (add_tl[0], fromsite.code) + text[m1.end():])
                 else:
                     # Moving {{Link FA|xx}} to top of interwikis
                     iw = textlib.getLanguageLinks(text, tosite)
                     text = textlib.removeLanguageLinks(text, tosite)
                     text += '%s{{%s|%s}}%s' % (config.LS, add_tl[0],
                                                fromsite.code, config.LS)
                     text = textlib.replaceLanguageLinks(text, iw, tosite)
                 changed = True
     if remove_tl:
         if m2:
             if (changed  # Don't force the user to say "Y" twice
                     or not interactive or
                     pywikibot.input_yn('Connecting %s -> %s. Proceed?' %
                                        (source.title(), dest.title()),
                                        default=False,
                                        automatic_quit=False)):
                 text = re.sub(re_link_remove, '', text)
                 changed = True
         elif task == 'former':
             pywikibot.output('(already removed)')
     if changed:
         # Save with a translated edit summary for the target wiki.
         comment = i18n.twtranslate(tosite, 'featured-' + task,
                                    {'page': source})
         try:
             dest.put(text, comment)
             self._save_counter += 1
         except pywikibot.LockedPage:
             pywikibot.output('Page %s is locked!' % dest.title())
         except pywikibot.PageNotSaved:
             pywikibot.output('Page not saved')
Example #59
0
    def __init__(self):
        """Parse command-line arguments and initialize the checker's state.

        Options are read via pywikibot.handle_args(); each option may carry
        a value after a colon (``-opt:value``). Unknown options print help
        and terminate the process via sys.exit(). The constructor also:

        * builds the API query parameters for the allpages generator
          (or a bare query when re-running previously failed titles),
        * connects to the configured site,
        * validates the suspect/keyboard transliteration tables,
        * opens the wiki log file and resolves the failed-titles path,
        * builds the local<->latin character translation dictionaries,
        * compiles the mixed-script "bad word" pattern, and
        * loads the per-language whitelist of known mixed-script words
          from the wiki, if one is configured.

        NOTE(review): many attributes read here (self.links, self.titles,
        self.namespaces, self.localSuspects, self.whitelists, ...) are
        presumably class-level defaults declared outside this excerpt —
        confirm against the full class definition.
        """
        # --- command-line option parsing -------------------------------
        for arg in pywikibot.handle_args():
            # Split "-opt:value" into the option name and its value.
            arg, sep, value = arg.partition(':')
            if arg == '-from':
                # Start page; prompt interactively when no value given.
                self.apfrom = value or pywikibot.input(
                    'Which page to start from: ')
            elif arg == '-reqsize':
                # Pages per API request.
                self.aplimit = int(value)
            elif arg == '-links':
                # Also check links (and categories) on each page.
                self.links = True
            elif arg == '-linksonly':
                # Check links but skip the page titles themselves.
                self.links = True
                self.titles = False
            elif arg == '-replace':
                self.replace = True
            elif arg == '-redir':
                # Include both redirects and non-redirects.
                self.filterredir = 'all'
            elif arg == '-redironly':
                self.filterredir = 'redirects'
            elif arg == '-limit':
                # Stop after this many problem pages.
                self.stopAfter = int(value)
            elif arg in ('-autonomous', '-a'):
                self.autonomous = True
            elif arg == '-ns':
                # May be repeated to accumulate several namespaces.
                self.namespaces.append(int(value))
            elif arg == '-wikilog':
                self.wikilogfile = value
            elif arg == '-failedlog':
                self.failedTitles = value
            elif arg == '-failed':
                # Re-process titles recorded in an earlier failed log.
                self.doFailed = True
            else:
                # Unknown option: show help and abort the process.
                pywikibot.output(u'Unknown argument %s.' % arg)
                pywikibot.showHelp()
                sys.exit()

        # Default namespaces when none were given on the command line
        # (skipped in -failed mode, which works from an explicit list).
        if self.namespaces == [] and not self.doFailed:
            if self.apfrom == u'':
                # 0 should be after templates ns
                self.namespaces = [14, 10, 12, 0]
            else:
                self.namespaces = [0]

        # Smaller batches when link data is also requested; otherwise let
        # the server choose its maximum.
        if self.aplimit is None:
            self.aplimit = 200 if self.links else 'max'

        # --- base API query parameters ---------------------------------
        if not self.doFailed:
            self.queryParams = {
                'action': 'query',
                'generator': 'allpages',
                'gaplimit': self.aplimit,
                'gapfilterredir': self.filterredir
            }
        else:
            # Failed mode queries explicit titles, so no generator and
            # -from has no effect.
            self.queryParams = {'action': 'query'}
            if self.apfrom != u'':
                pywikibot.output(u'Argument "-from" is ignored with "-failed"')

        propParam = 'info'
        if self.links:
            propParam += '|links|categories'
            self.queryParams['pllimit'] = 'max'
            self.queryParams['cllimit'] = 'max'

        self.queryParams['prop'] = propParam

        self.site = pywikibot.Site()

        # The suspect/keyboard tables are parallel arrays mapping local
        # characters to their latin look-alikes; mismatched lengths would
        # silently corrupt the translation dicts below, so fail early.
        if len(self.localSuspects) != len(self.latinSuspects):
            raise ValueError(u'Suspects must be the same size')
        if len(self.localKeyboard) != len(self.latinKeyboard):
            raise ValueError(u'Keyboard info must be the same size')

        # --- log files -------------------------------------------------
        # Relative paths are resolved inside pywikibot's data directory.
        if not os.path.isabs(self.wikilogfile):
            self.wikilogfile = pywikibot.config.datafilepath(self.wikilogfile)
        self.wikilog = self.OpenLogFile(self.wikilogfile)

        if not os.path.isabs(self.failedTitles):
            self.failedTitles = pywikibot.config.datafilepath(
                self.failedTitles)

        if self.doFailed:
            # Load the previous failed-titles list (one title per line),
            # then log this run's failures to a separate ".failed" file.
            # NOTE(review): self.Page looks like a site-bound page factory
            # defined elsewhere in the class — confirm.
            with codecs.open(self.failedTitles, 'r', 'utf-8') as f:
                self.titleList = [self.Page(t) for t in f]
            self.failedTitles += '.failed'

        # --- translation tables for str.translate ----------------------
        # Keys are code points (ord) of the source characters.
        # NOTE(review): xrange is Python 2 (or a compat shim imported
        # elsewhere in this file) — verify against the module's imports.
        self.lclToLatDict = dict(
            (ord(self.localSuspects[i]), self.latinSuspects[i])
            for i in xrange(len(self.localSuspects)))
        self.latToLclDict = dict(
            (ord(self.latinSuspects[i]), self.localSuspects[i])
            for i in xrange(len(self.localSuspects)))

        if self.localKeyboard is not None:
            # Same-key mappings for characters typed on the wrong layout.
            self.lclToLatKeybDict = dict(
                (ord(self.localKeyboard[i]), self.latinKeyboard[i])
                for i in xrange(len(self.localKeyboard)))
            self.latToLclKeybDict = dict(
                (ord(self.latinKeyboard[i]), self.localKeyboard[i])
                for i in xrange(len(self.localKeyboard)))
        else:
            self.lclToLatKeybDict = {}
            self.latToLclKeybDict = {}

        # A "bad" spot is any latin letter directly adjacent to a local
        # letter; the outer pattern then expands to the whole word around
        # that spot (any run of latin and/or local letters).
        badPtrnStr = u'([%s][%s]|[%s][%s])' \
                     % (ascii_letters, self.localLtr,
                        self.localLtr, ascii_letters)
        self.badWordPtrn = re.compile(
            u'[%s%s]*%s[%s%s]*' % (ascii_letters, self.localLtr, badPtrnStr,
                                   ascii_letters, self.localLtr))

        # Get whitelist
        self.knownWords = set()
        self.seenUnresolvedLinks = set()

        # TODO: handle "continue"
        if self.site.code in self.whitelists:
            # The whitelist page's outgoing links name words that are
            # known-good despite matching the mixed-script pattern.
            wlpage = self.whitelists[self.site.code]
            pywikibot.output(u'Loading whitelist from %s' % wlpage)
            wlparams = {
                'action': 'query',
                'prop': 'links',
                'titles': wlpage,
                'redirects': '',
                'indexpageids': '',
                'pllimit': 'max',
            }

            req = api.Request(site=self.site, parameters=wlparams)
            data = req.submit()
            # Exactly one page is expected for a single-title query.
            if len(data['query']['pageids']) == 1:
                pageid = data['query']['pageids'][0]
                links = data['query']['pages'][pageid]['links']

                # Collect every "bad word" occurring in the link titles.
                allWords = [
                    nn for n in links for nn in self.FindBadWords(n['title'])
                ]

                self.knownWords = set(allWords)
            else:
                raise ValueError(u'The number of pageids is not 1')

            pywikibot.output(u'Loaded whitelist with %i items' %
                             len(self.knownWords))
            if len(self.knownWords) > 0:
                pywikibot.log(u'Whitelist: %s' % u', '.join(
                    [self.MakeLink(i, False) for i in self.knownWords]))
        else:
            pywikibot.output(u'Whitelist is not known for language %s' %
                             self.site.code)
Example #60
0
 def run(self):
     """Execute every configured task in order, then report the total.

     Delegates each entry of ``self.tasks`` to :meth:`run_task` and
     finally prints how many pages were saved during the run.
     """
     for current_task in self.tasks:
         self.run_task(current_task)
     pywikibot.output('%d pages written.' % self._save_counter)