def main(*args): """ Process command line arguments and invoke bot. If args is an empty list, sys.argv is used. @param args: command line arguments @type args: list of unicode """ # the option that's always selected when the bot wonders what to do with # a link. If it's None, the user is prompted (default behaviour). always = None alternatives = [] getAlternatives = True dnSkip = False generator = None primary = False main_only = False # For sorting the linked pages, case can be ignored minimum = 0 local_args = pywikibot.handle_args(args) generator_factory = pagegenerators.GeneratorFactory( positional_arg_name='page') for arg in local_args: if arg.startswith('-primary:'): primary = True getAlternatives = False alternatives.append(arg[9:]) elif arg == '-primary': primary = True elif arg.startswith('-always:'): always = arg[8:] elif arg.startswith('-pos:'): if arg[5] != ':': mysite = pywikibot.Site() page = pywikibot.Page(pywikibot.Link(arg[5:], mysite)) if page.exists(): alternatives.append(page.title()) else: if pywikibot.input_yn( u'Possibility %s does not actually exist. Use it ' 'anyway?' % page.title(), default=False, automatic_quit=False): alternatives.append(page.title()) else: alternatives.append(arg[5:]) elif arg == '-just': getAlternatives = False elif arg == '-dnskip': dnSkip = True elif arg == '-main': main_only = True elif arg.startswith('-min:'): minimum = int(arg[5:]) elif arg.startswith('-start'): try: generator = pagegenerators.CategorizedPageGenerator( pywikibot.Site().disambcategory(), start=arg[7:], namespaces=[0]) except pywikibot.NoPage: pywikibot.output("Disambiguation category for your wiki is not known.") raise else: generator_factory.handleArg(arg) site = pywikibot.Site() generator = generator_factory.getCombinedGenerator(generator) if not generator: pywikibot.bot.suggest_help(missing_generator=True) return False site.login() bot = DisambiguationRobot(always, alternatives, getAlternatives, dnSkip, generator, primary, main_only, minimum=minimum) bot.run()
def main(*args): """Process command line arguments and invoke PatrolBot.""" # This factory is responsible for processing command line arguments # that are also used by other scripts and that determine on which pages # to work on. usercontribs = None gen = None recentchanges = False newpages = False repeat = False genFactory = pagegenerators.GeneratorFactory() options = {} # Parse command line arguments for arg in pywikibot.handle_args(args): if arg.startswith('-ask'): options['ask'] = True elif arg.startswith('-autopatroluserns'): options['autopatroluserns'] = True elif arg.startswith('-repeat'): repeat = True elif arg.startswith('-newpages'): newpages = True elif arg.startswith('-recentchanges'): recentchanges = True elif arg.startswith('-usercontribs:'): usercontribs = arg[14:] elif arg.startswith('-versionchecktime:'): versionchecktime = arg[len('-versionchecktime:'):] options['versionchecktime'] = int(versionchecktime) elif arg.startswith("-whitelist:"): options['whitelist'] = arg[len('-whitelist:'):] else: generator = genFactory.handleArg(arg) if not generator: if ':' in arg: m = arg.split(':') options[m[0]] = m[1] site = pywikibot.Site() site.login() if usercontribs: pywikibot.output(u'Processing user: %s' % usercontribs) if not newpages and not recentchanges and not usercontribs: if site.family.name == 'wikipedia': newpages = True else: recentchanges = True bot = PatrolBot(**options) if newpages or usercontribs: pywikibot.output(u'Newpages:') gen = site.newpages feed = api_feed_repeater(gen, delay=60, repeat=repeat, user=usercontribs, namespaces=genFactory.namespaces, recent_new_gen=False) bot.run(feed) if recentchanges or usercontribs: pywikibot.output(u'Recentchanges:') gen = site.recentchanges feed = api_feed_repeater(gen, delay=60, repeat=repeat, namespaces=genFactory.namespaces, user=usercontribs) bot.run(feed) pywikibot.output(u'%d/%d patrolled' % (bot.patrol_counter, bot.rc_item_counter))
def __init__(self, password=None, sysop=False, site=None, user=None):
    """
    Initializer.

    All parameters default to defaults in user-config.

    @param site: Site object to log into
    @type site: BaseSite
    @param user: username to use. If user is None, the username is loaded
        from config.usernames.
    @type user: basestring
    @param password: password to use
    @type password: basestring
    @param sysop: login as sysop account. The sysop username is loaded
        from config.sysopnames.
    @type sysop: bool

    @raises NoUsername: No username is configured for the requested site.
    """
    if site is not None:
        self.site = site
    else:
        self.site = pywikibot.Site()
    if user:
        self.username = user
    elif sysop:
        config_names = config.sysopnames
        family_sysopnames = (config_names[self.site.family.name]
                             or config_names['*'])
        self.username = family_sysopnames.get(self.site.code, None)
        try:
            self.username = self.username or family_sysopnames['*']
        except KeyError:
            raise NoUsername("""\
ERROR: Sysop username for %(fam_name)s:%(wiki_code)s is undefined.
If you have a sysop account for that site, please add a line to user-config.py:

sysopnames['%(fam_name)s']['%(wiki_code)s'] = 'myUsername'"""
                             % {'fam_name': self.site.family.name,
                                'wiki_code': self.site.code})
    else:
        config_names = config.usernames
        family_usernames = (config_names[self.site.family.name]
                            or config_names['*'])
        self.username = family_usernames.get(self.site.code, None)
        try:
            self.username = self.username or family_usernames['*']
        except KeyError:
            raise NoUsername("""\
ERROR: Username for %(fam_name)s:%(wiki_code)s is undefined.
If you have an account for that site, please add a line to user-config.py:

usernames['%(fam_name)s']['%(wiki_code)s'] = 'myUsername'"""
                             % {'fam_name': self.site.family.name,
                                'wiki_code': self.site.code})
    self.password = password
    self.login_name = self.username
    if getattr(config, 'password_file', ''):
        self.readPassword()
def bot_update(page_name, file_name):
    """Replace the text of page_name with the contents of file_name."""
    with open(file_name, 'r', encoding='UTF-8') as wiki_dump:
        site = pywikibot.Site()  # The site we want to run our bot on
        page = pywikibot.Page(site, page_name)
        page.text = wiki_dump.read()
        page.save('Automatic update from: '
                  + str(datetime.datetime.now()))  # Saves the page
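# A minimal usage sketch for bot_update() above, assuming pywikibot is
# already configured for the target wiki. The page title and dump filename
# here are hypothetical examples, not values from the original script.
import datetime

import pywikibot

bot_update('User:ExampleBot/sandbox', 'sandbox_dump.txt')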
import pprint
import re
import sqlite3

import pywikibot

# Read the Wikidata ids (one "key: Q123" pair per line) from the mapping file
with open('wikidatamapping.txt', 'r', encoding='utf-8') as file:
    lines = file.readlines()

wdids = []
for line in lines:
    match = re.search(r':\s(.*)', line)
    wdid = match.group(1)
    wdids.append(wdid)

site = pywikibot.Site('nl')
repo = site.data_repository()

for wdid in wdids:
    item = pywikibot.ItemPage(repo, wdid)
    item_dict = item.get()
    clm_dict = item_dict['claims']

    clm_add_all = []
    for clm in clm_dict:
        clm_add_all.append(clm)
    #print(clm_add_all)

    dict = {}
    for property in clm_add_all:
        clm_list = clm_dict[property]
        qids = []
        for clm in clm_list:
            pprint.pprint(clm.toJSON())
            #pprint.pprint(clm.toJSON()['mainsnak'].get('datavalue', {}).get('value', {}).get('numeric-id'))
def main(*args): """ Process command line arguments and invoke bot. If args is an empty list, sys.argv is used. @param args: command line arguments @type args: unicode """ local_args = pywikibot.handle_args(args) fromsite = pywikibot.Site() tolang = fromsite.code tofamily = fromsite.family.name prefix = '' overwrite = False gen_args = [] gen_factory = pagegenerators.GeneratorFactory() for arg in local_args: if gen_factory.handleArg(arg): gen_args.append(arg) continue if arg.startswith('-tofamily'): tofamily = arg[len('-tofamily:'):] elif arg.startswith('-tolang'): tolang = arg[len('-tolang:'):] elif arg.startswith('-prefix'): prefix = arg[len('-prefix:'):] elif arg == '-overwrite': overwrite = True tosite = pywikibot.Site(tolang, tofamily) if fromsite == tosite: raise TargetSiteMissing('Target site not different from source site') gen = gen_factory.getCombinedGenerator() if not gen: raise TargetPagesMissing('Target pages not specified') gen_args = ' '.join(gen_args) pywikibot.output( """ Page transfer configuration --------------------------- Source: %(fromsite)r Target: %(tosite)r Pages to transfer: %(gen_args)s Prefix for transferred pages: %(prefix)s """ % { 'fromsite': fromsite, 'tosite': tosite, 'gen_args': gen_args, 'prefix': prefix }) for page in gen: target_title = (prefix + page.namespace().canonical_prefix() + page.title(with_ns=False)) targetpage = pywikibot.Page(tosite, target_title) edithistpage = pywikibot.Page(tosite, target_title + '/edithistory') summary = 'Moved page from {old} ([[{new}/edithistory|history]])' \ .format(old=page.title(as_link=True, insite=tosite), new=targetpage.title() if not targetpage.namespace().subpages else '') if targetpage.exists() and not overwrite: pywikibot.output('Skipped {0} (target page {1} exists)'.format( page.title(as_link=True), targetpage.title(as_link=True))) continue pywikibot.output('Moving {0} to {1}...'.format( page.title(as_link=True), targetpage.title(as_link=True))) pywikibot.log('Getting page text.') text = page.get(get_redirect=True) text += ( "<noinclude>\n\n<small>This page was moved from {0}. It's " 'edit history can be viewed at {1}</small></noinclude>'.format( page.title(as_link=True, insite=targetpage.site), edithistpage.title(as_link=True, insite=targetpage.site))) pywikibot.log('Getting edit history.') historytable = page.getVersionHistoryTable() pywikibot.log('Putting page text.') targetpage.put(text, summary=summary) pywikibot.log('Putting edit history.') edithistpage.put(historytable, summary=summary)
def test_iterlinks_page_object(self):
    page = [pg for pg in self.wdp.iterlinks()
            if pg.site.language() == 'af'][0]
    self.assertEqual(page,
                     pywikibot.Page(pywikibot.Site('af', 'wikipedia'),
                                    u'New York Stad'))
def getWikidataLookupTables(self):
    '''
    Query to make 10 lookup tables.

    These four lookup tables, each for items with and without images:
    * CIA: Creator, institution & accession number
    * CI: Creator & institution
    * IA: Institution & accession number
    * CA: Creator & accession number

    And also:
    * Wikidata id -> url table(?)
    * Filename -> wikidata id
    '''
    query = u"""SELECT ?item ?image ?creator ?institution ?invnum ?location ?url ?idurl WHERE {
  ?item wdt:P31 wd:Q3305213 . # /wdt:P279* wd:Q3305213 .
  OPTIONAL { ?item wdt:P18 ?image } .
  OPTIONAL { ?item wdt:P170 ?creator } .
  OPTIONAL { ?item wdt:P195 ?institution } .
  OPTIONAL { ?item wdt:P217 ?invnum } .
  OPTIONAL { ?item wdt:P276 ?location } .
  OPTIONAL { ?item wdt:P973 ?url } .
  #This seems to break the query. Disabled for now.
  #OPTIONAL { ?item ?identifierproperty ?identifier .
  #           ?property wikibase:directClaim ?identifierproperty .
  #           ?property wikibase:propertyType wikibase:ExternalId .
  #           ?property wdt:P1630 ?formatterurl .
  #           BIND(IRI(REPLACE(?identifier, '^(.+)$', ?formatterurl)) AS ?idurl).
  #         }
}"""
    sq = pywikibot.data.sparql.SparqlQuery()
    queryresult = sq.select(query)

    for resultitem in queryresult:
        item = resultitem.get('item').replace(
            u'http://www.wikidata.org/entity/', u'')
        # First clean up and put in a dictionary
        paintingdict = {u'item': item,
                        u'image': False,
                        u'creator': False,
                        u'institution': False,
                        u'invnum': False,
                        u'location': False,
                        u'url': False}
        if resultitem.get('image'):
            paintingdict['image'] = pywikibot.FilePage(
                pywikibot.Site('commons', 'commons'),
                resultitem.get('image').replace(
                    u'http://commons.wikimedia.org/wiki/Special:FilePath/',
                    u'')).title(underscore=True, withNamespace=False)
        if resultitem.get('creator'):
            paintingdict['creator'] = resultitem.get('creator').replace(
                u'http://www.wikidata.org/entity/', u'')
        if resultitem.get('institution'):
            paintingdict['institution'] = resultitem.get(
                'institution').replace(u'http://www.wikidata.org/entity/',
                                       u'')
        if resultitem.get('invnum'):
            paintingdict['invnum'] = resultitem.get('invnum')
        if resultitem.get('location'):
            paintingdict['location'] = resultitem.get('location').replace(
                u'http://www.wikidata.org/entity/', u'')
        if resultitem.get('url'):
            paintingdict['url'] = resultitem.get('url')
        elif resultitem.get('idurl'):
            paintingdict['url'] = resultitem.get('idurl')

        ciakey = None
        clakey = None
        cikey = None
        clkey = None
        iakey = None
        lakey = None
        cakey = None
        if paintingdict.get(u'creator') and paintingdict.get(
                u'institution') and paintingdict.get(u'invnum'):
            ciakey = (paintingdict.get(u'creator'),
                      paintingdict.get(u'institution'),
                      paintingdict.get(u'invnum'))
        if paintingdict.get(u'creator') and paintingdict.get(
                u'location') and paintingdict.get(u'invnum'):
            clakey = (paintingdict.get(u'creator'),
                      paintingdict.get(u'location'),
                      paintingdict.get(u'invnum'))
        if paintingdict.get(u'creator') and paintingdict.get(u'institution'):
            cikey = (paintingdict.get(u'creator'),
                     paintingdict.get(u'institution'))
        if paintingdict.get(u'creator') and paintingdict.get(u'location'):
            clkey = (paintingdict.get(u'creator'),
                     paintingdict.get(u'location'))
        if paintingdict.get(u'institution') and paintingdict.get(u'invnum'):
            iakey = (paintingdict.get(u'institution'),
                     paintingdict.get(u'invnum'))
        if paintingdict.get(u'location') and paintingdict.get(u'invnum'):
            lakey = (paintingdict.get(u'location'),
                     paintingdict.get(u'invnum'))
        if paintingdict.get(u'creator') and paintingdict.get(u'invnum'):
            cakey = (paintingdict.get(u'creator'),
                     paintingdict.get(u'invnum'))

        if paintingdict.get(u'image'):
            self.wikidataImages[paintingdict.get(u'image')] = paintingdict
            self.wikidataWithImages[paintingdict.get(u'item')] = paintingdict
            if ciakey:
                if ciakey not in self.wikidataWithCIA:
                    self.wikidataWithCIA[ciakey] = []
                self.wikidataWithCIA[ciakey].append(paintingdict)
            if clakey and clakey != ciakey:
                if clakey not in self.wikidataWithCIA:
                    self.wikidataWithCIA[clakey] = []
                self.wikidataWithCIA[clakey].append(paintingdict)
            if cikey:
                if cikey not in self.wikidataWithCI:
                    self.wikidataWithCI[cikey] = []
                self.wikidataWithCI[cikey].append(paintingdict)
            if clkey and clkey != cikey:
                if clkey not in self.wikidataWithCI:
                    self.wikidataWithCI[clkey] = []
                self.wikidataWithCI[clkey].append(paintingdict)
            if iakey:
                if iakey not in self.wikidataWithIA:
                    self.wikidataWithIA[iakey] = []
                self.wikidataWithIA[iakey].append(paintingdict)
            if lakey and lakey != iakey:
                if lakey not in self.wikidataWithIA:
                    self.wikidataWithIA[lakey] = []
                self.wikidataWithIA[lakey].append(paintingdict)
            if cakey:
                if cakey not in self.wikidataWithCA:
                    self.wikidataWithCA[cakey] = []
                self.wikidataWithCA[cakey].append(paintingdict)
        else:
            self.wikidataNoImages[paintingdict.get(u'item')] = paintingdict
            if ciakey:
                if ciakey not in self.wikidataWithoutCIA:
                    self.wikidataWithoutCIA[ciakey] = []
                self.wikidataWithoutCIA[ciakey].append(paintingdict)
            if clakey and clakey != ciakey:
                if clakey not in self.wikidataWithoutCIA:
                    self.wikidataWithoutCIA[clakey] = []
                self.wikidataWithoutCIA[clakey].append(paintingdict)
            if cikey:
                if cikey not in self.wikidataWithoutCI:
                    self.wikidataWithoutCI[cikey] = []
                self.wikidataWithoutCI[cikey].append(paintingdict)
            if clkey and clkey != cikey:
                if clkey not in self.wikidataWithoutCI:
                    self.wikidataWithoutCI[clkey] = []
                self.wikidataWithoutCI[clkey].append(paintingdict)
            if iakey:
                if iakey not in self.wikidataWithoutIA:
                    self.wikidataWithoutIA[iakey] = []
                self.wikidataWithoutIA[iakey].append(paintingdict)
            if lakey and lakey != iakey:
                if lakey not in self.wikidataWithoutIA:
                    self.wikidataWithoutIA[lakey] = []
                self.wikidataWithoutIA[lakey].append(paintingdict)
            if cakey:
                if cakey not in self.wikidataWithoutCA:
                    self.wikidataWithoutCA[cakey] = []
                self.wikidataWithoutCA[cakey].append(paintingdict)
def main(*args): """ Process command line arguments and invoke bot. If args is an empty list, sys.argv is used. @param args: command line arguments @type args: list of unicode """ options = {} # what the bot should do (either resolve double redirs, or delete broken # redirs) action = None # where the bot should get his infos from (either None to load the # maintenance special page from the live wiki, or the filename of a # local XML dump file) xmlFilename = None # Which namespace should be processed when using a XML dump # default to -1 which means all namespaces will be processed namespaces = [] # at which redirect shall we start searching double redirects again # (only with dump); default to -1 which means all redirects are checked offset = -1 moved_pages = False fullscan = False start = '' until = '' number = None step = None pagename = None for arg in pywikibot.handle_args(args): if arg == 'double' or arg == 'do': action = 'double' elif arg == 'broken' or arg == 'br': action = 'broken' elif arg == 'both': action = 'both' elif arg == '-fullscan': fullscan = True elif arg.startswith('-xml'): if len(arg) == 4: xmlFilename = i18n.input('pywikibot-enter-xml-filename') else: xmlFilename = arg[5:] elif arg.startswith('-moves'): moved_pages = True elif arg.startswith('-namespace:'): ns = arg[11:] if ns == '': # "-namespace:" does NOT yield -namespace:0 further down the road! ns = i18n.input('pywikibot-enter-namespace-number') # TODO: at least for some generators enter a namespace by its name # or number if ns == '': ns = '0' try: ns = int(ns) except ValueError: # -namespace:all Process all namespaces. # Only works with the API read interface. pass if ns not in namespaces: namespaces.append(ns) elif arg.startswith('-offset:'): offset = int(arg[8:]) elif arg.startswith('-start:'): start = arg[7:] elif arg.startswith('-until:'): until = arg[7:] elif arg.startswith('-total:'): number = int(arg[7:]) elif arg.startswith('-step:'): step = int(arg[6:]) elif arg.startswith('-page:'): pagename = arg[6:] elif arg == '-always': options['always'] = True elif arg == '-delete': options['delete'] = True else: pywikibot.output(u'Unknown argument: %s' % arg) if ( not action or xmlFilename and moved_pages or fullscan and xmlFilename ): problems = [] if xmlFilename and moved_pages: problems += ['Either use a XML file or the moved pages from the API'] if xmlFilename and fullscan: problems += ['Either use a XML file or do a full scan using the API'] pywikibot.bot.suggest_help(additional_text='\n'.join(problems), missing_action=not action) else: pywikibot.Site().login() gen = RedirectGenerator(xmlFilename, namespaces, offset, moved_pages, fullscan, start, until, number, step, pagename) bot = RedirectRobot(action, gen, number=number, **options) bot.run()
def __init__(self): """Initializer.""" self.handle_args() if not self.namespaces and not self.doFailed: if not self.apfrom: # 0 should be after templates ns self.namespaces = [14, 10, 12, 0] else: self.namespaces = [0] if not self.aplimit: self.aplimit = 200 if self.links else 'max' if not self.doFailed: self.queryParams = { 'action': 'query', 'generator': 'allpages', 'gaplimit': self.aplimit, 'gapfilterredir': self.filterredir } else: self.queryParams = {'action': 'query'} if self.apfrom: pywikibot.output('Argument "-from" is ignored with "-failed"') propParam = 'info' if self.links: propParam += '|links|categories' self.queryParams['pllimit'] = 'max' self.queryParams['cllimit'] = 'max' self.queryParams['prop'] = propParam self.site = pywikibot.Site() if len(self.localSuspects) != len(self.latinSuspects): raise ValueError('Suspects must be the same size') if len(self.localKeyboard) != len(self.latinKeyboard): raise ValueError('Keyboard info must be the same size') if not os.path.isabs(self.wikilogfile): self.wikilogfile = pywikibot.config.datafilepath(self.wikilogfile) self.wikilog = self.OpenLogFile(self.wikilogfile) if not os.path.isabs(self.failedTitles): self.failedTitles = pywikibot.config.datafilepath( self.failedTitles) if self.doFailed: with codecs.open(self.failedTitles, 'r', 'utf-8') as f: self.titleList = [self.Page(t) for t in f] self.failedTitles += '.failed' iterzip = zip(self.localSuspects, self.latinSuspects) self.lclToLatDict = {ord(local): latin for local, latin in iterzip} self.latToLclDict = {ord(latin): local for local, latin in iterzip} if self.localKeyboard is not None: iterzip = zip(self.localKeyboard, self.latinKeyboard) self.lclToLatKeybDict = { ord(local): latin for local, latin in iterzip } self.latToLclKeybDict = { ord(latin): local for local, latin in iterzip } else: self.lclToLatKeybDict = {} self.latToLclKeybDict = {} badPtrnStr = '([{ascii}][{local}]|[{local}][{ascii}])'.format( ascii=ascii_letters, local=self.localLtr) self.badWordPtrn = re.compile( '[{ascii}{local}]*{bad}[{ascii}{local}]*'.format( ascii=ascii_letters, local=self.localLtr, bad=badPtrnStr)) self.get_whitelist()
def __init__(self): """ Build all the lookup tables to work on """ self.commons = pywikibot.Site(u'commons', u'commons') self.repo = pywikibot.Site().data_repository() self.commonsNoLink = [] # List of images without a link self.commonsWithoutCIA = { } # Creator, institution & accession number -> image self.commonsWithoutCI = {} # Creator & instution -> image self.commonsWithoutIA = {} # Institution & accession number -> image self.commonsWithoutCA = {} # Creator & accession number -> image self.commonsLink = { } # Dictionary of images with a wikidata link, file -> item self.commonsWithCIA = { } # Creator, institution & accession number -> image & item self.commonsWithCI = {} # Creator & instution -> image & item self.commonsWithIA = { } # Institution & accession number -> image & item self.commonsWithCA = {} # Creator & accession number -> image & item self.bettersuggestions = [] # List of images with better images self.wikidataNoImages = { } # Dictionary of items without images -> item & url self.wikidataWithoutCIA = { } # Creator, institution & accession number -> item & url self.wikidataWithoutCI = {} # Creator & instution -> item & url self.wikidataWithoutIA = { } # Institution & accession number -> item & url self.wikidataWithoutCA = {} # Creator & accession number -> item & url self.wikidataImages = { } # Dictionary of image on wikidata file -> item, image & url self.wikidataWithImages = { } # Dictionary of items with images -> item, image & url self.wikidataWithCIA = { } # Creator, institution & accession number -> item, image & url self.wikidataWithCI = {} # Creator & instution -> item, image & url self.wikidataWithIA = { } # Institution & accession number -> item, image & url self.wikidataWithCA = { } # Creator & accession number -> item, image & url self.categorysuggestions = [ ] # List of images to connect to Wikidata based on category self.getCommonsWithoutLookupTables() print 'self.commonsNoLink %s' % (len(self.commonsNoLink), ) print 'self.commonsWithoutCIA %s' % (len(self.commonsWithoutCIA), ) print 'self.commonsWithoutCI %s' % (len(self.commonsWithoutCI), ) print 'self.commonsWithoutIA %s' % (len(self.commonsWithoutIA), ) print 'self.commonsWithoutCA %s' % (len(self.commonsWithoutCA), ) self.getCommonsWithLookupTables() print 'self.commonsLink %s' % (len(self.commonsLink), ) print 'self.commonsWithCIA %s' % (len(self.commonsWithCIA), ) print 'self.commonsWithCI %s' % (len(self.commonsWithCI), ) print 'self.commonsWithIA %s' % (len(self.commonsWithIA), ) print 'self.commonsWithCA %s' % (len(self.commonsWithCA), ) self.getBetterImageSuggestions() print 'self.bettersuggestions %s' % (len(self.bettersuggestions), ) self.getWikidataLookupTables() print 'self.wikidataNoImages %s' % (len(self.wikidataNoImages), ) print 'self.wikidataWithoutCIA %s' % (len(self.wikidataWithoutCIA), ) print 'self.wikidataWithoutCI %s' % (len(self.wikidataWithoutCI), ) print 'self.wikidataWithoutIA %s' % (len(self.wikidataWithoutIA), ) print 'self.wikidataWithoutCA %s' % (len(self.wikidataWithoutCA), ) print 'self.wikidataImages %s' % (len(self.wikidataImages), ) print 'self.wikidataWithImages %s' % (len(self.wikidataWithImages), ) print 'self.wikidataWithCIA %s' % (len(self.wikidataWithCIA), ) print 'self.wikidataWithCI %s' % (len(self.wikidataWithCI), ) print 'self.wikidataWithIA %s' % (len(self.wikidataWithIA), ) print 'self.wikidataWithCA %s' % (len(self.wikidataWithCA), ) self.getCommonsCategorySuggestions() print 'self.categorysuggestions %s' % (len(self.categorysuggestions), )
import datetime
import traceback

import pywikibot
from pywikibot.comms import http
from pywikibot.pagegenerators import PrefixingPageGenerator

try:
    input = raw_input
except NameError:
    pass

YEAR = datetime.datetime.now().year - 1
LAST = YEAR - 1
SITE = pywikibot.Site()


def is_translation(page):
    url = '%s/index.php?title=%s' % (SITE.scriptpath(),
                                     page.title(asUrl=True))
    return '"wgTranslatePageTranslation":"translation"' in http.request(
        SITE, url)


def setup(src):
    # print(src)
    target = pywikibot.Page(
        SITE,
        src.title().replace(str(YEAR), str(YEAR + 1)).replace(
            str(LAST), str(YEAR)))
    # if target.exists(): return
def main(*args): """ Process command line arguments and invoke bot. If args is an empty list, sys.argv is used. @param args: command line arguments @type args: unicode """ options = {} gen_options = {} # what the bot should do (either resolve double redirs, or process broken # redirs) action = None namespaces = set() source = set() for arg in pywikibot.handle_args(args): arg, sep, value = arg.partition(':') option = arg.partition('-')[2] # bot options if arg == 'do': action = 'double' elif arg == 'br': action = 'broken' elif arg in ('both', 'broken', 'double'): action = arg elif option in ('always', 'delete'): options[option] = True elif option == 'total': options[option] = gen_options[option] = int(value) elif option == 'sdtemplate': options['sdtemplate'] = value or pywikibot.input( 'Which speedy deletion template to use?') # generator options elif option in ('fullscan', 'moves'): gen_options[option] = True source.add(arg) elif option == 'xml': gen_options[option] = value or i18n.input( 'pywikibot-enter-xml-filename') source.add(arg) elif option == 'namespace': # "-namespace:" does NOT yield -namespace:0 further down the road! ns = value or i18n.input('pywikibot-enter-namespace-number') # TODO: at least for some generators enter a namespace by its name # or number if ns == '': ns = '0' try: ns = int(ns) except ValueError: # -namespace:all Process all namespaces. # Only works with the API read interface. pass else: namespaces.add(ns) elif option == 'offset': gen_options[option] = int(value) elif option in ('page', 'start', 'until'): gen_options[option] = value else: pywikibot.output('Unknown argument: ' + arg) if namespaces: gen_options['namespaces'] = namespaces if len(source) > 1: problem = 'You can only use one of {0} options.'.format( ' or '.join(source)) pywikibot.bot.suggest_help(additional_text=problem, missing_action=not action) return if not action: pywikibot.bot.suggest_help(missing_action=True) else: pywikibot.Site().login() options['generator'] = RedirectGenerator(action, **gen_options) bot = RedirectRobot(action, **options) bot.run()
# See https://en.wikiversity.org/wiki/Research_in_programming_Wikidata/Countries
import pywikibot
from pywikibot import pagegenerators

# item is 'country'
# https://query.wikidata.org/#%23List of countries in English and Russian%0ASELECT %3Fcountry %3Flabel_en %3Flabel_ru%0AWHERE%0A{%0A %3Fcountry wdt%3AP31 wd%3AQ6256.%0A %3Fcountry rdfs%3Alabel %3Flabel_en filter (lang(%3Flabel_en) %3D "en").%0A %3Fcountry rdfs%3Alabel %3Flabel_ru filter (lang(%3Flabel_ru) %3D "ru").%0A}
query = 'SELECT ?item ?label_en ?label_ru ' + \
        'WHERE { ' + \
        '  ?item wdt:P31 wd:Q6256.' + \
        '  ?item rdfs:label ?label_en filter (lang(?label_en) = "en").' + \
        '  ?item rdfs:label ?label_ru filter (lang(?label_ru) = "ru").' + \
        '}'  # LIMIT 3'

wikidata_site = pywikibot.Site('wikidata', 'wikidata')
generator = pagegenerators.WikidataSPARQLPageGenerator(query,
                                                       site=wikidata_site)
repo = wikidata_site.data_repository()

mysql_string = """DROP TABLE IF EXISTS `countries`;

CREATE TABLE IF NOT EXISTS `countries` (
  `id` INT NOT NULL,
  `name_en` VARCHAR(45) NULL,
  `name_ru` VARCHAR(45) NULL,
  PRIMARY KEY (`id`))
ENGINE = InnoDB;

"""
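# A hedged sketch (not part of the original script) of how `generator` and
# `mysql_string` above could be combined: one INSERT per country returned by
# the SPARQL query. It assumes each ItemPage exposes `labels` after .get();
# the quote escaping is deliberately naive and for illustration only.
for item in generator:
    item.get()  # load labels from Wikidata
    qid = int(item.title()[1:])  # 'Q30' -> 30
    name_en = item.labels.get('en', '').replace("'", "''")
    name_ru = item.labels.get('ru', '').replace("'", "''")
    mysql_string += ("INSERT INTO `countries` (`id`, `name_en`, `name_ru`) "
                     "VALUES (%d, '%s', '%s');\n" % (qid, name_en, name_ru))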
def main(*args): """ Process command line arguments and invoke bot. If args is an empty list, sys.argv is used. @param args: command line arguments @type args: list of unicode """ generator = None local_args = pywikibot.handle_args(args) site = pywikibot.Site() if site.code != 'commons' or site.family.name != 'commons': pywikibot.warning( 'This script is primarily written for Wikimedia ' 'Commons, but has been invoked with site {0}. It ' 'might work for other sites but there is no ' 'guarantee that it does the right thing.'.format(site)) choice = pywikibot.input_choice( 'How do you want to continue?', (('Continue using {0}'.format(site), 'c'), ('Switch to Wikimedia Commons', 's'), ('Quit', 'q')), automatic_quit=False) if choice == 's': site = pywikibot.Site('commons', 'commons') elif choice == 'q': return False genFactory = pagegenerators.GeneratorFactory(site) for arg in local_args: param_arg, sep, param_value = arg.partition(':') if param_value == '': param_value = None if arg.startswith('-yesterday'): generator = uploadedYesterday(site) issue_deprecation_warning( 'The usage of "-yesterday"', '-logevents:"upload,,YYYYMMDD,YYYYMMDD"', 2, ArgumentDeprecationWarning) elif arg.startswith('-recentchanges'): if param_value is None: arg = arg + ':120,70' issue_deprecation_warning('-recentchanges without parameters', '-recentchanges:offset,duration', 2, ArgumentDeprecationWarning) genFactory.handleArg(arg) else: genFactory.handleArg(arg) generator = genFactory.getCombinedGenerator(gen=generator, preload=True) if not generator: pywikibot.bot.suggest_help(missing_generator=True) return False else: site.login() for page in generator: pywikibot.output(page.title()) if page.exists() and (page.namespace() == 6) \ and (not page.isRedirectPage()): if isUncat(page): addUncat(page) return True
import itertools
import math
import json

import acnutils as utils
import pywikibot
import requests
import toolforge

from string import Template
from dataclasses import dataclass
from datetime import datetime
from typing import Optional, Tuple, Iterator, Iterable, cast, Dict, Union

__version__ = "1.0"

logger = utils.getInitLogger("essayassesment", level="VERBOSE",
                             filename="essayimpact.log")

site = pywikibot.Site("en", "wikipedia")
session = requests.session()
session.headers.update(
    {"User-Agent": toolforge.set_user_agent("anticompositebot")})
simulate = False


@dataclass
class Essay:
    page: pywikibot.Page
    links: Optional[int] = None
    watchers: Optional[int] = None
    views: Optional[int] = None
    score: Optional[float] = None

    def get_views_and_watchers(self) -> None:
#!/usr/bin/python3
import json
import time

import pywikibot

from common import db_settings, utils

conn = db_settings.con_ly()
c = conn.cursor()
sleep_second = 5
site = pywikibot.Site("zh", "wikipedia")
repo = site.data_repository()
ad = 9
election_years = [1989, 1992, 1995, 1998, 2001, 2004, 2008, 2012, 2016, 2020]
election_title = '%d年立法委員選舉' % election_years[ad - 1]
election_id = utils.get_qnumber(wikiarticle=election_title, lang="zh-tw")
election_target = pywikibot.ItemPage(repo, election_id)
term_id = utils.get_qnumber(wikiarticle="第%d屆立法委員" % ad, lang="zh-tw")
term_target = pywikibot.ItemPage(repo, term_id)


def person_qid_from_db(name):
    c.execute(
        '''
        select lm.data->>'wikidata_qid'
        from legislator_legislatordetail l
        left join legislator_legislator lm on l.legislator_id = lm.uid
        where l.ad = %s and l.name = %s
        ''', [ad, name])
    r = c.fetchone()
def run(self): """ Starts the robot. """ for metadata in self.generator: # Buh, for this one I know for sure it's in there #print metadata[u'id'] #print metadata[u'url'] # Do some url magic so that all url fields are always filled if not metadata.get('refurl'): metadata['refurl'] = metadata['url'] if not metadata.get('idrefurl'): metadata['idrefurl'] = metadata['refurl'] if not metadata.get('describedbyurl'): metadata['describedbyurl'] = metadata['url'] artworkItem = None newclaims = [] if metadata[u'id'] in self.artworkIds: artworkItemTitle = self.artworkIds.get(metadata[u'id']) print(artworkItemTitle) artworkItem = pywikibot.ItemPage(self.repo, title=artworkItemTitle) elif self.create: #Break for now #print u'Let us create stuff' #continue #print u'WTFTFTFTFT???' #print 'bla' data = { 'labels': {}, 'descriptions': {}, } # loop over stuff if metadata.get('title'): for lang, label in metadata['title'].items(): data['labels'][lang] = { 'language': lang, 'value': label } if metadata.get('description'): for lang, description in metadata['description'].items(): data['descriptions'][lang] = { 'language': lang, 'value': description } identification = {} summary = u'Creating new item with data from %s ' % ( metadata[u'url'], ) pywikibot.output(summary) try: result = self.repo.editEntity(identification, data, summary=summary) except pywikibot.exceptions.APIError: ## TODO: Check if this is pywikibot.OtherPageSaveError too ## We got ourselves a duplicate label and description, let's correct that by adding collection and the id pywikibot.output( u'Oops, already had that one. Trying again') for lang, description in metadata['description'].items(): data['descriptions'][lang] = { 'language': lang, 'value': u'%s (%s %s)' % ( description, metadata['inception'], metadata['id'], ) } result = self.repo.editEntity(identification, data, summary=summary) pass # Crash here artworkItemTitle = result.get(u'entity').get('id') # Wikidata is sometimes lagging. 
Wait for 10 seconds before trying to actually use the item time.sleep(10) artworkItem = pywikibot.ItemPage(self.repo, title=artworkItemTitle) # Add to self.artworkIds so that we don't create dupes self.artworkIds[metadata[u'id']] = artworkItemTitle # Add the id to the item so we can get back to it later newclaim = pywikibot.Claim(self.repo, self.idProperty) newclaim.setTarget(metadata[u'id']) pywikibot.output('Adding new id claim to %s' % artworkItem) artworkItem.addClaim(newclaim) #self.addReference(artworkItem, newclaim, metadata[u'idrefurl']) #newqualifier = pywikibot.Claim(self.repo, u'P195') #Add collection, isQualifier=True #newqualifier.setTarget(self.collectionitem) #pywikibot.output('Adding new qualifier claim to %s' % artworkItem) #newclaim.addQualifier(newqualifier) #collectionclaim = pywikibot.Claim(self.repo, u'P195') #collectionclaim.setTarget(self.collectionitem) #pywikibot.output('Adding collection claim to %s' % artworkItem) #artworkItem.addClaim(collectionclaim) ## Add the date they got it as a qualifier to the collection #if metadata.get(u'acquisitiondate'): # if type(metadata[u'acquisitiondate']) is int or (len(metadata[u'acquisitiondate'])==4 and \ # metadata[u'acquisitiondate'].isnumeric()): # It's a year # acdate = pywikibot.WbTime(year=metadata[u'acquisitiondate']) # colqualifier = pywikibot.Claim(self.repo, u'P580') # colqualifier.setTarget(acdate) # pywikibot.output('Adding new acquisition date qualifier claim to collection on %s' % artworkItem) # collectionclaim.addQualifier(colqualifier) ## FIXME: Still have to rewrite this part ''' if metadata.get(u'acquisitiondate'): colqualifier = pywikibot.Claim(self.repo, u'P580') acdate = None if len(painting[u'acquisitiondate'])==4 and painting[u'acquisitiondate'].isnumeric(): # It's a year acdate = pywikibot.WbTime(year=painting[u'acquisitiondate']) elif len(painting[u'acquisitiondate'].split(u'-', 2))==3: (acday, acmonth, acyear) = painting[u'acquisitiondate'].split(u'-', 2) acdate = pywikibot.WbTime(year=int(acyear), month=int(acmonth), day=int(acday)) if acdate: colqualifier.setTarget(acdate) ''' #self.addReference(artworkItem, collectionclaim, metadata[u'refurl']) if artworkItem and artworkItem.exists(): metadata['wikidata'] = artworkItem.title() data = artworkItem.get() claims = data.get('claims') # Add missing labels # FIXME: Move to a function # FIXME Do something with aliases too labels = data.get('labels') if metadata.get('title'): labelschanged = False for lang, label in metadata['title'].items(): if lang not in labels: labels[lang] = label labelschanged = True if labelschanged: summary = u'Adding missing label(s) from %s' % ( metadata.get(u'refurl'), ) try: artworkItem.editLabels(labels, summary=summary) except pywikibot.OtherPageSaveError: # Just skip it for no pywikibot.output( u'Oops, already had that label/description combination. 
Skipping' ) pass """ # Add missing descriptions # FIXME Move to a function descriptions = copy.deepcopy(data.get('descriptions')) if metadata.get('description'): descriptionschanged = False for lang, description in metadata['description'].items(): if lang not in descriptions: descriptions[lang] = description descriptionschanged = True if descriptionschanged: summary = u'Adding missing description(s) from %s' % (metadata.get(u'refurl'),) try: artworkItem.editDescriptions(descriptions, summary=summary) except pywikibot.exceptions.OtherPageSaveError: # pywikibot.exceptions.APIError: # We got ourselves a duplicate label and description, let's correct that by adding collection and the id descriptions = copy.deepcopy(data.get('descriptions')) pywikibot.output(u'Oops, already had that label/description combination. Trying again') for lang, description in metadata['description'].items(): if lang not in descriptions: descriptions[lang] = u'%s (%s %s)' % (description, metadata['collectionshort'], metadata['id'],) artworkItem.editDescriptions(descriptions, summary=summary) pass #print claims """ # instance of self.addItemStatement(artworkItem, u'P31', metadata.get(u'instanceofqid'), metadata.get(u'refurl')) # country self.addItemStatement(artworkItem, u'P17', metadata.get(u'countryqid'), metadata.get(u'refurl')) # adminlocation self.addItemStatement(artworkItem, u'P131', metadata.get(u'adminlocationqid'), metadata.get(u'refurl')) # location self.addItemStatement(artworkItem, u'P276', metadata.get(u'locationqid'), metadata.get(u'refurl')) # creator self.addItemStatement(artworkItem, u'P170', metadata.get(u'creatorqid'), metadata.get(u'refurl')) # genre self.addItemStatement(artworkItem, u'P136', metadata.get(u'genreqid'), metadata.get(u'refurl')) # Inception if u'P571' not in claims and metadata.get(u'inception'): if type(metadata[u'inception']) is int or (len(metadata[u'inception'])==4 and \ metadata[u'inception'].isnumeric()): # It's a year newdate = pywikibot.WbTime(year=metadata[u'inception']) newclaim = pywikibot.Claim(self.repo, u'P571') newclaim.setTarget(newdate) pywikibot.output( 'Adding date of creation claim to %s' % artworkItem) artworkItem.addClaim(newclaim) self.addReference(artworkItem, newclaim, metadata[u'refurl']) # TODO: Implement circa if metadata.get('image') and u'P18' not in claims: print u'no image found' # Construct newclaim = pywikibot.Claim(self.repo, u'P18') commonssite = pywikibot.Site("commons", "commons") imagelink = pywikibot.Link(metadata.get('image'), source=commonssite, defaultNamespace=6) image = pywikibot.ImagePage(imagelink) if image.isRedirectPage(): image = pywikibot.ImagePage(image.getRedirectTarget()) if not image.exists(): pywikibot.output( '[[%s]] doesn\'t exist so I can\'t link to it' % (image.title(), )) else: newclaim.setTarget(image) pywikibot.output( 'Adding %s --> %s' % (newclaim.getID(), newclaim.getTarget())) artworkItem.addClaim(newclaim) if metadata.get('commonscat') and u'P373' not in claims: print u'no image found' # Construct newclaim = pywikibot.Claim(self.repo, u'P373') commonssite = pywikibot.Site("commons", "commons") commonslink = pywikibot.Link(metadata.get('commonscat'), source=commonssite, defaultNamespace=14) commonscat = pywikibot.Page(commonslink) if commonscat.isRedirectPage(): commonscat = pywikibot.Page( commonscat.getRedirectTarget()) if not commonscat.exists(): pywikibot.output( '[[%s]] doesn\'t exist so I can\'t link to it' % (commonscat.title(), )) else: newclaim.setTarget( commonscat.title(withNamespace=False)) 
pywikibot.output( 'Adding %s --> %s' % (newclaim.getID(), newclaim.getTarget())) artworkItem.addClaim(newclaim) if metadata.get('lat') and metadata.get( 'lon') and u'P625' not in claims: print u'no coordinates found' # Build coordinates and add them coordinate = pywikibot.Coordinate(metadata.get('lat'), metadata.get('lon'), dim=100) newclaim = pywikibot.Claim(self.repo, u'P625') newclaim.setTarget(coordinate) pywikibot.output( u'Adding %s, %s to %s' % (coordinate.lat, coordinate.lon, artworkItem.title())) artworkItem.addClaim(newclaim)
# (C) Pywikibot team, 2008-2014
#
# Distributed under the terms of the MIT license.
#
__version__ = '$Id: a8b52e17391069295e9ae0f9a22c3c57117e5340 $'
#

import os
import json

import pywikibot
from pywikibot import pagegenerators
from pywikibot.data.api import APIError

from tests.utils import PywikibotTestCase, unittest

site = pywikibot.Site('en', 'wikipedia')
mainpage = pywikibot.Page(pywikibot.page.Link("Main Page", site))
wikidata = site.data_repository()


# fetch a page which is very likely to be unconnected, which doesn't have
# a generator, and unit tests may be used to test old versions of pywikibot
def get_test_unconnected_page(site):
    gen = pagegenerators.NewpagesPageGenerator(site=site, total=1)
    return next(gen)


class TestGeneral(PywikibotTestCase):

    def testWikibase(self):
        if not site.has_transcluded_data:
def main(*args): """ Process command line arguments and invoke bot. If args is an empty list, sys.argv is used. @param args: command line arguments @type args: list of unicode """ for arg in pywikibot.handle_args(args): arg, sep, val = arg.partition(':') if arg == '-edit': globalvar.attachEditCount = int(val or pywikibot.input( 'After how many edits would you like to welcome new users? ' '(0 is allowed)')) elif arg == '-timeoffset': globalvar.timeoffset = int(val or pywikibot.input( 'Which time offset (in minutes) for new users would you like ' 'to use?')) elif arg == '-time': globalvar.timeRecur = int(val or pywikibot.input( 'For how many seconds would you like to bot to sleep before ' 'checking again?')) elif arg == '-offset': if not val: val = pywikibot.input( 'Which time offset for new users would you like to use? ' '(yyyymmddhhmmss)') try: globalvar.offset = pywikibot.Timestamp.fromtimestampformat(val) except ValueError: # upon request, we might want to check for software version here raise ValueError( "Mediawiki has changed, -offset:# is not supported " "anymore, but -offset:TIMESTAMP is, assuming TIMESTAMP " "is yyyymmddhhmmss. -timeoffset is now also supported. " "Please read this script source header for documentation.") elif arg == '-file': globalvar.randomSign = True globalvar.signFileName = val or pywikibot.input( 'Where have you saved your signatures?') elif arg == '-sign': globalvar.defaultSign = val or pywikibot.input( 'Which signature to use?') globalvar.defaultSign += timeselected elif arg == '-break': globalvar.recursive = False elif arg == '-nlog': globalvar.makeWelcomeLog = False elif arg == '-ask': globalvar.confirm = True elif arg == '-filter': globalvar.filtBadName = True elif arg == '-savedata': globalvar.saveSignIndex = True elif arg == '-random': globalvar.randomSign = True elif arg == '-sul': globalvar.welcomeAuto = True elif arg == '-limit': globalvar.queryLimit = int(val or pywikibot.input( u'How many of the latest new users would you like to load?')) elif arg == '-numberlog': globalvar.dumpToLog = int(val or pywikibot.input( 'After how many welcomed users would you like to update the ' 'welcome log?')) elif arg == '-quiet': globalvar.quiet = True elif arg == '-quick': issue_deprecation_warning('The usage of "-quick" option', None, 2) # Filename and Pywikibot path # file where is stored the random signature index filename = pywikibot.config.datafilepath( 'welcome-%s-%s.data' % (pywikibot.Site().family.name, pywikibot.Site().code)) if globalvar.offset and globalvar.timeoffset: pywikibot.warning( 'both -offset and -timeoffset were provided, ignoring -offset') globalvar.offset = 0 try: bot = WelcomeBot() except KeyError as error: # site not managed by welcome.py pywikibot.bot.suggest_help(exception=error) return False try: bot.run() except KeyboardInterrupt: if bot.welcomed_users: showStatus() pywikibot.output("Put welcomed users before quit...") bot.makelogpage(bot.welcomed_users) pywikibot.output("\nQuitting...") finally: # If there is the savedata, the script must save the number_user. if globalvar.randomSign and globalvar.saveSignIndex and \ bot.welcomed_users: if sys.version_info[0] > 2: import pickle as cPickle else: import cPickle with open(filename, 'wb') as f: cPickle.dump(bot.welcomed_users, f, protocol=config.pickle_protocol)
import re

import pywikibot as pwb
from pywikibot import pagegenerators as pg

paraules = {
    # Add the word using a regexp and, next to it, the word without the
    # regexp (see the example). You can add as many words as you want.
    u' ([Pp]ag(e|es))( |\.|\,)': u'Page',
}


def main(page, paraula):
    if page.title() == paraules[paraula]:
        print(u'Skipping %s because it is the link page' % str(page))
        return
    substitucio = ' [[' + paraules[paraula] + '|\\1]]\\3'
    #site = pwb.Site()
    #page = pwb.Page(site, page)
    print(u'Page: %s' % page)
    text = page.text
    comptador = re.search(paraula, text)
    if comptador:
        print('======= EDITING PAGE %s! =======' % page)
        noutext = re.sub(paraula, substitucio, text)
        page.text = noutext
        page.save(u'Bot: Adding links for %s' % paraules[paraula])
    else:
        print('No links added')
    return


if __name__ == '__main__':
    allpages = pg.AllpagesPageGenerator(site=pwb.Site(), start='!',
                                        namespace=0, includeredirects=True)
    pages = pg.PreloadingGenerator(allpages, pageNumber=100)
    for page in pages:
        for paraula in paraules:
            main(page, paraula)
    print('\nFinished!')
def get(site=None): """Load the watchlist, fetching it if necessary.""" if site is None: site = pywikibot.Site() watchlist = [p.title() for p in site.watched_pages()] return watchlist
def main(*args): """ Process command line arguments and invoke bot. If args is an empty list, sys.argv is used. @param args: command line arguments @type args: list of unicode """ password = None sysop = False logall = False logout = False oauth = False autocreate = False unknown_args = [] for arg in pywikibot.handle_args(args): if arg.startswith("-pass"): if len(arg) == 5: password = pywikibot.input( 'Password for all accounts (no characters will be shown):', password=True) else: password = arg[6:] elif arg == "-sysop": sysop = True elif arg == "-all": logall = True elif arg == "-force": pywikibot.output( u"To force a re-login, please delete the revelant " u"lines from '%s' (or the entire file) and try again." % join(config.base_dir, 'pywikibot.lwp')) elif arg == "-logout": logout = True elif arg == '-oauth': oauth = True elif arg == '-autocreate': autocreate = True else: unknown_args += [arg] if unknown_args: pywikibot.bot.suggest_help(unknown_parameters=unknown_args) return False if password is not None: pywikibot.warning('The -pass argument is not implemented yet. See: ' 'https://phabricator.wikimedia.org/T102477') if logall: if sysop and not oauth: namedict = config.sysopnames else: namedict = config.usernames else: site = pywikibot.Site() namedict = {site.family.name: {site.code: None}} for family_name in namedict: for lang in namedict[family_name]: try: site = pywikibot.Site(code=lang, fam=family_name) if oauth: _oauth_login(site) continue if logout: site.logout() else: site.login(sysop, autocreate=autocreate) user = site.user() if user: pywikibot.output('Logged in on {0} as {1}.'.format( site, user)) else: if logout: pywikibot.output('Logged out of {0}.'.format(site)) else: pywikibot.output('Not logged in on {0}.'.format(site)) except SiteDefinitionError: pywikibot.output(u'%s.%s is not a valid site, please remove it' ' from your config' % (lang, family_name))
# for more information see [[fa:ویکی‌پدیا:درخواست‌های ربات/رده همسنگ]]
# and [[fa:ویکی‌پدیا:رده‌دهی مقالات همسنگ]]
import re
import sys
import codecs
import string
import time

import MySQLdb

import pywikibot
from pywikibot import config
from pywikibot import pagegenerators

import durusti_core

_cache = {}
page_list_run = []

#-----------------------------------------------version-----------------------------------------
fa_site = pywikibot.Site('sd', 'wikipedia')
en_site = pywikibot.Site('en', 'wikipedia')
versionpage = pywikibot.Page(fa_site, u'صارف:ZumrahBot/مساوی زمرہ جات/نسخہ')
lastversion = versionpage.get().strip()
version = u'30'
new_edition = u'1'
if lastversion != version:
    pywikibot.output(
        u"\03{lightred}Your bot doesn't use the latest version, "
        u'please update me!\03{default}')
    pywikibot.stopme()
    sys.exit()
#-----------------------------------------------------------------------------------------------


def namespacefinder(enlink, site):
def __init__(self): """Initializer with arg parsing.""" for arg in pywikibot.handle_args(): arg, sep, value = arg.partition(':') if arg == '-from': self.apfrom = value or pywikibot.input( 'Which page to start from: ') elif arg == '-reqsize': self.aplimit = int(value) elif arg == '-links': self.links = True elif arg == '-linksonly': self.links = True self.titles = False elif arg == '-replace': self.replace = True elif arg == '-redir': self.filterredir = 'all' elif arg == '-redironly': self.filterredir = 'redirects' elif arg == '-limit': self.stopAfter = int(value) elif arg in ('-autonomous', '-a'): self.autonomous = True elif arg == '-ns': self.namespaces.append(int(value)) elif arg == '-wikilog': self.wikilogfile = value elif arg == '-failedlog': self.failedTitles = value elif arg == '-failed': self.doFailed = True else: pywikibot.output(u'Unknown argument %s.' % arg) pywikibot.showHelp() sys.exit() if self.namespaces == [] and not self.doFailed: if self.apfrom == u'': # 0 should be after templates ns self.namespaces = [14, 10, 12, 0] else: self.namespaces = [0] if self.aplimit is None: self.aplimit = 200 if self.links else 'max' if not self.doFailed: self.queryParams = { 'action': 'query', 'generator': 'allpages', 'gaplimit': self.aplimit, 'gapfilterredir': self.filterredir } else: self.queryParams = {'action': 'query'} if self.apfrom != u'': pywikibot.output(u'Argument "-from" is ignored with "-failed"') propParam = 'info' if self.links: propParam += '|links|categories' self.queryParams['pllimit'] = 'max' self.queryParams['cllimit'] = 'max' self.queryParams['prop'] = propParam self.site = pywikibot.Site() if len(self.localSuspects) != len(self.latinSuspects): raise ValueError(u'Suspects must be the same size') if len(self.localKeyboard) != len(self.latinKeyboard): raise ValueError(u'Keyboard info must be the same size') if not os.path.isabs(self.wikilogfile): self.wikilogfile = pywikibot.config.datafilepath(self.wikilogfile) self.wikilog = self.OpenLogFile(self.wikilogfile) if not os.path.isabs(self.failedTitles): self.failedTitles = pywikibot.config.datafilepath( self.failedTitles) if self.doFailed: with codecs.open(self.failedTitles, 'r', 'utf-8') as f: self.titleList = [self.Page(t) for t in f] self.failedTitles += '.failed' ziplist = zip(self.localSuspects, self.latinSuspects) self.lclToLatDict = {ord(local): latin for local, latin in ziplist} self.latToLclDict = {ord(latin): local for local, latin in ziplist} if self.localKeyboard is not None: ziplist = zip(self.localKeyboard, self.latinKeyboard) self.lclToLatKeybDict = { ord(local): latin for local, latin in ziplist } self.latToLclKeybDict = { ord(latin): local for local, latin in ziplist } else: self.lclToLatKeybDict = {} self.latToLclKeybDict = {} badPtrnStr = u'([%s][%s]|[%s][%s])' \ % (ascii_letters, self.localLtr, self.localLtr, ascii_letters) self.badWordPtrn = re.compile( u'[%s%s]*%s[%s%s]*' % (ascii_letters, self.localLtr, badPtrnStr, ascii_letters, self.localLtr)) # Get whitelist self.knownWords = set() self.seenUnresolvedLinks = set() # TODO: handle "continue" if self.site.code in self.whitelists: wlpage = self.whitelists[self.site.code] pywikibot.output(u'Loading whitelist from %s' % wlpage) wlparams = { 'action': 'query', 'prop': 'links', 'titles': wlpage, 'redirects': '', 'indexpageids': '', 'pllimit': 'max', } req = api.Request(site=self.site, parameters=wlparams) data = req.submit() if len(data['query']['pageids']) == 1: pageid = data['query']['pageids'][0] links = data['query']['pages'][pageid]['links'] allWords = [ 
nn for n in links for nn in self.FindBadWords(n['title']) ] self.knownWords = set(allWords) else: raise ValueError(u'The number of pageids is not 1') pywikibot.output(u'Loaded whitelist with %i items' % len(self.knownWords)) if len(self.knownWords) > 0: pywikibot.log(u'Whitelist: %s' % u', '.join( [self.MakeLink(i, False) for i in self.knownWords])) else: pywikibot.output(u'Whitelist is not known for language %s' % self.site.code)
def encatlist(encat):
    count = 0
    listenpageTitle = []
    encat = encat.replace(u'[[', u'').replace(u']]', u'').replace(
        u'Category:', u'').replace(u'category:', u'').strip()
    language = 'en'
    encat = pywikibot.Category(pywikibot.Site(language), encat)
    listacategory = [encat]

    for enpageTitle in listacategory:
        try:
            fapages = pagefafinder(enpageTitle)
            if fapages is not False:
                for pages, profix_fa in fapages:
                    if profix_fa == '14':
                        pages = u'Category:' + unicode(pages, 'UTF-8')
                    elif profix_fa == '12':
                        pages = u'Help:' + unicode(pages, 'UTF-8')
                    elif profix_fa == '10':
                        pages = u'Template:' + unicode(pages, 'UTF-8')
                    elif profix_fa == '6':
                        pages = u'File:' + unicode(pages, 'UTF-8')
                    elif profix_fa == '4':
                        pages = u'Wikipedia:' + unicode(pages, 'UTF-8')
                    elif profix_fa == '100':
                        pages = u'Portal:' + unicode(pages, 'UTF-8')
                    elif profix_fa in ['1', '2', '3', '5', '7', '8', '9',
                                       '11', '13', '15', '101', '103',
                                       '118', '119', '446', '447', '828',
                                       '829']:
                        continue
                    else:
                        pages = unicode(pages, 'UTF-8')
                    pywikibot.output(u'\03{lightgreen}Adding ' + pages +
                                     u' to fapage lists\03{default}')
                    listenpageTitle.append(pages)
        except:
            try:
                enpageTitle = unicode(
                    str(enpageTitle),
                    'UTF-8').split(u'|')[0].split(u']]')[0].replace(
                        u'[[', u'').strip()
            except:
                enpageTitle = enpageTitle.split(u'|')[0].split(
                    u']]')[0].replace(u'[[', u'').strip()
            cat = pywikibot.Category(pywikibot.Site(language), enpageTitle)
            gent = pagegenerators.CategorizedPageGenerator(cat)
            for pagework in gent:
                count += 1
                try:
                    link = str(pagework).split(u'|')[0].split(
                        u']]')[0].replace(u'[[', u'').strip()
                except:
                    pagework = unicode(str(pagework), 'UTF-8')
                    link = pagework.split(u'|')[0].split(u']]')[0].replace(
                        u'[[', u'').strip()
                pywikibot.output(link)
                fapagetitle = englishdictionry(link, en_site, fa_site)
                if fapagetitle is False:
                    continue
                else:
                    pywikibot.output(u'\03{lightgreen}Adding ' +
                                     fapagetitle +
                                     u' to fapage lists\03{default}')
                    listenpageTitle.append(fapagetitle)

    if listenpageTitle == []:
        return False, False
    return listenpageTitle, listacategory
# -*- coding: utf-8 -*-
import os
import sys
import json

from config import config_page_name  # pylint: disable=E0611,W0614

# must be set before pywikibot is imported
os.environ['PYWIKIBOT_DIR'] = os.path.dirname(os.path.realpath(__file__))
os.environ['TZ'] = 'UTC'

import pywikibot

if len(sys.argv) < 2:
    exit('no pagename provided.\n')

site = pywikibot.Site()
site.login()

config_page = pywikibot.Page(site, config_page_name)
cfg = config_page.text
cfg = json.loads(cfg)['G15_4']
print(json.dumps(cfg, indent=4, ensure_ascii=False))

if not cfg['enable']:
    exit('disabled\n')

pagename = sys.argv[1]
mainpage = pywikibot.Page(site, pagename)
if mainpage.isTalkPage():
    talkpage = mainpage
    mainpage = talkpage.toggleTalkPage()
def save(self, title, contents):
    """Upload page content."""
    mysite = pywikibot.Site()
    page = pywikibot.Page(mysite, title)
    self.current_page = page

    if self.getOption('summary'):
        comment = self.getOption('summary')
    else:
        comment = i18n.twtranslate(mysite, 'pagefromfile-msg')

    comment_top = comment + ' - ' + i18n.twtranslate(
        mysite, 'pagefromfile-msg_top')
    comment_bottom = comment + ' - ' + i18n.twtranslate(
        mysite, 'pagefromfile-msg_bottom')
    comment_force = '%s *** %s ***' % (
        comment, i18n.twtranslate(mysite, 'pagefromfile-msg_force'))

    # Remove leading newlines (cause troubles when creating redirects)
    contents = re.sub('^[\r\n]*', '', contents)

    if page.exists():
        if not self.getOption('redirect') and page.isRedirectPage():
            pywikibot.output(u'Page %s is redirect, skipping!' % title)
            return
        pagecontents = page.get(get_redirect=True)
        nocontent = self.getOption('nocontent')
        if nocontent and (nocontent in pagecontents
                          or nocontent.lower() in pagecontents):
            pywikibot.output('Page has %s so it is skipped' % nocontent)
            return
        if self.getOption('append'):
            separator = self.getOption('append')[1]
            if separator == r'\n':
                separator = '\n'
            if self.getOption('append')[0] == 'top':
                above, below = contents, pagecontents
                comment = comment_top
            else:
                above, below = pagecontents, contents
                comment = comment_bottom
            pywikibot.output('Page {0} already exists, appending on {1}!'
                             .format(title, self.getOption('append')[0]))
            contents = above + separator + below
        elif self.getOption('force'):
            pywikibot.output(u'Page %s already exists, ***overwriting!'
                             % title)
            comment = comment_force
        else:
            pywikibot.output(u'Page %s already exists, not adding!' % title)
            return
    else:
        if self.getOption('autosummary'):
            comment = ''
            config.default_edit_summary = ''

    self.userPut(page, page.text, contents, summary=comment,
                 minor=self.getOption('minor'),
                 show_diff=self.getOption('showdiff'),
                 ignore_save_related_errors=True)
#     time.sleep(seconds)

pywikibot.throttle.Throttle.wait = wait

# # if len(sys.argv) == 1:
# #     raise ValueError('Missing input CSV file')
# # csv_path = sys.argv[1]
# # csv_file = open(csv_path, 'r')
# # csv_reader = csv.DictReader(csv_file)

# # If you changed the name of the site to something else make sure to
# # change it here
site = pywikibot.Site('ldwg', 'ldwg')
site.login()
repo = site.data_repository()

some_labels = {"en": "Clifford B. Anderson"}
new_item = pywikibot.ItemPage(repo)
new_item.editLabels(labels=some_labels, summary="Setting labels")

claim = pywikibot.Claim(repo, u'P4')  # employer
target = pywikibot.ItemPage(repo, u"Q3")  # Vanderbilt University
claim.setTarget(target)
new_item.addClaim(claim, summary=u'Adding employer claim')

claim = pywikibot.Claim(repo, u'P6')  # instance of
target = pywikibot.ItemPage(repo, u"Q5")  # human
# The original excerpt stops here; attaching the target and saving the
# claim, as done for the employer claim above, is the assumed continuation.
claim.setTarget(target)
new_item.addClaim(claim, summary=u'Adding instance of claim')
def main():
    wdsite = pywikibot.Site('wikidata', 'wikidata')
    repo = wdsite.data_repository()
    langs = ['en', 'fr', 'de']
    for lang in langs:
        wikisite = pywikibot.Site(lang, 'wikipedia')
        total = 100
        if len(sys.argv) >= 2:
            total = int(sys.argv[1])
        gen = pagegenerators.NewpagesPageGenerator(site=wikisite,
                                                   namespaces=[0],
                                                   total=total)
        #cat = pywikibot.Category(wikisite, 'Category:Articles without Wikidata item')
        #gen = pagegenerators.CategorizedPageGenerator(cat, recurse=False)
        pre = pagegenerators.PreloadingGenerator(gen, groupsize=50)
        for page in pre:
            if page.isRedirectPage():
                continue
            if not pageIsBiography(page=page, lang=lang):
                continue
            print('\n==', page.title().encode('utf-8'), '==')
            gender = calculateGender(page=page, lang=lang)
            item = ''
            try:
                item = pywikibot.ItemPage.fromPage(page)
            except:
                pass
            if item:
                print('Page has item')
                print('https://www.wikidata.org/wiki/%s' % (item.title()))
                addBiographyClaims(repo=repo, wikisite=wikisite, item=item,
                                   page=page, lang=lang)
            else:
                print('Page without item')
                # search for a valid item, otherwise create
                if authorIsNewbie(page=page, lang=lang):
                    print('Newbie author, checking quality...')
                    if pageIsRubbish(page=page, lang=lang) or \
                            (not pageCategories(page=page, lang=lang)) or \
                            (not pageReferences(page=page, lang=lang)) or \
                            (not len(list(page.getReferences(
                                namespaces=[0])))):
                        print("Page didn't pass minimum quality, skipping")
                        continue

                print(page.title().encode('utf-8'), 'need item', gender)
                wtitle = page.title()
                wtitle_ = wtitle.split('(')[0].strip()
                searchitemurl = 'https://www.wikidata.org/w/api.php?action=wbsearchentities&search=%s&language=%s&format=xml' % (urllib.parse.quote(wtitle_), lang)
                raw = getURL(searchitemurl)
                print(searchitemurl.encode('utf-8'))

                # check birthdate and if it matches, then add data
                numcandidates = ''  # do not set to zero
                if '<search />' not in raw:
                    m = re.findall(r'id="(Q\d+)"', raw)
                    numcandidates = len(m)
                    print('Found %s candidates' % (numcandidates))
                    if numcandidates > 5:
                        # too many candidates, skipping
                        print('Too many, skipping')
                        continue
                    for itemfoundq in m:
                        itemfound = pywikibot.ItemPage(repo, itemfoundq)
                        itemfound.get()
                        if ('%swiki' % (lang)) in itemfound.sitelinks:
                            print('Candidate %s has sitelink, skipping'
                                  % (itemfoundq))
                            numcandidates -= 1
                            continue
                        pagebirthyear = calculateBirthDate(page=page,
                                                           lang=lang)
                        pagebirthyear = pagebirthyear and int(
                            pagebirthyear.split('-')[0]) or ''
                        if not pagebirthyear:
                            print("Page doesn't have birthdate, skipping")
                            # break, don't continue. Without birthdate we
                            # can't decide correctly
                            break
                        # https://www.wikidata.org/wiki/Help:Dates#Precision
                        if 'P569' in itemfound.claims and \
                                itemfound.claims['P569'][0].getTarget() \
                                .precision in [9, 10, 11]:
                            itemfoundbirthyear = int(
                                itemfound.claims['P569'][0]
                                .getTarget().year)
                            print('candidate birthdate = %s, '
                                  'page birthdate = %s'
                                  % (itemfoundbirthyear, pagebirthyear))
                            mindatelen = 4
                            if len(str(itemfoundbirthyear)) != mindatelen \
                                    or len(str(pagebirthyear)) != mindatelen:
                                print('%s birthdate length != %s'
                                      % (itemfoundq, mindatelen))
                                continue
                            # reduce candidates if birthyears are different
                            minyeardiff = 3
                            if itemfoundbirthyear >= \
                                    pagebirthyear + minyeardiff or \
                                    itemfoundbirthyear <= \
                                    pagebirthyear - minyeardiff:
                                print('Candidate %s birthdate out of '
                                      'range, skipping' % (itemfoundq))
                                numcandidates -= 1
                                continue
                            # but only assume it is the same person if
                            # birthyears match
                            if itemfoundbirthyear == pagebirthyear:
                                print('%s birthyear found in candidate %s. '
                                      'Category:%s births found in page. '
                                      'OK!' % (itemfoundbirthyear,
                                               itemfoundq,
                                               itemfoundbirthyear))
                                print('Adding sitelink %s:%s'
                                      % (lang,
                                         page.title().encode('utf-8')))
                                try:
                                    itemfound.setSitelink(
                                        page,
                                        summary='BOT - Adding 1 sitelink: '
                                                '[[:%s:%s|%s]] (%s)'
                                                % (lang, page.title(),
                                                   page.title(), lang))
                                except:
                                    print('Error adding sitelink. '
                                          'Skipping.')
                                    break
                                addBiographyClaims(repo=repo,
                                                   wikisite=wikisite,
                                                   item=itemfound,
                                                   page=page, lang=lang)
                                break

                # no item found, or no candidates are useful
                if '<search />' in raw or (numcandidates == 0):
                    print('No useful item found. Creating a new one...')
                    # create item
                    newitemlabels = {lang: wtitle_}
                    newitem = pywikibot.ItemPage(repo)
                    newitem.editLabels(
                        labels=newitemlabels,
                        summary='BOT - Creating item for [[:%s:%s|%s]] '
                                '(%s): %s %s' % (lang, wtitle, wtitle,
                                                 lang, 'human', gender))
                    newitem.get()
                    try:
                        newitem.setSitelink(
                            page,
                            summary='BOT - Adding 1 sitelink: '
                                    '[[:%s:%s|%s]] (%s)'
                                    % (lang, page.title(), page.title(),
                                       lang))
                    except:
                        print('Error adding sitelink. Skipping.')
                        break
                    addBiographyClaims(repo=repo, wikisite=wikisite,
                                       item=newitem, page=page, lang=lang)