def main(*args): """ Process command line arguments and invoke bot. If args is an empty list, sys.argv is used. @param args: command line arguments @type args: list of unicode """ # the option that's always selected when the bot wonders what to do with # a link. If it's None, the user is prompted (default behaviour). always = None alternatives = [] getAlternatives = True dnSkip = False generator = None primary = False main_only = False # For sorting the linked pages, case can be ignored minimum = 0 local_args = pywikibot.handle_args(args) generator_factory = pagegenerators.GeneratorFactory( positional_arg_name='page') for arg in local_args: if arg.startswith('-primary:'): primary = True getAlternatives = False alternatives.append(arg[9:]) elif arg == '-primary': primary = True elif arg.startswith('-always:'): always = arg[8:] elif arg.startswith('-pos:'): if arg[5] != ':': mysite = pywikibot.Site() page = pywikibot.Page(pywikibot.Link(arg[5:], mysite)) if page.exists(): alternatives.append(page.title()) else: if pywikibot.input_yn( u'Possibility %s does not actually exist. Use it ' 'anyway?' % page.title(), default=False, automatic_quit=False): alternatives.append(page.title()) else: alternatives.append(arg[5:]) elif arg == '-just': getAlternatives = False elif arg == '-dnskip': dnSkip = True elif arg == '-main': main_only = True elif arg.startswith('-min:'): minimum = int(arg[5:]) elif arg.startswith('-start'): try: generator = pagegenerators.CategorizedPageGenerator( pywikibot.Site().disambcategory(), start=arg[7:], namespaces=[0]) except pywikibot.NoPage: pywikibot.output("Disambiguation category for your wiki is not known.") raise else: generator_factory.handleArg(arg) site = pywikibot.Site() generator = generator_factory.getCombinedGenerator(generator) if not generator: pywikibot.bot.suggest_help(missing_generator=True) return False site.login() bot = DisambiguationRobot(always, alternatives, getAlternatives, dnSkip, generator, primary, main_only, minimum=minimum) bot.run()
def main(*args): """Process command line arguments and invoke PatrolBot.""" # This factory is responsible for processing command line arguments # that are also used by other scripts and that determine on which pages # to work on. usercontribs = None gen = None recentchanges = False newpages = False repeat = False genFactory = pagegenerators.GeneratorFactory() options = {} # Parse command line arguments for arg in pywikibot.handle_args(args): if arg.startswith('-ask'): options['ask'] = True elif arg.startswith('-autopatroluserns'): options['autopatroluserns'] = True elif arg.startswith('-repeat'): repeat = True elif arg.startswith('-newpages'): newpages = True elif arg.startswith('-recentchanges'): recentchanges = True elif arg.startswith('-usercontribs:'): usercontribs = arg[14:] elif arg.startswith('-versionchecktime:'): versionchecktime = arg[len('-versionchecktime:'):] options['versionchecktime'] = int(versionchecktime) elif arg.startswith("-whitelist:"): options['whitelist'] = arg[len('-whitelist:'):] else: generator = genFactory.handleArg(arg) if not generator: if ':' in arg: m = arg.split(':') options[m[0]] = m[1] site = pywikibot.Site() site.login() if usercontribs: pywikibot.output(u'Processing user: %s' % usercontribs) if not newpages and not recentchanges and not usercontribs: if site.family.name == 'wikipedia': newpages = True else: recentchanges = True bot = PatrolBot(**options) if newpages or usercontribs: pywikibot.output(u'Newpages:') gen = site.newpages feed = api_feed_repeater(gen, delay=60, repeat=repeat, user=usercontribs, namespaces=genFactory.namespaces, recent_new_gen=False) bot.run(feed) if recentchanges or usercontribs: pywikibot.output(u'Recentchanges:') gen = site.recentchanges feed = api_feed_repeater(gen, delay=60, repeat=repeat, namespaces=genFactory.namespaces, user=usercontribs) bot.run(feed) pywikibot.output(u'%d/%d patrolled' % (bot.patrol_counter, bot.rc_item_counter))
def __init__(self, password=None, sysop=False, site=None, user=None):
    """
    Initializer.

    All parameters default to defaults in user-config.

    @param site: Site object to log into
    @type site: BaseSite
    @param user: username to use. If user is None, the username is loaded
        from config.usernames.
    @type user: basestring
    @param password: password to use
    @type password: basestring
    @param sysop: login as sysop account. The sysop username is loaded
        from config.sysopnames.
    @type sysop: bool

    @raises NoUsername: No username is configured for the requested site.
    """
    if site is not None:
        self.site = site
    else:
        self.site = pywikibot.Site()
    if user:
        self.username = user
    elif sysop:
        config_names = config.sysopnames
        family_sysopnames = (config_names[self.site.family.name]
                             or config_names['*'])
        self.username = family_sysopnames.get(self.site.code, None)
        try:
            self.username = self.username or family_sysopnames['*']
        except KeyError:
            raise NoUsername("""\
ERROR: Sysop username for %(fam_name)s:%(wiki_code)s is undefined.
If you have a sysop account for that site, please add a line to user-config.py:

sysopnames['%(fam_name)s']['%(wiki_code)s'] = 'myUsername'"""
                             % {'fam_name': self.site.family.name,
                                'wiki_code': self.site.code})
    else:
        config_names = config.usernames
        family_usernames = (config_names[self.site.family.name]
                            or config_names['*'])
        self.username = family_usernames.get(self.site.code, None)
        try:
            self.username = self.username or family_usernames['*']
        except KeyError:
            raise NoUsername("""\
ERROR: Username for %(fam_name)s:%(wiki_code)s is undefined.
If you have an account for that site, please add a line to user-config.py:

usernames['%(fam_name)s']['%(wiki_code)s'] = 'myUsername'"""
                             % {'fam_name': self.site.family.name,
                                'wiki_code': self.site.code})
    self.password = password
    self.login_name = self.username
    if getattr(config, 'password_file', ''):
        self.readPassword()
def bot_update(page_name, file_name):
    """Replace the text of page_name with the contents of file_name."""
    with open(file_name, 'r', encoding='UTF-8') as wiki_dump:
        site = pywikibot.Site()  # The site we want to run our bot on
        page = pywikibot.Page(site, page_name)
        page.text = wiki_dump.read()
        page.save('Automatic update from: '
                  + str(datetime.datetime.now()))  # Saves the page
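# A minimal usage sketch for bot_update() above, assuming pywikibot is
# already configured for the target wiki. The page title and dump filename
# here are hypothetical examples, not values from the original script.
import datetime

import pywikibot

bot_update('User:ExampleBot/sandbox', 'sandbox_dump.txt')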
import pprint
import re
import sqlite3

import pywikibot

# Read the Wikidata ids (one "key: Q123" pair per line) from the mapping file
with open('wikidatamapping.txt', 'r', encoding='utf-8') as file:
    lines = file.readlines()

wdids = []
for line in lines:
    match = re.search(r':\s(.*)', line)
    wdid = match.group(1)
    wdids.append(wdid)

site = pywikibot.Site('nl')
repo = site.data_repository()

for wdid in wdids:
    item = pywikibot.ItemPage(repo, wdid)
    item_dict = item.get()
    clm_dict = item_dict['claims']

    clm_add_all = []
    for clm in clm_dict:
        clm_add_all.append(clm)
    #print(clm_add_all)

    dict = {}
    for property in clm_add_all:
        clm_list = clm_dict[property]
        qids = []
        for clm in clm_list:
            pprint.pprint(clm.toJSON())
            #pprint.pprint(clm.toJSON()['mainsnak'].get('datavalue', {}).get('value', {}).get('numeric-id'))
def main(*args): """ Process command line arguments and invoke bot. If args is an empty list, sys.argv is used. @param args: command line arguments @type args: unicode """ local_args = pywikibot.handle_args(args) fromsite = pywikibot.Site() tolang = fromsite.code tofamily = fromsite.family.name prefix = '' overwrite = False gen_args = [] gen_factory = pagegenerators.GeneratorFactory() for arg in local_args: if gen_factory.handleArg(arg): gen_args.append(arg) continue if arg.startswith('-tofamily'): tofamily = arg[len('-tofamily:'):] elif arg.startswith('-tolang'): tolang = arg[len('-tolang:'):] elif arg.startswith('-prefix'): prefix = arg[len('-prefix:'):] elif arg == '-overwrite': overwrite = True tosite = pywikibot.Site(tolang, tofamily) if fromsite == tosite: raise TargetSiteMissing('Target site not different from source site') gen = gen_factory.getCombinedGenerator() if not gen: raise TargetPagesMissing('Target pages not specified') gen_args = ' '.join(gen_args) pywikibot.output( """ Page transfer configuration --------------------------- Source: %(fromsite)r Target: %(tosite)r Pages to transfer: %(gen_args)s Prefix for transferred pages: %(prefix)s """ % { 'fromsite': fromsite, 'tosite': tosite, 'gen_args': gen_args, 'prefix': prefix }) for page in gen: target_title = (prefix + page.namespace().canonical_prefix() + page.title(with_ns=False)) targetpage = pywikibot.Page(tosite, target_title) edithistpage = pywikibot.Page(tosite, target_title + '/edithistory') summary = 'Moved page from {old} ([[{new}/edithistory|history]])' \ .format(old=page.title(as_link=True, insite=tosite), new=targetpage.title() if not targetpage.namespace().subpages else '') if targetpage.exists() and not overwrite: pywikibot.output('Skipped {0} (target page {1} exists)'.format( page.title(as_link=True), targetpage.title(as_link=True))) continue pywikibot.output('Moving {0} to {1}...'.format( page.title(as_link=True), targetpage.title(as_link=True))) pywikibot.log('Getting page text.') text = page.get(get_redirect=True) text += ( "<noinclude>\n\n<small>This page was moved from {0}. It's " 'edit history can be viewed at {1}</small></noinclude>'.format( page.title(as_link=True, insite=targetpage.site), edithistpage.title(as_link=True, insite=targetpage.site))) pywikibot.log('Getting edit history.') historytable = page.getVersionHistoryTable() pywikibot.log('Putting page text.') targetpage.put(text, summary=summary) pywikibot.log('Putting edit history.') edithistpage.put(historytable, summary=summary)
def test_iterlinks_page_object(self):
    page = [pg for pg in self.wdp.iterlinks()
            if pg.site.language() == 'af'][0]
    self.assertEqual(page,
                     pywikibot.Page(pywikibot.Site('af', 'wikipedia'),
                                    u'New York Stad'))
def getWikidataLookupTables(self):
    '''
    Query to make 10 lookup tables.

    These four lookup tables, each for items with and without images:
    * CIA: Creator, institution & accession number
    * CI: Creator & institution
    * IA: Institution & accession number
    * CA: Creator & accession number

    And also:
    * Wikidata id -> url table(?)
    * Filename -> wikidata id
    '''
    query = u"""SELECT ?item ?image ?creator ?institution ?invnum ?location ?url ?idurl WHERE {
  ?item wdt:P31 wd:Q3305213 . # /wdt:P279* wd:Q3305213 .
  OPTIONAL { ?item wdt:P18 ?image } .
  OPTIONAL { ?item wdt:P170 ?creator } .
  OPTIONAL { ?item wdt:P195 ?institution } .
  OPTIONAL { ?item wdt:P217 ?invnum } .
  OPTIONAL { ?item wdt:P276 ?location } .
  OPTIONAL { ?item wdt:P973 ?url } .
  #This seems to break the query. Disabled for now.
  #OPTIONAL { ?item ?identifierproperty ?identifier .
  #           ?property wikibase:directClaim ?identifierproperty .
  #           ?property wikibase:propertyType wikibase:ExternalId .
  #           ?property wdt:P1630 ?formatterurl .
  #           BIND(IRI(REPLACE(?identifier, '^(.+)$', ?formatterurl)) AS ?idurl).
  #         }
}"""
    sq = pywikibot.data.sparql.SparqlQuery()
    queryresult = sq.select(query)

    for resultitem in queryresult:
        item = resultitem.get('item').replace(
            u'http://www.wikidata.org/entity/', u'')
        # First clean up and put in a dictionary
        paintingdict = {u'item': item,
                        u'image': False,
                        u'creator': False,
                        u'institution': False,
                        u'invnum': False,
                        u'location': False,
                        u'url': False}
        if resultitem.get('image'):
            paintingdict['image'] = pywikibot.FilePage(
                pywikibot.Site('commons', 'commons'),
                resultitem.get('image').replace(
                    u'http://commons.wikimedia.org/wiki/Special:FilePath/',
                    u'')).title(underscore=True, withNamespace=False)
        if resultitem.get('creator'):
            paintingdict['creator'] = resultitem.get('creator').replace(
                u'http://www.wikidata.org/entity/', u'')
        if resultitem.get('institution'):
            paintingdict['institution'] = resultitem.get(
                'institution').replace(u'http://www.wikidata.org/entity/',
                                       u'')
        if resultitem.get('invnum'):
            paintingdict['invnum'] = resultitem.get('invnum')
        if resultitem.get('location'):
            paintingdict['location'] = resultitem.get('location').replace(
                u'http://www.wikidata.org/entity/', u'')
        if resultitem.get('url'):
            paintingdict['url'] = resultitem.get('url')
        elif resultitem.get('idurl'):
            paintingdict['url'] = resultitem.get('idurl')

        ciakey = None
        clakey = None
        cikey = None
        clkey = None
        iakey = None
        lakey = None
        cakey = None
        if paintingdict.get(u'creator') and paintingdict.get(
                u'institution') and paintingdict.get(u'invnum'):
            ciakey = (paintingdict.get(u'creator'),
                      paintingdict.get(u'institution'),
                      paintingdict.get(u'invnum'))
        if paintingdict.get(u'creator') and paintingdict.get(
                u'location') and paintingdict.get(u'invnum'):
            clakey = (paintingdict.get(u'creator'),
                      paintingdict.get(u'location'),
                      paintingdict.get(u'invnum'))
        if paintingdict.get(u'creator') and paintingdict.get(u'institution'):
            cikey = (paintingdict.get(u'creator'),
                     paintingdict.get(u'institution'))
        if paintingdict.get(u'creator') and paintingdict.get(u'location'):
            clkey = (paintingdict.get(u'creator'),
                     paintingdict.get(u'location'))
        if paintingdict.get(u'institution') and paintingdict.get(u'invnum'):
            iakey = (paintingdict.get(u'institution'),
                     paintingdict.get(u'invnum'))
        if paintingdict.get(u'location') and paintingdict.get(u'invnum'):
            lakey = (paintingdict.get(u'location'),
                     paintingdict.get(u'invnum'))
        if paintingdict.get(u'creator') and paintingdict.get(u'invnum'):
            cakey = (paintingdict.get(u'creator'),
                     paintingdict.get(u'invnum'))

        if paintingdict.get(u'image'):
            self.wikidataImages[paintingdict.get(u'image')] = paintingdict
            self.wikidataWithImages[paintingdict.get(u'item')] = paintingdict
            if ciakey:
                if ciakey not in self.wikidataWithCIA:
                    self.wikidataWithCIA[ciakey] = []
                self.wikidataWithCIA[ciakey].append(paintingdict)
            if clakey and clakey != ciakey:
                if clakey not in self.wikidataWithCIA:
                    self.wikidataWithCIA[clakey] = []
                self.wikidataWithCIA[clakey].append(paintingdict)
            if cikey:
                if cikey not in self.wikidataWithCI:
                    self.wikidataWithCI[cikey] = []
                self.wikidataWithCI[cikey].append(paintingdict)
            if clkey and clkey != cikey:
                if clkey not in self.wikidataWithCI:
                    self.wikidataWithCI[clkey] = []
                self.wikidataWithCI[clkey].append(paintingdict)
            if iakey:
                if iakey not in self.wikidataWithIA:
                    self.wikidataWithIA[iakey] = []
                self.wikidataWithIA[iakey].append(paintingdict)
            if lakey and lakey != iakey:
                if lakey not in self.wikidataWithIA:
                    self.wikidataWithIA[lakey] = []
                self.wikidataWithIA[lakey].append(paintingdict)
            if cakey:
                if cakey not in self.wikidataWithCA:
                    self.wikidataWithCA[cakey] = []
                self.wikidataWithCA[cakey].append(paintingdict)
        else:
            self.wikidataNoImages[paintingdict.get(u'item')] = paintingdict
            if ciakey:
                if ciakey not in self.wikidataWithoutCIA:
                    self.wikidataWithoutCIA[ciakey] = []
                self.wikidataWithoutCIA[ciakey].append(paintingdict)
            if clakey and clakey != ciakey:
                if clakey not in self.wikidataWithoutCIA:
                    self.wikidataWithoutCIA[clakey] = []
                self.wikidataWithoutCIA[clakey].append(paintingdict)
            if cikey:
                if cikey not in self.wikidataWithoutCI:
                    self.wikidataWithoutCI[cikey] = []
                self.wikidataWithoutCI[cikey].append(paintingdict)
            if clkey and clkey != cikey:
                if clkey not in self.wikidataWithoutCI:
                    self.wikidataWithoutCI[clkey] = []
                self.wikidataWithoutCI[clkey].append(paintingdict)
            if iakey:
                if iakey not in self.wikidataWithoutIA:
                    self.wikidataWithoutIA[iakey] = []
                self.wikidataWithoutIA[iakey].append(paintingdict)
            if lakey and lakey != iakey:
                if lakey not in self.wikidataWithoutIA:
                    self.wikidataWithoutIA[lakey] = []
                self.wikidataWithoutIA[lakey].append(paintingdict)
            if cakey:
                if cakey not in self.wikidataWithoutCA:
                    self.wikidataWithoutCA[cakey] = []
                self.wikidataWithoutCA[cakey].append(paintingdict)
def main(*args): """ Process command line arguments and invoke bot. If args is an empty list, sys.argv is used. @param args: command line arguments @type args: list of unicode """ options = {} # what the bot should do (either resolve double redirs, or delete broken # redirs) action = None # where the bot should get his infos from (either None to load the # maintenance special page from the live wiki, or the filename of a # local XML dump file) xmlFilename = None # Which namespace should be processed when using a XML dump # default to -1 which means all namespaces will be processed namespaces = [] # at which redirect shall we start searching double redirects again # (only with dump); default to -1 which means all redirects are checked offset = -1 moved_pages = False fullscan = False start = '' until = '' number = None step = None pagename = None for arg in pywikibot.handle_args(args): if arg == 'double' or arg == 'do': action = 'double' elif arg == 'broken' or arg == 'br': action = 'broken' elif arg == 'both': action = 'both' elif arg == '-fullscan': fullscan = True elif arg.startswith('-xml'): if len(arg) == 4: xmlFilename = i18n.input('pywikibot-enter-xml-filename') else: xmlFilename = arg[5:] elif arg.startswith('-moves'): moved_pages = True elif arg.startswith('-namespace:'): ns = arg[11:] if ns == '': # "-namespace:" does NOT yield -namespace:0 further down the road! ns = i18n.input('pywikibot-enter-namespace-number') # TODO: at least for some generators enter a namespace by its name # or number if ns == '': ns = '0' try: ns = int(ns) except ValueError: # -namespace:all Process all namespaces. # Only works with the API read interface. pass if ns not in namespaces: namespaces.append(ns) elif arg.startswith('-offset:'): offset = int(arg[8:]) elif arg.startswith('-start:'): start = arg[7:] elif arg.startswith('-until:'): until = arg[7:] elif arg.startswith('-total:'): number = int(arg[7:]) elif arg.startswith('-step:'): step = int(arg[6:]) elif arg.startswith('-page:'): pagename = arg[6:] elif arg == '-always': options['always'] = True elif arg == '-delete': options['delete'] = True else: pywikibot.output(u'Unknown argument: %s' % arg) if ( not action or xmlFilename and moved_pages or fullscan and xmlFilename ): problems = [] if xmlFilename and moved_pages: problems += ['Either use a XML file or the moved pages from the API'] if xmlFilename and fullscan: problems += ['Either use a XML file or do a full scan using the API'] pywikibot.bot.suggest_help(additional_text='\n'.join(problems), missing_action=not action) else: pywikibot.Site().login() gen = RedirectGenerator(xmlFilename, namespaces, offset, moved_pages, fullscan, start, until, number, step, pagename) bot = RedirectRobot(action, gen, number=number, **options) bot.run()
def __init__(self): """Initializer.""" self.handle_args() if not self.namespaces and not self.doFailed: if not self.apfrom: # 0 should be after templates ns self.namespaces = [14, 10, 12, 0] else: self.namespaces = [0] if not self.aplimit: self.aplimit = 200 if self.links else 'max' if not self.doFailed: self.queryParams = { 'action': 'query', 'generator': 'allpages', 'gaplimit': self.aplimit, 'gapfilterredir': self.filterredir } else: self.queryParams = {'action': 'query'} if self.apfrom: pywikibot.output('Argument "-from" is ignored with "-failed"') propParam = 'info' if self.links: propParam += '|links|categories' self.queryParams['pllimit'] = 'max' self.queryParams['cllimit'] = 'max' self.queryParams['prop'] = propParam self.site = pywikibot.Site() if len(self.localSuspects) != len(self.latinSuspects): raise ValueError('Suspects must be the same size') if len(self.localKeyboard) != len(self.latinKeyboard): raise ValueError('Keyboard info must be the same size') if not os.path.isabs(self.wikilogfile): self.wikilogfile = pywikibot.config.datafilepath(self.wikilogfile) self.wikilog = self.OpenLogFile(self.wikilogfile) if not os.path.isabs(self.failedTitles): self.failedTitles = pywikibot.config.datafilepath( self.failedTitles) if self.doFailed: with codecs.open(self.failedTitles, 'r', 'utf-8') as f: self.titleList = [self.Page(t) for t in f] self.failedTitles += '.failed' iterzip = zip(self.localSuspects, self.latinSuspects) self.lclToLatDict = {ord(local): latin for local, latin in iterzip} self.latToLclDict = {ord(latin): local for local, latin in iterzip} if self.localKeyboard is not None: iterzip = zip(self.localKeyboard, self.latinKeyboard) self.lclToLatKeybDict = { ord(local): latin for local, latin in iterzip } self.latToLclKeybDict = { ord(latin): local for local, latin in iterzip } else: self.lclToLatKeybDict = {} self.latToLclKeybDict = {} badPtrnStr = '([{ascii}][{local}]|[{local}][{ascii}])'.format( ascii=ascii_letters, local=self.localLtr) self.badWordPtrn = re.compile( '[{ascii}{local}]*{bad}[{ascii}{local}]*'.format( ascii=ascii_letters, local=self.localLtr, bad=badPtrnStr)) self.get_whitelist()
def __init__(self): """ Build all the lookup tables to work on """ self.commons = pywikibot.Site(u'commons', u'commons') self.repo = pywikibot.Site().data_repository() self.commonsNoLink = [] # List of images without a link self.commonsWithoutCIA = { } # Creator, institution & accession number -> image self.commonsWithoutCI = {} # Creator & instution -> image self.commonsWithoutIA = {} # Institution & accession number -> image self.commonsWithoutCA = {} # Creator & accession number -> image self.commonsLink = { } # Dictionary of images with a wikidata link, file -> item self.commonsWithCIA = { } # Creator, institution & accession number -> image & item self.commonsWithCI = {} # Creator & instution -> image & item self.commonsWithIA = { } # Institution & accession number -> image & item self.commonsWithCA = {} # Creator & accession number -> image & item self.bettersuggestions = [] # List of images with better images self.wikidataNoImages = { } # Dictionary of items without images -> item & url self.wikidataWithoutCIA = { } # Creator, institution & accession number -> item & url self.wikidataWithoutCI = {} # Creator & instution -> item & url self.wikidataWithoutIA = { } # Institution & accession number -> item & url self.wikidataWithoutCA = {} # Creator & accession number -> item & url self.wikidataImages = { } # Dictionary of image on wikidata file -> item, image & url self.wikidataWithImages = { } # Dictionary of items with images -> item, image & url self.wikidataWithCIA = { } # Creator, institution & accession number -> item, image & url self.wikidataWithCI = {} # Creator & instution -> item, image & url self.wikidataWithIA = { } # Institution & accession number -> item, image & url self.wikidataWithCA = { } # Creator & accession number -> item, image & url self.categorysuggestions = [ ] # List of images to connect to Wikidata based on category self.getCommonsWithoutLookupTables() print 'self.commonsNoLink %s' % (len(self.commonsNoLink), ) print 'self.commonsWithoutCIA %s' % (len(self.commonsWithoutCIA), ) print 'self.commonsWithoutCI %s' % (len(self.commonsWithoutCI), ) print 'self.commonsWithoutIA %s' % (len(self.commonsWithoutIA), ) print 'self.commonsWithoutCA %s' % (len(self.commonsWithoutCA), ) self.getCommonsWithLookupTables() print 'self.commonsLink %s' % (len(self.commonsLink), ) print 'self.commonsWithCIA %s' % (len(self.commonsWithCIA), ) print 'self.commonsWithCI %s' % (len(self.commonsWithCI), ) print 'self.commonsWithIA %s' % (len(self.commonsWithIA), ) print 'self.commonsWithCA %s' % (len(self.commonsWithCA), ) self.getBetterImageSuggestions() print 'self.bettersuggestions %s' % (len(self.bettersuggestions), ) self.getWikidataLookupTables() print 'self.wikidataNoImages %s' % (len(self.wikidataNoImages), ) print 'self.wikidataWithoutCIA %s' % (len(self.wikidataWithoutCIA), ) print 'self.wikidataWithoutCI %s' % (len(self.wikidataWithoutCI), ) print 'self.wikidataWithoutIA %s' % (len(self.wikidataWithoutIA), ) print 'self.wikidataWithoutCA %s' % (len(self.wikidataWithoutCA), ) print 'self.wikidataImages %s' % (len(self.wikidataImages), ) print 'self.wikidataWithImages %s' % (len(self.wikidataWithImages), ) print 'self.wikidataWithCIA %s' % (len(self.wikidataWithCIA), ) print 'self.wikidataWithCI %s' % (len(self.wikidataWithCI), ) print 'self.wikidataWithIA %s' % (len(self.wikidataWithIA), ) print 'self.wikidataWithCA %s' % (len(self.wikidataWithCA), ) self.getCommonsCategorySuggestions() print 'self.categorysuggestions %s' % (len(self.categorysuggestions), )
import datetime
import traceback

import pywikibot
from pywikibot.comms import http
from pywikibot.pagegenerators import PrefixingPageGenerator

try:
    input = raw_input
except NameError:
    pass

YEAR = datetime.datetime.now().year - 1
LAST = YEAR - 1
SITE = pywikibot.Site()


def is_translation(page):
    url = '%s/index.php?title=%s' % (SITE.scriptpath(),
                                     page.title(asUrl=True))
    return '"wgTranslatePageTranslation":"translation"' in http.request(
        SITE, url)


def setup(src):
    # print(src)
    target = pywikibot.Page(
        SITE,
        src.title().replace(str(YEAR), str(YEAR + 1)).replace(
            str(LAST), str(YEAR)))
    # if target.exists(): return
def main(*args): """ Process command line arguments and invoke bot. If args is an empty list, sys.argv is used. @param args: command line arguments @type args: unicode """ options = {} gen_options = {} # what the bot should do (either resolve double redirs, or process broken # redirs) action = None namespaces = set() source = set() for arg in pywikibot.handle_args(args): arg, sep, value = arg.partition(':') option = arg.partition('-')[2] # bot options if arg == 'do': action = 'double' elif arg == 'br': action = 'broken' elif arg in ('both', 'broken', 'double'): action = arg elif option in ('always', 'delete'): options[option] = True elif option == 'total': options[option] = gen_options[option] = int(value) elif option == 'sdtemplate': options['sdtemplate'] = value or pywikibot.input( 'Which speedy deletion template to use?') # generator options elif option in ('fullscan', 'moves'): gen_options[option] = True source.add(arg) elif option == 'xml': gen_options[option] = value or i18n.input( 'pywikibot-enter-xml-filename') source.add(arg) elif option == 'namespace': # "-namespace:" does NOT yield -namespace:0 further down the road! ns = value or i18n.input('pywikibot-enter-namespace-number') # TODO: at least for some generators enter a namespace by its name # or number if ns == '': ns = '0' try: ns = int(ns) except ValueError: # -namespace:all Process all namespaces. # Only works with the API read interface. pass else: namespaces.add(ns) elif option == 'offset': gen_options[option] = int(value) elif option in ('page', 'start', 'until'): gen_options[option] = value else: pywikibot.output('Unknown argument: ' + arg) if namespaces: gen_options['namespaces'] = namespaces if len(source) > 1: problem = 'You can only use one of {0} options.'.format( ' or '.join(source)) pywikibot.bot.suggest_help(additional_text=problem, missing_action=not action) return if not action: pywikibot.bot.suggest_help(missing_action=True) else: pywikibot.Site().login() options['generator'] = RedirectGenerator(action, **gen_options) bot = RedirectRobot(action, **options) bot.run()
# See https://en.wikiversity.org/wiki/Research_in_programming_Wikidata/Countries
import pywikibot
from pywikibot import pagegenerators

# item is 'country'
# https://query.wikidata.org/#%23List of countries in English and Russian%0ASELECT %3Fcountry %3Flabel_en %3Flabel_ru%0AWHERE%0A{%0A %3Fcountry wdt%3AP31 wd%3AQ6256.%0A %3Fcountry rdfs%3Alabel %3Flabel_en filter (lang(%3Flabel_en) %3D "en").%0A %3Fcountry rdfs%3Alabel %3Flabel_ru filter (lang(%3Flabel_ru) %3D "ru").%0A}
query = 'SELECT ?item ?label_en ?label_ru ' + \
        'WHERE { ' + \
        '  ?item wdt:P31 wd:Q6256.' + \
        '  ?item rdfs:label ?label_en filter (lang(?label_en) = "en").' + \
        '  ?item rdfs:label ?label_ru filter (lang(?label_ru) = "ru").' + \
        '}'  # LIMIT 3'

wikidata_site = pywikibot.Site('wikidata', 'wikidata')
generator = pagegenerators.WikidataSPARQLPageGenerator(query,
                                                       site=wikidata_site)
repo = wikidata_site.data_repository()

mysql_string = """DROP TABLE IF EXISTS `countries`;

CREATE TABLE IF NOT EXISTS `countries` (
  `id` INT NOT NULL,
  `name_en` VARCHAR(45) NULL,
  `name_ru` VARCHAR(45) NULL,
  PRIMARY KEY (`id`))
ENGINE = InnoDB;

"""
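# A hedged sketch (not part of the original script) of how `generator` and
# `mysql_string` above could be combined: one INSERT per country returned by
# the SPARQL query. It assumes each ItemPage exposes `labels` after .get();
# the quote escaping is deliberately naive and for illustration only.
for item in generator:
    item.get()  # load labels from Wikidata
    qid = int(item.title()[1:])  # 'Q30' -> 30
    name_en = item.labels.get('en', '').replace("'", "''")
    name_ru = item.labels.get('ru', '').replace("'", "''")
    mysql_string += ("INSERT INTO `countries` (`id`, `name_en`, `name_ru`) "
                     "VALUES (%d, '%s', '%s');\n" % (qid, name_en, name_ru))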
def main(*args): """ Process command line arguments and invoke bot. If args is an empty list, sys.argv is used. @param args: command line arguments @type args: list of unicode """ generator = None local_args = pywikibot.handle_args(args) site = pywikibot.Site() if site.code != 'commons' or site.family.name != 'commons': pywikibot.warning( 'This script is primarily written for Wikimedia ' 'Commons, but has been invoked with site {0}. It ' 'might work for other sites but there is no ' 'guarantee that it does the right thing.'.format(site)) choice = pywikibot.input_choice( 'How do you want to continue?', (('Continue using {0}'.format(site), 'c'), ('Switch to Wikimedia Commons', 's'), ('Quit', 'q')), automatic_quit=False) if choice == 's': site = pywikibot.Site('commons', 'commons') elif choice == 'q': return False genFactory = pagegenerators.GeneratorFactory(site) for arg in local_args: param_arg, sep, param_value = arg.partition(':') if param_value == '': param_value = None if arg.startswith('-yesterday'): generator = uploadedYesterday(site) issue_deprecation_warning( 'The usage of "-yesterday"', '-logevents:"upload,,YYYYMMDD,YYYYMMDD"', 2, ArgumentDeprecationWarning) elif arg.startswith('-recentchanges'): if param_value is None: arg = arg + ':120,70' issue_deprecation_warning('-recentchanges without parameters', '-recentchanges:offset,duration', 2, ArgumentDeprecationWarning) genFactory.handleArg(arg) else: genFactory.handleArg(arg) generator = genFactory.getCombinedGenerator(gen=generator, preload=True) if not generator: pywikibot.bot.suggest_help(missing_generator=True) return False else: site.login() for page in generator: pywikibot.output(page.title()) if page.exists() and (page.namespace() == 6) \ and (not page.isRedirectPage()): if isUncat(page): addUncat(page) return True
import itertools
import math
import json

import acnutils as utils
import pywikibot
import requests
import toolforge

from string import Template
from dataclasses import dataclass
from datetime import datetime
from typing import Optional, Tuple, Iterator, Iterable, cast, Dict, Union

__version__ = "1.0"

logger = utils.getInitLogger("essayassesment", level="VERBOSE",
                             filename="essayimpact.log")

site = pywikibot.Site("en", "wikipedia")
session = requests.session()
session.headers.update(
    {"User-Agent": toolforge.set_user_agent("anticompositebot")})
simulate = False


@dataclass
class Essay:
    page: pywikibot.Page
    links: Optional[int] = None
    watchers: Optional[int] = None
    views: Optional[int] = None
    score: Optional[float] = None

    def get_views_and_watchers(self) -> None:
#!/usr/bin/python3
import json
import time

import pywikibot

from common import db_settings, utils

conn = db_settings.con_ly()
c = conn.cursor()
sleep_second = 5
site = pywikibot.Site("zh", "wikipedia")
repo = site.data_repository()
ad = 9
election_years = [1989, 1992, 1995, 1998, 2001, 2004, 2008, 2012, 2016, 2020]
election_title = '%d年立法委員選舉' % election_years[ad - 1]
election_id = utils.get_qnumber(wikiarticle=election_title, lang="zh-tw")
election_target = pywikibot.ItemPage(repo, election_id)
term_id = utils.get_qnumber(wikiarticle="第%d屆立法委員" % ad, lang="zh-tw")
term_target = pywikibot.ItemPage(repo, term_id)


def person_qid_from_db(name):
    c.execute(
        '''
        select lm.data->>'wikidata_qid'
        from legislator_legislatordetail l
        left join legislator_legislator lm on l.legislator_id = lm.uid
        where l.ad = %s and l.name = %s
        ''', [ad, name])
    r = c.fetchone()
def run(self): """ Starts the robot. """ for metadata in self.generator: # Buh, for this one I know for sure it's in there #print metadata[u'id'] #print metadata[u'url'] # Do some url magic so that all url fields are always filled if not metadata.get('refurl'): metadata['refurl'] = metadata['url'] if not metadata.get('idrefurl'): metadata['idrefurl'] = metadata['refurl'] if not metadata.get('describedbyurl'): metadata['describedbyurl'] = metadata['url'] artworkItem = None newclaims = [] if metadata[u'id'] in self.artworkIds: artworkItemTitle = self.artworkIds.get(metadata[u'id']) print(artworkItemTitle) artworkItem = pywikibot.ItemPage(self.repo, title=artworkItemTitle) elif self.create: #Break for now #print u'Let us create stuff' #continue #print u'WTFTFTFTFT???' #print 'bla' data = { 'labels': {}, 'descriptions': {}, } # loop over stuff if metadata.get('title'): for lang, label in metadata['title'].items(): data['labels'][lang] = { 'language': lang, 'value': label } if metadata.get('description'): for lang, description in metadata['description'].items(): data['descriptions'][lang] = { 'language': lang, 'value': description } identification = {} summary = u'Creating new item with data from %s ' % ( metadata[u'url'], ) pywikibot.output(summary) try: result = self.repo.editEntity(identification, data, summary=summary) except pywikibot.exceptions.APIError: ## TODO: Check if this is pywikibot.OtherPageSaveError too ## We got ourselves a duplicate label and description, let's correct that by adding collection and the id pywikibot.output( u'Oops, already had that one. Trying again') for lang, description in metadata['description'].items(): data['descriptions'][lang] = { 'language': lang, 'value': u'%s (%s %s)' % ( description, metadata['inception'], metadata['id'], ) } result = self.repo.editEntity(identification, data, summary=summary) pass # Crash here artworkItemTitle = result.get(u'entity').get('id') # Wikidata is sometimes lagging. 
Wait for 10 seconds before trying to actually use the item time.sleep(10) artworkItem = pywikibot.ItemPage(self.repo, title=artworkItemTitle) # Add to self.artworkIds so that we don't create dupes self.artworkIds[metadata[u'id']] = artworkItemTitle # Add the id to the item so we can get back to it later newclaim = pywikibot.Claim(self.repo, self.idProperty) newclaim.setTarget(metadata[u'id']) pywikibot.output('Adding new id claim to %s' % artworkItem) artworkItem.addClaim(newclaim) #self.addReference(artworkItem, newclaim, metadata[u'idrefurl']) #newqualifier = pywikibot.Claim(self.repo, u'P195') #Add collection, isQualifier=True #newqualifier.setTarget(self.collectionitem) #pywikibot.output('Adding new qualifier claim to %s' % artworkItem) #newclaim.addQualifier(newqualifier) #collectionclaim = pywikibot.Claim(self.repo, u'P195') #collectionclaim.setTarget(self.collectionitem) #pywikibot.output('Adding collection claim to %s' % artworkItem) #artworkItem.addClaim(collectionclaim) ## Add the date they got it as a qualifier to the collection #if metadata.get(u'acquisitiondate'): # if type(metadata[u'acquisitiondate']) is int or (len(metadata[u'acquisitiondate'])==4 and \ # metadata[u'acquisitiondate'].isnumeric()): # It's a year # acdate = pywikibot.WbTime(year=metadata[u'acquisitiondate']) # colqualifier = pywikibot.Claim(self.repo, u'P580') # colqualifier.setTarget(acdate) # pywikibot.output('Adding new acquisition date qualifier claim to collection on %s' % artworkItem) # collectionclaim.addQualifier(colqualifier) ## FIXME: Still have to rewrite this part ''' if metadata.get(u'acquisitiondate'): colqualifier = pywikibot.Claim(self.repo, u'P580') acdate = None if len(painting[u'acquisitiondate'])==4 and painting[u'acquisitiondate'].isnumeric(): # It's a year acdate = pywikibot.WbTime(year=painting[u'acquisitiondate']) elif len(painting[u'acquisitiondate'].split(u'-', 2))==3: (acday, acmonth, acyear) = painting[u'acquisitiondate'].split(u'-', 2) acdate = pywikibot.WbTime(year=int(acyear), month=int(acmonth), day=int(acday)) if acdate: colqualifier.setTarget(acdate) ''' #self.addReference(artworkItem, collectionclaim, metadata[u'refurl']) if artworkItem and artworkItem.exists(): metadata['wikidata'] = artworkItem.title() data = artworkItem.get() claims = data.get('claims') # Add missing labels # FIXME: Move to a function # FIXME Do something with aliases too labels = data.get('labels') if metadata.get('title'): labelschanged = False for lang, label in metadata['title'].items(): if lang not in labels: labels[lang] = label labelschanged = True if labelschanged: summary = u'Adding missing label(s) from %s' % ( metadata.get(u'refurl'), ) try: artworkItem.editLabels(labels, summary=summary) except pywikibot.OtherPageSaveError: # Just skip it for no pywikibot.output( u'Oops, already had that label/description combination. 
Skipping' ) pass """ # Add missing descriptions # FIXME Move to a function descriptions = copy.deepcopy(data.get('descriptions')) if metadata.get('description'): descriptionschanged = False for lang, description in metadata['description'].items(): if lang not in descriptions: descriptions[lang] = description descriptionschanged = True if descriptionschanged: summary = u'Adding missing description(s) from %s' % (metadata.get(u'refurl'),) try: artworkItem.editDescriptions(descriptions, summary=summary) except pywikibot.exceptions.OtherPageSaveError: # pywikibot.exceptions.APIError: # We got ourselves a duplicate label and description, let's correct that by adding collection and the id descriptions = copy.deepcopy(data.get('descriptions')) pywikibot.output(u'Oops, already had that label/description combination. Trying again') for lang, description in metadata['description'].items(): if lang not in descriptions: descriptions[lang] = u'%s (%s %s)' % (description, metadata['collectionshort'], metadata['id'],) artworkItem.editDescriptions(descriptions, summary=summary) pass #print claims """ # instance of self.addItemStatement(artworkItem, u'P31', metadata.get(u'instanceofqid'), metadata.get(u'refurl')) # country self.addItemStatement(artworkItem, u'P17', metadata.get(u'countryqid'), metadata.get(u'refurl')) # adminlocation self.addItemStatement(artworkItem, u'P131', metadata.get(u'adminlocationqid'), metadata.get(u'refurl')) # location self.addItemStatement(artworkItem, u'P276', metadata.get(u'locationqid'), metadata.get(u'refurl')) # creator self.addItemStatement(artworkItem, u'P170', metadata.get(u'creatorqid'), metadata.get(u'refurl')) # genre self.addItemStatement(artworkItem, u'P136', metadata.get(u'genreqid'), metadata.get(u'refurl')) # Inception if u'P571' not in claims and metadata.get(u'inception'): if type(metadata[u'inception']) is int or (len(metadata[u'inception'])==4 and \ metadata[u'inception'].isnumeric()): # It's a year newdate = pywikibot.WbTime(year=metadata[u'inception']) newclaim = pywikibot.Claim(self.repo, u'P571') newclaim.setTarget(newdate) pywikibot.output( 'Adding date of creation claim to %s' % artworkItem) artworkItem.addClaim(newclaim) self.addReference(artworkItem, newclaim, metadata[u'refurl']) # TODO: Implement circa if metadata.get('image') and u'P18' not in claims: print u'no image found' # Construct newclaim = pywikibot.Claim(self.repo, u'P18') commonssite = pywikibot.Site("commons", "commons") imagelink = pywikibot.Link(metadata.get('image'), source=commonssite, defaultNamespace=6) image = pywikibot.ImagePage(imagelink) if image.isRedirectPage(): image = pywikibot.ImagePage(image.getRedirectTarget()) if not image.exists(): pywikibot.output( '[[%s]] doesn\'t exist so I can\'t link to it' % (image.title(), )) else: newclaim.setTarget(image) pywikibot.output( 'Adding %s --> %s' % (newclaim.getID(), newclaim.getTarget())) artworkItem.addClaim(newclaim) if metadata.get('commonscat') and u'P373' not in claims: print u'no image found' # Construct newclaim = pywikibot.Claim(self.repo, u'P373') commonssite = pywikibot.Site("commons", "commons") commonslink = pywikibot.Link(metadata.get('commonscat'), source=commonssite, defaultNamespace=14) commonscat = pywikibot.Page(commonslink) if commonscat.isRedirectPage(): commonscat = pywikibot.Page( commonscat.getRedirectTarget()) if not commonscat.exists(): pywikibot.output( '[[%s]] doesn\'t exist so I can\'t link to it' % (commonscat.title(), )) else: newclaim.setTarget( commonscat.title(withNamespace=False)) 
pywikibot.output( 'Adding %s --> %s' % (newclaim.getID(), newclaim.getTarget())) artworkItem.addClaim(newclaim) if metadata.get('lat') and metadata.get( 'lon') and u'P625' not in claims: print u'no coordinates found' # Build coordinates and add them coordinate = pywikibot.Coordinate(metadata.get('lat'), metadata.get('lon'), dim=100) newclaim = pywikibot.Claim(self.repo, u'P625') newclaim.setTarget(coordinate) pywikibot.output( u'Adding %s, %s to %s' % (coordinate.lat, coordinate.lon, artworkItem.title())) artworkItem.addClaim(newclaim)
# (C) Pywikibot team, 2008-2014
#
# Distributed under the terms of the MIT license.
#
__version__ = '$Id: a8b52e17391069295e9ae0f9a22c3c57117e5340 $'
#

import os
import json

import pywikibot
from pywikibot import pagegenerators
from pywikibot.data.api import APIError

from tests.utils import PywikibotTestCase, unittest

site = pywikibot.Site('en', 'wikipedia')
mainpage = pywikibot.Page(pywikibot.page.Link("Main Page", site))
wikidata = site.data_repository()


# fetch a page which is very likely to be unconnected, which doesn't have
# a generator, and unit tests may be used to test old versions of pywikibot
def get_test_unconnected_page(site):
    gen = pagegenerators.NewpagesPageGenerator(site=site, total=1)
    return next(gen)


class TestGeneral(PywikibotTestCase):

    def testWikibase(self):
        if not site.has_transcluded_data:
def main(*args): """ Process command line arguments and invoke bot. If args is an empty list, sys.argv is used. @param args: command line arguments @type args: list of unicode """ for arg in pywikibot.handle_args(args): arg, sep, val = arg.partition(':') if arg == '-edit': globalvar.attachEditCount = int(val or pywikibot.input( 'After how many edits would you like to welcome new users? ' '(0 is allowed)')) elif arg == '-timeoffset': globalvar.timeoffset = int(val or pywikibot.input( 'Which time offset (in minutes) for new users would you like ' 'to use?')) elif arg == '-time': globalvar.timeRecur = int(val or pywikibot.input( 'For how many seconds would you like to bot to sleep before ' 'checking again?')) elif arg == '-offset': if not val: val = pywikibot.input( 'Which time offset for new users would you like to use? ' '(yyyymmddhhmmss)') try: globalvar.offset = pywikibot.Timestamp.fromtimestampformat(val) except ValueError: # upon request, we might want to check for software version here raise ValueError( "Mediawiki has changed, -offset:# is not supported " "anymore, but -offset:TIMESTAMP is, assuming TIMESTAMP " "is yyyymmddhhmmss. -timeoffset is now also supported. " "Please read this script source header for documentation.") elif arg == '-file': globalvar.randomSign = True globalvar.signFileName = val or pywikibot.input( 'Where have you saved your signatures?') elif arg == '-sign': globalvar.defaultSign = val or pywikibot.input( 'Which signature to use?') globalvar.defaultSign += timeselected elif arg == '-break': globalvar.recursive = False elif arg == '-nlog': globalvar.makeWelcomeLog = False elif arg == '-ask': globalvar.confirm = True elif arg == '-filter': globalvar.filtBadName = True elif arg == '-savedata': globalvar.saveSignIndex = True elif arg == '-random': globalvar.randomSign = True elif arg == '-sul': globalvar.welcomeAuto = True elif arg == '-limit': globalvar.queryLimit = int(val or pywikibot.input( u'How many of the latest new users would you like to load?')) elif arg == '-numberlog': globalvar.dumpToLog = int(val or pywikibot.input( 'After how many welcomed users would you like to update the ' 'welcome log?')) elif arg == '-quiet': globalvar.quiet = True elif arg == '-quick': issue_deprecation_warning('The usage of "-quick" option', None, 2) # Filename and Pywikibot path # file where is stored the random signature index filename = pywikibot.config.datafilepath( 'welcome-%s-%s.data' % (pywikibot.Site().family.name, pywikibot.Site().code)) if globalvar.offset and globalvar.timeoffset: pywikibot.warning( 'both -offset and -timeoffset were provided, ignoring -offset') globalvar.offset = 0 try: bot = WelcomeBot() except KeyError as error: # site not managed by welcome.py pywikibot.bot.suggest_help(exception=error) return False try: bot.run() except KeyboardInterrupt: if bot.welcomed_users: showStatus() pywikibot.output("Put welcomed users before quit...") bot.makelogpage(bot.welcomed_users) pywikibot.output("\nQuitting...") finally: # If there is the savedata, the script must save the number_user. if globalvar.randomSign and globalvar.saveSignIndex and \ bot.welcomed_users: if sys.version_info[0] > 2: import pickle as cPickle else: import cPickle with open(filename, 'wb') as f: cPickle.dump(bot.welcomed_users, f, protocol=config.pickle_protocol)
import re

import pywikibot as pwb
from pywikibot import pagegenerators as pg

paraules = {
    # Add the word using a regexp and, next to it, the word without the
    # regexp (see the example). You can add as many words as you want.
    u' ([Pp]ag(e|es))( |\.|\,)': u'Page',
}


def main(page, paraula):
    if page.title() == paraules[paraula]:
        print(u'Skipping %s because it is the link page' % str(page))
        return
    substitucio = ' [[' + paraules[paraula] + '|\\1]]\\3'
    #site = pwb.Site()
    #page = pwb.Page(site, page)
    print(u'Page: %s' % page)
    text = page.text
    comptador = re.search(paraula, text)
    if comptador:
        print('======= EDITING PAGE %s! =======' % page)
        noutext = re.sub(paraula, substitucio, text)
        page.text = noutext
        page.save(u'Bot: Adding links for %s' % paraules[paraula])
    else:
        print('No links added')
    return


if __name__ == '__main__':
    allpages = pg.AllpagesPageGenerator(site=pwb.Site(), start='!',
                                        namespace=0, includeredirects=True)
    pages = pg.PreloadingGenerator(allpages, pageNumber=100)
    for page in pages:
        for paraula in paraules:
            main(page, paraula)
    print('\nFinished!')
def get(site=None): """Load the watchlist, fetching it if necessary.""" if site is None: site = pywikibot.Site() watchlist = [p.title() for p in site.watched_pages()] return watchlist
def main(*args): """ Process command line arguments and invoke bot. If args is an empty list, sys.argv is used. @param args: command line arguments @type args: list of unicode """ password = None sysop = False logall = False logout = False oauth = False autocreate = False unknown_args = [] for arg in pywikibot.handle_args(args): if arg.startswith("-pass"): if len(arg) == 5: password = pywikibot.input( 'Password for all accounts (no characters will be shown):', password=True) else: password = arg[6:] elif arg == "-sysop": sysop = True elif arg == "-all": logall = True elif arg == "-force": pywikibot.output( u"To force a re-login, please delete the revelant " u"lines from '%s' (or the entire file) and try again." % join(config.base_dir, 'pywikibot.lwp')) elif arg == "-logout": logout = True elif arg == '-oauth': oauth = True elif arg == '-autocreate': autocreate = True else: unknown_args += [arg] if unknown_args: pywikibot.bot.suggest_help(unknown_parameters=unknown_args) return False if password is not None: pywikibot.warning('The -pass argument is not implemented yet. See: ' 'https://phabricator.wikimedia.org/T102477') if logall: if sysop and not oauth: namedict = config.sysopnames else: namedict = config.usernames else: site = pywikibot.Site() namedict = {site.family.name: {site.code: None}} for family_name in namedict: for lang in namedict[family_name]: try: site = pywikibot.Site(code=lang, fam=family_name) if oauth: _oauth_login(site) continue if logout: site.logout() else: site.login(sysop, autocreate=autocreate) user = site.user() if user: pywikibot.output('Logged in on {0} as {1}.'.format( site, user)) else: if logout: pywikibot.output('Logged out of {0}.'.format(site)) else: pywikibot.output('Not logged in on {0}.'.format(site)) except SiteDefinitionError: pywikibot.output(u'%s.%s is not a valid site, please remove it' ' from your config' % (lang, family_name))
# for more information see [[fa:ویکی‌پدیا:درخواست‌های ربات/رده همسنگ]]
# and [[fa:ویکی‌پدیا:رده‌دهی مقالات همسنگ]]
import re
import sys
import codecs
import string
import time

import MySQLdb

import pywikibot
from pywikibot import config
from pywikibot import pagegenerators

import durusti_core

_cache = {}
page_list_run = []

#-----------------------------------------------version-----------------------------------------
fa_site = pywikibot.Site('sd', 'wikipedia')
en_site = pywikibot.Site('en', 'wikipedia')
versionpage = pywikibot.Page(fa_site, u'صارف:ZumrahBot/مساوی زمرہ جات/نسخہ')
lastversion = versionpage.get().strip()
version = u'30'
new_edition = u'1'
if lastversion != version:
    pywikibot.output(
        u"\03{lightred}Your bot doesn't use the latest version, "
        u'please update me!\03{default}')
    pywikibot.stopme()
    sys.exit()
#-----------------------------------------------------------------------------------------------


def namespacefinder(enlink, site):
def __init__(self): """Initializer with arg parsing.""" for arg in pywikibot.handle_args(): arg, sep, value = arg.partition(':') if arg == '-from': self.apfrom = value or pywikibot.input( 'Which page to start from: ') elif arg == '-reqsize': self.aplimit = int(value) elif arg == '-links': self.links = True elif arg == '-linksonly': self.links = True self.titles = False elif arg == '-replace': self.replace = True elif arg == '-redir': self.filterredir = 'all' elif arg == '-redironly': self.filterredir = 'redirects' elif arg == '-limit': self.stopAfter = int(value) elif arg in ('-autonomous', '-a'): self.autonomous = True elif arg == '-ns': self.namespaces.append(int(value)) elif arg == '-wikilog': self.wikilogfile = value elif arg == '-failedlog': self.failedTitles = value elif arg == '-failed': self.doFailed = True else: pywikibot.output(u'Unknown argument %s.' % arg) pywikibot.showHelp() sys.exit() if self.namespaces == [] and not self.doFailed: if self.apfrom == u'': # 0 should be after templates ns self.namespaces = [14, 10, 12, 0] else: self.namespaces = [0] if self.aplimit is None: self.aplimit = 200 if self.links else 'max' if not self.doFailed: self.queryParams = { 'action': 'query', 'generator': 'allpages', 'gaplimit': self.aplimit, 'gapfilterredir': self.filterredir } else: self.queryParams = {'action': 'query'} if self.apfrom != u'': pywikibot.output(u'Argument "-from" is ignored with "-failed"') propParam = 'info' if self.links: propParam += '|links|categories' self.queryParams['pllimit'] = 'max' self.queryParams['cllimit'] = 'max' self.queryParams['prop'] = propParam self.site = pywikibot.Site() if len(self.localSuspects) != len(self.latinSuspects): raise ValueError(u'Suspects must be the same size') if len(self.localKeyboard) != len(self.latinKeyboard): raise ValueError(u'Keyboard info must be the same size') if not os.path.isabs(self.wikilogfile): self.wikilogfile = pywikibot.config.datafilepath(self.wikilogfile) self.wikilog = self.OpenLogFile(self.wikilogfile) if not os.path.isabs(self.failedTitles): self.failedTitles = pywikibot.config.datafilepath( self.failedTitles) if self.doFailed: with codecs.open(self.failedTitles, 'r', 'utf-8') as f: self.titleList = [self.Page(t) for t in f] self.failedTitles += '.failed' ziplist = zip(self.localSuspects, self.latinSuspects) self.lclToLatDict = {ord(local): latin for local, latin in ziplist} self.latToLclDict = {ord(latin): local for local, latin in ziplist} if self.localKeyboard is not None: ziplist = zip(self.localKeyboard, self.latinKeyboard) self.lclToLatKeybDict = { ord(local): latin for local, latin in ziplist } self.latToLclKeybDict = { ord(latin): local for local, latin in ziplist } else: self.lclToLatKeybDict = {} self.latToLclKeybDict = {} badPtrnStr = u'([%s][%s]|[%s][%s])' \ % (ascii_letters, self.localLtr, self.localLtr, ascii_letters) self.badWordPtrn = re.compile( u'[%s%s]*%s[%s%s]*' % (ascii_letters, self.localLtr, badPtrnStr, ascii_letters, self.localLtr)) # Get whitelist self.knownWords = set() self.seenUnresolvedLinks = set() # TODO: handle "continue" if self.site.code in self.whitelists: wlpage = self.whitelists[self.site.code] pywikibot.output(u'Loading whitelist from %s' % wlpage) wlparams = { 'action': 'query', 'prop': 'links', 'titles': wlpage, 'redirects': '', 'indexpageids': '', 'pllimit': 'max', } req = api.Request(site=self.site, parameters=wlparams) data = req.submit() if len(data['query']['pageids']) == 1: pageid = data['query']['pageids'][0] links = data['query']['pages'][pageid]['links'] allWords = [ 
nn for n in links for nn in self.FindBadWords(n['title']) ] self.knownWords = set(allWords) else: raise ValueError(u'The number of pageids is not 1') pywikibot.output(u'Loaded whitelist with %i items' % len(self.knownWords)) if len(self.knownWords) > 0: pywikibot.log(u'Whitelist: %s' % u', '.join( [self.MakeLink(i, False) for i in self.knownWords])) else: pywikibot.output(u'Whitelist is not known for language %s' % self.site.code)
def encatlist(encat):
    count = 0
    listenpageTitle = []
    encat = encat.replace(u'[[', u'').replace(u']]', u'').replace(
        u'Category:', u'').replace(u'category:', u'').strip()
    language = 'en'
    encat = pywikibot.Category(pywikibot.Site(language), encat)
    listacategory = [encat]

    for enpageTitle in listacategory:
        try:
            fapages = pagefafinder(enpageTitle)
            if fapages is not False:
                for pages, profix_fa in fapages:
                    if profix_fa == '14':
                        pages = u'Category:' + unicode(pages, 'UTF-8')
                    elif profix_fa == '12':
                        pages = u'Help:' + unicode(pages, 'UTF-8')
                    elif profix_fa == '10':
                        pages = u'Template:' + unicode(pages, 'UTF-8')
                    elif profix_fa == '6':
                        pages = u'File:' + unicode(pages, 'UTF-8')
                    elif profix_fa == '4':
                        pages = u'Wikipedia:' + unicode(pages, 'UTF-8')
                    elif profix_fa == '100':
                        pages = u'Portal:' + unicode(pages, 'UTF-8')
                    elif profix_fa in ['1', '2', '3', '5', '7', '8', '9',
                                       '11', '13', '15', '101', '103',
                                       '118', '119', '446', '447', '828',
                                       '829']:
                        continue
                    else:
                        pages = unicode(pages, 'UTF-8')
                    pywikibot.output(u'\03{lightgreen}Adding ' + pages +
                                     u' to fapage lists\03{default}')
                    listenpageTitle.append(pages)
        except:
            try:
                enpageTitle = unicode(
                    str(enpageTitle),
                    'UTF-8').split(u'|')[0].split(u']]')[0].replace(
                        u'[[', u'').strip()
            except:
                enpageTitle = enpageTitle.split(u'|')[0].split(
                    u']]')[0].replace(u'[[', u'').strip()
            cat = pywikibot.Category(pywikibot.Site(language), enpageTitle)
            gent = pagegenerators.CategorizedPageGenerator(cat)
            for pagework in gent:
                count += 1
                try:
                    link = str(pagework).split(u'|')[0].split(
                        u']]')[0].replace(u'[[', u'').strip()
                except:
                    pagework = unicode(str(pagework), 'UTF-8')
                    link = pagework.split(u'|')[0].split(u']]')[0].replace(
                        u'[[', u'').strip()
                pywikibot.output(link)
                fapagetitle = englishdictionry(link, en_site, fa_site)
                if fapagetitle is False:
                    continue
                else:
                    pywikibot.output(u'\03{lightgreen}Adding ' +
                                     fapagetitle +
                                     u' to fapage lists\03{default}')
                    listenpageTitle.append(fapagetitle)

    if listenpageTitle == []:
        return False, False
    return listenpageTitle, listacategory
# -*- coding: utf-8 -*-
import os
import sys
import json

from config import config_page_name  # pylint: disable=E0611,W0614

# must be set before pywikibot is imported
os.environ['PYWIKIBOT_DIR'] = os.path.dirname(os.path.realpath(__file__))
os.environ['TZ'] = 'UTC'

import pywikibot

if len(sys.argv) < 2:
    exit('no pagename provided.\n')

site = pywikibot.Site()
site.login()

config_page = pywikibot.Page(site, config_page_name)
cfg = config_page.text
cfg = json.loads(cfg)['G15_4']
print(json.dumps(cfg, indent=4, ensure_ascii=False))

if not cfg['enable']:
    exit('disabled\n')

pagename = sys.argv[1]
mainpage = pywikibot.Page(site, pagename)
if mainpage.isTalkPage():
    talkpage = mainpage
    mainpage = talkpage.toggleTalkPage()
def save(self, title, contents):
    """Upload page content."""
    mysite = pywikibot.Site()
    page = pywikibot.Page(mysite, title)
    self.current_page = page

    if self.getOption('summary'):
        comment = self.getOption('summary')
    else:
        comment = i18n.twtranslate(mysite, 'pagefromfile-msg')

    comment_top = comment + ' - ' + i18n.twtranslate(
        mysite, 'pagefromfile-msg_top')
    comment_bottom = comment + ' - ' + i18n.twtranslate(
        mysite, 'pagefromfile-msg_bottom')
    comment_force = '%s *** %s ***' % (
        comment, i18n.twtranslate(mysite, 'pagefromfile-msg_force'))

    # Remove leading newlines (cause troubles when creating redirects)
    contents = re.sub('^[\r\n]*', '', contents)

    if page.exists():
        if not self.getOption('redirect') and page.isRedirectPage():
            pywikibot.output(u'Page %s is redirect, skipping!' % title)
            return
        pagecontents = page.get(get_redirect=True)
        nocontent = self.getOption('nocontent')
        if nocontent and (nocontent in pagecontents
                          or nocontent.lower() in pagecontents):
            pywikibot.output('Page has %s so it is skipped' % nocontent)
            return
        if self.getOption('append'):
            separator = self.getOption('append')[1]
            if separator == r'\n':
                separator = '\n'
            if self.getOption('append')[0] == 'top':
                above, below = contents, pagecontents
                comment = comment_top
            else:
                above, below = pagecontents, contents
                comment = comment_bottom
            pywikibot.output('Page {0} already exists, appending on {1}!'
                             .format(title, self.getOption('append')[0]))
            contents = above + separator + below
        elif self.getOption('force'):
            pywikibot.output(u'Page %s already exists, ***overwriting!'
                             % title)
            comment = comment_force
        else:
            pywikibot.output(u'Page %s already exists, not adding!' % title)
            return
    else:
        if self.getOption('autosummary'):
            comment = ''
            config.default_edit_summary = ''

    self.userPut(page, page.text, contents, summary=comment,
                 minor=self.getOption('minor'),
                 show_diff=self.getOption('showdiff'),
                 ignore_save_related_errors=True)
#     time.sleep(seconds)

pywikibot.throttle.Throttle.wait = wait

# # if len(sys.argv) == 1:
# #     raise ValueError('Missing input CSV file')
# # csv_path = sys.argv[1]
# # csv_file = open(csv_path, 'r')
# # csv_reader = csv.DictReader(csv_file)

# # If you changed the name of the site to something else make sure to
# # change it here
site = pywikibot.Site('ldwg', 'ldwg')
site.login()
repo = site.data_repository()

some_labels = {"en": "Clifford B. Anderson"}
new_item = pywikibot.ItemPage(repo)
new_item.editLabels(labels=some_labels, summary="Setting labels")

claim = pywikibot.Claim(repo, u'P4')  # employer
target = pywikibot.ItemPage(repo, u"Q3")  # Vanderbilt University
claim.setTarget(target)
new_item.addClaim(claim, summary=u'Adding employer claim')

claim = pywikibot.Claim(repo, u'P6')  # instance of
target = pywikibot.ItemPage(repo, u"Q5")  # human
# The original excerpt stops here; attaching the target and saving the
# claim, as done for the employer claim above, is the assumed continuation.
claim.setTarget(target)
new_item.addClaim(claim, summary=u'Adding instance of claim')
def main():
    wdsite = pywikibot.Site('wikidata', 'wikidata')
    repo = wdsite.data_repository()
    langs = ['en', 'fr', 'de']
    for lang in langs:
        wikisite = pywikibot.Site(lang, 'wikipedia')
        total = 100
        if len(sys.argv) >= 2:
            total = int(sys.argv[1])
        gen = pagegenerators.NewpagesPageGenerator(site=wikisite,
                                                   namespaces=[0],
                                                   total=total)
        #cat = pywikibot.Category(wikisite, 'Category:Articles without Wikidata item')
        #gen = pagegenerators.CategorizedPageGenerator(cat, recurse=False)
        pre = pagegenerators.PreloadingGenerator(gen, groupsize=50)
        for page in pre:
            if page.isRedirectPage():
                continue
            if not pageIsBiography(page=page, lang=lang):
                continue
            print('\n==', page.title().encode('utf-8'), '==')
            gender = calculateGender(page=page, lang=lang)
            item = ''
            try:
                item = pywikibot.ItemPage.fromPage(page)
            except:
                pass
            if item:
                print('Page has item')
                print('https://www.wikidata.org/wiki/%s' % (item.title()))
                addBiographyClaims(repo=repo, wikisite=wikisite, item=item,
                                   page=page, lang=lang)
            else:
                print('Page without item')
                # search for a valid item, otherwise create
                if authorIsNewbie(page=page, lang=lang):
                    print('Newbie author, checking quality...')
                    if pageIsRubbish(page=page, lang=lang) or \
                            (not pageCategories(page=page, lang=lang)) or \
                            (not pageReferences(page=page, lang=lang)) or \
                            (not len(list(page.getReferences(
                                namespaces=[0])))):
                        print("Page didn't pass minimum quality, skipping")
                        continue

                print(page.title().encode('utf-8'), 'need item', gender)
                wtitle = page.title()
                wtitle_ = wtitle.split('(')[0].strip()
                searchitemurl = 'https://www.wikidata.org/w/api.php?action=wbsearchentities&search=%s&language=%s&format=xml' % (urllib.parse.quote(wtitle_), lang)
                raw = getURL(searchitemurl)
                print(searchitemurl.encode('utf-8'))

                # check birthdate and if it matches, then add data
                numcandidates = ''  # do not set to zero
                if '<search />' not in raw:
                    m = re.findall(r'id="(Q\d+)"', raw)
                    numcandidates = len(m)
                    print('Found %s candidates' % (numcandidates))
                    if numcandidates > 5:
                        # too many candidates, skipping
                        print('Too many, skipping')
                        continue
                    for itemfoundq in m:
                        itemfound = pywikibot.ItemPage(repo, itemfoundq)
                        itemfound.get()
                        if ('%swiki' % (lang)) in itemfound.sitelinks:
                            print('Candidate %s has sitelink, skipping'
                                  % (itemfoundq))
                            numcandidates -= 1
                            continue
                        pagebirthyear = calculateBirthDate(page=page,
                                                           lang=lang)
                        pagebirthyear = pagebirthyear and int(
                            pagebirthyear.split('-')[0]) or ''
                        if not pagebirthyear:
                            print("Page doesn't have birthdate, skipping")
                            # break, don't continue. Without birthdate we
                            # can't decide correctly
                            break
                        # https://www.wikidata.org/wiki/Help:Dates#Precision
                        if 'P569' in itemfound.claims and \
                                itemfound.claims['P569'][0].getTarget() \
                                .precision in [9, 10, 11]:
                            itemfoundbirthyear = int(
                                itemfound.claims['P569'][0]
                                .getTarget().year)
                            print('candidate birthdate = %s, '
                                  'page birthdate = %s'
                                  % (itemfoundbirthyear, pagebirthyear))
                            mindatelen = 4
                            if len(str(itemfoundbirthyear)) != mindatelen \
                                    or len(str(pagebirthyear)) != mindatelen:
                                print('%s birthdate length != %s'
                                      % (itemfoundq, mindatelen))
                                continue
                            # reduce candidates if birthyears are different
                            minyeardiff = 3
                            if itemfoundbirthyear >= \
                                    pagebirthyear + minyeardiff or \
                                    itemfoundbirthyear <= \
                                    pagebirthyear - minyeardiff:
                                print('Candidate %s birthdate out of '
                                      'range, skipping' % (itemfoundq))
                                numcandidates -= 1
                                continue
                            # but only assume it is the same person if
                            # birthyears match
                            if itemfoundbirthyear == pagebirthyear:
                                print('%s birthyear found in candidate %s. '
                                      'Category:%s births found in page. '
                                      'OK!' % (itemfoundbirthyear,
                                               itemfoundq,
                                               itemfoundbirthyear))
                                print('Adding sitelink %s:%s'
                                      % (lang,
                                         page.title().encode('utf-8')))
                                try:
                                    itemfound.setSitelink(
                                        page,
                                        summary='BOT - Adding 1 sitelink: '
                                                '[[:%s:%s|%s]] (%s)'
                                                % (lang, page.title(),
                                                   page.title(), lang))
                                except:
                                    print('Error adding sitelink. '
                                          'Skipping.')
                                    break
                                addBiographyClaims(repo=repo,
                                                   wikisite=wikisite,
                                                   item=itemfound,
                                                   page=page, lang=lang)
                                break

                # no item found, or no candidates are useful
                if '<search />' in raw or (numcandidates == 0):
                    print('No useful item found. Creating a new one...')
                    # create item
                    newitemlabels = {lang: wtitle_}
                    newitem = pywikibot.ItemPage(repo)
                    newitem.editLabels(
                        labels=newitemlabels,
                        summary='BOT - Creating item for [[:%s:%s|%s]] '
                                '(%s): %s %s' % (lang, wtitle, wtitle,
                                                 lang, 'human', gender))
                    newitem.get()
                    try:
                        newitem.setSitelink(
                            page,
                            summary='BOT - Adding 1 sitelink: '
                                    '[[:%s:%s|%s]] (%s)'
                                    % (lang, page.title(), page.title(),
                                       lang))
                    except:
                        print('Error adding sitelink. Skipping.')
                        break
                    addBiographyClaims(repo=repo, wikisite=wikisite,
                                       item=newitem, page=page, lang=lang)