def main(*args):
    """Main function. Grab a generator and pass it to the bot to work on.

    Iterates over the items returned by ``CherQu``, collects the language and
    (regex-processed) text of every P2096 "media legend" qualifier on the
    items' P18 image claims, hands the collected captions to ``runOnCommons``
    and prints a short per-item report.

    Fixes over the previous revision:
    - the bare ``except:`` no longer swallows KeyboardInterrupt/SystemExit;
    - a qualifier whose text processing fails can no longer leave the
      ``lang``/``text`` lists with different lengths (both values are computed
      before either is appended).
    """
    repo = pywikibot.Site("wikidata", "wikidata").data_repository()
    # Only CherQu is currently used; the other queries are kept for manual
    # switching between item sets.
    CherQu = u"""SELECT ?item WHERE { ?item wdt:P758 _:b1 ; wdt:P18 []. }"""
    logoQu = u"""SELECT DISTINCT ?item WHERE { ?item p:P154 ?statement. ?statement ps:P154 ?image. ?statement pq:P2096 ?media. }"""
    humanQu = u"""SELECT DISTINCT ?item WHERE { ?item wdt:P31 wd:Q5. ?item p:P18 ?statement. ?statement ps:P18 ?image. ?statement pq:P2096 ?media. }"""
    nonHumanQu = u"""SELECT DISTINCT ?item WHERE { ?item p:P18 ?statement. ?statement ps:P18 ?image. ?statement pq:P2096 ?media. MINUS { ?item wdt:P31 wd:Q5. } }"""

    personGen = pagegenerators.PreloadingEntityGenerator(
        pagegenerators.WikidataSPARQLPageGenerator(CherQu, site=repo))
    count = 0
    for item in personGen:
        if item.isRedirectPage():
            pywikibot.output('{0} is a redirect page. Skipping.'.format(item))
            continue
        item.get()  # Get the item dictionary
        value = {}
        print('--> ' + item.getID() + ': ')
        for claim in item.claims['P18']:
            # Key the collected captions by the image's Commons page id.
            key = str(claim.getTarget().pageid)
            value[key] = {'lang': [], 'text': []}
            if 'P2096' in claim.qualifiers:
                for qual in claim.qualifiers['P2096']:
                    try:
                        # Compute both values BEFORE appending either, so a
                        # failure cannot leave the two lists out of sync.
                        lang = qual.getTarget().language
                        text = regex(qual.getTarget().text)
                    except Exception:
                        # Best-effort: skip malformed qualifiers (was a bare
                        # ``except:``, which also caught KeyboardInterrupt).
                        continue
                    value[key]['lang'].append(lang)
                    value[key]['text'].append(text)
        if value:
            runOnCommons(value, item.getID())
            count += 1
            for pageid in value:
                for lang, text in zip(value[pageid]['lang'],
                                      value[pageid]['text']):
                    print('--> {}; {}: {}'.format(pageid, lang, text))
        if count == 50:
            # NOTE(review): this fires only once (count keeps growing past
            # 50); if a periodic pause was intended, use ``count % 50 == 0``.
            pywikibot.output('Sleep for 60 sec. ...')
            time.sleep(60)
def custom_generator(self):
    """Return a page generator for the 'duos' query, parameterised by the
    bot's configured class option."""
    # 'class' is a reserved word, so it has to be passed via dict unpacking.
    sparql = self.store.build_query('duos',
                                    **{'class': self.getOption('class')})
    return pagegenerators.WikidataSPARQLPageGenerator(sparql, site=self.repo)
def custom_generator(self):
    """Return a page generator for the 'mixed_claims' query, capped at the
    configured result limit."""
    result_limit = self.opt['limit']
    sparql = self.store.build_query('mixed_claims', limit=result_limit)
    return pagegenerators.WikidataSPARQLPageGenerator(sparql, site=self.repo)
# See https://en.wikiversity.org/wiki/Research_in_programming_Wikidata/Countries
import pywikibot
from pywikibot import pagegenerators

# item is 'country'
# https://query.wikidata.org/#%23List of countries in English and Russian%0ASELECT %3Fcountry %3Flabel_en %3Flabel_ru%0AWHERE%0A{%0A %3Fcountry wdt%3AP31 wd%3AQ6256.%0A %3Fcountry rdfs%3Alabel %3Flabel_en filter (lang(%3Flabel_en) %3D "en").%0A %3Fcountry rdfs%3Alabel %3Flabel_ru filter (lang(%3Flabel_ru) %3D "ru").%0A}
# Select every country (P31 = Q6256) together with its English and Russian labels.
query = 'SELECT ?item ?label_en ?label_ru ' + \
        'WHERE { ' + \
        ' ?item wdt:P31 wd:Q6256.' + \
        ' ?item rdfs:label ?label_en filter (lang(?label_en) = "en").' + \
        ' ?item rdfs:label ?label_ru filter (lang(?label_ru) = "ru").' + \
        '}'  # LIMIT 3'

wikidata_site = pywikibot.Site('wikidata', 'wikidata')
generator = pagegenerators.WikidataSPARQLPageGenerator(query,
                                                       site=wikidata_site)
repo = wikidata_site.data_repository()

# DDL for the target table the results are meant to be loaded into.
mysql_string = """DROP TABLE IF EXISTS `countries` ;
CREATE TABLE IF NOT EXISTS `countries` (
  `id` INT NOT NULL,
  `name_en` VARCHAR(45) NULL,
  `name_ru` VARCHAR(45) NULL,
  PRIMARY KEY (`id`))
ENGINE = InnoDB;
"""

line = '# Wikidata_country_ID | Country name in English | in Russian'
# BUG FIX: was the Python-2-only statement ``print line``; the parenthesised
# call is valid under both Python 2 and 3.
print(line)
def addPopData(repo, source_url, name_lt, pop_year, pop_count):
    """Add a population (P1082) claim, with qualifiers and sources, to the
    Wikidata item whose Lithuanian Wikipedia article is *name_lt*.

    Parameters:
        repo       -- the Wikidata data repository
        source_url -- URL recorded as the P854 (reference URL) source
        name_lt    -- lt.wikipedia article title identifying the settlement
        pop_year   -- year the population figure refers to (January 1 assumed)
        pop_count  -- the population number to record

    Items that already carry a P1082 claim for that date (as decided by
    ``existingClaimFromYear``) are skipped; a warning is logged when no
    matching item is found.
    """
    statoffice_wd = 'Q12663462'  # LT stat office; used as the P248 "stated in" source
    # Population record date: figures refer to January 1 of pop_year.
    pop_day = 1
    pop_mon = 1
    # Data access date (today), recorded via the P813 "retrieved" source.
    now = datetime.datetime.now()
    access_day = now.day
    access_mon = now.month
    access_year = now.year

    logging.info("Checking ... %s" % (name_lt))
    # Find the item located in Lithuania (P17 = Q37) that is an instance of a
    # subclass of "human settlement" (Q486972) and whose lt.wikipedia article
    # title equals name_lt exactly.
    sparql = """PREFIX schema: <http://schema.org/>
SELECT DISTINCT ?item ?LabelEN ?page_titleLT ?itemLabel WHERE {
  ?item wdt:P17 wd:Q37.
  ?item wdt:P31 ?sub1 .
  ?sub1 (wdt:P279)* wd:Q486972 .
  ?article schema:about ?item.
  ?article schema:isPartOf <https://lt.wikipedia.org/>.
  ?article schema:name ?page_titleLT.
  filter(STR(?page_titleLT) = '%s')
} LIMIT 1""" % (name_lt, )
    wd_pages = pagegenerators.WikidataSPARQLPageGenerator(sparql,
                                                          site=wikidata)
    wd_pages = list(wd_pages)
    wd_count = 0
    for wd_page in wd_pages:
        if wd_page.exists():
            wd_count += 1
            wd_page.get()  # load item data; return value is not needed
            if not existingClaimFromYear(wd_page, pop_year, pop_mon, pop_day):
                time.sleep(10)  # throttle write rate
                population_claim = pywikibot.Claim(repo, 'P1082')
                population_claim.setTarget(
                    pywikibot.WbQuantity(amount=pop_count, site=repo))  # , error=1
                pywikibot.output(
                    'Adding %s --> %s'
                    % (population_claim.getID(), population_claim.getTarget()))
                wd_page.addClaim(population_claim)
                # Time qualifier (P585): the date the count refers to.
                qualifier = pywikibot.Claim(repo, 'P585')
                pop_date = pywikibot.WbTime(year=pop_year, month=pop_mon,
                                            day=pop_day, precision='day')
                qualifier.setTarget(pop_date)
                population_claim.addQualifier(qualifier)
                # Determination method qualifier (P459): "demographic balance".
                qualifier = pywikibot.Claim(repo, 'P459')
                method = pywikibot.ItemPage(repo, 'Q15911027')
                qualifier.setTarget(method)
                population_claim.addQualifier(qualifier)
                # Source: stated in (P248) the LT statistics office item.
                sourceWiki = pywikibot.Claim(repo, 'P248')
                sourceWiki.setTarget(pywikibot.ItemPage(repo, statoffice_wd))
                # Source: reference URL (P854).
                source = pywikibot.Claim(repo, 'P854')
                source.setTarget(source_url)
                # Source: retrieved (P813) today.
                accessed = pywikibot.Claim(repo, 'P813')
                accessed_date = pywikibot.WbTime(year=access_year,
                                                 month=access_mon,
                                                 day=access_day,
                                                 precision='day')
                accessed.setTarget(accessed_date)
                population_claim.addSources([sourceWiki, source, accessed])
            else:
                logging.info("Population claim already exists on %s"
                             % (wd_page.title()))
        else:
            logging.warning('[[%s]]: no data page in Wikidata'
                            % (wd_page.title()))
    if (wd_count == 0):
        # BUG FIX: the format string previously had no %s placeholder, so the
        # '%' operator raised TypeError instead of logging the warning.
        logging.warning('No Wikidata match found for %s' % (name_lt))
def custom_generator(self):
    """Return a page generator for the 'captions' query, parameterised by
    the bot's caption property."""
    sparql = self.store.build_query('captions',
                                    prop=self.caption_property)
    return pagegenerators.WikidataSPARQLPageGenerator(sparql, site=self.repo)
def createGenerator(queryFile, site):
    """Load the SPARQL query stored in *queryFile* and return a Wikidata
    page generator running it against *site*."""
    return pg.WikidataSPARQLPageGenerator(getQuery(queryFile), site=site)
# and "is part of" in sync. import pywikibot from pywikibot import pagegenerators as pg import sys site = pywikibot.Site("wikidata", "wikidata") repo = site.data_repository() queryEmptyMetros = open("metros-without-has-parts.sparql").read() queryMetroParts = open("metro-parts.sparql").read() if len(sys.argv) > 1: generatorEmptyMetros = [pywikibot.ItemPage(repo, sys.argv[1])] else: generatorEmptyMetros = pg.WikidataSPARQLPageGenerator(queryEmptyMetros, site=site) for item in generatorEmptyMetros: metroID = item.getID() print("Processing %s" % (metroID)) item.get() if 'P527' in item.claims: print(" Skippping, there are P527 claims already: ", metroID) continue claim = pywikibot.Claim(repo, "P527") generatorMetroParts = pg.WikidataSPARQLPageGenerator(queryMetroParts % metroID, site=site) for part in generatorMetroParts:
def newSparQLGenerator(bot, sparql):
    """Return a page generator that runs *sparql* against the bot's site."""
    target_site = bot.site
    return pagegenerators.WikidataSPARQLPageGenerator(sparql,
                                                      site=target_site)
# Parse arguments args = parse_args() # Configurate logger config_logger(log_filename=args.log) logger.info('START add_ca_label_description') # Asof: today today = str(datetime.date.today()) # Query query = QUERY.replace('{administrative_division}', args.to).replace('{today}', today) # Create item generator pwb_items = pg.WikidataSPARQLPageGenerator(query, site=wikidatabot.site) # pwb_items = [1] for i, pwb_item in enumerate(pwb_items): # pwb_item = wikidatabot.pywikibot.ItemPage(wikidatabot.repo, 'Q764858') # pwb_item = wikidatabot.pywikibot.ItemPage(wikidatabot.repo, 'Q43781672') # logger.info(pwb_item) pwb_item.get() # pwb_item_id = pwb_item.getID() logger.info(f"Item: {pwb_item.getID()}") item = Item.from_pwb(pwb_item) # Update REPLACES municipalities update_replaced_municipalities(item) data = {}