예제 #1
0
def main(*args):
    """
    Collect P2096 qualifiers of P18 claims and hand them to runOnCommons.

    Runs the ``CherQu`` SPARQL query against the Wikidata repository and,
    for every non-redirect item returned, builds a mapping::

        {str(pageid of the P18 target): {'lang': [...], 'text': [...]}}

    from the P2096 qualifiers of each P18 claim.  A non-empty mapping is
    passed to ``runOnCommons`` together with the item ID and then printed.
    After every 50 items handled that way the loop pauses for 60 seconds.

    :param args: accepted but not used by this function.
    """
    repo = pywikibot.Site("wikidata", "wikidata").data_repository()

    # Query used below: items that have both a P758 and a P18 statement.
    CherQu = u"""SELECT ?item WHERE {
      ?item wdt:P758 _:b1 ;
            wdt:P18 [].
    }"""

    # The three queries below are not used by this function; they are kept
    # as ready-made alternative selections for the generator.
    logoQu = u"""SELECT DISTINCT ?item
    WHERE
    {
     ?item p:P154 ?statement.
     ?statement ps:P154 ?image.
     ?statement pq:P2096 ?media.
    }"""

    humanQu = u"""SELECT DISTINCT ?item
    WHERE
    {
     ?item wdt:P31 wd:Q5.
     ?item p:P18 ?statement.
     ?statement ps:P18 ?image.
     ?statement pq:P2096 ?media.
    }"""

    nonHumanQu = u"""SELECT DISTINCT ?item
    WHERE
    {
     ?item p:P18 ?statement.
     ?statement ps:P18 ?image.
     ?statement pq:P2096 ?media.
     MINUS { ?item wdt:P31 wd:Q5. }
    }"""

    personGen = pagegenerators.PreloadingEntityGenerator(
        pagegenerators.WikidataSPARQLPageGenerator(CherQu, site=repo))
    count = 0
    for item in personGen:
        if item.isRedirectPage():
            pywikibot.output('{0} is a redirect page. Skipping.'.format(item))
            continue
        item.get()  # Get the item dictionary
        value = {}
        print('--> ' + item.getID() + ': ')
        # .get() avoids a KeyError should an item carry no P18 claim.
        for claim in item.claims.get('P18', []):
            captions = {'lang': [], 'text': []}
            value[str(claim.getTarget().pageid)] = captions

            for qual in claim.qualifiers.get('P2096', []):
                # Resolve both fields before appending either one, so a
                # failure can never leave 'lang' and 'text' with different
                # lengths (which would mis-pair them in the zip() below).
                try:
                    target = qual.getTarget()
                    lang = target.language
                    text = regex(target.text)
                except Exception as error:
                    # Best effort: skip the unusable qualifier, but do not
                    # swallow KeyboardInterrupt/SystemExit.
                    pywikibot.output(
                        'Skipping a P2096 qualifier on {0}: {1}'.format(
                            item.getID(), error))
                    continue
                captions['lang'].append(lang)
                captions['text'].append(text)

        if value:
            runOnCommons(value, item.getID())
            count += 1
            for pageid, captions in value.items():
                for lang, text in zip(captions['lang'], captions['text']):
                    print('--> {}; {}: {}'.format(pageid, lang, text))

            # Throttle: pause after every 50 handled items.  Checking here,
            # right after the increment, keeps the pause from re-triggering
            # on later items that did not change the counter.
            if count % 50 == 0:
                pywikibot.output('Sleep for 60 sec. ...')
                time.sleep(60)
예제 #2
0
 def custom_generator(self):
     """Return a SPARQL page generator for the 'duos' query on self.repo."""
     # 'class' is a Python keyword, so it can only be passed through
     # dictionary unpacking, never as a literal keyword argument.
     class_option = self.getOption('class')
     sparql = self.store.build_query('duos', **{'class': class_option})
     return pagegenerators.WikidataSPARQLPageGenerator(
         sparql, site=self.repo)
 def custom_generator(self):
     """Return a SPARQL page generator for the 'mixed_claims' query.

     The query is built by ``self.store.build_query`` with the ``limit``
     entry of ``self.opt`` and is run against ``self.repo``.
     """
     sparql = self.store.build_query(
         'mixed_claims',
         limit=self.opt['limit'],
     )
     return pagegenerators.WikidataSPARQLPageGenerator(
         sparql, site=self.repo)
예제 #4
0
# See https://en.wikiversity.org/wiki/Research_in_programming_Wikidata/Countries

import pywikibot
from pywikibot import pagegenerators

# item is 'country'
# https://query.wikidata.org/#%23List of countries in English and Russian%0ASELECT %3Fcountry %3Flabel_en %3Flabel_ru%0AWHERE%0A{%0A %3Fcountry wdt%3AP31 wd%3AQ6256.%0A %3Fcountry rdfs%3Alabel %3Flabel_en filter (lang(%3Flabel_en) %3D "en").%0A %3Fcountry rdfs%3Alabel %3Flabel_ru filter (lang(%3Flabel_ru) %3D "ru").%0A}
# SPARQL query: every ?item with P31 = Q6256 together with its English and
# Russian labels.  Adjacent literals are concatenated by the parser, so no
# backslash continuations are needed and the resulting string is unchanged.
query = (
    'SELECT ?item ?label_en ?label_ru '
    'WHERE { '
    '  ?item wdt:P31 wd:Q6256.'
    '  ?item rdfs:label ?label_en filter (lang(?label_en) = "en").'
    '  ?item rdfs:label ?label_ru filter (lang(?label_ru) = "ru").'
    '}'  # LIMIT 3'
)

wikidata_site = pywikibot.Site('wikidata', 'wikidata')
generator = pagegenerators.WikidataSPARQLPageGenerator(query,
                                                       site=wikidata_site)

repo = wikidata_site.data_repository()

# SQL that drops and recreates the `countries` table (id, name_en, name_ru).
# It is only defined here; nothing in the visible part of the script runs it.
mysql_string = """DROP TABLE IF EXISTS `countries` ;

CREATE TABLE IF NOT EXISTS `countries` (
  `id` INT NOT NULL,
  `name_en` VARCHAR(45) NULL,
  `name_ru` VARCHAR(45) NULL,
  PRIMARY KEY (`id`))
ENGINE = InnoDB;
"""

# Header line for the listing.  The function-call form of print works on
# Python 3 and, with a single argument, prints the same text on Python 2.
line = '# Wikidata_country_ID | Country name in English | in Russian'
print(line)
예제 #5
0
def addPopData(repo, source_url, name_lt, pop_year, pop_count):
    """Add a sourced P1082 claim to the Wikidata item matching ``name_lt``.

    The item is looked up with a SPARQL query (``LIMIT 1``) for items with
    P17 = Q37 whose P31 class is Q486972 or one of its P279 subclasses and
    whose lt.wikipedia.org article title equals ``name_lt``.  Unless
    ``existingClaimFromYear`` reports a claim for 1 January of ``pop_year``,
    a P1082 claim is added with P585 and P459 qualifiers and a reference
    made of P248, P854 and P813.

    :param repo: data repository passed to every pywikibot.Claim,
        pywikibot.ItemPage and pywikibot.WbQuantity created here.
    :param source_url: target of the P854 reference claim.
    :param name_lt: article title on lt.wikipedia.org used for the lookup.
    :param pop_year: year of the P585 qualifier (month and day are fixed
        to 1).
    :param pop_count: amount stored in the P1082 claim.
    :return: None.
    """
    statoffice_wd = 'Q12663462'  # LT stat office

    # population record date
    pop_day = 1
    pop_mon = 1

    # data access date
    now = datetime.datetime.now()

    logging.info("Checking ... %s", name_lt)

    # Escape backslashes and single quotes so that a title containing an
    # apostrophe cannot terminate the SPARQL string literal early.
    escaped_name = name_lt.replace('\\', '\\\\').replace("'", "\\'")

    sparql = """PREFIX schema: <http://schema.org/>
    SELECT DISTINCT ?item ?LabelEN ?page_titleLT ?itemLabel WHERE {
    ?item wdt:P17 wd:Q37.
    ?item wdt:P31 ?sub1 . ?sub1 (wdt:P279)* wd:Q486972 .
    ?article schema:about ?item.
    ?article schema:isPartOf <https://lt.wikipedia.org/>.
    ?article schema:name ?page_titleLT.
    filter(STR(?page_titleLT) = '%s')
    }
    LIMIT 1""" % (escaped_name, )
    # NOTE(review): `wikidata` is not defined in this function; it is
    # presumably a module-level site object -- confirm.
    wd_pages = list(
        pagegenerators.WikidataSPARQLPageGenerator(sparql, site=wikidata))

    wd_count = 0
    for wd_page in wd_pages:

        if not wd_page.exists():
            logging.warning('[[%s]]: no data page in Wikidata',
                            wd_page.title())
            continue

        wd_count += 1
        wd_page.get()  # return value is not needed here

        if existingClaimFromYear(wd_page, pop_year, pop_mon, pop_day):
            logging.info("Population claim already exists on %s",
                         wd_page.title())
            continue

        time.sleep(10)

        population_claim = pywikibot.Claim(repo, 'P1082')
        population_claim.setTarget(
            pywikibot.WbQuantity(amount=pop_count,
                                 site=repo))  # , error=1
        pywikibot.output(
            'Adding %s --> %s' %
            (population_claim.getID(), population_claim.getTarget()))
        wd_page.addClaim(population_claim)

        # time qualifier
        qualifier = pywikibot.Claim(repo, 'P585')
        pop_date = pywikibot.WbTime(year=pop_year,
                                    month=pop_mon,
                                    day=pop_day,
                                    precision='day')
        qualifier.setTarget(pop_date)
        population_claim.addQualifier(qualifier)

        # method qualifier       "demographic balance"
        qualifier = pywikibot.Claim(repo, 'P459')
        method = pywikibot.ItemPage(repo, 'Q15911027')
        qualifier.setTarget(method)
        population_claim.addQualifier(qualifier)

        # source as wiki page:
        sourceWiki = pywikibot.Claim(repo, 'P248')
        sourceWiki.setTarget(pywikibot.ItemPage(repo, statoffice_wd))

        # url as source
        source = pywikibot.Claim(repo, 'P854')
        source.setTarget(source_url)

        # accessed
        accessed = pywikibot.Claim(repo, 'P813')
        accessed_date = pywikibot.WbTime(year=now.year,
                                         month=now.month,
                                         day=now.day,
                                         precision='day')
        accessed.setTarget(accessed_date)

        population_claim.addSources([sourceWiki, source, accessed])

    if wd_count == 0:
        # The message needs a placeholder; without one the %-formatting
        # raised TypeError instead of logging the warning.
        logging.warning('No Wikidata match found for %s', name_lt)
예제 #6
0
 def custom_generator(self):
     """Return a SPARQL page generator for the 'captions' query.

     The query is built by ``self.store.build_query`` with
     ``prop=self.caption_property`` and is run against ``self.repo``.
     """
     sparql = self.store.build_query(
         'captions',
         prop=self.caption_property,
     )
     return pagegenerators.WikidataSPARQLPageGenerator(
         sparql, site=self.repo)
예제 #7
0
def createGenerator(queryFile, site):
    """Return a SPARQL page generator for the query of ``queryFile``.

    The query text is whatever ``getQuery(queryFile)`` returns; it is run
    against ``site``.
    """
    sparql = getQuery(queryFile)
    generator = pg.WikidataSPARQLPageGenerator(sparql, site=site)
    return generator
예제 #8
0
# and "is part of" in sync.

import pywikibot
from pywikibot import pagegenerators as pg
import sys

site = pywikibot.Site("wikidata", "wikidata")
repo = site.data_repository()

# Load both SPARQL texts up front.  The ``with`` blocks close each file
# handle right after reading instead of leaving that to the garbage
# collector.
with open("metros-without-has-parts.sparql") as query_file:
    queryEmptyMetros = query_file.read()
with open("metro-parts.sparql") as query_file:
    queryMetroParts = query_file.read()

# An item ID given as the first command-line argument restricts the run to
# that single item; otherwise the items come from the SPARQL query.
if len(sys.argv) > 1:
    generatorEmptyMetros = [pywikibot.ItemPage(repo, sys.argv[1])]
else:
    generatorEmptyMetros = pg.WikidataSPARQLPageGenerator(queryEmptyMetros,
                                                          site=site)

for item in generatorEmptyMetros:
    metroID = item.getID()
    print("Processing %s" % (metroID))

    item.get()
    if 'P527' in item.claims:
        print("    Skippping, there are P527 claims already: ", metroID)
        continue
    claim = pywikibot.Claim(repo, "P527")

    generatorMetroParts = pg.WikidataSPARQLPageGenerator(queryMetroParts %
                                                         metroID,
                                                         site=site)
    for part in generatorMetroParts:
예제 #9
0
 def newSparQLGenerator(bot, sparql):
     """Return a SPARQL page generator for ``sparql`` on ``bot.site``."""
     make_generator = pagegenerators.WikidataSPARQLPageGenerator
     return make_generator(sparql, site=bot.site)
    # Parse arguments
    args = parse_args()

    # Configurate logger
    config_logger(log_filename=args.log)
    logger.info('START add_ca_label_description')

    # Asof: today
    today = str(datetime.date.today())

    # Query
    query = QUERY.replace('{administrative_division}', args.to).replace('{today}', today)

    # Create item generator
    pwb_items = pg.WikidataSPARQLPageGenerator(query, site=wikidatabot.site)
    # pwb_items = [1]

    for i, pwb_item in enumerate(pwb_items):
        # pwb_item = wikidatabot.pywikibot.ItemPage(wikidatabot.repo, 'Q764858')
        # pwb_item = wikidatabot.pywikibot.ItemPage(wikidatabot.repo, 'Q43781672')
        # logger.info(pwb_item)
        pwb_item.get()
        # pwb_item_id = pwb_item.getID()
        logger.info(f"Item: {pwb_item.getID()}")
        item = Item.from_pwb(pwb_item)

        # Update REPLACES municipalities
        update_replaced_municipalities(item)

        data = {}