Ejemplo n.º 1
0
def main():
    """Find VIAF identifiers for artists via their Wikipedia pages and
    record them in the local ``viaf`` table.

    Relies on module-level names not visible in this chunk: ``sdb``
    (SQLite connection for the viaf table), ``db`` (artist database),
    ``wp_url_query``, ``wp_lang``, ``fetch_pages_with_viaf``,
    ``extract_page_title`` and ``fetch_viaf``.
    """
    # Titles of Wikipedia pages known to carry a VIAF identifier.
    pages_with_viaf = set()
    for page in fetch_pages_with_viaf():
        pages_with_viaf.add(page)

    # Previously stored rows, keyed by artist id, so they can be skipped.
    artist_viaf = {}
    rows = sdb.execute("SELECT artist, url, viaf FROM viaf")
    for artist, url, viaf in rows:
        artist_viaf[artist] = {
            'url': url,
            'viaf': viaf,
            # NOTE(review): `submitted` is not defined anywhere in this
            # function — presumably a module-level global; confirm.
            'submitted': submitted
        }

    cnt = 0
    for artist in db.execute(wp_url_query):
        # Already have a VIAF row for this artist.
        if artist['id'] in artist_viaf:
            continue
        page = extract_page_title(artist['url'], wp_lang, normalize=True)
        # Only pages known to carry a VIAF link are worth querying.
        if page not in pages_with_viaf:
            continue
        cnt += 1
        viaf = fetch_viaf(page)
        print artist, viaf
        # Insert and commit after each row, so earlier results are kept
        # if the run is interrupted.
        sdb.execute('INSERT INTO viaf (artist, url, viaf) VALUES (?, ?, ?)',
                    (artist['id'], artist['url'], viaf))
        sdb.commit()
    print cnt
Ejemplo n.º 2
0
 def fetch(cls, url, use_cache=True):
     """Build an instance from a Wikipedia article URL.

     Parses the two-letter language subdomain and the page title out of
     *url*, downloads the page content through the MediaWiki API, and
     returns a new instance of this class.
     """
     # e.g. 'en' in http://en.wikipedia.org/wiki/...
     match = re.match(r'^http://([a-z]{2})\.wikipedia\.org', url)
     lang = match.group(1).encode('utf8')
     title = extract_page_title(url, lang)
     api = MediaWiki('http://%s.wikipedia.org/w/api.php' % lang)
     content = get_page_content(api, title, lang, use_cache) or ''
     return cls(title, content, lang)
Ejemplo n.º 3
0
 def fetch(cls, url, use_cache=True):
     """Build an instance from a Wikipedia article URL, including the
     page's Wikidata item id when the API reports one.
     """
     match = re.match(r'^http://([a-z\-]+)\.wikipedia\.org', url)
     lang = match.group(1).encode('utf8')
     title = extract_page_title(url, lang)
     api = MediaWiki('http://%s.wikipedia.org/w/api.php' % lang)
     # One query for both the page properties (Wikidata id) and the
     # latest revision's wikitext.
     resp = api.call({'action': 'query', 'prop': 'pageprops|revisions', 'titles': title.encode('utf8'), 'rvprop': 'content'})
     page = list(resp['query']['pages'].values())[0]
     if 'revisions' in page:
         content = list(page['revisions'][0].values())[0]
     else:
         content = None
     props = page.get('pageprops', {})
     wikidata_id = props.get('wikibase_item')
     return cls(title, content or '', lang, wikidata_id)
Ejemplo n.º 4
0
 def fetch(cls, url, use_cache=True):
     """Fetch the Wikipedia page behind *url* (content plus Wikidata
     item id, when available) and return a new instance.
     """
     lang_match = re.match(r'^http://([a-z\-]+)\.wikipedia\.org', url)
     page_lang = lang_match.group(1).encode('utf8')
     page_title = extract_page_title(url, page_lang)
     wp = MediaWiki('http://%s.wikipedia.org/w/api.php' % page_lang)
     query = {
         'action': 'query',
         'prop': 'pageprops|revisions',
         'titles': page_title.encode('utf8'),
         'rvprop': 'content',
     }
     resp = wp.call(query)
     page = resp['query']['pages'].values()[0]
     # Revision content may be absent (e.g. missing page).
     content = None
     if 'revisions' in page:
         content = page['revisions'][0].values()[0]
     wikidata_id = None
     if 'pageprops' in page and 'wikibase_item' in page['pageprops']:
         wikidata_id = page['pageprops']['wikibase_item']
     return cls(page_title, content or '', page_lang, wikidata_id)
Ejemplo n.º 5
0
def main():
    """Find VIAF identifiers for artists via their Wikipedia pages and
    record them in the local ``viaf`` table.

    Relies on module-level names not visible in this chunk: ``sdb``
    (SQLite connection for the viaf table), ``db`` (artist database),
    ``wp_url_query``, ``wp_lang``, ``fetch_pages_with_viaf``,
    ``extract_page_title`` and ``fetch_viaf``.
    """
    # Titles of Wikipedia pages known to carry a VIAF identifier.
    pages_with_viaf = set()
    for page in fetch_pages_with_viaf():
        pages_with_viaf.add(page)

    # Previously stored rows, keyed by artist id, so they can be skipped.
    # NOTE(review): `submitted` is not defined anywhere in this function
    # — presumably a module-level global; confirm.
    artist_viaf = {}
    rows = sdb.execute("SELECT artist, url, viaf FROM viaf")
    for artist, url, viaf in rows:
        artist_viaf[artist] = {"url": url, "viaf": viaf, "submitted": submitted}

    cnt = 0
    for artist in db.execute(wp_url_query):
        # Already have a VIAF row for this artist.
        if artist["id"] in artist_viaf:
            continue
        page = extract_page_title(artist["url"], wp_lang, normalize=True)
        # Only pages known to carry a VIAF link are worth querying.
        if page not in pages_with_viaf:
            continue
        cnt += 1
        viaf = fetch_viaf(page)
        print artist, viaf
        # Insert and commit after each row, so earlier results are kept
        # if the run is interrupted.
        sdb.execute("INSERT INTO viaf (artist, url, viaf) VALUES (?, ?, ?)", (artist["id"], artist["url"], viaf))
        sdb.commit()
    print cnt
Ejemplo n.º 6
0
 def fetch(cls, url, use_cache=True):
     """Create an instance from a Wikipedia URL: parse the language
     subdomain and page title, then download the page content.
     """
     lang = re.match(r'^http://([a-z]{2})\.wikipedia\.org', url).group(1).encode('utf8')
     title = extract_page_title(url, lang)
     api = MediaWiki('http://%s.wikipedia.org/w/api.php' % lang)
     text = get_page_content(api, title, lang, use_cache) or ''
     return cls(title, text, lang)