def fillCache(self, propertyId, queryoverride=u'', cacheMaxAge=0):
    """Query WDQ to build a cache of items that already carry this property.

    Builds a dict mapping the property's value to the numeric item id for
    every item that has both P195 (collection) Q731126 and the given
    property.

    @param propertyId: numeric id of the identifier property to cache
    @param queryoverride: complete WDQ query to run instead of the default
    @param cacheMaxAge: maximum acceptable age (seconds) of cached WDQ data
    @return: dict {property value: numeric item id}
    """
    result = {}
    if queryoverride:
        query = queryoverride
    else:
        query = u'CLAIM[195:731126] AND CLAIM[%s]' % (propertyId, )
    wd_queryset = wdquery.QuerySet(query)
    wd_query = wdquery.WikidataQuery(cacheMaxAge=cacheMaxAge)
    data = wd_query.query(wd_queryset, props=[str(propertyId), ])

    if data.get('status').get('error') == 'OK':
        expectedItems = data.get('status').get('items')
        props = data.get('props').get(str(propertyId))
        for prop in props:
            # FIXME: This will overwrite id's that are used more than once.
            # Use with care and clean up your dataset first
            result[prop[2]] = prop[0]

        if expectedItems == len(result):
            pywikibot.output('I now have %s items in cache' % expectedItems)
        else:
            # Duplicate identifiers collapse into a single key; report the
            # mismatch instead of silently ignoring it (matches the other
            # fillCache variant in this file).
            pywikibot.output(
                'I expected %s items, but I have %s items in cache' % (
                    expectedItems, len(result),))
    return result
def fillCache(self, propertyId, queryoverride=u'', cacheMaxAge=0):
    """Build a cache of paintings for which a Wikidata item already exists.

    Maps the value of the given identifier property to the numeric item id
    for every painting in collection (P195) Q430682 carrying that property.
    See https://tools.wmflabs.org/autolist/autolist1.html?q=CLAIM[195%3A430682]%20AND%20NOCLAIM[217]

    @param propertyId: numeric id of the identifier property
    @param queryoverride: complete WDQ query to run instead of the default
    @param cacheMaxAge: maximum acceptable age (seconds) of cached WDQ data
    @return: dict {identifier value: numeric item id}
    """
    # collection restriction is hard coded to Q430682
    query = queryoverride if queryoverride else (
        u'CLAIM[195:430682] AND CLAIM[%s]' % (propertyId,))
    wd_query = wdquery.WikidataQuery(cacheMaxAge=cacheMaxAge)
    data = wd_query.query(wdquery.QuerySet(query), props=[str(propertyId)])

    result = {}
    if data.get('status').get('error') == 'OK':
        expectedItems = data.get('status').get('items')
        for prop in data.get('props').get(str(propertyId)):
            # FIXME: identifiers used on more than one item overwrite
            # each other; clean up the dataset before relying on this.
            result[prop[2]] = prop[0]
        if expectedItems == len(result):
            pywikibot.output('I now have %s items in cache' % expectedItems)
    return result
def getVIAF(self, cacheMaxAge=0):
    """Return a cache of all items that carry a VIAF identifier (P214).

    @param cacheMaxAge: maximum acceptable age (seconds) of cached WDQ data
    @return: dict {VIAF id: u'Q<item id>'}
    """
    result = {}
    wd_query = wdquery.WikidataQuery(cacheMaxAge=cacheMaxAge)
    data = wd_query.query(wdquery.QuerySet(u'CLAIM[214]'), props=[str(214)])

    if data.get('status').get('error') == 'OK':
        expectedItems = data.get('status').get('items')
        for prop in data.get('props').get(str(214)):
            # FIXME: a VIAF id used on more than one item overwrites the
            # previous entry; clean up the dataset before relying on this.
            result[prop[2]] = u'Q%s' % (prop[0], )
        pywikibot.output(
            'I expected %s items and now have %s items with VIAF in cache'
            % (expectedItems, len(result)))
    return result
def WikidataQueryPageGenerator(query, site=None):
    """Generate pages that result from the given WikidataQuery.

    @param query: the WikidataQuery query string.
    @param site: Site for generator results.
    @type site: L{pywikibot.site.BaseSite}
    """
    if site is None:
        site = pywikibot.Site()
    repo = site.data_repository()

    wd_queryset = wdquery.QuerySet(query)
    wd_query = wdquery.WikidataQuery(cacheMaxAge=0)
    data = wd_query.query(wd_queryset)

    pywikibot.output(u'retrieved %d items' % data[u'status'][u'items'])
    for item in data[u'items']:
        # NOTE(review): the previous version carried a `foundit` flag that
        # was initialised to True and re-set True when int(item) > 17380752,
        # so the threshold never filtered anything; the dead flag logic has
        # been removed — every item is yielded, exactly as before.
        yield pywikibot.ItemPage(repo, u'Q' + unicode(item))
def getCreators(self, cacheMaxAge=0):
    """Collect the creators (P170) of all items in collection Q190804.

    @param cacheMaxAge: maximum acceptable age (seconds) of cached WDQ data
    @return: set of creator values found on the matching items
    """
    result = []
    wd_query = wdquery.WikidataQuery(cacheMaxAge=cacheMaxAge)
    queryset = wdquery.QuerySet(u'CLAIM[195:190804] AND CLAIM[170]')
    data = wd_query.query(queryset, props=[str(170)])

    if data.get('status').get('error') == 'OK':
        expectedItems = data.get('status').get('items')
        for prop in data.get('props').get(str(170)):
            # FIXME: This will overwrite id's that are used more than once.
            # Use with care and clean up your dataset first
            result.append(prop[2])
        if expectedItems == len(result):
            pywikibot.output(
                'I now processed %s items for creators' % expectedItems)
    return set(result)
def get_wdq(dataset=None, data=None):
    """Find all links from Wikidata to Kulturnav using WDQ.

    @todo: To replace with wdqs we need something like:
        SELECT ?item ?value
        WHERE {
            ?item p:P1248 ?data .
            ?item wdt:P1248 ?value .
            {?data pq:P972 wd:Q20742915} UNION
            {?data pq:P972 wd:Q20734454}
        }

    @param dataset: Q-id (or list of Q-ids) corresponding to a dataset.
    @type dataset: str or list of str
    @param data: dictionary to which data should be added
    @type data: dict
    @return: (timestamp, dict {qid: uuid})
    @rtype: tuple (str, dict)
    """
    # initialise if needed
    data = data or {}
    dataset = helpers.listify(dataset) or []

    # build the query: every item with a P1248 claim, optionally
    # restricted by a P972 qualifier to the requested dataset(s)
    pid = '1248'
    query = u'CLAIM[%s]' % pid
    if dataset:
        qualifiers = u','.join(
            u'972:%s' % d.lstrip('Q') for d in dataset)
        query += u'{CLAIM[' + qualifiers + ']}'

    wd_queryset = wdquery.QuerySet(query)
    wd_query = wdquery.WikidataQuery(cacheMaxAge=0)
    j = wd_query.query(wd_queryset, props=[str(pid)])

    # extract pairs; entry[0] is the numeric item id (formatted as Q<id>),
    # entry[2] is the property value
    for entry in j['props'][pid]:
        data[u'Q%d' % entry[0]] = entry[2]

    # scrape the current timestamp from the WDQ status page
    needle = u'Times :'
    stats = urllib2.urlopen(u'http://wdq.wmflabs.org/stats').read()
    stats = stats[stats.find(needle):]
    time = stats[len(needle):stats.find('\n')].strip(' -')

    return (time, data)
def getPaintersCreators(self, cacheMaxAge=0):
    """Build a painting cache and collect the creators used in it.

    Queries WDQ for every item in collection (P195) Q190804 that has a
    creator (P170), recording for each item its creator and — when the
    value starts with u'SK-' — its inventory number (P217).

    @param cacheMaxAge: maximum acceptable age (seconds) of cached WDQ data
    @return: tuple (dict {item id: {u'creator': ..., u'inv': ...}},
        set of creator values)
    """
    resultPaintings = {}
    resultCreators = []
    query = u'CLAIM[195:190804] AND CLAIM[170]'
    wd_queryset = wdquery.QuerySet(query)
    wd_query = wdquery.WikidataQuery(cacheMaxAge=cacheMaxAge)
    data = wd_query.query(wd_queryset, props=[str(170), str(217)])

    if data.get('status').get('error') == 'OK':
        expectedItems = data.get('status').get('items')
        creatorprops = data.get('props').get(str(170))
        invprops = data.get('props').get(str(217))

        # Start every matched item with empty creator/inventory fields
        for item in data.get('items'):
            resultPaintings[item] = {u'creator': u'', u'inv': u''}

        for prop in creatorprops:
            resultPaintings[prop[0]][u'creator'] = prop[2]
            resultCreators.append(prop[2])

        # Only keep inventory numbers with the u'SK-' prefix
        for prop in invprops:
            invid = prop[2]
            if invid.startswith(u'SK-'):
                resultPaintings[prop[0]][u'inv'] = invid

        pywikibot.output(
            'I now processed %s items for creators' % expectedItems)
    return resultPaintings, set(resultCreators)
def WikidataQueryItemPageGenerator(query, site=None):
    """Generate pages that result from the given WikidataQuery.

    @param query: the WikidataQuery query string.
    """
    if site is None:
        site = pywikibot.Site()
    repo = site.data_repository()

    result = wdquery.WikidataQuery(cacheMaxAge=0).query(
        wdquery.QuerySet(query))
    pywikibot.output(u'retrieved %d items' % result[u'status'][u'items'])

    for item_id in result[u'items']:
        yield pywikibot.ItemPage(repo, u'Q' + unicode(item_id))
def fillCache(self, collectionqid, idProperty, queryoverride=u'', cacheMaxAge=0):
    """Build a cache of items in a collection that already have an id.

    Maps the value of idProperty to the numeric item id for every item
    whose collection (P195) is collectionqid and that carries idProperty.

    @param collectionqid: Q-id of the collection, e.g. u'Q190804'
    @param idProperty: numeric id of the identifier property
    @param queryoverride: complete WDQ query to run instead of the default
    @param cacheMaxAge: maximum acceptable age (seconds) of cached WDQ data
    @return: dict {identifier value: numeric item id}
    """
    if queryoverride:
        query = queryoverride
    else:
        query = u'CLAIM[195:%s] AND CLAIM[%s]' % (
            collectionqid.replace(u'Q', u''), idProperty, )

    response = wdquery.WikidataQuery(cacheMaxAge=cacheMaxAge).query(
        wdquery.QuerySet(query), props=[str(idProperty)])

    result = {}
    if response.get('status').get('error') == 'OK':
        expectedItems = response.get('status').get('items')
        for prop in response.get('props').get(str(idProperty)):
            # FIXME: This will overwrite id's that are used more than once.
            # Use with care and clean up your dataset first
            result[prop[2]] = prop[0]

        if expectedItems == len(result):
            pywikibot.output('I now have %s items in cache' % expectedItems)
        else:
            pywikibot.output(
                'I expected %s items, but I have %s items in cache' % (
                    expectedItems, len(result), ))
    return result
def most_missed_creators(self, cache_max_age=0):
    """Produce list of most frequent, but unlinked, creators.

    Query WDQ for all objects in the collection missing an artist
    then put together a top-list for most desired creator, written
    to creatorHitlist.csv as 'count|creator' lines.

    @param cache_max_age: maximum acceptable age (seconds) of cached
        WDQ data
    """
    expected_items = []
    query = u'CLAIM[195:%s] AND NOCLAIM[170]' % \
        ',195:'.join(self.collections)  # collection
    wd_queryset = wdquery.QuerySet(query)
    wd_query = wdquery.WikidataQuery(cacheMaxAge=cache_max_age)
    data = wd_query.query(wd_queryset)

    if data.get('status').get('error') == 'OK':
        expected_items = data.get('items')

    creator_dict = {}
    counter = 0
    prefix = u'painting by '
    for q_val in expected_items:
        q_item = self.wd.QtoItemPage(q_val)
        data = q_item.get()
        claims = data.get('claims')
        if u'P170' in claims:
            # a creator was added since the WDQ result was cached
            continue
        descr = data.get('descriptions').get('en')
        if descr and descr.startswith(prefix):
            creator = descr[len(prefix):]
            if '(' in creator:
                # to get rid of disambiguation addition
                creator = creator[:creator.find('(')].strip()
            # was `creator in creator_dict.keys()`: a linear scan in
            # Python 2 plus a second lookup; dict.get does one hashed
            # lookup and removes the if/else
            creator_dict[creator] = creator_dict.get(creator, 0) + 1
            counter += 1

    pywikibot.output(u'Found %d mentions of %d creators'
                     % (counter, len(creator_dict)))

    # output; `with` guarantees the file is closed even on error
    with codecs.open(u'creatorHitlist.csv', 'w', 'utf-8') as f:
        for k, v in creator_dict.iteritems():
            f.write(u'%d|%s\n' % (v, k))