Esempio n. 1
0
    def fillCache(self, propertyId, queryoverride=u'', cacheMaxAge=0):
        '''
        Query Wikidata to fill the cache of monuments we already have an object for

        Unless queryoverride is given, the query selects items matching
        CLAIM[195:731126] that also have a claim for propertyId.

        @param propertyId: id of the property whose values become the keys
            of the cache; it is interpolated into the query and requested
            as a prop of the result
        @param queryoverride: complete query string to use instead of the
            default one (ignored when empty)
        @param cacheMaxAge: handed to wdquery.WikidataQuery unchanged
        @return: dict mapping each returned property value (prop[2]) to the
            item id it was reported for (prop[0]); empty if the query
            status is not 'OK'
        '''
        result = {}
        if queryoverride:
            query = queryoverride
        else:
            query = u'CLAIM[195:731126] AND CLAIM[%s]' % (propertyId, )
        wd_queryset = wdquery.QuerySet(query)

        prop_key = str(propertyId)
        wd_query = wdquery.WikidataQuery(cacheMaxAge=cacheMaxAge)
        data = wd_query.query(wd_queryset, props=[prop_key])

        if data.get('status').get('error') == 'OK':
            expectedItems = data.get('status').get('items')
            # A response without an entry for the property is treated as
            # "no values" instead of failing on iteration over None.
            props = (data.get('props') or {}).get(prop_key) or []
            for prop in props:
                # FIXME: This will overwrite id's that are used more than once.
                # Use with care and clean up your dataset first
                result[prop[2]] = prop[0]

            if expectedItems == len(result):
                pywikibot.output('I now have %s items in cache' %
                                 expectedItems)
            else:
                # Duplicate ids collapse into one key (see FIXME above), so
                # make the resulting shortfall visible instead of silent.
                pywikibot.output(
                    'I expected %s items, but I have %s items in cache' % (
                        expectedItems,
                        len(result),
                    ))

        return result
Esempio n. 2
0
    def fillCache(self, propertyId, queryoverride=u'', cacheMaxAge=0):
        '''
        Query Wikidata to fill the cache of paintings we already have an object for
        https://tools.wmflabs.org/autolist/autolist1.html?q=CLAIM[195%3A430682]%20AND%20NOCLAIM[217]

        Unless queryoverride is given, the query selects items matching
        CLAIM[195:430682] that also have a claim for propertyId.

        @param propertyId: id of the property whose values become the keys
            of the cache; it is interpolated into the query and requested
            as a prop of the result
        @param queryoverride: complete query string to use instead of the
            default one (ignored when empty)
        @param cacheMaxAge: handed to wdquery.WikidataQuery unchanged
        @return: dict mapping each returned property value (prop[2]) to the
            item id it was reported for (prop[0]); empty if the query
            status is not 'OK'
        '''
        result = {}
        if queryoverride:
            query = queryoverride
        else:
            query = u'CLAIM[195:430682] AND CLAIM[%s]' % (propertyId,)  # collection
        wd_queryset = wdquery.QuerySet(query)

        prop_key = str(propertyId)
        wd_query = wdquery.WikidataQuery(cacheMaxAge=cacheMaxAge)
        data = wd_query.query(wd_queryset, props=[prop_key])

        if data.get('status').get('error') == 'OK':
            expectedItems = data.get('status').get('items')
            # A response without an entry for the property is treated as
            # "no values" instead of failing on iteration over None.
            props = (data.get('props') or {}).get(prop_key) or []
            for prop in props:
                # FIXME: This will overwrite id's that are used more than once.
                # Use with care and clean up your dataset first
                result[prop[2]] = prop[0]

            if expectedItems == len(result):
                pywikibot.output('I now have %s items in cache' % expectedItems)
            else:
                # Duplicate ids collapse into one key (see FIXME above), so
                # make the resulting shortfall visible instead of silent.
                pywikibot.output(
                    'I expected %s items, but I have %s items in cache'
                    % (expectedItems, len(result)))

        return result
Esempio n. 3
0
    def getVIAF(self, cacheMaxAge=0):
        '''
        Query Wikidata for every item carrying a claim for property 214.

        @param cacheMaxAge: handed to wdquery.WikidataQuery unchanged
        @return: dict mapping each returned property value to the matching
            item id formatted as u'Q<id>'; empty if the query status is
            not 'OK'
        '''
        viaf_key = str(214)
        queryset = wdquery.QuerySet(u'CLAIM[214]')
        engine = wdquery.WikidataQuery(cacheMaxAge=cacheMaxAge)
        response = engine.query(queryset, props=[viaf_key])

        status = response.get('status')
        if status.get('error') != 'OK':
            return {}

        expected = status.get('items')
        # NOTE: a value that occurs on several items keeps only the last
        # item seen, so clean up the dataset before relying on this.
        lookup = {
            entry[2]: u'Q%s' % (entry[0], )
            for entry in response.get('props').get(viaf_key)
        }

        pywikibot.output(
            'I expected %s items and now have %s items with VIAF in cache'
            % (expected, len(lookup)))
        return lookup
Esempio n. 4
0
def WikidataQueryPageGenerator(query, site=None):
    """Generate pages that result from the given WikidataQuery.

    Every item returned by the query is yielded as an ItemPage on the data
    repository of the given site. The number of retrieved items is written
    to the pywikibot output before the first page is yielded.

    @param query: the WikidataQuery query string.
    @param site: Site for generator results; pywikibot.Site() is used when
        omitted.
    @type site: L{pywikibot.site.BaseSite}

    """
    if site is None:
        site = pywikibot.Site()
    repo = site.data_repository()

    wd_queryset = wdquery.QuerySet(query)

    wd_query = wdquery.WikidataQuery(cacheMaxAge=0)
    data = wd_query.query(wd_queryset)

    pywikibot.output(u'retrieved %d items' % data[u'status'][u'items'])

    # The former "foundit" flag started out True, so its item-id threshold
    # never filtered anything; every item is yielded, as documented.
    for item in data[u'items']:
        yield pywikibot.ItemPage(repo, u'Q%s' % (item, ))
Esempio n. 5
0
    def getCreators(self, cacheMaxAge=0):
        '''
        Query Wikidata for the property 170 values of all items matching
        CLAIM[195:190804] AND CLAIM[170].

        @param cacheMaxAge: handed to wdquery.WikidataQuery unchanged
        @return: set of the distinct property 170 values; empty if the
            query status is not 'OK'
        '''
        creator_key = str(170)
        queryset = wdquery.QuerySet(u'CLAIM[195:190804] AND CLAIM[170]')
        engine = wdquery.WikidataQuery(cacheMaxAge=cacheMaxAge)
        response = engine.query(queryset, props=[creator_key])

        status = response.get('status')
        if status.get('error') != 'OK':
            return set()

        expected = status.get('items')
        # Keep duplicates for now: the count check below compares against
        # the number of returned entries, not the number of distinct values.
        creators = [entry[2]
                    for entry in response.get('props').get(creator_key)]

        if expected == len(creators):
            pywikibot.output('I now processed %s items for creators' %
                             expected)

        return set(creators)
def get_wdq(dataset=None, data=None):
    """Find all links from Wikidata to Kulturnav using WDQ.

    Queries WDQ for items with a claim for property 1248, optionally
    restricted to claims qualified with 972:<dataset>, and afterwards reads
    the WDQ stats page to obtain a timestamp.

    @todo:
    To replace with wdqs we need something like:
    SELECT ?item ?value
      WHERE {
          ?item p:P1248 ?data .
          ?item wdt:P1248 ?value .
          {?data pq:P972 wd:Q20742915} UNION
          {?data pq:P972 wd:Q20734454}
     }

    @param dataset: Q-id (or list of Q-ids) corresponding to a dataset.
    @type dataset: str or list of str
    @param data: dictionary to which data should be added; it is updated
        in place and also returned. A new dict is created when omitted.
    @type data: dict
    @return: (timestamp, dict {qid: uuid}); the timestamp is an empty
        string when the stats page does not contain the expected marker
    @rtype: tuple (str, dict)
    """
    # initialise if needed; compare against None so that an empty dict
    # supplied by the caller is the one that actually gets filled
    if data is None:
        data = {}
    dataset = helpers.listify(dataset) or []

    # make query
    pid = '1248'
    query = u'CLAIM[%s]' % pid
    if dataset:
        qualifiers = u','.join(u'972:%s' % d.lstrip('Q') for d in dataset)
        query += u'{CLAIM[%s]}' % qualifiers

    wd_queryset = wdquery.QuerySet(query)
    wd_query = wdquery.WikidataQuery(cacheMaxAge=0)
    response = wd_query.query(wd_queryset, props=[pid])

    # extract (item id, value) pairs
    for entry in response['props'][pid]:
        data[u'Q%d' % entry[0]] = entry[2]

    # get current timestamp; close the connection even if reading fails
    connection = urllib2.urlopen(u'http://wdq.wmflabs.org/stats')
    try:
        stats = connection.read()
    finally:
        connection.close()

    # the timestamp is whatever follows the marker on the same line
    needle = u'Times :'
    start = stats.find(needle)
    if start < 0:
        # marker missing: report no timestamp rather than a stray slice
        timestamp = ''
    else:
        remainder = stats[start + len(needle):]
        timestamp = remainder.split('\n', 1)[0].strip(' -')

    return (timestamp, data)
Esempio n. 7
0
    def getPaintersCreators(self, cacheMaxAge=0):
        '''
        Query Wikidata for items matching CLAIM[195:190804] AND CLAIM[170]
        and collect their property 170 and property 217 values.

        @param cacheMaxAge: handed to wdquery.WikidataQuery unchanged
        @return: tuple of (dict, set). The dict maps every returned item to
            {u'creator': ..., u'inv': ...}, where u'inv' is only filled in
            for property 217 values starting with u'SK-'. The set holds the
            distinct property 170 values. Both are empty if the query
            status is not 'OK'.
        '''
        paintings = {}
        creators = []
        queryset = wdquery.QuerySet(u'CLAIM[195:190804] AND CLAIM[170]')
        engine = wdquery.WikidataQuery(cacheMaxAge=cacheMaxAge)
        response = engine.query(queryset, props=[str(170), str(217)])

        status = response.get('status')
        if status.get('error') != 'OK':
            return paintings, set(creators)

        expected = status.get('items')
        creator_entries = response.get('props').get(str(170))
        inventory_entries = response.get('props').get(str(217))

        # Start every item with blank fields so the loops below can
        # assign into an existing record.
        for item in response.get('items'):
            paintings[item] = {u'creator': u'', u'inv': u''}

        for entry in creator_entries:
            paintings[entry[0]][u'creator'] = entry[2]
            creators.append(entry[2])

        for entry in inventory_entries:
            inventory_id = entry[2]
            if inventory_id.startswith(u'SK-'):
                paintings[entry[0]][u'inv'] = inventory_id

        pywikibot.output('I now processed %s items for creators' % expected)

        return paintings, set(creators)
Esempio n. 8
0
def WikidataQueryItemPageGenerator(query, site=None):
    """Yield an ItemPage for every item returned by a WikidataQuery.

    The number of retrieved items is written to the pywikibot output
    before the first page is yielded.

    @param query: the WikidataQuery query string.
    @param site: site whose data repository the pages are created on;
        pywikibot.Site() is used when omitted.

    """
    target_site = pywikibot.Site() if site is None else site
    repo = target_site.data_repository()

    queryset = wdquery.QuerySet(query)
    response = wdquery.WikidataQuery(cacheMaxAge=0).query(queryset)

    pywikibot.output(u'retrieved %d items' % response[u'status'][u'items'])
    for number in response[u'items']:
        title = u'Q' + unicode(number)
        yield pywikibot.ItemPage(repo, title)
Esempio n. 9
0
    def fillCache(self,
                  collectionqid,
                  idProperty,
                  queryoverride=u'',
                  cacheMaxAge=0):
        '''
        Query Wikidata to build a lookup from idProperty value to item id.

        Unless queryoverride is given, the query selects items with claim
        195:<collectionqid without the Q> that also have a claim for
        idProperty. The outcome of the item count check is written to the
        pywikibot output.

        @param collectionqid: Q-id of the collection; every u'Q' is removed
            before it is put into the query
        @param idProperty: id of the property whose values become the keys
        @param queryoverride: complete query string to use instead of the
            default one (ignored when empty)
        @param cacheMaxAge: handed to wdquery.WikidataQuery unchanged
        @return: dict mapping each returned property value to the item id
            it was reported for; empty if the query status is not 'OK'
        '''
        query = queryoverride or u'CLAIM[195:%s] AND CLAIM[%s]' % (
            collectionqid.replace(u'Q', u''),
            idProperty,
        )
        queryset = wdquery.QuerySet(query)

        engine = wdquery.WikidataQuery(cacheMaxAge=cacheMaxAge)
        response = engine.query(queryset, props=[str(idProperty)])

        status = response.get('status')
        if status.get('error') != 'OK':
            return {}

        expected = status.get('items')
        # NOTE: an id that is used more than once keeps only the last item
        # seen. Use with care and clean up your dataset first.
        cache = {
            entry[2]: entry[0]
            for entry in response.get('props').get(str(idProperty))
        }

        if expected == len(cache):
            message = 'I now have %s items in cache' % expected
        else:
            message = 'I expected %s items, but I have %s items in cache' % (
                expected,
                len(cache),
            )
        pywikibot.output(message)

        return cache
Esempio n. 10
0
    def most_missed_creators(self, cache_max_age=0):
        """Produce list of most frequent, but unlinked, creators.

        Query WDQ for all objects in the collection missing an artist
        then put together a top-list for most desired creator.

        The creator name is taken from English descriptions of the form
        u'painting by <name>', dropping any parenthesised addition. The
        result is written to creatorHitlist.csv in the current directory
        as '<count>|<name>' lines, highest count first.

        @param cache_max_age: handed to wdquery.WikidataQuery as cacheMaxAge
        """
        prefix = u'painting by '
        expected_items = []
        query = u'CLAIM[195:%s] AND NOCLAIM[170]' % \
                ',195:'.join(self.collections)  # collection
        wd_queryset = wdquery.QuerySet(query)

        wd_query = wdquery.WikidataQuery(cacheMaxAge=cache_max_age)
        data = wd_query.query(wd_queryset)

        if data.get('status').get('error') == 'OK':
            # tolerate a response without an items entry
            expected_items = data.get('items') or []

        creator_dict = {}
        counter = 0
        for q_val in expected_items:
            item_data = self.wd.QtoItemPage(q_val).get()
            # the live item may have gained a creator since WDQ was updated
            if u'P170' in item_data.get('claims'):
                continue
            descr = item_data.get('descriptions').get('en')
            if not descr or not descr.startswith(prefix):
                continue
            creator = descr[len(prefix):]
            if '(' in creator:  # to get rid of disambiguation addition
                creator = creator.split('(', 1)[0].strip()
            creator_dict[creator] = creator_dict.get(creator, 0) + 1
            counter += 1
        pywikibot.output(u'Found %d mentions of %d creators' %
                         (counter, len(creator_dict)))

        # output, most frequent first (ties ordered by name); the context
        # manager closes the file even if a write fails
        ranking = sorted(creator_dict.items(),
                         key=lambda pair: (-pair[1], pair[0]))
        with codecs.open(u'creatorHitlist.csv', 'w', 'utf-8') as f:
            for name, hits in ranking:
                f.write(u'%d|%s\n' % (hits, name))