def testQueryApiSyntax(self):
        """Test that we can generate the API query correctly."""
        wikidata_query = query.WikidataQuery("http://example.com")

        # A bare sitelink query is URL-encoded into the q parameter.
        query_string = wikidata_query.getQueryString(query.Link("enwiki"))
        self.assertEqual(query_string, "q=link%5Benwiki%5D")

        self.assertEqual(wikidata_query.getUrl(query_string),
                         "http://example.com/api?q=link%5Benwiki%5D")

        # check labels and props work OK
        query_string = wikidata_query.getQueryString(
            query.Link("enwiki"), ['en', 'fr'], ['prop'])
        self.assertEqual(query_string,
                         "q=link%5Benwiki%5D&labels=en,fr&props=prop")
Beispiel #2
0
    def testQueryApiGetter(self):
        """Test that we can actually retrieve data and that caching works."""
        w = query.WikidataQuery(cacheMaxAge=0)

        # this query doesn't return any items, save a bit of bandwidth!
        q = query.HasClaim(105).AND([query.NoClaim(225), query.HasClaim(100)])

        # check that the cache file is created
        cacheFile = w.getCacheFilename(w.getQueryString(q, [], []))

        # remove existing cache file; a missing file is fine
        try:
            os.remove(cacheFile)
        except OSError:
            pass

        # with caching disabled (cacheMaxAge=0) no cache file may be written
        w.query(q)
        self.assertFalse(os.path.exists(cacheFile))

        # with caching enabled the same query must create the cache file
        w = query.WikidataQuery(cacheMaxAge=0.1)
        data = w.query(q)
        self.assertTrue(os.path.exists(cacheFile))

        # the response envelope always carries these two keys
        self.assertIn('status', data)
        self.assertIn('items', data)

        t1 = time.time()
        w.query(q)
        t2 = time.time()

        # check that the cache access is fast
        self.assertLess(t2 - t1, 0.2)
Beispiel #3
0
    def getPaintersCreators(self, cacheMaxAge=0):
        '''
        Query Wikidata to fill the cache of monuments we already have an object for.

        Runs a WDQ query for items with CLAIM[195:190804] AND CLAIM[170]
        and collects the creator (prop 170) and inventory number (prop 217)
        values for each returned item.

        @param cacheMaxAge: maximum age in seconds of the WDQ cache to reuse
        @return: tuple of (dict mapping item id to {u'creator', u'inv'},
            set of creator item ids)
        '''
        resultPaintings = {}
        resultCreators = []
        # NOTE: renamed from `query` to avoid shadowing-style confusion
        wdq = u'CLAIM[195:190804] AND CLAIM[170]'
        wd_queryset = wdquery.QuerySet(wdq)

        wd_query = wdquery.WikidataQuery(cacheMaxAge=cacheMaxAge)
        data = wd_query.query(wd_queryset, props=[str(170), str(217)])

        if data.get('status').get('error') == 'OK':
            expectedItems = data.get('status').get('items')
            creatorprops = data.get('props').get(str(170))
            invprops = data.get('props').get(str(217))

            # start every returned item with empty creator/inventory fields
            for item in data.get('items'):
                resultPaintings[item] = {u'creator': u'',
                                         u'inv': u'',
                                         }

            # each prop entry is (item id, property id, value)
            for prop in creatorprops:
                resultPaintings[prop[0]][u'creator'] = prop[2]
                resultCreators.append(prop[2])

            # only keep inventory numbers carrying the expected u'SK-' prefix
            for prop in invprops:
                invid = prop[2]
                if invid.startswith(u'SK-'):
                    resultPaintings[prop[0]][u'inv'] = invid

            pywikibot.output('I now processed %s items for creators' % expectedItems)

        return resultPaintings, set(resultCreators)
Beispiel #4
0
def WikidataQueryItemPageGenerator(query, site=None):
    """Generate pages that result from the given WikidataQuery.

    @param query: the WikidataQuery query string.

    """
    if site is None:
        site = pywikibot.Site()
    repo = site.data_repository()

    # run the query through the WDQ backend without caching
    result = wdquery.WikidataQuery(cacheMaxAge=0).query(wdquery.QuerySet(query))

    pywikibot.output(u'retrieved %d items' % result[u'status'][u'items'])
    for item_id in result[u'items']:
        yield pywikibot.ItemPage(repo, u'Q' + unicode(item_id))
Beispiel #5
0
    def fillCache(self,
                  collectionid,
                  propertyId,
                  queryoverride=u'',
                  cacheMaxAge=0):
        '''
        Query Wikidata to fill the cache of monuments we already have an object for.
        '''
        cached = {}
        if queryoverride:
            wdq = queryoverride
        else:
            wdq = u'CLAIM[195:%s] AND CLAIM[%s]' % (
                collectionid,
                propertyId,
            )

        queryset = wdquery.QuerySet(wdq)

        response = wdquery.WikidataQuery(cacheMaxAge=cacheMaxAge).query(
            queryset, props=[str(propertyId)])

        if response.get('status').get('error') == 'OK':
            expectedItems = response.get('status').get('items')
            # each entry is (item id, property id, value)
            for prop in response.get('props').get(str(propertyId)):
                # FIXME: This will overwrite id's that are used more than once.
                # Use with care and clean up your dataset first
                cached[prop[2]] = prop[0]

            if expectedItems == len(cached):
                pywikibot.output('I now have %s items in cache' %
                                 expectedItems)
            else:
                pywikibot.output(
                    'I expected %s items, but I have %s items in cache' % (
                        expectedItems,
                        len(cached),
                    ))

        return cached
Beispiel #6
0
def main():
    """Main script of airport database creation.

    Parses -f/--f OUTPUT from the command line, queries Wikidata (WDQ)
    for all items with claim 31 = 1248784 (per the comment below, the
    airport list) and writes one repr()'d Airport record per item to
    the output file.
    """
    from argparse import ArgumentParser

    parser = ArgumentParser(description="Airport DB")
    parser.add_argument("-f",
                        "--f",
                        type=str,
                        dest="output",
                        metavar="FILE",
                        required=True,
                        help="output file")
    args = parser.parse_args()

    # third argument 0 = unbuffered writes (Python 2 open() signature)
    with open(args.output, 'w', 0) as output:

        site = pywikibot.Site("fr", "wikipedia")
        repo = site.data_repository()

        # retrieve airport list
        query = pwq.HasClaim(31, items=[1248784])
        dat = pwq.WikidataQuery(cacheMaxAge=600).query(query)
        items = dat['items']
        print "Found %d items for query: %s\n" % (len(items), query)
        for i in items:
            # item_from_id / fetch_property_values / Airport are defined
            # elsewhere in this project — TODO confirm their contracts
            item = item_from_id(repo, i)
            item.get()
            # prefer the English sitelink as the airport name, if present
            name = ''
            if 'enwiki' in item.sitelinks:
                name = item.sitelinks['enwiki']
            # P625 coordinate may be absent; fall back to empty strings
            coordinate = fetch_property_values(item, 'P625')
            latitude = ''
            longitude = ''
            if coordinate is not None:
                latitude = coordinate.lat
                longitude = coordinate.lon
            airport = Airport(iata=fetch_property_values(item, 'P238'),
                              icao=fetch_property_values(item, 'P239'),
                              geo="%s, %s" % (latitude, longitude),
                              country=fetch_property_values(item, 'P17'),
                              en=name)
            output.write(repr(airport) + '\n')
Beispiel #7
0
    def most_missed_creators(self, cache_max_age=0):
        """Produce list of most frequent, but unlinked, creators.

        Query WDQ for all objects in the collection missing an artist
        then put together a top-list for most desired creator

        @param cache_max_age: maximum age in seconds of the WDQ cache
        """
        expected_items = []
        query = u'CLAIM[195:%s] AND NOCLAIM[170]' % \
                ',195:'.join(self.collections)  # collection
        wd_queryset = wdquery.QuerySet(query)

        wd_query = wdquery.WikidataQuery(cacheMaxAge=cache_max_age)
        data = wd_query.query(wd_queryset)

        if data.get('status').get('error') == 'OK':
            expected_items = data.get('items')

        creator_dict = {}
        counter = 0
        for q_val in expected_items:
            q_item = self.wd.QtoItemPage(q_val)
            data = q_item.get()
            claims = data.get('claims')
            # skip items that gained a P170 (creator) claim since the query ran
            if u'P170' in claims:
                continue
            descr = data.get('descriptions').get('en')
            if descr and descr.startswith(u'painting by '):
                creator = descr[len(u'painting by '):]
                if '(' in creator:  # to get rid of disambiguation addition
                    creator = creator[:creator.find('(')].strip()
                # direct dict lookup instead of scanning .keys()
                creator_dict[creator] = creator_dict.get(creator, 0) + 1
                counter += 1
        pywikibot.output(u'Found %d mentions of %d creators' %
                         (counter, len(creator_dict)))
        # output; with-statement guarantees the file is closed on error
        with codecs.open(u'creatorHitlist.csv', 'w', 'utf-8') as f:
            for k, v in creator_dict.iteritems():
                f.write(u'%d|%s\n' % (v, k))
Beispiel #8
0
    def getVIAF(self, cacheMaxAge=0):
        '''
        Query Wikidata to fill the cache of monuments we already have an object for.

        Runs a WDQ query for all items carrying claim 214 (VIAF, per the
        output message below) and maps each VIAF value to its item id.

        @param cacheMaxAge: maximum age in seconds of the WDQ cache to reuse
        @return: dict mapping VIAF value to u'Q<item id>'
        '''
        result = {}
        wdq = u'CLAIM[214]'

        wd_queryset = wdquery.QuerySet(wdq)

        wd_query = wdquery.WikidataQuery(cacheMaxAge=cacheMaxAge)
        data = wd_query.query(wd_queryset, props=[str(214)])

        if data.get('status').get('error') == 'OK':
            expectedItems = data.get('status').get('items')
            props = data.get('props').get(str(214))
            # each prop entry is (item id, property id, value)
            for prop in props:
                # FIXME: This will overwrite id's that are used more than once.
                # Use with care and clean up your dataset first
                result[prop[2]] = u'Q%s' % (prop[0],)

            pywikibot.output('I expected %s items and now have %s items with VIAF in cache' % (expectedItems, len(result)))

        return result
Beispiel #9
0
# -*- coding: utf-8  -*-
# Script: print the labels of every item matching a WDQ claim query.
import pywikibot
import pywikibot.data.wikidataquery as pwq
import json

# NOTE(review): this site/repo pair is immediately overwritten below;
# presumably left over from the commented-out experiment that follows.
site = pywikibot.Site("wikidata", "wikidata")
repo = site.data_repository()
#page = pywikibot.Page(site, u"Raimond de Prinhac")
#item = pywikibot.ItemPage.fromPage(page)
#dictionary = item.get()
#print dictionary

# define repo
site = pywikibot.Site("fr", "wikipedia")
repo = site.data_repository()

# retrieve airport list 106:2937507
q= pwq.HasClaim(106, items=[2937507])
# cache WDQ results for up to 600 seconds
dat = pwq.WikidataQuery(cacheMaxAge=600).query(q)
items = dat['items']
for i in items:
    # rebuild the item page from its numeric id and print its labels
    item = pywikibot.ItemPage(repo, "Q%s" % i)
    item.get()
    print item.labels