Esempio n. 1
0
    def find_items(keywords):
        """Search DPLA for items matching any of the given keywords.

        Example use::

            DPLA.find_items(keywords=['term1', 'term2'])

        :param keywords: list of search terms; joined with ``OR``
        :returns: list of DisplayItem objects, one per result document
            that has a preview image url (``object``).  An empty list is
            returned when the response contains no ``docs``.
        """
        api = Bibs()
        qry = 'api_key->%s:q->%s' % (
            DPLA.API_KEY,
            ' OR '.join(keywords)
        )

        # qry from unicode string to regular string
        qry = qry.encode("utf8", "ignore")
        # NOTE(review): the logged query string contains the API key
        logger.debug('dpla query: %s', qry)

        # TODO: restrict to image only, or at least things with preview image
        start = time.time()
        results = api.search(qry, 'dplav2', 'items')
        logger.info('dpla query completed in %.2f sec', time.time() - start)

        items = []
        # no results (e.g. an error response without 'docs'): return the
        # empty list instead of failing with a KeyError
        if not results or 'docs' not in results:
            return items

        for doc in results['docs']:
            # according to dpla docs, should be url preview for item
            # docs reference a field for object mimetype, not seeing in results
            thumbnail = doc.get('object')

            # for now, just skip items without an image url
            if not thumbnail:
                continue

            # tolerate records missing these sections rather than
            # aborting the whole search on a single incomplete document
            src_res = doc.get('sourceResource') or {}
            provider = doc.get('provider') or {}

            i = DisplayItem(
                title=src_res.get('title'),
                format=src_res.get('type'),
                source=provider.get('name'),
                # collection or provider here? src_rec['collection']['title']
                # NOTE: collection apparently not set for all items
                thumbnail=thumbnail,
                # url on provider's website with context
                url=doc.get('isShownAt')
            )

            # NOTE(review): 'date' is handled like 'spatial' below, on the
            # assumption it may also arrive as a list -- confirm against API
            date = src_res.get('date')
            if isinstance(date, list):
                date = date[0] if date else None
            if isinstance(date, dict):
                i.date = date.get('displayDate')

            # spatial is sometimes a list but not always; use first entry
            space = src_res.get('spatial')
            if isinstance(space, list):
                space = space[0] if space else None
            # country? state? coords?
            if space and isinstance(space, dict):
                i.location = space.get('name')

            # Add the aggregator for reference
            i.aggregator = DPLA.name

            items.append(i)

        return items
Esempio n. 2
0
    def find_items(keywords):
        """Query DPLA for the given keywords and build display items.

        Example use::

            DPLA.find_items(keywords=['term1', 'term2'])

        :param keywords: list of search terms; joined with ``OR``
        :returns: list of DisplayItem objects whose ``url`` is built from
            a base url and the document id; documents without a preview
            image url (``object``) are skipped.  An empty list is returned
            when the response contains no ``docs``.
        """
        api = Bibs()
        qry = 'api_key->%s:q->%s' % (DPLA.API_KEY, ' OR '.join(keywords))

        # qry from unicode string to regular string
        qry = qry.encode("utf8", "ignore")
        # NOTE(review): the logged query string contains the API key
        logger.debug('dpla query: %s', qry)

        # TODO: restrict to image only, or at least things with preview image
        start = time.time()
        results = api.search(qry, 'dplav2', 'items')
        logger.info('dpla query completed in %.2f sec', time.time() - start)

        items = []
        # an error or empty response may lack 'docs'; treat it as no
        # results rather than raising KeyError
        if not results or 'docs' not in results:
            return items

        for doc in results['docs']:
            # according to dpla docs, should be url preview for item
            # docs reference a field for object mimetype, not seeing in results
            preview = doc.get('object')

            # for now, just skip items without an image url
            if not preview:
                continue

            # a single incomplete record should not abort the whole search
            src_res = doc.get('sourceResource') or {}
            provider = doc.get('provider') or {}

            # url on DPLA site
            # NOTE(review): `url` is not defined inside this function; it
            # is presumably a base url defined at module level -- confirm.
            # If it is meant to be an attribute of DPLA it must be
            # referenced as DPLA.url or this line raises NameError.
            item_url = '%sitem/%s' % (url, doc.get('id'))

            i = DisplayItem(
                title=src_res.get('title'),
                format=src_res.get('type'),
                source=provider.get('name'),
                # collection or provider here? src_rec['collection']['title']
                # NOTE: collection apparently not set for all items
                thumbnail=preview,
                url=item_url)

            # NOTE(review): assumes 'date' may be a list like 'spatial';
            # only a dict value is read for its displayDate -- confirm
            date = src_res.get('date')
            if isinstance(date, list):
                date = date[0] if date else None
            if isinstance(date, dict):
                i.date = date.get('displayDate')

            # spatial is sometimes a list but not always; use first entry
            space = src_res.get('spatial')
            if isinstance(space, list):
                space = space[0] if space else None
            # country? state? coords?
            if space and isinstance(space, dict):
                i.location = space.get('name')

            # Add the aggregator for reference
            i.aggregator = DPLA.name

            items.append(i)

        return items
Esempio n. 3
0
    def find_items(keywords=[]):
        """Search Europeana for items matching any of the given keywords.

        :param keywords: list of search terms; joined with ``OR``.  The
            default list is only read here, never mutated.
        :returns: list of DisplayItem objects, one per result that has a
            preview image (``edmPreview``); an empty list when the
            response contains no ``items``.
        """
        qry = 'wskey->%s:query->%s' % (
            Europeana.API_KEY,
            ' OR '.join(keywords))

        # qry from unicode string to regular string
        qry = qry.encode("utf8", "ignore")

        # NOTE(review): the logged query string contains the API key
        logger.debug('europeana query: %s', qry)
        b = Bibs()
        results = b.search(qry, 'europeanav2', 'search')

        items = []
        # no results! log this error?
        if not results or 'items' not in results:
            return items

        for doc in results['items']:
            # NOTE: result includes a 'completeness' score
            # which we could use for a first-pass filter to weed out junk records

            # for now, just skip items without an image url
            previews = doc.get('edmPreview')
            if not previews:
                continue

            # preview, title and timespan label are lists; for now, in all
            # cases, just grab the first one.  Empty lists are treated
            # like a missing field instead of raising IndexError.
            date = None
            labels = doc.get('edmTimespanLabel')
            if labels:
                # each label is a dict; the display text is under 'def'
                date = labels[0].get('def')

            i = DisplayItem(
                format=doc.get('type'),
                # NOTE: provider is aggregator (i.e., 'The European Library')
                # dataProvider is original source
                source='; '.join(doc.get('dataProvider', [])),
                # url on provider's website with context
                url=doc.get('guid'),
                date=date)

            # NOTE: doc['link'] provides json with full record data
            # if we want more item details
            # should NOT be displayed to users (includes api key)

            titles = doc.get('title')
            if titles:
                i.title = titles[0]
            i.thumbnail = previews[0]

            # Add the aggregator for reference
            i.aggregator = Europeana.name

            # NOTE: spatial/location information doesn't seem to be included
            # in this item result
            items.append(i)

        return items
Esempio n. 4
0
    def find_items(keywords=[]):
        """Query Europeana for the given keywords and build display items.

        :param keywords: list of search terms; joined with ``OR``.  The
            default list is only read here, never mutated.
        :returns: list of DisplayItem objects for every result with a
            preview image (``edmPreview``); an empty list when the
            response contains no ``items``.
        """
        qry = 'wskey->%s:query->%s' % (
            Europeana.API_KEY,
            ' OR '.join(keywords)
        )

        # qry from unicode string to regular string
        qry = qry.encode("utf8", "ignore")

        # NOTE(review): the logged query string contains the API key
        logger.debug('europeana query: %s', qry)
        b = Bibs()
        results = b.search(qry, 'europeanav2', 'search')

        items = []
        # no results! log this error?
        if not results or 'items' not in results:
            return items

        for doc in results['items']:
            # NOTE: result includes a 'completeness' score
            # which we could use for a first-pass filter to weed out junk records

            # for now, just skip items without an image url
            thumbnails = doc.get('edmPreview')
            if not thumbnails:
                continue

            # timespan labels come as a list of dicts; take the 'def' text
            # of the first one, and fall back to None when the list is
            # missing or empty (previously an empty list raised IndexError)
            timespans = doc.get('edmTimespanLabel')
            display_date = timespans[0].get('def') if timespans else None

            i = DisplayItem(
                format=doc.get('type'),
                # NOTE: provider is aggregator (i.e., 'The European Library')
                # dataProvider is original source
                source='; '.join(doc.get('dataProvider', [])),
                # url on provider's website with context
                url=doc.get('guid'),
                date=display_date
            )

            # NOTE: doc['link'] provides json with full record data
            # if we want more item details
            # should NOT be displayed to users (includes api key)

            # preview and title are both lists; for now, in both cases,
            # just grab the first one
            titles = doc.get('title')
            if titles:
                i.title = titles[0]
            i.thumbnail = thumbnails[0]

            # Add the aggregator for reference
            i.aggregator = Europeana.name

            # NOTE: spatial/location information doesn't seem to be included
            # in this item result
            items.append(i)

        return items