def find_items(keywords):
    """Search DPLA for items matching any of the given keywords.

    Example use::

        DPLA.find_items(keywords=['term1', 'term2'])

    :param keywords: list of search terms; the terms are joined with
        ``OR`` to build the query string
    :returns: list of DisplayItem objects, one for each result document
        that has a preview image url (``object``); an empty list when
        the response contains no ``docs``
    """
    api = Bibs()
    qry = 'api_key->%s:q->%s' % (DPLA.API_KEY, ' OR '.join(keywords))
    # qry from unicode string to regular string
    qry = qry.encode("utf8", "ignore")
    # NOTE: the logged query string includes the api key
    logger.debug('dpla query: %s', qry)

    # TODO: restrict to image only, or at least things with preview image
    start = time.time()
    results = api.search(qry, 'dplav2', 'items')
    # TODO: error handling...
    logger.info('dpla query completed in %.2f sec', time.time() - start)

    items = []
    # an error or empty response may not include any docs at all
    if not results or 'docs' not in results:
        return items

    for doc in results['docs']:
        # for now, just skip items without an image url
        thumbnail = doc.get('object', None)
        if not thumbnail:
            continue

        # tolerate sparse records instead of aborting the whole search
        src_res = doc.get('sourceResource') or {}
        provider = doc.get('provider') or {}

        i = DisplayItem(
            title=src_res.get('title', None),
            format=src_res.get('type', None),
            source=provider.get('name', None),
            # collection or provider here? src_rec['collection']['title']
            # NOTE: collection apparently not set for all items

            # according to dpla docs, should be url preview for item
            # docs reference a field for object mimetype, not seeing
            # in results
            thumbnail=thumbnail,
            # url on provider's website with context
            url=doc.get('isShownAt', None)
        )

        # guard against date being a list (as spatial sometimes is)
        # rather than a single dict
        date = src_res.get('date', None)
        if isinstance(date, list):
            date = date[0] if date else None
        if isinstance(date, dict):
            i.date = date.get('displayDate', None)

        space = src_res.get('spatial', None)
        if space:
            # sometimes a list but not always
            if isinstance(space, list):
                space = space[0]
            # country? state? coords?
            if isinstance(space, dict):
                i.location = space.get('name', None)

        # Add the aggregator for reference
        i.aggregator = DPLA.name

        items.append(i)

    return items
def find_items(keywords):
    """Search DPLA for items matching any of the given keywords.

    Example use::

        DPLA.find_items(keywords=['term1', 'term2'])

    :param keywords: list of search terms; the terms are joined with
        ``OR`` to build the query string
    :returns: list of DisplayItem objects, one for each result document
        that has a preview image url (``object``); each item's url is
        built from the base ``url`` and the document id.  An empty list
        is returned when the response contains no ``docs``
    """
    api = Bibs()
    qry = 'api_key->%s:q->%s' % (DPLA.API_KEY, ' OR '.join(keywords))
    # qry from unicode string to regular string
    qry = qry.encode("utf8", "ignore")
    # NOTE: the logged query string includes the api key
    logger.debug('dpla query: %s', qry)

    # TODO: restrict to image only, or at least things with preview image
    start = time.time()
    results = api.search(qry, 'dplav2', 'items')
    # TODO: error handling...
    logger.info('dpla query completed in %.2f sec', time.time() - start)

    items = []
    # an error or empty response may not include any docs at all
    if not results or 'docs' not in results:
        return items

    for doc in results['docs']:
        # for now, just skip items without an image url
        thumbnail = doc.get('object', None)
        if not thumbnail:
            continue

        # tolerate sparse records instead of aborting the whole search
        src_res = doc.get('sourceResource') or {}
        provider = doc.get('provider') or {}

        # url on DPLA site
        # NOTE(review): `url` is not defined inside this function;
        # presumably a base site url available from an enclosing scope
        # -- confirm it is actually in scope here
        item_url = '%sitem/%s' % (url, doc.get('id'))

        i = DisplayItem(
            title=src_res.get('title', None),
            format=src_res.get('type', None),
            source=provider.get('name', None),
            # collection or provider here? src_rec['collection']['title']
            # NOTE: collection apparently not set for all items

            # according to dpla docs, should be url preview for item
            # docs reference a field for object mimetype, not seeing
            # in results
            thumbnail=thumbnail,
            url=item_url)

        # guard against date being a list (as spatial sometimes is)
        # rather than a single dict
        date = src_res.get('date', None)
        if isinstance(date, list):
            date = date[0] if date else None
        if isinstance(date, dict):
            i.date = date.get('displayDate', None)

        space = src_res.get('spatial', None)
        if space:
            # sometimes a list but not always
            if isinstance(space, list):
                space = space[0]
            # country? state? coords?
            if isinstance(space, dict):
                i.location = space.get('name', None)

        # Add the aggregator for reference
        i.aggregator = DPLA.name

        items.append(i)

    return items
def find_items(keywords=None):
    """Search Europeana for items matching any of the given keywords.

    :param keywords: list of search terms; the terms are joined with
        ``OR`` to build the query string.  Defaults to no terms.
    :returns: list of DisplayItem objects, one for each result document
        that has a preview image (``edmPreview``); an empty list when
        the response contains no ``items``
    """
    # None rather than a shared mutable [] default; same behavior when
    # the argument is omitted
    keywords = keywords or []

    qry = 'wskey->%s:query->%s' % (
        Europeana.API_KEY,
        ' OR '.join(keywords)
    )
    # qry from unicode string to regular string
    qry = qry.encode("utf8", "ignore")
    # NOTE: the logged query string includes the api key
    logger.debug('europeana query: %s', qry)

    b = Bibs()
    results = b.search(qry, 'europeanav2', 'search')

    items = []
    # no results! log this error?
    if not results or 'items' not in results:
        return items

    for doc in results['items']:
        # NOTE: result includes a 'completeness' score
        # which we could use for a first-pass filter to weed out
        # junk records

        # for now, just skip items without an image url
        previews = doc.get('edmPreview', None)
        if not previews:
            continue

        # timespan label, preview and title are all lists; for now,
        # in each case, just grab the first one
        date = None
        timespans = doc.get('edmTimespanLabel', None)
        if timespans:
            # each label is read through its 'def' key; a label
            # without one leaves the date unset
            date = timespans[0].get('def', None)

        i = DisplayItem(
            format=doc.get('type', None),
            # NOTE: provider is aggregator (i.e., 'The European Library')
            # dataProvider is original source
            source='; '.join(doc.get('dataProvider', None) or []),
            # url on provider's website with context
            url=doc.get('guid', None),
            date=date
        )
        # NOTE: doc['link'] provides json with full record data
        # if we want more item details
        # should NOT be displayed to users (includes api key)

        titles = doc.get('title', None)
        if titles:
            i.title = titles[0]

        # previews is known to be non-empty at this point
        i.thumbnail = previews[0]

        # Add the aggregator for reference
        i.aggregator = Europeana.name

        # NOTE: spatial/location information doesn't seem to be included
        # in this item result
        items.append(i)

    return items
def find_items(keywords=None):
    """Search Europeana for items matching any of the given keywords.

    :param keywords: list of search terms; the terms are joined with
        ``OR`` to build the query string.  Defaults to no terms.
    :returns: list of DisplayItem objects, one for each result document
        that has a preview image (``edmPreview``); an empty list when
        the response contains no ``items``
    """
    # None rather than a shared mutable [] default; same behavior when
    # the argument is omitted
    keywords = keywords or []

    qry = 'wskey->%s:query->%s' % (
        Europeana.API_KEY,
        ' OR '.join(keywords)
    )
    # qry from unicode string to regular string
    qry = qry.encode("utf8", "ignore")
    # NOTE: the logged query string includes the api key
    logger.debug('europeana query: %s', qry)

    b = Bibs()
    results = b.search(qry, 'europeanav2', 'search')

    items = []
    # no results! log this error?
    if not results or 'items' not in results:
        return items

    for doc in results['items']:
        # NOTE: result includes a 'completeness' score
        # which we could use for a first-pass filter to weed out
        # junk records

        # for now, just skip items without an image url
        previews = doc.get('edmPreview', None)
        if not previews:
            continue

        # timespan label, preview and title are all lists; for now,
        # in each case, just grab the first one
        date = None
        timespans = doc.get('edmTimespanLabel', None)
        if timespans:
            # each label is read through its 'def' key; a label
            # without one leaves the date unset
            date = timespans[0].get('def', None)

        i = DisplayItem(
            format=doc.get('type', None),
            # NOTE: provider is aggregator (i.e., 'The European Library')
            # dataProvider is original source
            source='; '.join(doc.get('dataProvider', None) or []),
            # url on provider's website with context
            url=doc.get('guid', None),
            date=date
        )
        # NOTE: doc['link'] provides json with full record data
        # if we want more item details
        # should NOT be displayed to users (includes api key)

        titles = doc.get('title', None)
        if titles:
            i.title = titles[0]

        # previews is known to be non-empty at this point
        i.thumbnail = previews[0]

        # Add the aggregator for reference
        i.aggregator = Europeana.name

        # NOTE: spatial/location information doesn't seem to be included
        # in this item result
        items.append(i)

    return items