def find_items(keywords=None):
    """Search Europeana for items matching any of the given keywords.

    The keywords are OR-ed together into a single query which is sent
    through the ``Bibs`` client (``'europeanav2'`` / ``'search'``).  Records
    without a preview image url are skipped.

    :param keywords: iterable of search-term strings; ``None`` (the default)
        is treated as an empty list
    :returns: list of ``DisplayItem`` objects; empty when the response
        contains no ``'items'``
    """
    qry = 'wskey->%s:query->%s' % (
        Europeana.API_KEY,
        ' OR '.join(keywords or []))

    # qry from unicode string to regular (byte) string.  Only encode text:
    # calling .encode() on a Python 2 byte string triggers an implicit ascii
    # decode, which fails on non-ascii input.
    if not isinstance(qry, bytes):
        qry = qry.encode("utf8", "ignore")
    logger.debug('europeana query: %s', qry)

    results = Bibs().search(qry, 'europeanav2', 'search')

    items = []
    # no results! log this error?
    if 'items' not in results:
        return items

    for doc in results['items']:
        # NOTE: result includes a 'completeness' score
        # which we could use for a first-pass filter to weed out junk records
        # for now, just skip items without an image url
        previews = doc.get('edmPreview')
        if not previews:
            continue

        # timespan label, preview and title are all lists; for now, in every
        # case, just grab the first one (and tolerate empty lists)
        timespans = doc.get('edmTimespanLabel')
        date = timespans[0].get('def') if timespans else None

        i = DisplayItem(
            format=doc.get('type', None),
            # NOTE: provider is aggregator (i.e., 'The European Library')
            # dataProvider is original source
            source='; '.join(doc.get('dataProvider', [])),
            # url on provider's website with context
            url=doc.get('guid', None),
            date=date)

        # NOTE: doc['link'] provides json with full record data
        # if we want more item details
        # should NOT be displayed to users (includes api key)

        titles = doc.get('title')
        if titles:
            i.title = titles[0]
        i.thumbnail = previews[0]

        # Add the aggregator for reference
        i.aggregator = Europeana.name

        # NOTE: spatial/location information doesn't seem to be included
        # in this item result
        items.append(i)

    return items
def find_items(keywords):
    """Search Flickr Commons for photos matching any of the given keywords.

    At most the first ten keywords are used; they are OR-ed together into a
    single text query.  The request asks for the 15 most relevant results.

    :param keywords: sequence of search-term strings (must support slicing)
    :returns: list of ``DisplayItem`` objects; empty when the response
        contains no photos
    """
    flickr = flickrapi.FlickrAPI(Flickr.API_KEY)
    start = time.time()

    # NOTE: flickr does support or, but doesn't like too many terms at once
    # (15 terms is apparently too many)
    # Drop duplicates in first-seen order so the query is deterministic.
    terms = []
    for keyword in keywords[:10]:
        if keyword not in terms:
            terms.append(keyword)
    query = ' OR '.join(terms)
    logger.debug('flickr query: %s', query)

    response = flickr.photos_search(
        text=query, format='json', is_commons='true',
        # comma-delimited list of extra fields
        # need owner name for source
        # TODO: future enhancement: access to date, location info, etc
        # extras='owner_name,date_upload,date_taken,geo'
        extras='owner_name',
        sort='relevance',
        # restrict to first 15 items (only ~10 for other apis currently)
        per_page=15)
    logger.info('flickr query completed in %.2f sec', time.time() - start)

    # NOTE(review): under Python 3 the raw response is presumably bytes;
    # decode so the text handling below works -- confirm against flickrapi.
    if isinstance(response, bytes) and not isinstance(response, str):
        response = response.decode('utf-8')

    # The json is wrapped in a 'jsonFlickrApi( )' callback which has to be
    # removed before it will parse.  Remove it as an exact prefix/suffix:
    # str.lstrip() treats its argument as a *set of characters*, not a
    # prefix, and only worked by accident.
    wrapper = 'jsonFlickrApi('
    response = response.strip()
    if response.startswith(wrapper):
        response = response[len(wrapper):]
        if response.endswith(')'):
            response = response[:-1]
    results = simplejson.loads(response)

    items = []
    photos = results.get('photos')
    if not photos or 'photo' not in photos:
        # NOTE: could be bad api key; surface stat/code from the response
        logger.warning(
            'flickr search returned no photos (stat=%s, code=%s, message=%s)',
            results.get('stat'), results.get('code'), results.get('message'))
        return items

    for doc in photos['photo']:
        i = DisplayItem(
            format=doc.get('type', None),
            source=doc.get('ownername', None),
            # url on provider's website with context
            # http://www.flickr.com/photos/{user-id}/{photo-id}
            # TODO get date data
            url='http://www.flickr.com/photos/%(owner)s/%(id)s/' % doc)

        # flickr title not a list
        if 'title' in doc:
            i.title = doc['title']

        # build the url back to the image
        # http://farm{farm-id}.staticflickr.com/{server-id}/{id}_{secret}_m.jpg
        i.thumbnail = 'http://farm%(farm)s.staticflickr.com/%(server)s/%(id)s_%(secret)s_m.jpg' % doc

        # Add the aggregator for reference
        i.aggregator = 'Flickr Commons'

        # NOTE: spatial/location information doesn't seem to be included
        # in this item result
        items.append(i)

    return items
def find_items(keywords=None):
    """Search Europeana for items matching any of the given keywords.

    The keywords are OR-ed together into a single query which is sent
    through the ``Bibs`` client (``'europeanav2'`` / ``'search'``).  Records
    without a preview image url are skipped.

    :param keywords: iterable of search-term strings; ``None`` (the default)
        is treated as an empty list
    :returns: list of ``DisplayItem`` objects; empty when the response
        contains no ``'items'``
    """
    qry = 'wskey->%s:query->%s' % (
        Europeana.API_KEY,
        ' OR '.join(keywords or []))

    # qry from unicode string to regular (byte) string.  Only encode text:
    # calling .encode() on a Python 2 byte string triggers an implicit ascii
    # decode, which fails on non-ascii input.
    if not isinstance(qry, bytes):
        qry = qry.encode("utf8", "ignore")
    logger.debug('europeana query: %s', qry)

    results = Bibs().search(qry, 'europeanav2', 'search')

    items = []
    # no results! log this error?
    if 'items' not in results:
        return items

    for doc in results['items']:
        # NOTE: result includes a 'completeness' score
        # which we could use for a first-pass filter to weed out junk records
        # for now, just skip items without an image url
        previews = doc.get('edmPreview')
        if not previews:
            continue

        # timespan label, preview and title are all lists; for now, in every
        # case, just grab the first one (and tolerate empty lists)
        timespans = doc.get('edmTimespanLabel')
        date = timespans[0].get('def') if timespans else None

        i = DisplayItem(
            format=doc.get('type', None),
            # NOTE: provider is aggregator (i.e., 'The European Library')
            # dataProvider is original source
            source='; '.join(doc.get('dataProvider', [])),
            # url on provider's website with context
            url=doc.get('guid', None),
            date=date)

        # NOTE: doc['link'] provides json with full record data
        # if we want more item details
        # should NOT be displayed to users (includes api key)

        titles = doc.get('title')
        if titles:
            i.title = titles[0]
        i.thumbnail = previews[0]

        # Add the aggregator for reference
        i.aggregator = Europeana.name

        # NOTE: spatial/location information doesn't seem to be included
        # in this item result
        items.append(i)

    return items