def parse_whoosh_trec(site, query, results): response = Response() response.version = 'trec' response.feed.setdefault('title', "{0}: {1}".format(site, query)) response.feed.setdefault('link','') response.feed.setdefault('description',"Search results for '{0}' at {1}".format(query, site)) response.namespaces.setdefault("opensearch", "http://a9.com/-/spec/opensearch/1.1/") response.feed.setdefault("opensearch_totalresults", results.pagecount ) response.feed.setdefault("opensearch_itemsperpage", pagelen) response.feed.setdefault("opensearch_startindex", results.pagenum) response.feed.setdefault('query', query) try: r = 0 if len(results)>1: for hit in results: r = r + 1 title = hit["title"] title = title.strip() if len(title) < 1: title = query rank = ((int(results.pagenum)-1) * results.pagelen) + r link = "/treconomics/" + str(hit.docnum) + "?rank="+str(rank) desc = hit.highlights("content") docid = hit["docid"] docid = docid.strip() source = hit["source"] response.entries.append({'title': title, 'link': link, 'summary': desc, 'docid': docid ,'source': source}) else: print "No hits found for query: " + query except Exception, e: print "Converting results to OpenSearch Failed"
def parse_whoosh_trec(site, query, results):
    # Convert Whoosh TREC hits into an OpenSearch-compliant Response whose
    # entry links re-issue the hit title as a new query.
    # NOTE(review): this function takes no `self` parameter yet reads
    # self.resultsPerPage below -- it will raise NameError unless it is
    # actually a method; confirm against the enclosing class.
    # NOTE(review): `response` is built but never returned -- confirm whether
    # a trailing `return response` was lost.
    response = Response()
    response.version = 'trec'
    response.feed.setdefault('title', "{0}: {1}".format(site, query))
    response.feed.setdefault('link', '')
    response.feed.setdefault('description', "Search results for '{0}' at {1}".format(query, site))
    response.namespaces.setdefault("opensearch", "http://a9.com/-/spec/opensearch/1.1/")
    # Everything is delivered on a single "page", so total == per-page here.
    response.feed.setdefault("opensearch_totalresults", len(results))
    response.feed.setdefault("opensearch_itemsperpage", len(results))
    response.feed.setdefault("opensearch_startindex", 1)
    response.feed.setdefault('query', query)
    try:
        if len(results) > 1:
            resultNum = 1
            for hit in results:
                # Stop once a full page of results has been collected.
                if resultNum > self.resultsPerPage:
                    break
                title = hit["title"]
                # The link re-queries the engine with the hit's title.
                link = "?query=" + title.replace(' ', '+')
                desc = hit.highlights("content")
                response.entries.append({'title': title, 'link': link, 'summary': desc})
                resultNum += 1
        else:
            print "No hits found for query: " + query
    except Exception, e:
        # Best-effort: conversion failures are reported, not re-raised.
        print "Converting results to OpenSearch Failed"
def parse_whoosh_trec(site, query, results):
    # Build an OpenSearch Response from Whoosh TREC hits; each entry's link
    # points back at the engine with the hit title as the query string.
    # NOTE(review): references self.resultsPerPage without a `self`
    # parameter -- presumably a method of the enclosing class; verify.
    # NOTE(review): nothing is returned from this function; verify a
    # `return response` was not truncated away.
    response = Response()
    response.version = 'trec'
    response.feed.setdefault('title', "{0}: {1}".format(site, query))
    response.feed.setdefault('link', '')
    response.feed.setdefault(
        'description', "Search results for '{0}' at {1}".format(query, site))
    response.namespaces.setdefault(
        "opensearch", "http://a9.com/-/spec/opensearch/1.1/")
    # Single-page delivery: total results and items-per-page coincide.
    response.feed.setdefault("opensearch_totalresults", len(results))
    response.feed.setdefault("opensearch_itemsperpage", len(results))
    response.feed.setdefault("opensearch_startindex", 1)
    response.feed.setdefault('query', query)
    try:
        if len(results) > 1:
            resultNum = 1
            for hit in results:
                # Cap the entries at one page's worth.
                if resultNum > self.resultsPerPage:
                    break
                title = hit["title"]
                link = "?query=" + title.replace(' ', '+')
                desc = hit.highlights("content")
                response.entries.append({
                    'title': title,
                    'link': link,
                    'summary': desc
                })
                resultNum += 1
        else:
            print "No hits found for query: " + query
    except Exception, e:
        print "Converting results to OpenSearch Failed"
def create_echo_response(query, offset):
    """Build a debugging Response that simply echoes the query terms back.

    Each search term becomes one entry whose title and summary are the term
    itself and whose link is a fabricated http://www.<term>.com url.

    Parameters:
    * query: OpenSearch Query object; its search_terms are echoed
    * offset (int): result offset (not used by the echo engine)

    Returns:
    * results (puppy.model.Response)
    """
    echoed = Response()
    echoed.version = 'test'
    for field, value in (('title', "EchoSearch"),
                         ('link', "www.puppyIR.eu"),
                         ('description', "Search engine for testing purposes"),
                         ('query', query.search_terms)):
        echoed.feed.setdefault(field, value)
    echoed.entries.extend(
        {'title': term, 'link': "http://www." + term + ".com", 'summary': term}
        for term in query.search_terms.split())
    return echoed
def parse_yahoo_json(self, site, query, results):
    """Create an OpenSearch Response from Solr/Lucene JSON results.

    The service is queried for JSON; this method walks the decoded payload
    and produces an equivalent OpenSearch-compliant representation.

    Parameters:
    * site (str): search engine name
    * query (str): query search terms (n.b. not a OpenSearch Query object)
    * results (dict): decoded JSON results from the service

    Returns:
    * OpenSearch.Response
    """
    response = Response()
    response.version = 'json'
    feed = response.feed
    feed.setdefault('title', "{0}: {1}".format(site, query))
    feed.setdefault('link', results['link'])
    feed.setdefault('description', "Search results for '{0}' at {1}".format(query, site))
    feed.setdefault('total_results', results['numFound'])
    feed.setdefault('start', results['start'])
    for doc in results['docs']:
        # Solr returns multivalued fields as lists; take the first value.
        entry = {
            'title': doc['title'][0],
            'link': doc['attr_stream_name'][0],
            'summary': doc['attr_content'][0],
        }
        response.entries.append(entry)
    return response
def parse_google_json(site, url, query, num_results, results):
    """Create an OpenSearch Response from Google AJAX Search JSON results.

    Parameters:
    * site (str): search engine name
    * url (str): search url used
    * query (str): query search terms (n.b. not a OpenSearch Query object)
    * num_results (int): number of desired results
    * results (dict): decoded JSON results from the service

    Returns:
    * results (puppy.model.Response)
    """
    response = Response()
    response.version = 'json'
    feed = response.feed
    feed.setdefault('title', "{0}: {1}".format(site, query))
    feed.setdefault('link', results['cursor']['moreResultsUrl'])
    feed.setdefault('description', "Search results for '{0}' at {1}".format(query, site))
    try:
        feed.setdefault('opensearch_totalresults', results['cursor']['estimatedResultCount'])
        feed.setdefault('opensearch_startindex', results['cursor']['currentPageIndex'])
    except KeyError:
        # Google omits the cursor counters for some queries; default to zero.
        feed.setdefault('opensearch_totalresults', 0)
        feed.setdefault('opensearch_startindex', 0)
    for item in results['results']:
        response.entries.append({'title': item['title'],
                                 'link': item['url'],
                                 'summary': item['content']})
    feed.setdefault('opensearch_itemsperpage', len(response.entries))
    return response
def parse_google_books_json(site, url, pos, query, results):
    """Create a OpenSearch Response from Google Books results.

    Google Books's search API returns results in JSON format. This function
    simply loads the JSON into memory and creates an equivalent
    representation that is OpenSearch compliant.

    Parameters:
    * site (str): search engine name
    * url (str): the url for the results that were retrieved to use as the
      OpenSearch link for the response
    * pos (int): what page number we are on
    * query (str): query search terms (n.b. not a OpenSearch Query object)
    * results (dict): results from service

    Returns:
    * puppy.model.OpenSearch.Response

    NOTE(review): reads self.resultsPerPage but takes no `self` parameter --
    presumably meant to be a method; confirm against the enclosing class.
    NOTE(review): no `return response` is visible; confirm it was not lost
    from the end of the function.
    """
    response = Response()
    response.version = 'json'
    response.feed.setdefault('title', "{0}: {1}".format(site, query))
    response.feed.setdefault('link', url)
    response.feed.setdefault(
        'description', "Search results for '{0}' at {1}".format(query, site))
    response.namespaces.setdefault(
        "opensearch", "http://a9.com/-/spec/opensearch/1.1/")
    try:
        response.feed.setdefault("opensearch_totalresults", int(results['totalItems']))
        response.feed.setdefault("opensearch_itemsperpage", self.resultsPerPage)
        response.feed.setdefault("opensearch_startindex", pos)
    except KeyError:
        # Counts missing from the payload: fall back to zeros.
        response.feed.setdefault("opensearch_totalresults", 0)
        response.feed.setdefault("opensearch_itemsperpage", 0)
        response.feed.setdefault("opensearch_startindex", 0)
    for result in results['items']:
        try:
            # Start from the raw item dict so all other Google fields survive.
            book_dict = result
            book_dict['title'] = result['volumeInfo']['title']
            if 'subtitle' in result['volumeInfo']:
                book_dict['title'] += " {0}".format(result['volumeInfo']['subtitle'])
            book_dict['link'] = result['selfLink']
            if 'description' in result:
                book_dict['summary'] = result['description']
            else:
                # If there's no description it's up to the app developer to
                # make use of the other data
                book_dict['summary'] = ''
            response.entries.append(book_dict)
        except Exception, e:
            # If there is a parsing problem, print out an error and just
            # skip this individual result
            print "Skipping a result due to: {0} \nWhen parsing a result from: {1}\n".format(e, url)
            continue
def parse_bing_xml_response(site, query, results, numResults=10, offset=0):
    """Create an OpenSearch Response from a Bing XML results feed.

    Parameters:
    * site (str): search engine name
    * query (str): query search terms
    * results: raw XML returned by the Bing service
    * numResults (int): how many entries to keep
    * offset (int): how many leading entries to skip

    Returns:
    * puppy.model.OpenSearch.Response
    """
    soup = BeautifulSoup(results)
    response = Response()
    response.version = 'xml'
    response.feed.setdefault('title', "{0}: {1}".format(site, query))
    response.feed.setdefault('description', "Search results for {0} at {1}".format(query, site))
    response.feed.setdefault('link', '')
    response.namespaces.setdefault('opensearch', 'http://a9.com/-/spec/opensearch/1.1/')
    entries = soup.findAll('entry')
    taken = 0
    for position, entry in enumerate(entries):
        # Keep only the window [offset, offset + numResults).
        if offset <= position < (numResults + offset):
            response.entries.append({
                'title': entry.find('d:title').string,
                'link': entry.find('d:url').string,
                'summary': entry.find('d:description').string,
            })
            taken += 1
    response.feed.setdefault('opensearch_totalresults', len(entries))
    response.feed.setdefault('opensearch_startindex', offset)
    response.feed.setdefault('opensearch_itemsperpage', taken)
    return response
def parse_whoosh_trec(site, query, results):
    # Build an OpenSearch Response of query *suggestions* from Whoosh hits:
    # the highlighted content snippet serves as both title and summary.
    # NOTE(review): reads self.resultsPerPage without a `self` parameter --
    # likely intended as a method; confirm against the enclosing class.
    # NOTE(review): the response is never returned; confirm a trailing
    # `return response` was not lost.
    response = Response()
    response.version = 'trec'
    response.feed.setdefault('title', "{0}: {1}".format(site, query))
    response.feed.setdefault('link', '')
    response.feed.setdefault('description', "Search results for '{0}' at {1}".format(query, site))
    response.namespaces.setdefault("opensearch", "http://a9.com/-/spec/opensearch/1.1/")
    response.feed.setdefault("opensearch_totalresults", len(results))
    response.feed.setdefault("opensearch_itemsperpage", len(results))
    response.feed.setdefault("opensearch_startindex", 1)
    response.feed.setdefault('query', query)
    try:
        duplicates = set()
        # `buff` remembers the previous suggestion so "<previous>?" can be
        # filtered out as a near-duplicate.
        buff = ""
        if len(results) > 1:
            resultNum = 1
            for hit in results:
                if resultNum > self.resultsPerPage:
                    break
                desc = hit.highlights("content")
                # Keep only the text before the first tab character.
                desc = desc.split("\t")[0]
                # Skip exact duplicates, the query itself, and the previous
                # suggestion with just a trailing '?' appended.
                if desc not in duplicates and query.lower() != desc.lower() and desc != (buff + "?"):
                    response.entries.append({'title': desc, 'link': '', 'summary': desc})
                    resultNum += 1
                    duplicates.add(desc)
                    buff = desc
        else:
            print "No hits found for query: " + query
    except Exception, e:
        print "Converting results to OpenSearch Failed"
def parse_bing_json(site, url, query, results, sources, pos):
    """Create a OpenSearch Response from Bing V2 results.

    Bing's search API returns results in JSON format. This function simply
    loads the JSON into memory and creates an equivalent representation that
    is OpenSearch compliant.

    Parameters:
    * site (str): search engine name
    * url (str): the url for the results that were retrieved to use as the
      OpenSearch link for the response
    * query (str): query search terms (n.b. not a OpenSearch Query object)
    * results (dict): results from service
    * sources (array): all the sources we are currently using i.e. Web and
      News or just Web
    * pos (int): what page we are starting on

    Returns:
    * puppy.model.OpenSearch.Response

    NOTE(review): reads self.source / self.resultsPerPage but has no `self`
    parameter, and delegates to parseWebJson etc. defined elsewhere --
    presumably a method; confirm against the enclosing class.
    """
    response = Response()
    response.version = 'json'
    response.feed.setdefault('title', "{0}: {1}".format(site, query))
    response.feed.setdefault('link', url)
    response.feed.setdefault(
        'description', "Search results for '{0}' at {1}".format(query, site))
    response.namespaces.setdefault(
        "opensearch", "http://a9.com/-/spec/opensearch/1.1/")
    try:
        response.feed.setdefault("opensearch_totalresults", int(results[self.source]['Total']))
        response.feed.setdefault("opensearch_itemsperpage", self.resultsPerPage)
        response.feed.setdefault("opensearch_startindex", pos)
    except KeyError:
        # Counts absent from the payload: fall back to zeros rather than fail.
        response.feed.setdefault("opensearch_totalresults", 0)
        response.feed.setdefault("opensearch_itemsperpage", 0)
        response.feed.setdefault("opensearch_startindex", 0)
    # Go through every source type selected, parse its results and store them.
    for sourceType in sources:
        if (sourceType == 'Web') and ('Results' in results['Web']):
            response = parseWebJson(response, results, url)
        elif (sourceType == 'News') and ('News' in results):
            response = parseNewsJson(response, results, url)
        elif (sourceType == 'Image') and ('Results' in results['Image']):
            response = parseImageJson(response, results, url)
        elif (sourceType == 'Video') and ('Results' in results['Video']):
            response = parseVideoJson(response, results, url)
        elif (sourceType == 'Spell') and ('Spell' in results):
            response = parseSpellJson(response, results, query, url)
        elif (sourceType == 'RelatedSearch') and ('RelatedSearch' in results):
            response = parseRelatedSearchJson(response, results, query, url)
    return response
def parse_soundcloud_json(site, query, results, url, offset):
    """ SoundCloud's search API returns results in JSON format. This function
    simply loads the JSON into memory and creates an equivalent
    representation that is OpenSearch compliant.

    Parameters:
    * site (str): search engine name
    * query (str): query search terms (n.b. not a OpenSearch Query object)
    * results (dict): results from service
    * url (str): the url for the results that were retrieved to use as the
      OpenSearch link for the response
    * offset (int): which page of results we are retrieving

    Returns:
    * puppy.model.OpenSearch.Response

    NOTE(review): reads self.resultsPerPage but takes no `self` parameter,
    and no `return response` is visible -- confirm both against the original
    class definition.
    """
    response = Response()
    response.version = 'json'
    response.feed.setdefault('title', "{0}: {1}".format(site, query))
    response.feed.setdefault('link', url)
    response.feed.setdefault(
        'description', "Search results for '{0}' at {1}".format(query, site))
    response.namespaces.setdefault(
        "opensearch", "http://a9.com/-/spec/opensearch/1.1/")
    try:
        response.feed.setdefault("opensearch_itemsperpage", self.resultsPerPage)
        response.feed.setdefault("opensearch_totalresults", int(len(results)))
        response.feed.setdefault("opensearch_startindex", 0)
    except KeyError:
        response.feed.setdefault("opensearch_totalresults", 0)
        response.feed.setdefault("opensearch_itemsperpage", 0)
        response.feed.setdefault("opensearch_startindex", 0)
    # There is no pagination as a parameter, all results are simply returned
    # in one go, so this mimics pagination
    startIndex = offset * self.resultsPerPage
    if (startIndex + self.resultsPerPage) > len(results):
        endIndex = len(results)
    else:
        endIndex = startIndex + self.resultsPerPage
    # Go through a subset of the results and grab them - corresponding to
    # the page in question
    for i in range(startIndex, endIndex):
        try:
            result_dict = results[i]
            result_dict['summary'] = results[i]['description']
            result_dict['link'] = results[i]['permalink_url']
            result_dict['artist'] = results[i]['user']['username']
            response.entries.append(result_dict)
        except Exception, e:
            print "Skipping a result due to: {0} \nWhen parsing a result from: {1}\n".format(e, url)
            continue
def parse_wiki_xml(url, query, results): """ Simple Wikipedias's search API returns results in XML format. This function simply loads the XML into memory and creates an equivalent representation that is OpenSearch compliant. Parameters: * url (str): the url for the results that were retrieved to use as the OpenSearch link for the response * query (str): query search terms (n.b. not a OpenSearch Query object) * results (dict): results from service Returns: * puppy.model.OpenSearch.Response """ response = Response() response.feed.setdefault("title", "Simple Wikipedia Search") response.feed.setdefault('link', url) response.feed.setdefault( "description", "Simple Wikipedia Search Suggestions for: {0}".format(query)) response.namespaces.setdefault( "searchsuggest", "{http://opensearch.org/searchsuggest2}") response.version = 'xml' root = etree.XML(results) ns = response.namespaces["searchsuggest"] section = root.find("{0}Section".format(ns)) items = section.findall("{0}Item".format(ns)) for item in items: try: title = item.find("{0}Text".format(ns)).text summary = item.find("{0}Description".format(ns)).text link = item.find("{0}Url".format(ns)).text image = item.find("{0}Image".format(ns)) thumbnail = image.get( "source") if image is not None else "" image_fullsize = "" if thumbnail is not "": image_fullsize = thumbnail.replace( "thumb/", "").rpartition('/')[0] response.entries.append({ 'title': title, 'summary': summary, 'link': link, 'thumbnail': thumbnail, 'image': image_fullsize }) except etree.XMLSyntaxError, e: print "Skipping a result due to an lxml syntax error.\nWhen parsing a result from: {0}\n" ( url) continue except etree.LxmlError, e: print "Skipping a result due to an lxml error: {0} \nWhen parsing a result from: {1}\n" ( e, url) continue
def parse_geocode_json(site, url, query, results):
    """Create a OpenSearch Response from Google Geocode results.

    Google's Geocode search API returns results in JSON format. This
    function simply loads the JSON into memory and creates an equivalent
    representation that is OpenSearch compliant.

    Parameters:
    * site (str): search engine name
    * url (str): the url for the results that were retrieved to use as the
      OpenSearch link for the response
    * query (str): query search terms (n.b. not a OpenSearch Query object)
    * results (dict): results from service

    Returns:
    * puppy.model.OpenSearch.Response

    NOTE(review): calls self.calcDistance but takes no `self` parameter, and
    no `return response` is visible -- confirm both against the original
    class definition.
    """
    response = Response()
    response.version = 'json'
    response.feed.setdefault('title', "{0}: {1}".format(site, query))
    response.feed.setdefault('link', url)
    response.feed.setdefault('description', "Search results for '{0}' at {1}".format(query, site))
    response.namespaces.setdefault("opensearch", "http://a9.com/-/spec/opensearch/1.1/")
    response.feed.setdefault("opensearch_startindex", 0)
    for result in results:
        try:
            resultDict = {}
            resultDict['title'] = result['formatted_address']
            # Join every address component's long name into a fuller title.
            longTitle = ''
            for component in result['address_components']:
                longTitle += (component['long_name'] + ', ')
            resultDict['link'] = ''
            # Drop the trailing ', ' appended by the loop above.
            resultDict['longTitle'] = longTitle[:len(longTitle) - 2]
            resultDict['lat'] = result['geometry']['location']['lat']
            resultDict['lon'] = result['geometry']['location']['lng']
            if 'bounds' in result['geometry']:
                # Bounding-box corners plus the NE-SW diagonal distance.
                resultDict['neBorderLat'] = result['geometry']['bounds']['northeast']['lat']
                resultDict['neBorderLon'] = result['geometry']['bounds']['northeast']['lng']
                resultDict['swBorderLat'] = result['geometry']['bounds']['southwest']['lat']
                resultDict['swBorderLon'] = result['geometry']['bounds']['southwest']['lng']
                resultDict['distanceAcross'] = self.calcDistance(resultDict['neBorderLat'], resultDict['swBorderLat'], resultDict['neBorderLon'], resultDict['swBorderLon'])
                resultDict['summary'] = "{0} is found at: Latitude: {1}, Longitude: {2}. The area it covers is {3}km across (between the NE and SW corners).".format(resultDict['title'], resultDict['lat'], resultDict['lon'], resultDict['distanceAcross'])
            else:
                resultDict['summary'] = "{0} is found at: Latitude: {1}, Longitude: {2}.".format(resultDict['title'], resultDict['lat'], resultDict['lon'])
            response.entries.append(resultDict)
        # If there is an arithmetic error pass on the result but note it for
        # the user and the result in question
        except ArithmeticError, e:
            note = "Arithmetic Error occured when calculating the distance across for a result."
            print "An {0}\nResult: {1}\n\n".format(note, result)
            continue
        except Exception, e:
            print "Skipping a result due to: {0} \nWhen parsing a result from: {1}\n".format(e, url)
            continue
def parse_guardian_json(site, url, query, results): """Create a OpenSearch Response from Guardian results. Guardians's search API returns results in JSON format. This function simply loads the JSON into memory and creates an equivalent representation that is OpenSearch compliant. Parameters: * site (str): search engine name * url (str): the url for the results that were retrieved to use as the OpenSearch link for the response * query (str): query search terms (n.b. not a OpenSearch Query object) * results (dict): results from service Returns: * puppy.model.OpenSearch.Response """ response = Response() response.version = 'json' response.feed.setdefault('title', "{0}: {1}".format(site, query)) response.feed.setdefault('link', url) response.feed.setdefault( 'description', "Search results for '{0}' at {1}".format(query, site)) response.namespaces.setdefault( "opensearch", "http://a9.com/-/spec/opensearch/1.1/") try: response.feed.setdefault("opensearch_totalresults", int(results['total'])) response.feed.setdefault("opensearch_itemsperpage", int(results['pageSize'])) response.feed.setdefault("opensearch_startindex", int(results['startIndex'])) except KeyError: response.feed.setdefault("opensearch_totalresults", 0) response.feed.setdefault("opensearch_itemsperpage", 0) response.feed.setdefault("opensearch_startindex", 0) for result in results['results']: try: response.entries.append({ 'title': result['webTitle'], 'link': result['webUrl'], 'summary': result['fields']['standfirst'] }) except Exception, e: # If there is a parsing problem, print out an error and just skip this individual result print "Skipping a result due to: {0} \nWhen parsing a result from: {1}\n".format( e, url) continue
def parse_google_books_json(site, url, pos, query, results):
    """Create a OpenSearch Response from Google Books results.

    Google Books's search API returns results in JSON format. This function
    loads the decoded JSON and creates an equivalent OpenSearch-compliant
    representation.

    Parameters:
    * site (str): search engine name
    * url (str): the url for the results that were retrieved to use as the
      OpenSearch link for the response
    * pos (int): what page number we are on
    * query (str): query search terms (n.b. not a OpenSearch Query object)
    * results (dict): results from service

    Returns:
    * puppy.model.OpenSearch.Response

    NOTE(review): duplicate of the earlier parse_google_books_json in this
    file. It also reads self.resultsPerPage without a `self` parameter and
    never returns the response -- confirm against the enclosing class.
    """
    response = Response()
    response.version = 'json'
    response.feed.setdefault('title', "{0}: {1}".format(site, query))
    response.feed.setdefault('link', url)
    response.feed.setdefault('description', "Search results for '{0}' at {1}".format(query, site))
    response.namespaces.setdefault("opensearch", "http://a9.com/-/spec/opensearch/1.1/")
    try:
        response.feed.setdefault("opensearch_totalresults", int(results['totalItems']))
        response.feed.setdefault("opensearch_itemsperpage", self.resultsPerPage)
        response.feed.setdefault("opensearch_startindex", pos)
    except KeyError:
        response.feed.setdefault("opensearch_totalresults", 0)
        response.feed.setdefault("opensearch_itemsperpage", 0)
        response.feed.setdefault("opensearch_startindex", 0)
    for result in results['items']:
        try:
            # Reuse the raw item dict so every other Google field survives.
            book_dict = result
            book_dict['title'] = result['volumeInfo']['title']
            if 'subtitle' in result['volumeInfo']:
                book_dict['title'] += " {0}".format(result['volumeInfo']['subtitle'])
            book_dict['link'] = result['selfLink']
            if 'description' in result:
                book_dict['summary'] = result['description']
            else:
                book_dict['summary'] = ''  # If there's no description it's up to the app developer to make use of the other data
            response.entries.append(book_dict)
        except Exception, e:
            # If there is a parsing problem, print out an error and just
            # skip this individual result
            print "Skipping a result due to: {0} \nWhen parsing a result from: {1}\n".format(e, url)
            continue
def parse_soundcloud_json(site, query, results, url, offset):
    """ SoundCloud's search API returns results in JSON format. This function
    loads the decoded JSON and creates an equivalent OpenSearch-compliant
    representation.

    Parameters:
    * site (str): search engine name
    * query (str): query search terms (n.b. not a OpenSearch Query object)
    * results (dict): results from service
    * url (str): the url for the results that were retrieved to use as the
      OpenSearch link for the response
    * offset (int): which page of results we are retrieving

    Returns:
    * puppy.model.OpenSearch.Response

    NOTE(review): duplicate of the earlier parse_soundcloud_json in this
    file; it likewise reads self.resultsPerPage without a `self` parameter
    and never returns the response -- confirm against the enclosing class.
    """
    response = Response()
    response.version = 'json'
    response.feed.setdefault('title', "{0}: {1}".format(site, query))
    response.feed.setdefault('link', url)
    response.feed.setdefault('description', "Search results for '{0}' at {1}".format(query, site))
    response.namespaces.setdefault("opensearch", "http://a9.com/-/spec/opensearch/1.1/")
    try:
        response.feed.setdefault("opensearch_itemsperpage", self.resultsPerPage)
        response.feed.setdefault("opensearch_totalresults", int(len(results)))
        response.feed.setdefault("opensearch_startindex", 0)
    except KeyError:
        response.feed.setdefault("opensearch_totalresults", 0)
        response.feed.setdefault("opensearch_itemsperpage", 0)
        response.feed.setdefault("opensearch_startindex", 0)
    # There is no pagination as a parameter, all results are simply returned
    # in one go, so this mimics pagination
    startIndex = offset * self.resultsPerPage
    if (startIndex + self.resultsPerPage) > len(results):
        endIndex = len(results)
    else:
        endIndex = startIndex + self.resultsPerPage
    # Go through a subset of the results and grab them - corresponding to
    # the page in question
    for i in range(startIndex, endIndex):
        try:
            result_dict = results[i]
            result_dict['summary'] = results[i]['description']
            result_dict['link'] = results[i]['permalink_url']
            result_dict['artist'] = results[i]['user']['username']
            response.entries.append(result_dict)
        except Exception, e:
            print "Skipping a result due to: {0} \nWhen parsing a result from: {1}\n".format(e, url)
            continue
def parse_bing_json(site, url, query, results, sources, pos):
    """Create a OpenSearch Response from Bing V2 results.

    Bing's search API returns results in JSON format. The per-source parsing
    is delegated to parseWebJson / parseNewsJson / etc.

    Parameters:
    * site (str): search engine name
    * url (str): the url for the results that were retrieved to use as the
      OpenSearch link for the response
    * query (str): query search terms (n.b. not a OpenSearch Query object)
    * results (dict): results from service
    * sources (array): all the sources we are currently using i.e. Web and
      News or just Web
    * pos (int): what page we are starting on

    Returns:
    * puppy.model.OpenSearch.Response

    NOTE(review): duplicate of the earlier parse_bing_json; it also reads
    self.source / self.resultsPerPage without a `self` parameter -- confirm
    against the enclosing class.
    """
    response = Response()
    response.version = 'json'
    response.feed.setdefault('title', "{0}: {1}".format(site, query))
    response.feed.setdefault('link', url)
    response.feed.setdefault('description', "Search results for '{0}' at {1}".format(query, site))
    response.namespaces.setdefault("opensearch", "http://a9.com/-/spec/opensearch/1.1/")
    try:
        response.feed.setdefault("opensearch_totalresults", int(results[self.source]['Total']))
        response.feed.setdefault("opensearch_itemsperpage", self.resultsPerPage)
        response.feed.setdefault("opensearch_startindex", pos)
    except KeyError:
        response.feed.setdefault("opensearch_totalresults", 0)
        response.feed.setdefault("opensearch_itemsperpage", 0)
        response.feed.setdefault("opensearch_startindex", 0)
    # Go through every source type selected, parse its results and store them.
    for sourceType in sources:
        if (sourceType == 'Web') and ('Results' in results['Web']):
            response = parseWebJson(response, results, url)
        elif (sourceType == 'News') and ('News' in results):
            response = parseNewsJson(response, results, url)
        elif (sourceType == 'Image') and ('Results' in results['Image']):
            response = parseImageJson(response, results, url)
        elif (sourceType == 'Video') and ('Results' in results['Video']):
            response = parseVideoJson(response, results, url)
        elif (sourceType == 'Spell') and ('Spell' in results):
            response = parseSpellJson(response, results, query, url)
        elif (sourceType == 'RelatedSearch') and ('RelatedSearch' in results):
            response = parseRelatedSearchJson(response, results, query, url)
    return response
def parse_web_spell_checker_xml(site, url, query, results): """ Web Spell Checker's search API returns results in XML format. This function simply loads the XML into memory and creates an equivalent representation that is OpenSearch compliant. Parameters: * site (str): search engine name * url (str): the url for the results that were retrieved to use as the OpenSearch link for the response * query (str): query search terms (n.b. not a OpenSearch Query object) * results (dict): results from service Returns: * puppy.model.OpenSearch.Response """ response = Response() response.version = 'xml' response.feed.setdefault('title', "{0}: {1}".format(site, query)) response.feed.setdefault('link', url) response.feed.setdefault( 'description', "Search results for '{0}' at {1}".format(query, site)) response.namespaces.setdefault( "opensearch", "http://a9.com/-/spec/opensearch/1.1/") response.feed.setdefault("opensearch_itemsperpage", '') response.feed.setdefault("opensearch_startindex", 0) root = etree.XML(results) section = root.find("misspelling") suggestions = section.find("suggestions") for item in suggestions: try: suggestion = item.text spell_dict = { "title": "Spelling Suggestion for: '{0}'".format(query), "link": '' } spell_dict[ 'summary'] = "Original query: '{0}'. Suggested correction of query: '{1}'.".format( query, suggestion) spell_dict['suggestion'] = suggestion response.entries.append(spell_dict) except Exception, e: print "Skipping a result due to: {0} \nWhen parsing a result from: {1}\n".format( e, url) continue
def parse_digg_json(site, url, pos, query, results):
    """Create a OpenSearch Response from Digg results.

    Digg's search API returns results in JSON format. This function simply
    loads the JSON into memory and creates an equivalent representation that
    is OpenSearch compliant.

    Parameters:
    * site (str): search engine name
    * url (str): the url for the results that were retrieved to use as the
      OpenSearch link for the response
    * pos (int): which page number we're on
    * query (str): query search terms (n.b. not a OpenSearch Query object)
    * results (dict): results from service

    Returns:
    * puppy.model.OpenSearch.Response

    NOTE(review): reads self.resultsPerPage but takes no `self` parameter,
    and no `return response` is visible -- confirm both against the original
    class definition.
    """
    response = Response()
    response.version = 'json'
    response.feed.setdefault('title', "{0}: {1}".format(site, query))
    response.feed.setdefault('link', url)
    response.feed.setdefault(
        'description', "Search results for '{0}' at {1}".format(query, site))
    response.namespaces.setdefault(
        "opensearch", "http://a9.com/-/spec/opensearch/1.1/")
    try:
        response.feed.setdefault("opensearch_totalresults", results['total'])
        response.feed.setdefault("opensearch_itemsperpage", self.resultsPerPage)
        response.feed.setdefault("opensearch_startindex", pos)
    except KeyError:
        response.feed.setdefault("opensearch_totalresults", 0)
        response.feed.setdefault("opensearch_itemsperpage", 0)
        response.feed.setdefault("opensearch_startindex", 0)
    for result in results['stories']:
        try:
            # See Digg for the result format for all the other data
            item_dict = result
            item_dict['summary'] = result['description']
            item_dict['link'] = result['href']
            response.entries.append(item_dict)
        except Exception, e:
            # If there is a parsing problem, print out an error and just
            # skip this individual result
            print "Skipping a result due to: {0} \nWhen parsing a result from: {1}\n".format(e, url)
            continue
def parse_google_json(site, url, query, num_results, results):
    """Convert Google AJAX Search JSON results into an OpenSearch Response.

    Parameters:
    * site (str): search engine name
    * url (str): search url used
    * query (str): query search terms (n.b. not a OpenSearch Query object)
    * num_results (int): number of desired results
    * results (dict): decoded JSON results from the service

    Returns:
    * results (puppy.model.Response)
    """
    response = Response()
    response.version = 'json'
    feed = response.feed
    feed.setdefault('title', "{0}: {1}".format(site, query))
    feed.setdefault('link', results['cursor']['moreResultsUrl'])
    feed.setdefault('description',
                    "Search results for '{0}' at {1}".format(query, site))
    try:
        feed.setdefault('opensearch_totalresults',
                        results['cursor']['estimatedResultCount'])
        feed.setdefault('opensearch_startindex',
                        results['cursor']['currentPageIndex'])
    except KeyError:
        # Cursor counters can be absent; report zeros in that case.
        feed.setdefault('opensearch_totalresults', 0)
        feed.setdefault('opensearch_startindex', 0)
    response.entries.extend(
        {'title': r['title'], 'link': r['url'], 'summary': r['content']}
        for r in results['results'])
    feed.setdefault('opensearch_itemsperpage', len(response.entries))
    return response
def parse_spotify_json(site, url, query, results):
    """Turn Spotify JSON search results into an OpenSearch-compliant Response.

    Parameters:
    * site (str): search engine name
    * url (str): the url the results came from, used as the feed link
    * query (str): query search terms (n.b. not a OpenSearch Query object)
    * results (dict): decoded JSON from the service

    Returns:
    * puppy.model.OpenSearch.Response
    """
    response = Response()
    response.version = 'json'
    response.feed.setdefault('title', "{0}: {1}".format(site, query))
    response.feed.setdefault('link', url)
    response.feed.setdefault('description', "Search results for '{0}' at {1}".format(query, site))
    response.namespaces.setdefault("opensearch", "http://a9.com/-/spec/opensearch/1.1/")
    try:
        info = results['info']
        response.feed.setdefault("opensearch_totalresults", int(info['num_results']))
        response.feed.setdefault("opensearch_itemsperpage", int(info['limit']))
        response.feed.setdefault("opensearch_startindex", int(info['page']))
    except KeyError:
        response.feed.setdefault("opensearch_totalresults", 0)
        response.feed.setdefault("opensearch_itemsperpage", 0)
        response.feed.setdefault("opensearch_startindex", 0)
    # Dispatch to the per-category parser matching the requested source.
    # NOTE(review): 'self' is referenced in a free function here - as written
    # this raises NameError; confirm where 'source' is meant to come from.
    if (self.source == 'tracks') and ('tracks' in results):
        response = parse_tracks_json(response, results, url)
    elif (self.source == 'albums') and ('albums' in results):
        response = parse_albums_json(response, results, url)
    elif (self.source == 'artists') and ('artists' in results):
        response = parse_artists_json(response, results, url)
    return response
def parse_wiki_xml(url, query, results): """ Simple Wikipedias's search API returns results in XML format. This function simply loads the XML into memory and creates an equivalent representation that is OpenSearch compliant. Parameters: * url (str): the url for the results that were retrieved to use as the OpenSearch link for the response * query (str): query search terms (n.b. not a OpenSearch Query object) * results (dict): results from service Returns: * puppy.model.OpenSearch.Response """ response = Response() response.feed.setdefault("title", "Simple Wikipedia Search") response.feed.setdefault('link', url) response.feed.setdefault("description", "Simple Wikipedia Search Suggestions for: {0}".format(query)) response.namespaces.setdefault("searchsuggest", "{http://opensearch.org/searchsuggest2}") response.version = 'xml' root = etree.XML(results) ns = response.namespaces["searchsuggest"] section = root.find("{0}Section".format(ns)) items = section.findall("{0}Item".format(ns)) for item in items: try: title = item.find("{0}Text".format(ns)).text summary = item.find("{0}Description".format(ns)).text link = item.find("{0}Url".format(ns)).text image = item.find("{0}Image".format(ns)) thumbnail = image.get("source") if image is not None else "" image_fullsize = "" if thumbnail is not "": image_fullsize = thumbnail.replace("thumb/", "").rpartition('/')[0] response.entries.append({'title': title, 'summary': summary, 'link': link, 'thumbnail': thumbnail, 'image': image_fullsize}) except etree.XMLSyntaxError, e: print "Skipping a result due to an lxml syntax error.\nWhen parsing a result from: {0}\n"(url) continue except etree.LxmlError, e: print "Skipping a result due to an lxml error: {0} \nWhen parsing a result from: {1}\n"(e, url) continue
def parse_guardian_json(site, url, query, results): """Create a OpenSearch Response from Guardian results. Guardians's search API returns results in JSON format. This function simply loads the JSON into memory and creates an equivalent representation that is OpenSearch compliant. Parameters: * site (str): search engine name * url (str): the url for the results that were retrieved to use as the OpenSearch link for the response * query (str): query search terms (n.b. not a OpenSearch Query object) * results (dict): results from service Returns: * puppy.model.OpenSearch.Response """ response = Response() response.version = "json" response.feed.setdefault("title", "{0}: {1}".format(site, query)) response.feed.setdefault("link", url) response.feed.setdefault("description", "Search results for '{0}' at {1}".format(query, site)) response.namespaces.setdefault("opensearch", "http://a9.com/-/spec/opensearch/1.1/") try: response.feed.setdefault("opensearch_totalresults", int(results["total"])) response.feed.setdefault("opensearch_itemsperpage", int(results["pageSize"])) response.feed.setdefault("opensearch_startindex", int(results["startIndex"])) except KeyError: response.feed.setdefault("opensearch_totalresults", 0) response.feed.setdefault("opensearch_itemsperpage", 0) response.feed.setdefault("opensearch_startindex", 0) for result in results["results"]: try: response.entries.append( { "title": result["webTitle"], "link": result["webUrl"], "summary": result["fields"]["standfirst"], } ) except Exception, e: # If there is a parsing problem, print out an error and just skip this individual result print "Skipping a result due to: {0} \nWhen parsing a result from: {1}\n".format(e, url) continue
def parse_whoosh_trec(site, query, results): response = Response() response.version = 'trec' response.feed.setdefault('title', "{0}: {1}".format(site, query)) response.feed.setdefault('link', '') response.feed.setdefault( 'description', "Search results for '{0}' at {1}".format(query, site)) response.namespaces.setdefault( "opensearch", "http://a9.com/-/spec/opensearch/1.1/") response.feed.setdefault("opensearch_totalresults", results.pagecount) response.feed.setdefault("opensearch_itemsperpage", pagelen) response.feed.setdefault("opensearch_startindex", results.pagenum) response.feed.setdefault('query', query) try: r = 0 if len(results) > 1: for hit in results: r = r + 1 title = hit["title"] title = title.strip() if len(title) < 1: title = query rank = ( (int(results.pagenum) - 1) * results.pagelen) + r link = "/treconomics/" + str( hit.docnum) + "?rank=" + str(rank) desc = hit.highlights("content") docid = hit["docid"] docid = docid.strip() source = hit["source"] response.entries.append({ 'title': title, 'link': link, 'summary': desc, 'docid': docid, 'source': source }) else: print "No hits found for query: " + query except Exception, e: print "Converting results to OpenSearch Failed"
def parse_flickr_json(site, query, results): """Create a OpenSearch Response from Flickr results. Flickr's search API returns results in JSON format. This function simply loads the JSON into memory and creates an equivalent representation that is OpenSearch compliant. Parameters: * site (str): search engine name * query (str): query search terms (n.b. not a OpenSearch Query object) * results (dict): results from service Returns: * puppy.model.OpenSearch.Response """ response = Response() response.version = 'json' response.feed.setdefault('title', "{0}: {1}".format(site, query)) response.feed.setdefault('link', results['link']) response.feed.setdefault('description', "Search results for '{0}' at {1}".format(query, site)) response.namespaces.setdefault("opensearch", "http://a9.com/-/spec/opensearch/1.1/") try: response.feed.setdefault("opensearch_totalresults", int(results['total'])) response.feed.setdefault("opensearch_itemsperpage", int(results['perpage'])) response.feed.setdefault("opensearch_startindex", int(results['page'])) except KeyError: response.feed.setdefault("opensearch_totalresults", 0) response.feed.setdefault("opensearch_itemsperpage", 0) response.feed.setdefault("opensearch_startindex", 0) if 'photo' in results: for result in results['photo']: # Links need to be created from several fields - see the Flickr API for a detailed explanation try: resultLink = "http://www.flickr.com/photos/{0}/{1}".format(result['owner'], result['id']) resultThumbnail = "http://farm{0}.static.flickr.com/{1}/{2}_{3}_t.jpg".format(result['farm'], result['server'], result['id'], result['secret']) resultSummary = "Photo result for '{0}' from {1}".format(query, site) response.entries.append({'title': result['title'], 'link': resultLink, 'summary': resultSummary, 'thumbnail': resultThumbnail}) except Exception, e: print "Skipping a result due to: {0} \nWhen parsing a result from: {1}\n".format(e, results['link']) continue
def parse_wordnik_json(site, query, results, url):
    """Turn Wordnik JSON results into an OpenSearch-compliant Response.

    Parameters:
    * site (str): search engine name
    * query (str): query search terms (n.b. not a OpenSearch Query object)
    * results (dict): decoded JSON from the service
    * url (str): the url the results came from, used as the feed link

    Returns:
    * puppy.model.OpenSearch.Response
    """
    response = Response()
    response.version = 'json'
    response.feed.setdefault('title', "{0}: {1}".format(site, query))
    response.feed.setdefault('link', url)
    response.feed.setdefault('description', "Search results for '{0}' at {1}".format(query, site))
    response.namespaces.setdefault("opensearch", "http://a9.com/-/spec/opensearch/1.1/")
    try:
        # NOTE(review): 'self' looks out of place in a free function - confirm
        # where resultsPerPage (and 'source' below) are meant to come from.
        response.feed.setdefault("opensearch_itemsperpage", self.resultsPerPage)
        response.feed.setdefault("opensearch_startindex", 0)
    except KeyError:
        response.feed.setdefault("opensearch_itemsperpage", 0)
        response.feed.setdefault("opensearch_startindex", 0)
    # Delegate entry parsing to the helper matching the configured source.
    if (self.source == 'Suggestions') and ('suggestions' in results):
        response.entries = parseSuggestionsJson(results, query, url)
        total = int(len(results['suggestions']))
    elif (self.source == 'Examples') and ('examples' in results):
        response.entries = parseExamplesJson(results, url)
        total = int(len(results['examples']))
    elif (self.source == 'Definitions'):
        response.entries = parseDefinitionsJson(results, url)
        total = int(len(results))
    else:
        total = 0
    response.feed.setdefault("opensearch_totalresults", total)
    return response
def parse_digg_json(site, url, pos, query, results): """Create a OpenSearch Response from Digg results. Digg's search API returns results in JSON format. This function simply loads the JSON into memory and creates an equivalent representation that is OpenSearch compliant. Parameters: * site (str): search engine name * url (str): the url for the results that were retrieved to use as the OpenSearch link for the response * pos(int): which page number we're on * query (str): query search terms (n.b. not a OpenSearch Query object) * results (dict): results from service Returns: * puppy.model.OpenSearch.Response """ response = Response() response.version = 'json' response.feed.setdefault('title', "{0}: {1}".format(site, query)) response.feed.setdefault('link', url) response.feed.setdefault('description', "Search results for '{0}' at {1}".format(query, site)) response.namespaces.setdefault("opensearch", "http://a9.com/-/spec/opensearch/1.1/") try: response.feed.setdefault("opensearch_totalresults", results['total']) response.feed.setdefault("opensearch_itemsperpage", self.resultsPerPage) response.feed.setdefault("opensearch_startindex", pos) except KeyError: response.feed.setdefault("opensearch_totalresults", 0) response.feed.setdefault("opensearch_itemsperpage", 0) response.feed.setdefault("opensearch_startindex", 0) for result in results['stories']: try: item_dict = result # See Digg for the result format for all the other data item_dict['summary'] = result['description'] item_dict['link'] = result['href'] response.entries.append(item_dict) except Exception, e: # If there is a parsing problem, print out an error and just skip this individual result print "Skipping a result due to: {0} \nWhen parsing a result from: {1}\n".format(e, url) continue
def _parse_bing_response(self, query, results, offset):
    """Handles processing the JSON into a PuppyIR Response.

    Parameters:
    * query: query object exposing .search_terms
    * results: decoded JSON from the Bing service
    * offset (int): start index to record in the OpenSearch feed

    Returns:
    * puppy.model.OpenSearch.Response
    """
    response = Response()
    response.version = 'json'
    response.feed.setdefault('title', "Results from %s for: %s" % (self.engineName, query.search_terms))
    response.feed.setdefault('link', "")
    response.feed.setdefault('description', "%s results from %s" % (self.source, self.engineName))
    response.namespaces.setdefault("opensearch", "http://a9.com/-/spec/opensearch/1.1/")
    # Fix: 'entries' was only bound inside the if/elif chain, so an
    # unrecognised self.source raised NameError below; default to no entries.
    entries = []
    if self.source == "Web":
        entries = self._parse_web_results(results)
    elif self.source == "Image":
        entries = self._parse_image_results(query, results)
    elif self.source == "News":
        entries = self._parse_news_results(results)
    elif self.source == "RelatedSearch":
        entries = self._parse_related_results(query, results)
    elif self.source == "Video":
        entries = self._parse_video_results(query, results)
    elif self.source == "SpellingSuggestions":
        entries = self._parse_spelling_results(query, results)
    for entry in entries:
        response.entries.append(entry)
    response.feed.setdefault('opensearch_totalresults', len(entries))
    response.feed.setdefault('opensearch_startindex', offset)
    response.feed.setdefault('opensearch_itemsperpage', self.resultsPerPage)
    return response
def parse_bing_xml_response(site, query, results, numResults=10, offset=0):
    """Build an OpenSearch Response from Bing XML, keeping only the entries
    in the window [offset, offset + numResults).

    Parameters:
    * site (str): search engine name
    * query (str): query search terms
    * results: raw XML string returned by the service
    * numResults (int): maximum number of entries to keep
    * offset (int): number of leading entries to skip

    Returns:
    * puppy.model.OpenSearch.Response
    """
    soup = BeautifulSoup(results)
    response = Response()
    response.version = 'xml'
    response.feed.setdefault('title', "{0}: {1}".format(site, query))
    response.feed.setdefault('description', "Search results for {0} at {1}".format(query, site))
    response.feed.setdefault('link', '')
    response.namespaces.setdefault('opensearch', 'http://a9.com/-/spec/opensearch/1.1/')
    seen = 0
    kept = 0
    for entry in soup.findAll('entry'):
        # Only keep entries that fall inside the requested window.
        if offset <= seen < (numResults + offset):
            response.entries.append({
                'title': entry.find('d:title').string,
                'link': entry.find('d:url').string,
                'summary': entry.find('d:description').string
            })
            kept += 1
        seen += 1
    response.feed.setdefault('opensearch_totalresults', seen)
    response.feed.setdefault('opensearch_startindex', offset)
    response.feed.setdefault('opensearch_itemsperpage', kept)
    return response
def parse_spotify_json(site, url, query, results):
    """Convert Spotify JSON search results into an OpenSearch Response.

    Parameters:
    * site (str): search engine name
    * url (str): the url the results came from, used as the feed link
    * query (str): query search terms (n.b. not a OpenSearch Query object)
    * results (dict): decoded JSON from the service

    Returns:
    * puppy.model.OpenSearch.Response
    """
    response = Response()
    response.version = 'json'
    response.feed.setdefault('title', "{0}: {1}".format(site, query))
    response.feed.setdefault('link', url)
    response.feed.setdefault('description', "Search results for '{0}' at {1}".format(query, site))
    response.namespaces.setdefault("opensearch", "http://a9.com/-/spec/opensearch/1.1/")
    try:
        paging = results['info']
        response.feed.setdefault("opensearch_totalresults", int(paging['num_results']))
        response.feed.setdefault("opensearch_itemsperpage", int(paging['limit']))
        response.feed.setdefault("opensearch_startindex", int(paging['page']))
    except KeyError:
        response.feed.setdefault("opensearch_totalresults", 0)
        response.feed.setdefault("opensearch_itemsperpage", 0)
        response.feed.setdefault("opensearch_startindex", 0)
    # Hand the entry parsing to the category-specific helper.
    # NOTE(review): 'self' in a free function raises NameError as written -
    # confirm the intended home of 'source'.
    if (self.source == 'tracks') and ('tracks' in results):
        response = parse_tracks_json(response, results, url)
    elif (self.source == 'albums') and ('albums' in results):
        response = parse_albums_json(response, results, url)
    elif (self.source == 'artists') and ('artists' in results):
        response = parse_artists_json(response, results, url)
    return response
def parse_web_spell_checker_xml(site, url, query, results): """ Web Spell Checker's search API returns results in XML format. This function simply loads the XML into memory and creates an equivalent representation that is OpenSearch compliant. Parameters: * site (str): search engine name * url (str): the url for the results that were retrieved to use as the OpenSearch link for the response * query (str): query search terms (n.b. not a OpenSearch Query object) * results (dict): results from service Returns: * puppy.model.OpenSearch.Response """ response = Response() response.version = 'xml' response.feed.setdefault('title', "{0}: {1}".format(site, query)) response.feed.setdefault('link', url) response.feed.setdefault('description', "Search results for '{0}' at {1}".format(query, site)) response.namespaces.setdefault("opensearch", "http://a9.com/-/spec/opensearch/1.1/") response.feed.setdefault("opensearch_itemsperpage", '') response.feed.setdefault("opensearch_startindex", 0) root = etree.XML(results) section = root.find("misspelling") suggestions = section.find("suggestions") for item in suggestions: try: suggestion = item.text spell_dict = {"title": "Spelling Suggestion for: '{0}'".format(query), "link": ''} spell_dict['summary'] = "Original query: '{0}'. Suggested correction of query: '{1}'.".format(query, suggestion) spell_dict['suggestion'] = suggestion response.entries.append(spell_dict) except Exception, e: print "Skipping a result due to: {0} \nWhen parsing a result from: {1}\n".format(e, url) continue
def parse_rotten_tomatoes_json(site, pos, query, results): """ Rotten Tomatoes's search API returns results in JSON format. This function simply loads the JSON into memory and creates an equivalent representation that is OpenSearch compliant. Parameters: * site (str): search engine name * query (str): query search terms (n.b. not a OpenSearch Query object) * results (dict): results from service Returns: * puppy.model.OpenSearch.Response """ response = Response() response.version = 'json' response.feed.setdefault('title', "{0}: {1}".format(site, query)) response.feed.setdefault('link', results['links']['self']) response.feed.setdefault('description', "Search results for '{0}' at {1}".format(query, site)) response.namespaces.setdefault("opensearch", "http://a9.com/-/spec/opensearch/1.1/") try: response.feed.setdefault("opensearch_totalresults", int(results['total'])) response.feed.setdefault("opensearch_itemsperpage", self.resultsPerPage) response.feed.setdefault("opensearch_startindex", pos) except KeyError: response.feed.setdefault("opensearch_totalresults", 0) response.feed.setdefault("opensearch_itemsperpage", 0) response.feed.setdefault("opensearch_startindex", 0) for result in results['movies']: try: movie_dict = result movie_dict['link'] = result['links']['alternate'] movie_dict['summary'] = result['synopsis'] response.entries.append(movie_dict) except Exception, e: print "Skipping a result due to: {0} \nWhen parsing a result from: {1}\n".format(e, url) continue
def parse_yahoo_json(site, query, results): """Create a OpenSearch Response from Yahoo! BOSS results. Yahoo!'s search API returns results in JSON format. This function simply loads the JSON into memory and creates an equivalent representation that is OpenSearch compliant. Parameters: * site (str): search engine name * query (str): query search terms (n.b. not a OpenSearch Query object) * results (dict): results from service Returns: * puppy.model.OpenSearch.Response """ response = Response() response.version = 'json' response.feed.setdefault('title', "{0}: {1}".format(site, query)) response.feed.setdefault('link', results['link']) response.feed.setdefault('description', "Search results for '{0}' at {1}".format(query, site)) response.namespaces.setdefault("opensearch", "http://a9.com/-/spec/opensearch/1.1/") try: response.feed.setdefault("opensearch_totalresults", int(results['totalhits'])) response.feed.setdefault("opensearch_itemsperpage", int(results['count'])) response.feed.setdefault("opensearch_startindex", int(results['start'])) except KeyError: response.feed.setdefault("opensearch_totalresults", 0) response.feed.setdefault("opensearch_itemsperpage", 0) response.feed.setdefault("opensearch_startindex", 0) for result in results['resultset_web']: try: response.entries.append({'title': result['title'], 'link': result['url'], 'summary': result['abstract']}) except Exception, e: # If there is a parsing problem, print out an error and just skip this individual result print "Skipping a result due to: {0} \nWhen parsing a result from: {1}\n".format(e, url) continue
def _parse_bing_response(self, query, results, offset):
    """Handles processing the JSON into a PuppyIR Response.

    Parameters:
    * query: query object exposing .search_terms
    * results: decoded JSON from the Bing service
    * offset (int): start index to record in the OpenSearch feed

    Returns:
    * puppy.model.OpenSearch.Response
    """
    response = Response()
    response.version = 'json'
    response.feed.setdefault('title', "Results from %s for: %s" % (self.engineName, query.search_terms))
    response.feed.setdefault('link', "")
    response.feed.setdefault('description', "%s results from %s" % (self.source, self.engineName))
    response.namespaces.setdefault("opensearch", "http://a9.com/-/spec/opensearch/1.1/")
    # Fix: 'entries' was unbound when self.source matched no branch, which
    # raised NameError in the loop below; default to an empty result set.
    entries = []
    if self.source == "Web":
        entries = self._parse_web_results(results)
    elif self.source == "Image":
        entries = self._parse_image_results(query, results)
    elif self.source == "News":
        entries = self._parse_news_results(results)
    elif self.source == "RelatedSearch":
        entries = self._parse_related_results(query, results)
    elif self.source == "Video":
        entries = self._parse_video_results(query, results)
    elif self.source == "SpellingSuggestions":
        entries = self._parse_spelling_results(query, results)
    for entry in entries:
        response.entries.append(entry)
    response.feed.setdefault('opensearch_totalresults', len(entries))
    response.feed.setdefault('opensearch_startindex', offset)
    response.feed.setdefault('opensearch_itemsperpage', self.resultsPerPage)
    return response
def parse_itunes_json(site, url, query, results): """Create a OpenSearch Response from iTunes results. iTunes's search API returns results in JSON format. This function simply loads the JSON into memory and creates an equivalent representation that is OpenSearch compliant. Parameters: * site (str): search engine name * url (str): the url for the results that were retrieved to use as the OpenSearch link for the response * query (str): query search terms (n.b. not a OpenSearch Query object) * results (dict): results from service Returns: * puppy.model.OpenSearch.Response """ response = Response() response.version = 'json' response.feed.setdefault('title', "{0}: {1}".format(site, query)) response.feed.setdefault('link', url) response.feed.setdefault('description', "Search results for '{0}' at {1}".format(query, site)) response.namespaces.setdefault("opensearch", "http://a9.com/-/spec/opensearch/1.1/") try: response.feed.setdefault("opensearch_totalresults", int(results['resultCount'])) response.feed.setdefault("opensearch_itemsperpage", self.resultsPerPage) response.feed.setdefault("opensearch_startindex", 0) except KeyError: response.feed.setdefault("opensearch_totalresults", 0) response.feed.setdefault("opensearch_itemsperpage", 0) response.feed.setdefault("opensearch_startindex", 0) for result in results['results']: try: result_dict = result result_dict['title'] = '' result_dict['link'] = '' # For ease of access if we have a thumbnail store it under that name as well if 'artworkUrl60' in result: result_dict['thumbnail'] = result['artworkUrl60'] # If we have a trackname use it - this is iTunes's default for title if 'trackName' in result: result_dict['title'] = result['trackName'] # Use censored track name instead if explicit content should be avoided if (self.explicit == False) and ('trackCensoredName' in result): result_dict['title'] = result['trackCensoredName'] result_dict['summary'] = "{0} by {1}".format(result_dict['title'], result['artistName']) # Otherwise 
see if there's a collection name - if we have a collection of videos or songs it will use this not the above elif 'collectionName' in result: result_dict['title'] = result['collectionName'] # Use censored collection name instead if explicit content should be avoided if (self.explicit == False) and ('collectionCensoredName' in result): result_dict['title'] = result['collectionCensoredName'] result_dict['summary'] = "An item by {0} from the collection {1}".format(result['artistName'], result_dict['title']) # If we have a description then use this instead of the above for the summary if 'longDescription' in result: result_dict['summary'] = result['longDescription'] elif 'shortDescription' in result: result_dict['summary'] = result['shortDescription'] # Track is the default - same as above this is the iTunes default link for an item if 'trackViewUrl' in result: result_dict['link'] = result['trackViewUrl'] # Next check if there's a collection - if its a collection it will use this elif 'collectionViewUrl' in result: result_dict['link'] = result['collectionViewUrl'] # Finally artist - this is the final fallback for a link to this item, a link to the artist page elif 'artistViewUrl' in result: result_dict['link'] = result['artistViewUrl'] response.entries.append(result_dict) except Exception, e: # If there is a parsing problem, print out an error and just skip this individual result print "Skipping a result due to: {0} \nWhen parsing a result from: {1}\n".format(e, url) continue
def parse_itunes_json(site, url, query, results): """Create a OpenSearch Response from iTunes results. iTunes's search API returns results in JSON format. This function simply loads the JSON into memory and creates an equivalent representation that is OpenSearch compliant. Parameters: * site (str): search engine name * url (str): the url for the results that were retrieved to use as the OpenSearch link for the response * query (str): query search terms (n.b. not a OpenSearch Query object) * results (dict): results from service Returns: * puppy.model.OpenSearch.Response """ response = Response() response.version = 'json' response.feed.setdefault('title', "{0}: {1}".format(site, query)) response.feed.setdefault('link', url) response.feed.setdefault( 'description', "Search results for '{0}' at {1}".format(query, site)) response.namespaces.setdefault( "opensearch", "http://a9.com/-/spec/opensearch/1.1/") try: response.feed.setdefault("opensearch_totalresults", int(results['resultCount'])) response.feed.setdefault("opensearch_itemsperpage", self.resultsPerPage) response.feed.setdefault("opensearch_startindex", 0) except KeyError: response.feed.setdefault("opensearch_totalresults", 0) response.feed.setdefault("opensearch_itemsperpage", 0) response.feed.setdefault("opensearch_startindex", 0) for result in results['results']: try: result_dict = result result_dict['title'] = '' result_dict['link'] = '' # For ease of access if we have a thumbnail store it under that name as well if 'artworkUrl60' in result: result_dict['thumbnail'] = result['artworkUrl60'] # If we have a trackname use it - this is iTunes's default for title if 'trackName' in result: result_dict['title'] = result['trackName'] # Use censored track name instead if explicit content should be avoided if (self.explicit == False) and ('trackCensoredName' in result): result_dict['title'] = result['trackCensoredName'] result_dict['summary'] = "{0} by {1}".format( result_dict['title'], result['artistName']) # 
Otherwise see if there's a collection name - if we have a collection of videos or songs it will use this not the above elif 'collectionName' in result: result_dict['title'] = result['collectionName'] # Use censored collection name instead if explicit content should be avoided if (self.explicit == False) and ('collectionCensoredName' in result): result_dict['title'] = result[ 'collectionCensoredName'] result_dict[ 'summary'] = "An item by {0} from the collection {1}".format( result['artistName'], result_dict['title']) # If we have a description then use this instead of the above for the summary if 'longDescription' in result: result_dict['summary'] = result['longDescription'] elif 'shortDescription' in result: result_dict['summary'] = result['shortDescription'] # Track is the default - same as above this is the iTunes default link for an item if 'trackViewUrl' in result: result_dict['link'] = result['trackViewUrl'] # Next check if there's a collection - if its a collection it will use this elif 'collectionViewUrl' in result: result_dict['link'] = result['collectionViewUrl'] # Finally artist - this is the final fallback for a link to this item, a link to the artist page elif 'artistViewUrl' in result: result_dict['link'] = result['artistViewUrl'] response.entries.append(result_dict) except Exception, e: # If there is a parsing problem, print out an error and just skip this individual result print "Skipping a result due to: {0} \nWhen parsing a result from: {1}\n".format( e, url) continue
def parse_bing_xml_response(site, query, results, offset=0):
    """Build an OpenSearch Response from a Bing XML feed.

    Handles the fields of Web, News and Image results, copying each optional
    element into the entry only when it is present.

    Parameters:
    * site (str): search engine name
    * query (str): query search terms
    * results: raw XML string returned by the service
    * offset (int): start index recorded in the OpenSearch feed

    Returns:
    * puppy.model.OpenSearch.Response
    """
    def text_of(node, element):
        # Return the string content of a child element, or '' when absent.
        child = node.find(element)
        if child:
            return child.string
        else:
            return ''

    soup = BeautifulSoup(results)
    response = Response()
    response.version = 'xml'
    response.feed.setdefault('title', "{0}: {1}".format(site, query))
    response.feed.setdefault('description', "Search results for {0} at {1}".format(query, site))
    response.feed.setdefault('link', '')
    response.namespaces.setdefault('opensearch', 'http://a9.com/-/spec/opensearch/1.1/')

    seen = 0
    kept = 0
    for entry in soup.findAll('entry'):
        item = {
            # Fields present on Web results...
            'title': text_of(entry, 'd:title'),
            'link': text_of(entry, 'd:url'),
            'summary': text_of(entry, 'd:description'),
            # ...and fields present on News results.
            'source': text_of(entry, 'd:source'),
            'date': text_of(entry, 'd:date'),
        }
        # Image results: the media url replaces the link, and extra metadata
        # is attached only when the corresponding element exists.
        for tag, key in (('d:mediaurl', 'link'),
                         ('d:sourceurl', 'sourceLink'),
                         ('d:displayurl', 'displayLink'),
                         ('d:width', 'width'),
                         ('d:height', 'height')):
            value = text_of(entry, tag)
            if value:
                item[key] = value
        thumb = entry.find('d:thumbnail')
        if thumb:
            for tag, key in (('d:mediaurl', 'thumbnail'),
                             ('d:width', 'thumbnailWidth'),
                             ('d:height', 'thumbnailHeight')):
                value = text_of(thumb, tag)
                if value:
                    item[key] = value
        response.entries.append(item)
        kept += 1
        seen += 1
    response.feed.setdefault('opensearch_totalresults', seen + offset)
    response.feed.setdefault('opensearch_startindex', offset)
    response.feed.setdefault('opensearch_itemsperpage', kept)
    return response