def _parse_json_response(query, results):
    """
    Parses Pipl's JSON response and returns as an ifind Response.

    Args:
        query (ifind Query): object encapsulating details of a search query.
        results : requests library response object containing search results.

    Returns:
        ifind Response: object encapsulating a search request's results.

    Usage:
        Private method.

    """
    response = Response(query.terms)
    content = json.loads(results.text)

    for record in content[u'records']:
        name = record[u'names'][0][u'display']
        url = record[u'source'][u'url']

        # Records may carry no images at all, or an empty image list;
        # fall back to None rather than failing the whole parse.
        imageurl = None
        try:
            imageurl = record[u'images'][0][u'url']
        except (KeyError, IndexError):
            # Narrowed from a bare except: only the "no image" shapes are
            # expected here; anything else should surface, not be hidden.
            pass

        summary = Pipl._build_summary(record)

        response.add_result(title=name, url=url, summary=summary,
                            imageurl=imageurl)

    return response
def _parse_xml_response(query, results):
    """
    Parses Wikipedia's XML response and returns as an ifind Response.

    Args:
        query (ifind Query): object encapsulating details of a search query.
        results : requests library response object containing search results.

    Returns:
        ifind Response: object encapsulating a search request's results.

    Usage:
        Private method.

    """
    response = Response(query.terms)

    # Parse the raw XML payload; each <Item> element is one search hit.
    document = xml.dom.minidom.parseString(results.content)

    for item in document.getElementsByTagName('Item'):
        title = item.getElementsByTagName('Text')[0].firstChild.data
        url = item.getElementsByTagName('Url')[0].firstChild.data
        summary = item.getElementsByTagName('Description')[0].firstChild.data

        response.add_result(title=title, url=url, summary=summary)

    return response
def _parse_json_response(query, results):
    """
    Parses Bing's JSON response and returns as an ifind Response.

    Args:
        query (ifind Query): object encapsulating details of a search query.
        results : requests library response object containing search results.

    Returns:
        ifind Response: object encapsulating a search request's results.

    Usage:
        Private method.

    """
    response = Response(query.terms)
    content = json.loads(results.text)

    if query.result_type == 'web' or not query.result_type:
        # Ranks are 1-based and follow Bing's own result ordering.
        for rank, result in enumerate(content[u'd'][u'results'][0][u'Web'], 1):
            response.add_result(title=result[u'Title'],
                                url=result[u'Url'],
                                summary=result[u'Description'],
                                rank=rank)

    if query.result_type == 'image':
        for result in content[u'd'][u'results'][0][u'Image']:
            # Floor division keeps the historical whole-kilobyte figure on
            # both Python 2 and Python 3 (true division would yield '1.5'-
            # style strings on Python 3).
            file_size = str(int(result[u'FileSize']) // 1024)  # in kilobytes
            width = result[u'Width']
            height = result[u'Height']
            media_url = result[u'MediaUrl']
            thumb_url = result[u'Thumbnail'][u'MediaUrl']

            response.add_result(file_size=file_size, width=width,
                                height=height, media_url=media_url,
                                thumb_url=thumb_url)

    if query.result_type == 'video':
        for result in content[u'd'][u'results'][0][u'Video']:
            run_time = Bing._get_video_length(int(result[u'RunTime']))
            title = result[u'Title']
            media_url = result[u'MediaUrl']
            thumb_url = result.get(u'Thumbnail', {}).get(u'MediaUrl', None)

            # Skip videos that have no thumbnail available.
            if thumb_url is None:
                continue

            response.add_result(title=title, media_url=media_url,
                                run_time=run_time, thumb_url=thumb_url)

    return response
def _parse_json_response(query, results):
    """
    Parses Facebook's JSON response and returns as an ifind Response.

    Args:
        query (ifind Query): object encapsulating details of a search query.
        results : requests library response object containing search results.

    Returns:
        ifind Response: object encapsulating a search request's results.

    Usage:
        Private method.

    """
    response = Response(query.terms)
    content = json.loads(results.text)

    # Check to see if the response contains any API errors.
    Facebook._check_errors(content)

    if query.result_type == 'user' or not query.result_type:
        # Sample response:
        # {
        #     "data": [
        #         {"name": "John Doe", "id": "999999999999999"},
        #         {"name": "John Doe", "id": "88888888888888"}
        #     ],
        #     "paging": {"next": "long_url"}
        # }
        #
        # The base URL turns an app-scoped user id into a link that
        # redirects to the permanent user profile URL.
        base_url = "https://www.facebook.com/app_scoped_user_id/"

        for user in content[u'data']:
            user_id = user[u'id']
            profile_url = base_url + user_id + '/'
            picture_url = "https://graph.facebook.com/{}/picture?type=normal".format(
                user_id)

            # Only minimal information is available here; a second round of
            # per-user API queries would be needed to build a real snippet.
            response.add_result(title=user[u'name'], url=profile_url,
                                summary='', imageurl=picture_url)

    # TODO: implement the other search types.
    return response
def _parse_json_response(self, query, results):
    """
    Parses Googleplus's JSON response and returns as an ifind Response.

    Args:
        query (ifind Query): object encapsulating details of a search query.
        results : requests library response object containing search results.

    Returns:
        ifind Response: object encapsulating a search request's results.

    Usage:
        Private method.

    """
    response = Response(query.terms)
    content = json.loads(results.text)

    result_type = query.result_type if query.result_type else DEFAULT_RESULT_TYPE

    if result_type in ('people', 'people+'):
        for person in content[u'items']:
            # 'people+' recursively fetches full person details for the
            # summary; plain 'people' leaves the summary blank.
            if result_type == 'people+':
                summary = self._build_person_summary(person[u'id'])
            else:
                summary = ''

            response.add_result(
                title=person[u'displayName'],
                url=person[u'url'],
                summary=summary,
                imageurl=Googleplus._resize_image(person[u'image'][u'url']))

    elif result_type == 'activities':
        for activity in content[u'items']:
            summary = Googleplus._build_activity_summary(activity)

            # Not every activity carries an image; default to ''.
            try:
                imageurl = Googleplus._resize_image(activity[u'image'][u'url'])
            except KeyError:
                imageurl = ''

            response.add_result(
                title=activity[u'verb'] + ' ' + activity[u'title'],
                url=activity[u'url'],
                summary=summary,
                imageurl=imageurl)

    return response
def _create_response(self, query):
    """
    Builds a dummy ifind Response for the given query.

    If the query terms exactly match one of the ten number words, those
    words become the result titles; otherwise ten 'rand' placeholder
    results are returned.
    """
    response = Response(query.terms)

    number_words = ['one', 'two', 'three', 'four', 'five',
                    'six', 'seven', 'eight', 'nine', 'ten']

    names = number_words if query.terms in number_words else ['rand'] * 10

    for name in names:
        response.add_result(name, 'www.' + name + '.com', name + ' ' + ' ' + name)

    return response
def _parse_json_response(query, results):
    """
    Parses GOV.uk's JSON response and returns as an ifind Response.

    Args:
        query (ifind Query): object encapsulating details of a search query.
        results : requests library response object containing search results.

    Returns:
        ifind Response: object encapsulating a search request's results.

    Usage:
        Private method.

    """
    response = Response(query.terms)
    content = json.loads(results.text)

    # Results only provide a relative link; prepend the site root.
    base_url = "https://www.gov.uk"

    for result in content[u'results']:
        # Some results genuinely have no description (they exist!);
        # dict.get replaces the old try/except KeyError dance.
        text = result.get(u'description', '')

        response.add_result(title=result[u'title'],
                            url=base_url + result[u'link'],
                            summary=text)

        # Stop once the requested number of results has been collected.
        if len(response) == query.top:
            break

    return response
def _parse_json_response(query, results):
    """
    Parses Twitter's JSON response and returns as an ifind Response.

    Args:
        query (ifind Query): object encapsulating details of a search query.
        results : requests library response object containing search results.

    Returns:
        ifind Response: object encapsulating a search request's results.

    Usage:
        Private method.

    """
    response = Response(query.terms)
    content = json.loads(results.text)

    for result in content[u'statuses']:
        text = result[u'text']
        result_id = str(result[u'id'])

        # Hoist the nested objects once per tweet instead of re-fetching
        # them with result.get(...) for every single field below.
        user_json = result[u'user']
        entities = result.get(u'entities')

        # User dictionary
        user = {'user_id': user_json[u'id_str'],
                'profile_image': user_json.get(u'profile_image_url'),
                'geo_enabled': user_json.get(u'geo_enabled'),
                'description': user_json.get(u'description'),
                'follower_count': user_json.get(u'followers_count'),
                'protected': user_json.get(u'protected'),
                'location': user_json.get(u'location'),
                'utc_offset': user_json.get(u'utc_offset'),
                'time_zone': user_json.get(u'time_zone'),
                'name': user_json.get(u'name'),
                'screen_name': user_json.get(u'screen_name'),
                'member_since': user_json.get(u'created_at')}

        # TODO clean this up
        # Created at in format: '01 Jan, 2014 @ 20:23'
        stamp = result[u'created_at'].split()
        created_at = "{} {}, {} @ {}".format(stamp[2], stamp[1], stamp[5],
                                             stamp[3][:-3])

        url = 'https://www.twitter.com/{0}/status/{1}'.format(user['user_id'],
                                                              result_id)
        imageurl = user.get('profile_image')
        title = u"{} ({}) - {}".format(user['name'], user['screen_name'],
                                       created_at)

        # Kwargs below
        source = result.get(u'source')
        coordinates = result.get(u'coordinates')
        place = result.get(u'place')
        hashtags = entities.get(u'hashtags')
        user_info = user
        reply_to_screen_name = result.get(u'in_reply_to_screen_name')
        reply_to_userid = result.get(u'in_reply_to_user_id_str')
        reply_to_status = result.get(u'in_reply_to_status_id_str')

        # List of links in the tweet. Each item in the list is a dictionary
        # with keys: u'url', u'indices', u'expanded_url', u'display_url'
        links = entities.get(u'urls')

        # List of media items in the tweet. Each item in the list is a
        # dictionary with keys: u'expanded_url', u'sizes', u'url',
        # u'media_url_https', u'id_str', u'indices', u'media_url', u'type',
        # u'id', u'display_url'
        media = entities.get(u'media')

        # List of users mentioned in the tweet. Each item in the list is a
        # dictionary with keys: u'indices', u'screen_name', u'id', u'name',
        # u'id_str'
        user_mentions = entities.get(u'user_mentions')

        response.add_result(title=title, url=url, summary=text,
                            imageurl=imageurl, stamp=stamp,
                            user_info=user_info, media=media, links=links,
                            user_mentions=user_mentions, source=source,
                            coordinates=coordinates, place=place,
                            hashtags=hashtags,
                            reply_to_screen_name=reply_to_screen_name,
                            reply_to_status=reply_to_status,
                            reply_to_userid=reply_to_userid)

        # Stop once the requested number of results has been collected.
        if len(response) == query.top:
            break

    return response
def parse_response(reader, fieldname, analyzer, fragmenter, formatter, query,
                   results, results_are_page=False):
    """
    Returns an ifind Response, given a query and set of results from
    Whoosh/Redis. Takes an ifind Query object and a list of SORTED results
    for the given query.

    If the page requested (query.skip) is < 0, page 1 is returned. If the
    page requested is greater than the number of available pages, the last
    page is returned.
    """
    def get_term_list():
        # A plain unicode parse means a single-term query.
        if isinstance(query.parsed_terms, unicode):
            return [query.parsed_terms]

        # BUG FIX: the original compared `fieldname == fieldname` (always
        # True), so terms from EVERY field leaked into the highlight term
        # list. Only terms belonging to the requested field should be kept.
        return [text for term_fieldname, text in query.parsed_terms.all_terms()
                if term_fieldname == fieldname]

    response = Response(query.terms)
    response.results_total = len(results)

    if results_are_page:
        # Caller already paginated: (page_number, total_pages, page_results).
        page = results[0]
        response.total_pages = results[1]
        results = results[2]
    else:
        page, response.total_pages, results = get_page(query, results)

    page_len = query.top

    for i, result in enumerate(results, start=1):
        # Absolute rank across the full result list, not just this page.
        rank = (page - 1) * page_len + i

        whoosh_docnum = result[0]
        score = result[1]

        stored_data = reader.stored_fields(whoosh_docnum)

        title = stored_data['title']
        if title:
            title = title.strip()
        else:
            title = "Untitled Document"

        url = "/treconomics/{0}/".format(whoosh_docnum)
        trecid = stored_data['docid'].strip()
        source = stored_data['source'].strip()

        summary = highlight(stored_data['content'], get_term_list(), analyzer,
                            fragmenter, formatter)
        summary = "{0}...".format(summary)

        response.add_result(title=title, url=url, summary=summary,
                            docid=trecid, source=source, rank=rank,
                            whooshid=whoosh_docnum, score=score)

    # The following two lines are for compatibility purposes with the
    # existing codebase. Would really like to take these out.
    setattr(response, 'results_on_page', len(results))
    setattr(response, 'actual_page', page)

    return response
def _parse_json_response(query, results):
    """
    Parses Companycheck's JSON response and returns as an ifind Response.

    Args:
        query (ifind Query): object encapsulating details of a search query.
        results : requests library response object containing search results.

    Returns:
        ifind Response: object encapsulating a search request's results.

    Usage:
        Private method.

    """
    response = Response(query.terms)
    content = json.loads(results.text)

    url_base = 'http://companycheck.co.uk/'

    result_type = query.result_type if query.result_type else DEFAULT_RESULT_TYPE

    if result_type == 'company' or not result_type:
        for record in content:
            company_number = record[u'number']
            profile_url = url_base + 'company/' + str(company_number)

            # Keyword arguments carry the structured company fields through
            # to the result object alongside the generated summary.
            response.add_result(
                title=record[u'name'],
                url=profile_url,
                summary=Companycheck._build_company_summary(record),
                imageurl=None,
                number=company_number,
                country=record[u'country'],
                address=record[u'address'],
                sic=record[u'sic'],
                status=record[u'status'])

    elif result_type == 'director':
        for record in content:
            director_number = record[u'number']
            profile_url = url_base + 'director/' + str(director_number)

            # The director summary helper returns both the summary text and
            # the list of postcodes in one dictionary.
            details = Companycheck._build_director_summary(record)

            response.add_result(title=record[u'name'],
                                url=profile_url,
                                summary=details.get('summary'),
                                imageurl=None,
                                postcodes=details.get('postcode_list'),
                                number=director_number)

    return response