Example #1
0
    def _parse_json_response(query, results):
        """
        Parses Pipl's JSON response and returns as an ifind Response.

        Args:
            query (ifind Query): object encapsulating details of a search query.
            results : requests library response object containing search results.

        Returns:
            ifind Response: object encapsulating a search request's results;
                add_result is called once per entry of the payload's
                'records' list.

        Raises:
            KeyError, IndexError: if the payload has no 'records' key, or a
                record lacks a display name or a source URL.

        Usage:
            Private method.
        """

        response = Response(query.terms)
        content = json.loads(results.text)

        for record in content[u'records']:
            name = record[u'names'][0][u'display']
            url = record[u'source'][u'url']

            # The image is optional. Only the lookup failures that mean
            # "no image available" are tolerated (missing key, empty list,
            # or a null 'images' value); anything else is a real error and
            # must not be silently swallowed.
            try:
                imageurl = record[u'images'][0][u'url']
            except (KeyError, IndexError, TypeError):
                imageurl = None

            summary = Pipl._build_summary(record)

            response.add_result(title=name,
                                url=url,
                                summary=summary,
                                imageurl=imageurl)

        return response
Example #2
0
    def _parse_xml_response(query, results):
        """
        Converts Wikipedia's XML search output into an ifind Response.

        Args:
            query (ifind Query): object encapsulating details of a search query.
            results : requests library response object containing search results.

        Returns:
            ifind Response: object holding one result per 'Item' element of
                the XML document.

        Usage:
            Private method.

        """
        def first_text(node, tag):
            # Character data of the first child of the first <tag> element
            # found beneath node.
            return node.getElementsByTagName(tag)[0].firstChild.data

        response = Response(query.terms)
        document = xml.dom.minidom.parseString(results.content)

        for item in document.getElementsByTagName('Item'):
            response.add_result(title=first_text(item, 'Text'),
                                url=first_text(item, 'Url'),
                                summary=first_text(item, 'Description'))

        return response
Example #3
0
    def _parse_json_response(query, results):
        """
        Converts Bing's JSON search output into an ifind Response.

        Args:
            query (ifind Query): object encapsulating details of a search query.
            results : requests library response object containing search results.

        Returns:
            ifind Response: object encapsulating a search request's results.
                Web results carry a 1-based rank; video results without a
                thumbnail URL are left out.

        Usage:
            Private method.

        """
        response = Response(query.terms)
        payload = json.loads(results.text)
        kind = query.result_type

        def section(name):
            # Every result family lives under the first entry of d.results.
            return payload[u'd'][u'results'][0][name]

        # An unset result type is treated as a web search.
        if kind == 'web' or not kind:
            for rank, hit in enumerate(section(u'Web'), 1):
                response.add_result(title=hit[u'Title'],
                                    url=hit[u'Url'],
                                    summary=hit[u'Description'],
                                    rank=rank)

        elif kind == 'image':
            for hit in section(u'Image'):
                # FileSize divided by 1024, i.e. expressed in kilobytes.
                response.add_result(
                    file_size=str(int(hit[u'FileSize']) / 1024),
                    width=hit[u'Width'],
                    height=hit[u'Height'],
                    media_url=hit[u'MediaUrl'],
                    thumb_url=hit[u'Thumbnail'][u'MediaUrl'])

        elif kind == 'video':
            for hit in section(u'Video'):
                run_time = Bing._get_video_length(int(hit[u'RunTime']))
                title = hit[u'Title']
                media_url = hit[u'MediaUrl']
                thumb_url = hit.get(u'Thumbnail', {}).get(u'MediaUrl', None)
                # Only videos that come with a thumbnail are reported.
                if thumb_url is not None:
                    response.add_result(title=title,
                                        media_url=media_url,
                                        run_time=run_time,
                                        thumb_url=thumb_url)

        return response
Example #4
0
    def _parse_json_response(query, results):
        """
        Converts Facebook's JSON search output into an ifind Response.

        Args:
            query (ifind Query): object encapsulating details of a search query.
            results : requests library response object containing search results.

        Returns:
            ifind Response: object encapsulating a search request's results.
                Only the 'user' result type (also used when no type is set)
                produces results; any other type yields an empty Response.

        Usage:
            Private method.
        """

        response = Response(query.terms)
        content = json.loads(results.text)

        # Hand the decoded payload to the API error check before using it.
        Facebook._check_errors(content)

        kind = query.result_type
        if kind and kind != 'user':
            # The other search types are not implemented yet.
            return response

        # Expected payload shape for a user search:
        #     {
        #       "data": [
        #         {"name": "John Doe", "id": "999999999999999"},
        #         {"name": "John Doe", "id": "88888888888888"}
        #       ],
        #       "paging": {"next": "long_url"}
        #     }

        # Profile links are built from this base; per the original note the
        # resulting URL redirects to the permanent user URL.
        profile_base = "https://www.facebook.com/app_scoped_user_id/"

        for person in content[u'data']:
            display_name = person[u'name']
            scoped_id = person[u'id']
            profile_url = profile_base + scoped_id + '/'
            picture_url = "https://graph.facebook.com/{}/picture?type=normal".format(
                scoped_id)
            # The search payload carries no snippet text; a follow-up API
            # query per user would be needed to fill the summary.
            response.add_result(title=display_name,
                                url=profile_url,
                                summary='',
                                imageurl=picture_url)

        return response
Example #5
0
    def _parse_json_response(self, query, results):
        """
        Converts Googleplus's JSON search output into an ifind Response.

        Args:
            query (ifind Query): object encapsulating details of a search query.
            results : requests library response object containing search results.

        Returns:
            ifind Response: object encapsulating a search request's results.
                Result types other than 'people', 'people+' and 'activities'
                yield an empty Response.

        Usage:
            Private method.
        """

        response = Response(query.terms)
        content = json.loads(results.text)

        # Fall back to the module default when the query names no type.
        result_type = query.result_type or DEFAULT_RESULT_TYPE

        if result_type in ('people', 'people+'):
            # 'people+' additionally builds a summary for every person found.
            with_details = result_type == 'people+'

            for person in content[u'items']:
                name = person[u'displayName']
                url = person[u'url']
                imageurl = Googleplus._resize_image(person[u'image'][u'url'])

                if with_details:
                    summary = self._build_person_summary(person[u'id'])
                else:
                    summary = ''

                response.add_result(title=name,
                                    url=url,
                                    summary=summary,
                                    imageurl=imageurl)

        elif result_type == 'activities':
            for activity in content[u'items']:
                title = activity[u'verb'] + ' ' + activity[u'title']
                url = activity[u'url']
                summary = Googleplus._build_activity_summary(activity)

                # An activity without image data gets an empty image URL.
                try:
                    imageurl = Googleplus._resize_image(
                        activity[u'image'][u'url'])
                except KeyError:
                    imageurl = ''

                response.add_result(title=title,
                                    url=url,
                                    summary=summary,
                                    imageurl=imageurl)

        return response
Example #6
0
    def _create_response(self, query):
        """
        Builds a canned ifind Response holding ten results.

        When query.terms is one of the number words 'one' .. 'ten', the ten
        results are named after those words; otherwise every result is named
        'rand'.

        Args:
            query (ifind Query): object encapsulating details of a search query.

        Returns:
            ifind Response: object populated with the ten generated results.
        """
        response = Response(query.terms)

        number_words = [
            'one', 'two', 'three', 'four', 'five',
            'six', 'seven', 'eight', 'nine', 'ten',
        ]

        if query.terms in number_words:
            names = number_words
        else:
            names = ['rand'] * 10

        for name in names:
            # Positional arguments: title, url, summary.
            response.add_result(name,
                                'www.' + name + '.com',
                                name + ' ' + ' ' + name)

        return response
Example #7
0
    def _parse_json_response(query, results):
        """
        Converts GOV.uk's JSON search output into an ifind Response.

        Args:
            query (ifind Query): object encapsulating details of a search query.
            results : requests library response object containing search results.

        Returns:
            ifind Response: object encapsulating a search request's results;
                filling stops once the response holds query.top results.

        Usage:
            Private method.

        """
        response = Response(query.terms)
        payload = json.loads(results.text)

        # Result links are site-relative, so they are prefixed with the host.
        site_root = "https://www.gov.uk"

        for entry in payload[u'results']:
            # Some results come without a description; use an empty summary.
            description = entry.get(u'description', '')
            response.add_result(title=entry[u'title'],
                                url=site_root + entry[u'link'],
                                summary=description)

            if len(response) == query.top:
                break

        return response
Example #8
0
    def _parse_json_response(query, results):
        """
        Converts Twitter's JSON search output into an ifind Response.

        Args:
            query (ifind Query): object encapsulating details of a search query.
            results : requests library response object containing search results.

        Returns:
            ifind Response: object encapsulating a search request's results;
                filling stops once the response holds query.top results.

        Usage:
            Private method.

        """
        response = Response(query.terms)
        payload = json.loads(results.text)

        for tweet in payload[u'statuses']:
            text = tweet[u'text']
            tweet_id = str(tweet[u'id'])

            # Details of the tweet's author. Only the id is mandatory; the
            # remaining fields become None when absent.
            author = tweet[u'user']
            user = {
                'user_id': author[u'id_str'],
                'profile_image': author.get(u'profile_image_url'),
                'geo_enabled': author.get(u'geo_enabled'),
                'description': author.get(u'description'),
                'follower_count': author.get(u'followers_count'),
                'protected': author.get(u'protected'),
                'location': author.get(u'location'),
                'utc_offset': author.get(u'utc_offset'),
                'time_zone': author.get(u'time_zone'),
                'name': author.get(u'name'),
                'screen_name': author.get(u'screen_name'),
                'member_since': author.get(u'created_at'),
            }

            # TODO clean this up
            # The whitespace-split timestamp is rearranged into the form
            # '01 Jan, 2014 @ 20:23'; the last three characters of the time
            # field are cut off.
            stamp = tweet[u'created_at'].split()
            created_at = "{} {}, {} @ {}".format(
                stamp[2], stamp[1], stamp[5], stamp[3][:-3])

            url = 'https://www.twitter.com/{0}/status/{1}'.format(
                user['user_id'], tweet_id)
            imageurl = user.get('profile_image')
            title = u"{} ({}) - {}".format(
                user['name'], user['screen_name'], created_at)

            # Extra keyword arguments taken straight from the tweet.
            source = tweet.get(u'source')
            coordinates = tweet.get(u'coordinates')
            place = tweet.get(u'place')
            reply_to_screen_name = tweet.get(u'in_reply_to_screen_name')
            reply_to_userid = tweet.get(u'in_reply_to_user_id_str')
            reply_to_status = tweet.get(u'in_reply_to_status_id_str')

            # Per the original notes, each list below holds dictionaries:
            #   urls          - url, indices, expanded_url, display_url
            #   media         - expanded_url, sizes, url, media_url_https,
            #                   id_str, indices, media_url, type, id,
            #                   display_url
            #   user_mentions - indices, screen_name, id, name, id_str
            entities = tweet.get(u'entities')
            hashtags = entities.get(u'hashtags')
            links = entities.get(u'urls')
            media = entities.get(u'media')
            user_mentions = entities.get(u'user_mentions')

            response.add_result(title=title,
                                url=url,
                                summary=text,
                                imageurl=imageurl,
                                stamp=stamp,
                                user_info=user,
                                media=media,
                                links=links,
                                user_mentions=user_mentions,
                                source=source,
                                coordinates=coordinates,
                                place=place,
                                hashtags=hashtags,
                                reply_to_screen_name=reply_to_screen_name,
                                reply_to_status=reply_to_status,
                                reply_to_userid=reply_to_userid)

            if len(response) == query.top:
                break

        return response
Example #9
0
def parse_response(reader,
                   fieldname,
                   analyzer,
                   fragmenter,
                   formatter,
                   query,
                   results,
                   results_are_page=False):
    """
    Returns an ifind Response, given a query and set of results from Whoosh/Redis.
    Takes an ifind Query object and a list of SORTED results for the given query.

    Args:
        reader: object whose stored_fields(docnum) returns the stored
            'title', 'docid', 'source' and 'content' values of a document.
        fieldname: name of the field whose query terms are highlighted in
            the summaries.
        analyzer, fragmenter, formatter: passed through unchanged to
            highlight() when building each summary.
        query (ifind Query): supplies terms, parsed_terms and top.
        results: either the full sorted list of (docnum, score) pairs, or,
            when results_are_page is True, a (page, total_pages, results)
            sequence that has already been paginated.
        results_are_page (bool): whether results is already paginated.

    Returns:
        ifind Response: one result per hit on the selected page, plus the
            results_total, total_pages, results_on_page and actual_page
            attributes.

    Pagination of an unpaginated list is delegated to get_page(); according
    to the original notes, a requested page (query.skip) below 0 yields
    page 1 and a page beyond the last one yields the last page.
    """
    def get_term_list():
        # A plain unicode string is used as the single term to highlight.
        if isinstance(query.parsed_terms, unicode):
            return [query.parsed_terms]

        # Otherwise keep only the terms that belong to the requested field.
        # (This previously compared fieldname with itself, which is always
        # true and therefore let terms from every field through.)
        return [
            text for term_fieldname, text in query.parsed_terms.all_terms()
            if term_fieldname == fieldname
        ]

    response = Response(query.terms)
    # NOTE(review): when results_are_page is True this is the length of the
    # (page, total_pages, results) container rather than a hit count --
    # confirm that this is what callers expect.
    response.results_total = len(results)

    if results_are_page:
        page = results[0]
        response.total_pages = results[1]
        results = results[2]
    else:
        page, response.total_pages, results = get_page(query, results)

    page_len = query.top

    for position, result in enumerate(results, 1):
        # Rank counts across pages: 1-based position on this page, offset
        # by the number of results on the pages before it.
        rank = (page - 1) * page_len + position
        whoosh_docnum = result[0]
        score = result[1]
        stored_data = reader.stored_fields(whoosh_docnum)

        title = stored_data['title']

        if title:
            title = title.strip()
        else:
            title = "Untitled Document"

        url = "/treconomics/{0}/".format(whoosh_docnum)
        trecid = stored_data['docid'].strip()
        source = stored_data['source'].strip()

        summary = highlight(stored_data['content'], get_term_list(), analyzer,
                            fragmenter, formatter)
        summary = "{0}...".format(summary)

        response.add_result(title=title,
                            url=url,
                            summary=summary,
                            docid=trecid,
                            source=source,
                            rank=rank,
                            whooshid=whoosh_docnum,
                            score=score)

    # The following two attributes exist for compatibility with the existing
    # codebase.
    response.results_on_page = len(results)
    response.actual_page = page

    return response
Example #10
0
    def _parse_json_response(query, results):
        """
        Converts Companycheck's JSON search output into an ifind Response.

        Args:
            query (ifind Query): object encapsulating details of a search query.
            results : requests library response object containing search results.

        Returns:
            ifind Response: object encapsulating a search request's results.
                Result types other than 'company' and 'director' yield an
                empty Response.

        Usage:
            Private method.
        """

        response = Response(query.terms)
        entries = json.loads(results.text)
        site_root = 'http://companycheck.co.uk/'

        # Fall back to the module default when the query names no type.
        result_type = query.result_type or DEFAULT_RESULT_TYPE

        if result_type == 'company' or not result_type:
            for company in entries:
                # title, url, summary and imageurl are the standard result
                # fields; the remaining keywords are company-specific extras.
                response.add_result(
                    title=company[u'name'],
                    url=site_root + 'company/' + str(company[u'number']),
                    summary=Companycheck._build_company_summary(company),
                    imageurl=None,
                    number=company[u'number'],
                    country=company[u'country'],
                    address=company[u'address'],
                    sic=company[u'sic'],
                    status=company[u'status'])

        elif result_type == 'director':
            for director in entries:
                name = director[u'name']
                url = site_root + 'director/' + str(director[u'number'])
                # The helper's return value is read for both the summary
                # text and the list of postcodes.
                details = Companycheck._build_director_summary(director)
                response.add_result(title=name,
                                    url=url,
                                    summary=details.get('summary'),
                                    imageurl=None,
                                    postcodes=details.get('postcode_list'),
                                    number=director[u'number'])

        return response