Example #1
0
    def _get_distance(self):
        """Return the distance for this result, computing it on first use.

        A value already stored in ``self._distance`` is returned as-is.
        Otherwise the distance between the stored point of origin and this
        result's location field is computed with ``geopy_distance`` and
        cached in ``self._distance``.

        Returns:
            The cached or freshly computed ``Distance``, or ``None`` when the
            location field on this result is ``None``.

        Raises:
            SpatialError: If ``geopy_distance`` is unavailable, no point of
                origin was recorded, or the location field is not present on
                this result.
        """
        from haystack.utils.geo import Distance

        if self._distance is not None:
            # Either the backend supplied it or an earlier call cached it.
            return self._distance

        # Nothing cached yet, so fall back to computing it locally.
        if geopy_distance is None:
            raise SpatialError("The backend doesn't have 'DISTANCE_AVAILABLE' enabled & the 'geopy' library could not be imported, so distance information is not available.")

        origin = self._point_of_origin
        if not origin:
            raise SpatialError("The original point is not available.")

        field_name = origin['field']
        if not hasattr(self, field_name):
            raise SpatialError("The field '%s' was not included in search results, so the distance could not be calculated." % field_name)

        origin_lng, origin_lat = origin['point'].get_coords()
        location = getattr(self, field_name)
        if location is None:
            return None

        target_lng, target_lat = location.get_coords()
        # The distance helper is called with (lat, lng) pairs, the reverse of
        # the (lng, lat) order returned by get_coords().
        separation = geopy_distance.distance(
            (origin_lat, origin_lng), (target_lat, target_lng))
        self._distance = Distance(km=separation.km)
        return self._distance
Example #2
0
 def sqs_filter_l(self, sqs, search_params):
     """Restrict ``sqs`` to results within range of the ``l`` parameter.

     The location string is resolved through ``self._get_location`` and the
     resulting point and maximum range are stored on ``self.point`` and
     ``self.max_range``. The radius comes from the ``r`` parameter, falling
     back to ``self.max_range``.

     Returns ``sqs`` unchanged when no location was given or when it could
     not be resolved to a point.
     """
     place = search_params.get('l', None)
     if not place:
         return sqs

     self.point, self.max_range = self._get_location(place)
     if not self.point:
         return sqs

     radius_km = search_params.get('r', self.max_range)
     return sqs.dwithin('locations', self.point, Distance(km=radius_km))
Example #3
0
    def _process_results(self,
                         raw_results,
                         highlight=False,
                         result_class=None,
                         distance_point=None):
        """Turn a raw backend response into Haystack's result dictionary.

        Args:
            raw_results: Response object exposing ``hits`` and ``docs`` and,
                optionally, ``stats``, ``facets``, ``spellcheck`` and
                ``highlighting``.
            highlight: Accepted for interface compatibility; not read here.
            result_class: Class instantiated for every usable document.
                Defaults to ``SearchResult``.
            distance_point: When truthy, attached to every result as
                ``_point_of_origin`` together with a ``_distance`` entry.

        Returns:
            dict: ``results``, ``hits``, ``stats``, ``facets`` and
            ``spelling_suggestion``. ``hits`` is reduced by one for every
            document whose model is not among the indexed models.
        """
        from haystack import connections
        results = []
        hits = raw_results.hits
        facets = {}
        stats = {}
        spelling_suggestion = None

        if result_class is None:
            result_class = SearchResult

        if hasattr(raw_results, 'stats'):
            stats = raw_results.stats.get('stats_fields', {})

        if hasattr(raw_results, 'facets'):
            facets = {
                'fields': raw_results.facets.get('facet_fields', {}),
                'dates': raw_results.facets.get('facet_dates', {}),
                'queries': raw_results.facets.get('facet_queries', {}),
            }

            for key in ['fields']:
                for facet_field in facets[key]:
                    # Convert to a two-tuple, as Solr's json format returns a list of
                    # pairs.
                    facets[key][facet_field] = list(
                        zip(facets[key][facet_field][::2],
                            facets[key][facet_field][1::2]))

        if self.include_spelling and hasattr(raw_results, 'spellcheck'):
            # Solr 5+ changed the JSON response format so the suggestions will be key-value mapped rather
            # than simply paired elements in a list, which is a nice improvement but incompatible with
            # Solr 4: https://issues.apache.org/jira/browse/SOLR-3029
            # Collations take precedence; the last entry is used in both cases.
            if len(raw_results.spellcheck.get('collations', [])):
                spelling_suggestion = raw_results.spellcheck['collations'][-1]
            elif len(raw_results.spellcheck.get('suggestions', [])):
                spelling_suggestion = raw_results.spellcheck['suggestions'][-1]

            assert spelling_suggestion is None or isinstance(
                spelling_suggestion, six.string_types)

        unified_index = connections[self.connection_alias].get_unified_index()
        indexed_models = unified_index.get_indexed_models()

        for raw_result in raw_results.docs:
            app_label, model_name = raw_result[DJANGO_CT].split('.')
            additional_fields = {}
            model = haystack_get_model(app_label, model_name)

            if model and model in indexed_models:
                index = unified_index.get_index(model)
                index_field_map = index.field_map
                for key, value in raw_result.items():
                    string_key = str(key)
                    # re-map key if alternate name used; look up with the
                    # same key that was used for the membership test.
                    if string_key in index_field_map:
                        string_key = index_field_map[string_key]

                    if string_key in index.fields and hasattr(
                            index.fields[string_key], 'convert'):
                        additional_fields[string_key] = index.fields[
                            string_key].convert(value)
                    else:
                        additional_fields[string_key] = self.conn._to_python(
                            value)

                # These are passed positionally to result_class below, so
                # they must not also appear as keyword arguments.
                del (additional_fields[DJANGO_CT])
                del (additional_fields[DJANGO_ID])
                del (additional_fields['score'])

                if raw_result[ID] in getattr(raw_results, 'highlighting', {}):
                    additional_fields[
                        'highlighted'] = raw_results.highlighting[
                            raw_result[ID]]

                if distance_point:
                    additional_fields['_point_of_origin'] = distance_point

                    # Compare against None rather than truthiness so that a
                    # reported distance of exactly 0 is kept instead of
                    # being discarded.
                    raw_distance = raw_result.get('__dist__')
                    if raw_distance is not None:
                        from haystack.utils.geo import Distance
                        additional_fields['_distance'] = Distance(
                            km=float(raw_distance))
                    else:
                        additional_fields['_distance'] = None

                result = result_class(app_label, model_name,
                                      raw_result[DJANGO_ID],
                                      raw_result['score'], **additional_fields)
                results.append(result)
            else:
                # Document belongs to a model that is not indexed here.
                hits -= 1

        return {
            'results': results,
            'hits': hits,
            'stats': stats,
            'facets': facets,
            'spelling_suggestion': spelling_suggestion,
        }
    def _process_results(self, raw_results, highlight=False, result_class=None, distance_point=None):
        """Turn a raw backend response into Haystack's result dictionary.

        Args:
            raw_results: Response object exposing ``hits`` and ``docs`` and,
                optionally, ``stats``, ``facets``, ``spellcheck`` and
                ``highlighting``.
            highlight: Accepted for interface compatibility; not read here.
            result_class: Class instantiated for every usable document.
                Defaults to ``SearchResult``.
            distance_point: When truthy, attached to every result as
                ``_point_of_origin`` together with a ``_distance`` entry.

        Returns:
            dict: ``results``, ``hits``, ``stats``, ``facets`` and
            ``spelling_suggestion``. ``hits`` is reduced by one for every
            document whose model is not among the indexed models.
        """
        results = []
        hits = raw_results.hits
        facets = {}
        stats = {}
        spelling_suggestion = None

        if result_class is None:
            result_class = SearchResult

        if hasattr(raw_results,'stats'):
            stats = raw_results.stats.get('stats_fields',{})

        if hasattr(raw_results, 'facets'):
            facets = {
                'fields': raw_results.facets.get('facet_fields', {}),
                'dates': raw_results.facets.get('facet_dates', {}),
                'queries': raw_results.facets.get('facet_queries', {}),
            }

            for key in ['fields']:
                for facet_field in facets[key]:
                    # Convert to a two-tuple, as Solr's json format returns a list of
                    # pairs.
                    facets[key][facet_field] = list(zip(facets[key][facet_field][::2], facets[key][facet_field][1::2]))

        if self.include_spelling is True:
            if hasattr(raw_results, 'spellcheck'):
                if len(raw_results.spellcheck.get('suggestions', [])):
                    # For some reason, it's an array of pairs. Pull off the
                    # collated result from the end.
                    spelling_suggestion = raw_results.spellcheck.get('suggestions')[-1]

        unified_index = connections[self.connection_alias].get_unified_index()
        indexed_models = unified_index.get_indexed_models()

        for raw_result in raw_results.docs:
            app_label, model_name = raw_result[DJANGO_CT].split('.')
            additional_fields = {}
            model = apps.get_model(app_label, model_name)

            if model and model in indexed_models:
                # The index depends only on the model, so resolve it once
                # per document instead of once per field.
                index = unified_index.get_index(model)

                for key, value in raw_result.items():
                    string_key = str(key)

                    if string_key in index.fields and hasattr(index.fields[string_key], 'convert'):
                        additional_fields[string_key] = index.fields[string_key].convert(value)
                    else:
                        additional_fields[string_key] = custom_to_python(value, self.conn._to_python)

                # These are passed positionally to result_class below, so
                # they must not also appear as keyword arguments.
                del(additional_fields[DJANGO_CT])
                del(additional_fields[DJANGO_ID])
                del(additional_fields['score'])

                if raw_result[ID] in getattr(raw_results, 'highlighting', {}):
                    additional_fields['highlighted'] = raw_results.highlighting[raw_result[ID]]

                if distance_point:
                    additional_fields['_point_of_origin'] = distance_point

                    # Compare against None rather than truthiness so that a
                    # reported distance of exactly 0 is kept instead of
                    # being discarded.
                    raw_distance = raw_result.get('__dist__')
                    if raw_distance is not None:
                        from haystack.utils.geo import Distance
                        additional_fields['_distance'] = Distance(km=float(raw_distance))
                    else:
                        additional_fields['_distance'] = None

                result = result_class(app_label, model_name, raw_result[DJANGO_ID], raw_result['score'], **additional_fields)
                results.append(result)
            else:
                # Document belongs to a model that is not indexed here.
                hits -= 1

        return {
            'results': results,
            'hits': hits,
            'stats': stats,
            'facets': facets,
            'spelling_suggestion': spelling_suggestion,
        }
Example #5
0
    def _process_results(self,
                         raw_results,
                         highlight=False,
                         result_class=None,
                         distance_point=None):
        """Turn a raw backend response into Haystack's result dictionary.

        Args:
            raw_results: Response object exposing ``hits`` and ``docs`` and,
                optionally, ``stats``, ``facets``, ``spellcheck`` and
                ``highlighting``.
            highlight: Accepted for interface compatibility; not read here.
            result_class: Class instantiated for every usable document.
                Defaults to ``SearchResult``.
            distance_point: When truthy, attached to every result as
                ``_point_of_origin`` together with a ``_distance`` entry.

        Returns:
            dict: ``results``, ``hits``, ``stats``, ``facets``,
            ``spelling_suggestion`` (the last extracted suggestion, or
            ``None``) and ``spelling_suggestions`` (everything returned by
            ``extract_spelling_suggestions``, or ``None``).

        Raises:
            Exception: Whatever ``extract_spelling_suggestions`` raised, but
                only when ``self.silently_fail`` is false; the error is
                logged either way.
        """
        from haystack import connections

        results = []
        hits = raw_results.hits
        facets = {}
        stats = {}
        spelling_suggestion = spelling_suggestions = None

        if result_class is None:
            result_class = SearchResult

        if hasattr(raw_results, "stats"):
            stats = raw_results.stats.get("stats_fields", {})

        if hasattr(raw_results, "facets"):
            facets = {
                "fields": raw_results.facets.get("facet_fields", {}),
                "dates": raw_results.facets.get("facet_dates", {}),
                "queries": raw_results.facets.get("facet_queries", {}),
            }

            for key in ["fields"]:
                for facet_field in facets[key]:
                    # Convert to a two-tuple, as Solr's json format returns a list of
                    # pairs.
                    facets[key][facet_field] = list(
                        zip(
                            facets[key][facet_field][::2],
                            facets[key][facet_field][1::2],
                        ))

        if self.include_spelling and hasattr(raw_results, "spellcheck"):
            try:
                spelling_suggestions = self.extract_spelling_suggestions(
                    raw_results)
            except Exception as exc:
                self.log.error(
                    "Error extracting spelling suggestions: %s",
                    exc,
                    exc_info=True,
                    extra={"data": {
                        "spellcheck": raw_results.spellcheck
                    }},
                )

                if not self.silently_fail:
                    raise

                spelling_suggestions = None

            if spelling_suggestions:
                # Maintain compatibility with older versions of Haystack which returned a single suggestion:
                spelling_suggestion = spelling_suggestions[-1]
                assert isinstance(spelling_suggestion, six.string_types)
            else:
                spelling_suggestion = None

        unified_index = connections[self.connection_alias].get_unified_index()
        indexed_models = unified_index.get_indexed_models()

        for raw_result in raw_results.docs:
            app_label, model_name = raw_result[DJANGO_CT].split(".")
            additional_fields = {}
            model = haystack_get_model(app_label, model_name)

            if model and model in indexed_models:
                index = unified_index.get_index(model)
                index_field_map = index.field_map
                for key, value in raw_result.items():
                    string_key = str(key)
                    # re-map key if alternate name used; look up with the
                    # same key that was used for the membership test.
                    if string_key in index_field_map:
                        string_key = index_field_map[string_key]

                    if string_key in index.fields and hasattr(
                            index.fields[string_key], "convert"):
                        additional_fields[string_key] = index.fields[
                            string_key].convert(value)
                    else:
                        additional_fields[string_key] = self.conn._to_python(
                            value)

                # These are passed positionally to result_class below, so
                # they must not also appear as keyword arguments.
                del (additional_fields[DJANGO_CT])
                del (additional_fields[DJANGO_ID])
                del (additional_fields["score"])

                if raw_result[ID] in getattr(raw_results, "highlighting", {}):
                    additional_fields[
                        "highlighted"] = raw_results.highlighting[
                            raw_result[ID]]

                if distance_point:
                    additional_fields["_point_of_origin"] = distance_point

                    # Compare against None rather than truthiness so that a
                    # reported distance of exactly 0 is kept instead of
                    # being discarded.
                    raw_distance = raw_result.get("__dist__")
                    if raw_distance is not None:
                        from haystack.utils.geo import Distance

                        additional_fields["_distance"] = Distance(
                            km=float(raw_distance))
                    else:
                        additional_fields["_distance"] = None

                result = result_class(app_label, model_name,
                                      raw_result[DJANGO_ID],
                                      raw_result["score"], **additional_fields)
                results.append(result)
            else:
                # Document belongs to a model that is not indexed here.
                hits -= 1

        return {
            "results": results,
            "hits": hits,
            "stats": stats,
            "facets": facets,
            "spelling_suggestion": spelling_suggestion,
            "spelling_suggestions": spelling_suggestions,
        }
    def _process_results(self, raw_results, highlight=False,
                         result_class=None, distance_point=None,
                         geo_sort=False):
        """Turn a raw search response mapping into Haystack's result dictionary.

        Args:
            raw_results: Mapping read for the keys ``hits``, ``suggest`` and
                ``facets``.
            highlight: Accepted for interface compatibility; not read here.
            result_class: Class instantiated for every usable hit. Defaults
                to ``SearchResult``.
            distance_point: When truthy, attached to every result as
                ``_point_of_origin`` together with a ``_distance`` entry.
            geo_sort: When truthy and the hit carries a ``sort`` list, its
                first value is used as the distance in kilometres.

        Returns:
            dict: ``results``, ``hits``, ``facets`` and
            ``spelling_suggestion``. ``hits`` is reduced by one for every
            hit whose model is not among the indexed models.
        """
        from haystack import connections
        results = []
        hits = raw_results.get('hits', {}).get('total', 0)
        facets = {}
        spelling_suggestion = None

        if result_class is None:
            result_class = SearchResult

        if self.include_spelling and 'suggest' in raw_results:
            # The inner 'suggest' entry may be absent; don't fail on it.
            raw_suggest = raw_results['suggest'].get('suggest')
            if raw_suggest is not None:
                # Per word: keep the original text when there are no options,
                # otherwise take the first option.
                spelling_suggestion = ' '.join([word['text'] if len(word['options']) == 0 else word['options'][0]['text'] for word in raw_suggest])

        if 'facets' in raw_results:
            facets = {
                'fields': {},
                'dates': {},
                'queries': {},
            }

            # Timestamps can be negative for pre-1970 data, which
            # utcfromtimestamp() does not accept on every platform.
            def from_timestamp(tm):
                if tm >= 0:
                    return datetime.datetime.utcfromtimestamp(tm)
                return datetime.datetime(1970, 1, 1) + datetime.timedelta(seconds=tm)

            for facet_fieldname, facet_info in raw_results['facets'].items():
                if facet_info.get('_type', 'terms') == 'terms':
                    facets['fields'][facet_fieldname] = [(individual['term'], individual['count']) for individual in facet_info['terms']]
                elif facet_info.get('_type', 'terms') == 'date_histogram':
                    # Elasticsearch provides UTC timestamps with an extra three
                    # decimals of precision, which datetime barfs on.
                    facets['dates'][facet_fieldname] = [(from_timestamp(individual['time'] / 1000), individual['count']) for individual in facet_info['entries']]
                elif facet_info.get('_type', 'terms') == 'query':
                    facets['queries'][facet_fieldname] = facet_info['count']

        unified_index = connections[self.connection_alias].get_unified_index()
        indexed_models = unified_index.get_indexed_models()
        content_field = unified_index.document_field

        for raw_result in raw_results.get('hits', {}).get('hits', []):
            source = raw_result['_source']
            app_label, model_name = source[DJANGO_CT].split('.')
            additional_fields = {}
            model = get_model(app_label, model_name)

            if model and model in indexed_models:
                # The index depends only on the model, so resolve it once
                # per hit instead of once per field.
                index = unified_index.get_index(model)

                for key, value in source.items():
                    string_key = str(key)

                    if string_key in index.fields and hasattr(index.fields[string_key], 'convert'):
                        additional_fields[string_key] = index.fields[string_key].convert(value)
                    else:
                        additional_fields[string_key] = self._to_python(value)

                # These are passed positionally to result_class below, so
                # they must not also appear as keyword arguments.
                del(additional_fields[DJANGO_CT])
                del(additional_fields[DJANGO_ID])

                if 'highlight' in raw_result:
                    additional_fields['highlighted'] = raw_result['highlight'].get(content_field, '')

                if distance_point:
                    additional_fields['_point_of_origin'] = distance_point

                    if geo_sort and raw_result.get('sort'):
                        from haystack.utils.geo import Distance
                        additional_fields['_distance'] = Distance(km=float(raw_result['sort'][0]))
                    else:
                        additional_fields['_distance'] = None

                result = result_class(app_label, model_name, source[DJANGO_ID], raw_result['_score'], **additional_fields)
                results.append(result)
            else:
                # Hit belongs to a model that is not indexed here.
                hits -= 1

        return {
            'results': results,
            'hits': hits,
            'facets': facets,
            'spelling_suggestion': spelling_suggestion,
        }
Example #7
0
    def _process_results(self,
                         raw_results,
                         highlight=False,
                         result_class=None,
                         distance_point=None,
                         geo_sort=-1):
        """Turn a raw search response mapping into Haystack's result dictionary.

        Args:
            raw_results: Mapping read for the keys ``hits``, ``suggest`` and
                ``aggregations``.
            highlight: Accepted for interface compatibility; not read here.
            result_class: Class instantiated for every usable hit. Defaults
                to ``SearchResult``.
            distance_point: When truthy, attached to every result as
                ``_point_of_origin`` together with a ``_distance`` entry.
            geo_sort: Index into each hit's ``sort`` list holding the
                distance in kilometres; a negative value means no distance
                is read.

        Returns:
            dict: ``results``, ``hits``, ``facets`` and
            ``spelling_suggestion``. ``hits`` is reduced by one for every
            hit whose model is not among the indexed models.
        """
        from haystack import connections
        results = []
        hits = raw_results.get('hits', {}).get('total', 0)
        facets = {}
        spelling_suggestion = None

        if result_class is None:
            result_class = SearchResult

        if self.include_spelling and 'suggest' in raw_results:
            raw_suggest = raw_results['suggest'].get('suggest')
            if raw_suggest:
                # Per word: keep the original text when there are no options,
                # otherwise take the first option.
                spelling_suggestion = ' '.join([
                    word['text'] if len(word['options']) == 0 else
                    word['options'][0]['text'] for word in raw_suggest
                ])

        if 'aggregations' in raw_results:
            facets = {
                'fields': {},
                'dates': {},
                'queries': {},
            }

            # ES can return negative timestamps for pre-1970 data. Handle it.
            def from_timestamp(tm):
                if tm >= 0:
                    return datetime.utcfromtimestamp(tm)
                else:
                    return datetime(1970, 1, 1) + timedelta(seconds=tm)

            for facet_fieldname, facet_info in raw_results[
                    'aggregations'].items():

                # Aggregations without a 'meta'/'_type' entry are treated as
                # plain term facets.
                try:
                    facet_type = facet_info['meta']['_type']
                except KeyError:
                    facet_type = 'terms'

                if facet_type == 'terms':
                    facets['fields'][facet_fieldname] = [
                        (bucket['key'], bucket['doc_count'])
                        for bucket in facet_info.get('buckets', [])
                    ]

                elif facet_type == 'haystack_date_histogram':
                    # Elasticsearch provides UTC timestamps with an extra three
                    # decimals of precision, which datetime barfs on.
                    dates = [(from_timestamp(bucket['key'] / 1000),
                              bucket['doc_count'])
                             for bucket in facet_info.get('buckets', [])]
                    # Strip the histogram suffix to recover the field name.
                    facets['dates'][facet_fieldname[:-len(
                        DATE_HISTOGRAM_FIELD_NAME_SUFFIX)]] = dates

                elif facet_type == 'haystack_date_range':
                    # Recognised, but deliberately contributes nothing.
                    pass

                elif facet_type == 'query':
                    facets['queries'][facet_fieldname] = facet_info['count']

        unified_index = connections[self.connection_alias].get_unified_index()
        indexed_models = unified_index.get_indexed_models()

        for raw_result in raw_results.get('hits', {}).get('hits', []):
            source = raw_result['_source']
            app_label, model_name = source[DJANGO_CT].split('.')
            additional_fields = {}
            model = haystack_get_model(app_label, model_name)

            if model and model in indexed_models:
                # The index depends only on the model, so resolve it once
                # per hit instead of once per field.
                index = unified_index.get_index(model)

                for key, value in source.items():
                    string_key = str(key)

                    if string_key in index.fields and hasattr(
                            index.fields[string_key], 'convert'):
                        additional_fields[string_key] = index.fields[
                            string_key].convert(value)
                    else:
                        additional_fields[string_key] = self._to_python(value)

                # These are passed positionally to result_class below, so
                # they must not also appear as keyword arguments.
                del (additional_fields[DJANGO_CT])
                del (additional_fields[DJANGO_ID])

                if 'highlight' in raw_result:
                    additional_fields['highlighted'] = raw_result['highlight']

                if distance_point:
                    additional_fields['_point_of_origin'] = distance_point

                    if geo_sort >= 0 and raw_result.get('sort'):
                        from haystack.utils.geo import Distance
                        additional_fields['_distance'] = Distance(
                            km=float(raw_result['sort'][geo_sort]))
                    else:
                        additional_fields['_distance'] = None

                result = result_class(app_label, model_name, source[DJANGO_ID],
                                      raw_result['_score'],
                                      **additional_fields)
                results.append(result)
            else:
                # Hit belongs to a model that is not indexed here.
                hits -= 1

        return {
            'results': results,
            'hits': hits,
            'facets': facets,
            'spelling_suggestion': spelling_suggestion,
        }
Example #8
0
    def _process_results(self,
                         raw_results,
                         highlight=False,
                         result_class=None,
                         distance_point=None,
                         geo_sort=False):
        """Build Haystack's result dictionary from a raw search response.

        Every hit is reported as a ``blog.Article`` result keyed by the
        hit's ``_id``; no model lookup is performed.

        Args:
            raw_results: Response read for ``hits`` (its ``total`` attribute
                and its ``hits`` entries) and, optionally, ``facets``.
            highlight: Accepted for interface compatibility; not read here.
            result_class: Class instantiated per hit. Defaults to
                ``SearchResult``.
            distance_point: When truthy, attached to every result as
                ``_point_of_origin`` together with a ``_distance`` entry.
            geo_sort: When truthy and the hit carries a ``sort`` list, its
                first value is used as the distance in kilometres.

        Returns:
            dict: ``results``, ``hits``, ``facets`` and
            ``spelling_suggestion`` (always ``None`` here).
        """
        from haystack import connections

        total_hits = raw_results['hits'].total

        if result_class is None:
            result_class = SearchResult

        facet_data = {}
        if 'facets' in raw_results:
            by_field = {}
            by_date = {}
            by_query = {}
            facet_data = {
                'fields': by_field,
                'dates': by_date,
                'queries': by_query,
            }

            def to_utc(seconds):
                # Negative values (pre-1970) are added to the epoch by hand.
                if seconds >= 0:
                    return datetime.utcfromtimestamp(seconds)
                return datetime(1970, 1, 1) + timedelta(seconds=seconds)

            for name, info in raw_results['facets'].items():
                kind = info.get('_type', 'terms')
                if kind == 'terms':
                    by_field[name] = [(entry['term'], entry['count'])
                                      for entry in info['terms']]
                elif kind == 'date_histogram':
                    # Timestamps are divided by 1000 before conversion.
                    by_date[name] = [(to_utc(entry['time'] / 1000),
                                      entry['count'])
                                     for entry in info['entries']]
                elif kind == 'query':
                    by_query[name] = info['count']

        index_registry = connections[self.connection_alias].get_unified_index()
        document_field = index_registry.document_field

        found = []
        for hit in raw_results['hits']['hits']:
            extra = {}

            if 'highlight' in hit:
                extra['highlighted'] = hit['highlight'].get(document_field, '')

            if distance_point:
                extra['_point_of_origin'] = distance_point
                extra['_distance'] = None

                if geo_sort and hit.get('sort'):
                    from haystack.utils.geo import Distance
                    extra['_distance'] = Distance(km=float(hit['sort'][0]))

            found.append(
                result_class('blog', 'Article', hit['_id'], hit['_score'],
                             **extra))

        return {
            'results': found,
            'hits': total_hits,
            'facets': facet_data,
            'spelling_suggestion': None,
        }
    def _process_results(self, raw_results, highlight=False,
                         result_class=None, distance_point=None,
                         geo_sort=False):
        """Turn a raw search response mapping into Haystack's result dictionary.

        Facets are parsed in one of two ways depending on whether the major
        version in ``elasticsearch.__version__`` is below 2.

        Args:
            raw_results: Mapping read for the keys ``hits``, ``suggest``,
                ``FACET_RESULTS`` and (for versions below 2) ``facets``.
            highlight: Accepted for interface compatibility; not read here.
            result_class: Class instantiated for every usable hit. Defaults
                to ``SearchResult``.
            distance_point: When truthy, attached to every result as
                ``_point_of_origin`` together with a ``_distance`` entry.
            geo_sort: When truthy and the hit carries a ``sort`` list, its
                first value is used as the distance in kilometres.

        Returns:
            dict: ``results``, ``hits``, ``facets`` and
            ``spelling_suggestion``. ``hits`` is reduced by one for every
            hit whose model is not among the indexed models.
        """
        from haystack import connections
        results = []
        hits = raw_results.get('hits', {}).get('total', 0)
        facets = {}
        spelling_suggestion = None

        if result_class is None:
            result_class = SearchResult

        if self.include_spelling and 'suggest' in raw_results:
            raw_suggest = raw_results['suggest'].get('suggest')
            if raw_suggest:
                # Per word: keep the original text when there are no options,
                # otherwise take the first option.
                spelling_suggestion = ' '.join([word['text'] if len(word['options']) == 0 else word['options'][0]['text'] for word in raw_suggest])

        if FACET_RESULTS in raw_results:
            facets = {
                'fields': {},
                'dates': {},
                'queries': {},
            }

            # Evaluate the client version once for the whole facet pass.
            legacy_facets = elasticsearch.__version__[0] < 2

            # ES can return negative timestamps for pre-1970 data. Handle it.
            def from_timestamp(tm):
                if legacy_facets:
                    if tm >= 0:
                        return datetime.utcfromtimestamp(tm)
                    else:
                        return datetime(1970, 1, 1) + timedelta(seconds=tm)
                else:
                    # Here the value is parsed as a formatted date string
                    # rather than a numeric timestamp.
                    return datetime.strptime(tm, '%Y-%m-%dT%H:%M:%S.%fZ')

            if legacy_facets:
                for facet_fieldname, facet_info in raw_results['facets'].items():
                    if facet_info.get('_type', 'terms') == 'terms':
                        facets['fields'][facet_fieldname] = [(individual['term'], individual['count']) for individual in
                                                             facet_info['terms']]
                    elif facet_info.get('_type', 'terms') == 'date_histogram':
                        # Elasticsearch provides UTC timestamps with an extra three
                        # decimals of precision, which datetime barfs on.
                        facets['dates'][facet_fieldname] = [(from_timestamp(individual['time'] / 1000),
                                                             individual['count'])
                                                            for individual in facet_info['entries']]
                    elif facet_info.get('_type', 'terms') == 'query':
                        facets['queries'][facet_fieldname] = facet_info['count']
            else:
                # In version 2 Elasticsearch moved facets into a subset of aggs
                # as well as getting rid of _type in facets so we have to do a
                # bit more guess work to figure out what is inside.
                # They also nested aggs inside another level which we call filter1.
                # But only sometimes.

                for facet_fieldname, facet_info in (raw_results[FACET_RESULTS].get('filter1') or raw_results[FACET_RESULTS]).items():
                    if 'pub_date' in facet_fieldname:
                        # This branch only runs for version 2 and later, so
                        # the bucket's date string is passed through as-is.
                        facets['dates']['pub_date'] = [(from_timestamp(individual['key_as_string']),
                                                        individual['doc_count'])
                                                       for individual in facet_info['buckets']]
                    elif 'buckets' == facet_fieldname:
                        facets['fields'][facet_fieldname] = [(individual['key'], individual['doc_count']) for individual in facet_info['buckets']]
                    elif isinstance(facet_info, dict) and 'buckets' in facet_info:
                        facets['fields'][facet_fieldname] = [(individual['key'], individual['doc_count']) for individual
                                                             in facet_info['buckets']]
                    elif isinstance(facet_info, dict) and 'doc_count' in facet_info:
                        facets['queries'][facet_fieldname] = facet_info['doc_count']

        unified_index = connections[self.connection_alias].get_unified_index()
        indexed_models = unified_index.get_indexed_models()
        content_field = unified_index.document_field

        for raw_result in raw_results.get('hits', {}).get('hits', []):
            source = raw_result['_source']
            app_label, model_name = source[DJANGO_CT].split('.')
            additional_fields = {}
            model = haystack_get_model(app_label, model_name)

            if model and model in indexed_models:
                # The index depends only on the model, so resolve it once
                # per hit instead of once per field.
                index = unified_index.get_index(model)

                for key, value in source.items():
                    string_key = str(key)

                    if string_key in index.fields and hasattr(index.fields[string_key], 'convert'):
                        additional_fields[string_key] = index.fields[string_key].convert(value)
                    else:
                        additional_fields[string_key] = self._to_python(value)

                # These are passed positionally to result_class below, so
                # they must not also appear as keyword arguments.
                del(additional_fields[DJANGO_CT])
                del(additional_fields[DJANGO_ID])

                if 'highlight' in raw_result:
                    additional_fields['highlighted'] = raw_result['highlight'].get(content_field, '')

                if distance_point:
                    additional_fields['_point_of_origin'] = distance_point

                    if geo_sort and raw_result.get('sort'):
                        from haystack.utils.geo import Distance
                        additional_fields['_distance'] = Distance(km=float(raw_result['sort'][0]))
                    else:
                        additional_fields['_distance'] = None

                result = result_class(app_label, model_name, source[DJANGO_ID], raw_result['_score'], **additional_fields)
                results.append(result)
            else:
                # Hit belongs to a model that is not indexed here.
                hits -= 1

        return {
            'results': results,
            'hits': hits,
            'facets': facets,
            'spelling_suggestion': spelling_suggestion,
        }
Example #10
0
    def _process_results(
        self,
        raw_results,
        model=None,
        highlight=False,
        result_class=None,
        distance_point=None,
        percent_score=False,
        is_faceted=False,
    ):
        """
        Convert raw backend rows into the dictionary consumed by the query layer.

        :param raw_results: sequence of row mappings. When ``is_faceted`` is
            true it must contain exactly one row whose ``facet_*`` entries are
            JSON-encoded strings.
        :param model: accepted for interface compatibility; not read here.
        :param highlight: accepted for interface compatibility; not read here.
        :param result_class: class instantiated for every usable row; defaults
            to ``SearchResult``.
        :param distance_point: when truthy, stored on every result as
            ``_point_of_origin`` and ``_distance`` is filled from the row's
            ``__dist__`` value (``None`` when the row has none).
        :param percent_score: accepted for interface compatibility; not read
            here.
        :param is_faceted: whether ``raw_results`` is a single facet row
            instead of a list of documents.
        :returns: dict with the keys ``results``, ``hits``, ``stats``,
            ``facets``, ``spelling_suggestion`` and ``spelling_suggestions``.
        """
        results = []
        if len(raw_results) == 1 and "rows_count" in raw_results[0]:
            hits = raw_results[0]["rows_count"]
        else:
            # we can't rely on the DSE response for the hits if we're not using the COUNT(*), that's why we set as 0 and
            # if we need the hits later we will do a COUNT(*) query.
            hits = 0
        facets = {}
        stats = {}
        spelling_suggestion = spelling_suggestions = None

        if is_faceted:
            assert len(raw_results
                       ) == 1, "Faceted searches should have only one result"
            raw_results = raw_results[0]

            field_facets = json.loads(raw_results.get("facet_fields", "{}"))
            # Convert each {value: count} mapping to a list of two-tuples, the
            # shape used for field facets in the returned dictionary.
            facets["fields"] = {
                facet_field: list(counts.items())
                for facet_field, counts in field_facets.items()
            }
            facets["dates"] = json.loads(raw_results.get("facet_dates", "{}"))
            facets["queries"] = json.loads(
                raw_results.get("facet_queries", "{}"))
            if "facet_heatmaps" in raw_results:
                facets["heatmaps"] = json.loads(
                    raw_results.get("facet_heatmaps", "{}"))

        if result_class is None:
            result_class = SearchResult

        if hasattr(raw_results, "stats"):
            stats = raw_results.stats.get("stats_fields", {})

        from haystack import connections

        unified_index = connections[self.connection_alias].get_unified_index()
        indexed_models = unified_index.get_indexed_models()

        if not is_faceted:
            for raw_result in raw_results:
                app_label, model_name = raw_result[DJANGO_CT].split(".")
                # Named ``result_model`` so the ``model`` parameter is not
                # shadowed by the per-row lookup.
                result_model = haystack_get_model(app_label, model_name)

                if not result_model or result_model not in indexed_models:
                    # Rows for models that are not indexed are ignored.
                    continue

                additional_fields = {}
                index = unified_index.get_index(result_model)
                index_field_map = index.field_map
                for key, value in raw_result.items():
                    string_key = str(key)
                    # re-map key if alternate name used; look the alias up with
                    # the same (string) key that the membership test used.
                    if string_key in index_field_map:
                        string_key = index_field_map[string_key]

                    if isinstance(value, Date):
                        value = str(value)

                    if string_key in index.fields and hasattr(
                            index.fields[string_key], "convert"):
                        converted = index.fields[string_key].convert(value)
                    elif hasattr(result_model, string_key):
                        column = getattr(result_model, string_key)
                        if hasattr(column, "column"):
                            converted = column.column.to_python(value)
                        elif hasattr(column, "to_python"):
                            converted = column.to_python(value)
                        else:
                            converted = self.conn._to_python(value)
                    else:
                        converted = self.conn._to_python(value)

                    additional_fields[string_key] = converted

                del additional_fields[DJANGO_CT]
                del additional_fields[DJANGO_ID]
                additional_fields.pop("rows_count", None)

                if raw_result[ID] in getattr(raw_results, "highlighting", {}):
                    additional_fields[
                        "highlighted"] = raw_results.highlighting[
                            raw_result[ID]]

                if distance_point:
                    additional_fields["_point_of_origin"] = distance_point

                    # Compare against None: a distance of 0 is a valid value
                    # and must not be mistaken for "no distance returned".
                    raw_distance = raw_result.get("__dist__")
                    if raw_distance is not None:
                        from haystack.utils.geo import Distance

                        additional_fields["_distance"] = Distance(
                            km=float(raw_distance))
                    else:
                        additional_fields["_distance"] = None

                additional_fields["already_loaded"] = True
                if "score" not in additional_fields:
                    additional_fields["score"] = 1.0
                results.append(
                    result_class(app_label, model_name,
                                 raw_result[DJANGO_ID],
                                 **additional_fields))

        processed = {
            "results": results,
            "hits": hits,
            "stats": stats,
            "facets": facets,
            "spelling_suggestion": spelling_suggestion,
            "spelling_suggestions": spelling_suggestions,
        }

        # Range facets only exist on the single facet row; ``facets["dates"]``
        # is guaranteed to be present on that path.
        if is_faceted and "facet_ranges" in raw_results:
            facets["ranges"] = {}
            ranges = json.loads(raw_results.get("facet_ranges", "{}"))

            if ranges:
                for field_name, range_data in ranges.items():
                    if field_name in self.date_facets:
                        facets["dates"][field_name] = tuple(
                            range_data["counts"].items())
                    elif field_name in self.range_facets:
                        facets["ranges"][field_name] = tuple(
                            range_data["counts"].items())

        return processed
Example #11
0
    def _process_results(self,
                         raw_results,
                         highlight=False,
                         result_class=None,
                         distance_point=None,
                         geo_sort=False):
        """
        Convert a raw Elasticsearch response into the dictionary consumed by
        the query layer.

        :param raw_results: response mapping; ``hits``, ``facets`` and
            ``suggest`` are read from it. It is not modified.
        :param highlight: accepted for interface compatibility; not read here.
        :param result_class: class instantiated for every usable hit; defaults
            to ``SearchResult``.
        :param distance_point: when truthy, stored on every result as
            ``_point_of_origin``.
        :param geo_sort: when true (and ``distance_point`` is set) the first
            ``sort`` value of a hit is used as its distance in kilometres.
        :returns: dict with the keys ``results``, ``hits``, ``facets`` and
            ``spelling_suggestion``. ``hits`` is the reported total minus one
            for every hit on this page that could not be turned into a result.
        """
        from haystack import connections
        results = []
        hits = raw_results.get('hits', {}).get('total', 0)
        facets = {}
        spelling_suggestion = None

        if result_class is None:
            result_class = SearchResult

        if self.include_spelling and 'suggest' in raw_results:
            raw_suggest = raw_results['suggest'].get('suggest')
            if raw_suggest:
                spelling_suggestion = ' '.join([
                    word['text'] if len(word['options']) == 0 else
                    word['options'][0]['text'] for word in raw_suggest
                ])

        if 'facets' in raw_results:
            facets = {
                'fields': {},
                'dates': {},
                'queries': {},
            }

            # Buckets before 1970 have negative timestamps, which
            # utcfromtimestamp() rejects on some platforms; build those from
            # the epoch instead.
            def from_timestamp(seconds):
                if seconds >= 0:
                    return datetime.datetime.utcfromtimestamp(seconds)
                return (datetime.datetime(1970, 1, 1) +
                        datetime.timedelta(seconds=seconds))

            for facet_fieldname, facet_info in raw_results['facets'].items():
                facet_type = facet_info.get('_type', 'terms')
                if facet_type == 'terms':
                    facets['fields'][facet_fieldname] = [
                        (individual['term'], individual['count'])
                        for individual in facet_info['terms']
                    ]
                elif facet_type == 'date_histogram':
                    # Elasticsearch provides UTC timestamps with an extra three
                    # decimals of precision, which datetime barfs on.
                    facets['dates'][facet_fieldname] = [
                        (from_timestamp(individual['time'] / 1000),
                         individual['count'])
                        for individual in facet_info['entries']
                    ]
                elif facet_type == 'query':
                    facets['queries'][facet_fieldname] = facet_info['count']

        unified_index = connections[self.connection_alias].get_unified_index()
        indexed_models = unified_index.get_indexed_models()
        content_field = unified_index.document_field

        for raw_result in raw_results.get('hits', {}).get('hits', []):
            # The document type is "<app_label>_<model_name>"; split on the
            # last underscore because the app label may contain underscores.
            app_label, _, model_name = raw_result['_type'].rpartition('_')

            model = get_model(app_label, model_name)

            if not (model and model in indexed_models):
                hits -= 1
                continue

            index = unified_index.get_index(model)

            # Look for _source, then fields in case this was a values_list query
            if '_source' in raw_result:
                source = raw_result['_source']
            elif 'fields' in raw_result:
                # _source wasn't requested, but we have fields, so this was a
                # values_list query. All Elasticsearch's fields results are lists
                # so we'll need to fix that for anything that isn't multivalued.
                # Build a new dict so the caller's response is left untouched
                # and processing it twice cannot unwrap a value twice.
                source = {}
                for field, value in raw_result['fields'].items():
                    # This breaks multi-value fields when they have one value
                    # and are not declared in the index, so gotta sort out a fix
                    is_multivalued = (field in index.fields and
                                      index.fields[field].is_multivalued)
                    if not is_multivalued and isinstance(value, list) and value:
                        value = value[0]
                    source[field] = value
            else:
                # Neither _source nor fields: there is nothing to build a
                # result from. Skip the hit instead of reusing the previous
                # hit's data (or failing on an unbound name).
                hits -= 1
                continue

            additional_fields = {}
            for key, value in source.items():
                string_key = str(key)

                if string_key in index.fields and hasattr(
                        index.fields[string_key], 'convert'):
                    additional_fields[string_key] = index.fields[
                        string_key].convert(value)
                else:
                    additional_fields[string_key] = self._to_python(value)

            # These two are passed positionally below (DJANGO_ID) or are
            # already split into app_label/model_name (DJANGO_CT).
            additional_fields.pop(DJANGO_CT, None)
            additional_fields.pop(DJANGO_ID, None)

            if 'highlight' in raw_result:
                additional_fields['highlighted'] = raw_result[
                    'highlight'].get(content_field, '')

            if distance_point:
                additional_fields['_point_of_origin'] = distance_point

                if geo_sort and raw_result.get('sort'):
                    from haystack.utils.geo import Distance
                    additional_fields['_distance'] = Distance(
                        km=float(raw_result['sort'][0]))
                else:
                    additional_fields['_distance'] = None

            # NOTE(review): a values_list query that omits DJANGO_ID still
            # raises KeyError here, as before -- confirm whether such queries
            # are expected to reach this method.
            result = result_class(app_label, model_name, source[DJANGO_ID],
                                  raw_result['_score'],
                                  **additional_fields)
            results.append(result)

        return {
            'results': results,
            'hits': hits,
            'facets': facets,
            'spelling_suggestion': spelling_suggestion,
        }
Example #12
0
    def _process_results(
        self,
        raw_results,
        highlight=False,
        result_class=None,
        distance_point=None,
        geo_sort=False,
    ):
        """
        Build the query-layer result dictionary from a raw Elasticsearch
        response.

        Returns a dict with ``results`` (one ``result_class`` instance per hit
        whose model is indexed), ``hits`` (the reported total minus the hits
        that were dropped), ``facets`` and ``spelling_suggestion``.
        ``highlight`` is accepted but not read by this method.
        """
        from haystack import connections

        results = []
        hits = raw_results.get("hits", {}).get("total", 0)
        facets = {}
        spelling_suggestion = None

        if result_class is None:
            result_class = SearchResult

        if self.include_spelling and "suggest" in raw_results:
            raw_suggest = raw_results["suggest"].get("suggest")
            if raw_suggest:
                # Take the first suggested option for each word, or the word
                # itself when no option was offered.
                suggested_words = []
                for word in raw_suggest:
                    if len(word["options"]) == 0:
                        suggested_words.append(word["text"])
                    else:
                        suggested_words.append(word["options"][0]["text"])
                spelling_suggestion = " ".join(suggested_words)

        if "facets" in raw_results:
            facets = {"fields": {}, "dates": {}, "queries": {}}

            # ES can return negative timestamps for pre-1970 data. Handle it.
            def from_timestamp(tm):
                if tm < 0:
                    return datetime(1970, 1, 1) + timedelta(seconds=tm)
                return datetime.utcfromtimestamp(tm)

            for facet_fieldname, facet_info in raw_results["facets"].items():
                facet_type = facet_info.get("_type", "terms")

                if facet_type == "terms":
                    term_counts = []
                    for individual in facet_info["terms"]:
                        term_counts.append(
                            (individual["term"], individual["count"]))
                    facets["fields"][facet_fieldname] = term_counts
                elif facet_type == "date_histogram":
                    # Elasticsearch provides UTC timestamps with an extra three
                    # decimals of precision, which datetime barfs on.
                    date_counts = []
                    for individual in facet_info["entries"]:
                        date_counts.append(
                            (from_timestamp(individual["time"] / 1000),
                             individual["count"]))
                    facets["dates"][facet_fieldname] = date_counts
                elif facet_type == "query":
                    facets["queries"][facet_fieldname] = facet_info["count"]

        unified_index = connections[self.connection_alias].get_unified_index()
        indexed_models = unified_index.get_indexed_models()
        content_field = unified_index.document_field

        raw_hits = raw_results.get("hits", {}).get("hits", [])
        for raw_result in raw_hits:
            source = raw_result["_source"]
            app_label, model_name = source[DJANGO_CT].split(".")
            model = haystack_get_model(app_label, model_name)

            # Hits whose model is unknown or not indexed are dropped and
            # subtracted from the total.
            if not model or model not in indexed_models:
                hits -= 1
                continue

            index = unified_index.get_index(model) if source else source

            additional_fields = {}
            for key, value in source.items():
                string_key = str(key)
                has_converter = string_key in index.fields and hasattr(
                    index.fields[string_key], "convert")

                if has_converter:
                    converted = index.fields[string_key].convert(value)
                else:
                    converted = self._to_python(value)
                additional_fields[string_key] = converted

            # The content type and id are passed to the result positionally.
            del additional_fields[DJANGO_CT]
            del additional_fields[DJANGO_ID]

            if "highlight" in raw_result:
                highlighted = raw_result["highlight"].get(content_field, "")
                additional_fields["highlighted"] = highlighted

            if distance_point:
                additional_fields["_point_of_origin"] = distance_point

                if geo_sort and raw_result.get("sort"):
                    from haystack.utils.geo import Distance

                    first_sort_value = raw_result["sort"][0]
                    additional_fields["_distance"] = Distance(
                        km=float(first_sort_value))
                else:
                    additional_fields["_distance"] = None

            results.append(
                result_class(app_label, model_name, source[DJANGO_ID],
                             raw_result["_score"], **additional_fields))

        return {
            "results": results,
            "hits": hits,
            "facets": facets,
            "spelling_suggestion": spelling_suggestion,
        }