def _feature_search(self):
    """Query sphinx for all features inside the current bounding box.

    Requires at least one feature index (layer name); raises
    exc.HTTPBadRequest otherwise. Results are consumed via
    _parse_feature_results as a side effect, nothing is returned.
    """
    if self.featureIndexes is None:
        # Without layer names a feature search is meaningless.
        raise exc.HTTPBadRequest('Bad request: no layername given')

    # Clamp the client-supplied limit to the hard feature cap.
    if self.limit:
        max_hits = min(self.limit, self.FEATURE_LIMIT)
    else:
        max_hits = self.FEATURE_LIMIT
    self.sphinx.SetLimits(0, max_hits)
    self.sphinx.SetRankingMode(sphinxapi.SPH_RANK_WORDCOUNT)

    if self.bbox and self.sortbbox:
        # Sort by weight first, then by distance to the bbox anchor.
        anchor = self._get_geoanchor_from_bbox()
        self.sphinx.SetGeoAnchor('lat', 'lon', anchor[1], anchor[0])
        self.sphinx.SetSortMode(sphinxapi.SPH_SORT_EXTENDED, '@weight DESC, @geodist ASC')
    else:
        self.sphinx.SetSortMode(sphinxapi.SPH_SORT_EXTENDED, '@weight DESC')

    time_filter = self._get_time_filter()
    query_text = self._query_fields('@detail') if self.searchText else ''
    self._add_feature_queries(query_text, time_filter)

    try:
        responses = self.sphinx.RunQueries()
    except IOError:  # pragma: no cover
        raise exc.HTTPGatewayTimeout()
    self.sphinx.ResetFilters()
    self._parse_feature_results(responses)
def stream_atom(request):
    """Return the template data for the Atom feed of the annotation stream.

    Reads an optional ``limit`` query parameter, clamps it to a sane
    range, fetches matching annotations from the API and maps API
    failures onto the corresponding HTTP gateway errors.
    """
    params = dict(request.params)

    # Bounds for the client-supplied ``limit`` query parameter.
    default_limit = 100
    max_limit = 500
    try:
        limit = int(params.get("limit", default_limit))
    except (ValueError, TypeError):
        # Not an integer at all: fall back to the default.
        limit = default_limit
    if limit < 0:
        limit = default_limit
    params["limit"] = min(limit, max_limit)

    try:
        annotations = request.api_client.get("/search", params=params)["rows"]
    except api_client.ConnectionError as err:
        raise httpexceptions.HTTPServiceUnavailable(err)
    except api_client.Timeout as err:
        raise httpexceptions.HTTPGatewayTimeout(err)
    except api_client.APIError as err:
        raise httpexceptions.HTTPBadGateway(err)

    settings = request.registry.settings
    return {
        "annotations": annotations,
        "atom_url": request.route_url("stream_atom"),
        "html_url": request.route_url("stream"),
        "title": settings.get("h.feed.title"),
        "subtitle": settings.get("h.feed.subtitle"),
    }
def _feature_search(self):
    """Query sphinx for all features inside the given bounding box.

    Requires layer names (featureIndexes); raises exc.HTTPBadRequest
    otherwise. Results are consumed via _parse_feature_results as a
    side effect, nothing is returned.
    """
    # all features in given bounding box
    if self.featureIndexes is None:
        # FIXED: this used to silently ``return`` with a FIXME noting it
        # should be an error; fail loudly like the newer variant of this
        # method so clients learn the layername parameter is mandatory.
        raise exc.HTTPBadRequest('Bad request: no layername given')
    self.sphinx.SetLimits(0, self.FEATURE_LIMIT)
    self.sphinx.SetRankingMode(sphinxapi.SPH_RANK_WORDCOUNT)
    if self.bbox:
        # Sort by weight first, then by distance to the bbox anchor.
        geoAnchor = self._get_geoanchor_from_bbox()
        # NOTE(review): presumably the anchor point has x=lon / y=lat
        # (hence GetY() for lat, GetX() for lon) — confirm upstream.
        self.sphinx.SetGeoAnchor('lat', 'lon', geoAnchor.GetY(), geoAnchor.GetX())
        self.sphinx.SetSortMode(sphinxapi.SPH_SORT_EXTENDED, '@weight DESC, @geodist ASC')
    else:
        self.sphinx.SetSortMode(sphinxapi.SPH_SORT_EXTENDED, '@weight DESC')
    timeFilter = self._get_time_filter()
    if self.searchText:
        searchdText = self._query_fields('@detail')
    else:
        searchdText = ''
    self._add_feature_queries(searchdText, timeFilter)
    try:
        temp = self.sphinx.RunQueries()
    except IOError:
        raise exc.HTTPGatewayTimeout()
    self.sphinx.ResetFilters()
    self._parse_feature_results(temp)
def _swiss_search(self):
    """Run the main location search against the 'swisssearch' sphinx index.

    Needs at least a searchText or a bbox (raises exc.HTTPBadRequest
    otherwise). Falls back to the fuzzy soundex/metaphone index when the
    standard index finds nothing. Matches are handed to
    _parse_location_results as a side effect; nothing is returned.
    """
    if len(self.searchText) < 1 and self.bbox is None:
        raise exc.HTTPBadRequest('You must at least provide a bbox or a searchText parameter')
    # Clamp the client-supplied limit to the hard location cap.
    limit = self.limit if self.limit and self.limit <= self.LOCATION_LIMIT else self.LOCATION_LIMIT
    self.sphinx.SetLimits(0, limit)
    # Define ranking mode
    if self.bbox is not None:
        # With a bbox, rank purely by distance to the bbox anchor point.
        geoAnchor = self._get_geoanchor_from_bbox()
        # NOTE(review): presumably the anchor has x=lon / y=lat (hence
        # GetY() for lat, GetX() for lon) — confirm upstream.
        self.sphinx.SetGeoAnchor('lat', 'lon', geoAnchor.GetY(), geoAnchor.GetX())
        self.sphinx.SetSortMode(sphinxapi.SPH_SORT_EXTENDED, '@geodist ASC')
    else:
        self.sphinx.SetRankingMode(sphinxapi.SPH_RANK_WORDCOUNT)
        self.sphinx.SetSortMode(sphinxapi.SPH_SORT_EXTENDED, 'rank ASC, @weight DESC, num ASC')
    # Filter by origins if needed
    if self.origins is None:
        self._detect_keywords()
    else:
        self._filter_locations_by_origins()
    # Build the sphinx query from the text search and/or the bbox
    # quadindex filter; both present -> combined with an AND.
    searchList = []
    if len(self.searchText) >= 1:
        searchText = self._query_fields('@detail')
        searchList.append(searchText)
    if self.bbox is not None:
        geomFilter = self._get_quadindex_string()
        searchList.append(geomFilter)
    if len(searchList) == 2:
        searchTextFinal = '(' + searchList[0] + ') & (' + searchList[1] + ')'
    elif len(searchList) == 1:
        searchTextFinal = searchList[0]
    # The guard above ensures searchList is non-empty whenever
    # searchText or bbox was provided.
    if len(searchList) != 0:
        try:
            # 'locations_preview' uses a dedicated (staging) index.
            if self.typeInfo == 'locations_preview':
                temp = self.sphinx.Query(searchTextFinal, index='swisssearch_preview')
            else:
                temp = self.sphinx.Query(searchTextFinal, index='swisssearch')
        except IOError:  # pragma: no cover
            raise exc.HTTPGatewayTimeout()
        temp = temp['matches'] if temp is not None else temp
        # if standard index did not find anything, use soundex/metaphon indices
        # which should be more fuzzy in its results
        if temp is None or len(temp) <= 0:
            temp = self._fuzzy_search(searchTextFinal)
    else:
        temp = []
    if temp is not None and len(temp) != 0:
        self._parse_location_results(temp)
def _fuzzy_search(self, searchTextFinal):
    """Query the soundex/metaphone fuzzy index as a fallback search.

    Returns the list of sphinx matches (or None when no query ran /
    nothing matched) and flags the response with results['fuzzy'].
    Raises exc.HTTPGatewayTimeout when sphinx is unreachable.
    """
    # We use different ranking for fuzzy search
    # For ranking modes, see http://sphinxsearch.com/docs/current.html#weighting
    self.sphinx.SetRankingMode(sphinxapi.SPH_RANK_SPH04)
    # Only include results with a certain weight. This might need tweaking
    self.sphinx.SetFilterRange('@weight', 5000, 2**32 - 1)
    # BUGFIX: the condition used to read ``self.typeInfo in ('locations')``
    # — ('locations') without a trailing comma is just the string, so this
    # was a substring test; worse, ``temp`` stayed unbound for any other
    # typeInfo (e.g. 'locations_preview'), crashing with NameError below.
    temp = None
    try:
        if self.typeInfo in ('locations',):
            temp = self.sphinx.Query(searchTextFinal, index='swisssearch_fuzzy')
    except IOError:  # pragma: no cover
        raise exc.HTTPGatewayTimeout()
    temp = temp['matches'] if temp is not None else temp
    self.results['fuzzy'] = 'true'
    return temp
def stream_atom(request):
    """Return the template data for the Atom feed of the annotation stream.

    Fetches the most recent annotations from the API (capped at 1000)
    and maps API failures onto the corresponding HTTP gateway errors.
    """
    try:
        rows = request.api_client.get("/search", params={"limit": 1000})["rows"]
    except api_client.ConnectionError as err:
        raise httpexceptions.HTTPServiceUnavailable(err)
    except api_client.Timeout as err:
        raise httpexceptions.HTTPGatewayTimeout(err)
    except api_client.APIError as err:
        raise httpexceptions.HTTPBadGateway(err)

    settings = request.registry.settings
    return {
        "annotations": rows,
        "atom_url": request.route_url("stream_atom"),
        "html_url": request.route_url("stream"),
        "title": settings.get("h.feed.title"),
        "subtitle": settings.get("h.feed.subtitle"),
    }
def _swiss_search(self):
    """Run the location search against the 'swisssearch' sphinx index.

    Needs at least a searchText or a bbox (raises exc.HTTPBadRequest
    otherwise). Matches are handed to _parse_location_results as a side
    effect; nothing is returned.
    """
    if len(self.searchText) < 1 and self.bbox is None:
        raise exc.HTTPBadRequest('You must at least provide a bbox or a searchText parameter')
    self.sphinx.SetLimits(0, self.LIMIT)
    # Define ranking mode
    if self.bbox is not None:
        # With a bbox, rank purely by distance to the bbox anchor point.
        geoAnchor = self._get_geoanchor_from_bbox()
        # NOTE(review): presumably the anchor has x=lon / y=lat (hence
        # GetY() for lat, GetX() for lon) — confirm upstream.
        self.sphinx.SetGeoAnchor('lat', 'lon', geoAnchor.GetY(), geoAnchor.GetX())
        self.sphinx.SetSortMode(sphinxapi.SPH_SORT_EXTENDED, '@geodist ASC')
    else:
        self.sphinx.SetRankingMode(sphinxapi.SPH_RANK_WORDCOUNT)
        self.sphinx.SetSortMode(sphinxapi.SPH_SORT_EXTENDED, 'rank ASC, @weight DESC, num ASC')
    # Filter by origins if needed
    if self.origins is None:
        self._detect_keywords()
    else:
        self._filter_locations_by_origins()
    # Build the sphinx query from the text search and/or the bbox
    # quadindex filter; both present -> combined with an AND.
    searchList = []
    if len(self.searchText) >= 1:
        searchText = self._query_fields('@detail')
        searchList.append(searchText)
    if self.bbox is not None:
        geomFilter = self._get_quadindex_string()
        searchList.append(geomFilter)
    if len(searchList) == 2:
        searchTextFinal = '(' + searchList[0] + ') & (' + searchList[1] + ')'
    elif len(searchList) == 1:
        searchTextFinal = searchList[0]
    # The guard above ensures searchList is non-empty whenever
    # searchText or bbox was provided.
    if len(searchList) != 0:
        try:
            temp = self.sphinx.Query(searchTextFinal, index='swisssearch')
        except IOError:
            raise exc.HTTPGatewayTimeout()
        temp = temp['matches'] if temp is not None else temp
    else:
        temp = []
    if temp is not None and len(temp) != 0:
        self._parse_location_results(temp)
def _layer_search(self):
    """Search the per-language layers index and append matches to results.

    Builds a sphinx query over title/detail/layer, restricted to the
    current topic (with the 'ech' whitelist hack) and to the layers
    visible on the current geodata staging.
    """

    def staging_filter(staging):
        # Each staging also sees the more stable ones:
        # prod -> prod; integration -> prod|integration;
        # test -> prod|integration|test.
        ret = '@staging prod'
        if staging == 'integration' or staging == 'test':
            ret += ' | @staging integration'
        if staging == 'test':
            ret += ' | @staging test'
        return ret

    # 10 features per layer are returned at max
    layerLimit = self.limit if self.limit and self.limit <= self.LAYER_LIMIT else self.LAYER_LIMIT
    self.sphinx.SetLimits(0, layerLimit)
    self.sphinx.SetRankingMode(sphinxapi.SPH_RANK_WORDCOUNT)
    self.sphinx.SetSortMode(sphinxapi.SPH_SORT_EXTENDED, '@weight DESC')
    # Weights defaults to 1
    self.sphinx.SetFieldWeights({
        '@title': 4,
        '@detail': 2,
        '@layer': 1
    })
    index_name = 'layers_%s' % self.lang
    mapName = self.mapName if self.mapName != 'all' else ''
    # Whitelist hack
    # BUGFIX: this used to read ``mapName in ('api')`` — ('api') without a
    # trailing comma is just the string 'api', so the test matched any
    # substring of it ('a', 'ap', 'pi', ...). '' is kept in the whitelist
    # to preserve the previous behaviour for mapName == 'all' ('' is a
    # substring of every string); NOTE(review): confirm that mapping
    # 'all' -> 'api' topic filter is actually intended.
    if mapName in ('api', ''):
        topicFilter = 'api'
    else:
        topicFilter = '(%s | ech)' % mapName
    searchText = ' '.join((
        self._query_fields('@(title,detail,layer)'),
        '& @topics %s' % (topicFilter),  # Filter by to topic if string not empty, ech whitelist hack
        '& %s' % (staging_filter(self.geodataStaging))  # Only layers in correct staging are searched
    ))
    try:
        temp = self.sphinx.Query(searchText, index=index_name)
    except IOError:  # pragma: no cover
        raise exc.HTTPGatewayTimeout()
    temp = temp['matches'] if temp is not None else temp
    if temp is not None and len(temp) != 0:
        self.results['results'] += temp
def _layer_search(self):
    """Search the per-language layers index and append matches to results.

    Builds a sphinx query over detail/layer, restricted to the current
    topic (with the 'ech' whitelist hack) and to layers on prod staging.
    """
    # 10 features per layer are returned at max
    self.sphinx.SetLimits(0, self.LAYER_LIMIT)
    self.sphinx.SetRankingMode(sphinxapi.SPH_RANK_WORDCOUNT)
    self.sphinx.SetSortMode(sphinxapi.SPH_SORT_EXTENDED, '@weight DESC')
    index_name = 'layers_%s' % self.lang
    mapName = self.mapName if self.mapName != 'all' else ''
    # Whitelist hack
    # BUGFIX: this used to read ``mapName in ('api')`` — ('api') without a
    # trailing comma is just the string 'api', so the test matched any
    # substring of it ('a', 'ap', 'pi', ...). '' is kept in the whitelist
    # to preserve the previous behaviour for mapName == 'all' ('' is a
    # substring of every string); NOTE(review): confirm that mapping
    # 'all' -> 'api' topic filter is actually intended.
    if mapName in ('api', ''):
        topicFilter = 'api'
    else:
        topicFilter = '(%s | ech)' % mapName
    searchText = ' '.join((
        self._query_fields('@(detail,layer)'),
        '& @topics %s' % (topicFilter),  # Filter by to topic if string not empty, ech whitelist hack
        '& @staging prod'  # Only layers in prod are searched
    ))
    try:
        temp = self.sphinx.Query(searchText, index=index_name)
    except IOError:
        raise exc.HTTPGatewayTimeout()
    temp = temp['matches'] if temp is not None else temp
    if temp is not None and len(temp) != 0:
        self.results['results'] += temp
def _swiss_search(self):
    """Run the location search with combined wildcard + exact queries.

    Issues a standard (wildcard) query and an exact prefix query against
    the 'swisssearch' index, merges the results with exact matches
    first, de-duplicates by id, and falls back to the fuzzy index when
    nothing matched. Matches are handed to _parse_location_results as a
    side effect; nothing is returned.
    """
    # Clamp the client-supplied limit to the hard location cap.
    limit = self.limit if self.limit and self.limit <= self.LOCATION_LIMIT else self.LOCATION_LIMIT
    # Define ranking mode
    if self.bbox is not None and self.sortbbox:
        # Rank purely by distance to the bbox anchor; bbox searches use
        # their own (larger) result cap.
        coords = self._get_geoanchor_from_bbox()
        self.sphinx.SetGeoAnchor('lat', 'lon', coords[1], coords[0])
        self.sphinx.SetSortMode(sphinxapi.SPH_SORT_EXTENDED, '@geodist ASC')
        limit = self.BBOX_SEARCH_LIMIT
    else:
        self.sphinx.SetRankingMode(sphinxapi.SPH_RANK_WORDCOUNT)
        self.sphinx.SetSortMode(sphinxapi.SPH_SORT_EXTENDED, 'rank ASC, @weight DESC, num ASC')
    self.sphinx.SetLimits(0, limit)
    # Filter by origins if needed
    if self.origins is None:
        self._detect_keywords()
    else:
        self._filter_locations_by_origins()
    # Build the sphinx query from the text search and/or the bbox
    # quadindex filter; both present -> combined with an AND.
    # NOTE(review): self.searchText is joined with ' '.join below, so it
    # is presumably a list of tokens — confirm against the caller.
    searchList = []
    if len(self.searchText) >= 1:
        searchText = self._query_fields('@detail')
        searchList.append(searchText)
    if self.bbox is not None:
        geomFilter = self._get_quadindex_string()
        searchList.append(geomFilter)
    if len(searchList) == 2:
        searchTextFinal = '(' + searchList[0] + ') & (' + searchList[
            1] + ')'
    elif len(searchList) == 1:
        searchTextFinal = searchList[0]
    if len(searchList) != 0:
        try:
            # wildcard search only if more than one character in searchtext
            if len(' '.join(self.searchText)) > 1 or self.bbox:
                # standard wildcard search
                self.sphinx.AddQuery(searchTextFinal, index='swisssearch')
            # exact search, first 10 results
            searchText = '@detail ^%s' % ' '.join(self.searchText)
            self.sphinx.AddQuery(searchText, index='swisssearch')
            # reset settings
            temp = self.sphinx.RunQueries()
        except IOError:  # pragma: no cover
            raise exc.HTTPGatewayTimeout()
        # Two result sets when the wildcard query also ran; exact matches
        # (the last query added) are merged in front.
        temp_merged = temp[1]['matches'] + temp[0]['matches'] if len(
            temp) == 2 else temp[0]['matches']
        # remove duplicate results, exact search results have priority over wildcard search results
        # NOTE(review): 'seen' is a list, so each membership test is a
        # linear scan (quadratic overall) — fine for small limits.
        temp = []
        seen = []
        for d in temp_merged:
            if d['id'] not in seen:
                temp.append(d)
                seen.append(d['id'])
        # reduce number of elements in result to limit
        temp = temp[:limit]
        # if standard index did not find anything, use soundex/metaphon indices
        # which should be more fuzzy in its results
        if temp is None or len(temp) <= 0:
            temp = self._fuzzy_search(searchTextFinal)
    else:
        temp = []
    if temp is not None and len(temp) != 0:
        self._parse_location_results(temp, limit)