def search_question_detail(request):
    """
    Used by the detail search page.

    Call it like ``.../search/detail/?q=query``.

    **Permissions:** AllowAny

    Responds with 400 when ``q`` is missing or empty. Otherwise responds
    with 200 and the ``QuestionSearchSerializer`` output for every indexed
    ``Question`` whose title contains ``q``, ordered by ``created``.
    """
    # TODO: only title matching is implemented so far.
    q = request.GET.get('q', '')

    # Test the value, not object identity: ``is ''`` only works when the
    # interpreter happens to intern the empty string.
    if not q:
        return Response(status=status.HTTP_400_BAD_REQUEST)

    sqs = SearchQuerySet().models(Question).filter(
        title__contains=q).order_by('created')

    serializer = QuestionSearchSerializer(sqs, many=True)
    return Response(data=serializer.data, status=status.HTTP_200_OK)
def _build_sub_query(self, search_node):
    """Flatten a ``SearchNode`` tree into a space-separated term string.

    ``search_node`` is walked depth-first. A tree such as
    ``(AND: (AND: ('text', 'Robert'), ('text', 'Smith')))`` becomes
    ``'Robert Smith'``.
    """
    flattened = []

    for entry in search_node.children:
        # Nested nodes are flattened recursively.
        if isinstance(entry, SearchNode):
            flattened.append(self._build_sub_query(entry))
            continue

        # Leaves are (field, value) pairs; only the value is used.
        term = entry[1]
        if not hasattr(term, 'input_type_name'):
            # Not an ``InputType`` yet: strings become ``Clean``, anything
            # else is wrapped as ``PythonData`` so ``.prepare()`` exists.
            if isinstance(term, six.string_types):
                wrapper = Clean
            else:
                wrapper = PythonData
            term = wrapper(term)

        flattened.append(term.prepare(self))

    # Coerce every piece to text before joining.
    return ' '.join(six.text_type(piece) for piece in flattened)
def search_public_channels(query):
    """
    Search channels by name or description.

    :param query: query for search
    :return: ``SearchQuerySet`` of ``Channel`` results with ``hidden=False``
    """
    matches = SearchQuerySet().models(Channel)
    matches = matches.filter_or(name__contains=Clean(query))
    matches = matches.filter_or(description__contains=Clean(query))
    # Hidden channels are excluded whatever the text match was.
    return matches.filter(hidden=False)
def search(request):
    """Search the index with the ``q`` GET parameter and return JSON.

    A query wrapped in double quotes is wrapped in ``Exact``; any other
    query is wrapped in ``Words``. The response is
    ``{'results': [{'id', 'title', 'data'}, ...]}``.
    """
    term = request.GET.get('q', '').strip()

    is_quoted = term.startswith('"') and term.endswith('"')
    wrapper = Exact if is_quoted else Words
    hits = SearchQuerySet().filter(content=Clean(wrapper(term)))

    payload = []
    for hit in hits:
        payload.append({'id': hit.pk, 'title': hit.title, 'data': hit.data})
    return JsonResponse({'results': payload})
def search(self, query):
    """Search content or title within the projects the user may see.

    Returns an ``EmptySearchQuerySet`` for a falsy ``query``. Otherwise the
    results are limited to the projects returned by
    ``Project.objects.public_or_collaborate`` for the requesting user.
    """
    if not query:
        return EmptySearchQuerySet()

    # OPTIMIZE: the number of public projects can increase substantially,
    # causing a really high number of project_ids to be sent to
    # elasticsearch
    visible = Project.objects.public_or_collaborate(self.request.user)
    scoped = RelatedSearchQuerySet().filter(
        project_id__in=visible.values_list('pk', flat=True))

    text_match = (SQ(content__contains=Clean(query))
                  | SQ(title__contains=Clean(query)))
    return scoped.filter(text_match)
def build_alt_parser_query(self, parser_name, query_string='', **kwargs):
    """Build a ``_query_:"{!parser k=v ...}query"`` fragment.

    ``kwargs`` become the parser's local parameters, sorted by key. String
    values containing a space are single-quoted. A non-empty
    ``query_string`` is run through ``Clean`` first.
    """
    if query_string:
        query_string = Clean(query_string).prepare(self)

    local_params = []
    for name in sorted(kwargs):
        setting = kwargs[name]
        needs_quotes = isinstance(setting, basestring) and ' ' in setting
        template = u"%s='%s'" if needs_quotes else u"%s=%s"
        local_params.append(template % (name, setting))

    joined = Clean(' '.join(local_params))
    return u'_query_:"{!%s %s}%s"' % (parser_name, joined, query_string)
def build_alt_parser_query(self, parser_name, query_string="", **kwargs):
    """Build a ``_query_:"{!parser k=v ...}query"`` fragment.

    ``kwargs`` are rendered as ``key=value`` pairs in sorted key order.
    String values containing a space are single-quoted. A non-empty
    ``query_string`` is passed through ``Clean``.
    """

    def render(name):
        # One ``key=value`` pair, quoted when the value contains a space.
        setting = kwargs[name]
        if isinstance(setting, six.string_types) and " " in setting:
            return "%s='%s'" % (name, setting)
        return "%s=%s" % (name, setting)

    if query_string:
        query_string = Clean(query_string).prepare(self)

    kwarg_text = " ".join([render(name) for name in sorted(kwargs)])
    return '_query_:"{!%s %s}%s"' % (
        parser_name,
        Clean(kwarg_text),
        query_string,
    )
def prepare_search_query(query, search_field='searchtext'):
    """Build an OR-ed ``SQ`` for ``query`` against ``search_field``.

    ``replace_regex`` matches in the query are replaced with spaces and the
    result is tokenized. The whole query is matched both directly and as a
    ``__startswith`` prefix. With more than one token, each token is added
    the same way. Returns ``None`` when tokenizing yields nothing.
    """
    text = re.sub(replace_regex, ' ', query or '', flags=re.UNICODE)
    tokens = tokenize(text)
    if not tokens:
        return None

    prefix_lookup = search_field + '__startswith'

    whole = Clean(text)
    combined = SQ(**{search_field: whole})
    combined = combined | SQ(**{prefix_lookup: whole})

    if len(tokens) > 1:
        for token in tokens:
            combined = combined | SQ(**{search_field: Clean(token)})
            combined = combined | SQ(**{prefix_lookup: Clean(token)})

    return combined
def prepare(self, query_obj):
    """Return the input as a ``*...*`` wildcard expression.

    The cleaned query is split on commas. Every non-empty piece is prepared
    as ``Clean``, and the pieces are joined with single spaces.
    """
    raw = super(CustomContain, self).prepare(query_obj)
    cleaned = query_obj.clean(raw)

    pieces = []
    for fragment in cleaned.split(','):
        if fragment:
            pieces.append(Clean(fragment).prepare(query_obj))

    return u'*{}*'.format(u' '.join(pieces))
def ask_search(request, language='en', as_json=False):
    """Search ``AnswerPage`` documents and render or serialize the results.

    Args:
        request: the incoming request; reads the ``q``, ``correct`` and
            ``selected_facets`` GET parameters.
        language: ``'en'`` or ``'es'``; selects both the indexed language
            and the slug of the ``AnswerResultsPage`` to serve.
        as_json: when true, return an ``application/json`` ``HttpResponse``
            instead of serving the results page.

    Behavior:
        * Requests carrying ``selected_facets`` are handed to
          ``redirect_ask_search``.
        * An empty query serves the results page without searching.
        * If the query has no hits, ``correct`` is ``'1'`` (the default),
          the ``ASK_SEARCH_TYPOS`` flag is enabled and the backend offers a
          spelling suggestion, the suggestion is searched instead and the
          original query is reported as ``suggestion``.

    Raises:
        Http404: no ``AnswerResultsPage`` matches the language and slug.
        KeyError: ``language`` is not a key of the language map.
    """
    if 'selected_facets' in request.GET:
        return redirect_ask_search(request, language=language)

    language_map = {'en': 'ask-cfpb-search-results', 'es': 'respuestas'}
    sqs = SearchQuerySet().models(AnswerPage)
    clean_query = Clean(request.GET.get('q', ''))
    clean_qstring = clean_query.query_string.strip()
    qstring = clean_qstring
    query_sqs = sqs.filter(content=clean_query, language=language)
    results_page = get_object_or_404(
        AnswerResultsPage,
        language=language,
        slug=language_map[language])

    # If there's no query string, don't search
    if not qstring:
        results_page.query = ''
        results_page.result_query = ''
        return results_page.serve(request)

    # If we have no results from our query, let's try to suggest a better one
    suggestion = sqs.spelling_suggestion(qstring)
    if suggestion == qstring:
        suggestion = None
    elif (suggestion and
            query_sqs.count() == 0 and
            request.GET.get('correct', '1') == '1' and
            flag_enabled('ASK_SEARCH_TYPOS', request=request)):
        # Only swap when there really is a suggestion. Without this guard
        # an empty suggestion would be searched for and reported as the
        # result query.
        query_sqs = sqs.filter(content=suggestion)
        qstring, suggestion = suggestion, qstring

    if as_json:
        results = {
            'query': clean_qstring,
            'result_query': qstring,
            'suggestion': suggestion,
            'results': [{
                'question': result.autocomplete,
                'url': result.url,
                'text': result.text
            } for result in query_sqs]
        }
        json_results = json.dumps(results)
        return HttpResponse(json_results, content_type='application/json')

    results_page.query = clean_qstring
    results_page.result_query = qstring
    results_page.suggestion = suggestion
    results_page.answers = [
        (result.url, result.autocomplete, result.text)
        for result in query_sqs
    ]
    return results_page.serve(request)
def get_queryset(self):
    """Return the parent queryset narrowed to matching ``qanda.question`` hits.

    The ``q`` GET parameter is wrapped in ``Clean`` and stringified. When
    ``strip_stop_words`` returns more than one character, the match also
    accepts the stop-word-stripped text and any of its words as a tag.
    """
    base = super(SearchResultsView, self).get_queryset()
    search_string = str(Clean(self.request.GET.get('q', '')))
    key_words = strip_stop_words(search_string)

    questions = base.filter(django_ct=Exact('qanda.question'))

    criteria = SQ(text__contains=search_string)
    if len(key_words) > 1:
        criteria = (criteria
                    | SQ(text__contains=key_words)
                    | SQ(tags__in=key_words.split()))

    return questions.filter(criteria)
def prepare(self, query_obj):
    """Return the input as a ``*...*`` wildcard expression.

    The prepared string is UTF-8 decoded when it exposes ``decode``, then
    cleaned and split on single spaces. Every non-empty word is prepared as
    ``Clean`` and the words are re-joined with spaces.
    """
    text = super(CustomContain, self).prepare(query_obj)

    try:
        decoded = text.decode('utf-8')
    except AttributeError:
        # No ``decode`` attribute: use the value unchanged.
        decoded = text

    cleaned = query_obj.clean(decoded)
    words = []
    for word in cleaned.split(' '):
        if word:
            words.append(Clean(word).prepare(query_obj))

    return '*{}*'.format(' '.join(words))
def get_context_data(self, **kwargs):
    """Extend the parent context with the first five matching documents.

    Adds ``documents_list`` (at most five ``qanda.document`` hits for the
    ``q`` GET parameter) and sets ``nav`` to ``'search'``.
    """
    context = super(SearchResultsView, self).get_context_data(**kwargs)
    search_string = str(Clean(self.request.GET.get('q', '')))
    key_words = strip_stop_words(search_string)

    documents = SearchQuerySet().filter(django_ct=Exact('qanda.document'))

    criteria = SQ(text__contains=search_string)
    if len(key_words) > 1:
        # Also accept the stop-word-stripped text and any of its words
        # as a tag.
        criteria = (criteria
                    | SQ(text__contains=key_words)
                    | SQ(tags__in=key_words.split()))

    context['documents_list'] = documents.filter(criteria)[:5]
    context['nav'] = 'search'
    return context
def retrieve(self, request, *args, **kwargs):
    """Search users and networks for the ``q`` query parameter.

    At most ``settings.SEARCH_LIMIT`` (default 5) index hits are taken per
    model. The matching primary keys are re-fetched from the database,
    serialized, and handed to the ``save_search_query`` task.
    """
    query = Clean(self.request.QUERY_PARAMS.get('q'))
    limit = getattr(settings, "SEARCH_LIMIT", 5)

    user_hits = SearchQuerySet().filter(
        SQ(username=query) | SQ(bio=query)).models(User)[:limit]
    network_hits = SearchQuerySet().filter(name=query).models(Network)[:limit]

    user_pks = [hit.pk for hit in user_hits]
    network_pks = [hit.pk for hit in network_hits]

    # Swap the index hits for database rows before serializing.
    active_users = User.actives.filter(id__in=user_pks)
    matched_networks = Network.objects.select_related().filter(
        id__in=network_pks)

    data = {
        'users': serializers.SimpleUserSerializer(
            active_users, many=True).data,
        'networks': serializers.SimpleNetworkSerializer(
            matched_networks, many=True).data,
        'query': query.query_string
    }

    save_search_query.delay(
        query=query.query_string,
        user_ids=user_pks,
        network_ids=network_pks)
    return Response(data)
def build_query_fragment(self, field, filter_type, value):
    """Build the query fragment for one ``field``/``filter_type``/``value``.

    ``value`` is wrapped in an input type when it is not one already
    (``RegExp`` for regex filters, ``PythonData`` for fuzzy, ``Exact`` when
    ``self.is_function(value)`` is true, ``Clean`` for strings, otherwise
    ``PythonData``). It is then prepared and rendered through the
    ``filter_types`` templates.

    Returns ``"<index_fieldname>:<fragment>"``, or a ``{!tuple v='...'}``
    expression when the index field's ``model_attr`` contains a dot. Two
    early returns exist: ``isnull`` filters return only the field-existence
    expression, and a ``Raw`` value that skips the parenthesising branch is
    returned unprefixed.
    """
    from haystack import connections

    query_frag = ""

    if not hasattr(value, "input_type_name"):
        # Handle when we've got a ``ValuesListQuerySet``...
        if hasattr(value, "values_list"):
            value = list(value)

        if filter_type in ["regex", "iregex"]:
            value = RegExp(value)
        elif filter_type in ["fuzzy"]:
            value = PythonData(value)
        elif self.is_function(value):
            value = Exact(value)
        elif isinstance(value, six.string_types):
            # It's not an ``InputType``. Assume ``Clean``.
            value = Clean(value)
        else:
            value = PythonData(value)

    # Prepare the query using the InputType.
    prepared_value = value.prepare(self)

    if not isinstance(prepared_value, (set, list, tuple)):
        # Then convert whatever we get back to what pysolr wants if needed.
        prepared_value = self.backend.conn._from_python(prepared_value)

    # ``words_in_value`` ends up as an index into the template lists below:
    # 0 for exactly one whitespace-separated word, 1 for anything else.
    # NOTE(review): it is only assigned when ``prepared_value`` is a ``str``;
    # branches that index with it presumably never see other types -- confirm.
    if isinstance(prepared_value, str):
        words_in_value = len(prepared_value.split())
        words_in_value = 0 if words_in_value == 1 else 1

    # 'content' is a special reserved word, much like 'pk' in
    # Django's ORM layer. It indicates 'no special field'.
    if field == "content":
        index_fieldname = ""
    else:
        index_fieldname = "%s" % connections[self._using].get_unified_index().get_index_fieldname(field)

    # Templates per filter type; where two are given, index 0 is the
    # unquoted form and index 1 the quoted form.
    filter_types = {
        "content": ["%s", '"%s"'],
        "contains": ["*%s*", '"*%s*"'],
        "endswith": ["*%s"],
        "startswith": ["%s*"],
        "exact": ["%s", '"%s"'],
        "gt": ["{%s TO *}"],
        "gte": ["[%s TO *]"],
        "lt": ["{* TO %s}"],
        "lte": ["[* TO %s]"],
        "fuzzy": ["%s~", '"%s"~'],
        "regex": ["/%s/"],
        "iregex": ["/%s/"],
        "isnull": ["-%s:[* TO *]", "%s:[* TO *]"],
    }

    if value.post_process is False:
        query_frag = prepared_value
    else:
        if filter_type in ["content", "exact", "contains", "startswith", "endswith", "fuzzy", "regex", "iregex", "isnull"]:
            if value.input_type_name == "exact":
                query_frag = prepared_value
            elif filter_type == "fuzzy":
                # Check if we are using phrases (words between ")
                # match = re.compile('^([^\\\~]*)\\\~?(\d*)?$').match(prepared_value)
                match = re.compile("^([^\~]*)\~?(\d*)?$").match(prepared_value)
                if match:
                    # If the user provided the ~[\d] part of the fuzzy
                    if match.group(2):
                        if not words_in_value:
                            query_frag = "%s~%s" % match.groups()
                        else:
                            query_frag = '"%s"~%s' % match.groups()
                # Fall back to the plain fuzzy template when nothing was
                # built above.
                # NOTE(review): placed at the ``fuzzy`` branch level; the
                # collapsed source does not show the nesting -- confirm.
                query_frag = (
                    filter_types[filter_type][words_in_value] % prepared_value
                    if not len(query_frag) else query_frag
                )
            elif filter_type in ["startswith", "endswith"]:
                if words_in_value == 0:
                    query_frag = filter_types[filter_type][words_in_value] % prepared_value
                else:
                    # Not a single word: AND the words together and put the
                    # wildcard on the outer edge only.
                    if filter_type in ["startswith"]:
                        query_frag = " AND ".join(prepared_value.split()) + "*"
                    else:
                        query_frag = "*" + " AND ".join(prepared_value.split())
            elif filter_type == "isnull":
                # Returns directly; any other prepared value leaves
                # ``query_frag`` empty.
                if prepared_value == "true":
                    return filter_types[filter_type][0] % index_fieldname
                elif prepared_value == "false":
                    return filter_types[filter_type][1] % index_fieldname
            else:
                query_frag = filter_types[filter_type][words_in_value] % prepared_value
        elif filter_type == "in":
            in_options = []

            if not prepared_value:
                # Empty ``in`` list: emit a clause that matches nothing.
                query_frag = "(!*:*)"
            else:
                for possible_value in prepared_value:
                    in_options.append('"%s"' % self.backend.conn._from_python(possible_value))

                query_frag = "(%s)" % " OR ".join(in_options)
        elif filter_type == "range":
            start = self.backend.conn._from_python(prepared_value[0])
            end = self.backend.conn._from_python(prepared_value[1])
            query_frag = '["%s" TO "%s"]' % (start, end)
        elif filter_type == "exact":
            # NOTE(review): "exact" is already caught by the first branch
            # above, so this branch looks unreachable.
            if value.input_type_name == "exact":
                query_frag = prepared_value
            else:
                prepared_value = Exact(prepared_value).prepare(self)
                query_frag = filter_types[filter_type][words_in_value] % prepared_value
        else:
            if value.input_type_name != "exact":
                prepared_value = Exact(prepared_value).prepare(self)

            query_frag = filter_types[filter_type][words_in_value] % prepared_value

    # Parenthesise the fragment unless it is empty, ``Raw`` or a regex.
    if len(query_frag) and not isinstance(value, Raw) and filter_type not in ["regex", "iregex"]:
        if not query_frag.startswith("(") and not query_frag.endswith(")"):
            query_frag = "(%s)" % query_frag
    elif isinstance(value, Raw):
        return query_frag

    # Check if the field is making a reference to a Tuple/UDF object

    # Obtain the list of searchable fields available at the Index
    # class associated to the model
    searchable_fields = connections[self._using].get_unified_index().all_searchfields()

    # Get the model attribute that is connected to the current
    # index search field.
    # If the param was for a dict entry, the index_fieldname won't be in the list of
    # searchable fields, as the field is a combination of the fieldname plus the key
    if index_fieldname in searchable_fields:
        model_attr = searchable_fields[index_fieldname].model_attr

        if model_attr and "." in model_attr:
            return "{{!tuple v='{field}:{value}'}}".format(field=model_attr, value=query_frag)
        else:
            return "%s:%s" % (index_fieldname, query_frag)
    else:
        return "%s:%s" % (index_fieldname, query_frag)
def join_search_queries(self, left, right):
    """AND two operands together, promoting non-``SQ`` operands first.

    Anything that is not already an ``SQ`` is wrapped as
    ``SQ(contains=Clean(operand))``.
    """
    operands = []
    for operand in (left, right):
        if not isinstance(operand, SQ):
            operand = SQ(contains=Clean(operand))
        operands.append(operand)

    lhs, rhs = operands
    return lhs & rhs
def reduce_query(self, keywords):
    """Fold ``keywords`` into one ``SQ`` using ``join_search_queries``.

    When the fold result is not an ``SQ`` (for example a single keyword,
    where the join function is never called), it is wrapped as
    ``SQ(contains=Clean(...))``.
    """
    combined = reduce(self.join_search_queries, keywords)
    if isinstance(combined, SQ):
        return combined
    return SQ(contains=Clean(combined))
def build_query_fragment(self, field, filter_type, value):
    """Render one ``field``/``filter_type``/``value`` triple as a query fragment.

    Values that are not input types are wrapped first: ``Clean`` for
    strings other than a lone space, ``PythonData`` otherwise. The prepared
    value is formatted through the per-filter templates and the result is
    prefixed with ``"<index field>:"`` unless ``field`` is ``"content"``.
    """
    from haystack import connections

    query_frag = ""
    is_datetime = False

    if not hasattr(value, "input_type_name"):
        # A ``ValuesListQuerySet`` is materialised before wrapping.
        if hasattr(value, "values_list"):
            value = list(value)

        if hasattr(value, "strftime"):
            is_datetime = True

        # Not an ``InputType``: assume ``Clean`` for real strings.
        treat_as_text = isinstance(value, str) and value != " "
        value = Clean(value) if treat_as_text else PythonData(value)

    # Let the input type prepare itself, then convert scalars to the
    # backend's representation.
    prepared_value = value.prepare(self)
    if not isinstance(prepared_value, (set, list, tuple)):
        prepared_value = self.backend._from_python(prepared_value)

    # "content" is the reserved name meaning "no specific field".
    if field == "content":
        index_fieldname = ""
    else:
        unified_index = connections[self._using].get_unified_index()
        index_fieldname = "%s:" % unified_index.get_index_fieldname(field)

    filter_types = {
        "content": "%s",
        "contains": "*%s*",
        "endswith": "*%s",
        "startswith": "%s*",
        "exact": "%s",
        "gt": "{%s to}",
        "gte": "[%s to]",
        "lt": "{to %s}",
        "lte": "[to %s]",
        "fuzzy": "%s~{}/%d".format(FUZZY_WHOOSH_MAX_EDITS),
    }
    per_term_filters = (
        "content", "contains", "startswith", "endswith", "fuzzy",
    )

    if value.post_process is False:
        query_frag = prepared_value
    elif filter_type in per_term_filters:
        if value.input_type_name == "exact":
            query_frag = prepared_value
        else:
            # Convert each term separately and AND the pieces together.
            if isinstance(prepared_value, str):
                candidates = prepared_value.split(" ")
            else:
                if is_datetime:
                    prepared_value = self._convert_datetime(prepared_value)
                candidates = [prepared_value]

            template = filter_types[filter_type]
            clauses = []
            for candidate in candidates:
                text = self.backend._from_python(candidate)
                if filter_type == "fuzzy":
                    prefix_len = min(FUZZY_WHOOSH_MIN_PREFIX, len(text))
                    clauses.append(template % (text, prefix_len))
                else:
                    clauses.append(template % text)

            if len(clauses) == 1:
                query_frag = clauses[0]
            else:
                query_frag = "(%s)" % " AND ".join(clauses)
    elif filter_type == "in":
        choices = []
        for candidate in prepared_value:
            is_datetime = hasattr(candidate, "strftime")
            converted = self.backend._from_python(candidate)
            if is_datetime:
                converted = self._convert_datetime(converted)

            # Only non-datetime strings are double-quoted.
            if isinstance(converted, str) and not is_datetime:
                choices.append('"%s"' % converted)
            else:
                choices.append("%s" % converted)
        query_frag = "(%s)" % " OR ".join(choices)
    elif filter_type == "range":
        start = self.backend._from_python(prepared_value[0])
        end = self.backend._from_python(prepared_value[1])
        if hasattr(prepared_value[0], "strftime"):
            start = self._convert_datetime(start)
        if hasattr(prepared_value[1], "strftime"):
            end = self._convert_datetime(end)
        query_frag = "[%s to %s]" % (start, end)
    elif filter_type == "exact":
        if value.input_type_name != "exact":
            prepared_value = Exact(prepared_value).prepare(self)
            query_frag = filter_types[filter_type] % prepared_value
        else:
            query_frag = prepared_value
    else:
        if is_datetime:
            prepared_value = self._convert_datetime(prepared_value)
        query_frag = filter_types[filter_type] % prepared_value

    # Parenthesise non-empty, non-``Raw`` fragments that are not wrapped yet.
    if (len(query_frag) and not isinstance(value, Raw)
            and not query_frag.startswith("(")
            and not query_frag.endswith(")")):
        query_frag = "(%s)" % query_frag

    return "%s%s" % (index_fieldname, query_frag)
def build_query_fragment(self, field, filter_type, value):
    """Render one ``field``/``filter_type``/``value`` triple as a query fragment.

    Values that are not input types are wrapped first (``Clean`` for
    strings, ``PythonData`` otherwise). The prepared value is formatted
    through the per-filter templates and prefixed with ``"<index field>:"``
    unless ``field`` is ``'content'``.
    """
    from haystack import connections

    query_frag = ''

    if not hasattr(value, 'input_type_name'):
        # A ``ValuesListQuerySet`` is materialised before wrapping.
        if hasattr(value, 'values_list'):
            value = list(value)

        # Not an ``InputType``: assume ``Clean`` for strings.
        input_cls = Clean if isinstance(value, basestring) else PythonData
        value = input_cls(value)

    # Let the input type prepare itself, then convert scalars to what
    # pysolr expects.
    prepared_value = value.prepare(self)
    if not isinstance(prepared_value, (set, list, tuple)):
        prepared_value = self.backend.conn._from_python(prepared_value)

    # 'content' is the reserved name meaning "no specific field".
    if field == 'content':
        index_fieldname = ''
    else:
        unified_index = connections[self._using].get_unified_index()
        index_fieldname = u'%s:' % unified_index.get_index_fieldname(field)

    filter_types = {
        'contains': u'%s',
        'startswith': u'%s*',
        'exact': u'%s',
        'gt': u'{%s TO *}',
        'gte': u'[%s TO *]',
        'lt': u'{* TO %s}',
        'lte': u'[* TO %s]',
    }

    if value.post_process is False:
        query_frag = prepared_value
    elif filter_type in ('contains', 'startswith'):
        if value.input_type_name == 'exact':
            query_frag = prepared_value
        else:
            # Convert each space-separated term and AND them together.
            template = filter_types[filter_type]
            clauses = [
                template % self.backend.conn._from_python(word)
                for word in prepared_value.split(' ')
            ]
            if len(clauses) == 1:
                query_frag = clauses[0]
            else:
                query_frag = u"(%s)" % " AND ".join(clauses)
    elif filter_type == 'in':
        choices = [
            u'"%s"' % self.backend.conn._from_python(item)
            for item in prepared_value
        ]
        query_frag = u"(%s)" % " OR ".join(choices)
    elif filter_type == 'range':
        start = self.backend.conn._from_python(prepared_value[0])
        end = self.backend.conn._from_python(prepared_value[1])
        query_frag = u'["%s" TO "%s"]' % (start, end)
    elif filter_type == 'exact':
        if value.input_type_name != 'exact':
            prepared_value = Exact(prepared_value).prepare(self)
            query_frag = filter_types[filter_type] % prepared_value
        else:
            query_frag = prepared_value
    else:
        if value.input_type_name != 'exact':
            prepared_value = Exact(prepared_value).prepare(self)
        query_frag = filter_types[filter_type] % prepared_value

    return u"%s%s" % (index_fieldname, query_frag)
def _query_results(query, person):
    """
    Build the combined, score-ordered search results for this person.

    Offerings, pages, members and discussion topics are searched
    separately, each capped at MAX_RESULTS, then merged and capped again.
    Make sure any result.content_type values are reflected in
    RESULT_TYPE_DISPLAY for display to the user.
    """
    if len(query) < 2:
        return []

    # hack to make email addresses searchable as userids
    query = Clean(query.replace('@sfu.ca', ''))

    # offerings person was a member of (coredata.CourseOffering)
    if person:
        members = Member.objects.filter(person=person).exclude(
            role='DROP').select_related('offering')
        offering_slugs = {m.offering.slug for m in members}
        # offerings that match the query ...
        offering_results = SearchQuerySet().models(CourseOffering).filter(
            text__fuzzy=query)
        # ... and this person was in
        offering_results = offering_results.filter(slug__in=offering_slugs)
    else:
        members = []
        offering_results = []

    # pages this person can view (pages.Page)
    page_acl = {'ALL'}
    for m in members:
        # offering_slug + "_" + acl_value strings, which will match the
        # permission_key field in the index
        page_acl.update(
            "%s_%s" % (m.offering.slug, acl)
            for acl in ACL_ROLES[m.role] if acl != 'ALL')
    # pages that match the query ...
    page_results = SearchQuerySet().models(Page).filter(text__fuzzy=query)
    # ... and are visible to this user
    page_results = page_results.filter(permission_key__in=page_acl)

    # discussion this person can view (discussion.DiscussionTopic)
    if person:
        # discussions that match the query ...
        discuss_results = SearchQuerySet().models(DiscussionTopic).filter(
            text__fuzzy=query)
        # ... and this person was in
        discuss_results = discuss_results.filter(slug__in=offering_slugs)
    else:
        discuss_results = []

    # students taught by instructor (coredata.Member)
    instr_members = Member.objects.filter(
        person=person, role__in=['INST', 'TA']
    ).exclude(offering__component='CAN').select_related('offering')
    if person and instr_members:
        offering_slugs = {m.offering.slug for m in instr_members}
        # members that match the query ...
        member_results = SearchQuerySet().models(Member).filter(
            text__fuzzy=query)
        # ... and this person was the instructor for
        member_results = member_results.filter(
            offering_slug__in=offering_slugs)
        member_results = member_results.load_all()
    else:
        member_results = []

    # combine and limit to best results
    merged = itertools.chain(
        offering_results[:MAX_RESULTS],
        page_results[:MAX_RESULTS],
        member_results[:MAX_RESULTS],
        discuss_results[:MAX_RESULTS],
    )
    ranked = sorted(
        (hit for hit in merged if hit is not None),
        key=lambda hit: -hit.score)
    # (list before this could be n*MAX_RESULTS long)
    return ranked[:MAX_RESULTS]
def get_queryset(self):
    """Build the ``SearchQuerySet`` for the current search request.

    Query parameters read:
        * ``q`` / ``input`` -- full-text query or autocomplete entry;
          exactly one of the two must be supplied.
        * ``municipality`` -- comma-separated municipality names.
        * ``city_as_department`` -- comma-separated department UUIDs.
        * ``service`` -- comma-separated service ids; restricts to units.
        * ``type`` -- comma-separated result types (``service_node``,
          ``service``, ``unit``, ``address``, ``administrative_division``).

    Comma-separated values are stripped of surrounding whitespace and
    empty entries are ignored.

    Raises:
        ParseError: neither or both of ``q`` and ``input`` are given.
    """

    def _split_csv(raw):
        # Split a comma-separated parameter, dropping blanks and padding.
        return [part.strip() for part in raw.split(',') if part.strip()]

    def _any_of(lookup, values):
        # OR together one ``SQ(lookup=value)`` per value; ``values`` must
        # not be empty.
        clause = SQ(**{lookup: values[-1]})
        for item in values[:-1]:
            clause |= SQ(**{lookup: item})
        return clause

    params = self.request.query_params

    queryset = SearchQuerySet()
    if hasattr(self.request, 'accepted_media_type') and re.match(
            KML_REGEXP, self.request.accepted_media_type):
        queryset = queryset.models(Unit)
        # NOTE(review): this extends the list on every KML request; if
        # ``only_fields`` is shared between requests it keeps growing --
        # confirm.
        self.only_fields['unit'].extend(['street_address', 'www'])

    input_val = params.get('input', '').strip()
    q_val = params.get('q', '').strip()

    if not input_val and not q_val:
        raise ParseError(
            "Supply search terms with 'q=' or autocomplete entry with 'input='"
        )
    if input_val and q_val:
        raise ParseError("Supply either 'q' or 'input', not both")

    if input_val:
        queryset = queryset.filter(
            SQ(autosuggest=input_val)
            | SQ(autosuggest_extra_searchwords=input_val)
            | SQ(autosuggest_exact__exact=input_val)
            | SQ(SQ(number=input_val) & SQ(autosuggest=input_val)))
    else:
        queryset = (queryset.filter(name=Clean(q_val))
                    .filter_or(text=Clean(q_val))
                    .filter_or(extra_searchwords=q_val)
                    .filter_or(address=q_val))

    # Results that are not units bypass the municipality and department
    # restrictions below.
    IS_NOT_UNIT_SQ = (SQ(django_ct='services.service')
                      | SQ(django_ct='services.servicenode')
                      | SQ(django_ct='munigeo.address'))

    if 'municipality' in params:
        municipalities = _split_csv(params['municipality'].lower())
        if municipalities:
            muni_sq = _any_of('municipality', municipalities)
            queryset = queryset.filter(SQ(muni_sq | IS_NOT_UNIT_SQ))

    if 'city_as_department' in params:
        deps_uuid = _split_csv(params['city_as_department'].lower())
        if deps_uuid:
            # forming municipality search query
            deps = Department.objects.filter(
                uuid__in=deps_uuid).select_related('municipality')
            munis = [d.municipality.name for d in deps]

            # forming root_department search query
            scope_sq = _any_of('root_department', deps_uuid)
            # No matching department means no municipality clause to add.
            if munis:
                scope_sq = _any_of('municipality', munis) | scope_sq

            # updating queryset
            queryset = queryset.filter(SQ(scope_sq | IS_NOT_UNIT_SQ))

    service = params.get('service')
    if service:
        services = service.split(',')
        queryset = queryset.filter(django_ct='services.unit').filter(
            services__in=services)

    # Only units marked public should be returned. For other types,
    # public is always True.
    queryset = queryset.filter(public='true')

    type_models = {
        'service_node': ServiceNode,
        'service': Service,
        'unit': Unit,
        'address': Address,
        'administrative_division': AdministrativeDivision,
    }
    models = {
        type_models[t]
        for t in params.get('type', '').split(',')
        if t in type_models
    }

    if self._is_kml_media_type_request():
        queryset = queryset.models(Unit)
    elif models:
        queryset = queryset.models(*models)
    else:
        # Hide the to-be-deprecated servicenode from default types
        queryset = queryset.models(Service, Unit, Address,
                                   AdministrativeDivision)
    return queryset
def build_query_fragment(self, field, filter_type, value):
    """Render one ``field``/``filter_type``/``value`` triple as a query fragment.

    Values that are not input types are wrapped first (``Clean`` for
    strings, ``PythonData`` otherwise). The prepared value is formatted
    with the template for ``filter_type``. Non-empty fragments from
    non-``Raw`` values are parenthesised unless they already start with
    ``(`` or end with ``)``.

    For ``field == 'content'`` only ``contains``, ``startswith`` and
    ``exact`` templates exist (``in`` reuses ``contains``); other filter
    types raise ``KeyError``.
    """
    query_frag = ''

    if not hasattr(value, 'input_type_name'):
        # Handle when we've got a ``ValuesListQuerySet``
        if hasattr(value, 'values_list'):
            value = list(value)

        if isinstance(value, six.string_types):
            # It's not an ``InputType``. Assume ``Clean``
            value = Clean(value)
        else:
            value = PythonData(value)

    prepared_value = value.prepare(self)

    if field == 'content':
        # 'content' means "no specific field", so the templates carry no
        # field name.
        index_fieldname = ''
        filter_types = {
            'contains': u'({value})',
            'startswith': u'~{value}',
            'exact': u'"{value}"',
        }
    else:
        index_fieldname = connections[self._using].get_unified_index().get_index_fieldname(field)
        filter_types = {
            'contains': u'{field} = ({value})',
            'startswith': u'{field} = ~{value}',
            'exact': u'{field} = "{value}"',
            'gt': u'{field} > {value}',
            'gte': u'{field} >= {value}',
            'lt': u'{field} < {value}',
            # The closing brace after ``field`` is required: without it
            # ``str.format`` rejects the template.
            'lte': u'{field} <= {value}',
        }

    if filter_type in ['contains', 'startswith']:
        if value.input_type_name == 'exact':
            prepared_value = '"{}"'.format(prepared_value)
        query_frag = filter_types[filter_type].format(
            field=index_fieldname, value=prepared_value)
    elif filter_type == 'in':
        in_options = []
        for possible_value in prepared_value:
            # Only strings are quoted.
            if isinstance(possible_value, six.string_types):
                in_options.append('"{}"'.format(possible_value))
            else:
                in_options.append('{}'.format(possible_value))
        frag = '({})'.format(') OR ('.join(in_options))
        query_frag = filter_types['contains'].format(
            field=index_fieldname, value=frag)
    elif filter_type == 'range':
        # A range is the conjunction of a ``gte`` and an ``lte`` clause.
        start = filter_types['gte'].format(
            field=index_fieldname, value=prepared_value[0])
        end = filter_types['lte'].format(
            field=index_fieldname, value=prepared_value[1])
        query_frag = '({}) AND ({})'.format(start, end)
    else:
        if value.input_type_name != 'exact':
            prepared_value = Exact(prepared_value).prepare(self)
        query_frag = filter_types[filter_type].format(
            field=index_fieldname, value=prepared_value)

    if len(query_frag) and not isinstance(value, Raw):
        if not query_frag.startswith('(') and not query_frag.endswith(')'):
            query_frag = '({})'.format(query_frag)

    return query_frag
def build_query_fragment(self, field, filter_type, value):
    """Render one ``field``/``filter_type``/``value`` triple as a query fragment.

    Values that are not input types are wrapped first: ``Clean`` for
    strings other than a lone space, ``PythonData`` otherwise. The prepared
    value is formatted through the per-filter templates and the result is
    prefixed with ``"<index field>:"`` unless ``field`` is ``'content'``.
    """
    from haystack import connections

    query_frag = ''
    is_datetime = False

    if not hasattr(value, 'input_type_name'):
        # A ``ValuesListQuerySet`` is materialised before wrapping.
        if hasattr(value, 'values_list'):
            value = list(value)

        if hasattr(value, 'strftime'):
            is_datetime = True

        # Not an ``InputType``: assume ``Clean`` for real strings.
        treat_as_text = isinstance(value, six.string_types) and value != ' '
        value = Clean(value) if treat_as_text else PythonData(value)

    # Let the input type prepare itself, then convert scalars to the
    # backend's representation.
    prepared_value = value.prepare(self)
    if not isinstance(prepared_value, (set, list, tuple)):
        prepared_value = self.backend._from_python(prepared_value)

    # 'content' is the reserved name meaning "no specific field".
    if field == 'content':
        index_fieldname = ''
    else:
        unified_index = connections[self._using].get_unified_index()
        index_fieldname = u'%s:' % unified_index.get_index_fieldname(field)

    filter_types = {
        'contains': '%s',
        'startswith': "%s*",
        'exact': '%s',
        'gt': "{%s to}",
        'gte': "[%s to]",
        'lt': "{to %s}",
        'lte': "[to %s]",
    }

    if value.post_process is False:
        query_frag = prepared_value
    elif filter_type in ('contains', 'startswith'):
        if value.input_type_name == 'exact':
            query_frag = prepared_value
        else:
            # Convert each term separately and AND the pieces together.
            if isinstance(prepared_value, six.string_types):
                candidates = prepared_value.split(' ')
            else:
                if is_datetime:
                    prepared_value = self._convert_datetime(prepared_value)
                candidates = [prepared_value]

            template = filter_types[filter_type]
            clauses = [
                template % self.backend._from_python(candidate)
                for candidate in candidates
            ]

            if len(clauses) == 1:
                query_frag = clauses[0]
            else:
                query_frag = u"(%s)" % " AND ".join(clauses)
    elif filter_type == 'in':
        choices = []
        for candidate in prepared_value:
            is_datetime = hasattr(candidate, 'strftime')
            converted = self.backend._from_python(candidate)
            if is_datetime:
                converted = self._convert_datetime(converted)

            # Only non-datetime strings are double-quoted.
            if isinstance(converted, six.string_types) and not is_datetime:
                choices.append('"%s"' % converted)
            else:
                choices.append('%s' % converted)
        query_frag = "(%s)" % " OR ".join(choices)
    elif filter_type == 'range':
        start = self.backend._from_python(prepared_value[0])
        end = self.backend._from_python(prepared_value[1])
        if hasattr(prepared_value[0], 'strftime'):
            start = self._convert_datetime(start)
        if hasattr(prepared_value[1], 'strftime'):
            end = self._convert_datetime(end)
        query_frag = u"[%s to %s]" % (start, end)
    elif filter_type == 'exact':
        if value.input_type_name != 'exact':
            prepared_value = Exact(prepared_value).prepare(self)
            query_frag = filter_types[filter_type] % prepared_value
        else:
            query_frag = prepared_value
    else:
        if is_datetime:
            prepared_value = self._convert_datetime(prepared_value)
        query_frag = filter_types[filter_type] % prepared_value

    # Parenthesise non-empty, non-``Raw`` fragments that are not wrapped yet.
    if (len(query_frag) and not isinstance(value, Raw)
            and not query_frag.startswith('(')
            and not query_frag.endswith(')')):
        query_frag = "(%s)" % query_frag

    return u"%s%s" % (index_fieldname, query_frag)
def build_query_fragment(self, field, filter_type, value):
    """Render one ``field``/``filter_type``/``value`` triple as a query fragment.

    Values that are not input types are wrapped first (``Clean`` for
    strings, ``PythonData`` otherwise). The prepared value is formatted
    through the per-filter templates and prefixed with ``"<index field>:"``
    unless ``field`` is ``"content"``.
    """
    from haystack import connections

    query_frag = ""

    if not hasattr(value, "input_type_name"):
        # A ``ValuesListQuerySet`` is materialised before wrapping.
        if hasattr(value, "values_list"):
            value = list(value)

        # Not an ``InputType``: assume ``Clean`` for strings.
        input_cls = Clean if isinstance(value, six.string_types) else PythonData
        value = input_cls(value)

    # Let the input type prepare itself, then convert scalars to what
    # pysolr expects.
    prepared_value = value.prepare(self)
    if not isinstance(prepared_value, (set, list, tuple)):
        prepared_value = self.backend.conn._from_python(prepared_value)

    # "content" is the reserved name meaning "no specific field".
    if field == "content":
        index_fieldname = ""
    else:
        unified_index = connections[self._using].get_unified_index()
        index_fieldname = "%s:" % unified_index.get_index_fieldname(field)

    filter_types = {
        "content": "%s",
        "contains": "*%s*",
        "endswith": "*%s",
        "startswith": "%s*",
        "exact": "%s",
        "gt": "{%s TO *}",
        "gte": "[%s TO *]",
        "lt": "{* TO %s}",
        "lte": "[* TO %s]",
        "fuzzy": "%s~",
    }
    per_term_filters = (
        "content", "contains", "startswith", "endswith", "fuzzy",
    )

    if value.post_process is False:
        query_frag = prepared_value
    elif filter_type in per_term_filters:
        if value.input_type_name == "exact":
            query_frag = prepared_value
        else:
            # Convert each space-separated term and AND them together.
            template = filter_types[filter_type]
            clauses = [
                template % self.backend.conn._from_python(word)
                for word in prepared_value.split(" ")
            ]
            if len(clauses) == 1:
                query_frag = clauses[0]
            else:
                query_frag = "(%s)" % " AND ".join(clauses)
    elif filter_type == "in":
        if not prepared_value:
            # Empty ``in`` list: emit a clause that matches nothing.
            query_frag = "(!*:*)"
        else:
            choices = [
                '"%s"' % self.backend.conn._from_python(item)
                for item in prepared_value
            ]
            query_frag = "(%s)" % " OR ".join(choices)
    elif filter_type == "range":
        start = self.backend.conn._from_python(prepared_value[0])
        end = self.backend.conn._from_python(prepared_value[1])
        query_frag = '["%s" TO "%s"]' % (start, end)
    elif filter_type == "exact":
        if value.input_type_name != "exact":
            prepared_value = Exact(prepared_value).prepare(self)
            query_frag = filter_types[filter_type] % prepared_value
        else:
            query_frag = prepared_value
    else:
        if value.input_type_name != "exact":
            prepared_value = Exact(prepared_value).prepare(self)
        query_frag = filter_types[filter_type] % prepared_value

    # Parenthesise non-empty, non-``Raw`` fragments that are not wrapped yet.
    if (len(query_frag) and not isinstance(value, Raw)
            and not query_frag.startswith("(")
            and not query_frag.endswith(")")):
        query_frag = "(%s)" % query_frag

    return "%s%s" % (index_fieldname, query_frag)
def build_query_fragment(self, field, filter_type, value):
    """Build the query fragment for one filter, with datetime and fuzzy support.

    Values that are not haystack input types are wrapped first: strings
    (other than a single space) become ``Clean``, anything else becomes
    ``PythonData``. Values exposing ``strftime`` are treated as datetimes
    and passed through ``self._convert_datetime`` where the original value
    is interpolated into the query.

    Returns ``<index fieldname>:<fragment>``; the reserved field ``content``
    produces no prefix.
    """
    from haystack import connections

    looks_like_datetime = False

    if not hasattr(value, "input_type_name"):
        # A ``ValuesListQuerySet`` must be materialised before wrapping.
        if hasattr(value, "values_list"):
            value = list(value)

        looks_like_datetime = hasattr(value, "strftime")

        # Not an ``InputType``: strings are assumed to be ``Clean`` input.
        if isinstance(value, six.string_types) and value != " ":
            value = Clean(value)
        else:
            value = PythonData(value)

    # Let the input type produce its prepared form.
    prepared = value.prepare(self)

    if not isinstance(prepared, (set, list, tuple)):
        # Scalars are converted to the backend's representation.
        prepared = self.backend._from_python(prepared)

    # 'content' is reserved (much like 'pk' in Django's ORM) and means
    # 'no special field'.
    if field == "content":
        prefix = ""
    else:
        unified_index = connections[self._using].get_unified_index()
        prefix = "%s:" % unified_index.get_index_fieldname(field)

    templates = {
        "content": "%s",
        "contains": "*%s*",
        "endswith": "*%s",
        "startswith": "%s*",
        "exact": "%s",
        "gt": "{%s to}",
        "gte": "[%s to]",
        "lt": "{to %s}",
        "lte": "[to %s]",
        "fuzzy": "%s~{}/%d".format(FUZZY_WHOOSH_MAX_EDITS),
    }
    per_term_filters = ("content", "contains", "startswith", "endswith", "fuzzy")

    if value.post_process is False:
        # The input type asked for its output to be used verbatim.
        fragment = prepared
    elif filter_type in per_term_filters:
        if value.input_type_name == "exact":
            fragment = prepared
        else:
            # Strings are split into terms; any other value is one term.
            if isinstance(prepared, six.string_types):
                candidates = prepared.split(" ")
            else:
                if looks_like_datetime:
                    prepared = self._convert_datetime(prepared)
                candidates = [prepared]

            pieces = []
            for candidate in candidates:
                text = self.backend._from_python(candidate)
                template = templates[filter_type]
                if filter_type == "fuzzy":
                    # The fuzzy template takes the term plus a prefix length
                    # capped at the term's own length.
                    prefix_length = min(FUZZY_WHOOSH_MIN_PREFIX, len(text))
                    pieces.append(template % (text, prefix_length))
                else:
                    pieces.append(template % text)

            if len(pieces) == 1:
                fragment = pieces[0]
            else:
                fragment = "(%s)" % " AND ".join(pieces)
    elif filter_type == "in":
        options = []
        for candidate in prepared:
            item_is_datetime = hasattr(candidate, "strftime")
            converted = self.backend._from_python(candidate)
            if item_is_datetime:
                converted = self._convert_datetime(converted)

            # Only non-datetime strings are quoted.
            if isinstance(converted, six.string_types) and not item_is_datetime:
                options.append('"%s"' % converted)
            else:
                options.append("%s" % converted)
        fragment = "(%s)" % " OR ".join(options)
    elif filter_type == "range":
        lower = self.backend._from_python(prepared[0])
        upper = self.backend._from_python(prepared[1])
        if hasattr(prepared[0], "strftime"):
            lower = self._convert_datetime(lower)
        if hasattr(prepared[1], "strftime"):
            upper = self._convert_datetime(upper)
        fragment = "[%s to %s]" % (lower, upper)
    elif filter_type == "exact":
        if value.input_type_name == "exact":
            fragment = prepared
        else:
            prepared = Exact(prepared).prepare(self)
            fragment = templates[filter_type] % prepared
    else:
        # Remaining filter types (gt, gte, lt, lte).
        if looks_like_datetime:
            prepared = self._convert_datetime(prepared)
        fragment = templates[filter_type] % prepared

    # Non-empty fragments that are not ``Raw`` input get grouped unless they
    # already start or end with a parenthesis.
    if len(fragment) and not isinstance(value, Raw):
        if not (fragment.startswith("(") or fragment.endswith(")")):
            fragment = "(%s)" % fragment

    return "%s%s" % (prefix, fragment)
def build_query_fragment(self, field, filter_type, value):
    """Construct the query fragment for a single field filter.

    The fragment is built once and then applied to every index field name
    the unified index reports for ``field``; several names are OR-ed
    together inside one group. The reserved field ``content`` has no index
    field names, so the bare fragment is returned for it.

    :param field: Field to search.
    :type field: str
    :param filter_type: Filter type (contains, gt, lt...)
    :type filter_type: str
    :param value: Value to search; either a haystack input type (exposes
        ``input_type_name``) or a plain Python value, which is wrapped in
        ``Clean`` (strings) or ``PythonData`` (anything else).
    :return: Query fragment.
    :rtype: str
    """
    from haystack import connections

    if not hasattr(value, 'input_type_name'):
        # Handle when we've got a ``ValuesListQuerySet``...
        if hasattr(value, 'values_list'):
            value = list(value)

        if isinstance(value, six.string_types):
            # It's not an ``InputType``. Assume ``Clean``.
            value = Clean(value)
        else:
            value = PythonData(value)

    # Prepare the query using the InputType.
    prepared_value = value.prepare(self)

    if not isinstance(prepared_value, (set, list, tuple)):
        # Then convert whatever we get back to what elasticsearch wants if needed.
        prepared_value = self.backend._from_python(prepared_value)

    # 'content' is a special reserved word, much like 'pk' in
    # Django's ORM layer. It indicates 'no special field'.
    if field == 'content':
        index_fieldnames = {}
    else:
        # ``.values()`` is read from this below, so the unified index is
        # expected to hand back a mapping here.
        index_fieldnames = connections[self._using].get_unified_index().get_index_fieldname(field)

    filter_types = {
        'contains': u'%s',
        'startswith': u'%s*',
        'exact': u'%s',
        'gt': u'{%s TO *}',
        'gte': u'[%s TO *]',
        'lt': u'{* TO %s}',
        'lte': u'[* TO %s]',
    }

    if value.post_process is False:
        # The input type asked for its output to be used verbatim.
        query_frag = prepared_value
    elif filter_type in ['contains', 'startswith']:
        if value.input_type_name == 'exact':
            query_frag = prepared_value
        else:
            # Iterate over terms & incorporate the converted form of each into the query.
            template = filter_types[filter_type]
            terms = []

            if isinstance(prepared_value, six.string_types):
                for possible_value in prepared_value.split(' '):
                    term = template % self.backend._from_python(possible_value)
                    terms.append(u'"%s"' % term)
            elif isinstance(prepared_value, bool):
                term = template % six.text_type(prepared_value).lower()
                terms.append(u'"%s"' % term)
            else:
                terms.append(template % prepared_value)

            if len(terms) == 1:
                query_frag = terms[0]
            else:
                query_frag = u"(%s)" % " AND ".join(terms)
    elif filter_type == 'in':
        in_options = []

        for possible_value in prepared_value:
            if isinstance(possible_value, six.string_types):
                in_options.append(u'"%s"' % self.backend._from_python(possible_value))
            elif isinstance(possible_value, bool):
                # ``filter_types`` has no 'in' entry, so looking the template
                # up here raised KeyError; booleans are rendered directly as
                # quoted lowercase text, like the 'contains' branch does.
                in_options.append(u'"%s"' % six.text_type(possible_value).lower())
            else:
                in_options.append(u'%s' % possible_value)

        query_frag = u"(%s)" % " OR ".join(in_options)
    elif filter_type == 'range':
        start = self.backend._from_python(prepared_value[0])
        end = self.backend._from_python(prepared_value[1])
        query_frag = u'[%s TO %s]' % (start, end)
    elif filter_type == 'exact':
        if value.input_type_name == 'exact':
            query_frag = prepared_value
        else:
            prepared_value = Exact(prepared_value).prepare(self)
            query_frag = filter_types[filter_type] % prepared_value
    else:
        # NOTE(review): ``Exact`` is applied here only when the input is
        # *already* an exact input type, so plain gt/gte/lt/lte values stay
        # unquoted. Confirm that this is intentional.
        if value.input_type_name == 'exact':
            prepared_value = Exact(prepared_value).prepare(self)

        query_frag = filter_types[filter_type] % prepared_value

    if len(query_frag) and not isinstance(value, Raw):
        if not query_frag.startswith('(') and not query_frag.endswith(')'):
            # No ``str()`` coercion: on Python 2 it raises UnicodeEncodeError
            # for non-ASCII unicode fragments.
            query_frag = '(%s)' % query_frag

    field_names = set(index_fieldnames.values())

    if not field_names:
        return query_frag

    multiple_query_frag = ' OR '.join(
        [u'%s:%s' % (field_name, query_frag) for field_name in field_names])
    return "(%s)" % multiple_query_frag
def has_permission(self, request, view):
    """Grant access only when the ``q`` query parameter is long enough.

    The cleaned query string must be strictly longer than the
    ``SEARCH_QUERY_LENGTH`` setting (3 when the setting is absent). A
    missing or empty ``q`` is always rejected.
    """
    raw_query = request.QUERY_PARAMS.get('q')
    minimum = getattr(settings, "SEARCH_QUERY_LENGTH", 3)

    if not raw_query:
        return False

    return len(Clean(raw_query).query_string) > minimum
def build_query_fragment(self, field, filter_type, value):
    """Render one ``field``/``filter_type``/``value`` filter as query text.

    ``value`` may be a haystack input type (anything exposing
    ``input_type_name``); otherwise strings are wrapped in ``Clean`` and all
    other values in ``PythonData`` before being prepared.

    Returns ``<index fieldname>:<fragment>``; the reserved field ``content``
    produces no prefix. Non-empty fragments are wrapped in parentheses
    unless they already start or end with one.
    """
    from haystack import connections

    query_frag = ""

    if not hasattr(value, "input_type_name"):
        # Handle when we've got a ``ValuesListQuerySet``...
        if hasattr(value, "values_list"):
            value = list(value)

        # ``six.string_types`` rather than ``basestring``: identical on
        # Python 2, and does not raise NameError on Python 3.
        if isinstance(value, six.string_types):
            # It's not an ``InputType``. Assume ``Clean``.
            value = Clean(value)
        else:
            value = PythonData(value)

    # Prepare the query using the InputType.
    prepared_value = value.prepare(self)

    if not isinstance(prepared_value, (set, list, tuple)):
        # Then convert whatever we get back to what pysolr wants if needed.
        prepared_value = self.backend.conn._from_python(prepared_value)

    # 'content' is a special reserved word, much like 'pk' in
    # Django's ORM layer. It indicates 'no special field'.
    if field == "content":
        index_fieldname = ""
    else:
        index_fieldname = u"%s:" % connections[self._using].get_unified_index().get_index_fieldname(field)

    filter_types = {
        "contains": u"%s",
        "startswith": u"%s*",
        "exact": u"%s",
        "gt": u"{%s TO *}",
        "gte": u"[%s TO *]",
        "lt": u"{* TO %s}",
        "lte": u"[* TO %s]",
    }

    if value.post_process is False:
        # The input type asked for its output to be used verbatim.
        query_frag = prepared_value
    elif filter_type in ["contains", "startswith"]:
        if value.input_type_name == "exact":
            query_frag = prepared_value
        else:
            # Iterate over terms & incorporate the converted form of each into the query.
            terms = [
                filter_types[filter_type] % self.backend.conn._from_python(possible_value)
                for possible_value in prepared_value.split(" ")
            ]

            if len(terms) == 1:
                query_frag = terms[0]
            else:
                query_frag = u"(%s)" % " AND ".join(terms)
    elif filter_type == "in":
        if not prepared_value:
            # An empty option list would render as "()", which is not a
            # valid query; emit a fragment that matches nothing instead.
            query_frag = u"(!*:*)"
        else:
            in_options = [
                u'"%s"' % self.backend.conn._from_python(possible_value)
                for possible_value in prepared_value
            ]
            query_frag = u"(%s)" % " OR ".join(in_options)
    elif filter_type == "range":
        start = self.backend.conn._from_python(prepared_value[0])
        end = self.backend.conn._from_python(prepared_value[1])
        query_frag = u'["%s" TO "%s"]' % (start, end)
    elif filter_type == "exact":
        if value.input_type_name == "exact":
            query_frag = prepared_value
        else:
            prepared_value = Exact(prepared_value).prepare(self)
            query_frag = filter_types[filter_type] % prepared_value
    else:
        # Remaining filter types (gt, gte, lt, lte) work on exact values.
        if value.input_type_name != "exact":
            prepared_value = Exact(prepared_value).prepare(self)

        query_frag = filter_types[filter_type] % prepared_value

    if len(query_frag) and not query_frag.startswith("(") and not query_frag.endswith(")"):
        query_frag = "(%s)" % query_frag

    return u"%s%s" % (index_fieldname, query_frag)
def build_query_fragment(self, field, filter_type, value):
    """Render one ``field``/``filter_type``/``value`` filter as query text.

    ``value`` may be a haystack input type (anything exposing
    ``input_type_name``); otherwise strings are wrapped in ``Clean`` and all
    other values in ``PythonData`` before being prepared.

    Returns ``<index fieldname>:<fragment>``; the reserved field ``content``
    produces no prefix. Unlike grouping variants of this method, the
    fragment is returned without any extra surrounding parentheses.
    """
    from haystack import connections

    query_frag = ''

    if not hasattr(value, 'input_type_name'):
        # Handle when we've got a ``ValuesListQuerySet``...
        if hasattr(value, 'values_list'):
            value = list(value)

        # ``six.string_types`` rather than ``basestring``: identical on
        # Python 2, and does not raise NameError on Python 3.
        if isinstance(value, six.string_types):
            # It's not an ``InputType``. Assume ``Clean``.
            value = Clean(value)
        else:
            value = PythonData(value)

    # Prepare the query using the InputType.
    prepared_value = value.prepare(self)

    if not isinstance(prepared_value, (set, list, tuple)):
        # Then convert whatever we get back to what pysolr wants if needed.
        prepared_value = self.backend.conn._from_python(prepared_value)

    # 'content' is a special reserved word, much like 'pk' in
    # Django's ORM layer. It indicates 'no special field'.
    if field == 'content':
        index_fieldname = ''
    else:
        index_fieldname = u'%s:' % connections[self._using].get_unified_index().get_index_fieldname(field)

    filter_types = {
        'contains': u'%s',
        'startswith': u'%s*',
        'exact': u'%s',
        'gt': u'{%s TO *}',
        'gte': u'[%s TO *]',
        'lt': u'{* TO %s}',
        'lte': u'[* TO %s]',
    }

    if value.post_process is False:
        # The input type asked for its output to be used verbatim.
        query_frag = prepared_value
    elif filter_type in ['contains', 'startswith']:
        if value.input_type_name == 'exact':
            query_frag = prepared_value
        else:
            # Iterate over terms & incorporate the converted form of each into the query.
            terms = [
                filter_types[filter_type] % self.backend.conn._from_python(possible_value)
                for possible_value in prepared_value.split(' ')
            ]

            if len(terms) == 1:
                query_frag = terms[0]
            else:
                query_frag = u"(%s)" % " AND ".join(terms)
    elif filter_type == 'in':
        if not prepared_value:
            # An empty option list would render as "()", which is not a
            # valid query; emit a fragment that matches nothing instead.
            query_frag = u"(!*:*)"
        else:
            in_options = [
                u'"%s"' % self.backend.conn._from_python(possible_value)
                for possible_value in prepared_value
            ]
            query_frag = u"(%s)" % " OR ".join(in_options)
    elif filter_type == 'range':
        start = self.backend.conn._from_python(prepared_value[0])
        end = self.backend.conn._from_python(prepared_value[1])
        query_frag = u'["%s" TO "%s"]' % (start, end)
    elif filter_type == 'exact':
        if value.input_type_name == 'exact':
            query_frag = prepared_value
        else:
            prepared_value = Exact(prepared_value).prepare(self)
            query_frag = filter_types[filter_type] % prepared_value
    else:
        # Remaining filter types (gt, gte, lt, lte) work on exact values.
        if value.input_type_name != 'exact':
            prepared_value = Exact(prepared_value).prepare(self)

        query_frag = filter_types[filter_type] % prepared_value

    return u"%s%s" % (index_fieldname, query_frag)
def build_query_fragment(self, field, filter_type, value):
    """Build the query fragment for one filter, with datetime handling.

    Values that are not haystack input types are wrapped first: strings
    (other than a single space) become ``Clean``, anything else becomes
    ``PythonData``. Values exposing ``strftime`` are treated as datetimes
    and run through ``self._convert_datetime`` before interpolation.

    Returns ``<index fieldname>:<fragment>``; the reserved field ``content``
    produces no prefix.
    """
    from haystack import connections

    def convert(raw):
        # Resolved per call so the backend is only touched when needed.
        return self.backend._from_python(raw)

    def is_temporal(candidate):
        # Anything that can ``strftime`` is handled as a datetime.
        return hasattr(candidate, 'strftime')

    value_is_datetime = False

    if not hasattr(value, 'input_type_name'):
        # A ``ValuesListQuerySet`` must be materialised before wrapping.
        if hasattr(value, 'values_list'):
            value = list(value)

        value_is_datetime = is_temporal(value)

        # Not an ``InputType``: strings are assumed to be ``Clean`` input.
        if isinstance(value, six.string_types) and value != ' ':
            value = Clean(value)
        else:
            value = PythonData(value)

    # Let the input type produce its prepared form.
    prepared = value.prepare(self)

    if not isinstance(prepared, (set, list, tuple)):
        # Scalars are converted to the backend's representation.
        prepared = convert(prepared)

    # 'content' is reserved (much like 'pk' in Django's ORM) and means
    # 'no special field'.
    if field == 'content':
        prefix = ''
    else:
        unified_index = connections[self._using].get_unified_index()
        prefix = u'%s:' % unified_index.get_index_fieldname(field)

    templates = {
        'content': '%s',
        'contains': '*%s*',
        'endswith': "*%s",
        'startswith': "%s*",
        'exact': '%s',
        'gt': "{%s to}",
        'gte': "[%s to]",
        'lt': "{to %s}",
        'lte': "[to %s]",
        'fuzzy': u'%s~',
    }
    per_term_filters = ('content', 'contains', 'startswith', 'endswith', 'fuzzy')

    if value.post_process is False:
        # The input type asked for its output to be used verbatim.
        fragment = prepared
    elif filter_type in per_term_filters:
        if value.input_type_name == 'exact':
            fragment = prepared
        else:
            # Strings are split into terms; any other value is one term.
            if isinstance(prepared, six.string_types):
                candidates = prepared.split(' ')
            else:
                if value_is_datetime:
                    prepared = self._convert_datetime(prepared)
                candidates = [prepared]

            pieces = [
                templates[filter_type] % convert(candidate)
                for candidate in candidates
            ]

            if len(pieces) == 1:
                fragment = pieces[0]
            else:
                fragment = u"(%s)" % " AND ".join(pieces)
    elif filter_type == 'in':
        options = []
        for candidate in prepared:
            item_is_datetime = is_temporal(candidate)
            converted = convert(candidate)
            if item_is_datetime:
                converted = self._convert_datetime(converted)

            # Only non-datetime strings are quoted.
            if isinstance(converted, six.string_types) and not item_is_datetime:
                options.append('"%s"' % converted)
            else:
                options.append('%s' % converted)
        fragment = "(%s)" % " OR ".join(options)
    elif filter_type == 'range':
        lower = convert(prepared[0])
        upper = convert(prepared[1])
        if is_temporal(prepared[0]):
            lower = self._convert_datetime(lower)
        if is_temporal(prepared[1]):
            upper = self._convert_datetime(upper)
        fragment = u"[%s to %s]" % (lower, upper)
    elif filter_type == 'exact':
        if value.input_type_name == 'exact':
            fragment = prepared
        else:
            prepared = Exact(prepared).prepare(self)
            fragment = templates[filter_type] % prepared
    else:
        # Remaining filter types (gt, gte, lt, lte).
        if value_is_datetime:
            prepared = self._convert_datetime(prepared)
        fragment = templates[filter_type] % prepared

    # Non-empty fragments that are not ``Raw`` input get grouped unless they
    # already start or end with a parenthesis.
    if len(fragment) and not isinstance(value, Raw):
        if not (fragment.startswith('(') or fragment.endswith(')')):
            fragment = "(%s)" % fragment

    return u"%s%s" % (prefix, fragment)
def search(self):
    """Build the search queryset described by the validated form data.

    Returns ``self.no_query_found()`` when the form is invalid. Otherwise
    starts from ``self.searchqueryset``, narrows it by every selected facet
    of the form ``field:value``, applies each filter whose form field was
    filled in, and finally restricts the result to the chosen models
    (``Story`` and ``CatalogRecord`` when none were chosen).
    """
    if not self.is_valid():
        return self.no_query_found()

    sqs = self.searchqueryset

    for facet in self.selected_facets:
        if ":" not in facet:
            continue

        field, value = facet.split(":", 1)
        if value:
            sqs = sqs.narrow(u'%s:%s' % (field, value))

    data = self.cleaned_data

    if data['q']:
        sqs = sqs.auto_query(data['q'])
    if data['title']:
        sqs = sqs.filter(title=AutoQuery(data['title']))
    if data['license']:
        sqs = sqs.filter(license=AutoQuery(data['license']))
    if data['funded_by']:
        sqs = sqs.filter(funded_by=AutoQuery(data['funded_by']))
    if data['domain']:
        sqs = sqs.filter(web_domains__in=AutoQuery(data['domain']))
    if data['exclude']:
        sqs = sqs.exclude(content=Clean(data['exclude']))

    if data['concept']:
        # expand concept selection as wide as we can
        m_concepts = data['concept'].search_matched(
        ).alternative_children().get_descendants(include_self=True)
        sqs = sqs.filter(concepts__in=as_ids(m_concepts.distinct()))
        # TODO use keywords to search meta data (dataset's own reported keywords)?

    if data['publisher']:
        # TODO expand publisher selection to include subpublishers
        m_publishers = data['publisher']
        sqs = sqs.filter(publisher__in=as_ids(m_publishers))

    if data['spatial_entity']:
        sqs = sqs.filter(spatial_entity__in=as_ids(data['spatial_entity']))

    # Compare against "not provided" explicitly: a coordinate of 0 is a
    # valid position but is falsy, so a plain truthiness test silently
    # dropped the distance filter for it.
    longitude = data['location_lng']
    latitude = data['location_lat']
    if longitude not in (None, '') and latitude not in (None, ''):
        # NOTE(review): the point is built as (lat, lng), while haystack's
        # spatial docs describe points as (longitude, latitude). Left
        # unchanged because the indexed ``location`` field is not visible
        # here; confirm both sides agree on the ordering.
        point = Point(latitude, longitude)
        # Only match documents within two miles of the given point.
        sqs = sqs.dwithin('location', point, D(mi=2))

    if data['language']:
        sqs = sqs.filter(language=data['language'])
    if data['access_level']:
        sqs = sqs.filter(access_level__in=data['access_level'])
    if data['sort_by']:
        sqs = sqs.order_by(data['sort_by'])

    if data['result_types']:
        models = [model_choice_to_model(choice) for choice in data['result_types']]
        sqs = sqs.models(*models)
    else:
        sqs = sqs.models(Story, CatalogRecord)

    return sqs