def get_object_list(self, request):
    """Return search results for a plain (non-search-endpoint) request.

    Builds a haystack query from the GET parameters:
    ``content_type`` restricts models, ``q`` is OR-matched against the
    document content, and any other parameter becomes an OR field filter.
    ``page``/``limit`` control slicing.
    """
    # for case no search
    from haystack.inputs import AutoQuery
    from haystack.query import SearchQuerySet
    params = request.GET
    sqs = SearchQuerySet()
    # Restrict to the requested models, e.g. content_type=app_label.model
    if len(params.getlist('content_type')):
        for content_type in params.getlist('content_type'):
            sqs = sqs.models(get_model(*content_type.split('.')))
    #if params.get('order_by'):
    #    sqs = sqs.order_by(params.get('order_by', ''))
    if params.get('q', ''):
        sqs = sqs.filter_or(content=AutoQuery(params.get('q', '').lower()))
    # Every remaining GET parameter is treated as an OR filter on that field.
    # NOTE: .items() replaces Python-2-only .iteritems() (works on both).
    for k, v in params.items():
        if k not in ['q', 'page', 'limit', 'content_type', 'order_by']:
            sqs = sqs.filter_or(**{k: v})
    # Guard against non-numeric paging parameters instead of raising a 500.
    try:
        limit = int(request.GET.get('limit', 20))
    except (TypeError, ValueError):
        limit = 20
    try:
        page = int(request.GET.get('page', 1)) - 1
    except (TypeError, ValueError):
        page = 0
    object_list = sqs[page * limit:(page * limit + limit)]
    # Materialise the slice into a plain list for the caller.
    return list(object_list)
def search(request):
    """
    This view is used for search; it sends the keyword to query on to
    haystack's search. If the user did not provide a keyword to search on,
    it searches an empty query.
    """
    # Only a missing POST key is expected here; a bare except also hid
    # real errors (attribute errors, etc.), so catch KeyError explicitly.
    try:
        q = request.POST['q']
    except KeyError:
        q = ''
    if q == '':
        results = SearchQuerySet().all()
    else:
        # Match the keyword against content, then OR in tag/title/location.
        results = SearchQuerySet().filter(content=AutoQuery(q))
        results = results.filter_or(tags=AutoQuery(q))
        results = results.filter_or(title=AutoQuery(q))
        results = results.filter_or(location=AutoQuery(q))
    people = results.models(UserProfile)
    tasks = results.models(Task)
    return render_to_response(
        'search/results.html', {
            'tresults': tasks,
            'presults': people,
            'keyword': q,
            'peopleCount': len(people),
            'taskCount': len(tasks)
        }, RequestContext(request))
def get_context_data(self, **kwargs):
    """Build the profile-page context for the viewed user.

    Adds per-type activity counts (``type_count``), the ten most recent
    search results authored by the user (``results``), their ten most
    recent emails (``emails``) and a per-mailing-list message count
    (``list_activity``).  Only the 'thread' type is active here; the
    counter-class machinery is kept but disabled (see commented dict).
    """
    user = self.object
    context = {}
    count_types = OrderedDict()
    # Lookups OR-ed together when counting via the search index.
    fields_or_lookup = (
        {'collaborators__contains': user.username},
        {'fullname_and_username__contains': user.username},
    )
    # Intentionally empty: the counter models below are disabled for now.
    counter_class = {}
    #{
    #    'wiki': WikiCollabCount,
    #    'ticket': TicketCollabCount,
    #}
    types = ['thread']
    #types.extend(['ticket', 'wiki', 'changeset', 'attachment'])
    messages = Message.objects.filter(from_address__user__pk=user.pk)
    for type in types:
        CounterClass = counter_class.get(type)
        if CounterClass:
            # Pre-aggregated counter row, if one exists for this author.
            try:
                counter = CounterClass.objects.get(author=user.username)
            except CounterClass.DoesNotExist:
                count_types[trans(type)] = 0
            else:
                count_types[trans(type)] = counter.count
        elif type == 'thread':
            count_types[trans(type)] = messages.count()
        else:
            # Fallback: count matching documents in the search index.
            sqs = SearchQuerySet()
            for filter_or in fields_or_lookup:
                sqs = sqs.filter_or(type=type, **filter_or)
            count_types[trans(type)] = sqs.count()
    context['type_count'] = count_types
    # Latest non-thread contributions by this user.
    sqs = SearchQuerySet()
    for filter_or in fields_or_lookup:
        sqs = sqs.filter_or(**filter_or).exclude(type='thread')
    context['results'] = sqs.order_by('-modified', '-created')[:10]
    email_pks = [addr.pk for addr in user.emails.iterator()]
    query = Message.objects.filter(from_address__in=email_pks)
    query = query.order_by('-received_time')
    context['emails'] = query[:10]
    # Map mailing-list name -> number of messages sent by this user.
    count_by = 'thread__mailinglist__name'
    context['list_activity'] = dict(messages.values_list(count_by)\
        .annotate(Count(count_by))\
        .order_by(count_by))
    context.update(kwargs)
    return super(UserProfileDetailView, self).get_context_data(**context)
def get_context_data(self, **kwargs):
    """Build the profile-page context for the viewed user.

    Adds per-type activity counts (``type_count``), the ten most recent
    search results authored by the user (``results``), their ten most
    recent emails (``emails``) and a per-mailing-list message count
    (``list_activity``).
    """
    user = self.object
    context = {}
    count_types = OrderedDict()
    # Lookups OR-ed together when counting via the search index.
    fields_or_lookup = (
        {
            'collaborators__contains': user.username
        },
        {
            'fullname_and_username__contains': user.username
        },
    )
    # Types with a pre-aggregated counter model.
    counter_class = {
        'wiki': WikiCollabCount,
        'ticket': TicketCollabCount,
    }
    messages = Message.objects.filter(from_address__user__pk=user.pk)
    for type in ['thread', 'ticket', 'wiki', 'changeset', 'attachment']:
        CounterClass = counter_class.get(type)
        if CounterClass:
            # Pre-aggregated counter row, if one exists for this author.
            try:
                counter = CounterClass.objects.get(author=user.username)
            except CounterClass.DoesNotExist:
                count_types[trans(type)] = 0
            else:
                count_types[trans(type)] = counter.count
        elif type == 'thread':
            count_types[trans(type)] = messages.count()
        else:
            # Fallback: count matching documents in the search index.
            sqs = SearchQuerySet()
            for filter_or in fields_or_lookup:
                sqs = sqs.filter_or(type=type, **filter_or)
            count_types[trans(type)] = sqs.count()
    context['type_count'] = count_types
    # Latest non-thread contributions by this user.
    sqs = SearchQuerySet()
    for filter_or in fields_or_lookup:
        sqs = sqs.filter_or(**filter_or).exclude(type='thread')
    context['results'] = sqs.order_by('-modified', '-created')[:10]
    email_pks = [addr.pk for addr in user.emails.iterator()]
    query = Message.objects.filter(from_address__in=email_pks)
    query = query.order_by('-received_time')
    context['emails'] = query[:10]
    # Map mailing-list name -> number of messages sent by this user.
    count_by = 'thread__mailinglist__name'
    context['list_activity'] = dict(messages.values_list(count_by)\
        .annotate(Count(count_by))\
        .order_by(count_by))
    context.update(kwargs)
    return super(UserProfileDetailView, self).get_context_data(**context)
def filter_Search(request):
    """
    Filtering the Search according to the keys sent and the main search
    keyword.

    Main: is the main keyword searched by the user from navbar.
    Key: array of filter keywords sent from the view.
    """
    main = ''
    if 'main' in request.POST:
        main = request.POST['main']
        if main != '':
            # OR the main keyword across content, tags, title and location.
            results = SearchQuerySet().filter(
                content=AutoQuery(main)).models(Task)
            results = results.filter_or(tags=main).models(Task)
            results = results.filter_or(title=AutoQuery(main)).models(Task)
            results = results.filter_or(location=AutoQuery(main)).models(Task)
        else:
            results = SearchQuerySet().all().models(Task)
    else:
        results = SearchQuerySet().all()
    if 'loca' in request.POST:
        loca = request.POST['loca']
        if loca != '':
            results = results.filter_and(city=loca)
    if 'word' in request.POST:
        word = request.POST['word']
        if word != '':
            results1 = results.filter_and(content=AutoQuery(word))
            results2 = results.filter_and(title=AutoQuery(word))
            results3 = results.filter_and(location=AutoQuery(word))
            # Use the | operator instead of calling __or__() directly.
            results = results1 | results2 | results3
    if 'skills' in request.POST:
        skills = request.POST['skills']
        if skills != '':
            skills = skills.split(',')
            for s in skills:
                results = results.filter_and(tags=s)
    if 'from' in request.POST:
        from_price = request.POST['from']
        to_price = request.POST['to']
        if from_price != '' and to_price != '' and to_price != 'all':
            results = results.filter_and(
                price__range=[int(from_price), int(to_price)])
    return render_to_response('search/results_tasks.html', {
        'tresults': results,
        'keyword': main,
        'taskCount': len(results)
    }, RequestContext(request))
def get_context_data(self, **kwargs):
    """Assemble the profile-page context for the viewed user: per-type
    activity counts, recent search results, recent emails and a
    per-mailing-list message count."""
    user = self.object
    context = {}
    count_types = OrderedDict()
    # Lookups OR-ed together when counting via the search index.
    fields_or_lookup = (
        {"collaborators__contains": user.username},
        {"fullname_and_username__contains": user.username},
    )
    counter_class = {"wiki": WikiCollabCount, "ticket": TicketCollabCount}
    messages = Message.objects.filter(from_address__user__pk=user.pk)
    for item_type in ["thread", "ticket", "wiki", "changeset", "attachment"]:
        counter_cls = counter_class.get(item_type)
        label = trans(item_type)
        if counter_cls:
            # Pre-aggregated counter row, if one exists for this author.
            try:
                count_types[label] = counter_cls.objects.get(
                    author=user.username).count
            except counter_cls.DoesNotExist:
                count_types[label] = 0
        elif item_type == "thread":
            count_types[label] = messages.count()
        else:
            # Fallback: count matching documents in the search index.
            sqs = SearchQuerySet()
            for lookup in fields_or_lookup:
                sqs = sqs.filter_or(type=item_type, **lookup)
            count_types[label] = sqs.count()
    context["type_count"] = count_types
    # Latest non-thread contributions by this user.
    sqs = SearchQuerySet()
    for lookup in fields_or_lookup:
        sqs = sqs.filter_or(**lookup).exclude(type="thread")
    context["results"] = sqs.order_by("-modified", "-created")[:10]
    email_pks = [address.pk for address in user.emails.iterator()]
    recent = Message.objects.filter(from_address__in=email_pks)
    context["emails"] = recent.order_by("-received_time")[:10]
    # Map mailing-list name -> number of messages sent by this user.
    count_by = "thread__mailinglist__name"
    context["list_activity"] = dict(
        messages.values_list(count_by)
        .annotate(Count(count_by))
        .order_by(count_by)
    )
    context.update(kwargs)
    return super(UserProfileDetailView, self).get_context_data(**context)
def search(self):
    """
    Return a search queryset built from the cleaned form data.

    Without ``search_in`` the parent form's autoquery is used; otherwise
    the query is OR-ed across each selected field with an exact match.
    The result is ordered by the ``sort_by`` field/direction pair.
    """
    cleaned_data = self.cleaned_data_or_empty()
    search_in = cleaned_data.get("search_in", DEFAULT_SEARCH_IN)
    query = cleaned_data.get("q", "")
    if not query:
        return EmptySearchQuerySet()
    if search_in:
        # OR the query across every requested field, matched exactly.
        search_queryset = SearchQuerySet()
        for field in search_in:
            search_queryset = search_queryset.filter_or(
                **{"{}__exact".format(field): query})
    else:
        # The following returns result of a SearchQuerySet.autoquery()
        search_queryset = super(HitGroupContentSearchForm, self).search()
    # "name_asc" / "name_desc" -> field name and direction.
    field_name, direction = cleaned_data.get(
        "sort_by", DEFAULT_SORT_BY).rsplit("_", 1)
    prefix = "" if direction == "asc" else "-"
    return search_queryset.order_by("{}{}".format(prefix, field_name))
def search(self):
    """Build a search queryset from the selected search-option checkboxes.

    Field keys whose cleaned value is exactly ``True`` are treated as
    fields to search; the query words are OR-ed into each, and results
    may be restricted to selected galleries.
    """
    if not hasattr(self, "cleaned_data"):
        return self.no_query_found()
    # Only keys explicitly set to the boolean True are search fields;
    # `is True` (not `== True`) keeps truthy non-booleans (lists,
    # strings, the int 1) out of the field list. Was also Py2-only
    # iteritems().
    search_fields = [key for key, value in self.cleaned_data.items()
                     if value is True]
    if 'title' not in search_fields:
        sqs = SearchQuerySet()
    else:
        sqs = super(SearchOptionsForm, self).search()
        # title is a document field and has been used for filtering in
        # super method search()
        search_fields = [key for key in search_fields if key != 'title']
    query = sqs.query.clean(self.cleaned_data.pop('q'))
    galleries = [g.id for g in self.cleaned_data.get('galleries', [])]
    search_galleries = self.cleaned_data.get('search_galleries_choice', "ALL")
    query_words = query.split()
    for key in search_fields:
        if key == "tags":
            # Renamed the comprehension variable: it used to shadow `query`.
            sqs = sqs.filter_or(
                tags__in=[word.lower() for word in query_words])
        else:
            sqs = self._filter_or_query_words(sqs, key, query_words)
    if search_galleries == 'SELECTED':
        sqs = sqs.filter_and(galleries_ids__in=galleries)
    return sqs
def filter_news(request, default_limit=None):
    """Return NewsPost objects matching host, text and date filters
    from the request, newest first, optionally truncated to a limit."""
    # read hosts list or single
    host_slugs = parse_union(request)
    contains = request.GET.get("search")
    if default_limit:
        limit = parse_limit(request, default=default_limit)
    else:
        limit = parse_limit(request)
    start, end = parse_filter_date(request)
    results = SearchQuerySet()
    if host_slugs:
        results = results.filter_or(host_slug__in=host_slugs)
    if contains:
        results = results.filter_and(content__contains=contains)
    if start:
        results = results.filter_and(published__gte=start)
    if end:
        results = results.filter_and(published__lt=end)
    results = results.models(NewsPost).order_by('-published')
    if limit:
        results = results[:limit]
    return [hit.object for hit in results]
def search(request):
    """Full-text video search with manual category/tag facet counting."""
    q = request.GET.get('q', '')
    cat_filter = request.GET.get('category')
    tag_filter = request.GET.get('tag')
    facet_counts = {}
    page = None
    if q or cat_filter or tag_filter:
        qs = SearchQuerySet()
        if q:
            qs = qs.filter(content=q)
            qs = qs.filter_or(speakers__startswith=q.lower())
        if cat_filter:
            # TODO: This doesn't work quite right. It should filter
            # out anything that's not *exactly* cat_filter but it's
            # not. Could be a problem here or with the indexing. The
            # haystack docs are mysterious.
            qs = qs.filter_and(category__exact=cat_filter)
        if tag_filter:
            qs = qs.filter_and(tags__in=[tag_filter])
        # TODO: Whoosh doesn't handle faceting, so we have to do it
        # manually. Fix this so it detects whether the haystack backend
        # supports facets and if so, uses the backend and not the db.
        cat_counts = {}
        tag_counts = {}
        for hit in qs:
            cat_counts[hit.category] = cat_counts.get(hit.category, 0) + 1
            for tag in hit.tags:
                tag_counts[tag] = tag_counts.get(tag, 0) + 1
        facet_counts['category'] = sorted(cat_counts.items(),
                                          key=lambda pair: pair[1],
                                          reverse=True)
        facet_counts['tag'] = sorted(tag_counts.items(),
                                     key=lambda pair: pair[1],
                                     reverse=True)
        paginator = Paginator(qs, 25)
        try:
            p = max(1, int(request.GET.get('p', '1')))
        except ValueError:
            p = 1
        try:
            page = paginator.page(p)
        except EmptyPage:
            page = paginator.page(1)
    return render(
        request,
        'videos/search.html',
        {'query': q,
         'tag': tag_filter,
         'category': cat_filter,
         'facet_counts': facet_counts,
         'page': page})
def filter_queryset(self, request, queryset, view):
    """Restrict ``queryset`` to searchable rows matching the ``q``
    query parameter via the haystack index.

    ``q_fields`` (comma-separated) selects which index fields to OR the
    query across; otherwise the ``text`` document field is used.
    """
    query = request.query_params.get('q', None)
    if query:
        search_queryset = SearchQuerySet()
        query_fields = request.query_params.get('q_fields')
        if query_fields:
            # OR the query across each requested field.
            for query_field in query_fields.split(','):
                search_queryset = search_queryset.filter_or(
                    **{query_field: query})
        else:
            search_queryset = search_queryset.filter_or(text=query)
        # GC is disabled while materialising a very large pk list
        # (presumably to speed up allocation of millions of objects —
        # TODO confirm); try/finally guarantees it is re-enabled even if
        # the search backend raises.
        gc.disable()
        try:
            activity_ids = search_queryset.values_list(
                'pk', flat=True)[:3000000]
        finally:
            gc.enable()
        return queryset.filter(pk__in=activity_ids).filter(is_searchable=True)
    return queryset
def name_filter(first, middle, last):
    """AND-filter a search queryset on first/middle/last names.

    Each hyphenated name additionally OR-matches on every
    hyphen-separated component.
    """
    results = SearchQuerySet()

    def _apply(sqs, field, texts):
        # Filter on each full text; OR in hyphen components when present.
        for text in texts:
            sqs = sqs.filter(**{field: text})
            if '-' in text:
                for part in text.split('-'):
                    sqs = sqs.filter_or(**{field: part})
        return sqs

    results = _apply(results, 'first_name', first)
    results = _apply(results, 'middle_name', middle)
    results = _apply(results, 'last_name', last)
    return results
def name_filter(first, middle, last):
    """Filter people by first/middle/last name; hyphenated names also
    OR-match on each hyphen-separated component."""
    sqs = SearchQuerySet()
    for name in first:
        sqs = sqs.filter(first_name=name)
        if '-' in name:
            for piece in name.split('-'):
                sqs = sqs.filter_or(first_name=piece)
    for name in middle:
        sqs = sqs.filter(middle_name=name)
        if '-' in name:
            for piece in name.split('-'):
                sqs = sqs.filter_or(middle_name=piece)
    for name in last:
        sqs = sqs.filter(last_name=name)
        if '-' in name:
            for piece in name.split('-'):
                sqs = sqs.filter_or(last_name=piece)
    return sqs
def search(request):
    """Video search with manual category facet counting and a
    category-stripped base URL for facet links."""
    q = request.GET.get("q", "")
    facet_counts = {}
    page = None
    if q:
        cat_filter = request.GET.get("category")
        qs = SearchQuerySet().filter(content=q)
        qs = qs.filter_or(speakers__startswith=q.lower())
        if cat_filter:
            # TODO: This doesn't work quite right. It should filter
            # out anything that's not *exactly* cat_filter but it's
            # not. Could be a problem here or with the indexing. The
            # haystack docs are mysterious.
            qs = qs.filter_and(category__exact=cat_filter)
        # TODO: Whoosh doesn't handle faceting, so we have to do it
        # manually. Fix this so it detects whether the haystack backend
        # supports facets and if so, uses the backend and not the db.
        cat_counts = {}
        for hit in qs:
            cat_counts[hit.category] = cat_counts.get(hit.category, 0) + 1
        facet_counts["category"] = sorted(cat_counts.items(),
                                          key=lambda pair: pair[1],
                                          reverse=True)
        paginator = Paginator(qs, 25)
        try:
            p = max(1, int(request.GET.get("p", "1")))
        except ValueError:
            p = 1
        try:
            page = paginator.page(p)
        except EmptyPage:
            page = paginator.page(1)
    if q:
        title = u"Search: {query}".format(query=q)
    else:
        title = u"Search"
    # Base URL keeps every GET param except the category facet.
    get_params = request.GET.copy()
    if "category" in get_params:
        get_params.pop("category")
    base_url = request.path + "?" + get_params.urlencode()
    return render(
        request,
        "videos/search.html",
        {"query": q, "base_url": base_url, "title": title,
         "facet_counts": facet_counts, "page": page},
    )
def search(request):
    """Full-text video search with manual category/tag facet counting.

    Renders ``videos/search.html`` with the query, active filters,
    facet counts and a 25-per-page paginator page (or None when no
    query/filter was given).
    """
    q = request.GET.get('q', '')
    cat_filter = request.GET.get('category')
    tag_filter = request.GET.get('tag')
    facet_counts = {}
    if q or cat_filter or tag_filter:
        qs = SearchQuerySet()
        if q:
            # Match content, and OR in a speaker-name prefix match.
            qs = qs.filter(content=q)
            qs = qs.filter_or(speakers__startswith=q.lower())
        if cat_filter:
            # TODO: This doesn't work quite right. It should filter
            # out anything that's not *exactly* cat_filter but it's
            # not. Could be a problem here or with the indexing. The
            # haystack docs are mysterious.
            qs = qs.filter_and(category__exact=cat_filter)
        if tag_filter:
            qs = qs.filter_and(tags__in=[tag_filter])
        # TODO: Whoosh doesn't handle faceting, so we have to do it
        # manually. Fix this so it detects whether the haystack backend
        # supports facets and if so, uses the backend and not the db.
        cat_counts = {}
        tag_counts = {}
        for mem in qs:
            cat_counts[mem.category] = cat_counts.get(mem.category, 0) + 1
            for tag in mem.tags:
                tag_counts[tag] = tag_counts.get(tag, 0) + 1
        # Facets sorted by descending occurrence count.
        facet_counts['category'] = sorted(
            cat_counts.items(),
            key=lambda pair: pair[1],
            reverse=True)
        facet_counts['tag'] = sorted(
            tag_counts.items(),
            key=lambda pair: pair[1],
            reverse=True)
        page = Paginator(qs, 25)
        p = request.GET.get('p', '1')
        # Clamp the page number to >= 1; bad input falls back to page 1.
        try:
            p = max(1, int(p))
        except ValueError:
            p = 1
        try:
            page = page.page(p)
        except EmptyPage:
            page = page.page(1)
    else:
        page = None
    return render(request, 'videos/search.html',
                  {'query': q,
                   'tag': tag_filter,
                   'category': cat_filter,
                   'facet_counts': facet_counts,
                   'page': page})
def filter_Search(request):
    """
    Filtering the Search according to the keys sent and the main search
    keyword.

    Main: is the main keyword searched by the user from navbar.
    Key: array of filter keywords sent from the view.
    """
    main = ''
    if 'main' in request.POST:
        main = request.POST['main']
        if main != '':
            # OR the main keyword across content, tags, title and location.
            results = SearchQuerySet().filter(content=AutoQuery(main)).models(Task)
            results = results.filter_or(tags=main).models(Task)
            results = results.filter_or(title=AutoQuery(main)).models(Task)
            results = results.filter_or(location=AutoQuery(main)).models(Task)
        else:
            results = SearchQuerySet().all().models(Task)
    else:
        results = SearchQuerySet().all()
    if 'loca' in request.POST:
        loca = request.POST['loca']
        if loca != '':
            results = results.filter_and(city=loca)
    if 'word' in request.POST:
        word = request.POST['word']
        if word != '':
            results1 = results.filter_and(content=AutoQuery(word))
            results2 = results.filter_and(title=AutoQuery(word))
            results3 = results.filter_and(location=AutoQuery(word))
            # Use the | operator instead of calling __or__() directly.
            results = results1 | results2 | results3
    if 'skills' in request.POST:
        skills = request.POST['skills']
        if skills != '':
            skills = skills.split(',')
            for s in skills:
                results = results.filter_and(tags=s)
    if 'from' in request.POST:
        from_price = request.POST['from']
        to_price = request.POST['to']
        if from_price != '' and to_price != '' and to_price != 'all':
            results = results.filter_and(price__range=[int(from_price), int(to_price)])
    return render_to_response('search/results_tasks.html',
                              {'tresults': results, 'keyword': main,
                               'taskCount': len(results)},
                              RequestContext(request))
def get_search(self, request, **kwargs):
    """Tastypie endpoint: search language resources via the haystack
    index, intersect with tastypie filters, sort, paginate and
    serialize the response.

    ``q`` params are ``filter:value`` pairs; ``or`` switches filters
    from AND to OR combination.
    """
    self.method_check(request, allowed=['get'])
    self.is_authenticated(request)
    self.dispatch_list(request)
    self.throttle_check(request)
    # Do the query.
    sqs = SearchQuerySet()
    query = request.GET.getlist("q")
    OR = request.GET.getlist("or")
    if query:
        for q in query:
            query_dict = {}
            # Each q is expected as "filter:value".
            # NOTE(review): q.split(':')[1] raises IndexError *before*
            # the try below when ':' is missing — looks like the
            # except IndexError was meant to cover it; confirm.
            encoded_q = encode_filter(q.split(':')[0], q.split(':')[1])
            try:
                key = "{}Filter_exact".format(encoded_q.get('filter'))
                value = encoded_q.get('value')
                # Multi-word values use the double-underscore lookup form.
                if ' ' in value:
                    key = key.replace('_', '__')
                query_dict[key] = value
                if OR:
                    sqs = sqs.filter_or(**query_dict)
                else:
                    sqs = sqs.filter(**query_dict)
            except IndexError:
                # Fall back to a plain content search for malformed pairs.
                sqs = sqs.filter(content=q)
    # Apply tastypie filters if any whatsoever
    sqs_objects = [sq.object for sq in sqs]
    filtered = self.apply_filters(request, applicable_filters={})
    # Keep only objects present in both the index hits and the filter set.
    final_list = list(set(sqs_objects) & set(filtered))
    ids = [fl.id for fl in final_list]
    final_list = lr.resourceInfoType_model.objects.filter(id__in=ids)
    if 'latest' in request.GET.get('sort', ''):
        final_list = self.apply_sorting(final_list,
                                        options={'sort': [u'latest']})
    elif 'earliest' in request.GET.get('sort', ''):
        final_list = self.apply_sorting(final_list,
                                        options={'sort': [u'earliest']})
    paginator = Paginator(request.GET, final_list,
                          resource_uri='/api/v1/lr/search/')
    to_be_serialized = paginator.page()
    bundles = [
        self.build_bundle(obj=result, request=request)
        for result in to_be_serialized['objects']
    ]
    to_be_serialized['objects'] = [
        self.full_dehydrate(bundle) for bundle in bundles
    ]
    to_be_serialized = self.alter_list_data_to_serialize(
        request, to_be_serialized)
    return self.create_response(request, to_be_serialized)
def search_similar(request):
    """Return up to five FoiRequests similar to ``q`` as a JSON list."""
    query = request.GET.get("q", None)
    payload = []
    if query:
        sqs = SearchQuerySet().models(FoiRequest)
        # OR each cleaned query term into the content match.
        for term in query.split():
            sqs = sqs.filter_or(content=sqs.query.clean(term))
        payload = [
            {"title": hit.title,
             "id": hit.pk,
             "public_body_name": hit.public_body_name,
             "description": hit.description,
             "url": hit.url,
             "score": hit.score}
            for hit in list(sqs)[:5]
        ]
    return HttpResponse(json.dumps(payload), content_type="application/json")
def search(request):
    """
    This view is used for search; it sends the keyword to query on to
    haystack's search. If the user did not provide a keyword to search on,
    it searches an empty query.
    """
    # Only a missing POST key is expected; the previous bare except
    # also masked unrelated errors.
    try:
        q = request.POST['q']
    except KeyError:
        q = ''
    if q == '':
        results = SearchQuerySet().all()
    else:
        # Match the keyword against content, then OR in tag/title/location.
        results = SearchQuerySet().filter(content=AutoQuery(q))
        results = results.filter_or(tags=AutoQuery(q))
        results = results.filter_or(title=AutoQuery(q))
        results = results.filter_or(location=AutoQuery(q))
    people = results.models(UserProfile)
    tasks = results.models(Task)
    return render_to_response('search/results.html',
                              {'tresults': tasks, 'presults': people,
                               'keyword': q, 'peopleCount': len(people),
                               'taskCount': len(tasks)},
                              RequestContext(request))
def refined_search(data):
    """Refine a live-job search with the filters present in ``data``
    (a QueryDict: skills, locations, state, job type, industry,
    education, functional area and experience bounds).

    Returns (sqs, searched_skills, searched_locations,
    searched_industry, searched_edu, state) so the template can echo
    the active filters back.
    """
    # Empty defaults so the tuple is always fully populated.
    searched_skills = searched_locations = searched_industry = searched_edu = state = Skill.objects.none()
    sqs = SearchQuerySet().models(JobPost).filter_and(status='Live')
    if 'refine_skill' in data and data.getlist('refine_skill'):
        term = data.getlist('refine_skill')
        # A "skill" term may appear in any of these fields.
        sqs = sqs.filter_and(SQ(title__in=term) | SQ(skills__in=term) | SQ(
            description__in=term) | SQ(designation__in=term) | SQ(edu_qualification__in=term))
        searched_skills = Skill.objects.filter(name__in=term)
    location = data.getlist('refine_location') if 'refine_location' in data else []
    searched_locations = City.objects.filter(name__in=location)
    if 'Across India' in location:
        # Nationwide: match any Indian state-level location.
        india = Country.objects.filter(name='India')
        sqs = sqs.filter_and(SQ(location__in=india.values_list('state__state__name', flat=True)))
    elif location:
        # Also match the selected cities' parent cities.
        other_cities = searched_locations.values_list('parent_city__name', flat=True)
        sqs = sqs.filter_and(SQ(location__in=location) | SQ(location__in=other_cities))
    if 'refine_state' in data and data.getlist('refine_state'):
        state = State.objects.filter(name__in=data.getlist('refine_state'))
        sqs = sqs.filter_and(location__in=state.values_list('state__name', flat=True))
    if data.get('job_type'):
        if data['job_type'] == 'Fresher':
            # Freshers: only jobs requiring zero years of experience.
            sqs = sqs.filter_and(min_year__lte=int(0))
        else:
            sqs = sqs.filter_and(job_type__in=[data['job_type']])
    if 'refine_industry' in data and data.getlist('refine_industry'):
        term = data.getlist('refine_industry')
        sqs = sqs.filter_and(industry__in=term)
        searched_industry = Industry.objects.filter(name__in=term)
    if 'refine_education' in data and data.getlist('refine_education'):
        term = data.getlist('refine_education')
        sqs = sqs.filter_and(edu_qualification__in=term)
        searched_edu = Qualification.objects.filter(name__in=term)
    if 'functional_area' in data and data.getlist('functional_area'):
        term = data.getlist('functional_area')
        sqs = sqs.filter_or(functional_area__in=term)
    # The `or ... == 0` form keeps an explicit 0 from being treated as falsy.
    if data.get('refine_experience_min') or data.get('refine_experience_min') == 0:
        sqs = sqs.filter_and(min_year__lte=int(data['refine_experience_min']))
    if data.get('refine_experience_max') or data.get('refine_experience_max') == 0:
        sqs = sqs.filter_and(max_year__lte=int(data['refine_experience_max']))
    # TODO: this line is taking 500ms, nikhila has to look into it.
    # job_list = JobPost.objects.filter(status='Live', pk__in=results).select_related(
    #     'company', 'user').prefetch_related('location', 'skills', 'industry')
    sqs = sqs.load_all().order_by('-published_on')
    return sqs, searched_skills, searched_locations, searched_industry, searched_edu, state
def search_all_bookings(q, county, dicipline, max_price, show_full_boked,
                        show_canceled, show_done, ordering):
    """
    Search bookings in the index by price, state, fullness, county and
    discipline; returns the matching primary keys in the requested order.

    Move later to search.api
    """
    sqs = SearchQuerySet().models(Booking)
    # TODO: Find out what this field should be used for O.o
    # if q is not None and q != '':
    #     sqs = sqs.filter(name=AutoQuery(q))
    if max_price is not None and max_price not in ('', '0'):
        sqs = sqs.filter(price__lt=max_price)
    # Planning bookings are always shown; canceled/done are opt-in.
    states = [Booking.STATE_PLANNING]
    if show_canceled:
        states.append(Booking.STATE_CANCELED)
    if show_done:
        states.append(Booking.STATE_DONE)
    sqs = sqs.filter(state__in=states)
    if show_full_boked:
        sqs = sqs.filter_or(full=to_es_bool(show_full_boked))
    else:
        sqs = sqs.filter(full=to_es_bool(show_full_boked))
    if county is not None and county not in ('', '0'):
        sqs = sqs.filter(county=county)
    if dicipline is not None and dicipline not in ('', '0'):
        sqs = sqs.filter(dicipline=dicipline)
    # Ordering for the sql query
    ordering_mapping = {
        '': '-id',            # Default ordering by id
        'A': '-id',           # Datum
        'B': '-free_spots',   # Free spots
        'C': '-id',           # Booked spots
        'D': '-price',        # Most Expensive
        'E': 'price',         # Cheapest Price
    }
    # .get() with a default: an unknown ordering key used to raise
    # KeyError; fall back to the default ordering instead. The debug
    # print(sqs.query) was also removed.
    sqs = sqs.order_by(ordering_mapping.get(ordering, '-id'))
    # Extract all objects based on ES search
    # return order_by_id(Booking, sqs.values_list('pk', flat=True), ordering=ordering_mapping[ordering])
    return sqs.values_list('pk', flat=True)
def search(request):
    """Search mail threads for the request's tokens, newest first;
    redirect to the index when there is nothing to search for."""
    tokens = _search_tokens(request)
    # BUG FIX: the original `if len(tokens) is None:` was always False
    # (len() returns an int), so the redirect never fired; redirect
    # whenever no tokens were extracted.
    if not tokens:
        return HttpResponseRedirect(reverse('mail.views.index'))
    sqs = SearchQuerySet().models(Thread)
    for t in tokens:
        sqs = sqs.filter_or(text_and_recipients=t)
    sqs = sqs.order_by('-date')
    if sqs.count() == 0:
        return render_to_response('search_empty.html', {
            'path': request.path
        }, context_instance=RequestContext(request))
    return index(request, search=tokens, threads=sqs)
def search(request):
    """Search mail threads for the request's tokens, newest first;
    redirect to the index when there is nothing to search for."""
    tokens = _search_tokens(request)
    # `len(tokens) is None` could never be True (len() yields an int),
    # leaving the redirect dead code — test for an empty token list.
    if not tokens:
        return HttpResponseRedirect(reverse('mail.views.index'))
    sqs = SearchQuerySet().models(Thread)
    for t in tokens:
        sqs = sqs.filter_or(text_and_recipients=t)
    sqs = sqs.order_by('-date')
    if sqs.count() == 0:
        return render_to_response('search_empty.html',
                                  {'path': request.path},
                                  context_instance=RequestContext(request))
    return index(request, search=tokens, threads=sqs)
def filter_queryset(self, request, queryset, view):
    """Restrict ``queryset`` to searchable rows matching ``q`` via the
    haystack index; ``exact`` forces exact text matching and
    ``q_fields`` selects the index fields to OR across."""
    query = request.query_params.get('q', None)
    exact = request.query_params.get('exact', None)
    if not query:
        return queryset
    if exact:
        # always match text exactly
        query = Exact(query)
    search_queryset = SearchQuerySet()
    query_fields = request.query_params.get('q_fields')
    if query_fields:
        for field in query_fields.split(','):
            search_queryset = search_queryset.filter_or(**{field: query})
    else:
        search_queryset = search_queryset.filter_or(text=query)
    # Cap the materialised pk list at 100k hits.
    activity_ids = search_queryset.values_list('pk', flat=True)[0:100000]
    return queryset.filter(pk__in=activity_ids).filter(is_searchable=True)
def search_all_trainers(q, county, skill, diciplines):
    """
    OR-filter trainers by free-text query, county, skill level and
    disciplines; returns the matching primary keys.

    Move later to search.api
    """
    model = Trainer
    sqs = SearchQuerySet().models(model)
    meaningful = lambda value: value is not None and value not in ('', '0')
    if meaningful(q):
        # The free-text query matches either name or email.
        sqs = sqs.filter_or(name=AutoQuery(q))
        sqs = sqs.filter_or(email=AutoQuery(q))
    if meaningful(county):
        sqs = sqs.filter_or(county=county)
    if meaningful(skill):
        sqs = sqs.filter_or(skill_level=skill)
    if meaningful(diciplines):
        sqs = sqs.filter_or(diciplines=diciplines)
    # Extract all objects based on ES search
    # return order_by_id(model=model, object_ids=sqs.values_list('pk', flat=True))
    return sqs.values_list('pk', flat=True)
def search(request):
    """New sexy search.

    Returns a template context dict: the full post list when the form
    is invalid or the index is unavailable, otherwise posts whose text,
    preview, tags or title match the query.
    """
    form = SearchForm(request.GET)
    try:
        search_qs = SearchQuerySet().models(Post)
    except InvalidIndexError:
        search_qs = None
    if not form.is_valid() or not search_qs:
        return {"form": form, "query": "", "object_list": Post.objects.all()}

    def q(attr):
        # Q object matching the cleaned query on the given attribute
        # (replaces the lambda the author had flagged for cleanup).
        return models.Q(**{attr: form.cleaned_data["query"]})

    return {
        "object_list": search_qs.filter_or(
            q("text") | q("preview") | q("raw_tags") | q("title")),
        "form": form,
        "query": form.cleaned_data["query"],
    }
def filter_queryset(self, request, queryset, view):
    """Restrict ``queryset`` to searchable rows matching ``q`` via the
    haystack index; ``exact`` forces exact matching and ``q_fields``
    (comma-separated) selects which index fields to OR across."""
    query = request.query_params.get('q', None)
    exact = request.query_params.get('exact', None)
    if query:
        if exact:
            # always match text exactly
            query = Exact(query)
        search_queryset = SearchQuerySet()
        requested_fields = request.query_params.get('q_fields')
        if requested_fields:
            for field_name in requested_fields.split(','):
                search_queryset = search_queryset.filter_or(
                    **{field_name: query})
        else:
            # Default to the document text field.
            search_queryset = search_queryset.filter_or(text=query)
        activity_ids = search_queryset.values_list('pk', flat=True)
        return queryset.filter(pk__in=activity_ids).filter(
            is_searchable=True)
    return queryset
def ask_me_anything(request):
    """Question-answering search: classify the question, narrow by the
    expected answer type, and OR the weighted keywords across
    content/title/context."""
    context = {}
    if request.method == 'POST':
        raw = request.POST.get('q', '')
        if raw:
            question = Question(raw)
            sq = SearchQuerySet()
            answer_type = question.get_type()
            if answer_type in ('LOC', 'DATETIME', 'NUM', 'PERSON'):
                # Narrow to documents annotated with the expected type.
                sq = sq.narrow('ann:' + answer_type)
            for keyword in question.weight_keywords():
                sq = sq.filter_or(
                    SQ(content=keyword) | SQ(title=keyword) | SQ(context=keyword))
            context = {
                'q': question,
                'result': sq[:10],
            }
    return render(request, 'ask_me_anything.html', context)
def get_queryset(self):
    """Return search results for ``self.query`` filtered by the
    requesting user's read permissions.

    Non-moderator authenticated users are limited to articles they own,
    their group's articles, or world-readable ones; anonymous users see
    only world-readable articles.
    """
    qs = SearchQuerySet().all()
    if self.request.user.is_authenticated():
        # TODO: This has a leak! It should say:
        # group=self.request.group.id AND group_read=True
        if not permissions.can_moderate(models.URLPath.root().article, self.request.user):
            qs = qs.filter_or(owner=self.request.user.id, group=self.request.group.id, other_read=True)
    else:
        # Anonymous: world-readable articles only.
        qs = qs.exclude(other_read=False)
    qs = qs.filter(content=AutoQuery(self.query))
    # NOTE(review): this second exclude also hides non-world-readable
    # articles from authenticated users — confirm it's intended.
    qs = qs.exclude(other_read=False)
    qs = qs.load_all()
    return qs
def search(request):
    """New sexy search.

    Returns a template context dict: the full post list when the form
    is invalid or the index is unavailable, otherwise posts whose text,
    preview, tags or title match the query.
    """
    form = SearchForm(request.GET)
    try:
        search_qs = SearchQuerySet().models(Post)
    except InvalidIndexError:
        search_qs = None
    if not form.is_valid() or not search_qs:
        return {
            'form': form,
            'query': '',
            'object_list': Post.objects.all(),
        }
    term = form.cleaned_data['query']
    # OR the query across every searchable attribute.
    matcher = (models.Q(text=term) | models.Q(preview=term) |
               models.Q(raw_tags=term) | models.Q(title=term))
    return {
        'object_list': search_qs.filter_or(matcher),
        'form': form,
        'query': term,
    }
def search_person_by_name(name, sqs=None):
    """
    Because the default haystack operator is AND, searching for
    "John Quincy Adams" becomes `John AND Quincy AND Adams` and misses
    "John Adams", causing missed matches and duplicates.  So when the
    name has more than two parts, search on the full name OR just the
    first and last parts — `(John AND Quincy AND Adams) OR
    (John AND Adams)` — which is a bit more tolerant.
    """
    parts = name.split()
    if sqs is None:
        sqs = SearchQuerySet().filter(content=name)
    if len(parts) >= 2:
        # OR in the "first last" short form of the name.
        sqs = sqs.filter_or(content=' '.join([parts[0], parts[-1]]))
    return sqs
def get_queryset(self):
    """Return search results for ``self.query`` filtered by the
    requesting user's read permissions.

    Non-moderator authenticated users are limited to articles they own,
    their group's articles, or world-readable ones; anonymous users see
    only world-readable articles.
    """
    qs = SearchQuerySet().all()
    if self.request.user.is_authenticated():
        # TODO: This has a leak! It should say:
        # group=self.request.group.id AND group_read=True
        if not permissions.can_moderate(models.URLPath.root().article, self.request.user):
            qs = qs.filter_or(
                owner=self.request.user.id,
                group=self.request.group.id,
                other_read=True
            )
    else:
        # Anonymous: world-readable articles only.
        qs = qs.exclude(other_read=False)
    qs = qs.filter(content=AutoQuery(self.query))
    # NOTE(review): this second exclude also hides non-world-readable
    # articles from authenticated users — confirm it's intended.
    qs = qs.exclude(other_read=False)
    qs = qs.load_all()
    return qs
def get_queryset(self, *args, **kwargs):
    """Search archive files and return only those the requesting user
    may view.

    ``q`` is the query, ``fields`` (comma-separated) restricts matching
    to specific fields via icontains, and ``facets`` adds facet fields.
    Returns an EmptySearchQuerySet when no query was given (the
    original fell off the end and returned None in that case).
    """
    request = self.request
    queryset = EmptySearchQuerySet()
    fieldlist = []
    if request.GET.get('fields'):
        rawfields = request.GET.get('fields')
        fieldlist = rawfields.split(',')
    facetlist = []
    if request.GET.get('facets'):
        rawfacets = request.GET.get('facets')
        facetlist = rawfacets.split(',')
    if request.GET.get('q'):
        query = request.GET.get('q')
        sqs = SearchQuerySet()
        for item in facetlist:
            sqs = sqs.facet(item)
        if not fieldlist:
            sqs = sqs.filter(content=AutoQuery(query))
        else:
            # First field ANDs in, the rest OR in.
            for idx, field in enumerate(fieldlist):
                if idx == 0:
                    sqs = sqs.filter(SQ(**{field + "__icontains": query}))
                else:
                    sqs = sqs.filter_or(SQ(**{field + "__icontains": query}))
        # Keep only results the user is permitted to view
        # (the redundant `else: continue` was dropped).
        return [m for m in sqs
                if request.user.has_perm('filemaster.view_archivefile',
                                         m.object)]
    # No query: return the (previously unused) empty queryset instead
    # of implicitly returning None.
    return queryset
def search(request):
    """New sexy search.

    Returns a template context dict: the full post list when the form
    is invalid or the index is unavailable, otherwise posts whose text,
    preview, tags or title match the query.
    """
    form = SearchForm(request.GET)
    try:
        search_qs = SearchQuerySet().models(Post)
    except InvalidIndexError:
        # Index missing/broken: fall back to showing everything below.
        search_qs = None
    if not form.is_valid() or not search_qs:
        return {
            'form': form,
            'query': '',
            'object_list': Post.objects.all(),
        }
    # Shorthand for a Q object matching the cleaned query on one attribute.
    q = lambda attr: models.Q(**{attr: form.cleaned_data['query']}) #SHIT
    return {
        'object_list': search_qs.filter_or(
            q('text') | q('preview') | q('raw_tags') | q('title')),
        'form': form,
        'query': form.cleaned_data['query'],
    }
def filter_events(request, default_limit=None):
    """
    Build an Event search queryset from request parameters.

    Applies host-slug and free-text filters, then resolves the start/end
    date window, defaulting a missing start to one year ago and a
    missing end to ten years after the (possibly defaulted) start.

    Returns a ``(events, start_date, end_date, limit)`` tuple.
    """
    host_slugs = parse_union(request)
    contains = request.GET.get("search")
    start, end = parse_filter_date(request)
    if default_limit:
        limit = parse_limit(request, default=default_limit)
    else:
        limit = parse_limit(request)

    results = SearchQuerySet()
    if host_slugs:
        results = results.filter_or(hosts_slug__in=host_slugs)
    if contains:
        results = results.filter_and(content__contains=contains)
    events = results.models(Event)

    # Fill in whichever bound is missing.
    if start:
        start_date = start
        end_date = end if end else start.replace(year=start.year + 10)
    else:
        start_date = datetime.now() - relativedelta(years=1)
        end_date = end if end else datetime.now().replace(year=start_date.year + 10)

    return events, start_date, end_date, limit
def get(self, request, *args, **kwargs):
    """
    Primary endpoint for retrieving resources via the index.

    Values should never be empty string or python None; instead the
    string "None" is returned via str().

    Response fields:
      * "availability": list value, js will parse JSON as Array
      * "availabilityurl": single value, string for the REST client
      * "type": single value, string for the REST client
      * "author": single value, string for the REST client (first author)
      * "creator": authors — named for DataOne-standard compliance,
        whose metadata has no author field; creator represents authors
      * "contributor", "owner", "subject": list values, parsed as Arrays
      * "coverage_type": list — point, period, ...

    Query parameters: ``q`` (text), ``sort``/``asc`` (ordering),
    ``filter`` (JSON of list-valued facet filters, e.g.
    discoverapi/?filter={"owner":["Firstname Lastname"]}),
    ``filterbuilder``/``updatefilters`` (facet counts), ``pnum`` (page).
    """
    start = time.time()
    sqs = SearchQuerySet().all()

    asc = '-1'
    if request.GET.get('asc'):
        asc = request.GET.get('asc')
    sort = 'modified'
    if request.GET.get('sort'):
        sort = request.GET.get('sort')
    sort = sort if asc == '1' else '-{}'.format(sort)

    if request.GET.get('q'):
        q = request.GET.get('q')
        sqs = sqs.filter(content=q)

    def _exact_or_filters(queryset, values, field_name):
        # AND the first exact value into the query; OR in the rest.
        # Replaces five copy-pasted loops whose guard
        # ``k == len(filters[...])`` was dead code (enumerate never
        # reaches len) — and, in the subject loop, was the buggy
        # ``k == len(subjtype)`` (length of the subject *string*),
        # which silently ANDed some subjects instead of ORing them.
        for idx, value in enumerate(values):
            if idx == 0:
                queryset = queryset.filter(**{field_name: Exact(value)})
            else:
                queryset = queryset.filter_or(**{field_name: Exact(value)})
        return queryset

    try:
        qs = request.query_params
        filters = json.loads(qs.get('filter'))
        # filter values expect lists, for example
        # discoverapi/?filter={"owner":["Firstname Lastname"]}
        if filters.get('author'):
            sqs = _exact_or_filters(sqs, filters['author'], 'author_exact')
        if filters.get('owner'):
            sqs = _exact_or_filters(sqs, filters['owner'], 'owner_exact')
        if filters.get('subject'):
            sqs = _exact_or_filters(sqs, filters['subject'], 'subject_exact')
        if filters.get('contributor'):
            sqs = _exact_or_filters(sqs, filters['contributor'], 'contributor_exact')
        if filters.get('type'):
            sqs = _exact_or_filters(sqs, filters['type'], 'content_type_exact')
        if filters.get('availability'):
            sqs = _exact_or_filters(sqs, filters['availability'], 'availability_exact')
        if filters.get('geofilter'):
            # return resources with geographic data
            sqs = sqs.filter(north__range=[-90, 90])
        if filters.get('date'):
            try:
                datefilter = DateRange(
                    start=datetime.datetime.strptime(filters['date'][0], '%Y-%m-%d'),
                    end=datetime.datetime.strptime(filters['date'][1], '%Y-%m-%d'))
                # restrict to entries with dates
                sqs = sqs.filter(start_date__gt=datetime.datetime.strptime('1900-01-01', '%Y-%m-%d'))\
                    .filter(end_date__lte=datetime.datetime.strptime(datetime.date.today().isoformat(), '%Y-%m-%d'))
                # filter out entries that don't fall in specified range
                sqs = sqs.exclude(start_date__gt=datefilter.end).exclude(
                    end_date__lt=datefilter.start)
            except ValueError as date_ex:
                return JsonResponse(
                    {
                        'message': 'Filter date parsing error expecting String %Y-%m-%d : {}'
                        .format(str(date_ex)),
                        'received': request.query_params
                    },
                    status=400)
            except Exception as gen_date_ex:
                return JsonResponse(
                    {
                        'message': 'Filter date parsing error expecting two date string values : {}'
                        .format(str(gen_date_ex)),
                        'received': request.query_params
                    },
                    status=400)
    except TypeError:
        # no filters passed: json.loads(None) raises
        # "the JSON object must be str, bytes or bytearray not NoneType"
        pass
    except json.JSONDecodeError as parse_ex:
        return JsonResponse(
            {
                'message': 'Filter JSON parsing error - {}'.format(str(parse_ex)),
                'received': request.query_params
            },
            status=400)
    except Exception as gen_ex:
        logger.warning('hs_discover API - {}: {}'.format(type(gen_ex), str(gen_ex)))
        return JsonResponse(
            {
                'message': '{}'.format(
                    '{}: query error. Contact a server administrator.'.format(type(gen_ex)))
            },
            status=520)

    filterdata = []
    if request.GET.get('filterbuilder'):
        authors = sqs.facet('author').facet_counts()['fields']['author']
        owners = sqs.facet('owner').facet_counts()['fields']['owner']
        subjects = sqs.facet('subject').facet_counts()['fields']['subject']
        contributors = sqs.facet('contributor').facet_counts()['fields']['contributor']
        types = sqs.facet('content_type').facet_counts()['fields']['content_type']
        availability = sqs.facet('availability').facet_counts()['fields']['availability']
        if request.GET.get('updatefilters'):
            # Keep only facets with at least one hit.
            authors = [x for x in authors if x[1] > 0]
            owners = [x for x in owners if x[1] > 0]
            subjects = [x for x in subjects if x[1] > 0]
            contributors = [x for x in contributors if x[1] > 0]
            types = [x for x in types if x[1] > 0]
            availability = [x for x in availability if x[1] > 0]
        filterdata = [
            authors[:self.filterlimit], owners[:self.filterlimit],
            subjects[:self.filterlimit], contributors[:self.filterlimit],
            types[:self.filterlimit], availability[:self.filterlimit]
        ]

    if sort == 'author':
        sqs = sqs.order_by('author_exact')
    elif sort == '-author':
        sqs = sqs.order_by('-author_exact')
    else:
        sqs = sqs.order_by(sort)

    resources = []

    # TODO future release will add title and facilitate order_by title_exact
    # convert sqs to list after facet operations to allow for Python sorting
    # instead of Haystack order_by
    if sort == 'title':
        sqs = sorted(sqs, key=lambda idx: idx.title.lower())
    elif sort == '-title':
        sqs = sorted(sqs, key=lambda idx: idx.title.lower(), reverse=True)

    p = Paginator(sqs, self.perpage)
    if request.GET.get('pnum'):
        pnum = int(request.GET.get('pnum'))
        pnum = min(pnum, p.num_pages)
        if pnum < 1:
            return JsonResponse(
                {
                    'resources': json.dumps([]),
                    'geodata': json.dumps([]),
                    'rescount': 0,
                    'pagecount': 1,
                    'perpage': self.perpage
                },
                status=200)
    else:
        pnum = 1  # page number not specified, implies page 1
    pnum = min(pnum, p.num_pages)

    geodata = []
    for result in p.page(pnum):
        contributor = 'None'  # actually a list; can have multiple values
        owner = 'None'        # actually a list; can have multiple values
        author_link = None    # None avoids rendering an anchor
        creator = 'None'
        author = 'None'
        if result.creator:
            creator = result.creator
        # No concept of authors in the DataOne standard; creator stands in.
        authors = creator
        if result.author:
            author_link = result.author_url
            author = str(result.author)
            if authors == 'None':
                authors = author
        else:
            if result.organization:
                if isinstance(result.organization, list):
                    author = str(result.organization[0])
                else:
                    author = str(result.organization)
                author = author.replace('"', '')
                author = author.replace('[', '')
                author = author.replace(']', '').strip()
                if authors == 'None':
                    authors = author
        if result.contributor is not None:
            try:
                contributor = result.contributor
            except:
                pass
        if result.owner is not None:
            try:
                owner = result.owner
            except:
                pass
        # Empty string so the frontend attribute always exists but can be
        # evaluated for empty.
        pt = ''
        try:
            if 'box' in result.coverage_type:
                pt = {
                    'short_id': result.short_id,
                    'title': result.title,
                    'coverage_type': 'box'
                }
            elif 'point' in result.coverage_type:
                pt = {
                    'short_id': result.short_id,
                    'title': result.title,
                    'coverage_type': 'point'
                }
            if isinstance(result.north, (int, float)):
                pt['north'] = result.north
            if isinstance(result.east, (int, float)):
                pt['east'] = result.east
            if isinstance(result.northlimit, (int, float)):
                pt['northlimit'] = result.northlimit
            if isinstance(result.southlimit, (int, float)):
                pt['southlimit'] = result.southlimit
            if isinstance(result.eastlimit, (int, float)):
                pt['eastlimit'] = result.eastlimit
            if isinstance(result.westlimit, (int, float)):
                pt['westlimit'] = result.westlimit
            geodata.append(pt)
        except:
            # HydroShare production contains dirty data; this handling is
            # in place until the data is cleaned.
            pass
        resources.append({
            "title": result.title,
            "link": result.absolute_url,
            "availability": result.availability,
            "availabilityurl": "/static/img/{}.png".format(result.availability[0]),
            "type": result.resource_type_exact,
            "author": author,
            "authors": authors,
            "contributor": contributor,
            "author_link": author_link,
            "owner": owner,
            "abstract": result.abstract,
            "subject": result.subject,
            "created": result.created.isoformat(),
            "modified": result.modified.isoformat(),
            "short_id": result.short_id,
            "geo": pt
        })

    return JsonResponse(
        {
            'resources': json.dumps(resources),
            'geodata': json.dumps(geodata),
            'rescount': p.count,
            'pagecount': p.num_pages,
            'perpage': self.perpage,
            'filterdata': json.dumps(filterdata),
            'time': (time.time() - start) / 1000
        },
        status=200)
def search(request):
    """
    Search videos by free text, with manually computed category facet
    counts (the Whoosh backend does not support faceting).
    """
    q = request.GET.get('q', '')
    facet_counts = {}
    page = None
    if q:
        cat_filter = request.GET.get('category')
        qs = SearchQuerySet()
        qs = qs.filter(content=q)
        qs = qs.filter_or(speakers__startswith=q.lower())
        if cat_filter:
            # TODO: This doesn't work quite right. It should filter
            # out anything that's not *exactly* cat_filter but it's
            # not. Could be a problem here or with the indexing. The
            # haystack docs are mysterious.
            qs = qs.filter_and(category__exact=cat_filter)

        # TODO: Whoosh doesn't handle faceting, so we have to do it
        # manually. Fix this so it detects whether the haystack backend
        # supports facets and if so, uses the backend and not the db.
        counts_by_category = {}
        for hit in qs:
            counts_by_category[hit.category] = counts_by_category.get(hit.category, 0) + 1
        facet_counts['category'] = sorted(
            counts_by_category.items(), key=lambda pair: pair[1], reverse=True)

        paginator = Paginator(qs, 25)
        raw_page = request.GET.get('p', '1')
        try:
            page_number = max(1, int(raw_page))
        except ValueError:
            page_number = 1
        try:
            page = paginator.page(page_number)
        except EmptyPage:
            page = paginator.page(1)

    title = u'Search: {query}'.format(query=q) if q else u'Search'

    # Strip the category parameter so facet links can re-append it.
    get_params = request.GET.copy()
    if 'category' in get_params:
        get_params.pop('category')
    base_url = request.path + '?' + get_params.urlencode()

    return render(
        request, 'videos/search.html', {
            'query': q,
            'base_url': base_url,
            'title': title,
            'facet_counts': facet_counts,
            'page': page
        })
def get_search(self, request, **kwargs):
    """
    Tastypie endpoint returning one page of indexed ActivityLog entries.

    Supports filtering by ``verb``, ``tags``, or a ``user`` id combined
    with a ``mode`` ('actor', 'target_user', 'follow', 'all').  Returns
    a dict with 'meta' pagination info and dehydrated 'objects'.
    """
    # Standard tastypie request checks.
    self.method_check(request, allowed=['get'])
    self.is_authenticated(request)
    self.throttle_check(request)
    # Pagination: clamp limit to max_limit; derive the page from offset.
    limit = int(request.GET.get('limit', self._meta.limit))
    limit = limit if limit <= self._meta.max_limit else self._meta.max_limit
    offset = int(request.GET.get('offset', 0))
    # NOTE(review): the file uses iteritems elsewhere, so this is
    # Python 2 floor division; under Python 3 it would yield a float.
    page = (offset / limit) + 1
    q_args = {"published": True}
    sqs = SearchQuerySet().models(ActivityLog).load_all()
    if 'verb' in request.GET:
        q_args['verb'] = request.GET.get('verb')
    if 'tags' in request.GET:
        # NOTE(review): in this branch q_args is extended but never
        # applied to sqs (no branch below fires), so the tags filter
        # appears to be a no-op — confirm intended behavior.
        q_args['tags__in'] = request.GET.get('tags').split(',')
    # GET FOLLOW KEYS
    elif 'user' in request.GET and 'mode' in request.GET:
        uid = int(request.GET.get('user'))
        mode = request.GET.get('mode')
        if mode == 'actor':
            # Activities performed by the user.
            q_args['actor_id'] = uid
            sqs = sqs.filter(**q_args)
        elif mode == 'target_user':
            # Activities directed at the user.
            q_args['target_user_id'] = uid
            sqs = sqs.filter(**q_args)
        elif mode == 'follow':
            # Activities matching anything the user follows.
            q_args['follow_keys__in'] = [f.follow_key for f in Follow.objects.filter(user__id=uid)]
            sqs = sqs.filter(**q_args)
        elif mode == 'all':
            # Followed activity plus anything the user acted in or received.
            follow_keys = [f.follow_key for f in Follow.objects.filter(user__id=uid)]
            if len(follow_keys) > 0:
                sqs = sqs.filter_or(follow_keys__in=follow_keys).filter_or(actor_id=uid).filter_or(target_user_id=uid).filter_and(**q_args)
            else:
                sqs = sqs.filter_or(actor_id=uid).filter_or(target_user_id=uid).filter_and(**q_args)
    else:
        sqs = sqs.filter(**q_args)
    sqs = sqs.order_by('-created')
    paginator = Paginator(sqs, limit)
    try:
        page = paginator.page(page)
    except InvalidPage:
        raise Http404("Sorry, no results on that page.")
    # Dehydrate each result, using the per-object cache when warm.
    objects = []
    for result in page.object_list:
        cache_key = self._meta.resource_name+'.'+str(result.obj_id)
        data = self._meta.cache.get(cache_key)
        if not data:
            bundle = self.build_bundle(obj=result.object, request=request)
            bundle = self.full_dehydrate(bundle)
            # NOTE(review): assumes cache.set returns the stored bundle;
            # if it returns None the fresh bundle is dropped — verify
            # against the configured tastypie cache class.
            data = self._meta.cache.set(cache_key, bundle)
        objects.append(data)
    object_list = {
        'meta': {
            'limit': limit,
            'next': page.has_next(),
            'previous': page.has_previous(),
            'total_count': sqs.count(),
            'offset': offset
        },
        'objects': objects,
    }
    self.log_throttled_access(request)
    return self.create_response(request, object_list)
def deals_return_response(self, request, **kwargs):
    """
    Return nearby, active, offline coupon deals as a paginated JSON
    payload.

    Requires a ``location`` parameter ("lat,lng"); optional parameters:
    ``radius`` (miles, default 10), ``query`` (free text, also recorded
    in the 'localindex' popularity index), ``category_slugs``,
    ``provider_slugs``, ``per_page``, ``page``, ``updated_after``, and
    ``id`` (client identifier for popularity indexing, default 'uuid').
    """
    self.method_check(request, allowed=['get'])
    params_dict = request.GET
    params_keys = params_dict.keys()
    location_param = params_dict.get('location', None)
    if not location_param:
        response = {
            'error': {
                'message': "You must supply a valid user location information."
            }
        }
        return self.create_response(request, response)
    # NOTE(review): assumes location is "lat,lng"; malformed input makes
    # map(float, ...) raise — confirm upstream validation.
    lat_lng_in_list = location_param.split(',')
    lat, lng = map(float, lat_lng_in_list)
    id_param = params_dict.get('id', 'uuid')
    radius = D(
        mi=float(params_dict['radius'])) if 'radius' in params_keys else D(
        mi=10)
    # Point takes (x, y) == (lng, lat).
    user_pnt = Point(lng, lat)
    # Base queryset: active, offline, non-duplicate coupons within the
    # radius, ordered by distance from the user.
    sqs = SearchQuerySet().using('mobile_api').filter(django_ct='core.coupon', online=False, is_duplicate=False, is_deleted=False, status='considered-active')\
        .exclude(end__lt=datetime.now(pytz.utc))\
        .dwithin('merchant_location', user_pnt, radius).distance('merchant_location', user_pnt)\
        .order_by('distance')
    if 'query' in params_keys:
        query = params_dict['query']
        sqs_by_query = SearchQuerySet().using('mobile_api').filter(
            mobilequery=query)
        # Intersect the distance-ordered set with the text matches.
        sqs = sqs.__and__(sqs_by_query)
        # Prepare for 'localindex' api service
        self.create_localinfo_index_if_doesnt_exist()
        matched_category_indices = [
            i for i, s in enumerate(self.available_categories_list)
            if query.lower() in s.lower()
        ]
        matched_category_names = [
            self.available_categories_list[i]
            for i in matched_category_indices
        ]
        # Side effect: record the search term for popularity ranking.
        self.index_it_in_localinfo_populars(id_param, location_param,
                                            string.capwords(query),
                                            matched_category_names)
    if 'category_slugs' in params_keys:
        category_slugs_list = params_dict['category_slugs'].split(',')
        sqs_by_category = SearchQuerySet().using('mobile_api')
        for c in category_slugs_list:
            sqs_by_category = sqs_by_category.filter_or(
                category_slugs=c.strip())
        sqs = sqs.__and__(sqs_by_category)
    if 'provider_slugs' in params_keys:
        provider_slugs_list = params_dict['provider_slugs'].split(',')
        sqs_by_provider = SearchQuerySet().using('mobile_api')
        for p in provider_slugs_list:
            sqs_by_provider = sqs_by_provider.filter_or(
                provider_slugs=p.strip())
        sqs = sqs.__and__(sqs_by_provider)
    updated_after = params_dict.get('updated_after', None)
    # Manual pagination over the sliced queryset.
    per_page = int(params_dict.get('per_page', 20))
    page = int(params_dict.get('page', 1))
    start_point = (page - 1) * per_page
    end_point = page * per_page
    deals = []
    for sqs_obj in sqs[start_point:end_point]:
        merchant_pnt = sqs_obj.merchant_location
        if not merchant_pnt:
            # Skip records without a merchant location.
            continue
        dist_to_user = geopy_distance(
            (user_pnt.y, user_pnt.x),
            (merchant_pnt.y, merchant_pnt.x)).miles
        deal_description = sqs_obj.text
        if sqs_obj.related_deals_count != 0:
            deal_description = deal_description if deal_description else ""
            deal_description += "\n\nFind {} more similar deal(s) from this vendor on {}!".format(
                sqs_obj.related_deals_count, sqs_obj.provider)
        each_deal = {
            'deal': {
                'id': sqs_obj.coupon_ref_id,
                'title': sqs_obj.embedly_title,
                'short_title': sqs_obj.embedly_description,
                'description': deal_description,
                'fine_print': sqs_obj.restrictions,
                'number_sold': None,
                'url': sqs_obj.link,
                'untracked_url': sqs_obj.directlink,
                'price': sqs_obj.price,
                'value': sqs_obj.listprice,
                'discount_amount': sqs_obj.discount,
                'discount_percentage': float(sqs_obj.percent) / 100,
                'commission': None,
                'provider_name': sqs_obj.provider,
                'provider_slug': sqs_obj.provider_slug,
                'category_name': ', '.join(sqs_obj.categories) if sqs_obj.categories else None,
                'category_slug': ', '.join(sqs_obj.category_slugs) if sqs_obj.category_slugs else None,
                'image_url': sqs_obj.image,
                'online': sqs_obj.online,
                'expires_at': sqs_obj.end,
                'created_at': sqs_obj.start,
                'updated_at': sqs_obj.lastupdated,
                'is_duplicate': sqs_obj.is_duplicate,
                'merchant': {
                    'id': sqs_obj.merchant_ref_id,
                    'name': sqs_obj.merchant_name,
                    'address': sqs_obj.merchant_address,
                    'locality': sqs_obj.merchant_locality,
                    'region': sqs_obj.merchant_region,
                    'postal_code': sqs_obj.merchant_postal_code,
                    'country': "United States",
                    'country_code': "US",
                    'latitude': merchant_pnt.y,
                    'longitude': merchant_pnt.x,
                    'dist_to_user_mi': dist_to_user,
                    'url': sqs_obj.merchant_link,
                }
            }
        }
        deals.append(each_deal)
    # Echo the effective query parameters back to the client.  The
    # ternaries only reference the branch-local names when the matching
    # parameter was present, so no NameError can occur.
    query = {
        'total': len(sqs),
        'page': page,
        'per_page': per_page,
        'query': query if 'query' in params_keys else None,
        'location': {
            'latitude': lat,
            'longitude': lng,
        },
        'radius': float(params_dict['radius']) if 'radius' in params_keys else 10,
        'online': False,
        'category_slugs': category_slugs_list if 'category_slugs' in params_keys else None,
        'provider_slugs': provider_slugs_list if 'provider_slugs' in params_keys else None,
        'updated_after': updated_after,
    }
    response = {
        'query': query,
        'deals': deals,
    }
    return self.create_response(request, response)
def haystack_search_api(request, format="json", **kwargs):
    """
    View that drives the search api.

    Reads query/filter/sort parameters from request.REQUEST (legacy
    Django GET+POST merge), builds a haystack queryset over maps,
    layers, users, documents and groups, and returns results plus facet
    counts in the requested format ("html", "raw" JSON, or the default
    JSON search response).
    """
    from haystack.inputs import Raw
    from haystack.query import SearchQuerySet, SQ

    # Retrieve Query Params
    id = request.REQUEST.get("id", None)
    query = request.REQUEST.get('q', None)
    category = request.REQUEST.get("category", None)
    limit = int(
        request.REQUEST.get(
            "limit",
            getattr(settings, "HAYSTACK_SEARCH_RESULTS_PER_PAGE", 20)))
    startIndex = int(request.REQUEST.get("startIndex", 0))
    sort = request.REQUEST.get("sort", "relevance")
    type_facets = request.REQUEST.get("type", None)
    format = request.REQUEST.get("format", format)
    date_start = request.REQUEST.get("start_date", None)
    date_end = request.REQUEST.get("end_date", None)
    keyword = request.REQUEST.get("kw", None)
    service = request.REQUEST.get("service", None)
    local = request.REQUEST.get("local", None)

    # Geospatial Elements
    bbox = request.REQUEST.get("extent", None)

    ts = time()
    sqs = SearchQuerySet()

    # Cap the page size to protect the backend.
    limit = min(limit, 500)

    # Filter by ID
    if id:
        sqs = sqs.narrow("django_id:%s" % id)

    # Filter by Type and subtype
    if type_facets is not None:
        type_facets = type_facets.replace("owner", "user").split(",")
        subtype_facets = ["vector", "raster"]
        types = []
        subtypes = []
        for type in type_facets:
            if type in ["map", "layer", "user", "document", "group"]:
                # Type is one of our Major Types (not a sub type)
                types.append(type)
            elif type in subtype_facets:
                subtypes.append(type)
        if len(subtypes) > 0:
            # Requesting a subtype: exclude the subtypes NOT requested.
            for sub_type in subtype_facets:
                if sub_type not in subtypes:
                    sqs = sqs.exclude(subtype='%s' % sub_type)
        if len(types) > 0:
            sqs = sqs.narrow("type:%s" % ','.join(map(str, types)))

    # Filter by Query Params
    # haystack bug? if boosted fields aren't included in the
    # query, then the score won't be affected by the boost
    if query:
        if query.startswith('"') or query.startswith('\''):
            # Match exact phrase
            phrase = query.replace('"', '')
            sqs = sqs.filter(
                SQ(title__exact=phrase) |
                SQ(abstract__exact=phrase) |
                SQ(content__exact=phrase))
        else:
            # Word-by-word: "OR" switches the next word to filter_or,
            # everything else is ANDed; bare AND/OR tokens are skipped.
            words = query.split()
            for word in range(0, len(words)):
                if word == 0:
                    sqs = sqs.filter(
                        SQ(title=Raw(words[word])) |
                        SQ(abstract=Raw(words[word])) |
                        SQ(content=Raw(words[word])))
                elif words[word] in ["AND", "OR"]:
                    pass
                elif words[word - 1] == "OR":
                    # previous word OR this word
                    sqs = sqs.filter_or(
                        SQ(title=Raw(words[word])) |
                        SQ(abstract=Raw(words[word])) |
                        SQ(content=Raw(words[word])))
                else:
                    # previous word AND this word
                    sqs = sqs.filter(
                        SQ(title=Raw(words[word])) |
                        SQ(abstract=Raw(words[word])) |
                        SQ(content=Raw(words[word])))

    # filter by cateory
    if category:
        sqs = sqs.narrow('category:%s' % category)

    # filter by keyword
    if keyword:
        sqs = sqs.narrow('keywords:%s' % keyword)

    if date_start:
        sqs = sqs.filter(SQ(date__gte=date_start))

    if date_end:
        sqs = sqs.filter(SQ(date__lte=date_end))
    """
    ### Code to filter on temporal extent start/end dates instead
    if date_start or date_end:
        #Exclude results with no dates at all
        sqs = sqs.filter(
            SQ(temporal_extent_start=Raw("[* TO *]")) | SQ(temporal_extent_end=Raw("[* TO *]"))
        )
    if temporal_start and temporal_end:
        #Return anything with a start date < date_end or an end date > date_start
        sqs = sqs.filter(
            SQ(temporal_extent_end__gte=date_start) | SQ(temporal_extent_start__lte=date_end)
        )
    elif temporal_start:
        #Exclude anything with an end date <date_start
        sqs = sqs.exclude(
            SQ(temporal_extent_end__lte=date_start)
        )
    elif temporal_end:
        #Exclude anything with a start date > date_end
        sqs = sqs.exclude(
            SQ(temporal_extent_start__gte=date_end)
        )
    """

    if bbox:
        left, right, bottom, top = bbox.split(',')
        sqs = sqs.filter(
            # first check if the bbox has at least one point inside the window
            SQ(bbox_left__gte=left) &
            SQ(bbox_left__lte=right) &
            SQ(bbox_top__gte=bottom) &
            SQ(bbox_top__lte=top) |  # check top_left is inside the window
            SQ(bbox_right__lte=right) &
            SQ(bbox_right__gte=left) &
            SQ(bbox_top__lte=top) &
            SQ(bbox_top__gte=bottom) |  # check top_right is inside the window
            SQ(bbox_bottom__gte=bottom) &
            SQ(bbox_bottom__lte=top) &
            SQ(bbox_right__lte=right) &
            SQ(bbox_right__gte=left) |  # check bottom_right is inside the window
            SQ(bbox_top__lte=top) &
            SQ(bbox_top__gte=bottom) &
            SQ(bbox_left__gte=left) &
            SQ(bbox_left__lte=right) |  # check bottom_left is inside the window
            # then check if the bbox is including the window
            SQ(bbox_left__lte=left) &
            SQ(bbox_right__gte=right) &
            SQ(bbox_bottom__lte=bottom) &
            SQ(bbox_top__gte=top))

    # Filter by permissions
    '''
    ### Takes too long with many results.
    ### Instead, show all results but disable links on restricted ones.
    for i, result in enumerate(sqs):
        if result.type == 'layer':
            if not request.user.has_perm('layers.view_layer',obj = result.object):
                sqs = sqs.exclude(id = result.id)
        if result.type == 'map':
            if not request.user.has_perm('maps.view_map',obj = result.object):
                sqs = sqs.exclude(id = result.id)
    '''

    #filter by service
    '''
    if service:
        sqs = sqs.narrow('service:%s' % service)
    if local:
        sqs = sqs.narrow('local:%s' % local)
    '''

    # Apply Sort
    # TODO: Handle for Revised sort types
    # [relevance, alphabetically, rating, created, updated, popularity]
    if sort.lower() == "newest":
        sqs = sqs.order_by("-modified")
    elif sort.lower() == "oldest":
        sqs = sqs.order_by("modified")
    elif sort.lower() == "alphaaz":
        sqs = sqs.order_by("title_sortable")
    elif sort.lower() == "alphaza":
        sqs = sqs.order_by("-title_sortable")
    elif sort.lower() == "popularity":
        sqs = sqs.order_by("-popular_count")
    else:
        sqs = sqs.order_by("-_score")

    # Setup Search Results
    results = []
    items = []

    # Build the result based on the limit
    for i, result in enumerate(sqs[startIndex:startIndex + limit]):
        logger.info(result)
        data = result.get_stored_fields()
        resource = None
        # Serialize datetimes for JSON output.
        if "modified" in data:
            data["modified"] = data["modified"].strftime(
                "%Y-%m-%dT%H:%M:%S.%f")
        if "temporal_extent_start" in data and data[
                "temporal_extent_start"] is not None:
            data["temporal_extent_start"] = data[
                "temporal_extent_start"].strftime("%Y-%m-%dT%H:%M:%S.%f")
        if "temporal_extent_end" in data and data[
                "temporal_extent_end"] is not None:
            data["temporal_extent_end"] = data["temporal_extent_end"].strftime(
                "%Y-%m-%dT%H:%M:%S.%f")
        # Wrap each hit in the matching normalizer for its type.
        if data['type'] == "map":
            resource = MapNormalizer(Map.objects.get(pk=data['oid']))
        elif data['type'] == "layer":
            resource = LayerNormalizer(Layer.objects.get(pk=data['oid']))
        elif data['type'] == "user":
            resource = OwnerNormalizer(Profile.objects.get(pk=data['oid']))
        elif data['type'] == "document":
            resource = DocumentNormalizer(Document.objects.get(pk=data['oid']))
        elif data[
                'type'] == "group" and "geonode.contrib.groups" in settings.INSTALLED_APPS:
            resource = GroupNormalizer(Group.objects.get(pk=data['oid']))
        if resource:
            resource.rating = data["rating"] if "rating" in data else 0
        results.append(data)
        items.append(resource)

    # Setup Facet Counts
    sqs = sqs.facet("type").facet("subtype")
    sqs = sqs.facet('category')
    sqs = sqs.facet('keywords')
    sqs = sqs.facet('service')
    sqs = sqs.facet('local')
    facet_counts = sqs.facet_counts()

    # Prepare Search Results
    data = {
        "success": True,
        "total": sqs.count(),
        "query_info": {
            "q": query,
            "startIndex": startIndex,
            "limit": limit,
            "sort": sort,
            "type": type_facets,
        },
        "results": results,
        "facets": dict(
            facet_counts.get("fields")['type'] +
            facet_counts.get('fields')['subtype']) if sqs.count() > 0 else [],
        "categories": {
            facet[0]: facet[1]
            for facet in facet_counts.get('fields')['category']
        } if sqs.count() > 0 else {},
        "keywords": {
            facet[0]: facet[1]
            for facet in facet_counts.get('fields')['keywords']
        } if sqs.count() > 0 else {},
    }

    # Return Results
    ts1 = time() - ts
    if format == "html":
        #Send to search/explore page
        return data, items
    elif format == "raw":
        return HttpResponse(json.dumps(data), mimetype="application/json")
    else:
        query = query_from_request(request, kwargs)
        return _search_json(query, items, data["facets"], ts1)
def basic_search(request, template='search.html', load_all=True,
                 form_class=ModelSearchForm, searchqueryset=None,
                 context_class=RequestContext, extra_context=None,
                 results_per_page=None):
    """
    A more traditional search view.

    Filters the search index by free text (``q``), department (``dept``,
    matched against both the from and to department fields), and
    ``category``, ordered by ``type`` when given.

    Template::

        ``search/search.html``

    Context::

        * page -- the current page of search results
        * paginator -- a paginator instance for the results
        * query -- the query received from the request
        * kudos_set -- all Kudos ordered by creation time
        * suggestion -- spelling suggestion, when the backend supports it
    """
    dept = request.GET.get('dept')
    # Guard against a missing 'dept' parameter: the original sliced
    # None (``dept[:3]``) and raised TypeError.
    if dept and dept[:3].upper() == 'ENG':
        # All engineering sub-departments are collapsed into 'Eng'.
        dept = 'Eng'
    sort_type = request.GET.get('type')
    category = request.GET.get('category')
    query = request.GET.get('q')

    results = SearchQuerySet()
    if query:
        results = results.filter(content=AutoQuery(query))
    if dept:
        results = results.filter_or(dept_to__contains=dept).filter_or(dept_from__contains=dept)
    if sort_type:
        results = results.order_by(sort_type)
    if category:
        results = results.filter(content__contains=category)

    paginator = Paginator(results, results_per_page or RESULTS_PER_PAGE)
    try:
        page = paginator.page(int(request.GET.get('page', 1)))
    except InvalidPage:
        raise Http404("No such page of results!")

    context = {
        'page': page,
        'paginator': paginator,
        'query': query,
    }
    context['kudos_set'] = Kudos.objects.order_by("created")

    if results.query.backend.include_spelling:
        # The original called form.get_suggestion() on an undefined
        # ``form`` (only created in commented-out code) and raised
        # NameError; ask the queryset directly instead — this is the
        # same call SearchForm.get_suggestion() makes internally.
        context['suggestion'] = results.spelling_suggestion(query)

    if extra_context:
        context.update(extra_context)
    return render_to_response(template, context,
                              context_instance=context_class(request))
def build_haystack_filters(self, parameters):
    """
    Translate API query parameters into a haystack SearchQuerySet.

    ``sqs`` stays None until the first filter applies; every filter site
    therefore uses ``(SearchQuerySet() if sqs is None else sqs)`` so an
    unfiltered request returns None rather than an all-results queryset.
    """
    from haystack.inputs import Raw
    from haystack.query import SearchQuerySet, SQ  # noqa

    sqs = None

    # Retrieve Query Params

    # Text search
    query = parameters.get('q', None)

    # Types and subtypes to filter (map, layer, vector, etc)
    type_facets = parameters.getlist("type__in", [])

    # If coming from explore page, add type filter from resource_name
    resource_filter = self._meta.resource_name.rstrip("s")
    if resource_filter != "base" and resource_filter not in type_facets:
        type_facets.append(resource_filter)

    # Publication date range (start,end)
    date_end = parameters.get("date__lte", None)
    date_start = parameters.get("date__gte", None)

    # Topic category filter
    category = parameters.getlist("category__identifier__in")

    # Keyword filter
    keywords = parameters.getlist("keywords__slug__in")

    # Region filter
    regions = parameters.getlist("regions__name__in")

    # Owner filters
    owner = parameters.getlist("owner__username__in")

    # Sort order
    sort = parameters.get("order_by", "relevance")

    # Geospatial Elements
    bbox = parameters.get("extent", None)

    # Filter by Type and subtype
    if type_facets is not None:
        types = []
        subtypes = []

        for type in type_facets:
            if type in ["map", "layer", "document", "user"]:
                # Type is one of our Major Types (not a sub type)
                types.append(type)
            elif type in LAYER_SUBTYPES.keys():
                subtypes.append(type)

        if len(subtypes) > 0:
            # A subtype implies the "layer" major type.
            types.append("layer")
            sqs = SearchQuerySet().narrow("subtype:%s" %
                                          ','.join(map(str, subtypes)))

        if len(types) > 0:
            sqs = (SearchQuerySet() if sqs is None else sqs).narrow(
                "type:%s" % ','.join(map(str, types)))

    # Filter by Query Params
    # haystack bug? if boosted fields aren't included in the
    # query, then the score won't be affected by the boost
    if query:
        if query.startswith('"') or query.startswith('\''):
            # Match exact phrase
            phrase = query.replace('"', '')
            sqs = (SearchQuerySet() if sqs is None else sqs).filter(
                SQ(title__exact=phrase) |
                SQ(description__exact=phrase) |
                SQ(content__exact=phrase)
            )
        else:
            # Word-by-word: a literal "OR" switches the next word to
            # filter_or; everything else is ANDed in.
            words = [
                w for w in re.split(
                    '\W',
                    query,
                    flags=re.UNICODE) if w]
            for i, search_word in enumerate(words):
                if i == 0:
                    sqs = (SearchQuerySet() if sqs is None else sqs) \
                        .filter(
                        SQ(title=Raw(search_word)) |
                        SQ(description=Raw(search_word)) |
                        SQ(content=Raw(search_word))
                    )
                elif search_word in ["AND", "OR"]:
                    pass
                elif words[i - 1] == "OR":
                    # previous word OR this word
                    sqs = sqs.filter_or(
                        SQ(title=Raw(search_word)) |
                        SQ(description=Raw(search_word)) |
                        SQ(content=Raw(search_word))
                    )
                else:
                    # previous word AND this word
                    sqs = sqs.filter(
                        SQ(title=Raw(search_word)) |
                        SQ(description=Raw(search_word)) |
                        SQ(content=Raw(search_word))
                    )

    # filter by category
    if category:
        sqs = (SearchQuerySet() if sqs is None else sqs).narrow(
            'category:%s' % ','.join(map(str, category)))

    # filter by keyword: use filter_or with keywords_exact
    # not using exact leads to fuzzy matching and too many results
    # using narrow with exact leads to zero results if multiple keywords
    # selected
    if keywords:
        for keyword in keywords:
            sqs = (
                SearchQuerySet() if sqs is None else sqs).filter_or(
                keywords_exact=keyword)

    # filter by regions: use filter_or with regions_exact
    # not using exact leads to fuzzy matching and too many results
    # using narrow with exact leads to zero results if multiple keywords
    # selected
    if regions:
        for region in regions:
            sqs = (
                SearchQuerySet() if sqs is None else sqs).filter_or(
                regions_exact__exact=region)

    # filter by owner
    if owner:
        sqs = (
            SearchQuerySet() if sqs is None else sqs).narrow(
            "owner__username:%s" % ','.join(map(str, owner)))

    # filter by date
    if date_start:
        sqs = (SearchQuerySet() if sqs is None else sqs).filter(
            SQ(date__gte=date_start)
        )

    if date_end:
        sqs = (SearchQuerySet() if sqs is None else sqs).filter(
            SQ(date__lte=date_end)
        )

    # Filter by geographic bounding box: drop anything entirely outside
    # the requested window.
    if bbox:
        left, bottom, right, top = bbox.split(',')
        sqs = (
            SearchQuerySet() if sqs is None else sqs).exclude(
            SQ(
                bbox_top__lte=bottom) | SQ(
                bbox_bottom__gte=top) | SQ(
                bbox_left__gte=right) | SQ(
                    bbox_right__lte=left))

    # Apply sort
    if sort.lower() == "-date":
        sqs = (
            SearchQuerySet() if sqs is None else sqs).order_by("-date")
    elif sort.lower() == "date":
        sqs = (
            SearchQuerySet() if sqs is None else sqs).order_by("date")
    elif sort.lower() == "title":
        sqs = (SearchQuerySet() if sqs is None else sqs).order_by(
            "title_sortable")
    elif sort.lower() == "-title":
        sqs = (SearchQuerySet() if sqs is None else sqs).order_by(
            "-title_sortable")
    elif sort.lower() == "-popular_count":
        sqs = (SearchQuerySet() if sqs is None else sqs).order_by(
            "-popular_count")
    else:
        # Default (including "relevance"): newest first.
        sqs = (
            SearchQuerySet() if sqs is None else sqs).order_by("-date")

    return sqs
def code_search(request):  # code search view
    """Find ProblemCode index entries similar to the snippet in request.GET['code1'].

    Results from four strategies are merged in priority order, de-duplicated
    by search-index id:
      1. exact match on character-level tokens,
      2. exact match on lexer-level tokens,
      3. OR-search over Python AST tokens,
      4. OR-search over C++ AST tokens.
    Renders 'search/code_search_result.html' with the surviving posts.
    """
    all_time = time.time()  # wall-clock timer for the whole request
    query = request.GET['code1']
    # query_language = request.GET['select_language']
    raw_query = query
    # Tokens used both for highlighting and for the "too short" heuristic below.
    query_highlight_token = highlight_words(raw_query)
    all_posts = []           # merged result objects, in priority order
    all_posts_id = []        # ids already present in all_posts (dedup list)
    all_posts_filtered = []  # posts whose Problem row still exists
    if query != "":
        print('Query Code: ', query)
        lex_token = tokenize(raw_query, "lex").strip()
        char_token = tokenize(raw_query, "char").strip()
        print('Lex Tokens: ', lex_token)
        print('Char Tokens: ', char_token)
        time_1 = time.time()
        # --- strategy 1: exact character-token match ---
        if char_token == "":
            char_search_result = []
        else:
            char_search_result = SearchQuerySet().using('problemcode').filter(
                content=Exact(char_token))
        # char_search_result = SearchQuerySet().using('problemcode').all()
        # for query_item in char_token.split():
        #     char_search_result = char_search_result.filter_or(content=query_item)
        # char_search_result = list(char_search_result)
        for item in char_search_result[:2000]:  # each strategy capped at 2000 hits
            id = item.id
            all_posts.append(item)
            all_posts_id.append(id)
        char_search_num = len(char_search_result)
        print(all_posts_id[:10])
        print('char search result num: ', char_search_num)
        # --- strategy 2: exact lexer-token match, appended after strategy 1 ---
        if lex_token == "":
            lex_search_result = []
        else:
            lex_search_result = SearchQuerySet().using('problemcode').filter(
                content=Exact(lex_token))
        # lex_search_result = SearchQuerySet().using('problemcode').all()
        # for query_item in lex_token.split():
        #     lex_search_result = lex_search_result.filter_or(content=query_item)
        # lex_search_result = list(lex_search_result)
        for item in lex_search_result[:2000]:
            id = item.id
            if id not in all_posts_id:
                all_posts.append(item)
                all_posts_id.append(id)
        lex_search_num = len(lex_search_result)
        print(all_posts_id[:10])
        print('lex search result num: ', lex_search_num)
        print('time_1: ', time.time() - time_1)
        time_2 = time.time()
        if len(query_highlight_token.split()) == 1:
            # If the query code is too short (only a single word), skip the
            # AST-analysis search strategies entirely.
            posts_python_ast = []
            python_search_num = 0
            posts_cpp_ast = []
            cpp_search_num = 0
            print('Too Short To AST Analysis.')
        else:
            # Run Python AST analysis and use its tokens to find similar code;
            # these results are appended after the lex/char results:
            try:
                query_ast = convert_python(raw_query)
                print('Python Query AST: ', query_ast)
                query_ast = query_ast.split()
                if len(query_ast) == 1:
                    posts_python_ast = []
                else:
                    posts_python_ast = SearchQuerySet().using(
                        'problemcode').all()
                    for query_item in query_ast:
                        posts_python_ast = posts_python_ast.filter_or(
                            content=query_item)
                python_search_num = len(posts_python_ast)
                print('python search result num: ', python_search_num)
                # posts_ast = posts_ast[:1000]
                # posts = posts | posts_python_ast
                # char_lex_id = set(char_result_id + lex_result_id)
                # posts += [post for post in posts_ast if post.id not in char_lex_id]
                # python_ast_id = [int(post.id) for post in posts_ast]
            except:  # any parse failure simply disables this strategy
                posts_python_ast = []
                python_search_num = 0
                print('Python AST Analysis Failed.')
            # Run C++ AST analysis and use its tokens to find similar code;
            # these results are appended after the lex/char results:
            try:
                query_ast = convert_cpp(cpp_head_remove(raw_query))
                print('C++ Query AST: ', query_ast)
                query_ast = query_ast.split()
                if len(query_ast) == 1:
                    posts_cpp_ast = []
                else:
                    posts_cpp_ast = SearchQuerySet().using('problemcode').all()
                    for query_item in query_ast:
                        posts_cpp_ast = posts_cpp_ast.filter_or(
                            content=query_item)
                cpp_search_num = len(posts_cpp_ast)
                print('c++ search result num: ', cpp_search_num)
                # posts_ast = posts_ast[:1000]
                # posts = posts | posts_cpp_ast
                # Ids already collected, i.e. the union of char, lex and python:
                # char_lex_python_id = set(char_result_id + lex_result_id + python_ast_id)
                # posts += [post for post in posts_ast if post.id not in char_lex_python_id]
            except:  # any parse failure simply disables this strategy
                posts_cpp_ast = []
                cpp_search_num = 0
                print('C++ AST Analysis Failed.')
        # Merge AST results (strategies 3 and 4), still deduplicating by id.
        for item in posts_python_ast[:2000]:
            id = item.id
            if id not in all_posts_id:
                all_posts.append(item)
                all_posts_id.append(id)
        for item in posts_cpp_ast[:2000]:
            id = item.id
            if id not in all_posts_id:
                all_posts.append(item)
                all_posts_id.append(id)
        all_posts_num = len(all_posts)
        print('All Posts Num: ', all_posts_num)
        print('time_2: ', time.time() - time_2)
        time_3 = time.time()
        # Keep only posts whose Problem row still exists.
        # NOTE(review): this issues one DB query per post (N+1); confirm
        # whether a single Problem.objects.filter(id__in=...) prefetch
        # would be acceptable here.
        for item in all_posts:
            if Problem.objects.filter(id=item.problem).exists():
                item.code = cpp_head_convert(item.code)
                all_posts_filtered.append(item)
        print('time_3: ', time.time() - time_3)
    result_num = len(all_posts_filtered)
    print('Returned Posts Count: ', result_num)
    print('all_time: ', time.time() - all_time)
    return render(
        request, 'search/code_search_result.html', {
            'posts': all_posts_filtered,
            'raw_query': raw_query,
            'query_token': query_highlight_token,
            'result_num': result_num
        })
def search(request):
    """Full-text video search with manual category faceting and pagination."""
    query = request.GET.get('q', '')
    facet_counts = {}
    page = None
    if query:
        category = request.GET.get('category')
        results = SearchQuerySet().filter(content=query)
        results = results.filter_or(speakers__startswith=query.lower())
        if category:
            # TODO: This doesn't work quite right. It should filter
            # out anything that's not *exactly* the selected category but
            # it's not. Could be a problem here or with the indexing. The
            # haystack docs are mysterious.
            results = results.filter_and(category__exact=category)
        # TODO: Whoosh doesn't handle faceting, so we have to do it
        # manually. Fix this so it detects whether the haystack backend
        # supports facets and if so, uses the backend and not the db.
        counts = {}
        for hit in results:
            counts[hit.category] = counts.get(hit.category, 0) + 1
        facet_counts['category'] = sorted(
            counts.items(), key=lambda pair: pair[1], reverse=True)
        paginator = Paginator(results, 25)
        try:
            page_number = max(1, int(request.GET.get('p', '1')))
        except ValueError:
            page_number = 1
        try:
            page = paginator.page(page_number)
        except EmptyPage:
            page = paginator.page(1)
    if query:
        title = u'Search: {query}'.format(
            query=bleach.clean(query, tags=[], strip=True))
    else:
        title = u'Search'
    # Rebuild the query string without the category so facet links can
    # append their own.
    params = request.GET.copy()
    if 'category' in params:
        params.pop('category')
    base_url = request.path + '?' + params.urlencode()
    return render(
        request, 'videos/search.html', {
            'query': query,
            'base_url': base_url,
            'title': title,
            'facet_counts': facet_counts,
            'page': page
        })
def advanced_haystack_search(self, query_items, similar_tags=False, book_ids=None): regex_string = r"{0}" print query_items logger.debug(query_items) keywords = query_items.get('keyword', '').strip() keywords = [x.strip() for x in keywords.split(' ')] print keywords clean_keywords = [] stop = corpus.stopwords.words('english') for word in keywords: if word not in stop: clean_keywords.append(word) keywords = clean_keywords print 'keywords', keywords year = query_items.get('year', '').strip() author = query_items.get('author', '').strip() illustrator = query_items.get('illustrator', '').strip() number_of_results = query_items.get('num_results', '').strip() book_id = query_items.get('book_id', '').strip() publisher = query_items.get('publisher', '').strip() publishing_place = query_items.get('publishing_place', '').strip() title = query_items.get('title', '').strip() tag_keywords_only = query_items.get('tag_keywords_only', False) all_results = SearchQuerySet() print all_results.count() # # This should work, but it makes solr throw "too many boolean clauses" # # if the user selects too many years/ books # if book_ids is not None and len(book_ids) > 0: # print len(book_ids), type(book_ids) # # image_ids_from_books = [] # # if x.book_identifier in book_ids: # # image_ids_from_books.append(x.flickr_id) # all_results = all_results.filter(book_identifier__in=book_ids) if len(year): decade = year[0:3] all_results = all_results.filter(SQ(date__startswith=decade)) if len(author): # all_results = all_results.filter_or(first_author__icontains='*' + author + '*') author_words = [x.strip() for x in author.split(' ')] for author_word in author_words: re.sub(r'\W+', '', author_word) all_results = all_results.filter(first_author__icontains='*' + author_word + '*') if len(title): # all_results = all_results.filter_or(title__icontains='*"' + title + '"*') title_words = [x.strip() for x in title.split(' ')] for title_word in title_words: re.sub(r'\W+', '', title_word) all_results = 
all_results.filter(title__icontains='*' + title_word + '*') # all_results = all_results.filter(SQ(title=Raw('*' + title + '*'))) if len(illustrator): # all_results = all_results.filter_or(first_author__icontains='*' + author + '*') illustrator_words = [x.strip() for x in illustrator.split(' ')] for illustrator_word in illustrator_words: re.sub(r'\W+', '', illustrator_word) all_results = all_results.filter(title__icontains='*' + illustrator_word + '*') # q_or_objects = [] # for illustrator_book_id in models.BookIllustrator.objects \ # .filter(name__contains=regex_string.format(illustrator)).values_list('book_id', flat=True).distinct(): # if illustrator_book_id: # q_or_objects.append(SQ(book_identifier=str(illustrator_book_id))) # if len(q_or_objects) > 0: # all_results = all_results.filter(reduce(operator.or_, q_or_objects)) if len(book_id): all_results = all_results.filter(book_identifier=book_id) if len(publisher): publisher_words = [x.strip() for x in publisher.split(' ')] for publisher_word in publisher_words: re.sub(r'\W+', '', publisher_word) all_results = all_results.filter(publisher__icontains='*' + publisher_word + '*') # all_results = all_results.filter(SQ(publisher=Raw('*' + publisher + '*'))) if len(publishing_place): publishing_place_words = [x.strip() for x in publishing_place.split(' ')] for publishing_place_word in publishing_place_words: re.sub(r'\W+', '', publishing_place_word) all_results = all_results.filter(pubplace__icontains='*' + publishing_place_word + '*') # all_results = all_results.filter(SQ(pubplace=Raw('*' + publishing_place + '*'))) if similar_tags: similar_words = [] for word in keywords: if len(word): similar_words.extend(self.get_similar_word_array(word)) keywords = similar_words for word in keywords: if len(word): # Create seperate querysets for each keyword with all the possible ORs # then use | to get the union of the queysets with the main set from above all_results_for_ANDs = SearchQuerySet() all_results_for_ANDs = 
all_results_for_ANDs.filter_or(tag__icontains=word) all_results_for_ANDs = all_results_for_ANDs.filter_or(caption__icontains=word) all_results_for_ANDs = all_results_for_ANDs.filter_or(description__icontains=word) if not tag_keywords_only: all_results_for_ANDs = all_results_for_ANDs.filter_or(first_author__icontains=word) all_results_for_ANDs = all_results_for_ANDs.filter_or(date__icontains=word) all_results_for_ANDs = all_results_for_ANDs.filter_or(title__icontains=word) all_results_for_ANDs = all_results_for_ANDs.filter_or(publisher__icontains=word) all_results_for_ANDs = all_results_for_ANDs.filter_or(pubplace__icontains=word) all_results = all_results & all_results_for_ANDs # all_results = all_results.filter_or(tag__icontains=word) # all_results = all_results.filter_or(caption__icontains=word) # all_results = all_results.filter_or(description__icontains=word) # # if not tag_keywords_only: # all_results = all_results.filter_or(first_author__icontains=word) # all_results = all_results.filter_or(date__icontains=word) # all_results = all_results.filter_or(title__icontains=word) # all_results = all_results.filter_or(publisher__icontains=word) # all_results = all_results.filter_or(pubplace__icontains=word) sort_by = query_items.get('sort_results', '').strip() # print 'sorting by', sort_by if sort_by == 'none' or len(sort_by) < 1: pass elif sort_by == 'title': # print 'title found' all_results = all_results.order_by('title') elif sort_by == 'date': # print 'date found' all_results = all_results.order_by('date') elif sort_by == 'author': # print 'author found' all_results = all_results.order_by('first_author') logger.debug(all_results.query) # print 'full query', all_results.query logger.debug(all_results.count()) # print 'full result count', all_results.count() return all_results
def api_code_search(request, search_way=1):
    """API code search over the ProblemCode suffix-tree / haystack indexes.

    Reads the snippet from request.POST['title'] and the language from
    request.POST['queryLanguage'] (1 = cpp, 2 = python).  Merges, in order:
    char-suffix-tree hits, lex-suffix-tree hits, then AST-token OR-search
    hits, filtered to the requested language and deduplicated by id.
    When search_way == 0 returns the raw (post, problem) pairs; otherwise
    returns a paginated JSON-style dict using pageNum/pageSize from POST.
    """
    query = request.POST['title']
    language = {1: "cpp", 2: "python"}
    query_language = language[int(request.POST['queryLanguage'])]
    raw_query = query
    query_token = highlight_words(raw_query)
    # Map each query word to its <font>-wrapped form for later highlighting.
    highlight_dict = {}
    for word in query_token.split(" "):
        if word not in highlight_dict and word.strip() != "":
            highlight_dict[word] = "<font>" + word + "</font>"
    try:
        posts = []
        if query == '':
            # Empty query: return everything in the index.
            posts = SearchQuerySet().using('problemcode').all()
        else:
            print('Query Code: ', query)
            if query_language == 'python':
                py_lex_token = lex_analysis(raw_query)
                py_char_token = lex_analysis_char(raw_query)
                # print('py_lex_token: ', py_lex_token)
                # print('py_char_token: ', py_char_token)
                try:
                    lex_search_result = stree_Search(string=py_lex_token,
                                                     stree=lex_suffix_tree)
                    char_search_result = stree_Search(string=py_char_token,
                                                      stree=char_suffix_tree)
                    print('Lex Result: ', len(lex_search_result))
                    print('Char Result: ', len(char_search_result))
                    # Suffix-tree positions are offset by 2 from ProblemCode pks.
                    lex_result_id = []
                    char_result_id = []
                    for item in lex_search_result:
                        lex_result_id.append(int(item[0]) + 2)
                    for item in char_search_result:
                        char_result_id.append(int(item[0]) + 2)
                    # print('Lex Result ID: ', lex_result_id)
                    # print('Char Result ID: ', char_result_id)
                    problem_code_lex_obj = ProblemCode.objects.filter(
                        id__in=lex_result_id)
                    problem_code_char_obj = ProblemCode.objects.filter(
                        id__in=char_result_id)
                    # Index the fetched rows by pk for O(1) lookup below.
                    problem_code_lex_obj = dict([
                        (obj.id, obj) for obj in problem_code_lex_obj
                    ])
                    problem_code_char_obj = dict([
                        (obj.id, obj) for obj in problem_code_char_obj
                    ])
                    posts = []
                    for id in char_result_id:
                        if problem_code_char_obj[
                                id].language_id_id == 1:
                            # Filter all C++ code out of the results;
                            # list Python code only.
                            continue
                        posts.append(problem_code_char_obj[id])
                    for id in lex_result_id:
                        # Append lex hits after the char hits; skip any id
                        # already present in the char results.
                        if problem_code_lex_obj[id].language_id_id == 1:
                            continue
                        if id in char_result_id:
                            continue
                        # Require at least one query word to appear in the
                        # candidate's highlighted tokens.
                        if len(
                                list(
                                    set(query_token.split()).intersection(
                                        set(
                                            highlight_words(
                                                problem_code_lex_obj[id].code).
                                            split())))) == 0:
                            continue
                        posts.append(problem_code_lex_obj[id])
                    try:
                        query_ast = convert_python(raw_query)
                        # print('Query AST: ', query_ast)
                        query_ast = query_ast.split()
                        posts_ast = SearchQuerySet().using('problemcode').all()
                        for query_item in query_ast:
                            posts_ast = posts_ast.filter_or(content=query_item)
                        for post in posts_ast:
                            if post.language == 1:
                                continue
                            if len(
                                    list(
                                        set(query_token.split()).intersection(
                                            set(
                                                highlight_words(post.code).
                                                split())))) == 0:
                                continue
                            # NOTE(review): only the first comparison casts
                            # post.id to int -- confirm lex_result_id never
                            # needs the same cast.
                            if int(
                                    post.id
                            ) in char_result_id or post.id in lex_result_id:
                                continue
                            posts.append(post)
                    except:
                        print('AST Analysis Failed.')
                except:
                    posts = []
            if query_language == 'cpp':
                cpp_lex_token = lex_analysis(raw_query)
                cpp_char_token = lex_analysis_char(raw_query)
                # print('cpp_lex_token: ', cpp_lex_token)
                # print('cpp_char_token: ', cpp_char_token)
                try:
                    lex_search_result = stree_Search(string=cpp_lex_token,
                                                     stree=lex_suffix_tree)
                    char_search_result = stree_Search(string=cpp_char_token,
                                                      stree=char_suffix_tree)
                    # print('Lex Result: ', lex_search_result)
                    # print('Char Result: ', char_search_result)
                    lex_result_id = []
                    char_result_id = []
                    for item in lex_search_result:
                        lex_result_id.append(int(item[0]) + 2)
                    for item in char_search_result:
                        char_result_id.append(int(item[0]) + 2)
                    # print('Lex Result ID: ', lex_result_id)
                    # print('Char Result ID: ', char_result_id)
                    problem_code_lex_obj = ProblemCode.objects.filter(
                        id__in=lex_result_id)
                    problem_code_char_obj = ProblemCode.objects.filter(
                        id__in=char_result_id)
                    problem_code_lex_obj = dict([
                        (obj.id, obj) for obj in problem_code_lex_obj
                    ])
                    problem_code_char_obj = dict([
                        (obj.id, obj) for obj in problem_code_char_obj
                    ])
                    posts = []
                    for id in char_result_id:
                        if problem_code_char_obj[
                                id].language_id_id == 2 or problem_code_char_obj[
                                    id].language_id_id == 3:
                            # Filter all Python code out of the results;
                            # list C++ code only.
                            continue
                        problem_code_char_obj[id].code = cpp_head_convert(
                            problem_code_char_obj[id].code)
                        posts.append(problem_code_char_obj[id])
                    for id in lex_result_id:
                        # Append lex hits after the char hits; skip any id
                        # already present in the char results.
                        if problem_code_lex_obj[
                                id].language_id_id == 2 or problem_code_lex_obj[
                                    id].language_id_id == 3:
                            continue
                        if id in char_result_id:
                            continue
                        if len(
                                list(
                                    set(query_token.split()).intersection(
                                        set(
                                            highlight_words(
                                                problem_code_lex_obj[id].code).
                                            split())))) == 0:
                            continue
                        problem_code_lex_obj[id].code = cpp_head_convert(
                            problem_code_lex_obj[id].code)
                        posts.append(problem_code_lex_obj[id])
                    try:
                        query_ast = convert_cpp(cpp_head_remove(raw_query))
                        # print('Query AST: ', query_ast)
                        query_ast = query_ast.split()
                        posts_ast = SearchQuerySet().using('problemcode').all()
                        for query_item in query_ast:
                            posts_ast = posts_ast.filter_or(content=query_item)
                        for post in posts_ast:
                            if post.language == 2 or post.language == 3:
                                continue
                            if len(
                                    list(
                                        set(query_token.split()).intersection(
                                            set(
                                                highlight_words(post.code).
                                                split())))) == 0:
                                continue
                            if int(
                                    post.id
                            ) in char_result_id or post.id in lex_result_id:
                                continue
                            post.code = cpp_head_convert(post.code)
                            posts.append(post)
                    except:
                        print('AST Analysis Failed.')
                except:
                    posts = []
        # Resolve each post's Problem row, preserving post order; posts
        # whose Problem is missing are silently dropped from pro_obj.
        posts_id = []
        for post in posts:
            posts_id.append(post.problem_id_id)
        problem_obj = Problem.objects.all().filter(id__in=posts_id)
        problem_obj = dict([(obj.id, obj) for obj in problem_obj])
        pro_obj = []
        for id in posts_id:
            try:
                pro_obj.append(problem_obj[int(id)])
            except:
                continue
    except:
        pro_obj = []
        posts = []
    posts = list(zip(posts, pro_obj))
    result_num = len(pro_obj)
    print('Returned Posts Count: ', result_num)
    if search_way == 0:
        # Internal callers get the raw (post, problem) pairs.
        return posts
    json_dict = {}
    json_dict['code'] = 0
    json_dict['msg'] = "操作成功"  # "operation succeeded" (client-facing, do not translate)
    page_info = {}
    pageNum = int(request.POST["pageNum"])
    pageSize = int(request.POST["pageSize"])
    page_info['pageNums'] = pageNum
    page_info['pageSize'] = pageSize
    total_page = math.ceil(result_num / pageSize)
    page_info['pageTotal'] = total_page
    page_info['pageCount'] = result_num
    json_dict['pageInfo'] = page_info
    # Slice out the requested page; the last page runs to the end.
    if pageNum == total_page:
        returned_posts = posts[(pageNum - 1) * pageSize:]
    else:
        returned_posts = posts[(pageNum - 1) * pageSize:pageNum * pageSize]
    data = []
    for item in returned_posts:
        item_dict = {}
        item_dict['id'] = item[1].id
        item_dict['contentType'] = request.POST['menuType']
        # Wrap the parts of the code that need highlighting in
        # <font></font> tags.
        for word, replace_word in highlight_dict.items():
            item[0].code = item[0].code.replace(word, replace_word)
        item_info = {}
        item_info['title'] = item[1].title
        item_info['sourceName'] = item[1].source
        item_info['createTime'] = str(item[1].create_time)
        item_info['level'] = item[1].difficulty
        item_info['labels'] = ""
        item_info['ansName'] = ""
        item_info['content'] = item[0].code
        item_info['readNum'] = ""
        item_info['praiseNum'] = ""
        item_info['status'] = ""
        item_info['imgUrl'] = ""
        item_info['answersNums'] = ""
        item_info['language'] = ""
        item_info['languageCode'] = ""
        item_dict['info'] = item_info
        data.append(item_dict)
    json_dict['data'] = data
    return json_dict
def build_haystack_filters(self, parameters):
    """Build a Haystack SearchQuerySet from API request parameters.

    Supported parameters: free-text ``q``, ``type__in`` facets,
    ``date_range`` ("start,end"), ``category__identifier__in``,
    ``keywords__slug__in``, ``order_by`` and ``extent`` (bbox
    "left,right,bottom,top").  Returns the filtered SearchQuerySet.

    Fixes: the keyword loop called ``sqs.filter_or`` without the
    ``sqs is None`` guard every other branch uses (AttributeError when
    only keyword filters are supplied), and a ``date_range`` value with
    no comma crashed with IndexError at ``date_range[1]``.
    """
    from haystack.inputs import Raw
    from haystack.query import SearchQuerySet, SQ

    sqs = None

    # Retrieve Query Params
    # Text search
    query = parameters.get('q', None)
    # Types and subtypes to filter (map, layer, vector, etc)
    type_facets = parameters.getlist("type__in", [])
    # If coming from explore page, add type filter from resource_name
    resource_filter = self._meta.resource_name.rstrip("s")
    if resource_filter != "base" and resource_filter not in type_facets:
        type_facets.append(resource_filter)
    # Publication date range (start,end)
    date_range = parameters.get("date_range", ",").split(",")
    while len(date_range) < 2:
        # Robustness: a bare "start" value used to raise IndexError below.
        date_range.append("")
    # Topic category filter
    category = parameters.getlist("category__identifier__in")
    # Keyword filter
    keywords = parameters.getlist("keywords__slug__in")
    # Sort order
    sort = parameters.get("order_by", "relevance")
    # Geospatial Elements
    bbox = parameters.get("extent", None)

    # Filter by Type and subtype
    if type_facets is not None:
        types = []
        subtypes = []
        for type in type_facets:
            if type in ["map", "layer", "document", "user"]:
                # Type is one of our Major Types (not a sub type)
                types.append(type)
            elif type in LAYER_SUBTYPES.keys():
                subtypes.append(type)
        if len(subtypes) > 0:
            # Any subtype implies the "layer" major type.
            types.append("layer")
            sqs = SearchQuerySet().narrow(
                "subtype:%s" % ','.join(map(str, subtypes)))
        if len(types) > 0:
            sqs = (SearchQuerySet() if sqs is None else sqs).narrow(
                "type:%s" % ','.join(map(str, types)))

    # Filter by Query Params
    # haystack bug? if boosted fields aren't included in the query, then
    # the score won't be affected by the boost
    if query:
        if query.startswith('"') or query.startswith('\''):
            # Match exact phrase
            phrase = query.replace('"', '')
            sqs = (SearchQuerySet() if sqs is None else sqs).filter(
                SQ(title__exact=phrase) |
                SQ(abstract__exact=phrase) |
                SQ(content__exact=phrase)
            )
        else:
            words = query.split()
            for i, word in enumerate(words):
                search_word = word + "*"  # prefix-match each word
                if i == 0:
                    sqs = (SearchQuerySet() if sqs is None else sqs).filter(
                        SQ(title=Raw(search_word)) |
                        SQ(description=Raw(search_word)) |
                        SQ(content=Raw(search_word))
                    )
                elif word in ["AND", "OR"]:
                    # Boolean connectors are consumed, not searched for.
                    pass
                elif words[i - 1] == "OR":
                    # previous word OR this word
                    sqs = sqs.filter_or(
                        SQ(title=Raw(search_word)) |
                        SQ(description=Raw(search_word)) |
                        SQ(content=Raw(search_word))
                    )
                else:
                    # previous word AND this word
                    sqs = sqs.filter(
                        SQ(title=Raw(search_word)) |
                        SQ(description=Raw(search_word)) |
                        SQ(content=Raw(search_word))
                    )

    # filter by category
    if category:
        sqs = (SearchQuerySet() if sqs is None else sqs).narrow(
            'category:%s' % ','.join(map(str, category)))

    # filter by keyword: use filter_or with keywords_exact
    # not using exact leads to fuzzy matching and too many results
    # using narrow with exact leads to zero results if multiple keywords
    # selected
    if keywords:
        for keyword in keywords:
            # Bug fix: guard against sqs still being None (e.g. a request
            # with only keyword filters), like every other branch does.
            sqs = (SearchQuerySet() if sqs is None else sqs).filter_or(
                keywords_exact=keyword)

    # filter by date
    if date_range[0]:
        sqs = (SearchQuerySet() if sqs is None else sqs).filter(
            SQ(date__gte=date_range[0])
        )
    if date_range[1]:
        sqs = (SearchQuerySet() if sqs is None else sqs).filter(
            SQ(date__lte=date_range[1])
        )

    # Filter by geographic bounding box
    if bbox:
        left, right, bottom, top = bbox.split(',')
        sqs = (SearchQuerySet() if sqs is None else sqs).filter(
            # first check if the bbox has at least one point inside the window:
            # check top_left is inside the window
            SQ(bbox_left__gte=left) & SQ(bbox_left__lte=right) &
            SQ(bbox_top__gte=bottom) & SQ(bbox_top__lte=top) |
            # check top_right is inside the window
            SQ(bbox_right__lte=right) & SQ(bbox_right__gte=left) &
            SQ(bbox_top__lte=top) & SQ(bbox_top__gte=bottom) |
            # check bottom_right is inside the window
            SQ(bbox_bottom__gte=bottom) & SQ(bbox_bottom__lte=top) &
            SQ(bbox_right__lte=right) & SQ(bbox_right__gte=left) |
            # check bottom_left is inside the window
            SQ(bbox_top__lte=top) & SQ(bbox_top__gte=bottom) &
            SQ(bbox_left__gte=left) & SQ(bbox_left__lte=right) |
            # then check if the bbox is including the window
            SQ(bbox_left__lte=left) & SQ(bbox_right__gte=right) &
            SQ(bbox_bottom__lte=bottom) & SQ(bbox_top__gte=top)
        )

    # Apply sort
    if sort.lower() == "-date":
        sqs = (SearchQuerySet() if sqs is None else sqs).order_by("-modified")
    elif sort.lower() == "date":
        sqs = (SearchQuerySet() if sqs is None else sqs).order_by("modified")
    elif sort.lower() == "title":
        sqs = (SearchQuerySet() if sqs is None else sqs).order_by(
            "title_sortable")
    elif sort.lower() == "-title":
        sqs = (SearchQuerySet() if sqs is None else sqs).order_by(
            "-title_sortable")
    elif sort.lower() == "-popular_count":
        sqs = (SearchQuerySet() if sqs is None else sqs).order_by(
            "-popular_count")
    else:
        sqs = (SearchQuerySet() if sqs is None else sqs).order_by("-modified")
    return sqs
def search(self):
    """Build a SearchQuerySet of 'Live' job posts matching this form.

    Requires a valid form providing both 'q' (comma-separated
    title/designation/skills terms) and 'location'; otherwise returns the
    base live-posts queryset (invalid form) or an empty list (missing
    q/location).  Optional facets: job_type, industry, functional_area,
    experience, salary and walk-in date windows.
    """
    # sqs = SearchQuerySet().models(JobPost).filter(status='Live')
    sqs = SearchQuerySet()
    sqs = sqs.filter_and(status='Live')
    if not self.is_valid():
        # Invalid form: fall back to all live posts, unfiltered.
        return sqs
    if self.cleaned_data['q'] and self.cleaned_data['location']:
        # Normalize the query: strip list-style brackets/quotes, then split
        # on commas into individual search terms.
        term = self.cleaned_data['q']
        term = term.replace('[', '')
        term = term.replace(']', '')
        term = term.replace("'", '')
        # sqs = sqs.filter_and(SQ(title=term) | SQ(designation=term)| SQ(skills=term))
        terms = [t.strip() for t in term.split(',')]
        sqs = sqs.filter_and(
            SQ(title__in=terms) | SQ(designation__in=terms) |
            SQ(skills__in=terms))
        # sqs = sqs.filter_or(SQ(designation__in=terms))
        # sqs = sqs.filter_or(SQ(skills__in=terms))
        # Same normalization for the location list.
        location = self.cleaned_data['location']
        location = location.replace('[', '')
        location = location.replace(']', '')
        location = location.replace("'", '')
        locations = [t.strip() for t in location.split(',')]
        # Also accept posts located in the parent city of any named city.
        other_cities = City.objects.filter(name__in=locations).values_list(
            'parent_city__name', flat=True)
        sqs = sqs.filter_and(
            SQ(location__in=locations) |
            SQ(location__startswith=self.cleaned_data['location']) |
            SQ(location__in=other_cities))
        if self.cleaned_data['job_type']:
            sqs = sqs.filter_and(job_type=self.cleaned_data['job_type'])
        if self.cleaned_data['industry']:
            term = self.cleaned_data['industry']
            # sqs = sqs.filter_and(SQ(title=term) | SQ(designation=term)| SQ(skills=term))
            terms = [t.strip() for t in term.split(',')]
            sqs = sqs.filter_or(industry__in=terms)
        if self.cleaned_data['functional_area']:
            term = self.cleaned_data['functional_area']
            # sqs = sqs.filter_and(SQ(title=term) | SQ(designation=term)| SQ(skills=term))
            terms = [t.strip() for t in term.split(',')]
            sqs = sqs.filter_or(functional_area__in=terms)
        # Experience 0 is a legitimate value, hence the explicit == 0 check.
        if self.cleaned_data['experience'] or self.cleaned_data[
                'experience'] == 0:
            sqs = sqs.filter_or(
                SQ(max_experience__gte=self.cleaned_data['experience']) &
                SQ(min_experience__lte=self.cleaned_data['experience']))
        if self.cleaned_data['salary']:
            sqs = sqs.filter_or(
                SQ(max_salary__gte=self.cleaned_data['salary']) &
                SQ(min_salary__lte=self.cleaned_data['salary']))
        if self.cleaned_data['walkin_type']:
            import datetime
            if self.cleaned_data['walkin_type'] == 'this_week':
                # Window: start of the current week through six days later.
                date = datetime.date.today()
                start_week = date - \
                    datetime.timedelta(date.weekday()) - datetime.timedelta(1)
                end_week = start_week + datetime.timedelta(6)
                start_week = datetime.datetime.strptime(
                    str(start_week), "%Y-%m-%d").strftime("%Y-%m-%d")
                end_week = datetime.datetime.strptime(
                    str(end_week), "%Y-%m-%d").strftime("%Y-%m-%d")
                sqs = sqs.filter_and(
                    SQ(walkin_from_date__range=[start_week, end_week]) |
                    SQ(walkin_to_date__range=[start_week, end_week]))
            if self.cleaned_data['walkin_type'] == 'next_week':
                # Window: the seven days of the following week.
                date = datetime.date.today()
                start_week = date - \
                    datetime.timedelta(
                        date.isoweekday()) + datetime.timedelta(7)
                end_week = start_week + datetime.timedelta(6)
                start_week = datetime.datetime.strptime(
                    str(start_week), "%Y-%m-%d").strftime("%Y-%m-%d")
                end_week = datetime.datetime.strptime(
                    str(end_week), "%Y-%m-%d").strftime("%Y-%m-%d")
                sqs = sqs.filter_and(
                    SQ(walkin_from_date__range=[start_week, end_week]) |
                    SQ(walkin_to_date__range=[start_week, end_week]))
                # sqs = sqs.filter_and(SQ(walkin_from_date__range=[start_week, end_week]) | SQ(walkin_to_date__range=[start_week, end_week]))
            if self.cleaned_data['walkin_type'] == 'this_month':
                # Window: first through last day of the current month.
                current_date = datetime.date.today()
                from dateutil.relativedelta import relativedelta
                # NOTE: this import shadows the local `date` variable name
                # used by the branches above.
                from datetime import date
                start_week = date(current_date.year, current_date.month, 1)
                end_week = start_week + relativedelta(day=31)
                start_week = datetime.datetime.strptime(
                    str(start_week), "%Y-%m-%d").strftime("%Y-%m-%d")
                end_week = datetime.datetime.strptime(
                    str(end_week), "%Y-%m-%d").strftime("%Y-%m-%d")
                sqs = sqs.filter_and(
                    SQ(walkin_from_date__range=[start_week, end_week]) |
                    SQ(walkin_to_date__range=[start_week, end_week]))
            # if self.cleaned_data['walkin_type'] == 'next_month':
            #     pass
            if self.cleaned_data['walkin_type'] == 'custom_range':
                # User-supplied open-ended range bounds.
                if self.cleaned_data['walkin_from_date']:
                    walkin_from_date = datetime.datetime.strptime(
                        str(self.cleaned_data['walkin_from_date']),
                        "%Y-%m-%d").strftime("%Y-%m-%d")
                    sqs = sqs.filter_and(
                        SQ(walkin_from_date__gte=walkin_from_date) |
                        SQ(walkin_to_date__gte=walkin_from_date))
                if self.cleaned_data['walkin_to_date']:
                    walkin_to_date = datetime.datetime.strptime(
                        str(self.cleaned_data['walkin_to_date']),
                        "%Y-%m-%d").strftime("%Y-%m-%d")
                    sqs = sqs.filter_and(
                        SQ(walkin_from_date__gte=walkin_to_date) |
                        SQ(walkin_to_date__lte=walkin_to_date))
        return sqs
    else:
        # Missing q or location: no results.
        return []
def deals_return_response(self, request, **kwargs):
    """GET handler: nearby, active, offline coupons sorted by distance.

    Requires a 'location' param ("lat,lng"); optional params: 'radius'
    (miles, default 10), 'query', 'category_slugs', 'provider_slugs'
    (comma-separated), 'page'/'per_page', 'updated_after', 'id'.
    Returns a JSON response with the echoed query and a 'deals' list.
    """
    self.method_check(request, allowed=['get'])
    params_dict = request.GET
    params_keys = params_dict.keys()
    location_param = params_dict.get('location', None)
    if not location_param:
        response = {
            'error': {'message': "You must supply a valid user location information."}
        }
        return self.create_response(request, response)
    lat_lng_in_list = location_param.split(',')
    lat, lng = map(float, lat_lng_in_list)
    id_param = params_dict.get('id', 'uuid')
    radius = D(mi=float(params_dict['radius'])) if 'radius' in params_keys else D(mi=10)
    # Point takes (x, y) == (longitude, latitude).
    user_pnt = Point(lng, lat)
    # Base queryset: active, non-duplicate, offline coupons that have not
    # expired, within `radius` of the user, nearest first.
    sqs = SearchQuerySet().using('mobile_api').filter(django_ct='core.coupon', online=False, is_duplicate=False, is_deleted=False, status='considered-active')\
        .exclude(end__lt=datetime.now(pytz.utc))\
        .dwithin('merchant_location', user_pnt, radius).distance('merchant_location', user_pnt)\
        .order_by('distance')
    if 'query' in params_keys:
        # Intersect with the free-text search results.
        query = params_dict['query']
        sqs_by_query = SearchQuerySet().using('mobile_api').filter(mobilequery=query)
        sqs = sqs.__and__(sqs_by_query)
        # Prepare for 'localindex' api service
        self.create_localinfo_index_if_doesnt_exist()
        # Record the query (and any category names it matches) for the
        # popular-searches index.
        matched_category_indices = [i for i, s in enumerate(self.available_categories_list) if query.lower() in s.lower()]
        matched_category_names = [self.available_categories_list[i] for i in matched_category_indices]
        self.index_it_in_localinfo_populars(id_param, location_param, string.capwords(query), matched_category_names)
    if 'category_slugs' in params_keys:
        # OR together all requested categories, then AND with the base set.
        category_slugs_list = params_dict['category_slugs'].split(',')
        sqs_by_category = SearchQuerySet().using('mobile_api')
        for c in category_slugs_list:
            sqs_by_category = sqs_by_category.filter_or(category_slugs=c.strip())
        sqs = sqs.__and__(sqs_by_category)
    if 'provider_slugs' in params_keys:
        # Same pattern for providers.
        provider_slugs_list = params_dict['provider_slugs'].split(',')
        sqs_by_provider = SearchQuerySet().using('mobile_api')
        for p in provider_slugs_list:
            sqs_by_provider = sqs_by_provider.filter_or(provider_slugs=p.strip())
        sqs = sqs.__and__(sqs_by_provider)
    updated_after = params_dict.get('updated_after', None)
    # Manual pagination over the search results.
    per_page = int(params_dict.get('per_page', 20))
    page = int(params_dict.get('page', 1))
    start_point = (page - 1) * per_page
    end_point = page * per_page
    deals = []
    for sqs_obj in sqs[start_point:end_point]:
        merchant_pnt = sqs_obj.merchant_location
        if not merchant_pnt:
            # Skip deals with no merchant coordinates.
            continue
        # geopy expects (latitude, longitude) pairs.
        dist_to_user = geopy_distance((user_pnt.y, user_pnt.x), (merchant_pnt.y, merchant_pnt.x)).miles
        deal_description = sqs_obj.text
        if sqs_obj.related_deals_count != 0:
            # Append a teaser about other deals from the same vendor.
            deal_description = deal_description if deal_description else ""
            deal_description += "\n\nFind {} more similar deal(s) from this vendor on {}!".format(sqs_obj.related_deals_count, sqs_obj.provider)
        each_deal = {'deal': {
            'id': sqs_obj.coupon_ref_id,
            'title': sqs_obj.embedly_title,
            'short_title': sqs_obj.embedly_description,
            'description': deal_description,
            'fine_print': sqs_obj.restrictions,
            'number_sold': None,
            'url': sqs_obj.link,
            'untracked_url': sqs_obj.directlink,
            'price': sqs_obj.price,
            'value': sqs_obj.listprice,
            'discount_amount': sqs_obj.discount,
            'discount_percentage': float(sqs_obj.percent) / 100,
            'commission': None,
            'provider_name': sqs_obj.provider,
            'provider_slug': sqs_obj.provider_slug,
            'category_name': ', '.join(sqs_obj.categories) if sqs_obj.categories else None,
            'category_slug': ', '.join(sqs_obj.category_slugs) if sqs_obj.category_slugs else None,
            'image_url': sqs_obj.image,
            'online': sqs_obj.online,
            'expires_at': sqs_obj.end,
            'created_at': sqs_obj.start,
            'updated_at': sqs_obj.lastupdated,
            'is_duplicate': sqs_obj.is_duplicate,
            'merchant': {
                'id': sqs_obj.merchant_ref_id,
                'name': sqs_obj.merchant_name,
                'address': sqs_obj.merchant_address,
                'locality': sqs_obj.merchant_locality,
                'region': sqs_obj.merchant_region,
                'postal_code': sqs_obj.merchant_postal_code,
                'country': "United States",
                'country_code': "US",
                'latitude': merchant_pnt.y,
                'longitude': merchant_pnt.x,
                'dist_to_user_mi': dist_to_user,
                'url': sqs_obj.merchant_link,
            }
        }
        }
        deals.append(each_deal)
    # Echo the effective query back to the client alongside the results.
    query = {
        'total': len(sqs),
        'page': page,
        'per_page': per_page,
        'query': query if 'query' in params_keys else None,
        'location': {
            'latitude': lat,
            'longitude': lng,
        },
        'radius': float(params_dict['radius']) if 'radius' in params_keys else 10,
        'online': False,
        'category_slugs': category_slugs_list if 'category_slugs' in params_keys else None,
        'provider_slugs': provider_slugs_list if 'provider_slugs' in params_keys else None,
        'updated_after': updated_after,
    }
    response = {
        'query': query,
        'deals': deals,
    }
    return self.create_response(request, response)
def get_haystack_queryset(self, order_by='-obligation_date'):
    """
    Return a Haystack ``SearchQuerySet`` over ``self.DJANGO_MODEL`` with this
    object's filters applied.

    ``self.filters`` holds 3-tuples ``(field, value, conjunction)``. Clauses
    whose conjunction equals ``self.CONJUNCTION_OR`` are collected into an OR
    bucket, everything else into an AND bucket; the buckets are applied with
    ``filter_or`` / ``filter_and`` respectively (AND filters are collected
    first, then OR filters, preserving the original ordering contract).

    :param order_by: Haystack ordering expression; defaults to newest
        obligation date first.
    :returns: an ordered ``SearchQuerySet``.
    """
    and_filters = {}
    or_filters = {}
    for filter_field, filter_value, filter_conjunction in self.filters:
        # Route this clause into the AND or the OR bucket.
        target_filter_list = and_filters
        if filter_conjunction == self.CONJUNCTION_OR:
            target_filter_list = or_filters

        field_spec = self.FIELD_MAPPINGS[filter_field]

        # Foreign-key fields: translate each value through the optional
        # 'solr_transformation' callable and emit a single __in clause.
        if field_spec['type'] == 'fk':
            fk_transformation = field_spec.get(
                'solr_transformation', lambda x: x)
            if type(filter_value) not in (list, tuple):
                filter_value = (filter_value, )
            fk_values = []
            for value in filter_value:
                fk_value = fk_transformation(value)
                if fk_value is not None:
                    fk_values.append(str(fk_value))
            target_filter_list[filter_field + '__in'] = fk_values

        # Range fields: value is a (lower, upper) pair; either bound may be
        # None to leave that side of the range open.
        if field_spec['type'] == 'range':
            range_transformation = field_spec.get(
                'solr_transformation', lambda x: x)
            filter_operators = ('__gte', '__lte')
            for i, range_specifier in enumerate(filter_value):
                if range_specifier is not None:
                    # BUGFIX: range_transformation was previously fetched but
                    # never applied to the bound; the default is the identity
                    # lambda, so fields without a configured transformation
                    # behave exactly as before.
                    target_filter_list[
                        field_spec['solr_field'] + filter_operators[i]
                    ] = range_transformation(range_specifier)

        # Plain text fields pass straight through.
        elif field_spec['type'] == 'text':
            target_filter_list[filter_field] = filter_value

    s = SearchQuerySet().models(self.DJANGO_MODEL)

    # add sector filtering -- only works for a single sector at the moment
    if len(self.sectors):
        s = s.filter_and(
            sectors__in=[int(sector.id) for sector in self.sectors])

    if len(or_filters):
        s = s.filter_or(**or_filters)
    if len(and_filters):
        s = s.filter_and(**and_filters)

    return s.order_by(order_by)
def build_haystack_filters(self, parameters):
    """
    Translate request query parameters into a Haystack ``SearchQuerySet``.

    ``sqs`` starts as ``None`` and is lazily instantiated by the first
    clause that fires (via the ``SearchQuerySet() if sqs is None else sqs``
    pattern throughout); if no parameter triggers any clause, ``None`` is
    returned — callers must handle that case.

    Filtering strategy (why narrow vs filter vs filter_or differs by
    facet is explained inline at each clause):
      * type/subtype, category, owner  -> ``narrow`` (exact facet match)
      * free-text query               -> ``filter``/``filter_or`` with
                                         AND/OR word logic
      * keywords, regions             -> ``filter_or`` on *_exact fields
      * published/featured/date/bbox  -> ``filter``/``exclude``
    Finally an ``order_by`` is applied ("relevance" falls through to
    ``-date``).
    """
    from haystack.query import SearchQuerySet, SQ  # noqa

    sqs = None

    # Retrieve Query Params

    # Text search
    query = parameters.get('q', None)

    # Types and subtypes to filter (map, layer, vector, etc)
    type_facets = parameters.getlist("type__in", [])

    # If coming from explore page, add type filter from resource_name
    # (e.g. resource name "layers" -> type facet "layer"; "base" is the
    # generic resource and adds no facet)
    resource_filter = self._meta.resource_name.rstrip("s")
    if resource_filter != "base" and resource_filter not in type_facets:
        type_facets.append(resource_filter)

    # Publication date range (start,end)
    date_end = parameters.get("date__lte", None)
    date_start = parameters.get("date__gte", None)

    # Topic category filter
    category = parameters.getlist("category__identifier__in")

    # Keyword filter
    keywords = parameters.getlist("keywords__slug__in")

    # Region filter
    regions = parameters.getlist("regions__name__in")

    # Owner filters
    owner = parameters.getlist("owner__username__in")

    # Published filter
    published = parameters.get("is_published", None)

    # Featured filter
    featured = parameters.get("featured", None)

    # Sort order
    sort = parameters.get("order_by", "relevance")

    # Geospatial Elements
    bbox = parameters.get("extent", None)

    # Filter by Type and subtype
    if type_facets is not None:

        types = []
        subtypes = []

        for type in type_facets:
            if type in [
                    "map", "mapstory", "layer", "document", "user", "group"
            ]:
                # Type is one of our Major Types (not a sub type)
                types.append(type)
            elif type in LAYER_SUBTYPES.keys():
                subtypes.append(type)

        # Any subtype implies the "layer" major type as well.
        if len(subtypes) > 0:
            types.append("layer")
            sqs = SearchQuerySet().narrow("subtype:%s" %
                                          ','.join(map(str, subtypes)))

        if len(types) > 0:
            sqs = (SearchQuerySet() if sqs is None else sqs).narrow(
                "type:%s" % ','.join(map(str, types)))

    # Filter by Query Params
    # haystack bug? if boosted fields aren't included in the
    # query, then the score won't be affected by the boost
    if query:
        if query.startswith('"') or query.startswith('\''):
            # Match exact phrase
            phrase = query.replace('"', '')
            sqs = (SearchQuerySet() if sqs is None else sqs).filter(
                SQ(title__exact=phrase) |
                SQ(description__exact=phrase) |
                SQ(content__exact=phrase))
        else:
            # Split the query on non-word characters and apply word-by-word
            # AND/OR logic: bare "AND"/"OR" tokens act as operators, not
            # search words.
            words = [
                w for w in re.split('\W', query, flags=re.UNICODE) if w
            ]
            for i, search_word in enumerate(words):
                if i == 0:
                    sqs = (SearchQuerySet() if sqs is None else sqs) \
                        .filter(
                        SQ(title__contains=search_word) |
                        SQ(description__contains=search_word) |
                        SQ(content__contains=search_word)
                    )
                elif search_word in ["AND", "OR"]:
                    pass
                elif words[i - 1] == "OR":  # previous word OR this word
                    sqs = sqs.filter_or(
                        SQ(title__contains=search_word) |
                        SQ(description__contains=search_word) |
                        SQ(content__contains=search_word))
                else:  # previous word AND this word
                    sqs = sqs.filter(
                        SQ(title__contains=search_word) |
                        SQ(description__contains=search_word) |
                        SQ(content__contains=search_word))

    # filter by category
    if category:
        sqs = (SearchQuerySet() if sqs is None else sqs).narrow(
            'category:%s' % ','.join(map(str, category)))

    # filter by keyword: use filter_or with keywords_exact
    # not using exact leads to fuzzy matching and too many results
    # using narrow with exact leads to zero results if multiple keywords
    # selected
    if keywords:
        for keyword in keywords:
            sqs = (SearchQuerySet() if sqs is None else sqs).filter_or(
                keywords_exact=keyword)

    # filter by regions: use filter_or with regions_exact
    # not using exact leads to fuzzy matching and too many results
    # using narrow with exact leads to zero results if multiple keywords
    # selected
    if regions:
        for region in regions:
            sqs = (SearchQuerySet() if sqs is None else sqs).filter_or(
                regions_exact__exact=region)

    # filter by owner
    if owner:
        sqs = (SearchQuerySet() if sqs is None else sqs).narrow(
            "owner__username:%s" % ','.join(map(str, owner)))

    # filter by publishing status
    if published:
        sqs = (SearchQuerySet() if sqs is None else sqs).filter(
            SQ(is_published=published))

    # filter by featured status
    if featured:
        sqs = (SearchQuerySet() if sqs is None else sqs).filter(
            SQ(featured=featured))

    # filter by date
    if date_start:
        sqs = (SearchQuerySet() if sqs is None else sqs).filter(
            SQ(date__gte=date_start))

    if date_end:
        sqs = (SearchQuerySet() if sqs is None else sqs).filter(
            SQ(date__lte=date_end))

    # Filter by geographic bounding box: exclude anything whose bbox lies
    # entirely outside the requested extent.
    if bbox:
        left, bottom, right, top = bbox.split(',')
        sqs = (SearchQuerySet() if sqs is None else sqs).exclude(
            SQ(bbox_top__lte=bottom) |
            SQ(bbox_bottom__gte=top) |
            SQ(bbox_left__gte=right) |
            SQ(bbox_right__lte=left))

    # Apply sort ("relevance" and any unrecognised value fall through to
    # the default "-date")
    if sort.lower() == "-date":
        sqs = (SearchQuerySet() if sqs is None else sqs).order_by("-date")
    elif sort.lower() == "date":
        sqs = (SearchQuerySet() if sqs is None else sqs).order_by("date")
    elif sort.lower() == "title":
        sqs = (SearchQuerySet()
               if sqs is None else sqs).order_by("title_sortable")
    elif sort.lower() == "-title":
        sqs = (SearchQuerySet()
               if sqs is None else sqs).order_by("-title_sortable")
    elif sort.lower() == "-popular_count":
        sqs = (SearchQuerySet()
               if sqs is None else sqs).order_by("-popular_count")
    else:
        sqs = (SearchQuerySet() if sqs is None else sqs).order_by("-date")

    return sqs
def mysearch_other(request):
    """
    Advanced problem search view.

    Builds a Haystack query over ``Problem`` from the GET parameters:
    ``label_con`` ('|'-separated labels already selected), ``delete_label``
    (a label to remove from the selection), repeated ``difficulty`` and
    ``label`` values (OR-ed together), and free-text ``title`` /
    ``description`` (AND-ed). Renders the result page together with a
    label-frequency tree used for drill-down filtering.

    :param request: the Django ``HttpRequest``.
    :returns: an ``HttpResponse`` rendering 'search/search_result.html'.
    """
    posts = SearchQuerySet().using('problem').models(Problem).all()
    label_con = []
    title = ''
    description = ''

    # Seed the selected-label list from the 'label_con' parameter
    # ('|'-separated); only the first matching key is consulted.
    for key, _ in request.GET.lists():
        if 'label_con' in key:
            labels = request.GET['label_con'].strip()
            if labels != '':
                label_con = labels.strip().split('|')
            break

    # Remove a label the user just deselected.
    for key, _ in request.GET.lists():
        if 'delete_label' in key:
            delete_label = request.GET['delete_label'].strip()
            if delete_label != '':
                label_con.remove(delete_label)
            break

    query_difficulty = []
    for key, values in request.GET.lists():
        if key == 'difficulty' and len(values) != 0:
            # OR together every selected difficulty value.
            for value in values:
                if value != '':
                    query_difficulty.append(value)
                    posts = posts.filter_or(difficulty=value)
            continue
        if key == 'label' and len(values) != 0:
            # Merge newly checked labels into the selection.
            for value in values:
                if value not in label_con:
                    label_con.append(value)

    # Computed unconditionally so 'label_con_string' is always bound for the
    # template context, even when no 'label' parameter was submitted.
    label_con_string = '|'.join(label_con)
    for label in label_con:
        posts = posts.filter_or(label=label)

    for key, _ in request.GET.lists():
        if 'title' in key:
            title = request.GET['title'].strip()
            if title != '':
                posts = posts.filter_and(title=title.strip())
            break

    for key, _ in request.GET.lists():
        if 'description' in key:
            description = request.GET['description'].strip()
            if description != '':
                posts = posts.filter_and(description=description.strip())
            break

    # Single pass over the results (instead of two full iterations):
    # collect each result's label list and count how often every
    # not-yet-selected label occurs, for the drill-down tree.
    condition_now = {}
    keywords = []
    for result in posts:
        labels = [label for label in result.label]
        keywords.append(labels)
        for label in labels:
            if label != '' and label not in label_con:
                condition_now[label] = condition_now.get(label, 0) + 1

    kw = sorted(condition_now.items(), key=operator.itemgetter(1),
                reverse=True)
    result_num = posts.count()
    posts = zip(posts, keywords)

    # Annotate the label tree nodes with their occurrence counts.
    data = Label_tree(Label.objects.all(), many=True).data
    dict_tree = []
    for node in data:
        for name, count in kw:
            if node['label_name'] == name:
                dict_tree.append(
                    dict([('id', node['id']),
                          ('label_name',
                           node['label_name'] + '-' + str(count)),
                          ('father_id', node['father_id']),
                          ('num', count)]))

    return render(
        request, 'search/search_result.html', {
            'posts': posts,
            'query_title': title,
            'query_describe': description,
            'query_keywords': ' '.join(label_con),
            'query_difficulty': ' '.join(query_difficulty),
            'query': '',
            'result_num': result_num,
            'kw': kw,
            'label_con': label_con,
            'label_con_string': label_con_string,
            'data': json.dumps(dict_tree)
        })