def build_query_fragment(self, field, filter_type, value):
    """
    Build one query-string fragment ``field:expression`` for a single filter.

    ``value`` may already be a haystack ``InputType`` (has ``input_type_name``)
    or a raw Python value, which is wrapped in ``Clean``/``PythonData`` first.
    Range syntax uses lowercase ``to`` and ``_convert_datetime`` is applied to
    datetimes — NOTE(review): this looks like the Whoosh dialect; confirm
    against the enclosing backend class.
    """
    from haystack import connections
    query_frag = ''
    is_datetime = False

    if not hasattr(value, 'input_type_name'):
        # Handle when we've got a ``ValuesListQuerySet``...
        if hasattr(value, 'values_list'):
            value = list(value)

        if hasattr(value, 'strftime'):
            is_datetime = True

        if isinstance(value, six.string_types) and value != ' ':
            # It's not an ``InputType``. Assume ``Clean``.
            value = Clean(value)
        else:
            value = PythonData(value)

    # Prepare the query using the InputType.
    prepared_value = value.prepare(self)

    if not isinstance(prepared_value, (set, list, tuple)):
        # Then convert whatever we get back to what pysolr wants if needed.
        prepared_value = self.backend._from_python(prepared_value)

    # 'content' is a special reserved word, much like 'pk' in
    # Django's ORM layer. It indicates 'no special field'.
    if field == 'content':
        index_fieldname = ''
    else:
        index_fieldname = u'%s:' % connections[
            self._using].get_unified_index().get_index_fieldname(field)

    # Templates mapping each supported filter type to its query syntax.
    filter_types = {
        'content': '%s',
        'contains': '*%s*',
        'endswith': "*%s",
        'startswith': "%s*",
        'exact': '%s',
        'gt': "{%s to}",
        'gte': "[%s to]",
        'lt': "{to %s}",
        'lte': "[to %s]",
        'fuzzy': u'%s~',
    }

    if value.post_process is False:
        # Raw-ish input: the prepared value is used verbatim.
        query_frag = prepared_value
    else:
        if filter_type in [
                'content', 'contains', 'startswith', 'endswith', 'fuzzy'
        ]:
            if value.input_type_name == 'exact':
                query_frag = prepared_value
            else:
                # Iterate over terms & incorportate the converted form of each into the query.
                terms = []

                if isinstance(prepared_value, six.string_types):
                    possible_values = prepared_value.split(' ')
                else:
                    if is_datetime is True:
                        prepared_value = self._convert_datetime(
                            prepared_value)

                    possible_values = [prepared_value]

                for possible_value in possible_values:
                    terms.append(filter_types[filter_type] %
                                 self.backend._from_python(possible_value))

                if len(terms) == 1:
                    query_frag = terms[0]
                else:
                    # Multiple terms must all match (AND semantics).
                    query_frag = u"(%s)" % " AND ".join(terms)
        elif filter_type == 'in':
            in_options = []

            for possible_value in prepared_value:
                # Datetime status is re-evaluated per list element.
                is_datetime = False

                if hasattr(possible_value, 'strftime'):
                    is_datetime = True

                pv = self.backend._from_python(possible_value)

                if is_datetime is True:
                    pv = self._convert_datetime(pv)

                # Only non-datetime strings are quoted.
                if isinstance(pv, six.string_types) and not is_datetime:
                    in_options.append('"%s"' % pv)
                else:
                    in_options.append('%s' % pv)

            query_frag = "(%s)" % " OR ".join(in_options)
        elif filter_type == 'range':
            start = self.backend._from_python(prepared_value[0])
            end = self.backend._from_python(prepared_value[1])

            if hasattr(prepared_value[0], 'strftime'):
                start = self._convert_datetime(start)

            if hasattr(prepared_value[1], 'strftime'):
                end = self._convert_datetime(end)

            query_frag = u"[%s to %s]" % (start, end)
        elif filter_type == 'exact':
            if value.input_type_name == 'exact':
                query_frag = prepared_value
            else:
                # Re-prepare through ``Exact`` to get phrase quoting.
                prepared_value = Exact(prepared_value).prepare(self)
                query_frag = filter_types[filter_type] % prepared_value
        else:
            # gt / gte / lt / lte fall through here.
            if is_datetime is True:
                prepared_value = self._convert_datetime(prepared_value)

            query_frag = filter_types[filter_type] % prepared_value

    # Parenthesize the fragment unless the input was ``Raw`` or it is
    # already wrapped.
    if len(query_frag) and not isinstance(value, Raw):
        if not query_frag.startswith('(') and not query_frag.endswith(')'):
            query_frag = "(%s)" % query_frag

    return u"%s%s" % (index_fieldname, query_frag)
def get(self, request, *args, **kwargs):
    """
    Primary endpoint for retrieving resources via the index

    Values should never be empty string or python None, instead return
    string "None" with str() call

    "availability": list value, js will parse JSON as Array
    "availabilityurl": single value, pass a string to REST client
    "type": single value, pass a string to REST client
    "author": single value, pass a string to REST client first author
    "creator": authors. The reason for the weird name is the DataOne
    standard. The metadata was designed to be compliant with DataOne
    standards. These standards do not contain an author field. Instead,
    the creator field represents authors.
    "contributor": list value, js will parse JSON as Array
    "owner": list value, js will parse JSON as Array
    "subject": list value, js will parse JSON as Array
    "coverage_type": list point, period, ...
    """
    def apply_exact_filters(queryset, field, values):
        # First value narrows with AND-filter; every later value ORs in.
        # BUG FIX: the original repeated this inline with a dead
        # ``k == len(...)`` clause in each branch, and the subject branch
        # compared ``k`` against the length of the subject *string*
        # (``len(subjtype)``), which could wrongly AND a later value.
        for k, val in enumerate(values):
            if k == 0:
                queryset = queryset.filter(**{field: Exact(val)})
            else:
                queryset = queryset.filter_or(**{field: Exact(val)})
        return queryset

    start = time.time()
    sqs = SearchQuerySet().all()

    # asc == '1' means ascending; any other value sorts descending.
    asc = '-1'
    if request.GET.get('asc'):
        asc = request.GET.get('asc')
    sort = 'modified'
    if request.GET.get('sort'):
        sort = request.GET.get('sort')
    sort = sort if asc == '1' else '-{}'.format(sort)

    if request.GET.get('q'):
        q = request.GET.get('q')
        sqs = sqs.filter(content=q)

    try:
        qs = request.query_params
        filters = json.loads(qs.get('filter'))
        # filter values expect lists, for example
        # discoverapi/?filter={"owner":["Firstname Lastname"]}
        if filters.get('author'):
            sqs = apply_exact_filters(sqs, 'author_exact', filters['author'])
        if filters.get('owner'):
            sqs = apply_exact_filters(sqs, 'owner_exact', filters['owner'])
        if filters.get('subject'):
            sqs = apply_exact_filters(sqs, 'subject_exact', filters['subject'])
        if filters.get('contributor'):
            sqs = apply_exact_filters(sqs, 'contributor_exact',
                                      filters['contributor'])
        if filters.get('type'):
            sqs = apply_exact_filters(sqs, 'content_type_exact',
                                      filters['type'])
        if filters.get('availability'):
            sqs = apply_exact_filters(sqs, 'availability_exact',
                                      filters['availability'])
        if filters.get('geofilter'):
            # return resources with geographic data
            sqs = sqs.filter(north__range=[-90, 90])
        if filters.get('date'):
            try:
                datefilter = DateRange(
                    start=datetime.datetime.strptime(filters['date'][0],
                                                     '%Y-%m-%d'),
                    end=datetime.datetime.strptime(filters['date'][1],
                                                   '%Y-%m-%d'))
                # restrict to entries with dates
                sqs = sqs.filter(start_date__gt=datetime.datetime.strptime('1900-01-01', '%Y-%m-%d'))\
                    .filter(end_date__lte=datetime.datetime.strptime(datetime.date.today().isoformat(), '%Y-%m-%d'))
                # filter out entries that don't fall in specified range
                sqs = sqs.exclude(start_date__gt=datefilter.end).exclude(
                    end_date__lt=datefilter.start)
            except ValueError as date_ex:
                return JsonResponse(
                    {
                        'message':
                        'Filter date parsing error expecting String %Y-%m-%d : {}'
                        .format(str(date_ex)),
                        'received': request.query_params
                    },
                    status=400)
            except Exception as gen_date_ex:
                return JsonResponse(
                    {
                        'message':
                        'Filter date parsing error expecting two date string values : {}'
                        .format(str(gen_date_ex)),
                        'received': request.query_params
                    },
                    status=400)
    except TypeError:
        # no filters passed: "the JSON object must be str, bytes or
        # bytearray not NoneType"
        pass
    except json.JSONDecodeError as parse_ex:
        return JsonResponse(
            {
                'message':
                'Filter JSON parsing error - {}'.format(str(parse_ex)),
                'received': request.query_params
            },
            status=400)
    except Exception as gen_ex:
        logger.warning('hs_discover API - {}: {}'.format(
            type(gen_ex), str(gen_ex)))
        return JsonResponse(
            {
                'message': '{}'.format(
                    '{}: query error. Contact a server administrator.'.format(
                        type(gen_ex)))
            },
            status=520)

    filterdata = []
    if request.GET.get('filterbuilder'):
        authors = sqs.facet('author').facet_counts()['fields']['author']
        owners = sqs.facet('owner').facet_counts()['fields']['owner']
        subjects = sqs.facet('subject').facet_counts()['fields']['subject']
        contributors = sqs.facet(
            'contributor').facet_counts()['fields']['contributor']
        types = sqs.facet(
            'content_type').facet_counts()['fields']['content_type']
        availability = sqs.facet(
            'availability').facet_counts()['fields']['availability']
        if request.GET.get('updatefilters'):
            # Drop facet entries that no longer match any result.
            authors = [x for x in authors if x[1] > 0]
            owners = [x for x in owners if x[1] > 0]
            subjects = [x for x in subjects if x[1] > 0]
            contributors = [x for x in contributors if x[1] > 0]
            types = [x for x in types if x[1] > 0]
            availability = [x for x in availability if x[1] > 0]
        filterdata = [
            authors[:self.filterlimit], owners[:self.filterlimit],
            subjects[:self.filterlimit], contributors[:self.filterlimit],
            types[:self.filterlimit], availability[:self.filterlimit]
        ]

    # Author sorting uses the exact (non-analyzed) field.
    if sort == 'author':
        sqs = sqs.order_by('author_exact')
    elif sort == '-author':
        sqs = sqs.order_by('-author_exact')
    else:
        sqs = sqs.order_by(sort)

    resources = []

    # TODO future release will add title and facilitate order_by title_exact
    # convert sqs to list after facet operations to allow for Python sorting
    # instead of Haystack order_by
    if sort == 'title':
        sqs = sorted(sqs, key=lambda idx: idx.title.lower())
    elif sort == '-title':
        sqs = sorted(sqs, key=lambda idx: idx.title.lower(), reverse=True)

    p = Paginator(sqs, self.perpage)

    if request.GET.get('pnum'):
        pnum = int(request.GET.get('pnum'))
        pnum = min(pnum, p.num_pages)
        if pnum < 1:
            # Out-of-range page: return an empty, well-formed payload.
            return JsonResponse(
                {
                    'resources': json.dumps([]),
                    'geodata': json.dumps([]),
                    'rescount': 0,
                    'pagecount': 1,
                    'perpage': self.perpage
                },
                status=200)
    else:
        pnum = 1  # page number not specified, implies page 1
        pnum = min(pnum, p.num_pages)

    geodata = []
    for result in p.page(pnum):
        contributor = 'None'  # contributor is actually a list and can have multiple values
        owner = 'None'  # owner is actually a list and can have multiple values
        author_link = None  # Send None to avoid anchor render
        creator = 'None'
        author = 'None'
        if result.creator:
            creator = result.creator
        authors = creator  # there is no concept of authors in DataOne standard
        # authors might be string 'None' here
        if result.author:
            author_link = result.author_url
            author = str(result.author)
            if authors == 'None':
                authors = author  # author would override creator
        else:
            # No personal author: fall back to the first organization.
            if result.organization:
                if isinstance(result.organization, list):
                    author = str(result.organization[0])
                else:
                    author = str(result.organization)
                author = author.replace('"', '')
                author = author.replace('[', '')
                author = author.replace(']', '').strip()
                if authors == 'None':
                    authors = author
        if result.contributor is not None:
            try:
                contributor = result.contributor
            except Exception:  # narrowed from bare except
                pass
        if result.owner is not None:
            try:
                owner = result.owner
            except Exception:  # narrowed from bare except
                pass

        # pass empty string for the frontend to ensure the attribute exists
        # but can be evaluated for empty
        pt = ''
        try:
            if 'box' in result.coverage_type:
                pt = {
                    'short_id': result.short_id,
                    'title': result.title,
                    'coverage_type': 'box'
                }
            elif 'point' in result.coverage_type:
                pt = {
                    'short_id': result.short_id,
                    'title': result.title,
                    'coverage_type': 'point'
                }
            # NOTE(review): if pt is still '' here, the first numeric
            # attribute raises and the except below skips the append.
            if isinstance(result.north, (int, float)):
                pt['north'] = result.north
            if isinstance(result.east, (int, float)):
                pt['east'] = result.east
            if isinstance(result.northlimit, (int, float)):
                pt['northlimit'] = result.northlimit
            if isinstance(result.southlimit, (int, float)):
                pt['southlimit'] = result.southlimit
            if isinstance(result.eastlimit, (int, float)):
                pt['eastlimit'] = result.eastlimit
            if isinstance(result.westlimit, (int, float)):
                pt['westlimit'] = result.westlimit
            geodata.append(pt)
        except Exception:
            # HydroShare production contains dirty data, this handling is
            # in place, until data cleaned (narrowed from bare except)
            pass

        resources.append({
            "title": result.title,
            "link": result.absolute_url,
            "availability": result.availability,
            "availabilityurl": "/static/img/{}.png".format(
                result.availability[0]),
            "type": result.resource_type_exact,
            "author": author,
            "authors": authors,
            "contributor": contributor,
            "author_link": author_link,
            "owner": owner,
            "abstract": result.abstract,
            "subject": result.subject,
            "created": result.created.isoformat(),
            "modified": result.modified.isoformat(),
            "short_id": result.short_id,
            "geo": pt
        })

    return JsonResponse(
        {
            'resources': json.dumps(resources),
            'geodata': json.dumps(geodata),
            'rescount': p.count,
            'pagecount': p.num_pages,
            'perpage': self.perpage,
            'filterdata': json.dumps(filterdata),
            # NOTE(review): time.time() is in seconds; dividing by 1000
            # looks like it was meant to be a multiply (ms) — confirm
            # with the frontend before changing.
            'time': (time.time() - start) / 1000
        },
        status=200)
def build_name_query(self, term):
    """Extend the base name query: a single-token term may also match
    ``source_id`` exactly (paired with a wildcard name match)."""
    sq_cls = self.view.query_object
    name_query = super(CredNameFilterBuilder, self).build_name_query(term)
    is_single_token = bool(term) and ' ' not in term
    if is_single_token:
        source_match = sq_cls(source_id=Exact(term)) & sq_cls(name=Raw('*'))
        name_query = name_query | source_match
    return name_query
def search_results(request):
    """
    Search institutions by free text, lender id, or respondent id and
    render a paginated result page (HTML) or serialized payload.

    Fixes over the previous version:
    - ``current_sort == None`` -> ``is None`` (identity check for None).
    - ``page is total_pages`` -> ``page == total_pages``: ``is`` compares
      object identity and only happens to work for small cached ints; it
      silently fails for page numbers > 256.
    - Removed a dead ``query = SearchQuerySet()...`` assignment that was
      immediately overwritten.
    """
    query_str = escape(request.GET.get('q', '')).strip()
    year = escape(request.GET.get('year', '')).strip()
    if not year:
        # Default to the most recent HMDA year on record.
        year = str(Year.objects.latest().hmda_year)

    lender_id = False
    respondent_id = False
    for regex in LENDER_REGEXES:
        match = regex.match(query_str)
        if match:
            lender_id = year + match.group('agency') + match.group('respondent')
    resp_only_match = RESP_RE.match(query_str)
    if resp_only_match:
        respondent_id = resp_only_match.group('respondent')

    # snl temporary
    current_sort = request.GET.get('sort')
    if current_sort is None:
        current_sort = '-assets'
    query = SearchQuerySet().models(Institution).load_all().order_by(current_sort)

    if lender_id:
        query = query.filter(lender_id=Exact(lender_id), year=year)
    elif respondent_id:
        query = query.filter(respondent_id=Exact(respondent_id), year=year)
    elif query_str and escape(request.GET.get('auto')):
        # snl temporary: escape creates a bug where None = True
        query = query.filter(text_auto=AutoQuery(query_str), year=year)
    elif query_str:
        query = query.filter(content=AutoQuery(query_str), year=year)
    else:
        query = []

    # number of results per page
    try:
        num_results = int(request.GET.get('num_results', '25'))
    except ValueError:
        num_results = 25

    # page number
    try:
        page = int(request.GET.get('page', '1'))
    except ValueError:
        page = 1

    # start and end results
    if page > 1:
        start_results = num_results * page - num_results
        end_results = num_results * page
    else:
        start_results = 0
        end_results = num_results

    sort = current_sort
    total_results = len(query)

    # total number of pages
    if total_results <= num_results:
        total_pages = 1
    else:
        total_pages = int(math.ceil(float(total_results) / float(num_results)))

    query = query[start_results:end_results]

    # next page (== instead of the broken ``is`` identity test)
    if total_results < num_results or page == total_pages:
        next_page = 0
        end_results = total_results
    else:
        next_page = page + 1

    # previous page
    prev_page = page - 1

    results = []
    for result in query:
        result.object.num_loans = result.num_loans
        results.append(result.object)
    if request.accepted_renderer.format != 'html':
        results = InstitutionSerializer(results, many=True).data

    # to adjust for template
    start_results = start_results + 1

    return Response(
        {'institutions': results, 'query_str': query_str,
         'num_results': num_results, 'start_results': start_results,
         'end_results': end_results, 'sort': sort, 'page_num': page,
         'total_results': total_results, 'next_page': next_page,
         'prev_page': prev_page, 'total_pages': total_pages,
         'current_sort': current_sort, 'year': year},
        template_name='respondents/search_results.html')
def test_correct_exact(self):
    """An ``Exact`` content filter renders as a quoted phrase in parens."""
    exact_input = Exact('hello world')
    self.sq.add_filter(SQ(content=exact_input))
    expected = '("hello world")'
    self.assertEqual(self.sq.build_query(), expected)
def build_query_fragment(self, field, filter_type, value):
    """
    Build one ``field:expression`` fragment of a Solr query string.

    Converts raw Python values through ``Clean``/``PythonData`` input types,
    serializes them via ``self.backend.conn._from_python`` (pysolr), and
    formats with the Solr syntax template matching ``filter_type``.
    """
    from haystack import connections

    query_frag = ""

    if not hasattr(value, "input_type_name"):
        # Handle when we've got a ``ValuesListQuerySet``...
        if hasattr(value, "values_list"):
            value = list(value)

        if isinstance(value, six.string_types):
            # It's not an ``InputType``. Assume ``Clean``.
            value = Clean(value)
        else:
            value = PythonData(value)

    # Prepare the query using the InputType.
    prepared_value = value.prepare(self)

    if not isinstance(prepared_value, (set, list, tuple)):
        # Then convert whatever we get back to what pysolr wants if needed.
        prepared_value = self.backend.conn._from_python(prepared_value)

    # 'content' is a special reserved word, much like 'pk' in
    # Django's ORM layer. It indicates 'no special field'.
    if field == "content":
        index_fieldname = ""
    else:
        index_fieldname = "%s:" % connections[
            self._using
        ].get_unified_index().get_index_fieldname(field)

    # Solr syntax templates keyed by filter type.
    filter_types = {
        "content": "%s",
        "contains": "*%s*",
        "endswith": "*%s",
        "startswith": "%s*",
        "exact": "%s",
        "gt": "{%s TO *}",
        "gte": "[%s TO *]",
        "lt": "{* TO %s}",
        "lte": "[* TO %s]",
        "fuzzy": "%s~",
    }

    if value.post_process is False:
        # Skip all formatting for non-post-processed input.
        query_frag = prepared_value
    else:
        if filter_type in [
            "content",
            "contains",
            "startswith",
            "endswith",
            "fuzzy",
        ]:
            if value.input_type_name == "exact":
                query_frag = prepared_value
            else:
                # Iterate over terms & incorportate the converted form of each into the query.
                terms = []

                for possible_value in prepared_value.split(" "):
                    terms.append(
                        filter_types[filter_type]
                        % self.backend.conn._from_python(possible_value)
                    )

                if len(terms) == 1:
                    query_frag = terms[0]
                else:
                    # All terms must match (AND semantics).
                    query_frag = "(%s)" % " AND ".join(terms)
        elif filter_type == "in":
            in_options = []

            if not prepared_value:
                # Empty ``in`` list: match nothing.
                query_frag = "(!*:*)"
            else:
                for possible_value in prepared_value:
                    in_options.append(
                        '"%s"' % self.backend.conn._from_python(possible_value)
                    )
                query_frag = "(%s)" % " OR ".join(in_options)
        elif filter_type == "range":
            start = self.backend.conn._from_python(prepared_value[0])
            end = self.backend.conn._from_python(prepared_value[1])
            query_frag = '["%s" TO "%s"]' % (start, end)
        elif filter_type == "exact":
            if value.input_type_name == "exact":
                query_frag = prepared_value
            else:
                # Re-prepare through ``Exact`` to get phrase quoting.
                prepared_value = Exact(prepared_value).prepare(self)
                query_frag = filter_types[filter_type] % prepared_value
        else:
            # gt / gte / lt / lte: force exact preparation first.
            if value.input_type_name != "exact":
                prepared_value = Exact(prepared_value).prepare(self)

            query_frag = filter_types[filter_type] % prepared_value

    # Wrap in parens unless the input was ``Raw`` or already wrapped.
    if len(query_frag) and not isinstance(value, Raw):
        if not query_frag.startswith("(") and not query_frag.endswith(")"):
            query_frag = "(%s)" % query_frag

    return "%s%s" % (index_fieldname, query_frag)
def build_query_fragment(self, field, filter_type, value):
    """
    Build one ``field:expression`` fragment for a Whoosh query.

    Uses lowercase ``to`` range syntax and ``_convert_datetime`` for
    datetime values. Fuzzy matching is parameterized by
    ``FUZZY_WHOOSH_MAX_EDITS`` / ``FUZZY_WHOOSH_MIN_PREFIX``.
    """
    from haystack import connections

    query_frag = ""
    is_datetime = False

    if not hasattr(value, "input_type_name"):
        # Handle when we've got a ``ValuesListQuerySet``...
        if hasattr(value, "values_list"):
            value = list(value)

        if hasattr(value, "strftime"):
            is_datetime = True

        if isinstance(value, str) and value != " ":
            # It's not an ``InputType``. Assume ``Clean``.
            value = Clean(value)
        else:
            value = PythonData(value)

    # Prepare the query using the InputType.
    prepared_value = value.prepare(self)

    if not isinstance(prepared_value, (set, list, tuple)):
        # Then convert whatever we get back to what pysolr wants if needed.
        prepared_value = self.backend._from_python(prepared_value)

    # 'content' is a special reserved word, much like 'pk' in
    # Django's ORM layer. It indicates 'no special field'.
    if field == "content":
        index_fieldname = ""
    else:
        index_fieldname = "%s:" % connections[
            self._using].get_unified_index().get_index_fieldname(field)

    # Whoosh syntax templates; the fuzzy template bakes in the max edit
    # distance, leaving ``%s`` (term) and ``%d`` (prefix length) slots.
    filter_types = {
        "content": "%s",
        "contains": "*%s*",
        "endswith": "*%s",
        "startswith": "%s*",
        "exact": "%s",
        "gt": "{%s to}",
        "gte": "[%s to]",
        "lt": "{to %s}",
        "lte": "[to %s]",
        "fuzzy": "%s~{}/%d".format(FUZZY_WHOOSH_MAX_EDITS),
    }

    if value.post_process is False:
        query_frag = prepared_value
    else:
        if filter_type in [
            "content",
            "contains",
            "startswith",
            "endswith",
            "fuzzy",
        ]:
            if value.input_type_name == "exact":
                query_frag = prepared_value
            else:
                # Iterate over terms & incorportate the converted form of each into the query.
                terms = []

                if isinstance(prepared_value, str):
                    possible_values = prepared_value.split(" ")
                else:
                    if is_datetime is True:
                        prepared_value = self._convert_datetime(
                            prepared_value)

                    possible_values = [prepared_value]

                for possible_value in possible_values:
                    possible_value_str = self.backend._from_python(
                        possible_value)
                    if filter_type == "fuzzy":
                        # Prefix length is capped by the term's own length.
                        terms.append(filter_types[filter_type] % (
                            possible_value_str,
                            min(FUZZY_WHOOSH_MIN_PREFIX,
                                len(possible_value_str)),
                        ))
                    else:
                        terms.append(
                            filter_types[filter_type] % possible_value_str)

                if len(terms) == 1:
                    query_frag = terms[0]
                else:
                    # All terms must match (AND semantics).
                    query_frag = "(%s)" % " AND ".join(terms)
        elif filter_type == "in":
            in_options = []

            for possible_value in prepared_value:
                # Datetime status is re-evaluated per list element.
                is_datetime = False

                if hasattr(possible_value, "strftime"):
                    is_datetime = True

                pv = self.backend._from_python(possible_value)

                if is_datetime is True:
                    pv = self._convert_datetime(pv)

                # Only non-datetime strings are quoted.
                if isinstance(pv, str) and not is_datetime:
                    in_options.append('"%s"' % pv)
                else:
                    in_options.append("%s" % pv)

            query_frag = "(%s)" % " OR ".join(in_options)
        elif filter_type == "range":
            start = self.backend._from_python(prepared_value[0])
            end = self.backend._from_python(prepared_value[1])

            if hasattr(prepared_value[0], "strftime"):
                start = self._convert_datetime(start)

            if hasattr(prepared_value[1], "strftime"):
                end = self._convert_datetime(end)

            query_frag = "[%s to %s]" % (start, end)
        elif filter_type == "exact":
            if value.input_type_name == "exact":
                query_frag = prepared_value
            else:
                # Re-prepare through ``Exact`` to get phrase quoting.
                prepared_value = Exact(prepared_value).prepare(self)
                query_frag = filter_types[filter_type] % prepared_value
        else:
            # gt / gte / lt / lte fall through here.
            if is_datetime is True:
                prepared_value = self._convert_datetime(prepared_value)

            query_frag = filter_types[filter_type] % prepared_value

    # Wrap in parens unless the input was ``Raw`` or already wrapped.
    if len(query_frag) and not isinstance(value, Raw):
        if not query_frag.startswith("(") and not query_frag.endswith(")"):
            query_frag = "(%s)" % query_frag

    return "%s%s" % (index_fieldname, query_frag)
def build_query_fragment(self, field, filter_type, value):
    """
    Build one ``field:expression`` fragment of an Elasticsearch query string.

    Values are serialized with ``self.backend._from_python(..., for_query=True)``.
    Supports a reduced set of filter types compared to the Solr backend
    (no content/endswith/fuzzy templates).
    """
    from haystack import connections

    query_frag = ''

    if not hasattr(value, 'input_type_name'):
        # Handle when we've got a ``ValuesListQuerySet``...
        if hasattr(value, 'values_list'):
            value = list(value)

        if isinstance(value, six.string_types):
            # It's not an ``InputType``. Assume ``Clean``.
            value = Clean(value)
        else:
            value = PythonData(value)

    # Prepare the query using the InputType.
    prepared_value = value.prepare(self)

    if not isinstance(prepared_value, (set, list, tuple)):
        # Then convert whatever we get back to what elasticsearch wants if needed.
        prepared_value = self.backend._from_python(prepared_value,
                                                   for_query=True)

    # 'content' is a special reserved word, much like 'pk' in
    # Django's ORM layer. It indicates 'no special field'.
    if field == 'content':
        index_fieldname = ''
    else:
        index_fieldname = u'%s:' % connections[
            self._using].get_unified_index().get_index_fieldname(field)

    # Elasticsearch query-string syntax templates keyed by filter type.
    filter_types = {
        'contains': u'%s',
        'startswith': u'%s*',
        'exact': u'%s',
        'gt': u'{%s TO *}',
        'gte': u'[%s TO *]',
        'lt': u'{* TO %s}',
        'lte': u'[* TO %s]',
    }

    if value.post_process is False:
        query_frag = prepared_value
    else:
        if filter_type in ['contains', 'startswith']:
            if value.input_type_name == 'exact':
                query_frag = prepared_value
            else:
                # Iterate over terms & incorporate the converted form of each into the query.
                terms = []

                if isinstance(prepared_value, six.string_types):
                    for possible_value in prepared_value.split(' '):
                        terms.append(filter_types[filter_type] %
                                     self.backend._from_python(
                                         possible_value, for_query=True))
                else:
                    terms.append(filter_types[filter_type] %
                                 self.backend._from_python(prepared_value,
                                                           for_query=True))

                if len(terms) == 1:
                    query_frag = terms[0]
                else:
                    # All terms must match (AND semantics).
                    query_frag = u"(%s)" % " AND ".join(terms)
        elif filter_type == 'in':
            in_options = []

            # Log unusually large IN clauses — they can produce very long
            # query strings.
            if len(prepared_value) >= 500:
                from elation.util.exception import log_warning
                log_warning(msg="Found %s values in an ES IN clause" %
                            (len(prepared_value), ))

            for possible_value in prepared_value:
                in_options.append(u'"%s"' % self.backend._from_python(
                    possible_value, for_query=True))

            query_frag = u"(%s)" % " OR ".join(in_options)
        elif filter_type == 'range':
            start = self.backend._from_python(prepared_value[0],
                                              for_query=True)
            end = self.backend._from_python(prepared_value[1],
                                            for_query=True)
            query_frag = u'["%s" TO "%s"]' % (start, end)
        elif filter_type == 'exact':
            if value.input_type_name == 'exact':
                query_frag = prepared_value
            else:
                # Re-prepare through ``Exact`` to get phrase quoting.
                prepared_value = Exact(prepared_value).prepare(self)
                query_frag = filter_types[filter_type] % prepared_value
        else:
            # gt / gte / lt / lte: force exact preparation first.
            if value.input_type_name != 'exact':
                prepared_value = Exact(prepared_value).prepare(self)

            query_frag = filter_types[filter_type] % prepared_value

    # Wrap in parens unless the input was ``Raw`` or already wrapped.
    if len(query_frag) and not isinstance(value, Raw):
        if not query_frag.startswith('(') and not query_frag.endswith(')'):
            query_frag = "(%s)" % query_frag

    return u"%s%s" % (index_fieldname, query_frag)
def index(request):
    """Landing page: faceted search over the index plus per-type counts
    and (when nothing matched) spelling suggestions."""
    db = request.GET.get("db", "all")
    search = request.GET.get("search", "").strip()

    facet_params = ["authors", "affiliations", "taxon"]
    selected = {}
    for param in facet_params:
        if param in request.GET:
            selected[param] = Exact(request.GET[param])

    if search:
        sqs = SearchQuerySet().filter(content=search, **selected).facet("type")
    else:
        sqs = SearchQuerySet().filter(**selected).facet("type")
        search = "*"  # wildcard placeholder when no term was given

    params = dict(request.GET)
    params["q"] = [search]

    # Only facet on fields the user has not already narrowed by.
    for param in facet_params:
        if param not in selected:
            sqs = sqs.facet(param, limit=5)

    facets = sqs.facet_counts()
    if "fields" in facets:
        type_counts = defaultdict(lambda: 0, dict(facets["fields"]["type"]))
    else:
        type_counts = defaultdict(lambda: 0)

    count = 0
    for entry in resources:
        entry["count"] = type_counts[str(entry["type"])]
        count += type_counts[entry["type"]]

    suggestions = []
    if count == 0:
        # Nothing matched at all — offer spelling suggestions instead.
        suggestions = SearchQuerySet().auto_query(search).spelling_suggestion()
        if suggestions:
            chunks = suggestions.replace("(", " ").split(")")
            suggestions = [chunk.strip() for chunk in chunks if chunk.strip()]

    if "fields" in facets:
        del facets["fields"]["type"]
    else:
        facets["fields"] = {}

    context = {
        "stats": resources,
        "search": search if search != "*" else "",
        "selected": selected,
        "db": db,
        "suggestions": suggestions,
        "querystring": params,
        "sidebarleft": facets["fields"],
        "sidebarrigth": {
            "news": [{
                "title": "n1",
                "text": "lalala"
            }]
        }
    }
    return render(request, 'index.html', context)
def build_query_fragment(self, field, filter_type, value):
    """
    Build one ``field:expression`` fragment of a Solr query string.

    Extends the standard set of filter types with ``regex``/``iregex``,
    whose values are wrapped in ``RegExp`` and formatted as ``/pattern/``
    without the usual parenthesization or pysolr conversion.
    """
    from haystack import connections
    query_frag = ''

    if not hasattr(value, 'input_type_name'):
        # Handle when we've got a ``ValuesListQuerySet``...
        if hasattr(value, 'values_list'):
            value = list(value)

        if filter_type in ["regex", "iregex"]:
            # Regex filters keep the raw pattern; no cleaning applied.
            value = RegExp(value)
        elif isinstance(value, six.string_types):
            # It's not an ``InputType``. Assume ``Clean``.
            value = Clean(value)
        else:
            value = PythonData(value)

    # Prepare the query using the InputType.
    prepared_value = value.prepare(self)

    if not isinstance(prepared_value, (set, list, tuple)):
        # Then convert whatever we get back to what pysolr wants if needed.
        prepared_value = self.backend.conn._from_python(prepared_value)

    # 'content' is a special reserved word, much like 'pk' in
    # Django's ORM layer. It indicates 'no special field'.
    if field == 'content':
        index_fieldname = ''
    else:
        index_fieldname = \
            u'%s:' % connections[self._using].\
            get_unified_index().get_index_fieldname(field)

    # Solr syntax templates keyed by filter type.
    filter_types = {
        'content': u'%s',
        'contains': u'*%s*',
        'endswith': u'*%s',
        'startswith': u'%s*',
        'exact': u'%s',
        'gt': u'{%s TO *}',
        'gte': u'[%s TO *]',
        'lt': u'{* TO %s}',
        'lte': u'[* TO %s]',
        'fuzzy': u'%s~',
        'regex': u'/%s/',
        'iregex': u'/%s/',
    }

    if value.post_process is False:
        query_frag = prepared_value
    else:
        if filter_type in \
                ['content', 'contains', 'startswith',
                 'endswith', 'fuzzy', 'regex', 'iregex']:
            if value.input_type_name == 'exact':
                query_frag = prepared_value
            else:
                # Iterate over terms & incorportate the converted
                # form of each into the query.
                terms = []

                for possible_value in prepared_value.split(' '):
                    # Regex terms are used verbatim; others go through
                    # the pysolr converter.
                    terms.append(
                        filter_types[filter_type] %
                        (self.backend.conn._from_python(possible_value)
                         if filter_type not in ['regex', 'iregex']
                         else possible_value))

                if len(terms) == 1:
                    query_frag = terms[0]
                else:
                    # All terms must match (AND semantics).
                    query_frag = u"(%s)" % " AND ".join(terms)
        elif filter_type == 'in':
            in_options = []

            if not prepared_value:
                # Empty ``in`` list: match nothing.
                query_frag = u'(!*:*)'
            else:
                for possible_value in prepared_value:
                    in_options.append(
                        u'"%s"' %
                        self.backend.conn._from_python(possible_value))
                query_frag = u"(%s)" % " OR ".join(in_options)
        elif filter_type == 'range':
            start = self.backend.conn._from_python(prepared_value[0])
            end = self.backend.conn._from_python(prepared_value[1])
            query_frag = u'["%s" TO "%s"]' % (start, end)
        elif filter_type == 'exact':
            if value.input_type_name == 'exact':
                query_frag = prepared_value
            else:
                # Re-prepare through ``Exact`` to get phrase quoting.
                prepared_value = Exact(prepared_value).prepare(self)
                query_frag = filter_types[filter_type] % prepared_value
        else:
            # gt / gte / lt / lte: force exact preparation first.
            if value.input_type_name != 'exact':
                prepared_value = Exact(prepared_value).prepare(self)

            query_frag = filter_types[filter_type] % prepared_value

    # Wrap in parens unless the input was ``Raw``, already wrapped, or a
    # regex fragment (``/.../`` must not be parenthesized here).
    if len(query_frag) and not isinstance(value, Raw) and \
            filter_type not in ['regex', 'iregex']:
        if not query_frag.startswith('(') and not query_frag.endswith(')'):
            query_frag = "(%s)" % query_frag

    return u"%s%s" % (index_fieldname, query_frag)
def get_credit_apps_owned_by_user(user):
    """Return every indexed US/CA (single or joint) credit application
    whose ``user_id`` exactly matches *user*'s primary key."""
    app_models = (USCreditApp, USJointCreditApp, CACreditApp, CAJointCreditApp)
    results = SearchQuerySet().models(*app_models)
    results = results.filter(user_id=Exact(user.pk))
    return results.all()
def code_search(request):
    """
    Code search view.

    Combines up to four candidate pools, deduplicated by post id, in
    priority order: character-token matches, lexer-token matches, Python
    AST matches, C++ AST matches. Results are then restricted to posts
    whose problem still exists, and rendered.

    Fixes over the previous version:
    - Two bare ``except:`` clauses narrowed to ``except Exception:`` so
      they no longer swallow ``KeyboardInterrupt``/``SystemExit``.
    - Local ``id`` renamed to ``post_id`` (was shadowing the builtin).
    - Chinese comments translated to English; dead commented-out code
      removed.
    """
    all_time = time.time()
    query = request.GET['code1']
    # query_language = request.GET['select_language']
    raw_query = query
    query_highlight_token = highlight_words(raw_query)
    all_posts = []
    all_posts_id = []
    all_posts_filtered = []

    if query != "":
        print('Query Code: ', query)
        lex_token = tokenize(raw_query, "lex").strip()
        char_token = tokenize(raw_query, "char").strip()
        print('Lex Tokens: ', lex_token)
        print('Char Tokens: ', char_token)

        time_1 = time.time()
        # Character-token search seeds the result pool.
        if char_token == "":
            char_search_result = []
        else:
            char_search_result = SearchQuerySet().using('problemcode').filter(
                content=Exact(char_token))
        for item in char_search_result[:2000]:
            post_id = item.id
            all_posts.append(item)
            all_posts_id.append(post_id)
        char_search_num = len(char_search_result)
        print(all_posts_id[:10])
        print('char search result num: ', char_search_num)

        # Lexer-token search appends anything not already found.
        if lex_token == "":
            lex_search_result = []
        else:
            lex_search_result = SearchQuerySet().using('problemcode').filter(
                content=Exact(lex_token))
        for item in lex_search_result[:2000]:
            post_id = item.id
            if post_id not in all_posts_id:
                all_posts.append(item)
                all_posts_id.append(post_id)
        lex_search_num = len(lex_search_result)
        print(all_posts_id[:10])
        print('lex search result num: ', lex_search_num)
        print('time_1: ', time.time() - time_1)

        time_2 = time.time()
        if len(query_highlight_token.split()) == 1:
            # Query too short (a single word): skip AST-based search.
            posts_python_ast = []
            python_search_num = 0
            posts_cpp_ast = []
            cpp_search_num = 0
            print('Too Short To AST Analysis.')
        else:
            # Python AST analysis: find similar code by AST tokens and
            # append it after the lex/char results.
            try:
                query_ast = convert_python(raw_query)
                print('Python Query AST: ', query_ast)
                query_ast = query_ast.split()
                if len(query_ast) == 1:
                    posts_python_ast = []
                else:
                    posts_python_ast = SearchQuerySet().using(
                        'problemcode').all()
                    for query_item in query_ast:
                        posts_python_ast = posts_python_ast.filter_or(
                            content=query_item)
                python_search_num = len(posts_python_ast)
                print('python search result num: ', python_search_num)
            except Exception:
                # Parsing arbitrary user code is best-effort; fall back to
                # an empty pool on any failure.
                posts_python_ast = []
                python_search_num = 0
                print('Python AST Analysis Failed.')

            # C++ AST analysis: same pattern, after stripping headers.
            try:
                query_ast = convert_cpp(cpp_head_remove(raw_query))
                print('C++ Query AST: ', query_ast)
                query_ast = query_ast.split()
                if len(query_ast) == 1:
                    posts_cpp_ast = []
                else:
                    posts_cpp_ast = SearchQuerySet().using('problemcode').all()
                    for query_item in query_ast:
                        posts_cpp_ast = posts_cpp_ast.filter_or(
                            content=query_item)
                cpp_search_num = len(posts_cpp_ast)
                print('c++ search result num: ', cpp_search_num)
            except Exception:
                posts_cpp_ast = []
                cpp_search_num = 0
                print('C++ AST Analysis Failed.')

        # Merge AST pools, still deduplicating by post id.
        for item in posts_python_ast[:2000]:
            post_id = item.id
            if post_id not in all_posts_id:
                all_posts.append(item)
                all_posts_id.append(post_id)
        for item in posts_cpp_ast[:2000]:
            post_id = item.id
            if post_id not in all_posts_id:
                all_posts.append(item)
                all_posts_id.append(post_id)
        all_posts_num = len(all_posts)
        print('All Posts Num: ', all_posts_num)
        print('time_2: ', time.time() - time_2)

        time_3 = time.time()
        # Keep only posts whose referenced problem still exists.
        for item in all_posts:
            if Problem.objects.filter(id=item.problem).exists():
                item.code = cpp_head_convert(item.code)
                all_posts_filtered.append(item)
        print('time_3: ', time.time() - time_3)

    result_num = len(all_posts_filtered)
    print('Returned Posts Count: ', result_num)
    print('all_time: ', time.time() - all_time)
    return render(
        request, 'search/code_search_result.html', {
            'posts': all_posts_filtered,
            'raw_query': raw_query,
            'query_token': query_highlight_token,
            'result_num': result_num
        })