Example #1
    def build_query_fragment(self, field, filter_type, value):
        from haystack import connections
        query_frag = ''
        is_datetime = False

        if not hasattr(value, 'input_type_name'):
            # Handle when we've got a ``ValuesListQuerySet``...
            if hasattr(value, 'values_list'):
                value = list(value)

            if hasattr(value, 'strftime'):
                is_datetime = True

            if isinstance(value, six.string_types) and value != ' ':
                # It's not an ``InputType``. Assume ``Clean``.
                value = Clean(value)
            else:
                value = PythonData(value)

        # Prepare the query using the InputType.
        prepared_value = value.prepare(self)

        if not isinstance(prepared_value, (set, list, tuple)):
            # Then convert whatever we get back to what pysolr wants if needed.
            prepared_value = self.backend._from_python(prepared_value)

        # 'content' is a special reserved word, much like 'pk' in
        # Django's ORM layer. It indicates 'no special field'.
        if field == 'content':
            index_fieldname = ''
        else:
            index_fieldname = u'%s:' % connections[
                self._using].get_unified_index().get_index_fieldname(field)

        filter_types = {
            'content': '%s',
            'contains': '*%s*',
            'endswith': "*%s",
            'startswith': "%s*",
            'exact': '%s',
            'gt': "{%s to}",
            'gte': "[%s to]",
            'lt': "{to %s}",
            'lte': "[to %s]",
            'fuzzy': u'%s~',
        }

        if value.post_process is False:
            query_frag = prepared_value
        else:
            if filter_type in [
                    'content', 'contains', 'startswith', 'endswith', 'fuzzy'
            ]:
                if value.input_type_name == 'exact':
                    query_frag = prepared_value
                else:
                    # Iterate over terms & incorporate the converted form of each into the query.
                    terms = []

                    if isinstance(prepared_value, six.string_types):
                        possible_values = prepared_value.split(' ')
                    else:
                        if is_datetime is True:
                            prepared_value = self._convert_datetime(
                                prepared_value)

                        possible_values = [prepared_value]

                    for possible_value in possible_values:
                        terms.append(filter_types[filter_type] %
                                     self.backend._from_python(possible_value))

                    if len(terms) == 1:
                        query_frag = terms[0]
                    else:
                        query_frag = u"(%s)" % " AND ".join(terms)
            elif filter_type == 'in':
                in_options = []

                for possible_value in prepared_value:
                    is_datetime = False

                    if hasattr(possible_value, 'strftime'):
                        is_datetime = True

                    pv = self.backend._from_python(possible_value)

                    if is_datetime is True:
                        pv = self._convert_datetime(pv)

                    if isinstance(pv, six.string_types) and not is_datetime:
                        in_options.append('"%s"' % pv)
                    else:
                        in_options.append('%s' % pv)

                query_frag = "(%s)" % " OR ".join(in_options)
            elif filter_type == 'range':
                start = self.backend._from_python(prepared_value[0])
                end = self.backend._from_python(prepared_value[1])

                if hasattr(prepared_value[0], 'strftime'):
                    start = self._convert_datetime(start)

                if hasattr(prepared_value[1], 'strftime'):
                    end = self._convert_datetime(end)

                query_frag = u"[%s to %s]" % (start, end)
            elif filter_type == 'exact':
                if value.input_type_name == 'exact':
                    query_frag = prepared_value
                else:
                    prepared_value = Exact(prepared_value).prepare(self)
                    query_frag = filter_types[filter_type] % prepared_value
            else:
                if is_datetime is True:
                    prepared_value = self._convert_datetime(prepared_value)

                query_frag = filter_types[filter_type] % prepared_value

        if len(query_frag) and not isinstance(value, Raw):
            if not query_frag.startswith('(') and not query_frag.endswith(')'):
                query_frag = "(%s)" % query_frag

        return u"%s%s" % (index_fieldname, query_frag)
Example #2
    def get(self, request, *args, **kwargs):
        """
        Primary endpoint for retrieving resources via the index
        Values should never be an empty string or Python None; instead return the string "None" via str()
        "availability": list value, js will parse JSON as Array
        "availabilityurl": single value, pass a string to REST client
        "type": single value, pass a string to REST client
        "author": single value, pass a string to REST client first author
        "creator: authors,
                The reason for the weird name is the DataOne standard. The metadata was designed to be compliant
                with DataOne standards. These standards do not contain an author field. Instead, the creator field
                represents authors.
        "contributor": list value, js will parse JSON as Array
        "owner": list value, js will parse JSON as Array
        "subject": list value, js will parse JSON as Array
        "coverage_type": list point, period, ...
        """
        start = time.time()

        sqs = SearchQuerySet().all()

        asc = '-1'
        if request.GET.get('asc'):
            asc = request.GET.get('asc')

        sort = 'modified'
        if request.GET.get('sort'):
            sort = request.GET.get('sort')
        sort = sort if asc == '1' else '-{}'.format(sort)

        if request.GET.get('q'):
            q = request.GET.get('q')
            sqs = sqs.filter(content=q)

        try:
            qs = request.query_params
            filters = json.loads(qs.get('filter'))
            # filter values expect lists, for example discoverapi/?filter={"owner":["Firstname Lastname"]}
            if filters.get('author'):
                for k, authortype in enumerate(filters['author']):
                    if k == 0 or k == len(filters['author']):
                        sqs = sqs.filter(author_exact=Exact(authortype))
                    else:
                        sqs = sqs.filter_or(author_exact=Exact(authortype))
            if filters.get('owner'):
                for k, ownertype in enumerate(filters['owner']):
                    if k == 0 or k == len(filters['owner']):
                        sqs = sqs.filter(owner_exact=Exact(ownertype))
                    else:
                        sqs = sqs.filter_or(owner_exact=Exact(ownertype))
            if filters.get('subject'):
                for k, subjtype in enumerate(filters['subject']):
                    if k == 0 or k == len(filters['subject']):
                        sqs = sqs.filter(subject_exact=Exact(subjtype))
                    else:
                        sqs = sqs.filter_or(subject_exact=Exact(subjtype))
            if filters.get('contributor'):
                for k, contribtype in enumerate(filters['contributor']):
                    if k == 0 or k == len(filters['contributor']):
                        sqs = sqs.filter(contributor_exact=Exact(contribtype))
                    else:
                        sqs = sqs.filter_or(
                            contributor_exact=Exact(contribtype))
            if filters.get('type'):
                for k, restype in enumerate(filters['type']):
                    if k == 0 or k == len(filters['type']):
                        sqs = sqs.filter(content_type_exact=Exact(restype))
                    else:
                        sqs = sqs.filter_or(content_type_exact=Exact(restype))
            if filters.get('availability'):
                for k, availtype in enumerate(filters['availability']):
                    if k == 0 or k == len(filters['availability']):
                        sqs = sqs.filter(availability_exact=Exact(availtype))
                    else:
                        sqs = sqs.filter_or(
                            availability_exact=Exact(availtype))
            if filters.get('geofilter'):
                sqs = sqs.filter(
                    north__range=[-90,
                                  90])  # return resources with geographic data
            if filters.get('date'):
                try:
                    datefilter = DateRange(start=datetime.datetime.strptime(
                        filters['date'][0], '%Y-%m-%d'),
                                           end=datetime.datetime.strptime(
                                               filters['date'][1], '%Y-%m-%d'))

                    # restrict to entries with dates
                    sqs = sqs.filter(start_date__gt=datetime.datetime.strptime('1900-01-01', '%Y-%m-%d'))\
                        .filter(end_date__lte=datetime.datetime.strptime(datetime.date.today().isoformat(), '%Y-%m-%d'))

                    # filter out entries that don't fall in specified range
                    sqs = sqs.exclude(start_date__gt=datefilter.end).exclude(
                        end_date__lt=datefilter.start)

                except ValueError as date_ex:
                    return JsonResponse(
                        {
                            'message':
                            'Filter date parsing error expecting String %Y-%m-%d : {}'
                            .format(str(date_ex)),
                            'received':
                            request.query_params
                        },
                        status=400)
                except Exception as gen_date_ex:
                    return JsonResponse(
                        {
                            'message':
                            'Filter date parsing error expecting two date string values : {}'
                            .format(str(gen_date_ex)),
                            'received':
                            request.query_params
                        },
                        status=400)
        except TypeError as type_ex:
            pass  # no filters passed "the JSON object must be str, bytes or bytearray not NoneType"

        except json.JSONDecodeError as parse_ex:
            return JsonResponse(
                {
                    'message':
                    'Filter JSON parsing error - {}'.format(str(parse_ex)),
                    'received':
                    request.query_params
                },
                status=400)

        except Exception as gen_ex:
            logger.warning('hs_discover API - {}: {}'.format(
                type(gen_ex), str(gen_ex)))
            return JsonResponse(
                {
                    'message':
                    '{}'.format(
                        '{}: query error. Contact a server administrator.'.
                        format(type(gen_ex)))
                },
                status=520)

        filterdata = []
        if request.GET.get('filterbuilder'):
            authors = sqs.facet('author').facet_counts()['fields']['author']
            owners = sqs.facet('owner').facet_counts()['fields']['owner']
            subjects = sqs.facet('subject').facet_counts()['fields']['subject']
            contributors = sqs.facet(
                'contributor').facet_counts()['fields']['contributor']
            types = sqs.facet(
                'content_type').facet_counts()['fields']['content_type']
            availability = sqs.facet(
                'availability').facet_counts()['fields']['availability']
            if request.GET.get('updatefilters'):
                authors = [x for x in authors if x[1] > 0]
                owners = [x for x in owners if x[1] > 0]
                subjects = [x for x in subjects if x[1] > 0]
                contributors = [x for x in contributors if x[1] > 0]
                types = [x for x in types if x[1] > 0]
                availability = [x for x in availability if x[1] > 0]
            filterdata = [
                authors[:self.filterlimit], owners[:self.filterlimit],
                subjects[:self.filterlimit], contributors[:self.filterlimit],
                types[:self.filterlimit], availability[:self.filterlimit]
            ]

        if sort == 'author':
            sqs = sqs.order_by('author_exact')
        elif sort == '-author':
            sqs = sqs.order_by('-author_exact')
        else:
            sqs = sqs.order_by(sort)

        resources = []

        # TODO future release will add title and facilitate order_by title_exact
        # convert sqs to list after facet operations to allow for Python sorting instead of Haystack order_by
        if sort == 'title':
            sqs = sorted(sqs, key=lambda idx: idx.title.lower())
        elif sort == '-title':
            sqs = sorted(sqs, key=lambda idx: idx.title.lower(), reverse=True)

        p = Paginator(sqs, self.perpage)

        if request.GET.get('pnum'):
            pnum = request.GET.get('pnum')
            pnum = int(pnum)
            pnum = min(pnum, p.num_pages)
            if pnum < 1:
                return JsonResponse(
                    {
                        'resources': json.dumps([]),
                        'geodata': json.dumps([]),
                        'rescount': 0,
                        'pagecount': 1,
                        'perpage': self.perpage
                    },
                    status=200)
        else:
            pnum = 1  # page number not specified, implies page 1
            pnum = min(pnum, p.num_pages)

        geodata = []

        for result in p.page(pnum):
            contributor = 'None'  # contributor is actually a list and can have multiple values
            owner = 'None'  # owner is actually a list and can have multiple values
            author_link = None  # Send None to avoid anchor render
            creator = 'None'
            author = 'None'

            if result.creator:
                creator = result.creator

            authors = creator  # there is no concept of authors in DataOne standard
            # authors might be string 'None' here

            if result.author:
                author_link = result.author_url
                author = str(result.author)
                if authors == 'None':
                    authors = author  # author would override creator in
            else:
                if result.organization:
                    if isinstance(result.organization, list):
                        author = str(result.organization[0])
                    else:
                        author = str(result.organization)

                    author = author.replace('"', '')
                    author = author.replace('[', '')
                    author = author.replace(']', '').strip()

                    if authors == 'None':
                        authors = author

            if result.contributor is not None:
                try:
                    contributor = result.contributor
                except:
                    pass

            if result.owner is not None:
                try:
                    owner = result.owner
                except:
                    pass
            pt = ''  # pass empty string for the frontend to ensure the attribute exists but can be evaluated for empty
            try:
                if 'box' in result.coverage_type:
                    pt = {
                        'short_id': result.short_id,
                        'title': result.title,
                        'coverage_type': 'box'
                    }
                elif 'point' in result.coverage_type:
                    pt = {
                        'short_id': result.short_id,
                        'title': result.title,
                        'coverage_type': 'point'
                    }

                if isinstance(result.north, (int, float)):
                    pt['north'] = result.north
                if isinstance(result.east, (int, float)):
                    pt['east'] = result.east
                if isinstance(result.northlimit, (int, float)):
                    pt['northlimit'] = result.northlimit
                if isinstance(result.southlimit, (int, float)):
                    pt['southlimit'] = result.southlimit
                if isinstance(result.eastlimit, (int, float)):
                    pt['eastlimit'] = result.eastlimit
                if isinstance(result.westlimit, (int, float)):
                    pt['westlimit'] = result.westlimit

                geodata.append(pt)
            except:
                pass  # HydroShare production contains dirty data; this handling stays in place until the data is cleaned
            resources.append({
                "title":
                result.title,
                "link":
                result.absolute_url,
                "availability":
                result.availability,
                "availabilityurl":
                "/static/img/{}.png".format(result.availability[0]),
                "type":
                result.resource_type_exact,
                "author":
                author,
                "authors":
                authors,
                "contributor":
                contributor,
                "author_link":
                author_link,
                "owner":
                owner,
                "abstract":
                result.abstract,
                "subject":
                result.subject,
                "created":
                result.created.isoformat(),
                "modified":
                result.modified.isoformat(),
                "short_id":
                result.short_id,
                "geo":
                pt
            })

        return JsonResponse(
            {
                'resources': json.dumps(resources),
                'geodata': json.dumps(geodata),
                'rescount': p.count,
                'pagecount': p.num_pages,
                'perpage': self.perpage,
                'filterdata': json.dumps(filterdata),
                'time': (time.time() - start) / 1000
            },
            status=200)
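A small client-side sketch of how this endpoint is queried; the "discoverapi/" path comes from the comment in the filter-parsing block above, while the parameter values here are hypothetical. The filter parameter is a JSON object whose values are lists.

import json
from urllib.parse import urlencode

params = {
    'q': 'groundwater',
    'filter': json.dumps({'owner': ['Firstname Lastname'], 'availability': ['public']}),
    'sort': 'modified',
    'asc': '-1',
    'pnum': 1,
}
print('discoverapi/?' + urlencode(params))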
Example #3
 def build_name_query(self, term):
     SQ = self.view.query_object
     filter = super(CredNameFilterBuilder, self).build_name_query(term)
     if term and ' ' not in term:
         filter = filter | (SQ(source_id=Exact(term)) & SQ(name=Raw('*')))
     return filter
Example #4
def search_results(request):
    query_str = escape(request.GET.get('q', '')).strip()
    year = escape(request.GET.get('year', '')).strip()
    if not year:
        year = str(Year.objects.latest().hmda_year)

    lender_id = False
    respondent_id = False
    for regex in LENDER_REGEXES:
        match = regex.match(query_str)
        if match:
            lender_id = year + match.group('agency') + match.group('respondent')
    resp_only_match = RESP_RE.match(query_str)
    if resp_only_match:
        respondent_id = resp_only_match.group('respondent')

    query = SearchQuerySet().models(Institution).load_all() # snl temporary

    current_sort = request.GET.get('sort')
    if current_sort is None:
        current_sort = '-assets'

    query = SearchQuerySet().models(Institution).load_all().order_by(current_sort)

    if lender_id:
        query = query.filter(lender_id=Exact(lender_id), year=year)
    elif respondent_id:
        query = query.filter(respondent_id=Exact(respondent_id), year=year)
    elif query_str and escape(request.GET.get('auto')): # snl temporary: escape creates a bug where None = True
        query = query.filter(text_auto=AutoQuery(query_str),year=year)
    elif query_str:
        query = query.filter(content=AutoQuery(query_str), year=year)
    else:
        query = []

    # number of results per page
    try:
        num_results = int(request.GET.get('num_results', '25'))
    except ValueError:
        num_results = 25

    # page number
    try:
        page = int(request.GET.get('page', '1'))
    except ValueError:
        page = 1

    # start and end results
    if page > 1:
        start_results = num_results * page - num_results
        end_results = num_results * page
    else:
        start_results = 0
        end_results = num_results

    sort = current_sort

    total_results = len(query)

    # total number of pages
    if total_results <= num_results:
        total_pages = 1
    else:
        total_pages = int(math.ceil(float(total_results) / float(num_results)))

    query = query[start_results:end_results]

    # next page
    if total_results < num_results or page == total_pages:
        next_page = 0
        end_results = total_results
    else:
        next_page = page + 1

    # previous page
    prev_page = page - 1

    results = []
    for result in query:
        result.object.num_loans = result.num_loans
        results.append(result.object)
    if request.accepted_renderer.format != 'html':
        results = InstitutionSerializer(results, many=True).data

    # to adjust for template
    start_results = start_results + 1

    return Response(
        {'institutions': results, 'query_str': query_str,
         'num_results': num_results, 'start_results': start_results,
         'end_results': end_results, 'sort': sort,
         'page_num': page, 'total_results': total_results,
         'next_page': next_page, 'prev_page': prev_page,
         'total_pages': total_pages, 'current_sort': current_sort,
         'year': year},
        template_name='respondents/search_results.html')
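A standalone check of the pagination arithmetic above, with hypothetical numbers: 60 results at 25 per page give 3 pages, and page 2 spans results 25 through 50.

import math

num_results, page, total_results = 25, 2, 60
start_results = num_results * page - num_results if page > 1 else 0
end_results = num_results * page if page > 1 else num_results
total_pages = 1 if total_results <= num_results else int(math.ceil(float(total_results) / float(num_results)))
print(start_results, end_results, total_pages)  # -> 25 50 3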
Example #5
 def test_correct_exact(self):
     self.sq.add_filter(SQ(content=Exact('hello world')))
     self.assertEqual(self.sq.build_query(), '("hello world")')
Example #6
    def build_query_fragment(self, field, filter_type, value):
        from haystack import connections

        query_frag = ""

        if not hasattr(value, "input_type_name"):
            # Handle when we've got a ``ValuesListQuerySet``...
            if hasattr(value, "values_list"):
                value = list(value)

            if isinstance(value, six.string_types):
                # It's not an ``InputType``. Assume ``Clean``.
                value = Clean(value)
            else:
                value = PythonData(value)

        # Prepare the query using the InputType.
        prepared_value = value.prepare(self)

        if not isinstance(prepared_value, (set, list, tuple)):
            # Then convert whatever we get back to what pysolr wants if needed.
            prepared_value = self.backend.conn._from_python(prepared_value)

        # 'content' is a special reserved word, much like 'pk' in
        # Django's ORM layer. It indicates 'no special field'.
        if field == "content":
            index_fieldname = ""
        else:
            index_fieldname = "%s:" % connections[
                self._using
            ].get_unified_index().get_index_fieldname(field)

        filter_types = {
            "content": "%s",
            "contains": "*%s*",
            "endswith": "*%s",
            "startswith": "%s*",
            "exact": "%s",
            "gt": "{%s TO *}",
            "gte": "[%s TO *]",
            "lt": "{* TO %s}",
            "lte": "[* TO %s]",
            "fuzzy": "%s~",
        }

        if value.post_process is False:
            query_frag = prepared_value
        else:
            if filter_type in [
                "content",
                "contains",
                "startswith",
                "endswith",
                "fuzzy",
            ]:
                if value.input_type_name == "exact":
                    query_frag = prepared_value
                else:
                    # Iterate over terms & incorporate the converted form of each into the query.
                    terms = []

                    for possible_value in prepared_value.split(" "):
                        terms.append(
                            filter_types[filter_type]
                            % self.backend.conn._from_python(possible_value)
                        )

                    if len(terms) == 1:
                        query_frag = terms[0]
                    else:
                        query_frag = "(%s)" % " AND ".join(terms)
            elif filter_type == "in":
                in_options = []

                if not prepared_value:
                    query_frag = "(!*:*)"
                else:
                    for possible_value in prepared_value:
                        in_options.append(
                            '"%s"' % self.backend.conn._from_python(possible_value)
                        )

                    query_frag = "(%s)" % " OR ".join(in_options)
            elif filter_type == "range":
                start = self.backend.conn._from_python(prepared_value[0])
                end = self.backend.conn._from_python(prepared_value[1])
                query_frag = '["%s" TO "%s"]' % (start, end)
            elif filter_type == "exact":
                if value.input_type_name == "exact":
                    query_frag = prepared_value
                else:
                    prepared_value = Exact(prepared_value).prepare(self)
                    query_frag = filter_types[filter_type] % prepared_value
            else:
                if value.input_type_name != "exact":
                    prepared_value = Exact(prepared_value).prepare(self)

                query_frag = filter_types[filter_type] % prepared_value

        if len(query_frag) and not isinstance(value, Raw):
            if not query_frag.startswith("(") and not query_frag.endswith(")"):
                query_frag = "(%s)" % query_frag

        return "%s%s" % (index_fieldname, query_frag)
Example #7
    def build_query_fragment(self, field, filter_type, value):
        from haystack import connections

        query_frag = ""
        is_datetime = False

        if not hasattr(value, "input_type_name"):
            # Handle when we've got a ``ValuesListQuerySet``...
            if hasattr(value, "values_list"):
                value = list(value)

            if hasattr(value, "strftime"):
                is_datetime = True

            if isinstance(value, str) and value != " ":
                # It's not an ``InputType``. Assume ``Clean``.
                value = Clean(value)
            else:
                value = PythonData(value)

        # Prepare the query using the InputType.
        prepared_value = value.prepare(self)

        if not isinstance(prepared_value, (set, list, tuple)):
            # Then convert whatever we get back to what pysolr wants if needed.
            prepared_value = self.backend._from_python(prepared_value)

        # 'content' is a special reserved word, much like 'pk' in
        # Django's ORM layer. It indicates 'no special field'.
        if field == "content":
            index_fieldname = ""
        else:
            index_fieldname = "%s:" % connections[
                self._using].get_unified_index().get_index_fieldname(field)

        filter_types = {
            "content": "%s",
            "contains": "*%s*",
            "endswith": "*%s",
            "startswith": "%s*",
            "exact": "%s",
            "gt": "{%s to}",
            "gte": "[%s to]",
            "lt": "{to %s}",
            "lte": "[to %s]",
            "fuzzy": "%s~{}/%d".format(FUZZY_WHOOSH_MAX_EDITS),
        }

        if value.post_process is False:
            query_frag = prepared_value
        else:
            if filter_type in [
                    "content",
                    "contains",
                    "startswith",
                    "endswith",
                    "fuzzy",
            ]:
                if value.input_type_name == "exact":
                    query_frag = prepared_value
                else:
                    # Iterate over terms & incorporate the converted form of each into the query.
                    terms = []

                    if isinstance(prepared_value, str):
                        possible_values = prepared_value.split(" ")
                    else:
                        if is_datetime is True:
                            prepared_value = self._convert_datetime(
                                prepared_value)

                        possible_values = [prepared_value]

                    for possible_value in possible_values:
                        possible_value_str = self.backend._from_python(
                            possible_value)
                        if filter_type == "fuzzy":
                            terms.append(filter_types[filter_type] % (
                                possible_value_str,
                                min(FUZZY_WHOOSH_MIN_PREFIX,
                                    len(possible_value_str)),
                            ))
                        else:
                            terms.append(filter_types[filter_type] %
                                         possible_value_str)

                    if len(terms) == 1:
                        query_frag = terms[0]
                    else:
                        query_frag = "(%s)" % " AND ".join(terms)
            elif filter_type == "in":
                in_options = []

                for possible_value in prepared_value:
                    is_datetime = False

                    if hasattr(possible_value, "strftime"):
                        is_datetime = True

                    pv = self.backend._from_python(possible_value)

                    if is_datetime is True:
                        pv = self._convert_datetime(pv)

                    if isinstance(pv, str) and not is_datetime:
                        in_options.append('"%s"' % pv)
                    else:
                        in_options.append("%s" % pv)

                query_frag = "(%s)" % " OR ".join(in_options)
            elif filter_type == "range":
                start = self.backend._from_python(prepared_value[0])
                end = self.backend._from_python(prepared_value[1])

                if hasattr(prepared_value[0], "strftime"):
                    start = self._convert_datetime(start)

                if hasattr(prepared_value[1], "strftime"):
                    end = self._convert_datetime(end)

                query_frag = "[%s to %s]" % (start, end)
            elif filter_type == "exact":
                if value.input_type_name == "exact":
                    query_frag = prepared_value
                else:
                    prepared_value = Exact(prepared_value).prepare(self)
                    query_frag = filter_types[filter_type] % prepared_value
            else:
                if is_datetime is True:
                    prepared_value = self._convert_datetime(prepared_value)

                query_frag = filter_types[filter_type] % prepared_value

        if len(query_frag) and not isinstance(value, Raw):
            if not query_frag.startswith("(") and not query_frag.endswith(")"):
                query_frag = "(%s)" % query_frag

        return "%s%s" % (index_fieldname, query_frag)
Example #8
    def build_query_fragment(self, field, filter_type, value):
        from haystack import connections
        query_frag = ''

        if not hasattr(value, 'input_type_name'):
            # Handle when we've got a ``ValuesListQuerySet``...
            if hasattr(value, 'values_list'):
                value = list(value)

            if isinstance(value, six.string_types):
                # It's not an ``InputType``. Assume ``Clean``.
                value = Clean(value)
            else:
                value = PythonData(value)

        # Prepare the query using the InputType.
        prepared_value = value.prepare(self)

        if not isinstance(prepared_value, (set, list, tuple)):
            # Then convert whatever we get back to what elasticsearch wants if needed.
            prepared_value = self.backend._from_python(prepared_value,
                                                       for_query=True)

        # 'content' is a special reserved word, much like 'pk' in
        # Django's ORM layer. It indicates 'no special field'.
        if field == 'content':
            index_fieldname = ''
        else:
            index_fieldname = u'%s:' % connections[
                self._using].get_unified_index().get_index_fieldname(field)

        filter_types = {
            'contains': u'%s',
            'startswith': u'%s*',
            'exact': u'%s',
            'gt': u'{%s TO *}',
            'gte': u'[%s TO *]',
            'lt': u'{* TO %s}',
            'lte': u'[* TO %s]',
        }

        if value.post_process is False:
            query_frag = prepared_value
        else:
            if filter_type in ['contains', 'startswith']:
                if value.input_type_name == 'exact':
                    query_frag = prepared_value
                else:
                    # Iterate over terms & incorporate the converted form of each into the query.
                    terms = []

                    if isinstance(prepared_value, six.string_types):
                        for possible_value in prepared_value.split(' '):
                            terms.append(filter_types[filter_type] %
                                         self.backend._from_python(
                                             possible_value, for_query=True))
                    else:
                        terms.append(filter_types[filter_type] %
                                     self.backend._from_python(prepared_value,
                                                               for_query=True))

                    if len(terms) == 1:
                        query_frag = terms[0]
                    else:
                        query_frag = u"(%s)" % " AND ".join(terms)
            elif filter_type == 'in':
                in_options = []
                if len(prepared_value) >= 500:
                    from elation.util.exception import log_warning
                    log_warning(msg="Found %s values in an ES IN clause" %
                                (len(prepared_value), ))
                for possible_value in prepared_value:
                    in_options.append(u'"%s"' % self.backend._from_python(
                        possible_value, for_query=True))

                query_frag = u"(%s)" % " OR ".join(in_options)
            elif filter_type == 'range':
                start = self.backend._from_python(prepared_value[0],
                                                  for_query=True)
                end = self.backend._from_python(prepared_value[1],
                                                for_query=True)
                query_frag = u'["%s" TO "%s"]' % (start, end)
            elif filter_type == 'exact':
                if value.input_type_name == 'exact':
                    query_frag = prepared_value
                else:
                    prepared_value = Exact(prepared_value).prepare(self)
                    query_frag = filter_types[filter_type] % prepared_value
            else:
                if value.input_type_name != 'exact':
                    prepared_value = Exact(prepared_value).prepare(self)

                query_frag = filter_types[filter_type] % prepared_value

        if len(query_frag) and not isinstance(value, Raw):
            if not query_frag.startswith('(') and not query_frag.endswith(')'):
                query_frag = "(%s)" % query_frag

        return u"%s%s" % (index_fieldname, query_frag)
Example #9
def index(request):
    db = request.GET.get("db", "all")
    search = request.GET.get("search", "").strip()

    selected = {
        x: Exact(request.GET[x])
        for x in ["authors", "affiliations", "taxon"] if x in request.GET
    }
    if search:
        sqs = SearchQuerySet().filter(content=search, **selected).facet("type")
    else:
        sqs = SearchQuerySet().filter(**selected).facet("type")
        search = "*"

    params = dict(request.GET)
    params["q"] = [search]

    for ft in ["authors", "affiliations", "taxon"]:
        if ft not in selected:
            sqs = sqs.facet(ft, limit=5)

    facets = sqs.facet_counts()
    if "fields" in facets:
        rdata = defaultdict(lambda: 0,
                            {k: v
                             for k, v in facets["fields"]["type"]})
    else:
        rdata = defaultdict(lambda: 0)
    count = 0
    for r in resources:
        r["count"] = rdata[str(r["type"])]
        count += rdata[r["type"]]

    suggestions = []
    if count == 0:
        suggestions = SearchQuerySet().auto_query(search).spelling_suggestion()
        if suggestions:
            suggestions = [
                x.strip() for x in suggestions.replace("(", " ").split(")")
                if x.strip()
            ]

    if "fields" in facets:
        del facets["fields"]["type"]
    else:
        facets["fields"] = {}

    return render(
        request, 'index.html', {
            "stats": resources,
            "search": search if search != "*" else "",
            "selected": selected,
            "db": db,
            "suggestions": suggestions,
            "querystring": params,
            "sidebarleft": facets["fields"],
            "sidebarrigth": {
                "news": [{
                    "title": "n1",
                    "text": "lalala"
                }]
            }
        })
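A standalone sketch of the ``selected`` comprehension above, with a plain dict standing in for request.GET (parameter values are hypothetical); in the view each value is additionally wrapped in Exact() before being expanded into filter(**selected).

GET = {"search": "mouse", "authors": "Smith"}
selected = {x: GET[x] for x in ["authors", "affiliations", "taxon"] if x in GET}
print(selected)  # -> {'authors': 'Smith'}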
Example #10
    def build_query_fragment(self, field, filter_type, value):
        from haystack import connections
        query_frag = ''

        if not hasattr(value, 'input_type_name'):
            # Handle when we've got a ``ValuesListQuerySet``...
            if hasattr(value, 'values_list'):
                value = list(value)

            if filter_type in ["regex", "iregex"]:
                value = RegExp(value)
            elif isinstance(value, six.string_types):
                # It's not an ``InputType``. Assume ``Clean``.
                value = Clean(value)
            else:
                value = PythonData(value)

        # Prepare the query using the InputType.
        prepared_value = value.prepare(self)

        if not isinstance(prepared_value, (set, list, tuple)):
            # Then convert whatever we get back to what pysolr wants if needed.
            prepared_value = self.backend.conn._from_python(prepared_value)

        # 'content' is a special reserved word, much like 'pk' in
        # Django's ORM layer. It indicates 'no special field'.
        if field == 'content':
            index_fieldname = ''
        else:
            index_fieldname = \
                u'%s:' % connections[self._using].\
                get_unified_index().get_index_fieldname(field)

        filter_types = {
            'content': u'%s',
            'contains': u'*%s*',
            'endswith': u'*%s',
            'startswith': u'%s*',
            'exact': u'%s',
            'gt': u'{%s TO *}',
            'gte': u'[%s TO *]',
            'lt': u'{* TO %s}',
            'lte': u'[* TO %s]',
            'fuzzy': u'%s~',
            'regex': u'/%s/',
            'iregex': u'/%s/',
        }

        if value.post_process is False:
            query_frag = prepared_value
        else:
            if filter_type in \
                    ['content', 'contains', 'startswith',
                     'endswith', 'fuzzy', 'regex', 'iregex']:
                if value.input_type_name == 'exact':
                    query_frag = prepared_value
                else:
                    # Iterate over terms & incorporate the converted
                    # form of each into the query.
                    terms = []

                    for possible_value in prepared_value.split(' '):
                        terms.append(
                            filter_types[filter_type] %
                            (self.backend.conn._from_python(possible_value)
                             if filter_type not in ['regex', 'iregex'] else
                             possible_value))

                    if len(terms) == 1:
                        query_frag = terms[0]
                    else:
                        query_frag = u"(%s)" % " AND ".join(terms)
            elif filter_type == 'in':
                in_options = []

                if not prepared_value:
                    query_frag = u'(!*:*)'
                else:
                    for possible_value in prepared_value:
                        in_options.append(
                            u'"%s"' %
                            self.backend.conn._from_python(possible_value))

                    query_frag = u"(%s)" % " OR ".join(in_options)
            elif filter_type == 'range':
                start = self.backend.conn._from_python(prepared_value[0])
                end = self.backend.conn._from_python(prepared_value[1])
                query_frag = u'["%s" TO "%s"]' % (start, end)
            elif filter_type == 'exact':
                if value.input_type_name == 'exact':
                    query_frag = prepared_value
                else:
                    prepared_value = Exact(prepared_value).prepare(self)
                    query_frag = filter_types[filter_type] % prepared_value
            else:
                if value.input_type_name != 'exact':
                    prepared_value = Exact(prepared_value).prepare(self)

                query_frag = filter_types[filter_type] % prepared_value

        if len(query_frag) and not isinstance(value, Raw) and \
                filter_type not in ['regex', 'iregex']:
            if not query_frag.startswith('(') and not query_frag.endswith(')'):
                query_frag = "(%s)" % query_frag

        return u"%s%s" % (index_fieldname, query_frag)
Example #11
def get_credit_apps_owned_by_user(user):
    qs = SearchQuerySet()\
        .models(USCreditApp, USJointCreditApp, CACreditApp, CAJointCreditApp)\
        .filter(user_id=Exact(user.pk))
    return qs.all()
Example #12
def code_search(request):  # code search

    all_time = time.time()

    query = request.GET['code1']
    # query_language = request.GET['select_language']
    raw_query = query
    query_highlight_token = highlight_words(raw_query)

    all_posts = []
    all_posts_id = []
    all_posts_filtered = []

    if query != "":
        print('Query Code: ', query)
        lex_token = tokenize(raw_query, "lex").strip()
        char_token = tokenize(raw_query, "char").strip()
        print('Lex Tokens: ', lex_token)
        print('Char Tokens: ', char_token)
        time_1 = time.time()

        if char_token == "":
            char_search_result = []
        else:
            char_search_result = SearchQuerySet().using('problemcode').filter(
                content=Exact(char_token))
            # char_search_result = SearchQuerySet().using('problemcode').all()
            # for query_item in char_token.split():
            #     char_search_result = char_search_result.filter_or(content=query_item)
            # char_search_result = list(char_search_result)
        for item in char_search_result[:2000]:
            id = item.id
            all_posts.append(item)
            all_posts_id.append(id)
        char_search_num = len(char_search_result)
        print(all_posts_id[:10])
        print('char search result num: ', char_search_num)

        if lex_token == "":
            lex_search_result = []
        else:
            lex_search_result = SearchQuerySet().using('problemcode').filter(
                content=Exact(lex_token))
            # lex_search_result = SearchQuerySet().using('problemcode').all()
            # for query_item in lex_token.split():
            #     lex_search_result = lex_search_result.filter_or(content=query_item)
            # lex_search_result = list(lex_search_result)
        for item in lex_search_result[:2000]:
            id = item.id
            if id not in all_posts_id:
                all_posts.append(item)
                all_posts_id.append(id)
        lex_search_num = len(lex_search_result)
        print(all_posts_id[:10])
        print('lex search result num: ', lex_search_num)

        print('time_1: ', time.time() - time_1)
        time_2 = time.time()

        if len(query_highlight_token.split()) == 1:
            # If the query code is too short (only one word), skip the AST-based search entirely
            posts_python_ast = []
            python_search_num = 0
            posts_cpp_ast = []
            cpp_search_num = 0
            print('Too Short To AST Analysis.')
        else:
            # Run Python AST analysis and use the results to find similar code, appended after the lex and char results:
            try:
                query_ast = convert_python(raw_query)
                print('Python Query AST: ', query_ast)
                query_ast = query_ast.split()
                if len(query_ast) == 1:
                    posts_python_ast = []
                else:
                    posts_python_ast = SearchQuerySet().using(
                        'problemcode').all()
                    for query_item in query_ast:
                        posts_python_ast = posts_python_ast.filter_or(
                            content=query_item)
                python_search_num = len(posts_python_ast)
                print('python search result num: ', python_search_num)
                # posts_ast = posts_ast[:1000]
                # posts = posts | posts_python_ast
                # char_lex_id = set(char_result_id + lex_result_id)
                # posts += [post for post in posts_ast if post.id not in char_lex_id]
                # python_ast_id = [int(post.id) for post in posts_ast]
            except:
                posts_python_ast = []
                python_search_num = 0
                print('Python AST Analysis Failed.')

            # Run C++ AST analysis and use the results to find similar code, appended after the lex and char results:
            try:
                query_ast = convert_cpp(cpp_head_remove(raw_query))
                print('C++ Query AST: ', query_ast)
                query_ast = query_ast.split()
                if len(query_ast) == 1:
                    posts_cpp_ast = []
                else:
                    posts_cpp_ast = SearchQuerySet().using('problemcode').all()
                    for query_item in query_ast:
                        posts_cpp_ast = posts_cpp_ast.filter_or(
                            content=query_item)
                cpp_search_num = len(posts_cpp_ast)
                print('c++ search result num: ', cpp_search_num)
                # posts_ast = posts_ast[:1000]
                # posts = posts | posts_cpp_ast
                # post ids already present, i.e. the union of the char, lex and python results:
                # char_lex_python_id = set(char_result_id + lex_result_id + python_ast_id)
                # posts += [post for post in posts_ast if post.id not in char_lex_python_id]
            except:
                posts_cpp_ast = []
                cpp_search_num = 0
                print('C++ AST Analysis Failed.')

        for item in posts_python_ast[:2000]:
            id = item.id
            if id not in all_posts_id:
                all_posts.append(item)
                all_posts_id.append(id)
        for item in posts_cpp_ast[:2000]:
            id = item.id
            if id not in all_posts_id:
                all_posts.append(item)
                all_posts_id.append(id)

        all_posts_num = len(all_posts)
        print('All Posts Num: ', all_posts_num)
        print('time_2: ', time.time() - time_2)

        time_3 = time.time()
        for item in all_posts:
            if Problem.objects.filter(id=item.problem).exists():
                item.code = cpp_head_convert(item.code)
                all_posts_filtered.append(item)
        print('time_3: ', time.time() - time_3)

    result_num = len(all_posts_filtered)
    print('Returned Posts Count: ', result_num)

    print('all_time: ', time.time() - all_time)

    return render(
        request, 'search/code_search_result.html', {
            'posts': all_posts_filtered,
            'raw_query': raw_query,
            'query_token': query_highlight_token,
            'result_num': result_num
        })
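A standalone sketch of the merge-and-deduplicate pattern used above (names and data are illustrative): results from the char, lex and AST passes are appended in order, skipping ids that have already been collected.

def merge_by_id(*result_lists, limit=2000):
    merged, seen = [], set()
    for results in result_lists:
        for item in results[:limit]:
            if item['id'] not in seen:
                seen.add(item['id'])
                merged.append(item)
    return merged

print(merge_by_id([{'id': 1}, {'id': 2}], [{'id': 2}, {'id': 3}]))
# -> [{'id': 1}, {'id': 2}, {'id': 3}]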