def test_one_case(self, input_query, expected):
    """Assert that ``transform_search(input_query)`` equals ``expected``.

    On a mismatch the assertion message shows the input, the expected
    value and the value actually produced.  On success the pair is
    reported through ``logging.info``.
    """
    result = transform_search(input_query)
    # Start from a unicode literal so the whole report is unicode; the
    # pieces are concatenated strictly left to right.
    failure_report = (u""
                      + "\n    input: " + input_query
                      + "\n expected: " + expected
                      + "\n      got: " + unicode(result))
    self.assertEqual(expected, result, failure_report)
    logging.info("Correct: %s -> %s", input_query, expected)
Example #2
0
def test_one_case(self, input_query, expected):
    """Run ``transform_search`` on ``input_query`` and compare to ``expected``.

    The failure message lists the input, the expected value and the
    value that was returned.  A passing case is logged with
    ``logger.info``.
    """
    got = transform_search(input_query)
    # Joining on a unicode separator yields a unicode message, the same
    # as appending each piece to u"" one at a time.
    report = u"".join([
        "\n    input: ",
        input_query,
        "\n expected: ",
        expected,
        "\n      got: ",
        unicode(got),
    ])
    self.assertEqual(expected, got, report)
    logger.info("Correct: %s -> %s", input_query, expected)
Example #3
0
def prepare_sqs_from_search_params(params, sqs=None):
    """Apply the search parameters in ``params`` to a search query set.

    ``params`` is read with ``.get`` for the keys ``q``, ``location``,
    ``moc``, ``moc_id``, ``company`` and ``exact_title``.  If ``sqs`` is
    None, a new ``DESearchQuerySet`` is used as the starting point.

    Returns the query set after all applicable filters, with
    ``highlight()`` applied.
    """
    # Description is usually searched twice, so the title boost must be
    # high enough to outweigh that.
    title_boost = 10

    raw_title = params.get('q')
    raw_location = params.get('location')
    raw_moc = params.get('moc')
    raw_moc_id = params.get('moc_id')
    company = params.get('company')
    exact_title = bool(params.get('exact_title'))

    if sqs is None:
        sqs = DESearchQuerySet()

    # Map every non-empty raw value to its cleaned form.  The lookup is
    # keyed by the raw value itself, so a missing (None) parameter simply
    # yields None below.
    raw_values = [raw_title, raw_location, raw_moc, raw_moc_id, company]
    cleaned = dict([(raw, _clean(raw)) for raw in raw_values if raw])

    # Title search.  Haystack has no API for boosting a term within one
    # specific field, so the boost is written by hand in Solr's syntax,
    # e.g. 'title:(Accountant)^2', and passed through as a Raw value.
    # Parentheses (rather than quotes) let Solr parse more complex title
    # searches.  The effect is that documents matching in the title rank
    # above documents that only match somewhere in their full text.
    if cleaned.get(raw_title):
        # A dash with a space on both sides is probably not meant as
        # negation, so escape it:
        #   Retail -Sales   searches for Retail excluding Sales
        #   Retail - Sales  searches for 'Retail - Sales'
        escaped = raw_title.replace(' - ', ' \\- ')
        title_expr = "(%s)" % transform_search(escaped)
        boosted_title = u"({t})^{b}".format(t=title_expr, b=title_boost)

        if not exact_title:
            # Description is queried as well so that highlighting matches
            # the exact term and not a stem.
            any_field = (SQ(content=Raw("((%s))^1" % title_expr)) |
                         SQ(title=Raw(boosted_title)) |
                         SQ(description=Raw(title_expr)))
            sqs = sqs.filter(any_field).highlight()
        else:
            sqs = sqs.filter(title_exact__exact=title_expr)

    # Location search: filter on the full_loc field with the cleaned
    # location value.
    location_val = cleaned.get(raw_location)
    if location_val:
        sqs = sqs.filter(full_loc=location_val)

    # NOTE(review): the raw company value is used here, not its cleaned
    # form -- presumably intentional for an exact match; confirm.
    if company:
        sqs = sqs.filter(company_exact__exact=company)

    # MOC search.  NOTE(review): moc_id is only considered when moc is
    # also present -- confirm this is intended.
    moc_val = cleaned.get(raw_moc)
    if moc_val:
        # A truthy settings.SITE_BUIDS selects the mapped_moc* fields
        # (custom MOC-Onet mappings for the site) instead of the plain
        # moc* fields.
        prefix = 'mapped_' if settings.SITE_BUIDS else ''

        moc_id_val = cleaned.get(raw_moc_id)
        if moc_id_val:
            moc_filt = SQ(**{'%smocid' % prefix: moc_id_val})
        else:
            by_moc = SQ(**{'%smoc' % prefix: moc_val})
            by_slab = SQ(**{'%smoc_slab' % prefix: moc_val})
            moc_filt = SQ(by_moc | by_slab)
        sqs = sqs.filter(moc_filt)

    return sqs.highlight()
Example #4
0
def prepare_sqs_from_search_params(params, sqs=None):
    """Translate search ``params`` into filters on a search query set.

    Reads ``q``, ``location``, ``moc``, ``moc_id``, ``company`` and
    ``exact_title`` from ``params`` via ``.get``.  Starts from ``sqs``
    when given, otherwise from a new ``DESearchQuerySet``.

    Returns the filtered query set with ``highlight()`` applied.
    """
    # Title needs a high boost because description is usually searched
    # twice.
    title_weight = 10

    query = params.get('q')
    where = params.get('location')
    moc_code = params.get('moc')
    moc_ident = params.get('moc_id')
    company = params.get('company')
    want_exact_title = bool(params.get('exact_title'))

    if sqs is None:
        sqs = DESearchQuerySet()

    # Cleaned values are looked up by their raw counterpart; parameters
    # that are missing or empty are left out and resolve to None.
    supplied = (query, where, moc_code, moc_ident, company)
    cleaned_by_raw = dict([(item, _clean(item))
                           for item in supplied
                           if item])
    query_val = cleaned_by_raw.get(query)
    moc_val = cleaned_by_raw.get(moc_code)
    moc_ident_val = cleaned_by_raw.get(moc_ident)
    where_val = cleaned_by_raw.get(where)

    # Haystack cannot boost a term inside a single field, so the boosted
    # title clause is assembled as a raw Solr expression of the form
    # 'title:(Accountant)^2'.  Using parentheses instead of quotes lets
    # Solr parse more complex title searches.  Title matches then rank
    # ahead of documents that merely contain the term in their text.
    if query_val:
        # ' - ' is most likely not intended as negation, so the dash is
        # escaped ("Retail - Sales" is a phrase, "Retail -Sales"
        # excludes Sales).
        transformed = transform_search(query.replace(' - ', ' \\- '))
        grouped = "(%s)" % transformed
        weighted = u"({t})^{b}".format(t=grouped, b=title_weight)

        if want_exact_title:
            sqs = sqs.filter(title_exact__exact=grouped)
        else:
            # Also query description so highlighting matches the exact
            # term rather than a stem.
            content_clause = SQ(content=Raw("((%s))^1" % grouped))
            title_clause = SQ(title=Raw(weighted))
            description_clause = SQ(description=Raw(grouped))
            sqs = sqs.filter(
                content_clause | title_clause | description_clause
            ).highlight()

    # Restrict to the cleaned location, matched against full_loc.
    if where_val:
        sqs = sqs.filter(full_loc=where_val)

    # NOTE(review): filters on the raw company string; its cleaned value
    # is computed above but not used -- confirm this is deliberate.
    if company:
        sqs = sqs.filter(company_exact__exact=company)

    # NOTE(review): a moc_id without a moc is ignored -- confirm.
    if moc_val:
        # Use the mapped_moc* fields when settings.SITE_BUIDS is truthy
        # (site-specific MOC-Onet mappings), the plain fields otherwise.
        prefix = 'mapped_' if settings.SITE_BUIDS else ''

        if not moc_ident_val:
            moc_filt = SQ(
                SQ(**{'%smoc' % prefix: moc_val})
                | SQ(**{'%smoc_slab' % prefix: moc_val}))
        else:
            moc_filt = SQ(**{'%smocid' % prefix: moc_ident_val})
        sqs = sqs.filter(moc_filt)

    return sqs.highlight()