예제 #1
0
 def testQuotingForwardSlashes(self):
     # Solr 4 supports regular expressions, so "/" has to come back escaped
     # both inside and outside of quoted phrases.
     slash_cases = [
         ("/", "\\/"),
         ("(/ OR x)", "(\\/ OR x)"),
         ('"/', '\\"\\/'),
         ('"/"', '"\\/"'),
         ('"(/ OR x)"', '"\\(\\/ OR x\\)"'),
     ]
     for raw, expected in slash_cases:
         self.assertEqual(quote(raw), expected)
예제 #2
0
 def quoteitem(term):
     # Quote a single list item: keep the plain quoted form when quoting
     # left the term untouched (or already produced a phrase), otherwise
     # wrap the term in double quotes and quote that instead.
     if isinstance(term, unicode):
         term = term.encode('utf-8')
     escaped = quote(term)
     if escaped.startswith('"') or escaped == term:
         return escaped
     return quote('"' + term + '"')
예제 #3
0
 def testQuotingBoostingTerm(self):
     # Boost suffixes (^N) on terms and phrases are expected back unescaped,
     # with the multi-term query wrapped in parentheses.
     boosted = (
         ("jakarta^4 apache", "(jakarta^4 apache)"),
         ("jakarta^0.2 apache", "(jakarta^0.2 apache)"),
         ('"jakarta apache"^4 "Apache Lucene"',
          '("jakarta apache"^4 "Apache Lucene")'),
     )
     for query, expected in boosted:
         self.assertEqual(quote(query), expected)
예제 #4
0
파일: search.py 프로젝트: FHNW/ftw.solr
 def quoteitem(term):
     # List items are treated as literals, but only get wrapped in double
     # quotes when plain quoting had to change them.
     if isinstance(term, unicode):
         term = term.encode('utf-8')
     plain = quote(term)
     needs_phrase = not (plain.startswith('"') or plain == term)
     if needs_phrase:
         return quote('"' + term + '"')
     return plain
예제 #5
0
 def testQuotingForwardSlashes(self):
     # solr 4 supports regular expressions and requires / to be escaped
     expectations = (
         ('/', '\\/'),
         ('(/ OR x)', '(\\/ OR x)'),
         ('"/', '\\"\\/'),
         ('"/"', '"\\/"'),
         ('"(/ OR x)"', '"\\(\\/ OR x\\)"'),
     )
     for query, quoted in expectations:
         self.assertEqual(quote(query), quoted)
예제 #6
0
 def testQuotingEscapingSpecialCharacters(self):
     # Each entry pairs a raw query with the escaped form quote() must return.
     special_cases = [
         ("-+!^~:", "\\-\\+\\!\\^\\~\\:"),
         # Only quote * and ? if quoted
         ('"*?"', '"\\*\\?"'),
         # also quote multiple occurrences
         (":", "\\:"),
         (": :", "(\\: \\:)"),
         ("foo+ bar! nul:", "(foo\\+ bar\\! nul\\:)"),
     ]
     for raw, expected in special_cases:
         self.assertEqual(quote(raw), expected)
예제 #7
0
 def testQuotingEscapingSpecialCharacters(self):
     # Operator characters outside of a valid operator position get a
     # backslash; the table keeps input and expectation side by side.
     expectations = (
         ('-+!^~:', '\\-\\+\\!\\^\\~\\:'),
         # Only quote * and ? if quoted
         ('"*?"', '"\\*\\?"'),
         # also quote multiple occurrences
         (':', '\\:'),
         (': :', '(\\: \\:)'),
         ('foo+ bar! nul:', '(foo\\+ bar\\! nul\\:)'),
     )
     for query, quoted in expectations:
         self.assertEqual(quote(query), quoted)
예제 #8
0
 def testQuotingWildcardSearches(self):
     # Default behaviour: leading wildcards are dropped and repeated
     # wildcards collapse into one.
     default_cases = [
         ("te?t", "te?t"),
         ("test*", "test*"),
         ("test**", "test*"),
         ("te*t", "te*t"),
         ("?test", "test"),
         ("*test", "test"),
     ]
     for raw, expected in default_cases:
         self.assertEqual(quote(raw), expected)

     # With prefix_wildcard=True a (single) leading wildcard is kept.
     prefix_cases = [
         ("*test", "*test"),
         ("?test", "?test"),
         ("**test", "*test"),
         ("??test", "?test"),
     ]
     for raw, expected in prefix_cases:
         self.assertEqual(quote(raw, prefix_wildcard=True), expected)
예제 #9
0
 def testUnicode(self):
     self.assertEqual(quote('foø'), 'fo\xc3\xb8')
     self.assertEqual(quote('"foø'), '\\"fo\xc3\xb8')
     self.assertEqual(quote('whät?'), 'wh\xc3\xa4t?')
     self.assertEqual(quote('"whät?"'), '"wh\xc3\xa4t\?"')
     self.assertEqual(quote('"[ø]"'), '"\[\xc3\xb8\]"')
     self.assertEqual(quote('[ø]'), '\\[\xc3\xb8\\]')
     self.assertEqual(quote('"foø*"'), '"fo\xc3\xb8\*"')
     self.assertEqual(quote('"foø bar?"'), '"fo\xc3\xb8 bar\?"')
     self.assertEqual(quote(u'*****@*****.**'), '*****@*****.**')
예제 #10
0
 def testUnicode(self):
     self.assertEqual(quote("foø"), b"fo\xc3\xb8".decode("utf-8"))
     self.assertEqual(quote('"foø'), b'\\"fo\xc3\xb8'.decode("utf-8"))
     self.assertEqual(quote("whät?"), b"wh\xc3\xa4t?".decode("utf-8"))
     self.assertEqual(quote('"whät?"'), b'"wh\xc3\xa4t\\?"'.decode("utf-8"))
     self.assertEqual(quote('"[ø]"'), b'"\\[\xc3\xb8\\]"'.decode("utf-8"))
     self.assertEqual(quote("[ø]"), b"\\[\xc3\xb8\\]".decode("utf-8"))
     self.assertEqual(quote('"foø*"'), b'"fo\xc3\xb8\\*"'.decode("utf-8"))
     self.assertEqual(quote('"foø bar?"'),
                      b'"fo\xc3\xb8 bar\\?"'.decode("utf-8"))
     self.assertEqual(quote(u"*****@*****.**"), "*****@*****.**")
예제 #11
0
def mangleSearchableText(value, config):
    """Expand a simple ``SearchableText`` value into a Solr expression.

    Values that ``isSimpleSearch`` rejects are returned untouched.
    Otherwise every term is expanded via ``makeSimpleExpressions`` and the
    results are joined with spaces.  When the configuration provides a
    ``search_pattern`` the joined expressions are inserted into it and the
    result is returned as a one-element set; without a pattern the joined
    expression string is returned.
    """
    settings = config or getConfig()
    template = getattr(settings, 'search_pattern', u'')
    if template:
        template = template.encode('utf-8')
    max_distance = getattr(settings, 'levenshtein_distance', 0)

    if not isSimpleSearch(value):
        return value

    # one (value, base_value) pair per search term
    expressions = [makeSimpleExpressions(term, max_distance)
                   for term in splitSimpleSearch(value)]
    joined_base = ' '.join([base for (_, base) in expressions])
    joined_value = ' '.join([full for (full, _) in expressions])

    if not template:
        return joined_value
    formatted = template.format(value=quote(joined_value),
                                base_value=joined_base)
    return set([formatted])    # add literal query parameter
예제 #12
0
def mangleSearchableText(value, config):
    """Turn a simple search string into the Solr query for SearchableText.

    Non-simple searches (as decided by ``isSimpleSearch``) pass through
    unchanged.  For simple searches each term is expanded with
    ``makeSimpleExpressions``; if a ``search_pattern`` is configured the
    expansions are formatted into it and wrapped in a set, otherwise the
    space-joined expansion is returned as a string.
    """
    active_config = config or getConfig()
    search_pattern = getattr(active_config, 'search_pattern', u'')
    if search_pattern:
        search_pattern = search_pattern.encode('utf-8')
    distance = getattr(active_config, 'levenshtein_distance', 0)

    if not isSimpleSearch(value):
        return value

    expanded = []
    expanded_base = []
    for search_term in splitSimpleSearch(value):
        pair = makeSimpleExpressions(search_term, distance)
        (full_expr, base_expr) = pair
        expanded.append(full_expr)
        expanded_base.append(base_expr)

    base_value = ' '.join(expanded_base)
    value = ' '.join(expanded)
    if not search_pattern:
        return value
    # add literal query parameter
    return set([search_pattern.format(value=quote(value),
                                      base_value=base_value)])
예제 #13
0
def makeSimpleExpressions(term, levenstein_distance):
    """Build the search expressions for one term of a simple query.

    Returns a ``(value, base_value)`` tuple, each wrapped in parentheses.
    Quoted literals only get the fuzzy suffix, wildcard terms are passed
    through ``prepare_wildcard``, and every other term becomes a
    "prefix match OR (fuzzy) exact match" expression, optionally with a
    leading ``*`` when the configuration enables ``prefix_wildcard``.
    """
    config = getConfig()
    leading_star = "*" if getattr(config, "prefix_wildcard", False) else ""
    fuzzy_suffix = "~%s" % levenstein_distance if levenstein_distance else ""

    if '"' in term:  # quoted literals
        literal = "%s%s" % (term, fuzzy_suffix)
        return "(%s)" % literal, "(%s)" % literal

    if isWildCard(term):
        wildcard_expr = prepare_wildcard(term)
        bare_term = quote(term.replace("*", "").replace("?", ""))
        return "(%s)" % wildcard_expr, "(%s)" % bare_term

    combined = "%s%s* OR %s%s" % (
        leading_star,
        prepare_wildcard(term),
        term,
        fuzzy_suffix,
    )
    return "(%s)" % combined, "(%s)" % term
예제 #14
0
 def testSolrSpecifics(self):
     # http://wiki.apache.org/solr/SolrQuerySyntax
     solr_cases = [
         # Seems to be ok to quote function
         ('"recip(rord(myfield),1,2,3)"',
          '"recip\\(rord\\(myfield\\),1,2,3\\)"'),
         ("[* TO NOW]", "[* TO NOW]"),
         ("[1976-03-06T23:59:59.999Z TO *]",
          "[1976-03-06T23:59:59.999Z TO *]"),
         ("[1995-12-31T23:59:59.999Z TO "
          "2007-03-06T00:00:00Z]",
          "[1995-12-31T23:59:59.999Z TO "
          "2007-03-06T00:00:00Z]"),
         ("[NOW-1YEAR/DAY TO NOW/DAY+1DAY]",
          "[NOW-1YEAR\\/DAY TO NOW\\/DAY+1DAY]"),
         ("[1976-03-06T23:59:59.999Z TO "
          "1976-03-06T23:59:59.999Z+1YEAR]",
          "[1976-03-06T23:59:59.999Z TO "
          "1976-03-06T23:59:59.999Z+1YEAR]"),
         # quoting date operators seems to be ok too
         ("[1976-03-06T23:59:59.999Z/YEAR TO "
          "1976-03-06T23:59:59.999Z]",
          "[1976-03-06T23:59:59.999Z\\/YEAR TO "
          "1976-03-06T23:59:59.999Z]"),
     ]
     for raw, expected in solr_cases:
         self.assertEqual(quote(raw), expected)
예제 #15
0
 def testQuotingOperatorsGrouping(self):
     # Boolean operators, +/- prefixes and explicit grouping are kept;
     # ungrouped multi-term queries gain one outer pair of parentheses.
     grouping_cases = (
         ('+return +"pink panther"', '(+return +"pink panther")'),
         ('+jakarta lucene', '(+jakarta lucene)'),
         ('"jakarta apache" -"Apache Lucene"',
          '("jakarta apache" -"Apache Lucene")'),
         ('"jakarta apache" NOT "Apache Lucene"',
          '("jakarta apache" NOT "Apache Lucene")'),
         ('"jakarta apache" OR jakarta', '("jakarta apache" OR jakarta)'),
         ('"jakarta apache" AND "Apache Lucene"',
          '("jakarta apache" AND "Apache Lucene")'),
         ('(jakarta OR apache) AND website',
          '((jakarta OR apache) AND website)'),
         ('(a AND (b OR c))', '(a AND (b OR c))'),
         ('((a AND b) OR c)', '((a AND b) OR c)'),
     )
     for query, expected in grouping_cases:
         self.assertEqual(quote(query), expected)
예제 #16
0
def searchterms_from_value(value):
    """Split a search query into a list of quoted search terms.

    Parentheses and wildcards are stripped before quoting so that
    ``quote()`` does not try to escape them; parentheses that ``quote()``
    itself may have added are stripped again before splitting on
    whitespace.
    """
    without_parens = strip_parens(value)
    bare = strip_wildcards(without_parens)
    escaped = quote(bare)
    return strip_parens(escaped).split()
예제 #17
0
파일: mangler.py 프로젝트: FHNW/ftw.solr
def searchterms_from_value(value):
    """Return the individual, quoted search terms contained in ``value``.

    The pipeline is: drop parentheses, drop wildcards, quote special
    characters, drop any parentheses introduced by quoting, then split on
    whitespace.
    """
    cleaned = strip_wildcards(strip_parens(value))
    # quote() may wrap multi-term input in parentheses; remove them again
    terms = strip_parens(quote(cleaned))
    return terms.split()
예제 #18
0
파일: mangler.py 프로젝트: FHNW/ftw.solr
def mangle_searchable_text_query(value, pattern):
    """Fill ``pattern`` with the lower-cased, quoted search value.

    The pattern may reference ``{value}`` (wildcards stripped, then
    quoted), ``{value_lwc}`` and ``{value_twc}`` (the results of
    ``leading_wildcards`` / ``trailing_wildcards`` on the lower-cased
    value).
    """
    lowered = value.lower()
    substitutions = {
        'value_lwc': leading_wildcards(lowered),
        'value_twc': trailing_wildcards(lowered),
    }
    substitutions['value'] = quote(strip_wildcards(lowered))
    return pattern.format(**substitutions)
예제 #19
0
def mangle_searchable_text_query(value, pattern):
    """Format ``pattern`` with the prepared variants of a search value.

    The value is lower-cased first.  ``value_lwc`` and ``value_twc`` are
    computed from the lower-cased text, while ``value`` itself has its
    wildcards stripped and is then quoted.
    """
    text = value.lower()
    with_leading = leading_wildcards(text)
    with_trailing = trailing_wildcards(text)
    quoted_text = quote(strip_wildcards(text))
    return pattern.format(
        value=quoted_text,
        value_lwc=with_leading,
        value_twc=with_trailing)
예제 #20
0
 def testQuotingWildcardSearches(self):
     # Wildcards inside or at the end of a term are kept (duplicates
     # collapse); wildcards at the start of a term are removed.
     wildcard_cases = [
         ("te?t", "te?t"),
         ("test*", "test*"),
         ("test**", "test*"),
         ("te*t", "te*t"),
         ("?test", "test"),
         ("*test", "test"),
     ]
     for raw, expected in wildcard_cases:
         self.assertEqual(quote(raw), expected)
예제 #21
0
 def testQuotingWildcardSearches(self):
     # Maps each wildcard query to the form quote() is expected to return.
     expectations = (
         ('te?t', 'te?t'),
         ('test*', 'test*'),
         ('test**', 'test*'),
         ('te*t', 'te*t'),
         ('?test', 'test'),
         ('*test', 'test'),
     )
     for query, quoted in expectations:
         self.assertEqual(quote(query), quoted)
예제 #22
0
 def testQuotingRangeSearches(self):
     # Complete range expressions are expected back verbatim.
     verbatim = (
         '[* TO NOW]',
         '[1972-05-11T00:00:00.000Z TO *]',
         '[1972-05-11T00:00:00.000Z TO 2011-05-10T01:30:00.000Z]',
         '[20020101 TO 20030101]',
         '{Aida TO Carmen}',
     )
     for range_query in verbatim:
         self.assertEqual(quote(range_query), range_query)

     # A missing bound is filled in with "*".
     open_ended = (
         ('{Aida TO}', '{Aida TO *}'),
         ('{TO Carmen}', '{* TO Carmen}'),
     )
     for range_query, expected in open_ended:
         self.assertEqual(quote(range_query), expected)
예제 #23
0
 def testQuotingRangeSearches(self):
     # (input, expected) pairs; only the ranges with a missing bound are
     # rewritten, gaining a "*" for the absent side.
     range_cases = [
         ("[* TO NOW]", "[* TO NOW]"),
         ("[1972-05-11T00:00:00.000Z TO *]",
          "[1972-05-11T00:00:00.000Z TO *]"),
         ("[1972-05-11T00:00:00.000Z TO "
          "2011-05-10T01:30:00.000Z]",
          "[1972-05-11T00:00:00.000Z TO "
          "2011-05-10T01:30:00.000Z]"),
         ("[20020101 TO 20030101]", "[20020101 TO 20030101]"),
         ("{Aida TO Carmen}", "{Aida TO Carmen}"),
         ("{Aida TO}", "{Aida TO *}"),
         ("{TO Carmen}", "{* TO Carmen}"),
     ]
     for raw, expected in range_cases:
         self.assertEqual(quote(raw), expected)
예제 #24
0
 def testSolrSpecifics(self):
     # http://wiki.apache.org/solr/SolrQuerySyntax
     #
     # Backslashes in the expected literal are written as "\\": "\(" and
     # "\)" are not valid string escapes and newer interpreters warn about
     # them. The compared string value is unchanged.
     self.assertEqual(quote('"recip(rord(myfield),1,2,3)"'),
                      '"recip\\(rord\\(myfield\\),1,2,3\\)"')  # Seems to be ok to quote function
     # Range and date-math expressions are expected back verbatim.
     self.assertEqual(quote('[* TO NOW]'), '[* TO NOW]')
     self.assertEqual(quote('[1976-03-06T23:59:59.999Z TO *]'),
                      '[1976-03-06T23:59:59.999Z TO *]')
     self.assertEqual(quote('[1995-12-31T23:59:59.999Z TO 2007-03-06T00:00:00Z]'),
                      '[1995-12-31T23:59:59.999Z TO 2007-03-06T00:00:00Z]')
     self.assertEqual(quote('[NOW-1YEAR/DAY TO NOW/DAY+1DAY]'),
                      '[NOW-1YEAR/DAY TO NOW/DAY+1DAY]')
     self.assertEqual(quote('[1976-03-06T23:59:59.999Z TO 1976-03-06T23:59:59.999Z+1YEAR]'),
                      '[1976-03-06T23:59:59.999Z TO 1976-03-06T23:59:59.999Z+1YEAR]')
     self.assertEqual(quote('[1976-03-06T23:59:59.999Z/YEAR TO 1976-03-06T23:59:59.999Z]'),
                      '[1976-03-06T23:59:59.999Z/YEAR TO 1976-03-06T23:59:59.999Z]')
예제 #25
0
def makeSimpleExpressions(term, levenstein_distance):
    """Build the search expressions for one term of a simple query.

    Returns a ``(value, base_value)`` tuple, both wrapped in parentheses.
    A truthy ``levenstein_distance`` is appended as a ``~N`` fuzzy suffix
    to quoted literals and to the exact-match half of plain terms.
    """
    fuzzy_suffix = '~%s' % levenstein_distance if levenstein_distance else ''

    if '"' in term:  # quoted literals
        literal = '%s%s' % (term, fuzzy_suffix)
        return '(%s)' % literal, '(%s)' % literal

    if isWildCard(term):
        wildcard_expr = prepare_wildcard(term)
        bare_term = quote(term.replace('*', '').replace('?', ''))
        return '(%s)' % wildcard_expr, '(%s)' % bare_term

    # plain term: prefix match OR (fuzzy) exact match
    combined = '%s* OR %s%s' % (prepare_wildcard(term), term, fuzzy_suffix)
    return '(%s)' % combined, '(%s)' % term
예제 #26
0
def makeSimpleExpressions(term, levenstein_distance):
    """Return ``(value, base_value)`` search expressions for one term.

    Both results are wrapped in parentheses.  Quoted literals get the
    optional ``~N`` fuzzy suffix, wildcard terms go through
    ``prepare_wildcard``, and any other term is expanded into a prefix
    match OR-ed with the (optionally fuzzy) term itself.
    """
    suffix = ""
    if levenstein_distance:
        suffix = "~%s" % levenstein_distance

    def wrap(value, base_value):
        # both halves of the result are parenthesised the same way
        return "(%s)" % value, "(%s)" % base_value

    if '"' in term:  # quoted literals
        phrase = "%s%s" % (term, suffix)
        return wrap(phrase, phrase)
    if isWildCard(term):
        prepared = prepare_wildcard(term)
        return wrap(prepared, quote(term.replace("*", "").replace("?", "")))
    expanded = "%s* OR %s%s" % (prepare_wildcard(term), term, suffix)
    return wrap(expanded, term)
예제 #27
0
def mangleSearchableText(value, config):
    """Expand a simple ``SearchableText`` value into a Solr expression.

    Values rejected by ``isSimpleSearch`` are returned unchanged.  For
    simple searches every term is expanded with ``makeSimpleExpressions``;
    with a configured ``search_pattern`` the expansions are formatted into
    the pattern and returned as a one-element set, otherwise the
    space-joined expansion string is returned.
    """
    template = getattr(config, "search_pattern", "")
    max_distance = getattr(config, "levenshtein_distance", 0)

    if not isSimpleSearch(value):
        return value

    # one (value, base_value) pair per search term
    expressions = [
        makeSimpleExpressions(term, max_distance)
        for term in splitSimpleSearch(value)
    ]
    joined_base = " ".join([base for (_, base) in expressions])
    joined_value = " ".join([full for (full, _) in expressions])

    if not template:
        return joined_value
    formatted = template.format(value=quote(joined_value), base_value=joined_base)
    return set([formatted])  # add literal query parameter
예제 #28
0
def mangleSearchableText(value, config, force_complex_search=False):
    """Rewrite a ``SearchableText`` value into a Solr query expression.

    ``config`` supplies the settings read below; a falsy ``config`` is
    replaced by ``getConfig()``.  ``force_complex_search`` requests that
    the value be passed through as a raw query, which is only honoured
    when the configuration sets ``allow_complex_search``.  A value
    starting with ``solr:`` requests the same thing.

    Returns one of:

    * ``value`` unchanged when it is not a simple search and simple
      search is not forced,
    * the stripped value (without a leading ``solr:``) for an allowed
      complex search,
    * a one-element set holding the formatted ``search_pattern`` when a
      pattern is configured,
    * otherwise the space-joined per-term expressions as a string.
    """
    config = config or getConfig()
    pattern = getattr(config, "search_pattern", u"")
    force_simple_search = getattr(config, "force_simple_search", False)
    allow_complex_search = getattr(config, "allow_complex_search", False)
    levenstein_distance = getattr(config, "levenshtein_distance", 0)
    prefix_wildcard = getattr(config, "prefix_wildcard", False)

    # A leading "solr:" marks the value as a complex (raw) query.
    stripped = value.strip()
    force_complex_search_prefix = stripped.startswith("solr:")
    if force_complex_search_prefix:
        stripped = stripped.replace("solr:", "", 1).strip()

    if not force_simple_search and not isSimpleSearch(value):
        return value

    if allow_complex_search and (force_complex_search_prefix or force_complex_search):
        # FIXME: fold in catalog solr_complex_search parameter check
        return stripped

    if force_simple_search:
        value = removeSpecialCharactersAndOperators(value)

    value_parts = []
    base_value_parts = []
    for term in splitSimpleSearch(value):
        (term_value, term_base_value) = makeSimpleExpressions(term, levenstein_distance)
        value_parts.append(term_value)
        base_value_parts.append(term_base_value)

    base_value = " ".join(base_value_parts)
    value = " ".join(value_parts)
    if not pattern:
        return value
    # The previous trailing "if pattern: pattern.encode(...)" block could
    # never run (a truthy pattern had already returned) and was removed.
    value = pattern.format(
        value=quote(value, prefix_wildcard=prefix_wildcard), base_value=base_value
    )
    return set([value])  # add literal query parameter
예제 #29
0
def makeSimpleExpressions(term, levenstein_distance):
    """Build the search expressions for one term of a simple query.

    Returns a ``(value, base_value)`` tuple.  Only ``value`` is wrapped in
    parentheses; ``base_value`` is returned bare.
    """
    fuzzy_suffix = '~%s' % levenstein_distance if levenstein_distance else ''

    if '"' in term:  # quoted literals
        expression = '%s%s' % (term, fuzzy_suffix)
        bare_expression = expression
    elif isWildCard(term):
        expression = prepare_wildcard(term)
        bare_expression = quote(term.replace('*', '').replace('?', ''))
    else:
        expression = '%s* OR %s%s' % (prepare_wildcard(term), term,
                                      fuzzy_suffix)
        bare_expression = term

    # Netsight: we removed the parenthesis around base_value
    #           the first element of the returned tuple (value) is
    #           not used.
    return '(%s)' % expression, bare_expression
예제 #30
0
 def testQuotingProximitySearches(self):
     # A proximity suffix (~N) after a quoted phrase is expected back as is.
     proximity_query = '"jakarta apache"~10'
     self.assertEqual(quote(proximity_query), '"jakarta apache"~10')
예제 #31
0
 def testQuotingFuzzySearches(self):
     # Fuzzy suffixes, with or without a similarity value, stay untouched.
     for fuzzy_query in ('roam~', 'roam~0.8'):
         self.assertEqual(quote(fuzzy_query), fuzzy_query)
예제 #32
0
 def testQuotingFuzzySearches(self):
     # (input, expected) pairs: the trailing "~" / "~0.8" must not be escaped.
     fuzzy_cases = [
         ("roam~", "roam~"),
         ("roam~0.8", "roam~0.8"),
     ]
     for raw, expected in fuzzy_cases:
         self.assertEqual(quote(raw), expected)
예제 #33
0
 def testQuoting(self):
     # http://lucene.apache.org/java/2_3_2/queryparsersyntax.html
     #
     # Every backslash in the literals below is written as "\\".  The
     # earlier spellings ("\)", "\?", "\:", "\[", "\{", "\(" ...) are not
     # valid string escapes and newer interpreters warn about them; the
     # compared string values are unchanged.
     self.assertEqual(quote(''), '')
     self.assertEqual(quote(' '), '')
     self.assertEqual(quote('foo'), 'foo')
     self.assertEqual(quote('foo '), 'foo')
     self.assertEqual(quote('"foo"'), '"foo"')
     self.assertEqual(quote('"foo'), '\\"foo')
     self.assertEqual(quote('foo"'), 'foo\\"')
     self.assertEqual(quote('foo bar'), '(foo bar)')
     self.assertEqual(quote('"foo bar" bah'), '("foo bar" bah)')
     self.assertEqual(quote('\\['), '\\[')
     self.assertEqual(quote(')'), '\\)')
     self.assertEqual(quote('"(foo bar)" bah'), '("\\(foo bar\\)" bah)')
     self.assertEqual(quote('"(foo\\"bar)" bah'), '("\\(foo\\"bar\\)" bah)')
     self.assertEqual(quote('"foo bar"'), '"foo bar"')
     self.assertEqual(quote('"foo bar'), '(\\"foo bar)')
     self.assertEqual(quote('foo bar what?'), '(foo bar what?)')
     self.assertEqual(quote('P|This&That'), 'P|This&That')
     # Empty bracket pairs are expected to quote to an empty string.
     self.assertEqual(quote('[]'), '')
     self.assertEqual(quote('()'), '')
     self.assertEqual(quote('{}'), '')
     self.assertEqual(quote('...""'), '...\\"\\"')
     self.assertEqual(quote('\\'), '\\\\')  # Search for \ has to be quoted
     self.assertEqual(quote('\\?'), '\\?')
     self.assertEqual(quote('*****@*****.**'), '*****@*****.**')
     self.assertEqual(quote('http://machine/folder and item and some/path and and amilli3*'),
                      '(http\\://machine/folder and item and some/path and and amilli3*)')
     self.assertEqual(quote('"[]"'), '"\\[\\]"')
     self.assertEqual(quote('"{}"'), '"\\{\\}"')
     self.assertEqual(quote('"()"'), '"\\(\\)"')
     self.assertEqual(quote('foo and bar and 42"*'), '(foo and bar and 42\\"\\*)')
     # Can't use ? or * as beginning of new query
     self.assertEqual(quote('"fix and it"*'), '"fix and it"')
     self.assertEqual(quote('"fix and it"?'), '"fix and it"')
     self.assertEqual(quote('foo and bar and [foobar at foo.com]*'),
                      '(foo and bar and \\[foobar at foo.com\\])')
예제 #34
0
 def testQuotingOperatorsGrouping(self):
     # Word operators (AND/OR/NOT), symbol operators (&&/||), +/- prefixes
     # and explicit grouping are all expected to be preserved.
     grouping_cases = [
         ('+return +"pink panther"', '(+return +"pink panther")'),
         ("+jakarta lucene", "(+jakarta lucene)"),
         ('"jakarta apache" -"Apache Lucene"',
          '("jakarta apache" -"Apache Lucene")'),
         ('"jakarta apache" NOT "Apache Lucene"',
          '("jakarta apache" NOT "Apache Lucene")'),
         ('"jakarta apache" OR jakarta', '("jakarta apache" OR jakarta)'),
         ('"jakarta apache" AND "Apache Lucene"',
          '("jakarta apache" AND "Apache Lucene")'),
         ("(jakarta OR apache) AND website",
          "((jakarta OR apache) AND website)"),
         ("(a AND (b OR c))", "(a AND (b OR c))"),
         ("((a AND b) OR c)", "((a AND b) OR c)"),
         ('"jakarta apache" || jakarta', '("jakarta apache" || jakarta)'),
         ('"jakarta apache" && "Apache Lucene"',
          '("jakarta apache" && "Apache Lucene")'),
         ("(jakarta || apache) && website",
          "((jakarta || apache) && website)"),
         ("(a && (b || c))", "(a && (b || c))"),
         ("((a && b) || c)", "((a && b) || c)"),
         ("P||This&&That", "(P||This&&That)"),
     ]
     for raw, expected in grouping_cases:
         self.assertEqual(quote(raw), expected)
예제 #35
0
 def testQuotingBoostingTerm(self):
     # Boost markers (^N) following a term or phrase must stay unescaped.
     expectations = (
         ('jakarta^4 apache', '(jakarta^4 apache)'),
         ('jakarta^0.2 apache', '(jakarta^0.2 apache)'),
         ('"jakarta apache"^4 "Apache Lucene"',
          '("jakarta apache"^4 "Apache Lucene")'),
     )
     for query, quoted in expectations:
         self.assertEqual(quote(query), quoted)
예제 #36
0
 def testQuotingOperatorsGrouping(self):
     # First the word operators, then the same shapes written with the
     # && / || symbol operators.
     word_operator_cases = (
         ('+return +"pink panther"', '(+return +"pink panther")'),
         ('+jakarta lucene', '(+jakarta lucene)'),
         ('"jakarta apache" -"Apache Lucene"',
          '("jakarta apache" -"Apache Lucene")'),
         ('"jakarta apache" NOT "Apache Lucene"',
          '("jakarta apache" NOT "Apache Lucene")'),
         ('"jakarta apache" OR jakarta', '("jakarta apache" OR jakarta)'),
         ('"jakarta apache" AND "Apache Lucene"',
          '("jakarta apache" AND "Apache Lucene")'),
         ('(jakarta OR apache) AND website',
          '((jakarta OR apache) AND website)'),
         ('(a AND (b OR c))', '(a AND (b OR c))'),
         ('((a AND b) OR c)', '((a AND b) OR c)'),
     )
     symbol_operator_cases = (
         ('"jakarta apache" || jakarta', '("jakarta apache" || jakarta)'),
         ('"jakarta apache" && "Apache Lucene"',
          '("jakarta apache" && "Apache Lucene")'),
         ('(jakarta || apache) && website',
          '((jakarta || apache) && website)'),
         ('(a && (b || c))', '(a && (b || c))'),
         ('((a && b) || c)', '((a && b) || c)'),
         ('P||This&&That', '(P||This&&That)'),
     )
     for query, quoted in word_operator_cases + symbol_operator_cases:
         self.assertEqual(quote(query), quoted)
예제 #37
0
    def buildQueryAndParameters(self, default=None, **args):
        """Build a per-field Solr query mapping plus the query parameters.

        ``default`` is the value to search in the schema's default search
        field; it is stored in ``args`` under the ``None`` key.  Every
        other keyword argument is treated as ``field name -> value``.

        Returns a ``(query, params)`` tuple.  ``query`` maps each accepted
        field name (``None`` for the default field) to its query fragment.
        It is an empty dict when a *named* field was given an empty
        value, in which case query building is aborted.  ``params`` is the
        result of ``subtractQueryParameters`` followed by
        ``cleanupQueryParameters``.
        """
        schema = self.getManager().getSchema() or {}

        # NOTE(review): presumably this moves non-field query parameters
        # out of ``args`` -- confirm against the helper.
        params = subtractQueryParameters(args)
        params = cleanupQueryParameters(params, schema)
        config = self.getConfig()

        # Both helpers receive ``args`` itself and return nothing that is
        # used here; presumably they rewrite it in place -- TODO confirm.
        prepareData(args)
        mangleQuery(args, config, schema)

        logger.debug('building query for "%r", %r', default, args)
        # NOTE(review): the schema was already fetched above; this second
        # lookup looks redundant -- confirm before removing.
        schema = self.getManager().getSchema() or {}
        defaultSearchField = getattr(schema, 'defaultSearchField', None)
        # The default search value is keyed by None (set unconditionally).
        args[None] = default
        query = {}

        # NOTE(review): sorting keys that include None next to strings
        # relies on Python 2 ordering semantics.
        for name, value in sorted(args.items()):
            # ``name`` is None for the default value, so fall back to the
            # schema's default search field for the lookup.
            field = schema.get(name or defaultSearchField, None)
            if field is None or not field.indexed:
                logger.info(
                    'dropping unknown search attribute "%s" '
                    ' (%r) for query: %r', name, value, args
                )
                continue
            if isinstance(value, bool):
                # booleans are rendered as "true" / "false"
                value = str(value).lower()
            elif not value:     # solr doesn't like empty fields (+foo:"")
                if not name:
                    # an empty default value is simply skipped
                    continue
                logger.info(
                    'empty search term form "%s:%s", aborting buildQuery' % (
                        name,
                        value
                    )
                )
                # an empty value for a named field aborts the whole query
                return {}, params
            elif field.class_ == 'solr.BoolField':
                if not isinstance(value, (tuple, list)):
                    value = [value]
                # MV is defined outside this block; presumably a
                # "missing value" marker -- TODO confirm.
                falses = '0', 'False', MV
                true = lambda v: bool(v) and v not in falses
                # collapse the given values into the set of distinct booleans
                value = set(map(true, value))
                if not len(value) == 1:
                    assert len(value) == 2      # just to make sure
                    continue                    # skip when "true or false"
                value = str(value.pop()).lower()
            elif isinstance(value, (tuple, list)):
                # list items should be treated as literals, but
                # nevertheless only get quoted when necessary
                value = '(%s)' % ' OR '.join(map(quote_iterable_item, value))
            elif isinstance(value, set):        # sets are taken literally
                # stored as-is (no quoting, no "+name:" prefix)
                if len(value) == 1:
                    query[name] = ''.join(value)
                else:
                    query[name] = '(%s)' % ' OR '.join(value)
                continue
            elif isinstance(value, basestring):
                if field.class_ == 'solr.TextField':
                    if isWildCard(value):
                        value = prepare_wildcard(value)
                    value = quote(value, textfield=True)
                    # if we have an intra-word hyphen, we need quotes
                    if '\\-' in value or '\\+' in value:
                        if value[0] != '"':
                            value = '"%s"' % value
                else:
                    value = quote(value)
                if not value:   # don't search for empty strings, even quoted
                    continue
            else:
                logger.info(
                    'skipping unsupported value "%r" (%s)', value, name
                )
                continue
            if name is None:
                # default-field value: make it required unless it already
                # starts with an explicit +/- operator
                if value and value[0] not in '+-':
                    value = '+%s' % value
            else:
                # named field: always a required "+field:value" clause
                value = '+%s:%s' % (name, value)
            query[name] = value
        logger.debug('built query "%s"', query)

        if query:
            # only called for a non-empty query; its return value is unused
            optimizeQueryParameters(query, params)
        return query, params
예제 #38
0
파일: search.py 프로젝트: FHNW/ftw.solr
def buildQuery(self, default=None, **args):
    """Build a per-field Solr query mapping for simple use-cases.

    ``default`` is the value to search in the schema's default search
    field; it is only used when both it and the schema's
    ``defaultSearchField`` are not None, and is then stored in ``args``
    under the ``None`` key.  Every other keyword argument is treated as
    ``field name -> value``.

    Returns a dict mapping each accepted field name (``None`` for the
    default field) to its query fragment.  Unknown or unindexed fields,
    empty values and unsupported value types are skipped.
    """
    logger.debug('building query for "%r", %r', default, args)
    schema = self.getManager().getSchema() or {}
    defaultSearchField = getattr(schema, 'defaultSearchField', None)
    if default is not None and defaultSearchField is not None:
        args[None] = default
    query = {}
    # NOTE(review): sorting keys that may include None next to strings
    # relies on Python 2 ordering semantics.
    for name, value in sorted(args.items()):
        # ``name`` is None for the default value, so fall back to the
        # schema's default search field for the lookup.
        field = schema.get(name or defaultSearchField, None)
        if field is None or not field.indexed:
            logger.warning(
                'dropping unknown search attribute "%s" '
                ' (%r) for query: %r', name, value, args)
            continue
        if isinstance(value, bool):
            # booleans are rendered as "true" / "false"
            value = str(value).lower()
        elif not value:  # solr doesn't like empty fields (+foo:"")
            continue
        elif field.class_ == 'solr.BoolField':
            if not isinstance(value, (tuple, list)):
                value = [value]
            # MV is defined outside this block; presumably a
            # "missing value" marker -- TODO confirm.
            falses = '0', 'False', MV
            true = lambda v: bool(v) and v not in falses
            # collapse the given values into the set of distinct booleans
            value = set(map(true, value))
            if not len(value) == 1:
                assert len(value) == 2  # just to make sure
                continue  # skip when "true or false"
            value = str(value.pop()).lower()
        elif isinstance(value, (tuple, list)):
            # list items should be treated as literals, but
            # nevertheless only get quoted when necessary
            def quoteitem(term):
                # unicode terms are encoded to UTF-8 before quoting
                if isinstance(term, unicode):
                    term = term.encode('utf-8')
                quoted = quote(term)
                # if quoting changed the term and did not already yield a
                # phrase, quote the term again wrapped in double quotes
                if not quoted.startswith('"') and not quoted == term:
                    quoted = quote('"' + term + '"')
                return quoted

            value = '(%s)' % ' OR '.join(map(quoteitem, value))
        elif isinstance(value, set):  # sets are taken literally
            # stored as-is (no quoting, no "+name:" prefix); only slashes
            # are escaped below
            if len(value) == 1:
                query[name] = ''.join(value)
            else:
                query[name] = '(%s)' % ' OR '.join(value)
            if '/' in query[name]:
                query[name] = query[name].replace('/', '\\/')
            continue
        elif isinstance(value, basestring):
            if field.class_ == 'solr.TextField':
                if isWildCard(value):
                    value = prepare_wildcard(value)
                value = quote(value, textfield=True)
                # if we have an intra-word hyphen, we need quotes
                if '\\-' in value or '\\+' in value:
                    if value[0] != '"':
                        value = '"%s"' % value
            else:
                value = quote(value)
            if not value:  # don't search for empty strings, even quoted
                continue
        else:
            logger.info('skipping unsupported value "%r" (%s)', value, name)
            continue
        if name is None:
            # default-field value: make it required unless it already
            # starts with an explicit +/- operator
            if value and value[0] not in '+-':
                value = '+%s' % value
        else:
            # named field: always a required "+field:value" clause
            value = '+%s:%s' % (name, value)

        # Since Solr 4.0 slashes must be escaped
        # see: http://wiki.apache.org/solr/SolrQuerySyntax
        # NOTE(review): this assumes quote() has not already escaped
        # slashes; otherwise they would be escaped twice -- confirm.
        if '/' in value:
            value = value.replace('/', '\\/')

        query[name] = value

    logger.debug('built query "%s"', query)
    return query
예제 #39
0
def mangleQuery(keywords, config, schema):
    """Translate / mangle query parameters to replace Zope specifics
    with equivalent constructs for Solr.

    ``keywords`` is rewritten in place and nothing is returned.

    :param keywords: dict mapping index names to query values.  A value
        may be a plain value, a dict holding a ``'query'`` entry plus
        options, an object with a ``query`` attribute, or an old-style
        ``<name>_usage`` string of the form ``'category:spec'``.
    :param config: object read via ``getattr`` for ``search_pattern``,
        ``effective_steps`` and ``exclude_user``.
    :param schema: mapping whose keys are field names; used to detect
        extended path indexes.  When falsy only ``'path'`` is treated
        as such an index.
    :raises KeyError: if a dict value has no ``'query'`` entry.
    :raises AssertionError: if options remain for a non-string value
        that none of the specific branches handled.
    """
    extras = {}
    # Iterate over a snapshot: entries are deleted/replaced inside the loop,
    # which is an error while iterating a live dict view.
    for key, value in list(keywords.items()):
        if key.endswith('_usage'):          # convert old-style parameters
            category, spec = value.split(':', 1)
            extras[key[:-6]] = {category: spec}
            del keywords[key]
        elif isinstance(value, dict):       # unify dict parameters
            # Work on a copy so the caller's dict keeps its 'query' entry
            # and options and can be reused for another search.
            options = dict(value)
            keywords[key] = options.pop('query')
            extras[key] = options
        elif hasattr(value, 'query'):       # unify object parameters
            keywords[key] = value.query
            extra = dict()
            for arg in query_args:
                arg_val = getattr(value, arg, None)
                if arg_val is not None:
                    extra[arg] = arg_val
            extras[key] = extra
        elif key in ignored:
            del keywords[key]

    # find EPI indexes: a prefix counts when the schema has all three of
    # its `_string`, `_depth` and `_parents` fields
    if schema:
        epi_indexes = {}
        for name in schema.keys():
            parts = name.split('_')
            if parts[-1] in ['string', 'depth', 'parents']:
                count = epi_indexes.get(parts[0], 0)
                epi_indexes[parts[0]] = count + 1
        epi_indexes = [k for k, v in epi_indexes.items() if v == 3]
    else:
        epi_indexes = ['path']

    # Snapshot again: the branches below add and remove keys, and newly
    # added keys (e.g. `<key>_parents`, `effective`) must not be revisited.
    for key, value in list(keywords.items()):
        args = extras.get(key, {})
        if key == 'SearchableText':
            pattern = getattr(config, 'search_pattern', '')
            simple_term = isSimpleTerm(value)
            if pattern and isSimpleSearch(value):
                base_value = value
                if simple_term:  # use prefix/wildcard search
                    value = '(%s* OR %s)' % (prepare_wildcard(value), value)
                elif isWildCard(value):
                    value = prepare_wildcard(value)
                    base_value = quote(value.replace('*', '').replace('?', ''))
                # simple queries use custom search pattern
                value = pattern.format(value=quote(value),
                    base_value=base_value)
                keywords[key] = set([value])    # add literal query parameter
                continue
            elif simple_term:  # use prefix/wildcard search
                keywords[key] = '(%s* OR %s)' % (
                    prepare_wildcard(value), value)
                continue
        if key in epi_indexes:
            path = keywords['%s_parents' % key] = value
            del keywords[key]
            if 'depth' in args:
                depth = int(args['depth'])
                if depth >= 0:
                    if not isinstance(value, (list, tuple)):
                        path = [path]
                    tmpl = '(+%s_depth:[%d TO %d] AND +%s_parents:%s)'
                    params = keywords['%s_parents' % key] = set()
                    for p in path:
                        base = len(p.split('/'))
                        # lower bound is `base` for depth 0, else `base + 1`
                        lower = base + (depth and 1)
                        upper = base + depth
                        params.add(tmpl % (key, lower, upper, key, p))
                del args['depth']
        elif key == 'effectiveRange':
            if isinstance(value, DateTime):
                steps = getattr(config, 'effective_steps', 1)
                if steps > 1:
                    # round the timestamp down to a multiple of `steps`
                    value = DateTime(value.timeTime() // steps * steps)
                value = iso8601date(value)
            del keywords[key]
            keywords['effective'] = '[* TO %s]' % value
            keywords['expires'] = '[%s TO *]' % value
        elif key == 'show_inactive':
            del keywords[key]           # marker for `effectiveRange`
        elif 'range' in args:
            if not isinstance(value, (list, tuple)):
                value = [value]
            payload = map(iso8601date, value)
            keywords[key] = ranges[args['range']] % tuple(payload)
            del args['range']
        elif 'operator' in args:
            if isinstance(value, (list, tuple)) and len(value) > 1:
                sep = ' %s ' % args['operator'].upper()
                value = sep.join(map(str, map(iso8601date, value)))
                keywords[key] = '(%s)' % value
            del args['operator']
        elif key == 'allowedRolesAndUsers':
            if getattr(config, 'exclude_user', False):
                token = 'user$' + getSecurityManager().getUser().getId()
                if token in value:
                    value.remove(token)
        elif isinstance(value, DateTime):
            keywords[key] = iso8601date(value)
        elif not isinstance(value, basestring):
            assert not args, 'unsupported usage: %r' % args
예제 #40
0
 def buildQuery(self, default=None, **args):
     """ assemble per-field query fragments for simple use-cases

         `default` is the term for the schema's default search field and
         is stored under the key `None`; every keyword argument names a
         field to search.  Returns a dict mapping each accepted name to
         its query fragment; unknown, non-indexed, empty or unsupported
         values are left out. """
     logger.debug('building query for "%r", %r', default, args)
     schema = self.getManager().getSchema() or {}
     fallback_name = getattr(schema, 'defaultSearchField', None)

     def quoteitem(term):
         # list items are literals, but are only wrapped in double
         # quotes when plain quoting had to change them
         if isinstance(term, unicode):
             term = term.encode('utf-8')
         quoted = quote(term)
         if quoted.startswith('"') or quoted == term:
             return quoted
         return quote('"' + term + '"')

     def is_true(candidate):
         # anything falsy or listed as a "false" marker counts as false
         return bool(candidate) and candidate not in ('0', 'False', MV)

     args[None] = default
     query = {}
     for name, value in args.items():
         field = schema.get(name or fallback_name, None)
         if field is None or not field.indexed:
             logger.warning('dropping unknown search attribute "%s" '
                 ' (%r) for query: %r', name, value, args)
             continue

         if isinstance(value, bool):
             value = str(value).lower()
         elif not value:
             # solr doesn't like empty fields (+foo:"")
             continue
         elif field.class_ == 'solr.BoolField':
             if isinstance(value, (tuple, list)):
                 candidates = value
             else:
                 candidates = [value]
             outcomes = set(is_true(item) for item in candidates)
             if len(outcomes) != 1:
                 assert len(outcomes) == 2      # just to make sure
                 continue                       # skip when "true or false"
             value = str(outcomes.pop()).lower()
         elif isinstance(value, (tuple, list)):
             alternatives = [quoteitem(item) for item in value]
             value = '(%s)' % ' OR '.join(alternatives)
         elif isinstance(value, set):
             # sets are taken literally and bypass the prefixing below
             if len(value) == 1:
                 literal = ''.join(value)
             else:
                 literal = '(%s)' % ' OR '.join(value)
             query[name] = literal
             continue
         elif isinstance(value, basestring):
             if field.class_ != 'solr.TextField':
                 value = quote(value)
             else:
                 if isWildCard(value):
                     value = prepare_wildcard(value)
                 value = quote(value, textfield=True)
                 # if we have an intra-word hyphen, we need quotes
                 escaped_sign = '\\-' in value or '\\+' in value
                 if escaped_sign and value[0] != '"':
                     value = '"%s"' % value
             if not value:
                 # don't search for empty strings, even quoted
                 continue
         else:
             logger.info('skipping unsupported value "%r" (%s)',
                 value, name)
             continue

         if name is not None:
             value = '+%s:%s' % (name, value)
         elif value and value[0] not in '+-':
             value = '+%s' % value
         query[name] = value
     logger.debug('built query "%s"', query)
     return query
예제 #41
0
 def testQuoting(self):
     """ check the output of `quote` for a range of query strings:
         empty input, plain terms, phrases, unbalanced quotes, brackets,
         escaping and trailing wildcards """
     # http://lucene.apache.org/java/2_3_2/queryparsersyntax.html
     # empty and whitespace-only input yields an empty string
     self.assertEqual(quote(""), "")
     self.assertEqual(quote(" "), "")
     # single terms are returned stripped and unchanged
     self.assertEqual(quote("foo"), "foo")
     self.assertEqual(quote("foo "), "foo")
     # balanced double quotes are kept, a lone one gets escaped
     self.assertEqual(quote('"foo"'), '"foo"')
     self.assertEqual(quote('"foo'), '\\"foo')
     self.assertEqual(quote('foo"'), 'foo\\"')
     # multiple terms are wrapped in parentheses
     self.assertEqual(quote("foo bar"), "(foo bar)")
     self.assertEqual(quote('"foo bar" bah'), '("foo bar" bah)')
     self.assertEqual(quote("\\["), "\\[")
     self.assertEqual(quote(")"), "\\)")
     # brackets inside a quoted phrase are escaped
     self.assertEqual(quote('"(foo bar)" bah'), '("\\(foo bar\\)" bah)')
     self.assertEqual(quote('"(foo\\"bar)" bah'), '("\\(foo\\"bar\\)" bah)')
     self.assertEqual(quote('"foo bar"'), '"foo bar"')
     self.assertEqual(quote('"foo bar'), '(\\"foo bar)')
     self.assertEqual(quote("foo bar what?"), "(foo bar what?)")
     self.assertEqual(quote("P|This&That"), "P|This&That")
     # empty bracket pairs are dropped entirely
     self.assertEqual(quote("[]"), "")
     self.assertEqual(quote("()"), "")
     self.assertEqual(quote("{}"), "")
     self.assertEqual(quote('...""'), '...\\"\\"')
     self.assertEqual(quote("\\"), "\\\\")  # Search for \ has to be quoted
     self.assertEqual(quote("\\?"), "\\?")
     self.assertEqual(quote("*****@*****.**"), "*****@*****.**")
     # colons and slashes in a URL-like term are escaped
     self.assertEqual(
         quote("http://machine/folder and item and some/path "
               "and and amilli3*"),
         "(http\\:\\/\\/machine\\/folder and item and "
         "some\\/path and and amilli3*)",
     )
     # bracket pairs inside double quotes are kept, but escaped
     self.assertEqual(quote('"[]"'), '"\\[\\]"')
     self.assertEqual(quote('"{}"'), '"\\{\\}"')
     self.assertEqual(quote('"()"'), '"\\(\\)"')
     self.assertEqual(quote('foo and bar and 42"*'), "(foo and bar and "
                      '42\\"\\*)')
     # Can't use ? or * as beginning of new query
     self.assertEqual(quote('"fix and it"*'), '"fix and it"')
     self.assertEqual(quote('"fix and it"?'), '"fix and it"')
     self.assertEqual(
         quote("foo and bar and [foobar at foo.com]*"),
         "(foo and bar and \\[foobar at foo.com\\])",
     )