def testQuotingForwardSlashes(self):
    # solr 4 supports regular expressions and requires / to be escaped,
    # both inside and outside of double-quoted phrases
    cases = (
        ("/", "\\/"),
        ("(/ OR x)", "(\\/ OR x)"),
        ('"/', '\\"\\/'),
        ('"/"', '"\\/"'),
        ('"(/ OR x)"', '"\\(\\/ OR x\\)"'),
    )
    for raw, expected in cases:
        self.assertEqual(quote(raw), expected)
def quoteitem(term):
    """Quote a single list item, treating it as a literal when needed.

    Unicode input is encoded to UTF-8 first.  If plain quoting leaves the
    term untouched (or already yields a double-quoted phrase) that result is
    returned; otherwise the term is wrapped in double quotes and quoted again.
    """
    if isinstance(term, unicode):
        term = term.encode('utf-8')
    plain = quote(term)
    if plain.startswith('"') or plain == term:
        return plain
    # quoting had to alter the term, so retry with it as a quoted phrase
    return quote('"' + term + '"')
def testQuotingBoostingTerm(self):
    # boost markers (^N) are left alone; multi-term queries get grouped
    cases = (
        ("jakarta^4 apache", "(jakarta^4 apache)"),
        ("jakarta^0.2 apache", "(jakarta^0.2 apache)"),
        (
            '"jakarta apache"^4 "Apache Lucene"',
            '("jakarta apache"^4 "Apache Lucene")',
        ),
    )
    for raw, expected in cases:
        self.assertEqual(quote(raw), expected)
def testQuotingForwardSlashes(self):
    # solr 4 supports regular expressions and requires / to be escaped
    expectations = [
        ('/', '\\/'),
        ('(/ OR x)', '(\\/ OR x)'),
        ('"/', '\\"\\/'),
        ('"/"', '"\\/"'),
        ('"(/ OR x)"', '"\\(\\/ OR x\\)"'),
    ]
    for query, escaped in expectations:
        self.assertEqual(quote(query), escaped)
def testQuotingEscapingSpecialCharacters(self):
    cases = (
        ("-+!^~:", "\\-\\+\\!\\^\\~\\:"),
        # Only quote * and ? if quoted
        ('"*?"', '"\\*\\?"'),
        # also quote multiple occurrences
        (":", "\\:"),
        (": :", "(\\: \\:)"),
        ("foo+ bar! nul:", "(foo\\+ bar\\! nul\\:)"),
    )
    for raw, expected in cases:
        self.assertEqual(quote(raw), expected)
def testQuotingEscapingSpecialCharacters(self):
    expectations = [
        ('-+!^~:', '\\-\\+\\!\\^\\~\\:'),
        # Only quote * and ? if quoted
        ('"*?"', '"\\*\\?"'),
        # also quote multiple occurrences
        (':', '\\:'),
        (': :', '(\\: \\:)'),
        ('foo+ bar! nul:', '(foo\\+ bar\\! nul\\:)'),
    ]
    for query, escaped in expectations:
        self.assertEqual(quote(query), escaped)
def testQuotingWildcardSearches(self):
    # default behaviour: leading wildcards are dropped, repeated ones collapse
    default_cases = (
        ("te?t", "te?t"),
        ("test*", "test*"),
        ("test**", "test*"),
        ("te*t", "te*t"),
        ("?test", "test"),
        ("*test", "test"),
    )
    for raw, expected in default_cases:
        self.assertEqual(quote(raw), expected)

    # with prefix_wildcard=True a single leading wildcard is kept
    prefix_cases = (
        ("*test", "*test"),
        ("?test", "?test"),
        ("**test", "*test"),
        ("??test", "?test"),
    )
    for raw, expected in prefix_cases:
        self.assertEqual(quote(raw, prefix_wildcard=True), expected)
def testUnicode(self):
    # Non-ASCII input is expected back as UTF-8 encoded byte strings.
    # Backslashes in the expected values are written as explicit "\\" so the
    # literals do not rely on invalid escape sequences such as "\?" or "\[".
    self.assertEqual(quote('foø'), 'fo\xc3\xb8')
    self.assertEqual(quote('"foø'), '\\"fo\xc3\xb8')
    self.assertEqual(quote('whät?'), 'wh\xc3\xa4t?')
    self.assertEqual(quote('"whät?"'), '"wh\xc3\xa4t\\?"')
    self.assertEqual(quote('"[ø]"'), '"\\[\xc3\xb8\\]"')
    self.assertEqual(quote('[ø]'), '\\[\xc3\xb8\\]')
    self.assertEqual(quote('"foø*"'), '"fo\xc3\xb8\\*"')
    self.assertEqual(quote('"foø bar?"'), '"fo\xc3\xb8 bar\\?"')
    self.assertEqual(quote(u'*****@*****.**'), '*****@*****.**')
def testUnicode(self):
    # expected values are spelled as UTF-8 bytes and decoded for comparison
    utf8_cases = (
        ("foø", b"fo\xc3\xb8"),
        ('"foø', b'\\"fo\xc3\xb8'),
        ("whät?", b"wh\xc3\xa4t?"),
        ('"whät?"', b'"wh\xc3\xa4t\\?"'),
        ('"[ø]"', b'"\\[\xc3\xb8\\]"'),
        ("[ø]", b"\\[\xc3\xb8\\]"),
        ('"foø*"', b'"fo\xc3\xb8\\*"'),
        ('"foø bar?"', b'"fo\xc3\xb8 bar\\?"'),
    )
    for raw, encoded in utf8_cases:
        self.assertEqual(quote(raw), encoded.decode("utf-8"))

    masked_address = u"*****@*****.**"
    self.assertEqual(quote(masked_address), "*****@*****.**")
def mangleSearchableText(value, config):
    """Rewrite a simple SearchableText query into per-term expressions.

    Falls back to ``getConfig()`` when no config is passed.  Values that are
    not a simple search are returned unchanged.  Otherwise every term is
    expanded via ``makeSimpleExpressions`` and the results are joined with
    spaces.  When a ``search_pattern`` is configured the pattern is filled in
    and the result is returned as a one-element set (a literal query
    parameter); without a pattern the joined string is returned.
    """
    config = config or getConfig()
    pattern = getattr(config, 'search_pattern', u'')
    if pattern:
        pattern = pattern.encode('utf-8')
    distance = getattr(config, 'levenshtein_distance', 0)

    if not isSimpleSearch(value):
        return value

    expressions = []
    base_expressions = []
    for term in splitSimpleSearch(value):
        expression, base_expression = makeSimpleExpressions(term, distance)
        expressions.append(expression)
        base_expressions.append(base_expression)

    base_value = ' '.join(base_expressions)
    value = ' '.join(expressions)
    if not pattern:
        return value
    formatted = pattern.format(value=quote(value), base_value=base_value)
    return set([formatted])  # add literal query parameter
def makeSimpleExpressions(term, levenstein_distance):
    """Build the search expressions for one term of a simple query.

    Returns a ``(value, base_value)`` tuple, both wrapped in parentheses.
    A truthy ``levenstein_distance`` adds a ``~N`` suffix; the configured
    ``prefix_wildcard`` flag adds a leading ``*`` to the wildcard variant of
    plain terms.
    """
    config = getConfig()
    leading_star = "*" if getattr(config, "prefix_wildcard", False) else ""
    fuzzy_suffix = "~%s" % levenstein_distance if levenstein_distance else ""

    if '"' in term:
        # quoted literals: value and base value are the same expression
        literal = "%s%s" % (term, fuzzy_suffix)
        return "(%s)" % literal, "(%s)" % literal

    if isWildCard(term):
        wildcard_expr = prepare_wildcard(term)
        without_wildcards = quote(term.replace("*", "").replace("?", ""))
        return "(%s)" % wildcard_expr, "(%s)" % without_wildcards

    # plain term: match it as a prefix or (optionally fuzzy) as given
    combined = "%s%s* OR %s%s" % (
        leading_star,
        prepare_wildcard(term),
        term,
        fuzzy_suffix,
    )
    return "(%s)" % combined, "(%s)" % term
def testSolrSpecifics(self):
    # http://wiki.apache.org/solr/SolrQuerySyntax
    cases = (
        # Seems to be ok to quote function
        (
            '"recip(rord(myfield),1,2,3)"',
            '"recip\\(rord\\(myfield\\),1,2,3\\)"',
        ),
        ("[* TO NOW]", "[* TO NOW]"),
        (
            "[1976-03-06T23:59:59.999Z TO *]",
            "[1976-03-06T23:59:59.999Z TO *]",
        ),
        (
            "[1995-12-31T23:59:59.999Z TO 2007-03-06T00:00:00Z]",
            "[1995-12-31T23:59:59.999Z TO 2007-03-06T00:00:00Z]",
        ),
        (
            "[NOW-1YEAR/DAY TO NOW/DAY+1DAY]",
            "[NOW-1YEAR\\/DAY TO NOW\\/DAY+1DAY]",
        ),
        (
            "[1976-03-06T23:59:59.999Z TO 1976-03-06T23:59:59.999Z+1YEAR]",
            "[1976-03-06T23:59:59.999Z TO 1976-03-06T23:59:59.999Z+1YEAR]",
        ),
        # quoting date operators seems to be ok too
        (
            "[1976-03-06T23:59:59.999Z/YEAR TO 1976-03-06T23:59:59.999Z]",
            "[1976-03-06T23:59:59.999Z\\/YEAR TO 1976-03-06T23:59:59.999Z]",
        ),
    )
    for raw, expected in cases:
        self.assertEqual(quote(raw), expected)
def testQuotingOperatorsGrouping(self):
    # operators (+, -, NOT, OR, AND) and explicit grouping are preserved
    expectations = [
        ('+return +"pink panther"', '(+return +"pink panther")'),
        ('+jakarta lucene', '(+jakarta lucene)'),
        ('"jakarta apache" -"Apache Lucene"',
         '("jakarta apache" -"Apache Lucene")'),
        ('"jakarta apache" NOT "Apache Lucene"',
         '("jakarta apache" NOT "Apache Lucene")'),
        ('"jakarta apache" OR jakarta', '("jakarta apache" OR jakarta)'),
        ('"jakarta apache" AND "Apache Lucene"',
         '("jakarta apache" AND "Apache Lucene")'),
        ('(jakarta OR apache) AND website',
         '((jakarta OR apache) AND website)'),
        ('(a AND (b OR c))', '(a AND (b OR c))'),
        ('((a AND b) OR c)', '((a AND b) OR c)'),
    ]
    for query, grouped in expectations:
        self.assertEqual(quote(query), grouped)
def searchterms_from_value(value):
    """Split a search query into a list of quoted search terms.

    Parentheses and wildcards are removed up front so that ``quote()`` does
    not try to escape them.  ``quote()`` may add parentheses of its own,
    which are stripped again before splitting on whitespace.
    """
    cleaned = strip_wildcards(strip_parens(value))
    ungrouped = strip_parens(quote(cleaned))
    return ungrouped.split()
def mangle_searchable_text_query(value, pattern):
    """Fill ``pattern`` with the lower-cased, quoted query and its wildcards.

    The pattern receives three fields: ``value`` (the query without
    wildcards, quoted), ``value_lwc`` (leading wildcards) and ``value_twc``
    (trailing wildcards).
    """
    lowered = value.lower()
    leading = leading_wildcards(lowered)
    trailing = trailing_wildcards(lowered)
    bare = strip_wildcards(lowered)
    return pattern.format(value=quote(bare), value_lwc=leading, value_twc=trailing)
def mangle_searchable_text_query(value, pattern):
    """Render ``pattern`` for a lower-cased search query.

    Leading and trailing wildcards are extracted separately and passed as
    ``value_lwc`` / ``value_twc``; the remaining text is quoted and passed
    as ``value``.
    """
    query = value.lower()
    fields = {
        'value_lwc': leading_wildcards(query),
        'value_twc': trailing_wildcards(query),
    }
    fields['value'] = quote(strip_wildcards(query))
    return pattern.format(**fields)
def testQuotingWildcardSearches(self):
    # inner/trailing wildcards stay, leading ones go, repeats collapse
    cases = (
        ("te?t", "te?t"),
        ("test*", "test*"),
        ("test**", "test*"),
        ("te*t", "te*t"),
        ("?test", "test"),
        ("*test", "test"),
    )
    for raw, expected in cases:
        self.assertEqual(quote(raw), expected)
def testQuotingWildcardSearches(self):
    # wildcards inside or at the end of a term are kept as they are
    for term in ('te?t', 'test*', 'te*t'):
        self.assertEqual(quote(term), term)
    # repeated trailing wildcards collapse, leading wildcards are removed
    for term, expected in (('test**', 'test*'), ('?test', 'test'), ('*test', 'test')):
        self.assertEqual(quote(term), expected)
def testQuotingRangeSearches(self):
    # complete range expressions pass through unchanged
    unchanged = [
        '[* TO NOW]',
        '[1972-05-11T00:00:00.000Z TO *]',
        '[1972-05-11T00:00:00.000Z TO 2011-05-10T01:30:00.000Z]',
        '[20020101 TO 20030101]',
        '{Aida TO Carmen}',
    ]
    for query in unchanged:
        self.assertEqual(quote(query), query)
    # a missing bound is filled in with *
    self.assertEqual(quote('{Aida TO}'), '{Aida TO *}')
    self.assertEqual(quote('{TO Carmen}'), '{* TO Carmen}')
def testQuotingRangeSearches(self):
    cases = (
        ("[* TO NOW]", "[* TO NOW]"),
        ("[1972-05-11T00:00:00.000Z TO *]", "[1972-05-11T00:00:00.000Z TO *]"),
        (
            "[1972-05-11T00:00:00.000Z TO 2011-05-10T01:30:00.000Z]",
            "[1972-05-11T00:00:00.000Z TO 2011-05-10T01:30:00.000Z]",
        ),
        ("[20020101 TO 20030101]", "[20020101 TO 20030101]"),
        ("{Aida TO Carmen}", "{Aida TO Carmen}"),
        # open-ended ranges get an explicit * bound
        ("{Aida TO}", "{Aida TO *}"),
        ("{TO Carmen}", "{* TO Carmen}"),
    )
    for raw, expected in cases:
        self.assertEqual(quote(raw), expected)
def testSolrSpecifics(self):
    # http://wiki.apache.org/solr/SolrQuerySyntax
    # Seems to be ok to quote function.  The expected backslashes are
    # written as "\\" so the literal does not rely on the invalid escape
    # sequences "\(" and "\)".
    self.assertEqual(quote('"recip(rord(myfield),1,2,3)"'),
                     '"recip\\(rord\\(myfield\\),1,2,3\\)"')
    # range and date-math expressions are expected back unchanged
    self.assertEqual(quote('[* TO NOW]'), '[* TO NOW]')
    self.assertEqual(quote('[1976-03-06T23:59:59.999Z TO *]'),
                     '[1976-03-06T23:59:59.999Z TO *]')
    self.assertEqual(
        quote('[1995-12-31T23:59:59.999Z TO 2007-03-06T00:00:00Z]'),
        '[1995-12-31T23:59:59.999Z TO 2007-03-06T00:00:00Z]')
    self.assertEqual(quote('[NOW-1YEAR/DAY TO NOW/DAY+1DAY]'),
                     '[NOW-1YEAR/DAY TO NOW/DAY+1DAY]')
    self.assertEqual(
        quote('[1976-03-06T23:59:59.999Z TO 1976-03-06T23:59:59.999Z+1YEAR]'),
        '[1976-03-06T23:59:59.999Z TO 1976-03-06T23:59:59.999Z+1YEAR]')
    self.assertEqual(
        quote('[1976-03-06T23:59:59.999Z/YEAR TO 1976-03-06T23:59:59.999Z]'),
        '[1976-03-06T23:59:59.999Z/YEAR TO 1976-03-06T23:59:59.999Z]')
def makeSimpleExpressions(term, levenstein_distance):
    '''Build the search expressions for one term of a simple query.

    Returns a tuple of two parenthesised strings: the expression for
    "value" and the one for "base_value".  A truthy levenstein distance is
    appended as a "~N" suffix to quoted literals and plain terms.'''
    fuzzy_suffix = '~%s' % levenstein_distance if levenstein_distance else ''

    if '"' in term:
        # quoted literals: both expressions are identical
        literal = '%s%s' % (term, fuzzy_suffix)
        return '(%s)' % literal, '(%s)' % literal

    if isWildCard(term):
        wildcard_expr = prepare_wildcard(term)
        without_wildcards = quote(term.replace('*', '').replace('?', ''))
        return '(%s)' % wildcard_expr, '(%s)' % without_wildcards

    # plain term: prefix match OR the (optionally fuzzy) term itself
    combined = '%s* OR %s%s' % (prepare_wildcard(term), term, fuzzy_suffix)
    return '(%s)' % combined, '(%s)' % term
def makeSimpleExpressions(term, levenstein_distance):
    '''Return the ("value", "base_value") expressions for a single term.

    Both results are wrapped in parentheses.  The levenstein distance, when
    truthy, is added as a "~N" suffix; wildcards are handled through
    prepare_wildcard().'''
    suffix = ""
    if levenstein_distance:
        suffix = "~%s" % levenstein_distance

    base_expr = term
    if '"' in term:
        # quoted literals keep their text and share one expression
        expr = base_expr = "%s%s" % (term, suffix)
    elif isWildCard(term):
        expr = prepare_wildcard(term)
        base_expr = quote(term.replace("*", "").replace("?", ""))
    else:
        expr = "%s* OR %s%s" % (prepare_wildcard(term), term, suffix)

    wrapped = "(%s)" % expr
    wrapped_base = "(%s)" % base_expr
    return wrapped, wrapped_base
def mangleSearchableText(value, config):
    """Expand a simple SearchableText query term by term.

    Values that are not a simple search are returned as they are.  For
    simple searches each term is expanded with ``makeSimpleExpressions`` and
    the pieces are joined with spaces.  If the config defines a
    ``search_pattern`` the formatted pattern is returned as a one-element
    set (a literal query parameter), otherwise the joined string.
    """
    pattern = getattr(config, "search_pattern", "")
    distance = getattr(config, "levenshtein_distance", 0)

    if not isSimpleSearch(value):
        return value

    expressions = []
    base_expressions = []
    for term in splitSimpleSearch(value):
        expression, base_expression = makeSimpleExpressions(term, distance)
        expressions.append(expression)
        base_expressions.append(base_expression)

    base_value = " ".join(base_expressions)
    value = " ".join(expressions)
    if not pattern:
        return value
    formatted = pattern.format(value=quote(value), base_value=base_value)
    return set([formatted])  # add literal query parameter
def mangleSearchableText(value, config, force_complex_search=False):
    """Rewrite a SearchableText query according to the search configuration.

    ``config`` falls back to ``getConfig()`` when falsy.  The settings read
    from it are ``search_pattern``, ``force_simple_search``,
    ``allow_complex_search``, ``levenshtein_distance`` and
    ``prefix_wildcard``.

    Returns one of:

    * ``value`` unchanged, when simple search is not forced and the value is
      not a simple search;
    * the stripped value without its ``solr:`` prefix, when complex search
      is allowed and requested (via prefix or ``force_complex_search``);
    * a one-element set holding the formatted ``search_pattern`` (a literal
      query parameter), when a pattern is configured;
    * otherwise the space-joined per-term expressions.
    """
    config = config or getConfig()
    pattern = getattr(config, "search_pattern", u"")
    force_simple_search = getattr(config, "force_simple_search", False)
    allow_complex_search = getattr(config, "allow_complex_search", False)
    levenstein_distance = getattr(config, "levenshtein_distance", 0)
    prefix_wildcard = getattr(config, "prefix_wildcard", False)
    value_parts = []
    base_value_parts = []

    # a leading "solr:" marks the query as an explicit complex search
    stripped = value.strip()
    force_complex_search_prefix = False
    if stripped.startswith("solr:"):
        stripped = stripped.replace("solr:", "", 1).strip()
        force_complex_search_prefix = True

    # NOTE(review): this return runs before the complex-search check below,
    # so a non-simple "solr:" query comes back with its prefix intact unless
    # simple search is forced -- confirm this ordering is intended.
    if not force_simple_search and not isSimpleSearch(value):
        return value

    if allow_complex_search and (force_complex_search_prefix or force_complex_search):
        # FIXME: fold in catalog solr_complex_search parameter check
        return stripped

    if force_simple_search:
        value = removeSpecialCharactersAndOperators(value)

    for term in splitSimpleSearch(value):
        (term_value, term_base_value) = makeSimpleExpressions(term, levenstein_distance)
        value_parts.append(term_value)
        base_value_parts.append(term_base_value)

    base_value = " ".join(base_value_parts)
    value = " ".join(value_parts)
    if pattern:
        value = pattern.format(
            value=quote(value, prefix_wildcard=prefix_wildcard), base_value=base_value
        )
        return set([value])  # add literal query parameter
    # The former trailing "if pattern: pattern = pattern.encode('utf-8')"
    # was unreachable (a truthy pattern has already returned above) and its
    # result was never used, so it has been dropped.
    return value
def makeSimpleExpressions(term, levenstein_distance):
    '''Return the ("value", "base_value") expressions for a single term.

    Only the first element is wrapped in parentheses.  A truthy levenstein
    distance is appended as a "~N" suffix to quoted literals and plain
    terms.'''
    fuzzy_suffix = '~%s' % levenstein_distance if levenstein_distance else ''

    if '"' in term:
        # quoted literals: both expressions share the same text
        expression = base_expression = '%s%s' % (term, fuzzy_suffix)
    elif isWildCard(term):
        expression = prepare_wildcard(term)
        base_expression = quote(term.replace('*', '').replace('?', ''))
    else:
        expression = '%s* OR %s%s' % (prepare_wildcard(term), term, fuzzy_suffix)
        base_expression = term

    # Netsight: we removed the parenthesis around base_value
    # the first element of the returned tuple (value) is
    # not used.
    return '(%s)' % expression, base_expression
def testQuotingProximitySearches(self):
    # a proximity suffix on a quoted phrase is left untouched
    proximity_query = '"jakarta apache"~10'
    self.assertEqual(quote(proximity_query), '"jakarta apache"~10')
def testQuotingFuzzySearches(self):
    # fuzzy markers, with or without a similarity value, are kept verbatim
    for term in ('roam~', 'roam~0.8'):
        self.assertEqual(quote(term), term)
def testQuotingFuzzySearches(self):
    # the ~ fuzzy operator must survive quoting unchanged
    cases = (
        ("roam~", "roam~"),
        ("roam~0.8", "roam~0.8"),
    )
    for raw, expected in cases:
        self.assertEqual(quote(raw), expected)
def testQuoting(self):
    # http://lucene.apache.org/java/2_3_2/queryparsersyntax.html
    # All backslashes in the literals below are written as "\\"; the values
    # are the same as before, but no longer depend on invalid escape
    # sequences such as "\)", "\?", "\:", "\[" or "\{".
    self.assertEqual(quote(''), '')
    self.assertEqual(quote(' '), '')
    self.assertEqual(quote('foo'), 'foo')
    self.assertEqual(quote('foo '), 'foo')
    self.assertEqual(quote('"foo"'), '"foo"')
    self.assertEqual(quote('"foo'), '\\"foo')
    self.assertEqual(quote('foo"'), 'foo\\"')
    self.assertEqual(quote('foo bar'), '(foo bar)')
    self.assertEqual(quote('"foo bar" bah'), '("foo bar" bah)')
    self.assertEqual(quote('\\['), '\\[')
    self.assertEqual(quote(')'), '\\)')
    self.assertEqual(quote('"(foo bar)" bah'), '("\\(foo bar\\)" bah)')
    self.assertEqual(quote('"(foo\\"bar)" bah'), '("\\(foo\\"bar\\)" bah)')
    self.assertEqual(quote('"foo bar"'), '"foo bar"')
    self.assertEqual(quote('"foo bar'), '(\\"foo bar)')
    self.assertEqual(quote('foo bar what?'), '(foo bar what?)')
    self.assertEqual(quote('P|This&That'), 'P|This&That')
    self.assertEqual(quote('[]'), '')
    self.assertEqual(quote('()'), '')
    self.assertEqual(quote('{}'), '')
    self.assertEqual(quote('...""'), '...\\"\\"')
    self.assertEqual(quote('\\'), '\\\\')
    # Search for \ has to be quoted
    self.assertEqual(quote('\\?'), '\\?')
    self.assertEqual(quote('*****@*****.**'), '*****@*****.**')
    self.assertEqual(
        quote('http://machine/folder and item and some/path and and amilli3*'),
        '(http\\://machine/folder and item and some/path and and amilli3*)')
    self.assertEqual(quote('"[]"'), '"\\[\\]"')
    self.assertEqual(quote('"{}"'), '"\\{\\}"')
    self.assertEqual(quote('"()"'), '"\\(\\)"')
    self.assertEqual(quote('foo and bar and 42"*'), '(foo and bar and 42\\"\\*)')
    # Can't use ? or * as beginning of new query
    self.assertEqual(quote('"fix and it"*'), '"fix and it"')
    self.assertEqual(quote('"fix and it"?'), '"fix and it"')
    self.assertEqual(quote('foo and bar and [foobar at foo.com]*'),
                     '(foo and bar and \\[foobar at foo.com\\])')
def testQuotingOperatorsGrouping(self):
    cases = (
        # word operators and explicit grouping
        ('+return +"pink panther"', '(+return +"pink panther")'),
        ("+jakarta lucene", "(+jakarta lucene)"),
        ('"jakarta apache" -"Apache Lucene"', '("jakarta apache" -"Apache Lucene")'),
        (
            '"jakarta apache" NOT "Apache Lucene"',
            '("jakarta apache" NOT "Apache Lucene")',
        ),
        ('"jakarta apache" OR jakarta', '("jakarta apache" OR jakarta)'),
        (
            '"jakarta apache" AND "Apache Lucene"',
            '("jakarta apache" AND "Apache Lucene")',
        ),
        ("(jakarta OR apache) AND website", "((jakarta OR apache) AND website)"),
        ("(a AND (b OR c))", "(a AND (b OR c))"),
        ("((a AND b) OR c)", "((a AND b) OR c)"),
        # symbolic operators (|| and &&)
        ('"jakarta apache" || jakarta', '("jakarta apache" || jakarta)'),
        (
            '"jakarta apache" && "Apache Lucene"',
            '("jakarta apache" && "Apache Lucene")',
        ),
        ("(jakarta || apache) && website", "((jakarta || apache) && website)"),
        ("(a && (b || c))", "(a && (b || c))"),
        ("((a && b) || c)", "((a && b) || c)"),
        ("P||This&&That", "(P||This&&That)"),
    )
    for raw, expected in cases:
        self.assertEqual(quote(raw), expected)
def testQuotingBoostingTerm(self):
    # boost factors on terms and phrases are not escaped
    expectations = [
        ('jakarta^4 apache', '(jakarta^4 apache)'),
        ('jakarta^0.2 apache', '(jakarta^0.2 apache)'),
        ('"jakarta apache"^4 "Apache Lucene"',
         '("jakarta apache"^4 "Apache Lucene")'),
    ]
    for query, boosted in expectations:
        self.assertEqual(quote(query), boosted)
def testQuotingOperatorsGrouping(self):
    expectations = [
        # word operators and explicit grouping
        ('+return +"pink panther"', '(+return +"pink panther")'),
        ('+jakarta lucene', '(+jakarta lucene)'),
        ('"jakarta apache" -"Apache Lucene"',
         '("jakarta apache" -"Apache Lucene")'),
        ('"jakarta apache" NOT "Apache Lucene"',
         '("jakarta apache" NOT "Apache Lucene")'),
        ('"jakarta apache" OR jakarta', '("jakarta apache" OR jakarta)'),
        ('"jakarta apache" AND "Apache Lucene"',
         '("jakarta apache" AND "Apache Lucene")'),
        ('(jakarta OR apache) AND website',
         '((jakarta OR apache) AND website)'),
        ('(a AND (b OR c))', '(a AND (b OR c))'),
        ('((a AND b) OR c)', '((a AND b) OR c)'),
        # symbolic operators (|| and &&)
        ('"jakarta apache" || jakarta', '("jakarta apache" || jakarta)'),
        ('"jakarta apache" && "Apache Lucene"',
         '("jakarta apache" && "Apache Lucene")'),
        ('(jakarta || apache) && website',
         '((jakarta || apache) && website)'),
        ('(a && (b || c))', '(a && (b || c))'),
        ('((a && b) || c)', '((a && b) || c)'),
        ('P||This&&That', '(P||This&&That)'),
    ]
    for query, grouped in expectations:
        self.assertEqual(quote(query), grouped)
def buildQueryAndParameters(self, default=None, **args):
    """Build the per-field query clauses plus the remaining parameters.

    Query parameters are first split off from ``args`` and cleaned up, then
    the remaining arguments are prepared and mangled before being turned
    into clauses.  ``default`` is the value for the schema's default search
    field.

    Returns a ``(query, params)`` tuple where ``query`` maps field names
    (``None`` for the default field) to clause strings.  An empty value for
    a named field aborts the build and returns ``({}, params)``.
    """
    schema = self.getManager().getSchema() or {}
    params = cleanupQueryParameters(subtractQueryParameters(args), schema)
    config = self.getConfig()
    prepareData(args)
    mangleQuery(args, config, schema)
    logger.debug('building query for "%r", %r', default, args)
    schema = self.getManager().getSchema() or {}
    default_field = getattr(schema, 'defaultSearchField', None)
    args[None] = default

    clauses = {}
    for name, value in sorted(args.items()):
        index = schema.get(name or default_field, None)
        if index is None or not index.indexed:
            logger.info(
                'dropping unknown search attribute "%s" '
                ' (%r) for query: %r', name, value, args
            )
            continue

        if isinstance(value, bool):
            value = str(value).lower()
        elif not value:
            # solr doesn't like empty fields (+foo:"")
            if not name:
                continue
            logger.info(
                'empty search term form "%s:%s", aborting buildQuery' % (
                    name,
                    value
                )
            )
            return {}, params
        elif index.class_ == 'solr.BoolField':
            candidates = value if isinstance(value, (tuple, list)) else [value]
            falses = '0', 'False', MV
            truths = set(bool(item) and item not in falses for item in candidates)
            if len(truths) != 1:
                assert len(truths) == 2     # just to make sure
                continue                    # skip when "true or false"
            value = str(truths.pop()).lower()
        elif isinstance(value, (tuple, list)):
            # list items should be treated as literals, but
            # nevertheless only get quoted when necessary
            alternatives = [quote_iterable_item(item) for item in value]
            value = '(%s)' % ' OR '.join(alternatives)
        elif isinstance(value, set):
            # sets are taken literally
            joined = ' OR '.join(value)
            clauses[name] = joined if len(value) == 1 else '(%s)' % joined
            continue
        elif isinstance(value, basestring):
            if index.class_ == 'solr.TextField':
                if isWildCard(value):
                    value = prepare_wildcard(value)
                value = quote(value, textfield=True)
                # if we have an intra-word hyphen, we need quotes
                has_escaped_sign = '\\-' in value or '\\+' in value
                if has_escaped_sign and value[0] != '"':
                    value = '"%s"' % value
            else:
                value = quote(value)
            if not value:
                # don't search for empty strings, even quoted
                continue
        else:
            logger.info(
                'skipping unsupported value "%r" (%s)', value, name
            )
            continue

        if name is not None:
            value = '+%s:%s' % (name, value)
        elif value and value[0] not in '+-':
            value = '+%s' % value
        clauses[name] = value

    logger.debug('built query "%s"', clauses)
    if clauses:
        optimizeQueryParameters(clauses, params)
    return clauses, params
def buildQuery(self, default=None, **args):
    """Build a mapping of field name to Solr query clause.

    Each keyword argument names a schema field; ``default`` is used for the
    schema's default search field (stored under the ``None`` key) when both
    are set.  Unknown or unindexed fields, empty values and unsupported
    value types are skipped.  Forward slashes in the resulting clauses are
    escaped.
    """
    logger.debug('building query for "%r", %r', default, args)
    schema = self.getManager().getSchema() or {}
    default_field = getattr(schema, 'defaultSearchField', None)
    if default is not None and default_field is not None:
        args[None] = default

    def quoteitem(term):
        # list items should be treated as literals, but
        # nevertheless only get quoted when necessary
        if isinstance(term, unicode):
            term = term.encode('utf-8')
        plain = quote(term)
        if plain.startswith('"') or plain == term:
            return plain
        return quote('"' + term + '"')

    clauses = {}
    for name, value in sorted(args.items()):
        index = schema.get(name or default_field, None)
        if index is None or not index.indexed:
            logger.warning(
                'dropping unknown search attribute "%s" '
                ' (%r) for query: %r', name, value, args)
            continue

        if isinstance(value, bool):
            value = str(value).lower()
        elif not value:
            # solr doesn't like empty fields (+foo:"")
            continue
        elif index.class_ == 'solr.BoolField':
            candidates = value if isinstance(value, (tuple, list)) else [value]
            falses = '0', 'False', MV
            truths = set(bool(item) and item not in falses for item in candidates)
            if len(truths) != 1:
                assert len(truths) == 2     # just to make sure
                continue                    # skip when "true or false"
            value = str(truths.pop()).lower()
        elif isinstance(value, (tuple, list)):
            value = '(%s)' % ' OR '.join([quoteitem(item) for item in value])
        elif isinstance(value, set):
            # sets are taken literally
            joined = ' OR '.join(value)
            literal = joined if len(value) == 1 else '(%s)' % joined
            clauses[name] = literal.replace('/', '\\/')
            continue
        elif isinstance(value, basestring):
            if index.class_ == 'solr.TextField':
                if isWildCard(value):
                    value = prepare_wildcard(value)
                value = quote(value, textfield=True)
                # if we have an intra-word hyphen, we need quotes
                has_escaped_sign = '\\-' in value or '\\+' in value
                if has_escaped_sign and value[0] != '"':
                    value = '"%s"' % value
            else:
                value = quote(value)
            if not value:
                # don't search for empty strings, even quoted
                continue
        else:
            logger.info('skipping unsupported value "%r" (%s)', value, name)
            continue

        if name is not None:
            value = '+%s:%s' % (name, value)
        elif value and value[0] not in '+-':
            value = '+%s' % value
        # Since Solr 4.0 slashes must be escaped
        # see: http://wiki.apache.org/solr/SolrQuerySyntax
        clauses[name] = value.replace('/', '\\/')

    logger.debug('built query "%s"', clauses)
    return clauses
def mangleQuery(keywords, config, schema):
    """Translate zope-specific query parameters into solr constructs.

    ``keywords`` is modified in place and nothing is returned.  Dict values
    passed in ``keywords`` are modified as well (their ``query`` entry is
    removed).

    The first pass unifies the parameter styles: ``<name>_usage`` keys,
    dict values with a ``query`` entry and objects with a ``query``
    attribute are reduced to a plain value, with the extra arguments kept
    aside; keys listed in ``ignored`` are dropped.  The second pass rewrites
    individual keys (``SearchableText``, path-like indexes,
    ``effectiveRange``, ranges, operators, dates, ...).

    Both passes add and remove keys, so they iterate over a snapshot of
    ``keywords.items()`` rather than over the live dictionary.
    """
    extras = {}
    for key, value in list(keywords.items()):
        if key.endswith('_usage'):          # convert old-style parameters
            category, spec = value.split(':', 1)
            extras[key[:-6]] = {category: spec}
            del keywords[key]
        elif isinstance(value, dict):       # unify dict parameters
            keywords[key] = value['query']
            del value['query']
            extras[key] = value
        elif hasattr(value, 'query'):       # unify object parameters
            keywords[key] = value.query
            extra = dict()
            for arg in query_args:
                arg_val = getattr(value, arg, None)
                if arg_val is not None:
                    extra[arg] = arg_val
            extras[key] = extra
        elif key in ignored:
            del keywords[key]

    # find EPI indexes: names for which the schema has all three of the
    # `_string`, `_depth` and `_parents` fields
    if schema:
        epi_indexes = {}
        for name in schema.keys():
            parts = name.split('_')
            if parts[-1] in ['string', 'depth', 'parents']:
                count = epi_indexes.get(parts[0], 0)
                epi_indexes[parts[0]] = count + 1
        epi_indexes = [k for k, v in epi_indexes.items() if v == 3]
    else:
        epi_indexes = ['path']

    for key, value in list(keywords.items()):
        args = extras.get(key, {})
        if key == 'SearchableText':
            pattern = getattr(config, 'search_pattern', '')
            simple_term = isSimpleTerm(value)
            if pattern and isSimpleSearch(value):
                base_value = value
                if simple_term:             # use prefix/wildcard search
                    value = '(%s* OR %s)' % (prepare_wildcard(value), value)
                elif isWildCard(value):
                    value = prepare_wildcard(value)
                    base_value = quote(value.replace('*', '').replace('?', ''))
                # simple queries use custom search pattern
                value = pattern.format(value=quote(value),
                                       base_value=base_value)
                keywords[key] = set([value])    # add literal query parameter
                continue
            elif simple_term:               # use prefix/wildcard search
                keywords[key] = '(%s* OR %s)' % (
                    prepare_wildcard(value), value)
                continue
        if key in epi_indexes:
            # the value moves from `<key>` to `<key>_parents`
            path = keywords['%s_parents' % key] = value
            del keywords[key]
            if 'depth' in args:
                depth = int(args['depth'])
                if depth >= 0:
                    if not isinstance(value, (list, tuple)):
                        path = [path]
                    tmpl = '(+%s_depth:[%d TO %d] AND +%s_parents:%s)'
                    params = keywords['%s_parents' % key] = set()
                    for p in path:
                        base = len(p.split('/'))
                        # depth 0 yields the range [base TO base], any
                        # other depth [base + 1 TO base + depth]
                        params.add(tmpl % (key, base + (depth and 1),
                                           base + depth, key, p))
                del args['depth']
        elif key == 'effectiveRange':
            if isinstance(value, DateTime):
                steps = getattr(config, 'effective_steps', 1)
                if steps > 1:
                    # round the timestamp down to a multiple of `steps`
                    value = DateTime(value.timeTime() // steps * steps)
                value = iso8601date(value)
            del keywords[key]
            keywords['effective'] = '[* TO %s]' % value
            keywords['expires'] = '[%s TO *]' % value
        elif key == 'show_inactive':
            del keywords[key]           # marker for `effectiveRange`
        elif 'range' in args:
            if not isinstance(value, (list, tuple)):
                value = [value]
            payload = map(iso8601date, value)
            keywords[key] = ranges[args['range']] % tuple(payload)
            del args['range']
        elif 'operator' in args:
            if isinstance(value, (list, tuple)) and len(value) > 1:
                sep = ' %s ' % args['operator'].upper()
                value = sep.join(map(str, map(iso8601date, value)))
                keywords[key] = '(%s)' % value
            del args['operator']
        elif key == 'allowedRolesAndUsers':
            if getattr(config, 'exclude_user', False):
                token = 'user$' + getSecurityManager().getUser().getId()
                if token in value:
                    value.remove(token)
        elif isinstance(value, DateTime):
            keywords[key] = iso8601date(value)
        elif not isinstance(value, basestring):
            assert not args, 'unsupported usage: %r' % args
def buildQuery(self, default=None, **args):
    """Build a mapping of field name to Solr query clause.

    Each keyword argument names a schema field; ``default`` is the value
    for the schema's default search field and is stored under the ``None``
    key.  Unknown or unindexed fields, empty values and unsupported value
    types are skipped.
    """
    logger.debug('building query for "%r", %r', default, args)
    schema = self.getManager().getSchema() or {}
    default_field = getattr(schema, 'defaultSearchField', None)
    args[None] = default

    def quoteitem(term):
        # list items should be treated as literals, but
        # nevertheless only get quoted when necessary
        if isinstance(term, unicode):
            term = term.encode('utf-8')
        plain = quote(term)
        if plain.startswith('"') or plain == term:
            return plain
        return quote('"' + term + '"')

    clauses = {}
    for name, value in args.items():
        index = schema.get(name or default_field, None)
        if index is None or not index.indexed:
            logger.warning(
                'dropping unknown search attribute "%s" '
                ' (%r) for query: %r', name, value, args)
            continue

        if isinstance(value, bool):
            value = str(value).lower()
        elif not value:
            # solr doesn't like empty fields (+foo:"")
            continue
        elif index.class_ == 'solr.BoolField':
            candidates = value if isinstance(value, (tuple, list)) else [value]
            falses = '0', 'False', MV
            truths = set(bool(item) and item not in falses for item in candidates)
            if len(truths) != 1:
                assert len(truths) == 2     # just to make sure
                continue                    # skip when "true or false"
            value = str(truths.pop()).lower()
        elif isinstance(value, (tuple, list)):
            value = '(%s)' % ' OR '.join([quoteitem(item) for item in value])
        elif isinstance(value, set):
            # sets are taken literally
            joined = ' OR '.join(value)
            clauses[name] = joined if len(value) == 1 else '(%s)' % joined
            continue
        elif isinstance(value, basestring):
            if index.class_ == 'solr.TextField':
                if isWildCard(value):
                    value = prepare_wildcard(value)
                value = quote(value, textfield=True)
                # if we have an intra-word hyphen, we need quotes
                has_escaped_sign = '\\-' in value or '\\+' in value
                if has_escaped_sign and value[0] != '"':
                    value = '"%s"' % value
            else:
                value = quote(value)
            if not value:
                # don't search for empty strings, even quoted
                continue
        else:
            logger.info('skipping unsupported value "%r" (%s)', value, name)
            continue

        if name is not None:
            value = '+%s:%s' % (name, value)
        elif value and value[0] not in '+-':
            value = '+%s' % value
        clauses[name] = value

    logger.debug('built query "%s"', clauses)
    return clauses
def testQuoting(self): # http://lucene.apache.org/java/2_3_2/queryparsersyntax.html self.assertEqual(quote(""), "") self.assertEqual(quote(" "), "") self.assertEqual(quote("foo"), "foo") self.assertEqual(quote("foo "), "foo") self.assertEqual(quote('"foo"'), '"foo"') self.assertEqual(quote('"foo'), '\\"foo') self.assertEqual(quote('foo"'), 'foo\\"') self.assertEqual(quote("foo bar"), "(foo bar)") self.assertEqual(quote('"foo bar" bah'), '("foo bar" bah)') self.assertEqual(quote("\\["), "\\[") self.assertEqual(quote(")"), "\\)") self.assertEqual(quote('"(foo bar)" bah'), '("\\(foo bar\\)" bah)') self.assertEqual(quote('"(foo\\"bar)" bah'), '("\\(foo\\"bar\\)" bah)') self.assertEqual(quote('"foo bar"'), '"foo bar"') self.assertEqual(quote('"foo bar'), '(\\"foo bar)') self.assertEqual(quote("foo bar what?"), "(foo bar what?)") self.assertEqual(quote("P|This&That"), "P|This&That") self.assertEqual(quote("[]"), "") self.assertEqual(quote("()"), "") self.assertEqual(quote("{}"), "") self.assertEqual(quote('...""'), '...\\"\\"') self.assertEqual(quote("\\"), "\\\\") # Search for \ has to be quoted self.assertEqual(quote("\\?"), "\\?") self.assertEqual(quote("*****@*****.**"), "*****@*****.**") self.assertEqual( quote("http://machine/folder and item and some/path " "and and amilli3*"), "(http\\:\\/\\/machine\\/folder and item and " "some\\/path and and amilli3*)", ) self.assertEqual(quote('"[]"'), '"\\[\\]"') self.assertEqual(quote('"{}"'), '"\\{\\}"') self.assertEqual(quote('"()"'), '"\\(\\)"') self.assertEqual(quote('foo and bar and 42"*'), "(foo and bar and " '42\\"\\*)') # Can't use ? or * as beginning of new query self.assertEqual(quote('"fix and it"*'), '"fix and it"') self.assertEqual(quote('"fix and it"?'), '"fix and it"') self.assertEqual( quote("foo and bar and [foobar at foo.com]*"), "(foo and bar and \\[foobar at foo.com\\])", )