def makeSimpleExpressions(term, levenstein_distance):
    """Build the search expressions for one part of a query.

    Three kinds of term are distinguished:

    * terms containing a double quote are kept as literals and only get
      the fuzzy suffix (``~<distance>``) appended; the same expression is
      used for both results,
    * terms recognised by ``isWildCard`` are passed through
      ``prepare_wildcard``; the base value is the quoted term with ``*``
      and ``?`` stripped,
    * any other term becomes ``<prefix><prepared>* OR <term><fuzzy>``,
      where ``<prefix>`` is ``*`` when the config enables
      ``prefix_wildcard``; the base value is the unmodified term.

    Returns a tuple ``(value, base_value)``; both strings are wrapped in
    parentheses.
    """
    config = getConfig()
    leading_wildcard = "*" if getattr(config, "prefix_wildcard", False) else ""
    # a falsy distance (0, None, "") disables the fuzzy suffix entirely
    fuzzy_suffix = "~%s" % levenstein_distance if levenstein_distance else ""

    if '"' in term:
        # quoted literals
        literal = "%s%s" % (term, fuzzy_suffix)
        return "(%s)" % literal, "(%s)" % literal

    if isWildCard(term):
        expression = prepare_wildcard(term)
        stripped = quote(term.replace("*", "").replace("?", ""))
        return "(%s)" % expression, "(%s)" % stripped

    expression = "%s%s* OR %s%s" % (
        leading_wildcard,
        prepare_wildcard(term),
        term,
        fuzzy_suffix,
    )
    return "(%s)" % expression, "(%s)" % term
def makeSimpleExpressions(term, levenstein_distance):
    """Build the search expressions for one part of a query.

    Quoted literals (terms containing ``"``) only receive the fuzzy
    suffix ``~<distance>``.  Terms recognised by ``isWildCard`` are run
    through ``prepare_wildcard`` and their base value is the quoted term
    without ``*`` and ``?``.  Everything else is expanded to
    ``<prepared>* OR <term><fuzzy>`` with the raw term as base value.

    Returns a tuple ``(value, base_value)``, each wrapped in parentheses.
    """
    def wrapped(expression, base):
        # both results are always returned inside parentheses
        return '(%s)' % expression, '(%s)' % base

    # a falsy distance (0, None, '') means "no fuzzy matching"
    fuzzy = '~%s' % levenstein_distance if levenstein_distance else ''

    if '"' in term:
        # quoted literals
        literal = '%s%s' % (term, fuzzy)
        return wrapped(literal, literal)

    if isWildCard(term):
        prepared = prepare_wildcard(term)
        return wrapped(prepared, quote(term.replace('*', '').replace('?', '')))

    prefix_search = '%s* OR %s%s' % (prepare_wildcard(term), term, fuzzy)
    return wrapped(prefix_search, term)
def makeSimpleExpressions(term, levenstein_distance):
    """Return ``(value, base_value)`` search expressions for a query part.

    * A term containing a double quote is a literal: the optional fuzzy
      suffix ``~<distance>`` is appended and the result is used for both
      expressions.
    * A term for which ``isWildCard`` is true is handed to
      ``prepare_wildcard``; its base value is the quoted term with the
      ``*`` and ``?`` characters removed.
    * Any other term yields ``<prepared>* OR <term><fuzzy>`` and keeps the
      raw term as base value.

    Both returned strings are enclosed in parentheses.
    """
    fuzzy_suffix = ""
    if levenstein_distance:
        fuzzy_suffix = "~%s" % levenstein_distance

    if '"' in term:
        # quoted literals
        search_value = "%s%s" % (term, fuzzy_suffix)
        plain_value = search_value
    elif not isWildCard(term):
        search_value = "%s* OR %s%s" % (
            prepare_wildcard(term),
            term,
            fuzzy_suffix,
        )
        plain_value = term
    else:
        search_value = prepare_wildcard(term)
        without_wildcards = term.replace("*", "").replace("?", "")
        plain_value = quote(without_wildcards)

    return "(%s)" % search_value, "(%s)" % plain_value
def makeSimpleExpressions(term, levenstein_distance):
    """Return ``(value, base_value)`` search expressions for a query part.

    Terms containing a double quote are literals and only get the fuzzy
    suffix ``~<distance>``.  Terms recognised by ``isWildCard`` go through
    ``prepare_wildcard`` and use the quoted, wildcard-free term as base
    value.  All other terms become ``<prepared>* OR <term><fuzzy>`` with
    the raw term as base value.

    ``value`` is wrapped in parentheses; ``base_value`` is returned
    without them (see the note at the end of the function).
    """
    # a falsy distance (0, None, '') switches fuzzy matching off
    fuzzy = '~%s' % levenstein_distance if levenstein_distance else ''

    if '"' in term:
        # quoted literals
        expression = '%s%s' % (term, fuzzy)
        base = expression
    elif isWildCard(term):
        expression = prepare_wildcard(term)
        base = quote(term.replace('*', '').replace('?', ''))
    else:
        expression = '%s* OR %s%s' % (prepare_wildcard(term), term, fuzzy)
        base = term

    # Netsight: we removed the parenthesis around base_value
    # the first element of the returned tuple (value) is
    # not used.
    return '(%s)' % expression, base
def buildQuery(self, default=None, **args):
    """Helper to build a querystring for simple use-cases.

    ``default`` is stored under the ``None`` key and resolved against the
    schema's ``defaultSearchField``; every other keyword argument is
    looked up in the schema under its own name.  Arguments whose field is
    missing from the schema or not indexed are dropped with a warning,
    and empty non-boolean values are skipped.

    Returns a dict mapping each remaining argument name to its clause:
    ``+<name>:<value>`` for named fields, the value itself for the default
    field (prefixed with ``+`` unless it already starts with ``+`` or
    ``-``), and the joined members verbatim for ``set`` values.
    """
    logger.debug('building query for "%r", %r', default, args)
    schema = self.getManager().getSchema() or {}
    fallback_field = getattr(schema, 'defaultSearchField', None)
    args[None] = default

    def as_literal(item):
        # list items should be treated as literals, but
        # nevertheless only get quoted when necessary
        if isinstance(item, unicode):
            item = item.encode('utf-8')
        escaped = quote(item)
        if escaped.startswith('"') or escaped == item:
            return escaped
        return quote('"' + item + '"')

    clauses = {}
    for name, value in args.items():
        spec = schema.get(name or fallback_field, None)
        if spec is None or not spec.indexed:
            logger.warning('dropping unknown search attribute "%s" '
                           ' (%r) for query: %r', name, value, args)
            continue

        if isinstance(value, bool):
            value = str(value).lower()
        elif not value:
            # solr doesn't like empty fields (+foo:"")
            continue
        elif spec.class_ == 'solr.BoolField':
            candidates = value
            if not isinstance(candidates, (tuple, list)):
                candidates = [candidates]
            falsy = ('0', 'False', MV)
            truths = {bool(v) and v not in falsy for v in candidates}
            if len(truths) != 1:
                assert len(truths) == 2     # just to make sure
                continue                    # skip when "true or false"
            value = str(truths.pop()).lower()
        elif isinstance(value, (tuple, list)):
            value = '(%s)' % ' OR '.join([as_literal(v) for v in value])
        elif isinstance(value, set):
            # sets are taken literally
            if len(value) == 1:
                clauses[name] = ''.join(value)
            else:
                clauses[name] = '(%s)' % ' OR '.join(value)
            continue
        elif isinstance(value, basestring):
            if spec.class_ != 'solr.TextField':
                value = quote(value)
            else:
                if isWildCard(value):
                    value = prepare_wildcard(value)
                value = quote(value, textfield=True)
                # if we have an intra-word hyphen, we need quotes
                escaped_sign = '\\-' in value or '\\+' in value
                if escaped_sign and value[0] != '"':
                    value = '"%s"' % value
            if not value:
                # don't search for empty strings, even quoted
                continue
        else:
            logger.info('skipping unsupported value "%r" (%s)', value, name)
            continue

        if name is not None:
            value = '+%s:%s' % (name, value)
        elif value and value[0] not in '+-':
            value = '+%s' % value
        clauses[name] = value

    logger.debug('built query "%s"', clauses)
    return clauses
def testIsWildCard(self):
    """``isWildCard`` flags terms containing ``*`` or ``?`` wildcards."""
    wildcard_terms = [
        'foo*',
        'fo?',
        'fo?o',
        'fo*oo',
        'fo?o*',
        '*foo',
        '*foo*',
        'foo* bar',
        'foo bar?',
        '*',
        '?',
        u'føø*',
        u'føø*'.encode('utf-8'),
        u'*føø*',
    ]
    for term in wildcard_terms:
        self.assertTrue(isWildCard(term))

    plain_terms = [
        'foo',
        'fo#o',
        'foo bar',
        u'føø',
        u'føø'.encode('utf-8'),
        # other characters might be meaningful in solr, but we don't
        # distinguish them properly (yet)
        'foo#?',
    ]
    for term in plain_terms:
        self.assertFalse(isWildCard(term))
def buildQuery(self, default=None, **args):
    """Helper to build a querystring for simple use-cases.

    ``default`` is only taken into account when it is not ``None`` and
    the schema names a ``defaultSearchField``; it is then stored under the
    ``None`` key.  All arguments are processed in sorted order and looked
    up in the schema; unknown or non-indexed fields are dropped with a
    warning and empty non-boolean values are skipped.

    Returns a dict mapping each remaining argument name to its clause:
    ``+<name>:<value>`` for named fields, the value itself for the default
    field (prefixed with ``+`` unless it already starts with ``+`` or
    ``-``), and the joined members for ``set`` values.  Slashes in every
    clause are backslash-escaped.
    """
    logger.debug('building query for "%r", %r', default, args)
    schema = self.getManager().getSchema() or {}
    fallback_field = getattr(schema, 'defaultSearchField', None)
    if not (default is None or fallback_field is None):
        args[None] = default

    def as_literal(item):
        # list items should be treated as literals, but
        # nevertheless only get quoted when necessary
        if isinstance(item, unicode):
            item = item.encode('utf-8')
        escaped = quote(item)
        if escaped.startswith('"') or escaped == item:
            return escaped
        return quote('"' + item + '"')

    clauses = {}
    for name, value in sorted(args.items()):
        spec = schema.get(name or fallback_field, None)
        if spec is None or not spec.indexed:
            logger.warning(
                'dropping unknown search attribute "%s" '
                ' (%r) for query: %r', name, value, args)
            continue

        if isinstance(value, bool):
            value = str(value).lower()
        elif not value:
            # solr doesn't like empty fields (+foo:"")
            continue
        elif spec.class_ == 'solr.BoolField':
            candidates = value
            if not isinstance(candidates, (tuple, list)):
                candidates = [candidates]
            falsy = ('0', 'False', MV)
            truths = {bool(v) and v not in falsy for v in candidates}
            if len(truths) != 1:
                assert len(truths) == 2     # just to make sure
                continue                    # skip when "true or false"
            value = str(truths.pop()).lower()
        elif isinstance(value, (tuple, list)):
            value = '(%s)' % ' OR '.join([as_literal(v) for v in value])
        elif isinstance(value, set):
            # sets are taken literally
            if len(value) == 1:
                literal = ''.join(value)
            else:
                literal = '(%s)' % ' OR '.join(value)
            if '/' in literal:
                literal = literal.replace('/', '\\/')
            clauses[name] = literal
            continue
        elif isinstance(value, basestring):
            if spec.class_ != 'solr.TextField':
                value = quote(value)
            else:
                if isWildCard(value):
                    value = prepare_wildcard(value)
                value = quote(value, textfield=True)
                # if we have an intra-word hyphen, we need quotes
                escaped_sign = '\\-' in value or '\\+' in value
                if escaped_sign and value[0] != '"':
                    value = '"%s"' % value
            if not value:
                # don't search for empty strings, even quoted
                continue
        else:
            logger.info('skipping unsupported value "%r" (%s)', value, name)
            continue

        if name is not None:
            value = '+%s:%s' % (name, value)
        elif value and value[0] not in '+-':
            value = '+%s' % value

        # Since Solr 4.0 slashes must be escaped
        # see: http://wiki.apache.org/solr/SolrQuerySyntax
        if '/' in value:
            value = value.replace('/', '\\/')
        clauses[name] = value

    logger.debug('built query "%s"', clauses)
    return clauses
def mangleQuery(keywords, config, schema):
    """Translate / mangle query parameters to replace zope specifics
    with equivalent constructs for solr.

    ``keywords`` is modified in place and nothing is returned.  The work
    happens in three steps:

    1. Every parameter is reduced to a plain query value; additional
       options (taken from ``*_usage`` keys, from dict values or from
       attributes of query objects) are collected per key in ``extras``.
       Keys listed in ``ignored`` are removed.
    2. The names of the path-like ("EPI") indexes are determined from
       ``schema``, falling back to ``['path']`` without a schema.
    3. Each remaining parameter is rewritten depending on its key, its
       options and its value type.

    NOTE(review): entries are deleted from ``keywords`` while looping over
    ``keywords.items()``; this relies on ``items()`` returning a list, as
    it does in Python 2.
    """
    # step 1: unify the different parameter styles
    extras = {}
    for key, value in keywords.items():
        if key.endswith('_usage'):          # convert old-style parameters
            # "<category>:<spec>" becomes an option of the key without
            # its "_usage" suffix (6 characters)
            category, spec = value.split(':', 1)
            extras[key[:-6]] = {category: spec}
            del keywords[key]
        elif isinstance(value, dict):       # unify dict parameters
            # NOTE: the caller's dict is modified, its 'query' entry is
            # removed and the rest is kept as options
            keywords[key] = value['query']
            del value['query']
            extras[key] = value
        elif hasattr(value, 'query'):       # unify object parameters
            keywords[key] = value.query
            extra = dict()
            # only attributes named in `query_args` that are not None
            # are taken over as options
            for arg in query_args:
                arg_val = getattr(value, arg, None)
                if arg_val is not None:
                    extra[arg] = arg_val
            extras[key] = extra
        elif key in ignored:
            del keywords[key]

    # find EPI indexes
    if schema:
        # count, per prefix before the first underscore, the schema names
        # ending in "_string", "_depth" or "_parents"; a prefix counted
        # exactly three times is treated as an EPI index
        epi_indexes = {}
        for name in schema.keys():
            parts = name.split('_')
            if parts[-1] in ['string', 'depth', 'parents']:
                count = epi_indexes.get(parts[0], 0)
                epi_indexes[parts[0]] = count + 1
        epi_indexes = [k for k, v in epi_indexes.items() if v == 3]
    else:
        epi_indexes = ['path']

    # step 3: rewrite the individual parameters
    for key, value in keywords.items():
        args = extras.get(key, {})
        if key == 'SearchableText':
            pattern = getattr(config, 'search_pattern', '')
            simple_term = isSimpleTerm(value)
            if pattern and isSimpleSearch(value):
                base_value = value
                if simple_term:             # use prefix/wildcard search
                    value = '(%s* OR %s)' % (prepare_wildcard(value), value)
                elif isWildCard(value):
                    value = prepare_wildcard(value)
                    # the base value is derived from the already prepared
                    # value, with the wildcard characters removed
                    base_value = quote(value.replace('*', '').replace('?', ''))
                # simple queries use custom search pattern
                value = pattern.format(value=quote(value),
                                       base_value=base_value)
                keywords[key] = set([value])    # add literal query parameter
                continue
            elif simple_term:               # use prefix/wildcard search
                keywords[key] = '(%s* OR %s)' % (
                    prepare_wildcard(value), value)
                continue
            # any other "SearchableText" value falls through to the
            # generic handling below
        if key in epi_indexes:
            # the value is searched in "<key>_parents" instead of "<key>"
            path = keywords['%s_parents' % key] = value
            del keywords[key]
            if 'depth' in args:
                depth = int(args['depth'])
                if depth >= 0:
                    if not isinstance(value, (list, tuple)):
                        path = [path]
                    tmpl = '(+%s_depth:[%d TO %d] AND +%s_parents:%s)'
                    params = keywords['%s_parents' % key] = set()
                    for p in path:
                        # `base` is the number of "/"-separated parts of
                        # the path; `depth and 1` is 0 for depth 0 and 1
                        # otherwise, so the range is [base, base] for
                        # depth 0 and [base + 1, base + depth] else
                        base = len(p.split('/'))
                        params.add(tmpl % (key, base + (depth and 1),
                                           base + depth, key, p))
                del args['depth']
        elif key == 'effectiveRange':
            if isinstance(value, DateTime):
                steps = getattr(config, 'effective_steps', 1)
                if steps > 1:
                    # round the timestamp down to a multiple of `steps`
                    # (presumably seconds -- TODO confirm unit of
                    # `timeTime()`)
                    value = DateTime(value.timeTime() // steps * steps)
                value = iso8601date(value)
            # replaced by two range queries on "effective" and "expires"
            del keywords[key]
            keywords['effective'] = '[* TO %s]' % value
            keywords['expires'] = '[%s TO *]' % value
        elif key == 'show_inactive':
            del keywords[key]           # marker for `effectiveRange`
        elif 'range' in args:
            if not isinstance(value, (list, tuple)):
                value = [value]
            payload = map(iso8601date, value)
            # `ranges` maps the range option to a format string
            keywords[key] = ranges[args['range']] % tuple(payload)
            del args['range']
        elif 'operator' in args:
            # a single value (or a non-sequence) is left untouched
            if isinstance(value, (list, tuple)) and len(value) > 1:
                sep = ' %s ' % args['operator'].upper()
                value = sep.join(map(str, map(iso8601date, value)))
                keywords[key] = '(%s)' % value
            del args['operator']
        elif key == 'allowedRolesAndUsers':
            if getattr(config, 'exclude_user', False):
                # drop the current user's own "user$<id>" token
                token = 'user$' + getSecurityManager().getUser().getId()
                if token in value:
                    value.remove(token)
        elif isinstance(value, DateTime):
            keywords[key] = iso8601date(value)
        elif not isinstance(value, basestring):
            # any options still left at this point are not supported
            assert not args, 'unsupported usage: %r' % args
def buildQueryAndParameters(self, default=None, **args):
    """Helper to build a querystring and its parameters for simple
    use-cases.

    The query parameters are first split off ``args`` (via
    ``subtractQueryParameters`` / ``cleanupQueryParameters``); the
    remaining arguments are passed through ``prepareData`` and
    ``mangleQuery`` and then turned into per-field clauses in sorted
    order.  ``default`` is stored under the ``None`` key and resolved
    against the schema's ``defaultSearchField``.

    Returns a tuple ``(query, params)``.  ``query`` maps each argument
    name to its clause; it is an empty dict as soon as a named argument
    has an empty, non-boolean value.  ``optimizeQueryParameters`` is
    only called for a non-empty query.
    """
    schema = self.getManager().getSchema() or {}
    params = cleanupQueryParameters(subtractQueryParameters(args), schema)
    config = self.getConfig()
    prepareData(args)
    mangleQuery(args, config, schema)
    logger.debug('building query for "%r", %r', default, args)
    schema = self.getManager().getSchema() or {}
    fallback_field = getattr(schema, 'defaultSearchField', None)
    args[None] = default

    clauses = {}
    for name, value in sorted(args.items()):
        spec = schema.get(name or fallback_field, None)
        if spec is None or not spec.indexed:
            logger.info(
                'dropping unknown search attribute "%s" '
                ' (%r) for query: %r', name, value, args
            )
            continue

        if isinstance(value, bool):
            value = str(value).lower()
        elif not value:
            # solr doesn't like empty fields (+foo:"")
            if name:
                logger.info(
                    'empty search term form "%s:%s", aborting buildQuery' % (
                        name,
                        value
                    )
                )
                return {}, params
            continue
        elif spec.class_ == 'solr.BoolField':
            candidates = value
            if not isinstance(candidates, (tuple, list)):
                candidates = [candidates]
            falsy = ('0', 'False', MV)
            truths = {bool(v) and v not in falsy for v in candidates}
            if len(truths) != 1:
                assert len(truths) == 2     # just to make sure
                continue                    # skip when "true or false"
            value = str(truths.pop()).lower()
        elif isinstance(value, (tuple, list)):
            # list items should be treated as literals, but
            # nevertheless only get quoted when necessary
            literals = [quote_iterable_item(v) for v in value]
            value = '(%s)' % ' OR '.join(literals)
        elif isinstance(value, set):
            # sets are taken literally
            if len(value) == 1:
                clauses[name] = ''.join(value)
            else:
                clauses[name] = '(%s)' % ' OR '.join(value)
            continue
        elif isinstance(value, basestring):
            if spec.class_ != 'solr.TextField':
                value = quote(value)
            else:
                if isWildCard(value):
                    value = prepare_wildcard(value)
                value = quote(value, textfield=True)
                # if we have an intra-word hyphen, we need quotes
                escaped_sign = '\\-' in value or '\\+' in value
                if escaped_sign and value[0] != '"':
                    value = '"%s"' % value
            if not value:
                # don't search for empty strings, even quoted
                continue
        else:
            logger.info(
                'skipping unsupported value "%r" (%s)', value, name
            )
            continue

        if name is not None:
            value = '+%s:%s' % (name, value)
        elif value and value[0] not in '+-':
            value = '+%s' % value
        clauses[name] = value

    logger.debug('built query "%s"', clauses)
    if clauses:
        optimizeQueryParameters(clauses, params)
    return clauses, params
def testIsWildCard(self):
    """``isWildCard`` flags terms containing ``*`` or ``?`` wildcards."""
    expectations = (
        (self.assertTrue, (
            "foo*",
            "fo?",
            "fo?o",
            "fo*oo",
            "fo?o*",
            "*foo",
            "*foo*",
            "foo* bar",
            "foo bar?",
            "*",
            "?",
            u"føø*",
            u"føø*".encode("utf-8"),
            u"*føø*",
        )),
        (self.assertFalse, (
            "foo",
            "fo#o",
            "foo bar",
            u"føø",
            u"føø".encode("utf-8"),
            # other characters might be meaningful in solr, but we don't
            # distinguish them properly (yet)
            "foo#?",
        )),
    )
    for check, terms in expectations:
        for term in terms:
            check(isWildCard(term))