def mangleSearchableText(value, config):
    """Rewrite a simple ``SearchableText`` query for Solr.

    A query that ``isSimpleSearch`` rejects is handed back untouched.
    Otherwise each term produced by ``splitSimpleSearch`` is expanded with
    ``makeSimpleExpressions`` (using the configured ``levenshtein_distance``)
    and the expanded pieces are joined with single spaces.

    When the configuration carries a ``search_pattern`` the joined text is
    substituted into that pattern and returned wrapped in a one-element
    set; without a pattern the joined expanded text is returned as a plain
    string.  A falsy ``config`` is replaced by ``getConfig()``.
    """
    config = config or getConfig()
    template = getattr(config, 'search_pattern', u'')
    if template:
        template = template.encode('utf-8')
    max_distance = getattr(config, 'levenshtein_distance', 0)

    if not isSimpleSearch(value):
        return value

    # each entry is a (expanded expression, base expression) pair
    expanded = [
        makeSimpleExpressions(term, max_distance)
        for term in splitSimpleSearch(value)
    ]
    base_value = ' '.join(base for _, base in expanded)
    value = ' '.join(full for full, _ in expanded)

    if not template:
        return value
    # add literal query parameter
    return set([template.format(value=quote(value), base_value=base_value)])
def mangleSearchableText(value, config, boost=5):
    """Mangle the searchable text query.

    The pattern configured by Netsight deployments will:

    - Never use `value` directly in the format pattern in *field* values
    - Never directly specify a boost value for *field* values (instead
      it is applied here, as ``<term>^<boost>``)

    A query that ``isSimpleSearch`` rejects is returned unchanged, and so
    is the incoming query when no ``search_pattern`` is configured.  With a
    pattern, the formatted query is returned in a one-element set.
    """
    pattern = getattr(config, 'search_pattern', '')
    levenstein_distance = getattr(config, 'levenshtein_distance', 0)

    if not isSimpleSearch(value):
        return value

    # Netsight: the wildcarded form (first element of the pair returned by
    # `makeSimpleExpressions`) is not used, only the base value.  Per the
    # original note, that base value is not enclosed in parentheses.
    base_terms = [
        makeSimpleExpressions(term, levenstein_distance)[1]
        for term in splitSimpleSearch(value)
    ]
    boosted_terms = ['{}^{:d}'.format(base, boost) for base in base_terms]

    base_value = ' '.join(base_terms)
    boost_value = ' '.join(boosted_terms)

    # We should always have a pattern
    if not pattern:
        return value
    # add literal query parameter
    return set([pattern.format(boost_value=boost_value, base_value=base_value)])
def mangleSearchableText(value, config):
    """Turn a simple search string into the query text sent to Solr.

    Non-simple queries (as judged by ``isSimpleSearch``) pass through
    unchanged.  For simple ones, every term from ``splitSimpleSearch`` is
    run through ``makeSimpleExpressions`` and the results are joined with
    spaces.  If ``config.search_pattern`` is set, the joined text is
    formatted into it and returned inside a one-element set; otherwise the
    joined expanded text itself is returned.
    """
    search_pattern = getattr(config, "search_pattern", "")
    distance = getattr(config, "levenshtein_distance", 0)

    if not isSimpleSearch(value):
        return value

    # (expanded expression, base expression) for every search term
    pairs = [makeSimpleExpressions(word, distance) for word in splitSimpleSearch(value)]
    base_value = " ".join(plain for _, plain in pairs)
    value = " ".join(fancy for fancy, _ in pairs)

    if search_pattern:
        formatted = search_pattern.format(value=quote(value), base_value=base_value)
        return set([formatted])  # add literal query parameter
    return value
def mangleSearchableText(value, config, force_complex_search=False):
    """Mangle a ``SearchableText`` query according to the Solr configuration.

    :param value: the raw query string.
    :param config: configuration object; when falsy, ``getConfig()`` is used.
        The attributes ``search_pattern``, ``force_simple_search``,
        ``allow_complex_search``, ``levenshtein_distance`` and
        ``prefix_wildcard`` are read, each with a falsy default.
    :param force_complex_search: when true (and complex searches are
        allowed), return the stripped query without any mangling.
    :returns: one of

        * ``value`` unchanged, when it is not a simple search and simple
          search is not forced;
        * the stripped query (``solr:`` prefix removed), when complex
          search is allowed and requested via prefix or parameter;
        * a one-element set holding the formatted pattern, when a
          ``search_pattern`` is configured;
        * otherwise the space-joined expanded terms as a string.
    """
    config = config or getConfig()
    pattern = getattr(config, "search_pattern", u"")
    force_simple_search = getattr(config, "force_simple_search", False)
    allow_complex_search = getattr(config, "allow_complex_search", False)
    levenstein_distance = getattr(config, "levenshtein_distance", 0)
    prefix_wildcard = getattr(config, "prefix_wildcard", False)

    # A leading "solr:" marks the query as an explicit complex search.
    stripped = value.strip()
    force_complex_search_prefix = stripped.startswith("solr:")
    if force_complex_search_prefix:
        stripped = stripped.replace("solr:", "", 1).strip()

    # NOTE(review): this check runs before the complex-search branch, so a
    # "solr:"-prefixed query that is not a simple search is returned with
    # its prefix intact unless `force_simple_search` is set -- confirm this
    # ordering is intended.
    if not force_simple_search and not isSimpleSearch(value):
        return value

    if allow_complex_search and (force_complex_search_prefix or force_complex_search):
        # FIXME: fold in catalog solr_complex_search parameter check
        return stripped

    if force_simple_search:
        value = removeSpecialCharactersAndOperators(value)

    # each entry is a (expanded expression, base expression) pair
    expressions = [
        makeSimpleExpressions(term, levenstein_distance)
        for term in splitSimpleSearch(value)
    ]
    base_value = " ".join(term_base for _, term_base in expressions)
    value = " ".join(term_value for term_value, _ in expressions)

    if pattern:
        value = pattern.format(
            value=quote(value, prefix_wildcard=prefix_wildcard),
            base_value=base_value,
        )
        return {value}  # add literal query parameter
    # The former trailing `pattern.encode("utf-8")` was dead code: the
    # pattern is always falsy here and the encoded value was never used.
    return value
def testSimpleSearch(self):
    """Check which query strings `isSimpleSearch` accepts and rejects."""
    # plain words, wildcards and quoted phrases count as simple
    simple_queries = (
        'foo',
        'foo bar',
        'foo bar ',
        'foo bar',
        u'føø bär',
        'føø bär',
        'foo*',
        'foo* bar*',
        '*foo*',
        '"foo"',
        '"foo bar"',
        '"foo AND bar"',
        'foo "AND" bar',
        '"foo" "bar"',
        'fo?bar',
        'foo bar?',
        'areallyverylongword ' 'andanotherreallylongwordwithsomecake',
        'areallyverylongword ' 'andanotherreallylongwordwithsomecake *',
    )
    for query in simple_queries:
        self.assertTrue(isSimpleSearch(query))

    # empty input, operators, field queries and most numeric terms do not
    complex_queries = (
        '',
        u'føø bär!',
        unicode('föö bär', 'latin'),
        'foo AND bar',
        'foo OR bar',
        'foo NOT bar',
        '"foo" OR bar',
        '(foo OR bar)',
        '+foo',
        'name:foo',
        'foo && bar',
        '2000',
        'foo 2000',
        'foo 1/2000',
        'foo 42 bar11',
    )
    for query in complex_queries:
        self.assertFalse(isSimpleSearch(query))

    # a leading number followed by a word is still simple
    self.assertTrue(isSimpleSearch('2000 foo'))
def mangleQuery(keywords, config, schema):
    """Translate / mangle query parameters in place, replacing Zope
    specifics with equivalent constructs for Solr.

    ``keywords`` is modified in place and nothing is returned.  The work is
    done in three steps:

    1. Normalise the parameter forms: ``<name>_usage`` keys, dict values
       with a ``'query'`` entry and objects with a ``query`` attribute are
       reduced to a plain value in ``keywords``, while their additional
       arguments are collected per key in a local ``extras`` mapping.
       Keys listed in ``ignored`` are dropped.  Note that dict values
       passed by the caller lose their ``'query'`` entry.
    2. Determine the extended path ("EPI") indexes: with a ``schema``,
       these are the names for which all three of the ``_string``,
       ``_depth`` and ``_parents`` suffixed fields were counted; without
       one, only ``'path'``.
    3. Convert each remaining parameter (searchable text, paths, effective
       range, ranges, operators, dates, ...) to its Solr query form.

    NOTE(review): keys are deleted from ``keywords`` while looping over
    ``keywords.items()``; this needs ``items()`` to return a snapshot list,
    as it does on Python 2 (which the use of ``basestring`` suggests).
    """
    extras = {}
    for key, value in keywords.items():
        if key.endswith('_usage'):          # convert old-style parameters
            # the value reads "<category>:<spec>"; strip the 6-char suffix
            category, spec = value.split(':', 1)
            extras[key[:-6]] = {category: spec}
            del keywords[key]
        elif isinstance(value, dict):       # unify dict parameters
            keywords[key] = value['query']
            del value['query']
            extras[key] = value
        elif hasattr(value, 'query'):       # unify object parameters
            keywords[key] = value.query
            extra = dict()
            # copy over only those known arguments that are actually set
            for arg in query_args:
                arg_val = getattr(value, arg, None)
                if arg_val is not None:
                    extra[arg] = arg_val
            extras[key] = extra
        elif key in ignored:
            del keywords[key]

    # find EPI indexes
    if schema:
        epi_indexes = {}
        # count, per name prefix, the fields ending in one of the suffixes
        for name in schema.keys():
            parts = name.split('_')
            if parts[-1] in ['string', 'depth', 'parents']:
                count = epi_indexes.get(parts[0], 0)
                epi_indexes[parts[0]] = count + 1
        # keep only the prefixes that were counted three times
        epi_indexes = [k for k, v in epi_indexes.items() if v == 3]
    else:
        epi_indexes = ['path']

    for key, value in keywords.items():
        args = extras.get(key, {})
        if key == 'SearchableText':
            pattern = getattr(config, 'search_pattern', '')
            simple_term = isSimpleTerm(value)
            if pattern and isSimpleSearch(value):
                base_value = value
                if simple_term:             # use prefix/wildcard search
                    value = '(%s* OR %s)' % (prepare_wildcard(value), value)
                elif isWildCard(value):
                    value = prepare_wildcard(value)
                    # the base value is the term without wildcard characters
                    base_value = quote(value.replace('*', '').replace('?', ''))
                # simple queries use custom search pattern
                value = pattern.format(value=quote(value), base_value=base_value)
                keywords[key] = set([value])    # add literal query parameter
                continue
            elif simple_term:               # use prefix/wildcard search
                keywords[key] = '(%s* OR %s)' % (
                    prepare_wildcard(value), value)
                continue
            # anything else falls through to the generic handling below
        if key in epi_indexes:
            # path queries are answered from the "<key>_parents" field
            path = keywords['%s_parents' % key] = value
            del keywords[key]
            if 'depth' in args:
                depth = int(args['depth'])
                if depth >= 0:
                    if not isinstance(value, (list, tuple)):
                        path = [path]
                    tmpl = '(+%s_depth:[%d TO %d] AND +%s_parents:%s)'
                    params = keywords['%s_parents' % key] = set()
                    for p in path:
                        base = len(p.split('/'))
                        # lower bound is `base` for depth 0, else `base + 1`
                        params.add(tmpl % (key, base + (depth and 1), base + depth, key, p))
                del args['depth']
        elif key == 'effectiveRange':
            if isinstance(value, DateTime):
                steps = getattr(config, 'effective_steps', 1)
                if steps > 1:
                    # round the timestamp down to a multiple of `steps`
                    value = DateTime(value.timeTime() // steps * steps)
                value = iso8601date(value)
            del keywords[key]
            keywords['effective'] = '[* TO %s]' % value
            keywords['expires'] = '[%s TO *]' % value
        elif key == 'show_inactive':
            del keywords[key]           # marker for `effectiveRange`
        elif 'range' in args:
            if not isinstance(value, (list, tuple)):
                value = [value]
            payload = map(iso8601date, value)
            # `ranges` maps the range type to a %-format template
            keywords[key] = ranges[args['range']] % tuple(payload)
            del args['range']
        elif 'operator' in args:
            # multiple values are joined with the upper-cased operator
            if isinstance(value, (list, tuple)) and len(value) > 1:
                sep = ' %s ' % args['operator'].upper()
                value = sep.join(map(str, map(iso8601date, value)))
                keywords[key] = '(%s)' % value
            del args['operator']
        elif key == 'allowedRolesAndUsers':
            if getattr(config, 'exclude_user', False):
                # drop the current user's own token from the given value
                token = 'user$' + getSecurityManager().getUser().getId()
                if token in value:
                    value.remove(token)
        elif isinstance(value, DateTime):
            keywords[key] = iso8601date(value)
        elif not isinstance(value, basestring):
            # non-string values must not carry any leftover extra arguments
            assert not args, 'unsupported usage: %r' % args
def testSimpleSearch(self):
    """Exercise `isSimpleSearch` with accepted and rejected queries."""
    def accepted(query):
        self.assertTrue(isSimpleSearch(query))

    def rejected(query):
        self.assertFalse(isSimpleSearch(query))

    # words, wildcards and quoted phrases
    for candidate in [
        'foo',
        'foo bar',
        'foo bar ',
        'foo bar',
        u'føø bär',
        'føø bär',
        'foo*',
        'foo* bar*',
        '*foo*',
        '"foo"',
        '"foo bar"',
        '"foo AND bar"',
        'foo "AND" bar',
        '"foo" "bar"',
        'fo?bar',
        'foo bar?',
        'areallyverylongword ' 'andanotherreallylongwordwithsomecake',
        'areallyverylongword ' 'andanotherreallylongwordwithsomecake *',
    ]:
        accepted(candidate)

    # empty string, punctuation and differently decoded text
    rejected('')
    rejected(u'føø bär!')
    rejected(unicode('föö bär', 'latin'))

    # boolean operators, grouping, required terms and field queries
    for candidate in [
        'foo AND bar',
        'foo OR bar',
        'foo NOT bar',
        '"foo" OR bar',
        '(foo OR bar)',
        '+foo',
        'name:foo',
        'foo && bar',
    ]:
        rejected(candidate)

    # numeric terms are rejected ...
    for candidate in ['2000', 'foo 2000', 'foo 1/2000', 'foo 42 bar11']:
        rejected(candidate)
    # ... except for a leading number followed by a word
    accepted('2000 foo')
def testSimpleSearch(self):
    """Verify the classification done by `isSimpleSearch`.

    In this variant purely numeric terms count as simple searches.
    """
    long_words = "areallyverylongword " "andanotherreallylongwordwithsomecake"
    long_words_with_star = (
        "areallyverylongword " "andanotherreallylongwordwithsomecake *"
    )

    # (query, expected to be simple) in the order the checks are run
    expectations = [
        ("foo", True),
        ("foo bar", True),
        ("foo bar ", True),
        ("foo bar", True),
        (u"føø bär", True),
        ("føø bär", True),
        ("foo*", True),
        ("foo* bar*", True),
        ("*foo*", True),
        ('"foo"', True),
        ('"foo bar"', True),
        ('"foo AND bar"', True),
        ('foo "AND" bar', True),
        ('"foo" "bar"', True),
        ("fo?bar", True),
        ("foo bar?", True),
        (long_words, True),
        (long_words_with_star, True),
        ("", False),
        (u"føø bär!", False),
        # XXX Why would this be false?
        # (six.text_type('föö bär', 'latin'), False),
        ("foo AND bar", False),
        ("foo OR bar", False),
        ("foo NOT bar", False),
        ('"foo" OR bar', False),
        ("(foo OR bar)", False),
        ("+foo", False),
        ("name:foo", False),
        ("foo && bar", False),
        ("2000", True),
        ("foo 2000", True),
        ("foo 1/2000", False),
        ("foo 42 bar11", True),
        ("2000 foo", True),
    ]
    for query, is_simple in expectations:
        check = self.assertTrue if is_simple else self.assertFalse
        check(isSimpleSearch(query))