def testSplitSimpleSearch(self):
    """Check how ``splitSimpleSearch`` breaks a simple query into terms.

    The expectations exercised here are:

    * whitespace separates terms;
    * a double-quoted phrase stays a single term, quotes included;
    * a query containing ``AND`` is rejected with ``AssertionError``;
    * a numeric word is returned as a term of its own.
    """
    plain_terms = splitSimpleSearch("foo bar")
    self.assertEqual(plain_terms, ["foo", "bar"])

    phrase_terms = splitSimpleSearch('foo "bar foobar" baz')
    self.assertEqual(phrase_terms, ["foo", '"bar foobar"', "baz"])

    # Operator queries are expected to trip an assertion inside the splitter.
    with self.assertRaises(AssertionError):
        splitSimpleSearch("foo AND bar")

    numeric_terms = splitSimpleSearch("foo 42")
    self.assertEqual(numeric_terms, ["foo", "42"])
def mangleSearchableText(value, config, boost=5):
    """Rewrite a simple SearchableText query using the configured pattern.

    The configured ``search_pattern`` is formatted with two keyword fields:

    * ``base_value`` -- the base expressions of all terms, space separated;
    * ``boost_value`` -- the same expressions, each suffixed with
      ``^<boost>``.

    The pattern is therefore expected not to reference a ``value`` field
    and not to hard-code a boost factor itself; boosting is applied here.

    :param value: the raw query string.
    :param config: object providing the optional attributes
        ``search_pattern`` (default ``''``) and ``levenshtein_distance``
        (default ``0``).
    :param boost: integer boost factor appended to every term of
        ``boost_value``.
    :returns: ``value`` unchanged when it is not a simple search or when
        no pattern is configured; otherwise a one-element set holding the
        formatted pattern.
    """
    pattern = getattr(config, 'search_pattern', '')
    distance = getattr(config, 'levenshtein_distance', 0)

    # Anything that is not a simple search is handed back untouched.
    if not isSimpleSearch(value):
        return value

    # Only the second item returned by makeSimpleExpressions (the base
    # expression) is used here; the first item is deliberately ignored.
    base_expressions = (
        makeSimpleExpressions(term, distance)[1]
        for term in splitSimpleSearch(value)
    )
    pairs = [
        (base, '{}^{:d}'.format(base, boost))
        for base in base_expressions
    ]

    base_value = ' '.join(base for base, _ in pairs)
    boost_value = ' '.join(boosted for _, boosted in pairs)

    # A pattern is expected to always be configured; without one the
    # incoming value is returned as it came in.
    if not pattern:
        return value

    query = pattern.format(boost_value=boost_value, base_value=base_value)
    # The result is wrapped in a set ("add literal query parameter").
    return {query}
def mangleSearchableText(value, config):
    """Rewrite a simple SearchableText query using the configured pattern.

    :param value: the raw query string.
    :param config: configuration object; when falsy, ``getConfig()`` is
        used instead. The optional attributes ``search_pattern`` (default
        ``u''``) and ``levenshtein_distance`` (default ``0``) are read.
    :returns: ``value`` unchanged when it is not a simple search; the
        space-joined first expressions of all terms when no pattern is
        configured; otherwise a one-element set holding the formatted
        pattern.
    """
    config = config or getConfig()

    pattern = getattr(config, 'search_pattern', u'')
    if pattern:
        # NOTE(review): encoding before ``.format`` presumably targets
        # Python 2 byte strings; on Python 3 ``bytes`` has no ``format``
        # method -- confirm the supported interpreter.
        pattern = pattern.encode('utf-8')
    distance = getattr(config, 'levenshtein_distance', 0)

    # Anything that is not a simple search is handed back untouched.
    if not isSimpleSearch(value):
        return value

    # makeSimpleExpressions yields a pair per term: the first items build
    # the ``value`` field, the second items build ``base_value``.
    expressions = (
        makeSimpleExpressions(term, distance)
        for term in splitSimpleSearch(value)
    )
    pairs = [(term_query, term_base) for term_query, term_base in expressions]

    base_value = ' '.join(term_base for _, term_base in pairs)
    query = ' '.join(term_query for term_query, _ in pairs)

    if not pattern:
        return query

    formatted = pattern.format(value=quote(query), base_value=base_value)
    # The result is wrapped in a set ("add literal query parameter").
    return {formatted}
def mangleSearchableText(value, config):
    """Rewrite a simple SearchableText query using the configured pattern.

    :param value: the raw query string.
    :param config: object providing the optional attributes
        ``search_pattern`` (default ``""``) and ``levenshtein_distance``
        (default ``0``).
    :returns: ``value`` unchanged when it is not a simple search; the
        space-joined first expressions of all terms when no pattern is
        configured; otherwise a one-element set holding the formatted
        pattern.
    """
    pattern = getattr(config, "search_pattern", "")
    fuzziness = getattr(config, "levenshtein_distance", 0)

    # Anything that is not a simple search is handed back untouched.
    if not isSimpleSearch(value):
        return value

    # makeSimpleExpressions yields a pair per term: the first items build
    # the ``value`` field, the second items build ``base_value``.
    query_terms = []
    base_terms = []
    for term in splitSimpleSearch(value):
        term_query, term_base = makeSimpleExpressions(term, fuzziness)
        query_terms.append(term_query)
        base_terms.append(term_base)

    base_value = " ".join(base_terms)
    simple_query = " ".join(query_terms)

    if not pattern:
        return simple_query

    # The result is wrapped in a set ("add literal query parameter").
    return {pattern.format(value=quote(simple_query), base_value=base_value)}
def mangleSearchableText(value, config, force_complex_search=False):
    """Rewrite a SearchableText query according to the Solr configuration.

    Processing order:

    1. Unless ``force_simple_search`` is configured, a query that
       ``isSimpleSearch`` rejects is returned unchanged.
    2. If ``allow_complex_search`` is configured and complex search was
       requested -- through a leading ``solr:`` prefix or the
       ``force_complex_search`` argument -- the stripped query (prefix
       removed) is returned as is.
    3. Otherwise every term is expanded with ``makeSimpleExpressions`` and
       the result is either returned joined (no pattern configured) or
       substituted into ``search_pattern``.

    :param value: the raw query string.
    :param config: configuration object; when falsy, ``getConfig()`` is
        used instead. The optional attributes ``search_pattern``,
        ``force_simple_search``, ``allow_complex_search``,
        ``levenshtein_distance`` and ``prefix_wildcard`` are read.
    :param force_complex_search: request complex-search handling even
        without a ``solr:`` prefix.
    :returns: a string for pass-through, complex or pattern-less results;
        a one-element set holding the formatted pattern otherwise.
    """
    config = config or getConfig()
    pattern = getattr(config, "search_pattern", u"")
    force_simple_search = getattr(config, "force_simple_search", False)
    allow_complex_search = getattr(config, "allow_complex_search", False)
    levenstein_distance = getattr(config, "levenshtein_distance", 0)
    prefix_wildcard = getattr(config, "prefix_wildcard", False)

    # A leading "solr:" marks the query as an explicit complex search; only
    # that first occurrence is removed.
    stripped = value.strip()
    force_complex_search_prefix = stripped.startswith("solr:")
    if force_complex_search_prefix:
        stripped = stripped.replace("solr:", "", 1).strip()

    # NOTE(review): this check runs before the complex-search branch, so a
    # "solr:"-prefixed query that isSimpleSearch rejects is returned with
    # its prefix intact unless force_simple_search is on -- confirm this
    # ordering is intended.
    if not force_simple_search and not isSimpleSearch(value):
        return value

    if allow_complex_search and (force_complex_search_prefix or force_complex_search):
        # FIXME: fold in catalog solr_complex_search parameter check
        return stripped

    if force_simple_search:
        value = removeSpecialCharactersAndOperators(value)

    # makeSimpleExpressions yields a pair per term: the first items build
    # the ``value`` field, the second items build ``base_value``.
    value_parts = []
    base_value_parts = []
    for term in splitSimpleSearch(value):
        term_value, term_base_value = makeSimpleExpressions(term, levenstein_distance)
        value_parts.append(term_value)
        base_value_parts.append(term_base_value)

    base_value = " ".join(base_value_parts)
    value = " ".join(value_parts)

    if not pattern:
        return value

    # The previous trailing ``pattern.encode("utf-8")`` branch was
    # unreachable (a truthy pattern always returns below) and its result
    # was never used, so it has been dropped.
    value = pattern.format(
        value=quote(value, prefix_wildcard=prefix_wildcard), base_value=base_value
    )
    return {value}  # add literal query parameter
def testSplitSimpleSearch(self):
    """Check how ``splitSimpleSearch`` breaks a simple query into terms.

    Whitespace-separated words and double-quoted phrases are expected to
    come back as individual terms (quotes preserved), while the inputs
    ``'foo AND bar'`` and ``'foo42'`` are expected to raise
    ``AssertionError``.
    """
    check_equal = self.assertEqual

    check_equal(splitSimpleSearch('foo bar'), ['foo', 'bar'])
    check_equal(
        splitSimpleSearch('foo "bar foobar" baz'),
        ['foo', '"bar foobar"', 'baz'],
    )

    # Each of these queries must be rejected by the splitter.
    for rejected_query in ('foo AND bar', 'foo42'):
        with self.assertRaises(AssertionError):
            splitSimpleSearch(rejected_query)