예제 #1
0
def mangleSearchableText(value, config):
    """Rewrite a simple full-text query using the configured search pattern.

    ``config`` falls back to ``getConfig()`` when it is falsy.  A query that
    ``isSimpleSearch`` rejects is returned untouched.  Otherwise every term
    yielded by ``splitSimpleSearch`` is passed to ``makeSimpleExpressions``
    (together with the configured ``levenshtein_distance``), which produces an
    expanded form and a base form per term; both series are joined with
    single spaces.

    Returns a one-element set holding the formatted ``search_pattern`` when a
    pattern is configured, otherwise the joined expanded terms as a string.
    """
    config = config or getConfig()
    pattern = getattr(config, 'search_pattern', u'')
    if pattern:
        pattern = pattern.encode('utf-8')
    levenstein_distance = getattr(config, 'levenshtein_distance', 0)

    if not isSimpleSearch(value):
        return value

    # one (expanded, base) pair per search term
    pairs = [
        makeSimpleExpressions(term, levenstein_distance)
        for term in splitSimpleSearch(value)
    ]
    base_value = ' '.join(base for _, base in pairs)
    value = ' '.join(expanded for expanded, _ in pairs)

    if not pattern:
        return value

    formatted = pattern.format(value=quote(value), base_value=base_value)
    # wrapped in a set so that it is added as a literal query parameter
    return {formatted}
예제 #2
0
def mangleSearchableText(value, config, boost=5):
    """Mangle the searchable text query, applying a per-term boost.

    The search patterns configured by Netsight deployments follow two
    conventions:

       - `value` is never used directly in the format pattern for *field*
         values
       - a boost is never specified directly for *field* values; it is
         applied here instead, as ``<term>^<boost>``

    A query that ``isSimpleSearch`` rejects is returned unchanged.  When a
    ``search_pattern`` is configured, it is formatted with ``boost_value``
    and ``base_value`` and returned inside a one-element set; without a
    pattern the incoming ``value`` is returned as it was passed in.
    """
    pattern = getattr(config, 'search_pattern', '')
    levenstein_distance = getattr(config, 'levenshtein_distance', 0)

    if not isSimpleSearch(value):
        return value

    # Netsight: only the second item returned by makeSimpleExpressions (the
    #           base value, which is not enclosed in parentheses) is used;
    #           the first item (the wildcarded form) is ignored.
    plain_terms = []
    boosted_terms = []
    for term in splitSimpleSearch(value):
        base_term = makeSimpleExpressions(term, levenstein_distance)[1]
        plain_terms.append(base_term)
        boosted_terms.append('{}^{:d}'.format(base_term, boost))

    base_value = ' '.join(plain_terms)
    boost_value = ' '.join(boosted_terms)

    # We should always have a pattern
    if not pattern:
        return value

    formatted = pattern.format(boost_value=boost_value,
                               base_value=base_value)
    # add literal query parameter
    return {formatted}
예제 #3
0
def mangleSearchableText(value, config):
    """Expand a simple search query and apply the configured pattern.

    Uses ``getConfig()`` when no (truthy) ``config`` is given.  Queries that
    are not simple according to ``isSimpleSearch`` pass through unchanged.
    For simple queries each term is expanded with ``makeSimpleExpressions``
    and the results are space-joined.  With a configured ``search_pattern``
    the result is a one-element set containing the formatted pattern;
    otherwise it is the space-joined expanded terms.
    """
    config = config or getConfig()
    search_pattern = getattr(config, 'search_pattern', u'')
    if search_pattern:
        search_pattern = search_pattern.encode('utf-8')
    max_distance = getattr(config, 'levenshtein_distance', 0)

    if not isSimpleSearch(value):
        # anything that is not a simple search is handed back as it came in
        return value

    expanded_terms = []
    base_terms = []
    for term in splitSimpleSearch(value):
        expanded, base = makeSimpleExpressions(term, max_distance)
        expanded_terms.append(expanded)
        base_terms.append(base)

    joined_base = ' '.join(base_terms)
    joined_expanded = ' '.join(expanded_terms)
    if not search_pattern:
        return joined_expanded

    # add literal query parameter
    return {
        search_pattern.format(value=quote(joined_expanded),
                              base_value=joined_base)
    }
예제 #4
0
def mangleSearchableText(value, config):
    """Expand a simple search query and apply the configured pattern.

    Non-simple queries (per ``isSimpleSearch``) are returned unchanged.  For
    simple ones, each term from ``splitSimpleSearch`` is expanded through
    ``makeSimpleExpressions`` using the configured ``levenshtein_distance``.
    If ``config`` provides a ``search_pattern``, the pattern is formatted with
    the quoted expanded query (``value``) and the joined base terms
    (``base_value``) and returned as a one-element set; otherwise the
    expanded query string is returned.
    """
    pattern = getattr(config, "search_pattern", "")
    levenstein_distance = getattr(config, "levenshtein_distance", 0)

    if not isSimpleSearch(value):
        return value

    expanded_terms, base_terms = [], []
    for term in splitSimpleSearch(value):
        expanded, base = makeSimpleExpressions(term, levenstein_distance)
        expanded_terms.append(expanded)
        base_terms.append(base)

    base_value = " ".join(base_terms)
    value = " ".join(expanded_terms)
    if not pattern:
        return value

    # add literal query parameter
    return {pattern.format(value=quote(value), base_value=base_value)}
예제 #5
0
def mangleSearchableText(value, config, force_complex_search=False):
    """Rewrite the searchable-text query according to the configuration.

    :param value: the raw query string.
    :param config: configuration object; ``getConfig()`` is used when it is
        falsy.  The attributes read are ``search_pattern``,
        ``force_simple_search``, ``allow_complex_search``,
        ``levenshtein_distance`` and ``prefix_wildcard``.
    :param force_complex_search: request the complex-search pass-through
        (only honoured when ``allow_complex_search`` is enabled).
    :returns: one of

        - ``value`` unchanged, when simple search is not forced and
          ``isSimpleSearch`` rejects the query;
        - the stripped query without a leading ``solr:`` prefix, when complex
          search is allowed and requested (by prefix or by parameter);
        - a one-element set with the formatted ``search_pattern``, when a
          pattern is configured;
        - otherwise the space-joined expanded terms.
    """
    config = config or getConfig()
    pattern = getattr(config, "search_pattern", u"")
    force_simple_search = getattr(config, "force_simple_search", False)
    allow_complex_search = getattr(config, "allow_complex_search", False)
    levenstein_distance = getattr(config, "levenshtein_distance", 0)
    prefix_wildcard = getattr(config, "prefix_wildcard", False)
    value_parts = []
    base_value_parts = []

    # A leading "solr:" marks the query as an explicit complex search.
    stripped = value.strip()
    force_complex_search_prefix = False
    if stripped.startswith("solr:"):
        stripped = stripped.replace("solr:", "", 1).strip()
        force_complex_search_prefix = True

    # NOTE(review): this check runs on the raw `value` (including any
    # "solr:" prefix) and before the complex-search branch below, so the
    # prefix only takes effect if this check lets the query through --
    # confirm that this ordering is intended.
    if not force_simple_search and not isSimpleSearch(value):
        return value

    if allow_complex_search and (force_complex_search_prefix or force_complex_search):
        # FIXME: fold in catalog solr_complex_search parameter check
        return stripped

    if force_simple_search:
        value = removeSpecialCharactersAndOperators(value)

    for term in splitSimpleSearch(value):
        (term_value, term_base_value) = makeSimpleExpressions(term, levenstein_distance)
        value_parts.append(term_value)
        base_value_parts.append(term_base_value)

    base_value = " ".join(base_value_parts)
    value = " ".join(value_parts)
    if pattern:
        value = pattern.format(
            value=quote(value, prefix_wildcard=prefix_wildcard), base_value=base_value
        )
        return set([value])  # add literal query parameter
    # The former trailing `if pattern: pattern = pattern.encode("utf-8")` was
    # unreachable with a truthy pattern and its result was never used.
    return value
예제 #6
0
 def testSimpleSearch(self):
     """Check which query strings ``isSimpleSearch`` accepts and rejects."""
     accepted = (
         'foo',
         'foo bar',
         'foo bar ',
         'foo   bar',
         u'føø bär',
         'føø bär',
         'foo*',
         'foo* bar*',
         '*foo*',
         '"foo"',
         '"foo bar"',
         '"foo AND bar"',
         'foo "AND" bar',
         '"foo" "bar"',
         'fo?bar',
         'foo bar?',
         'areallyverylongword andanotherreallylongwordwithsomecake',
         'areallyverylongword andanotherreallylongwordwithsomecake *',
     )
     for query in accepted:
         self.assertTrue(isSimpleSearch(query))

     for query in ('', u'føø bär!'):
         self.assertFalse(isSimpleSearch(query))
     # decoded with the 'latin' codec at call time, hence checked on its own
     self.assertFalse(isSimpleSearch(unicode('föö bär', 'latin')))

     rejected = (
         'foo AND bar',
         'foo OR bar',
         'foo NOT bar',
         '"foo" OR bar',
         '(foo OR bar)',
         '+foo',
         'name:foo',
         'foo && bar',
         '2000',
         'foo 2000',
         'foo 1/2000',
         'foo 42 bar11',
     )
     for query in rejected:
         self.assertFalse(isSimpleSearch(query))

     # a number in front of a word is still expected to be accepted
     self.assertTrue(isSimpleSearch('2000 foo'))
예제 #7
0
def mangleQuery(keywords, config, schema):
    """ translate / mangle query parameters to replace zope specifics
        with equivalent constructs for solr

        ``keywords`` is modified in place and nothing is returned.

        First pass -- normalise the parameter styles and collect the
        per-key options in a local ``extras`` mapping:

        * ``<name>_usage`` keys: the value is split at the first ``:`` into
          a category and a spec, stored as ``{category: spec}`` for
          ``<name>``; the ``_usage`` key itself is removed
        * dict values: the ``query`` entry becomes the keyword value (it is
          also removed from the passed-in dict), the remaining entries are
          the options
        * objects with a ``query`` attribute: ``.query`` becomes the keyword
          value; attributes listed in the module-level ``query_args`` that
          are not ``None`` are the options
        * keys listed in the module-level ``ignored`` are removed

        Second pass -- rewrite the values (``SearchableText``, EPI/path
        indexes, ``effectiveRange``, ``show_inactive``, ``range`` and
        ``operator`` options, ``allowedRolesAndUsers`` and ``DateTime``
        values).

        :param keywords: dict of query parameters, updated in place
        :param config: configuration object; ``search_pattern``,
            ``effective_steps`` and ``exclude_user`` are read via ``getattr``
        :param schema: mapping of index names, used to detect EPI indexes;
            when falsy only ``path`` is treated as one
        :raises AssertionError: if options remain for a non-string value
            that no branch handled
    """
    extras = {}
    # Iterate over a snapshot: the loop body deletes entries from `keywords`,
    # which raises "dictionary changed size during iteration" when looping
    # over the live view on Python 3 (on Python 2 `items()` already is a
    # list, so this is a no-op there).
    for key, value in list(keywords.items()):
        if key.endswith('_usage'):          # convert old-style parameters
            category, spec = value.split(':', 1)
            extras[key[:-6]] = {category: spec}
            del keywords[key]
        elif isinstance(value, dict):       # unify dict parameters
            keywords[key] = value['query']
            del value['query']
            extras[key] = value
        elif hasattr(value, 'query'):       # unify object parameters
            keywords[key] = value.query
            extra = dict()
            for arg in query_args:
                arg_val = getattr(value, arg, None)
                if arg_val is not None:
                    extra[arg] = arg_val
            extras[key] = extra
        elif key in ignored:
            del keywords[key]

    # find EPI indexes: a name counts as one when the schema has all three
    # of its `<name>_string`, `<name>_depth` and `<name>_parents` fields
    if schema:
        epi_indexes = {}
        for name in schema.keys():
            parts = name.split('_')
            if parts[-1] in ['string', 'depth', 'parents']:
                count = epi_indexes.get(parts[0], 0)
                epi_indexes[parts[0]] = count + 1
        epi_indexes = [k for k, v in epi_indexes.items() if v == 3]
    else:
        epi_indexes = ['path']

    # Snapshot again: the branches below both delete keys from and insert
    # new keys into `keywords`.
    for key, value in list(keywords.items()):
        args = extras.get(key, {})
        if key == 'SearchableText':
            pattern = getattr(config, 'search_pattern', '')
            simple_term = isSimpleTerm(value)
            if pattern and isSimpleSearch(value):
                base_value = value
                if simple_term: # use prefix/wildcard search
                    value = '(%s* OR %s)' % (prepare_wildcard(value), value)
                elif isWildCard(value):
                    value = prepare_wildcard(value)
                    base_value = quote(value.replace('*', '').replace('?', ''))
                # simple queries use custom search pattern
                value = pattern.format(value=quote(value),
                                       base_value=base_value)
                keywords[key] = set([value])    # add literal query parameter
                continue
            elif simple_term: # use prefix/wildcard search
                keywords[key] = '(%s* OR %s)' % (
                    prepare_wildcard(value), value)
                continue
        if key in epi_indexes:
            # the query moves from `<key>` to `<key>_parents`
            path = keywords['%s_parents' % key] = value
            del keywords[key]
            if 'depth' in args:
                depth = int(args['depth'])
                if depth >= 0:
                    if not isinstance(value, (list, tuple)):
                        path = [path]
                    tmpl = '(+%s_depth:[%d TO %d] AND +%s_parents:%s)'
                    params = keywords['%s_parents' % key] = set()
                    for p in path:
                        base = len(p.split('/'))
                        # `depth and 1` is 0 for depth 0 and 1 otherwise, so
                        # the lower bound is `base` only when depth is 0
                        params.add(tmpl % (key, base + (depth and 1), base + depth, key, p))
                del args['depth']
        elif key == 'effectiveRange':
            if isinstance(value, DateTime):
                steps = getattr(config, 'effective_steps', 1)
                if steps > 1:
                    # round the timestamp down to a multiple of `steps`
                    value = DateTime(value.timeTime() // steps * steps)
                value = iso8601date(value)
            del keywords[key]
            keywords['effective'] = '[* TO %s]' % value
            keywords['expires'] = '[%s TO *]' % value
        elif key == 'show_inactive':
            del keywords[key]           # marker for `effectiveRange`
        elif 'range' in args:
            if not isinstance(value, (list, tuple)):
                value = [value]
            payload = map(iso8601date, value)
            keywords[key] = ranges[args['range']] % tuple(payload)
            del args['range']
        elif 'operator' in args:
            if isinstance(value, (list, tuple)) and len(value) > 1:
                sep = ' %s ' % args['operator'].upper()
                value = sep.join(map(str, map(iso8601date, value)))
                keywords[key] = '(%s)' % value
            del args['operator']
        elif key == 'allowedRolesAndUsers':
            if getattr(config, 'exclude_user', False):
                token = 'user$' + getSecurityManager().getUser().getId()
                if token in value:
                    value.remove(token)
        elif isinstance(value, DateTime):
            keywords[key] = iso8601date(value)
        # NOTE(review): `basestring` only exists on Python 2
        elif not isinstance(value, basestring):
            assert not args, 'unsupported usage: %r' % args
예제 #8
0
 def testSimpleSearch(self):
     """Exercise ``isSimpleSearch`` with accepted and rejected queries."""
     def simple(query):
         self.assertTrue(isSimpleSearch(query))

     def not_simple(query):
         self.assertFalse(isSimpleSearch(query))

     # plain words, wildcards and quoted phrases
     simple('foo')
     simple('foo bar')
     simple('foo bar ')
     simple('foo   bar')
     simple(u'føø bär')
     simple('føø bär')
     simple('foo*')
     simple('foo* bar*')
     simple('*foo*')
     simple('"foo"')
     simple('"foo bar"')
     simple('"foo AND bar"')
     simple('foo "AND" bar')
     simple('"foo" "bar"')
     simple('fo?bar')
     simple('foo bar?')
     simple('areallyverylongword andanotherreallylongwordwithsomecake')
     simple('areallyverylongword andanotherreallylongwordwithsomecake *')

     # empty input, punctuation, operators, field queries and numbers
     not_simple('')
     not_simple(u'føø bär!')
     not_simple(unicode('föö bär', 'latin'))
     not_simple('foo AND bar')
     not_simple('foo OR bar')
     not_simple('foo NOT bar')
     not_simple('"foo" OR bar')
     not_simple('(foo OR bar)')
     not_simple('+foo')
     not_simple('name:foo')
     not_simple('foo && bar')
     not_simple('2000')
     not_simple('foo 2000')
     not_simple('foo 1/2000')
     not_simple('foo 42 bar11')

     simple('2000 foo')
예제 #9
0
 def testSimpleSearch(self):
     """Run ``isSimpleSearch`` against a table of (expected, query) pairs."""
     expectations = [
         (True, "foo"),
         (True, "foo bar"),
         (True, "foo bar "),
         (True, "foo   bar"),
         (True, u"føø bär"),
         (True, "føø bär"),
         (True, "foo*"),
         (True, "foo* bar*"),
         (True, "*foo*"),
         (True, '"foo"'),
         (True, '"foo bar"'),
         (True, '"foo AND bar"'),
         (True, 'foo "AND" bar'),
         (True, '"foo" "bar"'),
         (True, "fo?bar"),
         (True, "foo bar?"),
         (True, "areallyverylongword andanotherreallylongwordwithsomecake"),
         (True, "areallyverylongword andanotherreallylongwordwithsomecake *"),
         (False, ""),
         (False, u"føø bär!"),
         # XXX Why would this be false?
         # (False, six.text_type('föö bär', 'latin')),
         (False, "foo AND bar"),
         (False, "foo OR bar"),
         (False, "foo NOT bar"),
         (False, '"foo" OR bar'),
         (False, "(foo OR bar)"),
         (False, "+foo"),
         (False, "name:foo"),
         (False, "foo && bar"),
         (True, "2000"),
         (True, "foo 2000"),
         (False, "foo 1/2000"),
         (True, "foo 42 bar11"),
         (True, "2000 foo"),
     ]
     # checked in table order, so the first mismatch is the one reported
     for expected, query in expectations:
         check = self.assertTrue if expected else self.assertFalse
         check(isSimpleSearch(query))