예제 #1
0
def makeSimpleExpressions(term, levenstein_distance):
    """Build the ``(value, base_value)`` search expressions for one term.

    ``levenstein_distance``, when truthy, is appended as a ``~<distance>``
    suffix.  Three kinds of term are distinguished:

    * terms containing a double quote are used literally (plus the
      distance suffix) for both results;
    * terms for which ``isWildCard`` is true go through
      ``prepare_wildcard``; the base value is the term with ``*`` and
      ``?`` removed, passed through ``quote``;
    * any other term becomes ``<prepared>* OR <term><suffix>``, with a
      leading ``*`` when the configuration sets ``prefix_wildcard``.

    Both results are returned wrapped in parentheses.
    """
    settings = getConfig()
    leading_star = "*" if getattr(settings, "prefix_wildcard", False) else ""
    fuzzy_suffix = "~%s" % levenstein_distance if levenstein_distance else ""

    if '"' in term:
        # quoted literal: the same expression serves as value and base value
        expression = "%s%s" % (term, fuzzy_suffix)
        base_expression = expression
    elif isWildCard(term):
        expression = prepare_wildcard(term)
        without_wildcards = term.replace("*", "").replace("?", "")
        base_expression = quote(without_wildcards)
    else:
        pieces = (leading_star, prepare_wildcard(term), term, fuzzy_suffix)
        expression = "%s%s* OR %s%s" % pieces
        base_expression = term
    return "(%s)" % expression, "(%s)" % base_expression
예제 #2
0
def makeSimpleExpressions(term, levenstein_distance):
    '''Build the parenthesised ``(value, base_value)`` pair for one term.

    A truthy ``levenstein_distance`` is rendered as a ``~<distance>``
    suffix.  Quoted terms (containing ``"``) are taken literally, wildcard
    terms (per ``isWildCard``) are run through ``prepare_wildcard``, and
    every other term is searched both as a prefix and as the term itself.
    '''
    fuzzy = '~%s' % levenstein_distance if levenstein_distance else ''

    if '"' in term:
        # quoted literal: value and base value are the same expression
        literal = '(%s)' % ('%s%s' % (term, fuzzy))
        return literal, literal

    if isWildCard(term):
        without_wildcards = term.replace('*', '').replace('?', '')
        return ('(%s)' % prepare_wildcard(term),
                '(%s)' % quote(without_wildcards))

    prefix_or_term = '%s* OR %s%s' % (prepare_wildcard(term), term, fuzzy)
    return '(%s)' % prefix_or_term, '(%s)' % term
예제 #3
0
def makeSimpleExpressions(term, levenstein_distance):
    """Return the ``(value, base_value)`` expressions for a query term.

    ``value`` carries the wildcard / fuzzy decorations, ``base_value`` is
    the undecorated counterpart; both come back wrapped in parentheses.
    A falsy ``levenstein_distance`` adds no ``~`` suffix.
    """
    suffix = ""
    if levenstein_distance:
        suffix = "~%s" % levenstein_distance

    base_value = term
    if '"' in term:
        # quoted literals are used as given, for both results
        value = base_value = "%s%s" % (term, suffix)
    elif not isWildCard(term):
        # plain term: match it as a prefix or as the (fuzzy) term itself
        value = "%s* OR %s%s" % (prepare_wildcard(term), term, suffix)
    else:
        value = prepare_wildcard(term)
        stripped = term.replace("*", "").replace("?", "")
        base_value = quote(stripped)
    return "(%s)" % value, "(%s)" % base_value
예제 #4
0
def makeSimpleExpressions(term, levenstein_distance):
    '''Return ``(value, base_value)`` search expressions for one term.

    ``value`` is wrapped in parentheses and carries the wildcard and
    ``~<distance>`` decorations; ``base_value`` is returned without
    surrounding parentheses (see the note above the return statement).
    '''
    distance_marker = ''
    if levenstein_distance:
        distance_marker = '~%s' % levenstein_distance

    if '"' in term:
        # quoted literal: one expression for both results
        expr = base = '%s%s' % (term, distance_marker)
    elif isWildCard(term):
        expr, base = (prepare_wildcard(term),
                      quote(term.replace('*', '').replace('?', '')))
    else:
        expr = '%s* OR %s%s' % (prepare_wildcard(term), term,
                                distance_marker)
        base = term
    # Netsight: the parentheses around base_value were removed; the first
    #           element of the returned tuple (value) is not used.
    return '(%s)' % expr, base
예제 #5
0
 def buildQuery(self, default=None, **args):
     """Build a mapping of Solr query clauses for simple use-cases.

     ``default`` is stored under the ``None`` key and looked up via the
     schema's ``defaultSearchField``; every other keyword argument is
     treated as a field name.  Arguments whose field is missing from the
     schema or not indexed are dropped with a warning; falsy non-boolean
     values are skipped.

     Returns a dict mapping each accepted name (``None`` for the default
     term) to its clause string.
     """
     logger.debug('building query for "%r", %r', default, args)
     schema = self.getManager().getSchema() or {}
     fallback_field = getattr(schema, 'defaultSearchField', None)
     args[None] = default

     def as_literal(item):
         # list items should be treated as literals, but
         # nevertheless only get quoted when necessary
         if isinstance(item, unicode):
             item = item.encode('utf-8')
         quoted = quote(item)
         if not (quoted.startswith('"') or quoted == item):
             quoted = quote('"' + item + '"')
         return quoted

     clauses = {}
     for name, value in args.items():
         field = schema.get(name or fallback_field, None)
         if field is None or not field.indexed:
             logger.warning('dropping unknown search attribute "%s" '
                 ' (%r) for query: %r', name, value, args)
             continue

         if isinstance(value, bool):
             value = str(value).lower()
         elif not value:
             # solr doesn't like empty fields (+foo:"")
             continue
         elif field.class_ == 'solr.BoolField':
             candidates = value
             if not isinstance(candidates, (tuple, list)):
                 candidates = [candidates]
             falses = '0', 'False', MV
             verdicts = {bool(v) and v not in falses for v in candidates}
             if len(verdicts) != 1:
                 assert len(verdicts) == 2      # just to make sure
                 continue                       # skip when "true or false"
             value = str(verdicts.pop()).lower()
         elif isinstance(value, (tuple, list)):
             value = '(%s)' % ' OR '.join(as_literal(v) for v in value)
         elif isinstance(value, set):
             # sets are taken literally and bypass the '+' prefixing below
             literal = ' OR '.join(value)
             if len(value) != 1:
                 literal = '(%s)' % literal
             clauses[name] = literal
             continue
         elif isinstance(value, basestring):
             if field.class_ != 'solr.TextField':
                 value = quote(value)
             else:
                 if isWildCard(value):
                     value = prepare_wildcard(value)
                 value = quote(value, textfield=True)
                 # if we have an intra-word hyphen, we need quotes
                 escaped_sign = '\\-' in value or '\\+' in value
                 if escaped_sign and value[0] != '"':
                     value = '"%s"' % value
             if not value:
                 # don't search for empty strings, even quoted
                 continue
         else:
             logger.info('skipping unsupported value "%r" (%s)',
                 value, name)
             continue

         if name is not None:
             value = '+%s:%s' % (name, value)
         elif value and value[0] not in '+-':
             value = '+%s' % value
         clauses[name] = value
     logger.debug('built query "%s"', clauses)
     return clauses
예제 #6
0
 def testIsWildCard(self):
     """Check ``isWildCard`` for str, unicode and utf-8 encoded terms."""
     wildcard_terms = (
         'foo*', 'fo?', 'fo?o', 'fo*oo', 'fo?o*', '*foo', '*foo*',
         'foo* bar', 'foo bar?', '*', '?',
         u'føø*', u'føø*'.encode('utf-8'), u'*føø*',
     )
     for term in wildcard_terms:
         self.assertTrue(isWildCard(term))

     plain_terms = (
         'foo', 'fo#o', 'foo bar', u'føø', u'føø'.encode('utf-8'),
         # other characters might be meaningful in solr, but we don't
         # distinguish them properly (yet)
         'foo#?',
     )
     for term in plain_terms:
         self.assertFalse(isWildCard(term))
예제 #7
0
파일: search.py 프로젝트: FHNW/ftw.solr
def buildQuery(self, default=None, **args):
    """Build a mapping of Solr query clauses for simple use-cases.

    ``default`` is only considered when it is not ``None`` and the schema
    names a ``defaultSearchField``; it is then stored under the ``None``
    key.  Arguments are processed in sorted order; those whose field is
    unknown or not indexed are dropped with a warning, and falsy
    non-boolean values are skipped.  Slashes in the resulting clauses are
    backslash-escaped.

    Returns a dict mapping each accepted name to its clause string.
    """
    logger.debug('building query for "%r", %r', default, args)
    schema = self.getManager().getSchema() or {}
    defaultSearchField = getattr(schema, 'defaultSearchField', None)
    if not (default is None or defaultSearchField is None):
        args[None] = default

    def literal_item(term):
        # list items should be treated as literals, but
        # nevertheless only get quoted when necessary
        if isinstance(term, unicode):
            term = term.encode('utf-8')
        candidate = quote(term)
        if not (candidate.startswith('"') or candidate == term):
            candidate = quote('"' + term + '"')
        return candidate

    result = {}
    for name, value in sorted(args.items()):
        field = schema.get(name or defaultSearchField, None)
        if field is None or not field.indexed:
            logger.warning(
                'dropping unknown search attribute "%s" '
                ' (%r) for query: %r', name, value, args)
            continue

        if isinstance(value, bool):
            value = str(value).lower()
        elif not value:
            # solr doesn't like empty fields (+foo:"")
            continue
        elif field.class_ == 'solr.BoolField':
            items = value if isinstance(value, (tuple, list)) else [value]
            falses = '0', 'False', MV
            outcomes = set(bool(v) and v not in falses for v in items)
            if len(outcomes) != 1:
                assert len(outcomes) == 2  # just to make sure
                continue  # skip when "true or false"
            value = str(outcomes.pop()).lower()
        elif isinstance(value, (tuple, list)):
            value = '(%s)' % ' OR '.join(literal_item(v) for v in value)
        elif isinstance(value, set):
            # sets are taken literally; only slashes get escaped
            joined = ' OR '.join(value)
            if len(value) != 1:
                joined = '(%s)' % joined
            result[name] = joined.replace('/', '\\/')
            continue
        elif isinstance(value, basestring):
            if field.class_ != 'solr.TextField':
                value = quote(value)
            else:
                if isWildCard(value):
                    value = prepare_wildcard(value)
                value = quote(value, textfield=True)
                # if we have an intra-word hyphen, we need quotes
                has_escaped_sign = '\\-' in value or '\\+' in value
                if has_escaped_sign and value[0] != '"':
                    value = '"%s"' % value
            if not value:
                # don't search for empty strings, even quoted
                continue
        else:
            logger.info('skipping unsupported value "%r" (%s)', value, name)
            continue

        if name is not None:
            value = '+%s:%s' % (name, value)
        elif value and value[0] not in '+-':
            value = '+%s' % value

        # Since Solr 4.0 slashes must be escaped
        # see: http://wiki.apache.org/solr/SolrQuerySyntax
        result[name] = value.replace('/', '\\/')

    logger.debug('built query "%s"', result)
    return result
예제 #8
0
def mangleQuery(keywords, config, schema):
    """Translate Zope-specific query parameters into Solr constructs.

    ``keywords`` is modified in place: entries are rewritten, renamed or
    removed.  No value is returned by the code shown here.

    :param keywords: mapping of query parameter names to values.
    :param config: object read via ``getattr`` for ``search_pattern``,
        ``effective_steps`` and ``exclude_user``.
    :param schema: mapping whose keys are field names; when falsy, only
        ``path`` is treated as an extended path index.
    """
    # Per-key options (e.g. ``depth``, ``range``, ``operator``) split off
    # from the actual query values.
    extras = {}
    # NOTE(review): entries are deleted from ``keywords`` while looping
    # over ``keywords.items()``; this relies on ``items()`` returning a
    # list (Python 2).  Under Python 3 it would raise RuntimeError.
    for key, value in keywords.items():
        if key.endswith('_usage'):          # convert old-style parameters
            # "category:spec" is stored under the key minus its
            # 6-character ``_usage`` suffix
            category, spec = value.split(':', 1)
            extras[key[:-6]] = {category: spec}
            del keywords[key]
        elif isinstance(value, dict):       # unify dict parameters
            # the 'query' entry becomes the value (and is removed from the
            # passed-in dict); what remains are the extras
            keywords[key] = value['query']
            del value['query']
            extras[key] = value
        elif hasattr(value, 'query'):       # unify object parameters
            keywords[key] = value.query
            extra = dict()
            # copy every attribute named in ``query_args`` that is set
            for arg in query_args:
                arg_val = getattr(value, arg, None)
                if arg_val is not None:
                    extra[arg] = arg_val
            extras[key] = extra
        elif key in ignored:
            del keywords[key]

    # find EPI indexes
    # A prefix qualifies when the schema holds exactly three names that
    # start with it and end in ``_string``, ``_depth`` or ``_parents``.
    if schema:
        epi_indexes = {}
        for name in schema.keys():
            parts = name.split('_')
            if parts[-1] in ['string', 'depth', 'parents']:
                count = epi_indexes.get(parts[0], 0)
                epi_indexes[parts[0]] = count + 1
        epi_indexes = [k for k, v in epi_indexes.items() if v == 3]
    else:
        epi_indexes = ['path']

    # Second pass: rewrite individual parameters.  As above, ``keywords``
    # is modified (keys added and removed) during the iteration.
    for key, value in keywords.items():
        args = extras.get(key, {})
        if key == 'SearchableText':
            pattern = getattr(config, 'search_pattern', '')
            simple_term = isSimpleTerm(value)
            if pattern and isSimpleSearch(value):
                base_value = value
                if simple_term: # use prefix/wildcard search
                    value = '(%s* OR %s)' % (prepare_wildcard(value), value)
                elif isWildCard(value):
                    value = prepare_wildcard(value)
                    # NOTE(review): the wildcards are stripped from the
                    # already prepared value, not from the original term
                    base_value = quote(value.replace('*', '').replace('?', ''))
                # simple queries use custom search pattern
                value = pattern.format(value=quote(value),
                    base_value=base_value)
                # stored as a set; presumably so the query builder takes
                # it literally -- verify against the caller
                keywords[key] = set([value])    # add literal query parameter
                continue
            elif simple_term: # use prefix/wildcard search
                keywords[key] = '(%s* OR %s)' % (
                    prepare_wildcard(value), value)
                continue
        if key in epi_indexes:
            # the value moves from ``<key>`` to ``<key>_parents``
            path = keywords['%s_parents' % key] = value
            del keywords[key]
            if 'depth' in args:
                depth = int(args['depth'])
                if depth >= 0:
                    if not isinstance(value, (list, tuple)):
                        path = [path]
                    tmpl = '(+%s_depth:[%d TO %d] AND +%s_parents:%s)'
                    params = keywords['%s_parents' % key] = set()
                    for p in path:
                        # number of '/'-separated parts of the path
                        base = len(p.split('/'))
                        # lower bound is ``base`` for depth 0, else base + 1
                        params.add(tmpl % (key, base + (depth and 1), base + depth, key, p))
                del args['depth']
        elif key == 'effectiveRange':
            if isinstance(value, DateTime):
                steps = getattr(config, 'effective_steps', 1)
                if steps > 1:
                    # round down to a multiple of ``steps``
                    # (presumably seconds -- confirm ``timeTime()`` units)
                    value = DateTime(value.timeTime() // steps * steps)
                value = iso8601date(value)
            del keywords[key]
            # replaced by two range clauses on ``effective`` / ``expires``
            keywords['effective'] = '[* TO %s]' % value
            keywords['expires'] = '[%s TO *]' % value
        elif key == 'show_inactive':
            del keywords[key]           # marker for `effectiveRange`
        elif 'range' in args:
            if not isinstance(value, (list, tuple)):
                value = [value]
            payload = map(iso8601date, value)
            # ``ranges`` maps the range name to a format template
            keywords[key] = ranges[args['range']] % tuple(payload)
            del args['range']
        elif 'operator' in args:
            # only multi-valued parameters are joined with the operator
            if isinstance(value, (list, tuple)) and len(value) > 1:
                sep = ' %s ' % args['operator'].upper()
                value = sep.join(map(str, map(iso8601date, value)))
                keywords[key] = '(%s)' % value
            del args['operator']
        elif key == 'allowedRolesAndUsers':
            if getattr(config, 'exclude_user', False):
                # drop the current user's own token; this changes the
                # passed-in value in place
                token = 'user$' + getSecurityManager().getUser().getId()
                if token in value:
                    value.remove(token)
        elif isinstance(value, DateTime):
            keywords[key] = iso8601date(value)
        elif not isinstance(value, basestring):
            # any extras left over at this point are not supported
            assert not args, 'unsupported usage: %r' % args
예제 #9
0
    def buildQueryAndParameters(self, default=None, **args):
        """Build the Solr query clauses together with the query parameters.

        The keyword arguments are first split into parameters (via
        ``subtractQueryParameters`` / ``cleanupQueryParameters``) and
        search arguments; the latter are run through ``prepareData`` and
        ``mangleQuery`` and then turned into clause strings.

        ``default`` is stored under the ``None`` key and looked up via the
        schema's ``defaultSearchField``.  Arguments whose field is unknown
        or not indexed are dropped.  A falsy, non-boolean value for a
        named field aborts the whole build.

        Returns a ``(query, params)`` tuple; ``query`` is a dict mapping
        argument names to clause strings and is empty when the build was
        aborted.
        """
        # A single schema snapshot serves both the parameter cleanup and
        # the query building below; nothing in between rebinds it.
        schema = self.getManager().getSchema() or {}

        params = subtractQueryParameters(args)
        params = cleanupQueryParameters(params, schema)
        config = self.getConfig()

        prepareData(args)
        mangleQuery(args, config, schema)

        logger.debug('building query for "%r", %r', default, args)
        defaultSearchField = getattr(schema, 'defaultSearchField', None)
        args[None] = default
        query = {}

        for name, value in sorted(args.items()):
            field = schema.get(name or defaultSearchField, None)
            if field is None or not field.indexed:
                logger.info(
                    'dropping unknown search attribute "%s" '
                    ' (%r) for query: %r', name, value, args
                )
                continue
            if isinstance(value, bool):
                value = str(value).lower()
            elif not value:     # solr doesn't like empty fields (+foo:"")
                if not name:
                    # an empty default term is simply left out
                    continue
                # let the logger interpolate, so the message is only
                # formatted when the record is actually emitted
                logger.info(
                    'empty search term form "%s:%s", aborting buildQuery',
                    name, value
                )
                return {}, params
            elif field.class_ == 'solr.BoolField':
                if not isinstance(value, (tuple, list)):
                    value = [value]
                falses = '0', 'False', MV
                value = set(bool(v) and v not in falses for v in value)
                if len(value) != 1:
                    assert len(value) == 2      # just to make sure
                    continue                    # skip when "true or false"
                value = str(value.pop()).lower()
            elif isinstance(value, (tuple, list)):
                # list items should be treated as literals, but
                # nevertheless only get quoted when necessary
                value = '(%s)' % ' OR '.join(map(quote_iterable_item, value))
            elif isinstance(value, set):        # sets are taken literally
                if len(value) == 1:
                    query[name] = ''.join(value)
                else:
                    query[name] = '(%s)' % ' OR '.join(value)
                continue
            elif isinstance(value, basestring):
                if field.class_ == 'solr.TextField':
                    if isWildCard(value):
                        value = prepare_wildcard(value)
                    value = quote(value, textfield=True)
                    # if we have an intra-word hyphen, we need quotes
                    if '\\-' in value or '\\+' in value:
                        if value[0] != '"':
                            value = '"%s"' % value
                else:
                    value = quote(value)
                if not value:   # don't search for empty strings, even quoted
                    continue
            else:
                logger.info(
                    'skipping unsupported value "%r" (%s)', value, name
                )
                continue
            if name is None:
                # the default term only gets a '+' when it carries no sign
                if value and value[0] not in '+-':
                    value = '+%s' % value
            else:
                value = '+%s:%s' % (name, value)
            query[name] = value
        logger.debug('built query "%s"', query)

        # only a non-empty query is handed to the optimizer
        if query:
            optimizeQueryParameters(query, params)
        return query, params
예제 #10
0
 def testIsWildCard(self):
     """Run ``isWildCard`` over terms expected to match and not to match."""
     expectations = (
         (self.assertTrue, (
             'foo*', 'fo?', 'fo?o', 'fo*oo', 'fo?o*',
             '*foo', '*foo*', 'foo* bar', 'foo bar?',
             '*', '?',
             u'føø*', u'føø*'.encode('utf-8'), u'*føø*',
         )),
         (self.assertFalse, (
             'foo', 'fo#o', 'foo bar',
             u'føø', u'føø'.encode('utf-8'),
             # other characters might be meaningful in solr, but we don't
             # distinguish them properly (yet)
             'foo#?',
         )),
     )
     for check, terms in expectations:
         for term in terms:
             check(isWildCard(term))
예제 #11
0
 def testIsWildCard(self):
     """Verify the ``isWildCard`` verdict for a table of sample terms."""
     cases = [
         ("foo*", True),
         ("fo?", True),
         ("fo?o", True),
         ("fo*oo", True),
         ("fo?o*", True),
         ("*foo", True),
         ("*foo*", True),
         ("foo* bar", True),
         ("foo bar?", True),
         ("*", True),
         ("?", True),
         (u"føø*", True),
         (u"føø*".encode("utf-8"), True),
         (u"*føø*", True),
         ("foo", False),
         ("fo#o", False),
         ("foo bar", False),
         (u"føø", False),
         (u"føø".encode("utf-8"), False),
         # other characters might be meaningful in solr, but we don't
         # distinguish them properly (yet)
         ("foo#?", False),
     ]
     for term, expected in cases:
         verify = self.assertTrue if expected else self.assertFalse
         verify(isWildCard(term))