Esempio n. 1
0
def mangleSearchableText(value, config):
    """Rewrite a simple search string into the expression used for querying.

    ``config`` may be falsy, in which case ``getConfig()`` supplies it.
    Values rejected by ``isSimpleSearch`` are returned unchanged.  Otherwise
    every term yielded by ``splitSimpleSearch`` is expanded with
    ``makeSimpleExpressions`` and the expansions are joined with single
    spaces.  If the configuration provides a non-empty ``search_pattern``,
    the joined text is substituted into that pattern and the result is
    returned wrapped in a one-element ``set``; without a pattern the joined
    text itself is returned.
    """
    config = config or getConfig()
    pattern = getattr(config, 'search_pattern', u'')
    if pattern:
        pattern = pattern.encode('utf-8')
    distance = getattr(config, 'levenshtein_distance', 0)

    if not isSimpleSearch(value):
        return value

    # each expansion is a (value, base value) pair
    expanded = [makeSimpleExpressions(term, distance)
                for term in splitSimpleSearch(value)]
    value = ' '.join([term_value for term_value, _ in expanded])
    base_value = ' '.join([term_base for _, term_base in expanded])
    if not pattern:
        return value
    # wrapped in a set: add literal query parameter
    return set([pattern.format(value=quote(value), base_value=base_value)])
Esempio n. 2
0
    def add(self, boost_values=None, atomic_updates=True, **fields):
        """Build the XML ``<add>`` message for one document and submit it.

        ``fields`` maps field names to values; list and tuple values produce
        one ``<field>`` element per item.  The special ``commitWithin`` key
        is removed from ``fields`` and, if truthy, emitted as an attribute of
        the ``<add>`` element.  ``boost_values`` maps field names to boosts,
        with the empty string ``''`` holding the boost for the whole
        document.

        ``atomic_updates`` is only the fallback used when the configuration
        object has no ``atomic_updates`` attribute; a configured value wins.
        With atomic updates enabled, every field except the unique key gets
        an ``update="set"`` attribute.

        Returns the result of ``doUpdateXML``, or ``None`` when the document
        lacks the schema's unique key and is therefore skipped.  Raises
        ``Exception`` when the schema does not name a unique key.
        """
        solr_config = getConfig()
        atomic_updates_enabled = getattr(solr_config,
                                         'atomic_updates',
                                         atomic_updates)

        schema = self.get_schema()
        uniqueKey = schema.get('uniqueKey', None)
        if uniqueKey is None:
            raise Exception("Could not get uniqueKey from Solr schema")

        if uniqueKey not in fields:
            # ``Logger.warn`` is a deprecated alias of ``Logger.warning``.
            logger.warning("uniqueKey '%s' missing for item %s, skipping" %
                           (uniqueKey, fields))
            return

        within = fields.pop('commitWithin', None)
        if within:
            lst = ['<add commitWithin="%s">' % str(within)]
        else:
            lst = ['<add>']
        if boost_values is None:
            boost_values = {}
        if '' in boost_values:      # boost value for the entire document
            lst.append('<doc boost="%s">' % boost_values[''])
        else:
            lst.append('<doc>')

        for f, v in fields.items():
            # Each element is assembled from a finished opening tag plus the
            # escaped value instead of a two-step ``%`` template, so a ``%``
            # inside a field name or boost value is never mistaken for a
            # format directive.
            is_unique_key = f == uniqueKey
            attrs = ''
            if not is_unique_key:
                # The unique key field never gets boost/update attributes.
                if f in boost_values:
                    attrs += ' boost="%s"' % boost_values[f]
                if atomic_updates_enabled:
                    # Left out when disabled, since update="set" breaks the
                    # index time boosting.
                    attrs += ' update="set"'
            opening = '<field name="%s"%s>' % (self.escapeKey(f), attrs)

            if isinstance(v, (list, tuple)) and not is_unique_key:
                values = v          # multi-valued
            else:
                values = [v]
            for value in values:
                lst.append('%s%s</field>' % (opening, self.escapeVal(value)))

        lst.append('</doc>')
        lst.append('</add>')
        xstr = ''.join(lst)

        if self.conn.debuglevel > 0:
            logger.info('Update message:\n' + xstr)

        return self.doUpdateXML(xstr)
Esempio n. 3
0
    def test_add(self):
        """Adding a document sends the expected request and parses the reply.

        Atomic updates are switched on in the configuration first.  The
        schema is fetched once through a faked response so that it is cached
        before the add request is issued.
        """
        config = getConfig()
        config.atomic_updates = True
        add_request = getData('add_request.txt')
        add_response = getData('add_response.txt')

        c = SolrConnection(host='localhost:8983', persistent=True)

        # fake schema response - caches the schema
        fakehttp(c, getData('schema.xml'))
        c.get_schema()

        output = fakehttp(c, add_response)
        c.add(id='500', name='python test doc')
        res = c.flush()
        self.assertEqual(len(res), 1)   # one request was sent
        res = res[0]
        # ``failUnlessEqual`` is a deprecated alias of ``assertEqual``
        self.assertEqual(str(output), add_request)
        # Status
        node = res.findall(".//int")[0]
        self.assertEqual(node.attrib['name'], 'status')
        self.assertEqual(node.text, '0')
        # QTime
        node = res.findall(".//int")[1]
        self.assertEqual(node.attrib['name'], 'QTime')
        self.assertEqual(node.text, '4')
Esempio n. 4
0
    def testFacetDependencies(self):
        """Facet dependency info is read from config, context and request.

        Each later source overrides the earlier one: the configuration
        utility first, then the ``facet_fields`` attribute of the context,
        then the ``facet.field`` request parameter.
        """
        cfg = getConfig()
        # dependency info can be set via the configuration utility...
        cfg.facets = ['foo:bar']
        try:
            context = Dummy()
            request = {}
            view = DummyView(context, request)
            self.assertEqual(facetParameters(view),
                             (['foo:bar'], dict(foo=['bar'])))
            # overridden on the context
            context.facet_fields = ['bar:foo']
            self.assertEqual(facetParameters(view),
                             (['bar:foo'], dict(bar=['foo'])))
            # and via the request
            request['facet.field'] = ['foo:bar', 'bar:foo']
            self.assertEqual(
                facetParameters(view),
                (['foo:bar', 'bar:foo'], dict(foo=['bar'], bar=['foo']))
            )
            # white space shouldn't matter
            request['facet.field'] = ['foo : bar', 'bar  :foo']
            self.assertEqual(
                facetParameters(view),
                (['foo : bar', 'bar  :foo'], dict(foo=['bar'], bar=['foo']))
            )
        finally:
            # XXX: Manually clean up after the test. We should be able to
            # remove this once our test isolation issues have been dealt
            # with.  Done in ``finally`` so that a failing assertion does
            # not leave the facets configured.
            cfg.facets = []
Esempio n. 5
0
 def setHost(self, active=False, host='localhost', port=8983, base='/solr'):
     """Store the connection parameters in the configuration.

     ``host`` and ``base`` are converted with ``unicode`` before being
     stored.  Afterwards the current connection is closed with
     ``clearSchema=True``.
     """
     settings = getConfig()
     settings.active = active
     settings.host = unicode(host)
     settings.port = port
     settings.base = unicode(base)
     self.closeConnection(clearSchema=True)
Esempio n. 6
0
 def setUp(self):
     """Create an active manager with a cached schema and a processor.

     Atomic updates are enabled in the configuration as the last step.
     """
     manager = self.mngr = SolrConnectionManager()
     manager.setHost(active=True)
     # fake the schema response, then read the schema so it gets cached
     fakehttp(manager.getConnection(), getData('schema.xml'))
     manager.getSchema()
     self.proc = SolrIndexProcessor(manager)
     getConfig().atomic_updates = True
Esempio n. 7
0
    def setUp(self):
        """Create manager, connection and processor, and capture warnings.

        ``logger_indexer.warning`` is replaced by a function that collects
        its positional arguments in ``self.log``.  Atomic updates are
        enabled in the configuration as the last step.
        """
        manager = self.mngr = SolrConnectionManager()
        manager.setHost(active=True)
        self.conn = manager.getConnection()
        self.proc = SolrIndexProcessor(manager)
        self.log = []                   # catch log messages...

        def capture(*messages):
            self.log.extend(messages)
        logger_indexer.warning = capture
        getConfig().atomic_updates = True
Esempio n. 8
0
    def testLocalConnections(self):
        """Each thread gets its own connection while sharing the processor.

        A second thread indexes a document through the shared manager and
        processor and records the request it sent, the processor and its
        connection.  The main thread then checks that the recorded
        connection differs from its own one.
        """
        config = getConfig()
        config.atomic_updates = True
        mngr = SolrConnectionManager(active=True)
        proc = SolrIndexProcessor(mngr)
        mngr.setHost(active=True)
        schema = getData('schema.xml')
        log = []

        def runner():
            # fake schema response on solr connection - caches the schema
            fakehttp(mngr.getConnection(), getData('schema.xml'))
            mngr.getConnection().get_schema()

            fakehttp(mngr.getConnection(), schema)      # fake schema response
            # read and cache the schema
            mngr.getSchema()
            response = getData('add_response.txt')
            # fake add response
            output = fakehttp(mngr.getConnection(), response)
            # indexing sends data
            proc.index(Foo(id='500', name='python test doc'))
            mngr.closeConnection()
            log.append(str(output))
            log.append(proc)
            log.append(mngr.getConnection())
        # after the runner was set up, another thread can be created and
        # started;  its output should contain the proper indexing request,
        # whereas the main thread's connection remain idle;  the latter
        # cannot be checked directly, but the connection object would raise
        # an exception if it was used to send a request without setting up
        # a fake response beforehand...
        thread = Thread(target=runner)
        thread.start()
        thread.join()
        conn = mngr.getConnection()         # get this thread's connection
        fakehttp(conn, schema)              # fake schema response
        mngr.getSchema()                    # read and cache the schema
        mngr.closeConnection()
        mngr.setHost(active=False)
        self.assertEqual(len(log), 3)
        self.assertEqual(sortFields(log[0]), getData(
            'add_request.txt').rstrip('\n'))
        # ``failUnless`` is a deprecated alias of ``assertTrue``
        self.assertTrue(isinstance(log[1], SolrIndexProcessor))
        self.assertTrue(isinstance(log[2], SolrConnection))
        self.assertTrue(isinstance(proc, SolrIndexProcessor))
        self.assertTrue(isinstance(conn, SolrConnection))
        self.assertEqual(log[1], proc)      # processors should be the same...
        self.assertNotEqual(log[2], conn)   # but not the connections
 def testExcludeUserFromAllowedRolesAndUsers(self):
     """The user entry is only dropped when ``exclude_user`` is set."""
     config = getConfig()
     schema = queryUtility(ISearch).getManager().getSchema() or {}

     def mangled():
         # build a fresh query each time, as mangleQuery works in place
         query = {'allowedRolesAndUsers': ['Member', 'user$test_user_1_']}
         mangleQuery(query, config, schema)
         return query

     # first test the default setting, i.e. not removing the user
     self.assertEqual(
         mangled(),
         {'allowedRolesAndUsers': ['Member', 'user$test_user_1_']})
     # now let's remove it...
     config.exclude_user = True
     self.assertEqual(mangled(), {'allowedRolesAndUsers': ['Member']})
Esempio n. 10
0
 def commit(self, wait=None):
     """Commit or flush pending changes and close the connection.

     Nothing happens when no connection is available.  ``wait`` is passed
     on as ``waitSearcher``; unless it is a ``bool``, it defaults to the
     negation of the configuration's ``async`` setting.  Connection
     errors during the commit are logged and not re-raised.
     """
     conn = self.getConnection()
     if conn is None:
         return
     config = getConfig()
     if not isinstance(wait, bool):
         # ``async`` is a reserved word from Python 3.7 on, so the
         # attribute has to be read via ``getattr`` to keep this module
         # importable there; the lookup itself is unchanged.
         wait = not getattr(config, 'async')
     try:
         logger.debug('committing')
         if not config.auto_commit or config.commit_within:
             # If we have commitWithin enabled, we never want to do
             # explicit commits. Even though only add's support this
             # and we might wait a bit longer on delete's this way
             conn.flush()
         else:
             conn.commit(waitSearcher=wait)
     except (SolrConnectionException, error):
         logger.exception('exception during commit')
     self.manager.closeConnection()
Esempio n. 11
0
 def testFacetParameters(self):
     """Facet fields are taken from config, context and request in turn.

     Each later source overrides the earlier one.  The configured facets
     are reset afterwards, the same way ``testFacetDependencies`` does,
     so the setting does not leak into other tests.
     """
     context = Dummy()
     request = {}
     view = DummyView(context, request)
     # with nothing set up, no facets will be returned
     self.assertEqual(facetParameters(view), ([], {}))
     # setting up the regular config utility should give the default value
     cfg = getConfig()
     self.assertEqual(facetParameters(view), ([], {}))
     # so let's set it...
     cfg.facets = ['foo']
     try:
         self.assertEqual(facetParameters(view), (['foo'], {}))
         # override the setting on the context
         context.facet_fields = ['bar']
         self.assertEqual(facetParameters(view), (['bar'], {}))
         # and again via the request
         request['facet.field'] = ['foo', 'bar']
         self.assertEqual(facetParameters(view),
                          (['foo', 'bar'], {}))
     finally:
         # clean up manually, even if one of the assertions failed
         cfg.facets = []
Esempio n. 12
0
    def test_add_with_boost_values(self):
        """Adding with boost values sends the expected boosted request.

        Atomic updates are switched off both in the configuration and via
        the ``atomic_updates`` argument.  The boost stored under the empty
        string applies to the entire document.
        """
        config = getConfig()
        config.atomic_updates = False
        add_request = getData('add_request_with_boost_values.txt')
        add_response = getData('add_response.txt')
        c = SolrConnection(host='localhost:8983', persistent=True)

        # fake schema response - caches the schema
        fakehttp(c, getData('schema.xml'))
        c.get_schema()

        output = fakehttp(c, add_response)
        boost = {'': 2, 'id': 0.5, 'name': 5}
        c.add(boost_values=boost,
              atomic_updates=False,  # Force disabling atomic updates
              id='500',
              name='python test doc')

        res = c.flush()
        self.assertEqual(len(res), 1)   # one request was sent
        # ``failUnlessEqual`` is a deprecated alias of ``assertEqual``
        self.assertEqual(str(output), add_request)
Esempio n. 13
0
 def setUp(self):
     """Provide a sample object and the schema request line.

     Atomic updates are enabled in the configuration as the last step.
     """
     self.foo = Foo(id='500', name='python test doc')
     self.schema_request = 'GET /solr/admin/file/?file=schema.xml'
     getConfig().atomic_updates = True
Esempio n. 14
0
 def getConfig(self):
     """Return the configuration, looking it up on first use.

     The result of the module-level ``getConfig()`` is stored in
     ``self.config`` and reused as long as that attribute is not ``None``.
     """
     cached = self.config
     if cached is None:
         cached = self.config = getConfig()
     return cached
Esempio n. 15
0
 def testFilterQuerySubstitution(self):
     """Configured filter queries are moved from the query into ``fq``.

     The scenarios are listed as a table: each entry holds a value for
     ``config.filter_queries`` and the calls to check with that setting,
     given as pairs of keyword parameters and the expected
     ``(query, params)`` result.
     """
     def optimize(**params):
         query = dict(a='a:23', b='b:42', c='c:(23 42)')
         optimizeQueryParameters(query, params)
         return query, params

     untouched = (dict(a='a:23', b='b:42', c='c:(23 42)'), dict())
     # first test without the configuration utility
     self.assertEqual(optimize(), untouched)
     # now unconfigured...
     config = getConfig()
     self.assertEqual(optimize(), untouched)

     scenarios = [
         ([u'a'], [
             ({},
              (dict(b='b:42', c='c:(23 42)'), dict(fq=['a:23']))),
             ({'fq': 'x:13'},
              (dict(b='b:42', c='c:(23 42)'), dict(fq=['x:13', 'a:23']))),
             ({'fq': ['x:13', 'y:17']},
              (dict(b='b:42', c='c:(23 42)'),
               dict(fq=['x:13', 'y:17', 'a:23']))),
         ]),
         (['a', 'c'], [
             ({},
              (dict(b='b:42'), dict(fq=['a:23', 'c:(23 42)']))),
             ({'fq': 'x:13'},
              (dict(b='b:42'), dict(fq=['x:13', 'a:23', 'c:(23 42)']))),
             ({'fq': ['x:13', 'y:17']},
              (dict(b='b:42'),
               dict(fq=['x:13', 'y:17', 'a:23', 'c:(23 42)']))),
         ]),
         # also test substitution of combined filter queries
         (['a c'], [
             ({},
              (dict(b='b:42'), dict(fq=['a:23 c:(23 42)']))),
         ]),
         (['a c', 'b'], [
             ({},
              ({'*': '*:*'}, dict(fq=['a:23 c:(23 42)', 'b:42']))),
         ]),
         # for multiple matches the first takes precedence
         (['a', 'a c', 'b'], [
             ({},
              (dict(c='c:(23 42)'), dict(fq=['a:23', 'b:42']))),
         ]),
         # parameters not contained in the query must not be converted
         (['a nonexisting', 'b'], [
             ({},
              (dict(a='a:23', c='c:(23 42)'), dict(fq=['b:42']))),
         ]),
     ]
     for filter_queries, checks in scenarios:
         config.filter_queries = filter_queries
         for params, expected in checks:
             self.assertEqual(optimize(**params), expected)
Esempio n. 16
0
 def setUp(self):
     """Register the configuration as ``ISolrConnectionConfig`` utility."""
     config = getConfig()
     self.config = config
     provideUtility(config, ISolrConnectionConfig)