def update(self, **caches):
    """Refresh index version.

    **POST** /update

    Reopen searcher, optionally reloading caches, and return document count.

    {"spellcheckers": true,... }

    .. versionchanged:: 1.2
        request body is an object instead of an array

    :param caches: flags forwarded to ``searcher.reopen`` selecting which caches to reload
    :return: *int* number of documents in the refreshed searcher
    """
    names = ()
    # Replication mode: try each remote url in order until one syncs;
    # unreachable hosts are dropped from the rotation.
    while self.urls:
        url = self.urls[0]
        try:
            names = self.sync(url)  # copies new segment files; returns their names
            break
        except IOError:
            # host is down — remove it (suppress in case another thread already did)
            with contextlib.suppress(ValueError):
                self.urls.remove(url)
    self.searcher = self.searcher.reopen(**caches)
    self.updated = time.time()
    if names:
        # files were copied in: commit them so the index references the new segments
        engine.IndexWriter(self.searcher.directory).close()
    if not self.urls and hasattr(self, 'fields'):
        # all replication sources are gone — promote this read-only resource
        # to a writable indexer, carrying over analyzer, shared state, and fields
        other = WebIndexer(self.searcher.directory, analyzer=self.searcher.analyzer)
        other.indexer.shared, other.indexer.fields = self.searcher.shared, self.fields
        # locate the cherrypy app currently serving this root (exactly one expected)
        app, = (app for app in cherrypy.tree.apps.values() if app.root is self)
        mount(other, app=app, autoupdate=getattr(self, 'autoupdate', 0))
    return len(self.searcher)
def test_facets(tempdir, servers, zipcodes):
    """Faceted and grouped searches over CA zipcode data via the HTTP resource."""
    writer = engine.IndexWriter(tempdir)
    writer.commit()
    # start the server read-only on the committed (empty) index, then add docs behind it
    resource = servers.start(servers.ports[0], '-r', tempdir)
    writer.set('zipcode', dimensions=1, stored=True)
    writer.fields['location'] = engine.NestedField('county.city', docValuesType='sorted')
    for doc in zipcodes:
        if doc['state'] == 'CA':
            writer.add(zipcode=int(doc['zipcode']), location='{}.{}'.format(doc['county'], doc['city']))
    writer.commit()
    # server must pick up the new commit on update and report the full doc count
    assert resource.post('update') == resource().popitem()[1] == len(writer)
    result = resource.search(count=0, facets='county', **{'count.min': 10000})
    facets = result['facets']['county']
    # facet counts across counties must account for every matched doc
    assert result['count'] == sum(facets.values()) and 'Los Angeles' in facets
    result = resource.search(q='Los Angeles', count=0, facets='county.city', **{'q.type': 'term', 'q.field': 'county'})
    facets = result['facets']['county.city']
    # nested facet values are prefixed by their parent county
    assert result['count'] == sum(facets.values()) and all(location.startswith('Los Angeles.') for location in facets)
    result = resource.search(count=0, facets='county', **{'facets.count': 3})
    # top-3 facets by count
    assert sorted(result['facets']['county']) == ['Los Angeles', 'Orange', 'San Diego']
    result = resource.search(count=0, facets='county', **{'facets.min': 140})
    # same three counties exceed the minimum-count threshold
    assert sorted(result['facets']['county']) == ['Los Angeles', 'Orange', 'San Diego']
    result = resource.search(
        q='Los Angeles', group='county.city', **{'group.count': 2, 'q.field': 'county', 'q.type': 'prefix'}
    )
    assert all(group['value'].startswith('Los Angeles') for group in result['groups'])
    # group counts, facet counts, and the total hit count must all agree
    assert sum(map(operator.itemgetter('count'), result['groups'])) == sum(facets.values()) == result['count']
def __init__(self, *directories, **kwargs):
    """Open a searcher over the given directories.

    An optional ``urls`` keyword seeds the rotation of replication hosts;
    remaining keywords are forwarded to the underlying searcher.
    """
    self.urls = collections.deque(kwargs.pop('urls', ()))
    if self.urls:
        # replication target: make sure an index exists locally first
        engine.IndexWriter(*directories).close()
    if len(directories) > 1:
        self.searcher = engine.MultiSearcher(directories, **kwargs)
    else:
        self.searcher = engine.IndexSearcher(*directories, **kwargs)
    self.updated = time.time()
    self.query_map = {}
def test_config(tempdir, servers):
    """Exercise server config: validation headers, autoreload, and etag format."""
    # running the server module with this test file as config must fail (non-zero exit)
    assert subprocess.call((sys.executable, '-m', 'lupyne.server', '-c', __file__), stderr=subprocess.PIPE)
    engine.IndexWriter(tempdir).close()
    config = {'tools.validate.last_modified': True, 'tools.validate.expires': 0, 'tools.validate.max_age': 0}
    client = servers.start(servers.ports[0], tempdir, tempdir, '--autoreload=0.1', **config).client
    response = client.get()
    assert response.ok
    # root resource maps a single directory to its (empty) document count
    ((directory, size),) = response.json().items()
    assert 'Directory@' in directory and size == 0
    assert int(response.headers['age']) >= 0
    assert response.headers['cache-control'] == 'max-age=0'
    assert float(response.headers['x-response-time']) > 0.0
    dates = [parsedate(response.headers[header]) for header in ('last-modified', 'expires', 'date')]
    assert all(dates) and sorted(dates) == dates
    # weak etag of the form W/"<version>"; locals were typo'd as `w`/`verion`
    weak, version, _ = response.headers['etag'].split('"')
    assert weak == 'W/' and version.isdigit()
def test_replication(tempdir, servers):
    """Replicate from a primary to a secondary via a snapshot file copy."""
    primary = servers.start(servers.ports[0], tempdir, '--autoupdate=1')
    backup = os.path.join(tempdir, 'backup')
    engine.IndexWriter(backup).close()
    secondary = servers.start(servers.ports[1], '-r', backup, '--autoupdate=1')
    primary.post('docs', [{}])
    # nonexistent or malformed snapshot ids are rejected
    assert primary.client.get('update/0').status_code == http.client.NOT_FOUND
    assert primary.client.get('update/x').status_code == http.client.NOT_FOUND
    # create a snapshot; the response lists its files and points at its location
    response = primary.client.post('update', {'snapshot': True})
    assert response.status_code == http.client.CREATED
    location = response.headers['location']
    filenames = response.json()
    assert primary.get(location) == filenames
    # replicate by copying every snapshot file into the secondary's directory
    for filename in filenames:
        shutil.copy(os.path.join(tempdir, filename), backup)
    # releasing the snapshot returns the same file list
    assert primary.delete(location) == filenames
    # wait past the secondary's autoupdate interval so it reopens on the copied files
    time.sleep(1.1)
    assert secondary.get('docs') == [0]
def index(tempdir, fields, constitution):
    """Build an index of the constitution corpus in tempdir and return its path."""
    with engine.IndexWriter(tempdir) as writer:
        # register each field definition under its own name
        for field in fields:
            writer.fields[field.name] = field
        for doc in constitution:
            writer.add(doc)
    return tempdir