def test_nrt():
    """Near-real-time indexing: added docs become visible on refresh, durable on commit."""
    nrt_indexer = engine.Indexer(nrt=True)
    nrt_indexer.add()
    # the added doc is not yet visible and the reader is stale
    assert nrt_indexer.count() == 0
    assert not nrt_indexer.current
    nrt_indexer.refresh()
    # refresh exposes the doc to the NRT reader without committing
    assert nrt_indexer.count() == 1
    assert nrt_indexer.current
    plain_searcher = engine.IndexSearcher(nrt_indexer.directory)
    # a directory-based searcher sees only committed state: nothing yet
    assert plain_searcher.count() == 0
    assert plain_searcher.current
    nrt_indexer.add()
    nrt_indexer.commit()
    # after commit, both the indexer and a fresh searcher see both docs
    assert nrt_indexer.count() == 2
    assert engine.IndexSearcher(nrt_indexer.directory).count() == 2
def __init__(self, *directories, **kwargs):
    """Open a searcher over one or more index directories.

    A single directory gets a plain ``IndexSearcher``; multiple
    directories are wrapped in a ``MultiSearcher``.  Remaining keyword
    arguments are passed through to the searcher constructor.
    """
    # MultiSearcher takes the directories as one sequence argument,
    # IndexSearcher takes a single directory positionally
    self.searcher = (
        engine.MultiSearcher(directories, **kwargs)
        if len(directories) > 1
        else engine.IndexSearcher(*directories, **kwargs)
    )
    self.updated = time.time()
    self.query_map = {}
def test_indexes(tempdir):
    """Exercise indexer construction, merging, snapshots, and index checking."""
    # a searcher requires at least one directory argument
    with pytest.raises(TypeError):
        engine.IndexSearcher()
    # 'r' is not a valid open mode for an Indexer on a fresh directory
    with pytest.raises(lucene.JavaError):
        engine.Indexer(tempdir, 'r')
    indexer = engine.Indexer()
    indexer.set('name', engine.Field.String, stored=True)
    indexer.set('text', engine.Field.Text)
    with engine.Indexer(tempdir) as temp:
        temp.add()
    # exiting the context after an error still raises; the first add succeeds
    with pytest.raises(KeyError), engine.Indexer(tempdir) as temp:
        temp.add()
        temp.add(missing='')
    # += merges from another indexer, a directory object, or a path
    for other in (temp, temp.directory, tempdir):
        indexer += other
    assert len(indexer) == 3
    analyzer = engine.Analyzer.whitespace()
    indexer.add(text=analyzer.tokens('?'), name=util.BytesRef('{}'))
    indexer.commit()
    assert indexer[next(indexer.docs('text', '?'))]['name'] == '{}'
    indexer.delete('text', '?')
    # merge=True expunges deletes; merge=1 forces a single segment
    indexer.commit(merge=True)
    assert not indexer.hasDeletions()
    indexer.commit(merge=1)
    assert len(list(indexer.readers)) == 1
    reader = engine.indexers.IndexReader(indexer.indexReader)
    # deleting the wrapped attribute breaks delegation to the Java reader
    del reader.indexReader
    with pytest.raises(AttributeError):
        reader.maxDoc
    del indexer.indexSearcher
    with pytest.raises(AttributeError):
        indexer.search
    indexer = engine.Indexer(tempdir)
    indexer.add()
    indexer.commit()
    files = set(os.listdir(tempdir))
    path = os.path.join(tempdir, 'temp')
    # a snapshot pins the current commit's files while the index moves on
    with indexer.snapshot() as commit:
        indexer.commit(merge=1)
        assert indexer.indexCommit.generation > commit.generation
        engine.indexers.copy(commit, path)
        assert set(os.listdir(path)) == set(commit.fileNames) < files < set(os.listdir(tempdir))
        filepath = os.path.join(path, commit.segmentsFileName)
        os.remove(filepath)
        # truncate the segments file so a re-copy detects the mismatch
        open(filepath, 'w').close()
        with pytest.raises(OSError):
            engine.indexers.copy(commit, path)
        # checking fails while the writer still holds the index open
        with pytest.raises(lucene.JavaError):
            indexer.check(tempdir)
    del indexer
    assert engine.Indexer(tempdir)
    # the snapshotted commit's segments file is gone once released
    assert not os.path.exists(os.path.join(tempdir, commit.segmentsFileName))
    assert engine.IndexWriter.check(tempdir).clean
    assert not engine.IndexWriter.check(tempdir, fix=True).numBadSegments
def __init__(self, *directories, **kwargs):
    """Open a searcher, optionally primed with remote indexing urls.

    ``urls`` (popped from kwargs) seeds a deque of hosts; when present,
    an IndexWriter is opened and immediately closed first — presumably
    to ensure the index exists before any searcher opens it.  One
    directory yields an ``IndexSearcher``, several a ``MultiSearcher``;
    remaining keyword arguments go to the searcher constructor.
    """
    self.urls = collections.deque(kwargs.pop('urls', ()))
    if self.urls:
        engine.IndexWriter(*directories).close()
    if len(directories) > 1:
        self.searcher = engine.MultiSearcher(directories, **kwargs)
    else:
        self.searcher = engine.IndexSearcher(*directories, **kwargs)
    self.updated = time.time()
    self.query_map = {}
def test_grouping(tempdir, indexer, zipcodes):
    """Exercise nested fields, faceting, grouping, and index copying on zipcode data."""
    # nested field indexes 'state', 'state.county', and 'state.county.city' terms
    field = indexer.fields['location'] = engine.NestedField('state.county.city', docValuesType='sorted')
    for doc in zipcodes:
        if doc['state'] in ('CA', 'AK', 'WY', 'PR'):
            # fixed-width formatting keeps coordinates lexicographically sortable
            lat, lng = ('{0:08.3f}'.format(doc.pop(l)) for l in ['latitude', 'longitude'])
            location = '.'.join(doc[name] for name in ['state', 'county', 'city'])
            indexer.add(doc, latitude=lat, longitude=lng, location=location)
    indexer.commit()
    states = list(indexer.terms('state'))
    assert states[0] == 'AK' and states[-1] == 'WY'
    # prefix search on the nested field matches exactly the CA counties
    counties = [term.split('.')[-1] for term in indexer.terms('state.county', 'CA')]
    hits = indexer.search(field.prefix('CA'))
    assert sorted({hit['county'] for hit in hits}) == counties
    assert counties[0] == 'Alameda' and counties[-1] == 'Yuba'
    cities = [term.split('.')[-1] for term in indexer.terms('state.county.city', 'CA.Los Angeles')]
    hits = indexer.search(field.prefix('CA.Los Angeles'))
    assert sorted({hit['city'] for hit in hits}) == cities
    assert cities[0] == 'Acton' and cities[-1] == 'Woodland Hills'
    (hit, ) = indexer.search('zipcode:90210')
    assert hit['state'] == 'CA' and hit['county'] == 'Los Angeles' and hit['city'] == 'Beverly Hills' and hit['longitude'] == '-118.406'
    query = Q.prefix('zipcode', '90')
    # facet counts keyed by the nested field name
    ((field, facets), ) = indexer.facets(query, 'state.county').items()
    assert field == 'state.county'
    la, orange = sorted(filter(facets.get, facets))
    assert la == 'CA.Los Angeles' and facets[la] > 100
    assert orange == 'CA.Orange' and facets[orange] > 10
    # facets may also be driven by explicit per-term queries
    queries = {term: Q.term(field, term) for term in indexer.terms(field, 'CA.')}
    ((field, facets), ) = indexer.facets(query, **{field: queries}).items()
    assert all(value.startswith('CA.') for value in facets) and set(facets) == set(queries)
    assert facets['CA.Los Angeles'] == 264
    groups = indexer.groupby(field, Q.term('state', 'CA'), count=1)
    # only the top group is materialized, but the total group count remains
    assert len(groups) == 1 < groups.count
    (hits, ) = groups
    assert hits.value == 'CA.Los Angeles' and len(hits) == 1 and hits.count > 100
    grouping = engine.documents.GroupingSearch(field, sort=search.Sort(indexer.sortfield(field)), cache=False, allGroups=True)
    assert all(grouping.search(indexer.indexSearcher, Q.alldocs()).facets.values())
    assert len(grouping) == len(list(grouping)) > 100
    assert set(grouping) > set(facets)
    # timeout=-1 expires immediately: no hits and an undefined max score
    hits = indexer.search(query, timeout=-1)
    assert not hits and not hits.count and math.isnan(hits.maxscore)
    hits = indexer.search(query, timeout=10)
    assert len(hits) == hits.count == indexer.count(query) and hits.maxscore == 1.0
    directory = store.RAMDirectory()
    query = Q.term('state', 'CA')
    # copy only the matching docs into a new directory
    size = indexer.copy(directory, query)
    searcher = engine.IndexSearcher(directory)
    assert len(searcher) == size and list(searcher.terms('state')) == ['CA']
    path = os.path.join(tempdir, 'temp')
    # copy the complement, merged into a single segment
    size = indexer.copy(path, exclude=query, merge=1)
    assert len(searcher) + size == len(indexer)
    searcher = engine.IndexSearcher(path)
    assert len(searcher.segments) == 1 and 'CA' not in searcher.terms('state')
    directory.close()