def transform_to_index(my_nodes):
    """Build one Lucene document per node and return them as a list.

    Each document receives stored text fields:

    * ``id``    -- the node's ``id`` attribute, passed through unchanged;
    * ``label`` -- the items of ``node.label`` joined with single spaces;
    * ``next``  -- one field per entry of ``node.inputlabel``, each entry
      joined with single spaces;
    * ``prev``  -- one field per entry of ``node.outputlabel``, each entry
      joined with single spaces.

    The returned list follows the iteration order of ``my_nodes``.
    """

    def stored_text(name, text):
        # Every field in this index uses the same stored text field type.
        return document.Field(name, text, document.TextField.TYPE_STORED)

    documents = []
    for node in my_nodes:
        entry = document.Document()
        entry.add(stored_text("id", node.id))
        entry.add(stored_text("label", " ".join(node.label)))
        # NOTE(review): input labels are written to "next" and output labels
        # to "prev"; this mapping is kept as-is -- confirm it is intended.
        for tokens in node.inputlabel:
            entry.add(stored_text("next", " ".join(tokens)))
        for tokens in node.outputlabel:
            entry.add(stored_text("prev", " ".join(tokens)))
        documents.append(entry)
    return documents
def items(self, *values: str) -> Iterator[document.Field]:
    """Generate indexed component fields.

    For every input value, ``self.values(value)`` is paired with
    ``self.names`` and one ``document.Field`` is yielded per pair.  The
    field type passed to ``document.Field`` is ``self.docValueLess`` when
    that attribute exists, otherwise ``self``.  When ``self.docvalues`` is
    truthy, each field is immediately followed by a
    ``self.docValueClass`` instance holding the same text wrapped in a
    ``util.BytesRef``.
    """
    field_type = getattr(self, 'docValueLess', self)
    for value in values:
        components = zip(self.names, self.values(value))
        for name, text in components:
            yield document.Field(name, text, field_type)
            if not self.docvalues:
                continue
            yield self.docValueClass(name, util.BytesRef(text))
def items(self, *values) -> Iterator[document.Field]:
    """Generate lucene Fields suitable for adding to a document.

    Fields are produced in three passes over ``values``:

    1. If ``self.docvalues`` is truthy, one ``self.docValueClass`` per
       value.  ``int`` values are passed through ``int``, ``float`` values
       through ``util.NumericUtils.doubleToSortableLong``, and any other
       type through ``util.BytesRef`` (dispatch is on the exact type).
       The remaining passes then read their settings from
       ``self.docValueLess`` when that attribute exists.
    2. If ``dimensions`` is truthy, a ``document.LongPoint`` for each
       ``int`` value and a ``document.DoublePoint`` for every other value.
    3. If ``indexed`` is truthy, a ``document.Field`` per value; otherwise,
       if ``stored`` is truthy, a ``document.StoredField`` per value.
    """
    field = self
    if self.docvalues:
        converters = {int: int, float: util.NumericUtils.doubleToSortableLong}
        for value in values:
            convert = converters.get(type(value), util.BytesRef)
            yield self.docValueClass(self.name, convert(value))
        # Doc values were emitted above; the remaining fields must be built
        # from the variant without them, when one is available.
        field = getattr(self, 'docValueLess', self)

    if field.dimensions:
        for value in values:
            if isinstance(value, int):
                point = document.LongPoint(field.name, int(value))
            else:
                point = document.DoublePoint(field.name, value)
            yield point

    if field.indexed:
        yield from (document.Field(field.name, value, field) for value in values)
    elif field.stored:
        yield from (document.StoredField(field.name, value) for value in values)
def test_fields(indexer, constitution):
    """Exercise field construction, settings, and field-based queries.

    The first half checks ``engine.Field`` / ``engine.NestedField``
    construction and settings in isolation.  The second half indexes the
    amendment documents from ``constitution`` and verifies range, prefix
    and sorted searches against the resulting index.
    """
    # --- invalid construction -------------------------------------------
    with pytest.raises(lucene.InvalidArgsError):
        engine.Field('', stored='invalid')
    with pytest.raises(AttributeError):
        engine.Field('', invalid=None)
    # The suppressed exception type differs from the one raised, so the
    # JavaError is expected to propagate through the suppress context.
    timeout_error = search.TimeLimitingCollector.TimeExceededException
    with pytest.raises(lucene.JavaError), engine.utils.suppress(timeout_error):
        document.Field('name', 'value', document.FieldType())

    # --- field types match their lucene counterparts --------------------
    no_store = document.Field.Store.NO
    expected_string = document.StringField('', '', no_store).fieldType()
    assert str(engine.Field.String('')) == str(expected_string)
    expected_text = document.TextField('', '', no_store).fieldType()
    assert str(engine.Field.Text('')) == str(expected_text)
    expected_point = document.DoublePoint('', 0.0).fieldType()
    assert str(engine.DateTimeField('')) == str(expected_point)

    # --- settings round-trip --------------------------------------------
    settings = {'docValuesType': 'NUMERIC', 'indexOptions': 'DOCS'}
    numeric = engine.Field('', **settings)
    rebuilt = engine.Field('', **numeric.settings)
    assert numeric.settings == rebuilt.settings == settings

    nested = engine.NestedField('', stored=True)
    assert nested.settings == {
        'stored': True,
        'tokenized': False,
        'omitNorms': True,
        'indexOptions': 'DOCS',
    }

    attrs = (
        'stored',
        'omitNorms',
        'storeTermVectors',
        'storeTermVectorPositions',
        'storeTermVectorOffsets',
    )
    flagged = engine.Field('', indexOptions='docs', **dict.fromkeys(attrs, True))
    (lucene_field,) = flagged.items(' ')
    field_type = lucene_field.fieldType()
    assert all(getattr(field_type, attr)() for attr in attrs)

    # --- build the index ------------------------------------------------
    indexer.set('amendment', engine.Field.String, stored=True)
    indexer.set('size', engine.Field.String, stored=True, docValuesType='sorted')
    date_field = engine.NestedField('Y-m-d', sep='-', stored=True)
    indexer.fields['date'] = date_field
    for doc in constitution:
        if 'amendment' not in doc:
            continue
        indexer.add(
            amendment='{:02}'.format(int(doc['amendment'])),
            date=doc['date'],
            size='{:04}'.format(len(doc['text'])),
        )
    indexer.commit()

    field_names = set(indexer.fieldinfos)
    assert field_names == {'amendment', 'Y', 'Y-m', 'Y-m-d', 'size'}
    assert str(indexer.fieldinfos['amendment'].indexOptions) == 'DOCS'

    # --- term range and prefix queries ----------------------------------
    assert indexer.count(Q.range('amendment', '', '10')) == 9
    assert indexer.count(Q.prefix('amendment', '0')) == 9
    assert indexer.count(date_field.prefix('1791-12-15')) == 10

    month_range = date_field.range('', '1921-12', lower=False, upper=True)
    assert str(month_range) == 'Y-m:{ TO 1921-12]', month_range
    assert indexer.count(month_range) == 19

    day_range = date_field.range('1919-01-01', '1921-12-31')
    assert str(day_range) == 'Y-m-d:[1919-01-01 TO 1921-12-31}'
    hits = indexer.search(day_range)
    assert [hit['amendment'] for hit in hits] == ['18', '19']
    assert [hit['Y-m-d'].split('-')[0] for hit in hits] == ['1919', '1920']

    # --- sorting by the stored size field -------------------------------
    sizes = {doc_id: int(indexer[doc_id]['size']) for doc_id in indexer}
    large_ids = sorted(
        (doc_id for doc_id, size in sizes.items() if size >= 1000),
        key=sizes.get,
    )
    large_query = Q.range('size', '1000', None)
    hits = indexer.search(large_query).sorted(sizes.get)
    assert list(hits.ids) == large_ids

    hits = indexer.search(large_query, count=3, sort='size')
    assert list(hits.ids) == large_ids[:len(hits)]
    hits.select('amendment')
    first = hits[0].dict()
    assert math.isnan(first.pop('__score__'))
    assert first == {'amendment': '20', '__id__': 19, '__sortkeys__': ('1923',)}

    small_query = Q.range('size', None, '1000')
    assert indexer.count(small_query) == len(sizes) - len(large_ids)