예제 #1
0
파일: documents.py 프로젝트: napoler/lupyne
 def items(self, *values) -> Iterator[document.Field]:
     """Yield the lucene fields that represent ``values`` for this field.

     Up to three passes are made over ``values``, in this order:

     1. If ``docvalues`` is set, one ``docValueClass`` instance per value.
        ``int`` values are passed through ``int``, ``float`` values through
        ``NumericUtils.doubleToSortableLong``, and any other exact type is
        wrapped in ``BytesRef``.  The remaining passes then consult the
        ``docValueLess`` attribute when it exists, otherwise this field.
     2. If ``dimensions`` is set, a ``LongPoint`` for each ``int`` instance
        and a ``DoublePoint`` for everything else.
     3. If ``indexed`` is set, a ``document.Field`` per value; otherwise, if
        ``stored`` is set, a ``document.StoredField`` per value.
     """
     # Work through a local alias so the doc-values step can swap in the
     # ``docValueLess`` variant without rebinding ``self``.
     field = self
     if field.docvalues:
         converters = {int: int, float: util.NumericUtils.doubleToSortableLong}
         yield from (
             field.docValueClass(
                 field.name,
                 # Lookup is by exact type, so subclasses fall back to BytesRef.
                 converters.get(type(value), util.BytesRef)(value),
             )
             for value in values
         )
         field = getattr(field, 'docValueLess', field)
     if field.dimensions:
         yield from (
             document.LongPoint(field.name, int(value))
             if isinstance(value, int)
             else document.DoublePoint(field.name, value)
             for value in values
         )
     if field.indexed:
         # The field object itself is handed over as the third argument
         # (presumably acting as the lucene field type -- confirm against the class).
         yield from (document.Field(field.name, value, field) for value in values)
     elif field.stored:
         yield from (document.StoredField(field.name, value) for value in values)
예제 #2
0
def test_fields(indexer, constitution):
    """Check field construction, settings round-trips and field-based queries.

    The first half validates ``engine.Field`` arguments and compares engine
    field types with their lucene counterparts.  The second half indexes the
    amendments found in ``constitution`` and asserts on range, prefix and
    sorted searches over the ``amendment``, ``date`` and ``size`` fields.
    """
    # Invalid constructor arguments are rejected.
    with pytest.raises(lucene.InvalidArgsError):
        engine.Field('', stored='invalid')
    with pytest.raises(AttributeError):
        engine.Field('', invalid=None)
    # Only TimeExceededException is suppressed; a JavaError is expected to escape.
    with pytest.raises(lucene.JavaError), engine.utils.suppress(
            search.TimeLimitingCollector.TimeExceededException):
        document.Field('name', 'value', document.FieldType())

    # Engine field types render the same as the equivalent lucene field types.
    assert str(engine.Field.String('')) == str(
        document.StringField('', '', document.Field.Store.NO).fieldType()
    )
    assert str(engine.Field.Text('')) == str(
        document.TextField('', '', document.Field.Store.NO).fieldType()
    )
    assert str(engine.DateTimeField('')) == str(document.DoublePoint('', 0.0).fieldType())

    # Settings survive a round trip through the constructor.
    expected = {'docValuesType': 'NUMERIC', 'indexOptions': 'DOCS'}
    numeric = engine.Field('', **expected)
    assert numeric.settings == engine.Field('', **numeric.settings).settings == expected
    nested = engine.NestedField('', stored=True)
    assert nested.settings == {
        'stored': True,
        'tokenized': False,
        'omitNorms': True,
        'indexOptions': 'DOCS',
    }

    # Every boolean flag passed to the engine field is set on the generated field type.
    flags = 'stored', 'omitNorms', 'storeTermVectors', 'storeTermVectorPositions', 'storeTermVectorOffsets'
    flagged = engine.Field('', indexOptions='docs', **dict.fromkeys(flags, True))
    [generated] = flagged.items(' ')
    assert all(getattr(generated.fieldType(), flag)() for flag in flags)

    # Register the fields and index every entry that has an amendment number.
    indexer.set('amendment', engine.Field.String, stored=True)
    indexer.set('size', engine.Field.String, stored=True, docValuesType='sorted')
    date_field = engine.NestedField('Y-m-d', sep='-', stored=True)
    indexer.fields['date'] = date_field
    for entry in constitution:
        if 'amendment' not in entry:
            continue
        indexer.add(
            amendment='{:02}'.format(int(entry['amendment'])),
            date=entry['date'],
            size='{:04}'.format(len(entry['text'])),
        )
    indexer.commit()
    assert set(indexer.fieldinfos) == {'amendment', 'Y', 'Y-m', 'Y-m-d', 'size'}
    assert str(indexer.fieldinfos['amendment'].indexOptions) == 'DOCS'

    # Range and prefix queries on the zero-padded amendment strings.
    assert indexer.count(Q.range('amendment', '', '10')) == 9
    assert indexer.count(Q.prefix('amendment', '0')) == 9

    # Prefix and range queries built by the nested date field.
    assert indexer.count(date_field.prefix('1791-12-15')) == 10
    through_1921 = date_field.range('', '1921-12', lower=False, upper=True)
    assert str(through_1921) == 'Y-m:{ TO 1921-12]', through_1921
    assert indexer.count(through_1921) == 19
    around_1920 = date_field.range('1919-01-01', '1921-12-31')
    assert str(around_1920) == 'Y-m-d:[1919-01-01 TO 1921-12-31}'
    found = indexer.search(around_1920)
    assert [hit['amendment'] for hit in found] == ['18', '19']
    assert [hit['Y-m-d'].split('-')[0] for hit in found] == ['1919', '1920']

    # Sorting on the stored size: compare against ids ordered by their parsed size.
    size_by_id = {doc_id: int(indexer[doc_id]['size']) for doc_id in indexer}
    large_ids = sorted(
        (doc_id for doc_id, size in size_by_id.items() if size >= 1000),
        key=size_by_id.get,
    )
    at_least_1000 = Q.range('size', '1000', None)
    found = indexer.search(at_least_1000).sorted(size_by_id.get)
    assert list(found.ids) == large_ids
    found = indexer.search(at_least_1000, count=3, sort='size')
    assert list(found.ids) == large_ids[:len(found)]
    found.select('amendment')
    first = found[0].dict()
    # The score is asserted to be NaN for this sorted search; drop it before comparing.
    assert math.isnan(first.pop('__score__'))
    assert first == {'amendment': '20', '__id__': 19, '__sortkeys__': ('1923', )}

    # The complementary range matches every remaining document.
    assert indexer.count(Q.range('size', None, '1000')) == len(size_by_id) - len(large_ids)