Ejemplo n.º 1
0
    def testCreateFacet(self):
        """Check that registered drilldown fields end up as FacetFields.

        Fields 4, 5, 6 and 8 are registered as drilldown fields and are
        expected to come out as FacetFields (one per added value); the
        remaining fields are expected to come out as regular search fields.
        """
        fields = {
            'field1': ['value1'],
            'sorted.field3': ['value3'],
            'untokenized.field4': ['value4'],
            'untokenized.field5': ['value5', 'value6'],
            # a '/' inside a value is expected to stay a single path element
            'untokenized.field6': ['value5/value6'],
            # not registered as drilldown field, so a plain search field
            'untokenized.field7': ['valuex'],
            # hierarchical: every value is itself a list of path elements
            'untokenized.field8': [['grandparent', 'parent', 'child'],
                                   ['parent2', 'child']]
        }
        fields2LuceneDoc = Fields2LuceneDoc(
            'tsname',
            fieldRegistry=FieldRegistry(drilldownFields=[
                DrilldownField('untokenized.field4'),
                DrilldownField('untokenized.field5'),
                DrilldownField('untokenized.field6'),
                DrilldownField('untokenized.field8', hierarchical=True),
            ]))
        observer = CallTrace()
        fields2LuceneDoc.addObserver(observer)
        fields2LuceneDoc.ctx.tx = Transaction('tsname')
        fields2LuceneDoc.ctx.tx.locals['id'] = 'identifier'
        for field, values in fields.items():
            for value in values:
                fields2LuceneDoc.addField(field, value)

        consume(fields2LuceneDoc.commit('unused'))

        # the first call recorded by the observer carries the built document
        document = observer.calledMethods[0].kwargs['document']
        searchFields = [
            f for f in document.getFields() if not FacetField.instance_(f)
        ]
        # NOTE(review): the expected orderings below presumably depend on the
        # iteration order of the `fields` dict -- confirm before porting.
        self.assertEqual(['field1', 'sorted.field3', 'untokenized.field7'],
                         [f.name() for f in searchFields])

        facetsFields = [
            FacetField.cast_(f) for f in document.getFields()
            if FacetField.instance_(f)
        ]
        self.assertEqual(6, len(facetsFields))
        self.assertEqual([
            ('untokenized.field8', ['grandparent', 'parent', 'child']),
            ('untokenized.field8', ['parent2', 'child']),
            ('untokenized.field6', ['value5/value6']),
            ('untokenized.field4', ['value4']),
            ('untokenized.field5', ['value5']),
            ('untokenized.field5', ['value6']),
        ], [(f.dim, list(f.path))
            for f in facetsFields])  # Note: a FacetField doesn't have a name
Ejemplo n.º 2
0
 def testAddFacetField(self):
     """Check that addFacetField() results in exactly one FacetField in the document."""
     fields2LuceneDoc = Fields2LuceneDoc('tsname',
         fieldRegistry=FieldRegistry(drilldownFields=[
             DrilldownField('untokenized.field'),
         ])
     )
     observer = CallTrace()
     fields2LuceneDoc.addObserver(observer)
     fields2LuceneDoc.ctx.tx = Transaction('tsname')
     fields2LuceneDoc.ctx.tx.locals['id'] = 'identifier'
     # one regular field and one facet field; only the latter is counted below
     fields2LuceneDoc.addField('field', 'value')
     fields2LuceneDoc.addFacetField('untokenized.field', 'untokenized value')
     consume(fields2LuceneDoc.commit('unused'))
     # the first call recorded by the observer carries the built document
     document = observer.calledMethods[0].kwargs['document']
     facetsFields = [FacetField.cast_(f) for f in document.getFields() if FacetField.instance_(f)]
     self.assertEqual(1, len(facetsFields))
Ejemplo n.º 3
0
    def testCreateFacet(self):
        """Check the split between search fields and FacetFields after commit.

        Only the fields registered as DrilldownField (4, 5, 6 and 8) are
        expected as FacetFields, one per added value; all other fields are
        expected as regular search fields.
        """
        fields = {
            'field1': ['value1'],
            'sorted.field3': ['value3'],
            'untokenized.field4': ['value4'],
            'untokenized.field5': ['value5', 'value6'],
            # value containing '/' is expected back as one single path element
            'untokenized.field6': ['value5/value6'],
            # deliberately not registered as drilldown field
            'untokenized.field7': ['valuex'],
            # hierarchical drilldown field: each value is a list of path elements
            'untokenized.field8': [['grandparent', 'parent', 'child'], ['parent2', 'child']]
        }
        fields2LuceneDoc = Fields2LuceneDoc('tsname',
            fieldRegistry=FieldRegistry(drilldownFields=[
                DrilldownField('untokenized.field4'),
                DrilldownField('untokenized.field5'),
                DrilldownField('untokenized.field6'),
                DrilldownField('untokenized.field8', hierarchical=True),
            ])
        )
        observer = CallTrace()
        fields2LuceneDoc.addObserver(observer)
        fields2LuceneDoc.ctx.tx = Transaction('tsname')
        fields2LuceneDoc.ctx.tx.locals['id'] = 'identifier'
        for field, values in fields.items():
            for value in values:
                fields2LuceneDoc.addField(field, value)

        consume(fields2LuceneDoc.commit('unused'))

        # the document is taken from the first call the observer recorded
        document = observer.calledMethods[0].kwargs['document']
        searchFields = [f for f in document.getFields() if not FacetField.instance_(f)]
        # NOTE(review): expected orderings presumably follow the iteration
        # order of the `fields` dict -- confirm before relying on them elsewhere.
        self.assertEqual(['field1', 'sorted.field3', 'untokenized.field7'], [f.name() for f in searchFields])

        facetsFields = [FacetField.cast_(f) for f in document.getFields() if FacetField.instance_(f)]
        self.assertEqual(6, len(facetsFields))
        self.assertEqual([
                ('untokenized.field8', ['grandparent', 'parent', 'child']),
                ('untokenized.field8', ['parent2', 'child']),
                ('untokenized.field6', ['value5/value6']),
                ('untokenized.field4', ['value4']),
                ('untokenized.field5', ['value5']),
                ('untokenized.field5', ['value6']),
            ], [(f.dim, list(f.path)) for f in facetsFields])  # Note: a FacetField doesn't have a name
Ejemplo n.º 4
0
 def testAddFacetField(self):
     """Check that one addFacetField() call yields one FacetField after commit."""
     fields2LuceneDoc = Fields2LuceneDoc(
         'tsname',
         fieldRegistry=FieldRegistry(drilldownFields=[
             DrilldownField('untokenized.field'),
         ]))
     observer = CallTrace()
     fields2LuceneDoc.addObserver(observer)
     fields2LuceneDoc.ctx.tx = Transaction('tsname')
     fields2LuceneDoc.ctx.tx.locals['id'] = 'identifier'
     # a regular field is added as well; the assertion only counts FacetFields
     fields2LuceneDoc.addField('field', 'value')
     fields2LuceneDoc.addFacetField('untokenized.field',
                                    'untokenized value')
     consume(fields2LuceneDoc.commit('unused'))
     # the document is taken from the first call the observer recorded
     document = observer.calledMethods[0].kwargs['document']
     facetsFields = [
         FacetField.cast_(f) for f in document.getFields()
         if FacetField.instance_(f)
     ]
     self.assertEqual(1, len(facetsFields))
Ejemplo n.º 5
0
    def index(cls, indexDir, taxoDir, facets_config):
        """Create an index and add the sample documents and their facets to it.

        indexDir -- Directory in which the index should be created.
        taxoDir -- Directory in which the taxonomy index should be created.
        facets_config -- object whose build(taxo, doc) result is what gets
            added to the index for every sample document.

        Both writers are closed before returning, also when indexing fails.
        """
        # create and open an index writer
        config = IndexWriterConfig(Version.LUCENE_48,
                                   WhitespaceAnalyzer(Version.LUCENE_48))
        config.setOpenMode(IndexWriterConfig.OpenMode.CREATE)
        iw = IndexWriter(indexDir, config)
        taxo = None
        nDocsAdded = 0
        try:
            # create and open a taxonomy writer
            taxo = DirectoryTaxonomyWriter(taxoDir,
                                           IndexWriterConfig.OpenMode.CREATE)
            # loop over sample documents
            for docNum, docText in enumerate(docTexts):
                # create a plain Lucene document and add some regular Lucene
                # fields to it
                doc = Document()
                doc.add(TextField(TITLE, docTitles[docNum], Field.Store.YES))
                doc.add(TextField(TEXT, docText, Field.Store.NO))
                # use the FacetField class for adding the sample facets of the
                # current document (and via FacetsConfig to the taxonomy index)
                doc.add(FacetField("Author", authors[docNum]))
                for f in categories[docNum]:
                    doc.add(FacetField("Categories", f))
                # finally add the document to the index
                iw.addDocument(facets_config.build(taxo, doc))
                nDocsAdded += 1
        finally:
            # close the index and then the taxonomy index (same order as on
            # the success path); the inner finally makes sure the taxonomy
            # writer is released even if closing the index writer raises.
            try:
                iw.close()
            finally:
                if taxo is not None:
                    taxo.close()
        # single parenthesised argument: prints the same text on Python 2 and 3
        print("Indexed %d documents with facets." % nDocsAdded)
Ejemplo n.º 6
0
 def _createDocument(self, fields, facet_fields=None):
     """Build a Document from `fields` and the optional `facet_fields`.

     Both arguments are mappings of field name to an iterable of values.

     For a field the field registry reports as drilldown field, a FacetField
     is added per value: a value with an `extend` attribute (list-like) is
     used as a sequence of path elements, any other value becomes a
     single-element path. Path elements are converted with str().

     For any other field, a field created by the field registry is added per
     value; values of fields whose name starts with KEY_PREFIX are first
     replaced by the result of self.call.numerateTerm(value).

     Raises ValueError when a non-drilldown field named IDFIELD has a value.
     """
     # function-scope import: chain() iterates both mappings without
     # concatenating their items, which also works when items() is not a list
     from itertools import chain

     facet_fields = facet_fields or {}
     doc = Document()
     for field, values in chain(fields.items(), facet_fields.items()):
         if self._fieldRegistry.isDrilldownField(field):
             for value in values:
                 if hasattr(value, 'extend'):
                     path = [str(category) for category in value]
                 else:
                     path = [str(value)]
                 doc.add(FacetField(field, path))
         else:
             for value in values:
                 if field == IDFIELD:
                     raise ValueError(
                         "Field '%s' is protected and created by Meresco Lucene"
                         % IDFIELD)
                 if field.startswith(KEY_PREFIX):
                     value = self.call.numerateTerm(value)
                 doc.add(self._fieldRegistry.createField(field, value))
     return doc
def fieldsFromDocument(document):
    """Split the fields of `document` into search fields and facet fields.

    Returns a tuple (searchFields, facetsFields): facetsFields holds every
    field for which FacetField.instance_() is true, cast with
    FacetField.cast_(); searchFields holds all other fields unchanged.
    Both lists keep the order in which document.getFields() yields them.
    """
    searchFields = []
    facetsFields = []
    # single pass: getFields() is called once and each field is tested once
    for f in document.getFields():
        if FacetField.instance_(f):
            facetsFields.append(FacetField.cast_(f))
        else:
            searchFields.append(f)
    return searchFields, facetsFields