Esempio n. 1
0
class LuceneTestCase(SeecrTestCase):

    def setUp(self, fieldRegistry=FieldRegistry()):
        super(LuceneTestCase, self).setUp()
        self._javaObjects = self._getJavaObjects()
        self._reactor = CallTrace('reactor', methods={'addTimer': lambda seconds, callback: CallTrace('timer')})
        self._defaultSettings = LuceneSettings(commitCount=1, commitTimeout=1, fieldRegistry=fieldRegistry)
        self.lucene = Lucene(
            join(self.tempdir, 'lucene'),
            reactor=self._reactor,
            settings=self._defaultSettings,
        )
        self.observer = CallTrace()
        self.lucene.addObserver(self.observer)

    def tearDown(self):
        try:
            self._reactor.calledMethods.reset() # don't keep any references.
            self.lucene.close()
            self.lucene = None
            gc.collect()
            diff = self._getJavaObjects() - self._javaObjects
            self.assertEquals(0, len(diff), diff)
        finally:
            SeecrTestCase.tearDown(self)

    def _getJavaObjects(self):
        refs = VM._dumpRefs(classes=True)
        return set(
                [(c, refs[c])
                for c in refs.keys()
                if c != 'class java.lang.Class' and
                    c != 'class org.apache.lucene.document.Field' and # Fields are kept in FieldRegistry for reusing
                    c != 'class org.apache.lucene.document.NumericDocValuesField' and
                    c != 'class org.apache.lucene.facet.FacetsConfig'
            ])
Esempio n. 2
0
class MultiLuceneTest(SeecrTestCase):
    def __init__(self, *args, **kwargs):
        super(MultiLuceneTest, self).__init__(*args, **kwargs)
        self._multithreaded = True

    def setUp(self):
        SeecrTestCase.setUp(self)
        settings = LuceneSettings(multithreaded=self._multithreaded, verbose=False)
        settingsLuceneC = LuceneSettings(multithreaded=self._multithreaded, verbose=False, similarity=TermFrequencySimilarity())

        self.luceneA = Lucene(join(self.tempdir, 'a'), name='coreA', reactor=CallTrace(), settings=settings)
        self.luceneB = Lucene(join(self.tempdir, 'b'), name='coreB', reactor=CallTrace(), settings=settings)
        self.luceneC = Lucene(join(self.tempdir, 'c'), name='coreC', reactor=CallTrace(), settings=settingsLuceneC)
        self.dna = be((Observable(),
            (MultiLucene(defaultCore='coreA', multithreaded=self._multithreaded),
                (self.luceneA,),
                (self.luceneB,),
                (self.luceneC,),
            )
        ))

        # +---------------------------------+   +---------------------------------+  +----------------------+
        # |              ______             |   |                                 |  |                    C |
        # |         ____/      \____     A  |   |    __________                B  |  |      ____            |
        # |        /   /\   Q  /\   \       |   |   /    N     \                  |  |     /    \           |
        # |       /   /  \    /  \   \      |   |  /   ____     \                 |  |    |   R  |          |
        # |      /   |    \  /    |   \     |   | |   /    \     |                |  |     \ ___/           |
        # |     /     \    \/    /     \    |   | |  |  M __|____|_____           |  |                      |
        # |    /       \   /\   /       \   |   | |   \__/_/     |     \          |  |                      |
        # |   |         \_|__|_/         |  |   |  \    |       /      |          |  |                      |
        # |   |    U      |  |     M     |  |   |   \___|______/    ___|_______   |  |                      |
        # |   |           \  /           |  |   |       |          /   |       \  |  |                      |
        # |    \           \/           /   |   |       |   O     /   _|__      \ |  |                      |
        # |     \          /\          /    |   |        \_______|___/_/  \     | |  |                      |
        # |      \        /  \        /     |   |                |  |  M   | P  | |  |                      |
        # |       \______/    \______/      |   |                |   \____/     | |  |                      |
        # |                                 |   |                 \            /  |  |                      |
        # |                                 |   |                  \__________/   |  |                      |
        # +---------------------------------+   +---------------------------------+  +----------------------+

        k1, k2, k3, k4, k5, k6, k7, k8, k9, k10, k11 = range(1,12)
        self.addDocument(self.luceneA, identifier='A',      keys=[('A', k1 )], fields=[('M', 'false'), ('Q', 'false'), ('U', 'false'), ('S', '1')])
        self.addDocument(self.luceneA, identifier='A-U',    keys=[('A', k2 )], fields=[('M', 'false'), ('Q', 'false'), ('U', 'true' ), ('S', '2')])
        self.addDocument(self.luceneA, identifier='A-Q',    keys=[('A', k3 )], fields=[('M', 'false'), ('Q', 'true' ), ('U', 'false'), ('S', '3')])
        self.addDocument(self.luceneA, identifier='A-QU',   keys=[('A', k4 )], fields=[('M', 'false'), ('Q', 'true' ), ('U', 'true' ), ('S', '4')])
        self.addDocument(self.luceneA, identifier='A-M',    keys=[('A', k5 ), ('C', k5)], fields=[('M', 'true' ), ('Q', 'false'), ('U', 'false'), ('S', '5')])
        self.addDocument(self.luceneA, identifier='A-MU',   keys=[('A', k6 )], fields=[('M', 'true' ), ('Q', 'false'), ('U', 'true' ), ('S', '6')])
        self.addDocument(self.luceneA, identifier='A-MQ',   keys=[('A', k7 )], fields=[('M', 'true' ), ('Q', 'true' ), ('U', 'false'), ('S', '7')])
        self.addDocument(self.luceneA, identifier='A-MQU',  keys=[('A', k8 )], fields=[('M', 'true' ), ('Q', 'true' ), ('U', 'true' ), ('S', '8')])

        self.addDocument(self.luceneB, identifier='B-N>A-M',   keys=[('B', k5 ), ('D', k5)], fields=[('N', 'true' ), ('O', 'true' ), ('P', 'false')])
        self.addDocument(self.luceneB, identifier='B-N>A-MU',  keys=[('B', k6 )], fields=[('N', 'true' ), ('O', 'false'), ('P', 'false')])
        self.addDocument(self.luceneB, identifier='B-N>A-MQ',  keys=[('B', k7 )], fields=[('N', 'true' ), ('O', 'true' ), ('P', 'false')])
        self.addDocument(self.luceneB, identifier='B-N>A-MQU', keys=[('B', k8 )], fields=[('N', 'true' ), ('O', 'false'), ('P', 'false')])
        self.addDocument(self.luceneB, identifier='B-N',       keys=[('B', k9 )], fields=[('N', 'true' ), ('O', 'true' ), ('P', 'false')])
        self.addDocument(self.luceneB, identifier='B',         keys=[('B', k10)], fields=[('N', 'false'), ('O', 'false'), ('P', 'false')])
        self.addDocument(self.luceneB, identifier='B-P>A-M',   keys=[('B', k5 )], fields=[('N', 'false'), ('O', 'true' ), ('P', 'true' )])
        self.addDocument(self.luceneB, identifier='B-P>A-MU',  keys=[('B', k6 )], fields=[('N', 'false'), ('O', 'false'), ('P', 'true' )])
        self.addDocument(self.luceneB, identifier='B-P>A-MQ',  keys=[('B', k7 )], fields=[('N', 'false'), ('O', 'false' ), ('P', 'true' )])
        self.addDocument(self.luceneB, identifier='B-P>A-MQU', keys=[('B', k8 )], fields=[('N', 'false'), ('O', 'false'), ('P', 'true' )])
        self.addDocument(self.luceneB, identifier='B-P',       keys=[('B', k11)], fields=[('N', 'false'), ('O', 'true' ), ('P', 'true' )])

        self.addDocument(self.luceneC, identifier='C-R', keys=[('C', k5)], fields=[('R', 'true')])
        self.addDocument(self.luceneC, identifier='C-S', keys=[('C', k8)], fields=[('S', 'true')])
        self.addDocument(self.luceneC, identifier='C-S2', keys=[('C', k7)], fields=[('S', 'false')])

        self.luceneA._realCommit()
        self.luceneB._realCommit()
        self.luceneC._realCommit()
        settings.commitCount = 1
        settingsLuceneC.commitCount = 1

    def tearDown(self):
        self.luceneA.close()
        self.luceneB.close()
        SeecrTestCase.tearDown(self)

    def hitIds(self, hits):
        return set([hit.id for hit in hits])

    def testQueryOneIndex(self):
        result = returnValueFromGenerator(self.dna.any.executeQuery(luceneQuery=luceneQueryFromCql('Q=true')))
        self.assertEquals(set(['A-Q', 'A-QU', 'A-MQ', 'A-MQU']), self.hitIds(result.hits))
        result = returnValueFromGenerator(self.dna.any.executeQuery(luceneQuery=luceneQueryFromCql('Q=true AND M=true')))
        self.assertEquals(set(['A-MQ', 'A-MQU']), self.hitIds(result.hits))

    def testQueryOneIndexWithComposedQuery(self):
        cq = ComposedQuery('coreA')
        cq.setCoreQuery(core='coreA', query=luceneQueryFromCql('Q=true'))
        result = returnValueFromGenerator(self.dna.any.executeComposedQuery(cq))
        self.assertEquals(set(['A-Q', 'A-QU', 'A-MQ', 'A-MQU']), self.hitIds(result.hits))
        cq = ComposedQuery('coreA')
        cq.setCoreQuery(core='coreA', query=luceneQueryFromCql('Q=true'), filterQueries=[luceneQueryFromCql('M=true')])
        result = returnValueFromGenerator(self.dna.any.executeComposedQuery(cq))
        self.assertEquals(set(['A-MQ', 'A-MQU']), self.hitIds(result.hits))

    def testB_N_is_true(self):
        result = returnValueFromGenerator(self.dna.any.executeQuery(core='coreB', luceneQuery=luceneQueryFromCql('N=true')))
        self.assertEquals(5, result.total)
        self.assertEquals(set(['B-N', 'B-N>A-M', 'B-N>A-MU', 'B-N>A-MQ', 'B-N>A-MQU']), self.hitIds(result.hits))

    def testJoinQuery(self):
        q = ComposedQuery('coreA', query=MatchAllDocsQuery())
        q.setCoreQuery(core='coreB', query=luceneQueryFromCql('N=true'))
        q.addMatch(dict(core='coreA', uniqueKey=KEY_PREFIX+'A'), dict(core='coreB', key=KEY_PREFIX+'B'))
        result = returnValueFromGenerator(self.dna.any.executeComposedQuery(q))
        self.assertEquals(4, result.total)
        self.assertEquals(set(['A-M', 'A-MU', 'A-MQ', 'A-MQU']), self.hitIds(result.hits))

    def testMultipleJoinQueriesKeepsCachesWithinMaxSize(self):
        for i in xrange(25):
            self.addDocument(self.luceneB, identifier=str(i), keys=[('X', i)], fields=[('Y', str(i))])
        for i in xrange(25):
            q = ComposedQuery('coreA', query=MatchAllDocsQuery())
            q.setCoreQuery(core='coreB', query=luceneQueryFromCql('Y=%s' % i))
            q.addMatch(dict(core='coreA', uniqueKey=KEY_PREFIX+'A'), dict(core='coreB', key=KEY_PREFIX+'X'))
            ignoredResult = returnValueFromGenerator(self.dna.any.executeComposedQuery(q))

    def testJoinQueryWithFilters(self):
        q = ComposedQuery('coreA')
        q.addFilterQuery('coreB', query=luceneQueryFromCql('N=true'))
        q.addMatch(dict(core='coreA', uniqueKey=KEY_PREFIX+'A'), dict(core='coreB', key=KEY_PREFIX+'B'))
        result = returnValueFromGenerator(self.dna.any.executeComposedQuery(q))
        self.assertEquals(4, result.total)
        self.assertEquals(set(['A-M', 'A-MU', 'A-MQ', 'A-MQU']), self.hitIds(result.hits))

    def testJoinFacet(self):
        q = ComposedQuery('coreA', query=luceneQueryFromCql('Q=true'))
        q.addFacet('coreB', dict(fieldname='cat_N', maxTerms=10))
        q.addFacet('coreB', dict(fieldname='cat_O', maxTerms=10))
        q.addMatch(dict(core='coreA', uniqueKey=KEY_PREFIX + 'A'), dict(core='coreB', key=KEY_PREFIX + 'B'))
        result = returnValueFromGenerator(self.dna.any.executeComposedQuery(query=q))
        self.assertEquals(4, result.total)
        self.assertEquals([{
                'terms': [
                        {'count': 2, 'term': u'true'},
                        {'count': 2, 'term': u'false'},
                    ],
                'path': [],
                'fieldname': u'cat_N'
            }, {
                'terms': [
                    {'count': 3, 'term': u'false'},
                    {'count': 1, 'term': u'true'},
                ],
                'path': [],
                'fieldname': u'cat_O'
            }], result.drilldownData)

    def testJoinFacetWithDrilldownQueryFilters(self):
        q = ComposedQuery('coreA', query=luceneQueryFromCql('M=true'))
        q.addDrilldownQuery('coreA', drilldownQuery=('cat_Q', ['true']))
        q.addFacet('coreB', dict(fieldname='cat_O', maxTerms=10))
        q.addMatch(dict(core='coreA', uniqueKey=KEY_PREFIX + 'A'), dict(core='coreB', key=KEY_PREFIX + 'B'))
        result = returnValueFromGenerator(self.dna.any.executeComposedQuery(query=q))
        self.assertEquals(2, result.total)
        self.assertEquals([{
                'terms': [
                    {'count': 3, 'term': u'false'},
                    {'count': 1, 'term': u'true'},
                ],
                'path': [],
                'fieldname': u'cat_O'
            }], result.drilldownData)

    def testJoinFacetWithJoinDrilldownQueryFilters(self):
        q = ComposedQuery('coreA', query=luceneQueryFromCql('M=true'))
        q.addDrilldownQuery('coreB', drilldownQuery=('cat_O', ['true']))
        q.addFacet('coreB', dict(fieldname='cat_O', maxTerms=10))
        q.addMatch(dict(core='coreA', uniqueKey=KEY_PREFIX + 'A'), dict(core='coreB', key=KEY_PREFIX + 'B'))
        result = returnValueFromGenerator(self.dna.any.executeComposedQuery(query=q))
        self.assertEquals(2, result.total)
        self.assertEquals([{
                'terms': [
                    {'count': 3, 'term': u'true'},
                ],
                'path': [],
                'fieldname': u'cat_O'
            }], result.drilldownData)

    def testJoinDrilldownQueryFilters(self):
        q = ComposedQuery('coreA', query=luceneQueryFromCql('M=true'))
        q.addDrilldownQuery('coreB', drilldownQuery=('cat_O', ['true']))
        q.addMatch(dict(core='coreA', uniqueKey=KEY_PREFIX + 'A'), dict(core='coreB', key=KEY_PREFIX + 'B'))
        result = returnValueFromGenerator(self.dna.any.executeComposedQuery(query=q))
        self.assertEquals(2, result.total)

    def testJoinFacetWithFilter(self):
        q = ComposedQuery('coreA', query=luceneQueryFromCql('M=true'))
        q.addFilterQuery('coreA', query=luceneQueryFromCql('Q=true'))
        q.addFacet('coreB', dict(fieldname='cat_O', maxTerms=10))
        q.addMatch(dict(core='coreA', uniqueKey=KEY_PREFIX + 'A'), dict(core='coreB', key=KEY_PREFIX + 'B'))
        result = returnValueFromGenerator(self.dna.any.executeComposedQuery(query=q))
        self.assertEquals(2, result.total)
        self.assertEquals([{
                'terms': [
                    {'count': 3, 'term': u'false'},
                    {'count': 1, 'term': u'true'},
                ],
                'path': [],
                'fieldname': u'cat_O'
            }], result.drilldownData)

    def testJoinFacetFromBPointOfView(self):
        q = ComposedQuery('coreB')
        q.setCoreQuery(core='coreA', query=luceneQueryFromCql('Q=true'))
        q.setCoreQuery(core='coreB', query=None, facets=[
                dict(fieldname='cat_N', maxTerms=10),
                dict(fieldname='cat_O', maxTerms=10),
            ])
        try:
            q.addMatch(dict(core='coreA', uniqueKey=KEY_PREFIX + 'A'), dict(core='coreB', key=KEY_PREFIX + 'B'))
        except ValueError, e:
            self.assertEquals("Match for result core 'coreB' must have a uniqueKey specification.", str(e))
            return

        # for future reference
        self.assertEquals(4, result.total)
        self.assertEquals(set(['B-N>A-MQ', 'B-N>A-MQU', 'B-P>A-MQ', 'B-P>A-MQU']), self.hitIds(result.hits))
        self.assertEquals([{
                'terms': [
                        {'count': 2, 'term': u'false'},
                        {'count': 2, 'term': u'true'},
                    ],
                'fieldname': u'cat_N'
            }, {
                'terms': [
                    {'count': 2, 'term': u'false'},
                    {'count': 2, 'term': u'true'},
                ],
                'fieldname': u'cat_O'
             }], result.drilldownData)
Esempio n. 3
0
class DeDupFilterCollectorTest(SeecrTestCase):
    def setUp(self):
        super(DeDupFilterCollectorTest, self).setUp()
        self._reactor = CallTrace('reactor')
        settings = LuceneSettings(commitCount=1, verbose=False)
        self.lucene = Lucene(self.tempdir,
                             reactor=self._reactor,
                             settings=settings)

    def tearDown(self):
        self.lucene.close()
        super(DeDupFilterCollectorTest, self).tearDown()

    def testCollectorTransparentlyDelegatesToNextCollector(self):
        self._addDocument("urn:1", 2)
        tc = TopScoreDocSuperCollector(100, True)
        c = DeDupFilterSuperCollector("__isformatof__", "__sort__", tc)
        self.lucene.search(query=MatchAllDocsQuery(), collector=c)
        self.assertEquals(1, tc.topDocs(0).totalHits)

    def _addDocument(self, identifier, isformatof, sort=None):
        doc = Document()
        if isformatof:
            doc.add(NumericDocValuesField("__isformatof__", long(isformatof)))
        if sort:
            doc.add(NumericDocValuesField("__sort__", long(sort)))
        consume(self.lucene.addDocument(identifier, doc))
        self.lucene.commit()  # Explicitly, not required: since commitCount=1.

    def testCollectorFiltersTwoSimilar(self):
        self._addDocument("urn:1", 2, 1)
        self._addDocument("urn:2", 2, 2)
        tc = TopScoreDocSuperCollector(100, True)
        c = DeDupFilterSuperCollector("__isformatof__", "__sort__", tc)
        self.lucene.search(query=MatchAllDocsQuery(), collector=c)
        topDocsResult = tc.topDocs(0)
        self.assertEquals(1, topDocsResult.totalHits)
        self.assertEquals(1, len(topDocsResult.scoreDocs))

        docId = topDocsResult.scoreDocs[0].doc
        key = c.keyForDocId(docId)
        identifier = self.lucene._index.getDocument(
            key.getDocId()).get(IDFIELD)
        self.assertEquals('urn:2', identifier)
        self.assertEquals(2, key.count)

    def testCollectorFiltersTwoTimesTwoSimilarOneNot(self):
        self._addDocument("urn:1", 1, 2001)
        self._addDocument("urn:2", 3, 2009)  # result 2x
        self._addDocument("urn:3", 50, 2010)  # result 1x
        self._addDocument("urn:4", 3, 2001)
        self._addDocument("urn:5", 1, 2009)  # result 2x
        #expected: "urn:2', "urn:3" and "urn:5" in no particular order
        tc = TopScoreDocSuperCollector(100, True)
        c = DeDupFilterSuperCollector("__isformatof__", "__sort__", tc)
        self.lucene.search(query=MatchAllDocsQuery(), collector=c)
        topDocsResult = tc.topDocs(0)
        self.assertEquals(3, topDocsResult.totalHits)
        self.assertEquals(3, len(topDocsResult.scoreDocs))
        rawDocIds = [scoreDoc.doc for scoreDoc in topDocsResult.scoreDocs]
        netDocIds = [c.keyForDocId(rawDocId).docId for rawDocId in rawDocIds]
        identifiers = set(
            self.lucene._index.getDocument(doc).get(IDFIELD)
            for doc in netDocIds)
        self.assertEquals(set(["urn:2", "urn:3", "urn:5"]), identifiers)
        self.assertEquals(
            [1, 2, 2], list(sorted(c.keyForDocId(d).count for d in netDocIds)))

    def testSilentyYieldsWrongResultWhenFieldNameDoesNotMatch(self):
        self._addDocument("urn:1", 2)
        tc = TopScoreDocSuperCollector(100, True)
        c = DeDupFilterSuperCollector("__wrong_field__", "__sort__", tc)
        self.lucene.search(query=MatchAllDocsQuery(), collector=c)
        self.assertEquals(1, tc.topDocs(0).totalHits)

    def testShouldAddResultsWithoutIsFormatOf(self):
        self._addDocument("urn:1", 2)
        self._addDocument("urn:2", None)
        self._addDocument("urn:3", 2)
        self._addDocument("urn:4", None)
        self._addDocument("urn:5", None)
        self._addDocument("urn:6", None)
        self._addDocument("urn:7", None)
        self._addDocument("urn:8", None)
        self._addDocument("urn:9", None)
        self._addDocument("urn:A", None)
        self._addDocument("urn:B", None)  # trigger a merge
        tc = TopScoreDocSuperCollector(100, True)
        c = DeDupFilterSuperCollector("__isformatof__", "__sort__", tc)
        self.lucene.search(query=MatchAllDocsQuery(), collector=c)
        self.assertEquals(10, tc.topDocs(0).totalHits)
class DeDupFilterCollectorTest(SeecrTestCase):
    def setUp(self):
        super(DeDupFilterCollectorTest, self).setUp()
        self._reactor = CallTrace('reactor')
        settings = LuceneSettings(commitCount=1, verbose=False)
        self.lucene = Lucene(self.tempdir, reactor=self._reactor, settings=settings)

    def tearDown(self):
        self.lucene.close()
        super(DeDupFilterCollectorTest, self).tearDown()

    def testCollectorTransparentlyDelegatesToNextCollector(self):
        self._addDocument("urn:1", 2)
        tc = TopScoreDocSuperCollector(100, True)
        c = DeDupFilterSuperCollector("__isformatof__", "__sort__", tc)
        self.lucene.search(query=MatchAllDocsQuery(), collector=c)
        self.assertEquals(1, tc.topDocs(0).totalHits)

    def _addDocument(self, identifier, isformatof, sort=None):
        doc = Document()
        if isformatof:
            doc.add(NumericDocValuesField("__isformatof__", long(isformatof)))
        if sort:
            doc.add(NumericDocValuesField("__sort__", long(sort)))
        consume(self.lucene.addDocument(identifier, doc))
        self.lucene.commit()  # Explicitly, not required: since commitCount=1.

    def testCollectorFiltersTwoSimilar(self):
        self._addDocument("urn:1", 2, 1)
        self._addDocument("urn:2", 2, 2)
        tc = TopScoreDocSuperCollector(100, True)
        c = DeDupFilterSuperCollector("__isformatof__", "__sort__", tc)
        self.lucene.search(query=MatchAllDocsQuery(), collector=c)
        topDocsResult = tc.topDocs(0)
        self.assertEquals(1, topDocsResult.totalHits)
        self.assertEquals(1, len(topDocsResult.scoreDocs))

        docId = topDocsResult.scoreDocs[0].doc
        key = c.keyForDocId(docId)
        identifier = self.lucene._index.getDocument(key.getDocId()).get(IDFIELD)
        self.assertEquals('urn:2', identifier)
        self.assertEquals(2, key.count)

    def testCollectorFiltersTwoTimesTwoSimilarOneNot(self):
        self._addDocument("urn:1",  1, 2001)
        self._addDocument("urn:2",  3, 2009) # result 2x
        self._addDocument("urn:3", 50, 2010) # result 1x
        self._addDocument("urn:4",  3, 2001)
        self._addDocument("urn:5",  1, 2009) # result 2x
        #expected: "urn:2', "urn:3" and "urn:5" in no particular order
        tc = TopScoreDocSuperCollector(100, True)
        c = DeDupFilterSuperCollector("__isformatof__", "__sort__", tc)
        self.lucene.search(query=MatchAllDocsQuery(), collector=c)
        topDocsResult = tc.topDocs(0)
        self.assertEquals(3, topDocsResult.totalHits)
        self.assertEquals(3, len(topDocsResult.scoreDocs))
        rawDocIds = [scoreDoc.doc for scoreDoc in topDocsResult.scoreDocs]
        netDocIds = [c.keyForDocId(rawDocId).docId for rawDocId in rawDocIds]
        identifiers = set(self.lucene._index.getDocument(doc).get(IDFIELD) for doc in netDocIds)
        self.assertEquals(set(["urn:2", "urn:3", "urn:5"]), identifiers)
        self.assertEquals([1,2,2], list(sorted(c.keyForDocId(d).count for d in netDocIds)))

    def testSilentyYieldsWrongResultWhenFieldNameDoesNotMatch(self):
        self._addDocument("urn:1", 2)
        tc = TopScoreDocSuperCollector(100, True)
        c = DeDupFilterSuperCollector("__wrong_field__", "__sort__", tc)
        self.lucene.search(query=MatchAllDocsQuery(), collector=c)
        self.assertEquals(1, tc.topDocs(0).totalHits)

    def testShouldAddResultsWithoutIsFormatOf(self):
        self._addDocument("urn:1", 2)
        self._addDocument("urn:2", None)
        self._addDocument("urn:3", 2)
        self._addDocument("urn:4", None)
        self._addDocument("urn:5", None)
        self._addDocument("urn:6", None)
        self._addDocument("urn:7", None)
        self._addDocument("urn:8", None)
        self._addDocument("urn:9", None)
        self._addDocument("urn:A", None)
        self._addDocument("urn:B", None) # trigger a merge
        tc = TopScoreDocSuperCollector(100, True)
        c = DeDupFilterSuperCollector("__isformatof__", "__sort__", tc)
        self.lucene.search(query=MatchAllDocsQuery(), collector=c)
        self.assertEquals(10, tc.topDocs(0).totalHits)