예제 #1
0
class PyLuceneTestCase(TestCase):
    """Base test case with helpers for building Lucene index objects.

    ``setUp`` stores a new ``RAMDirectory`` in ``self.directory`` and
    ``tearDown`` closes it.  The ``get*`` helpers fall back to that
    directory whenever the caller does not pass one.
    """

    def __init__(self, *args):
        super(PyLuceneTestCase, self).__init__(*args)

    def setUp(self):
        """Create the per-test directory."""
        self.directory = RAMDirectory()

    def tearDown(self):
        """Close the directory created by ``setUp``."""
        self.directory.close()

    def getConfig(self, analyzer=None):
        """Return an ``IndexWriterConfig`` constructed from ``analyzer``."""
        return IndexWriterConfig(analyzer)

    def getWriter(self,
                  directory=None,
                  analyzer=None,
                  open_mode=None,
                  similarity=None,
                  maxBufferedDocs=None,
                  mergePolicy=None):
        """Return an ``IndexWriter`` configured from the given options.

        Defaults applied when an argument is ``None``:

        * ``analyzer``: a ``WhitespaceAnalyzer`` wrapped in a
          ``LimitTokenCountAnalyzer`` with a limit of 10000.
        * ``open_mode``: ``IndexWriterConfig.OpenMode.CREATE``.
        * ``directory``: ``self.directory``.

        ``similarity``, ``maxBufferedDocs`` and ``mergePolicy`` are only
        applied to the config when they are not ``None``.
        """
        if analyzer is None:
            analyzer = LimitTokenCountAnalyzer(WhitespaceAnalyzer(), 10000)
        config = self.getConfig(analyzer)

        if open_mode is None:
            open_mode = IndexWriterConfig.OpenMode.CREATE
        config.setOpenMode(open_mode)
        if similarity is not None:
            config.setSimilarity(similarity)
        if maxBufferedDocs is not None:
            config.setMaxBufferedDocs(maxBufferedDocs)
        if mergePolicy is not None:
            config.setMergePolicy(mergePolicy)

        if directory is None:
            directory = self.directory

        return IndexWriter(directory, config)

    def getSearcher(self, directory=None, reader=None):
        """Return an ``IndexSearcher`` over ``reader``.

        When ``reader`` is ``None`` a reader is opened via ``getReader``
        on ``directory`` instead.
        """
        if reader is not None:
            return IndexSearcher(reader)
        return IndexSearcher(self.getReader(directory=directory))

    def getReader(self, directory=None):
        """Open a ``DirectoryReader`` on ``directory`` (default:
        ``self.directory``)."""
        if directory is None:
            directory = self.directory
        return DirectoryReader.open(directory)

    def getOnlyLeafReader(self, reader):
        """Return the reader of the single leaf of ``reader``.

        Raises:
            ValueError: if ``reader.leaves()`` does not hold exactly one
                entry.
        """
        subReaders = reader.leaves()
        count = subReaders.size()
        if count != 1:
            # Format explicitly: concatenating the reader object and an int
            # onto a str raises TypeError, which would hide this ValueError.
            raise ValueError("%s has %s segments instead of exactly one"
                             % (reader, count))
        return subReaders.get(0).reader()
예제 #2
0
class PyLuceneTestCase(TestCase):
    """Common fixture for PyLucene tests.

    A ``RAMDirectory`` is created in ``setUp`` and closed in ``tearDown``;
    the helper methods use it whenever no explicit directory is given.
    """

    def __init__(self, *args):
        super(PyLuceneTestCase, self).__init__(*args)

    def setUp(self):
        """Store a new ``RAMDirectory`` in ``self.directory``."""
        self.directory = RAMDirectory()

    def tearDown(self):
        """Close ``self.directory``."""
        self.directory.close()

    def getConfig(self, analyzer=None):
        """Build an ``IndexWriterConfig`` from ``analyzer``."""
        return IndexWriterConfig(analyzer)

    def getWriter(self, directory=None, analyzer=None, open_mode=None,
                  similarity=None, maxBufferedDocs=None, mergePolicy=None):
        """Create an ``IndexWriter``.

        ``analyzer`` defaults to a ``WhitespaceAnalyzer`` wrapped in a
        ``LimitTokenCountAnalyzer`` (limit 10000), ``open_mode`` to
        ``IndexWriterConfig.OpenMode.CREATE`` and ``directory`` to
        ``self.directory``.  ``similarity``, ``maxBufferedDocs`` and
        ``mergePolicy`` are set on the config only when not ``None``.
        """
        if analyzer is None:
            analyzer = LimitTokenCountAnalyzer(WhitespaceAnalyzer(), 10000)
        config = self.getConfig(analyzer)

        if open_mode is None:
            open_mode = IndexWriterConfig.OpenMode.CREATE
        config.setOpenMode(open_mode)
        if similarity is not None:
            config.setSimilarity(similarity)
        if maxBufferedDocs is not None:
            config.setMaxBufferedDocs(maxBufferedDocs)
        if mergePolicy is not None:
            config.setMergePolicy(mergePolicy)

        if directory is None:
            directory = self.directory

        return IndexWriter(directory, config)

    def getSearcher(self, directory=None, reader=None):
        """Wrap ``reader`` in an ``IndexSearcher``; if ``reader`` is
        ``None``, open one on ``directory`` through ``getReader``."""
        if reader is not None:
            return IndexSearcher(reader)
        return IndexSearcher(self.getReader(directory=directory))

    def getReader(self, directory=None):
        """Open a ``DirectoryReader`` on ``directory``, falling back to
        ``self.directory`` when it is ``None``."""
        if directory is None:
            directory = self.directory
        return DirectoryReader.open(directory)

    def getOnlyLeafReader(self, reader):
        """Return the reader behind the one and only leaf of ``reader``.

        Raises:
            ValueError: when the number of leaves is not exactly one.
        """
        subReaders = reader.leaves()
        segment_count = subReaders.size()
        if segment_count != 1:
            # str + object / str + int concatenation raises TypeError, so the
            # message is built with formatting to let the ValueError surface.
            raise ValueError(
                "{0} has {1} segments instead of exactly one".format(
                    reader, segment_count))
        return subReaders.get(0).reader()
예제 #3
0
class PyLuceneTestCase(TestCase):
    """Shared fixture for PyLucene tests.

    Every test gets its own ``RAMDirectory`` in ``self.directory``; the
    helper methods build configs, writers, readers and searchers and use
    that directory unless another one is passed in.
    """

    def __init__(self, *args):
        super(PyLuceneTestCase, self).__init__(*args)
        # Version value passed to the version-aware constructors below.
        self.TEST_VERSION = Version.LUCENE_CURRENT

    def setUp(self):
        """Create the directory used by the test."""
        self.directory = RAMDirectory()

    def tearDown(self):
        """Close the directory created in ``setUp``."""
        self.directory.close()

    def getConfig(self, analyzer=None):
        """Return an ``IndexWriterConfig`` for ``TEST_VERSION`` and
        ``analyzer``."""
        version = self.TEST_VERSION
        return IndexWriterConfig(version, analyzer)

    def getWriter(self,
                  directory=None,
                  analyzer=None,
                  open_mode=None,
                  similarity=None,
                  maxBufferedDocs=None,
                  mergePolicy=None):
        """Return an ``IndexWriter``, filling in defaults for ``None``
        arguments (limited whitespace analyzer, CREATE open mode,
        ``self.directory``)."""
        if analyzer is None:
            whitespace = WhitespaceAnalyzer(self.TEST_VERSION)
            analyzer = LimitTokenCountAnalyzer(whitespace, 10000)
        writer_config = self.getConfig(analyzer)

        mode = (IndexWriterConfig.OpenMode.CREATE
                if open_mode is None else open_mode)
        writer_config.setOpenMode(mode)

        # Optional settings: each setter is looked up and called only when
        # the caller supplied a value, in the same order as before.
        optional_settings = (
            ("setSimilarity", similarity),
            ("setMaxBufferedDocs", maxBufferedDocs),
            ("setMergePolicy", mergePolicy),
        )
        for setter_name, value in optional_settings:
            if value is not None:
                getattr(writer_config, setter_name)(value)

        target = self.directory if directory is None else directory
        return IndexWriter(target, writer_config)

    def getSearcher(self, directory=None, reader=None):
        """Return an ``IndexSearcher`` over ``reader``, opening a reader
        on ``directory`` first when none is given."""
        if reader is None:
            reader = self.getReader(directory=directory)
        return IndexSearcher(reader)

    def getReader(self, directory=None):
        """Open a ``DirectoryReader`` on ``directory`` or, when it is
        ``None``, on ``self.directory``."""
        target = self.directory if directory is None else directory
        return DirectoryReader.open(target)
class PyLuceneTestCase(TestCase):
    """Test base class bundling the Lucene plumbing used by the tests.

    ``setUp``/``tearDown`` manage a ``RAMDirectory`` held in
    ``self.directory``; the ``get*`` factories default to it.
    """

    def __init__(self, *args):
        super(PyLuceneTestCase, self).__init__(*args)
        # Version value forwarded to version-aware Lucene constructors.
        self.TEST_VERSION = Version.LUCENE_CURRENT

    def setUp(self):
        """Allocate the per-test directory."""
        self.directory = RAMDirectory()

    def tearDown(self):
        """Release the per-test directory."""
        self.directory.close()

    def getConfig(self, analyzer=None):
        """Build an ``IndexWriterConfig`` from ``TEST_VERSION`` and
        ``analyzer``."""
        return IndexWriterConfig(self.TEST_VERSION, analyzer)

    def getWriter(self, directory=None, analyzer=None, open_mode=None,
                  similarity=None, maxBufferedDocs=None, mergePolicy=None):
        """Create an ``IndexWriter``; ``None`` arguments fall back to a
        token-limited whitespace analyzer, the CREATE open mode and
        ``self.directory`` respectively."""
        if analyzer is None:
            analyzer = LimitTokenCountAnalyzer(
                WhitespaceAnalyzer(self.TEST_VERSION), 10000)
        cfg = self.getConfig(analyzer)

        if open_mode is not None:
            cfg.setOpenMode(open_mode)
        else:
            cfg.setOpenMode(IndexWriterConfig.OpenMode.CREATE)

        # Tuning knobs are forwarded only when explicitly provided.
        if similarity is not None:
            cfg.setSimilarity(similarity)
        if maxBufferedDocs is not None:
            cfg.setMaxBufferedDocs(maxBufferedDocs)
        if mergePolicy is not None:
            cfg.setMergePolicy(mergePolicy)

        if directory is not None:
            return IndexWriter(directory, cfg)
        return IndexWriter(self.directory, cfg)

    def getSearcher(self, directory=None, reader=None):
        """Return an ``IndexSearcher`` for ``reader``; without a reader,
        one is opened on ``directory`` via ``getReader``."""
        source = reader
        if source is None:
            source = self.getReader(directory=directory)
        return IndexSearcher(source)

    def getReader(self, directory=None):
        """Open a ``DirectoryReader`` on the given directory, or on
        ``self.directory`` when none is given."""
        if directory is not None:
            return DirectoryReader.open(directory)
        return DirectoryReader.open(self.directory)
예제 #5
0
    def testTieBreaker(self):
        # MultiTermQuery exposes, through an attribute, which values have to
        # be competitive in order to enter the priority queue.
        #
        # FuzzyQuery tunes itself around that information, so an incorrectly
        # implemented attribute would show up as a failure here.
        first_dir = RAMDirectory()
        first_writer = self.getWriter(directory=first_dir)
        for text in ("a123456", "c123456", "d123456", "e123456"):
            self._addDoc(text, first_writer)

        second_dir = RAMDirectory()
        second_writer = self.getWriter(directory=second_dir)
        for text in ("a123456", "b123456", "b123456", "b123456",
                     "c123456", "f123456"):
            self._addDoc(text, second_writer)

        first_reader = first_writer.getReader()
        second_reader = second_writer.getReader()

        # Search across both indexes through a single combined reader.
        multi_reader = MultiReader([first_reader, second_reader])
        searcher = self.getSearcher(reader=multi_reader)

        query = FuzzyQuery(Term("field", "z123456"), 1, 0, 2, False)
        hits = searcher.search(query, 2)
        self.assertEqual(5, hits.totalHits.value)  # 5 docs, from the a and b's

        # Close in the original order: readers, then writers, then directories.
        for resource in (multi_reader, first_reader, second_reader,
                         first_writer, second_writer,
                         first_dir, second_dir):
            resource.close()
    def testTieBreaker(self):
        # Through an attribute, MultiTermQuery reports which values must be
        # competitive to make it into the priority queue.
        #
        # FuzzyQuery optimizes itself based on that report; if the attribute
        # is implemented incorrectly, problems will surface in this test.
        docs_by_index = (
            ("a123456", "c123456", "d123456", "e123456"),
            ("a123456", "b123456", "b123456", "b123456",
             "c123456", "f123456"),
        )

        # Build the two indexes one after the other: directory, writer, docs.
        directories = []
        writers = []
        for texts in docs_by_index:
            index_dir = RAMDirectory()
            index_writer = self.getWriter(directory=index_dir)
            for text in texts:
                self._addDoc(text, index_writer)
            directories.append(index_dir)
            writers.append(index_writer)

        readers = [index_writer.getReader() for index_writer in writers]

        combined = MultiReader(readers)
        searcher = self.getSearcher(reader=combined)

        fuzzy = FuzzyQuery(Term("field", "z123456"), 1, 0, 2, False)
        top_docs = searcher.search(fuzzy, 2)
        self.assertEqual(5, top_docs.totalHits)  # 5 docs, from the a and b's

        # Same close order as before: multi reader, readers, writers, dirs.
        for resource in [combined] + readers + writers + directories:
            resource.close()