class PyLuceneTestCase(TestCase):
    """Base test case backed by an in-memory Lucene directory.

    ``setUp`` creates ``self.directory`` (a ``RAMDirectory``) and
    ``tearDown`` closes it. The ``get*`` helpers build writers, readers
    and searchers, defaulting to that directory.
    """

    def __init__(self, *args):
        super(PyLuceneTestCase, self).__init__(*args)
        self.TEST_VERSION = Version.LUCENE_CURRENT

    def setUp(self):
        """Create the per-test in-memory directory."""
        self.directory = RAMDirectory()

    def tearDown(self):
        """Close the per-test directory."""
        self.directory.close()

    def getWriter(self, directory=None, analyzer=None, open_mode=None):
        """Return an ``IndexWriter`` on ``directory``.

        :param directory: target directory; falls back to
            ``self.directory`` when falsy.
        :param analyzer: analyzer to index with; when falsy, a
            ``WhitespaceAnalyzer`` wrapped in a ``LimitTokenCountAnalyzer``
            with a limit of 10000 is used.
        :param open_mode: ``IndexWriterConfig.OpenMode`` value; falls back
            to ``OpenMode.CREATE`` when falsy.
        """
        config = IndexWriterConfig(
            self.TEST_VERSION,
            analyzer or LimitTokenCountAnalyzer(
                WhitespaceAnalyzer(Version.LUCENE_CURRENT), 10000))
        config.setOpenMode(open_mode or IndexWriterConfig.OpenMode.CREATE)

        return IndexWriter(directory or self.directory, config)

    def getSearcher(self, directory=None, reader=None):
        """Return an ``IndexSearcher``.

        :param directory: directory to open a reader on when ``reader`` is
            not given; falls back to ``self.directory`` when falsy.
        :param reader: an already-open reader to search; takes precedence
            over ``directory``.
        """
        if reader is not None:
            return IndexSearcher(reader)

        # Wrap the freshly opened reader so both branches return a
        # searcher; previously this branch handed back the bare reader.
        return IndexSearcher(self.getReader(directory=directory))

    def getReader(self, directory=None):
        """Open a ``DirectoryReader`` on ``directory`` (or ``self.directory``)."""
        return DirectoryReader.open(directory or self.directory)
def testSlurp(self):
    """Check that a RAMDirectory built from ``self.dir`` mirrors it.

    Verifies that the reader on ``self.dir`` reports one document per
    entry of ``self.keywords``, then that a reader on the RAMDirectory
    copy reports the same ``maxDoc`` and ``numDocs``.
    """
    fsDirReader = IndexReader.open(self.dir, True)
    try:
        self.assertEqual(len(self.keywords), fsDirReader.maxDoc())
        self.assertEqual(len(self.keywords), fsDirReader.numDocs())

        ramDir = RAMDirectory(self.dir)
        try:
            ramDirReader = IndexReader.open(ramDir, True)
            try:
                self.assertEqual(fsDirReader.maxDoc(),
                                 ramDirReader.maxDoc())
                self.assertEqual(fsDirReader.numDocs(),
                                 ramDirReader.numDocs())
            finally:
                # This reader was previously never closed.
                ramDirReader.close()
        finally:
            ramDir.close()
    finally:
        # Release the reader even when an assertion above fails.
        fsDirReader.close()
def setUp(self):
    """Index one document per owner into a new in-memory directory.

    Every document gets a stored, un-analyzed "owner" field and a
    stored, analyzed "keywords" field.
    """
    self.directory = RAMDirectory()
    indexWriter = IndexWriter(self.directory, WhitespaceAnalyzer(), True,
                              IndexWriter.MaxFieldLength.UNLIMITED)

    # (owner, keywords) pairs, indexed in this order: Elwood, then Jake.
    records = (
        ("elwood", "elwoods sensitive info"),
        ("jake", "jakes sensitive info"),
    )
    for owner, keywords in records:
        entry = Document()
        entry.add(Field("owner", owner,
                        Field.Store.YES, Field.Index.NOT_ANALYZED))
        entry.add(Field("keywords", keywords,
                        Field.Store.YES, Field.Index.ANALYZED))
        indexWriter.addDocument(entry)

    indexWriter.close()
def setUp(self):
    """Index two sentences in field "f" and prepare span term queries.

    After indexing, a searcher and a reader are opened on the directory
    and one ``SpanTermQuery`` per word of interest is stored on ``self``.
    """
    self.directory = RAMDirectory()
    self.analyzer = WhitespaceAnalyzer()
    indexWriter = IndexWriter(self.directory, self.analyzer, True,
                              IndexWriter.MaxFieldLength.UNLIMITED)

    sentences = (
        "the quick brown fox jumps over the lazy dog",
        "the quick red fox jumps over the sleepy cat",
    )
    for sentence in sentences:
        entry = Document()
        entry.add(Field("f", sentence,
                        Field.Store.YES, Field.Index.ANALYZED))
        indexWriter.addDocument(entry)

    indexWriter.close()

    self.searcher = IndexSearcher(self.directory, True)
    self.reader = IndexReader.open(self.directory, True)

    def spanTerm(word):
        # Span query matching `word` in field "f".
        return SpanTermQuery(Term("f", word))

    self.quick = spanTerm("quick")
    self.brown = spanTerm("brown")
    self.red = spanTerm("red")
    self.fox = spanTerm("fox")
    self.lazy = spanTerm("lazy")
    self.sleepy = spanTerm("sleepy")
    self.dog = spanTerm("dog")
    self.cat = spanTerm("cat")
def setUp(self):
    """Prepare an in-memory directory and a file-system directory.

    The file-system index lives in "fs-index" under the JVM's
    ``java.io.tmpdir`` property (default "tmp"); that path is passed to
    ``self.rmdir`` before the directory object is created.
    """
    tmpRoot = System.getProperty("java.io.tmpdir", "tmp")
    fsIndexDir = os.path.join(tmpRoot, "fs-index")
    self.rmdir(fsIndexDir)

    self.ramDir = RAMDirectory()
    indexLocation = File(fsIndexDir)
    self.fsDir = SimpleFSDirectory(indexLocation)
def setUp(self):
    """Index a single sentence in "contents" using ``self.porterAnalyzer``."""
    self.directory = RAMDirectory()
    indexWriter = IndexWriter(self.directory, self.porterAnalyzer, True,
                              IndexWriter.MaxFieldLength.UNLIMITED)

    sample = Document()
    contents = Field("contents",
                     "The quick brown fox jumps over the lazy dogs",
                     Field.Store.YES, Field.Index.ANALYZED)
    sample.add(contents)

    indexWriter.addDocument(sample)
    indexWriter.close()
def setUp(self):
    """Split animal names across two in-memory indexes.

    Names whose lower-cased first letter sorts before "n" go to the
    first index, all others to the second. ``self.searchers`` holds one
    ``IndexSearcher`` per index, in that order.
    """
    animals = [
        "aardvark", "beaver", "coati", "dog", "elephant", "frog",
        "gila monster", "horse", "iguana", "javelina", "kangaroo",
        "lemur", "moose", "nematode", "orca", "python", "quokka", "rat",
        "scorpion", "tarantula", "uromastyx", "vicuna", "walrus",
        "xiphias", "yak", "zebra"
    ]

    analyzer = WhitespaceAnalyzer()
    lowerDir = RAMDirectory()
    upperDir = RAMDirectory()
    lowerWriter = IndexWriter(lowerDir, analyzer, True,
                              IndexWriter.MaxFieldLength.UNLIMITED)
    upperWriter = IndexWriter(upperDir, analyzer, True,
                              IndexWriter.MaxFieldLength.UNLIMITED)

    for name in animals:
        entry = Document()
        entry.add(Field("animal", name,
                        Field.Store.YES, Field.Index.NOT_ANALYZED))
        # Route on the first letter: a-m to the first index, n-z to the second.
        target = lowerWriter if name[0].lower() < "n" else upperWriter
        target.addDocument(entry)

    lowerWriter.close()
    upperWriter.close()

    self.searchers = [IndexSearcher(d) for d in (lowerDir, upperDir)]
def setUp(self):
    """Index one sample sentence in "field" and open a searcher on it."""
    ramDir = RAMDirectory()
    indexWriter = IndexWriter(ramDir, WhitespaceAnalyzer(), True,
                              IndexWriter.MaxFieldLength.UNLIMITED)

    # The sample document is a single stored, analyzed field.
    sample = Document()
    sentence = Field("field",
                     "the quick brown fox jumped over the lazy dog",
                     Field.Store.YES, Field.Index.ANALYZED)
    sample.add(sentence)

    indexWriter.addDocument(sample)
    indexWriter.close()

    self.searcher = IndexSearcher(ramDir)
def setUp(self):
    """Index 500 documents with padded ids into an in-memory directory.

    Each document has one stored, un-analyzed "id" field whose value is
    ``NumberUtils.pad(i)`` for ``i`` from 1 through 500.
    """
    self.analyzer = WhitespaceAnalyzer()
    self.directory = RAMDirectory()

    writer = IndexWriter(self.directory, self.analyzer, True,
                         IndexWriter.MaxFieldLength.LIMITED)

    # range() rather than xrange(): same iteration, and it also exists
    # on Python 3.
    for i in range(1, 501):
        doc = Document()
        doc.add(Field("id", NumberUtils.pad(i),
                      Field.Store.YES, Field.Index.NOT_ANALYZED))
        writer.addDocument(doc)

    writer.close()
def setUp(self):
    """Index four "restaurant" points and prepare a searcher and query.

    Each point is added through ``self.addPoint``; afterwards a searcher
    is opened on the directory and ``self.query`` is set to a term query
    on type "restaurant".
    """
    self.directory = RAMDirectory()
    indexWriter = IndexWriter(self.directory, WhitespaceAnalyzer(), True,
                              IndexWriter.MaxFieldLength.UNLIMITED)

    # (name, x, y) - the two integers are forwarded to addPoint as-is.
    restaurants = (
        ("El Charro", 1, 2),
        ("Cafe Poca Cosa", 5, 9),
        ("Los Betos", 9, 6),
        ("Nico's Taco Shop", 3, 8),
    )
    for name, x, y in restaurants:
        self.addPoint(indexWriter, name, "restaurant", x, y)

    indexWriter.close()

    self.searcher = IndexSearcher(self.directory, True)
    self.query = TermQuery(Term("type", "restaurant"))
def setUp(self):
    """Index one part document and open a searcher on it.

    The document has an un-analyzed "partnum" field and an analyzed
    "description" field; both are stored.
    """
    self.directory = RAMDirectory()
    indexWriter = IndexWriter(self.directory, SimpleAnalyzer(), True,
                              IndexWriter.MaxFieldLength.UNLIMITED)

    part = Document()
    partnum = Field("partnum", "Q36",
                    Field.Store.YES, Field.Index.NOT_ANALYZED)
    part.add(partnum)
    description = Field("description", "Illidium Space Modulator",
                        Field.Store.YES, Field.Index.ANALYZED)
    part.add(description)

    indexWriter.addDocument(part)
    indexWriter.close()

    self.searcher = IndexSearcher(self.directory, True)
def someMethod(self):
    """Run a short indexing and query-parsing sequence.

    Builds a document with "title" and "contents" fields, adds it to an
    in-memory index twice (once with the analyzer passed explicitly),
    then parses the same expression through the static
    ``QueryParser.parse`` and through a ``QueryParser`` instance.
    """
    ramDir = RAMDirectory()
    stdAnalyzer = StandardAnalyzer()
    indexWriter = IndexWriter(ramDir, stdAnalyzer, True)

    entry = Document()
    entry.add(Field.Text("title", "This is the title"))
    entry.add(Field.UnStored("contents", "...document contents..."))

    indexWriter.addDocument(entry)
    indexWriter.addDocument(entry, stdAnalyzer)

    expression = "some query"

    # Static form first, then the instance form; the second result
    # replaces the first.
    query = QueryParser.parse(expression, "contents", stdAnalyzer)

    contentsParser = QueryParser("contents", stdAnalyzer)
    query = contentsParser.parseQuery(expression)
def setUp(self):
    """Index two sentences in "field" and open a searcher on them."""
    ramDir = RAMDirectory()
    indexWriter = IndexWriter(ramDir, WhitespaceAnalyzer(), True,
                              IndexWriter.MaxFieldLength.UNLIMITED)

    sentences = (
        "the quick brown fox jumped over the lazy dog",
        "the fast fox hopped over the hound",
    )
    for sentence in sentences:
        entry = Document()
        entry.add(Field("field", sentence,
                        Field.Store.YES, Field.Index.ANALYZED))
        indexWriter.addDocument(entry)

    indexWriter.close()

    self.searcher = IndexSearcher(ramDir, True)
def _createIndex(self, inputDF, colname):
    """
    Build an in-memory Lucene index over one column of ``inputDF``.

    Rows are visited through ``inputDF.apply(..., axis=1)`` and the value
    found under ``colname`` in each row is handed to ``self._addDoc``
    along with the writer. A ``WhitespaceAnalyzer`` is used for indexing.

    :param inputDF: row source exposing ``apply(func, axis=1)``
        (presumably a pandas DataFrame - confirm against callers).
    :param colname: key of the column to index.
    :return: the ``RAMDirectory`` holding the optimized, closed index.
    """
    # Index storage and writer
    ramDir = RAMDirectory()
    indexWriter = IndexWriter(ramDir, WhitespaceAnalyzer(), True,
                              IndexWriter.MaxFieldLength.LIMITED)

    def indexRow(row):
        # Feed this row's column value to the writer.
        return self._addDoc(row[colname], indexWriter)

    # Row-wise indexing of the column data
    inputDF.apply(indexRow, axis=1)

    # Optimize and close before handing the directory back
    indexWriter.optimize()
    indexWriter.close()

    return ramDir
def getIndexWriter(self):
    """Return a new writer on a fresh RAMDirectory, compound files off."""
    ramDir = RAMDirectory()
    stdAnalyzer = StandardAnalyzer()

    indexWriter = IndexWriter(ramDir, stdAnalyzer, True)
    indexWriter.setUseCompoundFile(False)

    return indexWriter
def __init__(self, indexDir):
    """Open a searcher over the index stored at ``indexDir``.

    The file-system directory at ``indexDir`` is wrapped in a
    ``RAMDirectory``, kept as ``self.directory``; ``self.searcher``
    searches that directory.

    :param indexDir: path of the existing index directory.
    """
    onDisk = SimpleFSDirectory(File(indexDir))
    self.directory = RAMDirectory(onDisk)
    self.searcher = IndexSearcher(self.directory)
def indexFile(self):
    """Initialise the Lucene VM and set up an in-memory index writer.

    Stores the ``lucene.initVM()`` result, the analyzer, the directory
    and the writer on ``self`` (``_th``, ``_analyzer``, ``_dir``,
    ``_writer``).
    """
    self._th = lucene.initVM()
    self._analyzer = StandardAnalyzer(Version.LUCENE_36)
    self._dir = RAMDirectory()

    fieldLimit = IndexWriter.MaxFieldLength(25000)
    self._writer = IndexWriter(self._dir, self._analyzer, True, fieldLimit)
def setUp(self):
    """Give the test a fresh in-memory directory as ``self.directory``."""
    self.directory = RAMDirectory()
def setUp(self):
    """Run the parent ``setUp``, then assign a new RAMDirectory.

    ``self.directory`` is set after the parent call, so it replaces
    whatever the parent may have stored under that name.
    """
    super(ScoreTest, self).setUp()

    self.directory = RAMDirectory()