def main(cls, argv):
    """Run one query against the index and display it under several sorts.

    The query is a BooleanQuery with two SHOULD clauses: a MatchAllDocsQuery
    and the parsed expression "java OR action" on the "contents" field. The
    index location is read from the "index.dir" system property.

    Args:
        cls: the class this function is attached to; not used in the body.
        argv: command-line arguments; not used in the body.
    """
    allBooks = MatchAllDocsQuery()
    parser = QueryParser(Version.LUCENE_CURRENT, "contents",
                         StandardAnalyzer(Version.LUCENE_CURRENT))

    query = BooleanQuery()
    query.add(allBooks, BooleanClause.Occur.SHOULD)
    query.add(parser.parse("java OR action"), BooleanClause.Occur.SHOULD)

    indexDir = System.getProperty("index.dir")
    directory = SimpleFSDirectory(File(indexDir))

    # Close the directory even when one of the displays raises.
    try:
        example = SortingExample(directory)

        example.displayResults(query, Sort.RELEVANCE)
        example.displayResults(query, Sort.INDEXORDER)
        example.displayResults(
            query, Sort(SortField("category", SortField.STRING)))
        # NOTE(review): the trailing True is presumably SortField's
        # "reverse" flag -- confirm against the bound Lucene version.
        example.displayResults(
            query, Sort(SortField("pubmonth", SortField.INT, True)))
        example.displayResults(
            query,
            Sort([SortField("category", SortField.STRING),
                  SortField.FIELD_SCORE,
                  SortField("pubmonth", SortField.INT, True)]))
        example.displayResults(
            query,
            Sort([SortField.FIELD_SCORE,
                  SortField("category", SortField.STRING)]))
    finally:
        directory.close()
class LiaTestCase(TestCase):
    """Base test case that opens the index directory around every test.

    The index location is read from the "index.dir" system property when the
    test case is constructed. ``setUp`` opens it as ``self.directory`` and
    ``tearDown`` closes it again.
    """

    TEST_VERSION = Version.LUCENE_CURRENT

    def __init__(self, *args):
        super(LiaTestCase, self).__init__(*args)
        self.indexDir = System.getProperty("index.dir")

    def setUp(self):
        self.directory = SimpleFSDirectory(File(self.indexDir))

    def tearDown(self):
        self.directory.close()

    def getWriter(self, directory=None, analyzer=None, open_mode=None):
        """Return an IndexWriter, filling in defaults for omitted arguments.

        Args:
            directory: target directory; defaults to ``self.directory``.
            analyzer: analyzer to index with; defaults to a
                WhitespaceAnalyzer wrapped in
                ``LimitTokenCountAnalyzer(..., 10000)``.
            open_mode: IndexWriterConfig open mode; defaults to
                ``IndexWriterConfig.OpenMode.CREATE``.
        """
        # Explicit None checks, matching the style used by getSearcher().
        if analyzer is None:
            analyzer = LimitTokenCountAnalyzer(
                WhitespaceAnalyzer(Version.LUCENE_CURRENT), 10000)
        if open_mode is None:
            open_mode = IndexWriterConfig.OpenMode.CREATE
        if directory is None:
            directory = self.directory

        config = IndexWriterConfig(self.TEST_VERSION, analyzer)
        config.setOpenMode(open_mode)
        return IndexWriter(directory, config)

    def getSearcher(self, directory=None, reader=None):
        """Return an IndexSearcher over ``reader`` or over a directory.

        Args:
            directory: directory to open a reader on when no ``reader`` is
                given; defaults to ``self.directory``.
            reader: an already-open reader; takes precedence when given.
        """
        if reader is not None:
            return IndexSearcher(reader)
        if directory is None:
            directory = self.directory
        return IndexSearcher(DirectoryReader.open(directory))

    #
    # For troubleshooting
    #
    def dumpHits(self, searcher, scoreDocs):
        """Print "score: title" for each hit, or "No hits" when empty."""
        if not scoreDocs:
            # Single-argument parenthesised form prints the same text on
            # Python 2 and Python 3.
            print("No hits")
        else:
            for scoreDoc in scoreDocs:
                print("%s: %s" % (scoreDoc.score,
                                  searcher.doc(scoreDoc.doc).get('title')))

    def assertHitsIncludeTitle(self, searcher, scoreDocs, title, fail=False):
        """Assert that ``title`` is (or is not) among the hits.

        Args:
            searcher: searcher used to load each hit's stored document.
            scoreDocs: hits to inspect.
            title: value compared with each document's "title" field.
            fail: when False (default) the test fails if no hit carries the
                title; when True it fails if some hit does.
        """
        for scoreDoc in scoreDocs:
            doc = searcher.doc(scoreDoc.doc)
            if title == doc.get("title"):
                if fail:
                    self.fail("title '%s' found" % (title,))
                return

        if not fail:
            self.fail("title '%s' not found" % (title,))

    def parseDate(self, s):
        """Parse ``s`` with ``SimpleDateFormat("yyyy-MM-dd")``."""
        return SimpleDateFormat("yyyy-MM-dd").parse(s)
def index(cls, indexDir, dataDir):
    """Index the contents of ``dataDir`` into an index at ``indexDir``.

    Args:
        cls: object providing ``indexDirectory(writer, dataDir)``, which is
            called to add the documents.
        indexDir: filesystem path of the index to write.
        dataDir: existing directory to index.

    Returns:
        The value of ``writer.numDocs()`` read after indexing and before
        the commit.

    Raises:
        IOError: if ``dataDir`` does not exist or is not a directory.
    """
    # isdir() is already False for paths that do not exist.
    if not os.path.isdir(dataDir):
        raise IOError("%s does not exist or is not a directory" % (dataDir,))

    directory = SimpleFSDirectory(File(indexDir))
    try:
        # NOTE(review): the positional True is presumably the "create" flag
        # of the Lucene 3.x IndexWriter constructor -- confirm.
        writer = IndexWriter(directory,
                             StandardAnalyzer(Version.LUCENE_CURRENT), True,
                             IndexWriter.MaxFieldLength.LIMITED)
        try:
            writer.setUseCompoundFile(False)
            cls.indexDirectory(writer, dataDir)
            numIndexed = writer.numDocs()
            writer.commit()
        finally:
            # Always release the writer, even when indexing fails.
            # NOTE(review): close() presumably flushes pending documents, so
            # a failed run may leave a partial index -- confirm.
            writer.close()
    finally:
        directory.close()

    return numIndexed
class LiaTestCase(TestCase):
    """Base test case that opens the index directory around every test.

    The index location is read from the ``index.dir`` environment variable
    when the test case is constructed (a ``KeyError`` is raised if it is not
    set). ``setUp`` opens it as ``self.directory`` and ``tearDown`` closes
    it again.
    """

    def __init__(self, *args):
        super(LiaTestCase, self).__init__(*args)
        self.indexDir = os.environ["index.dir"]

    def setUp(self):
        # NOTE(review): the path string is handed to SimpleFSDirectory
        # unwrapped -- confirm the bound Lucene version accepts a str here.
        self.directory = SimpleFSDirectory(self.indexDir)

    def tearDown(self):
        self.directory.close()

    #
    # For troubleshooting
    #
    def dumpHits(self, searcher, scoreDocs):
        """Print "score: title" for each hit, or "No hits" when empty."""
        if not scoreDocs:
            # Single-argument parenthesised form prints the same text on
            # Python 2 and Python 3.
            print("No hits")
        else:
            for scoreDoc in scoreDocs:
                print("%s: %s" % (scoreDoc.score,
                                  searcher.doc(scoreDoc.doc).get('title')))

    def assertHitsIncludeTitle(self, searcher, scoreDocs, title, fail=False):
        """Assert that ``title`` is (or is not) among the hits.

        Args:
            searcher: searcher used to load each hit's stored document.
            scoreDocs: hits to inspect.
            title: value compared with each document's "title" field.
            fail: when False (default) the test fails if no hit carries the
                title; when True it fails if some hit does.
        """
        for scoreDoc in scoreDocs:
            doc = searcher.doc(scoreDoc.doc)
            if title == doc.get("title"):
                if fail:
                    self.fail("title '%s' found" % (title,))
                return

        if not fail:
            self.fail("title '%s' not found" % (title,))

    def parseDate(self, s):
        """Parse a ``yyyy-MM-dd`` string into a ``datetime.date``.

        The previous body called ``datetime.date("yyyy-MM-dd")``, which
        ignored ``s`` and raised ``TypeError``.

        Raises:
            ValueError: if ``s`` is not in ``yyyy-MM-dd`` form.
        """
        # Local import: independent of whether the file-level name
        # ``datetime`` is the module or the class.
        import datetime as _dt
        return _dt.datetime.strptime(s, "%Y-%m-%d").date()
def index(cls, indexDir, dataDir):
    """Index the contents of ``dataDir`` into an index at ``indexDir``.

    Args:
        cls: object providing ``indexDirectory(writer, dataDir)``, which is
            called to add the documents.
        indexDir: filesystem path of the index to write.
        dataDir: existing directory to index.

    Returns:
        The value of ``writer.numDocs()`` read after indexing and before
        the index is optimized.

    Raises:
        IOError: if ``dataDir`` does not exist or is not a directory.
    """
    # isdir() is already False for paths that do not exist.
    if not os.path.isdir(dataDir):
        raise IOError("%s does not exist or is not a directory" % (dataDir,))

    directory = SimpleFSDirectory(File(indexDir))
    try:
        # NOTE(review): the positional True is presumably the "create" flag
        # of the Lucene 3.x IndexWriter constructor -- confirm.
        writer = IndexWriter(directory,
                             StandardAnalyzer(Version.LUCENE_CURRENT), True,
                             IndexWriter.MaxFieldLength.LIMITED)
        try:
            writer.setUseCompoundFile(False)
            cls.indexDirectory(writer, dataDir)
            numIndexed = writer.numDocs()
            writer.optimize()
        finally:
            # Always release the writer, even when indexing fails.
            # NOTE(review): close() presumably flushes pending documents, so
            # a failed run may leave a partial index -- confirm.
            writer.close()
    finally:
        directory.close()

    return numIndexed
class LiaTestCase(TestCase):
    """Base test case that opens the index directory around every test.

    The index location is read from the "index.dir" system property when the
    test case is constructed. ``setUp`` opens it as ``self.directory`` and
    ``tearDown`` closes it again.
    """

    def __init__(self, *args):
        super(LiaTestCase, self).__init__(*args)
        self.indexDir = System.getProperty("index.dir")

    def setUp(self):
        self.directory = SimpleFSDirectory(File(self.indexDir))

    def tearDown(self):
        self.directory.close()

    #
    # For troubleshooting
    #
    def dumpHits(self, searcher, scoreDocs):
        """Print "score: title" for each hit, or "No hits" when empty."""
        if not scoreDocs:
            # Single-argument parenthesised form prints the same text on
            # Python 2 and Python 3.
            print("No hits")
        else:
            for scoreDoc in scoreDocs:
                print("%s: %s" % (scoreDoc.score,
                                  searcher.doc(scoreDoc.doc).get('title')))

    def assertHitsIncludeTitle(self, searcher, scoreDocs, title, fail=False):
        """Assert that ``title`` is (or is not) among the hits.

        Args:
            searcher: searcher used to load each hit's stored document.
            scoreDocs: hits to inspect.
            title: value compared with each document's "title" field.
            fail: when False (default) the test fails if no hit carries the
                title; when True it fails if some hit does.
        """
        for scoreDoc in scoreDocs:
            doc = searcher.doc(scoreDoc.doc)
            if title == doc.get("title"):
                if fail:
                    self.fail("title '%s' found" % (title,))
                return

        if not fail:
            self.fail("title '%s' not found" % (title,))

    def parseDate(self, s):
        """Parse ``s`` with ``SimpleDateFormat("yyyy-MM-dd")``."""
        return SimpleDateFormat("yyyy-MM-dd").parse(s)