Beispiel #1
0
def init_lucene_search():
    lucene.initVM(vmargs=['-Djava.awt.headless=true'])
    print 'lucene', lucene.VERSION
    print 'Index ', INDEX_DIR
    base_dir = os.path.dirname(os.path.abspath(sys.argv[0]))  # current dir
    directory = SimpleFSDirectory(File(INDEX_DIR))
    searcher = IndexSearcher(DirectoryReader.open(directory))
    analyzer = SmartChineseAnalyzer(Version.LUCENE_CURRENT, SmartChineseAnalyzer.getDefaultStopSet())

    return searcher, analyzer
Beispiel #2
0
def init_lucene_search():
    lucene.initVM(vmargs=['-Djava.awt.headless=true'])
    print 'lucene', lucene.VERSION
    print 'Index ', INDEX_DIR
    base_dir = os.path.dirname(os.path.abspath(sys.argv[0]))  # current dir
    directory = SimpleFSDirectory(File(INDEX_DIR))
    searcher = IndexSearcher(DirectoryReader.open(directory))
    analyzer = SmartChineseAnalyzer(Version.LUCENE_CURRENT,
                                    SmartChineseAnalyzer.getDefaultStopSet())

    return searcher, analyzer
Beispiel #3
0
    def __init__(self, root, storeDir, analyzer):

        if not os.path.exists(storeDir):
            os.mkdir(storeDir)

        store = SimpleFSDirectory(File(storeDir))
        # analyzer = LimitTokenCountAnalyzer(analyzer, 1048576)
        # use smart chinese analyzer
        analyzer = SmartChineseAnalyzer(Version.LUCENE_CURRENT, SmartChineseAnalyzer.getDefaultStopSet())
        config = IndexWriterConfig(Version.LUCENE_CURRENT, analyzer)
        config.setOpenMode(IndexWriterConfig.OpenMode.CREATE)
        writer = IndexWriter(store, config)

        self.indexDocs(root, writer)
        ticker = Ticker()
        print 'commit index',
        threading.Thread(target=ticker.run).start()
        writer.commit()
        writer.close()
        ticker.tick = False
        print 'done'
Beispiel #4
0
    def __init__(self, root, storeDir, analyzer):

        if not os.path.exists(storeDir):
            os.mkdir(storeDir)

        store = SimpleFSDirectory(File(storeDir))
        # analyzer = LimitTokenCountAnalyzer(analyzer, 1048576)
        # use smart chinese analyzer
        analyzer = SmartChineseAnalyzer(
            Version.LUCENE_CURRENT, SmartChineseAnalyzer.getDefaultStopSet())
        config = IndexWriterConfig(Version.LUCENE_CURRENT, analyzer)
        config.setOpenMode(IndexWriterConfig.OpenMode.CREATE)
        writer = IndexWriter(store, config)

        self.indexDocs(root, writer)
        ticker = Ticker()
        print 'commit index',
        threading.Thread(target=ticker.run).start()
        writer.commit()
        writer.close()
        ticker.tick = False
        print 'done'
Beispiel #5
0
                    doc = Document()
                    doc.add(Field("name", filename, t1))
                    doc.add(Field("path", root, t1))
                    if len(contents) > 0:
                        doc.add(Field("contents", contents, t2))
                    else:
                        print "warning: no content in %s" % filename
                    writer.addDocument(doc)
                except Exception, e:
                    print "Failed in indexDocs:", e


if __name__ == '__main__':
    if len(sys.argv) < 2:
        print IndexFiles.__doc__
        sys.exit(1)
    lucene.initVM(vmargs=['-Djava.awt.headless=true'])
    print 'lucene', lucene.VERSION
    start = datetime.now()
    try:
        # base_dir = os.path.dirname(os.path.abspath(sys.argv[0]))
        IndexFiles(
            sys.argv[1], INDEX_DIR,
            SmartChineseAnalyzer(Version.LUCENE_CURRENT,
                                 SmartChineseAnalyzer.getDefaultStopSet()))
        end = datetime.now()
        print end - start
    except Exception, e:
        print "Failed: ", e
        raise e
Beispiel #6
0
                    contents = unicode(file.read(), 'utf-8')
                    file.close()
                    doc = Document()
                    doc.add(Field("name", filename, t1))
                    doc.add(Field("path", root, t1))
                    if len(contents) > 0:
                        doc.add(Field("contents", contents, t2))
                    else:
                        print "warning: no content in %s" % filename
                    writer.addDocument(doc)
                except Exception, e:
                    print "Failed in indexDocs:", e


if __name__ == '__main__':
    if len(sys.argv) < 2:
        print IndexFiles.__doc__
        sys.exit(1)
    lucene.initVM(vmargs=['-Djava.awt.headless=true'])
    print 'lucene', lucene.VERSION
    start = datetime.now()
    try:
        # base_dir = os.path.dirname(os.path.abspath(sys.argv[0]))
        IndexFiles(sys.argv[1], INDEX_DIR,
                   SmartChineseAnalyzer(Version.LUCENE_CURRENT, SmartChineseAnalyzer.getDefaultStopSet()))
        end = datetime.now()
        print end - start
    except Exception, e:
        print "Failed: ", e
        raise e