def createIndex(index_dir="/Tmp/REMOVEME.index-dir", src_dir='html_files'):
    """Build a fresh Lucene index over the parsed text of every file in *src_dir*.

    Args:
        index_dir: filesystem path where the index is written
                   (default keeps the original hard-coded location).
        src_dir: directory containing the HTML files to index
                 (default keeps the original hard-coded location).

    Improvements over the original: the hard-coded paths are now keyword
    parameters with backward-compatible defaults, the builtin `dir` is no
    longer shadowed, and the dead counter `i` is removed.
    """
    # initialize lucene and jvm
    print("started indexer")
    lucene.initVM()
    # get the analyzer
    analyzer = StandardAnalyzer(Version.LUCENE_30)
    # get index storage (`store`, not `dir`, to avoid shadowing the builtin)
    store = lucene.SimpleFSDirectory(lucene.File(index_dir))
    # True -> create a new index, overwriting any existing one
    writer = IndexWriter(store, analyzer, True,
                         IndexWriter.MaxFieldLength(512))
    for name in os.listdir(src_dir):
        path = os.path.join(src_dir, name)
        with open(path, 'r') as myfile:
            data = myfile.read()
        # parsehtml is a project helper; it returns (text, errors) and the
        # errors are deliberately ignored here, as in the original.
        document, errors = parsehtml(data)
        doc = Document()
        doc.add(Field("text", document, Field.Store.YES, Field.Index.ANALYZED))
        writer.addDocument(doc)
    writer.optimize()
    writer.close()
def luceneIndexer(contents): lucene.initVM() INDEXIDR= settings.INDEX_DIR indexdir= SimpleFSDirectory(File(INDEXIDR)) analyzer= StandardAnalyzer(Version.LUCENE_30) index_writer= IndexWriter(indexdir,analyzer,True,\ IndexWriter.MaxFieldLength(512)) for tfile in contents: print"Indexing: ", tfile document= Document() content= tfile.getvalue() document.add(Field("text",content,Field.Store.YES,\ Field.Index.ANALYZED)) index_writer.addDocument(document) print"Done: ", tfile index_writer.optimize() print index_writer.numDocs() index_writer.close()
def configure_lucene(): f = open('clique.txt', 'r') lucene.initVM() print 'Inside Function' #indexDir = "/tmp/luceneindex" dir = SimpleFSDirectory(File(indexDir)) analyzer = StandardAnalyzer(lucene.Version.LUCENE_CURRENT) writer = IndexWriter(dir, analyzer, True, IndexWriter.MaxFieldLength(512)) print >> sys.stderr, "Currently there are %d documents in the index..." % writer.numDocs( ) print >> sys.stderr, "Reading lines from sys.stdin..." for line in f: line = line.replace('\t', '') line = line.replace('\r', '') line = line.replace('\n', '') line = line.replace('^', '') line = line.strip() doc = Document() doc.add(Field("text", line, Field.Store.YES, Field.Index.ANALYZED)) writer.addDocument(doc) print >> sys.stderr, "Indexed lines from stdin (%d documents in index)" % ( writer.numDocs()) print >> sys.stderr, "About to optimize index of %d documents..." % writer.numDocs( ) writer.optimize() print >> sys.stderr, "...done optimizing index of %d documents" % writer.numDocs( ) print >> sys.stderr, "Closing index of %d documents..." % writer.numDocs() writer.close()
def index_files(files, index_directory):
    """Create a new Lucene index at *index_directory* and feed every entry
    of *files* through the project's parse_file() helper."""
    lucene.initVM()
    store = SimpleFSDirectory(File(index_directory))
    writer = IndexWriter(store,
                         StandardAnalyzer(Version.LUCENE_30),
                         True,  # overwrite any existing index
                         IndexWriter.MaxFieldLength(512))
    for entry in files:
        parse_file(entry, writer)
    writer.optimize()
    writer.close()
def luceneIndexer(docdir, indir): """ IndexDocuments from a directory """ lucene.initVM() DIRTOINDEX = docdir INDEXIDR = indir indexdir = SimpleFSDirectory(File(INDEXIDR)) analyzer = StandardAnalyzer(Version.LUCENE_30) index_writer= IndexWriter(indexdir,analyzer,True,\ IndexWriter.MaxFieldLength(512)) for tfile in glob.glob(os.path.join(DIRTOINDEX, '*.txt')): print "Indexing: ", tfile document = Document() content = open(tfile, 'r').read() document.add(Field("text",content,Field.Store.YES,\ Field.Index.ANALYZED)) index_writer.addDocument(document) print "Done: ", tfile index_writer.optimize() print index_writer.numDocs() index_writer.close()
def addDocuments(self, dir, maxFieldLength):
    """Write one document per parallel (keywords, unindexed, unstored, text)
    entry into *dir*.

    Field layout per document:
      id       - stored, not analyzed
      country  - stored only (not indexed)
      contents - indexed only (not stored)
      city     - stored and analyzed
    """
    writer = IndexWriter(dir, SimpleAnalyzer(), True,
                         IndexWriter.MaxFieldLength(maxFieldLength))
    rows = izip(self.keywords, self.unindexed, self.unstored, self.text)
    for keyword, unindexed, unstored, text in rows:
        doc = Document()
        # (name, value, store, index) spec for each field of this document
        field_specs = (
            ("id", keyword, Field.Store.YES, Field.Index.NOT_ANALYZED),
            ("country", unindexed, Field.Store.YES, Field.Index.NO),
            ("contents", unstored, Field.Store.NO, Field.Index.ANALYZED),
            ("city", text, Field.Store.YES, Field.Index.ANALYZED),
        )
        for fname, value, store, index in field_specs:
            doc.add(Field(fname, value, store, index))
        writer.addDocument(doc)
    writer.optimize()
    writer.close()
def index(self, doc, title, department, url):
    """Append one document (title, url, department, text fields — all
    stored and analyzed) to the index at self.indir.

    The first call creates the index (self.init is True); subsequent
    calls open it in append mode because self.init is cleared here.
    """
    store = SimpleFSDirectory(File(self.indir))
    writer = IndexWriter(store,
                         StandardAnalyzer(Version.LUCENE_30),
                         self.init,
                         IndexWriter.MaxFieldLength(512))
    self.init = False

    # Initialize document and index it
    record = Document()
    for field_name, value in (("title", title),
                              ("url", url),
                              ("department", department),
                              ("text", doc)):
        record.add(Field(field_name, value, Field.Store.YES,
                         Field.Index.ANALYZED))
    writer.addDocument(record)
    writer.optimize()
    writer.close()
def luceneIndexer(docdir, indir):
    """frpFile IndexDocuments from a directory.

    para:{
        docdir: the path of the txt file
        indir: the path of the index file which is generated by the following code
    }

    Each regular file under *docdir* is indexed with its content in a
    "text" field and its path (extension removed) in a "title" field.

    Bug fixed: the original computed the title with tfile.strip('.txt'),
    which strips any of the characters '.', 't', 'x' from BOTH ends of the
    path (e.g. 'text.txt' -> 'ex'); os.path.splitext drops only the
    extension. Also fixed: the builtin `list` is no longer shadowed, the
    range(len(...)) loop is replaced by direct iteration, the file handle
    is closed, and a leftover debug print ('okokokook') is removed.
    """
    lucene.initVM()
    DIRTOINDEX = docdir
    INDEXIDR = indir
    indexdir = SimpleFSDirectory(File(INDEXIDR))
    analyzer = StandardAnalyzer(Version.LUCENE_30)
    index_writer = IndexWriter(indexdir, analyzer, True,
                               IndexWriter.MaxFieldLength(512))
    for name in os.listdir(DIRTOINDEX):
        tfile = os.path.join(DIRTOINDEX, name)
        if not os.path.isfile(tfile):
            continue  # skip subdirectories and other non-files
        print("Indexing: ", tfile)
        document = Document()
        with open(tfile, 'r') as fh:  # close the handle (original leaked it)
            content = fh.read()
        document.add(Field("text", content, Field.Store.YES,
                           Field.Index.ANALYZED))
        # splitext removes only the trailing extension, unlike strip('.txt')
        title = os.path.splitext(tfile)[0]
        document.add(Field("title", title, Field.Store.YES,
                           Field.Index.ANALYZED))
        index_writer.addDocument(document)
        print("Done: ", tfile)
    index_writer.optimize()
    print(index_writer.numDocs())
    index_writer.close()
def __init__(self, indir):
    """Start the JVM and open a fresh IndexWriter on the index at *indir*,
    using the analyzer supplied by self.getAnalyzer()."""
    lucene.initVM()
    store = SimpleFSDirectory(File(indir))
    self.index_writer = IndexWriter(store,
                                    self.getAnalyzer(),
                                    True,  # create/overwrite the index
                                    IndexWriter.MaxFieldLength(512))
from common.stats import stats
import lucene
from lucene import \
    SimpleFSDirectory, System, File, \
    Document, Field, StandardAnalyzer, IndexWriter, Version

# Script entry point: opens (and overwrites) a Lucene index on disk and
# reports its current document count to stderr.
if __name__ == "__main__":
    lucene.initVM()
    # create an index called 'index-dir' in a temp directory
    # indexDir = os.path.join(System.getProperty('java.io.tmpdir', 'tmp'),
    #                         'index-dir')
    indexDir = "/Tmp/REMOVEME.index-dir"
    dir = SimpleFSDirectory(File(indexDir))
    analyzer = StandardAnalyzer(Version.LUCENE_30)
    # True -> recreate the index from scratch on every run
    writer = IndexWriter(dir, analyzer, True, IndexWriter.MaxFieldLength(512))

    # # set variables that affect speed of indexing
    # writer.setMergeFactor(int(argv[2]))
    # writer.setMaxMergeDocs(int(argv[3]))
    # writer.setMaxBufferedDocs(int(argv[4]))
    # # writer.infoStream = System.out
    # # print "Merge factor:  ", writer.getMergeFactor()
    # print "Max merge docs:", writer.getMaxMergeDocs()
    # print "Max buffered docs:", writer.getMaxBufferedDocs()

    # NOTE(review): `sys` is used here but not imported in the visible
    # snippet — presumably imported elsewhere; confirm.
    print >> sys.stderr, "Currently there are %d documents in the index..." % writer.numDocs(
    )

    # NOTE(review): the script appears truncated here — `i` is initialized
    # but the indexing loop that would use it is not visible in this chunk.
    i = 0
def indexFile(self):
    """Initialize the JVM and set up an in-memory (RAMDirectory) index
    with a fresh IndexWriter, storing all handles on self."""
    self._th = lucene.initVM()
    self._dir = RAMDirectory()
    self._analyzer = StandardAnalyzer(Version.LUCENE_36)
    self._writer = IndexWriter(self._dir,
                               self._analyzer,
                               True,  # create a new in-memory index
                               IndexWriter.MaxFieldLength(25000))
import lucene
import json
from lucene import (SimpleFSDirectory, System, File, Document, Field,
                    StandardAnalyzer, IndexWriter, IndexSearcher, QueryParser)

# Script entry point: recreates a full-text Lucene index on disk and
# builds one document per hard-coded sample line.
if __name__ == "__main__":
    lucene.initVM()
    fullIndexDir = r"c:\NLP\PhD\bob\fileDB\LuceneFullIndex"
    print("lucene version is:", lucene.VERSION)

    fullIndex = SimpleFSDirectory(File(fullIndexDir))
    analyzer = StandardAnalyzer(lucene.Version.LUCENE_CURRENT)
    # True -> overwrite any existing index; very large MaxFieldLength so
    # long documents are not silently truncated
    writer = IndexWriter(fullIndex, analyzer, True,
                         IndexWriter.MaxFieldLength(20000000))
    ## writer = IndexWriter(store, analyzer, True, IndexWriter.MaxFieldLength(512))

    print("Currently there are %d documents in the index..." % writer.numDocs())
    ## print "Reading lines from sys.stdin..."

    # Sample corpus to index (one document per line).
    lines = [
        "bla bla bla bla bla",
        "Erase una vez que se era",
        "En un lugar de La Mancha de cuyo nombre no quiero acordarme, no ha mucho que vivia un hidalgo de los de lanza en ristre",
        "Manchame mancha mancha que te mancha la mancha"
    ]

    for l in lines:
        doc = Document()
        doc.add(Field("text", l, Field.Store.YES, Field.Index.ANALYZED))
        # NOTE(review): the snippet appears truncated here — the built doc
        # is never passed to writer.addDocument, and the writer is never
        # optimized/closed in the visible chunk; confirm against the full file.