Python IndexWriter.getConfig 예제들

프로그래밍 언어: Python

네임스페이스/패키지 이름: org.apache.lucene.index

클래스/타입: IndexWriter

메소드/함수: getConfig

hotexamples.com에서의 예제들: 1

Python IndexWriter.getConfig - 1개의 예제가 발견되었습니다. 이것들은 오픈소스 프로젝트에서 추출된 Python의 org.apache.lucene.index.IndexWriter.getConfig에 대한 실세계 최고 등급의 예제들입니다. 예제들을 평가하여 예제의 품질 향상에 도움을 줄 수 있습니다.

자주 사용되는 메소드들

보기 숨기기

IndexWriter(30)

close(30)

commit(30)

addDocument(30)

numDocs(28)

deleteDocuments(11)

updateDocument(5)

optimize(5)

deleteAll(3)

rollback(2)

forceMerge(2)

setUseCompoundFile(2)

getReader(2)

getDocStats(1)

setMaxFieldLength(1)

getConfig(1)

docCount(1)

writerow(1)

예제 #1

파일 보기

class IndexBuilder(object):
    def __init__(self, index_path, update=False):
        dir = FSDirectory.open(Paths.get(index_path))
        analyzer = StandardAnalyzer()
        iwc = IndexWriterConfig(analyzer)
        if update:
            iwc.setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND)
        else:
            iwc.setOpenMode(IndexWriterConfig.OpenMode.CREATE)
        self.writer = IndexWriter(dir, iwc)

    def index_docs(self, input_documents):
        for document in tqdm(input_documents, total=len(input_documents)):
            doc = Document()
            doc.add(StringField(".I", document[".I"].lower(), Field.Store.YES))
            doc.add(StringField(".U", document[".U"].lower(), Field.Store.YES))
            type = FieldType()
            type.setIndexOptions(
                IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS)
            type.setStored(True)
            type.setStoreTermVectors(True)
            type.setTokenized(True)
            if ".W" in document and ".M" in document:
                doc.add(
                    Field(
                        "text", " ".join(
                            tokenizer.tokenize(document[".M"].lower() + " " +
                                               document[".T"].lower() +
                                               document[".W"].lower())), type))
            elif ".M" in document and ".W" not in document:
                doc.add(
                    Field(
                        "text", " ".join(
                            tokenizer.tokenize(document[".M"].lower() + " " +
                                               document[".T"].lower())), type))
            elif ".M" not in document and ".W" in document:
                doc.add(
                    Field(
                        "text", " ".join(
                            tokenizer.tokenize(document[".T"].lower() +
                                               document[".W"].lower())), type))
            elif ".M" not in document and ".W" not in document:
                doc.add(
                    Field("text",
                          " ".join(tokenizer.tokenize(document[".T"].lower())),
                          type))
            if self.writer.getConfig().getOpenMode(
            ) == IndexWriterConfig.OpenMode.CREATE:
                self.writer.addDocument(doc)
            else:
                self.writer.updateDocument(Term(".U", document[".U"]), doc)
        self.writer.close()