# Term list passed below as ``given_patent_term`` to ``lda.LDA_KWK``.
# NOTE(review): the entries look like stemmed tokens; 'angel' may be a typo
# for the stem of "angle" -- confirm before changing it.
PATENT_TERMS2 = [
    'compris',
    'detect',
    'first',
    'second',
    'mean',
    'one',
    'said',
    'unit',
    'least',
    'sensor',
    'angel',
    'determin',
    'base',
]
# The first entry is Korean for "claim".
PATENT_TERMS_CAS_SAMPLE = ['청구항', 'vehicl', 'first', 'second']
PATENT_TERMS_NUMBER = []

# Add the decimal strings "0" .. "49" to every term list.
# PATENT_TERMS is not defined in this excerpt; it must already exist here.
for i in range(50):
    number_token = str(i)
    for term_list in (
        PATENT_TERMS_CAS_SAMPLE,
        PATENT_TERMS,
        PATENT_TERMS2,
        PATENT_TERMS_NUMBER,
    ):
        term_list.append(number_token)

# Keyword arguments shared by both ``lda.LDA_KWK`` constructions below.
lda_model_options = {
    'given_pass_num': PASS_TIMES,
    'given_topic_num': ALL_TOPIC_NUMBER,
    'given_words_num': WORD_NUMBER,
}

# This instance is built from a dummy string and is only handed to
# ``lda.Generate_with_Excel_CORPUS`` as its first argument.
for_extracting_excel_documents = lda.LDA_KWK(
    'non-meaning_string',
    **lda_model_options,
)
Corpus_Excel = lda.Generate_with_Excel_CORPUS(
    for_extracting_excel_documents,
    excel_location=ALL_LOCATION,
    idx=IDX,
    sheet_num=SHEET_NUMBER,
)

# Second instance: constructed from the Excel-derived corpus plus the term
# list and the "all" flag, then run.
LDACLASS_Corpus_Excel = lda.LDA_KWK(
    Corpus_Excel,
    given_patent_term=PATENT_TERMS2,
    given_all_flag=ALL_MULTIPLE_VALUE,
    **lda_model_options,
)

print("Generating All Doc LDA Processing")
LDACLASS_Corpus_Excel.Generate_LDA()
LDACLASS_Corpus_Excel.Split_Topics_with_Terms()
print("Done")
result.append(tb(i)) print("End extracting document from given excel - document") return result def t(GWE): result = [] for items in GWE: result.append(tb(" ".join(items.texts[0]))) return result for_extracting_excel_documents = lda.LDA_KWK('non-meaning_string', given_pass_num=PASS_TIMES, given_topic_num=ALL_TOPIC_NUMBER, given_words_num=WORD_NUMBER) GWE = lda.Generate_with_Excel(for_extracting_excel_documents, excel_location=ALL_LOCATION, idx=IDX, sheet_num=SHEET_NUMBER, given_patent_term=PATENT_TERMS2, given_all_flag=ALL_MULTIPLE_VALUE) lda.Processing_Entire_Class(GWE) bloblist = t(GWE) # print(bloblist) temp = []