コード例 #1
0
# Terms handed to the LDA helpers as ``given_patent_term`` further down.
# The entries look like stemmed, patent-boilerplate words (presumably a
# stop list -- confirm against the ``lda`` module).
# NOTE(review): 'angel' may be a typo for 'angle' / its stem -- confirm
# before changing, since the string is matched at runtime.
PATENT_TERMS2 = [
    'compris',
    'detect',
    'first',
    'second',
    'mean',
    'one',
    'said',
    'unit',
    'least',
    'sensor',
    'angel',
    'determin',
    'base',
]

PATENT_TERMS_CAS_SAMPLE = ['청구항', 'vehicl', 'first', 'second']
PATENT_TERMS_NUMBER = []

# Add the decimal strings "0" .. "49" to every term list.
# PATENT_TERMS is not created here; it must already exist at this point.
for i in range(50):
    numeral = str(i)
    for term_list in (PATENT_TERMS_CAS_SAMPLE, PATENT_TERMS,
                      PATENT_TERMS2, PATENT_TERMS_NUMBER):
        term_list.append(numeral)

# Build an LDA_KWK instance around a dummy string; in the visible code it is
# only used as the first argument to Generate_with_Excel_CORPUS below.
for_extracting_excel_documents = lda.LDA_KWK('non-meaning_string',
                                             given_pass_num=PASS_TIMES,
                                             given_topic_num=ALL_TOPIC_NUMBER,
                                             given_words_num=WORD_NUMBER)
# Produce the corpus from the Excel file at ALL_LOCATION (sheet SHEET_NUMBER,
# selector IDX). NOTE(review): the exact meaning of `idx` is defined in the
# `lda` module -- confirm there.
Corpus_Excel = lda.Generate_with_Excel_CORPUS(for_extracting_excel_documents,
                                              excel_location=ALL_LOCATION,
                                              idx=IDX,
                                              sheet_num=SHEET_NUMBER)
# Second LDA_KWK instance, this time over the real corpus, with the
# PATENT_TERMS2 term list and the ALL_MULTIPLE_VALUE flag supplied.
LDACLASS_Corpus_Excel = lda.LDA_KWK(Corpus_Excel,
                                    given_pass_num=PASS_TIMES,
                                    given_topic_num=ALL_TOPIC_NUMBER,
                                    given_words_num=WORD_NUMBER,
                                    given_patent_term=PATENT_TERMS2,
                                    given_all_flag=ALL_MULTIPLE_VALUE)
print("Generating All Doc LDA Processing")
# Run the model, then split the resulting topics; both are methods of the
# project's LDA_KWK class and their results are not captured here.
LDACLASS_Corpus_Excel.Generate_LDA()
LDACLASS_Corpus_Excel.Split_Topics_with_Terms()
print("Done")
コード例 #2
0
ファイル: tf_idf.py プロジェクト: Alresthyst/resume
        result.append(tb(i))

    print("End extracting document from given excel - document")

    return result


def t(GWE):
    """Convert every entry of *GWE* into a ``tb`` object.

    For each entry, the tokens in ``entry.texts[0]`` are joined with single
    spaces and the resulting string is passed to ``tb`` (defined elsewhere in
    this file; presumably a TextBlob constructor -- confirm).

    Returns:
        A list of the ``tb`` results, in the same order as *GWE*.
    """
    return [tb(" ".join(entry.texts[0])) for entry in GWE]


# Build an LDA_KWK instance around a dummy string; in the visible code it is
# only used as the first argument to Generate_with_Excel below.
for_extracting_excel_documents = lda.LDA_KWK('non-meaning_string',
                                             given_pass_num=PASS_TIMES,
                                             given_topic_num=ALL_TOPIC_NUMBER,
                                             given_words_num=WORD_NUMBER)
# Load the documents from the Excel file at ALL_LOCATION. The result is
# iterated by t() below, which reads `.texts[0]` from each element.
GWE = lda.Generate_with_Excel(for_extracting_excel_documents,
                              excel_location=ALL_LOCATION,
                              idx=IDX,
                              sheet_num=SHEET_NUMBER,
                              given_patent_term=PATENT_TERMS2,
                              given_all_flag=ALL_MULTIPLE_VALUE)

# Return value is not used. NOTE(review): presumably this processes GWE in
# place before it is read below -- confirm in the `lda` module.
lda.Processing_Entire_Class(GWE)

# One tb object per document, built from the space-joined tokens.
bloblist = t(GWE)

# print(bloblist)

# Scratch list; its use lies beyond the visible part of the file.
temp = []