def run_hicocluster_create_matrix():
    """Build a gensim dictionary/corpus from the cluster texts and write a
    dense document-term count matrix to matrix.txt.

    Side effects (all in the current working directory):
      - dictionary.dict : saved gensim Dictionary
      - corpus.mm       : serialized MmCorpus
      - matrix.txt      : one row per document, space-separated term counts
    """
    # Observed corpus stats: number of docs: 1950, number of items: 21826.
    # os.path.join instead of hard-coded "\\" keeps this portable off Windows.
    texts = JsonParser.get_texts(os.path.join(os.getcwd(), "clusters"))
    tokenized = [text.split() for text in texts]

    dictionary = corpora.Dictionary(tokenized)
    dictionary.save(os.path.join(os.getcwd(), "dictionary.dict"))
    corpus = [dictionary.doc2bow(tokens) for tokens in tokenized]
    corpora.MmCorpus.serialize(os.path.join(os.getcwd(), "corpus.mm"), corpus)

    print("length of docs: " + str(dictionary.num_docs))
    print("length of items: " + str(len(dictionary.token2id)))

    features = len(dictionary.token2id)
    set_doc_terms = []
    for doc in corpus:
        # Expand the sparse bag-of-words pairs (term_id, count) into a
        # dense count row; empty docs yield an all-zero row so the matrix
        # stays row-aligned with the corpus.
        doc_terms = [0] * features
        for term_id, count in doc:
            doc_terms[term_id] = count
        set_doc_terms.append(doc_terms)

    # `with` guarantees the handle is closed even if a write fails
    # (the original left the file unclosed on error).
    with open(os.path.join(os.getcwd(), "matrix.txt"), "w") as matrix:
        for line in set_doc_terms:
            # Original format: every value followed by a space, then newline.
            matrix.write(" ".join(str(v) for v in line) + " \n")
def get_combination():
    """Concatenate tf-idf features with doc2vec features per document.

    Fits tf-idf on the JSON cluster texts, loads the doc2vec vectors via
    load_d2v(), and returns a 2-D numpy array with one row per document:
    tf-idf columns first, then the doc2vec columns.
    """
    print("run_combination")

    tfidf = ExTFIDF.TfIdf()
    tfidf.fit_data(JsonParser.get_texts(os.path.join(os.getcwd(), "clusters")))
    tf_vectors = tfidf.get_data_as_vector()
    print("Length of tfidf feature: " + str(len(tf_vectors[0])))

    # load_d2v() returns a pair; index 1 holds the per-document doc2vec
    # vectors — TODO confirm against load_d2v's definition.
    pairs = load_d2v()
    single = pairs[1]
    print("Length of doc2vec feature: " + str(len(single[0])))

    # Column-wise concatenation: each row is [tfidf features | d2v features].
    final = numpy.hstack((tf_vectors, single))
    print("Length of final features: " + str(len(final[0])))
    return final
def algorithm_tfidf():
    """Fit tf-idf on the JSON cluster texts and cluster the vectors with
    K-means via Algorithm.algorithm_Kmean."""
    print("Running TFIDF")

    tfidf = ExTFIDF.TfIdf()
    tfidf.fit_data(JsonParser.get_texts(os.path.join(os.getcwd(), "clusters")))

    # Hoisted: the original called get_data_as_vector() twice.
    # Also fixed the typo in the log message ("lennth" -> "length").
    vectors = tfidf.get_data_as_vector()
    print("length of tfidf : " + str(len(vectors)))

    print("Running algorithm with TFIDF")
    Algorithm.algorithm_Kmean(vectors)