import numpy as np


def extract_words(vectorizor, train_list, test_list):
    """Fit *vectorizor* on the training documents and transform both splits.

    Parameters
    ----------
    vectorizor : object
        Any vectorizer exposing ``fit``/``transform``/``get_feature_names``
        (e.g. a sklearn ``CountVectorizer``).
    train_list, test_list : sequence of str
        Raw training / test documents.

    Returns
    -------
    tuple
        ``(train_matrix, test_matrix, feature_names)`` — the training split
        transformed by the vocabulary learned on it, the test split
        transformed by that same vocabulary, and the vocabulary itself.
    """
    count_vect = vectorizor.fit(train_list)
    train = count_vect.transform(train_list)
    test = count_vect.transform(test_list)
    return train, test, count_vect.get_feature_names()


# NOTE(review): `set` shadows the builtin; kept because the rest of the
# project references this name -- TODO rename to e.g. `data_set_id`.
set = 1

if __name__ == '__main__':
    # Load the comment corpus selected by `set`:
    #   1 = full News24 train/test set, 2 = toy subset, 3 = Slashdot dump.
    if set == 1:
        articleList, commentList, parentList, commentCount = read_comments(
            comment_data_path + 'trainTestDataSet.txt', skip_mtn=False)
    elif set == 2:
        articleList, commentList, parentList, commentCount = read_toy_comments(
            comment_data_path + 'trainTestDataSet.txt',
            comment_data_path + 'toyComments.csv')
    elif set == 3:
        articleList, commentList, commentCount = read_slashdot_comments(
            comment_data_path + 'slashdotDataSet.txt', limit=100000)

    # Values
    y = extract_values(articleList, commentList, commentCount, set)

    # Single stratified 5%/95% split; the loop runs exactly once (n_iter=1).
    sss = StratifiedShuffleSplit(y, 1, test_size=0.95, random_state=42)
    y_train = []
    y_test = []
    for train, test in sss:
        # Persist the index vectors so later stages reuse the same split.
        np.save('train_vect', train)
        np.save('test_vect', test)
        y_train = y[train]
        y_test = y[test]

    processed_comment_list = extract_global_bag_of_words_processed(commentList)
# NOTE(review): `set` shadows the builtin; kept because the rest of the
# project references this name -- TODO rename to e.g. `data_set_id`.
set = 3

if __name__ == '__main__':
    # py2/py3-compatible call form; output identical to `print 'START'`.
    print('START')

    # To process all the comments: select the data set by `set`
    # (1 = News24, 2 = toy subset, 3 = Slashdot).
    if set == 1:
        df_comments = read_news24_comments(
            comment_data_path + 'trainTestDataSet.txt')
        set_tag = "_news24"
        tag = '_main'
    elif set == 2:
        df_comments = read_toy_comments(
            comment_data_path + 'trainTestDataSet.txt',
            comment_data_path + 'toyComments.csv')
        set_tag = "_news24"
        tag = '_toy'
    elif set == 3:
        df_comments = read_slashdot_comments(
            comment_data_path + 'slashdotDataSet_latest.txt')
        set_tag = "_slashdot"
        tag = '_slashdot'

    # Post-processing is identical for every data set (was duplicated in
    # each branch above): order by date, renumber rows, group by thread.
    # NOTE(review): DataFrame.sort() was deprecated in pandas 0.17 and
    # removed in 0.20 -- on modern pandas use
    # df_comments.sort_values('date', inplace=True).
    df_comments.sort('date', inplace=True)
    df_comments.reset_index(inplace=True, drop=True)
    df_thread_groupby = df_comments.groupby('thread_root_id')

    print(df_comments.shape)
# NOTE(review): `set` shadows the builtin; kept because the rest of the
# project references this name -- TODO rename to e.g. `data_set_id`.
set = 3

if __name__ == '__main__':
    # py2/py3-compatible call form; output identical to `print 'START'`.
    print('START')
    # setup()

    # To process all the comments: select the data set by `set`
    # (1 = full News24 train/test set, 2 = toy subset, 3 = Slashdot dump).
    if set == 1:
        articleList, commentList, parentList, commentCount = read_comments(
            comment_data_path + 'trainTestDataSet.txt')
        set_tag = "_news24"
        tag = '_main'
    elif set == 2:
        articleList, commentList, parentList, commentCount = read_toy_comments(
            comment_data_path + 'trainTestDataSet.txt',
            comment_data_path + 'toyComments.csv')
        set_tag = "_news24"
        tag = '_toy'
    elif set == 3:
        articleList, commentList, commentCount = read_slashdot_comments(
            comment_data_path + 'slashdotDataSet.txt', limit=100000)
        set_tag = "_slashdot"
        tag = '_slashdot'

    # %s-formatting reproduces the py2 `print a, b, c` output byte-for-byte
    # while also being valid Python 3.
    print('Processed %s Comments' % commentCount)

    # Persist the target vector, then reload it for the split below.
    extractSaveValues(articleList, commentList, commentCount,
                      feature_set_path + "valueVector" + tag, set)
    y = load_numpy_matrix(feature_set_path + r'valueVector' + tag + '.npy')

    # Single stratified 60%/40% split, seeded for reproducibility.
    sss = StratifiedShuffleSplit(y, 1, test_size=0.40, random_state=42)