def add_group_feature(x_train, x_valid, x_test, x_sub):
    # Each fe helper takes the training frame plus the list of other frames
    # (valid, test, submission) and returns both with the new feature added.
    others = [x_valid, x_test, x_sub]
    x_train, others = fe.goal_min_group(x_train, others)
    x_train, others = fe.goal_max_group(x_train, others)
    x_train, others = fe.duration_group(x_train, others)
    x_train, others = fe.text_to_word_count(x_train, others)
    return x_train, others[0], others[1], others[2]
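# The fe grouping helpers above are not defined in this snippet. A minimal
# sketch of one such helper, assuming a numeric 'goal' column exists in every
# frame: quantile bin edges are fit on the training frame only and the same
# edges are reused for the other frames. Names here are illustrative, not the
# actual feature_engineering implementation.
import pandas as pd

def goal_group_sketch(x_train, others, col='goal', n_bins=10):
    # Fit bin edges on the training data only to avoid leakage.
    binned, edges = pd.qcut(x_train[col], q=n_bins, retbins=True,
                            labels=False, duplicates='drop')
    x_train = x_train.copy()
    x_train[col + '_group'] = binned
    new_others = []
    for df in others:
        df = df.copy()
        # Apply the train-derived edges to the valid/test/submission frames.
        df[col + '_group'] = pd.cut(df[col], bins=edges, labels=False,
                                    include_lowest=True)
        new_others.append(df)
    return x_train, new_others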
def add_group_feature(x_train, x_valid, x_test, x_sub, y_train):
    # Variant: the grouping features are disabled below and replaced with
    # single- and multi-column target encodings of the categorical columns.
    others = [x_valid, x_test, x_sub]
    # x_train, others = fe.goal_min_group(x_train, others)
    # x_train, others = fe.goal_max_group(x_train, others)
    # x_train, others = fe.duration_group(x_train, others)

    x_train, others = fe.target_encoding(x_train, y_train, others,
                                         'country_encoding')
    x_train, others = fe.target_encoding(x_train, y_train, others,
                                         'category1_encoding')
    x_train, others = fe.target_encoding(x_train, y_train, others,
                                         'category2_encoding')

    x_train, others = fe.multi_target_encoding(
        x_train, y_train, others,
        ['country_encoding', 'category1_encoding', 'category2_encoding'])

    x_train, others = fe.text_to_word_count(x_train, others)

    return x_train, others[0], others[1], others[2]
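# fe.target_encoding is not shown here; a minimal sketch of smoothed mean
# target encoding with the same (x_train, y_train, others, col) signature
# follows. This is an assumption about its behavior, and a production version
# would usually encode the training frame out-of-fold to limit target leakage.
def target_encoding_sketch(x_train, y_train, others, col, smoothing=10):
    global_mean = y_train.mean()
    stats = y_train.groupby(x_train[col]).agg(['mean', 'count'])
    # Shrink category means toward the global mean for rare categories.
    encoded = ((stats['mean'] * stats['count'] + global_mean * smoothing)
               / (stats['count'] + smoothing))
    x_train = x_train.copy()
    x_train[col] = x_train[col].map(encoded)
    new_others = []
    for df in others:
        df = df.copy()
        # Unseen categories fall back to the global mean.
        df[col] = df[col].map(encoded).fillna(global_mean)
        new_others.append(df)
    return x_train, new_others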
Example #3
    target = 'state'


    cv = 10
    train_dfs, valid_dfs, test_dfs = training.cv(train_df, cv)
    scores = list()
    params = None
    tune = True
    name = 'lgb_baseline'
    for cv_idx in range(cv):
        # prepare
        x_train, y_train = train_dfs[cv_idx][features], train_dfs[cv_idx][target]
        x_valid, y_valid = valid_dfs[cv_idx][features], valid_dfs[cv_idx][target]
        x_test, y_test = test_dfs[cv_idx][features], test_dfs[cv_idx][target]
        
        x_train, others = fe.text_to_word_count(x_train, [x_valid, x_test, test_df[features]])
        x_valid, x_test, x_sub = others[0], others[1], others[2]
        
        lgb_train = lgb.Dataset(x_train, y_train, categorical_feature=[3, 4, 5], free_raw_data=False)
        lgb_valid = lgb.Dataset(x_valid, y_valid, categorical_feature=[3, 4, 5], free_raw_data=False)

        # train
        if tune:
            params = training.tuning(lgb_train, lgb_valid, 100)
            pd.to_pickle(params, 'params/{0}_cv{1}.pkl'.format(name, cv_idx))
        model = training.train(lgb_train, lgb_valid, params)
        score = training.evaluation(model, x_test, y_test)
        scores.append(score)
        model.save_model('model/{0}_cv{1}.txt'.format(name, cv_idx), num_iteration=model.best_iteration)

        # predict
        pred = model.predict(x_sub)
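# Example #3 cuts off inside the CV loop, so how the per-fold submission
# predictions are combined is not shown. One common pattern (an assumption,
# not necessarily the author's code) is to collect each fold's pred, average
# them, and report the spread of the per-fold scores alongside:
import numpy as np

def aggregate_cv(fold_preds, scores):
    # Mean ensemble of the per-fold submission predictions.
    sub_pred = np.mean(fold_preds, axis=0)
    print('cv score: {0:.4f} +/- {1:.4f}'.format(np.mean(scores), np.std(scores)))
    return sub_pred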
Example #4
    target = 'state'


    cv = 10
    train_dfs, valid_dfs, test_dfs = training.cv(train_df, cv)
    scores = list()
    params = None
    tune = True
    name = 'word_count_plus_h1_word_count'
    for cv_idx in range(cv):
        # prepare
        x_train, y_train = train_dfs[cv_idx][features], train_dfs[cv_idx][target]
        x_valid, y_valid = valid_dfs[cv_idx][features], valid_dfs[cv_idx][target]
        x_test, y_test = test_dfs[cv_idx][features], test_dfs[cv_idx][target]
        
        x_train, others = fe.text_to_word_count(x_train, [x_valid, x_test, test_df[features]], del_html_content=False)
        x_valid, x_test, x_sub = others[0], others[1], others[2]
        x_train, others = fe.text_to_h1_word_count(x_train, [x_valid, x_test, x_sub])
        x_valid, x_test, x_sub = others[0], others[1], others[2]
        
        
        lgb_train = lgb.Dataset(x_train, y_train, categorical_feature=[3, 4, 5], free_raw_data=False)
        lgb_valid = lgb.Dataset(x_valid, y_valid, categorical_feature=[3, 4, 5], free_raw_data=False)

        # train
        if tune:
            params = training.tuning(lgb_train, lgb_valid, 100)
            pd.to_pickle(params, 'params/{0}_cv{1}.pkl'.format(name, cv_idx))
        model = training.train(lgb_train, lgb_valid, params)
        score = training.evaluation(model, x_test, y_test)
        scores.append(score)
        model.save_model('model/{0}_cv{1}.txt'.format(name, cv_idx), num_iteration=model.best_iteration)
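# fe.text_to_h1_word_count is not defined in this snippet. A minimal sketch,
# assuming each frame carries an 'html_content' column, would count the words
# inside <h1> tags with BeautifulSoup; the column and function names here are
# illustrative only, not the author's implementation.
from bs4 import BeautifulSoup

def h1_word_count_sketch(x_train, others, col='html_content'):
    def count_h1_words(html):
        soup = BeautifulSoup(str(html), 'html.parser')
        return sum(len(h1.get_text().split()) for h1 in soup.find_all('h1'))

    frames = []
    for df in [x_train] + list(others):
        df = df.copy()
        df['h1_word_count'] = df[col].apply(count_h1_words)
        frames.append(df)
    return frames[0], frames[1:]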