Beispiel #1
0
def CountEn():
    """Run the English word-frequency count and export the results as CSV.

    Paths are built from the current working directory (``cwd``):

    * ``lib.name_lst(cwd + '/output_en')`` supplies the names to process.
    * ``cwd/output_encount`` is the working directory while
      ``lib.count_en`` runs.
    * ``cwd/output_encount_csv`` is passed to ``lib.to_csv`` as its
      destination argument.

    Processing is best-effort: a name whose processing raises an
    ``Exception`` is skipped and the remaining names are still handled.
    The original working directory is restored after every name, whether
    or not it succeeded.

    Returns:
        str: always the empty string.
    """
    rawpath = os.getcwd()
    pathen = rawpath + '/output_encount'
    pathencsv = rawpath + '/output_encount_csv'
    # Leading slash is intentional: this value is both appended to rawpath
    # and handed to lib.count_en unchanged.
    target_file_name = '/output_en'
    lst = lib.name_lst(rawpath + target_file_name)

    # Create each directory independently so that an already-existing first
    # directory no longer prevents the second one from being created.
    for directory in (pathen, pathencsv):
        try:
            os.makedirs(directory, exist_ok=True)
        except OSError:
            # Stay best-effort: if a directory cannot be created, the
            # per-name steps below fail and those names are skipped.
            pass

    for name in lst:
        try:
            os.chdir(pathen)
            # Per the original note: the words of the file and their
            # frequency totals (exact shape is defined by lib.count_en).
            li = lib.count_en(name, target_file_name)
            # CSV export step; it consumes the result computed above.
            lib.to_csv(li, name, pathencsv)
        except Exception:
            # Skip this name; KeyboardInterrupt/SystemExit still propagate.
            continue
        finally:
            # Always return to the starting directory, even after a failure.
            os.chdir(rawpath)
    return ''
Beispiel #2
0
def CountZh():
    """Run the Chinese word-frequency count and export the results as CSV.

    Paths are built from the current working directory (``cwd``):

    * ``lib.name_lst(cwd + '/output_zh')`` supplies the names to process.
    * ``cwd/output_zhcount`` is the working directory while
      ``lib.count_zh`` runs.
    * ``cwd/output_zhcount_csv`` is passed to ``lib.to_csv`` as its
      destination argument.

    Processing is best-effort: a name whose processing raises an
    ``Exception`` is skipped and the remaining names are still handled.
    The original working directory is restored after every name, whether
    or not it succeeded.

    Returns:
        str: always the empty string.
    """
    rawpath = os.getcwd()
    pathzh = rawpath + '/output_zhcount'
    pathzhcsv = rawpath + '/output_zhcount_csv'
    # Leading slash is intentional: this value is both appended to rawpath
    # and handed to lib.count_zh unchanged.
    target_file_name = '/output_zh'
    lst = lib.name_lst(rawpath + target_file_name)

    # Create each directory independently so that an already-existing first
    # directory no longer prevents the second one from being created.
    for directory in (pathzh, pathzhcsv):
        try:
            os.makedirs(directory, exist_ok=True)
        except OSError:
            # Stay best-effort: if a directory cannot be created, the
            # per-name steps below fail and those names are skipped.
            pass

    for name in lst:
        try:
            os.chdir(pathzh)
            # Per the original note: the combined (word, frequency) list
            # (exact shape is defined by lib.count_zh).
            li = lib.count_zh(name, target_file_name)
            lib.to_csv(li, name, pathzhcsv)
        except Exception:
            # Skip this name; KeyboardInterrupt/SystemExit still propagate.
            continue
        finally:
            # Always return to the starting directory, even after a failure.
            os.chdir(rawpath)
    return ''
Beispiel #3
0
from lib import to_csv
from validator import SVM, XGB, RFC, VC
from preprocess import train_X, train_y, test_X, passenger_id

# For each classifier in turn: fit it on the training data, predict on the
# test data, and pass the predictions to to_csv together with passenger_id
# and the classifier's label. Each model is fully handled before the next.
predictions = {}
for label, model in (('SVM', SVM), ('XGB', XGB), ('RFC', RFC), ('VC', VC)):
    model.fit(train_X, train_y)
    predictions[label] = model.predict(test_X)
    to_csv(predictions[label], passenger_id, label)

# Keep the per-model prediction names bound at module level.
SVM_pred = predictions['SVM']
XGB_pred = predictions['XGB']
RFC_pred = predictions['RFC']
VC_pred = predictions['VC']