def CountEn():
    """Run the English word-frequency count for every input name.

    The names to process are whatever ``lib.name_lst`` returns for
    ``<cwd>/output_en``. For each name the working directory is switched to
    ``<cwd>/output_encount``, ``lib.count_en(name, '/output_en')`` is called,
    and its result is handed to ``lib.to_csv`` together with the name and
    ``<cwd>/output_encount_csv``.

    Processing is best-effort: a failure on one name is logged and the
    remaining names are still processed. The working directory is restored
    to its starting value after every name, whether or not it succeeded.

    Returns:
        str: Always the empty string.
    """
    import logging  # function-scope import keeps this block self-contained

    logger = logging.getLogger(__name__)

    rawpath = os.getcwd()
    pathen = rawpath + '/output_encount'
    pathencsv = rawpath + '/output_encount_csv'
    target_file_name = '/output_en'
    lst = lib.name_lst(rawpath + target_file_name)

    # Create each output directory on its own: an already-existing first
    # directory must not prevent the second one from being created.
    for directory in (pathen, pathencsv):
        try:
            os.makedirs(directory, exist_ok=True)
        except OSError:
            logger.warning("could not create directory %s", directory,
                           exc_info=True)

    for name in lst:
        try:
            # NOTE(review): lib.count_en presumably writes its output into
            # the current directory, hence the chdir -- confirm against lib.
            os.chdir(pathen)
            li = lib.count_en(name, target_file_name)
            lib.to_csv(li, name, pathencsv)
        except Exception:
            # Best-effort: skip the failing name but leave a trace of why.
            logger.warning("English word count failed for %r", name,
                           exc_info=True)
        finally:
            # Always return to the starting directory, even after a failure.
            os.chdir(rawpath)
    return ''
def CountZh():
    """Run the Chinese word-frequency count for every input name.

    The names to process are whatever ``lib.name_lst`` returns for
    ``<cwd>/output_zh``. For each name the working directory is switched to
    ``<cwd>/output_zhcount``, ``lib.count_zh(name, '/output_zh')`` is called,
    and its result is handed to ``lib.to_csv`` together with the name and
    ``<cwd>/output_zhcount_csv``.

    Processing is best-effort: a failure on one name is logged and the
    remaining names are still processed. The working directory is restored
    to its starting value after every name, whether or not it succeeded.

    Returns:
        str: Always the empty string.
    """
    import logging  # function-scope import keeps this block self-contained

    logger = logging.getLogger(__name__)

    rawpath = os.getcwd()
    pathzh = rawpath + '/output_zhcount'
    pathzhcsv = rawpath + '/output_zhcount_csv'
    target_file_name = '/output_zh'
    lst = lib.name_lst(rawpath + target_file_name)

    # Create each output directory on its own: an already-existing first
    # directory must not prevent the second one from being created.
    for directory in (pathzh, pathzhcsv):
        try:
            os.makedirs(directory, exist_ok=True)
        except OSError:
            logger.warning("could not create directory %s", directory,
                           exc_info=True)

    for name in lst:
        try:
            # NOTE(review): lib.count_zh presumably writes its output into
            # the current directory, hence the chdir -- confirm against lib.
            os.chdir(pathzh)
            li = lib.count_zh(name, target_file_name)
            lib.to_csv(li, name, pathzhcsv)
        except Exception:
            # Best-effort: skip the failing name but leave a trace of why.
            logger.warning("Chinese word count failed for %r", name,
                           exc_info=True)
        finally:
            # Always return to the starting directory, even after a failure.
            os.chdir(rawpath)
    return ''
from lib import to_csv
from validator import SVM, XGB, RFC, VC
from preprocess import train_X, train_y, test_X, passenger_id


def _fit_predict_export(model, label):
    """Fit ``model`` on the training data, predict on ``test_X``, export.

    The predictions are passed to ``to_csv`` together with ``passenger_id``
    and ``label``, and are also returned to the caller.
    """
    model.fit(train_X, train_y)
    predictions = model.predict(test_X)
    to_csv(predictions, passenger_id, label)
    return predictions


# Same models, same order as before; each result keeps its module-level name.
SVM_pred = _fit_predict_export(SVM, 'SVM')
XGB_pred = _fit_predict_export(XGB, 'XGB')
RFC_pred = _fit_predict_export(RFC, 'RFC')
VC_pred = _fit_predict_export(VC, 'VC')