def train_test_all():
    """Run every configured model and blend their predictions.

    For each entry of the module-level ``model_config`` the shared ``param``
    mapping is updated in place, ``train_test_once`` is called, and the
    train-set predictions are handed to ``write_val_result``; the value it
    returns is stored, together with the entry's weight, under the entry's
    prefix name.

    After the loop the per-model test and train predictions are combined
    with a weighted ``np.average`` along the model axis. The blended test
    predictions go to ``write_result``, the blended train predictions go to
    ``write_val_result`` (its return value is stored under ``"overall"``),
    and the collected losses are pickled to ``val_loss002.pkl`` in the
    current working directory.

    NOTE(review): ``param`` is never reset between entries, so keys written
    by an earlier ``special_param`` stay visible to later entries unless
    they are overwritten -- confirm this is intended.
    """
    val_losses = {}
    blend_weights = []
    train_pred_stack = []
    test_pred_stack = []

    for entry in model_config:
        (tag, helper_cls, model_cls, overrides,
         prep_cls, prep_args, blend_weight) = entry
        print_seperater(tag, model_cls, overrides)

        # Order matters: the overrides are applied after 'output'/'model'
        # are set and before 'out_data_dir' is derived.
        param['output'] = os.path.join(
            param['output_root_dir'],
            'model_save/output_{}'.format(tag))
        param['model'] = model_cls
        param.update(overrides)
        param['out_data_dir'] = os.path.join(
            param['output_root_dir'],
            "out_data_dir_" + prep_cls.__name__)
        blend_weights.append(blend_weight)

        train_preds, test_preds = train_test_once(
            helper_cls, prep_cls, prep_args)

        # Write this model's predictions on the train data to a file.
        model_loss = write_val_result(
            train_preds,
            root_dir=param['root_dir'],
            output_dir=param['output'])
        val_losses[tag] = [model_loss, blend_weight]

        # Collect this model's predictions for the later blend.
        test_pred_stack.append(test_preds)
        train_pred_stack.append(train_preds)

    # NOTE(review): the original formatting was collapsed; this print is
    # assumed to run once after the loop -- confirm.
    print("preds after repeating -- shape:", np.shape(test_pred_stack))

    # Weighted mean across models.
    blended_test = np.average(
        np.array(test_pred_stack), axis=0, weights=blend_weights)
    blended_train = np.average(
        np.array(train_pred_stack), axis=0, weights=blend_weights)

    # Write the blended test predictions to a file.
    write_result(blended_test, root_dir=param['root_dir'])
    print("End -- model_val_loss_dict:", val_losses)

    # Write the blended train predictions to a file.
    val_losses["overall"] = write_val_result(
        blended_train, root_dir=param['root_dir'])

    # Persist every validation loss.
    with open("val_loss002.pkl", 'wb') as sink:
        pickle.dump(val_losses, sink)
def compuate_val_loss():
    """Recompute validation losses from previously saved per-model CSVs.

    For each entry of the module-level ``model_config`` this sets
    ``param['output']`` to that model's output directory, reads
    ``submission_val.csv`` from it, drops the ``sig_id`` column and keeps
    the remaining values. The per-model loss returned by
    ``calculate_overall_loss`` is printed.

    The collected predictions are then combined with a weighted
    ``np.average`` along the model axis and passed to ``write_val_result``;
    the value it returns is printed as the overall loss.
    """
    blend_weights = []
    val_pred_stack = []

    for entry in model_config:
        (tag, _helper_cls, model_cls, overrides,
         _prep_cls, _prep_args, blend_weight) = entry
        print_seperater(tag, model_cls, overrides)

        model_dir = os.path.join(
            param['output_root_dir'],
            'model_save/output_{}'.format(tag))
        param['output'] = model_dir
        blend_weights.append(blend_weight)

        csv_path = os.path.join(model_dir, 'submission_val.csv')
        frame = pd.read_csv(csv_path)
        # Only the prediction columns take part in the blend.
        del frame['sig_id']
        val_pred_stack.append(np.array(frame.values))

        model_loss = calculate_overall_loss(param['root_dir'], csv_path)
        print("model :{}; loss:{}".format(tag, model_loss))

    # Weighted mean across models.
    stacked = np.array(val_pred_stack, dtype=float)
    blended = np.average(stacked, axis=0, weights=blend_weights)

    blended_loss = write_val_result(blended, root_dir=param['root_dir'])
    print("overall model ; loss:{}".format(blended_loss))