# Train a KNN classifier on `result_type` and save it, together with its
# feature preprocessing, as a single pipeline.

# Training dataset
train_X = any_football_data.drop(columns=label_columns)
train_Y = any_football_data['result_type']  # match result (win=0, draw=1, loss=2)

# Feature processing: scale numeric columns, one-hot encode categorical ones
feature_pipeline = ColumnTransformer(transformers=[
    ("num", StandardScaler(), num_attribs),
    ("cat", OneHotEncoder(), cat_attribs_with_camp),
])

# Model training
prepared_train = feature_pipeline.fit_transform(train_X)
print(f'prepared_train data_set shape: {prepared_train.shape}')

# Only the classifier fit is timed; the feature transform above is excluded.
start_prepared = time.time()
knn_clf = KNeighborsClassifier(n_neighbors=19, weights='distance')
knn_clf.fit(prepared_train, train_Y)
end_prepared = time.time()
print(f'elapsed time: {end_prepared - start_prepared} sec \n')

# Both steps are already fitted; the Pipeline only bundles them for saving.
full_pipeline = Pipeline(steps=[
    ("preparation", feature_pipeline),
    ("predictor", knn_clf),
])

# Save
dir_name = datetime.today().strftime("knn_%Y%m%d-%H%M%S")
utility.save_sklearn_model(full_pipeline, dir_name, "knn_clf_result_any.pkl")
# Class probabilities of the 4.5 SGD base model, rounded to 3 decimals.
sgd_home_pred = np.round(sgd_clf_4_5_any.predict_proba(home_train), 3)
sgd_away_pred = np.round(sgd_clf_4_5_any.predict_proba(away_train), 3)

# Each row starts with a two-column side indicator (home = [1, 0],
# away = [0, 1]) followed by the knn_* / rft_* / sgd_* prediction arrays.
# knn_*_pred and rft_*_pred are computed before this point.
home_prepared_train = np.c_[
    np.ones(len(home_train)),
    np.zeros(len(home_train)),
    knn_home_pred,
    rft_home_pred,
    sgd_home_pred,
]
away_prepared_train = np.c_[
    np.zeros(len(away_train)),
    np.ones(len(away_train)),
    knn_away_pred,
    rft_away_pred,
    sgd_away_pred,
]

# Home rows first, then away rows; the labels follow the same order.
train_X = np.r_[home_prepared_train, away_prepared_train]
train_Y = np.r_[home_result_4_5_over_Y, away_result_4_5_over_Y]

# Train the final 4.5 KNeighborsClassifier on the stacked predictions
start_prepared = time.time()
knn_clf_4_5 = KNeighborsClassifier(n_neighbors=100, weights='uniform')
knn_clf_4_5.fit(train_X, train_Y)
end_prepared = time.time()
print(
    f'4.5 knn predictor elapsed time: {end_prepared - start_prepared} sec \n')

# Save all four final classifiers into one timestamped directory.
# knn_clf_1_5 / knn_clf_2_5 / knn_clf_3_5 are trained before this point.
dir_name = datetime.today().strftime("fin_%Y%m%d-%H%M%S")
utility.save_sklearn_model(knn_clf_1_5, dir_name, "fin_knn_clf_1_5.pkl")
utility.save_sklearn_model(knn_clf_2_5, dir_name, "fin_knn_clf_2_5.pkl")
utility.save_sklearn_model(knn_clf_3_5, dir_name, "fin_knn_clf_3_5.pkl")
utility.save_sklearn_model(knn_clf_4_5, dir_name, "fin_knn_clf_4_5.pkl")
# Away-side prediction of the linear-regression base model, rounded to
# 3 decimals. lin_home_pred and knn_*_pred are computed before this point.
lin_away_pred = np.round(lin_reg_any.predict(away_train), 3)

# Prepare data
# (disabled) league categorisation:
# league_cat = utility.LeagueTypeCategorize()
# leagues = league_cat.transform(home_train.copy())

# Each row starts with a two-column side indicator (home = [1, 0],
# away = [0, 1]) followed by the knn_* / lin_* prediction arrays.
home_prepared_train = np.c_[
    np.ones(len(home_train)),
    np.zeros(len(home_train)),
    knn_home_pred,
    lin_home_pred,
]
away_prepared_train = np.c_[
    np.zeros(len(away_train)),
    np.ones(len(away_train)),
    knn_away_pred,
    lin_away_pred,
]

# Home rows first, then away rows; the targets follow the same order.
train_X = np.r_[home_prepared_train, away_prepared_train]
train_Y = np.r_[home_score_Y, away_score_Y]

# Train the final LinearRegression on the stacked predictions
start_prepared = time.time()
fin_lin_reg = LinearRegression()
fin_lin_reg.fit(train_X, train_Y)  # training
end_prepared = time.time()
print(
    f'lin score predictor elapsed time: {end_prepared - start_prepared} sec \n'
)

# Save
dir_name = datetime.today().strftime("fin_%Y%m%d-%H%M%S")
utility.save_sklearn_model(fin_lin_reg, dir_name, "fin_lin_reg_score.pkl")
# Stack the home rows on top of the away rows; the labels follow the same
# order. home_/away_prepared_train are built before this point.
train_X = np.r_[home_prepared_train, away_prepared_train]
train_Y = np.r_[home_result_Y, away_result_Y]

# (disabled) SGDClassifier training, kept for reference:
# start_prepared = time.time()
#
# sgd_clf = SGDClassifier(max_iter=3000, tol=1e-3, random_state=42, alpha=0.01, loss='log')
# sgd_clf.fit(train_X, train_Y)
#
# end_prepared = time.time()
#
# print(f'sgd result predictor elapsed time: {end_prepared - start_prepared} sec \n')

# Train the final KNeighborsClassifier for the match result
start_prepared = time.time()
knn_clf = KNeighborsClassifier(n_neighbors=100, weights='uniform')
knn_clf.fit(train_X, train_Y)
end_prepared = time.time()
print(
    f'knn result predictor elapsed time: {end_prepared - start_prepared} sec \n'
)

# Save
dir_name = datetime.today().strftime("fin_%Y%m%d-%H%M%S")
# utility.save_sklearn_model(sgd_clf, dir_name, "fin_sgd_clf_result.pkl")
utility.save_sklearn_model(knn_clf, dir_name, "fin_knn_clf_result.pkl")
# Train a KNN regressor on the `score` column and save it, together with
# its feature preprocessing, as a single pipeline.

# Training dataset
train_X = any_football_data.drop(columns=label_columns)
train_Y = any_football_data['score']  # regression target: the 'score' column

# Feature processing: scale numeric columns, one-hot encode categorical ones
feature_pipeline = ColumnTransformer(transformers=[
    ("num", StandardScaler(), num_attribs),
    ("cat", OneHotEncoder(), cat_attribs_with_camp),
])

# Model training
prepared_train = feature_pipeline.fit_transform(train_X)
print(f'prepared_train data_set shape: {prepared_train.shape}')

# Only the regressor fit is timed; the feature transform above is excluded.
start_prepared = time.time()
knn_reg = KNeighborsRegressor(weights="distance", n_neighbors=19)
knn_reg.fit(prepared_train, train_Y)  # training
end_prepared = time.time()
print(f'elapsed time: {end_prepared - start_prepared} sec \n')

# Both steps are already fitted; the Pipeline only bundles them for saving.
full_pipeline = Pipeline(steps=[
    ("preparation", feature_pipeline),
    ("predictor", knn_reg),
])

# Save
dir_name = datetime.today().strftime("knn_%Y%m%d-%H%M%S")
utility.save_sklearn_model(full_pipeline, dir_name, "knn_reg_score_any.pkl")
# Model training (3.5) -- `start_prepared` for this model is set before
# this point, as are `prepared_train` and `feature_pipeline`.
knn_clf_3_5 = KNeighborsClassifier(n_neighbors=19, weights='distance')
knn_clf_3_5.fit(prepared_train, train_3_5_Y)
end_prepared = time.time()
print(f'3.5 predictor elapsed time: {end_prepared - start_prepared} sec \n')

full_pipeline_3_5 = Pipeline(steps=[
    ("preparation", feature_pipeline),
    ("predictor", knn_clf_3_5),
])

# Model training (4.5)
start_prepared = time.time()
knn_clf_4_5 = KNeighborsClassifier(n_neighbors=19, weights='distance')
knn_clf_4_5.fit(prepared_train, train_4_5_Y)
end_prepared = time.time()
print(f'4.5 predictor elapsed time: {end_prepared - start_prepared} sec \n')

full_pipeline_4_5 = Pipeline(steps=[
    ("preparation", feature_pipeline),
    ("predictor", knn_clf_4_5),
])

# Save all four pipelines into one timestamped directory.
# full_pipeline_1_5 / full_pipeline_2_5 are built before this point.
dir_name = datetime.today().strftime("knn_%Y%m%d-%H%M%S")
utility.save_sklearn_model(full_pipeline_1_5, dir_name, "knn_clf_1_5_any.pkl")
utility.save_sklearn_model(full_pipeline_2_5, dir_name, "knn_clf_2_5_any.pkl")
utility.save_sklearn_model(full_pipeline_3_5, dir_name, "knn_clf_3_5_any.pkl")
utility.save_sklearn_model(full_pipeline_4_5, dir_name, "knn_clf_4_5_any.pkl")
("cat", OneHotEncoder(), cat_attribs_with_camp), ]) feature_pipeline = Pipeline([ ('columns', columns_pipeline_home), ]) # 모델 훈련 prepared_train = feature_pipeline.fit_transform(train_X) print(f'prepared_train data_set shape: {prepared_train.shape}') start_prepared = time.time() sgd_clf = SGDClassifier(max_iter=2000, tol=1e-3, random_state=42, alpha=0.01, loss='log') sgd_clf.fit(prepared_train, train_Y) end_prepared = time.time() print(f'elapsed time: {end_prepared - start_prepared} sec \n') full_pipeline = Pipeline([("preparation", feature_pipeline), ("predictor", sgd_clf)]) # 저장 dir_name = datetime.today().strftime("sgd_%Y%m%d-%H%M%S") utility.save_sklearn_model(full_pipeline, dir_name, "sgd_clf_bts_any.pkl")