"""Load the aggregated player statistics and split them into train/test sets."""
from copy import copy
import logging
from os.path import basename, splitext

from sklearn.decomposition import FastICA
from sklearn.ensemble import ExtraTreesClassifier
from sklearn.model_selection import train_test_split
from sklearn.pipeline import make_pipeline, make_union
from sklearn.preprocessing import FunctionTransformer

from code_.domain.games_info import SeasonFirstHalfAggregator
from code_.domain.performance_analyzer import PerformanceAnalyzer
import settings as stg


if __name__ == '__main__':
    # Name the log file after this script: "<script name without extension>.log".
    script_name = basename(__file__)
    stg.enable_logging(log_filename='{}.log'.format(splitext(script_name)[0]),
                       logging_level=logging.DEBUG)
    logging.info('Start of script {}'.format(script_name))

    logging.info('Load data ..')
    sfha = SeasonFirstHalfAggregator(saved_filename=stg.FILENAME_STATS_AGGREGATED)
    df = sfha.build_players_stats_dataset(
        sliding_interval_min=5,
        list_events_number=stg.EVENTS_COMPUTE_NUMBER,
        list_events_with_success_rate=stg.EVENTS_COMPUTE_SUCCESS_RATE,
    )
    logging.info('.. Done')

    # Hold out 30% of the rows for testing; the fixed seed keeps the split
    # reproducible from one run to the next.
    train, test = train_test_split(df, test_size=0.3, random_state=42)
    X_train, y_train = train[stg.PLAYER_FEATURES], train[stg.PLAYER_TARGET]
    X_test, y_test = test[stg.PLAYER_FEATURES], test[stg.PLAYER_TARGET]

    logging.info('Impute missing values with median ..')
import logging import numpy as np import os import pandas as pd from lxml import etree from os.path import join from code_.infrastructure.game import Game import settings as stg if __name__ == '__main__': stg.enable_logging(log_filename='players.log') class Players(): """Soccer players. Attributes ---------- all_players: pandas.DataFrame transfered_players: pandas.DataFrame players_more_800_min: pandas.DataFrame Players having played more than 800 minutes in first half of season """ def __init__(self, xml_filename=stg.FILENAME_ALL_PLAYERS, saved_csv_filename=stg.FILENAME_PLAYERS_MORE_800): """Init class.