def download_league_data(self, league):
    """Resolve the league's football-data id and start the download.

    :param league: league name; also reused as the storage/file name
    """
    self.filename = league
    # Map the human-readable league name to its football-data.co.uk code.
    self.league_code = get_config(file="leagues_id").get(league)
    log.info("{}: {}".format(league, self.league_code))
    self.download_football_data()
    def download_football_data(self):
        """Download season CSVs from football-data.co.uk, normalise them and
        hand the combined frame to ``merge_with_existing_data``.

        Requires ``self.league_code`` to have been set (see
        ``download_league_data``).

        :return: self, to allow chaining
        :rtype: object
        """
        pieces = []
        data_url = 'http://www.football-data.co.uk/mmz4281/{year}/{league_id}.csv'
        # NOTE(review): start_yr is a free name, presumably defined at module
        # level (e.g. 18 -> season token "1819") -- confirm.
        for i in range(start_yr, start_yr + 1):
            year = str(i).zfill(2) + str(i + 1).zfill(2)
            formated_data_url = data_url.format(year=year,
                                                league_id=self.league_code)
            log.info("Year: {0}, League code: {1}, URL: {2}".format(
                year, self.league_code, formated_data_url))

            if requests.get(formated_data_url).status_code == 200:
                try:
                    dd = pd.read_csv(formated_data_url,
                                     encoding='utf-8',
                                     error_bad_lines=False)
                except UnicodeDecodeError:
                    # Some seasons are served as Latin-1.  Retry the SAME url
                    # with the fallback encoding instead of silently reading
                    # an unrelated hard-coded file.
                    dd = pd.read_csv(formated_data_url,
                                     encoding='ISO-8859-1',
                                     error_bad_lines=False)

                dd['Date'] = pd.to_datetime(dd['Date'], dayfirst=True)
                dd['Season'] = year
                dd["Comp_id"] = dd["Div"]
                dd = dd.drop('Div', axis=1)
                pieces.append(dd)
                time.sleep(2)  # be polite to the remote server

        if not pieces:
            # Nothing was downloaded (bad league code or missing seasons);
            # previously this path crashed with a NameError on `dd`.
            log.info("No data downloaded for league code: {}".format(
                self.league_code))
            return self

        try:
            # Combine every downloaded season (the old code accidentally used
            # only the last frame), then drop replayed/duplicated fixtures.
            data = pd.concat(pieces, ignore_index=True, sort=True)
            data = data.drop_duplicates(
                subset=['Date', 'HomeTeam', 'AwayTeam', 'Season'],
                inplace=False)
            # Normalise over/under column names ('.' breaks downstream use).
            data.rename(columns={
                'BbAv<2.5': 'BbAv<25',
                'BbAv>2.5': 'BbAv>25',
                'BbMx<2.5': 'BbMx<25',
                'BbMx>2.5': 'BbMx>25'
            },
                        inplace=True)
            self.merge_with_existing_data(ft_data=data.copy())
        except ValueError:
            # pd.concat / drop_duplicates can raise ValueError on malformed
            # frames; preserve the original best-effort behaviour.
            pass

        return self
    def merge_with_existing_data(self, ft_data):
        """Persist the downloaded fixtures into MongoDB.

        :param ft_data: DataFrame of fixtures for the league named by
            ``self.filename``
        """
        client = MongoClient(mongodb_uri, connectTimeoutMS=30000)
        try:
            db = client.get_database("sports_prediction")
            wdw_raw_data = db[self.filename]

            # Rows with any missing value are useless downstream.
            ft_data = ft_data.dropna(how='any')

            # Harmonise team names with the naming used elsewhere in the app.
            translate = translation.get(self.filename)
            if translate:
                ft_data["HomeTeam"].replace(translate, inplace=True)
                ft_data["AwayTeam"].replace(translate, inplace=True)

            try:
                # "records" (not "record") is the documented orient value;
                # the abbreviation raises on modern pandas.
                dta = ft_data.to_dict("records")
                if dta:
                    wdw_raw_data.insert_many(dta)
                    print(self.filename, " Saved")
                else:
                    print("I cant store".upper())
            except Exception as e:
                log.info("Encountered Error:{} \n League: {}".format(
                    e, self.filename))
        finally:
            # Always release the client's connection pool (was leaked before).
            client.close()
# @author: tola
import pandas as pd
from sklearn.linear_model import LogisticRegression
from sklearn.externals import joblib
from imblearn.over_sampling import SMOTE

from utils import get_analysis_root_path, get_config
from te_logger.logger import log

# Mapping of configured leagues; one model is trained per key below.
leagues_data = get_config(file="league")
leagues = list(leagues_data.keys())

for league in leagues:
    log.info(msg="Building model for league: {}".format(league))
    lg_data_path = get_analysis_root_path(
        'tools/data/clean_data/team_trend/{}.csv'.format(league))
    try:
        games = pd.read_csv(lg_data_path)
        games = games.dropna(how='any')

        model_columns = get_config(file="wdw_columns/{}".format(league))
        played_data = games.loc[
            (games.Season.isin([1415, 1516, 1617, 1718, 1819]))
            & (games.played == 1)]

        target = played_data.FTR.map({"D": 0, "A": 1, "H": 2})

        # Select significant columns
        data = played_data[model_columns]
# --- Esempio n. 5 ---
# @author: tola
import pandas as pd
from sklearn.linear_model import LogisticRegression
from sklearn.externals import joblib
from imblearn.over_sampling import SMOTE

from utils import get_analysis_root_path, get_config
from te_logger.logger import log

# Mapping of configured leagues; one model is trained per key below.
leagues_data = get_config(file="league")
leagues = list(leagues_data.keys())

for league in leagues:
    log.info(msg="Building model for league: {}".format(league))
    lg_data_path = get_analysis_root_path(
        'tools/data/clean_data/team_trend/{}.csv'.format(league))
    try:
        games = pd.read_csv(lg_data_path)
        games = games.dropna(how='any')

        dc_columns = get_config(file="dc_columns/{}".format(league))
        played_data = games.loc[
            (games.Season.isin([1415, 1516, 1617, 1718, 1819]))
            & (games.played == 1)]

        target_1x = played_data.FTR.map({"D": 0, "A": 1, "H": 0})

        # Select significant columns
        dc_data = played_data[dc_columns]
# --- Esempio n. 6 ---
# Train one over/under-2.5-goals logistic model per league and persist it.
for league in leagues:
    try:
        csv_path = get_analysis_root_path(
            'tools/data/clean_data/team_trend/{}.csv'.format(league))
        games = pd.read_csv(csv_path).dropna(how='any')

        feature_columns = get_config(file="ou25_columns/{}".format(league))

        # Keep only finished fixtures from the seasons used for training.
        season_mask = games.Season.isin([1415, 1516, 1617, 1718, 1819])
        played = games.loc[season_mask & (games.played == 1)]

        target = played.UO25.values
        features = played[feature_columns]

        model = LogisticRegression(C=1e5)
        model.fit(features, target)
        log.info("League: {}\t score: {}".format(league,
                                                 model.score(features, target)))

        joblib.dump(model,
                    get_analysis_root_path(
                        "tools/league_models/{}_ou25".format(league)))
    except Exception as e:
        log.warn("New O/U 2.5 model not built for {} \n{}".format(
            league.upper(), e))

log.info("Finished training over under 2.5 model")