Esempio n. 1
0
def get_model():
    """Return the convolutional classifier together with its accuracy log.

    When ``mu.mutex_process('data/model2.pkl')`` returns a truthy value, a
    fresh Keras ``Sequential`` network is built, summarised, compiled and
    written to ``data/model2.h5``, and an empty accuracy table is stored in
    ``data/model2.pkl`` through ``mu.mutex_save``. In every case the model
    and the table are then read back from those two files.

    Returns:
        A two-element list ``[model, accuracy_table]``: the result of
        ``tfk.models.load_model`` and the result of ``mu.mutex_load``.
    """
    model_file = 'data/model2.h5'
    log_file = 'data/model2.pkl'

    if mu.mutex_process(log_file):
        layers = tfk.layers
        linear = tfk.activations.linear
        # Layers are instantiated in network order; the unnamed Dropout
        # layers get their automatic names from that order.
        network = tfk.Sequential([
            layers.InputLayer(batch_input_shape=(None, 20, 8 + 6 + 15), name='in'),
            layers.Conv1D(filters=100, kernel_size=2, strides=1, name='conv1'),
            layers.MaxPooling1D(pool_size=4, name='pool1'),
            layers.Dropout(.2),
            layers.Conv1D(filters=50, kernel_size=3, strides=1, name='conv2'),
            layers.MaxPooling1D(pool_size=2, name='pool2'),
            layers.Dropout(.2),
            layers.Flatten(name='flat'),
            layers.Dense(50, activation=linear, name='dense1'),
            layers.Dropout(.2),
            layers.Dense(20, activation=linear, name='dense2'),
            layers.Dropout(.2),
            # Three output units with softmax.
            layers.Dense(3, activation=tf.nn.softmax, name='denseOut'),
        ])
        network.summary()
        network.compile(
            loss='sparse_categorical_crossentropy',
            optimizer='adam',
            metrics=['accuracy'],
        )
        network.save(model_file)

        # Empty log: one column per tracked quantity, no rows yet.
        log_columns = ['loss_train', 'accuracy_train', 'nb_train', 'loss_test',
                       'accuracy_test', 'nb_test', 'nb_epochs']
        empty_log = pd.DataFrame(data={column: [] for column in log_columns})
        mu.mutex_save(empty_log, log_file)

    loaded_model = tfk.models.load_model(model_file)
    loaded_log = mu.mutex_load(log_file)
    return [loaded_model, loaded_log]
Esempio n. 2
0
def download_scores(year, league):
    """Return the final scores of one league season, downloading them if needed.

    The result is cached in ``data/<league>_<year>_scores.pkl`` through the
    ``mu`` helpers. When ``mu.mutex_process`` returns a truthy value for that
    file, the season archive is fetched from football-data.co.uk with
    ``wget``, the league's CSV is read from it, and the resulting table is
    stored with ``mu.mutex_save``. In every case the function returns what
    ``mu.mutex_load`` reads back from the cache file.

    Args:
        year: First calendar year of the season; ``year + 1`` must be valid
            and the last two digits of both years form the archive path.
        league: Base name (without ``.csv``) of the league file inside the
            archive, e.g. ``'F1'``.

    Returns:
        The object returned by ``mu.mutex_load``. When it was produced by
        this function it is a DataFrame with the columns ``Date``,
        ``HomeTeam``, ``AwayTeam``, ``FTHG``, ``FTAG`` (goals cast to int)
        and ``Season`` (set to ``year``).

    Raises:
        subprocess.CalledProcessError: If ``wget`` exits with a non-zero
            status. Previously the failure was ignored and surfaced later
            (or a stale CSV left in ``tmpdata/`` was read instead).
        zipfile.BadZipFile: If the downloaded archive cannot be opened.
    """
    # Function-scope imports keep this block self-contained.
    import glob
    import subprocess
    import zipfile

    filename = 'data/' + str(league) + '_' + str(year) + '_scores.pkl'
    if mu.mutex_process(filename):
        season_code = str(year)[-2:] + str(year + 1)[-2:]
        url = "http://www.football-data.co.uk/mmz4281/" + season_code + "/data.zip"
        print(url)

        archive = "tmpdata/data.zip"
        wanted = ["Date", "HomeTeam", "AwayTeam", "FTHG", "FTAG"]
        try:
            # Argument list instead of a shell string: nothing in the URL is
            # interpreted by a shell, and check=True stops on a failed fetch.
            subprocess.run(
                ["wget", "-c", "--read-timeout=5", "--tries=0",
                 "--directory-prefix", "tmpdata/", url],
                check=True,
            )
            with zipfile.ZipFile(archive) as bundle:
                bundle.extractall("tmpdata")
            csvfile = "tmpdata/" + str(league) + ".csv"
            scores = pd.read_csv(csvfile, usecols=wanted)
        finally:
            # Always clear the scratch files so a later call for another
            # season cannot pick up leftovers from this one.
            for leftover in [archive] + glob.glob("tmpdata/*csv"):
                if os.path.isfile(leftover):
                    os.remove(leftover)

        # Fix the column order, then drop rows that carry no date.
        scores = scores.loc[:, wanted]
        scores = scores.dropna(subset=["Date"])
        scores["Season"] = year
        scores[["FTHG", "FTAG"]] = scores[["FTHG", "FTAG"]].astype(int)
        mu.mutex_save(scores, filename)
    return mu.mutex_load(filename)

#ss = download_scores(2020, 'F1')
Esempio n. 3
0
def get_model():
    """Return the dense classifier together with its accuracy log.

    When ``mu.mutex_process('data/model.pkl')`` returns a truthy value, a
    fresh Keras ``Sequential`` network is built, compiled and written to
    ``data/model.h5``, and an empty accuracy table is stored in
    ``data/model.pkl`` through ``mu.mutex_save``. In every case the model
    and the table are then read back from those two files.

    Returns:
        A two-element list ``[model, accuracy_table]``: the result of
        ``tfk.models.load_model`` and the result of ``mu.mutex_load``.
    """
    model_file = 'data/model.h5'
    log_file = 'data/model.pkl'

    if mu.mutex_process(log_file):
        softmax = tf.nn.softmax
        # Input of shape (batch, 5, 8), flattened before the dense stack.
        stack = [
            tfk.layers.InputLayer(batch_input_shape=(None, 5, 8), name='in'),
            tfk.layers.Flatten(),
        ]
        # Three hidden dense layers, all with softmax activation.
        for width, label in ((40, 'dense1'), (30, 'dense2'), (10, 'dense3')):
            stack.append(tfk.layers.Dense(width, activation=softmax, name=label))
        # An LSTM-based variant was previously sketched here and left disabled.
        stack.append(tfk.layers.Dense(3, activation=softmax, name='denseOut'))

        network = tfk.Sequential(stack)
        network.compile(
            loss='sparse_categorical_crossentropy',
            optimizer='adam',
            metrics=['accuracy'],
        )
        network.save(model_file)

        # Empty log: one column per tracked quantity, no rows yet.
        log_columns = ['loss_train', 'accuracy_train', 'nb_train', 'loss_test',
                       'accuracy_test', 'nb_test', 'nb_epochs']
        empty_log = pd.DataFrame(data={column: [] for column in log_columns})
        mu.mutex_save(empty_log, log_file)

    loaded_model = tfk.models.load_model(model_file)
    loaded_log = mu.mutex_load(log_file)
    return [loaded_model, loaded_log]
Esempio n. 4
0
def _blank_team_history():
    """Return an empty five-match history: NaN arrays keyed BP, BC, cl, loc."""
    return {key: np.full(5, np.nan) for key in ('BP', 'BC', 'cl', 'loc')}


def _record_team_match(history, goals_for, goals_against, rank, location):
    """Store one match in a team's history, keeping the newest match first.

    While free (NaN) slots remain, the match goes into the highest-indexed
    free slot; once all five are used, the new match is prepended and the
    last entry is dropped.
    """
    latest = {'BP': goals_for, 'BC': goals_against, 'cl': rank, 'loc': location}
    # Count free slots once, before anything is written.
    free_slots = len(np.where(np.isnan(history['BP']))[0])
    for key, value in latest.items():
        if free_slots == 0:
            history[key] = np.concatenate(([value], history[key][:-1]))
        else:
            history[key][free_slots - 1] = value


def _add_to_team_stat(table, team, column, amount):
    """Add *amount* to *column* on the ranking row(s) whose team is *team*."""
    rows = table.team == team
    table.loc[rows, column] = table.loc[rows, column] + amount


def process_data(year, league):
    """Build training samples (last five matches of both teams) for one season.

    The result is cached in ``data/<league>_<year>_tdata.pkl`` through the
    ``mu`` helpers. When ``mu.mutex_process`` returns a truthy value for that
    file, the scores from ``dl.download_scores(year, league)`` are replayed
    in row order while a league table and a per-team five-match history are
    maintained. A sample is emitted for a match only when both teams already
    have more than four matches in the current table.

    Each sample is a (5, 8) array: for the home team then the away team, the
    goals scored (``BP``), goals conceded (``BC``), table position after the
    match (``cl``) and venue flag (``loc``: 0 home, 1 away) of their recorded
    matches. The label is 0 for a home win, 1 for a draw, 2 otherwise.

    Args:
        year: Season identifier, forwarded to ``dl.download_scores``.
        league: League identifier, forwarded to ``dl.download_scores``.

    Returns:
        The object returned by ``mu.mutex_load``. When produced here it is a
        dict with ``x_data`` (n, 5, 8), ``y_data`` (n, 1), ``teams_state``
        (the per-team histories) and ``ranking_state`` (the final table).
    """
    filename = 'data/' + str(league) + '_' + str(year) + '_tdata.pkl'
    if mu.mutex_process(filename):
        scores_data = dl.download_scores(year, league)
        nb_scores = scores_data.shape[0]
        # NaN marks rows that never received a sample.
        x_data = np.full((nb_scores, 5, 8), np.nan)
        y_data = np.full((nb_scores, 1), np.nan)
        curr_score = 0
        current_season = -1
        classement = pd.DataFrame(columns=["team", "points", "win", "draw", "lost", "matchs", "BP", "BC", "DB"])
        dict_teams = dict()
        history_keys = ('BP', 'BC', 'cl', 'loc')
        # Ranking columns to increment for (home, away), indexed by outcome.
        result_columns = (("win", "lost"), ("draw", "draw"), ("lost", "win"))

        for i in range(nb_scores):
            gameline = scores_data.iloc[i, :]
            if gameline.Season != current_season:
                if not classement.empty:
                    print(current_season)
                    print(i)
                    print(classement)
                # New season: start again from an empty table and history.
                classement.drop(classement.index, inplace=True)
                current_season = gameline.Season
                dict_teams.clear()
            hometeam = gameline.HomeTeam
            awayteam = gameline.AwayTeam
            home_goals = gameline.FTHG
            away_goals = gameline.FTAG
            if home_goals > away_goals:
                outcome = 0
            elif home_goals == away_goals:
                outcome = 1
            else:
                outcome = 2

            # fill the NN inputs (uses the state *before* this match)
            if ((hometeam in dict_teams) and (awayteam in dict_teams)
                    and classement.loc[classement.team == hometeam, "matchs"].values[0] > 4
                    and classement.loc[classement.team == awayteam, "matchs"].values[0] > 4):
                for side, team in enumerate((hometeam, awayteam)):
                    for offset, key in enumerate(history_keys):
                        x_data[curr_score, :, 4 * side + offset] = dict_teams[team][key]
                y_data[curr_score, 0] = outcome
                curr_score = curr_score + 1

            # add team into the ranking if necessary
            for team in (hometeam, awayteam):
                if classement.loc[classement.team == team, :].empty:
                    classement.loc[classement.shape[0]] = [team, 0, 0, 0, 0, 0, 0, 0, 0]

            # update rank
            home_column, away_column = result_columns[outcome]
            _add_to_team_stat(classement, hometeam, home_column, 1)
            _add_to_team_stat(classement, awayteam, away_column, 1)
            _add_to_team_stat(classement, hometeam, "BP", home_goals)
            _add_to_team_stat(classement, hometeam, "BC", away_goals)
            _add_to_team_stat(classement, awayteam, "BP", away_goals)
            _add_to_team_stat(classement, awayteam, "BC", home_goals)

            # Two points per win before the 1994 season, three afterwards.
            if current_season < 1994:
                classement["points"] = classement.win * 2 + classement.draw
            else:
                classement["points"] = classement.win * 3 + classement.draw
            classement["matchs"] = classement.win + classement.draw + classement.lost
            classement["DB"] = classement.BP - classement.BC
            classement = classement.sort_values(by=["points", "DB"], ascending=False)
            classement = classement.reset_index(drop=True)

            # 1-based table positions after the re-sort.
            home_rank = classement.loc[classement.team == hometeam, :].index[0] + 1
            away_rank = classement.loc[classement.team == awayteam, :].index[0] + 1

            # fill hometeam dict
            if hometeam not in dict_teams:
                dict_teams[hometeam] = _blank_team_history()
            _record_team_match(dict_teams[hometeam], home_goals, away_goals, home_rank, 0)

            # fill awayteam dict
            if awayteam not in dict_teams:
                dict_teams[awayteam] = _blank_team_history()
            _record_team_match(dict_teams[awayteam], away_goals, home_goals, away_rank, 1)

        # Keep only the rows before the first unfilled one. If no unfilled
        # row exists (e.g. an empty score table) keep everything instead of
        # failing on an empty index lookup.
        unfilled = np.where(np.isnan(x_data[:, 0, 0]))[0]
        index_data = unfilled[0] if len(unfilled) else nb_scores
        x_data = x_data[:index_data, :, :]
        y_data = y_data[:index_data, :]
        tdata = {'x_data': x_data, 'y_data': y_data,
                 'teams_state': dict_teams, 'ranking_state': classement}
        mu.mutex_save(tdata, filename)
    processed_data = mu.mutex_load(filename)
    return processed_data
Esempio n. 5
0
def _bump_ranking_stat(table, team, column, amount=1):
    """Add *amount* to *column* on the ranking row(s) whose team is *team*."""
    rows = table.team == team
    table.loc[rows, column] = table.loc[rows, column] + amount


def _free_match_slot(table, team):
    """Return the highest k in 1..5 whose ``m<k>team`` entry is NaN, or -1."""
    row = table.loc[table.team == team, :]
    for k in reversed(range(1, 6)):
        if np.isnan(row["m" + str(k) + "team"].iloc[0]):
            return k
    return -1


def _shift_match_slots(table, team):
    """Copy slots m1..m4 of *team* into m2..m5, overwriting the old m5."""
    for k in reversed(range(2, 6)):
        for suffix in ("team", "loc", "res"):
            # NOTE(review): a Series is assigned to a one-column selection,
            # exactly as before; confirm pandas aligns it as intended.
            table.loc[table.team == team, ["m" + str(k) + suffix]] = \
                table.loc[table.team == team, :]["m" + str(k - 1) + suffix]


def process_data(year, league):
    """Build training samples (whole league table per match) for one season.

    The result is cached in ``data/<league>_<year>_tdata2.pkl`` through the
    ``mu`` helpers. When ``mu.mutex_process`` returns a truthy value for that
    file, the scores from ``dl.download_scores(year, league)`` are replayed
    in row order while a league table is maintained. Besides the overall and
    home-only totals, each table row keeps five match slots ``m1``..``m5``
    (``m1`` the most recent once all are used) holding the opponent's table
    position, a venue flag (0 home, 1 away) and the result code.

    A sample is emitted for a match only when both teams are already in the
    table and the table contains no missing value. The sample is the table
    without its ``team`` column, with the home team's row first, the away
    team's row second and the remaining rows after them. The label is 0 for
    a home win, 1 for a draw, 2 otherwise.

    Args:
        year: Season identifier, forwarded to ``dl.download_scores``.
        league: League identifier, forwarded to ``dl.download_scores``.

    Returns:
        The object returned by ``mu.mutex_load``. When produced here it is a
        dict with ``x_data`` (n, 20, 29), ``y_data`` (n, 1) and
        ``ranking_state`` (the final table).
    """
    filename = 'data/' + str(league) + '_' + str(year) + '_tdata2.pkl'
    if mu.mutex_process(filename):
        scores_data = dl.download_scores(year, league)
        nb_scores = scores_data.shape[0]
        # NaN marks rows that never received a sample.
        x_data = np.full((nb_scores, 20, 8 + 6 + 15), np.nan)
        y_data = np.full((nb_scores, 1), np.nan)
        curr_score = 0
        current_season = -1
        slot_fields = ["team", "loc", "res"]
        classement = pd.DataFrame(
            columns=["team", "points", "win", "draw", "lost", "matchs", "BP", "BC", "DB"]
            + ["h_win", "h_draw", "h_lost", "h_BP", "h_BC", "h_DB"]
            + ["m" + str(k) + t for k in range(1, 6) for t in slot_fields])
        # Columns to increment for (home overall, away overall, home-only),
        # indexed by outcome.
        result_columns = (("win", "lost", "h_win"),
                          ("draw", "draw", "h_draw"),
                          ("lost", "win", "h_lost"))

        for i in range(nb_scores):
            gameline = scores_data.iloc[i, :]
            if gameline.Season != current_season:
                if not classement.empty:
                    print(current_season)
                    print(i)
                    print(classement)
                # New season: start again from an empty table.
                classement.drop(classement.index, inplace=True)
                current_season = gameline.Season
            hometeam = gameline.HomeTeam
            awayteam = gameline.AwayTeam
            home_goals = gameline.FTHG
            away_goals = gameline.FTAG
            if home_goals > away_goals:
                outcome = 0
            elif home_goals == away_goals:
                outcome = 1
            else:
                outcome = 2

            # fill the NN inputs (uses the table *before* this match)
            known_teams = classement['team'].unique()
            if ((hometeam in known_teams) and (awayteam in known_teams)
                    and not classement.isnull().values.any()):
                cl = pd.concat([classement.loc[classement.team == hometeam, :],
                                classement.loc[classement.team == awayteam, :],
                                classement.loc[np.logical_and(classement.team != hometeam,
                                                              classement.team != awayteam), :]])
                # NOTE(review): the target slice is (20, 29), so this assumes
                # the table has 20 rows at this point - confirm for leagues
                # of another size.
                x_data[curr_score, :, :] = cl.drop('team', axis=1)
                y_data[curr_score, 0] = outcome
                curr_score = curr_score + 1

            # add team into the ranking if necessary: zero totals, empty slots
            for team in (hometeam, awayteam):
                if classement.loc[classement.team == team, :].empty:
                    classement.loc[classement.shape[0]] = \
                        [team] + [0] * (8 + 6) + [np.nan] * 15

            # update rank
            home_res = outcome
            away_res = 2 - outcome  # the same result seen from the away side
            home_column, away_column, home_only_column = result_columns[outcome]
            _bump_ranking_stat(classement, hometeam, home_column)
            _bump_ranking_stat(classement, awayteam, away_column)
            _bump_ranking_stat(classement, hometeam, home_only_column)
            _bump_ranking_stat(classement, hometeam, "BP", home_goals)
            _bump_ranking_stat(classement, hometeam, "BC", away_goals)
            _bump_ranking_stat(classement, awayteam, "BP", away_goals)
            _bump_ranking_stat(classement, awayteam, "BC", home_goals)
            _bump_ranking_stat(classement, hometeam, "h_BP", home_goals)
            _bump_ranking_stat(classement, hometeam, "h_BC", away_goals)
            # NOTE(review): h_DB is created at 0 and never updated here -
            # confirm whether it should track h_BP - h_BC.

            # Two points per win before the 1994 season, three afterwards.
            if current_season < 1994:
                classement["points"] = classement.win * 2 + classement.draw
            else:
                classement["points"] = classement.win * 3 + classement.draw
            classement["matchs"] = classement.win + classement.draw + classement.lost
            classement["DB"] = classement.BP - classement.BC

            # add match history: find the slots first, then shift, then write
            home_igame = _free_match_slot(classement, hometeam)
            away_igame = _free_match_slot(classement, awayteam)
            if home_igame == -1:
                _shift_match_slots(classement, hometeam)
                home_igame = 1
            if away_igame == -1:
                _shift_match_slots(classement, awayteam)
                away_igame = 1
            # The opponent position is the 1-based row index in the table as
            # it stands before the re-sort below.
            classement.loc[classement.team == hometeam, ["m" + str(home_igame) + u for u in slot_fields]] = \
                [classement.loc[classement.team == awayteam, :].index[0] + 1, 0, home_res]
            classement.loc[classement.team == awayteam, ["m" + str(away_igame) + u for u in slot_fields]] = \
                [classement.loc[classement.team == hometeam, :].index[0] + 1, 1, away_res]

            # sort ranking
            classement = classement.sort_values(by=["points", "DB"], ascending=False)
            classement = classement.reset_index(drop=True)

        # Keep only the rows before the first unfilled one. If no unfilled
        # row exists (e.g. an empty score table) keep everything instead of
        # failing on an empty index lookup.
        unfilled = np.where(np.isnan(x_data[:, 0, 0]))[0]
        index_data = unfilled[0] if len(unfilled) else nb_scores

        x_data = x_data[:index_data, :, :]
        y_data = y_data[:index_data, :]
        tdata = {'x_data': x_data, 'y_data': y_data,
                 'ranking_state': classement}
        mu.mutex_save(tdata, filename)
    processed_data = mu.mutex_load(filename)
    return processed_data