def get_model():
    """Return the Conv1D classifier and its accuracy log as ``[model, log]``.

    If ``mu.mutex_process('data/model2.pkl')`` is truthy (presumably meaning
    the pickle still has to be produced -- confirm against ``mu``), a new
    network is built, summarised, compiled and written to ``data/model2.h5``,
    and an empty accuracy-history frame is stored in ``data/model2.pkl``.
    In every case both artefacts are then read back from disk.

    Returns:
        list: ``[keras_model, accuracy_dataframe]``.
    """
    history_path = 'data/model2.pkl'
    model_path = 'data/model2.h5'

    if mu.mutex_process(history_path):
        layers = tfk.layers
        stack = [
            # Input: 20 rows of 8 + 6 + 15 features each.
            layers.InputLayer(batch_input_shape=(None, 20, 8 + 6 + 15), name='in'),
            layers.Conv1D(filters=100, kernel_size=2, strides=1, name='conv1'),
            layers.MaxPooling1D(pool_size=4, name='pool1'),
            layers.Dropout(.2),
            layers.Conv1D(filters=50, kernel_size=3, strides=1, name='conv2'),
            layers.MaxPooling1D(pool_size=2, name='pool2'),
            layers.Dropout(.2),
            layers.Flatten(name='flat'),
            layers.Dense(50, activation=tfk.activations.linear, name='dense1'),
            layers.Dropout(.2),
            layers.Dense(20, activation=tfk.activations.linear, name='dense2'),
            layers.Dropout(.2),
            # Three output classes.
            layers.Dense(3, activation=tf.nn.softmax, name='denseOut'),
        ]
        network = tfk.Sequential(stack)
        network.summary()
        network.compile(
            optimizer='adam',
            loss='sparse_categorical_crossentropy',
            metrics=['accuracy'],
        )
        network.save(model_path)

        # Empty log; one column per tracked quantity.
        log_columns = ('loss_train', 'accuracy_train', 'nb_train',
                       'loss_test', 'accuracy_test', 'nb_test', 'nb_epochs')
        empty_log = pd.DataFrame(data={column: [] for column in log_columns})
        mu.mutex_save(empty_log, history_path)

    loaded_model = tfk.models.load_model(model_path)
    loaded_log = mu.mutex_load(history_path)
    return [loaded_model, loaded_log]
def download_scores(year, league):
    """Return the score table of one league season, downloading it if needed.

    The result is cached in ``data/<league>_<year>_scores.pkl``.  When
    ``mu.mutex_process`` is truthy for that file (presumably meaning the cache
    still has to be produced -- confirm against ``mu``), the season archive is
    fetched from football-data.co.uk with ``wget``, unpacked with ``unzip``
    into ``tmpdata/``, and ``tmpdata/<league>.csv`` is read.

    Args:
        year: First calendar year of the season (e.g. ``2020`` for 2020/21).
            Must support ``year + 1``.
        league: League code used as the CSV file name inside the archive.

    Returns:
        Whatever ``mu.mutex_load`` yields for the cache file; the object saved
        here is a DataFrame with columns ``Date``, ``HomeTeam``, ``AwayTeam``,
        ``FTHG``, ``FTAG`` (both cast to int) and ``Season``.
    """
    # Local imports: only this function needs them.
    import glob
    import subprocess

    filename = 'data/' + str(league) + '_' + str(year) + '_scores.pkl'
    if mu.mutex_process(filename):
        seas_y1 = str(year)[-2:]
        seas_y2 = str(year + 1)[-2:]
        url = "http://www.football-data.co.uk/mmz4281/" + seas_y1 + seas_y2 + "/data.zip"
        print(url)

        # Argument lists instead of shell strings: nothing is re-parsed by a
        # shell.  Exit codes are deliberately not checked, as before; a failed
        # download surfaces when the CSV is read below.
        subprocess.run(["wget", "-c", "--read-timeout=5", "--tries=0",
                        "--directory-prefix", "tmpdata/", url], check=False)
        subprocess.run(["unzip", "-d", "tmpdata", "tmpdata/data.zip"], check=False)

        columns = ["Date", "HomeTeam", "AwayTeam", "FTHG", "FTAG"]
        csvfile = "tmpdata/" + str(league) + ".csv"
        data_scores = pd.read_csv(csvfile, usecols=columns)
        # Fix the column order, then drop rows that carry no date.
        data_scores = data_scores.loc[:, columns].dropna(subset=["Date"])
        data_scores["Season"] = year

        # Best-effort removal of the archive and of every extracted CSV.
        for leftover in ["tmpdata/data.zip"] + glob.glob("tmpdata/*csv"):
            try:
                os.remove(leftover)
            except FileNotFoundError:
                pass

        data_scores[["FTHG", "FTAG"]] = data_scores[["FTHG", "FTAG"]].astype(int)
        mu.mutex_save(data_scores, filename)

    scores_year_league = mu.mutex_load(filename)
    return scores_year_league


#ss = download_scores(2020, 'F1')
def get_model():
    """Return the dense classifier and its accuracy log as ``[model, log]``.

    If ``mu.mutex_process('data/model.pkl')`` is truthy (presumably meaning
    the pickle still has to be produced -- confirm against ``mu``), a new
    network is built, compiled and written to ``data/model.h5``, and an empty
    accuracy-history frame is stored in ``data/model.pkl``.  In every case
    both artefacts are then read back from disk.

    Returns:
        list: ``[keras_model, accuracy_dataframe]``.
    """
    history_path = 'data/model.pkl'
    model_path = 'data/model.h5'

    if mu.mutex_process(history_path):
        layers = tfk.layers
        stack = [
            # Input: 5 rows of 8 features each, flattened before the dense stack.
            layers.InputLayer(batch_input_shape=(None, 5, 8), name='in'),
            layers.Flatten(),
            layers.Dense(40, activation=tf.nn.softmax, name='dense1'),
            layers.Dense(30, activation=tf.nn.softmax, name='dense2'),
            layers.Dense(10, activation=tf.nn.softmax, name='dense3'),
            # Disabled alternative kept from the original:
            #tfk.layers.LSTM(units=5, name='lstm'),
            #tfk.layers.Dense(10, activation=tf.nn.softmax, name='dense'),
            # Three output classes.
            layers.Dense(3, activation=tf.nn.softmax, name='denseOut'),
        ]
        network = tfk.Sequential(stack)
        network.compile(
            optimizer='adam',
            loss='sparse_categorical_crossentropy',
            metrics=['accuracy'],
        )
        network.save(model_path)

        # Empty log; one column per tracked quantity.
        log_columns = ('loss_train', 'accuracy_train', 'nb_train',
                       'loss_test', 'accuracy_test', 'nb_test', 'nb_epochs')
        empty_log = pd.DataFrame(data={column: [] for column in log_columns})
        mu.mutex_save(empty_log, history_path)

    loaded_model = tfk.models.load_model(model_path)
    loaded_log = mu.mutex_load(history_path)
    return [loaded_model, loaded_log]
def _new_team_history():
    """Return an all-NaN five-match history (goals for/against, rank, venue)."""
    return {key: np.full(5, np.nan) for key in ('BP', 'BC', 'cl', 'loc')}


def _push_match_history(history, goals_for, goals_against, rank, location):
    """Record one match in a team's five-slot history, most recent at index 0."""
    latest = {'BP': goals_for, 'BC': goals_against, 'cl': rank, 'loc': location}
    free_slots = int(np.count_nonzero(np.isnan(history['BP'])))
    if free_slots == 0:
        # History is full: prepend the new match and drop the oldest one.
        for key, value in latest.items():
            history[key] = np.concatenate(([value], history[key][:-1]))
    else:
        # Slots are filled from index 4 down to index 0.
        for key, value in latest.items():
            history[key][free_slots - 1] = value


def process_data(year, league):
    """Build the (5 x 8) training samples of one league season.

    The result is cached in ``data/<league>_<year>_tdata.pkl``.  When
    ``mu.mutex_process`` is truthy for that file (presumably meaning the cache
    still has to be produced -- confirm against ``mu``), the scores returned by
    ``dl.download_scores`` are replayed in row order while maintaining

    * ``classement``: the ranking table, re-sorted after every match by
      points then goal difference;
    * ``dict_teams``: per team, the last five matches as goals scored
      (``BP``), goals conceded (``BC``), rank after the match (``cl``) and
      venue (``loc``: 0 = home, 1 = away).

    A sample is emitted for a match only once both teams have played more
    than four matches.  Its features are the two teams' histories *before*
    the match (columns 0-3 home, 4-7 away); its label is 0 for a home win,
    1 for a draw and 2 for an away win.

    Unlike the previous implementation, the sample arrays are trimmed with
    the sample counter rather than by searching for the first NaN row.  This
    gives the same cut whenever that search succeeded, and no longer raises
    ``IndexError`` when no NaN row exists (e.g. an empty score table).

    Args:
        year: Season identifier forwarded to ``dl.download_scores``.
        league: League code forwarded to ``dl.download_scores``.

    Returns:
        Whatever ``mu.mutex_load`` yields for the cache file; the object saved
        here is a dict with keys ``x_data`` (n, 5, 8), ``y_data`` (n, 1),
        ``teams_state`` and ``ranking_state``.
    """
    filename = 'data/' + str(league) + '_' + str(year) + '_tdata.pkl'
    if mu.mutex_process(filename):
        scores_data = dl.download_scores(year, league)
        nb_scores = scores_data.shape[0]
        x_data = np.full((nb_scores, 5, 8), np.nan)
        y_data = np.full((nb_scores, 1), np.nan)
        curr_score = 0
        current_season = -1
        classement = pd.DataFrame(columns=["team", "points", "win", "draw", "lost",
                                           "matchs", "BP", "BC", "DB"])
        dict_teams = dict()
        feature_keys = ('BP', 'BC', 'cl', 'loc')

        for i in range(nb_scores):
            gameline = scores_data.iloc[i, :]

            # A new season starts: show the final table and reset all state.
            if gameline.Season != current_season:
                if not classement.empty:
                    print(current_season)
                    print(i)
                    print(classement)
                classement.drop(classement.index, inplace=True)
                current_season = gameline.Season
                dict_teams.clear()

            hometeam = gameline.HomeTeam
            awayteam = gameline.AwayTeam

            # Emit a sample from the pre-match state once both histories are full.
            if ((hometeam in dict_teams) and (awayteam in dict_teams) and
                    classement.loc[classement.team == hometeam, "matchs"].values[0] > 4 and
                    classement.loc[classement.team == awayteam, "matchs"].values[0] > 4):
                for offset, team in ((0, hometeam), (4, awayteam)):
                    for column, key in enumerate(feature_keys):
                        x_data[curr_score, :, offset + column] = dict_teams[team][key]
                if gameline.FTHG > gameline.FTAG:
                    y_data[curr_score, 0] = 0
                elif gameline.FTHG == gameline.FTAG:
                    y_data[curr_score, 0] = 1
                else:
                    y_data[curr_score, 0] = 2
                curr_score += 1

            # Add unseen teams to the ranking with all counters at zero.
            for team in (hometeam, awayteam):
                if classement.loc[classement.team == team, :].empty:
                    classement.loc[classement.shape[0]] = [team, 0, 0, 0, 0, 0, 0, 0, 0]

            # Masks are computed after the insertions so they match the table.
            home_mask = classement.team == hometeam
            away_mask = classement.team == awayteam

            # Update win / draw / loss counters.
            if gameline.FTHG > gameline.FTAG:
                classement.loc[home_mask, "win"] += 1
                classement.loc[away_mask, "lost"] += 1
            elif gameline.FTHG == gameline.FTAG:
                classement.loc[home_mask, "draw"] += 1
                classement.loc[away_mask, "draw"] += 1
            else:
                classement.loc[home_mask, "lost"] += 1
                classement.loc[away_mask, "win"] += 1

            # Update goals scored (BP) and conceded (BC).
            classement.loc[home_mask, "BP"] += gameline.FTHG
            classement.loc[home_mask, "BC"] += gameline.FTAG
            classement.loc[away_mask, "BP"] += gameline.FTAG
            classement.loc[away_mask, "BC"] += gameline.FTHG

            # Seasons before 1994 award two points per win, later ones three.
            points_per_win = 2 if current_season < 1994 else 3
            classement["points"] = classement.win * points_per_win + classement.draw
            classement["matchs"] = classement.win + classement.draw + classement.lost
            classement["DB"] = classement.BP - classement.BC
            classement = classement.sort_values(by=["points", "DB"], ascending=False)
            classement = classement.reset_index(drop=True)

            # 1-based rank of both teams after the table has been re-sorted.
            home_rank = classement.loc[classement.team == hometeam, :].index[0] + 1
            away_rank = classement.loc[classement.team == awayteam, :].index[0] + 1

            if hometeam not in dict_teams:
                dict_teams[hometeam] = _new_team_history()
            _push_match_history(dict_teams[hometeam],
                                gameline.FTHG, gameline.FTAG, home_rank, 0)

            if awayteam not in dict_teams:
                dict_teams[awayteam] = _new_team_history()
            _push_match_history(dict_teams[awayteam],
                                gameline.FTAG, gameline.FTHG, away_rank, 1)

        # Keep only the rows that actually received a sample.
        x_data = x_data[:curr_score, :, :]
        y_data = y_data[:curr_score, :]
        tdata = {'x_data': x_data, 'y_data': y_data,
                 'teams_state': dict_teams, 'ranking_state': classement}
        mu.mutex_save(tdata, filename)

    processed_data = mu.mutex_load(filename)
    return processed_data
def process_data(year, league):
    """Build the (20 x 29) training samples of one league season.

    The result is cached in ``data/<league>_<year>_tdata2.pkl``.  When
    ``mu.mutex_process`` is truthy for that file (presumably meaning the cache
    still has to be produced -- confirm against ``mu``), the scores returned by
    ``dl.download_scores`` are replayed in row order while maintaining the
    ranking table ``classement``.  Besides the team name it holds 29 columns:

    * 8 overall columns: points, win, draw, lost, matchs, BP, BC, DB;
    * 6 home-only columns: h_win, h_draw, h_lost, h_BP, h_BC, h_DB;
    * 15 history columns ``m<k>team``, ``m<k>loc``, ``m<k>res`` for k = 1..5.

    A sample is the whole ranking table (home team first, away team second,
    remaining teams after) taken *before* the match; its label is 0 for a
    home win, 1 for a draw and 2 for an away win.

    Args:
        year: Season identifier forwarded to ``dl.download_scores``.
        league: League code forwarded to ``dl.download_scores``.

    Returns:
        Whatever ``mu.mutex_load`` yields for the cache file; the object saved
        here is a dict with keys ``x_data``, ``y_data`` and ``ranking_state``.
    """
    filename = 'data/'+str(league)+'_'+str(year)+'_tdata2.pkl'
    #print(filename)
    if mu.mutex_process(filename):
        scores_data = dl.download_scores(year, league)
        nb_scores = scores_data.shape[0]
        # Pre-allocate one slot per match, filled with NaN; unused slots are
        # trimmed at the end.
        x_data = np.zeros(shape=(nb_scores, 20, 8+6+15))
        y_data = np.zeros(shape=(nb_scores, 1))
        x_data[:, :, :] = np.nan
        y_data[:, :] = np.nan
        curr_score = 0
        current_season = -1
        classement = pd.DataFrame(columns=["team", "points", "win", "draw", "lost", "matchs", "BP", "BC", "DB"]+
                                          ["h_win", "h_draw", "h_lost", "h_BP", "h_BC", "h_DB"]+
                                          [str("m")+str(k)+str(t) for k in range(1, 6) for t in ["team", "loc", "res"]])
        for i in range(0, nb_scores):
            gameline = scores_data.iloc[i, :]
            #print(gameline.Date)
            # A new season starts: show the final table and empty it.
            if gameline.Season != current_season:
                if not classement.empty:
                    print(current_season)
                    print(i)
                    print(classement)
                classement.drop(classement.index, inplace=True)
                current_season = gameline.Season
            hometeam = gameline.HomeTeam
            awayteam = gameline.AwayTeam
            # Fill the NN inputs.  A sample is emitted only when both teams are
            # already ranked and no cell of the whole table is null; the only
            # columns initialised to NaN are the 15 history columns.
            if ((hometeam in classement['team'].unique()) and (awayteam in classement['team'].unique()) and
                    not classement.isnull().values.any()):
                # Row order of the sample: home team, away team, everyone else.
                cl = pd.concat([classement.loc[classement.team == hometeam, :],
                                classement.loc[classement.team == awayteam, :],
                                classement.loc[np.logical_and(classement.team != hometeam,
                                                              classement.team != awayteam), :]])
                # NOTE(review): the target slot is (20, 29), so this presumably
                # requires exactly 20 ranked teams -- confirm for leagues of a
                # different size.
                x_data[curr_score, :, :] = cl.drop('team', axis=1)
                if gameline.FTHG > gameline.FTAG:
                    y_data[curr_score, 0] = 0
                elif gameline.FTHG == gameline.FTAG:
                    y_data[curr_score, 0] = 1
                else:
                    y_data[curr_score, 0] = 2
                curr_score = curr_score + 1
            # Add the teams to the ranking if necessary: 14 zeroed counters
            # followed by 15 NaN history cells.
            clhome = classement.loc[classement.team == hometeam, :]
            if clhome.empty:
                classement.loc[classement.shape[0]] = [hometeam] + \
                    [0 for gg in range(0, 8+6)] + [np.nan for gg in range(0, 15)]
            claway = classement.loc[classement.team == awayteam, :]
            if claway.empty:
                classement.loc[classement.shape[0]] = [awayteam] + \
                    [0 for gg in range(0, 8+6)] + [np.nan for gg in range(0, 15)]
            # Update the ranking counters.  home_res / away_res encode the
            # result from each team's side: 0 = win, 1 = draw, 2 = loss.
            home_res = -1
            away_res = -1
            if gameline.FTHG > gameline.FTAG:
                home_res = 0
                away_res = 2
                classement.loc[classement.team == hometeam, "win"] = \
                    classement.loc[classement.team == hometeam, "win"] + 1
                classement.loc[classement.team == awayteam, "lost"] = \
                    classement.loc[classement.team == awayteam, "lost"] + 1
                classement.loc[classement.team == hometeam, "h_win"] = \
                    classement.loc[classement.team == hometeam, "h_win"] + 1
            elif gameline.FTHG == gameline.FTAG:
                home_res = 1
                away_res = 1
                classement.loc[classement.team == hometeam, "draw"] = \
                    classement.loc[classement.team == hometeam, "draw"] + 1
                classement.loc[classement.team == awayteam, "draw"] = \
                    classement.loc[classement.team == awayteam, "draw"] + 1
                classement.loc[classement.team == hometeam, "h_draw"] = \
                    classement.loc[classement.team == hometeam, "h_draw"] + 1
            else:
                home_res = 2
                away_res = 0
                classement.loc[classement.team == hometeam, "lost"] = \
                    classement.loc[classement.team == hometeam, "lost"] + 1
                classement.loc[classement.team == awayteam, "win"] = \
                    classement.loc[classement.team == awayteam, "win"] + 1
                classement.loc[classement.team == hometeam, "h_lost"] = \
                    classement.loc[classement.team == hometeam, "h_lost"] + 1
            # Goals scored (BP) and conceded (BC), overall and home-only.
            classement.loc[classement.team == hometeam, "BP"] = \
                classement.loc[classement.team == hometeam, "BP"] + gameline.FTHG
            classement.loc[classement.team == hometeam, "BC"] = \
                classement.loc[classement.team == hometeam, "BC"] + gameline.FTAG
            classement.loc[classement.team == awayteam, "BP"] = \
                classement.loc[classement.team == awayteam, "BP"] + gameline.FTAG
            classement.loc[classement.team == awayteam, "BC"] = \
                classement.loc[classement.team == awayteam, "BC"] + gameline.FTHG
            classement.loc[classement.team == hometeam, "h_BP"] = \
                classement.loc[classement.team == hometeam, "h_BP"] + gameline.FTHG
            classement.loc[classement.team == hometeam, "h_BC"] = \
                classement.loc[classement.team == hometeam, "h_BC"] + gameline.FTAG
            # NOTE(review): h_DB is initialised to 0 and never updated in this
            # function -- confirm whether h_BP - h_BC was intended.
            # Seasons before 1994 award two points per win, later ones three.
            if current_season < 1994:
                classement.points = classement.win * 2 + classement.draw
            else:
                classement.points = classement.win * 3 + classement.draw
            classement.matchs = classement.win + classement.draw + classement.lost
            classement.DB = classement.BP - classement.BC
            # Add the match to both teams' history.  First find, scanning from
            # slot 5 down to slot 1, the first slot whose "team" cell is NaN;
            # -1 means every slot is already used.
            home_igame = -1
            away_igame = -1
            for k in reversed(range(1, 6)):
                if np.isnan(
                        classement.loc[classement.team == hometeam,:]["m" + str(k) + "team"].iloc[0]
                ) and home_igame == -1:
                    home_igame = k
                if np.isnan(
                        classement.loc[classement.team == awayteam,:]["m" + str(k) + "team"].iloc[0]
                ) and away_igame == -1:
                    away_igame = k
            # History full: shift slots 1..4 into 2..5 (dropping slot 5) and
            # reuse slot 1 for the new match.
            if home_igame == -1:
                for k in reversed(range(2, 6)):
                    classement.loc[classement.team == hometeam, ["m" + str(k) + "team"]] = \
                        classement.loc[classement.team == hometeam, :]["m" + str(k-1) + "team"]
                    classement.loc[classement.team == hometeam, ["m" + str(k) + "loc"]] = \
                        classement.loc[classement.team == hometeam, :]["m" + str(k - 1) + "loc"]
                    classement.loc[classement.team == hometeam, ["m" + str(k) + "res"]] = \
                        classement.loc[classement.team == hometeam, :]["m" + str(k - 1) + "res"]
                home_igame = 1
            if away_igame == -1:
                for k in reversed(range(2, 6)):
                    classement.loc[classement.team == awayteam, ["m" + str(k) + "team"]] = \
                        classement.loc[classement.team == awayteam, :]["m" + str(k - 1) + "team"]
                    classement.loc[classement.team == awayteam, ["m" + str(k) + "loc"]] = \
                        classement.loc[classement.team == awayteam, :]["m" + str(k - 1) + "loc"]
                    classement.loc[classement.team == awayteam, ["m" + str(k) + "res"]] = \
                        classement.loc[classement.team == awayteam, :]["m" + str(k - 1) + "res"]
                away_igame = 1
            # Store the opponent's row index + 1 (taken before the re-sort
            # below), the venue (0 = home, 1 = away) and the result code.
            classement.loc[classement.team == hometeam,
                           ["m" + str(home_igame) + u for u in ["team", "loc", "res"]]] = \
                [classement.loc[classement.team == awayteam,:].index[0]+1, 0, home_res]
            classement.loc[classement.team == awayteam,
                           ["m" + str(away_igame) + u for u in ["team", "loc", "res"]]] = \
                [classement.loc[classement.team == hometeam,:].index[0]+1, 1, away_res]
            # Sort the ranking by points, then goal difference, best first.
            classement = classement.sort_values(by=["points", "DB"], ascending=False)
            classement = classement.reset_index(drop=True)
        # Trim both arrays at the first slot that never received a sample.
        # NOTE(review): the trailing [0][0] raises IndexError when no NaN row
        # exists (e.g. an empty score table) -- confirm whether callers can
        # hit this.
        index_data = np.where(np.isnan(x_data[:, 0, 0]))[0][0]
        x_data = x_data[range(index_data), :, :]
        y_data = y_data[range(index_data), :]
        tdata = {'x_data': x_data, 'y_data': y_data, 'ranking_state': classement}
        mu.mutex_save(tdata, filename)
    processed_data = mu.mutex_load(filename)
    return processed_data