async def get_player_data():
    """Download all data from the Fantasy Premier League website"""
    # Connect to the sqlite3 DB
    conn = connect_to_db()
    summary_df = pd.DataFrame()
    async with aiohttp.ClientSession() as session:
        fpl = FPL(session)
        players = await fpl.get_players()
        i = 0
        for player in players:
            i += 1
            print("Loading data for player {} of {}".format(i, len(players)))
            player_summary = await fpl.get_player_summary(player.id)
            player_history = pd.DataFrame(player_summary.history)
            player_history['web_name'] = player.web_name
            player_history['id'] = player.id
            player_history['team'] = player.team
            player_history['team_code'] = player.team_code
            summary_df = summary_df.append(player_history)
    # Upload the data to a table in the database
    run_query(query='DROP TABLE IF EXISTS player_data', return_data=False)
    summary_df.to_sql('player_data', conn)
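# connect_to_db() and run_query() are used throughout this module but are
# defined elsewhere in the project. A minimal sketch of what they are assumed
# to look like (the 'db.sqlite' path is a placeholder, not the project's real
# configuration):
import sqlite3


def connect_to_db(connection_url=None):
    """Open a connection to the project's sqlite3 database (sketch)."""
    return sqlite3.connect(connection_url or 'db.sqlite')


def run_query(query, params=None, return_data=True):
    """Execute a query, returning the result as a DataFrame if requested
    (sketch)."""
    conn = connect_to_db()
    try:
        if return_data:
            return pd.read_sql_query(query, conn, params=params)
        conn.execute(query, params or [])
        conn.commit()
    finally:
        conn.close()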
def find_closest_match(team_name):
    # Hard-code the matches that are too difficult to match on string similarity
    if team_name == 'Man City':
        output = {
            "team_name": 'Manchester City',
            "team_id": fetch_id("Manchester City")
        }
        return output
    elif team_name == 'Spurs':
        output = {"team_name": 'Tottenham', "team_id": fetch_id("Tottenham")}
        return output
    else:
        df = run_query("select team_name, team_id from team_ids")
        # Score every stored team name against the input using Levenshtein ratio
        df['l_dist'] = df['team_name'].apply(
            lambda x: Levenshtein.ratio(x, team_name))
        max_similarity = max(df['l_dist'])
        closest_match = df.loc[df['l_dist'] == max_similarity,
                               ['team_name', 'team_id']].reset_index(drop=True)
        output = {
            "team_name": closest_match.loc[0, 'team_name'],
            "team_id": closest_match.loc[0, 'team_id']
        }
        return output
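# Illustrative usage of find_closest_match (the fuzzy-matched names and IDs
# depend on what get_teams_from_wiki stored in the team_ids table):
#
#   >>> find_closest_match('Man City')
#   {'team_name': 'Manchester City', 'team_id': ...}
#   >>> find_closest_match('Leicester')  # fuzzy match via Levenshtein ratio
#   {'team_name': 'Leicester City', 'team_id': ...}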
async def get_fpl_teams():
    """Download team data from the Fantasy Premier League website"""
    # Connect to the sqlite3 DB
    conn = connect_to_db()
    df_teams = pd.DataFrame()
    i = 0
    # Get all of the team IDs from the fpl_fixtures table
    teams = run_query('select distinct team_h from fpl_fixtures')
    n_teams = len(teams)
    assert n_teams == 20, 'The number of returned teams should be 20, ' \
                          'but is actually {}'.format(n_teams)
    async with aiohttp.ClientSession() as session:
        fpl = FPL(session)
        team_data = await fpl.get_teams(return_json=True)
        for team in team_data:
            df_teams = df_teams.append(pd.DataFrame(team, index=[i]))
            i += 1
    # Find the closest matching team names to the ones in our data
    matches = list(df_teams['name'].apply(lambda x: find_closest_match(x)))
    df_matches = pd.DataFrame()
    for match in matches:
        df_matches = df_matches.append(pd.DataFrame(match, index=[0]))
    df_teams = pd.concat([df_teams, df_matches.reset_index(drop=True)], axis=1)
    # Upload the data to a table in the database
    run_query('DROP TABLE IF EXISTS fpl_teams', return_data=False)
    df_teams.to_sql('fpl_teams', conn)
    conn.close()
def fetch_name(team_id):
    """Get name from team ID"""
    conn = connect_to_db()
    cursor = conn.cursor()
    cursor.execute("select team_name from team_ids where team_id = ?",
                   [team_id])
    output = cursor.fetchone()[0]
    conn.close()
    return output
def fetch_id(team_name):
    """Get team ID from name"""
    conn = connect_to_db()
    cursor = conn.cursor()
    # Use a parameterised query: string formatting breaks on team names that
    # contain quotes, and is vulnerable to SQL injection
    cursor.execute(
        "select team_id from team_ids "
        "where team_name = ? or alternate_name = ?",
        [team_name, team_name])
    output = cursor.fetchone()[0]
    conn.close()
    return output
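# fetch_id() also checks the alternate_name column, so the short names used by
# other data sources can resolve to the same ID. Illustrative round trip
# (actual IDs depend on the order get_teams_from_wiki assigned them):
#
#   >>> fetch_id('Arsenal')
#   1
#   >>> fetch_name(fetch_id('Arsenal'))
#   'Arsenal'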
async def get_fpl_fixtures():
    """Download upcoming fixture data from the Fantasy Premier League website"""
    # Connect to the sqlite3 DB
    conn = connect_to_db()
    fixture_list = pd.DataFrame()
    async with aiohttp.ClientSession() as session:
        fpl = FPL(session)
        fixtures = await fpl.get_fixtures(return_json=True)
        for fixture in fixtures:
            # Drop the nested stats structure so each fixture flattens to one row
            del fixture['stats']
            fixture_list = fixture_list.append(pd.DataFrame(fixture, index=[0]))
    # Upload the data to a table in the database
    run_query(query='DROP TABLE IF EXISTS fpl_fixtures', return_data=False)
    fixture_list.to_sql('fpl_fixtures', conn)
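# The FPL functions above are coroutines, so they must be driven by an event
# loop rather than called directly, e.g. (Python 3.7+; note that get_fpl_teams
# reads from the fpl_fixtures table, so fixtures are loaded first):
#
#   import asyncio
#   asyncio.run(get_fpl_fixtures())
#   asyncio.run(get_fpl_teams())
#   asyncio.run(get_player_data())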
def make_predictions():
    # Update tables
    update_tables()
    # Get the current season
    current_season = run_query(
        query='select max(season) from main_fixtures').iloc[0, 0]
    # Load the in-production model
    model = MatchResultXGBoost(load_trained_model=True,
                               problem_name='match-predict-base')
    # Get upcoming games (that we haven't predicted yet)
    df = get_upcoming_games()
    logger.info(f'Making predictions for upcoming games: {df}')
    # Make predictions
    predictions_df = pd.DataFrame(columns=[
        'date', 'home_id', 'away_id', 'season', 'home_odds', 'draw_odds',
        'away_odds', 'H', 'D', 'A', 'model', 'version', 'creation_time'
    ])
    for _, row in df.iterrows():
        input_dict = {
            "date": row['kickoff_time'],
            "home_id": row['home_id'],
            "away_id": row['away_id'],
            "season": current_season,
            "home_odds": row['home_odds'],
            "draw_odds": row['draw_odds'],
            "away_odds": row['away_odds'],
        }
        # Get predictions from the model
        predictions = model.predict(**input_dict)
        # Combine predictions with input data
        output_dict = dict(input_dict, **predictions)
        # Convert the dictionary to a DataFrame (index must be list-like)
        output_df = pd.DataFrame(output_dict,
                                 columns=predictions_df.columns,
                                 index=[len(predictions_df)])
        # Add the row to the output DataFrame
        predictions_df = predictions_df.append(output_df)
    # No trailing commas here: they would turn each value into a 1-tuple
    predictions_df["model"] = model.model_id
    predictions_df["version"] = __version__
    predictions_df["creation_time"] = str(dt.datetime.today())
    with connect_to_db() as conn:
        # TODO: Upload to S3 if LOCAL is False
        # Upload the output DataFrame to the DB
        predictions_df.to_sql('latest_predictions', conn, if_exists='replace')
def upload_to_table(self, df, table_name, model_id=None):
    # Upload the predictions to the model_predictions table
    conn = connect_to_db()
    if 'creation_time' not in df.columns:
        df['creation_time'] = dt.datetime.now()
    try:
        model_ids = run_query(
            query=f'select distinct model_id from {table_name}')
        model_ids = list(model_ids['model_id'].unique())
    except sqlite3.OperationalError:
        # The table doesn't exist yet, so there are no model IDs
        model_ids = []
    if model_id in model_ids:
        logger.warning(
            f'Model id: {model_id} already exists in the table {table_name}!')
    # Add the model ID so we can compare model performances
    if model_id is not None:
        df['model_id'] = model_id
    df.to_sql(table_name, con=conn, if_exists='append', index=False)
    conn.close()
def update_fixtures_from_fbd(table_name='main_fixtures'):
    # Connect to the database
    conn = connect_to_db()
    cursor = conn.cursor()
    # Drop and recreate the table we are going to populate
    cursor.execute("DROP TABLE IF EXISTS {tn}".format(tn=table_name))
    cursor.execute("""CREATE TABLE {tn} (fixture_id INTEGER, home_team TEXT,
        home_id INTEGER, away_team TEXT, away_id INTEGER, date DATE,
        home_score INTEGER, away_score INTEGER, home_shots INTEGER,
        away_shots INTEGER, full_time_result TEXT, b365_home_odds REAL,
        b365_draw_odds REAL, b365_away_odds REAL, referee TEXT,
        over_2p5_odds REAL, under_2p5_odds REAL, ah_home_odds REAL,
        ah_away_odds REAL, ah_home_handicap REAL, season TEXT,
        home_yellow_cards INTEGER, away_yellow_cards INTEGER,
        home_red_cards INTEGER, away_red_cards INTEGER)""".format(
        tn=table_name))
    conn.commit()
    # Create the list of URLs to get data from (one per Premier League season)
    urls = [
        'http://www.football-data.co.uk/mmz4281/0304/E0.csv',
        'http://www.football-data.co.uk/mmz4281/0405/E0.csv',
        'http://www.football-data.co.uk/mmz4281/0506/E0.csv',
        'http://www.football-data.co.uk/mmz4281/0607/E0.csv',
        'http://www.football-data.co.uk/mmz4281/0708/E0.csv',
        'http://www.football-data.co.uk/mmz4281/0809/E0.csv',
        'http://www.football-data.co.uk/mmz4281/0910/E0.csv',
        'http://www.football-data.co.uk/mmz4281/1011/E0.csv',
        'http://www.football-data.co.uk/mmz4281/1112/E0.csv',
        'http://www.football-data.co.uk/mmz4281/1213/E0.csv',
        'http://www.football-data.co.uk/mmz4281/1314/E0.csv',
        'http://www.football-data.co.uk/mmz4281/1415/E0.csv',
        'http://www.football-data.co.uk/mmz4281/1516/E0.csv',
        'http://www.football-data.co.uk/mmz4281/1617/E0.csv',
        'http://www.football-data.co.uk/mmz4281/1718/E0.csv',
        'http://www.football-data.co.uk/mmz4281/1819/E0.csv',
        'http://www.football-data.co.uk/mmz4281/1920/E0.csv'
    ]
    for url in urls:
        extract_data_from_fbd(url, table_name)
    # Close the connection
    conn.close()
def get_teams_from_wiki():
    # Connect to database
    conn = connect_to_db()
    website_url = requests.get(
        'https://en.wikipedia.org/wiki/List_of_Premier_League_clubs').text
    soup = BeautifulSoup(website_url, features="lxml")
    My_table = soup.find('div', {'class': 'timeline-wrapper'})
    links = My_table.findAll('area')
    teams = []
    for link in links:
        team = link.get('alt').encode('UTF-8').decode()
        # Convert the URL-encoded '%26' back to '&'
        team = team.replace('%26', '&')
        # Remove ' A.F.C' and ' F.C'
        team = team.replace('A.F.C', '')
        team = team.replace('F.C', '')
        # Remove any '.'s
        team = team.replace('.', '')
        # Remove white space from the start and the end
        team = team.strip()
        teams.append(team)
    df = pd.DataFrame()
    df['team_name'] = teams
    df = df.sort_values('team_name')
    df['team_id'] = np.arange(len(df)) + 1
    # Load the names into the database
    run_query(query='drop table if exists team_ids', return_data=False)
    run_query(query='create table team_ids (team_name TEXT, team_id INTEGER, '
                    'alternate_name TEXT, alternate_name2 TEXT)',
              return_data=False)
    for row in df.iterrows():
        params = [row[1]['team_name'], row[1]['team_id']]
        params.append(fetch_alternative_name(row[1]['team_name']))
        params.append(fetch_alternative_name2(row[1]['team_name']))
        run_query(query='insert into team_ids(team_name, team_id, '
                        'alternate_name, alternate_name2) values(?, ?, ?, ?)',
                  params=params,
                  return_data=False)
    conn.commit()
    conn.close()
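# fetch_alternative_name() and fetch_alternative_name2() are lookup helpers
# defined elsewhere in the project. A hypothetical sketch of their shape: they
# map a canonical club name to the short names used by other data sources
# (the mapping shown is an illustrative subset, not the project's real data):
ALTERNATE_NAMES = {
    'Manchester City': 'Man City',
    'Manchester United': 'Man United',
    'Wolverhampton Wanderers': 'Wolves',
}


def fetch_alternative_name(team_name):
    """Return a known alternate spelling of the team name, or None (sketch)."""
    return ALTERNATE_NAMES.get(team_name)


def fetch_alternative_name2(team_name):
    """Return a second alternate spelling, or None (sketch: none defined)."""
    return None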
def get_info(home_id, away_id, date, season, home_odds, draw_odds, away_odds):
    """Given the date and home/away team IDs, get model features"""
    conn = connect_to_db()
    h_manager = get_manager(team_id=home_id, date=date)
    a_manager = get_manager(team_id=away_id, date=date)
    # Check that data was retrieved (catch the error early to speed up debugging)
    assert len(h_manager) > 0, 'No data returned for home manager'
    assert len(a_manager) > 0, 'No data returned for away manager'
    # Get the max date from the database
    max_date = run_query(query='select max(date) from main_fixtures')
    max_date = pd.to_datetime(max_date.iloc[0, 0])
    # Get the max fixture_id on that date (the most recent fixture in the data)
    max_fixture = run_query(
        query="select max(fixture_id) id from main_fixtures "
              "where date = '{}'".format(str(max_date)))
    max_fixture = max_fixture.iloc[0, 0]
    info_dict = {
        "date": date,
        "home_id": home_id,
        "home_team": fetch_name(home_id),
        "away_id": away_id,
        "away_team": fetch_name(away_id),
        "fixture_id": max_fixture,
        "home_manager_start": h_manager.loc[0, "start_date"],
        "away_manager_start": a_manager.loc[0, "start_date"],
        "season": season,
        "home_odds": home_odds,
        "draw_odds": draw_odds,
        "away_odds": away_odds
    }
    output = pd.DataFrame(info_dict, index=[0])
    conn.close()
    return output
def create_team_fixtures():
    # Connect to database
    conn = connect_to_db()
    cursor = conn.cursor()
    # Calculate the home team stats
    cursor.execute("drop table if exists team_fixtures_home")
    cursor.execute("""
        CREATE TABLE team_fixtures_home as
        SELECT fixture_id,
               date,
               home_team as team_name,
               home_id as team_id,
               season,
               home_score goals_for,
               away_score goals_against,
               home_score - away_score goal_difference,
               home_yellow_cards yellow_cards,
               home_red_cards red_cards,
               home_shots shots_for,
               away_shots shots_against,
               home_shots - away_shots shot_difference,
               1 as is_home,
               b365_home_odds as b365_win_odds,
               case
                   when home_score > away_score then 'W'
                   when home_score < away_score then 'L'
                   when home_score = away_score then 'D'
               end result,
               case
                   when home_score > away_score then 3
                   when home_score < away_score then 0
                   when home_score = away_score then 1
               end points
        FROM main_fixtures""")
    # Calculate the table for away only
    cursor.execute("drop table if exists team_fixtures_away")
    cursor.execute("""
        CREATE TABLE team_fixtures_away as
        SELECT fixture_id,
               date,
               away_team as team_name,
               away_id as team_id,
               season,
               away_score goals_for,
               home_score goals_against,
               away_score - home_score goal_difference,
               away_yellow_cards yellow_cards,
               away_red_cards red_cards,
               away_shots shots_for,
               home_shots shots_against,
               away_shots - home_shots shot_difference,
               0 as is_home,
               b365_away_odds as b365_win_odds,
               case
                   when home_score > away_score then 'L'
                   when home_score < away_score then 'W'
                   when home_score = away_score then 'D'
               end result,
               case
                   when home_score > away_score then 0
                   when home_score < away_score then 3
                   when home_score = away_score then 1
               end points
        FROM main_fixtures""")
    # Combine the home and away tables to get a full table
    cursor.execute("drop table if exists team_fixtures")
    cursor.execute("""
        CREATE TABLE team_fixtures as
        select * from team_fixtures_home
        UNION ALL
        select * from team_fixtures_away""")
    conn.commit()
    conn.close()
def get_latest_fixtures():
    """Get the latest Premier League fixtures from the Betfair Exchange API"""
    trading = connect_to_betfair()
    trading.login()
    # Define what type of data to retrieve from the API
    market_projection = [
        "MARKET_START_TIME", "RUNNER_DESCRIPTION", "COMPETITION", "EVENT",
        "MARKET_DESCRIPTION"
    ]
    # Create a market filter
    event_filter = betfairlightweight.filters.market_filter(
        event_type_ids=[1],
        market_countries=['GB'],
        market_type_codes=['MATCH_ODDS'],
        text_query='English Premier League')
    # Get the market catalogue with event info
    market_catalogues = trading.betting.list_market_catalogue(
        filter=event_filter,
        market_projection=market_projection,
        max_results=200,
    )
    # Filter for Premier League markets
    prem_markets = [
        market for market in market_catalogues
        if market.competition.name == 'English Premier League'
    ]
    # Populate a DataFrame with the catalogue info
    df_odds = pd.DataFrame(columns=['market_id', 'home_team', 'away_team'])
    for market in prem_markets:
        names = market.event.name.split(' v ')
        # Get the selection IDs for the home and away runners
        home_id = [
            runner for runner in market.runners
            if runner.runner_name == names[0]
        ][0].selection_id
        away_id = [
            runner for runner in market.runners
            if runner.runner_name == names[1]
        ][0].selection_id
        # Add everything to a DataFrame
        df_odds = df_odds.append(
            pd.DataFrame(
                {
                    "market_id": market.market_id,
                    'start_time': market.market_start_time,
                    "home_team": names[0],
                    "home_id": home_id,
                    "away_team": names[1],
                    "away_id": away_id
                },
                index=[len(df_odds)]))
    # Get market books (betting info) of the Premier League markets
    market_books = trading.betting.list_market_book(
        market_ids=[market.market_id for market in prem_markets])
    # Populate a DataFrame with the market book information
    df_mb = pd.DataFrame(columns=['market_id', 'selection_id', 'odds'])
    for book in market_books:
        for runner in book.runners:
            sid = {
                'market_id': book.market_id,
                'selection_id': runner.selection_id,
                'odds': runner.last_price_traded
            }
            df_mb = df_mb.append(pd.DataFrame(sid, index=[len(df_mb)]))
    output_cols = [
        'market_id', 'market_start_time', 'home_team', 'home_id', 'away_id',
        'away_team', 'home_odds', 'draw_odds', 'away_odds'
    ]
    # Check whether there are any markets open
    if len(df_odds) == 0 or len(df_mb) == 0:
        df_output = pd.DataFrame(columns=output_cols)
    else:
        # Merge the DataFrames containing market catalogue and market book info
        df_output = pd.merge(df_odds,
                             df_mb,
                             left_on=['market_id', 'home_id'],
                             right_on=['market_id', 'selection_id'])
        # selection_id 58805 corresponds to 'The Draw' runner
        df_output = pd.merge(df_output,
                             df_mb[df_mb['selection_id'] == 58805],
                             on='market_id')
        df_output = pd.merge(df_output,
                             df_mb,
                             left_on=['market_id', 'away_id'],
                             right_on=['market_id', 'selection_id'])
        df_output = df_output.drop(
            ['selection_id', 'selection_id_x', 'selection_id_y'], axis=1)
        df_output.columns = [
            'away_id', 'away_team', 'home_id', 'home_team', 'market_id',
            'market_start_time', 'home_odds', 'draw_odds', 'away_odds'
        ]
        df_output = df_output[output_cols]
    # TODO: Check how accurate last_price_traded is; look into getting odds
    #  the normal way if these odds aren't similar to the Betfair odds
    with connect_to_db() as conn:
        df_output.to_sql('bfex_latest_fixtures', conn, if_exists='replace')
def get_manager_data():
    """Download HTML content from Wikipedia and break it down with
    BeautifulSoup"""
    # Connect to database
    conn = connect_to_db()
    url = "https://en.wikipedia.org/wiki/List_of_Premier_League_managers"
    website_url = requests.get(url).text
    soup = BeautifulSoup(website_url, features="html.parser")
    My_table = soup.find("table",
                         {"class": "wikitable sortable plainrowheaders"})
    # All data can be found in 'a' and 'span' tags
    links = My_table.findAll("a")
    links2 = My_table.findAll("span")
    # Load data from links
    link1_data = []
    for name in links:
        link1_data.append(name.get("title"))
    # Load data from links2
    link2_data = []
    for name in links2:
        link2_data.append(name.get("data-sort-value"))
    # Remove Nulls
    link2_data = [i for i in link2_data if i]
    link1_data = [i for i in link1_data if i]
    # Find the indexes of the manager names: entries without digits
    # (the other entries are sortable dates)
    name_indexes = []
    for i in range(0, len(link2_data)):
        if not hasNumbers(link2_data[i]):
            name_indexes.append(i)
    # The link data cycles through manager, country, team
    manager = []
    country = []
    team = []
    for i in range(0, len(link1_data)):
        if i % 3 == 0:
            manager.append(link1_data[i])
        if i % 3 == 1:
            country.append(link1_data[i])
        if i % 3 == 2:
            team.append(link1_data[i])
    # Create a DataFrame to store all of the scraped data
    managers = pd.DataFrame()
    for i in range(0, len(name_indexes)):
        manager_index = name_indexes[i]
        try:
            next_manager = name_indexes[i + 1]
        except IndexError:
            next_manager = len(name_indexes) + 1
        num_elements = next_manager - manager_index
        manager_sd = link2_data[manager_index + 1]
        # Strip the sort-key padding from the start date
        manager_sd = manager_sd[8:-5]
        managers.loc[i, "manager"] = " ".join(str.split(manager[i])[0:2])
        managers.loc[i, "country"] = country[i]
        managers.loc[i, "team"] = fix_team_name(team[i])
        managers.loc[i, "from"] = manager_sd
        # If the manager has left, there will be a second date we must capture
        if num_elements == 3:
            manager_ed = link2_data[manager_index + 2]
            # Strip the sort-key padding from the end date
            manager_ed = manager_ed[8:-5]
            managers.loc[i, "until"] = manager_ed
    # Replace club names with those used in the rest of the project
    # (e.g. use Manchester City instead of Man City). Also get the team_id
    managers["team_id"] = managers["team"].apply(lambda x: fetch_id(x))
    managers["team"] = managers["team_id"].apply(lambda x: fetch_name(x))
    # Keep the columns we need and parse the dates
    managers = managers[["manager", "team", "team_id", "from", "until"]]
    managers['from'] = pd.to_datetime(managers['from'])
    managers['until'] = pd.to_datetime(managers['until'])
    # If the until date of the last manager and the from date of the next
    # manager are the same, subtract 1 day from until of the previous manager
    managers['next_from'] = managers.groupby('team')['from'].shift(-1)
    managers['until'] = managers.apply(
        lambda x: x['until'] if x['until'] != x['next_from'] else x['until'] -
        dt.timedelta(days=1),
        axis=1)
    # Expand each managerial spell into one row per day
    df_dates = pd.DataFrame()
    for row in managers.iterrows():
        until = row[1]['until'] if \
            isinstance(row[1]['until'], pd._libs.tslibs.timestamps.Timestamp) \
            else dt.datetime.today()
        dates_between = pd.DataFrame([
            row[1]['from'] + dt.timedelta(days=x)
            for x in range((until - row[1]['from']).days + 1)
        ], columns=['date'])
        dates_between['manager'] = row[1]['manager']
        dates_between['team'] = row[1]['team']
        dates_between['team_id'] = row[1]['team_id']
        df_dates = df_dates.append(dates_between)
    # Concatenate manager names when two managers have managed at once
    df_dates = df_dates.groupby(
        ['date', 'team',
         'team_id'])['manager'].apply(lambda x: ' & '.join(x)).reset_index()
    # Get the number of days between each row to identify missing data
    df_dates['date_lag'] = df_dates.groupby(
        ['team', 'team_id'])['date'].apply(lambda x: x.shift(1))
    df_dates['date_diff'] = df_dates.apply(
        lambda x: (x['date'] - x['date_lag']).days, axis=1)
    # Create rows for missing data and add them to df_dates
    missing_data = df_dates[df_dates['date_diff'] > 1]
    missing_dates = pd.DataFrame()
    for row in missing_data.dropna().iterrows():
        dates_between = pd.DataFrame([
            row[1]['date_lag'] + dt.timedelta(days=x + 1) for x in range(
                (row[1]['date'] -
                 (row[1]['date_lag'] + dt.timedelta(days=1))).days)
        ], columns=['date'])
        dates_between['manager'] = 'No Manager'
        dates_between['team'] = row[1]['team']
        dates_between['team_id'] = row[1]['team_id']
        missing_dates = missing_dates.append(dates_between)
    # Drop unnecessary columns and add the missing data rows
    df_dates.drop(['date_lag', 'date_diff'], axis=1, inplace=True)
    df_dates = df_dates.append(missing_dates).reset_index(
        drop=True).sort_values('date')
    # Create an indicator that tells us each time a team changes manager
    df_dates['last_manager'] = df_dates.groupby(
        ['team', 'team_id'])['manager'].apply(lambda x: x.shift(1))
    df_dates['manager_change'] = df_dates.apply(
        lambda x: 1 if x['manager'] != x['last_manager'] else 0, axis=1)
    df_dates['manager_num'] = df_dates.groupby(
        ['team', 'team_id'])['manager_change'].cumsum()
    # Aggregate the manager data to get a start/end date for each managerial
    # spell (managers still in power run up to today's date)
    min_dates = df_dates.groupby(
        ['team', 'team_id', 'manager',
         'manager_num'])['date'].min().reset_index()
    max_dates = df_dates.groupby(
        ['team', 'team_id', 'manager',
         'manager_num'])['date'].max().reset_index()
    manager_dates = pd.merge(
        min_dates,
        max_dates,
        on=['team', 'team_id', 'manager',
            'manager_num']).reset_index(drop=True)
    manager_dates.columns = [
        'team', 'team_id', 'manager', 'manager_num', 'from', 'until'
    ]
    manager_dates = manager_dates.groupby(
        ['team', 'team_id', 'manager', 'manager_num',
         'from'])['until'].max().reset_index()
    manager_dates = manager_dates.groupby(
        ['team', 'team_id', 'manager', 'manager_num',
         'until'])['from'].min().reset_index()
    manager_dates = manager_dates.groupby(
        ['team', 'team_id', 'manager', 'manager_num',
         'from'])['until'].max().reset_index()
    # Drop and recreate the table we are going to populate
    run_query(query="DROP TABLE IF EXISTS managers", return_data=False)
    run_query(query="CREATE TABLE managers (manager TEXT, team TEXT, "
                    "team_id INTEGER, start_date DATE, end_date DATE)",
              return_data=False)
    for row in manager_dates.iterrows():
        params = [
            str(row[1]["manager"]),
            str(row[1]["team"]),
            int(row[1]["team_id"]),
            str(row[1]["from"].date()),
            str(row[1]["until"].date()),
        ]
        run_query(
            query="INSERT INTO managers (manager, team, team_id, start_date, "
                  "end_date) VALUES(?, ?, ?, ?, ?)",
            params=params,
            return_data=False)
    conn.commit()
    conn.close()
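# hasNumbers() and fix_team_name() are small helpers defined elsewhere in the
# project. Plausible sketches, assuming hasNumbers() simply detects digits and
# fix_team_name() maps Wikipedia's club names onto the names stored in
# team_ids (the mapping shown is an illustrative subset):


def hasNumbers(input_string):
    """Return True if the string contains any digit (sketch)."""
    return any(char.isdigit() for char in input_string)


def fix_team_name(team_name):
    """Translate a Wikipedia club name to the project's name (sketch)."""
    fixes = {
        'Man City': 'Manchester City',
        'Man Utd': 'Manchester United',
        'Spurs': 'Tottenham Hotspur',
    }
    return fixes.get(team_name, team_name)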
def extract_data_from_fbd(url, table_name, connection_url=None):
    # Connect to database
    conn = connect_to_db(connection_url)
    cursor = conn.cursor()
    # Pull the csv into a pandas DataFrame
    fixtureData = pd.read_csv(url,
                              skipinitialspace=True,
                              error_bad_lines=False,
                              keep_default_na=False).dropna()
    # Create additional columns for fixture_id, home_id and away_id.
    # Fill them with placeholder values for now
    fixtureData['fixture_id'] = 0
    fixtureData['home_id'] = 0
    fixtureData['away_id'] = 0
    # Replace characters in the column names that SQLite cannot handle
    # (the '.' must be escaped because str.replace treats it as a regex)
    fixtureData.columns = fixtureData.columns.str.replace('>', '_greater_than_'). \
        str.replace('<', '_less_than_').str.replace(r'\.', 'p')
    # Loop through each row of the data
    for i in range(0, len(fixtureData.index)):
        # Skip empty rows
        if fixtureData.loc[i, 'HomeTeam'] != "":
            # Change the game date to a more appropriate format
            # (the source switches between 2- and 4-digit years)
            try:
                date_corrected = datetime.strptime(
                    fixtureData.Date[i], '%d/%m/%y').strftime('%Y-%m-%d')
            except ValueError:
                date_corrected = datetime.strptime(
                    fixtureData.Date[i], '%d/%m/%Y').strftime('%Y-%m-%d')
            date_corrected = datetime.strptime(date_corrected, '%Y-%m-%d')
            # The next section determines which season the game is in, e.g. 16/17.
            # Extract the year and month
            year = date_corrected.year
            month = date_corrected.month
            if month in [1, 2, 3, 4, 5, 6]:
                # Second half of the season: the string is last_year/this_year
                season = str('{0:02d}'.format(int(str(year)[-2:]) -
                                              1)) + '/' + str(year)[-2:]
            else:
                # First half of the season: the string is this_year/next_year
                season = str(year)[-2:] + '/' + str(
                    '{0:02d}'.format(int(str(year)[-2:]) + 1))
            # Get the over/under 2.5 odds (they are named differently in older csvs)
            try:
                # Try the newer format
                o2p5 = fixtureData.BbMx_greater_than_2p5[i]
                u2p5 = fixtureData.BbMx_less_than_2p5[i]
            except AttributeError:
                # Fall back to the older format
                o2p5 = fixtureData.B365_greater_than_2p5[i]
                u2p5 = fixtureData.B365_less_than_2p5[i]
            # Get the Asian Handicap odds
            try:
                # Try the newer format
                ahHome = fixtureData.BbMxAHH[i]
                ahAway = fixtureData.BbMxAHA[i]
                ahHandicap = fixtureData.BbAHh[i]
            except AttributeError:
                # Fall back to the older format
                ahHome = fixtureData.B365AHH[i]
                ahAway = fixtureData.B365AHA[i]
                try:
                    ahHandicap = fixtureData.B365AH[i]
                except AttributeError:
                    ahHandicap = None
            # Define the parameters to load into the main_fixtures table
            params = [
                i + 1,  # Fixture ID
                fixtureData.HomeTeam[i],  # Home team name
                fetch_id(fixtureData.HomeTeam[i]),  # Home team ID
                fixtureData.AwayTeam[i],  # Away team name
                fetch_id(fixtureData.AwayTeam[i]),  # Away team ID
                date_corrected,  # Fixture date
                int(fixtureData.FTHG[i]),  # Home goals (full time)
                int(fixtureData.FTAG[i]),  # Away goals (full time)
                int(fixtureData.HS[i]),  # Home shots
                int(fixtureData.AS[i]),  # Away shots
                fixtureData.FTR[i],  # Full time result
                float(fixtureData.B365H[i]),  # Bet 365 home odds
                float(fixtureData.B365D[i]),  # Bet 365 draw odds
                float(fixtureData.B365A[i]),  # Bet 365 away odds
                fixtureData.Referee[i],  # Referee name
                o2p5,  # Over 2.5 goal odds
                u2p5,  # Under 2.5 goal odds
                ahHome,  # Asian Handicap home odds
                ahAway,  # Asian Handicap away odds
                ahHandicap,  # Asian Handicap
                season,  # Season name (e.g. 16/17)
                int(fixtureData.HY[i]),  # Home yellow cards
                int(fixtureData.AY[i]),  # Away yellow cards
                int(fixtureData.HR[i]),  # Home red cards
                int(fixtureData.AR[i])  # Away red cards
            ]
            # Load the parameters into the table
            query = """
                INSERT INTO {tn} (fixture_id, home_team, home_id, away_team,
                    away_id, date, home_score, away_score, home_shots,
                    away_shots, full_time_result, b365_home_odds,
                    b365_draw_odds, b365_away_odds, referee, over_2p5_odds,
                    under_2p5_odds, ah_home_odds, ah_away_odds,
                    ah_home_handicap, season, home_yellow_cards,
                    away_yellow_cards, home_red_cards, away_red_cards)
                VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?,
                    ?, ?, ?, ?, ?, ?, ?)"""
            cursor.execute(query.format(tn=table_name), params)
    # Commit the changes and close the connection
    conn.commit()
    conn.close()
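# Illustrative usage: pull a single season's results into main_fixtures
# (assumes the table has already been created by update_fixtures_from_fbd):
#
#   extract_data_from_fbd('http://www.football-data.co.uk/mmz4281/1920/E0.csv',
#                         'main_fixtures')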