def calculate_feature(self, game_list, ignore_cache=False):
    """Attach this feature's columns to ``game_list.games_df``.

    Loads the columns from a per-feature disk cache when available,
    otherwise computes them via ``self.inner_calculate_feature`` and
    (unless ``ignore_cache``) writes the result back to the cache.

    Args:
        game_list: object exposing a ``games_df`` DataFrame; may be None.
        ignore_cache: when True, skip both cache read and cache write.

    Returns:
        The (possibly updated) ``game_list``; returned unchanged when it
        is None or its ``games_df`` is None/empty.
    """
    # Guard first. The original only validated game_list in the
    # cache-miss branch, so a cache hit with a None/empty game_list
    # raised AttributeError on `game_list.games_df[...]`.
    if game_list is None or game_list.games_df is None or game_list.games_df.empty:
        return game_list

    feature_cache_path = (URLHelper.cache_folder_path()
                          + "features/" + self.get_printable_name() + ".dat")
    cached_game_list = FileHelper.read_object_from_disk(
        file_path=feature_cache_path)

    if cached_game_list is not None and not ignore_cache:
        # Cached object is the previously saved block of feature columns.
        game_list.games_df[self.get_feature_names()] = cached_game_list
        print('Feature ' + self.get_printable_name() + ' loaded from cache')
    else:
        start_time = datetime.datetime.now()
        game_list = self.inner_calculate_feature(game_list)
        end_time = datetime.datetime.now()
        print('Feature ' + self.get_printable_name() + ' took '
              + str((end_time - start_time).total_seconds()) + ' seconds')
        if not ignore_cache:
            FileHelper.save_object_to_disk(
                game_list.games_df[self.get_feature_names()],
                feature_cache_path)
    return game_list
def game_list_by_url(url, league_name, season=""):
    """Build a ``GameList`` from a football-data CSV URL, with disk caching.

    Args:
        url: CSV location; the last path segment names the division
            (e.g. ".../1718/E0.csv" -> division "E0") and the segment
            before it names the season when ``season`` is not given.
        league_name: human-readable league name, also used as the cache
            subfolder.
        season: optional season id; extracted from ``url`` when empty.

    Returns:
        A ``GameList`` — from the cache file when present, otherwise
        freshly parsed from the CSV (and then saved to the cache).
    """
    def read_game_list_from_csv(_url, _division, _season, _league_name):
        # Parse the raw CSV and add derived/categorical columns.
        data_frame = pd.read_csv(_url, usecols=[
            "HomeTeam", "AwayTeam", "Div", "Date", "FTAG", "FTHG"
        ]).dropna(how='all')
        data_frame["LeagueName"] = _league_name  # new feature
        data_frame["Season"] = _season  # new feature
        data_frame["SeasonId"] = (data_frame["LeagueName"]
                                  + data_frame["Div"]
                                  + data_frame["Season"])
        # Draw flag: full-time away goals equal full-time home goals.
        # `.ix` (used originally) was removed in pandas 1.0; a single
        # vectorized comparison replaces the two masked assignments.
        data_frame["Draw"] = data_frame["FTAG"] == data_frame["FTHG"]
        # Modeling: low-cardinality string columns become categoricals.
        for column in ("HomeTeam", "AwayTeam", "Div", "Season", "LeagueName"):
            data_frame[column] = data_frame[column].astype("category")
        # NOTE(review): assumes two-digit years (dd/mm/yy) in the source
        # CSV — confirm; some football-data seasons ship dd/mm/yyyy.
        data_frame["Date"] = pd.to_datetime(data_frame["Date"],
                                            format="%d/%m/%y")
        return GameList(_division, data_frame)

    # extracting the division from url (e.g. ".../E0.csv" -> "E0")
    division = url.split('/')[-1].split('.')[0]
    # extracting the season from url when not supplied explicitly
    if not season:
        season = url.split('/')[-2]

    # define cache folder and file paths
    cache_folder_path = URLHelper.cache_folder_path() + league_name
    cache_file_path = cache_folder_path + "/" + division + season + ".dat"

    cached_game_list = FileHelper.read_object_from_disk(cache_file_path)
    # `is not None`, consistent with calculate_feature: a cached (even
    # "falsy") object is still a valid cache hit.
    if cached_game_list is not None:
        return cached_game_list
    game_list = read_game_list_from_csv(url, division, season, league_name)
    FileHelper.save_object_to_disk(game_list, cache_file_path)
    return game_list