def calculate_feature(self, game_list, ignore_cache=False):
        """Attach this feature's columns to ``game_list.games_df``.

        The columns named by ``self.get_feature_names()`` are loaded from the
        on-disk feature cache when a cached object exists. Otherwise they are
        produced by ``self.inner_calculate_feature`` and, unless
        ``ignore_cache`` is set, written back to the cache file.

        Args:
            game_list: Object exposing a ``games_df`` data frame. ``None``, a
                ``None`` frame or an empty frame is returned untouched.
            ignore_cache: When true, the cache is neither read nor written
                and the feature is always recomputed.

        Returns:
            ``game_list`` itself when there is nothing to compute or the
            cached columns were used (they are assigned in place); otherwise
            the value returned by ``self.inner_calculate_feature``.
        """
        # Validate the input before anything else: the cached branch also
        # dereferences ``game_list.games_df``, so these guards must not be
        # limited to the compute branch.
        if game_list is None:
            return game_list
        if game_list.games_df is None or game_list.games_df.empty:
            return game_list

        feature_name = self.get_printable_name()
        feature_cache_path = (URLHelper.cache_folder_path() + "features/" +
                              feature_name + ".dat")

        # Only touch the disk when the cache is actually allowed to be used.
        if not ignore_cache:
            cached_columns = FileHelper.read_object_from_disk(
                file_path=feature_cache_path)
            if cached_columns is not None:
                game_list.games_df[self.get_feature_names()] = cached_columns
                print('Feature ' + feature_name + ' loaded from cache')
                return game_list

        start_time = datetime.datetime.now()
        game_list = self.inner_calculate_feature(game_list)
        end_time = datetime.datetime.now()
        print('Feature ' + feature_name + ' took ' +
              str((end_time - start_time).total_seconds()) + ' seconds')

        if not ignore_cache:
            # Persist only this feature's columns, not the whole frame.
            FileHelper.save_object_to_disk(
                game_list.games_df[self.get_feature_names()],
                feature_cache_path)

        return game_list
Beispiel #2
0
    def game_list_by_url(url, league_name, season=""):
        """Return the game list for the CSV at ``url``, using a disk cache.

        The division is the last path segment of ``url`` up to its first dot.
        When ``season`` is empty, the season is the path segment right before
        the file name. The cache file lives at
        ``<cache folder><league_name>/<division><season>.dat``.

        Args:
            url: Location of the CSV; handed to ``pd.read_csv`` on a cache
                miss.
            league_name: Stored in the ``LeagueName`` column and used as the
                cache sub-folder name.
            season: Season label; derived from ``url`` when empty.

        Returns:
            The object read from the cache file when one exists, otherwise a
            new ``GameList`` built from the CSV (which is also saved to the
            cache before returning).
        """
        def read_game_list_from_csv(_url, _division, _season, _league_name):
            """Load the CSV, add the derived columns and wrap it in a GameList."""
            data_frame = pd.read_csv(_url,
                                     usecols=[
                                         "HomeTeam", "AwayTeam", "Div", "Date",
                                         "FTAG", "FTHG"
                                     ]).dropna(how='all')
            data_frame["LeagueName"] = _league_name  # new feature
            data_frame["Season"] = _season  # new feature
            # Built before the category conversion below, while the three
            # source columns can still be concatenated with ``+``.
            data_frame["SeasonId"] = data_frame["LeagueName"] + \
                                     data_frame["Div"] + \
                                     data_frame["Season"]

            # A game is a draw when both sides scored the same number of
            # goals. Rows with a missing score compare unequal and are
            # therefore marked as not drawn. This replaces the ``.ix``
            # indexer, which no longer exists in pandas >= 1.0.
            data_frame["Draw"] = data_frame["FTAG"] == data_frame["FTHG"]

            # Modeling
            for column in ("HomeTeam", "AwayTeam", "Div", "Season",
                           "LeagueName"):
                data_frame[column] = data_frame[column].astype("category")
            # NOTE(review): only two-digit years are accepted here; confirm
            # that every source file uses dd/mm/yy dates.
            data_frame["Date"] = pd.to_datetime(data_frame["Date"],
                                                format="%d/%m/%y")

            return GameList(_division, data_frame)

        # extracting the division from url
        division = url.split('/')[-1].split('.')[0]

        # extracting the season from url
        if not season:
            season = url.split('/')[-2]

        # define cache folder and file paths
        cache_folder_path = URLHelper.cache_folder_path() + league_name
        cache_file_path = cache_folder_path + "/" + division + season + ".dat"

        cached_game_list = FileHelper.read_object_from_disk(cache_file_path)

        # Compare against None rather than relying on truthiness, so that a
        # cached object which happens to evaluate falsy still counts as a hit.
        if cached_game_list is not None:
            return cached_game_list

        game_list = read_game_list_from_csv(url, division, season,
                                            league_name)
        FileHelper.save_object_to_disk(game_list, cache_file_path)
        return game_list