Python Scraper Examples

Programming Language: Python

Namespace/Package Name: nhl_scraper.nhl

Class/Type: Scraper

Examples at hotexamples.com: 10

Python Scraper - 10 examples found. These are the top-rated real-world Python examples of nhl_scraper.nhl.Scraper extracted from open source projects. You can rate examples to help us improve the quality of examples.

Frequently Used Methods

Show Hide

Scraper(6)

teams(4)

_teams_playing_one_day(1)

box_scores(1)

games(1)

linescores(1)

Example #1

Show file

    def predict(self, roster):
        """Build a dataset of hockey predictions for the week.

        The pool of players is passed in through ``roster``.  The projections
        held in ``self.ppool`` are left-joined onto it, so every roster row is
        kept and gains one column per prediction stat.

        :param roster: Players to generate predictions for.  When it has no
            ``player_id`` column its index is reset first (presumably the
            index holds ``player_id`` -- confirm against callers).
        :type roster: DataFrame
        :return: Dataset of predictions, indexed by ``player_id`` when that
            column is present
        :rtype: DataFrame
        """
        self.nhl_scraper = Scraper()
        # Always work on a copy so the caller's frame is never modified.
        if 'player_id' not in roster.columns:
            my_roster = roster.reset_index()
        else:
            my_roster = roster.copy()

        if 'team_id' not in my_roster.columns:
            # we must map in teams
            self._fix_yahoo_team_abbr(my_roster)
            nhl_teams = self.nhl_scraper.teams()
            # The previous ``nhl_teams.set_index("id")`` call discarded its
            # result and so did nothing; ``id`` has to stay a column because
            # it is renamed to ``team_id`` after the merge.
            nhl_teams.rename(columns={'name': 'team_name'}, inplace=True)

            my_roster = my_roster.merge(nhl_teams,
                                        left_on='editorial_team_abbr',
                                        right_on='abbrev')
            my_roster.rename(columns={'id': 'team_id'}, inplace=True)

        # Join the predictions onto the roster.  The left join keeps players
        # without a projection; it also has the effect of carrying the
        # roster's own columns through to the result.
        # NOTE(review): this needs an ``abbrev`` column, which is only added
        # by the team merge above -- confirm rosters that already carry
        # ``team_id`` also carry ``abbrev``.
        prediction_columns = (self.scoring_categories + ['name', 'Tm'] +
                              Parser.goalie_headings)
        df = pd.merge(my_roster,
                      self.ppool[prediction_columns],
                      left_on=['name', 'abbrev'],
                      right_on=['name', 'Tm'],
                      how='left')
        if 'FOW' in self.scoring_categories:
            df.rename(columns={'FOW': 'FW'}, inplace=True)
        if 'player_id' in df.columns:
            df.set_index('player_id', inplace=True)
        return df

Example #2

Show file

        def all_loader():
            """Build the player table for the league, joined with NHL teams.

            Returns:
                DataFrame: one row per player whose (normalised) team
                abbreviation matches an NHL team, with ``league_id``,
                ``team_id`` and ``team_name`` columns added.
            """
            all_players = pd.DataFrame(League.all_players(self))
            self._fix_yahoo_team_abbr(all_players)
            self.nhl_scraper = Scraper()

            nhl_teams = self.nhl_scraper.teams()
            # The previous ``nhl_teams.set_index("id")`` call discarded its
            # result and so did nothing; ``id`` must remain a column because
            # it becomes ``team_id`` after the merge.
            nhl_teams.rename(columns={'name': 'team_name'}, inplace=True)

            all_players['league_id'] = self.league_id

            # merge() defaults to an inner join, so players whose abbreviation
            # has no NHL match are dropped here.
            all_players = all_players.merge(nhl_teams,
                                            left_on='editorial_team_abbr',
                                            right_on='abbrev')
            all_players.rename(columns={'id': 'team_id'}, inplace=True)
            return all_players

Example #3

Show file

File: yahoo_scraping.py Project: QuailAutomation/fantasy_bot

    def __init__(self,
                 league_id,
                 predition_type=PredictionType.days_14) -> None:
        """Load Yahoo projections for a league and convert them to per-game.

        The result is stored in ``self.all_players``, indexed by
        ``player_id``.

        Args:
            league_id: Yahoo league identifier passed to ``FantasyLeague``.
            predition_type (PredictionType): Projection horizon to request
                (the parameter name's spelling is kept for compatibility).
        """
        lg = FantasyLeague(league_id)
        self.scoring_categories = lg.scoring_categories()

        y_projections = YahooProjectionScraper(lg.league_id,
                                               self.scoring_categories)
        projections = y_projections.get_projections_df(predition_type.value)
        # if no projection, then zero
        projections.replace('-', 0, inplace=True)
        # Assign the result back: an in-place replace on a column selection
        # is a chained assignment and does not reliably update the frame
        # (it is a no-op under pandas copy-on-write).
        projections["team"] = projections["team"].replace(nhl_team_mappings)

        nhl_scraper = Scraper()
        nhl_teams = nhl_scraper.teams()
        # The previous ``nhl_teams.set_index("id")`` call discarded its result
        # and so did nothing; ``id`` must stay a column for the rename below.
        nhl_teams.rename(columns={'name': 'team_name'}, inplace=True)

        all_players = projections.merge(nhl_teams,
                                        left_on='team',
                                        right_on='abbrev')
        all_players.rename(columns={'id': 'team_id'}, inplace=True)
        for rank in ['preseason_rank', 'current_rank']:
            all_players[rank] = pd.to_numeric(all_players[rank],
                                              downcast="integer")
        # GP is incorrect for 7 and 14 day predictions, let's get nhl schedule and fix
        game_day = datetime.date.today()
        if predition_type != PredictionType.rest_season:
            num_days = 7 if predition_type == PredictionType.days_7 else 14
            games = find_teams_playing(game_day, num_days)
            all_players['GP'] = all_players["team_id"].map(games)
        all_players['GP'] = pd.to_numeric(all_players['GP'], downcast="float")
        # let's return projections per game
        for stat in self.scoring_categories:
            all_players[stat] = pd.to_numeric(all_players[stat],
                                              downcast="float")
            # The tiny epsilon keeps a GP of zero from dividing by zero.
            all_players[stat] = all_players[stat] / (all_players['GP'] +
                                                     .0000001)

        # y_projections.get_stats(outfile, league_id, scoring_categories)
        self.all_players = all_players.set_index('player_id')

Example #4

Show file

File: yahoo_scraping.py Project: QuailAutomation/fantasy_bot

def generate_predictions(league_id, predition_type=PredictionType.days_14):
    """Return Yahoo projections for a league with per-game stat columns.

    Args:
        league_id: Yahoo league identifier passed to ``FantasyLeague``.
        predition_type (PredictionType): Projection horizon to request (the
            parameter name's spelling is kept for compatibility).

    Returns:
        DataFrame: the projections joined with NHL team info, plus a
        ``<stat>_per_game`` column for every scoring category.
    """
    lg = FantasyLeague(league_id)
    scoring_categories = lg.scoring_categories()

    y_projections = YahooProjectionScraper(lg.league_id, scoring_categories)
    projections = y_projections.get_projections_df(predition_type.value)

    projections = projections.astype({'GP': 'int32'}, copy=False)
    # Reuse the categories fetched above instead of asking the league again.
    projections = projections.astype(
        {cat: 'int32'
         for cat in scoring_categories}, copy=False)

    # Assign the result back: an in-place replace on a column selection is a
    # chained assignment and does not reliably update the frame (it is a
    # no-op under pandas copy-on-write).
    projections["team"] = projections["team"].replace(nhl_team_mappings)

    nhl_scraper = Scraper()
    nhl_teams = nhl_scraper.teams()

    # The previous ``nhl_teams.set_index("id")`` call discarded its result and
    # so did nothing; ``id`` must stay a column for the rename below.
    nhl_teams.rename(columns={'name': 'team_name'}, inplace=True)

    all_players = projections.merge(nhl_teams,
                                    left_on='team',
                                    right_on='abbrev')
    all_players.rename(columns={'id': 'team_id'}, inplace=True)

    # GP is incorrect for 7 and 14 day predictions, let's get nhl schedule and fix
    game_day = datetime.date.today()
    if predition_type != PredictionType.rest_season:
        num_days = 7 if predition_type == PredictionType.days_7 else 14
        games = find_teams_playing(game_day, num_days)
        all_players['GP'] = all_players["team_id"].map(games)

    # let's return projections per game
    # NOTE(review): a GP of zero is not guarded against here and will give
    # inf/NaN in the per-game columns.
    for stat in scoring_categories:
        all_players[stat] = pd.to_numeric(all_players[stat], downcast="float")
        all_players[stat + "_per_game"] = all_players[stat] / all_players['GP']

    # y_projections.get_stats(outfile, league_id, scoring_categories)
    return all_players

Example #5

Show file

class FantasyLeague(League):
    """Represents a league in yahoo."""
    def __init__(self, league_id):
        """Set up the Yahoo league wrapper together with its local caches."""
        super().__init__(oauth_token, league_id)
        self.lg_cache = utils.LeagueCache(league_id)

        # Yahoo drop destinations mapped to the short fantasy_status codes.
        self.fantasy_status_code_translation = dict(waivers='W',
                                                    freeagents='FA')

        # Nothing is resolved until as_of() supplies a date; that date is
        # what transactions get rolled forward to.
        self.as_of_date = None
        self._all_players_df = None
        self._roster_makeup = None

        # Scoring helpers, created elsewhere on demand.
        # TODO unsure if we should load this, or hardcode for performance
        self.scorer = None
        self.score_comparer = None

        # the cached ACTUAL roster results
        self.cached_actual_results = {}

    def roster_makeup(self, position_type=None):
        if self._roster_makeup is None:
            positions = self.positions()
            roster_makeup = {}
            for position in positions.keys():
                roster_makeup[position] = int(positions[position]['count'])
            self._roster_makeup = roster_makeup
        if position_type:
            return {
                key: value['count']
                for key, value in self.positions().items()
                if position_type == value.get('position_type', None)
            }
        return self._roster_makeup

    def scoring_categories(self, position_type=('P',)):
        """Return list of categories that count for scoring.

        Args:
            position_type: Container of position-type codes to include.  The
                default is an immutable tuple rather than a shared mutable
                list; callers may still pass a list.

        Returns:
            list: ``display_name`` of every stat category whose
            ``position_type`` is in ``position_type``.
        """
        return [
            stat['display_name'] for stat in League.stat_categories(self)
            if stat['position_type'] in position_type
        ]

    def all_players(self):
        """Return dataframe of entire league for as of date."""
        if self.as_of_date:
            return self._all_players_df
        else:
            raise NoAsOfDateException()

    def _all_players(self):
        """Return all players in league, served from the league cache.

        The cache is given a loader that builds the table and an expiry of
        seven days.
        """
        def all_loader():
            """Build the player table joined with NHL team information."""
            all_players = pd.DataFrame(League.all_players(self))
            self._fix_yahoo_team_abbr(all_players)
            self.nhl_scraper = Scraper()

            nhl_teams = self.nhl_scraper.teams()
            # The previous ``nhl_teams.set_index("id")`` call discarded its
            # result and so did nothing; ``id`` must remain a column because
            # it becomes ``team_id`` after the merge.
            nhl_teams.rename(columns={'name': 'team_name'}, inplace=True)

            all_players['league_id'] = self.league_id

            # merge() defaults to an inner join, so players whose abbreviation
            # has no NHL match are dropped here.
            all_players = all_players.merge(nhl_teams,
                                            left_on='editorial_team_abbr',
                                            right_on='abbrev')
            all_players.rename(columns={'id': 'team_id'}, inplace=True)
            return all_players

        expiry = timedelta(days=7)
        return self.lg_cache.load_all_players(expiry, all_loader)

    def transactions(self):
        """Return the league's transactions, served from the league cache.

        The cache is given a loader that calls ``League.transactions`` and
        an expiry of 60 minutes.
        """
        def fetch_transactions():
            return League.transactions(self)

        return self.lg_cache.load_transactions(timedelta(minutes=60),
                                               fetch_transactions)

    def team_by_id(self, team_id):
        """Resolve a ``Team`` from the last part of its team key.

        NOTE(review): this class defines ``team_by_id`` a second time further
        down; the later definition replaces this one at class creation.
        """
        team_key = f"{self.league_id}.t.{team_id}"
        return Team(self.sc, team_key)

    def team_by_key(self, team_key):
        """Build the ``Team`` object for a full Yahoo team key."""
        session = self.sc
        return Team(session, team_key)

    def _fix_yahoo_team_abbr(self, df):
        nhl_team_mappings = {
            'LA': 'LAK',
            'Ott': 'OTT',
            'Bos': 'BOS',
            'SJ': 'SJS',
            'Anh': 'ANA',
            'Min': 'MIN',
            'Nsh': 'NSH',
            'Tor': 'TOR',
            'StL': 'STL',
            'Det': 'DET',
            'Edm': 'EDM',
            'Chi': 'CHI',
            'TB': 'TBL',
            'Fla': 'FLA',
            'Dal': 'DAL',
            'Van': 'VAN',
            'NJ': 'NJD',
            'Mon': 'MTL',
            'Ari': 'ARI',
            'Wpg': 'WPG',
            'Pit': 'PIT',
            'Was': 'WSH',
            'Cls': 'CBJ',
            'Col': 'COL',
            'Car': 'CAR',
            'Buf': 'BUF',
            'Cgy': 'CGY',
            'Phi': 'PHI',
            'Sea': 'SEA'
        }
        df["editorial_team_abbr"].replace(nhl_team_mappings, inplace=True)

    def draft_results(self, format='List'):
        """Return the draft results.

        Args:
            format: ``'Pandas'`` returns a DataFrame; any other value returns
                the raw result of the parent class unchanged.

        Returns:
            The raw draft results, or a DataFrame indexed by ``player_id``
            with ``fantasy_team_id``, ``draft_round`` and ``draft_pick``
            columns.  An empty DataFrame is returned when there are no picks.
        """
        raw = super().draft_results()
        if format != 'Pandas':
            return raw
        if len(raw) == 0:
            return pd.DataFrame()

        draft_df = pd.DataFrame(raw, columns=raw[0].keys())
        try:
            # The ids are dot-separated fields of the Yahoo keys.
            player_key_parts = draft_df.player_key.str.split('.', expand=True)
            draft_df['player_id'] = player_key_parts[2].astype('int16')
            team_key_parts = draft_df.team_key.str.split('.', expand=True)
            draft_df['fantasy_team_id'] = team_key_parts[4].astype('int8')
            draft_df.set_index(['player_id'], inplace=True)
            renamed = {'round': 'draft_round', 'pick': 'draft_pick'}
            draft_df.rename(columns=renamed, inplace=True)
        except AttributeError:
            print("Draft probably has not begun yet")
        return draft_df

    def team_by_id(self, team_id):
        """Return team assigned to fantasy team id.

        Args:
            team_id ([int]): The team index.
        """
        return self._all_players_df[self._all_players_df.fantasy_status ==
                                    team_id]

    def free_agents(self, position=None):
        """Return the free agents at give datetime."""
        return self._all_players_df[self._all_players_df.fantasy_status ==
                                    'FA']

    def waivers(self, asof_date=None):
        """Return players on waivers."""
        return self._all_players_df[self._all_players_df.fantasy_status == 'W']

    def num_moves_made(self, week):
        """Return the roster adds made by this team in the current week.

        Any ``week`` other than the current one (or a falsy value) yields 0.
        """
        if week and week != self.current_week():
            return 0

        moves_by_team = {}
        scoreboard = self.scoreboard()
        tree = objectpath.Tree(scoreboard)
        my_team_id = super().team_key()
        for match in tree.execute('$..matchup'):
            # Each matchup lists its two teams under the keys '0' and '1'.
            for slot in ('0', '1'):
                team_meta = match['0']['teams'][slot]['team'][0]
                moves_by_team[team_meta[0]['team_key']] = \
                    int(team_meta[11]['roster_adds']['value'])

        return moves_by_team[my_team_id]

    def as_of(self, asof_date):
        """Rebuild the league's player table as it stood at ``asof_date``.

        Loads all players, marks everyone as a free agent, assigns drafted
        players to their fantasy team, then replays the league transactions
        whose timestamp is earlier than ``asof_date``.  The result is stored
        in ``self._all_players_df`` and the scorer objects are reset.  Nothing
        is recomputed when ``asof_date`` equals the date already loaded.

        Args:
            asof_date: A ``date`` or ``datetime``.  A plain ``date`` is
                converted to midnight of that day.

        Returns:
            FantasyLeague: ``self``.
        """
        # Exact type check (presumably deliberate): datetime is a subclass of
        # date, so isinstance() would also match datetimes and truncate them.
        if type(asof_date) is date:
            asof_date = datetime.combine(asof_date, datetime.min.time())

        if not self.as_of_date or asof_date != self.as_of_date:
            all_players = self._all_players()
            all_players = all_players.set_index(keys=['player_id'])
            draft_df = self.draft_results(format='Pandas')
            # fantasy_status holds a fantasy team id, 'FA' (free agent) or
            # 'W' (waivers); waiver_date is filled in when a drop is applied.
            # TODO add waiver expiry column
            all_players['fantasy_status'] = 'FA'
            all_players['waiver_date'] = np.nan
            # Assign drafted players to their team; only ids present in both
            # frames are touched.
            if len(draft_df) > 0 and 'fantasy_team_id' in draft_df.columns:
                all_players.loc[all_players.index.intersection(draft_df.index),
                                'fantasy_status'] = draft_df['fantasy_team_id']

            txns = self.transactions()
            asof_timestamp = datetime.timestamp(asof_date)
            # Walk the list from its end two entries at a time: txns[::-2]
            # yields txns[-1], txns[-3], ... and txns[-2::-2] yields txns[-2],
            # txns[-4], ...  In each pair trans[1] carries 'type' and
            # 'timestamp'; trans[0] is handed to the handler as the payload.
            for trans in zip(txns[::-2], txns[-2::-2]):
                if int(trans[1]['timestamp']) < asof_timestamp:
                    # Dispatch by name, e.g. 'add/drop' -> _apply_adddrop.
                    method = f"_apply_{trans[1]['type'].replace('/','')}"
                    if method in FantasyLeague.__dict__.keys():
                        FantasyLeague.__dict__[method](self, trans,
                                                       all_players)
                    elif method == '_apply_commish':
                        # Commissioner transactions are ignored.
                        pass
                    else:
                        LOG.error(f"Unexpected transaction type: {method}")

            self.as_of_date = asof_date
            self._all_players_df = all_players
            # Reset so the scorers are not reused against the old table.
            self.scorer = None
            self.score_comparer = None
        return self

    def _apply_adddrop(self, txn_info, post_draft_player_list):
        """Apply an add/drop transaction: add player '0', drop player '1'.

        ``txn_info`` is a pair: element 0 holds the players, element 1 holds
        the transaction timestamp.
        """
        payload = txn_info[0]
        dropped_at = datetime.fromtimestamp(int(txn_info[1]['timestamp']))
        self._add_player(payload['players']['0'], post_draft_player_list)
        dropped_player = payload['players']['1']
        self._drop_player(dropped_player, post_draft_player_list, dropped_at)

    def _apply_add(self, txn_info, post_draft_player_list):
        """Apply an add transaction for player '0' of ``txn_info[0]``."""
        added_player = txn_info[0]['players']['0']
        self._add_player(added_player, post_draft_player_list)

    def _apply_drop(self, txn_info, post_draft_player_list):
        """Apply a drop transaction for player '0' of ``txn_info[0]``.

        The drop time is taken from the timestamp in ``txn_info[1]``.
        """
        payload = txn_info[0]
        dropped_at = datetime.fromtimestamp(int(txn_info[1]['timestamp']))
        dropped_player = payload['players']['0']
        self._drop_player(dropped_player, post_draft_player_list, dropped_at)

    def _add_player(self, player_info, post_draft_player_list):
        """Set the added player's ``fantasy_status`` to the destination team.

        The team id is the last dot-separated field of
        ``destination_team_key``.
        """
        player = player_info['player']
        player_id = int(player[0][1]['player_id'])
        player_name = player[0][2]['name']['full']
        txn_data = player[1]['transaction_data'][0]
        dest_team_id = int(txn_data['destination_team_key'].split('.')[-1])
        dest_team_name = txn_data['destination_team_name']
        post_draft_player_list.at[player_id, 'fantasy_status'] = dest_team_id
        LOG.debug(f'apply add, player: {player_name} to: {dest_team_name}')

    def _drop_player(self, player_info, post_draft_player_list, drop_date):
        """Mark a dropped player as on waivers or as a free agent.

        The player clears waivers at midnight of the day that is
        ``waiver_time + 1`` days after ``drop_date``.  If that moment is still
        in the future the status becomes the code for the transaction's
        destination type and ``waiver_date`` is recorded; otherwise the
        status becomes ``'FA'``.
        """
        player = player_info['player']
        player_id = int(player[0][1]['player_id'])
        player_name = player[0][2]['name']['full']
        txn_data = player[1]['transaction_data']
        source_team_name = txn_data['source_team_name']
        destination = txn_data['destination_type']

        waiver_days = int(self.settings()['waiver_time'])
        clearing_day = drop_date + timedelta(days=waiver_days + 1)
        time_clear_waivers = datetime.combine(clearing_day,
                                              datetime.min.time())

        if not time_clear_waivers > datetime.now():
            post_draft_player_list.at[player_id, 'fantasy_status'] = 'FA'
        else:
            status_code = self.fantasy_status_code_translation[destination]
            post_draft_player_list.at[player_id,
                                      'fantasy_status'] = status_code
            post_draft_player_list.at[player_id,
                                      'waiver_date'] = time_clear_waivers
        LOG.debug(
            f'dropping player: {player_name}, from: {source_team_name} to: {destination}'
        )

    def stat_predictor(self):
        """Return the prediction builder, served from the league cache.

        The cache is given a loader that constructs a
        ``fantasysp_scrape.Parser`` for this league's scoring categories and
        an expiry of seven days.
        """
        def build_parser():
            categories = self.scoring_categories()
            return fantasysp_scrape.Parser(scoring_categories=categories)

        return self.lg_cache.load_prediction_builder(timedelta(days=7),
                                                     build_parser)

    def get_projections(self):
        """Return the projections dataframe for the current player table.

        Raises:
            NoAsOfDateException: if no as-of date has been set yet.
        """
        if self.as_of_date:
            predictor = self.stat_predictor()
            return predictor.predict(self._all_players_df)
        raise NoAsOfDateException("As of date not specified yet")

    def _actuals_for_team_day(self, team_id, game_day, scoring_categories):
        """Return the actual stats of a team's active lineup for one day.

        Results are cached in Redis under ``actuals:<team_id>-<date>``.  Rows
        whose ``G`` value is missing are removed from the returned frame on
        both the cache-hit and the cache-miss path; previously only a cache
        miss returned the filtered frame, so the two paths disagreed.

        Args:
            team_id: Team key passed to ``team_by_key``.
            game_day: Day to fetch; must provide ``to_pydatetime()``.
            scoring_categories (list): Stat columns to keep.

        Returns:
            DataFrame: indexed by ``player_id``, with the scoring categories
            and a ``score_type`` column set to ``'a'``.
        """
        _game_day = game_day.to_pydatetime().date()
        actual_cache_key = f"actuals:{team_id}-{_game_day}"
        cached = RedisClient().conn.get(actual_cache_key)
        if cached:
            # NOTE(review): pickle.loads runs arbitrary code from the payload;
            # this is only safe while the Redis instance is trusted.
            daily_stats = pickle.loads(cached)
        else:
            the_roster = self.team_by_key(team_id).roster(day=game_day)
            opp_daily_roster = pd.DataFrame(the_roster)
            # Bench players and goalies are excluded from the lineup.
            lineup = opp_daily_roster.query(
                'selected_position != "BN" & selected_position != "G"')
            stats = self.player_stats(lineup.player_id.tolist(),
                                      "date",
                                      date=_game_day)
            daily_stats = pd.DataFrame(stats).loc[:, ['player_id'] +
                                                  scoring_categories]
            daily_stats.loc[:, 'score_type'] = 'a'
            daily_stats.replace('-', np.nan, inplace=True)
            daily_stats.set_index('player_id', inplace=True)
            # Pause between uncached fetches (presumably to throttle API use).
            time.sleep(.5)
            # The unfiltered frame is cached, as before, so existing cache
            # entries stay compatible.
            RedisClient().conn.set(actual_cache_key, pickle.dumps(daily_stats))
        return daily_stats.loc[~daily_stats.G.isnull(), :]

    def score_team_fpts(self,
                        player_projections,
                        date_range,
                        roster_change_set=None,
                        simulation_mode=True,
                        date_last_use_actuals=None,
                        team_id=None):
        """Score the team day by day over a date range.

        Args:
            player_projections (DataFrame): Projections for all players on the team
            date_range (pd.DateRange): Date range to project for
            roster_change_set (RosterChangeSet, optional): Changes to make throughout the scoring period. Defaults to None.
            simulation_mode (bool, optional): Ignores actuals if games already played, still uses projected scoring. Defaults to True.
            date_last_use_actuals (DateTime): If not in simulation mode, this value sets the last day to use actual scoring instead of projecting.
                Defaults to one second before midnight today.
            team_id (string, optional): Need this to look up actual scores for days which have passed.

        Returns:
            tuple: ``(roster_change_set, roster_week_results)``.  The results
            are a DataFrame indexed by ``(play_date, player_id)``, or ``None``
            when no day in the range produced any rows.
        """
        # NOTE(review): DataFrame.append, used below, was removed in pandas
        # 2.0; migrate to pd.concat when the pandas version is upgraded.

        # we are going to modify this as we iterate the dates.
        current_projections = player_projections.copy()
        # projections for players who may play.  changes with roster changes during period
        projections_with_added_players = player_projections.copy()
        current_projections.sort_values(by='fpts',
                                        ascending=False,
                                        inplace=True)
        # we need to look up roster changes by date so let's make a dict ourselves
        rc_dict = defaultdict(list)
        if roster_change_set:
            rc_dict = _roster_changes_as_day_dict(roster_change_set)

        scoring_categories = self.scoring_categories()

        roster_week_results = None
        if not (simulation_mode or date_last_use_actuals):
            # if date_last_use_actuals is not set, we default it to 1 second before midnight today
            # can't support today yet, need to watch for completed games, etc.
            midnight_today = datetime.now().replace(hour=0,
                                                    minute=0,
                                                    second=0,
                                                    microsecond=0)
            date_last_use_actuals = midnight_today - timedelta(seconds=1)

        for game_day in date_range:
            roster_results = None

            for rc in rc_dict[game_day.date()]:
                # TODO should really figure out how to deal with this.  sometimes it is string, sometimes list.
                # i think has to do with serializing via jsonpickle
                with suppress(Exception):
                    rc.in_projections['eligible_positions'] = pd.eval(
                        rc.in_projections['eligible_positions'])
                # add player in projections to projection dataframe
                current_projections = current_projections.append(
                    rc.in_projections)
                projections_with_added_players = projections_with_added_players.append(
                    rc.in_projections)
                # NOTE(review): the outgoing player is dropped only from
                # current_projections, while the daily roster below is built
                # from projections_with_added_players -- confirm intended.
                current_projections.drop(rc.out_player_id, inplace=True)
                current_projections.sort_values(by='fpts',
                                                ascending=False,
                                                inplace=True)

            # let's see if we should grab actuals
            if not simulation_mode and game_day < date_last_use_actuals:
                roster_results = self._actuals_for_team_day(
                    team_id, game_day, scoring_categories)
            else:
                game_day_players = projections_with_added_players[
                    projections_with_added_players.team_id.isin(
                        find_teams_playing(game_day.to_pydatetime().date()))]
                if len(game_day_players) > 0:
                    roster = best_roster(
                        game_day_players.loc[:, ['eligible_positions']].
                        itertuples())
                    rostered_players = [player.player_id for player in roster]
                    roster_results = projections_with_added_players.loc[
                        rostered_players, scoring_categories]
                    roster_results.loc[:, 'score_type'] = 'p'

                    # NaN != NaN, so this selects players without a G value.
                    missing = roster_results[
                        roster_results.G != roster_results.G].index.values
                    if len(missing) > 0:
                        LOG.warning(f"no projections for players: {missing}")

            if roster_results is not None and len(roster_results) > 0:
                roster_results['play_date'] = game_day
                if roster_week_results is None:
                    roster_week_results = roster_results
                else:
                    roster_week_results = roster_week_results.append(
                        roster_results)

        #TODO maybe we should formalize a return structure
        # roster_week_results stays None when no day produced rows; guard it
        # so that case no longer raises TypeError from len(None).
        if roster_week_results is not None and len(roster_week_results) > 0:
            roster_week_results.reset_index(inplace=True)
            roster_week_results.set_index(['play_date', 'player_id'],
                                          inplace=True)
        return roster_change_set, roster_week_results

    def score_team(self,
                   player_projections,
                   date_range,
                   opponent_scores,
                   roster_change_set=None,
                   simulation_mode=True,
                   date_last_use_actuals=None,
                   team_id=None):
        """Score a team using actuals for past days and projections after.

        Days up to one second before midnight today are scored from actual
        results; the remainder of ``date_range`` is scored by ``score_gekko``.
        Any exception is logged and swallowed.

        NOTE(review): ``simulation_mode`` is never read and the
        ``date_last_use_actuals`` argument is overwritten unconditionally --
        confirm whether callers expect them to be honoured.

        Returns:
            tuple: ``(roster_change_set, roster_week_results)``.  The results
            are indexed by ``(play_date, player_id)``, or ``None`` when
            scoring failed before a result was produced.
        """
        # Bound up front so the return below cannot raise UnboundLocalError
        # when scoring fails inside the try block.
        roster_week_results = None
        try:
            date_last_use_actuals = datetime.now().replace(
                hour=0, minute=0, second=0,
                microsecond=0) - timedelta(seconds=1)
            scoring_categories = self.scoring_categories()
            # lets add actuals, they can't be optimized
            actuals_results = self.score_actuals(
                team_id,
                date_range[date_range.slice_indexer(date_range[0],
                                                    date_last_use_actuals)],
                scoring_categories)
            actual_results_summed = None
            if actuals_results is not None:
                actual_results_summed = actuals_results.sum()

            roster_makeup = self.roster_makeup(position_type='P')
            projected_results = score_gekko(
                player_projections,
                team_id,
                opponent_scores,
                scoring_categories,
                date_range[date_range.slice_indexer(date_last_use_actuals)],
                roster_makeup,
                roster_change_set=roster_change_set,
                actual_scores=actual_results_summed)

            if actuals_results is not None:
                actuals_results.reset_index(inplace=True)
                roster_week_results = actuals_results.append(projected_results)
            else:
                roster_week_results = projected_results
            roster_week_results.set_index(['play_date', 'player_id'],
                                          inplace=True)
        except Exception as e:
            LOG.exception(e)

        return roster_change_set, roster_week_results

    def score_actuals(self, team_id, date_range, scoring_categories):
        """Collect a team's actual results for every day in ``date_range``.

        Returns:
            DataFrame or None: the daily results stacked together with a
            ``play_date`` column, or ``None`` when no day had any rows.
        """
        # TODO cache the group of actuals
        combined = None
        for game_day in date_range:
            day_stats = self._actuals_for_team_day(team_id, game_day,
                                                   scoring_categories)
            # get rid of players that didnt play: NaN never equals itself,
            # so the self-comparison keeps only rows with a G value
            played = day_stats[day_stats.G == day_stats.G]
            if played is None or len(played) == 0:
                continue
            played['play_date'] = game_day
            combined = played if combined is None else combined.append(played)
        return combined

Example #6

Show file

File: run_minimize.py Project: QuailAutomation/fantasy_bot

from math import e
from collections import defaultdict
import numpy as np

from csh_fantasy_bot.bot import ManagerBot
from nhl_scraper.nhl import Scraper
from csh_fantasy_bot.roster_change_optimizer import RosterChangeSet

# Module-level scraper instance; it is not referenced again in this excerpt.
nhl_scraper = Scraper()

# Slot counts per skater position (presumably the roster slots to fill --
# confirm against the optimizer that consumes this).
available_positions = {
    "C": 2,
    "LW": 2,
    "RW": 2,
    "D": 4,
}

# Bot for a hard-coded league and week.
manager = ManagerBot(week=5, league_id='403.l.41177')
# One roster change in the text format parsed by
# RosterChangeSet.from_pretty_print_text.
roster_change_text = """
Date: 2021-02-19, in: Brock Nelson(4990), out: Nazem Kadri(3637)
"""
roster_changes = RosterChangeSet.from_pretty_print_text(
    roster_change_text, manager.all_player_predictions)

# Score the team with and without the roster change applied.
scores_with = manager.score_team_pulp(roster_change_set=roster_changes)
scores_without = manager.score_team_pulp()

scoring_categories = manager.stat_categories

Example #7

Show file

class Parser:
    """Build a player pool from saved fantasysp weekly projection pages.

    One HTML file per position is read when the object is constructed and
    every table row becomes one row of ``self.ppool``.  ``predict`` joins
    that pool to a roster; ``parse`` returns the pool itself.
    """

    goalie_headings = ["GAA", "WIN%", "SHO%"]

    # Default skater categories.  A tuple, so the default cannot be mutated
    # and shared between instances.
    _DEFAULT_SCORING_CATEGORIES = ("G", "A", "SOG", "+/-", "HIT", "PIM", "FW")

    # Team abbreviations found in the parsed pages -> the abbreviations the
    # pool is stored with (``predict`` joins ``Tm`` against ``abbrev``).
    _SOURCE_TEAM_ABBR = {
        "TB": "TBL",
        "WAS": "WSH",
        "SJ": "SJS",
        "MON": "MTL",
        "CLB": "CBJ",
        "NJ": "NJD",
        "LA": "LAK",
    }

    # Yahoo ``editorial_team_abbr`` values -> the abbreviations matched
    # against the ``abbrev`` column of the scraper's team table.
    _YAHOO_TEAM_ABBR = {
        'LA': 'LAK',
        'Ott': 'OTT',
        'Bos': 'BOS',
        'SJ': 'SJS',
        'Anh': 'ANA',
        'Min': 'MIN',
        'Nsh': 'NSH',
        'Tor': 'TOR',
        'StL': 'STL',
        'Det': 'DET',
        'Edm': 'EDM',
        'Chi': 'CHI',
        'TB': 'TBL',
        'Fla': 'FLA',
        'Dal': 'DAL',
        'Van': 'VAN',
        'NJ': 'NJD',
        'Mon': 'MTL',
        'Ari': 'ARI',
        'Wpg': 'WPG',
        'Pit': 'PIT',
        'Was': 'WSH',
        'Cls': 'CBJ',
        'Col': 'COL',
        'Car': 'CAR',
        'Buf': 'BUF',
        'Cgy': 'CGY',
        'Phi': 'PHI'
    }

    def __init__(self, positions=None, scoring_categories=None):
        """Read the per-position HTML files and build ``self.ppool``.

        :param positions: Positions to read; defaults to
            ``['C', 'LW', 'RW', 'D', 'G']``
        :param scoring_categories: Skater categories to extract; defaults to
            ``["G", "A", "SOG", "+/-", "HIT", "PIM", "FW"]``
        """
        # which positions to scrape from fantasysp
        if positions is not None:
            self.positions = positions
        else:
            self.positions = ['C', 'LW', 'RW', 'D', 'G']
        if scoring_categories is None:
            scoring_categories = list(self._DEFAULT_SCORING_CATEGORIES)
        self.scoring_categories = scoring_categories

        # Column order: the fixed headings first, then every stat column in
        # the order it is first seen while parsing.
        columns = ["Name", "Tm", "Pos", "GAMES"]
        records = []
        for position in self.positions:
            print("Processing: {}".format(position))
            file_name = "{}fantasysp_weekly-{}.html".format(
                html_file_location, position)

            with open(file_name, "rb") as f:
                soup = BeautifulSoup(f, "lxml")
            table = soup.find(
                "table", {
                    "class":
                    "table sortable table-clean table-add-margin table-fixed"
                })
            for row in table.find('tbody').find_all('tr'):
                record = self._parse_row(row, position)
                for key in record:
                    if key not in columns:
                        columns.append(key)
                records.append(record)

        # Build the frame once: every row gets a unique 0..n-1 label, and
        # stats a row does not have (skater vs. goalie) are filled with NaN.
        df = pd.DataFrame(records, columns=columns)

        # Assign the result back instead of a chained ``inplace`` replace,
        # which does not modify ``df`` under pandas Copy-on-Write.
        df['Tm'] = df['Tm'].replace(self._SOURCE_TEAM_ABBR)

        self.ppool = df.rename(columns={'Name': 'name'})

    @staticmethod
    def _leading_int(text):
        """Return the integer formed by the leading digits of ``text``.

        :raises ValueError: if ``text`` does not start with a digit
        """
        digits = []
        for ch in text:
            if not ch.isdigit():
                break
            digits.append(ch)
        return int("".join(digits))

    @staticmethod
    def _cell_float(row, css_class):
        """Return the text of the row's ``td`` with ``css_class`` as float."""
        return float(row.find("td", {"class": css_class}).text.strip())

    def _parse_row(self, row, position):
        """Turn one table row into a dict of column name -> value.

        Counting stats are divided by the row's number of games, so a row
        with 0 games raises ZeroDivisionError; GAA is stored as-is.
        """
        cells = row.find_all('td')
        name_base = cells[1].text.strip().split("\n")
        record = {
            "Name": name_base[0].strip(),
            "Tm": name_base[3].strip(),
            "Pos": name_base[6].strip(),
        }
        num_games = self._leading_int(cells[2].text.strip().split("\n")[0])
        record["GAMES"] = num_games

        if position != 'G':
            for category in self.scoring_categories:
                record[category] = self._cell_float(
                    row, FANTASY_SP_CAT_MAP[category]) / num_games
        else:
            # goalie
            gaa = self._cell_float(row, "proj-gaa")
            win_per_game = self._cell_float(row, "proj-wins") / num_games
            so_per_game = self._cell_float(row, "proj-so") / num_games
            record.update(
                zip(Parser.goalie_headings, (gaa, win_per_game, so_per_game)))
        return record

    def predict(self, roster):
        """Build a dataset of hockey predictions for the week

        The pool of players is passed into this function through roster.
        It will generate a DataFrame for these players with their predictions.

        The returning DataFrame has rows for each player, and columns for each
        prediction stat.

        :param roster: Roster of players to generate predictions for
        :type roster: DataFrame
        :return: Dataset of predictions, indexed by ``player_id`` when that
            column is present
        :rtype: DataFrame
        """
        # The roster is the base of the result: a left join against the
        # player pool keeps every roster row and attaches the projections
        # where name and team match.
        self.nhl_scraper = Scraper()
        if 'player_id' not in roster.columns:
            my_roster = roster.reset_index()
        else:
            my_roster = roster.copy()

        if 'team_id' not in my_roster.columns:
            # we must map in teams
            self._fix_yahoo_team_abbr(my_roster)
            # 'id' has to stay a regular column so it can become 'team_id'
            # after the merge.  rename() returns a new frame, so the frame
            # handed out by the scraper is left untouched.
            nhl_teams = self.nhl_scraper.teams().rename(
                columns={'name': 'team_name'})

            my_roster = my_roster.merge(nhl_teams,
                                        left_on='editorial_team_abbr',
                                        right_on='abbrev')
            my_roster.rename(columns={'id': 'team_id'}, inplace=True)

        # NOTE(review): 'abbrev' is only added by the team merge above; a
        # roster that already carries 'team_id' must also carry 'abbrev' —
        # confirm with callers.
        wanted = (list(self.scoring_categories) + ['name', 'Tm'] +
                  Parser.goalie_headings)
        # reindex() instead of plain selection: stat columns the pool does
        # not have (e.g. goalie stats when no goalies were parsed) become
        # NaN columns instead of raising KeyError.
        df = pd.merge(my_roster,
                      self.ppool.reindex(columns=wanted),
                      left_on=['name', 'abbrev'],
                      right_on=['name', 'Tm'],
                      how='left')
        if 'FOW' in self.scoring_categories:
            df.rename(columns={'FOW': 'FW'}, inplace=True)
        if 'player_id' in df.columns:
            df.set_index('player_id', inplace=True)
        return df

    def parse(self):
        """Return the player pool built at construction time."""
        return self.ppool

    def _fix_yahoo_team_abbr(self, df):
        """Rewrite ``df['editorial_team_abbr']`` in place using the Yahoo
        abbreviation mapping; values without a mapping are left unchanged.
        """
        # Column assignment (not a chained ``inplace`` replace) so the
        # caller's frame is really updated under pandas Copy-on-Write.
        df["editorial_team_abbr"] = df["editorial_team_abbr"].replace(
            self._YAHOO_TEAM_ABBR)

Example #8

Show file

File: download_nhl_boxscores.py Project: QuailAutomation/fantasy_bot

from datetime import date
import json
import time
import pandas as pd

from nhl_scraper.nhl import Scraper

# Widen pandas' console output.
pd.set_option('display.max_columns', 500)
pd.set_option('display.width', 1000)


# Date window whose games are downloaded.
season_opening_date = date(2019,10,2)
season_end_date = date(2020,10,8)
nhl_scraper = Scraper()
games = nhl_scraper.games(season_opening_date, season_end_date)

# Fetch one box score per game and write it to box-scores/<game>.json.
# NOTE: open() does not create the box-scores/ directory; it has to exist.
for game in games:
    print('downloading boxscore, id={}'.format(game))
    box_score = nhl_scraper.box_scores(game,format='json')
    with open('box-scores/{}.json'.format(game), 'w') as outfile:
        json.dump(box_score, outfile)
    # Wait one second between downloads (presumably to go easy on the
    # remote API — TODO confirm).
    time.sleep(1)
pass

Example #9

Show file

File: dump-nhl-schedule-es.py Project: QuailAutomation/fantasy_bot

import datetime
import pandas as pd
import json
from nhl_scraper.nhl import Scraper

from elasticsearch import Elasticsearch
from elasticsearch import helpers

# Elasticsearch client; the host address is hard-coded.
es = Elasticsearch(hosts='http://192.168.1.20:9200', http_compress=True)

# Widen pandas' console output.
pd.set_option('display.max_columns', 500)
pd.set_option('display.width', 1000)

nhl = Scraper()

# Fetch the line scores between the two dates below.
a_day = datetime.date(2021, 10, 11)
end_day = datetime.date(2021, 12, 18)
games = nhl.linescores(a_day, end_day)


def doc_generator_linescores(games):
    """Lazily yield one index action dict per line score.

    Every game gets a ``timestamp`` entry copied from its ``gameDate`` (the
    game mapping itself is modified) and is then used, unchanged otherwise,
    as the ``_source`` of the action; ``gamePk`` becomes the ``_id``.

    :param games: Iterable of game mappings with ``gameDate`` and ``gamePk``
    :return: Generator of action dicts
    """
    target_index = 'fantasy-nhl-2021-line-scores'
    for linescore in games:
        linescore['timestamp'] = linescore['gameDate']
        action = dict(
            _index=target_index,
            _type="_doc",
            _id=linescore['gamePk'],
            _source=linescore,
        )
        yield action

Example #10

Show file

from nhl_scraper.nhl import Scraper
from csh_fantasy_bot.scoring import ScoreComparer

from pulp import *

# NOTE(review): ManagerBot and RosterChangeSet are not among the imports
# shown directly above this excerpt — confirm they are in scope.
league_id = '403.l.41177'
week_number = 5
# zero-based index of the day within the week (Mon = 0)
game_day_week = 0
manager = ManagerBot(week_number, league_id=league_id)


my_scores = manager.my_team.scores()
# manager.score_comparer.print_week_results(my_scores)

# Scraper instance; it is not referenced again in this excerpt.
nhl_scraper = Scraper()

# use the first day of the week, when lots of players can play
game_day = manager.week[game_day_week]
# make sure roster is as-of this date
manager.as_of(game_day)

# Proposed roster changes, one per line, in the text form that
# RosterChangeSet.from_pretty_print_text parses below.
roster_change_text="""
Date: 2021-02-16, in: Adam Fox(7174), out: Teuvo Teravainen(5698)
Date: 2021-02-16, in: Andrew Copp(6083), out: Oliver Ekman-Larsson(4686)
"""
roster_changes = RosterChangeSet.from_pretty_print_text(roster_change_text, manager.all_player_predictions)

# Score the team against the opponent's scores with the change set applied;
# this rebinds my_scores from the assignment above.

my_scores = manager.score_team_pulp(manager.opponent.scores(), roster_change_set=roster_changes)