def test_all_player_features(self):
    """Extract offensive and defensive cluster features for every player and save them as CSV.

    Iterates over every document in the ``players`` collection (ascending by
    ``id``), computes the offensive and defensive feature rows for the 2013
    season, writes each set to its own CSV file under ``season_data/2013`` and
    asserts that both files exist afterwards.
    """
    year = 2013
    season = Season(year)
    all_players = players.find({}).sort('id', pymongo.ASCENDING)

    offensive_features = []
    defensive_features = []
    for player_data in all_players:
        player = Player(player_data['id'])
        # Single-argument print(...) behaves the same on Python 2 and Python 3,
        # unlike the bare print statement, which is a syntax error on Python 3.
        print('Extracting offensive features for {} from the {}'.format(player, season))
        o_features = player_ocluster_features(player, season)
        print('Extracting defensive features for {} from the {}'.format(player, season))
        d_features = player_dcluster_features(player, season)
        offensive_features.append(o_features)
        defensive_features.append(d_features)

    offensive_features = np.array(offensive_features)
    defensive_features = np.array(defensive_features)

    # Derive the output directory from the same year used to build the season
    # so the two cannot drift apart.
    season_dir = os.path.join('season_data', str(year))
    o_features_file = os.path.join(season_dir, 'player_offense_features.csv')
    d_features_file = os.path.join(season_dir, 'player_defense_features.csv')

    o_header = 'id,ast_pct,ts_pct,orb_pct,usg,ortg,mp_pct'
    d_header = 'id,blk_pct,stl_pct,drb_pct,drtg,pf_pct,mp_pct'

    np.savetxt(o_features_file, offensive_features, delimiter=',', header=o_header)
    np.savetxt(d_features_file, defensive_features, delimiter=',', header=d_header)

    self.assertTrue(os.path.exists(o_features_file))
    self.assertTrue(os.path.exists(d_features_file))
def test_all_player_features(self):
    """Extract offensive cluster features for every player in the 2013 season.

    Iterates over every document in the ``players`` collection (ascending by
    ``id``) and stacks the per-player offensive feature rows into a NumPy
    array. The test makes no assertions; it only checks that extraction runs
    to completion without raising.
    """
    season = Season(2013)
    all_players = players.find({}).sort('id', pymongo.ASCENDING)

    offensive_features = []
    for player_data in all_players:
        player = Player(player_data['id'])
        # Single-argument print(...) behaves the same on Python 2 and Python 3,
        # unlike the bare print statement, which is a syntax error on Python 3.
        print('Extracting offensive features for {} from the {}'.format(player, season))
        features = player_ocluster_features(player, season)
        offensive_features.append(features)

    offensive_features = np.array(offensive_features)
def extract_data(filename='shots.csv'):
    """Write every non-own-goal shot event to a CSV file with ids resolved to names.

    Builds id -> name lookup tables from the ``regions``, ``tournaments``,
    ``seasons``, ``stages``, ``teams`` and ``players`` collections, then
    streams all events with ``isShot`` set and no ``isOwnGoal`` field (sorted
    by ``matchId`` descending) into ``filename``. Each row gets the resolved
    ``Region``/``Tournament``/``Season``/``Stage``/``Team``/``Player`` names,
    ``isGoal`` normalised to 1/0, and one column per entry in ``QUALIFIERS``
    (the qualifier's value, 1 if present without a value, 0 if absent).

    Args:
        filename: Path of the CSV file to create (overwritten if it exists).

    Raises:
        KeyError: If an event references a region, tournament, season, stage
            or team id that is missing from its lookup table, or has no
            ``qualifiers`` field. Unknown player ids are handled instead:
            they are reported, passed to ``get_player`` and written as None.
    """
    # id -> display-name lookup tables, one per reference collection.
    region_names = {doc['regionId']: doc['name'] for doc in regions.find()}
    tournament_names = {doc['tournamentId']: doc['name'] for doc in tournaments.find()}
    season_names = {doc['seasonId']: doc['name'] for doc in seasons.find()}
    stage_names = {doc['stageId']: doc['name'] for doc in stages.find()}
    team_names = {doc['teamId']: doc['name'] for doc in teams.find()}
    player_names = {doc['playerId']: doc['name'] for doc in players.find()}

    shot_filter = {'isShot': True, 'isOwnGoal': {'$exists': False}}

    # The context manager guarantees the file is flushed and closed even if a
    # lookup raises part-way through the export.
    with open(filename, 'w', newline='\n') as f:
        writer = csv.DictWriter(f, fieldnames=FIELDNAMES + QUALIFIERS, extrasaction='ignore')
        writer.writeheader()

        # goals = events.count({'isGoal': True, 'isOwnGoal': {'$exists': False}})
        # shots = events.count({'isShot': True, 'isOwnGoal': {'$exists': False}})
        # print('{0:,} goals from {1:,} shots ({2:.1%} shot rate)'.format(goals, shots, goals / shots))

        for event in events.find(shot_filter).sort('matchId', -1):
            event['isGoal'] = 1 if event.get('isGoal') else 0
            event['Region'] = region_names[event['regionId']] if event.get('regionId') else None
            event['Tournament'] = tournament_names[event['tournamentId']] if event.get('tournamentId') else None
            event['Season'] = season_names[event['seasonId']] if event.get('seasonId') else None
            event['Stage'] = stage_names[event['stageId']] if event.get('stageId') else None
            event['Team'] = team_names[event['teamId']] if event.get('teamId') else None

            player_id = event.get('playerId')
            try:
                event['Player'] = player_names[player_id] if player_id else None
            except KeyError:
                # The player is not in the local collection: report it, hand
                # the id to get_player, and leave the name blank for this row.
                print('Missing playerId: {}'.format(player_id))
                get_player(player_id)
                event['Player'] = None

            # A qualifier without an explicit value is treated as a flag (1).
            event_qualifiers = {q['type']['displayName']: q.get('value', 1) for q in event['qualifiers']}
            for qualifier in QUALIFIERS:
                event[qualifier] = event_qualifiers.get(qualifier, 0)

            writer.writerow(event)
def calc_all_player_times(year, recompute=False):
    """Compute time on court for every player in every game of a season.

    For each player in the ``players`` collection (ascending by ``id``) and
    each game returned by ``season.get_player_games_in_range``, the computed
    playing time is compared against the boxscore's ``totalSecondsPlayed``.
    A match within half a minute is reported on stdout; anything else is
    reported on stderr. Nothing is returned.

    Args:
        year: Starting year of the season to load (passed to ``Season``).
        recompute: Forwarded to ``player.time_on_court``.
    """

    class TimeComputationError(Exception):
        """Error carrying a plain message; currently not raised (see below)."""

        def __init__(self, msg):
            self.msg = msg

        def __str__(self):
            return self.msg

    roster = players.find({}).sort('id', pymongo.ASCENDING)

    print('Loading the {}-{} NBA season'.format(year, year + 1))
    season = Season(year)
    print('Loaded season')

    print('Computing time on court for all players in all games...')
    for record in roster:
        player = Player(record['id'])
        for game in season.get_player_games_in_range(player):
            print('Calculating time on court for {} ({}) in {} ({})'.format(player, player.id, game, game.id))
            boxscore_minutes = game.player_boxscore(player)['totalSecondsPlayed'] / 60.0

            # With no boxscore minutes there is never anything to calculate.
            computed_minutes = 0
            if boxscore_minutes > 0:
                on_court = player.time_on_court(game, recompute=recompute)
                computed_minutes = compute_ts_length(on_court, unit='minutes')

            if abs(computed_minutes - boxscore_minutes) <= 0.5:
                print('{} played {} minutes in {}'.format(player, round(computed_minutes, 3), game))
            else:
                # Discrepancies are only reported; raising TimeComputationError
                # here is intentionally disabled.
                print('In computing playing time for {} ({}) in {} ({}):'.format(player, player.id, game, game.id), file=sys.stderr)
                print('Discrepancy between computed time: {0:2.2f}, and boxscore time: {1:2.2f}'.format(computed_minutes, boxscore_minutes), file=sys.stderr)
def get_all_player_features(season, start_date=None, end_date=None, recompute=False):
    """Return a DataFrame of offensive and defensive cluster features for all players.

    Results are cached as CSV under ``season_data/<season.season>/``. When the
    cache file for the requested date range exists and ``recompute`` is false,
    it is loaded and returned; otherwise features are extracted for every
    player in the ``players`` collection (ascending by ``id``), written to the
    cache file and returned.

    Args:
        season: Object exposing a ``season`` attribute (used for the cache
            directory); also forwarded to the feature extractors.
        start_date: Start of the date range, or None. Forwarded unchanged to
            the feature extractors. None is labelled ``start`` in the cache
            file name.
        end_date: End of the date range, or None. Forwarded unchanged to the
            feature extractors. None is labelled ``end`` in the cache file
            name.
        recompute: When true, ignore any cached file and rebuild it.

    Returns:
        A pandas DataFrame indexed by player ``id``.
    """
    o_header = ['ast_pct', 'ts_pct', 'orb_pct', 'usg', 'ortg', 'mp_pct']
    d_header = ['blk_pct', 'stl_pct', 'drb_pct', 'drtg', 'pf_pct']

    # The signature's defaults are None, so calling .strftime unconditionally
    # would raise AttributeError; use a placeholder label for open-ended bounds.
    str_format = '%Y-%m-%d'
    start_label = start_date.strftime(str_format) if start_date is not None else 'start'
    end_label = end_date.strftime(str_format) if end_date is not None else 'end'
    feature_file = 'player-features-from-{}-to-{}.csv'.format(start_label, end_label)
    path = os.path.join('season_data', str(season.season), feature_file)

    if os.path.exists(path) and not recompute:
        return pd.read_csv(path, index_col=0)

    # Only query the players collection when features actually need computing.
    all_players = players.find({}).sort('id', pymongo.ASCENDING)

    index = []
    all_features = []
    for player_data in all_players:
        player = Player(player_data['id'])
        index.append(player.id)
        o_features = player_ocluster_features(player, season, start_date, end_date)
        d_features = player_dcluster_features(player, season, start_date, end_date)
        # Drop the leading id from both rows. The last entry of d_features is
        # dropped as well -- presumably a duplicate of a column already in
        # o_features (TODO confirm against player_dcluster_features).
        all_features.append(o_features[1:] + d_features[1:-1])

    player_features = pd.DataFrame(data=all_features, columns=o_header + d_header, index=index)
    player_features.index.name = 'id'
    player_features.to_csv(path)
    return player_features
def extract_data(filename='shots.csv'):
    """Write every non-own-goal shot event to a CSV file with ids resolved to names.

    Builds id -> name lookup tables from the ``regions``, ``tournaments``,
    ``seasons``, ``stages``, ``teams`` and ``players`` collections, then
    streams all events with ``isShot`` set and no ``isOwnGoal`` field (sorted
    by ``matchId`` descending) into ``filename``. Each row gets the resolved
    ``Region``/``Tournament``/``Season``/``Stage``/``Team``/``Player`` names,
    ``isGoal`` normalised to 1/0, and one column per entry in ``QUALIFIERS``
    (the qualifier's value, 1 if present without a value, 0 if absent).

    Args:
        filename: Path of the CSV file to create (overwritten if it exists).

    Raises:
        KeyError: If an event references a region, tournament, season, stage
            or team id that is missing from its lookup table, or has no
            ``qualifiers`` field. Unknown player ids are handled instead:
            they are reported, passed to ``get_player`` and written as None.
    """
    # id -> display-name lookup tables, one per reference collection.
    region_names = {doc['regionId']: doc['name'] for doc in regions.find()}
    tournament_names = {doc['tournamentId']: doc['name'] for doc in tournaments.find()}
    season_names = {doc['seasonId']: doc['name'] for doc in seasons.find()}
    stage_names = {doc['stageId']: doc['name'] for doc in stages.find()}
    team_names = {doc['teamId']: doc['name'] for doc in teams.find()}
    player_names = {doc['playerId']: doc['name'] for doc in players.find()}

    shot_filter = {'isShot': True, 'isOwnGoal': {'$exists': False}}

    # The context manager guarantees the file is flushed and closed even if a
    # lookup raises part-way through the export.
    with open(filename, 'w', newline='\n') as f:
        writer = csv.DictWriter(f, fieldnames=FIELDNAMES + QUALIFIERS, extrasaction='ignore')
        writer.writeheader()

        # goals = events.count({'isGoal': True, 'isOwnGoal': {'$exists': False}})
        # shots = events.count({'isShot': True, 'isOwnGoal': {'$exists': False}})
        # print('{0:,} goals from {1:,} shots ({2:.1%} shot rate)'.format(goals, shots, goals / shots))

        for event in events.find(shot_filter).sort('matchId', -1):
            event['isGoal'] = 1 if event.get('isGoal') else 0
            event['Region'] = region_names[event['regionId']] if event.get('regionId') else None
            event['Tournament'] = tournament_names[event['tournamentId']] if event.get('tournamentId') else None
            event['Season'] = season_names[event['seasonId']] if event.get('seasonId') else None
            event['Stage'] = stage_names[event['stageId']] if event.get('stageId') else None
            event['Team'] = team_names[event['teamId']] if event.get('teamId') else None

            player_id = event.get('playerId')
            try:
                event['Player'] = player_names[player_id] if player_id else None
            except KeyError:
                # The player is not in the local collection: report it, hand
                # the id to get_player, and leave the name blank for this row.
                print('Missing playerId: {}'.format(player_id))
                get_player(player_id)
                event['Player'] = None

            # A qualifier without an explicit value is treated as a flag (1).
            event_qualifiers = {q['type']['displayName']: q.get('value', 1) for q in event['qualifiers']}
            for qualifier in QUALIFIERS:
                event[qualifier] = event_qualifiers.get(qualifier, 0)

            writer.writerow(event)