def lupbp_badlen(year, type = 'reg', games = 'ALL', periods = 'ALL', datapath = ''):
    """Print the play-by-play lineups of every game/period/team that looks malformed.

    A game/period/team is reported when the 'on' lineup stored for stint 1
    does not hold 4 or 5 entries, or when any later stint's 'on' lineup has a
    different length from the lineup stored under the preceding stint number.

    For each reported combination the function prints a blank line, a header
    (year, game, team, 'q<period>'), one line per stint (stint number, lineup
    length, lineup) and finally the stint-1 lineup length of every period of
    the game.

    Parameters:
        year, type: select the pickle '<datapath>lupbp_<type>_<year>.pkl' and
            are part of every lineup key.
        games, periods, datapath: forwarded unchanged to
            lusubspkl.create_dfseg, which builds the data frame to iterate.

    Returns:
        None; output goes to stdout only.
    """
    def emit(*parts):
        # A single pre-joined string prints identically under the Python 2
        # print statement and under print().
        print(' '.join(str(part) for part in parts))

    pickle_path = '{}lupbp_{}_{}.pkl'.format(datapath, type, year)
    with open(pickle_path, 'rb') as f:
        lupbp = pickle.load(f)

    df = lusubspkl.create_dfseg(year, type, games, periods, datapath)

    for game in pd.unique(df['GAME_ID']):
        in_game = df['GAME_ID'] == game
        game_periods = pd.unique(df.loc[in_game, 'PERIOD'])
        for period in game_periods:
            stints = pd.unique(
                df.loc[in_game & (df['PERIOD'] == period), 'STINT'])
            for team in ['HOME', 'VIS']:
                first_size = len(lupbp[year, type, game, period, 1, team, 'on'])
                # Stints whose lineup length differs from the previous stint's.
                uneven = [
                    stint for stint in stints[1:]
                    if len(lupbp[year, type, game, period, stint, team, 'on'])
                    != len(lupbp[year, type, game, period, stint - 1, team, 'on'])
                ]
                if first_size in [4, 5] and not uneven:
                    continue

                print('')
                emit(year, game, team, 'q' + str(period))
                for stint in stints:
                    lineup = lupbp[year, type, game, period, stint, team, 'on']
                    emit(stint, len(lineup), lineup)
                for p in game_periods:
                    opening = lupbp[year, type, game, p, 1, team, 'on']
                    emit('q' + str(p), 'len stint 1: ' + str(len(opening)))
def lubox(year, type = 'reg', games = 'ALL', periods = 'ALL', datapath = ''):
    """Complete four-player lineups using box-score playing time and save them.

    Loads '<datapath>lupbp_<type>_<year>.pkl'. For every game and team where
    at least one period starts (stint 1) with an 'on' lineup of exactly four
    players, it:

    1. prints the game id;
    2. sums STINT_TIME for each player over every stint whose 'on' lineup
       contains that player;
    3. reads the box score from the JSON file '<year>_<game>' and converts
       column 8 of each row ('MM:SS' text, presumably minutes played) into
       60 * MM + SS, keyed by column 4 (presumably the player id);
    4. for every player whose box-score total exceeds the lineup total by
       more than 200, adds the player to the 'on' and 'list' entries of every
       stint in each period that starts with four players, unless the player
       is already in that stint's 'list'. Both lists are re-sorted.

    The whole dictionary is then written to '<datapath>lubox_<type>_<year>.pkl'.

    Players found in the lineups but without a usable box-score entry count as
    0 box-score time (the same default lubox_checkpt applies), so they are
    never added; previously such a player raised KeyError and aborted the run
    before anything was saved.

    Parameters:
        year, type: select the pickles and are part of every lineup key.
        games, periods, datapath: forwarded unchanged to
            lusubspkl.create_dfseg.

    Returns:
        None.
    """
    with open('{}lupbp_{}_{}.pkl'.format(datapath, type, year), 'rb') as f:
        lineups = pickle.load(f)

    df = lusubspkl.create_dfseg(year, type, games, periods, datapath)

    for game in pd.unique(df['GAME_ID']):
        in_game = df['GAME_ID'] == game

        # Per period, the (stint, STINT_TIME of the stint's first row) pairs.
        # These do not depend on team or player, so look them up once per game.
        schedule = []
        for period in pd.unique(df.loc[in_game, 'PERIOD']):
            in_period = in_game & (df['PERIOD'] == period)
            timed_stints = []
            for stint in pd.unique(df.loc[in_period, 'STINT']):
                first_row = df.loc[in_period & (df['STINT'] == stint)].index[0]
                timed_stints.append((stint, df.loc[first_row, 'STINT_TIME']))
            schedule.append((period, timed_stints))

        for team in ['HOME', 'VIS']:
            starts_with_four = [
                len(lineups[year, type, game, period, 1, team, 'on']) == 4
                for period, _ in schedule
            ]
            if not any(starts_with_four):
                continue

            print(game)

            # Time on court per player according to the constructed lineups.
            lineup_time = {}
            for period, timed_stints in schedule:
                for stint, stint_time in timed_stints:
                    for player in lineups[year, type, game, period, stint, team, 'on']:
                        lineup_time[player] = lineup_time.get(player, 0) + stint_time

            # NOTE(review): this path ignores `datapath` and is resolved
            # against the current working directory - confirm that is intended.
            with open('{}_{}'.format(year, game), 'r') as f:
                box = json.load(f)

            # Time on court per player according to the box score. Rows whose
            # column 8 is not text (presumably players who did not play) are
            # left out and treated as 0 below.
            box_time = {}
            for row in box['resultSets'][0]['rowSet']:
                if isinstance(row[8], unicode):
                    clock = row[8].split(':')
                    box_time[row[4]] = (60 * int(clock[0])) + int(clock[1])

            # Players the box score credits with clearly more time than the
            # lineups do are the candidates for the missing fifth slot.
            short_changed = [
                player for player in lineup_time
                if box_time.get(player, 0) - lineup_time[player] > 200
            ]

            for period, timed_stints in schedule:
                if len(lineups[year, type, game, period, 1, team, 'on']) != 4:
                    continue
                for stint, _ in timed_stints:
                    on_court = lineups[year, type, game, period, stint, team, 'on']
                    known = lineups[year, type, game, period, stint, team, 'list']
                    for player in short_changed:
                        if player not in known:
                            on_court.append(player)
                            on_court.sort()
                            known.append(player)
                            known.sort()

    with open('{}lubox_{}_{}.pkl'.format(datapath, type, year), 'wb') as f:
        pickle.dump(lineups, f, pickle.HIGHEST_PROTOCOL)
def lubox_len4(year, type = 'reg', games = 'ALL', periods = 'ALL', datapath = ''):
    """Print every period whose opening lineup still has exactly four players.

    Loads '<datapath>lubox_<type>_<year>.pkl' and, for each game, team and
    period in which the stint-1 'on' lineup has length 4, prints a blank
    line, a header (year, game, team, 'q<period>') and then one line per
    stint of that period with the stint number, the lineup length and the
    lineup itself.

    Parameters:
        year, type: select the pickle and are part of every lineup key.
        games, periods, datapath: forwarded unchanged to
            lusubspkl.create_dfseg.

    Returns:
        None; output goes to stdout only.
    """
    def emit(*parts):
        # A single pre-joined string prints identically under the Python 2
        # print statement and under print().
        print(' '.join(str(part) for part in parts))

    source = '{}lubox_{}_{}.pkl'.format(datapath, type, year)
    with open(source, 'rb') as f:
        lupbp = pickle.load(f)

    df = lusubspkl.create_dfseg(year, type, games, periods, datapath)

    for game in pd.unique(df['GAME_ID']):
        game_rows = df['GAME_ID'] == game
        for team in ['HOME', 'VIS']:
            for period in pd.unique(df.loc[game_rows, 'PERIOD']):
                opening = lupbp[year, type, game, period, 1, team, 'on']
                if len(opening) != 4:
                    continue

                print('')
                emit(year, game, team, 'q' + str(period))
                period_rows = game_rows & (df['PERIOD'] == period)
                for stint in pd.unique(df.loc[period_rows, 'STINT']):
                    lineup = lupbp[year, type, game, period, stint, team, 'on']
                    emit(stint, len(lineup), lineup)
def lupbp_mult4(year, type='reg', games='ALL', periods='ALL', datapath=''):
    """Print the game/team pairs with more than one period opening four-handed.

    Loads '<datapath>lupbp_<type>_<year>.pkl'. For each game and team it
    counts the periods whose stint-1 'on' lineup has exactly four players;
    when that count exceeds one it prints a blank line, a header (year, game,
    team) and the stint-1 lineup length of every period of the game.

    Parameters:
        year, type: select the pickle and are part of every lineup key.
        games, periods, datapath: forwarded unchanged to
            lusubspkl.create_dfseg.

    Returns:
        None; output goes to stdout only.
    """
    def emit(*parts):
        # A single pre-joined string prints identically under the Python 2
        # print statement and under print().
        print(' '.join(str(part) for part in parts))

    with open('{}lupbp_{}_{}.pkl'.format(datapath, type, year), 'rb') as f:
        lupbp = pickle.load(f)

    df = lusubspkl.create_dfseg(year, type, games, periods, datapath)

    for game in pd.unique(df['GAME_ID']):
        game_periods = pd.unique(df.loc[df['GAME_ID'] == game, 'PERIOD'])
        for team in ['HOME', 'VIS']:
            # Length of the opening lineup of each period, in period order.
            opening_sizes = [
                (p, len(lupbp[year, type, game, p, 1, team, 'on']))
                for p in game_periods
            ]
            four_handed = sum(1 for _, size in opening_sizes if size == 4)
            if four_handed <= 1:
                continue

            print('')
            emit(year, game, team)
            for p, size in opening_sizes:
                emit('q' + str(p), 'len stint 1: ' + str(size))
def lupbp(year, type='reg', games='ALL', periods='ALL', datapath=''):
    """Add players seen in the play-by-play to the substitution-based lineups.

    Loads '<datapath>lusubs_<type>_<year>.pkl', then for every game and period
    collects the ids in PLAYER1_ID..PLAYER3_ID whose matching
    PLAYERn_TEAM_ID equals the HOME_ID / VIS_ID column. Each such non-zero id
    that is not yet in a stint's 'list' entry is appended to both the 'on'
    and the 'list' entry of that stint, for every stint of the period. Both
    lists are sorted afterwards and the dictionary is written to
    '<datapath>lupbp_<type>_<year>.pkl'. Progress is printed as
    '<game> <period>' once per period.

    Rows are dropped before the search when EVENTMSGTYPE is 9 or 11, or when
    EVENTMSGTYPE is 6 and EVENTMSGACTIONTYPE is one of 10, 11, 16, 19, 25.
    Per the original author's note these are events that can be credited to
    players who are not on the court.

    Parameters:
        year, type: select the pickles and are part of every lineup key.
        games, periods, datapath: forwarded unchanged to
            lusubspkl.create_dfseg.

    Returns:
        None.
    """
    with open('{}lusubs_{}_{}.pkl'.format(datapath, type, year), 'rb') as f:
        lineups = pickle.load(f)

    df = lusubspkl.create_dfseg(year, type, games, periods, datapath)

    # Action types of fouls that, per the original note, can be charged to
    # players on the bench.
    foulexc = [10, 11, 16, 19, 25]
    keep = (
        (df['EVENTMSGTYPE'] != 9)
        & (df['EVENTMSGTYPE'] != 11)
        & ((df['EVENTMSGTYPE'] != 6) | (~df['EVENTMSGACTIONTYPE'].isin(foulexc)))
    )
    df = df[keep]

    for game in pd.unique(df['GAME_ID']):
        in_game = df['GAME_ID'] == game
        for period in pd.unique(df.loc[in_game, 'PERIOD']):
            # One pre-formatted string prints the same under the Python 2
            # print statement and under print().
            print('{} {}'.format(game, period))
            in_period = in_game & (df['PERIOD'] == period)

            # The players credited with events are selected per period, not
            # per stint, so find them once here instead of once per stint.
            credited = {}
            for team in ['HOME', 'VIS']:
                seen = []
                for number in range(1, 4):
                    on_team = (df['PLAYER{}_TEAM_ID'.format(number)]
                               == df['{}_ID'.format(team)])
                    seen.extend(pd.unique(
                        df.loc[in_period & on_team, 'PLAYER{}_ID'.format(number)]))
                credited[team] = seen

            for stint in pd.unique(df.loc[in_period, 'STINT']):
                for team in ['HOME', 'VIS']:
                    on_court = lineups[year, type, game, period, stint, team, 'on']
                    known = lineups[year, type, game, period, stint, team, 'list']
                    for player in credited[team]:
                        # An id of 0 is skipped (presumably an empty slot).
                        if player != 0 and player not in known:
                            on_court.append(player)
                            known.append(player)
                    on_court.sort()
                    known.sort()

    with open('{}lupbp_{}_{}.pkl'.format(datapath, type, year), 'wb') as f:
        pickle.dump(lineups, f, pickle.HIGHEST_PROTOCOL)
def lubox_checkpt(year, type='reg', games='ALL', periods='ALL', datapath=''):
    """Compare playing time from the constructed lineups with the box score.

    Loads '<datapath>lubox_<type>_<year>.pkl' and, for every game, downloads
    the traditional box score from stats.nba.com. Column 8 of each box row
    ('MM:SS' text) is converted to 60 * MM + SS and keyed by column 4; rows
    whose column 8 is not text count as 0.

    For each team of the game, the check is skipped when the final stint of
    any period has more than five players 'on'. Otherwise STINT_TIME is summed
    per player over the stints whose 'on' lineup contains the player, and the
    absolute difference from the box-score value is computed:

    * if any player's difference exceeds `thresh`, a blank line, a header
      (year, game, team) and a box/pbp/diff line per player are printed;
    * the largest difference seen so far and its game, team and player are
      tracked;
    * a lineup player missing from the box score raises KeyError, which is
      reported and the game id recorded.

    After all games the game/team/player of the largest difference and the
    recorded exception game ids are printed.

    The exception list is created once, before the game loop. It used to be
    reset for every game/team, so the final report only reflected the last
    one processed and raised NameError when nothing had been processed.

    Parameters:
        year, type: select the pickle and are part of every lineup key; year
            also builds the Season query value.
        games, periods, datapath: forwarded unchanged to
            lusubspkl.create_dfseg.

    Returns:
        None; output goes to stdout only.
    """
    def report(*parts):
        # A single pre-joined string prints identically under the Python 2
        # print statement and under print().
        print(' '.join(str(part) for part in parts))

    with open('{}lubox_{}_{}.pkl'.format(datapath, type, year), 'rb') as f:
        lubox = pickle.load(f)
    df = lusubspkl.create_dfseg(year, type, games, periods, datapath)

    # Largest box-vs-lineup difference found and where it occurred.
    maxdiff = 0
    maxdiffgame = 0
    maxdiffteam = 0
    maxdiffplayer = 0
    # Game ids with a lineup player that is absent from the box score.
    exceptions = []

    season = str(year) + '-' + str(year + 1)

    for game in pd.unique(df['GAME_ID']):
        # NOTE(review): SeasonType is fixed to Regular+Season regardless of
        # `type`, and the request has no timeout - confirm both are intended.
        url = ('http://stats.nba.com/stats/boxscoretraditionalv2?EndPeriod=10'
               '&EndRange=50000&GameID=00{}&RangeType=0&Season={}'
               '&SeasonType=Regular+Season&StartPeriod=1&StartRange=0').format(
                   game, season)
        res = requests.get(url, headers={'USER-AGENT': 'u_a'})

        minb = {}
        for row in res.json()['resultSets'][0]['rowSet']:
            minb[row[4]] = 0
            if isinstance(row[8], unicode):
                clock = row[8].split(':')
                minb[row[4]] = (60 * int(clock[0])) + int(clock[1])

        # Per period, the (stint, STINT_TIME of the stint's first row) pairs;
        # identical for both teams, so look them up once per game.
        in_game = df['GAME_ID'] == game
        schedule = []
        for period in pd.unique(df.loc[in_game, 'PERIOD']):
            in_period = in_game & (df['PERIOD'] == period)
            timed_stints = []
            for stint in pd.unique(df.loc[in_period, 'STINT']):
                first_row = df.loc[in_period & (df['STINT'] == stint)].index[0]
                timed_stints.append((stint, df.loc[first_row, 'STINT_TIME']))
            schedule.append((period, timed_stints))

        for team in ['HOME', 'VIS']:
            # Only the last stint of each period is inspected; per the
            # original author's note every over-long lineup in the data shows
            # up in the final stint of its period.
            badlen = any([
                len(lubox[year, type, game, period,
                          timed_stints[-1][0], team, 'on']) > 5
                for period, timed_stints in schedule
            ])
            if badlen:
                continue

            # Time on court per player according to the constructed lineups.
            minp = {}
            for period, timed_stints in schedule:
                for stint, stint_time in timed_stints:
                    for player in lubox[year, type, game, period, stint, team, 'on']:
                        minp[player] = minp.get(player, 0) + stint_time

            abovethresh = False
            try:
                for player in minp.keys():
                    gap = abs(minp[player] - minb[player])
                    # NOTE(review): `thresh` is not defined in this function;
                    # presumably a module-level threshold - confirm it exists.
                    if gap > thresh:
                        abovethresh = True
                    if gap > maxdiff:
                        maxdiff = gap
                        maxdiffgame = game
                        maxdiffteam = team
                        maxdiffplayer = player

                if abovethresh:
                    print('')
                    report(year, game, team)
                    for player in minp.keys():
                        report(player,
                               'box: ' + str(minb[player]),
                               'pbp: ' + str(minp[player]),
                               'diff: ' + str(abs(minb[player] - minp[player])))
            except KeyError:
                # `player` is the lineup player that is missing from minb.
                print('')
                print('')
                report('Exception: ', year, game, team, 'p' + str(player))
                print('')
                exceptions.append(game)

    print('')
    report('MaxDiff:', year, maxdiffgame, maxdiffteam, maxdiffplayer)
    report('Exeption Game IDs: ', exceptions)