コード例 #1
0
ファイル: lupbp_badlen.py プロジェクト: palpen/basketball_Qs
def lupbp_badlen(year, type = 'reg', games = 'ALL', periods = 'ALL', datapath = ''):
  # Open pickle of lusubs for specified type and year
  with open('{}lupbp_{}_{}.pkl'.format(datapath, type, year), 'rb') as f:
    lupbp = pickle.load(f)
  f.close()
  # Create pandas dataframe for specified year, type, games and periods
  df = lusubspkl.create_dfseg(year, type, games, periods, datapath)
  
  for game in pd.unique(df['GAME_ID']):
    for period in pd.unique(df.loc[df['GAME_ID']==game, 'PERIOD']):
      for team in ['HOME', 'VIS']:
        badlen = 0
        unevenlen = 0
        if len(lupbp[year, type, game, period, 1, team, 'on']) not in [4,5]:
          badlen = 1
        for stint in pd.unique(df.loc[(df['GAME_ID']==game) & (df['PERIOD']==period), 'STINT'])[1:]:
          if len(lupbp[year, type, game, period, stint, team, 'on']) != len(lupbp[year, type, game, period, stint-1, team, 'on']):
            unevenlen = 1
        if (badlen == 1) or (unevenlen == 1):
          print ''
          print year, game, team, 'q' + str(period)
          for stint in pd.unique(df.loc[(df['GAME_ID']==game) & (df['PERIOD']==period), 'STINT']):
            print stint, len(lupbp[year, type, game, period, stint, team, 'on']), lupbp[year, type, game, period, stint, team, 'on'] 
          for p in pd.unique(df.loc[df['GAME_ID']==game, 'PERIOD']):
            print 'q' + str(p), 'len stint 1: ' + str(len(lupbp[year, type, game, p, 1, team, 'on']))
コード例 #2
0
ファイル: luboxpkl.py プロジェクト: palpen/basketball_Qs
def lubox(year, type = 'reg', games = 'ALL', periods = 'ALL', datapath = ''):
  # Open pickle of lusubs for specified type and year
  with open('{}lupbp_{}_{}.pkl'.format(datapath, type, year), 'rb') as f:
    lubox = pickle.load(f)
  f.close()
  # Create pandas dataframe for specified year, type, games and periods
  df = lusubspkl.create_dfseg(year, type, games, periods, datapath)

  for game in pd.unique(df['GAME_ID']):
    for team in ['HOME', 'VIS']:
      teamid = team + '_ID'
      linew4 = 0
      for period in pd.unique(df.loc[df['GAME_ID']==game, 'PERIOD']):
        if len(lubox[year, type, game, period, 1, team, 'on']) == 4:
          linew4 = 1
      if linew4 ==1:
        print game
        hminp = {}
        for period in pd.unique(df.loc[df['GAME_ID']==game, 'PERIOD']):
          for stint in pd.unique(df.loc[(df['GAME_ID']==game) & (df['PERIOD']==period), 'STINT']):
            for player in lubox[year, type, game, period, stint, team, 'on']:
              if player in hminp.keys():             
                hminp[player] = hminp[player] + df.loc[df.loc[(df['GAME_ID']==game) & (df['PERIOD']==period) & (df['STINT']==stint)].index[0], 'STINT_TIME']      
              if player not in hminp.keys():
                hminp[player] = df.loc[df.loc[(df['GAME_ID']==game) & (df['PERIOD']==period) & (df['STINT']==stint)].index[0], 'STINT_TIME']
        
        with open('{}_{}'.format(year, game), 'r') as f:
          res = json.load(f)
        f.close()
        hminb = {}
        for player in res['resultSets'][0]['rowSet']:
          if isinstance(player[8], unicode):
            hminb[player[4]] = (60*int(player[8].split(':')[0])) + int(player[8].split(':')[1])
            
        hmindiff = {}
        for player in hminp.keys():
          hmindiff[player] = hminb[player] - hminp[player]
          
        for period in pd.unique(df.loc[df['GAME_ID']==game, 'PERIOD']):
          if len(lubox[year, type, game, period, 1, team, 'on']) == 4:
            for stint in pd.unique(df.loc[(df['GAME_ID']==game) & (df['PERIOD']==period), 'STINT']):
              for player in hmindiff.keys():
                if (hmindiff[player] > 200) & (player not in lubox[year, type, game, period, stint, team, 'list']):
                  lubox[year, type, game, period, stint, team, 'on'].append(player)
                  lubox[year, type, game, period, stint, team, 'on'].sort()
                  lubox[year, type, game, period, stint, team, 'list'].append(player)
                  lubox[year, type, game, period, stint, team, 'list'].sort()
                  
  with open('{}lubox_{}_{}.pkl'.format(datapath, type, year), 'wb') as f:
    pickle.dump(lubox, f, pickle.HIGHEST_PROTOCOL)    
  f.close()
コード例 #3
0
def lubox_len4(year, type = 'reg', games = 'ALL', periods = 'ALL', datapath = ''):
  # Open pickle of lusubs for specified type and year
  with open('{}lubox_{}_{}.pkl'.format(datapath, type, year), 'rb') as f:
    lupbp = pickle.load(f)
  f.close()
  # Create pandas dataframe for specified year, type, games and periods
  df = lusubspkl.create_dfseg(year, type, games, periods, datapath)
  
  for game in pd.unique(df['GAME_ID']):
    for team in ['HOME', 'VIS']:
      for period in pd.unique(df.loc[df['GAME_ID']==game, 'PERIOD']):
        if len(lupbp[year, type, game, period, 1, team, 'on']) == 4:
          print ''
          print year, game, team, 'q' + str(period)
          for stint in pd.unique(df.loc[(df['GAME_ID']==game) & (df['PERIOD']==period), 'STINT']):
            print stint, len(lupbp[year, type, game, period, stint, team, 'on']), lupbp[year, type, game, period, stint, team, 'on']
コード例 #4
0
def lupbp_mult4(year, type='reg', games='ALL', periods='ALL', datapath=''):
    # Open pickle of lusubs for specified type and year
    with open('{}lupbp_{}_{}.pkl'.format(datapath, type, year), 'rb') as f:
        lupbp = pickle.load(f)
    f.close()
    # Create pandas dataframe for specified year, type, games and periods
    df = lusubspkl.create_dfseg(year, type, games, periods, datapath)

    for game in pd.unique(df['GAME_ID']):
        for team in ['HOME', 'VIS']:
            count4 = 0
            for period in pd.unique(df.loc[df['GAME_ID'] == game, 'PERIOD']):
                if len(lupbp[year, type, game, period, 1, team, 'on']) == 4:
                    count4 = count4 + 1
            if count4 > 1:
                print ''
                print year, game, team
                for p in pd.unique(df.loc[df['GAME_ID'] == game, 'PERIOD']):
                    print 'q' + str(p), 'len stint 1: ' + str(
                        len(lupbp[year, type, game, p, 1, team, 'on']))
コード例 #5
0
ファイル: lupbppkl.py プロジェクト: palpen/basketball_Qs
def lupbp(year, type='reg', games='ALL', periods='ALL', datapath=''):
    # Open pickle of lusubs for specified type and year
    with open('{}lusubs_{}_{}.pkl'.format(datapath, type, year), 'rb') as f:
        lupbp = pickle.load(f)
    f.close()
    # Create pandas dataframe for specified year, type, games and periods
    df = lusubspkl.create_dfseg(year, type, games, periods, datapath)
    # Create vector of eventmsgactiontype for (technical) fouls that can be credited to players on bench, and removing rows with ejections, timeouts, substitutions and technical fouls (all of which can be credited to players off court)
    foulexc = [10, 11, 16, 19, 25]
    df = df[(df['EVENTMSGTYPE'] != 9) & (df['EVENTMSGTYPE'] != 11) &
            ((df['EVENTMSGTYPE'] != 6) |
             (~df['EVENTMSGACTIONTYPE'].isin(foulexc)))]
    # Addinng players in pbp but not in subs to each associated lineup
    for game in pd.unique(df['GAME_ID']):
        for period in pd.unique(df.loc[df['GAME_ID'] == game, 'PERIOD']):
            print game, period
            for stint in pd.unique(df.loc[(df['GAME_ID'] == game) &
                                          (df['PERIOD'] == period), 'STINT']):
                for team in ['HOME', 'VIS']:
                    for number in range(1, 4):
                        for player in pd.unique(df.loc[
                            (df['GAME_ID'] == game) & (df['PERIOD'] == period)
                                & (df['PLAYER{}_TEAM_ID'.format(number)] ==
                                   df['{}_ID'.format(team)]),
                                'PLAYER{}_ID'.format(number)]):
                            if (player not in lupbp[year, type, game, period,
                                                    stint, team,
                                                    'list']) & (player != 0):
                                lupbp[year, type, game, period, stint, team,
                                      'on'].append(player)
                                lupbp[year, type, game, period, stint, team,
                                      'list'].append(player)
                    # Sorting 'on' and 'list' lists
                    lupbp[year, type, game, period, stint, team, 'on'].sort()
                    lupbp[year, type, game, period, stint, team, 'list'].sort()
    # Saving pbp lineups as pickle
    with open('{}lupbp_{}_{}.pkl'.format(datapath, type, year), 'wb') as f:
        pickle.dump(lupbp, f, pickle.HIGHEST_PROTOCOL)
    f.close()
コード例 #6
0
def lubox_checkpt(year, type='reg', games='ALL', periods='ALL', datapath=''):
    #open constructed lineups pickle, and create pandas dataframe using lusubspkl and pbp_stint csv
    with open('{}lubox_{}_{}.pkl'.format(datapath, type, year), 'rb') as f:
        lubox = pickle.load(f)
    f.close()
    df = lusubspkl.create_dfseg(year, type, games, periods, datapath)

    #Create variables for the max difference in minutes and the associated game, team and player
    maxdiff = 0
    maxdiffgame = 0
    maxdiffteam = 0
    maxdiffplayer = 0
    for game in pd.unique(df['GAME_ID']):
        #print maxdiff
        #get minutes from box score for each player
        season = str(year) + '-' + str(year + 1)
        url = 'http://stats.nba.com/stats/boxscoretraditionalv2?EndPeriod=10&EndRange=50000&GameID=00{}&RangeType=0&Season={}&SeasonType=Regular+Season&StartPeriod=1&StartRange=0'.format(
            game, season)
        res = requests.get(url, headers={'USER-AGENT': 'u_a'})
        minb = {}
        for player in res.json()['resultSets'][0]['rowSet']:
            minb[player[4]] = 0
            if isinstance(player[8], unicode):
                minb[player[4]] = (60 * int(player[8].split(':')[0])) + int(
                    player[8].split(':')[1])

        for team in ['HOME', 'VIS']:
            #print game, team
            #check if gameXteam has lineups of bad length (length > 5) by checking the length of the last stint in each period (all 11 lineups of bad length have bad length in the final stint of the bad period)
            badlen = 0
            for period in pd.unique(df.loc[df['GAME_ID'] == game, 'PERIOD']):
                if len(lubox[year, type, game, period,
                             pd.unique(df.loc[(df['GAME_ID'] == game) &
                                              (df['PERIOD'] == period),
                                              'STINT'])[-1], team, 'on']) > 5:
                    badlen = 1

            #Compare minutes in gameXteam if all stints are not of bad length
            if badlen == 0:
                #Calculate minutes of players in constructed lineups
                minp = {}
                for period in pd.unique(df.loc[df['GAME_ID'] == game,
                                               'PERIOD']):
                    for stint in pd.unique(df.loc[(df['GAME_ID'] == game) &
                                                  (df['PERIOD'] == period),
                                                  'STINT']):
                        for player in lubox[year, type, game, period, stint,
                                            team, 'on']:
                            if player in minp.keys():
                                minp[player] = minp[player] + df.loc[
                                    df.loc[(df['GAME_ID'] == game)
                                           & (df['PERIOD'] == period) &
                                           (df['STINT'] == stint)].index[0],
                                    'STINT_TIME']
                            if player not in minp.keys():
                                minp[player] = df.loc[
                                    df.loc[(df['GAME_ID'] == game)
                                           & (df['PERIOD'] == period) &
                                           (df['STINT'] == stint)].index[0],
                                    'STINT_TIME']

                #For each player, calulate the absolute value of difference between pbp and box minutes. If above threshold, print details. In some cases, player ids in play-by-play not in box score. Create exception, and print relevant details.
                abovethresh = 0
                exceptions = []
                try:
                    for player in minp.keys():
                        if abs(minp[player] - minb[player]) > thresh:
                            abovethresh = 1
                        if abs(minp[player] - minb[player]) > maxdiff:
                            maxdiff = abs(minp[player] - minb[player])
                            maxdiffgame = game
                            maxdiffteam = team
                            maxdiffplayer = player
                    if abovethresh == 1:
                        print ''
                        print year, game, team
                        for player in minp.keys():
                            print player, 'box: ' + str(
                                minb[player]), 'pbp: ' + str(
                                    minp[player]), 'diff: ' + str(
                                        abs(minb[player] - minp[player]))
                except KeyError:
                    print ''
                    print ''
                    print 'Exception: ', year, game, team, 'p' + str(player)
                    print ''
                    exceptions.append(game)

    #Print maxdiff stuff
    print ''
    print 'MaxDiff:', year, maxdiffgame, maxdiffteam, maxdiffplayer
    print 'Exeption Game IDs: ', exceptions