def read_shots(dirname, game_code, quarter, players_data, events_data): # Read the pbp document pbp_filename = 'NBA_FINALPBP_EXP${}.XML'.format(game_code) document = ElementTree.parse(os.path.join(dirname, pbp_filename)) sports_statistics = document.getroot() sports_scores = next(c for c in sports_statistics if c.tag == 'sports-scores') nba_scores = next(c for c in sports_scores if c.tag == 'nba-scores') nba_pbp = next(c for c in nba_scores if c.tag == 'nba-playbyplay') # Read the box document box_filename = 'NBA_FINALBOX_OPTICAL${}.XML'.format(game_code) document_box = ElementTree.parse(os.path.join(dirname, box_filename)) sports_statistics_box = document_box.getroot() sports_boxscores = next(c for c in sports_statistics_box if c.tag == 'sports-boxscores') nba_boxscores = next(c for c in sports_boxscores if c.tag == 'nba-boxscores') nba_boxscore = next(c for c in nba_boxscores if c.tag == 'nba-boxscore') # Compile all the data shots_result = [] # For every play in the file for play in [c for c in nba_pbp if c.tag == 'play']: # Only look at the requested quarter if int(play.attrib['quarter']) != quarter: continue # Only look at shots event_id = int(play.attrib['event-id']) if event_id != PlayTypes.FIELD_GOAL_MADE and event_id != PlayTypes.FIELD_GOAL_MISSED: continue # Compute the time and link to the events time = float(play.attrib['time-minutes']) * 60 + float( play.attrib['time-seconds']) event_index = np.argmin(np.abs(events_data['game clock'] - time)) event = events_data[event_index] # Cross reference to find the shot in the shots file best_shot_from_file = None best_shot_from_file_dist = 100000000.0 for players_team in [c for c in nba_boxscore if c.tag == 'players']: for player in [c for c in players_team if c.tag == 'player']: try: shot_log = next(c for c in player if c.tag == 'shot-log') except StopIteration: continue for shot_from_file in [c for c in shot_log if c.tag == 'shot']: shot_from_file_time = float(shot_from_file.attrib[ 'game-clock'].split(':')[0]) * 60 + float( shot_from_file.attrib['game-clock'].split(':')[1]) shot_from_file_dist = abs(time - shot_from_file_time) if shot_from_file_dist < best_shot_from_file_dist: best_shot_from_file_dist = shot_from_file_dist best_shot_from_file = shot_from_file if best_shot_from_file is None: print("Can't find shot for event at time {}".format( events_data['game clock'])) continue dribbles = int(best_shot_from_file.attrib['dribbles']) defender_dist = float( next(c for c in best_shot_from_file if c.tag == 'closest-defender').attrib['defender-distance']) if len(best_shot_from_file.attrib['x-coordinate']) > 0 and \ len(best_shot_from_file.attrib['y-coordinate']) > 0: position = [ float(best_shot_from_file.attrib['x-coordinate']), float(best_shot_from_file.attrib['y-coordinate']) ] else: position = [float('nan'), float('nan')] if len(best_shot_from_file.attrib['shot-distance']): shot_dist = float(best_shot_from_file.attrib['shot-distance']) else: shot_dist = 10000 # Record the shot details description = play.attrib['detail-description'] shooter_id = int(play.attrib['global-player-id-1']) shooter = players_data[shooter_id] new_shot = Shot(event, position, description, shooter) new_shot.result = 1 if best_shot_from_file.attrib[ 'result'] == 'made' else 0 new_shot.points = int(best_shot_from_file.attrib['points-type']) # Compute shot quality new_shot.calculate_shot_quality(dribbles, defender_dist, shot_dist) shots_result.append(new_shot) return shots_result