def get_team_id(xml_file):
    '''
    Get the id of the teams playing.

    Parses the xml file, collects the comma-separated player names carried
    by the player-bearing qualifiers, groups them by the team id of the
    owning event, and resolves each side ('0' / '1') via get_team().

    Returns a dict mapping side id ('0' or '1') -> team identifier, only
    for sides where get_team() matched more than one player (val > 1).
    '''
    game_df, event_df, q_df = parse_xml_file(xml_file)
    # Qualifier ids whose 'value' field holds comma-separated player names.
    q_id_with_players = [7, 30, 53, 194, 281]
    teams = {}
    for i in q_id_with_players:
        df_chunk = q_df[q_df['qualifier_id'] == str(i)]
        for row in df_chunk.iterrows():
            id_event = row[1]['id_event']
            value = row[1]['value'].split(', ')
            # BUG FIX: the original referenced the undefined global
            # 'event_df_test'; use the event_df parsed above instead.
            id_team = event_df[event_df['own_id'] == id_event].team_id.values[0]
            teams[id_team] = teams.get(id_team, []) + value
    team_dic = {}
    for t in range(2):
        # BUG FIX: .get() so a side with no collected players yields an
        # empty set instead of raising KeyError (matches the behaviour of
        # filtered_indices_team, which uses the same logic).
        teams[t] = set(teams.get(str(t), []))
        team, val = get_team(teams[t])
        if val > 1:
            team_dic[str(t)] = team
    return team_dic
def player_team_id(xml_file):
    '''
    Get the team identifier of the player of interest (player_id '1').

    Returns the team resolved by get_team_id() for that player's side,
    or the string 'None' when the side could not be resolved.
    '''
    # BUG FIX: the original called get_team_id(test_file), silently using
    # a global instead of the xml_file argument.
    team_playing = get_team_id(xml_file)
    game_df, event_df, q_df = parse_xml_file(xml_file)
    team_id = event_df[event_df['player_id'] == '1'].team_id.unique()[0]
    return team_playing.get(team_id, 'None')
def filtered_indices_team(xml_file):
    '''
    Put the previous functions together. From an xml file, get, if
    available, the list of indices to filter on when predicting the
    player identity, by restricting to the active players of a specific
    team.

    Returns the player indices of the player-of-interest's team, or
    every index in [0, 230) when the team could not be resolved.
    '''
    game_df, event_df, q_df = parse_xml_file(xml_file)
    # Qualifier ids whose 'value' field holds comma-separated player names.
    q_id_with_players = [7, 30, 53, 194, 281]
    teams = {}
    for i in q_id_with_players:
        df_chunk = q_df[q_df['qualifier_id'] == str(i)]
        for row in df_chunk.iterrows():
            id_event = row[1]['id_event']
            value = row[1]['value'].split(', ')
            id_team = event_df[event_df['own_id'] == id_event].team_id.values[0]
            teams[id_team] = teams.get(id_team, []) + value
    team_dic = {}
    for t in range(2):
        teams[t] = set(teams.get(str(t), []))
        team, val = get_team(teams[t])
        if val > 1:
            team_dic[str(t)] = team
    # FIX: the original re-parsed the xml file here; the dataframes from
    # the parse above are unchanged, so reuse them.
    team_id = event_df[event_df['player_id'] == '1'].team_id.unique()[0]
    player_team = team_dic.get(team_id, 'None')
    if player_team == 'None':
        # Unresolved team: fall back to all 230 player indices.
        return list(range(230))
    return [player_to_idx[i] for i in dic_team_active[player_team]]
def get_feature_vector(file):
    '''
    Extract the final feature vector, which is the concatenation of the
    team feature vector and the player feature vector.
    '''
    game_df, event_df, q_df = parse_xml_file(file)
    # Cast the columns used downstream to their numeric types.
    for col, typ in (('min', int), ('x', float), ('y', float), ('period_id', int)):
        event_df[col] = event_df[col].astype(typ)
    team = event_df[event_df['player_id'] == '1']['team_id'].unique()
    # Keep everything but the 10 anonymised events at the end of the file.
    past_events = event_df.loc[event_df.index[:-10], :]
    vec_player = get_features_vector_player(q_df, past_events, '1')
    vec = get_features_vector(q_df, past_events)
    return np.concatenate([vec[int(team[0])], vec_player])
def get_array_file_position(xml_file):
    '''
    Build a (1, 10, 53) array describing the last 10 events of the file:
    a one-hot encoding of the event type (via event_to_idx), then x, y
    and team_id in the last three slots.
    '''
    game_df, event_df, q_df = parse_xml_file(xml_file)
    # Cast the columns used below to their numeric types.
    for col, typ in (('min', int), ('x', float), ('y', float), ('team_id', int)):
        event_df[col] = event_df[col].astype(typ)
    # The 10 trailing (anonymised) events, restricted to the used columns.
    last_ten = event_df.loc[event_df.index[-10:]]
    last_ten = last_ten[['team_id', 'x', 'y', 'type_id']]
    encoded = np.zeros((10, 53))
    for pos, type_id in enumerate(last_ten.type_id.values):
        encoded[pos, event_to_idx[type_id]] = 1
    encoded[:, -3] = last_ten.x.values
    encoded[:, -2] = last_ten.y.values
    encoded[:, -1] = last_ten.team_id.values
    return encoded[np.newaxis, :, :]
def get_array_file(file):
    '''
    Construct the feature vectors out of a 15 min xml file that will be
    fed into the neural networks for the prediction of the next team.

    The first returned array aggregates, for both teams, the count of
    each event type alongside the ratio of positive outcomes for that
    event. The second is an array gathering the position (x, y), the
    team id and the type_id of the past 10 (anonymised) events, with x
    and y normalised by their maxima over the file.

    Returns (batch_vec, batch_input) as two stacked numpy arrays.
    '''
    batch_input = []
    batch_vec = []
    game_df, event_df, q_df = parse_xml_file(file)
    home_team_id, away_team_id = game_df['home_team_id'][0], game_df['away_team_id'][0]
    #event_df['team_id'] = event_df['team_id'].map({home_team_id: '1', away_team_id:'0'})
    event_df['min'] = event_df['min'].astype(int)
    event_df['x'] = event_df['x'].astype(float)
    event_df['y'] = event_df['y'].astype(float)
    event_df['period_id'] = event_df['period_id'].astype(int)
    maxx = event_df['x'].max()
    maxy = event_df['y'].max()
    event_df = event_df[event_df['min'].between(1, 90)]
    # Events before the 10 anonymised ones. FIX: .copy() so the column
    # mutation below does not write through a view (SettingWithCopy).
    past_df = event_df.loc[event_df.index[:-10]].copy()
    past_df['outcome'] = past_df['outcome'].astype(int)
    # The 10 last, anonymised events.
    df_chunk = event_df.loc[event_df.index[-10:]]
    # Collect the per-team vectors of aggregated past events.
    team_vecs = []
    for team in range(2):
        # Select the concerned team.
        df_event_chunk = past_df[past_df['team_id'] == str(team)]
        # Occurrences of each event type for this team.
        dic_event = dict(Counter(df_event_chunk.type_id))
        vec_e = np.zeros(50 * 2)
        for feature in dic_event:
            mapped = event_to_idx.get(feature, None)
            # Idiom fix: 'is not None' instead of '!= None'.
            if mapped is not None:
                outcomes = list(df_event_chunk[df_event_chunk['type_id'] == feature].outcome)
                if outcomes:
                    # Ratio of positive outcomes for this event type.
                    vec_e[mapped * 2 + 1] = np.round(np.mean(outcomes), 3)
                # Event count; outcomes is non-empty whenever the event
                # occurred, so this runs for every mapped feature.
                vec_e[mapped * 2] = dic_event[feature]
        team_vecs.append(vec_e)
    vec = np.concatenate(team_vecs, axis=0)
    batch_vec.append(vec[np.newaxis, :])
    df_chunk = df_chunk[['team_id', 'x', 'y', 'type_id']]
    batch_input_vec = np.array(df_chunk)
    # Normalise the coordinates by the file-wide maxima.
    batch_input_vec[:, 1] /= maxx
    batch_input_vec[:, 2] /= maxy
    batch_input.append(batch_input_vec[np.newaxis, :, :])
    return np.concatenate(batch_vec, axis=0), np.concatenate(batch_input, axis=0)