Ejemplo n.º 1
0
def get_team_id(xml_file):
    ''' Get the id of the teams playing.

    Collects, for each side of the match, the values carried by a fixed set
    of qualifiers (presumably player ids -- confirm against the feed
    specification), then asks ``get_team`` which team these values belong to.

    Args:
        xml_file: File handed as-is to ``parse_xml_file``.

    Returns:
        dict mapping the side key ('0' and/or '1') to the team returned by
        ``get_team``. A side is left out when the second value returned by
        ``get_team`` is not greater than 1 (presumably too few matches to
        trust the result -- confirm).

    Raises:
        IndexError: if a qualifier points to an event id that is absent from
            the event table.
    '''
    _, event_df, q_df = parse_xml_file(xml_file)

    q_id_with_players = [7, 30, 53, 194, 281]
    teams = {}

    for q_id in q_id_with_players:
        # qualifier_id is stored as a string in the parsed frame.
        df_chunk = q_df[q_df['qualifier_id'] == str(q_id)]
        for _, row in df_chunk.iterrows():
            id_event = row['id_event']
            value = row['value'].split(', ')
            # Look the owning event up in the frame parsed above (the
            # previous code read an unrelated, undefined `event_df_test`).
            id_team = event_df[event_df['own_id'] == id_event].team_id.values[0]
            teams.setdefault(id_team, []).extend(value)

    team_dic = {}
    for side in ('0', '1'):
        # A side without any matching qualifier yields an empty set instead
        # of a KeyError.
        team, val = get_team(set(teams.get(side, [])))
        if val > 1:
            team_dic[side] = team
    return team_dic
Ejemplo n.º 2
0
def player_team_id(xml_file):
    ''' Get the team of the player of interest (player_id '1').

    Args:
        xml_file: File handed as-is to ``get_team_id`` and ``parse_xml_file``.

    Returns:
        The team that ``get_team_id`` associates with the side player '1'
        plays for, or the string 'None' when that side was not identified or
        when player '1' has no event in the file.
    '''
    # Use the file received as argument (the previous code read an unrelated
    # `test_file` name here).
    team_playing = get_team_id(xml_file)

    _, event_df, _ = parse_xml_file(xml_file)
    player_sides = event_df[event_df['player_id'] == '1'].team_id.unique()
    if len(player_sides) == 0:
        # No event for player '1': report "unknown" rather than IndexError.
        return 'None'
    return team_playing.get(player_sides[0], 'None')
Ejemplo n.º 3
0
def filtered_indices_team(xml_file):
    ''' Get the list of player indices to restrict a prediction to.

    From an xml file, identify the team on each side, find the side that
    player '1' plays for, and return the indices (through ``player_to_idx``)
    of the players listed for that team in ``dic_team_active``.

    Args:
        xml_file: File handed as-is to ``parse_xml_file``.

    Returns:
        list of int. When the team of player '1' cannot be determined, every
        index from 0 to 229 is returned, i.e. no filtering is applied.

    Raises:
        IndexError: if a qualifier points to an event id that is absent from
            the event table.
    '''
    # The file is parsed once; both steps below read the same frames.
    _, event_df, q_df = parse_xml_file(xml_file)

    q_id_with_players = [7, 30, 53, 194, 281]
    teams = {}

    for q_id in q_id_with_players:
        # qualifier_id is stored as a string in the parsed frame.
        df_chunk = q_df[q_df['qualifier_id'] == str(q_id)]
        for _, row in df_chunk.iterrows():
            id_event = row['id_event']
            value = row['value'].split(', ')
            id_team = event_df[event_df['own_id'] == id_event].team_id.values[0]
            teams.setdefault(id_team, []).extend(value)

    team_dic = {}
    for side in ('0', '1'):
        team, val = get_team(set(teams.get(side, [])))
        # Keep the team only when get_team's second value exceeds 1
        # (presumably a confidence/match count -- confirm).
        if val > 1:
            team_dic[side] = team

    player_sides = event_df[event_df['player_id'] == '1'].team_id.unique()
    # 'None' (a string) is the "unknown team" marker used by this module.
    player_team = 'None'
    if len(player_sides) > 0:
        player_team = team_dic.get(player_sides[0], 'None')

    if player_team == 'None':
        return list(range(230))

    return [player_to_idx[i] for i in dic_team_active[player_team]]
Ejemplo n.º 4
0
def get_feature_vector(file):
    ''' Extract the final feature vector of one xml file.

    The result is the concatenation of the team feature vector (the entry of
    ``get_features_vector`` selected by the team id of player '1', used as an
    integer index) followed by the feature vector of player '1' returned by
    ``get_features_vector_player``. Both are computed on every event except
    the last ten rows of the event table.
    '''
    _, events, qualifiers = parse_xml_file(file)

    # Convert the columns below to numeric types before building features.
    for column, dtype in (('min', int), ('x', float),
                          ('y', float), ('period_id', int)):
        events[column] = events[column].astype(dtype)

    player_rows = events[events['player_id'] == '1']
    team_ids = player_rows['team_id'].unique()

    def history():
        # A fresh selection on every call, so each feature builder gets its
        # own frame.
        return events.loc[events.index[:-10], :]

    player_vector = get_features_vector_player(qualifiers, history(), '1')
    team_vectors = get_features_vector(qualifiers, history())

    return np.concatenate([team_vectors[int(team_ids[0])], player_vector])
Ejemplo n.º 5
0
def get_array_file_position(xml_file):
    ''' Encode the last ten events of an xml file as a (1, 10, 53) array.

    Each row describes one event: a 1 is written in the column given by
    ``event_to_idx`` for the event's type_id, and the last three columns hold
    x, y and team_id, in that order. A type_id missing from ``event_to_idx``
    raises KeyError.
    '''
    _, events, _ = parse_xml_file(xml_file)

    for column, dtype in (('min', int), ('x', float),
                          ('y', float), ('team_id', int)):
        events[column] = events[column].astype(dtype)

    # Keep the ten trailing rows and only the columns that get encoded.
    tail = events.loc[events.index[-10:]]
    tail = tail[['team_id', 'x', 'y', 'type_id']]

    encoded = np.zeros((10, 53))
    for row_number, type_id in enumerate(tail['type_id'].values):
        encoded[row_number, event_to_idx[type_id]] = 1

    encoded[:, -3] = tail['x'].values
    encoded[:, -2] = tail['y'].values
    encoded[:, -1] = tail['team_id'].values

    # Add a leading axis of length one.
    return encoded[np.newaxis, :, :]
Ejemplo n.º 6
0
def get_array_file(file):
    ''' Construct the arrays built from one xml file that, per the original
    description, are fed to the neural network predicting the next team.

    Events are first restricted to minutes 1 to 90 (inclusive). All of them
    but the last ten are aggregated per team ('0' then '1'): for every event
    type known to ``event_to_idx``, the team vector stores the number of
    occurrences at position ``2 * idx`` and the mean outcome, rounded to
    three decimals, at position ``2 * idx + 1``. Event types missing from
    ``event_to_idx`` are ignored.

    The last ten events are returned separately as rows of
    (team_id, x, y, type_id), x and y being divided by the largest x and y
    found in the file before the minute filter is applied.

    Returns:
        tuple ``(vec, last_events)``:
        - ``vec``: array of shape (1, 200), the team '0' block followed by
          the team '1' block.
        - ``last_events``: array of shape (1, n, 4) where n is the number of
          trailing events selected (10 when enough events are available).
          Columns have mixed types, so this is presumably an object-dtype
          array -- confirm against the consumer.
    '''
    _, event_df, _ = parse_xml_file(file)

    for column, dtype in (('min', int), ('x', float),
                          ('y', float), ('period_id', int)):
        event_df[column] = event_df[column].astype(dtype)

    # Normalisation constants are taken before the minute filter below.
    maxx = event_df['x'].max()
    maxy = event_df['y'].max()

    event_df = event_df[event_df['min'].between(1, 90)]

    # Everything before the last ten events. Explicit copy: a column is
    # overwritten right after, which must not target a slice of event_df.
    past_df = event_df.loc[event_df.index[:-10]].copy()
    past_df['outcome'] = past_df['outcome'].astype(int)

    # The last ten events.
    df_chunk = event_df.loc[event_df.index[-10:]]

    # One aggregated vector per team, concatenated afterwards.
    team_vectors = []
    for team in range(2):
        # team_id is compared as a string in the parsed frame.
        team_events = past_df[past_df['team_id'] == str(team)]
        vec_e = np.zeros(50 * 2)

        for type_id, occurrences in Counter(team_events.type_id).items():
            mapped = event_to_idx.get(type_id)
            if mapped is None:
                continue
            outcomes = team_events.loc[
                team_events['type_id'] == type_id, 'outcome'
            ].values
            if outcomes.size:
                vec_e[mapped * 2 + 1] = np.round(np.mean(outcomes), 3)
            vec_e[mapped * 2] = occurrences
        team_vectors.append(vec_e)

    vec = np.concatenate(team_vectors, axis=0)

    batch_input_vec = np.array(df_chunk[['team_id', 'x', 'y', 'type_id']])
    batch_input_vec[:, 1] /= maxx
    batch_input_vec[:, 2] /= maxy

    # Both outputs get a leading axis of length one.
    return vec[np.newaxis, :], batch_input_vec[np.newaxis, :, :]