def _update_players_teams():
    """Refresh every player's team columns in ``players_data`` from stored line-ups.

    Match headers are read in ascending date order, so for a player who
    appears in several matches the team recorded in the latest match wins.
    For each collected player the ``whoscoredId`` / ``whoscoredName`` columns
    of ``players_data`` are overwritten and the table is saved once at the end.

    Matches without an ``additional_info`` document are skipped.
    """
    global players_data

    # player id -> (team whoscoredId, team whoscoredName); later matches overwrite.
    latest_team_by_player = {}

    sample = db['match_headers'].find().sort([('date', pymongo.ASCENDING)])
    # NOTE(review): Cursor.count() no longer exists in PyMongo 4 - confirm the
    # pinned driver version before upgrading.
    for match_header in tqdm.tqdm(sample, total=sample.count()):
        home = match_header['home']
        home_id = get_value(teams_data, 'whoscoredName', home, 'whoscoredId')
        away = match_header['away']
        away_id = get_value(teams_data, 'whoscoredName', away, 'whoscoredId')

        additional_info = db['additional_info'].find_one(
            {'match_uuid': match_header['uuid']})
        if additional_info is None:
            # find_one() yields None when nothing matches; without this guard
            # the membership tests below would raise TypeError.
            continue

        for player_data in additional_info.get('homePlayers', ()):
            latest_team_by_player[player_data['playerId']] = (home_id, home)
        for player_data in additional_info.get('awayPlayers', ()):
            latest_team_by_player[player_data['playerId']] = (away_id, away)

    for player_id, (team_id, team_name) in latest_team_by_player.items():
        players_data.loc[player_id, 'whoscoredId'] = team_id
        players_data.loc[player_id, 'whoscoredName'] = team_name

    save_players_data()
def get_match_uuid_by_intelbet_match(intelbet_match):
    """Find the stored match that corresponds to an Intelbet match record.

    The Intelbet team names are translated to WhoScored names through
    ``teams_data``. If either translation yields ``None`` the function
    returns ``None``; otherwise it returns whatever
    ``get_match_header_by_date_and_teams`` returns for the converted date
    and the two WhoScored names.
    """
    match_date = dateize(intelbet_match['date'])

    # Both lookups are always performed, home first, before the None check.
    whoscored_names = [
        get_value(teams_data, 'intelbetName', intelbet_match[side], 'whoscoredName')
        for side in ('home', 'away')
    ]
    if any(name is None for name in whoscored_names):
        return None

    home_name, away_name = whoscored_names
    return get_match_header_by_date_and_teams(match_date, home_name, away_name)
def _print_unmatched_player_names(match_header, is_home):
    """Render an HTML form for pairing one team's unmatched Intelbet player names.

    Args:
        match_header: mapping with the ``'home'`` and ``'away'`` team names.
        is_home: when true the home team is rendered, otherwise the away team.

    Returns:
        An HTML string. It is empty when no Intelbet player names are
        recorded for the team. Otherwise it is a ``<form>`` posting to
        ``/match_player_names`` with one ``<select name="player_<name>">``
        per Intelbet name that has no WhoScored name yet; each select lists
        the team's WhoScored player names whose ``intelbetPlayerName`` is null.
    """
    import html

    team = match_header['home'] if is_home else match_header['away']

    intelbet_player_names = intelbet_player_names_df.loc[
        intelbet_player_names_df['whoscoredName'] == team,
        'intelbetPlayerName'
    ].tolist()

    is_team_player = players_data['whoscoredName'] == team
    unmatched_whoscored_names = players_data.loc[
        is_team_player & players_data['intelbetPlayerName'].isnull(),
        'whoscoredPlayerName'
    ].tolist()
    team_players_data = players_data[is_team_player]

    if not intelbet_player_names:
        return ''

    # The candidate list is the same for every <select>, so build it once.
    # html.escape() also escapes quotes, which makes the value safe inside a
    # double-quoted attribute; a backslash is not an escape character in HTML.
    options_html = ''.join(
        '<option value="%s">%s</option>' % (html.escape(name), html.escape(name))
        for name in unmatched_whoscored_names
    )

    parts = ['<form action="/match_player_names" method="post">']
    for intelbet_player_name in sorted(intelbet_player_names):
        whoscored_player_name = get_value(
            team_players_data, 'intelbetPlayerName', intelbet_player_name,
            'whoscoredPlayerName')
        if whoscored_player_name is not None and whoscored_player_name != '':
            # Already paired with a WhoScored player.
            continue

        # The browser decodes the entities again, so the submitted field name
        # still carries the raw Intelbet name.
        escaped_intelbet_name = html.escape(intelbet_player_name)
        parts.append(escaped_intelbet_name)
        parts.append(' ')
        parts.append('<select name="player_%s">' % (escaped_intelbet_name,))
        parts.append('<option value="" selected="selected"></option>')
        parts.append(options_html)
        parts.append('</select>')
        parts.append('<br>')

    parts.append('<input type="submit" value="Задать соответствия">')
    parts.append('</form>')
    return ''.join(parts)
def _get_additional_info_of_intelbet_match(intelbet_match):
    """Build the ``additional_info`` line-up dict for an Intelbet match.

    For each side whose ``'<side>PlayerNames'`` key is present in
    ``intelbet_match``, a ``'<side>Players'`` list is produced. Every
    Intelbet player name is looked up among the ``players_data`` rows of that
    side's team. Known players are appended as
    ``{'playerId', 'playerName', 'isFirstEleven': True}``; unknown ones are
    printed, registered through ``intelbet_player_names_add`` and left out.

    Returns:
        A dict holding ``'homePlayers'`` and/or ``'awayPlayers'``; it is
        empty when neither player-name key is present.
    """
    additional_info = {}

    # (team key, player-names key, output key); home is processed first.
    sides = (
        ('home', 'homePlayerNames', 'homePlayers'),
        ('away', 'awayPlayerNames', 'awayPlayers'),
    )
    for team_key, names_key, players_key in sides:
        if names_key not in intelbet_match:
            continue

        side_players = additional_info[players_key] = []
        team = get_value(teams_data, 'intelbetName', intelbet_match[team_key], 'whoscoredName')
        team_players_data = players_data[players_data['whoscoredName'] == team]

        for intelbet_player_name in intelbet_match[names_key]:
            player_name = get_value(
                team_players_data, 'intelbetPlayerName', intelbet_player_name,
                'whoscoredPlayerName')
            if player_name is None:
                print('Unknown player: %s' % (intelbet_player_name,))
                intelbet_player_names_add(intelbet_player_name, team)
                continue

            # The id must come from this side's own roster on every iteration;
            # otherwise an away player would carry an id left over from the
            # home loop (or no id would be bound at all).
            player_id = get_value(
                team_players_data, 'intelbetPlayerName', intelbet_player_name,
                'whoscoredPlayerId')
            # FIXME: because of team renames, some players have a team name
            # whose identifier cannot be determined. In that case '' (``None``
            # after parsing) appears in the 'whoscoredPlayerId' column and the
            # column dtype becomes ``np.float64``. The int() cast below is a
            # workaround for that.
            player_id = int(player_id) if player_id is not None else None

            side_players.append({
                'playerId': player_id,
                'playerName': player_name,
                'isFirstEleven': True
            })

    return additional_info
def _create_player_if_neccessary(whoscored_player_id, whoscored_player_name, team):
    """Add a row for the player to ``players_data`` unless the id is already there.

    Nothing happens when ``whoscored_player_id`` already occurs in the
    ``whoscoredPlayerId`` column. Otherwise a message is printed, a new row
    labelled with the player id is stored (with no Intelbet name and with the
    team id resolved from ``teams_data``), and the table is saved.
    """
    known_ids = players_data['whoscoredPlayerId'].values
    if whoscored_player_id in known_ids:
        return

    print('Creating player %u (%s)...' % (whoscored_player_id, whoscored_player_name))

    new_row = pd.Series({
        'whoscoredPlayerId': whoscored_player_id,
        'whoscoredPlayerName': whoscored_player_name,
        'intelbetPlayerName': None,
        'whoscoredName': team,
        'whoscoredId': get_value(teams_data, 'whoscoredName', team, 'whoscoredId')
    })
    players_data.loc[whoscored_player_id] = new_row

    # Persist only after a row was actually added.
    save_players_data()
def _fit(self, match_header, **kwargs):
    """Keep only the previous fitter's rows that belong to the match's tournament.

    A copy of ``self.previous_fitter.statistic`` is taken. When it has no
    rows it is stored as ``self.statistic`` and the method returns without
    setting ``self.tournament_id``. Otherwise ``self.tournament_id`` is set
    from ``match_header['tournamentId']``, ``self.statistic`` becomes a copy
    of the rows whose ``tournament_id`` equals it, and the number of selected
    rows is logged to the ``'prediction'`` logger.
    """
    previous_statistic = self.previous_fitter.statistic.copy()
    if previous_statistic.shape[0] == 0:
        self.statistic = previous_statistic
        return

    tournament_id = match_header['tournamentId']
    self.tournament_id = tournament_id

    in_tournament = previous_statistic['tournament_id'] == tournament_id
    self.statistic = previous_statistic[in_tournament].copy()

    logger = get_logger('prediction')
    tournament_name = get_value(
        tournaments_data, 'whoscoredTournamentId', tournament_id,
        'whoscoredTournamentName')
    selected_count = self.statistic.shape[0]
    logger.info(
        'Отобраны заголовки матчей, произошедших в рамках турнира %s (%u): %u штук',
        tournament_name, tournament_id, selected_count)
def index():
    """Render the matches overview for the last 90 days.

    For every calendar day from 90 days ago up to today a date heading is
    emitted. Under it, every tournament from ``tournaments_data`` that has
    matches on that day gets a heading and a table of rows (date, match
    uuid, home team, away team) linking to ``/matches/<uuid>``.

    Returns:
        ``{'content': <html string>}``.
    """
    import html

    match_headers = get_match_headers()

    today = datetime.date.today()
    first_date = today - datetime.timedelta(days=90)
    date_range = pd.date_range(first_date, today, freq='D')

    # The tournament set does not change between days, so build it once.
    whoscored_tournament_ids = set(tournaments_data['whoscoredTournamentId'].values)

    parts = ['<h2>Матчи</h2>']
    for date_ in date_range:
        date_str = date_.strftime('%Y-%m-%d')
        parts.append('<h3>' + date_str + '</h3>')

        # Filter by date once per day instead of once per (day, tournament).
        this_date_match_headers = match_headers[match_headers['date'] == date_]
        if this_date_match_headers.shape[0] == 0:
            continue

        for whoscored_tournament_id in whoscored_tournament_ids:
            this_date_tournament_match_headers = this_date_match_headers[
                this_date_match_headers['tournament_id'] == whoscored_tournament_id
            ]
            if this_date_tournament_match_headers.shape[0] == 0:
                continue

            betcity_tournament_name = get_value(
                tournaments_data, 'whoscoredTournamentId', whoscored_tournament_id,
                'betcityTournamentName')
            if betcity_tournament_name is None:
                # A missing name would make the heading concatenation raise;
                # fall back to the tournament id so the page still renders.
                betcity_tournament_name = str(whoscored_tournament_id)

            parts.append('<h4>' + html.escape(str(betcity_tournament_name)) + '</h4>')
            parts.append('<table>')
            parts.append('<tbody>')
            for (match_uuid, match_header) in this_date_tournament_match_headers.iterrows():
                # Escaped for use inside a double-quoted attribute.
                match_href = html.escape('/matches/%s' % (match_uuid,))
                link_open = '<a href="' + match_href + '">'

                parts.append('<tr>')
                parts.append('<td>' + date_str + '</td>')
                for cell in (match_uuid, match_header['home'], match_header['away']):
                    parts.append(
                        '<td>' + link_open + html.escape(str(cell)) + '</a></td>')
                parts.append('</tr>')
            parts.append('</tbody>')
            parts.append('</table>')

    return {
        'content': ''.join(parts)
    }