def read_matches_by_away_team(team_api_id, season=None):
    """
    Return the matches in which the given team is the away team.

    The result is looked up in (and stored into) the "MATCH_AWAY" cache
    namespace under the key "<team_api_id>_<season>"; the season part is an
    empty string when no season is requested.

    :param team_api_id: value matched against the away_team_api_id column
    :param season: optional season used as an additional filter
    :return: list of Match objects
    """
    criteria = {"away_team_api_id": team_api_id}
    if season:
        criteria["season"] = season
    else:
        season = ""
    cache_key = str(team_api_id) + "_" + season

    try:
        return Cache.get_element(cache_key, "MATCH_AWAY")
    except KeyError:
        # Not cached yet: fall through and query the DB
        pass

    found = []
    for row in SQLLite.get_connection().select("Match", **criteria):
        match = Match(row["id"])
        # Copy every selected column onto the Match object
        for column, value in row.items():
            setattr(match, column, value)
        found.append(match)

    Cache.add_element(cache_key, found, "MATCH_AWAY")
    return found
def read_matches_by_league(league_id, season=None, only_stages=True):
    """
    Return the matches played in a league, optionally restricted to a season.

    The result is looked up in (and stored into) the "MATCH_BY_LEAGUE" cache
    namespace under the key "<league_id>_<season>"; the season part is an
    empty string when no season is requested.

    :param league_id: value matched against the league_id column
    :param season: optional season used as an additional filter
    :param only_stages: when true, rows whose stage is not an int are skipped
        (used to filter out dirty data)
    :return: list of Match objects
    """
    criteria = {"league_id": league_id}
    if season:
        criteria["season"] = season
    else:
        season = ""
    cache_key = str(league_id) + "_" + season

    try:
        return Cache.get_element(cache_key, "MATCH_BY_LEAGUE")
    except KeyError:
        # Not cached yet: fall through and query the DB
        pass

    found = []
    for row in SQLLite.get_connection().select("Match", **criteria):
        match = Match(row["id"])
        for column, value in row.items():
            setattr(match, column, value)
        # Keep the match unless stage filtering is on and the stage is dirty
        if not only_stages or type(match.stage) is int:
            found.append(match)

    Cache.add_element(cache_key, found, "MATCH_BY_LEAGUE")
    return found
def get_league_on_page(self):
    """
    Resolve the league named on the crawled web page.

    The league name is read from the span of the template dropdown div of the
    page and searched with League.read_by_name(..., like=True). The outcome
    (a league, or None when zero or several leagues match) is stored in the
    "CRAWL_LEAGUE_PAGE" cache namespace under self.league_data_stage.

    :return: the single matching league, or None
    """
    try:
        return Cache.get_element(self.league_data_stage, "CRAWL_LEAGUE_PAGE")
    except KeyError:
        pass

    # Download and parse the page only if it has not been parsed already
    if not self.soup:
        html = requests.get(self.link_league_to_check).text
        self.soup = BeautifulSoup(html, "html.parser")

    dropdown = self.soup.find(
        'div',
        {'class': 'mx-dropdown-container mx-flexbox mx-float-left mx-template-dropdown'})
    page_league_name = str(dropdown.span.string).strip()
    candidates = League.read_by_name(page_league_name, like=True)

    n_candidates = len(candidates)
    if n_candidates == 1:
        league = candidates[0]
    else:
        league = None
        if n_candidates == 0:
            # No league found, also with the name in the web page
            log.warning("No league found, also with the name in the web page ["+self.league_data_stage+"]")
        else:
            # too many leagues found
            log.warning("Too many leagues found [" + self.league_data_stage + "]")

    Cache.add_element(self.league_data_stage, league, "CRAWL_LEAGUE_PAGE")
    return league
def read_by_player_api_id(player_api_id, only_stages=True):
    """
    Return the matches in which the given player appears in a line-up.

    A row is selected when the player id equals any of the home_player_1..11
    or away_player_1..11 columns. The result is cached in the
    "MATCH_BY_PLAYER_API_ID" namespace under player_api_id.

    :param player_api_id: api id of the player
    :param only_stages: when true, rows whose stage is not an int are skipped
    :return: list of Match objects
    """
    try:
        return Cache.get_element(player_api_id, "MATCH_BY_PLAYER_API_ID")
    except KeyError:
        pass

    # One OR-condition per line-up slot: home_player_1, away_player_1, ...
    any_slot = {
        side + str(slot): player_api_id
        for slot in range(1, 12)
        for side in ('home_player_', 'away_player_')
    }

    played = []
    for row in SQLLite.get_connection().select_or("Match", **any_slot):
        match = Match(row["id"])
        for column, value in row.items():
            setattr(match, column, value)
        if not only_stages or type(match.stage) is int:
            played.append(match)

    Cache.add_element(player_api_id, played, "MATCH_BY_PLAYER_API_ID")
    return played
def get_current_team(self):
    """
    Return the team this player appeared for in the latest known match.

    The latest match is the last one when self.get_matches() is ordered by
    date. The team is the home or away team of that match, depending on the
    line-up slot that holds this player's api id. The result (possibly None)
    is cached in the "PLAYER_CURRENT_TEAM" namespace under self.id.

    :return: the team read through Team.read_by_team_api_id, or None when the
        player has no matches or is not found in the line-ups
    """
    import src.application.Domain.Team as Team

    try:
        return Cache.get_element(self.id, "PLAYER_CURRENT_TEAM")
    except KeyError:
        pass

    team = None
    played = self.get_matches()
    if len(played) > 0:
        latest = sorted(played, key=lambda match: match.date)[-1]
        # Scan the slots pairwise (home_i first, then away_i) and stop at
        # the first slot occupied by this player
        for slot in range(1, 12):
            if getattr(latest, 'home_player_' + str(slot)) == self.player_api_id:
                team = Team.read_by_team_api_id(latest.home_team_api_id)
                break
            if getattr(latest, 'away_player_' + str(slot)) == self.player_api_id:
                team = Team.read_by_team_api_id(latest.away_team_api_id)
                break

    Cache.add_element(self.id, team, "PLAYER_CURRENT_TEAM")
    return team
def read_by_fifa_api_id(player_fifa_api_id):
    """
    Read a player by its player_fifa_api_id.

    On a cache miss the first matching row of the "Player" table is turned
    into a Player object, which is then registered in the four player cache
    namespaces (by fifa api id, api id, name and id).

    :param player_fifa_api_id: fifa api id of the player
    :return: the Player, or None when no row matches
    """
    try:
        return Cache.get_element(player_fifa_api_id, "PLAYER_BY_FIFA_API_ID")
    except KeyError:
        pass

    criteria = {"player_fifa_api_id": player_fifa_api_id}
    try:
        row = SQLLite.get_connection().select("Player", **criteria)[0]
    except IndexError:
        # No row for this fifa api id
        return None

    player = Player(row["id"])
    for column, value in row.items():
        setattr(player, column, value)

    # (attribute used as cache key, cache namespace)
    cache_slots = (
        ("player_fifa_api_id", "PLAYER_BY_FIFA_API_ID"),
        ("player_api_id", "PLAYER_BY_API_ID"),
        ("player_name", "PLAYER_BY_NAME"),
        ("id", "PLAYER_BY_ID"),
    )
    for key_attribute, namespace in cache_slots:
        Cache.add_element(getattr(player, key_attribute), player, namespace)
    return player
def getColumnFromTable(self, table_name):
    """
    Return the column names of a table, as reported by PRAGMA table_info.

    The list is cached in the "SQLLITE_COLUMN_TABLE" namespace under the
    table name.

    :param table_name: name of the table, concatenated as-is into the PRAGMA
    :return: list with the second field of every table_info row
    """
    try:
        return Cache.get_element(table_name, "SQLLITE_COLUMN_TABLE")
    except KeyError:
        pass

    pragma = "PRAGMA table_info(" + table_name + ");"
    # Index 1 of a table_info row holds the column name
    column_names = [info[1] for info in self.cursor.execute(pragma)]

    Cache.add_element(table_name, column_names, "SQLLITE_COLUMN_TABLE")
    return column_names
def read_players_api_id_by_team_api_id(team_api_id, season=None):
    """
    Return the set of player api ids found in the line-ups of a team.

    Home matches contribute the home_player_1..11 columns and away matches
    the away_player_1..11 columns. If season is set, only the matches of that
    season are considered. The set is cached in the
    "MATCH_GET_PLAYERS_BY_TEAM_API_ID" namespace under
    "<team_api_id>_<season>" (empty season part when no season is given).

    NOTE(review): empty slots are skipped with a plain truthiness test for
    home matches but with util.is_None for away matches; the two tests are
    kept as they are - confirm whether the asymmetry is intended.

    :param team_api_id: api id of the team
    :param season: optional season used as an additional filter
    :return: set of player api ids
    """
    season_filter = {}
    if season:
        season_filter["season"] = season
    else:
        season = ""
    cache_key = str(team_api_id) + "_" + season

    try:
        return Cache.get_element(cache_key,
                                 "MATCH_GET_PLAYERS_BY_TEAM_API_ID")
    except KeyError:
        pass

    slots = range(1, 12)
    home_columns = ", ".join("home_player_" + str(slot) for slot in slots)
    away_columns = ", ".join("away_player_" + str(slot) for slot in slots)
    collected = set()

    # Line-ups of the matches played at home
    home_filter = dict(season_filter)
    home_filter["home_team_api_id"] = team_api_id
    for row in SQLLite.get_connection().select(
            "Match", column_filter=home_columns, **home_filter):
        for _, player_api_id in row.items():
            if player_api_id:
                collected.add(player_api_id)

    # Line-ups of the matches played away
    away_filter = dict(season_filter)
    away_filter["away_team_api_id"] = team_api_id
    for row in SQLLite.get_connection().select(
            "Match", column_filter=away_columns, **away_filter):
        for _, player_api_id in row.items():
            if not util.is_None(player_api_id):
                collected.add(player_api_id)

    Cache.add_element(cache_key, collected,
                      "MATCH_GET_PLAYERS_BY_TEAM_API_ID")
    return collected
def write_player_attributes(player, player_attributes, date=None):
    """
    Write a new player attribute row in the DB.

    The player's fifa api id, api id and the date are written into
    player_attributes (the dict passed by the caller is updated in place)
    before it is inserted in the "Player_Attributes" table. The cached
    attributes of the player ("PLAYER_ATTRIBUTES") are then invalidated.

    :param player: player owning the attributes; its player_fifa_api_id and
        player_api_id are read
    :param player_attributes: dict of attribute values to persist
    :param date: value stored in the "date" field; when omitted, today's date
        (util.get_today_date()) followed by " 00:00:00", computed at call time
    :return: None
    """
    if date is None:
        # Resolve the default per call: a default expression in the signature
        # is evaluated only once, when the function is defined, and would
        # keep returning the import-day date in a long-running process.
        date = util.get_today_date() + " 00:00:00"

    log.debug("write_player_attributes of player_fifa_api_id = [" + str(player.player_fifa_api_id) + "]")

    player_attributes["player_fifa_api_id"] = player.player_fifa_api_id
    player_attributes["player_api_id"] = player.player_api_id
    player_attributes["date"] = date

    SQLLite.get_connection().insert("Player_Attributes", player_attributes)
    # Drop the cached attribute list so the next read sees the new row
    Cache.del_element(player.player_fifa_api_id, "PLAYER_ATTRIBUTES")
def get_predictor(ml_alg_framework="my_poisson", ml_alg_method="SVM", ml_train_input_id=5, ml_train_input_representation=1, ml_train_stages_to_train=19, update_current_predictor=True):
    """
    Return the predictor configured with the given parameters.

    The five configuration values are joined with "_" into a key; if a
    predictor is cached under that key in "PREDICTOR_BY_KEY" it is reused,
    otherwise a new Predictor is built. When update_current_predictor is
    true the module-level current_predictor is set to the returned predictor,
    whether it came from the cache or was just created (previously a cached
    predictor was returned without updating current_predictor).

    :param ml_alg_framework: framework name, first part of the key
    :param ml_alg_method: method name, second part of the key
    :param ml_train_input_id: third part of the key
    :param ml_train_input_representation: fourth part of the key
    :param ml_train_stages_to_train: fifth part of the key
    :param update_current_predictor: whether to make the returned predictor
        the current one
    :return: the Predictor instance
    """
    global current_predictor

    key = "_".join([
        ml_alg_framework,
        ml_alg_method,
        str(ml_train_input_id),
        str(ml_train_input_representation),
        str(ml_train_stages_to_train),
    ])

    try:
        predictor = Cache.get_element(key, "PREDICTOR_BY_KEY")
    except KeyError:
        predictor = Predictor(ml_alg_framework,
                              ml_alg_method,
                              ml_train_input_id,
                              ml_train_input_representation,
                              ml_train_stages_to_train)

    if update_current_predictor:
        current_predictor = predictor
    return predictor
def get_seasons(self):
    """
    Return the distinct seasons stored in the Match table for this league.

    The list is cached in the "SEASONS_BY_LEAGUE" namespace under self.id.

    :return: list with the first field of every row of the query
    """
    try:
        return Cache.get_element(self.id, "SEASONS_BY_LEAGUE")
    except KeyError:
        pass

    query = "SELECT distinct(season) FROM Match WHERE league_id='" + str(self.id) + "'"
    stored_seasons = [
        row[0] for row in SQLLite.get_connection().execute_select(query)
    ]

    Cache.add_element(self.id, stored_seasons, "SEASONS_BY_LEAGUE")
    return stored_seasons
def __init__(self, ml_alg_framework, ml_alg_method, ml_train_input_id, ml_train_input_representation, ml_train_stages_to_train):
    """
    Store the predictor configuration and register the instance in the cache.

    The instance is added to the "PREDICTOR_BY_KEY" cache namespace under the
    key returned by self.get_predictor_key().

    :param ml_alg_framework: stored as self.ml_alg_framework
    :param ml_alg_method: stored as self.ml_alg_method
    :param ml_train_input_id: stored as self.ml_train_input_id
    :param ml_train_input_representation: stored as
        self.ml_train_input_representation
    :param ml_train_stages_to_train: stored as self.ml_train_stages_to_train
    """
    self.ml_alg_framework = ml_alg_framework
    self.ml_alg_method = ml_alg_method
    self.ml_train_input_id = ml_train_input_id
    self.ml_train_input_representation = ml_train_input_representation
    self.ml_train_stages_to_train = ml_train_stages_to_train

    # The key is computed once the configuration attributes are in place
    cache_key = self.get_predictor_key()

    # KEY: LEAGUE ID; VALUE: <MATCH ID: <pred, prob>>
    self.predictions = {}

    Cache.add_element(cache_key, self, "PREDICTOR_BY_KEY")
def write_team_attributes(team, team_attributes, date=None):
    """
    Persist a new team attribute row in the DB.

    The team's fifa api id, api id and the date are written into
    team_attributes (the dict passed by the caller is updated in place)
    before it is inserted in the "Team_Attributes" table. The cached
    attributes of the team ("TEAM_ATTRIBUTES") are then invalidated.

    :param team: team owning the attributes; its team_fifa_api_id and
        team_api_id are read
    :param team_attributes: dict of attribute values to persist
    :param date: value stored in the "date" field; when omitted, today's date
        (util.get_today_date()) followed by " 00:00:00", computed at call time
    :return: None
    """
    if date is None:
        # Resolve the default per call: a default expression in the signature
        # is evaluated only once, when the function is defined, and would
        # keep returning the import-day date in a long-running process.
        date = util.get_today_date() + " 00:00:00"

    log.debug("write_team_attributes of team_fifa_api_id = [" + str(team.team_fifa_api_id) + "]")

    team_attributes["team_fifa_api_id"] = team.team_fifa_api_id
    team_attributes["team_api_id"] = team.team_api_id
    team_attributes["date"] = date

    SQLLite.get_connection().insert("Team_Attributes", team_attributes)
    # Drop the cached attribute list so the next read sees the new row
    Cache.del_element(team.team_fifa_api_id, "TEAM_ATTRIBUTES")
def get_season(self):
    """
    Return the season shown on the crawled league page.

    The season is the text of the span inside the tournament dropdown div of
    the page. It is cached in the "CRAWL_LEAGUE_SEASON" namespace under
    self.league_data_stage.

    :return: the season as a string
    """
    try:
        return Cache.get_element(self.league_data_stage, "CRAWL_LEAGUE_SEASON")
    except KeyError:
        pass

    # Download and parse the page only if it has not been parsed already
    if not self.soup:
        html = requests.get(self.link_league_to_check).text
        self.soup = BeautifulSoup(html, "html.parser")

    dropdown = self.soup.find(
        'div',
        {'class': 'mx-dropdown-container mx-flexbox mx-float-left mx-tournament-dropdown'})
    page_season = str(dropdown.span.string)

    Cache.add_element(self.league_data_stage, page_season, "CRAWL_LEAGUE_SEASON")
    return page_season
def get_league(self):
    """
    Return the DB league managed in this web page.

    The league cached in "CRAWL_LEAGUE_PAGE" under self.league_data_stage
    wins; without a cache entry self.league is returned.

    :return: the cached league, or self.league
    """
    try:
        cached_league = Cache.get_element(self.league_data_stage, "CRAWL_LEAGUE_PAGE")
    except KeyError:
        # Nothing cached for this page: use the instance attribute
        return self.league
    return cached_league
def read_by_id(id):
    """
    Return the league with this id.

    The league is looked up in the "LEAGUE_BY_ID" cache first; on a miss the
    first row returned by the "League" table query is turned into a League
    object and cached under "LEAGUE_BY_ID" (by id) and "LEAGUE_BY_COUNTRY"
    (by country_id).

    NOTE(review): an empty query result is not handled here, so indexing the
    first row presumably raises when the id is unknown - confirm.

    :param id: id of the league
    :return: the League object
    """
    try:
        return Cache.get_element(id, "LEAGUE_BY_ID")
    except KeyError:
        pass

    rows = SQLLite.get_connection().select("League", id=id)
    row = rows[0]

    league = League(row["id"])
    for column, value in row.items():
        setattr(league, column, value)

    Cache.add_element(league.id, league, "LEAGUE_BY_ID")
    Cache.add_element(league.country_id, league, "LEAGUE_BY_COUNTRY")
    return league
def read_by_player_fifa_api_id(player_fifa_api_id):
    """
    Return the attribute records of the player with this fifa api id.

    Every matching row of the "Player_Attributes" table becomes a
    PlayerAttributes object. The list is cached in the "PLAYER_ATTRIBUTES"
    namespace under the fifa api id.

    :param player_fifa_api_id: fifa api id of the player
    :return: list of PlayerAttributes objects
    """
    try:
        return Cache.get_element(player_fifa_api_id, "PLAYER_ATTRIBUTES")
    except KeyError:
        pass

    rows = SQLLite.get_connection().select(
        "Player_Attributes", player_fifa_api_id=player_fifa_api_id)

    records = []
    for row in rows:
        record = PlayerAttributes(row["id"])
        for column, value in row.items():
            setattr(record, column, value)
        records.append(record)

    Cache.add_element(player_fifa_api_id, records, "PLAYER_ATTRIBUTES")
    return records
def read_by_team_fifa_api_id(team_fifa_api_id):
    """
    Return the attribute records of the team with this fifa api id.

    Every matching row of the "Team_Attributes" table becomes a
    Team_Attributes object. The list is cached in the "TEAM_ATTRIBUTES"
    namespace under the fifa api id.

    :param team_fifa_api_id: fifa api id of the team
    :return: list of Team_Attributes objects
    """
    try:
        return Cache.get_element(team_fifa_api_id, "TEAM_ATTRIBUTES")
    except KeyError:
        pass

    rows = SQLLite.get_connection().select(
        "Team_Attributes", team_fifa_api_id=team_fifa_api_id)

    records = []
    for row in rows:
        record = Team_Attributes(row["id"])
        for column, value in row.items():
            setattr(record, column, value)
        records.append(record)

    Cache.add_element(team_fifa_api_id, records, "TEAM_ATTRIBUTES")
    return records
def update(player):
    """
    Update the player in the DB and return it as read back by id.

    After the "Player" row is updated, the cache entries of the player in the
    four player namespaces (by fifa api id, api id, name and id) are deleted
    before the player is read again with read_by_id.

    :param player: player carrying the new values
    :return: the result of read_by_id(player.id)
    """
    SQLLite.get_connection().update("Player", player)

    # (attribute used as cache key, cache namespace) of the entries to drop
    stale_slots = (
        ("player_fifa_api_id", "PLAYER_BY_FIFA_API_ID"),
        ("player_api_id", "PLAYER_BY_API_ID"),
        ("player_name", "PLAYER_BY_NAME"),
        ("id", "PLAYER_BY_ID"),
    )
    for key_attribute, namespace in stale_slots:
        Cache.del_element(getattr(player, key_attribute), namespace)

    return read_by_id(player.id)
def read_by_match_api_id(match_api_id):
    """
    Return the match with this api id.

    Cache keys and the DB filter use the string form of the api id. On a
    cache miss the first matching "Match" row becomes a Match object, which
    is cached under "MATCH_BY_ID" and "MATCH_BY_API_ID".

    :param match_api_id: api id of the match
    :return: the Match, or None when no row matches
    """
    api_id_key = str(match_api_id)
    try:
        return Cache.get_element(api_id_key, "MATCH_BY_API_ID")
    except KeyError:
        pass

    try:
        row = SQLLite.get_connection().select(
            "Match", match_api_id=api_id_key)[0]
    except IndexError:
        # No row for this api id
        return None

    match = Match(row["id"])
    for column, value in row.items():
        setattr(match, column, value)

    Cache.add_element(str(match.id), match, "MATCH_BY_ID")
    Cache.add_element(str(match.match_api_id), match, "MATCH_BY_API_ID")
    return match
def is_in_a_managed_country(self):
    """
    Check whether the crawled league page belongs to a managed country.

    The page is downloaded (up to 5 attempts, retrying when the country span
    cannot be read) and its country name is searched with
    Country.read_by_name(..., like=True). With exactly one matching country,
    self.league is set to a league of that country whose name contains
    self.league_name, or to self.get_league_on_page() when none does.

    NOTE(review): the nesting of the original statements was not visible when
    this block was reformatted; the layout below assumes that the final cache
    writes and "return True" belong to the single-country branch, so several
    matching countries fall through and return None - confirm.

    :return: the cached answer if present; False when the page cannot be read
        or no country matches; True when exactly one country matches
    """
    try:
        return Cache.get_element(self.league_data_stage, "CRAWL_LEAGUE_MANAGED")
    except KeyError:
        pass

    for _ in range(5):
        try:
            html = requests.get(self.link_league_to_check).text
            self.soup = BeautifulSoup(html, "html.parser")
            country_span = self.soup.find('span', {'class': 'mx-country-dropdown-name'})
            country_name = str(country_span.string).strip()
            break
        except AttributeError:
            # The country span was not readable on this download: try again
            continue
    else:
        # Every attempt failed
        return False

    countries = Country.read_by_name(country_name, like=True)
    n_countries = len(countries)

    if n_countries == 0:
        # this country is not managed!!
        Cache.add_element(self.league_data_stage, False, "CRAWL_LEAGUE_MANAGED")
        return False

    if n_countries == 1:
        # country found
        matched = False
        for candidate in countries[0].get_leagues():
            # No early exit: the last league containing the name wins
            if self.league_name in candidate.name:
                self.league = candidate
                matched = True
        if not matched:
            self.league = self.get_league_on_page()

        Cache.add_element(self.league_data_stage, self.league, "CRAWL_LEAGUE_PAGE")
        Cache.add_element(self.league_data_stage, True, "CRAWL_LEAGUE_MANAGED")
        return True

    return None
def read_by_team_api_id(team_api_id, season=None):
    """
    Return the players found in the line-ups of the given team.

    The player api ids come from Match.read_players_api_id_by_team_api_id;
    each player is taken from the "PLAYER_BY_API_ID" cache or read from the
    "Player" table. Ids without a DB row are logged and skipped. If season is
    set, only that season is considered. The list is cached in the
    "PLAYER_BY_TEAM_API_ID" namespace under "<team_api_id>_<season>".

    :param team_api_id: api id of the team
    :param season: optional season; treated as "" when not set
    :return: list of Player objects
    """
    if not season:
        season = ""
    cache_key = str(team_api_id) + "_" + season

    try:
        return Cache.get_element(cache_key, "PLAYER_BY_TEAM_API_ID")
    except KeyError:
        pass

    roster = []
    for player_api_id in Match.read_players_api_id_by_team_api_id(
            team_api_id, season):
        # if the player_api_id is not set --> continue
        if util.is_None(player_api_id):
            continue

        try:
            player = Cache.get_element(player_api_id, "PLAYER_BY_API_ID")
        except KeyError:
            try:
                row = SQLLite.get_connection().select(
                    "Player", player_api_id=player_api_id)[0]
            except IndexError:
                log.warning("Player api id not found in DB [" + str(player_api_id) + "]")
                continue
            player = Player(row["id"])
            for column, value in row.items():
                setattr(player, column, value)
            Cache.add_element(player_api_id, player, "PLAYER_BY_API_ID")

        roster.append(player)

    Cache.add_element(cache_key, roster, "PLAYER_BY_TEAM_API_ID")
    return roster
def read_match_shot(match, on=True):
    """
    Return the list of shots (either on or off target) of the match.

    The markup stored in match.shoton (or match.shotoff) is parsed with
    BeautifulSoup; every child of the first top-level element becomes a Shot
    whose attributes are filled from the child's tags. The list is cached in
    the "SHOTON_BY_MATCH_API_ID" / "SHOTOFF_BY_MATCH_API_ID" namespace under
    match.match_api_id.

    Tags that are not managed are only logged at debug level. The log call
    passes the tag name as a logging argument instead of concatenating it, so
    a child node without a tag name (name is None) no longer raises TypeError.

    :param match: match whose shoton / shotoff markup is read
    :param on: True to read the shots on target, False for the shots off
    :return: list of Shot objects
    :raises MLException: with code 2 when parsing the markup raises TypeError
    """
    on_off = "ON" if on else "OFF"
    cache_namespace = "SHOT" + on_off + "_BY_MATCH_API_ID"

    try:
        return Cache.get_element(match.match_api_id, cache_namespace)
    except KeyError:
        pass

    try:
        bs = BeautifulSoup(match.shoton if on else match.shotoff,
                           "html.parser")
    except TypeError:
        raise MLException(2)

    # Tags copied as strings onto the attribute of the same name, e.g.
    # < elapsed > 3 < / elapsed >, < subtype > blocked_shot < / subtype >,
    # < player1 > 41540 < / player1 >, < id > 4707358 < / id >
    plain_text_tags = (
        "event_incident_typefk", "elapsed", "subtype", "player1",
        "sortorder", "n", "type", "id", "elapsed_plus",
    )

    shot_list = []
    for value in bs.contents[0].children:
        shot = Shot()
        for tag in value.children:
            tag_name = tag.name

            if tag_name == "stats":
                # < stats > < blocked > 1 < / blocked > < / stats >
                shot.stats = {content.name: str(content.string)
                              for content in tag.contents}
            elif tag_name == "coordinates":
                # < coordinates > < value > 11 < / value > < value > 9 < / value > < / coordinates >
                x = str(tag.contents[0].string)
                y = str(tag.contents[1].string)
                shot.coordinates = (x, y)
            elif tag_name == "team":
                # < team > 8534 < / team >
                shot.team = int(tag.string)
            elif tag_name == "del":
                # "del" is a Python keyword, hence the underscore
                shot._del = str(tag.string)
            elif tag_name in plain_text_tags:
                setattr(shot, tag_name, str(tag.string))
            else:
                logging.debug(
                    "Shot :: read_team_shoton > tag not managed [ %s ]",
                    tag_name)
        shot_list.append(shot)

    Cache.add_element(match.match_api_id, shot_list, cache_namespace)
    return shot_list