def scrape_league_history(country):
    """Delete and re-scrape all archived seasons of the league for *country*.

    The league name is looked up in ``countries_leagues``; any league stored
    under that name is deleted before the browser is driven to the league's
    "Archive" page.  A fresh ``League`` is saved, and for every season link
    found there ``scrape_table`` and ``scrape_results`` are invoked.
    """
    league_name = countries_leagues[country]
    db.delete_league_by_name(league_name)

    # Navigate: main page -> expand the country list -> league -> archive.
    browser.get(main_url)
    show_more = browser.find_element_by_class_name('show-more')
    execute_script_click(show_more.find_element_by_link_text('More'))
    click_league(country, league_name)
    browser.find_element_by_link_text('Archive').click()

    # The first two matched elements are skipped.
    # NOTE(review): presumably a header row and the running season - confirm.
    season_rows = browser.find_elements_by_class_name('leagueTable__season')[2:]
    season_anchors = [row.find_element_by_tag_name('a') for row in season_rows]
    # Process the seasons in the reverse of the order they appear on the page.
    season_anchors.reverse()

    league = League(name=league_name, country=country)
    db.save_league(league)

    # Read all texts, then all hrefs, before any scraping starts.
    # NOTE(review): presumably scrape_* navigate away and would invalidate
    # these elements - confirm.
    seasons = []
    for anchor in season_anchors:
        years = get_years_from_season_name(anchor.text)
        seasons.append(Season(name=years, league=league))
    links = []
    for anchor in season_anchors:
        links.append(anchor.get_attribute('href'))

    for season, link in zip(seasons, links):
        scrape_table(link, league, season)
        scrape_results(link, league, season)
def parseLeague(self):
    """Build the league from the ``LEAGUE`` worksheet and load it.

    Row 0 is the header; its cells from the fourth column onward become the
    keys of each team's ``unifCountD``.  Every later row holds a division id
    (column 0, kept as ``int`` when it converts, otherwise as ``str``), a
    pool (column 1), a team description handed to ``self.parseTeam``
    (column 2) and integer counts (column 3 onward).  A team's ``rank`` is
    its row index in the sheet.

    Teams are grouped by division.  A division with one pool becomes a
    ``Division``; one with two pools becomes a ``DivisionHalf``.  The result
    is stored in ``self.league`` and passed to ``self.loadLeague``.

    Raises:
        Exception: if a division does not have exactly one or two pools.
    """
    sheet = self.wb.sheet_by_name(LEAGUE)
    self.teamSrv = ObjSrv()

    unifGrpIdL = self.getRow(sheet, 0, True)[3:]
    print(unifGrpIdL)

    # division id -> [(pool, team), ...] in sheet order
    membersByDiv = {}
    for rowIdx in range(1, sheet.nrows):
        cells = self.getRow(sheet, rowIdx, True)
        try:
            divKey = int(cells[0])
        except ValueError:
            divKey = str(cells[0])
        poolId = cells[1]

        parsedTeam = self.parseTeam(cells[2])
        parsedTeam.rank = rowIdx
        counts = list(map(int, cells[3:]))
        parsedTeam.unifCountD = dict(zip(unifGrpIdL, counts))

        membersByDiv.setdefault(divKey, []).append((poolId, parsedTeam))

    divisions = []
    for divId, members in iteritems(membersByDiv):
        poolL, teamL = zip(*members)
        poolS = set(poolL)

        if len(poolS) not in (1, 2):
            raise Exception(
                "Can't have more than two pools in the same division (%s)."
                % (', '.join(map(str, poolS))))

        if len(poolS) == 1:
            division = Division('division-%s' % str(divId), teamL)
        else:
            # Which pool ends up first follows the set's iteration order.
            firstPool, secondPool = list(poolS)
            firstHalf = [t for p, t in members if p == firstPool]
            secondHalf = [t for p, t in members if p == secondPool]
            division = DivisionHalf(
                'division-%s' % str(divId), firstHalf, secondHalf)

        # Back-reference: every team of the division, whichever its pool.
        for member in teamL:
            member.division = division
        divisions.append(division)

    self.league = League(divisions)
    self.loadLeague(self.league)
def parseSeason(teamlist: TeamList,
                gamelist: GameList,
                prev_leagues,
                filename,
                leaguename,
                leagueyear,
                initial=False,
                is_lg=True):
    """Read one season's fixture CSV and add its games to *gamelist*.

    Args:
        teamlist: team registry; teams are fetched or created through
            ``teamlist.getOrAdd(name, default, is_lg)``.
        gamelist: collection that receives every parsed game via ``add``.
        prev_leagues: passed to ``season_map.default_quality`` to derive the
            default team quality when *initial* is false.
        filename: path of the CSV file to read.
        leaguename: league name; the value ``"champs"`` additionally trims
            the last word of the home name and the first word of the away
            name.
        leagueyear: year passed to the ``League`` constructor.
        initial: when true, new teams get the default ``(PPG, PPG)``.
        is_lg: forwarded to ``getOrAdd``; when true, rows whose round is set
            and is not ``"Regular Season"`` are filed under ``EXTRA_LEAGUE``.

    Returns:
        The tuple ``(teamlist, league, gamelist)``.

    Raises:
        RuntimeError: if the header row is not one of the known layouts.
        StopIteration: if the file has no header row at all.

    Rows that are too short to contain the needed columns, or whose date is
    not in ``YYYY-MM-DD`` form, are skipped.  Scores that cannot be parsed
    leave the game unscored.
    """
    league = League(leaguename, leagueyear)
    default = (PPG, PPG) if initial else season_map.default_quality(
        league, prev_leagues)

    # newline="" is what the csv module asks for when handed a file object.
    # NOTE(review): the platform default encoding is used; the en dash in the
    # score column must decode correctly under it - confirm.
    with open(filename, newline="") as games_in:
        # Open csv reader and consume the header row.
        game_rdr = csv.reader(games_in)
        table_formats = _season_table_formats(next(game_rdr))
        has_xg = "XG" in table_formats
        has_round = "Round" in table_formats

        # Column positions depend only on the header, so compute them once.
        # A leading "Round" column shifts everything right by one; the two
        # xG columns sit on either side of the score.
        shift = 1 if has_round else 0
        date_col = 2 + shift
        home_col = 4 + shift
        if has_xg:
            homexg_col = home_col + 1
            score_col = home_col + 2
            awayxg_col = home_col + 3
            away_col = home_col + 4
        else:
            homexg_col = awayxg_col = None
            score_col = home_col + 1
            away_col = home_col + 2

        for row in game_rdr:
            # Blank or truncated lines cannot describe a game; skip them the
            # same way rows without a valid date are skipped below.
            if len(row) <= away_col:
                continue

            rg_season = row[0] if has_round else None
            gamedate = row[date_col]
            homename = row[home_col]
            score = row[score_col]
            awayname = row[away_col]
            homexg = row[homexg_col] if has_xg else None
            awayxg = row[awayxg_col] if has_xg else None

            if leaguename == "champs":
                homename = " ".join(homename.split(" ")[:-1])
                awayname = " ".join(awayname.split(" ")[1:])

            try:
                date = datetime.datetime.strptime(gamedate, "%Y-%m-%d").date()
            except ValueError:
                # Separator / repeated-header rows carry no usable date.
                continue

            hometeam = teamlist.getOrAdd(homename, default, is_lg)
            awayteam = teamlist.getOrAdd(awayname, default, is_lg)

            # Parse scores if they exist (the separator is an en dash).
            try:
                homescore, awayscore = score.split('–')
                homescore, awayscore = int(homescore), int(awayscore)
            except ValueError:
                homescore = awayscore = None

            # Parse home/away xG if both exist.  Default to None first so a
            # malformed value can neither leave the names unbound nor leak
            # the previous row's xG into this game.
            home_xg = away_xg = None
            if homexg and awayxg:
                try:
                    home_xg, away_xg = float(homexg), float(awayxg)
                except ValueError:
                    print("Error in row with xg values", "\n", row)

            # Make this a game.
            if is_lg and rg_season and rg_season != "Regular Season":
                lg = EXTRA_LEAGUE
            else:
                lg = league
            game = Game(date, lg, hometeam, awayteam)
            if homescore is not None:
                game.score(homescore, awayscore, home_xg, away_xg)

            # Add game to list of all games.
            gamelist.add(game)

    return teamlist, league, gamelist


def _season_table_formats(header):
    """Return which optional column groups ("XG", "Round") *header* has.

    Raises RuntimeError when *header* matches none of the known layouts.
    """
    tail = ['Attendance', 'Venue', 'Referee', 'Match Report', 'Notes']
    plain = ['Day', 'Date', 'Time', 'Home', 'Score', 'Away'] + tail
    with_xg = ['Day', 'Date', 'Time', 'Home', 'xG', 'Score', 'xG',
               'Away'] + tail

    if header == ['Wk'] + plain or header == ['Round'] + plain:
        return set()
    if header == ['Wk'] + with_xg:
        return {"XG"}
    if header == ['Round', 'Wk'] + plain:
        return {"Round"}
    if header == ['Round', 'Wk'] + with_xg:
        return {"XG", "Round"}
    raise RuntimeError(
        "Unable to parse table with header row: {}".format(header))