Esempio n. 1
0
def scrape_league_history(country):
    """Re-scrape every archived season of the league mapped to *country*.

    Any stored league with the same name is deleted first, then the league
    is saved again and each archived season's table and results are scraped
    via ``scrape_table`` and ``scrape_results``.

    Args:
        country: Key into ``countries_leagues``; also passed to
            ``click_league`` and stored on the new ``League``.
    """
    league_name = countries_leagues[country]
    db.delete_league_by_name(league_name)

    # Expand the country list on the main page before picking the league.
    browser.get(main_url)
    show_more = browser.find_element_by_class_name('show-more')
    execute_script_click(show_more.find_element_by_link_text('More'))

    click_league(country, league_name)

    browser.find_element_by_link_text('Archive').click()

    # The first two 'leagueTable__season' elements are skipped
    # (presumably header/current-season entries; verify against the page).
    season_cells = browser.find_elements_by_class_name('leagueTable__season')[2:]
    season_anchors = []
    for cell in season_cells:
        season_anchors.append(cell.find_element_by_tag_name('a'))
    # Process the anchors in the reverse of page order.
    season_anchors.reverse()

    league = League(name=league_name, country=country)
    db.save_league(league)

    # Read every name and href up front, before the scrape calls below
    # are handed the links.
    seasons = []
    for anchor in season_anchors:
        years = get_years_from_season_name(anchor.text)
        seasons.append(Season(name=years, league=league))
    links = [anchor.get_attribute('href') for anchor in season_anchors]

    for index, season in enumerate(seasons):
        season_link = links[index]
        scrape_table(season_link, league, season)
        scrape_results(season_link, league, season)
Esempio n. 2
0
    def parseLeague(self):
        """Build ``self.league`` from the LEAGUE worksheet and load it.

        Row 0 is the header; its columns from index 3 onward are used as
        uniform-group ids. Every following row supplies, in order: a
        division id, a pool, a team (parsed with ``self.parseTeam``) and one
        integer count per uniform group. Teams are grouped by division; a
        division with one pool becomes a ``Division``, one with two pools
        becomes a ``DivisionHalf``.

        Raises:
            Exception: if a division contains more than two distinct pools.
        """
        sh = self.wb.sheet_by_name(LEAGUE)
        self.teamSrv = ObjSrv()
        header = self.getRow(sh, 0, True)
        unifGrpIdL = header[3:]
        print(unifGrpIdL)

        # division id -> list of (pool, team) in sheet order
        divD = {}
        for i in range(1, sh.nrows):
            valL = self.getRow(sh, i, True)
            # Division ids are kept as ints when possible, else as strings.
            try:
                div = int(valL[0])
            except ValueError:
                div = str(valL[0])

            pool = valL[1]
            team = self.parseTeam(valL[2])
            team.rank = i  # rank is the team's row index in the sheet
            countL = [int(e) for e in valL[3:]]
            team.unifCountD = dict(zip(unifGrpIdL, countL))
            divD.setdefault(div, []).append((pool, team))

        divL = []
        for divId, valL in iteritems(divD):
            poolL, teamL = zip(*valL)

            # Distinct pools in first-seen (sheet) order. Unpacking a set
            # here would make the order of the two halves depend on hash
            # iteration order, which is not stable across runs.
            uniqPoolL = []
            for pool in poolL:
                if pool not in uniqPoolL:
                    uniqPoolL.append(pool)

            if len(uniqPoolL) == 1:
                division = Division('division-%s' % str(divId), teamL)
            elif len(uniqPoolL) == 2:
                pool1, pool2 = uniqPoolL
                teamL1 = [team for pool, team in valL if pool == pool1]
                teamL2 = [team for pool, team in valL if pool == pool2]
                division = DivisionHalf('division-%s' % str(divId), teamL1,
                                        teamL2)
            else:
                raise Exception(
                    "Can't have more than two pools in the same division (%s)."
                    % (', '.join(map(str, uniqPoolL))))

            for team in teamL:
                team.division = division
            divL.append(division)

        self.league = League(divL)
        self.loadLeague(self.league)
Esempio n. 3
0
# Trailing columns shared by every recognised fixture-table header.
_SEASON_HEADER_TAIL = ('Attendance', 'Venue', 'Referee', 'Match Report',
                       'Notes')

# Recognised header rows mapped to (has_round_column, has_xg_columns).
# Note that a header starting 'Round', 'Day' is treated like the plain
# 'Wk', 'Day' layout: no round column is read from it.
_SEASON_HEADERS = {
    ('Wk', 'Day', 'Date', 'Time', 'Home', 'Score', 'Away')
    + _SEASON_HEADER_TAIL: (False, False),
    ('Round', 'Day', 'Date', 'Time', 'Home', 'Score', 'Away')
    + _SEASON_HEADER_TAIL: (False, False),
    ('Wk', 'Day', 'Date', 'Time', 'Home', 'xG', 'Score', 'xG', 'Away')
    + _SEASON_HEADER_TAIL: (False, True),
    ('Round', 'Wk', 'Day', 'Date', 'Time', 'Home', 'Score', 'Away')
    + _SEASON_HEADER_TAIL: (True, False),
    ('Round', 'Wk', 'Day', 'Date', 'Time', 'Home', 'xG', 'Score', 'xG', 'Away')
    + _SEASON_HEADER_TAIL: (True, True),
}


def _season_columns(header):
    """Return the column index of each field for a recognised header row.

    Fields that the layout does not contain map to ``None``. Raises
    ``RuntimeError`` when *header* is missing or not a known layout.
    """
    key = None if header is None else tuple(header)
    layout = _SEASON_HEADERS.get(key)
    if layout is None:
        raise RuntimeError(
            "Unable to parse table with header row: {}".format(header))

    has_round, has_xg = layout
    # A leading round column shifts everything right by one; the two xG
    # columns sit either side of the score.
    home = 4 + (1 if has_round else 0)
    return {
        "round": 0 if has_round else None,
        "date": home - 2,
        "home": home,
        "home_xg": home + 1 if has_xg else None,
        "score": home + (2 if has_xg else 1),
        "away_xg": home + 3 if has_xg else None,
        "away": home + (4 if has_xg else 2),
    }


def parseSeason(teamlist: TeamList,
                gamelist: GameList,
                prev_leagues,
                filename,
                leaguename,
                leagueyear,
                initial=False,
                is_lg=True):
    """Read one season's fixture CSV and add its games to *gamelist*.

    Args:
        teamlist: Team registry; ``getOrAdd`` is called for both sides of
            every game with a parseable date.
        gamelist: Collection that each parsed ``Game`` is added to.
        prev_leagues: Passed to ``season_map.default_quality`` when
            *initial* is false.
        filename: Path of the CSV file; its first row must be one of the
            recognised header layouts.
        leaguename: League name; ``"champs"`` triggers stripping of the
            last word of the home name and the first word of the away name.
        leagueyear: Passed to ``League`` together with *leaguename*.
        initial: When true, ``(PPG, PPG)`` is used as the default quality
            for newly added teams.
        is_lg: Forwarded to ``getOrAdd``; when true, games whose round is
            set and is not ``"Regular Season"`` are filed under
            ``EXTRA_LEAGUE``.

    Returns:
        The tuple ``(teamlist, league, gamelist)``.

    Raises:
        RuntimeError: if the header row is missing or not recognised.
    """
    league = League(leaguename, leagueyear)

    default = (PPG, PPG) if initial else season_map.default_quality(
        league, prev_leagues)

    # newline="" is what the csv module expects for files it reads.
    with open(filename, newline="") as games_in:
        game_rdr = csv.reader(games_in)
        # An empty file yields None here and is reported as a bad header
        # instead of leaking StopIteration.
        cols = _season_columns(next(game_rdr, None))

        for row in game_rdr:
            # Blank or truncated rows cannot describe a game; skip them the
            # same way rows without a valid date are skipped below.
            if len(row) <= cols["away"]:
                continue

            gamedate = row[cols["date"]]
            homename = row[cols["home"]]
            score = row[cols["score"]]
            awayname = row[cols["away"]]
            rg_season = None if cols["round"] is None else row[cols["round"]]
            if cols["home_xg"] is None:
                homexg = awayxg = None
            else:
                homexg = row[cols["home_xg"]]
                awayxg = row[cols["away_xg"]]

            if leaguename == "champs":
                homename = " ".join(homename.split(" ")[:-1])
                awayname = " ".join(awayname.split(" ")[1:])

            # Rows without a valid ISO date are not games.
            try:
                date = datetime.datetime.strptime(gamedate, "%Y-%m-%d").date()
            except ValueError:
                continue

            hometeam = teamlist.getOrAdd(homename, default, is_lg)
            awayteam = teamlist.getOrAdd(awayname, default, is_lg)

            # Parse the score if present; the separator is an en dash.
            try:
                homescore, awayscore = score.split('–')
                homescore, awayscore = int(homescore), int(awayscore)
            except ValueError:
                homescore = awayscore = None

            # Parse home/away xG if both exist; otherwise leave them None.
            home_xg = away_xg = None
            if homexg and awayxg:
                try:
                    home_xg, away_xg = float(homexg), float(awayxg)
                except ValueError:
                    # Reset both so a half-parsed or stale value from an
                    # earlier row is never attached to this game.
                    home_xg = away_xg = None
                    print("Error in row with xg values", "\n", row)

            # Make this a game
            if is_lg and rg_season and rg_season != "Regular Season":
                lg = EXTRA_LEAGUE
            else:
                lg = league
            game = Game(date, lg, hometeam, awayteam)
            if homescore is not None:
                game.score(homescore, awayscore, home_xg, away_xg)

            # Add game to list of all games
            gamelist.add(game)

    return teamlist, league, gamelist