def getTopLeagues():
    """Collect the leagues listed in the confederation league templates.

    For each confederation template ('Template:UEFA_leagues', ...) the page
    text is fetched with wikiutils.getPage and scanned line by line. Bullet
    lines ('*') that sit inside a '|listN=' section are handed to
    wikiutils.unlinkify; every entry that yields a link is recorded.

    Returns:
        dict mapping the league link to the tuple
        (name, name, link, confederationname). fetchLeagueData unpacks this
        tuple as (leaguename, country, toplevelleague, confederationname).
        Templates for which wikiutils.getPage returns nothing are skipped.
    """
    templates = ['UEFA_leagues', 'CONMEBOL_leagues', 'CONCACAF_leagues',
            'CAF_leagues', 'AFC_leagues', 'OFC_leagues']
    # Any list number is accepted; a single-digit pattern would silently drop
    # the entries of '|list10=' and later sections.
    listStart = re.compile(r'\|list\d+=')
    leagues = dict()
    for t in templates:
        confederationname = t.split('_')[0]
        text = wikiutils.getPage('Template:' + t)
        if not text:
            continue
        print('done.')

        inList = False
        for line in text.split('\n'):
            lineWithoutSpaces = ''.join(line.split())
            if not lineWithoutSpaces:
                continue

            if listStart.match(lineWithoutSpaces):
                # Checked on every line, so a list that directly follows
                # another list still opens a new section.
                inList = True
            elif lineWithoutSpaces[0] in ('|', '}'):
                # Any other template parameter, or the end of the template,
                # terminates the current list.
                inList = False
            elif inList and lineWithoutSpaces[0] == '*':
                # Strip surrounding whitespace first so that an indented
                # bullet still loses its '*' marker(s).
                v = line.strip().strip('*').strip()
                name, link = wikiutils.unlinkify(v)
                if link:
                    leagues[link] = (name, name, link, confederationname)
                    print('Found %s' % name)
    return leagues
def fetchTeamData(team):
    """Fetch the wiki page of a team and parse it into team data.

    Args:
        team: page title passed to wikiutils.getPage and on to parseTeam.

    Returns:
        The object returned by parseTeam, or None when the page has no text
        or parseTeam does not produce a team.
    """
    pagetext = wikiutils.getPage(team)
    if not pagetext:
        print('No revision text.')
        return None

    teamdata = parseTeam(team, pagetext, True)
    if not teamdata:
        print('failed - no players found.')
        return None

    # Short progress summary: first kit's body colour, position, squad size.
    summary = (teamdata.kits[0].bodycolor, teamdata.pos, len(teamdata.players))
    print('done (kit %s, position %d, %d players)' % summary)
    return teamdata
def fetchLeagueData(specificLeague):
    """Fetch, parse and store league data, continuing from saved progress.

    Progress is restored with load(). If no progress file exists, or the
    restored progress holds neither queued nor processed leagues, the queue is
    seeded from parser.getTopLeagues(). Each processed league is parsed from
    its wiki page (and its season page when one is known), written as XML
    below Globals.outputdir when it has teams, and its relegation leagues are
    queued. Progress is saved after every league.

    Args:
        specificLeague: if truthy, only this league is processed. It is
            matched as a substring of the queued league titles first, then as
            an exact key of the already processed leagues. If falsy, queued
            leagues are processed until the queue is empty.

    Raises:
        IOError: re-raised from load() for any error other than ENOENT.
    """
    try:
        load()
    except IOError as exc:
        if exc.errno != errno.ENOENT:
            raise
        print('No previous progress - starting from the top.')
        Globals.progress.leagues = parser.getTopLeagues()
        Globals.progress.processedleagues = dict()
        save()

    if not Globals.progress.processedleagues and not Globals.progress.leagues:
        print('No progress - starting from the top.')
        Globals.progress.leagues = parser.getTopLeagues()
        Globals.progress.processedleagues = dict()
        save()

    while specificLeague or Globals.progress.leagues:
        if specificLeague:
            leaguetitle = None
            # Queued leagues are matched by substring ...
            for k in Globals.progress.leagues:
                if specificLeague in k:
                    leaguetitle = k
                    _, country, toplevelleague, confederationname = Globals.progress.leagues[k]
                    break

            # ... already processed leagues only by their exact title.
            if leaguetitle is None and specificLeague in Globals.progress.processedleagues:
                leaguetitle = specificLeague
                league = Globals.progress.processedleagues[leaguetitle]
                country = league.country
                toplevelleague = league.toplevelleague
                confederationname = league.confederation

            if leaguetitle is None:
                sys.stderr.write("I don't have league '%s' queued.\n" % specificLeague + "\n")
                sys.stderr.write("%s\n" % Globals.progress.printQueuedLeagues() + "\n")
                return
        else:
            leaguetitle = next(iter(Globals.progress.leagues))
            _, country, toplevelleague, confederationname = Globals.progress.leagues[leaguetitle]

        # The promotion league is the processed league that lists this one
        # among its relegation leagues.
        # NOTE(review): relegationleagues appears to map league name -> link
        # (see the queueing loop below), while leaguetitle is a link; confirm
        # that this membership test is meant to look at the keys.
        promotionleague = None
        for processedleaguename, processedleague in Globals.progress.processedleagues.items():
            if processedleague.relegationleagues and leaguetitle in processedleague.relegationleagues:
                promotionleague = processedleaguename
                break

        leaguedata = None
        rvtext = wikiutils.getPage(leaguetitle)
        if rvtext:
            # First get and parse the league text as it may contain a link to
            # the current season. Then, try to complement any league data from
            # the season page. Finally, try to get the team data, from the
            # season link first if possible.
            leaguedata = soccer.LeagueData(leaguetitle, promotionleague, confederationname, country, toplevelleague)
            parser.getLeagueData(rvtext, leaguedata)
            if leaguedata.season:
                stext = wikiutils.getPage(leaguedata.season, True)
            else:
                stext = None

            if stext:
                parser.getLeagueData(stext, leaguedata)

            # overwrite levelnum from the wiki info as it seems to be unreliable (e.g. Venezuelan_Segunda_División)
            if not promotionleague:
                leaguedata.levelnum = 1
            else:
                leaguedata.levelnum = Globals.progress.processedleagues[promotionleague].levelnum + 1

            if Globals.fetchTeams:
                if stext:
                    parser.getTeamData(stext, leaguedata)
                parser.getTeamData(rvtext, leaguedata)

            if leaguedata.hasTeams():
                root = leaguedata.toXML()
                outdir = Globals.outputdir + wikiutils.titleToFilename(leaguedata.confederation) + '/' + country + '/'
                utils.mkdir_p(outdir)
                with open(outdir + wikiutils.titleToFilename(leaguedata.title) + '.xml', 'w') as f:
                    f.write(etree.tostring(root, pretty_print=True))

                if leaguedata.relegationleagues:
                    for rln, rll in leaguedata.relegationleagues.items():
                        # The queue is keyed by link, so test the link (not
                        # the name) to avoid overwriting an existing entry.
                        if rll not in Globals.progress.leagues:
                            Globals.progress.leagues[rll] = (rln, country, toplevelleague, confederationname)
                    print('%d following league(s): %s' % (len(leaguedata.relegationleagues), list(leaguedata.relegationleagues.keys())))
                else:
                    print('No following leagues.')
            else:
                print('Failed to fetch teams.')
        else:
            print('No revision text for league.')

        Globals.didSomething = True
        if leaguedata:
            Globals.progress.leagueProcessed(leaguedata)
        else:
            # The league may have been looked up among the processed leagues,
            # in which case it is not in the queue; a plain del would raise
            # KeyError there.
            Globals.progress.leagues.pop(leaguetitle, None)

        save()

        if specificLeague:
            return
def _applyKitColumn(column, kit):
    """Store the colour of one '|key=value' kit column on the matching kit."""
    try:
        k, v = wikiutils.getKeyValue(column)
    except Exception:
        # Not a usable key/value pair - skip this column.
        return

    for prefix, attr in (('body', 'bodycolor'),
                         ('shorts', 'shortscolor'),
                         ('socks', 'sockscolor')):
        if k.startswith(prefix):
            # The character after the prefix selects the kit. Only kits 1 and
            # 2 are tracked; anything else (no suffix, other digits, or a
            # non-digit such as in 'bodycolor') is ignored rather than
            # aborting the parse with a ValueError.
            kitnumber = k[len(prefix):len(prefix) + 1]
            if kitnumber in ('1', '2'):
                setattr(kit[int(kitnumber) - 1], attr, getColorValue(v))
            return
    # TODO: body type, second color (keys starting with 'pattern_b', except 'pattern_blue')


def parseTeam(team, rvtext, mayGetTemplates):
    """Parse the wiki text of a team page into a soccer.Team.

    The text is scanned line by line for players (playerparser.fetchPlayer),
    a '|position=' entry and kit colour entries (lines matched by kitinfo_re).

    Args:
        team: team title, passed through to soccer.Team.
        rvtext: wiki text of the team page.
        mayGetTemplates: if true, a template referenced below a
            'current squad' or 'first ... squad' heading is fetched and its
            players replace the ones collected so far.

    Returns:
        soccer.Team built from the title, the two kits, the position (0 when
        none was found) and the players, or None when fewer than 15 players
        were found.
    """
    players = []
    teamposition = None
    kit = [soccer.Kit(), soccer.Kit()]
    finishedReadingPlayers = False
    lookForSquadTemplate = False

    for line in rvtext.split('\n'):
        lineWithoutSpaces = ''.join(line.split())
        if not finishedReadingPlayers:
            p = playerparser.fetchPlayer(line)
            if p:
                players.append(p)
            else:
                heading = wikiutils.getHeading(line)
                if heading:
                    h = heading.lower()
                    # mayGetTemplates has to gate both heading variants;
                    # without the parentheses 'and' binds tighter than 'or'
                    # and a 'first ... squad' heading would bypass the flag.
                    lookForSquadTemplate = bool(mayGetTemplates and
                            ('current squad' in h or ('first' in h and 'squad' in h)))
                elif lookForSquadTemplate:
                    t = wikiutils.getTemplate(line)
                    if t:
                        text = wikiutils.getPage('Template:' + t)
                        if text:
                            # The template's players replace whatever was
                            # collected from the page itself.
                            players = playerparser.fetchPlayers(text)
                            if len(players) > 15:
                                finishedReadingPlayers = True

        if playerparser.endOfPlayerList(line):
            finishedReadingPlayers = True

        if lineWithoutSpaces.startswith("|position="):
            # this seems to usually be either this or last season's position
            if not ('promoted' in lineWithoutSpaces.lower() or 'relegated' in lineWithoutSpaces.lower()):
                tp = wikiutils.getNumberKeyValue(line)
                if tp:
                    teamposition = tp

        # More than one kit part may be on a line. The columns are taken from
        # the whole line, so one pass per matching line is sufficient.
        if kitinfo_re.findall(line):
            for column in line.split('|'):
                if 'body' in column or 'shorts' in column or 'socks' in column or 'pattern_b' in column:
                    _applyKitColumn(column.strip(), kit)

    if len(players) < 15:
        return None

    return soccer.Team(team, kit, teamposition or 0, players)