def getTopLeagues():
    """Collect the leagues listed in the confederation navigation templates.

    For every confederation template (``Template:UEFA_leagues`` and so on)
    the page text is fetched with ``wikiutils.getPage`` and scanned line by
    line.  Bullet lines (first non-blank character ``*``) that follow a
    ``|listN=`` parameter are passed to ``wikiutils.unlinkify``; entries
    that yield a link are recorded.

    Returns:
        dict mapping each link to the tuple
        ``(name, name, link, confederationname)``, where
        ``confederationname`` is the template name up to its first
        underscore (e.g. ``'UEFA'``).
    """
    templates = ['UEFA_leagues', 'CONMEBOL_leagues', 'CONCACAF_leagues',
                 'CAF_leagues', 'AFC_leagues', 'OFC_leagues']
    # Start of a "|listN =" parameter, matched after all whitespace has been
    # removed from the line.  Any positive N is accepted, not only 1-9.
    listParameter = re.compile(r'\|list[1-9][0-9]*=')

    leagues = dict()
    for t in templates:
        confederationname = t.split('_')[0]
        text = wikiutils.getPage('Template:' + t)
        if not text:
            continue
        print('done.')

        inList = False
        for line in text.split('\n'):
            lineWithoutSpaces = ''.join(line.split())
            if not lineWithoutSpaces:
                continue
            if listParameter.match(lineWithoutSpaces):
                inList = True
                continue
            if not inList:
                continue

            firstChar = lineWithoutSpaces[0]
            if firstChar in ('|', '}'):
                # Another parameter or the end of the template closes the
                # list.  (This used to be the no-op comparison "state == 0",
                # so bullets anywhere later on the page were collected.)
                inList = False
            elif firstChar == '*':
                # Strip surrounding whitespace first so that indented
                # bullets lose their asterisks as well.
                v = line.strip().strip('*').strip()
                name, link = wikiutils.unlinkify(v)
                if link:
                    leagues[link] = (name, name, link, confederationname)
                    print('Found %s' % name)
    return leagues
def fetchTeamData(team):
    """Fetch the page titled `team` and parse it with parseTeam.

    A short status line is printed for every outcome.

    Returns:
        The object returned by ``parseTeam(team, text, True)``, or None when
        ``wikiutils.getPage`` gives no text or parseTeam gives a falsy result.
    """
    pagetext = wikiutils.getPage(team)
    if not pagetext:
        print('No revision text.')
        return None

    teamdata = parseTeam(team, pagetext, True)
    if teamdata:
        summary = (teamdata.kits[0].bodycolor, teamdata.pos, len(teamdata.players))
        print('done (kit %s, position %d, %d players)' % summary)
        return teamdata

    print('failed - no players found.')
    return None
def _restartFromTopLeagues(message):
    """Print `message`, reset the progress to the freshly fetched top leagues and save it."""
    print(message)
    Globals.progress.leagues = parser.getTopLeagues()
    Globals.progress.processedleagues = dict()
    save()


def _findRequestedLeague(specificLeague):
    """Locate the league the caller asked for.

    Queued leagues match when `specificLeague` is a substring of their
    title; already processed leagues only match on the exact title.

    Returns:
        ``(leaguetitle, country, toplevelleague, confederationname)``, or
        None when neither collection has a match.
    """
    queued = Globals.progress.leagues
    for title in queued:
        if specificLeague in title:
            leaguename, country, toplevelleague, confederationname = queued[title]
            return title, country, toplevelleague, confederationname

    for title, league in Globals.progress.processedleagues.items():
        if specificLeague == title:
            return title, league.country, league.toplevelleague, league.confederation

    return None


def fetchLeagueData(specificLeague):
    """Fetch and store league data for one requested league or for the whole queue.

    Progress is restored with ``load()``.  When no progress file exists
    (``IOError`` with ``ENOENT``) or both the queue and the processed set
    are empty, the queue is re-seeded from ``parser.getTopLeagues()``.

    For each league handled, the league page is fetched and parsed into a
    ``soccer.LeagueData``, complemented from the season page when there is
    one.  If the result has teams it is written as XML below
    ``Globals.outputdir`` and its relegation leagues are added to the queue.
    Progress is saved after every league.

    Args:
        specificLeague: falsy to work through the whole queue; otherwise
            the (partial) title of a queued league or the exact title of a
            processed league.  Only that league is handled, and a message
            is written to stderr if it cannot be found.

    Raises:
        IOError: re-raised from ``load()`` for anything other than a
            missing file.
    """
    try:
        load()
    except IOError as exc:
        if exc.errno != errno.ENOENT:
            raise
        _restartFromTopLeagues('No previous progress - starting from the top.')

    if len(Globals.progress.processedleagues) == 0 and len(Globals.progress.leagues) == 0:
        _restartFromTopLeagues('No progress - starting from the top.')

    while specificLeague or len(Globals.progress.leagues) > 0:
        if specificLeague:
            requested = _findRequestedLeague(specificLeague)
            if requested is None:
                # Kept as print statements: they encode unicode titles with
                # the stream's encoding, which a plain write() would not.
                print >> sys.stderr, "I don't have league '%s' queued.\n" % specificLeague
                print >> sys.stderr, "%s\n" % Globals.progress.printQueuedLeagues()
                return
            leaguetitle, country, toplevelleague, confederationname = requested
        else:
            leaguetitle = next(iter(Globals.progress.leagues))
            leaguename, country, toplevelleague, confederationname = Globals.progress.leagues[leaguetitle]

        # The league that relegates into this one, if it was already processed.
        promotionleague = None
        for processedleaguename, processedleague in Globals.progress.processedleagues.items():
            if processedleague.relegationleagues and leaguetitle in processedleague.relegationleagues:
                promotionleague = processedleaguename
                break

        leaguedata = None
        rvtext = wikiutils.getPage(leaguetitle)
        if rvtext:
            # First get and parse the league text as it may contain a link to
            # the current season.  Then, try to complement any league data
            # from the season page.  Finally, try to get the team data, from
            # the season link first if possible.
            leaguedata = soccer.LeagueData(leaguetitle, promotionleague, confederationname, country, toplevelleague)
            parser.getLeagueData(rvtext, leaguedata)
            if leaguedata.season:
                stext = wikiutils.getPage(leaguedata.season, True)
            else:
                stext = None
            if stext:
                parser.getLeagueData(stext, leaguedata)

            # overwrite levelnum from the wiki info as it seems to be unreliable (e.g. Venezuelan_Segunda_División)
            if not promotionleague:
                leaguedata.levelnum = 1
            else:
                leaguedata.levelnum = Globals.progress.processedleagues[promotionleague].levelnum + 1

            if Globals.fetchTeams:
                if stext:
                    parser.getTeamData(stext, leaguedata)
                parser.getTeamData(rvtext, leaguedata)

            # NOTE(review): nesting reconstructed - this check is assumed to
            # sit beside, not inside, the fetchTeams branch; please confirm.
            if leaguedata.hasTeams():
                root = leaguedata.toXML()
                outdir = Globals.outputdir + wikiutils.titleToFilename(leaguedata.confederation) + '/' + country + '/'
                utils.mkdir_p(outdir)
                with open(outdir + wikiutils.titleToFilename(leaguedata.title) + '.xml', 'w') as f:
                    f.write(etree.tostring(root, pretty_print=True))

                if leaguedata.relegationleagues:
                    for rln, rll in leaguedata.relegationleagues.items():
                        # The queue is keyed by rll, so that is the key to
                        # test before inserting (the test used to look up rln).
                        if rll not in Globals.progress.leagues:
                            Globals.progress.leagues[rll] = (rln, country, toplevelleague, confederationname)
                    print('%d following league(s): %s' % (len(leaguedata.relegationleagues), leaguedata.relegationleagues.keys()))
                else:
                    print('No following leagues.')
            else:
                print('Failed to fetch teams.')
        else:
            print('No revision text for league.')

        Globals.didSomething = True
        if leaguedata:
            Globals.progress.leagueProcessed(leaguedata)
        else:
            # A league requested from the processed set is not in the queue,
            # so a plain "del" raised KeyError here when its page was missing.
            Globals.progress.leagues.pop(leaguetitle, None)
        save()

        if specificLeague:
            return
def _kitIndex(suffix):
    """Map the text following a kit key prefix to an index into the kit list.

    Returns 0 when `suffix` starts with '1', 1 when it starts with '2', and
    None for anything else, including an empty suffix.
    """
    return {'1': 0, '2': 1}.get(suffix[:1])


def parseTeam(team, rvtext, mayGetTemplates):
    """Parse the text of a team page into a ``soccer.Team``.

    The text is scanned line by line for three things:

    * players, via ``playerparser.fetchPlayer`` until the player list ends;
      under a "current squad" / "first ... squad" heading a squad template
      may be fetched and parsed instead,
    * the league position from a ``|position=`` line that does not mention
      promotion or relegation,
    * body, shorts and socks colours for the first two kits.

    Args:
        team: title of the team, passed through to ``soccer.Team``.
        rvtext: text of the team page.
        mayGetTemplates: when false, no squad template page is fetched.

    Returns:
        ``soccer.Team(team, kit, teamposition, players)`` with position 0
        if none was found, or None when fewer than 15 players were found.
    """
    players = []
    teamposition = None
    kit = [soccer.Kit(), soccer.Kit()]
    finishedReadingPlayers = False
    lookForSquadTemplate = False
    kitParts = ('body', 'shorts', 'socks', 'pattern_b')

    for line in rvtext.split('\n'):
        lineWithoutSpaces = ''.join(line.split())

        if not finishedReadingPlayers:
            p = playerparser.fetchPlayer(line)
            if p:
                players.append(p)
            else:
                heading = wikiutils.getHeading(line)
                if heading:
                    headingLower = heading.lower()
                    isSquadHeading = ('current squad' in headingLower
                                      or ('first' in headingLower and 'squad' in headingLower))
                    # mayGetTemplates has to guard both heading forms; "and"
                    # binds tighter than "or", so it used to cover only the
                    # 'current squad' test.
                    lookForSquadTemplate = bool(mayGetTemplates and isSquadHeading)
                elif lookForSquadTemplate:
                    t = wikiutils.getTemplate(line)
                    if t:
                        text = wikiutils.getPage('Template:' + t)
                        if text:
                            players = playerparser.fetchPlayers(text)
                            if len(players) > 15:
                                finishedReadingPlayers = True
            # NOTE(review): nesting reconstructed - assumed to be checked
            # for every line while players are still being read.
            if playerparser.endOfPlayerList(line):
                finishedReadingPlayers = True

        if lineWithoutSpaces.startswith("|position="):
            # this seems to usually be either this or last season's position
            lowered = lineWithoutSpaces.lower()
            if 'promoted' not in lowered and 'relegated' not in lowered:
                tp = wikiutils.getNumberKeyValue(line)
                if tp:
                    teamposition = tp

        # One pass over the line is enough however often the pattern
        # matches: every column of the line is examined below anyway.
        if kitinfo_re.findall(line):
            columns = [x.strip() for x in line.split('|')
                       if any(part in x for part in kitParts)]
            # apparently, n may be more than 1 if more than one kit part is on a line
            for c in columns:
                try:
                    k, v = wikiutils.getKeyValue(c)
                except Exception:
                    # Best effort: skip columns that do not yield a key/value
                    # pair, but let KeyboardInterrupt/SystemExit through.
                    continue

                # _kitIndex ignores keys without a kit number instead of
                # letting int() raise ValueError on them.
                if k.startswith('body'):
                    n = _kitIndex(k[4:])
                    if n is not None:
                        kit[n].bodycolor = getColorValue(v)
                elif k.startswith('shorts'):
                    n = _kitIndex(k[6:])
                    if n is not None:
                        kit[n].shortscolor = getColorValue(v)
                elif k.startswith('socks'):
                    n = _kitIndex(k[5:])
                    if n is not None:
                        kit[n].sockscolor = getColorValue(v)
                # TODO: body type, second color ('pattern_b<N>' keys other
                # than 'pattern_blue' are recognised but not used yet)

    if len(players) < 15:
        return None
    return soccer.Team(team, kit, teamposition or 0, players)