def fetchPlayer(line):
    """Build a soccer.Player from one line of wiki squad-template markup.

    The line is only parsed when it contains (case-insensitively) one of
    the '{{fs player', '{{football squad player' or '{{fs2 player'
    templates.  The 'key=value' columns 'no', 'nat', 'pos', 'name',
    'first' and 'last' are read from it.

    A number that cannot be converted to int is treated as 0 and a missing
    nationality as 'NA'.  When 'name' is absent, it is assembled from
    'first' and 'last'.

    Returns the soccer.Player, or None when the line holds no squad
    template or lacks a position or a name.
    """
    def playerError(msg):
        print >> Globals.errlog, "Player %s: %s" % (
            line.encode('utf-8'), msg.encode('utf-8'))

    lowered = line.lower()
    squadTemplates = ('{{fs player',
                      '{{football squad player',
                      '{{fs2 player')
    if not any(marker in lowered for marker in squadTemplates):
        return None

    bare = wikiutils.unlink_wiki(line).replace('{', '').replace('}', '')
    fields = [part.strip() for part in bare.split('|')]

    number = 0
    nationality = None
    pos = None
    nameParts = {'name': None, 'first': None, 'last': None}

    for field in fields:
        if '=' not in field:
            continue
        try:
            key, value = wikiutils.getKeyValue(field)
        except ValueError:
            playerError("Couldn't parse player information column: %s" % field)
            continue

        if key == 'no':
            try:
                number = int(value)
            except (UnicodeEncodeError, ValueError):
                pass  # usually dash as a player number
        elif key == 'nat':
            nationality = value
        elif key == 'pos':
            pos = value
        elif key in nameParts:
            nameParts[key] = wikiutils.unlinkify(value)[0]

    fullName = nameParts['name']
    if not fullName and nameParts['first'] and nameParts['last']:
        fullName = nameParts['first'] + ' ' + nameParts['last']

    if not nationality:
        nationality = 'NA'

    if pos and fullName:
        return soccer.Player(fullName, number, pos, nationality)
    return None
def addOrUpdateTeamList(l, heading, teams):
    """Add a team list to l unless an equivalent, better-linked one exists.

    Check whether a list with the same team names already exists in the
    list.  If this is the case, have the team list with more links in the
    list.

    l       -- list of (heading, teamlist) tuples; modified in place.
    heading -- heading stored together with the new team list.
    teams   -- iterable of wiki-markup team strings; each is passed through
               wikiutils.unlinkify and the results are sorted.  Element 0
               of each result is used as the team name and element 1 as
               its link.

    Two team lists count as the same when their sorted names are equal.
    An existing entry is replaced only when the new list has strictly more
    truthy links; otherwise l is left untouched.  Returns None.
    """
    def cleaned(namelist):
        # NOTE(review): as written this replaces a plain space with a plain
        # space, i.e. it is a no-op.  Presumably the first argument was
        # meant to be a non-breaking space -- confirm against the original
        # file before changing it.
        return [t.replace(' ', ' ') for t in sorted(namelist)]

    def linkCount(teamlist):
        return sum(1 for x in teamlist if x[1])

    tlist = sorted([wikiutils.unlinkify(t) for t in teams])
    toinsert = cleaned([t[0] for t in tlist])

    # Find the first entry that carries exactly the same team names.
    prev = None
    for entry in l:
        if cleaned([x[0] for x in entry[1]]) == toinsert:
            prev = entry
            break

    if prev is None:
        l.append((heading, tlist))
    elif linkCount(tlist) > linkCount(prev[1]):
        l.remove(prev)
        l.append((heading, tlist))
def getTopLeagues():
    """Collect the leagues listed in the confederation league templates.

    For each '<CONFEDERATION>_leagues' template, fetch the page
    'Template:<name>' with wikiutils.getPage and scan it line by line.
    Bullet lines ('* ...') that belong to a '|list1=' ... '|list9='
    parameter are passed to wikiutils.unlinkify.  Every bullet that yields
    a link is recorded.

    Returns a dict mapping link -> (name, name, link, confederationname),
    where confederationname is the part of the template name before the
    first underscore.  Templates whose page text is empty or missing are
    skipped.
    """
    templates = ['UEFA_leagues', 'CONMEBOL_leagues', 'CONCACAF_leagues',
                 'CAF_leagues', 'AFC_leagues', 'OFC_leagues']
    listParam = re.compile(r'\|list[1-9]=')
    leagues = dict()

    for t in templates:
        confederationname = t.split('_')[0]
        text = wikiutils.getPage('Template:' + t)
        if not text:
            continue
        # Single-argument, parenthesised print produces the same output
        # under Python 2 and Python 3.
        print('done.')

        inList = False
        for line in text.split('\n'):
            lineWithoutSpaces = ''.join(line.split())
            if not lineWithoutSpaces:
                continue

            if lineWithoutSpaces[0] in '|}':
                # A new template parameter (or the end of the template)
                # closes the current list.  The original code had
                # 'state == 0' here, a comparison with no effect, so the
                # list was never left.  Re-evaluating the pattern lets a
                # '|listN=' line that directly follows another list open
                # the next list.
                inList = bool(listParam.match(lineWithoutSpaces))
            elif inList and lineWithoutSpaces[0] == '*':
                # Strip surrounding whitespace first so that indented
                # bullets lose their asterisks as well.
                v = line.strip().strip('*').strip()
                name, link = wikiutils.unlinkify(v)
                if link:
                    leagues[link] = (name, name, link, confederationname)
                    print('Found %s' % name)
    return leagues
def getLeagueData(rvtext, leaguedata):
    """Scan league page wiki text and fill in the empty fields of leaguedata.

    Two sources are read from rvtext, line by line:

    * template parameters '|current=', '|divisions='/'|division=',
      '|levels='/'|level=', '|relegation=' and '|teams=' (each one only
      until a value has been found), and
    * a wiki table opened by '{| class="infobox football"', where a header
      row (a line containing 'background') selects which value the
      following rows provide.  Scanning stops at the table's closing '|}'.

    Afterwards the attributes season, relegationleagues, numteams,
    divisions and levelnum of leaguedata are set, but only those that are
    currently falsy.  Returns None.
    """
    class InfoboxState:
        Outside = 0
        Entered = 1
        RelegationLeagues = 2
        NumTeams = 3
        NumLevel = 4
        Season = 5

    # Header keyword -> state for the rows below it; checked in this order.
    headerStates = (
        ('relegation', InfoboxState.RelegationLeagues),
        ('number of clubs', InfoboxState.NumTeams),
        ('level', InfoboxState.NumLevel),
        ('current season', InfoboxState.Season),
    )

    season = ''
    relegationleagues = {}
    numteams = 0
    levelnum = 0
    divisions = 0
    state = InfoboxState.Outside

    for rawline in rvtext.split('\n'):
        compact = ''.join(rawline.split())

        # ---- '|key = value' template parameters ----
        if compact.startswith("|current=") and not season:
            _, value = wikiutils.getKeyValue(rawline)
            _, seasonlink = wikiutils.unlinkify(value)
            if seasonlink:
                season = seasonlink

        if compact.startswith(("|divisions=", "|division=")) and not divisions:
            parsed = wikiutils.getNumberKeyValue(rawline)
            if parsed:
                divisions = parsed

        if compact.startswith(("|levels=", "|level=")) and not levelnum:
            parsed = wikiutils.getNumberKeyValue(rawline)
            if parsed:
                levelnum = parsed

        if compact.startswith("|relegation=") and not relegationleagues:
            _, value = wikiutils.getKeyValue(rawline)
            for piece in br_re.split(value):
                _, target = wikiutils.unlinkify(piece.strip())
                if target:
                    relegationleagues[target] = target

        if compact.startswith('|teams=') and not numteams:
            numteams = wikiutils.getNumberKeyValue(rawline)

        # ---- infobox table ----
        if state == InfoboxState.Outside:
            if compact.startswith('{|class="infoboxfootball"'):
                # e.g. Regionalliga_Nord
                state = InfoboxState.Entered
            continue

        if not compact.startswith('|'):
            continue

        # Everything after the second '|' of the row is the cell content.
        cell = '|'.join(rawline.split('|')[2:])
        if not cell:
            if compact[0:2] == '|}':
                # End of the table: stop reading the page altogether.
                state = InfoboxState.Outside
                break
            continue

        label, link = wikiutils.unlinkify(cell)
        lowered = label.lower()

        if 'background' in rawline:
            # Header row: decide what the following rows contain.
            for keyword, nextstate in headerStates:
                if keyword in lowered:
                    state = nextstate
                    break
            else:
                state = InfoboxState.Entered
            continue

        # Data row: interpret it according to the last header seen.
        if state == InfoboxState.RelegationLeagues:
            if link:
                relegationleagues[link] = link
            else:
                state = InfoboxState.Entered
        elif state == InfoboxState.NumTeams:
            digits = re.findall(r'\d+', label)
            if digits:
                numteams = int(digits[0])
        elif state == InfoboxState.NumLevel:
            digits = re.findall(r'\d+', label)
            if digits:
                levelnum = int(digits[0])
        elif state == InfoboxState.Season:
            if link:
                season = link
            else:
                state = InfoboxState.Entered

    # Only fill in what the caller does not already know.
    collected = (
        ('season', season),
        ('relegationleagues', relegationleagues),
        ('numteams', numteams),
        ('divisions', divisions),
        ('levelnum', levelnum),
    )
    for attr, value in collected:
        if not getattr(leaguedata, attr):
            setattr(leaguedata, attr, value)