def test_parseEnglishTopLeagues(self):
    # The Championship, League One and League Two dumps must each parse
    # to a league with 24 teams.
    dumppaths = (
        'tests/wikidumps/Football_League_Championship.txt',
        'tests/wikidumps/Football_League_One.txt',
        'tests/wikidumps/Football_League_Two.txt',
    )
    for dumppath in dumppaths:
        with open(dumppath, 'r') as dump:
            parsed = soccer.LeagueData()
            parser.getLeagueData(dump.read(), parsed)
            self.assertEqual(parsed.numteams, 24)
def test_parseLegaProPrimaDivisione(self):
    # Parse the league page and the 2012-13 season page of Lega Pro Prima
    # Divisione: the league page must yield 33 teams and the season page
    # must split them into 2 groups.
    #
    # Both dumps are read inside `with` blocks so the file handles are
    # closed deterministically instead of being left to the garbage
    # collector.
    with open('tests/wikidumps/Lega_Pro_Prima_Divisione.txt', 'r') as f:
        leaguetext = f.read()
    with open('tests/wikidumps/2012–13_Lega_Pro_Prima_Divisione.txt', 'r') as f:
        seasontext = f.read()

    leaguedata = soccer.LeagueData()
    parser.getLeagueData(leaguetext, leaguedata)
    self.assertEqual(leaguedata.numteams, 33)

    groups = parser.getSeasonTeams(seasontext, leaguedata)
    self.assertEqual(len(groups), 2)
def test_parseOldInfoboxFootball(self):
    # Regionalliga Nord: check season title, team count, level and the
    # four leagues below it.
    expectedbelowregionalliga = (
        'Oberliga Hamburg',
        'Bremen-Liga',
        'Schleswig-Holstein-Liga',
        'Oberliga Niedersachsen',
    )
    with open('tests/wikidumps/Regionalliga_Nord.txt', 'r') as dump:
        regionalliga = soccer.LeagueData()
        parser.getLeagueData(dump.read(), regionalliga)
        self.assertEqual(regionalliga.season, '2012–13 Fußball-Regionalliga')
        self.assertEqual(regionalliga.numteams, 18)
        self.assertEqual(regionalliga.levelnum, 4)
        self.assertEqual(len(regionalliga.relegationleagues), 4)
        for lowerleague in expectedbelowregionalliga:
            self.assertIn(lowerleague, regionalliga.relegationleagues.keys())

    # Oberliga Niedersachsen: parseDump checks 16 teams at level 5; here
    # we additionally check the four Landesligen below it.
    expectedbelowoberliga = (
        'Landesliga Braunschweig',
        'Landesliga Lüneburg',
        'Landesliga Hannover',
        'Landesliga Weser-Ems',
    )
    oberliga = self.parseDump('Oberliga_Niedersachsen', None, 16, 0, 5)
    self.assertEqual(len(oberliga.relegationleagues), 4)
    for lowerleague in expectedbelowoberliga:
        self.assertIn(lowerleague, oberliga.relegationleagues.keys())
def parseDump(self, name, seasontitle, numteams, numgroups=0, levelnum=0):
    """Parse a league dump (and optionally a season dump) and check it.

    name        -- base name of the league dump in tests/wikidumps/
                   (without the '.txt' suffix); also passed to
                   soccer.LeagueData as its first argument.
    seasontitle -- base name of the season dump, or a false value when
                   there is no separate season dump.
    numteams    -- expected value of leaguedata.numteams (always checked).
    numgroups   -- expected number of groups; skipped when 0.
    levelnum    -- expected value of leaguedata.levelnum; skipped when 0.

    Returns the populated soccer.LeagueData object so callers can make
    further assertions on it.
    """
    # Read the dumps inside `with` blocks so the handles are closed right
    # away instead of leaking until garbage collection.
    with open('tests/wikidumps/%s.txt' % name, 'r') as f:
        leaguetext = f.read()
    seasontext = None
    if seasontitle:
        with open('tests/wikidumps/%s.txt' % seasontitle, 'r') as f:
            seasontext = f.read()

    leaguedata = soccer.LeagueData(name)
    parser.getLeagueData(leaguetext, leaguedata)

    groups = None
    if seasontext:
        # The season page may complement the league data and is the
        # first place to look for the teams.
        parser.getLeagueData(seasontext, leaguedata)
        groups = parser.getSeasonTeams(seasontext, leaguedata)
    if not groups:
        # Fall back to the league page when the season page gave nothing.
        groups = parser.getSeasonTeams(leaguetext, leaguedata)

    self.assertEqual(leaguedata.numteams, numteams)
    if numgroups:
        self.assertEqual(len(groups), numgroups)
    if levelnum:
        self.assertEqual(leaguedata.levelnum, levelnum)
    return leaguedata
def test_parseScottishLeague(self):
    # Walk the four Scottish divisions from top to bottom. For each one,
    # check that the parsed league links to exactly the next division down
    # (or to nothing for the last one), keeps the title and promotion
    # league it was constructed with, and that its 2012-13 season page
    # yields a single group which passes checkLeagueData.
    leagues = [
        ('Scottish Premier League', 12),
        ('Scottish Football League First Division', 10),
        ('Scottish Football League Second Division', 10),
        ('Scottish Football League Third Division', 10),
    ]
    for i, (leaguename, numteams) in enumerate(leagues):
        nextleaguename = leagues[i + 1][0] if i < len(leagues) - 1 else None
        # Season pages drop the 'Football League ' part of the title.
        seasonname = '2012–13 ' + leaguename.replace('Football League ', '')
        promotionleague = '' if i == 0 else leagues[i - 1][0]
        leaguepath = 'tests/wikidumps/' + wikiutils.titleToFilename(leaguename) + '.txt'
        seasonpath = 'tests/wikidumps/' + wikiutils.titleToFilename(seasonname) + '.txt'

        leaguedata = soccer.LeagueData(leaguename, promotionleague)
        # `with` closes the dump promptly instead of leaking the handle.
        with open(leaguepath, 'r') as f:
            leaguetext = f.read()
        parser.getLeagueData(leaguetext, leaguedata)

        if not nextleaguename:
            self.assertEqual(len(leaguedata.relegationleagues), 0)
        else:
            self.assertEqual(len(leaguedata.relegationleagues), 1)
            self.assertIn(nextleaguename, leaguedata.relegationleagues.keys())
        self.assertEqual(leaguedata.title, leaguename)
        self.assertEqual(leaguedata.promotionleague, promotionleague)

        with open(seasonpath, 'r') as f:
            seasontext = f.read()
        groups = parser.getSeasonTeams(seasontext, leaguedata)
        self.assertEqual(len(groups), 1)
        # Each group is indexed as a pair here; the teams are its second item.
        teams = groups[0][1]
        self.checkLeagueData(numteams, leaguedata, teams)
def fetchLeagueData(specificLeague): try: load() except IOError as exc: if exc.errno == errno.ENOENT: print 'No previous progress - starting from the top.' Globals.progress.leagues = parser.getTopLeagues() Globals.progress.processedleagues = dict() save() else: raise if len(Globals.progress.processedleagues) == 0 and len(Globals.progress.leagues) == 0: print 'No progress - starting from the top.' Globals.progress.leagues = parser.getTopLeagues() Globals.progress.processedleagues = dict() save() while specificLeague or len(Globals.progress.leagues) > 0: if specificLeague: found = None for k in Globals.progress.leagues.keys(): if specificLeague in k: found = k leaguetitle = found leaguename, country, toplevelleague, confederationname = Globals.progress.leagues[found] break if not found: for k in Globals.progress.processedleagues.keys(): if specificLeague == k: found = k leaguetitle = found league = Globals.progress.processedleagues[found] country = league.country toplevelleague = league.toplevelleague confederationname = league.confederation break if not found: print >> sys.stderr, "I don't have league '%s' queued.\n" % specificLeague print >> sys.stderr, "%s\n" % Globals.progress.printQueuedLeagues() return else: leaguetitle = iter(Globals.progress.leagues).next() leaguename, country, toplevelleague, confederationname = Globals.progress.leagues[leaguetitle] promotionleague = None for processedleaguename, processedleague in Globals.progress.processedleagues.items(): if processedleague.relegationleagues and leaguetitle in processedleague.relegationleagues: promotionleague = processedleaguename break leaguedata = None rvtext = wikiutils.getPage(leaguetitle) if rvtext: """First get and parse the league text as it may contain a link to the current season. Then, try to complement any league data from the season page. 
Finally, try to get the team data, from the season link first if possible.""" leaguedata = soccer.LeagueData(leaguetitle, promotionleague, confederationname, country, toplevelleague) parser.getLeagueData(rvtext, leaguedata) if leaguedata.season: stext = wikiutils.getPage(leaguedata.season, True) else: stext = None if stext: parser.getLeagueData(stext, leaguedata) # overwrite levelnum from the wiki info as it seems to be unreliable (e.g. Venezuelan_Segunda_División) if not promotionleague: leaguedata.levelnum = 1 else: leaguedata.levelnum = Globals.progress.processedleagues[promotionleague].levelnum + 1 if Globals.fetchTeams: if stext: parser.getTeamData(stext, leaguedata) parser.getTeamData(rvtext, leaguedata) if leaguedata.hasTeams(): root = leaguedata.toXML() outdir = Globals.outputdir + wikiutils.titleToFilename(leaguedata.confederation) + '/' + country + '/' utils.mkdir_p(outdir) with open(outdir + wikiutils.titleToFilename(leaguedata.title) + '.xml', 'w') as f: f.write(etree.tostring(root, pretty_print=True)) if leaguedata.relegationleagues: for rln, rll in leaguedata.relegationleagues.items(): if rln not in Globals.progress.leagues: Globals.progress.leagues[rll] = (rln, country, toplevelleague, confederationname) print '%d following league(s): %s' % (len(leaguedata.relegationleagues), leaguedata.relegationleagues.keys()) else: print 'No following leagues.' else: print 'Failed to fetch teams.' else: print 'No revision text for league.' Globals.didSomething = True if leaguedata: Globals.progress.leagueProcessed(leaguedata) else: del Globals.progress.leagues[leaguetitle] save() if specificLeague: return