Example #1
0
from globals import TeamStat, getInFile, marshal
import string
from lxml import etree
from whohas import whohas

# Parse the saved standings page into one TeamStat per table row and pickle
# the resulting list to parsed/gss.pkl.
tree = etree.parse(getInFile('raw/standings.html'), etree.HTMLParser())
standings = []

# One table per group; only the first eight letters (A-H) are looked up.
for letter in string.ascii_uppercase[:8]:
  # [0] raises IndexError if the page has no table with this summary.
  table = tree.xpath('//table[@summary="Group Group %c"]' % letter)[0]
  for row in table.findall('./tbody/tr'):
    # Every row must hold exactly these eight cells, in this order; any other
    # cell count raises ValueError on unpacking.  list(row) replaces the
    # deprecated lxml Element.getchildren().
    (tdTeam, tdPlayed, tdWins, tdDraws, tdLosses,
     tdGoalsFor, tdGoalsAgainst, tdPoints) = list(row)

    # The team cell's <img> carries both the code (in src) and name (in title).
    img = tdTeam.find('.//img')

    ts = TeamStat()
    ts.group = letter
    # Characters -7..-4 of the image src, upper-cased -- presumably the
    # three-letter team code in front of a 4-char extension; TODO confirm.
    ts.teamCode = img.get('src')[-7:-4].upper()
    # Round-trip the title through latin1 bytes and decode those as UTF-8
    # (Python 2 `unicode`).  The plain `img.get('title')` was used previously;
    # presumably it returned a mis-decoded name -- TODO confirm.
    ts.teamName = unicode(img.get('title').encode('latin1'), 'utf-8')
    ts.played = int(tdPlayed.text)
    ts.wins = int(tdWins.text)
    ts.draws = int(tdDraws.text)
    ts.losses = int(tdLosses.text)
    ts.goalsFor = int(tdGoalsFor.text)
    ts.goalsAgainst = int(tdGoalsAgainst.text)
    # Both operands are already ints, so no extra conversion is needed.
    ts.goalsDiff = ts.goalsFor - ts.goalsAgainst
    ts.points = int(tdPoints.text)

    standings.append(ts)

marshal(standings, 'parsed/gss.pkl')
Example #2
0
    sys.exit(0) # MatchStat exists, abort parsing.
except IOError:
  pass # file not found means go on, the MatchStat does not yet exist! 

# Fill in the fixture details (stage/group, kick-off, teams) of `ms` from the
# match <div>, sanity-check them, and pickle the record.

# The footer info spans are read by position: [0] number, [1] stage, [2] date.
info_spans = div.xpath('./div[@class="footer"]/div[@class="info"]/span')

# text[6:] skips a fixed six-character prefix in front of the number --
# presumably a label such as "Match "; TODO confirm against the page.
assert ms.number == int(info_spans[0].text[6:])

# Trim the stage label to its first run of word characters, spaces and
# hyphens that starts and ends on a word character.
stage = re.search(r'\w[\w -]+\w', info_spans[1].text).group()
# Only strip the six characters of "GROUP " when the label really starts with
# it; a substring test would also cut labels that merely contain "GROUP ".
if stage.upper().startswith('GROUP '):
  stage = stage[6:].upper()
ms.group = stage


whdate = info_spans[2].text
whtime = div.xpath('./div[@class="match"]/div[@class="time"]')[0].text
# "<two digits> Ju[nl][ey]" (e.g. June/July dates) plus "H:MM" or "HH:MM".
ms.when = (re.search(r'\d\d Ju[nl][ey]', whdate).group()
           + ' '
           + re.search(r'\d?\d:\d\d', whtime).group())
# Team codes are characters -7..-4 of the flag image src, upper-cased.
ms.homeCode = div.xpath('./div[@class="match"]/div[@class="teamH"]/div[@class="flag"]/a/img')[0].get('src')[-7:-4].upper()
ms.homeName = div.xpath('./div[@class="match"]/div[@class="teamH"]/div[@class="name"]/a')[0].text
ms.awayCode = div.xpath('./div[@class="match"]/div[@class="teamA"]/div[@class="flag"]/a/img')[0].get('src')[-7:-4].upper()
ms.awayName = div.xpath('./div[@class="match"]/div[@class="teamA"]/div[@class="name"]/a')[0].text

# Sanity checks on the scraped values: codes are purely alphabetic and names
# neither start nor end with a non-letter (e.g. stray whitespace).
for code in (ms.homeCode, ms.awayCode):
  assert code.isalpha()
for name in (ms.homeName, ms.awayName):
  assert name[0].isalpha()
for name in (ms.homeName, ms.awayName):
  assert name[-1].isalpha()

# No result data is attached at this stage.
ms.hasResults = False

marshal(ms, 'parsed/match%02d.pkl' % (ms.number))
Example #3
0
# Attach the parsed result data to `ms`, print a short summary and pickle the
# updated record.
ms.goalsCode = goals
ms.homeGoals = homeGoals
ms.awayGoals = awayGoals
ms.homeCards = homeCards
ms.awayCards = awayCards
ms.hasResults = True

# Score line is derived from the number of entries in each goal list.
res = "%d:%d" % (len(homeGoals), len(awayGoals))
try:
    # The smallest entry of `goals` must be a 2-tuple (number, text) to fit
    # the format -- presumably (minute, scorer/code); TODO confirm.
    fg = "%d' -- %s" % min(goals)
except (ValueError, TypeError):
    # ValueError: `goals` is empty.  TypeError: `goals` is not iterable or
    # its smallest entry does not fit the format.  Anything else (including
    # KeyboardInterrupt/SystemExit) is no longer swallowed.
    fg = "(no goals)"
# Indices 0 and 1 of each card triple are shown per team and index 2 is
# summed -- presumably yellow, red and total counts; TODO confirm.
ycrc = "%d/%d : %d/%d -- total: %d" % (
    homeCards[0],
    homeCards[1],
    awayCards[0],
    awayCards[1],
    homeCards[2] + awayCards[2],
)
# Single parenthesised argument: prints identically on Python 2 and 3.
print("Results of match %d:\n  %s:%s  %s\n  yc/rc: %s\n  first goal: %s" % (
    ms.number,
    ms.homeCode,
    ms.awayCode,
    res,
    ycrc,
    fg,
))

marshal(ms, "parsed/match%02d.pkl" % (ms.number))