def __init__(self):
    """Build the per-team lookup tables and reset the parse flags.

    Team records are fetched with dataDir.getTeamData(). Every team
    nickname gets an empty list in unitsByTeam, unitsByTeamNumpy,
    playersByTeam and playersByTeamNumpy; playersByTeam is then seeded
    with one ``[full name, []]`` entry per player on the team's roster.
    """
    # team records from dataDir (teams.json, per the original comment)
    self.teams = dataDir.getTeamData()

    # units by team: start every nickname with an empty list
    self.unitsByTeam = {}
    self.unitsByTeamNumpy = {}
    for team in self.teams:
        nickname = team['nickname']
        self.unitsByTeam[nickname] = []
        self.unitsByTeamNumpy[nickname] = []

    # players by team: one [name, []] pair per rostered player
    self.playersByTeam = {}
    self.playersByTeamNumpy = {}
    for team in self.teams:
        roster = team['roster']
        nickname = team['nickname']
        entries = []
        self.playersByTeam[nickname] = entries
        self.playersByTeamNumpy[nickname] = []
        for player in roster:
            fullName = player['first_name'] + ' ' + player['last_name']
            entries.append([fullName, []])

    # flags recording what has already been parsed; checked before
    # parsing through everything again
    #
    # TODO - set up a list of games that we've parsed and check against
    # that before we start parsing
    self.players_parsed = False
    self.units_parsed = False
elif teamAbbr == 'was': teamAbbr = 'wsh' for team in self.teamData: if teamAbbr == team['abbr']: self.currentTeamData = team return # get the html from the roster url def getRosterHtml(): roster_page = urllib2.urlopen(k_baseUrl) roster_page_html = roster_page.read() roster_page.close() # have to get rid of '&' symbols as it f***s things up roster_page_clean = '' for line in roster_page_html: if line != "&": roster_page_clean = roster_page_clean + line return roster_page_html def saveData(dataAsList): jsonRoster = json.dumps(dataAsList, sort_keys=True, indent=2) jsonFile = open(dataDir.k_rosterDir + 'leagueRoster.json', 'w') jsonFile.write(jsonRoster) jsonFile.close() # setup the parser, get the html, parse it, and save the new stuff parser = rosterHTMLParser() teamData = dataDir.getTeamData() newData = parser.start_parse(getRosterHtml(), teamData) saveTeamData(newData)
def handle_starttag(self, tag, attrs):
    """Track team-logo links and record the images found inside them.

    NOTE(review): this and handle_endtag take ``self`` and read parser
    state, so they are presumably methods of the logo parser class whose
    header is outside this view - confirm when restoring the file layout.

    Args:
        tag: name of the start tag being opened.
        attrs: list of (name, value) pairs for the tag's attributes.

    An ``a`` tag is pushed onto self.tag_stack for each ``title``
    attribute that self.isTeamLogo() accepts. While the stack is
    non-empty, each ``img`` tag appends
    ``[self.currentTeam, <value of its first attribute>]`` to
    self.logoList.
    """
    if tag == "a":
        for attr in attrs:
            if attr[0] == "title" and self.isTeamLogo(attr[1]):
                self.tag_stack.append(tag)
    elif tag == "img" and self.tag_stack and attrs:
        # `attrs` guard: an <img> with no attributes has nothing to record
        # and would otherwise raise IndexError on attrs[0].
        # assumes the first attribute holds the logo reference - TODO confirm
        self.logoList.append([self.currentTeam, attrs[0][1]])


# when we reach an end tag, checks with tag stack
def handle_endtag(self, tag):
    """Pop self.tag_stack when `tag` closes the most recently pushed tag."""
    if self.tag_stack and tag == self.tag_stack[-1]:
        self.tag_stack.pop()


# get the html from the logo url
# return the html as a string
def getLogoHtml():
    """Download the page at k_logoUrl and return its content.

    The response is always closed, even if reading it fails.
    """
    logo_page = urllib2.urlopen(k_logoUrl)
    try:
        return logo_page.read()
    finally:
        logo_page.close()


# setup the parser, get the html, parse it, and save the new stuff
parser = logoHTMLParser()
teamData = dataDir.getTeamData()
logoList = parser.start_parse(getLogoHtml(), teamData)
saveLogos(logoList)