def parseSeason(name, seasonUrl):
    """Scrape a season page and build a Season with its divisions and teams.

    Every anchor on the page at ``seasonUrl`` is visited in document order:

    * An anchor whose href starts with ``display-schedule.php`` is a team.
      Its players are loaded with ``parseTeam(baseUrl + href)`` and the team
      is appended to the most recently created division.  Teams that appear
      before any division heading are still parsed but are not attached to
      a division.
    * An anchor without an href is treated as a division heading.  The
      second whitespace-separated word of its text identifies the division,
      and only the first heading seen for each identifier creates a
      Division.  Headings with no text or fewer than two words are skipped.

    Team ids, division ids and division levels are all numbered from 1 in
    the order the items are encountered.

    Args:
        name: Name stored on the returned Season.
        seasonUrl: URL of the season page to download and parse.

    Returns:
        A Season with ``name``, ``url``, ``seasonId`` and ``divisions``
        populated.  ``seasonId`` is ``getSeason()`` applied to the first
        team link, or None when the page contains no team links.
    """
    data = urllib2.urlopen(seasonUrl)
    try:
        soup = BeautifulSoup(data.read())
    finally:
        # The handle is not used as a context manager here (Python 2
        # urllib2), so release the connection explicitly.
        data.close()

    season = Season()
    season.name = name
    season.url = seasonUrl

    seasonId = None
    currentDivision = None
    seenDivisions = set()
    teamId = 1
    divisionLevel = 1
    divisionId = 1

    for link in soup.find_all('a'):
        hrefVal = link.get('href')

        if hrefVal and hrefVal.startswith('display-schedule.php'):
            # The season id is derived once, from the first team link.
            if seasonId is None:
                seasonId = getSeason(hrefVal)

            team = Team()
            team.url = hrefVal
            team.name = link.string
            team.teamId = teamId
            teamId += 1
            team.players = parseTeam(baseUrl + hrefVal)
            if currentDivision:
                currentDivision.teams.append(team)

        elif not hrefVal:
            # link.string is None for anchors that are empty or wrap nested
            # markup; such anchors cannot name a division, so skip them
            # instead of crashing on .split() / [1].
            words = link.string.split() if link.string else []
            if len(words) < 2:
                continue

            division = words[1]
            if division in seenDivisions:
                continue
            seenDivisions.add(division)

            currentDivision = Division()
            # Assign before incrementing so the first division gets level 1
            # and id 1, matching how team ids are numbered above.
            currentDivision.level = divisionLevel
            currentDivision.divisionId = divisionId
            currentDivision.name = link.string
            divisionLevel += 1
            divisionId += 1
            season.divisions.append(currentDivision)

    season.seasonId = seasonId
    return season
def get_team(self, team_type, attrib):
    """Build a Team from the ``attrib`` entries prefixed with ``team_type``.

    Three keys are read from ``attrib``: ``<team_type>_team_code``,
    ``<team_type>_fname`` and ``<team_type>_id`` (so a ``team_type`` of
    ``'home'`` reads ``'home_team_code'`` and so on).  The id is converted
    with ``int()``.

    Args:
        team_type: Prefix used to build the three lookup keys.
        attrib: Mapping that holds the team attributes.

    Returns:
        A Team with ``code``, ``name`` and ``mlb_id`` set, or None when a
        key is missing, ``attrib`` is not subscriptable, or the id is not
        a valid integer.
    """
    # Only the lookups and the int() conversion are expected to fail; keep
    # the try body that small so unrelated errors are not silently hidden.
    try:
        code = attrib['%s_team_code' % team_type]
        name = attrib['%s_fname' % team_type]
        mlb_id = int(attrib['%s_id' % team_type])
    except (KeyError, TypeError, ValueError):
        return None

    team = Team()
    team.code = code
    team.name = name
    team.mlb_id = mlb_id
    return team