def parse_penalty_08(event):
    """Parse a 2008-style penalty description into attributes on ``event``.

    Reads ``event.desc`` (with backslashes removed) and sets:

    * ``event.participants`` -- list of dicts with ``name``, ``num``,
      ``team`` and ``playerType`` (``'penaltyOn'``, ``'servedBy'``,
      ``'drewBy'``), one per pattern that matched.
    * ``event.offence`` / ``event.length`` -- from ``penalty_info_re``,
      defaulting to ``'Unknown'`` and ``2`` when it does not match.
    * ``event.severity`` -- the ``penalty_class`` group (or ``'min'``) with
      ``'or'`` appended, defaulting to ``'minor'``.
    * ``event.zone`` -- only when ``zone_re`` matches.

    NOTE(review): the result of ``against_team_re.match`` is used without a
    check, so a description that pattern does not match presumably raises
    ``AttributeError`` -- confirm whether callers rely on that.
    """
    desc = event.desc.replace('\\', '')

    match = against_team_re.match(desc)
    against_team_abbr = team_abbr_parser(match.group('team_against'))

    event.participants = []

    match = against_player_re.match(desc)
    if match:
        event.participants.append({
            'name': match.group('against_player_name'),
            'num': int(match.group('against_player_number')),
            'team': against_team_abbr,
            'playerType': 'penaltyOn',
        })

    match = serving_player_re.match(desc)
    if match:
        event.participants.append({
            'name': match.group('serving_player_name'),
            'num': int(match.group('serving_player_number')),
            'team': against_team_abbr,
            'playerType': 'servedBy',
        })

    match = drawn_player_re.match(desc)
    if match:
        event.participants.append({
            'name': match.group('drawn_player_name'),
            'num': int(match.group('drawn_player_number')),
            'team': team_abbr_parser(match.group('drawn_team')),
            'playerType': 'drewBy',
        })

    match = penalty_info_re.match(desc)
    if match:
        event.offence = match.group('offence').strip()
        event.length = int(match.group('penalty_length'))
    else:
        event.offence = 'Unknown'
        event.length = 2

    try:
        event.severity = (match.group('penalty_class') or 'min') + 'or'
    except (AttributeError, IndexError):
        # AttributeError: penalty_info_re did not match (match is None).
        # IndexError: the pattern defines no 'penalty_class' group.
        # Anything else is a real bug and is no longer silently swallowed.
        event.severity = 'minor'

    match = zone_re.match(desc)
    if match:
        event.zone = match.group('zone')
def team_num_name(s):
    """Split a "team / number / name" string into a dict.

    Two input shapes are handled (error report 600 and error report 672):

    * With a ``#``: the text before the first ``#`` goes through
      ``team_abbr_parser``; number and name are taken from groups 1 and 2
      of ``__num_name_re`` searched in the text after it. When the search
      fails, ``num`` is ``-1`` and ``name`` is ``''``.
    * Without a ``#``: an inline pattern extracts the team abbreviation,
      a one- or two-digit number and the remaining name. The abbreviation
      is returned as matched, without going through ``team_abbr_parser``.

    Returns a dict with the keys ``team``, ``num`` and ``name``.
    """
    if '#' not in s:
        pattern = r"(?P<team_abbr>[A-Z\.]{2,3})\s*(?P<player_num>[0-9]{1,2})\s*(?P<player_name>.+)"
        parsed = re.match(pattern, s)
        return {
            "team": parsed.group('team_abbr'),
            "num": int(parsed.group('player_num')),
            "name": parsed.group('player_name'),
        }

    pieces = s.split("#")
    team = team_abbr_parser(pieces[0].strip())
    found = __num_name_re.search(pieces[1])

    if not found:
        return {"team": team, "num": -1, "name": ''}

    name = found.group(2).strip()
    digits = found.group(1)
    num = -1 if len(digits) == 0 else int(digits)
    return {"team": team, "num": num, "name": name}
def parse_goal_desc_08(event):
    """Parse a 2008-style goal description into attributes on ``event``.

    Reads ``event.desc`` and sets:

    * ``event.is_penalty_shot`` -- whether "penalty" occurs in the text.
    * ``event.assists`` -- for non-penalty-shot goals only: a dict keyed by
      the first item returned by ``assist_from`` with items 1-2 as value.
    * ``event.shot_type`` / ``event.zone`` / ``event.dist`` -- whichever of
      these can be recovered from the comma-separated fields.
    * ``event.shooter`` -- dict with ``team``, ``num`` (``-1`` when not
      numeric) and ``name``.
    * ``event.shooter_seas_tot`` -- the number in parentheses after the
      shooter name, or ``-1`` when absent or not numeric.
    """
    event.is_penalty_shot = 'penalty' in event.desc.lower()

    if not event.is_penalty_shot:
        s = event.desc.split(":")

        # assists dictionary = { number, [ name, season total ] }
        a_d = {}
        if len(s) > 1:
            # assists by
            for ai in s[1].split(";"):
                ai = ai.strip()
                if ai != "":
                    a_l = assist_from(ai)
                    a_d[a_l[0]] = a_l[1:3]
        event.assists = a_d

        s = s[0].split(",")
        s = [e.strip() for e in s if e not in ["Assists", "Assist", "A"]]
    else:
        s = event.desc.split(',')
        s = rem_penalty_shot_desc(s)

    # base case
    if len(s) > 3:
        event.shot_type = s[1]
        event.zone = s[2]
        event.dist = get_ft(s[3])
    else:
        # Fewer fields than expected: work backwards from the end.
        try:
            event.dist = get_ft(s[-1])
            if 'zone' in s[-2].lower():
                event.zone = s[-2]
            else:
                event.shot_type = s[-2]
        except Exception:
            # Was a bare ``except:``; narrowed so KeyboardInterrupt and
            # SystemExit are no longer swallowed.
            # NOTE(review): the test looks at s[-1] but the value stored is
            # s[-2]; kept as in the original, but it looks suspicious.
            if 'zone' in s[-1].lower():
                event.zone = s[-2]
            else:
                event.shot_type = s[-2]

    scorer = s[0].split(" ")
    num_str = scorer[1].replace('#', '')

    # Joining every token after the number already copes with multi-word
    # last names. The previous special case for four tokens merged token 3
    # into token 2 and then joined both again, which duplicated the last
    # word and made the season total unparsable (-1).
    pl_tot = [e.strip() for e in ' '.join(scorer[2:]).split("(")]

    event.shooter = {
        'team': team_abbr_parser(scorer[0]),
        'num': int(num_str) if num_str.isdigit() else -1,
        'name': pl_tot[0]
    }

    # No "(" in the scorer text means no season total; fall back to -1
    # instead of raising IndexError.
    seas_tot = pl_tot[1].replace(')', '') if len(pl_tot) > 1 else ''
    event.shooter_seas_tot = int(seas_tot) if seas_tot.isdigit() else -1
def parse_goal_desc_08(event):
    """Parse a 2008-style goal description into attributes on ``event``.

    Reads ``event.desc`` and sets:

    * ``event.is_penalty_shot`` -- whether "penalty" occurs in the text.
    * ``event.assists`` -- for non-penalty-shot goals only: a dict keyed by
      the first item returned by ``assist_from`` with items 1-2 as value.
    * ``event.shot_type`` / ``event.zone`` / ``event.dist`` -- whichever of
      these can be recovered from the comma-separated fields.
    * ``event.shooter`` -- dict with ``team``, ``num`` (``-1`` when not
      numeric) and ``name``.
    * ``event.shooter_seas_tot`` -- the number in parentheses after the
      shooter name, or ``-1`` when absent or not numeric.
    """
    event.is_penalty_shot = 'penalty' in event.desc.lower()

    if not event.is_penalty_shot:
        s = event.desc.split(":")

        # assists dictionary = { number, [ name, season total ] }
        a_d = {}
        if len(s) > 1:
            # assists by
            for ai in s[1].split(";"):
                ai = ai.strip()
                if ai != "":
                    a_l = assist_from(ai)
                    a_d[a_l[0]] = a_l[1:3]
        event.assists = a_d

        s = s[0].split(",")
        s = [e.strip() for e in s if e not in ["Assists", "Assist", "A"]]
    else:
        s = event.desc.split(',')
        s = rem_penalty_shot_desc(s)

    # base case
    if len(s) > 3:
        event.shot_type = s[1]
        event.zone = s[2]
        event.dist = get_ft(s[3])
    else:
        # Fewer fields than expected: work backwards from the end.
        try:
            event.dist = get_ft(s[-1])
            if 'zone' in s[-2].lower():
                event.zone = s[-2]
            else:
                event.shot_type = s[-2]
        except Exception:
            # Was a bare ``except:``; narrowed so KeyboardInterrupt and
            # SystemExit are no longer swallowed.
            # NOTE(review): the test looks at s[-1] but the value stored is
            # s[-2]; kept as in the original, but it looks suspicious.
            if 'zone' in s[-1].lower():
                event.zone = s[-2]
            else:
                event.shot_type = s[-2]

    scorer = s[0].split(" ")
    num_str = scorer[1].replace('#', '')

    # Joining every token after the number already copes with multi-word
    # last names. The previous special case for four tokens merged token 3
    # into token 2 and then joined both again, which duplicated the last
    # word and made the season total unparsable (-1).
    pl_tot = [e.strip() for e in ' '.join(scorer[2:]).split("(")]

    event.shooter = {
        'team': team_abbr_parser(scorer[0]),
        'num': int(num_str) if num_str.isdigit() else -1,
        'name': pl_tot[0]
    }

    # No "(" in the scorer text means no season total; fall back to -1
    # instead of raising IndexError.
    seas_tot = pl_tot[1].replace(')', '') if len(pl_tot) > 1 else ''
    event.shooter_seas_tot = int(seas_tot) if seas_tot.isdigit() else -1
def team_num_name(s):
    """Split a space-separated "team number name" string into a dict.

    The first token goes through ``team_abbr_parser``. The second token has
    ``#`` removed via ``rem_char`` and becomes an int, or ``-1`` when what
    remains is not all digits. The name is the third token with the fourth
    token (if any) appended directly, without a separating space; tokens
    beyond the fourth are ignored.

    Returns a dict with the keys ``team``, ``num`` and ``name``.
    """
    tokens = s.split(" ")

    digits = rem_char(tokens[1], "#")
    if digits.isdigit():
        number = int(digits)
    else:
        number = -1

    team = team_abbr_parser(tokens[0])

    # two word names
    second_word = tokens[3] if len(tokens) > 3 else ""
    name = str(tokens[2] + second_word)

    return {"team": team, "num": number, "name": name}
def parse_takeaway_08(event):
    """Parse a 2008-style takeaway description into attributes on ``event``.

    ``event.desc`` is split on " - ". The first word of the left part (with
    ``?`` replaced by spaces) is parsed as the team; the right part is split
    on commas into the player field and, when present, the zone.

    Sets ``event.team``, ``event.player_num`` and ``event.player_name``.
    ``event.zone`` is set only when a zone field follows the player.
    """
    s = split_and_strip(event.desc, " - ")
    s[0] = s[0].replace('?', ' ')
    event.team = team_abbr_parser(s[0].split(" ")[0].strip())

    s = split_and_strip(s[1], ",")
    # team_num_name expects a leading team token, so a placeholder is added.
    tnn = team_num_name(str('team ' + s[0]))
    event.player_num = tnn["num"]
    event.player_name = tnn["name"]

    # Some descriptions carry no zone; skip it rather than raise IndexError.
    if len(s) > 1:
        event.zone = s[1]
def parse_takeaway_08(event):
    """Parse a 2008-style takeaway description into attributes on ``event``.

    ``event.desc`` is split on " - ". The first word of the left part (with
    ``?`` replaced by spaces) is parsed as the team; the right part is split
    on commas into the player field and, when present, the zone.

    Sets ``event.team``, ``event.player_num`` and ``event.player_name``.
    ``event.zone`` is set only when a zone field follows the player.
    """
    s = split_and_strip(event.desc, " - ")
    s[0] = s[0].replace('?', ' ')
    event.team = team_abbr_parser(s[0].split(" ")[0].strip())

    s = split_and_strip(s[1], ",")
    # team_num_name expects a leading team token, so a placeholder is added.
    tnn = team_num_name(str('team ' + s[0]))
    event.player_num = tnn["num"]
    event.player_name = tnn["name"]

    # Some descriptions carry no zone; skip it rather than raise IndexError.
    if len(s) > 1:
        event.zone = s[1]
def team_num_name(s):
    """Split a "TEAM #NUM NAME" style string into a dict.

    The text before the first ``#`` goes through ``team_abbr_parser``; the
    text after it is searched with ``__num_name_re``, whose group 1 gives
    the number and group 2 the name. When the search finds nothing, or
    group 1 is empty, ``num`` is ``-1``; a failed search also yields an
    empty ``name``. (error report 600 and error report 672)

    Returns a dict with the keys ``team``, ``num`` and ``name``.
    """
    pieces = s.split("#")
    team = team_abbr_parser(pieces[0].strip())

    found = __num_name_re.search(pieces[1])
    if not found:
        return {"team": team, "num": -1, "name": ''}

    name = found.group(2).strip()
    digits = found.group(1)
    num = -1 if len(digits) == 0 else int(digits)
    return {"team": team, "num": num, "name": name}
def team_num_name(s):
    """Break a string containing ``#`` into team, number and name.

    The part left of the first ``#`` is handed to ``team_abbr_parser``. The
    part right of it is searched with ``__num_name_re``: group 1 is the
    number (``-1`` when empty) and group 2 the name. A failed search gives
    ``num = -1`` and ``name = ''``. (error report 600 and error report 672)

    Returns ``{"team": ..., "num": ..., "name": ...}``.
    """
    before_hash_and_rest = s.split("#")
    result = {"team": team_abbr_parser(before_hash_and_rest[0].strip())}

    hit = __num_name_re.search(before_hash_and_rest[1])
    if hit:
        result["name"] = hit.group(2).strip()
        number_text = hit.group(1)
        if len(number_text) > 0:
            result["num"] = int(number_text)
        else:
            result["num"] = -1
    else:
        result["num"] = -1
        result["name"] = ''

    return result
def parse_takeaway_08(event):
    """Parse a 2008-style takeaway description into attributes on ``event``.

    ``event.desc`` is split on " - ": the first word of the left side (with
    ``?`` turned into spaces) is parsed as the team, and the right side is
    split on commas into the player field and an optional zone.

    Sets ``event.team``, ``event.player_num``, ``event.player_name``,
    ``event.zone`` (only when a zone field is present) and
    ``event.participants``, a one-element tuple holding a dict for the
    player with ``playerType`` set to ``'playerID'``.
    """
    segments = split_and_strip(event.desc, " - ")
    segments[0] = segments[0].replace('?', ' ')
    first_word = segments[0].split(" ")[0]
    event.team = team_abbr_parser(first_word.strip())

    fields = split_and_strip(segments[1], ",")
    # team_num_name expects a leading team token, so a placeholder is added.
    player = team_num_name(str('team ' + fields[0]))
    event.player_num = player["num"]
    event.player_name = player["name"]

    if len(fields) > 1:
        event.zone = fields[1]

    participant = {
        'name': event.player_name,
        'num': event.player_num,
        'team': event.team,
        'playerType': 'playerID',  # Butchering this for nhlscraper
    }
    event.participants = (participant,)
def parse_goal_desc_08(event):
    """Parse a 2008-style goal description into attributes on ``event``.

    Each module-level pattern is matched against ``event.desc``
    independently; an attribute is only set when its pattern matches.

    Always set:

    * ``event.is_penalty_shot`` -- whether "penalty" occurs in the text.
    * ``event.participants`` -- list of the scorer and assist dicts found.
    * ``event.assists`` -- list of assist dicts (possibly empty).

    Set when the matching pattern succeeds:

    * ``event.shooter`` / ``event.scorer`` -- the same dict with ``name``,
      ``num``, ``team`` and ``playerType='scorer'``.
    * ``event.zone``, ``event.dist`` (int), ``event.shot_type``.

    The ``team`` of every participant is the parsed scoring team, or
    ``None`` when ``team_for_re`` does not match.
    """
    desc = event.desc
    event.is_penalty_shot = 'penalty' in desc.lower()
    event.participants = []

    # Default so a description without a recognisable team no longer
    # raises UnboundLocalError when the shooter/assist patterns do match.
    team_for = None
    match = team_for_re.match(desc)
    if match:
        team_for = team_abbr_parser(match.group('team_for'))

    match = shooter_re.match(desc)
    if match:
        event.shooter = {
            'name': match.group('shooter_name'),
            'num': int(match.group('shooter_number')),
            'team': team_for,
            'playerType': 'scorer',
        }
        event.participants.append(event.shooter)
        event.scorer = event.shooter

    event.assists = []
    assist_patterns = (
        (a1_re, 'a1_num', 'a1_name'),
        (a2_re, 'a2_num', 'a2_name'),
    )
    for pattern, num_group, name_group in assist_patterns:
        match = pattern.match(desc)
        if match:
            assist = {
                'name': match.group(name_group),
                'num': int(match.group(num_group)),
                'team': team_for,
                'playerType': 'assist'
            }
            event.participants.append(assist)
            event.assists.append(assist)

    match = zone_re.match(desc)
    if match:
        event.zone = match.group('zone')

    match = distance_re.match(desc)
    if match:
        event.dist = int(match.group('distance'))

    match = shot_type_re.match(desc)
    if match:
        event.shot_type = match.group('shot_type')
def parse_scoring_summary(self):
    """Walk the scoring-summary rows of the report and print each goal.

    Rows are taken from the fourth ``tr`` child of the element with id
    ``MainTable``; only rows whose class is ``oddColor`` or ``evenColor``
    are processed. For each such row the goal number, scorer and assists
    are printed as a dict. Nothing is returned or stored on ``self``.
    """
    lx_doc = self.html_doc()
    main = lx_doc.xpath('//*[@id="MainTable"]')[0]
    scr_summ = main.xpath('child::tr[4]//tr')

    for r in scr_summ:
        if r.get('class') not in ['oddColor', 'evenColor']:
            continue

        tds = r.xpath('./td')
        scr = [td.xpath('text()') for td in tds[:8]]

        # goal summary data
        goals = {}

        # goal num, game state, scoring team
        gn = to_int(scr[0][0]) if scr[0] else -1
        period = self.__period(scr[1])
        time = split_time(scr[2][0] if period < 4 else '0:00')
        strength = self.__strength(scr[3][0] if scr[3] else 'EV')
        team = team_abbr_parser(scr[4][0])

        # skaters on the ice
        sks = tds[8:]
        goals[gn] = {
            'per': period,
            'time': time,
            'strength': strength,
            'team': team,
            'home': self.__skaters(sks[0]),
            'away': self.__skaters(sks[1])
        }

        scorer = self.__scorer(scr[5][0])
        # TODO: the original had an unfinished check here,
        # ``if scorer['num'] in goals[gn][``, which did not parse. It has
        # been dropped until the intended lookup is known.

        assists = []
        for s in scr[6:8]:
            # u'\xa0' (non-breaking space) marks an empty assist cell
            if s and s[0] != u'\xa0':
                assists.append(self.__scorer(s[0]))

        print({
            'goal_num': gn,
            'scorer': scorer,
            'assists': assists
        })


def __period(self, scr):
    """Return the period number for a period cell's text list.

    ``'SO'`` maps to 5, ``'OT'`` to 4, anything else goes through
    ``to_int``; an empty list gives 0.
    """
    period = 0
    if scr:
        if scr[0] == 'SO':
            period = 5
        elif scr[0] == 'OT':
            period = 4
        else:
            period = to_int(scr[0])
    return period


def __strength(self, sg_str):
    """Map a strength string to a ``Strength`` value.

    Strings containing ``'PP'`` give ``Strength.PP``, strings containing
    ``'SH'`` give ``Strength.SH``, everything else ``Strength.Even``.
    """
    if 'PP' in sg_str:
        return Strength.PP
    elif 'SH' in sg_str:
        # Previously returned Strength.PP, labelling short-handed goals as
        # power-play goals.
        # NOTE(review): assumes Strength defines an SH member -- confirm.
        return Strength.SH
    else:
        return Strength.Even


def __position(self, long_name):
    """Abbreviate a position name to the initials of its words."""
    return ''.join(s[0] for s in long_name.split(' '))


def __scorer(self, num_name_tot):
    """Parse a "NUM I.NAME(TOTAL)" cell into a dict.

    Returns ``num``, ``name`` (the text after the first ``.`` of the second
    token) and ``seas_tot`` (``-1`` unless exactly three tokens result).
    """
    nnt = num_name_tot.replace('(', ' ').replace(')', '')
    nnt_l = nnt.split(' ')
    return {
        'num': to_int(nnt_l[0]),
        'name': nnt_l[1].split('.')[1].strip(),
        'seas_tot': to_int(nnt_l[2]) if len(nnt_l) == 3 else -1
    }


def __skaters(self, td):
    """Collect the on-ice skaters listed in a table cell.

    Each ``font`` child carries a ``title`` of the form
    "<position> - <name>" and the jersey number as text. Returns a dict
    keyed by number (positive numbers only) with ``pos`` and ``name``.

    ``self`` was missing from the original signature even though the
    method is called as ``self.__skaters(...)`` and uses ``self`` itself.
    """
    sk_d = {}
    for sk in td.xpath('./font'):
        pos_pl = sk.get('title').split(' - ')
        num = to_int(sk.xpath('text()')[0])
        if num > 0:
            sk_d[num] = {
                'pos': self.__position(pos_pl[0]),
                'name': pos_pl[1]
            }
    return sk_d
def parse_scoring_summary(self):
    """Walk the scoring-summary rows of the report and print each goal.

    Rows are taken from the fourth ``tr`` child of the element with id
    ``MainTable``. The class of every row is printed; only rows whose class
    is ``oddColor`` or ``evenColor`` are processed further. For each such
    row every assist cell is printed as it is parsed, followed by a dict
    with the goal number, scorer and assists. Nothing is returned or stored
    on ``self``.
    """
    lx_doc = self.html_doc()
    main = lx_doc.xpath('//*[@id="MainTable"]')[0]
    scr_summ = main.xpath('child::tr[4]//tr')

    for r in scr_summ:
        # Debug output. Single-argument print calls behave the same as the
        # original print statements under Python 2.
        print(r.get('class'))
        if r.get('class') not in ['oddColor', 'evenColor']:
            continue

        tds = r.xpath('./td')
        scr = [td.xpath('text()') for td in tds[:8]]

        # goal summary data
        goals = {}

        # goal num, game state, scoring team
        gn = to_int(scr[0][0]) if scr[0] else -1
        period = self.__period(scr[1])
        time = split_time(scr[2][0] if period < 4 else '0:00')
        strength = self.__strength(scr[3][0] if scr[3] else 'EV')
        team = team_abbr_parser(scr[4][0])

        # skaters on the ice
        sks = tds[8:]
        goals[gn] = {
            'per': period,
            'time': time,
            'strength': strength,
            'team': team,
            'home': self.__skaters(sks[0]),
            'away': self.__skaters(sks[1])
        }

        scorer = self.__scorer(scr[5][0])
        # TODO: the original had an unfinished check here,
        # ``if scorer['num'] in goals[gn][``, which did not parse. It has
        # been dropped until the intended lookup is known.

        assists = []
        for s in scr[6:8]:
            # u'\xa0' (non-breaking space) marks an empty assist cell
            if s and s[0] != u'\xa0':
                parsed = self.__scorer(s[0])
                print('%s %s' % (s[0], parsed))
                assists.append(parsed)

        print({
            'goal_num': gn,
            'scorer': scorer,
            'assists': assists
        })


def __period(self, scr):
    """Return the period number for a period cell's text list.

    ``'SO'`` maps to 5, ``'OT'`` to 4, anything else goes through
    ``to_int``; an empty list gives 0.
    """
    period = 0
    if scr:
        if scr[0] == 'SO':
            period = 5
        elif scr[0] == 'OT':
            period = 4
        else:
            period = to_int(scr[0])
    return period


def __strength(self, sg_str):
    """Map a strength string to a ``Strength`` value.

    Strings containing ``'PP'`` give ``Strength.PP``, strings containing
    ``'SH'`` give ``Strength.SH``, everything else ``Strength.Even``.
    """
    if 'PP' in sg_str:
        return Strength.PP
    elif 'SH' in sg_str:
        # Previously returned Strength.PP, labelling short-handed goals as
        # power-play goals.
        # NOTE(review): assumes Strength defines an SH member -- confirm.
        return Strength.SH
    else:
        return Strength.Even


def __position(self, long_name):
    """Abbreviate a position name to the initials of its words."""
    return ''.join(s[0] for s in long_name.split(' '))


def __scorer(self, num_name_tot):
    """Parse a "NUM I.NAME(TOTAL)" cell into a dict.

    Returns ``num``, ``name`` (the text after the first ``.`` of the second
    token) and ``seas_tot`` (``-1`` unless exactly three tokens result).
    """
    nnt = num_name_tot.replace('(', ' ').replace(')', '')
    nnt_l = nnt.split(' ')
    return {
        'num': to_int(nnt_l[0]),
        'name': nnt_l[1].split('.')[1].strip(),
        'seas_tot': to_int(nnt_l[2]) if len(nnt_l) == 3 else -1
    }


def __skaters(self, td):
    """Collect the on-ice skaters listed in a table cell.

    Each ``font`` child carries a ``title`` of the form
    "<position> - <name>" and the jersey number as text. Returns a dict
    keyed by number (positive numbers only) with ``pos`` and ``name``.

    ``self`` was missing from the original signature even though the
    method is called as ``self.__skaters(...)`` and uses ``self`` itself.
    """
    sk_d = {}
    for sk in td.xpath('./font'):
        pos_pl = sk.get('title').split(' - ')
        num = to_int(sk.xpath('text()')[0])
        if num > 0:
            sk_d[num] = {
                'pos': self.__position(pos_pl[0]),
                'name': pos_pl[1]
            }
    return sk_d
def parseSummary(self):
    """Extract teams, even-strength and power-play goal counts.

    After ``self.__open()`` the parsed document is read from
    ``self.html_src``. The method stores three attributes:

    * ``self.teams`` -- ``{'home': abbr, 'away': abbr}`` looked up in ``ABB``.
    * ``self.EV`` -- even-strength goal count per team abbreviation.
    * ``self.PP`` -- per team abbreviation, a tuple of the number parsed
      from between ``-`` and ``/`` in the team's power-play cell and the
      counted power-play goals.

    Side effects: the main table is dumped to ``Output.txt`` and every
    penalty-summary table that is kept is dumped to ``1.txt``, ``2.txt``,
    ... in the current working directory.
    """
    teams = {}
    PP = {}
    EV = {}

    self.__open()
    lx_doc = self.html_src
    main = lx_doc.xpath('//*[@id="MainTable"]')[0]

    # Debug dump; ``with`` closes the file even if the write fails.
    with open("Output.txt", "w") as text_file:
        text_file.write(etree.tostring(main, pretty_print=True))

    away_team = main.find('.//table[@id="Visitor"]')
    away_team = away_team.findall('.//td[@align="center"]')[-1].text
    home_team = main.find('.//table[@id="Home"]')
    home_team = home_team.findall('.//td[@align="center"]')[-1].text

    teams['home'] = ABB[home_team]
    teams['away'] = ABB[away_team]
    for abbr in (teams['home'], teams['away']):
        PP[abbr] = 0
        EV[abbr] = 0

    scr_summ = main.xpath('child::tr[4]//tr')
    for r in scr_summ:
        if r.get('class') not in ['oddColor', 'evenColor']:
            continue
        tds = r.xpath('./td')
        scr = [td.xpath('text()') for td in tds[:8]]
        try:
            strength = scr[3][0]
            if strength == 'EV':
                EV[team_abbr_parser(scr[4][0])] += 1
            elif strength == 'PP':
                PP[team_abbr_parser(scr[4][0])] += 1
        except Exception:
            # Best effort, as in the original: rows with missing cells or
            # an unrecognised team are skipped rather than aborting.
            continue

    pen_sum = main.find('.//table[@id="PenaltySummary"]')
    test = pen_sum.findall('.//table')

    tbl_list = []
    dumped = 0
    for t in test:
        if len(t.xpath('.//td[text()="TOT (PN-PIM)"]')) != 1:
            continue
        if len(t.xpath(
                './/td[text()="Power Plays (Goals-Opp./PPTime)"]')) != 0:
            s = lxml.html.tostring(t)
            # File names are generated instead of read from a fixed
            # eight-entry list, so a ninth table no longer raises
            # IndexError; the first eight names are unchanged.
            dumped += 1
            with open("%d.txt" % dumped, "w") as text_file:
                text_file.write(s)
            tbl_list.append(t)

    # The first kept table is read as the away team's, the second as the
    # home team's.
    a_pen = tbl_list[0].findall('.//td[@align="left"]')
    h_pen = tbl_list[1].findall('.//td[@align="left"]')
    away_pen = a_pen[-1].text
    home_pen = h_pen[-1].text

    # Keep only the number between '-' and '/' in the cell text.
    away_pen = int(away_pen[away_pen.index('-') + 1:away_pen.index('/')])
    home_pen = int(home_pen[home_pen.index('-') + 1:home_pen.index('/')])

    PP[teams['home']] = (home_pen, PP[teams['home']])
    PP[teams['away']] = (away_pen, PP[teams['away']])

    self.PP = PP
    self.EV = EV
    self.teams = teams