def official_parser_pre_09(lx_doc):
    """
    Pull two groups of official names out of the row holding the "Referee" cell

    :param lx_doc: parsed document exposing ``xpath``
        (presumably an lxml tree -- confirm against the caller)
    :returns: the result of ``__format_out`` applied to the two text lists
    """
    # First cell whose own text mentions "Referee"; indexing with [0]
    # fails if the document has no such cell.
    referee_cell = lx_doc.xpath('//td[contains(text(),"Referee")]')[0]
    officials_row = referee_cell.xpath('..')[0]

    # Children 1 and 3 of the row carry the names (presumably referees
    # and linesmen respectively -- inferred from the original local names).
    first_group = ex_junk(officials_row[1].xpath('.//text()'))
    second_group = ex_junk(officials_row[3].xpath('.//text()'))
    return __format_out(first_group, second_group)
def __read_team_doc(lx_doc):
    """
    Collect one face-off record per player heading row, plus a nested
    record for every opponent row listed beneath it

    :param lx_doc: parsed document exposing ``xpath``
    :returns: dict keyed by the number parsed from each heading row; each
        value is the record built by ``FaceOffRep.__player_fo_rec`` with an
        added ``'opps'`` dict keyed by opponent number
    """
    summary = {}
    opp_pattern = re_comp_num_pos_name()

    heading_rows = lx_doc.xpath(".//td[contains(@class,'playerHeading')]/..")
    for heading in heading_rows:
        # text fields of the player who took the draws
        fields = ex_junk(heading.xpath('.//text()'), containing=['\n', '\r'])
        number = to_int(fields[0])
        position = fields[1]
        # the name field is comma separated; the parts are joined in
        # reverse order with a single space
        full_name = ' '.join(
            part.strip() for part in reversed(fields[2].split(',')))

        player = FaceOffRep.__player_fo_rec(full_name, position, fields[3:7])
        summary[number] = player
        opponents = {}
        player['opps'] = opponents

        for row in heading.xpath('following-sibling::tr'):
            # a row containing a 'space' cell ends this player's section
            if row.xpath(".//td[contains(@class,'space')]"):
                break

            opp_fields = ex_junk(row.xpath('.//text()'),
                                 containing=['\n', '\r'])
            found = opp_pattern.findall(opp_fields[2])
            if found:
                opp_num, opp_pos, opp_last, opp_first = found[0]
            else:
                # placeholder values when the pattern does not match
                opp_num, opp_pos, opp_last, opp_first = ('-1', '', '', '')

            opp_name = ' '.join(
                piece.strip() for piece in [opp_first, opp_last])
            opponents[to_int(opp_num)] = FaceOffRep.__player_fo_rec(
                opp_name, opp_pos, opp_fields[3:7])

    return summary
def official_parser_pre_09(lx_doc):
    """
    Read the officials from the row that contains the "Referee" cell

    :param lx_doc: parsed document exposing ``xpath``
    :returns: whatever ``__format_out`` produces for the two name lists
    """
    matches = lx_doc.xpath('//td[contains(text(),"Referee")]')
    # parent element of the first matching cell
    row = matches[0].xpath('..')[0]

    # The names come from the row's children at positions 1 and 3,
    # processed in that order.
    group_one, group_two = (
        ex_junk(row[position].xpath('.//text()')) for position in (1, 3)
    )
    return __format_out(group_one, group_two)
def __get_by_per_summ(self, per_summ):
    """
    Walk ``per_summ`` and all of its following sibling rows, building one
    summary dict for every row classed ``oddColor`` or ``evenColor``

    :param per_summ: first row element to inspect
    :returns: ``(summaries, last)`` where ``summaries`` maps a period key to
        its stats and ``last`` is the (empty) sibling query result that
        ended the walk
    """
    data_row_classes = ['oddColor', 'evenColor']
    time_keys = ('avg', 'toi', 'ev_toi', 'pp_toi', 'sh_toi')

    by_period = {}
    row = per_summ
    while True:
        css = row.get('class')
        if css is not None and css.strip() in data_row_classes:
            cells = ex_junk(row.xpath('.//text()'), ['\r', '\n'], [''])
            if cells:
                period = to_int(cells[0])
                if not period > 0:
                    # non-positive parse: 'OT' is stored under 4, the rest under 0
                    period = 4 if cells[0] == 'OT' else 0

                stats = {'shifts': to_int(cells[1])}
                # cells 2..6 are converted with __get_time, in key order
                for offset, key in enumerate(time_keys, start=2):
                    stats[key] = self.__get_time(cells[offset])
                by_period[period] = stats

        following = row.xpath('following-sibling::tr')
        if not following:
            # no more rows: hand back the empty query result
            return by_period, following
        row = following[0]
def __get_by_per_summ(self, per_summ):
    """
    Gather per-period summaries starting at ``per_summ`` and continuing
    through every following sibling row

    :param per_summ: row element where the scan begins
    :returns: tuple of the summary dict (period key -> stats dict) and the
        empty sibling lookup result reached once the rows run out
    """
    totals = {}
    node = per_summ
    while True:
        row_class = node.get('class')
        is_data_row = (row_class is not None
                       and row_class.strip() in ['oddColor', 'evenColor'])
        if is_data_row:
            tokens = ex_junk(node.xpath('.//text()'), ['\r', '\n'], [''])
            if tokens:
                label = tokens[0]
                number = to_int(label)
                # positive numbers are used as-is; otherwise 'OT' maps to 4
                # and anything else to 0
                if number > 0:
                    key = number
                elif label == 'OT':
                    key = 4
                else:
                    key = 0

                totals[key] = {
                    'shifts': to_int(tokens[1]),
                    'avg': self.__get_time(tokens[2]),
                    'toi': self.__get_time(tokens[3]),
                    'ev_toi': self.__get_time(tokens[4]),
                    'pp_toi': self.__get_time(tokens[5]),
                    'sh_toi': self.__get_time(tokens[6]),
                }

        siblings = node.xpath('following-sibling::tr')
        if siblings:
            node = siblings[0]
        else:
            # keep the empty lookup result as the second return value
            node = siblings
            break

    return totals, node
def official_parser_10(lx_doc):
    """
    Read the officials from the table that contains the "Referee" cell

    :param lx_doc: parsed document exposing ``xpath``
    :returns: the result of ``__format_out`` for the two slices of names
    """
    referee_cell = lx_doc.xpath('//td[contains(text(),"Referee")]')[0]
    # two levels up from the cell
    table = referee_cell.xpath('../..')[0]
    names = ex_junk(table[1].xpath('.//text()'), ['\n', '\r'])

    # Exactly four names are split two and two; any other count puts only
    # the first name in the leading group.
    split_at = 2 if len(names) == 4 else 1
    return __format_out(names[:split_at], names[split_at:])
def official_parser_10(lx_doc):
    """
    Extract official names from the second child of the table around the
    "Referee" cell and pass them on in two groups

    :param lx_doc: parsed document exposing ``xpath``
    :returns: whatever ``__format_out`` builds from the two groups
    """
    container = lx_doc.xpath(
        '//td[contains(text(),"Referee")]')[0].xpath('../..')[0]
    raw_text = container[1].xpath('.//text()')
    officials = ex_junk(raw_text, ['\n', '\r'])

    # guard: any count other than four keeps just one name in front
    if len(officials) != 4:
        return __format_out(officials[:1], officials[1:])
    return __format_out(officials[:2], officials[2:])
def parse_coaches(self):
    """
    Parse the home and away coaches

    :returns: ``self.coaches`` after it has been filled in
    """
    coach_row = self.html_doc().xpath('//tr[@id="HeadCoaches"]')[0]
    for position, cell in enumerate(coach_row):
        names = ex_junk(cell.xpath('.//text()'), ['\n', '\r'])
        # the first cell is stored as the away coach, every later cell as home
        if position == 0:
            side = 'away'
        else:
            side = 'home'
        self.coaches[side] = names[0]

    return self.coaches
def __player_shifts(self, shift):
    """
    Parse ``shift`` and each following row while the rows stay classed
    ``oddColor`` / ``evenColor``

    :param shift: first shift row; it is always parsed
    :returns: ``(shifts, row)`` -- the list built by ``__build_shift`` and
        the first following row that is not a shift row
    """
    collected = []
    current = shift
    while True:
        cells = ex_junk(current.xpath('.//text()'), ['\r', '\n'], [''])
        collected.append(self.__build_shift(cells))

        # move to the next row; indexing with [0] fails when there is none
        current = current.xpath('following-sibling::tr')[0]
        css = current.get('class')
        if css is not None and css.strip() in ['oddColor', 'evenColor']:
            continue
        return collected, current
def __player_shifts(self, shift):
    """
    Build the list of shifts found in the run of rows starting at ``shift``

    :param shift: row element for the first shift (parsed unconditionally)
    :returns: tuple of the parsed shifts and the row that ended the run
    """
    shift_classes = ['oddColor', 'evenColor']
    results = []
    row = shift
    in_shift_rows = True
    while in_shift_rows:
        text_parts = ex_junk(row.xpath('.//text()'), ['\r', '\n'], [''])
        results.append(self.__build_shift(text_parts))

        # step to the next sibling row and check whether the run continues
        row = row.xpath('following-sibling::tr')[0]
        row_class = row.get('class')
        in_shift_rows = (row_class is not None
                         and row_class.strip() in shift_classes)

    return results, row
def __read_team_doc(lx_doc):
    """
    Read every player heading row and the opponent rows that follow it
    into a nested face-off dictionary

    :param lx_doc: parsed document exposing ``xpath``
    :returns: dict of number -> record from ``FaceOffRep.__player_fo_rec``,
        each extended with an ``"opps"`` dict of opponent number -> record
    """
    result = {}
    name_regex = re_comp_num_pos_name()

    for center_row in lx_doc.xpath(".//td[contains(@class,'playerHeading')]/.."):
        # fields describing the player taking the draws
        info = ex_junk(center_row.xpath(".//text()"), containing=["\n", "\r"])
        jersey = to_int(info[0])
        role = info[1]
        # comma-separated name parts, joined back in reverse order
        pieces = [chunk.strip() for chunk in reversed(info[2].split(","))]
        display_name = " ".join(pieces)

        entry = FaceOffRep.__player_fo_rec(display_name, role, info[3:7])
        result[jersey] = entry
        versus = {}
        entry["opps"] = versus

        for opp_row in center_row.xpath("following-sibling::tr"):
            # stop at the first row that holds a 'space' cell
            if opp_row.xpath(".//td[contains(@class,'space')]"):
                break

            opp_info = ex_junk(opp_row.xpath(".//text()"), containing=["\n", "\r"])
            hits = name_regex.findall(opp_info[2])
            # fall back to placeholder values when nothing matched
            first_hit = hits[0] if hits else ("-1", "", "", "")
            opp_num, opp_pos, opp_last, opp_first = first_hit
            opp_name = " ".join(token.strip() for token in [opp_first, opp_last])
            versus[to_int(opp_num)] = FaceOffRep.__player_fo_rec(
                opp_name, opp_pos, opp_info[3:7]
            )

    return result
def parse_coaches(self):
    """
    Fill ``self.coaches`` from the cells of the ``HeadCoaches`` row

    :returns: ``self`` when ``self.coaches`` ends up non-empty, ``None`` otherwise
    """
    head_coach_row = self.html_doc().xpath('//tr[@id="HeadCoaches"]')[0]
    for index, cell in enumerate(head_coach_row):
        cleaned = ex_junk(cell.xpath(".//text()"), ["\n", "\r"])
        # cell 0 is recorded as "away"; every other cell as "home"
        if index == 0:
            side = "away"
        else:
            side = "home"
        self.coaches[side] = cleaned[0]

    if self.coaches:
        return self
    return None
def parse_coaches(self):
    """
    Read the head coaches row and store the first text entry of each cell

    :returns: ``self`` on success, ``None`` when no coach was stored
    """
    document = self.html_doc()
    row = document.xpath('//tr[@id="HeadCoaches"]')[0]

    # only the first cell counts as the away side
    sides = ('away', 'home')
    for position, cell in enumerate(row):
        entries = ex_junk(cell.xpath('.//text()'), ['\n', '\r'])
        side = sides[0] if position == 0 else sides[1]
        self.coaches[side] = entries[0]

    if not self.coaches:
        return None
    return self
def __clean_pl_block(self, bl):
    """
    Turn a block of player elements into a dict of position/name records

    :param bl: iterable of elements exposing ``xpath``
    :returns: dict keyed by player number; each value has ``'position'``
        and ``'name'`` entries
    """
    def is_plain_token(token):
        # tokens carrying a (C) or (A) marker are left out of the name
        token = token.strip()
        return not ('(C)' in token or '(A)' in token)

    players = {}
    for element in bl:
        txt = element.xpath('.//text()')
        # skip elements with no text and those whose first text has a '#'
        if len(txt) == 0 or '#' in txt[0]:
            continue

        txt = ex_junk(txt, ['\r', '\n'])
        name_tokens = [
            token.strip() for token in txt[2].split(' ')
            if is_plain_token(token)
        ]
        name = ' '.join(name_tokens)

        if txt[0].isdigit():
            key = int(txt[0])
        else:
            # need some unique key: the current number of stored records
            key = len(players)
        players[key] = {'position': txt[1], 'name': name}

    return players
def __clean_pl_block(self, bl):
    """
    Turn a block of player elements into a dict of position/name records

    :param bl: iterable of elements exposing ``xpath``
    :returns: dict keyed by player number; each value has ``"position"``
        and ``"name"`` entries
    """

    def no_letter(s):
        # True for tokens that carry neither a (C) nor an (A) marker
        s = s.strip()
        return "(C)" not in s and "(A)" not in s

    r = {}
    for p in bl:
        txt = p.xpath(".//text()")
        # skip elements with no text and those whose first text has a "#"
        if not txt or "#" in txt[0]:
            continue

        txt = ex_junk(txt, ["\r", "\n"])
        name = " ".join(s.strip() for s in txt[2].split(" ") if no_letter(s))

        if txt[0].isdigit():
            num = int(txt[0])
        else:
            # need some unique key: one past the largest key so far.
            # max() raises ValueError on an empty dict, so the first record
            # falls back to 0 instead of crashing.
            num = max(r) + 1 if r else 0
        r[num] = {"position": txt[1], "name": name}

    return r
def __clean_pl_block(self, bl):
    """
    Build a number -> {'position', 'name'} mapping from a block of player
    elements

    :param bl: iterable of elements exposing ``xpath``
    :returns: dict of the cleaned player records
    """
    markers = ('(C)', '(A)')

    def keep(word):
        # drop any word that contains one of the letter markers
        stripped = word.strip()
        return all(marker not in stripped for marker in markers)

    roster = {}
    for node in bl:
        raw = node.xpath('.//text()')
        if len(raw) and '#' not in raw[0]:
            parts = ex_junk(raw, ['\r', '\n'])
            words = parts[2].split(' ')
            clean_name = ' '.join(word.strip() for word in words if keep(word))

            # need some unique key: numeric text is used directly, anything
            # else gets the count of records stored so far
            number = int(parts[0]) if parts[0].isdigit() else len(roster)
            roster[number] = {'position': parts[1], 'name': clean_name}

    return roster
def _rem(txt):
    """
    Run ``txt`` through ``ex_junk`` with newline and carriage return as the
    ``containing`` markers

    :param txt: collection of text items handed to ``ex_junk``
    :returns: whatever ``ex_junk`` returns (presumably ``txt`` without the
        items containing those characters -- confirm against ``ex_junk``)
    """
    line_breaks = ['\n', '\r']
    return ex_junk(txt, containing=line_breaks)
def _rem(txt):
    """
    Shorthand for calling ``ex_junk`` on ``txt`` with the line-break
    characters passed as ``containing``

    :param txt: text items to filter
    :returns: the value produced by ``ex_junk``
    """
    options = {'containing': ['\n', '\r']}
    return ex_junk(txt, **options)