Ejemplo n.º 1
0
def official_parser_pre_09(lx_doc):
  """
  Extract the officials from a pre-2009 style report.

  Locates the first cell whose text contains "Referee", steps up to its
  parent row, and reads the text of that row's second and fourth children.

  :param lx_doc: parsed document supporting ``xpath`` queries
  :returns: whatever ``__format_out`` builds from the two cleaned text lists
  :raises IndexError: if no cell containing "Referee" is found
  """
  referee_cell = lx_doc.xpath('//td[contains(text(),"Referee")]')[0]
  row = referee_cell.xpath('..')[0]

  # presumably child 1 lists the referees and child 3 the linesmen
  referee_txt = ex_junk(row[1].xpath('.//text()'))
  linesman_txt = ex_junk(row[3].xpath('.//text()'))

  return __format_out(referee_txt, linesman_txt)
Ejemplo n.º 2
0
    def __read_team_doc(lx_doc):
        """
        Build the face-off records for one team from its parsed document.

        Every row holding a ``playerHeading`` cell starts a player; the rows
        that follow it, up to the first row holding a ``space`` cell, are
        read as the opponents that player faced.

        :param lx_doc: parsed document supporting ``xpath`` queries
        :returns: dict keyed by player number; each value is the record
            built by ``__player_fo_rec`` with an added ``'opps'`` dict keyed
            by opponent number (``-1`` when the opponent text does not match)
        """
        opp_pattern = re_comp_num_pos_name()
        by_player = {}

        for heading_row in lx_doc.xpath(
                ".//td[contains(@class,'playerHeading')]/.."):
            # the heading row describes the player taking the draws
            cells = ex_junk(heading_row.xpath('.//text()'),
                            containing=['\n', '\r'])

            num = to_int(cells[0])
            pos = cells[1]
            # comma-separated name parts are reversed and joined by a space
            name_parts = cells[2].split(',')
            name = ' '.join(part.strip() for part in reversed(name_parts))

            by_player[num] = FaceOffRep.__player_fo_rec(name, pos, cells[3:7])
            by_player[num]['opps'] = {}

            for opp_row in heading_row.xpath('following-sibling::tr'):
                if opp_row.xpath(".//td[contains(@class,'space')]"):
                    # a spacer row closes this player's block
                    break

                opp_cells = ex_junk(opp_row.xpath('.//text()'),
                                    containing=['\n', '\r'])
                matches = opp_pattern.findall(opp_cells[2])
                if matches:
                    opp_num, opp_pos, opp_last, opp_first = matches[0]
                else:
                    # placeholder values for text the pattern cannot read
                    opp_num, opp_pos, opp_last, opp_first = '-1', '', '', ''

                opp_name = ' '.join(
                    piece.strip() for piece in (opp_first, opp_last))
                by_player[num]['opps'][to_int(opp_num)] = \
                    FaceOffRep.__player_fo_rec(
                        opp_name, opp_pos, opp_cells[3:7])

        return by_player
Ejemplo n.º 3
0
def official_parser_pre_09(lx_doc):
    """
    Extract the officials from a pre-2009 style report.

    The row that owns the first cell containing the text "Referee" is
    located, and the text of its second and fourth children is cleaned and
    handed to ``__format_out``.

    :param lx_doc: parsed document supporting ``xpath`` queries
    :returns: the value produced by ``__format_out``
    :raises IndexError: if no cell containing "Referee" is found
    """
    matches = lx_doc.xpath('//td[contains(text(),"Referee")]')
    officials_row = matches[0].xpath('..')[0]

    # presumably child 1 lists the referees and child 3 the linesmen
    first_group = ex_junk(officials_row[1].xpath('.//text()'))
    second_group = ex_junk(officials_row[3].xpath('.//text()'))

    return __format_out(first_group, second_group)
Ejemplo n.º 4
0
    def __get_by_per_summ(self, per_summ):
        """
        Collect the per-period summary rows starting at ``per_summ``.

        Walks ``per_summ`` and each following sibling ``tr`` until none is
        left. Every row whose class is ``oddColor`` or ``evenColor`` and
        that yields text is parsed into one summary entry.

        :param per_summ: first table row to inspect
        :returns: tuple ``(summ, per_summ)``; ``summ`` maps the period
            number to a dict with the keys ``shifts``, ``avg``, ``toi``,
            ``ev_toi``, ``pp_toi`` and ``sh_toi``; ``per_summ`` is the empty
            result of the final sibling lookup
        """
        summ = {}
        while True:
            cl = per_summ.get('class')
            if cl is not None and cl.strip() in ('oddColor', 'evenColor'):
                txt = ex_junk(per_summ.xpath('.//text()'), ['\r', '\n'], [''])
                if txt:
                    per = to_int(txt[0])
                    # when the period is not a positive number, 'OT' is
                    # stored as 4 and anything else as 0
                    per = per if per > 0 else 4 if txt[0] == 'OT' else 0
                    # stored inside the guard: a data row without text must
                    # not touch ``per``/``ps`` (unbound on the first row)
                    summ[per] = {
                        'shifts': to_int(txt[1]),
                        'avg': self.__get_time(txt[2]),
                        'toi': self.__get_time(txt[3]),
                        'ev_toi': self.__get_time(txt[4]),
                        'pp_toi': self.__get_time(txt[5]),
                        'sh_toi': self.__get_time(txt[6])
                    }

            # advance to the next row, stopping when there is none
            per_summ = per_summ.xpath('following-sibling::tr')
            if not per_summ:
                break
            per_summ = per_summ[0]

        return summ, per_summ
Ejemplo n.º 5
0
 def __get_by_per_summ(self, per_summ):
     """
     Collect the per-period summary rows starting at ``per_summ``.

     Walks ``per_summ`` and each following sibling ``tr`` until none is
     left. Every row whose class is ``oddColor`` or ``evenColor`` and that
     yields text is parsed into one summary entry.

     :param per_summ: first table row to inspect
     :returns: tuple ``(summ, per_summ)``; ``summ`` maps the period number
         to a dict with the keys ``shifts``, ``avg``, ``toi``, ``ev_toi``,
         ``pp_toi`` and ``sh_toi``; ``per_summ`` is the empty result of the
         final sibling lookup
     """
     summ = {}
     while True:
         cl = per_summ.get('class')
         if cl is not None and cl.strip() in ('oddColor', 'evenColor'):
             txt = ex_junk(per_summ.xpath('.//text()'), ['\r', '\n'], [''])
             if txt:
                 per = to_int(txt[0])
                 # when the period is not a positive number, 'OT' is
                 # stored as 4 and anything else as 0
                 per = per if per > 0 else 4 if txt[0] == 'OT' else 0
                 # stored inside the guard: a data row without text must
                 # not touch ``per``/``ps`` (unbound on the first row)
                 summ[per] = {
                     'shifts': to_int(txt[1]),
                     'avg': self.__get_time(txt[2]),
                     'toi': self.__get_time(txt[3]),
                     'ev_toi': self.__get_time(txt[4]),
                     'pp_toi': self.__get_time(txt[5]),
                     'sh_toi': self.__get_time(txt[6])
                 }

         # advance to the next row, stopping when there is none
         per_summ = per_summ.xpath('following-sibling::tr')
         if not per_summ:
             break
         per_summ = per_summ[0]

     return summ, per_summ
Ejemplo n.º 6
0
def official_parser_10(lx_doc):
  """
  Extract the officials from a 2010 style report.

  Starting from the first cell containing "Referee", climbs two levels and
  reads all text from the second child of that ancestor.

  :param lx_doc: parsed document supporting ``xpath`` queries
  :returns: whatever ``__format_out`` builds from the two halves of the list
  :raises IndexError: if no cell containing "Referee" is found
  """
  referee_cell = lx_doc.xpath('//td[contains(text(),"Referee")]')[0]
  container = referee_cell.xpath('../..')[0]

  names = ex_junk(container[1].xpath('.//text()'), ['\n','\r'])

  # four entries split two and two; otherwise only the first stands alone
  head = 2 if len(names) == 4 else 1
  return __format_out(names[:head], names[head:])
Ejemplo n.º 7
0
def official_parser_10(lx_doc):
    """
    Extract the officials from a 2010 style report.

    The grandparent of the first cell containing "Referee" is located and
    the text under its second child is cleaned into one flat list.

    :param lx_doc: parsed document supporting ``xpath`` queries
    :returns: the value produced by ``__format_out`` for the split list
    :raises IndexError: if no cell containing "Referee" is found
    """
    hits = lx_doc.xpath('//td[contains(text(),"Referee")]')
    officials_table = hits[0].xpath('../..')[0]

    entries = ex_junk(officials_table[1].xpath('.//text()'), ['\n', '\r'])

    # exactly four entries are cut in the middle; any other count is cut
    # after the first entry
    cut = 2 if len(entries) == 4 else 1
    return __format_out(entries[:cut], entries[cut:])
Ejemplo n.º 8
0
 def parse_coaches(self):
   """
   Parse the home and away coaches.

   Reads the cells of the ``HeadCoaches`` row: the first cell is stored
   under ``'away'``, every later cell under ``'home'``.

   :returns: ``self.coaches`` after it has been filled in
   :raises IndexError: if the row is missing or a cell has no text left
   """
   coach_row = self.html_doc().xpath('//tr[@id="HeadCoaches"]')[0]

   for idx, cell in enumerate(coach_row):
     cleaned = ex_junk(cell.xpath('.//text()'), ['\n','\r'])
     side = 'home' if idx else 'away'
     self.coaches[side] = cleaned[0]

   return self.coaches
Ejemplo n.º 9
0
    def __player_shifts(self, shift):
        """
        Parse consecutive shift rows beginning at ``shift``.

        The starting row is always parsed; parsing continues with each next
        sibling row for as long as its class is ``oddColor`` or
        ``evenColor``.

        :param shift: first row to parse
        :returns: tuple of the list of parsed shifts and the first row that
            was not parsed
        :raises IndexError: if a parsed row has no following sibling row
        """
        shift_classes = ('oddColor', 'evenColor')
        collected = []
        row = shift

        keep_going = True
        while keep_going:
            cells = ex_junk(row.xpath('.//text()'), ['\r', '\n'], [''])
            collected.append(self.__build_shift(cells))

            # step to the next row and check whether it is still a shift row
            row = row.xpath('following-sibling::tr')[0]
            css = row.get('class')
            keep_going = css is not None and css.strip() in shift_classes

        return collected, row
Ejemplo n.º 10
0
    def __player_shifts(self, shift):
        """
        Parse a run of shift rows that starts at ``shift``.

        ``shift`` itself is parsed unconditionally. Each following sibling
        row is parsed too while its class is ``oddColor`` or ``evenColor``.

        :param shift: first row of the run
        :returns: ``(parsed_shifts, row)`` where ``row`` is the row that
            ended the run
        :raises IndexError: if a parsed row has no following sibling row
        """
        results = []
        current = shift
        in_run = True

        while in_run:
            fields = ex_junk(current.xpath('.//text()'), ['\r','\n'], [''])
            results.append(self.__build_shift(fields))

            # move on; the run ends at the first row without a shift class
            current = current.xpath('following-sibling::tr')[0]
            row_class = current.get('class')
            in_run = (row_class is not None
                      and row_class.strip() in ('oddColor', 'evenColor'))

        return results, current
Ejemplo n.º 11
0
    def __read_team_doc(lx_doc):
        """
        Build the face-off records for one team from its parsed document.

        Every row holding a ``playerHeading`` cell starts a player; the rows
        that follow it, up to the first row holding a ``space`` cell, are
        read as the opponents that player faced.

        :param lx_doc: parsed document supporting ``xpath`` queries
        :returns: dict keyed by player number; each value is the record
            built by ``__player_fo_rec`` with an added ``"opps"`` dict keyed
            by opponent number (``-1`` when the opponent text does not match)
        """
        opp_pattern = re_comp_num_pos_name()
        records = {}

        heading_rows = lx_doc.xpath(".//td[contains(@class,'playerHeading')]/..")
        for heading_row in heading_rows:
            # the heading row describes the player taking the draws
            cells = ex_junk(heading_row.xpath(".//text()"), containing=["\n", "\r"])

            num = to_int(cells[0])
            pos = cells[1]
            # comma-separated name parts are reversed and joined by a space
            name = " ".join(part.strip() for part in reversed(cells[2].split(",")))

            records[num] = FaceOffRep.__player_fo_rec(name, pos, cells[3:7])
            records[num]["opps"] = {}

            for opp_row in heading_row.xpath("following-sibling::tr"):
                if opp_row.xpath(".//td[contains(@class,'space')]"):
                    # a spacer row closes this player's block
                    break

                opp_cells = ex_junk(opp_row.xpath(".//text()"), containing=["\n", "\r"])
                found = opp_pattern.findall(opp_cells[2])
                if found:
                    opp_num, opp_pos, opp_last, opp_first = found[0]
                else:
                    # placeholder values for text the pattern cannot read
                    opp_num, opp_pos, opp_last, opp_first = "-1", "", "", ""

                opp_name = " ".join(piece.strip() for piece in (opp_first, opp_last))
                records[num]["opps"][to_int(opp_num)] = FaceOffRep.__player_fo_rec(
                    opp_name, opp_pos, opp_cells[3:7]
                )

        return records
Ejemplo n.º 12
0
    def parse_coaches(self):
        """
        Parse the home and away coaches.

        The first cell of the ``HeadCoaches`` row is stored under ``"away"``
        in ``self.coaches``; every later cell is stored under ``"home"``.

        :returns: ``self`` on success, ``None`` otherwise (including when the
            document has no ``HeadCoaches`` row)
        """
        lx_doc = self.html_doc()
        rows = lx_doc.xpath('//tr[@id="HeadCoaches"]')
        if not rows:
            # honor the documented contract instead of raising IndexError
            return None

        for i, td in enumerate(rows[0]):
            txt = ex_junk(td.xpath(".//text()"), ["\n", "\r"])
            team = "away" if i == 0 else "home"
            self.coaches[team] = txt[0]

        return self if self.coaches else None
Ejemplo n.º 13
0
    def parse_coaches(self):
        """
        Parse the home and away coaches.

        The first cell of the ``HeadCoaches`` row is stored under ``'away'``
        in ``self.coaches``; every later cell is stored under ``'home'``.

        :returns: ``self`` on success, ``None`` otherwise (including when the
            document has no ``HeadCoaches`` row)
        """
        lx_doc = self.html_doc()
        rows = lx_doc.xpath('//tr[@id="HeadCoaches"]')
        if not rows:
            # honor the documented contract instead of raising IndexError
            return None

        for i, td in enumerate(rows[0]):
            txt = ex_junk(td.xpath('.//text()'), ['\n', '\r'])
            team = 'away' if i == 0 else 'home'
            self.coaches[team] = txt[0]

        return self if self.coaches else None
Ejemplo n.º 14
0
    def __clean_pl_block(self, bl):
        """
        Turn a block of player nodes into a dict of player records.

        Nodes without text, or whose first text item contains ``#``, are
        skipped. For the rest, name tokens containing ``(C)`` or ``(A)``
        are dropped from the third text item.

        :param bl: iterable of nodes supporting ``xpath`` queries
        :returns: dict mapping a number to ``{'position': ..., 'name': ...}``;
            the number is the first text item when it is all digits,
            otherwise the current size of the dict
        """
        def keeps(token):
            # presumably '(C)'/'(A)' are captain/alternate markers
            token = token.strip()
            return not ('(C)' in token or '(A)' in token)

        players = {}
        for node in bl:
            cells = node.xpath('.//text()')
            if not len(cells) or '#' in cells[0]:
                continue

            cells = ex_junk(cells, ['\r','\n'])
            tokens = cells[2].split(' ')
            cells[2] = ' '.join(tok.strip() for tok in tokens if keeps(tok))

            # need some unique key
            if cells[0].isdigit():
                key = int(cells[0])
            else:
                key = len(players.keys())
            players[key] = { 'position': cells[1], 'name': cells[2] }

        return players
Ejemplo n.º 15
0
    def __clean_pl_block(self, bl):
        """
        Turn a block of player nodes into a dict of player records.

        Nodes without text, or whose first text item contains ``#``, are
        skipped entirely. For the rest, name tokens containing ``(C)`` or
        ``(A)`` are dropped from the third text item.

        :param bl: iterable of nodes supporting ``xpath`` queries
        :returns: dict mapping a number to ``{"position": ..., "name": ...}``;
            the number is the first text item when it is all digits,
            otherwise one more than the largest key stored so far (``0``
            when nothing has been stored yet)
        """
        def no_letter(s):
            # presumably "(C)"/"(A)" are captain/alternate markers
            s = s.strip()
            return "(C)" not in s and "(A)" not in s

        r = {}

        for p in bl:
            txt = p.xpath(".//text()")
            if not len(txt) or "#" in txt[0]:
                # skipped rows must not be keyed or stored at all
                continue

            txt = ex_junk(txt, ["\r", "\n"])
            txt[2] = " ".join(s.strip() for s in txt[2].split(" ") if no_letter(s))

            # need some unique key; max() cannot be called on an empty dict
            if txt[0].isdigit():
                num = int(txt[0])
            else:
                num = max(r) + 1 if r else 0
            r[num] = {"position": txt[1], "name": txt[2]}

        return r
Ejemplo n.º 16
0
    def __clean_pl_block(self, bl):
        """
        Turn a block of player nodes into a dict of player records.

        A node is used only when it has text and its first text item does
        not contain ``#``. Name tokens containing ``(C)`` or ``(A)`` are
        removed from the third text item.

        :param bl: iterable of nodes supporting ``xpath`` queries
        :returns: dict mapping a number to ``{'position': ..., 'name': ...}``;
            the number is the first text item when it is all digits,
            otherwise the current size of the dict
        """
        def is_name_part(piece):
            # presumably '(C)'/'(A)' are captain/alternate markers
            piece = piece.strip()
            return not ('(C)' in piece or '(A)' in piece)

        roster = {}
        for entry in bl:
            fields = entry.xpath('.//text()')
            if not len(fields) or '#' in fields[0]:
                continue

            fields = ex_junk(fields, ['\r', '\n'])
            kept = (w.strip() for w in fields[2].split(' ') if is_name_part(w))
            fields[2] = ' '.join(kept)

            # need some unique key
            if fields[0].isdigit():
                number = int(fields[0])
            else:
                number = len(roster.keys())
            roster[number] = {'position': fields[1], 'name': fields[2]}

        return roster
Ejemplo n.º 17
0
def _rem(txt):
    """
    Run ``ex_junk`` over ``txt`` with newline and carriage return as the
    ``containing`` values.

    :param txt: text items to clean
    :returns: the result of ``ex_junk``
    """
    line_breaks = ['\n', '\r']
    return ex_junk(txt, containing=line_breaks)
Ejemplo n.º 18
0
def _rem(txt):
    """
    Clean ``txt`` through ``ex_junk``, passing newline and carriage return
    as its ``containing`` argument.

    :param txt: text items to clean
    :returns: whatever ``ex_junk`` returns for them
    """
    cleaned = ex_junk(txt, containing=['\n', '\r'])
    return cleaned