Python ex_junk 예제들, nhlscrapi._tools.ex_junk Python 예제들

예제 #1

0

파일 보기

파일: officialsparser.py 프로젝트: deathreaperco6/nhlscrapi

def official_parser_pre_09(lx_doc):
  """Pull the officials out of the row that contains the "Referee" cell.

  Finds the first ``td`` whose text contains "Referee", steps up to its
  parent element and reads the text nodes of that parent's children at
  index 1 and index 3.

  :param lx_doc: document object exposing ``xpath`` (presumably an lxml
      tree -- confirm against caller)
  :returns: the result of ``__format_out`` applied to the two text lists
  """
  referee_cells = lx_doc.xpath('//td[contains(text(),"Referee")]')
  parent_rows = referee_cells[0].xpath('..')
  officials_row = parent_rows[0]

  # child 1 is handled as the referees, child 3 as the linesmen
  referee_txt = ex_junk(officials_row[1].xpath('.//text()'))
  linesman_txt = ex_junk(officials_row[3].xpath('.//text()'))

  return __format_out(referee_txt, linesman_txt)

예제 #2

0

파일 보기

파일: faceoffrep.py 프로젝트: tiagofassoni/nhlscrapi

    def __read_team_doc(lx_doc):
        """Build a face-off record per player heading row, keyed by number.

        Every row holding a ``playerHeading`` cell starts a new entry; the
        sibling rows after it are read as opponents until a row containing
        a ``space`` cell is reached.

        :param lx_doc: element exposing ``xpath`` (presumably an lxml
            node -- confirm against caller)
        :returns: dict mapping the player's number to the record returned
            by ``__player_fo_rec``, extended with an ``'opps'`` dict that
            is keyed by opponent number
        """
        fo = {}
        opp_pattern = re_comp_num_pos_name()

        heading_rows = lx_doc.xpath(
            ".//td[contains(@class,'playerHeading')]/..")
        for center_row in heading_rows:
            # extract info of center taking the draw
            center = ex_junk(center_row.xpath('.//text()'),
                             containing=['\n', '\r'])

            num = to_int(center[0])
            pos = center[1]
            # the comma separated name parts are joined in reverse order
            name_parts = reversed(center[2].split(','))
            name = ' '.join(part.strip() for part in name_parts)

            fo[num] = FaceOffRep.__player_fo_rec(name, pos, center[3:7])
            fo[num]['opps'] = {}

            for opp_row in center_row.xpath('following-sibling::tr'):
                # a spacer row closes this player's opponent list
                if opp_row.xpath(".//td[contains(@class,'space')]"):
                    break

                opp = ex_junk(opp_row.xpath('.//text()'),
                              containing=['\n', '\r'])
                matches = opp_pattern.findall(opp[2])
                if matches:
                    opp_num, opp_pos, opp_last, opp_first = matches[0]
                else:
                    # no match: fall back to a placeholder opponent
                    opp_num, opp_pos, opp_last, opp_first = '-1', '', '', ''

                opp_name = ' '.join([opp_first.strip(), opp_last.strip()])
                opp_stats = FaceOffRep.__player_fo_rec(
                    opp_name, opp_pos, opp[3:7])
                fo[num]['opps'][to_int(opp_num)] = opp_stats

        return fo

예제 #3

0

파일 보기

def official_parser_pre_09(lx_doc):
    """Read referee and linesman text from the "Referee" row.

    The row is the parent of the first ``td`` whose text contains
    "Referee"; its children at index 1 and 3 supply the text nodes.

    :param lx_doc: document object exposing ``xpath`` (presumably an lxml
        tree -- confirm against caller)
    :returns: the value produced by ``__format_out`` for both text lists
    """
    label_cell = lx_doc.xpath('//td[contains(text(),"Referee")]')[0]
    row = label_cell.xpath('..')[0]

    def cell_text(position):
        # text nodes of one child of the row, filtered through ex_junk
        return ex_junk(row[position].xpath('.//text()'))

    refs = cell_text(1)
    lines = cell_text(3)
    return __format_out(refs, lines)

예제 #4

0

파일 보기

파일: toirep.py 프로젝트: mbeaini8/nhlscrapper

    def __get_by_per_summ(self, per_summ):
        """Collect the per-period summary rows starting at ``per_summ``.

        Walks ``per_summ`` and each following sibling ``tr``. Rows whose
        ``class`` is ``oddColor`` or ``evenColor`` and that yield text are
        converted into a summary dict and stored under their period key.

        The period key is ``to_int`` of the first text item when that is
        positive, ``4`` when the first item is ``'OT'`` and ``0`` otherwise.

        :param per_summ: first row to inspect; must expose ``get`` and
            ``xpath`` (presumably an lxml ``tr`` element -- confirm)
        :returns: ``(summ, per_summ)`` where ``summ`` maps period keys to
            dicts with keys ``shifts``, ``avg``, ``toi``, ``ev_toi``,
            ``pp_toi`` and ``sh_toi``, and ``per_summ`` is the empty
            ``following-sibling`` result that ended the walk
        """
        summ = {}
        while True:
            cl = per_summ.get('class')
            if cl is not None and cl.strip() in ['oddColor', 'evenColor']:
                txt = ex_junk(per_summ.xpath('.//text()'), ['\r', '\n'], [''])
                if txt:
                    per = to_int(txt[0])
                    per = per if per > 0 else 4 if txt[0] == 'OT' else 0
                    # Store only when this row produced data. Previously the
                    # store ran for empty rows too, which raised on the
                    # first row (unbound names) or re-stored stale values.
                    summ[per] = {
                        'shifts': to_int(txt[1]),
                        'avg': self.__get_time(txt[2]),
                        'toi': self.__get_time(txt[3]),
                        'ev_toi': self.__get_time(txt[4]),
                        'pp_toi': self.__get_time(txt[5]),
                        'sh_toi': self.__get_time(txt[6])
                    }

            # advance to the next row, stopping when there is none
            per_summ = per_summ.xpath('following-sibling::tr')
            if per_summ:
                per_summ = per_summ[0]
            else:
                break

        return summ, per_summ

예제 #5

0

파일 보기

파일: toirep.py 프로젝트: Cophy08/nhlscrapi

 def __get_by_per_summ(self, per_summ):
     """Gather per-period summaries from ``per_summ`` and the rows after it.

     Each row with class ``oddColor`` or ``evenColor`` that yields text
     becomes one entry of the result. The entry key is ``to_int`` of the
     first text item when positive, ``4`` for ``'OT'`` and ``0`` otherwise.

     :param per_summ: first row to inspect; must expose ``get`` and
         ``xpath`` (presumably an lxml ``tr`` element -- confirm)
     :returns: ``(summ, per_summ)``; ``summ`` maps period keys to dicts
         with keys ``shifts``, ``avg``, ``toi``, ``ev_toi``, ``pp_toi``
         and ``sh_toi``; ``per_summ`` is the empty ``following-sibling``
         result that terminated the loop
     """
     summ = {}
     while True:
         cl = per_summ.get('class')
         if cl is not None and cl.strip() in ['oddColor', 'evenColor']:
             txt = ex_junk(per_summ.xpath('.//text()'), ['\r', '\n'], [''])
             if txt:
                 per = to_int(txt[0])
                 per = per if per > 0 else 4 if txt[0] == 'OT' else 0
                 # Keep the store inside the guard: a row without text
                 # must not touch ``summ`` (it used to raise on unbound
                 # names or silently re-store the previous row).
                 summ[per] = {
                     'shifts': to_int(txt[1]),
                     'avg': self.__get_time(txt[2]),
                     'toi': self.__get_time(txt[3]),
                     'ev_toi': self.__get_time(txt[4]),
                     'pp_toi': self.__get_time(txt[5]),
                     'sh_toi': self.__get_time(txt[6])
                 }

         # move on to the next sibling row, if any
         per_summ = per_summ.xpath('following-sibling::tr')
         if per_summ:
             per_summ = per_summ[0]
         else:
             break

     return summ, per_summ

예제 #6

0

파일 보기

파일: officialsparser.py 프로젝트: deathreaperco6/nhlscrapi

def official_parser_10(lx_doc):
  """Read the officials from the table holding the "Referee" cell.

  Takes the grandparent of the first ``td`` whose text contains
  "Referee", reads the text nodes of its child at index 1 and splits the
  resulting list into two groups for ``__format_out``.

  :param lx_doc: document object exposing ``xpath`` (presumably an lxml
      tree -- confirm against caller)
  :returns: the result of ``__format_out`` for the two groups
  """
  referee_cell = lx_doc.xpath('//td[contains(text(),"Referee")]')[0]
  table = referee_cell.xpath('../..')[0]

  names = ex_junk(table[1].xpath('.//text()'), ['\n','\r'])

  # exactly four entries are split 2/2; any other count is split 1/rest
  cut = 2 if len(names) == 4 else 1
  return __format_out(names[:cut], names[cut:])

예제 #7

0

파일 보기

def official_parser_10(lx_doc):
    """Split the officials listed near the "Referee" cell into two groups.

    The source element is the grandparent of the first ``td`` whose text
    contains "Referee"; the text nodes of its child at index 1 are used.

    :param lx_doc: document object exposing ``xpath`` (presumably an lxml
        tree -- confirm against caller)
    :returns: the value produced by ``__format_out`` for the two groups
    """
    matches = lx_doc.xpath('//td[contains(text(),"Referee")]')
    off_table = matches[0].xpath('../..')[0]
    offs = ex_junk(off_table[1].xpath('.//text()'), ['\n', '\r'])

    # anything other than four entries: first entry alone, rest together
    if len(offs) != 4:
        return __format_out(offs[:1], offs[1:])
    return __format_out(offs[:2], offs[2:])

예제 #8

0

파일 보기

파일: rosterrep.py 프로젝트: rhewett/nhlscrapi

 def parse_coaches(self):
   """Fill ``self.coaches`` from the ``HeadCoaches`` row of the document.

   The first child of the row is stored under ``'away'``; every later
   child is stored under ``'home'``.

   :returns: ``self.coaches``
   """
   coach_row = self.html_doc().xpath('//tr[@id="HeadCoaches"]')[0]

   for idx, cell in enumerate(coach_row):
     side = 'home' if idx != 0 else 'away'
     names = ex_junk(cell.xpath('.//text()'), ['\n','\r'])
     # only the first remaining text item is kept
     self.coaches[side] = names[0]

   return self.coaches

예제 #9

0

파일 보기

파일: toirep.py 프로젝트: mbeaini8/nhlscrapper

    def __player_shifts(self, shift):
        """Parse consecutive shift rows beginning with ``shift``.

        The starting row is always parsed. Following sibling rows are
        parsed for as long as their ``class`` is ``oddColor`` or
        ``evenColor``.

        :param shift: first row to parse; must expose ``xpath`` and
            ``get`` (presumably an lxml ``tr`` element -- confirm)
        :returns: ``(parsed_shifts, row)`` where ``parsed_shifts`` holds
            one ``__build_shift`` result per parsed row and ``row`` is
            the first following row that did not qualify
        """
        shift_classes = ['oddColor', 'evenColor']
        parsed_shifts = []
        row = shift
        keep_going = True

        while keep_going:
            cells = ex_junk(row.xpath('.//text()'), ['\r', '\n'], [''])
            parsed_shifts.append(self.__build_shift(cells))

            # get next row
            row = row.xpath('following-sibling::tr')[0]
            row_class = row.get('class')
            keep_going = (row_class is not None
                          and row_class.strip() in shift_classes)

        return parsed_shifts, row

예제 #10

0

파일 보기

파일: toirep.py 프로젝트: Cophy08/nhlscrapi

    def __player_shifts(self, shift):
        """Collect the shifts of the row run that starts at ``shift``.

        ``shift`` itself is parsed unconditionally; each next sibling row
        is parsed too while it carries the class ``oddColor`` or
        ``evenColor``.

        :param shift: first row to parse; must expose ``xpath`` and
            ``get`` (presumably an lxml ``tr`` element -- confirm)
        :returns: tuple of the list of ``__build_shift`` results and the
            row at which parsing stopped
        """
        results = []
        current = shift

        while True:
            text_items = ex_junk(current.xpath('.//text()'), ['\r','\n'], [''])
            results.append(self.__build_shift(text_items))

            # get next row
            current = current.xpath('following-sibling::tr')[0]
            css = current.get('class')
            if css is not None and css.strip() in ['oddColor', 'evenColor']:
                continue
            break

        return results, current

예제 #11

0

파일 보기

파일: faceoffrep.py 프로젝트: deathreaperco6/nhlscrapi

    def __read_team_doc(lx_doc):
        """Collect face-off records for each player heading row.

        A row containing a ``playerHeading`` cell opens a record; the
        sibling rows that follow are treated as opponents until a row
        with a ``space`` cell appears.

        :param lx_doc: element exposing ``xpath`` (presumably an lxml
            node -- confirm against caller)
        :returns: dict keyed by player number; each value is the
            ``__player_fo_rec`` result plus an ``"opps"`` dict keyed by
            opponent number
        """
        fo = {}
        re_opp = re_comp_num_pos_name()

        for draw_row in lx_doc.xpath(".//td[contains(@class,'playerHeading')]/.."):
            # extract info of center taking the draw
            fields = ex_junk(draw_row.xpath(".//text()"), containing=["\n", "\r"])

            num = to_int(fields[0])
            pos = fields[1]
            # join the comma separated name parts in reverse order
            name = " ".join(piece.strip() for piece in reversed(fields[2].split(",")))

            fo[num] = FaceOffRep.__player_fo_rec(name, pos, fields[3:7])
            fo[num]["opps"] = {}

            for versus_row in draw_row.xpath("following-sibling::tr"):
                # a spacer row marks the end of this player's opponents
                if versus_row.xpath(".//td[contains(@class,'space')]"):
                    break

                versus = ex_junk(versus_row.xpath(".//text()"), containing=["\n", "\r"])
                found = re_opp.findall(versus[2])
                # placeholder values are used when the pattern finds nothing
                parsed = found[0] if found else ("-1", "", "", "")
                opp_num, opp_pos, opp_last, opp_first = parsed

                opp_name = " ".join([opp_first.strip(), opp_last.strip()])
                opp_entry = FaceOffRep.__player_fo_rec(opp_name, opp_pos, versus[3:7])
                fo[num]["opps"][to_int(opp_num)] = opp_entry

        return fo

예제 #12

0

파일 보기

파일: rosterrep.py 프로젝트: Cophy08/nhlscrapi

    def parse_coaches(self):
        """
        Read the coaches from the ``HeadCoaches`` row into ``self.coaches``.

        The first child of the row is stored under ``"away"``, all later
        children under ``"home"``.

        :returns: ``self`` when ``self.coaches`` is non-empty, ``None`` otherwise
        """
        coach_row = self.html_doc().xpath('//tr[@id="HeadCoaches"]')[0]

        for position, cell in enumerate(coach_row):
            side = "home" if position != 0 else "away"
            cleaned = ex_junk(cell.xpath(".//text()"), ["\n", "\r"])
            # only the first remaining text item is kept
            self.coaches[side] = cleaned[0]

        if self.coaches:
            return self
        return None

예제 #13

0

파일 보기

    def parse_coaches(self):
        """
        Store the away and home coach found in the ``HeadCoaches`` row.

        Child 0 of the row is recorded as ``'away'``; any other child is
        recorded as ``'home'``.

        :returns: ``self`` on success, ``None`` when ``self.coaches`` is empty
        """
        document = self.html_doc()
        head_coaches = document.xpath('//tr[@id="HeadCoaches"]')[0]

        for index, column in enumerate(head_coaches):
            raw_text = column.xpath('.//text()')
            coach_text = ex_junk(raw_text, ['\n', '\r'])
            if index == 0:
                team = 'away'
            else:
                team = 'home'
            self.coaches[team] = coach_text[0]

        return self if self.coaches else None

예제 #14

0

파일 보기

파일: rosterrep.py 프로젝트: deathreaperco6/nhlscrapi

    def __clean_pl_block(self, bl):
        """Turn a block of player rows into a dict of position/name records.

        Rows without text, or whose first text node contains ``'#'``, are
        skipped. Name tokens containing ``(C)`` or ``(A)`` are dropped.

        :param bl: iterable of elements exposing ``xpath`` (presumably
            lxml rows -- confirm against caller)
        :returns: dict mapping the player's number, or the current dict
            size when the number is not all digits, to a dict with keys
            ``'position'`` and ``'name'``
        """
        def is_plain(token):
            stripped = token.strip()
            return not ('(C)' in stripped or '(A)' in stripped)

        players = {}
        for row in bl:
            txt = row.xpath('.//text()')
            if len(txt) == 0 or '#' in txt[0]:
                continue

            txt = ex_junk(txt, ['\r','\n'])
            kept = [tok.strip() for tok in txt[2].split(' ') if is_plain(tok)]
            txt[2] = ' '.join(kept)

            # need some unique key
            if txt[0].isdigit():
                key = int(txt[0])
            else:
                key = len(players)
            players[key] = { 'position': txt[1], 'name': txt[2] }

        return players

예제 #15

0

파일 보기

파일: rosterrep.py 프로젝트: Cophy08/nhlscrapi

    def __clean_pl_block(self, bl):
        """Convert a block of player rows into position/name records.

        Rows that yield no text, or whose first text node contains ``"#"``,
        are skipped entirely. Name tokens containing ``(C)`` or ``(A)``
        are removed from the stored name.

        :param bl: iterable of elements exposing ``xpath`` (presumably
            lxml rows -- confirm against caller)
        :returns: dict mapping a key to ``{"position": ..., "name": ...}``;
            the key is the player's number when it is all digits,
            otherwise one more than the largest key so far (``0`` when
            the dict is still empty)
        """
        def no_letter(s):
            s = s.strip()
            return "(C)" not in s and "(A)" not in s

        r = {}

        for p in bl:
            txt = p.xpath(".//text()")
            if len(txt) and "#" not in txt[0]:
                txt = ex_junk(txt, ["\r", "\n"])
                txt[2] = " ".join(s.strip() for s in txt[2].split(" ") if no_letter(s))

                # The record is stored only for rows that passed the guard.
                # Previously this ran for every row, so empty rows raised
                # on ``txt[0]`` and "#" rows were stored as players.
                # need some unique key; ``max`` of an empty dict would
                # raise, so start at 0 in that case
                if txt[0].isdigit():
                    num = int(txt[0])
                else:
                    num = max(r) + 1 if r else 0
                r[num] = {"position": txt[1], "name": txt[2]}

        return r

예제 #16

0

파일 보기

파일: rosterrep.py 프로젝트: mbeaini8/nhlscrapper

    def __clean_pl_block(self, bl):
        """Map each usable player row of ``bl`` to its position and name.

        A row is usable when it has text and its first text node does not
        contain ``'#'``. Tokens of the name that contain ``(C)`` or
        ``(A)`` are left out.

        :param bl: iterable of elements exposing ``xpath`` (presumably
            lxml rows -- confirm against caller)
        :returns: dict keyed by the player's number when it is all
            digits, else by the number of entries stored so far; values
            are dicts with keys ``'position'`` and ``'name'``
        """
        def has_letter(token):
            token = token.strip()
            return '(C)' in token or '(A)' in token

        r = {}
        for entry in bl:
            txt = entry.xpath('.//text()')
            usable = len(txt) and '#' not in txt[0]
            if not usable:
                continue

            txt = ex_junk(txt, ['\r', '\n'])
            name_tokens = []
            for token in txt[2].split(' '):
                if not has_letter(token):
                    name_tokens.append(token.strip())
            txt[2] = ' '.join(name_tokens)

            # need some unique key
            num = int(txt[0]) if txt[0].isdigit() else len(r)
            r[num] = {'position': txt[1], 'name': txt[2]}

        return r

예제 #17

0

파일 보기

def _rem(txt):
    """Pass ``txt`` to ``ex_junk`` with newline and carriage return as ``containing``."""
    line_breaks = ['\n', '\r']
    return ex_junk(txt, containing=line_breaks)

예제 #18

0

파일 보기

파일: eventsummrep.py 프로젝트: deathreaperco6/nhlscrapi

def _rem(txt):
    """Return ``ex_junk(txt)`` using ``'\\n'`` and ``'\\r'`` as the ``containing`` markers."""
    cleaned = ex_junk(txt, containing=['\n', '\r'])
    return cleaned