def get_svc_channels(self): url = "https://search.daum.net/search?DA=B3T&w=tot&rtmaxcoll=B3T&q={}" channelcate = ["지상파", "종합편성", "케이블", "스카이라이프", "해외위성", "라디오"] for c in channelcate: search_url = url.format(f"{c} 편성표") data = self.request(search_url, None, method="GET", output="html") soup = BeautifulSoup(data) if not soup.find_all(attrs={"disp-attr": "B3T"}): continue all_channels = [ str(x.text.strip()) for x in soup.select( 'div[id="channelNaviLayer"] > div[class="layer_tv layer_all scroll"] > div > ul > li' ) ] if not all_channels: all_channels += [ str(x.text.strip()) for x in soup.select('div[class="wrap_sub"] > span > a') ] svc_cate = c.replace("스카이라이프", "SKYLIFE") self.svc_channel_list += [{ "Name": x, "ServiceId": f"{svc_cate} {x}", "Category": c, } for x in all_channels]
def get_svc_channels(self):
    channelcate = [
        {"name": "지상파", "category": "00"},
        {"name": "스포츠/취미", "category": "01"},
        {"name": "영화", "category": "02"},
        {"name": "뉴스/경제", "category": "03"},
        {"name": "교양/다큐", "category": "04"},
        {"name": "여성/오락", "category": "05"},
        {"name": "어린이/교육", "category": "06"},
        {"name": "홈쇼핑", "category": "07"},
        {"name": "공공/종교", "category": "08"},
    ]
    # channel entries look like "NAME(Ch.NO)"; the service id is the first
    # quoted number inside the anchor's onclick handler
    p_name = re.compile(r".+(?=[(])")
    p_no = re.compile(r"(?<=Ch[.])\d+")
    p_svcid = re.compile(r"(?<=[('])\d+(?=[',])")
    url = "https://www.uplus.co.kr/css/chgi/chgi/RetrieveTvChannel.hpi"
    params = {"code": "12810"}
    for c in channelcate:
        params.update({"category": c["category"]})
        soup = BeautifulSoup(
            self.request(url, params, method="GET", output="html"))
        for ch in soup.select('li > a[name="chList"]'):
            ch_txt = ch.text
            self.svc_channel_list.append({
                "Name": p_name.search(ch_txt).group(),
                "No": str(p_no.search(ch_txt).group()),
                "ServiceId": p_svcid.search(ch["onclick"]).group(),
                "Category": c["name"],
            })
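
# A quick standalone check of the three regexes above. The sample strings are
# assumptions about the page's shape, not captured from the live service:
import re

ch_txt = "JTBC(Ch.15)"             # hypothetical "NAME(Ch.NO)" entry text
onclick = "fnView('504','JTBC');"  # hypothetical onclick payload
print(re.compile(r".+(?=[(])").search(ch_txt).group())              # JTBC
print(re.compile(r"(?<=Ch[.])\d+").search(ch_txt).group())          # 15
print(re.compile(r"(?<=[('])\d+(?=[',])").search(onclick).group())  # 504
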
def get_svc_channels(self): url = "https://tv.kt.com/tv/channel/pChList.asp" params = {"ch_type": "1", "parent_menu_id": "0"} soup = BeautifulSoup( self.request(url, params, method="POST", output="html")) raw_channels = [ unquote(x.find("span", { "class": "ch" }).text.strip()) for x in soup.select("li > a") ] # 몇몇 채널은 (TV로만 제공, 유료채널) 웹에서 막혀있지만 실제로는 데이터가 있을 수 있다. self.svc_channel_list = [{ "Name": " ".join(x.split()[1:]), "No": str(x.split()[0]), "ServiceId": x.split()[0] } for x in raw_channels]
def get_programs(self, lazy_write=False): url = "https://tv.kt.com/tv/channel/pSchedule.asp" params = { "ch_type": "1", # 1: live 2: skylife 3: uhd live 4: uhd skylife "view_type": "1", # 1: daily 2: weekly "service_ch_no": "SVCID", "seldate": "EPGDATE", } for idx, _ch in enumerate(self.req_channels): log.info("%03d/%03d %s", idx + 1, len(self.req_channels), _ch) for nd in range(int(self.cfg["FETCH_LIMIT"])): day = date.today() + timedelta(days=nd) params.update({ "service_ch_no": _ch.svcid, "seldate": day.strftime("%Y%m%d") }) try: data = self.request(url, params, method="POST", output="html") soup = BeautifulSoup(data, parse_only=SoupStrainer("tbody")) for row in soup.find_all("tr"): cell = row.find_all("td") hour = cell[0].text.strip() for minute, program, category in zip( cell[1].find_all("p"), cell[2].find_all("p"), cell[3].find_all("p")): _prog = EPGProgram(_ch.id) _prog.stime = datetime.strptime( f"{str(day)} {hour}:{minute.text.strip()}", "%Y-%m-%d %H:%M") _prog.title = program.text.replace("방송중 ", "").strip() _prog.category = category.text.strip() for image in program.find_all("img", alt=True): grade = re.match(r"([\d,]+)", image["alt"]) _prog.rating = int( grade.group(1)) if grade else 0 _ch.programs.append(_prog) except Exception: log.exception("파싱 에러: %s", _ch) if not lazy_write: _ch.to_xml(self.cfg, no_endtime=self.no_endtime)
def get_programs(self, lazy_write=False): url = "https://search.daum.net/search?DA=B3T&w=tot&rtmaxcoll=B3T&q={}" for idx, _ch in enumerate(self.req_channels): log.info("%03d/%03d %s", idx + 1, len(self.req_channels), _ch) search_url = url.format(quote(_ch.svcid + " 편성표")) data = self.request(search_url, None, method="GET", output="html") soup = BeautifulSoup(data) if not soup.find_all(attrs={"disp-attr": "B3T"}): log.warning("EPG 정보가 없거나 없는 채널입니다: %s", _ch) continue days = soup.select( 'div[class="tbl_head head_type2"] > span > span[class="date"]') # 연도 추정 currdate = datetime.now() # 언제나 basedate보다 미래 basedate = datetime.strptime(days[0].text.strip(), "%m.%d").replace(year=currdate.year) if (basedate - currdate).days > 0: basedate = basedate.replace(year=basedate.year - 1) for nd, _ in enumerate(days): hours = soup.select( f'[id="tvProgramListWrap"] > table > tbody > tr > td:nth-of-type({nd+1})' ) if len(hours) != 24: log.warning("24개의 시간 행이 있어야 합니다: %s, 현재: %d", _ch, len(hours)) break for nh, hour in enumerate(hours): for dl in hour.select("dl"): _prog = EPGProgram(_ch.id) nm = int(dl.select("dt")[0].text.strip()) _prog.stime = basedate + timedelta( days=nd, hours=nh, minutes=nm) for atag in dl.select("dd > a"): _prog.title = atag.text.strip() # TODO: Get more details via daum search for span in dl.select("dd > span"): class_val = " ".join(span["class"]) if class_val == "": _prog.title = span.text.strip() elif "ico_re" in class_val: _prog.rebroadcast = True elif "ico_rate" in class_val: _prog.rating = int( class_val.split("ico_rate")[1].strip()) else: # ico_live ico_hd ico_subtitle ico_hand ico_uhd ico_talk ico_st _prog.extras.append(span.text.strip()) match = re.compile(self.title_regex).search( _prog.title) _prog.title = match.group("title") or None _prog.part_num = match.group("part") or None _prog.ep_num = match.group("epnum") or "" _prog.title_sub = match.group("subname1") or "" _prog.title_sub = match.group( "subname2") or _prog.title_sub if _prog.part_num: _prog.title += f" {_prog.part_num}부" _ch.programs.append(_prog) if not lazy_write: _ch.to_xml(self.cfg, no_endtime=self.no_endtime)
def get_programs(self, lazy_write=False):
    max_ndays = 4
    if int(self.cfg["FETCH_LIMIT"]) > max_ndays:
        log.warning(
            """
***********************************************************************
%s provides EPG data for only %d days, including today.
***********************************************************************
""",
            self.provider_name,
            max_ndays,
        )
    url = "http://m.skbroadband.com/content/realtime/Channel_List.do"
    params = {"key_depth2": "SVCID", "key_depth3": "EPGDATE"}
    for idx, _ch in enumerate(self.req_channels):
        log.info("%03d/%03d %s", idx + 1, len(self.req_channels), _ch)
        for nd in range(min(int(self.cfg["FETCH_LIMIT"]), max_ndays)):
            day = date.today() + timedelta(days=nd)
            params.update({
                "key_depth2": _ch.svcid,
                "key_depth3": day.strftime("%Y%m%d")
            })
            try:
                data = self.request(url, params, method="GET", output="html")
                # pre-clean the markup: fix the charset declaration, drop
                # comments, flag badges and the "프로그램 안내" ("program
                # guide") heading
                data = re.sub("EUC-KR", "utf-8", data)
                data = re.sub("<!--(.*?)-->", "", data, 0, re.I | re.S)
                data = re.sub(
                    r'<span class="round_flag flag(?:02|03|04|09|10|11|12)">(.*?)</span>',
                    "", data)
                data = re.sub('<strong class="hide">프로그램 안내</strong>', "", data)
                # unterminated <p> tags break the parser; SK.replacement closes them
                data = re.sub('<p class="cont">(.*)',
                              partial(SK.replacement, tag="p"), data)
                data = re.sub('<p class="tit">(.*)',
                              partial(SK.replacement, tag="p"), data)
                strainer = SoupStrainer("div", {"id": "uiScheduleTabContent"})
                soup = BeautifulSoup(data, parse_only=strainer)
                for row in soup.find_all("li", {"class": "list"}):
                    _prog = EPGProgram(_ch.id)
                    _prog.stime = datetime.strptime(
                        f"{str(day)} {row.find('p', {'class': 'time'}).text}",
                        "%Y-%m-%d %H:%M")
                    for itag in row.select('i[class="hide"]'):
                        itxt = itag.text.strip()
                        if "세 이상" in itxt:  # "N세 이상" = rated N+
                            _prog.rating = int(itxt.replace("세 이상", "").strip())
                        else:
                            _prog.extras.append(itxt)
                    cell = row.find("p", {"class": "cont"})
                    if cell:
                        if cell.find("span"):
                            cell.span.decompose()
                        _prog.title = cell.text.strip()
                        matches = re.match(self.title_regex, _prog.title)
                        if matches:
                            _prog.title = matches.group(1) or ""
                            _prog.title_sub = matches.group(5) or ""
                            _prog.rebroadcast = bool(matches.group(7))
                            _prog.ep_num = matches.group(3) or ""
                    _ch.programs.append(_prog)
            except Exception:
                log.exception("Parsing error: %s", _ch)
        if not lazy_write:
            _ch.to_xml(self.cfg, no_endtime=self.no_endtime)
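
# SK.replacement is not shown in this excerpt. A hypothetical sketch of what
# such a callback could look like, assuming its job is to re-terminate the
# truncated <p ...> lines matched above (an assumption, not the project's
# actual implementation):
import re
from functools import partial

def replacement(match, tag):
    # close the tag that the source HTML leaves dangling
    return f"{match.group(0).strip()}</{tag}>"

line = '<p class="tit">무한도전'  # hypothetical unterminated markup
print(re.sub('<p class="tit">(.*)', partial(replacement, tag="p"), line))
# <p class="tit">무한도전</p>
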
def get_programs(self, lazy_write=False):
    max_ndays = 5
    if int(self.cfg["FETCH_LIMIT"]) > max_ndays:
        log.warning(
            """
***********************************************************************
%s provides EPG data for only %d days, including today.
***********************************************************************
""",
            self.provider_name,
            max_ndays,
        )
    url = "http://www.uplus.co.kr/css/chgi/chgi/RetrieveTvSchedule.hpi"
    params = {"chnlCd": "SVCID", "evntCmpYmd": "EPGDATE"}
    for idx, _ch in enumerate(self.req_channels):
        log.info("%03d/%03d %s", idx + 1, len(self.req_channels), _ch)
        for nd in range(min(int(self.cfg["FETCH_LIMIT"]), max_ndays)):
            day = date.today() + timedelta(days=nd)
            params.update({
                "chnlCd": _ch.svcid,
                "evntCmpYmd": day.strftime("%Y%m%d")
            })
            try:
                data = self.request(url, params, method="POST", output="html")
                # escape the literal "<재>" (rebroadcast) marker so it cannot
                # be mistaken for a tag, and drop truncation artifacts
                data = data.replace("<재>", "&lt;재&gt;").replace(
                    " [..", "").replace(" (..", "")
                soup = BeautifulSoup(data, parse_only=SoupStrainer("table"))
                if not str(soup):
                    log.warning("No EPG data, or the channel does not exist: %s", _ch)
                    # if there is nothing today, assume nothing tomorrow either
                    break
                for row in soup.find("table").tbody.find_all("tr"):
                    cell = row.find_all("td")
                    _prog = EPGProgram(_ch.id)
                    _prog.stime = datetime.strptime(
                        f"{str(day)} {cell[0].text}", "%Y-%m-%d %H:%M")
                    for span in cell[1].select("span > span[class]"):
                        span_txt = span.text.strip()
                        if "cte_all" in span["class"]:
                            _prog.rating = 0 if span_txt == "All" else int(span_txt)
                        else:
                            _prog.extras.append(span_txt)
                    cell[1].find("span", {"class": "tagGroup"}).decompose()
                    _prog.title = cell[1].text.strip()
                    matches = re.match(self.title_regex, _prog.title)
                    if matches:
                        _prog.title = (matches.group(1) or "").strip()
                        _prog.title_sub = (matches.group(2) or "").strip()
                        _prog.ep_num = matches.group(3) or ""
                        _prog.rebroadcast = bool(matches.group(4))
                    _prog.category = cell[2].text.strip()
                    _ch.programs.append(_prog)
            except Exception:
                log.exception("Parsing error: %s", _ch)
        if not lazy_write:
            _ch.to_xml(self.cfg, no_endtime=self.no_endtime)
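
# Why the "<재>" escape above matters: depending on the parser, a literal
# "<재>" risks being read as markup and dropped from .text, while the escaped
# entity reliably round-trips back to visible text. A standalone sketch:
from bs4 import BeautifulSoup

raw = "<td>무한도전 &lt;재&gt;</td>"  # markup after the escape above
print(BeautifulSoup(raw, "html.parser").text)  # 무한도전 <재>
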