def delete(self, content_type, data_hash, file_name):
    content_type = self.normalize(content_type)
    file_name = self.normalize(file_name)
    key = content_type + '/' + data_hash + '/' + file_name
    result = {key: memcache.delete(key)}
    content_type = py_urllib.unquote(content_type)
    if IMAGE_TYPES.match(content_type):
        thumbnail_key = key + THUMB_SUFFIX
        result[thumbnail_key] = memcache.delete(thumbnail_key)
    if 'application/json' in self.request.headers.get('Accept'):
        self.response.headers['Content-Type'] = 'application/json'
    s = self.json_stringify(result)
    self.response.write(s)
def get(self, content_type, data_hash, file_name):
    content_type = self.normalize(content_type)
    file_name = self.normalize(file_name)
    key = content_type + '/' + data_hash + '/' + file_name
    data = memcache.get(key)
    if data is None:
        return self.error(404)
    # Prevent browsers from MIME-sniffing the content-type:
    self.response.headers['X-Content-Type-Options'] = 'nosniff'
    content_type = py_urllib.unquote(content_type)
    if not IMAGE_TYPES.match(content_type):
        # Force a download dialog for non-image types:
        content_type = 'application/octet-stream'
    elif file_name.endswith(THUMB_SUFFIX):
        content_type = 'image/png'
    self.response.headers['Content-Type'] = content_type
    # Cache for the expiration time:
    self.response.headers['Cache-Control'] = 'public,max-age=%d' % EXPIRATION_TIME
    self.response.write(data)
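# A minimal sketch (not part of the original source) of how handlers such as the
# get/delete methods above are typically wired into a webapp2 application on
# App Engine. The handler class name and URL pattern here are assumptions for
# illustration only; the real routes may differ.
import webapp2

class FileHandler(webapp2.RequestHandler):
    def get(self, content_type, data_hash, file_name):
        # The three capture groups in the route below arrive as positional args.
        self.response.write('%s/%s/%s' % (content_type, data_hash, file_name))

app = webapp2.WSGIApplication([
    # /<content_type>/<data_hash>/<file_name>
    (r'/([^/]+)/([^/]+)/([^/]+)', FileHandler),
], debug=True)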
def generate():
    option = ModelSetting.get('trans_option').strip().split(' ')
    source = request.args.get('source')
    source = py_urllib.unquote(source)
    logger.debug(source)
    startTime = time.time()
    buffer = []
    sentBurst = False
    ffmpeg_command = ['ffmpeg', "-loglevel", "quiet", "-i", source] + option  #, '-vcodec', 'libx264, "-c:a", "aac", "-b:a", "128k", "-f", "mpegts", "-tune", "zerolatency", "pipe:stdout"]
    logger.debug('command : %s', ffmpeg_command)
    process = subprocess.Popen(ffmpeg_command, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, bufsize=-1)
    global process_list
    #process_list.append(process)
    #ix = len(process_list)-1
    while True:
        line = process.stdout.read(1024)
        buffer.append(line)
        if sentBurst is False and time.time() > startTime + 1 and len(buffer) > 0:
            sentBurst = True
            for i in range(0, len(buffer) - 2):
                yield buffer.pop(0)
        elif time.time() > startTime + 1 and len(buffer) > 0:
            yield buffer.pop(0)
        process.poll()
        if isinstance(process.returncode, int):
            if process.returncode > 0:
                logger.debug('FFmpeg Error :%s', process.returncode)
            break
def handle_upload(self):
    results = []
    for name, fieldStorage in self.request.POST.items():
        if type(fieldStorage) is unicode:
            continue
        result = {}
        result['name'] = py_urllib.unquote(fieldStorage.filename)
        result['type'] = fieldStorage.type
        result['size'] = self.get_file_size(fieldStorage.file)
        if self.validate(result):
            key, thumbnail_key = self.write_blob(fieldStorage.value, result)
            if key is not None:
                result['url'] = self.request.host_url + '/' + key
                result['deleteUrl'] = result['url']
                result['deleteType'] = 'DELETE'
                if thumbnail_key is not None:
                    result['thumbnailUrl'] = self.request.host_url + '/' + thumbnail_key
            else:
                result['error'] = 'Failed to store uploaded file.'
        results.append(result)
    return results
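# A minimal sketch (an assumption, not the original post handler) of how the
# handle_upload() results above are usually returned in the {"files": [...]}
# JSON shape that the jQuery File Upload client expects. It would live on the
# same handler class as handle_upload().
import json

def post(self):
    result = {'files': self.handle_upload()}
    self.response.headers['Content-Type'] = 'application/json'
    self.response.write(json.dumps(result, separators=(',', ':')))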
def __get_download_list(html, tree, site_instance, item):
    download_list = []
    try:
        if 'DOWNLOAD_REGEX' not in site_instance.info:
            return download_list
        #logger.debug(html)
        #tmp = html.find('a href="https://www.rgtorrent.me/bbs/download.php')
        #if tmp != -1:
        #    logger.debug(html[tmp-300:tmp+300])
        #logger.debug(site_instance.info['DOWNLOAD_REGEX'])
        tmp = re.compile(site_instance.info['DOWNLOAD_REGEX'], re.MULTILINE).finditer(html)
        for t in tmp:
            #logger.debug(t.group('url'))
            #logger.debug(t.group('filename'))
            if t.group('filename').strip() == '':
                continue
            entity = {}
            entity['link'] = py_urllib.unquote(t.group('url').strip()).strip()
            entity['link'] = unescape(entity['link'])
            logger.debug(entity['link'])
            entity['filename'] = py_urllib.unquote(t.group('filename').strip())
            entity['filename'] = unescape(entity['filename'])
            if 'DOWNLOAD_URL_SUB' in site_instance.info:
                logger.debug(entity['link'])
                entity['link'] = re.sub(
                    site_instance.info['DOWNLOAD_URL_SUB'][0],
                    site_instance.info['DOWNLOAD_URL_SUB'][1].format(URL=site_instance.info['TORRENT_SITE_URL']),
                    entity['link']).strip()
            if not entity['link'].startswith('http'):
                form = '%s%s' if entity['link'].startswith('/') else '%s/%s'
                entity['link'] = form % (site_instance.info['TORRENT_SITE_URL'], entity['link'])
            if 'FILENAME_SUB' in site_instance.info:
                entity['filename'] = re.sub(
                    site_instance.info['FILENAME_SUB'][0],
                    site_instance.info['FILENAME_SUB'][1],
                    entity['filename']).strip()
            exist = False
            for tt in download_list:
                if tt['link'] == entity['link']:
                    exist = True
                    break
            if not exist:
                if app.config['config']['is_sjva_server'] and len(item['magnet']) > 0:  # or True:
                    try:
                        ext = os.path.splitext(entity['filename'])[1].lower()
                        #item['magnet']
                        if ext in ['.smi', '.srt', '.ass']:
                            #if True:
                            import io
                            if 'USE_SELENIUM' in site_instance.info['EXTRA']:
                                from system import SystemLogicSelenium
                                driver = SystemLogicSelenium.get_driver()
                                driver.get(entity['link'])
                                import time
                                time.sleep(10)
                                files = SystemLogicSelenium.get_downloaded_files()
                                logger.debug(files)
                                # Check which downloaded file matches the expected filename
                                filename_no_ext = os.path.splitext(entity['filename'].split('/')[-1])
                                file_index = 0
                                for idx, value in enumerate(files):
                                    if value.find(filename_no_ext[0]) != -1:
                                        file_index = idx
                                        break
                                logger.debug('fileindex : %s', file_index)
                                content = SystemLogicSelenium.get_file_content(files[file_index])
                                byteio = io.BytesIO()
                                byteio.write(content)
                            else:
                                data = LogicFromSite.get_html(entity['link'], referer=item['url'], stream=True)
                                byteio = io.BytesIO()
                                for chunk in data.iter_content(1024):
                                    byteio.write(chunk)
                            from discord_webhook import DiscordWebhook, DiscordEmbed
                            webhook_url = app.config['config']['rss_subtitle_webhook']
                            text = '%s\n<%s>' % (item['title'], item['url'])
                            webhook = DiscordWebhook(url=webhook_url, content=text)
                            webhook.add_file(file=byteio.getvalue(), filename=entity['filename'])
                            response = webhook.execute()
                            discord = response.json()
                            logger.debug(discord)
                            if 'attachments' in discord:
                                entity['direct_url'] = discord['attachments'][0]['url']
                    except Exception as e:
                        logger.debug('Exception:%s', e)
                        logger.debug(traceback.format_exc())
                download_list.append(entity)
        return download_list
    except Exception as e:
        logger.debug('Exception:%s', e)
        logger.debug(traceback.format_exc())
    return download_list
def __get_bbs_list(site_instance, board, max_page, max_id, xpath_dict, is_test=False):
    bbs_list = []
    index_step = xpath_dict['INDEX_STEP'] if 'INDEX_STEP' in xpath_dict else 1
    index_start = xpath_dict['INDEX_START'] if 'INDEX_START' in xpath_dict else 1
    stop_by_maxid = False
    if 'FORCE_FIRST_PAGE' in site_instance.info['EXTRA']:
        max_page = 1
    cookie = None
    if 'COOKIE' in site_instance.info:
        cookie = site_instance.info['COOKIE']
    for p in range(max_page):
        url = LogicFromSite.get_board_url(site_instance, board, str(p + 1))
        list_tag = xpath_dict['XPATH'][:xpath_dict['XPATH'].find('[%s]')]
        #list_tag = '/html/body/main/div/div/div[3]/div/table/tbody'
        logger.debug('list_tag : %s', list_tag)
        logger.debug('Url : %s', url)
        if 'USE_SELENIUM' in site_instance.info['EXTRA']:
            from system import SystemLogicSelenium
            tmp = SystemLogicSelenium.get_pagesoruce_by_selenium(url, list_tag)
        else:
            tmp = LogicFromSite.get_html(url, cookie=cookie)
        #logger.debug(tmp)
        tree = html.fromstring(tmp)
        #tree = html.fromstring(LogicFromSite.get_html(url))
        lists = tree.xpath(list_tag)
        logger.debug('Count : %s', len(lists))
        for i in range(index_start, len(lists) + 1, index_step):
            try:
                a_tag = tree.xpath(xpath_dict['XPATH'] % i)
                a_tag_index = len(a_tag) - 1
                if a_tag_index == -1:
                    logger.debug('a_tag_index : %s', a_tag_index)
                    continue
                item = {}
                if 'TITLE_XPATH' in xpath_dict:
                    #logger.debug(a_tag[a_tag_index].xpath(xpath_dict['TITLE_XPATH']))
                    if xpath_dict['TITLE_XPATH'].endswith('text()'):
                        logger.debug(a_tag[a_tag_index].xpath(xpath_dict['TITLE_XPATH']))
                        item['title'] = py_urllib.unquote(a_tag[a_tag_index].xpath(xpath_dict['TITLE_XPATH'])[-1]).strip()
                    else:
                        item['title'] = py_urllib.unquote(a_tag[a_tag_index].xpath(xpath_dict['TITLE_XPATH'])[0].text_content()).strip()
                else:
                    item['title'] = py_urllib.unquote(a_tag[a_tag_index].text_content()).strip()
                if 'TITLE_SUB' in xpath_dict:
                    item['title'] = re.sub(xpath_dict['TITLE_SUB'][0], xpath_dict['TITLE_SUB'][1], item['title']).strip()
                # After the usual title handling, extract with a regex if one is provided
                if 'TITLE_REGEX' in xpath_dict:
                    match = re.compile(xpath_dict['TITLE_REGEX']).search(item['title'])
                    if match:
                        item['title'] = match.group('title')
                item['url'] = a_tag[a_tag_index].attrib['href']
                if 'DETAIL_URL_SUB' in site_instance.info:
                    #item['url'] = item['url'].replace(site_instance.info['DETAIL_URL_RULE'][0], site_instance.info['DETAIL_URL_RULE'][1].format(URL=site_instance.info['TORRENT_SITE_URL']))
                    item['url'] = re.sub(
                        site_instance.info['DETAIL_URL_SUB'][0],
                        site_instance.info['DETAIL_URL_SUB'][1].format(URL=site_instance.info['TORRENT_SITE_URL']),
                        item['url'])
                if not item['url'].startswith('http'):
                    form = '%s%s' if item['url'].startswith('/') else '%s/%s'
                    item['url'] = form % (site_instance.info['TORRENT_SITE_URL'], item['url'])
                item['id'] = ''
                if 'ID_REGEX' in site_instance.info:
                    id_regexs = [site_instance.info['ID_REGEX']]
                    #id_regexs.insert(0, site_instance.info['ID_REGEX'])
                else:
                    id_regexs = [r'wr_id\=(?P<id>\d+)', r'\/(?P<id>\d+)\.html', r'\/(?P<id>\d+)$']
                for regex in id_regexs:
                    match = re.compile(regex).search(item['url'])
                    if match:
                        item['id'] = match.group('id')
                        break
                if item['id'] == '':
                    for regex in id_regexs:
                        match = re.compile(regex).search(item['url'].split('?')[0])
                        if match:
                            item['id'] = match.group('id')
                            break
                logger.debug('ID : %s, TITLE : %s', item['id'], item['title'])
                if item['id'].strip() == '':
                    continue
                if is_test:
                    bbs_list.append(item)
                else:
                    if 'USING_BOARD_CHAR_ID' in site_instance.info['EXTRA']:
                        # javdb
                        from .model import ModelBbs2
                        entity = ModelBbs2.get(site=site_instance.info['NAME'], board=board, board_char_id=item['id'])
                        if entity is None:
                            bbs_list.append(item)
                            logger.debug('> Append..')
                        else:
                            logger.debug('> exist..')
                    else:
                        # 2019-04-04 토렌트퐁
                        try:
                            if 'NO_BREAK_BY_MAX_ID' in site_instance.info['EXTRA']:
                                if int(item['id']) <= max_id:
                                    continue
                                else:
                                    bbs_list.append(item)
                            else:
                                if int(item['id']) <= max_id:
                                    logger.debug('STOP by MAX_ID(%s)', max_id)
                                    stop_by_maxid = True
                                    break
                                bbs_list.append(item)
                            #logger.debug(item)
                        except Exception as e:
                            logger.error('Exception:%s', e)
                            logger.error(traceback.format_exc())
            except Exception as e:
                logger.error('Exception:%s', e)
                logger.error(traceback.format_exc())
                logger.error(site_instance.info)
        if stop_by_maxid:
            break
    logger.debug('Last count :%s', len(bbs_list))
    return bbs_list
def get_show_info_on_home(root):
    try:
        tags = root.xpath('//*[@id="tvpColl"]/div[2]/div/div[1]/span/a')
        if len(tags) != 1:
            return
        entity = {}
        entity['title'] = tags[0].text
        match = re.compile(r'q\=(?P<title>.*?)&').search(tags[0].attrib['href'])
        if match:
            entity['title'] = py_urllib.unquote(match.group('title'))
        entity['id'] = re.compile(r'irk\=(?P<id>\d+)').search(tags[0].attrib['href']).group('id')
        entity['status'] = 0
        tags = root.xpath('//*[@id="tvpColl"]/div[2]/div/div[1]/span/span')
        if len(tags) == 1:
            if tags[0].text == u'방송종료':
                entity['status'] = 1
            elif tags[0].text == u'방송예정':
                entity['status'] = 2
        tags = root.xpath('//*[@id="tvpColl"]/div[2]/div/div[1]/div')
        entity['extra_info'] = tags[0].text_content().strip()
        tags = root.xpath('//*[@id="tvpColl"]/div[2]/div/div[1]/div/a')
        entity['studio'] = tags[0].text
        tags = root.xpath('//*[@id="tvpColl"]/div[2]/div/div[1]/div/span')
        entity['extra_info_array'] = [tag.text for tag in tags]
        entity['broadcast_info'] = entity['extra_info_array'][-2].strip()
        entity['broadcast_term'] = entity['extra_info_array'][-1].split(',')[-1].strip()
        entity['year'] = re.compile(r'(?P<year>\d{4})').search(entity['extra_info_array'][-1]).group('year')
        # Series
        entity['series'] = []
        entity['series'].append({
            'title': entity['title'],
            'id': entity['id'],
            'year': entity['year']
        })
        tags = root.xpath('//*[@id="tv_series"]/div/ul/li')
        if tags:
            # 2019-03-05 when a "more series" link exists
            try:
                more = root.xpath('//*[@id="tv_series"]/div/div/a')
                if more:
                    url = more[0].attrib['href']
                    if not url.startswith('http'):
                        url = 'https://search.daum.net/search%s' % url
                    logger.debug('MORE URL : %s', url)
                    if more[0].xpath('span')[0].text == u'시리즈 더보기':
                        more_root = Logic.get_lxml_by_url(url)
                        tags = more_root.xpath('//*[@id="series"]/ul/li')
            except Exception as exception:
                logger.error('Exception:%s', exception)
                logger.error(traceback.format_exc())
            for tag in tags:
                dic = {}
                dic['title'] = tag.xpath('a')[0].text
                dic['id'] = re.compile(r'irk\=(?P<id>\d+)').search(tag.xpath('a')[0].attrib['href']).group('id')
                if tag.xpath('span'):
                    dic['date'] = tag.xpath('span')[0].text
                    dic['year'] = re.compile(r'(?P<year>\d{4})').search(dic['date']).group('year')
                else:
                    dic['year'] = None
                entity['series'].append(dic)
            entity['series'] = sorted(entity['series'], key=lambda k: int(k['id']))
        # Programs with the same name
        entity['equal_name'] = []
        tags = root.xpath(u'//div[@id="tv_program"]//dt[contains(text(),"동명 콘텐츠")]//following-sibling::dd')
        if tags:
            tags = tags[0].xpath('*')
            for tag in tags:
                if tag.tag == 'a':
                    dic = {}
                    dic['title'] = tag.text
                    dic['id'] = re.compile(r'irk\=(?P<id>\d+)').search(tag.attrib['href']).group('id')
                elif tag.tag == 'span':
                    match = re.compile(r'\((?P<studio>.*?),\s*(?P<year>\d{4})?\)').search(tag.text)
                    if match:
                        dic['studio'] = match.group('studio')
                        dic['year'] = match.group('year')
                    elif tag.text == u'(동명프로그램)':
                        entity['equal_name'].append(dic)
                    elif tag.text == u'(동명회차)':
                        continue
        #logger.debug(entity)
        return entity
    except Exception as exception:
        logger.error('Exception:%s', exception)
        logger.error(traceback.format_exc())
def api(sub):
    if sub == 'url.m3u8':
        try:
            mode = request.args.get('m')
            source = request.args.get('s')
            source_id = request.args.get('i')
            quality = request.args.get('q')
            logger.debug('m:%s, s:%s, i:%s', mode, source, source_id)
            action, ret = LogicKlive.get_url(source, source_id, quality, mode)
            #logger.debug('action:%s, url:%s', action, ret)
            if mode == 'plex':
                #new_url = '%s/klive/api/url.m3u8?m=web_play&s=%s&i=%s&q=%s' % (SystemModelSetting.get('ddns'), source, source_id, quality)
                new_url = '%s/klive/api/url.m3u8?m=url&s=%s&i=%s&q=%s' % (SystemModelSetting.get('ddns'), source, source_id, quality)
                #logger.debug(SystemModelSetting.get_bool('auth_use_apikey'))
                if SystemModelSetting.get_bool('auth_use_apikey'):
                    new_url += '&apikey=%s' % SystemModelSetting.get('auth_apikey')

                def generate():
                    startTime = time.time()
                    buffer = []
                    sentBurst = False
                    if platform.system() == 'Windows':
                        path_ffmpeg = os.path.join(path_app_root, 'bin', platform.system(), 'ffmpeg.exe')
                    else:
                        path_ffmpeg = 'ffmpeg'
                    #ffmpeg_command = [path_ffmpeg, "-i", new_url, "-c", "copy", "-f", "mpegts", "-tune", "zerolatency", "pipe:stdout"]
                    #ffmpeg_command = [path_ffmpeg, "-i", new_url, "-c:v", "copy", "-c:a", "aac", "-b:a", "128k", "-f", "mpegts", "-tune", "zerolatency", "pipe:stdout"]
                    # 2020-12-17 by 잠자
                    ffmpeg_command = [path_ffmpeg, "-loglevel", "quiet", "-i", new_url, "-c:v", "copy", "-c:a", "aac", "-b:a", "128k", "-f", "mpegts", "-tune", "zerolatency", "pipe:stdout"]
                    #logger.debug('command : %s', ffmpeg_command)
                    process = subprocess.Popen(ffmpeg_command, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, bufsize=-1)
                    global process_list
                    process_list.append(process)
                    while True:
                        line = process.stdout.read(1024)
                        buffer.append(line)
                        if sentBurst is False and time.time() > startTime + 1 and len(buffer) > 0:
                            sentBurst = True
                            for i in range(0, len(buffer) - 2):
                                yield buffer.pop(0)
                        elif time.time() > startTime + 1 and len(buffer) > 0:
                            yield buffer.pop(0)
                        process.poll()
                        if isinstance(process.returncode, int):
                            if process.returncode > 0:
                                logger.debug('FFmpeg Error :%s', process.returncode)
                            break

                return Response(stream_with_context(generate()), mimetype="video/MP2T")
            if action == 'redirect':
                return redirect(ret, code=302)
            elif action == 'return_after_read':
                logger.warning('return_after_read')
                data = LogicKlive.get_return_data(source, source_id, ret, mode)
                #logger.debug('Data len : %s', len(data))
                return data, 200, {'Content-Type': 'application/vnd.apple.mpegurl'}
            elif action == 'return':
                return ret
            if ret == None:
                return
            if mode == 'url.m3u8':
                return redirect(ret, code=302)
            elif mode == 'lc':
                return ret
        except Exception as e:
            logger.error('Exception:%s', e)
            logger.error(traceback.format_exc())
    elif sub == 'm3uall':
        return LogicKlive.get_m3uall()
    elif sub == 'm3u':
        data = LogicKlive.get_m3u(m3u_format=request.args.get('format'), group=request.args.get('group'), call=request.args.get('call'))
        if request.args.get('file') == 'true':
            import framework.common.util as CommonUtil
            basename = 'klive_custom.m3u'
            filename = os.path.join(path_data, 'tmp', basename)
            CommonUtil.write_file(data, filename)
            return send_file(filename, as_attachment=True, attachment_filename=basename)
        else:
            return data
    elif sub == 'm3utvh':
        return LogicKlive.get_m3u(for_tvh=True, m3u_format=request.args.get('format'), group=request.args.get('group'))
    elif sub == 'redirect':
        try:
            url = request.args.get('url')
            proxy = request.args.get('proxy')
            proxies = None
            if proxy is not None:
                proxy = py_urllib.unquote(proxy)
                proxies = {"https": proxy, 'http': proxy}
            url = py_urllib.unquote(url)
            #logger.debug('REDIRECT:%s', url)
            #logger.warning(f"redirect : {url}")
            # 2021-06-03
            """
            res = requests.get(url, proxies=proxies)
            data = res.content
            return data, 200, {'Content-Type':res.headers['Content-Type']}
            """
            headers = {'Connection': 'keep-alive'}
            r = requests.get(url, headers=headers, stream=True, proxies=proxies)
            rv = Response(r.iter_content(chunk_size=1024), r.status_code, content_type=r.headers['Content-Type'], direct_passthrough=True)
            return rv
        except Exception as e:
            logger.error('Exception:%s', e)
            logger.error(traceback.format_exc())
    elif sub == 'url.mpd':
        try:
            mode = request.args.get('m')
            source = request.args.get('s')
            source_id = request.args.get('i')
            quality = request.args.get('q')
            return_format = 'json'
            data = LogicKlive.get_play_info(source, source_id, quality, mode=mode, return_format=return_format)
            return jsonify(data)
        except Exception as e:
            logger.error('Exception:%s', e)
            logger.error(traceback.format_exc())
    elif sub == 'url.strm':
        try:
            mode = request.args.get('m')
            source = request.args.get('s')
            source_id = request.args.get('i')
            quality = request.args.get('q')
            return_format = 'strm'
            data = LogicKlive.get_play_info(source, source_id, quality, mode=mode, return_format=return_format)
            #return data
            import framework.common.util as CommonUtil
            from .model import ModelCustom
            db_item = ModelCustom.get(source, source_id)
            if db_item is not None:
                basename = '%s.strm' % db_item.title
            else:
                basename = '%s.strm' % source_id
            filename = os.path.join(path_data, 'tmp', basename)
            CommonUtil.write_file(data, filename)
            return send_file(filename, as_attachment=True, attachment_filename=basename)
            #return data
        except Exception as e:
            logger.error('Exception:%s', e)
            logger.error(traceback.format_exc())
    elif sub == 'sinaplayer':
        data = LogicKlive.get_m3u_for_sinaplayer()
        return data
def info_detail(cls, code, entity):
    try:
        #https://movie.naver.com/movie/bi/mi/detail.nhn?code=182205
        url = 'https://movie.naver.com/movie/bi/mi/detail.nhn?code=%s' % code[2:]
        #logger.debug(url)
        root = html.fromstring(requests.get(url).text)
        tags = root.xpath('//ul[@class="lst_people"]/li')
        if tags:
            for tag in tags:
                actor = EntityActor('', site=cls.site_name)
                tmp = tag.xpath('.//img')[0].attrib['src']
                match = re.search(r'src\=(?P<url>.*?)\&', tmp)
                if match:
                    actor.thumb = py_urllib.unquote(match.group('url'))
                actor.name = tag.xpath('.//div[@class="p_info"]/a')[0].attrib['title']
                tmp = tag.xpath('.//div[@class="p_info"]/em')
                if tmp:
                    actor.originalname = tmp[0].text_content()
                tmp = tag.xpath('.//div[@class="p_info"]//p[@class="pe_cmt"]/span')
                if tmp:
                    actor.role = tmp[0].text_content().replace(u'역', '').strip()
                entity.actor.append(actor)
        tags = root.xpath('//div[@class="director"]//div[@class="dir_obj"]')
        if tags:
            for tag in tags:
                tmp = tag.xpath('.//div[@class="dir_product"]/a')
                if tmp:
                    entity.director.append(tmp[0].attrib['title'])
        #
        tags = root.xpath('//div[@class="staff"]//tr[1]//span')
        if tags:
            for tag in tags:
                tmp = tag.xpath('.//a')
                if tmp:
                    entity.credits.append(tmp[0].text_content().strip())
                else:
                    entity.credits.append(tag.text.strip())
        tags = root.xpath('//div[@class="agency"]/dl')
        if tags:
            tmp1 = tags[0].xpath('.//dt')
            tmp2 = tags[0].xpath('.//dd')
            for idx, tag in enumerate(tmp1):
                if tag.text_content().strip() == u'제작':
                    tmp = tmp2[idx].xpath('.//a')
                    entity.studio = tmp[0].text_content().strip() if tmp else tmp2[idx].text_content().strip()
    except Exception as exception:
        logger.error('Exception:%s', exception)
        logger.error(traceback.format_exc())
def get_show_info_on_home(cls, root):
    try:
        tags = root.xpath('//*[@id="tvpColl"]/div[2]/div/div[1]/span/a')
        # 2019-05-13: "일밤 - 미스터리 음악쇼 복면가왕" has two A tags
        if len(tags) < 1:
            return
        tag_index = len(tags) - 1
        #entity = {}
        entity = EntitySearchItemTvDaum(cls.site_name)
        entity.title = tags[tag_index].text
        match = re.compile(r'q\=(?P<title>.*?)&').search(tags[tag_index].attrib['href'])
        if match:
            entity.title = py_urllib.unquote(match.group('title'))
        entity.code = cls.module_char + cls.site_char + re.compile(r'irk\=(?P<id>\d+)').search(tags[tag_index].attrib['href']).group('id')
        tags = root.xpath('//*[@id="tvpColl"]/div[2]/div/div[1]/span/span')
        if len(tags) == 1:
            if tags[0].text == u'방송종료' or tags[0].text == u'완결':
                entity.status = 2
            elif tags[0].text == u'방송예정':
                entity.status = 0
        #entity.image_url = 'https:' + root.xpath('//*[@id="tv_program"]/div[1]/div[1]/a/img')[0].attrib['src']
        # The poster tag may be missing (e.g. 악동탐정스 시즌2)
        try:
            entity.image_url = cls.process_image_url(root.xpath('//*[@id="tv_program"]/div[1]/div[1]/a/img')[0].attrib['src'])
        except:
            entity.image_url = None
        #logger.debug('get_show_info_on_home status: %s', entity.status)
        tags = root.xpath('//*[@id="tvpColl"]/div[2]/div/div[1]/div')
        entity.extra_info = SiteUtil.change_html(tags[0].text_content().strip())
        #logger.debug('get_show_info_on_home extra_info: %s', entity.extra_info)
        tags = root.xpath('//*[@id="tvpColl"]/div[2]/div/div[1]/div/a')
        if len(tags) == 1:
            entity.studio = tags[0].text
        else:
            tags = root.xpath('//*[@id="tvpColl"]/div[2]/div/div[1]/div/span[1]')
            if len(tags) == 1:
                entity.studio = tags[0].text
        #logger.debug('get_show_info_on_home studio: %s', entity.studio)
        tags = root.xpath('//*[@id="tvpColl"]/div[2]/div/div[1]/div/span')
        extra_infos = [tag.text_content() for tag in tags]
        logger.debug(extra_infos)
        #tmps = extra_infos[1].strip().split(' ')
        # 2021-11-03: 홍루몽. Chinese broadcasters have no <a> tag, so the broadcaster ends up in the genre slot.
        entity.genre = extra_infos[0]
        if extra_infos[1] in ['미국드라마', '중국드라마', '영국드라마', '일본드라마', '대만드라마', '기타국가드라마']:
            entity.genre = extra_infos[1]
            entity.studio = extra_infos[0]
        if entity.genre in ['미국드라마', '중국드라마', '영국드라마', '일본드라마', '대만드라마', '기타국가드라마']:
            entity.status = 1
        #logger.debug(tmps)
        #if len(tmps) == 2:
        try:
            entity.episode = int(re.compile(r'(?P<epi>\d{1,4})%s' % u'부').search(entity.extra_info).group('epi'))
        except:
            entity.episode = -1
        entity.broadcast_info = extra_infos[-2].strip().replace(' ', ' ').replace(' ', ' ')
        entity.broadcast_term = extra_infos[-1].split(',')[-1].strip()
        try:
            entity.year = re.compile(r'(?P<year>\d{4})').search(extra_infos[-1]).group('year')
        except:
            entity.year = 0
        entity.desc = root.xpath('//*[@id="tv_program"]/div[1]/dl[1]/dd/text()')[0]
        #logger.debug('get_show_info_on_home 1: %s', entity['status'])
        # Series
        entity.series = []
        try:
            tmp = entity.broadcast_term.split('.')
            if len(tmp) == 2:
                entity.series.append({
                    'title': entity.title,
                    'code': entity.code,
                    'year': entity.year,
                    'status': entity.status,
                    'date': '%s.%s' % (tmp[0], tmp[1])
                })
            else:
                entity.series.append({
                    'title': entity.title,
                    'code': entity.code,
                    'year': entity.year,
                    'status': entity.status,
                    'date': '%s' % (entity.year)
                })
        except Exception as exception:
            logger.debug('Not More!')
            logger.debug(traceback.format_exc())
        tags = root.xpath('//*[@id="tv_series"]/div/ul/li')
        if tags:
            # 2019-03-05 when a "more series" link exists
            try:
                more = root.xpath('//*[@id="tv_series"]/div/div/a')
                if more:
                    url = more[0].attrib['href']
                    if not url.startswith('http'):
                        url = 'https://search.daum.net/search%s' % url
                    #logger.debug('MORE URL : %s', url)
                    if more[0].xpath('span')[0].text == u'시리즈 더보기':
                        #more_root = HTML.ElementFromURL(url)
                        more_root = SiteUtil.get_tree(url, proxy_url=SystemModelSetting.get('site_daum_proxy'), headers=cls.default_headers, cookies=SystemLogicSite.get_daum_cookies())
                        tags = more_root.xpath('//*[@id="series"]/ul/li')
            except Exception as exception:
                logger.debug('Not More!')
                logger.debug(traceback.format_exc())
            find_1900 = False
            for tag in tags:
                dic = {}
                dic['title'] = tag.xpath('a')[0].text
                #logger.debug(dic['title'])
                dic['code'] = cls.module_char + cls.site_char + re.compile(r'irk\=(?P<id>\d+)').search(tag.xpath('a')[0].attrib['href']).group('id')
                if tag.xpath('span'):
                    # The year may be missing
                    dic['date'] = tag.xpath('span')[0].text
                    if dic['date'] is None:
                        dic['date'] = '1900'
                        find_1900 = True
                    else:
                        dic['year'] = re.compile(r'(?P<year>\d{4})').search(dic['date']).group('year')
                else:
                    dic['year'] = None
                entity.series.append(dic)
            # A later season can have a smaller code (e.g. CSI Las Vegas)
            # 2021-03-29 전지적 짝사랑 시점
            if find_1900 or entity.year == 0:
                entity.series = sorted(entity.series, key=lambda k: int(k['code'][2:]))
            else:
                # 2021-06-06 펜트하우스3: season 2 shows as "2021.2" but season 3 only as "2021",
                # so zero-padding pushed season 3 above it. For the same year, fall back to the code.
                """
                for item in entity.series:
                    tmp = item['date'].split('.')
                    if len(tmp) == 2:
                        item['sort_value'] = int('%s%s' % (tmp[0], tmp[1].zfill(2)))
                    elif len(tmp) == 1:
                        item['sort_value'] = int('%s00' % tmp[0])
                entity.series = sorted(entity.series, key=lambda k: k['sort_value'])
                """
                for item in entity.series:
                    tmp = item['date'].split('.')
                    if len(tmp) == 2:
                        item['sort_value'] = int(tmp[0])
                    elif len(tmp) == 1:
                        item['sort_value'] = int(tmp[0])
                entity.series = sorted(entity.series, key=lambda k: (k['sort_value'], int(k['code'][2:])))
        # Programs with the same name
        entity.equal_name = []
        tags = root.xpath(u'//div[@id="tv_program"]//dt[contains(text(),"동명 콘텐츠")]//following-sibling::dd')
        if tags:
            tags = tags[0].xpath('*')
            for tag in tags:
                if tag.tag == 'a':
                    dic = {}
                    dic['title'] = tag.text
                    dic['code'] = cls.module_char + cls.site_char + re.compile(r'irk\=(?P<id>\d+)').search(tag.attrib['href']).group('id')
                elif tag.tag == 'span':
                    match = re.compile(r'\((?P<studio>.*?),\s*(?P<year>\d{4})?\)').search(tag.text)
                    if match:
                        dic['studio'] = match.group('studio')
                        dic['year'] = match.group('year')
                    elif tag.text == u'(동명프로그램)':
                        entity.equal_name.append(dic)
                    elif tag.text == u'(동명회차)':
                        continue
        #logger.debug(entity)
        return entity.as_dict()
    except Exception as exception:
        logger.debug('Exception get_show_info_by_html : %s', exception)
        logger.debug(traceback.format_exc())
def process_image_url(cls, url):
    tmps = url.split('fname=')
    if len(tmps) == 2:
        return py_urllib.unquote(tmps[1])
    else:
        # Protocol-relative //... URLs only need the scheme prepended
        return 'https:' + url
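# A small usage sketch for process_image_url (the example URLs are illustrative,
# not taken from the source): a Daum/kakaocdn thumbnail URL carries the original
# image in its fname= query parameter, while a plain protocol-relative URL only
# needs the scheme added. _process_image_url below is a hypothetical standalone
# copy of the method body, shown only to demonstrate the two branches.
import urllib as py_urllib

def _process_image_url(url):
    tmps = url.split('fname=')
    if len(tmps) == 2:
        return py_urllib.unquote(tmps[1])
    return 'https:' + url

print(_process_image_url('//search1.kakaocdn.net/thumb/C232x336?fname=https%3A%2F%2Fexample.com%2Fposter.jpg'))
# -> https://example.com/poster.jpg
print(_process_image_url('//example.com/poster.jpg'))
# -> https://example.com/poster.jpg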
def normalize(self, str):
    return py_urllib.quote(py_urllib.unquote(str), '')
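# A brief sketch (assuming the Python 2 urllib that the surrounding code aliases
# as py_urllib) of what normalize() does: it decodes any existing percent-encoding
# and re-quotes with no safe characters, so an already-encoded and a raw value
# collapse to the same memcache key segment.
import urllib as py_urllib

print(py_urllib.quote(py_urllib.unquote('image/png'), ''))    # image%2Fpng
print(py_urllib.quote(py_urllib.unquote('image%2Fpng'), ''))  # image%2Fpng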