Beispiel #1
0
 def get_lxml_by_url(url):
     """Return whatever ``SystemLogicSite.get_tree_daum`` produces for *url*.

     Any exception (including a failed import of ``system``) is logged
     together with its traceback and swallowed; the function then falls
     through and returns ``None``.
     """
     try:
         from system import SystemLogicSite
         tree = SystemLogicSite.get_tree_daum(url)
     except Exception as err:
         logger.error('Exception:%s', err)
         logger.error(traceback.format_exc())
     else:
         return tree
Beispiel #2
0
    def check_filename(filename):
        """Parse a release filename into its components.

        The filename is tried against ``_REGEX_FILENAME`` first and then
        against ``_REGEX_FILENAME_NO_EPISODE_NUMBER``; the first pattern that
        matches supplies the values.

        Args:
            filename: file name to parse.

        Returns:
            The result of ``DaumTV.change_filename_continous_episode`` applied
            to a dict with the keys ``title``, ``no`` (int, ``-1`` when the
            pattern captured no episode number), ``date``, ``etc`` (dots
            removed), ``quality`` and ``release``.  ``None`` when neither
            pattern matches or when an exception was caught and logged.
        """
        logger.debug('check_filename filename : %s', filename)
        try:
            for regex in [_REGEX_FILENAME, _REGEX_FILENAME_NO_EPISODE_NUMBER]:
                match = re.compile(regex).match(filename)
                if not match:
                    continue
                logger.debug('QQQQQQQQQQQ')
                # Read from the pattern that actually matched.  groupdict()
                # lets optional groups (absent from a pattern, or not taking
                # part in the match) come back as None instead of raising.
                groups = match.groupdict()
                episode_no = groups.get('no')
                ret = {}
                ret['title'] = match.group('name')
                ret['no'] = int(episode_no) if episode_no else -1
                ret['date'] = groups.get('date')
                ret['etc'] = (groups.get('etc') or '').replace('.', '')
                ret['quality'] = groups.get('quality')
                ret['release'] = groups.get('release')
                return DaumTV.change_filename_continous_episode(ret)
            return None
        except Exception as exception:
            logger.error('Exception:%s', exception)
            logger.error(traceback.format_exc())
Beispiel #3
0
    def get_html(url):
        """Return the result of ``SystemLogicSite.get_text_daum`` for *url*.

        Failures of any kind are logged with a traceback and swallowed, in
        which case ``None`` is returned implicitly.
        """
        try:
            from system import SystemLogicSite
            return SystemLogicSite.get_text_daum(url)
        except Exception as err:
            logger.error('Exception:%s', err)
            logger.error(traceback.format_exc())
Beispiel #4
0
 def get_lxml_by_url(url):
     """Fetch *url* with the shared Daum session and parse it with lxml.

     The request uses the ``headers``/``session`` objects from
     ``framework.common.daum`` and the cookies returned by
     ``SystemLogicSite.get_daum_cookies()``.  The response body is handed to
     ``lxml.html.fromstring`` and the resulting root element is returned.
     On any exception the error is logged and ``None`` is returned implicitly.
     """
     try:
         from framework.common.daum import headers, session
         from system.logic_site import SystemLogicSite
         response = session.get(
             url,
             headers=headers,
             cookies=SystemLogicSite.get_daum_cookies(),
         )
         return lxml.html.fromstring(response.content)
     except Exception as err:
         logger.error('Exception:%s', err)
         logger.error(traceback.format_exc())
Beispiel #5
0
    def get_show_info_on_home_title(title, daum_id=None):
        """Build a Daum search URL for *title* and fetch it as an lxml tree.

        The ``[종영]`` marker is stripped from the title before it is UTF-8
        encoded and URL-quoted.  When *daum_id* is given, the URL additionally
        carries it as ``irk`` together with the TV-program parameters.

        Returns the value of ``DaumTV.get_lxml_by_url`` for that URL, or
        ``None`` (implicitly) when an exception was caught and logged.
        """
        try:
            cleaned = title.replace(u'[종영]', '')
            quoted = py_urllib.quote(cleaned.encode('utf8'))
            if daum_id is not None:
                url = 'https://search.daum.net/search?q=%s&irk=%s&irt=tv-program&DA=TVP' % (quoted, daum_id)
            else:
                url = 'https://search.daum.net/search?q=%s' % (quoted)
            return DaumTV.get_lxml_by_url(url)
        except Exception as err:
            logger.error('Exception:%s', err)
            logger.error(traceback.format_exc())
Beispiel #6
0
 def get_html(url):
     """Fetch *url* with the shared Daum session and return the raw body.

     Uses ``headers``/``session`` from ``framework.common.daum`` and the
     cookies from ``SystemLogicSite.get_daum_cookies()``; the return value is
     the response's ``content`` attribute.  Any exception is logged with its
     traceback and swallowed, leaving an implicit ``None`` return.
     """
     try:
         from framework.common.daum import headers, session
         from system.logic_site import SystemLogicSite
         response = session.get(
             url,
             headers=headers,
             cookies=SystemLogicSite.get_daum_cookies(),
         )
         return response.content
     except Exception as err:
         logger.error('Exception:%s', err)
         logger.error(traceback.format_exc())
Beispiel #7
0
 def get_show_info(title, no=None, date=None):
     """Collect the home-search and TV-search information for a show.

     *title* is first normalised with
     ``DaumTV.get_search_name_from_original``; the normalised title is used
     both for the plain Daum search (parsed and passed to
     ``DaumTV.get_show_info_on_home``) and for ``DaumTV.get_daum_tv_info``.
     The *no* and *date* parameters are accepted but not used here.

     Returns a dict ``{'home': ..., 'tv': ...}``, or ``None`` (implicitly)
     when an exception was caught and logged.
     """
     try:
         search_title = DaumTV.get_search_name_from_original(title)
         quoted = py_urllib.quote(search_title.encode('utf8'))
         home_url = 'https://search.daum.net/search?q=%s' % (quoted)
         home_root = lxml.html.fromstring(DaumTV.get_html(home_url))
         home_info = DaumTV.get_show_info_on_home(home_root)
         tv_info = DaumTV.get_daum_tv_info(search_title)
         return {'home': home_info, 'tv': tv_info}
     except Exception as err:
         logger.error('Exception:%s', err)
         logger.error(traceback.format_exc())
Beispiel #8
0
 def change_filename_continous_episode(ret):
     """Rewrite the title/episode of a parsed filename that contains U+D569.

     Titles without the Hangul syllable U+D569 are returned untouched
     (presumably that syllable marks combined multi-episode releases —
     confirm against ``_REGEX_FILENAME_RENAME``).  Otherwise the title is
     matched against ``_REGEX_FILENAME_RENAME``; on a match ``ret['title']``
     becomes the stripped ``title`` group and, if ``ret['no']`` is ``-1``,
     ``ret['no']`` is taken from the ``no`` group.

     Args:
         ret: dict with at least ``title`` and ``no``; ``filename`` is
             optional and only used for the debug log.

     Returns:
         The same dict (mutated in place), or ``None`` when an exception
         was caught and logged.
     """
     try:
         if u'\ud569' not in ret['title']:
             return ret
         match = re.compile(_REGEX_FILENAME_RENAME).match(ret['title'])
         if match:
             # 'filename' is not guaranteed to be present; a plain index
             # here would raise KeyError and abort the rename just for a log.
             logger.debug(u'\ud569\ubcf8 : %s', ret.get('filename'))
             ret['title'] = match.group('title').strip()
             if ret['no'] == -1:
                 ret['no'] = int(match.group('no'))
         return ret
     except Exception as e:
         logger.error('Exception:%s', e)
         logger.error(traceback.format_exc())
Beispiel #9
0
    def get_daum_tv_info(search_name, daum_id=None, on_home=False):
        """Scrape the Daum TV search page for a show and collect its metadata.

        Args:
            search_name: show title; normalised through
                ``DaumTV.get_search_name_from_original`` before searching.
            daum_id: optional program id.  When given it is added to the
                search URL as ``irk``.  The id stored in the result is always
                the one found in the fetched page (or ``''``), not this
                argument.
            on_home: when true and the page has no studio/broadcast spans,
                fall back to ``DaumTV.get_show_info_on_home_title`` /
                ``DaumTV.get_show_info_on_home`` for those fields.

        Returns:
            A dict that always has ``daum_id``, ``status`` (0 by default,
            1 for the "broadcast ended" label, 2 for the "broadcast
            scheduled" label), ``episode_list`` (``data-clip`` value -> list
            of episode-number strings) and ``episode_count_one_day``.
            Depending on what the page contains it may also have ``title``,
            ``studio``, ``broadcast_info``, ``broadcast_term``,
            ``start_date``, ``genre``, ``summary``, ``poster_url``,
            ``last_episode_date`` and ``last_episode_no``.
            ``None`` is returned when the title element is missing, or
            (implicitly) when an exception was caught and logged.
        """
        try:
            entity = {}
            #logger.debug('get_daum_tv_info 1 %s', search_name)
            search_name = DaumTV.get_search_name_from_original(search_name)
            #logger.debug('get_daum_tv_info 2 %s', search_name)

            if daum_id is not None:
                url = 'https://search.daum.net/search?w=tv&q=%s&irk=%s&irt=tv-program&DA=TVP' % (py_urllib.quote(search_name.encode('utf8')), daum_id)
            else:
                url = 'https://search.daum.net/search?w=tv&q=%s' % (py_urllib.quote(search_name.encode('utf8')))

            data = DaumTV.get_html(url)
            # The program id is taken from the first "irk=<digits>" found in the page.
            match = re.compile(r'irk\=(?P<id>\d+)').search(data)
            root = lxml.html.fromstring(data)
            daum_id = match.group('id') if match else ''

            entity = {}
            entity['daum_id'] = daum_id
            items = root.xpath('//*[@id="tv_program"]/div[1]/div[2]/strong')
            # No title element: treat the page as "no result".
            if not items: 
                return None
            if len(items) == 1:
                entity['title'] = items[0].text.strip()
                entity['title'] = entity['title'] .replace('?', '').replace(':', '')

            entity['status'] = 0
            # Status label: broadcast ended (1) or broadcast scheduled (2)
            items = root.xpath('//*[@id="tv_program"]/div[1]/div[2]/span')
            if items:
                if items[0].text.strip() == u'방송종료':
                    entity['status'] = 1
                elif items[0].text.strip() == u'방송예정':
                    entity['status'] = 2

            items = root.xpath('//*[@id="tv_program"]/div[1]/div[3]/span')
            # 2019-02-25: this information disappears once the broadcast has ended
            if items:
                entity['studio'] = items[0].text.strip()
                # The second and third spans are optional; missing ones are skipped.
                try: 
                    entity['broadcast_info'] = items[1].text.strip()
                except: 
                    pass
                try: 
                    entity['broadcast_term'] = items[2].text.strip()
                except: 
                    pass
                # NOTE(review): this re-query result is overwritten before it is
                # read again below, so it appears to have no effect.
                try:
                    items = root.xpath('//*[@id="tv_program"]/div[1]/div[2]/span')
                except:
                    pass
            else:
                if on_home:
                    logger.debug('on_home : %s', search_name)
                    xml_root = DaumTV.get_show_info_on_home_title(search_name, daum_id=daum_id)
                    home_ret = DaumTV.get_show_info_on_home(xml_root)
                    if home_ret :
                        entity['studio'] = home_ret['studio']
                        entity['broadcast_info'] = home_ret['broadcast_info']
                        entity['broadcast_term'] = home_ret['broadcast_term']
                        # Upcoming shows have truthy items, so they never reach this branch.

            # start_date is the "YYYY.M.D" that precedes '~' in broadcast_term;
            # a missing broadcast_term is silently ignored.
            try:
                match = re.compile(r'(\d{4}\.\d{1,2}\.\d{1,2})~').search(entity['broadcast_term'])
                if match:
                    entity['start_date'] = match.group(1)
            except:
                pass

            items = root.xpath('//*[@id="tv_program"]/div[1]/dl[1]/dd')
            if len(items) == 1:
                # Keep only the first word, and drop anything from '(' onwards.
                entity['genre'] = items[0].text.strip().split(' ')[0]
                entity['genre'] = entity['genre'].split('(')[0].strip()


            items = root.xpath('//*[@id="tv_program"]/div[1]/dl[2]/dd')
            if len(items) == 1:
                entity['summary'] = items[0].text.replace('&nbsp', ' ')

            items = root.xpath('//*[@id="tv_program"]/div[1]/div[1]/a/img')
            if len(items) == 1:
                # Prefix "https:" only when src does not already start with "http".
                entity['poster_url'] = items[0].attrib['src'] if items[0].attrib['src'].startswith('http') else f"https:{items[0].attrib['src']}"

            items = root.xpath('//*[@id="clipDateList"]/li')
            entity['episode_list'] = {}

            # Only the last 300 list entries are processed.
            if len(items) > 300: 
                items = items[len(items)-300:]
            # Today as an int in YYYYMMDD form; data-clip values are compared
            # against it as ints (presumably they use the same format — confirm).
            today = int(datetime.now().strftime('%Y%m%d'))

            for item in items:
                try:
                    a_tag = item.xpath('a') 
                    if len(a_tag) == 1:
                        span_tag = a_tag[0].xpath('span[@class="txt_episode"]')
                        if len(span_tag) == 1:
                            if item.attrib['data-clip'] in entity['episode_list']:
                                # The same date and the same episode number sometimes appear twice; looks like a site bug.
                                # e.g. EBS Chodaeseok, 2010912, episode 28
                                if entity['episode_list'][item.attrib['data-clip']][0] == span_tag[0].text.strip().replace(u'회', ''):
                                    pass
                                else:
                                    # Compare with the last episode recorded for this date
                                    # (originally: accept only a difference of 1; the code now allows up to 4).
                                    idx = len(entity['episode_list'][item.attrib['data-clip']]) - 1
                                    _ = abs(int(entity['episode_list'][item.attrib['data-clip']][idx]) - int(span_tag[0].text.strip().replace(u'회', '')))
                                    # 2019-06-24
                                    # "Seulpeul Ttae Saranghanda": episodes listed in the order 21, 22
                                    if _ <= 4:
                                        if item.attrib['data-clip'] != '' and today >= int(item.attrib['data-clip']):
                                            entity['last_episode_date'] = item.attrib['data-clip']
                                            entity['last_episode_no'] = span_tag[0].text.strip().replace(u'회', '')
                                        entity['episode_list'][item.attrib['data-clip']].append(span_tag[0].text.strip().replace(u'회', ''))
                                    else:
                                        pass
                                    # Daum sometimes has an out-of-place episode mixed in.
                                    # Ignore it.

                            else:
                                # First episode seen for this date.
                                if item.attrib['data-clip'] != '' and today >= int(item.attrib['data-clip']):
                                    entity['last_episode_date'] = item.attrib['data-clip']
                                    entity['last_episode_no'] = span_tag[0].text.strip().replace(u'회', '')
                                entity['episode_list'][item.attrib['data-clip']] = [span_tag[0].text.strip().replace(u'회', '')]
                except Exception as exception: 
                    # A malformed entry is logged and skipped; the loop continues.
                    logger.error('Exception:%s', exception)
                    logger.error(traceback.format_exc())
            # Total number of list entries: len(items)
            # Number of distinct dates in the dict: len(entity.episode_list)
            # Exactly half means 2 episodes per day, a quarter means 4 per day

            # 2019-06-24
            #if len(entity.episode_list) != 0 and len(items) % len(entity.episode_list) == 0:
            #    entity.episode_count_one_day = len(items) / len(entity.episode_list)
            # Rounded ratio of list entries to distinct dates, never below 1.
            try:
                if len(entity['episode_list']):
                    entity['episode_count_one_day'] = int(round(float(len(items)) / len(entity['episode_list'])))
                    if entity['episode_count_one_day'] == 0:
                        entity['episode_count_one_day'] = 1
                else:
                    entity['episode_count_one_day'] = 1
            except:
                entity['episode_count_one_day'] = 1

            #entity['episode_list_json'] = json.dumps(entity['episode_list'])
            #entity['episode_list_json'] = entity['episode_list']
            #logger.debug(entity['episode_list_json'])
            #entity.save()
            logger.debug('daum tv len(entity.episode_list) : %s %s %s', len(items), len(entity['episode_list']), entity['episode_count_one_day'])
            #logger.debug(entity.episode_list)
            #logger.debug(items)
            return entity  
        except Exception as exception: 
            logger.error('Exception:%s', exception)
            logger.error(traceback.format_exc())
Beispiel #10
0
    def get_daum_tv_info(search_name, daum_id=None, on_home=False):
        """Scrape the Daum TV search page for a show and collect its metadata.

        Args:
            search_name: show title; normalised through
                ``DaumTV.get_search_name_from_original`` before searching.
            daum_id: optional program id, added to the search URL as ``irk``.
                The id stored in the result is always the one found in the
                fetched page (or ``''``), not this argument.
            on_home: when true and the page has no studio/broadcast spans,
                fall back to ``DaumTV.get_show_info_on_home_title`` /
                ``DaumTV.get_show_info_on_home`` for those fields.

        Returns:
            A dict that always has ``daum_id``, ``status`` (0 by default,
            1 for the "broadcast ended" label, 2 for the "broadcast
            scheduled" label), ``episode_list`` (``data-clip`` value -> list
            of episode-number strings) and ``episode_count_one_day``; other
            keys (``title``, ``studio``, ``broadcast_info``,
            ``broadcast_term``, ``start_date``, ``genre``, ``summary``,
            ``poster_url``, ``last_episode_date``, ``last_episode_no``) are
            present only when the page provides them.  ``None`` when the
            title element is missing or an exception was caught and logged.
        """
        try:
            logger.debug('get_daum_tv_info 1 %s', search_name)
            search_name = DaumTV.get_search_name_from_original(search_name)
            logger.debug('get_daum_tv_info 2 %s', search_name)
            if daum_id is not None:
                url = 'https://search.daum.net/search?w=tv&q=%s&irk=%s&irt=tv-program&DA=TVP' % (urllib.quote(search_name.encode('utf8')), daum_id)
            else:
                url = 'https://search.daum.net/search?w=tv&q=%s' % urllib.quote(search_name.encode('utf8'))
            data = DaumTV.get_html(url)
            # The program id is taken from the first "irk=<digits>" in the page.
            match = re.compile('irk\\=(?P<id>\\d+)').search(data)
            root = lxml.html.fromstring(data)
            daum_id = match.group('id') if match else ''
            entity = {'daum_id': daum_id}

            items = root.xpath('//*[@id="tv_program"]/div[1]/div[2]/strong')
            if not items:
                # No title element: treat the page as "no result".
                return None
            if len(items) == 1:
                entity['title'] = items[0].text.strip().replace('?', '').replace(':', '')

            entity['status'] = 0
            items = root.xpath('//*[@id="tv_program"]/div[1]/div[2]/span')
            if items:
                status_label = items[0].text.strip()
                if status_label == u'\ubc29\uc1a1\uc885\ub8cc':
                    entity['status'] = 1
                elif status_label == u'\ubc29\uc1a1\uc608\uc815':
                    entity['status'] = 2

            items = root.xpath('//*[@id="tv_program"]/div[1]/div[3]/span')
            if items:
                entity['studio'] = items[0].text.strip()
                # The second and third spans are optional (missing or empty).
                try:
                    entity['broadcast_info'] = items[1].text.strip()
                except (IndexError, AttributeError):
                    pass
                try:
                    entity['broadcast_term'] = items[2].text.strip()
                except (IndexError, AttributeError):
                    pass
            elif on_home:
                logger.debug('on_home : %s', search_name)
                xml_root = DaumTV.get_show_info_on_home_title(search_name, daum_id=daum_id)
                home_ret = DaumTV.get_show_info_on_home(xml_root)
                if home_ret:
                    entity['studio'] = home_ret['studio']
                    entity['broadcast_info'] = home_ret['broadcast_info']
                    entity['broadcast_term'] = home_ret['broadcast_term']

            # start_date is the "YYYY.M.D" that precedes '~' in broadcast_term;
            # the term may be absent (KeyError) or not a string (TypeError).
            try:
                match = re.compile('(\\d{4}\\.\\d{1,2}\\.\\d{1,2})~').search(entity['broadcast_term'])
                if match:
                    entity['start_date'] = match.group(1)
            except (KeyError, TypeError):
                pass

            items = root.xpath('//*[@id="tv_program"]/div[1]/dl[1]/dd')
            if len(items) == 1:
                # Keep only the first word, and drop anything from '(' onwards.
                genre = items[0].text.strip().split(' ')[0]
                entity['genre'] = genre.split('(')[0].strip()

            items = root.xpath('//*[@id="tv_program"]/div[1]/dl[2]/dd')
            if len(items) == 1:
                entity['summary'] = items[0].text.replace('&nbsp', ' ')

            items = root.xpath('//*[@id="tv_program"]/div[1]/div[1]/a/img')
            if len(items) == 1:
                # Only scheme-relative sources need the "https:" prefix;
                # prefixing an already absolute URL would corrupt it.
                src = items[0].attrib['src']
                entity['poster_url'] = src if src.startswith('http') else 'https:%s' % src

            items = root.xpath('//*[@id="clipDateList"]/li')
            entity['episode_list'] = {}
            # Only the last 300 list entries are processed.
            if len(items) > 300:
                items = items[len(items) - 300:]
            # Today as an int in YYYYMMDD form; data-clip values are compared
            # against it as ints (presumably they use the same format).
            today = int(datetime.now().strftime('%Y%m%d'))
            for item in items:
                try:
                    a_tag = item.xpath('a')
                    if len(a_tag) != 1:
                        continue
                    span_tag = a_tag[0].xpath('span[@class="txt_episode"]')
                    if len(span_tag) != 1:
                        continue
                    clip_date = item.attrib['data-clip']
                    episode_no = span_tag[0].text.strip().replace(u'\ud68c', '')
                    episodes = entity['episode_list'].get(clip_date)
                    if episodes is not None:
                        if episodes[0] == episode_no:
                            # Same date and same episode listed twice: skip.
                            continue
                        if abs(int(episodes[-1]) - int(episode_no)) > 4:
                            # Episode number far from the last one recorded
                            # for this date: treated as a stray entry.
                            continue
                    if clip_date != '' and today >= int(clip_date):
                        entity['last_episode_date'] = clip_date
                        entity['last_episode_no'] = episode_no
                    if episodes is None:
                        entity['episode_list'][clip_date] = [episode_no]
                    else:
                        episodes.append(episode_no)
                except Exception as e:
                    # A malformed entry is logged and skipped.
                    logger.error('Exception:%s', e)
                    logger.error(traceback.format_exc())

            # Rounded ratio of list entries to distinct dates, never below 1.
            date_count = len(entity['episode_list'])
            per_day = int(round(float(len(items)) / date_count)) if date_count else 1
            entity['episode_count_one_day'] = per_day if per_day else 1

            logger.debug('daum tv len(entity.episode_list) : %s %s %s', len(items), len(entity['episode_list']), entity['episode_count_one_day'])
            return entity
        except Exception as e:
            logger.error('Exception:%s', e)
            logger.error(traceback.format_exc())

        return None