Esempio n. 1
0
 def get_real_url(self):
     """Return the first reachable "real" m3u8 URL among the play pages.

     Each entry of ``self.play_page_urls`` is prefixed with the site origin
     and fetched.  The page's ``var now="..."`` value is stored in
     ``self.data_url``.  If that link is itself an m3u8 it is used as the
     first playlist; otherwise the ``var main = "..."`` path found in the
     data page is appended to the data link's scheme and host.  The last
     line of the first playlist that mentions ``m3u8`` is taken as the
     variant playlist and is returned once it can be fetched.

     ``get_response`` is assumed to return the response text, or a falsy
     value on failure -- TODO confirm against its definition.

     Returns:
         The resolved m3u8 URL, or the message
         '全部资源尝试完毕,解析失败' when no play page yields one.  When
         ``self.type == 'w'`` and the data link is already an m3u8, the
         scan stops immediately and that same message is returned
         (``self.data_url`` keeps the data link).
     """
     for play_page_url in self.play_page_urls:
         play_page_url = 'https://www.yszxwang.com' + play_page_url
         resp1 = get_response(play_page_url)
         # A page without the expected script variable is handled like a
         # failed request so the remaining play pages are still tried.
         now_match = re.search('var now="(http.*?)"',
                               resp1) if resp1 else None
         resp2 = None
         if now_match:
             self.data_url = now_match.group(1).strip()
             resp2 = get_response(self.data_url)
         if resp2:
             first_m3u8_url = ''
             playlist_text = ''
             if 'm3u8' in self.data_url:
                 if self.type == 'w':
                     break
                 # The data link is the first playlist and resp2 already
                 # holds its body, so it is not requested a second time.
                 first_m3u8_url = self.data_url
                 playlist_text = resp2
             else:
                 s = self.data_url.split('/')
                 main_match = re.search('var main = "(.*?)"', resp2)
                 if main_match:
                     # scheme + '//' + host of the data link, then the path
                     first_m3u8_url = (s[0] + '//' + s[2] +
                                       main_match.group(1).strip())
                     # Read the first playlist to find the real one.
                     playlist_text = get_response(first_m3u8_url)
             u2 = ''
             for text in (playlist_text or '').split('\n'):
                 if 'm3u8' in text:
                     # strip() drops the '\r' left behind by CRLF playlists
                     u2 = text.strip()
             if u2:
                 host = re.sub('index.*', '', first_m3u8_url)
                 if u2.startswith(('http://', 'https://')):
                     # Already an absolute URI; prefixing would corrupt it.
                     real_url = u2
                 elif u2[0] == '/':
                     # NOTE(review): a leading slash is resolved against
                     # the playlist directory, not the server root --
                     # confirm this matches the site's playlists.
                     real_url = host + u2[1:]
                 else:
                     real_url = host + u2
                 # Only hand back a URL that can actually be fetched.
                 if get_response(real_url):
                     return real_url
         logging.info('[播放页异常:] %s', play_page_url)
     return '全部资源尝试完毕,解析失败'
Esempio n. 2
0
 def get_all_source(self):
     """Group the detail page's play links by source, keeping full ones.

     Fetches ``self.many_detail_url`` and collects every
     ``href='/video...'`` link.  Two numbers are read from each link: the
     first ``-<n>-`` is the source index and ``-<n>-<m>.html`` gives the
     episode index.  ``self.play_num`` is set to the highest episode index
     plus one.  Only sources that have exactly that many links are kept;
     sources with fewer episodes are dropped.

     A failed request is treated as a page without links, and links that
     do not carry both numbers are ignored.

     Returns:
         A list of link lists, ordered by ascending source index; each
         inner list keeps the links in page order.
     """
     resp = get_response(self.many_detail_url)
     all_source = re.findall("href='(/video.*?)'", resp) if resp else []

     # Raw strings: '\d' in a plain literal is an invalid escape sequence.
     source_pattern = re.compile(r'-(\d+?)-')
     episode_pattern = re.compile(r'-\d+?-(\d+?)\.html')

     max_play = 0
     links_by_source = {}
     for link in all_source:
         source_match = source_pattern.search(link)
         episode_match = episode_pattern.search(link)
         if source_match is None or episode_match is None:
             continue
         max_play = max(max_play, int(episode_match.group(1)))
         source_index = int(source_match.group(1))
         links_by_source.setdefault(source_index, []).append(link)

     # Highest episode count seen on the page.
     self.play_num = max_play + 1
     # Keep only the sources that offer every episode.
     all_play_list = [
         links_by_source[source_index]
         for source_index in sorted(links_by_source)
         if len(links_by_source[source_index]) == self.play_num
     ]
     logging.info('[共有资源]%s[项],[每项资源]%s[集]', len(all_play_list),
                  self.play_num)
     return all_play_list
Esempio n. 3
0
 def get_play_urls(self):
     """Collect the play-page links found on the detail page.

     Requests ``self.detail_url`` and returns every ``href`` value of the
     anchors matching ``a title=... href='...' target="_self"``.  When the
     request yields a falsy response, the string "ERROR, 请求失败" is
     returned instead of a list.
     """
     detail_html = get_response(self.detail_url)
     if detail_html:
         return re.findall(
             'a title=.*? href=\'(.*?)\' target="_self"', detail_html)
     return "ERROR, 请求失败"
Esempio n. 4
0
def down_m3u8_thread(url, file_name, host=None, headers=None):
    """Download the ``.ts`` segments of an m3u8 playlist and merge them.

    The playlist at ``url`` is fetched and every line containing ``.ts``
    is queued for download.  The segment's file name is also appended to
    the concat list ``cache/s.txt``.  Worker threads running ``down``
    drain the queue, then ``merge`` builds the output file, ``remove``
    cleans up and ``getLength`` is called on the result.

    Args:
        url: Address of the m3u8 playlist.
        file_name: Output name without extension; '.mp4' is appended.
        host: Prefix put in front of segment lines that do not contain
            'http'.  It must be a string when the playlist has such
            lines, because it is concatenated directly.
        headers: Passed through unchanged to every ``down`` worker.

    Returns:
        Whatever ``getLength(file_name)`` returns.
    """
    mkdir()

    file_name = file_name + '.mp4'
    logging.info('[url] %s [file_name] %s', url, file_name)
    # Pre-download: fetch the playlist text.
    m3u8_text = get_response(url)

    # Unbounded on purpose: the queue is filled before any worker starts,
    # so a size limit would block put() forever on a very long playlist.
    ts_queue = Queue()
    concatfile = 'cache/' + "s" + '.txt'
    concat_entries = []
    for line in m3u8_text.split('\n'):
        # strip() drops the '\r' left behind by CRLF playlists
        line = line.strip()
        if '.ts' not in line:
            continue
        if 'http' not in line:
            if line[0] == '/':
                line = host + line
            else:
                line = host + '/' + line
        ts_queue.put(line)
        # NOTE(review): the '.' before 'ts' is unescaped and the first
        # match in the whole URL wins; left untouched because the name
        # presumably has to agree with the one `down` saves under.
        filename = re.search('([a-zA-Z0-9-_]+.ts)', line).group(1).strip()
        concat_entries.append('file %s\n' % filename)
    if concat_entries:
        # One handle, closed deterministically, instead of one leaked
        # handle per segment.
        with open(concatfile, 'a+') as concat:
            concat.writelines(concat_entries)

    num = ts_queue.qsize()
    logging.info('[下载开始,队列任务数:] %s', num)
    # One thread per five segments, at least 1 and at most 50.
    t_num = min(50, max(1, num // 5))

    threads = []
    logging.info('下载开始')

    for i in range(t_num):
        t = threading.Thread(target=down,
                             name='th-' + str(i),
                             kwargs={
                                 'ts_queue': ts_queue,
                                 'headers': headers
                             })
        # Attribute form; Thread.setDaemon() is deprecated.
        t.daemon = True
        threads.append(t)
    for t in threads:
        logging.info('[线程开始]')
        # Stagger the thread starts.
        time.sleep(0.4)
        t.start()
    for t in threads:
        logging.info('[线程停止]')
        t.join()

    logging.info('下载完成,合并开始')
    merge(concatfile, file_name)
    logging.info('合并完成,删除冗余文件')
    remove()
    result = getLength(file_name)

    return result
Esempio n. 5
0
    def get_many_real_url(self):
        """Resolve the m3u8 URLs of every episode from one complete source.

        ``self.all_play_list`` is scanned source by source.  For each
        episode link, the play page is fetched and its ``var now="..."``
        data link is recorded.  The first playlist is the data link itself
        when it mentions ``m3u8``; otherwise it is the ``var main = "..."``
        path appended to the data link's scheme and host.  The last line of
        that playlist mentioning ``m3u8`` is the variant playlist, and it
        is kept only if it can be fetched.

        As soon as one episode cannot be resolved the source is abandoned,
        since it can no longer supply all ``self.play_num`` episodes.  When
        ``self.type == 'w'`` a source whose data link is already an m3u8 is
        abandoned as well.

        ``get_response`` is assumed to return the response text, or a
        falsy value on failure -- TODO confirm against its definition.

        Returns:
            ``(many_real_url, many_data_url)`` -- two lists of length
            ``self.play_num`` from the first source that resolves
            completely -- or ``('defeat', 'defeat')`` when none does.
        """
        for play_list in self.all_play_list:
            many_data_url = []
            many_real_url = []
            for play_page_url in play_list:
                play_page_url = 'https://www.yszxwang.com' + play_page_url
                resp1 = get_response(play_page_url)
                if not resp1:
                    break
                # A page without the expected script variable fails this
                # source instead of raising AttributeError.
                now_match = re.search('var now="(http.*?)"', resp1)
                if now_match is None:
                    break
                data_url = now_match.group(1).strip()
                resp2 = get_response(data_url)
                many_data_url.append(data_url)
                if not resp2:
                    break

                if 'm3u8' in data_url:
                    if self.type == 'w':
                        break
                    # The data link is the first playlist and resp2
                    # already holds its body; no second request needed.
                    first_m3u8_url = data_url
                    playlist_text = resp2
                else:
                    s = data_url.split('/')
                    main_match = re.search('var main = "(.*?)"', resp2)
                    if main_match is None:
                        break
                    # scheme + '//' + host of the data link, then the path
                    first_m3u8_url = (s[0] + '//' + s[2] +
                                      main_match.group(1).strip())
                    # Read the first playlist to find the real one.
                    playlist_text = get_response(first_m3u8_url)
                    if not playlist_text:
                        break

                u2 = ''
                for text in playlist_text.split('\n'):
                    if 'm3u8' in text:
                        # strip() drops the '\r' of CRLF playlists
                        u2 = text.strip()
                if not u2:
                    # No variant playlist: the source cannot be complete.
                    break

                host = re.sub('index.*', '', first_m3u8_url)
                if u2.startswith(('http://', 'https://')):
                    # Already an absolute URI; prefixing would corrupt it.
                    real_url = u2
                elif u2[0] == '/':
                    # NOTE(review): a leading slash is resolved against the
                    # playlist directory, not the server root -- confirm
                    # this matches the site's playlists.
                    real_url = host + u2[1:]
                else:
                    real_url = host + u2

                # Simple availability check of the resolved link.
                if not get_response(real_url):
                    break
                many_real_url.append(real_url)

            if len(many_real_url) == self.play_num and len(
                    many_data_url) == self.play_num:
                return many_real_url, many_data_url

        return 'defeat', 'defeat'