Beispiel #1
0
    def start_request(self, href, path):
        """Crawl every listing page of one category.

        The first listing page is fetched once; it provides both the
        pagination information and the content of page 1. Each listing page
        is then handed to ``self.list_data``.

        Args:
            href: Category path appended to ``self.href`` to build the
                category URL.
            path: Directory that receives the downloads; created if missing.
        """
        category_url = self.href + href

        # Fetch page 1 a single time and reuse the response below instead of
        # requesting the same URL again inside the loop.
        first_response = use_proxy_requst(category_url)
        page_labels = etree.HTML(first_response.content.decode()).xpath(
            "//ul[@class='pagination']/li[last()-2]/a/text()")
        # A category without a pagination widget is treated as a single page
        # rather than failing with IndexError.
        # NOTE(review): assumes the selected <li> holds the last page number.
        total_pages = int(page_labels[0]) if page_labels else 1

        # exist_ok avoids the check-then-create race of exists() + makedirs().
        os.makedirs(path, exist_ok=True)

        # Page numbers are 1-based; the upper bound is inclusive so the last
        # listing page is crawled as well.
        for page_no in range(1, total_pages + 1):
            if page_no == 1:
                page_response = first_response
            else:
                page_response = use_proxy_requst(
                    category_url + "/index_" + str(page_no) + ".html")

            self.list_data(page_response, path)
Beispiel #2
0
 def get_channel(self):
     """Walk the category tags of the landing page and crawl each one.

     Every child element of the ``tags-list`` items that carries text is
     passed to ``start_request`` together with its ``href`` attribute and a
     ``video/<text>/`` target directory.
     """
     landing_html = use_proxy_requst(self.href).content.decode()
     tag_nodes = etree.HTML(landing_html).xpath(
         "//ul[@class='tags-list']/li/*")
     for tag_node in tag_nodes:
         label = tag_node.text
         # Elements without text are not categories; skip them.
         if not label:
             continue
         target_dir = "video/" + label + "/"
         self.start_request(tag_node.attrib["href"], target_dir)
Beispiel #3
0
 def ts_download(ts_item):
     """Download one HLS segment into the per-video segment directory.

     ``path``, ``video_name`` and ``base_url`` are not parameters; they are
     resolved from the surrounding scope. A segment that already exists on
     disk is left untouched. Failures are reported and swallowed so that one
     bad segment does not abort the remaining downloads.

     Args:
         ts_item: Segment file name as listed in the playlist.
     """
     try:
         segment_dir = path + video_name
         segment_file = segment_dir + "/" + ts_item
         if not os.path.exists(segment_file):
             res_video = use_proxy_requst(base_url + "720kb/hls/" + ts_item)
             if res_video.status_code == 200:
                 # exist_ok avoids the check-then-create race.
                 os.makedirs(segment_dir, exist_ok=True)
                 with open(segment_file, "wb") as f:
                     f.write(res_video.content)
     except Exception as exc:
         # Best-effort download: report which segment failed and why, but let
         # KeyboardInterrupt / SystemExit propagate (a bare except would not).
         print("download ts fail", ts_item, exc)
Beispiel #4
0
 def down_video(self, video_src, url, path, video_name):
     """Download a video directly and store it as ``<path><video_name>.mp4``.

     When the direct request does not answer with HTTP 200, the download is
     delegated to ``self.blob_download``.

     Args:
         video_src: Direct URL of the video file.
         url: URL of the detail page, forwarded to ``blob_download``.
         path: Target directory prefix (string-concatenated with the name).
         video_name: File name without extension.
     """
     response = use_proxy_requst(video_src)
     if response.status_code != 200:
         print("正常下载出错!")
         # Fall back to the m3u8-based download.
         self.blob_download(
             url, path, video_name)
         return

     try:
         # The with-block flushes and closes the file on exit.
         with open(path+video_name+".mp4", "wb") as f:
             f.write(response.content)
     except OSError as exc:
         print("==========保存出错!==========", exc)
     else:
         # Report completion only after the file was actually written.
         print(video_name+"完成")
Beispiel #5
0
 def list_data(self, html, path):
     """Queue a download for every video linked from one listing page.

     Each ``data-href`` of the ``masonry`` list is opened, the video link and
     title are extracted from the detail page, and ``self.down_video`` is
     scheduled on a worker pool unless ``<path><title>.mp4`` already exists.
     The call returns after all queued downloads have finished.

     Args:
         html: Response object of the listing page; its ``content`` is
             decoded and parsed.
         path: Target directory prefix; spaces are removed before use.
     """
     detail_links = etree.HTML(html.content.decode()).xpath(
         "//ul[@class='masonry']/li/@data-href")
     path = path.replace(" ", "")
     pool = Pool(moreThread)
     try:
         for href in detail_links:
             # Very short values are skipped, as in the original filter.
             if len(href) <= 2:
                 continue
             detail_url = self.href + href
             # Parse the detail page once and run both queries on that tree.
             detail_tree = etree.HTML(
                 use_proxy_requst(detail_url).content.decode())
             sources = detail_tree.xpath("//a[@class='meihua_btn']/@href")
             titles = detail_tree.xpath(
                 "//h1[@class='article-title']/a/text()")
             if not sources or not titles:
                 # One malformed detail page must not abort the whole listing.
                 print("skip " + detail_url + ": video link or title not found")
                 continue
             video_name = titles[0].replace(" ", "")
             if not os.path.exists(path+video_name+".mp4"):
                 pool.apply_async(
                     self.down_video,
                     (sources[0], detail_url, path, video_name))
     finally:
         # Always release the pool, even when a request or parse step raised.
         pool.close()
         pool.join()
Beispiel #6
0
    def blob_download(self, url, path, video_name):
        """Download a video through its HLS (m3u8) playlist.

        The detail page is searched for the text between ``vHLSurl`` and
        ``m3u8`` to obtain the playlist URL. With ffmpeg available
        (``isHasffmpeg``) the playlist is remuxed straight into
        ``<path><video_name>.mp4``. Otherwise the segments are downloaded one
        by one and concatenated in playlist order. If neither approach
        succeeds and ``isHasyou_get`` is set, ``self.you_get_download`` is
        used as a last resort.

        Args:
            url: URL of the video detail page.
            path: Target directory prefix (string-concatenated with the name).
            video_name: File name without extension.
        """
        # Function-scope imports keep this block self-contained.
        import shutil
        import subprocess

        segment_dir = path + video_name
        output_file = segment_dir + ".mp4"

        def is_ts(val):
            # Playlist tag/comment lines start with "#"; segment entries are
            # the remaining lines that mention "ts".
            return not val.startswith("#") and "ts" in val

        def ts_download(ts_item):
            # Best-effort download of a single segment; existing files are
            # kept so an interrupted run can resume.
            try:
                segment_file = segment_dir + "/" + ts_item
                if not os.path.exists(segment_file):
                    res_video = use_proxy_requst(base_url+"720kb/hls/"+ts_item)
                    if res_video.status_code == 200:
                        os.makedirs(segment_dir, exist_ok=True)
                        with open(segment_file, "wb") as f:
                            f.write(res_video.content)
            except Exception as exc:
                print("download ts fail", ts_item, exc)

        def merge_segments(segment_names):
            # Concatenate the downloaded segments, in playlist order, into the
            # output file. Returns True on success.
            if not segment_names:
                return False
            try:
                with open(output_file, "wb") as merged:
                    for name in segment_names:
                        with open(segment_dir + "/" + name, "rb") as part:
                            shutil.copyfileobj(part, merged)
            except OSError:
                # Do not leave a truncated .mp4 behind: an existing file would
                # make later runs treat this video as already downloaded.
                try:
                    os.remove(output_file)
                except OSError:
                    pass
                return False
            return True

        response = use_proxy_requst(url).content.decode()
        rIndex = response.find("m3u8")
        lIndex = response.find("vHLSurl")
        findStr = response[lIndex:rIndex+5]
        # NOTE(review): txt_wrap_by is defined elsewhere; presumably it
        # returns the text between the two quote characters - confirm.
        target_url = self.txt_wrap_by('"', '"', findStr)
        # Drop the trailing 10 characters to get the stream base URL
        # (NOTE(review): presumably the "index.m3u8" suffix - confirm).
        base_url = target_url[:-10]
        playlist_url = base_url + "720kb/hls/index.m3u8"

        # 0 means success, anything else triggers the fallback below.
        exit_code = 1
        if not isHasffmpeg:
            # No ffmpeg: download every segment and join them manually.
            playlist_lines = use_proxy_requst(
                playlist_url).content.decode().splitlines()
            segments = [line for line in playlist_lines if is_ts(line)]
            for ts_item in segments:
                ts_download(ts_item)
            print("调用合并")
            if merge_segments(segments):
                exit_code = 0
                # The segments are no longer needed once merged.
                shutil.rmtree(segment_dir, ignore_errors=True)
            else:
                print("调用合并失败")
        else:
            # An argument list (no shell) keeps spaces or shell
            # metacharacters in the scraped title from breaking or hijacking
            # the command.
            # NOTE(review): "-absf" is a legacy spelling of the audio
            # bitstream-filter option; confirm the installed ffmpeg accepts it.
            try:
                exit_code = subprocess.run(
                    ["ffmpeg", "-i", playlist_url,
                     "-acodec", "copy", "-vcodec", "copy",
                     "-absf", "aac_adtstoasc", output_file]).returncode
            except OSError:
                # ffmpeg could not be started; use the fallback path.
                exit_code = 1

        if exit_code == 0:
            # video_name carries no extension, so it is printed unmodified.
            print(video_name+'下载完成')
        else:
            print('m3u8')
            if isHasyou_get:
                # Last resort: delegate to the you-get based download.
                self.you_get_download(url, path, video_name)