def recode(url, duration=None, vfmt=2, outpath='./', npf=3, freq=10,
           tmin=5, tmax=20, proxy=None, log=None):
    assert duration is None or duration > 0
    name = '%s.%s.ts' % (_util.get_time_string(), hash(url))
    outfile = pjoin(outpath, name)
    log.info("|=> begin: %s", url)
    if duration:
        log.info("|=> duration: %d", duration)
    log.info("|=> output: %s", outfile)
    _util.assure_path(outpath)
    axel = WorkShop(tmin=tmin, tmax=tmax, log=log)
    m3u8 = M3u8Stream(axel=axel, proxy=proxy, log=log)
    fetcher = HttpFetcher()
    start_at = time.time()
    try:
        with open(outfile, 'wb') as fp:
            if url.find('m3u8') > 0 or __is_url_file(url):
                axel.serve()
                m3u8.recode(url=url, duration=duration, vfmt=vfmt,
                            fp=fp, npf=npf, freq=freq)
            else:
                fetcher.fetch(url=url, fp=fp)
        log.info("|=> end: total=%.2fs, out=%s",
                 time.time() - start_at, outfile)
    finally:
        if axel.isAlive():
            axel.setToStop()
            axel.join()
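# Usage sketch (illustrative only): the URL, output path and duration below are
# placeholder values, not taken from the project. Any standard logging.Logger
# works here, since recode() only logs via log.info().
import logging

logging.basicConfig(level=logging.INFO)
_demo_log = logging.getLogger('recode-demo')

# record roughly 60 seconds of an HLS (m3u8) stream into ./out/
recode('http://example.com/live/index.m3u8',
       duration=60, outpath='./out/', npf=3, log=_demo_log)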
def __makeSubWorks(self):
    curr_size = 0
    size = self.__get_content_len(self.url)
    clip_ranges = HttpFetcher.div_file(size, self.npf)
    if not self.__is_inter_file:
        self.__fp = self.out
        self.retrans = False
    else:
        self.tmp_file = self.out + '!'
        if os.path.exists(self.tmp_file):
            self.__fp = open(self.tmp_file, 'rb+')
        else:
            self.__fp = open(self.tmp_file, 'wb')
        if size and self.retrans:
            self.__history_file = HistoryFile()
            clip_ranges, curr_size = self.__history_file.load(
                self.tmp_file, clip_ranges, size)
    # cannot resume: fall back to a single, unranged request
    if clip_ranges is None or size is None or size == 0:
        self.retrans = False
        self.log.debug('[DownloadUrl] can not retransmission, %s', self.url)
        clip_ranges = [None]
        size = 0
    if self.progress_bar:
        self.progress_bar.set(total_size=size, curr_size=curr_size)
    subworks = []
    syn_file = util.SynFileContainer(self.__fp)
    for clip_range in clip_ranges:
        work = UrlTask.HttpSubWork(url=self.url, fp=syn_file,
                                   data_range=clip_range, parent=self,
                                   headers=self.headers, proxy=self.proxy,
                                   callback=self.__update, log=self.log)
        subworks.append(work)
    self.addSubWorks(subworks)
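# Illustrative sketch of the shape of data __makeSubWorks expects from
# HttpFetcher.div_file(): a list of (start, end) byte ranges covering the file,
# one per sub-work, or [None] when the size is unknown. The project's real
# div_file() is not reproduced here; this is an assumed equivalent for reading.
def div_file_sketch(size, npf):
    if not size:
        return [None]                      # unknown size: one unranged request
    chunk = size // npf
    ranges = []
    for i in range(npf):
        start = i * chunk
        end = size - 1 if i == npf - 1 else (i + 1) * chunk - 1
        ranges.append((start, end))        # inclusive byte range
    return ranges

# div_file_sketch(10, 3) -> [(0, 2), (3, 5), (6, 9)]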
class HttpSubWork(threadutil.WorkBase):

    def __init__(self, url, fp, data_range, parent, headers=None,
                 proxy=None, callback=None, log=None):
        threadutil.WorkBase.__init__(self, parent=parent)
        self.url = url
        self.fp = fp
        self.data_range = data_range
        self.proxy = proxy
        self.headers = headers
        self.__callback = callback
        self.log = log
        self.__retry_count = 0
        self.__http_fetcher = HttpFetcher(log=log)
        if self.proxy:
            self.__http_fetcher.set_proxy(self.proxy)

    def work(self, this_thread, log):
        isSetStop = lambda: this_thread.isSetStop() or self.isSetStop()
        while not isSetStop():
            try:
                if self.headers:
                    self.__http_fetcher.add_headers(self.headers)
                self.__http_fetcher.fetch(self.url, fp=self.fp,
                                          data_range=self.data_range,
                                          isSetStop=isSetStop,
                                          callback=self.__callback)
                return
            except _socket_timeout:
                self.__retry_count += 1
                start_at = self.__http_fetcher.handler.start_at
                end_at = self.__http_fetcher.handler.end_at
                log.debug('[HttpSubWork] timeout(%d-[%d,%d]) %s',
                          self.__retry_count, start_at, end_at, self.url)
                _sleep(1)
            except urllib2.URLError as e:
                log.debug('[HttpSubWork] Network not work :( %s', e.reason)
                _sleep(1)
            except:
                raise
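# For context, a minimal sketch (not the project's HttpFetcher) of how a
# (start, end) data_range conventionally maps onto an HTTP Range request in
# urllib2; the function name and timeout value are assumptions for illustration.
import urllib2

def fetch_range_sketch(url, start, end):
    req = urllib2.Request(url)
    req.add_header('Range', 'bytes=%d-%d' % (start, end))   # inclusive bounds
    resp = urllib2.urlopen(req, timeout=10)
    return resp.read()   # 206 Partial Content carries only the requested slice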