Beispiel #1
0
 def __init__(self, axel, proxy=None, log=None):
     """Initialise the thread base and this instance's download state.

     @param axel: stored as-is for later use by the instance
     @param proxy: when truthy, passed to the HTTP helper's set_proxy()
     @param log: forwarded to ThreadBase.__init__
     """
     ThreadBase.__init__(self, log=log)
     self.__axel = axel
     self.__oldurls = list()
     self.__urltsks_q = Queue.Queue()
     http_client = HttpUtil()
     self.__http = http_client
     self.__progress_bar = ProgressBar()
     # Without a proxy the HTTP helper keeps its default configuration.
     if not proxy:
         return
     http_client.set_proxy(proxy)
Beispiel #2
0
def main():
    """Demo driver: download the hard-coded URL list and wait for completion.

    One VUrlTask is created per URL and handed to a WorkShop. The function
    then polls once per second until every task reports isArchived() or
    isError(). Ctrl-C stops the wait quietly; any other exception is logged.
    The WorkShop is always asked to stop and joined before returning.
    """
    urls = [
        # Alternative sample URLs kept for manual testing:
        # 'http://v.youku.com/v_show/id_XNzUyNDE4MTQw.html'
        # 'http://i.youku.com/u/UNTc4NzI3MjY0',
        # 'http://v.youku.com/v_show/id_XNzQ5NDAwMDIw.html?from=y1.1-2.10001-0.1-1',
        # 'http://v.youku.com/v_show/id_XNzUwMTE2MDQw.html?f=22611771',
        # 'http://v.youku.com/v_show/id_XNzQ3MjMxMTYw.html',
        'http://video.sina.com.cn/p/ent/v/m/2014-08-14/102164094039.html'
    ]
    log = util.get_logger()
    bar = ProgressBar()
    ws = WorkShop(tmin=1, tmax=2, log=log)
    # NOTE(review): the positional arguments are presumably vidfmt=0, npf=3,
    # outpath='./tmp' -- confirm against the VUrlTask signature.
    dlvs = [VUrlTask(url, 0, 3, './tmp', bar=bar, log=log) for url in urls]
    try:
        ws.serve()
        ws.addTasks(dlvs)
        while dlvs:
            # Rebuild the list in place instead of deleting while iterating:
            # `del dlvs[i]` inside enumerate() skips the element that slides
            # into the freed slot, so finished tasks could linger a pass.
            dlvs[:] = [dlv for dlv in dlvs
                       if not (dlv.isArchived() or dlv.isError())]
            _sleep(1)
    except KeyboardInterrupt:
        pass
    except Exception as e:
        log.exception(e)
    finally:
        ws.setToStop()
        ws.join()
Beispiel #3
0
def main(cfg, log):
    """Download every URL in cfg.urls and wait until all tasks settle.

    @param cfg: configuration object; this function reads playlist, urls,
                outpath, tmin, tmax, vidfmt and npf, and may rewrite
                urls and outpath when playlist is truthy
    @param log: logger passed to the WorkShop and to every task

    In playlist mode each URL is resolved through its playlist parser, the
    clip list replaces cfg.urls, the output directory is extended with the
    parser-supplied sub-path, and an index file 'url.txt' is written there.
    Exceptions raised while serving/waiting are logged; the WorkShop is
    always asked to stop and joined before returning.
    """
    if cfg.playlist:
        # NOTE(review): cfg.urls is rebound inside this loop. Iteration
        # continues over the original list, so with several playlist URLs
        # only the last playlist's clips remain in cfg.urls and cfg.outpath
        # is nested once per playlist -- confirm this is intended.
        for url in cfg.urls:
            outpath, cfg.urls = parsers.getPlayListParser(url).info(url)
            cfg.outpath = pjoin(cfg.outpath, outpath)
            util.assure_path(cfg.outpath)
            with open(pjoin(cfg.outpath, 'url.txt'), 'w') as fp:
                fp.writelines([url + "\n\n"])
                for i, clip in enumerate(cfg.urls):
                    fp.writelines(["[%03d] %s\n" % (i, clip)])

    bar = ProgressBar()
    ws = WorkShop(tmin=cfg.tmin, tmax=cfg.tmax, log=log)
    dlvs = [VUrlTask(url,
                     vidfmt=cfg.vidfmt,
                     npf=cfg.npf,
                     outpath=cfg.outpath,
                     bar=bar,
                     log=log)
            for url in cfg.urls]
    try:
        ws.serve()
        ws.addTasks(dlvs)
        while dlvs:
            # Rebuild the list in place instead of deleting while iterating:
            # `del dlvs[i]` inside enumerate() skips the element that slides
            # into the freed slot, so finished tasks could linger a pass.
            dlvs[:] = [dlv for dlv in dlvs
                       if not (dlv.isArchived() or dlv.isError())]
            _sleep(1)
    except Exception as e:
        log.exception(e)
    finally:
        ws.setToStop()
        ws.join()
Beispiel #4
0
 def __init__(self, axel, proxy=None, log=None):
     """Initialise the thread base and this instance's download state.

     @param axel: stored as-is for later use by the instance
     @param proxy: when truthy, passed to the HTTP helper's set_proxy()
     @param log: forwarded to ThreadBase.__init__
     """
     ThreadBase.__init__(self, log=log)
     self.__axel = axel
     self.__oldurls = list()
     self.__urltsks_q = Queue.Queue()
     http_client = HttpUtil()
     self.__http = http_client
     self.__progress_bar = ProgressBar()
     # Without a proxy the HTTP helper keeps its default configuration.
     if not proxy:
         return
     http_client.set_proxy(proxy)
Beispiel #5
0
def main(argv):
    """Interactive downloader prompt backed by a WorkShop.

    @param argv: accepted for the caller's convenience.
                 NOTE(review): the configuration is read from sys.argv, not
                 from this parameter -- confirm whether that is intended.

    URLs found in the configuration are queued first. Then commands are read
    from the '>>' prompt:
        q     quit
        h     print the usage text
        test  run mainTest()
        other parsed by process_cmd() into (name, url) and queued
    Blank input is ignored and the prompt is shown again.

    Ctrl-C ends the session quietly. Any other exception is logged and
    re-raised. The WorkShop is always asked to stop and joined on exit.
    """
    cfg = config.MiniAxelConfig()
    cfg.read_cmdline_config('miniaxel.ini', script=__file__, argv=sys.argv)
    log = cfg.log
    bar = ProgressBar()
    axel = WorkShop(tmin=cfg.tmin, tmax=cfg.tmax, log=log)

    def add_download(url, name):
        # Single place that builds and queues a download task.
        axel.addTask(UrlTask(url,
                             out=name,
                             npf=cfg.npf,
                             bar=bar,
                             retrans=True,
                             log=cfg.log))

    try:
        if not axel.serve(timeout=3):
            raise ValueError('server not started')
        if hasattr(cfg, 'urls'):
            for url in cfg.urls:
                log.info('add %s', url)
                add_download(url, pjoin(cfg.outpath, find_name(url)))
        while True:
            cmd = raw_input('>>')
            # Compare for equality: `cmd in ('q')` is a substring test on the
            # string 'q' (no tuple is created), which made an empty line quit
            # and made fragments such as 't' or 'es' trigger the self-test.
            if not cmd:
                continue
            if cmd == 'q':
                break
            elif cmd == 'h':
                # Parenthesised form prints the same text on Python 2 and 3.
                print(usage())
            elif cmd == 'test':
                mainTest(axel, bar, log)
            else:
                name, url = process_cmd(cmd)
                if name:
                    name = pjoin(cfg.outpath, name)
                else:
                    name = pjoin(cfg.outpath, find_name(url))
                add_download(url, name)
    except KeyboardInterrupt:
        pass
    except Exception as e:
        log.exception(e)
        raise
    finally:
        axel.setToStop()
        axel.join()
Beispiel #6
0
class M3u8Stream(ThreadBase):
    """Record a live stream described by an m3u8 playlist.

    The playlist is polled periodically; every clip URL not seen before is
    downloaded into memory through the supplied task runner and the finished
    clips are appended, in queue order, to an output stream.
    """

    def __init__(self, axel, proxy=None, log=None):
        """ @param axel: task runner; clip tasks are submitted via addTask()
            @param proxy: when truthy, passed to the HTTP helper's set_proxy()
            @param log: forwarded to ThreadBase.__init__ """
        ThreadBase.__init__(self, log=log)
        self.__oldurls = []                 # clip URLs already queued
        self.__urltsks_q = Queue.Queue()    # submitted tasks, in playlist order
        self.__axel = axel
        self.__http = HttpUtil()
        self.__progress_bar = ProgressBar()
        if proxy:
            self.__http.set_proxy(proxy)

    def recode(self, url, duration, vfmt, fp, npf, freq=10, detach=False):
        """ Store the recording parameters and start recording.

            @param url: m3u8 playlist URL
            @param duration: seconds to record; a falsy value means no limit
            @param vfmt: live video format; used as index into the list of
                         sub-playlists when the playlist references any
            @param fp: output stream; finished clips are written to it
            @param npf: download url stream by n parts per file
            @param freq: seconds between two playlist refreshes
            @param detach: when true call start(), otherwise run the loop in
                           the calling thread """
        self.m3u8url = url
        self.duration = duration
        self.vfmt = int(vfmt) # TODO: ugly conversion
        self.__ostream = fp
        self.__npf = npf
        self.__freq = freq
        if detach:
            self.start()
        else:
            self.run()

    def run(self):
        """ Run the recording loop; always clean up tasks still queued. """
        try:
            self.__loop()
        finally:
            while not self.__urltsks_q.empty():
                self.__urltsks_q.get().cleanup()
            self.log.debug('[M3u8Stream] stop')

    def __loop(self):
        """ Poll the playlist and merge finished clips until stopped.

            Returns when isSetStop() becomes true or the duration elapsed.
            @raise RuntimeError: when the clip being merged reports an error """
        last_clip_at = 0
        stop_at = 0
        if self.duration:
            stop_at = time.time() + self.duration

        curr_tsk = None
        while not self.isSetStop():
            start_at = time.time()
            self.__progress_bar.display()

            if self.duration and start_at >= stop_at:
                self.log.info("[DownloadLiveStream] time's up")
                return

            # refresh the index page every `freq` seconds
            if last_clip_at + self.__freq < start_at:
                self.__enqueue_new_clips()
                last_clip_at = start_at

            # append to stream; handle error; get a new clip
            if curr_tsk:
                if curr_tsk.isArchived():
                    self.log.debug('[M3u8Stream] merge clip, %s', curr_tsk.url)
                    self.__ostream.write(curr_tsk.out.read())
                    curr_tsk.out.close()
                    curr_tsk.cleanup()
                    curr_tsk = None
                elif curr_tsk.isError():
                    self.log.error('[M3u8Stream] error: %s', curr_tsk.url)
                    curr_tsk.cleanup()
                    # A bare `raise` has no active exception here and would
                    # surface as an unrelated error; raise an explicit one.
                    raise RuntimeError('[M3u8Stream] clip download failed: %s'
                                       % curr_tsk.url)
            elif not self.__urltsks_q.empty():
                curr_tsk = self.__urltsks_q.get()

            if time.time() - start_at < 1:
                sleep(1)

    def __enqueue_new_clips(self):
        """ Fetch the playlist and submit a task for every unseen clip URL. """
        urls, _ = self.__get_curr_m3u8_file(self.m3u8url)
        for url in urls:
            if url not in self.__oldurls:
                memfile = BytesIO()
                # read() is redirected to getvalue() so the merge step gets
                # the whole buffer regardless of the current file position.
                memfile.read = memfile.getvalue
                urltask = UrlTask(url, out=memfile, npf=self.__npf,
                                  bar=self.__progress_bar, log=self.log)
                self.__oldurls.append(url)
                self.__axel.addTask(urltask)
                self.__urltsks_q.put(urltask)
        # keep the seen-URL history bounded
        if len(self.__oldurls) > 100:
            self.__oldurls = self.__oldurls[-20:]

    def __get_curr_m3u8_file(self, m3u8url, n=3):
        """ Download and parse one m3u8 playlist.

            If the playlist references sub-playlists ('.m3u8' entries), the
            one selected by self.vfmt (clamped to the last entry) is fetched
            and its result is returned instead.

            @param m3u8url: playlist URL
            @param n: currently unused
            @return: (clip urls, targetduration); both stay empty/0 when the
                     request fails with URLError or a socket timeout """
        urls = []
        sub_m3u8s = []
        targetduration = 0
        try:
            m3u8 = self.__http.get(m3u8url)
            for line in m3u8.splitlines(False):
                line = line.strip(' \n')
                if line == '':
                    continue
                if line.startswith('#'):
                    if line.lower().find('targetduration') > 0:
                        targetduration = int(line.split(':')[1])
                        self.log.debug('[M3u8Stream] targetduration=%d', targetduration)
                else:
                    if line.startswith('http'):
                        urls.append(line)
                    else:
                        # relative entry: resolve against the playlist URL
                        url = urllib.basejoin(M3u8Stream.host_filter(m3u8url), line)
                        if line.endswith('.m3u8'):
                            sub_m3u8s.append(url)
                        else:
                            urls.append(url)

            sm_len = len(sub_m3u8s)
            if sm_len > 0:
                fmt_index = self.vfmt if self.vfmt < sm_len else sm_len-1
                self.log.debug('[M3u8Stream] use sub m3u8 url: %s', sub_m3u8s[fmt_index])
                return self.__get_curr_m3u8_file(sub_m3u8s[fmt_index])
        except urllib2.URLError as e:
            # Log the exception itself: `e.message` is deprecated and is not
            # reliably populated for URLError.
            self.log.warn('[M3u8Stream] network not working: %s', e)
        except _socket_timeout:
            self.log.warn('[M3u8Stream] connection timeout')
        return urls, targetduration

    @staticmethod
    def host_filter(url):
        """ Return the base URL used to resolve relative playlist entries.

            For URLs containing 'ifeng.com' this is scheme and host only
            ('http://host/'); otherwise everything up to the last '/'. """
        if url.find('ifeng.com') > 0:
            return re.match('(^http[s]?://[^/?]*/)', url).group(0)
        else:
            return re.match('(^http[s]?://.*/)', url).group(0)


# if __name__ == "__main__":
#     main()
Beispiel #7
0
class M3u8Stream(ThreadBase):
    """Record a live stream described by an m3u8 playlist.

    The playlist is polled periodically; every clip URL not seen before is
    downloaded into memory through the supplied task runner and the finished
    clips are appended, in queue order, to an output stream.
    """

    def __init__(self, axel, proxy=None, log=None):
        """ @param axel: task runner; clip tasks are submitted via addTask()
            @param proxy: when truthy, passed to the HTTP helper's set_proxy()
            @param log: forwarded to ThreadBase.__init__ """
        ThreadBase.__init__(self, log=log)
        self.__oldurls = []  # clip URLs already queued
        self.__urltsks_q = Queue.Queue()  # submitted tasks, in playlist order
        self.__axel = axel
        self.__http = HttpUtil()
        self.__progress_bar = ProgressBar()
        if proxy:
            self.__http.set_proxy(proxy)

    def recode(self, url, duration, vfmt, fp, npf, freq=10, detach=False):
        """ Store the recording parameters and start recording.

            @param url: m3u8 playlist URL
            @param duration: seconds to record; a falsy value means no limit
            @param vfmt: live video format; used as index into the list of
                         sub-playlists when the playlist references any
            @param fp: output stream; finished clips are written to it
            @param npf: download url stream by n parts per file
            @param freq: seconds between two playlist refreshes
            @param detach: when true call start(), otherwise run the loop in
                           the calling thread """
        self.m3u8url = url
        self.duration = duration
        self.vfmt = int(vfmt)  # TODO: ugly conversion
        self.__ostream = fp
        self.__npf = npf
        self.__freq = freq
        if detach:
            self.start()
        else:
            self.run()

    def run(self):
        """ Run the recording loop; always clean up tasks still queued. """
        try:
            self.__loop()
        finally:
            while not self.__urltsks_q.empty():
                self.__urltsks_q.get().cleanup()
            self.log.debug('[M3u8Stream] stop')

    def __loop(self):
        """ Poll the playlist and merge finished clips until stopped.

            Returns when isSetStop() becomes true or the duration elapsed.
            @raise RuntimeError: when the clip being merged reports an error """
        last_clip_at = 0
        stop_at = 0
        if self.duration:
            stop_at = time.time() + self.duration

        curr_tsk = None
        while not self.isSetStop():
            start_at = time.time()
            self.__progress_bar.display()

            if self.duration and start_at >= stop_at:
                self.log.info("[DownloadLiveStream] time's up")
                return

            # refresh the index page every `freq` seconds
            if last_clip_at + self.__freq < start_at:
                self.__enqueue_new_clips()
                last_clip_at = start_at

            # append to stream; handle error; get a new clip
            if curr_tsk:
                if curr_tsk.isArchived():
                    self.log.debug('[M3u8Stream] merge clip, %s', curr_tsk.url)
                    self.__ostream.write(curr_tsk.out.read())
                    curr_tsk.out.close()
                    curr_tsk.cleanup()
                    curr_tsk = None
                elif curr_tsk.isError():
                    self.log.error('[M3u8Stream] error: %s', curr_tsk.url)
                    curr_tsk.cleanup()
                    # A bare `raise` has no active exception here and would
                    # surface as an unrelated error; raise an explicit one.
                    raise RuntimeError('[M3u8Stream] clip download failed: %s'
                                       % curr_tsk.url)
            elif not self.__urltsks_q.empty():
                curr_tsk = self.__urltsks_q.get()

            if time.time() - start_at < 1:
                sleep(1)

    def __enqueue_new_clips(self):
        """ Fetch the playlist and submit a task for every unseen clip URL. """
        urls, _ = self.__get_curr_m3u8_file(self.m3u8url)
        for url in urls:
            if url not in self.__oldurls:
                memfile = BytesIO()
                # read() is redirected to getvalue() so the merge step gets
                # the whole buffer regardless of the current file position.
                memfile.read = memfile.getvalue
                urltask = UrlTask(url,
                                  out=memfile,
                                  npf=self.__npf,
                                  bar=self.__progress_bar,
                                  log=self.log)
                self.__oldurls.append(url)
                self.__axel.addTask(urltask)
                self.__urltsks_q.put(urltask)
        # keep the seen-URL history bounded
        if len(self.__oldurls) > 100:
            self.__oldurls = self.__oldurls[-20:]

    def __get_curr_m3u8_file(self, m3u8url, n=3):
        """ Download and parse one m3u8 playlist.

            If the playlist references sub-playlists ('.m3u8' entries), the
            one selected by self.vfmt (clamped to the last entry) is fetched
            and its result is returned instead.

            @param m3u8url: playlist URL
            @param n: currently unused
            @return: (clip urls, targetduration); both stay empty/0 when the
                     request fails with URLError or a socket timeout """
        urls = []
        sub_m3u8s = []
        targetduration = 0
        try:
            m3u8 = self.__http.get(m3u8url)
            for line in m3u8.splitlines(False):
                line = line.strip(' \n')
                if line == '':
                    continue
                if line.startswith('#'):
                    if line.lower().find('targetduration') > 0:
                        targetduration = int(line.split(':')[1])
                        self.log.debug('[M3u8Stream] targetduration=%d',
                                       targetduration)
                else:
                    if line.startswith('http'):
                        urls.append(line)
                    else:
                        # relative entry: resolve against the playlist URL
                        url = urllib.basejoin(M3u8Stream.host_filter(m3u8url),
                                              line)
                        if line.endswith('.m3u8'):
                            sub_m3u8s.append(url)
                        else:
                            urls.append(url)

            sm_len = len(sub_m3u8s)
            if sm_len > 0:
                fmt_index = self.vfmt if self.vfmt < sm_len else sm_len - 1
                self.log.debug('[M3u8Stream] use sub m3u8 url: %s',
                               sub_m3u8s[fmt_index])
                return self.__get_curr_m3u8_file(sub_m3u8s[fmt_index])
        except urllib2.URLError as e:
            # Log the exception itself: `e.message` is deprecated and is not
            # reliably populated for URLError.
            self.log.warn('[M3u8Stream] network not working: %s', e)
        except _socket_timeout:
            self.log.warn('[M3u8Stream] connection timeout')
        return urls, targetduration

    @staticmethod
    def host_filter(url):
        """ Return the base URL used to resolve relative playlist entries.

            For URLs containing 'ifeng.com' this is scheme and host only
            ('http://host/'); otherwise everything up to the last '/'. """
        if url.find('ifeng.com') > 0:
            return re.match('(^http[s]?://[^/?]*/)', url).group(0)
        else:
            return re.match('(^http[s]?://.*/)', url).group(0)


# if __name__ == "__main__":
#     main()