Example #1
0
def _dl_caption(url, dir_path, fne, ext):
    """Download a caption file, optionally tolerating HTTP failures.

    The arguments are forwarded unchanged to ``_dl_url``.  If that call
    raises ``requests.exceptions.HTTPError`` the error is re-raised, unless
    ``cc.statics.args.ignore_caption_error`` is truthy, in which case the
    failure is only logged as a warning (with traceback).
    """
    try:
        _dl_url(url, dir_path, fne, ext)
    except requests.exceptions.HTTPError:
        if not cc.statics.args.ignore_caption_error:
            raise
        # The user asked for caption failures to be non-fatal.
        logging.warning('could not download caption %s to %s/%s%s',
                        url, dir_path, fne, ext, exc_info=True)
Example #2
0
def _get_dls(episode):
    """Yield the download jobs for ``episode``.

    Each job is a ``(function, url, file_name_stem, extension)`` tuple:

    * the episode page itself (when ``episode.url`` is not None);
    * for every video, either its widest RTMP stream or, when it has no
      streams, an "unavailable" marker job (a warning is logged as well);
    * one job per caption of every video.
    """
    if episode.url is not None:
        yield _dl_url, episode.url, 'index', '.html'
    for vid in episode.videos:
        if not vid.rtmps:
            logging.warning('content is unavailable: %s', vid.page_url)
            yield _unavailable, vid.page_url, vid.fne, '.mp4.unavailable'
        else:
            # Pick the stream with the largest width.
            widest = max(vid.rtmps, key=lambda stream: stream.width)
            yield _dl_rtmp, widest.url, vid.fne, widest.ext
        yield from (
            (_dl_caption, cap.url, vid.fne, cap.ext) for cap in vid.captions)
Example #3
0
def _get_dom_tree_or_fix(url, fix_log_format):
    """Fetch ``url`` as a DOM tree, falling back to the fixing parser.

    When the plain fetch raises ``lxml.etree.XMLSyntaxError``, a warning is
    logged using ``fix_log_format`` (a %-format taking the URL) and the
    document is fetched again through ``_get_url_dom_tree_with_fixes``.
    """
    try:
        return cc.http.get_url_dom_tree(url)
    except lxml.etree.XMLSyntaxError:
        logging.warning(fix_log_format, url, exc_info=True)
        return _get_url_dom_tree_with_fixes(url)


def _get_mediagen_tree(feed, video_blob):
    """Return the DOM tree of the mediagen document for ``video_blob``.

    The MRSS feed URL is derived from ``feed.show_url`` by keeping its
    scheme and host, replacing the path with ``feeds/mrss`` and setting the
    query to ``uri=<video_blob.uri>``.  The first
    ``{http://search.yahoo.com/mrss/}content`` element of that feed supplies
    (through its ``url`` attribute) the mediagen URL, which is then fetched.
    Both fetches fall back to the fixing parser on malformed XML.

    Raises:
        cc.Error: if the MRSS feed contains no ``content`` element.
    """
    parts = urllib.parse.urlparse(feed.show_url)
    new_parts = urllib.parse.ParseResult(
        scheme=parts.scheme,
        netloc=parts.netloc,
        path='feeds/mrss',
        params='',
        query=urllib.parse.urlencode({'uri': video_blob.uri}),
        fragment='')
    mrss_url = urllib.parse.urlunparse(new_parts)
    mrss_tree = _get_dom_tree_or_fix(mrss_url, 'fix mrss xml %s')
    content = mrss_tree.find('.//{http://search.yahoo.com/mrss/}content')
    if content is None:
        # find() returns None when nothing matches; fail with a message
        # that names the feed instead of an AttributeError on None.
        raise cc.Error('No media content element in mrss feed: %s' % mrss_url)
    mediagen_url = content.get('url')
    return _get_dom_tree_or_fix(mediagen_url, 'fix mediagen xml %s')
Example #4
0
def _sha1_of_file(path, chunk_size=1 << 20):
    """Return the SHA-1 digest of the file at ``path``.

    The file is read ``chunk_size`` bytes at a time so that hashing a large
    partial download does not load the whole file into memory.
    """
    sha1 = hashlib.sha1()
    with open(path, 'rb') as input_file:
        for chunk in iter(lambda: input_file.read(chunk_size), b''):
            sha1.update(chunk)
    return sha1.digest()


def _kill_and_reap(proc, reap_timeout):
    """Kill ``proc`` and wait up to ``reap_timeout`` for it to go away."""
    proc.kill()
    try:
        # Collect the exit status so the killed child does not linger as a
        # zombie; bounded so a stuck process cannot hang the downloader.
        proc.wait(timeout=reap_timeout)
    except psutil.TimeoutExpired:
        logging.warning('rtmp: pid=%d still running after kill', proc.pid)


def _wait_within_limits(proc, url, output_path_part,
                        download_timeout,
                        monitor_period,
                        cpu_bound,
                        memory_bound):
    """Wait for ``proc`` to exit, killing it if it exceeds a limit.

    Every ``monitor_period`` the process's CPU and memory percentages are
    sampled; the process is killed when either exceeds its bound or when
    ``download_timeout`` seconds have elapsed.

    Returns the value of ``proc.wait()`` on a normal exit, or -1 when the
    process had to be killed.
    """
    # The timer does no work itself: its ``finished`` event merely becomes
    # set once ``download_timeout`` seconds have passed.
    timer = threading.Timer(download_timeout, lambda: None)
    timer.daemon = True
    timer.start()
    try:
        while True:
            try:
                return proc.wait(timeout=monitor_period)
            except psutil.TimeoutExpired:
                pass
            cpu_percent = proc.get_cpu_percent(interval=None)
            memory_percent = proc.get_memory_percent()
            logging.trace('rtmp: pid=%d cpu=%.1f memory=%.1f',
                          proc.pid, cpu_percent, memory_percent)
            if cpu_percent > cpu_bound:
                logging.error('rtmp: cpu limit exceeded')
            elif memory_percent > memory_bound:
                logging.error('rtmp: memory limit exceeded')
            elif timer.finished.is_set():
                logging.error('rtmp: timeout: %s -> %s', url, output_path_part)
            else:
                continue
            _kill_and_reap(proc, monitor_period)
            return -1
    finally:
        # Always release the timer thread, even if monitoring raised.
        timer.cancel()


def _download(url, file_name, cwd,
              prog,
              download_timeout,
              monitor_period,
              cpu_bound,
              memory_bound,
              partial_okay):
    """Download ``url`` to ``file_name`` by running ``prog`` as a subprocess.

    Data is written to ``<file_name>.part`` and renamed to ``file_name``
    once the download is considered finished.

    Args:
        url: Source URL; passed to ``_make_subprocess``.
        file_name: Name of the final output file, relative to ``cwd``.
        cwd: Working/output directory; the current directory when falsy.
        prog: Downloader program name.  Only ``'rtmpdump'`` gets the
            incomplete-transfer retry handling.
        download_timeout: Seconds a single attempt may run before it is
            killed; also the upper bound on the retry back-off delay.
        monitor_period: Timeout handed to ``proc.wait()`` between resource
            checks.
        cpu_bound: CPU percentage above which the subprocess is killed.
        memory_bound: Memory percentage above which the subprocess is killed.
        partial_okay: When true, an incomplete rtmpdump transfer is accepted
            as-is instead of being retried.

    Raises:
        cc.Error: if the subprocess was killed, exited with a non-zero
            status, or kept reporting an incomplete transfer until the
            back-off delay exceeded ``download_timeout``.
    """
    cwd = cwd or os.getcwd()
    file_name_part = file_name + '.part'
    output_path = os.path.join(cwd, file_name)
    output_path_part = os.path.join(cwd, file_name_part)
    digest = None
    for retry_exp in itertools.count():
        proc = _make_subprocess(url, file_name_part, cwd, prog)
        ret = _wait_within_limits(proc, url, output_path_part,
                                  download_timeout,
                                  monitor_period,
                                  cpu_bound,
                                  memory_bound)
        if prog == 'rtmpdump' and ret == RTMPDUMP_INCOMPLETE:
            if partial_okay:
                logging.warning(
                    'rtmp: partial download %s to %s', url, file_name)
                break
            new_digest = _sha1_of_file(output_path_part)
            if new_digest == digest:
                # We made no progress; the download might be completed.
                # Let's not retry and assume it was.
                logging.warning(
                    'rtmp: no progress: url=%s file_name=%s', url, file_name)
                break
            digest = new_digest
            # rtmpdump didn't complete the transfer; resume might get further.
            # Back off exponentially: 1, 2, 4, ... seconds.
            retry = 2 ** retry_exp
            if retry > download_timeout:
                # Give up; the status check below raises for this attempt.
                logging.error('rtmp: retry timeout: %s -> %s',
                              url, output_path_part)
            else:
                logging.trace('rtmp: retry=%d url=%s', retry, url)
                time.sleep(retry)
                continue
        # A None status (no exit code available) is treated as success.
        if ret is not None and ret != 0:
            raise cc.Error('Could not download (ret=%s): %s' % (ret, url))
        # Okay, we are done.
        break
    os.rename(output_path_part, output_path)
    logging.info('rtmp: success: %s -> %s', url, output_path)