Beispiel #1
0
def save_url(url, name, reporthook = simple_hook):
    """Download ``url`` into the file ``name``, resuming a partial file.

    A plain request is sent first to learn the total size from the
    ``Content-Length`` header. If ``name`` already exists with exactly that
    size the download is skipped. If the size is known and differs, a
    ``Range`` request starting at the current file size is issued and, when
    the server honours it, the data is appended to the existing file.

    Args:
        url: Address of the resource to fetch.
        name: Path of the local file to write.
        reporthook: Callable invoked as ``reporthook(blocknum, bs, size)``
            once before the transfer and again after every block written.
            ``size`` is -1 when the server did not report a length.

    Returns:
        None.
    """
    bs = 1024*8
    size = -1
    blocknum = 0
    open_mode = 'wb'
    req = Request(url, headers = fake_headers)
    response = urlopen(req, None)
    try:
        if "content-length" in response.headers:
            size = int(response.headers["Content-Length"])
        if os.path.exists(name):
            filesize = os.path.getsize(name)
            if filesize == size:
                print('Skipped: file already downloaded')
                return
            elif -1 != size:
                # The probe response is no longer needed; release its
                # connection before asking for the missing tail.
                response.close()
                req.add_header('Range', 'bytes=%d-' % filesize)
                response = urlopen(req, None)
                # Append only when the server honoured the range (206).
                # A plain 200 carries the whole body again, which has to
                # replace the partial file instead of being appended to it.
                if response.getcode() == 206:
                    blocknum = filesize // bs
                    open_mode = 'ab'
        reporthook(blocknum, bs, size)
        with open(name, open_mode) as tfp:
            while True:
                block = response.read(bs)
                if not block:
                    break
                tfp.write(block)
                blocknum += 1
                reporthook(blocknum, bs, size)
    finally:
        # Closing is idempotent, so this is safe even when the probe
        # response was already closed above.
        response.close()
Beispiel #2
0
def save_url(url, name, reporthook=simple_hook):
    """Fetch ``url`` and store it in ``name``, continuing a partial download.

    The total size is taken from the ``Content-Length`` header of an initial
    request. An existing file of exactly that size is treated as complete
    and nothing is written. Otherwise, when the size is known, the remaining
    bytes are requested with a ``Range`` header and appended, provided the
    server answers with a partial response.

    Args:
        url: Address of the resource to fetch.
        name: Path of the local file to write.
        reporthook: Progress callback, called as
            ``reporthook(blocknum, bs, size)`` before the first block and
            after each block written. ``size`` is -1 if the length is
            unknown.

    Returns:
        None.
    """
    bs = 1024 * 8
    size = -1
    blocknum = 0
    open_mode = 'wb'
    req = Request(url, headers=fake_headers)
    response = urlopen(req, None)
    try:
        if "content-length" in response.headers:
            size = int(response.headers["Content-Length"])
        if os.path.exists(name):
            filesize = os.path.getsize(name)
            if filesize == size:
                print('Skipped: file already downloaded')
                return
            elif -1 != size:
                # Drop the first response before re-requesting, so its
                # connection is not left open for the whole transfer.
                response.close()
                req.add_header('Range', 'bytes=%d-' % filesize)
                response = urlopen(req, None)
                # Only a 206 reply contains just the missing tail. If the
                # server ignored the Range header (200), the file is
                # rewritten from scratch rather than corrupted by appending.
                if response.getcode() == 206:
                    blocknum = filesize // bs
                    open_mode = 'ab'
        reporthook(blocknum, bs, size)
        with open(name, open_mode) as tfp:
            while True:
                block = response.read(bs)
                if not block:
                    break
                tfp.write(block)
                blocknum += 1
                reporthook(blocknum, bs, size)
    finally:
        # close() may run twice on the first response; that is harmless.
        response.close()
Beispiel #3
0
def save_url(url, name, ext, status, part=None, reporthook=simple_hook):
    """Download ``url`` to ``<name>.<ext>`` (or ``<name>_<part>.<ext>``).

    A non-empty existing file is resumed: the request asks for the range
    starting at the last byte already on disk, so that a complete file still
    yields a satisfiable range instead of a 416 error. The total size is
    read from ``Content-Range`` and the one overlapping byte is discarded
    before appending. If the server ignores the range request, the body is
    treated as the whole file and written from scratch.

    Args:
        url: Address of the resource to fetch.
        name: Output path without extension.
        ext: File extension, without the leading dot.
        status: Mutable sequence or mapping; ``status[part]`` is set to 1
            once the file on disk has the expected size.
        part: Index of the part being downloaded, or ``None`` for a
            single-file download (recorded as part 0).
        reporthook: Progress callback, called as
            ``reporthook(blocknum, bs, size)`` before the first block and
            after each block written. ``size`` is -1 if unknown.

    Returns:
        None.
    """
    if part is None:
        print('Download: ' + name)
        name = name + '.' + ext
        part = 0
    else:
        print('\nDownload: %s part %d' % (name, part))
        name = '%s_%d.%s' % (name, part, ext)
    bs = 1024 * 8
    size = -1
    blocknum = 0
    open_mode = 'wb'
    response = None
    req = Request(url, headers=fake_headers)
    try:
        filesize = os.path.getsize(name) if os.path.exists(name) else 0
        if filesize:
            req.add_header('Range',
                           'bytes=%d-' % (filesize - 1))  # get +1, avoid 416
            response = urlopen(req, None)
            # Explicit status handling instead of an assert: asserts vanish
            # under ``python -O`` and a 200 reply is a legitimate answer from
            # servers without range support.
            if response.status == 206:
                size = int(response.headers['Content-Range'].split('/')[-1])
                if filesize == size:
                    print('Skipped: file already downloaded')
                    status[part] = 1
                    return
                if filesize < size:
                    blocknum = filesize // bs
                    open_mode = 'ab'
                    response.read(1)  # drop the byte we already have
            # Any other status: the response holds the complete body, so
            # keep open_mode == 'wb' and let size come from Content-Length.
        if response is None:
            response = urlopen(req, None)
        if size < 0:
            size = int(response.headers.get('Content-Length', -1))
        reporthook(blocknum, bs, size)
        with open(name, open_mode) as tfp:
            while True:
                block = response.read(bs)
                if not block:
                    break
                tfp.write(block)
                blocknum += 1
                reporthook(blocknum, bs, size)
    finally:
        if response is not None:
            response.close()
    # Mark the part as done only if the file on disk has the expected size.
    if os.path.exists(name):
        filesize = os.path.getsize(name)
        if filesize == size:
            status[part] = 1
Beispiel #4
0
def get_head_response(url, headers=fake_headers):
    """Return the response to a HEAD request for ``url``.

    If the server rejects HEAD with "HTTP Error 405", a regular GET request
    is sent instead and its response is closed right away before being
    returned. Any other IOError propagates to the caller.
    """
    try:
        head_request = Request(url, headers=headers)
        head_request.get_method = lambda: 'HEAD'
        response = urlopen(head_request)
    except IOError as err:
        # Anything other than "method not allowed" is a real failure.
        if 'HTTP Error 405' not in str(err):
            raise
        # HEAD is not supported here: fall back to GET and skip the body.
        get_request = Request(url, headers=headers)
        response = urlopen(get_request)
        response.close()
    # urllib follows redirects on its own, and telling it not to would take
    # too much code, so the response describes the final location.
    return response
Beispiel #5
0
def save_url(url, name, ext, status, part=None, reporthook=simple_hook):
    """Download ``url`` to ``<name>.<ext>`` (or ``<name>_<part>_.<ext>``).

    A non-empty existing file is resumed. The range request starts at the
    last byte already on disk rather than one past it: asking for
    ``bytes=<filesize>-`` on a complete file is unsatisfiable and makes the
    server answer 416, so the "already downloaded" case could never be
    detected. The single overlapping byte is discarded before appending.
    If the server ignores the range request, the whole body is written from
    scratch.

    Args:
        url: Address of the resource to fetch.
        name: Output path without extension.
        ext: File extension, without the leading dot.
        status: Mutable sequence or mapping; ``status[part]`` is set to 1
            once the file on disk has the expected size.
        part: Index of the part being downloaded, or ``None`` for a
            single-file download (recorded as part 0).
        reporthook: Progress callback, called as
            ``reporthook(blocknum, bs, size)`` before the first block and
            after each block written. ``size`` is -1 if unknown.

    Returns:
        None.
    """
    if part is None:
        print("Download: " + name)
        name = name + '.' + ext
        part = 0
    else:
        print("Download: " + name + " part %d" % part)
        name = name + '_%d_.' % part + ext
    bs = 1024 * 8
    size = -1
    blocknum = 0
    open_mode = 'wb'
    response = None
    req = Request(url, headers=fake_headers)
    try:
        filesize = os.path.getsize(name) if os.path.exists(name) else 0
        if filesize:
            # Re-request the last byte we hold so the range is satisfiable
            # even when the file is already complete.
            req.add_header('Range', 'bytes=%d-' % (filesize - 1))
            response = urlopen(req, None)
            if response.status == 206:
                size = int(response.headers['Content-Range'].split('/')[-1])
                if filesize == size:
                    print('Skipped: file already downloaded')
                    status[part] = 1
                    return
                if filesize < size:
                    blocknum = filesize // bs
                    open_mode = 'ab'
                    response.read(1)  # drop the byte we already have
        if response is None:
            # No file yet, or an empty one: plain download from the start.
            response = urlopen(req, None)
        if size < 0:
            size = int(response.headers.get('Content-Length', -1))
        reporthook(blocknum, bs, size)
        with open(name, open_mode) as tfp:
            while True:
                block = response.read(bs)
                if not block:
                    break
                tfp.write(block)
                blocknum += 1
                reporthook(blocknum, bs, size)
    finally:
        if response is not None:
            response.close()
    # Mark the part as done only if the file on disk has the expected size.
    if os.path.exists(name):
        filesize = os.path.getsize(name)
        if filesize == size:
            status[part] = 1
Beispiel #6
0
def get_content(url, headers=fake_headers, data=None, charset=None):
    """Fetch a URL and return its body, decompressed and decoded.

    Args:
        url: A URL.
        headers: Request headers used by the client.
        data: Optional request body handed to ``Request``.
        charset: Encoding used to decode the body. ``None`` detects it from
            the ``Content-Type`` header, then from a ``charset=``
            declaration inside the body, and finally falls back to UTF-8.
            The special value ``'ignore'`` skips decoding.

    Returns:
        The content as a string. The raw (decompressed) bytes are returned
        instead when ``charset`` is ``'ignore'`` or when the charset cannot
        be used for decoding.
    """
    logger.debug("get_content> URL: " + url)
    req = Request(url, headers=headers, data=data)
    #if cookies_txt:
    #    cookies_txt.add_cookie_header(req)
    #    req.headers.update(req.unredirected_hdrs)
    response = urlopen(req)
    try:
        data = response.read()
        resheader = response.info()
    finally:
        # Release the connection as soon as the body has been read.
        response.close()

    # Handle HTTP compression for gzip and deflate (zlib)
    if 'Content-Encoding' in resheader:
        content_encoding = resheader['Content-Encoding']
    elif hasattr(resheader, 'get_payload'):
        # Headers that failed to parse may end up in the message payload.
        payload = resheader.get_payload()
        if isinstance(payload, str):
            content_encoding = match1(payload, r'Content-Encoding:\s*([\w-]+)')
        else:
            content_encoding = None
    else:
        content_encoding = None
    if content_encoding == 'gzip':
        data = ungzip(data)
    elif content_encoding == 'deflate':
        data = undeflate(data)

    if charset == 'ignore':
        return data

    # Decode the response body
    if charset is None:
        if 'Content-Type' in resheader:
            charset = match1(resheader['Content-Type'], r'charset=([\w-]+)')
        charset = charset or match1(str(data), r'charset=\"([\w-]+)',
                                    r'charset=([\w-]+)') or 'utf-8'
    logger.debug("get_content> Charset: " + charset)
    try:
        data = data.decode(charset, errors='replace')
    except (LookupError, ValueError):
        # Unknown or unusable codec name: hand back the undecoded bytes.
        logger.warning("wrong charset for {}".format(url))
    return data
Beispiel #7
0
def get_content(url, headers=fake_headers, data=None, charset = None):
    """Request ``url`` and return the response body as text.

    Args:
        url: A URL.
        headers: Request headers used by the client.
        data: Optional request body handed to ``Request``.
        charset: Encoding for decoding the body. With ``None`` the charset
            is looked up in the ``Content-Type`` header, then in a
            ``charset=`` declaration in the body, with UTF-8 as the last
            resort. Pass ``'ignore'`` to get the bytes without decoding.

    Returns:
        The content as a string, or the raw (decompressed) bytes when
        ``charset`` is ``'ignore'`` or the charset cannot be used.
    """
    logger.debug("get_content> URL: " + url)
    req = Request(url, headers=headers, data=data)
    #if cookies_txt:
    #    cookies_txt.add_cookie_header(req)
    #    req.headers.update(req.unredirected_hdrs)
    response = urlopen(req)
    try:
        data = response.read()
        resheader = response.info()
    finally:
        # The body is fully read; do not keep the connection around.
        response.close()

    # Handle HTTP compression for gzip and deflate (zlib)
    if 'Content-Encoding' in resheader:
        content_encoding = resheader['Content-Encoding']
    elif hasattr(resheader, 'get_payload'):
        # Headers that failed to parse may end up in the message payload.
        payload = resheader.get_payload()
        if isinstance(payload, str):
            content_encoding =  match1(payload, r'Content-Encoding:\s*([\w-]+)')
        else:
            content_encoding = None
    else:
        content_encoding = None
    if content_encoding == 'gzip':
        data = ungzip(data)
    elif content_encoding == 'deflate':
        data = undeflate(data)

    if charset == 'ignore':
        return data

    # Decode the response body
    if charset is None:
        if 'Content-Type' in resheader:
            charset = match1(resheader['Content-Type'], r'charset=([\w-]+)')
        charset = charset or match1(str(data), r'charset=\"([\w-]+)', r'charset=([\w-]+)') or 'utf-8'
    logger.debug("get_content> Charset: " + charset)
    try:
        data = data.decode(charset, errors='replace')
    except (LookupError, ValueError):
        # Codec name not known to Python: return the bytes untouched.
        logger.warning("wrong charset for {}".format(url))
    return data
Beispiel #8
0
    def prepare(self):
        """Build the ``VideoInfo`` for the live room at ``self.url``.

        The room page is fetched and the JSON assigned to
        ``window.HOSTINFO`` supplies the title, the artist and the stream
        URL. The stream URL is opened once to check that the room is
        currently broadcasting.

        Returns:
            A ``VideoInfo`` with a single stream type ``'current'``.

        Raises:
            AssertionError: With message ``'offline'`` when the stream URL
                cannot be opened.
        """
        self.live = True
        info = VideoInfo(self.name, True)

        page = get_content(self.url)
        page_meta = match1(page, r'window\.HOSTINFO=(.+?);')
        page_meta = json.loads(page_meta)

        info.title = page_meta['roominfo']['name']
        info.artist = page_meta['hostinfo']['nickName']
        info.stream_types.append('current')

        stream_url = page_meta['videoinfo']['streamurl']
        try:
            probe = urlopen(stream_url)
        except Exception:
            # Raised explicitly: an ``assert`` statement would be stripped
            # under ``python -O`` and the offline room would go unnoticed.
            raise AssertionError('offline')
        # The probe only checks reachability; do not keep the stream open.
        probe.close()

        info.streams['current'] = dict(container='flv',
                                       video_profile='current',
                                       src=[stream_url],
                                       size=float('inf'))

        return info
Beispiel #9
0
def get_head_response(url, headers=fake_headers):
    """Return the response to a HEAD request for ``url``.

    When the HEAD request fails with HTTP error 403, 404 or 405, the URL is
    fetched through ``get_response`` instead and that response is closed
    before it is returned. Other IOErrors propagate unchanged.
    """
    logger.debug('get_head_response> URL: ' + url)
    try:
        head_request = Request(url, headers=headers, method='HEAD')
        response = urlopen(head_request)
    except IOError as err:
        head_rejected = match1(str(err), 'HTTP Error (40[345])')
        if not head_rejected:
            raise
        # HEAD is apparently not supported by this server.
        logger.debug('get_head_response> HEAD failed, try GET')
        response = get_response(url, headers=headers)
        response.close()
    # urllib follows redirects on its own, and telling it not to would take
    # too much code, so the response describes the final location.
    return response
Beispiel #10
0
def get_content(url, headers=fake_headers, data=None, charset=None):
    """Fetch a URL and return its body, decompressed and decoded.

    Args:
        url: A URL.
        headers: Request headers used by the client.
        data: Optional request body handed to ``Request``.
        charset: Encoding used to decode the body. ``None`` detects it from
            the ``Content-Type`` header, then from a ``charset=``
            declaration inside the body, and finally falls back to UTF-8.
            The special value ``'ignore'`` skips decoding.

    Returns:
        The content as a string. The raw (decompressed) bytes are returned
        instead when ``charset`` is ``'ignore'`` or when decoding fails.
    """

    req = Request(url, headers=headers, data=data)
    #if cookies_txt:
    #    cookies_txt.add_cookie_header(req)
    #    req.headers.update(req.unredirected_hdrs)
    response = urlopen(req)
    try:
        data = response.read()
        resheader = response.info()
    finally:
        # Release the connection as soon as the body has been read.
        response.close()

    # Handle HTTP compression for gzip and deflate (zlib)
    if 'Content-Encoding' in resheader:
        content_encoding = resheader['Content-Encoding']
    else:
        content_encoding = None
    if content_encoding == 'gzip':
        data = ungzip(data)
    elif content_encoding == 'deflate':
        data = undeflate(data)

    if charset == 'ignore':
        return data

    # Decode the response body
    if charset is None:
        # A response without Content-Type yields None here; skip the header
        # lookup then instead of handing None to the regex helper.
        content_type = resheader['Content-Type']
        if content_type:
            charset = match1(content_type, r'charset=([\w-]+)')
        charset = charset or \
              match1(str(data), r'charset=\"([^\"]+)', r'charset=([^"]+)') or 'utf-8'
    try:
        data = data.decode(charset)
    except (LookupError, ValueError):
        # Unknown codec or undecodable bytes: return the body undecoded.
        from .log import w
        w("wrong charset for {}".format(url))
    return data
Beispiel #11
0
def get_content(url, headers=fake_headers, data=None, charset = None):
    """Request ``url`` and return the response body as text.

    Args:
        url: A URL.
        headers: Request headers used by the client.
        data: Optional request body handed to ``Request``.
        charset: Encoding for decoding the body. With ``None`` the charset
            is looked up in the ``Content-Type`` header, then in a
            ``charset=`` declaration in the body, with UTF-8 as the last
            resort. Pass ``'ignore'`` to get the bytes without decoding.

    Returns:
        The content as a string, or the raw (decompressed) bytes when
        ``charset`` is ``'ignore'`` or when decoding fails.
    """

    req = Request(url, headers=headers, data=data)
    #if cookies_txt:
    #    cookies_txt.add_cookie_header(req)
    #    req.headers.update(req.unredirected_hdrs)
    response = urlopen(req)
    try:
        data = response.read()
        resheader = response.info()
    finally:
        # The body is fully read; do not keep the connection around.
        response.close()

    # Handle HTTP compression for gzip and deflate (zlib)
    if 'Content-Encoding' in resheader:
        content_encoding = resheader['Content-Encoding']
    else:
        content_encoding = None
    if content_encoding == 'gzip':
        data = ungzip(data)
    elif content_encoding == 'deflate':
        data = undeflate(data)

    if charset == 'ignore':
        return data

    # Decode the response body
    if charset is None:
        # Without a Content-Type header the lookup returns None, which must
        # not be passed on to the regex helper.
        content_type = resheader['Content-Type']
        if content_type:
            charset = match1(content_type, r'charset=([\w-]+)')
        charset = charset or \
              match1(str(data), r'charset=\"([^\"]+)', r'charset=([^"]+)') or 'utf-8'
    try:
        data = data.decode(charset)
    except (LookupError, ValueError):
        # Codec unknown or bytes not valid in it: keep the raw body.
        from .log import w
        w("wrong charset for {}".format(url))
    return data
Beispiel #12
0
def fetch_cna():
    """Return the ``cna`` cookie value handed out by gm.mmstat.com.

    Returns:
        The value of the ``cna`` cookie found in the ``Set-Cookie`` response
        header, or a hard-coded fallback value when the header is missing or
        contains no ``cna`` cookie.
    """
    url = 'https://gm.mmstat.com/yt/ykcomment.play.commentInit?cna='
    response = urlopen(url)
    try:
        cookies = response.info()['Set-Cookie']
    finally:
        # Only the headers matter; release the connection right away.
        response.close()
    # A reply without Set-Cookie yields None; skip the regex then, so the
    # fallback below is used instead of handing None to match1.
    cna = match1(cookies, "cna=([^;]+)") if cookies else None
    return cna if cna else "oqikEO1b7CECAbfBdNNf1PM1"
Beispiel #13
0
Datei: html.py Projekt: flfq/ykdl
def get_location(url, headers = fake_headers):
    """Return the final URL of ``url`` after redirects have been followed.

    Args:
        url: Address to resolve.
        headers: Request headers to send.

    Returns:
        The URL of the response that was finally retrieved.
    """
    # Send the caller's headers; they used to be ignored in favour of
    # fake_headers regardless of the argument.
    response = urlopen(Request(url, headers = headers))
    try:
        # urllib will follow redirections and it's too much code to tell
        # urllib not to do that
        return response.geturl()
    finally:
        response.close()
Beispiel #14
0
def _save_url(url, name, ext, status, part=None, reporthook=multi_hook):
    """Download one part of a file, resuming an existing partial file.

    The target path is ``<name>.<ext>`` when ``part`` is None (recorded as
    part 0), otherwise ``<name>_<part>.<ext>``. Progress and messages are
    delivered exclusively through ``reporthook``.

    Args:
        url: Address of the resource to fetch.
        name: Output path without extension.
        ext: File extension, without the leading dot.
        status: Indexable container; ``status[part]`` is set to 1 when the
            part is considered complete.
        part: Part index, or None for a single-file download.
        reporthook: Callback receiving an action list as first argument
            (``['print', args, kwargs]``, ``['part']`` or
            ``['part end', status, downloaded]``) followed by optional
            ``filesize, size, part`` values.

    Returns:
        True when the part is complete (already on disk, or fully written).
        None otherwise, e.g. when the response ends while the overlapping
        bytes are being skipped or the final size does not match.
    """
    def print(*args, **kwargs):
        # Shadows the builtin inside this function so that messages are
        # routed through reporthook instead of being written directly.
        reporthook(['print', args, kwargs])

    def read_response(bs):
        """Read up to ``bs`` bytes from ``response``.

        Reads ``size``, ``fd``, ``timeout`` and ``response`` from the
        enclosing scope at call time.
        """
        if size > 0:
            # an independent timeout for reading the response; only applied
            # when the total size is known
            rd, _, ed = select.select([fd], [], [fd], timeout)
            if ed:
                raise socket.error(ed)
            if not rd:
                raise socket.timeout('The read operation timed out')
        return response.read(bs)

    if part is None:
        name = name + '.' + ext
        part = 0
    else:
        name = '%s_%d.%s' % (name, part, ext)
    bs = 8192
    size = -1
    filesize = 0
    downloaded = 0
    open_mode = 'wb'
    response = None
    # Wait at least 60 seconds per read, or longer if the global default
    # socket timeout is larger.
    timeout = max(socket.getdefaulttimeout() or 0, 60)
    req = Request(url, headers=fake_headers)
    try:
        reporthook(['part'], part=part)
        if os.path.exists(name):
            filesize = os.path.getsize(name)
            if filesize:
                # Ask for the range starting at the last byte already on
                # disk, so a complete file does not trigger a 416 reply.
                req.add_header('Range', 'bytes=%d-' %
                               (filesize - 1))  # get +1, avoid 416
                response = urlopen(req, None)
                set_rcvbuf(response)
                if response.status == 206:
                    # Range honoured: total size is the part after '/' in
                    # Content-Range; one overlapping byte must be skipped.
                    size = int(
                        response.headers['Content-Range'].split('/')[-1])
                    needless_size = 1
                elif response.status == 200:
                    # Range ignored: the full body is sent, so everything
                    # already on disk has to be skipped.
                    size = int(response.headers.get('Content-Length', -1))
                    needless_size = filesize
                # NOTE(review): needless_size stays unbound for any other
                # status; the skip loop below would then raise a NameError
                # if filesize < size -- confirm whether that can occur.
                if filesize == size:
                    print('Skipped: file part %d has already been downloaded' %
                          part)
                    status[part] = 1
                    return True
                if filesize < size:
                    percent = int(filesize * 100 / size)
                    open_mode = 'ab'
                    print('Restored: file part %d is incomplete at %d%%' %
                          (part, percent))
                    reporthook(['part'], filesize, size, part)
                    fd = response.fileno()
                    # Read and discard the bytes that are already on disk.
                    while needless_size > 0:
                        if needless_size > bs:
                            block = read_response(bs)
                        else:
                            block = read_response(needless_size)
                        if not block:
                            # Response ended early; returns None, not True.
                            return
                        needless_size -= len(block)
                # NOTE(review): when filesize > size nothing is skipped and
                # the file is rewritten ('wb') from this ranged response,
                # with fd never assigned -- verify this path is intended.
        if response is None:
            # No usable partial file: plain download from the beginning.
            response = urlopen(req, None)
            set_rcvbuf(response)
            fd = response.fileno()
        if size < 0:
            size = int(response.headers.get('Content-Length', -1))
        with open(name, open_mode) as tfp:
            # With an unknown size, read until the response is exhausted;
            # otherwise stop once the expected number of bytes is on disk.
            while size < 0 or filesize < size:
                block = read_response(bs)
                if not block:
                    break
                n = tfp.write(block)
                downloaded += n
                filesize += n
                reporthook(['part'], filesize, size, part)
        if os.path.exists(name):
            filesize = os.path.getsize(name)
            # Complete when non-empty and either the size was unknown or the
            # file matches it exactly.
            if filesize and (size < 0 or filesize == size):
                status[part] = 1
                return True
    finally:
        # Runs on every exit path, including the early returns and errors.
        # NOTE(review): the purpose of the one-second pause is not visible
        # here -- confirm before changing it.
        time.sleep(1)
        reporthook(['part end', status, downloaded], filesize, size, part)
Beispiel #15
0
def fetch_cna():
    """Return the value of the first cookie set by gm.mmstat.com.

    The first ``name=value`` pair of the ``Set-Cookie`` response header is
    taken and its value returned.

    Returns:
        The cookie value as a string.
    """
    url = 'http://gm.mmstat.com/yt/ykcomment.play.commentInit?cna='
    response = urlopen(url)
    try:
        set_cookie = response.info()['Set-Cookie']
    finally:
        # Only the headers matter; release the connection right away.
        response.close()
    first_pair = set_cookie.split(';')[0]
    # Split on the first '=' only, so a value that itself contains '=' is
    # returned whole instead of being cut off.
    return first_pair.split('=', 1)[1]
Beispiel #16
0
def get_location(url, headers = fake_headers):
    """Return the URL that ``url`` finally resolves to, using ``headers``."""
    request = Request(url, headers = headers)
    landed = urlopen(request)
    # urllib follows redirects on its own, and telling it not to would take
    # too much code, so geturl() reports the final location.
    return landed.geturl()
Beispiel #17
0
def fetch_cna():
    """Fetch the ``cna`` cookie value from gm.mmstat.com.

    Returns:
        The ``cna`` value taken from the ``Set-Cookie`` response header. A
        hard-coded fallback value is returned when the header is absent or
        carries no ``cna`` cookie.
    """
    url = 'https://gm.mmstat.com/yt/ykcomment.play.commentInit?cna='
    response = urlopen(url)
    try:
        cookies = response.info()['Set-Cookie']
    finally:
        # The body is not needed; close the connection immediately.
        response.close()
    # No Set-Cookie header means cookies is None; go straight to the
    # fallback instead of running the regex helper on None.
    cna = match1(cookies, "cna=([^;]+)") if cookies else None
    return cna if cna else "oqikEO1b7CECAbfBdNNf1PM1"
Beispiel #18
0
def get_response(url, headers=fake_headers, data=None):
    """Open ``url`` and return the response object.

    ``headers`` and the optional request body ``data`` are passed to
    ``Request``. The response is returned as produced by ``urlopen``.
    """
    request = Request(url, data=data, headers=headers)
    # Cookie injection is currently disabled:
    #if cookies_txt:
    #    cookies_txt.add_cookie_header(request)
    #    request.headers.update(request.unredirected_hdrs)
    response = urlopen(request)
    return response