def save_url(url, name, reporthook=simple_hook):
    """Download *url* into the file *name*, resuming a partial download
    when the server reports a total size via Content-Length.

    *reporthook* is called as reporthook(blocknum, blocksize, totalsize)
    once before the transfer starts and after every block written.
    """
    block_size = 1024 * 8
    total_size = -1
    bytes_read = 0
    block_num = 0
    mode = 'wb'
    req = Request(url, headers=fake_headers)
    response = urlopen(req, None)
    if "content-length" in response.headers:
        total_size = int(response.headers["Content-Length"])
    if os.path.exists(name):
        local_size = os.path.getsize(name)
        if local_size == total_size:
            print('Skipped: file already downloaded')
            return
        elif total_size != -1:
            # Partial file and known total: re-issue the request asking only
            # for the missing tail, then append instead of overwriting.
            req.add_header('Range', 'bytes=%d-' % local_size)
            block_num = int(local_size / block_size)
            response = urlopen(req, None)
            mode = 'ab'
    reporthook(block_num, block_size, total_size)
    with open(name, mode) as out:
        while True:
            chunk = response.read(block_size)
            if not chunk:
                break
            bytes_read += len(chunk)
            out.write(chunk)
            block_num += 1
            reporthook(block_num, block_size, total_size)
def save_url(url, name, reporthook=simple_hook):
    """Save the resource at *url* to the local file *name*.

    An existing file of the expected full size is skipped; an existing
    smaller file is completed through an HTTP Range request.  Progress is
    reported via reporthook(blocknum, blocksize, totalsize).
    """
    bs = 1024 * 8
    size = -1
    read = 0
    blocknum = 0
    open_mode = 'wb'
    req = Request(url, headers=fake_headers)
    response = urlopen(req, None)
    if "content-length" in response.headers:
        size = int(response.headers["Content-Length"])
    if os.path.exists(name):
        filesize = os.path.getsize(name)
        if filesize == size:
            print('Skipped: file already downloaded')
            return
        if size != -1:
            # Incomplete local copy and known total size: resume from the
            # current end of file and append.
            req.add_header('Range', 'bytes=%d-' % filesize)
            blocknum = int(filesize / bs)
            response = urlopen(req, None)
            open_mode = 'ab'
    reporthook(blocknum, bs, size)
    with open(name, open_mode) as tfp:
        # read() yields b'' at EOF, which terminates the sentinel iterator.
        for block in iter(lambda: response.read(bs), b''):
            read += len(block)
            tfp.write(block)
            blocknum += 1
            reporthook(blocknum, bs, size)
def save_url(url, name, ext, status, part=None, reporthook=simple_hook):
    """Download *url* to '<name>.<ext>' (whole file) or '<name>_<part>.<ext>'
    (one part of a multi-part download), resuming from an existing partial
    file, and set status[part] = 1 once the file is complete on disk.
    """
    if part is None:
        print('Download: ' + name)
        name = name + '.' + ext
        part = 0
    else:
        print('\nDownload: %s part %d' % (name, part))
        name = '%s_%d.%s' % (name, part, ext)
    bs = 1024 * 8
    size = -1        # total size reported by the server (-1 = unknown)
    read = 0
    blocknum = 0
    open_mode = 'wb'
    response = None
    req = Request(url, headers=fake_headers)
    if os.path.exists(name):
        filesize = os.path.getsize(name)
        if filesize:
            # Start one byte early so a fully-downloaded file still gets a
            # 206 response rather than a 416 (range not satisfiable).
            req.add_header('Range', 'bytes=%d-' % (filesize - 1))  # get +1, avoid 416
            response = urlopen(req, None)
            # NOTE(review): assert is stripped under `python -O`; a raise
            # would be safer for this status check.
            assert response.status == 206, 'HTTP status %d' % response.status
            size = int(response.headers['Content-Range'].split('/')[-1])
        if filesize == size:
            print('Skipped: file already downloaded')
            status[part] = 1
            return
        if filesize < size:
            if filesize:
                blocknum = int(filesize / bs)
                open_mode = 'ab'
                # Discard the single overlapping byte requested above.
                response.read(1)  # read -1
    if response is None:
        response = urlopen(req, None)
    if size < 0:
        size = int(response.headers.get('Content-Length', -1))
    reporthook(blocknum, bs, size)
    with open(name, open_mode) as tfp:
        while True:
            block = response.read(bs)
            if not block:
                break
            read += len(block)
            tfp.write(block)
            blocknum += 1
            reporthook(blocknum, bs, size)
    if os.path.exists(name):
        filesize = os.path.getsize(name)
        if filesize == size:
            status[part] = 1
def get_head_response(url, headers=fake_headers):
    """Issue a HEAD request for *url*; fall back to GET when HEAD is
    rejected with 405.

    In the GET fallback the response is closed before returning (only the
    headers are of interest).
    """
    req = Request(url, headers=headers)
    req.get_method = lambda: 'HEAD'
    try:
        response = urlopen(req)
    except IOError as e:
        # if HEAD method is not supported
        if 'HTTP Error 405' not in str(e):
            raise
        # Do a plain GET instead and discard the body.
        response = urlopen(Request(url, headers=headers))
        response.close()
    # urllib will follow redirections and it's too much code to tell urllib
    # not to do that
    return response
def save_url(url, name, ext, status, part=None, reporthook=simple_hook):
    """Download *url* to '<name>.<ext>' or '<name>_<part>_.<ext>', resuming
    a partial file via an HTTP Range request; set status[part] = 1 when the
    file ends up complete.
    """
    if part is None:
        print("Download: " + name)
        name = name + '.' + ext
        part = 0
    else:
        print("Download: " + name + " part %d" % part)
        name = name + '_%d_.' % part + ext
    bs = 1024 * 8
    size = -1        # total size reported by the server (-1 = unknown)
    read = 0
    blocknum = 0
    open_mode = 'wb'
    req = Request(url, headers=fake_headers)
    if os.path.exists(name):
        filesize = os.path.getsize(name)
        # Ask only for the bytes we do not have yet.
        req.add_header('Range', 'bytes=%d-' % filesize)
        response = urlopen(req, None)
        if response.status == 206:
            # Range honoured: total size comes from Content-Range.
            size = int(response.headers['Content-Range'].split('/')[-1])
        if filesize == size:
            print('Skipped: file already downloaded')
            status[part] = 1
            return
        if filesize < size:
            if filesize:
                blocknum = int(filesize / bs)
                open_mode = 'ab'
    else:
        response = urlopen(req, None)
    if size < 0:
        size = int(response.headers.get('Content-Length', -1))
    reporthook(blocknum, bs, size)
    with open(name, open_mode) as tfp:
        while True:
            block = response.read(bs)
            if not block:
                break
            read += len(block)
            tfp.write(block)
            blocknum += 1
            reporthook(blocknum, bs, size)
    if os.path.exists(name):
        filesize = os.path.getsize(name)
        if filesize == size:
            status[part] = 1
def get_content(url, headers=fake_headers, data=None, charset=None):
    """Gets the content of a URL via sending a HTTP GET request.

    Args:
        url: A URL.
        headers: Request headers used by the client.
        data: Optional request body.
        charset: Encoding used to decode the body; 'ignore' returns the
            raw bytes, None triggers auto-detection.

    Returns:
        The content as a string (or bytes when charset == 'ignore').
    """
    logger.debug("get_content> URL: " + url)
    req = Request(url, headers=headers, data=data)
    response = urlopen(req)
    body = response.read()

    # Handle HTTP compression for gzip and deflate (zlib)
    resheader = response.info()
    content_encoding = None
    if 'Content-Encoding' in resheader:
        content_encoding = resheader['Content-Encoding']
    elif hasattr(resheader, 'get_payload'):
        payload = resheader.get_payload()
        if isinstance(payload, str):
            content_encoding = match1(payload,
                                      r'Content-Encoding:\s*([\w-]+)')
    if content_encoding == 'gzip':
        body = ungzip(body)
    elif content_encoding == 'deflate':
        body = undeflate(body)

    if charset == 'ignore':
        return body

    # Decode the response body: HTTP header first, then a hint embedded in
    # the document itself, finally UTF-8.
    if charset is None:
        if 'Content-Type' in resheader:
            charset = match1(resheader['Content-Type'],
                             r'charset=([\w-]+)')
        charset = (charset
                   or match1(str(body), r'charset=\"([\w-]+)',
                             'charset=([\w-]+)')
                   or 'utf-8')
        logger.debug("get_content> Charset: " + charset)
    try:
        body = body.decode(charset, errors='replace')
    except:
        logger.warning("wrong charset for {}".format(url))
    return body
def get_content(url, headers=fake_headers, data=None, charset=None):
    """Send an HTTP GET request and return the response content.

    Args:
        url: A URL.
        headers: Request headers used by the client.
        data: Optional request body.
        charset: Encoding for decoding; 'ignore' returns raw bytes; None
            auto-detects from headers or the body.

    Returns:
        The content as a string (bytes for charset='ignore', or when
        decoding fails).
    """
    logger.debug("get_content> URL: " + url)
    request = Request(url, headers=headers, data=data)
    #if cookies_txt:
    #    cookies_txt.add_cookie_header(request)
    #    request.headers.update(request.unredirected_hdrs)
    response = urlopen(request)
    content = response.read()

    # Handle HTTP compression for gzip and deflate (zlib)
    resheader = response.info()
    if 'Content-Encoding' in resheader:
        encoding = resheader['Content-Encoding']
    elif hasattr(resheader, 'get_payload'):
        # Some header objects only expose raw header text via the payload.
        payload = resheader.get_payload()
        encoding = (match1(payload, r'Content-Encoding:\s*([\w-]+)')
                    if isinstance(payload, str) else None)
    else:
        encoding = None
    decompress = {'gzip': ungzip, 'deflate': undeflate}.get(encoding)
    if decompress:
        content = decompress(content)

    if charset == 'ignore':
        return content

    # Decode the response body
    if charset is None:
        if 'Content-Type' in resheader:
            charset = match1(resheader['Content-Type'],
                             r'charset=([\w-]+)')
        charset = charset or match1(str(content), r'charset=\"([\w-]+)',
                                    'charset=([\w-]+)') or 'utf-8'
        logger.debug("get_content> Charset: " + charset)
    try:
        content = content.decode(charset, errors='replace')
    except:
        logger.warning("wrong charset for {}".format(url))
    return content
def prepare(self):
    """Build a VideoInfo for the current live room.

    Probes the stream URL with a GET request; any failure is treated as
    the host being offline (raises AssertionError 'offline').
    """
    self.live = True
    info = VideoInfo(self.name, True)
    page = get_content(self.url)
    meta = json.loads(match1(page, r'window\.HOSTINFO=(.+?);'))
    info.title = meta['roominfo']['name']
    info.artist = meta['hostinfo']['nickName']
    info.stream_types.append('current')
    stream_url = meta['videoinfo']['streamurl']
    try:
        urlopen(stream_url)  # reachability probe only; body is discarded
    except:
        assert 0, 'offline'
    info.streams['current'] = dict(container='flv',
                                   video_profile='current',
                                   src=[stream_url],
                                   size=float('inf'))
    return info
def get_head_response(url, headers=fake_headers):
    """HEAD *url*; on a 403/404/405 error retry with a plain GET and close
    the body before returning the response."""
    logger.debug('get_head_response> URL: ' + url)
    req = Request(url, headers=headers, method='HEAD')
    try:
        response = urlopen(req)
    except IOError as e:
        # if HEAD method is not supported
        if not match1(str(e), 'HTTP Error (40[345])'):
            raise
        logger.debug('get_head_response> HEAD failed, try GET')
        response = get_response(url, headers=headers)
        response.close()
    # urllib will follow redirections and it's too much code to tell urllib
    # not to do that
    return response
def get_content(url, headers=fake_headers, data=None, charset=None):
    """Fetch *url* via HTTP GET and return the (decoded) response body.

    Args:
        url: A URL.
        headers: Request headers used by the client.
        data: Optional request body.
        charset: Explicit encoding, 'ignore' to return raw bytes, or None
            to auto-detect from Content-Type / the body itself.

    Returns:
        The content as a string (raw bytes on charset='ignore' or when
        decoding fails).
    """
    response = urlopen(Request(url, headers=headers, data=data))
    body = response.read()

    # Transparently decompress gzip / deflate payloads.
    resheader = response.info()
    if 'Content-Encoding' in resheader:
        encoding = resheader['Content-Encoding']
    else:
        encoding = None
    if encoding == 'gzip':
        body = ungzip(body)
    elif encoding == 'deflate':
        body = undeflate(body)

    if charset == 'ignore':
        return body

    # Charset: HTTP header first, then a hint inside the document,
    # finally UTF-8.
    if charset is None:
        charset = (match1(resheader['Content-Type'], r'charset=([\w-]+)')
                   or match1(str(body), r'charset=\"([^\"]+)',
                             'charset=([^"]+)')
                   or 'utf-8')
    try:
        body = body.decode(charset)
    except:
        from .log import w
        w("wrong charset for {}".format(url))
    return body
def get_content(url, headers=fake_headers, data=None, charset=None):
    """HTTP-GET *url* and return its body, decompressed and decoded.

    *charset* may be an explicit encoding name, 'ignore' (return the raw
    bytes untouched) or None (detect from the response headers or the
    body).
    """
    request = Request(url, headers=headers, data=data)
    #if cookies_txt:
    #    cookies_txt.add_cookie_header(request)
    #    request.headers.update(request.unredirected_hdrs)
    response = urlopen(request)
    raw = response.read()

    # Handle HTTP compression for gzip and deflate (zlib)
    info = response.info()
    encoding = info['Content-Encoding'] if 'Content-Encoding' in info else None
    if encoding == 'gzip':
        raw = ungzip(raw)
    elif encoding == 'deflate':
        raw = undeflate(raw)

    if charset == 'ignore':
        return raw

    # Decode the response body
    if charset is None:
        charset = match1(info['Content-Type'], r'charset=([\w-]+)') or \
                  match1(str(raw), r'charset=\"([^\"]+)',
                         'charset=([^"]+)') or 'utf-8'
    try:
        raw = raw.decode(charset)
    except:
        from .log import w
        w("wrong charset for {}".format(url))
    return raw
def fetch_cna():
    """Return the 'cna' tracking cookie issued by gm.mmstat.com, or a
    fixed fallback value when it cannot be extracted."""
    url = 'https://gm.mmstat.com/yt/ykcomment.play.commentInit?cna='
    cookies = urlopen(url).info()['Set-Cookie']
    cna = match1(cookies, "cna=([^;]+)")
    return cna or "oqikEO1b7CECAbfBdNNf1PM1"
def get_location(url, headers=fake_headers):
    """Return the final URL after urllib has followed any redirects.

    Args:
        url: The URL to resolve.
        headers: Request headers to send with the probe request.

    Returns:
        The URL of the resource actually retrieved (post-redirect).
    """
    # Bug fix: honor the caller-supplied *headers* — the original always
    # sent the module-level fake_headers, silently ignoring this argument.
    response = urlopen(Request(url, headers=headers))
    # urllib will follow redirections and it's too much code to tell urllib
    # not to do that
    return response.geturl()
def _save_url(url, name, ext, status, part=None, reporthook=multi_hook):
    """Download one (part of a) file to '<name>.<ext>' or
    '<name>_<part>.<ext>'.

    Resumes an incomplete local file via an HTTP Range request, routes all
    output/progress through *reporthook*, and records success by setting
    status[part] = 1.  Returns True on success (file complete), otherwise
    None.

    NOTE(review): relies on module-level names (multi_hook, fake_headers,
    set_rcvbuf, Request, urlopen) defined elsewhere in this file.
    """

    # All console output goes through the hook so the caller can serialize
    # messages coming from multiple download workers.
    def print(*args, **kwargs):
        reporthook(['print', args, kwargs])

    def read_response(bs):
        if size > 0:
            # an independent timeout for reading the response: select() on
            # the socket first so a stalled server cannot hang the read
            rd, _, ed = select.select([fd], [], [fd], timeout)
            if ed:
                raise socket.error(ed)
            if not rd:
                raise socket.timeout('The read operation timed out')
        return response.read(bs)

    if part is None:
        name = name + '.' + ext
        part = 0
    else:
        name = '%s_%d.%s' % (name, part, ext)
    bs = 8192
    size = -1          # total size reported by the server (-1 = unknown)
    filesize = 0       # bytes already present in the local file
    downloaded = 0     # bytes written during this call
    open_mode = 'wb'
    response = None
    timeout = max(socket.getdefaulttimeout() or 0, 60)
    req = Request(url, headers=fake_headers)
    try:
        reporthook(['part'], part=part)
        if os.path.exists(name):
            filesize = os.path.getsize(name)
            if filesize:
                # Request one byte *before* the resume point so a complete
                # file still yields 206 instead of 416.
                req.add_header('Range', 'bytes=%d-' % (filesize - 1))  # get +1, avoid 416
                response = urlopen(req, None)
                set_rcvbuf(response)
                if response.status == 206:
                    size = int(
                        response.headers['Content-Range'].split('/')[-1])
                    needless_size = 1
                elif response.status == 200:
                    # Server ignored the Range header: the body restarts at
                    # offset 0, so the first *filesize* bytes are redundant.
                    size = int(response.headers.get('Content-Length', -1))
                    needless_size = filesize
                if filesize == size:
                    print('Skipped: file part %d has already been downloaded' % part)
                    status[part] = 1
                    return True
                if filesize < size:
                    percent = int(filesize * 100 / size)
                    open_mode = 'ab'
                    print('Restored: file part %d is incomplete at %d%%' % (part, percent))
                    reporthook(['part'], filesize, size, part)
                    fd = response.fileno()
                    # Drain and discard the overlap bytes already on disk.
                    while needless_size > 0:
                        if needless_size > bs:
                            block = read_response(bs)
                        else:
                            block = read_response(needless_size)
                        if not block:
                            return
                        needless_size -= len(block)
        if response is None:
            response = urlopen(req, None)
            set_rcvbuf(response)
        fd = response.fileno()
        if size < 0:
            size = int(response.headers.get('Content-Length', -1))
        with open(name, open_mode) as tfp:
            while size < 0 or filesize < size:
                block = read_response(bs)
                if not block:
                    break
                n = tfp.write(block)
                downloaded += n
                filesize += n
                reporthook(['part'], filesize, size, part)
        if os.path.exists(name):
            filesize = os.path.getsize(name)
        if filesize and (size < 0 or filesize == size):
            status[part] = 1
            return True
    finally:
        # Emit the final accounting record for this part.
        time.sleep(1)
        reporthook(['part end', status, downloaded], filesize, size, part)
def fetch_cna():
    """Extract the value of the 'cna' cookie handed out by gm.mmstat.com.

    Assumes 'cna' is the first cookie in the Set-Cookie header.
    """
    url = 'http://gm.mmstat.com/yt/ykcomment.play.commentInit?cna='
    set_cookie = urlopen(url).info()['Set-Cookie']
    first_pair = set_cookie.split(';')[0]
    return first_pair.split('=')[1]
def get_location(url, headers=fake_headers):
    """Resolve *url* to its final location by letting urllib follow the
    redirect chain."""
    # urllib will follow redirections and it's too much code to tell urllib
    # not to do that
    resp = urlopen(Request(url, headers=headers))
    return resp.geturl()
def get_response(url, headers=fake_headers, data=None):
    """Open *url* with the given headers (and optional body *data*) and
    return the raw urllib response object."""
    request = Request(url, headers=headers, data=data)
    #if cookies_txt:
    #    cookies_txt.add_cookie_header(request)
    #    request.headers.update(request.unredirected_hdrs)
    return urlopen(request)