def refresh_signin_vcode(cookie, tokens, vcodetype):
    '''Refresh the sign-in verification code.

    vcodetype - the vcodetype value returned by check_login().
    Returns the decoded JSON response, or None on failure.
    '''
    parts = (
        const.PASSPORT_BASE,
        'v2/?reggetcodestr',
        '&token=', tokens['token'],
        '&tpl=pp&apiver=v3',
        '&tt=', util.timestamp(),
        '&fr=ligin',
        '&vcodetype=', encoder.encode_uri(vcodetype),
    )
    url = ''.join(parts)
    headers = {
        'Cookie': cookie.header_output(),
        'Referer': const.REFERER,
    }
    logger.debug('refresh vcode url: %s' % url)
    req = net.urlopen(url, headers=headers)
    if not req:
        return None
    try:
        # Server responds in GBK, not UTF-8.
        data = req.data.decode('gbk')
        logger.debug('refresh vcode: %s' % data)
        return json.loads(data)
    except ValueError:
        logger.error(traceback.format_exc())
    return None
def post_multipart(url, headers, fields, files, retries=RETRIES):
    '''POST a multipart/form-data request to url.

    fields/files are serialized by encode_multipart_formdata(); the merged
    request headers get the generated Content-Type and Content-length.
    Returns the http.client response with the (decompressed) body attached
    as `.data`, or None after `retries` failed attempts.
    '''
    content_type, body = encode_multipart_formdata(fields, files)
    schema = urllib.parse.urlparse(url)
    headers_merged = default_headers.copy()
    headers_merged.update(headers)
    headers_merged['Content-Type'] = content_type
    headers_merged['Content-length'] = str(len(body))
    for i in range(retries):
        try:
            # Honour the URL scheme: a plain HTTPConnection to an https://
            # endpoint would silently attempt cleartext on port 80.
            if schema.scheme == 'https':
                h = http.client.HTTPSConnection(schema.netloc)
            else:
                h = http.client.HTTPConnection(schema.netloc)
            h.request('POST', url, body=body, headers=headers_merged)
            req = h.getresponse()
            encoding = req.getheader('Content-encoding')
            req.data = req.read()
            # Transparently decompress gzip/deflate payloads.
            if encoding == 'gzip':
                req.data = gzip.decompress(req.data)
            elif encoding == 'deflate':
                req.data = zlib.decompress(req.data, -zlib.MAX_WBITS)
            return req
        except Exception:
            # Narrowed from a bare `except:`, which would also trap
            # KeyboardInterrupt/SystemExit; log and retry.
            logger.error(traceback.format_exc())
    return None
def urlopen(url, headers=None, data=None, retries=RETRIES, timeout=TIMEOUT):
    '''Open an http connection and return the response object.

    headers is a dict; common defaults such as User-Agent and Referer are
    supplied automatically and need not be repeated.  This function is for
    ordinary http requests only, not for downloading large files.  If the
    server uses gzip/deflate the body is decompressed locally; the final
    content (usually UTF-8 text) is stored on `req.data`.  Returns None
    after `retries` failed attempts.
    '''
    # `headers={}` as a literal default is a shared mutable object across
    # calls; a None sentinel avoids that pitfall.
    headers_merged = default_headers.copy()
    if headers:
        headers_merged.update(headers)
    opener = urllib.request.build_opener(ForbiddenHandler)
    opener.addheaders = [(k, v) for k, v in headers_merged.items()]
    for i in range(retries):
        try:
            req = opener.open(url, data=data, timeout=timeout)
            encoding = req.headers.get('Content-encoding')
            req.data = req.read()
            if encoding == 'gzip':
                req.data = gzip.decompress(req.data)
            elif encoding == 'deflate':
                req.data = zlib.decompress(req.data, -zlib.MAX_WBITS)
            return req
        except Exception:
            # Narrowed from a bare `except:`, which would also swallow
            # KeyboardInterrupt/SystemExit; log and retry.
            logger.error(traceback.format_exc())
    return None
def urlopen_without_redirect(url, headers=None, data=None, retries=RETRIES):
    '''Request a URL and return the raw Response without following redirects.

    Use this to read the Location of a 301/302 redirect, the size of the
    requested file, or other authentication info from the headers.
    Returns None after `retries` failed attempts.
    '''
    # Avoid the shared-mutable-default pitfall of `headers={}`.
    headers_merged = default_headers.copy()
    if headers:
        headers_merged.update(headers)
    parse_result = urllib.parse.urlparse(url)
    for i in range(retries):
        try:
            # Pick the connection class from the URL scheme; a plain
            # HTTPConnection would talk cleartext to an https:// endpoint.
            if parse_result.scheme == 'https':
                conn = http.client.HTTPSConnection(parse_result.netloc)
            else:
                conn = http.client.HTTPConnection(parse_result.netloc)
            if data:
                conn.request('POST', url, body=data, headers=headers_merged)
            else:
                # GET carries no body; don't pass one.
                conn.request('GET', url, headers=headers_merged)
            return conn.getresponse()
        except Exception:
            # Narrowed from a bare `except:`; log and retry.
            logger.error(traceback.format_exc())
    return None
def urlopen(url, headers=None, data=None, retries=RETRIES, timeout=TIMEOUT):
    '''Open an http connection and return the response object.

    headers is a dict; common defaults such as User-Agent and Referer are
    supplied automatically and need not be repeated.  This function is for
    ordinary http requests only, not for downloading large files.  If the
    server uses gzip/deflate the body is decompressed locally; the final
    content (usually UTF-8 text) is stored on `req.data`.  Returns None
    after `retries` failed attempts.
    '''
    # `headers={}` as a literal default is shared across calls; use a None
    # sentinel instead.
    headers_merged = default_headers.copy()
    if headers:
        headers_merged.update(headers)
    opener = urllib.request.build_opener(ForbiddenHandler)
    opener.addheaders = [(k, v) for k, v in headers_merged.items()]
    for i in range(retries):
        try:
            req = opener.open(url, data=data, timeout=timeout)
            encoding = req.headers.get('Content-encoding')
            req.data = req.read()
            if encoding == 'gzip':
                req.data = gzip.decompress(req.data)
            elif encoding == 'deflate':
                req.data = zlib.decompress(req.data, -zlib.MAX_WBITS)
            return req
        except Exception:
            # Narrowed from a bare `except:`, which would also swallow
            # KeyboardInterrupt/SystemExit; log and retry.
            logger.error(traceback.format_exc())
    return None
def json_loads_single(s):
    '''Parse slightly non-standard JSON text.

    Single quotes are rewritten to double quotes and tab characters are
    stripped before parsing.  Returns the decoded object, or None if the
    text still fails to parse.
    '''
    normalized = s.replace("'", '"').replace('\t', '')
    try:
        return json.loads(normalized)
    except (ValueError, UnicodeDecodeError):
        logger.error(traceback.format_exc())
        return None
def get_download_link(cookie, tokens, path):
    '''Resolve the final download link for a file.

    path - absolute path of the file.
    @return the redirected URL; if redirection info is unavailable the
    original dlink is returned, and None is returned when the metadata
    lookup fails.
    '''
    metas = get_metas(cookie, tokens, path)
    valid = (metas and metas.get('errno', -1) == 0 and
             'info' in metas and len(metas['info']) == 1)
    if not valid:
        logger.error('pcs.get_download_link(): %s' % metas)
        return None
    dlink = metas['info'][0]['dlink']
    url = '{0}&cflg={1}'.format(dlink, cookie.get('cflag').value)
    headers = {
        'Cookie': cookie.sub_output('BAIDUID', 'BDUSS', 'cflag'),
        'Accept': const.ACCEPT_HTML,
    }
    req = net.urlopen_without_redirect(url, headers=headers)
    if req:
        return req.getheader('Location', url)
    return url
def refresh_signin_vcode(cookie, tokens, vcodetype):
    '''Refresh the sign-in verification code.

    vcodetype - the vcodetype value returned by check_login().
    Returns the decoded JSON response, or None on failure.
    '''
    url = (const.PASSPORT_BASE + 'v2/?reggetcodestr' +
           '&token=' + tokens['token'] +
           '&tpl=pp&apiver=v3' +
           '&tt=' + util.timestamp() +
           '&fr=ligin' +
           '&vcodetype=' + encoder.encode_uri(vcodetype))
    headers = {
        'Cookie': cookie.header_output(),
        'Referer': const.REFERER,
    }
    logger.debug('refresh vcode url: %s' % url)
    req = net.urlopen(url, headers=headers)
    if req:
        try:
            # Response body is GBK-encoded.
            data = req.data.decode('gbk')
            logger.debug('refresh vcode: %s' % data)
            return json.loads(data)
        except ValueError:
            logger.error(traceback.format_exc())
    return None
def urlopen_simple(url, retries=RETRIES, timeout=TIMEOUT):
    '''Open url with urllib and return the response, retrying on failure.

    Returns None after `retries` failed attempts.
    '''
    for i in range(retries):
        try:
            return urllib.request.urlopen(url, timeout=timeout)
        except Exception:
            # One narrowed handler replaces the redundant `except OSError`
            # clause plus a bare `except:` that also trapped
            # KeyboardInterrupt/SystemExit; both only logged anyway.
            logger.error(traceback.format_exc())
    return None
def urlopen_simple(url, retries=RETRIES, timeout=TIMEOUT):
    '''Open url with urllib and return the response, retrying on failure.

    Returns None after `retries` failed attempts.
    '''
    for i in range(retries):
        try:
            return urllib.request.urlopen(url, timeout=timeout)
        except Exception:
            # One narrowed handler replaces the redundant `except OSError`
            # clause plus a bare `except:`; both branches only logged.
            logger.error(traceback.format_exc())
    return None
def urloption(url, headers=None, retries=RETRIES):
    '''Send an OPTIONS request.

    Returns the raw http.client response, or None after `retries` failed
    attempts.
    '''
    # Avoid the shared-mutable-default pitfall of `headers={}`.
    headers_merged = default_headers.copy()
    if headers:
        headers_merged.update(headers)
    schema = urllib.parse.urlparse(url)
    for i in range(retries):
        try:
            # Pick the connection class from the URL scheme; a plain
            # HTTPConnection would talk cleartext to an https:// endpoint.
            if schema.scheme == 'https':
                conn = http.client.HTTPSConnection(schema.netloc)
            else:
                conn = http.client.HTTPConnection(schema.netloc)
            conn.request('OPTIONS', url, headers=headers_merged)
            return conn.getresponse()
        except Exception:
            # Narrowed from a bare `except:`; log and retry.
            logger.error(traceback.format_exc())
    return None
def get_download_link(cookie, tokens, path):
    """Resolve the final download link for a file.

    path - absolute path of the file.
    @return the redirected URL; falls back to the original dlink when no
    redirect is available, and returns None if the metadata lookup fails.
    """
    metas = get_metas(cookie, tokens, path)
    if (not metas or metas.get("errno", -1) != 0 or
            "info" not in metas or len(metas["info"]) != 1):
        logger.error("pcs.get_download_link(): %s" % metas)
        return None
    dlink = metas["info"][0]["dlink"]
    url = "{0}&cflg={1}".format(dlink, cookie.get("cflag").value)
    cookie_header = cookie.sub_output("BAIDUID", "BDUSS", "cflag")
    req = net.urlopen_without_redirect(url, headers={
        "Cookie": cookie_header,
        "Accept": const.ACCEPT_HTML,
    })
    if not req:
        return url
    return req.getheader("Location", url)
import hashlib import json import os import random import re import traceback import urllib.parse import time from bcloud_core.const import ValidatePathState from bcloud_core.log import logger try: from Crypto.PublicKey import RSA from Crypto.Cipher import PKCS1_v1_5 except (ImportError, ValueError): logger.error(traceback.format_exc()) SIZE_K = 2 ** 10 SIZE_M = 2 ** 20 SIZE_G = 2 ** 30 SIZE_T = 2 ** 40 def timestamp(): '''返回当前的时间标记, 以毫秒为单位''' return str(int(time.time() * 1000)) def curr_time(): now = datetime.datetime.now() return datetime.datetime.strftime(now, '%Y%m%d%H%M%S') def latency():