def _refresh_vcode(info, error=None):
    if not info or error:
        logger.error("SigninVcode.refresh_vcode: %s, %s." % (info, error))
        return
    logger.debug("refresh vcode: %s" % info)
    self.codeString = info["data"]["verifyStr"]
    gutil.async_call(auth.get_signin_vcode, self.cookie, self.codeString,
                     callback=self.update_img)
def refresh_signin_vcode(cookie, tokens, vcodetype):
    '''Refresh the signin captcha.

    vcodetype - the vcodetype returned by check_login().
    '''
    url = ''.join([
        const.PASSPORT_BASE,
        'v2/?reggetcodestr',
        '&token=', tokens['token'],
        '&tpl=pp&apiver=v3',
        '&tt=', util.timestamp(),
        '&fr=ligin',
        '&vcodetype=', encoder.encode_uri(vcodetype),
    ])
    headers = {
        'Cookie': cookie.header_output(),
        'Referer': const.REFERER,
    }
    logger.debug('refresh vcode url: %s' % url)
    req = net.urlopen(url, headers=headers)
    if req:
        try:
            data = req.data.decode('gbk')
            logger.debug('refresh vcode: %s' % data)
            return json.loads(data)
        except ValueError:
            logger.error(traceback.format_exc())
    return None
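# A minimal usage sketch (hedged): `cookie`, `tokens` and `vcodetype` are
# assumed to come from an earlier check_login() round, as the docstring above
# says. The refreshed codeString lives at info['data']['verifyStr'], the same
# field _refresh_vcode() above reads before fetching the new captcha image.
def get_new_codestring(cookie, tokens, vcodetype):
    info = refresh_signin_vcode(cookie, tokens, vcodetype)
    if not info:
        return None
    return info['data']['verifyStr']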
def post_login(cookie, token, username, password, rsakey,
               verifycode='', codestring=''):
    '''Verify the login.

    password   - the RSA-encrypted password, base64 encoded
    rsakey     - the rsakey matching public_key
    verifycode - the captcha text, empty by default

    @return (status, info). status is the returned state:
        0   - OK; info holds auth_cookie
        -1  - unknown error
        4   - wrong password
        257 - captcha required; info holds (vcodetype, codeString)
    '''
    url = const.PASSPORT_LOGIN
    data = ''.join([
        'staticpage=https%3A%2F%2Fpassport.baidu.com%2Fstatic%2Fpasspc-account%2Fhtml%2Fv3Jump.html&charset=UTF-8',
        '&token=', token,
        '&tpl=pp&subpro=&apiver=v3',
        '&tt=', util.timestamp(),
        '&codestring=', codestring,
        '&safeflg=0&u=https%3A%2F%2Fpassport.baidu.com%2F&isPhone=',
        '&quick_user=0&logintype=basicLogin&logLoginType=pc_loginBasic&idc=',
        '&loginmerge=true',
        '&username=', '******',  # value redacted in the original
        '&password=', '******',  # value redacted in the original
        '&verifycode=', verifycode,
        '&mem_pass=on',
        '&rsakey=', rsakey,
        '&crypttype=12&ppui_logintime=', get_ppui_logintime(),
        '&callback=parent.bd__pcbs__m8g1kg',
    ])
    logger.debug('auth.post_login: %s' % data)
    headers = {
        'Cookie': cookie.header_output(),
        'Content-Type': const.CONTENT_FORM,
        'Referer': const.REFERER,
        'Connection': 'Keep-Alive',
    }
    req = net.urlopen(url, headers=headers, data=data.encode())
    if req:
        auth_cookie = req.headers.get_all('Set-Cookie')
        resp_content = req.data.decode()
        match = re.findall('"(err_no[^"]+)"', resp_content)
        if len(match) != 1:
            return (-1, None)
        query = dict(urllib.parse.parse_qsl(match[0]))
        err_no = int(query.get('err_no', '-1'))
        if err_no == 0:
            return (0, auth_cookie)
        if err_no != 257:
            return (err_no, None)
        vcodetype = query.get('vcodetype', '')
        codeString = query.get('codeString', '')
        if vcodetype and codeString:
            return (257, (vcodetype, codeString))
    return (-1, None)
def on_load_url(filelist, error=None):
    self.url_entry.props.secondary_icon_name = REFRESH_ICON
    if timestamp != self.url_entry.timestamp:
        logger.debug("SharePage.load_url, dirname not match, ignored")
        return
    if error or not filelist:
        self.app.toast(_("Failed to get files, please reload this page"))
        logger.warn("SharePage.load_url: %s, %s, %s" %
                    (self.curr_url, filelist, error))
        self.has_next = False
        return
    state = self.select_all_button.get_active()
    tree_iters = []
    # Insert ".."; clicking it goes back to the parent directory
    if filelist and self.dirname and self.dirname != "/":
        parent_dirname = os.path.dirname(self.dirname)
        pixbuf, type_ = self.app.mime.get(parent_dirname, True,
                                          icon_size=ICON_SIZE)
        large_pixbuf, type_ = self.app.mime.get(parent_dirname, True,
                                                icon_size=LARGE_ICON_SIZE)
        self.liststore.append([state, pixbuf, large_pixbuf, "..",
                               parent_dirname, True, 0, "0", 0, ""])
    for file_ in filelist:
        isdir = file_["isdir"] == "1"
        pixbuf, type_ = self.app.mime.get(file_["path"], isdir,
                                          icon_size=ICON_SIZE)
        large_pixbuf, type_ = self.app.mime.get(file_["path"], isdir,
                                                icon_size=LARGE_ICON_SIZE)
        size = int(file_.get("size", 0))
        human_size = util.get_human_size(size)[0]
        mtime = int(file_.get("server_mtime", 0))
        human_mtime = time.ctime(mtime)
        tree_iter = self.liststore.append([
            state, pixbuf, large_pixbuf,
            file_["server_filename"], file_["path"], isdir,
            size, human_size, mtime, human_mtime,
        ])
        tree_iters.append(tree_iter)
    cache_path = Config.get_cache_path(self.app.profile["username"])
    gutil.async_call(gutil.update_share_image, self.liststore, tree_iters,
                     ICON_COL, LARGE_ICON_COL, filelist, cache_path,
                     ICON_SIZE, LARGE_ICON_SIZE)
def get_req(self, start_size, end_size):
    '''Open the socket.'''
    logger.debug('DownloadBatch.get_req: %s, %s' % (start_size, end_size))
    opener = request.build_opener()
    content_range = 'bytes={0}-{1}'.format(start_size, end_size)
    opener.addheaders = [('Range', content_range)]
    for i in range(RETRIES):
        try:
            return opener.open(self.url, timeout=self.timeout)
        except OSError:
            logger.error(traceback.format_exc())
    return None
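# Side note on the Range header built above: HTTP byte ranges are inclusive at
# both ends, so 'bytes=0-1023' asks for exactly 1024 bytes. A standalone,
# hedged sketch against any Range-capable server, using the same urllib
# machinery as get_req():
def fetch_range(url, start_size, end_size, timeout=20):
    opener = request.build_opener()
    opener.addheaders = [('Range',
                          'bytes={0}-{1}'.format(start_size, end_size))]
    req = opener.open(url, timeout=timeout)
    # on a 206 Partial Content response, len(data) == end_size - start_size + 1
    return req.read()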
def download(self):
    offset = self.start_size
    req = self.get_req(offset, self.end_size)
    if not req:
        self.queue.put((self.id_, BATCH_ERROR), block=False)
        return
    while not self.stop_flag:
        block = b''  # keep defined for the retry-exhausted log below
        for i in range(DOWNLOAD_RETRIES):
            if not req:
                req = self.get_req(offset, self.end_size)
                logger.debug('DownloadBatch.download: socket reconnected')
            try:
                block = req.read(CHUNK_SIZE)
                if block:
                    break
            except (OSError, AttributeError):
                # self.queue.put((self.id_, BATCH_ERROR), block=False)
                logger.error(traceback.format_exc())
                req = None
            except:
                req = None
                logger.error('Timeout occurred.')
                # self.queue.put((self.id_, BATCH_ERROR), block=False)
                # return
        else:
            logger.error('DownloadBatch, block is empty: %s, %s, %s, %s' %
                         (offset, self.start_size, self.end_size, len(block)))
            self.queue.put((self.id_, BATCH_ERROR), block=False)
            return
        with self.lock:
            if self.fh.closed:
                return
            self.fh.seek(offset)
            self.fh.write(block)
        self.queue.put((self.id_, len(block)), block=False)
        offset += len(block)
        # this batch is finished
        if offset >= self.end_size:
            self.queue.put((self.id_, BATCH_FINISISHED), block=False)
            return
def urlopen_simple(url, retries=RETRIES, timeout=TIMEOUT):
    if DEBUG:
        logger.debug('net.urlopen_simple: %s' % url)
    for i in range(retries):
        try:
            if DEBUG and i > 0:
                logger.debug('net.urlopen_simple: retried, %d' % i)
            opener = urllib.request.build_opener(ForbiddenHandler)
            opener.addheaders = [(k, v) for k, v in default_headers.items()]
            return opener.open(url, timeout=timeout)
        except OSError:
            logger.error(traceback.format_exc())
        except:
            logger.error(traceback.format_exc())
    return None
def get_req(self, start_size, end_size):
    '''Open the socket.'''
    logger.debug('DownloadBatch.get_req: %s, %s' % (start_size, end_size))
    opener = request.build_opener()
    content_range = 'bytes={0}-{1}'.format(start_size, end_size)
    opener.addheaders = [
        ('Range', content_range),
        ('User-Agent', const.USER_AGENT),
        ('Accept', '*/*'),
        ('Connection', 'close'),
    ]
    for i in range(RETRIES):
        try:
            return opener.open(self.url, timeout=self.timeout)
        #except OSError:
        #    logger.error(traceback.format_exc())
        #    self.queue.put((self.id_, BATCH_ERROR), block=False)
        #    return None
        except:
            logger.error(traceback.format_exc())
    # all retries failed
    self.queue.put((self.id_, BATCH_ERROR), block=False)
    return None
def get_req(self, start_size, end_size):
    '''Open the socket.'''
    logger.debug('DownloadBatch.get_req: %s, %s' % (start_size, end_size))
    opener = request.build_opener()
    content_range = 'bytes={0}-{1}'.format(start_size, end_size)
    opener.addheaders = [
        ('Range', content_range),
        ('User-Agent', const.USER_AGENT),
        ('Referer', const.PAN_REFERER),
    ]
    for i in range(RETRIES):
        try:
            return opener.open(self.url, timeout=self.timeout)
        except OSError:
            logger.error(traceback.format_exc())
            self.queue.put((self.id_, BATCH_ERROR), block=False)
            return None
        except:
            self.queue.put((self.id_, BATCH_ERROR), block=False)
            return None
    return None
def urloption(url, headers={}, retries=RETRIES):
    '''Send an OPTIONS request.'''
    if DEBUG:
        logger.debug('net.urloption: < URL: %s' % url)
        if headers:
            for key, value in headers.items():
                logger.debug('net.urloption: < HEADER: %s' %
                             '{0}: {1}'.format(key, value))
    headers_merged = default_headers.copy()
    for key in headers.keys():
        headers_merged[key] = headers[key]
    schema = urllib.parse.urlparse(url)
    for i in range(retries):
        try:
            if DEBUG and i > 0:
                logger.debug('net.urloption: retried, %d' % i)
            conn = http.client.HTTPConnection(schema.netloc)
            conn.request('OPTIONS', url, headers=headers_merged)
            resp = conn.getresponse()
            if DEBUG:
                logger.debug('net.urloption: > STATUS: %d %s' %
                             (resp.getcode(),
                              http.client.responses[resp.getcode()]))
                for key, value in resp.getheaders():
                    logger.debug('net.urloption: > HEADER: %s' %
                                 '{0}: {1}'.format(key, value))
            return resp
        except OSError:
            logger.error(traceback.format_exc())
        except:
            logger.error(traceback.format_exc())
    return None
from bcloud.CategoryPage import *
from bcloud.CloudPage import CloudPage
from bcloud.DownloadPage import DownloadPage
from bcloud.HomePage import HomePage
from bcloud.PreferencesDialog import PreferencesDialog
from bcloud.SharePage import SharePage
from bcloud.SigninDialog import SigninDialog
from bcloud.TrashPage import TrashPage
from bcloud.UploadPage import UploadPage

try:
    # Ubuntu Unity uses appindicator instead of a status icon
    from gi.repository import AppIndicator3 as AppIndicator
except ImportError:
    logger.debug(traceback.format_exc())

if Config.GTK_LE_36:
    GObject.threads_init()

(ICON_COL, NAME_COL, TOOLTIP_COL, COLOR_COL) = list(range(4))
BLINK_DELTA = 250      # blink interval, 250 milliseconds
BLINK_SUSTAINED = 3    # blink duration, 3 seconds

# Targets for handling drag-and-drop uploads
DROP_TARGETS = (
    (TargetType.URI_LIST, Gtk.TargetFlags.OTHER_APP, TargetInfo.URI_LIST),
)
DROP_TARGET_LIST = [Gtk.TargetEntry.new(*t) for t in DROP_TARGETS]
def add_view_history(self, abs_path):
    logger.debug("add view history: %s", abs_path)
    self.view_history = self.view_history[:self.view_history_pos + 1]
    self.view_history.append(abs_path)
    self.view_history_pos += 1
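# A tiny standalone demo of the history semantics above: navigating somewhere
# new after stepping back discards the forward entries, exactly as
# add_view_history() slices view_history to view_history_pos + 1 first.
history = ['/a', '/a/b', '/a/b/c']
pos = 1                      # the user has stepped back to '/a/b'
history = history[:pos + 1]  # drops the forward entry '/a/b/c'
history.append('/a/d')       # then visits a new directory
pos += 1
assert history == ['/a', '/a/b', '/a/d'] and pos == 2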
def urlopen(url, headers={}, data=None, retries=RETRIES, timeout=TIMEOUT):
    '''Open an http connection and return a Request-like object.

    headers is a dict. Some entries, like User-Agent and Referer, are
    provided by default and need not be repeated.

    This function is only meant for http requests; do not use it to
    download large files.

    If the server supports gzip compression, the data is transferred
    gzip-compressed and decompressed locally. req.data holds the final
    http payload, usually UTF-8 encoded text.
    '''
    if DEBUG:
        logger.debug('net.urlopen: < URL: %s' % url)
        if headers:
            for key, value in headers.items():
                logger.debug('net.urlopen: < HEADER: %s' %
                             '{0}: {1}'.format(key, value))
        if data:
            logger.debug('net.urlopen: < DATA: %s' % data.decode())
    headers_merged = default_headers.copy()
    for key in headers.keys():
        headers_merged[key] = headers[key]
    opener = urllib.request.build_opener(ForbiddenHandler)
    opener.addheaders = [(k, v) for k, v in headers_merged.items()]
    for i in range(retries):
        try:
            if DEBUG and i > 0:
                logger.debug('net.urlopen: retried, %d' % i)
            req = opener.open(url, data=data, timeout=timeout)
            encoding = req.headers.get('Content-encoding')
            req.data = req.read()
            if encoding == 'gzip':
                req.data = gzip.decompress(req.data)
            elif encoding == 'deflate':
                req.data = zlib.decompress(req.data, -zlib.MAX_WBITS)
            if DEBUG:
                logger.debug('net.urlopen: > STATUS: %d %s' %
                             (req.getcode(),
                              http.client.responses[req.getcode()]))
                for key, value in req.getheaders():
                    logger.debug('net.urlopen: > HEADER: %s' %
                                 '{0}: {1}'.format(key, value))
            return req
        except OSError:
            logger.error(traceback.format_exc())
        except:
            logger.error(traceback.format_exc())
    return None
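# A minimal usage sketch for urlopen(); the URL and cookie value are
# illustrative assumptions, not part of the module. It mirrors the pattern the
# callers above already use (check the return value, decode req.data,
# json.loads it, treat ValueError as a failed response); json, logger and
# traceback are imported by those callers.
def fetch_json(url, cookie_header):
    req = urlopen(url, headers={'Cookie': cookie_header})
    if not req:
        return None
    try:
        return json.loads(req.data.decode())
    except ValueError:
        logger.error(traceback.format_exc())
        return None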
def download(self):
    row = self.row
    if not os.path.exists(row[SAVEDIR_COL]):
        os.makedirs(row[SAVEDIR_COL], exist_ok=True)
    filepath, tmp_filepath, conf_filepath = get_tmp_filepath(
            row[SAVEDIR_COL], row[SAVENAME_COL])

    if os.path.exists(filepath):
        if self.download_mode == DownloadMode.IGNORE:
            self.emit('downloaded', row[FSID_COL])
            logger.debug('File exists, ignored!')
            return
        elif self.download_mode == DownloadMode.NEWCOPY:
            name, ext = os.path.splitext(filepath)
            filepath = '{0}_{1}{2}'.format(name, util.curr_time(), ext)

    url = pcs.get_download_link(self.cookie, self.tokens, row[PATH_COL])
    if not url:
        row[STATE_COL] = State.ERROR
        self.emit('network-error', row[FSID_COL])
        logger.warn('Failed to get url to download')
        return

    if os.path.exists(conf_filepath) and os.path.exists(tmp_filepath):
        with open(conf_filepath) as conf_fh:
            status = json.load(conf_fh)
        threads = len(status)
        file_exists = True
        fh = open(tmp_filepath, 'rb+')
        fh.seek(0)
    else:
        req = net.urlopen_simple(url)
        if not req:
            logger.warn('Failed to get url to download')
            self.emit('network-error', row[FSID_COL])
            return
        content_length = req.getheader('Content-Length')
        # Fixed: baiduPCS uses a non iso-8859-1 codec in http headers
        if not content_length:
            match = re.search(r'\sContent-Length:\s*(\d+)', str(req.headers))
            if not match:
                logger.warn('Failed to get url to download')
                self.emit('network-error', row[FSID_COL])
                return
            content_length = match.group(1)
        size = int(content_length)
        if size <= SMALL_FILE_SIZE:
            threads = 1
        else:
            threads = self.default_threads
        average_size, pad_size = divmod(size, threads)
        file_exists = False
        status = []
        fh = open(tmp_filepath, 'wb')
        try:
            fh.truncate(size)
        except (OSError, IOError):
            logger.error(traceback.format_exc())
            self.emit('disk-error', row[FSID_COL], tmp_filepath)
            return

    # task list
    tasks = []
    # message queue
    queue = Queue()
    # threads lock
    lock = threading.RLock()
    for id_ in range(threads):
        if file_exists:
            start_size, end_size, received = status[id_]
            if start_size + received >= end_size:
                # this part of the file has already been downloaded
                continue
            start_size += received
        else:
            start_size = id_ * average_size
            end_size = start_size + average_size - 1
            if id_ == threads - 1:
                end_size = end_size + pad_size + 1
            status.append([start_size, end_size, 0])
        task = DownloadBatch(id_, queue, url, lock, start_size, end_size,
                             fh, self.timeout)
        tasks.append(task)

    for task in tasks:
        task.start()

    try:
        conf_count = 0
        done = 0
        self.emit('started', row[FSID_COL])
        while row[STATE_COL] == State.DOWNLOADING:
            id_, received = queue.get()
            # finished
            if received == BATCH_FINISISHED:
                done += 1
                if done == len(tasks):
                    row[STATE_COL] = State.FINISHED
                    break
                else:
                    continue
            # an error occurred
            elif received == BATCH_ERROR:
                row[STATE_COL] = State.ERROR
                break
            status[id_][2] += received
            conf_count += 1
            # flush data and status to disk
            if conf_count > THRESHOLD_TO_FLUSH:
                with lock:
                    if not fh.closed:
                        fh.flush()
                with open(conf_filepath, 'w') as conf_fh:
                    json.dump(status, conf_fh)
                conf_count = 0
            received_total = sum(t[2] for t in status)
            self.emit('received', row[FSID_COL], received, received_total)
    except Exception:
        logger.error(traceback.format_exc())
        row[STATE_COL] = State.ERROR

    with lock:
        if not fh.closed:
            fh.close()
    for task in tasks:
        if task.isAlive():
            task.stop()
    with open(conf_filepath, 'w') as conf_fh:
        json.dump(status, conf_fh)

    if row[STATE_COL] == State.CANCELED:
        os.remove(tmp_filepath)
        if os.path.exists(conf_filepath):
            os.remove(conf_filepath)
    elif row[STATE_COL] == State.ERROR:
        self.emit('network-error', row[FSID_COL])
    elif row[STATE_COL] == State.FINISHED:
        self.emit('downloaded', row[FSID_COL])
        os.rename(tmp_filepath, filepath)
        if os.path.exists(conf_filepath):
            os.remove(conf_filepath)
def post_multipart(url, headers, fields, files, retries=RETRIES):
    content_type, body = encode_multipart_formdata(fields, files)
    schema = urllib.parse.urlparse(url)
    headers_merged = default_headers.copy()
    for key in headers.keys():
        headers_merged[key] = headers[key]
    headers_merged['Content-Type'] = content_type
    headers_merged['Content-length'] = str(len(body))
    if DEBUG:
        logger.debug('net.post_multipart: < URL: %s' % url)
        if headers:
            for key, value in headers.items():
                logger.debug('net.post_multipart: < HEADER: %s' %
                             '{0}: {1}'.format(key, value))
        logger.debug('net.post_multipart: < Filename: %s' % files[0][1])
        logger.debug('net.post_multipart: < Filesize: %d' % len(files[0][2]))
    for i in range(retries):
        try:
            if DEBUG and i > 0:
                logger.debug('net.post_multipart: retried, %d' % i)
            h = http.client.HTTPConnection(schema.netloc)
            h.request('POST', url, body=body, headers=headers_merged)
            req = h.getresponse()
            encoding = req.getheader('Content-encoding')
            req.data = req.read()
            if encoding == 'gzip':
                req.data = gzip.decompress(req.data)
            elif encoding == 'deflate':
                req.data = zlib.decompress(req.data, -zlib.MAX_WBITS)
            if DEBUG:
                logger.debug('net.post_multipart: > STATUS: %d %s' %
                             (req.getcode(),
                              http.client.responses[req.getcode()]))
                for key, value in req.getheaders():
                    logger.debug('net.post_multipart: > HEADER: %s' %
                                 '{0}: {1}'.format(key, value))
            return req
        except OSError:
            logger.error(traceback.format_exc())
        except:
            logger.error(traceback.format_exc())
    return None
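# The shapes post_multipart() expects can be read off the indexing above
# (files[0][1] is the filename, files[0][2] the raw content): fields is a
# sequence of (name, value) pairs and files a sequence of
# (name, filename, content) triples, both handed to
# encode_multipart_formdata(). A hedged sketch; the URL, header and field
# names below are illustrative assumptions, not a documented API.
def upload_sketch(url, cookie_header, filename, content):
    fields = [('method', 'upload')]
    files = [('file', filename, content)]
    headers = {'Cookie': cookie_header}
    return post_multipart(url, headers, fields, files)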
def on_load_url(filelist, error=None):
    self.url_entry.props.secondary_icon_name = REFRESH_ICON
    if timestamp != self.url_entry.timestamp:
        logger.debug('SharePage.load_url, dirname not match, ignored')
        return
    if error:
        self.app.toast(_('Failed to get files, please reload this page'))
        logger.warn('SharePage.load_url: %s, %s, %s' %
                    (self.curr_url, filelist, error))
        self.has_next = False
        return
    state = self.select_all_button.get_active()
    tree_iters = []
    # Insert ".."; clicking it goes back to the parent directory
    if self.dirname and self.dirname != '/':
        parent_dirname = os.path.dirname(self.dirname)
        pixbuf, type_ = self.app.mime.get(parent_dirname, True,
                                          icon_size=ICON_SIZE)
        large_pixbuf, type_ = self.app.mime.get(parent_dirname, True,
                                                icon_size=LARGE_ICON_SIZE)
        self.liststore.append([
            state, pixbuf, large_pixbuf, '..', parent_dirname,
            True, 0, '0', 0, '',
        ])
    for file_ in filelist:
        isdir = file_['isdir'] in ('1', 1)
        pixbuf, type_ = self.app.mime.get(file_['path'], isdir,
                                          icon_size=ICON_SIZE)
        large_pixbuf, type_ = self.app.mime.get(file_['path'], isdir,
                                                icon_size=LARGE_ICON_SIZE)
        size = int(file_.get('size', 0))
        human_size = util.get_human_size(size)[0]
        mtime = int(file_.get('server_mtime', 0))
        human_mtime = time.ctime(mtime)
        tree_iter = self.liststore.append([
            state, pixbuf, large_pixbuf,
            file_['server_filename'], file_['path'], isdir,
            size, human_size, mtime, human_mtime,
        ])
        tree_iters.append(tree_iter)
    cache_path = Config.get_cache_path(self.app.profile['username'])
    gutil.async_call(gutil.update_share_image, self.liststore, tree_iters,
                     ICON_COL, LARGE_ICON_COL, filelist, cache_path,
                     ICON_SIZE, LARGE_ICON_SIZE)
def post_login(cookie, tokens, username, password, rsakey,
               verifycode='', codestring=''):
    '''Verify the login.

    password   - the RSA-encrypted password, base64 encoded
    rsakey     - the rsakey matching public_key
    verifycode - the captcha text, empty by default

    @return (status, info). status is the returned state:
        0   - OK; info holds auth_cookie
        -1  - unknown error
        4   - wrong password
        257 - captcha required; info holds (vcodetype, codeString)
    '''
    url = const.PASSPORT_LOGIN
    data = ''.join([
        'staticpage=https%3A%2F%2Fpassport.baidu.com%2Fstatic%2Fpasspc-account%2Fhtml%2Fv3Jump.html',
        '&charset=UTF-8',
        '&token=', tokens['token'],
        '&tpl=pp&subpro=&apiver=v3',
        '&tt=', util.timestamp(),
        '&codestring=', codestring,
        '&safeflg=0&u=http%3A%2F%2Fpassport.baidu.com%2F',
        '&isPhone=',
        '&quick_user=0&logintype=basicLogin&logLoginType=pc_loginBasic&idc=',
        '&loginmerge=true',
        '&username=', '******',  # value redacted in the original
        '&password=', '******',  # value redacted in the original
        '&verifycode=', verifycode,
        '&mem_pass=on',
        '&rsakey=', rsakey,
        '&crypttype=12',
        '&ppui_logintime=', get_ppui_logintime(),
        '&callback=parent.bd__pcbs__28g1kg',
    ])
    logger.debug('auth.post_login: %s' % data)
    logger.debug('cookie: %s' % cookie.header_output())
    headers = {
        'Accept': const.ACCEPT_HTML,
        'Cookie': cookie.sub_output('BAIDUID', 'HOSUPPORT', 'UBI'),
        'Referer': const.REFERER,
        'Connection': 'Keep-Alive',
    }
    req = net.urlopen(url, headers=headers, data=data.encode())
    if req:
        auth_cookie = req.headers.get_all('Set-Cookie')
        resp_content = req.data.decode()
        logger.debug('tokens: %s' % tokens)
        logger.debug('post login content: %s' % resp_content)
        logger.debug('post login cookie: %s' %
                     req.headers.get_all('Set-Cookie'))
        logger.debug('post login header: %s' % req.headers.items())
        match = re.findall('"(err_no[^"]+)"', resp_content)
        if len(match) != 1:
            return (-1, None)
        query = dict(urllib.parse.parse_qsl(match[0]))
        err_no = int(query.get('err_no', '-1'))
        if err_no == 0:
            return (0, auth_cookie)
        if err_no != 257:
            return (err_no, None)
        vcodetype = query.get('vcodetype', '')
        codeString = query.get('codeString', '')
        if vcodetype and codeString:
            return (257, (vcodetype, codeString))
    return (-1, None)
def download(self):
    row = self.row
    if not os.path.exists(row[SAVEDIR_COL]):
        os.makedirs(row[SAVEDIR_COL], exist_ok=True)
    filepath, tmp_filepath, conf_filepath = get_tmp_filepath(
            row[SAVEDIR_COL], row[SAVENAME_COL])

    if os.path.exists(filepath):
        if self.download_mode == DownloadMode.IGNORE:
            self.emit('downloaded', row[FSID_COL])
            logger.debug('File exists, ignored!')
            return
        elif self.download_mode == DownloadMode.NEWCOPY:
            name, ext = os.path.splitext(filepath)
            filepath = '{0}_{1}{2}'.format(name, util.curr_time(), ext)

    url = pcs.get_download_link(self.cookie, self.tokens, row[PATH_COL])
    if not url:
        row[STATE_COL] = State.ERROR
        self.emit('network-error', row[FSID_COL])
        logger.warn('Failed to get url to download')
        return

    if os.path.exists(conf_filepath) and os.path.exists(tmp_filepath):
        with open(conf_filepath) as conf_fh:
            status = json.load(conf_fh)
        threads = len(status)
        file_exists = True
        fh = open(tmp_filepath, 'rb+')
        fh.seek(0)
    else:
        req = net.urlopen_simple(url)
        if not req:
            logger.warn('Failed to get url to download')
            self.emit('network-error', row[FSID_COL])
            return
        content_length = req.getheader('Content-Length')
        # Fixed: baiduPCS uses a non iso-8859-1 codec in http headers
        if not content_length:
            match = re.search(r'\sContent-Length:\s*(\d+)', str(req.headers))
            if not match:
                logger.warn('Failed to get url to download')
                self.emit('network-error', row[FSID_COL])
                return
            content_length = match.group(1)
        size = int(content_length)
        if size == 0:
            open(filepath, 'a').close()
            self.emit('downloaded', row[FSID_COL])
            return
        elif size <= SMALL_FILE_SIZE:
            threads = 1
        else:
            threads = self.default_threads
        average_size, pad_size = divmod(size, threads)
        file_exists = False
        status = []
        fh = open(tmp_filepath, 'wb')
        try:
            fh.truncate(size)
        except (OSError, IOError):
            logger.error(traceback.format_exc())
            self.emit('disk-error', row[FSID_COL], tmp_filepath)
            return

    # task list
    tasks = []
    # message queue
    queue = Queue()
    # threads lock
    lock = threading.RLock()
    for id_ in range(threads):
        if file_exists:
            start_size, end_size, received = status[id_]
            if start_size + received >= end_size:
                # this part of the file has already been downloaded
                continue
            start_size += received
        else:
            start_size = id_ * average_size
            end_size = start_size + average_size - 1
            if id_ == threads - 1:
                end_size = end_size + pad_size + 1
            status.append([start_size, end_size, 0])
        task = DownloadBatch(id_, queue, url, lock, start_size, end_size,
                             fh, self.timeout)
        tasks.append(task)

    for task in tasks:
        task.start()

    try:
        conf_count = 0
        done = 0
        self.emit('started', row[FSID_COL])
        while row[STATE_COL] == State.DOWNLOADING:
            id_, received = queue.get()
            # finished
            if received == BATCH_FINISISHED:
                done += 1
                if done == len(tasks):
                    row[STATE_COL] = State.FINISHED
                    break
                else:
                    continue
            # an error occurred
            elif received == BATCH_ERROR:
                row[STATE_COL] = State.ERROR
                break
            status[id_][2] += received
            conf_count += 1
            # flush data and status to disk
            if conf_count > THRESHOLD_TO_FLUSH:
                with lock:
                    if not fh.closed:
                        fh.flush()
                with open(conf_filepath, 'w') as conf_fh:
                    json.dump(status, conf_fh)
                conf_count = 0
            received_total = sum(t[2] for t in status)
            self.emit('received', row[FSID_COL], received, received_total)
    except Exception:
        logger.error(traceback.format_exc())
        row[STATE_COL] = State.ERROR

    with lock:
        if not fh.closed:
            fh.close()
    for task in tasks:
        if task.isAlive():
            task.stop()
    with open(conf_filepath, 'w') as conf_fh:
        json.dump(status, conf_fh)

    if row[STATE_COL] == State.CANCELED:
        if os.path.exists(tmp_filepath):
            os.remove(tmp_filepath)
        if os.path.exists(conf_filepath):
            os.remove(conf_filepath)
    elif row[STATE_COL] == State.ERROR:
        self.emit('network-error', row[FSID_COL])
    elif row[STATE_COL] == State.FINISHED:
        self.emit('downloaded', row[FSID_COL])
        os.rename(tmp_filepath, filepath)
        if os.path.exists(conf_filepath):
            os.remove(conf_filepath)
def urlopen_without_redirect(url, headers={}, data=None, retries=RETRIES):
    '''Request a URL and return a Response object, without following redirects.

    Use this function to read the target address of a redirected URL
    (Error 301/302), the size of the requested file, or other
    authentication info carried in the headers.
    '''
    if DEBUG:
        logger.debug('net.urlopen_without_redirect: < URL: %s' % url)
        if headers:
            for key, value in headers.items():
                logger.debug('net.urlopen_without_redirect: < HEADER: %s' %
                             '{0}: {1}'.format(key, value))
        if data:
            logger.debug('net.urlopen_without_redirect: < DATA: %s' %
                         data.decode())
    headers_merged = default_headers.copy()
    for key in headers.keys():
        headers_merged[key] = headers[key]
    parse_result = urllib.parse.urlparse(url)
    for i in range(retries):
        try:
            if DEBUG and i > 0:
                logger.debug('net.urlopen_without_redirect: retried, %d' % i)
            if parse_result.scheme == 'https':
                conn = http.client.HTTPSConnection(parse_result.netloc)
            else:
                conn = http.client.HTTPConnection(parse_result.netloc)
            if data:
                conn.request('POST', url, body=data, headers=headers_merged)
            else:
                conn.request('GET', url, body=data, headers=headers_merged)
            resp = conn.getresponse()
            if DEBUG:
                logger.debug('net.urlopen_without_redirect: > STATUS: %d %s' %
                             (resp.getcode(),
                              http.client.responses[resp.getcode()]))
                for key, value in resp.getheaders():
                    logger.debug('net.urlopen_without_redirect: > HEADER: %s' %
                                 '{0}: {1}'.format(key, value))
            return resp
        except OSError:
            logger.error(traceback.format_exc())
        except:
            logger.error(traceback.format_exc())
    return None
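# A minimal sketch of the use case named in the docstring above: read the
# redirect target of a 301/302 response without following it. getcode() and
# getheader() are standard http.client.HTTPResponse methods, so nothing beyond
# this module is assumed.
def get_redirect_target(url):
    resp = urlopen_without_redirect(url)
    if resp and resp.getcode() in (301, 302):
        return resp.getheader('Location')
    return None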