class BaiduFS(Operations):
    '''Baidu netdisk filesystem.

    fusepy Operations implementation backed by a baidupcsapi PCS client:
    file metadata is cached in self.buffer, reads are ranged HTTP
    downloads, and writes are buffered into temp files and uploaded in
    large blocks that are finally merged with upload_superfile.
    '''
    def __init__(self, username, password, *args, **kw):
        # PCS client; captcha_get is presumably a module-level captcha
        # callback defined elsewhere in this file -- TODO confirm.
        self.disk = PCS(username, password,captcha_get)
        self.buffer = {}             # path -> File (stat-like dict wrapper)
        self.traversed_folder = {}   # path -> True once readdir cached its metas
        self.bufferLock = Lock()
        self.upload_blocks = {} # per-path upload state: {PATH: {'tmp': file, 'blocks': [md5, ...]}}
        self.create_tmp = {} # {goutputstrem_path:file}
        self.upload_fails = {}
        #
        self.fd = 3  # fake descriptor counter handed out by open()/create()
        # initialise the Baidu servers: pick the fastest PCS mirror
        print '设置pcs服务器'
        pcs = self.disk.get_fastest_pcs_server()
        self.disk.set_pcs_server(pcs)
        print 'pcs api server:',pcs
        '''
        print '设置百度网盘服务器,时间比较长,请耐心等待'
        pan = self.disk.get_fastest_mirror()
        self.disk.set_pan_server(pan)
        print 'baidupan server',pan
        '''
        self.uploadLock = Lock() # held so directory listings are not refreshed mid-upload
        self.readLock = Lock()
        self.downloading_files = []

    def unlink(self, path):
        """Delete the remote file at path."""
        print '*'*10,'UNLINK CALLED',path
        self.disk.delete([path])

    def _add_file_to_buffer(self, path,file_info):
        """Convert a PCS meta dict into a File entry cached under path."""
        foo = File()
        foo['st_ctime'] = file_info['local_ctime']
        foo['st_mtime'] = file_info['local_mtime']
        # every entry is exposed world-readable/writable (0777)
        foo['st_mode'] = (stat.S_IFDIR | 0777) if file_info['isdir'] \
            else (stat.S_IFREG | 0777)
        foo['st_nlink'] = 2 if file_info['isdir'] else 1
        foo['st_size'] = file_info['size']
        self.buffer[path] = foo

    def _del_file_from_buffer(self,path):
        """Drop path from the metadata cache (KeyError if absent)."""
        self.buffer.pop(path)

    def getattr(self, path, fh=None):
        """Return a stat dict for path, fetching meta info on a cache miss.

        NOTE(review): the bare except maps *any* failure (network error,
        bad JSON, missing keys) to ENOENT.
        """
        #print 'getattr *',path
        # consult the cache first
        if not self.buffer.has_key(path):
            print path,'未命中'
            #print self.buffer
            #print self.traversed_folder
            jdata = json.loads(self.disk.meta([path]).content)
            try:
                if 'info' not in jdata:
                    raise FuseOSError(errno.ENOENT)
                if jdata['errno'] != 0:
                    raise FuseOSError(errno.ENOENT)
                file_info = jdata['info'][0]
                self._add_file_to_buffer(path,file_info)
                st = self.buffer[path].getDict()
                return st
            except:
                raise FuseOSError(errno.ENOENT)
        else:
            #print path,'命中'
            return self.buffer[path].getDict()

    def readdir(self, path, offset):
        """Yield '.', '..' and the entries of path, caching their metadata.

        Holds uploadLock so listings do not run concurrently with uploads;
        list/meta requests are retried forever on any exception.
        """
        self.uploadLock.acquire()
        while True:
            try:
                foo = json.loads(self.disk.list_files(path).text)
                break
            except:
                print 'error'
        files = ['.', '..']
        abs_files = [] # absolute paths of the files inside this folder
        for file in foo['list']:
            files.append(file['server_filename'])
            abs_files.append(file['path'])
        # Cache the files' meta info with batched meta queries.
        # Update: the meta API cannot return more than 100 records per call,
        # so split into ceil(file_num / 100.0) groups using residue classes.
        if not self.traversed_folder.has_key(path) or self.traversed_folder[path] == False:
            print '正在对',path,'缓存中'
            file_num = len(abs_files)
            group = int(math.ceil(file_num / 100.0))
            for i in range(group):
                obj = [f for n,f in enumerate(abs_files) if n % group == i] # one batch
                while 1:
                    try:
                        ret = json.loads(self.disk.meta(obj).text)
                        break
                    except:
                        print 'error'
                for file_info in ret['info']:
                    if not self.buffer.has_key(file_info['path']):
                        self._add_file_to_buffer(file_info['path'],file_info)
            #print self.buffer
            print '对',path,'的缓存完成'
            self.traversed_folder[path] = True
        for r in files:
            yield r
        self.uploadLock.release()

    def _update_file_manual(self,path):
        """Force-refresh the cached metadata of path (retries forever)."""
        while 1:
            try:
                jdata = json.loads(self.disk.meta([path]).content)
                break
            except:
                print 'error'
        if 'info' not in jdata:
            raise FuseOSError(errno.ENOENT)
        if jdata['errno'] != 0:
            raise FuseOSError(errno.ENOENT)
        file_info = jdata['info'][0]
        self._add_file_to_buffer(path,file_info)

    def rename(self, old, new):
        """Rename old to new's basename; on a name clash delete new and retry."""
        #logging.debug('* rename',old,os.path.basename(new))
        print '*'*10,'RENAME CALLED',old,os.path.basename(new),type(old),type(new)
        while True:
            try:
                ret = self.disk.rename([(old,os.path.basename(new))]).content
                jdata = json.loads(ret)
                break
            except:
                print 'error'
        if jdata['errno'] != 0:
            # target name already exists: delete it, then rename again
            print self.disk.delete([new]).content
            print self.disk.rename([(old,os.path.basename(new))])
        self._update_file_manual(new)
        self.buffer.pop(old)

    def open(self, path, flags):
        """Hand out a fake, monotonically increasing file descriptor."""
        self.readLock.acquire()
        print '*'*10,'OPEN CALLED',path,flags
        #print '[****]',path
        """
        Permission denied

        accmode = os.O_RDONLY | os.O_WRONLY | os.O_RDWR
        if (flags & accmode) != os.O_RDONLY:
            raise FuseOSError(errno.EACCES)
        """
        self.fd += 1
        self.readLock.release()
        return self.fd

    def create(self, path, mode,fh=None):
        """Create an empty remote file by uploading a zero-byte temp file.

        Raises EEXIST when the server reports a different resulting path,
        i.e. the name was already taken.
        """
        # create the file
        # Chinese paths are problematic here (original author note)
        print '*'*10,'CREATE CALLED',path,mode,type(path)
        #if 'outputstream' not in path:
        tmp_file = tempfile.TemporaryFile('r+w+b')
        foo = self.disk.upload(os.path.dirname(path),tmp_file,os.path.basename(path)).content
        ret = json.loads(foo)
        print ret
        print 'create-not-outputstream',ret
        if ret['path'] != path:
            # the file already exists on the server
            print '文件已存在'
            raise FuseOSError(errno.EEXIST)
        '''
        else:
            print 'create:',path
            foo = File()
            foo['st_ctime'] = int(time.time())
            foo['st_mtime'] = int(time.time())
            foo['st_mode'] = (stat.S_IFREG | 0777)
            foo['st_nlink'] = 1
            foo['st_size'] = 0
            self.buffer[path] = foo
        '''
        '''
        dict(st_mode=(stat.S_IFREG | mode), st_nlink=1,
                st_size=0, st_ctime=time.time(), st_mtime=time.time(),
                st_atime=time.time())
        '''
        self.fd += 1
        return 0

    def write(self, path, data, offset, fp):
        """Buffer written data and upload it to Baidu in large blocks.

        Called during file upload with 4kb ( 4096 bytes ) pieces; a piece
        with len(data) < 4096 marks the last one, and the file size is the
        last piece's offset + len(data).  4kb per request is too slow, so
        data is accumulated in a temp file and uploaded once it exceeds
        _BLOCK_SIZE; on the final piece the remainder is uploaded and all
        block md5s are merged via upload_superfile.

        NOTE(review): a final piece of exactly 4096 bytes would never
        trigger the merge branch -- relies on FUSE write sizing.
        """
        #print '*'*10,path,offset, len(data)
        def _block_size(stream):
            # size of a seekable stream (leaves position at EOF)
            stream.seek(0,2)
            return stream.tell()

        _BLOCK_SIZE = 16 * 2 ** 20
        # first-block setup
        if offset == 0:
            #self.uploadLock.acquire()
            #self.readLock.acquire()
            # initialise the block md5 list
            self.upload_blocks[path] = {'tmp':None, 'blocks':[]}
            # create the temp buffer file
            tmp_file = tempfile.TemporaryFile('r+w+b')
            self.upload_blocks[path]['tmp'] = tmp_file

        # append data to the temp file; when it exceeds _BLOCK_SIZE upload
        # the block and start a fresh temp file
        try:
            tmp = self.upload_blocks[path]['tmp']
        except KeyError:
            return 0
        tmp.write(data)

        if _block_size(tmp) > _BLOCK_SIZE:
            print path,'发生上传'
            tmp.seek(0)
            try:
                foo = self.disk.upload_tmpfile(tmp,callback=ProgressBar()).content
                foofoo = json.loads(foo)
                block_md5 = foofoo['md5']
            except:
                print foo
            # record this block's md5 in upload_blocks
            self.upload_blocks[path]['blocks'].append(block_md5)
            # replace the temp buffer file with an empty one
            self.upload_blocks[path]['tmp'].close()
            tmp_file = tempfile.TemporaryFile('r+w+b')
            self.upload_blocks[path]['tmp'] = tmp_file
            print '创建临时文件',tmp_file.name

        # last-block tasks
        if len(data) < 4096:
            # check for a name collision; delete any existing remote file
            while True:
                try:
                    foo = self.disk.meta([path]).content
                    foofoo = json.loads(foo)
                    break
                except:
                    print 'error'
            if foofoo['errno'] == 0:
                logging.debug('Deleted the file which has same name.')
                self.disk.delete([path])
            # see whether a final upload is needed
            if _block_size(tmp) != 0:
                # the temp file still holds data: upload it
                print path,'发生上传,块末尾,文件大小',_block_size(tmp)
                tmp.seek(0)
                while True:
                    try:
                        foo = self.disk.upload_tmpfile(tmp,callback=ProgressBar()).content
                        foofoo = json.loads(foo)
                        break
                    except:
                        print 'exception, retry.'
                block_md5 = foofoo['md5']
                # record this block's md5 in upload_blocks
                self.upload_blocks[path]['blocks'].append(block_md5)
            # call upload_superfile to merge the blocks
            print '合并文件',path,type(path)
            self.disk.upload_superfile(path,self.upload_blocks[path]['blocks'])
            # discard the per-path upload state
            self.upload_blocks.pop(path)
            # refresh the local file-list cache
            self._update_file_manual(path)
            #self.readLock.release()
            #self.uploadLock.release()
        return len(data)

    def mkdir(self, path, mode):
        """Create a remote directory."""
        logger.debug("mkdir is:" + path)
        self.disk.mkdir(path)

    def rmdir(self, path):
        """Remove a remote directory."""
        logger.debug("rmdir is:" + path)
        self.disk.delete([path])

    def read(self, path, size, offset, fh):
        """Return size bytes of path from offset via a ranged download.

        NOTE(review): retries forever and silently swallows every error.
        """
        #print '*'*10,'READ CALLED',path,size,offset
        #logger.debug("read is: " + path)
        paras = {'Range': 'bytes=%s-%s' % (offset, offset + size - 1)}
        while True:
            try:
                foo = self.disk.download(path, headers=paras).content
                return foo
            except:
                pass

    # operations deliberately left unimplemented for fusepy
    access = None
    statfs = None
class BaiduFS(Operations):
    '''Baidu netdisk filesystem.

    NOTE(review): this file defines BaiduFS twice; this later definition
    shadows the earlier one at import time (this copy constructs PCS
    without a captcha callback).

    fusepy Operations implementation backed by a baidupcsapi PCS client:
    file metadata is cached in self.buffer, reads are ranged HTTP
    downloads, and writes are buffered into temp files and uploaded in
    large blocks that are finally merged with upload_superfile.
    '''
    def __init__(self, username, password, *args, **kw):
        self.disk = PCS(username, password)
        self.buffer = {}             # path -> File (stat-like dict wrapper)
        self.traversed_folder = {}   # path -> True once readdir cached its metas
        self.bufferLock = Lock()
        self.upload_blocks = {} # per-path upload state: {PATH: {'tmp': file, 'blocks': [md5, ...]}}
        self.create_tmp = {} # {goutputstrem_path:file}
        self.upload_fails = {}
        #
        self.fd = 3  # fake descriptor counter handed out by open()/create()
        # initialise the Baidu servers: pick the fastest PCS mirror
        print '设置pcs服务器'
        pcs = self.disk.get_fastest_pcs_server()
        self.disk.set_pcs_server(pcs)
        print 'pcs api server:', pcs
        '''
        print '设置百度网盘服务器,时间比较长,请耐心等待'
        pan = self.disk.get_fastest_mirror()
        self.disk.set_pan_server(pan)
        print 'baidupan server',pan
        '''
        self.uploadLock = Lock() # held so directory listings are not refreshed mid-upload
        self.readLock = Lock()
        self.downloading_files = []

    def unlink(self, path):
        """Delete the remote file at path."""
        print '*' * 10, 'UNLINK CALLED', path
        self.disk.delete([path])

    def _add_file_to_buffer(self, path, file_info):
        """Convert a PCS meta dict into a File entry cached under path."""
        foo = File()
        foo['st_ctime'] = file_info['local_ctime']
        foo['st_mtime'] = file_info['local_mtime']
        # every entry is exposed world-readable/writable (0777)
        foo['st_mode'] = (stat.S_IFDIR | 0777) if file_info['isdir'] \
            else (stat.S_IFREG | 0777)
        foo['st_nlink'] = 2 if file_info['isdir'] else 1
        foo['st_size'] = file_info['size']
        self.buffer[path] = foo

    def _del_file_from_buffer(self, path):
        """Drop path from the metadata cache (KeyError if absent)."""
        self.buffer.pop(path)

    def getattr(self, path, fh=None):
        """Return a stat dict for path, fetching meta info on a cache miss.

        NOTE(review): the bare except maps *any* failure (network error,
        bad JSON, missing keys) to ENOENT.
        """
        #print 'getattr *',path
        # consult the cache first
        if not self.buffer.has_key(path):
            print path, '未命中'
            #print self.buffer
            #print self.traversed_folder
            jdata = json.loads(self.disk.meta([path]).content)
            try:
                if 'info' not in jdata:
                    raise FuseOSError(errno.ENOENT)
                if jdata['errno'] != 0:
                    raise FuseOSError(errno.ENOENT)
                file_info = jdata['info'][0]
                self._add_file_to_buffer(path, file_info)
                st = self.buffer[path].getDict()
                return st
            except:
                raise FuseOSError(errno.ENOENT)
        else:
            #print path,'命中'
            return self.buffer[path].getDict()

    def readdir(self, path, offset):
        """Yield '.', '..' and the entries of path, caching their metadata.

        Holds uploadLock so listings do not run concurrently with uploads;
        list/meta requests are retried forever on any exception.
        """
        self.uploadLock.acquire()
        while True:
            try:
                foo = json.loads(self.disk.list_files(path).text)
                break
            except:
                print 'error'
        files = ['.', '..']
        abs_files = []  # absolute paths of the files inside this folder
        for file in foo['list']:
            files.append(file['server_filename'])
            abs_files.append(file['path'])
        # Cache the files' meta info with batched meta queries.
        # Update: the meta API cannot return more than 100 records per call,
        # so split into ceil(file_num / 100.0) groups using residue classes.
        if not self.traversed_folder.has_key(
                path) or self.traversed_folder[path] == False:
            print '正在对', path, '缓存中'
            file_num = len(abs_files)
            group = int(math.ceil(file_num / 100.0))
            for i in range(group):
                obj = [f for n, f in enumerate(abs_files)
                       if n % group == i]  # one batch
                while 1:
                    try:
                        ret = json.loads(self.disk.meta(obj).text)
                        break
                    except:
                        print 'error'
                for file_info in ret['info']:
                    if not self.buffer.has_key(file_info['path']):
                        self._add_file_to_buffer(file_info['path'], file_info)
            #print self.buffer
            print '对', path, '的缓存完成'
            self.traversed_folder[path] = True
        for r in files:
            yield r
        self.uploadLock.release()

    def _update_file_manual(self, path):
        """Force-refresh the cached metadata of path (retries forever)."""
        while 1:
            try:
                jdata = json.loads(self.disk.meta([path]).content)
                break
            except:
                print 'error'
        if 'info' not in jdata:
            raise FuseOSError(errno.ENOENT)
        if jdata['errno'] != 0:
            raise FuseOSError(errno.ENOENT)
        file_info = jdata['info'][0]
        self._add_file_to_buffer(path, file_info)

    def rename(self, old, new):
        """Rename old to new's basename; on a name clash delete new and retry."""
        #logging.debug('* rename',old,os.path.basename(new))
        print '*' * 10, 'RENAME CALLED', old, os.path.basename(new), type(
            old), type(new)
        while True:
            try:
                ret = self.disk.rename([(old, os.path.basename(new))]).content
                jdata = json.loads(ret)
                break
            except:
                print 'error'
        if jdata['errno'] != 0:
            # target name already exists: delete it, then rename again
            print self.disk.delete([new]).content
            print self.disk.rename([(old, os.path.basename(new))])
        self._update_file_manual(new)
        self.buffer.pop(old)

    def open(self, path, flags):
        """Hand out a fake, monotonically increasing file descriptor."""
        self.readLock.acquire()
        print '*' * 10, 'OPEN CALLED', path, flags
        #print '[****]',path
        """
        Permission denied

        accmode = os.O_RDONLY | os.O_WRONLY | os.O_RDWR
        if (flags & accmode) != os.O_RDONLY:
            raise FuseOSError(errno.EACCES)
        """
        self.fd += 1
        self.readLock.release()
        return self.fd

    def create(self, path, mode, fh=None):
        """Create an empty remote file by uploading a zero-byte temp file.

        Raises EEXIST when the server reports a different resulting path,
        i.e. the name was already taken.
        """
        # create the file
        # Chinese paths are problematic here (original author note)
        print '*' * 10, 'CREATE CALLED', path, mode, type(path)
        #if 'outputstream' not in path:
        tmp_file = tempfile.TemporaryFile('r+w+b')
        foo = self.disk.upload(os.path.dirname(path), tmp_file,
                               os.path.basename(path)).content
        ret = json.loads(foo)
        print ret
        print 'create-not-outputstream', ret
        if ret['path'] != path:
            # the file already exists on the server
            print '文件已存在'
            raise FuseOSError(errno.EEXIST)
        '''
        else:
            print 'create:',path
            foo = File()
            foo['st_ctime'] = int(time.time())
            foo['st_mtime'] = int(time.time())
            foo['st_mode'] = (stat.S_IFREG | 0777)
            foo['st_nlink'] = 1
            foo['st_size'] = 0
            self.buffer[path] = foo
        '''
        '''
        dict(st_mode=(stat.S_IFREG | mode), st_nlink=1,
                st_size=0, st_ctime=time.time(), st_mtime=time.time(),
                st_atime=time.time())
        '''
        self.fd += 1
        return 0

    def write(self, path, data, offset, fp):
        """Buffer written data and upload it to Baidu in large blocks.

        Called during file upload with 4kb ( 4096 bytes ) pieces; a piece
        with len(data) < 4096 marks the last one, and the file size is the
        last piece's offset + len(data).  4kb per request is too slow, so
        data is accumulated in a temp file and uploaded once it exceeds
        _BLOCK_SIZE; on the final piece the remainder is uploaded and all
        block md5s are merged via upload_superfile.

        NOTE(review): a final piece of exactly 4096 bytes would never
        trigger the merge branch -- relies on FUSE write sizing.
        """
        #print '*'*10,path,offset, len(data)
        def _block_size(stream):
            # size of a seekable stream (leaves position at EOF)
            stream.seek(0, 2)
            return stream.tell()

        _BLOCK_SIZE = 16 * 2**20
        # first-block setup
        if offset == 0:
            #self.uploadLock.acquire()
            #self.readLock.acquire()
            # initialise the block md5 list
            self.upload_blocks[path] = {'tmp': None, 'blocks': []}
            # create the temp buffer file
            tmp_file = tempfile.TemporaryFile('r+w+b')
            self.upload_blocks[path]['tmp'] = tmp_file

        # append data to the temp file; when it exceeds _BLOCK_SIZE upload
        # the block and start a fresh temp file
        try:
            tmp = self.upload_blocks[path]['tmp']
        except KeyError:
            return 0
        tmp.write(data)

        if _block_size(tmp) > _BLOCK_SIZE:
            print path, '发生上传'
            tmp.seek(0)
            try:
                foo = self.disk.upload_tmpfile(tmp,
                                               callback=ProgressBar()).content
                foofoo = json.loads(foo)
                block_md5 = foofoo['md5']
            except:
                print foo
            # record this block's md5 in upload_blocks
            self.upload_blocks[path]['blocks'].append(block_md5)
            # replace the temp buffer file with an empty one
            self.upload_blocks[path]['tmp'].close()
            tmp_file = tempfile.TemporaryFile('r+w+b')
            self.upload_blocks[path]['tmp'] = tmp_file
            print '创建临时文件', tmp_file.name

        # last-block tasks
        if len(data) < 4096:
            # check for a name collision; delete any existing remote file
            while True:
                try:
                    foo = self.disk.meta([path]).content
                    foofoo = json.loads(foo)
                    break
                except:
                    print 'error'
            if foofoo['errno'] == 0:
                logging.debug('Deleted the file which has same name.')
                self.disk.delete([path])
            # see whether a final upload is needed
            if _block_size(tmp) != 0:
                # the temp file still holds data: upload it
                print path, '发生上传,块末尾,文件大小', _block_size(tmp)
                tmp.seek(0)
                while True:
                    try:
                        foo = self.disk.upload_tmpfile(
                            tmp, callback=ProgressBar()).content
                        foofoo = json.loads(foo)
                        break
                    except:
                        print 'exception, retry.'
                block_md5 = foofoo['md5']
                # record this block's md5 in upload_blocks
                self.upload_blocks[path]['blocks'].append(block_md5)
            # call upload_superfile to merge the blocks
            print '合并文件', path, type(path)
            self.disk.upload_superfile(path, self.upload_blocks[path]['blocks'])
            # discard the per-path upload state
            self.upload_blocks.pop(path)
            # refresh the local file-list cache
            self._update_file_manual(path)
            #self.readLock.release()
            #self.uploadLock.release()
        return len(data)

    def mkdir(self, path, mode):
        """Create a remote directory."""
        logger.debug("mkdir is:" + path)
        self.disk.mkdir(path)

    def rmdir(self, path):
        """Remove a remote directory."""
        logger.debug("rmdir is:" + path)
        self.disk.delete([path])

    def read(self, path, size, offset, fh):
        """Return size bytes of path from offset via a ranged download.

        NOTE(review): retries forever and silently swallows every error.
        """
        #print '*'*10,'READ CALLED',path,size,offset
        #logger.debug("read is: " + path)
        paras = {'Range': 'bytes=%s-%s' % (offset, offset + size - 1)}
        while True:
            try:
                foo = self.disk.download(path, headers=paras).content
                return foo
            except:
                pass

    # operations deliberately left unimplemented for fusepy
    access = None
    statfs = None
class BaiduPCS(object):
    """Baidu disk uploader.

    Resumable, multi-threaded chunk uploader: the source file is split
    into 'chinksize'-byte blocks, each uploaded block's md5 is written to
    a JSON side-car log next to the source file so an interrupted upload
    can resume, and superfile() finally merges the blocks remotely.
    """

    def __init__(self, filepath, username, password):
        """Log in and record the source file's path components and size."""
        (self.filepath, self.filename, self.dirname,
         self.filesize) = (filepath, os.path.basename(filepath),
                           os.path.dirname(filepath),
                           os.path.getsize(filepath))
        # NOTE(review): the hard-coded '\\' separator assumes Windows
        # paths -- confirm.
        self.path = self.dirname + '\\' + self.filename.split('.')[0]
        self.pcs = PCS(username, password)  #Login

    def create_upload(self, num):
        """Upload block num (1-based) and persist its md5 to the log."""
        self.uplog['md5'][num] = (json.loads(
            self.pcs.upload_tmpfile(self.block(
                (num - 1) * self.chinksize)).content)['md5'])
        self.count += 1
        # rewrite the whole side-car log after every block
        with open(self.dirname + '\\' + self.filename.split('.')[0] + '.json',
                  'w') as self.new_uplog:
            json.dump(self.uplog, self.new_uplog)
        print('[' + str(self.count) + '/' + str(self.fid) +
              ' Uploaded BlockID: ' + str(num) + ' md5: ' +
              self.uplog['md5'][num] + ']')

    def read_uplog(self):
        """Load the side-car upload log if it exists, else create an empty one.

        JSON keys come back as strings, so the md5 map is re-keyed to ints.
        """
        if os.path.exists(self.dirname + '\\' + self.filename.split('.')[0] +
                          '.json'):
            with open(
                    self.dirname + '\\' + self.filename.split('.')[0] +
                    '.json', 'r') as self.uplog_file:
                self.uplog = json.load(self.uplog_file)
            # convert string keys back to int block numbers
            tmp_dict = {}
            for i in sorted(self.uplog['md5'].keys()):
                tmp_dict[int(i)] = self.uplog['md5'][i]
            self.uplog['md5'] = tmp_dict
        else:
            self.uplog_file = open(
                self.dirname + '\\' + self.filename.split('.')[0] + '.json',
                'w')
            self.uplog = {'block': 0, 'size': 0, 'md5': {}}

    def block(self, location=None):
        """With no argument, return the block count; with a byte offset,
        return a BytesIO holding that block's data.

        NOTE(review): the file handle opened here is never closed.
        """
        if location == None:
            return math.ceil(os.path.getsize(self.filepath) / self.chinksize)
        file = open(self.filepath, 'rb')
        file.seek(location, 0)
        return io.BytesIO(file.read(self.chinksize))

    def upload(self):
        """Baidu upload driver: resume from the log, then upload missing
        blocks with up to thread_num daemon threads."""
        self.read_uplog()
        if int(self.uplog['size']) == 0:
            # fresh upload: fix the chunk size at 24 MB and record it
            self.chinksize = 1024 * 1024 * 24
            self.uplog['size'] = self.chinksize
        else:
            self.chinksize = self.uplog['size']
        self.thread_num = 25
        if int(self.uplog['block']) == 0:
            self.fid = self.block()  # total number of blocks
        self.count = len(self.uplog['md5'])  # blocks already uploaded
        with open(self.dirname + '\\' + self.filename.split('.')[0] + '.json',
                  'w') as self.new_uplog:
            json.dump(self.uplog, self.new_uplog)
        print('start uploading...')
        self.threads = []
        for i in range(self.fid):
            if not i + 1 in self.uplog['md5']:
                # throttle: at most thread_num worker threads alive at once
                while len(threading.enumerate()) - 1 >= self.thread_num:
                    time.sleep(1)
                self.t = threading.Thread(target=self.create_upload,
                                          kwargs={'num': i + 1})
                self.t.setDaemon(True)
                self.t.start()
                self.threads.append(self.t)
        for self.thread in self.threads:
            self.thread.join()

    def superfile(self):
        """Merge the uploaded blocks into /<filename>, in block-number order."""
        self.pcs.upload_superfile('/' + self.filename,
                                  [(self.uplog['md5'][k])
                                   for k in sorted(self.uplog['md5'].keys())])

    def CheckUpload(self):
        """Return 1 when every block has an md5 in the log, else 0 so the
        caller can retry the upload."""
        if not self.fid == len(self.uplog['md5']):
            return 0
        return 1

    def quota_remaining(self):
        """Return the remaining quota in bytes (total - used)."""
        self.quota_info = json.loads(self.pcs.quota().content.decode(
            "utf-8", "ignore"))
        self.remaining = self.quota_info['total'] - self.quota_info['used']
        return self.remaining
# Chunked uploader script: split the local file given as argv[1] into
# 16 MB chunks, upload each chunk to collect its md5, then merge the
# chunks remotely under argv[2] with upload_superfile.
import sys
# NOTE(review): sys is imported twice (harmless no-op).
import os, json, sys, tempfile
from baidupcsapi import PCS

pcs = PCS('username', 'password')
chinksize = 1024 * 1024 * 16  # chunk size in bytes ('chinksize' sic)
fid = 1        # 1-based chunk counter
md5list = []   # md5 of each uploaded chunk, in order
tmpdir = tempfile.mkdtemp('bdpcs')
with open(sys.argv[1], 'rb') as infile:
    while 1:
        data = infile.read(chinksize)
        if len(data) == 0:
            break
        # spill the chunk to a temp file so it can be re-read for upload
        smallfile = os.path.join(tmpdir, 'tmp%d' % fid)
        with open(smallfile, 'wb') as f:
            f.write(data)
        print('chunk%d size %d' % (fid, len(data)))
        fid += 1
        print('start uploading...')
        ret = pcs.upload_tmpfile(open(smallfile, 'rb'))
        md5list.append(json.loads(ret.content)['md5'])
        print('md5: %s' % (md5list[-1]))
        os.remove(smallfile)

os.rmdir(tmpdir)

# if argv[2] ends with '/', treat it as a directory and append the local
# file's basename; otherwise use it verbatim as the remote path
remote_path = sys.argv[2] if not sys.argv[2].endswith(
    '/') else sys.argv[2] + os.path.basename(sys.argv[1])
ret = pcs.upload_superfile(remote_path, md5list)
print ret.content