Ejemplo n.º 1
0
class BaiduFS(Operations):
    '''Baidu netdisk filesystem

    FUSE operations implementation backed by the Baidu PCS API.
    File metadata is cached in ``self.buffer``; uploads are staged in
    local temporary files and merged remotely via ``upload_superfile``.
    '''

    def __init__(self, username, password, *args, **kw):
        # Log in to Baidu PCS; captcha_get supplies captcha answers during login.
        self.disk = PCS(username, password,captcha_get)
        self.buffer = {}  # path -> File with cached stat attributes
        self.traversed_folder = {}  # path -> True once its listing has been cached
        self.bufferLock = Lock()
        self.upload_blocks = {} # md5 of each block recorded during upload, {PATH:{TMP:'',BLOCKS:''}
        self.create_tmp = {} # {goutputstrem_path:file}
        self.upload_fails = {} #
        self.fd = 3  # fake file-descriptor counter (0-2 are reserved for stdio)
        # Initialize the Baidu PCS server endpoint (pick the fastest mirror).
        print '设置pcs服务器'
        pcs = self.disk.get_fastest_pcs_server()
        self.disk.set_pcs_server(pcs)
        print 'pcs api server:',pcs
        '''
        print '设置百度网盘服务器,时间比较长,请耐心等待'
        pan = self.disk.get_fastest_mirror()
        self.disk.set_pan_server(pan)
        print 'baidupan server',pan
        '''

        self.uploadLock = Lock() # held during uploads so readdir does not refresh the listing
        self.readLock = Lock()
        self.downloading_files = []

    def unlink(self, path):
        '''FUSE unlink: delete the file at *path* on the remote disk.'''
        print '*'*10,'UNLINK CALLED',path
        self.disk.delete([path])


    def _add_file_to_buffer(self, path,file_info):
        '''Build a File() stat entry from PCS *file_info* and cache it under *path*.'''
        foo = File()
        foo['st_ctime'] = file_info['local_ctime']
        foo['st_mtime'] = file_info['local_mtime']
        # Directories get dir mode + 2 links; regular files get file mode + 1 link.
        foo['st_mode'] = (stat.S_IFDIR | 0777) if file_info['isdir'] \
            else (stat.S_IFREG | 0777)
        foo['st_nlink'] = 2 if file_info['isdir'] else 1
        foo['st_size'] = file_info['size']
        self.buffer[path] = foo

    def _del_file_from_buffer(self,path):
        '''Drop the cached stat entry for *path*.'''
        self.buffer.pop(path)

    def getattr(self, path, fh=None):
        '''FUSE getattr: return the stat dict for *path*.

        On a cache miss the metadata is fetched from PCS and cached;
        any failure (missing file, bad response) raises ENOENT.
        '''
        #print 'getattr *',path
        # Check the metadata cache first.

        if not self.buffer.has_key(path):
            print path,'未命中'
            #print self.buffer
            #print self.traversed_folder
            jdata = json.loads(self.disk.meta([path]).content)
            try:
                if 'info' not in jdata:
                    raise FuseOSError(errno.ENOENT)
                if jdata['errno'] != 0:
                    raise FuseOSError(errno.ENOENT)
                file_info = jdata['info'][0]
                self._add_file_to_buffer(path,file_info)
                st = self.buffer[path].getDict()
                return st
            except:
                # NOTE(review): bare except maps any failure (including bugs) to ENOENT.
                raise FuseOSError(errno.ENOENT)
        else:
            #print path,'命中'
            return self.buffer[path].getDict()



    def readdir(self, path, offset):
        '''FUSE readdir: yield the entries of *path*, caching meta info
        for every child the first time the folder is listed.'''
        self.uploadLock.acquire()
        # Retry until the listing request / JSON parse succeeds.
        while True:
            try:
                foo = json.loads(self.disk.list_files(path).text)
                break
            except:
                print 'error'


        files = ['.', '..']
        abs_files = [] # absolute paths of the files inside this folder
        for file in foo['list']:
            files.append(file['server_filename'])
            abs_files.append(file['path'])
        # Cache meta info for the folder's children with batched meta queries.

        # Update: the meta API cannot query more than 100 records at a time,
        # so split into ceil(file_num / 100.0) groups using residue classes.
        if not self.traversed_folder.has_key(path) or self.traversed_folder[path] == False:
            print '正在对',path,'缓存中'
            file_num = len(abs_files)
            group = int(math.ceil(file_num / 100.0))
            for i in range(group):
                obj = [f for n,f in enumerate(abs_files) if n % group == i] # one group of paths
                while 1:
                    try:
                        ret = json.loads(self.disk.meta(obj).text)
                        break
                    except:
                        print 'error'

                for file_info in ret['info']:
                    if not self.buffer.has_key(file_info['path']):
                        self._add_file_to_buffer(file_info['path'],file_info)
            #print self.buffer
            print '对',path,'的缓存完成'
            self.traversed_folder[path] = True
        for r in files:
            yield r
        self.uploadLock.release()

    def _update_file_manual(self,path):
        '''Force-refresh the cached metadata of *path* from PCS; ENOENT on a bad response.'''
        while 1:
            try:
                jdata = json.loads(self.disk.meta([path]).content)
                break
            except:
                print 'error'

        if 'info' not in jdata:
            raise FuseOSError(errno.ENOENT)
        if jdata['errno'] != 0:
            raise FuseOSError(errno.ENOENT)
        file_info = jdata['info'][0]
        self._add_file_to_buffer(path,file_info)


    def rename(self, old, new):
        '''FUSE rename: rename *old* to basename(*new*) remotely.

        If the server rejects the rename (name taken), the existing
        target is deleted and the rename is retried once.
        '''
        #logging.debug('* rename',old,os.path.basename(new))
        print '*'*10,'RENAME CALLED',old,os.path.basename(new),type(old),type(new)
        while True:
            try:
                ret = self.disk.rename([(old,os.path.basename(new))]).content
                jdata = json.loads(ret)
                break
            except:
                print 'error'

        if jdata['errno'] != 0:
            # Target name already exists: delete the old target, then retry.
            print self.disk.delete([new]).content
            print self.disk.rename([(old,os.path.basename(new))])
        self._update_file_manual(new)
        self.buffer.pop(old)


    def open(self, path, flags):
        '''FUSE open: hand out a fresh fake file descriptor (no real handle kept).'''
        self.readLock.acquire()
        print '*'*10,'OPEN CALLED',path,flags
        #print '[****]',path
        """
        Permission denied

        accmode = os.O_RDONLY | os.O_WRONLY | os.O_RDWR
        if (flags & accmode) != os.O_RDONLY:
            raise FuseOSError(errno.EACCES)
        """
        self.fd += 1
        self.readLock.release()

        return self.fd

    def create(self, path, mode,fh=None):
        '''FUSE create: create an empty file at *path* by uploading a
        zero-byte temp file; raises EEXIST when the server stored it
        under a different path (name already taken).'''
        # Create the file.
        # NOTE(review): non-ASCII (Chinese) paths are reported problematic here.
        print '*'*10,'CREATE CALLED',path,mode,type(path)
        #if 'outputstream' not in path:
        tmp_file = tempfile.TemporaryFile('r+w+b')
        foo = self.disk.upload(os.path.dirname(path),tmp_file,os.path.basename(path)).content
        ret = json.loads(foo)
        print ret
        print 'create-not-outputstream',ret
        if ret['path'] != path:
            # File already exists (server auto-renamed the upload).
            print '文件已存在'
            raise FuseOSError(errno.EEXIST)
        '''
        else:
            print 'create:',path
            foo = File()
            foo['st_ctime'] = int(time.time())
            foo['st_mtime'] = int(time.time())
            foo['st_mode'] = (stat.S_IFREG | 0777)
            foo['st_nlink'] = 1
            foo['st_size'] = 0
            self.buffer[path] = foo
        '''


        '''
        dict(st_mode=(stat.S_IFREG | mode), st_nlink=1,
                                st_size=0, st_ctime=time.time(), st_mtime=time.time(),
                                st_atime=time.time())
        '''
        self.fd += 1
        return 0

    def write(self, path, data, offset, fp):
        '''FUSE write: buffer *data* into a temp file, upload a block once
        the buffer exceeds _BLOCK_SIZE, and on the final (<4096-byte)
        chunk merge all uploaded block md5s with upload_superfile.'''
        # Called repeatedly while a file is uploaded.
        # FUSE delivers 4kb ( 4096 bytes ) per call; *data* holds the chunk.
        # Last-chunk detection: len(data) < 4096
        # File size = offset of the last chunk + len(data)

        # 4kb per request is too slow, so chunks are batched into big blocks.

        #print '*'*10,path,offset, len(data)

        def _block_size(stream):
            # Size of *stream*, measured by seeking to its end.
            stream.seek(0,2)
            return stream.tell()

        _BLOCK_SIZE = 16 * 2 ** 20
        # Work done only for the first chunk of a file.
        if offset == 0:
            #self.uploadLock.acquire()
            #self.readLock.acquire()
            # Initialize the per-file list of block md5s.
            self.upload_blocks[path] = {'tmp':None,
                                        'blocks':[]}
            # Create the temporary buffer file.
            tmp_file = tempfile.TemporaryFile('r+w+b')
            self.upload_blocks[path]['tmp'] = tmp_file

        # Append data to the temp file; if it grew past _BLOCK_SIZE, upload
        # the block and start a fresh temp file.
        try:
            tmp = self.upload_blocks[path]['tmp']
        except KeyError:
            return 0
        tmp.write(data)

        if _block_size(tmp) > _BLOCK_SIZE:
            print path,'发生上传'
            tmp.seek(0)
            try:
                foo = self.disk.upload_tmpfile(tmp,callback=ProgressBar()).content
                foofoo = json.loads(foo)
                block_md5 = foofoo['md5']
            except:
                 print foo  # NOTE(review): block_md5 stays unbound if this path is hit



            # Record this block's md5.
            self.upload_blocks[path]['blocks'].append(block_md5)
            # Start a fresh temporary buffer file.
            self.upload_blocks[path]['tmp'].close()
            tmp_file = tempfile.TemporaryFile('r+w+b')
            self.upload_blocks[path]['tmp'] = tmp_file
            print '创建临时文件',tmp_file.name

        # Work done only for the final chunk.
        if len(data) < 4096:
            # If a file with the same name exists remotely, delete it first.
            while True:
                try:
                    foo = self.disk.meta([path]).content
                    foofoo = json.loads(foo)
                    break
                except:
                    print 'error'


            if foofoo['errno'] == 0:
                logging.debug('Deleted the file which has same name.')
                self.disk.delete([path])
            # Check whether anything is left to upload.
            if _block_size(tmp) != 0:
                # The temp file still holds data: upload the final block.
                print path,'发生上传,块末尾,文件大小',_block_size(tmp)
                tmp.seek(0)
                while True:
                    try:
                        foo = self.disk.upload_tmpfile(tmp,callback=ProgressBar()).content
                        foofoo = json.loads(foo)
                        break
                    except:
                        print 'exception, retry.'

                block_md5 = foofoo['md5']
                # Record this block's md5.
                self.upload_blocks[path]['blocks'].append(block_md5)

            # Call upload_superfile to merge the uploaded blocks remotely.
            print '合并文件',path,type(path)
            self.disk.upload_superfile(path,self.upload_blocks[path]['blocks'])
            # Drop the per-file bookkeeping entry.
            self.upload_blocks.pop(path)
            # Refresh the local metadata cache for this file.
            self._update_file_manual(path)
            #self.readLock.release()
            #self.uploadLock.release()
        return len(data)


    def mkdir(self, path, mode):
        '''FUSE mkdir: create directory *path* remotely.'''
        logger.debug("mkdir is:" + path)
        self.disk.mkdir(path)

    def rmdir(self, path):
        '''FUSE rmdir: delete directory *path* remotely.'''
        logger.debug("rmdir is:" + path)
        self.disk.delete([path])

    def read(self, path, size, offset, fh):
        '''FUSE read: fetch *size* bytes at *offset* with an HTTP Range
        request, retrying forever on any failure.'''
        #print '*'*10,'READ CALLED',path,size,offset
        #logger.debug("read is: " + path)
        paras = {'Range': 'bytes=%s-%s' % (offset, offset + size - 1)}
        while True:
            try:
                foo = self.disk.download(path, headers=paras).content
                return foo
            except:
                pass

    # Not implemented: fusepy falls back to its defaults for these.
    access = None
    statfs = None
Ejemplo n.º 2
0
class BaiduFS(Operations):
    '''Baidu netdisk filesystem

    FUSE operations implementation backed by the Baidu PCS API.
    File metadata is cached in ``self.buffer``; uploads are staged in
    local temporary files and merged remotely via ``upload_superfile``.
    '''
    def __init__(self, username, password, *args, **kw):
        # Log in to Baidu PCS.
        self.disk = PCS(username, password)
        self.buffer = {}  # path -> File with cached stat attributes
        self.traversed_folder = {}  # path -> True once its listing has been cached
        self.bufferLock = Lock()
        self.upload_blocks = {}  # md5 of each block recorded during upload, {PATH:{TMP:'',BLOCKS:''}
        self.create_tmp = {}  # {goutputstrem_path:file}
        self.upload_fails = {}  #
        self.fd = 3  # fake file-descriptor counter (0-2 are reserved for stdio)
        # Initialize the Baidu PCS server endpoint (pick the fastest mirror).
        print '设置pcs服务器'
        pcs = self.disk.get_fastest_pcs_server()
        self.disk.set_pcs_server(pcs)
        print 'pcs api server:', pcs
        '''
        print '设置百度网盘服务器,时间比较长,请耐心等待'
        pan = self.disk.get_fastest_mirror()
        self.disk.set_pan_server(pan)
        print 'baidupan server',pan
        '''

        self.uploadLock = Lock()  # held during uploads so readdir does not refresh the listing
        self.readLock = Lock()
        self.downloading_files = []

    def unlink(self, path):
        '''FUSE unlink: delete the file at *path* on the remote disk.'''
        print '*' * 10, 'UNLINK CALLED', path
        self.disk.delete([path])

    def _add_file_to_buffer(self, path, file_info):
        '''Build a File() stat entry from PCS *file_info* and cache it under *path*.'''
        foo = File()
        foo['st_ctime'] = file_info['local_ctime']
        foo['st_mtime'] = file_info['local_mtime']
        # Directories get dir mode + 2 links; regular files get file mode + 1 link.
        foo['st_mode'] = (stat.S_IFDIR | 0777) if file_info['isdir'] \
            else (stat.S_IFREG | 0777)
        foo['st_nlink'] = 2 if file_info['isdir'] else 1
        foo['st_size'] = file_info['size']
        self.buffer[path] = foo

    def _del_file_from_buffer(self, path):
        '''Drop the cached stat entry for *path*.'''
        self.buffer.pop(path)

    def getattr(self, path, fh=None):
        '''FUSE getattr: return the stat dict for *path*.

        On a cache miss the metadata is fetched from PCS and cached;
        any failure (missing file, bad response) raises ENOENT.
        '''
        #print 'getattr *',path
        # Check the metadata cache first.

        if not self.buffer.has_key(path):
            print path, '未命中'
            #print self.buffer
            #print self.traversed_folder
            jdata = json.loads(self.disk.meta([path]).content)
            try:
                if 'info' not in jdata:
                    raise FuseOSError(errno.ENOENT)
                if jdata['errno'] != 0:
                    raise FuseOSError(errno.ENOENT)
                file_info = jdata['info'][0]
                self._add_file_to_buffer(path, file_info)
                st = self.buffer[path].getDict()
                return st
            except:
                # NOTE(review): bare except maps any failure (including bugs) to ENOENT.
                raise FuseOSError(errno.ENOENT)
        else:
            #print path,'命中'
            return self.buffer[path].getDict()

    def readdir(self, path, offset):
        '''FUSE readdir: yield the entries of *path*, caching meta info
        for every child the first time the folder is listed.'''
        self.uploadLock.acquire()
        # Retry until the listing request / JSON parse succeeds.
        while True:
            try:
                foo = json.loads(self.disk.list_files(path).text)
                break
            except:
                print 'error'

        files = ['.', '..']
        abs_files = []  # absolute paths of the files inside this folder
        for file in foo['list']:
            files.append(file['server_filename'])
            abs_files.append(file['path'])
        # Cache meta info for the folder's children with batched meta queries.

        # Update: the meta API cannot query more than 100 records at a time,
        # so split into ceil(file_num / 100.0) groups using residue classes.
        if not self.traversed_folder.has_key(
                path) or self.traversed_folder[path] == False:
            print '正在对', path, '缓存中'
            file_num = len(abs_files)
            group = int(math.ceil(file_num / 100.0))
            for i in range(group):
                obj = [f for n, f in enumerate(abs_files)
                       if n % group == i]  # one group of paths
                while 1:
                    try:
                        ret = json.loads(self.disk.meta(obj).text)
                        break
                    except:
                        print 'error'

                for file_info in ret['info']:
                    if not self.buffer.has_key(file_info['path']):
                        self._add_file_to_buffer(file_info['path'], file_info)
            #print self.buffer
            print '对', path, '的缓存完成'
            self.traversed_folder[path] = True
        for r in files:
            yield r
        self.uploadLock.release()

    def _update_file_manual(self, path):
        '''Force-refresh the cached metadata of *path* from PCS; ENOENT on a bad response.'''
        while 1:
            try:
                jdata = json.loads(self.disk.meta([path]).content)
                break
            except:
                print 'error'

        if 'info' not in jdata:
            raise FuseOSError(errno.ENOENT)
        if jdata['errno'] != 0:
            raise FuseOSError(errno.ENOENT)
        file_info = jdata['info'][0]
        self._add_file_to_buffer(path, file_info)

    def rename(self, old, new):
        '''FUSE rename: rename *old* to basename(*new*) remotely.

        If the server rejects the rename (name taken), the existing
        target is deleted and the rename is retried once.
        '''
        #logging.debug('* rename',old,os.path.basename(new))
        print '*' * 10, 'RENAME CALLED', old, os.path.basename(new), type(
            old), type(new)
        while True:
            try:
                ret = self.disk.rename([(old, os.path.basename(new))]).content
                jdata = json.loads(ret)
                break
            except:
                print 'error'

        if jdata['errno'] != 0:
            # Target name already exists: delete the old target, then retry.
            print self.disk.delete([new]).content
            print self.disk.rename([(old, os.path.basename(new))])
        self._update_file_manual(new)
        self.buffer.pop(old)

    def open(self, path, flags):
        '''FUSE open: hand out a fresh fake file descriptor (no real handle kept).'''
        self.readLock.acquire()
        print '*' * 10, 'OPEN CALLED', path, flags
        #print '[****]',path
        """
        Permission denied

        accmode = os.O_RDONLY | os.O_WRONLY | os.O_RDWR
        if (flags & accmode) != os.O_RDONLY:
            raise FuseOSError(errno.EACCES)
        """
        self.fd += 1
        self.readLock.release()

        return self.fd

    def create(self, path, mode, fh=None):
        '''FUSE create: create an empty file at *path* by uploading a
        zero-byte temp file; raises EEXIST when the server stored it
        under a different path (name already taken).'''
        # Create the file.
        # NOTE(review): non-ASCII (Chinese) paths are reported problematic here.
        print '*' * 10, 'CREATE CALLED', path, mode, type(path)
        #if 'outputstream' not in path:
        tmp_file = tempfile.TemporaryFile('r+w+b')
        foo = self.disk.upload(os.path.dirname(path), tmp_file,
                               os.path.basename(path)).content
        ret = json.loads(foo)
        print ret
        print 'create-not-outputstream', ret
        if ret['path'] != path:
            # File already exists (server auto-renamed the upload).
            print '文件已存在'
            raise FuseOSError(errno.EEXIST)
        '''
        else:
            print 'create:',path
            foo = File()
            foo['st_ctime'] = int(time.time())
            foo['st_mtime'] = int(time.time())
            foo['st_mode'] = (stat.S_IFREG | 0777)
            foo['st_nlink'] = 1
            foo['st_size'] = 0
            self.buffer[path] = foo
        '''
        '''
        dict(st_mode=(stat.S_IFREG | mode), st_nlink=1,
                                st_size=0, st_ctime=time.time(), st_mtime=time.time(),
                                st_atime=time.time())
        '''
        self.fd += 1
        return 0

    def write(self, path, data, offset, fp):
        '''FUSE write: buffer *data* into a temp file, upload a block once
        the buffer exceeds _BLOCK_SIZE, and on the final (<4096-byte)
        chunk merge all uploaded block md5s with upload_superfile.'''
        # Called repeatedly while a file is uploaded.
        # FUSE delivers 4kb ( 4096 bytes ) per call; *data* holds the chunk.
        # Last-chunk detection: len(data) < 4096
        # File size = offset of the last chunk + len(data)

        # 4kb per request is too slow, so chunks are batched into big blocks.

        #print '*'*10,path,offset, len(data)

        def _block_size(stream):
            # Size of *stream*, measured by seeking to its end.
            stream.seek(0, 2)
            return stream.tell()

        _BLOCK_SIZE = 16 * 2**20
        # Work done only for the first chunk of a file.
        if offset == 0:
            #self.uploadLock.acquire()
            #self.readLock.acquire()
            # Initialize the per-file list of block md5s.
            self.upload_blocks[path] = {'tmp': None, 'blocks': []}
            # Create the temporary buffer file.
            tmp_file = tempfile.TemporaryFile('r+w+b')
            self.upload_blocks[path]['tmp'] = tmp_file

        # Append data to the temp file; if it grew past _BLOCK_SIZE, upload
        # the block and start a fresh temp file.
        try:
            tmp = self.upload_blocks[path]['tmp']
        except KeyError:
            return 0
        tmp.write(data)

        if _block_size(tmp) > _BLOCK_SIZE:
            print path, '发生上传'
            tmp.seek(0)
            try:
                foo = self.disk.upload_tmpfile(tmp,
                                               callback=ProgressBar()).content
                foofoo = json.loads(foo)
                block_md5 = foofoo['md5']
            except:
                print foo  # NOTE(review): block_md5 stays unbound if this path is hit

            # Record this block's md5.
            self.upload_blocks[path]['blocks'].append(block_md5)
            # Start a fresh temporary buffer file.
            self.upload_blocks[path]['tmp'].close()
            tmp_file = tempfile.TemporaryFile('r+w+b')
            self.upload_blocks[path]['tmp'] = tmp_file
            print '创建临时文件', tmp_file.name

        # Work done only for the final chunk.
        if len(data) < 4096:
            # If a file with the same name exists remotely, delete it first.
            while True:
                try:
                    foo = self.disk.meta([path]).content
                    foofoo = json.loads(foo)
                    break
                except:
                    print 'error'

            if foofoo['errno'] == 0:
                logging.debug('Deleted the file which has same name.')
                self.disk.delete([path])
            # Check whether anything is left to upload.
            if _block_size(tmp) != 0:
                # The temp file still holds data: upload the final block.
                print path, '发生上传,块末尾,文件大小', _block_size(tmp)
                tmp.seek(0)
                while True:
                    try:
                        foo = self.disk.upload_tmpfile(
                            tmp, callback=ProgressBar()).content
                        foofoo = json.loads(foo)
                        break
                    except:
                        print 'exception, retry.'

                block_md5 = foofoo['md5']
                # Record this block's md5.
                self.upload_blocks[path]['blocks'].append(block_md5)

            # Call upload_superfile to merge the uploaded blocks remotely.
            print '合并文件', path, type(path)
            self.disk.upload_superfile(path,
                                       self.upload_blocks[path]['blocks'])
            # Drop the per-file bookkeeping entry.
            self.upload_blocks.pop(path)
            # Refresh the local metadata cache for this file.
            self._update_file_manual(path)
            #self.readLock.release()
            #self.uploadLock.release()
        return len(data)

    def mkdir(self, path, mode):
        '''FUSE mkdir: create directory *path* remotely.'''
        logger.debug("mkdir is:" + path)
        self.disk.mkdir(path)

    def rmdir(self, path):
        '''FUSE rmdir: delete directory *path* remotely.'''
        logger.debug("rmdir is:" + path)
        self.disk.delete([path])

    def read(self, path, size, offset, fh):
        '''FUSE read: fetch *size* bytes at *offset* with an HTTP Range
        request, retrying forever on any failure.'''
        #print '*'*10,'READ CALLED',path,size,offset
        #logger.debug("read is: " + path)
        paras = {'Range': 'bytes=%s-%s' % (offset, offset + size - 1)}
        while True:
            try:
                foo = self.disk.download(path, headers=paras).content
                return foo
            except:
                pass

    # Not implemented: fusepy falls back to its defaults for these.
    access = None
    statfs = None
Ejemplo n.º 3
0
class BaiduPCS(object):
    """
    Baidu disk uploader.

    Splits a local file into fixed-size chunks, uploads them concurrently
    through the PCS temp-file API, journals the block md5s to a JSON
    sidecar file so an interrupted upload can resume, and finally merges
    the blocks remotely with ``upload_superfile``.
    """
    def __init__(self, filepath, username, password):
        """
        Login.

        filepath -- local file to upload; username/password -- Baidu account.
        """
        (self.filepath, self.filename,
         self.dirname, self.filesize) = (filepath, os.path.basename(filepath),
                                         os.path.dirname(filepath),
                                         os.path.getsize(filepath))
        # NOTE(review): hard-coded '\\' separator assumes Windows paths.
        self.path = self.dirname + '\\' + self.filename.split('.')[0]
        self.pcs = PCS(username, password)  #Login

    def create_upload(self, num):
        # Upload block *num* (1-based) and record its md5 in the journal.
        self.uplog['md5'][num] = (json.loads(
            self.pcs.upload_tmpfile(self.block(
                (num - 1) * self.chinksize)).content)['md5'])
        self.count += 1
        # Persist the journal after every block so the upload can resume.
        with open(self.dirname + '\\' + self.filename.split('.')[0] + '.json',
                  'w') as self.new_uplog:
            json.dump(self.uplog, self.new_uplog)
        print('[' + str(self.count) + '/' + str(self.fid) +
              ' Uploaded   BlockID: ' + str(num) + '   md5: ' +
              self.uplog['md5'][num] + ']')

    def read_uplog(self):
        # Load the resume journal if one exists; otherwise start a fresh one.
        if os.path.exists(self.dirname + '\\' + self.filename.split('.')[0] +
                          '.json'):
            with open(
                    self.dirname + '\\' + self.filename.split('.')[0] +
                    '.json', 'r') as self.uplog_file:
                self.uplog = json.load(self.uplog_file)
            # JSON object keys are strings; convert block ids back to ints.
            tmp_dict = {}
            for i in sorted(self.uplog['md5'].keys()):
                tmp_dict[int(i)] = self.uplog['md5'][i]
            self.uplog['md5'] = tmp_dict
        else:
            self.uplog_file = open(
                self.dirname + '\\' + self.filename.split('.')[0] + '.json',
                'w')
            self.uplog = {'block': 0, 'size': 0, 'md5': {}}

    def block(self, location=None):
        # With no *location*: return the number of chunks in the file.
        # With a byte *location*: return a BytesIO holding one chunk read there.
        # NOTE(review): the file handle opened below is never explicitly closed.
        if location == None:
            return math.ceil(os.path.getsize(self.filepath) / self.chinksize)
        file = open(self.filepath, 'rb')
        file.seek(location, 0)
        return io.BytesIO(file.read(self.chinksize))

    def upload(self):
        """
        Baidu upload module.

        Uploads every block not yet present in the journal, with up to
        ``thread_num`` concurrent daemon threads, then joins them all.
        """
        self.read_uplog()

        # 'chinksize' (sic, kept for journal compatibility) is the chunk
        # size in bytes: 24 MiB by default, or whatever a resumed journal recorded.
        if int(self.uplog['size']) == 0:
            self.chinksize = 1024 * 1024 * 24
            self.uplog['size'] = self.chinksize
        else:
            self.chinksize = self.uplog['size']

        self.thread_num = 25

        if int(self.uplog['block']) == 0:
            self.fid = self.block()

        self.count = len(self.uplog['md5'])

        with open(self.dirname + '\\' + self.filename.split('.')[0] + '.json',
                  'w') as self.new_uplog:
            json.dump(self.uplog, self.new_uplog)

        print('start uploading...')
        self.threads = []
        for i in range(self.fid):
            if not i + 1 in self.uplog['md5']:
                # Throttle: wait until a worker slot frees up.
                while len(threading.enumerate()) - 1 >= self.thread_num:
                    time.sleep(1)
                self.t = threading.Thread(target=self.create_upload,
                                          kwargs={'num': i + 1})
                self.t.setDaemon(True)
                self.t.start()
                self.threads.append(self.t)

        for self.thread in self.threads:
            self.thread.join()

    def superfile(self):
        # Merge the uploaded blocks (in ascending block-id order) into the
        # remote file at the netdisk root.
        self.pcs.upload_superfile('/' + self.filename,
                                  [(self.uplog['md5'][k])
                                   for k in sorted(self.uplog['md5'].keys())])

    def CheckUpload(self):
        """
        Check upload status.
        Retry if file not uploaded.

        Returns 1 when every block's md5 is journaled, else 0.
        """
        if not self.fid == len(self.uplog['md5']):
            return 0
        return 1

    def quota_remaining(self):
        # Remaining quota in bytes: total minus used, from the PCS quota API.
        self.quota_info = json.loads(self.pcs.quota().content.decode(
            "utf-8", "ignore"))
        self.remaining = self.quota_info['total'] - self.quota_info['used']
        return self.remaining
Ejemplo n.º 4
0
import sys
import os, json, sys, tempfile
from baidupcsapi import PCS

pcs = PCS('username', 'password')
chinksize = 1024 * 1024 * 16
fid = 1
md5list = []
tmpdir = tempfile.mkdtemp('bdpcs')
with open(sys.argv[1], 'rb') as infile:
    while 1:
        data = infile.read(chinksize)
        if len(data) == 0: break
        smallfile = os.path.join(tmpdir, 'tmp%d' % fid)
        with open(smallfile, 'wb') as f:
            f.write(data)
        print('chunk%d size %d' % (fid, len(data)))
        fid += 1
        print('start uploading...')
        ret = pcs.upload_tmpfile(open(smallfile, 'rb'))
        md5list.append(json.loads(ret.content)['md5'])
        print('md5: %s' % (md5list[-1]))
        os.remove(smallfile)

os.rmdir(tmpdir)

remote_path = sys.argv[2] if not sys.argv[2].endswith(
    '/') else sys.argv[2] + os.path.basename(sys.argv[1])
ret = pcs.upload_superfile(remote_path, md5list)
print ret.content