def dfs(self, path):
    logger.info("DFS: {path}".format(path=to_utf8(path)))
    _finish = False
    _context = u''
    max_retry = self._max_retry
    path = to_unicode(path)
    while not _finish:
        request = ListFolderRequest(bucket_name=self._bucket,
                                    cos_path=path,
                                    context=_context)
        ret = self._cos_api.list_folder(request)
        if ret['code'] != 0:
            max_retry -= 1
        else:
            _finish = ret['data']['listover']
            _context = ret['data']['context']
            for item in ret['data']['infos']:
                if 'filelen' in item:
                    # file entry
                    try:
                        key = "{prefix}{filename}".format(prefix=path,
                                                          filename=item['name'])
                        yield Task(key, item['filelen'], None)
                    except Exception:
                        logger.exception("skip item {!r}".format(item.get('name')))
                else:
                    # directory entry: recurse
                    _sub_dir = "{prefix}{filename}".format(
                        prefix=path.encode('utf-8'),
                        filename=item['name'].encode('utf-8'))
                    for i in self.dfs(_sub_dir):
                        yield i
        if max_retry == 0:
            _finish = True
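# A minimal sketch of consuming the generator above. 'source' stands for a
# hypothetical instance of the class that owns dfs(), already configured with
# a bucket, a COS client and a retry budget; the assumption that Task exposes
# its first two constructor arguments as .key and .size is mine, not the
# source's.
def demo_cos_dfs(source):
    for task in source.dfs(u'/data/'):
        print "migrate {} ({} bytes)".format(task.key, task.size)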
def list(self, marker):
    for obj in oss2.ObjectIterator(self._oss_api, prefix=self._prefix, marker=marker):
        if obj.key[-1] == '/':
            continue
        logger.info("yield new object: {}".format(obj.key))
        yield Task(obj.key, obj.size, None)
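# Hedged, self-contained sketch of the same oss2 iteration pattern outside the
# class; the endpoint, credentials and bucket name are placeholders, not
# values from this project.
import oss2

def demo_oss_listing():
    auth = oss2.Auth('<access-key-id>', '<access-key-secret>')
    bucket = oss2.Bucket(auth, 'http://oss-cn-hangzhou.aliyuncs.com', '<bucket-name>')
    # ObjectIterator pages through the bucket transparently; keys ending in
    # '/' are directory placeholders, which the method above skips as well
    for obj in oss2.ObjectIterator(bucket, prefix='logs/'):
        if not obj.key.endswith('/'):
            print obj.key, obj.size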
def get_task_by_key(self, key):
    logger = getLogger(__name__)
    _path = key
    if not _path.startswith('/'):
        _path = '/' + _path
    if isinstance(_path, str):
        # COS paths must be unicode under Python 2
        _path = _path.decode('utf-8')
    request = StatFileRequest(self._bucket, _path)
    ret = self._cos_api.stat_file(request)
    logger.info("ret: " + str(ret))
    if ret['code'] != 0:
        logger.warn("get task by key error, key = {}, error code: {}".format(key, str(ret['code'])))
        return Task(key, None, None, None)
    return Task(key, int(ret['data']['filesize']), None, None)
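# Hedged usage sketch for get_task_by_key() above: on a non-zero COS code it
# returns a Task whose size slot is None, so callers can filter failures.
# 'source' is a hypothetical instance of the owning class, and the Task
# attribute names are assumptions.
def demo_stat_key(source):
    task = source.get_task_by_key('photos/2016/cat.jpg')  # leading '/' optional
    if task.size is None:
        print "stat failed for", task.key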
def __dfs_list(self, path):
    logger.info("try to dump file list under {}".format(path))
    _finish = False
    _context = u''
    max_retry = 10
    # guard the retry budget in the loop condition: the original error paths
    # used 'continue', which skipped the exhaustion check entirely
    while not _finish and max_retry > 0:
        try:
            request = ListFolderRequest(bucket_name=self._bucket,
                                        cos_path=path,
                                        context=_context)
            ret = self._cos_api.list_folder(request)
        except Exception:
            logger.exception("list failed")
            max_retry -= 1
            continue
        logger.debug(str(ret))
        if ret['code'] != 0:
            logger.warning("request failed: {}".format(str(ret)))
            max_retry -= 1
            continue
        _finish = not ret['data']['has_more']
        _context = ret['data']['context']
        for item in ret['data']['infos']:
            if 'filelen' in item:
                # file
                key = "{prefix}{filename}".format(prefix=path,
                                                  filename=item['name'])
                yield Task(key, item['filelen'], None)
            else:
                # directory: descend with a trailing '/'
                _sub_dir = "{prefix}{filename}/".format(prefix=path,
                                                        filename=item['name'])
                if isinstance(_sub_dir, str):
                    _sub_dir = _sub_dir.decode('utf-8')
                for i in self.__dfs_list(_sub_dir):
                    yield i
    if max_retry <= 0:
        logger.error("reach max retry times, finish this directory {}".format(path))
    logger.info("finish directory {}".format(path))
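# Hedged sketch of the bare list_folder paging contract that __dfs_list()
# wraps, outside the class. The response fields ('code', 'has_more',
# 'context', 'infos') are taken from the method above; 'cos_client' and
# 'bucket' are assumed to be a cos-python-sdk-v4 style client and bucket name.
def demo_cos_paging(cos_client, bucket):
    context = u''
    while True:
        ret = cos_client.list_folder(
            ListFolderRequest(bucket_name=bucket, cos_path=u'/', context=context))
        if ret['code'] != 0:
            break  # a real caller would retry, as __dfs_list() does
        for item in ret['data']['infos']:
            print item['name']
        if not ret['data']['has_more']:
            break
        context = ret['data']['context']  # opaque continuation token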
def list(self, marker):
    if self._filelist is not None and len(self._filelist) > 0:
        # download the file list itself, then stat every key it names
        filelist_path = os.path.join(self._sync_files_dir,
                                     os.path.basename(self._filelist))
        filelist_task = Task(self._filelist, None, None, None)
        self._download(filelist_task, filelist_path, True)
        with open(filelist_path) as f:
            for line in f:
                # get_task_by_key() normalizes the leading '/' itself
                yield self.get_task_by_key(line.strip())
    else:
        # no explicit file list: walk the bucket from the configured prefix
        prefix = self._prefix_dir if self._prefix_dir is not None else '/'
        for i in self.dfs(prefix):
            yield i
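# Example of the file-list format consumed above, assuming one object key per
# line; the leading '/' is optional because get_task_by_key() normalizes it:
#
#   /images/2016/cat.jpg
#   logs/app.log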
def list(self, marker):
    limit = 100
    delimiter = None
    # qiniu uses its own opaque continuation marker, so the incoming
    # marker argument is intentionally ignored here
    marker = None
    eof = False
    while not eof:
        try:
            ret, eof, info = self._qiniu_api.list(self._bucket, self._prefix,
                                                  marker, limit, delimiter)
            if ret is None:
                logger.warn("ret is None")
                if info.error == 'bad token':
                    raise TokenException
                logger.warn(info.text_body)
                raise IOError(info.error)
            for i in ret['items']:
                logger.info("yield new object: {}".format(i['key']))
                yield Task(i['key'], i['fsize'], None, None)
            if eof:
                logger.info("eof is {}".format(eof))
            elif 'marker' in ret:
                marker = ret['marker']
            else:
                eof = True
        except TokenException:
            eof = True
            logger.warn("Your access id/access key is incorrect, "
                        "please double-check your configuration")
        except Exception as e:
            logger.exception("list exception: " + str(e))
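# Hedged, self-contained sketch of the raw qiniu BucketManager.list()
# contract the loop above wraps: it returns (ret, eof, info), where ret
# carries 'items' and, while more pages remain, an opaque 'marker'.
# Credentials and bucket name are placeholders.
from qiniu import Auth, BucketManager

def demo_qiniu_listing():
    bucket_manager = BucketManager(Auth('<access-key>', '<secret-key>'))
    marker = None
    eof = False
    while not eof:
        ret, eof, info = bucket_manager.list('<bucket-name>', prefix=None,
                                             marker=marker, limit=100)
        if ret is None:
            break  # error; 'info' carries the details
        for item in ret['items']:
            print item['key'], item['fsize']
        marker = ret.get('marker')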
def list(self):
    for name in os.listdir(self._workspace):
        # os.listdir() returns bare names, so join with the workspace
        # before stat'ing; skip anything that is not a regular file
        full_path = os.path.join(self._workspace, name)
        if not os.path.isfile(full_path):
            continue
        yield Task(name, os.path.getsize(full_path), None, None)
def list(self):
    for obj in self._bucket_api.list(prefix=self._prefix):
        if obj.name[-1] == '/':
            # skip directory placeholder objects
            continue
        logger.info("yield new object: {}".format(obj.name))
        yield Task(obj.name, obj.size, None)