def append(self, data, path, size=-1, buffersize=None):
    """Append ``data`` to the file at ``path`` using a two-step POST.

    A first, body-less POST is sent with redirects disabled; the service is
    expected to answer 307 with a ``Location`` header, and the payload is
    then POSTed to that location.

    Args:
        data: Request body passed through to ``self.post`` for the second
            request.
        path: Target path; normalised with ``absolute_path``.
        size: When >= 0, sent as the ``Content-Length`` of the payload.
        buffersize: Optional value for the ``buffersize`` query parameter.

    Returns:
        True when the payload request answers 200.

    Raises:
        ServiceError: If the first request is not a 307 redirect, or the
            payload request does not answer 200.
    """
    path = absolute_path(path)
    # The previous format string carried a third '&overwrite={}' placeholder
    # with no matching argument, which raised IndexError on every call.
    url = '{}{}?op=APPEND'.format(self.service_url(), path)
    if buffersize is not None:
        url += '&buffersize={}'.format(buffersize)

    open_req = self.post(url, allow_redirects=False,
                         headers={'Content-Length': '0'})
    if open_req.status_code != 307:
        # Report the status of the request that actually failed; the payload
        # request has not been issued on this path.
        raise ServiceError(open_req.status_code,
                           'Cannot append path {}'.format(path), open_req)

    headers = {'Content-Type': 'application/octet-stream'}
    if size >= 0:
        headers['Content-Length'] = str(size)
    location = open_req.headers['Location']
    req = self.post(location, data=data, headers=headers)
    if req.status_code != 200:
        raise ServiceError(req.status_code,
                           'Cannot append to path {}'.format(path), req)
    return True
def open(self, path, offset=None, length=None, buffersize=None):
    """Open the file at ``path`` for reading and return its chunk iterator.

    The initial GET is sent with redirects disabled and must answer 307; the
    ``Location`` it names is then fetched as a streamed GET.

    Args:
        path: Path to read; normalised with ``absolute_path``.
        offset: Optional ``offset`` query parameter.
        length: Optional ``length`` query parameter.
        buffersize: Optional ``buffersize`` query parameter.

    Returns:
        The result of ``iter_content(chunk_size=self.read_chunk_size)`` on
        the streamed response.

    Raises:
        ServiceError: If the first request is not a 307 redirect, or the
            redirected request does not answer 200.
    """
    path = absolute_path(path)
    url = '{}{}?op=OPEN'.format(self.service_url(), path)
    # Only parameters that were explicitly supplied end up in the query.
    optional = (('offset', offset), ('length', length),
                ('buffersize', buffersize))
    url += ''.join('&{}={}'.format(key, value)
                   for key, value in optional if value is not None)

    redirect = self.get(url, allow_redirects=False)
    if redirect.status_code != 307:
        raise ServiceError(redirect.status_code,
                           'Cannot open path {}'.format(path), redirect)

    location = redirect.headers['Location']
    content = self.get(location, allow_redirects=False, stream=True)
    if content.status_code != 200:
        raise ServiceError(
            content.status_code,
            'Cannot open datanode location {}'.format(location), content)
    return content.iter_content(chunk_size=self.read_chunk_size)
def hdfs_mv_command(client, argv):
    """Handle the ``mv`` command: move ``argv[0]`` to ``argv[1]``.

    Args:
        client: Object exposing ``move(sourcepath, destpath)`` that returns a
            truthy value on success.
        argv: Exactly two items: the source path and the destination path.

    Raises:
        SystemExit: With status 1 when ``argv`` does not hold two paths.
        ServiceError: With status 403 when the client reports failure.
    """
    # The old check compared against 3 although only argv[0] and argv[1] are
    # used, formatted the message from an undefined name (``args``), and then
    # carried on regardless of the error.
    if len(argv) != 2:
        sys.stderr.write(
            'Invalid number of arguments: {}\n'.format(len(argv)))
        sys.exit(1)
    source, destination = argv
    # NOTE(review): the rename operation visible in this file is ``move``;
    # the previous ``client.mv`` has no counterpart here - confirm no alias.
    if not client.move(source, destination):
        raise ServiceError(403, 'Move failed: {} → {}'.format(source,
                                                              destination))
def upload(self, hdfs_path, local_path, overwrite=False, force=False,
           recursive=False):
    """Copy local file(s) to ``hdfs_path`` and return the destination path.

    When the destination (after ``self._remove_schema``) ends with ``/`` it
    is treated as a directory: ``local_path`` is copied directly if it is a
    file, otherwise it is expanded with ``glob`` and every match is copied.
    Any other destination is treated as a single target file.

    Args:
        hdfs_path: Destination, passed through ``self._remove_schema``.
        local_path: Local file, or a glob pattern for directory uploads.
        overwrite: Passed to ``self.client.copy`` for single-file uploads.
        force: Passed to ``self._copy_to_destination`` for directory uploads.
        recursive: Passed to ``glob`` when expanding ``local_path``.

    Returns:
        The destination path with the schema removed.

    Raises:
        ServiceError: With status 403 when the single-file copy reports
            failure.
    """
    destpath = self._remove_schema(hdfs_path)

    if destpath[-1] != "/":
        # Single target file: hand the open file to the client, size unknown.
        with open(local_path, "rb") as stream:
            stored = self.client.copy(
                stream, destpath, size=-1, overwrite=overwrite)
            if not stored:
                from pyox.client import ServiceError
                raise ServiceError(
                    403, "Move failed: {} → {}".format(local_path, destpath))
        return destpath

    if isfile(local_path):
        self._copy_to_destination(local_path, destpath, force=force)
        return destpath

    # One shared set so each remote directory is created at most once
    # across all matches of the pattern.
    created = set()
    for match in glob(local_path, recursive=recursive):
        self._copy_to_destination(match, destpath, created, force=force)
    return destpath
def scheduler(self):
    """Fetch ``/cluster/scheduler`` and return its scheduler info.

    Returns:
        The ``['scheduler']['schedulerInfo']`` entry of the decoded response.

    Raises:
        ServiceError: If the service does not answer 200.
    """
    response = self.get('{}/cluster/scheduler'.format(self.service_url()))
    if response.status_code == 200:
        return response_data(response)['scheduler']['schedulerInfo']
    raise ServiceError(response.status_code,
                       'Cannot get cluster information', request=response)
def metrics(self):
    """Fetch ``/cluster/metrics`` and return the cluster metrics.

    Returns:
        The ``['clusterMetrics']`` entry of the decoded response.

    Raises:
        ServiceError: If the service does not answer 200.
    """
    response = self.get('{}/cluster/metrics'.format(self.service_url()))
    if response.status_code == 200:
        return response_data(response)['clusterMetrics']
    raise ServiceError(response.status_code,
                       'Cannot get cluster information', request=response)
def status(self, path):
    """Return the file status record for ``path``.

    Args:
        path: Path to query; normalised with ``absolute_path`` for the URL.

    Returns:
        The ``FileStatus`` entry of the JSON response.

    Raises:
        ServiceError: If the service does not answer 200.
    """
    endpoint = '{}{}?op=GETFILESTATUS'.format(self.service_url(),
                                              absolute_path(path))
    response = self.get(endpoint)
    if response.status_code == 200:
        return response.json()['FileStatus']
    # The message reports the path exactly as the caller supplied it.
    raise ServiceError(response.status_code,
                       'Cannot status path {}'.format(path), response)
def _copy_to_destination(self, source, destpath, mkdirs=None, force=False):
    """Copy one local file underneath the remote directory ``destpath``.

    Sources that start with ``/`` or ``../`` are stored under their base
    name only. Any other source keeps its relative path, and its directory
    part is created remotely first (once per ``mkdirs`` set).

    Args:
        source: Local file path.
        destpath: Remote directory prefix; concatenated as-is with the
            target path, so it is expected to end with ``/``.
        mkdirs: Optional set of directory parts already created; updated in
            place. A fresh set is used when omitted.
        force: Passed to ``self.client.copy`` as ``overwrite``.

    Returns:
        The remote target path.

    Raises:
        ServiceError: With status 403 if the directory cannot be created or
            the copy reports failure.
    """
    if mkdirs is None:
        mkdirs = set()
    size = os.path.getsize(source)
    slash = source.rfind("/")

    if source.startswith(("/", "../")):
        # Paths outside the working tree are flattened to their base name.
        targetpath = source[slash + 1:]
    else:
        targetpath = source
        if slash > 0:
            dirpath = source[:slash]
            if dirpath not in mkdirs:
                if not self.client.make_directory(destpath + dirpath):
                    from pyox.client import ServiceError
                    raise ServiceError(
                        403,
                        "Cannot make target directory: {}".format(dirpath))
                mkdirs.add(dirpath)

    target = destpath + targetpath
    # The former ``chunker`` generator was removed: it was only selected for
    # size < 0, which os.path.getsize never returns, and it had no EOF exit,
    # so it would have yielded b'' forever had it ever been used.
    with open(source, "rb") as stream:
        if not self.client.copy(stream, target, size=size, overwrite=force):
            from pyox.client import ServiceError
            raise ServiceError(
                403, "Move failed: {} → {}".format(source, target))
    return target
def copy_to_destination(client, source, destpath, verbose=False, force=False,
                        mkdirs=None):
    """Copy one local file underneath the remote directory ``destpath``.

    Sources that start with ``/`` or ``../`` are stored under their base
    name only. Any other source keeps its relative path, and its directory
    part is created remotely before the copy.

    Args:
        client: Object exposing ``make_directory(path)`` and
            ``copy(data, path, size=..., overwrite=...)``.
        source: Local file path.
        destpath: Remote directory prefix; concatenated as-is with the
            target path, so it is expected to end with ``/``.
        verbose: When true, progress lines are written to stderr.
        force: Passed to ``client.copy`` as ``overwrite``.
        mkdirs: Optional record of directory parts already created, updated
            in place. Either a set, or an object exposing a ``values`` set
            and an ``add`` method. A fresh set is used when omitted.

    Raises:
        ServiceError: With status 403 if the directory cannot be created or
            the copy reports failure.
    """
    # ``mkdirs`` and ``cpargs`` used to be read as free names that are not
    # defined in this function, so any relative source with a directory part
    # raised NameError. They are now the ``mkdirs`` and ``verbose`` parameters.
    if mkdirs is None:
        mkdirs = set()
    created = getattr(mkdirs, 'values', mkdirs)

    size = os.path.getsize(source)
    slash = source.rfind('/')
    if source.startswith(('/', '../')):
        # Paths outside the working tree are flattened to their base name.
        targetpath = source[slash + 1:]
    else:
        targetpath = source
        if slash > 0:
            dirpath = source[:slash]
            if dirpath not in created:
                if verbose:
                    sys.stderr.write(dirpath + '/\n')
                if not client.make_directory(destpath + dirpath):
                    raise ServiceError(
                        403,
                        'Cannot make target directory: {}'.format(dirpath))
                mkdirs.add(dirpath)

    target = destpath + targetpath
    if verbose:
        sys.stderr.write(source + ' → ' + target + '\n')
    # The former ``chunker`` generator was removed: it was only selected for
    # size < 0, which os.path.getsize never returns.
    with open(source, 'rb') as stream:
        if not client.copy(stream, target, size=size, overwrite=force):
            raise ServiceError(403,
                               'Move failed: {} → {}'.format(source, target))
def remove(self, path, recursive=False):
    """Delete ``path`` and return the service's boolean result.

    Args:
        path: Path to delete; normalised with ``absolute_path``.
        recursive: Sent as the ``recursive`` query parameter
            (``true``/``false``).

    Returns:
        The ``boolean`` entry of the JSON response.

    Raises:
        ServiceError: If the service does not answer 200.
    """
    path = absolute_path(path)
    recursive_flag = str(bool(recursive)).lower()
    endpoint = '{}{}?op=DELETE&recursive={}'.format(
        self.service_url(), path, recursive_flag)
    response = self.delete(endpoint)
    if response.status_code == 200:
        return response.json()['boolean']
    raise ServiceError(response.status_code,
                       'Cannot delete path {}'.format(path), response)
def make_directory(self, path, permission=None):
    """Create the directory ``path`` and return the service's boolean result.

    Args:
        path: Directory to create; normalised with ``absolute_path``.
        permission: Optional value for the ``permission`` query parameter.

    Returns:
        The ``boolean`` entry of the JSON response.

    Raises:
        ServiceError: If the service does not answer 200.
    """
    path = absolute_path(path)
    endpoint = '{}{}?op=MKDIRS'.format(self.service_url(), path)
    if permission is not None:
        endpoint = endpoint + '&permission={}'.format(permission)
    response = self.put(endpoint)
    if response.status_code == 200:
        return response.json()['boolean']
    raise ServiceError(response.status_code,
                       'Cannot create path {}'.format(path), response)
def hdfs_rm_command(client, argv):
    """Handle the ``rm`` command: remove every path listed in ``argv``.

    Args:
        client: Object exposing ``remove(path, recursive=...)``.
        argv: Command arguments: an optional ``-r`` flag followed by paths.

    Raises:
        ServiceError: With status 403 at the first path the client reports
            it could not remove; later paths are not attempted.
    """
    parser = argparse.ArgumentParser(prog='pyox hdfs rm', description="rm")
    parser.add_argument(
        '-r',
        action='store_true',
        dest='recursive',
        default=False,
        help="Recursively remove files/directories")
    parser.add_argument('paths', nargs='*', help='a list of paths')
    options = parser.parse_args(argv)

    for target in options.paths:
        removed = client.remove(target, recursive=options.recursive)
        if not removed:
            raise ServiceError(403, 'Cannot remove: {}'.format(target))
def move(self, sourcepath, destpath):
    """Rename ``sourcepath`` to ``destpath`` and return the boolean result.

    Args:
        sourcepath: Existing path; normalised with ``absolute_path``.
        destpath: New path; normalised with ``absolute_path`` and sent as
            the ``destination`` query parameter.

    Returns:
        The ``boolean`` entry of the JSON response.

    Raises:
        ServiceError: If the service does not answer 200.
    """
    sourcepath = absolute_path(sourcepath)
    destpath = absolute_path(destpath)
    endpoint = '{}{}?op=RENAME&destination={}'.format(
        self.service_url(), sourcepath, destpath)
    response = self.put(endpoint)
    if response.status_code == 200:
        return response.json()['boolean']
    raise ServiceError(
        response.status_code,
        'Cannot move path {} to {}'.format(sourcepath, destpath), response)
def list_directory(self, path):
    """List ``path`` and return its entries keyed by ``pathSuffix``.

    Args:
        path: Path to list; normalised with ``absolute_path``.

    Returns:
        A dict mapping each entry's ``pathSuffix`` to the full entry taken
        from ``FileStatuses.FileStatus`` in the JSON response.

    Raises:
        ServiceError: If the service does not answer 200 (redirects are not
            followed).
    """
    path = absolute_path(path)
    endpoint = '{}{}'.format(self.service_url(), path)
    response = self.get(endpoint, params={'op': 'LISTSTATUS'},
                        allow_redirects=False)
    if response.status_code != 200:
        raise ServiceError(response.status_code,
                           'Cannot access path {}'.format(path), response)
    entries = response.json()['FileStatuses']['FileStatus']
    return {entry['pathSuffix']: entry for entry in entries}
def copy(self, data, path, size=-1, overwrite=False):
    """Create the file at ``path`` from ``data`` using a two-step PUT.

    A first, body-less PUT is sent with redirects disabled; the service is
    expected to answer 307 with a ``Location`` header, and the payload is
    then PUT to that location.

    Args:
        data: Request body passed through to ``self.put`` for the second
            request.
        path: Target path; normalised with ``absolute_path``.
        size: When >= 0, sent as the ``Content-Length`` of the payload.
        overwrite: Sent as the ``overwrite`` query parameter
            (``true``/``false``).

    Returns:
        True when the payload request answers 201.

    Raises:
        ServiceError: If the first request is not a 307 redirect, or the
            payload request does not answer 201.
    """
    path = absolute_path(path)
    overwrite_param = 'true' if overwrite else 'false'
    url = '{}{}?op=CREATE&overwrite={}'.format(self.service_url(), path,
                                               overwrite_param)

    open_req = self.put(url, allow_redirects=False,
                        headers={'Content-Length': '0'})
    if open_req.status_code != 307:
        # Previously this read ``req.status_code``, but ``req`` is never
        # bound on this path, so the real failure was masked by an
        # UnboundLocalError.
        raise ServiceError(open_req.status_code,
                           'Cannot open path {}'.format(path), open_req)

    headers = {'Content-Type': 'application/octet-stream'}
    if size >= 0:
        headers['Content-Length'] = str(size)
    location = open_req.headers['Location']
    req = self.put(location, data=data, headers=headers)
    if req.status_code != 201:
        raise ServiceError(req.status_code,
                           'Cannot copy to path {}'.format(path), req)
    return True
def hdfs_mkdir_command(client, argv):
    """Handle the ``mkdir`` command: create every directory in ``argv``.

    Args:
        client: Object exposing ``make_directory(path)``.
        argv: Directory paths to create, processed in order.

    Raises:
        ServiceError: With status 403 at the first directory the client
            reports it could not create; later paths are not attempted.
    """
    for directory in argv:
        if client.make_directory(directory):
            continue
        raise ServiceError(403, 'mkdir failed: {}'.format(directory))
def hdfs_cp_command(client, argv):
    """Handle the ``cp`` command: copy local files to a remote destination.

    The last path is the destination. If it ends with ``/`` every preceding
    path is copied into it (plain files directly, anything else expanded
    with ``glob``). Otherwise exactly one source is allowed and it is copied
    to the destination path itself.

    Args:
        client: Object exposing ``copy(data, path, size=..., overwrite=...)``.
        argv: Command arguments: flags ``-f`` (overwrite), ``-v`` (verbose),
            ``-r`` (recursive glob), ``-s`` (send file size; only read on the
            single-file path) followed by the paths.

    Raises:
        SystemExit: With status 1 when fewer than two paths are given.
        ServiceError: 403 when the single-file copy reports failure; 400
            when several sources are given but the destination does not end
            with ``/``.
    """
    parser = argparse.ArgumentParser(prog='pyox hdfs cp', description="cp")
    parser.add_argument('-f', action='store_true', dest='force',
                        default=False, help="Force an overwrite")
    parser.add_argument('-v', action='store_true', dest='verbose',
                        default=False, help="Verbose")
    parser.add_argument('-r', action='store_true', dest='recursive',
                        default=False, help="Recursively apply wildcards")
    parser.add_argument('-s', action='store_true', dest='sendsize',
                        default=False, help="Send the file size")
    parser.add_argument('paths', nargs='*', help='a list of paths')
    options = parser.parse_args(argv)

    if len(options.paths) < 2:
        sys.stderr.write('At least two paths must be specified.\n')
        sys.exit(1)

    *sources, destpath = options.paths

    if destpath[-1] == '/':
        # Directory copy: each source is a file or a pattern to expand.
        # NOTE(review): ``mkdirs`` is not referenced again in this function.
        mkdirs = tracker()
        for pattern in sources:
            if isfile(pattern):
                matches = [pattern]
            else:
                matches = glob(pattern, recursive=options.recursive)
                if not matches and options.verbose:
                    sys.stderr.write('Nothing matched {}\n'.format(pattern))
            for match in matches:
                copy_to_destination(client, match, destpath,
                                    verbose=options.verbose,
                                    force=options.force)
    elif len(sources) == 1:
        source = sources[0]
        # The size is only looked up when -s was given; -1 means "not sent".
        size = os.path.getsize(source) if options.sendsize else -1
        with open(source, 'rb') as stream:
            if options.verbose:
                sys.stderr.write(source + ' → ' + destpath + '\n')
            copied = client.copy(stream, destpath, size=size,
                                 overwrite=options.force)
            if not copied:
                raise ServiceError(
                    403, 'Move failed: {} → {}'.format(source, destpath))
    else:
        raise ServiceError(400, 'Target is not a directory.')