Beispiel #1
0
 def append(self, data, path, size=-1, buffersize=None):
     """Append ``data`` to the file at ``path`` using a two-step APPEND.

     An empty POST is sent first without following redirects; the service
     is expected to answer 307 with a ``Location`` header, and the payload
     is then POSTed to that location.

     :param data: request body handed to ``self.post`` for the second request
     :param path: target path; normalized with ``absolute_path``
     :param size: when >= 0, sent as the ``Content-Length`` of the payload
     :param buffersize: optional value for the ``buffersize`` query parameter
     :returns: ``True`` when the payload request answers 200
     :raises ServiceError: if the first response is not a 307 redirect or
         the payload request does not answer 200
     """
     path = absolute_path(path)
     # The APPEND operation takes no ``overwrite`` parameter; the previous
     # format string had a third placeholder with no argument and raised
     # IndexError before any request was made.
     url = '{}{}?op=APPEND'.format(self.service_url(), path)
     if buffersize is not None:
         url += '&buffersize={}'.format(buffersize)

     open_req = self.post(url,
                          allow_redirects=False,
                          headers={'Content-Length': '0'})
     if open_req.status_code != 307:
         # Report the status of the request that actually failed; no second
         # request exists at this point.
         raise ServiceError(open_req.status_code,
                            'Cannot append path {}'.format(path), open_req)

     headers = {'Content-Type': 'application/octet-stream'}
     if size >= 0:
         headers['Content-Length'] = str(size)
     location = open_req.headers['Location']
     req = self.post(location, data=data, headers=headers)
     if req.status_code != 200:
         raise ServiceError(req.status_code,
                            'Cannot append to path {}'.format(path),
                            req)
     return True
Beispiel #2
0
 def open(self, path, offset=None, length=None, buffersize=None):
     """Open ``path`` for reading and return an iterator over its content.

     The OPEN request is issued without following redirects. A 307 answer
     supplies the ``Location`` that is then fetched as a stream; its body
     is returned through ``iter_content`` using ``self.read_chunk_size``.

     :param path: path to read; normalized with ``absolute_path``
     :param offset: optional ``offset`` query parameter
     :param length: optional ``length`` query parameter
     :param buffersize: optional ``buffersize`` query parameter
     :raises ServiceError: if the first response is not 307 or the
         redirected read does not answer 200
     """
     path = absolute_path(path)
     url = '{}{}?op=OPEN'.format(self.service_url(), path)
     # Optional query parameters, appended in a fixed order when provided.
     optional = (
         ('&offset={}', offset),
         ('&length={}', length),
         ('&buffersize={}', buffersize),
     )
     for template, value in optional:
         if value is not None:
             url += template.format(value)

     redirect = self.get(url, allow_redirects=False)
     if redirect.status_code != 307:
         raise ServiceError(redirect.status_code,
                            'Cannot open path {}'.format(path), redirect)

     location = redirect.headers['Location']
     content = self.get(location, allow_redirects=False, stream=True)
     if content.status_code != 200:
         raise ServiceError(
             content.status_code,
             'Cannot open datanode location {}'.format(location),
             content)
     return content.iter_content(chunk_size=self.read_chunk_size)
Beispiel #3
0
def hdfs_mv_command(client, argv):
    """Move one path to another: ``argv`` is ``[source, destination]``.

    :param client: object providing ``mv(source, destination)``
    :param argv: the command arguments; exactly two are required
    :raises SystemExit: with code 1 when the argument count is wrong
    :raises ServiceError: (403) when the client reports the move failed
    """
    # Only argv[0] and argv[1] are used, so exactly two arguments are
    # required. The earlier check compared against 3, referenced an undefined
    # ``args`` name, and kept going after reporting the problem.
    if len(argv) != 2:
        sys.stderr.write(
            'Invalid number of arguments: {}\n'.format(len(argv)))
        sys.exit(1)
    source, destination = argv
    # NOTE(review): this calls ``client.mv``; the client method visible
    # elsewhere in this file is named ``move`` - confirm which one the
    # client object actually exposes.
    if not client.mv(source, destination):
        raise ServiceError(403,
                           'Move failed: {} → {}'.format(source, destination))
Beispiel #4
0
    def upload(self,
               hdfs_path,
               local_path,
               overwrite=False,
               force=False,
               recursive=False):
        """Upload ``local_path`` to ``hdfs_path`` and return the target path.

        The target is ``hdfs_path`` after ``self._remove_schema``. If it ends
        with ``/`` it is treated as a directory: an existing local file is
        copied into it, otherwise ``local_path`` is expanded with ``glob`` and
        every match is copied (``force`` controls overwriting). If it does not
        end with ``/`` the single local file is streamed to exactly that path
        (``overwrite`` controls overwriting).

        :raises ServiceError: (403) when the single-file copy reports failure
        """
        destpath = self._remove_schema(hdfs_path)

        # Single-file target: stream the local file to the exact path.
        if destpath[-1] != "/":
            with open(local_path, "rb") as stream:
                copied = self.client.copy(stream,
                                          destpath,
                                          size=-1,
                                          overwrite=overwrite)
                if not copied:
                    from pyox.client import ServiceError

                    raise ServiceError(
                        403,
                        "Move failed: {} → {}".format(local_path, destpath))
            return destpath

        # Directory target with one concrete local file.
        if isfile(local_path):
            self._copy_to_destination(local_path, destpath, force=force)
            return destpath

        # Directory target with a pattern: share one set of already-created
        # directories across all matches.
        matches = glob(local_path, recursive=recursive)
        created = set()
        for match in matches:
            self._copy_to_destination(match, destpath, created, force=force)
        return destpath
Beispiel #5
0
 def scheduler(self):
     """Return the ``schedulerInfo`` entry of the cluster scheduler resource.

     Issues a GET on ``<service_url>/cluster/scheduler`` and extracts
     ``['scheduler']['schedulerInfo']`` from ``response_data`` of the reply.

     :raises ServiceError: if the response status is not 200
     """
     req = self.get('{}/cluster/scheduler'.format(self.service_url()))
     if req.status_code == 200:
         payload = response_data(req)
         return payload['scheduler']['schedulerInfo']
     raise ServiceError(req.status_code,
                        'Cannot get cluster information',
                        request=req)
Beispiel #6
0
 def metrics(self):
     """Return the ``clusterMetrics`` entry of the cluster metrics resource.

     Issues a GET on ``<service_url>/cluster/metrics`` and extracts
     ``['clusterMetrics']`` from ``response_data`` of the reply.

     :raises ServiceError: if the response status is not 200
     """
     req = self.get('{}/cluster/metrics'.format(self.service_url()))
     if req.status_code == 200:
         payload = response_data(req)
         return payload['clusterMetrics']
     raise ServiceError(req.status_code,
                        'Cannot get cluster information',
                        request=req)
Beispiel #7
0
 def status(self, path):
     """Return the ``FileStatus`` object reported for ``path``.

     Sends a GETFILESTATUS request for the absolute form of ``path`` and
     returns the ``FileStatus`` member of the JSON reply.

     :raises ServiceError: if the response status is not 200
     """
     base = self.service_url()
     target = absolute_path(path)
     req = self.get('{}{}?op=GETFILESTATUS'.format(base, target))
     if req.status_code == 200:
         return req.json()['FileStatus']
     # The message uses the path as given by the caller, not the
     # normalized one.
     raise ServiceError(req.status_code,
                        'Cannot status path {}'.format(path), req)
Beispiel #8
0
    def _copy_to_destination(self, source, destpath, mkdirs=None, force=False):
        """Copy the local file ``source`` below ``destpath``; return the target.

        Target naming:

        * a ``source`` starting with ``/`` or ``../`` is reduced to its last
          path component;
        * any other ``source`` containing a ``/`` keeps its relative path, and
          its directory part is created under ``destpath`` first;
        * otherwise the name is used as is.

        The target is the plain concatenation ``destpath + name`` (callers in
        this file only pass a ``destpath`` ending in ``/``).

        :param source: local file path
        :param destpath: destination prefix
        :param mkdirs: set of relative directories already created; updated
            in place. A fresh set is used when omitted.
        :param force: passed to the client as ``overwrite``
        :returns: the target path the file was copied to
        :raises ServiceError: (403) if the directory cannot be created or the
            copy reports failure
        """
        if mkdirs is None:
            mkdirs = set()
        size = os.path.getsize(source)
        targetpath = source
        slash = source.rfind("/")
        if source[0] == "/" or source[0:3] == "../":
            # Absolute and parent-relative sources are flattened to the
            # file name only.
            targetpath = source[slash + 1:]
        elif slash > 0:
            dirpath = source[0:slash]
            if dirpath not in mkdirs:
                if not self.client.make_directory(destpath + dirpath):
                    from pyox.client import ServiceError

                    raise ServiceError(
                        403,
                        "Cannot make target directory: {}".format(dirpath))
                mkdirs.add(dirpath)

        target = destpath + targetpath

        with open(source, "rb") as stream:

            def chunker():
                # Yield 32 KiB chunks and stop at end of file. The earlier
                # loop had no exit condition and would have yielded empty
                # chunks forever once the file was exhausted.
                for chunk in iter(lambda: stream.read(32768), b""):
                    yield chunk

            # The chunked form is only a fallback for an unknown (negative)
            # size; with a known size the file object itself is sent.
            payload = chunker() if size < 0 else stream
            if not self.client.copy(payload,
                                    target,
                                    size=size,
                                    overwrite=force):
                from pyox.client import ServiceError

                raise ServiceError(
                    403, "Move failed: {} → {}".format(source, target))
        return target
Beispiel #9
0
def copy_to_destination(client,
                        source,
                        destpath,
                        verbose=False,
                        force=False,
                        mkdirs=None):
    """Copy the local file ``source`` below ``destpath`` using ``client``.

    Target naming:

    * a ``source`` starting with ``/`` or ``../`` is reduced to its last path
      component;
    * any other ``source`` containing a ``/`` keeps its relative path, and its
      directory part is created under ``destpath`` first;
    * otherwise the name is used as is.

    The target is the plain concatenation ``destpath + name``.

    :param client: object providing ``make_directory(path)`` and
        ``copy(data, path, size=..., overwrite=...)``
    :param source: local file path
    :param destpath: destination prefix
    :param verbose: when true, progress lines are written to stderr
    :param force: passed to the client as ``overwrite``
    :param mkdirs: optional set of relative directories already created,
        updated in place so repeated calls can skip the directory request.
        A fresh set is used when omitted.
    :raises ServiceError: (403) if the directory cannot be created or the
        copy reports failure
    """
    # ``mkdirs`` and the verbose flag used to be read from names that are
    # locals of the calling command and therefore undefined here; both are
    # now taken from this function's own parameters.
    if mkdirs is None:
        mkdirs = set()
    size = os.path.getsize(source)
    targetpath = source
    slash = source.rfind('/')
    if source[0] == '/' or source[0:3] == '../':
        # Absolute and parent-relative sources are flattened to the file
        # name only.
        targetpath = source[slash + 1:]
    elif slash > 0:
        dirpath = source[0:slash]
        if dirpath not in mkdirs:
            if verbose:
                sys.stderr.write(dirpath + '/\n')
            if not client.make_directory(destpath + dirpath):
                raise ServiceError(
                    403, 'Cannot make target directory: {}'.format(dirpath))
            mkdirs.add(dirpath)

    target = destpath + targetpath

    if verbose:
        sys.stderr.write(source + ' → ' + target + '\n')
    with open(source, 'rb') as stream:

        def chunker():
            # Yield 32 KiB chunks until end of file, then report the total.
            sent = 0
            for chunk in iter(lambda: stream.read(32768), b''):
                sent += len(chunk)
                yield chunk
            if verbose:
                sys.stderr.write('Sent {} bytes\n'.format(sent))

        # The chunked form is only a fallback for an unknown (negative)
        # size; with a known size the file object itself is sent.
        payload = chunker() if size < 0 else stream
        if not client.copy(payload, target, size=size, overwrite=force):
            raise ServiceError(403,
                               'Move failed: {} → {}'.format(source, target))
Beispiel #10
0
 def remove(self, path, recursive=False):
     """Delete ``path`` and return the service's boolean result.

     :param path: path to delete; normalized with ``absolute_path``
     :param recursive: truthy to send ``recursive=true``, else ``false``
     :returns: the ``boolean`` member of the JSON reply
     :raises ServiceError: if the response status is not 200
     """
     path = absolute_path(path)
     flag = str(bool(recursive)).lower()
     req = self.delete(
         '{}{}?op=DELETE&recursive={}'.format(self.service_url(), path, flag))
     if req.status_code == 200:
         return req.json()['boolean']
     raise ServiceError(req.status_code,
                        'Cannot delete path {}'.format(path), req)
Beispiel #11
0
 def make_directory(self, path, permission=None):
     """Create the directory ``path`` and return the service's boolean result.

     :param path: directory to create; normalized with ``absolute_path``
     :param permission: optional value for the ``permission`` query parameter
     :returns: the ``boolean`` member of the JSON reply
     :raises ServiceError: if the response status is not 200
     """
     path = absolute_path(path)
     url = '{}{}?op=MKDIRS'.format(self.service_url(), path)
     if permission is None:
         suffix = ''
     else:
         suffix = '&permission={}'.format(permission)
     req = self.put(url + suffix)
     if req.status_code == 200:
         return req.json()['boolean']
     raise ServiceError(req.status_code,
                        'Cannot create path {}'.format(path), req)
Beispiel #12
0
def hdfs_rm_command(client, argv):
    """Remove every path listed in ``argv``; ``-r`` removes recursively.

    :param client: object providing ``remove(path, recursive=...)``
    :param argv: command arguments: an optional ``-r`` flag and any number
        of paths
    :raises ServiceError: (403) for the first path the client fails to remove
    """
    parser = argparse.ArgumentParser(prog='pyox hdfs rm', description="rm")
    parser.add_argument('-r',
                        dest='recursive',
                        action='store_true',
                        default=False,
                        help="Recursively remove files/directories")
    parser.add_argument('paths', nargs='*', help='a list of paths')
    options = parser.parse_args(argv)

    for target in options.paths:
        removed = client.remove(target, recursive=options.recursive)
        if removed:
            continue
        raise ServiceError(403, 'Cannot remove: {}'.format(target))
Beispiel #13
0
 def move(self, sourcepath, destpath):
     """Rename ``sourcepath`` to ``destpath`` and return the boolean result.

     Both paths are normalized with ``absolute_path`` before a RENAME
     request is sent with the destination as a query parameter.

     :returns: the ``boolean`` member of the JSON reply
     :raises ServiceError: if the response status is not 200
     """
     sourcepath = absolute_path(sourcepath)
     destpath = absolute_path(destpath)
     req = self.put('{}{}?op=RENAME&destination={}'.format(
         self.service_url(), sourcepath, destpath))
     if req.status_code == 200:
         return req.json()['boolean']
     raise ServiceError(
         req.status_code,
         'Cannot move path {} to {}'.format(sourcepath, destpath), req)
Beispiel #14
0
 def list_directory(self, path):
     """List ``path`` and return its entries keyed by ``pathSuffix``.

     Sends a LISTSTATUS request (redirects are not followed) and maps each
     item of ``FileStatuses.FileStatus`` in the JSON reply to its
     ``pathSuffix`` value.

     :returns: dict of ``pathSuffix`` -> entry
     :raises ServiceError: if the response status is not 200
     """
     path = absolute_path(path)
     req = self.get('{}{}'.format(self.service_url(), path),
                    params={'op': 'LISTSTATUS'},
                    allow_redirects=False)
     if req.status_code != 200:
         raise ServiceError(req.status_code,
                            'Cannot access path {}'.format(path), req)
     entries = req.json()['FileStatuses']['FileStatus']
     return {entry['pathSuffix']: entry for entry in entries}
Beispiel #15
0
 def copy(self, data, path, size=-1, overwrite=False):
     """Create the file at ``path`` from ``data`` using a two-step CREATE.

     An empty PUT is sent first without following redirects; the service
     is expected to answer 307 with a ``Location`` header, and the payload
     is then PUT to that location.

     :param data: request body handed to ``self.put`` for the second request
     :param path: target path; normalized with ``absolute_path``
     :param size: when >= 0, sent as the ``Content-Length`` of the payload
     :param overwrite: truthy to send ``overwrite=true``, else ``false``
     :returns: ``True`` when the payload request answers 201
     :raises ServiceError: if the first response is not a 307 redirect or
         the payload request does not answer 201
     """
     path = absolute_path(path)
     overwriteParam = 'true' if overwrite else 'false'
     url = '{}{}?op=CREATE&overwrite={}'.format(self.service_url(), path,
                                                overwriteParam)

     open_req = self.put(url,
                         allow_redirects=False,
                         headers={'Content-Length': '0'})
     if open_req.status_code != 307:
         # Report the status of the request that actually failed; the
         # payload request has not been made yet, so there is no ``req``.
         raise ServiceError(open_req.status_code,
                            'Cannot open path {}'.format(path), open_req)

     headers = {'Content-Type': 'application/octet-stream'}
     if size >= 0:
         headers['Content-Length'] = str(size)
     location = open_req.headers['Location']
     req = self.put(location, data=data, headers=headers)
     if req.status_code != 201:
         raise ServiceError(req.status_code,
                            'Cannot copy to path {}'.format(path), req)
     return True
Beispiel #16
0
def hdfs_mkdir_command(client, argv):
    """Create every directory listed in ``argv``, in order.

    :param client: object providing ``make_directory(path)``
    :param argv: directory paths to create
    :raises ServiceError: (403) for the first path the client fails to create
    """
    for target in argv:
        created = client.make_directory(target)
        if created:
            continue
        raise ServiceError(403, 'mkdir failed: {}'.format(target))
Beispiel #17
0
def hdfs_cp_command(client, argv):
    """Copy local files to a destination path: ``cp [-f] [-v] [-r] [-s] SRC... DEST``.

    * ``DEST`` ending in ``/``: every source is copied below it with
      ``copy_to_destination``. A source that is an existing file is copied
      directly; anything else is expanded with ``glob`` (``-r`` enables
      recursive patterns).
    * exactly one source and a ``DEST`` not ending in ``/``: the file is
      streamed to ``DEST``; ``-s`` sends the file size, otherwise -1.
    * anything else is rejected.

    :param client: object providing ``copy(data, path, size=..., overwrite=...)``
    :param argv: command arguments
    :raises SystemExit: with code 1 when fewer than two paths are given
    :raises ServiceError: (403) when the single-file copy fails, (400) when
        several sources are given but ``DEST`` does not end in ``/``
    """
    cpparser = argparse.ArgumentParser(prog='pyox hdfs cp', description="cp")
    # All four switches are plain boolean flags defaulting to False.
    switches = (
        ('-f', 'force', "Force an overwrite"),
        ('-v', 'verbose', "Verbose"),
        ('-r', 'recursive', "Recursively apply wildcards"),
        ('-s', 'sendsize', "Send the file size"),
    )
    for flag, dest, text in switches:
        cpparser.add_argument(flag,
                              action='store_true',
                              dest=dest,
                              default=False,
                              help=text)
    cpparser.add_argument('paths', nargs='*', help='a list of paths')
    cpargs = cpparser.parse_args(argv)

    if len(cpargs.paths) < 2:
        sys.stderr.write('At least two paths must be specified.\n')
        sys.exit(1)

    destpath = cpargs.paths[-1]

    if destpath[-1] == '/':
        # Directory copy: expand each source and copy every match.
        # NOTE(review): this tracker is not referenced again in this
        # function - confirm how it is meant to reach copy_to_destination.
        mkdirs = tracker()
        for pattern in cpargs.paths[:-1]:
            if isfile(pattern):
                matches = [pattern]
            else:
                matches = glob(pattern, recursive=cpargs.recursive)
                if len(matches) == 0 and cpargs.verbose:
                    sys.stderr.write('Nothing matched {}\n'.format(pattern))
            for source in matches:
                copy_to_destination(client,
                                    source,
                                    destpath,
                                    verbose=cpargs.verbose,
                                    force=cpargs.force)
        return

    if len(cpargs.paths) != 2:
        raise ServiceError(400, 'Target is not a directory.')

    # Single source copied to an explicit file path.
    source = cpargs.paths[0]
    if cpargs.sendsize:
        size = os.path.getsize(source)
    else:
        size = -1
    with open(source, 'rb') as stream:
        if cpargs.verbose:
            sys.stderr.write(source + ' → ' + destpath + '\n')
        copied = client.copy(stream,
                             destpath,
                             size=size,
                             overwrite=cpargs.force)
        if not copied:
            raise ServiceError(
                403, 'Move failed: {} → {}'.format(source, destpath))