def do_put(self, path):
    '''
        Usage: put <local file>

        Upload local file into current remote directory
    '''
    try:
        # Resolve against the local working directory; an empty argument
        # makes _fix_path raise WebHDFSError ('put: path not specified').
        path = self._fix_path(path, local=True, required='put')
        dest = '%s/%s' % (self.path, os.path.basename(path))
        # os.stat raises OSError when the local path does not exist.
        if stat.S_ISDIR(os.stat(path).st_mode):
            raise WebHDFSError('%s: cannot upload directory' % path)
        # A truthy stat result means something already lives at dest;
        # never overwrite it.
        if self.hdfs.stat(dest, catch=True):
            raise WebHDFSError('%s: already exists' % dest)
        # Binary mode sends the bytes untouched, and the context manager
        # closes the handle even when the upload fails.
        with open(path, 'rb') as source:
            self.hdfs.put(dest, data=source)
    except (WebHDFSError, OSError, IOError) as e:
        # open() raises IOError, which is not an OSError on Python 2.
        print(e)
def do_rmdir(self, path):
    '''
        Usage: rmdir <remote dir>

        Removes remote directory
    '''
    try:
        # An empty argument makes _fix_path raise WebHDFSError
        # ('rmdir: path not specified').
        path = self._fix_path(path, required='rmdir')
        info = self.hdfs.stat(path)
        # Only empty directories may be removed by this command.
        if not info.is_dir():
            raise WebHDFSError('%s: not a directory' % path)
        if not info.is_empty():
            raise WebHDFSError('%s: directory not empty' % path)
        self.hdfs.rm(path)
    except WebHDFSError as e:
        print(e)
def do_get(self, path):
    '''
        Usage: get <remote file>

        Fetch remote file into current local directory
    '''
    try:
        # An empty argument makes _fix_path raise WebHDFSError
        # ('get: path not specified').
        path = self._fix_path(path, required='get')
        if self.hdfs.stat(path).is_dir():
            raise WebHDFSError('%s: cannot download directory' % path)
        # The download lands in the local working directory under the
        # remote file's base name; never overwrite an existing entry.
        name = os.path.basename(path)
        if os.path.exists(name):
            raise WebHDFSError('%s: file exists' % path)
        # Binary mode stores the received chunks untouched, and the
        # context manager closes (and flushes) the file even when the
        # download fails.
        with open('%s/%s' % (os.getcwd(), name), 'wb') as target:
            self.hdfs.get(path, data=target)
    except (WebHDFSError, OSError, IOError) as e:
        # open() raises IOError, which is not an OSError on Python 2.
        print(e)
def _req(self, name, path, kind='get', data=None, **args):
    '''
        Issue one WebHDFS operation, trying each base URL in self.urls.

        name  -- WebHDFS operation, sent as the 'op' query parameter
        path  -- remote path; leading slashes are stripped before it is
                 appended to '<base>/webhdfs/v1/'
        kind  -- name of the requests function to call ('get', 'put', ...)
        data  -- falsy: plain request, the parsed JSON body is returned;
                 with kind == 'put': object passed as the upload body;
                 otherwise: object whose write() receives the streamed
                 response chunks
        args  -- extra query parameters

        Returns the decoded JSON body for plain requests, True for
        uploads and streamed downloads.

        Raises WebHDFSError on an HTTP error response (other than a
        StandbyException, which moves on to the next endpoint) and
        WebHDFSConnectionError when no endpoint could be used.
    '''
    args['op'] = name
    args['user.name'] = self.user
    # Pre-set so the rotation in ``finally`` is a harmless no-op when
    # self.urls is empty, instead of an UnboundLocalError that would
    # hide the WebHDFSConnectionError raised below.
    indx = 0
    try:
        for indx, base in enumerate(self.urls):
            u = '%s/webhdfs/v1/%s' % (base, path.lstrip('/'))
            try:
                if not data:
                    # Plain metadata-style request: JSON in, JSON out.
                    r = getattr(requests, kind)(
                        u, params=args, timeout=self.wait)
                    self._log(r)
                    r.raise_for_status()
                    return r.json()
                elif kind == 'put':
                    # Two-step upload: the first PUT is not followed, so
                    # its 'location' header can be used as the target of
                    # the second PUT that carries the payload.
                    r = requests.put(u, params=args, allow_redirects=False,
                                     timeout=self.wait)
                    self._log(r)
                    r.raise_for_status()
                    r = requests.put(
                        r.headers['location'],
                        headers={'content-type': 'application/octet-stream'},
                        data=data)
                    self._log(r)
                    r.raise_for_status()
                    return True
                else:
                    # Streamed download, copied to ``data`` in 16 KiB chunks.
                    r = requests.get(u, params=args, stream=True,
                                     timeout=self.wait)
                    self._log(r)
                    r.raise_for_status()
                    for c in r.iter_content(16 * 1024):
                        data.write(c)
                    return True
            except requests.exceptions.HTTPError as e:
                try:
                    # Parse the error body once and reuse it.
                    payload = e.response.json()
                    remote = payload['RemoteException']['exception']
                    if remote == 'StandbyException':
                        # This endpoint answered as standby; try the next.
                        continue
                    raise WebHDFSError(payload)
                except (ValueError, KeyError, TypeError):
                    # Body is not JSON, or not shaped like a
                    # RemoteException: fall back to the HTTP reason.
                    raise WebHDFSError('%s: %s' % (e.response.reason, path))
            except requests.exceptions.ConnectionError:
                continue
            except requests.exceptions.Timeout:
                continue
        else:
            # Every endpoint was skipped (or there were none).
            raise WebHDFSConnectionError(
                'cannot connect to any webhdfs endpoint')
    finally:
        # Rotate so the endpoint that was attempted last is tried first
        # on the next call.
        self.urls = self.urls[indx:] + self.urls[:indx]
def do_mv(self, args):
    '''
        Usage: mv <remote file/dir> <remote dir>

        Moves/renames remote file or directory
    '''
    try:
        # Unpacking raises ValueError unless exactly two arguments were
        # supplied; that is reported as a usage error below.
        source, target = shlex.split(args)
        source = self._fix_path(source, required='mv')
        target = self._fix_path(target, required='mv')

        # Look at the destination itself first; when that lookup comes
        # back falsy, look at its parent directory instead.
        existing = self.hdfs.stat(target, catch=True)
        if not existing:
            existing = self.hdfs.stat(os.path.dirname(target), catch=True)

        # Whatever was found has to be a directory.
        if existing and not existing.is_dir():
            raise WebHDFSError('%s: invalid destination' % target)

        moved = self.hdfs.mv(source, target)
        if not moved:
            raise WebHDFSError('%s: failed to move/rename' % source)
    except WebHDFSError as e:
        print(e)
    except ValueError:
        print('Usage: mv <source> <target>')
def do_cat(self, path):
    '''
        Usage: cat <remote file>

        Display contents of remote file
    '''
    try:
        # An empty argument makes _fix_path raise WebHDFSError
        # ('cat: path not specified').
        path = self._fix_path(path, required='cat')
        info = self.hdfs.stat(path)
        if info.is_dir():
            raise WebHDFSError('%s: cannot cat directory' % path)
        # Whatever hdfs.get returns for the path is written to stdout
        # in a single call.
        contents = self.hdfs.get(path)
        sys.stdout.write(contents)
    except (WebHDFSError, OSError) as e:
        print(e)
def do_rm(self, path):
    '''
        Usage: rm <remote file>

        Removes remote file
    '''
    try:
        # An empty argument makes _fix_path raise WebHDFSError
        # ('rm: path not specified').
        target = self._fix_path(path, required='rm')
        info = self.hdfs.stat(target)
        # Directories are rejected by this command.
        if info.is_dir():
            raise WebHDFSError('%s: cannot remove directory' % target)
        self.hdfs.rm(target)
    except WebHDFSError as e:
        print(e)
def do_mkdir(self, path):
    '''
        Usage: mkdir <remote dir>

        Creates remote directory
    '''
    try:
        # An empty argument makes _fix_path raise WebHDFSError
        # ('mkdir: path not specified').
        target = self._fix_path(path, required='mkdir')
        # A truthy stat result means the path is already taken.
        existing = self.hdfs.stat(target, catch=True)
        if existing:
            raise WebHDFSError('%s: already exists' % target)
        self.hdfs.mkdir(target)
    except WebHDFSError as e:
        print(e)
def do_zcat(self, path):
    '''
        Usage: zcat <remote file>

        Display contents of compressed remote file
    '''
    try:
        # An empty argument makes _fix_path raise WebHDFSError
        # ('zcat: path not specified').
        path = self._fix_path(path, required='zcat')
        if self.hdfs.stat(path).is_dir():
            raise WebHDFSError('%s: cannot cat directory' % path)
        raw = self.hdfs.get(path)
        try:
            # wbits of 16 + MAX_WBITS makes zlib expect a gzip header
            # and trailer around the compressed stream.
            text = zlib.decompress(raw, 16 + zlib.MAX_WBITS)
        except zlib.error as err:
            # Not gzip data (or corrupt): report it like every other
            # command error instead of letting zlib.error escape.
            raise WebHDFSError('%s: %s' % (path, err))
        sys.stdout.write(text)
    except (WebHDFSError, OSError) as e:
        print(e)
def do_cd(self, path=None):
    '''
        Usage: cd <remote dir>

        Changes the shell remote directory
    '''
    try:
        # With no argument the target is the user's home, /user/<user>.
        target = self._fix_path(path or ('/user/%s' % self.user))
        if self.hdfs.stat(target).is_dir():
            self.path = target
        else:
            raise WebHDFSError('%s: not a directory' % target)
    except WebHDFSError as e:
        # NOTE(review): any failure resets the remote directory to '/',
        # even when a valid directory was current before the call.
        self.path = '/'
        print(e)
    finally:
        # The prompt is refreshed whether or not the change succeeded.
        self._reset_prompt()
def _fix_path(self, path, local=False, required=False):
    '''
        Return path as a normalised absolute '/'-separated path.

        path     -- user-supplied path; None is treated as empty and
                    surrounding whitespace is stripped
        local    -- when true, relative (and empty) paths are resolved
                    against os.getcwd() instead of the remote directory
                    held in self.path
        required -- when truthy, an empty path is an error and this
                    value is used as the prefix of the error message

        Empty segments and '.' are dropped and '..' removes the previous
        segment.  A '..' that would climb above the root is ignored, so
        '/..' normalises to '/'.

        Raises WebHDFSError when required is set and path is empty.
    '''
    path = '' if path is None else path.strip()
    if not path and required:
        raise WebHDFSError('%s: path not specified' % required)
    if not path:
        # No path given: fall back to the current directory; for the
        # remote side that defaults to /user/<user> when self.path has
        # not been set.
        if local:
            path = os.getcwd()
        else:
            path = getattr(self, 'path', '/user/%s' % self.user)
    if not path.startswith('/'):
        path = '%s/%s' % (os.getcwd() if local else self.path, path)

    parts = []
    for part in path.split('/'):
        if not part or part == '.':
            continue
        if part == '..':
            # The parent of the root is the root itself, so there is
            # nothing to pop (and nothing to keep) when already there.
            if parts:
                parts.pop()
            continue
        parts.append(part)
    return '/' + '/'.join(parts)