def copyfile(self, src, dst, skip_header=False):
  sb = self._stats(src)
  if sb is None:
    raise IOError(errno.ENOENT, _("Copy src '%s' does not exist") % src)
  if sb.isDir:
    raise IOError(errno.EINVAL, _("Copy src '%s' is a directory") % src)
  if self.isdir(dst):
    raise IOError(errno.EINVAL, _("Copy dst '%s' is a directory") % dst)

  offset = 0

  while True:
    data = self.read(src, offset, UPLOAD_CHUNK_SIZE.get())
    cnt = len(data)

    if offset == 0:
      if skip_header:
        # Drop everything up to and including the first newline of the first chunk.
        n = data.index('\n')
        if n > 0:
          data = data[n + 1:]
      self.create(dst,
                  overwrite=True,
                  blocksize=sb.blockSize,
                  replication=sb.replication,
                  permission=oct(stat.S_IMODE(sb.mode)),
                  data=data)
    else:
      self.append(dst, data)

    if cnt < UPLOAD_CHUNK_SIZE.get():
      break

    offset += cnt
def copyfile(self, src, dst):
  sb = self._stats(src)
  if sb is None:
    raise IOError(errno.ENOENT, "Copy src '%s' does not exist" % (src,))
  if sb.isDir:
    raise IOError(errno.EINVAL, "Copy src '%s' is a directory" % (src,))
  if self.isdir(dst):
    raise IOError(errno.EINVAL, "Copy dst '%s' is a directory" % (dst,))

  offset = 0

  while True:
    data = self.read(src, offset, UPLOAD_CHUNK_SIZE.get())

    if offset == 0:
      self.create(dst,
                  overwrite=True,
                  blocksize=sb.blockSize,
                  replication=sb.replication,
                  permission=oct(stat.S_IMODE(sb.mode)),
                  data=data)

    cnt = len(data)
    if cnt == 0:
      break
    if offset != 0:
      self.append(dst, data)
    offset += cnt
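# A self-contained sketch (assumption, not from the original source) of the chunked
# create-then-append pattern used by copyfile() above, written against plain local files
# so it runs without a cluster. CHUNK is an arbitrary stand-in for UPLOAD_CHUNK_SIZE.get(),
# and copy_local() is a hypothetical helper name.
CHUNK = 1024 * 1024

def copy_local(src, dst, skip_header=False):
  with open(src, 'rb') as src_f, open(dst, 'wb') as dst_f:
    first = True
    while True:
      data = src_f.read(CHUNK)
      if first and skip_header:
        # Drop everything up to and including the first newline, as copyfile() does.
        n = data.find(b'\n')
        if n > 0:
          data = data[n + 1:]
      first = False
      if not data:
        break
      dst_f.write(data)  # stands in for create() on the first chunk, append() afterwards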
def __init__(self, request):
  FileUploadHandler.__init__(self, request)
  self._file = None
  self._starttime = 0
  self._activated = False

  # Need to directly modify FileUploadHandler.chunk_size
  FileUploadHandler.chunk_size = UPLOAD_CHUNK_SIZE.get()
def read_in_chunks(fs, path, offset=0):
  while True:
    chunk = fs.read(path, offset, UPLOAD_CHUNK_SIZE.get())
    if chunk:
      offset += len(chunk)
      yield chunk
    else:
      return
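# A hedged usage sketch (assumption, not from the original source): consuming the
# read_in_chunks() generator above to copy a remote file to a local one. `fs`, the paths
# and stream_to_local() itself are illustrative names, not part of the original API.
def stream_to_local(fs, remote_path, local_path):
  with open(local_path, 'wb') as out:
    for chunk in read_in_chunks(fs, remote_path):
      out.write(chunk)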
def test_upload_file(self):
  with tempfile.NamedTemporaryFile() as local_file:
    # Make sure we can upload more than the UPLOAD chunk size
    file_size = UPLOAD_CHUNK_SIZE.get() * 2
    local_file.write('0' * file_size)
    local_file.flush()

    prefix = self.cluster.fs_prefix + '/test_upload_file'
    self.cluster.fs.mkdir(prefix)

    USER_NAME = 'test'
    HDFS_DEST_DIR = prefix + "/tmp/fb-upload-test"
    LOCAL_FILE = local_file.name
    HDFS_FILE = HDFS_DEST_DIR + '/' + os.path.basename(LOCAL_FILE)

    self.cluster.fs.do_as_superuser(self.cluster.fs.mkdir, HDFS_DEST_DIR)
    self.cluster.fs.do_as_superuser(self.cluster.fs.chown, HDFS_DEST_DIR, USER_NAME, USER_NAME)
    self.cluster.fs.do_as_superuser(self.cluster.fs.chmod, HDFS_DEST_DIR, 0700)

    stats = self.cluster.fs.stats(HDFS_DEST_DIR)
    assert_equal(stats['user'], USER_NAME)
    assert_equal(stats['group'], USER_NAME)

    # Upload the temp file
    resp = self.c.post('/filebrowser/upload/file?dest=%s' % HDFS_DEST_DIR,  # GET param avoids infinite looping
                       dict(dest=HDFS_DEST_DIR, hdfs_file=file(LOCAL_FILE)))
    response = json.loads(resp.content)

    assert_equal(0, response['status'], response)
    stats = self.cluster.fs.stats(HDFS_FILE)
    assert_equal(stats['user'], USER_NAME)
    assert_equal(stats['group'], USER_NAME)

    f = self.cluster.fs.open(HDFS_FILE)
    actual = f.read(file_size)
    expected = file(LOCAL_FILE).read()
    assert_equal(actual, expected, 'files do not match: %s != %s' % (len(actual), len(expected)))

    # Upload again and fail because the file already exists
    resp = self.c.post('/filebrowser/upload/file?dest=%s' % HDFS_DEST_DIR,
                       dict(dest=HDFS_DEST_DIR, hdfs_file=file(LOCAL_FILE)))
    response = json.loads(resp.content)
    assert_equal(-1, response['status'], response)
    assert_true('already exists' in response['data'], response)

    # Upload as a user without write access and fail because of missing permissions
    not_me = make_logged_in_client("not_me", is_superuser=False)
    grant_access("not_me", "not_me", "filebrowser")
    try:
      resp = not_me.post('/filebrowser/upload/file?dest=%s' % HDFS_DEST_DIR,
                         dict(dest=HDFS_DEST_DIR, hdfs_file=file(LOCAL_FILE)))
      response = json.loads(resp.content)
      assert_equal(-1, response['status'], response)
      assert_true('Permission denied' in response['data'], response)
    except AttributeError:
      # Seems like a Django bug.
      # StopFutureHandlers() does not seem to work in test mode as it continues to
      # MemoryFileUploadHandler after the permission issue and so fails.
      pass
def test_upload_file(self):
  with tempfile.NamedTemporaryFile() as local_file:
    # Make sure we can upload more than the UPLOAD chunk size
    file_size = UPLOAD_CHUNK_SIZE.get() * 2
    local_file.write('0' * file_size)
    local_file.flush()

    prefix = self.cluster.fs_prefix + '/test_upload_file'
    self.cluster.fs.mkdir(prefix)

    USER_NAME = 'test'
    HDFS_DEST_DIR = prefix + "/tmp/fb-upload-test"
    LOCAL_FILE = local_file.name
    HDFS_FILE = HDFS_DEST_DIR + '/' + os.path.basename(LOCAL_FILE)

    self.cluster.fs.do_as_superuser(self.cluster.fs.mkdir, HDFS_DEST_DIR)
    self.cluster.fs.do_as_superuser(self.cluster.fs.chown, HDFS_DEST_DIR, USER_NAME, USER_NAME)
    self.cluster.fs.do_as_superuser(self.cluster.fs.chmod, HDFS_DEST_DIR, 0700)

    stats = self.cluster.fs.stats(HDFS_DEST_DIR)
    assert_equal(stats['user'], USER_NAME)
    assert_equal(stats['group'], USER_NAME)

    # Upload the temp file
    resp = self.c.post('/filebrowser/upload/file?dest=%s' % HDFS_DEST_DIR,  # GET param avoids infinite looping
                       dict(dest=HDFS_DEST_DIR, hdfs_file=file(LOCAL_FILE)))
    response = json.loads(resp.content)

    assert_equal(0, response['status'], response)
    stats = self.cluster.fs.stats(HDFS_FILE)
    assert_equal(stats['user'], USER_NAME)
    assert_equal(stats['group'], USER_NAME)

    f = self.cluster.fs.open(HDFS_FILE)
    actual = f.read(file_size)
    expected = file(LOCAL_FILE).read()
    assert_equal(actual, expected, 'files do not match: %s != %s' % (len(actual), len(expected)))

    # Upload again and fail because the file already exists
    resp = self.c.post('/filebrowser/upload/file?dest=%s' % HDFS_DEST_DIR,
                       dict(dest=HDFS_DEST_DIR, hdfs_file=file(LOCAL_FILE)))
    response = json.loads(resp.content)
    assert_equal(-1, response['status'], response)
    assert_true('already exists' in response['data'], response)

    # Upload as a user without write access and fail because of missing permissions
    not_me = make_logged_in_client("not_me", is_superuser=False)
    grant_access("not_me", "not_me", "filebrowser")
    try:
      resp = not_me.post('/filebrowser/upload/file?dest=%s' % HDFS_DEST_DIR,
                         dict(dest=HDFS_DEST_DIR, hdfs_file=file(LOCAL_FILE)))
      response = json.loads(resp.content)
      assert_equal(-1, response['status'], response)
      assert_true('User not_me does not have permissions' in response['data'], response)
    except AttributeError:
      # Seems like a Django bug.
      # StopFutureHandlers() does not seem to work in test mode as it continues to
      # MemoryFileUploadHandler after the permission issue and so fails.
      pass
def __init__(self, request):
  FileUploadHandler.__init__(self, request)
  self._file = None
  self._starttime = 0
  self._activated = False
  self._destination = request.GET.get('dest', None)  # GET param avoids infinite looping
  self.request = request

  # Need to directly modify FileUploadHandler.chunk_size
  FileUploadHandler.chunk_size = UPLOAD_CHUNK_SIZE.get()
def read_in_chunks(fs, path, offset):
  src_file_obj = fs.open(path, mode='r')
  gz = gzip.GzipFile(fileobj=src_file_obj, mode='rb')
  gz.read(offset)  # skip the first `offset` decompressed bytes

  while True:
    chunk = gz.read(UPLOAD_CHUNK_SIZE.get())
    if chunk:
      yield chunk
    else:
      src_file_obj.close()
      return
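# A hedged usage sketch (assumption, not from the original source): using the gzip-aware
# read_in_chunks() above to pull a bounded amount of decompressed data, e.g. for a preview.
# `fs`, the path and the 1 MB cap are illustrative; breaking out of the loop early leaves
# closing src_file_obj to garbage collection, since the generator has no finally block.
def preview_gzip(fs, path, limit=1024 * 1024):
  pieces = []
  remaining = limit
  for chunk in read_in_chunks(fs, path, 0):
    pieces.append(chunk[:remaining])
    remaining -= len(pieces[-1])
    if remaining <= 0:
      break
  return ''.join(pieces)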
def __init__(self, request):
  FileUploadHandler.__init__(self, request)
  self._file = None
  self._starttime = 0
  self._activated = False
  self._destination = request.GET.get('dest', None)  # GET param avoids infinite looping
  self.request = request

  fs = fsmanager.get_filesystem('default')
  fs.setuser(request.user.username)
  FileUploadHandler.chunk_size = fs.get_upload_chuck_size(self._destination) if self._destination else UPLOAD_CHUNK_SIZE.get()

  LOG.debug("Chunk size = %d" % FileUploadHandler.chunk_size)
def get_upload_chuck_size(self):
  from hadoop.conf import UPLOAD_CHUNK_SIZE  # circular dependency
  return UPLOAD_CHUNK_SIZE.get()