def copy(source_path: str, dest_path: str, follow_symlinks: Optional[bool] = False):
    """
    Copy |source_path| to |dest_path|.

    Assume dest_path doesn't exist.
    |follow_symlinks|: whether to follow symlinks
    Note: this only works in Linux (the copy is delegated to rsync).

    Raises the error built by path_error when |dest_path| already exists,
    when |source_path| is a symlink and |follow_symlinks| is false, when
    |source_path| does not exist, or when rsync exits with a non-zero status.
    """
    if os.path.exists(dest_path):
        raise path_error('already exists', dest_path)

    if source_path == '/dev/stdin':
        # The destination is opened in binary mode, so read raw bytes from
        # stdin. Fall back to sys.stdin itself when it has been replaced by
        # an object without a binary buffer (e.g. a BytesIO).
        stdin = getattr(sys.stdin, 'buffer', sys.stdin)
        with open(dest_path, 'wb') as dest:
            file_util.copy(
                stdin,
                dest,
                autoflush=False,
                print_status='Copying %s to %s' % (source_path, dest_path),
            )
        return

    if not follow_symlinks and os.path.islink(source_path):
        raise path_error('not following symlinks', source_path)
    if not os.path.exists(source_path):
        raise path_error('does not exist', source_path)

    # A trailing slash makes rsync copy the contents of a real directory into
    # |dest_path| instead of nesting the directory inside it.
    is_real_dir = not os.path.islink(source_path) and os.path.isdir(source_path)
    command = [
        'rsync',
        '-pr%s' % ('L' if follow_symlinks else 'l'),
        source_path + ('/' if is_real_dir else ''),
        dest_path,
    ]
    if subprocess.call(command) != 0:
        raise path_error('Unable to copy %s to' % source_path, dest_path)
def upload_bundle(self, source_file, bundle_type, worksheet_uuid):
    """
    Upload |source_file| (a stream) to |worksheet_uuid|.

    Returns the uuid reported by the client's upload_bundle_zip call.
    """
    # Construct info for creating the bundle.
    bundle_subclass = get_bundle_subclass(bundle_type)  # program or data
    metadata = metadata_util.fill_missing_metadata(
        bundle_subclass,
        {},
        initial_metadata={
            'name': source_file.name,
            'description': 'Upload ' + source_file.name,
        },
    )
    info = {'bundle_type': bundle_type, 'metadata': metadata}

    # Upload it by creating a file handle and copying source_file to it
    # (see RemoteBundleClient.upload_bundle in the CLI).
    remote_file_uuid = self.client.open_temp_file()
    dest = RPCFileHandle(remote_file_uuid, self.client.proxy)
    try:
        file_util.copy(
            source_file,
            dest,
            autoflush=False,
            print_status='Uploading %s' % info['metadata']['name'],
        )
    finally:
        # Always release the remote handle, even when the copy fails midway.
        dest.close()

    # Then tell the client that the uploaded file handle is there.
    return self.client.upload_bundle_zip(remote_file_uuid, info, worksheet_uuid, False, True)
def cat(path, out):
    """
    Stream the contents of the file at |path| into |out|.

    When |path| is not a regular file nothing is written and None is returned.
    """
    if os.path.isfile(path):
        with open(path, 'rb') as src:
            file_util.copy(src, out)
    return None
def cat(path):
    """
    Write the contents of the file at |path| to stdout.

    The path is first validated with check_isfile (tagged 'cat').
    """
    check_isfile(path, 'cat')
    with open(path, 'rb') as src:
        file_util.copy(src, sys.stdout)
def copy(source_path, dest_path, follow_symlinks=False, exclude_patterns=None):
    """
    Copy |source_path| to |dest_path| (which is assumed not to exist yet).

    |follow_symlinks|: whether to follow symlinks
    |exclude_patterns|: patterns to not copy (each is handed to rsync via --exclude)
    Note: this only works in Linux.
    """
    if os.path.exists(dest_path):
        raise path_error('already exists', dest_path)

    if source_path == '/dev/stdin':
        # Special case: drain stdin straight into the destination file.
        with open(dest_path, 'wb') as out_handle:
            file_util.copy(
                sys.stdin,
                out_handle,
                autoflush=False,
                print_status='Copying %s to %s' % (source_path, dest_path),
            )
        return

    if not follow_symlinks and os.path.islink(source_path):
        raise path_error('not following symlinks', source_path)
    if not os.path.exists(source_path):
        raise path_error('does not exist', source_path)

    symlink_flag = 'L' if follow_symlinks else 'l'
    rsync_source = source_path
    if not os.path.islink(source_path) and os.path.isdir(source_path):
        # Trailing slash: copy the directory's contents, not the directory itself.
        rsync_source = source_path + '/'

    command = ['rsync', '-pr' + symlink_flag, rsync_source, dest_path]
    if exclude_patterns is not None:
        for pattern in exclude_patterns:
            command += ['--exclude', pattern]

    exit_status = subprocess.call(command)
    if exit_status != 0:
        raise path_error('Unable to copy %s to' % source_path, dest_path)
def copy(source_path, dest_path, follow_symlinks=False, exclude_patterns=None):
    """
    Copy |source_path| to |dest_path|.

    source_path can be a list of files, in which case we need to create a
    directory first. Assume dest_path doesn't exist.
    Don't copy things that match |exclude_patterns|.
    Note: this only works in Linux (the copy is delegated to rsync).

    Raises the error built by path_error when |dest_path| already exists or
    when rsync exits with a non-zero status.
    """
    # Imported locally so the block does not depend on a module-level import.
    import subprocess

    if os.path.exists(dest_path):
        raise path_error('already exists', dest_path)

    if isinstance(source_path, list):
        os.mkdir(dest_path)
        sources = list(source_path)
    else:
        sources = [source_path]

    if source_path == '/dev/stdin':
        with open(dest_path, 'wb') as dest:
            file_util.copy(
                sys.stdin,
                dest,
                autoflush=False,
                print_status='Copying %s to %s' % (source_path, dest_path),
            )
        return

    # A trailing slash makes rsync copy the contents of a directory into
    # |dest_path| instead of nesting the directory inside it.
    if len(sources) == 1 and os.path.isdir(sources[0]):
        sources[0] += '/'

    # Pass an argument list rather than a shell string, so that paths and
    # patterns containing spaces or shell metacharacters reach rsync intact.
    command = ['rsync', '-pr' + ('L' if follow_symlinks else 'l')] + sources + [dest_path]
    for pattern in exclude_patterns or ():
        command.extend(['--exclude', pattern])

    if subprocess.call(command) != 0:
        raise path_error('Unable to copy %s to' % source_path, dest_path)
def upload_bundle(self, source_file, bundle_type, worksheet_uuid):
    """
    Upload |source_file| (exposing .filename and a .file stream) to |worksheet_uuid|.

    Returns the value of the client's finish_upload_bundle call. If anything
    fails after the remote temp file is opened, that temp file is finalized
    and the exception is re-raised.
    """
    # Describe the bundle we are about to create.
    bundle_subclass = get_bundle_subclass(bundle_type)  # program or data
    seed_metadata = {
        'name': source_file.filename,
        'description': 'Upload ' + source_file.filename,
    }
    metadata = metadata_util.fill_missing_metadata(
        bundle_subclass, {}, initial_metadata=seed_metadata
    )
    info = {'bundle_type': bundle_type, 'metadata': metadata}

    # Stream the upload into a remote temp file
    # (see RemoteBundleClient.upload_bundle in the CLI).
    remote_file_uuid = self.client.open_temp_file(metadata['name'])
    try:
        remote_handle = RPCFileHandle(remote_file_uuid, self.client.proxy)
        with closing(remote_handle):
            file_util.copy(
                source_file.file,
                remote_handle,
                autoflush=False,
                print_status='Uploading %s' % metadata['name'],
            )

        # For now, always unpack. The archive extension is stripped only now
        # because the remote temp file above needed the full name.
        pack = False
        unpack = not pack
        if unpack and zip_util.path_is_archive(metadata['name']):
            metadata['name'] = zip_util.strip_archive_ext(metadata['name'])

        # Tell the client that the uploaded file handle is there.
        add_to_worksheet = True
        new_bundle_uuid = self.client.finish_upload_bundle(
            [remote_file_uuid], unpack, info, worksheet_uuid, add_to_worksheet
        )
    except:
        # Deliberately broad: clean up the remote temp file, then re-raise.
        self.client.finalize_file(remote_file_uuid)
        raise
    return new_bundle_uuid
def copy(source_path, dest_path, follow_symlinks=False, exclude_patterns=None):
    """
    Copy |source_path| to |dest_path|.

    source_path can be a list of files, in which case we need to create a
    directory first. Assume dest_path doesn't exist.
    Don't copy things that match |exclude_patterns|.
    Note: this only works in Linux (the copy is delegated to rsync).

    Raises the error built by path_error when |dest_path| already exists or
    when rsync exits with a non-zero status.
    """
    # Imported locally so the block does not depend on a module-level import.
    import subprocess

    if os.path.exists(dest_path):
        raise path_error('already exists', dest_path)

    if isinstance(source_path, list):
        os.mkdir(dest_path)
        sources = list(source_path)
    else:
        sources = [source_path]

    if source_path == '/dev/stdin':
        with open(dest_path, 'wb') as dest:
            file_util.copy(
                sys.stdin,
                dest,
                autoflush=False,
                print_status='Copying %s to %s' % (source_path, dest_path),
            )
        return

    # A trailing slash makes rsync copy the contents of a directory into
    # |dest_path| instead of nesting the directory inside it.
    if len(sources) == 1 and os.path.isdir(sources[0]):
        sources[0] += '/'

    # Build an argument list instead of a shell string: file names and
    # patterns with spaces or shell metacharacters are passed through verbatim.
    symlink_flag = 'L' if follow_symlinks else 'l'
    command = ['rsync', '-pr' + symlink_flag] + sources + [dest_path]
    for pattern in exclude_patterns or ():
        command.extend(['--exclude', pattern])

    if subprocess.call(command) != 0:
        raise path_error('Unable to copy %s to' % source_path, dest_path)
def upload_bundle(self, path, info, worksheet_uuid, follow_symlinks, exclude_patterns, add_to_worksheet):
    """
    Upload |path| as a bundle and return the result of upload_bundle_zip.

    A single URL is forwarded to upload_bundle_url. Otherwise |path| is
    zipped locally, streamed to a remote temp file and installed with
    upload_bundle_zip. With an empty |path| only upload_bundle_zip is called
    (with no remote file). The local zip is removed whether or not the
    upload succeeds.
    """
    # URLs can be directly passed to the local client.
    if path and not isinstance(path, list) and path_util.path_is_url(path):
        return self.upload_bundle_url(path, info, worksheet_uuid, follow_symlinks, exclude_patterns)

    remote_file_uuid = None
    zip_path = None
    try:
        if path:
            # First, zip path up (temporary local zip file).
            name = info['metadata']['name']
            zip_path = zip_util.zip(
                path,
                follow_symlinks=follow_symlinks,
                exclude_patterns=exclude_patterns,
                file_name=name,
            )
            # Copy it up to the server (temporary remote zip file).
            with open(zip_path, 'rb') as source:
                remote_file_uuid = self.open_temp_file()
                dest = RPCFileHandle(remote_file_uuid, self.proxy)
                try:
                    file_util.copy(
                        source,
                        dest,
                        autoflush=False,
                        print_status='Uploading %s%s to %s' % (
                            zip_path,
                            ' (' + info['uuid'] + ')' if 'uuid' in info else '',
                            self.address,
                        ),
                    )
                finally:
                    # FileServer does not expose an API for forcibly flushing
                    # writes, so we rely on closing the file to flush it.
                    # Closing here also releases the handle on failure.
                    dest.close()

        # Finally, install the zip file (this will be in charge of deleting
        # that zip file).
        return self.upload_bundle_zip(remote_file_uuid, info, worksheet_uuid, follow_symlinks, add_to_worksheet)
    finally:
        if zip_path:
            path_util.remove(zip_path)  # Remove local zip
def upload_bundle(self, path, info, worksheet_uuid, follow_symlinks):
    """
    Upload |path| as a bundle and return the result of upload_bundle_zip.

    A single URL is forwarded to upload_bundle_url. Otherwise |path| is
    zipped locally, streamed to a remote temp file and installed with
    upload_bundle_zip. With an empty |path| only upload_bundle_zip is called
    (with no remote file). The local zip is removed whether or not the
    upload succeeds.
    """
    # URLs can be directly passed to the local client.
    if path and not isinstance(path, list) and path_util.path_is_url(path):
        return self.upload_bundle_url(path, info, worksheet_uuid, follow_symlinks)

    remote_file_uuid = None
    zip_path = None
    try:
        if path:
            # First, zip path up (temporary local zip file). The second
            # element returned by zip_util.zip is not needed here.
            zip_path, _ = zip_util.zip(path, follow_symlinks=follow_symlinks)
            # Copy it up to the server (temporary remote zip file).
            with open(zip_path, 'rb') as source:
                remote_file_uuid = self.open_temp_file()
                dest = RPCFileHandle(remote_file_uuid, self.proxy)
                try:
                    file_util.copy(source, dest, autoflush=False, print_status=True)
                finally:
                    # FileServer does not expose an API for forcibly flushing
                    # writes, so we rely on closing the file to flush it.
                    # Closing here also releases the handle on failure.
                    dest.close()

        # Finally, install the zip file (this will be in charge of deleting
        # that zip file).
        return self.upload_bundle_zip(remote_file_uuid, info, worksheet_uuid, follow_symlinks)
    finally:
        if zip_path:
            path_util.remove(zip_path)  # Remove local zip
def cat(path, out):
    """
    Stream the contents of the file at |path| into |out|.

    The path is first validated with check_isfile (tagged 'cat').
    """
    check_isfile(path, 'cat')
    with open(path, 'rb') as src:
        file_util.copy(src, out)
def copy_bundle(self, source_bundle_uuid, info, dest_client, dest_worksheet_uuid, add_to_worksheet):
    """
    A streamlined combination of download_target and upload_bundle.
    Copy from self to dest_client.

    Returns the result of dest_client.upload_bundle_zip. The zip file opened
    on the source side is finalized whether or not the copy succeeds.
    """
    # Open source (the second element returned by open_target_zip is unused).
    source_file_uuid, _ = self.open_target_zip((source_bundle_uuid, ''), False)
    try:
        source = RPCFileHandle(source_file_uuid, self.proxy)

        # Open target
        dest_file_uuid = dest_client.open_temp_file()
        dest = RPCFileHandle(dest_file_uuid, dest_client.proxy)

        # Copy contents over
        try:
            file_util.copy(
                source,
                dest,
                autoflush=False,
                print_status='Copying %s from %s to %s' % (
                    source_bundle_uuid, self.address, dest_client.address),
            )
        finally:
            # Release the destination handle even if the copy fails midway.
            dest.close()

        # Finally, install the zip file (this will be in charge of deleting
        # that zip file).
        return dest_client.upload_bundle_zip(
            dest_file_uuid, info, dest_worksheet_uuid, False, add_to_worksheet)
    finally:
        self.finalize_file(source_file_uuid, True)  # Delete remote zip file
def upload_bundle(self, sources, follow_symlinks, exclude_patterns, git, unpack, remove_sources, info, worksheet_uuid, add_to_worksheet):
    """
    See local_bundle_client.py for documentation on the usage.
    Strategy:
    1) We copy the |sources| to a temporary directory on the server
       (streaming either a tar or tar.gz depending on whether compression is needed).
    2) We politely ask the server to finish_upload_bundle (performs a
       LocalBundleClient.upload_bundle from the temporary directory).

    Returns the result of finish_upload_bundle (or of upload_bundle_url when
    every source is a URL).
    """
    # URLs can be directly passed to the local client.
    if all(path_util.path_is_url(source) for source in sources):
        return self.upload_bundle_url(sources, follow_symlinks, exclude_patterns, git, unpack, remove_sources, info, worksheet_uuid, add_to_worksheet)

    # 1) Copy sources up to the server (temporary remote zip file)
    remote_file_uuids = []
    for source in sources:
        remote_file_uuid = self.open_temp_file(zip_util.add_packed_suffix(os.path.basename(source)))
        remote_file_uuids.append(remote_file_uuid)
        dest_handle = RPCFileHandle(remote_file_uuid, self.proxy)
        opened_archive = None
        try:
            if zip_util.path_is_archive(source):
                # Archives are binary data: read them as bytes.
                opened_archive = open(source, 'rb')
                source_handle = opened_archive
            else:
                source_handle = zip_util.open_packed_path(source, follow_symlinks, exclude_patterns)
                unpack = True  # We packed it, so we have to unpack it
            status = 'Uploading %s%s to %s' % (
                source,
                ' (' + info['uuid'] + ')' if 'uuid' in info else '',
                self.address,
            )
            file_util.copy(source_handle, dest_handle, autoflush=False, print_status=status)
        finally:
            try:
                # FileServer does not expose an API for forcibly flushing
                # writes, so we rely on closing the file to flush it.
                dest_handle.close()
            finally:
                # Only close the file opened here; the packed stream from
                # zip_util is left untouched, as before.
                if opened_archive is not None:
                    opened_archive.close()

    # 2) Install upload (this call will be in charge of deleting the temporary file).
    return self.finish_upload_bundle(remote_file_uuids, unpack, info, worksheet_uuid, add_to_worksheet)
def upload(self, bundle_type, path, metadata, worksheet_uuid=None):
    """
    Zip |path|, stream the zip into a remote temp file and register it via upload_zip.

    Returns whatever upload_zip returns.
    """
    local_zip = zip_util.zip(path)
    with open(local_zip, 'rb') as zip_handle:
        remote_file_uuid = self.open_temp_file()
        remote_handle = RPCFileHandle(remote_file_uuid, self.proxy)
        # FileServer does not expose an API for forcibly flushing writes, so
        # the remote handle is closed (by the with-block) to flush it.
        with contextlib.closing(remote_handle):
            file_util.copy(zip_handle, remote_handle, autoflush=False)
    return self.upload_zip(bundle_type, remote_file_uuid, metadata, worksheet_uuid)
def upload_bundle(self, sources, follow_symlinks, exclude_patterns, git, unpack, remove_sources, info, worksheet_uuid, add_to_worksheet):
    """
    See local_bundle_client.py for documentation on the usage.
    Strategy:
    1) We copy the |sources| to a temporary directory on the server
       (streaming either a tar or tar.gz depending on whether compression is needed).
    2) We politely ask the server to finish_upload_bundle (performs a
       LocalBundleClient.upload_bundle from the temporary directory).

    Returns the result of finish_upload_bundle (or of upload_bundle_url when
    every source is a URL). Raises UsageError for a broken symlink, or for a
    symlink when |follow_symlinks| is false. On any failure the remote temp
    files opened so far are finalized and the exception is re-raised.
    """
    # URLs can be directly passed to the local client.
    if all(path_util.path_is_url(source) for source in sources):
        return self.upload_bundle_url(sources, follow_symlinks, exclude_patterns, git, unpack, remove_sources, info, worksheet_uuid, add_to_worksheet)

    remote_file_uuids = []
    try:
        # 1) Copy sources up to the server (temporary remote zip file)
        for source in sources:
            opened_archive = None
            if zip_util.path_is_archive(source):
                # Archives are binary data: read them as bytes.
                opened_archive = open(source, 'rb')
                source_handle = opened_archive
                temp_file_name = os.path.basename(source)
            elif os.path.isdir(source):
                source_handle = tar_gzip_directory(source, follow_symlinks, exclude_patterns)
                temp_file_name = os.path.basename(source) + '.tar.gz'
                unpack = True  # We packed it, so we have to unpack it
            else:
                resolved_source = source
                if follow_symlinks:
                    resolved_source = os.path.realpath(source)
                    if not os.path.exists(resolved_source):
                        raise UsageError('Broken symlink')
                elif os.path.islink(source):
                    raise UsageError('Not following symlinks.')
                source_handle = gzip_file(resolved_source)
                temp_file_name = os.path.basename(source) + '.gz'
                unpack = True  # We packed it, so we have to unpack it

            try:
                remote_file_uuid = self.open_temp_file(temp_file_name)
                remote_file_uuids.append(remote_file_uuid)
                with closing(RPCFileHandle(remote_file_uuid, self.proxy)) as dest_handle:
                    status = 'Uploading %s%s to %s' % (
                        source,
                        ' (' + info['uuid'] + ')' if 'uuid' in info else '',
                        self.address,
                    )
                    file_util.copy(source_handle, dest_handle, autoflush=False, print_status=status)
            finally:
                # Only close the file opened here; streams produced by the
                # packing helpers are left untouched, as before.
                if opened_archive is not None:
                    opened_archive.close()

        # 2) Install upload (this call will be in charge of deleting the temporary file).
        return self.finish_upload_bundle(remote_file_uuids, unpack, info, worksheet_uuid, add_to_worksheet)
    except:
        # Deliberately broad: release the remote temp files, then re-raise.
        for remote_file_uuid in remote_file_uuids:
            self.finalize_file(remote_file_uuid)
        raise
def upload_bundle(self, source_file, bundle_type, worksheet_uuid):
    '''
    Upload |source_file| (a stream) to |worksheet_uuid|.

    Returns the uuid reported by the client's upload_bundle_zip call.
    '''
    # Construct info for creating the bundle.
    bundle_subclass = get_bundle_subclass(bundle_type)  # program or data
    metadata = metadata_util.fill_missing_metadata(
        bundle_subclass,
        {},
        initial_metadata={
            'name': source_file.name,
            'description': 'Upload ' + source_file.name,
        },
    )
    info = {'bundle_type': bundle_type, 'metadata': metadata}

    # Upload it by creating a file handle and copying source_file to it
    # (see RemoteBundleClient.upload_bundle in the CLI).
    remote_file_uuid = self.client.open_temp_file()
    dest = RPCFileHandle(remote_file_uuid, self.client.proxy)
    try:
        file_util.copy(
            source_file,
            dest,
            autoflush=False,
            print_status='Uploading %s' % info['metadata']['name'],
        )
    finally:
        # Always release the remote handle, even when the copy fails midway.
        dest.close()

    # Then tell the client that the uploaded file handle is there.
    return self.client.upload_bundle_zip(remote_file_uuid, info, worksheet_uuid, False, True)
def download_target(self, target, follow_symlinks, return_zip=False):
    """
    Download |target| as a zip into a local temp file and, unless |return_zip|
    is set, unzip it into a fresh temp directory.

    Returns (zip_path, container_path) when |return_zip| is true, otherwise
    (result_path, container_path) where result_path comes from zip_util.unzip.
    """
    # Reserve a local temp file that will receive the remote zip.
    descriptor, zip_path = tempfile.mkstemp(dir=tempfile.gettempdir())
    os.close(descriptor)

    # Ask for the remote zip and stream it down.
    source_uuid, name = self.open_target_zip(target, follow_symlinks)
    remote_zip = RPCFileHandle(source_uuid, self.proxy)
    with open(zip_path, 'wb') as local_zip:
        with contextlib.closing(remote_zip):
            file_util.copy(
                remote_zip,
                local_zip,
                autoflush=False,
                print_status='Downloading %s on %s to %s' % ('/'.join(target), self.address, zip_path),
            )
    self.finalize_file(source_uuid, True)  # Delete remote zip file

    # Unpack the local zip file (or hand it back untouched).
    container_path = tempfile.mkdtemp()
    if return_zip:
        return zip_path, container_path

    result_path = zip_util.unzip(zip_path, container_path, name)
    path_util.remove(zip_path)  # Delete local zip file
    return (result_path, container_path)
def unpack(source, dest_path):
    """
    Unpack the archive |source| to |dest_path|.
    Note: |source| can be a file handle or a path.

    A path is dispatched on its extension (tar.gz/tgz, tar.bz2, zip, .gz).
    A file handle is streamed into `tar xfz`.
    Raises UsageError when a path is not a recognised archive or when the
    extraction command exits with a non-zero status.
    """
    # Unpack to a temporary location.
    # TODO: guard against zip bombs.  Put a maximum limit and enforce it here.
    # In the future, we probably don't want to be unpacking things all over the place.
    tmp_path = tempfile.mkdtemp('-zip_util.unpack')
    if isinstance(source, basestring):
        source_path = source
        if source_path.endswith('tar.gz') or source_path.endswith('tgz'):
            exitcode = subprocess.call(['tar', 'xfz', source_path, '-C', tmp_path])
        elif source_path.endswith('tar.bz2'):
            exitcode = subprocess.call(['tar', 'xfj', source_path, '-C', tmp_path])
        elif source_path.endswith('zip'):
            exitcode = subprocess.call(['unzip', '-q', source_path, '-d', tmp_path])
        elif source_path.endswith('.gz'):
            # A plain gzip file unpacks to a single file named after the archive.
            unpacked_name = os.path.basename(strip_archive_ext(source_path))
            with open(os.path.join(tmp_path, unpacked_name), 'wb') as f:
                exitcode = subprocess.call(['gunzip', '-q', '-c', source_path], stdout=f)
        else:
            raise UsageError('Not an archive: %s' % source_path)
        if exitcode != 0:
            raise UsageError('Error unpacking %s' % source_path)
    else:
        # File handle, stream the contents!
        source_handle = source
        proc = subprocess.Popen(['tar', 'xfz', '-', '-C', tmp_path], stdin=subprocess.PIPE)
        try:
            file_util.copy(source_handle, proc.stdin,
                           print_status='Downloading and unpacking to %s' % tmp_path)
        finally:
            # Always close the pipe and reap tar, even if the copy fails.
            proc.stdin.close()
            exitcode = proc.wait()
        # Check tar's status here too, so a failed extraction is not silent.
        if exitcode != 0:
            raise UsageError('Error unpacking archive stream to %s' % tmp_path)

    # Move files into the right place.
    # If archive only contains one path, then use that.
    files = [f for f in os.listdir(tmp_path) if not ignore_file(f)]
    if len(files) == 1:
        path_util.rename(os.path.join(tmp_path, files[0]), dest_path)
        path_util.remove(tmp_path)
    else:
        path_util.rename(tmp_path, dest_path)
def upload_bundle(self, source_file, bundle_type, worksheet_uuid):
    """
    Upload |source_file| (exposing .filename and a .file stream) to |worksheet_uuid|.

    Returns the value of the client's finish_upload_bundle call. If anything
    fails after the remote temp file is opened, that temp file is finalized
    and the exception is re-raised.
    """
    # Describe the bundle we are about to create.
    bundle_subclass = get_bundle_subclass(bundle_type)  # program or data
    seed_metadata = {
        'name': source_file.filename,
        'description': 'Upload ' + source_file.filename,
    }
    metadata = metadata_util.fill_missing_metadata(
        bundle_subclass, {}, initial_metadata=seed_metadata
    )
    info = {'bundle_type': bundle_type, 'metadata': metadata}

    # Stream the upload into a remote temp file
    # (see RemoteBundleClient.upload_bundle in the CLI).
    remote_file_uuid = self.client.open_temp_file(metadata['name'])
    try:
        remote_handle = RPCFileHandle(remote_file_uuid, self.client.proxy)
        with closing(remote_handle):
            file_util.copy(
                source_file.file,
                remote_handle,
                autoflush=False,
                print_status='Uploading %s' % metadata['name'],
            )

        # For now, always unpack. The archive extension is stripped only now
        # because the remote temp file above needed the full name.
        pack = False
        unpack = not pack
        if unpack and zip_util.path_is_archive(metadata['name']):
            metadata['name'] = zip_util.strip_archive_ext(metadata['name'])

        # Tell the client that the uploaded file handle is there.
        add_to_worksheet = True
        new_bundle_uuid = self.client.finish_upload_bundle(
            [remote_file_uuid], unpack, info, worksheet_uuid, add_to_worksheet
        )
    except:
        # Deliberately broad: clean up the remote temp file, then re-raise.
        self.client.finalize_file(remote_file_uuid)
        raise
    return new_bundle_uuid
def upload_bundle(self, path, info, worksheet_uuid, follow_symlinks, exclude_patterns, add_to_worksheet):
    """
    Upload |path| as a bundle and return the result of upload_bundle_zip.

    A single URL is forwarded to upload_bundle_url. Otherwise |path| is
    zipped locally, streamed to a remote temp file and installed with
    upload_bundle_zip. With an empty |path| only upload_bundle_zip is called
    (with no remote file). The local zip is removed whether or not the
    upload succeeds.
    """
    # URLs can be directly passed to the local client.
    if path and not isinstance(path, list) and path_util.path_is_url(path):
        return self.upload_bundle_url(path, info, worksheet_uuid, follow_symlinks, exclude_patterns)

    remote_file_uuid = None
    zip_path = None
    try:
        if path:
            # First, zip path up (temporary local zip file).
            name = info['metadata']['name']
            zip_path = zip_util.zip(
                path,
                follow_symlinks=follow_symlinks,
                exclude_patterns=exclude_patterns,
                file_name=name,
            )
            # Copy it up to the server (temporary remote zip file).
            with open(zip_path, 'rb') as source:
                remote_file_uuid = self.open_temp_file()
                dest = RPCFileHandle(remote_file_uuid, self.proxy)
                try:
                    file_util.copy(
                        source,
                        dest,
                        autoflush=False,
                        print_status='Uploading %s%s to %s' % (
                            zip_path,
                            ' (' + info['uuid'] + ')' if 'uuid' in info else '',
                            self.address,
                        ),
                    )
                finally:
                    # FileServer does not expose an API for forcibly flushing
                    # writes, so we rely on closing the file to flush it.
                    # Closing here also releases the handle on failure.
                    dest.close()

        # Finally, install the zip file (this will be in charge of deleting
        # that zip file).
        return self.upload_bundle_zip(remote_file_uuid, info, worksheet_uuid, follow_symlinks, add_to_worksheet)
    finally:
        if zip_path:
            path_util.remove(zip_path)  # Remove local zip
def copy(source_path, dest_path, follow_symlinks=False, exclude_names=None):
    """
    Copy |source_path| to |dest_path|.

    source_path can be a list of files, in which case we need to create a
    directory first. Assume dest_path doesn't exist.
    |exclude_names| is accepted but not used yet.

    Raises the error built by path_error when |dest_path| already exists or
    when cp exits with a non-zero status.
    """
    # TODO: implement exclude_names
    # TODO: copytree doesn't preserve permissions, so we're making a system
    # call (only works in Linux).
    # Imported locally so the block does not depend on a module-level import.
    import subprocess

    if os.path.exists(dest_path):
        raise path_error('already exists', dest_path)

    if isinstance(source_path, list):
        os.mkdir(dest_path)
        sources = list(source_path)
    else:
        sources = [source_path]

    if source_path == '/dev/stdin':
        with open(dest_path, 'wb') as dest:
            file_util.copy(sys.stdin, dest, autoflush=False, print_status=True)
        return

    # Pass an argument list rather than a shell string, so that paths
    # containing spaces or shell metacharacters reach cp intact.
    command = ['cp', '-pR' + ('L' if follow_symlinks else 'P')] + sources + [dest_path]
    if subprocess.call(command) != 0:
        raise path_error('Unable to copy %s to' % source_path, dest_path)
def cat(self, target):
    """
    Write the contents of |target| to stdout.

    The target is opened remotely with open_target and read through an
    RPCFileHandle, which is closed once the copy finishes or fails.
    """
    file_uuid = self.open_target(target)
    with contextlib.closing(RPCFileHandle(file_uuid, self.proxy)) as remote_handle:
        file_util.copy(remote_handle, sys.stdout)
def upload_bundle(self, sources, follow_symlinks, exclude_patterns, git, unpack, remove_sources, info, worksheet_uuid, add_to_worksheet):
    """
    See local_bundle_client.py for documentation on the usage.
    Strategy:
    1) We copy the |sources| to a temporary directory on the server
       (streaming either a tar or tar.gz depending on whether compression is needed).
    2) We politely ask the server to finish_upload_bundle (performs a
       LocalBundleClient.upload_bundle from the temporary directory).

    Returns the result of finish_upload_bundle (or of upload_bundle_url when
    every source is a URL). Raises UsageError for a broken symlink, or for a
    symlink when |follow_symlinks| is false. On any failure the remote temp
    files opened so far are finalized and the exception is re-raised.
    """
    # URLs can be directly passed to the local client.
    if all(path_util.path_is_url(source) for source in sources):
        return self.upload_bundle_url(sources, follow_symlinks, exclude_patterns, git, unpack, remove_sources, info, worksheet_uuid, add_to_worksheet)

    remote_file_uuids = []
    try:
        # 1) Copy sources up to the server (temporary remote zip file)
        for source in sources:
            opened_archive = None
            if zip_util.path_is_archive(source):
                # Archives are binary data: read them as bytes.
                opened_archive = open(source, 'rb')
                source_handle = opened_archive
                temp_file_name = os.path.basename(source)
            elif os.path.isdir(source):
                source_handle = tar_gzip_directory(source, follow_symlinks, exclude_patterns)
                temp_file_name = os.path.basename(source) + '.tar.gz'
                unpack = True  # We packed it, so we have to unpack it
            else:
                resolved_source = source
                if follow_symlinks:
                    resolved_source = os.path.realpath(source)
                    if not os.path.exists(resolved_source):
                        raise UsageError('Broken symlink')
                elif os.path.islink(source):
                    raise UsageError('Not following symlinks.')
                source_handle = gzip_file(resolved_source)
                temp_file_name = os.path.basename(source) + '.gz'
                unpack = True  # We packed it, so we have to unpack it

            try:
                remote_file_uuid = self.open_temp_file(temp_file_name)
                remote_file_uuids.append(remote_file_uuid)
                with closing(RPCFileHandle(remote_file_uuid, self.proxy)) as dest_handle:
                    status = 'Uploading %s%s to %s' % (
                        source,
                        ' (' + info['uuid'] + ')' if 'uuid' in info else '',
                        self.address,
                    )
                    file_util.copy(source_handle, dest_handle, autoflush=False, print_status=status)
            finally:
                # Only close the file opened here; streams produced by the
                # packing helpers are left untouched, as before.
                if opened_archive is not None:
                    opened_archive.close()

        # 2) Install upload (this call will be in charge of deleting the temporary file).
        return self.finish_upload_bundle(remote_file_uuids, unpack, info, worksheet_uuid, add_to_worksheet)
    except:
        # Deliberately broad: release the remote temp files, then re-raise.
        for remote_file_uuid in remote_file_uuids:
            self.finalize_file(remote_file_uuid)
        raise
def cat(self, target):
    """
    Write the contents of |target| to stdout.

    The handle returned by open_file is closed once the copy finishes or fails.
    """
    target_handle = self.open_file(target)
    with contextlib.closing(target_handle) as readable:
        file_util.copy(readable, sys.stdout)
def cat_target(self, target, out):
    """
    Copy the contents of |target| into |out|.

    Does nothing when open_target_handle returns a falsy handle. The handle
    is closed with close_target_handle even if the copy raises.
    """
    source = self.open_target_handle(target)
    if not source:
        return
    try:
        file_util.copy(source, out)
    finally:
        self.close_target_handle(source)