def upload_bundle(self, path, info, worksheet_uuid, follow_symlinks):
    """
    Upload the contents at |path| to the server as a bundle.

    |path|: a URL, a local path, a list of local paths, or a false value
            (None / empty) when there are no contents to upload.
    |info|: bundle info, passed through to the server call unchanged.
    |worksheet_uuid|: passed through to the server call unchanged.
    |follow_symlinks|: forwarded to zip_util.zip and to the server call.

    Returns whatever upload_bundle_url / upload_bundle_zip returns.
    """
    # URLs can be directly passed to the local client.
    if path and not isinstance(path, list) and path_util.path_is_url(path):
        return self.upload_bundle_url(path, info, worksheet_uuid, follow_symlinks)

    if not path:
        # Nothing to zip or copy: install with no remote file.
        return self.upload_bundle_zip(None, info, worksheet_uuid, follow_symlinks)

    # First, zip path up (temporary local zip file).
    zip_path, _ = zip_util.zip(path, follow_symlinks=follow_symlinks)
    try:
        # Copy it up to the server (temporary remote zip file)
        with open(zip_path, 'rb') as source:
            remote_file_uuid = self.open_temp_file()
            dest = RPCFileHandle(remote_file_uuid, self.proxy)
            try:
                file_util.copy(source, dest, autoflush=False, print_status=True)
            finally:
                # FileServer does not expose an API for forcibly flushing
                # writes, so we rely on closing the file to flush it. Closing
                # in a finally block also releases the handle if the copy fails.
                dest.close()

        # Finally, install the zip file (this will be in charge of deleting that zip file).
        return self.upload_bundle_zip(remote_file_uuid, info, worksheet_uuid, follow_symlinks)
    finally:
        # Remove local zip, even when the copy or the install raised.
        if zip_path:
            path_util.remove(zip_path)
def upload_bundle(self, sources, follow_symlinks, exclude_patterns, git, unpack, remove_sources, info, worksheet_uuid, add_to_worksheet):
    """
    See local_bundle_client.py for documentation on the usage.
    Strategy:
    1) We copy the |sources| to a temporary directory on the server
       (streaming either a tar or tar.gz depending on whether compression is needed).
    2) We politely ask the server to finish_upload_bundle (performs a
       LocalBundleClient.upload_bundle from the temporary directory).

    Returns the result of upload_bundle_url (when every source is a URL) or
    of finish_upload_bundle.
    """
    # URLs can be directly passed to the local client.
    if all(path_util.path_is_url(source) for source in sources):
        return self.upload_bundle_url(sources, follow_symlinks, exclude_patterns, git, unpack, remove_sources, info, worksheet_uuid, add_to_worksheet)

    # 1) Copy sources up to the server (temporary remote zip file)
    remote_file_uuids = []
    for source in sources:
        remote_file_uuid = self.open_temp_file(zip_util.add_packed_suffix(os.path.basename(source)))
        remote_file_uuids.append(remote_file_uuid)
        dest_handle = RPCFileHandle(remote_file_uuid, self.proxy)
        try:
            if zip_util.path_is_archive(source):
                # Archives are binary data: never read them in text mode.
                source_handle = open(source, 'rb')
            else:
                source_handle = zip_util.open_packed_path(source, follow_symlinks, exclude_patterns)
                unpack = True  # We packed it, so we have to unpack it
            try:
                status = 'Uploading %s%s to %s' % (source, ' ('+info['uuid']+')' if 'uuid' in info else '', self.address)
                file_util.copy(source_handle, dest_handle, autoflush=False, print_status=status)
            finally:
                # Release the local file / packer stream even if the copy fails.
                close_source = getattr(source_handle, 'close', None)
                if close_source is not None:
                    close_source()
        finally:
            # FileServer does not expose an API for forcibly flushing writes, so
            # we rely on closing the file to flush it.
            dest_handle.close()

    # 2) Install upload (this call will be in charge of deleting the temporary file).
    return self.finish_upload_bundle(remote_file_uuids, unpack, info, worksheet_uuid, add_to_worksheet)
def upload_bundle(self, path, info, worksheet_uuid, follow_symlinks, exclude_patterns, add_to_worksheet):
    """
    Upload the contents at |path| to the server as a bundle.

    |path|: a URL, a local path, a list of local paths, or a false value
            (None / empty) when there are no contents to upload.
    |info|: bundle info; info['metadata']['name'] names the zip file, and
            info['uuid'] (if present) is shown in the progress message.
    |worksheet_uuid|, |add_to_worksheet|: passed through to the server call.
    |follow_symlinks|, |exclude_patterns|: forwarded to zip_util.zip.

    Returns whatever upload_bundle_url / upload_bundle_zip returns.
    """
    # URLs can be directly passed to the local client.
    if path and not isinstance(path, list) and path_util.path_is_url(path):
        return self.upload_bundle_url(path, info, worksheet_uuid, follow_symlinks, exclude_patterns)

    if not path:
        # Nothing to zip or copy: install with no remote file.
        return self.upload_bundle_zip(None, info, worksheet_uuid, follow_symlinks, add_to_worksheet)

    # First, zip path up (temporary local zip file).
    name = info['metadata']['name']
    zip_path = zip_util.zip(path, follow_symlinks=follow_symlinks, exclude_patterns=exclude_patterns, file_name=name)
    try:
        # Copy it up to the server (temporary remote zip file)
        with open(zip_path, 'rb') as source:
            remote_file_uuid = self.open_temp_file()
            dest = RPCFileHandle(remote_file_uuid, self.proxy)
            try:
                status = 'Uploading %s%s to %s' % (zip_path, ' ('+info['uuid']+')' if 'uuid' in info else '', self.address)
                file_util.copy(source, dest, autoflush=False, print_status=status)
            finally:
                # FileServer does not expose an API for forcibly flushing
                # writes, so we rely on closing the file to flush it. Closing
                # in a finally block also releases the handle if the copy fails.
                dest.close()

        # Finally, install the zip file (this will be in charge of deleting that zip file).
        return self.upload_bundle_zip(remote_file_uuid, info, worksheet_uuid, follow_symlinks, add_to_worksheet)
    finally:
        # Remove local zip, even when the copy or the install raised.
        if zip_path:
            path_util.remove(zip_path)
def _interpret_source(self, source): is_url, is_local_path, is_fileobj = False, False, False if isinstance(source, basestring): if path_util.path_is_url(source): is_url = True else: is_local_path = True filename = os.path.basename(os.path.normpath(source)) else: is_fileobj = True filename = source[0] return is_url, is_local_path, is_fileobj, filename
def _interpret_source(self, source): is_url, is_local_path, is_fileobj = False, False, False if isinstance(source, str): if path_util.path_is_url(source): is_url = True else: is_local_path = True filename = os.path.basename(os.path.normpath(source)) else: is_fileobj = True filename = source[0] return is_url, is_local_path, is_fileobj, filename
def _interpret_source(self, source): is_url, is_local_path, is_fileobj = False, False, False if isinstance(source, str): if path_util.path_is_url(source): is_url = True source = source.rsplit( '?', 1)[0] # Remove query string from URL, if present else: is_local_path = True filename = os.path.basename(os.path.normpath(source)) else: is_fileobj = True filename = source[0] return is_url, is_local_path, is_fileobj, filename
def upload_bundle(self, sources, follow_symlinks, exclude_patterns, git, unpack, remove_sources, info, worksheet_uuid, add_to_worksheet):
    """
    See local_bundle_client.py for documentation on the usage.
    Strategy:
    1) We copy the |sources| to a temporary directory on the server
       (streaming either a tar or tar.gz depending on whether compression is needed).
    2) We politely ask the server to finish_upload_bundle (performs a
       LocalBundleClient.upload_bundle from the temporary directory).

    Returns the result of upload_bundle_url (when every source is a URL) or
    of finish_upload_bundle.

    Raises UsageError for a broken symlink, or for a symlink source when
    |follow_symlinks| is false. On any failure the remote temporary files
    opened so far are finalized before the exception propagates.
    """
    # URLs can be directly passed to the local client.
    if all(path_util.path_is_url(source) for source in sources):
        return self.upload_bundle_url(sources, follow_symlinks, exclude_patterns, git, unpack, remove_sources, info, worksheet_uuid, add_to_worksheet)

    remote_file_uuids = []
    try:
        # 1) Copy sources up to the server (temporary remote zip file)
        for source in sources:
            if zip_util.path_is_archive(source):
                # Archives are binary data: never read them in text mode.
                source_handle = open(source, 'rb')
                temp_file_name = os.path.basename(source)
            elif os.path.isdir(source):
                source_handle = tar_gzip_directory(source, follow_symlinks, exclude_patterns)
                temp_file_name = os.path.basename(source) + '.tar.gz'
                unpack = True  # We packed it, so we have to unpack it
            else:
                resolved_source = source
                if follow_symlinks:
                    resolved_source = os.path.realpath(source)
                    if not os.path.exists(resolved_source):
                        raise UsageError('Broken symlink')
                elif os.path.islink(source):
                    raise UsageError('Not following symlinks.')
                source_handle = gzip_file(resolved_source)
                temp_file_name = os.path.basename(source) + '.gz'
                unpack = True  # We packed it, so we have to unpack it

            try:
                remote_file_uuid = self.open_temp_file(temp_file_name)
                remote_file_uuids.append(remote_file_uuid)
                with closing(RPCFileHandle(remote_file_uuid, self.proxy)) as dest_handle:
                    status = 'Uploading %s%s to %s' % (source, ' ('+info['uuid']+')' if 'uuid' in info else '', self.address)
                    file_util.copy(source_handle, dest_handle, autoflush=False, print_status=status)
            finally:
                # Release the local file / packer stream whether or not the
                # copy succeeded.
                close_source = getattr(source_handle, 'close', None)
                if close_source is not None:
                    close_source()

        # 2) Install upload (this call will be in charge of deleting the temporary file).
        return self.finish_upload_bundle(remote_file_uuids, unpack, info, worksheet_uuid, add_to_worksheet)
    except:
        # Deliberately bare: cleanup must also run on KeyboardInterrupt, and
        # the exception is always re-raised.
        for remote_file_uuid in remote_file_uuids:
            self.finalize_file(remote_file_uuid)
        raise
def _interpret_source(self, source: Source): """Interprets the given source. Args: source (Source): Source to interpret. Returns: (is_url, is_fileobj, filename) """ is_url, is_fileobj = False, False if isinstance(source, str): if path_util.path_is_url(source): is_url = True source = source.rsplit( '?', 1)[0] # Remove query string from URL, if present else: raise UsageError("Path must be a URL.") filename = os.path.basename(os.path.normpath(source)) else: is_fileobj = True filename = source[0] return is_url, is_fileobj, filename
def upload_bundle(self, path, info, worksheet_uuid, follow_symlinks, exclude_patterns, add_to_worksheet):
    """
    Zip |path| locally, stream the zip to the server, and install it as a bundle.

    |path|: a URL (handed to upload_bundle_url), a local path, a list of
            local paths, or a false value when there is nothing to upload.
    |info|: bundle info; info['metadata']['name'] names the zip file, and
            info['uuid'] (if present) is shown in the progress message.
    |worksheet_uuid|, |add_to_worksheet|: passed through to the server call.
    |follow_symlinks|, |exclude_patterns|: forwarded to zip_util.zip.

    Returns whatever upload_bundle_url / upload_bundle_zip returns.
    """
    # URLs can be directly passed to the local client.
    if path and not isinstance(path, list) and path_util.path_is_url(path):
        return self.upload_bundle_url(path, info, worksheet_uuid, follow_symlinks, exclude_patterns)

    remote_file_uuid = None
    zip_path = None
    try:
        if path:
            # First, zip path up (temporary local zip file).
            name = info['metadata']['name']
            zip_path = zip_util.zip(path, follow_symlinks=follow_symlinks, exclude_patterns=exclude_patterns, file_name=name)

            # Copy it up to the server (temporary remote zip file)
            with open(zip_path, 'rb') as source:
                remote_file_uuid = self.open_temp_file()
                dest = RPCFileHandle(remote_file_uuid, self.proxy)
                try:
                    file_util.copy(source, dest, autoflush=False, print_status='Uploading %s%s to %s' % (zip_path, ' (' + info['uuid'] + ')' if 'uuid' in info else '', self.address))
                finally:
                    # FileServer does not expose an API for forcibly flushing
                    # writes, so we rely on closing the file to flush it.
                    # The finally block also closes it when the copy fails.
                    dest.close()

        # Finally, install the zip file (this will be in charge of deleting that zip file).
        return self.upload_bundle_zip(remote_file_uuid, info, worksheet_uuid, follow_symlinks, add_to_worksheet)
    finally:
        # Remove local zip on success and on failure alike, so a failed
        # upload does not leave the temporary archive behind.
        if zip_path:
            path_util.remove(zip_path)
def upload(self, sources, follow_symlinks, exclude_patterns, git, unpack, remove_sources):
    """
    |sources|: specifies the locations of the contents to upload. Each element is either a URL or a local path.
    |follow_symlinks|: for local path(s), whether to follow (resolve) symlinks
    |exclude_patterns|: for local path(s), don't upload these patterns (e.g., *.o)
    |git|: for URL, whether |source| is a git repo to clone.
    |unpack|: for each source in |sources|, whether to unpack it if it's an archive.
    |remove_sources|: remove |sources|.

    If |sources| contains one source, then the bundle contents will be that source.
    Otherwise, the bundle contents will be a directory with each of the sources.
    Exceptions:
    - If |git|, then each source is replaced with the result of running 'git clone |source|'
    - If |unpack| is True and a source is an archive (zip, tar.gz, etc.), then unpack the source.

    Install the contents of the directory at |source| into
    DATA_SUBDIRECTORY in a subdirectory named by a hash of the contents.

    Return a (data_hash, metadata) pair, where the metadata is a dict mapping
    keys to precomputed statistics about the new data directory.
    """
    to_delete = []

    # Create temporary directory as a staging area and put everything there.
    temp_path = tempfile.mkdtemp('-bundle_store_upload')
    temp_subpaths = []
    for source in sources:
        # Where to save |source| to (might change this value if we unpack).
        temp_subpath = os.path.join(temp_path, os.path.basename(source))
        if remove_sources:
            to_delete.append(source)
        source_unpack = unpack and zip_util.path_is_archive(source)

        if path_util.path_is_url(source):
            # Download the URL.
            print_util.open_line('BundleStore.upload: downloading %s to %s' % (source, temp_path))
            if git:
                file_util.git_clone(source, temp_subpath)
            else:
                file_util.download_url(source, temp_subpath, print_status=True)
            if source_unpack:
                # Unpack next to the download, then drop the archive itself.
                zip_util.unpack(temp_subpath, zip_util.strip_archive_ext(temp_subpath))
                path_util.remove(temp_subpath)
                temp_subpath = zip_util.strip_archive_ext(temp_subpath)
            print_util.clear_line()
        else:
            # Copy the local path.
            source_path = path_util.normalize(source)
            path_util.check_isvalid(source_path, 'upload')

            # Recursively copy the directory into a new BundleStore temp directory.
            print_util.open_line('BundleStore.upload: %s => %s' % (source_path, temp_subpath))
            if source_unpack:
                zip_util.unpack(source_path, zip_util.strip_archive_ext(temp_subpath))
                temp_subpath = zip_util.strip_archive_ext(temp_subpath)
            elif remove_sources:
                # The source is going away anyway, so move instead of copying.
                path_util.rename(source_path, temp_subpath)
            else:
                path_util.copy(source_path, temp_subpath, follow_symlinks=follow_symlinks, exclude_patterns=exclude_patterns)
            print_util.clear_line()

        temp_subpaths.append(temp_subpath)

    # If exactly one source, then upload that directly.
    if len(temp_subpaths) == 1:
        # The staging directory is now just a wrapper; schedule it for removal.
        to_delete.append(temp_path)
        temp_path = temp_subpaths[0]

    # Multiplex between uploading a directory and uploading a file here.
    # All other path_util calls will use these lists of directories and files.
    if os.path.isdir(temp_path):
        dirs_and_files = path_util.recursive_ls(temp_path)
    else:
        dirs_and_files = ([], [temp_path])

    # Hash the contents of the temporary directory, and then if there is no
    # data with this hash value, move this directory into the data directory.
    print_util.open_line('BundleStore.upload: hashing %s' % temp_path)
    data_hash = '0x%s' % (path_util.hash_directory(temp_path, dirs_and_files),)
    print_util.clear_line()
    print_util.open_line('BundleStore.upload: computing size of %s' % temp_path)
    data_size = path_util.get_size(temp_path, dirs_and_files)
    print_util.clear_line()

    final_path = os.path.join(self.data, data_hash)
    if os.path.exists(final_path):
        # Already exists, just delete it
        path_util.remove(temp_path)
    else:
        # sys.stderr.write works on both Python 2 and Python 3; the
        # `print >>sys.stderr, ...` statement form raises TypeError on Python 3.
        sys.stderr.write('BundleStore.upload: moving %s to %s\n' % (temp_path, final_path))
        path_util.rename(temp_path, final_path)

    # Delete paths.
    for path in to_delete:
        if os.path.exists(path):
            path_util.remove(path)

    # After this operation there should always be a directory at the final path.
    assert os.path.lexists(final_path), 'Uploaded to %s failed!' % (final_path,)
    return (data_hash, {'data_size': data_size})
def upload_bundle(self, sources, follow_symlinks, exclude_patterns, git, unpack, remove_sources, info, worksheet_uuid, add_to_worksheet):
    """
    See local_bundle_client.py for documentation on the usage.
    Strategy:
    1) We copy the |sources| to a temporary directory on the server
       (streaming either a tar or tar.gz depending on whether compression is needed).
    2) We politely ask the server to finish_upload_bundle (performs a
       LocalBundleClient.upload_bundle from the temporary directory).

    Returns the result of upload_bundle_url (when every source is a URL) or
    of finish_upload_bundle.

    Raises UsageError for a broken symlink, or for a symlink source when
    |follow_symlinks| is false. On any failure the remote temporary files
    opened so far are finalized before the exception propagates.
    """
    # URLs can be directly passed to the local client.
    if all(path_util.path_is_url(source) for source in sources):
        return self.upload_bundle_url(sources, follow_symlinks, exclude_patterns, git, unpack, remove_sources, info, worksheet_uuid, add_to_worksheet)

    remote_file_uuids = []
    try:
        # 1) Copy sources up to the server (temporary remote zip file)
        for source in sources:
            if zip_util.path_is_archive(source):
                # Binary mode: an archive must not go through text decoding
                # or newline translation.
                source_handle = open(source, 'rb')
                temp_file_name = os.path.basename(source)
            elif os.path.isdir(source):
                source_handle = tar_gzip_directory(source, follow_symlinks, exclude_patterns)
                temp_file_name = os.path.basename(source) + '.tar.gz'
                unpack = True  # We packed it, so we have to unpack it
            else:
                resolved_source = source
                if follow_symlinks:
                    resolved_source = os.path.realpath(source)
                    if not os.path.exists(resolved_source):
                        raise UsageError('Broken symlink')
                elif os.path.islink(source):
                    raise UsageError('Not following symlinks.')
                source_handle = gzip_file(resolved_source)
                temp_file_name = os.path.basename(source) + '.gz'
                unpack = True  # We packed it, so we have to unpack it

            try:
                remote_file_uuid = self.open_temp_file(temp_file_name)
                remote_file_uuids.append(remote_file_uuid)
                with closing(RPCFileHandle(remote_file_uuid, self.proxy)) as dest_handle:
                    status = 'Uploading %s%s to %s' % (
                        source, ' (' + info['uuid'] + ')' if 'uuid' in info else '', self.address)
                    file_util.copy(source_handle, dest_handle, autoflush=False, print_status=status)
            finally:
                # Always release the local file / packer stream so a long
                # list of sources does not accumulate open handles.
                close_source = getattr(source_handle, 'close', None)
                if close_source is not None:
                    close_source()

        # 2) Install upload (this call will be in charge of deleting the temporary file).
        return self.finish_upload_bundle(remote_file_uuids, unpack, info, worksheet_uuid, add_to_worksheet)
    except:
        # Deliberately bare: cleanup must also run on KeyboardInterrupt, and
        # the exception is always re-raised.
        for remote_file_uuid in remote_file_uuids:
            self.finalize_file(remote_file_uuid)
        raise
def upload(self, sources, follow_symlinks, exclude_patterns, git, unpack, remove_sources, uuid):
    """
    Place the contents named by |sources| in the bundle store under |uuid|.

    |sources|: locations of the contents to upload; each is a URL or a local path.
    |follow_symlinks|: for local path(s), whether to follow (resolve) symlinks
    |exclude_patterns|: for local path(s), patterns not to upload (e.g., *.o)
    |git|: for a URL, whether the source is a git repo to clone.
    |unpack|: whether to unpack a source when it is an archive.
    |remove_sources|: remove |sources| afterwards.
    |uuid|: selects the disk (via self.ring) and names the final path.

    With exactly one source the final path is that source itself; otherwise
    it is a directory holding one entry per source.

    Raises UsageError if the final path already exists.

    Return a (data_hash, metadata) pair, where the metadata is a dict mapping
    keys to precomputed statistics about the new data.
    """
    doomed_paths = []

    # A single source is stored directly at the final path.
    is_single = len(sources) == 1

    # Pick the disk and derive where the contents will live.
    disk = self.ring.get_node(uuid)
    final_path = os.path.join(self.partitions, disk, self.DATA_SUBDIRECTORY, uuid)
    if os.path.exists(final_path):
        raise UsageError('Path %s already present in bundle store' % final_path)
    if not is_single:
        # Several sources: they all go inside a fresh directory.
        path_util.make_directory(final_path)

    placed_paths = []
    for source in sources:
        # Destination for this source (adjusted below when unpacking).
        target = final_path if is_single else os.path.join(final_path, os.path.basename(source))
        if remove_sources:
            doomed_paths.append(source)
        should_unpack = unpack and zip_util.path_is_archive(source)
        if should_unpack and is_single:
            # Keep the archive extension on the target so it can be stripped
            # again after unpacking.
            target += zip_util.get_archive_ext(source)

        if path_util.path_is_url(source):
            # Remote source: fetch it first.
            print_util.open_line('BundleStore.upload: downloading %s to %s' % (source, target))
            if git:
                file_util.git_clone(source, target)
            else:
                file_util.download_url(source, target, print_status=True)
            if should_unpack:
                zip_util.unpack(target, zip_util.strip_archive_ext(target))
                path_util.remove(target)
                target = zip_util.strip_archive_ext(target)
            print_util.clear_line()
        else:
            # Local source: validate, then unpack / move / copy it in.
            local_path = path_util.normalize(source)
            path_util.check_isvalid(local_path, 'upload')
            print_util.open_line('BundleStore.upload: %s => %s' % (local_path, target))
            if should_unpack:
                zip_util.unpack(local_path, zip_util.strip_archive_ext(target))
                target = zip_util.strip_archive_ext(target)
            elif remove_sources:
                path_util.rename(local_path, target)
            else:
                path_util.copy(local_path, target, follow_symlinks=follow_symlinks, exclude_patterns=exclude_patterns)
            print_util.clear_line()

        placed_paths.append(target)

    # A directory is listed recursively; a lone file is its own listing.
    if os.path.isdir(final_path):
        listing = path_util.recursive_ls(final_path)
    else:
        listing = ([], [final_path])

    # Hash and measure what ended up at the final path.
    print_util.open_line('BundleStore.upload: hashing %s' % final_path)
    data_hash = '0x%s' % path_util.hash_directory(final_path, listing)
    print_util.clear_line()
    print_util.open_line('BundleStore.upload: computing size of %s' % final_path)
    data_size = path_util.get_size(final_path, listing)
    print_util.clear_line()

    # Remove the sources that were scheduled for deletion and still exist.
    for doomed in doomed_paths:
        if os.path.exists(doomed):
            path_util.remove(doomed)

    # Something must exist at the final path by now.
    assert os.path.lexists(final_path), 'Uploaded to %s failed!' % (final_path,)
    return (data_hash, {'data_size': data_size})
def upload(self, path, follow_symlinks, exclude_patterns):
    '''
    Copy the contents of the directory at |path| into the data subdirectory,
    in a subfolder named by a hash of the contents of the new data directory.
    If |path| is in a temporary directory, then we just move it.

    |path|: a URL, a local path, or a list of local paths.
    |follow_symlinks|, |exclude_patterns|: forwarded to path_util.copy.

    Raises UsageError for a file:// URL that is a symlink or that does not
    start with one of self.direct_upload_paths.

    Return a (data_hash, metadata) pair, where the metadata is a dict mapping
    keys to precomputed statistics about the new data directory.

    NOTE(review): the visible body ends after the move into the data
    directory; no return statement and no use of final_path_exists is
    visible here - confirm the remainder of the function.
    NOTE(review): this block uses Python 2-only syntax
    (`print >> sys.stderr, ...`, `except OSError, e`).
    '''
    # Create temporary directory as a staging area.
    # If |path| is already temporary, then we use that directly
    # (with the understanding that |path| will be moved)
    # NOTE(review): this is a plain string-prefix test, so a sibling such as
    # "<temp>-other" would also match - confirm whether that matters.
    if not isinstance(path, list) and os.path.realpath(path).startswith(
            os.path.realpath(self.temp)):
        temp_path = path
    else:
        temp_path = os.path.join(self.temp, uuid.uuid4().hex)

    if not isinstance(path, list) and path_util.path_is_url(path):
        # Have to be careful.  Want to make sure if we're fetching a URL
        # that points to a file, we are allowing this.
        if path.startswith('file://'):
            # Strip the 7-character 'file://' prefix to get the local path.
            path_suffix = path[7:]
            if os.path.islink(path_suffix):
                raise UsageError('Not allowed to upload symlink %s' % path_suffix)
            # NOTE(review): prefix match on the raw string; '..' components
            # are not normalized before the check - confirm this is safe.
            if not any(
                    path_suffix.startswith(f) for f in self.direct_upload_paths):
                raise UsageError(
                    'Not allowed to upload %s (only %s allowed)' % (path_suffix, self.direct_upload_paths))

        # Download |path| if it is a URL.
        print >> sys.stderr, 'BundleStore.upload: downloading %s to %s' % (
            path, temp_path)
        file_util.download_url(path, temp_path, print_status=True)
    elif path != temp_path:
        # Copy |path| into the temp_path.
        if isinstance(path, list):
            absolute_path = [path_util.normalize(p) for p in path]
            for p in absolute_path:
                path_util.check_isvalid(p, 'upload')
        else:
            absolute_path = path_util.normalize(path)
            path_util.check_isvalid(absolute_path, 'upload')

        # Recursively copy the directory into a new BundleStore temp directory.
        print_util.open_line('BundleStore.upload: copying %s to %s' % (absolute_path, temp_path))
        path_util.copy(absolute_path, temp_path, follow_symlinks=follow_symlinks, exclude_patterns=exclude_patterns)
        print_util.clear_line()

    # Multiplex between uploading a directory and uploading a file here.
    # All other path_util calls will use these lists of directories and files.
    if os.path.isdir(temp_path):
        dirs_and_files = path_util.recursive_ls(temp_path)
    else:
        dirs_and_files = ([], [temp_path])

    # Hash the contents of the temporary directory, and then if there is no
    # data with this hash value, move this directory into the data directory.
    print_util.open_line('BundleStore.upload: hashing %s' % temp_path)
    data_hash = '0x%s' % (path_util.hash_directory(temp_path, dirs_and_files), )
    print_util.clear_line()
    print_util.open_line('BundleStore.upload: computing size of %s' % temp_path)
    data_size = path_util.get_size(temp_path, dirs_and_files)
    print_util.clear_line()

    final_path = os.path.join(self.data, data_hash)
    final_path_exists = False
    try:
        # If data_hash already exists, then we don't need to move it over.
        # os.utime doubles as the existence probe: it fails with ENOENT when
        # final_path is missing, which is handled below.
        os.utime(final_path, None)
        final_path_exists = True
    except OSError, e:
        if e.errno == errno.ENOENT:
            # Not stored yet: move the staged contents into place.
            print >> sys.stderr, 'BundleStore.upload: moving %s to %s' % (
                temp_path, final_path)
            path_util.rename(temp_path, final_path)
        else:
            # Any other OS error is unexpected; let it propagate.
            raise
def upload(self, path, follow_symlinks):
    """
    Copy the contents of the directory at |path| into the data subdirectory,
    in a subfolder named by a hash of the contents of the new data directory.
    If |path| is in a temporary directory, then we just move it.

    |path|: a URL, a local path, or a list of local paths.
    |follow_symlinks|: forwarded to path_util.copy.

    Raises UsageError for a file:// URL that is a symlink or that does not
    start with one of self.direct_upload_paths.

    Return a (data_hash, metadata) pair, where the metadata is a dict mapping
    keys to precomputed statistics about the new data directory.

    NOTE(review): the visible body ends after the move into the data
    directory; no return statement and no use of final_path_exists is
    visible here - confirm the remainder of the function.
    NOTE(review): this block uses Python 2-only syntax
    (`print >>sys.stderr, ...`, `except OSError, e`).
    """
    # Create temporary directory as a staging area.
    # If |path| is already temporary, then we use that directly
    # (with the understanding that |path| will be moved)
    # NOTE(review): this is a plain string-prefix test, so a sibling such as
    # "<temp>-other" would also match - confirm whether that matters.
    if not isinstance(path, list) and os.path.realpath(path).startswith(os.path.realpath(self.temp)):
        temp_path = path
    else:
        temp_path = os.path.join(self.temp, uuid.uuid4().hex)

    if not isinstance(path, list) and path_util.path_is_url(path):
        # Have to be careful.  Want to make sure if we're fetching a URL
        # that points to a file, we are allowing this.
        if path.startswith("file://"):
            # Strip the 7-character "file://" prefix to get the local path.
            path_suffix = path[7:]
            if os.path.islink(path_suffix):
                raise UsageError("Not allowed to upload symlink %s" % path_suffix)
            # NOTE(review): prefix match on the raw string; '..' components
            # are not normalized before the check - confirm this is safe.
            if not any(path_suffix.startswith(f) for f in self.direct_upload_paths):
                raise UsageError(
                    "Not allowed to upload %s (only %s allowed)" % (path_suffix, self.direct_upload_paths)
                )

        # Download |path| if it is a URL.
        print >>sys.stderr, "BundleStore.upload: downloading %s to %s" % (path, temp_path)
        file_util.download_url(path, temp_path, print_status=True)
    elif path != temp_path:
        # Copy |path| into the temp_path.
        if isinstance(path, list):
            absolute_path = [path_util.normalize(p) for p in path]
            for p in absolute_path:
                path_util.check_isvalid(p, "upload")
        else:
            absolute_path = path_util.normalize(path)
            path_util.check_isvalid(absolute_path, "upload")

        # Recursively copy the directory into a new BundleStore temp directory.
        print >>sys.stderr, "BundleStore.upload: copying %s to %s" % (absolute_path, temp_path)
        path_util.copy(absolute_path, temp_path, follow_symlinks=follow_symlinks)

    # Multiplex between uploading a directory and uploading a file here.
    # All other path_util calls will use these lists of directories and files.
    if os.path.isdir(temp_path):
        dirs_and_files = path_util.recursive_ls(temp_path)
    else:
        dirs_and_files = ([], [temp_path])

    # Hash the contents of the temporary directory, and then if there is no
    # data with this hash value, move this directory into the data directory.
    print >>sys.stderr, "BundleStore.upload: hashing %s" % (temp_path)
    data_hash = "0x%s" % (path_util.hash_directory(temp_path, dirs_and_files),)
    data_size = path_util.get_size(temp_path, dirs_and_files)

    final_path = os.path.join(self.data, data_hash)
    final_path_exists = False
    try:
        # If data_hash already exists, then we don't need to move it over.
        # os.utime doubles as the existence probe: it fails with ENOENT when
        # final_path is missing, which is handled below.
        os.utime(final_path, None)
        final_path_exists = True
    except OSError, e:
        if e.errno == errno.ENOENT:
            # Not stored yet: move the staged contents into place.
            print >>sys.stderr, "BundleStore.upload: moving %s to %s" % (temp_path, final_path)
            path_util.rename(temp_path, final_path)
        else:
            # Any other OS error is unexpected; let it propagate.
            raise