class DeleteOperation(Operation):

    """Delete a file or a directory. Please note that, for obvious reasons, failures
    cannot be undone. This operation will stop at the first error it encounters."""
    __slots__ = "_path"  # the path to delete

    name = "DeleteOperation"

    def __init__(self, transaction, path):
        super(DeleteOperation, self).__init__(transaction)
        self._path = Path(path)

    def apply(self):
        if self._dry_run():
            return
        if self._path.isdir():
            self.log.info("Deleting directory %s", self._path)
            self._path.rmtree()
        else:
            self.log.info("Deleting file %s", self._path)
            self._path.remove()
        # END perform actual removal

    def rollback(self):
        self.log.info("Deletion of filesystem items cannot be rolled back")
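
# A minimal usage sketch (not part of the original module). It assumes the
# surrounding Transaction framework seen elsewhere in this codebase: operations
# register themselves with the transaction passed to their constructor, and
# t.apply().succeeded() reports the outcome.
def _example_delete(log):
    t = Transaction(log)
    DeleteOperation(t, "/tmp/scratch/build-artifacts")  # example path
    # deletion cannot be rolled back - a failed transaction cannot restore files
    return t.apply().succeeded()
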
class CreateFSItemOperation(FSOperationBase):

    """Create a directory or file with the given access permissions and ownership.
    In case of a file, you may specify initial content.
    For this operation to succeed, the destination path must not exist yet!"""
    __slots__ = ("_path", "_content", "_mode", "_uid", "_gid")

    name = "CreateFSItem"

    def __init__(self, transaction, path, initial_file_content=None,
                 mode=None, uid=None, gid=None):
        """Initialize the operation with a path to create. If initial_file_content is set
        to a string, it will be written in binary mode to a file. If it is unset, a
        directory will be created. Non-existing parent directories will be created.
        After creation, the mode will be set if not None, and uid and gid will be set
        to the given numerical IDs if either of them is not None."""
        super(CreateFSItemOperation, self).__init__(transaction)
        self._assert_posix()
        self._path = Path(path)
        self._content = initial_file_content
        self._mode = mode
        self._uid = uid
        self._gid = gid

    def apply(self):
        if (self._content and self._path.isdir()) or (not self._content and self._path.isfile()):
            raise AssertionError(
                "Cannot create item of type directory or file as an equally named item of a different type exists")
        # END sanity check

        if self._dry_run():
            return
        if self._path.exists():
            return
        # end ignore existing items of the same type

        # We don't do this in the most efficient way, as certain attributes could be
        # specified right at creation time. For now we don't, as it shouldn't matter.
        if self._content:
            self.log.info("creating file %s", self._path)
            self._path.write_bytes(self._content)
        else:
            self.log.info("creating directory %s", self._path)
            self._path.makedirs()
        # END initial creation

        self._operation_performed = True

        if self._mode is not None:
            self._path.chmod(self._mode)
        # END handle mode
        self.set_user_group(self._path, self._gid, self._uid)

    def rollback(self):
        try:
            if not self._operation_performed or not self._path.exists():
                return
            if self._content:
                self.log.info("Removing file %s", self._path)
                self._path.remove()
            else:
                self.log.info("Removing single directory %s", self._path)
                self._path.rmdir()
            # END handle removal, safely as we don't recursively delete anything
        finally:
            self._reset_state()
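
# A minimal usage sketch (not part of the original module); the path, content,
# mode and ownership values are illustrative only. Rollback removes the item
# again, but only if this operation actually created it.
def _example_create_flag_file(log):
    t = Transaction(log)
    CreateFSItemOperation(t, "/tmp/ready.flag",
                          initial_file_content=b"ok\n",  # written in binary mode
                          mode=0o644, uid=1000, gid=1000)
    if not t.apply().succeeded():
        t.rollback()  # assumes Transaction exposes rollback(), as its operations do
    return t
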
class TreeRoot(dict):

    """A python representation of a directory tree

    It keeps a tree-like structure in a simple dict, where each tree entry is associated
    with a tuple of meta-data. This property makes it comparable and easily diffable.

    As the value of a directory entry is another dict with items, we don't have stat
    information on a directory itself. File entries will contain the stat structure
    obtained by os.stat.

    Additionally, a tree is able to find package root paths, and allows simplified
    access to sub-paths using a path separator right away.

    Packages are items underneath which there is at least one file. A package starts at
    the path which actually contains a file. A package may be a file.
    A package is a simple helper to keep track of everything below it.
    """
    __slots__ = ('_root_path',
                 '_sample_time',
                 '_packages',
                 '_one_package_per_file',
                 '__weakref__')

    # -------------------------
    ## @name Constants
    # @{

    ## we assume tokens to be separated with this separator
    TOKEN_SEPARATOR = '/'

    ## -- End Constants -- @}

    def __new__(cls, root_path, *args, **kwargs):
        """Just required to allow a custom constructor"""
        return dict.__new__(cls)

    def __init__(self, root_path, one_package_per_file=False):
        """Initialize this instance from the given root path and parse all information
        contained in the directory
        @param root_path butility.Path instance, pointing to an existing directory
        @param one_package_per_file if True, every file will be a package."""
        self._root_path = Path(root_path)
        self._packages = None
        self._one_package_per_file = one_package_per_file
        self._init_contents()

    # -------------------------
    ## @name Utilities
    # @{

    def _init_contents(self):
        """Initialize our data by parsing it from disk.
        @note can only be called once as we have to be empty"""
        assert len(self) == 0, "Need to be empty, can only be called once"
        assert self._root_path.isdir(), "Root path must be a directory"

        # Main recursion helper, depth first
        # root_dict - dict to put information into
        # root_path - path to directory to analyze
        ls = os.listdir
        stat = os.stat
        join = os.path.join

        def recurse_dir(root_dict, root_path):
            for entry in ls(root_path):
                # entry is just the name
                absentry = join(root_path, entry)
                try:
                    einfo = stat(absentry)
                except OSError:
                    continue
                # end ignore files deleted under our nose
                if S_ISDIR(einfo.st_mode):
                    root_dict[entry] = recurse_dir(dict(), absentry)
                else:
                    root_dict[entry] = einfo
                # end handle entry type
            # end for each entry
            return root_dict
        # end recursion helper

        recurse_dir(self, self._root_path)
        # Once we are done with the sample, we set the sample time. Otherwise packages
        # might be considered stable just because it took us many seconds until the
        # sample was taken.
        self._sample_time = time()

    ## -- End Utilities -- @}

    # -------------------------
    ## @name Superclass Overrides
    # @{

    def __str__(self):
        return 'TreeRoot("%s")' % self.root_path()

    def __getitem__(self, name):
        """If name contains a token separator, recurse into ourselves to return the result
        @raise KeyError if there is no entry at the given path"""
        for token in name.split(self.TOKEN_SEPARATOR):
            self = dict.__getitem__(self, token)
        # end handle recursion
        return self

    ## -- End Superclass Overrides -- @}

    # -------------------------
    ## @name Interface
    # @{

    def root_path(self):
        """@return our root path"""
        return self._root_path

    def sample_time(self):
        """@return time (as seconds since epoch) at which our snapshot was taken
        @note basically the time of our instantiation"""
        return self._sample_time

    def entries(self, root_relative):
        """@return list of all entries in ourselves, depth first, files only, as tuples
        of (relative_path, stat)
        @param root_relative relative path into our dict - can contain path separators"""
        out = list()

        def recurse_dir(root_item, root_path):
            if isinstance(root_item, dict):
                for key in root_item:
                    recurse_dir(root_item[key], root_path + self.TOKEN_SEPARATOR + key)
                # end for each key in ourselves
            else:
                out.append((root_path, root_item))
            # end handle dict/non-dict
        # end recursion helper

        recurse_dir(self[root_relative], root_relative)
        return out

    def iter_packages(self):
        """@return an iterator yielding all Package instances found in this tree
        @note we are caching the packages just because this will allow them to carry
        their own stable_since date. Otherwise it wouldn't be a problem at all to obtain
        packages on demand
        """
        # We build the cache only once
        if self._packages is None:
            self._packages = list()
            # Recurse into our structure and find indications for packages.
            # Abort recursion once criteria are met, and handle files directly underneath
            # our root specifically.

            # filter a dict's contents into file and dir tuples
            def files_and_dirs(d):
                files = list()
                dirs = list()
                for name, entry in d.items():
                    if isinstance(entry, dict):
                        dirs.append((name, entry))
                    else:
                        files.append((name, entry))
                    # end handle entry type
                # end for each name, entry
                return files, dirs
            # end files and dirs

            join = os.path.join
            files, dirs = files_and_dirs(self)
            for name, info in files:
                self._packages.append(Package(self, name))
            # end for each file underneath root

            # for each directory, enter standard recursion, with standard rules
            def recurse(dir_dict, subdir_relative):
                # if there is at least a single file, it's a package
                files, dirs = files_and_dirs(dir_dict)
                if files:
                    if self._one_package_per_file:
                        for file in files:
                            self._packages.append(Package(self, subdir_relative + os.path.sep + file[0]))
                        # end for each file
                    else:
                        self._packages.append(Package(self, subdir_relative))
                    # end handle package instantiation
                else:
                    for name, dir_dict in dirs:
                        recurse(dir_dict, join(subdir_relative, name))
                    # end for each directory to traverse
                # end handle recursion
            # end recursion helper

            for name, dir_dict in dirs:
                recurse(dir_dict, name)
            # end for each name, dir_dict
        # end build cache
        return iter(self._packages)

    ## -- End Interface -- @}
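
# A minimal usage sketch (not part of the original module); '/projects/renders'
# and 'shotA' are example paths. entries() raises KeyError if the sub-path is
# not part of the snapshot.
def _example_tree_snapshot():
    tree = TreeRoot("/projects/renders")
    for rela_path, entry_stat in tree.entries("shotA"):
        print(rela_path, entry_stat.st_size)  # files only, depth first
    for package in tree.iter_packages():
        print(package)  # cached Package instances, so stable_since can be tracked
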
class RsyncOperation(Operation):

    """An operation which safely copies a source file or directory to a given
    destination file or directory. The location of the rsync program is currently
    assumed (see rsync_path).
    @note only works on linux (even though, theoretically, rsync might exist on windows)"""

    __slots__ = ("_source_path",
                 "_destination_path",
                 "_actual_destination_path",
                 "_move_mode",
                 "_current_path",
                 "_total_num_files_transferred",
                 "_num_files_transferred",
                 "_total_transferred_filesize_bytes",
                 "_transferred_filesize_bytes",
                 "_current_total_transferred_filesize_bytes",
                 "_seen_progress_for_current_file",
                 "_current_bandwidth",
                 "_start_time",
                 "_last_time_left_s",
                 "_process",
                 "_destination_existed",
                 "_actual_destination_existed",
                 "_max_bandwidth_kb")

    # -------------------------
    # @name Constants
    # @{

    NUM_FILES = "Number of files transferred: "
    TRANSFERRED_BYTES = "Total file size: "

    # -- End Constants -- @}

    # -------------------------
    # @name Configuration
    # @{

    name = "rsync"
    description = "Synchronize directory structures or copy files"

    re_is_path = re.compile(r"^/?[\w\-]+(?:/[\w\-\.]+)*$")
    re_progress = re.compile(r"(?P<bytes>\d+)\s+(?P<percent>\d+)%\s+(?P<bandwidth>\d+\.\d+\w+/\w)\s+.*")

    rsync_path = "/usr/bin/rsync"
    rm_path = "/bin/rm"

    # -- End Configuration -- @}

    def __init__(self, transaction, source, destination, move=False, max_bandwidth_kb=0):
        """Initialize an rsync operation with a source and destination path. If move is
        True, the source will be deleted after a successful rsync operation. An operation
        is considered successful if there were no error lines on the stderr of the
        process, and if the return code was 0. If the maximum bandwidth is greater than
        0, the rsync operation will use no more than the given bandwidth in kilobytes
        per second."""
        super(RsyncOperation, self).__init__(transaction)

        if os.name != "posix":
            raise AssertionError("This module only works on posix systems")
        # END handle posix

        self._source_path = Path(source).expandvars()
        self._destination_path = self._actual_destination_path = Path(destination).expandvars()
        self._destination_existed = self._destination_path.exists()
        # rsync creates at most one parent directory if it does not exist. It will always
        # put directories into other directories, unless you specify a trailing / on the
        # source.
        #
        # In case of files, it will always put them into existing directories, or rename
        # them to the given name.
        if ((self._source_path.isdir() and not self._source_path.tolinuxpath().endswith('/')) or
                (self._source_path.isfile() and not self._destination_existed or
                 self._destination_path.isdir())):
            self._actual_destination_path = self._destination_path / self._source_path.basename() + "/"
            # trailing / as we have to assure it copies the contents of the directory in case of undo
        # END assure destination
        self._actual_destination_existed = self._actual_destination_path.exists()
        self._move_mode = move
        self._max_bandwidth_kb = max_bandwidth_kb

        self._current_path = None
        self._total_num_files_transferred = 0
        self._total_transferred_filesize_bytes = 0
        self._process = None
        self._reset_current_state()

    def _reset_current_state(self):
        """Reset the current values that will be counted during the following invocation"""
        self._start_time = 0
        self._num_files_transferred = 0
        self._transferred_filesize_bytes = 0
        self._current_total_transferred_filesize_bytes = 0
        self._seen_progress_for_current_file = False
        self._current_bandwidth = None
        self._last_time_left_s = None

    def _set_current_file(self, path):
        """Set the path of the file being currently transferred, adjust state"""
        # If there is progress for the previous file, we count it.
        # This will make us miss the last file, but that's okay ...
        if self._seen_progress_for_current_file:
            self._num_files_transferred += 1
            self._current_total_transferred_filesize_bytes += self._transferred_filesize_bytes
            self._transferred_filesize_bytes = 0
            self._seen_progress_for_current_file = False
        # END handle count
        self._current_path = path

    def _handle_progress_match(self, match):
        """Check the matched groups and adjust our state"""
        self._current_bandwidth = match.group("bandwidth")
        self._transferred_filesize_bytes = int(match.group("bytes"))

    def _update_progress(self, gather_mode=False):
        """Use our state to produce progress information
        @param gather_mode in gather mode, we will just present a count-up"""
        self._abort_point()
        self._seen_progress_for_current_file = True
        prog = self._progress()
        if gather_mode:
            prog.set(self._num_files_transferred,
                     message="Gathering Files ... %i" % self._num_files_transferred)
        else:
            # remaining bytes (currently unused)
            remaining_mbytes = (self._total_transferred_filesize_bytes -
                                self._current_total_transferred_filesize_bytes) / 1024 ** 2
            time_taken = max(1, time.time() - self._start_time)
            time_left_s = self._total_transferred_filesize_bytes / \
                max(1, (self._current_total_transferred_filesize_bytes / time_taken))

            # remaining time
            suffix = "second"
            divisor = 1
            if time_left_s >= 60.0:
                divisor = 60
                suffix = "minute"
            # END handle suffix
            if time_left_s > 1:
                suffix += "s"
            # END handle plural

            if self._last_time_left_s is not None:
                time_left_s = (time_left_s + self._last_time_left_s) / 2.0  # soften jumps a bit
            # END average values

            msg = "Transferring %s at %s - %i files left, done in about %i %s" % (
                self._current_path,
                self._current_bandwidth,
                self._total_num_files_transferred - self._num_files_transferred,
                int(time_left_s / divisor),
                suffix)
            prog.set(self._num_files_transferred, message=msg)
            self._last_time_left_s = time_left_s
        # END handle gather mode

    def _parse_output_line(self, line):
        """Parse a single line and adjust our state accordingly
        @return 1 == file, 2 == progress, 3 == stats, False otherwise"""
        # FILENAME
        ###########
        line = line.strip()  # strip trailing newline
        if self.re_is_path.match(line) is not None:
            self._set_current_file(line)
            return 1
        # END check for path

        # PROGRESS
        ###########
        # parse "  27131904  51%    2.44MB/s    0:00:10  "
        m = self.re_progress.match(line)
        if m:
            self._handle_progress_match(m)
            self._update_progress()
            return 2
        # END handle progress

        # STATS
        ###########
        if line.startswith(self.NUM_FILES):
            self._total_num_files_transferred = int(line[len(self.NUM_FILES):])
            return 3
        # END check file count
        if line.startswith(self.TRANSFERRED_BYTES):
            self._total_transferred_filesize_bytes = int(line[len(self.TRANSFERRED_BYTES):].split(" ")[0])
            return 3
        # END check transferred bytes
        return False

    def _force_removal(self, destination):
        """Forcefully delete the given directory or file, linux only.
        @throws OSError"""
        self.log.info("about to remove directory at %s ... " % destination)
        rval = subprocess.call([self.rm_path, "-Rf", str(destination)])
        if rval != 0:
            raise OSError("Failed to remove file or directory that we managed to copy previously: %s"
                          % destination)
        self.log.info("... done removing destination path")

    def _parse_output(self, gather_mode=False):
        """Parse the output of the rsync process and set the progress accordingly
        @param gather_mode if True, we will only read standard output, which may speed up
        processing. This way, we only count the lines.
        @return error data, chunk-separated - lines are contained within the chunks"""
        empty_list = list()
        timeout = 0.1
        err_data = list()
        process = self._process

        try:
            # GATHER MODE
            ##################
            if gather_mode:
                while True:
                    line = process.stdout.readline().decode()
                    if not line:
                        return err_data
                    if self._parse_output_line(line) == 1:
                        self._update_progress(gather_mode)
                    # END update progress only if we parsed something
                # END loop forever
            # END handle gather mode

            # RUN MODE
            ###########
            # Set stderr to non-blocking to allow simple reads
            fl = fcntl.fcntl(process.stderr.fileno(), fcntl.F_GETFL)
            fcntl.fcntl(process.stderr.fileno(), fcntl.F_SETFL, fl | os.O_NONBLOCK)

            while process.poll() is None:
                # stdout is in blocking mode, so we can read lines accordingly.
                # Try to read as many as possible, as long as there is something.
                while select([process.stdout.fileno()], empty_list, empty_list, timeout)[0]:
                    line = process.stdout.readline().decode()
                    if not line:
                        break
                    self._parse_output_line(line)
                # END handle standard output

                try:
                    # from stderr we expect not much output, if any at all, so poll it from time to time
                    err = process.stderr.read()
                    if err:
                        err_data.append(err)
                    # END gather errors
                except IOError:
                    # It can happen that the process goes down while we read stdout,
                    # and therefore we fail to read - let's just try again in this case.
                    continue
                # END handle invalid reads
                time.sleep(timeout)
            # END while process is active
        finally:
            # If we don't close the handles, the process will stay around, even if the
            # handle gets deleted. Call it a bug - every time I fall for this ...
            if process.stdout:
                process.stdout.close()
            if process.stderr:
                process.stderr.close()
            # END assure pipes are closed
        return err_data

    # -------------------------
    # @name Interface Implementation
    # @{

    def apply(self):
        try:
            # assure that directories will have their contents copied
            def_args = ("-a", "--append", "--progress", self._source_path, self._destination_path)

            def proc(args, gather_mode=False):
                self.log.info(" ".join(args))
                return subprocess.Popen(args, stdin=None, stdout=subprocess.PIPE,
                                        stderr=subprocess.PIPE if not gather_mode else None,
                                        shell=False)
            # END proc helper

            def handle_process(gather_mode=False):
                # PARSE OUTPUT
                ###############
                try:
                    err_chunks = self._parse_output(gather_mode)
                    if self._current_path is not None:
                        self._set_current_file(None)  # trigger the last file to be registered
                    self._process.wait()
                finally:
                    # if the process is still running here, terminate it, as we are
                    # likely to be in an exceptional state (user aborted)
                    if self._process.poll() is None:
                        self._process.terminate()
                    # END assure process is terminated
                # END handle process termination

                if self._process.returncode != 0 or err_chunks:
                    raise OSError("rsync failed with error code: %i, error was \n%s" %
                                  (self._process.returncode, "".join(err_chunks)))
                # END handle error code
                self._process = None
            # END handle process

            # GATHER RUN
            #############
            # Gather information about the run to determine the required effort
            args = [self.rsync_path, "--dry-run", "--stats"]
            args.extend(def_args)

            self._progress().setup(round_robin=True, relative=False)
            self.log.info("Calculating cost of operation ... ")
            self._process = proc(args, True)
            handle_process(gather_mode=True)
            self._reset_current_state()

            # VERIFY FREE SPACE IN DESTINATION
            ##################################
            # The destination doesn't necessarily exist, hence we try the parent path as
            # well. Prefer the actual destination in case it's a dir - the parent might
            # already be on another mount.
            for item in [self._destination_path, self._destination_path.dirname()]:
                if not item.exists():
                    continue
                # END handle missing items

                if not self._total_transferred_filesize_bytes:
                    self.log.info("Wouldn't do any work - skipping transfer operation")
                    return
                # end abort if nothing to do

                fs_info = os.statvfs(item)
                free_bytes_at_destination = fs_info.f_bsize * fs_info.f_bavail
                if self._total_transferred_filesize_bytes >= free_bytes_at_destination:
                    msg = "Insufficient disk space available at %s to copy %s - require %iMB, have %iMB" % (
                        item, self._source_path,
                        self._total_transferred_filesize_bytes / 1024 ** 2,
                        free_bytes_at_destination / 1024 ** 2)
                    raise OSError(msg)
                # END check free space
            # END for each item to try

            args = [self.rsync_path]
            if self._dry_run():
                args.append("--dry-run")
            # END handle dry-run
            if self._max_bandwidth_kb > 0:
                args.append("--bwlimit=%i" % self._max_bandwidth_kb)
            # END handle bandwidth limit
            args.extend(def_args)

            # START PROCESS
            ################
            self.log.info("Starting %s" % (" ".join(args)))
            self._progress().setup(range=(0, self._total_num_files_transferred), relative=True)
            self._start_time = time.time()
            self._process = proc(args)
            handle_process()

            if self._move_mode and not self._dry_run():
                self._force_removal(self._source_path)
            # END handle move mode
        finally:
            # if the process is still running here, kill it, as we are likely in an
            # exceptional state
            if self._process and self._process.poll() is None:
                self.log.error("Terminating child forcefully")
                try:
                    self._process.kill()
                except OSError:
                    pass
                self._process = None
            # END handle process
        # END assure process is killed on error

    def rollback(self):
        # without a destination, we couldn't fix anything anyway
        if not self._destination_path.exists():
            return

        # have to reproduce the source from the destination?
        if not self._source_path.exists():
            if self._destination_existed:
                self.log.warn("Destination at %s existed - rollback might copy more data than expected"
                              % self._destination_path)
            # END info

            self.log.info("Restoring source from destination ...")
            t = Transaction(self.log, progress=self._progress())
            type(self)(t, self._actual_destination_path + "/", self._source_path)

            self.log.info("rolling back rsync from %s to %s", self._actual_destination_path, self._source_path)
            if not t.apply().succeeded():
                raise IOError("Expected copy operation to succeed - rollback failed, destination data exists at %s"
                              % self._destination_path)
            # END apply sub-transaction
        # END source doesn't exist

        # finally remove the destination if possible
        for destination, existed in ((self._actual_destination_path, self._actual_destination_existed),
                                     (self._destination_path, self._destination_existed)):
            if existed:
                self.log.warn(
                    "Refusing deletion of destination during rollback as it existed before the rsync operation at %s"
                    % destination)
                continue
            # END sanity check
            self._force_removal(destination)
        # END for each pair of possible paths

    # -- End Interface Implementation -- @}

    # -------------------------
    # @name Interface
    # @{

    def actual_destination(self):
        """@return the destination that will actually receive the copy"""
        return self._actual_destination_path

    # -- End Interface -- @}
def update(self, known_only=False):
    """Update our set of dropboxes to represent the latest state on disk
    @param known_only if True, we will not actually search for new dropboxes, but only
    check if existing dropboxes have had their configuration changed or were removed
    @return self"""
    def update_stat(dbpath, stat, db):
        try:
            new_stat = dbpath.stat()
        except OSError:
            del self.dropboxes[dbpath]
            self._dropbox_removed(stat, db)
        else:
            if new_stat.st_size != stat.st_size or new_stat.st_mtime != stat.st_mtime:
                self.dropboxes[dbpath] = (new_stat, db)
                self._dropbox_changed(stat, new_stat, db)
            # end handle change
        # end handle dropbox doesn't exist
    # end utility to test stat

    if known_only:
        # iterate over a copy, as update_stat() may delete entries as we go
        for dbpath, (stat, db) in list(self.dropboxes.items()):
            update_stat(dbpath, stat, db)
        # end for each stat, db
    else:
        seen_paths = set()
        for search_base in self.paths:
            if search_base.endswith(os.path.sep):
                search_base = Path(search_base[:-1])
            # end assure we don't end with a slash
            if not search_base.isdir():
                log.warn("Skipping inaccessible search base at '%s'", search_base)
                continue
            # end handle missing search base

            log.debug("Searching for dropboxes under '%s' (depth=%i, glob='%s')",
                      search_base, self.max_depth, self.config_file_glob)
            num_dropboxes = 0  # amount of dropboxes found for this search base
            for root, dirs, files in os.walk(search_base):
                if root[len(search_base):].count(os.path.sep) == self.max_depth - 1:
                    del dirs[:]
                # end handle aborting recursion

                for match in (f for f in files if fnmatch(f, self.config_file_glob)):
                    dbpath = Path(root) / match
                    seen_paths.add(dbpath)
                    num_dropboxes += 1
                    if dbpath in self.dropboxes:
                        # check for change
                        stat, db = self.dropboxes[dbpath]
                        update_stat(dbpath, stat, db)
                    else:
                        # handle new dropbox
                        try:
                            stat = dbpath.stat()
                        except OSError:
                            log.error("Couldn't stat dropbox configuration at '%s' even though it was found during search",
                                      dbpath)
                        else:
                            dropbox = self.DropboxType(dbpath)
                            self.dropboxes[dbpath] = (stat, dropbox)
                            self._dropbox_added(stat, dropbox)
                        # end handle inaccessible config file (invalid ACL?)
                    # end handle update or new
                # end handle each match
            # end for each root, dirs, files
            if num_dropboxes == 0:
                log.warn("Didn't find a single dropbox in search base '%s'", search_base)
            # end info log
        # end for each search_base

        # check for deleted dropboxes
        for deleted_db_path in (set(self.dropboxes.keys()) - seen_paths):
            stat, db = self.dropboxes[deleted_db_path]
            del self.dropboxes[deleted_db_path]
            self._dropbox_removed(stat, db)
        # end for each deleted path
    # end handle known_only
    return self
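
# The depth limit in update() relies on pruning the 'dirs' list in-place while
# os.walk() is iterating. A standalone sketch of that pattern (illustrative,
# not part of the original module):
def _walk_limited(base, max_depth):
    base = base.rstrip(os.path.sep)
    for root, dirs, files in os.walk(base):
        # depth of 'root' relative to 'base', measured in path separators
        if root[len(base):].count(os.path.sep) >= max_depth - 1:
            del dirs[:]  # emptying 'dirs' stops os.walk from descending further
        yield root, dirs, files
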
def pre_start(self, executable, env, args, cwd, resolve):
    """Place bootstrap environment variables, based on information received from the
    tank studio installation"""
    executable, env, new_args, cwd = super(TankEngineDelegate, self).pre_start(executable, env,
                                                                               args, cwd, resolve)
    rval = (executable, env, new_args, cwd)
    if not self.init_tank:
        return rval
    # end bail out early without tank

    actual_executable = self._actual_executable()
    self._context_paths.append(actual_executable)
    self._context_paths.append(cwd)

    settings = self.settings_value()
    try:
        # NOTE: The reason we always go for a tank by path is that it will come up much
        # faster that way!
        tk, context_path = self._tank_instance(env, sorted(self._context_paths, reverse=True), settings)
    except Exception as err:
        log.error("Failed to instantiate tank - application will come up without it! Error was: %s", err)
        return rval
    # end ignore exceptions

    host_app_name = self._host_app_name(actual_executable)

    # Get the most specific context, and feed it to the engine via environment variables.
    # We could have entity information from a 'btank' invocation done previously, so try
    # to use that instead.
    if settings.entity_type:
        ctx = tk.context_from_entity(settings.entity_type, settings.entity_id)

        # Deferred folder creation is a feature implemented by the launchapp, and it
        # actually helps us to fill the path cache with enough information to get a
        # context from a path. Most of the time though, applications would be launched
        # through shotgun, in one way or another, which comes with a context from an
        # entity. Let's be a good citizen though, even though I think folders should be
        # created after the application actually launched (by the application).
        # NOTE: Depending on the hooks used, this may fail at boot time as we only have
        # a minimal setup. It's up to the one setting up the boot-time paths to make
        # this work, or to the delegate implementation to disable this as desired.
        if settings.create_folders:
            log.debug("Creating folders for %s %s, %s" % (settings.entity_type,
                                                          settings.entity_id, host_app_name))
            try:
                tk.create_filesystem_structure(settings.entity_type, settings.entity_id,
                                               engine=host_app_name)
            except Exception as err:
                log.error("Tank folder creation failed with error: %s", err)
                # NOTE: tank itself aborts here, but I want to see if this is truly required
            # end ignore errors, let's start the app
        # end create folders only if this is allowed
    else:
        ctx = tk.context_from_path(context_path)
    # end init context

    location_dict = settings['multi-launchapp-location']
    import tank.deploy.descriptor
    try:
        dsc = tank.deploy.descriptor.get_from_location(tank.deploy.descriptor.AppDescriptor.APP,
                                                       tk.pipeline_configuration, location_dict)
    except Exception as err:
        log.error("Couldn't find location of multi-launchapp with error: %s", err)
        return rval
    # end couldn't find multi-launchapp

    if ctx.project is None:
        log.error("Couldn't obtain a valid tank context from path '%s' - tank is disabled", context_path)
        return rval
    # end verify context isn't empty

    # prepare the tank environment
    import tank.context
    env['TANK_CONTEXT'] = tank.context.serialize(ctx)
    env['TANK_ENGINE'] = 'tk-' + host_app_name
    log.log(logging.TRACE, "Using tank engine context: '%s'", ctx)

    startup_path = Path(dsc.get_path()) / 'app_specific' / host_app_name / 'startup'
    if not startup_path.isdir():
        log.error("No engine startup configuration found at '%s' - tank will be disabled", startup_path)
        return rval
    # end handle startup dir

    try:
        self.prepare_tank_engine_environment(startup_path, new_args, env)
    except Exception as err:
        # just log the exception
        log.error("Failed to configure '%s' tank engine with error: %s - tank is disabled",
                  host_app_name, err)
    # end ignore exception

    self._may_start_process = True
    return rval
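
# prepare_tank_engine_environment() is left to delegate implementations. A purely
# illustrative sketch of what such a hook might do (hypothetical, not part of the
# original module; real tank engines have host-application specific bootstrap needs):
def _example_prepare_tank_engine_environment(startup_path, args, env):
    # make the engine's startup scripts importable by the host application
    previous = env.get('PYTHONPATH')
    env['PYTHONPATH'] = str(startup_path) + (os.pathsep + previous if previous else '')
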