Example #1
File: base.py Project: Byron/bcore
    def wrapper(self, *args, **kwargs):
        path = Path(_maketemp(prefix=func.__name__))
        path.mkdir()
        keep = False
        prev_val = os.environ.get('RW_DIR')
        os.environ['RW_DIR'] = str(path)
        prev_cwd = os.getcwd()
        os.chdir(path)
        try:
            try:
                return func(self, path, *args, **kwargs)
            except Exception as err:
                print(("Test %s.%s failed with error %s: '%s', output is at %r"
                       % (type(self).__name__, type(err), err, func.__name__, path)), file=sys.stderr)
                keep = True
                raise
            # end be informed about failure
        finally:
            if prev_val is not None:
                os.environ['RW_DIR'] = prev_val
            # end restore state
            os.chdir(prev_cwd)

            # Need to collect here to be sure all handles have been closed. It appears
            # to be a windows-only issue. In fact things should be deleted, as well as
            # memory maps closed, once objects go out of scope. For some reason
            # though this is not the case here unless we collect explicitly.
            if not keep:
                gc.collect()
                shutil.rmtree(path)
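The function above is the inner part of a test decorator that hands each test a fresh temporary directory and removes it afterwards unless the test failed. A minimal, self-contained sketch of the same pattern using only the standard library (the name with_rw_directory and the details are illustrative, not taken from bcore):

import functools
import shutil
import tempfile

def with_rw_directory(func):
    """Pass a fresh temporary directory to the test, keep it only on failure."""
    @functools.wraps(func)
    def wrapper(self, *args, **kwargs):
        path = tempfile.mkdtemp(prefix=func.__name__)
        keep = False
        try:
            return func(self, path, *args, **kwargs)
        except Exception:
            keep = True  # keep the directory for post-mortem inspection
            raise
        finally:
            if not keep:
                shutil.rmtree(path, ignore_errors=True)
    return wrapper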
Example #2
    def test_plugins(self, rw_dir):
        """load all known plugins and dispatch some events"""
        def raiser(py_file, mod_name):
            raise AssertionError("loading of plugin '%s' failed")
        # end 

        prev_dir = os.getcwd()
        bapp.main().context().push('example plugins')

        plugin_path = Path(__file__).dirname().dirname() / 'plugins'
        examples_path = plugin_path.dirname().dirname().dirname() / 'examples'

        for path in (plugin_path, examples_path):
            assert path.isdir()
            assert load_files(path, on_error=raiser)
        # end for each path to load plugins from

        try:
            os.chdir(rw_dir)
            sg = EventsReadOnlyTestSQLProxyShotgunConnection()
            engine = EventEngine(sg)

            for eid in range(1, 1000, 100):
                sg.next_event_id = sg.first_event_id + eid
                engine._process_events()
            # end 
        finally:
            os.chdir(prev_dir)
Example #3
File: fsops.py Project: Byron/bcore
class DeleteOperation(Operation):

    """Delete a file or a directory. Please note that, for obvious reasons, failures
    cannot be undone. This operation will stop at the first encountered error"""

    __slots__ = "_path"     # the path to delete
    name = "DeleteOperation"

    def __init__(self, transaction, path):
        super(DeleteOperation, self).__init__(transaction)
        self._path = Path(path)

    def apply(self):
        if self._dry_run():
            return

        if self._path.isdir():
            self.log.info("Deleting directory %s", self._path)
            self._path.rmtree()
        else:
            self.log.info("Deleting file %s", self._path)
            self._path.remove()
        # END perform actual removal

    def rollback(self):
        self.log.info("Deletion of filesystem items cannot be rolled back")
Example #4
File: io_stat.py Project: Byron/bit
 def _cleanup(self):
     """Remove all files created by us"""
     if self._generated_file:
         self._generated_file.close()
         file_path = Path(self._generated_file.name)
         if file_path.isfile():
             file_path.remove()
         # end delete file
         self._generated_file = None
Example #5
 def _journal_path(self):
     """@return path to journal file"""
     config = self.settings_value()
     res = config['event-journal-file']
     if not res:
         res = Path('~/.sg-events-daemon.journal')
         self.log.info("event-journal-file not configured, defaulting to '%s'", res)
     # end try to use a reasonable value
     return res.expand_or_raise()
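expand_or_raise() is butility's strict variant of tilde and variable expansion; with the standard library the same default-and-expand step would look roughly like this (the function name and error handling below are assumptions for illustration):

import os

def journal_path(configured_value=None):
    """Return the journal file path, defaulting to a file in the user's home directory."""
    res = configured_value or "~/.sg-events-daemon.journal"
    expanded = os.path.expandvars(os.path.expanduser(res))
    if expanded.startswith("~") or "$" in expanded:
        raise ValueError("could not fully expand journal path '%s'" % res)
    return expanded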
Example #6
 def test_delegate(self):
     dlg_type = ProcessControllerDelegate
     for path in ('C:\\foo\\bar\\file.ext', '/mnt/share/subdir/file.ext'):
         path = Path(path)
         for p in (path, path.dirname()):
             # can happen with windows paths dirname on posix
             if not p:
                 continue
             m = dlg_type.re_find_path.match(p)
             assert m and m.group(0) == p, "should have found a path in '%s'" % p
             p = p.replace('/', '-').replace('\\', '-')
             assert not dlg_type.re_find_path.match(p), "This should be no path '%s'" % p
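The actual re_find_path pattern of ProcessControllerDelegate is not shown in this excerpt; a rough stand-in that satisfies the assertions above (matching Windows and POSIX style paths and rejecting strings whose separators were replaced) could look like:

import re

# hypothetical stand-in for ProcessControllerDelegate.re_find_path
re_find_path = re.compile(r"^(?:[A-Za-z]:[\\/]|/)[\w.\\/-]+$")

assert re_find_path.match(r"C:\foo\bar\file.ext")
assert re_find_path.match("/mnt/share/subdir/file.ext")
assert not re_find_path.match("-mnt-share-subdir-file.ext")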
Example #7
File: base.py Project: Byron/btank
def core_hook_type():
    """When executed from a core hook, it will return the BaseType from which your core hook
    should derive.
    @throws TypeError if a core-hook base type couldn't be found.
    @note this works around an issue with tank being unable to provide base types for core hooks,
    as they are special. Therefore, sgtk.get_hook_baseclass() either returns Hook only, or 
    your own type.
    """
    calling_module = Path(inspect.currentframe().f_back.f_globals["__file__"])
    tank_root = Path(sys.modules[Hook.__module__].__file__).dirname().dirname().dirname()
    base_hook_file = tank_root / "hooks" / (calling_module.namebase() + ".py")
    return loader.load_plugin(base_hook_file, Hook)
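Inside a core hook file of a tank configuration, the helper above would typically be used directly in the class statement. A hedged usage sketch (the hook name, import path, and execute() signature are assumptions; they depend on the concrete core hook being overridden):

# e.g. in hooks/pick_environment.py of a tank configuration
from btank.base import core_hook_type  # import path assumed

class PickEnvironment(core_hook_type()):

    def execute(self, context, **kwargs):
        # delegate to the built-in core hook's behaviour
        return super(PickEnvironment, self).execute(context, **kwargs)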
Example #8
File: base.py Project: Byron/bit
 def package_search_paths(self):
     """@return list of absolute package search paths, under which we will check for packages and changes thereof"""
     out = list()
     for search_path in self.settings().package.search_paths:
         search_path = Path(search_path)
         if not search_path.isabs():
             assert self._config_file_path is not None, "Require a configuration file path for relative search paths"
             search_path = self._config_file_path.dirname() / search_path
         # end make absolute path
         out.append(search_path)
     # end for each possibly relative search path
     return out
Example #9
    def pre_start(self, executable, env, args, cwd, resolve):
        executable, env, new_args, cwd = super(TankCommandDelegate, self).pre_start(executable, env, args, cwd, resolve)
        # and the second argument must be the tank install root ... let's make it happy
        if len(new_args) > 2 and not os.path.isabs(new_args[1]):
            install_root = Path(new_args[0]).dirname().dirname().dirname()
            assert install_root.basename() == 'install', "Expected first argument '%s' to be tank_cmd.py right in the install root" % new_args[0]
            new_args.insert(1, install_root.dirname())
        # end handle install root

        last_arg = new_args[-1]
        if not last_arg.startswith(self.tank_pc_arg):
            # we assume to be in the right spot, but a check can't hurt until
            # we are able to do more ourselves
            actual_executable = self._actual_executable()
            base = actual_executable.dirname()
            assert (base / 'tank').exists(), "Currently '%s' must be right next to the 'tank' executable" % executable
            new_args.append(str(self.tank_pc_arg + base))
        # end setup context

        #######################
        # Process Arguments ##
        #####################
        if len(new_args) > 6 and new_args[3].startswith(self.launch_prefix):
            # now we could go crazy and try to find asset paths in order to provide context to bprocess
            # We could also use the shotgun context in some way, to feed data to our own asset management
            # However, for now using the project itself should just be fine, but this is certainly 
            # to be improved

            # Additionally, what we really want is to start any supported program, and enforce tank support by
            # fixing up delegates. For that, we will create a new process controller, which uses our Application 
            # instance, and the delegate that it defined so far.
            # However, we are currently unable to truly provide the information we have to a new process controller, 
            # unless it's communicated via the context.

            # It should be one of ours (e.g. TankEngineDelegate derivative) if there is tank support, which 
            # requires proper configuration.
            
            # For that to work, we will override the entire start procedure, as in pre-start we can't and should not
            # swap in the entire delegate
            def set_overrides(schema, value):
                value.host_app_name = new_args[3][len(self.launch_prefix):]
                value.entity_type = new_args[4]
                value.entity_id = int(new_args[5])
            # end overrides setter

            # This call will also push the context onto the stack, nothing more to be done here
            self.ApplyChangeContextType('tank-engine-information').setup(self._app.context(),
                                                                         set_overrides, 
                                                                         tank_engine_schema)
        #end handle particular command mode

        return (executable, env, new_args, cwd)
Example #10
File: fsops.py Project: Byron/bcore
class MoveFSItemOperation(FSOperationBase):

    """Move a file or directory to the given name"""
    __slots__ = ("_source_path", "_destination_path", "_actual_destination_path")

    name = "FSItemMove"

    def __init__(self, transaction, source_path, destination_path):
        super(MoveFSItemOperation, self).__init__(transaction)
        self._source_path = Path(source_path).expandvars()
        self._destination_path = self._actual_destination_path = Path(destination_path).expandvars()

        # if we move into a directory, the filename will just be appended
        if self._destination_path.isdir():
            self._actual_destination_path = self._destination_path / source_path.basename()
        # END handle actual destination

    def apply(self):
        if self._actual_destination_path.exists():
            raise AssertionError("Cannot move item %s onto existing one at %s" %
                                 (self._source_path, self._actual_destination_path))
        # END handle existing destination

        if self._dry_run():
            return

        self.log.info("moving %s to %s", self._source_path, self._destination_path)
        self._source_path.move(self._destination_path)
        self._operation_performed = True

    def rollback(self):
        try:
            if not self._operation_performed or not self._actual_destination_path.exists():
                return

            # fail gracefully
            if self._source_path.exists():
                raise AssertionError("Cannot move destination %s into existing source %s, source directory should not exist" % (
                    self._actual_destination_path, self._source_path))
            self.log.info("moving %s to %s", self._actual_destination_path, self._source_path)
            self._actual_destination_path.move(self._source_path)
        finally:
            self._reset_state()
        # END assure state reset

    #{ Interface

    def actual_destination(self):
        """:return: path to the final destination"""
        return self._actual_destination_path
Example #11
File: fsops.py Project: Byron/bit
    def _normalized_destination(cls, package, config):
        """@return an absolute path with all strp based items replaced if applicable"""
        stat = package.root().stat()
        assert config.date_field in cls.valid_times, "Invalid date field, must be one of %s" % (', '.join(cls.valid_times))
        assert config.destination, "'destination' field must be set"

        time_tuple = gmtime(getattr(stat, 'st_%s' % config.date_field) - timezone)

        destination_path = Path(strftime(config.destination, time_tuple))
        if not destination_path.isabs():
            destination_path = package.root() / destination_path
        # end make absolute

        return destination_path
Example #12
File: rsync.py Project: Byron/bcore
    def __init__(self, transaction, source, destination, move=False, max_bandwidth_kb=0):
        """initialize an rsync operation with a source and destination path.
        If move is True, the source will be deleted after a successful rsync operation.
        An operation is successful if there were no error lines in stderr of the process, and if
        If the maximum bandwidth is greater 0, the rsync operation will be using no more than the given
        bandwidth in kilobytes.
        the return code was 0."""
        super(RsyncOperation, self).__init__(transaction)

        if os.name != "posix":
            raise AssertionError("This module only works on posix systems")
        # END handle posix

        self._source_path = Path(source).expandvars()
        self._destination_path = self._actual_destination_path = Path(destination).expandvars()
        self._destination_existed = self._destination_path.exists()
        # rsync creates at most one parent directory if it does not exist. It will always put
        # directories into other directories, unless you specify a / at the end of source.
        #
        # In case of files, it will always put them into existing directories, or rename them to the given
        # name
        if ((self._source_path.isdir() and not self._source_path.tolinuxpath().endswith('/')) or
                (self._source_path.isfile() and not self._destination_existed or self._destination_path.isdir())):
            self._actual_destination_path = self._destination_path / self._source_path.basename() + "/"
            # / as we have to assure it copies the contents of the directory in case of undo
        # END assure destination
        self._actual_destination_existed = self._actual_destination_path.exists()
        self._move_mode = move
        self._max_bandwidth_kb = max_bandwidth_kb

        self._current_path = None
        self._total_num_files_transferred = 0
        self._total_transferred_filesize_bytes = 0
        self._process = None
        self._reset_current_state()
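The constructor only stores max_bandwidth_kb; when the rsync command line is assembled it would typically become rsync's --bwlimit option (a limit in KB/s). A hedged sketch of such an argument builder, not the actual bcore implementation:

def build_rsync_args(source, destination, max_bandwidth_kb=0):
    """Assemble a basic rsync invocation along the lines described above."""
    args = ["/usr/bin/rsync", "--archive", "--progress"]
    if max_bandwidth_kb > 0:
        args.append("--bwlimit=%d" % max_bandwidth_kb)  # limit bandwidth in KB/s
    args += [str(source), str(destination)]
    return args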
Example #13
    def executable(self, env):
        """@return butility.Path to executable - its not verified to be existing
        @note for now this is uncached, but its okay for our use
        @note we always resolve environment variables
        """
        executables = self.data().executable
        if not executables:
            raise ValueError("no executable set for package '%s'" % self.name())

        error = None
        executable_path = None
        for executable in executables:
            executable_path = Path._expandvars_deep(executable, env)
            try:
                executable_path = self.to_abs_path(executable_path)
            except EnvironmentError as err:
                if '$' in executable_path:
                    # give it more time, let them work with it until something breaks
                    # Don't have another choice
                    executable_path = Path(executable_path)
                else:
                    error = err
                    continue
            # end handle conversion

            if os.name == 'nt':
                # We assume exe by default, and not com or bat.
                # Even though magic isn't good, I see no point in making this configurable, people
                # can just be explicit about the extension
                win_ext = '.exe'
                if not executable_path.ext():
                    executable_path += win_ext
                # handle extension
            # end handle windows

            # If we have variables in the path, we can't assume anything (nor resolve) as it might be too early
            # for that. In that case, we assume the best. Otherwise, the executable must exist
            if not executable_path.containsvars() and not executable_path.isfile():
                continue
            # end

            return executable_path
        # end for each executable to try
        assert executable_path or error, "Should have collected at least one error at this point"
        if error:
            raise error
        return executable_path
Example #14
 def to_abs_path(self, path):
     """Convert the given possibly relative path to an absolute path, if necessary
     @note it is not checked for existence
     @param path string or butility.Path
     @return absolute version of the path, as butility.Path
     @throws ValueError if the path is relative and there is no valid root path
     @note assumes the best when an environment variable is found, which can never be made absolute.
     Will only apply to paths like '$FOO/bar', not to 'foo/$BAR'"""
     path = Path(path)
     if path.isabs():
         return path
     if path.containsvars():
         return path
     if self.root_path() is None:
         raise EnvironmentError("Cannot convert '%s' to absolute path in package '%s' without a single valid tree, tried: [%s]" % (path, self.name(), ', '.join(self._data.trees)))
     # end handle root path
     return self.root_path() / path
Example #15
File: fsops.py Project: Byron/bcore
    def __init__(self, transaction, source_path, destination_path):
        super(MoveFSItemOperation, self).__init__(transaction)
        self._source_path = Path(source_path).expandvars()
        self._destination_path = self._actual_destination_path = Path(destination_path).expandvars()

        # if we move into a directory, the filename will just be appended
        if self._destination_path.isdir():
            self._actual_destination_path = self._destination_path / source_path.basename()
Example #16
File: tree.py Project: Byron/bit
 def __init__(self, root_path, one_package_per_file=False):
     """Initialize this instance from the given root path and parse all information contained in the directory
     @param root_path butility.Path instance, pointing to an existing directory
     @param one_package_per_file if True, every file will be a package."""
     self._root_path = Path(root_path)
     self._packages = None
     self._one_package_per_file = one_package_per_file
     self._init_contents()
Example #17
    def communicate(self, process):
        # another test runs in here, but has no channel configured
        if process.stdout is None:
            return super(TestCommunicatorDelegate, self).communicate(process)
        # handle other test

        try:
            # output should be marker file
            err_lines = process.stderr.readlines()
            assert not err_lines
            lines = process.stdout.readlines()
            assert len(lines) == 1
            tmpfile = Path(lines[0].decode().strip())
            tmpfile.remove()
        finally:
            res = super(TestCommunicatorDelegate, self).communicate(process)
        assert res.returncode == 0, "There should have been no error"
        return res
Example #18
File: fsops.py Project: Byron/bcore
 def __init__(self, transaction, path, initial_file_content=None, mode=None, uid=None, gid=None):
     """Initialize the operation with a path to create. If initial_file_content is set, 
     to a string, it will be written in binary mode to a file. If it is unset, 
     a directory will be created. Non-existing parent-directories will be created.
     After creation, the mode will be set if not None, and uid and gid will be set as well to the given
     numerical ID if of of them is not None"""
     super(CreateFSItemOperation, self).__init__(transaction)
     self._assert_posix()
     self._path = Path(path)
     self._content = initial_file_content
     self._mode = mode
     self._uid = uid
     self._gid = gid
Example #19
 def executable(self, env):
     """@return butility.Path to executable - its not verified to be existing
     @note for now this is uncached, but its okay for our use
     @note we always resolve environment variables
     """
     executable_path = Path._expandvars_deep(self.data().executable, env)
     try:
         executable_path = self.to_abs_path(executable_path)
     except EnvironmentError:
         if '$' in executable_path:
             # give it more time, let them work with it until something breaks
             # Don't have another choice
             executable_path = Path(executable_path)
         else:
             raise
     # end handle conversion
     if os.name == 'nt':
         win_ext = '.exe'
         if not executable_path.ext():
             executable_path += win_ext
         # handle extension
     # end handle windows
     return executable_path
Example #20
    def prepare_context(self, executable, env, args, cwd):
        """We will parse paths from the given commandline and use them in the context we build.
        Additionally, we will provide a per-arg handler with the opportunity to inject kvstore overrides
        """
        # Will be a kvstore if there have been overrides
        kvstore_overrides = KeyValueStoreModifier(dict())
        for arg in args:
            # by default, we use paths as context providers (configurable)
            path = self._extract_path(arg)
            if path:
                # ignore args that are not paths
                path = Path(path)
                if path.dirname().isdir():
                    self._app.context().push(self.StackAwareHierarchicalContextType(path.dirname()))
                # end handle valid directory
            # end handle path
            self.handle_argument(arg, kvstore_overrides)
        # end for each arg to check

        # set overrides
        if list(kvstore_overrides.keys()):
            self._app.context().push(Context('delegate overrides', kvstore_overrides))
        # end handle overrides
        return super(ProcessControllerDelegate, self).prepare_context(executable, env, args, cwd)
Example #21
    def pre_start(self, executable, env, args, cwd, resolve):
        """Place boot-stap environment variables, based on information received from the tank studio installation"""
        executable, env, new_args, cwd = super(TankEngineDelegate, self).pre_start(executable, env, args, cwd, resolve)
        rval = (executable, env, new_args, cwd)
        if not self.init_tank:
            return rval
        # end bail out early without tank

        actual_executable = self._actual_executable()
        self._context_paths.append(actual_executable)
        self._context_paths.append(cwd)

        settings = self.settings_value()

        try:
            # NOTE: The reason we always go for a tank by path is that it will be coming up much faster that way !
            tk, context_path = self._tank_instance(env, sorted(self._context_paths, reverse=True), settings)
        except Exception as err:
            log.error("Failed to instantiate tank - application will come up without it ! Error was: %s", err)
            return rval
        # end ignore exceptions

        host_app_name = self._host_app_name(actual_executable)

        # Get the most specific context, and feed it to the engine via env vars
        # We could have entity information from a 'btank' invocation done previously, so try to use that instead
        if settings.entity_type:
            ctx = tk.context_from_entity(settings.entity_type, settings.entity_id)

            # Deferred folder creation is a feature implemented by the launchapp, and it actually helps us
            # to fill the path cache with enough information to get a context from a path.
            # Most of the time though, applications would be launched through shotgun, in one way or another,
            # which comes with a context from an entity. Let's be a good citizen though, even though I think
            # folders should be created after the application actually launched (by the application)
            # NOTE: Depending on the used hooks, this may fail at boot time as we only have a minimal setup
            # It's up to the one setting up the boot-time paths to make this work, or to the delegate 
            # implementation to disable this as desired
            if settings.create_folders:
                log.debug("Creating folders for %s %s, %s" % (settings.entity_type, settings.entity_id, host_app_name))
                try:
                    tk.create_filesystem_structure(settings.entity_type, settings.entity_id, engine=host_app_name)
                except Exception as err:
                    log.error("Tank folder creation failed with error: %s", err)
                    # NOTE: tank itself aborts here, but I want to see if this is truly required
                # end ignore errors, lets start the app
            # end create folders only if this is allowed
        else:
            ctx = tk.context_from_path(context_path)
        # end init context

        location_dict = settings['multi-launchapp-location']
        import tank.deploy.descriptor
        try:
            dsc = tank.deploy.descriptor.get_from_location(tank.deploy.descriptor.AppDescriptor.APP, 
                                                           tk.pipeline_configuration,
                                                           location_dict)
        except Exception as err:
            log.error("Couldn't find location of multi-launchapp with error: %s", err)
            return rval
        # end couldn't find multi-launch app

        if ctx.project is None:
            log.error("Couldn't obtain a valid tank context from path '%s' - tank is disabled", context_path)
            return rval
        # end verify context isn't empty

        # prepare the tank environment
        import tank.context
        env['TANK_CONTEXT'] = tank.context.serialize(ctx)
        env['TANK_ENGINE'] = 'tk-' + host_app_name

        log.log(logging.TRACE, "Using tank engine context: '%s'", ctx)

        startup_path = Path(dsc.get_path()) / 'app_specific' / host_app_name / 'startup'
        if not startup_path.isdir():
            log.error("No engine startup configuration found at '%s' - tank will be disabled", startup_path)
            return rval
        # end handle startup dir

        try:
            self.prepare_tank_engine_environment(startup_path, new_args, env)
        except Exception as err:
            # just log the exception
            log.error("Failed to configure '%s' tank engine with error: %s - tank is disabled", 
                        host_app_name, err)
        # end ignore exception

        self._may_start_process = True
        return rval
Example #22
File: fsstat.py Project: Byron/bit
    def _update_db(self, args):
        """Update the sqlite database database
        @return error code"""
        log = self.log()

        num_sources = bool(args.directories) + bool(args.merge_paths)
        if num_sources > 1:
            raise AssertionError("Cannot use --from-directories or --merge together")
        elif num_sources and args.remove_duplicates:
            raise AssertionError("--remove-duplicate-paths cannot be used in conjunction with any source")
        elif not (args.fast or args.remove_duplicates) and num_sources == 0:
            raise AssertionError("Specify at least one of the flags specifying from where to update the database")
        # end assure consistency

        #############
        # INIT DB ##
        ###########
        path = args.update_db
        engine = create_engine(self._url_from_path(path))
        meta = None
        # Assume file exists
        if is_url(path) or path.isfile():
            meta = MetaData(engine, reflect=True)
        # end handle file exists

        if not meta or args.table_name not in meta.tables:
            if args.fast:
                log.warn("Database didn't exist yet - fast implicitly disabled")
                args.fast = False
                if num_sources == 0:
                    raise AssertionError(
                        "Require at least one initial data source, either --from-directories or --merge"
                    )
                # end handle logic
            # end handle fast
            if args.remove_duplicates:
                raise AssertionError("Cannot remove duplicates on non-existing table")
            # end handle remove duplicates

            meta = fsstat_schema.meta
            fsstat_schema.record.name = args.table_name
            meta.bind = engine
            meta.create_all()
            log.info("initalized database at %s", path)
            fsitem = fsstat_schema.record
            # assure we have the meta-data with the proper name - renaming the table before we create_all
            # is kind of a hack
            meta = MetaData(engine, reflect=True)
        else:
            if args.with_index:
                log.info("Cannot create index on exiting table without additional logic - turning index creation off")
            # end
            args.with_index = False

            fsitem = meta.tables[args.table_name]
            log.info("Updating database '%s' at '%s'", path, args.table_name)
        # end initialize table

        strip = str.strip
        basename = os.path.basename
        connection = engine.connect()
        insert = fsitem.insert()

        st = time()
        nr = 0  # num records handled
        records = list()

        ########################
        # REMOVE DUPLICATES ###
        ######################
        if args.remove_duplicates:
            nr = self._remove_duplicates(connection, fsitem)
        ######################
        # FAST UPDATE ####
        ###############
        elif args.fast:
            nr = self._fast_update_database(engine, args)
        ###########################
        ## DIRECTORY CRAWLING ####
        #########################
        elif args.directories:

            streamer = HashStreamer(hashlib.sha1, lz4dumps)
            join = os.path.join
            normalize = os.path.normpath
            totalbcount = 0  # total amount of bytes processed

            lct = time()
            progress_every = 500
            commit_every_fcount = 15000
            commit_every_seconds = 1 * 60  # commit at most once per minute

            def progress():
                elapsed = time() - st
                log.info(
                    "Processed %i files with %s in %.2fs (%.2f files/s | %s MB/s)",
                    nr,
                    int_to_size_string(totalbcount),
                    elapsed,
                    nr / elapsed,
                    mb(totalbcount) / elapsed,
                )

            # end

            for directory in args.directories:
                if not os.path.isdir(directory):
                    log.error("Skipped non-existing directory '%s'", directory)
                    continue
                # end handle failed directory access

                # normalize to prevent extra stuff
                directory = normalize(directory)
                for root, dirs, files in os.walk(directory, followlinks=False):
                    # NOTE: We also take directories, as it allows us to find directories with many files, or with
                    # no files (empty directories). Also, we can optimize updates that way.
                    # The root itself must also be handled! It must be in the database, otherwise we can never
                    # handle additions correctly, at least not for the root folder
                    chains = [files, dirs]
                    if root is directory:
                        # an empty string joined with root, is root
                        chains.insert(0, [""])
                    # end handle root
                    for filename in chain(*chains):
                        nr += 1
                        # only join if we are not seeing the root. Otherwise we get a slash appended
                        # Which is something we really don't want as it could hinder later updates
                        path = filename and join(root, filename) or root
                        stat = self._append_path_record(records, path, streamer, log)
                        if stat:
                            totalbcount += stat.st_size

                            if nr % progress_every == 0:
                                progress()
                            # end show progress
                        # end managed to handle file

                        if time() - lct >= commit_every_seconds or nr % commit_every_fcount == 0:
                            lct = time()
                            progress()
                            self.do_execute_records(connection, insert, records, log, st, nr)
                        # end commit
                # end for each file
            # end for each directory to traverse
            # final execute
            progress()
            self.do_execute_records(connection, insert, records, log, st, nr)
        #########################
        ## Database Merges  ####
        ######################
        elif args.merge_paths:
            ## Commit this amount of records at once
            commit_count = 100000

            def progress():
                elapsed = time() - st
                log.info("Inserted %i records in %.2fs (%.2f records/s)", nr, elapsed, nr / elapsed)

            # end

            for merge_path in args.merge_paths:
                merge_path = Path(merge_path)

                if not is_url(merge_path) and not merge_path.isfile():
                    log.error("Database at '%s' didn't exist - skipping", merge_path)
                    continue
                # end for each path

                log.info("Merging DB at '%s' ...", merge_path)
                merge_engine = create_engine(self._url_from_path(merge_path))
                mcon = merge_engine.connect()
                md = MetaData(merge_engine, reflect=True)

                try:
                    for table in md.tables.itervalues():
                        # If id is part of it, and we rollback because of a unicode error, the counter
                        # will be offset and we cannot commit anymore. Just let it be done automatically, no
                        # matter what
                        column_names = [c.name for c in table.columns if c != "id"]
                        try:
                            cursor = mcon.execute(select([table]))

                            # We assume the cursor deals with the query efficiently, and doesn't really fetch everything
                            while True:
                                fst = time()
                                log.info("Fetching %i '%s' records from '%s' ...", commit_count, table.name, merge_path)

                                rows = cursor.fetchmany(commit_count)
                                records.extend(dict(zip(column_names, row)) for row in rows)

                                elapsed = time() - fst
                                log.info(
                                    "Fetched %i records in %.2fs (%.2f records/s)",
                                    len(records),
                                    elapsed,
                                    len(records) / elapsed,
                                )

                                nr += len(records)
                                must_break = len(records) < commit_count

                                ##############
                                self.do_execute_records(connection, insert, records, log, st, nr)
                                progress()
                                ##############

                                # Did we get enough ?
                                if must_break:
                                    break
                                # end check for end of iteration
                            # end endless loop
                        finally:
                            cursor.close()
                    # end for each table to merge
                finally:
                    mcon.close()
                # end assure we close resources
            # end for each merge path
        else:
            raise AssertionError("Reached unexpected mode")
        # end handle mode of operation

        ##############################
        # CREATE INDICES AND VIEWS ##
        ############################
        if args.with_index:
            # Create one index per column, which allows fast searches over it
            # Create a custom one that speeds up our common search group by path, order by path, mtime.
            for col in fsitem.columns:
                # id is primary, and thus already indexed
                # path is too big - it needs to be hashed to be useful in an actual index
                # file as well
                if col in (fsitem.c.id, fsitem.c.path, fsitem.c.sha1):
                    continue
                # end handle index creation
                ist = time()
                log.info("Creating index for columns '%s' ...", col)
                try:
                    Index("idx_%s_%s" % (fsitem.name, col.name), col).create(engine)
                except Exception:
                    log.error("Index creation failed", exc_info=True)
                else:
                    elapsed = time() - ist
                    log.info("Created index with %i entries in %.2fs (%.2f entries/s)" % (nr, elapsed, nr / elapsed))
                # end handle creation errors
            # end for each index to create
        # end handle index creation

        if args.sql_directories:
            for sql_dir in args.sql_directories:
                sql_dir = Path(sql_dir)
                for sql_file in sql_dir.files(pattern="*.sql"):
                    try:
                        transaction = connection.begin()
                        log.info("Creating view from '%s'", sql_file)
                        connection.execute(sqlite_view_from_file(sql_file))
                        transaction.commit()
                    except Exception:
                        transaction.rollback()
                        log.error("Failed to create view for file '%s' - it might have existed - skipping", sql_file)
                        continue
                    # end handle transaction per sql view
                # end for each file
            # end for each sql dir
        # end have sql directories

        # FINAL CLEANUP
        ################
        # If there were unicode errors, we end up having a row with a null-path. This breaks our code
        # Lets keep the data consistent instead of altering code
        dst = time()
        connection.execute(fsitem.delete().where(fsitem.c.path == None))
        log.info("Cleaned dataset after (possible) unicode errors in %fs", time() - dst)

        connection.close()

        ##################
        # FINAL INFO ###
        ###############
        elapsed = time() - st
        log.info("Overall time to process %i records is %.2fs (%.2f records/s)", nr, elapsed, nr / elapsed)
        log.info("File written to %s", Path(args.update_db).abspath())

        return self.SUCCESS
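_url_from_path is not part of this excerpt; for a plain file path it presumably produces an SQLAlchemy SQLite URL and passes real URLs through unchanged. A minimal sketch of that assumption:

def _url_from_path(path):
    """Return the given value as a database URL, wrapping plain file paths as SQLite URLs."""
    path = str(path)
    if "://" in path:
        return path  # already a URL, e.g. postgresql://...
    return "sqlite:///%s" % path  # sqlite:////abs/path or sqlite:///relative/path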
Example #23
File: tree.py Project: Byron/bit
class TreeRoot(dict):
    """A python representation of a directory tree

    It keeps a tree-like structure in a simple dict, where each tree entry is associated with a tuple of meta-data.
    This property makes it comparable and easily diffable.
    As the value of a directory entry is another dict with items, we don't have stat information on a dictionary itself.
    File entries will contain the stat structure obtained by os.stat

    Additionally, a tree is able to find package root paths, and allows simplified access to sub-paths using a path 
    separator right away.

    Packages are items underneath which there is at least one file. A package starts at the path which actually
    contains a file.
    A package may be a file.

    A package is a simple helper to keep track of everything below it
    """
    __slots__ = ('_root_path', 
                 '_sample_time',
                 '_packages',
                 '_one_package_per_file',
                 '__weakref__')

    # -------------------------
    ## @name Constants
    # @{

    ## we assume tokens to be separated with this separator
    TOKEN_SEPARATOR = '/'
    
    ## -- End Constants -- @}


    def __new__(cls, root_path, *args, **kwargs):
        """Just required to allow custom constructor"""
        return dict.__new__(cls)
        
    def __init__(self, root_path, one_package_per_file=False):
        """Initialize this instance from the given root path and parse all information contained in the directory
        @param root_path butility.Path instance, pointing to an existing directory
        @param one_package_per_file if True, every file will be a package."""
        self._root_path = Path(root_path)
        self._packages = None
        self._one_package_per_file = one_package_per_file
        self._init_contents()

    # -------------------------
    ## @name Utilities
    # @{

    def _init_contents(self):
        """Initialize our data by parsing it from disk.
        @note can only be called once as we have to be empty"""
        assert len(self) == 0, "Need to be empty, can only be called once"
        assert self._root_path.isdir(), "Root path must be directory"

        # Main recursion helper, depth first
        # root_dict - dict to put information into
        # root_path - path to directory to analyze
        ls = os.listdir
        stat = os.stat
        join = os.path.join

        def recurse_dir(root_dict, root_path):
            for entry in ls(root_path):
                # entry is just the name
                absentry = join(root_path, entry)
                try:
                    einfo = stat(absentry)
                except OSError:
                    continue
                # end ignore files deleted under our nose

                if S_ISDIR(einfo.st_mode):
                    root_dict[entry] = recurse_dir(dict(), absentry)
                else:
                    root_dict[entry] = einfo
                # end 
            # end for each entry
            return root_dict
        # end recursion helper
        recurse_dir(self, self._root_path)

        # once we are done with the sample, we set the sample time. Otherwise packages might be considered
        # stable just because it took us many seconds until the sample was taken
        self._sample_time = time()

    ## -- End Utilities -- @}


    # -------------------------
    ## @name Superclass Overrides
    # @{

    def __str__(self):
        return 'TreeRoot("%s")' % self.root_path()

    def __getitem__(self, name):
        """If name contains a token separator, recurse into ourselves to return the result
        @raise KeyError if there no entry at the given path"""
        for token in name.split(self.TOKEN_SEPARATOR):
            self = dict.__getitem__(self, token)
        # end handle recursion

        return self

    ## -- End Superclass Overrides -- @}

    # -------------------------
    ## @name Interface
    # @{

    def root_path(self):
        """@return our root path"""
        return self._root_path

    def sample_time(self):
        """@return time (as seconds since epoch) at which our snapshot was taken.
        @note basically the time of our instantiation"""
        return self._sample_time

    def entries(self, root_relative):
        """@return list of all entries in ourselves, depth first, files only, as tuple of (rela_path, stat)
        @param root_relative relative path into our dict - can contain path separators"""
        out = list()
        def recurse_dir(root_item, root_path):
            if isinstance(root_item, dict):
                for key in root_item:
                    recurse_dir(root_item[key], root_path + self.TOKEN_SEPARATOR + key)
                # end for each key in ourselves
            else:
                out.append((root_path, root_item))
            # end handle dict/non-dict
        # end recursion helper
        recurse_dir(self[root_relative], root_relative)
        return out

    def iter_packages(self):
        """@return an iterator yielding all Package instances found in this tree
        @note we are caching the packages just because this will allow them to carry their own stable_since
        date. Otherwise it wouldn't be a problem at all to obtain packages on demand
        """
        # We build a cache only once actually
        if self._packages is None:
            self._packages = list()
            # Recurse into our structure and find indication for packages.
            # Abort recursion once criteria are met, and handle files specifically underneath 
            # our root

            # filter a dict's contents into files and dir tuples
            def files_and_dirs(d):
                files = list()
                dirs = list()
                for name, entry in d.items():
                    if isinstance(entry, dict):
                        dirs.append((name, entry))
                    else:
                        files.append((name, entry))
                    # end handle entry type
                # end for each name, entry
                return files, dirs
            # end files and dirs

            join = os.path.join
            files, dirs = files_and_dirs(self)

            for name, info in files:
                self._packages.append(Package(self, name))
            # end for each file underneath
            
            # for each directory, enter standard recursion, with standard rules, and yield items
            def recurse(dir_dict, subdir_relative):
                # if there is a single file, it's a package
                files, dirs = files_and_dirs(dir_dict)
                if files:
                    if self._one_package_per_file:
                        for file in files:
                            self._packages.append(Package(self, subdir_relative + os.path.sep + file[0]))    
                        # end for each file
                    else:
                        self._packages.append(Package(self, subdir_relative))
                    # end handle package instantiation
                else:
                    for name, dir_dict in dirs:
                        recurse(dir_dict, join(subdir_relative, name))
                    # end for each directory to traverse
                # end handle recursion
            # end recursion helper

            for name, dir_dict in dirs:
                recurse(dir_dict, name)
            # end for each name, dir_dict
        # end build cache

        return iter(self._packages)
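A hedged usage sketch of the TreeRoot interface shown above (the directory and entry names are made up):

tree = TreeRoot("/mnt/projects/incoming")       # snapshot an existing directory
print(tree.sample_time())                       # seconds since epoch when the snapshot was taken

info = tree["some_package/data.bin"]            # separator-aware access, KeyError if missing
for rela_path, stat_info in tree.entries("some_package"):
    print(rela_path, stat_info.st_size)

for package in tree.iter_packages():
    print(package)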
Example #24
    def __init__(self, *args, **kwargs):
        """Initialize this instance with the required operations and verify configuration
        @throw ValueError if our configuration seems invalid"""
        super(TransferDropboxTransaction, self).__init__(*args, **kwargs)

        # Prepare the kvstore with data for resolving values
        now = datetime.utcnow()
        store = self._kvstore
        store.set_value('Y', now.year)
        store.set_value('M', now.month)
        store.set_value('D', now.day)
        store.set_value('H', now.hour)
        store.set_value('MIN', now.minute)

        config = self._config()

        if config.mode not in self.valid_modes:
            raise ValueError("Invalid transfer mode '%s' - must be one of %s" % (config.mode, ','.join(self.valid_modes)))
        # end check mode

        if not config.destination_dir.isdir():
            raise ValueError("Destination dropbox was not accessible: '%s'" % config.destination_dir)
        # prepare and resolve destination
        
        # handle subdir and create it if needed
        if config.subdir:
            raise NotImplementedError("implement unique identifier and subdir creation")
        # end 

        source = self._sql_instance.in_package.root()
        destination = config.destination_dir
        is_sync_mode = config.mode == self.MODE_SYNC
        if config.keep_package_subdir:
            # NOTE: rsync will duplicate our first directory unless we truncate it here
            root_relative = Path(self._package.root_relative())
            if root_relative.dirname():
                destination /= root_relative.dirname()
            # end handle modification of destination

            if is_sync_mode:
                if not source.isdir():
                    log.warn("Using copy instead of sync as it would be dangerous to use if there is no package subdirectory - source is file")
                    is_sync_mode = False
                else:
                    # In case of sync, we want to use the most specific destination path. This is possible by instructing
                    # rsync to copy only the directory contents, into a destination which carries the additional
                    # base name of the source directory
                    destination = destination / source.basename()
                    source += '/'
                # end put in sync mode safety
            # end adjust source-destination for sync mode

            # Make sure the directory exists
            if not destination.isdir():
                destination.makedirs()
            # end handle dir creation
        elif is_sync_mode:
            log.warn("Deactivating sync-mode as it is dangerous to use if keep_package_subdir is disabled")
            is_sync_mode = False
        # end handle subdir
        rsync_args = is_sync_mode and ['--delete'] or list()

        TransferRsyncOperation(self, source, destination, move=config.mode==self.MODE_MOVE, additional_rsync_args=rsync_args)
        self._sql_instance.comment = "%sing package from '%s' to '%s'" % (config.mode, source, destination)
Example #25
File: fsops.py Project: Byron/bcore
 def __init__(self, transaction, path):
     super(DeleteOperation, self).__init__(transaction)
     self._path = Path(path)
Example #26
File: fsops.py Project: Byron/bcore
class CreateFSItemOperation(FSOperationBase):

    """Create a directory or file with the given access permissions and ownership. In
    case of a file, you may specify an initial content.
    For this operation to succeed, the destination path must not exist yet!"""
    __slots__ = ("_path", "_content", "_mode", "_uid", "_gid")

    name = "CreateFSItem"

    def __init__(self, transaction, path, initial_file_content=None, mode=None, uid=None, gid=None):
        """Initialize the operation with a path to create. If initial_file_content is set, 
        to a string, it will be written in binary mode to a file. If it is unset, 
        a directory will be created. Non-existing parent-directories will be created.
        After creation, the mode will be set if not None, and uid and gid will be set as well to the given
        numerical ID if of of them is not None"""
        super(CreateFSItemOperation, self).__init__(transaction)
        self._assert_posix()
        self._path = Path(path)
        self._content = initial_file_content
        self._mode = mode
        self._uid = uid
        self._gid = gid

    def apply(self):
        if self._content and self._path.isdir() or not self._content and self._path.isfile():
            raise AssertionError(
                "Cannot create item of type directory or file as the an equally named item of different type exists")
        # END sanity check

        if self._dry_run():
            return

        if self._path.exists():
            return
        # end ignore existing items of the same type

        # we don't do it the most efficient way, as we could specify certain things
        # at creation. For now, we don't do it though as it shouldn't matter
        if self._content:
            self.log.info("creating file  %s", self._path)
            self._path.write_bytes(self._content)
        else:
            self.log.info("creating directory %s", self._path)
            self._path.makedirs()
        # END initial creation
        self._operation_performed = True

        if self._mode is not None:
            self._path.chmod(self._mode)
        # END handle mode

        self.set_user_group(self._path, self._gid, self._uid)

    def rollback(self):
        try:
            if not self._operation_performed or not self._path.exists():
                return

            if self._content:
                self.log.info("Removing file %s", self._path)
                self._path.remove()
            else:
                self.log.info("Removing single directory %s", self._path)
                self._path.rmdir()
            # END handle removal, safely as we don't recursively delete anything
        finally:
            self._reset_state()
Example #27
File: finder.py Project: Byron/bit
    def update(self, known_only=False):
        """Update our set of dropboxes to represent the latest state on disk
        @param known_only if True will not actually search for new dropboxes, but only check if existing dropboxes
        have had their configuration changed or were removed
        @return self"""
        def update_stat(dbpath, stat, db):
            try:
                new_stat = dbpath.stat()
            except OSError:
                del self.dropboxes[dbpath]
                self._dropbox_removed(db)
            else:
                if new_stat.st_size != stat.st_size or new_stat.st_mtime != stat.st_mtime:
                    self.dropboxes[dbpath] = (new_stat, db)
                    self._dropbox_changed(stat, new_stat, db)
                # end handle change
            # end handle dropbox doesn't exist
        # end utility to test stat

        if known_only:
            for dbpath, (stat, db) in self.dropboxes.iteritems():
                update_stat(dbpath, stat, db)
            # end for each stat, db
        else:
            seen_paths = set()
            for search_base in self.paths:
                if search_base.endswith(os.path.sep):
                    search_base = Path(search_base[:-1])
                # end assure we don't end with slash
                if not search_base.isdir():
                    log.warn("Skipping unaccessible search base at '%s'", search_base)
                    continue
                # end 
                log.debug("Searching for dropboxes under '%s' (depth=%i, glob='%s')", 
                                                search_base, self.max_depth, self.config_file_glob)

                num_dropboxes = 0  # Amount of dropboxes found for this search base
                for root, dirs, files in os.walk(search_base):
                    if root[len(search_base):].count(os.path.sep) == self.max_depth - 1:
                        del dirs[:]
                    # end handle aborting recursion

                    for match in (f for f in files if fnmatch(f, self.config_file_glob)):
                        dbpath = Path(root) / match
                        seen_paths.add(dbpath)
                        num_dropboxes += 1
                        if dbpath in self.dropboxes:
                            # check for change
                            stat, db = self.dropboxes[dbpath]
                            update_stat(dbpath, stat, db)
                        else:
                            # handle new dropbox
                            try:
                                stat = dbpath.stat()
                            except OSError:
                                log.error("Couldn't stat dropbox configuration at '%s' even though it was found during search", dbpath)
                            else:
                                dropbox = self.DropboxType(dbpath)
                                self.dropboxes[dbpath] = (stat, dropbox)
                                self._dropbox_added(stat, dropbox)
                            # end handle inaccessible config file (invalid ACL ?)
                        # end handle update or new
                    # end handle each match
                # end for each root, dir, files
                if num_dropboxes == 0:
                    log.warn("Didn't find a single dropbox in search base '%s'", search_base)
                # end info log
            # end for each search_base

            # Check for deleted
            for deleted_db_path in (set(self.dropboxes.keys()) - seen_paths):
                stat, db = self.dropboxes[deleted_db_path]
                del self.dropboxes[deleted_db_path]
                self._dropbox_removed(stat, db)
            # end for each deleted
        # end handle known only
        return self
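In essence, the discovery pass above is a depth-limited os.walk() combined with an fnmatch filter, plus a stat snapshot per configuration file so that added, changed and removed dropboxes can be told apart on the next run. A minimal standalone sketch of just the directory scan (the function name, default glob and depth value are made up for illustration):

import os
from fnmatch import fnmatch

def find_config_files(search_base, glob='*.dropbox', max_depth=2):
    """Yield paths of files below search_base, at most max_depth levels deep, whose name matches glob"""
    search_base = search_base.rstrip(os.path.sep)
    for root, dirs, files in os.walk(search_base):
        # stop descending once we are max_depth levels below the search base
        if root[len(search_base):].count(os.path.sep) == max_depth - 1:
            del dirs[:]
        # end handle recursion limit
        for name in files:
            if fnmatch(name, glob):
                yield os.path.join(root, name)
        # end for each file
    # end for each directory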
Ejemplo n.º 28
0
    def resolve_arg(self, arg, env):
        """@return the argument with any environment variables resolved
        @note this method exists primarily for interception by subclasses"""
        return str(Path._expandvars_deep(arg, env))
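Path._expandvars_deep is a private helper, so its exact behaviour is an assumption here; a rough standalone stand-in capturing the idea of "deep" expansion (repeatedly substituting $VAR / ${VAR} references from the given mapping until the string stops changing) might look like this - the function name, regex and pass limit are illustrative only:

import re

_var_re = re.compile(r'\$(\w+|\{[^}]*\})')

def expandvars_deep(arg, env, max_passes=10):
    """Repeatedly substitute $VAR / ${VAR} references from 'env' until the string stops changing"""
    def sub(match):
        name = match.group(1).strip('{}')
        return env.get(name, match.group(0))
    # end substitution helper
    for _ in range(max_passes):
        expanded = _var_re.sub(sub, arg)
        if expanded == arg:
            break
        arg = expanded
    # end expansion loop
    return arg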
Ejemplo n.º 29
0
Archivo: rsync.py Proyecto: Byron/bcore
class RsyncOperation(Operation):

    """An operation which allows to safely copy a source file or directory to a
    given destination file or directory.
    The location of the rsync-program is currently taken assumed.
    @note only works on linux (even though theoretically, rsync might exist on windows"""

    __slots__ = ("_source_path",
                 "_destination_path",
                 "_actual_destination_path",
                 "_move_mode",
                 "_current_path",
                 "_total_num_files_transferred",
                 "_num_files_transferred",
                 "_total_transferred_filesize_bytes",
                 "_transferred_filesize_bytes",
                 "_current_total_transferred_filesize_bytes",
                 "_seen_progress_for_current_file",
                 "_current_bandwidth",
                 "_start_time",
                 '_last_time_left_s',
                 "_process",
                 "_destination_existed",
                 "_actual_destination_existed",
                 "_max_bandwidth_kb"
                 )

    # -------------------------
    # @name Constants
    # @{

    NUM_FILES = "Number of files transferred: "
    TRANSFERRED_BYTES = "Total file size: "

    # -- End Constants -- @}

    # -------------------------
    # @name Configuration
    # @{

    name = "rsync"
    description = "Synchronize directory structures or copy files"

    re_is_path = re.compile(r"^/?[\w\-]+(?:/[\w\-\.]+)*$")
    re_progress = re.compile(r"(?P<bytes>\d+)\s+(?P<percent>\d+)%\s+(?P<bandwidth>\d+\.\d+\w+/\w)\s+.*")
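    # Illustrative (made-up) rsync output lines, after stripping, that these patterns are meant to match:
    #   "some/dir/file.ext"                       -> re_is_path  (name of the file currently transferred)
    #   "27131904  51%    2.44MB/s    0:00:10"    -> re_progress (per-file progress line)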

    rsync_path = "/usr/bin/rsync"
    rm_path = "/bin/rm"

    # -- End Configuration -- @}

    def __init__(self, transaction, source, destination, move=False, max_bandwidth_kb=0):
        """initialize an rsync operation with a source and destination path.
        If move is True, the source will be deleted after a successful rsync operation.
        An operation is successful if there were no error lines in stderr of the process, and if
        If the maximum bandwidth is greater 0, the rsync operation will be using no more than the given
        bandwidth in kilobytes.
        the return code was 0."""
        super(RsyncOperation, self).__init__(transaction)

        if os.name != "posix":
            raise AssertionError("This module only works on posix systems")
        # END handle posix

        self._source_path = Path(source).expandvars()
        self._destination_path = self._actual_destination_path = Path(destination).expandvars()
        self._destination_existed = self._destination_path.exists()
        # rsync creates at most one parent directory if it does not exist. It will always put
        # directories into other directories, unless you specify a / at the end of the source.
        #
        # In case of files, it will always put them into existing directories, or rename them to the given
        # name
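        #
        # Illustration of the trailing-slash behaviour relied upon here (hypothetical paths):
        #   rsync -a /data/project  /backup   -> creates /backup/project/...
        #   rsync -a /data/project/ /backup   -> copies the directory's contents directly into /backup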
        if ((self._source_path.isdir() and not self._source_path.tolinuxpath().endswith('/')) or
                (self._source_path.isfile() and not self._destination_existed or self._destination_path.isdir())):
            self._actual_destination_path = self._destination_path / self._source_path.basename() + "/"
            # / as we have to assure it copies the contents of the directory in case of undo
        # END assure destination
        self._actual_destination_existed = self._actual_destination_path.exists()
        self._move_mode = move
        self._max_bandwidth_kb = max_bandwidth_kb

        self._current_path = None
        self._total_num_files_transferred = 0
        self._total_transferred_filesize_bytes = 0
        self._process = None
        self._reset_current_state()

    def _reset_current_state(self):
        """Reset the current values that will be counted in the following invokation"""
        self._start_time = 0
        self._num_files_transferred = 0
        self._transferred_filesize_bytes = 0
        self._current_total_transferred_filesize_bytes = 0
        self._seen_progress_for_current_file = False
        self._current_bandwidth = None
        self._last_time_left_s = None

    def _set_current_file(self, path):
        """set the path of the file being currently transferred, adjust state"""
        # if there is progress for the previous file, we count it
        # This will make us miss the last file, but it's okay ...
        if self._seen_progress_for_current_file:
            self._num_files_transferred += 1
            self._current_total_transferred_filesize_bytes += self._transferred_filesize_bytes

            self._transferred_filesize_bytes = 0
            self._seen_progress_for_current_file = False
        # END handle count
        self._current_path = path

    def _handle_progress_match(self, match):
        """Check the match regex and adjust our state"""
        self._current_bandwidth = match.group("bandwidth")
        self._transferred_filesize_bytes = int(match.group("bytes"))

    def _update_progress(self, gather_mode=False):
        """Use our state to produce a progresss
        @param gather_mode in gather mode, we will just present a count up"""
        self._abort_point()
        self._seen_progress_for_current_file = True
        prog = self._progress()

        if gather_mode:
            prog.set(self._num_files_transferred, message="Gathering Files ... %i" % self._num_files_transferred)
        else:
            # estimate the remaining time from the average transfer rate so far
            remaining_bytes = (
                self._total_transferred_filesize_bytes - self._current_total_transferred_filesize_bytes)
            time_taken = max(1, time.time() - self._start_time)
            time_left_s = remaining_bytes / \
                max(1, (self._current_total_transferred_filesize_bytes / time_taken))
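            # e.g. (hypothetical numbers): 1 GB total, 400 MB done after 100s
            # -> rate of 4 MB/s -> roughly 150s left for the remaining 600 MB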

            # remaining time
            suffix = "second"
            divisor = 1
            if time_left_s >= 60.0:
                divisor = 60
                suffix = "minute"
            # END handle suffix
            if time_left_s > 1:
                suffix += "s"
            # END handle plural

            if self._last_time_left_s is not None:
                time_left_s = (time_left_s + self._last_time_left_s) / 2.0      # soften jumps a bit
            # END average values

            msg = "Transferring %s at %s - %i files left, done in about %i %s" % (
                self._current_path, self._current_bandwidth, self._total_num_files_transferred - self._num_files_transferred, int(time_left_s / divisor), suffix)
            prog.set(self._num_files_transferred, message=msg)
            self._last_time_left_s = time_left_s
        # END handle gather mode

    def _parse_output_line(self, line):
        """Parse a single line and adjust our state accordingly
        @return 1  == file, 2  == progress, 3 == stats, False otherwise"""
        # FILENAME
        ###########
        line = line.strip()  # strip trailing newline
        if self.re_is_path.match(line) is not None:
            self._set_current_file(line)
            return 1
        # END check for path

        # PROGRESS
        ###########
        # parse "    27131904  51%    2.44MB/s    0:00:10  "
        m = self.re_progress.match(line)
        if m:
            self._handle_progress_match(m)
            self._update_progress()
            return 2
        # END handle progress

        # STATS
        ###########
        if line.startswith(self.NUM_FILES):
            self._total_num_files_transferred = int(line[len(self.NUM_FILES):])
            return 3
        # END check file count

        if line.startswith(self.TRANSFERRED_BYTES):
            self._total_transferred_filesize_bytes = int(line[len(self.TRANSFERRED_BYTES):].split(" ")[0])
            return 3
        # END check transferred bytes
        return False

    def _force_removal(self, destination):
        """Forcefully delete given directory or file, linux only.
        @throws OSError"""
        self.log.info("about to remove directory at %s ... " % destination)
        rval = subprocess.call([self.rm_path, "-Rf", str(destination)])
        if rval != 0:
            raise OSError("Failed to remove file or directory that we managed to copy previously: %s" % destination)
        self.log.info("... done removing destination path")

    def _parse_output(self, gather_mode=False):
        """Parse the output of the rsync process and set the progress accordingly
        @param gather_mode if True, we will only read the standard output, which
            may speed up processing. This way, we only count the files that are actually listed
        @return error data as a list of chunks; individual lines are contained within those chunks"""
        empty_list = list()
        timeout = 0.1
        err_data = list()
        process = self._process

        # GATHER MODE
        ##################
        try:
            if gather_mode:
                while True:
                    line = process.stdout.readline().decode()
                    if not line:
                        return err_data
                    if self._parse_output_line(line) == 1:
                        self._update_progress(gather_mode)
                    # END update progress only if we parsed something
                # END loop forever
                return err_data
            # END handle stderr

            # RUN MODE
            ###########
            # Set stderr to non-blocking to allow simple reads
            fl = fcntl.fcntl(process.stderr.fileno(), fcntl.F_GETFL)
            fcntl.fcntl(process.stderr.fileno(), fcntl.F_SETFL, fl | os.O_NONBLOCK)

            while process.poll() is None:
                # stdout is in blocking mode, so we can read lines accordingly
                # try to read as many as possible, as long as there is something to read
                while select([process.stdout.fileno()], empty_list, empty_list, timeout)[0]:
                    line = process.stdout.readline().decode()
                    if not line:
                        break
                    self._parse_output_line(line)
                # END handle standard output

                try:
                    # from stderr we expect not much output if at all, so poll it from time to time
                    err = process.stderr.read()
                    if err:
                        err_data.append(err)
                    # END gather errors
                except IOError:
                    # it can happen that the process goes down in the process of reading stdout
                    # Therefore we fail to read - lets just try again in this case
                    continue
                # END handle invalid reads

                time.sleep(timeout)
            # END while process is active
        finally:
            # if we don't close the handles, the process object will stay around even after the
            # handle gets deleted - an easy thing to forget, so close the pipes explicitly here
            if process.stdout:
                process.stdout.close()
            if process.stderr:
                process.stderr.close()
        # END assure pipes are closed

        return err_data

    # -------------------------
    # @name Interface Implementation
    # @{

    def apply(self):
        try:
            # assure that directories will have their content copied
            def_args = ("-a", "--append", "--progress", self._source_path, self._destination_path)

            def proc(args, gather_mode=False):
                self.log.info(" ".join(args))
                return subprocess.Popen(args, stdin=None, stdout=subprocess.PIPE,
                                        stderr=subprocess.PIPE if not gather_mode else None, shell=False)
            # END proc helper

            def handle_process(gather_mode=False):
                # PARSE OUTPUT
                ###############
                try:
                    err_chunks = self._parse_output(gather_mode)
                    if self._current_path is not None:
                        self._set_current_file(None)    # trigger the last file to be registered
                    self._process.wait()
                finally:
                    if self._process.poll() is None:
                        self._process.terminate()
                    # END assure process is terminated
                # END if process is still running here, kill it, as we are likely to be in
                # an exceptional state (user aborted)

                if self._process.returncode != 0 or err_chunks:
                    raise OSError("rsync failed with error code: %i, error was \n%s" %
                                  (self._process.returncode, "".join(err_chunks)))
                # END handle error code
                self._process = None
            # END handle process

            # GATHER RUN
            #############
            # Gather information about the run to determine the required needs
            args = [self.rsync_path, "--dry-run", "--stats"]
            args.extend(def_args)
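            # Illustrative (made-up) --stats lines that the constants above are matched against:
            #   Number of files transferred: 1234
            #   Total file size: 56789012 bytes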

            self._progress().setup(round_robin=True, relative=False)
            self.log.info("Calculating cost of operation ... ")
            self._process = proc(args, True)
            handle_process(gather_mode=True)
            self._reset_current_state()

            # VERIFY FREE SPACE IN DESTINATION
            ##################################
            # destination doesn't necessarily exist, hence we try the parent path as well
            # prefer the actual destination, in case it's a dir - the parent might already be
            # on another mount
            for item in [self._destination_path, self._destination_path.dirname()]:
                if not item.exists():
                    continue
                # END handle missing items

                if not self._total_transferred_filesize_bytes:
                    self.log.info("Wouldn't do any work - skipping transfer operation")
                    return
                # end abort if nothing to do

                fs_info = os.statvfs(item)
                free_bytes_at_destination = fs_info.f_bsize * fs_info.f_bavail
                if self._total_transferred_filesize_bytes >= free_bytes_at_destination:
                    msg = "Insufficient disk space available at %s to copy %s - require %iMB, have %iMB" % (
                        item, self._source_path, self._total_transferred_filesize_bytes / 1024 ** 2, free_bytes_at_destination / 1024 ** 2)
                    raise OSError(msg)
                # END check free space
            # END for each item to try

            args = [self.rsync_path]
            if self._dry_run():
                args.append("--dry-run")
            # END handle dry-run
            if self._max_bandwidth_kb > 0:
                args.append("--bwlimit=%i" % self._max_bandwidth_kb)
            # END handle bandwidth limit
            args.extend(def_args)

            # START PROCESS
            ################
            self.log.info("Starting %s" % (" ".join(args)))
            self._progress().setup(range=(0, self._total_num_files_transferred), relative=True)
            self._start_time = time.time()
            self._process = proc(args)
            handle_process()

            if self._move_mode and not self._dry_run():
                self._force_removal(self._source_path)
            # END handle movemode

        finally:
            if self._process and self._process.poll() is None:
                self.log.error("Terminating child forcefully")
                try:
                    self._process.kill()
                except OSError:
                    pass
                self._process = None
            # END handle process
        # END assure process is killed on error

    def rollback(self):
        # without destination, we couldn't fix anything anyway
        if not self._destination_path.exists():
            return

        # have to reproduce source from destination ?
        if not self._source_path.exists():
            if self._destination_existed:
                self.log.warn("Destination at %s existed - rollback might copy more data than expected" %
                              self._destination_path)
            # END info
            self.log.info("Restoring source from destination ...")

            t = Transaction(self.log, progress=self._progress())
            type(self)(t, self._actual_destination_path + "/", self._source_path)

            self.log.info("rolling back rsync from %s to %s", self._actual_destination_path, self._source_path)
            if not t.apply().succeeded():
                raise IOError(
                    "Expected copy operation to succeed - rollback failed, destination data exists at %s" % self._destination_path)
            # END apply sub-transaction
        # END source doesn't exist

        # finally remove destination if possible
        for destination, existed in ((self._actual_destination_path, self._actual_destination_existed),
                                     (self._destination_path, self._destination_existed)):
            if existed:
                self.log.warn(
                    "Refusing deletion of destination during rollback as it existed before the rsync operation at %s" % destination)
                continue
            # END sanity check
            self._force_removal(destination)
        # END for each pair of possible paths

    # -- End Interface Implementation -- @}

    # -------------------------
    # @name Interface
    # @{

    def actual_destination(self):
        """:return: the destination that will actually receive the copy"""
        return self._actual_destination_path
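A minimal usage sketch, assuming the import paths below and that a Transaction may be built from a logger plus a progress keyword as seen in rollback() above; paths, logger name and bandwidth value are made up for illustration:

import logging

from bcore.rsync import RsyncOperation          # import path assumed for illustration
from bcore.transaction import Transaction       # import path assumed for illustration

log = logging.getLogger('rsync.example')
t = Transaction(log, progress=None)              # passing no progress handler is assumed to be allowed
op = RsyncOperation(t, '/projects/shotA', '/backup/projects', max_bandwidth_kb=5000)
if not t.apply().succeeded():
    log.error("rsync transaction failed; partial data may exist at %s", op.actual_destination())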