コード例 #1
0
ファイル: repository.py プロジェクト: Feuermurmel/filemaster
        def move_file(source, dest):
            """Move *source* to *dest* via the repository's rename helper."""
            log('Moving: {} -> {}', relpath(source), relpath(dest))

            # Races with other processes cannot be prevented, but an existing
            # destination at this point would indicate a logic bug in the
            # conflict checks performed earlier.
            assert not dest.exists()

            repo.rename_with_cache_hint(source, dest)
コード例 #2
0
ファイル: repository.py プロジェクト: Feuermurmel/filemaster
 def fail_identical_files(path1, path2):
     """Raise a UserError for two selected files with identical content."""
     if path1 == path2:
         # The very same path was given through more than one argument;
         # report that instead of the generic identical-content message.
         raise UserError(
             'The same file is selected through multiple command line '
             'arguments: {}', relpath(path1))

     # Two distinct paths whose contents are identical.
     raise UserError(
         'Cannot apply an intended path for identical files '
         'simultaneously: {} and {}', relpath(path1), relpath(path2))
コード例 #3
0
ファイル: repository.py プロジェクト: Feuermurmel/filemaster
    def check_create_directory(path, matched_file):
        # TODO: Here and in many places, exists() returns false for symlinks.
        if path.exists():
            # An existing directory is fine; anything else blocks creating
            # the parent chain for matched_file.
            if path.is_dir():
                return

            raise UserError(
                'Cannot create parent directory for {}, path already '
                'exists: {}', relpath(matched_file.path), relpath(path))

        if path in moved_files_by_created_directories:
            # This directory (and thus all its parents) was already checked.
            return

        # Validate all ancestors before recording this directory.
        check_create_directory(path.parent, matched_file)

        # Remember one representative file per directory to be created so a
        # later conflict can be reported with a helpful message.
        moved_files_by_created_directories[path] = matched_file
コード例 #4
0
ファイル: repository.py プロジェクト: Feuermurmel/filemaster
    def process_aggregated_file(aggregated_file):
        # Look up a selected file with the same content hash, if any.
        entry = aggregated_file.index_entry
        matched_file = matched_files_by_hash.get(entry.hash)

        if matched_file is None:
            # No selected file has this hash; keep the entry unmodified.
            return aggregated_file

        new_intended_path = intended_path_fn(matched_file)

        # A return value of None from intended_path_fn clears the intended
        # path; only a real path needs validation.
        if new_intended_path is not None:
            try:
                new_intended_path = \
                    new_intended_path.relative_to(repo.root_dir)

                if new_intended_path == pathlib.Path():
                    # The intended path would point at the root directory
                    # of the repository itself.
                    raise ValueError
            except ValueError:
                raise UserError(
                    'Intended path is outside the repository\'s root '
                    'directory: {}', relpath(new_intended_path))

        return aggregated_file._replace(
            index_entry=entry._replace(intended_path=new_intended_path))
コード例 #5
0
ファイル: repository.py プロジェクト: Feuermurmel/filemaster
def list_files(repo: Repository, file_set: FileSet, summary_only):
    """
    Print the selected files, each with its intended path if it differs
    from the current path, followed by a one-line summary.

    :param repo: The repository whose files are listed.
    :param file_set: Selects the files to list.
    :param summary_only: When true, only the summary line is printed.
    """
    # All files to list.
    items = repo.get_matched_files(file_set)

    if not summary_only:
        # Print each file together with additional information.
        for i in items:
            intended_path = i.aggregated_file.index_entry.intended_path

            # Make the intended path absolute so it can be compared with
            # the file's current (absolute) path.
            if intended_path is not None:
                intended_path = repo.root_dir / intended_path

            print(relpath(i.path))

            # Only display the intended path, if the file is not currently at
            # its intended path.
            if intended_path is None:
                print('  => ?')
            elif intended_path != i.path:
                # intended_path was already made absolute above; prefixing
                # root_dir a second time (as the code used to do) was a
                # redundant no-op because joining with an absolute path
                # discards the left operand.
                print('  =>', relpath(intended_path))

        if items:
            # An empty line before the summary, unless we got no files.
            print()

    def iter_summary_parts():
        # Total number of selected files.
        yield '{} files'.format(len(items))

        # Number of files selected, whose hash has no intended path
        # defined.
        num_without_intended_path = sum(
            1 for i in items
            if i.aggregated_file.index_entry.intended_path is None)

        if num_without_intended_path:
            yield '{} without intended path'.format(num_without_intended_path)

        # The number of duplicates is defined as the number of selected files
        # minus the number of distinct hashes of the selected files.
        distinct_hashes = set(i.aggregated_file.index_entry.hash
                              for i in items)
        num_duplicates = len(items) - len(distinct_hashes)

        if num_duplicates:
            yield '{} duplicates'.format(num_duplicates)

    print('{}.'.format(', '.join(iter_summary_parts())))
コード例 #6
0
ファイル: repository.py プロジェクト: Feuermurmel/filemaster
    def check_move_file(destination, matched_file):
        # A file may not be moved onto an already existing path.
        if destination.exists():
            raise UserError('Cannot move {}, path already exists: {}',
                            relpath(matched_file.path), relpath(destination))

        # Verify that the whole chain of parent directories can be created.
        check_create_directory(destination.parent, matched_file)

        # Two files may not be moved to the same destination.
        conflicting_file = moved_files_by_destination.get(destination)

        if conflicting_file is not None:
            raise UserError('Cannot move both {} and {} to same path: {}',
                            relpath(conflicting_file.path),
                            relpath(matched_file.path), relpath(destination))

        moved_files_by_destination[destination] = matched_file
コード例 #7
0
ファイル: cache.py プロジェクト: Feuermurmel/filemaster
    def update(self, *, file_checked_progress_fn, data_read_progress_fn):
        """
        Update the hashes of all files in the tree and remove entries for
        files which do not exist anymore.

        :param file_checked_progress_fn:
            Called without arguments once per file processed.
        :param data_read_progress_fn:
            Progress callback forwarded to file_digest() while hashing.
        """

        # We can't trust hashes computed for files which do not have a mtime
        # that is smaller than the current time. These files could still be
        # written to without visibly changing their mtime. If we hash such a
        # file we store 0 as their mtime, which forces re-computing the hash
        # next time the tree is scanned.
        current_mtime = self._get_current_mtime()

        # List of updated entries.
        new_entries = []

        # Used to look up cache entries by (path, mtime) while scanning. This
        # includes records from an existing write log. Entries of
        # unchanged paths are copied over to the new entry list.
        entries_by_path_mtime = {
            (i.path, i.mtime): i
            for i in self._store.get() + self._write_log.records
        }

        for path in iter_regular_files(self._root_path, self._filter_fn):
            # TODO: We're stat'ing the file (at least) a second time. iter_regular_files() already had to stat the file.
            stat = _stat_path(path)
            mtime = stat.st_mtime

            # Find a cache entry with correct path and mtime.
            entry = entries_by_path_mtime.get((path, mtime))

            # Hash the file and create a new entry, if none was found.
            if entry is None:
                # Force hashing the file again when the mtime is too recent.
                if mtime >= current_mtime:
                    mtime = 0

                # Only log large files (16 MiB and up); small files hash
                # quickly enough that logging them would just be noise.
                if stat.st_size >= 1 << 24:
                    log('Hashing {} ({}) ...', relpath(path),
                        format_size(stat.st_size))

                hash = file_digest(path, progress_fn=data_read_progress_fn)
                entry = CachedFile(path, mtime, hash)

                # We're using the write log only to prevent losing the work
                # of hashing files.
                self._write_log.append(entry)

            new_entries.append(entry)
            file_checked_progress_fn()

        # Save the new list of entries.
        self._store.set(new_entries)
        self._write_log.flush()
コード例 #8
0
ファイル: repository.py プロジェクト: Feuermurmel/filemaster
def find_filemaster_root(root_dir: pathlib.Path = None):
    """
    Return an absolute, resolved path to the root directory.

    If a root directory is specified, it is tested to be a valid root
    directory. If no root directory is specified, the closest parent
    directory containing a .filemaster directory is used. If no such
    directory can be found, a `UserError` is raised.
    """

    if root_dir is None:
        # An absolute path is needed so that .parents yields every ancestor
        # up to the filesystem root.
        cwd = pathlib.Path.cwd()

        for candidate in [cwd, *cwd.parents]:
            if (candidate / filemaster_dir_name).is_dir():
                root_dir = candidate
                break
        else:
            raise UserError(
                'No {} directory found in the current directory or any of its '
                'parents.', filemaster_dir_name)

    filemaster_dir = root_dir / filemaster_dir_name

    if not filemaster_dir.exists():
        raise UserError('Repository does not exist: {}',
                        relpath(filemaster_dir))

    # Both store files must be present for this to be a usable repository.
    # The generator short-circuits, so the index file is only checked when
    # the cache file exists.
    store_names = [_file_cache_store_name, _file_index_store_name]

    if not all((filemaster_dir / i).is_file() for i in store_names):
        raise UserError('Not a valid repository: {}', relpath(filemaster_dir))

    # The directory is known to exist at this point, so resolving is safe;
    # it makes the path absolute and removes any . and .. components.
    return root_dir.resolve()
コード例 #9
0
ファイル: repository.py プロジェクト: Feuermurmel/filemaster
 def create_directory(path):
     """Create the directory at *path*, logging the operation first."""
     log('Creating directory: {}', relpath(path))
     path.mkdir()
コード例 #10
0
ファイル: repository.py プロジェクト: Feuermurmel/filemaster
 def move_file(source, dest):
     """Dry-run variant: log the move without touching the file system."""
     log('Would move: {} -> {}', relpath(source), relpath(dest))
コード例 #11
0
ファイル: repository.py プロジェクト: Feuermurmel/filemaster
 def create_directory(path):
     """Dry-run variant: log the creation without touching the file system."""
     log('Would create directory: {}', relpath(path))
コード例 #12
0
ファイル: repository.py プロジェクト: Feuermurmel/filemaster
def apply_intended_paths(repo, file_set, *, dry_run=False):
    """
    Move each selected file to its intended path, creating parent
    directories as necessary.

    All planned operations are checked for conflicts — with existing files
    and directories as well as with each other — before anything is changed
    on disk.

    :param repo: The repository whose files are processed.
    :param file_set: Selects the files whose intended paths are applied.
    :param dry_run: When true, only log the operations that would be
        performed instead of performing them.
    :raises UserError: If any planned move or directory creation conflicts.
    """
    if dry_run:

        def create_directory(path):
            # Dry run: only report what would happen.
            log('Would create directory: {}', relpath(path))

        def move_file(source, dest):
            # Dry run: only report what would happen.
            log('Would move: {} -> {}', relpath(source), relpath(dest))
    else:

        def create_directory(path):
            log('Creating directory: {}', relpath(path))
            path.mkdir()

        def move_file(source, dest):
            log('Moving: {} -> {}', relpath(source), relpath(dest))

            # Can't prevent race conditions. But this should catch logic bugs.
            assert not dest.exists()

            repo.rename_with_cache_hint(source, dest)

    # Records changes to the file system before performing them so that we
    # can detect conflicts before doing anything.
    moved_files_by_created_directories = {}
    moved_files_by_destination = {}

    def check_create_directory(path, matched_file):
        # TODO: Here and in many places, exists() returns false for symlinks.
        if path.exists():
            # Raise an error if the parent exists but is not a directory.
            # If it does not exist, it is recorded as a directory to be
            # created.
            if not path.is_dir():
                raise UserError(
                    'Cannot create parent directory for {}, path already '
                    'exists: {}', relpath(matched_file.path), relpath(path))
        elif path not in moved_files_by_created_directories:
            # Recurse first so that all ancestors are validated too.
            check_create_directory(path.parent, matched_file)

            # Record one of the files for which the directory needs to be
            # created so we can have a nice error message on conflict.
            moved_files_by_created_directories[path] = matched_file

    def check_move_file(destination, matched_file):
        # Check for an already existing file.
        if destination.exists():
            raise UserError('Cannot move {}, path already exists: {}',
                            relpath(matched_file.path), relpath(destination))

        # Check that all necessary parents can be created.
        check_create_directory(destination.parent, matched_file)

        other_matched_file = moved_files_by_destination.get(destination)

        # Check for another file to be moved to this destination.
        if other_matched_file is not None:
            raise UserError('Cannot move both {} and {} to same path: {}',
                            relpath(other_matched_file.path),
                            relpath(matched_file.path), relpath(destination))

        moved_files_by_destination[destination] = matched_file

    # We first process all files to be moved and make sure that no operations
    # conflict with already existing files and directories or with each other.
    # There are 4 distinct cases that can arise that need to be checked
    # individually:
    #
    # 1. A file is to be moved to a path that already exists (as a file, a
    #    directory, or something else).
    # 2. Two files are to be moved to the exact same path.
    # 3. A directory needs to be created but ...
    #   3a. ... one of the parents already exists but is not a directory.
    #   3b. ... one of the parents is the destination for a file to be moved.
    for i in repo.get_matched_files(file_set):
        intended_path = i.aggregated_file.index_entry.intended_path

        if intended_path is not None:
            absolute_intended_path = repo.root_dir / intended_path

            # Only files not already at their intended path need moving.
            if i.path != absolute_intended_path:
                check_move_file(absolute_intended_path, i)

    # After generating all operations, check for conflicts between moved
    # files and created directories.
    for path, matched_file in moved_files_by_destination.items():
        other_moved_file = moved_files_by_created_directories.get(path)

        if other_moved_file is not None:
            raise UserError(
                'Cannot create parent directory for {}, {} will be moved to '
                'that path: {}', relpath(other_moved_file.path),
                relpath(matched_file.path), relpath(path))

    # No problems detected; perform the operations. Iterating over the sorted
    # paths gives us the parents before the children.
    for path in sorted(moved_files_by_created_directories):
        create_directory(path)

    # Finally move the files to their recorded destinations.
    for destination, moved_file in moved_files_by_destination.items():
        move_file(moved_file.path, destination)