def move_file(source, dest):
    """Rename *source* to *dest* and let the repository update its cache."""
    log('Moving: {} -> {}', relpath(source), relpath(dest))
    # A race between this check and the rename is unavoidable; the assert
    # exists to catch logic errors in the conflict detection, not races.
    assert not dest.exists()
    repo.rename_with_cache_hint(source, dest)
def fail_identical_files(path1, path2):
    """Raise a UserError for two selections that resolve to the same file.

    A distinct message is produced when the very same path was given twice
    on the command line, as opposed to two different paths with identical
    content.
    """
    if path1 == path2:
        raise UserError(
            'The same file is selected through multiple command line '
            'arguments: {}',
            relpath(path1))

    raise UserError(
        'Cannot apply an intended path for identical files '
        'simultaneously: {} and {}',
        relpath(path1), relpath(path2))
def check_create_directory(path, matched_file):
    """Record *path* (and any missing parents) as directories to create.

    Raises a UserError if *path* exists but is not a directory. Directories
    already recorded are skipped so each is only processed once.
    """
    # TODO: Here and in many places, exists() returns false for symlinks.
    if path.exists():
        # An existing directory needs no work; anything else in the way
        # is a hard conflict.
        if path.is_dir():
            return
        raise UserError(
            'Cannot create parent directory for {}, path already '
            'exists: {}',
            relpath(matched_file.path), relpath(path))

    if path in moved_files_by_created_directories:
        return

    # Ensure all ancestors are checked/recorded first.
    check_create_directory(path.parent, matched_file)

    # Record one of the files for which the directory needs to be
    # created so we can have a nice error message on conflict.
    moved_files_by_created_directories[path] = matched_file
def process_aggregated_file(aggregated_file):
    """Return *aggregated_file* with its intended path updated via intended_path_fn.

    Files whose hash has no matched file are returned unmodified. Raises a
    UserError when the new intended path lies outside the repository root.
    """
    matched_file = \
        matched_files_by_hash.get(aggregated_file.index_entry.hash)

    if matched_file is None:
        # Return the instance unmodified.
        return aggregated_file

    new_intended_path = intended_path_fn(matched_file)

    # intended_path_fn returns None to remove the intended path.
    if new_intended_path is not None:
        try:
            new_intended_path = new_intended_path.relative_to(
                repo.root_dir)

            if new_intended_path == pathlib.Path():
                # The intended path would point at the root directory of
                # the repository. Re-use the ValueError path below to
                # produce the same error message as for outside paths.
                raise ValueError
        except ValueError:
            raise UserError(
                'Intended path is outside the repository\'s root '
                'directory: {}',
                relpath(new_intended_path))

    return aggregated_file._replace(
        index_entry=aggregated_file.index_entry._replace(
            intended_path=new_intended_path))
def list_files(repo: Repository, file_set: FileSet, summary_only):
    """
    Print the selected files followed by a one-line summary.

    When *summary_only* is true, only the summary line is printed. For each
    listed file, its intended path is printed on an indented line, unless
    the file is already at that path; files without an intended path get
    ' => ?'.
    """
    # All files to list.
    items = repo.get_matched_files(file_set)

    if not summary_only:
        # Print each file together with additional information.
        for i in items:
            intended_path = i.aggregated_file.index_entry.intended_path

            if intended_path is not None:
                # Make the intended path absolute so it can be compared
                # with i.path and passed to relpath() directly.
                intended_path = repo.root_dir / intended_path

            print(relpath(i.path))

            # Only display the intended path, if the file is not currently
            # at its intended path.
            if intended_path is None:
                print(' => ?')
            elif intended_path != i.path:
                # Fix: intended_path is already absolute here; the previous
                # code joined root_dir onto it a second time, which pathlib
                # silently ignores for absolute paths but was misleading.
                print(' =>', relpath(intended_path))

        if items:
            # An empty line before the summary, unless we got no files.
            print()

    def iter_summary_parts():
        # Each yielded string becomes one comma-separated summary part.
        yield '{} files'.format(len(items))

        # Number of files selected, whose hash has no intended path
        # defined.
        num_without_intended_path = sum(
            1 for i in items
            if i.aggregated_file.index_entry.intended_path is None)

        if num_without_intended_path:
            yield '{} without intended path'.format(num_without_intended_path)

        # The number of duplicates is defined as the number of selected
        # files minus the number of distinct hashes of the selected files.
        distinct_hashes = set(
            i.aggregated_file.index_entry.hash for i in items)
        num_duplicates = len(items) - len(distinct_hashes)

        if num_duplicates:
            yield '{} duplicates'.format(num_duplicates)

    print('{}.'.format(', '.join(iter_summary_parts())))
def check_move_file(destination, matched_file):
    """Validate and record moving *matched_file* to *destination*.

    Raises a UserError when the destination already exists or another file
    was already recorded for the same destination.
    """
    # Check for an already existing file.
    if destination.exists():
        raise UserError(
            'Cannot move {}, path already exists: {}',
            relpath(matched_file.path), relpath(destination))

    # Check that all necessary parents can be created.
    check_create_directory(destination.parent, matched_file)

    # Check for another file to be moved to this destination.
    conflicting = moved_files_by_destination.get(destination)
    if conflicting is not None:
        raise UserError(
            'Cannot move both {} and {} to same path: {}',
            relpath(conflicting.path),
            relpath(matched_file.path),
            relpath(destination))

    moved_files_by_destination[destination] = matched_file
def update(self, *, file_checked_progress_fn, data_read_progress_fn): """ Update the hashes of all files in the tree and remove entries for files which do not exist anymore. """ # We can't trust hashes computed for files which do not have a mtime # that is smaller than the current time. These files could still be # written to without visibly changing their mtime. If we hash such a # file we store 0 as their mtime, which forces re-computing the hash # next time the tree is scanned. current_mtime = self._get_current_mtime() # List of updated entries. new_entries = [] # Used to look up cache entries by path while scanning. This # includes records from an existing write log. Entries of # unchanged paths are copied to new_cache_files. entries_by_path_mtime = { (i.path, i.mtime): i for i in self._store.get() + self._write_log.records } for path in iter_regular_files(self._root_path, self._filter_fn): # TODO: We're stat'ing the file (at least) a second time. iter_regular_files() already had to stat the file. stat = _stat_path(path) mtime = stat.st_mtime # Find a cache entry with correct path and mtime. entry = entries_by_path_mtime.get((path, mtime)) # Hash the file and create a new entry, if non was found. if entry is None: # Force hashing the file again when the mtime is too recent. if mtime >= current_mtime: mtime = 0 # Do not log small files. if stat.st_size >= 1 << 24: log('Hashing {} ({}) ...', relpath(path), format_size(stat.st_size)) hash = file_digest(path, progress_fn=data_read_progress_fn) entry = CachedFile(path, mtime, hash) # We're using the write log only to prevent losing the work # of hashing files. self._write_log.append(entry) new_entries.append(entry) file_checked_progress_fn() # Save the new list of entries. self._store.set(new_entries) self._write_log.flush()
def find_filemaster_root(root_dir: pathlib.Path = None):
    """
    Return an absolute, resolved path to the root directory.

    If a root directory is specified, it is tested to be a valid root
    directory. If no root directory is specified, the closest parent
    directory containing a .filemaster directory is used. If no such
    directory can be found, a `UserError` is raised.
    """
    if root_dir is None:
        # Start from an absolute path so that .parents enumerates every
        # ancestor up to the filesystem root.
        current_dir = pathlib.Path.cwd()

        for candidate in [current_dir, *current_dir.parents]:
            if (candidate / filemaster_dir_name).is_dir():
                root_dir = candidate
                break
        else:
            raise UserError(
                'No {} directory found in the current directory or any of its '
                'parents.',
                filemaster_dir_name)

    filemaster_dir = root_dir / filemaster_dir_name

    if not filemaster_dir.exists():
        raise UserError(
            'Repository does not exist: {}', relpath(filemaster_dir))

    # Both store files must be present for the directory to count as a
    # valid repository.
    has_store_files = (
        (filemaster_dir / _file_cache_store_name).is_file()
        and (filemaster_dir / _file_index_store_name).is_file())

    if not has_store_files:
        raise UserError('Not a valid repository: {}', relpath(filemaster_dir))

    # Now that we know that the directory exists, we can safely resolve it
    # to make it absolute and also get rid of any . and .. components.
    return root_dir.resolve()
def create_directory(path):
    """Create *path* as a new directory after logging the action."""
    log('Creating directory: {}', relpath(path))
    path.mkdir()
def move_file(source, dest):
    """Dry-run variant: only log the move that would be performed."""
    log('Would move: {} -> {}', relpath(source), relpath(dest))
def create_directory(path):
    """Dry-run variant: only log the directory that would be created."""
    log('Would create directory: {}', relpath(path))
def apply_intended_paths(repo, file_set, *, dry_run=False):
    """Move each selected file to its intended path, creating directories.

    All operations are validated for conflicts before anything touches the
    file system. With dry_run=True, the planned operations are only logged.
    Raises a UserError on any detected conflict.
    """
    if dry_run:
        # Dry-run variants only log what would happen.
        def create_directory(path):
            log('Would create directory: {}', relpath(path))

        def move_file(source, dest):
            log('Would move: {} -> {}', relpath(source), relpath(dest))
    else:
        def create_directory(path):
            log('Creating directory: {}', relpath(path))
            path.mkdir()

        def move_file(source, dest):
            log('Moving: {} -> {}', relpath(source), relpath(dest))
            # Can't prevent race conditions. But this should catch logic bugs.
            assert not dest.exists()
            repo.rename_with_cache_hint(source, dest)

    # Records changes to the file system before performing them so that we
    # can detect conflicts before doing anything.
    moved_files_by_created_directories = {}
    moved_files_by_destination = {}

    def check_create_directory(path, matched_file):
        # Record path (and its missing parents) as directories to create,
        # raising on conflicts with existing non-directories.
        # TODO: Here and in many places, exists() returns false for symlinks.
        if path.exists():
            # Raise an error if the parent exists but is not a directory.
            # If it does not exist, it is recorded as a directory to be
            # created.
            if not path.is_dir():
                raise UserError(
                    'Cannot create parent directory for {}, path already '
                    'exists: {}',
                    relpath(matched_file.path), relpath(path))
        elif path not in moved_files_by_created_directories:
            check_create_directory(path.parent, matched_file)

            # Record one of the files for which the directory needs to be
            # created so we can have a nice error message on conflict.
            moved_files_by_created_directories[path] = matched_file

    def check_move_file(destination, matched_file):
        # Validate and record a single planned move.
        # Check for an already existing file.
        if destination.exists():
            raise UserError(
                'Cannot move {}, path already exists: {}',
                relpath(matched_file.path), relpath(destination))

        # Check that all necessary parents can be created.
        check_create_directory(destination.parent, matched_file)

        other_matched_file = moved_files_by_destination.get(destination)

        # Check for another file to be moved to this destination.
        if other_matched_file is not None:
            raise UserError(
                'Cannot move both {} and {} to same path: {}',
                relpath(other_matched_file.path),
                relpath(matched_file.path),
                relpath(destination))

        moved_files_by_destination[destination] = matched_file

    # We first process all files to be moved and make sure that no operations
    # conflict with already existing files and directories or with each other.
    # There are 4 distinct cases that can arise that need to be checked
    # individually:
    #
    # 1. A file is to be moved to a path that already exists (as a file, a
    #    directory, or something else).
    # 2. Two files are to be moved to the exact same path.
    # 3. A directory needs to be created but ...
    #    3a. ... one of the parents already exists but is not a directory.
    #    3b. ... one of the parents is the destination for a file to be moved.
    for i in repo.get_matched_files(file_set):
        intended_path = i.aggregated_file.index_entry.intended_path

        if intended_path is not None:
            absolute_intended_path = repo.root_dir / intended_path

            # Files already at their intended path need no move.
            if i.path != absolute_intended_path:
                check_move_file(absolute_intended_path, i)

    # After generating all operations, check for conflicts between moved
    # files and created directories (case 3b above).
    for path, matched_file in moved_files_by_destination.items():
        other_moved_file = moved_files_by_created_directories.get(path)

        if other_moved_file is not None:
            raise UserError(
                'Cannot create parent directory for {}, {} will be moved to '
                'that path: {}',
                relpath(other_moved_file.path),
                relpath(matched_file.path),
                relpath(path))

    # No problems detected. Create the directories and move the files.
    # Iterating over these sorted will give us the parents before the
    # children.
    for path in sorted(moved_files_by_created_directories):
        create_directory(path)

    for destination, moved_file in moved_files_by_destination.items():
        move_file(moved_file.path, destination)