def test_new_name_with_empties(self):
    """
    Exercise `new_name` with an empty location and with locations ending in
    a slash or a `.` segment, both as file and as directory.
    """
    tmp = self.get_temp_dir()

    # an empty location must be rejected
    self.assertRaises(AssertionError, new_name, '', is_dir=False)

    # file location with a trailing slash: only a non-empty, unused path
    # is required
    candidate = new_name(tmp + '/', is_dir=False)
    assert candidate
    assert not exists(candidate)

    # file location ending with a dot segment
    candidate = new_name(join(tmp, '.'), is_dir=False)
    assert not exists(candidate)
    assert '_' == fileutils.file_name(candidate)

    # directory location with a trailing slash: any non-empty name is fine
    candidate = new_name(tmp + '/', is_dir=True)
    assert not exists(candidate)
    assert fileutils.file_name(candidate)

    # directory location ending with a dot segment
    candidate = new_name(join(tmp, '.'), is_dir=True)
    assert not exists(candidate)
    assert '_' == fileutils.file_name(candidate)
def test_new_name_with_extensions(self):
    """
    Check the names `new_name` produces for names with an extension in the
    "new_name/ext" test location, as files and as directories.
    """
    test_dir = self.get_test_loc("new_name/ext", copy=True)

    # (requested name, is_dir, expected new name)
    cases = (
        ("test.txt", False, "test_3.txt"),
        ("TEST.txt", False, "TEST_3.txt"),
        ("TEST.tXt", False, "TEST_3.tXt"),
        ("test.txt", True, "test.txt_2"),
        ("teST.txt", True, "teST.txt_2"),
    )
    for requested, as_dir, expected in cases:
        renamed = new_name(join(test_dir, requested), is_dir=as_dir)
        # the proposed path must not exist yet
        assert not exists(renamed)
        assert expected == fileutils.file_name(renamed)
def test_new_name_with_extensions(self):
    """
    Check the names `new_name` produces for names with an extension in the
    'new_name/ext' test location, as files and as directories.
    """
    test_dir = self.get_test_loc('new_name/ext', copy=True)

    def check(requested, as_dir, expected):
        # the proposed path must be unused and carry the expected name
        renamed = new_name(join(test_dir, requested), is_dir=as_dir)
        assert not exists(renamed)
        assert expected == fileutils.file_name(renamed)

    # treated as files
    check('test.txt', False, 'test_3.txt')
    check('TEST.txt', False, 'TEST_3.txt')
    check('TEST.tXt', False, 'TEST_3.tXt')

    # treated as directories
    check('test.txt', True, 'test.txt_2')
    check('teST.txt', True, 'teST.txt_2')
def test_new_name_without_extensions(self):
    """
    Check the names `new_name` produces for names without an extension in
    the 'new_name/noext' test location.
    """
    test_dir = self.get_test_loc('new_name/noext', copy=True)

    # (requested name, is_dir, expected new name)
    cases = (
        ('test', False, 'test_4'),
        ('TEST', False, 'TEST_4'),
        ('test_1', True, 'test_1_1'),
    )
    for requested, as_dir, expected in cases:
        renamed = new_name(join(test_dir, requested), is_dir=as_dir)
        # the proposed path must not exist yet
        assert not exists(renamed)
        assert expected == fileutils.file_name(renamed)
def test_new_name_without_extensions(self):
    """
    Check the names `new_name` produces for names without an extension in
    the "new_name/noext" test location.
    """
    test_dir = self.get_test_loc("new_name/noext", copy=True)

    def check(requested, as_dir, expected):
        # the proposed path must be unused and carry the expected name
        renamed = new_name(join(test_dir, requested), is_dir=as_dir)
        assert not exists(renamed)
        assert expected == fileutils.file_name(renamed)

    # treated as files
    check("test", False, "test_4")
    check("TEST", False, "TEST_4")

    # treated as a directory
    check("test_1", True, "test_1_1")
def write(self, target_dir, transform_path=lambda x: x):
    """
    Write entry to a file or directory saved relatively to the `target_dir`
    and return the path where the file or directory was written or None if
    nothing was written to disk.

    `transform_path` is a callable taking a path and returning a transformed
    path such as resolving relative paths, transliterating non-portable
    characters or other path transformations. The default is a no-op lambda.

    Raise ArchiveErrorIllegalOperationOnClosedArchive if the archive has no
    `archive_struct`. An ArchiveWarning raised while writing a file is not
    propagated: its message is recorded once in `self.warnings`.

    Note: for files, the returned path is the target path computed before
    `extractcode.new_name` is applied, not the unique path that is actually
    opened for writing.
    """
    if not self.archive.archive_struct:
        raise ArchiveErrorIllegalOperationOnClosedArchive()

    # skip links and special files
    if not (self.isfile or self.isdir):
        return

    abs_target_dir = os.path.abspath(os.path.expanduser(target_dir))
    # TODO: return some warning when original path has been transformed
    clean_path = transform_path(self.path)

    if self.isdir:
        # TODO: also rename directories to a new name if needed segment by segment
        dir_path = os.path.join(abs_target_dir, clean_path)
        fileutils.create_dir(dir_path)
        return dir_path

    # note: here isfile=True
    try:
        # create parent directories if needed
        target_path = os.path.join(abs_target_dir, clean_path)
        parent_path = os.path.dirname(target_path)

        # TODO: also rename directories to a new name if needed segment by segment
        fileutils.create_dir(parent_path)

        # TODO: return some warning when original path has been renamed?
        unique_path = extractcode.new_name(target_path, is_dir=False)

        chunk_len = 10240
        sbuffer = create_string_buffer(chunk_len)
        with open(unique_path, 'wb') as target:
            chunk_size = 1
            while chunk_size:
                chunk_size = read_entry_data(
                    self.archive.archive_struct, sbuffer, chunk_len)
                data = sbuffer.raw[0:chunk_size]
                target.write(data)
        os.utime(unique_path, (self.time, self.time))
        return target_path

    except ArchiveWarning as aw:
        # an empty args tuple or an empty joined message both fall back to
        # the default text
        msg = '\n'.join(aw.args) or 'No message provided.'
        if msg not in self.warnings:
            self.warnings.append(msg)
        return target_path
def write(self, target_dir, transform_path=lambda x: x):
    """
    Save this entry under `target_dir` as a file or a directory and return
    the resulting path, or None when nothing is written to disk (links and
    special files are skipped).

    `transform_path` is a callable that receives the entry path and returns
    the path to use instead, for example to resolve relative segments or to
    transliterate non-portable characters. It defaults to a no-op.

    Raise ArchiveErrorIllegalOperationOnClosedArchive if the archive has no
    `archive_struct`. An ArchiveWarning raised while writing a file is
    recorded once in `self.warnings` instead of being propagated.
    """
    if not self.archive.archive_struct:
        raise ArchiveErrorIllegalOperationOnClosedArchive()

    # links and special files are neither files nor directories: skip them
    is_regular = self.isfile or self.isdir
    if not is_regular:
        return

    destination_root = os.path.abspath(os.path.expanduser(target_dir))
    # TODO: return some warning when original path has been transformed
    relative_path = transform_path(self.path)

    if self.isdir:
        # TODO: also rename directories to a new name if needed segment by segment
        directory = os.path.join(destination_root, relative_path)
        fileutils.create_dir(directory)
        return directory

    # from here on the entry is a regular file
    try:
        destination = os.path.join(destination_root, relative_path)

        # make sure the parent directories exist
        # TODO: also rename directories to a new name if needed segment by segment
        fileutils.create_dir(os.path.dirname(destination))

        # TODO: return some warning when original path has been renamed?
        written_path = extractcode.new_name(destination, is_dir=False)

        buffer_size = 10240
        buf = create_string_buffer(buffer_size)
        with open(written_path, 'wb') as out:
            while True:
                read_len = read_entry_data(
                    self.archive.archive_struct, buf, buffer_size)
                out.write(buf.raw[0:read_len])
                if not read_len:
                    break
        os.utime(written_path, (self.time, self.time))
    except ArchiveWarning as warning:
        text = '\n'.join(warning.args) or 'No message provided.'
        if text not in self.warnings:
            self.warnings.append(text)

    return destination
def test_new_name_with_empties(self):
    """
    Check that `new_name` rejects an empty location and proposes an unused
    path named "file" for "/" and "." locations, as file and as directory.
    """
    test_dir = self.get_temp_dir()

    # an empty location must be rejected
    self.assertRaises(AssertionError, new_name, "", is_dir=False)

    # NOTE(review): on POSIX, join(test_dir, "/") evaluates to "/" because an
    # absolute component discards the previous ones -- confirm this is the
    # intended input.
    cases = (
        ("/", False),
        (".", False),
        ("/", True),
        (".", True),
    )
    for segment, as_dir in cases:
        renamed = new_name(join(test_dir, segment), is_dir=as_dir)
        assert not exists(renamed)
        assert "file" == fileutils.file_name(renamed)
def test_new_name_with_empties(self):
    """
    Check that `new_name` rejects an empty location and proposes an unused
    path named 'file' for '/' and '.' locations, as file and as directory.
    """
    test_dir = self.get_temp_dir()

    # an empty location must be rejected
    self.assertRaises(AssertionError, new_name, '', is_dir=False)

    def check(segment, as_dir):
        # the proposed path must be unused and be named 'file'
        renamed = new_name(join(test_dir, segment), is_dir=as_dir)
        assert not exists(renamed)
        assert 'file' == fileutils.file_name(renamed)

    # NOTE(review): on POSIX, join(test_dir, '/') evaluates to '/' because an
    # absolute component discards the previous ones -- confirm this is the
    # intended input.
    check('/', False)
    check('.', False)
    check('/', True)
    check('.', True)
def write(self, target_dir, transform_path=lambda x: x, skip_links=True):
    """
    Write entry to a file or directory saved relatively to the `target_dir`
    and return the path where the file or directory was written or None if
    nothing was written to disk.

    `transform_path` is a callable taking a path and returning a transformed
    path such as resolving relative paths, transliterating non-portable
    characters or other path transformations. The default is a no-op lambda.

    `skip_links` controls symbolic link entries that pass the file/directory
    check: if True they are skipped and None is returned; if False a
    NotImplementedError is raised.

    Raise ArchiveErrorIllegalOperationOnClosedArchive if the archive has no
    `archive_struct`.

    Note: for files, the returned path is the target path computed before
    `extractcode.new_name` is applied, not the unique path that is actually
    opened for writing.
    """
    if TRACE:
        logger.debug('writing entry: {}'.format(self))

    if not self.archive.archive_struct:
        raise ArchiveErrorIllegalOperationOnClosedArchive()

    # skip links and special files
    if not (self.isfile or self.isdir):
        return

    if self.issym:
        if skip_links:
            return
        # NotImplemented is a constant and is not callable: the exception
        # class to raise is NotImplementedError
        raise NotImplementedError(
            'extraction of sym links with librarchive is not yet implemented.'
        )

    abs_target_dir = os.path.abspath(os.path.expanduser(target_dir))
    # TODO: return some warning when original path has been transformed
    clean_path = transform_path(self.path)

    if self.isdir:
        # TODO: also rename directories to a new name if needed segment by segment
        dir_path = os.path.join(abs_target_dir, clean_path)
        fileutils.create_dir(dir_path)
        return dir_path

    # note: here isfile=True
    # create parent directories if needed
    target_path = os.path.join(abs_target_dir, clean_path)
    parent_path = os.path.dirname(target_path)

    # TODO: also rename directories to a new name if needed segment by segment
    fileutils.create_dir(parent_path)

    # TODO: return some warning when original path has been renamed?
    unique_path = extractcode.new_name(target_path, is_dir=False)
    if TRACE:
        logger.debug('path: \ntarget_path: {}\nunique_path: {}'.format(
            target_path, unique_path))

    with open(unique_path, 'wb') as target:
        for content in self.get_content():
            if TRACE_DEEP:
                logger.debug(' chunk: {}'.format(repr(content)))
            target.write(content)

    os.utime(unique_path, (self.time, self.time))
    return target_path
def extract_file_by_file(location, target_dir, arch_type='*', skip_symlinks=True):
    """
    Extract all files using a one-by-one process from a 7zip-supported
    archive file at location in the `target_dir` directory. Return a list of
    warning messages if any or an empty list. Raise exception on errors.

    `arch_type` is the type of 7zip archive passed to the -t 7zip option. Can
    be None.

    `skip_symlinks` is accepted but is not used by this function.

    If no two entry paths differ only by case, the extraction is delegated
    to `extract_all_files_at_once`. Otherwise each file entry is extracted
    to its own temporary directory and then copied to a unique location
    under `target_dir`. Per-entry failures are collected and raised together
    as an ExtractErrorFailedToExtract once all entries are processed.
    """
    abs_target_dir = os.path.abspath(os.path.expanduser(target_dir))

    # errors reported while listing entries are not used here
    entries, _errors_msgs = list_entries(location, arch_type)
    entries = list(entries)

    # Determine if we need a one-by-one approach: technically the approach is
    # to check if we have files that are in the same dir and have the same
    # name when the case is ignored. We take a simpler approach: we check if
    # all paths are unique when we ignore the case: for that we only check
    # that the length of two paths sets are the same: one set as-is and the
    # other lowercased.
    paths_as_is = {e.path for e in entries}
    paths_no_case = {p.lower() for p in paths_as_is}
    need_by_file = len(paths_as_is) != len(paths_no_case)

    if not need_by_file:
        # use regular extract
        return extract_all_files_at_once(
            location=location,
            target_dir=target_dir,
            arch_type=arch_type,
        )

    # now we are extracting one file at a time. this is a tad painful because
    # we are dealing with a full command execution at each time.
    errors = {}
    warnings = {}
    tmp_dir = fileutils.get_temp_dir(prefix='extractcode-extract-')

    for i, entry in enumerate(entries):
        if not entry.is_file:
            continue

        # each entry gets its own numbered temp directory
        tmp_extract_dir = os.path.join(tmp_dir, str(i))
        fileutils.create_dir(tmp_extract_dir)

        ex_args = build_7z_extract_command(
            location=location,
            target_dir=tmp_extract_dir,
            single_entry=entry,
            arch_type=arch_type,
        )
        rc, stdout, stderr = command.execute2(**ex_args)

        error = get_7z_errors(stdout, stderr)
        if error or rc != 0:
            error = error or UNKNOWN_ERROR
            if TRACE:
                logger.debug(
                    'extract: failure: {rc}\n'
                    'stderr: {stderr}\nstdout: {stdout}'.format(
                        rc=rc, stderr=stderr, stdout=stdout))
            errors[entry.path] = error
            continue

        # these are all for a single file path
        warns = get_7z_warnings(stdout) or {}
        wmsg = '\n'.join(warns.values())
        if wmsg:
            if entry.path in warnings:
                warnings[entry.path] += '\n' + wmsg
            else:
                warnings[entry.path] = wmsg

        # finally move that extracted file to its target location, possibly
        # renamed
        source_file_name = fileutils.file_name(entry.path)
        source_file_loc = os.path.join(tmp_extract_dir, source_file_name)
        if not os.path.exists(source_file_loc):
            if entry.path in errors:
                errors[entry.path] += '\nNo file name extracted.'
            else:
                errors[entry.path] = 'No file name extracted.'
            continue

        safe_path = paths.safe_path(entry.path, posix=True)
        # build the target from the expanded absolute directory so that the
        # files land in the same tree that is cleaned up after the loop
        target_file_loc = os.path.join(abs_target_dir, safe_path)
        target_file_dir = os.path.dirname(target_file_loc)
        fileutils.create_dir(target_file_dir)

        unique_target_file_loc = extractcode.new_name(target_file_loc, is_dir=False)

        if TRACE:
            logger.debug(
                'extract: unique_target_file_loc: from {} to {}'.format(
                    target_file_loc, unique_target_file_loc))

        if os.path.isfile(source_file_loc):
            fileutils.copyfile(source_file_loc, unique_target_file_loc)
        else:
            fileutils.copytree(source_file_loc, unique_target_file_loc)

    extractcode.remove_backslashes_and_dotdots(abs_target_dir)

    if errors:
        raise ExtractErrorFailedToExtract(errors)

    return convert_warnings_to_list(warnings)