Beispiel #1
0
    def test_new_name_with_empties(self):
        """
        Exercise new_name() with an empty location and with locations ending
        with a slash or a lone dot segment, both as files and as directories.
        """
        tmp_dir = self.get_temp_dir()
        with_slash = tmp_dir + '/'
        with_dot = join(tmp_dir, '.')

        # an empty location is expected to raise an AssertionError
        self.assertRaises(AssertionError, new_name, '', is_dir=False)

        # file, trailing slash: only a non-empty, non-existing path is checked
        new_file = new_name(with_slash, is_dir=False)
        assert new_file
        assert not exists(new_file)

        # file, dot segment: the new file name is a lone underscore
        new_file = new_name(with_dot, is_dir=False)
        assert not exists(new_file)
        assert '_' == fileutils.file_name(new_file)

        # directory, trailing slash: only a non-empty name is checked
        new_dir = new_name(with_slash, is_dir=True)
        assert not exists(new_dir)
        assert fileutils.file_name(new_dir)

        # directory, dot segment: the new directory name is a lone underscore
        new_dir = new_name(with_dot, is_dir=True)
        assert not exists(new_dir)
        assert '_' == fileutils.file_name(new_dir)
    def test_new_name_with_extensions(self):
        """
        Check the names returned by new_name() for names with an extension in
        the "new_name/ext" test location, as files and as directories.
        """
        test_dir = self.get_test_loc("new_name/ext", copy=True)

        # (requested name, is_dir flag, expected new file name)
        cases = (
            ("test.txt", False, "test_3.txt"),
            ("TEST.txt", False, "TEST_3.txt"),
            ("TEST.tXt", False, "TEST_3.tXt"),
            ("test.txt", True, "test.txt_2"),
            ("teST.txt", True, "teST.txt_2"),
        )
        for requested, as_dir, expected in cases:
            renamed = new_name(join(test_dir, requested), is_dir=as_dir)
            assert not exists(renamed)
            assert expected == fileutils.file_name(renamed)
Beispiel #3
0
    def test_new_name_with_extensions(self):
        """
        Check the names returned by new_name() for names with an extension in
        the 'new_name/ext' test location, as files and as directories.
        """
        test_dir = self.get_test_loc('new_name/ext', copy=True)

        # (requested name, is_dir flag, expected new file name)
        cases = (
            ('test.txt', False, 'test_3.txt'),
            ('TEST.txt', False, 'TEST_3.txt'),
            ('TEST.tXt', False, 'TEST_3.tXt'),
            ('test.txt', True, 'test.txt_2'),
            ('teST.txt', True, 'teST.txt_2'),
        )
        for requested, as_dir, expected in cases:
            renamed = new_name(join(test_dir, requested), is_dir=as_dir)
            assert not exists(renamed)
            assert expected == fileutils.file_name(renamed)
    def test_new_name_with_empties(self):
        """
        Exercise new_name() with an empty location and with locations ending
        with a slash or a lone dot segment, both as files and as directories.
        """
        tmp_dir = self.get_temp_dir()
        with_slash = tmp_dir + '/'
        with_dot = join(tmp_dir, '.')

        # an empty location is expected to raise an AssertionError
        self.assertRaises(AssertionError, new_name, '', is_dir=False)

        # file, trailing slash: only a non-empty, non-existing path is checked
        new_file = new_name(with_slash, is_dir=False)
        assert new_file
        assert not exists(new_file)

        # file, dot segment: the new file name is a lone underscore
        new_file = new_name(with_dot, is_dir=False)
        assert not exists(new_file)
        assert '_' == fileutils.file_name(new_file)

        # directory, trailing slash: only a non-empty name is checked
        new_dir = new_name(with_slash, is_dir=True)
        assert not exists(new_dir)
        assert fileutils.file_name(new_dir)

        # directory, dot segment: the new directory name is a lone underscore
        new_dir = new_name(with_dot, is_dir=True)
        assert not exists(new_dir)
        assert '_' == fileutils.file_name(new_dir)
    def test_new_name_with_extensions(self):
        """
        Check the names returned by new_name() for names with an extension in
        the 'new_name/ext' test location, as files and as directories.
        """
        test_dir = self.get_test_loc('new_name/ext', copy=True)

        # (requested name, is_dir flag, expected new file name)
        cases = (
            ('test.txt', False, 'test_3.txt'),
            ('TEST.txt', False, 'TEST_3.txt'),
            ('TEST.tXt', False, 'TEST_3.tXt'),
            ('test.txt', True, 'test.txt_2'),
            ('teST.txt', True, 'teST.txt_2'),
        )
        for requested, as_dir, expected in cases:
            renamed = new_name(join(test_dir, requested), is_dir=as_dir)
            assert not exists(renamed)
            assert expected == fileutils.file_name(renamed)
Beispiel #6
0
    def test_new_name_without_extensions(self):
        """
        Check the names returned by new_name() for names without an extension
        in the 'new_name/noext' test location, as files and as directories.
        """
        test_dir = self.get_test_loc('new_name/noext', copy=True)

        # (requested name, is_dir flag, expected new file name)
        cases = (
            ('test', False, 'test_4'),
            ('TEST', False, 'TEST_4'),
            ('test_1', True, 'test_1_1'),
        )
        for requested, as_dir, expected in cases:
            renamed = new_name(join(test_dir, requested), is_dir=as_dir)
            assert not exists(renamed)
            assert expected == fileutils.file_name(renamed)
    def test_new_name_without_extensions(self):
        """
        Check the names returned by new_name() for names without an extension
        in the "new_name/noext" test location, as files and as directories.
        """
        test_dir = self.get_test_loc("new_name/noext", copy=True)

        # (requested name, is_dir flag, expected new file name)
        cases = (
            ("test", False, "test_4"),
            ("TEST", False, "TEST_4"),
            ("test_1", True, "test_1_1"),
        )
        for requested, as_dir, expected in cases:
            renamed = new_name(join(test_dir, requested), is_dir=as_dir)
            assert not exists(renamed)
            assert expected == fileutils.file_name(renamed)
    def test_new_name_without_extensions(self):
        """
        Check the names returned by new_name() for names without an extension
        in the 'new_name/noext' test location, as files and as directories.
        """
        test_dir = self.get_test_loc('new_name/noext', copy=True)

        # (requested name, is_dir flag, expected new file name)
        cases = (
            ('test', False, 'test_4'),
            ('TEST', False, 'TEST_4'),
            ('test_1', True, 'test_1_1'),
        )
        for requested, as_dir, expected in cases:
            renamed = new_name(join(test_dir, requested), is_dir=as_dir)
            assert not exists(renamed)
            assert expected == fileutils.file_name(renamed)
    def write(self, target_dir, transform_path=lambda x: x):
        """
        Write entry to a file or directory saved relatively to the `target_dir` and
        return the path where the file or directory was written or None if nothing
        was written to disk. `transform_path` is a callable taking a path and
        returning a transformed path such as resolving relative paths,
        transliterating non-portable characters or other path transformations.
        The default is a no-op lambda.

        Raise ArchiveErrorIllegalOperationOnClosedArchive if the archive has no
        archive_struct. An ArchiveWarning raised while writing a file is not
        propagated: its message is appended to `self.warnings` instead.

        NOTE: for a file, the returned path is the requested target path, even
        when the data was written to a different unique path from new_name().
        """
        if not self.archive.archive_struct:
            raise ArchiveErrorIllegalOperationOnClosedArchive()
        # skip links and special files
        if not (self.isfile or self.isdir):
            return
        abs_target_dir = os.path.abspath(os.path.expanduser(target_dir))
        # TODO: return some warning when original path has been transformed
        clean_path = transform_path(self.path)

        if self.isdir:
            # TODO: also rename directories to a new name if needed segment by segment
            dir_path = os.path.join(abs_target_dir, clean_path)
            fileutils.create_dir(dir_path)
            return dir_path

        # note: here isfile=True
        try:
            # create parent directories if needed
            target_path = os.path.join(abs_target_dir, clean_path)
            parent_path = os.path.dirname(target_path)

            # TODO: also rename directories to a new name if needed segment by segment
            fileutils.create_dir(parent_path)

            # TODO: return some warning when original path has been renamed?
            unique_path = extractcode.new_name(target_path, is_dir=False)

            chunk_len = 10240
            sbuffer = create_string_buffer(chunk_len)
            with open(unique_path, 'wb') as target:
                # read until read_entry_data() reports a zero-length chunk
                chunk_size = 1
                while chunk_size:
                    chunk_size = read_entry_data(self.archive.archive_struct,
                                                 sbuffer, chunk_len)
                    data = sbuffer.raw[0:chunk_size]
                    target.write(data)
            # set both the access and modification times to the entry time
            os.utime(unique_path, (self.time, self.time))
            return target_path

        # `except X as name` is valid on Python 2.6+ and 3; the older
        # `except X, name` form is a SyntaxError on Python 3.
        except ArchiveWarning as aw:
            # fall back to a default message when there are no args or when
            # the joined args are empty
            msg = aw.args and '\n'.join(aw.args) or 'No message provided.'
            if msg not in self.warnings:
                self.warnings.append(msg)
            return target_path
    def write(self, target_dir, transform_path=lambda x: x):
        """
        Write this entry as a file or directory under `target_dir`.

        Return the target path of the file or directory, or None when nothing
        is written (the entry is neither a file nor a directory).
        `transform_path` is a callable that takes the entry path and returns
        the path to use under `target_dir`; the default returns it unchanged.

        Raise ArchiveErrorIllegalOperationOnClosedArchive if the archive has no
        archive_struct. An ArchiveWarning raised while writing a file is
        recorded in `self.warnings` and the target path is still returned.
        """
        if not self.archive.archive_struct:
            raise ArchiveErrorIllegalOperationOnClosedArchive()

        # links and special files are skipped
        if not self.isfile and not self.isdir:
            return

        # TODO: return some warning when original path has been transformed
        destination = os.path.join(
            os.path.abspath(os.path.expanduser(target_dir)),
            transform_path(self.path)
        )

        if self.isdir:
            # TODO: also rename directories to a new name if needed segment by segment
            fileutils.create_dir(destination)
            return destination

        # from here on the entry is a file
        try:
            # make sure the parent directories exist
            # TODO: also rename directories to a new name if needed segment by segment
            fileutils.create_dir(os.path.dirname(destination))

            # TODO: return some warning when original path has been renamed?
            unique_path = extractcode.new_name(destination, is_dir=False)

            buffer_len = 10240
            read_buffer = create_string_buffer(buffer_len)
            with open(unique_path, 'wb') as output:
                while True:
                    read_len = read_entry_data(
                        self.archive.archive_struct, read_buffer, buffer_len)
                    output.write(read_buffer.raw[0:read_len])
                    # a zero-length read ends the loop
                    if not read_len:
                        break

            entry_times = (self.time, self.time)
            os.utime(unique_path, entry_times)
            return destination

        except ArchiveWarning as warning:
            # an empty joined message falls back to the default message
            msg = '\n'.join(warning.args) or 'No message provided.'
            if msg not in self.warnings:
                self.warnings.append(msg)
            return destination
    def test_new_name_with_empties(self):
        """
        Check that new_name() rejects an empty location and returns a path
        named "file" for locations built from a "/" or "." segment.
        """
        test_dir = self.get_temp_dir()

        # an empty location is expected to raise an AssertionError
        self.assertRaises(AssertionError, new_name, "", is_dir=False)

        # NOTE(review): if `join` is os.path.join, joining with "/" yields "/"
        # alone on POSIX and test_dir is dropped -- confirm this is intended.
        # (last path segment, is_dir flag)
        cases = (
            ("/", False),
            (".", False),
            ("/", True),
            (".", True),
        )
        for segment, as_dir in cases:
            renamed = new_name(join(test_dir, segment), is_dir=as_dir)
            assert not exists(renamed)
            assert "file" == fileutils.file_name(renamed)
Beispiel #12
0
    def test_new_name_with_empties(self):
        """
        Check that new_name() rejects an empty location and returns a path
        named 'file' for locations built from a '/' or '.' segment.
        """
        test_dir = self.get_temp_dir()

        # an empty location is expected to raise an AssertionError
        self.assertRaises(AssertionError, new_name, '', is_dir=False)

        # NOTE(review): if `join` is os.path.join, joining with '/' yields '/'
        # alone on POSIX and test_dir is dropped -- confirm this is intended.
        # (last path segment, is_dir flag)
        cases = (
            ('/', False),
            ('.', False),
            ('/', True),
            ('.', True),
        )
        for segment, as_dir in cases:
            renamed = new_name(join(test_dir, segment), is_dir=as_dir)
            assert not exists(renamed)
            assert 'file' == fileutils.file_name(renamed)
Beispiel #13
0
    def write(self, target_dir, transform_path=lambda x: x, skip_links=True):
        """
        Write entry to a file or directory saved relatively to the `target_dir` and
        return the path where the file or directory was written or None if nothing
        was written to disk. `transform_path` is a callable taking a path and
        returning a transformed path such as resolving relative paths,
        transliterating non-portable characters or other path transformations.
        The default is a no-op lambda.

        If `skip_links` is True, an entry flagged with `issym` is skipped and
        None is returned. If `skip_links` is False, such an entry raises a
        NotImplementedError.

        Raise ArchiveErrorIllegalOperationOnClosedArchive if the archive has no
        archive_struct.

        NOTE: for a file, the returned path is the requested target path, even
        when the data was written to a different unique path from new_name().
        """
        if TRACE:
            logger.debug('writing entry: {}'.format(self))

        if not self.archive.archive_struct:
            raise ArchiveErrorIllegalOperationOnClosedArchive()
        # skip links and special files
        if not (self.isfile or self.isdir):
            return

        if self.issym:
            if skip_links:
                return
            # NotImplementedError is the exception class; the NotImplemented
            # constant is not callable and calling it raises a TypeError.
            raise NotImplementedError(
                'extraction of sym links with librarchive is not yet implemented.'
            )

        abs_target_dir = os.path.abspath(os.path.expanduser(target_dir))
        # TODO: return some warning when original path has been transformed
        clean_path = transform_path(self.path)

        if self.isdir:
            # TODO: also rename directories to a new name if needed segment by segment
            dir_path = os.path.join(abs_target_dir, clean_path)
            fileutils.create_dir(dir_path)
            return dir_path

        # note: here isfile=True
        # create parent directories if needed
        target_path = os.path.join(abs_target_dir, clean_path)
        parent_path = os.path.dirname(target_path)

        # TODO: also rename directories to a new name if needed segment by segment
        fileutils.create_dir(parent_path)

        # TODO: return some warning when original path has been renamed?
        unique_path = extractcode.new_name(target_path, is_dir=False)
        if TRACE:
            logger.debug('path: \ntarget_path: {}\nunique_path: {}'.format(
                target_path, unique_path))

        with open(unique_path, 'wb') as target:
            for content in self.get_content():
                if TRACE_DEEP:
                    logger.debug('    chunk: {}'.format(repr(content)))
                target.write(content)

        # set both the access and modification times to the entry time
        os.utime(unique_path, (self.time, self.time))

        return target_path
Beispiel #14
0
def extract_file_by_file(location,
                         target_dir,
                         arch_type='*',
                         skip_symlinks=True):
    """
    Extract all files using a one-by-one process from a 7zip-supported archive
    file at location in the `target_dir` directory.

    Return a list of warning messages if any or an empty list.
    Raise exception on errors: an ExtractErrorFailedToExtract is raised with a
    mapping of entry path to error message if any entry failed to extract.

    `arch_type` is the type of 7zip archive passed to the -t 7zip option.
    Can be None.

    `skip_symlinks` is accepted but not used by this function.

    If no two entry paths differ only by case, the extraction is delegated to
    extract_all_files_at_once(). Otherwise each file entry is extracted in its
    own temporary directory and then copied under `target_dir` with a unique
    name obtained from extractcode.new_name().
    """
    # Use the expanded absolute paths everywhere below so that a location or
    # target_dir containing "~" is handled consistently in every step.
    abs_location = os.path.abspath(os.path.expanduser(location))
    abs_target_dir = os.path.abspath(os.path.expanduser(target_dir))

    # NOTE(review): errors_msgs returned by list_entries() is not reported.
    entries, errors_msgs = list_entries(abs_location, arch_type)
    entries = list(entries)

    # Determine if we need a one-by-one approach: technically the approach is
    # to check if we have files that are in the same dir and have the same name
    # when the case is ignored. We take a simpler approach: we check if all
    # paths are unique when we ignore the case: for that we only check that the
    # length of two paths sets are the same: one set as-is and the other
    # lowercased.

    paths_as_is = {e.path for e in entries}
    paths_no_case = {p.lower() for p in paths_as_is}
    need_by_file = len(paths_as_is) != len(paths_no_case)

    if not need_by_file:
        # use regular extract
        return extract_all_files_at_once(location=abs_location,
                                         target_dir=abs_target_dir,
                                         arch_type=arch_type)

    # now we are extracting one file at a time. this is a tad painful because we
    # are dealing with a full command execution at each time.

    # both map an entry path to a message string
    errors = {}
    warnings = {}
    # NOTE(review): tmp_dir is not removed here; confirm that temp directories
    # from fileutils.get_temp_dir() are cleaned up elsewhere.
    tmp_dir = fileutils.get_temp_dir(prefix='extractcode-extract-')
    for i, entry in enumerate(entries):

        if not entry.is_file:
            continue

        # one temp directory per entry, named after the entry index
        tmp_extract_dir = os.path.join(tmp_dir, str(i))
        fileutils.create_dir(tmp_extract_dir)

        ex_args = build_7z_extract_command(
            location=abs_location,
            target_dir=tmp_extract_dir,
            single_entry=entry,
            arch_type=arch_type,
        )
        rc, stdout, stderr = command.execute2(**ex_args)

        error = get_7z_errors(stdout, stderr)
        if error or rc != 0:
            error = error or UNKNOWN_ERROR
            if TRACE:
                logger.debug(
                    'extract: failure: {rc}\n'
                    'stderr: {stderr}\nstdout: {stdout}'.format(
                        rc=rc, stderr=stderr, stdout=stdout))
            errors[entry.path] = error
            continue

        # these are all for a single file path
        warns = get_7z_warnings(stdout) or {}
        wmsg = '\n'.join(warns.values())
        if wmsg:
            if entry.path in warnings:
                warnings[entry.path] += '\n' + wmsg
            else:
                warnings[entry.path] = wmsg

        # finally move that extracted file to its target location, possibly renamed
        source_file_name = fileutils.file_name(entry.path)
        source_file_loc = os.path.join(tmp_extract_dir, source_file_name)
        if not os.path.exists(source_file_loc):
            if entry.path in errors:
                errors[entry.path] += '\nNo file name extracted.'
            else:
                errors[entry.path] = 'No file name extracted.'
            continue

        safe_path = paths.safe_path(entry.path, posix=True)
        target_file_loc = os.path.join(abs_target_dir, safe_path)
        target_file_dir = os.path.dirname(target_file_loc)
        fileutils.create_dir(target_file_dir)

        unique_target_file_loc = extractcode.new_name(target_file_loc,
                                                      is_dir=False)

        if TRACE:
            logger.debug(
                'extract: unique_target_file_loc: from {} to {}'.format(
                    target_file_loc, unique_target_file_loc))

        if os.path.isfile(source_file_loc):
            fileutils.copyfile(source_file_loc, unique_target_file_loc)
        else:
            fileutils.copytree(source_file_loc, unique_target_file_loc)

    extractcode.remove_backslashes_and_dotdots(abs_target_dir)
    if errors:
        raise ExtractErrorFailedToExtract(errors)

    return convert_warnings_to_list(warnings)