def create(input, # list of files or named ([['name', 'data...'], ...]) or anonymous ([[data...], ...]) byte strings.
           input_disposition='named_byte_strings', # ['file_locations', 'anonymous_byte_strings', 'named_byte_strings']

           compression='tar.gz', # ['gz', 'bz2', 'tar', 'tar.gz', 'tar.bz2', 'zip']

           compress_to = 'byte_string', # ['byte_string', 'my_file', 'temp_file']
           my_file=None, # name of output archive, if compress_to='my_file'
           recurse_dirs=True, # [True, False]

           directory_structure='retain', # ['retain', 'flatten']
           use_compression_root='calculate_minimum', # ['calculate_minimum', 'this_root']
           this_root=None, # root path for compression of files.

           filename_collision='rename_similar', # ['throw_error', 'rename_similar', 'rename_random', 'overwrite', 'skip']
           rename_from_set='abcdefghijklmnopqrstuvwxyz', # characters to use if filename_collision='rename_random'
           num_random_bits=8, # number of random bits to use in the random filename.

           force_file_permissions=None, # file permission bits. eg 0o777.
           force_dir_permissions=None, # file permission bits. eg 0o777.

           file_handle = None, # [None, 'py', 'os'] -- accepted but not used in this function.
           file_handle_mode = 'rb', # accepted but not used in this function.

           allow_clobber=False, # [True, False]
           ):
    """Compress *input* into a single archive / compressed file.

    If the compression format is an archive type (tar/zip), all files
    in input are placed in one archive.  For plain compressors
    (gz/bz2), only a single input file makes sense.

    Anonymous byte strings are given generated random names; named
    byte strings are first written out under a temporary directory and
    then treated as ordinary file locations.

    Returns:
        None             -- when compress_to == 'my_file'
        temp file path   -- when compress_to == 'temp_file'
        archive bytes    -- when compress_to == 'byte_string'
    """
    # Reset the module-level bookkeeping lists so that error cleanup
    # only ever removes files written by *this* call:
    global _remove_on_error
    global _remove_always
    _remove_on_error = []
    _remove_always = []

    # NOTE(review): argument validation is still a TODO here, as in
    # the original implementation.

    # From here on we write things to disk, so wrap everything and
    # clean up the disk if the whole operation does not succeed.

    # old_cwd is assigned just before os.chdir(); the sentinel lets
    # the finally clause know whether there is anything to restore:
    old_cwd = None

    try:
        # try/except/finally kept nested to mirror the cleanup split:
        try:
            # Work out where the output archive file is going to go:
            if compress_to == 'my_file':
                if my_file is None:
                    raise ValueError("if compress_to == 'my_file' then argument my_file must be specified. got None.")

                # Make given file into a nice sane one:
                archive_fullname = os.path.abspath(os.path.expanduser(os.path.normpath(my_file)))

                # A user-specified archive file is deliberately NOT
                # remembered for deletion: on error we leave it alone.

            else:
                # compress_to in ['temp_file', 'byte_string']
                (tf, tf_name) = _open_tempfile(mode='wb')

                # close filehandle because we don't need it:
                tf.close()

                # delete the empty tempfile that _open_tempfile
                # created, so we don't get ClobberError later:
                os.unlink(tf_name)
                del tf

                if compress_to == 'temp_file':
                    # caller keeps the file on success; remove on error only
                    _remember_write(tf_name, error_only=True)
                else:
                    # compress_to == 'byte_string': purely scratch space
                    _remember_write(tf_name, error_only=False)

                archive_fullname = tf_name

            # Get an archive/compress tool:
            tool_class = available_tools[compression]
            archive = tool_class(file_loc=archive_fullname, mode='w', allow_clobber=allow_clobber)

            # Normalize the input in two steps:
            # 1. anonymous byte strings -> named byte strings
            #    (generate a random filename for each string).
            # 2. named byte strings -> real files under a temp dir,
            #    then input_disposition = 'file_locations'.

            if input_disposition == 'anonymous_byte_strings':
                seen_rand_names = []

                def _name_bytestring(bytstr):
                    # Pick a random name that hasn't been used yet.
                    rand_name = _random_alphanum_string(num_random_bits, chars=rename_from_set)
                    tries = 1
                    while rand_name in seen_rand_names:
                        rand_name = _random_alphanum_string(num_random_bits, chars=rename_from_set)
                        tries += 1
                        if tries > 20:
                            raise EZArchiveError('20 random filename selections collided: perhaps you need to increase num_random_bits?')
                    seen_rand_names.append(rand_name)
                    return [rand_name, bytstr]

                # Materialize eagerly: a lazy map() (Python 3) would
                # defeat the collision tracking above.
                input = [_name_bytestring(bytstr) for bytstr in input]
                input_disposition = 'named_byte_strings'

            if input_disposition == 'named_byte_strings':
                # Write the byte strings out to a temporary directory.
                temp_dir = tempfile.mkdtemp()
                _remember_write(temp_dir, error_only=False)

                if this_root is not None:
                    # sanitize:
                    this_root = os.path.abspath(os.path.expanduser(os.path.normpath(this_root)))
                    # chop off the root slashes:
                    this_root = re.sub(r'^/+', '', string=this_root, count=1)
                    # rejig the root dir to reflect the fact we've shoved
                    # everything under a pseudo-root temp directory:
                    this_root = os.path.join(temp_dir, this_root)

                new_input = []
                seen_filenames = {}

                for filename, bytestr in input:
                    # Sanitize the filename we've been given:
                    filename = os.path.abspath(os.path.expanduser(os.path.normpath(filename)))
                    # chop off the root slashes:
                    filename = re.sub(r'^/+', '', string=filename, count=1)

                    dirname = os.path.dirname(filename)

                    # Use temp_dir as a 'fake root'. (There is some
                    # possible dodginess if the user names a file as if
                    # it were inside the not-yet-existent temp
                    # directory: unlikely, and not worked around here.)
                    _mkdir_parents(os.path.join(temp_dir, dirname))

                    filename = _verify_filename(name=filename, seen_filenames=seen_filenames,
                                                filename_collision=filename_collision, num_random_bits=num_random_bits,
                                                rename_from_set=rename_from_set)
                    if filename == ['skip']:
                        continue

                    tempfile_fullname = os.path.join(temp_dir, filename)

                    # Close the handle promptly rather than relying on
                    # garbage collection:
                    with open(tempfile_fullname, 'wb') as out:
                        out.write(bytestr)
                    new_input.append(tempfile_fullname)

                input = new_input
                input_disposition = 'file_locations'

            # At this point input_disposition == 'file_locations' and
            # input is a list of filenames.

            # sanitize the list of filenames (a list comprehension, not
            # map(), so input stays an indexable list under Python 3):
            input = [os.path.abspath(os.path.expanduser(os.path.normpath(x))) for x in input]

            # Expand any directories into filenames (excluding symlinks):
            new_input = []
            for item in input:
                new_input.append(item)
                if recurse_dirs and os.path.isdir(item):
                    new_input.extend(_recursive_dir_contents(item))
            input = new_input

            # calculate the compression root:
            if use_compression_root == 'calculate_minimum':
                first_input = input[0]
                if all(x == first_input for x in input):
                    # all of the filenames we've been given are the same:
                    compression_root = os.path.dirname(first_input)
                    files_to_compress = [os.path.basename(first_input)] * len(input)
                else:
                    # find out the common root of the filenames:
                    (compression_root, files_to_compress) = _split_common_path(input)
                    # if compression_root was itself in input, it will
                    # have become a blank entry '' -- drop those:
                    files_to_compress = [x for x in files_to_compress if x != '']
            else:
                # use_compression_root == 'this_root':
                if this_root is None:
                    raise EZArchiveError("if use_compression_root=='this_root' then argument this_root must be specified")

                this_root = os.path.abspath(os.path.expanduser(os.path.normpath(this_root)))

                # check that this_root is indeed a prefix of all of the
                # input files we've been given:
                if any(this_root not in _dirtree(file) for file in input):
                    raise EZArchiveError('not all files specified in argument input are children of argument this_root')
                # get rid of the entries that are exactly this_root:
                input = [file for file in input if file != this_root]

                compression_root = this_root

                # Chop off this_root from input ('/' and '//' already
                # end in a slash, so nothing extra to strip):
                if this_root == '/' or this_root == '//':
                    this_root_len = len(this_root)
                else:
                    this_root_len = len(this_root + '/')
                files_to_compress = [file[this_root_len:] for file in input]

            old_cwd = os.getcwd()
            os.chdir(compression_root)

            seen_filenames = {}
            for file_to_compress in files_to_compress:

                if directory_structure == 'flatten':
                    # flattening makes no sense for directories:
                    if os.path.isdir(file_to_compress):
                        continue

                    archive_name = os.path.basename(file_to_compress)

                    archive_name = _verify_filename(name=archive_name, seen_filenames=seen_filenames,
                                                    filename_collision=filename_collision,
                                                    num_random_bits=num_random_bits,
                                                    rename_from_set=rename_from_set)
                    if archive_name == ['skip']:
                        continue

                    archive.add_member(file_loc=file_to_compress, archive_name=archive_name,
                                       force_file_permissions=force_file_permissions,
                                       force_dir_permissions=force_dir_permissions)

                else:
                    # directory_structure == 'retain':
                    archive.add_member(file_loc=file_to_compress, archive_name=None,
                                       force_file_permissions=force_file_permissions,
                                       force_dir_permissions=force_dir_permissions)

            # Drop the archive object: it holds an open 'wb' handle on
            # the archive file, which must be closed before we can read
            # back what was written.
            del archive

            # now see if we need to return anything:
            if compress_to == 'my_file':
                return None
            elif compress_to == 'temp_file':
                return tf_name
            else:
                # compress_to == 'byte_string':
                with open(archive_fullname, 'rb') as archive_file:
                    return archive_file.read()
        except:
            # Clean up non-temporary files if we get an error:
            _delete_files(_remove_on_error)
            raise
    finally:
        # Always clean up temporary files, error or not:
        _delete_files(_remove_always)
        # Restore the working directory if we ever changed it:
        if old_cwd is not None:
            try:
                os.chdir(old_cwd)
            except OSError:
                pass
# Example 2
def create(input, # list of files or named ([['name', 'data...'], ...]) or anonymous ([[data...], ...]) byte strings.
           input_disposition='named_byte_strings', # ['file_locations', 'anonymous_byte_strings', 'named_byte_strings']

           compression='tar.gz', # ['gz', 'bz2', 'tar', 'tar.gz', 'tar.bz2', 'zip']

           compress_to = 'byte_string', # ['byte_string', 'my_file', 'temp_file']
           my_file=None, # name of output archive, if compress_to='my_file'
           recurse_dirs=True, # [True, False]

           directory_structure='retain', # ['retain', 'flatten']
           use_compression_root='calculate_minimum', # ['calculate_minimum', 'this_root']
           this_root=None, # root path for compression of files.

           filename_collision='rename_similar', # ['throw_error', 'rename_similar', 'rename_random', 'overwrite', 'skip']
           rename_from_set='abcdefghijklmnopqrstuvwxyz', # characters to use if filename_collision='rename_random'
           num_random_bits=8, # number of random bits to use in the random filename.

           force_file_permissions=None, # file permission bits. eg 0777.
           force_dir_permissions=None, # file permission bits. eg 0777.

           file_handle = None, # [None, 'py', 'os'] -- not referenced in this function body.
           file_handle_mode = 'rb', # not referenced in this function body.

           allow_clobber=False, # [True, False]
           ):
    """Compress *input* into a single archive / compressed file.

    Returns None when compress_to == 'my_file', the temp-file path
    when compress_to == 'temp_file', and the archive contents as a
    byte string when compress_to == 'byte_string'.
    """

    # Basic idea: If we are told to output an archive (tar or zip)
    # then all files given in input are put into a single archive.  If
    # we are told to output compressed files (gz, bz2) then we must be
    # given a maximum of one archive file.

    # If we are given anonymous byte strings with no filename, we use
    # filename_generator.generate_filename() to provide a random
    # filename with hopefully a correct file extension.

    # Clean out written files list (module-level bookkeeping used by
    # the cleanup code in the except/finally clauses below):
    global _remove_on_error
    global _remove_always
    _remove_on_error = []
    _remove_always = []

    # Validate arguments.
    # ?????????????????? (still a TODO in this implementation)

    # From here on, we start writing things out to disk, so we wrap it
    # in a try loop and catch all exceptions. This allows us to clean
    # up the disk if we didn't succeed with the whole of the
    # extraction.

    try:
        # try/except/finally cannot be combined, so we have to nest:
        try:
            # Write input to a temp file if we are given a byte string.

            # Work out where the output archive file is going to go:
            if compress_to == 'my_file':
                if my_file is None:
                    raise ValueError("if compress_to == 'my_file' then argument my_file must be specified. got None.")

                # Make given file into a nice sane one:
                archive_fullname = os.path.abspath(os.path.expanduser(os.path.normpath(my_file)))

                # Should we remember this file or not?  If we get an error in
                # the middle of processing, should we delete a user specified
                # archive file? The decision is not so clear cut as with
                # temporary files (see next). My choice is not to remember
                # (and so not to delete on error)

            else:
                # compress_to in ['temp_file', 'byte_string']
                (tf, tf_name) = _open_tempfile(mode='wb')

                # close filehandle because we don't need it:
                tf.close()

                # delete the empty tempfile that open_tempfile
                # created, so we don't get ClobberError
                os.unlink(tf_name)
                del tf

                if compress_to == 'temp_file':
                    _remember_write(tf_name, error_only=True)
                else:
                    # compress_to == 'byte_string'
                    _remember_write(tf_name, error_only=False)

                archive_fullname = tf_name

            # Get an archive/compress tool:
            tool_class = available_tools[compression]
            archive = tool_class(file_loc=archive_fullname, mode='w', allow_clobber=allow_clobber)

            # Deal with the input:
            # We do this as follows:

            # 1. Take anonymous byte strings and turn them into byte strings
            # by generating a filename for each string, then set
            # input=[new list of named byte strings]
            # input_disposition='named_byte_strings'

            # 2. Take named byte strings and write them to a temporary
            # directory, chdir to this directory and set:
            # input = [glob of temp dir]
            # input_disposition = 'file_locations'

            if input_disposition == 'anonymous_byte_strings':
                # If input is anonymous byte strings, we need to generate a
                # filename for each of the strings:
                seen_rand_names = []

                def f(bytstr):
                    rand_name = _random_alphanum_string(num_random_bits, chars=rename_from_set)
                    tries = 1
                    while rand_name in seen_rand_names:
                        rand_name = _random_alphanum_string(num_random_bits, chars=rename_from_set)
                        tries += 1
                        if tries > 20:
                            raise EZArchiveError('20 random filename selections collided: perhaps you need to increase num_rand_bits?')
                    seen_rand_names.append(rand_name)
                    return [rand_name, bytstr]

                input = map(f, input)
                input_disposition = 'named_byte_strings'

            if input_disposition == 'named_byte_strings':
                # Write the byte strings out to the temporary directory.
                temp_dir = tempfile.mkdtemp()
                _remember_write(temp_dir, error_only=False)

                if this_root is not None:
                    # sanitize:
                    this_root = os.path.abspath(os.path.expanduser(os.path.normpath(this_root)))
                    # chop off the root slashes:
                    this_root = re.sub(r'^/+', '', string=this_root, count=1)
                    # rejig the root dir to reflect the fact we've shoved
                    # everything under a pseudo-root temp directory:
                    this_root = os.path.join(temp_dir, this_root)

                new_input = []
                seen_filenames = {}

                for filename, bytestr in input:
                    # Sanitize the filename we've been given:
                    filename = os.path.abspath(os.path.expanduser(os.path.normpath(filename)))
                    # chop off the root slashes:
                    filename = re.sub(r'^/+', '', string=filename, count=1)

                    dirname = os.path.dirname(filename)

                    # Use temp_dir as a 'fake_root': (There is some possible
                    # dodginess here if the user names one of the files as if
                    # it were inside the not yet existent temp directory:
                    # unlikely scenario; should we work around it? I haven't.
                    _mkdir_parents(os.path.join(temp_dir, dirname))

                    filename = _verify_filename(name=filename, seen_filenames=seen_filenames,
                                                filename_collision=filename_collision, num_random_bits=num_random_bits,
                                                rename_from_set=rename_from_set)
                    if filename == ['skip']: continue

                    tempfile_fullname = os.path.join(temp_dir, filename)

                    open(tempfile_fullname, 'wb').write(bytestr)
                    new_input.append(tempfile_fullname)

                input = new_input
                input_disposition='file_locations'

            # At this point, input_disposition='file_locations' and input contains a list of filenames.

            # sanitize the list of filenames
            f = lambda x: os.path.abspath(os.path.expanduser(os.path.normpath(x)))
            input = map(f, input)

            # Expand any directories into filenames (excluding symlinks):
            new_input = []
            for item in input:
                if os.path.isdir(item):
                    new_input.append(item)
                    if recurse_dirs:
                        new_input.extend(_recursive_dir_contents(item))
                else:
                    new_input.append(item)
            input = new_input

            # calculate the compression root:
            if use_compression_root == 'calculate_minimum':
                first_input = input[0]
                if input == filter(lambda x: x == first_input, input):
                    # all of the filenames we've been given are the same:
                    compression_root = os.path.dirname(first_input)
                    files_to_compress = [os.path.basename(first_input)] * len(input)
                else:
                    # find out the common root of the filenames:
                    (compression_root, files_to_compress) = _split_common_path(input)
                    # if compression_root was also specified in input, it will
                    # have become a blank entry '' in files_to_compress:
                    files_to_compress = filter(lambda x: (x != '' and True) or False, files_to_compress)
            else:
                # use_compression_root == 'this_root':
                if this_root is None:
                    raise EZArchiveError("if compression_root=='this_root' then argument this_root must be specified")

                this_root = os.path.abspath(os.path.expanduser(os.path.normpath(this_root)))

                # check that this_root is indeed a prefix of all of the input
                # files we've been given:
                if input != filter(lambda file: this_root in _dirtree(file), input):
                    raise EZArchiveError('not all files specified in argument input are children of argument this_root')
                # get rid of the entries that are exactly this_root:
                input = filter(lambda file: file != this_root, input)

                compression_root = this_root

                # Chop off this_root from input:
                if this_root == '/' or this_root == '//':
                    this_root_len = len(this_root)
                else:
                    this_root_len = len(this_root + '/')
                files_to_compress = map(lambda file: file[this_root_len:], input)

            old_cwd = os.getcwd()
            os.chdir(compression_root)

            seen_filenames = {}
            for file_to_compress in files_to_compress:

                if directory_structure == 'flatten':
                    if os.path.isdir(file_to_compress):
                        continue

                    archive_name = os.path.basename(file_to_compress)

                    archive_name = _verify_filename(name=archive_name, seen_filenames=seen_filenames,
                                                    filename_collision=filename_collision,
                                                    num_random_bits=num_random_bits,
                                                    rename_from_set=rename_from_set)
                    if archive_name == ['skip']: continue

                    archive.add_member(file_loc=file_to_compress, archive_name=archive_name,
                                       force_file_permissions=force_file_permissions,
                                       force_dir_permissions=force_dir_permissions)

                else:
                    # directory_structure == 'retain':
                    archive.add_member(file_loc=file_to_compress, archive_name=None,
                                       force_file_permissions=force_file_permissions,
                                       force_dir_permissions=force_dir_permissions)

            # get rid of the archive object, which has an open
            # filehandle, mode 'wb' on the archive file:
            # not closing this would prevent us from seeing what
            # has been written to the files.
            del archive

            # now see if we need to return anything:
            if compress_to == 'my_file':
                return None
            elif compress_to == 'temp_file':
                return tf_name
            else:
                # compress_to == 'byte_string':
                return open(archive_fullname, 'rb').read()
        except:
            # Clean up non-temporary file if we get an error:
            _delete_files(_remove_on_error)
            raise
    finally:
        # Always clean up temporary files, error or not:
        _delete_files(_remove_always)
        # old_cwd is unbound if we failed before the chdir above; the
        # bare except deliberately swallows that (and chdir failures).
        try:
            os.chdir(old_cwd)
        except:
            pass
def calculate_filename_ext_libmagic(filename=None, file=None):
    """Guess a file extension for *filename* (a path) or *file* (a byte
    string) using libmagic.

    Exactly one of the two arguments is needed; if both are given,
    *filename* wins.  Recognized gzip/bz2 data is decompressed and the
    function recurses to guess the inner extension (e.g. 'tar.gz').
    Falls back to 'tar', 'txt' or the generic 'dat'.

    Raises ValueError if neither argument is given, ImportError if the
    magic module is unavailable, and _MagicDataError if the magic data
    file cannot be loaded.
    """

    # See comments in magic/magic.ext for details of the format
    # of the data file. All file extensions if recognized by a magic
    # test will be returned in the form "file_ext:{xyz}"; this lets us
    # detect the "file_ext:{}" marker and know we have a successful
    # guess at the correct extension. The reason we need this marker
    # is that libmagic has many tests whose return value is not
    # governed through the magic data file and so we need some way of
    # being sure a file extension has been returned. eg:

    # >>> magician.file('/etc/init.d')
    # "symbolic link to `rc.d/init.d'"

    if filename is None and file is None:
        raise ValueError('at least one of filename or file must be specified')
    if not _got_magic:
        raise ImportError('magic module did not import successfully')

    # NOTE(review): the magician handle is never explicitly closed
    # here; the wrapper presumably releases it on GC -- confirm.
    magician = magic.open(magic.MAGIC_NONE)

    ret_load = magician.load()

    # Throw private error if the magic data file is corrupt, or
    # doesn't exist.
    if ret_load != 0:
        raise _MagicDataError()

    if filename is None:
        # We have only been given file as a binary string.  Write it
        # to a temporary file, because the magic module expects to be
        # handed the name of a real file.
        tf, tf_name = _open_tempfile(mode='wb')
        tf.write(file)
        tf.close()

        delete_file = True
    else:
        os.stat(filename) # Make sure we can stat the file.
        tf_name = filename
        delete_file = False

    ext_info = magician.file(tf_name)

    # Now process ext_info to see if we can find a file extension
    # contained in it.
    file_ext_re = re.compile(r'file_ext:{(.+?)}')
    file_ext_match = file_ext_re.search(ext_info)

    if file_ext_match:
        name_ext = file_ext_match.group(1)

        # See if we have a compressed file type we can deal
        # with. If so, uncompress it and call ourself to get more
        # info:

        # Note that we could use the magic.MAGIC_COMPRESS flag to
        # get libmagic to do the decompression for us but:
        # 1. It only supports gzip
        # 2. The implementation has a nasty bug which has only
        #    been fixed in very recent releases of libmagic.

        if name_ext == 'gz':
            try:
                # Decompress the stream ('with' closes the handle even
                # on failure).  gzip raises OSError subclasses as well
                # as zlib.error on corrupt input:
                with gzip.open(tf_name) as gz:
                    decomp_file = gz.read()
            except (zlib.error, OSError):
                # Couldn't decompress successfully, so just stick
                # with the extension we have.
                pass
            else:
                # Guess an extension of the decompressed stream and
                # tack the current '.gz' on the end:
                name_ext = calculate_filename_ext_libmagic(file=decomp_file) + '.' + name_ext

        elif name_ext == 'bz2':
            try:
                # Decompress the file:
                if not _got_bz2:
                    raise ImportError('Failed to import bz2 module.')
                with bz2.BZ2File(tf_name) as bz:
                    decomp_file = bz.read()
            except IOError:
                # Couldn't decompress successfully, so just stick
                # with the extension we have.
                pass
            else:
                # Guess an extension of the decompressed stream and
                # tack the current '.bz2' on the end:
                name_ext = calculate_filename_ext_libmagic(file=decomp_file) + '.' + name_ext

    # Otherwise, look for special results from libmagic's
    # 'internal tests' that we recognize:
    elif ext_info.lower().rfind('tar archive') != -1:
        name_ext = 'tar'

    elif ext_info.lower().rfind('text') != -1:
        name_ext = 'txt'

    # Can't guess a filetype so use the generic extension .dat:
    else:
        name_ext = 'dat'

    # Identification done, so get rid of the temp file -- but only if
    # we created it ourselves:
    if delete_file:
        _remove_tempfile(tf_name)

    return name_ext
def calculate_filename_ext_libmagic(filename=None, file=None):
    """Guess a file extension for *filename* (a path) or *file* (a
    byte string) via libmagic; recurses into gz/bz2 streams and falls
    back to 'tar', 'txt' or the generic 'dat'.
    """

    # See comments in magic/magic.ext for details of the format
    # of the data file. All file extensions if recognized by a magic
    # test will be returned in the form "file_ext:{xyz}"; this lets us
    # detect the "file_ext:{}" marker and know we have a successful
    # guess at the correct extension. The reason we need this marker
    # is that libmagic has many tests whose return value is not
    # governed through the magic data file and so we need some way of
    # being sure a file extension has been returned. eg:

    # >>> magician.file('/etc/init.d')
    # "symbolic link to `rc.d/init.d'"

    if filename is None and file is None:
        raise ValueError(
            'at least one of file or content_type must be specified')
    if not _got_magic:
        raise ImportError('magic module did not import successfully')

    magician = magic.open(magic.MAGIC_NONE)

    ret_load = magician.load()

    # Throw private error if the magic data file is corrupt, or
    # doesn't exist.

    if ret_load != 0: raise _MagicDataError()

    if filename is None:
        # then we have only been given file as binary string.

        # Get a temporary file and write file variable out to it
        # because the magic module expects to be handed the name of a
        # real file.

        tf, tf_name = _open_tempfile(mode='wb')
        tf.write(file)
        tf.close()

        delete_file = True
    else:
        os.stat(filename)  # Make sure we can stat the file.
        tf_name = filename
        delete_file = False

    ext_info = magician.file(tf_name)

    # Now process ext_info to see if we can find a file extension
    # contained in it.

    file_ext_re = re.compile(r'file_ext:{(.+?)}')
    file_ext_match = file_ext_re.search(ext_info)

    if file_ext_match:
        name_ext = file_ext_match.group(1)

        # See if we have a compressed file type we can deal
        # with. If so, uncompress it and call ourself to get more
        # info:

        # Note that we could use the magic.MAGIC_COMPRESS flag to
        # get libmagic to do the decompression for us but:
        # 1. It only supports gzip
        # 2. The implementation has a nasty bug which has only
        #    been fixed in very recent releases of libmagic.

        if name_ext == 'gz':

            try:
                # Decompress the stream:
                decomp_file = gzip.open(tf_name).read()
            except zlib.error:
                # Couldn't decompress successfully, so just stick
                # with extension we have.
                pass
            else:
                # Guess an extension of the decompressed stream and
                # tack current '.gz' on the end:
                name_ext = calculate_filename_ext_libmagic(
                    file=decomp_file) + '.' + name_ext

        elif name_ext == 'bz2':

            try:
                # Decompress the file:
                if not _got_bz2:
                    raise ImportError('Failed to import bz2 module.')
                decomp_file = bz2.BZ2File(tf_name).read()
            except IOError:
                # Couldn't decompress successfully, so just stick
                # with extension we have.
                pass
            else:
                # Guess an extension of the decompressed stream and
                # tack current '.bz2' on the end:
                name_ext = calculate_filename_ext_libmagic(
                    file=decomp_file) + '.' + name_ext

    # Otherwise, look for special results from libmagic's
    # 'internal tests' that we recognize:

    elif ext_info.lower().rfind('tar archive') != -1:
        name_ext = 'tar'

    elif ext_info.lower().rfind('text') != -1:
        name_ext = 'txt'

    # Can't guess a filetype so use generic extension .dat

    else:
        name_ext = 'dat'

    # Identification done so get rid of the temp file, assuming we created the file:
    if delete_file: _remove_tempfile(tf_name)

    return name_ext