예제 #1
0
def stream_to_open_named_file(stream,
                              fd,
                              filename,
                              source_encoding=None,
                              source_error='strict',
                              target_encoding=None,
                              target_error='strict'):
    """Write ``stream`` to the open file descriptor ``fd``, then close ``fd``.

    The first non-empty chunk is inspected once to decide how every chunk is
    written:

    * compressed -- ``zipfile.is_zipfile(filename)`` is true, or the text form
      of the chunk's first two items equals the text form of
      ``util.gzip_magic``;
    * multi-byte -- ``multi_byte.is_multi_byte`` accepts the first 100 items;
    * binary -- ``util.is_binary`` accepts the chunk (only checked when the
      data is neither compressed nor multi-byte).

    Compressed and binary data are written unchanged.  Anything else is
    decoded from ``source_encoding`` (when it is not already text) and
    re-encoded to ``target_encoding`` before being written.

    :param stream: object whose ``read(size)`` returns successive chunks and
        an empty value at end of input
    :param fd: OS-level file descriptor open for writing; always closed
        before this function returns or raises
    :param filename: path returned to the caller; also the path tested with
        ``zipfile.is_zipfile``
    :param source_encoding: encoding of non-text chunks; falls back to
        ``util.DEFAULT_ENCODING`` when empty
    :param source_error: codec error handler used while decoding
    :param target_encoding: encoding of the written data; falls back to
        ``util.DEFAULT_ENCODING`` when empty or rejected by
        ``encodings_search_function``
    :param target_error: codec error handler used while encoding
    :returns: ``(filename, is_multi_byte)``
    :raises LookupError: if ``source_encoding`` names an unknown codec
    :raises UnicodeError: if decoding or encoding fails under the given
        error handlers
    """
    # Local import keeps this function self-contained; the incremental decoder
    # is what makes chunked decoding safe across chunk boundaries.
    import codecs

    # Signature and behavior are somewhat odd due to backwards compatibility,
    # but this can/should be done better.
    CHUNK_SIZE = 1048576
    data_checked = False
    is_compressed = False
    is_binary = False
    is_multi_byte = False
    decoder = None  # created lazily, only if byte chunks must be decoded
    if not target_encoding or not encodings_search_function(target_encoding):
        target_encoding = util.DEFAULT_ENCODING  # utf-8
    if not source_encoding:
        source_encoding = util.DEFAULT_ENCODING  # sys.getdefaultencoding() would mimic old behavior (defaults to ascii)
    try:
        while True:
            chunk = stream.read(CHUNK_SIZE)
            if not chunk:
                break
            if not data_checked:
                # See if we're uploading a compressed file
                if zipfile.is_zipfile(filename):
                    is_compressed = True
                else:
                    try:
                        if text_type(chunk[:2]) == text_type(util.gzip_magic):
                            is_compressed = True
                    except Exception:
                        # Best effort: a failed text conversion just means
                        # "not recognised as gzip".  KeyboardInterrupt and
                        # SystemExit are deliberately not swallowed.
                        pass
                if not is_compressed:
                    # See if we have a multi-byte character file
                    chars = chunk[:100]
                    is_multi_byte = multi_byte.is_multi_byte(chars)
                    if not is_multi_byte:
                        is_binary = util.is_binary(chunk)
                data_checked = True
            if not is_compressed and not is_binary:
                if not isinstance(chunk, text_type):
                    if decoder is None:
                        decoder = codecs.getincrementaldecoder(source_encoding)(source_error)
                    # The incremental decoder buffers a character that is
                    # split across two chunks instead of failing on it.
                    chunk = decoder.decode(chunk)
                os.write(fd, chunk.encode(target_encoding, target_error))
            else:
                # Compressed files must be encoded after they are uncompressed in the upload utility,
                # while binary files should not be encoded at all.
                os.write(fd, chunk)
        if decoder is not None:
            # Flush the decoder so a truncated trailing character is handled
            # according to source_error instead of being silently dropped.
            tail = decoder.decode(b'', final=True)
            if tail:
                os.write(fd, tail.encode(target_encoding, target_error))
    finally:
        # Honour the "closes file descriptor" contract on error paths too.
        os.close(fd)
    return filename, is_multi_byte
예제 #2
0
파일: sniff.py 프로젝트: sa-fa/galaxy-dist
def stream_to_open_named_file(
    stream, fd, filename, source_encoding=None, source_error="strict", target_encoding=None, target_error="strict"
):
    """Write ``stream`` to the open file descriptor ``fd``, then close ``fd``.

    The first non-empty chunk is inspected once to decide how every chunk is
    written:

    * compressed -- ``zipfile.is_zipfile(filename)`` is true, or the unicode
      form of the chunk's first two items equals the unicode form of
      ``util.gzip_magic``;
    * multi-byte -- ``util.is_multi_byte`` accepts the first 100 items;
    * binary -- ``util.is_binary`` accepts the chunk (only checked when the
      data is neither compressed nor multi-byte).

    Compressed and binary data are written unchanged.  Anything else is
    decoded from ``source_encoding`` (when it is not already unicode) and
    re-encoded to ``target_encoding`` before being written.

    :param stream: object whose ``read(size)`` returns successive chunks and
        an empty value at end of input
    :param fd: OS-level file descriptor open for writing; always closed
        before this function returns or raises
    :param filename: path returned to the caller; also the path tested with
        ``zipfile.is_zipfile``
    :param source_encoding: encoding of non-unicode chunks; falls back to
        ``util.DEFAULT_ENCODING`` when empty
    :param source_error: codec error handler used while decoding
    :param target_encoding: encoding of the written data; falls back to
        ``util.DEFAULT_ENCODING`` when empty or rejected by
        ``encodings_search_function``
    :param target_error: codec error handler used while encoding
    :returns: ``(filename, is_multi_byte)``
    :raises LookupError: if ``source_encoding`` names an unknown codec
    :raises UnicodeError: if decoding or encoding fails under the given
        error handlers
    """
    # Local import keeps this function self-contained; the incremental decoder
    # is what makes chunked decoding safe across chunk boundaries.
    import codecs

    # Signature and behavior are somewhat odd due to backwards compatibility,
    # but this can/should be done better.
    CHUNK_SIZE = 1048576
    data_checked = False
    is_compressed = False
    is_binary = False
    is_multi_byte = False
    decoder = None  # created lazily, only if byte chunks must be decoded
    if not target_encoding or not encodings_search_function(target_encoding):
        target_encoding = util.DEFAULT_ENCODING  # utf-8
    if not source_encoding:
        source_encoding = util.DEFAULT_ENCODING  # sys.getdefaultencoding() would mimic old behavior (defaults to ascii)
    try:
        while True:
            chunk = stream.read(CHUNK_SIZE)
            if not chunk:
                break
            if not data_checked:
                # See if we're uploading a compressed file
                if zipfile.is_zipfile(filename):
                    is_compressed = True
                else:
                    try:
                        if unicode(chunk[:2]) == unicode(util.gzip_magic):
                            is_compressed = True
                    except Exception:
                        # Best effort: a failed unicode conversion just means
                        # "not recognised as gzip".  KeyboardInterrupt and
                        # SystemExit are deliberately not swallowed.
                        pass
                if not is_compressed:
                    # See if we have a multi-byte character file
                    chars = chunk[:100]
                    is_multi_byte = util.is_multi_byte(chars)
                    if not is_multi_byte:
                        is_binary = util.is_binary(chunk)
                data_checked = True
            if not is_compressed and not is_binary:
                if not isinstance(chunk, unicode):
                    if decoder is None:
                        decoder = codecs.getincrementaldecoder(source_encoding)(source_error)
                    # The incremental decoder buffers a character that is
                    # split across two chunks instead of failing on it.
                    chunk = decoder.decode(chunk)
                os.write(fd, chunk.encode(target_encoding, target_error))
            else:
                # Compressed files must be encoded after they are uncompressed in the upload utility,
                # while binary files should not be encoded at all.
                os.write(fd, chunk)
        if decoder is not None:
            # Flush the decoder so a truncated trailing character is handled
            # according to source_error instead of being silently dropped.
            tail = decoder.decode(b"", final=True)
            if tail:
                os.write(fd, tail.encode(target_encoding, target_error))
    finally:
        # Honour the "closes file descriptor" contract on error paths too.
        os.close(fd)
    return filename, is_multi_byte