Beispiel #1
0
def read_contents(codec_type, path, fs, offset, length):
    """
    Reads contents of a passed path, by appropriately decoding the data.

    When codec_type is falsy the codec is auto-detected from the file extension
    and the leading bytes of the file. An explicitly passed 'avro' is switched
    to 'snappy_avro' when snappy is installed and the file contents are
    detected as snappy.

    Arguments:
       codec_type - The type of codec to use to decode. (Auto-detected if None).
       path - The path of the file to read.
       fs - The FileSystem instance to use to read.
       offset - Offset to seek to before read begins.
       length - Amount of bytes to read after offset.
    Returns: A tuple of codec_type, offset, length and contents read. The
       returned codec_type is the one actually used, and the returned offset is
       reset to 0 when gzip is auto-detected.
    Raises: Whatever fs.open, fs.stats or the codec specific reader raises. The
       file handle is always closed once it has been opened.
    """
    contents = ''

    # Open outside of the try block: if the open itself fails there is no
    # handle to close, and the caller must see the original error rather than
    # an UnboundLocalError raised from the cleanup code.
    fhandle = fs.open(path)
    try:
        stats = fs.stats(path)

        # Auto codec detection for [gzip, avro, snappy, snappy avro, none]
        # NOTE(review): this first check reads the whole file without consulting
        # MAX_SNAPPY_DECOMPRESSION_SIZE, unlike the auto-detect branches below.
        if codec_type == 'avro' and snappy_installed() and detect_snappy(fhandle.read()):
            codec_type = 'snappy_avro'
        elif not codec_type:
            # Only the first three bytes are needed for the gzip/avro checks.
            contents = fhandle.read(3)
            codec_type = 'none'
            if path.endswith('.gz') and detect_gzip(contents):
                codec_type = 'gzip'
                offset = 0
            elif path.endswith('.avro'):
                if detect_avro(contents):
                    codec_type = 'avro'
                # Snappy detection needs the full payload, so it is only
                # attempted for files within the configured size limit.
                if snappy_installed() and stats.size <= MAX_SNAPPY_DECOMPRESSION_SIZE.get() and detect_snappy(contents + fhandle.read()):
                    codec_type = 'snappy_avro'
            elif snappy_installed() and path.endswith('.snappy'):
                codec_type = 'snappy'
            elif snappy_installed() and stats.size <= MAX_SNAPPY_DECOMPRESSION_SIZE.get() and detect_snappy(contents + fhandle.read()):
                codec_type = 'snappy'

        # Detection may have consumed part or all of the stream; rewind so the
        # codec specific reader starts from the beginning of the file.
        fhandle.seek(0)

        if codec_type == 'gzip':
            contents = _read_gzip(fhandle, path, offset, length, stats)
        elif codec_type == 'avro':
            contents = _read_avro(fhandle, path, offset, length, stats)
        elif codec_type == 'snappy_avro':
            contents = _read_snappy_avro(fhandle, path, offset, length, stats)
        elif codec_type == 'snappy':
            contents = _read_snappy(fhandle, path, offset, length, stats)
        else:
            # for 'none' type.
            contents = _read_simple(fhandle, path, offset, length, stats)

    finally:
        fhandle.close()

    return (codec_type, offset, length, contents)
Beispiel #2
0
def _read_snappy_avro(fhandle, path, offset, length, stats):
    """
    Reads a snappy compressed avro file.

    The whole file is read from fhandle, decompressed with snappy, wrapped in
    an in-memory buffer and handed to _read_avro together with the original
    path, offset, length and stats.

    Raises: PopupException if snappy is not installed, or if stats.size is
       greater than MAX_SNAPPY_DECOMPRESSION_SIZE.
    """
    if not snappy_installed():
        raise PopupException(_('Failed to decompress snappy compressed file. Snappy is not installed.'))

    file_size = stats.size
    size_limit = MAX_SNAPPY_DECOMPRESSION_SIZE.get()
    if file_size > size_limit:
        raise PopupException(_('Failed to decompress snappy compressed file. File size is greater than allowed max snappy decompression size of %d.') % size_limit)

    compressed = fhandle.read()
    decompressed = _decompress_snappy(compressed)
    avro_stream = StringIO(decompressed)
    return _read_avro(avro_stream, path, offset, length, stats)