Example #1
0
def scanner(func_code):
    """Yield the names of globals accessed as ``LOAD_GLOBAL`` immediately
    followed by ``LOAD_ATTR`` in the bytecode of *func_code*.

    Assumes the CPython 2 bytecode layout (1-byte opcode plus an optional
    2-byte little-endian argument) — the ``'<xH'`` unpack format and the
    3-byte strides below depend on it.  The opcode constants referenced here
    (LOAD_CONST, IMPORT_NAME, LOAD_GLOBAL, LOAD_ATTR, HAVE_ARGUMENT) and
    ``int_of_byte`` are defined elsewhere in this module.
    """
    code = func_code.co_code      # raw bytecode string, consumed front-to-back
    names = func_code.co_names    # name table indexed by LOAD_GLOBAL's oparg
    consts = func_code.co_consts  # constants table; used only by the disabled import scan below
    # Opcode signatures: comparing against code[::3] slices strips the
    # 2-byte arguments, leaving just the opcode bytes.
    LOAD_LOAD_AND_IMPORT = LOAD_CONST + LOAD_CONST + IMPORT_NAME
    LOAD_GLOBAL_ATTR = LOAD_GLOBAL + LOAD_ATTR
    while code:
        # The below code captures use of direct imports, which can be
        # useful.  However, it also captures imports of disco modules,
        # which imp.find_module() may not find, resulting in failures
        # for existing code when before there were none.  Hence, it is
        # commented out for now, until we can handle imported packages
        # better or screen out disco modules properly.

        # if code[:9:3] == LOAD_LOAD_AND_IMPORT:
        #     oparg_1, oparg_2, oparg_3 = struct.unpack('<xHxHxH', code[:9])
        #     level = consts[oparg_1]
        #     if level == -1 or level == 0:  # normal or absolute import
        #         yield names[oparg_3]
        #     # we do not support relative imports.
        #     code = code[9:]
        #     continue

        # code[:6:3] picks the opcode bytes at offsets 0 and 3 — i.e. the
        # next two instructions' opcodes with their arguments skipped.
        if code[:6:3] == LOAD_GLOBAL_ATTR:
            # The LOAD_GLOBAL argument (2-byte LE, opcode byte skipped by
            # the leading 'x') indexes the global's name.
            oparg, = struct.unpack('<xH', code[:3])
            yield names[oparg]
            code = code[6:]
            continue
        # Advance exactly one instruction: 3 bytes if the opcode carries an
        # argument, otherwise 1.
        if int_of_byte(code[0]) >= HAVE_ARGUMENT:
            code = code[3:]
        else:
            code = code[1:]
Example #2
0
def scanner(func_code):
    """Walk the bytecode of *func_code* and yield each global name that is
    read via a LOAD_GLOBAL instruction immediately followed by LOAD_ATTR.

    Relies on the CPython 2 instruction layout: one opcode byte plus an
    optional 2-byte little-endian argument.  The opcode byte-string
    constants and ``int_of_byte`` come from the enclosing module.
    """
    bytecode = func_code.co_code
    name_table = func_code.co_names
    const_table = func_code.co_consts
    # Signatures to match against argument-stripped opcode slices.
    LOAD_LOAD_AND_IMPORT = LOAD_CONST + LOAD_CONST + IMPORT_NAME
    LOAD_GLOBAL_ATTR = LOAD_GLOBAL + LOAD_ATTR
    pos = 0
    end = len(bytecode)
    while pos < end:
        # NOTE: detection of direct imports (the LOAD_CONST, LOAD_CONST,
        # IMPORT_NAME sequence prepared above) was deliberately disabled:
        # it also matched imports of disco modules, which imp.find_module()
        # may fail to locate, breaking previously-working code.  Re-enable
        # once imported packages are handled (or disco modules screened
        # out) properly.

        # [::3] keeps only the opcode bytes of the next two instructions.
        window = bytecode[pos:pos + 6]
        if window[::3] == LOAD_GLOBAL_ATTR:
            # 'x' skips the opcode byte; 'H' reads the 2-byte LE argument,
            # which indexes the loaded global's name.
            index, = struct.unpack('<xH', bytecode[pos:pos + 3])
            yield name_table[index]
            pos += 6
            continue
        # Step over one instruction: 3 bytes when it carries an argument.
        pos += 3 if int_of_byte(bytecode[pos]) >= HAVE_ARGUMENT else 1
Example #3
0
def disco_input_stream(stream, size, url, ignore_corrupt=False):
    """Input stream for Disco's internal compression format.

    Yields objects unpickled from *stream*.  The stream is a sequence of
    hunks, each introduced by a header byte >= 128 followed by a packed
    ``<BIQ`` record (compressed-flag, CRC32 checksum, hunk size).  A first
    byte below 128 means the legacy netstring format, which is delegated
    to ``old_netstr_reader``.  A zero-size hunk terminates the stream.

    :param stream: file-like object to read from.
    :param size: total size hint, passed through to ``old_netstr_reader``.
    :param url: source URL, used only for error reporting.
    :param ignore_corrupt: when True, checksum/unpickling failures are
        skipped instead of raising :class:`DataError`.
    :raises DataError: on truncated or corrupted data (unless ignored).
    """
    from disco.compat import BytesIO, int_of_byte
    from disco.compat import pickle_load
    import struct, zlib
    offset = 0
    while True:
        header = stream.read(1)
        if not header:
            return  # clean end of stream
        if int_of_byte(header[0]) < 128:
            # Not a hunk header: fall back to the legacy netstring reader,
            # handing it the byte we already consumed.
            for e in old_netstr_reader(stream, size, url, header):
                yield e
            return
        try:
            is_compressed, checksum, hunk_size =\
                struct.unpack('<BIQ', stream.read(13))
        except struct.error:
            # Short read: fewer than the 13 header bytes remained.  Catch
            # only struct.error — a bare except here would also swallow
            # KeyboardInterrupt/SystemExit and real I/O errors.
            raise DataError("Truncated data at {0} bytes".format(offset), url)
        if not hunk_size:
            return  # explicit end-of-data marker
        hunk = stream.read(hunk_size)
        data = b''
        try:
            data = zlib.decompress(hunk) if is_compressed else hunk
            # crc32 is masked to an unsigned 32-bit value for py2/py3 parity.
            if checksum != (zlib.crc32(data) & 0xFFFFFFFF):
                raise ValueError("Checksum does not match")
        except (ValueError, zlib.error) as e:
            if not ignore_corrupt:
                raise DataError(
                    "Corrupted data between bytes {0}-{1}: {2}".format(
                        offset, offset + hunk_size, e), url)
        offset += hunk_size
        hunk = BytesIO(data)
        # A hunk may contain multiple pickled objects; read until EOF.
        while True:
            try:
                yield pickle_load(hunk)
            except EOFError:
                break
            except UnpicklingError as e:
                if not ignore_corrupt:
                    raise DataError(
                        "Corrupted data between bytes {0}-{1}: {2}".format(
                            offset - hunk_size, offset, e), url)
Example #4
0
def disco_input_stream(stream, size, url, ignore_corrupt = False):
    """Input stream for Disco's internal compression format.

    Yields objects unpickled from *stream*, which consists of hunks: a
    header byte >= 128, then a packed ``<BIQ`` record (compressed-flag,
    CRC32 checksum, hunk size), then the hunk payload.  A first byte below
    128 indicates the legacy netstring format and is delegated to
    ``old_netstr_reader``; a zero-size hunk ends the stream.

    :param stream: file-like object to read from.
    :param size: size hint, forwarded to ``old_netstr_reader``.
    :param url: data source URL, used for error messages only.
    :param ignore_corrupt: when True, corrupt hunks are skipped rather
        than raising :class:`DataError`.
    :raises DataError: on truncated or corrupted input (unless ignored).
    """
    from disco.compat import BytesIO, int_of_byte
    from disco.compat import pickle_load
    import struct, zlib
    offset = 0
    while True:
        header = stream.read(1)
        if not header:
            return  # clean end of stream
        if int_of_byte(header[0]) < 128:
            # Legacy netstring data: delegate, passing along the byte
            # already consumed as part of the probe.
            for e in old_netstr_reader(stream, size, url, header):
                yield e
            return
        try:
            is_compressed, checksum, hunk_size =\
                struct.unpack('<BIQ', stream.read(13))
        except struct.error:
            # Only struct.error signals a short header read; the previous
            # bare except also hid KeyboardInterrupt and genuine I/O errors.
            raise DataError("Truncated data at {0} bytes".format(offset), url)
        if not hunk_size:
            return  # explicit terminator hunk
        hunk = stream.read(hunk_size)
        data = b''
        try:
            data = zlib.decompress(hunk) if is_compressed else hunk
            # Mask crc32 to unsigned 32 bits for py2/py3 consistency.
            if checksum != (zlib.crc32(data) & 0xFFFFFFFF):
                raise ValueError("Checksum does not match")
        except (ValueError, zlib.error) as e:
            if not ignore_corrupt:
                raise DataError("Corrupted data between bytes {0}-{1}: {2}"
                                .format(offset, offset + hunk_size, e), url)
        offset += hunk_size
        hunk = BytesIO(data)
        # Each hunk may hold several pickled objects; drain until EOF.
        while True:
            try:
                yield pickle_load(hunk)
            except EOFError:
                break
            except UnpicklingError as e:
                if not ignore_corrupt:
                    raise DataError("Corrupted data between bytes {0}-{1}: {2}"
                                    .format(offset - hunk_size, offset, e), url)