import struct
from opcode import HAVE_ARGUMENT
from disco.compat import int_of_byte

# NOTE: LOAD_CONST, IMPORT_NAME, LOAD_GLOBAL and LOAD_ATTR are assumed
# to be defined at module level as single-byte strings built from
# opcode.opmap. The scanner also assumes the pre-Python-3.6 bytecode
# layout, where every instruction that takes an argument is three bytes
# long (a one-byte opcode followed by a little-endian two-byte argument).

def scanner(func_code):
    code = func_code.co_code
    names = func_code.co_names
    consts = func_code.co_consts
    LOAD_LOAD_AND_IMPORT = LOAD_CONST + LOAD_CONST + IMPORT_NAME
    LOAD_GLOBAL_ATTR = LOAD_GLOBAL + LOAD_ATTR
    while code:
        # The code below captures use of direct imports, which can be
        # useful. However, it also captures imports of disco modules,
        # which imp.find_module() may not find, resulting in failures
        # for existing code where before there were none. Hence, it is
        # commented out for now, until we can handle imported packages
        # better or screen out disco modules properly.
        # if code[:9:3] == LOAD_LOAD_AND_IMPORT:
        #     oparg_1, oparg_2, oparg_3 = struct.unpack('<xHxHxH', code[:9])
        #     level = consts[oparg_1]
        #     if level == -1 or level == 0:  # normal or absolute import
        #         yield names[oparg_3]
        #     # we do not support relative imports.
        #     code = code[9:]
        #     continue
        # A LOAD_GLOBAL followed by a LOAD_ATTR is how 'module.attr' is
        # compiled; the name loaded by LOAD_GLOBAL is a module candidate.
        if code[:6:3] == LOAD_GLOBAL_ATTR:
            oparg, = struct.unpack('<xH', code[:3])
            yield names[oparg]
            code = code[6:]
            continue
        # Skip the current instruction: three bytes if it takes an
        # argument, one byte otherwise.
        if int_of_byte(code[0]) >= HAVE_ARGUMENT:
            code = code[3:]
        else:
            code = code[1:]
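# Usage sketch (illustrative, not from the original module): scanner()
# walks raw bytecode, so it is only reliable on interpreters whose
# instructions are one or three bytes long, i.e. CPython before 3.6.
# The helper '_digest' below is hypothetical.

import sys, hashlib

def _digest(entry):
    return hashlib.md5(entry).hexdigest()

if sys.version_info < (3, 6):
    # Prints set(['hashlib']): hashlib.md5 compiles to LOAD_GLOBAL
    # followed by LOAD_ATTR, and scanner() reports the global name.
    print(set(scanner(_digest.__code__)))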
def disco_input_stream(stream, size, url, ignore_corrupt=False):
    """Input stream for Disco's internal compression format."""
    from disco.compat import BytesIO, int_of_byte
    from disco.compat import pickle_load
    from pickle import UnpicklingError
    import struct, zlib
    offset = 0
    while True:
        header = stream.read(1)
        if not header:
            return
        # A leading byte below 128 means legacy netstring-framed data;
        # old_netstr_reader and DataError are assumed to be available
        # from elsewhere in the module.
        if int_of_byte(header[0]) < 128:
            for e in old_netstr_reader(stream, size, url, header):
                yield e
            return
        # Hunk header: 1-byte compression flag, 4-byte CRC32 of the
        # uncompressed data, 8-byte hunk size, all little-endian.
        try:
            is_compressed, checksum, hunk_size =\
                struct.unpack('<BIQ', stream.read(13))
        except struct.error:
            raise DataError("Truncated data at {0} bytes".format(offset), url)
        if not hunk_size:
            return
        hunk = stream.read(hunk_size)
        data = b''
        try:
            data = zlib.decompress(hunk) if is_compressed else hunk
            if checksum != (zlib.crc32(data) & 0xFFFFFFFF):
                raise ValueError("Checksum does not match")
        except (ValueError, zlib.error) as e:
            if not ignore_corrupt:
                raise DataError(
                    "Corrupted data between bytes {0}-{1}: {2}".format(
                        offset, offset + hunk_size, e), url)
        offset += hunk_size
        # A hunk is a back-to-back sequence of pickled records.
        hunk = BytesIO(data)
        while True:
            try:
                yield pickle_load(hunk)
            except EOFError:
                break
            except UnpicklingError as e:
                if not ignore_corrupt:
                    raise DataError(
                        "Corrupted data between bytes {0}-{1}: {2}".format(
                            offset - hunk_size, offset, e), url)
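# To make the hunk layout concrete, here is a minimal writer sketch for
# the format decoded above. It is an illustration, not Disco's actual
# writer: any leading byte with the high bit set selects the hunk path
# in this reader, the CRC32 is taken over the uncompressed data, and
# records are pickled back to back, assuming disco.compat.pickle_load
# is the standard pickle.load.

import pickle, struct, zlib

def write_hunk(out, records, compress=True):
    # Records are concatenated pickles, read back one by one until EOF.
    body = b''.join(pickle.dumps(r, protocol=2) for r in records)
    hunk = zlib.compress(body) if compress else body
    out.write(b'\x80')  # any value >= 128 marks a hunk (hypothetical magic)
    out.write(struct.pack('<BIQ', int(compress),
                          zlib.crc32(body) & 0xFFFFFFFF, len(hunk)))
    out.write(hunk)

# Round trip through the reader above:
#
#     from disco.compat import BytesIO
#     buf = BytesIO()
#     write_hunk(buf, [{'rank': 1}, {'rank': 2}])
#     buf.seek(0)
#     print(list(disco_input_stream(buf, None, 'test://hunk')))
#     # -> [{'rank': 1}, {'rank': 2}]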