def _need_help_dependency(self, obj):
    """Return whether ``obj`` must declare a dependency on ``--help``.

    ``CombinedDependsFunction`` and ``TrivialDependsFunction`` instances, as
    well as ``self._always`` / ``self._never``, never need one.  Any other
    ``DependsFunction`` needs one when its unwrapped function is listed in
    ``self._has_imports``, has a closure, or loads/stores a global name that
    is not explicitly exempted below.  Objects that are not depends functions
    yield ``False``.
    """
    if isinstance(obj, (CombinedDependsFunction, TrivialDependsFunction)):
        return False
    if isinstance(obj, DependsFunction):
        if obj in (self._always, self._never):
            return False
        func, glob = self.unwrap(obj._func)
        # We allow missing --help dependencies for functions that:
        # - don't use @imports
        # - don't have a closure
        # - don't use global variables
        if func in self._has_imports or func.__closure__:
            return True
        for instr in Bytecode(func):
            if instr.opname in ("LOAD_GLOBAL", "STORE_GLOBAL"):
                # There is a fake os module when one is not imported,
                # and it's allowed for functions without a --help
                # dependency.
                if instr.argval == "os" and glob.get("os") is self.OS:
                    continue
                if instr.argval in self.BUILTINS:
                    continue
                # Exact comparison: the previous `in "namespace"` was a
                # substring test that also exempted globals such as
                # `name`, `space` or `a`.
                if instr.argval == "namespace":
                    continue
                return True
    return False
def _check_dependencies(self, obj):
    """Report misdeclared arguments of the function behind ``obj``.

    Nothing is checked for ``CombinedDependsFunction`` instances or for
    ``self._always`` / ``self._never``.  Otherwise a ``ConfigureError`` is
    handed to ``self._raise_from`` when the unwrapped function accepts
    ``**kwargs``, or when a declared argument is never loaded by its
    bytecode.  An unused ``--help`` dependency is tolerated when
    ``self._need_help_dependency(obj)`` says the function needs it.
    """
    if isinstance(obj, CombinedDependsFunction):
        return
    if obj in (self._always, self._never):
        return

    func, _globals = self.unwrap(obj._func)
    spec = inspect.getfullargspec(func)
    if spec.varkw:
        self._raise_from(
            ConfigureError(
                "Keyword arguments are not allowed in @depends functions"),
            func,
        )

    declared = list(spec.args)
    if spec.varargs:
        declared.append(spec.varargs)

    # Names of declared arguments that the bytecode actually loads.
    referenced = {
        instr.argval
        for instr in Bytecode(func)
        if instr.opname in ("LOAD_FAST", "LOAD_CLOSURE")
        and instr.argval in declared
    }

    for position, arg_name in enumerate(declared):
        if arg_name in referenced:
            continue
        dep = obj.dependencies[position]
        if dep != self._help_option or not self._need_help_dependency(obj):
            label = dep.name if isinstance(dep, DependsFunction) else dep.option
            self._raise_from(
                ConfigureError("The dependency on `%s` is unused" % label),
                func,
            )
def linker(tree, db):
    """A linker between AST nodes and Bytecode instructions.

    Compiles ``tree`` in ``exec`` mode, walks it with ``ast.walk`` and
    appends bytecode instructions to an ``instrs`` list stored on the
    nodes.  The same (mutated) ``tree`` object is returned.  ``db`` is
    accepted but not used by this function.
    """
    bytecode = iter(Bytecode(compile(tree, "<InspectorTiger>", "exec")))
    current_instruction = next(bytecode)
    last_node = None
    for node in ast.walk(tree):
        # Give every visited node an ``instrs`` list, even those that
        # cannot be matched by line number.
        if not hasattr(node, "instrs"):
            node.instrs = []
        if not hasattr(node, "lineno"):
            continue
        try:
            if current_instruction.starts_line is None:
                # The instruction does not start a new source line: attach
                # it to the previously matched node (if there is one).
                if last_node is None:
                    last_node = node
                else:
                    last_node.instrs.append(current_instruction)
                current_instruction = next(bytecode)
                continue
            # The instruction starts a line that falls inside this node's
            # line span: attach it and move on to the next instruction.
            if (node.end_lineno >= current_instruction.starts_line >= node.lineno):
                node.instrs.append(current_instruction)
                current_instruction = next(bytecode)
        except StopIteration:
            # No instructions left.
            # NOTE(review): nodes that ast.walk would have yielded after
            # this point are never visited, so they do not get the
            # ``instrs`` initialisation above — confirm callers tolerate it.
            break
        last_node = node
    return tree
def visit_eval(self, node):
    """Compile ``node`` as an expression and write its bytecode.

    The node is wrapped in an ``ast.Expression`` carrying the node's own
    position, compiled in ``eval`` mode, and every instruction except the
    last one is passed to ``self.write`` inside a ``Byte`` record.
    """
    expression = ast.Expression(
        body=node,
        lineno=node.lineno,
        col_offset=node.col_offset,
    )
    code = compile(expression, "", "eval")
    instructions = list(Bytecode(code))
    # Drop the final instruction of the compiled expression.
    trimmed = instructions[:-1]
    self.write(Byte(node.lineno, code, trimmed))
def filtered_bytecode(func, freevars, globals, locals):
    """Yield two-byte (opcode, argument) chunks for ``func``'s bytecode.

    Loads and stores of fast locals or globals whose name appears in
    ``freevars`` are rewritten to the matching ``*_DEREF`` opcode indexed by
    the name's position in ``freevars``.  Remaining global / fast accesses
    keep their opcode but are re-indexed by position in ``globals`` /
    ``locals``.  Every other instruction is emitted with its own argument
    (``0`` when it has none).
    """
    deref_for = {
        opmap['LOAD_FAST']: opmap['LOAD_DEREF'],
        opmap['STORE_FAST']: opmap['STORE_DEREF'],
        opmap['LOAD_GLOBAL']: opmap['LOAD_DEREF'],
        opmap['STORE_GLOBAL']: opmap['STORE_DEREF'],
    }
    free_slots = dict((name, slot) for slot, name in enumerate(freevars))
    global_slots = dict((name, slot) for slot, name in enumerate(globals))
    local_slots = dict((name, slot) for slot, name in enumerate(locals))

    for ins in Bytecode(func):
        op = ins.opcode
        if op not in deref_for:
            pair = (op, ins.arg or 0)
        elif ins.argval in free_slots:
            pair = (deref_for[op], free_slots[ins.argval])
        elif 'GLOBAL' in ins.opname:
            pair = (op, global_slots[ins.argval])
        elif 'FAST' in ins.opname:
            pair = (op, local_slots[ins.argval])
        else:
            # Nothing is emitted for a mapped opcode matching no rule.
            continue
        yield bytes(pair)
def _prepare_function(self, func, update_globals=None):
    """Prepare ``func`` via the parent sandbox and lint its global loads.

    After delegating to the parent implementation, every ``LOAD_GLOBAL`` in
    ``func`` is checked: the name must be in the wrapped function's globals,
    among the names recorded for ``func`` in ``self._imports``, in the
    globals' ``__builtins__``, or be one of the function's positional
    argument names.  Otherwise a ``NameError`` is passed to
    ``self._raise_from``.  Returns the wrapped function.
    """
    wrapped = super(LintSandbox, self)._prepare_function(func, update_globals)
    _, sandbox_globals = self.unwrap(wrapped)

    # An import is known by its alias, or else by its top-level package.
    imported_names = {
        _as if _as else _import.split(".")[0]
        for _from, _import, _as in self._imports.get(func, ())
    }

    for instr in Bytecode(func):
        code = func.__code__
        if instr.opname != "LOAD_GLOBAL":
            continue
        name = instr.argval
        if (name in sandbox_globals
                or name in imported_names
                or name in sandbox_globals["__builtins__"]
                or name in code.co_varnames[:code.co_argcount]):
            continue
        # Raise the same kind of error as what would happen during
        # execution.
        error = NameError("global name '{}' is not defined".format(name))
        if instr.starts_line is None:
            self._raise_from(error, func)
        else:
            self._raise_from(
                error, func, instr.starts_line - code.co_firstlineno)
    return wrapped
def index():
    """Disassemble the ``code`` field of a JSON request body.

    ``GET`` requests get a welcome message.  Other requests get one line of
    text per bytecode instruction of the submitted source.  A missing
    ``code`` key yields ``'Invalid request'``; any other failure is logged
    and returned as text with the ``'<disassembly>, '`` marker removed.
    """
    if request.method == 'GET':
        return 'Welcome to the Python Disassembler Service'

    try:
        payload = request.get_json()
        source = payload['code']
        return ''.join(f'{instr}\n' for instr in Bytecode(source))
    except KeyError:
        app.logger.error('Missing code key.')
        app.logger.error(request.get_json())
        return 'Invalid request'
    except Exception as exc:
        app.logger.error('Error!')
        app.logger.error(exc)
        return str(exc).replace('<disassembly>, ', '')
def list_func_calls(fn):
    """List attribute / method names accessed on the name ``event`` in ``fn``.

    The bytecode is scanned back to front.  For each instruction whose
    argument value is ``"event"``, the instruction that follows it in
    program order is inspected: a ``LOAD_METHOD`` contributes its name, and
    a ``LOAD_ATTR`` contributes its name unless that name is ``"_mf"``.
    When the attribute is ``"optional"``, the name of the instruction after
    it is used instead.  Names are returned in program order.
    """
    ordered = list(Bytecode(fn))
    ordered.reverse()

    collected = []
    for pos, ins in enumerate(ordered):
        if ins.argval != "event":
            continue

        # In the reversed list, index pos - 1 is the *next* instruction.
        if pos - 2 > 0 and ordered[pos - 1].opname == "LOAD_METHOD":
            collected.append(ordered[pos - 1].argval)
            continue

        if not pos - 1 > 0:
            continue
        follower = ordered[pos - 1]
        if follower.opname != "LOAD_ATTR":
            continue

        if follower.argval == "optional":
            beyond = ordered[pos - 2]
            if beyond.opname == "LOAD_METHOD" or pos - 2 > 0:
                collected.append(beyond.argval)
        elif follower.argval not in ["_mf"]:
            collected.append(follower.argval)

    collected.reverse()
    return ["%s" % funcname for funcname in collected]
def extract_constant(code, symbol, default=-1):
    """Extract the constant value of 'symbol' from 'code'

    If the name 'symbol' is bound to a constant value by the Python code
    object 'code', return that value.  If 'symbol' is bound to an expression,
    return 'default'.  Otherwise, return 'None'.

    Return value is based on the first assignment to 'symbol'.  'symbol' must
    be a global, or at least a non-"fast" local in the code block.  That is,
    only 'STORE_NAME' and 'STORE_GLOBAL' opcodes are checked, and 'symbol'
    must be present in 'code.co_names'.
    """
    if symbol not in code.co_names:
        # name's not there, can't possibly be an assignment
        return None

    from dis import Bytecode, opmap

    name_idx = list(code.co_names).index(symbol)

    # Resolve opcode numbers by name: the numeric values are an
    # implementation detail of each CPython release and are not stable.
    STORE_NAME = opmap['STORE_NAME']
    STORE_GLOBAL = opmap['STORE_GLOBAL']
    LOAD_CONST = opmap['LOAD_CONST']

    const = default

    for byte_code in Bytecode(code):
        op = byte_code.opcode
        arg = byte_code.arg

        if op == LOAD_CONST:
            # Remember the constant; it only counts if the very next
            # instruction stores it into 'symbol'.
            const = code.co_consts[arg]
        elif arg == name_idx and op in (STORE_NAME, STORE_GLOBAL):
            return const
        else:
            # Any other instruction means the pending value is not a plain
            # constant any more.
            const = default

    # 'symbol' is referenced but never assigned in this code block.
    return None
def from_pycode(cls, co):
    """Create a Code object from a python code object.

    Parameters
    ----------
    co : CodeType
        The python code object.

    Returns
    -------
    code : Code
        The codetransformer Code object.
    """
    # Keep the sequence sparse so that by_offset[n] is the instruction
    # located at bytecode[n].
    by_offset = tuple(
        _sparse_args(
            Instruction.from_opcode(
                raw.opcode,
                Instruction._no_arg if raw.arg is None else raw.arg,
            )
            for raw in Bytecode(co)
        )
    )

    # Replace raw numeric arguments with the objects they refer to.
    for offset, instr in enumerate(by_offset):
        if instr is None:
            # Filler entry of the sparse sequence.
            continue
        if instr.absjmp:
            instr.arg = by_offset[instr.arg]
        elif instr.reljmp:
            instr.arg = by_offset[instr.arg + offset + 3]
        elif isinstance(instr, LOAD_CONST):
            instr.arg = co.co_consts[instr.arg]
        elif instr.uses_name:
            instr.arg = co.co_names[instr.arg]
        elif instr.uses_varname:
            instr.arg = co.co_varnames[instr.arg]
        elif instr.uses_free:
            instr.arg = _freevar_argname(
                instr.arg,
                co.co_freevars,
                co.co_cellvars,
            )

    flags = co.co_flags
    has_vargs = bool(flags & Flags.CO_VARARGS)
    has_kwargs = bool(flags & Flags.CO_VARKEYWORDS)

    # Translate the co_varnames layout into the argnames layout:
    # positional names, then *args, then keyword-only names, then **kwargs.
    n_positional = co.co_argcount
    n_kwonly = co.co_kwonlyargcount
    declared = co.co_varnames[
        :(n_positional + n_kwonly + has_vargs + has_kwargs)
    ]

    argnames = list(declared[:n_positional])
    if has_vargs:
        argnames.append('*' + declared[-1 - has_kwargs])
    argnames.extend(declared[n_positional:n_positional + n_kwonly])
    if has_kwargs:
        argnames.append('**' + declared[-1])

    return cls(
        filter(bool, by_offset),
        argnames=argnames,
        cellvars=co.co_cellvars,
        freevars=co.co_freevars,
        name=co.co_name,
        filename=co.co_filename,
        firstlineno=co.co_firstlineno,
        lnotab={
            lno: by_offset[off] for off, lno in findlinestarts(co)
        },
        nested=flags & Flags.CO_NESTED,
        generator=flags & Flags.CO_GENERATOR,
        coroutine=flags & Flags.CO_COROUTINE,
        iterable_coroutine=flags & Flags.CO_ITERABLE_COROUTINE,
        new_locals=flags & Flags.CO_NEWLOCALS,
    )
from dis import Bytecode


def myfunc(x: int) -> str:
    """Return ``x`` doubled, as a string."""
    return str(x * 2)


# help() writes the documentation itself and returns None, so wrapping it
# in print() would emit an extra "None" line.
help(myfunc)
print(myfunc(2))
print('===' * 30)

# Bytecode wraps the function's code object; vars() shows its attributes.
my_byte_code = Bytecode(myfunc)
print(vars(my_byte_code))
print('===' * 30)

# Iterating a Bytecode yields one Instruction per bytecode operation.
for byte_code in my_byte_code:
    print(byte_code)
    print('---' * 15)
    print(byte_code.opname)

# ==========================================================================================
# {'codeobj': <code object myfunc at 0x0000016EEEFCD450, file "C:/proj/deepdive/03_Section/dis_bytecode_simple.py", line 4>, 'first_line': 4, '_line_offset': 0, '_cell_names': (), '_linestarts': {0: 5}, '_original_object': <function myfunc at 0x0000016EEF017CA0>, 'current_offset': None}
# ==========================================================================================
# Instruction(opname='LOAD_GLOBAL', opcode=116, arg=0, argval='str', argrepr='str', offset=0, starts_line=5, is_jump_target=False)
# ---------------------------------------------
# LOAD_GLOBAL
# Instruction(opname='LOAD_FAST', opcode=124, arg=0, argval='x', argrepr='x', offset=2, starts_line=None, is_jump_target=False)
# ---------------------------------------------
# LOAD_FAST
# Instruction(opname='LOAD_CONST', opcode=100, arg=1, argval=2, argrepr='2', offset=4, starts_line=None, is_jump_target=False)
# ---------------------------------------------
# LOAD_CONST
def run(self, code):
    """Feed every bytecode instruction of ``code`` to ``dispatch_instr``.

    Instructions are dispatched one at a time, in the order ``Bytecode``
    yields them.
    """
    instructions = Bytecode(code)
    for instruction in instructions:
        self.dispatch_instr(instruction)
def _visit(self, co, *, name=None):
    """Build a new code object from ``co`` with the transforms applied.

    Decodes ``co`` into instructions, runs ``visit_consts``,
    ``visit_generic``, ``visit_names``, ``visit_varnames``,
    ``visit_freevars`` and ``visit_cellvars`` over the corresponding parts,
    optionally runs ``_optimize`` on the resulting bytecode, and returns a
    new ``CodeType``.  ``name`` overrides ``co.co_name`` when given.
    """
    # WARNING:
    # This is setup in this double assignment way because jump args
    # must backreference their original jump target before any transforms.
    # Don't refactor this into a single pass.
    self._instrs = tuple(
        _sparse_args([
            Instruction.from_opcode(b.opcode, b.arg) for b in Bytecode(co)
        ]))
    # Second pass: resolve jump arguments, then drop the falsy filler
    # entries of the sparse sequence.
    self._instrs = tuple(
        filter(bool, (instr and instr._with_jmp_arg(self)
                      for instr in self._instrs)))

    # Constants are tracked by object identity: id -> list of indices where
    # the constant appears, and id -> the constant itself.
    self._const_indices = const_indices = {}
    self._const_values = const_values = {}
    for n, const in enumerate(self.visit_consts(co.co_consts)):
        const_indices.setdefault(id(const), []).append(n)
        const_values[id(const)] = const

    self._const_idx = len(co.co_consts)  # used for adding new consts.
    self._clean_co = co

    # Apply the transforms.
    self._instrs = tuple(
        _sparse_args(
            sum(
                (tuple(self.visit_generic(_instr)) for _instr in self),
                (),
            )))

    # Serialize the instructions; falsy entries contribute no bytes.
    code = b''.join(
        (instr or b'') and instr.to_bytecode(self) for instr in self)

    # Rebuild the constants sequence from the identity maps; it is sized by
    # self._const_idx, which was initialised above to len(co.co_consts).
    consts = [None] * self._const_idx
    for const_id, idxs in self._const_indices.items():
        for idx in idxs:
            consts[idx] = const_values[const_id]

    names = tuple(self.visit_names(co.co_names))

    if self._optimize:
        # Run the optimizer over the new code.
        code = _optimize(
            code,
            consts,
            names,
            co.co_lnotab,
        )

    # NOTE(review): this positional CodeType call has no posonlyargcount
    # argument, which looks like the pre-3.8 constructor signature —
    # confirm the supported interpreter versions.
    return CodeType(
        co.co_argcount,
        co.co_kwonlyargcount,
        co.co_nlocals,
        _calculate_stack_effect(code),
        co.co_flags,
        code,
        tuple(consts),
        names,
        tuple(self.visit_varnames(co.co_varnames)),
        co.co_filename,
        self.visit_name(name if name is not None else co.co_name),
        co.co_firstlineno,
        co.co_lnotab,
        tuple(self.visit_freevars(co.co_freevars)),
        tuple(self.visit_cellvars(co.co_cellvars)),
    )
def _module_inspect(self, obj):
    """
    inspect objects for module dependencies

    Collects the names of modules referenced by ``obj``: a function or
    method, a dict of values (whose functions and methods are inspected),
    or any other object whose bound methods are inspected and which must
    provide ``__call__``.  Returns a list of module names in no particular
    order (it is built from a set).
    """
    worklist = []
    seen = set()
    mods = set()

    if inspect.isfunction(obj) or inspect.ismethod(obj):
        # The obj is the user's function
        worklist.append(obj)

    elif type(obj) == dict:
        # the obj is the user's iterdata
        to_anayze = list(obj.values())
        for param in to_anayze:
            # NOTE(review): "__builtin__" is the Python 2 module name; on
            # Python 3 builtin types report "builtins", so this test looks
            # always true.  Function objects also report a builtin module,
            # so confirm intent before tightening it.
            if type(param).__module__ != "__builtin__":
                if inspect.isfunction(param):
                    # it is a user defined function
                    worklist.append(param)
                else:
                    # it is a user defined class
                    members = inspect.getmembers(param)
                    for k, v in members:
                        if inspect.ismethod(v):
                            worklist.append(v)
    else:
        # The obj is the user's function but in form of a class
        members = inspect.getmembers(obj)
        found_methods = []
        for k, v in members:
            if inspect.ismethod(v):
                found_methods.append(k)
                worklist.append(v)
        if "__call__" not in found_methods:
            raise Exception('The class you passed as the function to '
                            'run must contain the "__call__" method')

    # The worklist is only used for analyzing functions
    # (it grows while being iterated: functions discovered below are
    # appended and processed in later iterations of this same loop)
    for fn in worklist:
        mods.add(fn.__module__)
        codeworklist = [fn]

        # Inspect the values captured from enclosing scopes and globals.
        cvs = inspect.getclosurevars(fn)
        modules = list(cvs.nonlocals.items())
        modules.extend(list(cvs.globals.items()))
        for k, v in modules:
            if inspect.ismodule(v):
                mods.add(v.__name__)
            elif inspect.isfunction(v) and id(v) not in seen:
                # ``seen`` (keyed by id) keeps a function from being queued
                # again through this branch.
                seen.add(id(v))
                mods.add(v.__module__)
                worklist.append(v)
            elif hasattr(v, "__module__"):
                mods.add(v.__module__)

        # Inspect the bytecode itself; code objects found along the way are
        # appended to codeworklist and scanned by this same loop.
        for block in codeworklist:
            for (k, v) in [
                    self._inner_module_inspect(inst)
                    for inst in Bytecode(block)
            ]:
                if k is None:
                    continue
                if k == "modules":
                    newmods = [
                        mod.__name__ for mod in v if hasattr(mod, "__name__")
                    ]
                    mods.update(set(newmods))
                elif k == "code" and id(v) not in seen:
                    seen.add(id(v))
                    if hasattr(v, "__module__"):
                        mods.add(v.__module__)

                if inspect.isfunction(v):
                    worklist.append(v)
                elif inspect.iscode(v):
                    codeworklist.append(v)

    result = list(mods)
    return result
def visit_exec(self, node):
    """Compile ``node`` as a one-statement module and write its bytecode.

    The statement is wrapped in an ``ast.Module``, compiled in ``exec``
    mode, and every instruction except the last two is passed to
    ``self.write`` inside a ``Byte`` record.
    """
    # ``type_ignores`` is a mandatory Module field on Python 3.8+; leaving
    # it out makes compile() fail with "required field "type_ignores"
    # missing from Module" on 3.8-3.12.
    module = ast.Module(body=[node], type_ignores=[])
    code = compile(module, "", "exec")
    # Drop the last two instructions.
    # NOTE(review): presumably these are the module's implicit
    # "return None" epilogue; its length varies between CPython versions —
    # confirm.
    byte_code = list(Bytecode(code))[:-2]
    self.write(Byte(node.lineno, code, byte_code))
def PatchedMethod(*args, **kwargs):
    """Replacement callable that runs the patch hooks around a method.

    Binds the call's arguments to the parameter names of ``oldOriginal``,
    then runs, in order: ``cl.prefix`` (if present), the original callable
    (``cl.transpiler`` when the patch class defines one) unless a prefix
    stopped it, and ``cl.postfix`` (if present).  An exception raised along
    the way is handed to ``cl.finalizer`` when one exists.  Returns the value
    stored under the injected ``result`` argument.

    Relies on names from the enclosing scope that are not visible here
    (``oldOriginal``, ``cl``, ``name``, ``force``, ``t``) and on the
    module-level globals ``original`` and ``RunOriginal``.
    """
    global original
    global RunOriginal
    arginfo = getfullargspec(oldOriginal)
    params = arginfo.args
    kwarg = arginfo.varkw != None
    infarg = arginfo.varargs != None
    pos = -1
    # Injected argument names are prefixed with the patch class's name.
    argPrefix = f"_{cl.__name__}__"
    arguments = {f"{argPrefix}result":None, f"{argPrefix}state":None}
    defaults = getDefaultArgs(oldOriginal)
    r = None
    # Bind positional arguments to the original's parameter names, falling
    # back to declared defaults when too few were passed.
    for parameter in params:
        pos+=1
        pName = parameter
        try:param = args[pos]
        except IndexError:
            if pName in defaults:param = defaults[pName]
            elif force and len(params)-len(args)==1 and params[0]=="self":arguments["self"]=None
            else:r = PatchingError(f'Method "{name}" expects {len(params)} arguments, {len(args)} were given.')
        #if pName=="self" and type(param)!=t:raise TypeError("Invalid instance class")
        # NOTE(review): when the except path takes its elif/else branch,
        # `param` is not assigned in this iteration, so this line uses a
        # stale value or fails on an unbound local — confirm.
        arguments[pName]=param
    other = args[pos+1:] if pos<len(args)-1 else [] #*args support
    if r!=None:raise r
    if kwarg:arguments.update(kwargs)
    hasPrefix = hasattr(cl, "prefix")
    hasPostfix = hasattr(cl, "postfix")
    hasFinalizer = hasattr(cl, "finalizer")
    prefix = getattr(cl, "prefix") if hasPrefix else None
    postfix = getattr(cl, "postfix") if hasPostfix else None
    finalizer = getattr(cl, "finalizer") if hasFinalizer else None
    if hasattr(cl, "transpiler"): #Handle transpiler
        # The transpiler replaces the callable that is run as "original".
        original = getattr(cl, "transpiler")
        arginfo = getfullargspec(original)
        params = arginfo.args
        kwarg = arginfo.varkw != None
    arguments[f"{argPrefix}originalMethod"] = oldOriginal
    arguments[f"{argPrefix}instance"] = arguments["self"] if "self" in arguments else None
    # Whether each hook accepts **kwargs.
    prefkw = getfullargspec(prefix).varkw!=None if hasPrefix else False
    postkw = getfullargspec(postfix).varkw!=None if hasPostfix else False
    finalkw = getfullargspec(finalizer).varkw!=None if hasFinalizer else False
    yields = []
    RunOriginal = 1
    setter = arguments["self"] if "self" in arguments else t #If there's no object reference, use type
    def getName(base): #Gets the name to use on injections
        if base.startswith("__"):base = base.replace("__",argPrefix,1)
        return base
    def getArguments(base): #Handles RefVar values
        final = {}
        for key in base:final[key] = base[key].Value if isinstance(base[key], RefVar) else base[key]
        return final
    def handleChange(name, value, paramSet): #Handles parameter change
        name = checkType(name, str)
        if not name in paramSet or not name in arguments:
            temp = f"{argPrefix}{name}" #Check if it's an injected variable
            if temp in paramSet and temp in arguments:
                handleChange(temp, value, paramSet)
                return
            temp = f"{argPrefix}_{name}" #Check if it's a field injection
            if temp in paramSet and temp in arguments:
                handleChange(temp, value, paramSet)
                return
            raise NameError(f'Invalid parameter name: "{name}"')
        # A RefVar is also written through to the attribute on `setter`.
        if isinstance(arguments[name], RefVar):
            setattr(setter, arguments[name].Name, value)
            arguments[name]=RefVar(arguments[name].Name, value)
        else:arguments[name]=value
    def getPrivate(func): #Refer fields
        for field in getFieldNames(func, argPrefix):
            if not hasattr(setter, field):raise InjectionError(f'Invalid variable name: {field}')
            arguments[f"{argPrefix}_{field}"]=RefVar(field, getattr(setter, field))
    def runIteration(func, stop, *o, **kargs): #Runs prefix and postfix
        global RunOriginal
        paramSet = [x for x in kargs]
        # Extra positionals are only forwarded when func declares *args.
        o = o if getfullargspec(func).varargs!=None else []
        ran = func(*o, **kargs)
        if isinstance(ran, Iterable): #Check if there's anything yielded
            for value in ran:
                if isinstance(value, OptionalYield) and value.Yield:yields.append(value) #Handle every OptionalYield instance
                else:yields.append(value)
                if isinstance(value, Stop): #Handle Stop
                    if not value.State and stop:RunOriginal-=1
                    break
                if isinstance(value, SetVar): #Handle SetVar
                    handleChange(getName(value.Name), value.Value, paramSet)
        return ran
    def runPrefix(*o, **kargs): runIteration(prefix, True, *o, **kargs)
    def runOriginal(*o, **kargs): arguments[f"{argPrefix}result"]=runIteration(original, False, *o, **kargs)
    def runPostfix(*o, **kargs): runIteration(postfix, False, *o, **kargs)
    r = None
    try:
        if hasPrefix:
            getPrivate(prefix)
            runPrefix(*other, **getArguments(selectArgs(arguments, prefix, prefkw)))
            yields = []
        if RunOriginal:
            # The original additionally receives its own bytecode as the
            # injected "instructions" argument.
            argsWithInst = arguments.copy()
            argsWithInst[f"{argPrefix}instructions"]=Bytecode(oldOriginal)
            runOriginal(*other, **getArguments(selectArgs(argsWithInst, original, kwarg)))
            yields = []
        if hasPostfix:
            getPrivate(postfix)
            runPostfix(*other, **getArguments(selectArgs(arguments, postfix, postkw)))
            yields = []
    except Exception as e:
        if hasFinalizer: #Handle finalizer
            # The finalizer may return an exception to raise in place of e;
            # any other return value leaves r unset, so nothing is raised.
            ex = finalizer(**selectArgs({f"{argPrefix}exception":e}, finalizer, finalkw))
            if isinstance(ex, Exception):r = ex
        else:r = e
    if r!=None:raise r
    return arguments[f"{argPrefix}result"] #Return result
def from_pycode(cls, co):
    """Create a Code object from a python code object.

    Parameters
    ----------
    co : CodeType
        The python code object.

    Returns
    -------
    code : Code
        The codetransformer Code object.
    """
    # Keep the sequence sparse so that by_offset[n] is the instruction
    # located at bytecode[n].
    by_offset = tuple(
        _sparse_args(
            Instruction.from_opcode(
                raw.opcode,
                Instruction._no_arg if raw.arg is None else _RawArg(raw.arg),
            )
            for raw in Bytecode(co)
        )
    )

    # Replace raw numeric arguments with the objects they refer to.
    for offset, instr in enumerate(by_offset):
        if instr is None:
            # Filler entry of the sparse sequence.
            continue
        if instr.absjmp:
            instr.arg = by_offset[instr.arg]
        elif instr.reljmp:
            instr.arg = by_offset[instr.arg + offset + argsize + 1]
        elif isinstance(instr, LOAD_CONST):
            instr.arg = co.co_consts[instr.arg]
        elif instr.uses_name:
            instr.arg = co.co_names[instr.arg]
        elif instr.uses_varname:
            instr.arg = co.co_varnames[instr.arg]
        elif instr.uses_free:
            instr.arg = _freevar_argname(
                instr.arg,
                co.co_freevars,
                co.co_cellvars,
            )
        elif instr.have_arg and isinstance(instr.arg, _RawArg):
            # Any argument still wrapped in _RawArg becomes a plain int.
            instr.arg = int(instr.arg)

    flags = Flag.unpack(co.co_flags)
    has_vargs = flags['CO_VARARGS']
    has_kwargs = flags['CO_VARKEYWORDS']

    # Translate the co_varnames layout into the argnames layout:
    # positional names, then *args, then keyword-only names, then **kwargs.
    n_positional = co.co_argcount
    n_kwonly = co.co_kwonlyargcount
    declared = co.co_varnames[
        :(n_positional + n_kwonly + has_vargs + has_kwargs)
    ]

    argnames = list(declared[:n_positional])
    if has_vargs:
        argnames.append('*' + declared[-1 - has_kwargs])
    argnames.extend(declared[n_positional:n_positional + n_kwonly])
    if has_kwargs:
        argnames.append('**' + declared[-1])

    return cls(
        filter(bool, by_offset),
        argnames=argnames,
        cellvars=co.co_cellvars,
        freevars=co.co_freevars,
        name=co.co_name,
        filename=co.co_filename,
        firstlineno=co.co_firstlineno,
        lnotab={
            lno: by_offset[off] for off, lno in findlinestarts(co)
        },
        flags=flags,
    )