def safe_exec(code, globals_dict, random_seed=None, python_path=None, cache=None, slug=None):
    """
    Execute python code safely.

    `code` is the Python code to execute.  It has access to the globals in
    `globals_dict`, and any changes it makes to those globals are visible in
    `globals_dict` when this function returns.

    `random_seed` is interpolated into `CODE_PROLOG`, which is prepended to
    the code, to seed the `random` module available to the code.

    `python_path` is a list of directories to add to the Python path before
    execution.

    `cache` is an object with .get(key) and .set(key, value) methods.  It will
    be used to cache the execution, taking into account the code, the values
    of the globals, and the random seed.

    `slug` is an arbitrary string, a description that's meaningful to the
    caller, that will be used in log messages.

    Returns None.  Raises `SafeExecException` if the executed code fails, or
    if a cached result for the same inputs recorded a failure message.
    """
    # Check the cache for a previous result.
    if cache:
        safe_globals = json_safe(globals_dict)
        md5er = hashlib.md5()
        # hashlib only accepts bytes on Python 3.  On Python 2 the repr() of a
        # str/unicode is pure ASCII, so encoding it leaves the digest (and
        # therefore existing cache keys) unchanged.
        md5er.update(repr(code).encode('utf-8'))
        update_hash(md5er, safe_globals)
        key = "safe_exec.%r.%s" % (random_seed, md5er.hexdigest())
        cached = cache.get(key)
        if cached is not None:
            # We have a cached result.  The result is a pair: the exception
            # message, if any, else None; and the resulting globals dictionary.
            emsg, cleaned_results = cached
            globals_dict.update(cleaned_results)
            if emsg:
                raise SafeExecException(emsg)
            return

    # Create the complete code we'll run.
    code_prolog = CODE_PROLOG % random_seed

    # Run the code!  Results are side effects in globals_dict.
    # The exception is kept in its own local because the name bound by
    # `except ... as` is deleted at the end of the handler on Python 3.
    exception = None
    try:
        codejail_safe_exec(
            code_prolog + LAZY_IMPORTS + code,
            globals_dict,
            python_path=python_path,
            slug=slug,
        )
    except SafeExecException as e:
        exception = e
        # `.message` only exists on Python 2 exceptions; fall back to str().
        emsg = e.message if hasattr(e, "message") else str(e)
    else:
        emsg = None

    # Put the result back in the cache.  This is complicated by the fact that
    # the globals dict might not be entirely serializable.
    if cache:
        cleaned_results = json_safe(globals_dict)
        cache.set(key, (emsg, cleaned_results))

    # If an exception happened, raise it now.
    if emsg:
        raise exception
def not_safe_exec(code, globals_dict, files=None, python_path=None, slug=None):
    """
    Another implementation of `safe_exec`, but not safe.

    This can be swapped in for debugging problems in sandboxed Python code.

    This is not thread-safe, due to temporarily changing the current directory
    and modifying sys.path.

    `code` is the Python code to execute, using a JSON-safe copy of
    `globals_dict` as its globals.  After a successful run, the JSON-safe
    part of the resulting globals is merged back into `globals_dict`.

    `files` is an iterable of file paths; each is copied (by base name) into
    the temporary working directory before the code runs.

    `python_path` is a list of entries appended to `sys.path` for the
    duration of the run only.

    `slug` is accepted for signature parity with `safe_exec` and is not used.

    Raises `SafeExecException` with a "<ExceptionClass>: <message>" string if
    the code raises any `Exception`.
    """
    g_dict = json_safe(globals_dict)

    with temp_directory() as tmpdir:
        with change_directory(tmpdir):
            # Copy the files here.
            for filename in files or ():
                dest = os.path.join(tmpdir, os.path.basename(filename))
                shutil.copyfile(filename, dest)

            # Remember both the list object and a copy of its contents.
            # Saving only the reference is not enough: extend() mutates that
            # very list, so "restoring" it would keep the extra entries.
            path_list = sys.path
            saved_entries = list(path_list)
            if python_path:
                sys.path.extend(python_path)
            try:
                # Tuple form of exec: valid on both Python 2 and Python 3.
                exec(code, g_dict)
            except Exception as e:
                # Wrap the exception in a SafeExecException, but we don't
                # try here to include the traceback, since this is just a
                # substitute implementation.
                msg = "{0.__class__.__name__}: {0!s}".format(e)
                raise SafeExecException(msg)
            finally:
                # Put back the original contents and the original list
                # object, in case the executed code rebound sys.path.
                path_list[:] = saved_entries
                sys.path = path_list

    globals_dict.update(json_safe(g_dict))
def test_surrogate_unicode_keys(self):
    # Smoke test: feed json_safe() a dict keyed by each surrogate code point.
    # Nothing is asserted about the result; the test passes as long as
    # json_safe() does not raise.
    for codepoint in self.SURROGATE_RANGE:
        surrogate = unichr(codepoint)

        # Use the surrogate as a dictionary key.
        json_safe({surrogate: 'test'})
def test_surrogate_unicode_values(self):
    # Smoke test: feed json_safe() a dict whose value is each surrogate code
    # point in turn.  Nothing is asserted about the result; the test passes
    # as long as json_safe() does not raise.
    for codepoint in self.SURROGATE_RANGE:
        surrogate = unichr(codepoint)

        # Use the surrogate as a dictionary value.
        json_safe({'test': surrogate})
def test_unicode(self):
    # json_safe() must pass non-surrogate unicode through unchanged, both
    # as a dictionary value and as a dictionary key.

    # A handful of non-ASCII characters from the 16-bit range.
    samples = [unichr(512), unichr(2**8 - 1), unichr(2**16 - 1)]

    for sample in samples:
        # As a value: it must come back under the same key.
        as_value = json_safe({'test': sample})
        self.assertEqual(as_value.get('test', None), sample)

        # As a key: the value must still be reachable through it.
        as_key = json_safe({sample: 'test'})
        self.assertEqual(as_key.get(sample, None), 'test')
def test_unicode(self):
    # json_safe() must pass non-surrogate unicode through unchanged, both
    # as a dictionary value and as a dictionary key.

    # A handful of non-ASCII characters from the 16-bit range.
    samples = [unichr(512), unichr(2**8-1), unichr(2**16-1)]

    for sample in samples:
        # As a value: it must come back under the same key.
        as_value = json_safe({'test': sample})
        self.assertEqual(as_value.get('test', None), sample)

        # As a key: the value must still be reachable through it.
        as_key = json_safe({sample: 'test'})
        self.assertEqual(as_key.get(sample, None), 'test')
def test_surrogate_unicode_keys(self):
    """
    json_safe() must drop entries whose key is a surrogate code point.
    """
    for codepoint in self.SURROGATE_RANGE:
        surrogate = unichr(codepoint)

        # Use the surrogate as a dictionary key; it must not survive.
        cleaned = json_safe({surrogate: 'test'})
        self.assertFalse(surrogate in cleaned)
def test_surrogate_unicode_values(self):
    """
    json_safe() must drop entries whose value is a surrogate code point.
    """
    for codepoint in self.SURROGATE_RANGE:
        surrogate = unichr(codepoint)

        # Use the surrogate as a dictionary value; its key must not survive.
        cleaned = json_safe({'test': surrogate})
        self.assertFalse('test' in cleaned)
def test_decodable_dict(self):
    # Nested input mixing int keys, byte strings, text and a tuple.
    raw = {
        1: bytes('a', 'utf8'),
        2: 'b',
        3: {
            1: bytes('b', 'utf8'),
            2: (1, bytes('a', 'utf8')),
        },
    }

    cleaned = safe_exec.json_safe(raw)

    # Expected shape after cleaning: string keys, text instead of bytes,
    # and a list in place of the tuple.
    expected = {
        '1': 'a',
        '2': 'b',
        '3': {
            '1': 'b',
            '2': [1, 'a'],
        },
    }
    self.assertDictEqual(cleaned, expected)
def safe_exec(code, globals_dict, files=None, python_path=None, slug=None):
    """
    Run `code` the way "exec" would, but inside the sandbox.

    `code` is a string of Python source.  `globals_dict` supplies the globals
    for the run; whatever JSON-serializable globals exist afterwards are
    merged back into `globals_dict`.

    `files` is a list of paths (files or directories) handed to the jail to
    be made available to the sandboxed process.  They are passed through
    unchecked; the caller decides what is safe to expose.

    `python_path` is a list of directory paths.  Each is passed along just
    like an entry of `files`, and its base name is additionally appended to
    `sys.path` inside the sandbox so modules in it can be imported.

    `slug` is an arbitrary, caller-meaningful string used in log messages.

    Returns None; results are visible only through `globals_dict`.

    Raises `SafeExecException`, carrying the sandbox process's stderr, when
    the jailed process exits with a non-zero status.
    """
    sandbox_files = list(files or ())

    # Opening section of the driver script that runs inside the jail.
    bootstrap = textwrap.dedent(
        """
        import sys
        try:
            import simplejson as json
        except ImportError:
            import json
        """
        # We need to prevent the sandboxed code from printing to stdout,
        # or it will pollute the json we print there.  This isn't a
        # security concern (they can put any values in the json output
        # anyway, either by writing to sys.__stdout__, or just by defining
        # global values), but keeps accidents from happening.
        """
        class DevNull(object):
            def write(self, *args, **kwargs):
                pass
        sys.stdout = DevNull()
        """
        # Read the code and the globals from the stdin.
        """
        code, g_dict = json.load(sys.stdin)
        """
    )
    script_parts = [bootstrap]

    # One sys.path line per extra directory; the directory itself travels
    # with the other files.
    for path_dir in python_path or ():
        script_parts.append("sys.path.append(%r)\n" % os.path.basename(path_dir))
        sandbox_files.append(path_dir)

    epilogue = textwrap.dedent(
        # Execute the sandboxed code.
        """
        exec code in g_dict
        """
        # Clean the globals for sending back as JSON over stdout.
        """
        ok_types = (
            type(None), int, long, float, str, unicode, list, tuple, dict
        )
        bad_keys = ("__builtins__",)
        def jsonable(v):
            if not isinstance(v, ok_types):
                return False
            try:
                json.dumps(v)
            except Exception:
                return False
            return True
        g_dict = {
            k:v
            for k,v in g_dict.iteritems()
            if jsonable(v) and k not in bad_keys
        }
        """
        # Write the globals back to the calling process.
        """
        json.dump(g_dict, sys.__stdout__)
        """
    )
    script_parts.append(epilogue)

    # The user code and its (JSON-safe) globals reach the jail over stdin.
    stdin = json.dumps([code, json_safe(globals_dict)])
    jailed_code = "".join(script_parts)

    # Turn this on to see what's being executed.
    if LOG_ALL_CODE:  # pragma: no cover
        log.debug("Jailed code: %s", jailed_code)
        log.debug("Exec: %s", code)
        log.debug("Stdin: %s", stdin)

    result = jail_code.jail_code(
        "python",
        code=jailed_code,
        stdin=stdin,
        files=sandbox_files,
        slug=slug,
    )
    if result.status != 0:
        raise SafeExecException(
            "Couldn't execute jailed code: %s" % result.stderr
        )
    globals_dict.update(json.loads(result.stdout))
def safe_exec(code, globals_dict, files=None, python_path=None, slug=None):
    """
    Run `code` the way "exec" would, but inside the sandbox.

    `code` is a string of Python source.  `globals_dict` supplies the globals
    for the run; whatever JSON-serializable globals exist afterwards are
    merged back into `globals_dict`.

    `files` is a list of paths (files or directories) handed to the jail to
    be made available to the sandboxed process.  They are passed through
    unchecked; the caller decides what is safe to expose.

    `python_path` is a list of directory paths.  Each is passed along just
    like an entry of `files`, and its base name is additionally appended to
    `sys.path` inside the sandbox so modules in it can be imported.

    `slug` is an arbitrary, caller-meaningful string used in log messages.

    Returns None; results are visible only through `globals_dict`.

    Raises `SafeExecException`, carrying the sandbox process's stderr, when
    the jailed process exits with a non-zero status.
    """
    sandbox_files = list(files or ())

    # Opening section of the driver script that runs inside the jail.
    bootstrap = textwrap.dedent(
        """
        import sys
        try:
            import simplejson as json
        except ImportError:
            import json
        """
        # We need to prevent the sandboxed code from printing to stdout,
        # or it will pollute the json we print there.  This isn't a
        # security concern (they can put any values in the json output
        # anyway, either by writing to sys.__stdout__, or just by defining
        # global values), but keeps accidents from happening.
        """
        class DevNull(object):
            def write(self, *args, **kwargs):
                pass
        sys.stdout = DevNull()
        """
        # Read the code and the globals from the stdin.
        """
        code, g_dict = json.load(sys.stdin)
        """
    )
    script_parts = [bootstrap]

    # One sys.path line per extra directory; the directory itself travels
    # with the other files.
    for path_dir in python_path or ():
        script_parts.append("sys.path.append(%r)\n" % os.path.basename(path_dir))
        sandbox_files.append(path_dir)

    epilogue = textwrap.dedent(
        # Execute the sandboxed code.
        """
        exec code in g_dict
        """
        # Clean the globals for sending back as JSON over stdout.
        """
        ok_types = (
            type(None), int, long, float, str, unicode, list, tuple, dict
        )
        bad_keys = ("__builtins__",)
        def jsonable(v):
            if not isinstance(v, ok_types):
                return False
            try:
                json.dumps(v)
            except Exception:
                return False
            return True
        g_dict = {
            k:v
            for k,v in g_dict.iteritems()
            if jsonable(v) and k not in bad_keys
        }
        """
        # Write the globals back to the calling process.
        """
        json.dump(g_dict, sys.__stdout__)
        """
    )
    script_parts.append(epilogue)

    # The user code and its (JSON-safe) globals reach the jail over stdin.
    stdin = json.dumps([code, json_safe(globals_dict)])
    jailed_code = "".join(script_parts)

    # Turn this on to see what's being executed.
    if LOG_ALL_CODE:  # pragma: no cover
        log.debug("Jailed code: %s", jailed_code)
        log.debug("Exec: %s", code)
        log.debug("Stdin: %s", stdin)

    result = jail_code.jail_code(
        "python",
        code=jailed_code,
        stdin=stdin,
        files=sandbox_files,
        slug=slug,
    )
    if result.status != 0:
        raise SafeExecException(
            "Couldn't execute jailed code: %s" % result.stderr
        )
    globals_dict.update(json.loads(result.stdout))
def test_bad_value(self):
    # One entry carries a byte-string value starting with \x99; the other
    # is plain text.  Only the plain-text entry is expected to survive.
    raw = {b'good_key': b'\x99bad_value', 'a': 'b'}
    expected = {'a': 'b'}

    cleaned = safe_exec.json_safe(raw)

    self.assertDictEqual(cleaned, expected)
def safe_exec(
    code,
    globals_dict,
    random_seed=None,
    python_path=None,
    extra_files=None,
    cache=None,
    limit_overrides_context=None,
    slug=None,
    unsafely=False,
):
    """
    Execute python code safely.

    `code` is the Python source to run.  It sees the globals in
    `globals_dict`, and any changes it makes to them are visible in
    `globals_dict` once this function returns.

    `random_seed` is interpolated into `CODE_PROLOG`, which is prepended to
    the code, to seed the `random` module available to the code.

    `python_path` is a list of filenames or directories to add to the Python
    path before execution.  It is forwarded to the executor unchanged.

    `extra_files` is a list of (filename, contents) pairs, forwarded to the
    executor unchanged.

    `cache` is an object with .get(key) and .set(key, value) methods.  When
    given, the outcome is cached under a key derived from the code, the
    JSON-safe globals and the random seed.

    `limit_overrides_context` is an optional string forwarded to the
    executor, used to select context-specific execution limits.

    `slug` is an arbitrary, caller-meaningful string used in log messages.

    If `unsafely` is true, the code is run by `codejail_not_safe_exec`
    instead of `codejail_safe_exec`, i.e. without sandboxing.

    Returns None.  Raises `SafeExecException` if the executed code fails, or
    if a cached result for the same inputs recorded a failure message.
    """
    # Look for a previously stored outcome first.
    if cache:
        safe_globals = json_safe(globals_dict)
        hasher = hashlib.md5()
        hasher.update(repr(code).encode('utf-8'))
        update_hash(hasher, safe_globals)
        key = "safe_exec.%r.%s" % (random_seed, hasher.hexdigest())

        hit = cache.get(key)
        if hit is not None:
            # A stored outcome is a pair: the failure message (or None on
            # success) and the cleaned globals produced by that run.
            emsg, cleaned_results = hit
            globals_dict.update(cleaned_results)
            if emsg:
                raise SafeExecException(emsg)
            return

    # Assemble the full program: seeding prolog, lazy imports, user code.
    code_prolog = CODE_PROLOG % random_seed
    full_code = code_prolog + LAZY_IMPORTS + code

    # Sandboxed by default; the unsandboxed executor only on request.
    exec_fn = codejail_not_safe_exec if unsafely else codejail_safe_exec

    # Run it.  Results arrive as side effects on globals_dict.
    try:
        exec_fn(
            full_code,
            globals_dict,
            python_path=python_path,
            extra_files=extra_files,
            limit_overrides_context=limit_overrides_context,
            slug=slug,
        )
    except SafeExecException as err:
        # Hold on to the exception so it can be re-raised after caching.
        exception = err
        emsg = text_type(err)
    else:
        emsg = None

    # Store the outcome.  Only the JSON-safe part of the globals is cached,
    # since the full dict might not be serializable.
    if cache:
        cleaned_results = json_safe(globals_dict)
        cache.set(key, (emsg, cleaned_results))

    # Surface the failure, if there was one.
    if emsg:
        raise exception
def safe_exec(code, globals_dict, random_seed=None, python_path=None, cache=None, slug=None, unsafely=False):
    """
    Execute python code safely.

    `code` is the Python code to execute.  It has access to the globals in
    `globals_dict`, and any changes it makes to those globals are visible in
    `globals_dict` when this function returns.

    `random_seed` is interpolated into `CODE_PROLOG`, which is prepended to
    the code, to seed the `random` module available to the code.

    `python_path` is a list of directories to add to the Python path before
    execution.

    `cache` is an object with .get(key) and .set(key, value) methods.  It will
    be used to cache the execution, taking into account the code, the values
    of the globals, and the random seed.

    `slug` is an arbitrary string, a description that's meaningful to the
    caller, that will be used in log messages.

    If `unsafely` is true, then the code will actually be executed without
    sandboxing (via `codejail_not_safe_exec`).

    Returns None.  Raises `SafeExecException` if the executed code fails, or
    if a cached result for the same inputs recorded a failure message.
    """
    # Check the cache for a previous result.
    if cache:
        safe_globals = json_safe(globals_dict)
        md5er = hashlib.md5()
        # hashlib only accepts bytes on Python 3.  On Python 2 the repr() of a
        # str/unicode is pure ASCII, so encoding it leaves the digest (and
        # therefore existing cache keys) unchanged.
        md5er.update(repr(code).encode('utf-8'))
        update_hash(md5er, safe_globals)
        key = "safe_exec.%r.%s" % (random_seed, md5er.hexdigest())
        cached = cache.get(key)
        if cached is not None:
            # We have a cached result.  The result is a pair: the exception
            # message, if any, else None; and the resulting globals dictionary.
            emsg, cleaned_results = cached
            globals_dict.update(cleaned_results)
            if emsg:
                raise SafeExecException(emsg)
            return

    # Create the complete code we'll run.
    code_prolog = CODE_PROLOG % random_seed

    # Decide which code executor to use.
    if unsafely:
        exec_fn = codejail_not_safe_exec
    else:
        exec_fn = codejail_safe_exec

    # Run the code!  Results are side effects in globals_dict.
    # The exception is kept in its own local because the name bound by
    # `except ... as` is deleted at the end of the handler on Python 3.
    exception = None
    try:
        exec_fn(
            code_prolog + LAZY_IMPORTS + code,
            globals_dict,
            python_path=python_path,
            slug=slug,
        )
    except SafeExecException as e:
        exception = e
        # `.message` only exists on Python 2 exceptions; fall back to str().
        emsg = e.message if hasattr(e, "message") else str(e)
    else:
        emsg = None

    # Put the result back in the cache.  This is complicated by the fact that
    # the globals dict might not be entirely serializable.
    if cache:
        cleaned_results = json_safe(globals_dict)
        cache.set(key, (emsg, cleaned_results))

    # If an exception happened, raise it now.
    if emsg:
        raise exception