def test_file_in_subfolder(self):
    """A file nested below a folder is reported as being inside that folder."""
    nested_file = "/a/b/c/foo.py"

    # Every ancestor folder is tried both with and without a trailing slash.
    for ancestor in ("/a", "/a/", "/a/b", "/a/b/"):
        self.assertTrue(file_util.file_is_in_folder_glob(nested_file, ancestor))
def _file_should_be_hashed(self, filename):
    """Decide whether the code in `filename` should be included in the hash.

    A file qualifies when it is not blacklisted and it either sits under the
    main script directory or is found on the Python path.
    """
    abs_path = os.path.abspath(filename)

    # The blacklist is consulted first so that rejected files never pay for
    # the folder / PYTHONPATH checks below.
    if self._folder_black_list.is_blacklisted(abs_path):
        return False

    in_script_dir = file_util.file_is_in_folder_glob(
        abs_path, self._get_main_script_directory()
    )
    if in_script_dir:
        return in_script_dir

    return file_util.file_in_pythonpath(abs_path)
def is_blacklisted(self, filepath):
    """Report whether `filepath` falls inside any blacklisted folder.

    Parameters
    ----------
    filepath : str
        File path that we intend to test.

    Returns
    -------
    bool
        True as soon as one entry of the folder blacklist matches the path,
        False when none of them does (including when the list is empty).
    """
    for folder_glob in self._folder_blacklist:
        if file_util.file_is_in_folder_glob(filepath, folder_glob):
            return True
    return False
def _file_should_be_hashed(self, filename: str) -> bool:
    """Decide whether the code in `filename` should be included in the hash.

    The module-level folder blacklist is created from the
    ``server.folderWatchBlacklist`` option whenever it is still falsy. A file
    qualifies when it is not blacklisted and it either sits under the main
    script directory or is found on the Python path.
    """
    global _FOLDER_BLACK_LIST

    if not _FOLDER_BLACK_LIST:
        blacklist_option = config.get_option("server.folderWatchBlacklist")
        _FOLDER_BLACK_LIST = FolderBlackList(blacklist_option)

    abs_path = os.path.abspath(filename)

    # The blacklist is consulted first so that rejected files never pay for
    # the folder / PYTHONPATH checks below.
    if _FOLDER_BLACK_LIST.is_blacklisted(abs_path):
        return False

    in_script_dir = file_util.file_is_in_folder_glob(
        abs_path, self._get_main_script_directory()
    )
    if in_script_dir:
        return in_script_dir

    return file_util.file_in_pythonpath(abs_path)
def _file_should_be_watched(self, filepath):
    """Decide whether a watcher should be set up for `filepath`.

    Only files that `_file_is_new` reports as new are considered; among those,
    a file qualifies when it sits under the session's script folder or is
    found on the Python path.
    """
    # Cheapest check first: files that are not new are rejected without
    # running the folder / PYTHONPATH checks.
    is_new = self._file_is_new(filepath)
    if not is_new:
        return is_new

    in_script_folder = file_util.file_is_in_folder_glob(
        filepath, self._session_data.script_folder
    )
    if in_script_folder:
        return in_script_folder

    return file_util.file_in_pythonpath(filepath)
def _to_bytes(self, obj, context):
    """Hash objects to bytes, including code with dependencies.

    Python's built in `hash` does not produce consistent results across runs,
    so most branches build their own byte representation instead.

    The branches are tried in order and the first match wins, so their
    ordering is part of the behavior.

    Parameters
    ----------
    obj : any
        The object to convert.
    context : any
        Forwarded to nested `_update` / `_code_to_bytes` calls. For routines
        defined under the main script directory it is replaced by
        `_get_context(obj)`.

    Returns
    -------
    The byte representation produced by the matching branch. If anything
    raises, a Streamlit warning is shown and the function falls off the end,
    i.e. it returns None.
    """
    # NOTE(review): both `except:` clauses below are bare, so they also catch
    # KeyboardInterrupt / SystemExit. Confirm whether that is intended.
    try:
        if _is_magicmock(obj):
            # MagicMock can result in objects that appear to be infinitely
            # deep, so we don't try to hash them at all.
            return self.to_bytes(id(obj))
        elif isinstance(obj, bytes) or isinstance(obj, bytearray):
            return obj
        elif isinstance(obj, string_types):  # noqa: F821
            # Don't allow the user to override string since
            # str == bytes on python 2
            return obj.encode()
        elif type(obj) in self.hash_funcs:
            # Escape hatch for unsupported objects: a user-supplied function
            # keyed by the exact type produces a value that is hashed instead.
            return self.to_bytes(self.hash_funcs[type(obj)](obj))
        elif isinstance(obj, float):
            return self.to_bytes(hash(obj))
        elif isinstance(obj, int):
            return _int_to_bytes(obj)
        elif isinstance(obj, list) or isinstance(obj, tuple):
            h = hashlib.new(self.name)
            # Add the type name to distinguish x from [x].
            self._update(h, type(obj).__name__.encode() + b":")
            for e in obj:
                self._update(h, e, context)
            return h.digest()
        elif obj is None:
            # Special string since hashes change between sessions.
            # We don't use Python's `hash` since hashes are not consistent
            # across runs.
            return b"none:"
        # NOTE(review): bool is a subclass of int, so True/False already match
        # the `isinstance(obj, int)` branch above (unless `bool` is a key of
        # `hash_funcs`). The two branches below look unreachable — confirm.
        elif obj is True:
            return b"bool:1"
        elif obj is False:
            return b"bool:0"
        elif type_util.is_type(
            obj, "pandas.core.frame.DataFrame") or type_util.is_type(
            obj, "pandas.core.series.Series"):
            import pandas as pd

            # Large frames/series are hashed from a fixed-seed sample rather
            # than from every row.
            if len(obj) >= PANDAS_ROWS_LARGE:
                obj = obj.sample(n=PANDAS_SAMPLE_SIZE, random_state=0)
            try:
                # NOTE(review): this returns the result of `.sum()` rather
                # than a bytes object — confirm callers accept that.
                return pd.util.hash_pandas_object(obj).sum()
            except TypeError:
                # Use pickle if pandas cannot hash the object, for example if
                # it contains unhashable objects.
                return pickle.dumps(obj, pickle.HIGHEST_PROTOCOL)
        elif type_util.is_type(obj, "numpy.ndarray"):
            h = hashlib.new(self.name)
            self._update(h, obj.shape)

            # Large arrays are hashed from a fixed-seed random choice of their
            # flattened elements; the full shape was already mixed in above.
            if obj.size >= NP_SIZE_LARGE:
                import numpy as np

                state = np.random.RandomState(0)
                obj = state.choice(obj.flat, size=NP_SAMPLE_SIZE)

            self._update(h, obj.tobytes())
            return h.digest()
        elif inspect.isbuiltin(obj):
            return self.to_bytes(obj.__name__)
        elif hasattr(obj, "name") and (
            isinstance(obj, io.IOBase)
            or (isinstance(obj.name, string_types)  # noqa: F821
                and os.path.exists(obj.name))):
            # Hash files as name + last modification date + offset.
            h = hashlib.new(self.name)
            self._update(h, obj.name)
            self._update(h, os.path.getmtime(obj.name))
            self._update(h, obj.tell())
            return h.digest()
        elif inspect.isroutine(obj):
            if hasattr(obj, "__wrapped__"):
                # Ignore the wrapper of wrapped functions.
                return self.to_bytes(obj.__wrapped__)

            if obj.__module__.startswith("streamlit"):
                # Ignore streamlit modules even if they are in the CWD
                # (e.g. during development).
                return self.to_bytes("%s.%s" % (obj.__module__, obj.__name__))

            h = hashlib.new(self.name)
            filepath = os.path.abspath(obj.__code__.co_filename)

            # Only functions whose source file is under the main script
            # directory and not blacklisted have their defaults and code
            # hashed.
            if file_util.file_is_in_folder_glob(
                filepath, self._get_main_script_directory()
            ) and not self._folder_black_list.is_blacklisted(filepath):
                context = _get_context(obj)
                if obj.__defaults__:
                    self._update(h, obj.__defaults__, context)
                h.update(self._code_to_bytes(obj.__code__, context))
            else:
                # Don't hash code that is not in the current working directory;
                # identify the function by module and name only.
                self._update(h, obj.__module__)
                self._update(h, obj.__name__)
            return h.digest()
        elif inspect.iscode(obj):
            return self._code_to_bytes(obj, context)
        elif inspect.ismodule(obj):
            # TODO: Figure out how to best show this kind of warning to the
            # user. In the meantime, show nothing. This scenario is too common,
            # so the current warning is quite annoying...
            # st.warning(('Streamlit does not support hashing modules. '
            #             'We did not hash `%s`.') % obj.__name__)
            # TODO: Hash more than just the name for internal modules.
            return self.to_bytes(obj.__name__)
        elif inspect.isclass(obj):
            # TODO: Figure out how to best show this kind of warning to the
            # user. In the meantime, show nothing. This scenario is too common,
            # (e.g. in every "except" statement) so the current warning is
            # quite annoying...
            # st.warning(('Streamlit does not support hashing classes. '
            #             'We did not hash `%s`.') % obj.__name__)
            # TODO: Hash more than just the name of classes.
            return self.to_bytes(obj.__name__)
        elif isinstance(obj, functools.partial):
            # The return value of functools.partial is not a plain function:
            # it's a callable object that remembers the original function plus
            # the values you bound into it. So here we need to special-case it.
            h = hashlib.new(self.name)
            self._update(h, obj.args)
            self._update(h, obj.func)
            self._update(h, obj.keywords)
            return h.digest()
        else:
            try:
                # As a last resort, we pickle the object to hash it.
                return pickle.dumps(obj, pickle.HIGHEST_PROTOCOL)
            except:
                # Pickling failed: warn and fall through (returns None).
                st.warning(
                    _hashing_error_message(
                        "Streamlit cannot hash an object of type %s." % type(obj)))
    except:
        # Any failure in the branches above: warn and fall through (returns
        # None).
        st.warning(
            _hashing_error_message(
                "Streamlit failed to hash an object of type %s." % type(obj)))
def test_rel_file_not_in_folder_glob(self):
    """A relative file checked against an empty folder glob is a match."""
    self.assertTrue(file_util.file_is_in_folder_glob("foo.py", ""))
def test_file_not_in_folder_glob(self):
    """A glob naming a folder that is not on the file's path does not match."""
    self.assertFalse(file_util.file_is_in_folder_glob("/a/b/c/foo.py", "**/f"))
def test_file_in_folder_glob(self):
    """A glob ending in the file's parent folder name matches the file."""
    self.assertTrue(file_util.file_is_in_folder_glob("/a/b/c/foo.py", "**/c"))
def test_rel_file_not_in_folder(self):
    """A relative file is not reported as being inside an unrelated folder."""
    # The folder is tried both with and without a trailing slash.
    for unrelated_folder in ("/d/e/f/", "/d/e/f"):
        self.assertFalse(
            file_util.file_is_in_folder_glob("foo.py", unrelated_folder)
        )
def update_watched_modules(self):
    """Bring the set of watched module files in line with `sys.modules`.

    Does nothing when the watcher is closed. Otherwise every loaded module is
    resolved to a file path; modules without a real, non-blacklisted file are
    skipped. A watcher is registered for each file that is inside the report's
    script folder and not yet watched. Finally, watched files that no longer
    correspond to any loaded module file are deregistered.

    Returns
    -------
    None
    """
    if self._is_closed:
        return

    # Paths of all loaded, non-blacklisted module files. Kept as a set because
    # it is only used for membership tests in the deregistration pass below.
    local_filepaths = set()

    # Clone modules dict here because we may alter the original dict inside
    # the loop.
    modules = dict(sys.modules)

    for name, module in modules.items():
        try:
            spec = getattr(module, "__spec__", None)

            if spec is None:
                filepath = getattr(module, "__file__", None)
                if filepath is None:
                    # Some modules have neither a spec nor a file. But we
                    # can ignore those since they're not the user-created
                    # modules we want to watch anyway.
                    continue
            else:
                filepath = spec.origin

                if filepath is None:
                    # Built-in modules (and other stuff) don't have origins.
                    continue

            filepath = os.path.abspath(filepath)

            if not os.path.isfile(filepath):
                # There are some modules that have a .origin, but don't
                # point to real files. For example, there's a module where
                # .origin is 'built-in'.
                continue

            if self._folder_black_list.is_blacklisted(filepath):
                continue

            file_is_new = filepath not in self._watched_modules
            file_is_local = file_util.file_is_in_folder_glob(
                filepath, self._report.script_folder
            )

            local_filepaths.add(filepath)

            if file_is_local and file_is_new:
                self._register_watcher(filepath, name)

        except Exception:
            # In case there's a problem introspecting some specific module,
            # let's not stop the entire loop from running. For example,
            # the __spec__ field in some modules (like IPython) is actually
            # a dynamic property, which can crash if the underlying
            # module's code has a bug (as discovered by one of our users).
            continue

    # Clone dict here because we may alter the original dict inside the
    # loop.
    watched_modules = dict(self._watched_modules)

    # Remove no-longer-depended-on files from self._watched_modules
    # Will this ever happen?
    for filepath in watched_modules:
        if filepath not in local_filepaths:
            self._deregister_watcher(filepath)