def handle(self, request, comm=None) -> None:
    """Dispatch a frontend request according to its ``"type"`` entry.

    Handled types:

    * ``"change_active_cell"`` -- forwards ``request["active_cell_id"]`` to
      ``set_active_cell``.
    * ``"cell_freshness"`` -- records the cell positions sent by the
      frontend, runs ``check_and_link_multiple_cells`` over the known cells
      and, when ``comm`` is given, sends the JSON result back through it.
    * ``"reactivity_cleanup"`` -- calls ``reactivity_cleanup``.

    Any other type is logged as an error and the message is kept in
    ``_saved_debug_message``.
    """
    request_type = request["type"]

    if request_type == "change_active_cell":
        self.set_active_cell(request["active_cell_id"])
        return

    if request_type == "reactivity_cleanup":
        self.reactivity_cleanup()
        return

    if request_type != "cell_freshness":
        dbg_msg = "Unsupported request type for request %s" % request
        logger.error(dbg_msg)
        self._saved_debug_message = dbg_msg
        return

    executed_cell_id = request.get("executed_cell_id", None)
    if self._active_cell_id is None:
        self.set_active_cell(executed_cell_id)
    order_index_by_id = request["order_index_by_cell_id"]
    cells().set_cell_positions(order_index_by_id)

    def _known_cells():
        # Lazily skip ids for which no cell object is available.
        for known_id in order_index_by_id:
            known_cell = cells().from_id(known_id)
            if known_cell is not None:
                yield known_cell

    checker_result = self.check_and_link_multiple_cells(
        cells_to_check=_known_cells(),
        last_executed_cell_id=executed_cell_id,
    )
    response = checker_result.to_json()
    settings = self.mut_settings
    extra_fields = (
        ("type", "cell_freshness"),
        ("exec_mode", settings.exec_mode.value),
        ("exec_schedule", settings.exec_schedule.value),
        ("flow_order", settings.flow_order.value),
        ("last_executed_cell_id", executed_cell_id),
        ("highlights_enabled", settings.highlights_enabled),
    )
    for field_name, field_value in extra_fields:
        response[field_name] = field_value
    if comm is not None:
        comm.send(response)
def _is_stale_at_position_impl(self, pos: int, deep: bool) -> bool:
    """Uncached staleness check of this symbol relative to notebook position ``pos``.

    For every recorded parent dependency, the position of the cell that
    introduced the dependency is looked up.  Dependencies introduced after
    ``pos`` are ignored, as are parent updates that happened in cells
    positioned after the introducing cell.  The symbol is stale when a
    remaining parent update is newer (by ``cell_num``) than the dependency,
    or when the parent is itself stale at the introducing cell's position.

    When ``deep`` is true, the symbols in ``namespace_stale_symbols`` are
    consulted as well.

    :param pos: notebook position to evaluate staleness at.
    :param deep: whether to also consider namespace-level stale symbols.
    :return: ``True`` if the symbol is stale at ``pos``, else ``False``.
    """
    for par, timestamps in self.parents.items():
        for ts in timestamps:
            dep_introduced_pos = cells().from_timestamp(ts).position
            if dep_introduced_pos > pos:
                continue
            for updated_ts in par.updated_timestamps:
                if cells().from_timestamp(updated_ts).position > dep_introduced_pos:
                    continue
                # ``is_stale_at_position`` expects a notebook *position*; the
                # previous code passed ``ts.cell_num`` (an execution counter),
                # mixing two unrelated coordinate systems.
                if updated_ts.cell_num > ts.cell_num or par.is_stale_at_position(
                    dep_introduced_pos
                ):
                    return True
    if deep:
        return any(
            sym.is_stale_at_position(pos) for sym in self.namespace_stale_symbols
        )
    return False
def visit(self, node: ast.AST):
    """Visit ``node`` with the parent visitor and refresh the current cell's AST.

    After the parent class's ``visit`` returns, the current cell's
    ``to_ast`` is called with the copy recorded for ``node`` in
    ``orig_to_copy_mapping`` as its override.

    Any exception is reported via
    ``nbs().set_exception_raised_during_execution``, its traceback is
    printed, and it is then re-raised to the caller.

    :param node: the AST node to visit.
    :return: whatever the parent class's ``visit`` returned.
    """
    try:
        ret = super().visit(node)
        cells().current_cell().to_ast(
            override=cast(ast.Module, self.orig_to_copy_mapping[id(node)])
        )
        return ret
    except Exception as e:
        nbs().set_exception_raised_during_execution(e)
        traceback.print_exc()
        # Bare ``raise`` re-raises the active exception with its original
        # traceback instead of appending this frame a second time.
        raise
def reset_cell_counter(self):
    """Clear dependency tables, per-symbol timestamps and the cell registry.

    Asserts that ``settings.store_history`` is off before resetting.
    """
    # only called in test context
    assert not self.settings.store_history
    for dep_table in (self.dynamic_data_deps, self.static_data_deps):
        dep_table.clear()
    for sym in self.all_data_symbols():
        # One shared uninitialized timestamp per symbol, assigned in the
        # same left-to-right order as a chained assignment would use.
        blank = Timestamp.uninitialized()
        sym._timestamp = blank
        sym._max_inner_timestamp = blank
        sym.required_timestamp = blank
        sym.timestamp_by_used_time.clear()
        sym.timestamp_by_liveness_time.clear()
    cells().clear()
def run_cell(
    cell_content: str,
    cell_id: Optional[int] = None,
    fresh_are_reactive: bool = False,
) -> Tuple[int, Set[int]]:
    """Run a cell, then keep running reactive cells until none remain.

    After each execution ``check_and_link_multiple_cells`` is consulted.
    Depending on ``fresh_are_reactive``, either its ``new_fresh_cells`` or
    its ``forced_reactive_cells`` are added to the reactive set, and the
    smallest not-yet-executed id from that set is run next.

    The original execution mode is restored and ``reactivity_cleanup`` is
    called on the way out, even when an exception propagates.

    :param cell_content: source of the first cell to run.
    :param cell_id: id for the first cell; forwarded to ``run_cell_``.
    :param fresh_are_reactive: switch to reactive execution mode for the
        duration of the call and treat newly fresh cells as reactive.
    :return: ``(cell_id, executed_cells)`` where ``cell_id`` is the id of
        the first executed cell (or the argument when nothing ran).
    """
    orig_mode = nbs().mut_settings.exec_mode
    try:
        if fresh_are_reactive:
            nbs().mut_settings.exec_mode = ExecutionMode.REACTIVE
        executed_cells = set()
        reactive_cells = set()
        content = cell_content
        target_id = cell_id
        while content is not None:
            executed_cells.add(run_cell_(content, cell_id=target_id))
            if len(executed_cells) == 1:
                # First iteration: remember the id actually assigned.
                cell_id = next(iter(executed_cells))
            checker_result = nbs().check_and_link_multiple_cells()
            newly_reactive = (
                checker_result.new_fresh_cells
                if fresh_are_reactive
                else checker_result.forced_reactive_cells
            )
            reactive_cells |= newly_reactive
            upcoming_id = next(iter(sorted(reactive_cells - executed_cells)), None)
            if upcoming_id is None:
                content = None
            else:
                content = cells().from_id(upcoming_id).content
                target_id = upcoming_id
        return cell_id, executed_cells
    finally:
        nbs().mut_settings.exec_mode = orig_mode
        nbs().reactivity_cleanup()
def is_stale_at_position(self, pos: int, deep: bool = True) -> bool:
    """Report whether this symbol is stale at notebook position ``pos``.

    A symbol that is not stale at all (``is_stale`` for deep checks,
    ``is_shallow_stale`` otherwise) is never stale at any position.  Under
    ``FlowOrder.ANY_ORDER`` a stale symbol is stale everywhere.  Otherwise
    the answer comes from ``_is_stale_at_position_impl`` and is memoised per
    ``(pos, deep)``; the memo is dropped whenever the execution counter has
    advanced past the value recorded at the last computation.

    :param pos: notebook position to check.
    :param deep: use the deep rather than the shallow staleness notion.
    :return: ``True`` if stale at ``pos``.
    """
    assert not hasattr(
        builtins, EMIT_EVENT
    ), "this should be called outside of tracing / execution context"

    stale_anywhere = self.is_stale if deep else self.is_shallow_stale
    if not stale_anywhere:
        return False
    if nbs().mut_settings.flow_order == FlowOrder.ANY_ORDER:
        return True

    current_counter = cells().exec_counter()
    if current_counter > self._last_computed_staleness_cache_ts:
        # A cell has run since the cache was filled; cached answers are void.
        self._is_stale_at_position_cache.clear()
        self._last_computed_staleness_cache_ts = current_counter

    cache_key = (pos, deep)
    try:
        return self._is_stale_at_position_cache[cache_key]
    except KeyError:
        pass
    verdict = self._is_stale_at_position_impl(pos, deep)
    self._is_stale_at_position_cache[cache_key] = verdict
    return verdict
async def safe_execute(self, cell_content: str, is_async: bool, run_cell_func):
    """Register, precheck and run a cell under the tracing context.

    Steps:

    1. Flush any saved debug message to the logger.
    2. Consume ``_active_cell_id`` (it must be set) and register the cell
       via ``cells().create_and_track``.
    3. Run the safety precheck on the cell.
    4. Invoke ``run_cell_func(cell_content)`` inside ``_tracing_context()``
       (awaited when ``is_async``), then resync the symbols whose timestamp
       belongs to the current execution counter and run ``_gc``.

    ``Exception`` subclasses raised in step 4 are suppressed; in test mode
    they are first recorded via ``set_exception_raised_during_execution``.
    Non-``Exception`` exceptions such as ``KeyboardInterrupt`` or
    ``asyncio.CancelledError`` now propagate; previously a ``return`` inside
    ``finally`` silently discarded them.

    :param cell_content: source text of the cell.
    :param is_async: whether ``run_cell_func`` returns an awaitable.
    :param run_cell_func: callable that actually executes the cell content.
    :return: the value returned by ``run_cell_func``, or ``None`` when it
        did not complete.
    """
    if self._saved_debug_message is not None:  # pragma: no cover
        logger.error(self._saved_debug_message)
        self._saved_debug_message = None
    ret = None
    with save_number_of_currently_executing_cell():
        cell_id, self._active_cell_id = self._active_cell_id, None
        assert cell_id is not None
        cell = cells().create_and_track(
            cell_id,
            cell_content,
            self._tags,
            validate_ipython_counter=self.settings.store_history,
        )

        # Stage 1: Precheck.
        self._safety_precheck_cell(cell)

        # Stage 2: Trace / run the cell, updating dependencies as they are encountered.
        try:
            with self._tracing_context():
                if is_async:
                    ret = await run_cell_func(cell_content)  # pragma: no cover
                else:
                    ret = run_cell_func(cell_content)
            # Stage 2.1: resync any defined symbols that could have gotten out-of-sync
            #  due to tracing being disabled
            current_counter = cells().exec_counter()
            self._resync_symbols(
                [
                    # TODO: avoid bad performance by only iterating over symbols updated in this cell
                    sym
                    for sym in self.all_data_symbols()
                    if sym.timestamp.cell_num == current_counter
                ]
            )
            self._gc()
        except Exception as e:
            # Deliberately best-effort: tracking failures are not surfaced
            # to the caller, they are only recorded when running under test.
            if self.is_test:
                self.set_exception_raised_during_execution(e)
        return ret
def test_int_change_to_str_triggers_typecheck():
    """Rebinding ``b`` from int to str flags the cell that uses ``a + b``."""
    setup_sources = ("a = 1", "b = 2", "logging.info(a + b)")
    for cell_id, source in enumerate(setup_sources, start=1):
        run_cell(source, cell_id)
        assert not get_cell_ids_needing_typecheck()

    run_cell('b = "b"', 4)
    flagged = get_cell_ids_needing_typecheck()
    assert flagged == {3}

    # Running the checker performs the pending typecheck and caches its result.
    nbs().check_and_link_multiple_cells()
    assert not get_cell_ids_needing_typecheck()
    assert cells().from_id(3)._cached_typecheck_result is False
def make_slice(line: str) -> Optional[str]:
    """Build the source text of a program slice from line-magic arguments.

    ``line`` is split with ``shlex`` and parsed by ``_SLICE_PARSER``.  The
    slice target is the cell given by ``cell_num``; otherwise the cells
    carrying ``tag``; otherwise (neither given) the cell at
    ``exec_counter() - 1``.  A tag starting with ``$`` has the prefix
    stripped and additionally marks the current cell as reactive for it.

    :param line: raw argument string of the magic.
    :return: the sliced cells joined by blank lines, each preceded by a
        ``# Cell <n>`` header, or ``None`` when the arguments cannot be
        parsed or no slice can be produced (a warning is logged then).
    """
    try:
        args = _SLICE_PARSER.parse_args(shlex.split(line))
    except (Exception, SystemExit):
        # argparse signals bad arguments via SystemExit and shlex raises
        # ValueError on unbalanced quotes.  Unlike a bare ``except`` this
        # lets KeyboardInterrupt through.
        return None

    tag = args.tag
    cell_num = args.cell_num
    if cell_num is None and tag is None:
        cell_num = cells().exec_counter() - 1

    slice_cells = None
    if cell_num is not None:
        slice_cells = {cells().from_timestamp(cell_num)}
    elif tag is not None:
        if tag.startswith("$"):
            tag = tag[1:]
            cells().current_cell().mark_as_reactive_for_tag(tag)
        slice_cells = cells().from_tag(tag)

    if slice_cells is None:
        logger.warning("Cell(s) have not yet been run")
        return None
    if not slice_cells and tag is not None:
        logger.warning("No cell(s) for tag: %s", tag)
        return None

    deps = sorted(
        cells().compute_slice_for_cells(slice_cells, stmt_level=args.stmt).items()
    )
    return "\n\n".join(
        f"# Cell {dep_cell_num}\n" + content for dep_cell_num, content in deps
    )
def run_cell(code, cell_id=None, ignore_exceptions=False) -> int:
    """Execute ``code`` through the safety cell magic and return its cell id.

    When ``cell_id`` is omitted the next execution counter is used.  After
    the run, ``sys.last_value`` is re-raised unless ``ignore_exceptions`` is
    set, provided a ``sys.last_traceback`` exists whose outermost frame has
    no caller.  ``sys.last_value`` and ``sys.last_traceback`` are reset to
    ``None`` afterwards in every case.
    """
    if cell_id is None:
        cell_id = cells().next_exec_counter()
    nbs().set_active_cell(cell_id)

    shell = get_ipython()
    magic_name = nbs().cell_magic_name
    shell.run_cell_magic(magic_name, None, textwrap.dedent(code))

    try:
        pending_error = None if ignore_exceptions else getattr(sys, "last_value", None)
        if pending_error is not None:
            last_tb = getattr(sys, "last_traceback", None)
            if last_tb is not None and last_tb.tb_frame.f_back is None:
                # then this was raised from non-test code (no idea why)
                raise pending_error
    finally:
        sys.last_value = None
        sys.last_traceback = None
    return cell_id
def __init__(self, cell_magic_name=None, use_comm=False, settrace=None, **kwargs):
    """Set up settings, bookkeeping tables and magics for the safety state.

    Recognised keyword options are popped from ``kwargs`` with the defaults
    shown below; the cell registry is cleared first.  When ``use_comm`` is
    true, ``_comm_target`` is registered with the kernel's comm manager
    under ``__package__``.

    :param cell_magic_name: name for the cell magic; no cell magic is
        created when ``None``.
    :param use_comm: whether to register a comm target with the kernel.
    :param settrace: accepted but not used in this constructor.
    """
    super().__init__()
    cells().clear()
    option = kwargs.pop

    # Immutable settings.
    immutable_settings = NotebookSafetySettings(
        store_history=option("store_history", True),
        test_context=option("test_context", False),
        use_comm=use_comm,
        track_dependencies=True,
        mark_stale_symbol_usages_unsafe=option("mark_stale_symbol_usages_unsafe", True),
        mark_typecheck_failures_unsafe=option("mark_typecheck_failures_unsafe", False),
        mark_phantom_cell_usages_unsafe=option("mark_phantom_cell_usages_unsafe", False),
        enable_reactive_modifiers=option("enable_reactive_modifiers", True),
        mode=SafetyRunMode.get(),
    )
    self.settings: NotebookSafetySettings = immutable_settings

    # Settings that may be changed while the session is running.
    mutable_settings = MutableNotebookSafetySettings(
        trace_messages_enabled=option("trace_messages_enabled", False),
        highlights_enabled=option("highlights_enabled", True),
        static_slicing_enabled=option("static_slicing_enabled", True),
        dynamic_slicing_enabled=option("dynamic_slicing_enabled", True),
        exec_mode=ExecutionMode(option("exec_mode", ExecutionMode.NORMAL)),
        exec_schedule=ExecutionSchedule(
            option("exec_schedule", ExecutionSchedule.LIVENESS_BASED)
        ),
        flow_order=FlowOrder(option("flow_order", FlowOrder.ANY_ORDER)),
    )
    self.mut_settings: MutableNotebookSafetySettings = mutable_settings

    # Note: explicitly adding the types helps PyCharm intellisense
    self.namespaces: Dict[int, Namespace] = {}
    # TODO: wrap this in something that clears the dict entry when the set is 0 length
    self.aliases: Dict[int, Set[DataSymbol]] = defaultdict(set)
    self.dynamic_data_deps: Dict[Timestamp, Set[Timestamp]] = defaultdict(set)
    self.static_data_deps: Dict[Timestamp, Set[Timestamp]] = defaultdict(set)
    self.global_scope: Scope = Scope()
    self.updated_symbols: Set[DataSymbol] = set()
    self.updated_reactive_symbols: Set[DataSymbol] = set()
    self.updated_deep_reactive_symbols: Set[DataSymbol] = set()
    self.blocked_reactive_timestamps_by_symbol: Dict[DataSymbol, int] = {}
    self.statement_to_func_cell: Dict[int, DataSymbol] = {}

    self._active_cell_id: Optional[CellId] = None
    self.safety_issue_detected = False
    self._cell_magic = (
        None if cell_magic_name is None else self._make_cell_magic(cell_magic_name)
    )
    self._line_magic = self._make_line_magic()
    self._prev_cell_stale_symbols: Set[DataSymbol] = set()
    self._cell_name_to_cell_num_mapping: Dict[str, int] = {}
    self._exception_raised_during_execution: Optional[Exception] = None
    self._saved_debug_message: Optional[str] = None
    self.min_timestamp = -1
    self._tags: Tuple[str, ...] = ()

    if use_comm:
        comm_manager = get_ipython().kernel.comm_manager
        comm_manager.register_target(__package__, self._comm_target)
def compute_slice(cell_num):
    """Return ``compute_slice()`` of the cell looked up by timestamp ``cell_num``."""
    target_cell = cells().from_timestamp(cell_num)
    return target_cell.compute_slice()
def check_and_link_multiple_cells(
    self,
    cells_to_check: Optional[Iterable[ExecutedCodeCell]] = None,
    update_liveness_time_versions: bool = False,
    last_executed_cell_id: Optional[CellId] = None,
) -> FrontendCheckerResult:
    """Classify cells as stale / fresh and link stale cells to their refreshers.

    Cells are processed in position order.  For each one the live and dead
    symbols are resolved (cells that fail with ``SyntaxError`` are skipped)
    and, depending on the configured execution schedule and flow order, the
    cell is marked stale and/or fresh.  Under the DAG-based schedule
    staleness is then propagated along parent edges until a fixed point is
    reached.  Finally, every stale cell is linked to the cells that could
    refresh it, links are closed transitively through other stale cells, and
    the inverse "refresher -> stale" mapping is built.

    :param cells_to_check: cells to analyse; defaults to the most recently
        run cell for each id.
    :param update_liveness_time_versions: forwarded to each cell's
        ``check_and_resolve_symbols``.
    :param last_executed_cell_id: id of the cell that just ran, if any.  Its
        tags contribute forced-reactive cells, it is never reported as a
        refresher, and under in-order flow cells at or before its position
        are not classified.
    :return: a ``FrontendCheckerResult`` whose ``stale_cells`` also includes
        cells that failed typechecking.
    """
    SafetyTracer.instance()  # force initialization here in case not already inited

    mut_settings = self.mut_settings
    in_order = mut_settings.flow_order == FlowOrder.IN_ORDER
    exec_schedule = mut_settings.exec_schedule
    dag_based = exec_schedule == ExecutionSchedule.DAG_BASED
    strict = exec_schedule == ExecutionSchedule.STRICT
    liveness_based = exec_schedule == ExecutionSchedule.LIVENESS_BASED
    dynamic_slicing = mut_settings.dynamic_slicing_enabled
    static_slicing = mut_settings.static_slicing_enabled

    def _has_newer_parent(child, parents, cutoff) -> bool:
        # True when some eligible parent cell ran after ``cutoff``; under
        # in-order flow only parents positioned before the child count.
        return any(
            par.cell_ctr > cutoff
            for par in parents
            if not (in_order and par.position >= child.position)
        )

    stale_cells = set()
    # Nothing currently populates this set; it is kept so that the result
    # union below stays in place for order-based checks.
    unsafe_order_cells: Set[CellId] = set()
    typecheck_error_cells = set()
    fresh_cells = set()
    new_fresh_cells = set()
    forced_reactive_cells = set()
    stale_symbols_by_cell_id: Dict[CellId, Set[DataSymbol]] = {}
    killing_cell_ids_for_symbol: Dict[DataSymbol, Set[CellId]] = defaultdict(set)
    phantom_cell_info: Dict[CellId, Dict[CellId, Set[int]]] = {}
    checker_results_by_cid = {}

    last_executed_cell_pos = None
    if last_executed_cell_id is not None:
        last_executed_cell = cells().from_id(last_executed_cell_id)
        # Guard against an id for which no cell object is available.
        if last_executed_cell is not None:
            last_executed_cell_pos = last_executed_cell.position
            for tag in last_executed_cell.tags:
                forced_reactive_cells.update(cells().get_reactive_ids_for_tag(tag))

    if cells_to_check is None:
        cells_to_check = cells().all_cells_most_recently_run_for_each_id()
    cells_to_check = sorted(cells_to_check, key=lambda c: c.position)

    for cell in cells_to_check:
        try:
            checker_result = cell.check_and_resolve_symbols(
                update_liveness_time_versions=update_liveness_time_versions
            )
        except SyntaxError:
            continue
        cell_id = cell.cell_id
        checker_results_by_cid[cell_id] = checker_result
        if (
            in_order
            and last_executed_cell_pos is not None
            and cell.position <= last_executed_cell_pos
        ):
            continue

        if liveness_based:
            stale_symbols = {
                sym.dsym
                for sym in checker_result.live
                if sym.is_stale_at_position(cell.position)
            }
        else:
            stale_symbols = set()
        if stale_symbols:
            stale_symbols_by_cell_id[cell_id] = stale_symbols
            stale_cells.add(cell_id)
        if not checker_result.typechecks:
            typecheck_error_cells.add(cell_id)
        for dead_sym in checker_result.dead:
            killing_cell_ids_for_symbol[dead_sym].add(cell_id)

        is_fresh = cell_id not in stale_cells
        if self.settings.mark_phantom_cell_usages_unsafe:
            phantom_cell_info_for_cell = cell.compute_phantom_cell_info(
                checker_result.used_cells
            )
            if phantom_cell_info_for_cell:
                phantom_cell_info[cell_id] = phantom_cell_info_for_cell

        freshness_cutoff = max(cell.cell_ctr, self.min_timestamp)
        if dag_based:
            is_fresh = dynamic_slicing and _has_newer_parent(
                cell, cell.dynamic_parents, freshness_cutoff
            )
            if not is_fresh and static_slicing:
                is_fresh = _has_newer_parent(
                    cell, cell.static_parents, freshness_cutoff
                )
        else:
            is_fresh = is_fresh and (
                cell.get_max_used_live_symbol_cell_counter(checker_result.live)
                > freshness_cutoff
            )
        if strict:
            for dead_sym in checker_result.dead:
                if dead_sym.timestamp.cell_num > freshness_cutoff:
                    is_fresh = True
        if is_fresh:
            fresh_cells.add(cell_id)
        if not cells().from_id(cell_id).set_fresh(is_fresh) and is_fresh:
            new_fresh_cells.add(cell_id)
        if is_fresh and strict:
            break

    if dag_based:
        # Propagate staleness along parent edges until nothing changes.
        prev_stale_cells: Set[CellId] = set()
        while True:
            for cell in cells_to_check:
                if cell.cell_id in stale_cells:
                    continue
                if dynamic_slicing:
                    if cell.dynamic_parent_ids & (fresh_cells | stale_cells):
                        stale_cells.add(cell.cell_id)
                        continue
                if static_slicing:
                    if cell.static_parent_ids & (fresh_cells | stale_cells):
                        stale_cells.add(cell.cell_id)
            if prev_stale_cells == stale_cells:
                break
            prev_stale_cells = set(stale_cells)
        fresh_cells -= stale_cells
        new_fresh_cells -= stale_cells
        for cell_id in stale_cells:
            cells().from_id(cell_id).set_fresh(False)

    if mut_settings.exec_mode != ExecutionMode.REACTIVE:
        for cell_id in new_fresh_cells:
            if cell_id not in checker_results_by_cid:
                continue
            cell = cells().from_id(cell_id)
            if cell.get_max_used_live_symbol_cell_counter(
                checker_results_by_cid[cell_id].live, filter_to_reactive=True
            ) > max(cell.cell_ctr, self.min_timestamp):
                forced_reactive_cells.add(cell_id)

    stale_links: Dict[CellId, Set[CellId]] = defaultdict(set)
    refresher_links: Dict[CellId, Set[CellId]] = defaultdict(set)
    eligible_refresher_for_dag = fresh_cells | stale_cells
    for stale_cell_id in stale_cells:
        refresher_cell_ids: Set[CellId] = set()
        # This used to compare ``flow_order`` against an ``ExecutionSchedule``
        # member, which can never be equal, so the DAG branch never ran.
        if dag_based:
            stale_cell = cells().from_id(stale_cell_id)
            if dynamic_slicing:
                refresher_cell_ids |= (
                    stale_cell.dynamic_parent_ids & eligible_refresher_for_dag
                )
            if static_slicing:
                refresher_cell_ids |= (
                    stale_cell.static_parent_ids & eligible_refresher_for_dag
                )
        else:
            stale_syms = stale_symbols_by_cell_id.get(stale_cell_id, set())
            refresher_cell_ids = refresher_cell_ids.union(
                *(killing_cell_ids_for_symbol[stale_sym] for stale_sym in stale_syms)
            )
        if in_order:
            stale_cell_pos = cells().from_id(stale_cell_id).position
            refresher_cell_ids = {
                cid
                for cid in refresher_cell_ids
                if cells().from_id(cid).position < stale_cell_pos
            }
        if last_executed_cell_id is not None:
            refresher_cell_ids.discard(last_executed_cell_id)
        stale_links[stale_cell_id] = refresher_cell_ids

    stale_link_changes = True
    # transitive closure up until we hit non-stale refresher cells
    while stale_link_changes:
        stale_link_changes = False
        for stale_cell_id in stale_cells:
            new_stale_links = set(stale_links[stale_cell_id])
            original_length = len(new_stale_links)
            for refresher_cell_id in stale_links[stale_cell_id]:
                if refresher_cell_id not in stale_cells:
                    continue
                new_stale_links |= stale_links[refresher_cell_id]
            new_stale_links.discard(stale_cell_id)
            stale_link_changes = stale_link_changes or original_length != len(
                new_stale_links
            )
            stale_links[stale_cell_id] = new_stale_links

    for stale_cell_id in stale_cells:
        stale_links[stale_cell_id] -= stale_cells
        for refresher_cell_id in stale_links[stale_cell_id]:
            refresher_links[refresher_cell_id].add(stale_cell_id)

    return FrontendCheckerResult(
        # TODO: we should probably have separate fields for stale vs non-typechecking cells,
        #  or at least change the name to a more general "unsafe_cells" or equivalent
        stale_cells=stale_cells | typecheck_error_cells | unsafe_order_cells,
        fresh_cells=fresh_cells,
        new_fresh_cells=new_fresh_cells,
        forced_reactive_cells=forced_reactive_cells,
        stale_links=stale_links,
        refresher_links=refresher_links,
        phantom_cell_info=phantom_cell_info,
    )
def reactivity_cleanup(self) -> None:
    """Clear the fresh flag on the most recently run cell of every id."""
    latest_cells = cells().all_cells_most_recently_run_for_each_id()
    for executed_cell in latest_cells:
        executed_cell.set_fresh(False)
def set_name_to_cell_num_mapping(self, frame: FrameType):
    """Record the current execution counter under the frame's code filename."""
    code_filename = frame.f_code.co_filename
    current_counter = cells().exec_counter()
    self._cell_name_to_cell_num_mapping[code_filename] = current_counter
def get_cell_ids_needing_typecheck() -> Set[CellId]:
    """Return the ids of most-recently-run cells whose ``needs_typecheck`` is set."""
    latest_cells = cells().all_cells_most_recently_run_for_each_id()
    return {
        executed_cell.cell_id
        for executed_cell in latest_cells
        if executed_cell.needs_typecheck
    }
def add_static_data_dep(self, child: Timestamp, parent: Timestamp):
    """Record a static dependency of ``child`` on ``parent``.

    The edge is stored in ``static_data_deps`` and mirrored at the cell
    level by registering the parent's cell as a static parent of the
    child's cell.
    """
    self.static_data_deps[child].add(parent)
    child_cell = cells().from_timestamp(child)
    parent_cell = cells().from_timestamp(parent)
    child_cell.add_static_parent(parent_cell)
def test_simple():
    """Dynamic parent/child links are created on use and survive later re-runs."""

    def parents_of(cell_id):
        return cells().from_id(cell_id).dynamic_parent_ids

    def children_of(cell_id):
        return cells().from_id(cell_id).dynamic_children_ids

    def check_links_around_cell_3():
        assert parents_of(3) == {1, 2}
        assert children_of(1) == {2, 3}
        assert children_of(2) == {3}

    run_cell("x = 0")
    run_cell("y = x + 1")
    assert parents_of(2) == {1}
    assert children_of(1) == {2}, "got %s" % cells().from_id(1)._dynamic_children

    run_cell("z = x + y + 2")
    check_links_around_cell_3()

    # Redefining x and y in new cells must not disturb the existing edges.
    run_cell("x = 42")
    check_links_around_cell_3()

    run_cell("y = x + 1")
    check_links_around_cell_3()
    assert parents_of(5) == {4}
def cell_counter() -> int:
    """Return the current execution counter reported by ``cells()``."""
    current_counter = cells().exec_counter()
    return current_counter
def __init__(
    self,
    name: SupportedIndexType,
    symbol_type: DataSymbolType,
    obj: Any,
    containing_scope: "Scope",
    stmt_node: Optional[ast.AST] = None,
    # TODO: also keep a reference to the target node?
    refresh_cached_obj: bool = False,
    implicit: bool = False,
):
    """Create a data symbol bound to ``obj`` inside ``containing_scope``.

    The symbol starts with an uninitialized timestamp, empty parent/child
    tables and empty usage histories, and registers itself in
    ``nbs().aliases`` under ``id(obj)``.

    :param name: name or index under which the symbol is stored.
    :param symbol_type: kind of symbol being created.
    :param obj: the object the symbol refers to.
    :param containing_scope: scope that owns the symbol.
    :param stmt_node: statement that defined the symbol, if any.
    :param refresh_cached_obj: refresh the cached object info right away;
        only allowed for implicit symbols created without a statement node.
    :param implicit: whether the symbol was created implicitly.
    """
    if refresh_cached_obj:
        # TODO: clean up redundancies
        assert implicit
        assert stmt_node is None

    # --- identity and cached object info ---
    self.name = name
    self.symbol_type = symbol_type
    self.obj = obj
    self._tombstone = False
    self._cached_out_of_sync = True
    self.cached_obj_id = None
    self.cached_obj_type = None
    if refresh_cached_obj:
        self._refresh_cached_obj()

    # --- placement in scopes / statements ---
    self.containing_scope = containing_scope
    self.stmt_node = self.update_stmt_node(stmt_node)
    self._funcall_live_symbols = None
    self.parents: Dict["DataSymbol", List[Timestamp]] = defaultdict(list)
    self.children: Dict["DataSymbol", List[Timestamp]] = defaultdict(list)
    self.call_scope: Optional[Scope] = (
        self.containing_scope.make_child_scope(self.name)
        if self.is_function
        else None
    )

    # --- timestamps and versions ---
    # initialize at -1 since the corresponding piece of data could already be around,
    # and we don't want liveness checker to think this was newly created unless we
    # explicitly trace an update somewhere
    self._timestamp: Timestamp = Timestamp.uninitialized()
    # The version is a simple counter not associated with cells that is bumped whenever the timestamp is updated
    self._version: int = 0
    self._defined_cell_num = cells().exec_counter()
    # The necessary last-updated timestamp / cell counter for this symbol to not be stale
    self.required_timestamp: Timestamp = self.timestamp
    # for each usage of this dsym, the version that was used, if different from the timestamp of usage
    self.timestamp_by_used_time: Dict[Timestamp, Timestamp] = {}
    # History of definitions at time of liveness
    self.timestamp_by_liveness_time: Dict[Timestamp, Timestamp] = {}
    # All timestamps associated with this symbol
    self.updated_timestamps: Set[Timestamp] = set()
    self.fresher_ancestors: Set["DataSymbol"] = set()
    self.fresher_ancestor_timestamps: Set[Timestamp] = set()

    # --- liveness bookkeeping and staleness cache ---
    # cells where this symbol was live
    self.cells_where_deep_live: Set[ExecutedCodeCell] = set()
    self.cells_where_shallow_live: Set[ExecutedCodeCell] = set()
    self._last_computed_staleness_cache_ts: int = -1
    self._is_stale_at_position_cache: Dict[Tuple[int, bool], bool] = {}

    # if implicitly created when tracing non-store-context ast nodes
    self._implicit = implicit
    # Will never be stale if disable_warnings is True
    self.disable_warnings = False
    self._temp_disable_warnings = False

    nbs().aliases[id(obj)].add(self)

    may_rename_namespace = (
        isinstance(self.name, str)
        and not self.is_anonymous
        and not self.containing_scope.is_namespace_scope
    )
    if not may_rename_namespace:
        return
    ns = self.namespace
    if ns is None or ns.scope_name != "self":
        return
    # hack to get a better name than `self.whatever` for fields of this object
    # not ideal because it relies on the `self` convention but is probably
    # acceptable for the use case of improving readable names
    ns.scope_name = self.name