def create_watchpoint(self, params):
    """
    Create a watchpoint from a request payload.

    Args:
        params (dict): Request parameters. The keys read here are:

            - watch_condition (dict): The watch condition. The format is like:
              {
                  "id": "tensor_too_large",
                  "params": [
                      {
                          "name": "abs_mean_gt",
                          "value": 1.1
                      }
                  ]
              }

              - id (str): Id of condition.
              - params (list[dict]): The list of param for this condition.
            - watch_nodes (list[str]): The list of node names.
            - watch_point_id (int): The id of watchpoint.
            - search_pattern (dict): The search pattern.
            - graph_name (str): The relative graph_name of the watched node.

    Returns:
        dict, the 'state' and 'enable_recheck' metadata fields plus the 'id'
        of the new watchpoint.

    Raises:
        DebuggerCreateWatchPointError: If the metadata stream is not in the
            waiting state.
        DebuggerConditionUnavailableError: If the requested condition reports
            itself unavailable for the current backend and step.
    """
    condition_spec = params.get('watch_condition')
    log.info("Received create watchpoint request. WatchCondition: %s", condition_spec)

    # Watchpoints can only be created while the server is waiting.
    metadata = self._metadata_stream
    if metadata.state != ServerStatus.WAITING.value:
        not_waiting_msg = "Failed to create watchpoint as the MindSpore is not in waiting state."
        log.error(not_waiting_msg)
        raise DebuggerCreateWatchPointError(not_waiting_msg)

    self._validate_watch_condition(condition_spec)

    # NOTE(review): the value returned here is never used; the watchpoint below
    # is created with the nodes from get_basic_node_info() instead. The call is
    # kept because it may validate its inputs -- confirm which node set is intended.
    self._get_watch_node_with_basic_info(
        node_names=params.get('watch_nodes'),
        search_pattern=params.get('search_pattern'),
        graph_name=params.get('graph_name'),
    )

    condition_mgr = self._condition_mgr
    validate_watch_condition(condition_mgr, condition_spec)
    condition = condition_mgr.get_condition(condition_spec.get('id'))
    context = ConditionContext(metadata.backend, metadata.step)
    if not condition.is_available(context):
        unavailable_msg = "Failed to create watchpoint as the condition is not available."
        log.error(unavailable_msg)
        raise DebuggerConditionUnavailableError(unavailable_msg)

    target_nodes = get_basic_node_info(
        condition.supported_target_type.value, self._graph_stream).copy()
    watchpoints = self._watchpoint_stream
    new_id = watchpoints.create_watchpoint(
        condition_mgr, condition_spec, target_nodes, params.get('watch_point_id'))
    log.info("Create watchpoint %d", new_id)

    # Refresh the recheck flag before reporting the metadata back.
    metadata.enable_recheck = watchpoints.is_recheckable()
    reply = metadata.get(['state', 'enable_recheck'])
    reply['id'] = new_id
    return reply
def get_condition_collections(self, train_id):
    """
    Get the condition collections for the current backend and step.

    Args:
        train_id: Identifier of the training job; in this method it is only
            written to the debug log.

    Returns:
        The value returned by ``condition_mgr.get_all_collections`` for a
        condition context built from the metadata stream's backend and step.
    """
    metadata = self.cache_store.get_stream_handler(Streams.METADATA)
    backend, step = metadata.backend, metadata.step
    context = ConditionContext(backend, step)
    log.debug("Train_id: %s, backend: %s", train_id, context.backend)
    collections = self.condition_mgr.get_all_collections(context)
    return collections
def set_recommended_watch_points(self, set_recommended, train_id):
    """
    Record the user's choice about recommended watchpoints and apply it.

    Args:
        set_recommended (bool): Whether the recommended watchpoints should be
            added.
        train_id: Identifier of the training job; in this method it is only
            written to the debug log.

    Returns:
        dict, the 'state' and 'enable_recheck' metadata fields. An 'id' entry
        holding the result of ``_add_recommended_watchpoints`` is added when
        recommended watchpoints were created by this call.

    Raises:
        DebuggerParamValueError: If ``set_recommended`` is not a bool.
    """
    if not isinstance(set_recommended, bool):
        log.error("Bool param should be given for set_recommended")
        raise DebuggerParamValueError("Bool param should be given.")

    metadata = self.cache_store.get_stream_handler(Streams.METADATA)
    context = ConditionContext(metadata.backend, metadata.step)
    log.debug("Train_id: %s, backend: %s", train_id, context.backend)

    reply = metadata.get(['state', 'enable_recheck'])
    # Only add the recommended watchpoints once: skip if already confirmed.
    should_add = set_recommended and not metadata.recommendation_confirmed
    if should_add:
        reply['id'] = self._add_recommended_watchpoints(context)

    # NOTE(review): the original layout was collapsed onto one line; this
    # assignment is placed after the if-block, so every valid call marks the
    # recommendation as confirmed -- confirm against the original indentation.
    metadata.recommendation_confirmed = True
    return reply