def test_retrieve_when_version_mismatch(self, app_client, body_data, expect_file):
    """Test retrieve when the debugger version mismatches."""
    url = 'retrieve'
    with self._debugger_client.get_thread_instance():
        # the server is expected to be in MISMATCH state before retrieving
        check_state(app_client, ServerStatus.MISMATCH.value)
        send_and_compare_result(app_client, url, body_data, expect_file)
        send_terminate_cmd(app_client)
def test_next_node_on_gpu(self, app_client):
    """Verify stepping to the next node on a GPU backend."""
    debugger_client = MockDebuggerClient(backend='GPU')
    with debugger_client.get_thread_instance():
        check_state(app_client)
        # run until the given node to trigger a watchpoint hit
        run_cmd = {
            'mode': 'continue',
            'level': 'node',
            'name': 'Default/TransData-op99'
        }
        run_res = get_request_result(app_client, 'control', run_cmd)
        expected_reply = {'metadata': {'state': 'sending', 'enable_recheck': False}}
        assert run_res == expected_reply
        # wait until the server is ready, then compare the retrieved metadata
        check_state(app_client)
        send_and_compare_result(
            app_client, 'retrieve', {'mode': 'all'}, 'retrieve_next_node_on_gpu.json')
        send_terminate_cmd(app_client)
def test_compare_tensor_value(self, app_client):
    """Compare tensor values and check the reply against the expected file."""
    node_name = 'Default/args0'
    with self._debugger_client.get_thread_instance():
        check_state(app_client)
        # continue training for 2 steps to prepare tensor values
        get_request_result(app_client, 'control', {'mode': 'continue', 'steps': 2})
        check_state(app_client)
        get_request_result(
            app_client=app_client, url='tensor-history', body_data={'name': node_name})
        poll_res = get_request_result(
            app_client=app_client, url='poll-data', body_data={'pos': 0}, method='get')
        assert poll_res.get('receive_tensor', {}).get('node_name') == node_name
        # query the comparison result for the first output slot
        compare_params = {
            'name': f'{node_name}:0',
            'detail': 'data',
            'shape': quote('[:, :]'),
            'tolerance': 1
        }
        send_and_compare_result(
            app_client, 'tensor-comparisons', compare_params, 'compare_tensors.json',
            method='get')
        send_terminate_cmd(app_client)
def test_search_by_category(self, app_client, filter_condition, expect_file):
    """Test search nodes by category."""
    with self._debugger_client.get_thread_instance():
        check_state(app_client)
        send_and_compare_result(app_client, 'search', filter_condition, expect_file, method='get')
        send_terminate_cmd(app_client)
def test_continue_on_gpu(self, app_client, params, expect_file):
    """Test continue command on GPU."""
    gpu_debugger_client = MockDebuggerClient(backend='GPU', graph_num=2)
    original_value = settings.ENABLE_RECOMMENDED_WATCHPOINTS
    # enable recommended watchpoints for this test only
    settings.ENABLE_RECOMMENDED_WATCHPOINTS = True
    try:
        with gpu_debugger_client.get_thread_instance():
            check_state(app_client)
            # send run command to get watchpoint hit
            url = 'control'
            body_data = {'mode': 'continue'}
            body_data.update(params)
            res = get_request_result(app_client, url, body_data)
            assert res == {'metadata': {'state': 'sending', 'enable_recheck': False}}
            # get metadata
            check_state(app_client)
            url = 'retrieve'
            body_data = {'mode': 'all'}
            send_and_compare_result(app_client, url, body_data, expect_file)
            send_terminate_cmd(app_client)
    finally:
        # restore the global setting even if the test fails
        settings.ENABLE_RECOMMENDED_WATCHPOINTS = original_value
def test_compare_tensor_value(self, app_client):
    """Compare tensor values for rank 0 and check against the expected file."""
    node_name = 'Default/args0'
    with self._debugger_client.get_thread_instance():
        check_state(app_client)
        # continue training for 2 steps to prepare tensor values
        get_request_result(app_client, 'control', {'mode': 'continue', 'steps': 2})
        check_state(app_client)
        get_request_result(
            app_client=app_client, url='tensor-history',
            body_data={'name': node_name, 'rank_id': 0})
        poll_res = get_request_result(
            app_client=app_client, url='poll-data', body_data={'pos': 0}, method='get')
        assert poll_res.get('receive_tensor', {}).get('node_name') == node_name, \
            'Node name unmatched.'
        # query the comparison result for the first output slot
        url = 'tensor-comparisons'
        body_data = {
            'name': f'{node_name}:0',
            'detail': 'data',
            'shape': quote('[:, :]'),
            'tolerance': 1,
            'rank_id': 0}
        get_request_result(app_client, url, body_data, method='GET')
        # sleep 0.01 second to wait the tensor update
        time.sleep(0.01)
        poll_res = get_request_result(
            app_client=app_client, url='poll-data', body_data={'pos': 0}, method='get')
        assert poll_res.get('receive_tensor', {}).get('node_name') == node_name, \
            'Node name unmatched.'
        expect_file = 'compare_tensors.json'
        if self.save_results:
            send_and_save_result(app_client, url, body_data, expect_file, method='get')
        send_and_compare_result(app_client, url, body_data, expect_file, method='get')
        send_terminate_cmd(app_client)
def test_retrieve_tensor_value(self, app_client):
    """Retrieve a slice of a tensor value and compare with the expected file."""
    node_name = 'Default/TransData-op99'
    with self._debugger_client.get_thread_instance():
        check_state(app_client)
        # request the tensor history first so the tensor value gets prepared
        get_request_result(
            app_client, 'tensor-history', {'name': node_name, 'rank_id': 0}, method='post')
        get_request_result(
            app_client=app_client, url='poll-data', body_data={'pos': 0}, method='get')
        url = 'tensors'
        body_data = {
            'name': f'{node_name}:0',
            'detail': 'data',
            'shape': quote('[1, 1:3]')
        }
        get_request_result(app_client, url, body_data, method='GET')
        # sleep 0.01 second to wait the tensor update
        time.sleep(0.01)
        poll_res = get_request_result(
            app_client=app_client, url='poll-data', body_data={'pos': 0}, method='get')
        assert poll_res.get('receive_tensor', {}).get('node_name') == node_name, \
            'Node name unmatched.'
        expect_file = 'retrieve_tensor_value.json'
        if self.save_results:
            send_and_save_result(app_client, url, body_data, expect_file, method='get')
        send_and_compare_result(app_client, url, body_data, expect_file, method='get')
        send_terminate_cmd(app_client)
def test_update_watchpoint(self, app_client):
    """Test update watchpoint."""
    watch_point_id = 1
    leaf_node_name = 'Default/optimizer-Momentum/Parameter[18]_7/moments.fc3.bias'
    with self._debugger_client.get_thread_instance():
        check_state(app_client)
        condition = {
            'id': 'tensor_too_large',
            'params': [{
                'name': 'max_gt',
                'value': 1.0
            }]
        }
        create_watchpoint(app_client, condition, watch_point_id)
        # update the watch node list of the watchpoint
        url = 'update-watchpoint'
        body_data = {
            'watch_point_id': watch_point_id,
            'watch_nodes': [leaf_node_name],
            'mode': 1
        }
        get_request_result(app_client, url, body_data)
        # get updated nodes
        url = 'search'
        body_data = {
            'name': leaf_node_name,
            'watch_point_id': watch_point_id
        }
        expect_file = 'search_unwatched_leaf_node.json'
        send_and_compare_result(app_client, url, body_data, expect_file, method='get')
        send_terminate_cmd(app_client)
def test_create_and_delete_watchpoint(self, app_client):
    """Create several watchpoints, delete one and verify the remaining list."""
    with self._debugger_client.get_thread_instance():
        check_state(app_client)
        conditions = [
            {'id': 'tensor_too_large', 'params': [{'name': 'max_gt', 'value': 1.0}]},
            {'id': 'tensor_too_small', 'params': [{'name': 'max_lt', 'value': -1.0}]},
            {'id': 'tensor_too_large', 'params': [{'name': 'min_gt', 'value': 1e+32}]},
            {'id': 'tensor_too_small', 'params': [{'name': 'min_lt', 'value': -1e+32}]},
            {'id': 'tensor_too_large', 'params': [{'name': 'mean_gt', 'value': 0}]},
            {'id': 'tensor_too_small', 'params': [{'name': 'mean_lt', 'value': 0}]}
        ]
        # watchpoint ids start from 1
        for watch_point_id, condition in enumerate(conditions, start=1):
            create_watchpoint(app_client, condition, watch_point_id)
        # remove the 4th watchpoint
        get_request_result(app_client, 'delete-watchpoint', {'watch_point_id': 4})
        # the remaining watchpoint list should match the expected file
        url = 'retrieve'
        body_data = {'mode': 'watchpoint'}
        expect_file = 'create_and_delete_watchpoint.json'
        if self.save_results:
            send_and_save_result(app_client, url, body_data, expect_file)
        send_and_compare_result(app_client, url, body_data, expect_file)
        send_terminate_cmd(app_client)
def test_search_by_category_with_multi_graph(self, app_client, filter_condition, expect_file):
    """Search nodes by category when multiple graphs are loaded."""
    url = 'search'
    with self._debugger_client.get_thread_instance():
        check_state(app_client)
        if self.save_results:
            send_and_save_result(app_client, url, filter_condition, expect_file, method='get')
        send_and_compare_result(app_client, url, filter_condition, expect_file, method='get')
        send_terminate_cmd(app_client)
def test_retrieve_when_train_begin(self, app_client, body_data, expect_file):
    """Retrieve data right after the training begins."""
    with self._debugger_client.get_thread_instance():
        check_state(app_client)
        send_and_compare_result(app_client, 'retrieve', body_data, expect_file)
        send_terminate_cmd(app_client)
def test_retrieve_tensor_hits(self, app_client, body_data, expect_file):
    """Test retrieve tensor hits."""
    url = 'tensor-hits'
    with self._debugger_client.get_thread_instance():
        check_state(app_client)
        if self.save_results:
            send_and_save_result(app_client, url, body_data, expect_file, method='GET')
        send_and_compare_result(app_client, url, body_data, expect_file, method='GET')
        send_terminate_cmd(app_client)
def test_multi_retrieve_when_train_begin(self, app_client, body_data, expect_file):
    """Retrieve data at training begin with multiple graphs loaded."""
    multi_graph_client = MockDebuggerClient(backend='Ascend', graph_num=2)
    with multi_graph_client.get_thread_instance():
        check_state(app_client)
        send_and_compare_result(app_client, 'retrieve', body_data, expect_file)
        send_terminate_cmd(app_client)
def test_get_conditions(self, app_client):
    """Fetch the condition collections for the Ascend backend."""
    url = '/v1/mindinsight/debugger/sessions/0/condition-collections'
    body_data = {}
    expect_file = 'get_conditions_for_ascend.json'
    with self._debugger_client.get_thread_instance():
        check_state(app_client)
        if self.save_results:
            send_and_save_result(
                app_client, url, body_data, expect_file, method='get', full_url=True)
        send_and_compare_result(
            app_client, url, body_data, expect_file, method='get', full_url=True)
        send_terminate_cmd(app_client)
def test_retrieve_bfs_node(self, app_client, body_data, expect_file):
    """Test retrieve bfs node."""
    with self._debugger_client.get_thread_instance():
        check_state(app_client)
        # query a node by BFS order and compare with the expected file
        url = 'retrieve_node_by_bfs'
        send_and_compare_result(app_client, url, body_data, expect_file, method='get')
        send_terminate_cmd(app_client)
def test_recommend_watchpoints(self, app_client):
    """Check that recommended watchpoints are generated at startup."""
    original_value = settings.ENABLE_RECOMMENDED_WATCHPOINTS
    settings.ENABLE_RECOMMENDED_WATCHPOINTS = True
    try:
        with self._debugger_client.get_thread_instance():
            check_state(app_client)
            send_and_compare_result(
                app_client, 'retrieve', {'mode': 'watchpoint'},
                'recommended_watchpoints_at_startup.json', method='post')
            send_terminate_cmd(app_client)
    finally:
        # restore the global setting even if the test fails
        settings.ENABLE_RECOMMENDED_WATCHPOINTS = original_value
def test_get_conditions(self, app_client):
    """Fetch the condition collections for the GPU backend."""
    with self._debugger_client.get_thread_instance():
        check_state(app_client)
        send_and_compare_result(
            app_client,
            '/v1/mindinsight/conditionmgr/train-jobs/train-id/condition-collections',
            {},
            'get_conditions_for_gpu.json',
            method='get',
            full_url=True)
        send_terminate_cmd(app_client)
def test_retrieve_tensor_graph(self, app_client, body_data, expect_file):
    """Retrieve the tensor graph and compare with the expected file."""
    url = 'tensor-graphs'
    with self._debugger_client.get_thread_instance():
        create_watchpoint_and_wait(app_client)
        get_request_result(app_client, url, body_data, method='GET')
        # sleep 0.01 second to wait the tensor update
        time.sleep(0.01)
        # the poll result should carry the queried tensor name
        poll_res = get_request_result(
            app_client=app_client, url='poll-data', body_data={'pos': 0}, method='get')
        assert poll_res.get('receive_tensor', {}).get('tensor_name') == body_data.get('tensor_name')
        if self.save_results:
            send_and_save_result(app_client, url, body_data, expect_file, method='GET')
        send_and_compare_result(app_client, url, body_data, expect_file, method='GET')
        send_terminate_cmd(app_client)
def test_retrieve_tensor_history(self, app_client):
    """Retrieve tensor history before and after tensor values arrive."""
    node_name = 'Default/TransData-op99'
    with self._debugger_client.get_thread_instance():
        check_state(app_client)
        url = 'tensor-history'
        body_data = {'name': node_name, 'rank_id': 0}
        # first reply is expected to match the empty-history file
        expect_file = 'retrieve_empty_tensor_history.json'
        if self.save_results:
            send_and_save_result(app_client, url, body_data, expect_file)
        send_and_compare_result(app_client, url, body_data, expect_file)
        # poll-data should report the tensor for the queried node
        poll_res = get_request_result(
            app_client=app_client, url='poll-data', body_data={'pos': 0}, method='get')
        assert poll_res.get('receive_tensor', {}).get('node_name') == node_name, \
            'Node name unmatched.'
        # afterwards the full history should match the expected file
        expect_file = 'retrieve_full_tensor_history.json'
        if self.save_results:
            send_and_save_result(app_client, url, body_data, expect_file)
        send_and_compare_result(app_client, url, body_data, expect_file)
        send_terminate_cmd(app_client)
def test_retrieve_tensor_value(self, app_client):
    """Retrieve a tensor value after its history becomes available."""
    node_name = 'Default/TransData-op99'
    with self._debugger_client.get_thread_instance():
        check_state(app_client)
        # the first history query should match the empty-history file
        history_body = {'name': node_name}
        send_and_compare_result(
            app_client, 'tensor-history', history_body, 'retrieve_empty_tensor_history.json')
        # the poll result should carry the queried node
        poll_res = get_request_result(
            app_client=app_client, url='poll-data', body_data={'pos': 0}, method='get')
        assert poll_res.get('receive_tensor', {}).get('node_name') == node_name
        send_and_compare_result(
            app_client, 'tensor-history', history_body, 'retrieve_full_tensor_history.json')
        # check a slice of the tensor value itself
        value_body = {
            'name': f'{node_name}:0',
            'detail': 'data',
            'shape': quote('[1, 1:3]')
        }
        send_and_compare_result(
            app_client, 'tensors', value_body, 'retrieve_tensor_value.json', method='get')
        send_terminate_cmd(app_client)
def test_before_train_begin(self, app_client):
    """Check the 'retrieve all' reply before the training begins."""
    send_and_compare_result(
        app_client, 'retrieve', {'mode': 'all'}, 'before_train_begin.json')
def send_terminate_cmd(app_client):
    """Issue a 'terminate' control command to the debugger client."""
    terminate_url = os.path.join(DEBUGGER_BASE_URL, 'control')
    send_and_compare_result(app_client, terminate_url, {'mode': 'terminate'})