Exemple #1
0
 def test_mode_str_ps_local_cluster_1p_2t_1f_async_c2(self):
     """Run runner0 as local_cluster_train with 2 workers and 1 server.

     The YAML config file is named after this test method. After the run,
     the test expects a zero return code, no 'Traceback' in the
     logs/server.0 and logs/worker.0 files, and no logs/worker.1 path.
     """
     # Must be read in this frame so co_name is this method's own name.
     case_name = sys._getframe().f_code.co_name
     self.yaml_config_name = case_name + '.yaml'

     self.yaml_content["mode"] = "runner0"
     runner_cfg = self.yaml_content["runner"][0]
     for option, setting in (("class", "local_cluster_train"),
                             ("worker_num", 2),
                             ("server_num", 1)):
         runner_cfg[option] = setting

     self.run_yaml()

     built_in.equals(self.pro.returncode, 0, self.err_msg)
     for log_file in ('logs/server.0', 'logs/worker.0'):
         built_in.not_contains(log_file, 'Traceback', self.err_msg)
     built_in.path_not_exist('logs/worker.1', self.err_msg)
 def test_mode_list_collective_selected_gpus_2f_2cards_c2(self):
     """Run local_cluster_train on GPUs 0 and 1 with fleet_mode=collective.

     The dataset path is pointed at "criteo_data". After the run, the test
     expects a zero return code, no 'Traceback' in the captured error
     output, and no logs/server.0 path. logs/worker.0 is checked against
     self.epoch_re with an expected count of 2 (presumably the number of
     regex matches; see built_in.regex_match_len).
     """
     # Must be read in this frame so co_name is this method's own name.
     case_name = sys._getframe().f_code.co_name
     self.yaml_config_name = case_name + '.yaml'

     runner_cfg = self.yaml_content["runner"][0]
     for option, setting in (("device", 'gpu'),
                             ("selected_gpus", "0,1"),
                             ("class", "local_cluster_train"),
                             ("fleet_mode", "collective")):
         runner_cfg[option] = setting
     self.yaml_content["dataset"][0]["data_path"] = "criteo_data"

     self.run_yaml()

     built_in.equals(self.pro.returncode, 0, self.err_msg)
     built_in.not_contains(self.err, 'Traceback', self.err_msg)
     built_in.regex_match_len(
         'logs/worker.0', self.epoch_re, 2, self.err_msg)
     built_in.path_not_exist('logs/server.0', self.err_msg)
 def test_mode_list_collective_selected_gpus_2f_4cards_c2(self):
     """Collective local_cluster_train run; checks how many workers start.

     Original author's note (translated): the number of trainers started
     is decided from the number of files, gpu_nums and worker_num.

     After the run, the test expects a zero return code, no 'Traceback' in
     the captured error output, and no logs/worker.2 path. logs/worker.1
     is checked against self.epoch_re with an expected count of 2.

     NOTE(review): the method name says "4cards" but selected_gpus is set
     to "0,1" (the original docstring also said "2cards") -- confirm which
     one is intended.
     """
     # Must be read in this frame so co_name is this method's own name.
     case_name = sys._getframe().f_code.co_name
     self.yaml_config_name = case_name + '.yaml'

     runner_cfg = self.yaml_content["runner"][0]
     runner_cfg["device"] = 'gpu'
     runner_cfg["selected_gpus"] = "0,1"
     runner_cfg["class"] = "local_cluster_train"
     runner_cfg["fleet_mode"] = "collective"
     dataset_cfg = self.yaml_content["dataset"][0]
     dataset_cfg["data_path"] = "criteo_data"

     self.run_yaml()

     failure_msg = self.err_msg
     built_in.equals(self.pro.returncode, 0, failure_msg)
     built_in.not_contains(self.err, 'Traceback', failure_msg)
     built_in.regex_match_len('logs/worker.1', self.epoch_re, 2, failure_msg)
     built_in.path_not_exist('logs/worker.2', failure_msg)
Exemple #4
0
 def test_mode_list_ps_selected_gpus_2f_2card_c2(self):
     """Run on GPUs 0 and 1 with two files and no explicit fleet mode.

     Per the original author's note, the run is expected to switch from ps
     to collective and execute in local_cluster_train mode.

     After the run, the test expects a zero return code, no 'Traceback' in
     the captured error output, and no logs/server.0 path. The value
     captured after "train.trainer.engine" in the captured standard output
     is compared with "local_cluster", and logs/worker.1 is checked against
     self.auc_re with an expected count of 6.
     """
     # Must be read in this frame so co_name is this method's own name.
     self.yaml_config_name = sys._getframe().f_code.co_name + '.yaml'
     self.yaml_content["runner"][0]["device"] = 'gpu'
     self.yaml_content["runner"][0]["selected_gpus"] = "0,1"
     self.yaml_content["dataset"][0]["data_path"] = "criteo_data"
     self.run_yaml()
     built_in.equals(self.pro.returncode, 0, self.err_msg)
     built_in.not_contains(self.err, 'Traceback', self.err_msg)
     built_in.path_not_exist('logs/server.0', self.err_msg)
     # The regex part is a raw string so that \s and \S are not treated as
     # (invalid) string escapes; the surrounding '\n' literals stay real
     # newline characters, so the resulting pattern string is unchanged.
     engine_pattern = ('\n'
                       r'train.trainer.engine\s+(\S+)\s+'
                       '\n')
     built_in.regex_match_equal(self.out,
                                engine_pattern,
                                "local_cluster",
                                self.err_msg)
     built_in.regex_match_len('logs/worker.1', self.auc_re, 6, self.err_msg)