def test_two_phase_train_c2(self):
    """Train with two phases listed in the first runner's config.

    Expects a zero return code, no 'Traceback' in ``self.err`` and four
    matches of ``self.epoch_re`` in ``self.out``.
    """
    # The generated yaml file is named after this test method.
    test_name = sys._getframe().f_code.co_name
    self.yaml_config_name = test_name + '.yaml'

    first_runner = self.yaml_content["runner"][0]
    first_runner["phases"] = ['phase1', 'phase2']
    self.run_yaml()

    built_in.equals(self.pro.returncode, 0, self.err_msg)
    built_in.not_contains(self.err, 'Traceback', self.err_msg)
    built_in.regex_match_len(
        self.out,
        self.epoch_re,
        4,
        self.err_msg,
    )
def test_single_infer_gpu(self):
    """Run the 'runner2' mode with the third runner entry set to gpu.

    Expects a zero return code, no 'Traceback' in ``self.err`` and one
    match of ``self.run_time_re`` in ``self.out``.
    """
    test_name = sys._getframe().f_code.co_name
    self.yaml_config_name = test_name + '.yaml'

    config = self.yaml_content
    config['mode'] = 'runner2'
    config["runner"][2]["device"] = 'gpu'
    self.run_yaml()

    built_in.equals(self.pro.returncode, 0, self.err_msg)
    built_in.not_contains(self.err, 'Traceback', self.err_msg)
    built_in.regex_match_len(
        self.out,
        self.run_time_re,
        1,
        self.err_msg,
    )
def test_multiview_simnet_gpu(self):
    """Run the match/multiview-simnet model config on gpu.

    The shipped config file is edited in place with ``sed`` so that its
    ``device`` entry reads ``gpu``; the run must exit with code 0 and
    leave no 'Traceback' in ``self.err``.
    """
    self.yaml_config_name = "models/match/multiview-simnet/config.yaml"

    # Rewrite "device: cpu" to "device: gpu" directly in the config file.
    utils.cmd_shell(
        "sed -i 's/device:.*cpu/device: gpu/g' {}".format(
            self.yaml_config_name))

    # generate=False: use the existing file rather than a generated one.
    self.run_yaml(generate=False, cuda_devices="0")

    built_in.equals(self.pro.returncode, 0, self.err_msg)
    built_in.not_contains(self.err, 'Traceback', self.err_msg)
def test_debug_open_gpu(self):
    """Train on gpu with the top-level ``debug`` option set to 'True'.

    Expects a zero return code, no 'Traceback' in ``self.err`` and two
    matches of ``self.epoch_re`` in ``self.out``.
    """
    test_name = sys._getframe().f_code.co_name
    self.yaml_config_name = test_name + '.yaml'

    config = self.yaml_content
    config["runner"][0]["device"] = 'gpu'
    # Note: the value is the string 'True', not the boolean.
    config["debug"] = 'True'
    self.run_yaml()

    built_in.equals(self.pro.returncode, 0, self.err_msg)
    built_in.not_contains(self.err, 'Traceback', self.err_msg)
    built_in.regex_match_len(
        self.out,
        self.epoch_re,
        2,
        self.err_msg,
    )
def test_optimizer_sgd_gpu(self):
    """Train on gpu with the optimizer class switched to SGD.

    Expects a zero return code, no 'Traceback' in ``self.err`` and two
    matches of ``self.epoch_re`` in ``self.out``.
    """
    test_name = sys._getframe().f_code.co_name
    self.yaml_config_name = test_name + '.yaml'

    config = self.yaml_content
    config["runner"][0]["device"] = 'gpu'
    optimizer = config["hyper_parameters"]['optimizer']
    optimizer['class'] = 'SGD'
    self.run_yaml()

    built_in.equals(self.pro.returncode, 0, self.err_msg)
    built_in.not_contains(self.err, 'Traceback', self.err_msg)
    built_in.regex_match_len(
        self.out,
        self.epoch_re,
        2,
        self.err_msg,
    )
def test_single_infer(self):
    """Run the 'runner2' mode and expect one 'Infer ... done' line.

    Also expects a zero return code and no 'Traceback' in ``self.err``.
    """
    test_name = sys._getframe().f_code.co_name
    self.yaml_config_name = test_name + '.yaml'
    self.yaml_content['mode'] = 'runner2'
    self.run_yaml()

    built_in.equals(self.pro.returncode, 0, self.err_msg)
    built_in.not_contains(self.err, 'Traceback', self.err_msg)
    built_in.regex_match_len(
        self.out,
        'Infer.+done',
        1,
        self.err_msg,
    )
def test_increment_train(self):
    """Run increment training through the 'runner1' mode.

    Expects a zero return code, no 'Traceback' in ``self.err``, two
    matches of ``self.epoch_re`` in ``self.out``, and a ``mode`` line in
    ``self.out`` whose captured value equals 'runner1'.
    """
    self.yaml_config_name = sys._getframe().f_code.co_name + '.yaml'
    self.yaml_content['mode'] = 'runner1'
    self.run_yaml()

    built_in.equals(self.pro.returncode, 0, self.err_msg)
    built_in.not_contains(self.err, 'Traceback', self.err_msg)
    built_in.regex_match_len(self.out, self.epoch_re, 2, self.err_msg)
    # Raw string: '\s' and '\S' are invalid escapes in a normal string
    # literal and trigger a warning on recent Python versions. The regex
    # engine interprets '\n' itself, so the pattern matches the same text.
    built_in.regex_match_equal(
        self.out, r'\nmode\s+(\S+)\s+\n', 'runner1', self.err_msg)
def test_optimizer_sgd_reg(self):
    """Train with the SGD optimizer and ``reg`` set to 0.1.

    Expects a zero return code, no 'Traceback' in ``self.err`` and two
    'epoch ... done' matches in ``self.out``.
    """
    test_name = sys._getframe().f_code.co_name
    self.yaml_config_name = test_name + '.yaml'

    hyper_params = self.yaml_content["hyper_parameters"]
    hyper_params['optimizer']['class'] = 'SGD'
    hyper_params['reg'] = 0.1
    self.run_yaml()

    built_in.equals(self.pro.returncode, 0, self.err_msg)
    built_in.not_contains(self.err, 'Traceback', self.err_msg)
    built_in.regex_match_len(
        self.out,
        'epoch.+done',
        2,
        self.err_msg,
    )
def test_tagspace_gpu(self):
    """Run the contentunderstanding/tagspace model config on gpu.

    The shipped config file is edited in place with ``sed`` so that its
    ``device`` entry reads ``gpu``; the run must exit with code 0 and
    leave no 'Traceback' in ``self.err``.
    """
    self.yaml_config_name = "models/contentunderstanding/tagspace/config.yaml"

    # Rewrite "device: cpu" to "device: gpu" directly in the config file.
    utils.cmd_shell(
        "sed -i 's/device:.*cpu/device: gpu/g' {}".format(
            self.yaml_config_name))

    # generate=False: use the existing file rather than a generated one.
    self.run_yaml(generate=False, cuda_devices="0")

    built_in.equals(self.pro.returncode, 0, self.err_msg)
    built_in.not_contains(self.err, 'Traceback', self.err_msg)
def test_single_infer_in_epochs_dir_c2(self):
    """Run single infer based on one epoch, using the new config format.

    Selects the 'runner2' mode and expects a zero return code, no
    'Traceback' in ``self.err`` and one match of ``self.run_time_re`` in
    ``self.out``.
    """
    test_name = sys._getframe().f_code.co_name
    self.yaml_config_name = test_name + '.yaml'
    self.yaml_content['mode'] = 'runner2'
    self.run_yaml()

    built_in.equals(self.pro.returncode, 0, self.err_msg)
    built_in.not_contains(self.err, 'Traceback', self.err_msg)
    built_in.regex_match_len(
        self.out,
        self.run_time_re,
        1,
        self.err_msg,
    )
def test_xdeepfm_gpu(self):
    """Run the rank/xdeepfm model config on gpu.

    The shipped config file is edited in place with ``sed`` so that its
    ``device`` entry reads ``gpu``; the run must exit with code 0 and
    leave no 'Traceback' in ``self.err``.
    """
    self.yaml_config_name = "models/rank/xdeepfm/config.yaml"

    # Rewrite "device: cpu" to "device: gpu" directly in the config file.
    utils.cmd_shell(
        "sed -i 's/device:.*cpu/device: gpu/g' {}".format(
            self.yaml_config_name))

    # generate=False: use the existing file rather than a generated one.
    self.run_yaml(generate=False, cuda_devices="0")

    built_in.equals(self.pro.returncode, 0, self.err_msg)
    built_in.not_contains(self.err, 'Traceback', self.err_msg)
def test_fibinet(self):
    """Run the rank/fibinet model config with the epoch count reduced.

    The shipped config file is edited in place with ``sed`` so that an
    ``epochs: ...4`` entry becomes ``epochs: 1``; the run must exit with
    code 0 and leave no 'Traceback' in ``self.err``.
    """
    self.yaml_config_name = "models/rank/fibinet/config.yaml"

    # Cut the number of epochs in the config file to keep the run short.
    utils.cmd_shell(
        "sed -i 's/epochs:.*4/epochs: 1/g' {}".format(
            self.yaml_config_name))

    # generate=False: use the existing file rather than a generated one.
    self.run_yaml(generate=False)

    built_in.equals(self.pro.returncode, 0, self.err_msg)
    built_in.not_contains(self.err, 'Traceback', self.err_msg)
def test_mode_str_ps_local_cluster_1p_1t_async_c2(self):
    """Run 'runner0' as a local_cluster_train runner (mode given as str).

    Expects a zero return code, no 'Traceback' for 'logs/server.0' and
    the text 'AsyncCommunicator Initialized' for 'logs/worker.0'.
    """
    test_name = sys._getframe().f_code.co_name
    self.yaml_config_name = test_name + '.yaml'

    config = self.yaml_content
    config["mode"] = "runner0"
    config["runner"][0]["class"] = "local_cluster_train"
    self.run_yaml()

    built_in.equals(self.pro.returncode, 0, self.err_msg)
    # The first argument is a log path rather than captured output;
    # presumably the helper reads that file -- TODO confirm.
    built_in.not_contains(
        'logs/server.0', 'Traceback', self.err_msg)
    built_in.contains(
        'logs/worker.0', 'AsyncCommunicator Initialized', self.err_msg)
def test_single_infer_in_base_dir_c2(self):
    """Run single infer based on the save dir, using the new config format.

    Selects the 'runner2' mode with the third runner's
    ``init_model_path`` set to 'increment_dnn'. Expects a zero return
    code and no 'Traceback' in ``self.err``.
    """
    test_name = sys._getframe().f_code.co_name
    self.yaml_config_name = test_name + '.yaml'

    config = self.yaml_content
    config['mode'] = 'runner2'
    infer_runner = config['runner'][2]
    infer_runner['init_model_path'] = 'increment_dnn'
    self.run_yaml()

    built_in.equals(self.pro.returncode, 0, self.err_msg)
    built_in.not_contains(self.err, 'Traceback', self.err_msg)
def test_mode_list_ps_local_cluster_c2(self):
    """Run with a one-element mode list whose runner is a local cluster.

    The ``logs`` directory is removed first. Expects a zero return code
    and no 'Traceback' in ``self.err``.
    """
    # Start from a clean slate: drop logs left by earlier runs.
    utils.cmd_shell("rm -rf logs")

    test_name = sys._getframe().f_code.co_name
    self.yaml_config_name = test_name + '.yaml'

    first_runner = self.yaml_content["runner"][0]
    first_runner["class"] = "local_cluster_train"
    self.run_yaml()

    built_in.equals(self.pro.returncode, 0, self.err_msg)
    built_in.not_contains(self.err, 'Traceback', self.err_msg)
def test_single_infer(self):
    """Run the default config once, then run again in 'runner2' mode.

    Only the second run is checked: it must exit with code 0 and leave
    no 'Traceback' in ``self.err``.
    """
    # First pass: run the basic config so that models get saved.
    first_name = sys._getframe().f_code.co_name
    self.yaml_config_name = first_name + '.yaml'
    self.run_yaml()

    # Second pass: same yaml file name, now with the 'runner2' mode.
    second_name = sys._getframe().f_code.co_name
    self.yaml_config_name = second_name + '.yaml'
    self.yaml_content['mode'] = 'runner2'
    self.run_yaml()

    built_in.equals(self.pro.returncode, 0, self.err_msg)
    built_in.not_contains(self.err, 'Traceback', self.err_msg)
def test_increment_train_c2(self):
    """Run increment training with the mode list ['runner0', 'runner1'].

    Per the original description both runners are single train runners.
    Expects a zero return code, no 'Traceback' in ``self.err`` and four
    matches of ``self.epoch_re`` in ``self.out``.
    """
    test_name = sys._getframe().f_code.co_name
    self.yaml_config_name = test_name + '.yaml'
    self.yaml_content['mode'] = ['runner0', 'runner1']
    self.run_yaml()

    built_in.equals(self.pro.returncode, 0, self.err_msg)
    built_in.not_contains(self.err, 'Traceback', self.err_msg)
    built_in.regex_match_len(
        self.out,
        self.epoch_re,
        4,
        self.err_msg,
    )
def test_AutoInt_gpu(self):
    """Run the rank/AutoInt model config on gpu with fewer epochs.

    The shipped config file is edited in place with ``sed``: first the
    ``device`` entry is switched to ``gpu``, then an ``epochs: ...2``
    entry is set to ``epochs: 1``. The run must exit with code 0 and
    leave no 'Traceback' in ``self.err``.
    """
    self.yaml_config_name = "models/rank/AutoInt/config.yaml"

    epochs_cmd = "sed -i 's/epochs:.*2/epochs: 1/g' {}".format(
        self.yaml_config_name)
    device_cmd = "sed -i 's/device:.*cpu/device: gpu/g' {}".format(
        self.yaml_config_name)

    # The device rewrite is applied before the epochs rewrite.
    for shell_cmd in (device_cmd, epochs_cmd):
        utils.cmd_shell(shell_cmd)

    # generate=False: use the existing file rather than a generated one.
    self.run_yaml(generate=False, cuda_devices="0")

    built_in.equals(self.pro.returncode, 0, self.err_msg)
    built_in.not_contains(self.err, 'Traceback', self.err_msg)
def test_mode_str_ps_local_cluster_1p_2t_sync_c2(self):
    """Run 'runner0' as a sync local cluster with 1 server and 2 workers.

    Expects a zero return code and no 'Traceback' for 'logs/server.0'.
    """
    test_name = sys._getframe().f_code.co_name
    self.yaml_config_name = test_name + '.yaml'

    self.yaml_content["mode"] = "runner0"
    cluster_runner = self.yaml_content["runner"][0]
    cluster_runner["class"] = "local_cluster_train"
    cluster_runner["distribute_strategy"] = "sync"
    cluster_runner["worker_num"] = 2
    cluster_runner["server_num"] = 1
    self.run_yaml()

    built_in.equals(self.pro.returncode, 0, self.err_msg)
    # The first argument is a log path rather than captured output;
    # presumably the helper reads that file -- TODO confirm.
    built_in.not_contains(
        'logs/server.0', 'Traceback', self.err_msg)
def test_mode_list_ps_local_cluster_and_increment_c2(self):
    """Run a two-element mode list where both runners are local clusters.

    Per the original description, the first runner trains and the second
    does increment training. Expects a zero return code, no 'Traceback'
    in ``self.err`` and two matches of ``self.epoch_re`` for
    'logs/worker.0'.
    """
    test_name = sys._getframe().f_code.co_name
    self.yaml_config_name = test_name + '.yaml'

    config = self.yaml_content
    runners = config["runner"]
    runners[0]["class"] = "local_cluster_train"
    runners[1]["class"] = "local_cluster_train"
    config["mode"] = ["runner0", "runner1"]
    self.run_yaml()

    built_in.equals(self.pro.returncode, 0, self.err_msg)
    built_in.not_contains(self.err, 'Traceback', self.err_msg)
    # A log path is passed here rather than captured output; presumably
    # the helper reads that file -- TODO confirm.
    built_in.regex_match_len(
        'logs/worker.0', self.epoch_re, 2, self.err_msg)
def test_two_phase_train(self):
    """Train with an extra 'phase2' entry appended to the phase list.

    Expects a zero return code, no 'Traceback' in ``self.err`` and four
    'epoch ... done' matches in ``self.out``.
    """
    test_name = sys._getframe().f_code.co_name
    self.yaml_config_name = test_name + '.yaml'

    second_phase = {
        'name': 'phase2',
        # user-defined model
        'model': '{workspace}/model.py',
        # select dataset by name
        'dataset_name': 'dataset_infer',
        'thread_num': 1
    }
    self.yaml_content['phase'].append(second_phase)
    self.run_yaml()

    built_in.equals(self.pro.returncode, 0, self.err_msg)
    built_in.not_contains(self.err, 'Traceback', self.err_msg)
    built_in.regex_match_len(
        self.out,
        'epoch.+done',
        4,
        self.err_msg,
    )
def test_mode_list_single_selected_gpus_1card_c2(self):
    """Select a single gpu card and expect the run to use single mode.

    Sets the first runner's ``device`` to 'gpu' and ``selected_gpus`` to
    "0". Expects a zero return code, no 'Traceback' in ``self.err``, two
    matches of ``self.epoch_re`` in ``self.out``, and a
    ``train.trainer.engine`` line in ``self.out`` whose captured value
    equals "single".
    """
    self.yaml_config_name = sys._getframe().f_code.co_name + '.yaml'
    gpu_runner = self.yaml_content["runner"][0]
    gpu_runner["device"] = 'gpu'
    gpu_runner["selected_gpus"] = "0"
    self.run_yaml()

    built_in.equals(self.pro.returncode, 0, self.err_msg)
    built_in.not_contains(self.err, 'Traceback', self.err_msg)
    built_in.regex_match_len(self.out, self.epoch_re, 2, self.err_msg)
    # Raw string: '\s' and '\S' are invalid escapes in a normal string
    # literal and trigger a warning on recent Python versions. The regex
    # engine interprets '\n' itself, so the pattern matches the same text.
    built_in.regex_match_equal(
        self.out, r'\ntrain.trainer.engine\s+(\S+)\s+\n', "single",
        self.err_msg)
def test_QueueDataset_train_c2(self):
    """Train with the first dataset's type set to QueueDataset.

    Expects a zero return code, no 'Traceback' in ``self.err`` and two
    matches of ``self.epoch_re`` in ``self.out``.
    """
    test_name = sys._getframe().f_code.co_name
    self.yaml_config_name = test_name + '.yaml'

    train_dataset = self.yaml_content["dataset"][0]
    train_dataset["type"] = "QueueDataset"
    self.run_yaml()

    built_in.equals(self.pro.returncode, 0, self.err_msg)
    built_in.not_contains(self.err, 'Traceback', self.err_msg)
    built_in.regex_match_len(
        self.out,
        self.epoch_re,
        2,
        self.err_msg,
    )

    # NOTE: Windows and macOS are forcibly switched over to DataLoader.
    # NOTE(review): check_type is computed but never used in the visible
    # code -- an assertion on the dataset type may be missing; confirm.
    if utils.get_platform() != "LINUX" or not six.PY2:
        check_type = "DataLoader"
    else:
        check_type = "QueueDataset"
def test_mode_str_ps_local_cluster_2p_2t_async_c2(self):
    """Run 'runner0' as a local cluster with 2 servers and 2 workers.

    The first dataset reads from 'criteo_data'. Expects a zero return
    code, no 'Traceback' for either server log and two matches of
    ``self.epoch_re`` for each worker log.
    """
    test_name = sys._getframe().f_code.co_name
    self.yaml_config_name = test_name + '.yaml'

    config = self.yaml_content
    config["mode"] = "runner0"
    cluster_runner = config["runner"][0]
    cluster_runner["class"] = "local_cluster_train"
    cluster_runner["worker_num"] = 2
    cluster_runner["server_num"] = 2
    config["dataset"][0]["data_path"] = "criteo_data"
    self.run_yaml()

    built_in.equals(self.pro.returncode, 0, self.err_msg)
    # Log paths are passed below rather than captured output; presumably
    # the helpers read those files -- TODO confirm.
    built_in.not_contains(
        'logs/server.0', 'Traceback', self.err_msg)
    built_in.not_contains(
        'logs/server.1', 'Traceback', self.err_msg)
    built_in.regex_match_len(
        'logs/worker.0', self.epoch_re, 2, self.err_msg)
    built_in.regex_match_len(
        'logs/worker.1', self.epoch_re, 2, self.err_msg)
def test_optimizer_lr_le(self):
    """Train with SGD, a learning rate of 2e-2 and ``reg`` set to 0.1.

    Expects a zero return code, no 'Traceback' in ``self.err``, two
    matches of ``self.epoch_re`` in ``self.out``, and a
    ``hyper_parameters.optimizer.learning_rate`` line in ``self.out``
    whose captured value equals '0.02'.
    """
    self.yaml_config_name = sys._getframe().f_code.co_name + '.yaml'
    hyper_params = self.yaml_content["hyper_parameters"]
    hyper_params['optimizer']['class'] = 'SGD'
    hyper_params['optimizer']['learning_rate'] = 2e-2
    hyper_params['reg'] = 0.1
    self.run_yaml()

    built_in.equals(self.pro.returncode, 0, self.err_msg)
    built_in.not_contains(self.err, 'Traceback', self.err_msg)
    built_in.regex_match_len(self.out, self.epoch_re, 2, self.err_msg)
    # Raw string: '\s' and '\S' are invalid escapes in a normal string
    # literal and trigger a warning on recent Python versions. The regex
    # engine interprets '\n' itself, so the pattern matches the same text.
    built_in.regex_match_equal(
        self.out,
        r'\nhyper_parameters.optimizer.learning_rate\s+(\S+)\s+\n',
        '0.02', self.err_msg)
def test_mode_list_collective_selected_gpus_2f_2cards_c2(self):
    """Select two gpu cards with two files and fleet mode 'collective'.

    Per the original description, the run is expected to switch from ps
    to collective and use the local_cluster_train mode. Expects a zero
    return code, no 'Traceback' in ``self.err``, two matches of
    ``self.epoch_re`` for 'logs/worker.0', and no 'logs/server.0' path.
    """
    test_name = sys._getframe().f_code.co_name
    self.yaml_config_name = test_name + '.yaml'

    config = self.yaml_content
    gpu_runner = config["runner"][0]
    gpu_runner["device"] = 'gpu'
    gpu_runner["selected_gpus"] = "0,1"
    gpu_runner["class"] = "local_cluster_train"
    gpu_runner["fleet_mode"] = "collective"
    config["dataset"][0]["data_path"] = "criteo_data"
    self.run_yaml()

    built_in.equals(self.pro.returncode, 0, self.err_msg)
    built_in.not_contains(self.err, 'Traceback', self.err_msg)
    # A log path is passed here rather than captured output; presumably
    # the helper reads that file -- TODO confirm.
    built_in.regex_match_len(
        'logs/worker.0', self.epoch_re, 2, self.err_msg)
    built_in.path_not_exist(
        'logs/server.0', self.err_msg)
def test_thread_num(self):
    """Train with an extra 'phase2' entry that uses two threads.

    Expects a zero return code, no 'Traceback' in ``self.err``, four
    matches of ``self.run_time_re`` in ``self.out``, and a
    ``phase.phase2.thread_num`` line in ``self.out`` whose captured value
    is compared against 2.
    """
    self.yaml_config_name = sys._getframe().f_code.co_name + '.yaml'
    self.yaml_content['phase'].append({
        'name': 'phase2',
        'model': '{workspace}/model.py',  # user-defined model
        'dataset_name': 'dataset_infer',  # select dataset by name
        'thread_num': 2
    })
    self.run_yaml()

    built_in.equals(self.pro.returncode, 0, self.err_msg)
    built_in.not_contains(self.err, 'Traceback', self.err_msg)
    built_in.regex_match_len(self.out, self.run_time_re, 4, self.err_msg)
    # Raw string: '\s' and '\S' are invalid escapes in a normal string
    # literal and trigger a warning on recent Python versions. The regex
    # engine interprets '\n' itself, so the pattern matches the same text.
    # NOTE(review): the expected value is the int 2, while similar checks
    # elsewhere pass strings -- confirm how the helper compares types.
    built_in.regex_match_equal(
        self.out, r'\nphase.phase2.thread_num\s+(\S+)\s+\n', 2,
        self.err_msg)
def test_user_define_all_normal(self):
    """Run with all five user-defined class paths set on the first runner.

    Each ``*_class_path`` option points at 'paddle_rec_user_define.py'.
    Expects a zero return code, no 'Traceback' in ``self.err`` and one
    'User Define Single...' marker per component in ``self.out``.
    """
    test_name = sys._getframe().f_code.co_name
    self.yaml_config_name = test_name + '.yaml'

    first_runner = self.yaml_content["runner"][0]
    # Same assignment order as before: instance, network, startup,
    # runner, terminal.
    for option in ("instance_class_path",
                   "network_class_path",
                   "startup_class_path",
                   "runner_class_path",
                   "terminal_class_path"):
        first_runner[option] = 'paddle_rec_user_define.py'
    self.run_yaml()

    built_in.equals(self.pro.returncode, 0, self.err_msg)
    built_in.not_contains(self.err, 'Traceback', self.err_msg)
    for marker in ('User Define SingleInstance',
                   'User Define SingleNetwork',
                   'User Define SingleStartup',
                   'User Define SingleRunner',
                   'User Define SingleTerminal'):
        built_in.contains(self.out, marker, self.err_msg)
def test_mode_list_collective_selected_gpus_2f_4cards_c2(self):
    """Collective local cluster run with selected gpus and two files.

    Per the original description, the program decides how many trainers
    to start from the number of files, gpu_nums and worker_num. Expects
    a zero return code, no 'Traceback' in ``self.err``, two matches of
    ``self.epoch_re`` for 'logs/worker.1', and no 'logs/worker.2' path.
    """
    test_name = sys._getframe().f_code.co_name
    self.yaml_config_name = test_name + '.yaml'

    config = self.yaml_content
    gpu_runner = config["runner"][0]
    gpu_runner["device"] = 'gpu'
    # NOTE(review): the test name says 4cards but only "0,1" is selected
    # here -- confirm whether this is intentional.
    gpu_runner["selected_gpus"] = "0,1"
    gpu_runner["class"] = "local_cluster_train"
    gpu_runner["fleet_mode"] = "collective"
    config["dataset"][0]["data_path"] = "criteo_data"
    self.run_yaml()

    built_in.equals(self.pro.returncode, 0, self.err_msg)
    built_in.not_contains(self.err, 'Traceback', self.err_msg)
    # A log path is passed here rather than captured output; presumably
    # the helper reads that file -- TODO confirm.
    built_in.regex_match_len(
        'logs/worker.1', self.epoch_re, 2, self.err_msg)
    built_in.path_not_exist(
        'logs/worker.2', self.err_msg)
def test_mode_list_ps_selected_gpus_2f_2card_c2(self):
    """Select two gpu cards with two files, leaving fleet mode unset.

    Per the original description, the run is expected to switch from ps
    to collective and use the local_cluster_train mode. Expects a zero
    return code, no 'Traceback' in ``self.err``, no 'logs/server.0'
    path, a ``train.trainer.engine`` line in ``self.out`` whose captured
    value equals "local_cluster", and six matches of ``self.auc_re`` for
    'logs/worker.1'.
    """
    self.yaml_config_name = sys._getframe().f_code.co_name + '.yaml'
    gpu_runner = self.yaml_content["runner"][0]
    gpu_runner["device"] = 'gpu'
    gpu_runner["selected_gpus"] = "0,1"
    self.yaml_content["dataset"][0]["data_path"] = "criteo_data"
    self.run_yaml()

    built_in.equals(self.pro.returncode, 0, self.err_msg)
    built_in.not_contains(self.err, 'Traceback', self.err_msg)
    built_in.path_not_exist('logs/server.0', self.err_msg)
    # Raw string: '\s' and '\S' are invalid escapes in a normal string
    # literal and trigger a warning on recent Python versions. The regex
    # engine interprets '\n' itself, so the pattern matches the same text.
    built_in.regex_match_equal(
        self.out, r'\ntrain.trainer.engine\s+(\S+)\s+\n', "local_cluster",
        self.err_msg)
    # A log path is passed here rather than captured output; presumably
    # the helper reads that file -- TODO confirm.
    built_in.regex_match_len('logs/worker.1', self.auc_re, 6, self.err_msg)