def test_compression_eval_trained(_params, tmp_path):
    """Evaluate the best checkpoint produced by training and verify that the
    measured top-1 accuracy matches the 'best_acc1' value stored in the checkpoint.

    The accuracy is parsed from the last line of the sample's output file
    found under the temporary log directory.
    """
    p = _params
    args = p['args']
    tc = p['test_config']
    args['mode'] = 'test'
    args['log-dir'] = tmp_path
    args['workers'] = 4
    args['seed'] = 1
    checkpoint_path = os.path.join(args['checkpoint-save-dir'],
                                   tc['checkpoint_name'] + '_best.pth')
    args['resume'] = checkpoint_path
    # Evaluation must resume from the checkpoint, not from initial weights.
    if 'weights' in args:
        del args['weights']

    reset_context('orig')
    reset_context('quantized_graphs')
    runner = Command(create_command_line(get_cli_dict_args(args), tc['sample_type']))
    res = runner.run(timeout=tc['timeout'])
    assert res == 0

    # Find the sample's output file somewhere under the log directory.
    output_path = None
    for root, _, names in os.walk(str(tmp_path)):
        for name in names:
            if 'output' in name:
                output_path = os.path.join(root, name)

    # Guard: without this, os.path.exists(None) raises TypeError and hides
    # the real problem (no output file was produced).
    assert output_path is not None, 'No output file found under {}'.format(tmp_path)
    assert os.path.exists(output_path)
    with open(output_path, "r") as f:
        last_line = f.readlines()[-1]
        # The last line is expected to contain the final accuracy as a float.
        acc1 = float(re.findall(r"\d+\.\d+", last_line)[0])
        assert torch.load(checkpoint_path)['best_acc1'] == approx(
            acc1, abs=tc['absolute_tolerance_eval'])
def test_compression_train(_params, tmp_path):
    """Run compression training end-to-end and compare the best checkpoint's
    accuracy against the expected reference, allowing a wider tolerance when
    the result is better than the reference."""
    params = _params
    cli_args = params['args']
    config = params['test_config']

    cli_args['mode'] = 'train'
    cli_args['log-dir'] = tmp_path
    cli_args['workers'] = 4
    cli_args['seed'] = 1

    reset_context('orig')
    reset_context('quantized_graphs')

    command = Command(create_command_line(get_cli_dict_args(cli_args), config['sample_type']))
    exit_code = command.run(timeout=config['timeout'])
    assert exit_code == 0

    checkpoint_path = os.path.join(cli_args['checkpoint-save-dir'],
                                   config['checkpoint_name'] + '_best.pth')
    assert os.path.exists(checkpoint_path)

    actual_acc = torch.load(checkpoint_path)['best_acc1']
    ref_acc = config['expected_accuracy']
    better_accuracy_tolerance = 3
    # A result above the reference is always acceptable within a loose bound.
    if actual_acc < ref_acc:
        tolerance = config['absolute_tolerance_train']
    else:
        tolerance = better_accuracy_tolerance
    assert actual_acc == approx(ref_acc, abs=tolerance)
def test_loaded_model_evals_according_to_saved_acc(_params, tmp_path, dataset_dir):
    """Evaluate a pre-trained checkpoint in distributed mode and verify that the
    accuracy dumped to the metrics JSON file matches the checkpoint's 'best_acc1'."""
    p = _params
    config_path = p['sample_config_path']
    checkpoint_path = p['checkpoint_path']
    metrics_path = str(tmp_path.joinpath('metrics.json'))
    tmp_path = str(tmp_path)
    args = {}
    # Fall back to the temporary directory when no dataset location is supplied.
    if not dataset_dir:
        dataset_dir = tmp_path
    args['data'] = dataset_dir
    args['dataset'] = p['dataset']
    args['config'] = str(config_path)
    args['mode'] = 'test'
    args['log-dir'] = tmp_path
    args['workers'] = 0  # Workaround for the PyTorch MultiProcessingDataLoader issue
    args['seed'] = 1
    args['resume'] = checkpoint_path
    args['metrics-dump'] = metrics_path
    if p['execution_mode'] == ExecutionMode.MULTIPROCESSING_DISTRIBUTED:
        args['multiprocessing-distributed'] = ''
    else:
        pytest.skip("DataParallel eval takes too long for this test to be run during pre-commit")
    runner = Command(create_command_line(get_cli_dict_args(args), "classification"))
    res = runner.run()
    assert res == 0
    with open(metrics_path) as metric_file:
        metrics = json.load(metric_file)
        # Accuracy measured during eval must reproduce the value saved at train time.
        assert torch.load(checkpoint_path)['best_acc1'] == pytest.approx(metrics['Accuracy'])
def test_compression_eval_trained(_params, tmp_path):
    """Evaluate the best checkpoint from training and check the measured top-1
    accuracy agrees with the value saved inside the checkpoint."""
    params = _params
    cli_args = params['args']
    config = params['test_config']

    cli_args['mode'] = 'test'
    cli_args['log-dir'] = tmp_path
    cli_args['workers'] = 4
    cli_args['seed'] = 1

    checkpoint_path = os.path.join(cli_args['checkpoint-save-dir'],
                                   config['checkpoint_name'] + '_best.pth')
    cli_args['resume'] = checkpoint_path
    # Resume from the checkpoint instead of loading initial weights.
    cli_args.pop('weights', None)

    reset_context('orig')
    reset_context('quantized_graphs')

    command = Command(create_command_line(get_cli_dict_args(cli_args), config['sample_type']))
    exit_code = command.run(timeout=config['timeout'])
    assert exit_code == 0

    acc1 = parse_best_acc1(tmp_path)
    assert torch.load(checkpoint_path)['best_acc1'] == approx(
        acc1, abs=config['absolute_tolerance_eval'])
def test_loaded_model_evals_according_to_saved_acc(_params, tmp_path):
    """Run classification eval on a saved checkpoint and verify the reported
    top-1 accuracy matches the 'best_acc1' stored in the checkpoint."""
    params = _params
    config_path = params['sample_config_path']
    checkpoint_path = params['checkpoint_path']
    log_dir = str(tmp_path)

    cli_args = {}
    cli_args['data'] = log_dir + '/' + params['dataset']
    cli_args['dataset'] = params['dataset']
    cli_args['config'] = str(config_path)
    cli_args['mode'] = 'test'
    cli_args['log-dir'] = log_dir
    cli_args['workers'] = 4
    cli_args['seed'] = 1
    cli_args['resume'] = checkpoint_path

    # Only the distributed mode is fast enough for pre-commit runs.
    if params['execution_mode'] != ExecutionMode.MULTIPROCESSING_DISTRIBUTED:
        pytest.skip("DataParallel eval takes too long for this test to be run during pre-commit")
    cli_args['multiprocessing-distributed'] = ''

    command = Command(create_command_line(get_cli_dict_args(cli_args), "classification"))
    assert command.run() == 0

    acc1 = parse_best_acc1(log_dir)
    assert torch.load(checkpoint_path)['best_acc1'] == pytest.approx(acc1)
def test_ssd300_eval(self):
    """Evaluate the INT8 SSD300 checkpoint produced by test_ssd300_train (VOC mAP)."""
    checkpoint = os.path.join(self.MMDET_PATH, "work_dirs", "ssd300_voc_int8", "latest.pth")
    comm_line = "tools/test.py configs/pascal_voc/ssd300_voc_int8.py {} --eval mAP".format(
        checkpoint)
    runner = Command(create_command_line(comm_line, self.activate_venv, self.mmdet_python),
                     self.MMDET_PATH)
    # Assert on the exit code (consistent with test_maskrcnn_eval) so a
    # failing evaluation cannot pass silently.
    res = runner.run()
    assert res == 0
def test_retinanet_eval(self):
    """Evaluate the INT8 RetinaNet checkpoint produced by test_retinanet_train (bbox metric)."""
    checkpoint = os.path.join(self.MMDET_PATH, "work_dirs", "retinanet_r50_fpn_1x_int8", "latest.pth")
    comm_line = "tools/test.py configs/retinanet/retinanet_r50_fpn_1x_int8.py {} --eval bbox".format(
        checkpoint)
    runner = Command(create_command_line(comm_line, self.activate_venv, self.mmdet_python),
                     self.MMDET_PATH)
    # Assert on the exit code (consistent with test_maskrcnn_eval) so a
    # failing evaluation cannot pass silently.
    res = runner.run()
    assert res == 0
def test_xnli_eval(self, temp_folder):
    """Evaluate the XNLI-finetuned compressed BERT model produced by test_xnli_train."""
    com_line = "examples/text-classification/run_xnli.py --model_name_or_path {output}" \
               " --language zh --do_eval --data_dir {} --learning_rate 5e-5 --max_seq_length 128 --output_dir" \
               " {output} --nncf_config nncf_bert_config_xnli.json --per_gpu_eval_batch_size 24" \
               .format(DATASET_PATH, output=os.path.join(temp_folder["models"], "xnli"))
    runner = Command(create_command_line(com_line, self.activate_venv, self.trans_python,
                                         self.cuda_visible_string), self.TRANS_PATH)
    # The bare runner.run() call never asserted anything, so a failed
    # evaluation could go unnoticed — check the exit code explicitly.
    res = runner.run()
    assert res == 0
def test_glue_eval(self, temp_folder):
    """Evaluate the MNLI-finetuned compressed RoBERTa model produced by test_glue_train."""
    com_line = "examples/text-classification/run_glue.py --model_name_or_path {output}" \
               " --task_name mnli --do_eval --data_dir {}/glue/glue_data/MNLI --learning_rate 2e-5" \
               " --num_train_epochs 1.0 --max_seq_length 128 --output_dir {output}" \
               " --nncf_config nncf_roberta_config_mnli.json" \
               .format(DATASET_PATH, output=os.path.join(temp_folder["models"], "roberta_mnli"))
    runner = Command(create_command_line(com_line, self.activate_venv, self.trans_python,
                                         self.cuda_visible_string), self.TRANS_PATH)
    # Check the exit code so a failing evaluation actually fails the test.
    res = runner.run()
    assert res == 0
def test_squad_eval(self, temp_folder):
    """Evaluate the SQuAD-finetuned compressed BERT model produced by test_squad_train."""
    com_line = "examples/question-answering/run_squad.py --model_type bert --model_name_or_path {output}" \
               " --do_eval --do_lower_case --predict_file {}/squad/dev-v1.1.json --learning_rate 3e-5" \
               " --max_seq_length 384 --doc_stride 128 --per_gpu_eval_batch_size=4 --output_dir {output} " \
               "--nncf_config nncf_bert_config_squad.json" \
               .format(DATASET_PATH, output=os.path.join(temp_folder["models"], "squad"))
    runner = Command(create_command_line(com_line, self.activate_venv, self.trans_python,
                                         self.cuda_visible_string), self.TRANS_PATH)
    # Check the exit code so a failing evaluation actually fails the test.
    res = runner.run()
    assert res == 0
def test_convert_to_onnx(self, temp_folder):
    """Export the finetuned SQuAD BERT model to ONNX and verify the file exists."""
    output_dir = os.path.join(temp_folder["models"], "squad")
    com_line = ("examples/question-answering/run_squad.py --model_type bert --model_name_or_path {output}"
                " --output_dir {output}"
                " --to_onnx {output}/model.onnx").format(output=output_dir)
    runner = Command(create_command_line(com_line, self.activate_venv, self.trans_python,
                                         self.cuda_visible_string), self.TRANS_PATH)
    runner.run()
    onnx_model = os.path.join(temp_folder["models"], "squad", "model.onnx")
    assert os.path.exists(onnx_model)
def test_lm_eval(self, temp_folder):
    """Evaluate the compressed GPT-2 language model produced by test_lm_train."""
    com_line = "examples/language-modeling/run_language_modeling.py --model_type gpt2 " \
               "--model_name_or_path {output} --do_eval " \
               " --output_dir {output} --eval_data_file {}/wikitext-2-raw/wiki.train.raw" \
               " --nncf_config nncf_gpt2_config_wikitext_hw_config.json" \
               .format(DATASET_PATH, output=os.path.join(temp_folder["models"], "lm_output"))
    runner = Command(create_command_line(com_line, self.activate_venv, self.trans_python,
                                         self.cuda_visible_string), self.TRANS_PATH)
    # Check the exit code so a failing evaluation actually fails the test.
    res = runner.run()
    assert res == 0
def test_maskrcnn_eval(self):
    """Evaluate the INT8 Mask R-CNN checkpoint produced by test_maskrcnn_train (bbox metric)."""
    latest = os.path.join(self.MMDET_PATH, "work_dirs",
                          "mask_rcnn_r50_caffe_fpn_1x_coco_int8", "latest.pth")
    command = "tools/test.py configs/mask_rcnn/mask_rcnn_r50_caffe_fpn_1x_coco_int8.py {} --eval bbox".format(
        latest)
    runner = Command(create_command_line(command, self.activate_venv, self.mmdet_python),
                     self.MMDET_PATH)
    assert runner.run() == 0
def test_glue_distilbert_eval(self, temp_folder):
    """Evaluate the SST-2-finetuned compressed DistilBERT model from test_glue_distilbert_train."""
    com_line = "examples/text-classification/run_glue.py --model_name_or_path {output}" \
               " --task_name SST-2 --do_eval --max_seq_length 128" \
               " --output_dir {output} --data_dir {}/glue/glue_data/SST-2" \
               " --nncf_config nncf_distilbert_config_sst2.json" \
               .format(DATASET_PATH, output=os.path.join(temp_folder["models"], "distilbert_output"))
    runner = Command(create_command_line(com_line, self.activate_venv, self.trans_python,
                                         self.cuda_visible_string), self.TRANS_PATH)
    # Check the exit code so a failing evaluation actually fails the test.
    res = runner.run()
    assert res == 0
def test_retinanet_export2onnx(self):
    """Export the trained INT8 RetinaNet checkpoint to ONNX and verify the file exists."""
    latest = os.path.join(self.MMDET_PATH, "work_dirs", "retinanet_r50_fpn_1x_int8", "latest.pth")
    command = "tools/pytorch2onnx.py configs/retinanet/retinanet_r50_fpn_1x_int8.py {} --output-file retinanet_r50_fpn_1x_int8.onnx".format(
        latest)
    runner = Command(create_command_line(command, self.activate_venv, self.mmdet_python),
                     self.MMDET_PATH)
    runner.run()
    exported = os.path.join(self.MMDET_PATH, "retinanet_r50_fpn_1x_int8.onnx")
    assert os.path.exists(exported)
def test_xnli_train(self, temp_folder):
    """Finetune bert-base-chinese on XNLI (zh) with NNCF compression and check
    that the trained model file is produced."""
    output_dir = os.path.join(temp_folder["models"], "xnli")
    com_line = "examples/text-classification/run_xnli.py --model_name_or_path bert-base-chinese" \
               " --language zh --train_language zh --do_train --data_dir {} --per_gpu_train_batch_size 24" \
               " --learning_rate 5e-5 --num_train_epochs 1.0 --max_seq_length 128 --output_dir {}" \
               " --save_steps 200 --nncf_config nncf_bert_config_xnli.json" \
               .format(DATASET_PATH, output_dir)
    # Fix: pass the venv activation command (self.activate_venv) like every other
    # transformers test in this suite — not the raw venv path (self.VENV_TRANS_PATH).
    runner = Command(create_command_line(com_line, self.activate_venv, self.trans_python,
                                         self.cuda_visible_string), self.TRANS_PATH)
    runner.run()
    assert os.path.exists(os.path.join(output_dir, "pytorch_model.bin"))
def test_ssd300_train(self):
    """Download the FP32 SSD300 weights and run INT8 compression training via mmdet,
    then check that a checkpoint was written."""
    weights_url = ("https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmdetection/models/"
                   "ssd300_voc_vgg16_caffe_240e_20190501-7160d09a.pth")
    subprocess.run("wget " + weights_url, check=True, shell=True, cwd=self.MMDET_PATH)

    train_cmd = "tools/train.py configs/pascal_voc/ssd300_voc_int8.py"
    runner = Command(create_command_line(train_cmd, self.activate_venv, self.mmdet_python),
                     self.MMDET_PATH)
    runner.run()

    latest = os.path.join(self.MMDET_PATH, "work_dirs", "ssd300_voc_int8", "latest.pth")
    assert os.path.exists(latest)
def test_squad_train(self, temp_folder):
    """Finetune BERT-large on SQuAD v1.1 with NNCF compression and check that
    the trained model file is produced."""
    output_dir = os.path.join(temp_folder["models"], "squad")
    com_line = ("examples/question-answering/run_squad.py --model_type bert --model_name_or_path "
                "bert-large-uncased-whole-word-masking-finetuned-squad --do_train --do_lower_case "
                "--train_file {}/squad/train-v1.1.json"
                " --learning_rate 3e-5 --num_train_epochs 1 --max_seq_length 384 --doc_stride 128 --output_dir "
                "{} --per_gpu_train_batch_size=1 --save_steps=200 --nncf_config"
                " nncf_bert_config_squad.json").format(DATASET_PATH, output_dir)
    runner = Command(create_command_line(com_line, self.activate_venv, self.trans_python,
                                         self.cuda_visible_string), self.TRANS_PATH)
    runner.run()
    assert os.path.exists(os.path.join(output_dir, "pytorch_model.bin"))
def test_retinanet_train(self):
    """Download the FP32 RetinaNet weights and run INT8 compression training via mmdet,
    then check that a checkpoint was written."""
    weights_url = ("https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmdetection/models/"
                   "retinanet_r50_fpn_2x_20190616-75574209.pth")
    subprocess.run("wget " + weights_url, check=True, shell=True, cwd=self.MMDET_PATH)

    train_cmd = "tools/train.py configs/retinanet/retinanet_r50_fpn_1x_int8.py"
    runner = Command(create_command_line(train_cmd, self.activate_venv, self.mmdet_python),
                     self.MMDET_PATH)
    runner.run()

    latest = os.path.join(self.MMDET_PATH, "work_dirs", "retinanet_r50_fpn_1x_int8", "latest.pth")
    assert os.path.exists(latest)
def test_maskrcnn_train(self):
    """Download the FP32 Mask R-CNN weights and run INT8 compression training via mmdet,
    then check that a checkpoint was written."""
    weights_url = ("http://download.openmmlab.com/mmdetection/v2.0/mask_rcnn/"
                   "mask_rcnn_r50_caffe_fpn_mstrain-poly_3x_coco/"
                   "mask_rcnn_r50_caffe_fpn_mstrain-poly_3x_coco_bbox_mAP-0.408__segm_mAP-0.37_20200504_163245-42aa3d00.pth")
    subprocess.run("wget " + weights_url, check=True, shell=True, cwd=self.MMDET_PATH)

    train_cmd = "tools/train.py configs/mask_rcnn/mask_rcnn_r50_caffe_fpn_1x_coco_int8.py"
    runner = Command(create_command_line(train_cmd, self.activate_venv, self.mmdet_python),
                     self.MMDET_PATH)
    runner.run()

    latest = os.path.join(self.MMDET_PATH, "work_dirs",
                          "mask_rcnn_r50_caffe_fpn_1x_coco_int8", "latest.pth")
    assert os.path.exists(latest)
def test_lm_train(self, temp_folder):
    """Train GPT-2 language modeling with NNCF compression and check that the
    trained model file is produced."""
    output_dir = os.path.join(temp_folder["models"], "lm_output")
    com_line = ("examples/language-modeling/run_language_modeling.py --model_type gpt2 --model_name_or_path gpt2"
                " --do_train --per_gpu_train_batch_size 8"
                " --train_data_file {}/wikitext-2-raw/wiki.train.raw "
                " --output_dir {} --nncf_config"
                " nncf_gpt2_config_wikitext_hw_config.json").format(DATASET_PATH, output_dir)
    runner = Command(create_command_line(com_line, self.activate_venv, self.trans_python,
                                         self.cuda_visible_string), self.TRANS_PATH)
    runner.run()
    assert os.path.exists(os.path.join(output_dir, "pytorch_model.bin"))
def test_glue_train(self, temp_folder):
    """Finetune roberta-large-mnli on MNLI with NNCF compression and check that
    the trained model file is produced."""
    output_dir = os.path.join(temp_folder["models"], "roberta_mnli")
    com_line = ("examples/text-classification/run_glue.py --model_name_or_path"
                " roberta-large-mnli --task_name mnli --do_train --data_dir {}/glue/glue_data/MNLI"
                " --per_gpu_train_batch_size 4 --learning_rate 2e-5 --num_train_epochs 1.0 --max_seq_length 128 "
                "--output_dir {} --save_steps 200 --nncf_config"
                " nncf_roberta_config_mnli.json").format(DATASET_PATH, output_dir)
    runner = Command(create_command_line(com_line, self.activate_venv, self.trans_python,
                                         self.cuda_visible_string), self.TRANS_PATH)
    runner.run()
    assert os.path.exists(os.path.join(output_dir, "pytorch_model.bin"))
def test_glue_distilbert_train(self, temp_folder):
    """Finetune distilbert-base-uncased on SST-2 with NNCF compression and check
    that the trained model file is produced."""
    output_dir = os.path.join(temp_folder["models"], "distilbert_output")
    com_line = ("examples/text-classification/run_glue.py --model_name_or_path"
                " distilbert-base-uncased"
                " --task_name SST-2 --do_train --max_seq_length 128 --per_gpu_train_batch_size 8"
                " --data_dir {}/glue/glue_data/SST-2 --learning_rate 5e-5 --num_train_epochs 3.0"
                " --output_dir {} --save_steps 200 --nncf_config"
                " nncf_distilbert_config_sst2.json").format(DATASET_PATH, output_dir)
    runner = Command(create_command_line(com_line, self.activate_venv, self.trans_python,
                                         self.cuda_visible_string), self.TRANS_PATH)
    runner.run()
    assert os.path.exists(os.path.join(output_dir, "pytorch_model.bin"))
def test_compression_eval_trained(_params, tmp_path):
    """Evaluate the best checkpoint from training and compare the measured top-1
    accuracy with the value stored in the checkpoint."""
    params = _params
    cli_args = params['args']
    config = params['test_config']

    cli_args['mode'] = 'test'
    cli_args['log-dir'] = tmp_path
    # Single-process data loading works around PyTorch MultiprocessingDataLoader issues.
    cli_args['workers'] = 0
    cli_args['seed'] = 1

    checkpoint_path = os.path.join(cli_args['checkpoint-save-dir'],
                                   config['checkpoint_name'] + '_best.pth')
    cli_args['resume'] = checkpoint_path
    # Resume from the checkpoint instead of loading initial weights.
    cli_args.pop('weights', None)

    command = Command(create_command_line(get_cli_dict_args(cli_args), config['sample_type']))
    command.run(timeout=config['timeout'])

    acc1 = parse_best_acc1(tmp_path)
    assert torch.load(checkpoint_path)['best_acc1'] == approx(
        acc1, abs=config['absolute_tolerance_eval'])
def test_compression_train(_params, tmp_path):
    """Run compression training and compare the best checkpoint's accuracy against
    the expected reference, with a looser bound when the result beats the reference."""
    params = _params
    cli_args = params['args']
    config = params['test_config']

    cli_args['mode'] = 'train'
    cli_args['log-dir'] = tmp_path
    # Single-process data loading works around PyTorch MultiprocessingDataLoader issues.
    cli_args['workers'] = 0
    cli_args['seed'] = 1

    command = Command(create_command_line(get_cli_dict_args(cli_args), config['sample_type']))
    command.run(timeout=config['timeout'])

    checkpoint_path = os.path.join(cli_args['checkpoint-save-dir'],
                                   config['checkpoint_name'] + '_best.pth')
    assert os.path.exists(checkpoint_path)

    actual_acc = torch.load(checkpoint_path)['best_acc1']
    ref_acc = config['expected_accuracy']
    better_accuracy_tolerance = 3
    if actual_acc < ref_acc:
        tolerance = config['absolute_tolerance_train']
    else:
        tolerance = better_accuracy_tolerance
    assert actual_acc == approx(ref_acc, abs=tolerance)