Ejemplo n.º 1
0
def test_resnet50_cifar10_ascend():
    """Launch distributed ResNet-50/CIFAR-10 training and check its logs.

    Steps:
      1. Copy the ``resnet`` model directory from the model zoo next to
         this test file.
      2. Rewrite two epoch-related expressions in ``train.py`` so they use
         the literal ``10``.
      3. Run ``run_distribute_train.sh`` from ``resnet/scripts`` with the
         rank table and the CIFAR-10 binary dataset path.
      4. Read ``scripts/train_parallel{0..7}/log`` and require every
         per-step time to be below 20.0 and the mean of the last loss
         values to be below 0.70.
    """
    here = os.path.dirname(os.path.abspath(__file__))
    zoo_cv_dir = "{}/../../../../model_zoo/official/cv".format(here)
    model_name = "resnet"
    utils.copy_files(zoo_cv_dir, here, model_name)
    model_dir = os.path.join(here, "resnet")

    # (text to find, replacement) pairs applied to train.py.
    epoch_patches = [
        ("total_epochs=config.epoch_size", "total_epochs=10"),
        ("config.epoch_size - config.pretrain_epoch_size", "10"),
    ]
    utils.exec_sed_command(
        [before for before, _ in epoch_patches],
        [after for _, after in epoch_patches],
        os.path.join(model_dir, "train.py"),
    )

    cifar10_dir = os.path.join(utils.data_root, "cifar-10-batches-bin")
    launch_template = "cd resnet/scripts; bash run_distribute_train.sh resnet50 cifar10 {} {}"
    # The launcher's exit status is intentionally not inspected here.
    os.system(launch_template.format(utils.rank_table_path, cifar10_dir))

    # NOTE(review): process_check presumably waits on the processes matched
    # by this ps pipeline -- confirm against utils.process_check.
    ps_filter = "ps -ef | grep python | grep train.py | grep -v grep"
    finished = utils.process_check(100, ps_filter)
    assert finished

    log_template = os.path.join(model_dir, "scripts/train_parallel{}/log")
    device_logs = [log_template.format(rank) for rank in range(8)]

    for device_log in device_logs:
        step_time = utils.get_perf_data(device_log)
        assert step_time < 20.0

    final_losses = [
        utils.get_loss_data_list(device_log)[-1] for device_log in device_logs
    ]
    mean_loss = sum(final_losses) / len(final_losses)
    assert mean_loss < 0.70
Ejemplo n.º 2
0
def test_lenet_MNIST():
    """Train and evaluate LeNet on MNIST, then check speed and accuracy.

    The ``lenet`` model directory is copied next to this test file.
    ``train.py`` and ``eval.py`` are run in turn from that directory with
    their output redirected to ``train_ascend.log`` and
    ``infer_ascend.log``; both commands must exit with status 0.  The
    per-step time read from the training log must be below 1.3 and the
    first ``'Accuracy'`` value found in the evaluation log must exceed
    0.98.
    """
    here = os.path.dirname(os.path.abspath(__file__))
    zoo_cv_dir = "{}/../../../../model_zoo/official/cv".format(here)
    model_name = "lenet"
    utils.copy_files(zoo_cv_dir, here, model_name)

    model_dir = os.path.join(here, model_name)
    train_log = os.path.join(model_dir, "train_ascend.log")
    checkpoint = os.path.join(model_dir, "ckpt/checkpoint_lenet-10_1875.ckpt")
    eval_log = os.path.join(model_dir, "infer_ascend.log")
    mnist_dir = os.path.join(utils.data_root, "mnist")

    # Training: stdout and stderr both go to the training log.
    train_template = "cd {0}; python train.py --data_path={1} > {2} 2>&1"
    train_status = os.system(
        train_template.format(model_name, mnist_dir, train_log))
    assert train_status == 0

    # Evaluation against the checkpoint file expected from the training run.
    eval_template = "cd {0}; python eval.py --data_path={1} --ckpt_path={2} > {3} 2>&1"
    eval_status = os.system(
        eval_template.format(model_name, mnist_dir, checkpoint, eval_log))
    assert eval_status == 0

    step_time = utils.get_perf_data(train_log)
    print("per_step_time is", step_time)
    assert step_time < 1.3

    accuracy_regex = r"'Accuracy': ([\d\.]+)}"
    accuracies = utils.parse_log_file(accuracy_regex, eval_log)
    print("acc is", accuracies)
    assert accuracies[0] > 0.98
Ejemplo n.º 3
0
def test_DeeplabV3_voc2007():
    """Launch distributed DeepLabV3 training and check its logs.

    Steps:
      1. Copy the ``deeplabv3`` model directory next to this test file.
      2. Substitute the placeholder paths in
         ``scripts/run_distribute_train_s16_r1.sh`` with the local
         experiment directory, model directory, mindrecord file,
         pretrained checkpoint and rank table.
      3. Patch ``train.py`` so ``model.train`` is called with ``70`` and
         with ``sink_size=2`` added after ``callbacks=cbs``.
      4. Run the launch script (must exit with status 0) and then read
         ``train/device{0..7}/log``: every per-step time must be below
         530.0 and the mean of the last loss values below 2.5.

    NOTE(review): the test name says voc2007 but the mindrecord path used
    is under ``voc/voc2012`` -- confirm which dataset is intended.
    """
    here = os.path.dirname(os.path.abspath(__file__))
    zoo_cv_dir = "{}/../../../../model_zoo/official/cv".format(here)
    model_name = "deeplabv3"
    utils.copy_files(zoo_cv_dir, here, model_name)
    model_dir = os.path.join(here, model_name)

    # (placeholder in the launch script, value to substitute) pairs.
    script_patches = [
        ('/PATH/TO/EXPERIMENTS_DIR', model_dir + '/train'),
        ('/PATH/TO/MODEL_ZOO_CODE', model_dir),
        ('/PATH/TO/MINDRECORD_NAME',
         os.path.join(utils.data_root,
                      "voc/voc2012/mindrecord_train/vocaug_mindrecord0")),
        ('/PATH/TO/PRETRAIN_MODEL',
         os.path.join(utils.ckpt_root, "deeplabv3/resnet101_ascend.ckpt")),
        ("\\${train_code_path}/src/tools/rank_table_8p.json",
         utils.rank_table_path),
    ]
    utils.exec_sed_command(
        [placeholder for placeholder, _ in script_patches],
        [value for _, value in script_patches],
        os.path.join(model_dir, "scripts/run_distribute_train_s16_r1.sh"))

    # (text to find in train.py, replacement) pairs.
    train_patches = [
        ('model.train(args.train_epochs', 'model.train(70'),
        ('callbacks=cbs', 'callbacks=cbs, sink_size=2'),
    ]
    utils.exec_sed_command(
        [before for before, _ in train_patches],
        [after for _, after in train_patches],
        os.path.join(model_dir, "train.py"))

    launch_cmd = "cd {}; sh scripts/run_distribute_train_s16_r1.sh".format(
        model_name)
    launch_status = os.system(launch_cmd)
    assert launch_status == 0

    # NOTE(review): process_check presumably waits on the processes matched
    # by this ps pipeline -- confirm against utils.process_check.
    ps_filter = "ps -ef | grep python | grep train.py | grep -v grep"
    finished = utils.process_check(100, ps_filter)
    assert finished

    log_template = os.path.join(model_dir, "train/device{}/log")
    device_logs = [log_template.format(rank) for rank in range(8)]

    for device_log in device_logs:
        step_time = utils.get_perf_data(device_log)
        print("per_step_time is", step_time)
        assert step_time < 530.0

    final_losses = []
    for device_log in device_logs:
        last_loss = utils.get_loss_data_list(device_log)[-1]
        print("loss is", last_loss)
        final_losses.append(last_loss)
    mean_loss = sum(final_losses) / len(final_losses)
    assert mean_loss < 2.5
Ejemplo n.º 4
0
def test_SSD_mobilenet_v1_fpn_coco2017():
    """Launch distributed SSD (MobileNet-v1 FPN) training and check its logs.

    Steps:
      1. Copy the ``ssd`` model directory next to this test file.
      2. Point the dataset and checkpoint paths in
         ``src/config_ssd_mobilenet_v1_fpn.py`` at the local data and
         checkpoint roots.
      3. Replace ``ssd300`` with ``ssd_mobilenet_v1_fpn`` in
         ``src/config.py``.
      4. Patch ``train.py`` so ``args_opt.epoch_size`` becomes ``5`` and
         ``sink_size=100`` is added after the dataset-sink argument.
      5. Run ``scripts/run_distribute_train.sh`` and then read
         ``LOG{0..7}/log.txt``: every per-step time must be below 545 and
         the mean of the last loss values below 2.72.
    """
    here = os.path.dirname(os.path.abspath(__file__))
    zoo_cv_dir = "{}/../../../../model_zoo/official/cv".format(here)
    model_name = "ssd"
    utils.copy_files(zoo_cv_dir, here, model_name)
    model_dir = os.path.join(here, model_name)

    # (default path in the config file, local replacement) pairs.
    path_patches = [
        ("/data/MindRecord_COCO",
         os.path.join(utils.data_root,
                      "coco/coco2017/mindrecord_train/ssd_mindrecord")),
        ("/ckpt/mobilenet_v1.ckpt",
         os.path.join(utils.ckpt_root, "ssd_mobilenet_v1/mobilenet-v1.ckpt")),
        ("/data/coco2017", os.path.join(utils.data_root, "coco/coco2017")),
    ]
    utils.exec_sed_command(
        [default for default, _ in path_patches],
        [local for _, local in path_patches],
        os.path.join(model_dir, "src/config_ssd_mobilenet_v1_fpn.py"))

    utils.exec_sed_command(["ssd300"], ["ssd_mobilenet_v1_fpn"],
                           os.path.join(model_dir, "src/config.py"))

    # (text to find in train.py, replacement) pairs.
    train_patches = [
        ("args_opt.epoch_size", "5"),
        ("dataset_sink_mode=dataset_sink_mode",
         "dataset_sink_mode=dataset_sink_mode, sink_size=100"),
    ]
    utils.exec_sed_command(
        [before for before, _ in train_patches],
        [after for _, after in train_patches],
        os.path.join(model_dir, "train.py"))

    launch_template = "cd {0}; sh -x scripts/run_distribute_train.sh 8 {1} 0.2 coco {2}"
    # The launcher's exit status is intentionally not inspected here.
    os.system(launch_template.format(model_name, 60, utils.rank_table_path))

    # NOTE(review): process_check presumably waits on the processes matched
    # by this ps pipeline -- confirm against utils.process_check.
    ps_filter = "ps -ef | grep train.py | grep coco | grep device_num | grep device_id | grep -v grep"
    finished = utils.process_check(120, ps_filter)
    assert finished

    log_template = os.path.join(model_dir, "LOG{}/log.txt")
    device_logs = [log_template.format(rank) for rank in range(8)]

    for device_log in device_logs:
        step_time = utils.get_perf_data(device_log)
        print("per_step_time is", step_time)
        assert step_time < 545

    final_losses = []
    for device_log in device_logs:
        last_loss = utils.get_loss_data_list(device_log)[-1]
        print("loss is", last_loss)
        final_losses.append(last_loss)
    mean_loss = sum(final_losses) / len(final_losses)
    assert mean_loss < 2.72