def test_correctness_use_adaptive_pooling():
    if mge.is_cuda_available():
        model_name = "mnist_model_with_test.mge"
    else:
        model_name = "mnist_model_with_test_cpu.mge"
    model_path = os.path.join(os.path.dirname(__file__), model_name)
    set_conv_execution_strategy("HEURISTIC_REPRODUCIBLE")

    run_train(model_path, False, False, max_err=1e-5, use_adaptive_pooling=True)
    run_train(model_path, True, False, max_err=1e-5, use_adaptive_pooling=True)
    run_train(model_path, True, True, max_err=1e-5, use_adaptive_pooling=True)

    # Sublinear memory: recompute activations during backprop to save memory.
    config = SublinearMemoryConfig(genetic_nr_iter=10)
    run_train(
        model_path,
        True,
        True,
        sublinear_memory_config=config,
        max_err=1e-5,
        use_adaptive_pooling=True,
    )

    run_eval(model_path, False, max_err=1e-7, use_adaptive_pooling=True)
    run_eval(model_path, True, max_err=1e-7, use_adaptive_pooling=True)
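# ---------------------------------------------------------------------------
# Note: run_train and run_eval are helpers defined elsewhere in this test
# module. The sketch below is a hypothetical reimplementation that only
# illustrates the assumed meaning of the two positional flags (tracing
# on/off, symbolic vs. eager trace mode); the checkpoint layout and the
# helper body are assumptions, not the actual MegEngine test code.
# ---------------------------------------------------------------------------
import numpy as np
import megengine as mge
import megengine.functional as F
from megengine.jit import trace
from megengine.optimizer import SGD
from megengine.test import assertTensorClose


def run_train_sketch(model_path, use_jit, use_symbolic,
                     sublinear_memory_config=None, max_err=1e-5,
                     use_adaptive_pooling=False):
    checkpoint = mge.load(model_path)  # assumed dict: net, data, label, loss
    net = checkpoint["net"]            # use_adaptive_pooling would select the
    opt = SGD(net.parameters(), lr=0.01, momentum=0.9)  # pooling layer here

    def train_step(data, label):
        logits = net(data)
        loss = F.cross_entropy_with_softmax(logits, label)
        opt.backward(loss)
        return loss

    if use_jit:
        train_step = trace(train_step, symbolic=use_symbolic,
                           sublinear_memory_config=sublinear_memory_config)

    opt.zero_grad()
    loss = train_step(checkpoint["data"], checkpoint["label"])
    opt.step()
    assertTensorClose(loss.numpy(), checkpoint["loss"], max_err=max_err)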
Example No. 2
def train_test(backend):

    model_path = "../examples/cifar10/resnet_example/checkpoint/pretrained_model_82.mge"

    # Update the reference values if a change comes from numerical round-off.
    # FIXME: the reference values should differ depending on CPU/GPU.
    if backend == "megengine-dynamic":
        os.environ["MGE_DISABLE_TRACE"] = "true"
        loss_ref = np.array([3.4709125, 12.46342]).astype(np.float32)
    else:
        loss_ref = np.array([3.4709125, 12.463419]).astype(np.float32)

    import megengine
    from megengine.functional.debug_param import set_conv_execution_strategy
    from megengine.test import assertTensorClose

    sys.path.append(
        os.path.join(os.path.dirname(__file__), "..", "..", "..", "examples"))
    from cifar10.resnet_example.main import Example as resnet18_config
    from cifar10.resnet_example.main import train_one_iter_mge

    mge_root = os.path.dirname(megengine.__file__)
    model_path = os.path.join(mge_root, model_path)
    set_conv_execution_strategy("HEURISTIC_REPRODUCIBLE")
    run_case = resnet18_config(backend=backend, mode="train")
    run_case.init_net()
    run_case.load_model(model_path)

    loss = []
    np.random.seed(0)
    inputs = np.random.rand(run_case.train_batch_size, 3, 32, 32)
    targets = np.random.randint(10, size=(run_case.train_batch_size, ))

    run_case.set_optimizer(0.0)
    opt = run_case.net_context["optimizer"]

    # Run two training iterations at lr = 1.0 and record both losses.
    for lr in (1.0, 1.0):
        run_case.set_optimizer(lr)
        opt.zero_grad()
        loss_batch, _ = train_one_iter_mge(inputs, targets, config=run_case)
        opt.step()
        loss.append(loss_batch.numpy()[0])
    try:
        assertTensorClose(np.array(loss).astype(np.float32),
                          loss_ref,
                          max_err=1e-5)
    except AssertionError:
        print("calculated loss:", loss)
        print("expected loss:", loss_ref)
        sys.exit(1)
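# A test runner would invoke train_test once per backend. A hypothetical
# pytest wrapper (the "megengine-static" string is an assumption; the
# original only distinguishes "megengine-dynamic" from everything else):
import pytest


@pytest.mark.parametrize("backend", ["megengine-dynamic", "megengine-static"])
def test_resnet_loss_matches_reference(backend):
    train_test(backend)  # each backend has its own reference losses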
Example No. 3
def test_correctness():

    if mge.is_cuda_available():
        model_name = "mnist_model_with_test.mge"
    else:
        model_name = "mnist_model_with_test_cpu.mge"
    model_path = os.path.join(os.path.dirname(__file__), model_name)
    set_conv_execution_strategy("HEURISTIC_REPRODUCIBLE")

    run_test(model_path, False, False)
    run_test(model_path, True, False)
    run_test(model_path, True, True)
Example No. 4
def run_perf(
    batch_size=64,
    warm_up=True,
    dump_prof=None,
    opt_level=2,
    conv_fastrun=False,
    run_step=True,
    track_bn_stats=True,
    warm_up_iter=20,
    run_iter=100,
    num_gpu=None,
    device=0,
    server=None,
    port=None,
    scale_batch_size=False,
    eager=False,
):

    if conv_fastrun:
        set_conv_execution_strategy("PROFILE")

    if num_gpu:
        dist.init_process_group(server, port, num_gpu, device, device)
        if scale_batch_size:
            batch_size = batch_size // num_gpu
        print("Run with data parallel, batch size = {} per GPU".format(
            batch_size))

    data = tensor(np.random.randn(batch_size, 3, 224, 224).astype("float32"))
    label = tensor(np.random.randint(1000, size=[batch_size], dtype=np.int32))

    net = Resnet50(track_bn_stats=track_bn_stats)
    opt = SGD(net.parameters(), lr=0.01, momentum=0.9, weight_decay=1e-4)

    def train_func(data, label):
        logits = net(data)
        loss = F.cross_entropy_with_softmax(logits, label)

        if num_gpu:
            loss = loss / num_gpu

        opt.zero_grad()
        opt.backward(loss)
        return loss

    train_func = trace(
        train_func,
        symbolic=(not eager),
        opt_level=opt_level,
        profiling=dump_prof is not None,
    )

    if warm_up:
        print("Warm up ...")
        for _ in range(warm_up_iter):
            opt.zero_grad()
            train_func(data, label)
            if run_step:
                opt.step()
    print_gpu_usage()
    print("Running train ...")
    start = time.time()
    for _ in range(run_iter):
        opt.zero_grad()
        train_func(data, label)
        if run_step:
            opt.step()

    time_used = time.time() - start

    if dump_prof:
        with open(dump_prof, "w") as fout:
            json.dump(train_func.get_profile(), fout, indent=2)

    return time_used / run_iter
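# A minimal driver for the benchmark above, assuming the script's
# module-level imports (time, json, numpy as np, MegEngine's tensor, trace,
# SGD, dist, F) and the local Resnet50 / print_gpu_usage helpers exist:
if __name__ == "__main__":
    # Profile conv algorithms once (fastrun), then time 100 iterations.
    avg_iter = run_perf(batch_size=64, conv_fastrun=True, run_iter=100)
    print("average time per iteration: {:.4f} s".format(avg_iter))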
Example No. 5
def test_dp_correctness():
    model_name = "mnist_model_with_test.mge"
    model_path = os.path.join(os.path.dirname(__file__), model_name)
    set_conv_execution_strategy("HEURISTIC_REPRODUCIBLE")
    run_test(model_path, False, False, max_err=1e-5)
Example No. 6
def test_fastrun():
    # "PROFILE" enables fastrun: candidate convolution algorithms are
    # benchmarked at runtime and the fastest one is selected.
    set_conv_execution_strategy("PROFILE")
    conv = Conv2d(1, 1, kernel_size=1, bias=True)
    inp = mge.tensor(np.random.randn(1, 1, 1, 1).astype(np.float32))
    out = conv(inp)
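# With "PROFILE" set, MegEngine benchmarks the available convolution
# algorithms on first execution and caches the winner, so the first call is
# slower than later ones. A sketch of how one might observe this, continuing
# from the test above (the caching/timing behaviour described is an
# assumption):
import time

inp2 = mge.tensor(np.random.randn(1, 1, 1, 1).astype(np.float32))
start = time.time()
conv(inp2)  # first run with this shape: algorithms are profiled
print("first call:  {:.4f} s".format(time.time() - start))
start = time.time()
conv(inp2)  # subsequent run: cached algorithm choice is reused
print("second call: {:.4f} s".format(time.time() - start))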