def test_correctness_use_adaptive_pooling():
    """Train and evaluate the MNIST reference model with adaptive pooling
    enabled, checking losses against pre-dumped reference values.

    Covers eager, traced, and symbolic-traced training, plus training with
    sublinear memory, then two evaluation modes.
    """
    # Reference dumps differ between CUDA and CPU builds.
    model_name = (
        "mnist_model_with_test.mge"
        if mge.is_cuda_available()
        else "mnist_model_with_test_cpu.mge"
    )
    model_path = os.path.join(os.path.dirname(__file__), model_name)

    # Deterministic conv algorithm selection so results are reproducible
    # and comparable against the dumped reference numbers.
    set_conv_execution_strategy("HEURISTIC_REPRODUCIBLE")

    for trace_flag, symbolic_flag in ((False, False), (True, False), (True, True)):
        run_train(
            model_path,
            trace_flag,
            symbolic_flag,
            max_err=1e-5,
            use_adaptive_pooling=True,
        )

    # sublinear memory planning path
    sublinear_cfg = SublinearMemoryConfig(genetic_nr_iter=10)
    run_train(
        model_path,
        True,
        True,
        sublinear_memory_config=sublinear_cfg,
        max_err=1e-5,
        use_adaptive_pooling=True,
    )

    run_eval(model_path, False, max_err=1e-7, use_adaptive_pooling=True)
    run_eval(model_path, True, max_err=1e-7, use_adaptive_pooling=True)
def train_test(backend):
    """Run two training iterations of the CIFAR-10 ResNet example on the
    given backend and compare the losses against hard-coded references.

    Args:
        backend: backend identifier string; "megengine-dynamic" disables
            tracing via the MGE_DISABLE_TRACE environment variable.

    Exits the process with status 1 if the losses do not match.
    """
    model_path = "../examples/cifar10/resnet_example/checkpoint/pretrained_model_82.mge"
    # Change the reference number if the change is from numerical rounding-off
    # FIXME! Need to use different number depending on CPU/GPU
    if backend == "megengine-dynamic":
        os.environ["MGE_DISABLE_TRACE"] = "true"
        loss_ref = np.array([3.4709125, 12.46342]).astype(np.float32)
    else:
        loss_ref = np.array([3.4709125, 12.463419]).astype(np.float32)

    # Imported lazily so the environment variable above takes effect first.
    import megengine
    from megengine.functional.debug_param import set_conv_execution_strategy
    from megengine.test import assertTensorClose

    sys.path.append(
        os.path.join(os.path.dirname(__file__), "..", "..", "..", "examples"))
    from cifar10.resnet_example.main import Example as resnet18_config
    from cifar10.resnet_example.main import train_one_iter_mge

    mge_root = os.path.dirname(megengine.__file__)
    model_path = os.path.join(mge_root, model_path)

    # Deterministic conv algorithm selection for reproducible losses.
    set_conv_execution_strategy("HEURISTIC_REPRODUCIBLE")

    run_case = resnet18_config(backend=backend, mode="train")
    run_case.init_net()
    run_case.load_model(model_path)

    max_err = 0.0
    loss = []
    np.random.seed(0)
    inputs = np.random.rand(run_case.train_batch_size, 3, 32, 32)
    targets = np.random.randint(10, size=(run_case.train_batch_size, ))

    run_case.set_optimizer(0.0)
    opt = run_case.net_context["optimizer"]
    for lr in (1.0, 1.0):
        run_case.set_optimizer(lr)
        opt.zero_grad()
        loss_batch, _ = train_one_iter_mge(inputs, targets, config=run_case)
        opt.step()
        loss.append(loss_batch.numpy()[0])

    try:
        assertTensorClose(np.array(loss).astype(np.float32), loss_ref, max_err=1e-5)
    except AssertionError:
        # FIX: was a bare `except:`, which also swallowed SystemExit and
        # KeyboardInterrupt; only the comparison failure should be handled.
        print("calculated loss:", loss)
        print("expect:", loss_ref)
        sys.exit(1)
def test_correctness():
    """Check numerical correctness of the MNIST reference model in the
    three training modes: eager, traced, and symbolic-traced."""
    # CUDA and CPU builds compare against different reference dumps.
    if mge.is_cuda_available():
        reference = "mnist_model_with_test.mge"
    else:
        reference = "mnist_model_with_test_cpu.mge"
    model_path = os.path.join(os.path.dirname(__file__), reference)

    # Deterministic conv algorithm selection for reproducible results.
    set_conv_execution_strategy("HEURISTIC_REPRODUCIBLE")

    for use_trace, use_symbolic in ((False, False), (True, False), (True, True)):
        run_test(model_path, use_trace, use_symbolic)
def run_perf(
    batch_size=64,
    warm_up=True,
    dump_prof=None,
    opt_level=2,
    conv_fastrun=False,
    run_step=True,
    track_bn_stats=True,
    warm_up_iter=20,
    run_iter=100,
    num_gpu=None,
    device=0,
    server=None,
    port=None,
    scale_batch_size=False,
    eager=False,
):
    """Benchmark one-GPU or data-parallel ResNet-50 training on random data.

    Args:
        batch_size: samples per iteration (per GPU when scale_batch_size).
        warm_up: run warm_up_iter untimed iterations first.
        dump_prof: optional path; when set, dump the trace profile as JSON.
        opt_level: graph optimization level passed to trace().
        conv_fastrun: profile conv algorithms instead of using heuristics.
        run_step: whether to apply the optimizer step each iteration.
        track_bn_stats: whether BN layers track running statistics.
        warm_up_iter / run_iter: iteration counts for warm-up / timing.
        num_gpu: if set, initialize distributed training over this many GPUs.
        device: rank/device index of this process.
        server / port: distributed master address and port.
        scale_batch_size: divide batch_size by num_gpu in distributed mode.
        eager: run without symbolic tracing.

    Returns:
        Average seconds per timed iteration.
    """
    if conv_fastrun:
        set_conv_execution_strategy("PROFILE")

    if num_gpu:
        # BUG FIX: previously referenced the undefined global ``args``
        # (args.server / args.port) instead of the function parameters,
        # raising NameError on the distributed path.
        dist.init_process_group(server, port, num_gpu, device, device)
        if scale_batch_size:
            batch_size = batch_size // num_gpu
        print("Run with data parallel, batch size = {} per GPU".format(
            batch_size))

    data = tensor(np.random.randn(batch_size, 3, 224, 224).astype("float32"))
    label = tensor(np.random.randint(1000, size=[
        batch_size,
    ], dtype=np.int32))

    net = Resnet50(track_bn_stats=track_bn_stats)
    opt = SGD(net.parameters(), lr=0.01, momentum=0.9, weight_decay=1e-4)

    def train_func(data, label):
        logits = net(data)
        loss = F.cross_entropy_with_softmax(logits, label)
        # Average gradients across workers in data-parallel mode.
        if num_gpu:
            loss = loss / num_gpu
        opt.zero_grad()
        opt.backward(loss)
        return loss

    train_func = trace(
        train_func,
        symbolic=(not eager),
        opt_level=opt_level,
        profiling=not (dump_prof is None),
    )

    if warm_up:
        print("Warm up ...")
        for _ in range(warm_up_iter):
            opt.zero_grad()
            train_func(data, label)
            if run_step:
                opt.step()
    print_gpu_usage()

    print("Running train ...")
    start = time.time()
    for _ in range(run_iter):
        opt.zero_grad()
        train_func(data, label)
        if run_step:
            opt.step()
    time_used = time.time() - start

    if dump_prof:
        with open(dump_prof, "w") as fout:
            json.dump(train_func.get_profile(), fout, indent=2)

    return time_used / run_iter
def test_dp_correctness():
    """Check data-parallel training correctness against the CUDA reference
    dump of the MNIST model."""
    path = os.path.join(os.path.dirname(__file__), "mnist_model_with_test.mge")
    # Deterministic conv algorithm selection for reproducible losses.
    set_conv_execution_strategy("HEURISTIC_REPRODUCIBLE")
    run_test(path, False, False, max_err=1e-5)
def test_fastrun():
    """Smoke-test the PROFILE (fastrun) conv execution strategy by running
    a minimal 1x1 convolution once."""
    set_conv_execution_strategy("PROFILE")
    conv = Conv2d(1, 1, kernel_size=1, bias=True)
    inp = mge.tensor(np.random.randn(1, 1, 1, 1).astype(np.float32))
    inp = conv(inp)