# run dry_run = 5 # use 5 iterations to warm up for i in range(dry_run+num_batches): if i == dry_run: tic = time.time() mod.forward(batch, is_train=False) for output in mod.get_outputs(): output.wait_to_read() # return num images per second return num_batches*batch_size/(time.time() - tic) if __name__ == '__main__': networks = ['alexnet', 'vgg-16', 'inception-bn', 'inception-v3', 'resnet-50', 'resnet-152'] devs = [mx.gpu(0)] if len(get_gpus()) > 0 else [] # Enable USE_MKLDNN for better CPU performance devs.append(mx.cpu()) batch_sizes = [1, 2, 4, 8, 16, 32] for net in networks: logging.info('network: %s', net) for d in devs: logging.info('device: %s', d) logged_fp16_warning = False for b in batch_sizes: for dtype in ['float32', 'float16']: if d == mx.cpu() and dtype == 'float16': #float16 is not supported on CPU continue elif net in ['inception-bn', 'alexnet'] and dtype == 'float16':
def test_imagenet1k_resnet(**kwargs):
    """Score the ImageNet-1k ResNet models and check their accuracy.

    Every model is passed to ``score`` together with the validation record
    ``data/val-5k-256.rec``. The accuracy read back from the metric must lie
    strictly between the model's expected floor and that floor plus 0.1.

    Parameters
    ----------
    **kwargs
        Forwarded unchanged to ``score``.
    """
    # (model name, accuracy floor) pairs, evaluated in this order.
    expectations = (
        ("imagenet1k-resnet-34", 0.72),
        ("imagenet1k-resnet-50", 0.75),
        ("imagenet1k-resnet-101", 0.765),
        ("imagenet1k-resnet-152", 0.76),
    )
    for model_name, floor in expectations:
        # A fresh metric per model so results do not accumulate across runs.
        metric = mx.metric.create("acc")
        (speed,) = score(
            model=model_name,
            data_val="data/val-5k-256.rec",
            rgb_mean="0,0,0",
            metrics=metric,
            **kwargs
        )
        accuracy = metric.get()[1]
        print("testing %s, acc = %f, speed = %f img/sec"
              % (model_name, accuracy, speed))
        assert floor < accuracy < floor + 0.1


def test_imagenet1k_inception_bn(**kwargs):
    """Score the ImageNet-1k Inception-BN model and check its accuracy.

    The accuracy read back from the metric after ``score`` returns must lie
    strictly between 0.72 and 0.82.

    Parameters
    ----------
    **kwargs
        Forwarded unchanged to ``score``.
    """
    model_name = "imagenet1k-inception-bn"
    floor = 0.72
    metric = mx.metric.create("acc")
    (speed,) = score(
        model=model_name,
        data_val="data/val-5k-256.rec",
        rgb_mean="123.68,116.779,103.939",
        metrics=metric,
        **kwargs
    )
    accuracy = metric.get()[1]
    print("Tested %s acc = %f, speed = %f img/sec"
          % (model_name, accuracy, speed))
    assert floor < accuracy < floor + 0.1


if __name__ == "__main__":
    gpu_ids = get_gpus()
    num_gpus = len(gpu_ids)
    # These tests only run on GPU; bail out early when none is reported.
    assert num_gpus > 0
    # 16 images per GPU; device ids are handed over as a comma-separated string.
    run_args = dict(
        gpus=",".join(str(gpu_id) for gpu_id in gpu_ids),
        batch_size=16 * num_gpus,
    )
    download_data()
    test_imagenet1k_resnet(**run_args)
    test_imagenet1k_inception_bn(**run_args)