def main():
    """Build one batch of random input data and launch a ResNet-50 training run.

    The device is GPU 0 when ``get_gpus()`` reports at least one entry,
    otherwise the CPU. A symbolic ResNet (50 layers, 1000 classes) is used only
    to obtain the input shapes for the random batch; the model that is actually
    handed to ``train`` is the Gluon ``vision.resnet50_v1`` network without
    pretrained weights, together with a default-constructed SGD optimizer.
    """
    # Benchmark configuration.
    # NOTE(review): batch_size, epoch_size and num_epochs are not referenced
    # anywhere below -- confirm whether they should be passed to train().
    batch_size = 64
    num_classes = 1000
    epoch_size = 100  # epoch_size is similar to the idea of steps
    num_epochs = 1
    image_shape = (3, 229, 229)

    # set fake data
    network = 'resnet'
    num_layers = 50
    dev = mx.gpu(0) if len(get_gpus()) > 0 else mx.cpu()
    net = import_module('symbols.' + network)
    sym = net.get_symbol(num_classes=num_classes,
                         image_shape=image_shape,
                         num_layers=num_layers,
                         dtype=np.float32)
    mod = mx.mod.Module(symbol=sym, context=dev)
    # NOTE(review): the module is never bound in this function; confirm that
    # mod.data_shapes is available on an unbound Module.
    data = [mx.random.uniform(-1.0, 1.0, shape=shape, ctx=dev)
            for _, shape in mod.data_shapes]
    DataIter = mx.io.DataBatch(data, [])

    # get model
    # Fixed: Python's boolean literal is ``False``; ``false`` raised NameError.
    model_resnet50 = vision.resnet50_v1(pretrained=False)

    # pick optimizer
    optim = mx.optimizer.SGD()

    # run training
    train(model_resnet50, DataIter, optim)
def run_profile_test(config):
    """Run one profiling pass described by ``config``.

    ``config`` is indexed with the keys ``'network'``, ``'batch_size'``,
    ``'dev'``, ``'dry_run'``, ``'iteration'`` and ``'out_dir'``.

    The requested device is resolved first, so an unusable request fails
    before ``out_dir`` is touched. ``out_dir`` is then removed if it already
    exists and recreated empty, the run parameters are logged, and
    ``run_profiler`` is invoked with the resolved device.

    Raises:
        ValueError: if ``config['dev']`` is neither ``'gpu'`` nor ``'cpu'``.
        RuntimeError: if ``'gpu'`` is requested but ``get_gpus()`` reports none.
    """
    network = config['network']
    batch_size = config['batch_size']
    dev = config['dev']
    dry_run = config['dry_run']
    iteration = config['iteration']
    out_dir = config['out_dir']

    # config dev
    # Previously an unknown device left the device list unbound and a GPU
    # request on a GPU-less host produced an empty list; both only failed
    # later (NameError / IndexError), after out_dir had already been wiped.
    if dev == 'gpu':
        if len(get_gpus()) == 0:
            logging.error('no valid device')
            raise RuntimeError("dev 'gpu' requested but no GPU is available")
        device = mx.gpu(0)
    elif dev == 'cpu':
        device = mx.cpu()
    else:
        logging.error('no valid device')
        raise ValueError("unsupported dev {!r}; expected 'gpu' or 'cpu'".format(dev))

    # clean and create out_dir
    if os.path.isdir(out_dir):
        shutil.rmtree(out_dir)
    os.mkdir(out_dir)

    logging.info('network: {} dev {}'.format(network, dev))
    logging.info('batch size {}, dry_run: {}, iteration {}'.format(batch_size, dry_run, iteration))
    run_profiler(network, batch_size, device, iteration, dry_run)
# return num images per second return num_batches * batch_size / (time.time() - tic) if __name__ == '__main__': if opt.network == 'all': networks = [ 'alexnet', 'vgg-16', 'resnetv1-50', 'resnet-50', 'resnet-152', 'inception-bn', 'inception-v3', 'inception-v4', 'inception-resnet-v2', 'mobilenet', 'densenet121', 'squeezenet1.1' ] logging.info('It may take some time to run all models, ' 'set --network to run a specific one') else: networks = [opt.network] devs = [mx.gpu(0)] if len(get_gpus()) > 0 else [] # Enable USE_MKLDNN for better CPU performance devs.append(mx.cpu()) if opt.batch_size == 0: batch_sizes = [1, 32, 64, 128, 256] logging.info('run batchsize [1, 32, 64, 128, 256] by default, ' 'set --batch-size to run a specific one') else: batch_sizes = [opt.batch_size] for net in networks: logging.info('network: %s', net) if net in ['densenet121', 'squeezenet1.1']: logging.info('network: %s is converted from gluon modelzoo', net) logging.info(
# run dry_run = 5 # use 5 iterations to warm up for i in range(dry_run+num_batches): if i == dry_run: tic = time.time() mod.forward(batch, is_train=False) for output in mod.get_outputs(): output.wait_to_read() # return num images per second return num_batches*batch_size/(time.time() - tic) if __name__ == '__main__': networks = ['alexnet', 'vgg-16', 'inception-bn', 'inception-v3', 'resnet-50', 'resnet-152'] devs = [mx.gpu(0)] if len(get_gpus()) > 0 else [] # Enable USE_MKLDNN for better CPU performance devs.append(mx.cpu()) batch_sizes = [1, 2, 4, 8, 16, 32] for net in networks: logging.info('network: %s', net) for d in devs: logging.info('device: %s', d) logged_fp16_warning = False for b in batch_sizes: for dtype in ['float32', 'float16']: if d == mx.cpu() and dtype == 'float16': #float16 is not supported on CPU continue elif net in ['inception-bn', 'alexnet'] and dtype == 'float16':
metrics=acc, **kwargs) r = acc.get()[1] print('testing %s, acc = %f, speed = %f img/sec' % (m, r, speed)) assert r > g and r < g + .1 def test_imagenet1k_inception_bn(**kwargs): acc = mx.metric.create('acc') m = 'imagenet1k-inception-bn' g = 0.72 (speed, ) = score(model=m, data_val='data/val-5k-256.rec', rgb_mean='123.68,116.779,103.939', metrics=acc, **kwargs) r = acc.get()[1] print('Tested %s acc = %f, speed = %f img/sec' % (m, r, speed)) assert r > g and r < g + .1 if __name__ == '__main__': gpus = get_gpus() assert len(gpus) > 0 batch_size = 16 * len(gpus) gpus = ','.join([str(i) for i in gpus]) download_data() test_imagenet1k_resnet(gpus=gpus, batch_size=batch_size) test_imagenet1k_inception_bn(gpus=gpus, batch_size=batch_size)
def _score_and_report(model, floor, rgb_mean, report_fmt, score_kwargs):
    """Score ``model`` on the validation record file and check its accuracy.

    A fresh "acc" metric is created, ``score`` is called with it, the result is
    printed using ``report_fmt`` (filled with model name, accuracy, speed), and
    the accuracy is asserted to lie strictly between ``floor`` and
    ``floor + 0.1``. ``score_kwargs`` is forwarded to ``score`` unchanged.
    """
    metric = mx.metric.create("acc")
    # score() is expected to return exactly one value here: the speed.
    (speed,) = score(
        model=model,
        data_val="data/val-5k-256.rec",
        rgb_mean=rgb_mean,
        metrics=metric,
        **score_kwargs
    )
    accuracy = metric.get()[1]
    print(report_fmt % (model, accuracy, speed))
    assert floor < accuracy < floor + 0.1


def test_imagenet1k_resnet(**kwargs):
    """Check each ImageNet-1k ResNet model against its accuracy floor.

    Keyword arguments are passed through to ``score``.
    """
    expectations = (
        ("imagenet1k-resnet-34", 0.72),
        ("imagenet1k-resnet-50", 0.75),
        ("imagenet1k-resnet-101", 0.765),
        ("imagenet1k-resnet-152", 0.76),
    )
    for name, floor in expectations:
        _score_and_report(
            name,
            floor,
            "0,0,0",
            "testing %s, acc = %f, speed = %f img/sec",
            kwargs,
        )


def test_imagenet1k_inception_bn(**kwargs):
    """Check the ImageNet-1k Inception-BN model against its accuracy floor.

    Keyword arguments are passed through to ``score``.
    """
    _score_and_report(
        "imagenet1k-inception-bn",
        0.72,
        "123.68,116.779,103.939",
        "Tested %s acc = %f, speed = %f img/sec",
        kwargs,
    )


if __name__ == "__main__":
    # Requires at least one GPU; the batch size is 16 per detected GPU.
    gpus = get_gpus()
    assert len(gpus) > 0
    batch_size = len(gpus) * 16
    # score() receives the GPU ids as one comma-separated string.
    gpus = ",".join(map(str, gpus))

    download_data()
    test_imagenet1k_resnet(gpus=gpus, batch_size=batch_size)
    test_imagenet1k_inception_bn(gpus=gpus, batch_size=batch_size)