def ResNet50(order, cudnn_ws, mkl):
    """Build a ResNet-50 inference model.

    Args:
        order: data layout string, e.g. "NCHW".
        cudnn_ws: cuDNN workspace byte limit (stringified into the arg scope).
        mkl: unused in this body -- presumably kept for a uniform builder
            signature; TODO(review) confirm against callers.

    Returns:
        (model, 448): the ModelHelper and the expected input spatial size.
        448 is consistent with final_avg_kernel=14 (14 * 32 ResNet stride).
    """
    my_arg_scope = {
        'order': order,
        'use_cudnn': True,
        'cudnn_exhaustive_search': True,
        'ws_nbytes_limit': str(cudnn_ws),
    }
    # Fix: the model was mislabeled name="alexnet" although this function
    # builds ResNet-50 (the sibling builder uses name="resnet50").
    model = ModelHelper(name="resnet50", arg_scope=my_arg_scope)
    resnet.create_resnet50(
        model, "data", 3, 1000, is_test=True, final_avg_kernel=14
    )
    return model, 448
def ResNet50(order, cudnn_ws, device):
    """Build a ResNet-50 inference model; returns (model, input_size=448)."""
    scope_args = {
        'order': order,
        'use_cudnn': True,
        'cudnn_exhaustive_search': True,
        'ws_nbytes_limit': str(cudnn_ws),
    }
    model = ModelHelper(name="resnet50", arg_scope=scope_args)
    # 14x14 final average pool implies a 448x448 input (14 * 32 stride).
    resnet.create_resnet50(
        model,
        "data",
        3,
        1000,
        is_test=True,
        final_avg_kernel=14,
    )
    return model, 448
def complex_resnet():
    """ResNet-50 test fixture (1-channel input, 5 labels, no loss).

    Returns the model and a list with the single expected input shape.
    """
    helper = ModelHelper(name="r", arg_scope={"order": "NCHW", "is_test": True})
    resnet.create_resnet50(
        helper,
        "data",
        num_input_channels=1,
        num_labels=5,
        is_test=True,
        no_loss=True,
    )
    return helper, [(1, 1, 224, 224)]
def create_resnet50_model_ops(model, loss_scale):
    """Add ResNet-50 forward ops plus softmax loss and accuracy.

    Returns [loss], scaled by loss_scale.
    """
    # NOTE(review): siblings in this file spell the fp16 initializer
    # "PseudoFP16Initializer"; "pFP16Initializer" must be imported under
    # this name at module level -- confirm.
    init = pFP16Initializer if args.dtype == 'float16' else Initializer
    scope_kwargs = {
        'WeightInitializer': init,
        'BiasInitializer': init,
        'enable_tensor_core': args.enable_tensor_core,
    }
    with brew.arg_scope([brew.conv, brew.fc], **scope_kwargs):
        pred = resnet.create_resnet50(
            model,
            "data",
            num_input_channels=args.num_channels,
            num_labels=args.num_labels,
            no_bias=True,
            no_loss=True,
        )
    if args.dtype == 'float16':
        # Loss math is done in fp32.
        pred = model.net.HalfToFloat(pred, pred + '_fp32')
    softmax, loss = model.SoftmaxWithLoss([pred, 'label'], ['softmax', 'loss'])
    loss = model.Scale(loss, scale=loss_scale)
    brew.accuracy(model, [softmax, "label"], "accuracy")
    return [loss]
def create_model(model, loss_scale):
    """Add ResNet-50 forward ops, softmax loss and top-1/top-5 accuracy.

    Returns [loss], normalized by loss_scale.
    """
    use_fp16 = args.data_type == 'float16'
    init = PseudoFP16Initializer if use_fp16 else Initializer
    with brew.arg_scope([brew.conv, brew.fc],
                        WeightInitializer=init,
                        BiasInitializer=init,
                        enable_tensor_core=False,
                        float16_compute=False):
        pred = resnet.create_resnet50(
            model,
            "data",
            num_input_channels=args.channels,
            num_labels=args.num_labels,
            # num_groups=args.resnext_num_groups,
            # num_width_per_group=args.resnext_width_per_group,
            no_bias=True,
            no_loss=True)
    # When computing in half precision, inflate predictions back to fp32
    # before the loss.
    if use_fp16:
        pred = model.net.HalfToFloat(pred, pred + '_fp32')
    # Softmax probabilities and cross-entropy loss.
    softmax, loss = model.SoftmaxWithLoss([pred, 'label'], ['softmax', 'loss'])
    # Normalize the loss and compute top-k accuracies for k in {1, 5}.
    loss = model.Scale(loss, scale=loss_scale)
    brew.accuracy(model, [softmax, "label"], "accuracy", top_k=1)
    brew.accuracy(model, [softmax, "label"], "accuracy_top5", top_k=5)
    return [loss]
def Resnet50(model, loss_scale, dtype='float'):
    """Add ResNet-50 ops with net-name-prefixed loss/accuracy blobs.

    Returns [loss], scaled by loss_scale.
    """
    init = PseudoFP16Initializer if dtype == 'float16' else Initializer
    with brew.arg_scope(
        [brew.conv, brew.fc],
        WeightInitializer=init,
        BiasInitializer=init,
    ):
        # Residual network backbone.
        pred = resnet.create_resnet50(
            model,
            "data",
            num_input_channels=3,
            num_labels=1000,
            label="label",
            no_bias=True,
            no_loss=True,
        )
    if dtype == 'float16':
        pred = model.net.HalfToFloat(pred, pred + '_fp32')
    softmax, loss = model.SoftmaxWithLoss([pred, 'label'], ['softmax', 'loss'])
    # Prefix output blobs with the net name so multiple nets can coexist.
    prefix = model.net.Proto().name
    loss = model.net.Scale(loss, prefix + "_loss", scale=loss_scale)
    brew.accuracy(model, [softmax, "label"], prefix + "_accuracy")
    return [loss]
def test_resnet_forward_only_fast_simplenet(self):
    """Test the C++ memonger that only handles simple (linear) nets."""
    model = cnn.CNNModelHelper(
        order="NCHW",
        name="test",
        cudnn_exhaustive_search=True,
    )
    with core.NameScope("gpu_0"):
        data = model.net.AddExternalInput("gpu_0/data")
        resnet.create_resnet50(
            model, data, num_input_channels=3, num_labels=1000, is_test=True
        )

    count_before = count_blobs(model.net.Proto())
    t = time.time()
    heads = set(["gpu_0/data", "gpu_0/last_out_L1000"]).union(
        set(model.net.Proto().external_input))
    optim_proto = memonger.optimize_inference_fast(model.net.Proto(), heads)
    print("Optimization took {} secs".format(time.time() - t))
    count_after = count_blobs(optim_proto)
    num_shared_blobs = count_shared_blobs(optim_proto)

    # Optimization must reduce the blob count and introduce some sharing.
    self.assertTrue(count_after < count_before)
    print(count_after, count_before, num_shared_blobs)
    self.assertTrue(0 < num_shared_blobs < 7)

    # Run both nets and compare the final activations.
    workspace.RunNetOnce(model.param_init_net)
    data = np.random.rand(4, 3, 227, 227).astype(np.float32)
    workspace.FeedBlob("gpu_0/data", data)
    model.net.Proto().type = 'simple'

    workspace.RunNetOnce(model.net)
    loss1 = workspace.FetchBlob("gpu_0/last_out_L1000")
    workspace.RunNetOnce(optim_proto)
    optimized_loss1 = workspace.FetchBlob("gpu_0/last_out_L1000")
    np.testing.assert_almost_equal(loss1, optimized_loss1)
def create_resnet50_model_ops(model):
    """ResNet-50 with its built-in softmax loss; adds accuracy, returns [loss]."""
    softmax, loss = resnet.create_resnet50(
        model,
        "data",
        num_input_channels=args.num_channels,
        num_labels=args.num_labels,
        label="label",
    )
    model.Accuracy([softmax, "label"], "accuracy")
    return [loss]
def create_resnet50_model_ops(model, loss_scale):
    """ImageNet-shaped ResNet-50 (3 channels, 1000 labels); returns [loss].

    NOTE(review): loss_scale is accepted but unused here -- presumably kept
    for a uniform model-ops signature.
    """
    softmax, loss = resnet.create_resnet50(
        model,
        "data",
        num_input_channels=3,
        num_labels=1000,
        label="label",
    )
    model.Accuracy([softmax, "label"], "accuracy")
    return [loss]
def test_resnet_forward_only(self):
    """DAG memonger on a forward-only ResNet-50: fewer blobs, same output."""
    model = cnn.CNNModelHelper(
        order="NCHW",
        name="test",
        cudnn_exhaustive_search=True,
    )
    with core.NameScope("gpu_0"):
        data = model.net.AddExternalInput("gpu_0/data")
        resnet.create_resnet50(
            model, data, num_input_channels=3, num_labels=1000, is_test=True
        )

    count_before = count_blobs(model.net.Proto())
    optim_proto = memonger.optimize_inference_for_dag(
        model.net, ["gpu_0/data"], "gpu_0/"
    )
    count_after = count_blobs(optim_proto)
    num_shared_blobs = count_shared_blobs(optim_proto)

    # Run both versions and compare results.
    workspace.RunNetOnce(model.param_init_net)
    data = np.random.rand(4, 3, 227, 227).astype(np.float32)
    workspace.FeedBlob("gpu_0/data", data)
    workspace.RunNetOnce(model.net)
    model.net.Proto().type = 'dag'
    model.net.Proto().num_workers = 4
    loss1 = workspace.FetchBlob("gpu_0/last_out_L1000")

    # The optimized graph must be structurally equivalent to the original.
    self.assertTrue(memonger.verify_graph_equality(
        model.net.Proto(), optim_proto))
    workspace.RunNetOnce(optim_proto)
    optimized_loss1 = workspace.FetchBlob("gpu_0/last_out_L1000")

    self.assertTrue(count_after < count_before)
    self.assertTrue(num_shared_blobs < 7)
    np.testing.assert_almost_equal(loss1, optimized_loss1)
def create_resnet50_model_ops(model, loss_scale):
    """ResNet-50 with built-in loss, scaled by loss_scale; returns [loss]."""
    softmax, loss = resnet.create_resnet50(
        model,
        "data",
        num_input_channels=args.num_channels,
        num_labels=args.num_labels,
        label="label",
        no_bias=True,
    )
    loss = model.Scale(loss, scale=loss_scale)
    model.Accuracy([softmax, "label"], "accuracy")
    return [loss]
def create_resnet50_model_ops(model, loss_scale=1.0):
    """Residual network with net-name-prefixed loss/accuracy; returns [loss].

    NOTE(review): num_labels is a free variable resolved at module scope.
    """
    softmax, loss = resnet.create_resnet50(
        model,
        "data",
        num_input_channels=3,
        num_labels=num_labels,
        label="label",
        no_bias=True,
    )
    net_name = model.net.Proto().name
    loss = model.net.Scale(loss, net_name + "_loss", scale=loss_scale)
    brew.accuracy(model, [softmax, "label"], net_name + "_accuracy")
    return [loss]
def create_resnet50_model_ops(model, loss_scale=1.0):
    """Create a residual network with prefixed loss/accuracy blobs.

    NOTE(review): num_labels is a free variable resolved at module scope.
    """
    softmax, loss = resnet.create_resnet50(
        model,
        "data",
        num_input_channels=3,
        num_labels=num_labels,
        label="label",
    )
    net_name = model.net.Proto().name
    loss = model.net.Scale(loss, net_name + "_loss", scale=loss_scale)
    brew.accuracy(model, [softmax, "label"], net_name + "_accuracy")
    return [loss]
def test_resnet_forward_only(self):
    """DAG memonger on forward-only ResNet-50: shares blobs, output unchanged."""
    model = cnn.CNNModelHelper(
        order="NCHW",
        name="test",
        cudnn_exhaustive_search=True,
    )
    with core.NameScope("gpu_0"):
        data = model.net.AddExternalInput("gpu_0/data")
        resnet.create_resnet50(
            model, data, num_input_channels=3, num_labels=1000, is_test=True
        )

    count_before = count_blobs(model.net.Proto())
    optim_proto = memonger.optimize_inference_for_dag(
        model.net, ["gpu_0/data"], "gpu_0/"
    )
    count_after = count_blobs(optim_proto)
    num_shared_blobs = count_shared_blobs(optim_proto)

    # Run both versions and compare results.
    workspace.RunNetOnce(model.param_init_net)
    data = np.random.rand(4, 3, 227, 227).astype(np.float32)
    workspace.FeedBlob("gpu_0/data", data)
    workspace.RunNetOnce(model.net)
    model.net.Proto().type = 'dag'
    model.net.Proto().num_workers = 4
    loss1 = workspace.FetchBlob("gpu_0/last_out_L1000")
    workspace.RunNetOnce(optim_proto)
    optimized_loss1 = workspace.FetchBlob("gpu_0/last_out_L1000")

    self.assertTrue(count_after < count_before)
    self.assertTrue(0 < num_shared_blobs < 7)
    np.testing.assert_almost_equal(loss1, optimized_loss1)
def create_resnet50_model_ops(model, loss_scale):
    """ResNet-50 with built-in loss; scales loss by 1/(num_xpus*num_shards).

    NOTE(review): this function ignores its loss_scale parameter and reads
    `self.opts` although it is not written as a method -- it presumably only
    works when defined inside a method so `self` is closed over; confirm
    against the caller before reuse.
    """
    softmax, loss = resnet.create_resnet50(
        model,
        "data",
        num_input_channels=3,
        num_labels=1000,
        label="label",
    )
    model.Accuracy([softmax, "label"], "accuracy")
    dist = self.opts['distributed']
    my_loss_scale = 1. / dist['num_xpus'] / dist['num_shards']
    loss = model.Scale(loss, scale=my_loss_scale)
    return [loss]
def gen_test_resnet50(_order, _cudnn_ws):
    """Build a training-mode ResNet-50 test model; returns (model, 227).

    The _order/_cudnn_ws parameters are intentionally unused (NCHW is fixed).
    """
    model = cnn.CNNModelHelper(
        order="NCHW",
        name="resnet_50_test",
        cudnn_exhaustive_search=True,
    )
    data = model.net.AddExternalInput("data")
    label = model.net.AddExternalInput("label")
    _softmax, loss = resnet.create_resnet50(
        model,
        data,
        num_input_channels=3,
        num_labels=1000,
        label=label,
        is_test=False,
    )
    return model, 227
def create_resnet50_model_ops(model, loss_scale):
    """ResNet-50 forward ops plus softmax loss and accuracy; returns [loss].

    NOTE(review): num_channels/num_labels are free variables resolved at
    module scope.
    """
    with brew.arg_scope([brew.conv, brew.fc],
                        WeightInitializer=Initializer,
                        BiasInitializer=Initializer,
                        enable_tensor_core=0):
        pred = resnet.create_resnet50(
            model,
            "data",
            num_input_channels=num_channels,
            num_labels=num_labels,
            no_bias=True,
            no_loss=True,
        )
    softmax, loss = model.SoftmaxWithLoss([pred, 'label'], ['softmax', 'loss'])
    loss = model.Scale(loss, scale=loss_scale)
    brew.accuracy(model, [softmax, "label"], "accuracy")
    return [loss]
def test_resnet_shared_grads(self, with_shapes, gc, dc):
    """share_grad_blobs on ResNet-50: fewer blobs, same loss and conv1 grad."""
    model = cnn.CNNModelHelper(
        order="NCHW",
        name="test",
        cudnn_exhaustive_search=True,
    )
    with core.NameScope("gpu_0"):
        data = model.net.AddExternalInput("gpu_0/data")
        label = model.net.AddExternalInput("gpu_0/label")
        _softmax, loss = resnet.create_resnet50(
            model,
            data,
            num_input_channels=3,
            num_labels=1000,
            label=label,
            is_test=False,
        )

    param_to_grad = model.AddGradientOperators([loss])
    shapes, types = workspace.InferShapesAndTypes(
        [model.param_init_net, model.net],
        {'gpu_0/data': [4, 3, 227, 227], 'gpu_0/label': [4]},
    )

    count_before = count_blobs(model.net.Proto())
    # conv1_w_grad is deliberately excluded from sharing so we can compare it.
    optim_proto = memonger.share_grad_blobs(
        model.net,
        ["gpu_0/loss"],
        set(model.param_to_grad.values()),
        "gpu_0/",
        share_activations=True,
        dont_share_blobs=set([str(param_to_grad["gpu_0/conv1_w"])]),
        blob_shapes=shapes if with_shapes else None,
    )
    count_after = count_blobs(optim_proto)
    self.assertTrue(count_after < count_before)

    # Run the model and compare results: the loss must match and so must the
    # final gradient (conv1_w_grad).
    workspace.RunNetOnce(model.param_init_net)
    data = np.random.rand(4, 3, 227, 227).astype(np.float32)
    label = (np.random.rand(4) * 1000).astype(np.int32)
    workspace.FeedBlob("gpu_0/data", data)
    workspace.FeedBlob("gpu_0/label", label)

    workspace.RunNetOnce(model.net)
    model.net.Proto().type = 'dag'
    model.net.Proto().num_workers = 4
    loss1 = workspace.FetchBlob("gpu_0/last_out_L1000")
    conv1_w_grad = workspace.FetchBlob(param_to_grad["gpu_0/conv1_w"])
    # Clobber the gradient so a stale value cannot make the check pass.
    workspace.FeedBlob(param_to_grad["gpu_0/conv1_w"], np.array([0.0]))

    workspace.RunNetOnce(optim_proto)
    optimized_loss1 = workspace.FetchBlob("gpu_0/last_out_L1000")
    optim_conv1_w_grad = workspace.FetchBlob(param_to_grad["gpu_0/conv1_w"])

    print("before: {} after: {}".format(count_before, count_after))
    np.testing.assert_almost_equal(loss1, optimized_loss1)
    np.testing.assert_almost_equal(conv1_w_grad, optim_conv1_w_grad)
def create_model_ops_testable(model, loss_scale, is_test=False):
    """Build the forward ops for the network selected by args.model.

    Supported: cifar10, resnet32x32, resnet, vgg, googlenet, alexnet,
    alexnetv0. Adds softmax loss (scaled by loss_scale) and accuracy.
    Returns [loss]. Raises NotImplementedError for unknown model names.
    """
    use_fp16 = args.dtype == 'float16'
    init = PseudoFP16Initializer if use_fp16 else Initializer
    with brew.arg_scope([brew.conv, brew.fc],
                        WeightInitializer=init,
                        BiasInitializer=init,
                        enable_tensor_core=args.enable_tensor_core,
                        float16_compute=args.float16_compute):
        if args.model == "cifar10":
            if args.image_size != 32:
                log.warn("Cifar10 expects a 32x32 image.")
            pred = models.cifar10.create_cifar10(
                model,
                "data",
                image_channels=args.num_channels,
                num_classes=args.num_labels,
                image_height=args.image_size,
                image_width=args.image_size,
            )
        elif args.model == "resnet32x32":
            if args.image_size != 32:
                log.warn("ResNet32x32 expects a 32x32 image.")
            pred = models.resnet.create_resnet32x32(
                model,
                "data",
                num_layers=args.num_layers,
                num_input_channels=args.num_channels,
                num_labels=args.num_labels,
                is_test=is_test)
        elif args.model == "resnet":
            if args.image_size != 224:
                log.warn(
                    "ResNet expects a 224x224 image. input image = %d" %
                    args.image_size)
            pred = resnet.create_resnet50(
                model,
                "data",
                num_input_channels=args.num_channels,
                num_labels=args.num_labels,
                no_bias=True,
                no_loss=True,
            )
        elif args.model == "vgg":
            if args.image_size != 224:
                log.warn("VGG expects a 224x224 image.")
            pred = vgg.create_vgg(
                model,
                "data",
                num_input_channels=args.num_channels,
                num_labels=args.num_labels,
                num_layers=args.num_layers,
                is_test=is_test)
        elif args.model == "googlenet":
            if args.image_size != 224:
                log.warn("GoogLeNet expects a 224x224 image.")
            pred = googlenet.create_googlenet(
                model,
                "data",
                num_input_channels=args.num_channels,
                num_labels=args.num_labels,
                is_test=is_test)
        elif args.model == "alexnet":
            if args.image_size != 224:
                log.warn("Alexnet expects a 224x224 image.")
            pred = alexnet.create_alexnet(
                model,
                "data",
                num_input_channels=args.num_channels,
                num_labels=args.num_labels,
                is_test=is_test)
        elif args.model == "alexnetv0":
            if args.image_size != 224:
                log.warn("Alexnet v0 expects a 224x224 image.")
            pred = alexnet.create_alexnetv0(
                model,
                "data",
                num_input_channels=args.num_channels,
                num_labels=args.num_labels,
                is_test=is_test)
        else:
            raise NotImplementedError("Network {} not found.".format(
                args.model))

    if use_fp16:
        # Loss math runs in fp32.
        pred = model.net.HalfToFloat(pred, pred + '_fp32')
    softmax, loss = model.SoftmaxWithLoss([pred, 'label'], ['softmax', 'loss'])
    loss = model.Scale(loss, scale=loss_scale)
    brew.accuracy(model, [softmax, "label"], "accuracy")
    return [loss]
def complex_resnet():
    """ResNet-50 test fixture (1-channel input, 5 labels, no loss).

    Returns the model and the single expected input shape as a tuple.
    """
    helper = ModelHelper(name="r", arg_scope={"order": "NCHW", "is_test": True})
    resnet.create_resnet50(
        helper,
        "data",
        num_input_channels=1,
        num_labels=5,
        is_test=True,
        no_loss=True)
    return helper, (1, 1, 224, 224)
import caffe2.python.models.resnet as resnet
from caffe2.python import cnn
from caffe2.python import workspace
import caffe2.python._import_c_extension as C
import numpy as np
from caffe2.proto import caffe2_pb2
import time

# Build a ResNet-50 training graph on GPU 0 (cuDNN, 512MB workspace limit).
test_model = cnn.CNNModelHelper(
    order="NCHW",
    name="resnet50_test",
    use_cudnn=True,
    cudnn_exhaustive_search=True,
    ws_nbytes_limit=512 * 1024 * 1024,
)
softmax, loss = resnet.create_resnet50(
    test_model,
    "data",
    num_input_channels=3,
    num_labels=1000,
    label="label",
    no_bias=True,
)

# Pin the whole net (and its parameter init net) to GPU 0.
device_opts = caffe2_pb2.DeviceOption()
device_opts.device_type = caffe2_pb2.CUDA
device_opts.cuda_gpu_id = 0
net_def = test_model.net.Proto()
net_def.device_option.CopyFrom(device_opts)
test_model.param_init_net.RunAllOnGPU(gpu_id=0, use_cudnn=True)

# Pre-create the input blobs, then materialize the parameters.
workspace.CreateBlob("data")
workspace.CreateBlob("label")
workspace.RunNetOnce(test_model.param_init_net)
def CivilNet(name, train_test_deplopy=0):
    """Build a single-GPU ResNet-50 model for train/test/deploy.

    Args:
        name: model name.
        train_test_deplopy: 0 = train, 1 = test, 2 = deploy (name kept as-is
            for interface compatibility; presumably a typo for "deploy").

    Returns:
        The ModelHelper with its nets already created in the workspace.
    """
    arg_scope = {
        'order': 'NCHW',
        'use_cudnn': True,
        'cudnn_exhaustive_search': True,
        'ws_nbytes_limit': (64 * 1024 * 1024),
    }
    model = model_helper.ModelHelper(name=name, arg_scope=arg_scope)
    # Single-GPU bookkeeping expected by data_parallel_model-style helpers.
    model._device_type = caffe2_pb2.CUDA
    model._device_prefix = "gpu"
    model._shared_model = False
    model._devices = [0]
    device_opt = core.DeviceOption(caffe2_pb2.CUDA, 0)

    # Deploy: forward graph only, with its own internal loss ops.
    if train_test_deplopy == 2:
        with core.DeviceScope(device_opt):
            with core.NameScope("{}_{}".format(model._device_prefix, 0)):
                with brew.arg_scope([brew.conv, brew.fc],
                                    WeightInitializer=Initializer,
                                    BiasInitializer=Initializer,
                                    enable_tensor_core=False,
                                    float16_compute=False):
                    resnet.create_resnet50(
                        model, "data",
                        num_input_channels=3,
                        num_labels=args.num_labels,
                        no_bias=True,
                        no_loss=False)
        workspace.RunNetOnce(model.param_init_net)
        workspace.CreateNet(model.net)
        return model

    # Train/test share the same graph shape but read different LMDBs.
    is_train = train_test_deplopy == 0
    reader_name = "reader" if is_train else "test_reader"
    reader_data = args.train_data if is_train else args.test_data
    reader = model.CreateDB(
        reader_name, db=reader_data, db_type='lmdb', num_shards=1, shard_id=0)

    is_test = True if train_test_deplopy == 1 else False
    loss = None
    with core.DeviceScope(device_opt):
        with core.NameScope("{}_{}".format(model._device_prefix, 0)):
            AddImageInput(model, reader, batch_size=32, is_test=is_test)
            with brew.arg_scope([brew.conv, brew.fc],
                                WeightInitializer=Initializer,
                                BiasInitializer=Initializer,
                                enable_tensor_core=False,
                                float16_compute=False):
                pred = resnet.create_resnet50(
                    model, "data",
                    num_input_channels=3,
                    num_labels=args.num_labels,
                    no_bias=True,
                    no_loss=True)
            softmax, loss = model.SoftmaxWithLoss(
                [pred, 'label'], ['softmax', 'loss'])
            brew.accuracy(model, [softmax, "label"], "accuracy")

    # Test: no gradients or optimizer needed.
    if train_test_deplopy == 1:
        workspace.RunNetOnce(model.param_init_net)
        workspace.CreateNet(model.net)
        return model

    # Train: seed each loss with a constant gradient of 1.0 and add backprop.
    loss_grad = {}
    losses_by_gpu = {0: [loss]}

    def create_grad(lossp):
        # One ConstantFill per loss blob acts as d(loss)/d(loss) = 1.
        return model.ConstantFill(lossp, str(lossp) + "_grad", value=1.0)

    # Explicitly need to create gradients on GPU 0.
    device = core.DeviceOption(model._device_type, 0)
    with core.DeviceScope(device):
        for l in losses_by_gpu[0]:
            lg = create_grad(l)
            loss_grad[str(l)] = str(lg)
        model.AddGradientOperators(loss_grad)

    # Optimizer: SGD with momentum, step decay every 30 epochs.
    optimizer.add_weight_decay(model, args.weight_decay)
    stepsz = int(30 * args.epoch_size / 32)
    opt = optimizer.build_multi_precision_sgd(
        model,
        args.base_learning_rate,
        momentum=0.9,
        nesterov=1,
        policy="step",
        stepsize=stepsz,
        gamma=0.1)
    model._optimizer = opt

    workspace.RunNetOnce(model.param_init_net)
    workspace.CreateNet(model.net)
    return model