def testTHCAllocator(self):
    core.GlobalInit(['caffe2', '--caffe2_cuda_memory_pool=thc'])
    # just run one operator
    # it's important not to call anything from the Torch API here;
    # even torch.cuda.memory_allocated would initialize the CUDA context
    workspace.RunOperatorOnce(core.CreateOperator(
        'ConstantFill', [], ["x"], shape=[5, 5], value=1.0,
        device_option=core.DeviceOption(workspace.GpuDeviceType)
    ))
    # make sure we actually used the THC allocator
    self.assertGreater(torch.cuda.memory_allocated(), 0)
def load_environment():
    core.GlobalInit(['caffe2', '--caffe2_log_level=0'])
    DETECTRON_HOME = os.environ.get('DETECTRON_HOME')
    if DETECTRON_HOME is None:
        DETECTRON_HOME = '/home/thomas/code/detectron_service/detectron'
    cfg_fname = os.path.join(
        DETECTRON_HOME,
        'configs/12_2017_baselines/e2e_mask_rcnn_R-101-FPN_2x.yaml')
    merge_cfg_from_file(cfg_fname)
    detectron_ops_lib = "/home/thomas/caffe2/lib/libcaffe2_detectron_ops_gpu.so"
    dyndep.InitOpsLibrary(detectron_ops_lib)
    _, net = load_model()
    return net
def setUp(self):
    core.GlobalInit(["python", "caffe2"])
    ws.ResetWorkspace()

    self.model = model_helper.ModelHelper()
    brew.fc(self.model, "data", "y",
            dim_in=4, dim_out=2,
            weight_init=('ConstantFill', dict(value=1.0)),
            bias_init=('ConstantFill', dict(value=0.0)),
            axis=0)
    ws.FeedBlob("data", np.zeros([4], dtype='float32'))

    ws.RunNetOnce(self.model.param_init_net)
    ws.CreateNet(self.model.net)
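A hedged companion to the setUp above (the test method name and assertion are illustrative, not from the original suite): with the weight fixed at 1.0, the bias at 0.0, and a zero-valued "data" blob, running the created net should yield an all-zero FC output.

# Hypothetical test method; RunNet and FetchBlob are standard
# caffe2.python.workspace calls, and the net was created in setUp above.
def test_fc_forward_is_zero(self):
    ws.RunNet(self.model.net.Proto().name)
    y = ws.FetchBlob("y")
    # W * 0 + 0 == 0 elementwise, regardless of the output shape.
    np.testing.assert_allclose(y, np.zeros_like(y))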
def run_gym(
    params,
    score_bar,
    gpu_id,
    save_timesteps_to_dataset=None,
    start_saving_from_episode=0,
):
    # Caffe2 core uses the min of caffe2_log_level and minloglevel
    # to determine loglevel. See caffe2/caffe2/core/logging.cc for more info.
    core.GlobalInit(["caffe2", "--caffe2_log_level=2", "--minloglevel=2"])
    logger.info("Running gym with params")
    logger.info(params)
    rl_parameters = RLParameters(**params["rl"])

    env_type = params["env"]
    env = OpenAIGymEnvironment(
        env_type,
        rl_parameters.epsilon,
        rl_parameters.softmax_policy,
        rl_parameters.gamma,
    )
    replay_buffer = OpenAIGymMemoryPool(params["max_replay_memory_size"])
    model_type = params["model_type"]
    use_gpu = gpu_id != USE_CPU

    trainer = create_trainer(params["model_type"], params, rl_parameters, use_gpu, env)
    predictor = create_predictor(trainer, model_type, use_gpu)

    c2_device = core.DeviceOption(
        caffe2_pb2.CUDA if use_gpu else caffe2_pb2.CPU, gpu_id
    )
    return train_sgd(
        c2_device,
        env,
        replay_buffer,
        model_type,
        trainer,
        predictor,
        "{} test run".format(env_type),
        score_bar,
        **params["run_details"],
        save_timesteps_to_dataset=save_timesteps_to_dataset,
        start_saving_from_episode=start_saving_from_episode,
    )
def main():
    parser = GetArgumentParser()
    args, extra_args = parser.parse_known_args()
    core.GlobalInit([
        'tc_bench',
        '--caffe2_logging_operator_dyno_sampling_rate=0',
        '--tuner_devices=' + args.tuner_devices,
        '--caffe2_simple_net_benchmark_run_whole_net=0',
    ] + extra_args)
    mapping_options = tune(args)
    compare_fcs(
        args.batch_size,
        args.input_dim,
        args.output_dim,
        args.num_runs,
        mapping_options,
    )
def main():
    parser = GetArgumentParser()
    args, extra_args = parser.parse_known_args()
    core.GlobalInit(
        [
            "dnnlowp_fc_perf_bench",
            "--caffe2_logging_operator_dyno_sampling_rate=0",
        ]
        + extra_args
    )
    if args.all_shapes:
        for input_shape in input_shapes_nmt:
            compare_fcs(input_shape[0], input_shape[2], input_shape[1], args)
        for input_shape in input_shapes_speech:
            compare_fcs(input_shape[0], input_shape[2], input_shape[1], args)
        for input_shape in input_shapes_ads:
            compare_fcs(input_shape[0], input_shape[2], input_shape[1], args)
    else:
        compare_fcs(args.batch_size, args.input_dim, args.output_dim, args)
def benchmark_mul_gradient(args):
    workspace.FeedBlob("dC", np.random.rand(args.m, args.n).astype(np.float32))
    workspace.FeedBlob("A", np.random.rand(args.m, args.n).astype(np.float32))
    workspace.FeedBlob("B", np.random.rand(args.m).astype(np.float32))

    net = core.Net("mynet")
    net.MulGradient(["dC", "A", "B"], ["dA", "dB"], broadcast=True, axis=0)
    workspace.CreateNet(net)

    workspace.BenchmarkNet(net.Name(), 1, args.iteration, True)


if __name__ == "__main__":
    parser = argparse.ArgumentParser(description="benchmark for MulGradient.")
    parser.add_argument('-m', type=int, default=9508,
                        help="The number of rows of A")
    parser.add_argument("-n", type=int, default=80,
                        help="The number of columns of A")
    parser.add_argument('-i', "--iteration", type=int, default=100,
                        help="The number of iterations.")
    args, extra_args = parser.parse_known_args()
    core.GlobalInit(['python'] + extra_args)
    benchmark_mul_gradient(args)
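In the benchmark above, workspace.BenchmarkNet takes the net name, the number of warmup runs, the number of main runs, and whether to time individual operators. It also returns timing results; the exact layout of that return value is an assumption here, so this small hedged addition only captures and prints it rather than interpreting it.

# Hedged sketch: keep BenchmarkNet's return value instead of discarding it.
results = workspace.BenchmarkNet(net.Name(), 1, args.iteration, True)
print("BenchmarkNet timing results:", results)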
import numpy as np
import time
import unittest
import caffe2.python.fakelowp.init_shared_libs  # noqa
from hypothesis import given, settings
from hypothesis import strategies as st
from caffe2.proto import caffe2_pb2
from caffe2.python import core
from caffe2.python import workspace
from caffe2.python.onnx.onnxifi import onnxifi_caffe2_net
from caffe2.python.onnx.tests.test_utils import TestCase
from caffe2.python.fakelowp.test_utils import print_test_debug_info

core.GlobalInit(["caffe2",
                 "--glow_global_fp16=1",
                 "--glow_global_fused_scale_offset_fp16=1",
                 "--glow_global_force_sls_fp16_accum=1"])

GLOW_LOWERED_BATCHNORM = False


def reference_spatialbn_test16(X, scale, bias, mean, var, epsilon, order):
    X = X.astype(np.float16)
    scale = scale.astype(np.float16)
    bias = bias.astype(np.float16)
    mean = mean.astype(np.float16)
    # var = var.astype(np.float16)
    assert(order == "NCHW")
    scale = scale[np.newaxis, :, np.newaxis, np.newaxis]
    bias = bias[np.newaxis, :, np.newaxis, np.newaxis]
from caffe2.proto import caffe2_pb2
from caffe2.python.predictor import mobile_exporter
from caffe2.python import (
    brew,
    core,
    model_helper,
    net_drawer,
    optimizer,
    visualize,
    workspace,
    scope,
)

# If you would like to see some really detailed initializations,
# you can change --caffe2_log_level=0 to --caffe2_log_level=-1
core.GlobalInit(
    ['caffe2', '--caffe2_log_level=2', '--caffe2_gpu_memory_tracking=0'])
print("Necessities imported!")

use_legacy_pool_padding = True

import requests
import tarfile

# Set paths and variables
# data_folder is where the data is downloaded and unpacked
data_folder = os.path.join(os.path.expanduser('~'), 'caffe2_notebooks',
                           'tutorial_data', 'cifar10')
# root_folder is where checkpoint files and .pb model definition files will be outputted
root_folder = os.path.join(os.path.expanduser('~'), 'caffe2_notebooks',
                           'tutorial_files', 'tutorial_cifar10')
import caffe2.python.fakelowp.init_shared_libs  # noqa
import numpy as np
from caffe2.python import core, workspace
from caffe2.python.onnx.onnxifi import onnxifi_caffe2_net
from hypothesis import given, strategies as st, settings
from caffe2.python.fakelowp.test_utils import print_test_debug_info
import caffe2.python.serialized_test.serialized_test_util as serial
import datetime

core.GlobalInit([
    "caffe2",
    "--caffe2_log_level=-3",
    "--glow_global_fp16=1",
    "--glow_clip_quant_range_to_fp16=1",
    "--glow_global_fp16_constants=1"
])


class Int8OpsTest(serial.SerializedTestCase):
    def _get_scale_zp(self, tensor):
        tensor_max = np.max(tensor)
        tensor_min = min(0, np.min(tensor))
        scale = np.float32(np.float16((tensor_max - tensor_min) / 255.0))
        if scale < 1e-6:
            scale = np.float32(1e-6)
        zero_point = 0 - tensor_min / scale
        zero_point = int(round(np.clip(zero_point, 0, 255.0)))
        return (scale, zero_point)

    @given(n=st.integers(2, 1024),
           rand_seed=st.integers(0, 65534),
           non_zero_offset=st.booleans())
    @settings(deadline=datetime.timedelta(seconds=50))
    def test_int8_quantize(self, n, rand_seed, non_zero_offset):
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

from caffe2.python import core, workspace
import unittest

core.GlobalInit(['python'])


class BlobDeallocationTest(unittest.TestCase):
    def test(self):
        net = core.Net('net')

        x = net.GivenTensorStringFill([], ['x'], shape=[3], values=['a', 'b', 'c'])
        y = net.GivenTensorStringFill([], ['y'], shape=[3], values=['d', 'e', 'f'])
        net.Concat([x, y], ['concated', '_'], axis=0)

        workspace.ResetWorkspace()
        workspace.RunNetOnce(net)
        workspace.ResetWorkspace()
        workspace.RunNetOnce(net)
        self.assertTrue(True)


if __name__ == '__main__':
    unittest.main()
from __future__ import print_function
from __future__ import unicode_literals

import matplotlib.pyplot as plt
import numpy as np
import os
import shutil
import operator
import caffe2.python.predictor.predictor_exporter as pe
# from caffe2.python import core
from caffe2.python import brew, core, model_helper, net_drawer, optimizer, visualize, workspace
from IPython import display
# import requests, StringIO, zipfile
# import urllib2, urllib

# Caffe2 initialization detail: caffe2_log_level=1
core.GlobalInit(["caffe2", "--caffe2_log_level=1"])

USE_LENET_MODEL = True

# Path of the current folder
current_folder = os.path.abspath(os.path.dirname(__name__))
# Path of the image dataset
data_folder = os.path.join(current_folder, "turorial_data", "mnist")
# Root folder for model checkpoints, log files, and the workspace
root_folder = os.path.join(current_folder, "turorial_files", "tutorial_mnist")
db_missing = False

if not os.path.exists(data_folder):
    os.makedirs(data_folder)

if os.path.exists(os.path.join(data_folder, "mnist-train-nchw-lmdb")):
    print("lmdb train db found!")
else:
    print("Please download datasets manually!")
workspace.FeedBlob("curr_iter", curr_iter) workspace.FeedBlob("update_counter", update_counter) workspace.FeedBlob("prev_iter", prev_iter) net.RowWiseCounter( ["prev_iter", "update_counter", "indices", "curr_iter"], ["prev_iter", "update_counter"], counter_halflife=counter_halflife, ) workspace.RunNetOnce(net) prev_iter_out = workspace.FetchBlob("prev_iter") update_counter_out = workspace.FetchBlob("update_counter") prev_iter_out_ref, update_counter_out_ref = update_counter_ref( prev_iter, update_counter, indices, curr_iter, counter_halflife=counter_halflife, ) assert np.allclose(prev_iter_out, prev_iter_out_ref, rtol=1e-3) assert np.allclose(update_counter_out, update_counter_out_ref, rtol=1e-3) if __name__ == "__main__": global_options = ["caffe2"] core.GlobalInit(global_options) unittest.main()
from caffe2.python import core, dyndep
import caffe2.python.hypothesis_test_util as hu
from hypothesis import given
import hypothesis.strategies as st
import numpy as np
import os
import unittest

try:
    from libfb import parutil
except ImportError as e:
    # If libfb not found, skip all tests in this file
    raise unittest.SkipTest(str(e))

core.GlobalInit(["python", "--caffe2_log_level=0"])
dyndep.InitOpsLibrary('@/caffe2/caffe2/contrib/torch:torch_ops')

RUNTIME = parutil.get_runtime_path()
if 'LUA_PATH' not in os.environ:
    os.environ['LUA_PATH'] = ";".join([
        os.path.join(RUNTIME, '_lua', '?.lua'),
        os.path.join(RUNTIME, '_lua', '?', 'init.lua'),
    ])
    os.environ['LUA_CPATH'] = os.path.join(RUNTIME, '_lua', '?.so')


class TorchOpTest(hu.HypothesisTestCase):
    @given(n=st.integers(min_value=1, max_value=10),
           i=st.integers(min_value=1, max_value=10),
from fblearner.flow.projects.dper.preprocs.ads_feature_processor import (
    ads_feature_processor,
)
from hiveio import par_init  # noqa

import fblearner.flow.projects.dper.flow_types as T
import fblearner.flow.projects.dper.utils.assemble as assemble_utils
import fblearner.flow.projects.dper.utils.data as data_utils
import fblearner.flow.projects.dper.utils.visualize as vis_utils
import fblearner.flow.projects.dper.workflows.ads_config as default_config
import fblearner.flow.projects.dper.ifbpy.compute_meta as compute_meta
from fblearner.flow.projects.dper.ifbpy.execution import test_model_locally
import fblearner.flow.projects.dper.utils.perf_estimator_execution as perf_estimator_execution
import json

core.GlobalInit(['ifbpy'])

from IPython.core.debugger import Pdb
ipdb = Pdb()


# In[ ]:

# when testing a particular flow, load model options from a json file, and pass them to model_options
# local_prod_jason_file="/home/dongli/fbsource/fbcode/caffe2/caffe2/net_config/33252482/prod_model.json"
# with open(local_prod_jason_file, 'r') as f:
#     prod_model_options = sparse_nn.MODEL_OPTIONS.decode(json.loads(f.read()))
# print(prod_model_options)


# In[ ]:

preproc_options = default_config.DEFAULT_PREPROC_OPTIONS
def main():
    core.GlobalInit(["caffe2", "--caffe2_log_level=0"])
    train_model, test_model, deploy_model = create_train_test_models(DATA_DIR)
    do_training(train_model)
    save_model(deploy_model)
    do_inference()
def run_gym(
    params,
    score_bar,
    gpu_id,
    save_timesteps_to_dataset=None,
    start_saving_from_episode=0,
    batch_rl_file_path=None,
):
    # Caffe2 core uses the min of caffe2_log_level and minloglevel
    # to determine loglevel. See caffe2/caffe2/core/logging.cc for more info.
    core.GlobalInit(["caffe2", "--caffe2_log_level=2", "--minloglevel=2"])
    logger.info("Running gym with params")
    logger.info(params)
    rl_parameters = RLParameters(**params["rl"])

    env_type = params["env"]
    env = OpenAIGymEnvironment(
        env_type,
        rl_parameters.epsilon,
        rl_parameters.softmax_policy,
        params["max_replay_memory_size"],
        rl_parameters.gamma,
    )
    model_type = params["model_type"]
    c2_device = core.DeviceOption(
        caffe2_pb2.CPU if gpu_id == USE_CPU else caffe2_pb2.CUDA, gpu_id)
    use_gpu = gpu_id != USE_CPU

    if model_type == ModelType.PYTORCH_DISCRETE_DQN.value:
        training_settings = params["training"]
        training_parameters = TrainingParameters(**training_settings)
        if env.img:
            assert (training_parameters.cnn_parameters is not None), \
                "Missing CNN parameters for image input"
            training_parameters.cnn_parameters = CNNParameters(
                **training_settings["cnn_parameters"])
            training_parameters.cnn_parameters.conv_dims[0] = env.num_input_channels
            training_parameters.cnn_parameters.input_height = env.height
            training_parameters.cnn_parameters.input_width = env.width
            training_parameters.cnn_parameters.num_input_channels = (
                env.num_input_channels)
        else:
            assert (training_parameters.cnn_parameters is None), \
                "Extra CNN parameters for non-image input"
        trainer_params = DiscreteActionModelParameters(
            actions=env.actions,
            rl=rl_parameters,
            training=training_parameters)
        trainer = DQNTrainer(trainer_params, env.normalization, use_gpu)
    elif model_type == ModelType.DISCRETE_ACTION.value:
        with core.DeviceScope(c2_device):
            training_settings = params["training"]
            training_parameters = TrainingParameters(**training_settings)
            if env.img:
                assert (training_parameters.cnn_parameters is not None), \
                    "Missing CNN parameters for image input"
                training_parameters.cnn_parameters = CNNParameters(
                    **training_settings["cnn_parameters"])
                training_parameters.cnn_parameters.conv_dims[0] = env.num_input_channels
                training_parameters.cnn_parameters.input_height = env.height
                training_parameters.cnn_parameters.input_width = env.width
                training_parameters.cnn_parameters.num_input_channels = (
                    env.num_input_channels)
            else:
                assert (training_parameters.cnn_parameters is None), \
                    "Extra CNN parameters for non-image input"
            trainer_params = DiscreteActionModelParameters(
                actions=env.actions,
                rl=rl_parameters,
                training=training_parameters)
            trainer = DiscreteActionTrainer(trainer_params, env.normalization)
    elif model_type == ModelType.PYTORCH_PARAMETRIC_DQN.value:
        training_settings = params["training"]
        training_parameters = TrainingParameters(**training_settings)
        if env.img:
            assert (training_parameters.cnn_parameters is not None), \
                "Missing CNN parameters for image input"
            training_parameters.cnn_parameters = CNNParameters(
                **training_settings["cnn_parameters"])
            training_parameters.cnn_parameters.conv_dims[0] = env.num_input_channels
        else:
            assert (training_parameters.cnn_parameters is None), \
                "Extra CNN parameters for non-image input"
        trainer_params = ContinuousActionModelParameters(
            rl=rl_parameters,
            training=training_parameters,
            knn=KnnParameters(model_type="DQN"),
        )
        trainer = ParametricDQNTrainer(trainer_params, env.normalization,
                                       env.normalization_action, use_gpu)
    elif model_type == ModelType.PARAMETRIC_ACTION.value:
        with core.DeviceScope(c2_device):
            training_settings = params["training"]
            training_parameters = TrainingParameters(**training_settings)
            if env.img:
                assert (training_parameters.cnn_parameters is not None), \
                    "Missing CNN parameters for image input"
                training_parameters.cnn_parameters = CNNParameters(
                    **training_settings["cnn_parameters"])
                training_parameters.cnn_parameters.conv_dims[0] = env.num_input_channels
            else:
                assert (training_parameters.cnn_parameters is None), \
                    "Extra CNN parameters for non-image input"
            trainer_params = ContinuousActionModelParameters(
                rl=rl_parameters,
                training=training_parameters,
                knn=KnnParameters(model_type="DQN"),
            )
            trainer = ContinuousActionDQNTrainer(trainer_params,
                                                 env.normalization,
                                                 env.normalization_action)
    elif model_type == ModelType.CONTINUOUS_ACTION.value:
        training_settings = params["shared_training"]
        actor_settings = params["actor_training"]
        critic_settings = params["critic_training"]
        trainer_params = DDPGModelParameters(
            rl=rl_parameters,
            shared_training=DDPGTrainingParameters(**training_settings),
            actor_training=DDPGNetworkParameters(**actor_settings),
            critic_training=DDPGNetworkParameters(**critic_settings),
        )
        action_range_low = env.action_space.low.astype(np.float32)
        action_range_high = env.action_space.high.astype(np.float32)
        trainer = DDPGTrainer(
            trainer_params,
            env.normalization,
            env.normalization_action,
            torch.from_numpy(action_range_low).unsqueeze(dim=0),
            torch.from_numpy(action_range_high).unsqueeze(dim=0),
            use_gpu,
        )
    else:
        raise NotImplementedError(
            "Model of type {} not supported".format(model_type))

    return run(
        c2_device,
        env,
        model_type,
        trainer,
        "{} test run".format(env_type),
        score_bar,
        **params["run_details"],
        save_timesteps_to_dataset=save_timesteps_to_dataset,
        start_saving_from_episode=start_saving_from_episode,
        batch_rl_file_path=batch_rl_file_path,
    )
from caffe2.python import core, workspace
from caffe2.proto import caffe2_pb2
from caffe2.python.test_util import TestCase
import unittest

core.GlobalInit(["caffe2", "--caffe2_cpu_numa_enabled=1"])


def build_test_net(net_name):
    net = core.Net(net_name)
    net.Proto().type = "async_scheduling"

    numa_device_option = caffe2_pb2.DeviceOption()
    numa_device_option.device_type = caffe2_pb2.CPU
    numa_device_option.numa_node_id = 0
    net.ConstantFill([], "output_blob_0", shape=[1], value=3.14,
                     device_option=numa_device_option)

    numa_device_option.numa_node_id = 1
    net.ConstantFill([], "output_blob_1", shape=[1], value=3.14,
                     device_option=numa_device_option)

    gpu_device_option = caffe2_pb2.DeviceOption()
    gpu_device_option.device_type = caffe2_pb2.CUDA
def setThrowIfFpExceptions(enabled):
    core.GlobalInit(["caffe2", "--caffe2_operator_throw_if_fp_exceptions=%d" %
                     (1 if enabled else 0)])
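The helper above simply re-runs GlobalInit to flip a single Caffe2 gflag. A minimal usage sketch, assuming a test that wants floating-point exceptions to surface as errors for one block of ops (run_ops_under_test is a hypothetical placeholder, not from the original file):

# Hypothetical caller: enable FP-exception throwing for a sensitive block,
# then restore the default so later tests are unaffected.
setThrowIfFpExceptions(True)
try:
    run_ops_under_test()  # hypothetical: nets expected to produce no NaN/Inf
finally:
    setThrowIfFpExceptions(False)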
    data = model.Scale(data, data, scale=float(1. / 256))
    data = model.StopGradient(data, data)
    return data, label


def AddLeNetModel(model, data):
    fc1 = brew.fc(model, data, 'fc1', dim_in=784, dim_out=1000)
    fc1 = brew.relu(model, fc1, fc1)
    fc2 = brew.fc(model, fc1, 'fc2', dim_in=1000, dim_out=1000)
    fc2 = brew.relu(model, fc2, fc2)
    pred = brew.fc(model, fc2, 'fc3', dim_in=1000, dim_out=10)
    softmax = brew.softmax(model, pred, 'softmax')
    return softmax


core.GlobalInit(['caffe2', '--caffe2_log_level=0'])

root_folder, data_folder = DownloadMNIST()
workspace.ResetWorkspace(root_folder)

arg_scope = {"order": "NCHW"}
test_model = model_helper.ModelHelper(name="mnist_test",
                                      arg_scope=arg_scope,
                                      init_params=True)
data, label = AddInput(test_model, batch_size=1,
                       db=os.path.join(data_folder, 'mnist-test-nchw-lmdb'),
                       db_type='lmdb')
softmax = AddLeNetModel(test_model, data)

# run a test pass on the test net
help="Embedding dimension.") parser.add_argument( "--average-len", type=int, default=27, help="Sparse feature average lengths, default is 27", ) parser.add_argument("--batch-size", type=int, default=100, help="The batch size.") parser.add_argument("-i", "--iteration", type=int, default=100000, help="The number of iterations.") parser.add_argument("--flush-cache", action="store_true", help="If true, flush cache") args, extra_args = parser.parse_known_args() core.GlobalInit(["python"] + extra_args) benchmark_sparse_lengths_sum( args.dtype, args.embedding_size, args.embedding_dim, args.average_len, args.batch_size, args.iteration, args.flush_cache, )
import os
import numpy as np  # needed by the array setup in the test below
import caffe2.python.fakelowp.init_shared_libs  # noqa
import caffe2.python.hypothesis_test_util as hu
from hypothesis import given
from caffe2.proto import caffe2_pb2
from caffe2.python import dyndep
from caffe2.python import core
from caffe2.python import workspace
from caffe2.python.onnx.onnxifi import onnxifi_caffe2_net
from caffe2.python.onnx.tests.test_utils import TestCase
from caffe2.python.fakelowp.test_utils import print_test_debug_info

core.GlobalInit(["caffe2", "--caffe2_log_level=-3", "--glow_global_fp16=1"])

kEpsilon = 1e-8


class ArithmeticOpsTest(TestCase):
    def _test_binary_op_graph(self, name):
        # First dimension is the batch size
        dims = np.concatenate((np.array([1]), np.random.randint(1, 20, size=3)))
        A = np.random.uniform(low=-100.0, high=100.0, size=dims).astype(np.float32)
        B = np.random.uniform(low=-100.0, high=100.0, size=dims).astype(np.float32)
        print(A.shape, B.shape)
        pred_net = caffe2_pb2.NetDef()
import numpy as np

from caffe2.python import \
    core, device_checker, gradient_checker, test_util, workspace
from caffe2.proto import caffe2_pb2, caffe2_legacy_pb2

import collections
import sys
import unittest

core.GlobalInit(["python"])

if workspace.has_gpu_support and workspace.NumberOfGPUs() > 0:
    gpu_device_option = caffe2_pb2.DeviceOption()
    gpu_device_option.device_type = caffe2_pb2.CUDA
    cpu_device_option = caffe2_pb2.DeviceOption()

    gpu_device_checker = device_checker.DeviceChecker(
        0.01, [gpu_device_option]
    )
    device_checker = device_checker.DeviceChecker(
        0.01, [gpu_device_option, cpu_device_option]
    )
    gpu_gradient_checkers = [
        gradient_checker.GradientChecker(
            0.005, 0.05, gpu_device_option, "gpu_checker_ws"
        ),
    ]
    gradient_checkers = [
        gradient_checker.GradientChecker(
            0.005, 0.05, gpu_device_option, "gpu_checker_ws"
        ),
        gradient_checker.GradientChecker(