# Shutdown data loading threads model.roi_data_loader.shutdown() return checkpoints def test_model(model_file, multi_gpu_testing, opts=None): # All arguments to inference functions are passed via cfg cfg.TEST.WEIGHTS = model_file # Clear memory before inference workspace.ResetWorkspace() # Run inference test_net.main(multi_gpu_testing=multi_gpu_testing) if __name__ == '__main__': workspace.GlobalInit( ['caffe2', '--caffe2_log_level=0', '--caffe2_gpu_memory_tracking']) set_loggers() # TODO(rbg): set C2 random seed np.random.seed(cfg.RNG_SEED) args = parse_args() logger.info('Called with args:') logger.info(args) if args.cfg_file is not None: cfg_from_file(args.cfg_file) if args.opts is not None: cfg_from_list(args.opts) assert_and_infer_cfg() logger.info('Training with config:') logger.info(pprint.pformat(cfg)) checkpoints = net_trainer() if not args.skip_test:
def test_SparseSegmentUint8(self):
    """Compare 8-bit rowwise-quantized sparse ops with their float twins.

    A random float table is quantized row-wise and dequantized again. Each
    ``*8BitsRowwise`` operator (and a Gather + dequantize path) runs on the
    quantized table, while the matching float operator runs on the
    dequantized table; the two outputs must agree to 5 decimals.
    """
    setup_net = core.Net("init")
    bench_net = core.Net("bench")
    num_rows = 10**3
    num_indices = 10**2

    # Input preparation (all filled by the init net).
    data = setup_net.UniformFill([], shape=[num_rows, 32])
    weights = setup_net.UniformFill([], shape=[num_indices, ])
    indices = setup_net.UniformIntFill(
        [], shape=[num_indices], max=num_rows - 1)
    indices = setup_net.Cast([indices], to=core.DataType.INT64)
    lengths = setup_net.ConstantFill(
        [],
        ['l'],
        shape=[num_indices // 10],
        value=10,
        dtype=core.DataType.INT32,
    )

    # Quantize the table, then dequantize it to obtain the float reference
    # input that carries the same rounding as the quantized one.
    bench_net.FloatToRowwiseQuantized8Bits(
        [data], ['quantized_data', 'scale_bias'])
    bench_net.Rowwise8BitQuantizedToFloat(
        ['quantized_data', 'scale_bias'], ['dequantized_data'])

    # SparseLengthsWeightedSum
    bench_net.SparseLengthsWeightedSum(
        ['dequantized_data', weights, indices, lengths],
        ['PositionWeighted_0'],
        engine='fp16')
    bench_net.SparseLengthsWeightedSum8BitsRowwise(
        ['quantized_data', weights, indices, lengths, 'scale_bias'],
        ['PositionWeighted_1'])

    # SparseLengthsSum
    bench_net.SparseLengthsSum(
        ['dequantized_data', indices, lengths], ['Sum_0'], engine='fp16')
    bench_net.SparseLengthsSum8BitsRowwise(
        ['quantized_data', indices, lengths, 'scale_bias'], ['Sum_1'])

    # SparseLengthsWeightedMean
    # net.SparseLengthsWeightedMean(['dequantized_data', w, i, l],
    #                               ['WeightedMean_0'])
    # net.SparseLengthsWeightedMean8BitsRowwise(
    #     ['quantized_data', w, i, l, 'scale_bias'],
    #     ['WeightedMean_1'])

    # SparseLengthsMean
    bench_net.SparseLengthsMean(
        ['dequantized_data', indices, lengths], ['Mean_0'], engine='fp16')
    bench_net.SparseLengthsMean8BitsRowwise(
        ['quantized_data', indices, lengths, 'scale_bias'], ['Mean_1'])

    # Gather on the quantized rows and their scale/bias, then dequantize;
    # the reference simply gathers from the dequantized table.
    gathered_rows = bench_net.Gather(
        ['quantized_data', indices], engine='fp16')
    gathered_scale_bias = bench_net.Gather(
        ['scale_bias', indices], engine='fp16')
    bench_net.Rowwise8BitQuantizedToFloat(
        [gathered_rows, gathered_scale_bias], 'Gathered_1')
    bench_net.Gather(['dequantized_data', indices], 'Gathered_0')

    workspace.GlobalInit(['caffe2', '--caffe2_log_level=0'])
    workspace.RunNetOnce(setup_net)
    workspace.CreateNet(bench_net)
    workspace.RunNetOnce(bench_net)

    # (quantized output blob, float reference blob), checked in this order.
    blob_pairs = (
        ('PositionWeighted_1', 'PositionWeighted_0'),
        ('Sum_1', 'Sum_0'),
        ('Mean_1', 'Mean_0'),
        ('Gathered_1', 'Gathered_0'),
    )
    for quantized_blob, reference_blob in blob_pairs:
        actual = workspace.FetchBlob(quantized_blob)
        expected = workspace.FetchBlob(reference_blob)
        np.testing.assert_array_almost_equal(actual, expected, decimal=5)
from caffe2.python import workspace, core
import numpy as np
from utils import NUM_LOOP_ITERS

workspace.GlobalInit(['caffe2'])


def add_blob(ws, blob_name, tensor_size):
    """Feed a float32 tensor of standard-normal samples into ``ws``.

    Args:
        ws: object exposing ``FeedBlob(name, array)``.
        blob_name: name under which the tensor is stored.
        tensor_size: iterable of dimension sizes for the tensor.
    """
    samples = np.random.randn(*tensor_size)
    ws.FeedBlob(blob_name, samples.astype(np.float32))


class C2SimpleNet(object):
    """
    Net built around a single 'op_name' operator.

    The constructor feeds one input blob per operator input into the
    workspace, registers those blobs as external inputs of the net and
    appends the operator to it.
    """

    def __init__(self, op_name, num_inputs=1, debug=False):
        self.net = core.Net("framework_benchmark_net")
        self.input_names = ["in_{}".format(i) for i in range(num_inputs)]
        # Every input is a one-element tensor fed into the global workspace.
        for input_name in self.input_names:
            add_blob(workspace, input_name, [1])
        self.net.AddExternalInputs(self.input_names)
        # Operators are created by calling the attribute named after them.
        getattr(self.net, op_name)(self.input_names)
        # Outputs of the operator that was just appended to the net.
        self.output_name = self.net._net.op[-1].output
def setUpClass(cls):
    """Initialize Caffe2 with the default test flags and reset engine prefs."""
    init_flags = get_default_test_flags()
    workspace.GlobalInit(init_flags)
    # Clear the default engine settings so that their effect is kept
    # separate from the op tests.
    core.SetEnginePref({}, {})
def __init__(
    self,
    cli_args,
    model=None,
    tag=None,
    enable_prof=False,
    id_qs = None,
    len_qs = None,
    fc_q = None  # TODO: Rename this as there are no bottom fc layers
):
    """Parse the architecture arguments and build the forward ops.

    Args:
        cli_args: parsed command-line namespace; the ``arch_*``,
            ``use_accel``, ``caffe2_net_type`` and ``inter_op_workers``
            attributes are read here.
        model: existing model to reuse. When ``None`` the Caffe2 workspace
            is initialized and a fresh ``ModelHelper`` is created.
        tag: indexable with at least 10 entries, forwarded to
            ``set_tags`` when ``model`` is supplied.
        enable_prof: add logging flags to the Caffe2 init options
            (only used when ``model`` is ``None``).
        id_qs, len_qs, fc_q: forwarded unchanged to
            ``create_sequential_forward_ops``.

    NOTE: the result of ``create_sequential_forward_ops`` is returned from
    ``__init__``; Python raises ``TypeError`` if that is not ``None``.
    """
    super(Wide_and_Deep, self).__init__()
    self.args = cli_args

    # Check to ensure we are configure wide and deep networks correctly
    self.check_args(self.args)

    ### parse command line arguments ###
    ln_bot = np.fromstring(cli_args.arch_mlp_bot, dtype=int, sep="-")
    dense_in = ln_bot[0]
    m_spa = cli_args.arch_sparse_feature_size
    ln_emb = np.fromstring(cli_args.arch_embedding_size, dtype=int, sep="-")
    num_sparse = ln_emb.size
    accel_en = self.args.use_accel

    # Input width of the top MLP: one m_spa-wide vector per sparse feature
    # plus the dense feature input.
    top_input_width = num_sparse * int(m_spa) + int(dense_in)
    top_arch = "{}-{}".format(top_input_width, cli_args.arch_mlp_top)
    ln_top = np.fromstring(top_arch, dtype=int, sep="-")

    ### initialize the model ###
    if model is not None:
        # WARNING: assume that workspace and tags have been initialized elsewhere
        self.set_tags(*[tag[i] for i in range(10)])
        self.model = model
    else:
        init_flags = ["caffe2", "--caffe2_log_level=0"]
        if enable_prof:
            init_flags.extend([
                "--logtostderr=0",
                "--log_dir=$HOME",
                #"--caffe2_logging_print_net_summary=1",
            ])
        workspace.GlobalInit(init_flags)
        self.set_tags()
        self.model = model_helper.ModelHelper(
            name="Wide_and_Deep", init_params=True
        )
        if cli_args:
            net_proto = self.model.net.Proto()
            net_proto.type = cli_args.caffe2_net_type
            net_proto.num_workers = cli_args.inter_op_workers

    # save arguments
    self.m_spa = m_spa
    self.ln_emb = ln_emb
    self.ln_bot = ln_bot
    self.ln_top = ln_top
    self.arch_interaction_op = cli_args.arch_interaction_op
    self.arch_interaction_itself = cli_args.arch_interaction_itself
    self.sigmoid_bot = -1  # TODO: Lets not hard-code this going forward
    self.sigmoid_top = ln_top.size - 1
    self.accel_en = accel_en

    return self.create_sequential_forward_ops(id_qs, len_qs, fc_q)
import unittest # Must happen before importing caffe2.python.* import caffe2.python.fakelowp.init_shared_libs # noqa import numpy as np from hypothesis import given, settings from hypothesis import strategies as st from caffe2.proto import caffe2_pb2 from caffe2.python import core, workspace from caffe2.python.onnx.onnxifi import onnxifi_caffe2_net from caffe2.python.fakelowp.test_utils import print_test_debug_info import caffe2.python.serialized_test.serialized_test_util as serial workspace.GlobalInit([ "caffe2", "--glow_global_fp16=1", "--glow_global_fused_scale_offset_fp16=1", "--glow_global_force_sls_fp16_accum=1", ]) GLOW_MATMUL_ATOL = 1e-5 GLOW_MATMUL_RTOL = 1e-3 class SparseLengthsSum8BitFakeNNPIFp16Test(serial.SerializedTestCase): def Skip_test_SLS_NonQuantized_fp16(self): N = 20000 DIM = 64 D = (4 * np.random.random_sample((N, DIM)) + 1).astype(np.float32) I = (np.random.randint(0, N, size=12)).astype(np.int64) L = np.asarray([4, 4, 4]).astype(np.int32) workspace.FeedBlob("D", D)
if rv[j] != nv[j]: print(j, rv[j], nv[j]) c += 1 if c == 10: break mismatch = True self.assertFalse(mismatch) def enable_rnn_executor(self, net, value, forward_only): num_found = 0 for op in net.Proto().op: if op.type.startswith("RecurrentNetwork"): for arg in op.arg: if arg.name == 'enable_rnn_executor': arg.i = value num_found += 1 # This sanity check is so that if someone changes the # enable_rnn_executor parameter name, the test will # start failing as this function will become defective. self.assertEqual(1 if forward_only else 2, num_found) if __name__ == "__main__": import random random.seed(2603) workspace.GlobalInit( ['caffe2', '--caffe2_log_level=0', '--caffe2_rnn_executor=1']) unittest.main()
def setUpClass(cls):
    """Initialize Caffe2 with the flags used by this test class."""
    init_flags = ['caffe2']
    init_flags.append('--caffe2_log_level=0')
    init_flags.append('--caffe2_omp_num_threads=1')
    workspace.GlobalInit(init_flags)
def main():
    """Run the iterative training / sample-selection loop.

    Steps, as implemented below:
      1. Initialize Caffe2, logging and the global ``cfg``.
      2. Load the initial image names from ``imgnames.pkl`` and use the
         matching roidb entries as the first training set.
      3. Train, then repeatedly: test the latest checkpoint, run
         ``detect_im`` on the samples not yet marked in the bitmap, split
         them into ``al`` and ``ss`` candidates, cap both groups with the
         proportion checkpoints, and train for another 30000 iterations on
         the marked samples plus the ``ss`` candidates.
    The loop ends after a test once ``sum_iters`` exceeds
    ``cfg.SOLVER.MAX_ITER``.
    """
    # Initialize C2
    workspace.GlobalInit(
        ['caffe2', '--caffe2_log_level=0', '--caffe2_gpu_memory_tracking=1'])
    # Set up logging and load config options
    logger = setup_logging(__name__)
    logging.getLogger('detectron.roi_data.loader').setLevel(logging.INFO)
    args = parse_args()
    logger.info('Called with args:')
    logger.info(args)
    if args.cfg_file is not None:
        merge_cfg_from_file(args.cfg_file)
    if args.opts is not None:
        merge_cfg_from_list(args.opts)
    assert_and_infer_cfg()
    logger.info('Training with config:')
    logger.info(pprint.pformat(cfg))
    # Note that while we set the numpy random seed network training will not be
    # deterministic in general. There are sources of non-determinism that cannot
    # be removed with a reasonable execution-speed tradeoff (such as certain
    # non-deterministic cudnn functions).
    np.random.seed(cfg.RNG_SEED)

    # Execute the training run
    # The context manager closes the file even when unpickling fails.
    # NOTE(review): pickle.load executes arbitrary code from the file; only
    # use an 'imgnames.pkl' that comes from a trusted source.
    with open('imgnames.pkl', 'rb') as fs:
        roidbnames = pickle.load(fs)

    logger.info('Loading dataset: {}'.format(cfg.TRAIN.DATASETS))
    dataset_names = cfg.TRAIN.DATASETS
    proposal_files = cfg.TRAIN.PROPOSAL_FILES
    roidb = get_training_roidb(dataset_names, proposal_files)
    logger.info('{:d} roidb entries'.format(len(roidb)))
    total_num = len(roidb)

    # bitmap idx indicated for training
    bitmapRoidb = BitMap(total_num)

    # initial samples
    # initial_num = int(total_num*0.2)
    # for i in range(initial_num):
    #     bitmapRoidb.set(i)
    #
    # train_roidb = [roidb[i] for i in range(initial_num)]

    # The initial training set is every roidb entry whose image file name
    # appears in the pickled name collection.
    initialidx = []
    train_roidb = []
    for i, x in enumerate(roidb):
        if x['image'].split('/')[-1] in roidbnames:
            initialidx.append(i)
            train_roidb.append(x)
    for i in initialidx:
        bitmapRoidb.set(i)
    logger.info('{:d} the number initial roidb entries'.format(
        len(train_roidb)))

    # append flipped images
    train_roidb = flipped_roidb_for_training(train_roidb)
    logger.info('{:d} the number initial roidb entries'.format(
        len(train_roidb)))

    alamount = 0
    ssamount = 0
    gamma = 0.95
    # control al proportion
    al_proportion_checkpoint = [
        int(x * total_num * 0.4) for x in np.linspace(0.2, 1, 10)
    ]
    # control ss proportion
    ss_proportion_checkpoint = [
        int(x * total_num) for x in np.linspace(0.2, 2, 10)
    ]

    next_iters = 90000
    sum_iters = next_iters
    # load the latest checkpoints
    checkpoints = detectron.utils.train.train_model(sum_iters, train_roidb,
                                                    cfg.TRAIN.WEIGHTS)
    while True:
        # to do a test on the test dataset
        test_model(checkpoints[(sum_iters - 1)], args.multi_gpu_testing,
                   args.opts)
        if sum_iters > cfg.SOLVER.MAX_ITER:
            break

        # next detect unlabeled samples
        unlabeledidx = list(set(range(total_num)) - set(bitmapRoidb.nonzero()))

        # detect unlabeled samples
        BBoxes, YClass, Scores, al_candidate_idx, ALScore = detect_im(
            checkpoints[(sum_iters - 1)],
            roidb,
            gamma,
            idxs=unlabeledidx,
            gpu_id=0)

        # Order the al candidates by ascending ALScore.
        al_avg_idx = np.argsort(np.array(ALScore))
        al_candidate_idx = [al_candidate_idx[i] for i in al_avg_idx]

        # gamma decays by 0.05 per round, never below 0.7.
        gamma = max(gamma - 0.05, 0.7)

        # the ss candidate idx
        ss_candidate_idx = [
            i for i in unlabeledidx if i not in al_candidate_idx
        ]

        # update roidb for next training
        train_roidb = replace_roidb(roidb, BBoxes, YClass, ss_candidate_idx)

        # control the proportion: truncate each candidate list at the
        # current checkpoint, then advance to the next checkpoint (the last
        # value is repeated so the list never runs empty).
        if alamount + len(al_candidate_idx) >= al_proportion_checkpoint[0]:
            al_candidate_idx = al_candidate_idx[:int(
                al_proportion_checkpoint[0] - alamount)]
            al_proportion_checkpoint.pop(0)
            al_proportion_checkpoint.append(al_proportion_checkpoint[-1])
        if ssamount + len(ss_candidate_idx) >= ss_proportion_checkpoint[0]:
            ss_candidate_idx = ss_candidate_idx[:int(
                ss_proportion_checkpoint[0] - ssamount)]
            ss_proportion_checkpoint.pop(0)
            ss_proportion_checkpoint.append(ss_proportion_checkpoint[-1])

        # record ss and al factor
        alamount += len(al_candidate_idx)
        ssamount += len(ss_candidate_idx)
        logger.info('alfactor:{},ssfactor:{}'.format(alamount / total_num,
                                                     ssamount / total_num))

        # for idx in al_candidate_idx:
        #     bitmapRoidb.set(idx)
        next_train_idx = bitmapRoidb.nonzero()
        next_train_idx.extend(ss_candidate_idx)

        train_roidb = blur_image(train_roidb, ss_candidate_idx)
        # the next training roidb
        train_roidb = [train_roidb[i] for i in next_train_idx]
        # flipped the roidb
        train_roidb = flipped_roidb_for_training(train_roidb)

        # the next training iters; training resumes from the checkpoint
        # produced at the end of the previous round.
        next_iters = 30000
        sum_iters += next_iters
        checkpoints = detectron.utils.train.train_model(
            sum_iters, train_roidb, checkpoints[(sum_iters - next_iters - 1)])
def main():
    """Convert the model described by the command-line config and save it.

    The operators and external inputs/outputs of the loaded model are copied
    into a fresh net, which is then passed through ``convert_net`` and
    ``add_bbox_ops``, optionally fused / switched to mobile engines /
    converted for GPU, optionally verified on a test image, and saved
    together with its generated init net.
    """
    workspace.GlobalInit(['caffe2', '--caffe2_log_level=0'])

    args = parse_args()
    logger.info('Called with args:')
    logger.info(args)

    if args.cfg_file is not None:
        merge_cfg_from_file(args.cfg_file)
    if args.opts is not None:
        merge_cfg_from_list(args.opts)
    cfg.NUM_GPUS = 1
    assert_and_infer_cfg()
    logger.info('Converting model with config:')
    logger.info(pprint.pformat(cfg))

    # script will stop when it can't find an operator rather
    # than stopping based on these flags
    #
    # assert not cfg.MODEL.KEYPOINTS_ON, "Keypoint model not supported."
    # assert not cfg.MODEL.MASK_ON, "Mask model not supported."
    # assert not cfg.FPN.FPN_ON, "FPN not supported."
    # assert not cfg.RETINANET.RETINANET_ON, "RetinaNet model not supported."

    # load model from cfg
    model, blobs = load_model(args)

    # Deep-copy the operator list and the external blob lists into a new,
    # unnamed net so the loaded model itself is left untouched.
    net = core.Net('')
    for field in ('op', 'external_input', 'external_output'):
        source_entries = getattr(model.net.Proto(), field)
        getattr(net.Proto(), field).extend(copy.deepcopy(source_entries))

    net.Proto().type = args.net_execution_type
    if args.net_execution_type == 'simple':
        net.Proto().num_workers = 1
    else:
        net.Proto().num_workers = 4

    # Reset the device_option, change to unscope name and replace python operators
    convert_net(args, net.Proto(), blobs)

    # add operators for bbox
    add_bbox_ops(args, net, blobs)

    if args.fuse_af:
        print('Fusing affine channel...')
        net, blobs = mutils.fuse_net_affine(net, blobs)

    if args.use_nnpack:
        mutils.update_mobile_engines(net.Proto())

    # generate init net
    init_net = gen_init_net(net, blobs, ['data', 'im_info'])

    if args.device == 'gpu':
        net, init_net = convert_model_gpu(args, net, init_net)

    net.Proto().name = args.net_name
    init_net.Proto().name = args.net_name + "_init"

    if args.test_img is not None:
        verify_model(args, [net, init_net], args.test_img)

    _save_models(net, init_net, args)
def test_hsm_search(self):
    """Check the HSoftmaxSearch operator against a NumPy simulation.

    Random data, weights and biases are fed to the operator; the same
    search is then reproduced in NumPy over the module-level ``struct``
    hierarchy with the module-level ``beam`` threshold. Every non-empty
    name returned by the operator must match the simulated name at the same
    position, and the scores must agree within 0.001.
    """
    samples = 10
    dim_in = 5
    X = np.random.rand(samples, dim_in).astype(np.float32) - 0.5
    w = np.random.rand(hierarchy_proto.size, dim_in) \
        .astype(np.float32) - 0.5
    b = np.random.rand(hierarchy_proto.size).astype(np.float32) - 0.5
    labels = np.array([np.random.randint(0, 8) for i in range(samples)]) \
        .astype(np.int32)

    workspace.GlobalInit(['caffe2'])
    workspace.FeedBlob("data", X)
    workspace.FeedBlob("weights", w)
    workspace.FeedBlob("bias", b)
    workspace.FeedBlob("labels", labels)
    op = core.CreateOperator(
        'HSoftmaxSearch',
        ['data', 'weights', 'bias'],
        ['names', 'scores'],
        'HSoftmaxSearch',
        arg=args_search)
    workspace.RunOperatorOnce(op)
    names = workspace.FetchBlob('names')
    scores = workspace.FetchBlob('scores')

    def simulation_hsm_search():
        """Return (names, scores) computed in NumPy; scores is per-sample rows."""
        names = []
        scores = []
        for line in struct:
            # line[0] is the start row and line[1] the row count of this
            # entry inside w and b.
            s, e = line[0], line[0] + line[1]
            score = np.dot(X, w[s:e].transpose()) + b[s:e]
            # Numerically stable softmax followed by negative log.
            score = np.exp(score - np.max(score, axis=1, keepdims=True))
            score /= score.sum(axis=1, keepdims=True)
            score = -np.log(score)

            score = score.transpose()
            # Add the score of every already-emitted entry whose name
            # equals line[3]; idx remembers the last such entry.
            idx = -1
            for j, n in enumerate(names):
                if n == line[3]:
                    idx = j
                    score += scores[j]
            # Entries further than `beam` from the parent score are pruned.
            if idx == -1:
                score[score > beam] = np.inf
            else:
                score[score - scores[idx] > beam] = np.inf

            for i, name in enumerate(line[2]):
                scores.append(score[i])
                names.append(name)
        scores = np.vstack(scores)
        return names, scores.transpose()

    p_names, p_scores = simulation_hsm_search()
    # Sort each sample's candidates by score so positions line up with the
    # operator output.
    idx = np.argsort(p_scores, axis=1)
    p_scores = np.sort(p_scores, axis=1)
    p_names = np.array(p_names)[idx]
    for i in range(names.shape[0]):
        for j in range(names.shape[1]):
            if names[i][j]:
                # assertEqual replaces the deprecated assertEquals alias,
                # which no longer exists in Python 3.12.
                self.assertEqual(
                    names[i][j], p_names[i][j].item().encode('utf-8'))
                self.assertAlmostEqual(
                    scores[i][j], p_scores[i][j], delta=0.001)
default=128, help="Max sequence length" ) parser.add_argument( "--iters_to_report", type=int, default=20, help="Number of iterations to report progress" ) parser.add_argument( "--implementation", type=str, default="sinusoid", help="'table' or 'sinusoid'", ) return parser if __name__ == '__main__': args, extra_args = GetArgumentParser().parse_known_args() workspace.GlobalInit([ 'caffe2', '--caffe2_log_level=0', '--caffe2_print_blob_sizes_at_exit=0'] + extra_args) device = core.DeviceOption(caffe2_pb2.CPU) with core.DeviceScope(device): Benchmark(args)
class TestRNNExecutor(unittest.TestCase):
    """Tests that recurrent nets produce the same blobs whether the
    'enable_rnn_executor' argument of their RecurrentNetwork ops is 0 or 1."""

    def setUp(self):
        # Dimensions shared by all tests in this class.
        self.batch_size = 8
        self.input_dim = 20
        self.hidden_dim = 30
        self.encoder_dim = 40

    @given(
        T=st.integers(10, 100),
        forward_only=st.booleans(),
        **hu.gcs)
    def test_lstm_with_attention_equal_simplenet(self, T, forward_only, gc, dc):
        """Build an LSTM-with-attention model and compare both execution modes."""
        # Sequence lengths fed one after another by _compare.
        self.Tseq = [T, T // 2, T // 2 + T // 4, T, T // 2 + 1]
        workspace.ResetWorkspace()
        with core.DeviceScope(gc):
            print("Run with device: {}, forward only: {}".format(
                gc, forward_only))

            workspace.FeedBlob("seq_lengths", np.array([T] * self.batch_size, dtype=np.int32))
            workspace.FeedBlob(
                "target", np.random.rand(
                    T, self.batch_size, self.hidden_dim).astype(np.float32))
            workspace.FeedBlob(
                "hidden_init", np.zeros(
                    [1, self.batch_size, self.hidden_dim], dtype=np.float32))
            workspace.FeedBlob(
                "cell_init", np.zeros(
                    [1, self.batch_size, self.hidden_dim], dtype=np.float32))

            model = model_helper.ModelHelper(name="lstm")
            model.net.AddExternalInputs(["input"])

            init_blobs = []
            hidden_init, cell_init, encoder_outputs = model.net.AddExternalInputs(
                "hidden_init",
                "cell_init",
                "encoder_outputs")

            awec_init = model.net.AddExternalInputs([
                'initial_attention_weighted_encoder_context',
            ])
            init_blobs.extend([hidden_init, cell_init])

            workspace.FeedBlob(
                awec_init,
                np.random.rand(1, self.batch_size, self.encoder_dim).astype(np.float32),
            )
            workspace.FeedBlob(
                encoder_outputs,
                np.random.rand(1, self.batch_size, self.encoder_dim).astype(np.float32),
            )

            outputs = rnn_cell.LSTMWithAttention(
                model=model,
                decoder_inputs="input",
                decoder_input_lengths="seq_lengths",
                initial_decoder_hidden_state=hidden_init,
                initial_decoder_cell_state=cell_init,
                initial_attention_weighted_encoder_context=awec_init,
                encoder_output_dim=self.encoder_dim,
                encoder_outputs=encoder_outputs,
                encoder_lengths=None,
                decoder_input_dim=self.input_dim,
                decoder_state_dim=self.hidden_dim,
                scope="",
                attention_type=AttentionType.Recurrent,
                forward_only=forward_only,
                outputs_with_grads=[0],
            )
            output = outputs[0]

            print(outputs)
            loss = model.AveragedLoss(
                model.SquaredL2Distance([output, "target"], "dist"),
                "loss")
            # Add gradient ops
            if not forward_only:
                model.AddGradientOperators([loss])

            # init
            for init_blob in init_blobs:
                workspace.FeedBlob(
                    init_blob,
                    np.zeros([1, self.batch_size, self.hidden_dim], dtype=np.float32))

            self._compare(model, forward_only)

    @given(
        num_layers=st.integers(1, 8),
        T=st.integers(4, 100),
        forward_only=st.booleans(),
        **hu.gcs)
    def test_lstm_equal_simplenet(self, num_layers, T, forward_only, gc, dc):
        '''
        Test that the RNN executor produces same results as
        the non-executor (i.e running step nets as sequence of simple nets).
        '''
        # Sequence lengths fed one after another by _compare.
        self.Tseq = [T, T // 2, T // 2 + T // 4, T, T // 2 + 1]

        workspace.ResetWorkspace()
        with core.DeviceScope(gc):
            print("Run with device: {}, forward only: {}".format(
                gc, forward_only))

            workspace.FeedBlob("seq_lengths", np.array([T] * self.batch_size, dtype=np.int32))
            workspace.FeedBlob(
                "target", np.random.rand(
                    T, self.batch_size, self.hidden_dim).astype(np.float32))
            workspace.FeedBlob(
                "hidden_init", np.zeros(
                    [1, self.batch_size, self.hidden_dim], dtype=np.float32))
            workspace.FeedBlob(
                "cell_init", np.zeros(
                    [1, self.batch_size, self.hidden_dim], dtype=np.float32))

            model = model_helper.ModelHelper(name="lstm")
            model.net.AddExternalInputs(["input"])

            # One (hidden, cell) initial-state pair per LSTM layer.
            init_blobs = []
            for i in range(num_layers):
                hidden_init, cell_init = model.net.AddExternalInputs(
                    "hidden_init_{}".format(i),
                    "cell_init_{}".format(i))
                init_blobs.extend([hidden_init, cell_init])

            output, last_hidden, _, last_state = rnn_cell.LSTM(
                model=model,
                input_blob="input",
                seq_lengths="seq_lengths",
                initial_states=init_blobs,
                dim_in=self.input_dim,
                dim_out=[self.hidden_dim] * num_layers,
                scope="",
                drop_states=True,
                forward_only=forward_only,
                return_last_layer_only=True,
            )

            loss = model.AveragedLoss(
                model.SquaredL2Distance([output, "target"], "dist"),
                "loss")
            # Add gradient ops
            if not forward_only:
                model.AddGradientOperators([loss])

            # init
            for init_blob in init_blobs:
                workspace.FeedBlob(
                    init_blob,
                    np.zeros([1, self.batch_size, self.hidden_dim], dtype=np.float32))

            self._compare(model, forward_only)

    def _compare(self, model, forward_only):
        """Run model.net with enable_rnn_executor set to 0 and then 1, and
        assert that every ndarray blob is identical between the two runs."""
        # Store list of blobs that exist in the beginning
        workspace.RunNetOnce(model.param_init_net)
        init_ws = {k: workspace.FetchBlob(k) for k in workspace.Blobs()}

        # Run with executor
        for enable_executor in [0, 1]:
            self.enable_rnn_executor(model.net, enable_executor, forward_only)
            workspace.ResetWorkspace()

            # Reset original state
            for k, v in init_ws.items():
                workspace.FeedBlob(k, v)

            # Same seed for both passes so they receive identical inputs.
            np.random.seed(10022015)
            ws = {}
            for j in range(len(self.Tseq)):
                input_shape = [self.Tseq[j], self.batch_size, self.input_dim]
                workspace.FeedBlob(
                    "input", np.random.rand(*input_shape).astype(np.float32))
                workspace.FeedBlob(
                    "target",
                    np.random.rand(
                        self.Tseq[j], self.batch_size, self.hidden_dim
                    ).astype(np.float32))
                # The net is (re)created only before the first sequence.
                if j == 0:
                    workspace.CreateNet(model.net, overwrite=True)

                workspace.RunNet(model.net.Proto().name)

                # Store results for each iteration
                for k in workspace.Blobs():
                    ws[k + "." + str(j)] = workspace.FetchBlob(k)

            if enable_executor:
                rnn_exec_ws = ws
            else:
                non_exec_ws = ws

        # Test that all blobs are equal after running with executor
        # or without.
        self.assertEqual(list(non_exec_ws.keys()), list(rnn_exec_ws.keys()))

        mismatch = False
        for k in rnn_exec_ws.keys():
            non_exec_v = non_exec_ws[k]
            rnn_exec_v = rnn_exec_ws[k]
            if type(non_exec_v) is np.ndarray:
                if not np.array_equal(non_exec_v, rnn_exec_v):
                    print("Mismatch: {}".format(k))
                    nv = non_exec_v.flatten()
                    rv = rnn_exec_v.flatten()
                    # Print at most 10 differing elements per blob.
                    c = 0
                    for j in range(len(nv)):
                        if rv[j] != nv[j]:
                            print(j, rv[j], nv[j])
                            c += 1
                            if c == 10:
                                break

                    mismatch = True

        self.assertFalse(mismatch)

    def enable_rnn_executor(self, net, value, forward_only):
        """Set the 'enable_rnn_executor' argument to `value` on every op of
        `net` whose type starts with "RecurrentNetwork"."""
        num_found = 0
        for op in net.Proto().op:
            if op.type.startswith("RecurrentNetwork"):
                for arg in op.arg:
                    if arg.name == 'enable_rnn_executor':
                        arg.i = value
                        num_found += 1
        # This sanity check is so that if someone changes the
        # enable_rnn_executor parameter name, the test will
        # start failing as this function will become defective.
        self.assertEqual(1 if forward_only else 2, num_found)


if __name__ == "__main__":
    import unittest
    import random
    random.seed(2603)
    workspace.GlobalInit([
        'caffe2',
        '--caffe2_log_level=0',
        '--caffe2_rnn_executor=1'])
    unittest.main()
def _str2bool(value):
    """Convert a command-line string to a bool for argparse ``type=``.

    ``type=bool`` treats every non-empty string (including "False") as
    True; this converter interprets the text instead.

    Raises:
        argparse.ArgumentTypeError: if the text is not a recognized boolean.
    """
    if isinstance(value, bool):
        return value
    text = value.strip().lower()
    if text in ('yes', 'true', 't', 'y', '1'):
        return True
    # The empty string stays False, as it was under ``type=bool``.
    if text in ('no', 'false', 'f', 'n', '0', ''):
        return False
    raise argparse.ArgumentTypeError(
        'Boolean value expected, got {!r}'.format(value))


def main():
    """Parse the benchmark command-line options and run the benchmarks.

    Unknown arguments are ignored (``parse_known_args``). Caffe2 is only
    initialized when it is among the selected frameworks; thread-count
    options are applied before ``BenchmarkRunner(args).run()`` is called.
    """
    parser = argparse.ArgumentParser(
        description="Run microbenchmarks.",
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
    )

    parser.add_argument(
        '--tag_filter',
        help='tag_filter can be used to run the benchmarks which matches the tag',
        default='short')

    # This option is used to filter test cases to run.
    parser.add_argument(
        '--operators',
        help='Filter tests based on comma-delimited list of operators to test',
        default=None)

    parser.add_argument(
        '--test_name',
        help='Run tests that have the provided test_name',
        default=None)

    parser.add_argument(
        '--list_ops',
        help='List operators without running them',
        action='store_true')

    parser.add_argument(
        '--list_tests',
        help='List all test cases without running them',
        action='store_true')

    parser.add_argument(
        "--iterations",
        help="Repeat each operator for the number of iterations",
        type=int
    )

    parser.add_argument(
        "--num_runs",
        help="Run each test for num_runs. Each run executes an operator for number of <--iterations>",
        type=int,
        default=1,
    )

    parser.add_argument(
        "--min_time_per_test",
        help="Set the minimum time (unit: seconds) to run each test",
        type=int,
        default=0,
    )

    parser.add_argument(
        "--warmup_iterations",
        help="Number of iterations to ignore before measuring performance",
        default=100,
        type=int
    )

    parser.add_argument(
        "--omp_num_threads",
        help="Number of OpenMP threads used in PyTorch/Caffe2 runtime",
        default=None,
        type=int
    )

    parser.add_argument(
        "--mkl_num_threads",
        help="Number of MKL threads used in PyTorch/Caffe2 runtime",
        default=None,
        type=int
    )

    # _str2bool instead of bool: "--ai_pep_format False" must yield False.
    parser.add_argument(
        "--ai_pep_format",
        help="Print result when running on AI-PEP",
        default=False,
        type=_str2bool
    )

    parser.add_argument(
        "--use_jit",
        help="Run operators with PyTorch JIT mode",
        action='store_true'
    )

    parser.add_argument(
        "--forward_only",
        help="Only run the forward path of operators",
        action='store_true'
    )

    parser.add_argument(
        '--framework',
        help='Comma-delimited list of frameworks to test (Caffe2, PyTorch)',
        default="Caffe2,PyTorch")

    parser.add_argument(
        '--wipe_cache',
        help='Wipe cache before benchmarking each operator',
        action='store_true',
        default=False
    )

    args, _ = parser.parse_known_args()

    if benchmark_utils.is_caffe2_enabled(args.framework):
        workspace.GlobalInit(['caffe2', '--caffe2_log_level=0'])
        workspace.ClearGlobalNetObserver()

    if args.omp_num_threads:
        # benchmark_utils.set_omp_threads sets the env variable OMP_NUM_THREADS
        # which doesn't have any impact as C2 init logic has already been called
        # before setting the env var.

        # In general, OMP_NUM_THREADS (and other OMP env variables) needs to be set
        # before the program is started.
        # From Chapter 4 in OMP standard: https://www.openmp.org/wp-content/uploads/openmp-4.5.pdf
        # "Modifications to the environment variables after the program has started,
        # even if modified by the program itself, are ignored by the OpenMP implementation"
        benchmark_utils.set_omp_threads(args.omp_num_threads)
        if benchmark_utils.is_pytorch_enabled(args.framework):
            torch.set_num_threads(args.omp_num_threads)
    if args.mkl_num_threads:
        benchmark_utils.set_mkl_threads(args.mkl_num_threads)

    benchmark_core.BenchmarkRunner(args).run()
from __future__ import absolute_import, division, print_function, unicode_literals import collections import caffe2.python.hypothesis_test_util as hu import hypothesis.strategies as st import numpy as np from caffe2.python import core, dyndep, utils, workspace from caffe2.quantization.server import utils as dnnlowp_utils from dnnlowp_test_utils import check_quantized_results_close, run_conv_or_fc from hypothesis import assume, given dyndep.InitOpsLibrary("//caffe2/caffe2/quantization/server:dnnlowp_ops") workspace.GlobalInit([ "caffe2", "--caffe2_omp_num_threads=11", # Increase this threshold to test acc16 with randomly generated data "--caffe2_dnnlowp_acc16_density_threshold=0.5", ]) class DNNLowPOpConvAcc16OpTest(hu.HypothesisTestCase): # correctness test with no quantization error in inputs @given(stride=st.integers(1, 2), pad=st.integers(0, 2), kernel=st.integers(1, 5), dilation=st.integers(1, 2), size=st.integers(10, 16), group=st.integers(1, 4), input_channels_per_group=st.sampled_from([2, 3, 4, 5, 8, 16, 32]), output_channels_per_group=st.integers(2, 16), batch_size=st.integers(0, 3),
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. ############################################################################## from __future__ import absolute_import from __future__ import division from __future__ import print_function import numpy as np from caffe2.proto import caffe2_pb2 from caffe2.python import core, workspace, dyndep, test_util dyndep.InitOpsLibrary('@/caffe2/caffe2/contrib/warpctc:ctc_ops') workspace.GlobalInit(["python"]) def softmax(w): maxes = np.amax(w, axis=-1, keepdims=True) e = np.exp(w - maxes) dist = e / np.sum(e, axis=-1, keepdims=True) return dist class CTCOpsTest(test_util.TestCase): def verify_cost(self, device_option): alphabet_size = 5 N = 1 T = 2
import datetime import numpy as np from hypothesis import given, settings, example from hypothesis import strategies as st from caffe2.python import core, workspace from caffe2.python.onnx.onnxifi import onnxifi_caffe2_net from caffe2.python.fakelowp.test_utils import print_test_debug_info import caffe2.python.serialized_test.serialized_test_util as serial # Test that parallel chunks behave the same way as the serial one workspace.GlobalInit([ "caffe2", "--glow_global_fp16=1", "--glow_global_fused_scale_offset_fp16=1", "--glow_global_force_sls_fp16_accum=1", "--glow_nnpi_num_parallel_chunks=2", "--glow_use_dag_optimizer=false", "--glow_dump_graph=true", ]) class Fusions(serial.SerializedTestCase): def _get_scale_zp(self, tensor): tensor_max = np.max(tensor) tensor_min = min(0, np.min(tensor)) scale = np.float32(np.float16((tensor_max - tensor_min) / 255.0)) if scale < 1e-6: scale = np.float32(1e-6) zero_point = 0 - tensor_min / scale zero_point = int(round(np.clip(zero_point, 0, 255.0)))
#!/usr/bin/env python2 """Create a network that perfoms some mathematical operations. Run inference on this network.""" from caffe2.python import workspace, model_helper import numpy as np # Initialize Caffe2 workspace.GlobalInit([ "caffe2", ]) # Initialize a model with the name "Math model" model = model_helper.ModelHelper("Math model") # Add a matrix multiplication operator to the model. # This operator takes blobs "A" and "B" as inputs and produces blob "C" as output. model.net.MatMul(["A", "B"], "C") # Add a Sigmoid operator to the model. # This operator takes blob "C" as input and produces blob "D" as output. model.net.Sigmoid("C", "D") # Add a Softmax operator to the model. # This operator takes blob "D" as input and produces blob "E" as output. model.net.Softmax("D", "E", axis=0) # Create input A, a 3x3 matrix initialized with some values A = np.linspace(-0.4, 0.4, num=9, dtype=np.float32).reshape(3, 3) # Create input B, a 3x1 matrix initialized with some values
help="If set, blindly prefer the given engine(s) for every op.") parser.add_argument("--dump_model", action='store_true', help="If True, dump the model prototxts to disk.") parser.add_argument("--net_type", type=str, default="simple") parser.add_argument("--num_workers", type=int, default=2) parser.add_argument("--use-nvtx", default=False, action='store_true') parser.add_argument("--htrace_span_log_path", type=str) return parser if __name__ == '__main__': args = GetArgumentParser().parse_args() if (not args.batch_size or not args.model or not args.order): GetArgumentParser().print_help() else: workspace.GlobalInit( ['caffe2', '--caffe2_log_level=0'] + (['--caffe2_use_nvtx'] if args.use_nvtx else []) + (['--caffe2_htrace_span_log_path=' + args.htrace_span_log_path] if args.htrace_span_log_path else [])) model_map = { 'AlexNet': AlexNet, 'OverFeat': OverFeat, 'VGGA': VGGA, 'Inception': Inception, 'MLP': MLP, } Benchmark(model_map[args.model], args)
def test_small_sls_acc32(self, seed):
    """Compare a tiny fused 8-bit weighted SLS run through onnxifi with its reference.

    A 2x3 float table is quantized with ``FloatToFused8BitRowwiseQuantized``.
    ``SparseLengthsWeightedSumFused8BitRowwise`` is lowered through
    ``onnxifi_caffe2_net`` and its output is compared against
    ``SparseLengthsWeightedSumFused8BitRowwiseFakeFP32NNPI``. Any non-zero
    relative error prints debug information and fails the test.

    Args:
        seed: value passed to ``np.random.seed`` before the inputs are drawn.
    """
    workspace.GlobalInit([
        "caffe2",
        "--glow_global_fp16=0",
        "--glow_global_fused_scale_offset_fp16=0",
        "--glow_global_force_sls_fp16_accum=0",
    ])
    np.random.seed(seed)
    workspace.ResetWorkspace()

    n = 2
    DIM = 3
    data = 4 * (np.random.random_sample((n, DIM)) + 1).astype(np.float32)
    # A single segment that covers every row exactly once.
    lengths = np.array([n], dtype=np.int32)
    indices = np.array(range(n), dtype=np.int64)
    weights = np.random.uniform(low=0.01, high=0.5, size=[n]).astype(np.float32)

    pred_net = caffe2_pb2.NetDef()
    pred_net.name = "pred"
    pred_net.external_input.extend(
        ["quantized_data", "weights", "indices", "lengths"])
    pred_net.external_output.append("Y")
    pred_net.op.add().CopyFrom(
        core.CreateOperator(
            "SparseLengthsWeightedSumFused8BitRowwise",
            ["quantized_data", "weights", "indices", "lengths"],
            ["Y"],
        ))

    ref_net = caffe2_pb2.NetDef()
    ref_net.name = "ref"
    ref_net.external_input.extend(
        ["quantized_data", "weights", "indices", "lengths"])
    ref_net.external_output.append("Y")
    ref_net.op.add().CopyFrom(
        core.CreateOperator(
            "SparseLengthsWeightedSumFused8BitRowwiseFakeFP32NNPI",
            ["quantized_data", "weights", "indices", "lengths"],
            ["Y"],
        ))

    workspace.FeedBlob("data", data)
    workspace.RunOperatorOnce(
        core.CreateOperator("FloatToFused8BitRowwiseQuantized", ["data"],
                            ["quantized_data"]))
    quantized_data = workspace.FetchBlob("quantized_data")

    onnxified_net = onnxifi_caffe2_net(
        pred_net,
        {},
        max_batch_size=1,
        max_seq_size=n,
        debug=True,
        adjust_batch=True,
        use_onnx=False,
    )
    # The whole predictor is expected to collapse into exactly one Onnxifi op.
    num_onnxified_ops = sum(1 if o.type == "Onnxifi" else 0
                            for o in onnxified_net.op)
    np.testing.assert_equal(num_onnxified_ops, 1)

    workspace.FeedBlob("indices", indices)
    workspace.FeedBlob("lengths", lengths)
    workspace.FeedBlob("weights", weights)

    workspace.CreateNet(onnxified_net)
    workspace.CreateNet(ref_net)

    # Both nets write blob "Y", so fetch right after each run.
    workspace.RunNet(onnxified_net.name)
    Y_glow = workspace.FetchBlob("Y")

    workspace.RunNet(ref_net.name)
    Y_ref = workspace.FetchBlob("Y")

    # Relative error; the epsilon guards against division by zero.
    diff = np.abs((Y_ref - Y_glow) / (Y_ref + 1e-8))
    max_err = np.max(diff, axis=1)
    num_offenders = (max_err > 0).sum()
    if num_offenders > 0:
        np.set_printoptions(precision=12)
        print(
            "ref",
            Y_ref.astype(np.float16).astype(np.float32),
            "glow",
            Y_glow.astype(np.float16).astype(np.float32),
        )
        # The sizes are fixed locals in this test. Report them from the
        # locals so that a mismatch produces the debug dump instead of a
        # NameError on undefined names.
        print_test_debug_info(
            "test_small_sls_acc32",
            {
                "seed": seed,
                "num_rows": n,
                "embedding_dim": DIM,
                "batch_size": len(lengths),
                "indices": indices,
                "data": data,
                "quantized_data": quantized_data,
                "lengths": lengths,
                "weights": weights,
                "Y_glow": Y_glow,
                "Y_ref": Y_ref,
                "diff": diff,
                "rowwise_diff": np.max(diff, axis=1),
            },
        )
        assert 0
results.append((copy(args), float(t_own), float(t_cudnn))) print(args) print("t_cudnn / t_own: {}".format(t_cudnn / t_own)) for args, t_own, t_cudnn in results: print("{}: cudnn time: {}, own time: {}, ratio: {}".format( str(args), t_cudnn, t_own, t_cudnn / t_own)) ratio_sum = 0 for args, t_own, t_cudnn in results: ratio = float(t_cudnn) / t_own ratio_sum += ratio print( "hidden_dim: {}, seq_lengths: {}, batch_size: {}, num_layers: {}:" " cudnn time: {}, own time: {}, ratio: {}".format( args.hidden_dim, args.seq_length, args.batch_size, args.num_layers, t_cudnn, t_own, ratio)) print("Ratio average: {}".format(ratio_sum / len(results))) if __name__ == '__main__': args = lstm_benchmark.GetArgumentParser().parse_args() workspace.GlobalInit([ 'caffe2', '--caffe2_log_level=0', '--caffe2_print_blob_sizes_at_exit=0', '--caffe2_gpu_memory_tracking=1' ]) Compare(args)
def test_slws_fused_8bit_rowwise_acc32_nnpi(self, seed, num_rows,
                                            embedding_dim, batch_size,
                                            max_weight):
    """Randomized check of fused 8-bit weighted SLS: onnxifi output vs. reference.

    Random data, lengths, indices and weights are drawn after seeding NumPy
    with ``seed``. ``SparseLengthsWeightedSumFused8BitRowwise`` is lowered
    through ``onnxifi_caffe2_net`` and compared against
    ``SparseLengthsWeightedSumFused8BitRowwiseFakeFP32NNPI``. Any non-zero
    relative error dumps debug information and fails.
    """
    glow_flags = [
        "caffe2",
        "--glow_global_fp16=0",
        "--glow_global_fused_scale_offset_fp16=0",
        "--glow_global_force_sls_fp16_accum=0",
    ]
    workspace.GlobalInit(glow_flags)
    workspace.ResetWorkspace()
    np.random.seed(seed)

    # Inputs are drawn in a fixed order so a seed reproduces the same case.
    table = np.random.rand(num_rows, embedding_dim).astype(np.float32)
    candidate_rows = np.arange(1, num_rows)
    seg_lengths = np.random.choice(candidate_rows,
                                   batch_size).astype(np.int32)
    picked = [
        row
        for seg_len in seg_lengths
        for row in np.random.choice(candidate_rows, seg_len)
    ]
    flat_indices = np.asarray(picked).astype(np.int64)
    slws_weights = np.random.uniform(
        low=0, high=max_weight,
        size=[len(flat_indices)]).astype(np.float32)

    blob_inputs = ["quantized_data", "weights", "indices", "lengths"]

    def single_op_net(net_name, op_type):
        # One-operator NetDef reading the four input blobs and writing "Y".
        net_def = caffe2_pb2.NetDef()
        net_def.name = net_name
        net_def.external_input.extend(blob_inputs)
        net_def.external_output.append("Y")
        net_def.op.add().CopyFrom(
            core.CreateOperator(op_type, blob_inputs, ["Y"]))
        return net_def

    pred_net = single_op_net("pred",
                             "SparseLengthsWeightedSumFused8BitRowwise")
    ref_net = single_op_net(
        "ref", "SparseLengthsWeightedSumFused8BitRowwiseFakeFP32NNPI")

    workspace.FeedBlob("data", table)
    quantize_op = core.CreateOperator("FloatToFused8BitRowwiseQuantized",
                                      ["data"], ["quantized_data"])
    workspace.RunOperatorOnce(quantize_op)

    onnxified_net = onnxifi_caffe2_net(
        pred_net,
        {},
        max_batch_size=batch_size,
        max_seq_size=np.max(seg_lengths),
        debug=True,
        adjust_batch=True,
        use_onnx=False,
    )
    lowered_ops = [op for op in onnxified_net.op if op.type == "Onnxifi"]
    np.testing.assert_equal(len(lowered_ops), 1)

    for blob_name, blob_value in (("indices", flat_indices),
                                  ("lengths", seg_lengths),
                                  ("weights", slws_weights)):
        workspace.FeedBlob(blob_name, blob_value)

    workspace.CreateNet(onnxified_net)
    workspace.CreateNet(ref_net)

    # Both nets write "Y"; fetch immediately after each run.
    workspace.RunNet(onnxified_net.name)
    Y_glow = workspace.FetchBlob("Y")
    workspace.RunNet(ref_net.name)
    Y_ref = workspace.FetchBlob("Y")

    rel_err = np.abs((Y_ref - Y_glow) / (Y_ref + 1e-8))
    row_err = np.max(rel_err, axis=1)
    if not (row_err > 0).any():
        return

    print_test_debug_info(
        "test_slws_fused_8bit_rowwise_acc32_nnpi",
        {
            "seed": seed,
            "num_rows": num_rows,
            "embedding_dim": embedding_dim,
            "batch_size": batch_size,
            "indices": flat_indices,
            "data": table.shape,
            "lengths": seg_lengths,
            "weights": slws_weights,
            "Y_glow": Y_glow,
            "Y_ref": Y_ref,
            "diff": rel_err,
            "rowwise_diff": row_err,
        },
    )
    assert 0
def main():
    """Parse the microbenchmark command line and run the selected benchmarks.

    After parsing, Caffe2 is initialized when the requested frameworks
    include it, optional OpenMP/MKL thread counts are applied, and the
    parsed arguments are handed to ``benchmark_core.BenchmarkRunner``.
    """

    def _str2bool(value):
        # argparse's ``type=bool`` treats every non-empty string (including
        # "False") as True, so boolean text is parsed explicitly here.
        if isinstance(value, bool):
            return value
        text = value.strip().lower()
        if text in ('yes', 'true', 't', 'y', '1'):
            return True
        if text in ('no', 'false', 'f', 'n', '0', ''):
            return False
        raise argparse.ArgumentTypeError(
            'Boolean value expected, got {!r}'.format(value))

    parser = argparse.ArgumentParser(
        description="Run microbenchmarks.",
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
    )

    parser.add_argument(
        '--tag_filter',
        help=
        'tag_filter can be used to run the benchmarks which matches the tag',
        default='short')

    # This option is used to filter test cases to run.
    parser.add_argument(
        '--operator',
        help='Run the test cases that contain the provided operator'
        ' as a substring of their names',
        default=None)

    parser.add_argument('--test_name',
                        help='Run tests that have the provided test_name',
                        default=None)

    parser.add_argument('--list_ops',
                        help='List operators without running them',
                        action='store_true')

    parser.add_argument('--list_tests',
                        help='List all test cases without running them',
                        action='store_true')

    parser.add_argument(
        "--iterations",
        help="Repeat each operator for the number of iterations",
        type=int)

    parser.add_argument(
        "--num_runs",
        help=
        "Run each test for num_runs. Each run executes an operator for number of <--iterations>",
        type=int,
        default=1,
    )

    parser.add_argument(
        "--min_time_per_test",
        help="Set the minimum time (unit: seconds) to run each test",
        type=int,
        default=0,
    )

    parser.add_argument(
        "--warmup_iterations",
        help="Number of iterations to ignore before measuring performance",
        default=10,
        type=int)

    parser.add_argument(
        "--omp_num_threads",
        help="Number of OpenMP threads used in PyTorch/Caffe2 runtime",
        default=None,
        type=int)

    parser.add_argument(
        "--mkl_num_threads",
        help="Number of MKL threads used in PyTorch/Caffe2 runtime",
        default=None,
        type=int)

    # Accepts "--ai_pep_format", "--ai_pep_format true" and
    # "--ai_pep_format false"; the bare flag means True.
    parser.add_argument("--ai_pep_format",
                        help="Print result when running on AI-PEP",
                        type=_str2bool,
                        nargs='?',
                        const=True,
                        default=False)

    parser.add_argument("--use_jit",
                        help="Run operators with PyTorch JIT mode",
                        action='store_true')

    parser.add_argument("--forward_only",
                        help="Only run the forward path of operators",
                        action='store_true')

    parser.add_argument(
        '--framework',
        help='Comma-delimited list of frameworks to test (Caffe2, PyTorch)',
        default="Caffe2,PyTorch")

    args = parser.parse_args()

    if benchmark_utils.is_caffe2_enabled(args.framework):
        workspace.GlobalInit(['caffe2', '--caffe2_log_level=0'])
        workspace.ClearGlobalNetObserver()

    # Thread counts are only applied when given (None and 0 are skipped).
    if args.omp_num_threads:
        benchmark_utils.set_omp_threads(args.omp_num_threads)
    if args.mkl_num_threads:
        benchmark_utils.set_mkl_threads(args.mkl_num_threads)

    benchmark_core.BenchmarkRunner(args).run()
def test_sparse_lengths_sum(self, num_rows, blocksize, weighted, seed,
                            empty_indices, engine, bit_rate):
    """Check fused N-bit rowwise SparseLengths(Weighted)Sum against references.

    The input is quantized to ``bit_rate`` bits, dequantized, and
    fake-quantized. The quantized sum must be almost equal to the sum over
    the dequantized data, and exactly equal to the 8-bit fused sum over the
    fake-quantized data.
    """
    net = core.Net("bench")
    np.random.seed(seed)
    input_data = np.random.rand(num_rows, blocksize).astype(np.float32)

    if not empty_indices:
        index_count = np.random.randint(len(input_data))
        # the number of indices per sample
        per_sample = np.clip(index_count // 2, 1, 10)
        segment_count = index_count // per_sample
        lengths = np.ones([segment_count], dtype=np.int32) * per_sample
        # readjust the index count when per_sample doesn't divide it
        index_count = index_count // per_sample * per_sample
    else:
        lengths = np.zeros(num_rows, dtype=np.int32)
        index_count = 0

    indices = np.random.randint(low=0,
                                high=len(input_data),
                                size=[index_count],
                                dtype=np.int64)
    weights = np.random.uniform(size=[len(indices)]).astype(np.float32)

    bits = str(bit_rate)

    def append_op(op_type, inputs, output, **op_args):
        # Build the operator by name and append it to the net's proto.
        new_op = core.CreateOperator(op_type, inputs, output, **op_args)
        net.Proto().op.extend([new_op])

    append_op("FloatToFused" + bits + "BitRowwiseQuantized",
              "input_data", "quantized_data", engine=engine)
    append_op("Fused" + bits + "BitRowwiseQuantizedToFloat",
              "quantized_data", "dequantized_data")
    append_op("FloatToFused" + bits + "BitFakeRowwiseQuantized",
              "input_data", "fake_quantized_data", engine=engine)

    # The weighted and unweighted variants differ only in the operator
    # name stem and the extra "weights" input.
    if weighted:
        op_stem = "SparseLengthsWeightedSum"
        trailing_inputs = ["weights", "indices", "lengths"]
    else:
        op_stem = "SparseLengthsSum"
        trailing_inputs = ["indices", "lengths"]

    getattr(net, op_stem)(["dequantized_data"] + trailing_inputs,
                          "sum_reference")
    getattr(net, op_stem + "Fused8BitRowwise")(
        ["fake_quantized_data"] + trailing_inputs, "sum_fake_quantized")
    append_op(op_stem + "Fused" + bits + "BitRowwise",
              ["quantized_data"] + trailing_inputs, "sum_quantized")

    net.Proto().external_input.extend(["input_data"])

    for blob_name, blob_value in (("input_data", input_data),
                                  ("weights", weights),
                                  ("indices", indices),
                                  ("lengths", lengths)):
        workspace.FeedBlob(blob_name, blob_value)

    workspace.GlobalInit(["caffe2", "--caffe2_log_level=0"])
    workspace.RunNetOnce(net)

    sum_reference = workspace.FetchBlob("sum_reference")
    sum_fake_quantized = workspace.FetchBlob("sum_fake_quantized")
    sum_quantized = workspace.FetchBlob("sum_quantized")

    np.testing.assert_array_almost_equal(sum_reference, sum_quantized)
    np.testing.assert_array_equal(sum_fake_quantized, sum_quantized)
B = np.random.randn(2, 3, 5).astype(np.float32) self._run_test(A, B, check_grad=True) def test_large_forward(self): A = np.random.randn(2, 256, 42, 100).astype(np.float32) B = np.random.randn(2, 256, 35, 87).astype(np.float32) self._run_test(A, B) A = np.random.randn(2, 256, 42, 87).astype(np.float32) B = np.random.randn(2, 256, 35, 87).astype(np.float32) self._run_test(A, B) def test_size_exceptions(self): A = np.random.randn(2, 256, 42, 86).astype(np.float32) B = np.random.randn(2, 256, 35, 87).astype(np.float32) with self.assertRaises(RuntimeError): self._run_test(A, B) A = np.random.randn(2, 255, 42, 88).astype(np.float32) B = np.random.randn(2, 256, 35, 87).astype(np.float32) with self.assertRaises(RuntimeError): self._run_test(A, B) if __name__ == '__main__': workspace.GlobalInit(['caffe2', '--caffe2_log_level=0']) utils.c2.import_detectron_ops() assert 'SpatialNarrowAs' in workspace.RegisteredOperators() utils.logging.setup_logging(__name__) unittest.main()
def test_sparse_lengths_mean_rowwise_sparse_with_skipped_pruning(
        self, num_rows, blocksize, seed, empty_indices, engine, bit_rate):
    """Check fused N-bit SparseLengthsMean, including the rowwise-sparse op.

    The rowwise-sparse operator is fed a single-element mapping table
    ``[0]`` and its output must exactly match the regular fused N-bit mean.
    The fused mean must also be almost equal to the dequantized reference
    and exactly equal to the 8-bit mean over fake-quantized data.
    """
    net = core.Net("bench")
    np.random.seed(seed)
    input_data = np.random.rand(num_rows, blocksize).astype(np.float32)

    if not empty_indices:
        index_count = np.random.randint(len(input_data))
        # the number of indices per sample
        per_sample = np.clip(index_count // 2, 1, 10)
        segment_count = index_count // per_sample
        lengths = np.ones([segment_count], dtype=np.int32) * per_sample
        # readjust the index count when per_sample doesn't divide it
        index_count = index_count // per_sample * per_sample
    else:
        lengths = np.zeros(num_rows, dtype=np.int32)
        index_count = 0

    # Use int32 here because int64 is covered by test_sparse_lengths_sum
    indices = np.random.randint(low=0,
                                high=len(input_data),
                                size=[index_count],
                                dtype=np.int32)

    bits = str(bit_rate)

    def append_ops(*new_ops):
        # Append already-built operators to the net's proto in one call.
        net.Proto().op.extend(list(new_ops))

    append_ops(
        core.CreateOperator(
            "FloatToFused" + bits + "BitRowwiseQuantized",
            "input_data",
            "quantized_data",
            engine=engine,
        ))
    append_ops(
        core.CreateOperator(
            "Fused" + bits + "BitRowwiseQuantizedToFloat",
            "quantized_data",
            "dequantized_data",
        ))
    append_ops(
        core.CreateOperator(
            "FloatToFused" + bits + "BitFakeRowwiseQuantized",
            "input_data",
            "fake_quantized_data",
            engine=engine,
        ))

    lookup_inputs = ["indices", "lengths"]
    net.SparseLengthsMean(["dequantized_data"] + lookup_inputs,
                          "mean_reference")
    net.SparseLengthsMeanFused8BitRowwise(
        ["fake_quantized_data"] + lookup_inputs, "mean_fake_quantized")

    fused_mean_op = core.CreateOperator(
        "SparseLengthsMeanFused" + bits + "BitRowwise",
        ["quantized_data"] + lookup_inputs,
        "mean_quantized",
    )
    sparse_mean_op = core.CreateOperator(
        "SparseLengthsMean" + bits + "BitRowwiseSparse",
        ["quantized_data"] + lookup_inputs + ["mapping_table"],
        "mean_quantized_pruned",
    )
    append_ops(fused_mean_op, sparse_mean_op)

    net.Proto().external_input.extend(["input_data", "mapping_table"])

    for blob_name, blob_value in (("input_data", input_data),
                                  ("indices", indices),
                                  ("lengths", lengths)):
        workspace.FeedBlob(blob_name, blob_value)

    mapping_table = np.array([0]).astype(dtype=np.int32)
    workspace.FeedBlob("mapping_table", mapping_table)

    workspace.GlobalInit(["caffe2", "--caffe2_log_level=0"])
    workspace.RunNetOnce(net)

    mean_reference = workspace.FetchBlob("mean_reference")
    mean_fake_quantized = workspace.FetchBlob("mean_fake_quantized")
    mean_quantized = workspace.FetchBlob("mean_quantized")
    mean_quantized_pruned = workspace.FetchBlob("mean_quantized_pruned")

    np.testing.assert_array_almost_equal(mean_reference, mean_quantized)
    np.testing.assert_array_equal(mean_fake_quantized, mean_quantized)
    np.testing.assert_array_equal(mean_quantized_pruned, mean_quantized)
default='', help='empty or async_scheduling') parser.add_argument('--async_threads', type=int, default=0, help='async_thread_pool_size') parser.add_argument('--batch_size', type=int, default=1, help='Batch Size') parser.add_argument('--steps', type=int, default=10, help='Number of steps to measure.') args, _ = parser.parse_known_args() workspace.ResetWorkspace() workspace.GlobalInit([ 'caffe2', '--caffe2_log_level=2', '--caffe2_net_async_thread_pool_size=' + str(args.async_threads) ]) init_net = mynet.init_net predict_net = mynet.predict_net # you must name it something predict_net.name = "predict" from caffe2.python import net_drawer g = net_drawer.GetPydotGraph(predict_net, rankdir="TB") g.write_dot('test.dot') if args.proto_type != '': predict_net.type = 'async_scheduling' #predict_net.type = 'prof_dag'
) parser.add_argument("--use_pool1", type=int, default=0, help="use pool1 layer") parser.add_argument("--use_local_file", type=int, default=0, help="use local file") parser.add_argument("--crop_per_clip", type=int, default=1, help="number of spatial crops per clip") args = parser.parse_args() log.info(args) assert model_builder.model_validation( args.model_name, args.model_depth, args.clip_length_of if args.input_type else args.clip_length_rgb, args.crop_size if not args.use_convolutional_pred else 112, ) ExtractFeatures(args) if __name__ == "__main__": workspace.GlobalInit(["caffe2", "--caffe2_log_level=2"]) main()
import collections import caffe2.python.hypothesis_test_util as hu import hypothesis.strategies as st import numpy as np from caffe2.python import core, dyndep, workspace from caffe2.quantization.server import utils as dnnlowp_utils from dnnlowp_test_utils import ( avoid_vpmaddubsw_overflow_fc, check_quantized_results_close, ) from hypothesis import given dyndep.InitOpsLibrary("//caffe2/caffe2/quantization/server:dnnlowp_ops") workspace.GlobalInit(["caffe2", "--caffe2_omp_num_threads=11"]) class DNNLowPFullyConnectedOpTest(hu.HypothesisTestCase): # correctness test with no quantization error in inputs @given(input_channels=st.sampled_from([3, 4, 5, 8, 16, 32]), output_channels=st.integers(2, 16), batch_size=st.integers(1, 16), in_quantized=st.booleans(), out_quantized=st.booleans(), weight_quantized=st.booleans(), prepack_weight=st.booleans(), preserve_activation_sparsity=st.booleans(), preserve_weight_sparsity=st.booleans(), fuse_relu=st.booleans(), **hu.gcs_cpu_only)
def test_one_crop(lfb=None, suffix='', shift=None):
    """Test one crop.

    Builds a test-mode model, loads parameters from ``cfg.TEST.PARAMS_FILE``,
    runs the net for ``misc.get_total_test_iters(test_model)`` iterations and
    logs metrics through a ``metrics.MetricsCalculator``.

    Args:
        lfb: value forwarded to ``build_model``. When ``None`` and
            ``cfg.LFB.ENABLED`` is set, it is obtained from
            ``get_lfb(cfg.LFB.MODEL_PARAMS_FILE, is_train=False)``.
        suffix: forwarded to ``build_model`` and to the per-iteration
            metric logging call.
        shift: forwarded to ``build_model`` and ``get_test_name``; defaults
            to ``cfg.TEST.CROP_SHIFT`` when ``None``.

    Raises:
        Exception: if ``cfg.TEST.PARAMS_FILE`` is not set.
    """
    workspace.GlobalInit(['caffe2', '--caffe2_log_level=0'])

    np.random.seed(cfg.RNG_SEED)

    # Mutates the global config for the rest of the process.
    cfg.AVA.FULL_EVAL = True

    if lfb is None and cfg.LFB.ENABLED:
        print_cfg()
        lfb = get_lfb(cfg.LFB.MODEL_PARAMS_FILE, is_train=False)

    print_cfg()

    workspace.ResetWorkspace()
    logger.info("Done ResetWorkspace...")

    timer = Timer()

    logger.warning('Testing started...')  # for monitoring cluster jobs

    if shift is None:
        shift = cfg.TEST.CROP_SHIFT
    test_model = model_builder_video.ModelBuilder(train=False,
                                                  use_cudnn=True,
                                                  cudnn_exhaustive_search=True,
                                                  split=cfg.TEST.DATA_TYPE)

    test_model.build_model(lfb=lfb, suffix=suffix, shift=shift)

    # Select the net executor type from the profiling flag.
    if cfg.PROF_DAG:
        test_model.net.Proto().type = 'prof_dag'
    else:
        test_model.net.Proto().type = 'dag'

    workspace.RunNetOnce(test_model.param_init_net)
    workspace.CreateNet(test_model.net)

    misc.save_net_proto(test_model.net)
    misc.save_net_proto(test_model.param_init_net)

    total_test_net_iters = misc.get_total_test_iters(test_model)

    test_model.start_data_loader()

    # The total box count is only passed for the 'ava' / 'avabox' datasets.
    test_meter = metrics.MetricsCalculator(
        model=test_model,
        split=cfg.TEST.DATA_TYPE,
        video_idx_to_name=test_model.input_db._video_idx_to_name,
        total_num_boxes=(test_model.input_db._num_boxes_used if cfg.DATASET
                         in ['ava', 'avabox'] else None))

    # Parameters are loaded after the net has been created above.
    if cfg.TEST.PARAMS_FILE:
        checkpoints.load_model_from_params_file_for_test(
            test_model, cfg.TEST.PARAMS_FILE)
    else:
        raise Exception('No params files specified for testing model.')

    begin_time = time.time()

    for test_iter in range(total_test_net_iters):
        # Only the net execution itself is timed.
        timer.tic()
        workspace.RunNet(test_model.net.Proto().name)
        timer.toc()

        # One-time diagnostics on the first iteration.
        if test_iter == 0:
            misc.print_net(test_model)
            os.system('nvidia-smi')
            misc.show_flops_params(test_model)

        test_meter.calculate_and_log_all_metrics_test(test_iter, timer,
                                                      total_test_net_iters,
                                                      suffix)

    logger.info('TTTTTTTIME: {}'.format(time.time() - begin_time))

    test_meter.finalize_metrics(name=get_test_name(shift))
    # NOTE(review): test_iter is only bound by the loop above, so this line
    # raises NameError when total_test_net_iters is 0 - confirm that case
    # cannot occur.
    test_meter.log_final_metrics(test_iter, total_test_net_iters)
    test_model.shutdown_data_loader()