Example #1
0
    # Shutdown data loading threads
    model.roi_data_loader.shutdown()
    return checkpoints


def test_model(model_file, multi_gpu_testing, opts=None):
    """Run inference for ``model_file`` through ``test_net.main``.

    Args:
        model_file: Weights path stored in ``cfg.TEST.WEIGHTS``.
        multi_gpu_testing: Forwarded unchanged to ``test_net.main``.
        opts: Accepted for interface compatibility; not used in this function.
    """
    # The inference entry point takes its settings from the global cfg,
    # so the weights file is handed over by mutating cfg.
    cfg.TEST.WEIGHTS = model_file
    # Start inference from a freshly reset workspace.
    workspace.ResetWorkspace()
    test_net.main(multi_gpu_testing=multi_gpu_testing)


if __name__ == '__main__':
    workspace.GlobalInit(
        ['caffe2', '--caffe2_log_level=0', '--caffe2_gpu_memory_tracking'])
    set_loggers()
    # TODO(rbg): set C2 random seed
    np.random.seed(cfg.RNG_SEED)
    args = parse_args()
    logger.info('Called with args:')
    logger.info(args)
    if args.cfg_file is not None:
        cfg_from_file(args.cfg_file)
    if args.opts is not None:
        cfg_from_list(args.opts)
    assert_and_infer_cfg()
    logger.info('Training with config:')
    logger.info(pprint.pformat(cfg))
    checkpoints = net_trainer()
    if not args.skip_test:
Example #2
0
    def test_SparseSegmentUint8(self):
        """Compare 8-bit rowwise sparse ops with their dequantized references.

        Random data is quantized with ``FloatToRowwiseQuantized8Bits`` and
        dequantized again. Each ``*8BitsRowwise`` op (and a gather followed by
        dequantization) is then checked against the matching op run on the
        dequantized data, to 5 decimal places.
        """
        init_net = core.Net("init")
        net = core.Net("bench")
        num_rows = 10**3
        num_indices = 10**2

        # Inputs: data rows, per-index weights, int64 indices and int32
        # segment lengths (each segment has length 10).
        data = init_net.UniformFill([], shape=[num_rows, 32])
        weights = init_net.UniformFill([], shape=[num_indices, ])
        raw_indices = init_net.UniformIntFill(
            [], shape=[num_indices], max=num_rows - 1)
        indices = init_net.Cast([raw_indices], to=core.DataType.INT64)
        lengths = init_net.ConstantFill(
            [],
            ['l'],
            shape=[num_indices // 10],
            value=10,
            dtype=core.DataType.INT32,
        )

        # Quantize, then dequantize to obtain the reference data.
        net.FloatToRowwiseQuantized8Bits(
            [data], ['quantized_data', 'scale_bias'])
        net.Rowwise8BitQuantizedToFloat(
            ['quantized_data', 'scale_bias'], ['dequantized_data'])

        # Weighted sum: reference, then 8-bit rowwise variant.
        net.SparseLengthsWeightedSum(
            ['dequantized_data', weights, indices, lengths],
            ['PositionWeighted_0'],
            engine='fp16')
        net.SparseLengthsWeightedSum8BitsRowwise(
            ['quantized_data', weights, indices, lengths, 'scale_bias'],
            ['PositionWeighted_1'])

        # Plain sum.
        net.SparseLengthsSum(
            ['dequantized_data', indices, lengths], ['Sum_0'], engine='fp16')
        net.SparseLengthsSum8BitsRowwise(
            ['quantized_data', indices, lengths, 'scale_bias'], ['Sum_1'])

        # NOTE: the SparseLengthsWeightedMean / ...WeightedMean8BitsRowwise
        # pair (outputs 'WeightedMean_0' / 'WeightedMean_1') is deliberately
        # not built here; it was disabled in this test.

        # Mean.
        net.SparseLengthsMean(
            ['dequantized_data', indices, lengths], ['Mean_0'], engine='fp16')
        net.SparseLengthsMean8BitsRowwise(
            ['quantized_data', indices, lengths, 'scale_bias'], ['Mean_1'])

        # Gather quantized rows and their scale/bias, then dequantize them.
        gathered_rows = net.Gather(['quantized_data', indices], engine='fp16')
        gathered_scale_bias = net.Gather(
            ['scale_bias', indices], engine='fp16')
        net.Rowwise8BitQuantizedToFloat(
            [gathered_rows, gathered_scale_bias], 'Gathered_1')

        # Reference gather on the dequantized data.
        net.Gather(['dequantized_data', indices], 'Gathered_0')

        workspace.GlobalInit(['caffe2', '--caffe2_log_level=0'])
        workspace.RunNetOnce(init_net)
        workspace.CreateNet(net)
        workspace.RunNetOnce(net)

        # (blob produced from quantized data, reference blob)
        blob_pairs = (
            ('PositionWeighted_1', 'PositionWeighted_0'),
            ('Sum_1', 'Sum_0'),
            ('Mean_1', 'Mean_0'),
            ('Gathered_1', 'Gathered_0'),
        )
        for quantized_name, reference_name in blob_pairs:
            actual = workspace.FetchBlob(quantized_name)
            expected = workspace.FetchBlob(reference_name)
            np.testing.assert_array_almost_equal(actual, expected, decimal=5)
Example #3
0
from caffe2.python import workspace, core
import numpy as np

from utils import NUM_LOOP_ITERS

# Runs at import time: call Caffe2's GlobalInit with a single-element argument
# list (presumably argv-style, i.e. only the program name and no flags).
workspace.GlobalInit(['caffe2'])


def add_blob(ws, blob_name, tensor_size):
    """Feed a random float32 array into ``ws`` under ``blob_name``.

    Args:
        ws: Object exposing ``FeedBlob(name, array)``.
        blob_name: Name passed to ``FeedBlob``.
        tensor_size: Iterable of dimensions, unpacked into ``np.random.randn``.
    """
    ws.FeedBlob(
        blob_name,
        np.random.randn(*tensor_size).astype(np.float32),
    )


class C2SimpleNet(object):
    """
    Build a Caffe2 net around a single 'op_name' operator.

    On construction one random input blob per operator input is fed into the
    global workspace, the blobs are registered as external inputs of the net,
    and the operator is added to the net with those inputs.
    The original description also mentions a forward method that runs the net
    niter times; no such method is part of the code shown here.
    """
    def __init__(self, op_name, num_inputs=1, debug=False):
        # `debug` is accepted but not used in this constructor.
        self.net = core.Net("framework_benchmark_net")
        self.input_names = ["in_{}".format(i) for i in range(num_inputs)]
        # Every input blob is a random float32 tensor of shape [1].
        for blob_name in self.input_names:
            add_blob(workspace, blob_name, [1])
        self.net.AddExternalInputs(self.input_names)
        # Operators are created by calling the attribute named after the op.
        getattr(self.net, op_name)(self.input_names)
        # Output field of the operator that was just appended.
        self.output_name = self.net._net.op[-1].output
Example #4
0
 def setUpClass(cls):
     """Initialize Caffe2 with the default test flags and clear engine prefs."""
     test_flags = get_default_test_flags()
     workspace.GlobalInit(test_flags)
     # Reset the default engine settings so that their effect is kept
     # separate from the op tests.
     core.SetEnginePref({}, {})
Example #5
0
    def __init__(
        self,
        cli_args,
        model=None,
        tag=None,
        enable_prof=False,
        id_qs = None,
        len_qs = None,
        fc_q  = None # TODO: Rename this as there are no bottom fc layers
    ):
        """Parse the architecture arguments, set up the model and build its ops.

        Args:
            cli_args: Argument object. This method reads ``arch_mlp_bot``,
                ``arch_sparse_feature_size``, ``arch_embedding_size``,
                ``arch_mlp_top``, ``use_accel``, ``arch_interaction_op`` and
                ``arch_interaction_itself``; when a new model is created it
                also reads ``caffe2_net_type`` and ``inter_op_workers``.
            model: Existing model to reuse. When ``None``, Caffe2 is
                initialized via ``workspace.GlobalInit`` and a new
                ``ModelHelper`` is created.
            tag: Only used when ``model`` is given; elements 0..9 are passed
                positionally to ``self.set_tags``.
            enable_prof: When true (and ``model`` is ``None``), extra logging
                flags are appended to the GlobalInit options.
            id_qs, len_qs, fc_q: Forwarded unchanged to
                ``self.create_sequential_forward_ops``.

        NOTE(review): the method ends with ``return self.create_sequential_forward_ops(...)``.
        Python raises ``TypeError`` if ``__init__`` returns anything other than
        ``None`` - confirm that helper returns ``None``.
        """
        super(Wide_and_Deep, self).__init__()
        self.args = cli_args

        # Check that the wide and deep networks are configured correctly
        self.check_args(self.args)

        ### parse command line arguments ###
        # Layer sizes arrive as "-"-separated integer strings.
        ln_bot = np.fromstring(cli_args.arch_mlp_bot, dtype=int, sep="-")
        # NOTE(review): m_den is not used again in this method.
        m_den = ln_bot[0]

        m_spa = cli_args.arch_sparse_feature_size
        ln_emb = np.fromstring(cli_args.arch_embedding_size, dtype=int, sep="-")
        num_fea = ln_emb.size + 1  # num sparse + num dense features

        accel_en = self.args.use_accel

        # print("num features ", num_fea)


        # Size of input dimension to TopFC layers is m_den_out * ln_emb (sparse features) + dense feature input
        # As computed below: (number of embedding tables) * m_spa + ln_bot[0].
        num_int = (num_fea-1) * int(m_spa) + int(ln_bot[0])
        #num_int = (num_fea) * m_den_out

        # Prepend the computed input size to the top-MLP layer spec.
        arch_mlp_top_adjusted = str(num_int) + "-" + cli_args.arch_mlp_top
        # print("mlp_top is: ", arch_mlp_top_adjusted)
        ln_top = np.fromstring(arch_mlp_top_adjusted, dtype=int, sep="-")

        ### initialize the model ###
        if model is None:
            global_init_opt = ["caffe2", "--caffe2_log_level=0"]
            if enable_prof:
                # NOTE(review): "$HOME" is passed literally; nothing in this
                # method expands it - confirm the flag parser does.
                global_init_opt += [
                    "--logtostderr=0",
                    "--log_dir=$HOME",
                    #"--caffe2_logging_print_net_summary=1",
                ]
            workspace.GlobalInit(global_init_opt)
            self.set_tags()
            self.model = model_helper.ModelHelper(name="Wide_and_Deep", init_params=True)

            # NOTE(review): cli_args attributes were already read above, so a
            # falsy cli_args would normally have failed before this check.
            if cli_args:
              self.model.net.Proto().type = cli_args.caffe2_net_type
              self.model.net.Proto().num_workers = cli_args.inter_op_workers

        else:
            # WARNING: assume that workspace and tags have been initialized elsewhere
            self.set_tags(tag[0], tag[1], tag[2], tag[3], tag[4], tag[5], tag[6],
                          tag[7], tag[8], tag[9])
            self.model = model

        # save arguments
        self.m_spa = m_spa
        self.ln_emb = ln_emb
        self.ln_bot = ln_bot
        self.ln_top = ln_top
        self.arch_interaction_op = cli_args.arch_interaction_op
        self.arch_interaction_itself = cli_args.arch_interaction_itself
        self.sigmoid_bot = -1 # TODO: Lets not hard-code this going forward
        # Index of the last entry in ln_top minus one, i.e. ln_top.size - 1.
        self.sigmoid_top = ln_top.size - 1
        self.accel_en = accel_en

        return self.create_sequential_forward_ops(id_qs, len_qs, fc_q)
import unittest

# Must happen before importing caffe2.python.*
import caffe2.python.fakelowp.init_shared_libs  # noqa
import numpy as np
from hypothesis import given, settings
from hypothesis import strategies as st
from caffe2.proto import caffe2_pb2
from caffe2.python import core, workspace
from caffe2.python.onnx.onnxifi import onnxifi_caffe2_net
from caffe2.python.fakelowp.test_utils import print_test_debug_info
import caffe2.python.serialized_test.serialized_test_util as serial

# Runs at import time: initialize Caffe2 with the Glow fp16-related flags below.
# NOTE(review): the exact effect of each flag is defined by the Glow backend,
# not by this file - confirm there before changing them.
workspace.GlobalInit([
    "caffe2",
    "--glow_global_fp16=1",
    "--glow_global_fused_scale_offset_fp16=1",
    "--glow_global_force_sls_fp16_accum=1",
])
# Absolute and relative tolerances; presumably for comparing Glow matmul
# results against a reference (not referenced in the visible code).
GLOW_MATMUL_ATOL = 1e-5
GLOW_MATMUL_RTOL = 1e-3


class SparseLengthsSum8BitFakeNNPIFp16Test(serial.SerializedTestCase):
    def Skip_test_SLS_NonQuantized_fp16(self):
        N = 20000
        DIM = 64
        D = (4 * np.random.random_sample((N, DIM)) + 1).astype(np.float32)
        I = (np.random.randint(0, N, size=12)).astype(np.int64)
        L = np.asarray([4, 4, 4]).astype(np.int32)
        workspace.FeedBlob("D", D)
                        if rv[j] != nv[j]:
                            print(j, rv[j], nv[j])
                            c += 1
                            if c == 10:
                                break

                    mismatch = True

        self.assertFalse(mismatch)

    def enable_rnn_executor(self, net, value, forward_only):
        """Set ``enable_rnn_executor`` on every RecurrentNetwork* op of ``net``.

        Args:
            net: Object whose ``Proto().op`` lists the operators to inspect.
            value: Value written to the ``i`` field of each matching argument.
            forward_only: Selects the expected number of matching arguments:
                1 when true, 2 otherwise.
        """
        executor_args = [
            arg
            for op in net.Proto().op
            if op.type.startswith("RecurrentNetwork")
            for arg in op.arg
            if arg.name == 'enable_rnn_executor'
        ]
        for arg in executor_args:
            arg.i = value
        # Sanity check: if the enable_rnn_executor argument is ever renamed,
        # nothing matches above and this assertion makes the test fail
        # instead of silently doing nothing.
        expected_count = 1 if forward_only else 2
        self.assertEqual(expected_count, len(executor_args))


if __name__ == "__main__":
    # Script entry point: seed Python's `random` module with a fixed value,
    # initialize Caffe2 with the RNN-executor flag set, then run the tests
    # through unittest.
    import random
    random.seed(2603)
    workspace.GlobalInit(
        ['caffe2', '--caffe2_log_level=0', '--caffe2_rnn_executor=1'])
    unittest.main()
Example #8
0
 def setUpClass(cls):
     """Initialize Caffe2 once for the class with a fixed set of flags."""
     init_flags = [
         'caffe2',
         '--caffe2_log_level=0',
         '--caffe2_omp_num_threads=1',
     ]
     workspace.GlobalInit(init_flags)
Example #9
0
def main():
    """Train iteratively, re-selecting the training samples between rounds.

    Steps, as implemented below:

    1. Initialize Caffe2, logging and the global ``cfg`` from the command line.
    2. Load the names of the initial training images from ``imgnames.pkl`` and
       mark the matching roidb entries in a bitmap.
    3. Train for 90000 iterations on those entries (plus flipped copies).
    4. Loop: test the latest checkpoint, stop once the iteration count exceeds
       ``cfg.SOLVER.MAX_ITER``, otherwise run ``detect_im`` on the entries not
       marked in the bitmap, split them into "al" and "ss" candidates, cap
       both groups with the proportion checkpoints, and train 30000 more
       iterations on the marked entries plus the "ss" candidates.

    NOTE(review): "al" / "ss" presumably stand for active-learning and
    self-/semi-supervised samples - confirm against the project docs.
    """
    # Initialize C2
    workspace.GlobalInit(
        ['caffe2', '--caffe2_log_level=0', '--caffe2_gpu_memory_tracking=1'])
    # Set up logging and load config options
    logger = setup_logging(__name__)
    logging.getLogger('detectron.roi_data.loader').setLevel(logging.INFO)
    args = parse_args()
    logger.info('Called with args:')
    logger.info(args)
    if args.cfg_file is not None:
        merge_cfg_from_file(args.cfg_file)
    if args.opts is not None:
        merge_cfg_from_list(args.opts)

    assert_and_infer_cfg()
    logger.info('Training with config:')
    logger.info(pprint.pformat(cfg))
    # Note that while we set the numpy random seed network training will not be
    # deterministic in general. There are sources of non-determinism that cannot
    # be removed with a reasonable execution-speed tradeoff (such as certain
    # non-deterministic cudnn functions).
    np.random.seed(cfg.RNG_SEED)

    # NOTE(security): pickle.load can execute arbitrary code; imgnames.pkl
    # must come from a trusted source.
    # The context manager closes the file even if unpickling raises.
    with open('imgnames.pkl', 'rb') as fs:
        roidbnames = pickle.load(fs)
    # Set for O(1) membership tests; assumes the pickled names are hashable
    # (file-name strings).
    initial_names = set(roidbnames)

    logger.info('Loading dataset: {}'.format(cfg.TRAIN.DATASETS))

    dataset_names = cfg.TRAIN.DATASETS
    proposal_files = cfg.TRAIN.PROPOSAL_FILES

    roidb = get_training_roidb(dataset_names, proposal_files)

    logger.info('{:d} roidb entries'.format(len(roidb)))

    total_num = len(roidb)

    # Bitmap over roidb indices; a set bit marks an entry used for training.
    bitmapRoidb = BitMap(total_num)

    # Initial samples: every roidb entry whose image file name is listed in
    # imgnames.pkl.
    initialidx = []
    train_roidb = []

    for i, x in enumerate(roidb):
        if x['image'].split('/')[-1] in initial_names:
            initialidx.append(i)
            train_roidb.append(x)

    for i in initialidx:
        bitmapRoidb.set(i)

    logger.info('{:d} the number initial roidb entries'.format(
        len(train_roidb)))
    # append flipped images
    train_roidb = flipped_roidb_for_training(train_roidb)

    # Same message as above, now logged after the flipped entries were added.
    logger.info('{:d} the number initial roidb entries'.format(
        len(train_roidb)))
    alamount = 0
    ssamount = 0
    gamma = 0.95
    # control al proportion
    al_proportion_checkpoint = [
        int(x * total_num * 0.4) for x in np.linspace(0.2, 1, 10)
    ]
    # control ss proportion
    ss_proportion_checkpoint = [
        int(x * total_num) for x in np.linspace(0.2, 2, 10)
    ]

    next_iters = 90000
    sum_iters = next_iters
    # Initial training round, starting from cfg.TRAIN.WEIGHTS.
    checkpoints = detectron.utils.train.train_model(sum_iters, train_roidb,
                                                    cfg.TRAIN.WEIGHTS)
    while True:
        # Evaluate the checkpoint of the last completed iteration.
        test_model(checkpoints[(sum_iters - 1)], args.multi_gpu_testing,
                   args.opts)
        if sum_iters > cfg.SOLVER.MAX_ITER:
            break
        # Entries not marked in the bitmap are treated as unlabeled.
        unlabeledidx = list(set(range(total_num)) - set(bitmapRoidb.nonzero()))
        # Labeled samples; kept from the original code, not used below.
        labeledidx = list(set(bitmapRoidb.nonzero()))
        # detect unlabeled samples
        BBoxes, YClass, Scores, al_candidate_idx, ALScore = detect_im(
            checkpoints[(sum_iters - 1)],
            roidb,
            gamma,
            idxs=unlabeledidx,
            gpu_id=0)

        # Order the al candidates by ascending ALScore.
        al_avg_idx = np.argsort(np.array(ALScore))
        al_candidate_idx = [al_candidate_idx[i] for i in al_avg_idx]

        # gamma decays by 0.05 per round, with a floor of 0.7.
        gamma = max(gamma - 0.05, 0.7)

        # The ss candidates are the unlabeled entries that are not al
        # candidates; a set keeps this linear instead of quadratic.
        al_candidate_set = set(al_candidate_idx)
        ss_candidate_idx = [
            i for i in unlabeledidx if i not in al_candidate_set
        ]

        # update roidb for next training
        train_roidb = replace_roidb(roidb, BBoxes, YClass, ss_candidate_idx)

        # Cap each group at its current checkpoint. When a checkpoint is
        # reached it is dropped and the last one is duplicated, so each list
        # keeps its length.
        if alamount + len(al_candidate_idx) >= al_proportion_checkpoint[0]:
            al_candidate_idx = al_candidate_idx[:int(
                al_proportion_checkpoint[0] - alamount)]
            al_proportion_checkpoint.pop(0)
            al_proportion_checkpoint.append(al_proportion_checkpoint[-1])
        if ssamount + len(ss_candidate_idx) >= ss_proportion_checkpoint[0]:
            ss_candidate_idx = ss_candidate_idx[:int(
                ss_proportion_checkpoint[0] - ssamount)]
            ss_proportion_checkpoint.pop(0)
            ss_proportion_checkpoint.append(ss_proportion_checkpoint[-1])

        # record ss and al factor

        alamount += len(al_candidate_idx)
        ssamount += len(ss_candidate_idx)

        logger.info('alfactor:{},ssfactor:{}'.format(alamount / total_num,
                                                     ssamount / total_num))

        # NOTE: the al candidates are not added to the bitmap here (that
        # step is disabled), so only the ss candidates extend the next
        # training set.
        next_train_idx = bitmapRoidb.nonzero()
        next_train_idx.extend(ss_candidate_idx)

        train_roidb = blur_image(train_roidb, ss_candidate_idx)
        # the next training roidb
        train_roidb = [train_roidb[i] for i in next_train_idx]
        # add flipped copies
        train_roidb = flipped_roidb_for_training(train_roidb)
        # Train 30000 more iterations, resuming from the checkpoint of the
        # previous round's last iteration.
        next_iters = 30000
        sum_iters += next_iters
        checkpoints = detectron.utils.train.train_model(
            sum_iters, train_roidb, checkpoints[(sum_iters - next_iters - 1)])
Example #10
0
def main():
    """Build a standalone predict net / init net pair from a configured model.

    Reads the command line, merges it into the global ``cfg`` (forcing
    ``cfg.NUM_GPUS`` to 1), loads the model, copies its operators and external
    inputs/outputs into a fresh net, applies the conversion helpers, optionally
    verifies the result on a test image, and saves both nets.
    """
    workspace.GlobalInit(['caffe2', '--caffe2_log_level=0'])
    args = parse_args()
    logger.info('Called with args:')
    logger.info(args)
    if args.cfg_file is not None:
        merge_cfg_from_file(args.cfg_file)
    if args.opts is not None:
        merge_cfg_from_list(args.opts)
    cfg.NUM_GPUS = 1
    assert_and_infer_cfg()
    logger.info('Converting model with config:')
    logger.info(pprint.pformat(cfg))

    # No up-front checks on cfg.MODEL.KEYPOINTS_ON, cfg.MODEL.MASK_ON,
    # cfg.FPN.FPN_ON or cfg.RETINANET.RETINANET_ON: the script stops when it
    # meets an operator it cannot find rather than based on those flags.

    # load model from cfg
    model, blobs = load_model(args)

    # Copy operators and external inputs/outputs into an unnamed net.
    net = core.Net('')
    for field in ('op', 'external_input', 'external_output'):
        copied = copy.deepcopy(getattr(model.net.Proto(), field))
        getattr(net.Proto(), field).extend(copied)

    execution_type = args.net_execution_type
    net.Proto().type = execution_type
    if execution_type == 'simple':
        net.Proto().num_workers = 1
    else:
        net.Proto().num_workers = 4

    # Reset the device_option, change to unscoped names and replace python
    # operators.
    convert_net(args, net.Proto(), blobs)

    # add operators for bbox
    add_bbox_ops(args, net, blobs)

    if args.fuse_af:
        print('Fusing affine channel...')
        net, blobs = mutils.fuse_net_affine(net, blobs)

    if args.use_nnpack:
        mutils.update_mobile_engines(net.Proto())

    # The init net is generated with 'data' and 'im_info' passed as the
    # empty blobs.
    init_net = gen_init_net(net, blobs, ['data', 'im_info'])

    if args.device == 'gpu':
        net, init_net = convert_model_gpu(args, net, init_net)

    net.Proto().name = args.net_name
    init_net.Proto().name = args.net_name + "_init"

    if args.test_img is not None:
        verify_model(args, [net, init_net], args.test_img)

    _save_models(net, init_net, args)
Example #11
0
    def test_hsm_search(self):
        """Check the HSoftmaxSearch operator against a NumPy simulation.

        Random data, weights and biases are fed to the operator; its 'names'
        and 'scores' outputs are compared with ``simulation_hsm_search``, which
        walks the module-level ``struct`` and prunes with ``beam``.
        """
        samples = 10
        dim_in = 5
        X = np.random.rand(samples, dim_in).astype(np.float32) - 0.5
        w = np.random.rand(hierarchy_proto.size, dim_in) \
            .astype(np.float32) - 0.5
        b = np.random.rand(hierarchy_proto.size).astype(np.float32) - 0.5
        labels = np.array([np.random.randint(0, 8) for i in range(samples)]) \
            .astype(np.int32)

        workspace.GlobalInit(['caffe2'])
        workspace.FeedBlob("data", X)
        workspace.FeedBlob("weights", w)
        workspace.FeedBlob("bias", b)
        # 'labels' is fed to the workspace but is not an input of the op below.
        workspace.FeedBlob("labels", labels)
        op = core.CreateOperator('HSoftmaxSearch', ['data', 'weights', 'bias'],
                                 ['names', 'scores'],
                                 'HSoftmaxSearch',
                                 arg=args_search)
        workspace.RunOperatorOnce(op)
        names = workspace.FetchBlob('names')
        scores = workspace.FetchBlob('scores')

        def simulation_hsm_search():
            """Return (names, scores) computed in NumPy from ``struct``."""
            names = []
            scores = []
            for line in struct:
                # line[0] is the start offset, line[1] the number of rows
                # this entry covers in w / b.
                s, e = line[0], line[0] + line[1]
                score = np.dot(X, w[s:e].transpose()) + b[s:e]
                # Softmax over the entry's rows, then negative log.
                score = np.exp(score - np.max(score, axis=1, keepdims=True))
                score /= score.sum(axis=1, keepdims=True)
                score = -np.log(score)

                score = score.transpose()
                # If an earlier entry produced the name line[3], add that
                # entry's score to this one.
                idx = -1
                for j, n in enumerate(names):
                    if n == line[3]:
                        idx = j
                        score += scores[j]
                # Scores beyond the beam are set to infinity.
                if idx == -1:
                    score[score > beam] = np.inf
                else:
                    score[score - scores[idx] > beam] = np.inf

                for i, name in enumerate(line[2]):
                    scores.append(score[i])
                    names.append(name)
            scores = np.vstack(scores)
            return names, scores.transpose()

        p_names, p_scores = simulation_hsm_search()
        # Sort every row by score so names and scores line up with the op.
        idx = np.argsort(p_scores, axis=1)
        p_scores = np.sort(p_scores, axis=1)
        p_names = np.array(p_names)[idx]
        for i in range(names.shape[0]):
            for j in range(names.shape[1]):
                # Only non-empty operator outputs are compared.
                if names[i][j]:
                    # assertEqual: the assertEquals alias is deprecated and
                    # was removed in Python 3.12.
                    self.assertEqual(names[i][j],
                                     p_names[i][j].item().encode('utf-8'))
                    self.assertAlmostEqual(scores[i][j],
                                           p_scores[i][j],
                                           delta=0.001)
        default=128,
        help="Max sequence length"
    )
    parser.add_argument(
        "--iters_to_report",
        type=int,
        default=20,
        help="Number of iterations to report progress"
    )
    parser.add_argument(
        "--implementation",
        type=str,
        default="sinusoid",
        help="'table' or 'sinusoid'",
    )
    return parser


if __name__ == '__main__':
    # Script entry point. parse_known_args() returns the parsed namespace and
    # the list of arguments the parser did not recognise.
    args, extra_args = GetArgumentParser().parse_known_args()

    # The unrecognised arguments are forwarded to Caffe2 as extra flags.
    workspace.GlobalInit([
        'caffe2',
        '--caffe2_log_level=0',
        '--caffe2_print_blob_sizes_at_exit=0'] + extra_args)

    # Run the benchmark under a CPU device scope.
    device = core.DeviceOption(caffe2_pb2.CPU)

    with core.DeviceScope(device):
        Benchmark(args)
Example #13
0
class TestRNNExecutor(unittest.TestCase):
    """Check that LSTM nets give identical blobs with and without the RNN executor.

    Each test builds a model, then ``_compare`` runs it twice - once with the
    ``enable_rnn_executor`` operator argument set to 0 and once set to 1 - and
    requires every workspace blob to match exactly.
    """

    def setUp(self):
        """Set the fixed dimensions shared by all tests."""
        self.batch_size = 8
        self.input_dim = 20
        self.hidden_dim = 30
        self.encoder_dim = 40

    @given(T=st.integers(10, 100), forward_only=st.booleans(), **hu.gcs)
    def test_lstm_with_attention_equal_simplenet(self, T, forward_only, gc,
                                                 dc):
        """Compare executor / non-executor runs of an LSTM with attention."""
        # Sequence lengths fed on successive runs in _compare.
        self.Tseq = [T, T // 2, T // 2 + T // 4, T, T // 2 + 1]
        workspace.ResetWorkspace()
        with core.DeviceScope(gc):
            print("Run with device: {}, forward only: {}".format(
                gc, forward_only))

            workspace.FeedBlob("seq_lengths",
                               np.array([T] * self.batch_size, dtype=np.int32))
            workspace.FeedBlob(
                "target",
                np.random.rand(T, self.batch_size,
                               self.hidden_dim).astype(np.float32))
            workspace.FeedBlob(
                "hidden_init",
                np.zeros([1, self.batch_size, self.hidden_dim],
                         dtype=np.float32))
            workspace.FeedBlob(
                "cell_init",
                np.zeros([1, self.batch_size, self.hidden_dim],
                         dtype=np.float32))

            model = model_helper.ModelHelper(name="lstm")
            model.net.AddExternalInputs(["input"])

            init_blobs = []
            hidden_init, cell_init, encoder_outputs = model.net.AddExternalInputs(
                "hidden_init", "cell_init", "encoder_outputs")

            awec_init = model.net.AddExternalInputs([
                'initial_attention_weighted_encoder_context',
            ])
            init_blobs.extend([hidden_init, cell_init])

            workspace.FeedBlob(
                awec_init,
                np.random.rand(1, self.batch_size,
                               self.encoder_dim).astype(np.float32),
            )
            workspace.FeedBlob(
                encoder_outputs,
                np.random.rand(1, self.batch_size,
                               self.encoder_dim).astype(np.float32),
            )

            outputs = rnn_cell.LSTMWithAttention(
                model=model,
                decoder_inputs="input",
                decoder_input_lengths="seq_lengths",
                initial_decoder_hidden_state=hidden_init,
                initial_decoder_cell_state=cell_init,
                initial_attention_weighted_encoder_context=awec_init,
                encoder_output_dim=self.encoder_dim,
                encoder_outputs=encoder_outputs,
                encoder_lengths=None,
                decoder_input_dim=self.input_dim,
                decoder_state_dim=self.hidden_dim,
                scope="",
                attention_type=AttentionType.Recurrent,
                forward_only=forward_only,
                outputs_with_grads=[0],
            )
            output = outputs[0]

            print(outputs)
            loss = model.AveragedLoss(
                model.SquaredL2Distance([output, "target"], "dist"), "loss")
            # Add gradient ops
            if not forward_only:
                model.AddGradientOperators([loss])

            # Feed zeros for the initial hidden and cell states.
            for init_blob in init_blobs:
                workspace.FeedBlob(
                    init_blob,
                    np.zeros([1, self.batch_size, self.hidden_dim],
                             dtype=np.float32))

            self._compare(model, forward_only)

    @given(num_layers=st.integers(1, 8),
           T=st.integers(4, 100),
           forward_only=st.booleans(),
           **hu.gcs)
    def test_lstm_equal_simplenet(self, num_layers, T, forward_only, gc, dc):
        '''
        Test that the RNN executor produces same results as
        the non-executor (i.e running step nets as sequence of simple nets).
        '''
        # Sequence lengths fed on successive runs in _compare.
        self.Tseq = [T, T // 2, T // 2 + T // 4, T, T // 2 + 1]

        workspace.ResetWorkspace()
        with core.DeviceScope(gc):
            print("Run with device: {}, forward only: {}".format(
                gc, forward_only))

            workspace.FeedBlob("seq_lengths",
                               np.array([T] * self.batch_size, dtype=np.int32))
            workspace.FeedBlob(
                "target",
                np.random.rand(T, self.batch_size,
                               self.hidden_dim).astype(np.float32))
            workspace.FeedBlob(
                "hidden_init",
                np.zeros([1, self.batch_size, self.hidden_dim],
                         dtype=np.float32))
            workspace.FeedBlob(
                "cell_init",
                np.zeros([1, self.batch_size, self.hidden_dim],
                         dtype=np.float32))

            model = model_helper.ModelHelper(name="lstm")
            model.net.AddExternalInputs(["input"])

            # One (hidden, cell) initial-state pair per layer.
            init_blobs = []
            for i in range(num_layers):
                hidden_init, cell_init = model.net.AddExternalInputs(
                    "hidden_init_{}".format(i), "cell_init_{}".format(i))
                init_blobs.extend([hidden_init, cell_init])

            output, last_hidden, _, last_state = rnn_cell.LSTM(
                model=model,
                input_blob="input",
                seq_lengths="seq_lengths",
                initial_states=init_blobs,
                dim_in=self.input_dim,
                dim_out=[self.hidden_dim] * num_layers,
                scope="",
                drop_states=True,
                forward_only=forward_only,
                return_last_layer_only=True,
            )

            loss = model.AveragedLoss(
                model.SquaredL2Distance([output, "target"], "dist"), "loss")
            # Add gradient ops
            if not forward_only:
                model.AddGradientOperators([loss])

            # Feed zeros for every initial state blob.
            for init_blob in init_blobs:
                workspace.FeedBlob(
                    init_blob,
                    np.zeros([1, self.batch_size, self.hidden_dim],
                             dtype=np.float32))

            self._compare(model, forward_only)

    def _compare(self, model, forward_only):
        """Run ``model`` with the executor off and on and compare all blobs.

        Both passes start from the same workspace snapshot and the same NumPy
        seed, so the fed inputs are identical. Blobs are recorded after every
        run and must match exactly between the two passes.
        """
        # Snapshot the workspace right after parameter initialization.
        workspace.RunNetOnce(model.param_init_net)
        init_ws = {k: workspace.FetchBlob(k) for k in workspace.Blobs()}

        # First pass without the executor (0), second pass with it (1).
        for enable_executor in [0, 1]:
            self.enable_rnn_executor(model.net, enable_executor, forward_only)
            workspace.ResetWorkspace()

            # Reset original state
            for k, v in init_ws.items():
                workspace.FeedBlob(k, v)

            # Same seed in both passes so the random inputs are identical.
            np.random.seed(10022015)
            ws = {}
            for j in range(len(self.Tseq)):
                input_shape = [self.Tseq[j], self.batch_size, self.input_dim]
                workspace.FeedBlob(
                    "input",
                    np.random.rand(*input_shape).astype(np.float32))
                workspace.FeedBlob(
                    "target",
                    np.random.rand(self.Tseq[j], self.batch_size,
                                   self.hidden_dim).astype(np.float32))
                # The net is (re)created once per pass, after the first
                # inputs have been fed.
                if j == 0:
                    workspace.CreateNet(model.net, overwrite=True)

                workspace.RunNet(model.net.Proto().name)

                # Store results for each iteration
                for k in workspace.Blobs():
                    ws[k + "." + str(j)] = workspace.FetchBlob(k)

            if enable_executor:
                rnn_exec_ws = ws
            else:
                non_exec_ws = ws

        # Test that all blobs are equal after running with executor
        # or without.
        self.assertEqual(list(non_exec_ws.keys()), list(rnn_exec_ws.keys()))

        mismatch = False
        for k in rnn_exec_ws.keys():
            non_exec_v = non_exec_ws[k]
            rnn_exec_v = rnn_exec_ws[k]
            if isinstance(non_exec_v, np.ndarray):
                if not np.array_equal(non_exec_v, rnn_exec_v):
                    print("Mismatch: {}".format(k))
                    nv = non_exec_v.flatten()
                    rv = rnn_exec_v.flatten()
                    # Print at most the first 10 differing elements.
                    c = 0
                    for j in range(len(nv)):
                        if rv[j] != nv[j]:
                            print(j, rv[j], nv[j])
                            c += 1
                            if c == 10:
                                break

                    mismatch = True

        self.assertFalse(mismatch)

    def enable_rnn_executor(self, net, value, forward_only):
        """Set ``enable_rnn_executor`` to ``value`` on all RecurrentNetwork* ops.

        Asserts that exactly 1 matching argument was found when
        ``forward_only`` is true and 2 otherwise.
        """
        num_found = 0
        for op in net.Proto().op:
            if op.type.startswith("RecurrentNetwork"):
                for arg in op.arg:
                    if arg.name == 'enable_rnn_executor':
                        arg.i = value
                        num_found += 1
        # This sanity check is so that if someone changes the
        # enable_rnn_executor parameter name, the test will
        # start failing as this function will become defective.
        self.assertEqual(1 if forward_only else 2, num_found)


# The entry-point guard belongs at module level. Nested inside the class body
# it would run unittest.main() while the class is still being defined, before
# the test case is bound in the module.
if __name__ == "__main__":
    import unittest
    import random
    random.seed(2603)
    workspace.GlobalInit(
        ['caffe2', '--caffe2_log_level=0', '--caffe2_rnn_executor=1'])
    unittest.main()
Example #14
0
def main():
    """Parse the benchmark command line and run the selected benchmarks.

    Unrecognised arguments are ignored because ``parse_known_args`` is used.
    When Caffe2 is among the requested frameworks, ``workspace.GlobalInit`` and
    ``workspace.ClearGlobalNetObserver`` are called; optional OpenMP/MKL
    thread counts are then applied and the parsed options are passed to
    ``benchmark_core.BenchmarkRunner(args).run()``.
    """

    def _str2bool(value):
        """Convert a command-line token such as ``"false"`` to a bool.

        ``type=bool`` must not be used with argparse: ``bool("False")`` and
        ``bool("0")`` are both True because any non-empty string is truthy.
        """
        if isinstance(value, bool):
            return value
        lowered = value.lower()
        if lowered in ('yes', 'true', 't', 'y', '1'):
            return True
        if lowered in ('no', 'false', 'f', 'n', '0'):
            return False
        raise argparse.ArgumentTypeError('Boolean value expected.')

    parser = argparse.ArgumentParser(
        description="Run microbenchmarks.",
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
    )

    parser.add_argument(
        '--tag_filter',
        help='tag_filter can be used to run the benchmarks which matches the tag',
        default='short')

    # This option is used to filter test cases to run.
    parser.add_argument(
        '--operators',
        help='Filter tests based on comma-delimited list of operators to test',
        default=None)

    parser.add_argument(
        '--test_name',
        help='Run tests that have the provided test_name',
        default=None)

    parser.add_argument(
        '--list_ops',
        help='List operators without running them',
        action='store_true')

    parser.add_argument(
        '--list_tests',
        help='List all test cases without running them',
        action='store_true')

    parser.add_argument(
        "--iterations",
        help="Repeat each operator for the number of iterations",
        type=int
    )

    parser.add_argument(
        "--num_runs",
        help="Run each test for num_runs. Each run executes an operator for number of <--iterations>",
        type=int,
        default=1,
    )

    parser.add_argument(
        "--min_time_per_test",
        help="Set the minimum time (unit: seconds) to run each test",
        type=int,
        default=0,
    )

    parser.add_argument(
        "--warmup_iterations",
        help="Number of iterations to ignore before measuring performance",
        default=100,
        type=int
    )

    parser.add_argument(
        "--omp_num_threads",
        help="Number of OpenMP threads used in PyTorch/Caffe2 runtime",
        default=None,
        type=int
    )

    parser.add_argument(
        "--mkl_num_threads",
        help="Number of MKL threads used in PyTorch/Caffe2 runtime",
        default=None,
        type=int
    )

    # Accepts "--ai_pep_format", "--ai_pep_format true" and
    # "--ai_pep_format false"; the bare flag means True.
    parser.add_argument(
        "--ai_pep_format",
        help="Print result when running on AI-PEP",
        type=_str2bool,
        nargs='?',
        const=True,
        default=False
    )

    parser.add_argument(
        "--use_jit",
        help="Run operators with PyTorch JIT mode",
        action='store_true'
    )

    parser.add_argument(
        "--forward_only",
        help="Only run the forward path of operators",
        action='store_true'
    )

    parser.add_argument(
        '--framework',
        help='Comma-delimited list of frameworks to test (Caffe2, PyTorch)',
        default="Caffe2,PyTorch")

    parser.add_argument(
        '--wipe_cache',
        help='Wipe cache before benchmarking each operator',
        action='store_true',
        default=False
    )

    args, _ = parser.parse_known_args()

    if benchmark_utils.is_caffe2_enabled(args.framework):
        workspace.GlobalInit(['caffe2', '--caffe2_log_level=0'])
        workspace.ClearGlobalNetObserver()
    if args.omp_num_threads:
        # benchmark_utils.set_omp_threads sets the env variable OMP_NUM_THREADS
        # which doesn't have any impact as C2 init logic has already been called
        # before setting the env var.

        # In general, OMP_NUM_THREADS (and other OMP env variables) needs to be set
        # before the program is started.
        # From Chapter 4 in OMP standard: https://www.openmp.org/wp-content/uploads/openmp-4.5.pdf
        # "Modifications to the environment variables after the program has started,
        # even if modified by the program itself, are ignored by the OpenMP implementation"
        benchmark_utils.set_omp_threads(args.omp_num_threads)
        if benchmark_utils.is_pytorch_enabled(args.framework):
            torch.set_num_threads(args.omp_num_threads)
    if args.mkl_num_threads:
        benchmark_utils.set_mkl_threads(args.mkl_num_threads)

    benchmark_core.BenchmarkRunner(args).run()
from __future__ import absolute_import, division, print_function, unicode_literals

import collections

import caffe2.python.hypothesis_test_util as hu
import hypothesis.strategies as st
import numpy as np
from caffe2.python import core, dyndep, utils, workspace
from caffe2.quantization.server import utils as dnnlowp_utils
from dnnlowp_test_utils import check_quantized_results_close, run_conv_or_fc
from hypothesis import assume, given

# Module-level setup, executed at import time: load the dnnlowp_ops operator
# library through dyndep, then initialise Caffe2 with the flags listed below.
dyndep.InitOpsLibrary("//caffe2/caffe2/quantization/server:dnnlowp_ops")
workspace.GlobalInit([
    "caffe2",
    "--caffe2_omp_num_threads=11",
    # Increase this threshold to test acc16 with randomly generated data
    "--caffe2_dnnlowp_acc16_density_threshold=0.5",
])


class DNNLowPOpConvAcc16OpTest(hu.HypothesisTestCase):
    # correctness test with no quantization error in inputs
    @given(stride=st.integers(1, 2),
           pad=st.integers(0, 2),
           kernel=st.integers(1, 5),
           dilation=st.integers(1, 2),
           size=st.integers(10, 16),
           group=st.integers(1, 4),
           input_channels_per_group=st.sampled_from([2, 3, 4, 5, 8, 16, 32]),
           output_channels_per_group=st.integers(2, 16),
           batch_size=st.integers(0, 3),
Example #16
0
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
##############################################################################

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import numpy as np
from caffe2.proto import caffe2_pb2

from caffe2.python import core, workspace, dyndep, test_util

# Module-level setup, executed at import time: load the warpctc ctc_ops
# operator library through dyndep, then initialise Caffe2.
dyndep.InitOpsLibrary('@/caffe2/caffe2/contrib/warpctc:ctc_ops')
workspace.GlobalInit(["python"])


def softmax(w):
    """Return the softmax of ``w`` computed along its last axis.

    The maximum along the last axis is subtracted before exponentiating, so
    the largest exponent is always ``exp(0)``.
    """
    peak = np.amax(w, axis=-1, keepdims=True)
    exps = np.exp(w - peak)
    normaliser = np.sum(exps, axis=-1, keepdims=True)
    return exps / normaliser


class CTCOpsTest(test_util.TestCase):
    def verify_cost(self, device_option):
        alphabet_size = 5
        N = 1
        T = 2
Example #17
0
import datetime
import numpy as np
from hypothesis import given, settings, example
from hypothesis import strategies as st
from caffe2.python import core, workspace
from caffe2.python.onnx.onnxifi import onnxifi_caffe2_net
from caffe2.python.fakelowp.test_utils import print_test_debug_info
import caffe2.python.serialized_test.serialized_test_util as serial

# Test that parallel chunks behave the same way as the serial one

# Module-level setup, executed at import time: initialise Caffe2 with the
# Glow flags listed below (note --glow_nnpi_num_parallel_chunks=2, which is
# the setting the comment above about parallel chunks refers to).
workspace.GlobalInit([
    "caffe2",
    "--glow_global_fp16=1",
    "--glow_global_fused_scale_offset_fp16=1",
    "--glow_global_force_sls_fp16_accum=1",
    "--glow_nnpi_num_parallel_chunks=2",
    "--glow_use_dag_optimizer=false",
    "--glow_dump_graph=true",
])


class Fusions(serial.SerializedTestCase):
    def _get_scale_zp(self, tensor):
        tensor_max = np.max(tensor)
        tensor_min = min(0, np.min(tensor))
        scale = np.float32(np.float16((tensor_max - tensor_min) / 255.0))
        if scale < 1e-6:
            scale = np.float32(1e-6)
        zero_point = 0 - tensor_min / scale
        zero_point = int(round(np.clip(zero_point, 0, 255.0)))
Example #18
0
#!/usr/bin/env python2
"""Create a network that performs some mathematical operations.
Run inference on this network."""

from caffe2.python import workspace, model_helper
import numpy as np

# Initialize Caffe2
workspace.GlobalInit([
    "caffe2",
])

# Initialize a model with the name "Math model"
model = model_helper.ModelHelper("Math model")

# Add a matrix multiplication operator to the model.
# This operator takes blobs "A" and "B" as inputs and produces blob "C" as output.
model.net.MatMul(["A", "B"], "C")

# Add a Sigmoid operator to the model.
# This operator takes blob "C" as input and produces blob "D" as output.
model.net.Sigmoid("C", "D")

# Add a Softmax operator to the model.
# This operator takes blob "D" as input and produces blob "E" as output.
model.net.Softmax("D", "E", axis=0)

# Create input A, a 3x3 matrix initialized with some values
A = np.linspace(-0.4, 0.4, num=9, dtype=np.float32).reshape(3, 3)

# Create input B, a 3x1 matrix initialized with some values
Example #19
0
        help="If set, blindly prefer the given engine(s) for every op.")
    parser.add_argument("--dump_model",
                        action='store_true',
                        help="If True, dump the model prototxts to disk.")
    parser.add_argument("--net_type", type=str, default="simple")
    parser.add_argument("--num_workers", type=int, default=2)
    parser.add_argument("--use-nvtx", default=False, action='store_true')
    parser.add_argument("--htrace_span_log_path", type=str)
    return parser


if __name__ == '__main__':
    # Entry point: benchmark the requested model, or print the usage text when
    # any of batch_size / model / order is missing.
    args = GetArgumentParser().parse_args()
    if args.batch_size and args.model and args.order:
        # Optional Caffe2 flags are appended only when requested.
        init_argv = ['caffe2', '--caffe2_log_level=0']
        if args.use_nvtx:
            init_argv = init_argv + ['--caffe2_use_nvtx']
        if args.htrace_span_log_path:
            init_argv = init_argv + [
                '--caffe2_htrace_span_log_path=' + args.htrace_span_log_path]
        workspace.GlobalInit(init_argv)

        # Maps the --model value to the builder passed to Benchmark.
        model_map = {
            'AlexNet': AlexNet,
            'OverFeat': OverFeat,
            'VGGA': VGGA,
            'Inception': Inception,
            'MLP': MLP,
        }
        Benchmark(model_map[args.model], args)
    else:
        GetArgumentParser().print_help()
Example #20
0
    def test_small_sls_acc32(self, seed):
        """Compare a tiny weighted sparse-lengths-sum with its reference op.

        ``pred_net`` (``SparseLengthsWeightedSumFused8BitRowwise``) is passed
        through ``onnxifi_caffe2_net`` and its output ``Y`` is compared with
        the output of ``SparseLengthsWeightedSumFused8BitRowwiseFakeFP32NNPI``
        on the same blobs. Any nonzero relative difference prints both
        outputs, dumps debug info and fails the test.

        Args:
            seed: Seed passed to ``np.random.seed`` before generating inputs.
        """
        workspace.GlobalInit([
            "caffe2",
            "--glow_global_fp16=0",
            "--glow_global_fused_scale_offset_fp16=0",
            "--glow_global_force_sls_fp16_accum=0",
        ])
        np.random.seed(seed)
        workspace.ResetWorkspace()

        # Fixed problem size: n rows of DIM values, all summed into one segment.
        n = 2
        DIM = 3
        data = 4 * (np.random.random_sample((n, DIM)) + 1).astype(np.float32)

        lengths = np.array([n], dtype=np.int32)
        indices = np.array(range(n), dtype=np.int64)
        weights = np.random.uniform(low=0.01, high=0.5,
                                    size=[n]).astype(np.float32)

        pred_net = caffe2_pb2.NetDef()
        pred_net.name = "pred"
        pred_net.external_input.extend(
            ["quantized_data", "weights", "indices", "lengths"])
        pred_net.external_output.append("Y")
        pred_net.op.add().CopyFrom(
            core.CreateOperator(
                "SparseLengthsWeightedSumFused8BitRowwise",
                ["quantized_data", "weights", "indices", "lengths"],
                ["Y"],
            ))

        ref_net = caffe2_pb2.NetDef()
        ref_net.name = "ref"
        ref_net.external_input.extend(
            ["quantized_data", "weights", "indices", "lengths"])
        ref_net.external_output.append("Y")
        ref_net.op.add().CopyFrom(
            core.CreateOperator(
                "SparseLengthsWeightedSumFused8BitRowwiseFakeFP32NNPI",
                ["quantized_data", "weights", "indices", "lengths"],
                ["Y"],
            ))

        # Quantize the float data once; both nets read "quantized_data".
        workspace.FeedBlob("data", data)
        workspace.RunOperatorOnce(
            core.CreateOperator("FloatToFused8BitRowwiseQuantized", ["data"],
                                ["quantized_data"]))

        quantized_data = workspace.FetchBlob("quantized_data")

        onnxified_net = onnxifi_caffe2_net(
            pred_net,
            {},
            max_batch_size=1,
            max_seq_size=n,
            debug=True,
            adjust_batch=True,
            use_onnx=False,
        )
        # The single op of pred_net must have been replaced by one Onnxifi op.
        num_onnxified_ops = sum(1 if o.type == "Onnxifi" else 0
                                for o in onnxified_net.op)
        np.testing.assert_equal(num_onnxified_ops, 1)

        workspace.FeedBlob("indices", indices)
        workspace.FeedBlob("lengths", lengths)
        workspace.FeedBlob("weights", weights)

        workspace.CreateNet(onnxified_net)
        workspace.CreateNet(ref_net)

        # Both nets write blob "Y", so fetch it right after each run.
        workspace.RunNet(onnxified_net.name)
        Y_glow = workspace.FetchBlob("Y")

        workspace.RunNet(ref_net.name)
        Y_ref = workspace.FetchBlob("Y")

        # Relative error; the 1e-8 term keeps the division defined at zero.
        diff = np.abs((Y_ref - Y_glow) / (Y_ref + 1e-8))
        max_err = np.max(diff, axis=1)
        num_offenders = (max_err > 0).sum()
        if num_offenders > 0:
            np.set_printoptions(precision=12)
            print(
                "ref",
                Y_ref.astype(np.float16).astype(np.float32),
                "glow",
                Y_glow.astype(np.float16).astype(np.float32),
            )
            print_test_debug_info(
                "test_small_sls_acc32",
                {
                    "seed": seed,
                    # This test has no num_rows / embedding_dim / batch_size
                    # parameters; report the fixed sizes used above so the
                    # dump does not raise NameError and hide the mismatch.
                    "num_rows": n,
                    "embedding_dim": DIM,
                    "batch_size": len(lengths),
                    "indices": indices,
                    "data": data,
                    "quantized_data": quantized_data,
                    "lengths": lengths,
                    "weights": weights,
                    "Y_glow": Y_glow,
                    "Y_ref": Y_ref,
                    "diff": diff,
                    "rowwise_diff": max_err,
                },
            )
            assert 0
Example #21
0
                    results.append((copy(args), float(t_own), float(t_cudnn)))
                    print(args)
                    print("t_cudnn / t_own: {}".format(t_cudnn / t_own))

    for args, t_own, t_cudnn in results:
        print("{}: cudnn time: {}, own time: {}, ratio: {}".format(
            str(args), t_cudnn, t_own, t_cudnn / t_own))

    ratio_sum = 0
    for args, t_own, t_cudnn in results:
        ratio = float(t_cudnn) / t_own
        ratio_sum += ratio
        print(
            "hidden_dim: {}, seq_lengths: {}, batch_size: {}, num_layers: {}:"
            " cudnn time: {}, own time: {}, ratio: {}".format(
                args.hidden_dim, args.seq_length, args.batch_size,
                args.num_layers, t_cudnn, t_own, ratio))

    print("Ratio average: {}".format(ratio_sum / len(results)))


if __name__ == '__main__':
    # Entry point: parse the arguments defined by lstm_benchmark's parser,
    # initialise Caffe2 with the flags below, then run the comparison.
    args = lstm_benchmark.GetArgumentParser().parse_args()

    workspace.GlobalInit([
        'caffe2', '--caffe2_log_level=0',
        '--caffe2_print_blob_sizes_at_exit=0', '--caffe2_gpu_memory_tracking=1'
    ])

    Compare(args)
Example #22
0
    def test_slws_fused_8bit_rowwise_acc32_nnpi(self, seed, num_rows,
                                                embedding_dim, batch_size,
                                                max_weight):
        """Compare the onnxified weighted SLS op with its FakeFP32NNPI reference.

        Random data, lengths, indices and weights are generated from ``seed``.
        ``pred_net`` (``SparseLengthsWeightedSumFused8BitRowwise``) is passed
        through ``onnxifi_caffe2_net`` and its output ``Y`` is compared with
        the output of ``SparseLengthsWeightedSumFused8BitRowwiseFakeFP32NNPI``.
        Any nonzero relative difference dumps debug info and fails the test.

        Args:
            seed: Seed passed to ``np.random.seed``.
            num_rows: Number of rows in the random data matrix.
            embedding_dim: Number of columns in the random data matrix.
            batch_size: Number of segments (entries in ``lengths``).
            max_weight: Upper bound of the uniform range used for weights.
        """
        workspace.GlobalInit([
            "caffe2",
            "--glow_global_fp16=0",
            "--glow_global_fused_scale_offset_fp16=0",
            "--glow_global_force_sls_fp16_accum=0",
        ])

        workspace.ResetWorkspace()
        np.random.seed(seed)
        data = np.random.rand(num_rows, embedding_dim).astype(np.float32)
        # One segment length per batch entry, each drawn from [1, num_rows).
        lengths = np.random.choice(np.arange(1, num_rows),
                                   batch_size).astype(np.int32)

        # For every segment draw `length` row ids from [1, num_rows); row 0 is
        # never selected.
        indices = []
        for length in lengths:
            indices.extend(np.random.choice(np.arange(1, num_rows), length))
        indices = np.asarray(indices).astype(np.int64)

        # One weight per index, uniform in [0, max_weight).
        weights = np.random.uniform(low=0,
                                    high=max_weight,
                                    size=[len(indices)]).astype(np.float32)

        pred_net = caffe2_pb2.NetDef()
        pred_net.name = "pred"
        pred_net.external_input.extend(
            ["quantized_data", "weights", "indices", "lengths"])
        pred_net.external_output.append("Y")
        pred_net.op.add().CopyFrom(
            core.CreateOperator(
                "SparseLengthsWeightedSumFused8BitRowwise",
                ["quantized_data", "weights", "indices", "lengths"],
                ["Y"],
            ))

        ref_net = caffe2_pb2.NetDef()
        ref_net.name = "ref"
        ref_net.external_input.extend(
            ["quantized_data", "weights", "indices", "lengths"])
        ref_net.external_output.append("Y")
        ref_net.op.add().CopyFrom(
            core.CreateOperator(
                "SparseLengthsWeightedSumFused8BitRowwiseFakeFP32NNPI",
                ["quantized_data", "weights", "indices", "lengths"],
                ["Y"],
            ))

        # Quantize the float data once; both nets read "quantized_data".
        workspace.FeedBlob("data", data)
        workspace.RunOperatorOnce(
            core.CreateOperator("FloatToFused8BitRowwiseQuantized", ["data"],
                                ["quantized_data"]))
        onnxified_net = onnxifi_caffe2_net(
            pred_net,
            {},
            max_batch_size=batch_size,
            max_seq_size=np.max(lengths),
            debug=True,
            adjust_batch=True,
            use_onnx=False,
        )
        # The single op of pred_net must have been replaced by one Onnxifi op.
        num_onnxified_ops = sum(1 if o.type == "Onnxifi" else 0
                                for o in onnxified_net.op)
        np.testing.assert_equal(num_onnxified_ops, 1)

        workspace.FeedBlob("indices", indices)
        workspace.FeedBlob("lengths", lengths)
        workspace.FeedBlob("weights", weights)

        workspace.CreateNet(onnxified_net)
        workspace.CreateNet(ref_net)

        # Both nets write blob "Y", so fetch it right after each run.
        workspace.RunNet(onnxified_net.name)
        Y_glow = workspace.FetchBlob("Y")

        workspace.RunNet(ref_net.name)
        Y_ref = workspace.FetchBlob("Y")

        # Relative error; the 1e-8 term keeps the division defined at zero.
        diff = np.abs((Y_ref - Y_glow) / (Y_ref + 1e-8))
        max_err = np.max(diff, axis=1)
        num_offenders = (max_err > 0).sum()
        if num_offenders > 0:
            # Only the shape of `data` is dumped here, not its contents.
            print_test_debug_info(
                "test_slws_fused_8bit_rowwise_acc32_nnpi",
                {
                    "seed": seed,
                    "num_rows": num_rows,
                    "embedding_dim": embedding_dim,
                    "batch_size": batch_size,
                    "indices": indices,
                    "data": data.shape,
                    "lengths": lengths,
                    "weights": weights,
                    "Y_glow": Y_glow,
                    "Y_ref": Y_ref,
                    "diff": diff,
                    "rowwise_diff": np.max(diff, axis=1),
                },
            )
            assert 0
Example #23
0
def main():
    """Parse the benchmark command line and run the selected benchmarks.

    Arguments are parsed strictly with ``parse_args``. When Caffe2 is among
    the requested frameworks, ``workspace.GlobalInit`` and
    ``workspace.ClearGlobalNetObserver`` are called; optional OpenMP/MKL
    thread counts are then applied and the parsed options are passed to
    ``benchmark_core.BenchmarkRunner(args).run()``.
    """

    def _str2bool(value):
        """Convert a command-line token such as ``"false"`` to a bool.

        ``type=bool`` must not be used with argparse: ``bool("False")`` and
        ``bool("0")`` are both True because any non-empty string is truthy.
        """
        if isinstance(value, bool):
            return value
        lowered = value.lower()
        if lowered in ('yes', 'true', 't', 'y', '1'):
            return True
        if lowered in ('no', 'false', 'f', 'n', '0'):
            return False
        raise argparse.ArgumentTypeError('Boolean value expected.')

    parser = argparse.ArgumentParser(
        description="Run microbenchmarks.",
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
    )

    parser.add_argument(
        '--tag_filter',
        help=
        'tag_filter can be used to run the benchmarks which matches the tag',
        default='short')

    # This option is used to filter test cases to run.
    parser.add_argument(
        '--operator',
        help='Run the test cases that contain the provided operator'
        ' as a substring of their names',
        default=None)

    parser.add_argument('--test_name',
                        help='Run tests that have the provided test_name',
                        default=None)

    parser.add_argument('--list_ops',
                        help='List operators without running them',
                        action='store_true')

    parser.add_argument('--list_tests',
                        help='List all test cases without running them',
                        action='store_true')

    parser.add_argument(
        "--iterations",
        help="Repeat each operator for the number of iterations",
        type=int)

    parser.add_argument(
        "--num_runs",
        help=
        "Run each test for num_runs. Each run executes an operator for number of <--iterations>",
        type=int,
        default=1,
    )

    parser.add_argument(
        "--min_time_per_test",
        help="Set the minimum time (unit: seconds) to run each test",
        type=int,
        default=0,
    )

    parser.add_argument(
        "--warmup_iterations",
        help="Number of iterations to ignore before measuring performance",
        default=10,
        type=int)

    parser.add_argument(
        "--omp_num_threads",
        help="Number of OpenMP threads used in PyTorch/Caffe2 runtime",
        default=None,
        type=int)

    parser.add_argument(
        "--mkl_num_threads",
        help="Number of MKL threads used in PyTorch/Caffe2 runtime",
        default=None,
        type=int)

    # Accepts "--ai_pep_format", "--ai_pep_format true" and
    # "--ai_pep_format false"; the bare flag means True.
    parser.add_argument("--ai_pep_format",
                        help="Print result when running on AI-PEP",
                        type=_str2bool,
                        nargs='?',
                        const=True,
                        default=False)

    parser.add_argument("--use_jit",
                        help="Run operators with PyTorch JIT mode",
                        action='store_true')

    parser.add_argument("--forward_only",
                        help="Only run the forward path of operators",
                        action='store_true')

    parser.add_argument(
        '--framework',
        help='Comma-delimited list of frameworks to test (Caffe2, PyTorch)',
        default="Caffe2,PyTorch")

    args = parser.parse_args()

    if benchmark_utils.is_caffe2_enabled(args.framework):
        workspace.GlobalInit(['caffe2', '--caffe2_log_level=0'])
        workspace.ClearGlobalNetObserver()
    if args.omp_num_threads:
        benchmark_utils.set_omp_threads(args.omp_num_threads)
    if args.mkl_num_threads:
        benchmark_utils.set_mkl_threads(args.mkl_num_threads)

    benchmark_core.BenchmarkRunner(args).run()
Example #24
0
    def test_sparse_lengths_sum(self, num_rows, blocksize, weighted, seed,
                                empty_indices, engine, bit_rate):
        """Check fused N-bit rowwise SparseLengths(Weighted)Sum against references.

        The net quantizes ``input_data`` with the ``bit_rate``-bit fused
        rowwise op, dequantizes it again, and also produces a fake-quantized
        copy. The (weighted) sparse-lengths sum is then computed on each of
        the three blobs. The quantized result must almost equal the result on
        the dequantized data and exactly equal the fake-quantized result.

        Args:
            num_rows: Number of rows in the random input matrix.
            blocksize: Number of columns in the random input matrix.
            weighted: Use the weighted-sum operators when true.
            seed: Seed passed to ``np.random.seed``.
            empty_indices: When true, use no indices and all-zero lengths.
            engine: Engine passed to the two quantization operators.
            bit_rate: Bit width spliced into the fused operator names.
        """
        net = core.Net("bench")

        np.random.seed(seed)

        input_data = np.random.rand(num_rows, blocksize).astype(np.float32)
        if empty_indices:
            # num_rows segments, every one of length zero.
            lengths = np.zeros(num_rows, dtype=np.int32)
            num_indices = 0
        else:
            # Drawn from [0, num_rows), so this may also be zero.
            num_indices = np.random.randint(len(input_data))
            # the number of indices per sample
            lengths_split = np.clip(num_indices // 2, 1, 10)
            lengths = (
                np.ones([num_indices // lengths_split], dtype=np.int32) *
                lengths_split)
            # readjust num_indices when lengths_split doesn't divide num_indices
            num_indices = num_indices // lengths_split * lengths_split
        indices = np.random.randint(low=0,
                                    high=len(input_data),
                                    size=[num_indices],
                                    dtype=np.int64)
        weights = np.random.uniform(size=[len(indices)]).astype(np.float32)

        # input_data -> quantized_data -> dequantized_data (reference path),
        # plus input_data -> fake_quantized_data.
        op = core.CreateOperator(
            "FloatToFused" + str(bit_rate) + "BitRowwiseQuantized",
            "input_data",
            "quantized_data",
            engine=engine,
        )
        net.Proto().op.extend([op])
        op = core.CreateOperator(
            "Fused" + str(bit_rate) + "BitRowwiseQuantizedToFloat",
            "quantized_data",
            "dequantized_data",
        )
        net.Proto().op.extend([op])
        op = core.CreateOperator(
            "FloatToFused" + str(bit_rate) + "BitFakeRowwiseQuantized",
            "input_data",
            "fake_quantized_data",
            engine=engine,
        )
        net.Proto().op.extend([op])

        # NOTE(review): the fake-quantized path always uses the
        # Fused8BitRowwise operator regardless of bit_rate; presumably the
        # fake-quantized blob is stored in the 8-bit layout -- confirm.
        if weighted:
            net.SparseLengthsWeightedSum(
                ["dequantized_data", "weights", "indices", "lengths"],
                "sum_reference")
            net.SparseLengthsWeightedSumFused8BitRowwise(
                ["fake_quantized_data", "weights", "indices", "lengths"],
                "sum_fake_quantized",
            )
            op = core.CreateOperator(
                "SparseLengthsWeightedSumFused" + str(bit_rate) + "BitRowwise",
                ["quantized_data", "weights", "indices", "lengths"],
                "sum_quantized",
            )
            net.Proto().op.extend([op])
        else:
            net.SparseLengthsSum(["dequantized_data", "indices", "lengths"],
                                 "sum_reference")
            net.SparseLengthsSumFused8BitRowwise(
                ["fake_quantized_data", "indices", "lengths"],
                "sum_fake_quantized")
            op = core.CreateOperator(
                "SparseLengthsSumFused" + str(bit_rate) + "BitRowwise",
                ["quantized_data", "indices", "lengths"],
                "sum_quantized",
            )
            net.Proto().op.extend([op])
        net.Proto().external_input.extend(["input_data"])

        workspace.FeedBlob("input_data", input_data)
        workspace.FeedBlob("weights", weights)
        workspace.FeedBlob("indices", indices)
        workspace.FeedBlob("lengths", lengths)

        workspace.GlobalInit(["caffe2", "--caffe2_log_level=0"])
        workspace.RunNetOnce(net)

        sum_reference = workspace.FetchBlob("sum_reference")
        sum_fake_quantized = workspace.FetchBlob("sum_fake_quantized")
        sum_quantized = workspace.FetchBlob("sum_quantized")

        # Approximate match against the dequantized reference, exact match
        # against the fake-quantized result.
        np.testing.assert_array_almost_equal(sum_reference, sum_quantized)
        np.testing.assert_array_equal(sum_fake_quantized, sum_quantized)
        B = np.random.randn(2, 3, 5).astype(np.float32)
        self._run_test(A, B, check_grad=True)

    def test_large_forward(self):
        A = np.random.randn(2, 256, 42, 100).astype(np.float32)
        B = np.random.randn(2, 256, 35, 87).astype(np.float32)
        self._run_test(A, B)

        A = np.random.randn(2, 256, 42, 87).astype(np.float32)
        B = np.random.randn(2, 256, 35, 87).astype(np.float32)
        self._run_test(A, B)

    def test_size_exceptions(self):
        A = np.random.randn(2, 256, 42, 86).astype(np.float32)
        B = np.random.randn(2, 256, 35, 87).astype(np.float32)
        with self.assertRaises(RuntimeError):
            self._run_test(A, B)

        A = np.random.randn(2, 255, 42, 88).astype(np.float32)
        B = np.random.randn(2, 256, 35, 87).astype(np.float32)
        with self.assertRaises(RuntimeError):
            self._run_test(A, B)


if __name__ == '__main__':
    # Entry point: initialise Caffe2, import the Detectron operators, and
    # verify that the SpatialNarrowAs operator is registered before the tests
    # are run through unittest.
    workspace.GlobalInit(['caffe2', '--caffe2_log_level=0'])
    utils.c2.import_detectron_ops()
    assert 'SpatialNarrowAs' in workspace.RegisteredOperators()
    utils.logging.setup_logging(__name__)
    unittest.main()
Example #26
0
    def test_sparse_lengths_mean_rowwise_sparse_with_skipped_pruning(
            self, num_rows, blocksize, seed, empty_indices, engine, bit_rate):
        """Check fused N-bit rowwise SparseLengthsMean variants against references.

        The net quantizes ``input_data`` with the ``bit_rate``-bit fused
        rowwise op, dequantizes it again, and also produces a fake-quantized
        copy. The sparse-lengths mean is computed on each blob, plus once with
        the ``...BitRowwiseSparse`` operator that additionally takes a
        ``mapping_table``. The quantized result must almost equal the result
        on the dequantized data and exactly equal both the fake-quantized and
        the rowwise-sparse results.

        Args:
            num_rows: Number of rows in the random input matrix.
            blocksize: Number of columns in the random input matrix.
            seed: Seed passed to ``np.random.seed``.
            empty_indices: When true, use no indices and all-zero lengths.
            engine: Engine passed to the two quantization operators.
            bit_rate: Bit width spliced into the fused operator names.
        """
        net = core.Net("bench")

        np.random.seed(seed)

        input_data = np.random.rand(num_rows, blocksize).astype(np.float32)
        if empty_indices:
            # num_rows segments, every one of length zero.
            lengths = np.zeros(num_rows, dtype=np.int32)
            num_indices = 0
        else:
            # Drawn from [0, num_rows), so this may also be zero.
            num_indices = np.random.randint(len(input_data))
            # the number of indices per sample
            lengths_split = np.clip(num_indices // 2, 1, 10)
            lengths = (
                np.ones([num_indices // lengths_split], dtype=np.int32) *
                lengths_split)
            # readjust num_indices when lengths_split doesn't divide num_indices
            num_indices = num_indices // lengths_split * lengths_split
        #  Use int32 here because int64 is covered by test_sparse_lengths_sum
        indices = np.random.randint(low=0,
                                    high=len(input_data),
                                    size=[num_indices],
                                    dtype=np.int32)

        # input_data -> quantized_data -> dequantized_data (reference path),
        # plus input_data -> fake_quantized_data.
        op = core.CreateOperator(
            "FloatToFused" + str(bit_rate) + "BitRowwiseQuantized",
            "input_data",
            "quantized_data",
            engine=engine,
        )
        net.Proto().op.extend([op])
        op = core.CreateOperator(
            "Fused" + str(bit_rate) + "BitRowwiseQuantizedToFloat",
            "quantized_data",
            "dequantized_data",
        )
        net.Proto().op.extend([op])
        op = core.CreateOperator(
            "FloatToFused" + str(bit_rate) + "BitFakeRowwiseQuantized",
            "input_data",
            "fake_quantized_data",
            engine=engine,
        )
        net.Proto().op.extend([op])

        # NOTE(review): the fake-quantized path always uses the
        # Fused8BitRowwise operator regardless of bit_rate; presumably the
        # fake-quantized blob is stored in the 8-bit layout -- confirm.
        net.SparseLengthsMean(["dequantized_data", "indices", "lengths"],
                              "mean_reference")
        net.SparseLengthsMeanFused8BitRowwise(
            ["fake_quantized_data", "indices", "lengths"],
            "mean_fake_quantized")
        op1 = core.CreateOperator(
            "SparseLengthsMeanFused" + str(bit_rate) + "BitRowwise",
            ["quantized_data", "indices", "lengths"],
            "mean_quantized",
        )
        op2 = core.CreateOperator(
            "SparseLengthsMean" + str(bit_rate) + "BitRowwiseSparse",
            ["quantized_data", "indices", "lengths"] + ["mapping_table"],
            "mean_quantized_pruned",
        )
        net.Proto().op.extend([op1, op2])
        net.Proto().external_input.extend(["input_data", "mapping_table"])

        workspace.FeedBlob("input_data", input_data)
        workspace.FeedBlob("indices", indices)
        workspace.FeedBlob("lengths", lengths)
        # NOTE(review): a single-element mapping table [0] presumably tells the
        # rowwise-sparse operator that pruning is skipped -- confirm against
        # the operator's documentation.
        mapping_table = np.array([0]).astype(dtype=np.int32)
        workspace.FeedBlob("mapping_table", mapping_table)

        workspace.GlobalInit(["caffe2", "--caffe2_log_level=0"])
        workspace.RunNetOnce(net)

        mean_reference = workspace.FetchBlob("mean_reference")
        mean_fake_quantized = workspace.FetchBlob("mean_fake_quantized")
        mean_quantized = workspace.FetchBlob("mean_quantized")
        mean_quantized_pruned = workspace.FetchBlob("mean_quantized_pruned")

        # Approximate match against the dequantized reference, exact match
        # against the fake-quantized and rowwise-sparse results.
        np.testing.assert_array_almost_equal(mean_reference, mean_quantized)
        np.testing.assert_array_equal(mean_fake_quantized, mean_quantized)
        np.testing.assert_array_equal(mean_quantized_pruned, mean_quantized)
Example #27
0
                    default='',
                    help='empty or async_scheduling')
parser.add_argument('--async_threads',
                    type=int,
                    default=0,
                    help='async_thread_pool_size')
parser.add_argument('--batch_size', type=int, default=1, help='Batch Size')
parser.add_argument('--steps',
                    type=int,
                    default=10,
                    help='Number of steps to measure.')
args, _ = parser.parse_known_args()

workspace.ResetWorkspace()
workspace.GlobalInit([
    'caffe2', '--caffe2_log_level=2',
    '--caffe2_net_async_thread_pool_size=' + str(args.async_threads)
])

init_net = mynet.init_net
predict_net = mynet.predict_net
# you must name it something
predict_net.name = "predict"

from caffe2.python import net_drawer

g = net_drawer.GetPydotGraph(predict_net, rankdir="TB")
g.write_dot('test.dot')

if args.proto_type != '':
    predict_net.type = 'async_scheduling'
#predict_net.type = 'prof_dag'
Example #28
0
    )
    parser.add_argument("--use_pool1",
                        type=int,
                        default=0,
                        help="use pool1 layer")
    parser.add_argument("--use_local_file",
                        type=int,
                        default=0,
                        help="use local file")
    parser.add_argument("--crop_per_clip",
                        type=int,
                        default=1,
                        help="number of spatial crops per clip")

    args = parser.parse_args()
    log.info(args)

    assert model_builder.model_validation(
        args.model_name,
        args.model_depth,
        args.clip_length_of if args.input_type else args.clip_length_rgb,
        args.crop_size if not args.use_convolutional_pred else 112,
    )

    ExtractFeatures(args)


if __name__ == "__main__":
    # Entry point: initialise Caffe2 with log level 2 before running main().
    workspace.GlobalInit(["caffe2", "--caffe2_log_level=2"])
    main()
import collections

import caffe2.python.hypothesis_test_util as hu
import hypothesis.strategies as st
import numpy as np
from caffe2.python import core, dyndep, workspace
from caffe2.quantization.server import utils as dnnlowp_utils
from dnnlowp_test_utils import (
    avoid_vpmaddubsw_overflow_fc,
    check_quantized_results_close,
)
from hypothesis import given

# Module-level setup, executed at import time: load the dnnlowp_ops operator
# library through dyndep, then initialise Caffe2 with the OpenMP thread-count
# flag set to 11.
dyndep.InitOpsLibrary("//caffe2/caffe2/quantization/server:dnnlowp_ops")
workspace.GlobalInit(["caffe2", "--caffe2_omp_num_threads=11"])


class DNNLowPFullyConnectedOpTest(hu.HypothesisTestCase):
    # correctness test with no quantization error in inputs
    @given(input_channels=st.sampled_from([3, 4, 5, 8, 16, 32]),
           output_channels=st.integers(2, 16),
           batch_size=st.integers(1, 16),
           in_quantized=st.booleans(),
           out_quantized=st.booleans(),
           weight_quantized=st.booleans(),
           prepack_weight=st.booleans(),
           preserve_activation_sparsity=st.booleans(),
           preserve_weight_sparsity=st.booleans(),
           fuse_relu=st.booleans(),
           **hu.gcs_cpu_only)
Example #30
0
def test_one_crop(lfb=None, suffix='', shift=None):
    """Run one-crop inference over the test split and log its metrics.

    Builds the test model, loads weights from ``cfg.TEST.PARAMS_FILE``, runs
    the net for the number of iterations reported by
    ``misc.get_total_test_iters`` and finalizes/logs the metrics.

    Args:
        lfb: Passed to ``build_model``. When ``None`` and ``cfg.LFB.ENABLED``
            is set, it is obtained with ``get_lfb`` from
            ``cfg.LFB.MODEL_PARAMS_FILE``.
        suffix: Forwarded to ``build_model`` and to the per-iteration metric
            logging.
        shift: Forwarded to ``build_model`` and used to name the finalized
            metrics. Defaults to ``cfg.TEST.CROP_SHIFT`` when ``None``.

    Raises:
        Exception: If ``cfg.TEST.PARAMS_FILE`` is not set.
    """
    workspace.GlobalInit(['caffe2', '--caffe2_log_level=0'])
    np.random.seed(cfg.RNG_SEED)

    cfg.AVA.FULL_EVAL = True

    if lfb is None and cfg.LFB.ENABLED:
        print_cfg()
        lfb = get_lfb(cfg.LFB.MODEL_PARAMS_FILE, is_train=False)

    print_cfg()

    workspace.ResetWorkspace()
    logger.info("Done ResetWorkspace...")

    timer = Timer()

    logger.warning('Testing started...')  # for monitoring cluster jobs

    if shift is None:
        shift = cfg.TEST.CROP_SHIFT
    test_model = model_builder_video.ModelBuilder(train=False,
                                                  use_cudnn=True,
                                                  cudnn_exhaustive_search=True,
                                                  split=cfg.TEST.DATA_TYPE)

    test_model.build_model(lfb=lfb, suffix=suffix, shift=shift)

    if cfg.PROF_DAG:
        test_model.net.Proto().type = 'prof_dag'
    else:
        test_model.net.Proto().type = 'dag'

    workspace.RunNetOnce(test_model.param_init_net)
    workspace.CreateNet(test_model.net)

    misc.save_net_proto(test_model.net)
    misc.save_net_proto(test_model.param_init_net)

    total_test_net_iters = misc.get_total_test_iters(test_model)

    test_model.start_data_loader()
    # Everything after the loader starts runs under try/finally so the loader
    # is shut down even when loading weights or inference raises.
    try:
        test_meter = metrics.MetricsCalculator(
            model=test_model,
            split=cfg.TEST.DATA_TYPE,
            video_idx_to_name=test_model.input_db._video_idx_to_name,
            total_num_boxes=(test_model.input_db._num_boxes_used
                             if cfg.DATASET in ['ava', 'avabox'] else None))

        if cfg.TEST.PARAMS_FILE:
            checkpoints.load_model_from_params_file_for_test(
                test_model, cfg.TEST.PARAMS_FILE)
        else:
            raise Exception('No params files specified for testing model.')

        begin_time = time.time()

        for test_iter in range(total_test_net_iters):
            timer.tic()
            workspace.RunNet(test_model.net.Proto().name)
            timer.toc()

            # One-off diagnostics after the first iteration has run.
            if test_iter == 0:
                misc.print_net(test_model)
                os.system('nvidia-smi')
                misc.show_flops_params(test_model)

            test_meter.calculate_and_log_all_metrics_test(
                test_iter, timer, total_test_net_iters, suffix)

        logger.info('TTTTTTTIME: {}'.format(time.time() - begin_time))

        test_meter.finalize_metrics(name=get_test_name(shift))
        # NOTE: `test_iter` is the loop variable, so this line assumes the
        # loop above executed at least once.
        test_meter.log_final_metrics(test_iter, total_test_net_iters)
    finally:
        test_model.shutdown_data_loader()