Exemple #1
0
    def test_collect_tensor_ops(self):
        init_net = core.Net('init_net')
        blobs = ['blob_1', 'blob_2', 'blob_3']
        bvec_map = {}
        ONE = init_net.ConstantFill([], 'ONE', shape=[1, 2], value=1)
        for b in blobs:
            init_net.ConstantFill([], [b], shape=[1, 2], value=0)
            bvec_map[b] = b + '_vec'
            init_net.CreateTensorVector([], [bvec_map[b]])

        reader_net = core.Net('reader_net')
        for b in blobs:
            reader_net.Add([b, ONE], [b])

        collect_net = core.Net('collect_net')
        num_to_collect = 1000
        max_example_to_cover = 100000
        bvec = [bvec_map[b] for b in blobs]
        collect_net.CollectTensor(
            bvec + blobs,
            bvec,
            num_to_collect=num_to_collect,
        )

        print('Collect Net Proto: {}'.format(collect_net.Proto()))

        plan = core.Plan('collect_data')
        plan.AddStep(core.execution_step('collect_init', init_net))
        plan.AddStep(
            core.execution_step('collect_data', [reader_net, collect_net],
                                num_iter=max_example_to_cover))
        workspace.RunPlan(plan)

        # concat the collected tensors
        concat_net = core.Net('concat_net')
        bconcated_map = {}
        bsize_map = {}
        for b in blobs:
            bconcated_map[b] = b + '_concated'
            bsize_map[b] = b + '_size'
            concat_net.ConcatTensorVector([bvec_map[b]], [bconcated_map[b]])
            concat_net.TensorVectorSize([bvec_map[b]], [bsize_map[b]])

        workspace.RunNetOnce(concat_net)

        # check data
        reference_result = workspace.FetchBlob(bconcated_map[blobs[0]])
        self.assertEqual(reference_result.shape,
                         (min(num_to_collect, max_example_to_cover), 2))
        size = workspace.FetchBlob(bsize_map[blobs[0]])
        self.assertEqual(tuple(), size.shape)
        self.assertEqual(min(num_to_collect, max_example_to_cover),
                         size.item())

        hist, _ = np.histogram(reference_result[:, 0],
                               bins=10,
                               range=(1, max_example_to_cover))
        print('Sample histogram: {}'.format(hist))

        self.assertTrue(all(hist > 0.6 * (num_to_collect / 10)))
        for i in range(1, len(blobs)):
            result = workspace.FetchBlob(bconcated_map[blobs[i]])
            self.assertEqual(reference_result.tolist(), result.tolist())
    def test_groupwise_dnnlowp_conv_int(
        self,
        stride,
        pad,
        kernel,
        dilation,
        size,
        group,
        input_channels_per_group,
        output_channels_per_group,
        batch_size,
        order,
        prepack_weight,
        preserve_activation_sparsity,
        preserve_weight_sparsity,
        gc,
        dc,
    ):
        assume(group == 1 or dilation == 1)
        assume((not prepack_weight) or order == "NHWC")

        X, W, b = generate_conv_inputs(
            stride,
            pad,
            kernel,
            dilation,
            size,
            group,
            input_channels_per_group,
            output_channels_per_group,
            batch_size,
            order,
            groupwise_quantization=True,
            preserve_activation_sparsity=preserve_activation_sparsity,
            preserve_weight_sparsity=preserve_weight_sparsity,
        )

        Output = collections.namedtuple("Output",
                                        ["Y", "op_type", "engine", "order"])
        outputs = []

        op_engine_list = [
            ("Conv", ""),
            ("Conv", "DNNLOWP"),
            ("Conv", "DNNLOWP_16"),
            ("Int8Conv", "DNNLOWP"),
        ]

        for op_type, engine in op_engine_list:
            init_net = core.Net("test_init_net")
            net = core.Net("test_net")

            do_quantize = "DNNLOWP" in engine
            do_dequantize = "DNNLOWP" in engine
            do_prepack_weight = engine == "DNNLOWP" and prepack_weight

            if do_quantize:
                quantize = core.CreateOperator(
                    "Quantize",
                    ["X"],
                    ["X_q"],
                    preserve_activation_sparsity=preserve_activation_sparsity,
                    engine=engine,
                    device_option=gc,
                )
                net.Proto().op.extend([quantize])

            if do_prepack_weight:
                X_min = 0 if X.size == 0 else X.min()
                X_max = 0 if X.size == 0 else X.max()
                x_q_param = hardcode_scale_zp.choose_quantization_params(
                    X_min, X_max)
                inputs = ["W"]
                if do_dequantize:
                    inputs += ["b"]
                pack = core.CreateOperator(
                    "Int8ConvPackWeight",
                    inputs,
                    ["W_packed"],
                    group=group,
                    quantize_groupwise=1,
                    preserve_weight_sparsity=preserve_weight_sparsity,
                    in_scale=x_q_param.scale,
                    engine=engine,
                )
                init_net.Proto().op.extend([pack])

            conv = core.CreateOperator(
                op_type,
                [
                    "X_q" if do_quantize else "X",
                    "W_packed" if do_prepack_weight else "W",
                    "b",
                ],
                ["Y_q" if do_dequantize else "Y"],
                stride=stride,
                kernel=kernel,
                dilation=dilation,
                pad=pad,
                order=order,
                preserve_activation_sparsity=preserve_activation_sparsity,
                preserve_weight_sparsity=preserve_weight_sparsity,
                engine=engine,
                group=group,
                quantize_groupwise=1,
                device_option=gc,
            )
            if do_dequantize or do_prepack_weight:
                # groupwise quantization only works with static quantization
                # so we need to set quantization parameters
                dnnlowp_utils.add_quantization_param_args(
                    conv, outputs[0][0], preserve_activation_sparsity)
            net.Proto().op.extend([conv])

            if do_dequantize:
                dequantize = core.CreateOperator(
                    "Dequantize",
                    ["Y_q"],
                    ["Y"],
                    preserve_activation_sparsity=preserve_activation_sparsity,
                    engine=engine,
                    device_option=gc,
                )
                net.Proto().op.extend([dequantize])

            run_conv_or_fc(self, init_net, net, X, W, b, op_type, engine,
                           order, gc, outputs)

        check_quantized_results_close(outputs,
                                      symmetric=preserve_activation_sparsity)
Exemple #3
0
    def test_dnnlowp_elementwise_mul_int(
        self, N, is_empty, in_quantized, out_quantized, in_place, gc, dc
    ):
        if is_empty:
            N = 0
        # FIXME: DNNLOWP Mul doesn't support inplace operation and
        # dequantize_output=1 at the same time
        if in_place[0] or in_place[1]:
            in_quantized = True
            out_quantized = True

        # All inputs have scale 1, so exactly represented after quantization
        min_ = -100
        max_ = min_ + 255
        A = np.round(np.random.rand(N) * (max_ - min_) + min_)
        A = A.astype(np.float32)
        if N != 0:
            A[0] = min_
            A[1] = max_

        B = np.round(np.random.rand(N) * 255 - 128).astype(np.float32)
        if N != 0:
            B[0] = -128
            B[1] = 127

        Output = collections.namedtuple("Output", ["Y", "engine"])
        outputs = []

        engine_list = ["", "DNNLOWP"]
        for engine in engine_list:
            net = core.Net("test_net")

            do_quantize = "DNNLOWP" in engine and in_quantized
            do_dequantize = "DNNLOWP" in engine and out_quantized

            if do_quantize:
                quantize_A = core.CreateOperator(
                    "Quantize", ["A"], ["A_q"], engine=engine, device_option=gc
                )
                net.Proto().op.extend([quantize_A])

                quantize_B = core.CreateOperator(
                    "Quantize", ["B"], ["B_q"], engine=engine, device_option=gc
                )
                net.Proto().op.extend([quantize_B])

            out = "Y"
            if in_place[0]:
                out = "A"
            elif in_place[1]:
                out = "B"

            mul = core.CreateOperator(
                "Mul",
                ["A_q", "B_q"] if do_quantize else ["A", "B"],
                [(out + "_q") if do_dequantize else out],
                dequantize_output=not do_dequantize,
                engine=engine,
                device_option=gc,
            )
            net.Proto().op.extend([mul])

            if do_dequantize:
                dequantize = core.CreateOperator(
                    "Dequantize", [out + "_q"], [out], engine=engine, device_option=gc
                )
                net.Proto().op.extend([dequantize])

            self.ws.create_blob("A").feed(A, device_option=gc)
            self.ws.create_blob("B").feed(B, device_option=gc)
            self.ws.run(net)
            outputs.append(Output(Y=self.ws.blobs[out].fetch(), engine=engine))

        check_quantized_results_close(outputs)
Exemple #4
0
    def CreateModel(self):
        log.debug("Start training")
        model = cnn.CNNModelHelper(name="char_rnn")

        input_blob, seq_lengths, hidden_init, cell_init, target = \
            model.net.AddExternalInputs(
                'input_blob',
                'seq_lengths',
                'hidden_init',
                'cell_init',
                'target',
            )

        hidden_output_all, self.hidden_output, _, self.cell_state = LSTM(
            model,
            input_blob,
            seq_lengths, (hidden_init, cell_init),
            self.D,
            self.hidden_size,
            scope="LSTM")
        output = model.FC(hidden_output_all,
                          None,
                          dim_in=self.hidden_size,
                          dim_out=self.D,
                          axis=2)

        # axis is 2 as first two are T (time) and N (batch size).
        # We treat them as one big batch of size T * N
        softmax = model.Softmax(output, 'softmax', axis=2)

        softmax_reshaped, _ = model.Reshape(softmax, ['softmax_reshaped', '_'],
                                            shape=[-1, self.D])

        # Create a copy of the current net. We will use it on the forward
        # pass where we don't need loss and backward operators
        self.forward_net = core.Net(model.net.Proto())

        xent = model.LabelCrossEntropy([softmax_reshaped, target], 'xent')
        # Loss is average both across batch and through time
        # Thats why the learning rate below is multiplied by self.seq_length
        loss = model.AveragedLoss(xent, 'loss')
        model.AddGradientOperators([loss])

        # Hand made SGD update. Normally one can use helper functions
        # to build an optimizer
        ITER = model.Iter("iter")
        LR = model.LearningRate(ITER,
                                "LR",
                                base_lr=-0.1 * self.seq_length,
                                policy="step",
                                stepsize=1,
                                gamma=0.9999)
        ONE = model.param_init_net.ConstantFill([],
                                                "ONE",
                                                shape=[1],
                                                value=1.0)

        # Update weights for each of the model parameters
        for param in model.params:
            param_grad = model.param_to_grad[param]
            model.WeightedSum([param, ONE, param_grad, LR], param)

        self.model = model
        self.predictions = softmax
        self.loss = loss

        self.prepare_state = core.Net("prepare_state")
        self.prepare_state.Copy(self.hidden_output, hidden_init)
        self.prepare_state.Copy(self.cell_state, cell_init)
    def test_dnnlowp_average_pool(
        self,
        ndim,
        stride,
        pad,
        kernel,
        size,
        input_channels,
        batch_size,
        order,
        in_quantized,
        gc,
        dc,
    ):
        kernel = 2  # Only kernel size 2 is supported
        assume(kernel <= size)
        assume(pad < kernel)

        C = input_channels
        N = batch_size

        strides = (stride, ) * ndim
        pads = (pad, ) * (ndim * 2)
        kernels = (kernel, ) * ndim
        sizes = (size, ) * ndim

        # X has scale 1, so no input quantization error
        min_ = -100
        max_ = min_ + 255
        if order == "NCHW":
            X = np.round(
                np.random.rand(*((N, C) + sizes)) * (max_ - min_) + min_)
            X = X.astype(np.float32)
            X[(0, ) * (ndim + 2)] = min_
            X[(0, ) * (ndim + 1) + (1, )] = max_
        elif order == "NHWC":
            X = np.round(
                np.random.rand(*((N, ) + sizes + (C, ))) * (max_ - min_) +
                min_)
            X = X.astype(np.float32)
            X[(0, ) * (ndim + 2)] = min_
            X[(0, 1) + (0, ) * ndim] = max_

        Output = collections.namedtuple("Output", ["Y", "op_type", "engine"])
        outputs = []

        op_engine_list = [
            ("AveragePool", ""),
            ("AveragePool", "DNNLOWP"),
            ("Int8AveragePool", "DNNLOWP"),
        ]

        for op_type, engine in op_engine_list:
            net = core.Net("test_net")

            do_quantize = "DNNLOWP" in engine and in_quantized

            if do_quantize:
                quantize = core.CreateOperator("Quantize", ["X"], ["X_q"],
                                               engine=engine,
                                               device_option=gc)
                net.Proto().op.extend([quantize])

            max_pool = core.CreateOperator(
                op_type,
                ["X_q" if do_quantize else "X"],
                ["Y_q" if engine == "DNNLOWP" else "Y"],
                strides=strides,
                kernels=kernels,
                pads=pads,
                order=order,
                engine=engine,
                device_option=gc,
            )
            net.Proto().op.extend([max_pool])

            if engine == "DNNLOWP":
                dequantize = core.CreateOperator("Dequantize", ["Y_q"], ["Y"],
                                                 engine=engine,
                                                 device_option=gc)
                net.Proto().op.extend([dequantize])

            self.ws.create_blob("X").feed(X, device_option=gc)
            self.ws.run(net)
            outputs.append(
                Output(Y=self.ws.blobs["Y"].fetch(),
                       op_type=op_type,
                       engine=engine))

        check_quantized_results_close(outputs)
 def write_ex(self, fields, local_init_net, local_finish_net, status):
     self._wrapper._new_writer(self.schema(), local_init_net)
     enqueue_net = core.Net('enqueue')
     enqueue(enqueue_net, self._wrapper.queue(), fields, status)
     return [enqueue_net]
    def test_dnnlowp_conv_acc16_outlier(
        self,
        stride,
        pad,
        kernel,
        dilation,
        size,
        group,
        input_channels_per_group,
        output_channels_per_group,
        batch_size,
        order,
        in_quantized,
        out_quantized,
        weight_quantized,
        prepack_weight,
        nbits_in_non_outlier,
        share_col_buffer,
        preserve_activation_sparsity,
        preserve_weight_sparsity,
        gc,
        dc,
    ):
        assume(group == 1 or dilation == 1)
        assume(size >= dilation * (kernel - 1) + 1)

        input_channels = input_channels_per_group * group
        output_channels = output_channels_per_group * group

        if nbits_in_non_outlier == 0:
            X, W, b = generate_conv_inputs(
                stride,
                pad,
                kernel,
                dilation,
                size,
                group,
                input_channels_per_group,
                output_channels_per_group,
                batch_size,
                order,
                preserve_activation_sparsity=preserve_activation_sparsity,
                preserve_weight_sparsity=preserve_weight_sparsity,
            )
        else:
            X_min = 0 if preserve_activation_sparsity else -77
            X_max = X_min + 255
            X = np.random.rand(batch_size, size, size,
                               input_channels) * 4 + X_min
            X = np.round(X).astype(np.float32)
            X[..., 0] = X_min
            X[0, 0, 0, 1] = X_max

            if preserve_weight_sparsity:
                W_min = -128
                W_max = 100
            else:
                W_min = -100
                W_max = W_min + 255
            W = (np.random.rand(output_channels, kernel, kernel,
                                input_channels_per_group) * 4 - 2 + W_min +
                 128)
            W = np.round(W).astype(np.float32)
            W[0, 0, 0, 0] = W_min
            W[1, 0, 0, 0] = W_max
            W[..., 1] = W_min + 128

            # No input quantization error in bias
            b = np.round(np.random.randn(output_channels)).astype(np.float32)

        Output = collections.namedtuple("Output",
                                        ["Y", "op_type", "engine", "order"])
        outputs = []

        op_engine_list = [
            ("Conv", ""),
            ("Conv", "DNNLOWP_ACC16"),
            ("Int8Conv", "DNNLOWP_ACC16"),
        ]

        for op_type, engine in op_engine_list:
            init_net = core.Net("test_init_net")
            net = core.Net("test_net")

            do_quantize = "DNNLOWP" in engine and in_quantized
            do_dequantize = "DNNLOWP" in engine and out_quantized
            do_quantize_weight = "DNNLOWP" in engine and weight_quantized
            do_prepack_weight = "DNNLOWP" in engine and prepack_weight

            if do_quantize:
                quantize = core.CreateOperator(
                    "Quantize",
                    ["X"],
                    ["X_q"],
                    preserve_activation_sparsity=preserve_activation_sparsity,
                    engine="DNNLOWP",
                    device_option=gc,
                )
                net.Proto().op.extend([quantize])

            x_q_param = dnnlowp_utils.choose_quantization_params(
                X.min(), X.max(), preserve_activation_sparsity)
            if do_quantize_weight:
                int8_given_tensor_fill, w_q_param = dnnlowp_utils.create_int8_given_tensor_fill(
                    W, "W_q", preserve_weight_sparsity)
                init_net.Proto().op.extend([int8_given_tensor_fill])

                # Bias
                int8_bias_tensor_fill = dnnlowp_utils.create_int8_bias_tensor_fill(
                    b, "b_q", x_q_param, w_q_param)
                init_net.Proto().op.extend([int8_bias_tensor_fill])

            if do_prepack_weight:
                inputs = ["W_q" if do_quantize_weight else "W"]
                if do_dequantize:
                    inputs += ["b_q" if do_quantize_weight else "b"]
                pack = core.CreateOperator(
                    "Int8ConvPackWeight",
                    inputs,
                    ["W_packed"],
                    group=group,
                    nbits_in_non_outlier=nbits_in_non_outlier,
                    preserve_weight_sparsity=preserve_weight_sparsity,
                    in_scale=x_q_param.scale,
                    engine=engine,
                )
                init_net.Proto().op.extend([pack])

            conv = core.CreateOperator(
                op_type,
                [
                    "X_q" if do_quantize else "X",
                    "W_packed" if do_prepack_weight else
                    ("W_q" if do_quantize_weight else "W"),
                    "b_q" if do_quantize_weight else "b",
                ],
                ["Y_q" if do_dequantize else "Y"],
                stride=stride,
                kernel=kernel,
                dilation=dilation,
                pad=pad,
                order=order,
                dequantize_output=not do_dequantize,
                nbits_in_non_outlier=nbits_in_non_outlier,
                shared_buffer=(1 if share_col_buffer else 0),
                preserve_activation_sparsity=preserve_activation_sparsity,
                preserve_weight_sparsity=preserve_weight_sparsity,
                engine=engine,
                group=group,
                device_option=gc,
            )
            if do_dequantize or do_quantize_weight or do_prepack_weight:
                # When quantized weight is provided, we can't rescale the
                # output dynamically by looking at the range of output of each
                # batch, so here we provide the range of output observed from
                # fp32 reference implementation
                dnnlowp_utils.add_quantization_param_args(
                    conv, outputs[0][0], preserve_activation_sparsity)
            net.Proto().op.extend([conv])

            if do_dequantize:
                dequantize = core.CreateOperator("Dequantize", ["Y_q"], ["Y"],
                                                 engine="DNNLOWP",
                                                 device_option=gc)
                net.Proto().op.extend([dequantize])

            self.ws.create_blob("X").feed(X, device_option=gc)
            self.ws.create_blob("W").feed(W, device_option=gc)
            self.ws.create_blob("b").feed(b, device_option=gc)
            self.ws.run(init_net)
            self.ws.run(net)
            Y = self.ws.blobs["Y"].fetch()
            outputs.append(
                Output(Y=Y, op_type=op_type, engine=engine, order=order))

        check_quantized_results_close(outputs,
                                      symmetric=preserve_activation_sparsity)
Exemple #8
0
    def test_imageinput(self, size_tuple, means, stds, gc, dc):
        # TODO: Does not test on GPU and does not test use_gpu_transform
        # WARNING: Using ModelHelper automatically does NHWC to NCHW
        # transformation if needed.
        width, height, minsize, crop = size_tuple
        means = [float(m) for m in means]
        stds = [float(s) for s in stds]
        out_dir = tempfile.mkdtemp()
        count_images = 2  # One with bounding box and one without
        expected_images = create_test(out_dir,
                                      width=width,
                                      height=height,
                                      default_bound=(3, 5, height - 3,
                                                     width - 5),
                                      minsize=minsize,
                                      crop=crop,
                                      means=means,
                                      stds=stds,
                                      count=count_images)
        for device_option in dc:
            with hu.temp_workspace():
                reader_net = core.Net('reader')
                reader_net.CreateDB([], 'DB', db=out_dir, db_type="lmdb")
                workspace.RunNetOnce(reader_net)
                imageop = core.CreateOperator(
                    'ImageInput', ['DB'], ["data", "label"],
                    batch_size=count_images,
                    color=3,
                    minsize=minsize,
                    crop=crop,
                    is_test=True,
                    bounding_ymin=3,
                    bounding_xmin=5,
                    bounding_height=height - 3,
                    bounding_width=width - 5,
                    mean_per_channel=means,
                    std_per_channel=stds,
                    use_gpu_transform=(device_option.device_type == 1))

                imageop.device_option.CopyFrom(device_option)
                main_net = core.Net('main')
                main_net.Proto().op.extend([imageop])
                workspace.RunNetOnce(main_net)
                l = workspace.FetchBlob('label')
                result = workspace.FetchBlob('data').astype(np.int32)
                # If we don't use_gpu_transform, the output is in NHWC
                # Our reference output is CHW so we swap
                if device_option.device_type != 1:
                    expected = [
                        img.swapaxes(0, 1).swapaxes(1, 2)
                        for img in expected_images
                    ]
                else:
                    expected = expected_images
                for i in range(count_images):
                    self.assertEqual(l[i], i)
                    self.assertEqual((expected[i] - result[i] > 1).sum(), 0)
                # End for
            # End with
        # End for
        shutil.rmtree(out_dir)
    def test_dnnlowp_conv_relu_int(
        self,
        stride,
        pad,
        kernel,
        dilation,
        size,
        group,
        input_channels_per_group,
        output_channels_per_group,
        batch_size,
        order,
        share_col_buffer,
        gc,
        dc,
    ):
        assume(group == 1 or dilation == 1)

        X, W, b = generate_conv_inputs(
            stride,
            pad,
            kernel,
            dilation,
            size,
            group,
            input_channels_per_group,
            output_channels_per_group,
            batch_size,
            order,
        )

        Output = collections.namedtuple("Output",
                                        ["Y", "op_type", "engine", "order"])
        outputs = []

        op_engine_list = [
            ("Conv", ""),
            ("ConvRelu", "DNNLOWP"),
            ("ConvRelu", "DNNLOWP_16"),
            ("Int8ConvRelu", "DNNLOWP"),
        ]

        for op_type, engine in op_engine_list:
            net = core.Net("test_net")

            if "DNNLOWP" in engine:
                quantize = core.CreateOperator("Quantize", ["X"], ["X_q"],
                                               engine=engine,
                                               device_option=gc)
                net.Proto().op.extend([quantize])

                conv = core.CreateOperator(
                    op_type,
                    ["X_q", "W", "b"],
                    ["Y_q"],
                    stride=stride,
                    kernel=kernel,
                    dilation=dilation,
                    pad=pad,
                    order=order,
                    engine=engine,
                    shared_buffer=(1 if share_col_buffer else 0),
                    group=group,
                    device_option=gc,
                )
                net.Proto().op.extend([conv])

                dequantize = core.CreateOperator("Dequantize", ["Y_q"], ["Y"],
                                                 engine=engine,
                                                 device_option=gc)
                net.Proto().op.extend([dequantize])
            else:
                conv = core.CreateOperator(
                    op_type,
                    ["X", "W", "b"],
                    ["Y"],
                    stride=stride,
                    kernel=kernel,
                    dilation=dilation,
                    pad=pad,
                    order=order,
                    shared_buffer=(1 if share_col_buffer else 0),
                    engine=engine,
                    group=group,
                    device_option=gc,
                )
                net.Proto().op.extend([conv])

                relu = core.CreateOperator("Relu", ["Y"], ["Y"],
                                           engine=engine,
                                           device_option=gc)
                net.Proto().op.extend([relu])

            self.ws.create_blob("X").feed(X, device_option=gc)
            self.ws.create_blob("W").feed(W, device_option=gc)
            self.ws.create_blob("b").feed(b, device_option=gc)
            self.ws.run(net)
            Y = self.ws.blobs["Y"].fetch()
            outputs.append(
                Output(Y=Y, op_type=op_type, engine=engine, order=order))

        check_quantized_results_close(outputs)
Exemple #10
0
 def create_init_net(self, name):
     init_net = core.Net(name)
     self._add_global_constants(init_net)
     return init_net
Exemple #11
0
    def test_meta_net_def_net_runs(self):
        for param, value in viewitems(self.params):
            workspace.FeedBlob(param, value)

        extra_init_net = core.Net('extra_init')
        extra_init_net.ConstantFill('data', 'data', value=1.0)
        pem = pe.PredictorExportMeta(
            predict_net=self.predictor_export_meta.predict_net,
            parameters=self.predictor_export_meta.parameters,
            inputs=self.predictor_export_meta.inputs,
            outputs=self.predictor_export_meta.outputs,
            shapes=self.predictor_export_meta.shapes,
            extra_init_net=extra_init_net,
            net_type='dag',
        )

        db_type = 'minidb'
        db_file = tempfile.NamedTemporaryFile(delete=False,
                                              suffix=".{}".format(db_type))
        pe.save_to_db(db_type=db_type,
                      db_destination=db_file.name,
                      predictor_export_meta=pem)

        workspace.ResetWorkspace()

        meta_net_def = pe.load_from_db(
            db_type=db_type,
            filename=db_file.name,
        )

        self.assertTrue("data" not in workspace.Blobs())
        self.assertTrue("y" not in workspace.Blobs())

        init_net = pred_utils.GetNet(meta_net_def, pc.PREDICT_INIT_NET_TYPE)

        # 0-fills externalblobs blobs and runs extra_init_net
        workspace.RunNetOnce(init_net)

        self.assertTrue("data" in workspace.Blobs())
        self.assertTrue("y" in workspace.Blobs())

        print(workspace.FetchBlob("data"))
        np.testing.assert_array_equal(workspace.FetchBlob("data"),
                                      np.ones(shape=(1, 5)))
        np.testing.assert_array_equal(workspace.FetchBlob("y"),
                                      np.zeros(shape=(1, 10)))

        # Load parameters from DB
        global_init_net = pred_utils.GetNet(meta_net_def,
                                            pc.GLOBAL_INIT_NET_TYPE)
        workspace.RunNetOnce(global_init_net)

        # Run the net with a reshaped input and verify we are
        # producing good numbers (with our custom implementation)
        workspace.FeedBlob("data", np.random.randn(2, 5).astype(np.float32))
        predict_net = pred_utils.GetNet(meta_net_def, pc.PREDICT_NET_TYPE)
        self.assertEqual(predict_net.type, 'dag')
        workspace.RunNetOnce(predict_net)
        np.testing.assert_array_almost_equal(
            workspace.FetchBlob("y"),
            workspace.FetchBlob("data").dot(self.params["y_w"].T) +
            self.params["y_b"])
Exemple #12
0
    def test_inject_copy_multi_use(self):
        net = core.Net("test")
        device_option = caffe2_pb2.DeviceOption()
        device_option.device_type = caffe2_pb2.CUDA
        device_option.cuda_gpu_id = 1

        with core.DeviceScope(device_option):
            net.Relu("data", "relu1")
        net.Relu("data", "relu2")
        with core.DeviceScope(device_option):
            net.Relu("data", "relu3")
        net.Relu("data", "relu4")
        device_option.cuda_gpu_id = 0
        with core.DeviceScope(device_option):
            net.Relu("data", "relu5")
        device_option.cuda_gpu_id = 1
        with core.DeviceScope(device_option):
            net.Relu("data", "relu6")

        new_net, _ = core.InjectCrossDeviceCopies(net)
        op = new_net._net.op[0]
        self.assertEqual(op.type, "CopyCPUToGPU")
        self.assertEqual(op.device_option.device_type, 1)
        self.assertEqual(op.device_option.cuda_gpu_id, 1)
        self.assertEqual(op.output[0], "data_cuda_1")
        op = new_net._net.op[1]
        self.assertEqual(op.type, "Relu")
        self.assertEqual(op.device_option.device_type, 1)
        self.assertEqual(op.device_option.cuda_gpu_id, 1)
        self.assertEqual(op.output[0], "relu1")
        op = new_net._net.op[2]
        self.assertEqual(op.type, "Relu")
        self.assertEqual(op.device_option.device_type, 0)
        self.assertEqual(op.output[0], "relu2")
        op = new_net._net.op[3]
        self.assertEqual(op.type, "Relu")
        self.assertEqual(op.device_option.device_type, 1)
        self.assertEqual(op.device_option.cuda_gpu_id, 1)
        self.assertEqual(op.input[0], "data_cuda_1")
        self.assertEqual(op.output[0], "relu3")
        op = new_net._net.op[4]
        self.assertEqual(op.type, "Relu")
        self.assertEqual(op.device_option.device_type, 0)
        self.assertEqual(op.output[0], "relu4")
        op = new_net._net.op[5]
        self.assertEqual(op.type, "CopyCPUToGPU")
        self.assertEqual(op.device_option.device_type, 1)
        self.assertEqual(op.device_option.cuda_gpu_id, 0)
        self.assertEqual(op.output[0], "data_cuda_0")
        op = new_net._net.op[6]
        self.assertEqual(op.type, "Relu")
        self.assertEqual(op.device_option.device_type, 1)
        self.assertEqual(op.device_option.cuda_gpu_id, 0)
        self.assertEqual(op.input[0], "data_cuda_0")
        self.assertEqual(op.output[0], "relu5")
        op = new_net._net.op[7]
        self.assertEqual(op.type, "Relu")
        self.assertEqual(op.device_option.device_type, 1)
        self.assertEqual(op.device_option.cuda_gpu_id, 1)
        self.assertEqual(op.input[0], "data_cuda_1")
        self.assertEqual(op.output[0], "relu6")
        """
Exemple #13
0
    def testPartialClone(self):
        params = core.Net('params')
        p1 = params.ConstantFill([], ['p1'])
        workspace.CreateNet(params)
        workspace.RunNetOnce(params)

        n = core.Net('original')
        a1 = n.AddExternalInput('a1')
        a2 = n.AddExternalInput('a2')
        b1, b2 = n.Concat([a1, a2], ['b1', 'b2'], axis=0)
        c1 = n.Sum([b1, p1], ['c1'])
        c2 = n.Sum([b2], ['c2'])
        d = n.Sum([c1, c2], ['d'])

        # test that gradient ops are ignored when partial-cloning
        n.AddGradientOperators([d])

        # test some in-place ops
        k = n.Sum([p1], ['k'])
        e = n.Sum([d], ['e'])
        e = n.Sum([e, k], [e])
        e = n.Sum([e], [e])
        f = n.Sum(e, ['f'])

        def net_assert(net, num_ops, inputs, outputs, internals):
            self.assertEqual(len(net.Proto().op), num_ops)
            self.assertEqual(set(net.Proto().external_input), inputs)
            self.assertEqual(set(net.Proto().external_output), outputs)
            all_blobs = set(net.Proto().external_input)
            all_blobs |= set(net.Proto().external_output)
            for op in net.Proto().op:
                all_blobs |= set(op.input) | set(op.output)
            self.assertEqual(all_blobs, inputs | outputs | internals)
            # create net to make sure its valid
            for input in inputs:
                workspace.FeedBlob(input, np.array([]))
            workspace.CreateNet(net)

        n2, (d22, ) = n.ClonePartial('f1', {a1: 'a11', a2: 'a22'}, [d])
        net_assert(n2, 4, {'p1', 'a11', 'a22'}, {'f1/d'},
                   {'f1/b1', 'f1/b2', 'f1/c1', 'f1/c2', 'p1'})
        self.assertTrue(isinstance(d22, core.BlobReference))
        self.assertEqual(d22.Net(), n2)
        self.assertEqual(str(d22), 'f1/d')

        n3, (d22, ) = n.ClonePartial('f2', [b1, b2], [d])
        net_assert(n3, 3, {'p1', 'b1', 'b2'}, {'f2/d'},
                   {'f2/c1', 'f2/c2', 'p1'})
        self.assertEqual(str(d22), 'f2/d')

        n4, (c22, ) = n.ClonePartial('f3', [b1], [c1])
        net_assert(n4, 1, {'p1', 'b1'}, {'f3/c1'}, {'p1'})
        self.assertEqual(str(c22), 'f3/c1')

        n5, (c11, c22) = n.ClonePartial('f4', [b1, b2], [c1, c2])
        net_assert(n5, 2, {'p1', 'b1', 'b2'}, {'f4/c1', 'f4/c2'}, {'p1'})
        self.assertEqual(str(c11), 'f4/c1')
        self.assertEqual(str(c22), 'f4/c2')

        with self.assertRaises(AssertionError):
            n.ClonePartial('f4', [a1, a2, c2], [d])

        n6, (e22, ) = n.ClonePartial('f5', [d], [e])
        net_assert(n6, 4, {'p1', 'd'}, {'f5/e'}, {'f5/k', 'p1'})
        self.assertEqual(str(e22), 'f5/e')

        n8, (e22, f22) = n.ClonePartial('f7', [d], [e, f])
        net_assert(n8, 5, {'p1', 'd'}, {'f7/e', 'f7/f'}, {'p1', 'f7/k'})
        self.assertEqual(str(e22), 'f7/e')
        self.assertEqual(str(f22), 'f7/f')

        params._CheckLookupTables()
        n._CheckLookupTables()
Exemple #14
0
    def _create_rnn_variant(cls, init_model, pred_model, n, opset_version):
        assert init_model is not None, "cannot convert RNNs without access to the full model"
        assert pred_model is not None, "cannot convert RNNs without access to the full model"

        attrs = dict(n.attrs) # make a copy, which is safe to mutate
        hidden_size = attrs.pop('hidden_size')
        direction = force_unicode(attrs.pop('direction', 'forward'))

        if n.op_type == 'RNN':
            activation = force_unicode(attrs.pop('activations', ('tanh',))[0])
        elif n.op_type == 'GRU':
            linear_before_reset = attrs.pop('linear_before_reset', 0)

        assert not attrs, "unsupported RNN attributes: " + str(attrs.keys())
        assert direction in ['forward', 'bidirectional'], "unsupported backwards RNN/GRU/LSTM"

        if n.op_type in ['RNN', 'GRU']:
            input_blob, W, R, B, sequence_lens, initial_h = n.inputs
        elif n.op_type == 'LSTM':
            input_blob, W, R, B, sequence_lens, initial_h, initial_c = n.inputs

        if sequence_lens == "":
            sequence_lens = None

        for x in itertools.chain(init_model.graph.input,
                                 init_model.graph.value_info,
                                 pred_model.graph.input,
                                 pred_model.graph.value_info):
            if x.name == W:
                input_size = x.type.tensor_type.shape.dim[2].dim_value
                break
        else:
            raise RuntimeError("best-effort shape inference for RNN/GRU/LSTM failed")

        init_net = core.Net("init-net")
        pred_mh = ModelHelper()

        init_net.Reshape(W, [W, cls.dummy_name()], shape=[1,-1,0])
        init_net.Squeeze(W, W, dims=[0])
        init_net.Reshape(R, [R, cls.dummy_name()], shape=[1,-1,0])
        init_net.Squeeze(R, R, dims=[0])
        init_net.Reshape(B, [B, cls.dummy_name()], shape=[1,-1])
        init_net.Squeeze(B, B, dims=[0])

        if n.op_type == 'RNN':
            def reform(*args):
                pass

            def make_cell(*args, **kwargs):
                return rnn_cell.BasicRNN(*args, activation=activation, **kwargs)

            def make_rnn(direction_offset):
                return cls._make_rnn_direction(
                    input_blob, B, W, R, [(initial_h, '/initial_h')], sequence_lens,
                    pred_mh, init_net, input_size, hidden_size, 1, direction_offset,
                    "/i2h_b", "/gates_t_b", "/i2h_w", "/gates_t_w",
                    reform, make_cell, lambda x: x)

        elif n.op_type == 'GRU':
            def reform(Bi, Br, W_, R_, name, hidden_size, init_net):
                # caffe2 has a different order from onnx. We need to rearrange
                #  z r h  -> r z h
                reforms = ((W_, 'i2h_w',    True,  [(0,-1)]),
                           (R_, 'gate_t_w', False, [(0,-1)]),
                           (Bi, 'i2h_b',    True,  []),
                           (Br, 'gate_t_b', False, []))
                cls._rnn_reform_weights(reforms, name, hidden_size, init_net,
                                        ['update', 'reset', 'output'], [1, 0, 2])

            def make_cell(*args, **kwargs):
                return gru_cell.GRU(*args, linear_before_reset=linear_before_reset, **kwargs)

            def make_rnn(direction_offset):
                return cls._make_rnn_direction(
                    input_blob, B, W, R, [(initial_h, '/initial_h')], sequence_lens,
                    pred_mh, init_net, input_size, hidden_size, 3, direction_offset,
                    "_bias_i2h", "_bias_gates", "/i2h_w_pre", "/gates_t_w_pre",
                    reform, make_cell, lambda x: x)

        elif n.op_type == 'LSTM':
            def reform(Bi, Br, W_, R_, name, hidden_size, init_net):
                # caffe2 has a different order from onnx. We need to rearrange
                #   i o f c -> i f o c
                reforms = ((W_, 'i2h_w',     True, [(0, -1)]),
                           (R_, 'gates_t_w', True, [(0, -1)]),
                           (Bi, 'i2h_b'    , True, []),
                           (Br, 'gates_t_b', True, []))
                cls._rnn_reform_weights(reforms, name, hidden_size, init_net,
                                        ['input', 'output', 'forget', 'cell'], [0, 2, 1, 3])

            def make_cell(*args, **kwargs):
                return rnn_cell.LSTM(*args, **kwargs)

            def make_rnn(direction_offset):
                return cls._make_rnn_direction(
                    input_blob, B, W, R, [(initial_h, '/initial_h'), (initial_c, '/initial_c')], sequence_lens,
                    pred_mh, init_net, input_size, hidden_size, 4, direction_offset,
                    "/i2h_b", "/gates_t_b", "/i2h_w", "/gates_t_w",
                    reform, make_cell, lambda x: [x[0], x[1], x[3]])

        if direction == 'forward':
            outputs = make_rnn(0)

            # in the forward case, storage is shared between the
            # last outputs. We need to decouple them so that the
            # VariableLengthSequencePadding only mutates
            # n.outputs[0]
            for i in range(1, len(outputs)):
                pred_mh.net.Copy(outputs[i], n.outputs[i])

            if sequence_lens is not None:
                pred_mh.net.VariableLengthSequencePadding(
                    [outputs[0], sequence_lens], [outputs[0]])
            pred_mh.net.ExpandDims([outputs[0]], [n.outputs[0]], dims=[1])
        elif direction == 'bidirectional':
            outputs_f = make_rnn(0)
            outputs_b = make_rnn(1)

            concatted_output, _ = pred_mh.net.Concat(
                [outputs_f[0], outputs_b[0]], [cls.dummy_name(), cls.dummy_name()], axis=2)
            if sequence_lens is not None:
                pred_mh.net.VariableLengthSequencePadding(
                    [concatted_output, sequence_lens], [concatted_output])
            unsqueezed_output = pred_mh.net.ExpandDims([concatted_output], [cls.dummy_name()], dims=[1])
            pred_mh.net.Reshape(unsqueezed_output, [n.outputs[0], cls.dummy_name()], shape=[0,2,0,-1])
            for i in range(1, len(n.outputs)):
                pred_mh.net.Concat([outputs_f[i], outputs_b[i]],
                                   [n.outputs[i], cls.dummy_name()], axis=0)

        return Caffe2Ops(list(pred_mh.Proto().op),
                         list(init_net.Proto().op),
                         list(pred_mh.Proto().external_input))
Exemple #15
0
    def create_net(self):
        net = core.Net("feature_extractor")
        init_net = core.Net("feature_extractor_init")
        missing_scalar = self.create_const(init_net, "MISSING_SCALAR",
                                           MISSING_VALUE)

        action_schema = map_schema(
        ) if self.sorted_action_features else schema.Scalar()

        if self.max_q_learning:
            next_action_field = InputColumn.POSSIBLE_NEXT_ACTIONS
            next_action_schema = schema.List(action_schema)
        else:
            next_action_field = InputColumn.NEXT_ACTION
            next_action_schema = action_schema

        input_schema = schema.Struct(
            (InputColumn.STATE_FEATURES, map_schema()),
            (InputColumn.NEXT_STATE_FEATURES, map_schema()),
            (InputColumn.ACTION, action_schema),
            (next_action_field, next_action_schema),
        )

        input_record = net.set_input_record(input_schema)

        state = self.extract_float_features(
            net,
            "state",
            input_record[InputColumn.STATE_FEATURES],
            self.sorted_state_features,
            missing_scalar,
        )
        next_state = self.extract_float_features(
            net,
            "next_state",
            input_record[InputColumn.NEXT_STATE_FEATURES],
            self.sorted_state_features,
            missing_scalar,
        )

        if self.max_q_learning and self.sorted_action_features is not None:
            next_state_field = "tiled_next_state"
            # TODO: this will need to be more complicated to support sparse features
            next_state = net.LengthsTile(
                [next_state,
                 input_record.possible_next_actions.lengths()],
                ["tiled_next_state"],
            )
        else:
            next_state_field = "next_state"

        action = input_record.action
        next_action = input_record[next_action_field]
        if self.max_q_learning:
            next_action = next_action["values"]
        if self.sorted_action_features:
            action = self.extract_float_features(net, "action", action,
                                                 self.sorted_action_features,
                                                 missing_scalar)
            next_action = self.extract_float_features(
                net,
                next_action_field,
                next_action,
                self.sorted_action_features,
                missing_scalar,
            )

        next_action_output = (schema.List(
            next_action,
            lengths_blob=input_record.possible_next_actions.lengths)
                              if self.max_q_learning else next_action)

        net.set_output_record(
            schema.Struct(
                ("state", state),
                ("action", action),
                (next_state_field, next_state),
                (next_action_field, next_action_output),
            ))

        return FeatureExtractorNet(net, init_net)
    def _test_dnnlowp_nd_int(
        self,
        stride,
        pad,
        kernels,
        dilation,
        size,
        group,
        input_channels_per_group,
        output_channels_per_group,
        batch_size,
        order,
        prepack_weight,
        gc,
        dc,
    ):
        assume(group == 1 or dilation == 1)
        assume((not prepack_weight) or order == "NHWC")
        ndim = len(kernels)

        X, W, b = generate_convnd_inputs(
            (stride, ) * ndim,
            (pad, ) * ndim,
            kernels,
            (dilation, ) * ndim,
            (size, ) * ndim,
            group,
            input_channels_per_group,
            output_channels_per_group,
            batch_size,
            order,
        )

        Output = collections.namedtuple("Output",
                                        ["Y", "op_type", "engine", "order"])
        outputs = []

        op_engine_list = [("Conv", ""), ("Conv", "DNNLOWP_16"),
                          ("Int8Conv", "DNNLOWP")]

        for op_type, engine in op_engine_list:
            init_net = core.Net("test_init_net")
            net = core.Net("test_net")

            fall_back_to_NCHW = "DNNLOWP" not in engine and order == "NHWC"

            if fall_back_to_NCHW:
                X_nchw = utils.NHWC2NCHW(X)
                W_nchw = utils.NHWC2NCHW(W)

            do_quantize = "DNNLOWP" in engine
            do_dequantize = "DNNLOWP" in engine
            # If output scale/zp aren't set, it gets computed from ref fp32 op
            # in DNNLOWP, which isn't possible when we quantize input weights.
            # Make sure atleast one output is collected to compute output
            # scale/zp.
            do_quantize_weight = engine == "DNNLOWP" and len(outputs) > 0
            do_prepack_weight = engine == "DNNLOWP" and prepack_weight

            if do_quantize:
                quantize = core.CreateOperator("Quantize", ["X"], ["X_q"],
                                               engine=engine,
                                               device_option=gc)
                net.Proto().op.extend([quantize])

            x_q_param = dnnlowp_utils.choose_quantization_params(
                X.min(), X.max())
            if do_quantize_weight:
                int8_given_tensor_fill, w_q_param = dnnlowp_utils.create_int8_given_tensor_fill(
                    W, "W_q")
                init_net.Proto().op.extend([int8_given_tensor_fill])

                # Bias
                int8_bias_tensor_fill = dnnlowp_utils.create_int8_bias_tensor_fill(
                    b, "b_q", x_q_param, w_q_param)
                init_net.Proto().op.extend([int8_bias_tensor_fill])

            if do_prepack_weight:
                inputs = ["W_q" if do_quantize_weight else "W"]
                if do_dequantize:
                    inputs += ["b_q" if do_quantize_weight else "b"]
                pack = core.CreateOperator(
                    "Int8ConvPackWeight",
                    inputs,
                    ["W_packed"],
                    group=group,
                    in_scale=x_q_param.scale,
                    engine=engine,
                )
                init_net.Proto().op.extend([pack])

            conv = core.CreateOperator(
                op_type,
                [
                    "X_q" if do_quantize else "X",
                    "W_packed" if do_prepack_weight else
                    ("W_q" if do_quantize_weight else "W"),
                    "b_q" if do_quantize_weight else "b",
                ],
                ["Y_q" if do_dequantize else "Y"],
                strides=[stride] * ndim,
                kernels=kernels,
                dilations=[dilation] * ndim,
                pads=[pad] * (ndim * 2),
                order="NCHW" if fall_back_to_NCHW else order,
                dequantize_output=not do_dequantize,
                engine=engine,
                group=group,
                device_option=gc,
            )
            if do_quantize_weight or do_prepack_weight:
                # When quantized weight is provided, we can't rescale the
                # output dynamically by looking at the range of output of each
                # batch, so here we provide the range of output observed from
                # fp32 reference implementation
                dnnlowp_utils.add_quantization_param_args(conv, outputs[0][0])
            net.Proto().op.extend([conv])

            if do_dequantize:
                dequantize = core.CreateOperator("Dequantize", ["Y_q"], ["Y"],
                                                 engine=engine,
                                                 device_option=gc)
                net.Proto().op.extend([dequantize])

            self.ws.create_blob("X").feed(X_nchw if fall_back_to_NCHW else X,
                                          device_option=gc)
            self.ws.create_blob("W").feed(W_nchw if fall_back_to_NCHW else W,
                                          device_option=gc)
            self.ws.create_blob("b").feed(b, device_option=gc)
            self.ws.run(init_net)
            self.ws.run(net)
            Y = self.ws.blobs["Y"].fetch()
            if fall_back_to_NCHW:
                Y = utils.NCHW2NHWC(Y)
            outputs.append(
                Output(Y=Y, op_type=op_type, engine=engine, order=order))

        check_quantized_results_close(outputs)
Exemple #17
0
 def read_ex(self, local_init_net, local_finish_net):
     self._wrapper._new_reader(local_init_net)
     dequeue_net = core.Net('dequeue')
     fields, status_blob = dequeue(dequeue_net, self._wrapper.queue(),
                                   len(self.schema().field_names()))
     return [dequeue_net], status_blob, fields
    def test_dnnlowp_conv_int(
        self,
        stride,
        pad,
        kernel,
        dilation,
        size,
        group,
        input_channels_per_group,
        output_channels_per_group,
        batch_size,
        order,
        weight_quantized,
        prepack_weight,
        share_col_buffer,
        preserve_activation_sparsity,
        preserve_weight_sparsity,
        gc,
        dc,
    ):
        assume(group == 1 or dilation == 1)
        assume((not prepack_weight) or order == "NHWC")

        X, W, b = generate_conv_inputs(
            stride,
            pad,
            kernel,
            dilation,
            size,
            group,
            input_channels_per_group,
            output_channels_per_group,
            batch_size,
            order,
            preserve_activation_sparsity=preserve_activation_sparsity,
            preserve_weight_sparsity=preserve_weight_sparsity,
        )

        Output = collections.namedtuple("Output",
                                        ["Y", "op_type", "engine", "order"])
        outputs = []

        op_engine_list = [
            ("Conv", ""),
            ("Conv", "DNNLOWP"),
            ("Conv", "DNNLOWP_16"),
            ("Int8Conv", "DNNLOWP"),
        ]

        for op_type, engine in op_engine_list:
            init_net = core.Net("test_init_net")
            net = core.Net("test_net")

            do_quantize = "DNNLOWP" in engine
            do_dequantize = "DNNLOWP" in engine
            # If output scale/zp aren't set, it gets computed from ref fp32 op
            # in DNNLOWP, which isn't possible when we quantize input weights.
            # Make sure atleast one output is collected to compute output
            # scale/zp.
            do_quantize_weight = (engine == "DNNLOWP" and weight_quantized
                                  and len(outputs) > 0)
            do_prepack_weight = engine == "DNNLOWP" and prepack_weight

            if do_quantize:
                quantize = core.CreateOperator(
                    "Quantize",
                    ["X"],
                    ["X_q"],
                    preserve_activation_sparsity=preserve_activation_sparsity,
                    engine=engine,
                    device_option=gc,
                )
                net.Proto().op.extend([quantize])

            x_q_param = dnnlowp_utils.choose_quantization_params(
                X.min(), X.max(), preserve_activation_sparsity)  # noqa
            if do_quantize_weight:
                int8_given_tensor_fill, w_q_param = dnnlowp_utils.create_int8_given_tensor_fill(
                    W, "W_q", preserve_weight_sparsity)
                init_net.Proto().op.extend([int8_given_tensor_fill])

                # Bias
                int8_bias_tensor_fill = dnnlowp_utils.create_int8_bias_tensor_fill(
                    b, "b_q", x_q_param, w_q_param)
                init_net.Proto().op.extend([int8_bias_tensor_fill])

            if do_prepack_weight:
                inputs = ["W_q" if do_quantize_weight else "W"]
                if do_dequantize:
                    inputs += ["b_q" if do_quantize_weight else "b"]
                pack = core.CreateOperator(
                    "Int8ConvPackWeight",
                    inputs,
                    ["W_packed"],
                    group=group,
                    preserve_weight_sparsity=preserve_weight_sparsity,
                    in_scale=x_q_param.scale,
                    engine=engine,
                )
                init_net.Proto().op.extend([pack])

            conv = core.CreateOperator(
                op_type,
                [
                    "X_q" if do_quantize else "X",
                    "W_packed" if do_prepack_weight else
                    ("W_q" if do_quantize_weight else "W"),
                    "b_q" if do_quantize_weight else "b",
                ],
                ["Y_q" if do_dequantize else "Y"],
                stride=stride,
                kernel=kernel,
                dilation=dilation,
                pad=pad,
                order=order,
                shared_buffer=(1 if share_col_buffer else 0),
                preserve_activation_sparsity=preserve_activation_sparsity,
                preserve_weight_sparsity=preserve_weight_sparsity,
                engine=engine,
                group=group,
                device_option=gc,
            )
            if do_quantize_weight or do_prepack_weight:
                # When quantized weight is provided, we can't rescale the
                # output dynamically by looking at the range of output of each
                # batch, so here we provide the range of output observed from
                # fp32 reference implementation
                dnnlowp_utils.add_quantization_param_args(
                    conv, outputs[0][0], preserve_activation_sparsity)
            net.Proto().op.extend([conv])

            if do_dequantize:
                dequantize = core.CreateOperator("Dequantize", ["Y_q"], ["Y"],
                                                 engine=engine,
                                                 device_option=gc)
                net.Proto().op.extend([dequantize])

            self.ws.create_blob("X").feed(X, device_option=gc)
            self.ws.create_blob("W").feed(W, device_option=gc)
            self.ws.create_blob("b").feed(b, device_option=gc)
            self.ws.run(init_net)
            self.ws.run(net)
            Y = self.ws.blobs["Y"].fetch()
            outputs.append(
                Output(Y=Y, op_type=op_type, engine=engine, order=order))

        check_quantized_results_close(outputs,
                                      symmetric=preserve_activation_sparsity)
Exemple #19
0
    def test_rowwise_dnnlowp_fully_connected_int(
        self,
        input_channels,
        output_channels,
        batch_size,
        in_quantized,
        out_quantized,
        prepack_weight,
        gc,
        dc,
    ):
        print("@given M ", batch_size, " K ", input_channels, " N ",
              output_channels)
        print("@given in_quantized ", in_quantized, " out_quantized ",
              out_quantized)

        # X has scale 1, so exactly represented after quantization
        X_min = -77
        X_max = X_min + 255
        X = np.round(
            np.random.rand(batch_size, input_channels) * (X_max - X_min) +
            X_min)
        X = X.astype(np.float32)
        # input channels 0 and 1 are all X_min to avoid overflow from vpmaddubsw
        # when multiplied with W_min and W_max
        X[:, 0:2] = X_min
        X[0, 2] = X_max

        # Each row of W has scale 1 but with different offset, so row-wise
        # quantization shouldn't have any input quantization error.
        W = np.zeros((output_channels, input_channels))
        W = W.astype(np.float32)
        for i in range(output_channels):
            W_min = -100 + i
            W_max = W_min + 255
            W[i, :] = np.round(
                np.random.rand(input_channels) * (W_max - W_min) + W_min)
            W[i, 0] = W_min
            W[i, 1] = W_max

            # Make sure we won't have overflows from vpmaddubsw instruction used in
            # fbgemm
            avoid_vpmaddubsw_overflow_fc(
                batch_size,
                input_channels,
                1,
                X,
                X_min,
                X_max,
                W[i:i + 1, ],
                W_min,
                W_max,
            )

        b = np.random.randn(output_channels).astype(np.float32)

        Output = collections.namedtuple("Output", ["Y", "op_type", "engine"])
        outputs = []

        op_engine_list = [
            ("FC", ""),
            ("FC", "DNNLOWP_ROWWISE"),
            ("FC", "DNNLOWP_ROWWISE_16"),
            ("Int8FC", "DNNLOWP_ROWWISE"),
            ("Int8FCRowWise", "DNNLOWP"),
        ]

        for op_type, engine in op_engine_list:
            init_net = core.Net("test_init_net")
            net = core.Net("test_net")

            do_quantize = "DNNLOWP" in engine and in_quantized
            do_dequantize = "DNNLOWP" in engine and out_quantized
            do_prepack_weight = engine == "DNNLOWP_ROWWISE" and prepack_weight

            if do_quantize:
                quantize = core.CreateOperator("Quantize", ["X"], ["X_q"],
                                               engine=engine,
                                               device_option=gc)
                net.Proto().op.extend([quantize])

            x_q_param = dnnlowp_utils.choose_quantization_params(
                X.min(), X.max())

            if do_prepack_weight:
                inputs = ["W"]
                if do_dequantize:
                    inputs += ["b"]
                pack = core.CreateOperator(
                    "Int8FCPackWeight",
                    inputs,
                    ["W_packed"],
                    in_scale=x_q_param.scale,
                    engine=engine,
                )
                init_net.Proto().op.extend([pack])

            fc = core.CreateOperator(
                op_type,
                [
                    "X_q" if do_quantize else "X",
                    "W_packed" if do_prepack_weight else "W",
                    "b",
                ],
                ["Y_q" if do_dequantize else "Y"],
                dequantize_output=not do_dequantize,
                engine=engine,
                device_option=gc,
            )
            if do_prepack_weight:
                # When pre-packed quantized weight is provided, we can't rescale
                # the output dynamically by looking at the range of output of
                # each batch, so here we provide the range of output observed
                # from fp32 reference implementation
                dnnlowp_utils.add_quantization_param_args(fc, outputs[0][0])
            net.Proto().op.extend([fc])

            if do_dequantize:
                dequantize = core.CreateOperator("Dequantize", ["Y_q"], ["Y"],
                                                 engine=engine,
                                                 device_option=gc)
                net.Proto().op.extend([dequantize])

            self.ws.create_blob("X").feed(X, device_option=gc)
            self.ws.create_blob("W").feed(W, device_option=gc)
            self.ws.create_blob("b").feed(b, device_option=gc)
            self.ws.run(init_net)
            self.ws.run(net)
            outputs.append(
                Output(Y=self.ws.blobs["Y"].fetch(),
                       op_type=op_type,
                       engine=engine))

        check_quantized_results_close(outputs)
Exemple #20
0
    def _create_rnn(cls, init_model, pred_model, n, opset_version):
        assert init_model is not None, "cannot convert RNNs without access to the full model"
        assert pred_model is not None, "cannot convert RNNs without access to the full model"

        attrs = dict(n.attrs)  # make a copy, which is safe to mutate
        hidden_size = attrs.pop('hidden_size')
        activation = force_unicode(attrs.pop('activations', ('tanh', ))[0])
        direction = force_unicode(attrs.pop('direction', 'forward'))
        assert not attrs, "unsupported RNN attributes: " + str(attrs.keys())
        assert direction in ['forward',
                             'bidirectional'], "unsupported backwards RNN"

        input_blob, W, R, B, sequence_lens, initial_h = n.inputs

        if sequence_lens == "":
            sequence_lens = None

        input_size = cls._rnn_shape_inference(init_model, pred_model, n,
                                              input_blob, W)
        if input_size is None:
            raise RuntimeError(
                "best-effort shape inference for RNN input failed")

        init_net = core.Net("init-net")
        pred_mh = ModelHelper()

        def make_rnn(direction_offset):
            name = dummy_name()

            # input and recurrence biases are squashed together in
            # onnx but not in caffe2

            bias_offset = 2 * direction_offset * hidden_size
            init_net.Slice(B,
                           name + "/i2h_b",
                           starts=[bias_offset + 0 * hidden_size],
                           ends=[bias_offset + 1 * hidden_size])
            init_net.Slice(B,
                           name + "/gates_t_b",
                           starts=[bias_offset + 1 * hidden_size],
                           ends=[bias_offset + 2 * hidden_size])

            weight_offset = direction_offset * hidden_size
            init_net.Slice(W,
                           name + '/i2h_w',
                           starts=[weight_offset + 0 * hidden_size, 0],
                           ends=[weight_offset + 1 * hidden_size, -1])
            init_net.Slice(R,
                           name + '/gates_t_w',
                           starts=[weight_offset + 0 * hidden_size, 0],
                           ends=[weight_offset + 1 * hidden_size, -1])

            initial_h_sliced = name + '/initial_h'
            init_net.Slice(initial_h,
                           initial_h_sliced,
                           starts=[direction_offset + 0, 0, 0],
                           ends=[direction_offset + 1, -1, -1])

            if direction_offset == 1:
                input = pred_mh.net.ReversePackedSegs(
                    [input_blob, sequence_lens], name + "/input-reversed")
            else:
                input = input_blob

            hidden_t_all, hidden_t_last = rnn_cell.BasicRNN(
                pred_mh,
                input,
                sequence_lens, [initial_h_sliced],
                input_size,
                hidden_size,
                name,
                drop_states=False,
                forward_only=True,
                activation=activation)

            if direction_offset == 1:
                hidden_t_all = pred_mh.net.ReversePackedSegs(
                    [hidden_t_all, sequence_lens], name + "/output-reversed")

            return hidden_t_all, hidden_t_last

        if direction == 'forward':
            hidden_t_all, hidden_t_last = make_rnn(0)

            # in the forward case, storage is shared between the two
            # outputs. We need to decouple them so that the
            # VariableLengthSequencePadding only mutates n.outputs[0]
            pred_mh.net.Copy(hidden_t_last, n.outputs[1])

            pred_mh.net = pred_mh.net.Clone(
                "dummy-clone-net", blob_remap={hidden_t_all: n.outputs[0]})
        elif direction == 'bidirectional':
            hidden_t_all_f, hidden_t_last_f = make_rnn(0)
            hidden_t_all_b, hidden_t_last_b = make_rnn(1)
            pred_mh.net.Concat([hidden_t_all_f, hidden_t_all_b],
                               [n.outputs[0], dummy_name()],
                               axis=2)
            pred_mh.net.Concat([hidden_t_last_f, hidden_t_last_b],
                               [n.outputs[1], dummy_name()],
                               axis=0)

        if sequence_lens is not None:
            pred_mh.net.VariableLengthSequencePadding(
                [n.outputs[0], sequence_lens], [n.outputs[0]])

        return Caffe2Ops(list(pred_mh.Proto().op), list(init_net.Proto().op),
                         list(pred_mh.Proto().external_input))
    def test_dnnlowp_conv_acc16_int(
        self,
        stride,
        pad,
        kernel,
        dilation,
        size,
        group,
        input_channels_per_group,
        output_channels_per_group,
        batch_size,
        order,
        in_quantized,
        out_quantized,
        weight_quantized,
        share_col_buffer,
        preserve_activation_sparsity,
        preserve_weight_sparsity,
        gc,
        dc,
    ):
        assume(group == 1 or dilation == 1)
        assume(size >= dilation * (kernel - 1) + 1)

        input_channels = input_channels_per_group * group
        output_channels = output_channels_per_group * group

        # X and W have scale 1, so exactly represented after quantization
        # This was made sure by having at least one 0 and one 255 for unsigned
        # 8-bit tensors, and at least one -128 and one 127 for signed 8-bit
        # tensors.
        # Since fbgemm_acc16 accumulates to 16-bit, To avoid overflow, we use
        # small numbers except for those 0, 255, -128, and 127, for this test
        # We also make sure 255, -128, or 127 are not multiplied together by
        # putting them in different input channels and the corresponding input
        # channel in other matrix is 0.
        # For example, we put 255 in input channel 1 in X, so we make the
        # corresponding input channel in W all zeros.
        X_min = 0 if preserve_activation_sparsity else -77
        X_max = X_min + 255
        X = np.random.rand(batch_size, size, size, input_channels) * 4 + X_min
        X = np.round(X).astype(np.float32)
        X[..., 0] = X_min
        X[0, 0, 0, 1] = X_max

        if preserve_weight_sparsity:
            W_min = -128
            W_max = 100
        else:
            W_min = -100
            W_max = W_min + 255
        W = (np.random.rand(output_channels, kernel, kernel,
                            input_channels_per_group) * 4 - 2 + W_min + 128)
        W = np.round(W).astype(np.float32)
        W[0, 0, 0, 0] = W_min
        W[1, 0, 0, 0] = W_max
        W[..., 1] = W_min + 128  # "zeros"

        if order == "NCHW":
            X = nhwc2nchw(X)
            W = nhwc2nchw(W)

        # No input quantization error in bias
        b = np.round(np.random.randn(output_channels)).astype(np.float32)

        Output = collections.namedtuple("Output",
                                        ["Y", "op_type", "engine", "order"])
        outputs = []

        op_engine_list = [
            ("Conv", ""),
            ("Conv", "DNNLOWP_ACC16"),
            ("Int8Conv", "DNNLOWP_ACC16"),
        ]

        for op_type, engine in op_engine_list:
            net = core.Net("test_net")

            do_quantize = "DNNLOWP" in engine and in_quantized
            do_dequantize = "DNNLOWP" in engine and out_quantized
            do_quantize_weight = ("DNNLOWP" in engine and weight_quantized
                                  and len(outputs) > 0)

            if do_quantize:
                quantize = core.CreateOperator(
                    "Quantize",
                    ["X"],
                    ["X_q"],
                    preserve_activation_sparsity=preserve_activation_sparsity,
                    engine="DNNLOWP",
                    device_option=gc,
                )
                net.Proto().op.extend([quantize])

            if do_quantize_weight:
                int8_given_tensor_fill, w_q_param = dnnlowp_utils.create_int8_given_tensor_fill(
                    W, "W_q", preserve_weight_sparsity)
                net.Proto().op.extend([int8_given_tensor_fill])

                # Bias
                x_q_param = dnnlowp_utils.choose_quantization_params(
                    X.min(), X.max(), preserve_activation_sparsity)
                int8_bias_tensor_fill = dnnlowp_utils.create_int8_bias_tensor_fill(
                    b, "b_q", x_q_param, w_q_param)
                net.Proto().op.extend([int8_bias_tensor_fill])

            conv = core.CreateOperator(
                op_type,
                [
                    "X_q" if do_quantize else "X",
                    "W_q" if do_quantize_weight else "W",
                    "b_q" if do_quantize_weight else "b",
                ],
                ["Y_q" if do_dequantize else "Y"],
                stride=stride,
                kernel=kernel,
                dilation=dilation,
                pad=pad,
                order=order,
                dequantize_output=not do_dequantize,
                shared_buffer=(1 if share_col_buffer else 0),
                preserve_activation_sparsity=preserve_activation_sparsity,
                preserve_weight_sparsity=preserve_weight_sparsity,
                engine=engine,
                group=group,
                device_option=gc,
            )
            if do_dequantize or do_quantize_weight:
                # When quantized weight is provided, we can't rescale the
                # output dynamically by looking at the range of output of each
                # batch, so here we provide the range of output observed from
                # fp32 reference implementation
                dnnlowp_utils.add_quantization_param_args(
                    conv, outputs[0][0], preserve_activation_sparsity)
            net.Proto().op.extend([conv])

            if do_dequantize:
                dequantize = core.CreateOperator("Dequantize", ["Y_q"], ["Y"],
                                                 engine="DNNLOWP",
                                                 device_option=gc)
                net.Proto().op.extend([dequantize])

            self.ws.create_blob("X").feed(X, device_option=gc)
            self.ws.create_blob("W").feed(W, device_option=gc)
            self.ws.create_blob("b").feed(b, device_option=gc)
            self.ws.run(net)
            Y = self.ws.blobs["Y"].fetch()
            outputs.append(
                Output(Y=Y, op_type=op_type, engine=engine, order=order))

        check_quantized_results_close(outputs,
                                      symmetric=preserve_activation_sparsity)
Exemple #22
0
    def _create_gru(cls, init_model, pred_model, n, opset_version):
        assert init_model is not None, "cannot convert GRUs without access to the full model"
        assert pred_model is not None, "cannot convert GRUs without access to the full model"

        attrs = dict(n.attrs)  # make a copy, which is safe to mutate
        hidden_size = attrs.pop('hidden_size')
        linear_before_reset = attrs.pop('linear_before_reset', 0)
        direction = force_unicode(attrs.pop('direction', 'forward'))
        assert not attrs, "unsupported GRU attributes: " + str(attrs.keys())
        assert direction in ['forward',
                             'bidirectional'], "unsupported backwards GRU"

        input_blob, W, R, B, sequence_lens, initial_h = n.inputs

        if sequence_lens == "":
            sequence_lens = None

        input_size = cls._rnn_shape_inference(init_model, pred_model, n,
                                              input_blob, W)
        if input_size is None:
            raise RuntimeError(
                "best-effort shape inference for GRU input failed")

        init_net = core.Net("init-net")
        pred_mh = ModelHelper()

        def make_gru(direction_offset):
            name = dummy_name()

            # input and recurrence biases are squashed together in
            # onnx but not in caffe2

            bias_offset = 6 * direction_offset * hidden_size
            Bi = init_net.Slice(B,
                                name + "_bias_i2h",
                                starts=[bias_offset + 0 * hidden_size],
                                ends=[bias_offset + 3 * hidden_size])
            Br = init_net.Slice(B,
                                name + "_bias_gates",
                                starts=[bias_offset + 3 * hidden_size],
                                ends=[bias_offset + 6 * hidden_size])

            weight_offset = 3 * direction_offset * hidden_size
            W_ = init_net.Slice(W,
                                name + '/i2h_w_pre',
                                starts=[weight_offset + 0 * hidden_size, 0],
                                ends=[weight_offset + 3 * hidden_size, -1])
            R_ = init_net.Slice(R,
                                name + '/gates_t_w_pre',
                                starts=[weight_offset + 0 * hidden_size, 0],
                                ends=[weight_offset + 3 * hidden_size, -1])

            # caffe2 has a different order from onnx. We need to rearrange
            #  z r h  -> r z h
            reforms = ((W_, 'i2h_w', True, [(0, -1)]), (R_, 'gate_t_w', False,
                                                        [(0, -1)]),
                       (Bi, 'i2h_b', True, []), (Br, 'gate_t_b', False, []))
            for name_from, name_to, do_concat, extra_dims in reforms:
                xz, xr, xh = [
                    '%s/%s_%s' % (name, prefix, name_to)
                    for prefix in ('update', 'reset', 'output')
                ]
                for i, x in enumerate([xz, xr, xh]):
                    dim0 = i * hidden_size, (i + 1) * hidden_size
                    starts, ends = zip(dim0, *extra_dims)
                    init_net.Slice(name_from, x, starts=starts, ends=ends)
                if do_concat:
                    init_net.Concat([xr, xz, xh],
                                    ['%s/%s' % (name, name_to),
                                     dummy_name()],
                                    axis=0)

            initial_h_sliced = name + '/initial_h'
            init_net.Slice(initial_h,
                           initial_h_sliced,
                           starts=[direction_offset + 0, 0, 0],
                           ends=[direction_offset + 1, -1, -1])

            if direction_offset == 1:
                input = pred_mh.net.ReversePackedSegs(
                    [input_blob, sequence_lens], name + "/input-reversed")
            else:
                input = input_blob

            hidden_t_all, hidden_t_last = gru_cell.GRU(
                pred_mh,
                input,
                sequence_lens, [initial_h_sliced],
                input_size,
                hidden_size,
                name,
                drop_states=False,
                forward_only=True,
                linear_before_reset=linear_before_reset)

            if direction_offset == 1:
                hidden_t_all = pred_mh.net.ReversePackedSegs(
                    [hidden_t_all, sequence_lens], name + "/output-reversed")

            return hidden_t_all, hidden_t_last

        if direction == 'forward':
            hidden_t_all, hidden_t_last = make_gru(0)

            # in the forward case, storage is shared between the two
            # outputs. We need to decouple them so that the
            # VariableLengthSequencePadding only mutates n.outputs[0]
            pred_mh.net.Copy(hidden_t_last, n.outputs[1])

            pred_mh.net = pred_mh.net.Clone(
                "dummy-clone-net", blob_remap={hidden_t_all: n.outputs[0]})
        elif direction == 'bidirectional':
            hidden_t_all_f, hidden_t_last_f = make_gru(0)
            hidden_t_all_b, hidden_t_last_b = make_gru(1)
            pred_mh.net.Concat([hidden_t_all_f, hidden_t_all_b],
                               [n.outputs[0], dummy_name()],
                               axis=2)
            pred_mh.net.Concat([hidden_t_last_f, hidden_t_last_b],
                               [n.outputs[1], dummy_name()],
                               axis=0)

        if sequence_lens is not None:
            pred_mh.net.VariableLengthSequencePadding(
                [n.outputs[0], sequence_lens], [n.outputs[0]])

        return Caffe2Ops(list(pred_mh.Proto().op), list(init_net.Proto().op),
                         list(pred_mh.Proto().external_input))
Exemple #23
0
    [],  # GaussianFill does not need any parameters.
    ["Z"],
    shape=[100, 100],  # shape argument as a list of ints.
    mean=1.0,  # mean as a single float
    std=1.0,  # std as a single float
)
print("Content of op:\n")
print(str(op))

workspace.RunOperatorOnce(op)
temp = workspace.FetchBlob("Z")
pyplot.hist(temp.flatten(), bins=50)
pyplot.title("Distribution of Z")
pyplot.show()

net = core.Net("my_first_net")
X = net.GaussianFill([], ["X"], mean=0.0, std=1.0, shape=[2, 3], run_once=0)
print("New network proto:\n\n{}".format(net.Proto()))
print("Type of X is: {}".format(type(X)))
print("The blob name is: {}".format(str(X)))

W = net.GaussianFill([], ["W"], mean=0.0, std=1.0, shape=[5, 3], run_once=0)
b = net.ConstantFill([], ["b"], shape=[
    5,
], value=1.0, run_once=0)

Y = X.FC([W, b], ["Y"])

from caffe2.python import net_drawer
from IPython import display
graph = net_drawer.GetPydotGraph(net, rankdir="LR")
Exemple #24
0
    def preprocess_samples(
        self,
        states: List[Dict[int, float]],
        actions: List[Dict[int, float]],
        rewards: List[float],
        next_states: List[Dict[int, float]],
        next_actions: List[Dict[int, float]],
        is_terminals: List[bool],
        possible_next_actions: List[List[Dict[int, float]]],
        reward_timelines: List[Dict[int, float]],
        minibatch_size: int,
    ) -> List[TrainingDataPage]:
        # Shuffle
        merged = list(
            zip(states, actions, rewards, next_states, next_actions,
                is_terminals, possible_next_actions, reward_timelines))
        random.shuffle(merged)
        states, actions, rewards, next_states, next_actions, is_terminals, \
            possible_next_actions, reward_timelines = zip(*merged)

        net = core.Net('gridworld_preprocessing')
        C2.set_net(net)
        preprocessor = PreprocessorNet(net, True)
        saa = StackedAssociativeArray.from_dict_list(states, 'states')
        state_matrix, _ = preprocessor.normalize_sparse_matrix(
            saa.lengths,
            saa.keys,
            saa.values,
            self.normalization,
            'state_norm',
        )
        saa = StackedAssociativeArray.from_dict_list(next_states,
                                                     'next_states')
        next_state_matrix, _ = preprocessor.normalize_sparse_matrix(
            saa.lengths,
            saa.keys,
            saa.values,
            self.normalization,
            'next_state_norm',
        )
        saa = StackedAssociativeArray.from_dict_list(actions, 'action')
        action_matrix, _ = preprocessor.normalize_sparse_matrix(
            saa.lengths,
            saa.keys,
            saa.values,
            self.normalization_action,
            'action_norm',
        )
        saa = StackedAssociativeArray.from_dict_list(next_actions,
                                                     'next_action')
        next_action_matrix, _ = preprocessor.normalize_sparse_matrix(
            saa.lengths,
            saa.keys,
            saa.values,
            self.normalization_action,
            'next_action_norm',
        )
        rewards = np.array(rewards, dtype=np.float32).reshape(-1, 1)

        pnas_lengths_list = []
        pnas_flat = []
        for pnas in possible_next_actions:
            pnas_lengths_list.append(len(pnas))
            pnas_flat.extend(pnas)
        saa = StackedAssociativeArray.from_dict_list(pnas_flat,
                                                     'possible_next_actions')
        pnas_lengths = np.array(pnas_lengths_list, dtype=np.int32)
        possible_next_actions_matrix, _ = preprocessor.normalize_sparse_matrix(
            saa.lengths,
            saa.keys,
            saa.values,
            self.normalization_action,
            'possible_next_action_norm',
        )
        workspace.RunNetOnce(net)

        states_ndarray = workspace.FetchBlob(state_matrix)
        actions_ndarray = workspace.FetchBlob(action_matrix)
        next_states_ndarray = workspace.FetchBlob(next_state_matrix)
        next_actions_ndarray = workspace.FetchBlob(next_action_matrix)
        possible_next_actions_ndarray = workspace.FetchBlob(
            possible_next_actions_matrix)
        tdps = []
        pnas_start = 0
        for start in range(0, states_ndarray.shape[0], minibatch_size):
            end = start + minibatch_size
            if end > states_ndarray.shape[0]:
                break
            pnas_end = pnas_start + np.sum(pnas_lengths[start:end])
            pnas = possible_next_actions_ndarray[pnas_start:pnas_end]
            pnas_start = pnas_end
            tdps.append(
                TrainingDataPage(
                    states=states_ndarray[start:end],
                    actions=actions_ndarray[start:end],
                    rewards=rewards[start:end],
                    next_states=next_states_ndarray[start:end],
                    next_actions=next_actions_ndarray[start:end],
                    possible_next_actions=StackedArray(pnas_lengths[start:end],
                                                       pnas),
                    not_terminals=(pnas_lengths[start:end] > 0).reshape(-1, 1),
                    reward_timelines=reward_timelines[start:end]
                    if reward_timelines else None,
                ))
        return tdps
    def test_dnnlowp_max_pool(
        self,
        stride,
        pad,
        kernel,
        size,
        input_channels,
        batch_size,
        order,
        in_quantized,
        gc,
        dc,
    ):
        assume(kernel <= size)
        assume(pad < kernel)

        C = input_channels
        N = batch_size
        H = W = size

        min_ = -10
        max_ = 20
        if order == "NCHW":
            X = np.round(np.random.rand(N, C, H, W) * (max_ - min_) + min_)
        elif order == "NHWC":
            X = np.round(np.random.rand(N, H, W, C) * (max_ - min_) + min_)
        X = X.astype(np.float32)
        Output = collections.namedtuple("Output", ["Y", "op_type", "engine"])
        outputs = []

        op_engine_list = [
            ("MaxPool", ""),
            ("MaxPool", "DNNLOWP"),
            ("Int8MaxPool", "DNNLOWP"),
        ]

        for op_type, engine in op_engine_list:
            net = core.Net("test_net")

            do_quantize = "DNNLOWP" in engine and in_quantized

            if do_quantize:
                quantize = core.CreateOperator("Quantize", ["X"], ["X_q"],
                                               engine=engine,
                                               device_option=gc)
                net.Proto().op.extend([quantize])

            max_pool = core.CreateOperator(
                op_type,
                ["X_q" if do_quantize else "X"],
                ["Y_q" if engine == "DNNLOWP" else "Y"],
                stride=stride,
                kernel=kernel,
                pad=pad,
                order=order,
                engine=engine,
                device_option=gc,
            )
            net.Proto().op.extend([max_pool])

            if engine == "DNNLOWP":
                dequantize = core.CreateOperator("Dequantize", ["Y_q"], ["Y"],
                                                 engine=engine,
                                                 device_option=gc)
                net.Proto().op.extend([dequantize])

            self.ws.create_blob("X").feed(X, device_option=gc)
            self.ws.run(net)
            outputs.append(
                Output(Y=self.ws.blobs["Y"].fetch(),
                       op_type=op_type,
                       engine=engine))

        # Y_i = max(X_j) so the only error is in quantization of inputs
        check_quantized_results_close(outputs, ref=X)
Exemple #26
0
def recurrent_net(net,
                  cell_net,
                  inputs,
                  initial_cell_inputs,
                  links,
                  timestep=None,
                  scope=None,
                  outputs_with_grads=(0, )):
    '''
    net: the main net operator should be added to

    cell_net: cell_net which is executed in a recurrent fasion

    inputs: sequences to be fed into the recurrent net. Currently only one input
    is supported. It has to be in a format T x N x (D1...Dk) where T is lengths
    of the sequence. N is a batch size and (D1...Dk) are the rest of dimentions

    initial_cell_inputs: inputs of the cell_net for the 0 timestamp.
    Format for each input is:
        (cell_net_input_name, external_blob_with_data)

    links: a dictionary from cell_net input names in moment t+1 and
    output names of moment t. Currently we assume that each output becomes
    an input for the next timestep.

    timestep: name of the timestep blob to be used. If not provided "timestep"
    is used.

    scope: Internal blobs are going to be scoped in a format
    <scope_name>/<blob_name>
    If not provided we generate a scope name automatically

    outputs_with_grads : position indices of output blobs which will receive
    error gradient (from outside recurrent network) during backpropagation
    '''
    assert len(inputs) == 1, "Only one input blob is supported so far"

    input_blobs = [str(i[0]) for i in inputs]
    initial_input_blobs = [str(x[1]) for x in initial_cell_inputs]
    op_name = net.NextName('recurrent')

    def s(name):
        # We have to manually scope due to our internal/external blob
        # relationships.
        scope_name = op_name if scope is None else scope
        return "{}/{}".format(str(scope_name), str(name))

    # determine inputs that are considered to be references
    # it is those that are not referred to in inputs or initial_cell_inputs
    known_inputs = map(str, input_blobs + initial_input_blobs)
    known_inputs += [str(x[0]) for x in initial_cell_inputs]
    if timestep is not None:
        known_inputs.append(str(timestep))
    references = [
        b for b in cell_net.Proto().external_input if b not in known_inputs
    ]

    inner_outputs = list(cell_net.Proto().external_output)
    # These gradients are expected to be available during the backward pass
    inner_outputs_map = {o: o + '_grad' for o in inner_outputs}

    # compute the backward pass of the cell net
    backward_ops, backward_mapping = core.GradientRegistry.GetBackwardPass(
        cell_net.Proto().op, inner_outputs_map)
    backward_mapping = {str(k): str(v) for k, v in backward_mapping.items()}
    backward_cell_net = core.Net("RecurrentBackwardStep")

    del backward_cell_net.Proto().op[:]
    backward_cell_net.Proto().op.extend(backward_ops)

    # compute blobs used but not defined in the backward pass
    ssa, _ = core.get_ssa(backward_cell_net.Proto())
    undefined = core.get_undefined_blobs(ssa)

    # also add to the output list the intermediate outputs of fwd_step that
    # are used by backward.
    ssa, blob_versions = core.get_ssa(cell_net.Proto())
    scratches = [
        blob for (blob, ver) in blob_versions.items() if ver > 0
        and blob in undefined and blob not in cell_net.Proto().external_output
    ]
    backward_cell_net.Proto().external_input.extend(scratches)

    all_inputs = [i[1] for i in inputs] + [x[1] for x in initial_cell_inputs
                                           ] + references
    all_outputs = []

    cell_net.Proto().type = 'simple'
    backward_cell_net.Proto().type = 'simple'

    # Internal arguments used by RecurrentNetwork operator

    # Links are in the format blob_name, recurrent_states, offset.
    # In the moment t we know that corresponding data block is at
    # t + offset position in the recurrent_states tensor
    forward_links = []
    backward_links = []

    # Aliases are used to expose outputs to external world
    # Format (internal_blob, external_blob, offset)
    # Negative offset stands for going from the end,
    # positive - from the beginning
    aliases = []

    # States held inputs to the cell net
    recurrent_states = []

    for cell_input, _ in initial_cell_inputs:
        cell_input = str(cell_input)
        # Recurrent_states is going to be (T + 1) x ...
        # It stores all inputs and outputs of the cell net over time.
        # Or their gradients in the case of the backward pass.
        state = s(cell_input + "_states")
        states_grad = state + "_grad"
        cell_output = links[str(cell_input)]
        forward_links.append((cell_input, state, 0))
        forward_links.append((cell_output, state, 1))
        backward_links.append((cell_input + "_grad", states_grad, 0))
        backward_links.append((cell_output + "_grad", states_grad, 1))

        backward_cell_net.Proto().external_input.append(
            str(cell_output) + "_grad")
        aliases.append((state, cell_output + "_all", 1))
        aliases.append((state, cell_output + "_last", -1))
        all_outputs.extend([cell_output + "_all", cell_output + "_last"])

        recurrent_states.append(state)

    for input_t, input_blob in inputs:
        forward_links.append((str(input_t), str(input_blob), 0))
        backward_links.append(
            (backward_mapping[str(input_t)], str(input_blob) + "_grad", 0))
    backward_cell_net.Proto().external_input.extend(
        cell_net.Proto().external_input)
    backward_cell_net.Proto().external_input.extend(
        cell_net.Proto().external_output)

    def unpack_triple(x):
        if x:
            a, b, c = zip(*x)
            return a, b, c
        return [], [], []

    # Splitting to separate lists so we can pass them to c++
    # where we ensemle them back
    link_internal, link_external, link_offset = unpack_triple(forward_links)
    backward_link_internal, backward_link_external, backward_link_offset = \
        unpack_triple(backward_links)
    alias_src, alias_dst, alias_offset = unpack_triple(aliases)

    params = [x for x in references if x in backward_mapping.keys()]
    recurrent_inputs = [str(x[1]) for x in initial_cell_inputs]

    results = net.RecurrentNetwork(
        all_inputs,
        all_outputs + [s("step_workspaces")],
        param=map(all_inputs.index, params),
        alias_src=alias_src,
        alias_dst=map(str, alias_dst),
        alias_offset=alias_offset,
        recurrent_states=recurrent_states,
        initial_recurrent_state_ids=map(all_inputs.index, recurrent_inputs),
        link_internal=map(str, link_internal),
        link_external=map(str, link_external),
        link_offset=link_offset,
        backward_link_internal=map(str, backward_link_internal),
        backward_link_external=map(str, backward_link_external),
        backward_link_offset=backward_link_offset,
        step_net=str(cell_net.Proto()),
        backward_step_net=str(backward_cell_net.Proto()),
        timestep="timestep" if timestep is None else str(timestep),
        outputs_with_grads=outputs_with_grads,
    )
    # The last output is a list of step workspaces,
    # which is only needed internally for gradient propogation
    return results[:-1]
Exemple #27
0
    def __init__(self, model, input_record, output_names_or_num, function,
                 name='functional', output_dtypes=None, tags=None, **kwargs):

        # allow coercion
        input_record = schema.as_record(input_record)

        super(Functional, self).__init__(model, name, input_record, tags=tags, **kwargs)
        self._function = function
        self._kwargs = kwargs
        return_struct = (
            isinstance(output_names_or_num, list) or
            (isinstance(output_names_or_num, six.integer_types) and
             output_names_or_num != 1)
        )

        with scope.NameScope(self.name, reset=True):
            if isinstance(output_names_or_num, int):
                struct_output_schema = schema.NewRecord(
                    model.net, schema.RawTuple(output_names_or_num))
            elif isinstance(output_names_or_num, schema.Field):
                self.output_schema = output_names_or_num.clone(keep_blobs=True)
                return
            else:
                if not isinstance(output_names_or_num, list):
                    output_names_or_num = [output_names_or_num]
                out_tuple = [(out, np.void) for out in output_names_or_num]
                struct_output_schema = schema.NewRecord(
                    model.net, schema.Struct(*out_tuple))

        num_outputs = len(struct_output_schema.field_blobs())

        # functional layer returns Struct if more than one outputs or output is
        # a list, otherwise Scalar
        if return_struct:
            self.output_schema = struct_output_schema
        else:
            self.output_schema = struct_output_schema[0]

        # If output_dtypes is provided, use it for output schema. Otherwise
        # the shape and type will be inferred.
        if output_dtypes is not None:
            if not isinstance(output_dtypes, list):
                output_dtypes = [output_dtypes] * num_outputs
            assert len(output_dtypes) == num_outputs
            for dtype, scalar in zip(output_dtypes,
                                     self.output_schema.all_scalars()):
                scalar.set_type(dtype)
            return

        # Fake execution of the function to infer shapes and types automatically
        had_issues = False
        try:
            type_net = core.Net('_temp_type_and_shape_inference_net')
            schema.InitEmptyRecord(type_net, input_record, enforce_types=True)

            function(type_net, self.input_record, self.output_schema, **kwargs)
            (shapes, types) = workspace.InferShapesAndTypes([type_net], {})
            for i in range(num_outputs):
                scalar_schema = (self.output_schema[i] if return_struct
                                 else self.output_schema)
                blob = scalar_schema()
                if blob not in types or blob not in shapes:
                    had_issues = True
                    continue
                if shapes[blob] == []:
                    # Scalar type
                    shape = tuple()
                elif shapes[blob][0] == 0:
                    shape = tuple(shapes[blob][1:])
                else:
                    logger.warning("unexpected shape: {}".format(shapes[blob]))
                    # If batch dimension is not first - give up on shape
                    # inference for that blob
                    had_issues = True
                    continue

                # TODO(amalevich): Move it to some shared library
                dtype = None
                if types[blob] == caffe2_pb2.TensorProto.DOUBLE:
                    dtype = (np.float64, shape)
                elif types[blob] == caffe2_pb2.TensorProto.FLOAT:
                    dtype = (np.float32, shape)
                elif types[blob] == caffe2_pb2.TensorProto.INT32:
                    dtype = (np.int32, shape)
                elif types[blob] == caffe2_pb2.TensorProto.INT64:
                    dtype = (np.int64, shape)
                elif types[blob] == caffe2_pb2.TensorProto.FLOAT16:
                    dtype = (np.float16, shape)

                if dtype is not None:
                    scalar_schema.set_type(dtype)
        except TypeError as ex:
            had_issues = True
            logger.warning(str(ex))

        if had_issues:
            logger.warning(
                "Type inference had problems for layer: {}".format(self.name))
Exemple #28
0
    def test_prepare_normalization_and_normalize(self):
        feature_value_map = read_data()

        normalization_parameters = {}
        for name, values in feature_value_map.items():
            normalization_parameters[name] = normalization.identify_parameter(
                values, 10, feature_type=self._feature_type_override(name))
        for k, v in normalization_parameters.items():
            if id_to_type(k) == CONTINUOUS:
                self.assertEqual(v.feature_type, CONTINUOUS)
                self.assertIs(v.boxcox_lambda, None)
                self.assertIs(v.boxcox_shift, None)
            elif id_to_type(k) == BOXCOX:
                self.assertEqual(v.feature_type, BOXCOX)
                self.assertIsNot(v.boxcox_lambda, None)
                self.assertIsNot(v.boxcox_shift, None)
            else:
                assert v.feature_type == id_to_type(k)
        sorted_features, _ = sort_features_by_normalization(
            normalization_parameters)

        norm_net = core.Net("net")
        C2.set_net(norm_net)
        preprocessor = PreprocessorNet(False)
        input_matrix = np.zeros([10000, len(sorted_features)],
                                dtype=np.float32)
        for i, feature in enumerate(sorted_features):
            input_matrix[:, i] = feature_value_map[feature]
        input_matrix_blob = "input_matrix_blob"
        workspace.FeedBlob(input_matrix_blob, np.array([], dtype=np.float32))
        output_blob, _ = preprocessor.normalize_dense_matrix(
            input_matrix_blob, sorted_features, normalization_parameters, "",
            False)
        workspace.FeedBlob(input_matrix_blob, input_matrix)
        workspace.RunNetOnce(norm_net)
        normalized_feature_matrix = workspace.FetchBlob(output_blob)

        normalized_features = {}
        on_column = 0
        for feature in sorted_features:
            norm = normalization_parameters[feature]
            if norm.feature_type == ENUM:
                column_size = len(norm.possible_values)
            else:
                column_size = 1
            normalized_features[
                feature] = normalized_feature_matrix[:,
                                                     on_column:(on_column +
                                                                column_size)]
            on_column += column_size

        self.assertTrue(
            all([
                np.isfinite(parameter.stddev) and np.isfinite(parameter.mean)
                for parameter in normalization_parameters.values()
            ]))
        for k, v in six.iteritems(normalized_features):
            self.assertTrue(np.all(np.isfinite(v)))
            feature_type = normalization_parameters[k].feature_type
            if feature_type == identify_types.PROBABILITY:
                sigmoidv = special.expit(v)
                self.assertTrue(
                    np.all(
                        np.logical_and(np.greater(sigmoidv, 0),
                                       np.less(sigmoidv, 1))))
            elif feature_type == identify_types.ENUM:
                possible_values = normalization_parameters[k].possible_values
                self.assertEqual(v.shape[0], len(feature_value_map[k]))
                self.assertEqual(v.shape[1], len(possible_values))

                possible_value_map = {}
                for i, possible_value in enumerate(possible_values):
                    possible_value_map[possible_value] = i

                for i, row in enumerate(v):
                    original_feature = feature_value_map[k][i]
                    self.assertEqual(possible_value_map[original_feature],
                                     np.where(row == 1)[0][0])
            elif feature_type == identify_types.QUANTILE:
                for i, feature in enumerate(v[0]):
                    original_feature = feature_value_map[k][i]
                    expected = self._value_to_quantile(
                        original_feature,
                        normalization_parameters[k].quantiles)
                    self.assertAlmostEqual(feature, expected, 2)
            elif feature_type == identify_types.BINARY:
                pass
            elif (feature_type == identify_types.CONTINUOUS
                  or feature_type == identify_types.BOXCOX):
                one_stddev = np.isclose(np.std(v, ddof=1), 1, atol=0.01)
                zero_stddev = np.isclose(np.std(v, ddof=1), 0, atol=0.01)
                zero_mean = np.isclose(np.mean(v), 0, atol=0.01)
                self.assertTrue(
                    np.all(zero_mean),
                    "mean of feature {} is {}, not 0".format(k, np.mean(v)),
                )
                self.assertTrue(np.all(np.logical_or(one_stddev, zero_stddev)))
            elif feature_type == identify_types.CONTINUOUS_ACTION:
                less_than_max = v < 1
                more_than_min = v > -1
                self.assertTrue(
                    np.all(less_than_max),
                    "values are not less than 1: {}".format(
                        v[less_than_max == False]),
                )
                self.assertTrue(
                    np.all(more_than_min),
                    "values are not more than -1: {}".format(
                        v[more_than_min == False]),
                )
            else:
                raise NotImplementedError()
    def test_groupwise_dnnlowp_conv_relu_int(
        self,
        stride,
        pad,
        kernel,
        dilation,
        size,
        group,
        input_channels_per_group,
        output_channels_per_group,
        batch_size,
        order,
        gc,
        dc,
    ):
        if group > 1:
            dilation = 1

        X, W, b = generate_conv_inputs(
            stride,
            pad,
            kernel,
            dilation,
            size,
            group,
            input_channels_per_group,
            output_channels_per_group,
            batch_size,
            order,
            True,  # group-wise
        )

        Output = collections.namedtuple("Output",
                                        ["Y", "op_type", "engine", "order"])
        outputs = []

        op_engine_list = [
            ("Conv", ""),
            ("ConvRelu", "DNNLOWP"),
            ("ConvRelu", "DNNLOWP_16"),
            ("Int8ConvRelu", "DNNLOWP"),
        ]

        for op_type, engine in op_engine_list:
            net = core.Net("test_net")

            if "DNNLOWP" in engine:
                quantize = core.CreateOperator("Quantize", ["X"], ["X_q"],
                                               engine=engine,
                                               device_option=gc)
                net.Proto().op.extend([quantize])

                conv = core.CreateOperator(
                    op_type,
                    ["X_q", "W", "b"],
                    ["Y_q"],
                    stride=stride,
                    kernel=kernel,
                    dilation=dilation,
                    pad=pad,
                    order=order,
                    engine=engine,
                    group=group,
                    quantize_groupwise=1,
                    device_option=gc,
                )
                # groupwise quantization only works with static quantization
                # so we need to set quantization parameters
                dnnlowp_utils.add_quantization_param_args(conv, outputs[0][0])
                net.Proto().op.extend([conv])

                dequantize = core.CreateOperator("Dequantize", ["Y_q"], ["Y"],
                                                 engine=engine,
                                                 device_option=gc)
                net.Proto().op.extend([dequantize])
            else:
                conv = core.CreateOperator(
                    op_type,
                    ["X", "W", "b"],
                    ["Y"],
                    stride=stride,
                    kernel=kernel,
                    dilation=dilation,
                    pad=pad,
                    order=order,
                    engine=engine,
                    group=group,
                    device_option=gc,
                )
                net.Proto().op.extend([conv])

                relu = core.CreateOperator("Relu", ["Y"], ["Y"],
                                           engine=engine,
                                           device_option=gc)
                net.Proto().op.extend([relu])

            self.ws.create_blob("X").feed(X, device_option=gc)
            self.ws.create_blob("W").feed(W, device_option=gc)
            self.ws.create_blob("b").feed(b, device_option=gc)
            self.ws.run(net)
            Y = self.ws.blobs["Y"].fetch()
            outputs.append(
                Output(Y=Y, op_type=op_type, engine=engine, order=order))

        check_quantized_results_close(outputs)
Exemple #30
0
    def test_dataset_ops(self):
        """
        1. Defining the schema of our dataset.

        This example schema could represent, for example, a search query log.
        """
        schema = Struct(
            # fixed size vector, which will be stored as a matrix when batched
            ('dense', Scalar((np.float32, 3))),
            # could represent a feature map from feature ID to float value
            ('floats', Map(Scalar(np.int32), Scalar(np.float32))),
            # could represent a multi-valued categorical feature map
            ('int_lists', Map(
                Scalar(np.int32),
                List(Scalar(np.int64)),
            )),
            # could represent a multi-valued, weighted categorical feature map
            ('id_score_pairs',
             Map(
                 Scalar(np.int32),
                 Map(Scalar(np.int64),
                     Scalar(np.float32),
                     keys_name='ids',
                     values_name='scores'),
             )),
            # additional scalar information
            ('metadata',
             Struct(
                 ('user_id', Scalar(np.int64)),
                 ('user_embed', Scalar((np.float32, 2))),
                 ('query', Scalar(str)),
             )),
        )
        """
        This is what the flattened fields for this schema look like, along
        with its type. Each one of these fields will be stored, read and
        written as a tensor.
        """
        expected_fields = [
            ('dense', (np.float32, 3)),
            ('floats:lengths', np.int32),
            ('floats:values:keys', np.int32),
            ('floats:values:values', np.float32),
            ('int_lists:lengths', np.int32),
            ('int_lists:values:keys', np.int32),
            ('int_lists:values:values:lengths', np.int32),
            ('int_lists:values:values:values', np.int64),
            ('id_score_pairs:lengths', np.int32),
            ('id_score_pairs:values:keys', np.int32),
            ('id_score_pairs:values:values:lengths', np.int32),
            ('id_score_pairs:values:values:values:ids', np.int64),
            ('id_score_pairs:values:values:values:scores', np.float32),
            ('metadata:user_id', np.int64),
            ('metadata:user_embed', (np.float32, 2)),
            ('metadata:query', str),
        ]
        zipped = zip(expected_fields, schema.field_names(),
                     schema.field_types())
        for (ref_name, ref_type), name, dtype in zipped:
            self.assertEquals(ref_name, name)
            self.assertEquals(np.dtype(ref_type), dtype)
        """
        2. The contents of our dataset.

        Contents as defined below could represent, for example, a log of
        search queries along with dense, sparse features and metadata.
        The dataset below has 3 top-level entries.
        """
        contents_raw = [
            # dense
            [[1.1, 1.2, 1.3], [2.1, 2.2, 2.3], [3.1, 3.2, 3.3]],
            # floats
            [1, 2, 3],  # len
            [11, 21, 22, 31, 32, 33],  # key
            [1.1, 2.1, 2.2, 3.1, 3.2, 3.3],  # value
            # int lists
            [2, 0, 1],  # len
            [11, 12, 31],  # key
            [2, 4, 3],  # value:len
            [111, 112, 121, 122, 123, 124, 311, 312, 313],  # value:value
            # id score pairs
            [1, 2, 2],  # len
            [11, 21, 22, 31, 32],  # key
            [1, 1, 2, 2, 3],  # value:len
            [111, 211, 221, 222, 311, 312, 321, 322, 323],  # value:ids
            [11.1, 21.1, 22.1, 22.2, 31.1, 31.2, 32.1, 32.2,
             32.3],  # val:score
            # metadata
            [123, 234, 456],  # user_id
            [[0.2, 0.8], [0.5, 0.5], [0.7, 0.3]],  # user_embed
            ['dog posts', 'friends who like to', 'posts about ca'],  # query
        ]
        # convert the above content to ndarrays, checking against the schema
        contents = from_blob_list(schema, contents_raw)
        """
        3. Creating and appending to the dataset.
        We first create an empty dataset with the given schema.
        Then, a Writer is used to append these entries to the dataset.
        """
        ds = dataset.Dataset(schema)
        net = core.Net('init')
        with core.NameScope('init'):
            ds.init_empty(net)

            content_blobs = NewRecord(net, contents)
            FeedRecord(content_blobs, contents)
            writer = ds.writer(init_net=net)
            writer.write_record(net, content_blobs)
        workspace.RunNetOnce(net)
        """
        4. Iterating through the dataset contents.

        If we were to iterate through the top level entries of our dataset,
        this is what we should expect to see:
        """
        entries_raw = [
            (
                [[1.1, 1.2, 1.3]],  # dense
                [1],
                [11],
                [1.1],  # floats
                [2],
                [11, 12],
                [2, 4],
                [111, 112, 121, 122, 123, 124],  # intlst
                [1],
                [11],
                [1],
                [111],
                [11.1],  # id score pairs
                [123],
                [[0.2, 0.8]],
                ['dog posts'],  # metadata
            ),
            (
                [[2.1, 2.2, 2.3]],  # dense
                [2],
                [21, 22],
                [2.1, 2.2],  # floats
                [0],
                [],
                [],
                [],  # int list
                [2],
                [21, 22],
                [1, 2],
                [211, 221, 222],
                [21.1, 22.1, 22.2],
                [234],
                [[0.5, 0.5]],
                ['friends who like to'],  # metadata
            ),
            (
                [[3.1, 3.2, 3.3]],  # dense
                [3],
                [31, 32, 33],
                [3.1, 3.2, 3.3],  # floats
                [1],
                [31],
                [3],
                [311, 312, 313],  # int lst
                [2],
                [31, 32],
                [2, 3],
                [311, 312, 321, 322, 323],
                [31.1, 31.2, 32.1, 32.2, 32.3],  # id score list
                [456],
                [[0.7, 0.3]],
                ['posts about ca'],  # metadata
            ),
            # after the end of the dataset, we will keep getting empty vectors
            (
                [], ) * 16,
            ([], ) * 16,
        ]
        entries = [from_blob_list(schema, e) for e in entries_raw]
        """
        Let's go ahead and create the reading nets.
        We will run `read` net multiple times and assert that we are reading the
        entries the way we stated above.
        """
        read_init_net = core.Net('read_init')
        read_next_net = core.Net('read_next')
        reader = ds.reader(read_init_net)
        should_continue, batch = reader.read_record(read_next_net)

        workspace.RunNetOnce(read_init_net)
        workspace.CreateNet(read_next_net, True)

        for entry in entries:
            workspace.RunNet(str(read_next_net))
            actual = FetchRecord(batch)
            _assert_records_equal(actual, entry)
        """
        5. Reading/writing in a single plan

        If all of operations on the data are expressible as Caffe2 operators,
        we don't need to load the data to python, iterating through the dataset
        in a single Plan.

        Where we will process the dataset a little and store it in a second
        dataset. We can reuse the same Reader since it supports reset.
        """
        reset_net = core.Net('reset_net')
        reader.reset(reset_net)
        read_step, batch = reader.execution_step()
        """ We will add the line number * 1000 to the feature ids. """
        process_net = core.Net('process')
        line_no = Const(process_net, 0, dtype=np.int32)
        const_one = Const(process_net, 1000, dtype=np.int32)
        process_net.Add([line_no, const_one], [line_no])
        field = batch.floats.keys.get()
        process_net.Print(field, [])
        process_net.Add([field, line_no], field, broadcast=1, axis=0)
        """ Lets create a second dataset and append to it. """
        ds2 = dataset.Dataset(schema, name='dataset2')
        ds2.init_empty(reset_net)
        writer = ds2.writer(reset_net)
        writer.write_record(process_net, batch)
        # commit is not necessary for DatasetWriter but will add it for
        # generality of the example
        commit_net = core.Net('commit')
        writer.commit(commit_net)
        """ Time to create and run a plan which will do the processing """
        plan = core.Plan('process')
        plan.AddStep(core.execution_step('reset', reset_net))
        plan.AddStep(read_step.AddNet(process_net))
        plan.AddStep(core.execution_step('commit', commit_net))
        workspace.RunPlan(plan)
        """
        Now we should have dataset2 populated.
        """
        ds2_data = FetchRecord(ds2.content())
        field = ds2_data.floats.keys
        field.set(blob=field.get() - [1000, 2000, 2000, 3000, 3000, 3000])
        _assert_records_equal(contents, ds2_data)
        """
        6. Slicing a dataset

        You can create a new schema from pieces of another schema and reuse
        the same data.
        """
        subschema = Struct(('top_level', schema.int_lists.values))
        int_list_contents = contents.int_lists.values.field_names()
        self.assertEquals(len(subschema.field_names()), len(int_list_contents))
        """
        7. Random Access a dataset

        """
        read_init_net = core.Net('read_init')
        read_next_net = core.Net('read_next')

        idx = np.array([2, 1, 0])
        indices_blob = Const(read_init_net, idx, name='indices')
        reader = ds.random_reader(read_init_net, indices_blob)
        reader.computeoffset(read_init_net)

        should_stop, batch = reader.read_record(read_next_net)

        workspace.CreateNet(read_init_net, True)
        workspace.RunNetOnce(read_init_net)

        workspace.CreateNet(read_next_net, True)

        for i in range(len(entries)):
            k = idx[i] if i in idx else i
            entry = entries[k]
            workspace.RunNet(str(read_next_net))
            actual = FetchRecord(batch)
            _assert_records_equal(actual, entry)
        workspace.RunNet(str(read_next_net))
        self.assertEquals(True, workspace.FetchBlob(should_stop))
        """
        8. Random Access a dataset with loop_over = true

        """
        read_init_net = core.Net('read_init')
        read_next_net = core.Net('read_next')

        idx = np.array([2, 1, 0])
        indices_blob = Const(read_init_net, idx, name='indices')
        reader = ds.random_reader(read_init_net, indices_blob, loop_over=True)
        reader.computeoffset(read_init_net)

        should_stop, batch = reader.read_record(read_next_net)

        workspace.CreateNet(read_init_net, True)
        workspace.RunNetOnce(read_init_net)

        workspace.CreateNet(read_next_net, True)

        for _ in range(len(entries) * 3):
            workspace.RunNet(str(read_next_net))
            self.assertEquals(False, workspace.FetchBlob(should_stop))
        """
        9. Sort and shuffle a dataset

        This sort the dataset using the score of a certain column,
        and then shuffle within each chunk of size batch_size * shuffle_size
        before shuffling the chunks.

        """
        read_init_net = core.Net('read_init')
        read_next_net = core.Net('read_next')

        reader = ds.random_reader(read_init_net)
        reader.sort_and_shuffle(read_init_net, 'int_lists:lengths', 1, 2)
        reader.computeoffset(read_init_net)

        should_continue, batch = reader.read_record(read_next_net)

        workspace.CreateNet(read_init_net, True)
        workspace.RunNetOnce(read_init_net)

        workspace.CreateNet(read_next_net, True)

        expected_idx = np.array([2, 1, 0])
        for i in range(len(entries)):
            k = expected_idx[i] if i in expected_idx else i
            entry = entries[k]
            workspace.RunNet(str(read_next_net))
            actual = FetchRecord(batch)
            _assert_records_equal(actual, entry)
        """
        Trim a dataset
        """
        trim_net = core.Net('trim_ds')
        ds.trim(trim_net, multiple_of=2)
        workspace.RunNetOnce(trim_net)
        trimmed = FetchRecord(ds.content())
        EXPECTED_SIZES = [2, 2, 3, 3, 2, 2, 2, 6, 2, 3, 3, 4, 4, 2, 2, 2]
        actual_sizes = [d.shape[0] for d in trimmed.field_blobs()]
        self.assertEquals(EXPECTED_SIZES, actual_sizes)