Example #1
def MLP(order, cudnn_ws, mkl):
    model = ModelHelper(name="benchmark")
    d = 256
    depth = 20
    width = 3
    for i in range(depth):
        for j in range(width):
            current = "fc_{}_{}".format(i, j) if i > 0 else "data"
            next_ = "fc_{}_{}".format(i + 1, j)
            brew.fc(model,
                    current,
                    next_,
                    dim_in=d,
                    dim_out=d,
                    weight_init=('XavierFill', {}),
                    bias_init=('XavierFill', {}))

    brew.sum(model, ["fc_{}_{}".format(depth, j) for j in range(width)],
             ["sum"])
    brew.fc(model,
            "sum",
            "last",
            dim_in=d,
            dim_out=1000,
            weight_init=('XavierFill', {}),
            bias_init=('XavierFill', {}))
    xent = model.LabelCrossEntropy(["last", "label"], "xent")
    if not mkl:
        model.AveragedLoss(xent, "loss")
    return model, d
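To make the benchmark self-contained, here is a minimal sketch of how such a net can be run (it assumes the standard caffe2 workspace API; the batch size and argument values are illustrative):

import numpy as np
from caffe2.python import workspace

model, d = MLP(order="NCHW", cudnn_ws=None, mkl=False)
workspace.RunNetOnce(model.param_init_net)
workspace.FeedBlob("data", np.random.randn(16, d).astype(np.float32))
workspace.FeedBlob("label",
                   np.random.randint(0, 1000, size=(16,)).astype(np.int32))
workspace.RunNetOnce(model.net)
print(workspace.FetchBlob("loss"))  # scalar averaged cross-entropy loss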
Example #2
    def test_release_blobs_when_used(self):
        m = model_helper.ModelHelper()
        fc1 = brew.fc(m, "data", "x", dim_in=2, dim_out=2)
        fc2 = brew.fc(m, fc1, "y", dim_in=2, dim_out=2)
        fc3 = brew.fc(m, fc1, "z", dim_in=2, dim_out=2)
        fc4 = brew.fc(m, fc2, "u", dim_in=2, dim_out=2)
        m.net.Alias(["u"], ["u_alias"])

        brew.sum(m, [fc3, fc4], "out")

        with_frees = memonger.release_blobs_when_used(m.net.Proto(),
                                                      {"data"})

        # "out" is an external output and "u" is aliased, so neither can be freed
        expect_frees = {"x", "y", "z"}
        found_frees = set()
        for op in with_frees.op:
            if op.type == "Free":
                self.assertFalse(op.input[0] in found_frees)  # no double frees
                found_frees.add(op.input[0])
            else:
                # Check a freed blob is not used anymore
                for inp in op.input:
                    self.assertFalse(inp in found_frees)
                for outp in op.output:
                    self.assertFalse(outp in found_frees)

        self.assertEqual(expect_frees, found_frees)
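The same API can be exercised outside a test; a minimal sketch (blob names are illustrative, using only the modules already shown above):

from caffe2.python import brew, memonger, model_helper

m = model_helper.ModelHelper()
h = brew.fc(m, "data", "hidden", dim_in=2, dim_out=2)
brew.fc(m, h, "out", dim_in=2, dim_out=2)
proto = memonger.release_blobs_when_used(m.net.Proto(), {"data"})
print([op.type for op in proto.op])  # the FC ops plus a Free after "hidden"'s last use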
Example #3
    def test_verify_graph_inequality(self, input_dim, output_dim, batch_size):
        m = model_helper.ModelHelper()
        m.Proto().type = "dag"
        m.Proto().num_workers = 4
        with core.NameScope("name_x"):
            fc1 = brew.fc(m, "data", "x", dim_in=input_dim, dim_out=output_dim)
            fc2 = brew.fc(m, fc1, "y", dim_in=output_dim, dim_out=output_dim)
            fc3 = brew.fc(m, fc1, "z", dim_in=output_dim, dim_out=output_dim)
            brew.sum(m, [fc2, fc3], "out")

        m2 = model_helper.ModelHelper()
        m2.Proto().type = "dag"
        m2.Proto().num_workers = 4
        with core.NameScope("name_x"):
            fc1 = brew.fc(m2,
                          "data",
                          "x",
                          dim_in=input_dim,
                          dim_out=output_dim)
            fc2 = brew.fc(m2, fc1, "y", dim_in=output_dim, dim_out=output_dim)
            fc3 = brew.fc(m2, fc1, "y", dim_in=output_dim, dim_out=output_dim)
            brew.sum(m2, [fc2, fc3], "out")

        self.assertFalse(
            memonger.verify_graph_equality(m.net.Proto(), m2.net.Proto()))
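For contrast, a sketch of the positive case; assuming verify_graph_equality treats a graph as equal to itself, the test could also assert:

        self.assertTrue(
            memonger.verify_graph_equality(m.net.Proto(), m.net.Proto()))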
Example #4
def MLP(order, cudnn_ws, device):
    model = ModelHelper(name="benchmark")
    d = 256
    depth = 20
    width = 3
    for i in range(depth):
        for j in range(width):
            current = "fc_{}_{}".format(i, j) if i > 0 else "data"
            next_ = "fc_{}_{}".format(i + 1, j)
            brew.fc(
                model,
                current, next_,
                dim_in=d, dim_out=d,
                weight_init=('XavierFill', {}),
                bias_init=('XavierFill', {}))

    brew.sum(model, ["fc_{}_{}".format(depth, j) for j in range(width)], ["sum"])
    brew.fc(model, "sum", "last",
             dim_in=d, dim_out=1000,
             weight_init=('XavierFill', {}),
             bias_init=('XavierFill', {}))
    xent = model.LabelCrossEntropy(["last", "label"], "xent")
    if device != 'MKL':
        model.AveragedLoss(xent, "loss")
    return model, d
Example #5
def MLP(order, gpu_engine_ws):
    model = model_helper.ModelHelper(name="MLP")
    d = 256
    depth = 20
    width = 3
    for i in range(depth):
        for j in range(width):
            current = "fc_{}_{}".format(i, j) if i > 0 else "data"
            next_ = "fc_{}_{}".format(i + 1, j)
            brew.fc(
                model,
                current,
                next_,
                dim_in=d,
                dim_out=d,
                weight_init=('XavierFill', {}),
                bias_init=('XavierFill', {}),
            )
    brew.sum(model, ["fc_{}_{}".format(depth, j) for j in range(width)],
             ["sum"])
    brew.fc(
        model,
        "sum",
        "last",
        dim_in=d,
        dim_out=1000,
        weight_init=('XavierFill', {}),
        bias_init=('XavierFill', {}),
    )
    xent = model.net.LabelCrossEntropy(["last", "label"], "xent")
    model.net.AveragedLoss(xent, "loss")
    return model, d
Example #6
    def test_fast_memonger_unique_outputs(self):
        m = model_helper.ModelHelper()
        fc = []
        for i in range(2):
            z = brew.fc(m,
                        "data{}".format(i),
                        "fc{}".format(i),
                        dim_in=2,
                        dim_out=2)
            fc.append(z)
        r = []
        # The trick here is to have the same input appear twice in the same Sum
        for x in fc:
            for y in fc:
                r.append(brew.sum(m, [x, y], 1))
        concated = brew.concat(m, r, "concated")
        brew.relu(m, concated, "merged")

        static_blobs = \
            [o for op in m.param_init_net.Proto().op for o in op.output] + \
            ["merged"] + ["data{}".format(i) for i in range(len(fc))]

        optimized_net = memonger.optimize_inference_fast(
            m.Proto(), static_blobs)
        for op in optimized_net.op:
            self.assertEqual(len(op.output), len(set(op.output)), str(op))
Example #7
 def cross_gated_global_pool(
     self,
     blob_in,
     dim_in,
     prefix='',
     ratio=8,
     reduced_dim=2,
     res_gate=False,
 ):
     gp_blob = self.model.ReduceBackMean(blob_in,
                                         prefix + '_g_pool',
                                         num_reduce_dim=reduced_dim)
     fc1 = self.model.FC(gp_blob, prefix + '_fc1', dim_in, dim_in // ratio)
     fc1_relu = self.model.Relu(fc1, prefix + '_fc1_relu')
     fc2 = self.model.FC(fc1_relu, prefix + '_fc2', dim_in // ratio, dim_in)
     sig = self.model.Sigmoid(fc2, fc2 + '_sig')
     shortcut_blob = self.prev_blob
     self.prev_blob = self.model.Mul([self.prev_blob, sig],
                                     [prefix + 'g_pool_out'],
                                     broadcast=1,
                                     axis=0)
     if res_gate:
         self.prev_blob = brew.sum(self.model,
                                   [shortcut_blob, self.prev_blob],
                                   prefix + 'res_pool_out')
     return self.prev_blob
Example #8
    def add_simple_block(
        self,
        input_filters,
        num_filters,
        down_sampling=False,
        spatial_batch_norm=True
    ):
        self.comp_idx = 0
        shortcut_blob = self.prev_blob

        # 3x3
        self.add_conv(
            input_filters,
            num_filters,
            kernel=3,
            stride=(1 if down_sampling is False else 2),
            pad=1
        )

        if spatial_batch_norm:
            self.add_spatial_bn(num_filters)
        self.add_relu()

        last_conv = self.add_conv(num_filters, num_filters, kernel=3, pad=1)
        if spatial_batch_norm:
            last_conv = self.add_spatial_bn(num_filters)

        # Increase of dimensions, need a projection for the shortcut
        if (num_filters != input_filters):
            shortcut_blob = brew.conv(
                self.model,
                shortcut_blob,
                'shortcut_projection_%d' % self.comp_count,
                input_filters,
                num_filters,
                weight_init=("MSRAFill", {}),
                kernel=1,
                stride=(1 if down_sampling is False else 2),
                no_bias=self.no_bias,
            )
            if spatial_batch_norm:
                shortcut_blob = brew.spatial_bn(
                    self.model,
                    shortcut_blob,
                    'shortcut_projection_%d_spatbn' % self.comp_count,
                    num_filters,
                    epsilon=1e-3,
                    is_test=self.is_test,
                )

        self.prev_blob = brew.sum(
            self.model, [shortcut_blob, last_conv],
            'comp_%d_sum_%d' % (self.comp_count, self.comp_idx)
        )
        self.comp_idx += 1
        self.add_relu()

        # Keep track of the number of high-level components in this ResNetBuilder
        self.comp_count += 1
Example #9
    def test_forward_optim_tree_dag_traversal(self):
        input_dim = 4
        output_dim = 4
        batch_size = 4

        m = model_helper.ModelHelper()
        m.Proto().type = "dag"
        m.Proto().num_workers = 4

        with core.NameScope("name_x"):
            fc1 = brew.fc(m,
                          "data",
                          "fc1",
                          dim_in=input_dim,
                          dim_out=output_dim)
            fc2 = brew.fc(m, fc1, "fc2", dim_in=output_dim, dim_out=output_dim)

            fc3 = brew.fc(m, fc2, "fc3", dim_in=output_dim, dim_out=output_dim)
            fc4 = brew.fc(m, fc3, "fc4", dim_in=output_dim, dim_out=output_dim)
            fc5 = brew.fc(m, fc4, "fc5", dim_in=output_dim, dim_out=output_dim)

            # Branch
            fc3b = brew.fc(m,
                           fc2,
                           "fc3b",
                           dim_in=output_dim,
                           dim_out=output_dim)
            fc4b = brew.fc(m,
                           fc3b,
                           "fc4b",
                           dim_in=output_dim,
                           dim_out=output_dim)
            fc5b = brew.fc(m,
                           fc4b,
                           "fc5b",
                           dim_in=output_dim,
                           dim_out=output_dim)

            fc5sum = brew.sum(m, [fc5, fc5b], "fc5sum")

            fc5.Relu([], fc5sum) \
               .Softmax([], "pred1") \
               .LabelCrossEntropy(["label"], ["xent1"]) \
               .AveragedLoss([], "loss1")
            fc6 = brew.fc(m, fc5, "fc6", dim_in=output_dim, dim_out=output_dim)
            fc6.Relu([], fc6) \
               .Softmax([], "pred2") \
               .LabelCrossEntropy(["label"], ["xent2"]) \
               .AveragedLoss([], "loss2")

        blobs_before = count_blobs(m.net.Proto())
        # Add name_x/fc5_w as a head (it belongs to a non-root op)
        # to make sure that DAG traversal always starts from root ops
        optim_proto = memonger.optimize_inference_for_dag(
            m.net, ["name_x/fc5_w", "name_x/data"], "name_x")
        blobs_after = count_blobs(optim_proto)
        self.assertLess(blobs_after, blobs_before)
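The memonger tests above also rely on a count_blobs helper that is not shown here; a plausible sketch, assuming it simply counts the distinct blob names referenced by a NetDef:

def count_blobs(proto):
    # Collect every blob name that appears as an operator input or output
    blobs = set()
    for op in proto.op:
        blobs.update(op.input)
        blobs.update(op.output)
    return len(blobs)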
Example #10
 def add_residual_block(self, in_blob, out_dim):
     self.conv_prelu(in_blob, out_dim)
     self.prev_dim = out_dim
     self.conv_prelu(self.prev_blob, out_dim)
     self.prev_dim = out_dim
     self.prev_blob = brew.sum(
         self.model, [self.prev_blob, in_blob],
         "res{}_{}".format(self.comp_idx, self.comp_count - 1))
     return self.prev_blob
Example #11
    def test_forward_optim_tree_daggy(self, input_dim, output_dim, batch_size):
        m = model_helper.ModelHelper()
        m.Proto().type = "dag"
        m.Proto().num_workers = 4

        with core.NameScope("name_x"):
            fc1 = brew.fc(m, "data", "fc1", dim_in=input_dim, dim_out=output_dim)
            fc2 = brew.fc(m, fc1, "fc2", dim_in=output_dim, dim_out=output_dim)

            fc3 = brew.fc(m, fc2, "fc3", dim_in=output_dim, dim_out=output_dim)
            fc4 = brew.fc(m, fc3, "fc4", dim_in=output_dim, dim_out=output_dim)
            fc5 = brew.fc(m, fc4, "fc5", dim_in=output_dim, dim_out=output_dim)

            # Branch
            fc3b = brew.fc(m, fc2, "fc3b", dim_in=output_dim, dim_out=output_dim)
            fc4b = brew.fc(m, fc3b, "fc4b", dim_in=output_dim, dim_out=output_dim)
            fc5b = brew.fc(m, fc4b, "fc5b", dim_in=output_dim, dim_out=output_dim)

            fc5sum = brew.sum(m, [fc5, fc5b], "fc5sum")

            fc5.Relu([], fc5sum) \
               .Softmax([], "pred1") \
               .LabelCrossEntropy(["label"], ["xent1"]) \
               .AveragedLoss([], "loss1")
            fc6 = brew.fc(m, fc5, "fc6", dim_in=output_dim, dim_out=output_dim)
            fc6.Relu([], fc6) \
               .Softmax([], "pred2") \
               .LabelCrossEntropy(["label"], ["xent2"]) \
               .AveragedLoss([], "loss2")

        blobs_before = count_blobs(m.net.Proto())
        optim_proto = memonger.optimize_inference_for_dag(
            m.net, ["name_x/data"], "name_x"
        )
        blobs_after = count_blobs(optim_proto)
        self.assertLess(blobs_after, blobs_before)

        # Test networks produce exactly same results
        data = np.random.randn(batch_size, input_dim).astype(np.float32)
        label = np.random.randint(
            low=0, high=output_dim, size=(batch_size,)).astype(np.int32)
        workspace.RunNetOnce(m.param_init_net)
        workspace.FeedBlob("name_x/data", data)
        workspace.FeedBlob("name_x/label", label)
        workspace.RunNetOnce(m.net)
        loss1 = workspace.FetchBlob("name_x/loss1")
        loss2 = workspace.FetchBlob("name_x/loss2")
        workspace.RunNetOnce(optim_proto)
        optimized_loss1 = workspace.FetchBlob("name_x/loss1")
        optimized_loss2 = workspace.FetchBlob("name_x/loss2")
        np.testing.assert_almost_equal(loss1, optimized_loss1)
        np.testing.assert_almost_equal(loss2, optimized_loss2)
Example #12
 def block_head(self, model, v, towers, num_in_channels, num_out_channels,
                scale=1.0, relu=True, name='block_head_node'):
     tower_mixed = brew.concat(model, towers, blob_out=name+'_tower_mixed')
     tower_out = self.conv_factory(model, tower_mixed, num_in_channels,
                                   num_filters=num_out_channels,
                                   kernel=1, relu=relu, name=name+'tower_out')
     #v = v + scale * tower_out
     scaled = model.Scale(tower_out, name + '_scale', scale=scale)
     v = brew.sum(model, [v, scaled], name+'_sum')
     #
     if relu is True:
         v = brew.relu(model, v, name + '_relu')
     return v
Example #13
    def add_simple_block(self,
                         input_filters,
                         num_filters,
                         down_sampling=False,
                         spatial_batch_norm=True):
        self.comp_idx = 0
        shortcut_blob = self.prev_blob

        # 3x3
        self.add_conv(input_filters,
                      num_filters,
                      kernel=3,
                      stride=(1 if down_sampling is False else 2),
                      pad=1)

        if spatial_batch_norm:
            self.add_spatial_bn(num_filters)
        self.add_relu()

        last_conv = self.add_conv(num_filters, num_filters, kernel=3, pad=1)
        if spatial_batch_norm:
            last_conv = self.add_spatial_bn(num_filters)

        if (num_filters != input_filters):
            shortcut_blob = brew.conv(
                self.model,
                shortcut_blob,
                'shortcut_projection_%d' % self.comp_count,
                input_filters,
                num_filters,
                weight_init=("MSRAFill", {}),
                kernel=1,
                stride=(1 if down_sampling is False else 2),
                no_bias=self.no_bias,
            )
            if spatial_batch_norm:
                shortcut_blob = brew.spatial_bn(
                    self.model,
                    shortcut_blob,
                    'shortcut_projection_%d_spatbn' % self.comp_count,
                    num_filters,
                    epsilon=1e-3,
                    is_test=self.is_test,
                )

        self.prev_blob = brew.sum(
            self.model, [shortcut_blob, last_conv],
            'comp_%d_sum_%d' % (self.comp_count, self.comp_idx))
        self.comp_idx += 1
        self.add_relu()
        self.comp_count += 1
Example #14
    def add_simple_block(
        self,
        input_filters,
        num_filters,
        down_sampling=False,
        spatial_batch_norm=True,
    ):
        self.comp_idx = 0
        shortcut_blob = self.prev_blob

        if spatial_batch_norm:
            self.add_spatial_bn(input_filters)
        pre_relu = self.add_relu()

        self.add_conv(
            input_filters,
            num_filters,
            kernel=3,
            stride=(1 if down_sampling is False else 2),
            pad=1,
        )

        if spatial_batch_norm:
            self.add_spatial_bn(num_filters)
        self.add_relu()

        last_conv = self.add_conv(num_filters, num_filters, kernel=3, pad=1)

        # Increase of dimensions, need a projection for the shortcut
        if (num_filters != input_filters):
            shortcut_blob = brew.conv(
                self.model,
                pre_relu,
                'shortcut_projection_%d' % self.comp_count,
                input_filters,
                num_filters,
                weight_init=("MSRAFill", {}),
                kernel=1,
                stride=(1 if down_sampling is False else 2),
                no_bias=self.no_bias,
            )

        self.prev_blob = brew.sum(
            self.model,
            [shortcut_blob, last_conv],
            'comp_%d_sum_%d' % (self.comp_count, self.comp_idx),
        )
        self.comp_idx += 1
        self.comp_count += 1
Example #15
 def fused_gated_global_pool(
     self,
     blob_in,
     dim_in,
     prefix='',
     ratio=8,
     down_rate=2,
     res_gate=False,
 ):
     fc1 = self.model.FC(blob_in, prefix + '_fc1', dim_in, dim_in // ratio)
     fc1_relu = self.model.Relu(fc1, prefix + '_fc1_relu')
     fc2 = self.model.FC(fc1_relu, prefix + '_fc2', dim_in // ratio,
                         dim_in // down_rate)
     sig = self.model.Sigmoid(fc2, fc2 + '_sig')
     shortcut_blob = self.prev_blob
     self.prev_blob = self.model.Mul([self.prev_blob, sig],
                                     [prefix + 'g_pool_out'],
                                     broadcast=1,
                                     axis=0)
     if res_gate:
         self.prev_blob = brew.sum(self.model,
                                   [shortcut_blob, self.prev_blob],
                                   prefix + 'res_pool_out')
     return self.prev_blob
Example #16
    def add_bottleneck(
        self,
        input_filters,  # num of feature maps from preceding layer
        base_filters,  # num of filters internally in the component
        output_filters,  # num of feature maps to output
        stride=1,
        group=1,
        spatial_batch_norm=True,
    ):
        self.comp_idx = 0
        shortcut_blob = self.prev_blob

        # 1x1
        self.add_conv(
            input_filters,
            base_filters,
            kernel=1,
            stride=1,
        )

        if spatial_batch_norm:
            self.add_spatial_bn(base_filters)

        self.add_relu()

        # 3x3 (note the pad, required for keeping dimensions)
        self.add_conv(
            base_filters,
            base_filters,
            kernel=3,
            stride=stride,
            group=group,
            pad=1,
        )

        if spatial_batch_norm:
            self.add_spatial_bn(base_filters)
        self.add_relu()

        # 1x1
        last_conv = self.add_conv(base_filters, output_filters, kernel=1)
        if spatial_batch_norm:
            last_conv = self.add_spatial_bn(output_filters)

        # Summation with input signal (shortcut)
        # When the number of feature maps differs between the input
        # and the output (this usually happens when the residual stage
        # changes), we need a projection for the shortcut
        if output_filters != input_filters:
            shortcut_blob = brew.conv(
                self.model,
                shortcut_blob,
                'shortcut_projection_%d' % self.comp_count,
                input_filters,
                output_filters,
                weight_init=("MSRAFill", {}),
                kernel=1,
                stride=stride,
                no_bias=self.no_bias,
            )
            if spatial_batch_norm:
                shortcut_blob = brew.spatial_bn(
                    self.model,
                    shortcut_blob,
                    'shortcut_projection_%d_spatbn' % self.comp_count,
                    output_filters,
                    epsilon=self.bn_epsilon,
                    momentum=self.bn_momentum,
                    is_test=self.is_test,
                )

        self.prev_blob = brew.sum(
            self.model, [shortcut_blob, last_conv],
            'comp_%d_sum_%d' % (self.comp_count, self.comp_idx))
        self.comp_idx += 1
        self.add_relu()

        # Keep track of the number of high-level components in this ResNetBuilder
        self.comp_count += 1

        return output_filters
Example #17
    def add_bottleneck(
        self,
        input_filters,  # num of feature maps from preceding layer
        base_filters,  # num of filters internally in the component
        output_filters,  # num of feature maps to output
        down_sampling=False,
        spatial_batch_norm=True,
    ):
        self.comp_idx = 0
        shortcut_blob = self.prev_blob

        # 1x1
        self.add_conv(input_filters, base_filters, kernel=1, stride=1)

        if spatial_batch_norm:
            self.add_spatial_bn(base_filters)

        self.add_relu()

        # 3x3 (note the pad, required for keeping dimensions)
        self.add_conv(base_filters,
                      base_filters,
                      kernel=3,
                      stride=(1 if down_sampling is False else 2),
                      pad=1)

        if spatial_batch_norm:
            self.add_spatial_bn(base_filters)
        self.add_relu()

        # 1x1
        last_conv = self.add_conv(base_filters, output_filters, kernel=1)
        if spatial_batch_norm:
            last_conv = self.add_spatial_bn(output_filters)

        # Summation with input signal (shortcut)
        # If we need to increase the number of dimensions (feature maps),
        # we need to do a projection for the shortcut
        if (output_filters > input_filters):
            shortcut_blob = brew.conv(
                self.model,
                shortcut_blob,
                'shortcut_projection_%d' % self.comp_count,
                input_filters,
                output_filters,
                weight_init=("MSRAFill", {}),
                kernel=1,
                stride=(1 if down_sampling is False else 2),
                no_bias=self.no_bias,
            )
            if spatial_batch_norm:
                shortcut_blob = brew.spatial_bn(
                    self.model,
                    shortcut_blob,
                    'shortcut_projection_%d_spatbn' % self.comp_count,
                    output_filters,
                    epsilon=1e-3,
                    momentum=self.spatial_bn_mom,
                    is_test=self.is_test,
                )

        self.prev_blob = brew.sum(
            self.model, [shortcut_blob, last_conv],
            'comp_%d_sum_%d' % (self.comp_count, self.comp_idx))
        self.comp_idx += 1
        self.add_relu()

        # Keep track of the number of high-level components in this ResNetBuilder
        self.comp_count += 1
Example #18
def create_caffe2_model(model,
                        input_shape,
                        use_cudnn=True,
                        init_params=False,
                        keras_channel_last=True):

    arg_scope = {'order': 'NCHW', 'use_cudnn': use_cudnn}
    caffe2_model = model_helper.ModelHelper(name='model',
                                            init_params=init_params,
                                            arg_scope=arg_scope)

    num_conv_layers = 0

    layer_num = 0
    layer_sizes = {}
    prev_layer_name = ''

    for layer in model.layers:

        inb_node = layer._inbound_nodes[0]
        num_input_layers = len(inb_node.inbound_layers)

        input_name_list = []

        for ii in range(0, num_input_layers):
            inp_layer = inb_node.inbound_layers[ii]

            input_name_list.append(inp_layer.name)
            prev_layer_name = inp_layer.name

            if isinstance(inp_layer, keras.layers.Flatten):
                pass
                #pinb_node = inp_layer._inbound_nodes[0]
                #prev_layer_name = pinb_node.inbound_layers[0].name

        name = layer.name

        config = layer.get_config()
        inputShape = layer.input_shape
        outputShape = layer.output_shape

        if isinstance(layer, keras.engine.input_layer.InputLayer):
            input_sizes = (input_shape[2], input_shape[3])
            layer_sizes[name] = input_sizes
        else:
            if (input_name_list[0] not in layer_sizes):
                raise ValueError("Can't find layer size for ",
                                 input_name_list[0])
            else:
                input_sizes = layer_sizes[input_name_list[0]]

        layer_dim = len(outputShape)
        if (layer_dim == 4):
            if (keras_channel_last):
                out_sizes = (outputShape[1], outputShape[2])
            else:
                out_sizes = (outputShape[2], outputShape[3])
        elif (layer_dim == 2):
            out_sizes = (0, 0)  #flattened
        else:
            raise ValueError(
                'Unsupported layer dimension : {0}'.format(layer_dim))

        if isinstance(layer, keras.layers.Flatten):
            tmp_prev = prev_layer_name

            if (keras_channel_last):
                tmp_prev = prev_layer_name + '_transpose'  #nb, img_h, img_w, chan <-- nb, chan, img_h, img_w
                c2_layer = brew.transpose(caffe2_model,
                                          prev_layer_name,
                                          tmp_prev,
                                          axes=(0, 2, 3, 1))

            c2_layer = caffe2_model.net.Flatten(tmp_prev, name)

            #print('FLatten previous layer ', prev_layer_name, ' current layer ', name , 'inputshape ', inputShape)

            layer_sizes[name] = out_sizes

        elif isinstance(layer, keras.layers.Dropout):
            #print('name is ', name, ' prev_layer_name ', prev_layer_name)
            c2_layer = caffe2_model.net.Dropout(prev_layer_name,
                                                name,
                                                is_test=True
                                                #ratio=config['rate']
                                                )

            #same size
            layer_sizes[name] = input_sizes

        elif (isinstance(layer, keras.layers.convolutional.Conv2D)):

            dim_in = inputShape[-1]
            dim_out = outputShape[-1]
            kernel = config['kernel_size'][0]
            stride = config['strides'][0]

            if (config['padding'] == 'same'):
                pad_sizes = get_padding_sizes(input_sizes,
                                              config['kernel_size'],
                                              config['strides'])
            elif (config['padding'] == 'valid'):
                pad_sizes = ((0, 0), (0, 0))
            else:
                raise ValueError('unsupported padding')

            #print('pad sizes ', pad_sizes)

            layer_sizes[name] = out_sizes

            c2_layer = brew.conv(caffe2_model,
                                 prev_layer_name,
                                 name,
                                 dim_in=dim_in,
                                 dim_out=dim_out,
                                 kernel=kernel,
                                 stride=stride,
                                 pad_l=pad_sizes[0][0],
                                 pad_r=pad_sizes[0][1],
                                 pad_t=pad_sizes[1][0],
                                 pad_b=pad_sizes[1][1])

            if config['activation'] == 'linear':
                pass
            elif config['activation'] == 'relu':
                c2_layer = brew.relu(caffe2_model, name, name)
            elif config['activation'] == 'softmax':
                #c2_layer = brew.softmax(caffe2_model, name, name)
                c2_layer = brew.softmax(caffe2_model, name, 'softmax')
            else:
                raise ValueError(
                    'The only supported activations for a conv layer are '
                    'linear, relu and softmax')

        elif isinstance(layer, keras.layers.MaxPooling2D):
            kernel = config['pool_size'][0]
            stride = config['strides'][0]

            pad_size = ((0, 0), (0, 0))
            layer_sizes[name] = out_sizes

            c2_layer = brew.max_pool(caffe2_model,
                                     prev_layer_name,
                                     name,
                                     kernel=kernel,
                                     stride=stride)

        elif isinstance(layer, keras.layers.AveragePooling2D):
            kernel = config['pool_size'][0]
            stride = config['strides'][0]

            pad_size = ((0, 0), (0, 0))
            layer_sizes[name] = out_sizes

            c2_layer = brew.average_pool(caffe2_model,
                                         prev_layer_name,
                                         name,
                                         kernel=kernel,
                                         stride=stride)

        elif isinstance(layer, keras.layers.BatchNormalization):

            dim_in = inputShape[-1]
            epsilon = config['epsilon']
            momentum = config['momentum']
            c2_layer = brew.spatial_bn(caffe2_model,
                                       prev_layer_name,
                                       name,
                                       dim_in=dim_in,
                                       epsilon=epsilon,
                                       momentum=momentum,
                                       is_test=True)

            #same size
            layer_sizes[name] = input_sizes

        elif (isinstance(layer, keras.layers.core.Dense)):

            dim_in = inputShape[-1]
            dim_out = outputShape[-1]

            #print('input shape for dense is ', inputShape)
            if (len(inputShape) == 2):  #flattened input
                c2_layer = brew.fc(caffe2_model,
                                   prev_layer_name,
                                   name,
                                   dim_in=dim_in,
                                   dim_out=dim_out)
            else:  #fully convolutional input
                c2_layer = brew.conv(caffe2_model,
                                     prev_layer_name,
                                     name,
                                     dim_in=dim_in,
                                     dim_out=dim_out,
                                     kernel=1,
                                     stride=1)

            activation = config['activation']
            if activation == 'relu':
                c2_layer = brew.relu(caffe2_model, name, name)
            elif activation == 'softmax':
                c2_layer = brew.softmax(caffe2_model, name, 'softmax')
            elif activation == 'linear':
                pass  #
            else:
                raise ValueError(
                    'The only supported activations for an fc layer are '
                    'linear, relu and softmax'
                )

            #same size
            layer_sizes[name] = input_sizes

        elif (isinstance(layer, keras.layers.advanced_activations.LeakyReLU)):

            dim_in = inputShape[-1]

            c2_layer = caffe2_model.net.LeakyRelu(prev_layer_name,
                                                  name,
                                                  alpha=config['alpha'])

            #same size
            layer_sizes[name] = input_sizes

        elif (isinstance(layer, keras.layers.merge.Add)):

            c2_layer = brew.sum(caffe2_model,
                                [input_name_list[0], input_name_list[1]], name)

            #same size
            layer_sizes[name] = input_sizes

        layer_num = layer_num + 1
        # Register the last layer's blob as the network's external output
        if (layer_num == len(model.layers)):
            caffe2_model.net.AddExternalOutput(c2_layer)

    return caffe2_model
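A sketch of how this converter might be driven (the checkpoint path and input shape are illustrative, assuming a Keras HDF5 model whose input layout is batch, channels, height, width):

import keras

k_model = keras.models.load_model("model.h5")  # hypothetical checkpoint
c2_model = create_caffe2_model(k_model,
                               input_shape=(1, 3, 224, 224),
                               use_cudnn=False,
                               keras_channel_last=True)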
Example #19
 def Sum(self, *args, **kwargs):
     return brew.sum(self, *args, **kwargs)
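For reference, a minimal stand-alone use of brew.sum, which this wrapper simply forwards to (blob names are illustrative):

from caffe2.python import brew, model_helper

m = model_helper.ModelHelper(name="sum_demo")
a = brew.fc(m, "data", "a", dim_in=4, dim_out=4)
b = brew.fc(m, "data", "b", dim_in=4, dim_out=4)
out = brew.sum(m, [a, b], "a_plus_b")  # element-wise sum of the two FC outputs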
Example #20
def res_block_1_conv_topk(
    model,
    inputs,
    dim_in,
    dim_out,
    topk=5,
    no_bias=False,
    is_test=False,
    module_seq=None,
    sub_seq=None,
):
    # branch of 1 (projection)
    branch1_conv = meta_conv_conv_topk(
        model,
        inputs,
        dim_in,
        dim_out,
        kernel=1,
        pad=0,
        stride=2,
        topk=topk,
        no_bias=no_bias,
        is_test=is_test,
        has_relu=False,
        module_seq=module_seq,
        sub_seq=sub_seq,
        branch_seq='1',
        conv_seq='',
    )

    # branch of 2 (normal)
    branch2_conv1 = meta_conv_conv_topk(
        model,
        inputs,
        dim_in,
        dim_out,
        kernel=3,
        pad=1,
        stride=2,
        topk=topk,
        no_bias=no_bias,
        is_test=is_test,
        has_relu=True,
        module_seq=module_seq,
        sub_seq=sub_seq,
        branch_seq='2',
        conv_seq='0',
    )

    branch2_conv2 = meta_conv_conv_topk(
        model,
        branch2_conv1,
        dim_out,
        dim_out,
        kernel=3,
        pad=1,
        stride=1,
        topk=topk,
        no_bias=no_bias,
        is_test=is_test,
        has_relu=False,
        module_seq=module_seq,
        sub_seq=sub_seq,
        branch_seq='2',
        conv_seq='1',
    )

    # sum
    branch_sum = brew.sum(
        model,
        [branch2_conv2, branch1_conv],
        # branch2_conv2 # in-place
        ["group{}_conv{}_sum".format(module_seq, sub_seq)]
    )
    branch_relu = brew.relu(
        model,
        branch_sum,
        branch_sum
    )
    return branch_relu
Example #21
    def create_gru_unit(self, emb_ls, user_emb_ids, model, tag, seq_q, hid_q):
        (tag_layer, tag_in, tag_out) = tag

        emb_ls_str = []
        for user_emb_id in user_emb_ids:
            emb_ls_str.append(emb_ls[user_emb_id])

        tag_cat = tag_layer + ":::_rnn_inputs"
        tag_cat_info = tag_cat + "_info"
        rnn_inputs, info = model.net.Concat(emb_ls_str,
                                            [tag_cat, tag_cat_info])
        rnn_shape = model.net.Reshape(
            rnn_inputs, [tag_layer + ":::rnn_shape", "old_shape"],
            shape=(len(user_emb_ids), -1, self.input_size))

        gates_t_w_data = np.random.randn(
            self.args.hidden_size, self.args.hidden_size).astype(np.float32)
        gates_t_b_data = np.random.randn(self.args.hidden_size).astype(
            np.float32)
        i2h_w_data = np.random.randn(self.args.hidden_size,
                                     self.input_size).astype(np.float32)
        i2h_b_data = np.random.randn(self.args.hidden_size).astype(np.float32)

        workspace.FeedBlob('rnn_0/gates_t_w', gates_t_w_data)
        workspace.FeedBlob('rnn_0/gates_t_b', gates_t_b_data)
        workspace.FeedBlob('rnn_0/i2h_w', i2h_w_data)
        workspace.FeedBlob('rnn_0/i2h_b', i2h_b_data)

        if seq_q:
            model.net.DequeueBlobs(seq_q, "seq_lengths")
        if hid_q:
            model.net.DequeueBlobs(hid_q, "initial_h")

        rnn_0_out, _ = rnn_cell.BasicRNN(model,
                                         tag_layer + ":::rnn_shape",
                                         'seq_lengths', ['initial_h'],
                                         self.input_size,
                                         self.args.hidden_size,
                                         "rnn_0",
                                         activation="tanh",
                                         forward_only=True)

        output = brew.fc(self.model,
                         rnn_0_out,
                         None,
                         dim_in=self.args.hidden_size,
                         dim_out=self.args.hidden_size,
                         axis=2,
                         engine=self.args.engine,
                         max_num_tasks=self.args.fc_workers)

        output = brew.softmax(self.model, output, axis=2)
        # Residual connection: element-wise sum of the gated output and the
        # RNN output, in place on the first input (Sum takes no axis argument)
        output = brew.sum(self.model, [output, rnn_0_out], output)

        # TODO: Need to make input_h_data an input to the overall model due to
        # batch-size
        gates_t_w_data = np.random.randn(
            self.args.hidden_size, self.args.hidden_size).astype(np.float32)
        gates_t_b_data = np.random.randn(self.args.hidden_size).astype(
            np.float32)
        i2h_w_data = np.random.randn(self.args.hidden_size,
                                     self.args.hidden_size).astype(np.float32)
        i2h_b_data = np.random.randn(self.args.hidden_size).astype(np.float32)

        workspace.FeedBlob('rnn_1/gates_t_w', gates_t_w_data)
        workspace.FeedBlob('rnn_1/gates_t_b', gates_t_b_data)
        workspace.FeedBlob('rnn_1/i2h_w', i2h_w_data)
        workspace.FeedBlob('rnn_1/i2h_b', i2h_b_data)

        rnn_1_all_out, rnn_1_out = rnn_cell.BasicRNN(model,
                                                     output,
                                                     'seq_lengths',
                                                     ['initial_h'],
                                                     self.args.hidden_size,
                                                     self.args.hidden_size,
                                                     "rnn_1",
                                                     activation="tanh",
                                                     forward_only=True)

        return rnn_1_out
Example #22
def res_block_2_conv_topk(
    model,
    inputs,
    dim_in,
    dim_out,
    topk=5,
    no_bias=False,
    is_test=False,
    module_seq=None,
    sub_seq=None,
):
    # input & output channel check
    assert(dim_in == dim_out)

    # branch of 2 (normal)
    branch2_conv1 = meta_conv_conv_topk(
        model,
        inputs,
        dim_in,
        dim_out,
        kernel=3,
        pad=1,
        stride=1,
        topk=topk,
        no_bias=no_bias,
        is_test=is_test,
        has_relu=True,
        module_seq=module_seq,
        sub_seq=sub_seq,
        branch_seq='2',
        conv_seq='0',
    )

    branch2_conv2 = meta_conv_conv_topk(
        model,
        branch2_conv1,
        dim_in,
        dim_out,
        kernel=3,
        pad=1,
        stride=1,
        topk=topk,
        no_bias=no_bias,
        is_test=is_test,
        has_relu=False,
        module_seq=module_seq,
        sub_seq=sub_seq,
        branch_seq='2',
        conv_seq='1',
    )

    # sum
    branch_sum = brew.sum(
        model,
        [branch2_conv2, inputs],
        # branch2_conv2 # in-place
        ["group{}_conv{}_sum".format(module_seq, sub_seq)]
    )
    branch_relu = brew.relu(
        model,
        branch_sum,
        branch_sum
    )
    return branch_relu
Example #23
    def add_bottleneck(
        self,
        input_filters,
        base_filters,
        output_filters,
        down_sampling=False,
        spatial_batch_norm=True,
    ):
        self.comp_idx = 0
        shortcut_blob = self.prev_blob

        # 1x1
        self.add_conv(input_filters, base_filters, kernel=1, stride=1)

        if spatial_batch_norm:
            self.add_spatial_bn(base_filters)

        self.add_relu()

        # 3x3 (note the pad, required for keeping dimensions)
        self.add_conv(base_filters,
                      base_filters,
                      kernel=3,
                      stride=(1 if down_sampling is False else 2),
                      pad=1)

        if spatial_batch_norm:
            self.add_spatial_bn(base_filters)
        self.add_relu()

        # 1x1
        last_conv = self.add_conv(base_filters, output_filters, kernel=1)
        if spatial_batch_norm:
            last_conv = self.add_spatial_bn(output_filters)

        if (output_filters > input_filters):
            shortcut_blob = brew.conv(
                self.model,
                shortcut_blob,
                'shortcut_projection_%d' % self.comp_count,
                input_filters,
                output_filters,
                weight_init=("MSRAFill", {}),
                kernel=1,
                stride=(1 if down_sampling is False else 2),
                no_bias=self.no_bias,
            )
            if spatial_batch_norm:
                shortcut_blob = brew.spatial_bn(
                    self.model,
                    shortcut_blob,
                    'shortcut_projection_%d_spatbn' % self.comp_count,
                    output_filters,
                    epsilon=1e-3,
                    momentum=self.spatial_bn_mom,
                    is_test=self.is_test,
                )

        self.prev_blob = brew.sum(
            self.model, [shortcut_blob, last_conv],
            'comp_%d_sum_%d' % (self.comp_count, self.comp_idx))
        self.comp_idx += 1
        self.add_relu()
        self.comp_count += 1
Example #24
    def add_bottleneck(
        self,
        input_filters,   # num of feature maps from preceding layer
        base_filters,    # num of filters internally in the component
        output_filters,  # num of feature maps to output
        down_sampling=False,
        spatial_batch_norm=True,
    ):
        self.comp_idx = 0
        shortcut_blob = self.prev_blob

        # 1x1
        self.add_conv(
            input_filters,
            base_filters,
            kernel=1,
            stride=1
        )

        if spatial_batch_norm:
            self.add_spatial_bn(base_filters)

        self.add_relu()

        # 3x3 (note the pad, required for keeping dimensions)
        self.add_conv(
            base_filters,
            base_filters,
            kernel=3,
            stride=(1 if down_sampling is False else 2),
            pad=1
        )

        if spatial_batch_norm:
            self.add_spatial_bn(base_filters)
        self.add_relu()

        # 1x1
        last_conv = self.add_conv(base_filters, output_filters, kernel=1)
        if spatial_batch_norm:
            last_conv = self.add_spatial_bn(output_filters)

        # Summation with input signal (shortcut)
        # If we need to increase the number of dimensions (feature maps),
        # we need to do a projection for the shortcut
        if (output_filters > input_filters):
            shortcut_blob = brew.conv(
                self.model,
                shortcut_blob,
                'shortcut_projection_%d' % self.comp_count,
                input_filters,
                output_filters,
                weight_init=("MSRAFill", {}),
                kernel=1,
                stride=(1 if down_sampling is False else 2),
                no_bias=self.no_bias,
            )
            if spatial_batch_norm:
                shortcut_blob = brew.spatial_bn(
                    self.model,
                    shortcut_blob,
                    'shortcut_projection_%d_spatbn' % self.comp_count,
                    output_filters,
                    epsilon=1e-3,
                    momentum=self.spatial_bn_mom,
                    is_test=self.is_test,
                )

        self.prev_blob = brew.sum(
            self.model, [shortcut_blob, last_conv],
            'comp_%d_sum_%d' % (self.comp_count, self.comp_idx)
        )
        self.comp_idx += 1
        self.add_relu()

        # Keep track of the number of high-level components in this ResNetBuilder
        self.comp_count += 1
Example #25
 def residual_unit(self,
                   model,
                   v,
                   num_in_channels,
                   num_filter,
                   stride,
                   dim_match,
                   name,
                   bottle_neck=True,
                   bn_mom=0.9,
                   is_inference=False):
      """Return a ResNet unit for building the network.
      Parameters
      ----------
      v : BlobReference
          Input blob
      num_in_channels : int
          Number of input channels
      num_filter : int
          Number of output channels
      stride : int
          Stride used in the convolutions
      dim_match : bool
          True means the channel counts of input and output match, so the
          identity shortcut is used; otherwise the shortcut is a 1x1
          projection
      name : str
          Base name of the operators
      bottle_neck : bool
          Whether to build the three-convolution bottleneck variant
      is_inference : bool
          Whether batch normalization runs in test mode
      """
     if bottle_neck:
         # Branch 1
         if dim_match:
             shortcut = v
         else:
             shortcut = brew.conv(model,
                                  v,
                                  name + '_sc',
                                  num_in_channels,
                                  num_filter,
                                  kernel=1,
                                  stride=stride,
                                  no_bias=True)
             shortcut = brew.spatial_bn(model,
                                        shortcut,
                                        name + '_sc_bn',
                                        num_filter,
                                         epsilon=2e-5,
                                        momentum=bn_mom,
                                        is_test=is_inference)
         # Branch 2
         interim_filters = int(num_filter * 0.25)
         #     Block 1
         conv1 = brew.conv(model,
                           v,
                           name + '_conv1',
                           num_in_channels,
                           interim_filters,
                           kernel=1,
                           pad=0,
                           stride=1,
                           no_bias=True)
         bn1 = brew.spatial_bn(model,
                               conv1,
                               name + '_bn1',
                               interim_filters,
                                epsilon=2e-5,
                               momentum=bn_mom,
                               is_test=is_inference)
         act1 = brew.relu(model, bn1, name + '_relu1')
         #     Block 2
         conv2 = brew.conv(model,
                           act1,
                           name + '_conv2',
                           interim_filters,
                           int(num_filter * 0.25),
                           kernel=3,
                           pad=1,
                           stride=stride,
                           no_bias=True)
         bn2 = brew.spatial_bn(model,
                               conv2,
                               name + '_bn2',
                               interim_filters,
                                epsilon=2e-5,
                               momentum=bn_mom,
                               is_test=is_inference)
         act2 = brew.relu(model, bn2, name + '_relu2')
         #     Block 3
         conv3 = brew.conv(model,
                           act2,
                           name + '_conv3',
                           interim_filters,
                           num_filter,
                           kernel=1,
                           pad=0,
                           stride=1,
                           no_bias=True)
         bn3 = brew.spatial_bn(model,
                               conv3,
                               name + '_bn3',
                               num_filter,
                                epsilon=2e-5,
                               momentum=bn_mom,
                               is_test=is_inference)
         # Element-wise summation and ReLU
         output = brew.sum(model, [shortcut, bn3], name + '_sum')
         output = brew.relu(model, output, name + '_relu')
         return output
     else:
         # Branch 1
         if dim_match:
             shortcut = v
         else:
             shortcut = brew.conv(model,
                                  v,
                                  name + '_sc_conv',
                                  num_in_channels,
                                  num_filter,
                                  kernel=1,
                                  stride=stride,
                                  no_bias=True)
             shortcut = brew.spatial_bn(model,
                                        shortcut,
                                        name + '_sc_bn',
                                        num_filter,
                                         epsilon=2e-5,
                                        momentum=bn_mom,
                                        is_test=is_inference)
         # Branch 2
         #     Block 1
         conv1 = brew.conv(model,
                           v,
                           name + '_conv1',
                           num_in_channels,
                           num_filter,
                           kernel=3,
                           pad=1,
                           stride=stride,
                           no_bias=True)
         bn1 = brew.spatial_bn(model,
                               conv1,
                               name + '_bn1',
                               num_filter,
                                epsilon=2e-5,
                               momentum=bn_mom,
                               is_test=is_inference)
         act1 = brew.relu(model, bn1, name + '_relu1')
         #     Block 2
         conv2 = brew.conv(model,
                           act1,
                           name + '_conv2',
                           num_filter,
                           num_filter,
                           kernel=3,
                           pad=1,
                           stride=1,
                           no_bias=True)
         bn2 = brew.spatial_bn(model,
                               conv2,
                               name + '_bn2',
                               num_filter,
                                epsilon=2e-5,
                               momentum=bn_mom,
                               is_test=is_inference)
         # Element-wise summation and ReLU
         output = brew.sum(model, [shortcut, bn2], name + '_sum')
         output = brew.relu(model, output, name + '_relu')
         return output
Example #26
    def add_bottleneck(
        self,
        input_filters,  # num of feature maps from preceding layer
        output_filters,  # num of feature maps to output
        base_filters,  # num of filters internally in the component
        down_sampling=False,
        spatial_batch_norm=True,
    ):

        self.comp_idx = 0
        shortcut_blob = self.prev_blob

        # 1x1
        self.add_conv(
            input_filters,
            base_filters,
            kernel=1,
        )

        if spatial_batch_norm:
            self.add_spatial_bn(base_filters)

        self.add_relu()

        # 3x3
        self.add_conv(
            base_filters,
            base_filters,
            kernel=3,
            stride=(2 if down_sampling else 1),
            pad=1,
        )

        if spatial_batch_norm:
            self.add_spatial_bn(base_filters)
        self.add_relu()

        # 1x1
        last_conv = self.add_conv(
            base_filters,
            output_filters,
            kernel=1,
        )
        if spatial_batch_norm:
            last_conv = self.add_spatial_bn(output_filters)

        # Summation with input signal (shortcut)
        # If we need to increase the number of dimensions (feature maps),
        # we need to do a projection for the shortcut
        if (output_filters > input_filters or down_sampling):
            shortcut_blob = brew.conv(
                self.model,
                shortcut_blob,
                '%sshortcut_projection_%d' % (self.prefix, self.comp_count),
                input_filters,
                output_filters,
                kernel=1,
                weight_init=("MSRAFill", {}),
                stride=(2 if down_sampling else 1),
                no_bias=self.no_bias,
            )
            if spatial_batch_norm:
                shortcut_blob = brew.spatial_bn(
                    self.model,
                    shortcut_blob,
                    '%sshortcut_projection_%d_spatbn' %
                    (self.prefix, self.comp_count),
                    output_filters,
                    epsilon=1e-3,
                    momentum=self.spatial_bn_mom,
                    is_test=self.is_test,
                )

        self.prev_blob = brew.sum(
            self.model, [shortcut_blob, last_conv],
            '%scomp_%d_sum_%d' % (self.prefix, self.comp_count, self.comp_idx))
        self.comp_idx += 1
        self.add_relu()

        self.comp_count += 1
Example #27
def res_block_2(
    model,
    inputs,
    dim_in,
    dim_mid,
    dim_out,
    no_bias=False,
    is_test=False,
    module_seq=None,
    sub_seq=None,
):
    # branch2 (right)
    branch2_conv1 = meta_conv(
        model,
        inputs,
        dim_in,
        dim_mid,
        kernel=1,
        pad=0,
        stride=1,
        no_bias=no_bias,
        is_test=is_test,
        has_relu=True,
        module_seq=module_seq,
        sub_seq=sub_seq,
        branch_seq='2',
        conv_seq='a',
    )

    branch2_conv2 = meta_conv(
        model,
        branch2_conv1,
        dim_mid,
        dim_mid,
        kernel=3,
        pad=1,
        stride=1,
        no_bias=no_bias,
        is_test=is_test,
        has_relu=True,
        module_seq=module_seq,
        sub_seq=sub_seq,
        branch_seq='2',
        conv_seq='b',
    )

    branch2_conv3 = meta_conv(
        model,
        branch2_conv2,
        dim_mid,
        dim_out,
        kernel=1,
        pad=0,
        stride=1,
        no_bias=no_bias,
        is_test=is_test,
        has_relu=False,
        module_seq=module_seq,
        sub_seq=sub_seq,
        branch_seq='2',
        conv_seq='c',
    )

    # sum
    branch_sum = brew.sum(model, [branch2_conv3, inputs], branch2_conv3)
    branch_relu = brew.relu(model, branch_sum, branch_sum)
    return branch_relu