def smooth_loss3(pred, canny, spixel_init, l_weight):

    spixel_x = gradient_x(spixel_init)
    spixel_y = gradient_y(spixel_init)
    pred_x = gradient_x(pred)
    pred_y = gradient_y(pred)
    weight_init_x = weight_edges2(spixel_x, 9, power=0.0001)
    weight_init_y = weight_edges2(spixel_y, 9, power=0.0001)
    w_pred_x = L.Eltwise(pred_x, weight_init_x, operation=P.Eltwise.PROD)
    w_pred_y = L.Eltwise(pred_y, weight_init_y, operation=P.Eltwise.PROD)

    canny_x = crop_x(canny)
    canny_y = crop_y(canny)
    weight_x = weight_edges2(canny_x, 9)
    weight_y = weight_edges2(canny_y, 9)
    smoothness_x = L.Eltwise(w_pred_x, weight_x, operation=P.Eltwise.PROD)
    smoothness_y = L.Eltwise(w_pred_y, weight_y, operation=P.Eltwise.PROD)

    mean_x_smooth = L.Reduction(
        smoothness_x, reduction_param=dict(operation=P.Reduction.SUM))
    mean_y_smooth = L.Reduction(
        smoothness_y, reduction_param=dict(operation=P.Reduction.SUM))

    smooth_loss = L.Eltwise(mean_x_smooth,
                            mean_y_smooth,
                            operation=P.Eltwise.SUM,
                            loss_weight=l_weight)

    return smooth_loss
Example #2
def bn_model_caffe(request, tmpdir):
    """Same as bn_model but with Caffe."""

    import caffe
    from caffe import layers as L

    bounds = (0, 1)
    num_classes = channels = getattr(request, "param", 1000)

    net_spec = caffe.NetSpec()
    net_spec.data = L.Input(name="data",
                            shape=dict(dim=[1, channels, 5, 5]))
    net_spec.reduce_1 = L.Reduction(net_spec.data,
                                    reduction_param={"operation": 4,
                                                     "axis": 3})
    net_spec.output = L.Reduction(net_spec.reduce_1,
                                  reduction_param={"operation": 4,
                                                   "axis": 2})
    net_spec.label = L.Input(name="label", shape=dict(dim=[1]))
    net_spec.loss = L.SoftmaxWithLoss(net_spec.output, net_spec.label)
    wf = tmpdir.mkdir("test_models_caffe_fixture")\
               .join("test_caffe_{}.prototxt".format(num_classes))
    wf.write("force_backward: true\n" + str(net_spec.to_proto()))
    net = caffe.Net(str(wf), caffe.TEST)
    model = CaffeModel(net, bounds=bounds)
    return model
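In this fixture the raw `"operation": 4` is the numeric value of `P.Reduction.MEAN` (Caffe's ReductionOp enum: SUM=1, ASUM=2, SUMSQ=3, MEAN=4), so the two Reduction layers average over width and height and the logits are the per-channel spatial means of the input. A minimal sketch of those two layers written with the enum instead of the magic number, assuming `from caffe import params as P` and the default 1000 channels:

import caffe
from caffe import layers as L, params as P

net_spec = caffe.NetSpec()
net_spec.data = L.Input(name="data", shape=dict(dim=[1, 1000, 5, 5]))
# Average over the width axis, then the height axis.
net_spec.reduce_1 = L.Reduction(net_spec.data,
                                reduction_param=dict(operation=P.Reduction.MEAN,
                                                     axis=3))
net_spec.output = L.Reduction(net_spec.reduce_1,
                              reduction_param=dict(operation=P.Reduction.MEAN,
                                                   axis=2))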
def smooth_loss(pred, img, l_weight):

    img_x = gradient_x(img)
    img_y = gradient_y(img)

    pred_x = gradient_x(pred)
    pred_y = gradient_y(pred)

    weight_x = weight_edges(img_x)
    weight_y = weight_edges(img_y)

    smoothness_x = L.Eltwise(pred_x, weight_x, operation=P.Eltwise.PROD)
    smoothness_y = L.Eltwise(pred_y, weight_y, operation=P.Eltwise.PROD)

    mean_x_smooth = L.Reduction(
        smoothness_x, reduction_param=dict(operation=P.Reduction.SUM))
    mean_y_smooth = L.Reduction(
        smoothness_y, reduction_param=dict(operation=P.Reduction.SUM))

    smooth_loss = L.Eltwise(mean_x_smooth,
                            mean_y_smooth,
                            operation=P.Eltwise.SUM,
                            loss_weight=l_weight)

    return smooth_loss
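The helpers `gradient_x`, `gradient_y` and `weight_edges` are defined elsewhere in the repository and are not shown here. Taking their outputs as plain arrays, the layers visible above (Eltwise PROD, a Reduction SUM per direction, and a weighted Eltwise SUM of the two scalars) reduce to the following NumPy sketch; the same pattern, with an extra weight per direction, underlies `smooth_loss3` and `smooth_loss4`:

import numpy as np

def smooth_loss_reference(pred_dx, pred_dy, w_x, w_y, l_weight):
    # pred_dx / pred_dy: gradients of the prediction; w_x / w_y: edge weights
    # produced from the image by the (unshown) weight_edges helper.
    return l_weight * (np.sum(pred_dx * w_x) + np.sum(pred_dy * w_y))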
Example #4
def test_caffe_model_forward_gradient(tmpdir):
    import caffe
    from caffe import layers as L

    bounds = (0, 255)
    channels = num_classes = 1000

    net_spec = caffe.NetSpec()
    net_spec.data = L.Input(name="data",
                            shape=dict(dim=[1, num_classes, 5, 5]))
    net_spec.reduce_1 = L.Reduction(net_spec.data,
                                    reduction_param={
                                        "operation": 4,
                                        "axis": 3
                                    })
    net_spec.output = L.Reduction(net_spec.reduce_1,
                                  reduction_param={
                                      "operation": 4,
                                      "axis": 2
                                  })
    net_spec.label = L.Input(name="label", shape=dict(dim=[1]))
    net_spec.loss = L.SoftmaxWithLoss(net_spec.output, net_spec.label)
    wf = tmpdir.mkdir("test_models_caffe").join(
        "test_caffe_model_gradient_proto_{}.prototxt".format(num_classes))
    wf.write("force_backward: true\n" + str(net_spec.to_proto()))
    preprocessing = (
        np.arange(num_classes)[:, None, None],
        np.random.uniform(size=(channels, 5, 5)) + 1,
    )
    net = caffe.Net(str(wf), caffe.TEST)
    model = CaffeModel(net, bounds=bounds, preprocessing=preprocessing)

    epsilon = 1e-2

    np.random.seed(23)
    test_images = np.random.rand(5, channels, 5, 5).astype(np.float32)
    test_labels = [7] * 5

    _, g1 = model.forward_and_gradient_one(test_images, test_labels)

    l1 = model._loss_fn(test_images - epsilon / 2 * g1, test_labels)
    l2 = model._loss_fn(test_images + epsilon / 2 * g1, test_labels)

    assert np.all(1e4 * (l2 - l1) > 1)

    # make sure that gradient is numerically correct
    np.testing.assert_array_almost_equal(
        1e4 * (l2 - l1),
        1e4 * epsilon *
        np.linalg.norm(g1.reshape(len(g1), -1, g1.shape[-1]), axis=(1, 2))**2,
        decimal=1,
    )
Example #5
    def context_supervision_loss(self, distance, lw=1, ind_loss=None):
        """
    Distance is positive; want gt distance to be SMALLER than other distances.
    Loss used for context supervision is also ranking loss:
        Look at rank loss between all possible pairs of moments; want gt distance to be smaller.
        Take average.
    """

        slices = L.Slice(distance, ntop=21, axis=1)
        gt = slices[0]
        setattr(self.n, 'gt_slice', gt)
        ranking_losses = []
        for i in range(1, 21):
            setattr(self.n, 'context_slice_%d' % i, slices[i])
            negate_distance = L.Power(slices[i], scale=-1)
            max_sum = L.Eltwise(gt, negate_distance, operation=1)
            max_sum_margin = L.Power(max_sum, shift=self.margin)
            max_sum_margin_relu = L.ReLU(max_sum_margin, in_place=False)
            if ind_loss:
                max_sum_margin_relu = L.Reshape(
                    max_sum_margin_relu, shape=dict(dim=[self.batch_size, 1]))
                max_sum_margin_relu = L.Eltwise(max_sum_margin_relu,
                                                ind_loss,
                                                operation=0)
            setattr(self.n, 'max_sum_margin_relu_%d' % i, max_sum_margin_relu)
            ranking_loss = L.Reduction(max_sum_margin_relu, operation=4)
            ranking_losses.append(ranking_loss)
        sum_ranking_losses = L.Eltwise(*ranking_losses, operation=1)
        loss = L.Power(sum_ranking_losses, scale=1 / 21., loss_weight=[lw])
        return loss
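For reference, a NumPy sketch of what this graph computes, ignoring the optional `ind_loss` masking and assuming `distance` is an N x 21 array whose first column is the ground-truth moment's distance. In Caffe's enums, Eltwise `operation=1` is SUM and Reduction `operation=4` is MEAN; the Power layers supply the negation and the margin shift:

import numpy as np

def context_supervision_loss_reference(distance, margin, lw=1):
    gt = distance[:, 0:1]                                    # ground-truth distance per instance
    hinge = np.maximum(0.0, margin + gt - distance[:, 1:])   # N x 20 pairwise hinge terms
    per_pair = hinge.mean(axis=0)                            # each Reduction(MEAN) over the batch
    return lw * per_pair.sum() / 21.                         # Eltwise SUM, Power scale=1/21, loss_weight=lw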
def smooth_loss2(pred, l_weight):

    pred_x = gradient_x(pred)
    pred_y = gradient_y(pred)

    mean_x_smooth = L.Reduction(
        pred_x, reduction_param=dict(operation=P.Reduction.SUM))
    mean_y_smooth = L.Reduction(
        pred_y, reduction_param=dict(operation=P.Reduction.SUM))

    smooth_loss = L.Eltwise(mean_x_smooth,
                            mean_y_smooth,
                            operation=P.Eltwise.SUM,
                            loss_weight=l_weight)

    return smooth_loss
    def test_reduce4(self):
        n = caffe.NetSpec()
        n.input1 = L.Input(shape=make_shape([10, 3, 64, 64]))
        n.pooling1 = L.Reduction(n.input1,
                                 operation=P.Reduction.MEAN,
                                 axis=3,
                                 coeff=1.3)
        self._test_model(*self._netspec_to_model(n, 'reduce3'))
Example #8
    def l2normed(self, vec, dim):
        """Returns L2-normalized instances of vec; i.e., for each instance x in vec,
        computes x / ((x ** 2).sum() ** 0.5). Assumes vec has shape N x dim."""
        denom = L.Reduction(vec, axis=1, operation=P.Reduction.SUMSQ)
        denom = L.Power(denom, power=(-0.5), shift=1e-12)
        denom = L.Reshape(denom, num_axes=0, axis=-1, shape=dict(dim=[1]))
        denom = L.Tile(denom, axis=1, tiles=dim)

        return L.Eltwise(vec, denom, operation=P.Eltwise.PROD)
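A compact NumPy equivalent of the graph above; the `shift=1e-12` in the Power layer is the numerical-stability epsilon:

import numpy as np

def l2normed_reference(vec, eps=1e-12):
    # SUMSQ reduction over axis 1, inverse square root, broadcast, elementwise product.
    return vec * (np.sum(vec ** 2, axis=1, keepdims=True) + eps) ** -0.5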
Example #9
    def normalize(self, bottom, axis=1, numtiles=4096):
        power = L.Power(bottom, power=2)
        power_sum = L.Reduction(power, axis=axis, operation=1)
        sqrt = L.Power(power_sum, power=-0.5, shift=0.00001)
        if axis == 1:
            reshape = L.Reshape(sqrt, shape=dict(dim=[-1, 1]))
        if axis == 2:
            reshape = L.Reshape(sqrt, shape=dict(dim=[self.batch_size, -1, 1]))
        tile = L.Tile(reshape, axis=axis, tiles=numtiles)
        return L.Eltwise(tile, bottom, operation=0)
def l1_loss(bottom1, bottom2, l_weight):

    diff = L.Eltwise(bottom1,
                     bottom2,
                     eltwise_param=dict(operation=P.Eltwise.SUM, coeff=[1,
                                                                        -1]))
    absval = L.AbsVal(diff)
    loss = L.Reduction(absval,
                       reduction_param=dict(operation=P.Reduction.SUM),
                       loss_weight=l_weight)

    return loss
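In NumPy terms this graph is simply an L1 distance between the two blobs, scaled by the loss weight:

import numpy as np

def l1_loss_reference(bottom1, bottom2, l_weight):
    # Eltwise SUM with coeff [1, -1], then AbsVal, then Reduction SUM.
    return l_weight * np.sum(np.abs(bottom1 - bottom2))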
Example #11
    def tall_loss(self, positive, negative, query, lw=1):
        scores_p = self.distance_function(positive, query)
        scores_n = self.distance_function(negative, query)
        alpha_c = 1
        alpha_w = 1
        exp_p = L.Exp(scores_p, scale=-1)
        exp_n = L.Exp(scores_n)
        log_p = L.Log(exp_p, shift=1)
        log_n = L.Log(exp_n, shift=1)
        scale_p = L.Power(log_p, scale=alpha_c)
        scale_n = L.Power(log_n, scale=alpha_w)
        all_scores = L.Concat(scale_p, scale_n, axis=0)
        return L.Reduction(all_scores, operation=4, loss_weight=[lw])
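A NumPy sketch of the loss this assembles, taking the positive/negative scores as given arrays since `distance_function` is defined elsewhere. Exp with `scale=-1` is exp(-x), Log with `shift=1` is log(1 + x), and Reduction `operation=4` is a MEAN over the concatenated scores:

import numpy as np

def tall_loss_reference(scores_p, scores_n, lw=1, alpha_c=1, alpha_w=1):
    pos = alpha_c * np.log1p(np.exp(-scores_p))  # log(1 + exp(-s_p)) for positives
    neg = alpha_w * np.log1p(np.exp(scores_n))   # log(1 + exp(s_n)) for negatives
    return lw * np.mean(np.concatenate([pos, neg], axis=0))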
Example #12
    def ranking_loss(self, p, n, t, lw=1):

        # I <3 Caffe - this is not obnoxious to write at all.
        distance_p = self.distance_function(p, t)
        distance_n = self.distance_function(n, t)
        negate_distance_n = L.Power(distance_n, scale=-1)
        max_sum = L.Eltwise(distance_p, negate_distance_n, operation=1)
        max_sum_margin = L.Power(max_sum, shift=self.margin)
        max_sum_margin_relu = L.ReLU(max_sum_margin, in_place=False)
        ranking_loss = L.Reduction(max_sum_margin_relu,
                                   operation=4,
                                   loss_weight=[lw])

        return ranking_loss
Example #13
    def relational_ranking_loss(self, distance_p, distance_n, lw=1):
        """
    This function assumes you want to MINIMIZE distances
    """

        negate_distance_n = L.Power(distance_n, scale=-1)
        max_sum = L.Eltwise(distance_p, negate_distance_n, operation=1)
        max_sum_margin = L.Power(max_sum, shift=self.margin)
        max_sum_margin_relu = L.ReLU(max_sum_margin, in_place=False)
        ranking_loss = L.Reduction(max_sum_margin_relu,
                                   operation=4,
                                   loss_weight=[lw])

        return ranking_loss
def smooth_loss4(pred, canny, l_weight):

    pred_x = gradient_x(pred)
    pred_y = gradient_y(pred)

    canny_x = crop_x(canny)
    canny_y = crop_y(canny)
    weight_x = weight_edges2(canny_x, 5)
    weight_y = weight_edges2(canny_y, 5)
    smoothness_x = L.Eltwise(pred_x, weight_x, operation=P.Eltwise.PROD)
    smoothness_y = L.Eltwise(pred_y, weight_y, operation=P.Eltwise.PROD)

    mean_x_smooth = L.Reduction(
        smoothness_x, reduction_param=dict(operation=P.Reduction.SUM))
    mean_y_smooth = L.Reduction(
        smoothness_y, reduction_param=dict(operation=P.Reduction.SUM))

    smooth_loss = L.Eltwise(mean_x_smooth,
                            mean_y_smooth,
                            operation=P.Eltwise.SUM,
                            loss_weight=l_weight)

    return smooth_loss
Example #15
    def ranking_loss(self, p, n, t, lw=1):

        #For ranking used in paper
        distance_p = self.distance_function(p, t)
        distance_n = self.distance_function(n, t)
        negate_distance_n = L.Power(distance_n, scale=-1)
        max_sum = L.Eltwise(distance_p, negate_distance_n, operation=1)
        max_sum_margin = L.Power(max_sum, shift=self.margin)
        max_sum_margin_relu = L.ReLU(max_sum_margin, in_place=False)
        ranking_loss = L.Reduction(max_sum_margin_relu,
                                   operation=4,
                                   loss_weight=[lw])

        return ranking_loss
def l2normed(dim):
    """Returns L2-normalized instances of vec; i.e., for each instance x in vec,
    computes x / ((x ** 2).sum() ** 0.5). Assumes vec has shape N x dim."""
    n = caffe.NetSpec()
    n.data, n.label = L.Python(module='layers',
                               layer='tripletDataLayer',
                               ntop=2)
    n.denom = L.Reduction(n.data, axis=1, operation=P.Reduction.SUMSQ)
    #denom = L.Power(denom, power=(-0.5))
    n.power = L.Power(n.denom, power=(-0.5),
                      shift=1e-12)  # For numerical stability
    n.reshape = L.Reshape(n.power, num_axes=0, axis=-1, shape=dict(dim=[1]))
    n.tile = L.Tile(n.reshape, axis=1, tiles=dim)
    n.elwise = L.Eltwise(n.data, n.tile, operation=P.Eltwise.PROD)
    return n.to_proto()
Example #17
    def _code_regularization(self, lCW):
        ns = self.netspec

        # Semantic codes. Needs to be initialized.
        code_shape = [
            sum(self.code_dim),
            len(self.train_classes)
            if self.semantics == ATTRIBUTES else sum(self.num_states)
        ]

        name = 'SCoRe/cwReg/codewords'
        sem_cw = ns[name] = L.DummyData(name=name,
                                        shape=dict(dim=code_shape),
                                        include=dict(phase=caffe.TRAIN))

        # Classification codes.
        name = 'SCoRe/cwReg/eye'
        x = ns[name] = L.DummyData(
            name=name,
            shape=dict(dim=[code_shape[0], code_shape[0]]),
            include=dict(phase=caffe.TRAIN))

        name = 'SCoRe/cwReg/cls_codewords'
        clf_cw = ns[name] = L.InnerProduct(x,
                                           name=name,
                                           num_output=code_shape[1],
                                           bias_term=False,
                                           param=[{
                                               'name': lCW
                                           }],
                                           include=dict(phase=caffe.TRAIN))

        # Compute \sum |S-C|^2
        name = 'SCoRe/cwReg/diff'
        x_diff = ns[name] = L.Eltwise(*[sem_cw, clf_cw],
                                      name=name,
                                      operation=P.Eltwise.SUM,
                                      coeff=[1., -1.],
                                      include=dict(phase=caffe.TRAIN))

        name = 'SCoRe/cwReg'
        ns[name] = L.Reduction(x_diff,
                               name=name,
                               operation=P.Reduction.SUMSQ,
                               axis=0,
                               loss_weight=self.code_coeff,
                               include=dict(phase=caffe.TRAIN))
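As the comment says, the regularizer is the squared Frobenius distance between the semantic codewords S and the classification codewords C (Eltwise SUM with coeff [1, -1] followed by a SUMSQ reduction over the whole blob), weighted by `code_coeff`. In NumPy:

import numpy as np

def codeword_regularizer_reference(S, C, code_coeff):
    return code_coeff * np.sum((S - C) ** 2)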
Example #18
    def pool_distances(self, vec, minimum_distance=True):
        #want to MINIMIZE distance; negate, maximize, then negate (again)
        #Assume that scores are Nx21 size blob
        if self.args.pool_type in ['max', 'average']:
            prep_pool = L.Reshape(vec,
                                  shape=dict(dim=[self.batch_size, 1, 21, 1]))

            if minimum_distance:
                prep_pool = L.Power(prep_pool, scale=-1)
            max_pool = L.Pooling(prep_pool,
                                 pool=pooling_type[self.args.pool_type],
                                 kernel_h=21,
                                 kernel_w=1)
            pool = L.Reshape(max_pool, shape=dict(dim=[self.batch_size]))
            if minimum_distance:
                pool = L.Power(pool, scale=-1)
        elif self.args.pool_type in ['sum']:
            #untested
            negative = L.Power(vec, scale=-1)
            pool = L.Reduction(negative, axis=1, operation=1)  #sum
        else:
            raise Exception("You did not select a valid pooling type.")
        return pool
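The negate / max-pool / negate trick in the 'max' branch is min-pooling expressed with layers Caffe already has, using the identity min(x) = -max(-x). A NumPy illustration over the 21 candidate distances per instance:

import numpy as np

def min_pool_reference(distances):
    # distances: N x 21 array of candidate distances.
    return -np.max(-distances, axis=1)  # identical to np.min(distances, axis=1)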
Example #19
    def compile_time_operation(self, learning_option, cluster):
        """
        define reduction operation for input blob
        """
        # get input
        input_ = self.get_input('input')
        indim = self.get_dimension('input')

        # get attr
        # required field
        op = self.get_attr('operation', default=None)
        if op is None:
            raise Exception(
                '[DLMDL ERROR]: {0} in {1} layer must be declared.'.format(
                    'operation', self.name))

        # optional field
        axis = self.get_attr('axis', default=None)
        scale = float(self.get_attr('scale', default=1.0))

        # get output dimension
        if axis == len(indim):
            indim.pop()
            outdim = indim
        else:
            outdim = indim
            outdim[axis] = 1

        reduction = L.Reduction(input_,
                                name=self.name,
                                operation=op,
                                axis=axis,
                                coeff=scale)

        # set output
        self.set_output('output', reduction)
        self.set_dimension('output', outdim)
Example #20
def test_caffe_model_preprocessing_shape_change(tmpdir):
    import caffe
    from caffe import layers as L

    bounds = (0, 255)
    channels = num_classes = 1000

    net_spec = caffe.NetSpec()
    net_spec.data = L.Input(name="data",
                            shape=dict(dim=[1, num_classes, 5, 5]))
    net_spec.reduce_1 = L.Reduction(net_spec.data,
                                    reduction_param={
                                        "operation": 4,
                                        "axis": 3
                                    })
    net_spec.output = L.Reduction(net_spec.reduce_1,
                                  reduction_param={
                                      "operation": 4,
                                      "axis": 2
                                  })
    net_spec.label = L.Input(name="label", shape=dict(dim=[1]))
    net_spec.loss = L.SoftmaxWithLoss(net_spec.output, net_spec.label)
    wf = tmpdir.mkdir("test_models_caffe")\
               .join("test_caffe_model_preprocessing_shape_change_{}.prototxt"
                     .format(num_classes))
    wf.write("force_backward: true\n" + str(net_spec.to_proto()))
    net = caffe.Net(str(wf), caffe.TEST)
    model1 = CaffeModel(net, bounds=bounds)

    def preprocessing2(x):
        if x.ndim == 3:
            x = np.transpose(x, axes=(2, 0, 1))
        elif x.ndim == 4:
            x = np.transpose(x, axes=(0, 3, 1, 2))

        def grad(dmdp):
            assert dmdp.ndim == 3
            dmdx = np.transpose(dmdp, axes=(1, 2, 0))
            return dmdx

        return x, grad

    model2 = CaffeModel(net, bounds=bounds, preprocessing=preprocessing2)

    np.random.seed(22)
    test_images_nhwc = np.random.rand(2, 5, 5, channels).astype(np.float32)
    test_images_nchw = np.transpose(test_images_nhwc, (0, 3, 1, 2))

    p1 = model1.forward(test_images_nchw)
    p2 = model2.forward(test_images_nhwc)

    assert np.all(p1 == p2)

    p1 = model1.forward_one(test_images_nchw[0])
    p2 = model2.forward_one(test_images_nhwc[0])

    assert np.all(p1 == p2)

    g1 = model1.gradient_one(test_images_nchw[0], 3)
    assert g1.ndim == 3
    g1 = np.transpose(g1, (1, 2, 0))
    g2 = model2.gradient_one(test_images_nhwc[0], 3)

    np.testing.assert_array_almost_equal(g1, g2)
Example #21
def CaffeTrackerNet(net, from_layer="data", label_layer="label"):
    # CaffeNet
    kwargs = {
        'param':
        [dict(lr_mult=0, decay_mult=1),
         dict(lr_mult=0, decay_mult=0)],
        'weight_filler': dict(type='gaussian', std=0.01),
        'bias_filler': dict(type='constant', value=0),
    }
    # conv1
    net.conv1 = L.Convolution(net[from_layer],
                              num_output=96,
                              stride=4,
                              kernel_size=11,
                              **kwargs)
    net.relu1 = L.ReLU(net.conv1, in_place=True)
    # pool1
    net.pool1 = L.Pooling(net.relu1,
                          pool=P.Pooling.MAX,
                          kernel_size=3,
                          stride=2)
    # norm1
    net.norm1 = L.LRN(net.pool1,
                      lrn_param=dict(local_size=5, alpha=0.0001, beta=0.75))
    # conv2
    net.conv2 = L.Convolution(net.norm1,
                              num_output=256,
                              pad=2,
                              group=2,
                              kernel_size=5,
                              **kwargs)
    net.relu2 = L.ReLU(net.conv2, in_place=True)
    # pool2
    net.pool2 = L.Pooling(net.relu2,
                          pool=P.Pooling.MAX,
                          kernel_size=3,
                          stride=2)
    # norm2
    net.norm2 = L.LRN(net.pool2,
                      lrn_param=dict(local_size=5, alpha=0.0001, beta=0.75))
    # conv3
    net.conv3 = L.Convolution(net.norm2,
                              num_output=384,
                              pad=1,
                              kernel_size=3,
                              **kwargs)
    net.relu3 = L.ReLU(net.conv3, in_place=True)
    # conv4
    #net.conv4 = L.Convolution(net.relu3, num_output=384, pad=1, group=2, kernel_size=3, **kwargs)
    #net.relu4 = L.ReLU(net.conv4, in_place=True)
    # conv5
    #net.conv5 = L.Convolution(net.relu4, num_output=256, pad=1, group=2, kernel_size=3, **kwargs)
    #net.relu5 = L.ReLU(net.conv5, in_place=True)
    # pool5
    net.pool5 = L.Pooling(net.relu3,
                          pool=P.Pooling.MAX,
                          kernel_size=3,
                          stride=2)
    # HalfMerge
    net.convf = L.Halfmerge(net.pool5)
    # FC layers
    fc_kwargs = {
        'param':
        [dict(lr_mult=10, decay_mult=1),
         dict(lr_mult=20, decay_mult=0)],
        'weight_filler': dict(type='gaussian', std=0.005),
        'bias_filler': dict(type='constant', value=1),
    }
    net.fc6 = L.InnerProduct(net.convf,
                             name="fc6-new1",
                             num_output=4096,
                             **fc_kwargs)
    net.relu6 = L.ReLU(net.fc6, in_place=True)
    net.drop6 = L.Dropout(net.relu6,
                          in_place=True,
                          dropout_param=dict(dropout_ratio=0.5))
    net.fc7 = L.InnerProduct(net.drop6,
                             name="fc7-new1",
                             num_output=4096,
                             **fc_kwargs)
    net.relu7 = L.ReLU(net.fc7, in_place=True)
    net.drop7 = L.Dropout(net.relu7,
                          in_place=True,
                          dropout_param=dict(dropout_ratio=0.5))
    net.fc7b = L.InnerProduct(net.drop7,
                              name="fc7-newb1",
                              num_output=4096,
                              **fc_kwargs)
    net.relu7b = L.ReLU(net.fc7b, in_place=True)
    net.drop7b = L.Dropout(net.relu7b,
                           in_place=True,
                           dropout_param=dict(dropout_ratio=0.5))
    fc_kwargs = {
        'param':
        [dict(lr_mult=10, decay_mult=1),
         dict(lr_mult=20, decay_mult=0)],
        'weight_filler': dict(type='gaussian', std=0.01),
        'bias_filler': dict(type='constant', value=0),
    }
    net.fc8 = L.InnerProduct(net.drop7b,
                             name="fc8-shapes1",
                             num_output=4,
                             **fc_kwargs)
    # GT layers
    net.neg = L.Power(net[label_layer],
                      power_param=dict(power=1, scale=-1, shift=0))
    net.neg_flat = L.Flatten(net.neg, name="flatten1")
    # add
    net.out_diff = L.Eltwise(net.fc8, net.neg_flat, name="subtract1")
    # loss: Reduction operation=2 is ASUM, i.e. an L1 loss on (fc8 - label)
    net.loss = L.Reduction(net.out_diff,
                           name="abssum1",
                           loss_weight=1,
                           reduction_param=dict(operation=2))
    return net
    def test_reduce(self):
        n = caffe.NetSpec()
        n.input1 = L.Input(shape=make_shape([10, 3, 64, 64]))
        n.pooling1 = L.Reduction(n.input1, operation=P.Reduction.SUM, axis=0)
        self._test_model(*self._netspec_to_model(n, 'reduce'))
Example #23
    def lrcn_reinforce(self, save_name, RL_loss='lstm_classification', lw=20):

        data_inputs = self.data_inputs
        param_str = self.param_str

        ss_tag = 'reg_'
        #reg sentences will be the first part of the batch
        if self.separate_sents:
            if not 'batch_size' in param_str.keys():
                param_str['batch_size'] = 100
            self.slice_point = param_str['batch_size'] / 2
            self.batch_size = param_str['batch_size']

        param_str_loss = {}
        param_str_loss['vocab'] = param_str['vocabulary']
        param_str_loss['avoid_words'] = ['red', 'small']
        if self.baseline:
            param_str_loss['baseline'] = True
        data_input = 'fc8'

        data_tops = self.python_input_layer(data_inputs['module'],
                                            data_inputs['layer'], param_str)
        self.rename_tops(data_tops, data_inputs['param_str']['top_names'])
        feature_name = 'fc8'
        self.n.tops[feature_name] = L.InnerProduct(
            self.n.tops[param_str['image_data_key']],
            num_output=1000,
            weight_filler=self.uniform_weight_filler(-.08, .08),
            bias_filler=self.constant_filler(0),
            param=self.init_params([[1, 1], [2, 0]]))

        if self.cc:
            #If class conditional
            data_top = self.n.tops['fc8']
            class_top = self.n.tops[param_str['data_label_feat']]
            self.n.tops['class_input'] = L.Concat(data_top, class_top, axis=1)
            data_input = 'class_input'
        else:
            self.silence(self.n.tops[param_str['data_label_feat']])

        bottom_sent = self.n.tops[param_str['text_data_key']]
        bottom_cont = self.n.tops[param_str['text_marker_key']]

        #prep for caption model
        bottom_cont_slice = L.Slice(bottom_cont, ntop=self.T, axis=0)
        self.rename_tops(bottom_cont_slice,
                         ['bottom_cont_%d' % i for i in range(self.T)])

        if not self.separate_sents:
            bottom_sent_slice = L.Slice(bottom_sent, ntop=self.T, axis=0)
            self.rename_tops(bottom_sent_slice,
                             ['input_sent_%d' % i for i in range(self.T)])
            target_sentence = self.n.tops['target_sentence']
        else:
            bottom_sents = L.Slice(bottom_sent,
                                   slice_point=[self.slice_point],
                                   axis=1,
                                   ntop=2)
            self.rename_tops(bottom_sents, ['reg_input_sent', 'rl_input_sent'])
            reg_bottom_sents_slice = L.Slice(self.n.tops['reg_input_sent'],
                                             axis=0,
                                             ntop=20)
            rl_bottom_sents_slice = L.Slice(self.n.tops['rl_input_sent'],
                                            axis=0,
                                            ntop=20)
            self.silence([rl_bottom_sents_slice[i] for i in range(1, self.T)])
            self.n.tops['input_sent_0'] = L.Concat(reg_bottom_sents_slice[0],
                                                   rl_bottom_sents_slice[0],
                                                   axis=1)
            self.rename_tops(
                reg_bottom_sents_slice,
                ['reg_input_sent_%d' % i for i in range(1, self.T)])

            self.rename_tops(reg_bottom_sents_slice,
                             ['reg_input_sent_%d' % i for i in range(self.T)])
            slice_target_sentence = L.Slice(self.n.tops['target_sentence'],
                                            slice_point=[self.slice_point],
                                            axis=1,
                                            ntop=2)
            self.rename_tops(slice_target_sentence,
                             ['reg_target_sentence', 'rl_target_sentence'])
            self.silence(self.n.tops['rl_target_sentence'])
            target_sentence = self.n.tops['reg_target_sentence']

        self.n.tops['lstm1_h0'] = self.dummy_data_layer(
            [1, self.N, self.lstm_dim], 0)
        self.n.tops['lstm1_c0'] = self.dummy_data_layer(
            [1, self.N, self.lstm_dim], 0)
        self.n.tops['lstm2_h0'] = self.dummy_data_layer(
            [1, self.N, self.lstm_dim], 0)
        self.n.tops['lstm2_c0'] = self.dummy_data_layer(
            [1, self.N, self.lstm_dim], 0)

        self.make_caption_model(static_input=data_input)

        #prep bottoms for loss
        predict_tops = [self.n.tops['predict_%d' % i] for i in range(self.T)]
        self.n.tops['predict_concat'] = L.Concat(*predict_tops, axis=0)
        if self.separate_sents:
            word_sample_tops = [
                self.n.tops['rl_word_sample_reshape_%d' % i]
                for i in range(1, self.T + 1)
            ]
            self.n.tops['word_sample_concat'] = L.Concat(*word_sample_tops,
                                                         axis=0)
            concat_predict_tops = L.Slice(self.n.tops['predict_concat'],
                                          slice_point=[self.slice_point],
                                          axis=1,
                                          ntop=2)
            reg_predict = concat_predict_tops[0]
            RL_predict = concat_predict_tops[1]
            bottom_cont_tops = L.Slice(bottom_cont,
                                       slice_point=[self.slice_point],
                                       axis=1,
                                       ntop=2)
            self.silence(bottom_cont_tops[0])
            label_tops = L.Slice(self.n.tops[param_str['data_label']],
                                 slice_point=[self.slice_point],
                                 axis=0,
                                 ntop=2)
            self.silence(label_tops[0])
            self.rename_tops([bottom_cont_tops[1], label_tops[1]],
                             ['rl_bottom_cont', 'rl_label_top'])
            label_top = self.n.tops['rl_label_top']
            bottom_cont = self.n.tops['rl_bottom_cont']
        else:
            word_sample_tops = [
                self.n.tops['word_sample_reshape_%d' % i]
                for i in range(1, self.T + 1)
            ]
            self.n.tops['word_sample_concat'] = L.Concat(*word_sample_tops,
                                                         axis=0)
            reg_predict = self.n.tops['predict_concat']
            RL_predict = self.n.tops['predict_concat']
            label_top = self.n.tops[param_str['data_label']]

        #RL loss
        if RL_loss == 'lstm_classification':
            self.n.tops['embed_classification'] = self.embed(
                self.n.tops['word_sample_concat'],
                1000,
                input_dim=self.vocab_size,
                bias_term=False,
                learning_param=self.init_params([[0, 0]]))
            self.n.tops['lstm_classification'] = self.lstm(
                self.n.tops['embed_classification'],
                bottom_cont,
                learning_param_lstm=self.init_params([[0, 0], [0, 0], [0, 0]]),
                lstm_hidden=1000)
            self.n.tops['predict_classification'] = L.InnerProduct(
                self.n.tops['lstm_classification'], num_output=200, axis=2)
            self.n.tops['probs_classification'] = L.Softmax(
                self.n.tops['predict_classification'], axis=2)
            # Classification reward layer inputs: classification probs, word_sample_concat
            # (to get the sentence length), and the data label. The data label should be a
            # single stream, even though the model is trained with 20 streams...
            self.n.tops['reward'] = self.python_layer([
                self.n.tops['probs_classification'],
                self.n.tops['word_sample_concat'], label_top
            ], 'loss_layers', 'sequenceClassificationLoss', param_str_loss)

        self.n.tops['reward_reshape'] = L.Reshape(self.n.tops['reward'],
                                                  shape=dict(dim=[1, -1]))
        self.n.tops['reward_tile'] = L.Tile(self.n.tops['reward_reshape'],
                                            axis=0,
                                            tiles=self.T)

        #softmax with sampled words as "correct" word
        self.n.tops['sample_loss'] = self.softmax_per_inst_loss(
            RL_predict, self.n.tops['word_sample_concat'], axis=2)
        self.n.tops['sample_reward'] = L.Eltwise(self.n.tops['sample_loss'],
                                                 self.n.tops['reward_tile'],
                                                 propagate_down=[1, 0],
                                                 operation=0)
        avoid_lw = 100
        self.n.tops['normalized_reward'] = L.Power(
            self.n.tops['sample_reward'], scale=(1. / self.N) * avoid_lw)
        self.n.tops['sum_rewards'] = L.Reduction(
            self.n.tops['normalized_reward'], loss_weight=[1])
        self.n.tops['sentence_loss'] = self.softmax_loss(reg_predict,
                                                         target_sentence,
                                                         axis=2,
                                                         loss_weight=20)

        self.write_net(save_name)
Example #24
def mynet(batch, steps, loss_type, dep=False, descr=False, part='gen'):

    conv_lr = [dict(lr_mult=1, decay_mult=1), dict(lr_mult=2, decay_mult=1)]
    bcnv_lr = [dict(lr_mult=1, decay_mult=1)]
    scale_lr = [dict(lr_mult=1, decay_mult=1), dict(lr_mult=1, decay_mult=1)]
    bn_param = dict(eps=0.001, use_global_stats=False)

    fr_lr = [dict(lr_mult=0, decay_mult=0), dict(lr_mult=0, decay_mult=0)]
    fr_clr = [dict(lr_mult=0, decay_mult=0)]
    #fr_bn = dict(eps=0.001,use_global_stats=True)
    fr_bn = dict(eps=0.001, use_global_stats=False)

    if part == 'gen':
        gen_conv_lr = conv_lr
        gen_bcnv_lr = bcnv_lr
        gen_scale_lr = scale_lr
        gen_bn_param = bn_param
        dsc_conv_lr = fr_lr
    else:
        gen_conv_lr = fr_lr
        gen_bcnv_lr = fr_clr
        gen_scale_lr = fr_lr
        gen_bn_param = fr_bn
        dsc_conv_lr = conv_lr

    n = caffe.NetSpec()

    sp = dict(bias_term=True, filler=dict(value=1.0))

    if dep:
        n.source = L.Input(input_param=dict(shape=[dict(dim=[1, 1, 64, 64])]))
    else:
        if descr:
            if part == 'gen':
                bs = batch
            else:
                bs = batch / 2
        else:
            bs = batch
        n.data = L.Data(
            data_param=dict(source="db", batch_size=bs, backend=P.Data.LMDB))

        n.expected, n.source = L.Slice(n.data,
                                       slice_param=dict(axis=1, slice_point=1),
                                       ntop=2)
        if descr:
            if part != 'gen':
                #n.data_ref = L.Split(n.expected)
                n.data_ref = L.Data(data_param=dict(
                    source="db_ref", batch_size=batch /
                    2, backend=P.Data.LMDB))
                n.label_0 = L.DummyData(shape=[dict(dim=[batch / 2])],
                                        data_filler=dict(value=0.0))
                n.label_1 = L.DummyData(shape=[dict(dim=[batch / 2])],
                                        data_filler=dict(value=1.0))
                n.label = L.Concat(n.label_0,
                                   n.label_1,
                                   concat_param=dict(axis=0))
            else:
                n.label = L.DummyData(shape=[dict(dim=[batch])],
                                      data_filler=dict(value=1.0))

    n.conv1 = L.Convolution(n.source,
                            convolution_param=conv_param_nb(3, 16),
                            param=gen_bcnv_lr)
    n.bn1 = L.BatchNorm(n.conv1, batch_norm_param=gen_bn_param)
    n.scale1 = L.Scale(n.bn1, scale_param=sp, param=gen_scale_lr)
    n.scale1 = L.ReLU(n.scale1)
    inp = "scale1"
    for m in range(steps):
        k = m + 1
        cid1 = "step%d/conv1" % k
        cid2 = "step%d/conv2" % k
        bid1 = "step%d/bn1" % k
        bid2 = "step%d/bn2" % k
        eid = "step%d/elt" % k

        n[cid1] = L.Convolution(n[inp],
                                convolution_param=conv_param_nb(3, 16),
                                param=gen_bcnv_lr)
        n[bid1] = L.BatchNorm(n[cid1], batch_norm_param=gen_bn_param)
        n[bid1] = L.Scale(n[bid1], scale_param=sp, param=gen_scale_lr)
        n[bid1] = L.ReLU(n[bid1])

        n[cid2] = L.Convolution(n[bid1],
                                convolution_param=conv_param_nb(3, 16),
                                param=gen_bcnv_lr)
        n[bid2] = L.BatchNorm(n[cid2], batch_norm_param=gen_bn_param)
        n[bid2] = L.Scale(n[bid2], scale_param=sp, param=gen_scale_lr)
        n[bid2] = L.ReLU(n[bid2])

        n[eid] = L.Eltwise(n[bid2], n[inp])
        inp = eid

    outname = "topconv"
    n[outname] = L.Convolution(n[inp],
                               convolution_param=conv_param(3, 1),
                               param=gen_conv_lr)
    n.generated = L.Sigmoid(n.topconv)
    if not dep:
        lw = 1 if part == 'gen' else 0
        if loss_type == 'euc':
            n.l2_loss = L.EuclideanLoss(n.generated,
                                        n.expected,
                                        name="loss",
                                        loss_weight=lw)
        else:
            n.l2_loss = L.EuclideanLoss(n.generated,
                                        n.expected,
                                        name="loss",
                                        loss_weight=0)
            n.cross_entropy_loss = L.SigmoidCrossEntropyLoss(n.topconv,
                                                             n.expected,
                                                             name="loss",
                                                             loss_weight=lw)
    if descr:
        if part != 'gen':
            n.desc_inp = L.Concat(n.generated,
                                  n.data_ref,
                                  concat_param=dict(axis=0))
            cinp = "desc_inp"
        else:
            cinp = "generated"
        n.d_conv1 = L.Convolution(n[cinp],
                                  convolution_param=conv_param(5, 32),
                                  param=dsc_conv_lr)
        n.d_pool1 = L.Pooling(n.d_conv1,
                              pooling_param=dict(kernel_size=3,
                                                 stride=2,
                                                 pool=P.Pooling.MAX))
        n.d_pool1 = L.ReLU(n.d_pool1)

        n.d_conv2 = L.Convolution(n.d_pool1,
                                  convolution_param=conv_param(5, 32),
                                  param=dsc_conv_lr)
        n.d_pool2 = L.Pooling(n.d_conv2,
                              pooling_param=dict(kernel_size=3,
                                                 stride=2,
                                                 pool=P.Pooling.MAX))
        n.d_pool2 = L.ReLU(n.d_pool2)

        n.d_conv3 = L.Convolution(n.d_pool2,
                                  convolution_param=conv_param(5, 64),
                                  param=dsc_conv_lr)
        n.d_pool3 = L.Pooling(n.d_conv3,
                              pooling_param=dict(kernel_size=3,
                                                 stride=2,
                                                 pool=P.Pooling.MAX))
        n.d_pool3 = L.ReLU(n.d_pool3)

        n.d_conv4 = L.Convolution(n.d_pool3,
                                  convolution_param=conv_param(3, 64),
                                  param=dsc_conv_lr)
        n.d_pool4 = L.Pooling(n.d_conv4,
                              pooling_param=dict(kernel_size=3,
                                                 stride=2,
                                                 pool=P.Pooling.MAX))
        n.d_pool4 = L.ReLU(n.d_pool4)

        n.d_ip1 = L.InnerProduct(n.d_pool4,
                                 param=dsc_conv_lr,
                                 inner_product_param=ip_param(512))
        n.d_ip1 = L.ReLU(n.d_ip1)
        n.d_ip2 = L.InnerProduct(n.d_ip1,
                                 param=dsc_conv_lr,
                                 inner_product_param=ip_param(1))

        n.sigmoid_loss = L.SigmoidCrossEntropyLoss(n.d_ip2,
                                                   n.label,
                                                   name="loss",
                                                   loss_weight=100)
        n.score = L.Sigmoid(n.d_ip2)
        n.lbl_flat = L.Reshape(n.label,
                               reshape_param=dict(shape=dict(dim=[-1, 1])))
        n.diff = L.Eltwise(
            n.score,
            n.lbl_flat,
            eltwise_param=dict(coeff=[1.0 / batch, -1.0 / batch]))
        n.error = L.Reduction(n.diff,
                              reduction_param=dict(operation=P.Reduction.ASUM))
        #n.output = L.Split(n[cinp])
        #n.output_labels = L.Split(n.score)
        #n.inputs = n.source

    return n
Example #25
    def net():
        n = caffe.NetSpec()
        n.data = L.Input(input_param=dict(shape=dict(dim=data_shape)))
        n.dataout = L.Reduction(n.data, axis=0, coeff=1, operation=_operation)
        return n.to_proto()
Example #26
    def dot_product_distance(self, vec1, vec2, axis=1):
        mult = L.Eltwise(vec1, vec2, operation=0)
        reduction = L.Reduction(mult, axis=axis)
        negative = L.Power(reduction, scale=-1, shift=1)
        return negative
Example #27
    def euclidean_distance(self, vec1, vec2, axis=1):
        negative = L.Power(vec2, scale=-1)
        difference = L.Eltwise(vec1, negative, operation=1)
        squared = L.Power(difference, power=2)
        reduction = L.Reduction(squared, axis=axis)
        return reduction
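For clarity, what these two distance functions compute for N x D inputs with axis=1: `dot_product_distance` returns 1 minus the dot product (Power with scale=-1, shift=1 is 1 - x), and `euclidean_distance` returns the squared Euclidean distance (there is no square root after the reduction). A NumPy restatement:

import numpy as np

def dot_product_distance_reference(v1, v2):
    return 1.0 - np.sum(v1 * v2, axis=1)

def euclidean_distance_reference(v1, v2):
    return np.sum((v1 - v2) ** 2, axis=1)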