Example #1
    def context_supervision_loss(self, distance, lw=1, ind_loss=None):
        """
        Distance is positive; want the gt distance to be SMALLER than the other distances.
        The loss used for context supervision is also a ranking loss:
            Look at the rank loss between all possible pairs of moments; want the gt distance to be smaller.
            Take the average.
        """

        slices = L.Slice(distance, ntop=21, axis=1)
        gt = slices[0]
        setattr(self.n, 'gt_slice', gt)
        ranking_losses = []
        for i in range(1, 21):
            setattr(self.n, 'context_slice_%d' % i, slices[i])
            negate_distance = L.Power(slices[i], scale=-1)
            max_sum = L.Eltwise(gt, negate_distance, operation=1)
            max_sum_margin = L.Power(max_sum, shift=self.margin)
            max_sum_margin_relu = L.ReLU(max_sum_margin, in_place=False)
            if ind_loss:
                max_sum_margin_relu = L.Reshape(
                    max_sum_margin_relu, shape=dict(dim=[self.batch_size, 1]))
                max_sum_margin_relu = L.Eltwise(max_sum_margin_relu,
                                                ind_loss,
                                                operation=0)
            setattr(self.n, 'max_sum_margin_relu_%d' % i, max_sum_margin_relu)
            ranking_loss = L.Reduction(max_sum_margin_relu, operation=4)
            ranking_losses.append(ranking_loss)
        sum_ranking_losses = L.Eltwise(*ranking_losses, operation=1)
        loss = L.Power(sum_ranking_losses, scale=1 / 21., loss_weight=[lw])
        return loss
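For readers who find the Power/Eltwise/ReLU/Reduction chain hard to parse, here is a rough numpy sketch (not part of the original example) of what the graph above computes, using Caffe's enum values Eltwise PROD = 0, SUM = 1 and Reduction MEAN = 4:

import numpy as np

def context_supervision_loss_np(distance, margin, ind_loss=None):
    # distance: (N, 21); column 0 is the ground-truth moment, columns 1..20 are context moments
    gt = distance[:, 0]
    total = 0.0
    for i in range(1, 21):
        hinge = np.maximum(0.0, gt - distance[:, i] + margin)  # want gt distance smaller
        if ind_loss is not None:
            hinge = hinge * ind_loss  # optional per-example weighting
        total += hinge.mean()
    return total / 21.0  # the graph scales by 1/21 even though there are 20 pairwise terms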
Example #2
    def test_power3(self):
        n = caffe.NetSpec()
        n.input1 = L.Input(shape=make_shape([6, 4, 64, 64]))
        # These two Power layers cannot be merged into a single layer
        n.pow1 = L.Power(n.input1, power=2.0)
        n.pow2 = L.Power(n.pow1, scale=0.3)
        self._test_model(*self._netspec_to_model(n, 'power3'))
Example #3
def scat_layer(bottom, dim, kernel_size, name, group=1):
    conv1 = conv_layer(bottom, dim, kernel_size, name + '_real', group=group)
    pow1 = L.Power(conv1, power=2, in_place=True)
    conv2 = conv_layer(bottom, dim, kernel_size, name + '_imag', group=group)
    pow2 = L.Power(conv2, power=2, in_place=True)
    res_add = add(pow1, pow2)
    res_add = L.Power(res_add, power=.5, in_place=True)
    return res_add
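The real/imaginary convolution pair, squaring, summation and 0.5 power amount to a complex modulus per output position. A minimal numpy sketch of that arithmetic (conv_layer and add are helpers from the original file, assumed to perform a convolution and an elementwise sum):

import numpy as np

def scat_magnitude_np(real_response, imag_response):
    # sqrt(real^2 + imag^2), i.e. the magnitude of a complex filter response
    return np.sqrt(real_response ** 2 + imag_response ** 2)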
Example #4
def test_power3():
    # type: ()->caffe.NetSpec

    n = caffe.NetSpec()
    n.input1 = L.Input(shape=make_shape([6, 4, 64, 64]))
    # These two Power layers cannot be merged into a single layer
    n.pow1 = L.Power(n.input1, power=2.0)
    n.pow2 = L.Power(n.pow1, scale=0.3)
    return n
Example #5
 def normalize(self, bottom, axis=1, numtiles=4096):
     power = L.Power(bottom, power=2)
     power_sum = L.Reduction(power, axis=axis, operation=1)
     sqrt = L.Power(power_sum, power=-0.5, shift=0.00001)
     if axis == 1:
         reshape = L.Reshape(sqrt, shape=dict(dim=[-1, 1]))
     if axis == 2:
         reshape = L.Reshape(sqrt, shape=dict(dim=[self.batch_size, -1, 1]))
     tile = L.Tile(reshape, axis=axis, tiles=numtiles)
     return L.Eltwise(tile, bottom, operation=0)
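In plain numpy terms this builds an L2 normalization along the chosen axis; a small sketch (assuming the reduction axis is the feature axis, as in the two branches above):

import numpy as np

def l2_normalize_np(x, axis=1, eps=1e-5):
    # x / sqrt(sum(x^2) + eps), with the denominator broadcast back along `axis`
    denom = np.sqrt(np.sum(x ** 2, axis=axis, keepdims=True) + eps)
    return x / denom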
Example #6
 def tall_loss(self, positive, negative, query, lw=1):
     scores_p = self.distance_function(positive, query)
     scores_n = self.distance_function(negative, query)
     alpha_c = 1
     alpha_w = 1
     exp_p = L.Exp(scores_p, scale=-1)
     exp_n = L.Exp(scores_n)
     log_p = L.Log(exp_p, shift=1)
     log_n = L.Log(exp_n, shift=1)
     scale_p = L.Power(log_p, scale=alpha_c)
     scale_n = L.Power(log_n, scale=alpha_w)
     all_scores = L.Concat(scale_p, scale_n, axis=0)
     return L.Reduction(all_scores, operation=4, loss_weight=[lw])
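Since the Exp/Log/Power layers here just build softplus terms, the whole loss reduces to a short numpy expression; a sketch assuming scores_p and scores_n are plain arrays of alignment scores:

import numpy as np

def tall_loss_np(scores_p, scores_n, alpha_c=1.0, alpha_w=1.0):
    pos = alpha_c * np.log1p(np.exp(-scores_p))  # push positive scores up
    neg = alpha_w * np.log1p(np.exp(scores_n))   # push negative scores down
    return np.concatenate([pos, neg]).mean()     # Reduction MEAN over both groups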
Example #7
    def ranking_loss(self, p, n, t, lw=1):

        #For ranking used in paper
        distance_p = self.distance_function(p, t)
        distance_n = self.distance_function(n, t)
        negate_distance_n = L.Power(distance_n, scale=-1)
        max_sum = L.Eltwise(distance_p, negate_distance_n, operation=1)
        max_sum_margin = L.Power(max_sum, shift=self.margin)
        max_sum_margin_relu = L.ReLU(max_sum_margin, in_place=False)
        ranking_loss = L.Reduction(max_sum_margin_relu,
                                   operation=4,
                                   loss_weight=[lw])

        return ranking_loss
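The negate / shift / ReLU / mean chain is a standard margin ranking (hinge) loss; a numpy sketch of the same computation:

import numpy as np

def ranking_loss_np(distance_p, distance_n, margin):
    # mean(max(0, distance_p - distance_n + margin)); minimized when positives are closer
    return np.maximum(0.0, distance_p - distance_n + margin).mean()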
Example #8
    def ranking_loss(self, p, n, t, lw=1):

        # I <3 Caffe - this is not obnoxious to write at all.
        distance_p = self.distance_function(p, t)
        distance_n = self.distance_function(n, t)
        negate_distance_n = L.Power(distance_n, scale=-1)
        max_sum = L.Eltwise(distance_p, negate_distance_n, operation=1)
        max_sum_margin = L.Power(max_sum, shift=self.margin)
        max_sum_margin_relu = L.ReLU(max_sum_margin, in_place=False)
        ranking_loss = L.Reduction(max_sum_margin_relu,
                                   operation=4,
                                   loss_weight=[lw])

        return ranking_loss
Example #9
    def relational_ranking_loss(self, distance_p, distance_n, lw=1):
        """
    This function assumes you want to MINIMIZE distances
    """

        negate_distance_n = L.Power(distance_n, scale=-1)
        max_sum = L.Eltwise(distance_p, negate_distance_n, operation=1)
        max_sum_margin = L.Power(max_sum, shift=self.margin)
        max_sum_margin_relu = L.ReLU(max_sum_margin, in_place=False)
        ranking_loss = L.Reduction(max_sum_margin_relu,
                                   operation=4,
                                   loss_weight=[lw])

        return ranking_loss
Example #10
    def early_combine_mult_not_relational(self, vec1, vec2):
        mult = L.Eltwise(vec1, vec2, operation=0)
        setattr(self.n, 'mult', mult)
        norm_mult = self.normalize(mult,
                                   numtiles=self.visual_embedding_dim[-1],
                                   axis=1)
        setattr(self.n, 'norm_mult', norm_mult)

        intermediate = L.InnerProduct(
            norm_mult,
            num_output=self.visual_embedding_dim[-1],
            weight_filler=self.uniform_weight_filler(-0.08, .08),
            param=self.learning_params([[1, 1], [2, 0]],
                                       ['eltwise_dist1', 'eltwise_dist1_b']),
            axis=1)
        nonlin_1 = L.ReLU(intermediate)
        setattr(self.n, 'intermediate', nonlin_1)
        dropout = L.Dropout(nonlin_1, dropout_ratio=self.dropout_visual)

        score = L.InnerProduct(
            dropout,
            num_output=1,
            weight_filler=self.uniform_weight_filler(-0.08, .08),
            param=self.learning_params([[1, 1], [2, 0]],
                                       ['eltwise_dist2', 'eltwise_dist2_b']),
            axis=1)
        negative_score = L.Power(score, scale=-1)
        setattr(self.n, 'rank_score', score)
        return score
Example #11
def test_power():
    # type: ()->caffe.NetSpec

    n = caffe.NetSpec()
    n.input1 = L.Input(shape=make_shape([6, 4, 64, 64]))
    n.pow1 = L.Power(n.input1, power=2.0, scale=0.5, shift=0.01)
    return n
Example #12
def Power(bottom, name='power', p=1, a=1, b=0):  # (ax+b)^p
    return L.Power(bottom,
                   name=name,
                   power_param={
                       'power': p,
                       'scale': a,
                       'shift': b
                   })
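A couple of hypothetical calls showing how the wrapper maps onto Caffe's y = (scale*x + shift)^power semantics (the NetSpec and Input blob below are placeholders, not from the original file):

import caffe
from caffe import layers as L

n = caffe.NetSpec()
n.data = L.Input(shape=dict(dim=[1, 3, 8, 8]))
n.neg = Power(n.data, name='neg', p=1, a=-1, b=0)           # y = -x
n.shifted = Power(n.data, name='shifted', p=1, a=1, b=0.5)  # y = x + 0.5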
Example #13
    def l2normed(self, vec, dim):
        """Returns L2-normalized instances of vec; i.e., for each instance x in vec,
        computes x / ((x ** 2).sum() ** 0.5). Assumes vec has shape N x dim."""
        denom = L.Reduction(vec, axis=1, operation=P.Reduction.SUMSQ)
        denom = L.Power(denom, power=(-0.5), shift=1e-12)
        denom = L.Reshape(denom, num_axes=0, axis=-1, shape=dict(dim=[1]))
        denom = L.Tile(denom, axis=1, tiles=dim)

        return L.Eltwise(vec, denom, operation=P.Eltwise.PROD)
Example #14
def weight_edges2(bottom, num_output, power=1.0):
    bottom_avg = L.Convolution(bottom,
                               convolution_param=dict(num_output=num_output,
                                                      kernel_size=1,
                                                      stride=1,
                                                      pad=0,
                                                      bias_term=False,
                                                      weight_filler=dict(
                                                          type='constant',
                                                          value=1.0)),
                               param=[{
                                   'lr_mult': 0,
                                   'decay_mult': 0
                               }])

    binarized = L.Power(bottom_avg, power_param=dict(power=power))
    weight = L.Power(binarized, power_param=dict(shift=1, scale=-1))

    return weight
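Because the 1x1 convolution has constant weights of 1 and no bias, it simply sums the input channels (replicated num_output times); the two Power layers then compute 1 - sum**power. A numpy sketch:

import numpy as np

def weight_edges2_np(x, num_output, power=1.0):
    # x: (N, C, H, W); constant-weight 1x1 conv == channel sum, repeated num_output times
    summed = np.repeat(x.sum(axis=1, keepdims=True), num_output, axis=1)
    return 1.0 - summed ** power  # Power(shift=1, scale=-1) on top of Power(power=power)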
Example #15
def normalize(bottom, dim):

    bottom_relu = L.ReLU(bottom)
    sum = L.Convolution(bottom_relu,
                        convolution_param = dict(num_output = 1, kernel_size = 1, stride = 1,
                                                 weight_filler = dict(type = 'constant', value = 1),
                                                 bias_filler = dict(type = 'constant', value = 0)),
                        param=[{'lr_mult':0, 'decay_mult':0}, {'lr_mult':0, 'decay_mult':0}])

    denom = L.Power(sum, power=(-1.0), shift=1e-12)
    denom = L.Tile(denom, axis=1, tiles=dim)

    return L.Eltwise(bottom_relu, denom, operation=P.Eltwise.PROD)
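Here the constant-weight 1x1 convolution plays the role of a channel sum, so the graph divides each rectified activation by the (tiled) sum over channels. A numpy sketch:

import numpy as np

def sum_normalize_np(x, eps=1e-12):
    # x: (N, C, H, W); relu, then divide by the per-position channel sum
    x = np.maximum(0.0, x)
    denom = x.sum(axis=1, keepdims=True) + eps  # Power(power=-1, shift=eps) gives 1/denom
    return x / denom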
Example #16
    def pool_distances(self, vec, minimum_distance=True):
        #want to MINIMIZE distance; negate, maximize, then negate (again)
        #Assume that scores are Nx21 size blob
        if self.args.pool_type in ['max', 'average']:
            prep_pool = L.Reshape(vec,
                                  shape=dict(dim=[self.batch_size, 1, 21, 1]))

            if minimum_distance:
                prep_pool = L.Power(prep_pool, scale=-1)
            max_pool = L.Pooling(prep_pool,
                                 pool=pooling_type[self.args.pool_type],
                                 kernel_h=21,
                                 kernel_w=1)
            pool = L.Reshape(max_pool, shape=dict(dim=[self.batch_size]))
            if minimum_distance:
                pool = L.Power(pool, scale=-1)
        elif self.args.pool_type in ['sum']:
            #untested
            negative = L.Power(vec, scale=-1)
            pool = L.Reduction(negative, axis=1, operation=1)  #sum
        else:
            raise Exception("You did not select a valid pooling type.")
        return pool
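Caffe has no min pooling, hence the negate / max-pool / negate trick above. A numpy sketch of the branches for an (N, 21) score blob:

import numpy as np

def pool_distances_np(vec, pool_type='max', minimum_distance=True):
    if pool_type in ('max', 'average'):
        x = -vec if minimum_distance else vec
        pooled = x.max(axis=1) if pool_type == 'max' else x.mean(axis=1)
        return -pooled if minimum_distance else pooled  # min(vec) when pool_type == 'max'
    elif pool_type == 'sum':
        return (-vec).sum(axis=1)  # the untested 'sum' branch
    raise ValueError('You did not select a valid pooling type.')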
Example #17
def l2normed(dim):
    n = caffe.NetSpec()
    n.data, n.label = L.Python(module='layers',
                               layer='tripletDataLayer',
                               ntop=2)
    """Returns L2-normalized instances of vec; i.e., for each instance x in vec,
    computes  x / ((x ** 2).sum() ** 0.5). Assumes vec has shape N x dim."""
    n.denom = L.Reduction(n.data, axis=1, operation=P.Reduction.SUMSQ)
    #denom = L.Power(denom, power=(-0.5))
    n.power = L.Power(n.denom, power=(-0.5),
                      shift=1e-12)  # For numerical stability
    n.reshape = L.Reshape(n.power, num_axes=0, axis=-1, shape=dict(dim=[1]))
    n.tile = L.Tile(n.reshape, axis=1, tiles=dim)
    n.elwise = L.Eltwise(n.data, n.tile, operation=P.Eltwise.PROD)
    return n.to_proto()
Example #18
    def early_combine_mult_tall(self, vec1, vec2):
        feature = self.tall_feature(vec1, vec2)
        setattr(self.n, 'feature', feature)
        intermediate = L.InnerProduct(
            feature,
            num_output=self.visual_embedding_dim[-1],
            weight_filler=self.uniform_weight_filler(-0.08, .08),
            param=self.learning_params([[1, 1], [2, 0]],
                                       ['eltwise_dist1', 'eltwise_dist1_b']),
            axis=1)
        nonlin_1 = L.ReLU(intermediate)
        setattr(self.n, 'intermediate', nonlin_1)
        dropout = L.Dropout(nonlin_1, dropout_ratio=self.dropout_visual)

        score = L.InnerProduct(
            dropout,
            num_output=1,
            weight_filler=self.uniform_weight_filler(-0.08, .08),
            param=self.learning_params([[1, 1], [2, 0]],
                                       ['eltwise_dist2', 'eltwise_dist2_b']),
            axis=1)
        negative_score = L.Power(score, scale=-1)
        setattr(self.n, 'rank_score', score)
        return score
Example #19
    def gru_unit(self,
                 prefix,
                 x,
                 cont,
                 static=None,
                 h=None,
                 batch_size=100,
                 timestep=0,
                 gru_hidden=1000,
                 weight_lr_mult=1,
                 bias_lr_mult=2,
                 weight_decay_mult=1,
                 bias_decay_mult=0,
                 concat_hidden=True,
                 weight_filler=None,
                 bias_filler=None):

        #assume static input already transformed

        if not weight_filler:
            weight_filler = self.uniform_weight_filler(-0.08, 0.08)
        if not bias_filler:
            bias_filler = self.constant_filler(0)
        if not h:
            h = self.dummy_data_layer([1, batch_size, gru_hidden], 1)

        def get_name(name):
            return '%s_%s' % (prefix, name)

        def get_param(weight_name, bias_name=None):
            #TODO: write this in terms of earlier method "init_params"
            w = dict(lr_mult=weight_lr_mult,
                     decay_mult=weight_decay_mult,
                     name=get_name(weight_name))
            if bias_name is not None:
                b = dict(lr_mult=bias_lr_mult,
                         decay_mult=bias_decay_mult,
                         name=get_name(bias_name))
                return [w, b]
            return [w]

        gate_dim = gru_hidden * 3

        #transform x_t
        x = L.InnerProduct(x,
                           num_output=gate_dim,
                           axis=2,
                           weight_filler=weight_filler,
                           bias_filler=bias_filler,
                           param=get_param('W_xc', 'b_c'))
        self.rename_tops(x, get_name('%d_x_transform' % timestep))

        #transform h
        h_conted = L.Scale(h, cont, axis=0)
        h = L.InnerProduct(h_conted,
                           num_output=gru_hidden * 2,
                           axis=2,
                           bias_term=False,
                           weight_filler=weight_filler,
                           param=get_param('W_hc'))
        h_name = get_name('%d_h_transform' % timestep)
        if not hasattr(self.n, h_name):
            setattr(self.n, h_name, h)

        #gru stuff TODO: write GRUUnit in caffe?  would make all this much prettier.
        x_transform_z_r, x_transform_hc = L.Slice(x,
                                                  slice_point=gru_hidden * 2,
                                                  axis=2,
                                                  ntop=2)
        sum_items = [x_transform_z_r, h]
        if static:
            sum_items += static
        z_r_sum = self.sum(sum_items)
        z_r = L.Sigmoid(z_r_sum)
        z, r = L.Slice(z_r, slice_point=gru_hidden, axis=2, ntop=2)

        z_weighted_h = self.prod([r, h_conted])
        z_h_transform = L.InnerProduct(z_weighted_h,
                                       num_output=gru_hidden,
                                       axis=2,
                                       bias_term=False,
                                       weight_filler=weight_filler,
                                       param=get_param('W_hzc'))
        sum_items = [x_transform_hc, z_h_transform]
        if static:
            sum_items += static
        hc_sum = self.sum(sum_items)
        hc = L.TanH(hc_sum)

        zm1 = L.Power(z, scale=-1, shift=1)
        h_h = self.prod([zm1, h_conted])
        h_hc = self.prod([z, hc])
        h = self.sum([h_h, h_hc])

        return h
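The graph implements a GRU-style update: update and reset gates z and r from x and the previous hidden state, a candidate state hc, and a convex combination of old and candidate states. A numpy sketch of one timestep, with hypothetical dense weights mirroring the shared param names (W_xc/b_c, W_hc, W_hzc are illustrative matrices, not the actual Caffe blobs):

import numpy as np

def sigmoid(a):
    return 1.0 / (1.0 + np.exp(-a))

def gru_step_np(x_t, h_prev, W_xc, b_c, W_hc, W_hzc, cont=1.0):
    # W_xc: (3H, D), W_hc: (2H, H), W_hzc: (H, H); cont zeroes h_prev at sequence starts
    H = W_hzc.shape[0]
    h_prev = cont * h_prev
    xt = W_xc @ x_t + b_c                            # [z; r; candidate] pre-activations from x
    ht = W_hc @ h_prev                               # recurrent part of z and r
    z, r = np.split(sigmoid(xt[:2 * H] + ht), 2)     # update and reset gates
    hc = np.tanh(xt[2 * H:] + W_hzc @ (r * h_prev))  # candidate hidden state
    return (1.0 - z) * h_prev + z * hc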
Example #20
 def subtract(self, bottoms):
     assert len(bottoms) == 2
     negate = L.Power(bottoms[1], scale=-1)
     return L.Eltwise(bottoms[0], negate, operation=1)
Example #21
def convert_symbol2proto(symbol):
    def looks_like_weight(name):
        """Internal helper to figure out if node should be hidden with `hide_weights`.
        """
        if name.endswith("_weight"):
            return True
        if name.endswith("_bias"):
            return True
        if name.endswith("_beta") or name.endswith("_gamma") or name.endswith(
                "_moving_var") or name.endswith("_moving_mean"):
            return True
        return False

    json_symbol = json.loads(symbol.tojson())
    all_nodes = json_symbol['nodes']
    no_weight_nodes = []
    for node in all_nodes:
        op = node['op']
        name = node['name']
        if op == 'null':
            if looks_like_weight(name):
                continue
        no_weight_nodes.append(node)

    # build next node dict
    next_node = dict()
    for node in no_weight_nodes:
        node_name = node['name']
        for input in node['inputs']:
            last_node_name = all_nodes[input[0]]['name']
            if last_node_name in next_node:
                next_node[last_node_name].append(node_name)
            else:
                next_node[last_node_name] = [node_name]

    supported_op_type = [
        'null', 'BatchNorm', 'Convolution', 'Activation', 'Pooling',
        'elemwise_add', 'SliceChannel', 'FullyConnected', 'SoftmaxOutput',
        '_maximum', 'add_n', 'Concat', '_mul_scalar', 'Deconvolution',
        'UpSampling'
    ]
    top_dict = dict()
    caffe_net = caffe.NetSpec()
    for node in no_weight_nodes:
        if node['op'] == 'null':
            input_param = dict()
            if node['name'] == 'data':
                input_param['shape'] = dict(dim=[1, 3, 160, 160])
            else:
                input_param['shape'] = dict(dim=[1])
            top_data = CL.Input(ntop=1, input_param=input_param)
            top_dict[node['name']] = [top_data]
            setattr(caffe_net, node['name'], top_data)
        elif node['op'].endswith('_copy'):
            pass
        elif node['op'] == 'BatchNorm':
            input = node['inputs'][0]
            while True:
                if all_nodes[input[0]]['op'] not in supported_op_type:
                    input = all_nodes[input[0]]['inputs'][0]
                else:
                    break
            bottom_node_name = all_nodes[input[0]]['name']
            attr = node['attrs']
            in_place = False
            if len(next_node[bottom_node_name]) == 1:
                in_place = True
            if 'momentum' in attr:
                momentum = float(attr['momentum'])
            else:
                momentum = 0.9
            if 'eps' in attr:
                eps = float(attr['eps'])
            else:
                eps = 0.001
            if NO_INPLACE:
                in_place = False
            bn_top = CL.BatchNorm(top_dict[bottom_node_name][input[1]],
                                  ntop=1,
                                  batch_norm_param=dict(
                                      use_global_stats=True,
                                      moving_average_fraction=momentum,
                                      eps=eps),
                                  in_place=in_place)
            setattr(caffe_net, node['name'], bn_top)
            scale_top = CL.Scale(bn_top,
                                 ntop=1,
                                 scale_param=dict(bias_term=True),
                                 in_place=not NO_INPLACE)
            top_dict[node['name']] = [scale_top]
            setattr(caffe_net, node['name'] + '_scale', scale_top)
        elif node['op'] == 'Convolution':
            input = node['inputs'][0]
            while True:
                if all_nodes[input[0]]['op'] not in supported_op_type:
                    input = all_nodes[input[0]]['inputs'][0]
                else:
                    break
            bottom_node_name = all_nodes[input[0]]['name']
            attr = node['attrs']
            convolution_param = dict()
            if 'kernel' in attr:
                kernel_size = eval(attr['kernel'])
                assert kernel_size[0] == kernel_size[1]
                convolution_param['kernel_size'] = kernel_size[0]
            else:
                convolution_param['kernel_size'] = 1
            if 'no_bias' in attr:
                convolution_param['bias_term'] = not eval(attr['no_bias'])
            if 'num_group' in attr:
                convolution_param['group'] = int(attr['num_group'])
            convolution_param['num_output'] = int(attr['num_filter'])
            if 'pad' in attr:
                pad_size = eval(attr['pad'])
                assert pad_size[0] == pad_size[1]
                convolution_param['pad'] = pad_size[0]
            if 'stride' in attr:
                stride_size = eval(attr['stride'])
                assert stride_size[0] == stride_size[1]
                convolution_param['stride'] = stride_size[0]
            conv_top = CL.Convolution(top_dict[bottom_node_name][input[1]],
                                      ntop=1,
                                      convolution_param=convolution_param)
            top_dict[node['name']] = [conv_top]
            setattr(caffe_net, node['name'], conv_top)
        elif node['op'] == 'Deconvolution':
            input = node['inputs'][0]
            while True:
                if all_nodes[input[0]]['op'] not in supported_op_type:
                    input = all_nodes[input[0]]['inputs'][0]
                else:
                    break
            bottom_node_name = all_nodes[input[0]]['name']
            attr = node['attrs']
            convolution_param = dict()
            if 'kernel' in attr:
                kernel_size = eval(attr['kernel'])
                assert kernel_size[0] == kernel_size[1]
                convolution_param['kernel_size'] = kernel_size[0]
            else:
                convolution_param['kernel_size'] = 1
            if 'no_bias' in attr:
                convolution_param['bias_term'] = not eval(attr['no_bias'])
            else:
                convolution_param['bias_term'] = False
            if 'num_group' in attr:
                convolution_param['group'] = int(attr['num_group'])
            convolution_param['num_output'] = int(attr['num_filter'])
            if 'pad' in attr:
                pad_size = eval(attr['pad'])
                assert pad_size[0] == pad_size[1]
                convolution_param['pad'] = pad_size[0]
            if 'stride' in attr:
                stride_size = eval(attr['stride'])
                assert stride_size[0] == stride_size[1]
                convolution_param['stride'] = stride_size[0]
            conv_top = CL.Deconvolution(top_dict[bottom_node_name][input[1]],
                                        ntop=1,
                                        convolution_param=convolution_param)
            top_dict[node['name']] = [conv_top]
            setattr(caffe_net, node['name'], conv_top)
        elif node['op'] == 'UpSampling':
            input = node['inputs'][0]
            while True:
                if all_nodes[input[0]]['op'] not in supported_op_type:
                    input = all_nodes[input[0]]['inputs'][0]
                else:
                    break
            bottom_node_name = all_nodes[input[0]]['name']
            attr = node['attrs']
            convolution_param = dict()
            if 'scale' in attr:
                kernel_size = 2 * eval(attr['scale']) - eval(attr['scale']) % 2
                convolution_param['kernel_size'] = kernel_size
            else:
                convolution_param['kernel_size'] = 1
            convolution_param['bias_term'] = False
            convolution_param['num_output'] = int(attr['num_filter'])
            convolution_param['group'] = int(attr['num_filter'])
            convolution_param['pad'] = int(
                math.ceil((eval(attr['scale']) - 1) / 2.))
            convolution_param['stride'] = eval(attr['scale'])
            conv_top = CL.Deconvolution(top_dict[bottom_node_name][input[1]],
                                        ntop=1,
                                        convolution_param=convolution_param)
            top_dict[node['name']] = [conv_top]
            setattr(caffe_net, node['name'], conv_top)
        elif node['op'] == 'Activation':
            input = node['inputs'][0]
            while True:
                if all_nodes[input[0]]['op'] not in supported_op_type:
                    input = all_nodes[input[0]]['inputs'][0]
                else:
                    break
            bottom_node_name = all_nodes[input[0]]['name']
            attr = node['attrs']
            in_place = False
            if len(next_node[bottom_node_name]) == 1:
                in_place = True
            if NO_INPLACE:
                in_place = False
            if attr['act_type'] == 'relu':
                ac_top = CL.ReLU(top_dict[bottom_node_name][input[1]],
                                 ntop=1,
                                 in_place=in_place)
            elif attr['act_type'] == 'sigmoid':
                ac_top = CL.Sigmoid(top_dict[bottom_node_name][input[1]],
                                    ntop=1,
                                    in_place=in_place)
            elif attr['act_type'] == 'tanh':
                ac_top = CL.TanH(top_dict[bottom_node_name][input[1]],
                                 ntop=1,
                                 in_place=in_place)
            top_dict[node['name']] = [ac_top]
            setattr(caffe_net, node['name'], ac_top)
        elif node['op'] == 'Pooling':
            input = node['inputs'][0]
            while True:
                if all_nodes[input[0]]['op'] not in supported_op_type:
                    input = all_nodes[input[0]]['inputs'][0]
                else:
                    break
            bottom_node_name = all_nodes[input[0]]['name']
            attr = node['attrs']
            pooling_param = dict()
            if attr['pool_type'] == 'avg':
                pooling_param['pool'] = 1
            elif attr['pool_type'] == 'max':
                pooling_param['pool'] = 0
            else:
                assert False, attr['pool_type']
            if 'global_pool' in attr and eval(attr['global_pool']) is True:
                pooling_param['global_pooling'] = True
            else:
                if 'kernel' in attr:
                    kernel_size = eval(attr['kernel'])
                    assert kernel_size[0] == kernel_size[1]
                    pooling_param['kernel_size'] = kernel_size[0]
                if 'pad' in attr:
                    pad_size = eval(attr['pad'])
                    assert pad_size[0] == pad_size[1]
                    pooling_param['pad'] = pad_size[0]
                if 'stride' in attr:
                    stride_size = eval(attr['stride'])
                    assert stride_size[0] == stride_size[1]
                    pooling_param['stride'] = stride_size[0]
            pool_top = CL.Pooling(top_dict[bottom_node_name][input[1]],
                                  ntop=1,
                                  pooling_param=pooling_param)
            top_dict[node['name']] = [pool_top]
            setattr(caffe_net, node['name'], pool_top)
        elif node['op'] == 'elemwise_add' or node['op'] == 'add_n':
            input_a = node['inputs'][0]
            while True:
                if all_nodes[input_a[0]]['op'] not in supported_op_type:
                    input_a = all_nodes[input_a[0]]['inputs'][0]
                else:
                    break
            input_b = node['inputs'][1]
            while True:
                if all_nodes[input_b[0]]['op'] not in supported_op_type:
                    input_b = all_nodes[input_b[0]]['inputs'][0]
                else:
                    break
            bottom_node_name_a = all_nodes[input_a[0]]['name']
            bottom_node_name_b = all_nodes[input_b[0]]['name']
            eltwise_param = dict()
            eltwise_param['operation'] = 1
            ele_add_top = CL.Eltwise(top_dict[bottom_node_name_a][input_a[1]],
                                     top_dict[bottom_node_name_b][input_b[1]],
                                     ntop=1,
                                     eltwise_param=eltwise_param)
            top_dict[node['name']] = [ele_add_top]
            setattr(caffe_net, node['name'], ele_add_top)
        elif node['op'] == '_maximum':
            input_a = node['inputs'][0]
            while True:
                if all_nodes[input_a[0]]['op'] not in supported_op_type:
                    input_a = all_nodes[input_a[0]]['inputs'][0]
                else:
                    break
            input_b = node['inputs'][1]
            while True:
                if all_nodes[input_b[0]]['op'] not in supported_op_type:
                    input_b = all_nodes[input_b[0]]['inputs'][0]
                else:
                    break
            bottom_node_name_a = all_nodes[input_a[0]]['name']
            bottom_node_name_b = all_nodes[input_b[0]]['name']
            eltwise_param = dict()
            eltwise_param['operation'] = 2
            ele_add_top = CL.Eltwise(top_dict[bottom_node_name_a][input_a[1]],
                                     top_dict[bottom_node_name_b][input_b[1]],
                                     ntop=1,
                                     eltwise_param=eltwise_param)
            top_dict[node['name']] = [ele_add_top]
            setattr(caffe_net, node['name'], ele_add_top)
        elif node['op'] == '_mul_scalar':
            input = node['inputs'][0]
            while True:
                if all_nodes[input[0]]['op'] not in supported_op_type:
                    input = all_nodes[input[0]]['inputs'][0]
                else:
                    break
            bottom_node_name = all_nodes[input[0]]['name']
            attr = node['attrs']
            in_place = False
            if len(next_node[bottom_node_name]) == 1:
                in_place = True
            if NO_INPLACE:
                in_place = False
            ac_top = CL.Power(top_dict[bottom_node_name][input[1]],
                              power=1.0,
                              scale=float(attr['scalar']),
                              shift=0,
                              in_place=in_place)
            top_dict[node['name']] = [ac_top]
            setattr(caffe_net, node['name'], ac_top)
        elif node['op'] == 'SliceChannel':
            input = node['inputs'][0]
            while True:
                if all_nodes[input[0]]['op'] not in supported_op_type:
                    input = all_nodes[input[0]]['inputs'][0]
                else:
                    break
            bottom_node_name = all_nodes[input[0]]['name']
            slice_param = dict()
            slice_param['slice_dim'] = 1
            slice_num = 2
            slice_outputs = CL.Slice(top_dict[bottom_node_name][input[1]],
                                     ntop=slice_num,
                                     slice_param=slice_param)
            top_dict[node['name']] = slice_outputs
            for idx, output in enumerate(slice_outputs):
                setattr(caffe_net, node['name'] + '_' + str(idx), output)
        elif node['op'] == 'FullyConnected':
            input = node['inputs'][0]
            while True:
                if all_nodes[input[0]]['op'] not in supported_op_type:
                    input = all_nodes[input[0]]['inputs'][0]
                else:
                    break
            bottom_node_name = all_nodes[input[0]]['name']
            attr = node['attrs']
            inner_product_param = dict()
            inner_product_param['num_output'] = int(attr['num_hidden'])
            fc_top = CL.InnerProduct(top_dict[bottom_node_name][input[1]],
                                     ntop=1,
                                     inner_product_param=inner_product_param)
            top_dict[node['name']] = [fc_top]
            setattr(caffe_net, node['name'], fc_top)
        elif node['op'] == 'SoftmaxOutput':
            input_a = node['inputs'][0]
            while True:
                if all_nodes[input_a[0]]['op'] not in supported_op_type:
                    input_a = all_nodes[input_a[0]]['inputs'][0]
                else:
                    break
            input_b = node['inputs'][1]
            while True:
                if all_nodes[input_b[0]]['op'] not in supported_op_type:
                    input_b = all_nodes[input_b[0]]['inputs'][0]
                else:
                    break
            bottom_node_name_a = all_nodes[input_a[0]]['name']
            bottom_node_name_b = all_nodes[input_b[0]]['name']
            softmax_loss = CL.SoftmaxWithLoss(
                top_dict[bottom_node_name_a][input_a[1]],
                top_dict[bottom_node_name_b][input_b[1]],
                ntop=1)
            top_dict[node['name']] = [softmax_loss]
            setattr(caffe_net, node['name'], softmax_loss)
        elif node['op'] == 'Concat':
            if len(node['inputs']) == 2:
                input_a = node['inputs'][0]
                while True:
                    if all_nodes[input_a[0]]['op'] not in supported_op_type:
                        input_a = all_nodes[input_a[0]]['inputs'][0]
                    else:
                        break
                input_b = node['inputs'][1]
                while True:
                    if all_nodes[input_b[0]]['op'] not in supported_op_type:
                        input_b = all_nodes[input_b[0]]['inputs'][0]
                    else:
                        break
                bottom_node_name_a = all_nodes[input_a[0]]['name']
                bottom_node_name_b = all_nodes[input_b[0]]['name']
                concat_top = CL.Concat(
                    top_dict[bottom_node_name_a][input_a[1]],
                    top_dict[bottom_node_name_b][input_b[1]],
                    ntop=1)
                top_dict[node['name']] = [concat_top]
                setattr(caffe_net, node['name'], concat_top)
            elif len(node['inputs']) == 3:
                input_a = node['inputs'][0]
                while True:
                    if all_nodes[input_a[0]]['op'] not in supported_op_type:
                        input_a = all_nodes[input_a[0]]['inputs'][0]
                    else:
                        break
                input_b = node['inputs'][1]
                while True:
                    if all_nodes[input_b[0]]['op'] not in supported_op_type:
                        input_b = all_nodes[input_b[0]]['inputs'][0]
                    else:
                        break
                input_c = node['inputs'][2]
                while True:
                    if all_nodes[input_c[0]]['op'] not in supported_op_type:
                        input_c = all_nodes[input_c[0]]['inputs'][0]
                    else:
                        break
                bottom_node_name_a = all_nodes[input_a[0]]['name']
                bottom_node_name_b = all_nodes[input_b[0]]['name']
                bottom_node_name_c = all_nodes[input_c[0]]['name']
                concat_top = CL.Concat(
                    top_dict[bottom_node_name_a][input_a[1]],
                    top_dict[bottom_node_name_b][input_b[1]],
                    top_dict[bottom_node_name_c][input_c[1]],
                    ntop=1)
                top_dict[node['name']] = [concat_top]
                setattr(caffe_net, node['name'], concat_top)
        else:
            logging.warn('unknown op type = %s' % node['op'])

    return caffe_net.to_proto()
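A hypothetical way to drive this converter (assuming the module-level imports and NO_INPLACE flag used above are in place, a recent MXNet whose symbol JSON uses the 'attrs' key, and an input variable named 'data', which the converter special-cases for the input shape):

import mxnet as mx

data = mx.sym.Variable('data')
net = mx.sym.Convolution(data, num_filter=16, kernel=(3, 3), pad=(1, 1), name='conv1')
net = mx.sym.Activation(net, act_type='relu', name='relu1')

proto = convert_symbol2proto(net)
with open('converted.prototxt', 'w') as f:
    f.write(str(proto))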
Example #22
def pva_convHeader(net, from_layer, out_layer, use_pool=True, lr=1, decay=1):
    bn_kwargs = {
        'param': [
            dict(lr_mult=0, decay_mult=0),
            dict(lr_mult=0, decay_mult=0),
            dict(lr_mult=0, decay_mult=0)
        ],
        'batch_norm_param':
        dict(use_global_stats=True),
    }
    scale_kwargs = {
        'bias_term': True,
        'param':
        [dict(lr_mult=lr, decay_mult=0),
         dict(lr_mult=lr, decay_mult=0)],
    }
    power_kwargs = {'power': 1, 'scale': -1.0, 'shift': 0}
    conv_kwargs = {
        'param': [dict(lr_mult=lr, decay_mult=decay)],
        'weight_filler': dict(type='xavier'),
        'bias_term': False,
    }
    layer_name = "{}/conv".format(out_layer)
    name = "{}/conv".format(out_layer)
    net[name] = L.Convolution(net[from_layer], name=layer_name, num_output=16, \
        kernel_size=7, pad=3, stride=2, **conv_kwargs)
    start_layer = name
    layer_name = "{}/bn".format(out_layer)
    name = "{}/bn".format(out_layer)
    net[name] = L.BatchNorm(net[start_layer],
                            name=layer_name,
                            in_place=True,
                            **bn_kwargs)
    feaLayers = []
    feaLayers.append(net[name])
    start_layer = name
    neg_layer = "{}/neg".format(out_layer)
    neg_name = "{}/neg".format(out_layer)
    net[neg_name] = L.Power(net[start_layer], name=neg_layer, **power_kwargs)
    feaLayers.append(net[neg_name])
    concat_layer = "{}/concat".format(out_layer)
    concat_name = out_layer
    net[concat_name] = L.Concat(*feaLayers, name=concat_layer, axis=1)
    start_layer = concat_name
    layer_name = "{}/scale".format(out_layer)
    name = "{}/scale".format(out_layer)
    net[name] = L.Scale(net[start_layer],
                        name=layer_name,
                        in_place=True,
                        **scale_kwargs)
    start_layer = name
    layer_name = "{}/relu".format(out_layer)
    name = "{}/relu".format(out_layer)
    net[name] = L.ReLU(net[start_layer], name=layer_name, in_place=True)
    start_layer = name
    # pool
    if use_pool:
        layer_name = "pool1"
        name = "pool1"
        net[name] = L.Pooling(net[start_layer],
                              pool=P.Pooling.MAX,
                              kernel_size=3,
                              stride=2)

    return net
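The conv -> BN -> negate -> concat -> scale -> ReLU sequence is the CReLU trick: concatenating x and -x doubles the channels so one convolution serves both signs. A numpy sketch of the post-convolution part (scale and bias stand in for the learned Scale-layer parameters, assumed broadcastable over the doubled channel axis; the optional pooling is omitted):

import numpy as np

def crelu_np(x, scale, bias):
    # x: (N, C, H, W) batch-normalized conv output; result has 2*C channels
    doubled = np.concatenate([x, -x], axis=1)       # Concat of the BN output and its negation
    return np.maximum(0.0, scale * doubled + bias)  # Scale (with bias) followed by ReLU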
Example #23
def ResInceptionLayer(net, from_layer, out_layer, cross_stage=False, channels_1=64, \
                      channels_3=[48,128], channels_5=[24,48,128],channels_pool=128, \
                      channels_output=256, lr=1, decay=1, out_bn=False):
    assert len(channels_3) == 2
    assert len(channels_5) == 3
    bn_kwargs = {
        'param': [
            dict(lr_mult=0, decay_mult=0),
            dict(lr_mult=0, decay_mult=0),
            dict(lr_mult=0, decay_mult=0)
        ],
        'batch_norm_param':
        dict(use_global_stats=True),
    }
    scale_kwargs = {
        'bias_term': True,
        'param':
        [dict(lr_mult=lr, decay_mult=0),
         dict(lr_mult=lr, decay_mult=0)],
    }
    input_kwargs = {'power': 1, 'scale': 1, 'shift': 0}
    conv_kwargs = {
        'param': [dict(lr_mult=lr, decay_mult=decay)],
        'weight_filler': dict(type='xavier'),
        'bias_term': False,
    }
    convbias_kwargs = {
        'param': [
            dict(lr_mult=lr, decay_mult=decay),
            dict(lr_mult=2 * lr, decay_mult=0)
        ],
        'weight_filler':
        dict(type='xavier'),
        'bias_filler':
        dict(type='constant', value=0)
    }
    eltwise_kwargs = {'operation': 1, 'coeff': [1, 1]}
    start_layer = from_layer
    if cross_stage:
        stride = 2
    else:
        stride = 1
    # pre-stage: bn/scale/relu
    layer_name = "{}/incep/bn".format(out_layer)
    name = "{}/incep/pre".format(out_layer)
    net[name] = L.BatchNorm(net[start_layer],
                            name=layer_name,
                            in_place=False,
                            **bn_kwargs)
    start_layer = name
    layer_name = "{}/incep/bn_scale".format(out_layer)
    name = "{}/incep/bn_scale".format(out_layer)
    net[name] = L.Scale(net[start_layer],
                        name=layer_name,
                        in_place=True,
                        **scale_kwargs)
    start_layer = name
    layer_name = "{}/incep/relu".format(out_layer)
    name = "{}/incep/relu".format(out_layer)
    net[name] = L.ReLU(net[start_layer], name=layer_name, in_place=True)
    fea_layer = name

    mlayers = []
    # conv-1x1
    layer_name = "{}/incep/0/conv".format(out_layer)
    name = "{}/incep/0".format(out_layer)
    net[name] = L.Convolution(net[fea_layer], name=layer_name, num_output=channels_1, \
        kernel_size=1, pad=0, stride=stride, **conv_kwargs)
    start_layer = name
    layer_name = "{}/incep/0/bn".format(out_layer)
    name = "{}/incep/0/bn".format(out_layer)
    net[name] = L.BatchNorm(net[start_layer],
                            name=layer_name,
                            in_place=True,
                            **bn_kwargs)
    start_layer = name
    layer_name = "{}/incep/0/bn_scale".format(out_layer)
    name = "{}/incep/0/bn_scale".format(out_layer)
    net[name] = L.Scale(net[start_layer],
                        name=layer_name,
                        in_place=True,
                        **scale_kwargs)
    start_layer = name
    layer_name = "{}/incep/0/relu".format(out_layer)
    name = "{}/incep/0/relu".format(out_layer)
    net[name] = L.ReLU(net[start_layer], name=layer_name, in_place=True)
    mlayers.append(net[name])

    # conv-3x3
    layer_name = "{}/incep/1_reduce/conv".format(out_layer)
    name = "{}/incep/1_reduce".format(out_layer)
    net[name] = L.Convolution(net[fea_layer], name=layer_name, num_output=channels_3[0], \
        kernel_size=1, pad=0, stride=stride, **conv_kwargs)
    start_layer = name
    layer_name = "{}/incep/1_reduce/bn".format(out_layer)
    name = "{}/incep/1_reduce/bn".format(out_layer)
    net[name] = L.BatchNorm(net[start_layer],
                            name=layer_name,
                            in_place=True,
                            **bn_kwargs)
    start_layer = name
    layer_name = "{}/incep/1_reduce/bn_scale".format(out_layer)
    name = "{}/incep/1_reduce/bn_scale".format(out_layer)
    net[name] = L.Scale(net[start_layer],
                        name=layer_name,
                        in_place=True,
                        **scale_kwargs)
    start_layer = name
    layer_name = "{}/incep/1_reduce/relu".format(out_layer)
    name = "{}/incep/1_reduce/relu".format(out_layer)
    net[name] = L.ReLU(net[start_layer], name=layer_name, in_place=True)
    start_layer = name
    layer_name = "{}/incep/1_0/conv".format(out_layer)
    name = "{}/incep/1_0".format(out_layer)
    net[name] = L.Convolution(net[start_layer], name=layer_name, num_output=channels_3[1], \
        kernel_size=3, pad=1, stride=1, **conv_kwargs)
    start_layer = name
    layer_name = "{}/incep/1_0/bn".format(out_layer)
    name = "{}/incep/1_0/bn".format(out_layer)
    net[name] = L.BatchNorm(net[start_layer],
                            name=layer_name,
                            in_place=True,
                            **bn_kwargs)
    start_layer = name
    layer_name = "{}/incep/1_0/bn_scale".format(out_layer)
    name = "{}/incep/1_0/bn_scale".format(out_layer)
    net[name] = L.Scale(net[start_layer],
                        name=layer_name,
                        in_place=True,
                        **scale_kwargs)
    start_layer = name
    layer_name = "{}/incep/1_0/relu".format(out_layer)
    name = "{}/incep/1_0/relu".format(out_layer)
    net[name] = L.ReLU(net[start_layer], name=layer_name, in_place=True)
    mlayers.append(net[name])

    # conv-5x5
    layer_name = "{}/incep/2_reduce/conv".format(out_layer)
    name = "{}/incep/2_reduce".format(out_layer)
    net[name] = L.Convolution(net[fea_layer], name=layer_name, num_output=channels_5[0], \
        kernel_size=1, pad=0, stride=stride, **conv_kwargs)
    start_layer = name
    layer_name = "{}/incep/2_reduce/bn".format(out_layer)
    name = "{}/incep/2_reduce/bn".format(out_layer)
    net[name] = L.BatchNorm(net[start_layer],
                            name=layer_name,
                            in_place=True,
                            **bn_kwargs)
    start_layer = name
    layer_name = "{}/incep/2_reduce/bn_scale".format(out_layer)
    name = "{}/incep/2_reduce/bn_scale".format(out_layer)
    net[name] = L.Scale(net[start_layer],
                        name=layer_name,
                        in_place=True,
                        **scale_kwargs)
    start_layer = name
    layer_name = "{}/incep/2_reduce/relu".format(out_layer)
    name = "{}/incep/2_reduce/relu".format(out_layer)
    net[name] = L.ReLU(net[start_layer], name=layer_name, in_place=True)
    start_layer = name
    layer_name = "{}/incep/2_0/conv".format(out_layer)
    name = "{}/incep/2_0".format(out_layer)
    net[name] = L.Convolution(net[start_layer], name=layer_name, num_output=channels_5[1], \
        kernel_size=3, pad=1, stride=1, **conv_kwargs)
    start_layer = name
    layer_name = "{}/incep/2_0/bn".format(out_layer)
    name = "{}/incep/2_0/bn".format(out_layer)
    net[name] = L.BatchNorm(net[start_layer],
                            name=layer_name,
                            in_place=True,
                            **bn_kwargs)
    start_layer = name
    layer_name = "{}/incep/2_0/bn_scale".format(out_layer)
    name = "{}/incep/2_0/bn_scale".format(out_layer)
    net[name] = L.Scale(net[start_layer],
                        name=layer_name,
                        in_place=True,
                        **scale_kwargs)
    start_layer = name
    layer_name = "{}/incep/2_0/relu".format(out_layer)
    name = "{}/incep/2_0/relu".format(out_layer)
    net[name] = L.ReLU(net[start_layer], name=layer_name, in_place=True)
    start_layer = name
    layer_name = "{}/incep/2_1/conv".format(out_layer)
    name = "{}/incep/2_1".format(out_layer)
    net[name] = L.Convolution(net[start_layer], name=layer_name, num_output=channels_5[2], \
        kernel_size=3, pad=1, stride=1, **conv_kwargs)
    start_layer = name
    layer_name = "{}/incep/2_1/bn".format(out_layer)
    name = "{}/incep/2_1/bn".format(out_layer)
    net[name] = L.BatchNorm(net[start_layer],
                            name=layer_name,
                            in_place=True,
                            **bn_kwargs)
    start_layer = name
    layer_name = "{}/incep/2_1/bn_scale".format(out_layer)
    name = "{}/incep/2_1/bn_scale".format(out_layer)
    net[name] = L.Scale(net[start_layer],
                        name=layer_name,
                        in_place=True,
                        **scale_kwargs)
    start_layer = name
    layer_name = "{}/incep/2_1/relu".format(out_layer)
    name = "{}/incep/2_1/relu".format(out_layer)
    net[name] = L.ReLU(net[start_layer], name=layer_name, in_place=True)
    mlayers.append(net[name])

    # pool
    if cross_stage:
        layer_name = "{}/incep/pool".format(out_layer)
        name = "{}/incep/pool".format(out_layer)
        net[name] = L.Pooling(net[fea_layer],
                              pool=P.Pooling.MAX,
                              kernel_size=3,
                              stride=2)
        start_layer = name
        layer_name = "{}/incep/poolproj/conv".format(out_layer)
        name = "{}/incep/poolproj".format(out_layer)
        net[name] = L.Convolution(net[start_layer], name=layer_name, num_output=channels_pool, \
            kernel_size=1, pad=0, stride=1, **conv_kwargs)
        start_layer = name
        layer_name = "{}/incep/poolproj/bn".format(out_layer)
        name = "{}/incep/poolproj/bn".format(out_layer)
        net[name] = L.BatchNorm(net[start_layer],
                                name=layer_name,
                                in_place=True,
                                **bn_kwargs)
        start_layer = name
        layer_name = "{}/incep/poolproj/bn_scale".format(out_layer)
        name = "{}/incep/poolproj/bn_scale".format(out_layer)
        net[name] = L.Scale(net[start_layer],
                            name=layer_name,
                            in_place=True,
                            **scale_kwargs)
        start_layer = name
        layer_name = "{}/incep/poolproj/relu".format(out_layer)
        name = "{}/incep/poolproj/relu".format(out_layer)
        net[name] = L.ReLU(net[start_layer], name=layer_name, in_place=True)
        mlayers.append(net[name])

    # incep
    layer_name = "{}/incep".format(out_layer)
    name = "{}/incep".format(out_layer)
    net[name] = L.Concat(*mlayers, name=layer_name, axis=1)
    start_layer = name
    # out-conv
    scLayers = []
    if not out_bn:
        layer_name = "{}/out/conv".format(out_layer)
        name = "{}/out".format(out_layer)
        net[name] = L.Convolution(net[start_layer], name=layer_name, num_output=channels_output, \
            kernel_size=1, pad=0, stride=1, **convbias_kwargs)
        scLayers.append(net[name])
    else:
        layer_name = "{}/out/conv".format(out_layer)
        name = "{}/out".format(out_layer)
        net[name] = L.Convolution(net[start_layer], name=layer_name, num_output=channels_output, \
            kernel_size=1, pad=0, stride=1, **conv_kwargs)
        start_layer = name
        layer_name = "{}/out/bn".format(out_layer)
        name = "{}/out/bn".format(out_layer)
        net[name] = L.BatchNorm(net[start_layer],
                                name=layer_name,
                                in_place=True,
                                **bn_kwargs)
        start_layer = name
        layer_name = "{}/out/bn_scale".format(out_layer)
        name = "{}/out/bn_scale".format(out_layer)
        net[name] = L.Scale(net[start_layer],
                            name=layer_name,
                            in_place=True,
                            **scale_kwargs)
        scLayers.append(net[name])

    # proj or input
    if cross_stage:
        layer_name = "{}/proj".format(out_layer)
        name = "{}/proj".format(out_layer)
        net[name] = L.Convolution(net[from_layer], name=layer_name, num_output=channels_output, \
            kernel_size=1, pad=0, stride=2, **convbias_kwargs)
        scLayers.append(net[name])
    else:
        layer_name = "{}/input".format(out_layer)
        name = "{}/input".format(out_layer)
        net[name] = L.Power(net[from_layer], name=layer_name, **input_kwargs)
        scLayers.append(net[name])

    # Eltwise
    layer_name = out_layer
    name = out_layer
    net[name] = L.Eltwise(*scLayers, name=layer_name, **eltwise_kwargs)

    return net
Example #24
def mCReLULayer(net, from_layer, out_layer, reduced_channels=24, \
                inter_channels=24, output_channels=48, lr=1, decay=1, \
                use_prior_bn=True, cross_stage=False, has_pool=False):
    bn_kwargs = {
        'param': [
            dict(lr_mult=0, decay_mult=0),
            dict(lr_mult=0, decay_mult=0),
            dict(lr_mult=0, decay_mult=0)
        ],
        'batch_norm_param':
        dict(use_global_stats=True),
    }
    scale_kwargs = {
        'bias_term': True,
        'param':
        [dict(lr_mult=lr, decay_mult=0),
         dict(lr_mult=lr, decay_mult=0)],
    }
    power_kwargs = {'power': 1, 'scale': -1.0, 'shift': 0}
    input_kwargs = {'power': 1, 'scale': 1, 'shift': 0}
    conv_kwargs = {
        'param': [
            dict(lr_mult=lr, decay_mult=decay),
            dict(lr_mult=2 * lr, decay_mult=0)
        ],
        'weight_filler':
        dict(type='xavier'),
        'bias_filler':
        dict(type='constant', value=0)
    }
    eltwise_kwargs = {'operation': 1, 'coeff': [1, 1]}
    # conv/1: bn/scale/relu/conv
    start_layer = from_layer
    if use_prior_bn:
        layer_name = "{}/1/bn".format(out_layer)
        name = "{}/1/pre".format(out_layer)
        net[name] = L.BatchNorm(net[start_layer],
                                name=layer_name,
                                in_place=False,
                                **bn_kwargs)
        start_layer = name
        layer_name = "{}/1/bn_scale".format(out_layer)
        name = "{}/1/bn_scale".format(out_layer)
        net[name] = L.Scale(net[start_layer],
                            name=layer_name,
                            in_place=True,
                            **scale_kwargs)
        start_layer = name
        layer_name = "{}/1/relu".format(out_layer)
        name = "{}/1/relu".format(out_layer)
        net[name] = L.ReLU(net[start_layer], name=layer_name, in_place=True)
        start_layer = name
    layer_name = "{}/1/conv".format(out_layer)
    name = "{}/1".format(out_layer)
    if has_pool:
        stride = 2
    else:
        stride = 1
    net[name] = L.Convolution(net[start_layer], name=layer_name, num_output=reduced_channels, \
        kernel_size=1, pad=0, stride=stride, **conv_kwargs)
    start_layer = name

    # conv/2: bn/scale/relu/conv
    layer_name = "{}/2/bn".format(out_layer)
    name = "{}/2/pre".format(out_layer)
    net[name] = L.BatchNorm(net[start_layer],
                            name=layer_name,
                            in_place=False,
                            **bn_kwargs)
    start_layer = name
    layer_name = "{}/2/bn_scale".format(out_layer)
    name = "{}/2/bn_scale".format(out_layer)
    net[name] = L.Scale(net[start_layer],
                        name=layer_name,
                        in_place=True,
                        **scale_kwargs)
    start_layer = name
    layer_name = "{}/2/relu".format(out_layer)
    name = "{}/2/relu".format(out_layer)
    net[name] = L.ReLU(net[start_layer], name=layer_name, in_place=True)
    start_layer = name
    layer_name = "{}/2/conv".format(out_layer)
    name = "{}/2".format(out_layer)
    net[name] = L.Convolution(net[start_layer], name=layer_name, num_output=inter_channels, \
        kernel_size=3, pad=1, stride=1, **conv_kwargs)
    start_layer = name

    # conv/3: bn/neg/concat/scale/relu/conv
    feaLayers = []
    bn_layer = "{}/3/bn".format(out_layer)
    bn_name = "{}/3/pre".format(out_layer)
    net[bn_name] = L.BatchNorm(net[start_layer],
                               name=bn_layer,
                               in_place=False,
                               **bn_kwargs)
    feaLayers.append(net[bn_name])
    start_layer = bn_name
    neg_layer = "{}/3/neg".format(out_layer)
    neg_name = "{}/3/neg".format(out_layer)
    net[neg_name] = L.Power(net[start_layer], name=neg_layer, **power_kwargs)
    feaLayers.append(net[neg_name])
    concat_layer = "{}/3/concat".format(out_layer)
    concat_name = "{}/3/preAct".format(out_layer)
    net[concat_name] = L.Concat(*feaLayers, name=concat_layer, axis=1)
    layer_name = "{}/3/scale".format(out_layer)
    name = "{}/3/scale".format(out_layer)
    net[name] = L.Scale(net[concat_name],
                        name=layer_name,
                        in_place=True,
                        **scale_kwargs)
    start_layer = name
    layer_name = "{}/3/relu".format(out_layer)
    name = "{}/3/relu".format(out_layer)
    net[name] = L.ReLU(net[start_layer], name=layer_name, in_place=True)
    start_layer = name
    layer_name = "{}/3/conv".format(out_layer)
    name = "{}/3".format(out_layer)
    net[name] = L.Convolution(net[start_layer], name=layer_name, num_output=output_channels, \
        kernel_size=1, pad=0, stride=1, **conv_kwargs)
    start_layer = name
    mlayers = []
    mlayers.append(net[name])
    # proj or input
    if cross_stage:
        layer_name = "{}/proj".format(out_layer)
        name = "{}/proj".format(out_layer)
        if has_pool:
            start_layer = "{}/1/pre".format(out_layer)
            stride = 2
        else:
            start_layer = from_layer
            stride = 1
        net[name] = L.Convolution(net[start_layer], name=layer_name, num_output=output_channels, \
            kernel_size=1, pad=0, stride=stride, **conv_kwargs)
        mlayers.append(net[name])
    else:
        layer_name = "{}/input".format(out_layer)
        name = "{}/input".format(out_layer)
        start_layer = from_layer
        net[name] = L.Power(net[start_layer], name=layer_name, **input_kwargs)
        mlayers.append(net[name])

    # eltwise
    layer_name = out_layer
    name = out_layer
    net[name] = L.Eltwise(*mlayers, name=layer_name, **eltwise_kwargs)

    return net
Example #25
def smCReLULayer_NBN(net, from_layer, out_layer, channels=32, use_reduced_layer=False, reduced_layers=[], \
                 lr=1, decay=1):
    bn_kwargs = {
        'param': [
            dict(lr_mult=0, decay_mult=0),
            dict(lr_mult=0, decay_mult=0),
            dict(lr_mult=0, decay_mult=0)
        ],
        'batch_norm_param':
        dict(use_global_stats=True),
    }
    scale_kwargs = {
        'bias_term': True,
        'param':
        [dict(lr_mult=lr, decay_mult=0),
         dict(lr_mult=lr, decay_mult=0)],
    }
    power_kwargs = {'power': 1, 'scale': -1.0, 'shift': 0}
    conv_kwargs = {
        'param': [
            dict(lr_mult=lr, decay_mult=decay),
            dict(lr_mult=2 * lr, decay_mult=0)
        ],
        'weight_filler':
        dict(type='xavier'),
        'bias_filler':
        dict(type='constant', value=0)
    }
    conv_nb_kwargs = {
        'param': [dict(lr_mult=lr, decay_mult=decay)],
        'weight_filler': dict(type='xavier'),
        'bias_term': False,
    }
    start_layer = from_layer
    # 1x1 convLayer
    if use_reduced_layer:
        name = "{}/reduced/conv".format(out_layer)
        net[name] = L.Convolution(net[start_layer], num_output=reduced_layers[0], \
            kernel_size=1, pad=0, stride=1, **conv_kwargs)
        start_layer = name
        name = "{}/reduced/relu".format(out_layer)
        net[name] = L.ReLU(net[start_layer], in_place=True)
        start_layer = name
    # 3x3 convLayer
    if use_reduced_layer:
        name = "{}/inter/conv".format(out_layer)
        net[name] = L.Convolution(net[start_layer], num_output=reduced_layers[1], \
            kernel_size=3, pad=1, stride=1, **conv_nb_kwargs)
        start_layer = name
        name = "{}/inter/bn".format(out_layer)
        net[name] = L.BatchNorm(net[start_layer], in_place=False, **bn_kwargs)
        start_layer = name
        neg_name = "{}/inter/neg".format(out_layer)
        net[neg_name] = L.Power(net[start_layer], **power_kwargs)
        name = "{}/inter/concat".format(out_layer)
        net[name] = L.Concat(net[start_layer], net[neg_name], axis=1)
        start_layer = name
        name = "{}/inter/scale".format(out_layer)
        net[name] = L.Scale(net[start_layer], in_place=True, **scale_kwargs)
        start_layer = name
        name = "{}/inter/relu".format(out_layer)
        net[name] = L.ReLU(net[start_layer], in_place=True)
        start_layer = name
    else:
        name = "{}/conv".format(out_layer)
        net[name] = L.Convolution(net[start_layer], num_output=channels, \
            kernel_size=3, pad=1, stride=1, **conv_nb_kwargs)
        start_layer = name
        name = "{}/bn".format(out_layer)
        net[name] = L.BatchNorm(net[start_layer], in_place=False, **bn_kwargs)
        start_layer = name
        neg_name = "{}/neg".format(out_layer)
        net[neg_name] = L.Power(net[start_layer], **power_kwargs)
        name = "{}/concat".format(out_layer)
        net[name] = L.Concat(net[start_layer], net[neg_name], axis=1)
        start_layer = name
        name = "{}/scale".format(out_layer)
        net[name] = L.Scale(net[start_layer], in_place=True, **scale_kwargs)
        start_layer = name
        name = "{}/relu".format(out_layer)
        net[name] = L.ReLU(net[start_layer], in_place=True)
        start_layer = name
    # 1x1
    if use_reduced_layer:
        name = "{}/out/conv".format(out_layer)
        net[name] = L.Convolution(net[start_layer], num_output=reduced_layers[2], \
            kernel_size=1, pad=0, stride=1, **conv_kwargs)
        start_layer = name
        name = "{}/out/relu".format(out_layer)
        net[name] = L.ReLU(net[start_layer], in_place=True)
        start_layer = name
    return net
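A minimal usage sketch for smCReLULayer_NBN, assuming pycaffe is importable; the input blob name and shape below are made up for illustration:

import caffe
from caffe import layers as L

net = caffe.NetSpec()
net["data"] = L.Input(shape=[dict(dim=[1, 3, 224, 224])])    # hypothetical input blob
net = smCReLULayer_NBN(net, from_layer="data", out_layer="conv1", channels=32)
with open("crelu_nbn_example.prototxt", "w") as f:
    f.write(str(net.to_proto()))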
Beispiel #26
0
def ReIDExtLayers(net,
                  from_layer="convf",
                  label_layer="label",
                  net_input_width=432,
                  net_input_height=324,
                  train=True,
                  lr=1,
                  decay=1):
    # roi_data_layer -> [ROI_POOLING + LABEL]
    # roi_pooling_layer -> (10,10) (0.0625(1/16))
    # we use [conv4_3(reorg) + conv5_5] as convf
    # use stride_conv to get conv6_1
    # -> conv6_2 -> conv6_3 -> (stride_conv) conv7_1 -> conv7_2 -> avg_pool
    # -> FC (256) -> Normalize
    # -> LabeledMatch / UnlabeledMatch (use label)
    # use scale / concat to get {L+Q} array
    # -> softmaxWithLoss & accuracy (train)
    assert from_layer in net.keys()
    #  Roi_Data_Layer
    roi_data_kwargs = {
        'net_input_width': net_input_width,
        'net_input_height': net_input_height
    }
    net.roi_pool, net.roi_label = L.RoiData(net[label_layer],
                                            ntop=2,
                                            roi_data_param=roi_data_kwargs)
    # Roi_Pooling_Layer
    roi_pool_kwargs = {
        'pooled_h': 10,
        'pooled_w': 10,
        'spatial_scale': 0.0625,
    }
    net.rpf = L.ROIPooling(net[from_layer],
                           net.roi_pool,
                           roi_pooling_param=roi_pool_kwargs)
    # ConvLayers
    # conv6
    ConvBNUnitLayer(net, "rpf", "reid_c61", use_bn=False, use_relu=True, \
        num_output=256, kernel_size=3, pad=1,stride=2)
    ConvBNUnitLayer(net, "reid_c61", "reid_c62", use_bn=False, use_relu=True, \
        num_output=256, kernel_size=3, pad=1,stride=1)
    # ConvBNUnitLayer(net, "reid_c62", "reid_c63", use_bn=False, use_relu=True, \
    # 				num_output=256, kernel_size=3, pad=1,stride=1)
    # conv7
    ConvBNUnitLayer(net, "reid_c62", "reid_c71", use_bn=False, use_relu=True, \
        num_output=256, kernel_size=3, pad=1,stride=2)
    ConvBNUnitLayer(net, "reid_c71", "reid_c72", use_bn=False, use_relu=True, \
        num_output=256, kernel_size=3, pad=1,stride=1)
    # avg_pool
    net.avgpool = L.Pooling(net["reid_c72"],
                            pool=P.Pooling.AVE,
                            global_pooling=True)
    # FC & Norm
    fc_kwargs = {
        'param': [
            dict(lr_mult=lr, decay_mult=decay),
            dict(lr_mult=2 * lr, decay_mult=0)
        ],
        'weight_filler':
        dict(type='gaussian', std=0.005),
        'bias_filler':
        dict(type='constant', value=0)
    }
    net.fp = L.InnerProduct(net.avgpool, num_output=256, **fc_kwargs)
    net.fpn = L.Normalize(net.fp)
    # Match
    labelMatch_kwargs = {
        'num_classes': 5532,
        'momentum': 0.5,
    }
    net.labeled_match, net.gt = L.LabeledMatch(
        net.fpn, net.roi_label, ntop=2, labeled_match_param=labelMatch_kwargs)
    unlabelMatch_kwargs = {
        'queue_size': 5000,
    }
    net.unlabeled_match = L.UnlabeledMatch(
        net.fpn, net.roi_label, unlabeled_match_param=unlabelMatch_kwargs)
    # scale
    power_kwargs = {'scale': 10}
    net.labeled_match_scale = L.Power(net.labeled_match, **power_kwargs)
    net.unlabeled_match_scale = L.Power(net.unlabeled_match, **power_kwargs)
    # concat: cosine similarity
    net.cosine = L.Concat(net.labeled_match_scale,
                          net.unlabeled_match_scale,
                          axis=1)
    if train:
        # softmaxWithLoss
        loss_kwargs = {
            'ignore_label': -1,
            'normalize': True,
        }
        net.loss = L.SoftmaxWithLoss(net.cosine,
                                     net.gt,
                                     propagate_down=[True, False],
                                     loss_weight=[1],
                                     loss_param=loss_kwargs)
    else:
        # accuracy
        accu_kwargs = {
            'ignore_label': -1,
            'top_k': 1,
        }
        net.accuracy = L.AccuracyReid(net.cosine,
                                      net.gt,
                                      accuracy_param=accu_kwargs)
    return net
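ReIDExtLayers only appends the re-identification head: it assumes the net already contains the fused feature map (from_layer, here "convf") and the "label" blob, and it depends on custom layers (RoiData, ROIPooling, LabeledMatch, UnlabeledMatch, AccuracyReid) plus the ConvBNUnitLayer helper from the same codebase. A hypothetical wiring sketch, where build_detection_trunk stands in for whatever builds the detection backbone:

net = build_detection_trunk()    # hypothetical helper; must produce net["convf"] and net["label"]
net = ReIDExtLayers(net, from_layer="convf", label_layer="label",
                    net_input_width=432, net_input_height=324,
                    train=True, lr=1, decay=1)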
def create_ssn_net(img_height,
                   img_width,
                   num_spixels,
                   pos_scale,
                   color_scale,
                   num_spixels_h,
                   num_spixels_w,
                   num_steps,
                   phase=None):

    n = caffe.NetSpec()

    if phase == 'TRAIN':
        n.img, n.spixel_init, n.feat_spixel_init, n.label, n.problabel = \
            L.Python(python_param = dict(module = "input_patch_data_layer", layer = "InputRead", param_str = "TRAIN_1000000_" + str(num_spixels)),
                     include = dict(phase = 0),
                     ntop = 5)
    elif phase == 'TEST':
        n.img, n.spixel_init, n.feat_spixel_init, n.label, n.problabel = \
            L.Python(python_param = dict(module = "input_patch_data_layer", layer = "InputRead", param_str = "VAL_10_" + str(num_spixels)),
                     include = dict(phase = 1),
                     ntop = 5)
    else:
        n.img = L.Input(shape=[dict(dim=[1, 3, img_height, img_width])])
        n.spixel_init = L.Input(
            shape=[dict(dim=[1, 1, img_height, img_width])])
        n.feat_spixel_init = L.Input(
            shape=[dict(dim=[1, 1, img_height, img_width])])

    n.pixel_features = L.PixelFeature(n.img,
                                      pixel_feature_param=dict(
                                          type=P.PixelFeature.POSITION_AND_RGB,
                                          pos_scale=float(pos_scale),
                                          color_scale=float(color_scale)))

    ### Transform Pixel features
    n.trans_features = cnn_module(n.pixel_features, trans_dim)

    # Initial Superpixels
    n.init_spixel_feat = L.SpixelFeature(n.trans_features, n.feat_spixel_init,
                                         spixel_feature_param =\
        dict(type = P.SpixelFeature.AVGRGB, rgb_scale = 1.0, ignore_idx_value = -10,
             ignore_feature_value = 255, max_spixels = int(num_spixels)))

    ### Iteration-1
    n.spixel_feat1 = exec_iter(n.init_spixel_feat, n.trans_features,
                               n.spixel_init, num_spixels_h, num_spixels_w,
                               num_spixels, trans_dim)

    ### Iteration-2
    n.spixel_feat2 = exec_iter(n.spixel_feat1, n.trans_features, n.spixel_init,
                               num_spixels_h, num_spixels_w, num_spixels,
                               trans_dim)

    ### Iteration-3
    n.spixel_feat3 = exec_iter(n.spixel_feat2, n.trans_features, n.spixel_init,
                               num_spixels_h, num_spixels_w, num_spixels,
                               trans_dim)

    ### Iteration-4
    n.spixel_feat4 = exec_iter(n.spixel_feat3, n.trans_features, n.spixel_init,
                               num_spixels_h, num_spixels_w, num_spixels,
                               trans_dim)

    if num_steps == 5:
        ### Iteration-5
        n.final_pixel_assoc  = \
            compute_assignments(n.spixel_feat4, n.trans_features,
                                n.spixel_init, num_spixels_h,
                                num_spixels_w, num_spixels, trans_dim)

    elif num_steps == 10:
        ### Iteration-5
        n.spixel_feat5 = exec_iter(n.spixel_feat4, n.trans_features,
                                   n.spixel_init, num_spixels_h, num_spixels_w,
                                   num_spixels, trans_dim)

        ### Iteration-6
        n.spixel_feat6 = exec_iter(n.spixel_feat5, n.trans_features,
                                   n.spixel_init, num_spixels_h, num_spixels_w,
                                   num_spixels, trans_dim)

        ### Iteration-7
        n.spixel_feat7 = exec_iter(n.spixel_feat6, n.trans_features,
                                   n.spixel_init, num_spixels_h, num_spixels_w,
                                   num_spixels, trans_dim)

        ### Iteration-8
        n.spixel_feat8 = exec_iter(n.spixel_feat7, n.trans_features,
                                   n.spixel_init, num_spixels_h, num_spixels_w,
                                   num_spixels, trans_dim)

        ### Iteration-9
        n.spixel_feat9 = exec_iter(n.spixel_feat8, n.trans_features,
                                   n.spixel_init, num_spixels_h, num_spixels_w,
                                   num_spixels, trans_dim)

        ### Iteration-10
        n.final_pixel_assoc  = \
            compute_assignments(n.spixel_feat9, n.trans_features,
                                n.spixel_init, num_spixels_h,
                                num_spixels_w, num_spixels, trans_dim)

    if phase == 'TRAIN' or phase == 'TEST':

        # Compute final spixel features
        n.new_spixel_feat = L.SpixelFeature2(n.pixel_features,
                                             n.final_pixel_assoc,
                                             n.spixel_init,
                                             spixel_feature2_param =\
            dict(num_spixels_h = num_spixels_h, num_spixels_w = num_spixels_w))

        n.new_spix_indices = compute_final_spixel_labels(
            n.final_pixel_assoc, n.spixel_init, num_spixels_h, num_spixels_w)
        n.recon_feat2 = L.Smear(n.new_spixel_feat,
                                n.new_spix_indices,
                                propagate_down=[True, False])
        n.loss1, n.loss2 = position_color_loss(n.recon_feat2,
                                               n.pixel_features,
                                               pos_weight=0.00001,
                                               col_weight=0.0)

        # Convert pixel labels to spixel labels
        n.spixel_label = L.SpixelFeature2(n.problabel,
                                          n.final_pixel_assoc,
                                          n.spixel_init,
                                          spixel_feature2_param =\
            dict(num_spixels_h = num_spixels_h, num_spixels_w = num_spixels_w))
        # Convert spixel labels back to pixel labels
        n.recon_label = decode_features(n.final_pixel_assoc,
                                        n.spixel_label,
                                        n.spixel_init,
                                        num_spixels_h,
                                        num_spixels_w,
                                        num_spixels,
                                        num_channels=50)

        n.recon_label = L.ReLU(n.recon_label, in_place=True)
        n.recon_label2 = L.Power(n.recon_label, power_param=dict(shift=1e-10))
        n.recon_label3 = normalize(n.recon_label2, 50)
        n.loss3 = L.LossWithoutSoftmax(n.recon_label3,
                                       n.label,
                                       loss_param=dict(ignore_label=255),
                                       loss_weight=1.0)

    else:
        n.new_spix_indices = compute_final_spixel_labels(
            n.final_pixel_assoc, n.spixel_init, num_spixels_h, num_spixels_w)

    return n.to_proto()
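create_ssn_net returns a NetParameter proto; when phase is neither 'TRAIN' nor 'TEST' it builds the deploy variant that reads plain Input blobs. The helpers it calls (cnn_module, exec_iter, compute_assignments, decode_features, normalize) and the global trans_dim live elsewhere in the SSN code. A deploy-mode sketch with purely illustrative values:

proto = create_ssn_net(img_height=201, img_width=201,
                       num_spixels=100, pos_scale=0.25, color_scale=0.26,
                       num_spixels_h=10, num_spixels_w=10,
                       num_steps=10, phase=None)
with open("ssn_deploy.prototxt", "w") as f:
    f.write(str(proto))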
def generate_model(split, config):
    n = caffe.NetSpec()
    dataset = config.dataset
    batch_size = config.N
    mode_str = str(dict(dataset=dataset, split=split, batch_size=batch_size))
    n.image1, n.image2, n.label, n.sample_weights, n.feat_crop = L.Python(
        module=config.data_provider,
        layer=config.data_provider_layer,
        param_str=mode_str,
        ntop=5)

    ################################
    # the base net (VGG-16) branch 1
    n.conv1_1, n.relu1_1 = conv_relu(n.image1,
                                     64,
                                     param_names=('conv1_1_w', 'conv1_1_b'),
                                     fix_param=True,
                                     finetune=False)
    n.conv1_2, n.relu1_2 = conv_relu(n.relu1_1,
                                     64,
                                     param_names=('conv1_2_w', 'conv1_2_b'),
                                     fix_param=True,
                                     finetune=False)
    n.pool1 = max_pool(n.relu1_2)

    n.conv2_1, n.relu2_1 = conv_relu(n.pool1,
                                     128,
                                     param_names=('conv2_1_w', 'conv2_1_b'),
                                     fix_param=True,
                                     finetune=False)
    n.conv2_2, n.relu2_2 = conv_relu(n.relu2_1,
                                     128,
                                     param_names=('conv2_2_w', 'conv2_2_b'),
                                     fix_param=True,
                                     finetune=False)
    n.pool2 = max_pool(n.relu2_2)

    n.conv3_1, n.relu3_1 = conv_relu(n.pool2,
                                     256,
                                     param_names=('conv3_1_w', 'conv3_1_b'),
                                     fix_param=config.fix_vgg,
                                     finetune=config.finetune)
    n.conv3_2, n.relu3_2 = conv_relu(n.relu3_1,
                                     256,
                                     param_names=('conv3_2_w', 'conv3_2_b'),
                                     fix_param=config.fix_vgg,
                                     finetune=config.finetune)
    n.conv3_3, n.relu3_3 = conv_relu(n.relu3_2,
                                     256,
                                     param_names=('conv3_3_w', 'conv3_3_b'),
                                     fix_param=config.fix_vgg,
                                     finetune=config.finetune)
    n.pool3 = max_pool(n.relu3_3)
    # spatial L2 norm
    n.pool3_lrn = L.LRN(n.pool3, local_size=513, alpha=513, beta=0.5, k=1e-16)

    n.conv4_1, n.relu4_1 = conv_relu(n.pool3,
                                     512,
                                     param_names=('conv4_1_w', 'conv4_1_b'),
                                     fix_param=config.fix_vgg,
                                     finetune=config.finetune)
    n.conv4_2, n.relu4_2 = conv_relu(n.relu4_1,
                                     512,
                                     param_names=('conv4_2_w', 'conv4_2_b'),
                                     fix_param=config.fix_vgg,
                                     finetune=config.finetune)
    n.conv4_3, n.relu4_3 = conv_relu(n.relu4_2,
                                     512,
                                     param_names=('conv4_3_w', 'conv4_3_b'),
                                     fix_param=config.fix_vgg,
                                     finetune=config.finetune)
    # spatial L2 norm
    n.relu4_3_lrn = L.LRN(n.relu4_3,
                          local_size=1025,
                          alpha=1025,
                          beta=0.5,
                          k=1e-16)
    #n.pool4 = max_pool(n.relu4_3)

    #n.conv5_1, n.relu5_1 = conv_relu(n.pool4, 512,
    #                                 param_names=('conv5_1_w', 'conv5_1_b'),
    #                                 fix_param=config.fix_vgg,
    #                                 finetune=config.finetune)
    #n.conv5_2, n.relu5_2 = conv_relu(n.relu5_1, 512,
    #                                 param_names=('conv5_2_w', 'conv5_2_b'),
    #                                 fix_param=config.fix_vgg,
    #                                 finetune=config.finetune)
    #n.conv5_3, n.relu5_3 = conv_relu(n.relu5_2, 512,
    #                                 param_names=('conv5_3_w', 'conv5_3_b'),
    #                                 fix_param=config.fix_vgg,
    #                                 finetune=config.finetune)
    # upsampling feature map
    #n.relu5_3_upsampling = L.Deconvolution(n.relu5_3,
    #                                       convolution_param=dict(num_output=512,
    #                                                              group=512,
    #                                                              kernel_size=4,
    #                                                              stride=2,
    #                                                              pad=1,
    #                                                              bias_term=False,
    #                                                              weight_filler=dict(type='bilinear')),
    #                                       param=[dict(lr_mult=0, decay_mult=0)])
    # spatial L2 norm
    #n.relu5_3_lrn = L.LRN(n.relu5_3_upsampling, local_size=1025, alpha=1025, beta=0.5, k=1e-16)

    # concat all skip features
    #n.feat_all1 = n.relu4_3_lrn
    n.feat_all1 = L.Concat(n.pool3_lrn,
                           n.relu4_3_lrn,
                           concat_param=dict(axis=1))
    #n.feat_all1 = L.Concat(n.pool3_lrn, n.relu4_3_lrn, n.relu5_3_lrn, concat_param=dict(axis=1))
    n.feat_all1_crop = L.Crop(n.feat_all1,
                              n.feat_crop,
                              crop_param=dict(axis=2,
                                              offset=[
                                                  config.query_featmap_H // 3,
                                                  config.query_featmap_W // 3
                                              ]))

    ################################
    # the base net (VGG-16) branch 2
    n.conv1_1_p, n.relu1_1_p = conv_relu(n.image2,
                                         64,
                                         param_names=('conv1_1_w',
                                                      'conv1_1_b'),
                                         fix_param=True,
                                         finetune=False)
    n.conv1_2_p, n.relu1_2_p = conv_relu(n.relu1_1_p,
                                         64,
                                         param_names=('conv1_2_w',
                                                      'conv1_2_b'),
                                         fix_param=True,
                                         finetune=False)
    n.pool1_p = max_pool(n.relu1_2_p)

    n.conv2_1_p, n.relu2_1_p = conv_relu(n.pool1_p,
                                         128,
                                         param_names=('conv2_1_w',
                                                      'conv2_1_b'),
                                         fix_param=True,
                                         finetune=False)
    n.conv2_2_p, n.relu2_2_p = conv_relu(n.relu2_1_p,
                                         128,
                                         param_names=('conv2_2_w',
                                                      'conv2_2_b'),
                                         fix_param=True,
                                         finetune=False)
    n.pool2_p = max_pool(n.relu2_2_p)

    n.conv3_1_p, n.relu3_1_p = conv_relu(n.pool2_p,
                                         256,
                                         param_names=('conv3_1_w',
                                                      'conv3_1_b'),
                                         fix_param=config.fix_vgg,
                                         finetune=config.finetune)
    n.conv3_2_p, n.relu3_2_p = conv_relu(n.relu3_1_p,
                                         256,
                                         param_names=('conv3_2_w',
                                                      'conv3_2_b'),
                                         fix_param=config.fix_vgg,
                                         finetune=config.finetune)
    n.conv3_3_p, n.relu3_3_p = conv_relu(n.relu3_2_p,
                                         256,
                                         param_names=('conv3_3_w',
                                                      'conv3_3_b'),
                                         fix_param=config.fix_vgg,
                                         finetune=config.finetune)
    n.pool3_p = max_pool(n.relu3_3_p)
    # spatial L2 norm
    n.pool3_lrn_p = L.LRN(n.pool3_p,
                          local_size=513,
                          alpha=513,
                          beta=0.5,
                          k=1e-16)

    n.conv4_1_p, n.relu4_1_p = conv_relu(n.pool3_p,
                                         512,
                                         param_names=('conv4_1_w',
                                                      'conv4_1_b'),
                                         fix_param=config.fix_vgg,
                                         finetune=config.finetune)
    n.conv4_2_p, n.relu4_2_p = conv_relu(n.relu4_1_p,
                                         512,
                                         param_names=('conv4_2_w',
                                                      'conv4_2_b'),
                                         fix_param=config.fix_vgg,
                                         finetune=config.finetune)
    n.conv4_3_p, n.relu4_3_p = conv_relu(n.relu4_2_p,
                                         512,
                                         param_names=('conv4_3_w',
                                                      'conv4_3_b'),
                                         fix_param=config.fix_vgg,
                                         finetune=config.finetune)
    # spatial L2 norm
    n.relu4_3_lrn_p = L.LRN(n.relu4_3_p,
                            local_size=1025,
                            alpha=1025,
                            beta=0.5,
                            k=1e-16)
    #n.pool4_p = max_pool(n.relu4_3_p)

    #n.conv5_1_p, n.relu5_1_p = conv_relu(n.pool4_p, 512,
    #                                     param_names=('conv5_1_w', 'conv5_1_b'),
    #                                     fix_param=config.fix_vgg,
    #                                     finetune=config.finetune)
    #n.conv5_2_p, n.relu5_2_p = conv_relu(n.relu5_1_p, 512,
    #                                     param_names=('conv5_2_w', 'conv5_2_b'),
    #                                     fix_param=config.fix_vgg,
    #                                     finetune=config.finetune)
    #n.conv5_3_p, n.relu5_3_p = conv_relu(n.relu5_2_p, 512,
    #                                     param_names=('conv5_3_w', 'conv5_3_b'),
    #                                     fix_param=config.fix_vgg,
    #                                     finetune=config.finetune)
    # upsampling feature map
    #n.relu5_3_upsampling_p = L.Deconvolution(n.relu5_3_p,
    #                                         convolution_param=dict(num_output=512,
    #                                                                group=512,
    #                                                                kernel_size=4,
    #                                                                stride=2,
    #                                                                pad=1,
    #                                                                bias_term=False,
    #                                                                weight_filler=dict(type='bilinear')),
    #                                         param=[dict(lr_mult=0, decay_mult=0)])
    # spatial L2 norm
    #n.relu5_3_lrn_p = L.LRN(n.relu5_3_upsampling_p, local_size=1025, alpha=1025, beta=0.5, k=1e-16)

    # concat all skip features
    #n.feat_all2 = n.relu4_3_lrn_p
    n.feat_all2 = L.Concat(n.pool3_lrn_p,
                           n.relu4_3_lrn_p,
                           concat_param=dict(axis=1))
    #n.feat_all2 = L.Concat(n.pool3_lrn_p, n.relu4_3_lrn_p, n.relu5_3_lrn_p, concat_param=dict(axis=1))

    # Dyn conv layer
    n.fcn_scores = L.DynamicConvolution(n.feat_all2,
                                        n.feat_all1_crop,
                                        convolution_param=dict(
                                            num_output=1,
                                            kernel_size=11,
                                            stride=1,
                                            pad=5,
                                            bias_term=False))

    # scale scores with zero mean 0.01196 -> 0.02677
    n.fcn_scaled_scores = L.Power(n.fcn_scores,
                                  power_param=dict(scale=0.01196,
                                                   shift=-1.0,
                                                   power=1))

    # Loss Layer
    n.loss = L.WeightedSigmoidCrossEntropyLoss(n.fcn_scaled_scores, n.label,
                                               n.sample_weights)

    return n.to_proto()
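Two numeric details in generate_model are easy to miss. The LRN layers with local_size = 2*C + 1, alpha = local_size, beta = 0.5 and a tiny k implement a per-pixel L2 normalization across channels: the window always spans every channel and alpha/local_size = 1, so the denominator reduces to the square root of the summed squared activations. And Caffe's Power layer computes (shift + scale * x) ** power, so fcn_scaled_scores is simply 0.01196 * fcn_scores - 1.0. A small NumPy sketch of both (shapes are illustrative):

import numpy as np

def spatial_l2_norm(x, eps=1e-16):
    # per-pixel L2 normalization across channels, i.e. what the LRN layers above compute
    return x / np.sqrt(eps + np.sum(x ** 2, axis=1, keepdims=True))

def caffe_power(x, power=1.0, scale=1.0, shift=0.0):
    # Caffe Power layer: y = (shift + scale * x) ** power
    return (shift + scale * x) ** power

feat = np.random.randn(1, 256, 14, 14).astype(np.float32)   # pool3-like blob (illustrative)
normed = spatial_l2_norm(feat)                               # unit L2 norm at every (h, w)
scores = caffe_power(np.random.randn(1, 1, 40, 40), scale=0.01196, shift=-1.0)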
Beispiel #29
0
    def lrcn_reinforce(self, save_name, RL_loss='lstm_classification', lw=20):

        data_inputs = self.data_inputs
        param_str = self.param_str

        ss_tag = 'reg_'
        #reg sentences will be the first part of the batch
        if self.separate_sents:
            if 'batch_size' not in param_str:
                param_str['batch_size'] = 100
            self.slice_point = param_str['batch_size'] / 2
            self.batch_size = param_str['batch_size']

        param_str_loss = {}
        param_str_loss['vocab'] = param_str['vocabulary']
        param_str_loss['avoid_words'] = ['red', 'small']
        if self.baseline:
            param_str_loss['baseline'] = True
        data_input = 'fc8'

        data_tops = self.python_input_layer(data_inputs['module'],
                                            data_inputs['layer'], param_str)
        self.rename_tops(data_tops, data_inputs['param_str']['top_names'])
        feature_name = 'fc8'
        self.n.tops[feature_name] = L.InnerProduct(
            self.n.tops[param_str['image_data_key']],
            num_output=1000,
            weight_filler=self.uniform_weight_filler(-.08, .08),
            bias_filler=self.constant_filler(0),
            param=self.init_params([[1, 1], [2, 0]]))

        if self.cc:
            #If class conditional
            data_top = self.n.tops['fc8']
            class_top = self.n.tops[param_str['data_label_feat']]
            self.n.tops['class_input'] = L.Concat(data_top, class_top, axis=1)
            data_input = 'class_input'
        else:
            self.silence(self.n.tops[param_str['data_label_feat']])

        bottom_sent = self.n.tops[param_str['text_data_key']]
        bottom_cont = self.n.tops[param_str['text_marker_key']]

        #prep for caption model
        bottom_cont_slice = L.Slice(bottom_cont, ntop=self.T, axis=0)
        self.rename_tops(bottom_cont_slice,
                         ['bottom_cont_%d' % i for i in range(self.T)])

        if not self.separate_sents:
            bottom_sent_slice = L.Slice(bottom_sent, ntop=self.T, axis=0)
            self.rename_tops(bottom_sent_slice,
                             ['input_sent_%d' % i for i in range(self.T)])
            target_sentence = self.n.tops['target_sentence']
        else:
            bottom_sents = L.Slice(bottom_sent,
                                   slice_point=[self.slice_point],
                                   axis=1,
                                   ntop=2)
            self.rename_tops(bottom_sents, ['reg_input_sent', 'rl_input_sent'])
            reg_bottom_sents_slice = L.Slice(self.n.tops['reg_input_sent'],
                                             axis=0,
                                             ntop=20)
            rl_bottom_sents_slice = L.Slice(self.n.tops['rl_input_sent'],
                                            axis=0,
                                            ntop=20)
            self.silence([rl_bottom_sents_slice[i] for i in range(1, self.T)])
            self.n.tops['input_sent_0'] = L.Concat(reg_bottom_sents_slice[0],
                                                   rl_bottom_sents_slice[0],
                                                   axis=1)
            self.rename_tops(reg_bottom_sents_slice,
                             ['reg_input_sent_%d' % i for i in range(self.T)])
            slice_target_sentence = L.Slice(self.n.tops['target_sentence'],
                                            slice_point=[self.slice_point],
                                            axis=1,
                                            ntop=2)
            self.rename_tops(slice_target_sentence,
                             ['reg_target_sentence', 'rl_target_sentence'])
            self.silence(self.n.tops['rl_target_sentence'])
            target_sentence = self.n.tops['reg_target_sentence']

        self.n.tops['lstm1_h0'] = self.dummy_data_layer(
            [1, self.N, self.lstm_dim], 0)
        self.n.tops['lstm1_c0'] = self.dummy_data_layer(
            [1, self.N, self.lstm_dim], 0)
        self.n.tops['lstm2_h0'] = self.dummy_data_layer(
            [1, self.N, self.lstm_dim], 0)
        self.n.tops['lstm2_c0'] = self.dummy_data_layer(
            [1, self.N, self.lstm_dim], 0)

        self.make_caption_model(static_input=data_input)

        #prep bottoms for loss
        predict_tops = [self.n.tops['predict_%d' % i] for i in range(self.T)]
        self.n.tops['predict_concat'] = L.Concat(*predict_tops, axis=0)
        if self.separate_sents:
            word_sample_tops = [
                self.n.tops['rl_word_sample_reshape_%d' % i]
                for i in range(1, self.T + 1)
            ]
            self.n.tops['word_sample_concat'] = L.Concat(*word_sample_tops,
                                                         axis=0)
            concat_predict_tops = L.Slice(self.n.tops['predict_concat'],
                                          slice_point=[self.slice_point],
                                          axis=1,
                                          ntop=2)
            reg_predict = concat_predict_tops[0]
            RL_predict = concat_predict_tops[1]
            bottom_cont_tops = L.Slice(bottom_cont,
                                       slice_point=[self.slice_point],
                                       axis=1,
                                       ntop=2)
            self.silence(bottom_cont_tops[0])
            label_tops = L.Slice(self.n.tops[param_str['data_label']],
                                 slice_point=[self.slice_point],
                                 axis=0,
                                 ntop=2)
            self.silence(label_tops[0])
            self.rename_tops([bottom_cont_tops[1], label_tops[1]],
                             ['rl_bottom_cont', 'rl_label_top'])
            label_top = self.n.tops['rl_label_top']
            bottom_cont = self.n.tops['rl_bottom_cont']
        else:
            word_sample_tops = [
                self.n.tops['word_sample_reshape_%d' % i]
                for i in range(1, self.T + 1)
            ]
            self.n.tops['word_sample_concat'] = L.Concat(*word_sample_tops,
                                                         axis=0)
            reg_predict = self.n.tops['predict_concat']
            RL_predict = self.n.tops['predict_concat']
            label_top = self.n.tops[param_str['data_label']]

        #RL loss
        if RL_loss == 'lstm_classification':
            self.n.tops['embed_classification'] = self.embed(
                self.n.tops['word_sample_concat'],
                1000,
                input_dim=self.vocab_size,
                bias_term=False,
                learning_param=self.init_params([[0, 0]]))
            self.n.tops['lstm_classification'] = self.lstm(
                self.n.tops['embed_classification'],
                bottom_cont,
                learning_param_lstm=self.init_params([[0, 0], [0, 0], [0, 0]]),
                lstm_hidden=1000)
            self.n.tops['predict_classification'] = L.InnerProduct(
                self.n.tops['lstm_classification'], num_output=200, axis=2)
            self.n.tops['probs_classification'] = L.Softmax(
                self.n.tops['predict_classification'], axis=2)
            #classification reward layer: classification, word_sample_concat (to get sentence length),
            #data label should be single stream, even though it is trained with 20 streams...
            self.n.tops['reward'] = self.python_layer([
                self.n.tops['probs_classification'],
                self.n.tops['word_sample_concat'], label_top
            ], 'loss_layers', 'sequenceClassificationLoss', param_str_loss)

        self.n.tops['reward_reshape'] = L.Reshape(self.n.tops['reward'],
                                                  shape=dict(dim=[1, -1]))
        self.n.tops['reward_tile'] = L.Tile(self.n.tops['reward_reshape'],
                                            axis=0,
                                            tiles=self.T)

        #softmax with sampled words as "correct" word
        self.n.tops['sample_loss'] = self.softmax_per_inst_loss(
            RL_predict, self.n.tops['word_sample_concat'], axis=2)
        self.n.tops['sample_reward'] = L.Eltwise(self.n.tops['sample_loss'],
                                                 self.n.tops['reward_tile'],
                                                 propagate_down=[1, 0],
                                                 operation=0)
        avoid_lw = 100
        self.n.tops['normalized_reward'] = L.Power(
            self.n.tops['sample_reward'], scale=(1. / self.N) * avoid_lw)
        self.n.tops['sum_rewards'] = L.Reduction(
            self.n.tops['normalized_reward'], loss_weight=[1])
        self.n.tops['sentence_loss'] = self.softmax_loss(reg_predict,
                                                         target_sentence,
                                                         axis=2,
                                                         loss_weight=20)

        self.write_net(save_name)
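The reinforcement branch above assembles a REINFORCE-style surrogate: the per-timestep softmax loss of the sampled words is multiplied elementwise (Eltwise PROD, with propagate_down=[1, 0] so only the sampled-word branch receives gradients) by the sequence reward tiled over time, scaled by (1/N) * avoid_lw, and summed. A rough NumPy sketch of that arithmetic with illustrative sizes:

import numpy as np

T, N, avoid_lw = 20, 16, 100
sample_loss = np.random.rand(T, N)                 # -log p(sampled word) per step and stream
reward = np.random.rand(N)                         # one scalar reward per sequence
reward_tile = np.tile(reward[None, :], (T, 1))     # L.Tile along the time axis
weighted = sample_loss * reward_tile               # L.Eltwise with operation=0 (PROD)
rl_loss = np.sum(weighted * (1.0 / N) * avoid_lw)  # L.Power scale, then L.Reduction (SUM)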
 def test_power(self):
     n = caffe.NetSpec()
     n.input1 = L.Input(shape=make_shape([6, 4, 64, 64]))
     n.pow1 = L.Power(n.input1, power=2.0, scale=0.5, shift=0.01)
     self._test_model(*self._netspec_to_model(n, 'power'))