Ejemplo n.º 1
0
    def testShapeInferenceTranspose(self):
        """Check shape inference for Transpose, with and without explicit axes."""
        model = model_helper.ModelHelper(name="test_model")

        # 5-D input so a permutation over five axes is meaningful.
        workspace.FeedBlob(
            "tensor", np.random.rand(4, 2, 3, 3, 5).astype(np.float32)
        )

        # Case 1: axes left undefined (operator default).
        brew.transpose(model, ["tensor"], "transpose")
        self.InferTensorRunAndCompare(model)

        # Case 2: an explicit random permutation of all five axes.
        brew.transpose(
            model, ["tensor"], "transpose", axes=np.random.permutation(5)
        )
        return self.InferTensorRunAndCompare(model)
Ejemplo n.º 2
0
    def testShapeInferenceTranspose(self):
        """Verify inferred shapes for brew.transpose agree with an actual run."""
        model = model_helper.ModelHelper(name="test_model")

        input_data = np.random.rand(4, 2, 3, 3, 5).astype(np.float32)
        workspace.FeedBlob("tensor", input_data)

        # First without axes, letting the operator pick its default order.
        brew.transpose(model, ["tensor"], "transpose")
        self.InferTensorRunAndCompare(model)

        # Then with an explicit permutation of the five dimensions.
        permutation = np.random.permutation(5)
        brew.transpose(model, ["tensor"], "transpose", axes=permutation)
        return self.InferTensorRunAndCompare(model)
Ejemplo n.º 3
0
def _calc_attention_logits_from_sum_match(
    model,
    decoder_hidden_encoder_outputs_sum,
    encoder_output_dim,
    scope,
):
    """Squash the decoder/encoder sum and project it to per-position logits.

    Returns a blob of shape [batch_size, encoder_length, 1].
    """
    # Tanh applied in place over [encoder_length, batch_size, encoder_output_dim].
    squashed_sum = model.net.Tanh(
        decoder_hidden_encoder_outputs_sum,
        decoder_hidden_encoder_outputs_sum,
    )

    # Project every position down to a single logit (bias frozen at zero):
    # [encoder_length, batch_size, 1].
    logits = brew.fc(
        model,
        squashed_sum,
        s(scope, 'attention_logits'),
        dim_in=encoder_output_dim,
        dim_out=1,
        axis=2,
        freeze_bias=True,
    )

    # Swap the leading axes -> [batch_size, encoder_length, 1].
    return brew.transpose(
        model,
        logits,
        s(scope, 'attention_logits_transposed'),
        axes=[1, 0, 2],
    )
Ejemplo n.º 4
0
def _calc_attention_logits_from_sum_match(
    model,
    decoder_hidden_encoder_outputs_sum,
    encoder_output_dim,
    scope,
):
    """Turn the summed decoder/encoder match into attention logits.

    The result has shape [batch_size, encoder_length, 1].
    """
    # In-place nonlinearity over [encoder_length, batch_size, encoder_output_dim].
    tanh_out = model.net.Tanh(
        decoder_hidden_encoder_outputs_sum,
        decoder_hidden_encoder_outputs_sum,
    )

    # One logit per encoder position; the FC bias stays frozen.
    # Shape: [encoder_length, batch_size, 1].
    raw_logits = brew.fc(
        model,
        tanh_out,
        s(scope, 'attention_logits'),
        dim_in=encoder_output_dim,
        dim_out=1,
        axis=2,
        freeze_bias=True,
    )

    # Bring the batch dimension to the front: [batch_size, encoder_length, 1].
    transposed_logits = brew.transpose(
        model,
        raw_logits,
        s(scope, 'attention_logits_transposed'),
        axes=[1, 0, 2],
    )
    return transposed_logits
Ejemplo n.º 5
0
    def prepare_input(self, model, input_blob):
        """Lazily materialize the attention helper blobs, then delegate to the
        wrapped decoder cell's own input preparation."""
        enc_out = self.encoder_outputs
        if self.encoder_outputs_transposed is None:
            # Encoder outputs with axes reordered to [1, 2, 0]; built once
            # and cached on the instance.
            self.encoder_outputs_transposed = brew.transpose(
                model,
                enc_out,
                self.scope('encoder_outputs_transposed'),
                axes=[1, 2, 0],
            )
        if self.weighted_encoder_outputs is None:
            # Square linear projection of the encoder outputs along axis 2,
            # also cached on first use.
            self.weighted_encoder_outputs = brew.fc(
                model,
                enc_out,
                self.scope('weighted_encoder_outputs'),
                dim_in=self.encoder_output_dim,
                dim_out=self.encoder_output_dim,
                axis=2,
            )

        return self.decoder_cell.prepare_input(model, input_blob)
Ejemplo n.º 6
0
    def prepare_input(self, model, input_blob):
        """Ensure attention precomputations exist, then defer to the inner cell.

        Both derived blobs are computed at most once and cached as attributes.
        """
        needs_transpose = self.encoder_outputs_transposed is None
        if needs_transpose:
            self.encoder_outputs_transposed = brew.transpose(
                model,
                self.encoder_outputs,
                self.scope('encoder_outputs_transposed'),
                axes=[1, 2, 0],
            )

        needs_weighting = self.weighted_encoder_outputs is None
        if needs_weighting:
            self.weighted_encoder_outputs = brew.fc(
                model,
                self.encoder_outputs,
                self.scope('weighted_encoder_outputs'),
                dim_in=self.encoder_output_dim,
                dim_out=self.encoder_output_dim,
                axis=2,
            )

        return self.decoder_cell.prepare_input(model, input_blob)
Ejemplo n.º 7
0
def _calc_attention_logits_from_sum_match(
    model,
    decoder_hidden_encoder_outputs_sum,
    encoder_output_dim,
    scope,
):
    """Tanh-squash the summed match and project it with a learned vector,
    yielding attention logits of shape [batch_size, encoder_length, 1].
    """
    # Squash in place: [encoder_length, batch_size, encoder_output_dim].
    squashed = model.net.Tanh(
        decoder_hidden_encoder_outputs_sum,
        decoder_hidden_encoder_outputs_sum,
    )

    # Learned projection vector v, registered as a trainable parameter.
    v_weight = model.param_init_net.XavierFill(
        [],
        s(scope, 'attention_v'),
        shape=[1, encoder_output_dim],
    )
    model.params.append(v_weight)

    # Constant zero bias for the FC projection below.
    zero_bias = model.param_init_net.ConstantFill(
        [],
        s(scope, 'attention_zeros'),
        value=0.0,
        shape=[1],
    )

    # v . h per position: [encoder_length, batch_size, 1].
    logits = model.net.FC(
        [squashed, v_weight, zero_bias],
        [s(scope, 'attention_logits')],
        axis=2,
    )
    # Move batch to the front: [batch_size, encoder_length, 1].
    return brew.transpose(
        model,
        logits,
        s(scope, 'attention_logits_transposed'),
        axes=[1, 0, 2],
    )
Ejemplo n.º 8
0
 def Transpose(self, *args, **kwargs):
     """Forward to brew.transpose, injecting this helper's cudnn setting."""
     result = brew.transpose(self, *args, use_cudnn=self.use_cudnn, **kwargs)
     return result
Ejemplo n.º 9
0
    def build_crf_net(self, input_blob, initial_state, transitions):
        """
            Adds the crf_net recurrent operator to the model.

            model: model_helper.ModelHelper object new operators would be added
            to

            input_blob: the input sequence in a format T x N x D
            where T is sequence size, N - batch size and D - input dimension
            ##Only supports batch-size 1##

            seq_lengths: blob containing sequence lengths (unused)
            """

        scope = "crf_net"

        def s(name):
            """Prefix *name* with the crf_net scope."""
            # We have to manually scope due to our internal/external blob
            # relationships.
            return "{}/{}".format(str(scope), str(name))

        # Step net: executed once per timestep by the recurrent_net below.
        step_model = model_helper.ModelHelper(name="crf_step", param_model=self.model)
        input_t, cell_t_prev, _ = step_model.net.AddExternalInputs(
            core.ScopedBlobReference("input_t"),
            core.ScopedBlobReference("cell_t_prev"),
            transitions,
        )
        # All-zero segment ids, so SortedSegmentRangeLogSumExp below reduces
        # over the whole first axis as one segment.
        zero_segment_id = step_model.param_init_net.ConstantFill(
            [],
            [s("zero_segment_id")],
            value=0,
            shape=[self.num_classes_padded],
            dtype=core.DataType.INT32,
        )

        # A hack to bypass model cloning for test
        step_model.param_init_net.AddExternalOutput(zero_segment_id)
        """ the CRF step """
        # Do tile
        prev_transpose = brew.transpose(
            step_model, cell_t_prev, [s("prev_transpose")], axes=(0, 2, 1)
        )
        prev_tiled = step_model.net.Tile(
            prev_transpose, [s("prev_tiled")], tiles=self.num_classes_padded, axis=2
        )
        input_t_tiled = step_model.net.Tile(
            input_t, [s("input_t_tiled")], tiles=self.num_classes_padded, axis=1
        )
        # Pairwise sum of previous-cell scores and current emissions.
        input_with_prev = step_model.net.Add(
            [prev_tiled, input_t_tiled], [s("input_with_prev")]
        )
        all_with_transitions = step_model.net.Add(
            [input_with_prev, transitions],
            [s("prev_with_transitions")],
            broadcast=1,
            use_grad_hack=1,
        )
        # Flatten to a [num_classes_padded, num_classes_padded] score matrix.
        all_with_transitions_reshaped, _ = step_model.net.Reshape(
            all_with_transitions,
            [s("all_with_transitions_reshaped"), s("all_with_transitions_orig")],
            shape=(self.num_classes_padded, self.num_classes_padded),
        )
        cell_t = step_model.net.SortedSegmentRangeLogSumExp(
            [all_with_transitions_reshaped, zero_segment_id], [s("cell_t")]
        )
        step_model.net.AddExternalOutputs(cell_t)
        """ recurrent network """
        # Unroll the step net over the sequence; gradients flow through the
        # last output only (outputs_with_grads=(1,)).
        cell_input_blob = initial_state
        out_all, out_last = recurrent.recurrent_net(
            net=self.model.net,
            cell_net=step_model.net,
            inputs=[(input_t, input_blob)],
            initial_cell_inputs=[(cell_t_prev, cell_input_blob)],
            links={cell_t_prev: cell_t},
            scope=scope,
            outputs_with_grads=(1,),
        )
        return out_last
Ejemplo n.º 10
0
def apply_soft_coverage_attention(
    model,
    encoder_output_dim,
    encoder_outputs_transposed,
    weighted_encoder_outputs,
    decoder_hidden_state_t,
    decoder_hidden_state_dim,
    scope,
    encoder_lengths,
    coverage_t_prev,
    coverage_weights,
):
    """Apply soft attention with an additive coverage term.

    Projects the decoder hidden state, combines it with the pre-weighted
    encoder outputs plus a coverage-scaled term, derives attention weights,
    and accumulates coverage.

    Returns a 4-tuple:
        attention_weighted_encoder_context: [batch_size, encoder_output_dim, 1]
        attention_weights_3d: [batch_size, encoder_length, 1]
        [decoder_hidden_encoder_outputs_sum]: intermediate blob (in a list)
        coverage_t: updated coverage accumulator
    """

    weighted_decoder_hidden_state = _apply_fc_weight_for_sum_match(
        model=model,
        input=decoder_hidden_state_t,
        dim_in=decoder_hidden_state_dim,
        dim_out=encoder_output_dim,
        scope=scope,
        name='weighted_decoder_hidden_state',
    )

    # [encoder_length, batch_size, encoder_output_dim]
    decoder_hidden_encoder_outputs_sum_tmp = model.net.Add(
        [weighted_encoder_outputs, weighted_decoder_hidden_state],
        s(scope, 'decoder_hidden_encoder_outputs_sum_tmp'),
        broadcast=1,
    )
    # [batch_size, encoder_length]
    coverage_t_prev_2d = model.net.Squeeze(
        coverage_t_prev,
        s(scope, 'coverage_t_prev_2d'),
        dims=[0],
    )
    # [encoder_length, batch_size]
    coverage_t_prev_transposed = brew.transpose(
        model,
        coverage_t_prev_2d,
        s(scope, 'coverage_t_prev_transposed'),
    )

    # [encoder_length, batch_size, encoder_output_dim]
    # Scale the coverage weights by how much attention each position has
    # already received (broadcast over the feature axis).
    scaled_coverage_weights = model.net.Mul(
        [coverage_weights, coverage_t_prev_transposed],
        s(scope, 'scaled_coverage_weights'),
        broadcast=1,
        axis=0,
    )

    # [encoder_length, batch_size, encoder_output_dim]
    decoder_hidden_encoder_outputs_sum = model.net.Add(
        [decoder_hidden_encoder_outputs_sum_tmp, scaled_coverage_weights],
        s(scope, 'decoder_hidden_encoder_outputs_sum'),
    )

    # [batch_size, encoder_length, 1]
    attention_logits_transposed = _calc_attention_logits_from_sum_match(
        model=model,
        decoder_hidden_encoder_outputs_sum=decoder_hidden_encoder_outputs_sum,
        encoder_output_dim=encoder_output_dim,
        scope=scope,
    )

    # [batch_size, encoder_length, 1]
    attention_weights_3d = _calc_attention_weights(
        model=model,
        attention_logits_transposed=attention_logits_transposed,
        scope=scope,
        encoder_lengths=encoder_lengths,
    )

    # [batch_size, encoder_output_dim, 1]
    attention_weighted_encoder_context = _calc_weighted_context(
        model=model,
        encoder_outputs_transposed=encoder_outputs_transposed,
        encoder_output_dim=encoder_output_dim,
        attention_weights_3d=attention_weights_3d,
        scope=scope,
    )

    # [batch_size, encoder_length]
    attention_weights_2d = model.net.Squeeze(
        attention_weights_3d,
        s(scope, 'attention_weights_2d'),
        dims=[2],
    )

    # Accumulate this step's attention into the running coverage total.
    coverage_t = model.net.Add(
        [coverage_t_prev, attention_weights_2d],
        s(scope, 'coverage_t'),
        broadcast=1,
    )

    return (
        attention_weighted_encoder_context,
        attention_weights_3d,
        [decoder_hidden_encoder_outputs_sum],
        coverage_t,
    )
Ejemplo n.º 11
0
 def Transpose(self, *args, **kwargs):
     """Delegate to brew.transpose, supplying the helper's use_cudnn flag."""
     return brew.transpose(
         self, *args, use_cudnn=self.use_cudnn, **kwargs
     )
Ejemplo n.º 12
0
def create_caffe2_model(model,
                        input_shape,
                        use_cudnn=True,
                        init_params=False,
                        keras_channel_last=True):
    """Translate a Keras model, layer by layer, into a Caffe2 ModelHelper.

    Args:
        model: a keras Model; its ``layers`` are walked in definition order.
        input_shape: NCHW-style shape; only the spatial dims
            (input_shape[2], input_shape[3]) are used for padding math.
        use_cudnn: forwarded to the Caffe2 arg scope.
        init_params: whether the Caffe2 model initializes its own parameters.
        keras_channel_last: True if the Keras model is channels-last (NHWC);
            controls output-size bookkeeping and the transpose inserted
            before Flatten.

    Returns:
        The populated caffe2 ``model_helper.ModelHelper``. The output of the
        last translated layer is registered as an external output.

    Raises:
        ValueError: on unsupported padding, activation, or layer rank, or
            when an input layer's size cannot be resolved.
    """
    arg_scope = {'order': 'NCHW', 'use_cudnn': use_cudnn}
    caffe2_model = model_helper.ModelHelper(name='model',
                                            init_params=init_params,
                                            arg_scope=arg_scope)

    layer_num = 0
    layer_sizes = {}  # layer name -> (height, width) of its output
    prev_layer_name = ''
    c2_layer = None

    for layer in model.layers:

        inb_node = layer._inbound_nodes[0]
        num_input_layers = len(inb_node.inbound_layers)

        input_name_list = []
        for ii in range(0, num_input_layers):
            inp_layer = inb_node.inbound_layers[ii]
            input_name_list.append(inp_layer.name)
            prev_layer_name = inp_layer.name

        name = layer.name
        config = layer.get_config()
        inputShape = layer.input_shape
        outputShape = layer.output_shape

        if isinstance(layer, keras.engine.input_layer.InputLayer):
            # Seed the size table from the requested NCHW input shape.
            input_sizes = (input_shape[2], input_shape[3])
            layer_sizes[name] = input_sizes
        else:
            if input_name_list[0] not in layer_sizes:
                raise ValueError("Can't find layer size for ",
                                 input_name_list[0])
            input_sizes = layer_sizes[input_name_list[0]]

        layer_dim = len(outputShape)
        if layer_dim == 4:
            if keras_channel_last:
                out_sizes = (outputShape[1], outputShape[2])
            else:
                out_sizes = (outputShape[2], outputShape[3])
        elif layer_dim == 2:
            out_sizes = (0, 0)  # flattened
        else:
            raise ValueError(
                'Unsupported layer dimension : {0}'.format(layer_dim))

        if isinstance(layer, keras.layers.Flatten):
            tmp_prev = prev_layer_name

            if keras_channel_last:
                # NCHW -> NHWC before flattening so element order matches
                # what the channels-last Keras weights expect.
                tmp_prev = prev_layer_name + '_transpose'
                c2_layer = brew.transpose(caffe2_model,
                                          prev_layer_name,
                                          tmp_prev,
                                          axes=(0, 2, 3, 1))

            c2_layer = caffe2_model.net.Flatten(tmp_prev, name)
            layer_sizes[name] = out_sizes

        elif isinstance(layer, keras.layers.Dropout):
            # Inference-time dropout is the identity (is_test=True).
            c2_layer = caffe2_model.net.Dropout(prev_layer_name,
                                                name,
                                                is_test=True)
            # Same spatial size as the input.
            layer_sizes[name] = input_sizes

        elif isinstance(layer, keras.layers.convolutional.Conv2D):
            dim_in = inputShape[-1]
            dim_out = outputShape[-1]
            kernel = config['kernel_size'][0]
            stride = config['strides'][0]

            if config['padding'] == 'same':
                pad_sizes = get_padding_sizes(input_sizes,
                                              config['kernel_size'],
                                              config['strides'])
            elif config['padding'] == 'valid':
                pad_sizes = ((0, 0), (0, 0))
            else:
                raise ValueError('unsupported padding')

            layer_sizes[name] = out_sizes

            c2_layer = brew.conv(caffe2_model,
                                 prev_layer_name,
                                 name,
                                 dim_in=dim_in,
                                 dim_out=dim_out,
                                 kernel=kernel,
                                 stride=stride,
                                 pad_l=pad_sizes[0][0],
                                 pad_r=pad_sizes[0][1],
                                 pad_t=pad_sizes[1][0],
                                 pad_b=pad_sizes[1][1])

            if config['activation'] == 'linear':
                pass
            elif config['activation'] == 'relu':
                c2_layer = brew.relu(caffe2_model, name, name)
            elif config['activation'] == 'softmax':
                c2_layer = brew.softmax(caffe2_model, name, 'softmax')
            else:
                raise ValueError(
                    'The only supported activation for conv layer is relu')

        elif isinstance(layer, keras.layers.MaxPooling2D):
            layer_sizes[name] = out_sizes
            c2_layer = brew.max_pool(caffe2_model,
                                     prev_layer_name,
                                     name,
                                     kernel=config['pool_size'][0],
                                     stride=config['strides'][0])

        elif isinstance(layer, keras.layers.AveragePooling2D):
            layer_sizes[name] = out_sizes
            c2_layer = brew.average_pool(caffe2_model,
                                         prev_layer_name,
                                         name,
                                         kernel=config['pool_size'][0],
                                         stride=config['strides'][0])

        elif isinstance(layer, keras.layers.BatchNormalization):
            c2_layer = brew.spatial_bn(caffe2_model,
                                       prev_layer_name,
                                       name,
                                       dim_in=inputShape[-1],
                                       epsilon=config['epsilon'],
                                       momentum=config['momentum'],
                                       is_test=True)
            # Same spatial size as the input.
            layer_sizes[name] = input_sizes

        elif isinstance(layer, keras.layers.core.Dense):
            dim_in = inputShape[-1]
            dim_out = outputShape[-1]

            if len(inputShape) == 2:  # flattened input
                c2_layer = brew.fc(caffe2_model,
                                   prev_layer_name,
                                   name,
                                   dim_in=dim_in,
                                   dim_out=dim_out)
            else:  # fully convolutional input: emulate Dense with a 1x1 conv
                c2_layer = brew.conv(caffe2_model,
                                     prev_layer_name,
                                     name,
                                     dim_in=dim_in,
                                     dim_out=dim_out,
                                     kernel=1,
                                     stride=1)

            activation = config['activation']
            if activation == 'relu':
                c2_layer = brew.relu(caffe2_model, name, name)
            elif activation == 'softmax':
                c2_layer = brew.softmax(caffe2_model, name, 'softmax')
            elif activation == 'linear':
                pass
            else:
                raise ValueError(
                    'The only supported activations for fc layer are relu and softmax'
                )

            # Same spatial size as the input.
            layer_sizes[name] = input_sizes

        elif isinstance(layer, keras.layers.advanced_activations.LeakyReLU):
            c2_layer = caffe2_model.net.LeakyRelu(prev_layer_name,
                                                  name,
                                                  alpha=config['alpha'])
            # Same spatial size as the input.
            layer_sizes[name] = input_sizes

        elif isinstance(layer, keras.layers.merge.Add):
            c2_layer = brew.sum(caffe2_model,
                                [input_name_list[0], input_name_list[1]], name)
            # Same spatial size as the input.
            layer_sizes[name] = input_sizes

        # BUG FIX: this counter (and the final-layer check) used to sit
        # outside the loop, so the external output was registered only for
        # single-layer models. Counting inside the loop exposes the last
        # translated layer's output for any model size.
        layer_num = layer_num + 1
        if layer_num == len(model.layers):
            caffe2_model.net.AddExternalOutput(c2_layer)

    return caffe2_model
Ejemplo n.º 13
0
    def build_crf_net(self, input_blob, initial_state, transitions):
            '''
            Adds the crf_net recurrent operator to the model.

            model: model_helper.ModelHelper object new operators would be added
            to

            input_blob: the input sequence in a format T x N x D
            where T is sequence size, N - batch size and D - input dimension
            ##Only supports batch-size 1##

            seq_lengths: blob containing sequence lengths (unused)
            '''

            scope = 'crf_net'

            def s(name):
                '''Prefix *name* with the crf_net scope.'''
                # We have to manually scope due to our internal/external blob
                # relationships.
                return "{}/{}".format(str(scope), str(name))

            # Step net: executed once per timestep by recurrent_net below.
            step_model = model_helper.ModelHelper(name='crf_step',
                                                  param_model=self.model)
            input_t, cell_t_prev, _ = (
                step_model.net.AddExternalInputs(
                    core.ScopedBlobReference('input_t'),
                    core.ScopedBlobReference('cell_t_prev'),
                    transitions
                )
            )
            # All-zero segment ids, so SortedSegmentRangeLogSumExp below
            # reduces over the whole first axis as one segment.
            zero_segment_id = step_model.param_init_net.ConstantFill(
                [],
                [s('zero_segment_id')],
                value=0,
                shape=[self.num_classes_padded],
                dtype=core.DataType.INT32,
            )

            # A hack to bypass model cloning for test
            step_model.param_init_net.AddExternalOutput(zero_segment_id)
            """ the CRF step """
            # Do tile
            prev_transpose = brew.transpose(
                step_model,
                cell_t_prev,
                [s('prev_transpose')],
                axes=(0, 2, 1),
            )
            prev_tiled = step_model.net.Tile(
                prev_transpose,
                [s('prev_tiled')],
                tiles=self.num_classes_padded,
                axis=2,
            )
            input_t_tiled = step_model.net.Tile(
                input_t,
                [s('input_t_tiled')],
                tiles=self.num_classes_padded,
                axis=1,
            )
            # Pairwise sum of previous-cell scores and current emissions.
            input_with_prev = step_model.net.Add(
                [prev_tiled, input_t_tiled],
                [s('input_with_prev')]
            )
            all_with_transitions = step_model.net.Add(
                [input_with_prev, transitions],
                [s('prev_with_transitions')],
                broadcast=1,
                use_grad_hack=1,
            )
            # Flatten to a [num_classes_padded, num_classes_padded] matrix.
            all_with_transitions_reshaped, _ = step_model.net.Reshape(
                all_with_transitions,
                [s('all_with_transitions_reshaped'), s('all_with_transitions_orig')],
                shape=(self.num_classes_padded, self.num_classes_padded)
            )
            cell_t = step_model.net.SortedSegmentRangeLogSumExp(
                [all_with_transitions_reshaped, zero_segment_id],
                [s('cell_t')],
            )
            step_model.net.AddExternalOutputs(cell_t)
            """ recurrent network """
            # Unroll the step net across the sequence; gradients flow through
            # the last output only (outputs_with_grads=(1,)).
            cell_input_blob = initial_state
            out_all, out_last = recurrent.recurrent_net(
                net=self.model.net,
                cell_net=step_model.net,
                inputs=[(input_t, input_blob)],
                initial_cell_inputs=[
                    (cell_t_prev, cell_input_blob),
                ],
                links={
                    cell_t_prev: cell_t,
                },
                scope=scope,
                outputs_with_grads=(1,)
            )
            return out_last