예제 #1
0
    def test_extract(self, T, n, d):
        """Check that per-timestep blobs can be retrieved after running an RNN.

        Builds an outer model plus a one-op step model (element-wise ``Mul``
        of ``input_t`` and ``output_t_prev``), wires them together with
        ``recurrent.recurrent_net``, feeds random ``(T, n, d)`` /
        ``(1, n, d)`` float32 inputs, runs the net, and asserts that
        ``recurrent.retrieve_step_blobs`` reports an ``input_t`` blob for
        every index in ``range(T)``.
        """
        outer = ModelHelper(name='external')
        workspace.ResetWorkspace()

        seq_blob, init_blob = outer.net.AddExternalInputs(
            'input', 'initial_input')

        # Step net: output_t = input_t * output_t_prev.
        cell = ModelHelper(name='step', param_model=outer)
        x_t, y_prev = cell.net.AddExternalInput('input_t', 'output_t_prev')
        y_t = cell.net.Mul([x_t, y_prev])
        cell.net.AddExternalOutput(y_t)

        seq_data = np.random.randn(T, n, d).astype(np.float32)
        init_data = np.random.randn(1, n, d).astype(np.float32)
        recurrent.recurrent_net(
            net=outer.net,
            cell_net=cell.net,
            inputs=[(x_t, seq_blob)],
            initial_cell_inputs=[(y_prev, init_blob)],
            links={y_prev: y_t},
            scope="test_rnn_sum_mull",
        )

        workspace.blobs[seq_blob] = seq_data
        workspace.blobs[init_blob] = init_data

        workspace.RunNetOnce(outer.param_init_net)
        workspace.CreateNet(outer.net)
        workspace.RunNet(outer.net.Proto().name, T)

        prefix = "extractTest"
        # Per the original note, on Python 3 the names come back as
        # bytes-like objects rather than str, so decode each one.
        extracted = [
            raw.decode()
            for raw in recurrent.retrieve_step_blobs(outer.net, prefix)
        ]

        for step_idx in range(T):
            expected = "_".join([prefix, "input_t" + str(step_idx)])
            # NOTE: the backslash continuations live inside the string
            # literal, so the indentation below is part of the message.
            self.assertTrue(
                expected in extracted,
                "blob extraction failed on timestep {}\
                    . \n\n Extracted Blobs: {} \n\n Looking for {}\
                    .".format(step_idx, extracted, expected)
            )
예제 #2
0
    def test_extract(self, T, n, d):
        """Check that per-timestep blobs can be retrieved after running an RNN.

        Builds an outer model plus a one-op step model (element-wise ``Mul``
        of ``input_t`` and ``output_t_prev``), wires them together with
        ``recurrent.recurrent_net``, feeds random ``(T, n, d)`` /
        ``(1, n, d)`` float32 inputs, runs the net, and asserts that
        ``recurrent.retrieve_step_blobs`` reports an ``input_t`` blob for
        every index in ``range(T)``.
        """
        outer = ModelHelper(name='external')
        workspace.ResetWorkspace()

        seq_blob, init_blob = outer.net.AddExternalInputs(
            'input', 'initial_input')

        # Step net: output_t = input_t * output_t_prev.
        cell = ModelHelper(name='step', param_model=outer)
        x_t, y_prev = cell.net.AddExternalInput('input_t', 'output_t_prev')
        y_t = cell.net.Mul([x_t, y_prev])
        cell.net.AddExternalOutput(y_t)

        seq_data = np.random.randn(T, n, d).astype(np.float32)
        init_data = np.random.randn(1, n, d).astype(np.float32)
        recurrent.recurrent_net(
            net=outer.net,
            cell_net=cell.net,
            inputs=[(x_t, seq_blob)],
            initial_cell_inputs=[(y_prev, init_blob)],
            links={y_prev: y_t},
            scope="test_rnn_sum_mull",
        )

        workspace.blobs[seq_blob] = seq_data
        workspace.blobs[init_blob] = init_data

        workspace.RunNetOnce(outer.param_init_net)
        workspace.CreateNet(outer.net)
        workspace.RunNet(outer.net.Proto().name, T)

        prefix = "extractTest"
        # Per the original note, on Python 3 the names come back as
        # bytes-like objects rather than str, so decode each one.
        extracted = [
            raw.decode()
            for raw in recurrent.retrieve_step_blobs(outer.net, prefix)
        ]

        for step_idx in range(T):
            expected = "_".join([prefix, "input_t" + str(step_idx)])
            # NOTE: the backslash continuations live inside the string
            # literal, so the indentation below is part of the message.
            self.assertTrue(
                expected in extracted,
                "blob extraction failed on timestep {}\
                    . \n\n Extracted Blobs: {} \n\n Looking for {}\
                    .".format(step_idx, extracted, expected)
            )
예제 #3
0
    def apply_over_sequence(
        self,
        model,
        inputs,
        seq_lengths,
        initial_states,
        outputs_with_grads=None,
    ):
        """Unroll this cell over a sequence via ``recurrent.recurrent_net``.

        A step model named ``self.name`` is created with ``model`` as its
        ``param_model``; ``self._apply`` builds a single step in it, and the
        recurrent operator is then added to ``model.net``.

        Args:
            model: helper whose ``net`` receives the recurrent operator.
            inputs: passed through ``self.prepare_input`` before use.
            seq_lengths: forwarded unchanged to ``self._apply``.
            initial_states: paired element-wise with the ``*_prev`` step
                inputs to seed the recurrence.
            outputs_with_grads: forwarded to ``recurrent_net``; when
                ``None`` it defaults to
                ``[self.get_output_state_index() * 2]``.

        Returns:
            ``(output, states_for_all_steps)``: the value produced by
            ``self._prepare_output_sequence`` and the raw result of
            ``recurrent_net``.
        """
        prepared = self.prepare_input(model, inputs)
        cell_model = ModelHelper(name=self.name, param_model=model)

        input_t, timestep = cell_model.net.AddScopedExternalInputs(
            'input_t', 'timestep')
        prev_names = [
            state_name + '_prev' for state_name in self.get_state_names()
        ]
        prev_states = cell_model.net.AddScopedExternalInputs(*prev_names)

        next_states = self._apply(
            model=cell_model,
            input_t=input_t,
            seq_lengths=seq_lengths,
            states=prev_states,
            timestep=timestep,
        )

        grad_outputs = outputs_with_grads
        if grad_outputs is None:
            grad_outputs = [self.get_output_state_index() * 2]

        # The result interleaves, for each state, the values gathered over
        # all steps with the final value:
        # (state_1_all, state_1_final, state_2_all, state_2_final, ...)
        all_states = recurrent.recurrent_net(
            net=model.net,
            cell_net=cell_model.net,
            inputs=[(input_t, prepared)],
            initial_cell_inputs=list(zip(prev_states, initial_states)),
            links=dict(zip(prev_states, next_states)),
            timestep=timestep,
            scope=self.name,
            forward_only=self.forward_only,
            outputs_with_grads=grad_outputs,
            recompute_blobs_on_backward=self.recompute_blobs,
        )

        return self._prepare_output_sequence(model, all_states), all_states
예제 #4
0
    def apply_over_sequence(
        self,
        model,
        inputs,
        seq_lengths,
        initial_states,
        outputs_with_grads=None,
    ):
        """Unroll this cell over a sequence via ``recurrent.recurrent_net``.

        A step model named ``self.name`` is created with ``model`` as its
        ``param_model``; ``self._apply`` builds a single step in it, and the
        recurrent operator is then added to ``model.net``.

        Args:
            model: helper whose ``net`` receives the recurrent operator.
            inputs: passed through ``self.prepare_input`` before use.
            seq_lengths: forwarded unchanged to ``self._apply``.
            initial_states: paired element-wise with the ``*_prev`` step
                inputs to seed the recurrence.
            outputs_with_grads: forwarded to ``recurrent_net``; when
                ``None`` it defaults to
                ``[self.get_output_state_index() * 2]``.

        Returns:
            ``(output, states_for_all_steps)``: the value produced by
            ``self._prepare_output_sequence`` and the raw result of
            ``recurrent_net``.
        """
        prepared = self.prepare_input(model, inputs)
        cell_model = ModelHelper(name=self.name, param_model=model)

        input_t, timestep = cell_model.net.AddScopedExternalInputs(
            'input_t', 'timestep')
        prev_names = [
            state_name + '_prev' for state_name in self.get_state_names()
        ]
        prev_states = cell_model.net.AddScopedExternalInputs(*prev_names)

        next_states = self._apply(
            model=cell_model,
            input_t=input_t,
            seq_lengths=seq_lengths,
            states=prev_states,
            timestep=timestep,
        )

        grad_outputs = outputs_with_grads
        if grad_outputs is None:
            grad_outputs = [self.get_output_state_index() * 2]

        # The result interleaves, for each state, the values gathered over
        # all steps with the final value:
        # (state_1_all, state_1_final, state_2_all, state_2_final, ...)
        all_states = recurrent.recurrent_net(
            net=model.net,
            cell_net=cell_model.net,
            inputs=[(input_t, prepared)],
            initial_cell_inputs=list(zip(prev_states, initial_states)),
            links=dict(zip(prev_states, next_states)),
            timestep=timestep,
            scope=self.name,
            outputs_with_grads=grad_outputs,
            recompute_blobs_on_backward=self.recompute_blobs,
        )

        return self._prepare_output_sequence(model, all_states), all_states
예제 #5
0
파일: rnn_cell.py 프로젝트: zlbing/caffe2
 def apply_over_sequence(
     self,
     model,
     inputs,
     seq_lengths,
     initial_states,
     outputs_with_grads=None,
 ):
     """Unroll this cell over a sequence via ``recurrent.recurrent_net``.

     A step model named ``self.name`` is created with ``model`` as its
     ``param_model``; ``self._apply`` builds a single step in it, and the
     recurrent operator is then added to ``model.net``.

     Args:
         model: helper whose ``net`` receives the recurrent operator.
         inputs: passed through ``self.prepare_input`` before use.
         seq_lengths: forwarded unchanged to ``self._apply``.
         initial_states: paired element-wise with the ``*_prev`` step
             inputs to seed the recurrence.
         outputs_with_grads: forwarded to ``recurrent_net``; when ``None``
             the value of ``self.get_outputs_with_grads()`` is used.

     Returns:
         Whatever ``recurrent.recurrent_net`` returns.
     """
     preprocessed_inputs = self.prepare_input(model, inputs)
     step_model = CNNModelHelper(name=self.name, param_model=model)
     input_t, timestep = step_model.net.AddScopedExternalInputs(
         'input_t',
         'timestep',
     )
     states_prev = step_model.net.AddScopedExternalInputs(
         *[s + '_prev' for s in self.get_state_names()])
     states = self._apply(
         model=step_model,
         input_t=input_t,
         seq_lengths=seq_lengths,
         states=states_prev,
         timestep=timestep,
     )
     return recurrent.recurrent_net(
         net=model.net,
         cell_net=step_model.net,
         inputs=[(input_t, preprocessed_inputs)],
         # Materialize the pairs: on Python 3 ``zip`` is a one-shot
         # iterator, which would be empty if the callee walks it twice.
         initial_cell_inputs=list(zip(states_prev, initial_states)),
         links=dict(zip(states_prev, states)),
         timestep=timestep,
         scope=self.name,
         outputs_with_grads=(outputs_with_grads
                             if outputs_with_grads is not None else
                             self.get_outputs_with_grads()),
         recompute_blobs_on_backward=self.recompute_blobs,
         forward_only=self.forward_only,
     )
예제 #6
0
    def simple_rnn(self, T, n, d, model, step, input_t, output_t, output_t_prev,
                   input_blob, initial_input_blob):
        """Wire ``step`` into a recurrent op and verify it against a reference.

        Random float32 data of shape ``(T, n, d)`` and ``(1, n, d)`` is fed
        to ``input_blob`` and ``initial_input_blob``.  The last operator on
        ``model.net`` is then checked with ``assertReferenceChecks`` (against
        a reference that runs ``step.net`` once per timestep in a separate
        workspace) and with ``assertGradientChecks``.

        Args:
            T, n, d: dimensions of the random input data.
            model: outer model whose ``net`` receives the recurrent op.
            step: model whose ``net`` is the per-timestep cell net.
            input_t, output_t, output_t_prev: step-net blobs for the current
                input, current output and previous output.
            input_blob, initial_input_blob: outer-net blobs holding the full
                input sequence and the initial recurrent state.
        """
        input_data = np.random.randn(T, n, d).astype(np.float32)
        initial_data = np.random.randn(1, n, d).astype(np.float32)
        recurrent.recurrent_net(
            net=model.net,
            cell_net=step.net,
            inputs=[(input_t, input_blob)],
            initial_cell_inputs=[(output_t_prev, initial_input_blob)],
            links={output_t_prev: output_t},
            scope="test_rnn_sum_mull",
        )
        workspace.blobs[input_blob] = input_data
        workspace.blobs[initial_input_blob] = initial_data

        # Last operator on the net -- presumably the recurrent op that
        # recurrent_net just appended.
        op = model.net._net.op[-1]
        # Just conveniently store all inputs in an array in the same
        # order as op.input
        inputs = [workspace.blobs[name] for name in op.input]

        # Parameter names are kept as in the original in case the test
        # harness passes them by keyword.
        def reference(input, initial_input):
            global_ws_name = workspace.CurrentWorkspace()
            # Read the sequence from the current workspace before switching.
            input_all = workspace.blobs[input_blob]

            workspace.SwitchWorkspace("ref", create_if_missing=True)
            try:
                workspace.blobs[input_blob] = input
                workspace.blobs[output_t_prev] = initial_input.reshape(n, d)
                res_all = np.zeros(shape=input.shape, dtype=np.float32)

                for t_cur in range(T):
                    workspace.blobs[input_t] = input_all[t_cur]
                    workspace.RunNetOnce(step.net)
                    result_t = workspace.blobs[output_t]
                    # Feed this step's output back in as the next step's
                    # previous output.
                    workspace.blobs[output_t_prev] = result_t
                    res_all[t_cur] = result_t
            finally:
                # Always return to the caller's workspace, even if a step
                # fails, so later checks do not run inside "ref".
                workspace.SwitchWorkspace(global_ws_name)

            shape = list(input.shape)
            shape[0] = 1
            return (res_all, res_all[-1].reshape(shape))

        self.assertReferenceChecks(
            device_option=hu.cpu_do,
            op=op,
            inputs=inputs,
            reference=reference,
            output_to_grad=op.output[0],
            outputs_to_check=[0, 1],
        )

        self.assertGradientChecks(
            device_option=hu.cpu_do,
            op=op,
            inputs=inputs,
            outputs_to_check=0,
            outputs_with_grads=[0],
            threshold=0.01,
            stepsize=0.005,
        )
예제 #7
0
    def simple_rnn(self, T, n, d, model, step, input_t, output_t, output_t_prev,
                   input_blob, initial_input_blob):
        """Wire ``step`` into a recurrent op and verify it against a reference.

        Random float32 data of shape ``(T, n, d)`` and ``(1, n, d)`` is fed
        to ``input_blob`` and ``initial_input_blob``.  The last operator on
        ``model.net`` is then checked with ``assertReferenceChecks`` (against
        a reference that runs ``step.net`` once per timestep in a separate
        workspace) and with ``assertGradientChecks``.

        Args:
            T, n, d: dimensions of the random input data.
            model: outer model whose ``net`` receives the recurrent op.
            step: model whose ``net`` is the per-timestep cell net.
            input_t, output_t, output_t_prev: step-net blobs for the current
                input, current output and previous output.
            input_blob, initial_input_blob: outer-net blobs holding the full
                input sequence and the initial recurrent state.
        """
        input_data = np.random.randn(T, n, d).astype(np.float32)
        initial_data = np.random.randn(1, n, d).astype(np.float32)
        recurrent.recurrent_net(
            net=model.net,
            cell_net=step.net,
            inputs=[(input_t, input_blob)],
            initial_cell_inputs=[(output_t_prev, initial_input_blob)],
            links={output_t_prev: output_t},
            scope="test_rnn_sum_mull",
        )
        workspace.blobs[input_blob] = input_data
        workspace.blobs[initial_input_blob] = initial_data

        # Last operator on the net -- presumably the recurrent op that
        # recurrent_net just appended.
        op = model.net._net.op[-1]
        # Just conveniently store all inputs in an array in the same
        # order as op.input
        inputs = [workspace.blobs[name] for name in op.input]

        # Parameter names are kept as in the original in case the test
        # harness passes them by keyword.
        def reference(input, initial_input):
            global_ws_name = workspace.CurrentWorkspace()
            # Read the sequence from the current workspace before switching.
            input_all = workspace.blobs[input_blob]

            workspace.SwitchWorkspace("ref", create_if_missing=True)
            try:
                workspace.blobs[input_blob] = input
                workspace.blobs[output_t_prev] = initial_input.reshape(n, d)
                res_all = np.zeros(shape=input.shape, dtype=np.float32)

                for t_cur in range(T):
                    workspace.blobs[input_t] = input_all[t_cur]
                    workspace.RunNetOnce(step.net)
                    result_t = workspace.blobs[output_t]
                    # Feed this step's output back in as the next step's
                    # previous output.
                    workspace.blobs[output_t_prev] = result_t
                    res_all[t_cur] = result_t
            finally:
                # Always return to the caller's workspace, even if a step
                # fails, so later checks do not run inside "ref".
                workspace.SwitchWorkspace(global_ws_name)

            shape = list(input.shape)
            shape[0] = 1
            return (res_all, res_all[-1].reshape(shape))

        self.assertReferenceChecks(
            device_option=hu.cpu_do,
            op=op,
            inputs=inputs,
            reference=reference,
            output_to_grad=op.output[0],
            outputs_to_check=[0, 1],
        )

        self.assertGradientChecks(
            device_option=hu.cpu_do,
            op=op,
            inputs=inputs,
            outputs_to_check=0,
            outputs_with_grads=[0],
            threshold=0.01,
            stepsize=0.005,
        )
예제 #8
0
파일: crf.py 프로젝트: zlbing/caffe2
    def build_crf_net(self, input_blob, initial_state, transitions):
        """Add the ``crf_net`` recurrent operator to ``self.model``.

        A step model (``crf_step``) is built on top of ``self.model`` and
        unrolled over ``input_blob`` with ``recurrent.recurrent_net``.

        Args:
            input_blob: the input sequence; documented by the original
                author as T x N x D (sequence size, batch size, input
                dimension), with only batch size 1 supported.
            initial_state: blob used as the initial value of the step
                input ``cell_t_prev``.
            transitions: blob registered as an external input of the step
                net and added (``broadcast=1``) to the tiled scores.

        Returns:
            The second of the two values returned by
            ``recurrent.recurrent_net``.
        """
        scope = 'crf_net'
        padded = self.num_classes_padded

        def s(name):
            """Return ``name`` prefixed with the recurrent scope."""
            # We have to manually scope due to our internal/external blob
            # relationships.
            return "{}/{}".format(str(scope), str(name))

        crf_step = CNNModelHelper(name='crf_step', param_model=self.model)
        input_t, cell_t_prev, _ = crf_step.net.AddExternalInputs(
            'input_t', 'cell_t_prev', transitions)

        segment_ids = crf_step.param_init_net.ConstantFill(
            [],
            [s('zero_segment_id')],
            value=0,
            shape=[padded],
            dtype=core.DataType.INT32,
        )
        # A hack to bypass model cloning for test
        crf_step.param_init_net.AddExternalOutput(segment_ids)

        # --- the CRF step ---
        # Do tile
        prev_t = crf_step.Transpose(
            cell_t_prev, [s('prev_transpose')], axes=(0, 2, 1))
        prev_tiled = crf_step.net.Tile(
            prev_t, [s('prev_tiled')], tiles=padded, axis=2)
        input_tiled = crf_step.net.Tile(
            input_t, [s('input_t_tiled')], tiles=padded, axis=1)

        summed = crf_step.net.Add(
            [prev_tiled, input_tiled],
            [s('input_with_prev')],
        )
        scored = crf_step.net.Add(
            [summed, transitions],
            [s('prev_with_transitions')],
            broadcast=1,
            use_grad_hack=1,
        )
        scored_2d, _ = crf_step.net.Reshape(
            scored,
            [
                s('all_with_transitions_reshaped'),
                s('all_with_transitions_orig'),
            ],
            shape=(padded, padded),
        )
        cell_t = crf_step.net.SortedSegmentRangeLogSumExp(
            [scored_2d, segment_ids],
            [s('cell_t')],
        )
        crf_step.net.AddExternalOutputs(cell_t)

        # --- recurrent network ---
        _, last_output = recurrent.recurrent_net(
            net=self.model.net,
            cell_net=crf_step.net,
            inputs=[(input_t, input_blob)],
            initial_cell_inputs=[(cell_t_prev, initial_state)],
            links={cell_t_prev: cell_t},
            scope=scope,
            outputs_with_grads=(1, ),
        )
        return last_output
예제 #9
0
    def test_mul_rnn(self, T, n, d):
        """Check a multiplicative RNN against NumPy forward/backward references.

        The step net computes ``output_t = input_t * output_t_prev``; the
        initial state is a constant-filled blob of ones with shape
        ``[1, n, d]`` and the input is random float32 data of shape
        ``(T, n, d)``.  The last operator on the outer net is verified with
        ``assertReferenceChecks``.
        """
        outer = ModelHelperBase(name='external')

        ones_blob = outer.param_init_net.ConstantFill(
            [], value=1.0, shape=[1, n, d])
        seq_blob = outer.net.AddExternalInput('input')

        # Step net: a single element-wise multiplication.
        cell = ModelHelperBase(name='step', param_model=outer)
        x_t, y_prev = cell.net.AddExternalInput('input_t', 'output_t_prev')
        y_t = cell.net.Mul([x_t, y_prev])
        cell.net.AddExternalOutput(y_t)

        recurrent.recurrent_net(
            net=outer.net,
            cell_net=cell.net,
            inputs=[(x_t, seq_blob)],
            initial_cell_inputs=[(y_prev, ones_blob)],
            links={y_prev: y_t},
            scope="test_mul_rnn",
        )

        seq_data = np.random.randn(T, n, d).astype(np.float32)
        workspace.FeedBlob(str(seq_blob), seq_data)
        workspace.RunNetOnce(outer.param_init_net)

        op = outer.net._net.op[-1]

        def reference(input, initial_input):
            # Running product of the inputs, seeded with the initial state.
            running = initial_input
            result = np.zeros(shape=input.shape)
            for t_cur in range(T):
                running = running * input[t_cur]
                result[t_cur] = running

            last_shape = list(input.shape)
            last_shape[0] = 1
            return (result, result[-1].reshape(last_shape))

        def grad_reference(output_grad, ref_output, inputs):
            input, initial_input = inputs[0], inputs[1]
            output = ref_output[0]
            input_grad = np.zeros(shape=input.shape)
            right_grad = 0

            # Walk backwards through time, carrying the gradient that flows
            # in from later timesteps in ``right_grad``.
            for t_cur in reversed(range(T)):
                if t_cur > 0:
                    prev_output = output[t_cur - 1]
                else:
                    prev_output = initial_input
                incoming = output_grad[t_cur] + right_grad
                input_grad[t_cur] = incoming * prev_output
                right_grad = input[t_cur] * incoming
            return (input_grad, right_grad.reshape([1, n, d]))

        fetched_inputs = [
            workspace.FetchBlob(name) for name in [seq_blob, ones_blob]
        ]
        self.assertReferenceChecks(
            device_option=hu.cpu_do,
            op=op,
            inputs=fetched_inputs,
            reference=reference,
            grad_reference=grad_reference,
            output_to_grad=op.output[0],
            outputs_to_check=[0, 1],
        )
예제 #10
0
    def test_mul_rnn(self, T, n, d):
        """Check a multiplicative RNN against NumPy forward/backward references.

        The step net computes ``output_t = input_t * output_t_prev``; the
        initial state is a constant-filled blob of ones with shape
        ``[1, n, d]`` and the input is random float32 data of shape
        ``(T, n, d)``.  The last operator on the outer net is verified with
        ``assertReferenceChecks``.
        """
        outer = ModelHelperBase(name='external')

        ones_blob = outer.param_init_net.ConstantFill(
            [], value=1.0, shape=[1, n, d])
        seq_blob = outer.net.AddExternalInput('input')

        # Step net: a single element-wise multiplication.
        cell = ModelHelperBase(name='step', param_model=outer)
        x_t, y_prev = cell.net.AddExternalInput('input_t', 'output_t_prev')
        y_t = cell.net.Mul([x_t, y_prev])
        cell.net.AddExternalOutput(y_t)

        recurrent.recurrent_net(
            net=outer.net,
            cell_net=cell.net,
            inputs=[(x_t, seq_blob)],
            initial_cell_inputs=[(y_prev, ones_blob)],
            links={y_prev: y_t},
            scratch_sizes=[],
            scope="test_mul_rnn",
        )

        seq_data = np.random.randn(T, n, d).astype(np.float32)
        workspace.FeedBlob(str(seq_blob), seq_data)
        workspace.RunNetOnce(outer.param_init_net)

        op = outer.net._net.op[-1]

        def reference(input, initial_input):
            # Running product of the inputs, seeded with the initial state.
            running = initial_input
            result = np.zeros(shape=input.shape)
            for t_cur in range(T):
                running = running * input[t_cur]
                result[t_cur] = running

            last_shape = list(input.shape)
            last_shape[0] = 1
            return (result, result[-1].reshape(last_shape))

        def grad_reference(output_grad, ref_output, inputs):
            input, initial_input = inputs[0], inputs[1]
            output = ref_output[0]
            input_grad = np.zeros(shape=input.shape)
            right_grad = 0

            # Walk backwards through time, carrying the gradient that flows
            # in from later timesteps in ``right_grad``.
            for t_cur in reversed(range(T)):
                if t_cur > 0:
                    prev_output = output[t_cur - 1]
                else:
                    prev_output = initial_input
                incoming = output_grad[t_cur] + right_grad
                input_grad[t_cur] = incoming * prev_output
                right_grad = input[t_cur] * incoming

            # The second gradient is reported as zeros of shape [T, n, d].
            return (input_grad, np.zeros(shape=[T, n, d]).astype(np.float32))

        fetched_inputs = [
            workspace.FetchBlob(name) for name in [seq_blob, ones_blob]
        ]
        self.assertReferenceChecks(
            hu.cpu_do,
            op,
            fetched_inputs,
            reference,
            grad_reference=grad_reference,
            output_to_grad=op.output[0],
        )
예제 #11
0
파일: crf.py 프로젝트: Yangqing/caffe2
    def build_crf_net(self, input_blob, initial_state, transitions):
        """Add the ``crf_net`` recurrent operator to ``self.model``.

        A step model (``crf_step``) is built on top of ``self.model`` and
        unrolled over ``input_blob`` with ``recurrent.recurrent_net``.

        Args:
            input_blob: the input sequence; documented by the original
                author as T x N x D (sequence size, batch size, input
                dimension), with only batch size 1 supported.
            initial_state: blob used as the initial value of the step
                input ``cell_t_prev``.
            transitions: blob registered as an external input of the step
                net and added (``broadcast=1``) to the tiled scores.

        Returns:
            The second of the two values returned by
            ``recurrent.recurrent_net``.
        """
        scope = 'crf_net'
        padded = self.num_classes_padded

        def s(name):
            """Return ``name`` prefixed with the recurrent scope."""
            # We have to manually scope due to our internal/external blob
            # relationships.
            return "{}/{}".format(str(scope), str(name))

        crf_step = model_helper.ModelHelper(
            name='crf_step', param_model=self.model)
        input_t, cell_t_prev, _ = crf_step.net.AddExternalInputs(
            core.ScopedBlobReference('input_t'),
            core.ScopedBlobReference('cell_t_prev'),
            transitions,
        )

        segment_ids = crf_step.param_init_net.ConstantFill(
            [],
            [s('zero_segment_id')],
            value=0,
            shape=[padded],
            dtype=core.DataType.INT32,
        )
        # A hack to bypass model cloning for test
        crf_step.param_init_net.AddExternalOutput(segment_ids)

        # --- the CRF step ---
        # Do tile
        prev_t = brew.transpose(
            crf_step, cell_t_prev, [s('prev_transpose')], axes=(0, 2, 1))
        prev_tiled = crf_step.net.Tile(
            prev_t, [s('prev_tiled')], tiles=padded, axis=2)
        input_tiled = crf_step.net.Tile(
            input_t, [s('input_t_tiled')], tiles=padded, axis=1)

        summed = crf_step.net.Add(
            [prev_tiled, input_tiled],
            [s('input_with_prev')],
        )
        scored = crf_step.net.Add(
            [summed, transitions],
            [s('prev_with_transitions')],
            broadcast=1,
            use_grad_hack=1,
        )
        scored_2d, _ = crf_step.net.Reshape(
            scored,
            [
                s('all_with_transitions_reshaped'),
                s('all_with_transitions_orig'),
            ],
            shape=(padded, padded),
        )
        cell_t = crf_step.net.SortedSegmentRangeLogSumExp(
            [scored_2d, segment_ids],
            [s('cell_t')],
        )
        crf_step.net.AddExternalOutputs(cell_t)

        # --- recurrent network ---
        _, last_output = recurrent.recurrent_net(
            net=self.model.net,
            cell_net=crf_step.net,
            inputs=[(input_t, input_blob)],
            initial_cell_inputs=[(cell_t_prev, initial_state)],
            links={cell_t_prev: cell_t},
            scope=scope,
            outputs_with_grads=(1,),
        )
        return last_output