def test_extract(self, T, n, d):
    model = ModelHelper(name='external')
    workspace.ResetWorkspace()

    input_blob, initial_input_blob = model.net.AddExternalInputs(
        'input', 'initial_input')

    step = ModelHelper(name='step', param_model=model)
    input_t, output_t_prev = step.net.AddExternalInput(
        'input_t', 'output_t_prev')
    output_t = step.net.Mul([input_t, output_t_prev])
    step.net.AddExternalOutput(output_t)

    inputs = np.random.randn(T, n, d).astype(np.float32)
    initial_input = np.random.randn(1, n, d).astype(np.float32)

    recurrent.recurrent_net(
        net=model.net,
        cell_net=step.net,
        inputs=[(input_t, input_blob)],
        initial_cell_inputs=[(output_t_prev, initial_input_blob)],
        links={output_t_prev: output_t},
        scope="test_rnn_sum_mull",
    )

    workspace.blobs[input_blob] = inputs
    workspace.blobs[initial_input_blob] = initial_input

    workspace.RunNetOnce(model.param_init_net)
    workspace.CreateNet(model.net)

    prefix = "extractTest"
    workspace.RunNet(model.net.Proto().name, T)
    retrieved_blobs = recurrent.retrieve_step_blobs(model.net, prefix)

    # needed for python3.6, which returns bytearrays instead of str
    retrieved_blobs = [x.decode() for x in retrieved_blobs]

    for i in range(T):
        blob_name = prefix + "_" + "input_t" + str(i)
        self.assertTrue(
            blob_name in retrieved_blobs,
            "blob extraction failed on timestep {}."
            "\n\n Extracted Blobs: {} \n\n Looking for {}.".format(
                i, retrieved_blobs, blob_name)
        )
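
# A minimal NumPy sketch (an illustration, not part of the test above; the
# helper name is hypothetical) of what the Mul step net computes over T steps:
# a running elementwise product of each timestep's input with the previous
# output, seeded by the initial input.
import numpy as np

def mul_rnn_reference(inputs, initial_input):
    # inputs: (T, n, d); initial_input: (1, n, d)
    state = initial_input[0]
    outputs = np.empty_like(inputs)
    for t in range(inputs.shape[0]):
        state = inputs[t] * state  # output_t = Mul(input_t, output_t_prev)
        outputs[t] = state
    return outputs, state[np.newaxis]  # (all steps, final state)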
def apply_over_sequence(
    self,
    model,
    inputs,
    seq_lengths,
    initial_states,
    outputs_with_grads=None,
):
    preprocessed_inputs = self.prepare_input(model, inputs)
    step_model = ModelHelper(name=self.name, param_model=model)
    input_t, timestep = step_model.net.AddScopedExternalInputs(
        'input_t',
        'timestep',
    )
    states_prev = step_model.net.AddScopedExternalInputs(*[
        s + '_prev' for s in self.get_state_names()
    ])
    states = self._apply(
        model=step_model,
        input_t=input_t,
        seq_lengths=seq_lengths,
        states=states_prev,
        timestep=timestep,
    )

    if outputs_with_grads is None:
        outputs_with_grads = [self.get_output_state_index() * 2]

    # states_for_all_steps interleaves the states gathered at every step
    # with the final states. It looks like this:
    # (state_1_all, state_1_final, state_2_all, state_2_final, ...)
    states_for_all_steps = recurrent.recurrent_net(
        net=model.net,
        cell_net=step_model.net,
        inputs=[(input_t, preprocessed_inputs)],
        initial_cell_inputs=list(zip(states_prev, initial_states)),
        links=dict(zip(states_prev, states)),
        timestep=timestep,
        scope=self.name,
        forward_only=self.forward_only,
        outputs_with_grads=outputs_with_grads,
        recompute_blobs_on_backward=self.recompute_blobs,
    )

    output = self._prepare_output_sequence(
        model,
        states_for_all_steps,
    )
    return output, states_for_all_steps
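
# Why the default above is [self.get_output_state_index() * 2]: recurrent_net
# returns the states interleaved as (state_1_all, state_1_final, state_2_all,
# state_2_final, ...), so the "all steps" blob of state k sits at index 2 * k
# and its final value at 2 * k + 1. A tiny sketch (helper name hypothetical):
def state_blob_positions(num_states):
    """Map each state index to its (all_steps, final) output positions."""
    return {k: (2 * k, 2 * k + 1) for k in range(num_states)}

# e.g. for a two-state cell such as an LSTM (hidden, cell):
assert state_blob_positions(2) == {0: (0, 1), 1: (2, 3)}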
def apply_over_sequence(
    self,
    model,
    inputs,
    seq_lengths,
    initial_states,
    outputs_with_grads=None,
):
    preprocessed_inputs = self.prepare_input(model, inputs)
    step_model = ModelHelper(name=self.name, param_model=model)
    input_t, timestep = step_model.net.AddScopedExternalInputs(
        'input_t',
        'timestep',
    )
    states_prev = step_model.net.AddScopedExternalInputs(*[
        s + '_prev' for s in self.get_state_names()
    ])
    states = self._apply(
        model=step_model,
        input_t=input_t,
        seq_lengths=seq_lengths,
        states=states_prev,
        timestep=timestep,
    )

    if outputs_with_grads is None:
        outputs_with_grads = [self.get_output_state_index() * 2]

    # states_for_all_steps interleaves the states gathered at every step
    # with the final states. It looks like this:
    # (state_1_all, state_1_final, state_2_all, state_2_final, ...)
    states_for_all_steps = recurrent.recurrent_net(
        net=model.net,
        cell_net=step_model.net,
        inputs=[(input_t, preprocessed_inputs)],
        initial_cell_inputs=list(zip(states_prev, initial_states)),
        links=dict(zip(states_prev, states)),
        timestep=timestep,
        scope=self.name,
        outputs_with_grads=outputs_with_grads,
        recompute_blobs_on_backward=self.recompute_blobs,
    )

    output = self._prepare_output_sequence(
        model,
        states_for_all_steps,
    )
    return output, states_for_all_steps
def apply_over_sequence(
    self,
    model,
    inputs,
    seq_lengths,
    initial_states,
    outputs_with_grads=None,
):
    preprocessed_inputs = self.prepare_input(model, inputs)
    step_model = CNNModelHelper(name=self.name, param_model=model)
    input_t, timestep = step_model.net.AddScopedExternalInputs(
        'input_t',
        'timestep',
    )
    states_prev = step_model.net.AddScopedExternalInputs(
        *[s + '_prev' for s in self.get_state_names()])
    states = self._apply(
        model=step_model,
        input_t=input_t,
        seq_lengths=seq_lengths,
        states=states_prev,
        timestep=timestep,
    )
    # materialize the zip so it survives under Python 3, where zip is lazy
    return recurrent.recurrent_net(
        net=model.net,
        cell_net=step_model.net,
        inputs=[(input_t, preprocessed_inputs)],
        initial_cell_inputs=list(zip(states_prev, initial_states)),
        links=dict(zip(states_prev, states)),
        timestep=timestep,
        scope=self.name,
        outputs_with_grads=(
            outputs_with_grads if outputs_with_grads is not None
            else self.get_outputs_with_grads()
        ),
        recompute_blobs_on_backward=self.recompute_blobs,
        forward_only=self.forward_only,
    )
def simple_rnn(self, T, n, d, model, step, input_t, output_t,
               output_t_prev, input_blob, initial_input_blob):
    input = np.random.randn(T, n, d).astype(np.float32)
    initial_input = np.random.randn(1, n, d).astype(np.float32)
    print(locals())
    recurrent.recurrent_net(
        net=model.net,
        cell_net=step.net,
        inputs=[(input_t, input_blob)],
        initial_cell_inputs=[(output_t_prev, initial_input_blob)],
        links={output_t_prev: output_t},
        scope="test_rnn_sum_mull",
    )

    workspace.blobs[input_blob] = input
    workspace.blobs[initial_input_blob] = initial_input

    op = model.net._net.op[-1]
    # Just conveniently store all inputs in an array in the same
    # order as op.input
    inputs = [workspace.blobs[name] for name in op.input]

    def reference(input, initial_input):
        global_ws_name = workspace.CurrentWorkspace()
        input_all = workspace.blobs[input_blob]

        workspace.SwitchWorkspace("ref", create_if_missing=True)
        workspace.blobs[input_blob] = input
        workspace.blobs[output_t_prev] = initial_input.reshape(n, d)
        res_all = np.zeros(shape=input.shape, dtype=np.float32)

        for t_cur in range(T):
            workspace.blobs[input_t] = input_all[t_cur]
            workspace.RunNetOnce(step.net)
            result_t = workspace.blobs[output_t]
            workspace.blobs[output_t_prev] = result_t
            res_all[t_cur] = result_t

        workspace.SwitchWorkspace(global_ws_name)

        shape = list(input.shape)
        shape[0] = 1
        return (res_all, res_all[-1].reshape(shape))

    self.assertReferenceChecks(
        device_option=hu.cpu_do,
        op=op,
        inputs=inputs,
        reference=reference,
        output_to_grad=op.output[0],
        outputs_to_check=[0, 1],
    )

    self.assertGradientChecks(
        device_option=hu.cpu_do,
        op=op,
        inputs=inputs,
        outputs_to_check=0,
        outputs_with_grads=[0],
        threshold=0.01,
        stepsize=0.005,
    )
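
# The reference above runs the step net in a throwaway workspace so it cannot
# clobber blobs in the main one. The same pattern, shown standalone (a sketch
# using the workspace calls already used above; the helper name is
# hypothetical):
from caffe2.python import workspace

def run_in_scratch_workspace(fn, name="ref"):
    main_ws = workspace.CurrentWorkspace()
    workspace.SwitchWorkspace(name, create_if_missing=True)
    try:
        return fn()
    finally:
        # always restore the caller's workspace, even if fn raises
        workspace.SwitchWorkspace(main_ws)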
def build_crf_net(self, input_blob, initial_state, transitions):
    '''
    Adds the crf_net recurrent operator to the model.

    model: CNNModelHelper object that new operators will be added to

    input_blob: the input sequence in a format T x N x D
    where T is the sequence length, N the batch size and D the input
    dimension
    ##Only supports batch-size 1##

    seq_lengths: blob containing sequence lengths (unused)
    '''

    scope = 'crf_net'

    def s(name):
        # We have to manually scope due to our internal/external blob
        # relationships.
        return "{}/{}".format(str(scope), str(name))

    step_model = CNNModelHelper(name='crf_step', param_model=self.model)
    input_t, cell_t_prev, _ = step_model.net.AddExternalInputs(
        'input_t', 'cell_t_prev', transitions)
    zero_segment_id = step_model.param_init_net.ConstantFill(
        [],
        [s('zero_segment_id')],
        value=0,
        shape=[self.num_classes_padded],
        dtype=core.DataType.INT32,
    )

    # A hack to bypass model cloning for test
    step_model.param_init_net.AddExternalOutput(zero_segment_id)
    """ the CRF step """
    # Do tile
    prev_transpose = step_model.Transpose(
        cell_t_prev,
        [s('prev_transpose')],
        axes=(0, 2, 1),
    )
    prev_tiled = step_model.net.Tile(
        prev_transpose,
        [s('prev_tiled')],
        tiles=self.num_classes_padded,
        axis=2,
    )
    input_t_tiled = step_model.net.Tile(
        input_t,
        [s('input_t_tiled')],
        tiles=self.num_classes_padded,
        axis=1,
    )
    input_with_prev = step_model.net.Add(
        [prev_tiled, input_t_tiled],
        [s('input_with_prev')]
    )
    all_with_transitions = step_model.net.Add(
        [input_with_prev, transitions],
        [s('prev_with_transitions')],
        broadcast=1,
        use_grad_hack=1,
    )
    all_with_transitions_reshaped, _ = step_model.net.Reshape(
        all_with_transitions,
        [s('all_with_transitions_reshaped'),
         s('all_with_transitions_orig')],
        shape=(self.num_classes_padded, self.num_classes_padded)
    )
    cell_t = step_model.net.SortedSegmentRangeLogSumExp(
        [all_with_transitions_reshaped, zero_segment_id],
        [s('cell_t')],
    )
    step_model.net.AddExternalOutputs(cell_t)
    """ recurrent network """
    cell_input_blob = initial_state
    out_all, out_last = recurrent.recurrent_net(
        net=self.model.net,
        cell_net=step_model.net,
        inputs=[(input_t, input_blob)],
        initial_cell_inputs=[
            (cell_t_prev, cell_input_blob),
        ],
        links={
            cell_t_prev: cell_t,
        },
        scope=scope,
        outputs_with_grads=(1,)
    )
    return out_last
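
# NumPy sketch of the single CRF step assembled above (an illustration,
# assuming transitions[i, j] scores a move from tag i to tag j): for batch
# size 1, the Tile / Add / SortedSegmentRangeLogSumExp chain computes
#   cell_t[j] = logsumexp_i(cell_t_prev[i] + input_t[j] + transitions[i, j])
import numpy as np

def crf_step_reference(cell_t_prev, input_t, transitions):
    # cell_t_prev: (C,), input_t: (C,), transitions: (C, C)
    scores = cell_t_prev[:, None] + input_t[None, :] + transitions
    # numerically stable logsumexp over the previous-tag axis
    m = scores.max(axis=0)
    return m + np.log(np.exp(scores - m).sum(axis=0))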
def test_mul_rnn(self, T, n, d):
    model = ModelHelperBase(name='external')

    one_blob = model.param_init_net.ConstantFill(
        [], value=1.0, shape=[1, n, d])
    input_blob = model.net.AddExternalInput('input')

    step = ModelHelperBase(name='step', param_model=model)
    input_t, output_t_prev = step.net.AddExternalInput(
        'input_t', 'output_t_prev')
    output_t = step.net.Mul([input_t, output_t_prev])
    step.net.AddExternalOutput(output_t)

    recurrent.recurrent_net(
        net=model.net,
        cell_net=step.net,
        inputs=[(input_t, input_blob)],
        initial_cell_inputs=[(output_t_prev, one_blob)],
        links={output_t_prev: output_t},
        scope="test_mul_rnn",
    )

    workspace.FeedBlob(
        str(input_blob), np.random.randn(T, n, d).astype(np.float32))
    workspace.RunNetOnce(model.param_init_net)

    op = model.net._net.op[-1]

    def reference(input, initial_input):
        recurrent_input = initial_input
        result = np.zeros(shape=input.shape)

        for t_cur in range(T):
            recurrent_input = recurrent_input * input[t_cur]
            result[t_cur] = recurrent_input

        shape = list(input.shape)
        shape[0] = 1
        return (result, result[-1].reshape(shape))

    def grad_reference(output_grad, ref_output, inputs):
        input = inputs[0]
        output = ref_output[0]
        initial_input = inputs[1]
        input_grad = np.zeros(shape=input.shape)
        right_grad = 0

        for t_cur in range(T - 1, -1, -1):
            prev_output = output[t_cur - 1] if t_cur > 0 else initial_input
            input_grad[t_cur] = \
                (output_grad[t_cur] + right_grad) * prev_output
            right_grad = input[t_cur] * (output_grad[t_cur] + right_grad)

        return (input_grad, right_grad.reshape([1, n, d]))

    self.assertReferenceChecks(
        device_option=hu.cpu_do,
        op=op,
        inputs=[
            workspace.FetchBlob(name) for name in [input_blob, one_blob]
        ],
        reference=reference,
        grad_reference=grad_reference,
        output_to_grad=op.output[0],
        outputs_to_check=[0, 1],
    )
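
# Sketch of the chain rule that grad_reference implements: with
#   y_t = x_t * y_{t-1},  y_{-1} = y_init,
# the gradient flowing into y_{t-1} from step t is x_t * (dL/dy_t + carry),
# and dL/dx_t = (dL/dy_t + carry) * y_{t-1}, where "carry" (right_grad above)
# accumulates gradient from later steps; after t = 0 it equals dL/dy_init.
# A finite-difference spot check of that recursion (illustration only; the
# helper name is hypothetical):
import numpy as np

def check_mul_rnn_grad(T=3, n=2, d=2, eps=1e-6):
    rng = np.random.RandomState(0)
    x, y0, g = rng.randn(T, n, d), rng.randn(1, n, d), rng.randn(T, n, d)

    def loss(x):
        y, out = y0[0], 0.0
        for t in range(T):
            y = x[t] * y
            out += (g[t] * y).sum()  # g plays the upstream output gradient
        return out

    # forward pass, keeping all intermediate outputs
    y_all, y = np.empty_like(x), y0[0]
    for t in range(T):
        y = x[t] * y
        y_all[t] = y

    # backward recursion, identical in structure to grad_reference
    right, xg = np.zeros((n, d)), np.empty_like(x)
    for t in range(T - 1, -1, -1):
        prev = y_all[t - 1] if t > 0 else y0[0]
        xg[t] = (g[t] + right) * prev
        right = x[t] * (g[t] + right)

    # numeric derivative w.r.t. one entry should match the analytic one
    x2 = x.copy()
    x2[0, 0, 0] += eps
    assert abs((loss(x2) - loss(x)) / eps - xg[0, 0, 0]) < 1e-4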
def test_mul_rnn(self, T, n, d):
    model = ModelHelperBase(name='external')

    one_blob = model.param_init_net.ConstantFill(
        [], value=1.0, shape=[1, n, d])
    input_blob = model.net.AddExternalInput('input')

    step = ModelHelperBase(name='step', param_model=model)
    input_t, output_t_prev = step.net.AddExternalInput(
        'input_t', 'output_t_prev')
    output_t = step.net.Mul([input_t, output_t_prev])
    step.net.AddExternalOutput(output_t)

    recurrent.recurrent_net(
        net=model.net,
        cell_net=step.net,
        inputs=[(input_t, input_blob)],
        initial_cell_inputs=[(output_t_prev, one_blob)],
        links={output_t_prev: output_t},
        scratch_sizes=[],
        scope="test_mul_rnn",
    )

    workspace.FeedBlob(
        str(input_blob), np.random.randn(T, n, d).astype(np.float32))
    workspace.RunNetOnce(model.param_init_net)

    op = model.net._net.op[-1]

    def reference(input, initial_input):
        recurrent_input = initial_input
        result = np.zeros(shape=input.shape)

        for t_cur in range(T):
            recurrent_input = recurrent_input * input[t_cur]
            result[t_cur] = recurrent_input

        shape = list(input.shape)
        shape[0] = 1
        return (result, result[-1].reshape(shape))

    def grad_reference(output_grad, ref_output, inputs):
        input = inputs[0]
        output = ref_output[0]
        initial_input = inputs[1]
        input_grad = np.zeros(shape=input.shape)
        right_grad = 0

        for t_cur in range(T - 1, -1, -1):
            prev_output = output[t_cur - 1] if t_cur > 0 else initial_input
            input_grad[t_cur] = \
                (output_grad[t_cur] + right_grad) * prev_output
            right_grad = input[t_cur] * (output_grad[t_cur] + right_grad)

        # the accumulated right_grad is the gradient w.r.t. the initial
        # input, which has shape [1, n, d]
        return (input_grad, right_grad.reshape([1, n, d]))

    self.assertReferenceChecks(
        hu.cpu_do,
        op,
        [workspace.FetchBlob(name) for name in [input_blob, one_blob]],
        reference,
        grad_reference=grad_reference,
        output_to_grad=op.output[0],
    )
def build_crf_net(self, input_blob, initial_state, transitions):
    '''
    Adds the crf_net recurrent operator to the model.

    model: model_helper.ModelHelper object that new operators will be
    added to

    input_blob: the input sequence in a format T x N x D
    where T is the sequence length, N the batch size and D the input
    dimension
    ##Only supports batch-size 1##

    seq_lengths: blob containing sequence lengths (unused)
    '''

    scope = 'crf_net'

    def s(name):
        # We have to manually scope due to our internal/external blob
        # relationships.
        return "{}/{}".format(str(scope), str(name))

    step_model = model_helper.ModelHelper(
        name='crf_step', param_model=self.model)
    input_t, cell_t_prev, _ = (
        step_model.net.AddExternalInputs(
            core.ScopedBlobReference('input_t'),
            core.ScopedBlobReference('cell_t_prev'),
            transitions
        )
    )
    zero_segment_id = step_model.param_init_net.ConstantFill(
        [],
        [s('zero_segment_id')],
        value=0,
        shape=[self.num_classes_padded],
        dtype=core.DataType.INT32,
    )

    # A hack to bypass model cloning for test
    step_model.param_init_net.AddExternalOutput(zero_segment_id)
    """ the CRF step """
    # Do tile
    prev_transpose = brew.transpose(
        step_model,
        cell_t_prev,
        [s('prev_transpose')],
        axes=(0, 2, 1),
    )
    prev_tiled = step_model.net.Tile(
        prev_transpose,
        [s('prev_tiled')],
        tiles=self.num_classes_padded,
        axis=2,
    )
    input_t_tiled = step_model.net.Tile(
        input_t,
        [s('input_t_tiled')],
        tiles=self.num_classes_padded,
        axis=1,
    )
    input_with_prev = step_model.net.Add(
        [prev_tiled, input_t_tiled],
        [s('input_with_prev')]
    )
    all_with_transitions = step_model.net.Add(
        [input_with_prev, transitions],
        [s('prev_with_transitions')],
        broadcast=1,
        use_grad_hack=1,
    )
    all_with_transitions_reshaped, _ = step_model.net.Reshape(
        all_with_transitions,
        [s('all_with_transitions_reshaped'),
         s('all_with_transitions_orig')],
        shape=(self.num_classes_padded, self.num_classes_padded)
    )
    cell_t = step_model.net.SortedSegmentRangeLogSumExp(
        [all_with_transitions_reshaped, zero_segment_id],
        [s('cell_t')],
    )
    step_model.net.AddExternalOutputs(cell_t)
    """ recurrent network """
    cell_input_blob = initial_state
    out_all, out_last = recurrent.recurrent_net(
        net=self.model.net,
        cell_net=step_model.net,
        inputs=[(input_t, input_blob)],
        initial_cell_inputs=[
            (cell_t_prev, cell_input_blob),
        ],
        links={
            cell_t_prev: cell_t,
        },
        scope=scope,
        outputs_with_grads=(1,)
    )
    return out_last