def model_build_fun(model, loss_scale):
    workspace.FeedBlob(
        core.ScopedBlobReference("seq_lengths"),
        np.array([self.T] * self.batch_per_device, dtype=np.int32)
    )
    model.param_init_net.ConstantFill(
        [],
        "hidden_init",
        value=0.0,
        shape=[1, self.batch_per_device, self.hidden_dim]
    )
    model.param_init_net.ConstantFill(
        [],
        "cell_init",
        value=0.0,
        shape=[1, self.batch_per_device, self.hidden_dim]
    )

    output, _last_hidden, _, _last_state = rnn_cell.LSTM(
        model=model,
        input_blob="data",
        seq_lengths="seq_lengths",
        initial_states=("hidden_init", "cell_init"),
        dim_in=self.input_dim,
        dim_out=self.hidden_dim,
        scope="partest",
    )

    # A silly loss function
    loss = model.AveragedLoss(
        model.Sub([output, "target"], "dist"),
        "loss",
    )
    loss = model.Scale(loss, "loss_scaled", scale=loss_scale)
    return [loss]
def create_model(args, queue, label_queue, input_shape):
    model = cnn.CNNModelHelper(name="LSTM_bench")
    seq_lengths, hidden_init, cell_init, target = \
        model.net.AddExternalInputs(
            'seq_lengths',
            'hidden_init',
            'cell_init',
            'target',
        )
    input_blob = model.DequeueBlobs(queue, "input_data")
    labels = model.DequeueBlobs(label_queue, "label")

    if args.implementation == "own":
        output, last_hidden, _, last_state = rnn_cell.LSTM(
            model=model,
            input_blob=input_blob,
            seq_lengths=seq_lengths,
            initial_states=(hidden_init, cell_init),
            dim_in=args.input_dim,
            dim_out=args.hidden_dim,
            scope="lstm1",
            memory_optimization=args.memory_optimization,
        )
    elif args.implementation == "cudnn":
        # We need to feed a placeholder input so that RecurrentInitOp
        # can infer the dimensions.
        model.param_init_net.ConstantFill([], input_blob, shape=input_shape)
        output, last_hidden, _ = rnn_cell.cudnn_LSTM(
            model=model,
            input_blob=input_blob,
            initial_states=(hidden_init, cell_init),
            dim_in=args.input_dim,
            dim_out=args.hidden_dim,
            scope="cudnnlstm",
            num_layers=1,
        )
    else:
        assert False, "Unknown implementation"

    weights = model.UniformFill(labels, "weights")
    softmax, loss = model.SoftmaxWithLoss(
        [model.Flatten(output), labels, weights],
        ['softmax', 'loss'],
    )
    model.AddGradientOperators([loss])

    # Carry states over between iterations. Note that the cuDNN branch above
    # does not return its cell state, so the last hidden state is copied into
    # both recurrent inputs.
    model.net.Copy(last_hidden, hidden_init)
    model.net.Copy(last_hidden, cell_init)

    workspace.FeedBlob(
        hidden_init,
        np.zeros([1, args.batch_size, args.hidden_dim], dtype=np.float32))
    workspace.FeedBlob(
        cell_init,
        np.zeros([1, args.batch_size, args.hidden_dim], dtype=np.float32))

    return model, output
def _create_lstm(cls, init_model, pred_model, n, opset_version):
    assert init_model is not None, "cannot convert LSTMs without access to the full model"
    assert pred_model is not None, "cannot convert LSTMs without access to the full model"

    attrs = dict(n.attrs)  # make a copy, which is safe to mutate
    hidden_size = attrs.pop('hidden_size')
    assert not attrs, "unsupported LSTM attributes: " + str(attrs.keys())

    input_blob, W, R, B, sequence_lens, initial_h, initial_c = n.inputs

    input_size = cls._rnn_shape_inference(init_model, pred_model, n, input_blob, W)
    if input_size is None:
        raise RuntimeError("best-effort shape inference for LSTM input failed")

    name = dummy_name()
    init_net = core.Net("init-net")
    pred_mh = ModelHelper()

    hidden_t_all, hidden_t_last, _, _, params = rnn_cell.LSTM(
        pred_mh,
        input_blob,
        sequence_lens,
        [initial_h, initial_c],
        input_size,
        hidden_size,
        name,
        forward_only=True,
        return_params=True
    )

    # input and recurrence biases are squashed together in onnx but not in caffe2
    Bi = name + "_bias_i2h"
    Br = name + "_bias_gates"
    init_net.Slice(B, Bi, starts=[0 * hidden_size], ends=[4 * hidden_size])
    init_net.Slice(B, Br, starts=[4 * hidden_size], ends=[8 * hidden_size])

    # caffe2 has a different order from onnx. We need to rearrange
    #  i o f c -> i f o c
    reforms = ((W, params['input']['weights'], [(0, input_size)]),
               (R, params['recurrent']['weights'], [(0, hidden_size)]),
               (Bi, params['input']['biases'], []),
               (Br, params['recurrent']['biases'], []))
    for name_from, name_to, extra_dims in reforms:
        xi, xo, xf, xc = [name_from + suffix for suffix in ("_i", "_o", "_f", "_c")]
        for i, x in enumerate([xi, xo, xf, xc]):
            dim0 = i * hidden_size, (i + 1) * hidden_size
            starts, ends = zip(dim0, *extra_dims)
            init_net.Slice(name_from, x, starts=starts, ends=ends)
        init_net.Concat([xi, xf, xo, xc], [name_to, dummy_name()], axis=0)

    pred_mh.net = pred_mh.net.Clone(
        "dummy-clone-net",
        blob_remap={
            hidden_t_all: n.outputs[0],
            hidden_t_last: n.outputs[1]
        }
    )

    return Caffe2Ops(list(pred_mh.Proto().op),
                     list(init_net.Proto().op),
                     list(pred_mh.Proto().external_input))
def test_rnn(self):
    from caffe2.python import rnn_cell
    T = 5
    model = model_helper.ModelHelper()
    seq_lengths, labels = \
        model.net.AddExternalInputs(
            'seq_lengths',
            'labels',
        )
    init_blobs = []
    for i in range(2):
        hidden_init, cell_init = model.net.AddExternalInputs(
            "hidden_init_{}".format(i),
            "cell_init_{}".format(i)
        )
        init_blobs.extend([hidden_init, cell_init])
    model.param_init_net.ConstantFill([], ["input"], shape=[T, 4, 10])

    output, last_hidden, _, last_state = rnn_cell.LSTM(
        model=model,
        input_blob="input",
        seq_lengths=seq_lengths,
        initial_states=init_blobs,
        dim_in=10,
        dim_out=[10, 10],
        scope="lstm1",
        forward_only=False,
        drop_states=True,
        return_last_layer_only=True,
    )
    softmax, loss = model.net.SoftmaxWithLoss(
        [model.Flatten(output), "labels"],
        ['softmax', 'loss'],
    )

    model.AddGradientOperators([loss])
    blobs_before = count_blobs(model.net.Proto())
    optim_proto = memonger.share_grad_blobs(
        model.net,
        ["loss"],
        set(viewvalues(model.param_to_grad)),
        "",
        share_activations=True,
        dont_share_blobs=set(),
    )
    blobs_after = count_blobs(optim_proto)
    self.assertLess(blobs_after, blobs_before)

    # Run once to see all blobs are set up correctly
    for init_blob in init_blobs:
        workspace.FeedBlob(init_blob, np.zeros(
            [1, 4, 10], dtype=np.float32
        ))
    workspace.FeedBlob("seq_lengths", np.array([T] * 4, dtype=np.int32))
    workspace.FeedBlob("labels", np.random.rand(T).astype(np.int32))

    workspace.RunNetOnce(model.param_init_net)
    workspace.RunNetOnce(model.net)
def init_lstm_model(self, T, num_layers, forward_only, use_loss=True):
    workspace.FeedBlob(
        "seq_lengths",
        np.array([T] * self.batch_size, dtype=np.int32))
    workspace.FeedBlob(
        "target",
        np.random.rand(T, self.batch_size, self.hidden_dim).astype(np.float32))
    workspace.FeedBlob(
        "hidden_init",
        np.zeros([1, self.batch_size, self.hidden_dim], dtype=np.float32))
    workspace.FeedBlob(
        "cell_init",
        np.zeros([1, self.batch_size, self.hidden_dim], dtype=np.float32))

    model = model_helper.ModelHelper(name="lstm")
    model.net.AddExternalInputs(["input"])
    init_blobs = []
    for i in range(num_layers):
        hidden_init, cell_init = model.net.AddExternalInputs(
            "hidden_init_{}".format(i),
            "cell_init_{}".format(i))
        init_blobs.extend([hidden_init, cell_init])

    output, last_hidden, _, last_state = rnn_cell.LSTM(
        model=model,
        input_blob="input",
        seq_lengths="seq_lengths",
        initial_states=init_blobs,
        dim_in=self.input_dim,
        dim_out=[self.hidden_dim] * num_layers,
        scope="",
        drop_states=True,
        forward_only=forward_only,
        return_last_layer_only=True,
    )

    if use_loss:
        loss = model.AveragedLoss(
            model.SquaredL2Distance([output, "target"], "dist"),
            "loss")
        # Add gradient ops
        if not forward_only:
            model.AddGradientOperators([loss])

    # init
    for init_blob in init_blobs:
        workspace.FeedBlob(
            init_blob,
            np.zeros([1, self.batch_size, self.hidden_dim], dtype=np.float32))

    return model, output
def rnn_unidirectional_encoder(model, embedded_inputs, input_lengths,
                               initial_hidden_state, initial_cell_state,
                               embedding_size, encoder_num_units,
                               use_attention):
    """Unidirectional (forward pass) LSTM encoder."""
    outputs, final_hidden_state, _, final_cell_state = rnn_cell.LSTM(
        model=model,
        input_blob=embedded_inputs,
        seq_lengths=input_lengths,
        initial_states=(initial_hidden_state, initial_cell_state),
        dim_in=embedding_size,
        dim_out=encoder_num_units,
        scope='encoder',
        outputs_with_grads=([0] if use_attention else [1, 3]),
    )
    return outputs, final_hidden_state, final_cell_state
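A minimal, hypothetical driver for the encoder above (not from the original sources): the blob names, shapes, and zero-filled initial states are illustrative assumptions, following the FeedBlob patterns used in the other snippets.

import numpy as np
from caffe2.python import model_helper, workspace

T, batch_size, embedding_size, encoder_num_units = 5, 2, 8, 16
model = model_helper.ModelHelper(name="encoder_sketch")
# Sequence-major input of shape [T, batch_size, embedding_size].
workspace.FeedBlob(
    "embedded_inputs",
    np.random.randn(T, batch_size, embedding_size).astype(np.float32))
workspace.FeedBlob("input_lengths", np.array([T] * batch_size, dtype=np.int32))
# Zero initial hidden/cell states of shape [1, batch_size, encoder_num_units].
workspace.FeedBlob(
    "hidden_init",
    np.zeros([1, batch_size, encoder_num_units], dtype=np.float32))
workspace.FeedBlob(
    "cell_init",
    np.zeros([1, batch_size, encoder_num_units], dtype=np.float32))

outputs, final_h, final_c = rnn_unidirectional_encoder(
    model, "embedded_inputs", "input_lengths",
    "hidden_init", "cell_init",
    embedding_size, encoder_num_units, use_attention=False)

workspace.RunNetOnce(model.param_init_net)
workspace.RunNetOnce(model.net)
# The fetched outputs blob has shape [T, batch_size, encoder_num_units].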
def test_lstm_params(self):
    model = ModelHelper(name="lstm_params_test")

    with core.DeviceScope(core.DeviceOption(caffe2_pb2.CPU, 0)):
        output, _, _, _ = rnn_cell.LSTM(
            model=model,
            input_blob="input",
            seq_lengths="seqlengths",
            initial_states=None,
            dim_in=20,
            dim_out=40,
            scope="test",
            drop_states=True,
            return_last_layer_only=True,
        )
    for param in model.GetParams():
        self.assertNotEqual(model.get_param_info(param), None)
def test_multi_lstm(
    self,
    input_length,
    dim_in,
    max_num_units,
    num_layers,
    batch_size,
):
    model = ModelHelper(name='external')
    (
        input_sequence,
        seq_lengths,
    ) = model.net.AddExternalInputs(
        'input_sequence',
        'seq_lengths',
    )
    dim_out = [
        np.random.randint(1, max_num_units + 1)
        for _ in range(num_layers)
    ]
    h_all, h_last, c_all, c_last = rnn_cell.LSTM(
        model=model,
        input_blob=input_sequence,
        seq_lengths=seq_lengths,
        initial_states=None,
        dim_in=dim_in,
        dim_out=dim_out,
        scope='test',
        outputs_with_grads=(0,),
        return_params=False,
        memory_optimization=False,
        forget_bias=0.0,
        forward_only=False,
        return_last_layer_only=True,
    )

    workspace.RunNetOnce(model.param_init_net)

    seq_lengths_val = np.random.randint(
        1,
        input_length + 1,
        size=(batch_size),
    ).astype(np.int32)
    input_sequence_val = np.random.randn(
        input_length,
        batch_size,
        dim_in,
    ).astype(np.float32)
    workspace.FeedBlob(seq_lengths, seq_lengths_val)
    workspace.FeedBlob(input_sequence, input_sequence_val)

    hidden_input_list = []
    cell_input_list = []
    i2h_w_list = []
    i2h_b_list = []
    gates_w_list = []
    gates_b_list = []

    for i in range(num_layers):
        hidden_input_list.append(
            workspace.FetchBlob('test/initial_hidden_state_{}'.format(i)),
        )
        cell_input_list.append(
            workspace.FetchBlob('test/initial_cell_state_{}'.format(i)),
        )
        i2h_w_list.append(
            workspace.FetchBlob('test/layer_{}/i2h_w'.format(i)),
        )
        i2h_b_list.append(
            workspace.FetchBlob('test/layer_{}/i2h_b'.format(i)),
        )
        gates_w_list.append(
            workspace.FetchBlob('test/layer_{}/gates_t_w'.format(i)),
        )
        gates_b_list.append(
            workspace.FetchBlob('test/layer_{}/gates_t_b'.format(i)),
        )

    workspace.RunNetOnce(model.net)

    h_all_calc = workspace.FetchBlob(h_all)
    h_last_calc = workspace.FetchBlob(h_last)
    c_all_calc = workspace.FetchBlob(c_all)
    c_last_calc = workspace.FetchBlob(c_last)

    h_all_ref, h_last_ref, c_all_ref, c_last_ref = multi_lstm_reference(
        input_sequence_val,
        hidden_input_list,
        cell_input_list,
        i2h_w_list,
        i2h_b_list,
        gates_w_list,
        gates_b_list,
        seq_lengths_val,
        forget_bias=0.0,
    )

    h_all_delta = np.abs(h_all_ref - h_all_calc).sum()
    h_last_delta = np.abs(h_last_ref - h_last_calc).sum()
    c_all_delta = np.abs(c_all_ref - c_all_calc).sum()
    c_last_delta = np.abs(c_last_ref - c_last_calc).sum()

    self.assertAlmostEqual(h_all_delta, 0.0, places=5)
    self.assertAlmostEqual(h_last_delta, 0.0, places=5)
    self.assertAlmostEqual(c_all_delta, 0.0, places=5)
    self.assertAlmostEqual(c_last_delta, 0.0, places=5)

    input_values = {
        'input_sequence': input_sequence_val,
        'seq_lengths': seq_lengths_val,
    }
    for param in model.GetParams():
        value = workspace.FetchBlob(param)
        input_values[str(param)] = value

    output_sum = model.net.SumElements(
        [h_all],
        'output_sum',
        average=True,
    )
    fake_loss = model.net.Tanh(
        output_sum,
    )
    for param in model.GetParams():
        gradient_checker.NetGradientChecker.Check(
            model.net,
            outputs_with_grad=[fake_loss],
            input_values=input_values,
            input_to_check=str(param),
            print_net=False,
            step_size=0.0001,
            threshold=0.05,
        )
def testEqualToCudnn(self):
    with core.DeviceScope(core.DeviceOption(workspace.GpuDeviceType)):
        T = 8
        batch_size = 4
        input_dim = 8
        hidden_dim = 31
        workspace.FeedBlob(
            "seq_lengths",
            np.array([T] * batch_size, dtype=np.int32))
        workspace.FeedBlob(
            "target",
            np.zeros([T, batch_size, hidden_dim], dtype=np.float32))
        workspace.FeedBlob(
            "hidden_init",
            np.zeros([1, batch_size, hidden_dim], dtype=np.float32))
        workspace.FeedBlob(
            "cell_init",
            np.zeros([1, batch_size, hidden_dim], dtype=np.float32))

        own_model = model_helper.ModelHelper(name="own_lstm")

        input_shape = [T, batch_size, input_dim]
        cudnn_model = model_helper.ModelHelper(name="cudnn_lstm")
        input_blob = cudnn_model.param_init_net.UniformFill(
            [], "input", shape=input_shape)
        workspace.FeedBlob(
            "CUDNN/hidden_init_cudnn",
            np.zeros([1, batch_size, hidden_dim], dtype=np.float32))
        workspace.FeedBlob(
            "CUDNN/cell_init_cudnn",
            np.zeros([1, batch_size, hidden_dim], dtype=np.float32))

        cudnn_output, cudnn_last_hidden, cudnn_last_state, param_extract = \
            rnn_cell.cudnn_LSTM(
                model=cudnn_model,
                input_blob=input_blob,
                initial_states=("hidden_init_cudnn", "cell_init_cudnn"),
                dim_in=input_dim,
                dim_out=hidden_dim,
                scope="CUDNN",
                return_params=True,
            )
        cudnn_loss = cudnn_model.AveragedLoss(
            cudnn_model.SquaredL2Distance(
                [cudnn_output, "target"], "CUDNN/dist"),
            "CUDNN/loss")

        own_output, own_last_hidden, _, own_last_state, own_params = \
            rnn_cell.LSTM(
                model=own_model,
                input_blob=input_blob,
                seq_lengths="seq_lengths",
                initial_states=("hidden_init", "cell_init"),
                dim_in=input_dim,
                dim_out=hidden_dim,
                scope="OWN",
                return_params=True,
            )
        own_loss = own_model.AveragedLoss(
            own_model.SquaredL2Distance([own_output, "target"], "OWN/dist"),
            "OWN/loss")

        # Add gradients
        cudnn_model.AddGradientOperators([cudnn_loss])
        own_model.AddGradientOperators([own_loss])

        # Add parameter updates
        LR = cudnn_model.param_init_net.ConstantFill(
            [], shape=[1], value=0.01)
        ONE = cudnn_model.param_init_net.ConstantFill(
            [], shape=[1], value=1.0)
        for param in cudnn_model.GetParams():
            cudnn_model.WeightedSum(
                [param, ONE, cudnn_model.param_to_grad[param], LR], param)
        for param in own_model.GetParams():
            own_model.WeightedSum(
                [param, ONE, own_model.param_to_grad[param], LR], param)

        # Copy states over
        own_model.net.Copy(own_last_hidden, "hidden_init")
        own_model.net.Copy(own_last_state, "cell_init")
        cudnn_model.net.Copy(cudnn_last_hidden, "CUDNN/hidden_init_cudnn")
        cudnn_model.net.Copy(cudnn_last_state, "CUDNN/cell_init_cudnn")

        workspace.RunNetOnce(cudnn_model.param_init_net)
        workspace.CreateNet(cudnn_model.net)

        ##
        ## CUDNN LSTM MODEL EXECUTION
        ##
        # Get initial values from CuDNN LSTM so we can feed them
        # to our own.
        (param_extract_net, param_extract_mapping) = param_extract
        workspace.RunNetOnce(param_extract_net)
        cudnn_lstm_params = {
            input_type: {
                k: workspace.FetchBlob(v[0])
                for k, v in viewitems(pars)
            }
            for input_type, pars in viewitems(param_extract_mapping)
        }

        # Run the model 3 times, so that some parameter updates are done
        workspace.RunNet(cudnn_model.net.Proto().name, 3)

        ##
        ## OWN LSTM MODEL EXECUTION
        ##
        # Map the cuDNN parameters to our own
        workspace.RunNetOnce(own_model.param_init_net)
        rnn_cell.InitFromLSTMParams(own_params, cudnn_lstm_params)

        # Run the model 3 times, so that some parameter updates are done
        workspace.CreateNet(own_model.net)
        workspace.RunNet(own_model.net.Proto().name, 3)

        ##
        ## COMPARE RESULTS
        ##
        # Then compare that final results after 3 runs are equal
        own_output_data = workspace.FetchBlob(own_output)
        own_last_hidden = workspace.FetchBlob(own_last_hidden)
        own_loss = workspace.FetchBlob(own_loss)

        cudnn_output_data = workspace.FetchBlob(cudnn_output)
        cudnn_last_hidden = workspace.FetchBlob(cudnn_last_hidden)
        cudnn_loss = workspace.FetchBlob(cudnn_loss)

        self.assertTrue(np.allclose(own_output_data, cudnn_output_data))
        self.assertTrue(np.allclose(own_last_hidden, cudnn_last_hidden))
        self.assertTrue(np.allclose(own_loss, cudnn_loss))
def make_cell(*args, **kwargs):
    return rnn_cell.LSTM(*args, **kwargs)
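The one-line factory above simply forwards to rnn_cell.LSTM. A hypothetical illustration of why such a wrapper is useful (an assumption, not part of the original tests): the call site stays identical while the cell constructor can be swapped out.

def build_with_factory(model, cell_factory):
    # The factory receives exactly the keyword arguments that rnn_cell.LSTM
    # takes in the other snippets in this section.
    return cell_factory(
        model=model,
        input_blob="input",
        seq_lengths="seq_lengths",
        initial_states=("hidden_init", "cell_init"),
        dim_in=10,
        dim_out=10,
        scope="factory_test",
    )

# output, last_hidden, _, last_state = build_with_factory(model, make_cell)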
def make_lstm(direction_offset):
    name = dummy_name()

    # input and recurrence biases are squashed together in
    # onnx but not in caffe2
    bias_offset = 8 * direction_offset * hidden_size
    Bi = init_net.Slice(B, name + "_bias_i2h",
                        starts=[bias_offset + 0 * hidden_size],
                        ends=[bias_offset + 4 * hidden_size])
    Br = init_net.Slice(B, name + "_bias_gates",
                        starts=[bias_offset + 4 * hidden_size],
                        ends=[bias_offset + 8 * hidden_size])

    weight_offset = 4 * direction_offset * hidden_size
    W_ = init_net.Slice(W, name + '/i2h_w_pre',
                        starts=[weight_offset + 0 * hidden_size, 0],
                        ends=[weight_offset + 4 * hidden_size, -1])
    R_ = init_net.Slice(R, name + '/gates_t_w_pre',
                        starts=[weight_offset + 0 * hidden_size, 0],
                        ends=[weight_offset + 4 * hidden_size, -1])

    # caffe2 has a different order from onnx. We need to rearrange
    #  i o f c -> i f o c
    reforms = ((W_, 'i2h_w', [(0, -1)]),
               (R_, 'gates_t_w', [(0, -1)]),
               (Bi, 'i2h_b', []),
               (Br, 'gates_t_b', []))
    for name_from, name_to, extra_dims in reforms:
        xi, xo, xf, xc = [name_from + suffix
                          for suffix in ("_i", "_o", "_f", "_c")]
        for i, x in enumerate([xi, xo, xf, xc]):
            dim0 = i * hidden_size, (i + 1) * hidden_size
            starts, ends = zip(dim0, *extra_dims)
            init_net.Slice(name_from, x, starts=starts, ends=ends)
        init_net.Concat([xi, xf, xo, xc],
                        ['%s/%s' % (name, name_to), dummy_name()], axis=0)

    initial_h_sliced = name + '/initial_h'
    init_net.Slice(initial_h, initial_h_sliced,
                   starts=[direction_offset + 0, 0, 0],
                   ends=[direction_offset + 1, -1, -1])
    initial_c_sliced = name + '/initial_c'
    init_net.Slice(initial_c, initial_c_sliced,
                   starts=[direction_offset + 0, 0, 0],
                   ends=[direction_offset + 1, -1, -1])

    if direction_offset == 1:
        input = pred_mh.net.ReversePackedSegs(
            [input_blob, sequence_lens], name + "/input-reversed")
    else:
        input = input_blob

    hidden_t_all, hidden_t_last, _, _, params = rnn_cell.LSTM(
        pred_mh,
        input,
        sequence_lens,
        [initial_h_sliced, initial_c_sliced],
        input_size,
        hidden_size,
        name,
        drop_states=True,
        forward_only=True,
        return_params=True
    )

    if direction_offset == 1:
        hidden_t_all = pred_mh.net.ReversePackedSegs(
            [hidden_t_all, sequence_lens], name + "/output-reversed")

    return hidden_t_all, hidden_t_last
def rnn_bidirectional_encoder(
    model,
    embedded_inputs,
    input_lengths,
    initial_hidden_state,
    initial_cell_state,
    embedding_size,
    encoder_num_units,
    use_attention,
    scope=None,
):
    """Bidirectional (forward pass and backward pass) LSTM encoder."""

    # Forward pass
    (
        outputs_fw,
        final_hidden_state_fw,
        _,
        final_cell_state_fw,
    ) = rnn_cell.LSTM(
        model=model,
        input_blob=embedded_inputs,
        seq_lengths=input_lengths,
        initial_states=(initial_hidden_state, initial_cell_state),
        dim_in=embedding_size,
        dim_out=encoder_num_units,
        scope=(scope + '/' if scope else '') + 'forward_encoder',
        outputs_with_grads=([0] if use_attention else [1, 3]),
    )

    # Backward pass
    reversed_embedded_inputs = model.net.ReversePackedSegs(
        [embedded_inputs, input_lengths],
        ['reversed_embedded_inputs'],
    )
    (
        outputs_bw,
        final_hidden_state_bw,
        _,
        final_cell_state_bw,
    ) = rnn_cell.LSTM(
        model=model,
        input_blob=reversed_embedded_inputs,
        seq_lengths=input_lengths,
        initial_states=(initial_hidden_state, initial_cell_state),
        dim_in=embedding_size,
        dim_out=encoder_num_units,
        scope=(scope + '/' if scope else '') + 'backward_encoder',
        outputs_with_grads=([0] if use_attention else [1, 3]),
    )
    outputs_bw = model.net.ReversePackedSegs(
        [outputs_bw, input_lengths],
        ['outputs_bw'],
    )

    # Concatenate forward and backward results
    outputs, _ = model.net.Concat(
        [outputs_fw, outputs_bw],
        ['outputs', 'outputs_dim'],
        axis=2,
    )
    final_hidden_state, _ = model.net.Concat(
        [final_hidden_state_fw, final_hidden_state_bw],
        ['final_hidden_state', 'final_hidden_state_dim'],
        axis=2,
    )
    final_cell_state, _ = model.net.Concat(
        [final_cell_state_fw, final_cell_state_bw],
        ['final_cell_state', 'final_cell_state_dim'],
        axis=2,
    )
    return outputs, final_hidden_state, final_cell_state
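Note, inferred from the Concat calls above rather than stated in the original source: because the forward and backward results are concatenated along axis=2, downstream layers should expect twice the per-direction width.

# Expected shapes after the bidirectional encoder (illustrative):
#   outputs:            [T, batch_size, 2 * encoder_num_units]
#   final_hidden_state: [1, batch_size, 2 * encoder_num_units]
#   final_cell_state:   [1, batch_size, 2 * encoder_num_units]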
def test_observer_rnn_executor(self, num_layers, forward_only):
    '''
    Test that the RNN executor produces the same results as the non-executor
    (i.e. running step nets as a sequence of simple nets).
    '''
    Tseq = [2, 3, 4]
    batch_size = 10
    input_dim = 3
    hidden_dim = 3

    run_cnt = [0] * len(Tseq)
    avg_time = [0] * len(Tseq)
    for j in range(len(Tseq)):
        T = Tseq[j]

        ws.ResetWorkspace()
        ws.FeedBlob(
            "seq_lengths",
            np.array([T] * batch_size, dtype=np.int32))
        ws.FeedBlob(
            "target",
            np.random.rand(T, batch_size, hidden_dim).astype(np.float32))
        ws.FeedBlob(
            "hidden_init",
            np.zeros([1, batch_size, hidden_dim], dtype=np.float32))
        ws.FeedBlob(
            "cell_init",
            np.zeros([1, batch_size, hidden_dim], dtype=np.float32))

        model = model_helper.ModelHelper(name="lstm")
        model.net.AddExternalInputs(["input"])
        init_blobs = []
        for i in range(num_layers):
            hidden_init, cell_init = model.net.AddExternalInputs(
                "hidden_init_{}".format(i),
                "cell_init_{}".format(i))
            init_blobs.extend([hidden_init, cell_init])

        output, last_hidden, _, last_state = rnn_cell.LSTM(
            model=model,
            input_blob="input",
            seq_lengths="seq_lengths",
            initial_states=init_blobs,
            dim_in=input_dim,
            dim_out=[hidden_dim] * num_layers,
            drop_states=True,
            forward_only=forward_only,
            return_last_layer_only=True,
        )

        loss = model.AveragedLoss(
            model.SquaredL2Distance([output, "target"], "dist"),
            "loss")

        # Add gradient ops
        if not forward_only:
            model.AddGradientOperators([loss])

        # init
        for init_blob in init_blobs:
            ws.FeedBlob(
                init_blob,
                np.zeros([1, batch_size, hidden_dim], dtype=np.float32))
        ws.RunNetOnce(model.param_init_net)

        # Run with executor
        self.enable_rnn_executor(model.net, 1, forward_only)

        np.random.seed(10022015)
        input_shape = [T, batch_size, input_dim]
        ws.FeedBlob(
            "input",
            np.random.rand(*input_shape).astype(np.float32))
        ws.FeedBlob(
            "target",
            np.random.rand(T, batch_size, hidden_dim).astype(np.float32))

        ws.CreateNet(model.net, overwrite=True)
        time_ob = model.net.AddObserver("TimeObserver")
        run_cnt_ob = model.net.AddObserver("RunCountObserver")
        ws.RunNet(model.net)
        avg_time[j] = time_ob.average_time()
        run_cnt[j] = int(
            ''.join(x for x in run_cnt_ob.debug_info() if x.isdigit()))
        model.net.RemoveObserver(time_ob)
        model.net.RemoveObserver(run_cnt_ob)

    print(avg_time)
    print(run_cnt)
    self.assertTrue(run_cnt[1] > run_cnt[0] and run_cnt[2] > run_cnt[1])
    self.assertEqual(run_cnt[1] - run_cnt[0], run_cnt[2] - run_cnt[1])
def test_lstm_equal_simplenet(self, num_layers, T, forward_only, gc, dc):
    '''
    Test that the RNN executor produces the same results as the non-executor
    (i.e. running step nets as a sequence of simple nets).
    '''
    self.Tseq = [T, T // 2, T // 2 + T // 4, T, T // 2 + 1]

    workspace.ResetWorkspace()
    with core.DeviceScope(gc):
        print("Run with device: {}, forward only: {}".format(
            gc, forward_only))

        workspace.FeedBlob(
            "seq_lengths",
            np.array([T] * self.batch_size, dtype=np.int32))
        workspace.FeedBlob(
            "target",
            np.random.rand(
                T, self.batch_size, self.hidden_dim).astype(np.float32))
        workspace.FeedBlob(
            "hidden_init",
            np.zeros([1, self.batch_size, self.hidden_dim], dtype=np.float32))
        workspace.FeedBlob(
            "cell_init",
            np.zeros([1, self.batch_size, self.hidden_dim], dtype=np.float32))

        model = model_helper.ModelHelper(name="lstm")
        model.net.AddExternalInputs(["input"])
        init_blobs = []
        for i in range(num_layers):
            hidden_init, cell_init = model.net.AddExternalInputs(
                "hidden_init_{}".format(i),
                "cell_init_{}".format(i))
            init_blobs.extend([hidden_init, cell_init])

        output, last_hidden, _, last_state = rnn_cell.LSTM(
            model=model,
            input_blob="input",
            seq_lengths="seq_lengths",
            initial_states=init_blobs,
            dim_in=self.input_dim,
            dim_out=[self.hidden_dim] * num_layers,
            scope="",
            drop_states=True,
            forward_only=forward_only,
            return_last_layer_only=True,
        )

        loss = model.AveragedLoss(
            model.SquaredL2Distance([output, "target"], "dist"),
            "loss")

        # Add gradient ops
        if not forward_only:
            model.AddGradientOperators([loss])

        # init
        for init_blob in init_blobs:
            workspace.FeedBlob(
                init_blob,
                np.zeros([1, self.batch_size, self.hidden_dim],
                         dtype=np.float32))

        self._compare(model, forward_only)
def test_lstm_extract_predictor_net(self):
    model = ModelHelper(name="lstm_extract_test")

    with core.DeviceScope(core.DeviceOption(caffe2_pb2.CPU, 0)):
        output, _, _, _ = rnn_cell.LSTM(
            model=model,
            input_blob="input",
            seq_lengths="seqlengths",
            initial_states=("hidden_init", "cell_init"),
            dim_in=20,
            dim_out=40,
            scope="test",
            drop_states=True,
            return_last_layer_only=True,
        )

    # Run param init net to get the shapes for all inputs
    shapes = {}
    workspace.RunNetOnce(model.param_init_net)
    for b in workspace.Blobs():
        shapes[b] = workspace.FetchBlob(b).shape

    # But export in CPU
    (predict_net, export_blobs) = ExtractPredictorNet(
        net_proto=model.net.Proto(),
        input_blobs=["input"],
        output_blobs=[output],
        device=core.DeviceOption(caffe2_pb2.CPU, 1),
    )

    # Create the net and run once to see it is valid
    # Populate external inputs with correctly shaped random input
    # and also ensure that the export_blobs was constructed correctly.
    workspace.ResetWorkspace()
    shapes['input'] = [10, 4, 20]
    shapes['cell_init'] = [1, 4, 40]
    shapes['hidden_init'] = [1, 4, 40]

    print(predict_net.Proto().external_input)
    self.assertTrue('seqlengths' in predict_net.Proto().external_input)

    for einp in predict_net.Proto().external_input:
        if einp == 'seqlengths':
            workspace.FeedBlob(
                "seqlengths",
                np.array([10] * 4, dtype=np.int32))
        else:
            workspace.FeedBlob(
                einp,
                np.zeros(shapes[einp]).astype(np.float32),
            )
            if einp != 'input':
                self.assertTrue(einp in export_blobs)

    print(str(predict_net.Proto()))
    self.assertTrue(workspace.CreateNet(predict_net.Proto()))
    self.assertTrue(workspace.RunNet(predict_net.Proto().name))

    # Validate device options set correctly for the RNNs
    import google.protobuf.text_format as protobuftx
    for op in predict_net.Proto().op:
        if op.type == 'RecurrentNetwork':
            for arg in op.arg:
                if arg.name == "step_net":
                    step_proto = caffe2_pb2.NetDef()
                    protobuftx.Merge(arg.s.decode("ascii"), step_proto)
                    for step_op in step_proto.op:
                        self.assertEqual(0, step_op.device_option.device_type)
                        self.assertEqual(1, step_op.device_option.cuda_gpu_id)
                elif arg.name == 'backward_step_net':
                    self.assertEqual(b"", arg.s)
def model_build_fun(self, model, forward_only=False, loss_scale=None):
    encoder_inputs = model.net.AddExternalInput(
        workspace.GetNameScope() + 'encoder_inputs',
    )
    encoder_lengths = model.net.AddExternalInput(
        workspace.GetNameScope() + 'encoder_lengths',
    )
    decoder_inputs = model.net.AddExternalInput(
        workspace.GetNameScope() + 'decoder_inputs',
    )
    decoder_lengths = model.net.AddExternalInput(
        workspace.GetNameScope() + 'decoder_lengths',
    )
    targets = model.net.AddExternalInput(
        workspace.GetNameScope() + 'targets',
    )
    target_weights = model.net.AddExternalInput(
        workspace.GetNameScope() + 'target_weights',
    )
    attention_type = self.model_params['attention']
    assert attention_type in ['none', 'regular']

    (
        encoder_outputs,
        weighted_encoder_outputs,
        final_encoder_hidden_state,
        final_encoder_cell_state,
        encoder_output_dim,
    ) = seq2seq_util.build_embedding_encoder(
        model=model,
        encoder_params=self.encoder_params,
        inputs=encoder_inputs,
        input_lengths=encoder_lengths,
        vocab_size=self.source_vocab_size,
        embeddings=self.encoder_embeddings,
        embedding_size=self.model_params['encoder_embedding_size'],
        use_attention=(attention_type != 'none'),
        num_gpus=self.num_gpus,
    )

    assert len(self.model_params['decoder_layer_configs']) == 1
    decoder_num_units = (
        self.model_params['decoder_layer_configs'][0]['num_units']
    )

    initial_states = seq2seq_util.build_initial_rnn_decoder_states(
        model=model,
        encoder_num_units=encoder_output_dim,
        decoder_num_units=decoder_num_units,
        final_encoder_hidden_state=final_encoder_hidden_state,
        final_encoder_cell_state=final_encoder_cell_state,
        use_attention=(attention_type != 'none'),
    )

    if self.num_gpus == 0:
        embedded_decoder_inputs = model.net.Gather(
            [self.decoder_embeddings, decoder_inputs],
            ['embedded_decoder_inputs'],
        )
    else:
        with core.DeviceScope(core.DeviceOption(caffe2_pb2.CPU)):
            embedded_decoder_inputs_cpu = model.net.Gather(
                [self.decoder_embeddings, decoder_inputs],
                ['embedded_decoder_inputs_cpu'],
            )
        embedded_decoder_inputs = model.CopyCPUToGPU(
            embedded_decoder_inputs_cpu,
            'embedded_decoder_inputs',
        )

    # seq_len x batch_size x decoder_embedding_size
    if attention_type == 'none':
        decoder_outputs, _, _, _ = rnn_cell.LSTM(
            model=model,
            input_blob=embedded_decoder_inputs,
            seq_lengths=decoder_lengths,
            initial_states=initial_states,
            dim_in=self.model_params['decoder_embedding_size'],
            dim_out=decoder_num_units,
            scope='decoder',
            outputs_with_grads=[0],
        )
        decoder_output_size = decoder_num_units
    else:
        (
            decoder_outputs, _, _, _,
            attention_weighted_encoder_contexts, _,
        ) = rnn_cell.LSTMWithAttention(
            model=model,
            decoder_inputs=embedded_decoder_inputs,
            decoder_input_lengths=decoder_lengths,
            initial_decoder_hidden_state=initial_states[0],
            initial_decoder_cell_state=initial_states[1],
            initial_attention_weighted_encoder_context=initial_states[2],
            encoder_output_dim=encoder_output_dim,
            encoder_outputs=encoder_outputs,
            decoder_input_dim=self.model_params['decoder_embedding_size'],
            decoder_state_dim=decoder_num_units,
            scope='decoder',
            outputs_with_grads=[0, 4],
        )
        decoder_outputs, _ = model.net.Concat(
            [decoder_outputs, attention_weighted_encoder_contexts],
            [
                'states_and_context_combination',
                '_states_and_context_combination_concat_dims',
            ],
            axis=2,
        )
        decoder_output_size = decoder_num_units + encoder_output_dim

    # we do softmax over the whole sequence
    # (max_length in the batch * batch_size) x decoder embedding size
    # -1 because we don't know max_length yet
    decoder_outputs_flattened, _ = model.net.Reshape(
        [decoder_outputs],
        [
            'decoder_outputs_flattened',
            'decoder_outputs_and_contexts_combination_old_shape',
        ],
        shape=[-1, decoder_output_size],
    )
    output_logits = seq2seq_util.output_projection(
        model=model,
        decoder_outputs=decoder_outputs_flattened,
        decoder_output_size=decoder_output_size,
        target_vocab_size=self.target_vocab_size,
        decoder_softmax_size=self.model_params['decoder_softmax_size'],
    )
    targets, _ = model.net.Reshape(
        [targets],
        ['targets', 'targets_old_shape'],
        shape=[-1],
    )
    target_weights, _ = model.net.Reshape(
        [target_weights],
        ['target_weights', 'target_weights_old_shape'],
        shape=[-1],
    )
    output_probs = model.net.Softmax(
        [output_logits],
        ['output_probs'],
        engine=('CUDNN' if self.num_gpus > 0 else None),
    )
    label_cross_entropy = model.net.LabelCrossEntropy(
        [output_probs, targets],
        ['label_cross_entropy'],
    )
    weighted_label_cross_entropy = model.net.Mul(
        [label_cross_entropy, target_weights],
        'weighted_label_cross_entropy',
    )
    total_loss_scalar = model.net.SumElements(
        [weighted_label_cross_entropy],
        'total_loss_scalar',
    )
    total_loss_scalar_weighted = model.net.Scale(
        [total_loss_scalar],
        'total_loss_scalar_weighted',
        scale=1.0 / self.batch_size,
    )
    return [total_loss_scalar_weighted]
def create_model(args, queue, label_queue, input_shape):
    model = model_helper.ModelHelper(name="LSTM_bench")
    seq_lengths, target = \
        model.net.AddExternalInputs(
            'seq_lengths',
            'target',
        )

    input_blob = model.net.DequeueBlobs(queue, "input_data")
    labels = model.net.DequeueBlobs(label_queue, "label")

    init_blobs = []
    if args.implementation in ["own", "static", "static_dag"]:
        T = None
        if "static" in args.implementation:
            assert args.fixed_shape, \
                "Random input length is not static RNN compatible"
            T = args.seq_length
            print("Using static RNN of size {}".format(T))

        for i in range(args.num_layers):
            hidden_init, cell_init = model.net.AddExternalInputs(
                "hidden_init_{}".format(i),
                "cell_init_{}".format(i))
            init_blobs.extend([hidden_init, cell_init])

        output, last_hidden, _, last_state = rnn_cell.LSTM(
            model=model,
            input_blob=input_blob,
            seq_lengths=seq_lengths,
            initial_states=init_blobs,
            dim_in=args.input_dim,
            dim_out=[args.hidden_dim] * args.num_layers,
            scope="lstm1",
            memory_optimization=args.memory_optimization,
            forward_only=args.forward_only,
            drop_states=True,
            return_last_layer_only=True,
            static_rnn_unroll_size=T,
        )

        if "dag" in args.implementation:
            print("Using DAG net type")
            model.net.Proto().type = 'dag'
            model.net.Proto().num_workers = 4
    elif args.implementation == "cudnn":
        # We need to feed a placeholder input so that RecurrentInitOp
        # can infer the dimensions.
        init_blobs = model.net.AddExternalInputs("hidden_init", "cell_init")
        model.param_init_net.ConstantFill([], input_blob, shape=input_shape)
        output, last_hidden, _ = rnn_cell.cudnn_LSTM(
            model=model,
            input_blob=input_blob,
            initial_states=init_blobs,
            dim_in=args.input_dim,
            dim_out=args.hidden_dim,
            scope="cudnnlstm",
            num_layers=args.num_layers,
        )
    else:
        assert False, "Unknown implementation"

    weights = model.net.UniformFill(labels, "weights")
    softmax, loss = model.net.SoftmaxWithLoss(
        [model.Flatten(output), labels, weights],
        ['softmax', 'loss'],
    )

    if not args.forward_only:
        model.AddGradientOperators([loss])

    # carry states over
    for init_blob in init_blobs:
        model.net.Copy(last_hidden, init_blob)

        sz = args.hidden_dim
        if args.implementation == "cudnn":
            sz *= args.num_layers
        workspace.FeedBlob(
            init_blob,
            np.zeros([1, args.batch_size, sz], dtype=np.float32))

    if args.rnn_executor:
        for op in model.net.Proto().op:
            if op.type.startswith('RecurrentNetwork'):
                recurrent.set_rnn_executor_config(
                    op,
                    num_threads=args.rnn_executor_num_threads,
                    max_cuda_streams=args.rnn_executor_max_cuda_streams,
                )
    return model, output
def create_model(args, queue, label_queue, input_shape):
    model = cnn.CNNModelHelper(name="LSTM_bench")
    seq_lengths, target = \
        model.net.AddExternalInputs(
            'seq_lengths',
            'target',
        )
    input_blob = model.DequeueBlobs(queue, "input_data")
    labels = model.DequeueBlobs(label_queue, "label")

    init_blobs = []
    if args.implementation == "own":
        for i in range(args.num_layers):
            init_blobs.append("hidden_init_{}".format(i))
            init_blobs.append("cell_init_{}".format(i))
        model.net.AddExternalInputs(init_blobs)

        output, last_hidden, _, last_state = rnn_cell.LSTM(
            model=model,
            input_blob=input_blob,
            seq_lengths=seq_lengths,
            initial_states=init_blobs,
            dim_in=args.input_dim,
            dim_out=[args.hidden_dim] * args.num_layers,
            scope="lstm1",
            memory_optimization=args.memory_optimization,
            forward_only=args.forward_only,
            drop_states=True,
            return_last_layer_only=True,
        )
    elif args.implementation == "cudnn":
        # We need to feed a placeholder input so that RecurrentInitOp
        # can infer the dimensions.
        init_blobs = model.net.AddExternalInputs("hidden_init", "cell_init")
        model.param_init_net.ConstantFill([], input_blob, shape=input_shape)
        output, last_hidden, _ = rnn_cell.cudnn_LSTM(
            model=model,
            input_blob=input_blob,
            initial_states=init_blobs,
            dim_in=args.input_dim,
            dim_out=args.hidden_dim,
            scope="cudnnlstm",
            num_layers=args.num_layers,
        )
    else:
        assert False, "Unknown implementation"

    weights = model.UniformFill(labels, "weights")
    softmax, loss = model.SoftmaxWithLoss(
        [model.Flatten(output), labels, weights],
        ['softmax', 'loss'],
    )

    if not args.forward_only:
        model.AddGradientOperators([loss])

    # carry states over
    for init_blob in init_blobs:
        model.net.Copy(last_hidden, init_blob)

        sz = args.hidden_dim
        if args.implementation == "cudnn":
            sz *= args.num_layers
        workspace.FeedBlob(init_blob, np.zeros(
            [1, args.batch_size, sz], dtype=np.float32
        ))

    return model, output