def build_embedding_decoder(
    model,
    decoder_layer_configs,
    inputs,
    input_lengths,
    encoder_lengths,
    encoder_outputs,
    weighted_encoder_outputs,
    final_encoder_hidden_states,
    final_encoder_cell_states,
    encoder_units_per_layer,
    vocab_size,
    embeddings,
    embedding_size,
    attention_type,
    forward_only,
    num_gpus=0,
    scope=None,
):
    """Build the decoder side of a seq2seq model.

    Looks up token embeddings for `inputs`, stacks LSTM layers (each
    optionally wrapped in dropout) as configured by `decoder_layer_configs`,
    initializes decoder states from the encoder's final states, and applies
    an attention decoder over the whole sequence.

    Args:
        model: model helper the ops are added to.
        decoder_layer_configs: list of dicts; each must contain 'num_units'
            and may contain 'dropout_keep_prob'.
        inputs: decoder input token ids.
        input_lengths: per-example decoder sequence lengths.
        encoder_lengths: per-example encoder sequence lengths.
        encoder_outputs: encoder output sequence blob.
        weighted_encoder_outputs: precomputed attention-weighted encoder
            outputs (may be None depending on attention type).
        final_encoder_hidden_states / final_encoder_cell_states: encoder
            final states used to initialize the decoder.
        encoder_units_per_layer: hidden sizes of the encoder layers.
        vocab_size: decoder vocabulary size.
        embeddings: embedding table blob.
        embedding_size: embedding dimension.
        attention_type: attention variant; 'none' disables attention.
        forward_only: if True, build inference-only cells.
        num_gpus: when > 0, the Gather runs on CPU and the result is copied
            to GPU (sparse Gather is a CPU-side operation here).
        scope: optional name scope prefix.

    Returns:
        (decoder_outputs, decoder_output_dim) where decoder_outputs is
        flattened to (total_tokens_in_batch, decoder_output_dim) so that a
        single softmax can be applied over the whole batch.
    """
    with core.NameScope(scope or ''):
        if num_gpus == 0:
            embedded_decoder_inputs = model.net.Gather(
                [embeddings, inputs],
                ['embedded_decoder_inputs'],
            )
        else:
            with core.DeviceScope(core.DeviceOption(caffe2_pb2.CPU)):
                embedded_decoder_inputs_cpu = model.net.Gather(
                    [embeddings, inputs],
                    ['embedded_decoder_inputs_cpu'],
                )
            embedded_decoder_inputs = model.CopyCPUToGPU(
                embedded_decoder_inputs_cpu,
                'embedded_decoder_inputs',
            )

    decoder_cells = []
    decoder_units_per_layer = []
    for i, layer_config in enumerate(decoder_layer_configs):
        num_units = layer_config['num_units']
        decoder_units_per_layer.append(num_units)
        if i == 0:
            input_size = embedding_size
        else:
            input_size = decoder_cells[-1].get_output_dim()
        cell = rnn_cell.LSTMCell(
            forward_only=forward_only,
            input_size=input_size,
            hidden_size=num_units,
            forget_bias=0.0,
            memory_optimization=False,
        )
        dropout_keep_prob = layer_config.get('dropout_keep_prob', None)
        if dropout_keep_prob is not None:
            # BUGFIX: layer_config is a dict, so the previous attribute
            # access (layer_config.dropout_keep_prob) raised AttributeError.
            # Use the value already fetched with .get() above.
            dropout_ratio = 1.0 - dropout_keep_prob
            cell = rnn_cell.DropoutCell(
                internal_cell=cell,
                dropout_ratio=dropout_ratio,
                forward_only=forward_only,
                is_test=False,
                name=get_layer_scope(scope, 'decoder_dropout', i),
            )
        decoder_cells.append(cell)

    states = build_initial_rnn_decoder_states(
        model=model,
        encoder_units_per_layer=encoder_units_per_layer,
        decoder_units_per_layer=decoder_units_per_layer,
        final_encoder_hidden_states=final_encoder_hidden_states,
        final_encoder_cell_states=final_encoder_cell_states,
        use_attention=(attention_type != 'none'),
    )
    attention_decoder = LSTMWithAttentionDecoder(
        encoder_outputs=encoder_outputs,
        encoder_output_dim=encoder_units_per_layer[-1],
        encoder_lengths=encoder_lengths,
        vocab_size=vocab_size,
        attention_type=attention_type,
        embedding_size=embedding_size,
        decoder_num_units=decoder_units_per_layer[-1],
        decoder_cells=decoder_cells,
        weighted_encoder_outputs=weighted_encoder_outputs,
        name=scope,
    )
    decoder_outputs, _ = attention_decoder.apply_over_sequence(
        model=model,
        inputs=embedded_decoder_inputs,
        seq_lengths=input_lengths,
        initial_states=states,
    )

    # we do softmax over the whole sequence
    # (max_length in the batch * batch_size) x decoder embedding size
    # -1 because we don't know max_length yet
    decoder_outputs_flattened, _ = model.net.Reshape(
        [decoder_outputs],
        [
            'decoder_outputs_flattened',
            'decoder_outputs_and_contexts_combination_old_shape',
        ],
        shape=[-1, attention_decoder.get_output_dim()],
    )

    decoder_outputs = decoder_outputs_flattened
    decoder_output_dim = attention_decoder.get_output_dim()

    return (decoder_outputs, decoder_output_dim)
def rnn_unidirectional_layer(
    model,
    inputs,
    input_lengths,
    input_size,
    num_units,
    dropout_keep_prob,
    forward_only,
    return_sequence_output,
    return_final_state,
    scope=None,
):
    """ Unidirectional LSTM encoder.

    Builds a single LSTM layer (optionally wrapped in dropout) over the
    input sequence.

    Args:
        model: model helper the ops are added to.
        inputs: input sequence blob.
        input_lengths: per-example sequence lengths.
        input_size: dimension of each input step.
        num_units: LSTM hidden size.
        dropout_keep_prob: keep probability, or None to disable dropout.
        forward_only: if True, build an inference-only cell.
        return_sequence_output: request gradients through the per-step
            outputs (index 0 of the cell's outputs).
        return_final_state: request gradients through the final hidden and
            cell states (indices 1 and 3).
        scope: optional name scope prefix; may be None or ''.

    Returns:
        (outputs, final_hidden_state, final_cell_state)
    """
    # CONSISTENCY FIX: guard against scope=None (the default) the same way
    # build_embedding_decoder does; the name-prefix logic below already
    # anticipates a falsy scope.
    with core.NameScope(scope or ''):
        initial_cell_state = model.param_init_net.ConstantFill(
            [],
            'initial_cell_state',
            shape=[num_units],
            value=0.0,
        )
        initial_hidden_state = model.param_init_net.ConstantFill(
            [],
            'initial_hidden_state',
            shape=[num_units],
            value=0.0,
        )

    # Cells are named explicitly (outside the NameScope above), so the
    # scope prefix is applied by hand.
    name_prefix = (scope + '/' if scope else '')
    cell = rnn_cell.LSTMCell(
        input_size=input_size,
        hidden_size=num_units,
        forget_bias=0.0,
        memory_optimization=False,
        name=name_prefix + 'lstm',
        forward_only=forward_only,
    )

    dropout_ratio = (
        None if dropout_keep_prob is None else (1.0 - dropout_keep_prob)
    )
    if dropout_ratio is not None:
        cell = rnn_cell.DropoutCell(
            internal_cell=cell,
            dropout_ratio=dropout_ratio,
            name=name_prefix + 'dropout',
            forward_only=forward_only,
            is_test=False,
        )

    # Select which cell outputs get gradients: 0 is the per-step output
    # sequence; 1 and 3 are the final hidden and cell states.
    outputs_with_grads = []
    if return_sequence_output:
        outputs_with_grads.append(0)
    if return_final_state:
        outputs_with_grads.extend([1, 3])

    outputs, (_, final_hidden_state, _, final_cell_state) = (
        cell.apply_over_sequence(
            model=model,
            inputs=inputs,
            seq_lengths=input_lengths,
            initial_states=(initial_hidden_state, initial_cell_state),
            outputs_with_grads=outputs_with_grads,
        )
    )
    return outputs, final_hidden_state, final_cell_state
def _prepare_attention(t, n, dim_in, encoder_dim,
                       forward_only=False, T=None,
                       dim_out=None, residual=False,
                       final_dropout=False):
    """Build a multi-layer MILSTM decoder with a recurrent attention cell.

    Test helper: constructs the net, feeds random initial states and
    sequence lengths, runs the param init net, and returns the handles
    needed to execute and check the model.

    Args:
        t: maximum sequence length (lengths are sampled in [1, t]).
        n: batch size.
        dim_in: input feature dimension.
        encoder_dim: dimension of the encoder outputs.
        forward_only: if True, build inference-only cells.
        T: if not None, statically unroll the attention cell for T steps.
        dim_out: list of per-layer hidden sizes; defaults to [dim_in].
        residual: if True, add residual connections between decoder layers.
        final_dropout: if True, wrap the attention cell in a DropoutCell.

    Returns:
        dict with the final output blob, the net, and the external input
        blobs ('initial_states', 'input_blob', 'encoder_outputs',
        'weighted_encoder_outputs') plus 'outputs_with_grads'.
    """
    if dim_out is None:
        dim_out = [dim_in]
    print("Dims: t={} n={} dim_in={} dim_out={}".format(t, n, dim_in, dim_out))

    model = ModelHelper(name='external')

    def generate_input_state(shape):
        return np.random.random(shape).astype(np.float32)

    initial_states = []
    for layer_id, d in enumerate(dim_out):
        h, c = model.net.AddExternalInputs(
            "hidden_init_{}".format(layer_id),
            "cell_init_{}".format(layer_id),
        )
        initial_states.extend([h, c])
        workspace.FeedBlob(h, generate_input_state((1, n, d)))
        workspace.FeedBlob(c, generate_input_state((1, n, d)))

    awec_init = model.net.AddExternalInputs([
        'initial_attention_weighted_encoder_context',
    ])
    initial_states.append(awec_init)
    workspace.FeedBlob(
        awec_init,
        generate_input_state((1, n, encoder_dim)),
    )

    # Due to convoluted RNN scoping logic we make sure that things
    # work from a namescope
    with scope.NameScope("test_name_scope"):
        (
            input_blob,
            seq_lengths,
            encoder_outputs,
            weighted_encoder_outputs,
        ) = model.net.AddScopedExternalInputs(
            'input_blob',
            'seq_lengths',
            'encoder_outputs',
            'weighted_encoder_outputs',
        )

        layer_input_dim = dim_in
        cells = []
        for layer_id, d in enumerate(dim_out):
            cell = rnn_cell.MILSTMCell(
                name='decoder_{}'.format(layer_id),
                forward_only=forward_only,
                input_size=layer_input_dim,
                hidden_size=d,
                forget_bias=0.0,
                memory_optimization=False,
            )
            cells.append(cell)
            layer_input_dim = d

        decoder_cell = rnn_cell.MultiRNNCell(
            cells,
            name='decoder',
            residual_output_layers=range(1, len(cells)) if residual else None,
        )

        attention_cell = rnn_cell.AttentionCell(
            encoder_output_dim=encoder_dim,
            encoder_outputs=encoder_outputs,
            encoder_lengths=None,
            decoder_cell=decoder_cell,
            decoder_state_dim=dim_out[-1],
            name='attention_decoder',
            attention_type=AttentionType.Recurrent,
            weighted_encoder_outputs=weighted_encoder_outputs,
            attention_memory_optimization=True,
        )
        if final_dropout:
            # dropout ratio of 0.0 used to test mechanism but not interfere
            # with numerical tests
            attention_cell = rnn_cell.DropoutCell(
                internal_cell=attention_cell,
                dropout_ratio=0.0,
                name='dropout',
                forward_only=forward_only,
            )

        attention_cell = (
            attention_cell if T is None
            else rnn_cell.UnrolledCell(attention_cell, T)
        )

        # BUGFIX: copy the index list before appending — the original code
        # appended to decoder_cell.output_indices itself, mutating the
        # cell's internal state as a side effect of this helper.
        output_indices = list(decoder_cell.output_indices)
        output_indices.append(2 * len(cells))
        outputs_with_grads = [2 * i for i in output_indices]

        final_output, state_outputs = attention_cell.apply_over_sequence(
            model=model,
            inputs=input_blob,
            seq_lengths=seq_lengths,
            initial_states=initial_states,
            outputs_with_grads=outputs_with_grads,
        )

    workspace.RunNetOnce(model.param_init_net)

    workspace.FeedBlob(
        seq_lengths,
        np.random.randint(1, t + 1, size=(n,)).astype(np.int32),
    )

    return {
        'final_output': final_output,
        'net': model.net,
        'initial_states': initial_states,
        'input_blob': input_blob,
        'encoder_outputs': encoder_outputs,
        'weighted_encoder_outputs': weighted_encoder_outputs,
        'outputs_with_grads': outputs_with_grads,
    }