# decoder
# the first token is <s>=1, so the feedback sequence is the target sequence shifted right by one step
feedback_var = tgt_var.apply(
    lambda t: T.concatenate([T.ones((t.shape[0], 1), dtype="int32"), t[:, :-1]], axis=1))
tgt_embed_layer = WordEmbedding(args.word_embed, args.tgt_vocab_size)
tgt_embed_layer.initialize(1)
second_input = tgt_embed_layer.belongs_to(decoder).compute(feedback_var, mask=tgt_mask_var)
# switch to time-major layout (time, batch, embedding) for the recurrent scan
second_input = DimShuffle(1, 0, 2).compute(second_input)
# (an LSTM unit of the same size can be swapped in here)
recurrent_unit = GRU(args.hidden_size, input_type="sequence", output_type="sequence",
                     additional_input_dims=[args.word_embed])
attention_layer = SoftAttentionalLayer(recurrent_unit)
attention_var = attention_layer.belongs_to(decoder).compute(
    hidden_layer, mask=src_mask_var, feedback=second_input, steps=tgt_var.shape[1])

# expander
expander_chain = Chain(Dense(600), Dense(args.tgt_vocab_size))
expander_chain.initialize(args.hidden_size)
output_var = expander_chain.belongs_to(expander).compute(attention_var)

cost = TMCostLayer(tgt_var, tgt_mask_var, args.tgt_vocab_size).compute(output_var)

model = ComputationalGraph(input_vars=[src_var, src_mask_var],
                           target_vars=[tgt_var, tgt_mask_var],
                           blocks=[encoder, decoder, expander],
                           cost=cost)
model.training_callbacks.append(training_monitor)

data = OnDiskDataset("{}/aspec.enja1_train.pkl".format(WMT_ROOT),
                     valid_path="{}/aspec.enja1_valid.pkl".format(WMT_ROOT),
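# Illustration (hypothetical toy data, not part of the model above): the feedback
# sequence gives the decoder the previous target token at every step, so the
# target matrix is shifted right by one column and the first column becomes <s>=1.
import numpy as np
toy_tgt = np.array([[5, 9, 3, 2],
                    [7, 4, 2, 0]], dtype="int32")          # (batch, time) token ids
toy_bos = np.ones((toy_tgt.shape[0], 1), dtype="int32")    # <s> = 1
toy_feedback = np.concatenate([toy_bos, toy_tgt[:, :-1]], axis=1)
# toy_feedback == [[1, 5, 9, 3],
#                  [1, 7, 4, 2]]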
# encoder
forward_rnn_var = (Chain(GRU(args.hidden_size, input_type="sequence", output_type="sequence"))
                   .belongs_to(encoder).compute(encoder_embed))
backward_rnn_var = (Chain(GRU(args.hidden_size, input_type="sequence", output_type="sequence", backward=True), Reverse3D())
                    .belongs_to(encoder).compute(encoder_embed))
encoder_output_var = Concatenate(axis=2).compute(forward_rnn_var, backward_rnn_var)

# decoder
decoder = Block()
last_token_var = create_var(T.ivector("tok"), test_shape=[64], test_dtype="int32")
seq_input_var = create_var(T.matrix('seq'), dim=args.hidden_size * 2,
                           test_shape=[64, args.hidden_size * 2])
state_var = create_var(T.matrix("s"), dim=args.hidden_size,
                       test_shape=[64, args.hidden_size])
input_embed = WordEmbedding(args.word_embed, args.tgt_vocab_size).belongs_to(decoder).compute(last_token_var)
recurrent_unit = GRU(args.hidden_size, input_type="sequence", output_type="sequence",
                     additional_input_dims=[input_embed.dim()])
attention_layer = SoftAttentionalLayer(recurrent_unit, test=True)
attention_layer.belongs_to(decoder).initialize(args.hidden_size * 2)
# one attention + GRU step; attention_layer.get_step_inputs(seq_input_var, state=state_var,
# feedback=input_embed) builds the same dictionary of step inputs
new_state = attention_layer.step({
    "UaH": T.dot(seq_input_var.tensor, attention_layer.Ua),
    "feedback": input_embed.tensor,
    "inputs": seq_input_var.tensor,
    "state": state_var.tensor
})["state"]
decoder_output_var = create_var(new_state, dim=args.hidden_size)

# expander
expander = Block()
expander_input_var = create_var(T.matrix("expander_input"), dim=args.hidden_size,
                                test_shape=[64, args.hidden_size])
dense_var = Chain(Dense(600), Dense(args.tgt_vocab_size)).belongs_to(expander).compute(expander_input_var)
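# Sketch (toy numpy arrays, not part of the script): the backward GRU emits its
# states in reversed time order, so Reverse3D flips them back before they are
# concatenated with the forward states along the feature axis. Toy sizes
# batch=64, time=10, hidden=1000 stand in for the real dimensions.
import numpy as np
toy_fwd = np.random.randn(64, 10, 1000)           # forward GRU states, (batch, time, hidden)
toy_bwd_reversed = np.random.randn(64, 10, 1000)  # backward GRU states, reversed in time
toy_bwd = toy_bwd_reversed[:, ::-1, :]             # what Reverse3D produces
toy_annotations = np.concatenate([toy_fwd, toy_bwd], axis=2)  # (64, 10, 2000): attention input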
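# Rough greedy-decoding sketch built on the step-wise variables above. The
# callables encode_fn, step_fn and expand_fn are hypothetical: they are assumed
# to be functions compiled from encoder_output_var, decoder_output_var and
# dense_var respectively, and their construction is not shown here.
import numpy as np

def greedy_decode(src_tokens, encode_fn, step_fn, expand_fn,
                  hidden_size=1000, max_len=50, bos=1, eos=2):
    # eos is a hypothetical end-of-sentence token id
    annotations = encode_fn(src_tokens)                   # (batch, time, 2 * hidden_size)
    state = np.zeros((src_tokens.shape[0], hidden_size), dtype="float32")
    tokens = np.full((src_tokens.shape[0],), bos, dtype="int32")
    outputs = []
    for _ in range(max_len):
        state = step_fn(tokens, annotations, state)       # one attention + GRU step
        scores = expand_fn(state)                         # (batch, tgt_vocab_size) output scores
        tokens = scores.argmax(axis=1).astype("int32")
        outputs.append(tokens)
        if np.all(tokens == eos):                         # stop once every sentence has ended
            break
    return np.stack(outputs, axis=1)                      # (batch, steps) decoded token ids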