Example #1
        feedback_var, mask=tgt_mask_var)

    second_input = DimShuffle(1, 0, 2).compute(second_input)

    recurrent_unit = GRU(args.hidden_size,
                         input_type="sequence",
                         output_type="sequence",
                         additional_input_dims=[args.word_embed])

    expander_chain = Chain(Dense(600), Dense(args.tgt_vocab_size))
    expander_chain.initialize(args.hidden_size)

    attention_layer = SoftAttentionalLayer(recurrent_unit)
    attention_var = attention_layer.belongs_to(decoder).compute(
        hidden_layer,
        mask=src_mask_var,
        feedback=second_input,
        steps=tgt_var.shape[1])

    # expander
    output_var = expander_chain.belongs_to(expander).compute(attention_var)

    cost = TMCostLayer(tgt_var, tgt_mask_var,
                       args.tgt_vocab_size).compute(output_var)

    model = ComputationalGraph(input_vars=[src_var, src_mask_var])
    model.training_callbacks.append(training_monitor)

    data = OnDiskDataset("/tmp/data_prefix_train.pkl",
                         valid_path="/tmp/data_prefix_valid.pkl",
                         train_size=args.train_size,
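
A note on the DimShuffle(1, 0, 2) call above (it appears again in Example #3): it presumably swaps the batch and time axes, turning the (batch, time, embed) embedding output into the time-major layout that Theano's scan, and hence the sequence-typed recurrent layers, iterate over. A minimal numpy sketch of the same reordering, with hypothetical sizes:

import numpy as np

batch, time, embed = 64, 20, 100        # hypothetical sizes
x = np.zeros((batch, time, embed))
y = x.transpose(1, 0, 2)                # same reordering as DimShuffle(1, 0, 2)
assert y.shape == (time, batch, embed)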
Example #2
    backward_rnn_var = Chain(GRU(args.hidden_size, input_type="sequence", output_type="sequence", backward=True),
                             Reverse3D()).belongs_to(encoder).compute(encoder_embed)
    encoder_output_var = Concatenate(axis=2).compute(forward_rnn_var, backward_rnn_var)
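    # The backward GRU reads the source right-to-left and Reverse3D realigns its
    # outputs with the forward pass, so concatenating along the feature axis
    # (axis=2) yields 2 * hidden_size features per source position.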

    # decoder
    decoder = Block()

    last_token_var = create_var(T.ivector("tok"), test_shape=[64], test_dtype="int32")
    seq_input_var = create_var(T.matrix('seq'), dim=args.hidden_size * 2, test_shape=[64, args.hidden_size * 2])
    state_var = create_var(T.matrix("s"), dim=args.hidden_size, test_shape=[64, args.hidden_size])

    input_embed = WordEmbedding(args.word_embed, args.tgt_vocab_size).belongs_to(decoder).compute(last_token_var)

    recurrent_unit = GRU(args.hidden_size, input_type="sequence", output_type="sequence", additional_input_dims=[input_embed.dim()])
    attention_layer = SoftAttentionalLayer(recurrent_unit, test=True)
    attention_layer.belongs_to(decoder).initialize(args.hidden_size * 2)

    new_state = attention_layer.step({
        "UaH": T.dot(seq_input_var.tensor, attention_layer.Ua),
        "feedback": input_embed.tensor,
        "inputs": seq_input_var.tensor,
        "state": state_var.tensor
    })["state"]
    decoder_output_var = create_var(new_state, dim=args.hidden_size)

    # expander

    expander = Block()

    expander_input_var = create_var(T.matrix("expander_input"), dim=args.hidden_size, test_shape=[64, args.hidden_size])
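
The step() call above runs a single decoder transition outside of scan, which is what test-time search needs. The "UaH" entry caches T.dot(H, Ua), i.e. the part of Bahdanau-style additive attention that depends only on the encoder states, so it can be computed once per sentence rather than once per decoding step. A minimal numpy sketch of one such step; the weight names Wa, Ua, va and the exact scoring function are assumptions about what SoftAttentionalLayer does internally, not its actual API:

import numpy as np

T_enc, B, D = 20, 64, 1000                     # hypothetical sizes
H = np.random.randn(T_enc, B, D)               # encoder annotations h_j
s = np.random.randn(B, D)                      # previous decoder state s_{t-1}
Wa, Ua = np.random.randn(D, D), np.random.randn(D, D)
va = np.random.randn(D)

UaH = H @ Ua                                   # depends only on H: cache it
scores = np.tanh(s @ Wa + UaH) @ va            # e_{tj}, shape (T_enc, B)
scores -= scores.max(axis=0)                   # stabilise the softmax
alpha = np.exp(scores) / np.exp(scores).sum(axis=0)
context = (alpha[:, :, None] * H).sum(axis=0)  # context vector c_t, shape (B, D)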
Example #3
    # decoder
    # the first token is <s>=1
    feedback_var = tgt_var.apply(lambda t: T.concatenate([T.ones((t.shape[0], 1), dtype="int32"), t[:, :-1]], axis=1))
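    # e.g. a gold target row [5, 6, 7, 2] becomes feedback [1, 5, 6, 7]: each
    # decoder step is conditioned on the previous gold token (teacher forcing).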

    tgt_embed_layer = WordEmbedding(args.word_embed, args.tgt_vocab_size)
    tgt_embed_layer.initialize(1)

    second_input = tgt_embed_layer.belongs_to(decoder).compute(feedback_var, mask=tgt_mask_var)

    second_input = DimShuffle(1, 0, 2).compute(second_input)

    recurrent_unit = LSTM(args.hidden_size, input_type="sequence", output_type="sequence", additional_input_dims=[args.word_embed])

    attention_layer = SoftAttentionalLayer(recurrent_unit)
    attention_var = attention_layer.belongs_to(decoder).compute(hidden_layer, mask=src_mask_var, feedback=second_input, steps=tgt_var.shape[1])

    # expander
    output_var = Chain(Dense(600), Dense(args.tgt_vocab_size)).belongs_to(expander).compute(attention_var)

    cost = TMCostLayer(tgt_var, tgt_mask_var, args.tgt_vocab_size).compute(output_var)

    model = ComputationalGraph(input_vars=[src_var, src_mask_var],
                               target_vars=[tgt_var, tgt_mask_var],
                               blocks=[encoder, decoder, expander],
                               cost=cost)

    data = OnDiskDataset("{}/aspec.enja1_train.pkl".format(WMT_ROOT),
                         valid_path="{}/aspec.enja1_valid.pkl".format(WMT_ROOT),
                         cached=True, shuffle_memory=False)
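
TMCostLayer is not shown here; presumably it computes per-token cross-entropy over the target vocabulary and uses tgt_mask_var to ignore padding. A hedged numpy reconstruction of that idea (the actual deepy implementation may differ, e.g. in how it normalises):

import numpy as np

B, T_dec, V = 64, 15, 1000                           # hypothetical sizes
probs = np.random.dirichlet(np.ones(V), (B, T_dec))  # stand-in softmax outputs
tgt = np.random.randint(0, V, (B, T_dec))            # gold token ids
mask = np.ones((B, T_dec))                           # 1 = token, 0 = padding

nll = -np.log(probs[np.arange(B)[:, None], np.arange(T_dec), tgt])
cost = (nll * mask).sum() / mask.sum()               # mean over unmasked tokens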
Example #4
                               dim=args.hidden_size * 2,
                               test_shape=[64, args.hidden_size * 2])
    state_var = create_var(T.matrix("s"),
                           dim=args.hidden_size,
                           test_shape=[64, args.hidden_size])

    input_embed = WordEmbedding(
        args.word_embed,
        args.tgt_vocab_size).belongs_to(decoder).compute(last_token_var)

    recurrent_unit = GRU(args.hidden_size,
                         input_type="sequence",
                         output_type="sequence",
                         additional_input_dims=[input_embed.dim()])
    attention_layer = SoftAttentionalLayer(recurrent_unit, test=True)
    attention_layer.belongs_to(decoder).initialize(args.hidden_size * 2)

    new_state = attention_layer.step({
        "UaH": T.dot(seq_input_var.tensor, attention_layer.Ua),
        "feedback": input_embed.tensor,
        "inputs": seq_input_var.tensor,
        "state": state_var.tensor
    })["state"]
    decoder_output_var = create_var(new_state, dim=args.hidden_size)

    # expander
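
A closing note on the test_shape / test_dtype arguments passed to create_var throughout these examples: they presumably attach Theano test values, so shape mismatches surface while the graph is being built instead of deep inside a compiled function. The raw Theano equivalent:

import numpy as np
import theano
import theano.tensor as T

theano.config.compute_test_value = "warn"  # evaluate every op on test values
hidden_size = 1000                         # hypothetical, stands in for args.hidden_size
s = T.matrix("s")
s.tag.test_value = np.zeros((64, hidden_size), dtype=theano.config.floatX)
h = T.tanh(s)                              # shape-checked eagerly via the test value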