Example #1
            def decoder_body(time, old_state, output_ta_t, attention_tracker):
                if feedback:
                    # After the first step, greedily feed back the previous prediction:
                    # project the old state to output logits, take the argmax, and
                    # look up its embedding.
                    def from_previous():
                        prev_1 = tf.matmul(old_state, W_out) + b_out
                        return tf.gather(embeddings, tf.argmax(prev_1, 1))

                    x_t = tf.cond(tf.greater(time, 0), from_previous,
                                  lambda: input_ta.read(0))
                else:
                    # Otherwise read the externally supplied input for this time step.
                    x_t = input_ta.read(time)

                # attention (additive): combine the decoder-state term with the
                # encoder-side term `part1`, which is precomputed outside the loop
                part2 = tf.matmul(old_state, W_a) + b_a
                part2 = tf.expand_dims(part2, 1)              # broadcast over encoder positions
                john = part1 + part2
                e = tf.reduce_sum(v_a * tf.tanh(john), [2])   # energy per encoder position
                alpha = tf.nn.softmax(e)
                alpha = tf.to_float(mask(attention_lengths)) * alpha         # zero out padded positions
                alpha = alpha / tf.reduce_sum(alpha, [1], keep_dims=True)    # renormalize after masking
                attention_tracker = attention_tracker.write(time, alpha)
                c = tf.reduce_sum(
                    tf.expand_dims(alpha, 2) * tf.squeeze(hidden), [1])      # context vector

                # GRU update on the concatenation of input, old state, and context
                con = tf.concat(1, [x_t, old_state, c])
                z = tf.sigmoid(tf.matmul(con, W_z) + b_z)   # update gate
                r = tf.sigmoid(tf.matmul(con, W_r) + b_r)   # reset gate
                con = tf.concat(1, [x_t, r * old_state, c])
                h = tf.tanh(tf.matmul(con, W_h) + b_h)      # candidate state
                new_state = (1 - z) * h + z * old_state

                output_ta_t = output_ta_t.write(time, new_state)

                return (time + 1, new_state, output_ta_t, attention_tracker)
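
The loop signature and the two TensorArray writes imply that decoder_body is meant to be stepped by tf.while_loop. Below is a minimal driver sketch using the same pre-1.0 TensorFlow API as the examples (tf.pack, TensorArray.pack, keep_dims); the names initial_state, output_ta, attention_ta, and max_sequence_length are assumptions, not taken from the snippet.

            # Hypothetical driver: step `time` from 0 to max_sequence_length,
            # threading the GRU state and both TensorArrays through every iteration.
            loop_vars = (tf.constant(0, dtype=tf.int32),   # time
                         initial_state,                    # assumed initial decoder state
                         output_ta,                        # assumed TensorArray for decoder states
                         attention_ta)                     # assumed TensorArray for attention weights

            _, final_state, output_ta, attention_ta = tf.while_loop(
                cond=lambda time, *_: tf.less(time, max_sequence_length),
                body=decoder_body,
                loop_vars=loop_vars)

            outputs = output_ta.pack()             # [max_time, batch, state_size]; .stack() in TF >= 1.0
            attention_weights = attention_ta.pack()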
Example #2
            def decoder_body(time, old_state, output_ta_t, attention_tracker):
                if feedback:
                    def from_previous():
                        prev_1 = tf.matmul(old_state, W_out) + b_out
                        return tf.gather(embeddings, tf.argmax(prev_1, 1))
                    x_t = tf.cond(tf.greater(time, 0), from_previous, lambda: input_ta.read(0))
                else:
                    x_t = input_ta.read(time)

                # attention (concatenation variant): tile the decoder state across the
                # encoder positions and score the concatenation with a convolution
                part1_ex = tf.expand_dims(old_state, 1)
                part1_tiled = tf.tile(part1_ex, tf.pack([1, attn_len, 1]))
                dot_input = tf.concat(2, [part1_tiled, attention_input])
                dot_reshape = tf.reshape(dot_input, tf.pack([-1, attn_len, 1, attention_dims * 2]))
                john = tf.nn.conv2d(dot_reshape, U_a, [1, 1, 1, 1], "SAME")
                john = tf.squeeze(john, [2])  # squeeze out the third dimension

                a = v_a * tf.tanh(john)
                e = tf.reduce_sum(a, [2])                  # energy per encoder position
                alpha = tf.nn.softmax(e)
                alpha = tf.to_float(mask(attention_lengths)) * alpha         # zero out padded positions
                alpha = alpha / tf.reduce_sum(alpha, [1], keep_dims=True)    # renormalize after masking
                attention_tracker = attention_tracker.write(time, alpha)
                c = tf.reduce_sum(tf.expand_dims(alpha, 2) * attention_input, [1])  # context vector

                # GRU update on the concatenation of input, old state, and context
                con = tf.concat(1, [x_t, old_state, c])
                z = tf.sigmoid(tf.matmul(con, W_z) + b_z)   # update gate
                r = tf.sigmoid(tf.matmul(con, W_r) + b_r)   # reset gate
                con = tf.concat(1, [x_t, r * old_state, c])
                h = tf.tanh(tf.matmul(con, W_h) + b_h)      # candidate state
                new_state = (1 - z) * h + z * old_state

                output_ta_t = output_ta_t.write(time, new_state)

                return (time + 1, new_state, output_ta_t, attention_tracker)
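
The conv2d call in the snippet above is how this variant applies U_a to every encoder position at once. Assuming U_a is a 1x1 kernel of shape [1, 1, attention_dims * 2, attn_units], the usual choice in additive-attention implementations (attn_units is a hypothetical name, not from the snippet), the convolution is just a position-wise linear map, equivalent to this reshape-and-matmul sketch:

            # Hypothetical equivalent of the 1x1 convolution (assumes U_a has shape
            # [1, 1, attention_dims * 2, attn_units]): apply the same linear map
            # independently at every encoder position.
            flat = tf.reshape(dot_input, tf.pack([-1, attention_dims * 2]))       # [batch * attn_len, 2 * dims]
            U_mat = tf.reshape(U_a, tf.pack([attention_dims * 2, attn_units]))    # drop the 1x1 spatial dims
            john = tf.reshape(tf.matmul(flat, U_mat),
                              tf.pack([-1, attn_len, attn_units]))                # [batch, attn_len, attn_units]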