def decoder_body(time, old_state, output_ta_t, attention_tracker):
    """One step of the attention-GRU decoder loop (tf.while_loop body).

    Closure variables (defined in the enclosing scope): `feedback`,
    `input_ta`, `embeddings`, `part1`, `hidden`, `mask`,
    `attention_lengths`, and the weight/bias tensors `W_*`/`b_*`/`v_a`.

    Args:
        time: scalar int tensor, current decode step.
        old_state: previous GRU state, presumably (batch, state_dim) — confirm.
        output_ta_t: TensorArray collecting decoder states.
        attention_tracker: TensorArray collecting attention weights.

    Returns:
        Tuple (time + 1, new_state, output_ta_t, attention_tracker).
    """
    if feedback:
        # At inference, feed back the embedding of the previous argmax
        # prediction; at step 0 there is no prediction, so read input 0.
        def from_previous():
            logits = tf.matmul(old_state, W_out) + b_out
            return tf.gather(embeddings, tf.argmax(logits, 1))

        x_t = tf.cond(tf.greater(time, 0),
                      from_previous,
                      lambda: input_ta.read(0))
    else:
        # Teacher forcing: read the ground-truth input for this step.
        x_t = input_ta.read(time)

    # Additive (Bahdanau-style) attention: `part1` is the precomputed
    # encoder projection; project the decoder state and broadcast-add.
    decoder_proj = tf.expand_dims(tf.matmul(old_state, W_a) + b_a, 1)
    scores = tf.reduce_sum(v_a * tf.tanh(part1 + decoder_proj), [2])
    alpha = tf.nn.softmax(scores)
    # Zero out padding positions, then renormalize so weights sum to 1.
    alpha = tf.to_float(mask(attention_lengths)) * alpha
    alpha = alpha / tf.reduce_sum(alpha, [1], keep_dims=True)
    attention_tracker = attention_tracker.write(time, alpha)
    # NOTE(review): axis-less tf.squeeze drops *all* size-1 dims — could
    # misbehave at batch size 1; confirm `hidden`'s static shape.
    context = tf.reduce_sum(tf.expand_dims(alpha, 2) * tf.squeeze(hidden), [1])

    # GRU cell over [input, state, context] (legacy tf.concat(axis, values)).
    gru_in = tf.concat(1, [x_t, old_state, context])
    update = tf.sigmoid(tf.matmul(gru_in, W_z) + b_z)
    reset = tf.sigmoid(tf.matmul(gru_in, W_r) + b_r)
    gru_in = tf.concat(1, [x_t, reset * old_state, context])
    candidate = tf.tanh(tf.matmul(gru_in, W_h) + b_h)
    new_state = (1 - update) * candidate + update * old_state

    output_ta_t = output_ta_t.write(time, new_state)
    return (time + 1, new_state, output_ta_t, attention_tracker)
def decoder_body(time, old_state, output_ta_t, attention_tracker):
    """Single decode step: attention over encoder states, then a GRU update.

    Intended as the body of a tf.while_loop; relies on enclosing-scope
    names (`feedback`, `input_ta`, `embeddings`, `part1`, `hidden`,
    `mask`, `attention_lengths`, and the `W_*`/`b_*`/`v_a` parameters).

    Returns the loop-carried tuple
    (time + 1, new_state, output_ta_t, attention_tracker).
    """
    if not feedback:
        # Teacher forcing: ground-truth token for this timestep.
        x_t = input_ta.read(time)
    else:
        # Greedy feedback: embed the argmax of the previous step's logits
        # (first step has no prediction yet, so fall back to input 0).
        def _embed_prev_prediction():
            prev_logits = tf.matmul(old_state, W_out) + b_out
            return tf.gather(embeddings, tf.argmax(prev_logits, 1))

        x_t = tf.cond(tf.greater(time, 0),
                      _embed_prev_prediction,
                      lambda: input_ta.read(0))

    # --- attention ---------------------------------------------------
    # Additive scoring: tanh(part1 + W_a * state) dotted with v_a.
    state_term = tf.matmul(old_state, W_a) + b_a
    energies = tf.reduce_sum(
        v_a * tf.tanh(part1 + tf.expand_dims(state_term, 1)), [2])
    weights = tf.nn.softmax(energies)
    # Mask padded positions and renormalize to a proper distribution.
    weights = tf.to_float(mask(attention_lengths)) * weights
    weights = weights / tf.reduce_sum(weights, [1], keep_dims=True)
    attention_tracker = attention_tracker.write(time, weights)
    # NOTE(review): tf.squeeze without axes removes every size-1 dim;
    # verify `hidden`'s shape so batch size 1 is safe.
    context = tf.reduce_sum(
        tf.expand_dims(weights, 2) * tf.squeeze(hidden), [1])

    # --- GRU ---------------------------------------------------------
    # Legacy tf.concat signature: (axis, values).
    combined = tf.concat(1, [x_t, old_state, context])
    z_gate = tf.sigmoid(tf.matmul(combined, W_z) + b_z)
    r_gate = tf.sigmoid(tf.matmul(combined, W_r) + b_r)
    combined = tf.concat(1, [x_t, r_gate * old_state, context])
    h_tilde = tf.tanh(tf.matmul(combined, W_h) + b_h)
    new_state = (1 - z_gate) * h_tilde + z_gate * old_state

    output_ta_t = output_ta_t.write(time, new_state)
    return (time + 1, new_state, output_ta_t, attention_tracker)
def decoder_body(time, old_state, output_ta_t, attention_tracker):
    """One step of the attention-GRU decoder (tf.while_loop body).

    This variant scores attention with a 1x1 conv2d (`U_a`) over the
    concatenation of the tiled decoder state and the encoder outputs,
    instead of the precomputed-`part1` additive form used elsewhere.

    Closure variables (enclosing scope): `feedback`, `input_ta`,
    `embeddings`, `attention_input`, `attention_lengths`, `attn_len`,
    `attention_dims`, `mask`, and parameters `U_a`, `v_a`, `W_*`, `b_*`.

    Args:
        time: scalar int tensor, current decode step.
        old_state: previous GRU state, presumably (batch, state_dim) — confirm.
        output_ta_t: TensorArray collecting decoder states.
        attention_tracker: TensorArray collecting attention weights.

    Returns:
        Tuple (time + 1, new_state, output_ta_t, attention_tracker).
    """
    if feedback:
        # Greedy feedback at inference: embed the argmax of the previous
        # step's output logits; at time 0 read the first input instead.
        def from_previous():
            prev_1 = tf.matmul(old_state, W_out) + b_out
            return tf.gather(embeddings, tf.argmax(prev_1, 1))

        x_t = tf.cond(tf.greater(time, 0),
                      from_previous,
                      lambda: input_ta.read(0))
    else:
        # Teacher forcing: read the ground-truth input for this step.
        x_t = input_ta.read(time)

    # attention: tile the decoder state along the attention length so it
    # can be concatenated with the encoder outputs position-wise.
    part1_ex = tf.expand_dims(old_state, 1)
    part1_tiled = tf.tile(part1_ex, tf.pack([1, attn_len, 1]))
    dot_input = tf.concat(2, [part1_tiled, attention_input])
    # Reshape to NHWC (height = attn_len, width = 1) for the 1x1 conv.
    dot_reshape = tf.reshape(
        dot_input, tf.pack([-1, attn_len, 1, attention_dims * 2]))
    john = tf.nn.conv2d(dot_reshape, U_a, [1, 1, 1, 1], "SAME")
    john = tf.squeeze(john, [2])  # squeeze out the third dimension
    a = v_a * tf.tanh(john)
    e = tf.reduce_sum(a, [2])
    alpha = tf.nn.softmax(e)
    # Zero out padded positions, then renormalize to sum to 1.
    alpha = tf.to_float(mask(attention_lengths)) * alpha
    alpha = alpha / tf.reduce_sum(alpha, [1], keep_dims=True)
    attention_tracker = attention_tracker.write(time, alpha)
    c = tf.reduce_sum(tf.expand_dims(alpha, 2) * attention_input, [1])

    # GRU over [input, state, context]; legacy tf.concat(axis, values).
    con = tf.concat(1, [x_t, old_state, c])
    z = tf.sigmoid(tf.matmul(con, W_z) + b_z)
    r = tf.sigmoid(tf.matmul(con, W_r) + b_r)
    con = tf.concat(1, [x_t, r * old_state, c])
    h = tf.tanh(tf.matmul(con, W_h) + b_h)
    new_state = (1 - z) * h + z * old_state

    output_ta_t = output_ta_t.write(time, new_state)
    return (time + 1, new_state, output_ta_t, attention_tracker)