import copy

import tensorflow as tf

import melt  # repo-local helpers (melt.max_pooling, melt.layers.self_attention, ...)

# OutputMethod, dynamic_last_relevant and logging are assumed to be
# defined/imported elsewhere in this module.


def bidrectional_encode(cell_fw, cell_bw, inputs, sequence_length,
                        initial_state_fw=None, initial_state_bw=None,
                        dtype=None, output_method=OutputMethod.last,
                        use_sum=False):
  """Runs a bidirectional dynamic RNN and pools each direction's outputs."""
  if cell_bw is None:
    cell_bw = copy.deepcopy(cell_fw)
  if initial_state_bw is None:
    initial_state_bw = initial_state_fw

  outputs, states = tf.nn.bidirectional_dynamic_rnn(
      cell_fw=cell_fw,
      cell_bw=cell_bw,
      inputs=inputs,
      initial_state_fw=initial_state_fw,
      initial_state_bw=initial_state_bw,
      dtype=dtype,
      sequence_length=sequence_length)
  output_fws, output_bws = outputs

  if output_method == OutputMethod.sum:
    output_forward = tf.reduce_sum(output_fws, 1)
  elif output_method == OutputMethod.mean:
    output_forward = tf.reduce_sum(output_fws, 1) / tf.to_float(
        tf.expand_dims(sequence_length, 1))
  elif output_method == OutputMethod.last:
    output_forward = dynamic_last_relevant(output_fws, sequence_length)
  elif output_method == OutputMethod.first:
    output_forward = output_fws[:, 0, :]
  else:
    output_forward = output_fws

  if output_method == OutputMethod.sum:
    output_backward = tf.reduce_sum(output_bws, 1)
  elif output_method == OutputMethod.mean:
    output_backward = tf.reduce_sum(output_bws, 1) / tf.to_float(
        tf.expand_dims(sequence_length, 1))
  elif output_method == OutputMethod.last:
    output_backward = dynamic_last_relevant(output_bws, sequence_length)
  elif output_method == OutputMethod.first:
    output_backward = output_bws[:, 0, :]
  else:
    output_backward = output_bws

  # Combine the two directions: elementwise sum keeps the hidden size,
  # concatenation doubles it.
  if use_sum:
    output = output_forward + output_backward
  else:
    output = tf.concat([output_forward, output_backward], -1)

  # Only the forward final state is returned.
  return output, states[0]
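# A minimal usage sketch (not part of the original module): builds a TF 1.x
# graph that encodes a padded batch with the function above. The cell size,
# placeholder shapes and the demo function name are illustrative assumptions.
def _demo_bidrectional_encode():
  inputs = tf.placeholder(tf.float32, [None, None, 256])  # [batch, time, dim]
  sequence_length = tf.placeholder(tf.int32, [None])      # valid steps per row
  cell_fw = tf.nn.rnn_cell.GRUCell(128)
  # cell_bw=None -> the backward cell is a deep copy of cell_fw.
  encoding, state_fw = bidrectional_encode(
      cell_fw, None, inputs, sequence_length,
      dtype=tf.float32, output_method=OutputMethod.last)
  return encoding, state_fw  # encoding: [batch, 256] after concat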
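# dynamic_last_relevant is called throughout this file but not shown in this
# section; a minimal sketch of the standard gather-the-last-valid-step
# pattern it presumably follows (the real implementation may differ):
def dynamic_last_relevant_sketch(outputs, sequence_length):
  batch_size = tf.shape(outputs)[0]
  max_length = tf.shape(outputs)[1]
  out_size = tf.shape(outputs)[2]
  # Flatten [batch, time, dim] to [batch * time, dim], then gather row i's
  # output at step sequence_length[i] - 1.
  index = tf.range(batch_size) * max_length + (tf.to_int32(sequence_length) - 1)
  flat = tf.reshape(outputs, [-1, out_size])
  return tf.gather(flat, index)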
def backward_encode(cell, inputs, sequence_length, initial_state=None,
                    dtype=None, output_method=OutputMethod.last):
  # Reverse each row's first sequence_length steps (padding stays in place),
  # then run a forward RNN over the reversed inputs.
  outputs, state = tf.nn.dynamic_rnn(
      cell,
      tf.reverse_sequence(inputs, sequence_length, 1),
      initial_state=initial_state,
      dtype=dtype,
      sequence_length=sequence_length)
  # Using only the last output seems to converge more slowly and give worse
  # results, so sum pooling is preferred in practice.
  if output_method == OutputMethod.sum:
    return tf.reduce_sum(outputs, 1), state
  elif output_method == OutputMethod.mean:
    return tf.reduce_sum(outputs, 1) / tf.to_float(
        tf.expand_dims(sequence_length, 1)), state
  elif output_method == OutputMethod.last:
    return dynamic_last_relevant(outputs, sequence_length), state
  elif output_method == OutputMethod.first:
    return outputs[:, 0, :], state
  else:
    return outputs, state
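# Illustrative check (not from the original module) of the reversal used
# above: tf.reverse_sequence flips only each row's first sequence_length
# steps, so padding stays in place and OutputMethod.last still reads the
# output at step sequence_length - 1.
def _demo_reverse_sequence():
  x = tf.constant([[[1.], [2.], [3.], [0.]]])  # one row: length 3 plus 1 pad
  lengths = tf.constant([3])
  return tf.reverse_sequence(x, lengths, 1)    # -> [[[3.], [2.], [1.], [0.]]]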
def encode_outputs(outputs, sequence_length=None, output_method=OutputMethod.last,
                   state=None, attention_hidden_size=128, window_size=3):
  if output_method == OutputMethod.state:
    assert state is not None
    return state
  # Using only the last output seems to converge more slowly and give worse
  # results, so sum pooling is preferred in practice.
  if output_method == OutputMethod.sum:
    return tf.reduce_sum(outputs, 1)
  elif output_method == OutputMethod.masked_sum:
    return melt.sum_pooling(outputs, sequence_length)
  elif output_method == OutputMethod.max:
    assert sequence_length is not None
    # Slicing like outputs[:, :sequence_length, :] does not work because
    # sequence_length differs per row, and a plain tf.reduce_max(outputs, 1)
    # would not exclude the padding embeddings, so use the masked helper.
    return melt.max_pooling(outputs, sequence_length)
  elif output_method == OutputMethod.argmax:
    assert sequence_length is not None
    return melt.argmax_pooling(outputs, sequence_length)
  elif output_method == OutputMethod.mean:
    assert sequence_length is not None
    return tf.reduce_sum(outputs, 1) / tf.to_float(
        tf.expand_dims(sequence_length, 1))
  elif output_method == OutputMethod.masked_mean:
    return melt.mean_pooling(outputs, sequence_length)
  elif output_method == OutputMethod.last:
    # TODO: is returning state.h actually the same as the last relevant output?
    return dynamic_last_relevant(outputs, sequence_length)
  elif output_method == OutputMethod.first:
    return outputs[:, 0, :]
  elif output_method == OutputMethod.attention:
    logging.info('attention_hidden_size: %s', attention_hidden_size)
    encoding, alphas = melt.layers.self_attention(
        outputs, sequence_length, attention_hidden_size)
    tf.add_to_collection('self_attention', alphas)
    return encoding
  elif output_method == OutputMethod.hier:
    return melt.hier_pooling(outputs, sequence_length, window_size=window_size)
  else:  # all
    return outputs
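# melt.max_pooling / melt.mean_pooling are not shown in this section; the
# sketches below (hypothetical names, real implementations may differ) show
# the masked pooling they presumably perform: mask padding steps before
# reducing, which is exactly what a plain tf.reduce_max / tf.reduce_sum over
# the padded tensor gets wrong.
def masked_max_pooling_sketch(outputs, sequence_length):
  mask = tf.expand_dims(
      tf.sequence_mask(sequence_length, tf.shape(outputs)[1], dtype=tf.float32), -1)
  # Push padding steps toward -inf so they can never win the max.
  return tf.reduce_max(outputs + (1.0 - mask) * -1e30, 1)

def masked_mean_pooling_sketch(outputs, sequence_length):
  mask = tf.expand_dims(
      tf.sequence_mask(sequence_length, tf.shape(outputs)[1], dtype=tf.float32), -1)
  # Zero out padding, then divide by the true length rather than max time.
  return tf.reduce_sum(outputs * mask, 1) / tf.to_float(
      tf.expand_dims(sequence_length, 1))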