Example #1
0
def bidrectional_encode(cell_fw,
                        cell_bw,
                        inputs,
                        sequence_length,
                        initial_state_fw=None,
                        initial_state_bw=None,
                        dtype=None,
                        output_method=OutputMethod.last,
                        use_sum=False):
    """Run a bidirectional dynamic RNN and pool each direction's outputs.

    Args:
      cell_fw: RNN cell for the forward direction.
      cell_bw: RNN cell for the backward direction; if None, a deep copy of
        `cell_fw` is used so both directions share the same configuration
        while keeping separate variables.
      inputs: input tensor fed to `tf.nn.bidirectional_dynamic_rnn`
        (assumed batch-major, i.e. (batch, time, dim) — TODO confirm).
      sequence_length: per-example valid lengths, used both by the RNN and
        by length-aware pooling (mean/last).
      initial_state_fw: optional initial state for the forward cell.
      initial_state_bw: optional initial state for the backward cell;
        defaults to `initial_state_fw` when not given.
      dtype: dtype passed through to the RNN when no initial state is given.
      output_method: OutputMethod member selecting how per-timestep outputs
        are reduced (sum / mean / last / first / all).
      use_sum: if True, combine the two directions by element-wise addition;
        otherwise concatenate them on the last axis.

    Returns:
      (output, forward_state) — the pooled (and combined) encoding plus the
      forward direction's final state.
    """
    if cell_bw is None:
        cell_bw = copy.deepcopy(cell_fw)
    if initial_state_bw is None:
        initial_state_bw = initial_state_fw

    outputs, states = tf.nn.bidirectional_dynamic_rnn(
        cell_fw=cell_fw,
        cell_bw=cell_bw,
        inputs=inputs,
        initial_state_fw=initial_state_fw,
        initial_state_bw=initial_state_bw,
        dtype=dtype,
        sequence_length=sequence_length)

    output_fws, output_bws = outputs

    # The pooling logic is identical for both directions, so it lives in one
    # helper instead of two copy-pasted if/elif chains.
    output_forward = _pool_direction_outputs(output_fws, sequence_length,
                                             output_method)
    output_backward = _pool_direction_outputs(output_bws, sequence_length,
                                              output_method)

    if use_sum:
        output = output_forward + output_backward
    else:
        output = tf.concat([output_forward, output_backward], -1)

    # NOTE(review): only the forward final state (states[0]) is returned;
    # confirm callers never need the backward state (states[1]).
    return output, states[0]


def _pool_direction_outputs(outputs, sequence_length, output_method):
    """Reduce one direction's per-timestep outputs to a single vector.

    Mirrors the pooling conventions used elsewhere in this file:
    sum / mean (length-normalized) / last relevant step / first step,
    with any other method returning the outputs unreduced.
    """
    if output_method == OutputMethod.sum:
        return tf.reduce_sum(outputs, 1)
    if output_method == OutputMethod.mean:
        # Normalize by the true sequence length, not the padded length.
        return tf.reduce_sum(outputs, 1) / tf.to_float(
            tf.expand_dims(sequence_length, 1))
    if output_method == OutputMethod.last:
        return dynamic_last_relevant(outputs, sequence_length)
    if output_method == OutputMethod.first:
        return outputs[:, 0, :]
    # Default: hand back the full per-timestep outputs.
    return outputs
Example #2
0
def encode_outputs(outputs, output_method=OutputMethod.last, sequence_length=None):
  """Pool per-timestep encoder outputs into a fixed-size representation.

  Args:
    outputs: per-timestep outputs (assumed (batch, time, dim) — TODO confirm).
    output_method: OutputMethod member selecting the pooling strategy.
    sequence_length: per-example valid lengths; required by the length-aware
      methods (max / argmax / mean / last).

  Returns:
    The pooled tensor, or `outputs` unchanged for unrecognized methods.
  """
  #--seems slower convergence and not good result when only using last output, so change to use sum
  if output_method == OutputMethod.sum:
    return tf.reduce_sum(outputs, 1)
  elif output_method == OutputMethod.max:
    assert sequence_length is not None
    #below not work.. sequence is different for each row instance
    #return tf.reduce_max(outputs[:, :sequence_length, :], 1)
    #return tf.reduce_max(outputs, 1) #not exclude padding embeddings
    #return tf.reduce_max(tf.abs(outputs), 1)
    return melt.max_pooling(outputs, sequence_length)
  elif output_method == OutputMethod.argmax:
    assert sequence_length is not None
    #return tf.argmax(outputs[:, :sequence_length, :], 1)
    #return tf.argmax(outputs, 1)
    #return tf.argmax(tf.abs(outputs), 1)
    return melt.argmax_pooling(outputs, sequence_length)
  elif output_method == OutputMethod.mean:
    assert sequence_length is not None
    # BUGFIX: the original returned a tuple `..., state` here, but `state`
    # is not defined in this function (NameError) and every other branch
    # returns a bare tensor — drop the stray `, state`.
    return tf.reduce_sum(outputs, 1) / tf.to_float(tf.expand_dims(sequence_length, 1))
  elif output_method == OutputMethod.last:
    #TODO actually return state.h is last revlevant?
    return dynamic_last_relevant(outputs, sequence_length)
  elif output_method == OutputMethod.first:
    return outputs[:, 0, :]
  else: # all
    return outputs
Example #3
0
def backward_encode(cell,
                    inputs,
                    sequence_length,
                    initial_state=None,
                    dtype=None,
                    output_method=OutputMethod.last):
    """Encode the sequence in reverse order and pool the outputs.

    The inputs are reversed per-example (respecting `sequence_length`)
    before being fed to a unidirectional dynamic RNN, then the per-timestep
    outputs are reduced according to `output_method`.

    Returns:
      (pooled_outputs, final_state).
    """
    # Reverse each example along the time axis up to its valid length.
    reversed_inputs = tf.reverse_sequence(inputs, sequence_length, 1)
    outputs, state = tf.nn.dynamic_rnn(cell,
                                       reversed_inputs,
                                       initial_state=initial_state,
                                       dtype=dtype,
                                       sequence_length=sequence_length)

    # Sum/mean pooling was observed to converge better than using only the
    # last output (note carried over from the original implementation).
    if output_method == OutputMethod.sum:
        pooled = tf.reduce_sum(outputs, 1)
    elif output_method == OutputMethod.mean:
        # Divide by the true (unpadded) length of each example.
        pooled = tf.reduce_sum(outputs, 1) / tf.to_float(
            tf.expand_dims(sequence_length, 1))
    elif output_method == OutputMethod.last:
        pooled = dynamic_last_relevant(outputs, sequence_length)
    elif output_method == OutputMethod.first:
        pooled = outputs[:, 0, :]
    else:
        pooled = outputs

    return pooled, state
Example #4
0
def encode_outputs(outputs,
                   sequence_length=None,
                   output_method=OutputMethod.last,
                   state=None,
                   attention_hidden_size=128,
                   window_size=3):
    """Pool per-timestep encoder outputs into a fixed representation.

    Supports plain and length-masked sum/mean, max/argmax pooling,
    first/last timestep selection, self-attention pooling, hierarchical
    pooling, returning the RNN state directly, or passing the outputs
    through untouched for any other method.
    """
    # Returning the RNN state needs no pooling at all — handle it first.
    if output_method == OutputMethod.state:
        assert state is not None
        return state

    # Sum/mean pooling converged better than last-output alone
    # (note carried over from the original implementation).
    if output_method == OutputMethod.sum:
        # Plain sum over time; padding positions are included.
        return tf.reduce_sum(outputs, 1)

    if output_method == OutputMethod.masked_sum:
        return melt.sum_pooling(outputs, sequence_length)

    if output_method == OutputMethod.max:
        assert sequence_length is not None
        # melt.max_pooling masks padding; a bare tf.reduce_max would not,
        # and per-row slicing is impossible since lengths differ per example.
        return melt.max_pooling(outputs, sequence_length)

    if output_method == OutputMethod.argmax:
        assert sequence_length is not None
        return melt.argmax_pooling(outputs, sequence_length)

    if output_method == OutputMethod.mean:
        assert sequence_length is not None
        # Normalize by the true per-example length, not the padded length.
        return tf.reduce_sum(outputs, 1) / tf.to_float(
            tf.expand_dims(sequence_length, 1))

    if output_method == OutputMethod.masked_mean:
        return melt.mean_pooling(outputs, sequence_length)

    if output_method == OutputMethod.last:
        # TODO: is state.h already the last relevant output here?
        return dynamic_last_relevant(outputs, sequence_length)

    if output_method == OutputMethod.first:
        return outputs[:, 0, :]

    if output_method == OutputMethod.attention:
        # NOTE(review): with stdlib logging this call would fail at format
        # time (positional arg, no %-placeholder) — presumably `logging`
        # here is a print-style project logger; verify.
        logging.info('attention_hidden_size:', attention_hidden_size)
        encoding, attention_weights = melt.layers.self_attention(
            outputs, sequence_length, attention_hidden_size)
        # Expose the attention weights for later inspection/visualization.
        tf.add_to_collection('self_attention', attention_weights)
        return encoding

    if output_method == OutputMethod.hier:
        return melt.hier_pooling(outputs,
                                 sequence_length,
                                 window_size=window_size)

    # Fall through ("all"): return the full per-timestep outputs.
    return outputs