def __init__(self, dataset, parameters):
    self.verbose = False

    # Placeholders for input, output and dropout
    self.input_token_indices = tf.placeholder(tf.int32, [None], name="input_token_indices")
    self.input_label_indices_vector = tf.placeholder(tf.float32, [None, dataset.number_of_classes],
                                                     name="input_label_indices_vector")
    self.input_label_indices_flat = tf.placeholder(tf.int32, [None], name="input_label_indices_flat")
    self.input_token_character_indices = tf.placeholder(tf.int32, [None, None],
                                                        name="input_token_character_indices")
    self.input_token_lengths = tf.placeholder(tf.int32, [None], name="input_token_lengths")
    self.dropout_keep_prob = tf.placeholder(tf.float32, name="dropout_keep_prob")

    # Xavier (Glorot) uniform initializer: limit = sqrt(6 / (fan_in + fan_out))
    initializer = tf.contrib.layers.xavier_initializer()

    if parameters['use_character_lstm']:
        # Character embedding layer
        with tf.variable_scope("character_embedding"):
            self.character_embedding_weights = tf.get_variable(
                "character_embedding_weights",
                shape=[dataset.alphabet_size, parameters['character_embedding_dimension']],
                initializer=initializer)
            embedded_characters = tf.nn.embedding_lookup(self.character_embedding_weights,
                                                         self.input_token_character_indices,
                                                         name='embedded_characters')
            if self.verbose: print("embedded_characters: {0}".format(embedded_characters))
            utils_tf.variable_summaries(self.character_embedding_weights)

        # Character LSTM layer
        with tf.variable_scope('character_lstm') as vs:
            character_lstm_output = bidirectional_LSTM(embedded_characters,
                                                       parameters['character_lstm_hidden_state_dimension'],
                                                       initializer,
                                                       sequence_length=self.input_token_lengths,
                                                       output_sequence=False)
            self.character_lstm_variables = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope=vs.name)

    # Token embedding layer
    with tf.variable_scope("token_embedding"):
        self.token_embedding_weights = tf.get_variable(
            "token_embedding_weights",
            shape=[dataset.vocabulary_size, parameters['token_embedding_dimension']],
            initializer=initializer,
            trainable=not parameters['freeze_token_embeddings'])
        embedded_tokens = tf.nn.embedding_lookup(self.token_embedding_weights, self.input_token_indices)
        utils_tf.variable_summaries(self.token_embedding_weights)

    # Concatenate character LSTM outputs and token embeddings
    if parameters['use_character_lstm']:
        with tf.variable_scope("concatenate_token_and_character_vectors"):
            if self.verbose: print('embedded_tokens: {0}'.format(embedded_tokens))
            token_lstm_input = tf.concat([character_lstm_output, embedded_tokens], axis=1,
                                         name='token_lstm_input')
            if self.verbose: print("token_lstm_input: {0}".format(token_lstm_input))
    else:
        token_lstm_input = embedded_tokens

    # Add dropout
    with tf.variable_scope("dropout"):
        token_lstm_input_drop = tf.nn.dropout(token_lstm_input, self.dropout_keep_prob,
                                              name='token_lstm_input_drop')
        if self.verbose: print("token_lstm_input_drop: {0}".format(token_lstm_input_drop))
        # Add a batch dimension of size 1 so the input matches the (batch, time, depth) shape expected by the LSTM
        token_lstm_input_drop_expanded = tf.expand_dims(token_lstm_input_drop, axis=0,
                                                        name='token_lstm_input_drop_expanded')
        if self.verbose: print("token_lstm_input_drop_expanded: {0}".format(token_lstm_input_drop_expanded))

    # Token LSTM layer
    with tf.variable_scope('token_lstm') as vs:
        token_lstm_output = bidirectional_LSTM(token_lstm_input_drop_expanded,
                                               parameters['token_lstm_hidden_state_dimension'],
                                               initializer,
                                               output_sequence=True)
        token_lstm_output_squeezed = tf.squeeze(token_lstm_output, axis=0, name='token_lstm_output_squeezed')
        self.token_lstm_variables = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope=vs.name)

    # Needed only if a bidirectional LSTM is used for the token level
    with tf.variable_scope("feedforward_after_lstm") as vs:
        W = tf.get_variable(
            "W",
            shape=[2 * parameters['token_lstm_hidden_state_dimension'],
                   parameters['token_lstm_hidden_state_dimension']],
            initializer=initializer)
        b = tf.Variable(tf.constant(0.0, shape=[parameters['token_lstm_hidden_state_dimension']]), name="bias")
        # Fully connected layer with tanh activation
        outputs = tf.nn.xw_plus_b(token_lstm_output_squeezed, W, b, name="output_before_tanh")
        outputs = tf.nn.tanh(outputs, name="output_after_tanh")
        self.output_after_lstm = outputs
        utils_tf.variable_summaries(W)
        utils_tf.variable_summaries(b)
        self.token_lstm_variables += tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope=vs.name)

    with tf.variable_scope("feedforward_before_crf") as vs:
        self.W_before_crf = tf.get_variable(
            "W",
            shape=[parameters['token_lstm_hidden_state_dimension'], dataset.number_of_classes],
            initializer=initializer)
        self.b_before_crf = tf.Variable(tf.constant(0.0, shape=[dataset.number_of_classes]), name="bias")
        scores = tf.nn.xw_plus_b(outputs, self.W_before_crf, self.b_before_crf, name="scores")
        self.unary_scores = scores
        self.predictions = tf.argmax(self.unary_scores, 1, name="predictions")
        utils_tf.variable_summaries(self.W_before_crf)
        utils_tf.variable_summaries(self.b_before_crf)
        self.feedforward_variables = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope=vs.name)

    # CRF layer
    if parameters['use_crf']:
        with tf.variable_scope("crf") as vs:
            # Add start and end tokens
            small_score = -1000.0
            large_score = 0.0
            sequence_length = tf.shape(self.unary_scores)[0]
            unary_scores_with_start_and_end = tf.concat(
                [self.unary_scores,
                 tf.tile(tf.constant(small_score, shape=[1, 2]), [sequence_length, 1])], 1)
            start_unary_scores = [[small_score] * dataset.number_of_classes + [large_score, small_score]]
            end_unary_scores = [[small_score] * dataset.number_of_classes + [small_score, large_score]]
            self.unary_scores = tf.concat([start_unary_scores, unary_scores_with_start_and_end,
                                           end_unary_scores], 0)
            start_index = dataset.number_of_classes
            end_index = dataset.number_of_classes + 1
            input_label_indices_flat_with_start_and_end = tf.concat(
                [tf.constant(start_index, shape=[1]), self.input_label_indices_flat,
                 tf.constant(end_index, shape=[1])], 0)

            # Apply CRF layer
            sequence_length = tf.shape(self.unary_scores)[0]
            sequence_lengths = tf.expand_dims(sequence_length, axis=0, name='sequence_lengths')
            unary_scores_expanded = tf.expand_dims(self.unary_scores, axis=0, name='unary_scores_expanded')
            input_label_indices_flat_batch = tf.expand_dims(input_label_indices_flat_with_start_and_end,
                                                            axis=0, name='input_label_indices_flat_batch')
            if self.verbose: print('unary_scores_expanded: {0}'.format(unary_scores_expanded))
            if self.verbose: print('input_label_indices_flat_batch: {0}'.format(input_label_indices_flat_batch))
            if self.verbose: print("sequence_lengths: {0}".format(sequence_lengths))

            # https://github.com/tensorflow/tensorflow/tree/master/tensorflow/contrib/crf
            # Compute the log-likelihood of the gold sequence and keep the transition
            # parameters for inference at test time
            log_likelihood, self.transition_parameters = tf.contrib.crf.crf_log_likelihood(
                unary_scores_expanded, input_label_indices_flat_batch, sequence_lengths)
            utils_tf.variable_summaries(self.transition_parameters)
            self.loss = tf.reduce_mean(-log_likelihood, name='cross_entropy_mean_loss')
            self.accuracy = tf.constant(1)
            self.crf_variables = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope=vs.name)

    # Do not use CRF layer
    else:
        with tf.variable_scope("crf") as vs:
            self.transition_parameters = tf.get_variable(
                "transitions",
                shape=[dataset.number_of_classes + 2, dataset.number_of_classes + 2],
                initializer=initializer)
            utils_tf.variable_summaries(self.transition_parameters)
            self.crf_variables = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope=vs.name)

        # Calculate mean cross-entropy loss
        with tf.variable_scope("loss"):
            losses = tf.nn.softmax_cross_entropy_with_logits(logits=self.unary_scores,
                                                             labels=self.input_label_indices_vector,
                                                             name='softmax')
            self.loss = tf.reduce_mean(losses, name='cross_entropy_mean_loss')
        with tf.variable_scope("accuracy"):
            correct_predictions = tf.equal(self.predictions,
                                           tf.argmax(self.input_label_indices_vector, 1))
            self.accuracy = tf.reduce_mean(tf.cast(correct_predictions, 'float'), name='accuracy')

    self.define_training_procedure(parameters, dataset)
    self.summary_op = tf.summary.merge_all()
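# ---------------------------------------------------------------------------
# Usage sketch (added for illustration, not part of the original code).
# It assumes a `model` instance of the class above, a `sess` tf.Session, and
# pre-computed index arrays; the placeholder names come from __init__, the
# rest is hypothetical. This first version processes a single sentence per
# step, so there is no batch dimension.
#
#   feed_dict = {
#       model.input_token_indices: [12, 7, 345],              # one vocabulary index per token
#       model.input_token_character_indices: char_indices,    # [num_tokens, max_token_length]
#       model.input_token_lengths: [3, 2, 5],                 # number of characters per token
#       model.input_label_indices_flat: [1, 0, 2],            # gold label index per token
#       model.input_label_indices_vector: one_hot_labels,     # [num_tokens, number_of_classes], float
#       model.dropout_keep_prob: 0.5,                         # use 1.0 at evaluation time
#   }
#   loss, predictions = sess.run([model.loss, model.predictions], feed_dict=feed_dict)
# ---------------------------------------------------------------------------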
def __init__(self, dataset, parameters):
    self.verbose = False

    # Placeholders for input, output and dropout
    self.input_token_indices = tf.placeholder(tf.int32, [None], name="input_token_indices")
    self.input_label_indices_vector = tf.placeholder(tf.float32, [None, dataset.number_of_classes],
                                                     name="input_label_indices_vector")
    self.input_label_indices_flat = tf.placeholder(tf.int32, [None], name="input_label_indices_flat")
    self.input_token_character_indices = tf.placeholder(tf.int32, [None, None],
                                                        name="input_token_character_indices")
    self.input_token_lengths = tf.placeholder(tf.int32, [None], name="input_token_lengths")
    self.dropout_keep_prob = tf.placeholder(tf.float32, name="dropout_keep_prob")
    if parameters['use_pos']:
        self.input_pos_tag_indices = tf.placeholder(tf.int32, [None, dataset.number_of_POS_types],
                                                    name="input_pos_tag_indices")
    if parameters['use_gaz']:
        self.input_gaz_indices = tf.placeholder(tf.int32, [None, 1], name="input_gaz_indices")
    if parameters['use_aff']:
        self.input_aff_indices = tf.placeholder(tf.int32, [None, 1], name="input_aff_indices")

    # Internal parameters
    initializer = tf.contrib.layers.xavier_initializer()

    if parameters['use_character_lstm']:
        # Character-level LSTM
        # Idea: reshape so that we have a tensor [number_of_tokens, max_token_length, character_embedding_dimension],
        # which is passed to the LSTM

        # Character embedding layer
        with tf.variable_scope("character_embedding"):
            self.character_embedding_weights = tf.get_variable(
                "character_embedding_weights",
                shape=[dataset.alphabet_size, parameters['character_embedding_dimension']],
                initializer=initializer)
            embedded_characters = tf.nn.embedding_lookup(self.character_embedding_weights,
                                                         self.input_token_character_indices,
                                                         name='embedded_characters')
            if self.verbose: print("embedded_characters: {0}".format(embedded_characters))
            utils_tf.variable_summaries(self.character_embedding_weights)

        # Character LSTM layer
        with tf.variable_scope('character_lstm') as vs:
            character_lstm_output = bidirectional_LSTM(embedded_characters,
                                                       parameters['character_lstm_hidden_state_dimension'],
                                                       initializer,
                                                       sequence_length=self.input_token_lengths,
                                                       output_sequence=False)
            self.character_lstm_variables = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope=vs.name)

    # Token embedding layer
    with tf.variable_scope("token_embedding"):
        self.token_embedding_weights = tf.get_variable(
            "token_embedding_weights",
            shape=[dataset.vocabulary_size, parameters['token_embedding_dimension']],
            initializer=initializer,
            trainable=not parameters['freeze_token_embeddings'])
        embedded_tokens = tf.nn.embedding_lookup(self.token_embedding_weights, self.input_token_indices)
        utils_tf.variable_summaries(self.token_embedding_weights)

    if parameters['use_pos']:
        # POS embedding layer
        with tf.variable_scope("pos_tag_embedding"):
            self.pos_tag_embedding_weights = tf.get_variable(
                "pos_tag_embedding_weights",
                shape=[dataset.number_of_POS_types],  # one scalar weight per POS type
                initializer=initializer,
                trainable=not parameters['freeze_pos'])
            embedded_pos_tags = tf.nn.embedding_lookup(self.pos_tag_embedding_weights,
                                                       self.input_pos_tag_indices,
                                                       name='embedded_pos_tags')
            if self.verbose: print("embedded_pos_tags: {0}".format(embedded_pos_tags))
            utils_tf.variable_summaries(self.pos_tag_embedding_weights)

    if parameters['use_gaz']:
        # Gazetteer embedding layer
        with tf.variable_scope("gaz_embedding"):
            self.gaz_embedding_weights = tf.get_variable(
                "gaz_embedding_weights",
                shape=[2],  # binary gazetteer feature
                initializer=initializer,
                trainable=not parameters['freeze_gaz'])
            embedded_gazs = tf.nn.embedding_lookup(self.gaz_embedding_weights, self.input_gaz_indices,
                                                   name='embedded_gazs')
            if self.verbose: print("embedded_gazs: {0}".format(embedded_gazs))
            utils_tf.variable_summaries(self.gaz_embedding_weights)

    if parameters['use_aff']:
        # Affix embedding layer
        with tf.variable_scope("aff_embedding"):
            self.aff_embedding_weights = tf.get_variable(
                "aff_embedding_weights",
                shape=[2],  # binary affix feature
                initializer=initializer,
                trainable=not parameters['freeze_aff'])
            embedded_affs = tf.nn.embedding_lookup(self.aff_embedding_weights, self.input_aff_indices,
                                                   name='embedded_affs')
            if self.verbose: print("embedded_affs: {0}".format(embedded_affs))
            utils_tf.variable_summaries(self.aff_embedding_weights)

    '''
    # POS LSTM layer
    with tf.variable_scope('pos_tag_lstm') as vs:
        pos_tag_lstm_output = bidirectional_LSTM(embedded_pos_tags, 1, initializer,
                                                 sequence_length=self.input_token_lengths,
                                                 output_sequence=False)
        self.pos_tag_lstm_variables = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope=vs.name)
    '''

    # Concatenate the token embeddings with the enabled feature vectors
    # (affixes, gazetteers, POS tags, character LSTM outputs, in that order).
    # This replaces the original nest of per-combination branches; note that POS
    # vectors are now also used when the character LSTM is disabled.
    with tf.variable_scope("concatenate_token_and_feature_vectors"):
        if self.verbose: print('embedded_tokens: {0}'.format(embedded_tokens))
        feature_vectors = []
        if parameters['use_aff']:
            feature_vectors.append(embedded_affs)
        if parameters['use_gaz']:
            feature_vectors.append(embedded_gazs)
        if parameters['use_pos']:
            feature_vectors.append(embedded_pos_tags)
        if parameters['use_character_lstm']:
            feature_vectors.append(character_lstm_output)
        feature_vectors.append(embedded_tokens)
        if len(feature_vectors) == 1:
            token_lstm_input = embedded_tokens
        else:
            token_lstm_input = tf.concat(feature_vectors, axis=1, name='token_lstm_input')
        if self.verbose: print("token_lstm_input: {0}".format(token_lstm_input))

    # Add dropout
    with tf.variable_scope("dropout"):
        token_lstm_input_drop = tf.nn.dropout(token_lstm_input, self.dropout_keep_prob,
                                              name='token_lstm_input_drop')
        if self.verbose: print("token_lstm_input_drop: {0}".format(token_lstm_input_drop))
        # https://www.tensorflow.org/api_guides/python/contrib.rnn
        # Add a batch dimension of size 1 so the input has shape (batch_size, n_steps, n_input)
        token_lstm_input_drop_expanded = tf.expand_dims(token_lstm_input_drop, axis=0,
                                                        name='token_lstm_input_drop_expanded')
        if self.verbose: print("token_lstm_input_drop_expanded: {0}".format(token_lstm_input_drop_expanded))

    # Token LSTM layer
    with tf.variable_scope('token_lstm') as vs:
        token_lstm_output = bidirectional_LSTM(token_lstm_input_drop_expanded,
                                               parameters['token_lstm_hidden_state_dimension'],
                                               initializer,
                                               output_sequence=True)
        token_lstm_output_squeezed = tf.squeeze(token_lstm_output, axis=0)
        self.token_lstm_variables = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope=vs.name)

    # Needed only if a bidirectional LSTM is used for the token level
    with tf.variable_scope("feedforward_after_lstm") as vs:
        W = tf.get_variable(
            "W",
            shape=[2 * parameters['token_lstm_hidden_state_dimension'],
                   parameters['token_lstm_hidden_state_dimension']],
            initializer=initializer)
        b = tf.Variable(tf.constant(0.0, shape=[parameters['token_lstm_hidden_state_dimension']]), name="bias")
        # Fully connected layer with tanh activation
        outputs = tf.nn.xw_plus_b(token_lstm_output_squeezed, W, b, name="output_before_tanh")
        outputs = tf.nn.tanh(outputs, name="output_after_tanh")
        utils_tf.variable_summaries(W)
        utils_tf.variable_summaries(b)
        self.token_lstm_variables += tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope=vs.name)

    with tf.variable_scope("feedforward_before_crf") as vs:
        W = tf.get_variable(
            "W",
            shape=[parameters['token_lstm_hidden_state_dimension'], dataset.number_of_classes],
            initializer=initializer)
        b = tf.Variable(tf.constant(0.0, shape=[dataset.number_of_classes]), name="bias")
        scores = tf.nn.xw_plus_b(outputs, W, b, name="scores")
        self.unary_scores = scores
        self.predictions = tf.argmax(self.unary_scores, 1, name="predictions")
        utils_tf.variable_summaries(W)
        utils_tf.variable_summaries(b)
        self.feedforward_variables = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope=vs.name)

    # CRF layer
    if parameters['use_crf']:
        with tf.variable_scope("crf") as vs:
            # Add start and end tokens
            small_score = -1000.0
            large_score = 0.0
            sequence_length = tf.shape(self.unary_scores)[0]
            unary_scores_with_start_and_end = tf.concat(
                [self.unary_scores,
                 tf.tile(tf.constant(small_score, shape=[1, 2]), [sequence_length, 1])], 1)
            start_unary_scores = [[small_score] * dataset.number_of_classes + [large_score, small_score]]
            end_unary_scores = [[small_score] * dataset.number_of_classes + [small_score, large_score]]
            self.unary_scores = tf.concat([start_unary_scores, unary_scores_with_start_and_end,
                                           end_unary_scores], 0)
            start_index = dataset.number_of_classes
            end_index = dataset.number_of_classes + 1
            input_label_indices_flat_with_start_and_end = tf.concat(
                [tf.constant(start_index, shape=[1]), self.input_label_indices_flat,
                 tf.constant(end_index, shape=[1])], 0)

            # Apply CRF layer
            sequence_length = tf.shape(self.unary_scores)[0]
            sequence_lengths = tf.expand_dims(sequence_length, axis=0, name='sequence_lengths')
            unary_scores_expanded = tf.expand_dims(self.unary_scores, axis=0, name='unary_scores_expanded')
            input_label_indices_flat_batch = tf.expand_dims(input_label_indices_flat_with_start_and_end,
                                                            axis=0, name='input_label_indices_flat_batch')
            if self.verbose: print('unary_scores_expanded: {0}'.format(unary_scores_expanded))
            if self.verbose: print('input_label_indices_flat_batch: {0}'.format(input_label_indices_flat_batch))
            if self.verbose: print("sequence_lengths: {0}".format(sequence_lengths))

            # https://github.com/tensorflow/tensorflow/tree/master/tensorflow/contrib/crf
            # Compute the log-likelihood of the gold sequence and keep the transition
            # parameters for inference at test time
            self.transition_parameters = tf.get_variable(
                "transitions",
                shape=[dataset.number_of_classes + 2, dataset.number_of_classes + 2],
                initializer=initializer)
            utils_tf.variable_summaries(self.transition_parameters)
            log_likelihood, _ = tf.contrib.crf.crf_log_likelihood(
                unary_scores_expanded, input_label_indices_flat_batch, sequence_lengths,
                transition_params=self.transition_parameters)
            self.loss = tf.reduce_mean(-log_likelihood, name='cross_entropy_mean_loss')
            self.accuracy = tf.constant(1)
            self.crf_variables = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope=vs.name)

    # Do not use CRF layer
    else:
        with tf.variable_scope("crf") as vs:
            self.transition_parameters = tf.get_variable(
                "transitions",
                shape=[dataset.number_of_classes + 2, dataset.number_of_classes + 2],
                initializer=initializer)
            utils_tf.variable_summaries(self.transition_parameters)
            self.crf_variables = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope=vs.name)

        # Calculate mean cross-entropy loss
        with tf.variable_scope("loss"):
            losses = tf.nn.softmax_cross_entropy_with_logits(logits=self.unary_scores,
                                                             labels=self.input_label_indices_vector,
                                                             name='softmax')
            self.loss = tf.reduce_mean(losses, name='cross_entropy_mean_loss')
        with tf.variable_scope("accuracy"):
            correct_predictions = tf.equal(self.predictions,
                                           tf.argmax(self.input_label_indices_vector, 1))
            self.accuracy = tf.reduce_mean(tf.cast(correct_predictions, 'float'), name='accuracy')

    self.define_training_procedure(parameters)
    self.summary_op = tf.summary.merge_all()
    # Defaults to saving all variables
    self.saver = tf.train.Saver(max_to_keep=parameters['maximum_number_of_epochs'])
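# ---------------------------------------------------------------------------
# Usage sketch (added for illustration, not part of the original code).
# Compared with the first version, this one adds optional POS, gazetteer and
# affix features; the corresponding placeholders are fed only when the matching
# `use_pos` / `use_gaz` / `use_aff` parameters are enabled. `model`, `sess` and
# the feature arrays below are hypothetical; shapes follow the placeholders.
#
#   feed_dict = {
#       model.input_token_indices: token_indices,
#       model.input_token_character_indices: char_indices,
#       model.input_token_lengths: token_lengths,
#       model.input_label_indices_flat: label_indices,
#       model.input_label_indices_vector: one_hot_labels,
#       model.dropout_keep_prob: 0.5,
#   }
#   if parameters['use_pos']:
#       feed_dict[model.input_pos_tag_indices] = pos_indices   # [num_tokens, number_of_POS_types]
#   if parameters['use_gaz']:
#       feed_dict[model.input_gaz_indices] = gaz_indices       # [num_tokens, 1], 0/1 gazetteer flag
#   if parameters['use_aff']:
#       feed_dict[model.input_aff_indices] = aff_indices       # [num_tokens, 1], 0/1 affix flag
#   loss = sess.run(model.loss, feed_dict=feed_dict)
# ---------------------------------------------------------------------------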
def __init__(self, dataset, parameters):
    self.verbose = False

    # Placeholders for input, output and dropout
    self.input_token_indices = tf.placeholder(tf.int32, [None, None],
                                              name="input_token_indices")  # [batch_size, max_sentence_length]
    self.input_sequence_lengths = tf.placeholder(tf.int32, [None],
                                                 name="input_sequence_lengths")  # [batch_size]
    # One-hot label vectors, float so they can be fed to softmax_cross_entropy_with_logits
    self.input_label_indices_vector = tf.placeholder(tf.float32, [None, None, dataset.number_of_classes],
                                                     name="input_label_indices_vector")
    self.input_label_indices_flat = tf.placeholder(tf.int32, [None, None],
                                                   name="input_label_indices_flat")  # [batch_size, max_sentence_length]
    self.input_token_character_indices = tf.placeholder(tf.int32, [None, None, None],
                                                        name="input_token_character_indices")  # [batch_size, max_sentence_length, max_token_length]
    self.input_token_lengths = tf.placeholder(tf.int32, [None, None],
                                              name="input_token_lengths")  # [batch_size, max_sentence_length]
    self.dropout_keep_prob = tf.placeholder(tf.float32, name="dropout_keep_prob")
    self.embedding_dim = parameters['embedding_dimension']

    batch_size = tf.shape(self.input_token_character_indices)[0]
    sentence_size = tf.shape(self.input_token_character_indices)[1]
    token_size = tf.shape(self.input_token_character_indices)[2]

    # Internal parameters
    initializer = tf.contrib.layers.xavier_initializer()

    if parameters['use_character_lstm']:
        # Character-level LSTM
        # Idea: reshape so that we have a tensor [number_of_tokens, max_token_length, character_embedding_dimension],
        # which is passed to the LSTM

        # Character embedding layer
        with tf.variable_scope("character_embedding"):
            self.character_embedding_weights = tf.get_variable(
                "character_embedding_weights",
                shape=[dataset.alphabet_size, parameters['character_embedding_dimension']],
                initializer=initializer)
            embedded_characters = tf.nn.embedding_lookup(self.character_embedding_weights,
                                                         self.input_token_character_indices,
                                                         name='embedded_characters')
            if self.verbose: print("embedded_characters: {0}".format(embedded_characters))
            utils_tf.variable_summaries(self.character_embedding_weights)

        # Character LSTM layer
        with tf.variable_scope('character_lstm') as vs:
            # Flatten the batch and sentence dimensions so that each token is one character sequence
            embedded_characters = tf.reshape(embedded_characters,
                                             [batch_size * sentence_size, token_size,
                                              parameters['character_embedding_dimension']])
            input_token_lengths = tf.reshape(self.input_token_lengths, [-1])
            if parameters['gru_neuron']:
                character_lstm_output = bidirectional_GRU(embedded_characters,
                                                          parameters['character_lstm_hidden_state_dimension'],
                                                          parameters['character_hidden_layer'],
                                                          initializer,
                                                          sequence_length=input_token_lengths,
                                                          output_sequence=False)
            else:
                character_lstm_output = bidirectional_LSTM(embedded_characters,
                                                           parameters['character_lstm_hidden_state_dimension'],
                                                           initializer,
                                                           sequence_length=input_token_lengths,
                                                           output_sequence=False)
            self.character_lstm_variables = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope=vs.name)
            # Restore the batch and sentence dimensions
            character_lstm_output = tf.reshape(character_lstm_output,
                                               [batch_size, sentence_size,
                                                2 * parameters['character_lstm_hidden_state_dimension']])

    # Token embedding layer
    with tf.variable_scope("token_embedding"):
        self.token_embedding_weights = tf.get_variable(
            "token_embedding_weights",
            shape=[dataset.vocabulary_size, parameters['embedding_dimension']],
            initializer=initializer,
            trainable=not parameters['freeze_token_embeddings'])
        embedded_tokens = tf.nn.embedding_lookup(self.token_embedding_weights, self.input_token_indices)
        utils_tf.variable_summaries(self.token_embedding_weights)

    # Concatenate character LSTM outputs and token embeddings
    if parameters['use_character_lstm']:
        with tf.variable_scope("concatenate_token_and_character_vectors"):
            if self.verbose: print('embedded_tokens: {0}'.format(embedded_tokens))
            token_lstm_input = tf.concat([character_lstm_output, embedded_tokens], axis=-1,
                                         name='token_lstm_input')
            if self.verbose: print("token_lstm_input: {0}".format(token_lstm_input))
    else:
        token_lstm_input = embedded_tokens

    # Add dropout
    with tf.variable_scope("dropout"):
        token_lstm_input_drop = tf.nn.dropout(token_lstm_input, self.dropout_keep_prob,
                                              name='token_lstm_input_drop')
        if self.verbose: print("token_lstm_input_drop: {0}".format(token_lstm_input_drop))
        # The input already has shape (batch_size, max_sentence_length, depth),
        # so no expand_dims is needed here, unlike the single-sentence versions above
        token_lstm_input_drop_expanded = token_lstm_input_drop
        if self.verbose: print("token_lstm_input_drop_expanded: {0}".format(token_lstm_input_drop_expanded))

    # Token LSTM layer
    with tf.variable_scope('token_lstm') as vs:
        if parameters['gru_neuron']:
            token_lstm_output = bidirectional_GRU(token_lstm_input_drop_expanded,
                                                  parameters['token_lstm_hidden_state_dimension'],
                                                  parameters['token_hidden_layer'],
                                                  initializer=initializer,
                                                  sequence_length=self.input_sequence_lengths,
                                                  output_sequence=True)
        else:
            token_lstm_output = bidirectional_LSTM(token_lstm_input_drop_expanded,
                                                   parameters['token_lstm_hidden_state_dimension'],
                                                   initializer=initializer,
                                                   sequence_length=self.input_sequence_lengths,
                                                   output_sequence=True)
        self.token_lstm_variables = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope=vs.name)

    # Needed only if a bidirectional LSTM is used for the token level
    with tf.variable_scope("feedforward_after_lstm") as vs:
        # Flatten to [batch_size * max_sentence_length, 2 * hidden] so the dense layer is applied per token
        token_lstm_output_squeezed = tf.reshape(token_lstm_output,
                                                [batch_size * sentence_size,
                                                 2 * parameters['token_lstm_hidden_state_dimension']])
        W = tf.get_variable(
            "W",
            shape=[2 * parameters['token_lstm_hidden_state_dimension'],
                   parameters['token_lstm_hidden_state_dimension']],
            initializer=initializer)
        b = tf.Variable(tf.constant(0.0, shape=[parameters['token_lstm_hidden_state_dimension']]), name="bias")
        outputs = tf.nn.xw_plus_b(token_lstm_output_squeezed, W, b, name="output_before_tanh")
        outputs = tf.nn.tanh(outputs, name="output_after_tanh")
        utils_tf.variable_summaries(W)
        utils_tf.variable_summaries(b)
        self.token_lstm_variables += tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope=vs.name)

    with tf.variable_scope("feedforward_before_crf") as vs:
        W = tf.get_variable(
            "W",
            shape=[parameters['token_lstm_hidden_state_dimension'], dataset.number_of_classes],
            initializer=initializer)
        b = tf.Variable(tf.constant(0.0, shape=[dataset.number_of_classes]), name="bias")
        scores = tf.nn.xw_plus_b(outputs, W, b, name="scores")
        self.unary_scores = tf.reshape(scores, [batch_size, sentence_size, dataset.number_of_classes])
        # Predicted class per token: argmax over the class dimension (the last axis)
        self.predictions = tf.argmax(self.unary_scores, 2, name="predictions")
        utils_tf.variable_summaries(W)
        utils_tf.variable_summaries(b)
        self.feedforward_variables = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope=vs.name)

    # Add dropout on the unary scores (disabled)
    # with tf.variable_scope("dropout"):
    #     self.unary_scores = tf.nn.dropout(self.unary_scores, self.dropout_keep_prob, name='crf_input_drop')
    #     if self.verbose: print("crf_input_drop: {0}".format(self.unary_scores))

    # CRF layer
    if parameters['use_crf']:
        with tf.variable_scope("crf") as vs:
            # The explicit start/end token construction used in the single-sentence versions above
            # is no longer needed: crf_log_likelihood handles the whole padded batch directly.
            # https://github.com/tensorflow/tensorflow/tree/master/tensorflow/contrib/crf
            # Compute the log-likelihood of the gold sequences and keep the transition
            # parameters for inference at test time.
            self.transition_parameters = tf.get_variable(
                "transitions",
                shape=[dataset.number_of_classes, dataset.number_of_classes],
                initializer=initializer)
            utils_tf.variable_summaries(self.transition_parameters)
            log_likelihood, _ = tf.contrib.crf.crf_log_likelihood(
                self.unary_scores, self.input_label_indices_flat, self.input_sequence_lengths,
                transition_params=self.transition_parameters)
            self.loss = tf.reduce_mean(-log_likelihood, name='cross_entropy_mean_loss')
            self.accuracy = tf.constant(1)
            self.crf_variables = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope=vs.name)

    # Do not use CRF layer
    else:
        # Calculate mean cross-entropy loss, masking out the padded positions
        with tf.variable_scope("loss"):
            self.unary_scores = tf.reshape(self.unary_scores, [-1, dataset.number_of_classes])
            self.input_label_indices_vector = tf.reshape(self.input_label_indices_vector,
                                                         [-1, dataset.number_of_classes])
            losses = tf.nn.softmax_cross_entropy_with_logits(logits=self.unary_scores,
                                                             labels=self.input_label_indices_vector,
                                                             name='softmax')
            # Flatten the mask to match the per-token losses
            mask = tf.reshape(tf.sequence_mask(self.input_sequence_lengths, maxlen=sentence_size), [-1])
            losses = tf.boolean_mask(losses, mask)
            self.loss = tf.reduce_mean(losses, name='cross_entropy_mean_loss')
        with tf.variable_scope("accuracy"):
            correct_predictions = tf.equal(tf.reshape(self.predictions, [-1]),
                                           tf.argmax(self.input_label_indices_vector, 1))
            self.accuracy = tf.reduce_mean(tf.cast(correct_predictions, 'float'), name='accuracy')
        # Restore the [batch_size, max_sentence_length, number_of_classes] shapes
        self.unary_scores = tf.reshape(self.unary_scores, [batch_size, -1, dataset.number_of_classes])
        self.input_label_indices_vector = tf.reshape(self.input_label_indices_vector,
                                                     [batch_size, -1, dataset.number_of_classes])

    self.define_training_procedure(parameters)
    self.summary_op = tf.summary.merge_all()
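# ---------------------------------------------------------------------------
# Usage sketch (added for illustration, not part of the original code).
# This batched version is fed padded [batch_size, max_sentence_length] arrays
# plus the true sentence lengths. With `use_crf` enabled, test-time decoding
# can use the learned transition matrix with tf.contrib.crf.viterbi_decode,
# applied per sentence on the unary scores. `model`, `sess` and the arrays
# below are hypothetical.
#
#   feed_dict = {
#       model.input_token_indices: token_indices,            # [batch_size, max_sentence_length], padded
#       model.input_token_character_indices: char_indices,   # [batch_size, max_sentence_length, max_token_length]
#       model.input_token_lengths: token_lengths,             # [batch_size, max_sentence_length]
#       model.input_sequence_lengths: sentence_lengths,       # [batch_size], true lengths before padding
#       model.input_label_indices_flat: label_indices,        # [batch_size, max_sentence_length]
#       model.input_label_indices_vector: one_hot_labels,     # [batch_size, max_sentence_length, number_of_classes]
#       model.dropout_keep_prob: 1.0,
#   }
#   unary_scores, transitions = sess.run(
#       [model.unary_scores, model.transition_parameters], feed_dict=feed_dict)
#   for scores, length in zip(unary_scores, sentence_lengths):
#       viterbi_sequence, _ = tf.contrib.crf.viterbi_decode(scores[:length], transitions)
# ---------------------------------------------------------------------------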