def __init__(self, opts, test_opts=None):
    """Assemble the graph: feature inputs -> stacked LSTM(s) -> projection.

    Args:
        opts: Option object. This constructor reads ``batch_size``,
            ``embedding_dim``, ``suffix_dim``, ``cap``, ``num``, ``jk_dim``,
            ``nb_filters``, ``chars_dim``, ``bi``, ``units`` and
            ``num_layers`` from it.
        test_opts: Stored on the instance and forwarded to ``Dataset``.

    Sets ``self.weight``, ``self.loss`` and ``self.train_op``; whatever
    ``self.add_accuracy`` records is up to that method (its return value
    is ignored here).
    """
    self.opts = opts
    self.test_opts = test_opts
    self.loader = Dataset(opts, test_opts)
    self.batch_size = opts.batch_size
    self.get_features()
    self.add_placeholders()

    cfg = self.opts
    # NOTE(review): nb_filters is added even when chars_dim == 0; this is
    # only consistent if nb_filters is 0 in that case -- confirm.
    self.inputs_dim = (cfg.embedding_dim + cfg.suffix_dim + cfg.cap +
                       cfg.num + cfg.jk_dim + cfg.nb_filters)
    self.outputs_dim = (1 + cfg.bi) * cfg.units

    # Optional per-token features are concatenated after the word embedding.
    feature_tensors = [self.add_word_embedding()]
    if cfg.suffix_dim > 0:
        feature_tensors.append(self.add_suffix_embedding())
    if cfg.cap:
        feature_tensors.append(self.add_cap())
    if cfg.num:
        feature_tensors.append(self.add_num())
    if cfg.jk_dim > 0:
        feature_tensors.append(self.add_jackknife_embedding())
    if cfg.chars_dim > 0:
        feature_tensors.append(self.add_char_embedding())
    inputs_tensor = tf.concat(feature_tensors, 2)  ## [seq_len, batch_size, inputs_dim]

    forward = self.add_dropout(inputs_tensor, self.input_keep_prob)

    # 1.0 wherever the word id is non-zero, 0.0 elsewhere.
    word_ids = self.inputs_placeholder_dict['words']
    is_nonzero = tf.not_equal(word_ids,
                              tf.zeros(tf.shape(word_ids), tf.int32))
    self.weight = tf.cast(is_nonzero, tf.float32)  ## [batch_size, seq_len]

    for layer in xrange(cfg.num_layers):
        hidden = self.add_lstm(forward, layer, 'Forward')
        forward = self.add_dropout(hidden, self.keep_prob)  ## [seq_len, batch_size, units]
    lstm_outputs = forward

    if cfg.bi:
        # The backward stack consumes the time-reversed inputs; its output
        # is reversed back before being concatenated with the forward one.
        backward = self.add_dropout(tf.reverse(inputs_tensor, [0]),
                                    self.input_keep_prob)
        for layer in xrange(cfg.num_layers):
            hidden = self.add_lstm(backward, layer, 'Backward', True)
            backward = self.add_dropout(hidden, self.keep_prob)  ## [seq_len, batch_size, units]
        backward = tf.reverse(backward, [0])
        lstm_outputs = tf.concat([lstm_outputs, backward], 2)  ## [seq_len, batch_size, outputs_dim]

    # Project every time step, then switch to batch-major layout.
    projected_outputs = tf.map_fn(self.add_projection, lstm_outputs)  ## [seq_len, batch_size, nb_tags]
    projected_outputs = tf.transpose(projected_outputs, perm=[1, 0, 2])  ## [batch_size, seq_len, nb_tags]

    self.loss = self.add_loss_op(projected_outputs)
    self.train_op = self.add_train_op(self.loss)
    self.add_accuracy(projected_outputs)
def __init__(self, opts, test_opts=None, beam_size=0):
    """Assemble a unidirectional graph over time-reversed inputs.

    Notation used in the shape comments:
        b: batch_size, d: # units, n: # tokens in the sentence, B: beam_size

    Args:
        opts: Option object. NOTE: it is mutated -- ``opts.bi`` is forced
            to 0 (no bidirection) before the ``Dataset`` is created.
        test_opts: Stored on the instance and forwarded to ``Dataset``.
        beam_size: Stored as ``self.beam_size``; the beam-search graph that
            once consumed it is disabled in this constructor, so only the
            ``add_forward_path_mt`` path is built.
    """
    self.opts = opts
    self.opts.bi = 0  ## no bidirection
    self.test_opts = test_opts
    self.loader = Dataset(opts, test_opts)
    self.batch_size = 100  # hard-coded here (a previous value of 32 was tried)
    self.beam_size = beam_size
    self.add_placeholders()

    cfg = self.opts
    self.inputs_dim = (cfg.embedding_dim + cfg.suffix_dim + cfg.cap +
                       cfg.num + cfg.jk_dim)
    self.outputs_dim = (1 + cfg.bi) * cfg.units

    feature_tensors = [self.add_word_embedding()]
    if cfg.suffix_dim > 0:
        feature_tensors.append(self.add_suffix_embedding())
    if cfg.cap:
        feature_tensors.append(self.add_cap())
    if cfg.num:
        feature_tensors.append(self.add_num())
    if cfg.jk_dim > 0:
        feature_tensors.append(self.add_jackknife_embedding())
    inputs_tensor = tf.concat(feature_tensors, 2)  ## [seq_len, batch_size, inputs_dim]

    # The "forward" stack reads the sequence reversed along the time axis.
    layer_inputs = tf.reverse(inputs_tensor, [0])
    layer_inputs = self.add_dropout(layer_inputs, self.input_keep_prob)

    forward_cells = []
    forward_hs = []
    for layer in xrange(cfg.num_layers):
        cell, hidden = self.add_lstm(layer_inputs, layer, 'Forward')  ## [seq_len, batch_size, units]
        # The hidden state is recorded *before* dropout is applied to it.
        forward_hs.append(hidden)
        hidden = self.add_dropout(hidden, self.keep_prob)  ## [seq_len, batch_size, units]
        forward_cells.append(cell)
        layer_inputs = hidden

    self.predictions, projected_outputs = self.add_forward_path_mt(
        forward_cells, forward_hs, layer_inputs)  ## [seq_len, batch_size, nb_tags]

    # 1.0 wherever the first input placeholder is non-zero, 0.0 elsewhere.
    first_input = self.inputs_placeholder_list[0]
    is_nonzero = tf.not_equal(first_input,
                              tf.zeros(tf.shape(first_input), tf.int32))
    self.weight = tf.cast(is_nonzero, tf.float32)  ## [batch_size, seq_len]

    self.add_lm_accuracy()
    self.loss = self.add_loss_op(projected_outputs)
    self.train_op = self.add_train_op(self.loss)
def __init__(self, opts, test_opts=None):
    """Assemble the graph: embeddings -> stacked LSTM(s) -> biaffine scorers.

    Args:
        opts: Option object. Reads ``embedding_dim``, ``jk_dim``,
            ``stag_dim``, ``bi``, ``units`` and ``num_layers``.
        test_opts: Stored on the instance and forwarded to ``Dataset``.

    Sets ``self.arc_outputs``, ``self.rel_scores``, ``self.weight``,
    ``self.loss``, ``self.predicted_arcs``/``self.UAS``,
    ``self.predicted_rels``/``self.rel_acc`` and ``self.train_op``.
    """
    self.opts = opts
    self.test_opts = test_opts
    self.loader = Dataset(opts, test_opts)
    self.batch_size = 100  # hard-coded; opts.batch_size is not consulted here
    # NOTE(review): add_placeholders() runs without a preceding
    # get_features() call in this constructor -- confirm the enclosing
    # class prepares whatever add_placeholders() needs.
    self.add_placeholders()

    cfg = self.opts
    self.inputs_dim = cfg.embedding_dim + cfg.jk_dim + cfg.stag_dim
    self.outputs_dim = (1 + cfg.bi) * cfg.units

    feature_tensors = [self.add_word_embedding()]
    if cfg.jk_dim:
        feature_tensors.append(self.add_jackknife_embedding())
    if cfg.stag_dim > 0:
        feature_tensors.append(self.add_stag_embedding())
    inputs_tensor = tf.concat(feature_tensors, 2)  ## [seq_len, batch_size, inputs_dim]

    forward = self.add_dropout(inputs_tensor, self.input_keep_prob)
    for layer in xrange(cfg.num_layers):
        hidden = self.add_lstm(forward, layer, 'Forward')
        forward = self.add_dropout(hidden, self.keep_prob)  ## [seq_len, batch_size, units]
    lstm_outputs = forward

    if cfg.bi:
        # Backward stack: reverse in time, run, reverse back, concatenate.
        backward = self.add_dropout(tf.reverse(inputs_tensor, [0]),
                                    self.input_keep_prob)
        for layer in xrange(cfg.num_layers):
            hidden = self.add_lstm(backward, layer, 'Backward')
            backward = self.add_dropout(hidden, self.keep_prob)  ## [seq_len, batch_size, units]
        backward = tf.reverse(backward, [0])
        lstm_outputs = tf.concat([lstm_outputs, backward], 2)  ## [seq_len, batch_size, outputs_dim]

    self.arc_outputs, rel_outputs, self.rel_scores = self.add_biaffine(
        lstm_outputs)

    # Mask out zero-pads and <-root-> tokens: no need to worry about their
    # heads in the loss or the accuracies.
    word_ids = self.inputs_placeholder_dict['words']
    inputs_shape = tf.shape(word_ids)
    not_pad = tf.cast(
        tf.not_equal(word_ids, tf.zeros(inputs_shape, tf.int32)),
        tf.float32)
    root_ids = tf.ones(inputs_shape, tf.int32) * self.loader.word_index['<-root->']
    not_root = tf.cast(tf.not_equal(word_ids, root_ids), tf.float32)
    self.weight = not_pad * not_root  ## [batch_size, seq_len]

    gold_arcs = self.inputs_placeholder_dict['arcs']
    arc_loss = self.add_loss_op(self.arc_outputs, gold_arcs)
    gold_rels = self.inputs_placeholder_dict['rels']
    rel_loss = self.add_loss_op(rel_outputs, gold_rels)
    self.loss = arc_loss + rel_loss

    self.predicted_arcs, self.UAS = self.add_accuracy(
        self.arc_outputs, self.inputs_placeholder_dict['arcs'])
    self.predicted_rels, self.rel_acc = self.add_accuracy(
        rel_outputs, self.inputs_placeholder_dict['rels'])
    self.train_op = self.add_train_op(self.loss)
def __init__(self, opts, test_opts=None):
    """Assemble the joint graph: arcs, relations, supertags and jk tags.

    Args:
        opts: Option object. Reads ``embedding_dim``, ``jk_dim``,
            ``stag_dim``, ``nb_filters``, ``chars_dim``, ``bi``, ``units``
            and ``num_layers``.
        test_opts: Stored on the instance and forwarded to ``Dataset``.

    The loss is the sum of four ``add_loss_op`` terms (arcs, rels, stags,
    jk); an accuracy pair is stored for each of the four outputs.
    """
    self.opts = opts
    self.test_opts = test_opts
    self.loader = Dataset(opts, test_opts)
    self.batch_size = 100  # hard-coded; opts.batch_size is not consulted here
    self.get_features()
    self.add_placeholders()

    cfg = self.opts
    self.inputs_dim = (cfg.embedding_dim + cfg.jk_dim + cfg.stag_dim +
                       cfg.nb_filters)
    self.outputs_dim = (1 + cfg.bi) * cfg.units

    feature_tensors = [self.add_word_embedding()]
    if cfg.jk_dim:
        feature_tensors.append(self.add_jackknife_embedding())
    if cfg.stag_dim > 0:
        feature_tensors.append(self.add_stag_embedding())
    if cfg.chars_dim > 0:
        feature_tensors.append(self.add_char_embedding())
    layer_inputs = tf.concat(feature_tensors, 2)  ## [seq_len, batch_size, inputs_dim]
    layer_inputs = self.add_dropout(layer_inputs, self.input_keep_prob)

    placeholders = self.inputs_placeholder_dict
    inputs_shape = tf.shape(placeholders['words'])

    # First mark only the non-padding positions: the backward pass starts on
    # the zero pads and add_lstm(..., True) uses this mask to reinitialise
    # the hidden state after each padding.
    self.weight = tf.cast(
        tf.not_equal(placeholders['words'], tf.zeros(inputs_shape, tf.int32)),
        tf.float32)  ## [batch_size, seq_len]

    for layer in xrange(cfg.num_layers):
        forward_out = self.add_dropout(
            self.add_lstm(layer_inputs, layer, 'Forward'),
            self.keep_prob)  ## [seq_len, batch_size, units]
        if not cfg.bi:
            layer_inputs = forward_out
            continue
        backward_out = self.add_dropout(
            self.add_lstm(tf.reverse(layer_inputs, [0]), layer, 'Backward', True),
            self.keep_prob)  ## [seq_len, batch_size, units]
        # Both directions are concatenated after every layer.
        layer_inputs = tf.concat(
            [forward_out, tf.reverse(backward_out, [0])], 2)

    # From here on <-root-> positions are masked out as well: no need to
    # worry about the heads of <-root-> and zero-pads.
    root_ids = tf.ones(inputs_shape, tf.int32) * self.loader.word_index['<-root->']
    self.weight = self.weight * tf.cast(
        tf.not_equal(placeholders['words'], root_ids),
        tf.float32)  ## [batch_size, seq_len]

    lstm_outputs = layer_inputs  ## [seq_len, batch_size, outputs_dim]
    (self.arc_outputs, rel_outputs, self.rel_scores,
     joint_output, joint_output_jk) = self.add_biaffine(lstm_outputs)

    # Sum the four losses left to right, exactly as a chained `+` would.
    total_loss = None
    for output, gold_key in ((self.arc_outputs, 'arcs'),
                             (rel_outputs, 'rels'),
                             (joint_output, 'stags'),
                             (joint_output_jk, 'jk')):
        term = self.add_loss_op(output, placeholders[gold_key])
        total_loss = term if total_loss is None else total_loss + term
    self.loss = total_loss

    self.add_probs(joint_output)
    self.predicted_arcs, self.UAS = self.add_accuracy(
        self.arc_outputs, placeholders['arcs'])
    self.predicted_rels, self.rel_acc = self.add_accuracy(
        rel_outputs, placeholders['rels'])
    self.predicted_stags, self.stag_acc = self.add_accuracy(
        joint_output, placeholders['stags'])
    self.predicted_jk, self.jk_acc = self.add_accuracy(
        joint_output_jk, placeholders['jk'])
    self.train_op = self.add_train_op(self.loss)
def __init__(self, opts, test_opts=None, beam_size=0):
    """Assemble the graph: feature inputs -> stacked LSTM(s) -> CRF path.

    Notation used in the shape comments:
        b: batch_size, d: # units, n: # tokens in the sentence, B: beam_size

    Args:
        opts: Option object. Reads ``batch_size``, ``embedding_dim``,
            ``suffix_dim``, ``cap``, ``num``, ``jk_dim``, ``nb_filters``,
            ``chars_dim``, ``bi``, ``units`` and ``num_layers``.
        test_opts: Stored on the instance and forwarded to ``Dataset``.
        beam_size: Stored as ``self.beam_size`` and echoed to stdout; the
            beam-search graph that once consumed it is disabled in this
            constructor, so only the ``add_crf_path`` path is built.
    """
    self.opts = opts
    self.test_opts = test_opts
    self.loader = Dataset(opts, test_opts)
    self.batch_size = opts.batch_size
    self.beam_size = beam_size
    print('beam')
    print(beam_size)
    self.get_features()
    self.add_placeholders()

    cfg = self.opts
    self.inputs_dim = (cfg.embedding_dim + cfg.suffix_dim + cfg.cap +
                       cfg.num + cfg.jk_dim + cfg.nb_filters)
    self.outputs_dim = (1 + cfg.bi) * cfg.units

    feature_tensors = [self.add_word_embedding()]
    if cfg.suffix_dim > 0:
        feature_tensors.append(self.add_suffix_embedding())
    if cfg.cap:
        feature_tensors.append(self.add_cap())
    if cfg.num:
        feature_tensors.append(self.add_num())
    if cfg.jk_dim > 0:
        feature_tensors.append(self.add_jackknife_embedding())
    if cfg.chars_dim > 0:
        feature_tensors.append(self.add_char_embedding())
    inputs_tensor = tf.concat(feature_tensors, 2)  ## [seq_len, batch_size, inputs_dim]

    forward = self.add_dropout(inputs_tensor, self.input_keep_prob)
    for layer in xrange(cfg.num_layers):
        hidden = self.add_lstm(forward, layer, 'Forward')
        forward = self.add_dropout(hidden, self.keep_prob)  ## [seq_len, batch_size, units]
    lstm_outputs = forward

    if cfg.bi:
        # Backward stack: reverse in time, run, reverse back, concatenate.
        backward = self.add_dropout(tf.reverse(inputs_tensor, [0]),
                                    self.input_keep_prob)
        for layer in xrange(cfg.num_layers):
            hidden = self.add_lstm(backward, layer, 'Backward')
            backward = self.add_dropout(hidden, self.keep_prob)  ## [seq_len, batch_size, units]
        backward = tf.reverse(backward, [0])
        lstm_outputs = tf.concat([lstm_outputs, backward], 2)  ## [seq_len, batch_size, outputs_dim]

    ## [seq_len, batch_size, outputs_dim] => [seq_len, batch_size, lm]
    crf_inputs = tf.map_fn(self.feed_to_crf, lstm_outputs)
    crf_inputs = self.add_dropout(crf_inputs, self.keep_prob)

    self.predictions, projected_outputs = self.add_crf_path(crf_inputs)  ## [seq_len, batch_size, nb_tags]

    # 1.0 wherever the word id is non-zero, 0.0 elsewhere.
    word_ids = self.inputs_placeholder_dict['words']
    is_nonzero = tf.not_equal(word_ids,
                              tf.zeros(tf.shape(word_ids), tf.int32))
    self.weight = tf.cast(is_nonzero, tf.float32)  ## [batch_size, seq_len]

    self.add_lm_accuracy()
    self.loss = self.add_loss_op(projected_outputs)
    self.train_op = self.add_train_op(self.loss)
def __init__(self, opts, test_opts=None):
    """Assemble the graph with an adversarial loss mixed into the objective.

    Args:
        opts: Option object. Reads ``batch_size``, ``embedding_dim``,
            ``suffix_dim``, ``cap``, ``num``, ``jk_dim``, ``nb_filters``,
            ``chars_dim``, ``bi`` and ``units``.
        test_opts: Stored on the instance and forwarded to ``Dataset``.

    The final loss is ``0.5 * clean_loss + 0.5 * adv_loss``.
    """
    self.opts = opts
    self.test_opts = test_opts
    self.loader = Dataset(opts, test_opts)
    self.batch_size = opts.batch_size
    self.get_features()
    self.add_placeholders()

    cfg = self.opts
    self.inputs_dim = (cfg.embedding_dim + cfg.suffix_dim + cfg.cap +
                       cfg.num + cfg.jk_dim + cfg.nb_filters)
    self.outputs_dim = (1 + cfg.bi) * cfg.units

    feature_tensors = [self.add_word_embedding()]
    if cfg.suffix_dim > 0:
        feature_tensors.append(self.add_suffix_embedding())
    if cfg.cap:
        feature_tensors.append(self.add_cap())
    if cfg.num:
        feature_tensors.append(self.add_num())
    if cfg.jk_dim > 0:
        feature_tensors.append(self.add_jackknife_embedding())

    # Snapshot (shallow copy) of the inputs gathered so far; it must be
    # taken before the char branch runs, which is built separately below.
    self.adv_embedding = list(feature_tensors)

    if cfg.chars_dim > 0:
        feature_tensors.append(self.add_char_embedding())
    inputs_tensor = tf.concat(feature_tensors, 2)  ## [seq_len, batch_size, inputs_dim]

    # 1.0 wherever the word id is non-zero, 0.0 elsewhere.
    word_ids = self.inputs_placeholder_dict['words']
    is_nonzero = tf.not_equal(word_ids,
                              tf.zeros(tf.shape(word_ids), tf.int32))
    self.weight = tf.cast(is_nonzero, tf.float32)  ## [batch_size, seq_len]

    clean_loss, projected_outputs = self.feed_network_inputs(inputs_tensor)
    self.add_accuracy(projected_outputs)
    ## do not care about adversarial prediction accuracy
    adv_loss, _ = self.add_adversarial_loss(clean_loss)

    # Mixing weight between the clean and adversarial losses
    # (1.0 would disable the adversarial term).
    alpha = 0.5
    self.loss = alpha * clean_loss + (1.0 - alpha) * adv_loss
    self.train_op = self.add_train_op(self.loss)
class Parsing_Model(object):
    """Biaffine parser graph: embeddings -> stacked (bi)LSTM -> arc/rel scorers.

    The constructor builds the whole TensorFlow graph; ``run_epoch`` drives
    training or evaluation through ``self.loader``.

    NOTE(review): this class was reconstructed from whitespace-collapsed
    source. Statement order is preserved, but the exact extent of the
    ``with`` blocks in the embedding methods is a best guess (the variable
    scope is assumed to cover only ``tf.get_variable``) -- confirm against
    the original layout if checkpoint variable names matter.
    """

    # ------------------------------------------------------------------
    # Placeholders and input features
    # ------------------------------------------------------------------
    def add_placeholders(self):
        """Create one int32 placeholder per feature plus four dropout rates.

        Reads ``self.features``. The 'chars' feature gets a rank-3
        placeholder; every other feature gets a rank-2 one.
        """
        self.inputs_placeholder_dict = {}
        for feature in self.features:
            rank = 3 if feature == 'chars' else 2
            self.inputs_placeholder_dict[feature] = tf.placeholder(
                tf.int32, shape=[None] * rank)
        self.keep_prob = tf.placeholder(tf.float32)
        self.input_keep_prob = tf.placeholder(tf.float32)
        self.hidden_prob = tf.placeholder(tf.float32)
        self.mlp_prob = tf.placeholder(tf.float32)

    def _embed_feature(self, scope_name, var_name, shape, feature,
                       initializer=None):
        """Create an embedding matrix and look up one placeholder feature.

        Returns:
            (embedding variable, looked-up inputs transposed to
            [seq_len, batch_size, embedding_dim]).
        """
        with tf.device('/cpu:0'):
            with tf.variable_scope(scope_name):
                embedding = tf.get_variable(var_name, shape,
                                            initializer=initializer)
            inputs = tf.nn.embedding_lookup(
                embedding, self.inputs_placeholder_dict[feature]
            )  ## [batch_size, seq_len, embedding_dim]
            inputs = tf.transpose(
                inputs, perm=[1, 0, 2])  # [seq_len, batch_size, embedding_dim]
        return embedding, inputs

    def add_word_embedding(self):
        """Word embeddings, initialised from ``self.loader.word_embeddings``."""
        _, inputs = self._embed_feature(
            'word_embedding', 'word_embedding_mat',
            self.loader.word_embeddings.shape, 'words',
            initializer=tf.constant_initializer(self.loader.word_embeddings))
        return inputs

    def add_jackknife_embedding(self):
        """Embeddings for the 'jk' feature."""
        _, inputs = self._embed_feature(
            'jk_embedding', 'jk_embedding_mat',
            [self.loader.nb_jk + 1, self.opts.jk_dim],  # +1 for padding
            'jk')
        return inputs

    def add_stag_embedding(self):
        """Embeddings for the 'stags' feature.

        Also stores the embedding matrix as ``self.stag_embeddings``, which
        ``run_epoch`` dumps when ``test_opts.get_weight`` is set.
        """
        embedding, inputs = self._embed_feature(
            'stag_embedding', 'stag_embedding_mat',
            [self.loader.nb_stags + 1, self.opts.stag_dim],  # +1 for padding
            'stags')
        self.stag_embeddings = embedding
        return inputs

    def add_char_embedding(self):
        """Character-level word encoding with a zero vector for <-root->."""
        with tf.device('/cpu:0'):
            with tf.variable_scope('char_embedding'):
                embedding = tf.get_variable(
                    'char_embedding_mat',
                    [self.loader.nb_chars + 1, self.opts.chars_dim
                     ])  # +1 for padding
            inputs = tf.nn.embedding_lookup(
                embedding, self.inputs_placeholder_dict['chars']
            )  ## [batch_size, seq_len-1, word_len, embedding_dim]
            ## -1 because we don't have ROOT
            inputs = tf.transpose(inputs, perm=[1, 0, 2, 3])
            ## [seq_len-1, batch_size, word_len, embedding_dim]
            inputs = self.add_dropout(inputs, self.input_keep_prob)
            weights = get_char_weights(self.opts, 'char_encoding')
            inputs = encode_char(
                inputs, weights)  ## [seq_len-1, batch_size, nb_filters]
            shape = tf.shape(inputs)
            ## add 0 vectors for <-root->
            inputs = tf.concat([tf.zeros([1, shape[1], shape[2]]), inputs], 0)
        return inputs

    # ------------------------------------------------------------------
    # Recurrent layers
    # ------------------------------------------------------------------
    def _lstm_input_dim(self, i):
        """Return the input width of LSTM layer ``i``.

        Layer 0 reads the concatenated features (``self.inputs_dim``). Every
        later layer reads the previous layer's output, whose width is
        ``self.outputs_dim``: ``2 * units`` when bidirectional (directions
        are concatenated after each layer) and ``units`` otherwise. The
        previous hard-coded ``units * 2`` was wrong for the unidirectional
        case.
        """
        if i == 0:
            return self.inputs_dim
        return self.outputs_dim

    def add_lstm(self, inputs, i, name, backward=False):
        """Run one LSTM layer over ``inputs`` with ``tf.scan``.

        Args:
            inputs: time-major input tensor ([seq_len, batch_size, dim]
                per the call sites in ``__init__``).
            i: layer index; selects the weight name and the input width.
            name: direction label used in the weight name
                ('Forward' / 'Backward' at the call sites).
            backward: when true, the (transposed, time-reversed)
                ``self.weight`` mask is scanned alongside the inputs so
                ``lstm`` can reset states after zero paddings.

        Returns:
            Hidden states, [seq_len, batch_size, units].
        """
        prev_init = tf.zeros([2, tf.shape(inputs)[1],
                              self.opts.units])  # [2, batch_size, num_units]
        inputs_dim = self._lstm_input_dim(i)
        weights = get_lstm_weights('{}_LSTM_layer{}'.format(name, i),
                                   inputs_dim, self.opts.units,
                                   tf.shape(inputs)[1], self.hidden_prob)
        if backward:
            ## backward: reset states after zero paddings
            non_paddings = tf.transpose(
                self.weight,
                [1, 0])  ## [batch_size, seq_len] => [seq_len, batch_size]
            non_paddings = tf.reverse(non_paddings, [0])
            cell_hidden = tf.scan(
                lambda prev, x: lstm(prev, x, weights, backward=backward),
                [inputs, non_paddings], prev_init)
        else:
            cell_hidden = tf.scan(lambda prev, x: lstm(prev, x, weights),
                                  inputs, prev_init)
        # cell_hidden: [seq_len, 2, batch_size, units]; index 1 of axis 1 is
        # the hidden state.
        h = tf.unstack(cell_hidden, 2, axis=1)[1]  # [seq_len, batch_size, units]
        return h

    def add_dropout(self, inputs, keep_prob):
        """Apply one dropout mask shared across the leading axis.

        A single mask of shape ``tf.shape(inputs)[1:]`` is sampled and
        multiplied into every slice along axis 0.
        """
        ## inputs [seq_len, batch_size, inputs_dims/units]
        dummy_dp = tf.ones(tf.shape(inputs)[1:])
        dummy_dp = tf.nn.dropout(dummy_dp, keep_prob)
        return tf.map_fn(lambda x: dummy_dp * x, inputs)

    # ------------------------------------------------------------------
    # Output layers, loss, accuracy, optimiser
    # ------------------------------------------------------------------
    def add_projection(self, inputs):
        """Affine projection from ``outputs_dim`` to ``loader.nb_tags``."""
        with tf.variable_scope('Projection'):
            proj_U = tf.get_variable('weight',
                                     [self.outputs_dim, self.loader.nb_tags])
            proj_b = tf.get_variable('bias', [self.loader.nb_tags])
            outputs = tf.matmul(inputs, proj_U) + proj_b
        return outputs

    def add_loss_op(self, output, gold):
        """Sum of ``sequence_loss(output, gold, self.weight)``."""
        cross_entropy = sequence_loss(output, gold, self.weight)
        loss = tf.reduce_sum(cross_entropy)
        return loss

    def add_accuracy(self, output, gold):
        """Return (argmax predictions, accuracy over ``self.weight``).

        Accuracy is the weighted count of correct predictions divided by
        the sum of the weights.
        """
        predictions = tf.cast(tf.argmax(output, 2),
                              tf.int32)  ## [batch_size, seq_len]
        correct_predictions = self.weight * tf.cast(
            tf.equal(predictions, gold), tf.float32)
        accuracy = tf.reduce_sum(tf.cast(
            correct_predictions, tf.float32)) / tf.reduce_sum(
                tf.cast(self.weight, tf.float32))
        return predictions, accuracy

    def add_train_op(self, loss):
        """Minimise ``loss`` with a default-configured Adam optimiser."""
        optimizer = tf.train.AdamOptimizer()
        train_op = optimizer.minimize(loss)
        return train_op

    def get_features(self):
        """Set ``self.features`` from the enabled option dimensions."""
        self.features = ['words', 'arcs', 'rels']
        if self.opts.jk_dim > 0:
            self.features.append('jk')
        if self.opts.stag_dim > 0:
            self.features.append('stags')
        if self.opts.chars_dim > 0:
            self.features.append('chars')

    def _add_mlp_stack(self, inputs, role, mlp_units):
        """Stack ``opts.mlp_num_layers`` MLP layers (with dropout) for one role.

        The first layer reads ``self.outputs_dim`` features, later layers
        read ``mlp_units``. Weights are named ``'<role>_MLP_Layer<i>'``.
        """
        vector_mlp = inputs
        for i in xrange(self.opts.mlp_num_layers):
            inputs_dim = self.outputs_dim if i == 0 else mlp_units
            weights = get_mlp_weights('{}_MLP_Layer{}'.format(role, i),
                                      inputs_dim, mlp_units)
            # `weights` is consumed immediately while map_fn builds the
            # graph, so the closure cannot observe a later rebinding.
            vector_mlp = self.add_dropout(
                tf.map_fn(lambda x: mlp(x, weights), vector_mlp),
                self.mlp_prob)
        return vector_mlp

    def add_biaffine(self, inputs):
        """Build the arc and relation scorers on top of the LSTM outputs.

        Args:
            inputs: [seq_len, batch_size, units] LSTM outputs.

        Returns:
            (arc_output, rel_output, rel_scores) as produced by
            ``arc_equation`` and ``rel_equation``.
        """
        ## first define four different MLPs
        vectors = {}
        for arc_role in ['arc-dep', 'arc-head']:
            vectors[arc_role] = self._add_mlp_stack(
                inputs, arc_role, self.opts.arc_mlp_units)
        weights = get_arc_weights('arc', self.opts.arc_mlp_units)
        arc_output = arc_equation(
            vectors['arc-head'], vectors['arc-dep'], weights
        )  # [batch_size, seq_len, seq_len] dim 1: deps, dim 2: heads
        # Greedy head choice; relations are scored against these heads.
        arc_predictions = tf.argmax(arc_output, 2)  # [batch_size, seq_len]

        for rel_role in ['rel-dep', 'rel-head']:
            vectors[rel_role] = self._add_mlp_stack(
                inputs, rel_role, self.opts.rel_mlp_units)
        weights = get_rel_weights('rel', self.opts.rel_mlp_units,
                                  self.loader.nb_rels)
        rel_output, rel_scores = rel_equation(
            vectors['rel-head'], vectors['rel-dep'], weights,
            arc_predictions)  # [batch_size, seq_len, nb_rels]
        return arc_output, rel_output, rel_scores

    # ------------------------------------------------------------------
    # Graph construction
    # ------------------------------------------------------------------
    def __init__(self, opts, test_opts=None):
        """Load the data and build the full training/evaluation graph.

        Args:
            opts: Option object. Reads ``embedding_dim``, ``jk_dim``,
                ``stag_dim``, ``nb_filters``, ``chars_dim``, ``bi``,
                ``units`` and ``num_layers`` here (other methods read
                more fields).
            test_opts: Optional test options; stored and forwarded to
                ``Dataset``.
        """
        self.opts = opts
        self.test_opts = test_opts
        self.loader = Dataset(opts, test_opts)
        self.batch_size = 100  # hard-coded; opts.batch_size is not consulted
        self.get_features()
        self.add_placeholders()
        self.inputs_dim = (self.opts.embedding_dim + self.opts.jk_dim +
                           self.opts.stag_dim + self.opts.nb_filters)
        self.outputs_dim = (1 + self.opts.bi) * self.opts.units

        inputs_list = [self.add_word_embedding()]
        if self.opts.jk_dim:
            inputs_list.append(self.add_jackknife_embedding())
        if self.opts.stag_dim > 0:
            inputs_list.append(self.add_stag_embedding())
        if self.opts.chars_dim > 0:
            inputs_list.append(self.add_char_embedding())
        inputs_tensor = tf.concat(inputs_list,
                                  2)  ## [seq_len, batch_size, inputs_dim]
        inputs_tensor = self.add_dropout(inputs_tensor, self.input_keep_prob)

        words = self.inputs_placeholder_dict['words']
        inputs_shape = tf.shape(words)
        ## Let's get those non-padding places so we can reinitialize hidden
        ## states after each padding in the backward path, because the
        ## backward path starts with zero pads.
        self.weight = tf.cast(
            tf.not_equal(words, tf.zeros(inputs_shape, tf.int32)),
            tf.float32)  ## [batch_size, seq_len]

        for i in xrange(self.opts.num_layers):
            forward_outputs_tensor = self.add_dropout(
                self.add_lstm(inputs_tensor, i, 'Forward'),
                self.keep_prob)  ## [seq_len, batch_size, units]
            if self.opts.bi:
                backward_outputs_tensor = self.add_dropout(
                    self.add_lstm(tf.reverse(inputs_tensor, [0]), i,
                                  'Backward', True),
                    self.keep_prob)  ## [seq_len, batch_size, units]
                # Directions are concatenated after every layer.
                inputs_tensor = tf.concat([
                    forward_outputs_tensor,
                    tf.reverse(backward_outputs_tensor, [0])
                ], 2)
            else:
                inputs_tensor = forward_outputs_tensor

        ## no need to worry about the heads of <-root-> and zero-pads:
        ## from here on the mask also excludes <-root-> positions.
        self.weight = self.weight * tf.cast(
            tf.not_equal(
                words,
                tf.ones(inputs_shape, tf.int32) *
                self.loader.word_index['<-root->']),
            tf.float32)  ## [batch_size, seq_len]

        lstm_outputs = inputs_tensor  ## [seq_len, batch_size, outputs_dim]
        self.arc_outputs, rel_outputs, self.rel_scores = self.add_biaffine(
            lstm_outputs)

        self.loss = self.add_loss_op(
            self.arc_outputs,
            self.inputs_placeholder_dict['arcs']) + self.add_loss_op(
                rel_outputs, self.inputs_placeholder_dict['rels'])
        self.predicted_arcs, self.UAS = self.add_accuracy(
            self.arc_outputs, self.inputs_placeholder_dict['arcs'])
        self.predicted_rels, self.rel_acc = self.add_accuracy(
            rel_outputs, self.inputs_placeholder_dict['rels'])
        self.train_op = self.add_train_op(self.loss)

    # ------------------------------------------------------------------
    # Running the graph
    # ------------------------------------------------------------------
    def _build_feed(self, batch, testmode):
        """Build the feed dict for one batch.

        In test mode every dropout placeholder is fed 1.0; in training mode
        the rates come from ``self.opts``.
        """
        feed = {
            placeholder: batch[feat]
            for feat, placeholder in self.inputs_placeholder_dict.items()
        }
        if testmode:
            feed[self.keep_prob] = 1.0
            feed[self.hidden_prob] = 1.0
            feed[self.input_keep_prob] = 1.0
            feed[self.mlp_prob] = 1.0
        else:
            feed[self.keep_prob] = self.opts.dropout_p
            feed[self.hidden_prob] = self.opts.hidden_p
            feed[self.input_keep_prob] = self.opts.input_dp
            feed[self.mlp_prob] = self.opts.mlp_prob
        return feed

    def run_batch(self, session, testmode=False):
        """Run one batch that the loader has already prepared.

        Returns:
            Training: ``(loss, UAS, rel_acc)``.
            Test: ``(loss, predictions_batch, UAS)`` where
            ``predictions_batch`` has keys 'arcs_greedy', 'rels_greedy'
            (argmax predictions at masked-in positions) and 'arcs', 'rels'
            (output of ``predict_arcs_rels``).
        """
        if not testmode:
            feed = self._build_feed(self.loader.inputs_train_batch, False)
            _, loss, UAS, rel_acc = session.run(
                [self.train_op, self.loss, self.UAS, self.rel_acc],
                feed_dict=feed)
            return loss, UAS, rel_acc

        feed = self._build_feed(self.loader.inputs_test_batch, True)
        (loss, predicted_arcs, predicted_rels, UAS, weight, arc_outputs,
         rel_scores) = session.run([
             self.loss, self.predicted_arcs, self.predicted_rels, self.UAS,
             self.weight, self.arc_outputs, self.rel_scores
         ], feed_dict=feed)

        weight = weight.astype(bool)
        predictions_batch = {
            'arcs_greedy': predicted_arcs[weight],
            'rels_greedy': predicted_rels[weight],
        }
        # astype() returns a copy, so `weight` itself is left untouched.
        non_padding = weight.astype(bool)
        non_padding[:, 0] = True  ## take the dummy root nodes
        predicted_arcs, predicted_rels = predict_arcs_rels(
            arc_outputs, rel_scores, non_padding)
        predictions_batch['arcs'] = predicted_arcs
        predictions_batch['rels'] = predicted_rels
        return loss, predictions_batch, UAS

    def run_epoch(self, session, testmode=False):
        """Run one full pass over the training or the test data.

        Returns:
            Training: ``(loss, UAS)`` of the last batch. NOTE: if the
            loader yields no batch at all these names are never bound and
            the return statement raises.
            Test: the ``scores`` returned by ``self.loader.get_scores``.
        """
        if not testmode:
            epoch_start_time = time.time()
            next_batch = self.loader.next_batch
            epoch_incomplete = next_batch(self.batch_size)
            while epoch_incomplete:
                loss, UAS, rel_acc = self.run_batch(session)
                print('{}/{}, loss {:.4f}, Raw UAS {:.4f}, Rel Acc {:.4f}'.
                      format(self.loader._index_in_epoch,
                             self.loader.nb_train_samples, loss, UAS,
                             rel_acc),
                      end='\r')
                epoch_incomplete = next_batch(self.batch_size)
            print('\nEpoch Training Time {}'.format(time.time() -
                                                    epoch_start_time))
            return loss, UAS

        next_test_batch = self.loader.next_test_batch
        test_incomplete = next_test_batch(self.batch_size)
        output_types = ['arcs', 'rels', 'arcs_greedy', 'rels_greedy']
        predictions = {output_type: [] for output_type in output_types}
        while test_incomplete:
            loss, predictions_batch, UAS = self.run_batch(session, True)
            for name, pred in predictions_batch.items():
                predictions[name].append(pred)
            print('Test mode {}/{}, Raw UAS {:.4f}'.format(
                self.loader._index_in_test,
                self.loader.nb_validation_samples, UAS),
                  end='\r')
            test_incomplete = next_test_batch(self.batch_size)
        for name in output_types:
            predictions[name] = np.hstack(predictions[name])

        if self.test_opts is not None:
            self.loader.output_arcs(predictions['arcs'],
                                    self.test_opts.predicted_arcs_file)
            self.loader.output_rels(predictions['rels'],
                                    self.test_opts.predicted_rels_file)
            self.loader.output_arcs(
                predictions['arcs_greedy'],
                self.test_opts.predicted_arcs_file_greedy)
            self.loader.output_rels(
                predictions['rels_greedy'],
                self.test_opts.predicted_rels_file_greedy)
        scores = self.loader.get_scores(predictions, self.opts,
                                        self.test_opts)
        # test_opts defaults to None, so it must be checked before its
        # get_weight flag is read.
        if self.test_opts is not None and self.test_opts.get_weight:
            stag_embeddings = session.run(self.stag_embeddings)
            self.loader.output_weight(stag_embeddings)
        return scores
class Stagging_Model_Concat_Adv(object):
    """Stacked (bi)LSTM supertagger trained on clean plus adversarial inputs.

    Every layer concatenates its forward and backward outputs before feeding
    the next layer.  The training loss mixes the loss on the clean inputs
    with the loss obtained after shifting each input embedding along the
    normalised gradient of the clean loss (see ``add_adversarial_loss``).
    """

    def add_placeholders(self):
        """Create one int32 placeholder per entry of ``self.features`` plus
        the three float32 keep-probability placeholders."""
        self.inputs_placeholder_dict = {}
        for feature in self.features:
            # 'chars' has one more axis than the other features.
            if feature == 'chars':
                shape = [None, None, None]
            else:
                shape = [None, None]
            self.inputs_placeholder_dict[feature] = tf.placeholder(
                tf.int32, shape=shape)
        self.keep_prob = tf.placeholder(tf.float32)
        self.input_keep_prob = tf.placeholder(tf.float32)
        self.hidden_prob = tf.placeholder(tf.float32)

    def _lookup_time_major(self, embedding, feature):
        """Look up ``feature`` ids in ``embedding`` and swap the first two axes."""
        inputs = tf.nn.embedding_lookup(
            embedding, self.inputs_placeholder_dict[feature]
        )  ## [batch_size, seq_len, embedding_dim]
        return tf.transpose(
            inputs, perm=[1, 0, 2])  # [seq_length, batch_size, embedding_dim]

    def _add_indicator_feature(self, feature):
        """Cast an integer feature to float32 with a trailing unit axis.

        Returns:
            Tensor of shape [seq_length, batch_size, 1].
        """
        inputs = tf.cast(
            tf.expand_dims(self.inputs_placeholder_dict[feature], -1),
            tf.float32)
        return tf.transpose(inputs, perm=[1, 0, 2])

    def add_word_embedding(self):
        """Return normalised word embeddings, [seq_length, batch_size, embedding_dim].

        The embedding matrix is initialised from ``self.loader.word_embeddings``.
        """
        with tf.device('/cpu:0'):
            with tf.variable_scope('word_embedding'):
                embedding = tf.get_variable(
                    'word_embedding_mat',
                    self.loader.word_embeddings.shape,
                    initializer=tf.constant_initializer(
                        self.loader.word_embeddings))
            embedding = self.normalize_embedding(embedding,
                                                 self.loader.word_freqs)
            inputs = self._lookup_time_major(embedding, 'words')
        return inputs

    def add_suffix_embedding(self):
        """Return normalised suffix embeddings, [seq_length, batch_size, suffix_dim]."""
        with tf.device('/cpu:0'):
            with tf.variable_scope('suffix_embedding'):
                embedding = tf.get_variable(
                    'suffix_embedding_mat',
                    [self.loader.nb_suffixes + 1,
                     self.opts.suffix_dim])  # +1 for padding
            embedding = self.normalize_embedding(embedding,
                                                 self.loader.suffix_freqs)
            inputs = self._lookup_time_major(embedding, 'suffix')
        return inputs

    def add_cap(self):
        """Return the 'cap' feature as floats, [seq_length, batch_size, 1]."""
        return self._add_indicator_feature('cap')

    def add_num(self):
        """Return the 'num' feature as floats, [seq_length, batch_size, 1]."""
        return self._add_indicator_feature('num')

    def add_char_embedding(self):
        """Embed characters and encode each word with ``encode_char``.

        Side effects: appends the raw (pre-dropout) character embeddings to
        ``self.adv_embedding`` and stores the encoder weights in
        ``self.char_weights`` so the adversarial pass can reuse both.

        Returns:
            Tensor of shape [seq_len, batch_size, nb_filters].
        """
        with tf.device('/cpu:0'):
            with tf.variable_scope('char_embedding'):
                embedding = tf.get_variable(
                    'char_embedding_mat',
                    [self.loader.nb_chars + 1,
                     self.opts.chars_dim])  # +1 for padding
            embedding = self.normalize_embedding(embedding,
                                                 self.loader.char_freqs)
            inputs = tf.nn.embedding_lookup(
                embedding, self.inputs_placeholder_dict['chars']
            )  ## [batch_size, seq_len, word_len, embedding_dim]
            inputs = tf.transpose(inputs, perm=[1, 0, 2, 3])
            ## [seq_len, batch_size, word_len, embedding_dim]
            self.adv_embedding.append(inputs)
            inputs = self.add_dropout(inputs, self.input_keep_prob)
            weights = get_char_weights(self.opts, 'char_encoding')
            self.char_weights = weights  ## for adversarial
            inputs = encode_char(inputs,
                                 weights)  ## [seq_len, batch_size, nb_filters]
        return inputs

    def add_jackknife_embedding(self):
        """Return normalised jackknife-tag embeddings, [seq_length, batch_size, jk_dim]."""
        with tf.device('/cpu:0'):
            with tf.variable_scope('jk_embedding'):
                embedding = tf.get_variable(
                    'jk_embedding_mat',
                    [self.loader.nb_jk + 1,
                     self.opts.jk_dim])  # +1 for padding
            embedding = self.normalize_embedding(embedding,
                                                 self.loader.jk_freqs)
            inputs = self._lookup_time_major(embedding, 'jk')
        return inputs

    def add_lstm(self, inputs, i, name, backward=False, adv=False):
        """Run one LSTM layer over a time-major sequence.

        Args:
            inputs: tensor [seq_len, batch_size, inputs_dim].
            i: layer index; layer 0 reads the concatenated input features.
            name: prefix of the weight scope ('Forward' / 'Backward').
            backward: if True, the padding mask is fed alongside the inputs
                so the cell can reset its state after zero paddings.
            adv: passed to ``get_lstm_weights`` as ``reuse`` so the
                adversarial pass shares the clean pass's weights.

        Returns:
            Hidden states, [seq_len, batch_size, units].
        """
        batch_size = tf.shape(inputs)[1]
        prev_init = tf.zeros([2, batch_size,
                              self.opts.units])  # [2, batch_size, num_units]
        if i == 0:
            inputs_dim = self.inputs_dim
        else:
            # Upper layers read the previous layer's output, which is
            # 2 * units wide only when the model is bidirectional.  The
            # former hard-coded ``units * 2`` was wrong for opts.bi == False.
            inputs_dim = self.outputs_dim
        weights = get_lstm_weights('{}_LSTM_layer{}'.format(name, i),
                                   inputs_dim,
                                   self.opts.units,
                                   batch_size,
                                   self.hidden_prob,
                                   reuse=adv)
        if backward:
            ## backward: reset states after zero paddings
            non_paddings = tf.transpose(
                self.weight,
                [1, 0])  ## [batch_size, seq_len] => [seq_len, batch_size]
            non_paddings = tf.reverse(non_paddings, [0])
            cell_hidden = tf.scan(
                lambda prev, x: lstm(prev, x, weights, backward=backward),
                [inputs, non_paddings], prev_init)
        else:
            cell_hidden = tf.scan(lambda prev, x: lstm(prev, x, weights),
                                  inputs, prev_init)
        # cell_hidden [seq_len, 2, batch_size, units]; keep the second slot.
        h = tf.unstack(cell_hidden, 2,
                       axis=1)[1]  # [seq_len, batch_size, units]
        return h

    def add_dropout(self, inputs, keep_prob):
        """Apply one dropout mask, shared by every slice along axis 0.

        Args:
            inputs: tensor [seq_len, batch_size, ...].
            keep_prob: keep probability handed to ``tf.nn.dropout``.
        """
        # The mask has the shape of a single time step and is multiplied
        # into each step, so the same units are dropped at every position.
        dummy_dp = tf.ones(tf.shape(inputs)[1:])
        dummy_dp = tf.nn.dropout(dummy_dp, keep_prob)
        return tf.map_fn(lambda x: dummy_dp * x, inputs)

    def add_projection(self, inputs, reuse=False, name=None):
        """Affine map from ``outputs_dim`` to ``nb_tags`` scores.

        Args:
            inputs: tensor [batch_size, outputs_dim].
            reuse: reuse the existing projection variables instead of
                creating them.
            name: variable scope name; defaults to 'Projection'.
        """
        if name is None:
            name = 'Projection'
        with tf.variable_scope(name) as scope:
            if reuse:
                scope.reuse_variables()
            proj_U = tf.get_variable(
                'weight', [self.outputs_dim, self.loader.nb_tags])
            proj_b = tf.get_variable('bias', [self.loader.nb_tags])
            outputs = tf.matmul(inputs, proj_U) + proj_b
        return outputs

    def add_loss_op(self, output):
        """Add the masked sequence loss of ``output`` against the gold tags
        to the 'total loss' collection and return the collection's sum."""
        cross_entropy = sequence_loss(output,
                                      self.inputs_placeholder_dict['tags'],
                                      self.weight)
        # NOTE(review): the collection is never cleared here, so every call
        # returns the sum of all terms added so far.  The second call (the
        # adversarial pass) therefore also contains the clean cross-entropy.
        # Confirm this is intended before relying on ``alpha`` in __init__.
        tf.add_to_collection('total loss', cross_entropy)
        loss = tf.add_n(tf.get_collection('total loss'))
        return loss

    def add_accuracy(self, output):
        """Set ``self.predictions`` (argmax tags) and ``self.accuracy``
        (fraction of non-padding positions predicted correctly).

        Args:
            output: scores, [batch_size, seq_len, nb_tags].
        """
        self.predictions = tf.cast(tf.argmax(output, 2),
                                   tf.int32)  ## [batch_size, seq_len]
        correct_predictions = self.weight * tf.cast(
            tf.equal(self.predictions, self.inputs_placeholder_dict['tags']),
            tf.float32)
        self.accuracy = tf.reduce_sum(
            tf.cast(correct_predictions, tf.float32)) / tf.reduce_sum(
                tf.cast(self.weight, tf.float32))

    def add_train_op(self, loss):
        """Return an Adam (default hyper-parameters) minimisation op for ``loss``."""
        optimizer = tf.train.AdamOptimizer()
        train_op = optimizer.minimize(loss)
        return train_op

    def normalize_embedding(self, embedding, freqs):
        """Standardise each embedding column with ``freqs`` as row weights.

        Args:
            embedding: matrix [nb_words, dim].
            freqs: one weight per row.  Presumably relative frequencies that
                sum to 1 -- TODO confirm against the loader.
        """
        row_weights = tf.expand_dims(freqs, 1)
        e_embedding = tf.reduce_sum(embedding * row_weights,
                                    0,
                                    keep_dims=True)
        v_embedding = tf.reduce_sum(
            (embedding - e_embedding)**2 * row_weights, 0, keep_dims=True)
        return (embedding - e_embedding) / tf.sqrt(v_embedding)

    def add_adversarial_loss(self, loss):
        """Rebuild the network on adversarially shifted embeddings.

        Each tensor in ``self.adv_embedding`` is moved by a fixed-size step
        along the gradient of ``loss``, normalised over its last axis.  The
        last tensor is the raw character embedding when ``chars_dim > 0``
        and is re-encoded with the stored character weights.

        Returns:
            (adv_loss, projected_outputs) from ``feed_network_inputs``.
        """
        gradients = tf.gradients(loss, self.adv_embedding)
        last_index = len(self.adv_embedding) - 1
        new_embeddings = []
        for i, embedding in enumerate(self.adv_embedding):
            normalized_gradient = tf.stop_gradient(
                gradients[i] /
                tf.norm(gradients[i], axis=-1,
                        keep_dims=True))  ## do not take second-order gradient
            # Step size grows with the square root of the embedding width.
            epsilon = 0.001 * tf.sqrt(
                tf.cast(tf.shape(embedding)[-1], tf.float32))
            new_embedding = embedding + epsilon * normalized_gradient
            if i == last_index and self.opts.chars_dim > 0:
                ## char computation
                new_embedding = self.add_dropout(new_embedding,
                                                 self.input_keep_prob)
                new_embedding = encode_char(
                    new_embedding,
                    self.char_weights)  ## [seq_len, batch_size, nb_filters]
            new_embeddings.append(new_embedding)
        inputs_tensor = tf.concat(new_embeddings,
                                  2)  ## [seq_len, batch_size, inputs_dim]
        adv_loss, projected_outputs = self.feed_network_inputs(inputs_tensor,
                                                               adv=True)
        return adv_loss, projected_outputs

    def get_features(self):
        """Set ``self.features`` to the input names enabled in ``self.opts``."""
        self.features = ['words', 'tags']
        if self.opts.suffix_dim > 0:
            self.features.append('suffix')
        if self.opts.cap:
            self.features.append('cap')
        if self.opts.num:
            self.features.append('num')
        if self.opts.jk_dim > 0:
            self.features.append('jk')
        if self.opts.chars_dim > 0:
            self.features.append('chars')

    def feed_network_inputs(self, inputs_tensor, adv=False):
        """Run the LSTM stack, projection and loss on ``inputs_tensor``.

        Args:
            inputs_tensor: [seq_len, batch_size, inputs_dim].
            adv: True for the adversarial pass, which reuses all weights.

        Returns:
            (loss, projected_outputs); the latter is
            [batch_size, seq_len, nb_tags].
        """
        inputs_tensor = self.add_dropout(inputs_tensor, self.input_keep_prob)
        for i in range(self.opts.num_layers):
            forward_outputs_tensor = self.add_dropout(
                self.add_lstm(inputs_tensor, i, 'Forward', adv=adv),
                self.keep_prob)  ## [seq_len, batch_size, units]
            if self.opts.bi:
                backward_outputs_tensor = self.add_dropout(
                    self.add_lstm(tf.reverse(inputs_tensor, [0]),
                                  i,
                                  'Backward',
                                  True,
                                  adv=adv),
                    self.keep_prob)  ## [seq_len, batch_size, units]
                inputs_tensor = tf.concat([
                    forward_outputs_tensor,
                    tf.reverse(backward_outputs_tensor, [0])
                ], 2)
            else:
                inputs_tensor = forward_outputs_tensor
        lstm_outputs = inputs_tensor  ## [seq_len, batch_size, outputs_dim]
        projected_outputs = tf.map_fn(
            lambda x: self.add_projection(x, reuse=adv),
            lstm_outputs)  # [seq_len, batch_size, nb_tags]
        projected_outputs = tf.transpose(
            projected_outputs, perm=[1, 0, 2])  # [batch_size, seq_len, nb_tags]
        loss = self.add_loss_op(projected_outputs)
        return loss, projected_outputs

    def __init__(self, opts, test_opts=None):
        """Load the data and build the clean and adversarial graphs.

        Args:
            opts: model/training options (dimensions, dropout rates, ...).
            test_opts: optional options used when writing test output.
        """
        self.opts = opts
        self.test_opts = test_opts
        self.loader = Dataset(opts, test_opts)
        self.batch_size = opts.batch_size
        self.get_features()
        self.add_placeholders()
        # NOTE(review): nb_filters is added even when chars_dim == 0;
        # presumably opts sets it to 0 in that case -- confirm.
        self.inputs_dim = (self.opts.embedding_dim + self.opts.suffix_dim +
                           self.opts.cap + self.opts.num + self.opts.jk_dim +
                           self.opts.nb_filters)
        self.outputs_dim = (1 + self.opts.bi) * self.opts.units
        inputs_list = [self.add_word_embedding()]
        if self.opts.suffix_dim > 0:
            inputs_list.append(self.add_suffix_embedding())
        if self.opts.cap:
            inputs_list.append(self.add_cap())
        if self.opts.num:
            inputs_list.append(self.add_num())
        if self.opts.jk_dim > 0:
            inputs_list.append(self.add_jackknife_embedding())
        # Snapshot the tensors to perturb; add_char_embedding appends the raw
        # character embeddings to this list itself.
        self.adv_embedding = inputs_list[:]
        if self.opts.chars_dim > 0:
            inputs_list.append(self.add_char_embedding())
        inputs_tensor = tf.concat(inputs_list,
                                  2)  ## [seq_len, batch_size, inputs_dim]
        # 1.0 where the word id is non-zero, 0.0 on padding.
        self.weight = tf.cast(
            tf.not_equal(
                self.inputs_placeholder_dict['words'],
                tf.zeros(tf.shape(self.inputs_placeholder_dict['words']),
                         tf.int32)), tf.float32)  ## [batch_size, seq_len]
        clean_loss, projected_outputs = self.feed_network_inputs(inputs_tensor)
        self.add_accuracy(projected_outputs)
        adv_loss, _ = self.add_adversarial_loss(
            clean_loss)  ## do not care about adversarial prediction accuracy
        alpha = 0.5
        self.loss = alpha * clean_loss + (1.0 - alpha) * adv_loss
        self.train_op = self.add_train_op(self.loss)

    def _build_feed(self, batch, keep_prob, hidden_prob, input_keep_prob):
        """Map every input placeholder to its array in ``batch`` and set the
        three keep probabilities."""
        feed = {}
        for feat, placeholder in self.inputs_placeholder_dict.items():
            feed[placeholder] = batch[feat]
        feed[self.keep_prob] = keep_prob
        feed[self.hidden_prob] = hidden_prob
        feed[self.input_keep_prob] = input_keep_prob
        return feed

    def run_batch(self, session, testmode=False):
        """Run the loader's current batch.

        Returns:
            Training: (loss, accuracy) after one optimiser step.
            Test: (loss, accuracy, predictions) with dropout disabled;
            predictions are restricted to non-padding positions.
        """
        if not testmode:
            feed = self._build_feed(self.loader.inputs_train_batch,
                                    self.opts.dropout_p, self.opts.hidden_p,
                                    self.opts.input_dp)
            _, loss, accuracy = session.run(
                [self.train_op, self.loss, self.accuracy], feed_dict=feed)
            return loss, accuracy
        feed = self._build_feed(self.loader.inputs_test_batch, 1.0, 1.0, 1.0)
        loss, accuracy, predictions, weight = session.run(
            [self.loss, self.accuracy, self.predictions, self.weight],
            feed_dict=feed)
        weight = weight.astype(bool)
        predictions = predictions[weight]
        return loss, accuracy, predictions

    def run_epoch(self, session, testmode=False):
        """Iterate once over the training or test data.

        Returns:
            Training: (loss, accuracy) of the last batch, or (None, None)
            when the loader yields no batch at all.
            Test: accuracy of the flattened predictions against
            ``self.loader.test_gold``.
        """
        if not testmode:
            epoch_start_time = time.time()
            next_batch = self.loader.next_batch
            # Defined up front so an empty epoch does not raise
            # UnboundLocalError at the return below.
            loss = accuracy = None
            while next_batch(self.batch_size):
                loss, accuracy = self.run_batch(session)
                print('{}/{}, loss {:.4f}, accuracy {:.4f}'.format(
                    self.loader._index_in_epoch, self.loader.nb_train_samples,
                    loss, accuracy),
                      end='\r')
            print('\nEpoch Training Time {}'.format(time.time() -
                                                    epoch_start_time))
            return loss, accuracy
        next_test_batch = self.loader.next_test_batch
        predictions = []
        while next_test_batch(self.batch_size):
            loss, accuracy, predictions_batch = self.run_batch(session, True)
            predictions.append(predictions_batch)
            print('Test mode {}/{}'.format(self.loader._index_in_test,
                                           self.loader.nb_validation_samples),
                  end='\r')
        predictions = np.hstack(predictions)
        if self.test_opts is not None:
            self.loader.output_stags(predictions, self.test_opts.save_tags)
        accuracy = np.mean(predictions == self.loader.test_gold)
        return accuracy
def __init__(self, opts, test_opts=None, beam_size=16):
    """Build the tagger graph with either beam or plain forward decoding.

    Args:
        opts: model options (embedding sizes, units, layers, ...).
        test_opts: optional options used at test time.
        beam_size: a positive value builds the beam path via
            ``add_forward_beam_path``; otherwise ``add_forward_path`` is used.
    """
    ## Notation:
    ## b: batch_size
    ## d: # units
    ## n: # tokens in the sentence
    ## B: beam_size
    self.opts = opts
    self.test_opts = test_opts
    self.loader = Dataset(opts, test_opts)
    self.batch_size = 32
    self.beam_size = beam_size
    self.add_placeholders()
    self.inputs_dim = (self.opts.embedding_dim + self.opts.suffix_dim +
                       self.opts.cap + self.opts.num + self.opts.jk_dim)
    self.outputs_dim = (1 + self.opts.bi) * self.opts.units

    def padding_mask():
        ## 1.0 where the first input placeholder is non-zero, 0.0 elsewhere
        token_ids = self.inputs_placeholder_list[0]
        is_token = tf.not_equal(
            token_ids, tf.zeros(tf.shape(token_ids), tf.int32))
        return tf.cast(is_token, tf.float32)

    ## Collect the enabled input features, in a fixed order.
    feature_tensors = [self.add_word_embedding()]
    if self.opts.suffix_dim > 0:
        feature_tensors.append(self.add_suffix_embedding())
    if self.opts.cap:
        feature_tensors.append(self.add_cap())
    if self.opts.num:
        feature_tensors.append(self.add_num())
    if self.opts.jk_dim > 0:
        feature_tensors.append(self.add_jackknife_embedding())
    concat_inputs = tf.concat(feature_tensors,
                              2)  ## [seq_len, batch_size, inputs_dim]
    forward_inputs = concat_inputs

    ## Backward path is deterministic, just run it first and make it embeddings
    if self.opts.bi:
        backward_inputs = self.add_dropout(
            tf.reverse(concat_inputs, [0]), self.input_keep_prob)
        for layer in xrange(self.opts.num_layers):
            backward_hidden = self.add_lstm(backward_inputs, layer,
                                            'Backward')
            backward_inputs = self.add_dropout(
                backward_hidden,
                self.keep_prob)  ## [seq_len, batch_size, units]
        backward_inputs = tf.reverse(
            backward_inputs, [0])  ## [seq_len, batch_size, units]
    ## backward path is done
    ## NOTE(review): backward_inputs only exists when opts.bi is set, yet
    ## both branches below use it -- confirm bi is always enabled here.
    forward_inputs = self.add_dropout(forward_inputs, self.input_keep_prob)

    if beam_size > 0:
        self.weight = padding_mask()  # [self.batch_size, seq_len]
        beam_outputs = self.add_forward_beam_path(
            forward_inputs, backward_inputs,
            beam_size)  ## [seq_len, batch_size, nb_tags]
        self.predictions, self.scores, self.back_pointers = beam_outputs
        tiled_weight = tf.tile(self.weight, [1, beam_size])
        self.weight_beam = tf.reshape(
            tiled_weight,
            [-1, tf.shape(self.weight)[1]])  # [batch_size, seq_len]
        ## NOTE(review): self.loss is not assigned in this method before this
        ## point; presumably add_forward_beam_path sets it -- confirm.
        self.accuracy = self.loss  ## for dummy
        self.train_op = self.add_train_op(self.loss)
    else:
        greedy_outputs = self.add_forward_path(
            forward_inputs,
            backward_inputs)  ## [seq_len, batch_size, nb_tags]
        self.predictions, projected_outputs = greedy_outputs
        self.weight = padding_mask()  ## [batch_size, seq_len]
        self.add_lm_accuracy()
        self.loss = self.add_loss_op(projected_outputs)
        self.train_op = self.add_train_op(self.loss)
class Stagging_Model_Forward_Chain(object):
    """Stacked (bi)LSTM supertagger with a left-to-right tag chain.

    On top of the LSTM outputs, each position's tag scores also depend on
    an embedding of the tag predicted at the previous position (see
    ``add_forward_path`` / ``add_one_forward``).
    """

    def add_stag_embedding_mat(self):
        """Create the supertag embedding matrix, [nb_tags + 1, opts.lm]."""
        with tf.variable_scope('stag_embedding'):
            self.stag_embedding_mat = tf.get_variable(
                'stag_embedding_mat',
                [self.loader.nb_tags + 1, self.opts.lm])  # +1 for padding

    def add_stag_dropout_mat(self, batch_size):
        """Create one dropout mask, [batch_size, opts.lm], for tag embeddings."""
        self.stag_dropout_mat = tf.ones([batch_size, self.opts.lm])
        self.stag_dropout_mat = tf.nn.dropout(self.stag_dropout_mat,
                                              self.input_keep_prob)

    def add_placeholders(self):
        """Create one int32 placeholder per entry of ``self.features`` plus
        the three float32 keep-probability placeholders."""
        self.inputs_placeholder_dict = {}
        for feature in self.features:
            # 'chars' has one more axis than the other features.
            if feature == 'chars':
                shape = [None, None, None]
            else:
                shape = [None, None]
            self.inputs_placeholder_dict[feature] = tf.placeholder(
                tf.int32, shape=shape)
        self.keep_prob = tf.placeholder(tf.float32)
        self.input_keep_prob = tf.placeholder(tf.float32)
        self.hidden_prob = tf.placeholder(tf.float32)

    def _lookup_time_major(self, embedding, feature):
        """Look up ``feature`` ids in ``embedding`` and swap the first two axes."""
        inputs = tf.nn.embedding_lookup(
            embedding, self.inputs_placeholder_dict[feature]
        )  ## [batch_size, seq_len, embedding_dim]
        return tf.transpose(
            inputs, perm=[1, 0, 2])  # [seq_length, batch_size, embedding_dim]

    def _add_indicator_feature(self, feature):
        """Cast an integer feature to float32 with a trailing unit axis.

        Returns:
            Tensor of shape [seq_length, batch_size, 1].
        """
        inputs = tf.cast(
            tf.expand_dims(self.inputs_placeholder_dict[feature], -1),
            tf.float32)
        return tf.transpose(inputs, perm=[1, 0, 2])

    def add_word_embedding(self):
        """Return word embeddings, [seq_length, batch_size, embedding_dim].

        The embedding matrix is initialised from ``self.loader.word_embeddings``.
        """
        with tf.device('/cpu:0'):
            with tf.variable_scope('word_embedding'):
                embedding = tf.get_variable(
                    'word_embedding_mat',
                    self.loader.word_embeddings.shape,
                    initializer=tf.constant_initializer(
                        self.loader.word_embeddings))
            inputs = self._lookup_time_major(embedding, 'words')
        return inputs

    def add_suffix_embedding(self):
        """Return suffix embeddings, [seq_length, batch_size, suffix_dim]."""
        with tf.device('/cpu:0'):
            with tf.variable_scope('suffix_embedding'):
                embedding = tf.get_variable(
                    'suffix_embedding_mat',
                    [self.loader.nb_suffixes + 1,
                     self.opts.suffix_dim])  # +1 for padding
            inputs = self._lookup_time_major(embedding, 'suffix')
        return inputs

    def add_cap(self):
        """Return the 'cap' feature as floats, [seq_length, batch_size, 1]."""
        return self._add_indicator_feature('cap')

    def add_num(self):
        """Return the 'num' feature as floats, [seq_length, batch_size, 1]."""
        return self._add_indicator_feature('num')

    def add_char_embedding(self):
        """Embed characters and encode each word with ``encode_char``.

        Returns:
            Tensor of shape [seq_len, batch_size, nb_filters].
        """
        with tf.device('/cpu:0'):
            with tf.variable_scope('char_embedding'):
                embedding = tf.get_variable(
                    'char_embedding_mat',
                    [self.loader.nb_chars + 1,
                     self.opts.chars_dim])  # +1 for padding
            inputs = tf.nn.embedding_lookup(
                embedding, self.inputs_placeholder_dict['chars']
            )  ## [batch_size, seq_len, word_len, embedding_dim]
            inputs = tf.transpose(inputs, perm=[1, 0, 2, 3])
            ## [seq_len, batch_size, word_len, embedding_dim]
            inputs = self.add_dropout(inputs, self.input_keep_prob)
            weights = get_char_weights(self.opts, 'char_encoding')
            inputs = encode_char(inputs,
                                 weights)  ## [seq_len, batch_size, nb_filters]
        return inputs

    def add_jackknife_embedding(self):
        """Return jackknife-tag embeddings, [seq_length, batch_size, jk_dim]."""
        with tf.device('/cpu:0'):
            with tf.variable_scope('jk_embedding'):
                embedding = tf.get_variable(
                    'jk_embedding_mat',
                    [self.loader.nb_jk + 1,
                     self.opts.jk_dim])  # +1 for padding
            inputs = self._lookup_time_major(embedding, 'jk')
        return inputs

    def add_stag_embedding(self, stags=None):
        """Embed supertag ids with ``self.stag_embedding_mat``.

        Args:
            stags: tag ids to embed; if None, the gold tags placeholder is
                used.
        """
        with tf.device('/cpu:0'):
            if stags is None:
                stags = self.inputs_placeholder_dict['tags']
            inputs = tf.nn.embedding_lookup(self.stag_embedding_mat, stags)
        return inputs

    def add_predictions(self, output):
        """Return the int32 argmax over axis 1: [batch_size, nb_tags] -> [batch_size]."""
        predictions = tf.cast(tf.argmax(output, 1), tf.int32)
        return predictions

    def add_lm_accuracy(self):
        """Set ``self.accuracy`` from the already computed ``self.predictions``
        (fraction of non-padding positions predicted correctly)."""
        correct_predictions = self.weight * tf.cast(
            tf.equal(self.predictions, self.inputs_placeholder_dict['tags']),
            tf.float32)
        self.accuracy = tf.reduce_sum(
            tf.cast(correct_predictions, tf.float32)) / tf.reduce_sum(
                tf.cast(self.weight, tf.float32))

    def add_forward_path(self, lstm_outputs):
        """Scan left to right, feeding each prediction into the next step.

        Args:
            lstm_outputs: [seq_len, batch_size, outputs_dim].

        Returns:
            (predictions [batch_size, seq_len],
             projected outputs [batch_size, seq_len, nb_tags]).
        """
        batch_size = tf.shape(lstm_outputs)[1]
        ## Scan state (list of two elements):
        ## 1. Previous predictions (stag_idx): [batch_size]
        ## 2. Projected outputs for cost calculation: [batch_size, nb_tags]
        prev_init = [
            tf.zeros([batch_size], tf.int32),
            tf.zeros([batch_size, self.loader.nb_tags])
        ]
        name = 'Forward'
        ## Define all the necessary weights for recursion
        self.add_stag_embedding_mat()
        self.add_stag_dropout_mat(batch_size)
        weights = get_chain_weights('{}_Chain_layer'.format(name),
                                    self.opts.lm, self.outputs_dim)
        all_states = tf.scan(
            lambda prev, x: self.add_one_forward(prev, x, weights),
            lstm_outputs, prev_init)
        all_predictions = all_states[0]  # [seq_len, batch_size]
        all_predictions = tf.transpose(all_predictions,
                                       perm=[1, 0])  # [batch_size, seq_len]
        all_projected_outputs = all_states[1]  # [seq_len, batch_size, nb_tags]
        all_projected_outputs = tf.transpose(
            all_projected_outputs,
            perm=[1, 0, 2])  # [batch_size, seq_len, nb_tags]
        return all_predictions, all_projected_outputs

    def add_one_forward(self, prev_list, x, weights):
        """Compute one position of the forward chain.

        Args:
            prev_list: [previous predictions, previous projected outputs].
            x: LSTM output at this position, [batch_size, outputs_dim].
            weights: dict with 'L_weight' and 'L_bias', applied to the
                embedding of the previous prediction.

        Returns:
            [predictions, projected_outputs] for this position.
        """
        prev_predictions = prev_list[0]
        prev_embedding = self.add_stag_embedding(
            prev_predictions)  ## [batch_size, self.opts.lm]
        prev_embedding = prev_embedding * self.stag_dropout_mat
        inputs = tf.nn.relu(x +
                            tf.matmul(prev_embedding, weights['L_weight']) +
                            weights['L_bias'])
        projected_outputs = self.add_projection(
            inputs)  ## [batch_size, nb_tags]
        predictions = self.add_predictions(projected_outputs)  ## [batch_size]
        new_state = [predictions, projected_outputs]
        return new_state

    def add_lstm(self, inputs, i, name, backward=False):
        """Run one LSTM layer over a time-major sequence.

        Args:
            inputs: tensor [seq_len, batch_size, inputs_dim].
            i: layer index; layer 0 reads the concatenated input features.
            name: prefix of the weight scope ('Forward' / 'Backward').
            backward: if True, the padding mask is fed alongside the inputs
                so the cell can reset its state after zero paddings.

        Returns:
            Hidden states, [seq_len, batch_size, units].
        """
        batch_size = tf.shape(inputs)[1]
        prev_init = tf.zeros([2, batch_size,
                              self.opts.units])  # [2, batch_size, num_units]
        if i == 0:
            inputs_dim = self.inputs_dim
        else:
            # Upper layers read the previous layer's output, which is
            # 2 * units wide only when the model is bidirectional.  The
            # former hard-coded ``units*2`` was wrong for opts.bi == False.
            inputs_dim = self.outputs_dim
        weights = get_lstm_weights('{}_LSTM_layer{}'.format(name, i),
                                   inputs_dim, self.opts.units, batch_size,
                                   self.hidden_prob)
        if backward:
            ## backward: reset states after zero paddings
            non_paddings = tf.transpose(
                self.weight,
                [1, 0])  ## [batch_size, seq_len] => [seq_len, batch_size]
            non_paddings = tf.reverse(non_paddings, [0])
            cell_hidden = tf.scan(
                lambda prev, x: lstm(prev, x, weights, backward=backward),
                [inputs, non_paddings], prev_init)
        else:
            cell_hidden = tf.scan(lambda prev, x: lstm(prev, x, weights),
                                  inputs, prev_init)
        # cell_hidden [seq_len, 2, batch_size, units]; keep the second slot.
        h = tf.unstack(cell_hidden, 2,
                       axis=1)[1]  # [seq_len, batch_size, units]
        return h

    def add_dropout(self, inputs, keep_prob):
        """Apply one dropout mask, shared by every slice along axis 0.

        Args:
            inputs: tensor [seq_len, batch_size, ...].
            keep_prob: keep probability handed to ``tf.nn.dropout``.
        """
        # The mask has the shape of a single time step and is multiplied
        # into each step, so the same units are dropped at every position.
        dummy_dp = tf.ones(tf.shape(inputs)[1:])
        dummy_dp = tf.nn.dropout(dummy_dp, keep_prob)
        return tf.map_fn(lambda x: dummy_dp * x, inputs)

    def add_projection(self, inputs, reuse=False, name=None):
        """Affine map from ``outputs_dim`` to ``nb_tags`` scores.

        Args:
            inputs: tensor [batch_size, outputs_dim].
            reuse: reuse the existing projection variables instead of
                creating them.
            name: variable scope name; defaults to 'Projection'.
        """
        if name is None:
            name = 'Projection'
        with tf.variable_scope(name) as scope:
            if reuse:
                scope.reuse_variables()
            proj_U = tf.get_variable(
                'weight', [self.outputs_dim, self.loader.nb_tags])
            proj_b = tf.get_variable('bias', [self.loader.nb_tags])
            outputs = tf.matmul(inputs, proj_U) + proj_b
        return outputs

    def add_loss_op(self, output):
        """Add the masked sequence loss of ``output`` against the gold tags
        to the 'total loss' collection and return the collection's sum."""
        cross_entropy = sequence_loss(output,
                                      self.inputs_placeholder_dict['tags'],
                                      self.weight)
        tf.add_to_collection('total loss', cross_entropy)
        loss = tf.add_n(tf.get_collection('total loss'))
        return loss

    def add_accuracy(self, output):
        """Set ``self.predictions`` (argmax tags) and ``self.accuracy``.

        Args:
            output: scores, [batch_size, seq_len, nb_tags].
        """
        self.predictions = tf.cast(tf.argmax(output, 2),
                                   tf.int32)  ## [batch_size, seq_len]
        correct_predictions = self.weight * tf.cast(
            tf.equal(self.predictions, self.inputs_placeholder_dict['tags']),
            tf.float32)
        self.accuracy = tf.reduce_sum(
            tf.cast(correct_predictions, tf.float32)) / tf.reduce_sum(
                tf.cast(self.weight, tf.float32))

    def add_train_op(self, loss):
        """Return an Adam (default hyper-parameters) minimisation op for ``loss``."""
        optimizer = tf.train.AdamOptimizer()
        train_op = optimizer.minimize(loss)
        return train_op

    def get_features(self):
        """Set ``self.features`` to the input names enabled in ``self.opts``."""
        self.features = ['words', 'tags']
        if self.opts.suffix_dim > 0:
            self.features.append('suffix')
        if self.opts.cap:
            self.features.append('cap')
        if self.opts.num:
            self.features.append('num')
        if self.opts.jk_dim > 0:
            self.features.append('jk')
        if self.opts.chars_dim > 0:
            self.features.append('chars')

    def __init__(self, opts, test_opts=None):
        """Load the data and build the tagging graph.

        Args:
            opts: model/training options (dimensions, dropout rates, ...).
            test_opts: optional options used when writing test output.
        """
        self.opts = opts
        self.test_opts = test_opts
        self.loader = Dataset(opts, test_opts)
        self.batch_size = opts.batch_size
        self.get_features()
        self.add_placeholders()
        # NOTE(review): nb_filters is added even when chars_dim == 0;
        # presumably opts sets it to 0 in that case -- confirm.
        self.inputs_dim = (self.opts.embedding_dim + self.opts.suffix_dim +
                           self.opts.cap + self.opts.num + self.opts.jk_dim +
                           self.opts.nb_filters)
        self.outputs_dim = (1 + self.opts.bi) * self.opts.units
        inputs_list = [self.add_word_embedding()]
        if self.opts.suffix_dim > 0:
            inputs_list.append(self.add_suffix_embedding())
        if self.opts.cap:
            inputs_list.append(self.add_cap())
        if self.opts.num:
            inputs_list.append(self.add_num())
        if self.opts.jk_dim > 0:
            inputs_list.append(self.add_jackknife_embedding())
        if self.opts.chars_dim > 0:
            inputs_list.append(self.add_char_embedding())
        inputs_tensor = tf.concat(inputs_list,
                                  2)  ## [seq_len, batch_size, inputs_dim]
        inputs_tensor = self.add_dropout(inputs_tensor, self.input_keep_prob)
        # 1.0 where the word id is non-zero, 0.0 on padding.
        self.weight = tf.cast(
            tf.not_equal(
                self.inputs_placeholder_dict['words'],
                tf.zeros(tf.shape(self.inputs_placeholder_dict['words']),
                         tf.int32)), tf.float32)  ## [batch_size, seq_len]
        for i in range(self.opts.num_layers):
            forward_outputs_tensor = self.add_dropout(
                self.add_lstm(inputs_tensor, i, 'Forward'),
                self.keep_prob)  ## [seq_len, batch_size, units]
            if self.opts.bi:
                backward_outputs_tensor = self.add_dropout(
                    self.add_lstm(tf.reverse(inputs_tensor, [0]), i,
                                  'Backward', True),
                    self.keep_prob)  ## [seq_len, batch_size, units]
                inputs_tensor = tf.concat([
                    forward_outputs_tensor,
                    tf.reverse(backward_outputs_tensor, [0])
                ], 2)
            else:
                inputs_tensor = forward_outputs_tensor
        lstm_outputs = inputs_tensor  ## [seq_len, batch_size, outputs_dim]
        self.predictions, projected_outputs = self.add_forward_path(
            lstm_outputs)  ## [batch_size, seq_len, nb_tags]
        self.add_lm_accuracy()
        self.loss = self.add_loss_op(projected_outputs)
        self.train_op = self.add_train_op(self.loss)
        # if self.opts.bi:
        self.add_accuracy(projected_outputs)

    def _build_feed(self, batch, keep_prob, hidden_prob, input_keep_prob):
        """Map every input placeholder to its array in ``batch`` and set the
        three keep probabilities."""
        feed = {}
        for feat, placeholder in self.inputs_placeholder_dict.items():
            feed[placeholder] = batch[feat]
        feed[self.keep_prob] = keep_prob
        feed[self.hidden_prob] = hidden_prob
        feed[self.input_keep_prob] = input_keep_prob
        return feed

    def run_batch(self, session, testmode=False):
        """Run the loader's current batch.

        Returns:
            Training: (loss, accuracy) after one optimiser step.
            Test: (loss, accuracy, predictions) with dropout disabled;
            predictions are restricted to non-padding positions.
        """
        if not testmode:
            feed = self._build_feed(self.loader.inputs_train_batch,
                                    self.opts.dropout_p, self.opts.hidden_p,
                                    self.opts.input_dp)
            _, loss, accuracy = session.run(
                [self.train_op, self.loss, self.accuracy], feed_dict=feed)
            return loss, accuracy
        feed = self._build_feed(self.loader.inputs_test_batch, 1.0, 1.0, 1.0)
        loss, accuracy, predictions, weight = session.run(
            [self.loss, self.accuracy, self.predictions, self.weight],
            feed_dict=feed)
        weight = weight.astype(bool)
        predictions = predictions[weight]
        return loss, accuracy, predictions

    def run_epoch(self, session, testmode=False):
        """Iterate once over the training or test data.

        Returns:
            Training: (loss, accuracy) of the last batch, or (None, None)
            when the loader yields no batch at all.
            Test: accuracy of the flattened predictions against
            ``self.loader.test_gold``.
        """
        if not testmode:
            epoch_start_time = time.time()
            next_batch = self.loader.next_batch
            # Defined up front so an empty epoch does not raise
            # UnboundLocalError at the return below.
            loss = accuracy = None
            while next_batch(self.batch_size):
                loss, accuracy = self.run_batch(session)
                print('{}/{}, loss {:.4f}, accuracy {:.4f}'.format(
                    self.loader._index_in_epoch, self.loader.nb_train_samples,
                    loss, accuracy),
                      end='\r')
            print('\nEpoch Training Time {}'.format(time.time() -
                                                    epoch_start_time))
            return loss, accuracy
        next_test_batch = self.loader.next_test_batch
        predictions = []
        while next_test_batch(self.batch_size):
            loss, accuracy, predictions_batch = self.run_batch(session, True)
            predictions.append(predictions_batch)
            print('Test mode {}/{}'.format(self.loader._index_in_test,
                                           self.loader.nb_validation_samples),
                  end='\r')
        predictions = np.hstack(predictions)
        if self.test_opts is not None:
            self.loader.output_stags(predictions, self.test_opts.save_tags)
        accuracy = np.mean(predictions == self.loader.test_gold)
        return accuracy