Example #1
    def __init__(self, opts, test_opts=None):

        self.opts = opts
        self.test_opts = test_opts
        self.loader = Dataset(opts, test_opts)
        self.batch_size = opts.batch_size
        self.get_features()
        self.add_placeholders()
        self.inputs_dim = self.opts.embedding_dim + self.opts.suffix_dim + self.opts.cap + self.opts.num + self.opts.jk_dim + self.opts.nb_filters
        self.outputs_dim = (1 + self.opts.bi) * self.opts.units
        inputs_list = [self.add_word_embedding()]
        if self.opts.suffix_dim > 0:
            inputs_list.append(self.add_suffix_embedding())
        if self.opts.cap:
            inputs_list.append(self.add_cap())
        if self.opts.num:
            inputs_list.append(self.add_num())
        if self.opts.jk_dim > 0:
            inputs_list.append(self.add_jackknife_embedding())
        if self.opts.chars_dim > 0:
            inputs_list.append(self.add_char_embedding())
        inputs_tensor = tf.concat(inputs_list,
                                  2)  ## [seq_len, batch_size, inputs_dim]
        forward_inputs_tensor = self.add_dropout(inputs_tensor,
                                                 self.input_keep_prob)
        self.weight = tf.cast(
            tf.not_equal(
                self.inputs_placeholder_dict['words'],
                tf.zeros(tf.shape(self.inputs_placeholder_dict['words']),
                         tf.int32)), tf.float32)  ## [batch_size, seq_len]
        for i in xrange(self.opts.num_layers):
            forward_inputs_tensor = self.add_dropout(
                self.add_lstm(forward_inputs_tensor, i, 'Forward'),
                self.keep_prob)  ## [seq_len, batch_size, units]
        lstm_outputs = forward_inputs_tensor
        if self.opts.bi:
            backward_inputs_tensor = self.add_dropout(
                tf.reverse(inputs_tensor, [0]), self.input_keep_prob)
            for i in xrange(self.opts.num_layers):
                backward_inputs_tensor = self.add_dropout(
                    self.add_lstm(backward_inputs_tensor, i, 'Backward', True),
                    self.keep_prob)  ## [seq_len, batch_size, units]
            backward_inputs_tensor = tf.reverse(backward_inputs_tensor, [0])
            lstm_outputs = tf.concat([lstm_outputs, backward_inputs_tensor],
                                     2)  ## [seq_len, batch_size, outputs_dim]
        projected_outputs = tf.map_fn(
            lambda x: self.add_projection(x),
            lstm_outputs)  #[seq_len, batch_size, nb_tags]
        projected_outputs = tf.transpose(
            projected_outputs, perm=[1, 0,
                                     2])  # [batch_size, seq_len, nb_tags]
        self.loss = self.add_loss_op(projected_outputs)
        self.train_op = self.add_train_op(self.loss)
        self.add_accuracy(projected_outputs)
Example #2
    def __init__(self, opts, test_opts=None, beam_size=0):
        ## Notation:
        ## b: batch_size
        ## d: # units
        ## n: # tokens in the sentence
        ## B: beam_size
        self.opts = opts
        self.opts.bi = 0  ## no bidirection
        self.test_opts = test_opts
        self.loader = Dataset(opts, test_opts)
        self.batch_size = 100
        #self.batch_size = 32
        self.beam_size = beam_size
        self.add_placeholders()
        self.inputs_dim = self.opts.embedding_dim + self.opts.suffix_dim + self.opts.cap + self.opts.num + self.opts.jk_dim
        self.outputs_dim = (1 + self.opts.bi) * self.opts.units
        inputs_list = [self.add_word_embedding()]
        if self.opts.suffix_dim > 0:
            inputs_list.append(self.add_suffix_embedding())
        if self.opts.cap:
            inputs_list.append(self.add_cap())
        if self.opts.num:
            inputs_list.append(self.add_num())
        if self.opts.jk_dim > 0:
            inputs_list.append(self.add_jackknife_embedding())
        inputs_tensor = tf.concat(inputs_list,
                                  2)  ## [seq_len, batch_size, inputs_dim]
        #forward_inputs_tensor = inputs_tensor
        forward_inputs_tensor = tf.reverse(inputs_tensor, [0])

        forward_inputs_tensor = self.add_dropout(forward_inputs_tensor,
                                                 self.input_keep_prob)
        forward_cells = []
        forward_hs = []
        for i in xrange(self.opts.num_layers):
            c, h = self.add_lstm(forward_inputs_tensor, i,
                                 'Forward')  ## [seq_len, batch_size, units]
            forward_hs.append(h)
            h = self.add_dropout(
                h, self.keep_prob)  ## [seq_len, batch_size, units]
            forward_cells.append(c)
            forward_inputs_tensor = h

#        if beam_size > 0:
#            self.predictions, self.scores, self.back_pointers = self.add_forward_beam_path_mt(forward_inputs_tensor, beam_size) ## [seq_len, batch_size, nb_tags]
#            self.weight = tf.not_equal(self.inputs_placeholder_list[0], tf.zeros(tf.shape(self.inputs_placeholder_list[0]), tf.int32)) # [self.batch_size, seq_len]
#            self.weight_beam = tf.reshape(tf.tile(self.weight, [1, beam_size]), [-1, tf.shape(self.weight)[1]]) # [batch_size, seq_len]
#        else:
        self.predictions, projected_outputs = self.add_forward_path_mt(
            forward_cells, forward_hs,
            forward_inputs_tensor)  ## [seq_len, batch_size, nb_tags]
        self.weight = tf.cast(
            tf.not_equal(
                self.inputs_placeholder_list[0],
                tf.zeros(tf.shape(self.inputs_placeholder_list[0]), tf.int32)),
            tf.float32)  ## [batch_size, seq_len]
        self.add_lm_accuracy()
        self.loss = self.add_loss_op(projected_outputs)
        self.train_op = self.add_train_op(self.loss)
Example #3
    def __init__(self, opts, test_opts=None):

        self.opts = opts
        self.test_opts = test_opts
        self.loader = Dataset(opts, test_opts)
        self.batch_size = 100
        self.add_placeholders()
        self.inputs_dim = self.opts.embedding_dim + self.opts.jk_dim + self.opts.stag_dim
        self.outputs_dim = (1 + self.opts.bi) * self.opts.units
        inputs_list = [self.add_word_embedding()]
        if self.opts.jk_dim:
            inputs_list.append(self.add_jackknife_embedding())
        if self.opts.stag_dim > 0:
            inputs_list.append(self.add_stag_embedding())
        inputs_tensor = tf.concat(inputs_list,
                                  2)  ## [seq_len, batch_size, inputs_dim]
        forward_inputs_tensor = self.add_dropout(inputs_tensor,
                                                 self.input_keep_prob)
        for i in xrange(self.opts.num_layers):
            forward_inputs_tensor = self.add_dropout(
                self.add_lstm(forward_inputs_tensor, i, 'Forward'),
                self.keep_prob)  ## [seq_len, batch_size, units]
        lstm_outputs = forward_inputs_tensor
        if self.opts.bi:
            backward_inputs_tensor = self.add_dropout(
                tf.reverse(inputs_tensor, [0]), self.input_keep_prob)
            for i in xrange(self.opts.num_layers):
                backward_inputs_tensor = self.add_dropout(
                    self.add_lstm(backward_inputs_tensor, i, 'Backward'),
                    self.keep_prob)  ## [seq_len, batch_size, units]
            backward_inputs_tensor = tf.reverse(backward_inputs_tensor, [0])
            lstm_outputs = tf.concat([lstm_outputs, backward_inputs_tensor],
                                     2)  ## [seq_len, batch_size, outputs_dim]
        self.arc_outputs, rel_outputs, self.rel_scores = self.add_biaffine(
            lstm_outputs)
        #        projected_outputs = tf.map_fn(lambda x: self.add_projection(x), lstm_outputs) #[seq_len, batch_size, nb_tags]
        #        projected_outputs = tf.transpose(projected_outputs, perm=[1, 0, 2]) # [batch_size, seq_len, nb_tags]
        inputs_shape = tf.shape(self.inputs_placeholder_dict['words'])
        self.weight = tf.cast(
            tf.not_equal(self.inputs_placeholder_dict['words'],
                         tf.zeros(inputs_shape, tf.int32)),
            tf.float32) * tf.cast(
                tf.not_equal(
                    self.inputs_placeholder_dict['words'],
                    tf.ones(inputs_shape, tf.int32) *
                    self.loader.word_index['<-root->']),
                tf.float32)  ## [batch_size, seq_len]
        ## no need to worry about the heads of <-root-> and zero-pads
        self.loss = self.add_loss_op(
            self.arc_outputs,
            self.inputs_placeholder_dict['arcs']) + self.add_loss_op(
                rel_outputs, self.inputs_placeholder_dict['rels'])
        self.predicted_arcs, self.UAS = self.add_accuracy(
            self.arc_outputs, self.inputs_placeholder_dict['arcs'])
        self.predicted_rels, self.rel_acc = self.add_accuracy(
            rel_outputs, self.inputs_placeholder_dict['rels'])
        self.train_op = self.add_train_op(self.loss)
Example #4
    def __init__(self, opts, test_opts=None):
       
        self.opts = opts
        self.test_opts = test_opts
        self.loader = Dataset(opts, test_opts)
        self.batch_size = 100
        self.get_features()
        self.add_placeholders()
        self.inputs_dim = self.opts.embedding_dim + self.opts.jk_dim + self.opts.stag_dim + self.opts.nb_filters
        self.outputs_dim = (1+self.opts.bi)*self.opts.units
        inputs_list = [self.add_word_embedding()]
        if self.opts.jk_dim:
            inputs_list.append(self.add_jackknife_embedding())
        if self.opts.stag_dim > 0:
            inputs_list.append(self.add_stag_embedding())
        if self.opts.chars_dim > 0:
            inputs_list.append(self.add_char_embedding())
        inputs_tensor = tf.concat(inputs_list, 2) ## [seq_len, batch_size, inputs_dim]
        inputs_tensor = self.add_dropout(inputs_tensor, self.input_keep_prob)
        inputs_shape = tf.shape(self.inputs_placeholder_dict['words'])
        ## no need to worry about the heads of <-root-> and zero-pads
        ## Let's get those non-padding places so we can reinitialize hidden states after each padding in the backward path
        ### because the backward path starts with zero pads.
        self.weight = tf.cast(tf.not_equal(self.inputs_placeholder_dict['words'], tf.zeros(inputs_shape, tf.int32)), tf.float32) ## [batch_size, seq_len]
        for i in xrange(self.opts.num_layers):
            forward_outputs_tensor = self.add_dropout(self.add_lstm(inputs_tensor, i, 'Forward'), self.keep_prob) ## [seq_len, batch_size, units]
            if self.opts.bi:
                backward_outputs_tensor = self.add_dropout(self.add_lstm(tf.reverse(inputs_tensor, [0]), i, 'Backward', True), self.keep_prob) ## [seq_len, batch_size, units]
                inputs_tensor = tf.concat([forward_outputs_tensor, tf.reverse(backward_outputs_tensor, [0])], 2)
            else:
                inputs_tensor = forward_outputs_tensor
        self.weight = self.weight*tf.cast(tf.not_equal(self.inputs_placeholder_dict['words'], tf.ones(inputs_shape, tf.int32)*self.loader.word_index['<-root->']), tf.float32) ## [batch_size, seq_len]
        lstm_outputs = inputs_tensor ## [seq_len, batch_size, outputs_dim]

        self.arc_outputs, rel_outputs, self.rel_scores, joint_output, joint_output_jk = self.add_biaffine(lstm_outputs)
#        projected_outputs = tf.map_fn(lambda x: self.add_projection(x), lstm_outputs) #[seq_len, batch_size, nb_tags]
#        projected_outputs = tf.transpose(projected_outputs, perm=[1, 0, 2]) # [batch_size, seq_len, nb_tags]
        self.loss = self.add_loss_op(self.arc_outputs, self.inputs_placeholder_dict['arcs']) + self.add_loss_op(rel_outputs, self.inputs_placeholder_dict['rels']) + self.add_loss_op(joint_output, self.inputs_placeholder_dict['stags']) + self.add_loss_op(joint_output_jk, self.inputs_placeholder_dict['jk'])
        self.add_probs(joint_output)
        self.predicted_arcs, self.UAS = self.add_accuracy(self.arc_outputs, self.inputs_placeholder_dict['arcs'])
        self.predicted_rels, self.rel_acc = self.add_accuracy(rel_outputs, self.inputs_placeholder_dict['rels'])
        self.predicted_stags, self.stag_acc = self.add_accuracy(joint_output, self.inputs_placeholder_dict['stags'])
        self.predicted_jk, self.jk_acc = self.add_accuracy(joint_output_jk, self.inputs_placeholder_dict['jk'])
        self.train_op = self.add_train_op(self.loss)
Example #5
    def __init__(self, opts, test_opts=None, beam_size=0):
        ## Notation:
        ## b: batch_size
        ## d: # units
        ## n: # tokens in the sentence
        ## B: beam_size
        self.opts = opts
        self.test_opts = test_opts
        self.loader = Dataset(opts, test_opts)
        self.batch_size = opts.batch_size
        self.beam_size = beam_size
        print('beam')
        print(beam_size)
        self.get_features()
        self.add_placeholders()
        self.inputs_dim = self.opts.embedding_dim + self.opts.suffix_dim + self.opts.cap + self.opts.num + self.opts.jk_dim + self.opts.nb_filters
        self.outputs_dim = (1+self.opts.bi)*self.opts.units
        inputs_list = [self.add_word_embedding()]
        if self.opts.suffix_dim > 0:
            inputs_list.append(self.add_suffix_embedding())
        if self.opts.cap:
            inputs_list.append(self.add_cap())
        if self.opts.num:
            inputs_list.append(self.add_num())
        if self.opts.jk_dim > 0:
            inputs_list.append(self.add_jackknife_embedding())
        if self.opts.chars_dim > 0:
            inputs_list.append(self.add_char_embedding())
        inputs_tensor = tf.concat(inputs_list, 2) ## [seq_len, batch_size, inputs_dim]
        forward_inputs_tensor = self.add_dropout(inputs_tensor, self.input_keep_prob)
        for i in xrange(self.opts.num_layers):
            forward_inputs_tensor = self.add_dropout(self.add_lstm(forward_inputs_tensor, i, 'Forward'), self.keep_prob) ## [seq_len, batch_size, units]
        lstm_outputs = forward_inputs_tensor
        if self.opts.bi:
            backward_inputs_tensor = self.add_dropout(tf.reverse(inputs_tensor, [0]), self.input_keep_prob)
            for i in xrange(self.opts.num_layers):
                backward_inputs_tensor = self.add_dropout(self.add_lstm(backward_inputs_tensor, i, 'Backward'), self.keep_prob) ## [seq_len, batch_size, units]
            backward_inputs_tensor = tf.reverse(backward_inputs_tensor, [0])
            lstm_outputs = tf.concat([lstm_outputs, backward_inputs_tensor], 2) ## [seq_len, batch_size, outputs_dim]
        crf_inputs = tf.map_fn(lambda x: self.feed_to_crf(x), lstm_outputs) ## [seq_len, batch_size, outputs_dim] => [seq_len, batch_size, lm]
        crf_inputs = self.add_dropout(crf_inputs, self.keep_prob)

#        if beam_size > 0:
#            self.predictions, self.scores, self.back_pointers = self.add_forward_beam_path(forward_inputs_tensor, backward_inputs_tensor, beam_size) ## [seq_len, batch_size, nb_tags]
#            self.weight = tf.not_equal(self.inputs_placeholder_dict['words'], tf.zeros(tf.shape(self.inputs_placeholder_dict['words']), tf.int32)) # [self.batch_size, seq_len]
#            self.weight_beam = tf.reshape(tf.tile(self.weight, [1, beam_size]), [-1, tf.shape(self.weight)[1]]) # [batch_size, seq_len]
#        else:
        self.predictions, projected_outputs = self.add_crf_path(crf_inputs) ## [seq_len, batch_size, nb_tags]
        self.weight = tf.cast(tf.not_equal(self.inputs_placeholder_dict['words'], tf.zeros(tf.shape(self.inputs_placeholder_dict['words']), tf.int32)), tf.float32) ## [batch_size, seq_len]
        self.add_lm_accuracy()
        self.loss = self.add_loss_op(projected_outputs)
        self.train_op = self.add_train_op(self.loss)
Example #6
    def __init__(self, opts, test_opts=None):

        self.opts = opts
        self.test_opts = test_opts
        self.loader = Dataset(opts, test_opts)
        self.batch_size = opts.batch_size
        self.get_features()
        self.add_placeholders()
        self.inputs_dim = self.opts.embedding_dim + self.opts.suffix_dim + self.opts.cap + self.opts.num + self.opts.jk_dim + self.opts.nb_filters
        self.outputs_dim = (1 + self.opts.bi) * self.opts.units
        inputs_list = [self.add_word_embedding()]
        if self.opts.suffix_dim > 0:
            inputs_list.append(self.add_suffix_embedding())
        if self.opts.cap:
            inputs_list.append(self.add_cap())
        if self.opts.num:
            inputs_list.append(self.add_num())
        if self.opts.jk_dim > 0:
            inputs_list.append(self.add_jackknife_embedding())
        self.adv_embedding = inputs_list[:]
        if self.opts.chars_dim > 0:
            inputs_list.append(self.add_char_embedding())
        inputs_tensor = tf.concat(inputs_list,
                                  2)  ## [seq_len, batch_size, inputs_dim]
        self.weight = tf.cast(
            tf.not_equal(
                self.inputs_placeholder_dict['words'],
                tf.zeros(tf.shape(self.inputs_placeholder_dict['words']),
                         tf.int32)), tf.float32)  ## [batch_size, seq_len]
        clean_loss, projected_outputs = self.feed_network_inputs(inputs_tensor)
        self.add_accuracy(projected_outputs)
        adv_loss, _ = self.add_adversarial_loss(
            clean_loss)  ## do not care about adversarial prediction accuracy
        alpha = 0.5
        #alpha = 1.0
        self.loss = alpha * clean_loss + (1.0 - alpha) * adv_loss
        self.train_op = self.add_train_op(self.loss)
Example #7
class Parsing_Model(object):
    def add_placeholders(self):
        self.inputs_placeholder_dict = {}
        for feature in self.features:
            if feature == 'chars':
                self.inputs_placeholder_dict[feature] = tf.placeholder(
                    tf.int32, shape=[None, None, None])
            else:
                self.inputs_placeholder_dict[feature] = tf.placeholder(
                    tf.int32, shape=[None, None])

        self.keep_prob = tf.placeholder(tf.float32)
        self.input_keep_prob = tf.placeholder(tf.float32)
        self.hidden_prob = tf.placeholder(tf.float32)
        self.mlp_prob = tf.placeholder(tf.float32)

    def add_word_embedding(self):
        with tf.device('/cpu:0'):
            with tf.variable_scope('word_embedding') as scope:
                embedding = tf.get_variable(
                    'word_embedding_mat',
                    self.loader.word_embeddings.shape,
                    initializer=tf.constant_initializer(
                        self.loader.word_embeddings))

            inputs = tf.nn.embedding_lookup(
                embedding, self.inputs_placeholder_dict['words']
            )  ## [batch_size, seq_len, embedding_dim]
            inputs = tf.transpose(
                inputs, perm=[1, 0,
                              2])  # [seq_length, batch_size, embedding_dim]
        return inputs

    def add_jackknife_embedding(self):
        with tf.device('/cpu:0'):
            with tf.variable_scope('jk_embedding') as scope:
                embedding = tf.get_variable(
                    'jk_embedding_mat',
                    [self.loader.nb_jk + 1, self.opts.jk_dim
                     ])  # +1 for padding
            inputs = tf.nn.embedding_lookup(
                embedding, self.inputs_placeholder_dict['jk']
            )  ## [batch_size, seq_len, embedding_dim]
            inputs = tf.transpose(
                inputs, perm=[1, 0,
                              2])  # [seq_length, batch_size, embedding_dim]
        return inputs

    def add_stag_embedding(self):
        with tf.device('/cpu:0'):
            with tf.variable_scope('stag_embedding') as scope:
                embedding = tf.get_variable(
                    'stag_embedding_mat',
                    [self.loader.nb_stags + 1, self.opts.stag_dim
                     ])  # +1 for padding
            inputs = tf.nn.embedding_lookup(
                embedding, self.inputs_placeholder_dict['stags']
            )  ## [batch_size, seq_len, embedding_dim]
            inputs = tf.transpose(
                inputs, perm=[1, 0,
                              2])  # [seq_length, batch_size, embedding_dim]
        self.stag_embeddings = embedding
        return inputs

    def add_char_embedding(self):
        with tf.device('/cpu:0'):
            with tf.variable_scope('char_embedding') as scope:
                embedding = tf.get_variable(
                    'char_embedding_mat',
                    [self.loader.nb_chars + 1, self.opts.chars_dim
                     ])  # +1 for padding

            inputs = tf.nn.embedding_lookup(
                embedding, self.inputs_placeholder_dict['chars']
            )  ## [batch_size, seq_len-1, word_len, embedding_dim]
            ## -1 because we don't have ROOT
            inputs = tf.transpose(inputs, perm=[1, 0, 2, 3])
            ## [seq_len-1, batch_size, word_len, embedding_dim]
            inputs = self.add_dropout(inputs, self.input_keep_prob)
            weights = get_char_weights(self.opts, 'char_encoding')
            inputs = encode_char(
                inputs, weights)  ## [seq_len-1, batch_size, nb_filters]
            shape = tf.shape(inputs)
            ## add 0 vectors for <-root->
            inputs = tf.concat([tf.zeros([1, shape[1], shape[2]]), inputs], 0)
        return inputs

    def add_lstm(self, inputs, i, name, backward=False):
        prev_init = tf.zeros([2, tf.shape(inputs)[1],
                              self.opts.units])  # [2, batch_size, num_units]
        #prev_init = tf.zeros([2, 100, self.opts.units])  # [2, batch_size, num_units]
        if i == 0:
            inputs_dim = self.inputs_dim
        else:
            inputs_dim = self.opts.units * 2  ## concat after each layer
        weights = get_lstm_weights('{}_LSTM_layer{}'.format(name, i),
                                   inputs_dim, self.opts.units,
                                   tf.shape(inputs)[1], self.hidden_prob)
        if backward:
            ## backward: reset states after zero paddings
            non_paddings = tf.transpose(
                self.weight,
                [1, 0])  ## [batch_size, seq_len] => [seq_len, batch_size]
            non_paddings = tf.reverse(non_paddings, [0])
            cell_hidden = tf.scan(
                lambda prev, x: lstm(prev, x, weights, backward=backward),
                [inputs, non_paddings], prev_init)
        else:
            cell_hidden = tf.scan(lambda prev, x: lstm(prev, x, weights),
                                  inputs, prev_init)
        #cell_hidden [seq_len, 2, batch_size, units]
        h = tf.unstack(cell_hidden, 2,
                       axis=1)[1]  #[seq_len, batch_size, units]
        return h
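The backward pass in add_lstm re-initializes its recurrent state whenever the (reversed) sequence crosses a zero-padding position, using the 0/1 non-padding flags stored in self.weight. The lstm helper itself is defined elsewhere; as a rough sketch of the reset rule it presumably applies, the carried state is simply multiplied by the flag before each update (the values and the update function below are made up):

import numpy as np

# hypothetical 0/1 flags: the reversed sequence starts with padding positions
non_padding = np.array([0.0, 0.0, 1.0, 1.0, 1.0])
inputs = np.array([0.0, 0.0, 0.5, 0.3, 0.2])

h = 0.0
for x_t, keep_t in zip(inputs, non_padding):
    h = keep_t * h                # reset the carried state at padding positions
    h = np.tanh(0.8 * h + x_t)    # stand-in for the real LSTM cell update
print(h)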

    def add_dropout(self, inputs, keep_prob):
        ## inputs [seq_len, batch_size, inputs_dims/units]
        dummy_dp = tf.ones(tf.shape(inputs)[1:])
        dummy_dp = tf.nn.dropout(dummy_dp, keep_prob)
        return tf.map_fn(lambda x: dummy_dp * x, inputs)
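add_dropout samples a single dropout mask of shape [batch_size, dims] (tf.shape(inputs)[1:]) and multiplies it into every time step via tf.map_fn, so the same units are dropped for the entire sequence rather than being resampled per step. A minimal NumPy sketch of that broadcasting, with made-up shapes:

import numpy as np

seq_len, batch_size, dims, keep_prob = 5, 2, 4, 0.5
inputs = np.random.randn(seq_len, batch_size, dims)

# one mask per (batch, dim) pair, reused at every time step; scaled like tf.nn.dropout
mask = (np.random.rand(batch_size, dims) < keep_prob) / keep_prob
dropped = inputs * mask[np.newaxis, :, :]   # broadcast over seq_len
print(dropped.shape)  # (5, 2, 4)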

    def add_projection(self, inputs):
        with tf.variable_scope('Projection') as scope:
            proj_U = tf.get_variable('weight',
                                     [self.outputs_dim, self.loader.nb_tags])
            proj_b = tf.get_variable('bias', [self.loader.nb_tags])
            outputs = tf.matmul(inputs, proj_U) + proj_b
            return outputs

    def add_loss_op(self, output, gold):
        cross_entropy = sequence_loss(output, gold, self.weight)
        loss = tf.reduce_sum(cross_entropy)
        return loss

    def add_accuracy(self, output, gold):
        predictions = tf.cast(tf.argmax(output, 2),
                              tf.int32)  ## [batch_size, seq_len]
        correct_predictions = self.weight * tf.cast(
            tf.equal(predictions, gold), tf.float32)
        accuracy = tf.reduce_sum(tf.cast(correct_predictions,
                                         tf.float32)) / tf.reduce_sum(
                                             tf.cast(self.weight, tf.float32))
        return predictions, accuracy

    def add_train_op(self, loss):
        optimizer = tf.train.AdamOptimizer()
        train_op = optimizer.minimize(loss)
        return train_op

    def get_features(self):
        self.features = ['words', 'arcs', 'rels']
        if self.opts.jk_dim > 0:
            self.features.append('jk')
        if self.opts.stag_dim > 0:
            self.features.append('stags')
        if self.opts.chars_dim > 0:
            self.features.append('chars')

    def add_biaffine(self, inputs):
        ## inputs [seq_len, batch_size, units]
        ## first define four different MLPs
        arc_roles = ['arc-dep', 'arc-head']
        rel_roles = ['rel-dep', 'rel-head']
        vectors = {}
        for arc_role in arc_roles:
            for i in xrange(self.opts.mlp_num_layers):
                if i == 0:
                    inputs_dim = self.outputs_dim
                    vector_mlp = inputs
                else:
                    inputs_dim = self.opts.arc_mlp_units
                weights = get_mlp_weights('{}_MLP_Layer{}'.format(arc_role, i),
                                          inputs_dim, self.opts.arc_mlp_units)
                vector_mlp = self.add_dropout(
                    tf.map_fn(lambda x: mlp(x, weights), vector_mlp),
                    self.mlp_prob)
                ## [seq_len, batch_size, 2*mlp_units]
            vectors[arc_role] = vector_mlp
        weights = get_arc_weights('arc', self.opts.arc_mlp_units)
        arc_output = arc_equation(
            vectors['arc-head'], vectors['arc-dep'], weights
        )  # [batch_size, seq_len, seq_len] dim 1: deps, dim 2: heads
        #        arc_predictions = get_arcs(arc_output, self.test_opts) # [batch_size, seq_len]
        arc_predictions = tf.argmax(arc_output, 2)  # [batch_size, seq_len]
        for rel_role in rel_roles:
            for i in xrange(self.opts.mlp_num_layers):
                if i == 0:
                    inputs_dim = self.outputs_dim
                    vector_mlp = inputs
                else:
                    inputs_dim = self.opts.rel_mlp_units
                weights = get_mlp_weights('{}_MLP_Layer{}'.format(rel_role, i),
                                          inputs_dim, self.opts.rel_mlp_units)
                vector_mlp = self.add_dropout(
                    tf.map_fn(lambda x: mlp(x, weights), vector_mlp),
                    self.mlp_prob)
                ## [seq_len, batch_size, 2*mlp_units]
            vectors[rel_role] = vector_mlp
        weights = get_rel_weights('rel', self.opts.rel_mlp_units,
                                  self.loader.nb_rels)
        rel_output, rel_scores = rel_equation(
            vectors['rel-head'], vectors['rel-dep'], weights,
            arc_predictions)  #[batch_size, seq_len, nb_rels]
        return arc_output, rel_output, rel_scores
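arc_equation, rel_equation and the weight helpers are defined elsewhere in the repository. As a hedged sketch only: a standard biaffine arc scorer in the Dozat-and-Manning style that this method appears to follow scores every (dependent, head) pair with a bilinear term plus a per-head bias, then takes an argmax over candidate heads. The dimensions and weights below are hypothetical, not necessarily the exact implementation used here:

import numpy as np

n, d = 6, 8                        # tokens, arc MLP units (toy sizes)
h_dep = np.random.randn(n, d)      # 'arc-dep' vectors for one sentence
h_head = np.random.randn(n, d)     # 'arc-head' vectors
U = np.random.randn(d, d)
w = np.random.randn(d)

# scores[i, j] = h_dep[i]^T U h_head[j] + w^T h_head[j]
scores = h_dep @ U @ h_head.T + h_head @ w   # [n, n]; rows: dependents, columns: candidate heads
pred_heads = scores.argmax(axis=1)
print(pred_heads)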

    def __init__(self, opts, test_opts=None):

        self.opts = opts
        self.test_opts = test_opts
        self.loader = Dataset(opts, test_opts)
        self.batch_size = 100
        self.get_features()
        self.add_placeholders()
        self.inputs_dim = self.opts.embedding_dim + self.opts.jk_dim + self.opts.stag_dim + self.opts.nb_filters
        self.outputs_dim = (1 + self.opts.bi) * self.opts.units
        inputs_list = [self.add_word_embedding()]
        if self.opts.jk_dim:
            inputs_list.append(self.add_jackknife_embedding())
        if self.opts.stag_dim > 0:
            inputs_list.append(self.add_stag_embedding())
        if self.opts.chars_dim > 0:
            inputs_list.append(self.add_char_embedding())
        inputs_tensor = tf.concat(inputs_list,
                                  2)  ## [seq_len, batch_size, inputs_dim]
        inputs_tensor = self.add_dropout(inputs_tensor, self.input_keep_prob)
        inputs_shape = tf.shape(self.inputs_placeholder_dict['words'])
        ## no need to worry about the heads of <-root-> and zero-pads
        ## Let's get those non-padding places so we can reinitialize hidden states after each padding in the backward path
        ### because the backward path starts with zero pads.
        self.weight = tf.cast(
            tf.not_equal(self.inputs_placeholder_dict['words'],
                         tf.zeros(inputs_shape, tf.int32)),
            tf.float32)  ## [batch_size, seq_len]
        for i in xrange(self.opts.num_layers):
            forward_outputs_tensor = self.add_dropout(
                self.add_lstm(inputs_tensor, i, 'Forward'),
                self.keep_prob)  ## [seq_len, batch_size, units]
            if self.opts.bi:
                backward_outputs_tensor = self.add_dropout(
                    self.add_lstm(tf.reverse(inputs_tensor, [0]), i,
                                  'Backward', True),
                    self.keep_prob)  ## [seq_len, batch_size, units]
                inputs_tensor = tf.concat([
                    forward_outputs_tensor,
                    tf.reverse(backward_outputs_tensor, [0])
                ], 2)
            else:
                inputs_tensor = forward_outputs_tensor
        self.weight = self.weight * tf.cast(
            tf.not_equal(
                self.inputs_placeholder_dict['words'],
                tf.ones(inputs_shape, tf.int32) *
                self.loader.word_index['<-root->']),
            tf.float32)  ## [batch_size, seq_len]
        lstm_outputs = inputs_tensor  ## [seq_len, batch_size, outputs_dim]

        self.arc_outputs, rel_outputs, self.rel_scores = self.add_biaffine(
            lstm_outputs)
        #        projected_outputs = tf.map_fn(lambda x: self.add_projection(x), lstm_outputs) #[seq_len, batch_size, nb_tags]
        #        projected_outputs = tf.transpose(projected_outputs, perm=[1, 0, 2]) # [batch_size, seq_len, nb_tags]
        self.loss = self.add_loss_op(
            self.arc_outputs,
            self.inputs_placeholder_dict['arcs']) + self.add_loss_op(
                rel_outputs, self.inputs_placeholder_dict['rels'])
        self.predicted_arcs, self.UAS = self.add_accuracy(
            self.arc_outputs, self.inputs_placeholder_dict['arcs'])
        self.predicted_rels, self.rel_acc = self.add_accuracy(
            rel_outputs, self.inputs_placeholder_dict['rels'])
        self.train_op = self.add_train_op(self.loss)

    def run_batch(self, session, testmode=False):
        if not testmode:
            feed = {}
            for feat in self.inputs_placeholder_dict.keys():
                feed[self.inputs_placeholder_dict[
                    feat]] = self.loader.inputs_train_batch[feat]
            feed[self.keep_prob] = self.opts.dropout_p
            feed[self.hidden_prob] = self.opts.hidden_p
            feed[self.input_keep_prob] = self.opts.input_dp
            feed[self.mlp_prob] = self.opts.mlp_prob
            train_op = self.train_op
            _, loss, UAS, rel_acc = session.run(
                [train_op, self.loss, self.UAS, self.rel_acc], feed_dict=feed)
            return loss, UAS, rel_acc
        else:
            feed = {}
            predictions_batch = {}
            for feat in self.inputs_placeholder_dict.keys():
                feed[self.inputs_placeholder_dict[
                    feat]] = self.loader.inputs_test_batch[feat]
            feed[self.keep_prob] = 1.0
            feed[self.hidden_prob] = 1.0
            feed[self.input_keep_prob] = 1.0
            feed[self.mlp_prob] = 1.0
            #            loss, accuracy, predictions, weight = session.run([self.loss, self.accuracy, self.predictions, self.weight], feed_dict=feed)
            loss, predicted_arcs, predicted_rels, UAS, weight, arc_outputs, rel_scores = session.run(
                [
                    self.loss, self.predicted_arcs, self.predicted_rels,
                    self.UAS, self.weight, self.arc_outputs, self.rel_scores
                ],
                feed_dict=feed)
            weight = weight.astype(bool)
            predicted_arcs_greedy = predicted_arcs[weight]
            predicted_rels_greedy = predicted_rels[weight]
            predictions_batch['arcs_greedy'] = predicted_arcs_greedy
            predictions_batch['rels_greedy'] = predicted_rels_greedy
            non_padding = weight.astype(bool)
            non_padding[:, 0] = True  ## take the dummy root nodes
            predicted_arcs, predicted_rels = predict_arcs_rels(
                arc_outputs, rel_scores, non_padding)
            predictions_batch['arcs'] = predicted_arcs
            predictions_batch['rels'] = predicted_rels
            #            print(predicted_greedy_arcs.shape)
            #            print(predicted_arcs.shape)
            #print(arc_outputs.shape)
            return loss, predictions_batch, UAS

    def run_epoch(self, session, testmode=False):

        if not testmode:
            epoch_start_time = time.time()
            next_batch = self.loader.next_batch
            epoch_incomplete = next_batch(self.batch_size)
            while epoch_incomplete:
                loss, UAS, rel_acc = self.run_batch(session)
                print('{}/{}, loss {:.4f}, Raw UAS {:.4f}, Rel Acc {:.4f}'.
                      format(self.loader._index_in_epoch,
                             self.loader.nb_train_samples, loss, UAS, rel_acc),
                      end='\r')
                epoch_incomplete = next_batch(self.batch_size)
            print('\nEpoch Training Time {}'.format(time.time() -
                                                    epoch_start_time))
            return loss, UAS
        else:
            next_test_batch = self.loader.next_test_batch
            test_incomplete = next_test_batch(self.batch_size)
            output_types = ['arcs', 'rels', 'arcs_greedy', 'rels_greedy']
            predictions = {output_type: [] for output_type in output_types}
            while test_incomplete:
                loss, predictions_batch, UAS = self.run_batch(session, True)
                for name, pred in predictions_batch.items():
                    predictions[name].append(pred)
                #print('Testmode {}/{}, loss {}, accuracy {}'.format(self.loader._index_in_test, self.loader.nb_validation_samples, loss, accuracy), end = '\r')
                print('Test mode {}/{}, Raw UAS {:.4f}'.format(
                    self.loader._index_in_test,
                    self.loader.nb_validation_samples, UAS),
                      end='\r')  #, end = '\r')
                test_incomplete = next_test_batch(self.batch_size)
            for name, pred in predictions.items():
                predictions[name] = np.hstack(pred)
            if self.test_opts is not None:
                self.loader.output_arcs(predictions['arcs'],
                                        self.test_opts.predicted_arcs_file)
                self.loader.output_rels(predictions['rels'],
                                        self.test_opts.predicted_rels_file)
                self.loader.output_arcs(
                    predictions['arcs_greedy'],
                    self.test_opts.predicted_arcs_file_greedy)
                self.loader.output_rels(
                    predictions['rels_greedy'],
                    self.test_opts.predicted_rels_file_greedy)
            scores = self.loader.get_scores(predictions, self.opts,
                                            self.test_opts)
            if self.test_opts.get_weight:
                stag_embeddings = session.run(self.stag_embeddings)
                self.loader.output_weight(stag_embeddings)
            #scores['UAS'] = np.mean(predictions['arcs'][self.loader.punc] == self.loader.gold_arcs[self.loader.punc])
            #scores['UAS_greedy'] = np.mean(predictions['arcs_greedy'][self.loader.punc] == self.loader.gold_arcs[self.loader.punc])
            return scores
Example #8
class Stagging_Model_Concat_Adv(object):
    def add_placeholders(self):
        #self.inputs_placeholder_list = [tf.placeholder(tf.int32, shape = [None, None]) for _ in xrange(2+self.opts.suffix+self.opts.num+self.opts.cap+self.opts.jackknife)] # 2 for text_sequences and tag_sequences, necessary no matter what
        #self.inputs_placeholder_list = [tf.placeholder(tf.int32, shape = [None, None]) for _ in xrange(6)] # 2 for text_sequences and tag_sequences, necessary no matter what
        self.inputs_placeholder_dict = {}
        for feature in self.features:
            if feature == 'chars':
                self.inputs_placeholder_dict[feature] = tf.placeholder(
                    tf.int32, shape=[None, None, None])
            else:
                self.inputs_placeholder_dict[feature] = tf.placeholder(
                    tf.int32, shape=[None, None])

        self.keep_prob = tf.placeholder(tf.float32)
        self.input_keep_prob = tf.placeholder(tf.float32)
        self.hidden_prob = tf.placeholder(tf.float32)

    def add_word_embedding(self):
        with tf.device('/cpu:0'):
            with tf.variable_scope('word_embedding') as scope:
                embedding = tf.get_variable(
                    'word_embedding_mat',
                    self.loader.word_embeddings.shape,
                    initializer=tf.constant_initializer(
                        self.loader.word_embeddings))
            embedding = self.normalize_embedding(embedding,
                                                 self.loader.word_freqs)

            inputs = tf.nn.embedding_lookup(
                embedding, self.inputs_placeholder_dict['words']
            )  ## [batch_size, seq_len, embedding_dim]
            inputs = tf.transpose(
                inputs, perm=[1, 0,
                              2])  # [seq_length, batch_size, embedding_dim]
        return inputs

    def add_suffix_embedding(self):
        with tf.device('/cpu:0'):
            with tf.variable_scope('suffix_embedding') as scope:
                embedding = tf.get_variable(
                    'suffix_embedding_mat',
                    [self.loader.nb_suffixes + 1, self.opts.suffix_dim
                     ])  # +1 for padding
            embedding = self.normalize_embedding(embedding,
                                                 self.loader.suffix_freqs)

            inputs = tf.nn.embedding_lookup(
                embedding, self.inputs_placeholder_dict['suffix']
            )  ## [batch_size, seq_len, embedding_dim]
            inputs = tf.transpose(
                inputs, perm=[1, 0,
                              2])  # [seq_length, batch_size, embedding_dim]
        return inputs

    def add_cap(self):
        inputs = tf.cast(
            tf.expand_dims(self.inputs_placeholder_dict['cap'], -1),
            tf.float32)
        inputs = tf.transpose(inputs, perm=[1, 0,
                                            2])  # [seq_length, batch_size, 1]
        return inputs  # [seq_length, batch_size, 1]

    def add_num(self):
        inputs = tf.cast(
            tf.expand_dims(self.inputs_placeholder_dict['num'], -1),
            tf.float32)
        inputs = tf.transpose(inputs, perm=[1, 0,
                                            2])  # [seq_length, batch_size, 1]
        return inputs  # [seq_length, batch_size, 1]

    def add_char_embedding(self):
        with tf.device('/cpu:0'):
            with tf.variable_scope('char_embedding') as scope:
                embedding = tf.get_variable(
                    'char_embedding_mat',
                    [self.loader.nb_chars + 1, self.opts.chars_dim
                     ])  # +1 for padding
            embedding = self.normalize_embedding(embedding,
                                                 self.loader.char_freqs)

            inputs = tf.nn.embedding_lookup(
                embedding, self.inputs_placeholder_dict['chars']
            )  ## [batch_size, seq_len, word_len, embedding_dim]
            inputs = tf.transpose(inputs, perm=[1, 0, 2, 3])
            ## [seq_len, batch_size, word_len, embedding_dim]
            self.adv_embedding.append(inputs)
            inputs = self.add_dropout(inputs, self.input_keep_prob)
            weights = get_char_weights(self.opts, 'char_encoding')
            self.char_weights = weights  ## for adversarial
            inputs = encode_char(inputs,
                                 weights)  ## [seq_len, batch_size, nb_filters]
        return inputs

    def add_jackknife_embedding(self):
        with tf.device('/cpu:0'):
            with tf.variable_scope('jk_embedding') as scope:
                embedding = tf.get_variable(
                    'jk_embedding_mat',
                    [self.loader.nb_jk + 1, self.opts.jk_dim
                     ])  # +1 for padding
            embedding = self.normalize_embedding(embedding,
                                                 self.loader.jk_freqs)
            inputs = tf.nn.embedding_lookup(
                embedding, self.inputs_placeholder_dict['jk']
            )  ## [batch_size, seq_len, embedding_dim]
            inputs = tf.transpose(
                inputs, perm=[1, 0,
                              2])  # [seq_length, batch_size, embedding_dim]
        return inputs

    def add_lstm(self, inputs, i, name, backward=False, adv=False):
        prev_init = tf.zeros([2, tf.shape(inputs)[1],
                              self.opts.units])  # [2, batch_size, num_units]
        #prev_init = tf.zeros([2, 100, self.opts.units])  # [2, batch_size, num_units]
        if i == 0:
            inputs_dim = self.inputs_dim
        else:
            inputs_dim = self.opts.units * 2  ## concat after each layer
        weights = get_lstm_weights('{}_LSTM_layer{}'.format(name, i),
                                   inputs_dim,
                                   self.opts.units,
                                   tf.shape(inputs)[1],
                                   self.hidden_prob,
                                   reuse=adv)
        if backward:
            ## backward: reset states after zero paddings
            non_paddings = tf.transpose(
                self.weight,
                [1, 0])  ## [batch_size, seq_len] => [seq_len, batch_size]
            non_paddings = tf.reverse(non_paddings, [0])
            cell_hidden = tf.scan(
                lambda prev, x: lstm(prev, x, weights, backward=backward),
                [inputs, non_paddings], prev_init)
        else:
            cell_hidden = tf.scan(lambda prev, x: lstm(prev, x, weights),
                                  inputs, prev_init)
        #cell_hidden [seq_len, 2, batch_size, units]
        h = tf.unstack(cell_hidden, 2,
                       axis=1)[1]  #[seq_len, batch_size, units]
        return h

    def add_dropout(self, inputs, keep_prob):
        ## inputs [seq_len, batch_size, inputs_dims/units]
        dummy_dp = tf.ones(tf.shape(inputs)[1:])
        dummy_dp = tf.nn.dropout(dummy_dp, keep_prob)
        return tf.map_fn(lambda x: dummy_dp * x, inputs)

    def add_projection(self, inputs, reuse=False, name=None):
        if name is None:
            name = 'Projection'
        with tf.variable_scope(name) as scope:
            if reuse:
                scope.reuse_variables()
            proj_U = tf.get_variable('weight',
                                     [self.outputs_dim, self.loader.nb_tags])
            proj_b = tf.get_variable('bias', [self.loader.nb_tags])
            outputs = tf.matmul(inputs, proj_U) + proj_b
        return outputs

    def add_loss_op(self, output):
        cross_entropy = sequence_loss(output,
                                      self.inputs_placeholder_dict['tags'],
                                      self.weight)
        tf.add_to_collection('total loss', cross_entropy)
        loss = tf.add_n(tf.get_collection('total loss'))
        return loss

    def add_accuracy(self, output):
        self.predictions = tf.cast(tf.argmax(output, 2),
                                   tf.int32)  ## [batch_size, seq_len]
        correct_predictions = self.weight * tf.cast(
            tf.equal(self.predictions, self.inputs_placeholder_dict['tags']),
            tf.float32)
        self.accuracy = tf.reduce_sum(tf.cast(
            correct_predictions, tf.float32)) / tf.reduce_sum(
                tf.cast(self.weight, tf.float32))

    def add_train_op(self, loss):
        optimizer = tf.train.AdamOptimizer()
        #optimizer = tf.train.MomentumOptimizer(0.01, 0.9)
        train_op = optimizer.minimize(loss)
        return train_op

    def normalize_embedding(self, embedding, freqs):
        ## embedding [nb_words, dim]
        ##
        e_embedding = tf.reduce_sum(embedding * tf.expand_dims(freqs, 1),
                                    0,
                                    keep_dims=True)
        v_embedding = tf.reduce_sum(
            (embedding - e_embedding)**2 * tf.expand_dims(freqs, 1),
            0,
            keep_dims=True)
        embedding = (embedding - e_embedding) / tf.sqrt(v_embedding)
        return embedding
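normalize_embedding standardizes each embedding dimension using the unigram frequencies as weights: it subtracts the frequency-weighted mean and divides by the frequency-weighted standard deviation, in the style of Miyato et al.'s adversarial training for text. The same arithmetic in NumPy, with toy values:

import numpy as np

embedding = np.random.randn(5, 3)              # [nb_words, dim], toy values
freqs = np.array([0.4, 0.3, 0.1, 0.1, 0.1])    # unigram frequencies summing to 1

mean = (embedding * freqs[:, None]).sum(axis=0, keepdims=True)                 # [1, dim]
var = (((embedding - mean) ** 2) * freqs[:, None]).sum(axis=0, keepdims=True)  # [1, dim]
normalized = (embedding - mean) / np.sqrt(var)
print(normalized.shape)  # (5, 3)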

    def add_adversarial_loss(self, loss):
        ## self.adv_embedding. The last element is the char embeddings, which need further computation
        ## run char computation
        gradients = tf.gradients(loss, self.adv_embedding)
        new_embeddings = []
        for i, embedding in enumerate(self.adv_embedding):
            normalized_gradient = tf.stop_gradient(
                gradients[i] /
                tf.norm(gradients[i], axis=-1,
                        keep_dims=True))  ## do not take second-order gradient
            epsilon = 0.001 * tf.sqrt(
                tf.cast(tf.shape(embedding)[-1], tf.float32))
            new_embedding = embedding + epsilon * normalized_gradient
            if i == len(self.adv_embedding) - 1:  ## char computation
                if self.opts.chars_dim > 0:
                    new_embedding = self.add_dropout(new_embedding,
                                                     self.input_keep_prob)
                    new_embedding = encode_char(
                        new_embedding, self.char_weights
                    )  ## [seq_len, batch_size, nb_filters]
            new_embeddings.append(new_embedding)
        inputs_tensor = tf.concat(new_embeddings,
                                  2)  ## [seq_len, batch_size, inputs_dim]
        adv_loss, projected_outputs = self.feed_network_inputs(inputs_tensor,
                                                               adv=True)
        return adv_loss, projected_outputs
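add_adversarial_loss perturbs each clean embedding in the direction of the loss gradient, normalized to unit length along the last axis and scaled by epsilon = 0.001 * sqrt(dim), then re-runs the network on the perturbed inputs with the same weights (adv=True reuses the variables). The perturbation rule on its own, sketched in NumPy with a stand-in gradient:

import numpy as np

embedding = np.random.randn(7, 2, 16)          # [seq_len, batch_size, dim], toy shapes
grad = np.random.randn(*embedding.shape)       # stand-in for tf.gradients(loss, embedding)

unit_grad = grad / np.linalg.norm(grad, axis=-1, keepdims=True)
epsilon = 0.001 * np.sqrt(embedding.shape[-1])
adv_embedding = embedding + epsilon * unit_grad   # r_adv = epsilon * g / ||g||
print(adv_embedding.shape)  # (7, 2, 16)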

    def get_features(self):
        self.features = ['words', 'tags']
        if self.opts.suffix_dim > 0:
            self.features.append('suffix')
        if self.opts.cap:
            self.features.append('cap')
        if self.opts.num:
            self.features.append('num')
        if self.opts.jk_dim > 0:
            self.features.append('jk')
        if self.opts.chars_dim > 0:
            self.features.append('chars')

    def feed_network_inputs(self, inputs_tensor, adv=False):
        inputs_tensor = self.add_dropout(inputs_tensor, self.input_keep_prob)
        for i in xrange(self.opts.num_layers):
            forward_outputs_tensor = self.add_dropout(
                self.add_lstm(inputs_tensor, i, 'Forward', adv=adv),
                self.keep_prob)  ## [seq_len, batch_size, units]
            if self.opts.bi:
                backward_outputs_tensor = self.add_dropout(
                    self.add_lstm(tf.reverse(inputs_tensor, [0]),
                                  i,
                                  'Backward',
                                  True,
                                  adv=adv),
                    self.keep_prob)  ## [seq_len, batch_size, units]
                inputs_tensor = tf.concat([
                    forward_outputs_tensor,
                    tf.reverse(backward_outputs_tensor, [0])
                ], 2)
            else:
                inputs_tensor = forward_outputs_tensor
        lstm_outputs = inputs_tensor  ## [seq_len, batch_size, outputs_dim]
        projected_outputs = tf.map_fn(
            lambda x: self.add_projection(x, reuse=adv),
            lstm_outputs)  #[seq_len, batch_size, nb_tags]
        projected_outputs = tf.transpose(
            projected_outputs, perm=[1, 0,
                                     2])  # [batch_size, seq_len, nb_tags]
        loss = self.add_loss_op(projected_outputs)
        return loss, projected_outputs

    def __init__(self, opts, test_opts=None):

        self.opts = opts
        self.test_opts = test_opts
        self.loader = Dataset(opts, test_opts)
        self.batch_size = opts.batch_size
        self.get_features()
        self.add_placeholders()
        self.inputs_dim = self.opts.embedding_dim + self.opts.suffix_dim + self.opts.cap + self.opts.num + self.opts.jk_dim + self.opts.nb_filters
        self.outputs_dim = (1 + self.opts.bi) * self.opts.units
        inputs_list = [self.add_word_embedding()]
        if self.opts.suffix_dim > 0:
            inputs_list.append(self.add_suffix_embedding())
        if self.opts.cap:
            inputs_list.append(self.add_cap())
        if self.opts.num:
            inputs_list.append(self.add_num())
        if self.opts.jk_dim > 0:
            inputs_list.append(self.add_jackknife_embedding())
        self.adv_embedding = inputs_list[:]
        if self.opts.chars_dim > 0:
            inputs_list.append(self.add_char_embedding())
        inputs_tensor = tf.concat(inputs_list,
                                  2)  ## [seq_len, batch_size, inputs_dim]
        self.weight = tf.cast(
            tf.not_equal(
                self.inputs_placeholder_dict['words'],
                tf.zeros(tf.shape(self.inputs_placeholder_dict['words']),
                         tf.int32)), tf.float32)  ## [batch_size, seq_len]
        clean_loss, projected_outputs = self.feed_network_inputs(inputs_tensor)
        self.add_accuracy(projected_outputs)
        adv_loss, _ = self.add_adversarial_loss(
            clean_loss)  ## do not care about adversarial prediction accuracy
        alpha = 0.5
        #alpha = 1.0
        self.loss = alpha * clean_loss + (1.0 - alpha) * adv_loss
        self.train_op = self.add_train_op(self.loss)

    def run_batch(self, session, testmode=False):
        if not testmode:
            feed = {}
            #for placeholder, data in zip(self.inputs_placeholder_list, self.loader.inputs_train_batch):
            #    feed[placeholder] = data
            for feat in self.inputs_placeholder_dict.keys():
                feed[self.inputs_placeholder_dict[
                    feat]] = self.loader.inputs_train_batch[feat]
            feed[self.keep_prob] = self.opts.dropout_p
            feed[self.hidden_prob] = self.opts.hidden_p
            feed[self.input_keep_prob] = self.opts.input_dp
            train_op = self.train_op
            _, loss, accuracy = session.run(
                [train_op, self.loss, self.accuracy], feed_dict=feed)
            return loss, accuracy
        else:
            feed = {}
            for feat in self.inputs_placeholder_dict.keys():
                feed[self.inputs_placeholder_dict[
                    feat]] = self.loader.inputs_test_batch[feat]
            feed[self.keep_prob] = 1.0
            feed[self.hidden_prob] = 1.0
            feed[self.input_keep_prob] = 1.0
            loss, accuracy, predictions, weight = session.run(
                [self.loss, self.accuracy, self.predictions, self.weight],
                feed_dict=feed)
            weight = weight.astype(bool)
            predictions = predictions[weight]
            return loss, accuracy, predictions

    def run_epoch(self, session, testmode=False):

        if not testmode:
            epoch_start_time = time.time()
            next_batch = self.loader.next_batch
            epoch_incomplete = next_batch(self.batch_size)
            while epoch_incomplete:
                loss, accuracy = self.run_batch(session)
                print('{}/{}, loss {:.4f}, accuracy {:.4f}'.format(
                    self.loader._index_in_epoch, self.loader.nb_train_samples,
                    loss, accuracy),
                      end='\r')
                epoch_incomplete = next_batch(self.batch_size)
            print('\nEpoch Training Time {}'.format(time.time() -
                                                    epoch_start_time))
            return loss, accuracy
        else:
            next_test_batch = self.loader.next_test_batch
            test_incomplete = next_test_batch(self.batch_size)
            predictions = []
            while test_incomplete:
                loss, accuracy, predictions_batch = self.run_batch(
                    session, True)
                predictions.append(predictions_batch)
                #print('Testmode {}/{}, loss {}, accuracy {}'.format(self.loader._index_in_test, self.loader.nb_validation_samples, loss, accuracy), end = '\r')
                print('Test mode {}/{}'.format(
                    self.loader._index_in_test,
                    self.loader.nb_validation_samples),
                      end='\r')
                test_incomplete = next_test_batch(self.batch_size)
            predictions = np.hstack(predictions)
            if self.test_opts is not None:
                self.loader.output_stags(predictions, self.test_opts.save_tags)

            accuracy = np.mean(predictions == self.loader.test_gold)
            return accuracy
Example #9
    def __init__(self, opts, test_opts=None, beam_size=16):
        ## Notation:
        ## b: batch_size
        ## d: # units
        ## n: # tokens in the sentence
        ## B: beam_size
        self.opts = opts
        self.test_opts = test_opts
        self.loader = Dataset(opts, test_opts)
        self.batch_size = 32
        self.beam_size = beam_size
        self.add_placeholders()
        self.inputs_dim = self.opts.embedding_dim + self.opts.suffix_dim + self.opts.cap + self.opts.num + self.opts.jk_dim
        self.outputs_dim = (1 + self.opts.bi) * self.opts.units
        inputs_list = [self.add_word_embedding()]
        if self.opts.suffix_dim > 0:
            inputs_list.append(self.add_suffix_embedding())
        if self.opts.cap:
            inputs_list.append(self.add_cap())
        if self.opts.num:
            inputs_list.append(self.add_num())
        if self.opts.jk_dim > 0:
            inputs_list.append(self.add_jackknife_embedding())
        inputs_tensor = tf.concat(inputs_list,
                                  2)  ## [seq_len, batch_size, inputs_dim]
        forward_inputs_tensor = inputs_tensor

        ## The backward path is deterministic, so run it first and treat its outputs like fixed embeddings
        if self.opts.bi:
            backward_inputs_tensor = self.add_dropout(
                tf.reverse(inputs_tensor, [0]), self.input_keep_prob)
            for i in xrange(self.opts.num_layers):
                backward_inputs_tensor = self.add_dropout(
                    self.add_lstm(backward_inputs_tensor, i, 'Backward'),
                    self.keep_prob)  ## [seq_len, batch_size, units]
            backward_inputs_tensor = tf.reverse(
                backward_inputs_tensor, [0])  ## [seq_len, batch_size, units]
        ## backward path is done
        forward_inputs_tensor = self.add_dropout(forward_inputs_tensor,
                                                 self.input_keep_prob)

        if beam_size > 0:
            self.weight = tf.cast(
                tf.not_equal(
                    self.inputs_placeholder_list[0],
                    tf.zeros(tf.shape(self.inputs_placeholder_list[0]),
                             tf.int32)),
                tf.float32)  # [self.batch_size, seq_len]
            self.predictions, self.scores, self.back_pointers = self.add_forward_beam_path(
                forward_inputs_tensor, backward_inputs_tensor,
                beam_size)  ## [seq_len, batch_size, nb_tags]
            self.weight_beam = tf.reshape(
                tf.tile(self.weight, [1, beam_size]),
                [-1, tf.shape(self.weight)[1]])  # [batch_size, seq_len]
            self.accuracy = self.loss  ## dummy value
            #_, projected_outputs = self.add_forward_path(forward_inputs_tensor, backward_inputs_tensor, True) ## [seq_len, batch_size, nb_tags]
            #self.loss += self.add_loss_op(projected_outputs)
            self.train_op = self.add_train_op(self.loss)
        else:
            self.predictions, projected_outputs = self.add_forward_path(
                forward_inputs_tensor,
                backward_inputs_tensor)  ## [seq_len, batch_size, nb_tags]
            self.weight = tf.cast(
                tf.not_equal(
                    self.inputs_placeholder_list[0],
                    tf.zeros(tf.shape(self.inputs_placeholder_list[0]),
                             tf.int32)), tf.float32)  ## [batch_size, seq_len]
            self.add_lm_accuracy()
            self.loss = self.add_loss_op(projected_outputs)
            self.train_op = self.add_train_op(self.loss)
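
For intuition, here is a minimal standalone sketch (not part of the model code) of the mask tiling used in the beam branch above, following the notation comment (b: batch_size, n: tokens, B: beam_size); the sizes are arbitrary:

import tensorflow as tf

b, n, B = 2, 5, 4                                    # arbitrary sizes for illustration
weight = tf.ones([b, n])                             # [b, n] padding mask, one row per sentence
weight_beam = tf.reshape(tf.tile(weight, [1, B]),
                         [-1, tf.shape(weight)[1]])  # [b*B, n]
# Row-major reshaping repeats each sentence's mask B times in consecutive rows,
# so every beam hypothesis of sentence i reuses sentence i's padding mask.
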
class Stagging_Model_Forward_Chain(object):
    def add_stag_embedding_mat(self):
        with tf.variable_scope('stag_embedding') as scope:
            self.stag_embedding_mat = tf.get_variable('stag_embedding_mat', [self.loader.nb_tags+1, self.opts.lm]) # +1 for padding
    def add_stag_dropout_mat(self, batch_size):
        self.stag_dropout_mat = tf.ones([batch_size, self.opts.lm])
        self.stag_dropout_mat = tf.nn.dropout(self.stag_dropout_mat, self.input_keep_prob)
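        ## built once (before the tf.scan in add_forward_path) and reused at every time step, so the same embedding units are dropped across the whole sequence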

    def add_placeholders(self):
        #self.inputs_placeholder_list = [tf.placeholder(tf.int32, shape = [None, None]) for _ in xrange(2+self.opts.suffix+self.opts.num+self.opts.cap+self.opts.jackknife)] # 2 for text_sequences and tag_sequences, necessary no matter what
        #self.inputs_placeholder_list = [tf.placeholder(tf.int32, shape = [None, None]) for _ in xrange(6)] # 2 for text_sequences and tag_sequences, necessary no matter what
        self.inputs_placeholder_dict = {}
        for feature in self.features:
            if feature == 'chars':
                self.inputs_placeholder_dict[feature] = tf.placeholder(tf.int32, shape = [None, None, None])
            else:
                self.inputs_placeholder_dict[feature] = tf.placeholder(tf.int32, shape = [None, None])

        self.keep_prob = tf.placeholder(tf.float32)  
        self.input_keep_prob = tf.placeholder(tf.float32)  
        self.hidden_prob = tf.placeholder(tf.float32)  

    def add_word_embedding(self): 
        with tf.device('/cpu:0'):
            with tf.variable_scope('word_embedding') as scope:
                embedding = tf.get_variable('word_embedding_mat', self.loader.word_embeddings.shape, initializer=tf.constant_initializer(self.loader.word_embeddings))

            inputs = tf.nn.embedding_lookup(embedding, self.inputs_placeholder_dict['words']) ## [batch_size, seq_len, embedding_dim]
            inputs = tf.transpose(inputs, perm=[1, 0, 2]) # [seq_length, batch_size, embedding_dim]
        return inputs 

    def add_suffix_embedding(self):
        with tf.device('/cpu:0'):
            with tf.variable_scope('suffix_embedding') as scope:
                embedding = tf.get_variable('suffix_embedding_mat', [self.loader.nb_suffixes+1, self.opts.suffix_dim]) # +1 for padding

            inputs = tf.nn.embedding_lookup(embedding, self.inputs_placeholder_dict['suffix']) ## [batch_size, seq_len, embedding_dim]
            inputs = tf.transpose(inputs, perm=[1, 0, 2]) # [seq_length, batch_size, embedding_dim]
        return inputs 

    def add_cap(self):
        inputs = tf.cast(tf.expand_dims(self.inputs_placeholder_dict['cap'], -1), tf.float32)
        inputs = tf.transpose(inputs, perm=[1, 0, 2]) # [seq_length, batch_size, 1]
        return inputs # [seq_length, batch_size, 1]

    def add_num(self):
        inputs = tf.cast(tf.expand_dims(self.inputs_placeholder_dict['num'], -1), tf.float32)
        inputs = tf.transpose(inputs, perm=[1, 0, 2]) # [seq_length, batch_size, 1]
        return inputs # [seq_length, batch_size, 1]

    def add_char_embedding(self):
        with tf.device('/cpu:0'):
            with tf.variable_scope('char_embedding') as scope:
                embedding = tf.get_variable('char_embedding_mat', [self.loader.nb_chars+1, self.opts.chars_dim]) # +1 for padding

            inputs = tf.nn.embedding_lookup(embedding, self.inputs_placeholder_dict['chars']) ## [batch_size, seq_len, word_len, embedding_dim]
            inputs = tf.transpose(inputs, perm=[1, 0, 2, 3])
            ## [seq_len, batch_size, word_len, embedding_dim]
            inputs = self.add_dropout(inputs, self.input_keep_prob)
            weights = get_char_weights(self.opts, 'char_encoding')
            inputs = encode_char(inputs, weights) ## [seq_len, batch_size, nb_filters]
        return inputs 

    def add_jackknife_embedding(self):
        with tf.device('/cpu:0'):
            with tf.variable_scope('jk_embedding') as scope:
                embedding = tf.get_variable('jk_embedding_mat', [self.loader.nb_jk+1, self.opts.jk_dim]) # +1 for padding
            inputs = tf.nn.embedding_lookup(embedding, self.inputs_placeholder_dict['jk']) ## [batch_size, seq_len, embedding_dim]
            inputs = tf.transpose(inputs, perm=[1, 0, 2]) # [seq_length, batch_size, embedding_dim]
        return inputs 

    def add_stag_embedding(self, stags=None): # if None, use gold stags
        with tf.device('/cpu:0'):
            if stags is None:
                stags = self.inputs_placeholder_dict['tags']
            inputs = tf.nn.embedding_lookup(self.stag_embedding_mat, stags)  ## [batch_size, stag_dims]
        return inputs 

    def add_predictions(self, output):
        predictions = tf.cast(tf.argmax(output, 1), tf.int32) ## [batch_size, nb_tags] -> [batch_size]
        return predictions

    def add_lm_accuracy(self):
        correct_predictions = self.weight*tf.cast(tf.equal(self.predictions, self.inputs_placeholder_dict['tags']), tf.float32)

        self.accuracy = tf.reduce_sum(tf.cast(correct_predictions, tf.float32))/tf.reduce_sum(tf.cast(self.weight, tf.float32))

    def add_forward_path(self, lstm_outputs):
        batch_size = tf.shape(lstm_outputs)[1]
        prev_init = [tf.zeros([batch_size], tf.int32), tf.zeros([batch_size, self.loader.nb_tags])]
        ## We need the following memory states (a list of two elements): 
        ## 1. Previous predictions (stag_idx): [batch_size]
        ## 2. Projected outputs for cost calculation
        name = 'Forward'
        ## Define all the necessary weights for recursion
        self.add_stag_embedding_mat()
        self.add_stag_dropout_mat(batch_size)
        ##
        weights = get_chain_weights('{}_Chain_layer'.format(name), self.opts.lm, self.outputs_dim)
        #weights = get_lstm_weights('{}_Chain_layer{}'.format(name), self.opts.lm, self.opts.lm, batch_size, self.hidden_prob)
        all_states = tf.scan(lambda prev, x: self.add_one_forward(prev, x, weights), lstm_outputs, prev_init)
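        ## tf.scan threads the [predictions, projected_outputs] state through time: step t sees the prediction from step t-1 together with the LSTM output for token t, so each supertag decision is conditioned on the previously predicted supertag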
        all_predictions = all_states[0] # [seq_len, batch_size]
        all_predictions = tf.transpose(all_predictions, perm=[1, 0]) # [batch_size, seq_len]
        all_projected_outputs = all_states[1] # [seq_len, batch_size, nb_tags]
        all_projected_outputs = tf.transpose(all_projected_outputs, perm=[1, 0, 2]) # [batch_size, seq_len, nb_tags]
        return all_predictions, all_projected_outputs


    def add_one_forward(self, prev_list, x, weights):
        ## compute one word in the forward direction
        ## weights['L_weight'] and weights['L_bias'] project the previous prediction embedding into the LSTM output space
        prev_predictions = prev_list[0]
        prev_embedding = self.add_stag_embedding(prev_predictions) ## [batch_size, self.opts.lm]
        prev_embedding = prev_embedding*self.stag_dropout_mat
        inputs = tf.nn.relu(x + tf.matmul(prev_embedding, weights['L_weight']) + weights['L_bias'])
        projected_outputs = self.add_projection(inputs) ## [batch_size, nb_tags]
        predictions = self.add_predictions(projected_outputs) ## [batch_size]
        new_state = [predictions, projected_outputs]
        return new_state


    def add_lstm(self, inputs, i, name, backward=False):
        prev_init = tf.zeros([2, tf.shape(inputs)[1], self.opts.units])  # [2, batch_size, num_units]
        #prev_init = tf.zeros([2, 100, self.opts.units])  # [2, batch_size, num_units]
        if i == 0:
            inputs_dim = self.inputs_dim
        else:
            inputs_dim = self.opts.units*2 ## concat after each layer
        weights = get_lstm_weights('{}_LSTM_layer{}'.format(name, i), inputs_dim, self.opts.units, tf.shape(inputs)[1], self.hidden_prob)
        if backward:
            ## backward: reset states after zero paddings
            non_paddings = tf.transpose(self.weight, [1, 0]) ## [batch_size, seq_len] => [seq_len, batch_size]
            non_paddings = tf.reverse(non_paddings, [0])
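            ## the mask is reversed so it stays aligned with the reversed inputs; lstm() uses it to reset the state at padding positions, keeping state from leaking across the padded ends of sentences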
            cell_hidden = tf.scan(lambda prev, x: lstm(prev, x, weights, backward=backward), [inputs, non_paddings], prev_init)
        else:
            cell_hidden = tf.scan(lambda prev, x: lstm(prev, x, weights), inputs, prev_init)
        # cell_hidden: [seq_len, 2, batch_size, units]
        h = tf.unstack(cell_hidden, 2, axis=1)[1] #[seq_len, batch_size, units]
        return h

    def add_dropout(self, inputs, keep_prob):
        ## inputs [seq_len, batch_size, inputs_dims/units]
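        ## a single dropout mask (dummy_dp) is sampled and applied to every time step via map_fn, so the same units are dropped across the whole sequence (variational-style dropout)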
        dummy_dp = tf.ones(tf.shape(inputs)[1:])
        dummy_dp = tf.nn.dropout(dummy_dp, keep_prob)
        return tf.map_fn(lambda x: dummy_dp*x, inputs)

    def add_projection(self, inputs, reuse=False, name=None): 
        if name is None:
            name = 'Projection'
        with tf.variable_scope(name) as scope:
            if reuse:
                scope.reuse_variables()
            proj_U = tf.get_variable('weight', [self.outputs_dim, self.loader.nb_tags]) 
            proj_b = tf.get_variable('bias', [self.loader.nb_tags])
            outputs = tf.matmul(inputs, proj_U)+proj_b 
        return outputs

    def add_loss_op(self, output):
        cross_entropy = sequence_loss(output, self.inputs_placeholder_dict['tags'], self.weight)
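        ## self.weight zeroes out padding positions so they do not contribute to the cross-entropy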
        tf.add_to_collection('total loss', cross_entropy)
        loss = tf.add_n(tf.get_collection('total loss'))
        return loss

    def add_accuracy(self, output):
        self.predictions = tf.cast(tf.argmax(output, 2), tf.int32) ## [batch_size, seq_len]
        correct_predictions = self.weight*tf.cast(tf.equal(self.predictions, self.inputs_placeholder_dict['tags']), tf.float32)
        self.accuracy = tf.reduce_sum(tf.cast(correct_predictions, tf.float32))/tf.reduce_sum(tf.cast(self.weight, tf.float32))

    def add_train_op(self, loss):
        optimizer = tf.train.AdamOptimizer()
        train_op = optimizer.minimize(loss)
        return train_op

    def get_features(self):
        self.features = ['words', 'tags']
        if self.opts.suffix_dim > 0:
            self.features.append('suffix')
        if self.opts.cap:
            self.features.append('cap')
        if self.opts.num:
            self.features.append('num')
        if self.opts.jk_dim > 0:
            self.features.append('jk')
        if self.opts.chars_dim > 0:
            self.features.append('chars')
    
    def __init__(self, opts, test_opts=None):
       
        self.opts = opts
        self.test_opts = test_opts
        self.loader = Dataset(opts, test_opts)
        self.batch_size = opts.batch_size
        self.get_features()
        self.add_placeholders()
        self.inputs_dim = self.opts.embedding_dim + self.opts.suffix_dim + self.opts.cap + self.opts.num + self.opts.jk_dim + self.opts.nb_filters
        self.outputs_dim = (1+self.opts.bi)*self.opts.units
        inputs_list = [self.add_word_embedding()]
        if self.opts.suffix_dim > 0:
            inputs_list.append(self.add_suffix_embedding())
        if self.opts.cap:
            inputs_list.append(self.add_cap())
        if self.opts.num:
            inputs_list.append(self.add_num())
        if self.opts.jk_dim > 0:
            inputs_list.append(self.add_jackknife_embedding())
        if self.opts.chars_dim > 0:
            inputs_list.append(self.add_char_embedding())
        inputs_tensor = tf.concat(inputs_list, 2) ## [seq_len, batch_size, inputs_dim]
        inputs_tensor = self.add_dropout(inputs_tensor, self.input_keep_prob)
        self.weight = tf.cast(tf.not_equal(self.inputs_placeholder_dict['words'], tf.zeros(tf.shape(self.inputs_placeholder_dict['words']), tf.int32)), tf.float32) ## [batch_size, seq_len]
        for i in xrange(self.opts.num_layers):
            forward_outputs_tensor = self.add_dropout(self.add_lstm(inputs_tensor, i, 'Forward'), self.keep_prob) ## [seq_len, batch_size, units]
            if self.opts.bi:
                backward_outputs_tensor = self.add_dropout(self.add_lstm(tf.reverse(inputs_tensor, [0]), i, 'Backward', True), self.keep_prob) ## [seq_len, batch_size, units]
                inputs_tensor = tf.concat([forward_outputs_tensor, tf.reverse(backward_outputs_tensor, [0])], 2)
            else:
                inputs_tensor = forward_outputs_tensor
        lstm_outputs = inputs_tensor ## [seq_len, batch_size, outputs_dim]
        self.predictions, projected_outputs = self.add_forward_path(lstm_outputs) ## predictions: [batch_size, seq_len]; projected_outputs: [batch_size, seq_len, nb_tags]
        self.add_lm_accuracy()
        self.loss = self.add_loss_op(projected_outputs)
        self.train_op = self.add_train_op(self.loss)
#        if self.opts.bi:

        self.add_accuracy(projected_outputs)

    def run_batch(self, session, testmode = False):
        if not testmode:
            feed = {}
            #for placeholder, data in zip(self.inputs_placeholder_list, self.loader.inputs_train_batch):
            #    feed[placeholder] = data
            for feat in self.inputs_placeholder_dict.keys():
                feed[self.inputs_placeholder_dict[feat]] = self.loader.inputs_train_batch[feat]
            feed[self.keep_prob] = self.opts.dropout_p
            feed[self.hidden_prob] = self.opts.hidden_p
            feed[self.input_keep_prob] = self.opts.input_dp
            train_op = self.train_op
            _, loss, accuracy = session.run([train_op, self.loss, self.accuracy], feed_dict=feed)
            return loss, accuracy
        else:
            feed = {}
            for feat in self.inputs_placeholder_dict.keys():
                feed[self.inputs_placeholder_dict[feat]] = self.loader.inputs_test_batch[feat]
            feed[self.keep_prob] = 1.0
            feed[self.hidden_prob] = 1.0
            feed[self.input_keep_prob] = 1.0
            loss, accuracy, predictions, weight = session.run([self.loss, self.accuracy, self.predictions, self.weight], feed_dict=feed)
            weight = weight.astype(bool)
            predictions = predictions[weight]
            return loss, accuracy, predictions

    def run_epoch(self, session, testmode = False):

        if not testmode:
            epoch_start_time = time.time()
            next_batch = self.loader.next_batch
            epoch_incomplete = next_batch(self.batch_size)
            while epoch_incomplete:
                loss, accuracy = self.run_batch(session)
                print('{}/{}, loss {:.4f}, accuracy {:.4f}'.format(self.loader._index_in_epoch, self.loader.nb_train_samples, loss, accuracy), end = '\r')
                epoch_incomplete = next_batch(self.batch_size)
            print('\nEpoch Training Time {}'.format(time.time() - epoch_start_time))
            return loss, accuracy
        else: 
            next_test_batch = self.loader.next_test_batch
            test_incomplete = next_test_batch(self.batch_size)
            predictions = []
            while test_incomplete:
                loss, accuracy, predictions_batch = self.run_batch(session, True)
                predictions.append(predictions_batch)
                #print('Testmode {}/{}, loss {}, accuracy {}'.format(self.loader._index_in_test, self.loader.nb_validation_samples, loss, accuracy), end = '\r')
                print('Test mode {}/{}'.format(self.loader._index_in_test, self.loader.nb_validation_samples), end = '\r')
                test_incomplete = next_test_batch(self.batch_size)
            predictions = np.hstack(predictions)
            if self.test_opts is not None:
                self.loader.output_stags(predictions, self.test_opts.save_tags)
                        
            accuracy = np.mean(predictions == self.loader.test_gold)
            return accuracy
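
As a usage illustration (a sketch only, not from the original repository), a driver loop over this class might look as follows; `opts` and `test_opts` are assumed to be option objects compatible with Dataset, and the epoch count is arbitrary:

import tensorflow as tf

model = Stagging_Model_Forward_Chain(opts, test_opts)        # builds the graph
with tf.Session() as session:
    session.run(tf.global_variables_initializer())
    for epoch in range(30):                                  # arbitrary number of epochs
        train_loss, train_acc = model.run_epoch(session)     # one pass over the training data
        test_acc = model.run_epoch(session, testmode=True)   # greedy decoding on the test data
        print('epoch {}: train acc {:.4f}, test acc {:.4f}'.format(epoch, train_acc, test_acc))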