    def __init__(self, char_domain_size, char_embedding_dim, hidden_dim, filter_width, embeddings=None):

        self.char_domain_size = char_domain_size
        self.embedding_size = char_embedding_dim
        self.hidden_dim = hidden_dim
        self.filter_width = filter_width

        # char embedding input
        self.input_chars = tf.placeholder(tf.int64, [None, None], name="input_chars")

        # padding mask
        # self.input_mask = tf.placeholder(tf.float32, [None, None], name="input_mask")

        self.batch_size = tf.placeholder(tf.int32, None, name="batch_size")

        self.max_seq_len = tf.placeholder(tf.int32, None, name="max_seq_len")

        self.max_tok_len = tf.placeholder(tf.int32, None, name="max_tok_len")

        self.input_dropout_keep_prob = tf.placeholder_with_default(1.0, [], name="input_dropout_keep_prob")

        # sequence lengths
        self.sequence_lengths = tf.placeholder(tf.int32, [None, None], name="sequence_lengths")
        self.token_lengths = tf.placeholder(tf.int32, [None, None], name="tok_lengths")

        print("CNN char embedding model:")
        print("embedding dim: ", self.embedding_size)
        print("out dim: ", self.hidden_dim)

        char_embeddings_shape = (self.char_domain_size-1, self.embedding_size)
        self.char_embeddings = tf_utils.initialize_embeddings(char_embeddings_shape, name="char_embeddings", pretrained=embeddings)

        self.outputs = self.forward(self.input_chars, self.input_dropout_keep_prob, reuse=False)
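# The `tf_utils` helpers used throughout these examples (initialize_embeddings,
# initialize_weights, apply_nonlinearity) are project-specific and not shown in
# this listing. A minimal stand-in, assuming they are thin wrappers around
# tf.get_variable with standard initializers, might look like the sketch below;
# it is illustrative only, not the original implementation.
import tensorflow as tf

def initialize_embeddings(shape, name, pretrained=None, old=False):
    # Use a pretrained matrix when one is supplied, otherwise a small random table.
    if pretrained is not None:
        return tf.get_variable(name, initializer=tf.constant(pretrained, dtype=tf.float32))
    return tf.get_variable(name, shape=shape,
                           initializer=tf.random_uniform_initializer(-0.1, 0.1))

def initialize_weights(shape, name, init_type="xavier", gain=None, divisor=None):
    # Xavier/Glorot initialization; `gain` and `divisor` are accepted only for
    # signature compatibility in this sketch.
    return tf.get_variable(name, shape=shape,
                           initializer=tf.contrib.layers.xavier_initializer())

def apply_nonlinearity(x, nonlinearity):
    # Map the configured nonlinearity name onto the corresponding TF op.
    return {"relu": tf.nn.relu, "tanh": tf.tanh}.get(nonlinearity, tf.identity)(x)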
Example #2
    def forward(self, hidden_dropout_keep_prob, input_dropout_keep_prob, middle_dropout_keep_prob, reuse=True):
        word_embeddings = tf.nn.embedding_lookup(self.w_e, self.input_x1)

        with tf.variable_scope("forward", reuse=reuse):
            input_list = [word_embeddings]
            input_size = self.embedding_size
            if self.use_characters:
                input_list.append(self.char_embeddings)
                input_size += self.char_size
            if self.use_shape:
                shape_embeddings_shape = (self.shape_domain_size - 1, self.shape_size)
                w_s = tf_utils.initialize_embeddings(shape_embeddings_shape, name="w_s")
                shape_embeddings = tf.nn.embedding_lookup(w_s, self.input_x2)
                input_list.append(shape_embeddings)
                input_size += self.shape_size

            input_feats = tf.concat(axis=2, values=input_list)
            # self.input_feats_expanded = tf.expand_dims(self.input_feats, 1)
            input_feats_expanded_drop = tf.nn.dropout(input_feats, input_dropout_keep_prob)

            total_output_width = 2*self.hidden_dim

            with tf.name_scope("bilstm"):
                # selected_col_embeddings = tf.nn.embedding_lookup(token_embeddings, self.token_batch)
                fwd_cell = tf.contrib.rnn.BasicLSTMCell(self.hidden_dim, state_is_tuple=True, reuse=reuse)
                bwd_cell = tf.contrib.rnn.BasicLSTMCell(self.hidden_dim, state_is_tuple=True, reuse=reuse)
                lstm_outputs, _ = tf.nn.bidirectional_dynamic_rnn(cell_fw=fwd_cell, cell_bw=bwd_cell, dtype=tf.float32,
                                                                 inputs=input_feats_expanded_drop,
                                                                 parallel_iterations=50,
                                                                 sequence_length=self.flat_sequence_lengths)
                hidden_outputs = tf.concat(axis=2, values=lstm_outputs)

            h_concat_flat = tf.reshape(hidden_outputs, [-1, total_output_width])

            # Add dropout
            with tf.name_scope("middle_dropout"):
                h_drop = tf.nn.dropout(h_concat_flat, middle_dropout_keep_prob)

            # second projection
            with tf.name_scope("tanh_proj"):
                w_tanh = tf_utils.initialize_weights([total_output_width, self.hidden_dim], "w_tanh", init_type="xavier")
                b_tanh = tf.get_variable(initializer=tf.constant(0.01, shape=[self.hidden_dim]), name="b_tanh")
                self.l2_loss_A += tf.nn.l2_loss(w_tanh)
                self.l2_loss_A += tf.nn.l2_loss(b_tanh)
                self.l2_loss_B += tf.nn.l2_loss(w_tanh)
                self.l2_loss_B += tf.nn.l2_loss(b_tanh)
                h2_concat_flat = tf.nn.xw_plus_b(h_drop, w_tanh, b_tanh, name="h2_tanh")
                h2_tanh = tf_utils.apply_nonlinearity(h2_concat_flat, self.nonlinearity)

            # Add dropout
            with tf.name_scope("hidden_dropout"):
                h2_drop = tf.nn.dropout(h2_tanh, hidden_dropout_keep_prob)

        return h2_drop
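# Self-contained illustration (not from the original code) of why
# total_output_width above is 2 * hidden_dim: bidirectional_dynamic_rnn returns
# a (forward, backward) pair of [batch, time, hidden_dim] tensors, and the
# forward pass concatenates them on the last axis. All names below are made up.
import tensorflow as tf

demo_inputs = tf.placeholder(tf.float32, [None, None, 100])   # [batch, time, features]
demo_lengths = tf.placeholder(tf.int32, [None])
fw_cell = tf.contrib.rnn.BasicLSTMCell(50, state_is_tuple=True)
bw_cell = tf.contrib.rnn.BasicLSTMCell(50, state_is_tuple=True)
(out_fw, out_bw), _ = tf.nn.bidirectional_dynamic_rnn(fw_cell, bw_cell, demo_inputs,
                                                       sequence_length=demo_lengths,
                                                       dtype=tf.float32)
demo_concat = tf.concat(axis=2, values=[out_fw, out_bw])       # [batch, time, 2 * 50]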
Example #3
    def __init__(self,
                 char_domain_size,
                 char_embedding_dim,
                 hidden_dim,
                 embeddings=None):

        self.char_domain_size = char_domain_size
        self.embedding_size = char_embedding_dim
        self.hidden_dim = hidden_dim

        # char embedding input
        self.input_chars = tf.placeholder(tf.int64, [None, None],
                                          name="input_chars")

        # padding mask
        # self.input_mask = tf.placeholder(tf.float32, [None, None], name="input_mask")

        self.batch_size = tf.placeholder(tf.int32, None, name="batch_size")

        self.max_seq_len = tf.placeholder(tf.int32, None, name="max_seq_len")

        self.max_tok_len = tf.placeholder(tf.int32, None, name="max_tok_len")

        self.input_dropout_keep_prob = tf.placeholder_with_default(
            1.0, [], name="input_dropout_keep_prob")

        # sequence lengths
        self.sequence_lengths = tf.placeholder(tf.int32, [None, None],
                                               name="sequence_lengths")
        self.token_lengths = tf.placeholder(tf.int32, [None, None],
                                            name="tok_lengths")

        self.output_size = 2 * self.hidden_dim

        print("LSTM char embedding model")
        print("embedding dim: ", self.embedding_size)
        print("out dim: ", self.output_size)

        # set the pad token to a constant 0 vector
        # self.char_zero_pad = tf.constant(0.0, dtype=tf.float32, shape=[1, self.embedding_size])

        # Embedding layer
        shape = (char_domain_size - 1, self.embedding_size)
        self.char_embeddings = tf_utils.initialize_embeddings(
            shape, name="char_embeddings", pretrained=embeddings)

        self.outputs = self.forward(self.input_chars,
                                    self.input_dropout_keep_prob,
                                    reuse=False)
Example #4
    def __init__(self, char_domain_size, char_embedding_dim, hidden_dim, embeddings=None):

        self.char_domain_size = char_domain_size
        self.embedding_size = char_embedding_dim
        self.hidden_dim = hidden_dim

        # char embedding input
        self.input_chars = tf.placeholder(tf.int64, [None, None], name="input_chars")

        # padding mask
        # self.input_mask = tf.placeholder(tf.float32, [None, None], name="input_mask")

        self.batch_size = tf.placeholder(tf.int32, None, name="batch_size")

        self.max_seq_len = tf.placeholder(tf.int32, None, name="max_seq_len")

        self.max_tok_len = tf.placeholder(tf.int32, None, name="max_tok_len")

        self.input_dropout_keep_prob = tf.placeholder_with_default(1.0, [], name="input_dropout_keep_prob")

        # sequence lengths
        self.sequence_lengths = tf.placeholder(tf.int32, [None, None], name="sequence_lengths")
        self.token_lengths = tf.placeholder(tf.int32, [None, None], name="tok_lengths")

        self.output_size = 2*self.hidden_dim

        print("LSTM char embedding model")
        print("embedding dim: ", self.embedding_size)
        print("out dim: ", self.output_size)

        # set the pad token to a constant 0 vector
        # self.char_zero_pad = tf.constant(0.0, dtype=tf.float32, shape=[1, self.embedding_size])

        # Embedding layer
        shape = (char_domain_size-1, self.embedding_size)
        self.char_embeddings = tf_utils.initialize_embeddings(shape, name="char_embeddings", pretrained=embeddings)

        self.outputs = self.forward(self.input_chars, self.input_dropout_keep_prob, reuse=False)
Example #5
    def __init__(self, num_classes, vocab_size, shape_domain_size, char_domain_size, char_size, embedding_size,
                 shape_size, nonlinearity, layers_map, viterbi, projection, loss, margin, repeats, share_repeats,
                 char_embeddings, embeddings=None):

        self.num_classes = num_classes
        self.shape_domain_size = shape_domain_size
        self.char_domain_size = char_domain_size
        self.char_size = char_size
        self.embedding_size = embedding_size
        self.shape_size = shape_size
        self.nonlinearity = nonlinearity
        self.layers_map = layers_map
        self.projection = projection
        self.which_loss = loss
        self.margin = margin
        self.char_embeddings = char_embeddings
        self.repeats = repeats
        self.viterbi = viterbi
        self.share_repeats = share_repeats

        # word embedding input
        self.input_x1 = tf.placeholder(tf.int64, [None, None], name="input_x1")

        # shape embedding input
        self.input_x2 = tf.placeholder(tf.int64, [None, None], name="input_x2")

        # labels
        self.input_y = tf.placeholder(tf.int64, [None, None], name="input_y")

        # padding mask
        self.input_mask = tf.placeholder(tf.float32, [None, None], name="input_mask")

        # dims
        self.batch_size = tf.placeholder(tf.int32, None, name="batch_size")
        self.max_seq_len = tf.placeholder(tf.int32, None, name="max_seq_len")

        # sequence lengths
        self.sequence_lengths = tf.placeholder(tf.int32, [None, None], name="sequence_lengths")

        # dropout and l2 penalties
        self.hidden_dropout_keep_prob = tf.placeholder_with_default(1.0, [], name="hidden_dropout_keep_prob")
        self.input_dropout_keep_prob = tf.placeholder_with_default(1.0, [], name="input_dropout_keep_prob")
        self.middle_dropout_keep_prob = tf.placeholder_with_default(1.0, [], name="middle_dropout_keep_prob")
        self.training = tf.placeholder_with_default(False, [], name="training")

        self.l2_penalty = tf.placeholder_with_default(0.0, [], name="l2_penalty")
        self.drop_penalty = tf.placeholder_with_default(0.0, [], name="drop_penalty")

        self.l2_loss = tf.constant(0.0)

        self.use_characters = char_size != 0
        self.use_shape = shape_size != 0

        self.ones = tf.ones([self.batch_size, self.max_seq_len, self.num_classes])

        if self.viterbi:
            self.transition_params = tf.get_variable("transitions", [num_classes, num_classes])

        word_embeddings_shape = (vocab_size-1, embedding_size)
        self.w_e = tf_utils.initialize_embeddings(word_embeddings_shape, name="w_e", pretrained=embeddings, old=False)

        self.block_unflat_scores, self.hidden_layer = self.forward(self.input_x1, self.input_x2, self.max_seq_len,
                                          self.hidden_dropout_keep_prob,
                                          self.input_dropout_keep_prob, self.middle_dropout_keep_prob, reuse=False)

        # Calculate mean cross-entropy loss
        with tf.name_scope("loss"):

            self.loss = tf.constant(0.0)

            self.block_unflat_no_dropout_scores, _ = self.forward(self.input_x1, self.input_x2, self.max_seq_len, 1.0, 1.0, 1.0)

            labels = tf.cast(self.input_y, 'int32')

            if self.which_loss == "block":
                for unflat_scores, unflat_no_dropout_scores in zip(self.block_unflat_scores, self.block_unflat_no_dropout_scores):
                    self.loss += self.compute_loss(unflat_scores, unflat_no_dropout_scores, labels)
                self.unflat_scores = self.block_unflat_scores[-1]
            else:
                self.unflat_scores = self.block_unflat_scores[-1]
                self.unflat_no_dropout_scores = self.block_unflat_no_dropout_scores[-1]
                self.loss = self.compute_loss(self.unflat_scores, self.unflat_no_dropout_scores, labels)

        with tf.name_scope("predictions"):
            if viterbi:
                self.predictions = self.unflat_scores
            else:
                self.predictions = tf.argmax(self.unflat_scores, 2)
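# The `layers_map` argument above is consumed in the forward pass below as an
# ordered list of (name, config) pairs with 'dilation', 'width', 'filters',
# 'initialization' and 'take' keys (the entry at index 0 also supplies the
# initial layer's 'width' and 'filters'). A hypothetical three-layer dilated
# block, written only to illustrate the expected structure, could look like this:
example_layers_map = [
    ("conv1", {"dilation": 1, "width": 3, "filters": 300, "initialization": "identity", "take": False}),
    ("conv2", {"dilation": 2, "width": 3, "filters": 300, "initialization": "identity", "take": False}),
    ("conv3", {"dilation": 4, "width": 3, "filters": 300, "initialization": "identity", "take": True}),
]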
Example #6
    def forward(self, input_x1, input_x2, max_seq_len, hidden_dropout_keep_prob, input_dropout_keep_prob,
                middle_dropout_keep_prob, reuse=True):

        block_unflat_scores = []

        with tf.variable_scope("forward", reuse=reuse):
            word_embeddings = tf.nn.embedding_lookup(self.w_e, input_x1)

            input_list = [word_embeddings]
            input_size = self.embedding_size
            if self.use_characters:
                char_embeddings_masked = tf.multiply(self.char_embeddings, tf.expand_dims(self.input_mask, 2))
                input_list.append(char_embeddings_masked)
                input_size += self.char_size
            if self.use_shape:
                shape_embeddings_shape = (self.shape_domain_size-1, self.shape_size)
                w_s = tf_utils.initialize_embeddings(shape_embeddings_shape, name="w_s")
                shape_embeddings = tf.nn.embedding_lookup(w_s, input_x2)
                input_list.append(shape_embeddings)
                input_size += self.shape_size

            initial_filter_width = self.layers_map[0][1]['width']
            initial_num_filters = self.layers_map[0][1]['filters']
            filter_shape = [1, initial_filter_width, input_size, initial_num_filters]
            initial_layer_name = "conv0"

            if not reuse:
                print(input_list)
                print("Adding initial layer %s: width: %d; filters: %d" % (
                    initial_layer_name, initial_filter_width, initial_num_filters))

            input_feats = tf.concat(axis=2, values=input_list)
            input_feats_expanded = tf.expand_dims(input_feats, 1)
            input_feats_expanded_drop = tf.nn.dropout(input_feats_expanded, input_dropout_keep_prob)
            print("input feats expanded drop", input_feats_expanded_drop.get_shape())

            # first projection of embeddings
            w = tf_utils.initialize_weights(filter_shape, initial_layer_name + "_w", init_type='xavier', gain='relu')
            b = tf.get_variable(initial_layer_name + "_b", initializer=tf.constant(0.01, shape=[initial_num_filters]))
            conv0 = tf.nn.conv2d(input_feats_expanded_drop, w, strides=[1, 1, 1, 1], padding="SAME", name=initial_layer_name)
            h0 = tf_utils.apply_nonlinearity(tf.nn.bias_add(conv0, b), 'relu')

            initial_inputs = [h0]
            last_dims = initial_num_filters

            # Stacked atrous convolutions
            last_output = tf.concat(axis=3, values=initial_inputs)

            for block in range(self.repeats):
                print("last out shape", last_output.get_shape())
                print("last dims", last_dims)
                hidden_outputs = []
                total_output_width = 0
                reuse_block = (block != 0 and self.share_repeats) or reuse
                block_name_suff = "" if self.share_repeats else str(block)
                inner_last_dims = last_dims
                inner_last_output = last_output
                with tf.variable_scope("block" + block_name_suff, reuse=reuse_block):
                    for layer_name, layer in self.layers_map:
                        dilation = layer['dilation']
                        filter_width = layer['width']
                        num_filters = layer['filters']
                        initialization = layer['initialization']
                        take_layer = layer['take']
                        if not reuse:
                            print("Adding layer %s: dilation: %d; width: %d; filters: %d; take: %r" % (
                            layer_name, dilation, filter_width, num_filters, take_layer))
                        with tf.name_scope("atrous-conv-%s" % layer_name):
                            # [filter_height, filter_width, in_channels, out_channels]
                            filter_shape = [1, filter_width, inner_last_dims, num_filters]
                            w = tf_utils.initialize_weights(filter_shape, layer_name + "_w", init_type=initialization, gain=self.nonlinearity, divisor=self.num_classes)
                            b = tf.get_variable(layer_name + "_b", initializer=tf.constant(0.0 if initialization == "identity" or initialization == "varscale" else 0.001, shape=[num_filters]))
                            # h = tf_utils.residual_layer(inner_last_output, w, b, dilation, self.nonlinearity, self.batch_norm, layer_name + "_r",
                            #                             self.batch_size, max_seq_len, self.res_activation, self.training) \
                            #     if last_output != input_feats_expanded_drop \
                            #     else tf_utils.residual_layer(inner_last_output, w, b, dilation, self.nonlinearity, False, layer_name + "_r",
                            #                             self.batch_size, max_seq_len, 0, self.training)

                            conv = tf.nn.atrous_conv2d(inner_last_output, w, rate=dilation, padding="SAME", name=layer_name)
                            conv_b = tf.nn.bias_add(conv, b)
                            h = tf_utils.apply_nonlinearity(conv_b, self.nonlinearity)

                            # so, only apply "take" to last block (may want to change this later)
                            if take_layer:
                                hidden_outputs.append(h)
                                total_output_width += num_filters
                            inner_last_dims = num_filters
                            inner_last_output = h

                    h_concat = tf.concat(axis=3, values=hidden_outputs)
                    last_output = tf.nn.dropout(h_concat, middle_dropout_keep_prob)
                    last_dims = total_output_width

                    h_concat_squeeze = tf.squeeze(h_concat, [1])
                    h_concat_flat = tf.reshape(h_concat_squeeze, [-1, total_output_width])

                    # Add dropout
                    with tf.name_scope("hidden_dropout"):
                        h_drop = tf.nn.dropout(h_concat_flat, hidden_dropout_keep_prob)

                    def do_projection():
                        # Project raw outputs down
                        with tf.name_scope("projection"):
                            projection_width = int(total_output_width/(2*len(hidden_outputs)))
                            w_p = tf_utils.initialize_weights([total_output_width, projection_width], "w_p", init_type="xavier")
                            b_p = tf.get_variable("b_p", initializer=tf.constant(0.01, shape=[projection_width]))
                            projected = tf.nn.xw_plus_b(h_drop, w_p, b_p, name="projected")
                            projected_nonlinearity = tf_utils.apply_nonlinearity(projected, self.nonlinearity)
                        return projected_nonlinearity, projection_width

                    # only use projection if we wanted to, and only apply middle dropout here if projection
                    input_to_pred, proj_width = do_projection() if self.projection else (h_drop, total_output_width)
                    input_to_pred_drop = tf.nn.dropout(input_to_pred, middle_dropout_keep_prob) if self.projection else input_to_pred

                    # Final (unnormalized) scores and predictions
                    with tf.name_scope("output"+block_name_suff):
                        w_o = tf_utils.initialize_weights([proj_width, self.num_classes], "w_o", init_type="xavier")
                        b_o = tf.get_variable("b_o", initializer=tf.constant(0.01, shape=[self.num_classes]))
                        self.l2_loss += tf.nn.l2_loss(w_o)
                        self.l2_loss += tf.nn.l2_loss(b_o)
                        scores = tf.nn.xw_plus_b(input_to_pred_drop, w_o, b_o, name="scores")
                        unflat_scores = tf.reshape(scores, tf.stack([self.batch_size, max_seq_len, self.num_classes]))
                        block_unflat_scores.append(unflat_scores)

        return block_unflat_scores, h_concat_squeeze
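# Stand-alone sketch (names are illustrative, not from the original code) of the
# dilated convolution at the heart of each block above: with padding="SAME",
# tf.nn.atrous_conv2d keeps the sequence length unchanged while the dilation
# rate widens the receptive field of the width-3 filter.
import tensorflow as tf

demo_feats = tf.placeholder(tf.float32, [None, 1, None, 300])    # [batch, 1, seq_len, channels]
demo_filter = tf.get_variable("demo_filter", [1, 3, 300, 300])   # [height, width, in, out]
demo_out = tf.nn.atrous_conv2d(demo_feats, demo_filter, rate=2, padding="SAME")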
Example #7
    def __init__(self, num_classes_A, num_classes_B, vocab_size, shape_domain_size,
                 char_domain_size, char_size, embedding_size, shape_size,
                 nonlinearity, viterbi, hidden_dim, char_embeddings,
                 embeddings=None):

        self.num_classes_A = num_classes_A
        self.num_classes_B = num_classes_B
        self.vocab_size = vocab_size
        self.shape_domain_size = shape_domain_size
        self.char_domain_size = char_domain_size
        self.char_size = char_size
        self.embedding_size = embedding_size
        self.shape_size = shape_size
        self.nonlinearity = nonlinearity
        self.viterbi = viterbi
        self.hidden_dim = hidden_dim
        self.char_embeddings = char_embeddings
        self.embeddings = embeddings

        # word embedding input
        self.input_x1 = tf.placeholder(tf.int64, [None, None], name="input_x1")

        # shape embedding input
        self.input_x2 = tf.placeholder(tf.int64, [None, None], name="input_x2")

        # labels
        self.input_y = tf.placeholder(tf.int64, [None, None], name="input_y")

        # padding mask
        self.input_mask = tf.placeholder(tf.float32, [None, None], name="input_mask")

        self.batch_size = tf.placeholder(tf.int32, None, name="batch_size")

        self.max_seq_len = tf.placeholder(tf.int32, None, name="max_seq_len")

        # sequence lengths
        self.sequence_lengths = tf.placeholder(tf.int32, [None, None], name="sequence_lengths")

        # dropout and l2 penalties
        self.middle_dropout_keep_prob = tf.placeholder_with_default(1.0, [], name="middle_dropout_keep_prob")
        self.hidden_dropout_keep_prob = tf.placeholder_with_default(1.0, [], name="hidden_dropout_keep_prob")
        self.input_dropout_keep_prob = tf.placeholder_with_default(1.0, [], name="input_dropout_keep_prob")
        self.word_dropout_keep_prob = tf.placeholder_with_default(1.0, [], name="word_dropout_keep_prob")

        self.l2_penalty = tf.placeholder_with_default(0.0, [], name="l2_penalty")

        self.projection = tf.placeholder_with_default(False, [], name="projection")

        self.drop_penalty = tf.placeholder_with_default(0.0, [], name="drop_penalty")

        # Keeping track of l2 regularization loss (optional)
        self.l2_loss_A = tf.constant(0.0)
        self.l2_loss_B = tf.constant(0.0)

        # set the pad token to a constant 0 vector
        self.word_zero_pad = tf.constant(0.0, dtype=tf.float32, shape=[1, embedding_size])
        self.shape_zero_pad = tf.constant(0.0, dtype=tf.float32, shape=[1, shape_size])
        self.char_zero_pad = tf.constant(0.0, dtype=tf.float32, shape=[1, char_size])

        self.use_characters = char_size != 0
        self.use_shape = shape_size != 0

        if self.viterbi:
            self.transition_params_A = tf.get_variable("transitions_A", [num_classes_A, num_classes_A])
            self.transition_params_B = tf.get_variable("transitions_B", [num_classes_B, num_classes_B])

        word_embeddings_shape = (vocab_size - 1, embedding_size)
        self.w_e = tf_utils.initialize_embeddings(word_embeddings_shape, name="w_e", pretrained=embeddings)

        self.flat_sequence_lengths
        self.lstm_output
        self.lstm_output_no_drop
        self.unflat_scores_A
        self.unflat_scores_B
        self.unflat_no_dropout_scores_A
        self.unflat_no_dropout_scores_B
        self.predictions_A
        self.predictions_B
        self.loss_A
        self.loss_B
    def forward(self,
                input_x1,
                input_x2,
                max_seq_len,
                hidden_dropout_keep_prob,
                input_dropout_keep_prob,
                middle_dropout_keep_prob,
                reuse=True):
        word_embeddings = tf.nn.embedding_lookup(self.w_e, input_x1)

        with tf.variable_scope("forward", reuse=reuse):
            input_list = [word_embeddings]
            input_size = self.embedding_size
            if self.use_characters:
                input_list.append(self.char_embeddings)
                input_size += self.char_size
            # todo add embeddings for all discrete features
            if self.use_shape:
                shape_embeddings_shape = (self.shape_domain_size - 1,
                                          self.shape_size)
                w_s = tf_utils.initialize_embeddings(shape_embeddings_shape,
                                                     name="w_s")
                shape_embeddings = tf.nn.embedding_lookup(w_s, input_x2)
                input_list.append(shape_embeddings)
                input_size += self.shape_size

            if self.use_geometric_feats:
                # the geometric inputs come in with mixed dtypes, so cast everything to float32 before concatenating
                input_list.append(tf.cast(self.widths, tf.float32))
                input_list.append(tf.cast(self.heights, tf.float32))
                input_list.append(tf.cast(self.wh_ratios, tf.float32))
                input_list.append(tf.cast(self.x_coords, tf.float32))
                input_list.append(tf.cast(self.y_coords, tf.float32))
                # input_list.append(tf.cast(self.pages, tf.float32))
                input_list.append(tf.cast(self.lines, tf.float32))
                input_list.append(tf.cast(self.zones, tf.float32))
                input_size += 7

            if self.use_lexicons:
                lex_embeddings_shape = (1, self.lex_size)
                # params for lexicon embeddings
                w_place = tf_utils.initialize_embeddings(lex_embeddings_shape,
                                                         name="w_place")
                w_dept = tf_utils.initialize_embeddings(lex_embeddings_shape,
                                                        name="w_dept")
                w_uni = tf_utils.initialize_embeddings(lex_embeddings_shape,
                                                       name="w_uni")
                w_person = tf_utils.initialize_embeddings(lex_embeddings_shape,
                                                          name="w_person")
                # embedding lookup tables
                place_embeddings = tf.nn.embedding_lookup(
                    w_place, self.place_scores)
                dept_embeddings = tf.nn.embedding_lookup(
                    w_dept, self.department_scores)
                uni_embeddings = tf.nn.embedding_lookup(
                    w_uni, self.university_scores)
                person_embeddings = tf.nn.embedding_lookup(
                    w_person, self.person_scores)
                # add lex embeddings to input list
                input_list.append(place_embeddings)
                input_list.append(dept_embeddings)
                input_list.append(uni_embeddings)
                input_list.append(person_embeddings)
                # input_list.append(tf.cast(self.place_scores, tf.float32))
                # input_list.append(tf.cast(self.department_scores, tf.float32))
                # input_list.append(tf.cast(self.university_scores, tf.float32))
                # input_list.append(tf.cast(self.person_scores, tf.float32))
                input_size += 4 * self.lex_size

            # print(input.get_shape())
            # (w, h) = self.widths.get_shape()
            # print(tf.reshape(self.widths, (w, h, 1)).get_shape())

            input_feats = tf.concat(axis=2, values=input_list)

            print(input_feats.get_shape())

            # self.input_feats_expanded = tf.expand_dims(self.input_feats, 1)
            input_feats_expanded_drop = tf.nn.dropout(input_feats,
                                                      input_dropout_keep_prob)

            total_output_width = 2 * self.hidden_dim

            with tf.name_scope("bilstm"):
                # selected_col_embeddings = tf.nn.embedding_lookup(token_embeddings, self.token_batch)
                fwd_cell = tf.nn.rnn_cell.BasicLSTMCell(self.hidden_dim,
                                                        state_is_tuple=True)
                bwd_cell = tf.nn.rnn_cell.BasicLSTMCell(self.hidden_dim,
                                                        state_is_tuple=True)
                lstm_outputs, _ = tf.nn.bidirectional_dynamic_rnn(
                    cell_fw=fwd_cell,
                    cell_bw=bwd_cell,
                    dtype=tf.float32,
                    inputs=input_feats_expanded_drop,
                    # inputs = input_feats,
                    parallel_iterations=50,
                    sequence_length=self.flat_sequence_lengths)
                hidden_outputs = tf.concat(axis=2, values=lstm_outputs)

            # concatenate the results of the forward and backward cells
            h_concat_flat = tf.reshape(hidden_outputs,
                                       [-1, total_output_width])

            # Add dropout
            with tf.name_scope("middle_dropout"):
                h_drop = tf.nn.dropout(h_concat_flat, middle_dropout_keep_prob)

            # second projection
            with tf.name_scope("tanh_proj"):
                w_tanh = tf_utils.initialize_weights(
                    [total_output_width, self.hidden_dim],
                    "w_tanh",
                    init_type="xavier")
                b_tanh = tf.get_variable(initializer=tf.constant(
                    0.01, shape=[self.hidden_dim]),
                                         name="b_tanh")
                self.l2_loss += tf.nn.l2_loss(w_tanh)
                self.l2_loss += tf.nn.l2_loss(b_tanh)
                h2_concat_flat = tf.nn.xw_plus_b(h_drop,
                                                 w_tanh,
                                                 b_tanh,
                                                 name="h2_tanh")
                h2_tanh = tf_utils.apply_nonlinearity(h2_concat_flat,
                                                      self.nonlinearity)

            # Add dropout
            with tf.name_scope("hidden_dropout"):
                h2_drop = tf.nn.dropout(h2_tanh, hidden_dropout_keep_prob)

            # Final (unnormalized) scores and predictions
            with tf.name_scope("output"):
                w_o = tf_utils.initialize_weights(
                    [self.hidden_dim, self.num_classes],
                    "w_o",
                    init_type="xavier")
                b_o = tf.get_variable(initializer=tf.constant(
                    0.01, shape=[self.num_classes]),
                                      name="b_o")
                self.l2_loss += tf.nn.l2_loss(w_o)
                self.l2_loss += tf.nn.l2_loss(b_o)
                scores = tf.nn.xw_plus_b(h2_drop, w_o, b_o, name="scores")
                unflat_scores = tf.reshape(
                    scores,
                    tf.stack([self.batch_size, max_seq_len, self.num_classes]))
        return unflat_scores
    def __init__(self,
                 num_classes,
                 vocab_size,
                 shape_domain_size,
                 char_domain_size,
                 char_size,
                 embedding_size,
                 shape_size,
                 lex_size,
                 nonlinearity,
                 viterbi,
                 hidden_dim,
                 char_embeddings,
                 embeddings=None,
                 use_geometric_feats=False,
                 use_lexicons=False):

        self.num_classes = num_classes
        self.shape_domain_size = shape_domain_size
        self.char_domain_size = char_domain_size
        self.char_size = char_size
        self.embedding_size = embedding_size
        self.shape_size = shape_size
        self.lex_size = lex_size
        self.hidden_dim = hidden_dim
        self.nonlinearity = nonlinearity
        self.char_embeddings = char_embeddings

        # word embedding input
        self.input_x1 = tf.placeholder(tf.int64, [None, None], name="input_x1")

        # shape embedding input
        self.input_x2 = tf.placeholder(tf.int64, [None, None], name="input_x2")

        # geometric inputs
        self.widths = tf.placeholder(tf.float32, [None, None, 1],
                                     name="widths")
        self.heights = tf.placeholder(tf.float32, [None, None, 1],
                                      name="heights")
        self.wh_ratios = tf.placeholder(tf.float32, [None, None, 1],
                                        name="wh_ratios")
        self.x_coords = tf.placeholder(tf.float32, [None, None, 1],
                                       name="x_coords")
        self.y_coords = tf.placeholder(tf.float32, [None, None, 1],
                                       name="y_coords")
        self.pages = tf.placeholder(tf.int64, [None, None, 1], name="pages")
        self.lines = tf.placeholder(tf.int64, [None, None, 1], name="lines")
        self.zones = tf.placeholder(tf.int64, [None, None, 1], name="zones")

        # dictionary matching inputs
        self.place_scores = tf.placeholder(tf.int64, [None, None],
                                           name="place_scores")
        self.department_scores = tf.placeholder(tf.int64, [None, None],
                                                name="department_scores")
        self.university_scores = tf.placeholder(tf.int64, [None, None],
                                                name="university_scores")
        self.person_scores = tf.placeholder(tf.int64, [None, None],
                                            name="person_scores")

        # labels
        self.input_y = tf.placeholder(tf.int64, [None, None], name="input_y")

        # padding mask
        self.input_mask = tf.placeholder(tf.float32, [None, None],
                                         name="input_mask")

        self.batch_size = tf.placeholder(tf.int32, None, name="batch_size")

        self.max_seq_len = tf.placeholder(tf.int32, None, name="max_seq_len")

        # sequence lengths
        self.sequence_lengths = tf.placeholder(tf.int32, [None, None],
                                               name="sequence_lengths")

        # dropout and l2 penalties
        self.middle_dropout_keep_prob = tf.placeholder_with_default(
            1.0, [], name="middle_dropout_keep_prob")
        self.hidden_dropout_keep_prob = tf.placeholder_with_default(
            1.0, [], name="hidden_dropout_keep_prob")
        self.input_dropout_keep_prob = tf.placeholder_with_default(
            1.0, [], name="input_dropout_keep_prob")
        self.word_dropout_keep_prob = tf.placeholder_with_default(
            1.0, [], name="word_dropout_keep_prob")

        self.l2_penalty = tf.placeholder_with_default(0.0, [],
                                                      name="l2_penalty")

        self.projection = tf.placeholder_with_default(False, [],
                                                      name="projection")

        self.drop_penalty = tf.placeholder_with_default(0.0, [],
                                                        name="drop_penalty")

        # Keeping track of l2 regularization loss (optional)
        self.l2_loss = tf.constant(0.0)

        # set the pad token to a constant 0 vector
        # todo do something similar for all embedded, discrete features
        self.word_zero_pad = tf.constant(0.0,
                                         dtype=tf.float32,
                                         shape=[1, embedding_size])
        self.shape_zero_pad = tf.constant(0.0,
                                          dtype=tf.float32,
                                          shape=[1, shape_size])
        self.char_zero_pad = tf.constant(0.0,
                                         dtype=tf.float32,
                                         shape=[1, char_size])

        self.use_characters = char_size != 0
        self.use_shape = shape_size != 0
        self.use_geometric_feats = use_geometric_feats
        self.use_lexicons = use_lexicons

        # Embedding layer
        # with tf.device('/cpu:0'), tf.name_scope("embedding"):
        word_embeddings_shape = (vocab_size - 1, embedding_size)
        self.w_e = tf_utils.initialize_embeddings(word_embeddings_shape,
                                                  name="w_e",
                                                  pretrained=embeddings)

        nonzero_elements = tf.not_equal(self.sequence_lengths,
                                        tf.zeros_like(self.sequence_lengths))
        count_nonzero_per_row = tf.reduce_sum(tf.to_int32(nonzero_elements),
                                              reduction_indices=1)
        # todo: this is the wrong type or something?
        # self.flat_sequence_lengths = tf.cast(tf.add(tf.reduce_sum(self.sequence_lengths, 1), tf.scalar_mul(2, count_nonzero_per_row)), tf.int64)
        # print(self.flat_sequence_lengths.get_shape())
        self.flat_sequence_lengths = tf.reshape(self.sequence_lengths, [-1])

        # tf.Print(self.flat_sequence_lengths, [self.flat_sequence_lengths.type])

        self.unflat_scores = self.forward(self.input_x1,
                                          self.input_x2,
                                          self.max_seq_len,
                                          self.hidden_dropout_keep_prob,
                                          self.input_dropout_keep_prob,
                                          self.middle_dropout_keep_prob,
                                          reuse=False)

        # Calculate mean cross-entropy loss
        with tf.name_scope("loss"):
            labels = tf.cast(self.input_y, 'int32')
            if viterbi:
                log_likelihood, transition_params = tf.contrib.crf.crf_log_likelihood(
                    self.unflat_scores, labels, self.flat_sequence_lengths)
                self.transition_params = transition_params
                self.loss = tf.reduce_mean(-log_likelihood)
            else:
                losses = tf.nn.sparse_softmax_cross_entropy_with_logits(
                    logits=self.unflat_scores, labels=labels)
                masked_losses = tf.multiply(losses, self.input_mask)
                self.loss = tf.div(tf.reduce_sum(masked_losses),
                                   tf.reduce_sum(self.input_mask))
            self.loss += self.l2_penalty * self.l2_loss

            # run the forward pass to get scores
            self.unflat_no_dropout_scores = self.forward(
                self.input_x1, self.input_x2, self.max_seq_len, 1.0, 1.0, 1.0)

            drop_loss = tf.nn.l2_loss(
                tf.subtract(self.unflat_scores, self.unflat_no_dropout_scores))
            self.loss += self.drop_penalty * drop_loss

        # Accuracy
        with tf.name_scope("predictions"):
            if viterbi:
                self.predictions = self.unflat_scores
            else:
                self.predictions = tf.argmax(self.unflat_scores, 2)
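# When `viterbi` is set, self.predictions above holds the raw score matrices and
# the learned transition matrix, so decoding happens outside the graph. A small
# helper consistent with that setup (the function and argument names are
# illustrative, not taken from the original code):
import tensorflow as tf

def viterbi_decode_batch(score_batch, seq_lengths, transition_params):
    # score_batch: [batch, max_seq_len, num_classes] scores fetched with sess.run;
    # transition_params: the CRF transition matrix, also fetched from the graph.
    return [tf.contrib.crf.viterbi_decode(scores[:length], transition_params)[0]
            for scores, length in zip(score_batch, seq_lengths)]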
Example #10
    def forward(self, input_x1, input_x2, max_seq_len, hidden_dropout_keep_prob,
                input_dropout_keep_prob, middle_dropout_keep_prob, reuse=True):
        word_embeddings = tf.nn.embedding_lookup(self.w_e, input_x1)

        with tf.variable_scope("forward", reuse=reuse):
            input_list = [word_embeddings]
            input_size = self.embedding_size
            if self.use_characters:
                input_list.append(self.char_embeddings)
                input_size += self.char_size
            if self.use_shape:
                shape_embeddings_shape = (self.shape_domain_size - 1, self.shape_size)
                w_s = tf_utils.initialize_embeddings(shape_embeddings_shape, name="w_s")
                shape_embeddings = tf.nn.embedding_lookup(w_s, input_x2)
                input_list.append(shape_embeddings)
                input_size += self.shape_size

            if self.use_geometric_feats:
                # TODO: add other features to input list, concat them to end of input_feats
                # todo this is the wrong shape to be concatenated
                # the geometric inputs come in with mixed dtypes, so cast everything to float32 before concatenating
                input_list.append(tf.cast(self.widths, tf.float32))
                input_list.append(tf.cast(self.heights, tf.float32))
                input_list.append(tf.cast(self.wh_ratios, tf.float32))
                input_list.append(tf.cast(self.x_coords, tf.float32))
                input_list.append(tf.cast(self.y_coords, tf.float32))
                input_list.append(tf.cast(self.pages, tf.float32))
                input_list.append(tf.cast(self.lines, tf.float32))
                input_list.append(tf.cast(self.zones, tf.float32))
                input_size += 8

            # print(input.get_shape())
            # (w, h) = self.widths.get_shape()
            # print(tf.reshape(self.widths, (w, h, 1)).get_shape())

            input_feats = tf.concat(axis=2, values=input_list)

            print(input_feats.get_shape())

            # self.input_feats_expanded = tf.expand_dims(self.input_feats, 1)
            input_feats_expanded_drop = tf.nn.dropout(input_feats, input_dropout_keep_prob)

            total_output_width = self.hidden_dim  # unidirectional LSTM, so no 2*self.hidden_dim doubling

            with tf.name_scope("lstm"):
                # selected_col_embeddings = tf.nn.embedding_lookup(token_embeddings, self.token_batch)
                fwd_cell = tf.nn.rnn_cell.BasicLSTMCell(self.hidden_dim, state_is_tuple=True)
                lstm_outputs, _ = tf.nn.dynamic_rnn(cell=fwd_cell, dtype=tf.float32,
                                                                 inputs=input_feats_expanded_drop,
                                                                 # inputs = input_feats,
                                                                 parallel_iterations=50,
                                                                 sequence_length=self.flat_sequence_lengths)
                # hidden_outputs = tf.concat(2, lstm_outputs)

            # flatten the LSTM outputs so each time step becomes a row for the projection layers below
            h_concat_flat = tf.reshape(lstm_outputs, [-1, total_output_width])

            # Add dropout
            with tf.name_scope("middle_dropout"):
                h_drop = tf.nn.dropout(h_concat_flat, middle_dropout_keep_prob)

            # second projection
            with tf.name_scope("tanh_proj"):
                w_tanh = tf_utils.initialize_weights([total_output_width, self.hidden_dim], "w_tanh", init_type="xavier")
                b_tanh = tf.get_variable(initializer=tf.constant(0.01, shape=[self.hidden_dim]), name="b_tanh")
                self.l2_loss += tf.nn.l2_loss(w_tanh)
                self.l2_loss += tf.nn.l2_loss(b_tanh)
                h2_concat_flat = tf.nn.xw_plus_b(h_drop, w_tanh, b_tanh, name="h2_tanh")
                h2_tanh = tf_utils.apply_nonlinearity(h2_concat_flat, self.nonlinearity)

            # Add dropout
            with tf.name_scope("hidden_dropout"):
                h2_drop = tf.nn.dropout(h2_tanh, hidden_dropout_keep_prob)

            # Final (unnormalized) scores and predictions
            with tf.name_scope("output"):
                w_o = tf_utils.initialize_weights([self.hidden_dim, self.num_classes], "w_o", init_type="xavier")
                b_o = tf.get_variable(initializer=tf.constant(0.01, shape=[self.num_classes]), name="b_o")
                self.l2_loss += tf.nn.l2_loss(w_o)
                self.l2_loss += tf.nn.l2_loss(b_o)
                scores = tf.nn.xw_plus_b(h2_drop, w_o, b_o, name="scores")
                unflat_scores = tf.reshape(scores, tf.stack([self.batch_size, max_seq_len, self.num_classes]))
        return unflat_scores
    def __init__(self,
                 num_classes,
                 vocab_size,
                 shape_domain_size,
                 char_domain_size,
                 char_size,
                 embedding_size,
                 shape_size,
                 nonlinearity,
                 viterbi,
                 hidden_dim,
                 char_embeddings,
                 embeddings=None):

        self.num_classes = num_classes
        self.shape_domain_size = shape_domain_size
        self.char_domain_size = char_domain_size
        self.char_size = char_size
        self.embedding_size = embedding_size
        self.shape_size = shape_size
        self.hidden_dim = hidden_dim
        self.nonlinearity = nonlinearity
        self.char_embeddings = char_embeddings
        self.viterbi = viterbi

        # word embedding input
        self.input_x1 = tf.placeholder(tf.int64, [None, None], name="input_x1")

        # shape embedding input
        self.input_x2 = tf.placeholder(tf.int64, [None, None], name="input_x2")

        # labels
        self.input_y = tf.placeholder(tf.int64, [None, None], name="input_y")

        # padding mask
        self.input_mask = tf.placeholder(tf.float32, [None, None],
                                         name="input_mask")

        self.batch_size = tf.placeholder(tf.int32, None, name="batch_size")

        self.max_seq_len = tf.placeholder(tf.int32, None, name="max_seq_len")

        # sequence lengths
        self.sequence_lengths = tf.placeholder(tf.int32, [None, None],
                                               name="sequence_lengths")

        # dropout and l2 penalties
        self.middle_dropout_keep_prob = tf.placeholder_with_default(
            1.0, [], name="middle_dropout_keep_prob")
        self.hidden_dropout_keep_prob = tf.placeholder_with_default(
            1.0, [], name="hidden_dropout_keep_prob")
        self.input_dropout_keep_prob = tf.placeholder_with_default(
            1.0, [], name="input_dropout_keep_prob")
        self.word_dropout_keep_prob = tf.placeholder_with_default(
            1.0, [], name="word_dropout_keep_prob")

        self.l2_penalty = tf.placeholder_with_default(0.0, [],
                                                      name="l2_penalty")

        self.projection = tf.placeholder_with_default(False, [],
                                                      name="projection")

        self.drop_penalty = tf.placeholder_with_default(0.0, [],
                                                        name="drop_penalty")

        # Keeping track of l2 regularization loss (optional)
        self.l2_loss = tf.constant(0.0)

        # set the pad token to a constant 0 vector
        self.word_zero_pad = tf.constant(0.0,
                                         dtype=tf.float32,
                                         shape=[1, embedding_size])
        self.shape_zero_pad = tf.constant(0.0,
                                          dtype=tf.float32,
                                          shape=[1, shape_size])
        self.char_zero_pad = tf.constant(0.0,
                                         dtype=tf.float32,
                                         shape=[1, char_size])

        self.use_characters = char_size != 0
        self.use_shape = shape_size != 0

        if self.viterbi:
            self.transition_params = tf.get_variable(
                "transitions", [num_classes, num_classes])

        # Embedding layer
        # with tf.device('/cpu:0'), tf.name_scope("embedding"):
        word_embeddings_shape = (vocab_size - 1, embedding_size)
        self.w_e = tf_utils.initialize_embeddings(word_embeddings_shape,
                                                  name="w_e",
                                                  pretrained=embeddings)

        nonzero_elements = tf.not_equal(self.sequence_lengths,
                                        tf.zeros_like(self.sequence_lengths))
        count_nonzero_per_row = tf.reduce_sum(tf.to_int32(nonzero_elements),
                                              axis=1)
        self.flat_sequence_lengths = tf.add(
            tf.reduce_sum(self.sequence_lengths, 1),
            tf.scalar_mul(2, count_nonzero_per_row))

        self.unflat_scores, self.hidden_layer = self.forward(
            self.input_x1,
            self.input_x2,
            self.max_seq_len,
            self.hidden_dropout_keep_prob,
            self.input_dropout_keep_prob,
            self.middle_dropout_keep_prob,
            reuse=False)

        # Calculate mean cross-entropy loss
        with tf.name_scope("loss"):
            labels = tf.cast(self.input_y, 'int32')
            if viterbi:
                log_likelihood, transition_params = tf.contrib.crf.crf_log_likelihood(
                    self.unflat_scores,
                    labels,
                    self.flat_sequence_lengths,
                    transition_params=self.transition_params)
                # self.transition_params = transition_params
                self.loss = tf.reduce_mean(-log_likelihood)
            else:
                losses = tf.nn.sparse_softmax_cross_entropy_with_logits(
                    logits=self.unflat_scores, labels=labels)
                masked_losses = tf.multiply(losses, self.input_mask)
                self.loss = tf.div(tf.reduce_sum(masked_losses),
                                   tf.reduce_sum(self.input_mask))
            self.loss += self.l2_penalty * self.l2_loss

            self.unflat_no_dropout_scores, _ = self.forward(
                self.input_x1, self.input_x2, self.max_seq_len, 1.0, 1.0, 1.0)

            drop_loss = tf.nn.l2_loss(
                tf.subtract(self.unflat_scores, self.unflat_no_dropout_scores))
            self.loss += self.drop_penalty * drop_loss

        # Accuracy
        with tf.name_scope("predictions"):
            if viterbi:
                self.predictions = self.unflat_scores
            else:
                self.predictions = tf.argmax(self.unflat_scores, 2)
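# Tiny numeric illustration (made-up numbers, not from the original code) of the
# flat_sequence_lengths computation above: each row of sequence_lengths holds
# per-sentence token counts, and every non-empty sentence contributes 2 extra
# positions (presumably boundary padding) to the flattened length.
import numpy as np

demo_seq_lens = np.array([[4, 3, 0],
                          [5, 0, 0]])
demo_flat = demo_seq_lens.sum(axis=1) + 2 * np.count_nonzero(demo_seq_lens, axis=1)   # -> [11, 7]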
Example #12
    def forward(self, input_x1, input_x2, max_seq_len, hidden_dropout_keep_prob,
                input_dropout_keep_prob, middle_dropout_keep_prob, reuse=True):
        word_embeddings = tf.nn.embedding_lookup(self.w_e, input_x1)

        with tf.variable_scope("forward", reuse=reuse):

            input_list = [word_embeddings]
            input_size = self.embedding_size
            if self.use_characters:
                input_list.append(self.char_embeddings)
                input_size += self.char_size
            if self.use_shape:
                shape_embeddings_shape = (self.shape_domain_size - 1, self.shape_size)
                w_s = tf_utils.initialize_embeddings(shape_embeddings_shape, name="w_s")
                shape_embeddings = tf.nn.embedding_lookup(w_s, input_x2)
                input_list.append(shape_embeddings)
                input_size += self.shape_size

            input_feats = tf.concat(axis=2, values=input_list)
            # self.input_feats_expanded = tf.expand_dims(self.input_feats, 1)
            input_feats_expanded_drop = tf.nn.dropout(input_feats, input_dropout_keep_prob)

            total_output_width = 2*self.hidden_dim

            with tf.name_scope("bilstm"):
                # selected_col_embeddings = tf.nn.embedding_lookup(token_embeddings, self.token_batch)
                fwd_cell = tf.nn.rnn_cell.BasicLSTMCell(self.hidden_dim, state_is_tuple=True)
                bwd_cell = tf.nn.rnn_cell.BasicLSTMCell(self.hidden_dim, state_is_tuple=True)
                lstm_outputs, _ = tf.nn.bidirectional_dynamic_rnn(cell_fw=fwd_cell, cell_bw=bwd_cell, dtype=tf.float32,
                                                                 inputs=input_feats_expanded_drop,
                                                                 parallel_iterations=50,
                                                                 sequence_length=self.flat_sequence_lengths)
                hidden_outputs = tf.concat(axis=2, values=lstm_outputs)

            h_concat_flat = tf.reshape(hidden_outputs, [-1, total_output_width])

            # Add dropout
            with tf.name_scope("middle_dropout"):
                h_drop = tf.nn.dropout(h_concat_flat, middle_dropout_keep_prob)

            # second projection
            with tf.name_scope("tanh_proj"):
                w_tanh = tf_utils.initialize_weights([total_output_width, self.hidden_dim], "w_tanh", init_type="xavier")
                b_tanh = tf.get_variable(initializer=tf.constant(0.01, shape=[self.hidden_dim]), name="b_tanh")
                self.l2_loss += tf.nn.l2_loss(w_tanh)
                self.l2_loss += tf.nn.l2_loss(b_tanh)
                h2_concat_flat = tf.nn.xw_plus_b(h_drop, w_tanh, b_tanh, name="h2_tanh")
                h2_tanh = tf_utils.apply_nonlinearity(h2_concat_flat, self.nonlinearity)

            # Add dropout
            with tf.name_scope("hidden_dropout"):
                h2_drop = tf.nn.dropout(h2_tanh, hidden_dropout_keep_prob)

            # Final (unnormalized) scores and predictions
            with tf.name_scope("output"):
                w_o = tf_utils.initialize_weights([self.hidden_dim, self.num_classes], "w_o", init_type="xavier")
                b_o = tf.get_variable(initializer=tf.constant(0.01, shape=[self.num_classes]), name="b_o")
                self.l2_loss += tf.nn.l2_loss(w_o)
                self.l2_loss += tf.nn.l2_loss(b_o)
                scores = tf.nn.xw_plus_b(h2_drop, w_o, b_o, name="scores")
                unflat_scores = tf.reshape(scores, tf.stack([self.batch_size, max_seq_len, self.num_classes]))
        return unflat_scores, hidden_outputs
Example #13
    def __init__(self, num_classes, vocab_size, shape_domain_size, char_domain_size, char_size,
            embedding_size, shape_size, nonlinearity, viterbi, hidden_dim, char_embeddings, embeddings=None):

        self.num_classes = num_classes
        self.shape_domain_size = shape_domain_size
        self.char_domain_size = char_domain_size
        self.char_size = char_size
        self.embedding_size = embedding_size
        self.shape_size = shape_size
        self.hidden_dim = hidden_dim
        self.nonlinearity = nonlinearity
        self.char_embeddings = char_embeddings
        self.viterbi = viterbi

        # word embedding input
        self.input_x1 = tf.placeholder(tf.int64, [None, None], name="input_x1")

        # shape embedding input
        self.input_x2 = tf.placeholder(tf.int64, [None, None], name="input_x2")

        # labels
        self.input_y = tf.placeholder(tf.int64, [None, None], name="input_y")

        # padding mask
        self.input_mask = tf.placeholder(tf.float32, [None, None], name="input_mask")

        self.batch_size = tf.placeholder(tf.int32, None, name="batch_size")

        self.max_seq_len = tf.placeholder(tf.int32, None, name="max_seq_len")

        # sequence lengths
        self.sequence_lengths = tf.placeholder(tf.int32, [None, None], name="sequence_lengths")

        # dropout and l2 penalties
        self.middle_dropout_keep_prob = tf.placeholder_with_default(1.0, [], name="middle_dropout_keep_prob")
        self.hidden_dropout_keep_prob = tf.placeholder_with_default(1.0, [], name="hidden_dropout_keep_prob")
        self.input_dropout_keep_prob = tf.placeholder_with_default(1.0, [], name="input_dropout_keep_prob")
        self.word_dropout_keep_prob = tf.placeholder_with_default(1.0, [], name="word_dropout_keep_prob")

        self.l2_penalty = tf.placeholder_with_default(0.0, [], name="l2_penalty")

        self.projection = tf.placeholder_with_default(False, [], name="projection")

        self.drop_penalty = tf.placeholder_with_default(0.0, [], name="drop_penalty")
        # learning rate
        self.lr = tf.placeholder(dtype=tf.float32, shape=[], name="lr")

        # Keeping track of l2 regularization loss (optional)
        self.l2_loss = tf.constant(0.0)

        # set the pad token to a constant 0 vector
        self.word_zero_pad = tf.constant(0.0, dtype=tf.float32, shape=[1, embedding_size])
        self.shape_zero_pad = tf.constant(0.0, dtype=tf.float32, shape=[1, shape_size])
        self.char_zero_pad = tf.constant(0.0, dtype=tf.float32, shape=[1, char_size])

        self.use_characters = char_size != 0
        self.use_shape = shape_size != 0

        if self.viterbi:
            self.transition_params = tf.get_variable("transitions", [num_classes, num_classes])

        # Embedding layer
        # with tf.device('/cpu:0'), tf.name_scope("embedding"):
        word_embeddings_shape = (vocab_size - 1, embedding_size)
        self.w_e = tf_utils.initialize_embeddings(word_embeddings_shape, name="w_e", pretrained=embeddings)

        nonzero_elements = tf.not_equal(self.sequence_lengths, tf.zeros_like(self.sequence_lengths))
        count_nonzero_per_row = tf.reduce_sum(tf.to_int32(nonzero_elements), axis=1)
        self.flat_sequence_lengths = tf.add(tf.reduce_sum(self.sequence_lengths, 1), tf.scalar_mul(2, count_nonzero_per_row))

        self.unflat_scores, self.hidden_layer = self.forward(self.input_x1, self.input_x2, self.max_seq_len,
                                          self.hidden_dropout_keep_prob,
                                          self.input_dropout_keep_prob, self.middle_dropout_keep_prob, reuse=False)

        # Calculate mean cross-entropy loss
        with tf.name_scope("loss"):
            labels = tf.cast(self.input_y, 'int32')
            if viterbi:
                log_likelihood, transition_params = tf.contrib.crf.crf_log_likelihood(self.unflat_scores, labels, self.flat_sequence_lengths, transition_params=self.transition_params)
                # self.transition_params = transition_params
                self.loss = tf.reduce_mean(-log_likelihood)
            else:
                losses = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=self.unflat_scores, labels=labels)
                masked_losses = tf.multiply(losses, self.input_mask)
                self.loss = tf.div(tf.reduce_sum(masked_losses), tf.reduce_sum(self.input_mask))
            self.loss += self.l2_penalty * self.l2_loss

            self.unflat_no_dropout_scores, _ = self.forward(self.input_x1, self.input_x2, self.max_seq_len,
                                                         1.0, 1.0, 1.0)

            drop_loss = tf.nn.l2_loss(tf.subtract(self.unflat_scores, self.unflat_no_dropout_scores))
            self.loss += self.drop_penalty * drop_loss

        # Accuracy
        with tf.name_scope("predictions"):
            if viterbi:
                self.predictions = self.unflat_scores
            else:
                self.predictions = tf.argmax(self.unflat_scores, 2)
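# A hedged end-to-end training-step sketch for the tagger above. `model` is an
# instance of the class, and the batch arrays are assumed to be padded by the
# data pipeline; every name introduced here is illustrative only and not part
# of the original code.
import tensorflow as tf

def make_train_op(model):
    # Adam driven by the model's `lr` placeholder; any optimizer would slot in here.
    return tf.train.AdamOptimizer(learning_rate=model.lr).minimize(model.loss)

# train_op = make_train_op(model)
# with tf.Session() as sess:
#     sess.run(tf.global_variables_initializer())
#     _, batch_loss = sess.run([train_op, model.loss], feed_dict={
#         model.input_x1: word_ids, model.input_x2: shape_ids, model.input_y: label_ids,
#         model.input_mask: mask, model.sequence_lengths: seq_lens,
#         model.batch_size: word_ids.shape[0], model.max_seq_len: word_ids.shape[1],
#         model.input_dropout_keep_prob: 0.85, model.hidden_dropout_keep_prob: 0.85,
#         model.middle_dropout_keep_prob: 0.85, model.lr: 1e-3,
#     })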