Example 1
def biGRU(input, input_length, params, dropout=None, layers=None):
    dropout = dropout or params.dropout
    cell_fw = MultiRNNCell([
        DropoutWrapper(
            GRUCell(params.units),
            # output_keep_prob=1.0 - dropout,
            input_keep_prob=1.0 - dropout,
            state_keep_prob=1.0 - dropout,
            variational_recurrent=True,
            dtype=tf.float32,
            input_size=input.get_shape()[-1]
            if layer == 0 else tf.TensorShape(params.units))
        for layer in range(layers or params.layers)
    ])
    cell_bw = MultiRNNCell([
        DropoutWrapper(
            GRUCell(params.units),
            # output_keep_prob=1.0 - dropout,
            input_keep_prob=1.0 - dropout,
            state_keep_prob=1.0 - dropout,
            variational_recurrent=True,
            dtype=tf.float32,
            input_size=input.get_shape()[-1]
            if layer == 0 else tf.TensorShape(params.units))
        for layer in range(layers or params.layers)
    ])

    output, states = tf.nn.bidirectional_dynamic_rnn(
        cell_fw,
        cell_bw,
        input,
        sequence_length=input_length,
        dtype=tf.float32)
    output = tf.concat(output, -1)
    return output, states
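
These snippets omit their imports. Below is a minimal, self-contained sketch of the same variational-dropout pattern, assuming TensorFlow 1.x; the names and sizes are illustrative, not from the source. Note that DropoutWrapper requires input_size and dtype whenever variational_recurrent=True and the inputs are dropped, which is why the example above threads them through.

import tensorflow as tf
from tensorflow.contrib.rnn import GRUCell, DropoutWrapper

feat, units = 16, 32
inputs = tf.placeholder(tf.float32, [None, None, feat])  # batch x time x features
lengths = tf.placeholder(tf.int32, [None])

# With variational_recurrent=True the same dropout mask is reused at every
# time step, so the wrapper must know input_size and dtype up front.
cell = DropoutWrapper(GRUCell(units),
                      input_keep_prob=0.8,
                      state_keep_prob=0.8,
                      variational_recurrent=True,
                      input_size=tf.TensorShape([feat]),
                      dtype=tf.float32)

outputs, state = tf.nn.dynamic_rnn(cell, inputs, sequence_length=lengths,
                                   dtype=tf.float32)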
Example 2
def BiRNN(sequence, num_hidden, sequence_w_len=None, reuse=None, keep_prob=0.8, scope=None):
    cell_fw = DropoutWrapper(tf.contrib.rnn.LSTMCell(num_hidden, reuse=reuse),
                             output_keep_prob=keep_prob,
                             dtype=tf.float32)
    cell_bw = DropoutWrapper(tf.contrib.rnn.LSTMCell(num_hidden, reuse=reuse),
                             output_keep_prob=keep_prob,
                             dtype=tf.float32)
    outputs, _ = tf.nn.bidirectional_dynamic_rnn(cell_fw, cell_bw,
                                                 sequence,
                                                 sequence_length=sequence_w_len,
                                                 dtype=tf.float32,
                                                 scope=scope)
    return tf.concat(outputs, 2)
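
A hedged usage sketch for the BiRNN helper above; the placeholder shapes are assumptions for illustration:

import tensorflow as tf

batch_embedded = tf.placeholder(tf.float32, [None, 30, 128])  # batch x time x emb
word_lengths = tf.placeholder(tf.int32, [None])
encoded = BiRNN(batch_embedded, num_hidden=64,
                sequence_w_len=word_lengths, scope="encoder")
# encoded: [batch, time, 2 * 64], forward and backward outputs concatenated.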
Example 3
def __init__(self,
             num_symbols,
             num_embed_units,
             num_units,
             num_labels,
             embed,
             learning_rate=0.001,
             max_gradient_norm=5.0):
    self.texts = tf.placeholder(tf.int32, [None, None])  # shape: sentence*max_word
    self.text_length = tf.placeholder(tf.int32, [None])  # shape: sentence
    self.labels = tf.placeholder(tf.int32, [None])       # shape: sentence
    self.keep_prob = tf.placeholder(tf.float32)

    self.learning_rate = tf.Variable(float(learning_rate), trainable=False, dtype=tf.float32)
    self.global_step = tf.Variable(0, trainable=False)
    self.epoch = tf.Variable(0, trainable=False)
    self.epoch_add_op = self.epoch.assign(self.epoch + 1)

    # build the embedding table (index to vector)
    self.embed = tf.get_variable('embed', dtype=tf.float32, initializer=embed)

    self.embed_inputs = tf.nn.embedding_lookup(self.embed, self.texts)  # shape: sentence*max_word*num_embed_units
    fw_cell = DropoutWrapper(BasicLSTMCell(num_units), output_keep_prob=self.keep_prob)
    bw_cell = DropoutWrapper(BasicLSTMCell(num_units), output_keep_prob=self.keep_prob)

    middle_outputs, middle_states = bidirectional_dynamic_rnn(fw_cell, bw_cell, self.embed_inputs, self.text_length, dtype=tf.float32, scope="word_rnn")
    middle_outputs = tf.concat(middle_outputs, 2)  # shape: sentence*max_word*(2*num_units)

    middle_inputs = tf.expand_dims(tf.reduce_max(middle_outputs, axis=1), 0)  # shape: 1*sentence*(2*num_units)
    top_cell = DropoutWrapper(BasicLSTMCell(num_units), output_keep_prob=self.keep_prob)

    outputs, states = dynamic_rnn(top_cell, middle_inputs, dtype=tf.float32, scope="sentence_rnn")
    self.outputs = outputs[0]  # shape: sentence*num_units
    logits = tf.layers.dense(self.outputs, num_labels)

    self.loss = tf.reduce_sum(tf.nn.sparse_softmax_cross_entropy_with_logits(labels=self.labels, logits=logits), name='loss')
    mean_loss = self.loss / tf.cast(tf.shape(self.labels)[0], dtype=tf.float32)
    self.predict_labels = tf.argmax(logits, 1, 'predict_labels', output_type=tf.int32)
    self.accuracy = tf.reduce_sum(tf.cast(tf.equal(self.labels, self.predict_labels), tf.int32), name='accuracy')

    self.params = tf.trainable_variables()

    # calculate the gradient of parameters
    opt = tf.train.AdamOptimizer(self.learning_rate)
    gradients = tf.gradients(mean_loss, self.params)
    clipped_gradients, self.gradient_norm = tf.clip_by_global_norm(gradients, max_gradient_norm)
    self.update = opt.apply_gradients(zip(clipped_gradients, self.params), global_step=self.global_step)

    self.saver = tf.train.Saver(max_to_keep=3, pad_step_number=True)
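
A sketch of how this hierarchical model would typically be driven, one document (a batch of sentences) per step; all names and values below are assumptions for illustration:

# feed = {model.texts: word_ids,            # [num_sentences, max_words]
#         model.text_length: sent_lengths,  # [num_sentences]
#         model.labels: sent_labels,        # [num_sentences]
#         model.keep_prob: 0.8}             # 1.0 at evaluation time
# sess.run([model.update, model.loss], feed_dict=feed)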
Example 4
    def __init__(self,
                 num_tokens,
                 embeddings,
                 embeddings_size,
                 train_embeddings,
                 dropout_input,
                 rnn_hidden_size,
                 id2token,
                 token2id,
                 id2label,
                 label2id,
                 mode='teacher',
                 vocab_proj_dim=None):
        super(eVSNLI_net, self).__init__()

        self.mode = mode
        assert mode in ('teacher', 'forloop')

        self.num_tokens = num_tokens

        self.lstm_cell = DropoutWrapper(
            tf.nn.rnn_cell.LSTMCell(rnn_hidden_size),
            input_keep_prob=dropout_input,
            output_keep_prob=dropout_input)

        if embeddings is not None:
            self.embedding_matrix = tf.get_variable(
                "embedding_matrix",
                shape=(num_tokens, embeddings_size),
                initializer=glove_embeddings_initializer(embeddings),
                trainable=train_embeddings)
            print("Loaded GloVe embeddings!")
        else:
            self.embedding_matrix = tf.get_variable(
                "embedding_matrix",
                shape=(num_tokens, embeddings_size),
                initializer=tf.random_normal_initializer(stddev=0.05),
                trainable=train_embeddings)

        #vocab_proj_dim for vocab projection
        #self.decoder = RNN_Decoder(embeddings_size, rnn_hidden_size, num_tokens, vocab_proj_dim)
        self.decoder = RNN_Decoder(self.embedding_matrix, rnn_hidden_size,
                                   num_tokens, vocab_proj_dim)

        keys = list(token2id.keys())
        values = [token2id[k] for k in keys]
        self.token2id_table = tf.contrib.lookup.HashTable(
            tf.contrib.lookup.KeyValueTensorInitializer(keys,
                                                        values,
                                                        key_dtype=tf.string,
                                                        value_dtype=tf.int64),
            -1)

        mapping_token = tf.constant(list(id2token.values()), dtype=tf.string)
        self.id2token_table = tf.contrib.lookup.index_to_string_table_from_tensor(
            mapping_token, default_value="#unk#", name=None)

        mapping_label = tf.constant(list(id2label.values()), dtype=tf.string)
        self.id2label_table = tf.contrib.lookup.index_to_string_table_from_tensor(
            mapping_label, default_value="#unk#", name=None)
Example 5
def __init__(self,
             cell,
             input_keep_prob=1.0,
             output_keep_prob=1.0,
             state_keep_prob=1.0,
             variational_recurrent=False,
             input_size=None,
             dtype=None,
             seed=None,
             dropout_state_filter_visitor=None,
             is_train=True):
    DropoutWrapper.__init__(self, cell, input_keep_prob, output_keep_prob,
                            state_keep_prob, variational_recurrent,
                            input_size, dtype, seed,
                            dropout_state_filter_visitor)
    self.is_train = is_train
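
The subclass above records an is_train flag, presumably consulted elsewhere, since the stock DropoutWrapper applies dropout unconditionally. A common alternative (a sketch, not from the source) is to feed the keep probability through a placeholder so dropout can be disabled at inference without rebuilding the graph:

import tensorflow as tf
from tensorflow.contrib.rnn import GRUCell, DropoutWrapper

keep_prob = tf.placeholder_with_default(1.0, shape=[])  # defaults to "no dropout"
cell = DropoutWrapper(GRUCell(64),
                      input_keep_prob=keep_prob,
                      output_keep_prob=keep_prob)
# Training step: sess.run(train_op, feed_dict={keep_prob: 0.8})
# Inference: omit keep_prob from the feed; the default 1.0 makes dropout a no-op.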
Example 6
def _create_single_cell(cell_fn, num_units, is_residual=False, is_dropout=False, keep_prob=None):
    """Create single RNN cell based on cell_fn."""
    cell = cell_fn(num_units)
    if is_dropout:
        cell = DropoutWrapper(cell, input_keep_prob=keep_prob)
    if is_residual:
        cell = ResidualWrapper(cell)
    return cell
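
A usage sketch for _create_single_cell; the cell function, sizes, and the residual-from-layer-1 choice are assumptions for illustration:

import tensorflow as tf

cells = [_create_single_cell(tf.contrib.rnn.GRUCell, 128,
                             is_residual=(i > 0),  # ResidualWrapper needs matching
                             is_dropout=True,      # input/output sizes, so layer 0
                             keep_prob=0.8)        # stays non-residual
         for i in range(3)]
stacked = tf.contrib.rnn.MultiRNNCell(cells)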
Example 7

def dropout():
    """Append a dropout layer after each RNN cell."""
    if self.config.rnn == 'lstm':
        cell = lstm_cell()
    else:
        cell = gru_cell()

    return DropoutWrapper(cell,
                          output_keep_prob=self.config.dropout_keep_prob)
Example 8
def build_simple_ic_model(sentence_input, img_features_input, dropout_input,
                          num_tokens, num_labels, embeddings, embeddings_size,
                          train_embeddings, rnn_hidden_size,
                          multimodal_fusion_hidden_size,
                          classification_hidden_size):
    sentence_length = tf.cast(
        tf.reduce_sum(
            tf.cast(
                tf.not_equal(sentence_input,
                             tf.zeros_like(sentence_input, dtype=tf.int32)),
                tf.int64), 1), tf.int32)
    if embeddings is not None:
        embedding_matrix = tf.get_variable(
            "embedding_matrix",
            shape=(num_tokens, embeddings_size),
            initializer=glove_embeddings_initializer(embeddings),
            trainable=train_embeddings)
        print("Loaded GloVe embeddings!")
    else:
        embedding_matrix = tf.get_variable(
            "embedding_matrix",
            shape=(num_tokens, embeddings_size),
            initializer=tf.random_normal_initializer(stddev=0.05),
            trainable=train_embeddings)
    sentence_embeddings = tf.nn.embedding_lookup(embedding_matrix,
                                                 sentence_input)
    lstm_cell = DropoutWrapper(tf.nn.rnn_cell.LSTMCell(rnn_hidden_size),
                               input_keep_prob=dropout_input,
                               output_keep_prob=dropout_input)
    sentence_outputs, sentence_final_states = tf.nn.dynamic_rnn(
        cell=lstm_cell,
        inputs=sentence_embeddings,
        sequence_length=sentence_length,
        dtype=tf.float32)
    normalized_img_features = tf.nn.l2_normalize(img_features_input, dim=1)
    gated_sentence_hidden_layer = tf.nn.dropout(gated_tanh(
        sentence_final_states.h, multimodal_fusion_hidden_size),
                                                keep_prob=dropout_input)
    gated_img_hidden_layer = tf.nn.dropout(gated_tanh(
        normalized_img_features, multimodal_fusion_hidden_size),
                                           keep_prob=dropout_input)
    sentence_img_multimodal_fusion = tf.multiply(gated_sentence_hidden_layer,
                                                 gated_img_hidden_layer)
    gated_first_layer = tf.nn.dropout(gated_tanh(
        sentence_img_multimodal_fusion, classification_hidden_size),
                                      keep_prob=dropout_input)
    gated_second_layer = tf.nn.dropout(gated_tanh(gated_first_layer,
                                                  classification_hidden_size),
                                       keep_prob=dropout_input)
    gated_third_layer = tf.nn.dropout(gated_tanh(gated_second_layer,
                                                 classification_hidden_size),
                                      keep_prob=dropout_input)

    return tf.contrib.layers.fully_connected(gated_third_layer,
                                             num_labels,
                                             activation_fn=None)
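
gated_tanh is not defined in these snippets. From its call sites here and in the later examples, it appears to be the usual gated hyperbolic tangent, y = tanh(Wx + b) * sigmoid(W'x + b'). Below is a sketch consistent with both calling conventions used in this gallery (a size alone, or explicit shared W_plus_b / W_plus_b_prime layers); treat it as an assumption, not the original implementation:

import tensorflow as tf

def gated_tanh(x, size=None, W_plus_b=None, W_plus_b_prime=None):
    # Default to fresh fully connected layers when no shared layers are supplied.
    if W_plus_b is None:
        W_plus_b = lambda t: tf.contrib.layers.fully_connected(t, size, activation_fn=None)
    if W_plus_b_prime is None:
        W_plus_b_prime = lambda t: tf.contrib.layers.fully_connected(t, size, activation_fn=None)
    y_tilde = tf.tanh(W_plus_b(x))     # candidate activation
    g = tf.sigmoid(W_plus_b_prime(x))  # gate in (0, 1)
    return tf.multiply(y_tilde, g)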
Example 9
def RNN(sequence, num_hidden, sequence_w_len=None, reuse=None, keep_prob=0.8, scope=None):
    cell = DropoutWrapper(tf.contrib.rnn.LSTMCell(num_hidden, reuse=reuse),
                          output_keep_prob=keep_prob,
                          dtype=tf.float32)
    outputs, _ = tf.nn.dynamic_rnn(cell, sequence,
                                   sequence_length=sequence_w_len,
                                   dtype=tf.float32,
                                   scope=scope)

    return outputs
Example 10
def highway_lstm_cell(size):
    _cell = HighwayLSTMCell(size,
                            highway=True,
                            initializer=numpy_orthogonal_initializer,
                            use_layer_norm=config.layer_norm)
    return DropoutWrapper(_cell,
                          variational_recurrent=True,
                          dtype=tf.float32,
                          state_keep_prob=keep_prob,
                          input_keep_prob=input_keep_prob,
                          output_keep_prob=output_keep_prob)
Example 11
def cell(_size, name=None):
    _cell = LSTMCell(config.state_size,
                     name=name,
                     initializer=orthogonal_initializer(4),
                     forget_bias=config.forget_bias)
    return DropoutWrapper(_cell,
                          variational_recurrent=True,
                          dtype=tf.float32,
                          input_size=_size,
                          output_keep_prob=output_keep_prob,
                          state_keep_prob=keep_prob,
                          input_keep_prob=input_keep_prob)
Example 12
def create_cell():
    if self.dropout_keep_prob < 1.0:
        single_cell = lambda: BasicLSTMCell(hidden_size)
        hidden = MultiRNNCell(
            [single_cell() for _ in range(num_layer)])
        hidden = DropoutWrapper(
            hidden,
            input_keep_prob=self.dropout_keep_prob,
            output_keep_prob=self.dropout_keep_prob)
    else:
        single_cell = lambda: BasicLSTMCell(hidden_size)
        hidden = MultiRNNCell(
            [single_cell() for _ in range(num_layer)])
    return hidden
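
Wrapping the whole MultiRNNCell, as above, applies input and output dropout only at the bottom and top of the stack; wrapping each layer (as Example 1 does) also regularizes the connections between layers. A sketch of the per-layer variant under the same assumed names:

single_cell = lambda: DropoutWrapper(BasicLSTMCell(hidden_size),
                                     output_keep_prob=self.dropout_keep_prob)
hidden = MultiRNNCell([single_cell() for _ in range(num_layer)])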
Example 13

def build_simple_te_model_h(premise_input, hypothesis_input, dropout_input,
                            num_tokens, num_labels, embeddings,
                            embeddings_size, train_embeddings, rnn_hidden_size,
                            classification_hidden_size):
    hypothesis_length = tf.cast(
        tf.reduce_sum(
            tf.cast(
                tf.not_equal(hypothesis_input,
                             tf.zeros_like(hypothesis_input, dtype=tf.int32)),
                tf.int64), 1), tf.int32)
    if embeddings is not None:
        embedding_matrix = tf.get_variable(
            "embedding_matrix",
            shape=(num_tokens, embeddings_size),
            initializer=glove_embeddings_initializer(embeddings),
            trainable=train_embeddings)
        print("Loaded GloVe embeddings!")
    else:
        embedding_matrix = tf.get_variable(
            "embedding_matrix",
            shape=(num_tokens, embeddings_size),
            initializer=tf.random_normal_initializer(stddev=0.05),
            trainable=train_embeddings)
    hypothesis_embeddings = tf.nn.embedding_lookup(embedding_matrix,
                                                   hypothesis_input)
    lstm_cell = DropoutWrapper(tf.nn.rnn_cell.LSTMCell(rnn_hidden_size),
                               input_keep_prob=dropout_input,
                               output_keep_prob=dropout_input)
    hypothesis_outputs, hypothesis_final_states = tf.nn.dynamic_rnn(
        cell=lstm_cell,
        inputs=hypothesis_embeddings,
        sequence_length=hypothesis_length,
        dtype=tf.float32)
    gated_first_layer = tf.nn.dropout(gated_tanh(hypothesis_final_states.h,
                                                 classification_hidden_size),
                                      keep_prob=dropout_input)
    gated_second_layer = tf.nn.dropout(gated_tanh(gated_first_layer,
                                                  classification_hidden_size),
                                       keep_prob=dropout_input)
    gated_third_layer = tf.nn.dropout(gated_tanh(gated_second_layer,
                                                 classification_hidden_size),
                                      keep_prob=dropout_input)

    return tf.contrib.layers.fully_connected(gated_third_layer,
                                             num_labels,
                                             activation_fn=None)
Example 14
def build_tl_mt_model(sentence_input, premise_input, hypothesis_input,
                      img_features_input, dropout_input, num_tokens,
                      num_ic_labels, num_vte_labels, embeddings,
                      embeddings_size, num_img_features, img_features_size,
                      train_embeddings, rnn_hidden_size,
                      multimodal_fusion_hidden_size,
                      classification_hidden_size):
    sentence_length = tf.cast(
        tf.reduce_sum(
            tf.cast(
                tf.not_equal(sentence_input,
                             tf.zeros_like(sentence_input, dtype=tf.int32)),
                tf.int64), 1), tf.int32)
    premise_length = tf.cast(
        tf.reduce_sum(
            tf.cast(
                tf.not_equal(premise_input,
                             tf.zeros_like(premise_input, dtype=tf.int32)),
                tf.int64), 1), tf.int32)
    hypothesis_length = tf.cast(
        tf.reduce_sum(
            tf.cast(
                tf.not_equal(hypothesis_input,
                             tf.zeros_like(hypothesis_input, dtype=tf.int32)),
                tf.int64), 1), tf.int32)
    if embeddings is not None:
        embedding_matrix = tf.get_variable(
            "embedding_matrix",
            shape=(num_tokens, embeddings_size),
            initializer=glove_embeddings_initializer(embeddings),
            trainable=train_embeddings)
        print("Loaded GloVe embeddings!")
    else:
        embedding_matrix = tf.get_variable(
            "embedding_matrix",
            shape=(num_tokens, embeddings_size),
            initializer=tf.random_normal_initializer(stddev=0.05),
            trainable=train_embeddings)
    sentence_embeddings = tf.nn.embedding_lookup(embedding_matrix,
                                                 sentence_input)
    premise_embeddings = tf.nn.embedding_lookup(embedding_matrix,
                                                premise_input)
    hypothesis_embeddings = tf.nn.embedding_lookup(embedding_matrix,
                                                   hypothesis_input)
    lstm_cell = DropoutWrapper(tf.nn.rnn_cell.LSTMCell(rnn_hidden_size),
                               input_keep_prob=dropout_input,
                               output_keep_prob=dropout_input)
    sentence_outputs, sentence_final_states = tf.nn.dynamic_rnn(
        cell=lstm_cell,
        inputs=sentence_embeddings,
        sequence_length=sentence_length,
        dtype=tf.float32)
    premise_outputs, premise_final_states = tf.nn.dynamic_rnn(
        cell=lstm_cell,
        inputs=premise_embeddings,
        sequence_length=premise_length,
        dtype=tf.float32)
    hypothesis_outputs, hypothesis_final_states = tf.nn.dynamic_rnn(
        cell=lstm_cell,
        inputs=hypothesis_embeddings,
        sequence_length=hypothesis_length,
        dtype=tf.float32)
    normalized_img_features = tf.nn.l2_normalize(img_features_input, dim=2)

    reshaped_sentence = tf.reshape(
        tf.tile(sentence_final_states.h, [1, num_img_features]),
        [-1, num_img_features, rnn_hidden_size])
    img_sentence_concatenation = tf.concat(
        [normalized_img_features, reshaped_sentence], -1)
    gated_img_sentence_concatenation = tf.nn.dropout(gated_tanh(
        img_sentence_concatenation, rnn_hidden_size),
                                                     keep_prob=dropout_input)
    att_wa_sentence = lambda x: tf.nn.dropout(
        tf.contrib.layers.fully_connected(
            x, 1, activation_fn=None, biases_initializer=None),
        keep_prob=dropout_input)
    a_sentence = att_wa_sentence(gated_img_sentence_concatenation)
    a_sentence = tf.nn.softmax(tf.squeeze(a_sentence))
    v_head_sentence = tf.squeeze(
        tf.matmul(tf.expand_dims(a_sentence, 1), normalized_img_features))

    with tf.variable_scope(
            "gated_sentence_scope_W_plus_b") as gated_sentence_scope_W_plus_b:
        gated_sentence_W_plus_b = lambda x: tf.contrib.layers.fully_connected(
            x,
            multimodal_fusion_hidden_size,
            activation_fn=None,
            scope=gated_sentence_scope_W_plus_b)
    with tf.variable_scope("gated_sentence_scope_W_plus_b_prime"
                           ) as gated_sentence_scope_W_plus_b_prime:
        gated_sentence_W_plus_b_prime = lambda x: tf.contrib.layers.fully_connected(
            x,
            multimodal_fusion_hidden_size,
            activation_fn=None,
            scope=gated_sentence_scope_W_plus_b_prime)
    gated_sentence = tf.nn.dropout(
        gated_tanh(sentence_final_states.h,
                   multimodal_fusion_hidden_size,
                   W_plus_b=gated_sentence_W_plus_b,
                   W_plus_b_prime=gated_sentence_W_plus_b_prime),
        keep_prob=dropout_input,
    )

    v_head_sentence.set_shape(
        (sentence_embeddings.get_shape()[0], img_features_size))
    with tf.variable_scope("gated_img_features_sentence_scope_W_plus_b"
                           ) as gated_img_features_sentence_scope_W_plus_b:
        gated_img_features_sentence_W_plus_b = lambda x: tf.contrib.layers.fully_connected(
            x,
            multimodal_fusion_hidden_size,
            activation_fn=None,
            scope=gated_img_features_sentence_scope_W_plus_b)
    with tf.variable_scope(
            "gated_img_features_sentence_scope_W_plus_b_prime"
    ) as gated_img_features_sentence_scope_W_plus_b_prime:
        gated_img_features_sentence_W_plus_b_prime = lambda x: tf.contrib.layers.fully_connected(
            x,
            multimodal_fusion_hidden_size,
            activation_fn=None,
            scope=gated_img_features_sentence_scope_W_plus_b_prime)
    gated_img_features_sentence = tf.nn.dropout(gated_tanh(
        v_head_sentence,
        multimodal_fusion_hidden_size,
        W_plus_b=gated_img_features_sentence_W_plus_b,
        W_plus_b_prime=gated_img_features_sentence_W_plus_b_prime),
                                                keep_prob=dropout_input)

    h_premise_img = tf.multiply(gated_sentence, gated_img_features_sentence)

    with tf.variable_scope("gated_first_layer_scope_W_plus_b"
                           ) as gated_first_layer_scope_W_plus_b:
        gated_first_layer_W_plus_b = lambda x: tf.contrib.layers.fully_connected(
            x,
            classification_hidden_size,
            activation_fn=None,
            scope=gated_first_layer_scope_W_plus_b)
    with tf.variable_scope("gated_first_layer_scope_W_plus_b_prime"
                           ) as gated_first_layer_scope_W_plus_b_prime:
        gated_first_layer_W_plus_b_prime = lambda x: tf.contrib.layers.fully_connected(
            x,
            classification_hidden_size,
            activation_fn=None,
            scope=gated_first_layer_scope_W_plus_b_prime)
    gated_first_layer = tf.nn.dropout(gated_tanh(
        h_premise_img,
        W_plus_b=gated_first_layer_W_plus_b,
        W_plus_b_prime=gated_first_layer_W_plus_b_prime),
                                      keep_prob=dropout_input)

    gated_second_layer = tf.nn.dropout(gated_tanh(gated_first_layer,
                                                  classification_hidden_size),
                                       keep_prob=dropout_input)
    gated_third_layer = tf.nn.dropout(gated_tanh(gated_second_layer,
                                                 classification_hidden_size),
                                      keep_prob=dropout_input)

    ic_classification = tf.nn.dropout(tf.contrib.layers.fully_connected(
        gated_third_layer, num_ic_labels, activation_fn=None),
                                      keep_prob=dropout_input)

    reshaped_premise = tf.reshape(
        tf.tile(premise_final_states.h, [1, num_img_features]),
        [-1, num_img_features, rnn_hidden_size])
    img_premise_concatenation = tf.concat(
        [normalized_img_features, reshaped_premise], -1)
    gated_img_premise_concatenation = tf.nn.dropout(gated_tanh(
        img_premise_concatenation, rnn_hidden_size),
                                                    keep_prob=dropout_input)
    att_wa_premise = lambda x: tf.nn.dropout(tf.contrib.layers.fully_connected(
        x, 1, activation_fn=None, biases_initializer=None),
                                             keep_prob=dropout_input)
    a_premise = att_wa_premise(gated_img_premise_concatenation)
    a_premise = tf.nn.softmax(tf.squeeze(a_premise))
    v_head_premise = tf.squeeze(
        tf.matmul(tf.expand_dims(a_premise, 1), normalized_img_features))

    reshaped_hypothesis = tf.reshape(
        tf.tile(hypothesis_final_states.h, [1, num_img_features]),
        [-1, num_img_features, rnn_hidden_size])
    img_hypothesis_concatenation = tf.concat(
        [normalized_img_features, reshaped_hypothesis], -1)
    gated_img_hypothesis_concatenation = tf.nn.dropout(gated_tanh(
        img_hypothesis_concatenation, rnn_hidden_size),
                                                       keep_prob=dropout_input)
    att_wa_hypothesis = lambda x: tf.nn.dropout(
        tf.contrib.layers.fully_connected(
            x, 1, activation_fn=None, biases_initializer=None),
        keep_prob=dropout_input)
    a_hypothesis = att_wa_hypothesis(gated_img_hypothesis_concatenation)
    a_hypothesis = tf.nn.softmax(tf.squeeze(a_hypothesis))
    v_head_hypothesis = tf.squeeze(
        tf.matmul(tf.expand_dims(a_hypothesis, 1), normalized_img_features))

    with tf.variable_scope(
            "gated_sentence_scope_W_plus_b") as gated_sentence_scope_W_plus_b:
        gated_premise_W_plus_b = lambda x: tf.contrib.layers.fully_connected(
            x,
            multimodal_fusion_hidden_size,
            activation_fn=None,
            scope=gated_sentence_scope_W_plus_b,
            reuse=True)
    with tf.variable_scope("gated_sentence_scope_W_plus_b_prime"
                           ) as gated_sentence_scope_W_plus_b_prime:
        gated_premise_W_plus_b_prime = lambda x: tf.contrib.layers.fully_connected(
            x,
            multimodal_fusion_hidden_size,
            activation_fn=None,
            scope=gated_sentence_scope_W_plus_b_prime,
            reuse=True)
    gated_premise = tf.nn.dropout(
        gated_tanh(premise_final_states.h,
                   multimodal_fusion_hidden_size,
                   W_plus_b=gated_premise_W_plus_b,
                   W_plus_b_prime=gated_premise_W_plus_b_prime),
        keep_prob=dropout_input,
    )

    with tf.variable_scope(
            "gated_sentence_scope_W_plus_b") as gated_sentence_scope_W_plus_b:
        gated_hypothesis_W_plus_b = lambda x: tf.contrib.layers.fully_connected(
            x,
            multimodal_fusion_hidden_size,
            activation_fn=None,
            scope=gated_sentence_scope_W_plus_b,
            reuse=True)
    with tf.variable_scope("gated_sentence_scope_W_plus_b_prime"
                           ) as gated_sentence_scope_W_plus_b_prime:
        gated_hypothesis_W_plus_b_prime = lambda x: tf.contrib.layers.fully_connected(
            x,
            multimodal_fusion_hidden_size,
            activation_fn=None,
            scope=gated_sentence_scope_W_plus_b_prime,
            reuse=True)
    gated_hypothesis = tf.nn.dropout(
        gated_tanh(hypothesis_final_states.h,
                   multimodal_fusion_hidden_size,
                   W_plus_b=gated_hypothesis_W_plus_b,
                   W_plus_b_prime=gated_hypothesis_W_plus_b_prime),
        keep_prob=dropout_input,
    )

    v_head_premise.set_shape(
        (premise_embeddings.get_shape()[0], img_features_size))
    with tf.variable_scope("gated_img_features_sentence_scope_W_plus_b"
                           ) as gated_img_features_sentence_scope_W_plus_b:
        gated_img_features_premise_W_plus_b = lambda x: tf.contrib.layers.fully_connected(
            x,
            multimodal_fusion_hidden_size,
            activation_fn=None,
            scope=gated_img_features_sentence_scope_W_plus_b,
            reuse=True)
    with tf.variable_scope(
            "gated_img_features_sentence_scope_W_plus_b_prime"
    ) as gated_img_features_sentence_scope_W_plus_b_prime:
        gated_img_features_premise_W_plus_b_prime = lambda x: tf.contrib.layers.fully_connected(
            x,
            multimodal_fusion_hidden_size,
            activation_fn=None,
            scope=gated_img_features_sentence_scope_W_plus_b_prime,
            reuse=True)
    gated_img_features_premise = tf.nn.dropout(gated_tanh(
        v_head_premise,
        multimodal_fusion_hidden_size,
        W_plus_b=gated_img_features_premise_W_plus_b,
        W_plus_b_prime=gated_img_features_premise_W_plus_b_prime),
                                               keep_prob=dropout_input)

    v_head_hypothesis.set_shape(
        (hypothesis_embeddings.get_shape()[0], img_features_size))
    with tf.variable_scope("gated_img_features_sentence_scope_W_plus_b"
                           ) as gated_img_features_sentence_scope_W_plus_b:
        gated_img_features_hypothesis_W_plus_b = lambda x: tf.contrib.layers.fully_connected(
            x,
            multimodal_fusion_hidden_size,
            activation_fn=None,
            scope=gated_img_features_sentence_scope_W_plus_b,
            reuse=True)
    with tf.variable_scope(
            "gated_img_features_sentence_scope_W_plus_b_prime"
    ) as gated_img_features_sentence_scope_W_plus_b_prime:
        gated_img_features_hypothesis_W_plus_b_prime = lambda x: tf.contrib.layers.fully_connected(
            x,
            multimodal_fusion_hidden_size,
            activation_fn=None,
            scope=gated_img_features_sentence_scope_W_plus_b_prime,
            reuse=True)
    gated_img_features_hypothesis = tf.nn.dropout(gated_tanh(
        v_head_hypothesis,
        multimodal_fusion_hidden_size,
        W_plus_b=gated_img_features_hypothesis_W_plus_b,
        W_plus_b_prime=gated_img_features_hypothesis_W_plus_b_prime),
                                                  keep_prob=dropout_input)

    h_premise_img = tf.multiply(gated_premise, gated_img_features_premise)
    h_hypothesis_img = tf.multiply(gated_hypothesis,
                                   gated_img_features_hypothesis)

    with tf.variable_scope("gated_first_layer_scope_W_plus_b"
                           ) as gated_first_layer_scope_W_plus_b:
        gated_h_premise_img_hidden_layer_W_plus_b = lambda x: tf.contrib.layers.fully_connected(
            x,
            classification_hidden_size,
            activation_fn=None,
            scope=gated_first_layer_scope_W_plus_b,
            reuse=True)
    with tf.variable_scope("gated_first_layer_scope_W_plus_b_prime"
                           ) as gated_first_layer_scope_W_plus_b_prime:
        gated_h_premise_hidden_layer_W_plus_b_prime = lambda x: tf.contrib.layers.fully_connected(
            x,
            classification_hidden_size,
            activation_fn=None,
            scope=gated_first_layer_scope_W_plus_b_prime,
            reuse=True)
    gated_h_premise_img_hidden_layer = tf.nn.dropout(gated_tanh(
        h_premise_img,
        W_plus_b=gated_h_premise_img_hidden_layer_W_plus_b,
        W_plus_b_prime=gated_h_premise_hidden_layer_W_plus_b_prime),
                                                     keep_prob=dropout_input)

    with tf.variable_scope("gated_first_layer_scope_W_plus_b"
                           ) as gated_first_layer_scope_W_plus_b:
        gated_h_hypothesis_img_hidden_layer_W_plus_b = lambda x: tf.contrib.layers.fully_connected(
            x,
            classification_hidden_size,
            activation_fn=None,
            scope=gated_first_layer_scope_W_plus_b,
            reuse=True)
    with tf.variable_scope("gated_first_layer_scope_W_plus_b_prime"
                           ) as gated_first_layer_scope_W_plus_b_prime:
        gated_h_hypothesis_hidden_layer_W_plus_b_prime = lambda x: tf.contrib.layers.fully_connected(
            x,
            classification_hidden_size,
            activation_fn=None,
            scope=gated_first_layer_scope_W_plus_b_prime,
            reuse=True)
    gated_h_hypothesis_img_hidden_layer = tf.nn.dropout(
        gated_tanh(
            h_hypothesis_img,
            W_plus_b=gated_h_hypothesis_img_hidden_layer_W_plus_b,
            W_plus_b_prime=gated_h_hypothesis_hidden_layer_W_plus_b_prime),
        keep_prob=dropout_input)

    final_concatenation = tf.concat([
        gated_h_premise_img_hidden_layer, gated_h_hypothesis_img_hidden_layer
    ], 1)

    gated_first_layer = tf.nn.dropout(gated_tanh(final_concatenation,
                                                 classification_hidden_size),
                                      keep_prob=dropout_input)
    gated_second_layer = tf.nn.dropout(gated_tanh(gated_first_layer,
                                                  classification_hidden_size),
                                       keep_prob=dropout_input)

    vte_classification = tf.nn.dropout(tf.contrib.layers.fully_connected(
        gated_second_layer, num_vte_labels, activation_fn=None),
                                       keep_prob=dropout_input)

    return ic_classification, vte_classification
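
The scope-plus-reuse lambdas above exist to share one set of weights across the sentence, premise, and hypothesis branches. The same sharing pattern in minimal form (a sketch; the scope name and sizes are illustrative):

import tensorflow as tf

def shared_projection(x, size=64):
    # Every call resolves to the same variables under the "proj" scope.
    with tf.variable_scope("proj", reuse=tf.AUTO_REUSE):
        w = tf.get_variable("weights", [int(x.shape[-1]), size])
        b = tf.get_variable("biases", [size], initializer=tf.zeros_initializer())
    return tf.matmul(x, w) + b

h1 = shared_projection(tf.zeros([2, 32]))  # creates proj/weights, proj/biases
h2 = shared_projection(tf.zeros([2, 32]))  # reuses the same variables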
Example 15
    # Word embedding table, randomly initialized
    train_x, vocab_size = get_vocabulary(train_x)
    test_x, vocab_size_test = get_vocabulary(test_x)
    print("data shape:", train_x.shape)
    embeddings = tf.get_variable("embeddings", [vocab_size, embedding_size],
                                 initializer=tf.truncated_normal_initializer)

    # Map word indices to word vectors: [None, max_document_length] => [None, max_document_length, embedding_size]
    embedded = tf.nn.embedding_lookup(embeddings, datas_placeholder)

    # Convert to the static-RNN input format: a list in which each element holds
    # the data for one time step (i.e. one word) across the whole batch
    rnn_input = tf.unstack(embedded, max_document_length, axis=1)

    # Define the LSTM network
    lstm_cell = BasicLSTMCell(num_units=num_units, forget_bias=1.0)  # cell
    lstm_cell = DropoutWrapper(cell=lstm_cell, input_keep_prob=1.0, output_keep_prob=keep_prob)
    rnn_outputs, rnn_states = static_rnn(cell=lstm_cell, inputs=rnn_input, dtype=tf.float32)  # network

    # Final layer
    logits = tf.layers.dense(inputs=rnn_outputs[-1], units=num_classes)  # fully-connected
    pred_labels = tf.argmax(logits, axis=1)  # the class with the highest probability is the prediction

    # Define the loss: logits are the network's final-layer outputs, labels the ground truth
    losses = tf.nn.softmax_cross_entropy_with_logits(logits=logits,
                                                     labels=tf.one_hot(labels_placeholder, num_classes))
    mean_losses = tf.reduce_mean(losses)
    optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(mean_losses)

    with tf.Session(config=tf.ConfigProto(log_device_placement=True)) as sess:
        # Initialize variables
        print("---init all variables---")
Example 16
def layer_fn():
    return DropoutWrapper(cell_fn(),
                          output_keep_prob=keep_probability)
Example 17
def model_fn(features,
             labels,
             mode,
             params,
             word_embeddings_np=None,
             char_embeddings_np=None):
    attention_fun = partial(BahdanauAttention, num_units=params.units) if params.attention == 'bahdanau' \
        else partial(LuongAttention, num_units=2 * params.units)

    dropout = params.dropout if mode == tf.estimator.ModeKeys.TRAIN else 0.0
    passage_count = params.passage_count if mode != tf.estimator.ModeKeys.TRAIN \
        else params.train_passage_count

    question_words_length = features['question_length']
    passage_words_length = features['passage_length']

    devices = get_devices()

    with tf.device('/cpu:0'):
        word_embeddings_placeholder = tf.placeholder(
            shape=[params.vocab_size, params.emb_size], dtype=tf.float32)
        char_embeddings_placeholder = tf.placeholder(
            shape=[params.char_vocab_size, params.char_emb_size],
            dtype=tf.float32)

        # word_embeddings = tf.create_partitioned_variables(shape=[params.vocab_size, params.emb_size],
        #                                                   slicing=[10, 1],
        #                                                   initializer=word_embeddings_placeholder,
        #                                                   trainable=False, name="word_embeddings")
        word_embeddings = tf.Variable(word_embeddings_placeholder,
                                      trainable=False,
                                      name="word_embeddings")
        char_embeddings = tf.Variable(char_embeddings_placeholder,
                                      trainable=False,
                                      name="char_embeddings")

        word_embeddings = tf.nn.dropout(word_embeddings,
                                        1.0 - dropout,
                                        noise_shape=[params.vocab_size, 1])
        char_embeddings = tf.nn.dropout(
            char_embeddings,
            1.0 - dropout,
            noise_shape=[params.char_vocab_size, 1])

    question_words_emb = tf.nn.embedding_lookup(word_embeddings,
                                                features['question_words'])
    question_chars_emb = tf.nn.embedding_lookup(char_embeddings,
                                                features['question_chars'])

    passage_words_emb = tf.nn.embedding_lookup(word_embeddings,
                                               features['passage_words'])
    passage_chars_emb = tf.nn.embedding_lookup(char_embeddings,
                                               features['passage_chars'])

    with tf.device(next(devices)):
        with tf.variable_scope('question_encoding'):
            question_enc = encoder(question_words_emb,
                                   question_words_length,
                                   question_chars_emb,
                                   features['question_char_length'],
                                   params,
                                   dropout=dropout)

    with tf.device(next(devices)):
        with tf.variable_scope('passage_encoding'):
            passage_enc = encoder(passage_words_emb,
                                  passage_words_length,
                                  passage_chars_emb,
                                  features['passage_char_length'],
                                  params,
                                  dropout=dropout)
        # question_enc = tf.Print(question_enc, [question_enc], summarize=1000)

        with tf.variable_scope('attention'):
            attention = attention_fun(
                memory=question_enc,
                memory_sequence_length=question_words_length)
            cell_fw = GatedAttentionWrapper(
                attention,
                DropoutWrapper(
                    GRUCell(params.units, name="attention_gru"),
                    # output_keep_prob=1.0 - dropout,
                    input_keep_prob=1.0 - dropout,
                    # state_keep_prob=1.0 - dropout,
                    variational_recurrent=True,
                    input_size=4 * params.units,
                    dtype=tf.float32),
                dropout=0)

            cell_bw = GatedAttentionWrapper(
                attention,
                DropoutWrapper(
                    GRUCell(params.units, name="attention_gru"),
                    # output_keep_prob=1.0 - dropout,
                    input_keep_prob=1.0 - dropout,
                    # state_keep_prob=1.0 - dropout
                    variational_recurrent=True,
                    input_size=4 * params.units,
                    dtype=tf.float32),
                dropout=0)

            passage_repr, _ = tf.nn.bidirectional_dynamic_rnn(
                cell_fw,
                cell_bw,
                passage_enc,
                passage_words_length,
                dtype=tf.float32)
            passage_repr = tf.concat(passage_repr, -1)

        with tf.variable_scope('pointer'):
            question_att = attention_fun(
                memory=question_enc,
                memory_sequence_length=question_words_length,
                name="question_align")

            pool_param = tf.get_variable('pool_param',
                                         shape=(question_att._num_units, ),
                                         initializer=tf.initializers.ones)
            pool_param = tf.reshape(
                tf.tile(pool_param, [tf.shape(question_enc)[0]]),
                (-1, question_att._num_units))

            question_alignments, _ = question_att(pool_param, None)
            question_pool = tf.reduce_sum(
                tf.expand_dims(question_alignments, -1) * question_enc, 1)

            logits1, logits2 = pointer_net(passage_repr,
                                           passage_words_length,
                                           question_pool,
                                           params,
                                           attention_fun=attention_fun,
                                           dropout=dropout)

        outer = tf.matmul(tf.expand_dims(tf.nn.softmax(logits1), axis=2),
                          tf.expand_dims(tf.nn.softmax(logits2), axis=1))
        outer = tf.matrix_band_part(outer, 0, 15)
        p1 = tf.argmax(tf.reduce_max(outer, axis=2), axis=1)
        p2 = tf.argmax(tf.reduce_max(outer, axis=1), axis=1)

        if mode == tf.estimator.ModeKeys.PREDICT:
            predictions = {'start': p1, 'end': p2}
            export_outputs = {
                'prediction': tf.estimator.export.PredictOutput(predictions)
            }

            return tf.estimator.EstimatorSpec(mode,
                                              predictions=predictions,
                                              export_outputs=export_outputs)

        with tf.variable_scope('passage_ranking'):
            W_g = Dense(params.units, activation=tf.tanh, use_bias=False)
            v_g = Dense(1, use_bias=False)

            memory_layer = Dense(params.units,
                                 name="memory_layer",
                                 use_bias=False,
                                 dtype=tf.float32)
            query_layer = Dense(params.units,
                                name="query_layer",
                                use_bias=False,
                                dtype=tf.float32)
            g = []

            for i in range(passage_count):
                passage_mask = tf.boolean_mask(
                    passage_repr, tf.equal(features['partitions'], i))
                passage_i = tf.split(passage_mask,
                                     features['partitions_len'][:, i])
                passage_i = [
                    pad_to_shape_2d(
                        p, (tf.Dimension(params.passage_max_len), p.shape[1]))
                    for p in passage_i
                ]
                passage_i = tf.stack(passage_i)

                passage_alignment, _ = ReusableBahdanauAttention(
                    params.units,
                    passage_i,
                    features['partitions_len'][:, i],
                    memory_layer=memory_layer,
                    query_layer=query_layer,
                    name="passage_align")(question_pool, None)

                passage_pool = tf.reduce_sum(
                    tf.expand_dims(passage_alignment, -1) * passage_i, 1)
                g_i = v_g(W_g(tf.concat([question_pool, passage_pool], -1)))

                # g_i = tf.Print(g_i, [passage_mask, passage_i], message='is_nan_{}'.format(i), summarize=1000)
                g.append(g_i)

            g = tf.concat(g, -1)

    answer_start, answer_end, passage_rank = labels

    loss1 = tf.nn.softmax_cross_entropy_with_logits_v2(
        logits=logits1, labels=tf.stop_gradient(answer_start))
    loss2 = tf.nn.softmax_cross_entropy_with_logits_v2(
        logits=logits2, labels=tf.stop_gradient(answer_end))

    loss3 = tf.nn.softmax_cross_entropy_with_logits_v2(
        logits=g, labels=tf.stop_gradient(passage_rank))

    # loss1 = tf.Print(loss1, [tf.argmax(answer_start, -1), tf.argmax(answer_end, -1),
    #                          tf.reduce_mean(loss1), tf.reduce_mean(loss2), tf.reduce_mean(loss3)], message="loss")

    loss = (params.r * tf.reduce_mean(loss1 + loss2) + (1 - params.r) * tf.reduce_mean(loss3)) \
        if params.r < 1 else tf.reduce_mean(loss1 + loss2)

    if mode == tf.estimator.ModeKeys.TRAIN:
        optimizer = tf.train.AdadeltaOptimizer(
            learning_rate=params.learning_rate, epsilon=1e-6)
        global_step = tf.train.get_or_create_global_step()

        grads = optimizer.compute_gradients(loss)
        gradients, variables = zip(*grads)
        capped_grads, _ = tf.clip_by_global_norm(gradients, params.grad_clip)
        train_op = optimizer.apply_gradients(zip(capped_grads, variables),
                                             global_step=global_step)

        return EstimatorSpec(
            mode,
            loss=loss,
            train_op=train_op,
            scaffold=tf.train.Scaffold(
                init_feed_dict={
                    word_embeddings_placeholder: word_embeddings_np,
                    char_embeddings_placeholder: char_embeddings_np
                }),
        )

    if mode == tf.estimator.ModeKeys.EVAL:
        table = lookup_ops.index_to_string_table_from_file(
            params.word_vocab_file, value_column_index=0, delimiter=" ")
        return EstimatorSpec(mode,
                             loss=loss,
                             eval_metric_ops={
                                 'rouge-l':
                                 extraction_metric(p1, p2,
                                                   tf.argmax(answer_start, -1),
                                                   tf.argmax(answer_end, -1),
                                                   features['passage_words'],
                                                   params, table),
                                 'f1':
                                 extraction_metric(p1,
                                                   p2,
                                                   tf.argmax(answer_start, -1),
                                                   tf.argmax(answer_end, -1),
                                                   features['passage_words'],
                                                   params,
                                                   table,
                                                   metric='f1')
                             })
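
The outer-product step near the end of model_fn selects the answer span: it maximizes p_start[i] * p_end[j] subject to i <= j (with j - i capped at 15 by matrix_band_part). A small numpy sketch of the same selection, with toy probabilities assumed:

import numpy as np

p_start = np.array([0.1, 0.6, 0.3])
p_end = np.array([0.2, 0.2, 0.6])
outer = np.triu(np.outer(p_start, p_end))  # zero out spans with end < start
start = int(outer.max(axis=1).argmax())    # -> 1
end = int(outer.max(axis=0).argmax())      # -> 2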
Example 18

def cell(num_units):
    cell = tf.nn.rnn_cell.BasicLSTMCell(num_units=num_units)
    return DropoutWrapper(cell, output_keep_prob=keep_prob)
Example 19
def build_bottom_up_top_down_ic_model(sentence_input,
                                      img_features_input,
                                      dropout_input,
                                      num_tokens,
                                      num_labels,
                                      embeddings,
                                      embeddings_size,
                                      num_img_features,
                                      img_features_size,
                                      train_embeddings,
                                      rnn_hidden_size,
                                      multimodal_fusion_hidden_size,
                                      classification_hidden_size):
    sentence_length = tf.cast(
        tf.reduce_sum(
            tf.cast(tf.not_equal(sentence_input, tf.zeros_like(sentence_input, dtype=tf.int32)), tf.int64),
            1
        ),
        tf.int32
    )
    if embeddings is not None:
        embedding_matrix = tf.get_variable(
            "embedding_matrix",
            shape=(num_tokens, embeddings_size),
            initializer=glove_embeddings_initializer(embeddings),
            trainable=train_embeddings
        )
        print("Loaded GloVe embeddings!")
    else:
        embedding_matrix = tf.get_variable(
            "embedding_matrix",
            shape=(num_tokens, embeddings_size),
            initializer=tf.random_normal_initializer(stddev=0.05),
            trainable=train_embeddings
        )
    sentence_embeddings = tf.nn.embedding_lookup(embedding_matrix, sentence_input)
    lstm_cell = DropoutWrapper(
        tf.nn.rnn_cell.LSTMCell(rnn_hidden_size),
        input_keep_prob=dropout_input,
        output_keep_prob=dropout_input
    )
    sentence_outputs, sentence_final_states = tf.nn.dynamic_rnn(
        cell=lstm_cell,
        inputs=sentence_embeddings,
        sequence_length=sentence_length,
        dtype=tf.float32
    )
    normalized_img_features = tf.nn.l2_normalize(img_features_input, dim=2)

    reshaped_sentence = tf.reshape(tf.tile(sentence_final_states.h, [1, num_img_features]), [-1, num_img_features, rnn_hidden_size])
    img_sentence_concatenation = tf.concat([normalized_img_features, reshaped_sentence], -1)
    gated_img_sentence_concatenation = gated_tanh(img_sentence_concatenation, rnn_hidden_size)
    att_wa_sentence = lambda x: tf.contrib.layers.fully_connected(x, 1, activation_fn=None, biases_initializer=None)
    a_sentence = att_wa_sentence(gated_img_sentence_concatenation)
    a_sentence = tf.nn.softmax(tf.squeeze(a_sentence))
    v_head_sentence = tf.squeeze(tf.matmul(tf.expand_dims(a_sentence, 1), normalized_img_features))
    v_head_sentence.set_shape((sentence_embeddings.get_shape()[0], img_features_size))

    gated_sentence = tf.nn.dropout(
        gated_tanh(sentence_final_states.h, multimodal_fusion_hidden_size),
        keep_prob=dropout_input
    )
    gated_img_features_sentence = tf.nn.dropout(
        gated_tanh(v_head_sentence, multimodal_fusion_hidden_size),
        keep_prob=dropout_input
    )
    h_sentence_img = tf.multiply(gated_sentence, gated_img_features_sentence)
    gated_first_layer = tf.nn.dropout(
        gated_tanh(h_sentence_img, classification_hidden_size),
        keep_prob=dropout_input
    )
    gated_second_layer = tf.nn.dropout(
        gated_tanh(gated_first_layer, classification_hidden_size),
        keep_prob=dropout_input
    )
    gated_third_layer = tf.nn.dropout(
        gated_tanh(gated_second_layer, classification_hidden_size),
        keep_prob=dropout_input
    )

    return tf.contrib.layers.fully_connected(
        gated_third_layer,
        num_labels,
        activation_fn=None
    )
Example 20
learning_rate = 0.002
training_epochs = 10
batch_size = 100
steps_for_print = 5
steps_for_validate = 10
keep_prob = tf.placeholder(tf.float32)

# input place holders
X = tf.placeholder(tf.float32, [None, 784])
X_img = tf.reshape(X, [-1, 28, 28])
Y = tf.placeholder(tf.int32, [None, 1])
Y_onehot = tf.reshape(tf.one_hot(Y, 10), [-1, 10])

# layers
cells = tf.nn.rnn_cell.MultiRNNCell([
    DropoutWrapper(tf.nn.rnn_cell.BasicLSTMCell(num_units=256),
                   output_keep_prob=keep_prob) for _ in range(3)
])
h0 = cells.zero_state(batch_size, dtype=tf.float32)
output, hs = tf.nn.dynamic_rnn(cells, inputs=X_img, initial_state=h0)
L1 = output[:, -1, :]

W2 = tf.Variable(tf.random_normal([256, 10]))
b2 = tf.Variable(tf.random_normal([10]))
hypothesis = tf.matmul(L1, W2) + b2

# define cost/loss & optimizer
cost = tf.reduce_mean(
    tf.nn.softmax_cross_entropy_with_logits(logits=hypothesis,
                                            labels=Y_onehot))
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(cost)
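
A sketch of how keep_prob would typically be fed here (the values are assumptions): dropout on during training, off for evaluation:

# sess.run(optimizer, feed_dict={X: batch_x, Y: batch_y, keep_prob: 0.7})
# sess.run(cost,      feed_dict={X: test_x,  Y: test_y,  keep_prob: 1.0})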
Example 21
def build_lstm_vte_model(premise_input,
                         hypothesis_input,
                         img_features_input,
                         dropout_input,
                         num_tokens,
                         num_labels,
                         embeddings,
                         embeddings_size,
                         train_embeddings,
                         rnn_hidden_size,
                         multimodal_fusion_hidden_size,
                         classification_hidden_size):
    premise_length = tf.cast(
        tf.reduce_sum(
            tf.cast(tf.not_equal(premise_input, tf.zeros_like(premise_input, dtype=tf.int32)), tf.int64),
            1
        ),
        tf.int32
    )
    hypothesis_length = tf.cast(
        tf.reduce_sum(
            tf.cast(tf.not_equal(hypothesis_input, tf.zeros_like(hypothesis_input, dtype=tf.int32)), tf.int64),
            1
        ),
        tf.int32
    )
    if embeddings is not None:
        embedding_matrix = tf.get_variable(
            "embedding_matrix",
            shape=(num_tokens, embeddings_size),
            initializer=glove_embeddings_initializer(embeddings),
            trainable=train_embeddings
        )
        print("Loaded GloVe embeddings!")
    else:
        embedding_matrix = tf.get_variable(
            "embedding_matrix",
            shape=(num_tokens, embeddings_size),
            initializer=tf.random_normal_initializer(stddev=0.05),
            trainable=train_embeddings
        )
    premise_embeddings = tf.nn.embedding_lookup(embedding_matrix, premise_input)
    hypothesis_embeddings = tf.nn.embedding_lookup(embedding_matrix, hypothesis_input)
    lstm_cell = DropoutWrapper(
        tf.nn.rnn_cell.LSTMCell(rnn_hidden_size),
        input_keep_prob=dropout_input,
        output_keep_prob=dropout_input
    )
    premise_outputs, premise_final_states = tf.nn.dynamic_rnn(
        cell=lstm_cell,
        inputs=premise_embeddings,
        sequence_length=premise_length,
        dtype=tf.float32
    )
    # premise_last = extract_axis_1(premise_outputs, premise_length - 1)
    hypothesis_outputs, hypothesis_final_states = tf.nn.dynamic_rnn(
        cell=lstm_cell,
        inputs=hypothesis_embeddings,
        sequence_length=hypothesis_length,
        dtype=tf.float32
    )
    # hypothesis_last = extract_axis_1(hypothesis_outputs, hypothesis_length - 1)
    normalized_img_features = tf.nn.l2_normalize(img_features_input, dim=1)
    premise_hidden_features = tf.contrib.layers.fully_connected(
        premise_final_states.h,
        multimodal_fusion_hidden_size,
        activation_fn=tf.nn.relu
    )
    hypothesis_hidden_features = tf.contrib.layers.fully_connected(
        hypothesis_final_states.h,
        multimodal_fusion_hidden_size,
        activation_fn=tf.nn.relu
    )
    img_hidden_features = tf.contrib.layers.fully_connected(
        normalized_img_features,
        multimodal_fusion_hidden_size,
        activation_fn=tf.nn.relu
    )
    premise_img_multimodal_fusion = tf.multiply(premise_hidden_features, img_hidden_features)
    hypothesis_img_multimodal_fusion = tf.multiply(hypothesis_hidden_features, img_hidden_features)
    final_concatenation = tf.concat([premise_img_multimodal_fusion, hypothesis_img_multimodal_fusion], axis=1)
    return tf.contrib.layers.fully_connected(
        tf.contrib.layers.fully_connected(
            tf.contrib.layers.fully_connected(
                tf.contrib.layers.fully_connected(
                    final_concatenation,
                    classification_hidden_size,
                    activation_fn=tf.nn.relu
                ),
                classification_hidden_size,
                activation_fn=tf.nn.relu
            ),
            classification_hidden_size,
            activation_fn=tf.nn.relu
        ),
        num_labels,
        activation_fn=None
    )