Example #1
    def model_fn(self, features, labels, mode, params):
        _, final_states = tf.nn.dynamic_rnn(
            cell=lstm_cell(**params, mode=mode),
            inputs=features["sentence_emb"],
            sequence_length=features["sentence_len"],
            dtype=tf.float32,
        )

        logits = tf.layers.dense(
            inputs=final_states.h,
            units=params["_n_out_classes"],
            kernel_initializer=params["initializer"],
            bias_initializer=params["initializer"],
        )

        loss = tf.losses.sparse_softmax_cross_entropy(
            labels=labels, logits=logits
        )

        optimizer = resolve_optimizer(**params)

        return self.make_estimator_spec(
            mode=mode, logits=logits, optimizer=optimizer, loss=loss
        )
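
The lstm_cell helper these examples splat params into is defined elsewhere in the project. As a rough sketch of what such a factory typically looks like, assuming params carries hidden_units, initializer, and keep_prob (any other entries fall through to **kwargs):

    import tensorflow as tf

    def lstm_cell(mode=None, hidden_units=None, initializer=None,
                  keep_prob=1.0, **kwargs):
        # Sketch only: hidden_units, initializer, and keep_prob are
        # assumed names for entries in the params dict.
        cell = tf.nn.rnn_cell.LSTMCell(
            num_units=hidden_units, initializer=initializer
        )
        # Apply output dropout only while training, mirroring how the
        # examples thread mode into the cell.
        if mode == tf.estimator.ModeKeys.TRAIN and keep_prob < 1.0:
            cell = tf.nn.rnn_cell.DropoutWrapper(
                cell, output_keep_prob=keep_prob
            )
        return cell
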
Example #2
    def model_fn(self, features, labels, mode, params):
        with tf.variable_scope("left_lstm"):
            _, final_states_left = tf.nn.dynamic_rnn(
                cell=lstm_cell(**params, mode=mode),
                inputs=features["left_emb"],
                sequence_length=features["left_len"],
                dtype=tf.float32,
            )

        with tf.variable_scope("right_lstm"):
            _, final_states_right = tf.nn.dynamic_rnn(
                cell=lstm_cell(**params, mode=mode),
                inputs=features["right_emb"],
                sequence_length=features["right_len"],
                dtype=tf.float32,
            )

        concatenated_final_states = tf.concat(
            [final_states_left.h, final_states_right.h], axis=1)

        logits = tf.layers.dense(
            inputs=concatenated_final_states,
            units=params["_n_out_classes"],
            kernel_initializer=params["initializer"],
            bias_initializer=params["initializer"],
        )

        loss = tf.losses.sparse_softmax_cross_entropy(labels=labels,
                                                      logits=logits)

        optimizer = resolve_optimizer(**params)

        return self.make_estimator_spec(mode=mode,
                                        logits=logits,
                                        optimizer=optimizer,
                                        loss=loss)
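
Both examples above end the same way: logits, loss, and an optimizer go into self.make_estimator_spec. That method belongs to the project's base class and is not shown; the sketch below is only the standard TF 1.x Estimator wiring it presumably wraps, written as a free function for illustration:

    import tensorflow as tf

    def make_estimator_spec(mode, logits, optimizer, loss):
        # Sketch only: derive predictions from logits and wire the three
        # Estimator modes together the standard TF 1.x way.
        predictions = {
            "class_ids": tf.argmax(logits, axis=1),
            "probabilities": tf.nn.softmax(logits),
        }
        if mode == tf.estimator.ModeKeys.PREDICT:
            return tf.estimator.EstimatorSpec(mode, predictions=predictions)
        if mode == tf.estimator.ModeKeys.TRAIN:
            train_op = optimizer.minimize(
                loss, global_step=tf.train.get_global_step()
            )
            return tf.estimator.EstimatorSpec(
                mode, loss=loss, train_op=train_op
            )
        # EVAL: metrics such as tf.metrics.accuracy would be added here.
        return tf.estimator.EstimatorSpec(
            mode, loss=loss, predictions=predictions
        )

With a spec builder like this, each model_fn only has to describe the network; mode handling, prediction outputs, and the train op stay in one place.
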
Example #3
    def model_fn(self, features, labels, mode, params):
        target_offset = tf.cast(features["target_offset"], tf.int32)

        memory = features["context_emb"]

        max_ctxt_len = tf.shape(memory)[1]

        context_locations = get_absolute_distance_vector(
            target_locs=target_offset,
            seq_lens=features["context_len"],
            max_seq_len=max_ctxt_len,
        )

        v_aspect = variable_len_batch_mean(
            input_tensor=features["target_emb"],
            seq_lengths=features["target_len"],
            op_name="target_embedding_avg",
        )

        attn_snapshots = create_snapshots_container(
            shape_like=features["context_ids"], n_snaps=params["n_hops"]
        )

        hop_number = tf.constant(1)

        initial_hop_inputs = (hop_number, memory, v_aspect, attn_snapshots)

        def condition(hop_num, ext_memory, input_vec, attn_snapshots):
            return tf.less_equal(hop_num, params["n_hops"])

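        # One hop: fuse a per-hop location vector into the memory, attend
        # over it with the current input vector, and add a linear
        # transform of that input to form the next input vector.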
        def hop(hop_num, ext_memory, input_vec, attn_snapshots):
            location_vector_model_fn = get_location_vector_model(
                model_num=params["location_model"]
            )

            v_loc = location_vector_model_fn(
                locs=context_locations,
                seq_lens=features["context_len"],
                emb_dim=params["_embedding_dim"],
                hop=hop_num,
                init=params["initializer"],
            )

            if params["location_model"] == 3:
                ext_memory = ext_memory + v_loc
            else:
                ext_memory = tf.multiply(memory, v_loc)

            with tf.variable_scope("linear_layer", reuse=tf.AUTO_REUSE):
                linear_out = tf.layers.dense(
                    inputs=tf.squeeze(input_vec, axis=1),
                    units=params["_embedding_dim"],
                    activation=None,
                    kernel_initializer=params["initializer"],
                    bias_initializer=params["initializer"],
                )

            with tf.variable_scope("attention_layer", reuse=tf.AUTO_REUSE):
                attn_out, attn_snapshot = memnet_content_attn_unit(
                    seq_lens=features["context_len"],
                    memory=ext_memory,
                    v_aspect=input_vec,
                    emb_dim=params["_embedding_dim"],
                    init=params["initializer"],
                )

            attn_snapshots = append_snapshot(
                container=attn_snapshots, new_snap=attn_snapshot, index=hop_num
            )

            output_vec = attn_out + linear_out
            output_vec = tf.expand_dims(output_vec, axis=1)

            hop_num = tf.add(hop_num, 1)

            return (hop_num, ext_memory, output_vec, attn_snapshots)

        _, _, final_sentence_rep, attn_snapshots = tf.while_loop(
            cond=condition,
            body=hop,
            loop_vars=initial_hop_inputs,
            shape_invariants=(
                hop_number.get_shape(),
                memory.get_shape(),
                v_aspect.get_shape(),
                tf.TensorShape(dims=[params["n_hops"], None, None, 1]),
            ),
        )

        literals, attn_snapshots = zip_attn_snapshots_with_literals(
            literals=features["context"],
            snapshots=attn_snapshots,
            num_layers=params["n_hops"],
        )
        attn_info = tf.tuple([literals, attn_snapshots])
        generate_attn_heatmap_summary(attn_info)

        final_sentence_rep = tf.squeeze(final_sentence_rep, axis=1)

        logits = tf.layers.dense(
            inputs=final_sentence_rep,
            units=params["_n_out_classes"],
            kernel_initializer=params["initializer"],
            bias_initializer=params["initializer"],
        )

        loss = tf.losses.sparse_softmax_cross_entropy(
            labels=labels, logits=logits
        )

        optimizer = resolve_optimizer(**params)

        return self.make_estimator_spec(
            mode=mode, logits=logits, optimizer=optimizer, loss=loss
        )
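
Examples #3 and #4 (and #6 below) pool variable-length spans with variable_len_batch_mean. Its body is not shown; a minimal sketch, assuming it computes a padding-aware mean and keeps the time axis so callers can squeeze axis 1 or broadcast over it (both usages appear in these examples):

    import tensorflow as tf

    def variable_len_batch_mean(input_tensor, seq_lengths, op_name):
        # Sketch only: mean over the time axis that ignores padding,
        # returning [batch, 1, dim] so the result broadcasts over time.
        with tf.name_scope(op_name):
            max_len = tf.shape(input_tensor)[1]
            mask = tf.sequence_mask(
                seq_lengths, maxlen=max_len, dtype=tf.float32
            )  # [batch, time]
            masked = input_tensor * tf.expand_dims(mask, axis=2)
            totals = tf.reduce_sum(masked, axis=1, keepdims=True)
            denom = tf.cast(
                tf.reshape(seq_lengths, [-1, 1, 1]), tf.float32
            )
            return totals / denom
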
Example #4
    def model_fn(self, features, labels, mode, params):
        with tf.variable_scope("target_bi_lstm"):
            features["target_emb"] = tf.nn.dropout(
                features["target_emb"], keep_prob=params["keep_prob"])
            target_hidden_states, _, _ = stack_bidirectional_dynamic_rnn(
                cells_fw=[lstm_cell(**params, mode=mode)],
                cells_bw=[lstm_cell(**params, mode=mode)],
                inputs=features["target_emb"],
                sequence_length=features["target_len"],
                dtype=tf.float32,
            )
            r_t = variable_len_batch_mean(
                input_tensor=target_hidden_states,
                seq_lengths=features["target_len"],
                op_name="target_avg_pooling",
            )

        with tf.variable_scope("left_bi_lstm"):
            features["left_emb"] = tf.nn.dropout(features["left_emb"],
                                                 keep_prob=params["keep_prob"])
            left_hidden_states, _, _ = stack_bidirectional_dynamic_rnn(
                cells_fw=[lstm_cell(**params, mode=mode)],
                cells_bw=[lstm_cell(**params, mode=mode)],
                inputs=features["left_emb"],
                sequence_length=features["left_len"],
                dtype=tf.float32,
            )

        with tf.variable_scope("right_bi_lstm"):
            features["right_emb"] = tf.nn.dropout(
                features["right_emb"], keep_prob=params["keep_prob"])
            right_hidden_states, _, _ = stack_bidirectional_dynamic_rnn(
                cells_fw=[lstm_cell(**params, mode=mode)],
                cells_bw=[lstm_cell(**params, mode=mode)],
                inputs=features["right_emb"],
                sequence_length=features["right_len"],
                dtype=tf.float32,
            )

        with tf.variable_scope("left_t2c_attn"):
            left_hidden_states = tf.nn.dropout(left_hidden_states,
                                               keep_prob=params["keep_prob"])
            r_l, left_attn_info = attention_unit(
                h_states=left_hidden_states,
                hidden_units=params["hidden_units"] * 2,
                seq_lengths=features["left_len"],
                attn_focus=r_t,
                init=params["initializer"],
                bias_init=params["bias_initializer"],
                sp_literal=features["left"],
            )

        with tf.variable_scope("right_t2c_attn"):
            right_hidden_states = tf.nn.dropout(right_hidden_states,
                                                keep_prob=params["keep_prob"])
            r_r, right_attn_info = attention_unit(
                h_states=right_hidden_states,
                hidden_units=params["hidden_units"] * 2,
                seq_lengths=features["right_len"],
                attn_focus=r_t,
                init=params["initializer"],
                bias_init=params["bias_initializer"],
                sp_literal=features["right"],
            )

        target_hidden_states = tf.nn.dropout(target_hidden_states,
                                             keep_prob=params["keep_prob"])

        with tf.variable_scope("left_c2t_attn"):
            r_t_l, left_target_attn_info = attention_unit(
                h_states=target_hidden_states,
                hidden_units=params["hidden_units"] * 2,
                seq_lengths=features["target_len"],
                attn_focus=tf.expand_dims(r_l, axis=1),
                init=params["initializer"],
                bias_init=params["bias_initializer"],
                sp_literal=features["target"],
            )

        with tf.variable_scope("right_c2t_attn"):
            r_t_r, right_target_attn_info = attention_unit(
                h_states=target_hidden_states,
                hidden_units=params["hidden_units"] * 2,
                seq_lengths=features["target_len"],
                attn_focus=tf.expand_dims(r_r, axis=1),
                init=params["initializer"],
                bias_init=params["bias_initializer"],
                sp_literal=features["target"],
            )

        generate_attn_heatmap_summary(
            left_attn_info,
            left_target_attn_info,
            right_target_attn_info,
            right_attn_info,
        )

        final_sentence_rep = tf.concat([r_l, r_t_l, r_t_r, r_r], axis=1)

        final_sentence_rep = tf.nn.dropout(final_sentence_rep,
                                           keep_prob=params["keep_prob"])
        logits = tf.layers.dense(
            inputs=final_sentence_rep,
            units=params["_n_out_classes"],
            kernel_initializer=params["initializer"],
            bias_initializer=params["bias_initializer"],
        )

        loss = l2_regularized_loss(labels=labels,
                                   logits=logits,
                                   l2_weight=params["l2_weight"])

        optimizer = resolve_optimizer(**params)

        return self.make_estimator_spec(mode=mode,
                                        logits=logits,
                                        optimizer=optimizer,
                                        loss=loss)
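
l2_regularized_loss replaces the plain cross-entropy of the earlier examples. A plausible sketch, assuming it adds an L2 penalty over trainable weights scaled by params["l2_weight"]; whether the real helper excludes biases is an assumption here:

    import tensorflow as tf

    def l2_regularized_loss(labels, logits, l2_weight):
        # Sketch only: softmax cross-entropy plus an L2 penalty on every
        # trainable weight matrix (biases excluded by assumption).
        ce = tf.losses.sparse_softmax_cross_entropy(
            labels=labels, logits=logits
        )
        l2 = tf.add_n([
            tf.nn.l2_loss(v) for v in tf.trainable_variables()
            if "bias" not in v.name
        ])
        return ce + l2_weight * l2
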
Example #5
File: ian.py Project: SijanC147/Msc
    def model_fn(self, features, labels, mode, params):
        with tf.variable_scope("context_lstm"):
            features["context_emb"] = tf.nn.dropout(
                features["context_emb"], keep_prob=params["keep_prob"]
            )
            context_hidden_states, _ = tf.nn.dynamic_rnn(
                cell=lstm_cell(**params),
                inputs=features["context_emb"],
                sequence_length=features["context_len"],
                dtype=tf.float32,
            )
            c_avg = variable_len_batch_mean(
                input_tensor=context_hidden_states,
                seq_lengths=features["context_len"],
                op_name="context_avg_pooling",
            )

        with tf.variable_scope("target_lstm"):
            features["target_emb"] = tf.nn.dropout(
                features["target_emb"], keep_prob=params["keep_prob"]
            )
            target_hidden_states, _ = tf.nn.dynamic_rnn(
                cell=lstm_cell(**params),
                inputs=features["target_emb"],
                sequence_length=features["target_len"],
                dtype=tf.float32,
            )
            t_avg = variable_len_batch_mean(
                input_tensor=target_hidden_states,
                seq_lengths=features["target_len"],
                op_name="target_avg_pooling",
            )

        with tf.variable_scope("attention_layer", reuse=tf.AUTO_REUSE):
            context_hidden_states = tf.nn.dropout(
                context_hidden_states, keep_prob=params["keep_prob"]
            )
            c_r, ctxt_attn_info = attention_unit(
                h_states=context_hidden_states,
                hidden_units=params["hidden_units"],
                seq_lengths=features["context_len"],
                attn_focus=t_avg,
                init=params["initializer"],
                bias_init=params["bias_initializer"],
                sp_literal=features["context"],
            )

            target_hidden_states = tf.nn.dropout(
                target_hidden_states, keep_prob=params["keep_prob"]
            )
            t_r, trg_attn_info = attention_unit(
                h_states=target_hidden_states,
                hidden_units=params["hidden_units"],
                seq_lengths=features["target_len"],
                attn_focus=c_avg,
                init=params["initializer"],
                bias_init=params["bias_initializer"],
                sp_literal=features["target"],
            )

        generate_attn_heatmap_summary(trg_attn_info, ctxt_attn_info)

        final_sentence_rep = tf.concat([t_r, c_r], axis=1)
        final_sentence_rep = tf.nn.dropout(
            final_sentence_rep, keep_prob=params["keep_prob"]
        )

        logits = tf.layers.dense(
            inputs=final_sentence_rep,
            units=params["_n_out_classes"],
            activation=tf.nn.tanh,
            kernel_initializer=params["initializer"],
            bias_initializer=params.get(
                "bias_initializer", params["initializer"]
            ),
        )

        loss = l2_regularized_loss(
            labels=labels, logits=logits, l2_weight=params["l2_weight"]
        )

        optimizer = resolve_optimizer(**params)

        return self.make_estimator_spec(
            mode=mode, logits=logits, optimizer=optimizer, loss=loss
        )
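
attention_unit does the heavy lifting in Examples #4 and #5: it scores a set of hidden states against a focus vector and returns a pooled representation plus attention info for the heatmap summaries. The project's actual scoring function is not shown, so the sketch below is one conventional formulation (concat-and-project scoring with a masked softmax), not the repository's implementation:

    import tensorflow as tf

    def attention_unit(h_states, hidden_units, seq_lengths, attn_focus,
                       init=None, bias_init=None, sp_literal=None):
        # Sketch only: score each hidden state against the focus vector,
        # softmax over valid timesteps, and pool the hidden states.
        max_len = tf.shape(h_states)[1]
        focus = tf.tile(attn_focus, [1, max_len, 1])  # [batch, time, dim]
        proj = tf.layers.dense(
            inputs=tf.concat([h_states, focus], axis=2),
            units=hidden_units,
            activation=tf.nn.tanh,
            kernel_initializer=init,
            bias_initializer=bias_init,
        )
        scores = tf.squeeze(
            tf.layers.dense(inputs=proj, units=1, use_bias=False,
                            kernel_initializer=init),
            axis=2,
        )  # [batch, time]
        mask = tf.sequence_mask(seq_lengths, maxlen=max_len)
        scores = tf.where(mask, scores, tf.fill(tf.shape(scores), -1e9))
        alphas = tf.nn.softmax(scores)
        # Attention-weighted sum of the hidden states, [batch, units].
        weighted = tf.reduce_sum(
            h_states * tf.expand_dims(alphas, axis=2), axis=1
        )
        # The literals are presumably threaded through for the heatmaps.
        return weighted, (sp_literal, alphas)

The masking step matters: without pushing padded positions to -1e9 before the softmax, padding would receive nonzero attention weight.
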
Example #6
    def model_fn(self, features, labels, mode, params):
        max_left_len = tf.shape(features["left_emb"])[1]
        max_right_len = tf.shape(features["right_emb"])[1]

        with tf.name_scope("target_connection"):
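            # Pair each word embedding with the mean target embedding:
            # stacking on a new axis and then reshaping concatenates the
            # two along the feature dimension at every timestep.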
            mean_target_embedding = variable_len_batch_mean(
                input_tensor=features["target_emb"],
                seq_lengths=features["target_len"],
                op_name="target_embedding_avg",
            )
            features["left_emb"] = tf.stack(
                values=[
                    features["left_emb"],
                    tf.ones(tf.shape(features["left_emb"])) *
                    mean_target_embedding,
                ],
                axis=2,
            )
            features["left_emb"] = tf.reshape(
                tensor=features["left_emb"],
                shape=[-1, max_left_len, 2 * params["_embedding_dim"]],
            )
            features["right_emb"] = tf.stack(
                values=[
                    features["right_emb"],
                    tf.ones(tf.shape(features["right_emb"])) *
                    mean_target_embedding,
                ],
                axis=2,
            )
            features["right_emb"] = tf.reshape(
                tensor=features["right_emb"],
                shape=[-1, max_right_len, 2 * params["_embedding_dim"]],
            )

        with tf.variable_scope("left_lstm"):
            _, final_states_left = tf.nn.dynamic_rnn(
                cell=lstm_cell(**params, mode=mode),
                inputs=features["left_emb"],
                sequence_length=features["left_len"],
                dtype=tf.float32,
            )

        with tf.variable_scope("right_lstm"):
            _, final_states_right = tf.nn.dynamic_rnn(
                cell=lstm_cell(**params, mode=mode),
                inputs=features["right_emb"],
                sequence_length=features["right_len"],
                dtype=tf.float32,
            )

        concatenated_final_states = tf.concat(
            [final_states_left.h, final_states_right.h], axis=1)

        logits = tf.layers.dense(
            inputs=concatenated_final_states,
            units=params["_n_out_classes"],
            kernel_initializer=params["initializer"],
            bias_initializer=params["initializer"],
        )

        loss = tf.losses.sparse_softmax_cross_entropy(labels=labels,
                                                      logits=logits)

        optimizer = resolve_optimizer(**params)

        return self.make_estimator_spec(mode=mode,
                                        logits=logits,
                                        optimizer=optimizer,
                                        loss=loss)
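
Finally, resolve_optimizer(**params) turns the params dict into a TF 1.x optimizer in every example. A minimal sketch under the assumption that params holds string "optimizer" and numeric "learning_rate" keys (names guessed, not confirmed by these snippets):

    import tensorflow as tf

    def resolve_optimizer(optimizer="adam", learning_rate=0.01, **kwargs):
        # Sketch only: params is splatted in, so unrelated keys land in
        # **kwargs; the "optimizer" string picks a TF 1.x optimizer class.
        optimizers = {
            "sgd": tf.train.GradientDescentOptimizer,
            "adam": tf.train.AdamOptimizer,
            "adagrad": tf.train.AdagradOptimizer,
        }
        return optimizers[optimizer.lower()](learning_rate)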