Code example #1
    def build_graph(self, reps, context_mask):
        """give the final prediction for start_pos and end_pos
        Args:
            reps: final output representation 
                  [batch_sz, context_length, hidden_sz]

            context_mask:
                  [batch_sz, context_length]
        Return: 
            (logits_start, probdist_start, logits_end, probdist_end)
            each of shape [batch_sz, context_length]
        """
        cx_len = context_mask.shape[1]
        with vs.variable_scope(self.scope):
            start_reps = tf.contrib.layers.fully_connected(
                reps, num_outputs=self.hidden_sz)
            logits_start, probdist_start = self._pred_start(
                start_reps, context_mask)

            end_reps = tf.concat([reps, tf.expand_dims(probdist_start, 2)], 2)
            end_encoder = RNNEncoder(self.hidden_sz, 1, "lstm", "end_encoder")
            end_reps = end_encoder.build_graph(end_reps, context_mask)
            logits_end, probdist_end = self._pred_end(end_reps, context_mask)

            if not self.is_training:
                # [batch_sz]: index of starting word
                start_idx = tf.argmax(probdist_start, 1)
                # [batch_sz, context_length]: 1 if the position is valid for the end word, else 0.001
                start_mask = 1 - 0.999 * tf.cast(
                    tf.sequence_mask(start_idx, cx_len, dtype=tf.int32),
                    tf.float32)
                # a position is valid for the end word only if both the context mask and the start mask are 1
                logits_end = logits_end * start_mask
                probdist_end = probdist_end * start_mask
            return (logits_start, probdist_start, logits_end, probdist_end)
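The _pred_start and _pred_end helpers called above are not shown in this example. Below is a minimal sketch of what such a helper could look like, assuming the common pattern of projecting each context position to a single logit and normalizing with the masked_softmax helper that code examples #12 and #19 also use (a sketch of masked_softmax itself appears after code example #12); the name and signature come from the call sites above, everything else is an assumption rather than the project's actual implementation.

    def _pred_start(self, reps, context_mask):
        """Project each position to one logit and normalize over the context.
        Args:
            reps:         [batch_sz, context_length, hidden_sz]
            context_mask: [batch_sz, context_length], 1 for real tokens
        Return:
            (logits, probdist), each of shape [batch_sz, context_length]
        """
        with vs.variable_scope("pred_start"):
            # [batch_sz, context_length, 1] -> [batch_sz, context_length]
            logits = tf.contrib.layers.fully_connected(
                reps, num_outputs=1, activation_fn=None)
            logits = tf.squeeze(logits, axis=[2])
            # padded positions receive ~0 probability
            return masked_softmax(logits, context_mask, 1)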
Code example #2
class OutputDoubleLSTM(object):
    """base class for output representation"""
    def __init__(self, output_sz, keep_prob):
        """
        Args:
        """
        self.output_sz = output_sz
        self.scope = "double_lstm"
        self.keep_prob = keep_prob
        self.lstm_encoder1 = RNNEncoder(output_sz, 1, "lstm", "encoder1")
        self.lstm_encoder2 = RNNEncoder(output_sz, keep_prob, "lstm",
                                        "encoder2")

    def build_graph(self, reps, context_mask):
        """
        Args:
             reps: [batch_sz, context_length, reps_sz]

        Return: 
             [batch_sz, context_length, output_sz]
        """
        with vs.variable_scope(self.scope):
            lstm_1_out = self.lstm_encoder1.build_graph(reps, context_mask)
            lstm_2_out = self.lstm_encoder2.build_graph(
                lstm_1_out, context_mask)
            return lstm_2_out
Code example #3
class OutputDoubleLSTMAct(object):
    """base class for output representation"""
    def __init__(self, output_sz, keep_prob, activation):
        """
        Args:
        """
        self.output_sz = output_sz
        self.activation = activation
        self.scope = "double_lstm_{}".format(activation)
        self.keep_prob = keep_prob
        self.lstm_encoder1 = RNNEncoder(output_sz, keep_prob, "lstm",
                                        "encoder1")
        self.lstm_encoder2 = RNNEncoder(output_sz, keep_prob, "gru",
                                        "encoder2")
        logger.error(
            "Output Layer with Double LSTM and Activation {} created ...".
            format(activation))

    def build_graph(self, reps, context_mask):
        """
        Args:
             reps: [batch_sz, context_length, reps_sz]

        Return: 
             [batch_sz, context_length, output_sz]
        """
        with vs.variable_scope(self.scope):
            lstm_1_out = self.lstm_encoder1.build_graph(reps, context_mask)
            lstm_2_out = self.lstm_encoder2.build_graph(
                lstm_1_out, context_mask)
            if self.activation == "tanh":
                return tf.nn.tanh(lstm_2_out)
            elif self.activation == "relu":
                return tf.nn.relu(lstm_2_out)
            sys.exit("No such activation: {}!".format(self.activation))
Code example #4
 def __init__(self, output_sz, keep_prob):
     """
     Args:
     """
     self.output_sz = output_sz
     self.scope = "output_lstm"
     self.lstm_encoder = RNNEncoder(output_sz, keep_prob, "lstm")
Code example #5
 def __init__(self, output_sz, keep_prob):
     """
     Args:
     """
     self.output_sz = output_sz
     self.scope = "double_lstm"
     self.keep_prob = keep_prob
     self.lstm_encoder1 = RNNEncoder(output_sz, 1, "lstm", "encoder1")
     self.lstm_encoder2 = RNNEncoder(output_sz, keep_prob, "lstm",
                                     "encoder2")
Code example #6
 def __init__(self, output_sz, keep_prob, activation):
     """
     Args:
     """
     self.output_sz = output_sz
     self.activation = activation
     self.scope = "double_lstm_{}".format(activation)
     self.keep_prob = keep_prob
     self.lstm_encoder1 = RNNEncoder(output_sz, keep_prob, "lstm",
                                     "encoder1")
     self.lstm_encoder2 = RNNEncoder(output_sz, keep_prob, "gru",
                                     "encoder2")
     logger.error(
         "Output Layer with Double LSTM and Activation {} created ...".
         format(activation))
Code example #7
 def add_encoder(self, vocab_sizes, embedding_sizes, rnn_type, hidden_size,
                 num_layers, bidirectional):
     encoder_input = DiscreteFeatureSequenceInput(vocab_sizes,
                                                  embedding_sizes)
     encoder = RNNEncoder(encoder_input, rnn_type, hidden_size, num_layers,
                          bidirectional)
     self.encoder = encoder
     return self
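Because add_encoder stores the encoder and returns self, it is meant to be chained with the builder's other methods. A hypothetical usage sketch (the builder class name and the argument values are placeholders, not taken from the source):

model = (Seq2SeqBuilder()
         .add_encoder(vocab_sizes=[10000], embedding_sizes=[128],
                      rnn_type="gru", hidden_size=256,
                      num_layers=2, bidirectional=True))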
Code example #8
File: rnnseq2seq.py Project: kedz/ntg
    def from_args(cls,
                  args,
                  encoder_input_modules,
                  decoder_input_modules,
                  dropout=None,
                  rnn_type=None,
                  target_vocab_size=None,
                  attention_type=None,
                  bidirectional=None,
                  learn_init=None,
                  bridge_type=None):

        if learn_init is None:
            learn_init = bool(args.learn_init)

        if bridge_type is None:
            bridge_type = args.bridge_type

        encoder = RNNEncoder.from_args(
            args,
            encoder_input_size=encoder_input_modules.embedding_size,
            dropout=dropout,
            rnn_type=rnn_type,
            bidirectional=bidirectional)

        if args.rnn_type == "lstm":
            bridge1 = be.from_args(args,
                                   bridge_type=bridge_type,
                                   bidirectional=bidirectional)
            bridge2 = be.from_args(args,
                                   bridge_type=bridge_type,
                                   bidirectional=bidirectional)
            bridge = ParallelModule([bridge1, bridge2])

        else:
            bridge = be.from_args(args,
                                  bridge_type=bridge_type,
                                  bidirectional=bidirectional)

        decoder = RNNDecoder.from_args(
            args,
            decoder_input_size=decoder_input_modules.embedding_size,
            dropout=dropout,
            rnn_type=rnn_type,
            target_vocab_size=target_vocab_size,
            attention_type=attention_type)
        return cls(encoder_input_modules,
                   decoder_input_modules,
                   encoder,
                   bridge,
                   decoder,
                   learn_init=learn_init)
Code example #9
class OutputLSTM(object):
    """base class for output representation"""
    def __init__(self, output_sz, keep_prob):
        """
        Args:
        """
        self.output_sz = output_sz
        self.scope = "output_lstm"
        self.lstm_encoder = RNNEncoder(output_sz, keep_prob, "lstm")

    def build_graph(self, reps, context_mask):
        """
        Args:
             reps: [batch_sz, context_length, reps_sz]

        Return: 
             [batch_sz, context_length, output_sz]
        """
        with vs.variable_scope(self.scope):
            return self.lstm_encoder.build_graph(reps, context_mask)
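Most of the output-layer and attention examples on this page (e.g. #1, #9, #12, #19) construct RNNEncoder(hidden_sz, keep_prob, cell_type[, name]) and call build_graph(inputs, mask), but the class itself never appears here (the RNNEncoder names in the other examples belong to unrelated projects with different signatures). Below is a minimal sketch of an encoder with that interface, assuming a bidirectional dynamic RNN with dropout; the real project may differ, for instance in whether the output width is hidden_sz or 2 * hidden_sz.

class RNNEncoder(object):
    """Bidirectional RNN over a padded batch (a sketch of the assumed interface)."""
    def __init__(self, hidden_sz, keep_prob, cell_type, name="rnn_encoder"):
        self.hidden_sz = hidden_sz
        self.keep_prob = keep_prob
        self.scope = name
        make_cell = (tf.contrib.rnn.GRUCell if cell_type == "gru"
                     else tf.contrib.rnn.LSTMCell)
        self.cell_fw = tf.contrib.rnn.DropoutWrapper(
            make_cell(hidden_sz), input_keep_prob=keep_prob)
        self.cell_bw = tf.contrib.rnn.DropoutWrapper(
            make_cell(hidden_sz), input_keep_prob=keep_prob)

    def build_graph(self, inputs, mask):
        """inputs: [batch_sz, seq_len, input_sz]; mask: [batch_sz, seq_len]."""
        with vs.variable_scope(self.scope):
            # true (unpadded) length of every example in the batch
            seq_lens = tf.reduce_sum(tf.cast(mask, tf.int32), axis=1)
            (fw_out, bw_out), _ = tf.nn.bidirectional_dynamic_rnn(
                self.cell_fw, self.cell_bw, inputs, seq_lens, dtype=tf.float32)
            # [batch_sz, seq_len, 2 * hidden_sz]
            out = tf.concat([fw_out, bw_out], 2)
            return tf.nn.dropout(out, self.keep_prob)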
Code example #10
File: rnnclassifier.py Project: kedz/ntg
    def from_args(cls, args, encoder_input_modules,
                  dropout=None, rnn_type=None, target_vocab_size=None,
                  bidirectional=None, learn_init=None):

        if learn_init is None:
            learn_init = bool(args.learn_init)
        
        if target_vocab_size is None:
            target_vocab_size = args.target_vocab_size

        if dropout is None:
            dropout = args.dropout

        encoder = RNNEncoder.from_args(
            args, encoder_input_size=encoder_input_modules.embedding_size,
            dropout=dropout, rnn_type=rnn_type, bidirectional=bidirectional)

        mlp_input_size = 0
        for dim in encoder.rnn_state_dims:
            mlp_input_size += dim[0] * dim[2]

        mlp = MLP(mlp_input_size, target_vocab_size, dropout=dropout)

        return cls(encoder_input_modules, encoder, mlp, learn_init=learn_init)
Code example #11
File: gokc_model.py Project: jq2276/Learning2Copy
    def __init__(self,
                 src_vocab_size,
                 tgt_vocab_size,
                 cue_vocab_size,
                 goal_vocab_size,
                 embed_size,
                 hidden_size,
                 padding_idx=None,
                 num_layers=1,
                 bidirectional=True,
                 attn_mode="mlp",
                 with_bridge=False,
                 tie_embedding=False,
                 dropout=0.0,
                 use_gpu=False,
                 use_bow=False,
                 use_kd=False,
                 use_posterior=False,
                 device=None,
                 unk_idx=None,
                 force_copy=True,
                 stage=None):
        super().__init__()

        self.src_vocab_size = src_vocab_size
        self.tgt_vocab_size = tgt_vocab_size
        self.cue_vocab_size = cue_vocab_size
        self.goal_vocab_size = goal_vocab_size
        self.embed_size = embed_size
        self.hidden_size = hidden_size
        self.padding_idx = padding_idx
        self.num_layers = num_layers
        self.bidirectional = bidirectional
        self.attn_mode = attn_mode
        self.with_bridge = with_bridge
        self.tie_embedding = tie_embedding
        self.dropout = dropout
        self.use_gpu = use_gpu
        self.use_bow = use_bow
        self.use_kd = use_kd
        self.use_posterior = use_posterior
        self.baseline = 0
        self.device = device if device >= 0 else "cpu"
        self.unk_idx = unk_idx
        self.force_copy = force_copy
        self.stage = stage

        # the utterance embedding
        enc_embedder = Embedder(num_embeddings=self.src_vocab_size,
                                embedding_dim=self.embed_size,
                                padding_idx=self.padding_idx)

        self.utt_encoder = RNNEncoder(input_size=self.embed_size,
                                      hidden_size=self.hidden_size,
                                      embedder=enc_embedder,
                                      num_layers=self.num_layers,
                                      bidirectional=self.bidirectional,
                                      dropout=self.dropout)

        if self.with_bridge:
            self.utt_bridge = nn.Sequential(
                nn.Linear(self.hidden_size, self.hidden_size), nn.Tanh())
            self.goal_bridge = nn.Sequential(
                nn.Linear(self.hidden_size, self.hidden_size), nn.Tanh())

        # self.prior_query_mlp = nn.Sequential(nn.Linear(self.hidden_size * 2, self.hidden_size), nn.Tanh())
        self.fc1 = nn.Linear(self.hidden_size, self.hidden_size)
        self.fc2 = nn.Linear(self.hidden_size, self.hidden_size)
        self.fc3 = nn.Linear(self.hidden_size * 2, 1)

        if self.tie_embedding:
            # share the same embedding with utt encoder
            assert self.src_vocab_size == self.tgt_vocab_size == self.cue_vocab_size == self.goal_vocab_size
            self.dec_embedder = enc_embedder
            knowledge_embedder = enc_embedder
            goal_embedder = enc_embedder
        else:
            self.dec_embedder = Embedder(num_embeddings=self.tgt_vocab_size,
                                         embedding_dim=self.embed_size,
                                         padding_idx=self.padding_idx)
            knowledge_embedder = Embedder(num_embeddings=self.cue_vocab_size,
                                          embedding_dim=self.embed_size,
                                          padding_idx=self.padding_idx)
            goal_embedder = Embedder(num_embeddings=self.goal_vocab_size,
                                     embedding_dim=self.embed_size,
                                     padding_idx=self.padding_idx)

        self.knowledge_encoder = RNNEncoder(input_size=self.embed_size,
                                            hidden_size=self.hidden_size,
                                            embedder=knowledge_embedder,
                                            num_layers=self.num_layers,
                                            bidirectional=self.bidirectional,
                                            dropout=self.dropout)

        self.goal_encoder = RNNEncoder(input_size=self.embed_size,
                                       hidden_size=self.hidden_size,
                                       embedder=goal_embedder,
                                       num_layers=self.num_layers,
                                       bidirectional=self.bidirectional,
                                       dropout=self.dropout)

        self.prior_attention = Attention(query_size=self.hidden_size,
                                         memory_size=self.hidden_size,
                                         hidden_size=self.hidden_size,
                                         mode="dot",
                                         device=self.device)

        self.posterior_attention = Attention(query_size=self.hidden_size,
                                             memory_size=self.hidden_size,
                                             hidden_size=self.hidden_size,
                                             mode="dot",
                                             device=self.device)

        self.decoder = Decoder(input_size=self.embed_size,
                               hidden_size=self.hidden_size,
                               output_size=self.tgt_vocab_size,
                               embedder=self.dec_embedder,
                               num_layers=self.num_layers,
                               attn_mode=self.attn_mode,
                               memory_size=self.hidden_size,
                               dropout=self.dropout,
                               device=self.device)

        self.softmax = nn.Softmax(dim=-1)
        self.sigmoid = nn.Sigmoid()
        self.tanh = nn.Tanh()

        if self.use_bow:
            self.bow_output_layer = nn.Sequential(
                nn.Linear(in_features=self.hidden_size,
                          out_features=self.hidden_size), nn.Tanh(),
                nn.Linear(in_features=self.hidden_size,
                          out_features=self.tgt_vocab_size),
                nn.LogSoftmax(dim=-1))

        if self.use_kd:
            self.knowledge_dropout = nn.Dropout(self.dropout)

        if self.padding_idx is not None:
            self.weight = torch.ones(self.tgt_vocab_size)
            self.weight[self.padding_idx] = 0
        else:
            self.weight = None

        self.nll_loss = NLLLoss(weight=self.weight,
                                ignore_index=self.padding_idx,
                                reduction='mean')
        self.copy_gen_loss = CopyGeneratorLoss(vocab_size=self.tgt_vocab_size,
                                               force_copy=self.force_copy,
                                               unk_index=self.unk_idx,
                                               ignore_index=self.padding_idx)

        self.kl_loss = torch.nn.KLDivLoss(reduction="mean")

        if self.use_gpu:
            self.cuda()
            self.weight = self.weight.cuda()
Code example #12
class CoAttn(BasicAttn):
    """class for CoAttention"""
    def __init__(self, keep_prob, key_vec_size, value_vec_size):
        BasicAttn.__init__(self, keep_prob, key_vec_size, value_vec_size)
        self.scope = "CoAttn"
        self.encoder = RNNEncoder(key_vec_size, keep_prob, "lstm")

    def build_graph(self, values, values_mask, keys, keys_mask):
        """
        Args:
            values:       [batch_sz, M, h]
            values_mask:  [batch_sz, M] 
            keys:         [batch_sz, N, h]
            keys_mask:    [batch_sz, N] 
            
            (N = n_keys, M = n_values, h = hidden_size)

        Return: 
            attn_dist:    [batch_sz, N, M]
            output:       [batch_sz, N, output_sz]
        """
        h = self.key_vec_size
        M = values.shape[1]
        N = keys.shape[1]
        assert (values.shape[-1] == h)

        logger.error("values: {}".format(values.shape))
        with vs.variable_scope(self.scope):
            # weight matrix: [h, h]
            W = tf.get_variable("W", [h, h], tf.float32,
                                tf.contrib.layers.xavier_initializer())
            # bias: [h]
            b = tf.get_variable("b", [h], tf.float32, tf.zeros_initializer())
            # sentinel vectors for keys and values
            # k0, v0 = [tf.get_variable(name, [h, 1], tf.float32,
            # tf.zeros_initializer()) for name in ("k0", "v0")]
            # sen_mat = tf.matmul(v0, tf.transpose(k0, [1, 0]))
            # logger.error("sen_mat: {}".format(sen_mat.shape))
            # [batch_sz * M, h]
            q_prime = tf.nn.tanh(tf.matmul(tf.reshape(values, [-1, h]), W) + b)
            # [batch_sz, M, h]
            q_prime = tf.reshape(q_prime, [-1, M, h])

            # affinity matrix: L = [batch_sz, N, M]
            # logger.error("values: {}".format(values.shape))
            # logger.error("tf.matmul(keys, tf.transpose(values, [0, 2, 1])): {}".format((tf.matmul(keys, tf.transpose(values, [0, 2, 1]))).shape))
            L = tf.matmul(keys, tf.transpose(q_prime, [0, 2, 1]))
            logger.error("L: {}".format(L.shape))

            ############ C2Q ############
            # [batch_size, 1, M]
            values_mask_exp = tf.expand_dims(values_mask, 1)
            # [batch_size, N, 1]
            keys_mask_exp = tf.expand_dims(keys_mask, 2)

            # softmax for L over values: [batch_sz, N, M]
            _, alpha = masked_softmax(L, values_mask_exp, 2)

            logger.error("alpha: {}".format(alpha.shape))
            # [batch_sz, N, h]
            k2v = tf.matmul(alpha, values)
            logger.error("k2v: {}".format(k2v.shape))

            ############ Q2C ############
            # softmax for L over keys: [batch_sz, N, M]
            _, beta = masked_softmax(L, keys_mask_exp, 1)
            logger.error("beta: {}".format(beta.shape))
            # beta = tf.transpose(beta, [1, 2, 0])
            logger.error("beta: {}".format(beta.shape))
            # [batch_sz, M, h]
            v2k = tf.matmul(tf.transpose(beta, [0, 2, 1]), keys)
            logger.error("v2k: {}".format(v2k.shape))

            ############ Second Level Attn ############
            # [batch_sz, N, h]: alpha = [batch_sz, N, M], v2k = [batch_sz, M, h]
            s = tf.matmul(alpha, v2k)
            logger.error("s: {}".format(s.shape))

            # [batch_sz, N, 2 * h]
            lstm_inputs = tf.concat([s, k2v], 2)
            logger.error("lstm_inputs: {}".format(lstm_inputs.shape))
            logger.error("keys mask: {}".format(keys_mask.shape))
            attn = self.encoder.build_graph(lstm_inputs, keys_mask)
            logger.error("attn: {}".format(attn.shape))

            # Apply dropout
            attn = tf.nn.dropout(attn, self.keep_prob)

            return _, attn
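Both CoAttn above and SelfAttn in code example #19 rely on a masked_softmax(logits, mask, dim) helper that is not shown on this page. Below is a minimal sketch under the usual convention that mask is 1 for valid positions and 0 for padding, and that the helper returns (masked_logits, prob_dist); this is an assumed implementation, not the project's own.

def masked_softmax(logits, mask, dim):
    """Softmax over dim, driving masked-out positions to ~0 probability.
    logits and mask must be broadcast-compatible; returns (masked_logits, prob_dist).
    """
    # add a very large negative number where mask == 0 so exp(.) underflows to 0
    exp_mask = (1.0 - tf.cast(mask, tf.float32)) * (-1e30)
    masked_logits = logits + exp_mask
    prob_dist = tf.nn.softmax(masked_logits, dim)
    return masked_logits, prob_dist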
Code example #13
if __name__ == '__main__':
    from encoder import CNNEncoder, RNNEncoder
    x1 = tf.placeholder(tf.int32, [None, 20], name="input_x1")
    x2 = tf.placeholder(tf.int32, [None, 20], name="input_x2")
    y = tf.placeholder(tf.float32, [None], name="input_y")
    cnn_encoder = CNNEncoder(
        sequence_length=20,
        embedding_dim=128,
        filter_sizes=[3, 4, 5],
        num_filters=100,
    )
    rnn_encoder = RNNEncoder(
        rnn_cell='lstm',
        hidden_units=100,
        num_layers=2,
        dropout_keep_prob=0.7,
        use_dynamic=False,
        use_attention=False,
    )
    model1 = SiameseSimilarityNets(input_x1=x1,
                                   input_x2=x2,
                                   input_y=y,
                                   word_embedding_type='rand',
                                   vocab_size=10000,
                                   embedding_size=128,
                                   encoder_type='cnn',
                                   cnn_encoder=cnn_encoder,
                                   rnn_encoder=rnn_encoder,
                                   dense_layer=False,
                                   l2_reg_lambda=0,
                                   pred_threshold=0.5,
Code example #14
File: train.py Project: fodrh1201/double_encode
import time
import os
import pprint
import util
import tensorflow as tf
import datetime
from encoder import RNNEncoder, RNNEncoderTrainer, RNNEncoderEvaluator

pp = pprint.PrettyPrinter(indent=2)

# Encoder parameters

RNNEncoder.add_flags()

# Training parameters

tf.flags.DEFINE_integer("max_sequence_length", 525, "Examples will be padded/truncated to this length")
tf.flags.DEFINE_integer("num_epochs", 20, "Number of training epochs")
tf.flags.DEFINE_integer("checkpoint_every", 1, "Evaluate model after this number of steps")
tf.flags.DEFINE_integer("evaluate_every", 1, "Evaluate model on dev set after this number of steps")

# Session Parameters

tf.flags.DEFINE_boolean("allow_soft_placement", False, "Allow soft device placement (e.g. no GPU)")
tf.flags.DEFINE_boolean("log_device_placement", True, "Log placement of ops on devices")

FLAGS = tf.flags.FLAGS
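# This bare access appears to be here to force tf.flags to parse the command line
# (with the pre-absl flags module, reading any flag triggers parsing) so that
# FLAGS.__flags below is populated; batch_size is presumably one of the flags
# defined by RNNEncoder.add_flags().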
FLAGS.batch_size
print("\nParameters:")
for attr, value in sorted(FLAGS.__flags.items()):
    print("{}={}".format(attr.upper(), value))
Code example #15
File: train.py Project: xingweihappyer/atec-nlp
def train():
    print("Using TensorFlow Version %s" % tf.__version__)
    assert "1.5" <= tf.__version__, "Need TensorFlow 1.5 or Later."
    print("\nParameters:")
    for attr in FLAGS:
        value = FLAGS[attr].value
        print("{}={}".format(attr.upper(), value))
    print("")
    if not FLAGS.data_file:
        exit("Train data file is empty. Set --data_file argument.")

    dataset = Dataset(data_file=FLAGS.data_file,
                      char_level=FLAGS.char_model,
                      embedding_dim=FLAGS.embedding_dim)
    vocab, word2id = dataset.read_vocab()
    print("Vocabulary Size: {:d}".format(len(vocab)))
    # Generate batches
    data = dataset.process_data(
        data_file=FLAGS.data_file,
        sequence_length=FLAGS.max_document_length)  # (x1, x2, y)
    train_data, eval_data = dataset.train_test_split(
        data, test_size=FLAGS.val_percentage, random_seed=FLAGS.random_seed)
    train_batches = dataset.batch_iter(train_data,
                                       FLAGS.batch_size,
                                       FLAGS.num_epochs,
                                       shuffle=True)

    with tf.Graph().as_default():
        tf.set_random_seed(FLAGS.random_seed)
        session_conf = tf.ConfigProto(allow_soft_placement=True,
                                      log_device_placement=False)
        sess = tf.Session(config=session_conf)

        input_x1 = tf.placeholder(tf.int32, [None, FLAGS.max_document_length],
                                  name="input_x1")
        input_x2 = tf.placeholder(tf.int32, [None, FLAGS.max_document_length],
                                  name="input_x2")
        input_y = tf.placeholder(tf.float32, [None], name="input_y")
        dropout_keep_prob = tf.placeholder(tf.float32, name="dropout_keep_prob")
        cnn_encoder = CNNEncoder(
            sequence_length=FLAGS.max_document_length,
            embedding_dim=FLAGS.embedding_dim,
            filter_sizes=list(map(int, FLAGS.filter_sizes.split(","))),
            num_filters=FLAGS.num_filters,
        )
        rnn_encoder = RNNEncoder(
            rnn_cell=FLAGS.rnn_cell,
            hidden_units=FLAGS.hidden_units,
            num_layers=FLAGS.num_layers,
            dropout_keep_prob=dropout_keep_prob,
            use_dynamic=FLAGS.use_dynamic,
            use_attention=FLAGS.use_attention,
        )

        with sess.as_default():
            if FLAGS.model_class == 'similarity':
                model = SiameseSimilarityNets(
                    input_x1=input_x1,
                    input_x2=input_x2,
                    input_y=input_y,
                    encoder_type=FLAGS.model_type,
                    cnn_encoder=cnn_encoder,
                    rnn_encoder=rnn_encoder,
                    vocab_size=len(vocab),
                    embedding_size=FLAGS.embedding_dim,
                    word_embedding_type=FLAGS.word_embedding_type,
                    dense_layer=FLAGS.dense_layer,
                    pred_threshold=FLAGS.pred_threshold,
                    l2_reg_lambda=FLAGS.l2_reg_lambda,
                    energy_func=FLAGS.energy_function,
                    loss_func=FLAGS.loss_function,
                    margin=FLAGS.margin,
                    contrasive_loss_pos_weight=FLAGS.scale_pos_weight,
                    weight_sharing=FLAGS.weight_sharing)
                print("Initialized SiameseSimilarityNets model.")
            elif FLAGS.model_class == 'classification':
                model = SiameseClassificationNets(
                    input_x1=input_x1,
                    input_x2=input_x2,
                    input_y=input_y,
                    word_embedding_type=FLAGS.word_embedding_type,
                    vocab_size=len(vocab),
                    embedding_size=FLAGS.embedding_dim,
                    encoder_type=FLAGS.model_type,
                    cnn_encoder=cnn_encoder,
                    rnn_encoder=rnn_encoder,
                    dense_layer=FLAGS.dense_layer,
                    l2_reg_lambda=FLAGS.l2_reg_lambda,
                    interaction='multiply',
                    weight_sharing=FLAGS.weight_sharing)
                print("Initialized SiameseClassificationNets model.")
            else:
                raise ValueError(
                    "Invalid model class. Expected one of {`similarity`, `classification`} "
                )
            model.forward()

            # Define Training procedure
            global_step = tf.Variable(0, name="global_step", trainable=False)
            learning_rate = tf.train.exponential_decay(
                FLAGS.lr,
                global_step,
                decay_steps=int(40000 / FLAGS.batch_size),
                decay_rate=FLAGS.weight_decay_rate,
                staircase=True)
            optimizer = tf.train.AdamOptimizer(learning_rate)
            # optimizer = tf.train.MomentumOptimizer(learning_rate, 0.9)
            # optimizer = tf.train.GradientDescentOptimizer(learning_rate)
            # optimizer = tf.train.RMSPropOptimizer(learning_rate)
            # optimizer = tf.train.AdadeltaOptimizer(learning_rate, epsilon=1e-6)

        # for i, (g, v) in enumerate(grads_and_vars):
        #     if g is not None:
        #         grads_and_vars[i] = (tf.clip_by_global_norm(g, 5), v)  # clip gradients
        # train_op = optimizer.apply_gradients(grads_and_vars, global_step=global_step)
        if FLAGS.clip_norm:  # improves the loss, but small weights yield small scores, so the prediction threshold needs tuning for a better F1
            variables = tf.trainable_variables()
            grads, _ = tf.clip_by_global_norm(
                tf.gradients(model.loss, variables), FLAGS.clip_norm)
            train_op = optimizer.apply_gradients(zip(grads, variables),
                                                 global_step=global_step)
            grads_and_vars = zip(grads, variables)
        else:
            grads_and_vars = optimizer.compute_gradients(model.loss)
            train_op = optimizer.apply_gradients(grads_and_vars,
                                                 global_step=global_step)
        # Keep track of gradient values and sparsity (optional)
        grad_summaries = []
        for g, v in grads_and_vars:
            if g is not None:
                grad_hist_summary = tf.summary.histogram(
                    "{}/grad/hist".format(v.name), g)
                sparsity_summary = tf.summary.scalar(
                    "{}/grad/sparsity".format(v.name), tf.nn.zero_fraction(g))
                grad_summaries.append(grad_hist_summary)
                grad_summaries.append(sparsity_summary)
        grad_summaries_merged = tf.summary.merge(grad_summaries)
        print("Defined gradient summaries.")

        # Summaries for loss and accuracy
        loss_summary = tf.summary.scalar("loss", model.loss)
        f1_summary = tf.summary.scalar("F1-score", model.f1)

        # Train Summaries
        train_summary_op = tf.summary.merge(
            [loss_summary, f1_summary, grad_summaries_merged])
        train_summary_dir = os.path.join(FLAGS.model_dir, "summaries", "train")
        train_summary_writer = tf.summary.FileWriter(train_summary_dir,
                                                     sess.graph)

        # Dev summaries
        dev_summary_op = tf.summary.merge([loss_summary, f1_summary])
        dev_summary_dir = os.path.join(FLAGS.model_dir, "summaries", "dev")
        dev_summary_writer = tf.summary.FileWriter(dev_summary_dir, sess.graph)

        # Checkpoint directory. Tensorflow assumes this directory already exists so we need to create it
        checkpoint_dir = os.path.abspath(
            os.path.join(FLAGS.model_dir, "checkpoints"))
        checkpoint_prefix = os.path.join(checkpoint_dir, "model")
        if not os.path.exists(checkpoint_dir):
            os.makedirs(checkpoint_dir)
        graph_def = tf.get_default_graph().as_graph_def()
        with open(os.path.join(checkpoint_dir, "graphpb.txt"), 'w') as f:
            f.write(str(graph_def))
        saver = tf.train.Saver(tf.global_variables(),
                               max_to_keep=FLAGS.num_checkpoints)
        # Initialize all variables
        sess.run(tf.global_variables_initializer())
        sess.run(tf.local_variables_initializer())

        if FLAGS.word_embedding_type != 'rand':
            # initial matrix with random uniform
            # embedding_init = np.random.uniform(-0.25, 0.25, (len(vocab), FLAGS.embedding_dim))
            embedding_init = np.zeros(shape=(len(vocab), FLAGS.embedding_dim))
            # load vectors from the word2vec
            print("Initializing word embedding with pre-trained word2vec.")
            words, vectors = dataset.load_word2vec()
            for idx, w in enumerate(vocab):
                vec = vectors[words.index(w)]
                embedding_init[idx] = np.asarray(vec).astype(np.float32)
            sess.run(model.W.assign(embedding_init))

        print("Starting training...")
        F1_best = 0.0
        last_improved_step = 0
        for batch in train_batches:
            x1_batch, x2_batch, y_batch = zip(*batch)
            feed_dict = {
                input_x1: x1_batch,
                input_x2: x2_batch,
                input_y: y_batch,
                dropout_keep_prob: FLAGS.dropout_keep_prob
            }
            _, step, loss, cm, acc, precision, recall, f1, summaries = sess.run(
                [
                    train_op, global_step, model.loss, model.cm, model.acc,
                    model.precision, model.recall, model.f1, train_summary_op
                ], feed_dict)
            time_str = datetime.datetime.now().isoformat()
            if step % FLAGS.log_every_steps == 0:
                train_summary_writer.add_summary(summaries, step)
                print(
                    "{} step {} TRAIN loss={:g} acc={:.3f} P={:.3f} R={:.3f} F1={:.6f}"
                    .format(time_str, step, loss, acc, precision, recall, f1))
            if step % FLAGS.evaluate_every_steps == 0:
                # eval
                x1_batch, x2_batch, y_batch = zip(*eval_data)
                feed_dict = {
                    input_x1: x1_batch,
                    input_x2: x2_batch,
                    input_y: y_batch,
                    dropout_keep_prob: 1
                }
                #### debug for similarity model
                # x1, out1, out2, sim_euc, sim_cos, sim_ma, sim = sess.run(
                #   [model.embedded_1, model.out1, model.out2, model.sim_euc, model.sim_cos, model.sim_ma, model.sim], feed_dict)
                # print(x1)
                # sim_euc = [round(s, 2) for s in sim_euc[:30]]
                # sim_cos = [round(s, 2) for s in sim_cos[:30]]
                # sim_ma = [round(s, 2) for s in sim_ma[:30]]
                # sim = [round(s, 2) for s in sim[:30]]
                # # print(out1)
                # out1 = [round(s, 3) for s in out1[0]]
                # out2 = [round(s, 3) for s in out2[0]]
                # print(zip(out1, out2))
                # for w in zip(y_batch[:30], sim, sim_euc, sim_cos, sim_ma):
                #     print(w)

                ##### debug for classification model
                # out1, out2, out, logits = sess.run(
                #     [model.out1, model.out2, model.out, model.logits], feed_dict)
                # out1 = [round(s, 3) for s in out1[0]]
                # out2 = [round(s, 3) for s in out2[0]]
                # out = [round(s, 3) for s in out[0]]
                # print(zip(out1, out2))
                # print(out)
                # print(logits)

                loss, cm, acc, precision, recall, f1, summaries = sess.run([
                    model.loss, model.cm, model.acc, model.precision,
                    model.recall, model.f1, dev_summary_op
                ], feed_dict)
                dev_summary_writer.add_summary(summaries, step)
                if f1 > F1_best:
                    F1_best = f1
                    last_improved_step = step
                    if F1_best > 0.5:
                        path = saver.save(sess,
                                          checkpoint_prefix,
                                          global_step=step)
                        print(
                            "Saved model with F1={} checkpoint to {}\n".format(
                                F1_best, path))
                    improved_token = '*'
                else:
                    improved_token = ''
                print(
                    "{} step {} DEV loss={:g} acc={:.3f} cm{} P={:.3f} R={:.3f} F1={:.6f} {}"
                    .format(time_str, step, loss, acc, cm, precision, recall,
                            f1, improved_token))
                # if step % FLAGS.checkpoint_every_steps == 0:
                #     if F1 >= F1_best:
                #         F1_best = F1
                #         path = saver.save(sess, checkpoint_prefix, global_step=step)
                #         print("Saved model with F1={} checkpoint to {}\n".format(F1_best, path))
            if step - last_improved_step > 4000:  # early stop after 4000 steps without improvement
                print(
                    "No improvement for a long time, early-stopping at best F1={}"
                    .format(F1_best))
                break
Code example #16
 def __init__(self, keep_prob, key_vec_size, value_vec_size):
     BasicAttn.__init__(self, keep_prob, key_vec_size, value_vec_size)
     self.scope = "CoAttn"
     self.encoder = RNNEncoder(key_vec_size, keep_prob, "lstm")
Code example #17
    def __init__(self, params):
        self.pos_relation_ids = tf.placeholder(tf.int32, [None, 3])
        self.neg_relation_ids = tf.placeholder(tf.int32, [None, 3])
        self.q_word_ids = tf.placeholder(tf.int32, [None, params['max_sentence_len']], name='q_word_ids')
        self.q_sentence_lengths = tf.placeholder(tf.int64, [None], name="q_sentence_lengths")
        self.q_char_ids = tf.placeholder(tf.int32, [None, params['max_sentence_len'], params['max_word_len']], name='q_char_ids')
        self.q_word_lengths = tf.placeholder(tf.int64, [None, params['max_sentence_len']])
        self.dropout_keep_prob = tf.placeholder(tf.float32, name='dropout_keep_prob')
        self.pattern_positions = tf.placeholder(tf.float32, [None, params['max_sentence_len'], params['question_config']['word_dim']])
        self.relation_positions = tf.placeholder(tf.float32, [None, 3, params['relation_config']['word_dim']])

        with tf.device('/gpu:%s' % params.get('gpu', 0)):
            if params['encode_name'] == 'CNN':
                question_encoder = CNNEncoder(params['question_config'], 'question_cnn')
                relation_encoder = CNNEncoder(params['relation_config'], 'relation_cnn')
                # relation_encoder = AdditionEncoder(params['relation_config'], 'relation_add')
                if 'char_dim' in params['question_config']:
                    question = question_encoder.encode(self.q_char_ids)
                else:
                    question = question_encoder.encode(self.q_word_ids)
                pos_relation = relation_encoder.encode(self.pos_relation_ids, False)
                neg_relation = relation_encoder.encode(self.neg_relation_ids, True)
                question = question / tf.sqrt(tf.reduce_sum(question ** 2, 1, keep_dims=True))
                pos_relation = pos_relation / tf.sqrt(tf.reduce_sum(pos_relation ** 2, 1, keep_dims=True))
                neg_relation = neg_relation / tf.sqrt(tf.reduce_sum(neg_relation ** 2, 1, keep_dims=True))

            elif params['encode_name'] == 'ADD':
                if params['question_config'].get("use_position", False):
                    question_encoder = PositionADDEncoder(params['question_config'], "question_add")
                    question = question_encoder.encode(self.q_word_ids, self.pattern_positions)
                else:
                    question_encoder = ADDEncoder(params['question_config'], "question_add")
                    question = question_encoder.encode(self.q_word_ids, self.q_sentence_lengths)
                    question = question / tf.sqrt(tf.reduce_sum(question ** 2, 1, keep_dims=True))

                if params['relation_config'].get("use_position", False):
                    relation_encoder = PositionADDEncoder(params['relation_config'], 'relation_add')
                    pos_relation = relation_encoder.encode(self.pos_relation_ids, self.relation_positions)
                    neg_relation = relation_encoder.encode(self.neg_relation_ids, self.relation_positions)
                else:
                    relation_encoder = ADDEncoder(params['relation_config'], 'relation_add')
                    pos_relation = relation_encoder.encode(self.pos_relation_ids, None)
                    neg_relation = relation_encoder.encode(self.neg_relation_ids, None)
                    pos_relation = pos_relation / tf.sqrt(tf.reduce_sum(pos_relation ** 2, 1, keep_dims=True))
                    neg_relation = neg_relation / tf.sqrt(tf.reduce_sum(neg_relation ** 2, 1, keep_dims=True))



            elif params['encode_name'] == 'RNN':
                question_encoder = RNNEncoder(params['question_config'], 'question_rnn')
                relation_encoder = RNNEncoder(params['relation_config'], 'relation_rnn')
                # relation_encoder = AdditionEncoder(params['relation_config'], 'relation_add')
                question = question_encoder.encode(self.q_word_ids, self.q_sentence_lengths, self.q_char_ids, self.q_word_lengths, False)
                pos_relation = relation_encoder.encode(self.pos_relation_ids, None, None, None, False)
                neg_relation = relation_encoder.encode(self.neg_relation_ids, None, None, None, True)
                # question = question / tf.sqrt(tf.reduce_sum(question ** 2, 1, keep_dims=True))
                # pos_relation = pos_relation / tf.sqrt(tf.reduce_sum(pos_relation ** 2, 1, keep_dims=True))
                # neg_relation = neg_relation / tf.sqrt(tf.reduce_sum(neg_relation ** 2, 1, keep_dims=True))

                # pos_relation = relation_encoder.encode(self.pos_relation_ids, None, False)
                # neg_relation = relation_encoder.encode(self.neg_relation_ids, None, True)
            else:
                raise ValueError('encoder_name should be one of [CNN, ADD, RNN]')

            self.question_drop = tf.nn.dropout(question, self.dropout_keep_prob)
            self.pos_relation_drop = tf.nn.dropout(pos_relation, self.dropout_keep_prob)
            self.neg_relation_drop = tf.nn.dropout(neg_relation, self.dropout_keep_prob)
            self.pos_sim = self.dot_sim(self.question_drop, self.pos_relation_drop)
            self.neg_sim = self.dot_sim(self.question_drop, self.neg_relation_drop)
            self.loss = tf.reduce_mean(tf.maximum(0., self.neg_sim + params['margin'] - self.pos_sim))
            tvars = tf.trainable_variables()
            max_grad_norm = 2
            self.grads, _ = tf.clip_by_global_norm(tf.gradients(self.loss, tvars), max_grad_norm)
            optimizer = tf.train.AdamOptimizer(params['lr'])
            self.train_op = optimizer.apply_gradients(zip(self.grads, tvars))

        config = tf.ConfigProto()
        config.gpu_options.allow_growth = True
        config.allow_soft_placement = True
        config.log_device_placement = False
        self.session = tf.Session(config=config)
        self.saver = tf.train.Saver(tf.all_variables(), max_to_keep=1)
        if params['load_path']:
            self.saver.restore(self.session, params['load_path'])
        else:
            self.session.run(tf.initialize_all_variables())

        self.params = params
Code example #18
 def __init__(self, keep_prob, key_size, value_size):
     BasicAttn.__init__(self, keep_prob, key_size, value_size)
     self.scope = "SelfAttn"
     self.encoder = RNNEncoder(key_size, keep_prob, "gru")
     self.v_size = 35
Code example #19
class SelfAttn(BasicAttn):
    """class for SelfAttention"""
    def __init__(self, keep_prob, key_size, value_size):
        BasicAttn.__init__(self, keep_prob, key_size, value_size)
        self.scope = "SelfAttn"
        self.encoder = RNNEncoder(key_size, keep_prob, "gru")
        self.v_size = 35

    def build_graph(self, values, values_mask, keys, keys_mask):
        """
        Args:
            values:       [batch_sz, M, h]
            values_mask:  [batch_sz, M]
            keys:         [batch_sz, N, h]
            keys_mask:     [batch_sz, N]

        Return:
            attn_dist:    None (unused)
            output:       [batch_sz, N, 2h]
        """
        h = self.key_vec_size
        M = values_mask.shape[1]
        N = keys_mask.shape[1]
        v = self.v_size
        # convert keys to first level attention
        _, keys = super(SelfAttn, self).build_graph(values, values_mask, keys,
                                                    keys_mask)

        with vs.variable_scope(self.scope):
            W_1 = tf.get_variable('W_1', [h, v], tf.float32,
                                  tf.contrib.layers.xavier_initializer())
            W_2 = tf.get_variable('W_2', [h, v], tf.float32,
                                  tf.contrib.layers.xavier_initializer())
            v_weight = tf.get_variable('v', [v, 1], tf.float32,
                                       tf.contrib.layers.xavier_initializer())

            ###### W_1 * v_j & W_2 * v_i & their sum ######
            keys = tf.reshape(keys, [-1, h])
            # [batch_sz, N, N, v] - v: self.v_size
            W1v = tf.tile(tf.expand_dims(\
                          tf.reshape(tf.matmul(keys, W_1), [-1, N, v]),\
                          2), [1, 1, N, 1])
            # [batch_sz, N, N, v]
            W2v = tf.tile(tf.expand_dims(\
                          tf.reshape(tf.matmul(keys, W_2), [-1, N, v]),\
                          2), [1, 1, N, 1])
            # restore keys to [batch_sz, N, h]
            keys = tf.reshape(keys, [-1, N, h])

            # [batch_sz, N, N, v]
            # each vector in W_mixed (i, j) is W1v_i + W2v_j
            W_mixed = W1v + tf.transpose(W2v, [0, 2, 1, 3])
            # [batch_sz * N, N]
            E = tf.matmul(tf.reshape(W_mixed, [-1, v]), v_weight)
            # [batch_sz, N, N]
            E = tf.reshape(E, [-1, N, N])
            # [N, batch_sz, N]
            _, alpha = masked_softmax(tf.transpose(E, [1, 0, 2]), keys_mask, 2)
            # [batch_sz, N, N]
            alpha = tf.transpose(alpha, [1, 0, 2])
            # [batch_sz, N, h]
            alpha = tf.matmul(alpha, keys)

            #### Bi-RNN ####
            bidirectional_gru_input = tf.concat([keys, alpha], 2)
            attn = self.encoder.build_graph(bidirectional_gru_input, keys_mask)

            attn = tf.nn.dropout(attn, self.keep_prob)

            return None, attn