Example #1
  def __init__(self, config, pretrained_embedding):
    # Placeholders for a batch of token-id sequences and their class labels.
    self._input  = tf.placeholder(dtype=tf.int32, shape=[None, config['num_steps']], name='input')
    self._target = tf.placeholder(dtype=tf.int32, shape=[None], name='target')
    self.batch_size   = config['batch_size']
    self.num_steps    = config['num_steps']
    self.embed_size   = config['embed_size']
    self.size         = config['hidden_size']
    self._lr          = config['lr']
    self.num_classes  = config['num_classes']
    self.keep_prob    = tf.Variable(config['keep_prob'], trainable=False)
    self.combine_mode = config['combine_mode']
    self.weight_decay = config['weight_decay']

    # Keep the embedding table on the CPU; it is initialized from the
    # pretrained vectors and fine-tuned during training.
    with tf.device("/cpu:0"):
        embedding = tf.Variable(pretrained_embedding, dtype=tf.float32, name='embedding', trainable=True)
        inputs = tf.nn.embedding_lookup(embedding, self._input)

    # outputs = BCNEncoder(inputs, self.size).get_output()
    outputs = LSTMEncoder(inputs,
                          embed_size=self.embed_size,
                          hidden_size=self.size,
                          vocab_size=config['vocab_size'],
                          num_steps=self.num_steps,
                          keep_prob=self.keep_prob).get_output()
    print('outputs', outputs.get_shape())

    # outputs = tf.contrib.layers.fully_connected(outputs, self.size)
    # outputs = tf.nn.dropout(outputs, keep_prob=self.keep_prob)
    # Softmax projection; its input dimension must match the encoder output
    # width printed above.
    softmax_w = tf.get_variable("softmax_w", [self.embed_size, self.num_classes], dtype=tf.float32)
    softmax_b = tf.get_variable("softmax_b", [self.num_classes], dtype=tf.float32)
    logits    = tf.matmul(outputs, softmax_w) + softmax_b


    # Cross-entropy loss plus L2 weight decay over all trainable variables.
    loss = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=self._target, logits=logits)
    self.l2_loss = sum(tf.nn.l2_loss(tf_var) for tf_var in tf.trainable_variables())
    self._cost = cost = tf.reduce_mean(loss) + self.weight_decay * self.l2_loss

    # Clip gradients by global norm and apply them with Adam.
    self._lr = tf.Variable(self._lr, trainable=False)
    tvars    = tf.trainable_variables()
    grads, _ = tf.clip_by_global_norm(tf.gradients(cost, tvars),
                                      config['max_grad_norm'])
    optimizer = tf.train.AdamOptimizer(self._lr)
    # optimizer = tf.train.GradientDescentOptimizer(self._lr)
    self._train_op = optimizer.apply_gradients(zip(grads, tvars))

    # Allow the learning rate to be changed at run time via feed_dict.
    self._new_lr = tf.placeholder(tf.float32, shape=[], name="new_learning_rate")
    self._lr_update = tf.assign(self._lr, self._new_lr)
    self.predicted_class = tf.cast(tf.argmax(tf.nn.softmax(logits), axis=-1), tf.int32)
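The constructor above only builds the graph; a caller still has to create a session, initialize the variables, and feed the two placeholders. The driver below is a minimal, hypothetical sketch: the class name SentenceClassifier, the config values, and the random data are assumptions, not part of the example.

import numpy as np
import tensorflow as tf

config = {
    'num_steps': 50, 'batch_size': 32, 'embed_size': 300, 'hidden_size': 300,
    'lr': 1e-3, 'num_classes': 2, 'keep_prob': 0.8, 'combine_mode': 'last',
    'weight_decay': 1e-5, 'max_grad_norm': 5.0, 'vocab_size': 10000,
}
# Random stand-in for real pretrained embeddings (assumption).
pretrained = np.random.randn(config['vocab_size'], config['embed_size']).astype(np.float32)

model = SentenceClassifier(config, pretrained)   # hypothetical class wrapping the __init__ above
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    x = np.random.randint(0, config['vocab_size'], size=(config['batch_size'], config['num_steps']))
    y = np.random.randint(0, config['num_classes'], size=(config['batch_size'],))
    _, cost = sess.run([model._train_op, model._cost],
                       feed_dict={model._input: x, model._target: y})
    print('step cost:', cost)
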
Example #2
 def build_model(args, vocab):
     print('build LSTMSeq2SeqModel')
     # A bidirectional encoder concatenates forward and backward states, so the
     # decoder sees twice the hidden size; the context covers all encoder layers.
     encoder_hidden_size = 2 * args.hidden_size if args.bidirectional else args.hidden_size
     encoder_ctx_size = encoder_hidden_size * args.num_encoder_layers
     encoder = LSTMEncoder(vocab.src, args.embed_size, args.hidden_size,
                           args.num_encoder_layers, args.dropout)
     decoder = LSTMDecoder(vocab.trg, args.embed_size, args.hidden_size,
                           args.num_decoder_layers, args.dropout,
                           encoder_hidden_size, encoder_ctx_size)
     return LSTMSeq2SeqModel(args, encoder, decoder)
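Since build_model only reads a handful of attributes from args, it can be exercised with a plain namespace. The call below is an assumed usage: the attribute values are illustrative, and vocab is assumed to expose source and target vocabularies as vocab.src and vocab.trg, as the function requires.

from types import SimpleNamespace

args = SimpleNamespace(
    hidden_size=512, embed_size=256, dropout=0.3,
    num_encoder_layers=2, num_decoder_layers=2, bidirectional=True,
)
model = build_model(args, vocab)   # vocab provided by the surrounding training script
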
Example #3
    def __init__(self, fields, args):
        super(LMModel, self).__init__()
        vocab = fields["sent"].vocab
        self.vocab_size = len(vocab)
        self.unk_idx = vocab.stoi[utils.UNK_WORD]
        self.padding_idx = vocab.stoi[utils.PAD_WORD]
        self.bos_idx = vocab.stoi[utils.BOS_WORD]
        self.eos_idx = vocab.stoi[utils.EOS_WORD]
        self.device = args.device

        self.embeddings = nn.Embedding(self.vocab_size,
                                       args.emb_dim,
                                       padding_idx=self.padding_idx)

        # The encoder and the latent projections are only built when the model
        # draws latent samples z.
        self.encoder = None
        if args.num_z_samples > 0:
            self.encoder = LSTMEncoder(
                hidden_size=args.rnn_size,
                num_layers=args.num_enc_layers,
                bidirectional=args.bidirectional_encoder,
                embeddings=self.embeddings,
                padding_idx=self.padding_idx,
                dropout=args.dropout,
            )
            # Gaussian posterior parameters and the projection from z back to
            # the decoder's hidden size.
            self.mu = nn.Linear(args.rnn_size, args.z_dim, bias=False)
            self.logvar = nn.Linear(args.rnn_size, args.z_dim, bias=False)
            self.z2h = nn.Linear(args.z_dim, args.rnn_size, bias=False)
            self.z_dim = args.z_dim

        self.decoder = LSTMDecoder(
            hidden_size=args.rnn_size,
            num_layers=args.num_dec_layers,
            embeddings=self.embeddings,
            padding_idx=self.padding_idx,
            unk_idx=self.unk_idx,
            bos_idx=self.bos_idx,
            dropout=args.dropout,
            z_dim=args.z_dim,
            z_cat=args.z_cat,
            inputless=args.inputless,
            word_dropout_rate=args.word_dropout_rate,
        )

        self.dropout = nn.Dropout(args.dropout)
        self.generator = nn.Linear(args.rnn_size, self.vocab_size, bias=False)
        self.num_dec_layers = args.num_dec_layers
        self.rnn_size = args.rnn_size
        self.num_z_samples = args.num_z_samples
        self.use_avg = args.use_avg
        self.criterion = nn.CrossEntropyLoss(ignore_index=self.padding_idx,
                                             reduction="none")

        self._init_params(args)
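This constructor pulls everything from two objects: a torchtext-style fields dict whose "sent" field carries a vocabulary, and an args namespace. A minimal, assumed construction is sketched below; the attribute names mirror what __init__ reads, the values are placeholders, and anything consumed only by _init_params is omitted.

from types import SimpleNamespace

args = SimpleNamespace(
    device="cpu", emb_dim=256, rnn_size=512, z_dim=32,
    num_enc_layers=1, num_dec_layers=1, bidirectional_encoder=True,
    dropout=0.3, word_dropout_rate=0.25, z_cat=False, inputless=False,
    num_z_samples=1, use_avg=False,
)
# fields["sent"].vocab is assumed to be a torchtext vocab whose stoi contains
# the UNK/PAD/BOS/EOS tokens referenced via utils.
model = LMModel(fields, args)
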
Example #4

    def __init__(self, config, pretrained_embedding):
        self._input = tf.placeholder(dtype=tf.int32,
                                     shape=[None, config['num_steps']],
                                     name='input')
        self._target = tf.placeholder(dtype=tf.int32,
                                      shape=[None],
                                      name='target')
        self.batch_size = config['batch_size']
        self.num_steps = config['num_steps']
        self.embed_size = config['embed_size']
        self.size = config['hidden_size']
        self._lr = config['lr']
        self.num_classes = config['num_classes']
        self.keep_prob = tf.Variable(config['keep_prob'], trainable=False)
        self.combine_mode = config['combine_mode']
        self.weight_decay = config['weight_decay']
        self.max_grad_norm = config['max_grad_norm']

        self._lr = tf.Variable(self._lr, trainable=False)
        self._new_lr = tf.placeholder(tf.float32,
                                      shape=[],
                                      name="new_learning_rate")
        self._lr_update = tf.assign(self._lr, self._new_lr)

        with tf.device("/cpu:0"):
            embedding = tf.get_variable(
                "embedding", [config['vocab_size'] + 1, self.embed_size],
                dtype=tf.float32)
            inputs = tf.nn.embedding_lookup(embedding, self._input)

        # The encoder width is tied to the embedding dimension of the inputs.
        self.size = inputs.get_shape().as_list()[-1]
        self.encoder_outputs = LSTMEncoder(inputs,
                                           inputs.get_shape().as_list()[-1],
                                           self.size,
                                           config['vocab_size'],
                                           self.num_steps,
                                           self.keep_prob,
                                           num_layers=config['num_layers'],
                                           variational_dropout=True,
                                           combine_mode=None).get_output()

        # The shared encoder feeds both a language-modelling head and a
        # classification head, each with its own loss.
        self.lm_loss = self.get_lm_graph(self._input, self.encoder_outputs,
                                         embedding)

        self.cl_loss, self.predicted_class = self.get_classification_graph()

        self.lm_train_op = self.get_train_op(self.lm_loss)
        self.cl_train_op = self.get_train_op(self.cl_loss)
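Because this last example exposes separate train ops, a typical driver stages them, e.g. language-model pretraining on unlabeled text followed by supervised classification. The loop below is a hypothetical sketch: the batch iterators and the exact feeds each graph needs are assumptions based only on the placeholders defined above.

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())

    # Stage 1: language-model pretraining (unlabeled_batches is assumed).
    for x in unlabeled_batches:
        sess.run(model.lm_train_op, feed_dict={model._input: x})

    # Stage 2: supervised classification (labeled_batches of (x, y) pairs is assumed).
    for x, y in labeled_batches:
        _, loss = sess.run([model.cl_train_op, model.cl_loss],
                           feed_dict={model._input: x, model._target: y})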