def __init__(self, config, pretrained_embedding):
    # Input token ids (batch, num_steps) and class labels (batch,).
    self._input = tf.placeholder(dtype=tf.int32, shape=[None, config['num_steps']], name='input')
    self._target = tf.placeholder(dtype=tf.int32, shape=[None], name='target')
    self.batch_size = config['batch_size']
    self.num_steps = config['num_steps']
    self.embed_size = config['embed_size']
    self.size = config['hidden_size']
    self._lr = config['lr']
    self.num_classes = config['num_classes']
    self.keep_prob = tf.Variable(config['keep_prob'], trainable=False)
    self.combine_mode = config['combine_mode']
    self.weight_decay = config['weight_decay']

    # Embedding lookup is pinned to the CPU; the pretrained matrix is fine-tuned.
    with tf.device("/cpu:0"):
        embedding = tf.Variable(pretrained_embedding, dtype=tf.float32, name='embedding', trainable=True)
        inputs = tf.nn.embedding_lookup(embedding, self._input)

    # outputs = BCNEncoder(inputs, self.size).get_output()
    outputs = LSTMEncoder(inputs,
                          embed_size=self.embed_size,
                          hidden_size=self.size,
                          vocab_size=config['vocab_size'],
                          num_steps=self.num_steps,
                          keep_prob=self.keep_prob).get_output()
    print('outputs', outputs.get_shape())
    # outputs = tf.contrib.layers.fully_connected(outputs, self.size)
    # outputs = tf.nn.dropout(outputs, keep_prob=self.keep_prob)

    # Linear projection from the encoder output to class logits.
    softmax_w = tf.get_variable("softmax_w", [self.embed_size, self.num_classes], dtype=tf.float32)
    softmax_b = tf.get_variable("softmax_b", [self.num_classes], dtype=tf.float32)
    logits = tf.matmul(outputs, softmax_w) + softmax_b

    # Cost: cross-entropy plus L2 weight decay over all trainable variables.
    loss = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=self._target, logits=logits)
    self.l2_loss = sum(tf.nn.l2_loss(tf_var) for tf_var in tf.trainable_variables())
    self._cost = cost = tf.reduce_mean(loss) + self.weight_decay * self.l2_loss

    # Clip the global gradient norm, then optimise with Adam on a learning rate
    # held in a non-trainable variable so it can be reassigned at run time.
    self._lr = tf.Variable(self._lr, trainable=False)
    tvars = tf.trainable_variables()
    grads, _ = tf.clip_by_global_norm(tf.gradients(cost, tvars), config['max_grad_norm'])
    optimizer = tf.train.AdamOptimizer(self._lr)
    # optimizer = tf.train.AdamOptimizer()
    # optimizer = tf.train.GradientDescentOptimizer(self._lr)
    self._train_op = optimizer.apply_gradients(zip(grads, tvars))

    self._new_lr = tf.placeholder(tf.float32, shape=[], name="new_learning_rate")
    self._lr_update = tf.assign(self._lr, self._new_lr)
    self.predicted_class = tf.cast(tf.argmax(tf.nn.softmax(logits), axis=-1), tf.int32)
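# The `_new_lr` placeholder and `_lr_update` assign op above imply that the
# learning rate is changed by feeding a value into the existing graph rather
# than rebuilding the optimizer. A minimal sketch of how a training script
# could drive it, assuming a constructed `model` instance of this class and an
# active tf.Session (the `assign_lr` helper name is hypothetical):
def assign_lr(session, model, lr_value):
    """Feed a new learning-rate value through the `new_learning_rate` placeholder."""
    session.run(model._lr_update, feed_dict={model._new_lr: lr_value})

# Example (hypothetical epoch-wise decay schedule):
# for epoch in range(num_epochs):
#     assign_lr(sess, model, config['lr'] * (0.5 ** epoch))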
def build_model(args, vocab):
    print('build LSTMSeq2SeqModel')
    # A bidirectional encoder doubles the per-layer output size; the context
    # passed to the decoder stacks all encoder layers.
    encoder_hidden_size = 2 * args.hidden_size if args.bidirectional else args.hidden_size
    encoder_ctx_size = encoder_hidden_size * args.num_encoder_layers
    encoder = LSTMEncoder(vocab.src, args.embed_size, args.hidden_size,
                          args.num_encoder_layers, args.dropout)
    decoder = LSTMDecoder(vocab.trg, args.embed_size, args.hidden_size,
                          args.num_decoder_layers, args.dropout,
                          encoder_hidden_size, encoder_ctx_size)
    return LSTMSeq2SeqModel(args, encoder, decoder)
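# To make the size bookkeeping above concrete: a tiny sketch with hypothetical
# values (not taken from the repository's defaults):
hidden_size = 256
bidirectional = True
num_encoder_layers = 2

encoder_hidden_size = 2 * hidden_size if bidirectional else hidden_size  # 512
encoder_ctx_size = encoder_hidden_size * num_encoder_layers              # 1024
print(encoder_hidden_size, encoder_ctx_size)  # 512 1024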
def __init__(self, fields, args):
    super(LMModel, self).__init__()
    vocab = fields["sent"].vocab
    self.vocab_size = len(vocab)
    self.unk_idx = vocab.stoi[utils.UNK_WORD]
    self.padding_idx = vocab.stoi[utils.PAD_WORD]
    self.bos_idx = vocab.stoi[utils.BOS_WORD]
    self.eos_idx = vocab.stoi[utils.EOS_WORD]
    self.device = args.device

    # Token embeddings shared between the encoder and decoder.
    self.embeddings = nn.Embedding(self.vocab_size, args.emb_dim,
                                   padding_idx=self.padding_idx)

    # With latent samples enabled, an LSTM encoder parameterises a Gaussian
    # posterior (mu, logvar) over z; z2h maps a sample back to a decoder state.
    self.encoder = None
    if args.num_z_samples > 0:
        self.encoder = LSTMEncoder(
            hidden_size=args.rnn_size,
            num_layers=args.num_enc_layers,
            bidirectional=args.bidirectional_encoder,
            embeddings=self.embeddings,
            padding_idx=self.padding_idx,
            dropout=args.dropout,
        )
        self.mu = nn.Linear(args.rnn_size, args.z_dim, bias=False)
        self.logvar = nn.Linear(args.rnn_size, args.z_dim, bias=False)
        self.z2h = nn.Linear(args.z_dim, args.rnn_size, bias=False)
        self.z_dim = args.z_dim

    self.decoder = LSTMDecoder(
        hidden_size=args.rnn_size,
        num_layers=args.num_dec_layers,
        embeddings=self.embeddings,
        padding_idx=self.padding_idx,
        unk_idx=self.unk_idx,
        bos_idx=self.bos_idx,
        dropout=args.dropout,
        z_dim=args.z_dim,
        z_cat=args.z_cat,
        inputless=args.inputless,
        word_dropout_rate=args.word_dropout_rate,
    )
    self.dropout = nn.Dropout(args.dropout)
    self.generator = nn.Linear(args.rnn_size, self.vocab_size, bias=False)
    self.num_dec_layers = args.num_dec_layers
    self.rnn_size = args.rnn_size
    self.num_z_samples = args.num_z_samples
    self.use_avg = args.use_avg
    # Per-token loss; padding positions are masked via ignore_index.
    self.criterion = nn.CrossEntropyLoss(ignore_index=self.padding_idx,
                                         reduction="none")
    self._init_params(args)
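# The mu/logvar heads and the z2h projection above suggest the usual VAE-style
# latent: the encoder state parameterises a diagonal Gaussian, a sample z is
# drawn with the reparameterisation trick, and z2h maps it back to a decoder
# state. The model's own sampling code is not shown here; a generic sketch of
# that step, with hypothetical argument names, assuming mu and logvar come from
# self.mu(enc_state) and self.logvar(enc_state):
import torch

def sample_z(mu, logvar, num_samples=1):
    """Reparameterised samples from N(mu, diag(exp(logvar))).

    mu, logvar: (batch, z_dim); returns (num_samples, batch, z_dim).
    """
    std = torch.exp(0.5 * logvar)
    eps = torch.randn(num_samples, *mu.shape, device=mu.device)
    return mu.unsqueeze(0) + eps * std.unsqueeze(0)

# KL term against the standard-normal prior, per example:
# kl = -0.5 * torch.sum(1 + logvar - mu.pow(2) - logvar.exp(), dim=-1)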
def __init__(self, config, pretrained_embedding):
    self._input = tf.placeholder(dtype=tf.int32, shape=[None, config['num_steps']], name='input')
    self._target = tf.placeholder(dtype=tf.int32, shape=[None], name='target')
    self.batch_size = config['batch_size']
    self.num_steps = config['num_steps']
    self.embed_size = config['embed_size']
    self.size = config['hidden_size']
    self._lr = config['lr']
    self.num_classes = config['num_classes']
    self.keep_prob = tf.Variable(config['keep_prob'], trainable=False)
    self.combine_mode = config['combine_mode']
    self.weight_decay = config['weight_decay']
    self.max_grad_norm = config['max_grad_norm']

    # Learning rate lives in a non-trainable variable so it can be reassigned at run time.
    self._lr = tf.Variable(self._lr, trainable=False)
    self._new_lr = tf.placeholder(tf.float32, shape=[], name="new_learning_rate")
    self._lr_update = tf.assign(self._lr, self._new_lr)

    # Randomly initialised embedding with one extra row beyond vocab_size.
    with tf.device("/cpu:0"):
        embedding = tf.get_variable(
            "embedding", [config['vocab_size'] + 1, self.embed_size], dtype=tf.float32)
        inputs = tf.nn.embedding_lookup(embedding, self._input)

    # The hidden size is overridden by the embedding dimension of the looked-up inputs.
    self.size = inputs.get_shape().as_list()[-1]
    self.encoder_outputs = LSTMEncoder(inputs,
                                       inputs.get_shape().as_list()[-1],
                                       self.size,
                                       config['vocab_size'],
                                       self.num_steps,
                                       self.keep_prob,
                                       num_layers=config['num_layers'],
                                       variational_dropout=True,
                                       combine_mode=None).get_output()

    # Joint objectives: a language-modelling loss and a classification loss,
    # each with its own training op over the shared encoder.
    self.lm_loss = self.get_lm_graph(self._input, self.encoder_outputs, embedding)
    self.cl_loss, self.predicted_class = self.get_classification_graph()
    self.lm_train_op = self.get_train_op(self.lm_loss)
    self.cl_train_op = self.get_train_op(self.cl_loss)
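# The bodies of get_lm_graph, get_classification_graph, and get_train_op are
# not shown. Judging from the single-task classifier variant above, get_train_op
# presumably follows the same clip-then-Adam pattern; a sketch of such a method
# under that assumption (not the repository's actual implementation):
def get_train_op(self, loss):
    # Clip the global gradient norm, then apply Adam on the run-time-adjustable
    # learning-rate variable, mirroring the classifier model above.
    tvars = tf.trainable_variables()
    grads, _ = tf.clip_by_global_norm(tf.gradients(loss, tvars), self.max_grad_norm)
    optimizer = tf.train.AdamOptimizer(self._lr)
    return optimizer.apply_gradients(zip(grads, tvars))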