def __init__(self):
    """Build the recognition graph and load the newest trained weights.

    Creates the MFCC input placeholder, the logit network, the per-example
    frame count and a session restored from the latest checkpoint found
    under ``asset/train``.
    """
    # most verbose logging level
    tf.sg_verbosity(10)

    # one utterance per run
    self.batch_size = 1
    # number of output symbols, taken from the data module
    self.voca_size = sttwdata.voca_size

    # audio features; the middle (time) dimension is left unspecified
    feature_shape = (self.batch_size, None, 20)
    self.x = tf.placeholder(dtype=tf.sg_floatx, shape=feature_shape)

    # network output for the audio features
    self.logit = get_logit(self.x, voca_size=self.voca_size)

    # a frame counts toward the length when its feature sum is non-zero
    is_frame = tf.not_equal(self.x.sg_sum(axis=2), 0.)
    self.seq_len = is_frame.sg_int().sg_sum(axis=1)

    # open a session, initialise it, then restore the checkpoint
    self.session = tf.Session()
    tf.sg_init(self.session)
    self.saver = tf.train.Saver()
    newest_checkpoint = tf.train.latest_checkpoint('asset/train')
    self.saver.restore(self.session, newest_checkpoint)
def sg_reverse_seq(tensor, opt):
    r"""Reverses each sequence over its non-zero length.

    The sequence lengths handed to `tf.reverse_sequence` are obtained by
    counting the non-zero entries of `tensor` along `opt.dim`.

    For example,

    ```
    tensor = [[1, 2, 3, 0, 0], [4, 5, 0, 0, 0]]
    tensor.sg_reverse_seq()
    => [[3 2 1 0 0]
        [5 4 0 0 0]]
    ```

    Args:
      tensor: A 2-D `Tensor` (automatically given by chain).
      opt:
        dim: Dimension to reverse. Default is 1.
        name : If provided, it replaces current tensor's name.

    Returns:
      A `Tensor` with the same shape and type as `tensor`.
    """
    # reverse along dimension 1 unless the caller says otherwise
    opt += tf.sg_opt(dim=1)

    # number of non-zero entries along the sequence dimension
    nonzero = tf.not_equal(tensor, tf.zeros_like(tensor))
    lengths = nonzero.sg_int().sg_sum(dims=opt.dim)

    return tf.reverse_sequence(tensor, lengths, opt.dim, name=opt.name)
def init_model():
    """Build the decoding graph and publish its input/output as globals.

    After the call, the module-level ``x`` is the MFCC placeholder and
    ``y`` is the dense tensor of decoded labels (shifted up by one).
    """
    global x, y

    # most verbose logging level
    tf.sg_verbosity(10)

    # hyper parameters
    batch_size = 1

    # number of output symbols
    voca_size = data.voca_size

    # audio features; the time dimension is left unspecified
    x = tf.placeholder(dtype=tf.sg_floatx, shape=(batch_size, None, 20))

    # a frame counts toward the length when its feature sum is non-zero
    is_frame = tf.not_equal(x.sg_sum(axis=2), 0.)
    seq_len = is_frame.sg_int().sg_sum(axis=1)

    # network output for the audio features
    logit = get_logit(x, voca_size=voca_size)

    # beam-search CTC decoding on the logits with axes 0 and 1 swapped
    swapped = logit.sg_transpose(perm=[1, 0, 2])
    decoded, _ = tf.nn.ctc_beam_search_decoder(swapped, seq_len,
                                               merge_repeated=False)

    # first decoded path as a dense tensor, labels shifted up by one
    y = tf.sparse_to_dense(decoded[0].indices,
                           decoded[0].dense_shape,
                           decoded[0].values) + 1
def __init__(self, mode="train"):
    """Build the character-to-word prediction graph.

    Args:
      mode: A string. "train" reads batches from get_batch_data() and
        builds the loss; any other value creates an input placeholder only.
    """
    is_training = mode == "train"

    # vocabularies
    self.char2idx, self.idx2char = load_char_vocab()
    self.word2idx, self.idx2word = load_word_vocab()

    # inputs
    if is_training:
        self.x, self.y, self.num_batch = get_batch_data()
    else:
        self.x = tf.placeholder(tf.int32, [None, Hyperparams.seqlen])

    # character embedding and lookup
    self.emb_x = tf.sg_emb(name='emb_x',
                           voca_size=len(self.char2idx),
                           dim=Hyperparams.embed_dim)
    self.enc = self.x.sg_lookup(emb=self.emb_x)

    # 20 residual conv1d layers that keep the channel count
    with tf.sg_context(size=5, act='relu', bn=True):
        for _ in range(20):
            channels = self.enc.get_shape().as_list()[-1]
            self.enc = self.enc + self.enc.sg_conv1d(dim=channels)

    # project every position onto the word vocabulary
    self.enc = self.enc.sg_conv1d(size=1, dim=len(self.word2idx),
                                  act='linear', bn=False)

    def make_weights(size):
        # Weights 1..size, scaled by 1 / (size * (size + 1) // 2), with a
        # leading and a trailing unit axis added for broadcasting.
        total = (1 + size) * size // 2
        ramp = tf.range(1, size + 1, dtype=tf.float32)
        ramp = ramp * (1. / total)
        return tf.expand_dims(tf.expand_dims(ramp, 0), -1)

    # weighted sum over axis 1 instead of a plain mean
    self.weights = make_weights(Hyperparams.seqlen)
    self.enc = self.enc * self.weights  # broadcasting
    self.logits = self.enc.sg_sum(axis=[1], keep_dims=False)

    if is_training:
        self.ce = self.logits.sg_ce(target=self.y, mask=False,
                                    one_hot=False)
        # targets equal to 1 (unknown word) are left out of the loss
        self.istarget = tf.not_equal(self.y,
                                     tf.ones_like(self.y)).sg_float()
        masked_total = (self.ce * self.istarget).sg_sum()
        self.reduced_loss = masked_total / (self.istarget.sg_sum() + 1e-5)
        tf.sg_summary_loss(self.reduced_loss, "reduced_loss")
def sg_ce(tensor, opt):
    r"""Returns softmax cross entropy loss between `tensor` and `target`.

    Args:
      tensor: A `Tensor`. Logits. Unscaled log probabilities.
      opt:
        target: A `Tensor` with the same length in the first dimension as the `tensor`. Labels.
        one_hot: Boolean. Whether to treat the labels as one-hot encoding. Default is False.
        mask: Boolean. If True, zeros in the target will be excluded from the calculation.
        name: A `string`. A name to display in the tensor board web UI.

    Returns:
      A `Tensor` of per-example losses; as the examples below show, it has
      the shape of `tensor` without the last (class) dimension.

    For example,

    ```
    tensor = [[[2, -1, 3], [3, 1, -2]]]
    target = [[2, 1]]
    tensor.sg_ce(target=target) => [[ 0.32656264  2.13284516]]
    ```

    For example,

    ```
    tensor = [[2, -1, 3], [3, 1, -2]]
    target = [[0, 0, 1], [1, 0, 0]]
    tensor.sg_ce(target=target, one_hot=True) => [ 0.32656264  0.13284527]
    ```
    """
    # integer labels unless told otherwise
    opt += tf.sg_opt(one_hot=False)

    assert opt.target is not None, 'target is mandatory.'

    # pick the dense or the sparse flavour of the loss
    if opt.one_hot:
        xent = tf.nn.softmax_cross_entropy_with_logits(labels=opt.target,
                                                       logits=tensor)
    else:
        xent = tf.nn.sparse_softmax_cross_entropy_with_logits(
            labels=opt.target, logits=tensor)
    out = tf.identity(xent, 'ce')

    # masking loss
    # NOTE(review): the mask has the shape of `target`; with one_hot=True
    # that differs from the loss shape - confirm this combination is unused.
    if opt.mask:
        keep = tf.not_equal(opt.target, tf.zeros_like(opt.target)).sg_float()
        out = out * keep

    # add summary
    tf.sg_summary_loss(out, name=opt.name)

    return out
def sg_ce(tensor, opt):
    r"""Returns softmax cross entropy loss between `tensor` and `target`.

    Args:
      tensor: A `Tensor`. Logits. Unscaled log probabilities.
      opt:
        target: A `Tensor`. Labels. Mandatory.
        one_hot: Boolean. Whether to treat the labels as one-hot encoding. Default is False.
        mask: Boolean. If True, zeros in the target will be excluded from the calculation.

    Returns:
      A `Tensor` holding the per-example loss.
    """
    opt += tf.sg_opt(one_hot=False)

    assert opt.target is not None, 'target is mandatory.'

    # Logits and labels are passed by keyword: TensorFlow >= 1.0 rejects
    # positional arguments here, and older releases accept the same
    # keyword names.
    if opt.one_hot:
        out = tf.identity(
            tf.nn.softmax_cross_entropy_with_logits(logits=tensor,
                                                    labels=opt.target),
            'ce')
    else:
        out = tf.identity(
            tf.nn.sparse_softmax_cross_entropy_with_logits(logits=tensor,
                                                           labels=opt.target),
            'ce')

    # masking loss
    if opt.mask:
        out *= tf.not_equal(opt.target, tf.zeros_like(opt.target)).sg_float()

    # add summary
    tf.sg_summary_loss(out)

    return out
def sg_to_sparse(tensor, opt):
    r"""Converts a zero-padded dense tensor into a sparse tensor.

    Every non-zero entry becomes a sparse value, stored minus one so that
    the sparse values are zero-based.

    See `tf.SparseTensor()` in tensorflow.

    Args:
      tensor: A `Tensor` with zero-padding (automatically given by chain).
      opt:
        name: If provided, replace current tensor's name.

    Returns:
        A `SparseTensor`.
    """
    # coordinates of the non-zero entries
    is_set = tf.not_equal(tensor.sg_float(), 0.)
    indices = tf.where(is_set)

    # values at those coordinates, shifted to a zero-based index
    zero_based = tf.gather_nd(tensor, indices) - 1
    full_shape = tf.shape(tensor).sg_cast(dtype=tf.int64)

    return tf.SparseTensor(indices=indices,
                           values=zero_based,
                           dense_shape=full_shape)
def __init__(self, mode="train"):
    """Build the input-to-output character conversion graph.

    Args:
      mode: A string. "train" reads batches from get_batch_data() and
        builds the loss; any other value creates an input placeholder only.
    """
    is_training = mode == "train"

    if is_training:
        self.x, self.y, self.num_batch = get_batch_data()
    else:  # test
        self.x = tf.placeholder(tf.int64, [None, Hyperparams.maxlen])

    # embedding matrix for the input characters
    pnyn2idx, _, hanzi2idx, _ = load_vocab()
    self.emb_x = tf.sg_emb(name='emb_x',
                           voca_size=len(pnyn2idx),
                           dim=Hyperparams.embed_dim)
    self.enc = self.x.sg_lookup(emb=self.emb_x)

    # 20 residual conv1d layers that keep the channel count
    with tf.sg_context(size=5, act='relu', bn=True):
        for _ in range(20):
            channels = self.enc.get_shape().as_list()[-1]
            self.enc = self.enc + self.enc.sg_conv1d(dim=channels)

    # final 1x1 convolution producing one logit per output character
    self.logits = self.enc.sg_conv1d(size=1, dim=len(hanzi2idx),
                                     act='linear', bn=False)

    if is_training:
        # per-position loss with zero targets masked out
        self.ce = self.logits.sg_ce(target=self.y, mask=True)
        # 1.0 where the target is non-zero, 0.0 elsewhere
        self.istarget = tf.not_equal(self.y,
                                     tf.zeros_like(self.y)).sg_float()
        # total loss divided by the number of non-zero targets
        self.reduced_loss = self.ce.sg_sum() / self.istarget.sg_sum()
        tf.sg_summary_loss(self.reduced_loss, "reduced_loss")
def __init__(self, is_train=True):
    """Build the character conversion graph from dilated residual blocks.

    Args:
      is_train: Boolean. If True, batches come from get_batch_data() and
        the loss is built; otherwise only an input placeholder is created.
    """
    if is_train:
        self.x, self.y = get_batch_data()
    else:
        self.x = tf.placeholder(tf.int32, [None, Hyperparams.maxlen])

    # embedding matrix for the input characters
    hangul2idx, _, hanja2idx, _ = load_charmaps()
    self.emb_x = tf.sg_emb(name='emb_x',
                           voca_size=len(hangul2idx),
                           dim=Hyperparams.hidden_dim)

    # embedding lookup, cast to float
    self.enc = self.x.sg_lookup(emb=self.emb_x).sg_float()

    # two passes over residual blocks with rates 1, 2, 4, 8, 16
    for _ in range(2):
        for rate in (1, 2, 4, 8, 16):
            self.enc = self.enc.sg_res_block(size=5, rate=rate)

    # final 1x1 convolution producing one logit per output character
    self.logits = self.enc.sg_conv1d(size=1, dim=len(hanja2idx))

    if is_train:
        # per-position loss with zero targets masked out
        self.ce = self.logits.sg_ce(target=self.y, mask=True)
        # 1.0 where the target is non-zero, 0.0 elsewhere
        self.nonzeros = tf.not_equal(self.y,
                                     tf.zeros_like(self.y)).sg_float()
        # total loss divided by the number of non-zero targets
        self.reduced_loss = self.ce.sg_sum() / self.nonzeros.sg_sum()
        tf.sg_summary_loss(self.reduced_loss, "reduced_loss")
def sg_reverse_seq(tensor, opt):
    """Reverse each sequence in `tensor` over its non-zero length.

    Args:
      tensor: A `Tensor` whose zeros are treated as padding.
      opt:
        dim: Dimension to reverse. Default is 1.
        name: Name passed on to `tf.reverse_sequence`.

    Returns:
      The result of `tf.reverse_sequence` on `tensor`.
    """
    # reverse along dimension 1 unless the caller says otherwise
    opt += tf.sg_opt(dim=1)

    # number of non-zero entries along the sequence dimension
    nonzero = tf.not_equal(tensor, tf.zeros_like(tensor))
    lengths = nonzero.sg_int().sg_sum(dims=opt.dim)

    return tf.reverse_sequence(tensor, lengths, opt.dim, name=opt.name)
# number of utterances per batch
batch_size = 16

#
# inputs
#

# corpus input tensor (with QueueRunner); the set comes from the command line
data = SpeechCorpus(batch_size=batch_size, set_name=tf.sg_arg().set)

# audio features and target sentence labels
x, y = data.mfcc, data.label

# a frame counts toward the length when its feature sum is non-zero
seq_len = (tf.not_equal(x.sg_sum(axis=2), 0.)
           .sg_int()
           .sg_sum(axis=1))

#
# Testing Graph
#

# network output for the audio features
logit = get_logit(x, voca_size=voca_size)

# CTC loss against the target labels
loss = logit.sg_ctc(target=y, seq_len=seq_len)

#
# run network
#
# inputs
#

# corpus input tensor, sized for every GPU at once
data = SpeechCorpus(batch_size=batch_size * tf.sg_gpus())

# audio features, one slice per GPU
inputs = tf.split(data.mfcc, tf.sg_gpus(), axis=0)
# target sentence labels, one slice per GPU
labels = tf.split(data.label, tf.sg_gpus(), axis=0)

# per-slice sequence lengths: frames whose feature sum is non-zero
seq_len = [
    tf.not_equal(input_.sg_sum(axis=2), 0.).sg_int().sg_sum(axis=1)
    for input_ in inputs
]


# parallel loss tower
@tf.sg_parallel
def get_loss(opt):
    """Return the CTC loss for the slice selected by ``opt.gpu_index``."""
    tower = opt.gpu_index
    # network output for this tower's audio features
    logit = get_logit(opt.input[tower], voca_size=voca_size)
    # CTC loss
    return logit.sg_ctc(target=opt.target[tower],
                        seq_len=opt.seq_len[tower])

#
# train
#
import sugartensor as tf
import numpy as np
import librosa
import tensorflow as tfw
from tensorflow.python.framework import graph_util
from model import *
import data

# Rebuild the decoding graph, restore the trained weights and capture the
# resulting graph definition.

batch_size = 1  # batch size
voca_size = data.voca_size

# audio features; the time dimension is left unspecified
x = tf.placeholder(dtype=tf.sg_floatx, shape=(batch_size, None, 20))

# a frame counts toward the length when its feature sum is non-zero
seq_len = (tf.not_equal(x.sg_sum(axis=2), 0.)
           .sg_int()
           .sg_sum(axis=1))

# network output for the audio features
logit = get_logit(x, voca_size)

# beam-search CTC decoding on the logits with axes 0 and 1 swapped
decoded, _ = tf.nn.ctc_beam_search_decoder(
    logit.sg_transpose(perm=[1, 0, 2]),
    seq_len,
    merge_repeated=False)

# first decoded path as a dense tensor, shifted up by one, named "output"
y = tf.add(
    tf.sparse_to_dense(decoded[0].indices,
                       decoded[0].dense_shape,
                       decoded[0].values),
    1,
    name="output")

with tf.Session() as sess:
    tf.sg_init(sess)

    # load the newest checkpoint from the training directory
    saver = tf.train.Saver()
    saver.restore(sess, tf.train.latest_checkpoint('asset/train'))

    # graph definition of the restored model
    graph = tf.get_default_graph()
    input_graph_def = graph.as_graph_def()
#
# inputs
#

# corpus input tensor, sized for every GPU at once
data = SpeechCorpus(batch_size=batch_size * tf.sg_gpus())

# audio features, one slice per GPU
inputs = tf.split(data.mfcc, tf.sg_gpus(), axis=0)
# target sentence labels, one slice per GPU
labels = tf.split(data.label, tf.sg_gpus(), axis=0)

# per-slice sequence lengths: frames whose feature sum is non-zero
seq_len = [
    tf.not_equal(input_.sg_sum(axis=2), 0.).sg_int().sg_sum(axis=1)
    for input_ in inputs
]


# parallel loss tower
@tf.sg_parallel
def get_loss(opt):
    """Return the CTC loss for the slice selected by ``opt.gpu_index``."""
    tower = opt.gpu_index
    # network output for this tower's audio features
    logit = get_logit(opt.input[tower], voca_size=voca_size)
    # CTC loss
    return logit.sg_ctc(target=opt.target[tower],
                        seq_len=opt.seq_len[tower])


#
# train
#
tf.sg_train(
    lr=0.0001,
    loss=get_loss(input=inputs, target=labels, seq_len=seq_len),
    ep_size=data.num_batch,
    max_ep=50,
)
def __init__(self, mode="train"):
    """Assemble the quasi-RNN encoder/decoder graph.

    Args:
      mode: "train" reads batches from get_batch_data() and builds the
        loss; any other value creates placeholders and exposes ``preds``.
    """
    bs = Hp.batch_size

    # Inputs and labels
    if mode == "train":
        self.x, self.y, self.num_batch = get_batch_data()
        # decoder input: the targets shifted right by one, zero-filled first
        start = tf.zeros((bs, 1), tf.int32)
        self.y_src = tf.concat([start, self.y[:, :-1]], 1)
    else:  # inference
        self.x = tf.placeholder(tf.int32, shape=(bs, Hp.maxlen))
        self.y_src = tf.placeholder(tf.int32, shape=(bs, Hp.maxlen))

    # Vocabulary
    char2idx, _ = load_vocab()
    vocab_size = len(char2idx)

    # Embeddings for the source and the shifted target
    emb_x = tf.sg_emb(name='emb_x', voca_size=vocab_size, dim=Hp.hidden_units)
    emb_y = tf.sg_emb(name='emb_y', voca_size=vocab_size, dim=Hp.hidden_units)
    X = self.x.sg_lookup(emb=emb_x)
    Y = self.y_src.sg_lookup(emb=emb_y)

    # Encoding: the first three layers keep everything after the first
    # `bs` rows of their pooled output for the decoder (the original notes
    # call these rows the "zfo" part).
    layer_in = X
    lower_states = []
    for kernel in (6, 2, 2):
        conv = layer_in.sg_quasi_conv1d(is_enc=True, size=kernel)
        pool = conv.sg_quasi_rnn(is_enc=True, att=False)
        lower_states.append(pool[bs:])
        layer_in = pool
    H_zfo1, H_zfo2, H_zfo3 = lower_states

    # the fourth layer additionally exposes its first `bs` rows
    conv = pool.sg_quasi_conv1d(is_enc=True, size=2)
    pool = conv.sg_quasi_rnn(is_enc=True, att=False)
    H4 = pool[:bs]
    H_zfo4 = pool[bs:]

    # Decoding: each layer concatenates its input with the matching
    # encoder rows along axis 0
    d_pool = Y
    for enc_rows in (H_zfo1, H_zfo2, H_zfo3):
        d_conv = (d_pool.sg_concat(target=enc_rows, axis=0)
                  .sg_quasi_conv1d(is_enc=False, size=2))
        d_pool = d_conv.sg_quasi_rnn(is_enc=False, att=False)

    # the last decoder layer is the only one called with att=True
    d_conv = (d_pool.sg_concat(target=H_zfo4, axis=0)
              .sg_quasi_conv1d(is_enc=False, size=2))
    concat = H4.sg_concat(target=d_conv, axis=0)
    d_pool = concat.sg_quasi_rnn(is_enc=False, att=True)

    # one logit per vocabulary entry
    logits = d_pool.sg_conv1d(size=1, dim=vocab_size, act="linear")

    if mode == 'train':
        # cross entropy with zero targets masked, averaged over the
        # non-zero targets
        loss = logits.sg_ce(target=self.y, mask=True)
        istarget = tf.not_equal(self.y, 0).sg_float()
        self.reduced_loss = (loss.sg_sum()) / (istarget.sg_sum() + 0.00001)
        tf.sg_summary_loss(self.reduced_loss, "reduced_loss")
    else:  # inference
        self.preds = logits.sg_argmax()
def __init__(self, x, y, num_batch, vocab_size, emb_dim, hidden_dim,
             max_ep=240, infer_shape=(1, 1), mode="train"):
    """Build the LSTM generator graph for training or inference.

    Args:
      x: Input token tensor, used when mode is "train".
      y: Target token tensor, used when mode is "train".
      num_batch: Number of batches; stored on the instance.
      vocab_size: Number of rows in the embedding tables and output size
        of the LSTM layer.
      emb_dim: Embedding width.
      hidden_dim: Stored on the instance; not otherwise used here.
      max_ep: Stored on the instance.
      infer_shape: Shape of the placeholders created when mode is "infer".
      mode: "train" or "infer". In "infer" mode the variables are reused.
    """
    self.num_batch = num_batch
    self.emb_dim = emb_dim
    self.hidden_dim = hidden_dim
    self.vocab_size = vocab_size
    self.max_len_infer = 512
    self.max_ep = max_ep

    # the inference graph reuses the variables of the training graph
    reuse = (mode == 'infer')

    if mode == "train":
        self.x = x
        self.y = y
    elif mode == "infer":
        self.x = tf.placeholder(tf.int32, shape=infer_shape)
        self.y = tf.placeholder(tf.int32, shape=infer_shape)

    with tf.variable_scope("gen_embs", reuse=reuse):
        self.emb_x = tf.get_variable("emb_x", [self.vocab_size, self.emb_dim])
        self.emb_y = tf.get_variable("emb_y", [self.vocab_size, self.emb_dim])
        self.X = tf.nn.embedding_lookup(self.emb_x, self.x)
        self.Y = tf.nn.embedding_lookup(self.emb_y, self.y)

    with tf.sg_context(name='gen', reuse=reuse):
        if mode == "train":
            self.lstm_layer = self.X.sg_lstm(in_dim=self.emb_dim,
                                             dim=self.vocab_size,
                                             name="lstm")
            self.test = self.lstm_layer.sg_softmax(name="testtt")
            # Debug output. Written as function calls so the module also
            # parses on Python 3; the printed text is unchanged.
            print("mazum??")
            print(self.test)
        elif mode == "infer":
            self.lstm_layer = self.X.sg_lstm(in_dim=self.emb_dim,
                                             dim=self.vocab_size,
                                             last_only=True,
                                             name="lstm")
            self.log_prob = tf.log(self.lstm_layer)

            # next_token: select by distribution probability,
            # preds: select by argmax
            self.multinormed = tf.multinomial(self.log_prob, 1)
            # NOTE(review): this draws a second, separate sample rather
            # than reusing self.multinormed - confirm that is intended.
            self.next_token = tf.cast(
                tf.reshape(tf.multinomial(self.log_prob, 1),
                           [1, infer_shape[0]]),
                tf.int32)
            self.preds = self.lstm_layer.sg_argmax()

    if mode == "train":
        self.loss = self.lstm_layer.sg_ce(target=self.y)
        # 1.0 where the target is non-zero, 0.0 elsewhere
        self.istarget = tf.not_equal(self.y, 0).sg_float()
        # total loss divided by the number of non-zero targets
        self.reduced_loss = (self.loss.sg_sum()) / (
            self.istarget.sg_sum() + 0.0000001)
        tf.sg_summary_loss(self.reduced_loss, "reduced_loss")
def sg_to_sparse(tensor, opt):
    r"""Converts a zero-padded dense tensor into a sparse tensor.

    Every non-zero entry of `tensor` becomes a sparse value, stored minus
    one so that the sparse values are zero-based.

    Args:
      tensor: A `Tensor` with zero-padding (automatically given by chain).
      opt: Options given by the chain; not read by this function.

    Returns:
      A `SparseTensor`.
    """
    indices = tf.where(tf.not_equal(tensor.sg_float(), 0.))
    # The third argument is passed positionally because its keyword name
    # differs between TensorFlow releases (`shape` before 1.0,
    # `dense_shape` from 1.0 on).
    return tf.SparseTensor(
        indices,
        tf.gather_nd(tensor, indices) - 1,  # for zero-based index
        tf.shape(tensor).sg_cast(dtype=tf.int64))
def __init__(self, mode="train"):
    """Assemble the quasi-RNN encoder/decoder graph.

    Args:
      mode: "train" reads batches from get_batch_data() and builds the
        loss; any other value creates placeholders and exposes ``preds``.
    """
    bs = Hp.batch_size

    # Inputs and labels
    if mode == "train":
        self.x, self.y, self.num_batch = get_batch_data()
        # decoder input: the targets shifted right by one, zero-filled first
        start = tf.zeros((bs, 1), tf.int32)
        self.y_src = tf.concat([start, self.y[:, :-1]], 1)
    else:  # inference
        self.x = tf.placeholder(tf.int32, shape=(bs, Hp.maxlen))
        self.y_src = tf.placeholder(tf.int32, shape=(bs, Hp.maxlen))

    # Vocabulary
    char2idx, _ = load_vocab()
    vocab_size = len(char2idx)

    def embed(inputs, vocab_size, embed_size, variable_scope):
        '''
        Looks `inputs` up in a [vocab_size, embed_size] table created
        under `variable_scope`.

        inputs = tf.expand_dims(tf.range(5), 0) => (1, 5)
        embed(inputs, 5, 10, 'scope') => (1, 5, 10)
        '''
        with tf.variable_scope(variable_scope):
            table = tf.get_variable(
                'lookup_table',
                dtype=tf.float32,
                shape=[vocab_size, embed_size],
                initializer=tf.truncated_normal_initializer())
        return tf.nn.embedding_lookup(table, inputs)

    # Embeddings for the source and the shifted target
    X = embed(self.x, vocab_size=vocab_size,
              embed_size=Hp.hidden_units, variable_scope='X')
    Y = embed(self.y_src, vocab_size=vocab_size,
              embed_size=Hp.hidden_units, variable_scope='Y')

    # Encoding: the first three layers keep everything after the first
    # `bs` rows of their pooled output for the decoder (the original notes
    # call these rows the "zfo" part).
    layer_in = X
    lower_states = []
    for kernel in (6, 2, 2):
        conv = layer_in.sg_quasi_conv1d(is_enc=True, size=kernel)
        pool = conv.sg_quasi_rnn(is_enc=True, att=False)
        lower_states.append(pool[bs:])
        layer_in = pool
    H_zfo1, H_zfo2, H_zfo3 = lower_states

    # the fourth layer additionally exposes its first `bs` rows
    conv = pool.sg_quasi_conv1d(is_enc=True, size=2)
    pool = conv.sg_quasi_rnn(is_enc=True, att=False)
    H4 = pool[:bs]
    H_zfo4 = pool[bs:]

    # Decoding: each layer concatenates its input with the matching
    # encoder rows along axis 0
    d_pool = Y
    for enc_rows in (H_zfo1, H_zfo2, H_zfo3):
        d_conv = (d_pool.sg_concat(target=enc_rows, axis=0)
                  .sg_quasi_conv1d(is_enc=False, size=2))
        d_pool = d_conv.sg_quasi_rnn(is_enc=False, att=False)

    # the last decoder layer is the only one called with att=True
    d_conv = (d_pool.sg_concat(target=H_zfo4, axis=0)
              .sg_quasi_conv1d(is_enc=False, size=2))
    concat = H4.sg_concat(target=d_conv, axis=0)
    d_pool = concat.sg_quasi_rnn(is_enc=False, att=True)

    # one logit per vocabulary entry
    logits = d_pool.sg_conv1d(size=1, dim=vocab_size, act="linear")

    if mode == 'train':
        # cross entropy with zero targets masked, averaged over the
        # non-zero targets
        self.loss = logits.sg_ce(target=self.y, mask=True)
        istarget = tf.not_equal(self.y, 0).sg_float()
        self.reduced_loss = (self.loss.sg_sum()) / (istarget.sg_sum() + 1e-8)
        tf.sg_summary_loss(self.reduced_loss, "reduced_loss")
    else:  # inference
        self.preds = logits.sg_argmax()
def __init__(self, mode="train"):
    """Assemble the quasi-RNN encoder/decoder graph, keeping every stage
    as an instance attribute.

    Args:
      mode: "train" reads batches from get_batch_data() and additionally
        builds the loss; any other value creates placeholders.
        ``preds`` is available in both cases.
    """
    bs = Hp.bs

    # Inputs and labels
    if mode == "train":
        self.x, self.y, self.num_batch = get_batch_data()
        # decoder input: the targets shifted right by one, zero-filled first
        start = tf.zeros((bs, 1), tf.int32)
        self.y_src = tf.concat(axis=1, values=[start, self.y[:, :-1]])
    else:  # inference
        self.x = tf.placeholder(tf.int32, shape=(bs, Hp.maxlen))
        self.y_src = tf.placeholder(tf.int32, shape=(bs, Hp.maxlen))

    # Vocabulary
    self.char2idx, self.idx2char = load_vocab()
    vocab_size = len(self.char2idx)

    # Embeddings for the source and the shifted target
    self.emb_x = tf.sg_emb(name='emb_x', voca_size=vocab_size, dim=Hp.hd)
    self.emb_y = tf.sg_emb(name='emb_y', voca_size=vocab_size, dim=Hp.hd)
    self.X = self.x.sg_lookup(emb=self.emb_x)
    self.Y = self.y_src.sg_lookup(emb=self.emb_y)

    # Encoding: the first three layers keep everything after the first
    # `bs` rows of their pooled output for the decoder (the original notes
    # call these rows the "zfo" part).
    layer_in = self.X
    lower_states = []
    for kernel in (6, 2, 2):
        self.conv = layer_in.sg_quasi_conv1d(is_enc=True, size=kernel)
        self.pool = self.conv.sg_quasi_rnn(is_enc=True, att=False)
        lower_states.append(self.pool[bs:])
        layer_in = self.pool
    self.H_zfo1, self.H_zfo2, self.H_zfo3 = lower_states

    # the fourth layer additionally exposes its first `bs` rows
    self.conv = self.pool.sg_quasi_conv1d(is_enc=True, size=2)
    self.pool = self.conv.sg_quasi_rnn(is_enc=True, att=False)
    self.H4 = self.pool[:bs]
    self.H_zfo4 = self.pool[bs:]

    # Decoding: the first layer joins the target embedding with the first
    # encoder rows along dimension 0
    self.dec = self.Y.sg_concat(target=self.H_zfo1, dim=0)
    self.d_conv = self.dec.sg_quasi_conv1d(is_enc=False, size=2)
    self.d_pool = self.d_conv.sg_quasi_rnn(is_enc=False, att=False)

    # the middle decoder layers follow the same pattern
    for enc_rows in (self.H_zfo2, self.H_zfo3):
        joined = self.d_pool.sg_concat(target=enc_rows, dim=0)
        self.d_conv = joined.sg_quasi_conv1d(is_enc=False, size=2)
        self.d_pool = self.d_conv.sg_quasi_rnn(is_enc=False, att=False)

    # the last decoder layer is the only one called with att=True
    joined = self.d_pool.sg_concat(target=self.H_zfo4, dim=0)
    self.d_conv = joined.sg_quasi_conv1d(is_enc=False, size=2)
    self.concat = self.H4.sg_concat(target=self.d_conv, dim=0)
    self.d_pool = self.concat.sg_quasi_rnn(is_enc=False, att=True)

    # one logit per vocabulary entry, plus the argmax prediction
    self.logits = self.d_pool.sg_conv1d(size=1, dim=vocab_size,
                                        act="linear")
    self.preds = self.logits.sg_argmax()

    if mode == 'train':
        # cross entropy with zero targets masked, averaged over the
        # non-zero targets
        self.loss = self.logits.sg_ce(target=self.y, mask=True)
        self.istarget = tf.not_equal(self.y, 0).sg_float()
        self.reduced_loss = (self.loss.sg_sum()) / (
            self.istarget.sg_sum() + 0.00001)
        tf.sg_summary_loss(self.reduced_loss, "reduced_loss")