def sg_ctc(tensor, opt):
    r"""Computes the CTC (Connectionist Temporal Classification) Loss between `tensor` and `target`.

    Args:
      tensor: A 3-D `float Tensor`.
      opt:
        target: A `Tensor` with the same length in the first dimension as the `tensor`. Labels. ( Dense tensor )
        name: A `string`. A name to display in the tensor board web UI.

    Returns:
      A 1-D `Tensor` with the same length in the first dimension as the `tensor`.

    For example,

    ```
    tensor = [[[2., -1., 3.], [3., 1., -2.]], [[1., -1., 2.], [3., 1., -2.]]]
    target = [[2., 1.], [2., 3.]]
    tensor.sg_ctc(target=target) => [ 4.45940781  2.43091154]
    ```
    """
    assert opt.target is not None, 'target is mandatory.'

    # default sequence length
    shape = tf.shape(tensor)
    opt += tf.sg_opt(seq_len=tf.ones((shape[0],), dtype=tf.sg_intx) * shape[1], merge=True)

    # ctc loss
    out = tf.nn.ctc_loss(opt.target.sg_to_sparse(), tensor, opt.seq_len,
                         ctc_merge_repeated=opt.merge, time_major=False)
    out = tf.identity(out, 'ctc')

    # add summary
    tf.sg_summary_loss(out, name=opt.name)

    return out
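# Illustrative usage sketch for sg_ctc above (not part of the source). It assumes
# `import sugartensor as tf`, which injects the sg_* methods onto tensors, and it
# reuses the constants from the docstring example; the printed values are the ones
# quoted there and are not re-verified here.
import sugartensor as tf  # assumed import

logits = tf.constant([[[2., -1., 3.], [3., 1., -2.]],
                      [[1., -1., 2.], [3., 1., -2.]]])  # (batch, time, classes)
labels = tf.constant([[2., 1.], [2., 3.]])              # dense label matrix

ctc = logits.sg_ctc(target=labels)                      # per-example CTC loss, shape (batch,)

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    print(sess.run(ctc))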
def sg_mae(tensor, opt):
    r"""Returns absolute error between `tensor` and `target`.

    Args:
      tensor: A `Tensor`.
      opt:
        target: A `Tensor` with the same shape and dtype as `tensor`.
        name: A `string`. A name to display in the tensor board web UI.

    Returns:
      A `Tensor` of the same shape and dtype as `tensor`

    For example,

    ```
    tensor = [[34, 11, 40], [13, 30, 42]]
    target = [[34, 10, 41], [14, 31, 40]]
    tensor.sg_mae(target=target) => [[ 0.  1.  1.]
                                     [ 1.  1.  2.]]
    ```
    """
    assert opt.target is not None, 'target is mandatory.'

    # absolute error
    out = tf.identity(tf.abs(tensor - opt.target), 'mae')

    # add summary
    tf.sg_summary_loss(out, name=opt.name)

    return out
def sg_bce(tensor, opt):
    r"""Returns sigmoid cross entropy loss between `tensor` and `target`.

    Args:
      tensor: A `Tensor`. Logits. Unscaled log probabilities.
      target: A `Tensor` with the same shape and dtype as `tensor`. Labels.

    Returns:
      A `Tensor` of the same shape as `tensor`

    For example,

    ```
    tensor = [[2, -1, 3], [3, 1, -2]]
    target = [[0, 1, 1], [1, 1, 0]]
    tensor.sg_bce(target=target) => [[ 2.12692809  1.31326163  0.04858733]
                                     [ 0.04858733  0.31326166  0.12692805]]
    ```
    """
    assert opt.target is not None, 'target is mandatory.'

    out = tf.identity(tf.nn.sigmoid_cross_entropy_with_logits(tensor, opt.target), 'bce')

    # add summary
    tf.sg_summary_loss(out)

    return out
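# Quick NumPy check (not part of the library) of the sg_bce example values above,
# using the standard numerically stable form of sigmoid cross entropy:
#   max(x, 0) - x * z + log(1 + exp(-|x|))
import numpy as np

x = np.array([[2., -1., 3.], [3., 1., -2.]])   # logits
z = np.array([[0., 1., 1.], [1., 1., 0.]])     # targets

bce = np.maximum(x, 0) - x * z + np.log1p(np.exp(-np.abs(x)))
print(bce)  # approx. [[2.1269  1.3133  0.0486]
            #          [0.0486  0.3133  0.1269]]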
def sg_mse(tensor, opt):
    r"""Returns squared error between `tensor` and `target`.

    Args:
      tensor: A `Tensor`.
      target: A `Tensor` with the same shape and dtype as `tensor`.

    Returns:
      A `Tensor` of the same shape and dtype as `tensor`

    For example,

    ```
    tensor = [[34, 11, 40], [13, 30, 42]]
    target = [[34, 10, 41], [14, 31, 40]]
    tensor.sg_mse(target=target) => [[ 0.  1.  1.]
                                     [ 1.  1.  4.]]
    ```
    """
    assert opt.target is not None, 'target is mandatory.'

    # squared error
    out = tf.identity(tf.square(tensor - opt.target), 'mse')

    # add summary
    tf.sg_summary_loss(out)

    return out
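# Rough TF 1.x usage sketch for the element-wise losses above (not from the source);
# assumes `import sugartensor as tf` so that sg_mse / sg_mae are injected as tensor
# methods, and reuses the docstring constants.
import sugartensor as tf  # assumed import

pred  = tf.constant([[34., 11., 40.], [13., 30., 42.]])
label = tf.constant([[34., 10., 41.], [14., 31., 40.]])

mse = pred.sg_mse(target=label)  # element-wise squared error
mae = pred.sg_mae(target=label)  # element-wise absolute error

with tf.Session() as sess:
    print(sess.run([mse, mae]))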
def sg_bce(tensor, opt):
    assert opt.target is not None, 'target is mandatory.'

    out = tf.identity(tf.nn.sigmoid_cross_entropy_with_logits(tensor, opt.target), 'bce')

    # add summary
    tf.sg_summary_loss(out)

    return out
def sg_mse(tensor, opt):
    assert opt.target is not None, 'target is mandatory.'

    # squared error
    out = tf.identity(tf.square(tensor - opt.target), 'mse')

    # add summary
    tf.sg_summary_loss(out)

    return out
def sg_mae(tensor, opt):
    assert opt.target is not None, 'target is mandatory.'

    # absolute error
    out = tf.identity(tf.abs(tensor - opt.target), 'mae')

    # add summary
    tf.sg_summary_loss(out)

    return out
def __init__(self, mode="train"): ''' Args: mode: A string. Either "train" or "test" ''' self.char2idx, self.idx2char = load_char_vocab() self.word2idx, self.idx2word = load_word_vocab() if mode == "train": self.x, self.y, self.num_batch = get_batch_data() else: self.x = tf.placeholder(tf.int32, [None, Hyperparams.seqlen]) self.emb_x = tf.sg_emb(name='emb_x', voca_size=len(self.char2idx), dim=Hyperparams.embed_dim) self.enc = self.x.sg_lookup(emb=self.emb_x) with tf.sg_context(size=5, act='relu', bn=True): for _ in range(20): dim = self.enc.get_shape().as_list()[-1] self.enc += self.enc.sg_conv1d( dim=dim) # (64, 50, 300) float32 self.enc = self.enc.sg_conv1d(size=1, dim=len(self.word2idx), act='linear', bn=False) # (64, 50, 21293) float32 # self.logits = self.enc.sg_mean(dims=[1], keep_dims=False) # (64, 21293) float32 # Weighted Sum. Updated on Feb. 15, 2017. def make_weights(size): weights = tf.range(1, size + 1, dtype=tf.float32) weights *= 1. / ((1 + size) * size // 2) weights = tf.expand_dims(weights, 0) weights = tf.expand_dims(weights, -1) return weights self.weights = make_weights(Hyperparams.seqlen) # (1, 50, 1) self.enc *= self.weights # Broadcasting self.logits = self.enc.sg_sum(axis=[1], keep_dims=False) # (64, 21293) if mode == "train": self.ce = self.logits.sg_ce(target=self.y, mask=False, one_hot=False) self.istarget = tf.not_equal(self.y, tf.ones_like( self.y)).sg_float() # 1: Unkown self.reduced_loss = ((self.ce * self.istarget).sg_sum()) / ( self.istarget.sg_sum() + 1e-5) tf.sg_summary_loss(self.reduced_loss, "reduced_loss")
def sg_ce(tensor, opt):
    r"""Returns softmax cross entropy loss between `tensor` and `target`.

    Args:
      tensor: A `Tensor`. Logits. Unscaled log probabilities.
      opt:
        target: A `Tensor` with the same length in the first dimension as the `tensor`. Labels.
        one_hot: Boolean. Whether to treat the labels as one-hot encoding. Default is False.
        mask: Boolean. If True, zeros in the target will be excluded from the calculation.
        name: A `string`. A name to display in the tensor board web UI.

    Returns:
      A `Tensor` with the same shape as `tensor`, except that the last (class) dimension is reduced.

    For example,

    ```
    tensor = [[[2, -1, 3], [3, 1, -2]]]
    target = [[2, 1]]
    tensor.sg_ce(target=target) => [[ 0.32656264  2.13284516]]
    ```

    For example,

    ```
    tensor = [[2, -1, 3], [3, 1, -2]]
    target = [[0, 0, 1], [1, 0, 0]]
    tensor.sg_ce(target=target, one_hot=True) => [ 0.32656264  0.13284527]
    ```
    """
    opt += tf.sg_opt(one_hot=False)
    assert opt.target is not None, 'target is mandatory.'

    if opt.one_hot:
        out = tf.identity(tf.nn.softmax_cross_entropy_with_logits(labels=opt.target, logits=tensor), 'ce')
    else:
        out = tf.identity(tf.nn.sparse_softmax_cross_entropy_with_logits(labels=opt.target, logits=tensor), 'ce')

    # masking loss
    if opt.mask:
        out *= tf.not_equal(opt.target, tf.zeros_like(opt.target)).sg_float()

    # add summary
    tf.sg_summary_loss(out, name=opt.name)

    return out
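# Sketch of the two label formats accepted by sg_ce (not from the source; assumes
# `import sugartensor as tf`). Sparse targets are class indices; one-hot targets are
# full probability rows, selected with one_hot=True. The constants mirror the
# docstring examples.
import sugartensor as tf  # assumed import

logits = tf.constant([[2., -1., 3.], [3., 1., -2.]])

sparse_target  = tf.constant([2, 1])                        # class indices
one_hot_target = tf.constant([[0., 0., 1.], [1., 0., 0.]])  # one-hot rows

ce_sparse  = logits.sg_ce(target=sparse_target)             # shape (2,)
ce_one_hot = logits.sg_ce(target=one_hot_target, one_hot=True)

with tf.Session() as sess:
    print(sess.run([ce_sparse, ce_one_hot]))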
def sg_ctc(tensor, opt):
    assert opt.target is not None, 'target is mandatory.'

    # default sequence length
    shape = tf.shape(tensor)
    opt += tf.sg_opt(seq_len=tf.ones((shape[0],), dtype=tf.sg_intx) * shape[1])

    # ctc loss
    out = tf.nn.ctc_loss(tensor, opt.target.sg_to_sparse(), opt.seq_len, time_major=False)
    out = tf.identity(out, 'ctc')

    # add summary
    tf.sg_summary_loss(out)

    return out
def sg_ce(tensor, opt):
    opt += tf.sg_opt(one_hot=False)
    assert opt.target is not None, 'target is mandatory.'

    if opt.one_hot:
        out = tf.identity(tf.nn.softmax_cross_entropy_with_logits(tensor, opt.target), 'ce')
    else:
        out = tf.identity(tf.nn.sparse_softmax_cross_entropy_with_logits(tensor, opt.target), 'ce')

    # add summary
    tf.sg_summary_loss(out)

    return out
def sg_ce(tensor, opt):
    opt += tf.sg_opt(one_hot=False)
    assert opt.target is not None, 'target is mandatory.'

    if opt.one_hot:
        out = tf.identity(tf.nn.softmax_cross_entropy_with_logits(tensor, opt.target), 'ce')
    else:
        out = tf.identity(tf.nn.sparse_softmax_cross_entropy_with_logits(tensor, opt.target), 'ce')

    # masking loss
    if opt.mask:
        out *= tf.not_equal(opt.target, tf.zeros_like(opt.target)).sg_float()

    # add summary
    tf.sg_summary_loss(out)

    return out
def sg_hinge(tensor, opt):
    assert opt.target is not None, 'target is mandatory.'

    # default margin
    opt += tf.sg_opt(margin=1)

    # reshape target
    shape = tensor.get_shape().as_list()
    broadcast_shape = [-1] + [1] * (len(shape) - 2) + [shape[-1]]
    target = tf.cast(tf.reshape(opt.target, broadcast_shape), tf.sg_floatx)

    # hinge loss
    out = tf.identity(tf.maximum(opt.margin - target * tensor, 0), 'hinge')

    # add summary
    tf.sg_summary_loss(out)

    return out
def ner_cost(tensor, opt):
    one_hot_labels = tf.one_hot(opt.target - 1, opt.num_classes, dtype=tf.float32)
    cross_entropy = one_hot_labels * tf.log(tensor)
    cross_entropy = -tf.reduce_sum(cross_entropy, reduction_indices=2)

    mask = tf.sign(tf.reduce_max(tf.abs(one_hot_labels), reduction_indices=2))

    cross_entropy *= tf.cast(mask, tf.float32)
    cross_entropy = tf.reduce_sum(cross_entropy, reduction_indices=1)

    length = tf.cast(tf.reduce_sum(tf.sign(opt.target), reduction_indices=1), tf.int32)
    cross_entropy /= tf.cast(length, tf.float32)

    out = tf.reduce_mean(cross_entropy, name='ner_cost')

    # add summary
    tf.sg_summary_loss(out, name=opt.name)

    return out
def ner_cost(tensor, opt):
    one_hot_labels = tf.one_hot(opt.target - 1, opt.num_classes, dtype=tf.float32)
    cross_entropy = one_hot_labels * tf.log(tensor)
    cross_entropy = -tf.reduce_sum(cross_entropy, reduction_indices=2)

    mask = tf.sign(tf.abs(opt.target))

    cross_entropy *= tf.cast(mask, tf.float32)
    cross_entropy = tf.reduce_sum(cross_entropy, reduction_indices=1)

    length = tf.cast(tf.reduce_sum(tf.sign(opt.target), reduction_indices=1), tf.int32)
    cross_entropy /= tf.cast(length, tf.float32)

    out = tf.reduce_mean(cross_entropy, name='ner_cost')

    # add summary
    tf.sg_summary_loss(out, name=opt.name)

    return out
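# Plain NumPy rendering (illustrative only) of the masked, length-normalized cross
# entropy computed by ner_cost above. It assumes `tensor` holds per-class
# probabilities (not logits) and that label id 0 marks padding, which is what the
# sign-based mask and the `target - 1` shift imply.
import numpy as np

def ner_cost_np(probs, target, num_classes):
    # probs: (batch, time, num_classes) softmax outputs; target: (batch, time) ids, 0 = padding
    valid = (target > 0).astype(np.float32)                       # 1 for real steps, 0 for padding
    one_hot = np.eye(num_classes)[target - 1] * valid[..., None]  # padded steps become all-zero rows
    ce = -np.sum(one_hot * np.log(probs), axis=2)                 # per-step cross entropy
    ce = np.sum(ce * valid, axis=1) / np.sum(valid, axis=1)       # mean over real steps per sequence
    return ce.mean()                                              # batch mean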
def __init__(self, is_train=True):
    # inputs
    if is_train:
        self.X, self.Y, self.num_batch = get_batch_data()  # (16, 9, 9, 1), (16, 9, 9)
        self.X_val, self.Y_val, _ = get_batch_data(is_train=False)
    else:
        self.X = tf.placeholder(tf.float32, [None, 9, 9, 1])

    with tf.sg_context(size=3, act='relu', bn=True):
        self.logits = self.X.sg_identity()
        for _ in range(5):
            self.logits = (self.logits.sg_conv(dim=512))
        self.logits = self.logits.sg_conv(dim=10, size=1, act='linear', bn=False)  # (16, 9, 9, 10) float32

    if is_train:
        self.ce = self.logits.sg_ce(target=self.Y, mask=False)  # (16, 9, 9) dtype=float32
        self.istarget = tf.equal(self.X.sg_squeeze(), tf.zeros_like(self.X.sg_squeeze())).sg_float()  # zeros: 1, non-zeros: 0 (16, 9, 9) dtype=float32
        self.loss = self.ce * self.istarget  # (16, 9, 9) dtype=float32
        self.reduced_loss = self.loss.sg_sum() / self.istarget.sg_sum()
        tf.sg_summary_loss(self.reduced_loss, "reduced_loss")

        # accuracy evaluation ( for train set )
        self.preds = (self.logits.sg_argmax()).sg_int()
        self.hits = tf.equal(self.preds, self.Y).sg_float()
        self.acc_train = (self.hits * self.istarget).sg_sum() / self.istarget.sg_sum()

        # accuracy evaluation ( for validation set )
        self.preds_ = (self.logits.sg_reuse(input=self.X_val).sg_argmax()).sg_int()
        self.hits_ = tf.equal(self.preds_, self.Y_val).sg_float()
        self.istarget_ = tf.equal(self.X_val.sg_squeeze(), tf.zeros_like(self.X_val.sg_squeeze())).sg_float()
        self.acc_val = (self.hits_ * self.istarget_).sg_sum() / self.istarget_.sg_sum()
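# Tiny NumPy sketch (illustrative, made-up values) of the masking idea used above:
# only cells that are blank in the puzzle input (zeros) count toward loss/accuracy.
import numpy as np

puzzle = np.array([[0, 5], [3, 0]])          # 0 = blank cell to be filled in
truth  = np.array([[7, 5], [3, 2]])
preds  = np.array([[7, 5], [3, 9]])

istarget = (puzzle == 0).astype(np.float32)  # score only the blanks
hits     = (preds == truth).astype(np.float32)
acc      = (hits * istarget).sum() / istarget.sum()
print(acc)  # 0.5 -- one of the two blanks predicted correctly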
def sg_hinge(tensor, opt):
    r"""Returns hinge loss between `tensor` and `target`.

    Args:
      tensor: A `Tensor`.
      opt:
        target: A `Tensor`. Labels.
        margin: An int. Maximum margin. Default is 1.
        name: A `string`. A name to display in the tensor board web UI.

    Returns:
      A `Tensor`.

    For example,

    ```
    tensor = [[30, 10, 40], [13, 30, 42]]
    target = [[0, 0, 1], [0, 1, 0]]
    tensor.sg_hinge(target=target) => [[ 1.  1.  0.]
                                       [ 1.  0.  1.]]
    ```
    """
    assert opt.target is not None, 'target is mandatory.'

    # default margin
    opt += tf.sg_opt(margin=1)

    # reshape target
    shape = tensor.get_shape().as_list()
    broadcast_shape = [-1] + [1] * (len(shape) - 2) + [shape[-1]]
    target = tf.cast(tf.reshape(opt.target, broadcast_shape), tf.sg_floatx)

    # hinge loss
    out = tf.identity(tf.maximum(opt.margin - target * tensor, 0), 'hinge')

    # add summary
    tf.sg_summary_loss(out, name=opt.name)

    return out
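# NumPy check (not part of the library) of the sg_hinge docstring example:
# the element-wise hinge term is max(margin - target * tensor, 0) with margin = 1.
import numpy as np

x = np.array([[30., 10., 40.], [13., 30., 42.]])
t = np.array([[0., 0., 1.], [0., 1., 0.]])
margin = 1.0

hinge = np.maximum(margin - t * x, 0)
print(hinge)  # [[1. 1. 0.]
              #  [1. 0. 1.]]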
def __init__(self, mode="train"): ''' Args: is_train: Boolean. If True, backprop is executed. ''' if mode == "train": self.x, self.y, self.num_batch = get_batch_data( ) # (64, 50) int64, (64, 50) int64, 1636 else: # test self.x = tf.placeholder(tf.int64, [None, Hyperparams.maxlen]) # make embedding matrix for input characters pnyn2idx, _, hanzi2idx, _ = load_vocab() self.emb_x = tf.sg_emb(name='emb_x', voca_size=len(pnyn2idx), dim=Hyperparams.embed_dim) self.enc = self.x.sg_lookup(emb=self.emb_x) with tf.sg_context(size=5, act='relu', bn=True): for _ in range(20): dim = self.enc.get_shape().as_list()[-1] self.enc += self.enc.sg_conv1d( dim=dim) # (64, 50, 300) float32 # final fully convolutional layer for softmax self.logits = self.enc.sg_conv1d(size=1, dim=len(hanzi2idx), act='linear', bn=False) # (64, 50, 5072) float32 if mode == "train": self.ce = self.logits.sg_ce(target=self.y, mask=True) # (64, 50) float32 self.istarget = tf.not_equal(self.y, tf.zeros_like( self.y)).sg_float() # (64, 50) float32 self.reduced_loss = self.ce.sg_sum() / self.istarget.sg_sum( ) # () float32 tf.sg_summary_loss(self.reduced_loss, "reduced_loss")
def __init__(self, is_train=True):
    '''
    Args:
      is_train: Boolean. If True, backprop is executed.
    '''
    if is_train:
        self.x, self.y = get_batch_data()  # (16, 100), (16, 100)
    else:
        # self.x = tf.placeholder(tf.int32, [Hyperparams.batch_size, Hyperparams.maxlen])
        self.x = tf.placeholder(tf.int32, [None, Hyperparams.maxlen])

    # make embedding matrix for input characters
    hangul2idx, _, hanja2idx, _ = load_charmaps()
    self.emb_x = tf.sg_emb(name='emb_x', voca_size=len(hangul2idx), dim=Hyperparams.hidden_dim)

    # embed table lookup
    self.enc = self.x.sg_lookup(emb=self.emb_x).sg_float()  # (16, 100, 200)

    # loop dilated conv block
    for i in range(2):
        self.enc = (self.enc
                    .sg_res_block(size=5, rate=1)
                    .sg_res_block(size=5, rate=2)
                    .sg_res_block(size=5, rate=4)
                    .sg_res_block(size=5, rate=8)
                    .sg_res_block(size=5, rate=16))

    # final fully convolutional layer for softmax
    self.logits = self.enc.sg_conv1d(size=1, dim=len(hanja2idx))  # (16, 100, 4543)

    if is_train:
        self.ce = self.logits.sg_ce(target=self.y, mask=True)  # (16, 100)
        self.nonzeros = tf.not_equal(self.y, tf.zeros_like(self.y)).sg_float()  # (16, 100)
        self.reduced_loss = self.ce.sg_sum() / self.nonzeros.sg_sum()  # ()
        tf.sg_summary_loss(self.reduced_loss, "reduced_loss")
def __init__(self, is_train=True):
    # inputs
    if is_train:
        self.x, self.y, self.num_batch = get_batch_data()
        self.x_val, self.y_val, _ = get_batch_data(is_train=False)
    else:
        self.x = tf.placeholder(tf.float32, [None, 9, 9, 1])

    with tf.sg_context(size=3, act='relu', bn=True):
        self.logits = self.x.sg_identity()
        for _ in range(10):
            self.logits = (self.logits.sg_conv(dim=512))
        self.logits = self.logits.sg_conv(dim=10, size=1, act='linear', bn=False)

    if is_train:
        self.ce = self.logits.sg_ce(target=self.y, mask=False)
        self.istarget = tf.equal(self.x.sg_squeeze(), tf.zeros_like(self.x.sg_squeeze())).sg_float()
        self.loss = self.ce * self.istarget
        self.reduced_loss = self.loss.sg_sum() / self.istarget.sg_sum()
        tf.sg_summary_loss(self.reduced_loss, "reduced_loss")

        # accuracy evaluation ( for validation set )
        self.preds_ = (self.logits.sg_reuse(input=self.x_val).sg_argmax()).sg_int()
        self.hits_ = tf.equal(self.preds_, self.y_val).sg_float()
        self.istarget_ = tf.equal(self.x_val.sg_squeeze(), tf.zeros_like(self.x_val.sg_squeeze())).sg_float()
        self.acc = (self.hits_ * self.istarget_).sg_sum() / self.istarget_.sg_sum()
def sg_ctc(tensor, opt):
    r"""Returns the CTC (Connectionist Temporal Classification) loss between `tensor` and `target`.

    Args:
      tensor: A `Tensor`. Logits. Unscaled log probabilities.
      target: A `Tensor` with the same length in the first dimension as the `tensor`. Labels. ( Dense tensor )

    Returns:
      A 1-D `Tensor` with the same length in the first dimension as the `tensor`.

    For example,

    ```
    tensor = [[[2, -1, 3], [3, 1, -2]]]
    target = [[2, 1]]
    tensor.sg_ctc(target=target) => [ 31.32656264  64.13284527]
    ```
    """
    assert opt.target is not None, 'target is mandatory.'

    # default sequence length
    shape = tf.shape(tensor)
    opt += tf.sg_opt(seq_len=tf.ones((shape[0],), dtype=tf.sg_intx) * shape[1])

    # ctc loss
    out = tf.nn.ctc_loss(tensor, opt.target.sg_to_sparse(), opt.seq_len, time_major=False)
    out = tf.identity(out, 'ctc')

    # add summary
    tf.sg_summary_loss(out)

    return out
def __init__(self, mode="train"): # Inputs and Labels if mode == "train": self.x, self.y, self.num_batch = get_batch_data( ) # (16, 150) int32, (16, 150) int32, int self.y_src = tf.concat( [tf.zeros((Hp.batch_size, 1), tf.int32), self.y[:, :-1]], 1) # (16, 150) int32 else: # inference self.x = tf.placeholder(tf.int32, shape=(Hp.batch_size, Hp.maxlen)) self.y_src = tf.placeholder(tf.int32, shape=(Hp.batch_size, Hp.maxlen)) # Load vocabulary char2idx, idx2char = load_vocab() # Embedding emb_x = tf.sg_emb(name='emb_x', voca_size=len(char2idx), dim=Hp.hidden_units) # (179, 320) emb_y = tf.sg_emb(name='emb_y', voca_size=len(char2idx), dim=Hp.hidden_units) # (179, 320) X = self.x.sg_lookup(emb=emb_x) # (16, 150, 320) Y = self.y_src.sg_lookup(emb=emb_y) # (16, 150, 320) # Encoding conv = X.sg_quasi_conv1d(is_enc=True, size=6) # (16*3, 150, 320) pool = conv.sg_quasi_rnn(is_enc=True, att=False) # (16*4, 150, 320) H_zfo1 = pool[Hp.batch_size:] # (16*3, 15, 320) for decoding conv = pool.sg_quasi_conv1d(is_enc=True, size=2) # (16*3, 150, 320) pool = conv.sg_quasi_rnn(is_enc=True, att=False) # (16*4, 150, 320) H_zfo2 = pool[Hp.batch_size:] # (16*3, 150, 320) for decoding conv = pool.sg_quasi_conv1d(is_enc=True, size=2) # (16*3, 150, 320) pool = conv.sg_quasi_rnn(is_enc=True, att=False) # (16*4, 150, 320) H_zfo3 = pool[Hp.batch_size:] # (16*3, 150, 320) for decoding conv = pool.sg_quasi_conv1d(is_enc=True, size=2) # (16*3, 150, 320) pool = conv.sg_quasi_rnn(is_enc=True, att=False) # (16*4, 150, 320) H4 = pool[:Hp.batch_size] # (16, 150, 320) for decoding H_zfo4 = pool[Hp.batch_size:] # (16*3, 150, 320) for decoding # Decoding d_conv = (Y.sg_concat(target=H_zfo1, axis=0).sg_quasi_conv1d(is_enc=False, size=2)) d_pool = d_conv.sg_quasi_rnn(is_enc=False, att=False) # (16*4, 150, 320) d_conv = (d_pool.sg_concat(target=H_zfo2, axis=0).sg_quasi_conv1d(is_enc=False, size=2)) d_pool = d_conv.sg_quasi_rnn(is_enc=False, att=False) # (16*4, 150, 320) d_conv = (d_pool.sg_concat(target=H_zfo3, axis=0).sg_quasi_conv1d(is_enc=False, size=2)) d_pool = d_conv.sg_quasi_rnn(is_enc=False, att=False) # (16*4, 150, 320) d_conv = (d_pool.sg_concat(target=H_zfo4, axis=0).sg_quasi_conv1d(is_enc=False, size=2)) concat = H4.sg_concat(target=d_conv, axis=0) d_pool = concat.sg_quasi_rnn(is_enc=False, att=True) # (16, 150, 320) logits = d_pool.sg_conv1d(size=1, dim=len(char2idx), act="linear") # (16, 150, 179) if mode == 'train': # cross entropy loss with logits ( for training set ) loss = logits.sg_ce(target=self.y, mask=True) istarget = tf.not_equal(self.y, 0).sg_float() self.reduced_loss = (loss.sg_sum()) / (istarget.sg_sum() + 0.00001) tf.sg_summary_loss(self.reduced_loss, "reduced_loss") else: # inference self.preds = logits.sg_argmax()
def __init__(self, x, y, num_batch, vocab_size, emb_dim, hidden_dim,
             max_ep=240, infer_shape=(1, 1), mode="train"):

    self.num_batch = num_batch
    self.emb_dim = emb_dim
    self.hidden_dim = hidden_dim
    self.vocab_size = vocab_size
    self.max_len_infer = 512
    self.max_ep = max_ep

    # reuse = len([t for t in tf.global_variables() if t.name.startswith('gen')]) > 0
    reuse = (mode == 'infer')

    if mode == "train":
        self.x = x
        self.y = y
    elif mode == "infer":
        self.x = tf.placeholder(tf.int32, shape=infer_shape)
        self.y = tf.placeholder(tf.int32, shape=infer_shape)

    with tf.variable_scope("gen_embs", reuse=reuse):
        self.emb_x = tf.get_variable("emb_x", [self.vocab_size, self.emb_dim])
        self.emb_y = tf.get_variable("emb_y", [self.vocab_size, self.emb_dim])
        self.X = tf.nn.embedding_lookup(self.emb_x, self.x)
        self.Y = tf.nn.embedding_lookup(self.emb_y, self.y)

    with tf.sg_context(name='gen', reuse=reuse):
        # self.emb_x = tf.Variable(tf.random_uniform([self.vocab_size, self.emb_dim], 0.0, 1.0), name="emb_x")
        # self.emb_y = tf.Variable(tf.random_uniform([self.vocab_size, self.emb_dim], 0.0, 1.0), name="emb_y")
        # self.emb_x = tf.sg_emb(name='emb_x', voca_size=self.vocab_size, dim=self.emb_dim)  # (68,16)
        # self.emb_y = tf.sg_emb(name='emb_y', voca_size=self.vocab_size, dim=self.emb_dim)  # (68,16)
        # self.X = self.x.sg_lookup(emb=self.emb_x)  # (8,63,16)
        # self.Y = self.y.sg_lookup(emb=self.emb_y)  # (8,63,16)

        if mode == "train":
            self.lstm_layer = self.X.sg_lstm(in_dim=self.emb_dim, dim=self.vocab_size, name="lstm")  # (8, 63, 68)
            self.test = self.lstm_layer.sg_softmax(name="testtt")
            print("mazum??")
            print(self.test)
        elif mode == "infer":
            self.lstm_layer = self.X.sg_lstm(in_dim=self.emb_dim, dim=self.vocab_size, last_only=True, name="lstm")
            self.log_prob = tf.log(self.lstm_layer)

            # next_token: select by distribution probability, preds: select by argmax
            self.multinormed = tf.multinomial(self.log_prob, 1)
            self.next_token = tf.cast(tf.reshape(tf.multinomial(self.log_prob, 1), [1, infer_shape[0]]), tf.int32)
            self.preds = self.lstm_layer.sg_argmax()

    if mode == "train":
        self.loss = self.lstm_layer.sg_ce(target=self.y)
        self.istarget = tf.not_equal(self.y, 0).sg_float()
        self.reduced_loss = (self.loss.sg_sum()) / (self.istarget.sg_sum() + 0.0000001)
        tf.sg_summary_loss(self.reduced_loss, "reduced_loss")
with tf.sg_context(name='encoder', size=4, stride=2, act='relu'):
    mu = (x.sg_conv(dim=64)
           .sg_conv(dim=128)
           .sg_flatten()
           .sg_dense(dim=1024)
           .sg_dense(dim=num_dim, act='linear'))

# re-parameterization trick with random gaussian
z = mu + tf.random_normal(mu.get_shape())

# decoder network
with tf.sg_context(name='decoder', size=4, stride=2, act='relu'):
    xx = (z.sg_dense(dim=1024)
           .sg_dense(dim=7 * 7 * 128)
           .sg_reshape(shape=(-1, 7, 7, 128))
           .sg_upconv(dim=64)
           .sg_upconv(dim=1, act='sigmoid'))

# add image summary
tf.sg_summary_image(x, name='origin')
tf.sg_summary_image(xx, name='recon')

# loss
loss_recon = xx.sg_mse(target=x, name='recon').sg_mean(axis=[1, 2, 3])
loss_kld = tf.square(mu).sg_sum(axis=1) / (28 * 28)
tf.sg_summary_loss(loss_kld, name='kld')
loss = loss_recon + loss_kld * 0.5

# do training
tf.sg_train(loss=loss, log_interval=10, ep_size=data.train.num_batch, max_ep=30,
            early_stop=False, save_dir='asset/train/vae')
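# Side note (illustrative, not from the source): because the encoder above outputs
# only a mean `mu` and the added noise has fixed unit variance, the KL term of a
# standard VAE reduces to 0.5 * sum(mu**2) per example; `tf.square(mu).sg_sum(axis=1)`
# computes that sum, which the script then rescales by 1/(28*28) and weights by 0.5.
import numpy as np

mu = np.array([0.3, -1.2, 0.5])  # hypothetical latent mean for one example
kld = 0.5 * np.sum(mu ** 2)      # KL( N(mu, I) || N(0, I) )
print(kld)                       # 0.89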
def __init__(self, mode="train"): # Inputs and Labels if mode == "train": self.x, self.y, self.num_batch = get_batch_data() # (16, 150) int32, (16, 150) int32, int self.y_src = tf.concat([tf.zeros((Hp.batch_size, 1), tf.int32), self.y[:, :-1]], 1) # (16, 150) int32 else: # inference self.x = tf.placeholder(tf.int32, shape=(Hp.batch_size, Hp.maxlen)) self.y_src = tf.placeholder(tf.int32, shape=(Hp.batch_size, Hp.maxlen)) # Load vocabulary char2idx, idx2char = load_vocab() # Embedding def embed(inputs, vocab_size, embed_size, variable_scope): ''' inputs = tf.expand_dims(tf.range(5), 0) => (1, 5) _embed(inputs, 5, 10) => (1, 5, 10) ''' with tf.variable_scope(variable_scope): lookup_table = tf.get_variable('lookup_table', dtype=tf.float32, shape=[vocab_size, embed_size], initializer=tf.truncated_normal_initializer()) return tf.nn.embedding_lookup(lookup_table, inputs) X = embed(self.x, vocab_size=len(char2idx), embed_size=Hp.hidden_units, variable_scope='X') # (179, 320) Y = embed(self.y_src, vocab_size=len(char2idx), embed_size=Hp.hidden_units, variable_scope='Y') # (179, 320) # Y = tf.concat((tf.zeros_like(Y[:, :1, :]), Y[:, :-1, :]), 1) # Encoding conv = X.sg_quasi_conv1d(is_enc=True, size=6) # (16*3, 150, 320) pool = conv.sg_quasi_rnn(is_enc=True, att=False) # (16*4, 150, 320) H_zfo1 = pool[Hp.batch_size:] # (16*3, 15, 320) for decoding conv = pool.sg_quasi_conv1d(is_enc=True, size=2) # (16*3, 150, 320) pool = conv.sg_quasi_rnn(is_enc=True, att=False) # (16*4, 150, 320) H_zfo2 = pool[Hp.batch_size:] # (16*3, 150, 320) for decoding conv = pool.sg_quasi_conv1d(is_enc=True, size=2) # (16*3, 150, 320) pool = conv.sg_quasi_rnn(is_enc=True, att=False) # (16*4, 150, 320) H_zfo3 = pool[Hp.batch_size:] # (16*3, 150, 320) for decoding conv = pool.sg_quasi_conv1d(is_enc=True, size=2) # (16*3, 150, 320) pool = conv.sg_quasi_rnn(is_enc=True, att=False) # (16*4, 150, 320) H4 = pool[:Hp.batch_size] # (16, 150, 320) for decoding H_zfo4 = pool[Hp.batch_size:] # (16*3, 150, 320) for decoding # Decoding d_conv = (Y.sg_concat(target=H_zfo1, axis=0) .sg_quasi_conv1d(is_enc=False, size=2)) d_pool = d_conv.sg_quasi_rnn(is_enc=False, att=False) # (16*4, 150, 320) d_conv = (d_pool.sg_concat(target=H_zfo2, axis=0) .sg_quasi_conv1d(is_enc=False, size=2)) d_pool = d_conv.sg_quasi_rnn(is_enc=False, att=False) # (16*4, 150, 320) d_conv = (d_pool.sg_concat(target=H_zfo3, axis=0) .sg_quasi_conv1d(is_enc=False, size=2)) d_pool = d_conv.sg_quasi_rnn(is_enc=False, att=False) # (16*4, 150, 320) d_conv = (d_pool.sg_concat(target=H_zfo4, axis=0) .sg_quasi_conv1d(is_enc=False, size=2)) concat = H4.sg_concat(target=d_conv, axis=0) d_pool = concat.sg_quasi_rnn(is_enc=False, att=True) # (16, 150, 320) logits = d_pool.sg_conv1d(size=1, dim=len(char2idx), act="linear") # (16, 150, 179) if mode=='train': # cross entropy loss with logits ( for training set ) self.loss = logits.sg_ce(target=self.y, mask=True) istarget = tf.not_equal(self.y, 0).sg_float() self.reduced_loss = (self.loss.sg_sum()) / (istarget.sg_sum() + 1e-8) tf.sg_summary_loss(self.reduced_loss, "reduced_loss") else: # inference self.preds = logits.sg_argmax()
mse = tf.reduce_mean(tf.square(disc - xx), reduction_indices=[1, 2, 3])
mse_real, mse_fake = mse[:batch_size], mse[batch_size:]

loss_disc = mse_real + tf.maximum(margin - mse_fake, 0)  # discriminator loss
loss_gen = mse_fake + pt * pt_weight  # generator loss + PT regularizer

train_disc = tf.sg_optim(loss_disc, lr=0.001, category='discriminator')  # discriminator train ops
train_gen = tf.sg_optim(loss_gen, lr=0.001, category='generator')  # generator train ops

#
# add summary
#

tf.sg_summary_loss(tf.identity(loss_disc, name='disc'))
tf.sg_summary_loss(tf.identity(loss_gen, name='gen'))
tf.sg_summary_image(gen)

#
# training
#

# def alternate training func
@tf.sg_train_func
def alt_train(sess, opt):
    l_disc = sess.run([loss_disc, train_disc])[0]  # training discriminator
    l_gen = sess.run([loss_gen, train_gen])[0]  # training generator
    return np.mean(l_disc) + np.mean(l_gen)
def __init__(self, mode="train"): # Inputs and Labels if mode == "train": self.x, self.y, self.num_batch = get_batch_data( ) # (16, 150) int32, (16, 150) int32, int self.y_src = tf.concat( axis=1, values=[tf.zeros((Hp.bs, 1), tf.int32), self.y[:, :-1]]) # (16, 150) int32 else: # inference self.x = tf.placeholder(tf.int32, shape=(Hp.bs, Hp.maxlen)) self.y_src = tf.placeholder(tf.int32, shape=(Hp.bs, Hp.maxlen)) # Load vocabulary self.char2idx, self.idx2char = load_vocab() # Embedding self.emb_x = tf.sg_emb(name='emb_x', voca_size=len(self.char2idx), dim=Hp.hd) # (179, 320) self.emb_y = tf.sg_emb(name='emb_y', voca_size=len(self.char2idx), dim=Hp.hd) # (179, 320) self.X = self.x.sg_lookup(emb=self.emb_x) # (16, 150, 320) self.Y = self.y_src.sg_lookup(emb=self.emb_y) # (16, 150, 320) # Encoding self.conv = self.X.sg_quasi_conv1d(is_enc=True, size=6) # (16*4, 150, 320) self.pool = self.conv.sg_quasi_rnn(is_enc=True, att=False) # (16*4, 150, 320) self.H_zfo1 = self.pool[Hp.bs:] # (16*3, 15, 320) for decoding self.conv = self.pool.sg_quasi_conv1d(is_enc=True, size=2) # (16*4, 150, 320) self.pool = self.conv.sg_quasi_rnn(is_enc=True, att=False) # (16*4, 150, 320) self.H_zfo2 = self.pool[Hp.bs:] # (16*3, 150, 320) for decoding self.conv = self.pool.sg_quasi_conv1d(is_enc=True, size=2) # (16*4, 150, 320) self.pool = self.conv.sg_quasi_rnn(is_enc=True, att=False) # (16*4, 150, 320) self.H_zfo3 = self.pool[Hp.bs:] # (16*3, 150, 320) for decoding self.conv = self.pool.sg_quasi_conv1d(is_enc=True, size=2) # (16*4, 150, 320) self.pool = self.conv.sg_quasi_rnn(is_enc=True, att=False) # (16*4, 150, 320) self.H4 = self.pool[:Hp.bs] self.H_zfo4 = self.pool[Hp.bs:] # (16*3, 150, 320) for decoding # Decoding self.dec = self.Y.sg_concat(target=self.H_zfo1, dim=0) self.d_conv = self.dec.sg_quasi_conv1d(is_enc=False, size=2) self.d_pool = self.d_conv.sg_quasi_rnn(is_enc=False, att=False) # (16*4, 150, 320) self.d_conv = (self.d_pool.sg_concat( target=self.H_zfo2, dim=0).sg_quasi_conv1d(is_enc=False, size=2)) self.d_pool = self.d_conv.sg_quasi_rnn(is_enc=False, att=False) # (16*4, 150, 320) self.d_conv = (self.d_pool.sg_concat( target=self.H_zfo3, dim=0).sg_quasi_conv1d(is_enc=False, size=2)) self.d_pool = self.d_conv.sg_quasi_rnn(is_enc=False, att=False) # (16*4, 150, 320) self.d_conv = (self.d_pool.sg_concat( target=self.H_zfo4, dim=0).sg_quasi_conv1d(is_enc=False, size=2)) self.concat = self.H4.sg_concat(target=self.d_conv, dim=0) self.d_pool = self.concat.sg_quasi_rnn(is_enc=False, att=True) # (16*4, 150, 320) self.logits = self.d_pool.sg_conv1d(size=1, dim=len(self.char2idx), act="linear") # (16, 150, 179) self.preds = self.logits.sg_argmax() if mode == 'train': # cross entropy loss with logits ( for training set ) self.loss = self.logits.sg_ce(target=self.y, mask=True) self.istarget = tf.not_equal(self.y, 0).sg_float() self.reduced_loss = (self.loss.sg_sum()) / ( self.istarget.sg_sum() + 0.00001) tf.sg_summary_loss(self.reduced_loss, "reduced_loss")
# mse_real: the head of this statement was truncated in the source; disc_real and x
# are assumed names, completed by analogy with the mse_fake line below.
mse_real = tf.reduce_mean(tf.square(disc_real - x), reduction_indices=[1, 2, 3])
mse_fake = tf.reduce_mean(tf.square(disc_fake - gen), reduction_indices=[1, 2, 3])

# discriminator loss
loss_disc = mse_real + tf.maximum(margin - mse_fake, 0)
# generator loss + PT regularizer
loss_gen = mse_fake + pt * pt_weight

train_disc = tf.sg_optim(loss_disc, lr=0.001, category='discriminator')  # discriminator train ops
train_gen = tf.sg_optim(loss_gen, lr=0.001, category='generator')  # generator train ops

# add summary
tf.sg_summary_loss(loss_disc, name='disc')
tf.sg_summary_loss(loss_gen, name='gen')

#
# training
#

# def alternate training func
@tf.sg_train_func
def alt_train(sess, opt):
    l_disc = sess.run([loss_disc, train_disc])[0]  # training discriminator
    l_gen = sess.run([loss_gen, train_gen])[0]  # training generator
    return np.mean(l_disc) + np.mean(l_gen)
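# Rough usage sketch (not from this snippet): in sugartensor, a function decorated
# with @tf.sg_train_func is typically invoked with the same kind of training options
# that tf.sg_train accepts; the argument values and save_dir below are assumptions.
alt_train(log_interval=10, max_ep=30, ep_size=num_batch,
          early_stop=False, save_dir='asset/train/gan')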