예제 #1
0
파일: recog.py 프로젝트: goldsteink/STTWall
    def __init__(self):
        """Build the recognition graph and restore the latest checkpoint.

        Creates the audio-feature placeholder, the logits produced by
        `get_logit`, the per-example sequence length, and a session whose
        variables are restored from the newest checkpoint under
        'asset/train'.
        """
        tf.sg_verbosity(10)  # debug-level logging

        self.batch_size = 1
        self.voca_size = sttwdata.voca_size

        # mfcc feature of audio; the second axis is left unspecified
        feature_shape = (self.batch_size, None, 20)
        self.x = tf.placeholder(dtype=tf.sg_floatx, shape=feature_shape)

        # encode audio feature
        self.logit = get_logit(self.x, voca_size=self.voca_size)

        # sequence length except zero-padding: count the positions along
        # axis 1 whose sum over axis 2 is non-zero
        frame_sums = self.x.sg_sum(axis=2)
        nonzero_frames = tf.not_equal(frame_sums, 0.).sg_int()
        self.seq_len = nonzero_frames.sg_sum(axis=1)

        # session setup: initialise, then overwrite with checkpoint values
        self.session = tf.Session()
        tf.sg_init(self.session)
        self.saver = tf.train.Saver()
        checkpoint_path = tf.train.latest_checkpoint('asset/train')
        self.saver.restore(self.session, checkpoint_path)
예제 #2
0
def sg_reverse_seq(tensor, opt):
    r"""Reverse each zero-padded sequence in `tensor`.

    The length of every sequence is obtained by counting its non-zero
    entries along `opt.dim`; those lengths are then passed to
    `tf.reverse_sequence`.

    Example:

    ```
    tensor = [[1, 2, 3, 0, 0], [4, 5, 0, 0, 0]]
    tensor.sg_reverse_seq()
    => [[3 2 1 0 0]
        [5 4 0 0 0]]
    ```

    Args:
      tensor: A 2-D `Tensor` (automatically given by chain).
      opt:
        dim: Dimension to reverse. Default is 1.
        name: If provided, it replaces current tensor's name.

    Returns:
      A `Tensor` with the same shape and type as `tensor`.
    """
    opt += tf.sg_opt(dim=1)  # use dimension 1 unless the caller chose one
    seq_dim = opt.dim

    # non-zero entries are treated as real data, zeros as padding
    is_nonzero = tf.not_equal(tensor, tf.zeros_like(tensor))
    lengths = is_nonzero.sg_int().sg_sum(dims=seq_dim)

    return tf.reverse_sequence(tensor, lengths, seq_dim, name=opt.name)
예제 #3
0
def init_model():
    """Build the decoding graph and publish its endpoints as globals.

    Sets the module-level names `x` (input feature placeholder) and `y`
    (dense tensor of decoded ids, shifted by +1).
    """
    global x, y

    tf.sg_verbosity(10)  # debug-level logging

    n_batch = 1
    n_vocab = data.voca_size

    # mfcc feature of audio
    x = tf.placeholder(dtype=tf.sg_floatx, shape=(n_batch, None, 20))

    # sequence length except zero-padding
    frame_sums = x.sg_sum(axis=2)
    valid_frames = tf.not_equal(frame_sums, 0.).sg_int()
    lengths = valid_frames.sg_sum(axis=1)

    # encode audio feature
    logits = get_logit(x, voca_size=n_vocab)

    # ctc decoding on the logits with the first two axes swapped
    swapped = logits.sg_transpose(perm=[1, 0, 2])
    decoded, _ = tf.nn.ctc_beam_search_decoder(swapped,
                                               lengths,
                                               merge_repeated=False)

    # first decoded path, converted to a dense tensor and shifted by one
    top_path = decoded[0]
    dense = tf.sparse_to_dense(top_path.indices, top_path.dense_shape,
                               top_path.values)
    y = dense + 1
예제 #4
0
    def __init__(self, mode="train"):
        '''
        Build the model graph.

        Input ids are embedded with a table sized by the character
        vocabulary, passed through 20 residual 1-D convolutions, projected
        to the size of the word vocabulary, and reduced over axis 1 with
        linearly increasing weights. In "train" mode a masked
        cross-entropy loss is also created.

        Args:
          mode: A string. Either "train" or "test". Any value other than
            "train" creates an input placeholder instead of reading
            batches from `get_batch_data()`.
        '''
        self.char2idx, self.idx2char = load_char_vocab()
        self.word2idx, self.idx2word = load_word_vocab()

        if mode == "train":
            self.x, self.y, self.num_batch = get_batch_data()
        else:
            self.x = tf.placeholder(tf.int32, [None, Hyperparams.seqlen])

        # embedding table for the inputs and its lookup
        self.emb_x = tf.sg_emb(name='emb_x',
                               voca_size=len(self.char2idx),
                               dim=Hyperparams.embed_dim)
        self.enc = self.x.sg_lookup(emb=self.emb_x)

        # 20 residual blocks: each adds a conv1d of the current tensor to
        # itself, keeping the channel count unchanged
        with tf.sg_context(size=5, act='relu', bn=True):
            for _ in range(20):
                dim = self.enc.get_shape().as_list()[-1]
                self.enc += self.enc.sg_conv1d(
                    dim=dim)  # (64, 50, 300) float32

        # size-1 convolution projecting to the word-vocabulary size
        self.enc = self.enc.sg_conv1d(size=1,
                                      dim=len(self.word2idx),
                                      act='linear',
                                      bn=False)  # (64, 50, 21293) float32

        # previous reduction (plain mean over axis 1), kept for reference:
        #         self.logits = self.enc.sg_mean(dims=[1], keep_dims=False) # (64, 21293) float32

        # Weighted Sum. Updated on Feb. 15, 2017.
        def make_weights(size):
            # weights 1..size divided by their total size*(size+1)/2,
            # so they sum to 1 and grow linearly with the position
            weights = tf.range(1, size + 1, dtype=tf.float32)
            weights *= 1. / ((1 + size) * size // 2)
            # add leading and trailing singleton axes for broadcasting
            weights = tf.expand_dims(weights, 0)
            weights = tf.expand_dims(weights, -1)
            return weights

        self.weights = make_weights(Hyperparams.seqlen)  # (1, 50, 1)
        self.enc *= self.weights  # Broadcasting
        self.logits = self.enc.sg_sum(axis=[1], keep_dims=False)  # (64, 21293)

        if mode == "train":
            self.ce = self.logits.sg_ce(target=self.y,
                                        mask=False,
                                        one_hot=False)
            # targets equal to 1 are excluded from the loss
            # (original note: "1: Unknown")
            self.istarget = tf.not_equal(self.y, tf.ones_like(
                self.y)).sg_float()
            # mean loss over the kept targets; 1e-5 guards against a zero count
            self.reduced_loss = ((self.ce * self.istarget).sg_sum()) / (
                self.istarget.sg_sum() + 1e-5)
            tf.sg_summary_loss(self.reduced_loss, "reduced_loss")
예제 #5
0
def sg_ce(tensor, opt):
    r"""Compute the softmax cross-entropy loss of `tensor` against `target`.

    Args:
      tensor: A `Tensor`. Logits. Unscaled log probabilities.
      opt:
        target: A `Tensor` with the same length in the first dimension as the `tensor`. Labels.
        one_hot: Boolean. Whether to treat the labels as one-hot encoding. Default is False.
        mask: Boolean. If True, zeros in the target will be excluded from the calculation.
        name: A `string`. A name to display in the tensor board web UI.

    Returns:
      A `Tensor` of losses; see the examples below for the resulting shapes.

    Example with index labels:

    ```
    tensor = [[[2, -1, 3], [3, 1, -2]]]
    target = [[2, 1]]
    tensor.sg_ce(target=target) => [[ 0.32656264  2.13284516]]
    ```

    Example with one-hot labels:

    ```
    tensor = [[2, -1, 3], [3, 1, -2]]
    target = [[0, 0, 1], [1, 0, 0]]
    tensor.sg_ce(target=target, one_hot=True) => [ 0.32656264  0.13284527]
    ```
    """
    opt += tf.sg_opt(one_hot=False)
    assert opt.target is not None, 'target is mandatory.'

    # one-hot labels and index labels are handled by different ops
    if opt.one_hot:
        xent = tf.nn.softmax_cross_entropy_with_logits(labels=opt.target,
                                                       logits=tensor)
    else:
        xent = tf.nn.sparse_softmax_cross_entropy_with_logits(
            labels=opt.target, logits=tensor)
    out = tf.identity(xent, 'ce')

    # zero out the loss wherever the target is 0
    if opt.mask:
        keep = tf.not_equal(opt.target, tf.zeros_like(opt.target)).sg_float()
        out *= keep

    # register the loss for summaries
    tf.sg_summary_loss(out, name=opt.name)

    return out
예제 #6
0
def sg_ce(tensor, opt):
    r"""Compute the softmax cross-entropy loss of `tensor` against `target`.

    Args:
      tensor: A `Tensor`. Logits.
      opt:
        target: A `Tensor`. Labels (mandatory).
        one_hot: Boolean. If True, `target` is treated as one-hot encoded
          and the dense cross-entropy op is used; otherwise the sparse
          (index-label) op is used. Default is False.
        mask: Boolean. If True, the loss is zeroed wherever `target` is 0.

    Returns:
      A `Tensor` of losses, named 'ce' before any masking is applied.

    Raises:
      AssertionError: If `opt.target` is None.
    """
    opt += tf.sg_opt(one_hot=False)
    assert opt.target is not None, 'target is mandatory.'

    # Pass logits/labels by keyword: TensorFlow 1.0+ rejects positional
    # arguments for these ops, and the keyword names are also accepted by
    # the older releases that allowed the positional form.
    if opt.one_hot:
        out = tf.identity(
            tf.nn.softmax_cross_entropy_with_logits(logits=tensor,
                                                    labels=opt.target), 'ce')
    else:
        out = tf.identity(
            tf.nn.sparse_softmax_cross_entropy_with_logits(logits=tensor,
                                                           labels=opt.target),
            'ce')

    # masking loss
    if opt.mask:
        out *= tf.not_equal(opt.target, tf.zeros_like(opt.target)).sg_float()

    # add summary
    tf.sg_summary_loss(out)

    return out
예제 #7
0
def sg_to_sparse(tensor, opt):
    r"""Convert a zero-padded dense tensor into a `SparseTensor`.

    Only the non-zero entries are kept, and every kept value is
    decremented by one.

    See `tf.SparseTensor()` in tensorflow.

    Args:
      tensor: A `Tensor` with zero-padding (automatically given by chain).
      opt:
        name: Documented as a replacement name for the tensor; this
          implementation does not read it.

    Returns:
        A `SparseTensor`.
    """
    # positions of all non-zero entries
    nonzero_idx = tf.where(tf.not_equal(tensor.sg_float(), 0.))
    # for zero-based index
    shifted_values = tf.gather_nd(tensor, nonzero_idx) - 1
    shape_int64 = tf.shape(tensor).sg_cast(dtype=tf.int64)
    return tf.SparseTensor(indices=nonzero_idx,
                           values=shifted_values,
                           dense_shape=shape_int64)
예제 #8
0
    def __init__(self, mode="train"):
        '''
        Build the model graph.

        Input ids are embedded, passed through 20 residual 1-D
        convolutions, and projected per position to the size of the
        output vocabulary. In "train" mode a masked cross-entropy loss is
        also created.

        Args:
          mode: A string. "train" reads batches from `get_batch_data()` and
            adds the loss; any other value creates an input placeholder
            and builds the graph up to the logits only.
        '''
        if mode == "train":
            self.x, self.y, self.num_batch = get_batch_data(
            )  # (64, 50) int64, (64, 50) int64, 1636
        else:  # test
            self.x = tf.placeholder(tf.int64, [None, Hyperparams.maxlen])

        # make embedding matrix for input characters
        pnyn2idx, _, hanzi2idx, _ = load_vocab()

        self.emb_x = tf.sg_emb(name='emb_x',
                               voca_size=len(pnyn2idx),
                               dim=Hyperparams.embed_dim)
        self.enc = self.x.sg_lookup(emb=self.emb_x)

        # 20 residual blocks: each adds a conv1d of the current tensor to
        # itself, keeping the channel count unchanged
        with tf.sg_context(size=5, act='relu', bn=True):
            for _ in range(20):
                dim = self.enc.get_shape().as_list()[-1]
                self.enc += self.enc.sg_conv1d(
                    dim=dim)  # (64, 50, 300) float32

        # final fully convolutional layer for softmax
        self.logits = self.enc.sg_conv1d(size=1,
                                         dim=len(hanzi2idx),
                                         act='linear',
                                         bn=False)  # (64, 50, 5072) float32
        if mode == "train":
            self.ce = self.logits.sg_ce(target=self.y,
                                        mask=True)  # (64, 50) float32
            self.istarget = tf.not_equal(self.y, tf.zeros_like(
                self.y)).sg_float()  # (64, 50) float32
            # Mean loss over non-zero targets. The epsilon prevents 0/0
            # (NaN) when a batch holds no non-zero target; assuming float32,
            # it is too small to alter the quotient for any count >= 1.
            self.reduced_loss = self.ce.sg_sum() / (
                self.istarget.sg_sum() + 1e-8)  # () float32
            tf.sg_summary_loss(self.reduced_loss, "reduced_loss")
예제 #9
0
    def __init__(self, is_train=True):
        '''
        Build the model graph.

        Input ids are embedded, passed twice through a stack of five
        dilated residual blocks (rates 1, 2, 4, 8, 16), and projected per
        position to the size of the output vocabulary.

        Args:
          is_train: Boolean. If True, batches are read from
            `get_batch_data()` and the masked cross-entropy loss is
            created; otherwise an input placeholder is created and the
            graph is built up to the logits only.
        '''
        if is_train:
            self.x, self.y = get_batch_data() # (16, 100), (16, 100)
        else:
            self.x = tf.placeholder(tf.int32, [None, Hyperparams.maxlen])

        # make embedding matrix for input characters
        hangul2idx, _, hanja2idx, _ = load_charmaps()

        self.emb_x = tf.sg_emb(name='emb_x', voca_size=len(hangul2idx), dim=Hyperparams.hidden_dim)

        # embed table lookup
        self.enc = self.x.sg_lookup(emb=self.emb_x).sg_float() # (16, 100, 200)

        # loop dilated conv block
        for _ in range(2):
            self.enc = (self.enc
                   .sg_res_block(size=5, rate=1)
                   .sg_res_block(size=5, rate=2)
                   .sg_res_block(size=5, rate=4)
                   .sg_res_block(size=5, rate=8)
                   .sg_res_block(size=5, rate=16))

        # final fully convolutional layer for softmax
        self.logits = self.enc.sg_conv1d(size=1, dim=len(hanja2idx)) # (16, 100, 4543)

        if is_train:
            self.ce = self.logits.sg_ce(target=self.y, mask=True) # (16, 100)
            self.nonzeros = tf.not_equal(self.y, tf.zeros_like(self.y)).sg_float() # (16, 100)
            # Mean loss over non-zero targets. The epsilon prevents 0/0
            # (NaN) when a batch holds no non-zero target; assuming float32,
            # it is too small to alter the quotient for any count >= 1.
            self.reduced_loss = self.ce.sg_sum() / (self.nonzeros.sg_sum() + 1e-8) # ()
            tf.sg_summary_loss(self.reduced_loss, "reduced_loss")
예제 #10
0
def sg_reverse_seq(tensor, opt):
    r"""Reverse each zero-padded sequence in `tensor`.

    Sequence lengths are computed by counting non-zero entries along
    `opt.dim` and are then passed to `tf.reverse_sequence`.

    Args:
      tensor: A `Tensor` whose padding entries are zeros.
      opt:
        dim: Dimension to reverse. Default is 1.
        name: Passed to `tf.reverse_sequence` as the op name.

    Returns:
      The result of `tf.reverse_sequence`.
    """
    opt += tf.sg_opt(dim=1)  # use dimension 1 unless the caller chose one
    seq_dim = opt.dim

    # non-zero entries are treated as real data, zeros as padding
    is_nonzero = tf.not_equal(tensor, tf.zeros_like(tensor))
    lengths = is_nonzero.sg_int().sg_sum(dims=seq_dim)

    return tf.reverse_sequence(tensor, lengths, seq_dim, name=opt.name)
# Script fragment: builds the graph that computes the CTC loss of the
# speech model on a corpus subset.

# batch size requested from the corpus reader
batch_size = 16

#
# inputs
#

# corpus input tensor ( with QueueRunner )
# the subset is selected by the `set` attribute of tf.sg_arg()
# (presumably a command-line argument — confirm against sugartensor)
data = SpeechCorpus(batch_size=batch_size, set_name=tf.sg_arg().set)

# mfcc feature of audio
x = data.mfcc
# target sentence label
y = data.label

# sequence length except zero-padding:
# count positions along axis 1 whose sum over axis 2 is non-zero
seq_len = tf.not_equal(x.sg_sum(axis=2), 0.).sg_int().sg_sum(axis=1)

#
# Testing Graph
#

# encode audio feature
# NOTE(review): `voca_size` is not defined in this excerpt — presumably
# set or imported earlier in the script; confirm.
logit = get_logit(x, voca_size=voca_size)

# CTC loss
loss = logit.sg_ctc(target=y, seq_len=seq_len)

#
# run network
#
예제 #12
0
# inputs
#
# Script fragment: prepares per-GPU inputs, labels and sequence lengths.
# NOTE(review): `batch_size` is not defined in this excerpt — presumably
# set earlier in the script; confirm.

# corpus input tensor
# one combined batch holding `batch_size` examples for each GPU
data = SpeechCorpus(batch_size=batch_size * tf.sg_gpus())

# mfcc feature of audio, split along axis 0 into one piece per GPU
inputs = tf.split(data.mfcc, tf.sg_gpus(), axis=0)
# target sentence label, split the same way
labels = tf.split(data.label, tf.sg_gpus(), axis=0)

# sequence length except zero-padding, computed separately for each piece:
# count positions along axis 1 whose sum over axis 2 is non-zero
seq_len = []
for input_ in inputs:
    seq_len.append(
        tf.not_equal(input_.sg_sum(axis=2), 0.).sg_int().sg_sum(axis=1))


# parallel loss tower
@tf.sg_parallel
def get_loss(opt):
    """Compute the CTC loss for the tower selected by `opt.gpu_index`.

    Args:
      opt: Options object. Reads `gpu_index` plus the sequences `input`,
        `target` and `seq_len`, each indexed by `gpu_index`.

    Returns:
      The result of `sg_ctc` applied to this tower's logits.
    """
    tower = opt.gpu_index

    # encode audio feature
    tower_logit = get_logit(opt.input[tower], voca_size=voca_size)

    # CTC loss
    return tower_logit.sg_ctc(target=opt.target[tower],
                              seq_len=opt.seq_len[tower])


#
# train
#
예제 #13
0
import sugartensor as tf
import numpy as np
import librosa
import tensorflow as tfw
from tensorflow.python.framework import graph_util

from model import *
import data


# Script fragment: rebuilds the decoding graph, restores the latest
# checkpoint, and obtains the default graph's GraphDef.

batch_size = 1     # batch size
voca_size = data.voca_size
# input feature placeholder; the second axis is left unspecified
x = tf.placeholder(dtype=tf.sg_floatx, shape=(batch_size, None, 20))
# sequence length except zero-padding
seq_len = tf.not_equal(x.sg_sum(axis=2), 0.).sg_int().sg_sum(axis=1)
# encode audio feature
logit = get_logit(x, voca_size)
# ctc decoding on the logits with the first two axes swapped
decoded, _ = tf.nn.ctc_beam_search_decoder(logit.sg_transpose(perm=[1, 0, 2]), seq_len, merge_repeated=False)
# to dense tensor; shifted by one and given the explicit op name "output"
y = tf.add(tf.sparse_to_dense(decoded[0].indices, decoded[0].dense_shape, decoded[0].values), 1, name="output")

# initialise variables, then overwrite them with the newest checkpoint
# found under 'asset/train'
# NOTE(review): `sess` is closed when this `with` block exits; any later
# step that needs the restored variable values would have to run inside
# the block — confirm against the rest of the script.
with tf.Session() as sess:
     tf.sg_init(sess)
     saver = tf.train.Saver()
     saver.restore(sess, tf.train.latest_checkpoint('asset/train'))

# serialisable definition of the graph built above
graph = tf.get_default_graph()
input_graph_def = graph.as_graph_def()
예제 #14
0
#
# inputs
#
# Script fragment: prepares per-GPU inputs, labels and sequence lengths.
# NOTE(review): `batch_size` is not defined in this excerpt — presumably
# set earlier in the script; confirm.

# corpus input tensor
# one combined batch holding `batch_size` examples for each GPU
data = SpeechCorpus(batch_size=batch_size * tf.sg_gpus())

# mfcc feature of audio, split along axis 0 into one piece per GPU
inputs = tf.split(data.mfcc, tf.sg_gpus(), axis=0)
# target sentence label, split the same way
labels = tf.split(data.label, tf.sg_gpus(), axis=0)

# sequence length except zero-padding, computed separately for each piece:
# count positions along axis 1 whose sum over axis 2 is non-zero
seq_len = []
for input_ in inputs:
    seq_len.append(tf.not_equal(input_.sg_sum(axis=2), 0.).sg_int().sg_sum(axis=1))


# parallel loss tower
@tf.sg_parallel
def get_loss(opt):
    """Compute the CTC loss for the tower selected by `opt.gpu_index`.

    Args:
      opt: Options object. Reads `gpu_index` plus the sequences `input`,
        `target` and `seq_len`, each indexed by `gpu_index`.

    Returns:
      The result of `sg_ctc` applied to this tower's logits.
    """
    tower = opt.gpu_index

    # encode audio feature
    tower_logit = get_logit(opt.input[tower], voca_size=voca_size)

    # CTC loss
    return tower_logit.sg_ctc(target=opt.target[tower],
                              seq_len=opt.seq_len[tower])

#
# train
#
# Start training with learning rate 0.0001 for at most 50 epochs of
# `data.num_batch` steps each; the loss comes from get_loss(), which
# receives the per-GPU inputs, labels and sequence lengths.
tf.sg_train(lr=0.0001, loss=get_loss(input=inputs, target=labels, seq_len=seq_len),
            ep_size=data.num_batch, max_ep=50)
예제 #15
0
파일: train.py 프로젝트: liean/quasi-rnn
    def __init__(self, mode="train"):
        """Build the quasi-RNN encoder/decoder graph.

        Four encoder layers (quasi conv1d followed by quasi-RNN pooling)
        feed four decoder layers; each decoder layer is concatenated along
        axis 0 with the matching encoder output, and the last decoder
        layer uses attention (`att=True`). Shape annotations in the
        trailing comments are the original author's notes.

        Args:
          mode: A string. "train" reads batches from `get_batch_data()`,
            derives the decoder input from `y`, and creates the loss;
            any other value creates placeholders for `x` and `y_src` and
            exposes `self.preds`.
        """
        # Inputs and Labels
        if mode == "train":
            self.x, self.y, self.num_batch = get_batch_data(
            )  # (16, 150) int32, (16, 150) int32, int
            # decoder input: the targets shifted right by one position,
            # with a column of zeros prepended
            self.y_src = tf.concat(
                [tf.zeros((Hp.batch_size, 1), tf.int32), self.y[:, :-1]],
                1)  # (16, 150) int32
        else:  # inference
            self.x = tf.placeholder(tf.int32, shape=(Hp.batch_size, Hp.maxlen))
            self.y_src = tf.placeholder(tf.int32,
                                        shape=(Hp.batch_size, Hp.maxlen))

        # Load vocabulary
        char2idx, idx2char = load_vocab()

        # Embedding
        emb_x = tf.sg_emb(name='emb_x',
                          voca_size=len(char2idx),
                          dim=Hp.hidden_units)  # (179, 320)
        emb_y = tf.sg_emb(name='emb_y',
                          voca_size=len(char2idx),
                          dim=Hp.hidden_units)  # (179, 320)
        X = self.x.sg_lookup(emb=emb_x)  # (16, 150, 320)
        Y = self.y_src.sg_lookup(emb=emb_y)  # (16, 150, 320)

        # Encoding
        # After each pooling step, everything past the first Hp.batch_size
        # rows is kept (H_zfo*) for the matching decoder layer.
        conv = X.sg_quasi_conv1d(is_enc=True, size=6)  # (16*3, 150, 320)
        pool = conv.sg_quasi_rnn(is_enc=True, att=False)  # (16*4, 150, 320)
        H_zfo1 = pool[Hp.batch_size:]  # (16*3, 15, 320) for decoding

        conv = pool.sg_quasi_conv1d(is_enc=True, size=2)  # (16*3, 150, 320)
        pool = conv.sg_quasi_rnn(is_enc=True, att=False)  # (16*4, 150, 320)
        H_zfo2 = pool[Hp.batch_size:]  # (16*3, 150, 320) for decoding

        conv = pool.sg_quasi_conv1d(is_enc=True, size=2)  # (16*3, 150, 320)
        pool = conv.sg_quasi_rnn(is_enc=True, att=False)  # (16*4, 150, 320)
        H_zfo3 = pool[Hp.batch_size:]  # (16*3, 150, 320) for decoding

        conv = pool.sg_quasi_conv1d(is_enc=True, size=2)  # (16*3, 150, 320)
        pool = conv.sg_quasi_rnn(is_enc=True, att=False)  # (16*4, 150, 320)
        # the last layer also keeps the first Hp.batch_size rows (H4)
        H4 = pool[:Hp.batch_size]  # (16, 150, 320) for decoding
        H_zfo4 = pool[Hp.batch_size:]  # (16*3, 150, 320) for decoding

        # Decoding
        # Each layer concatenates its input with the matching encoder
        # output along axis 0 before the quasi convolution.
        d_conv = (Y.sg_concat(target=H_zfo1,
                              axis=0).sg_quasi_conv1d(is_enc=False, size=2))
        d_pool = d_conv.sg_quasi_rnn(is_enc=False,
                                     att=False)  # (16*4, 150, 320)

        d_conv = (d_pool.sg_concat(target=H_zfo2,
                                   axis=0).sg_quasi_conv1d(is_enc=False,
                                                           size=2))
        d_pool = d_conv.sg_quasi_rnn(is_enc=False,
                                     att=False)  # (16*4, 150, 320)

        d_conv = (d_pool.sg_concat(target=H_zfo3,
                                   axis=0).sg_quasi_conv1d(is_enc=False,
                                                           size=2))
        d_pool = d_conv.sg_quasi_rnn(is_enc=False,
                                     att=False)  # (16*4, 150, 320)

        d_conv = (d_pool.sg_concat(target=H_zfo4,
                                   axis=0).sg_quasi_conv1d(is_enc=False,
                                                           size=2))
        # final layer: H4 is prepended and pooling runs with attention
        concat = H4.sg_concat(target=d_conv, axis=0)
        d_pool = concat.sg_quasi_rnn(is_enc=False, att=True)  # (16, 150, 320)

        # size-1 convolution projecting to the vocabulary size
        logits = d_pool.sg_conv1d(size=1, dim=len(char2idx),
                                  act="linear")  # (16, 150, 179)

        if mode == 'train':
            # cross entropy loss with logits ( for training set )
            loss = logits.sg_ce(target=self.y, mask=True)
            istarget = tf.not_equal(self.y, 0).sg_float()
            # mean loss over non-zero targets; the epsilon guards a zero count
            self.reduced_loss = (loss.sg_sum()) / (istarget.sg_sum() + 0.00001)
            tf.sg_summary_loss(self.reduced_loss, "reduced_loss")
        else:  # inference
            self.preds = logits.sg_argmax()
예제 #16
0
    def __init__(self,
                 x,
                 y,
                 num_batch,
                 vocab_size,
                 emb_dim,
                 hidden_dim,
                 max_ep=240,
                 infer_shape=(1, 1),
                 mode="train"):
        """Build the LSTM generator graph for training or inference.

        Args:
          x: Input tensor used when `mode` is "train".
          y: Target tensor used when `mode` is "train".
          num_batch: Stored on the instance as `num_batch`.
          vocab_size: Number of rows of the embedding tables and output
            dimension of the LSTM layer.
          emb_dim: Embedding dimension and input dimension of the LSTM.
          hidden_dim: Stored on the instance as `hidden_dim`; not otherwise
            read in this method.
          max_ep: Stored on the instance as `max_ep`.
          infer_shape: Shape of the `x` and `y` placeholders created when
            `mode` is "infer".
          mode: "train" or "infer". In "infer" mode the variables are
            reused (`reuse=True`) instead of created.
        """
        self.num_batch = num_batch
        self.emb_dim = emb_dim
        self.hidden_dim = hidden_dim
        self.vocab_size = vocab_size
        self.max_len_infer = 512
        self.max_ep = max_ep

        # inference shares the variables created by the training graph
        reuse = (mode == 'infer')

        if mode == "train":
            self.x = x
            self.y = y
        elif mode == "infer":
            self.x = tf.placeholder(tf.int32, shape=infer_shape)
            self.y = tf.placeholder(tf.int32, shape=infer_shape)

        # embedding tables and lookups for inputs and targets
        with tf.variable_scope("gen_embs", reuse=reuse):
            self.emb_x = tf.get_variable("emb_x",
                                         [self.vocab_size, self.emb_dim])
            self.emb_y = tf.get_variable("emb_y",
                                         [self.vocab_size, self.emb_dim])
            self.X = tf.nn.embedding_lookup(self.emb_x, self.x)
            self.Y = tf.nn.embedding_lookup(self.emb_y, self.y)

        with tf.sg_context(name='gen', reuse=reuse):
            if mode == "train":
                self.lstm_layer = self.X.sg_lstm(in_dim=self.emb_dim,
                                                 dim=self.vocab_size,
                                                 name="lstm")  # (8, 63, 68)
                self.test = self.lstm_layer.sg_softmax(name="testtt")

                # Debug output. Written as single-argument print(...) calls
                # so the file parses on Python 3 and prints the same text
                # on Python 2.
                print("mazum??")
                print(self.test)

            elif mode == "infer":
                self.lstm_layer = self.X.sg_lstm(in_dim=self.emb_dim,
                                                 dim=self.vocab_size,
                                                 last_only=True,
                                                 name="lstm")
                self.log_prob = tf.log(self.lstm_layer)

                # next_token: select by distribution probability, preds: select by argmax
                # NOTE(review): `multinormed` and `next_token` come from two
                # separate tf.multinomial ops, so they are independent draws.
                self.multinormed = tf.multinomial(self.log_prob, 1)
                self.next_token = tf.cast(
                    tf.reshape(tf.multinomial(self.log_prob, 1),
                               [1, infer_shape[0]]), tf.int32)
                self.preds = self.lstm_layer.sg_argmax()

        if mode == "train":
            self.loss = self.lstm_layer.sg_ce(target=self.y)
            self.istarget = tf.not_equal(self.y, 0).sg_float()

            # summed loss divided by the number of non-zero targets;
            # the epsilon guards against a zero count
            self.reduced_loss = (self.loss.sg_sum()) / (
                self.istarget.sg_sum() + 0.0000001)
            tf.sg_summary_loss(self.reduced_loss, "reduced_loss")
예제 #17
0
def sg_to_sparse(tensor, opt):
    r"""Convert a zero-padded dense tensor into a `SparseTensor`.

    Only the non-zero entries are kept, and every kept value is
    decremented by one.

    Args:
      tensor: A `Tensor` with zero-padding.
      opt: Options object; not read by this implementation.

    Returns:
      A `SparseTensor`.
    """
    indices = tf.where(tf.not_equal(tensor.sg_float(), 0.))
    # The dense shape is passed positionally: its keyword was renamed from
    # `shape` to `dense_shape` in TensorFlow 1.0, while the positional
    # order (indices, values, shape) is the same in both API generations.
    return tf.SparseTensor(
        indices,
        tf.gather_nd(tensor, indices) - 1,  # for zero-based index
        tf.shape(tensor).sg_cast(dtype=tf.int64))
예제 #18
0
    def __init__(self, mode="train"):
        """Build the quasi-RNN encoder/decoder graph.

        Four encoder layers (quasi conv1d followed by quasi-RNN pooling)
        feed four decoder layers; each decoder layer is concatenated along
        axis 0 with the matching encoder output, and the last decoder
        layer uses attention (`att=True`). Shape annotations in the
        trailing comments are the original author's notes.

        Args:
          mode: A string. "train" reads batches from `get_batch_data()`,
            derives the decoder input from `y`, and creates the loss;
            any other value creates placeholders for `x` and `y_src` and
            exposes `self.preds`.
        """
        # Inputs and Labels
        if mode == "train":
            self.x, self.y, self.num_batch = get_batch_data() # (16, 150) int32, (16, 150) int32, int
            # decoder input: the targets shifted right by one position,
            # with a column of zeros prepended
            self.y_src = tf.concat([tf.zeros((Hp.batch_size, 1), tf.int32), self.y[:, :-1]], 1) # (16, 150) int32
        else: # inference
            self.x = tf.placeholder(tf.int32, shape=(Hp.batch_size, Hp.maxlen))
            self.y_src = tf.placeholder(tf.int32, shape=(Hp.batch_size, Hp.maxlen))

        # Load vocabulary
        char2idx, idx2char = load_vocab()

        # Embedding
        def embed(inputs, vocab_size, embed_size, variable_scope):
            '''
            Look up embeddings for `inputs` in a trainable table.

            Creates, under `variable_scope`, a float32 variable named
            'lookup_table' of shape [vocab_size, embed_size] initialised
            from a truncated normal, and indexes it with `inputs`.

            Example (from the original note):
              inputs = tf.expand_dims(tf.range(5), 0) => (1, 5)
              embed(inputs, 5, 10, 'scope') => (1, 5, 10)
            '''
            with tf.variable_scope(variable_scope):
                lookup_table = tf.get_variable('lookup_table',
                                               dtype=tf.float32,
                                               shape=[vocab_size, embed_size],
                                               initializer=tf.truncated_normal_initializer())
            return tf.nn.embedding_lookup(lookup_table, inputs)

        # separate tables (scopes 'X' and 'Y') for encoder and decoder inputs
        X = embed(self.x, vocab_size=len(char2idx), embed_size=Hp.hidden_units, variable_scope='X')  # (179, 320)
        Y = embed(self.y_src, vocab_size=len(char2idx), embed_size=Hp.hidden_units, variable_scope='Y')  # (179, 320)
#         Y = tf.concat((tf.zeros_like(Y[:, :1, :]), Y[:, :-1, :]), 1)

        # Encoding
        # After each pooling step, everything past the first Hp.batch_size
        # rows is kept (H_zfo*) for the matching decoder layer.
        conv = X.sg_quasi_conv1d(is_enc=True, size=6) # (16*3, 150, 320)
        pool = conv.sg_quasi_rnn(is_enc=True, att=False) # (16*4, 150, 320)
        H_zfo1 = pool[Hp.batch_size:] # (16*3, 15, 320) for decoding

        conv = pool.sg_quasi_conv1d(is_enc=True, size=2) # (16*3, 150, 320)
        pool = conv.sg_quasi_rnn(is_enc=True, att=False) # (16*4, 150, 320)
        H_zfo2 = pool[Hp.batch_size:] # (16*3, 150, 320) for decoding

        conv = pool.sg_quasi_conv1d(is_enc=True, size=2) # (16*3, 150, 320)
        pool = conv.sg_quasi_rnn(is_enc=True, att=False) # (16*4, 150, 320)
        H_zfo3 = pool[Hp.batch_size:] # (16*3, 150, 320) for decoding

        conv = pool.sg_quasi_conv1d(is_enc=True, size=2) # (16*3, 150, 320)
        pool = conv.sg_quasi_rnn(is_enc=True, att=False) # (16*4, 150, 320)
        # the last layer also keeps the first Hp.batch_size rows (H4)
        H4 = pool[:Hp.batch_size] # (16, 150, 320) for decoding
        H_zfo4 = pool[Hp.batch_size:] # (16*3, 150, 320) for decoding

        # Decoding
        # Each layer concatenates its input with the matching encoder
        # output along axis 0 before the quasi convolution.
        d_conv = (Y.sg_concat(target=H_zfo1, axis=0)
                   .sg_quasi_conv1d(is_enc=False, size=2))
        d_pool = d_conv.sg_quasi_rnn(is_enc=False, att=False) # (16*4, 150, 320)

        d_conv = (d_pool.sg_concat(target=H_zfo2, axis=0)
                        .sg_quasi_conv1d(is_enc=False, size=2))
        d_pool = d_conv.sg_quasi_rnn(is_enc=False, att=False) # (16*4, 150, 320)

        d_conv = (d_pool.sg_concat(target=H_zfo3, axis=0)
                        .sg_quasi_conv1d(is_enc=False, size=2))
        d_pool = d_conv.sg_quasi_rnn(is_enc=False, att=False) # (16*4, 150, 320)

        d_conv = (d_pool.sg_concat(target=H_zfo4, axis=0)
                        .sg_quasi_conv1d(is_enc=False, size=2))
        # final layer: H4 is prepended and pooling runs with attention
        concat = H4.sg_concat(target=d_conv, axis=0)
        d_pool = concat.sg_quasi_rnn(is_enc=False, att=True) # (16, 150, 320)

        # size-1 convolution projecting to the vocabulary size
        logits = d_pool.sg_conv1d(size=1, dim=len(char2idx), act="linear") # (16, 150, 179)

        if mode=='train':
            # cross entropy loss with logits ( for training set )
            self.loss = logits.sg_ce(target=self.y, mask=True)
            istarget = tf.not_equal(self.y, 0).sg_float()
            # mean loss over non-zero targets; the epsilon guards a zero count
            self.reduced_loss = (self.loss.sg_sum()) / (istarget.sg_sum() + 1e-8)
            tf.sg_summary_loss(self.reduced_loss, "reduced_loss")
        else: # inference
            self.preds = logits.sg_argmax()
예제 #19
0
    def __init__(self, mode="train"):
        """Build the quasi-RNN encoder/decoder graph.

        Every intermediate tensor is stored on the instance. Four encoder
        layers (quasi conv1d followed by quasi-RNN pooling) feed four
        decoder layers; each decoder layer is concatenated along dimension
        0 with the matching encoder output, and the last decoder layer
        uses attention (`att=True`). Shape annotations in the trailing
        comments are the original author's notes.

        Args:
          mode: A string. "train" reads batches from `get_batch_data()`,
            derives the decoder input from `y`, and creates the loss;
            any other value creates placeholders for `x` and `y_src`.
            `self.preds` is created in both cases.
        """
        # Inputs and Labels
        if mode == "train":
            self.x, self.y, self.num_batch = get_batch_data(
            )  # (16, 150) int32, (16, 150) int32, int
            # decoder input: the targets shifted right by one position,
            # with a column of zeros prepended
            self.y_src = tf.concat(
                axis=1,
                values=[tf.zeros((Hp.bs, 1), tf.int32),
                        self.y[:, :-1]])  # (16, 150) int32
        else:  # inference
            self.x = tf.placeholder(tf.int32, shape=(Hp.bs, Hp.maxlen))
            self.y_src = tf.placeholder(tf.int32, shape=(Hp.bs, Hp.maxlen))

        # Load vocabulary
        self.char2idx, self.idx2char = load_vocab()

        # Embedding
        self.emb_x = tf.sg_emb(name='emb_x',
                               voca_size=len(self.char2idx),
                               dim=Hp.hd)  # (179, 320)
        self.emb_y = tf.sg_emb(name='emb_y',
                               voca_size=len(self.char2idx),
                               dim=Hp.hd)  # (179, 320)
        self.X = self.x.sg_lookup(emb=self.emb_x)  # (16, 150, 320)
        self.Y = self.y_src.sg_lookup(emb=self.emb_y)  # (16, 150, 320)

        # Encoding
        # After each pooling step, everything past the first Hp.bs rows is
        # kept (H_zfo*) for the matching decoder layer. `self.conv` and
        # `self.pool` are overwritten by each layer.
        self.conv = self.X.sg_quasi_conv1d(is_enc=True,
                                           size=6)  # (16*4, 150, 320)
        self.pool = self.conv.sg_quasi_rnn(is_enc=True,
                                           att=False)  # (16*4, 150, 320)
        self.H_zfo1 = self.pool[Hp.bs:]  # (16*3, 15, 320) for decoding

        self.conv = self.pool.sg_quasi_conv1d(is_enc=True,
                                              size=2)  # (16*4, 150, 320)
        self.pool = self.conv.sg_quasi_rnn(is_enc=True,
                                           att=False)  # (16*4, 150, 320)
        self.H_zfo2 = self.pool[Hp.bs:]  # (16*3, 150, 320) for decoding

        self.conv = self.pool.sg_quasi_conv1d(is_enc=True,
                                              size=2)  # (16*4, 150, 320)
        self.pool = self.conv.sg_quasi_rnn(is_enc=True,
                                           att=False)  # (16*4, 150, 320)
        self.H_zfo3 = self.pool[Hp.bs:]  # (16*3, 150, 320) for decoding

        self.conv = self.pool.sg_quasi_conv1d(is_enc=True,
                                              size=2)  # (16*4, 150, 320)
        self.pool = self.conv.sg_quasi_rnn(is_enc=True,
                                           att=False)  # (16*4, 150, 320)
        # the last layer also keeps the first Hp.bs rows (H4)
        self.H4 = self.pool[:Hp.bs]
        self.H_zfo4 = self.pool[Hp.bs:]  # (16*3, 150, 320) for decoding

        # Decoding
        # Each layer concatenates its input with the matching encoder
        # output along dimension 0 before the quasi convolution.
        self.dec = self.Y.sg_concat(target=self.H_zfo1, dim=0)

        self.d_conv = self.dec.sg_quasi_conv1d(is_enc=False, size=2)
        self.d_pool = self.d_conv.sg_quasi_rnn(is_enc=False,
                                               att=False)  # (16*4, 150, 320)

        self.d_conv = (self.d_pool.sg_concat(
            target=self.H_zfo2, dim=0).sg_quasi_conv1d(is_enc=False, size=2))
        self.d_pool = self.d_conv.sg_quasi_rnn(is_enc=False,
                                               att=False)  # (16*4, 150, 320)

        self.d_conv = (self.d_pool.sg_concat(
            target=self.H_zfo3, dim=0).sg_quasi_conv1d(is_enc=False, size=2))
        self.d_pool = self.d_conv.sg_quasi_rnn(is_enc=False,
                                               att=False)  # (16*4, 150, 320)

        self.d_conv = (self.d_pool.sg_concat(
            target=self.H_zfo4, dim=0).sg_quasi_conv1d(is_enc=False, size=2))
        # final layer: H4 is prepended and pooling runs with attention
        self.concat = self.H4.sg_concat(target=self.d_conv, dim=0)
        self.d_pool = self.concat.sg_quasi_rnn(is_enc=False,
                                               att=True)  # (16*4, 150, 320)

        # size-1 convolution projecting to the vocabulary size
        self.logits = self.d_pool.sg_conv1d(size=1,
                                            dim=len(self.char2idx),
                                            act="linear")  # (16, 150, 179)
        # argmax predictions are created regardless of mode
        self.preds = self.logits.sg_argmax()
        if mode == 'train':
            # cross entropy loss with logits ( for training set )
            self.loss = self.logits.sg_ce(target=self.y, mask=True)
            self.istarget = tf.not_equal(self.y, 0).sg_float()
            # mean loss over non-zero targets; the epsilon guards a zero count
            self.reduced_loss = (self.loss.sg_sum()) / (
                self.istarget.sg_sum() + 0.00001)
            tf.sg_summary_loss(self.reduced_loss, "reduced_loss")