Ejemplo n.º 1
0
def Net(aa, yt, x):
    s=aa.shape[1]
    with tf.sg_context(name='NNReg', stride=1, act='leaky_relu', bn=True, reuse=tf.AUTO_REUSE): 
        yt=tf.expand_dims(yt,2)
        
        v1=tf.expand_dims(x,2).sg_conv(dim=16, size=(1,1),  name='gen9',pad="SAME",bn=True)        
        v2=v1.sg_conv(dim=64, size=(1,1),  name='gen1',pad="SAME",bn=True)        
        v3=v2.sg_conv(dim=128, size=(1,1),  name='gen2',pad="SAME",bn=True) 
        v4=v3.sg_conv(dim=256, size=(1,1),  name='gen3',pad="SAME",bn=True) 
        v5=v4.sg_conv(dim=512, size=(1,1),  name='gen4',pad="SAME",bn=True) 
        v5=tf.tile(tf.expand_dims(tf.reduce_max(v5, axis=1),axis=1),[1,s,1,1])
        vv5=v5
        
        v1=yt.sg_conv(dim=16, size=(1,1),  name='gen99',pad="SAME",bn=True)        
        v2=v1.sg_conv(dim=64, size=(1,1),  name='gen11',pad="SAME",bn=True)        
        v3=v2.sg_conv(dim=128, size=(1,1),  name='gen22',pad="SAME",bn=True) 
        v4=v3.sg_conv(dim=256, size=(1,1),  name='gen33',pad="SAME",bn=True) 
        v5=v4.sg_conv(dim=512, size=(1,1),  name='gen44',pad="SAME",bn=True) 
        v5=tf.tile(tf.expand_dims(tf.reduce_max(v5, axis=1),axis=1),[1,s,1,1])
        
        ff=tf.concat([tf.expand_dims(aa,2),v5], axis=-1) 
        ff=tf.concat([ff,vv5], axis=-1) 
        f1=ff.sg_conv(dim=256, size=(1,1),  name='f1',pad="SAME",bn=True)  
        f2=f1.sg_conv(dim=128, size=(1,1),  name='f2',pad="SAME",bn=True)  
        
        f3=f2.sg_conv(dim=2, size=(1,1),  name='f3',pad="SAME",bn=False, act="linear")  
        f3=tf.squeeze(f3,axis=2)
        
    return f3
Ejemplo n.º 2
0
def pairwise_dist(xt, y_p):
    a = xt.shape[1]
    b = y_p.shape[1]
    dist = tf.tile(tf.expand_dims(y_p, 1), [1, a, 1, 1]) - tf.tile(
        tf.expand_dims(xt, 2), [1, 1, b, 1])
    dist = (dist[:, :, :, 0]**2 + dist[:, :, :, 1]**2)
    return dist
        def symbols_to_logits_fn(ids, dec_state):
            dec = []
            dec_c, dec_h = [], []
            # (batch x beam_size x decoded_seq)
            ids = tf.reshape(ids, [Hp.batch_size, beam_size, -1])
            print("dec_state ", dec_state[0].get_shape().as_list())
            for ind in range(beam_size):
                with tf.variable_scope('dec_lstm', reuse=ind > 0
                                       or reuse_vars):
                    w_input = ids[:, ind, -1].sg_lookup(emb=emb_word)
                    dec_state0 = tf.contrib.rnn.LSTMStateTuple(
                        c=dec_state.c[:, ind, :], h=dec_state.h[:, ind, :])
                    dec_out, dec_state_i = dec_cell(w_input, dec_state0)
                    dec_out = tf.expand_dims(dec_out, 1)
                dec_i = dec_out.sg_conv1d_gpus(size=1,
                                               dim=Hp.word_vs,
                                               name="out_conv",
                                               act="linear",
                                               dev=dev,
                                               reuse=ind > 0 or reuse_vars)

                dec.append(tf.squeeze(dec_i, 1))
                dec_c.append(dec_state_i[0])
                dec_h.append(dec_state_i[1])
            return tf.stack(dec, 1), tf.contrib.rnn.LSTMStateTuple(
                tf.stack(dec_c, 1), tf.stack(dec_h, 1))
Ejemplo n.º 4
0
def sg_expand_dims(tensor, opt):
    r"""Inserts a new dimension.
    
    See tf.expand_dims() in tensorflow.

    Args:
      tensor: A `Tensor` (automatically given by chain).
      opt:
        dim : Dimension to expand. Default is -1.
        name: If provided, it replaces current tensor's name.

    Returns:
        A `Tensor`.
    """
    opt += tf.sg_opt(dim=-1)
    return tf.expand_dims(tensor, opt.dim, name=opt.name)
def tower_infer_dec(chars,
                    scope,
                    rnn_cell,
                    dec_cell,
                    word_emb,
                    rnn_state,
                    out_reuse_vars=False,
                    dev='/cpu:0'):

    with tf.device(dev):
        with tf.variable_scope('embatch_size', reuse=True):
            # (vocab_size, latent_dim)
            emb_char = tf.sg_emb(name='emb_char',
                                 voca_size=Hp.char_vs,
                                 dim=Hp.hd,
                                 dev=dev)
            emb_word = tf.sg_emb(name='emb_word',
                                 emb=word_emb,
                                 voca_size=Hp.word_vs,
                                 dim=300,
                                 dev=dev)

    print(chars)
    ch = chars
    ch = tf.reverse_sequence(input=ch,
                             seq_lengths=[Hp.c_maxlen] * Hp.batch_size,
                             seq_dim=1)
    reuse_vars = reuse_vars_enc = True

    # --------------------------   BYTENET ENCODER   --------------------------

    with tf.variable_scope('encoder'):
        # embed table lookup
        enc = ch.sg_lookup(emb=emb_char)  #(batch, sentlen, latentdim)
        # loop dilated conv block
        for i in range(Hp.num_blocks):
            enc = (enc.sg_res_block(size=5,
                                    rate=1,
                                    name="enc1_%d" % (i),
                                    is_first=True,
                                    reuse_vars=reuse_vars,
                                    dev=dev).sg_res_block(
                                        size=5,
                                        rate=2,
                                        name="enc2_%d" % (i),
                                        reuse_vars=reuse_vars,
                                        dev=dev).sg_res_block(
                                            size=5,
                                            rate=4,
                                            name="enc4_%d" % (i),
                                            reuse_vars=reuse_vars,
                                            dev=dev).sg_res_block(
                                                size=5,
                                                rate=8,
                                                name="enc8_%d" % (i),
                                                reuse_vars=reuse_vars,
                                                dev=dev).sg_res_block(
                                                    size=5,
                                                    rate=16,
                                                    name="enc16_%d" % (i),
                                                    reuse_vars=reuse_vars,
                                                    dev=dev))
        byte_enc = enc
        # --------------------------   QCNN + QPOOL ENCODER #1  --------------------------

        with tf.variable_scope('quazi'):

            #quasi cnn layer ZFO  [batch * 3, seqlen, dim2 ]
            conv = byte_enc.sg_quasi_conv1d(is_enc=True,
                                            size=4,
                                            name="qconv_1",
                                            dev=dev,
                                            reuse_vars=reuse_vars)
            # c = f * c + (1 - f) * z, h = o*c [batch * 4, seqlen, hd]
            pool0 = conv.sg_quasi_rnn(is_enc=False,
                                      att=False,
                                      name="qrnn_1",
                                      reuse_vars=reuse_vars,
                                      dev=dev)

            qpool_last = pool0[:, -1, :]

    # --------------------------   MAXPOOL along time dimension   --------------------------

    inpt_maxpl = tf.expand_dims(byte_enc, 1)  # [batch, 1, seqlen, channels]
    maxpool = tf.nn.max_pool(inpt_maxpl, [1, 1, Hp.c_maxlen, 1], [1, 1, 1, 1],
                             'VALID')
    maxpool = tf.squeeze(maxpool, [1, 2])

    # --------------------------   HIGHWAY   --------------------------

    concat = qpool_last + maxpool
    with tf.variable_scope('highway', reuse=reuse_vars):
        input_lstm = highway(concat, concat.get_shape()[-1], num_layers=1)

    # --------------------------   CONTEXT LSTM  --------------------------

    input_lstm = tf.nn.dropout(input_lstm, Hp.keep_prob)

    with tf.variable_scope('contx_lstm', reuse=reuse_vars):
        output, rnn_state = rnn_cell(input_lstm, rnn_state)

    beam_size = 8
    reuse_vars = out_reuse_vars

    greedy = False
    if greedy:

        dec_state = rnn_state
        dec_out = []
        d_out = tf.constant([1] * Hp.batch_size)
        for idx in range(Hp.w_maxlen):
            w_input = d_out.sg_lookup(emb=emb_word)
            dec_state = tf.contrib.rnn.LSTMStateTuple(c=dec_state.c,
                                                      h=dec_state.h)
            with tf.variable_scope('dec_lstm', reuse=idx > 0 or reuse_vars):
                d_out, dec_state = dec_cell(w_input, dec_state)

            dec_out.append(d_out)
            d_out = tf.expand_dims(d_out, 1).sg_conv1d_gpus(size=1,
                                                            dim=Hp.word_vs,
                                                            name="out_conv",
                                                            act="linear",
                                                            dev=dev,
                                                            reuse=idx > 0
                                                            or reuse_vars)
            d_out = tf.squeeze(d_out).sg_argmax()

        dec_out = tf.stack(dec_out, 1)

        dec = dec_out.sg_conv1d_gpus(size=1,
                                     dim=Hp.word_vs,
                                     name="out_conv",
                                     act="linear",
                                     dev=dev,
                                     reuse=True)
        return dec.sg_argmax(), rnn_state

    else:

        # ------------------ BEAM SEARCH --------------------
        dec_state = tf.contrib.rnn.LSTMStateTuple(
            tf.tile(tf.expand_dims(rnn_state[0], 1), [1, beam_size, 1]),
            tf.tile(tf.expand_dims(rnn_state[1], 1), [1, beam_size, 1]))
        initial_ids = tf.constant([1] * Hp.batch_size)

        def symbols_to_logits_fn(ids, dec_state):
            dec = []
            dec_c, dec_h = [], []
            # (batch x beam_size x decoded_seq)
            ids = tf.reshape(ids, [Hp.batch_size, beam_size, -1])
            print("dec_state ", dec_state[0].get_shape().as_list())
            for ind in range(beam_size):
                with tf.variable_scope('dec_lstm', reuse=ind > 0
                                       or reuse_vars):
                    w_input = ids[:, ind, -1].sg_lookup(emb=emb_word)
                    dec_state0 = tf.contrib.rnn.LSTMStateTuple(
                        c=dec_state.c[:, ind, :], h=dec_state.h[:, ind, :])
                    dec_out, dec_state_i = dec_cell(w_input, dec_state0)
                    dec_out = tf.expand_dims(dec_out, 1)
                dec_i = dec_out.sg_conv1d_gpus(size=1,
                                               dim=Hp.word_vs,
                                               name="out_conv",
                                               act="linear",
                                               dev=dev,
                                               reuse=ind > 0 or reuse_vars)

                dec.append(tf.squeeze(dec_i, 1))
                dec_c.append(dec_state_i[0])
                dec_h.append(dec_state_i[1])
            return tf.stack(dec, 1), tf.contrib.rnn.LSTMStateTuple(
                tf.stack(dec_c, 1), tf.stack(dec_h, 1))

        final_ids, final_probs = beam_search.beam_search(symbols_to_logits_fn,
                                                         dec_state,
                                                         initial_ids,
                                                         beam_size,
                                                         Hp.w_maxlen - 1,
                                                         Hp.word_vs,
                                                         3.5,
                                                         eos_id=2)

        return final_ids[:, 0, :], rnn_state
    def rnn_body_stat(time, rnn_state):
        ch = chars_sent.read(time)
        ch = tf.reverse_sequence(input=ch,
                                 seq_lengths=[Hp.c_maxlen] * Hp.batch_size,
                                 seq_dim=1)
        reuse_vars = out_reuse_vars

        # --------------------------   BYTENET ENCODER   --------------------------

        with tf.variable_scope('encoder'):
            # embed table lookup
            enc = ch.sg_lookup(emb=emb_char)  #(batch, sentlen, latentdim)
            # loop dilated conv block
            for i in range(Hp.num_blocks):
                enc = (enc.sg_res_block(size=5,
                                        rate=1,
                                        name="enc1_%d" % (i),
                                        is_first=True,
                                        reuse_vars=reuse_vars,
                                        dev=dev).sg_res_block(
                                            size=5,
                                            rate=2,
                                            name="enc2_%d" % (i),
                                            reuse_vars=reuse_vars,
                                            dev=dev).sg_res_block(
                                                size=5,
                                                rate=4,
                                                name="enc4_%d" % (i),
                                                reuse_vars=reuse_vars,
                                                dev=dev).sg_res_block(
                                                    size=5,
                                                    rate=8,
                                                    name="enc8_%d" % (i),
                                                    reuse_vars=reuse_vars,
                                                    dev=dev).sg_res_block(
                                                        size=5,
                                                        rate=16,
                                                        name="enc16_%d" % (i),
                                                        reuse_vars=reuse_vars,
                                                        dev=dev))
            byte_enc = enc
            # --------------------------   QCNN + QPOOL ENCODER #1  --------------------------

            with tf.variable_scope('quazi'):

                #quasi cnn layer ZFO  [batch * 3, seqlen, dim2 ]
                conv = byte_enc.sg_quasi_conv1d(is_enc=True,
                                                size=4,
                                                name="qconv_1",
                                                dev=dev,
                                                reuse_vars=reuse_vars)
                # c = f * c + (1 - f) * z, h = o*c [batch * 4, seqlen, hd]
                pool0 = conv.sg_quasi_rnn(is_enc=False,
                                          att=False,
                                          name="qrnn_1",
                                          reuse_vars=reuse_vars,
                                          dev=dev)

                qpool_last = pool0[:, -1, :]

        # --------------------------   MAXPOOL along time dimension   --------------------------

        inpt_maxpl = tf.expand_dims(byte_enc,
                                    1)  # [batch, 1, seqlen, channels]
        maxpool = tf.nn.max_pool(inpt_maxpl, [1, 1, Hp.c_maxlen, 1],
                                 [1, 1, 1, 1], 'VALID')
        maxpool = tf.squeeze(maxpool, [1, 2])

        # --------------------------   HIGHWAY   --------------------------

        concat = qpool_last + maxpool
        with tf.variable_scope('highway', reuse=reuse_vars):
            input_lstm = highway(concat, concat.get_shape()[-1], num_layers=1)

        # --------------------------   CONTEXT LSTM  --------------------------
        input_lstm = tf.nn.dropout(input_lstm, Hp.keep_prob)

        with tf.variable_scope('contx_lstm', reuse=reuse_vars):
            output, rnn_state = rnn_cell(input_lstm, rnn_state)

        return (time + 1, rnn_state)
Ejemplo n.º 7
0
def sg_expand_dims(tensor, opt):
    opt += tf.sg_opt(dim=-1)
    return tf.expand_dims(tensor, opt.dim, name=opt.name)
Ejemplo n.º 8
0
    def wrapper(tensor, **kwargs):
        r"""Manages arguments of `tf.sg_opt`.

        Args:
          tensor: A `tensor` (automatically passed by decorator).
          kwargs:
            shape:  A list of integers. The shape of `tensor`. Inferred if not specified.
            in_dim: An integer. The size of input dimension, which is set to the last one by default.
            dim: An integer. The size of output dimension. Has the same value as in_dim by default.
            bn: Boolean. If True, batch normalization is applied.
            ln: Boolean. If True, layer normalization is applied.
            scale: If true, multiple by a trainable gamma variable. When the activation is
              linear (relu included), this can be disabled because it can be implicitly
              learned by the next layer. The default is True.
            dout: A float of range [0, 100). A dropout rate. Set to 0 by default.
            bias: Boolean. If True, biases are added. As a default, it is set to True
            name: A name for the layer. As a default, the function name is assigned.
            act: A name of activation function. e.g., `sigmoid`, `tanh`, etc.
            reuse: `True` or `None`; if `True`, we go into reuse mode for this `layer` scope
              as well as all sub-scopes; if `None`, we just inherit the parent scope reuse.
            regularizer:  A string. None, 'l1' or 'l2'. The default is None
            summary: If True, summaries are added. The default is True.
        """

        from . import sg_initializer as init
        from . import sg_activation

        # kwargs parsing
        opt = tf.sg_opt(kwargs) + sg_get_context()

        # set default argument
        try:
            shape = tensor.get_shape().as_list()
            # batch normalization off, layer normalization off, dropout off
            opt += tf.sg_opt(shape=shape,
                             in_dim=shape[-1],
                             dim=shape[-1],
                             bn=False,
                             ln=False,
                             dout=0,
                             summary=True,
                             scale=True)
            if opt.regularizer == 'l1':
                opt.regularizer = lambda x: tf.reduce_mean(tf.abs(x))
            elif opt.regularizer == 'l2':
                opt.regularizer = lambda x: tf.square(
                    tf.reduce_mean(tf.square(x)))
            else:
                opt.regularizer = None

            assert not (
                opt.bn and opt.ln
            ), 'one of batch normalization and layer normalization is available.'

            # disable bias when normalization on
            opt += tf.sg_opt(bias=not (opt.bn or opt.ln))
        finally:
            pass

        # automatic layer naming
        if opt.name is None:

            # layer function name will be used as layer name
            opt.name = func.__name__.replace('sg_', '')

            # find existing layer names
            exist_layers = []
            for t in tf.global_variables():
                scope_name = tf.get_variable_scope().name
                prefix = scope_name + '/' if len(scope_name) > 0 else ''
                i = t.name.rfind(prefix + opt.name)
                if i >= 0:
                    exist_layers.append(t.name[i:].split('/')[-2])
            exist_layers = list(set(exist_layers))

            # layer name numbering
            if len(exist_layers) == 0:
                opt.name += '_1'
            else:
                opt.name += '_%d' % (
                    max([int(n.split('_')[-1]) for n in exist_layers]) + 1)

        with tf.variable_scope(opt.name, reuse=opt.reuse) as scope:

            # call layer function
            out = func(tensor, opt)
            out_shape = out.get_shape()

            # apply batch normalization
            if opt.bn:
                beta = init.constant('beta', opt.dim, summary=opt.summary)
                gamma = init.constant('gamma',
                                      opt.dim,
                                      value=1,
                                      summary=opt.summary,
                                      trainable=opt.scale)

                # offset, scale parameter ( for inference )
                mean_running = init.constant('mean',
                                             opt.dim,
                                             trainable=False,
                                             summary=opt.summary)
                variance_running = init.constant('variance',
                                                 opt.dim,
                                                 value=1,
                                                 trainable=False,
                                                 summary=opt.summary)

                # use fused batch norm if ndims in [2, 3, 4]
                if out_shape.ndims in [2, 3, 4]:
                    # add HW dims if necessary, fused_batch_norm requires shape to be NHWC
                    if out_shape.ndims == 2:
                        out = tf.expand_dims(out, axis=1)
                        out = tf.expand_dims(out, axis=2)
                    elif out_shape.ndims == 3:
                        out = tf.expand_dims(out, axis=2)

                    fused_eps = tf.sg_eps if tf.sg_eps > 1e-5 else 1e-5
                    out, mean, variance = tf.cond(
                        _phase,
                        lambda: tf.nn.fused_batch_norm(
                            out, gamma, beta, epsilon=fused_eps),
                        lambda: tf.nn.fused_batch_norm(out,
                                                       gamma,
                                                       beta,
                                                       mean=mean_running,
                                                       variance=
                                                       variance_running,
                                                       epsilon=fused_eps,
                                                       is_training=False),
                    )

                    # restore original shape if HW dims was added
                    if out_shape.ndims == 2:
                        out = tf.squeeze(out, axis=[1, 2])
                    elif out_shape.ndims == 3:
                        out = tf.squeeze(out, axis=2)

                # fallback to naive batch norm
                else:
                    mean, variance = tf.nn.moments(
                        out, axes=list(range(len(out.get_shape()) - 1)))
                    out = tf.cond(
                        _phase, lambda: tf.nn.batch_normalization(
                            out, mean, variance, beta, gamma, tf.sg_eps),
                        lambda: tf.nn.batch_normalization(
                            out, mean_running, variance_running, beta, gamma,
                            tf.sg_eps))

                decay = 0.99
                tf.add_to_collection(
                    tf.GraphKeys.UPDATE_OPS,
                    mean_running.assign(mean_running * decay + mean *
                                        (1 - decay)))
                tf.add_to_collection(
                    tf.GraphKeys.UPDATE_OPS,
                    variance_running.assign(variance_running * decay +
                                            variance * (1 - decay)))

            # apply layer normalization
            if opt.ln:
                # offset, scale parameter
                beta = init.constant('beta', opt.dim, summary=opt.summary)
                if opt.scale:
                    gamma = init.constant('gamma',
                                          opt.dim,
                                          value=1,
                                          summary=opt.summary)

                # calc layer mean, variance for final axis
                mean, variance = tf.nn.moments(out,
                                               axes=[len(out.get_shape()) - 1],
                                               keep_dims=True)

                # apply normalization
                out = (out - mean) / tf.sqrt(variance + tf.sg_eps)
                # apply parameter
                if opt.scale:
                    out = gamma * out + beta
                else:
                    out = out + beta

            # apply activation
            if opt.act:
                out = getattr(sg_activation, 'sg_' + opt.act.lower())(out)

            # apply dropout
            if opt.dout:
                out = tf.cond(_phase, lambda: tf.nn.dropout(out, 1 - opt.dout),
                              lambda: out)

            # rename tensor
            out = tf.identity(out, 'out')

            # add final output summary
            if opt.summary:
                tf.sg_summary_activation(out)

            # save node info for reuse
            out._sugar = tf.sg_opt(func=func,
                                   arg=tf.sg_opt(kwargs) + sg_get_context(),
                                   prev=tensor,
                                   is_layer=True,
                                   name=opt.name)
            # inject reuse function
            out.sg_reuse = types.MethodType(sg_reuse, out)

        return out
Ejemplo n.º 9
0
 def make_weights(size):
     weights = tf.range(1, size + 1, dtype=tf.float32)
     weights *= 1. / ((1 + size) * size // 2)
     weights = tf.expand_dims(weights, 0)
     weights = tf.expand_dims(weights, -1)
     return weights