def Net(aa, yt, x):
    """Build the 'NNReg' network that maps `aa`, conditioned on `yt` and `x`.

    `x` and `yt` each go through their own stack of five 1x1 convolutions
    (16 -> 64 -> 128 -> 256 -> 512 channels). Each stack's output is
    max-reduced over axis 1 and tiled back along that axis ``aa.shape[1]``
    times. Both pooled descriptors are concatenated onto `aa` (last axis)
    and passed through three more 1x1 convolutions ending in 2 channels.

    Args:
        aa: Tensor whose axis-1 size sets the tile count; a singleton axis is
            inserted at position 2 before concatenation.
            NOTE(review): presumably (batch, points, coords) -- confirm.
        yt: Tensor fed to the 'gen99', 'gen11', ..., 'gen44' stack.
        x: Tensor fed to the 'gen9', 'gen1', ..., 'gen4' stack.

    Returns:
        Output of layer 'f3' with axis 2 squeezed away.
    """
    tile_count = aa.shape[1]
    widths = (16, 64, 128, 256, 512)

    def pooled_descriptor(feat, layer_names):
        # Five 1x1 convolutions, then a max over axis 1 repeated back along it.
        for width, layer_name in zip(widths, layer_names):
            feat = feat.sg_conv(dim=width, size=(1, 1), name=layer_name,
                                pad="SAME", bn=True)
        peak = tf.expand_dims(tf.reduce_max(feat, axis=1), axis=1)
        return tf.tile(peak, [1, tile_count, 1, 1])

    with tf.sg_context(name='NNReg', stride=1, act='leaky_relu', bn=True,
                       reuse=tf.AUTO_REUSE):
        yt = tf.expand_dims(yt, 2)

        x_descriptor = pooled_descriptor(
            tf.expand_dims(x, 2), ('gen9', 'gen1', 'gen2', 'gen3', 'gen4'))
        yt_descriptor = pooled_descriptor(
            yt, ('gen99', 'gen11', 'gen22', 'gen33', 'gen44'))

        # Per-row input followed by the yt descriptor, then the x descriptor.
        merged = tf.concat([tf.expand_dims(aa, 2), yt_descriptor], axis=-1)
        merged = tf.concat([merged, x_descriptor], axis=-1)

        hidden = merged.sg_conv(dim=256, size=(1, 1), name='f1',
                                pad="SAME", bn=True)
        hidden = hidden.sg_conv(dim=128, size=(1, 1), name='f2',
                                pad="SAME", bn=True)
        # Final layer overrides the context defaults: no batch norm, linear.
        out = hidden.sg_conv(dim=2, size=(1, 1), name='f3', pad="SAME",
                             bn=False, act="linear")
        out = tf.squeeze(out, axis=2)
    return out
def pairwise_dist(xt, y_p):
    """Squared distance between every axis-1 row of `xt` and of `y_p`.

    Only components 0 and 1 of the last axis contribute to the result.

    Args:
        xt: Rank-3 tensor indexed as [batch, a, component].
        y_p: Rank-3 tensor indexed as [batch, b, component].

    Returns:
        Tensor indexed as [batch, a, b] whose (i, j) entry is
        ``(y_p[j, 0] - xt[i, 0])**2 + (y_p[j, 1] - xt[i, 1])**2``.
    """
    n_xt = xt.shape[1]
    n_yp = y_p.shape[1]

    # Repeat both inputs onto a common [batch, a, b, component] grid.
    yp_grid = tf.tile(tf.expand_dims(y_p, 1), [1, n_xt, 1, 1])
    xt_grid = tf.tile(tf.expand_dims(xt, 2), [1, 1, n_yp, 1])
    delta = yp_grid - xt_grid

    first = delta[:, :, :, 0]
    second = delta[:, :, :, 1]
    return (first**2 + second**2)
def symbols_to_logits_fn(ids, dec_state):
    """Advance the decoder by one step for every beam.

    Relies on names from the enclosing scope: `Hp`, `beam_size`,
    `reuse_vars`, `emb_word`, `dec_cell` and `dev`.

    Args:
        ids: Ids decoded so far; reshaped to
            [Hp.batch_size, beam_size, decoded_seq] and only the last id of
            each beam is used.
        dec_state: LSTM state tuple whose `c` and `h` carry a beam axis at
            position 1.

    Returns:
        A pair ``(logits, state)``: the per-beam 'out_conv' outputs stacked
        on axis 1, and an LSTMStateTuple of the per-beam new `c` / `h`
        stacked on axis 1.
    """
    # (batch x beam_size x decoded_seq)
    beam_ids = tf.reshape(ids, [Hp.batch_size, beam_size, -1])
    print("dec_state ", dec_state[0].get_shape().as_list())

    logits_per_beam = []
    cell_states = []
    hidden_states = []
    for beam in range(beam_size):
        # Variables are created by the first beam and shared by the rest.
        share = beam > 0 or reuse_vars
        with tf.variable_scope('dec_lstm', reuse=share):
            last_word = beam_ids[:, beam, -1].sg_lookup(emb=emb_word)
            beam_state = tf.contrib.rnn.LSTMStateTuple(
                c=dec_state.c[:, beam, :], h=dec_state.h[:, beam, :])
            step_out, next_state = dec_cell(last_word, beam_state)
            step_out = tf.expand_dims(step_out, 1)
        # NOTE(review): 'out_conv' is built outside the 'dec_lstm' scope,
        # inferred from a whitespace-collapsed original -- confirm against
        # checkpoint variable names.
        beam_logits = step_out.sg_conv1d_gpus(size=1, dim=Hp.word_vs,
                                              name="out_conv", act="linear",
                                              dev=dev, reuse=share)
        logits_per_beam.append(tf.squeeze(beam_logits, 1))
        cell_states.append(next_state[0])
        hidden_states.append(next_state[1])

    stacked_logits = tf.stack(logits_per_beam, 1)
    stacked_state = tf.contrib.rnn.LSTMStateTuple(
        tf.stack(cell_states, 1), tf.stack(hidden_states, 1))
    return stacked_logits, stacked_state
def sg_expand_dims(tensor, opt):
    r"""Adds a size-1 axis to `tensor`.

    Thin wrapper around tf.expand_dims().

    Args:
      tensor: A `Tensor` (automatically given by chain).
      opt:
        dim : Position at which the new axis is inserted. Default is -1.
        name: Forwarded to tf.expand_dims() as the op name.

    Returns:
      A `Tensor`.
    """
    # Supply the default axis when the caller did not set one.
    opt += tf.sg_opt(dim=-1)
    axis, op_name = opt.dim, opt.name
    return tf.expand_dims(tensor, axis, name=op_name)
def tower_infer_dec(chars, scope, rnn_cell, dec_cell, word_emb, rnn_state, out_reuse_vars=False, dev='/cpu:0'):
    """Build the inference graph for one character batch and decode word ids.

    The character ids are reversed along axis 1, embedded, and run through
    ``Hp.num_blocks`` groups of dilated residual blocks (rates 1, 2, 4, 8,
    16). The encoding is summarised by adding the last time step of a
    quasi-RNN to a max-pool taken with a window of ``Hp.c_maxlen`` along the
    sequence axis. That summary goes through a highway layer, dropout and one
    ``rnn_cell`` step. Words are then decoded with beam search; a greedy
    decoder is also present but is disabled by the local ``greedy`` flag.

    NOTE(review): the block nesting below was reconstructed from a
    whitespace-collapsed copy of this function -- confirm the variable-scope
    nesting against the checkpoint's variable names.

    Args:
        chars: Character-id tensor.
            NOTE(review): presumably (batch, Hp.c_maxlen) -- confirm.
        scope: Not used inside this function.
        rnn_cell: Called as ``rnn_cell(inputs, state)`` for the context step;
            must return ``(output, state)``.
        dec_cell: Called as ``dec_cell(inputs, LSTMStateTuple)`` for each
            decoding step; must return ``(output, state)``.
        word_emb: Passed as ``emb=`` when creating the 'emb_word' embedding.
        rnn_state: Incoming context state; read via ``[0]`` / ``[1]`` (beam
            search) or ``.c`` / ``.h`` (greedy).
        out_reuse_vars: Reuse flag for the decoder variables ('dec_lstm' and
            'out_conv').
        dev: Device string used for the embeddings and passed to the
            sugartensor layers.

    Returns:
        ``(ids, rnn_state)`` where ``ids`` is ``final_ids[:, 0, :]`` from beam
        search (or the argmax ids in the greedy branch) and ``rnn_state`` is
        the state returned by ``rnn_cell``.
    """
    with tf.device(dev):
        # Embedding variables are always fetched in reuse mode here.
        with tf.variable_scope('embatch_size', reuse=True):
            # (vocab_size, latent_dim)
            emb_char = tf.sg_emb(name='emb_char', voca_size=Hp.char_vs, dim=Hp.hd, dev=dev)
            emb_word = tf.sg_emb(name='emb_word', emb=word_emb, voca_size=Hp.word_vs, dim=300, dev=dev)

    print(chars)
    ch = chars
    # Every row is given the full length, so the whole axis 1 is reversed.
    ch = tf.reverse_sequence(input=ch, seq_lengths=[Hp.c_maxlen] * Hp.batch_size, seq_dim=1)
    # Encoder-side variables are always reused; reuse_vars_enc is not read again.
    reuse_vars = reuse_vars_enc = True

    # -------------------------- BYTENET ENCODER --------------------------
    with tf.variable_scope('encoder'):
        # embed table lookup
        enc = ch.sg_lookup(emb=emb_char)  #(batch, sentlen, latentdim)
        # loop dilated conv block
        for i in range(Hp.num_blocks):
            enc = (enc.sg_res_block(size=5, rate=1, name="enc1_%d" % (i), is_first=True, reuse_vars=reuse_vars, dev=dev).sg_res_block(
                size=5, rate=2, name="enc2_%d" % (i), reuse_vars=reuse_vars, dev=dev).sg_res_block(
                    size=5, rate=4, name="enc4_%d" % (i), reuse_vars=reuse_vars, dev=dev).sg_res_block(
                        size=5, rate=8, name="enc8_%d" % (i), reuse_vars=reuse_vars, dev=dev).sg_res_block(
                            size=5, rate=16, name="enc16_%d" % (i), reuse_vars=reuse_vars, dev=dev))
        byte_enc = enc

    # -------------------------- QCNN + QPOOL ENCODER #1 --------------------------
    with tf.variable_scope('quazi'):
        #quasi cnn layer ZFO [batch * 3, seqlen, dim2 ]
        conv = byte_enc.sg_quasi_conv1d(is_enc=True, size=4, name="qconv_1", dev=dev, reuse_vars=reuse_vars)
        # c = f * c + (1 - f) * z, h = o*c [batch * 4, seqlen, hd]
        pool0 = conv.sg_quasi_rnn(is_enc=False, att=False, name="qrnn_1", reuse_vars=reuse_vars, dev=dev)
        # Keep only the final time step of the quasi-RNN output.
        qpool_last = pool0[:, -1, :]

    # -------------------------- MAXPOOL along time dimension --------------------------
    inpt_maxpl = tf.expand_dims(byte_enc, 1)  # [batch, 1, seqlen, channels]
    maxpool = tf.nn.max_pool(inpt_maxpl, [1, 1, Hp.c_maxlen, 1], [1, 1, 1, 1], 'VALID')
    maxpool = tf.squeeze(maxpool, [1, 2])

    # -------------------------- HIGHWAY --------------------------
    # The two summaries are added element-wise (not concatenated).
    concat = qpool_last + maxpool
    with tf.variable_scope('highway', reuse=reuse_vars):
        input_lstm = highway(concat, concat.get_shape()[-1], num_layers=1)

    # -------------------------- CONTEXT LSTM --------------------------
    # NOTE(review): dropout is applied in this inference graph too; presumably
    # Hp.keep_prob is 1.0 at inference time -- confirm.
    input_lstm = tf.nn.dropout(input_lstm, Hp.keep_prob)
    with tf.variable_scope('contx_lstm', reuse=reuse_vars):
        output, rnn_state = rnn_cell(input_lstm, rnn_state)

    beam_size = 8
    # From here on the reuse flag is the caller-supplied decoder flag.
    reuse_vars = out_reuse_vars
    # Hard-coded switch: the greedy branch below is never taken as written.
    greedy = False
    if greedy:
        dec_state = rnn_state
        dec_out = []
        # NOTE(review): id 1 is presumably the start-of-sequence token -- confirm.
        d_out = tf.constant([1] * Hp.batch_size)
        for idx in range(Hp.w_maxlen):
            w_input = d_out.sg_lookup(emb=emb_word)
            dec_state = tf.contrib.rnn.LSTMStateTuple(c=dec_state.c, h=dec_state.h)
            with tf.variable_scope('dec_lstm', reuse=idx > 0 or reuse_vars):
                d_out, dec_state = dec_cell(w_input, dec_state)
            dec_out.append(d_out)
            d_out = tf.expand_dims(d_out, 1).sg_conv1d_gpus(size=1, dim=Hp.word_vs, name="out_conv", act="linear", dev=dev, reuse=idx > 0 or reuse_vars)
            # tf.squeeze without an axis drops every size-1 dimension.
            d_out = tf.squeeze(d_out).sg_argmax()
        dec_out = tf.stack(dec_out, 1)
        # Re-applies the shared 'out_conv' projection to all steps at once.
        dec = dec_out.sg_conv1d_gpus(size=1, dim=Hp.word_vs, name="out_conv", act="linear", dev=dev, reuse=True)
        return dec.sg_argmax(), rnn_state
    else:
        # ------------------ BEAM SEARCH --------------------
        # Replicate the context state once per beam along a new axis 1.
        dec_state = tf.contrib.rnn.LSTMStateTuple(
            tf.tile(tf.expand_dims(rnn_state[0], 1), [1, beam_size, 1]),
            tf.tile(tf.expand_dims(rnn_state[1], 1), [1, beam_size, 1]))
        # NOTE(review): id 1 is presumably the start-of-sequence token -- confirm.
        initial_ids = tf.constant([1] * Hp.batch_size)

        def symbols_to_logits_fn(ids, dec_state):
            # One decoder step per beam; returns stacked logits and new state.
            dec = []
            dec_c, dec_h = [], []
            # (batch x beam_size x decoded_seq)
            ids = tf.reshape(ids, [Hp.batch_size, beam_size, -1])
            print("dec_state ", dec_state[0].get_shape().as_list())
            for ind in range(beam_size):
                with tf.variable_scope('dec_lstm', reuse=ind > 0 or reuse_vars):
                    # Only the most recently decoded id of this beam is fed in.
                    w_input = ids[:, ind, -1].sg_lookup(emb=emb_word)
                    dec_state0 = tf.contrib.rnn.LSTMStateTuple(
                        c=dec_state.c[:, ind, :], h=dec_state.h[:, ind, :])
                    dec_out, dec_state_i = dec_cell(w_input, dec_state0)
                    dec_out = tf.expand_dims(dec_out, 1)
                dec_i = dec_out.sg_conv1d_gpus(size=1, dim=Hp.word_vs, name="out_conv", act="linear", dev=dev, reuse=ind > 0 or reuse_vars)
                dec.append(tf.squeeze(dec_i, 1))
                dec_c.append(dec_state_i[0])
                dec_h.append(dec_state_i[1])
            return tf.stack(dec, 1), tf.contrib.rnn.LSTMStateTuple(
                tf.stack(dec_c, 1), tf.stack(dec_h, 1))

        # NOTE(review): 3.5 is presumably the length-penalty alpha and
        # eos_id=2 the end-of-sequence id -- confirm against beam_search.
        final_ids, final_probs = beam_search.beam_search(symbols_to_logits_fn, dec_state, initial_ids, beam_size, Hp.w_maxlen - 1, Hp.word_vs, 3.5, eos_id=2)
        # Index 0 on the beam axis; presumably the best-scoring beam.
        return final_ids[:, 0, :], rnn_state
def rnn_body_stat(time, rnn_state):
    """Encode the entry at index `time` and advance the context LSTM one step.

    Relies on names from the enclosing scope: `chars_sent`, `out_reuse_vars`,
    `emb_char`, `rnn_cell`, `dev`, `Hp` and `highway`.

    NOTE(review): the block nesting below was reconstructed from a
    whitespace-collapsed copy of this function -- confirm the variable-scope
    nesting against the checkpoint's variable names.

    Args:
        time: Index passed to ``chars_sent.read``.
            NOTE(review): presumably a tf.while_loop counter -- confirm.
        rnn_state: Context state fed to ``rnn_cell``.

    Returns:
        ``(time + 1, rnn_state)`` with the state returned by ``rnn_cell``.
    """
    ch = chars_sent.read(time)
    # Every row is given the full length, so the whole axis 1 is reversed.
    ch = tf.reverse_sequence(input=ch, seq_lengths=[Hp.c_maxlen] * Hp.batch_size, seq_dim=1)
    reuse_vars = out_reuse_vars

    # -------------------------- BYTENET ENCODER --------------------------
    with tf.variable_scope('encoder'):
        # embed table lookup
        enc = ch.sg_lookup(emb=emb_char)  #(batch, sentlen, latentdim)
        # loop dilated conv block
        for i in range(Hp.num_blocks):
            enc = (enc.sg_res_block(size=5, rate=1, name="enc1_%d" % (i), is_first=True, reuse_vars=reuse_vars, dev=dev).sg_res_block(
                size=5, rate=2, name="enc2_%d" % (i), reuse_vars=reuse_vars, dev=dev).sg_res_block(
                    size=5, rate=4, name="enc4_%d" % (i), reuse_vars=reuse_vars, dev=dev).sg_res_block(
                        size=5, rate=8, name="enc8_%d" % (i), reuse_vars=reuse_vars, dev=dev).sg_res_block(
                            size=5, rate=16, name="enc16_%d" % (i), reuse_vars=reuse_vars, dev=dev))
        byte_enc = enc

    # -------------------------- QCNN + QPOOL ENCODER #1 --------------------------
    with tf.variable_scope('quazi'):
        #quasi cnn layer ZFO [batch * 3, seqlen, dim2 ]
        conv = byte_enc.sg_quasi_conv1d(is_enc=True, size=4, name="qconv_1", dev=dev, reuse_vars=reuse_vars)
        # c = f * c + (1 - f) * z, h = o*c [batch * 4, seqlen, hd]
        pool0 = conv.sg_quasi_rnn(is_enc=False, att=False, name="qrnn_1", reuse_vars=reuse_vars, dev=dev)
        # Keep only the final time step of the quasi-RNN output.
        qpool_last = pool0[:, -1, :]

    # -------------------------- MAXPOOL along time dimension --------------------------
    inpt_maxpl = tf.expand_dims(byte_enc, 1)  # [batch, 1, seqlen, channels]
    maxpool = tf.nn.max_pool(inpt_maxpl, [1, 1, Hp.c_maxlen, 1], [1, 1, 1, 1], 'VALID')
    maxpool = tf.squeeze(maxpool, [1, 2])

    # -------------------------- HIGHWAY --------------------------
    # The two summaries are added element-wise (not concatenated).
    concat = qpool_last + maxpool
    with tf.variable_scope('highway', reuse=reuse_vars):
        input_lstm = highway(concat, concat.get_shape()[-1], num_layers=1)

    # -------------------------- CONTEXT LSTM --------------------------
    input_lstm = tf.nn.dropout(input_lstm, Hp.keep_prob)
    with tf.variable_scope('contx_lstm', reuse=reuse_vars):
        # `output` is not used; only the new state is carried forward.
        output, rnn_state = rnn_cell(input_lstm, rnn_state)

    return (time + 1, rnn_state)
def sg_expand_dims(tensor, opt):
    r"""Inserts a size-1 axis into `tensor` via tf.expand_dims().

    Args:
      tensor: A `Tensor` (automatically given by chain).
      opt:
        dim : Position of the new axis. Default is -1.
        name: Forwarded to tf.expand_dims() as the op name.

    Returns:
      A `Tensor`.
    """
    # Supply the default axis when the caller did not set one.
    opt += tf.sg_opt(dim=-1)
    expanded = tf.expand_dims(tensor, opt.dim, name=opt.name)
    return expanded
def wrapper(tensor, **kwargs):
    r"""Runs the wrapped layer function with resolved options and post-processing.

    Merges `kwargs` with the current sugar context, fills in defaults, picks
    a layer name if none was given, calls `func(tensor, opt)` inside a
    variable scope of that name, and then applies (in this order) batch
    normalization, layer normalization, activation and dropout when they are
    enabled. `func`, `_phase`, `sg_get_context`, `sg_reuse` and `types` come
    from the enclosing scope.

    Args:
      tensor: A `tensor` (automatically passed by decorator).
      kwargs:
        shape:  A list of integers. The shape of `tensor`. Inferred if not specified.
        in_dim:  An integer. The size of input dimension, which is set to the last one by default.
        dim:  An integer. The size of output dimension. Has the same value as in_dim by default.
        bn: Boolean. If True, batch normalization is applied.
        ln: Boolean. If True, layer normalization is applied.
          `bn` and `ln` must not both be set.
        scale: If True, multiply by a trainable gamma variable. When the activation is
          linear (relu included), this can be disabled because it can be implicitly
          learned by the next layer. The default is True.
        dout: A float of range [0, 1). A dropout rate; the keep probability
          used is `1 - dout`. Set to 0 by default.
        bias: Boolean. If True, biases are added. By default it is True
          unless `bn` or `ln` is on, in which case it defaults to False.
        name: A name for the layer. As a default, the function name (without
          the `sg_` prefix) plus a running number is assigned.
        act: A name of activation function. e.g., `sigmoid`, `tanh`, etc.
        reuse: `True` or `None`; if `True`, we go into reuse mode for this `layer` scope
          as well as all sub-scopes; if `None`, we just inherit the parent scope reuse.
        regularizer:  A string. None, 'l1' or 'l2'. The default is None
        summary: If True, summaries are added. The default is True.

    Returns:
      The layer output `Tensor`, named 'out' inside the layer scope, with
      `_sugar` metadata and an `sg_reuse` method attached.
    """

    from . import sg_initializer as init
    from . import sg_activation

    # kwargs parsing
    opt = tf.sg_opt(kwargs) + sg_get_context()

    # set default argument
    # NOTE(review): this try/finally has an empty `finally`, so it does not
    # change control flow; exceptions raised inside still propagate.
    try:
        shape = tensor.get_shape().as_list()
        # batch normalization off, layer normalization off, dropout off
        opt += tf.sg_opt(shape=shape, in_dim=shape[-1], dim=shape[-1], bn=False, ln=False, dout=0, summary=True, scale=True)
        # Turn the regularizer name into a callable; anything else becomes None.
        if opt.regularizer == 'l1':
            opt.regularizer = lambda x: tf.reduce_mean(tf.abs(x))
        elif opt.regularizer == 'l2':
            # NOTE(review): this squares the mean of squares -- confirm it is
            # the intended 'l2' penalty.
            opt.regularizer = lambda x: tf.square(
                tf.reduce_mean(tf.square(x)))
        else:
            opt.regularizer = None

        assert not (
            opt.bn and opt.ln
        ), 'one of batch normalization and layer normalization is available.'

        # disable bias when normalization on
        opt += tf.sg_opt(bias=not (opt.bn or opt.ln))
    finally:
        pass

    # automatic layer naming
    if opt.name is None:

        # layer function name will be used as layer name
        opt.name = func.__name__.replace('sg_', '')

        # find existing layer names
        exist_layers = []
        for t in tf.global_variables():
            scope_name = tf.get_variable_scope().name
            prefix = scope_name + '/' if len(scope_name) > 0 else ''
            i = t.name.rfind(prefix + opt.name)
            if i >= 0:
                # Second-to-last path component of the matched variable name.
                exist_layers.append(t.name[i:].split('/')[-2])
        exist_layers = list(set(exist_layers))

        # layer name numbering
        if len(exist_layers) == 0:
            opt.name += '_1'
        else:
            # One past the highest numeric suffix already in use.
            opt.name += '_%d' % (
                max([int(n.split('_')[-1]) for n in exist_layers]) + 1)

    with tf.variable_scope(opt.name, reuse=opt.reuse) as scope:

        # call layer function
        out = func(tensor, opt)
        out_shape = out.get_shape()

        # apply batch normalization
        if opt.bn:
            beta = init.constant('beta', opt.dim, summary=opt.summary)
            gamma = init.constant('gamma', opt.dim, value=1, summary=opt.summary, trainable=opt.scale)

            # offset, scale parameter ( for inference )
            mean_running = init.constant('mean', opt.dim, trainable=False, summary=opt.summary)
            variance_running = init.constant('variance', opt.dim, value=1, trainable=False, summary=opt.summary)

            # use fused batch norm if ndims in [2, 3, 4]
            if out_shape.ndims in [2, 3, 4]:
                # add HW dims if necessary, fused_batch_norm requires shape to be NHWC
                if out_shape.ndims == 2:
                    out = tf.expand_dims(out, axis=1)
                    out = tf.expand_dims(out, axis=2)
                elif out_shape.ndims == 3:
                    out = tf.expand_dims(out, axis=2)

                # The fused op is never given an epsilon below 1e-5.
                fused_eps = tf.sg_eps if tf.sg_eps > 1e-5 else 1e-5
                # `_phase` selects batch statistics vs. the running statistics.
                out, mean, variance = tf.cond(
                    _phase,
                    lambda: tf.nn.fused_batch_norm(
                        out, gamma, beta, epsilon=fused_eps),
                    lambda: tf.nn.fused_batch_norm(out, gamma, beta, mean=mean_running, variance=variance_running, epsilon=fused_eps, is_training=False),
                )

                # restore original shape if HW dims was added
                if out_shape.ndims == 2:
                    out = tf.squeeze(out, axis=[1, 2])
                elif out_shape.ndims == 3:
                    out = tf.squeeze(out, axis=2)

            # fallback to naive batch norm
            else:
                # Moments over every axis except the last one.
                mean, variance = tf.nn.moments(
                    out, axes=list(range(len(out.get_shape()) - 1)))
                out = tf.cond(
                    _phase,
                    lambda: tf.nn.batch_normalization(
                        out, mean, variance, beta, gamma, tf.sg_eps),
                    lambda: tf.nn.batch_normalization(
                        out, mean_running, variance_running, beta, gamma, tf.sg_eps))

            # Register exponential-moving-average updates of the running
            # statistics; they only run if the UPDATE_OPS collection is executed.
            decay = 0.99
            tf.add_to_collection(
                tf.GraphKeys.UPDATE_OPS,
                mean_running.assign(mean_running * decay + mean * (1 - decay)))
            tf.add_to_collection(
                tf.GraphKeys.UPDATE_OPS,
                variance_running.assign(variance_running * decay + variance * (1 - decay)))

        # apply layer normalization
        if opt.ln:
            # offset, scale parameter
            beta = init.constant('beta', opt.dim, summary=opt.summary)
            if opt.scale:
                gamma = init.constant('gamma', opt.dim, value=1, summary=opt.summary)

            # calc layer mean, variance for final axis
            mean, variance = tf.nn.moments(out, axes=[len(out.get_shape()) - 1], keep_dims=True)

            # apply normalization
            out = (out - mean) / tf.sqrt(variance + tf.sg_eps)

            # apply parameter
            if opt.scale:
                out = gamma * out + beta
            else:
                out = out + beta

        # apply activation
        if opt.act:
            # Looked up by name: 'sg_' + lower-cased activation name.
            out = getattr(sg_activation, 'sg_' + opt.act.lower())(out)

        # apply dropout
        if opt.dout:
            # Dropout is only active when `_phase` is true.
            out = tf.cond(_phase, lambda: tf.nn.dropout(out, 1 - opt.dout), lambda: out)

        # rename tensor
        out = tf.identity(out, 'out')

        # add final output summary
        if opt.summary:
            tf.sg_summary_activation(out)

        # save node info for reuse
        out._sugar = tf.sg_opt(func=func, arg=tf.sg_opt(kwargs) + sg_get_context(), prev=tensor, is_layer=True, name=opt.name)
        # inject reuse function
        out.sg_reuse = types.MethodType(sg_reuse, out)

    return out
def make_weights(size):
    """Linearly increasing weights 1..size divided by their total.

    The divisor is the triangular number ``size * (size + 1) // 2``, i.e.
    the sum of the integers 1..size.

    Args:
        size: Number of weights to generate.

    Returns:
        A float32 tensor with singleton axes added at the front and the
        back of the 1-D ramp (shape ``(1, size, 1)``).
    """
    ramp = tf.range(1, size + 1, dtype=tf.float32)
    triangular = (1 + size) * size // 2
    scaled = ramp * (1. / triangular)
    return tf.expand_dims(tf.expand_dims(scaled, 0), -1)