def chamfer_loss(A, B):
    """Symmetric Chamfer loss between two batched point sets.

    Args:
        A: float tensor of shape (batch, n_a, d).
        B: float tensor of shape (batch, n_b, d).

    Returns:
        Scalar tensor: mean over the batch of the averaged directed
        nearest-neighbor squared distances (A->B and B->A).
    """
    # squared norms of A's points, as a (batch, n_a, 1) column
    r = tf.reduce_sum(A * A, 2)
    r = tf.reshape(r, [int(r.shape[0]), int(r.shape[1]), 1])
    # squared norms of B's points, as a (batch, n_b, 1) column
    r2 = tf.reduce_sum(B * B, 2)
    # BUG FIX: was reshaped with r's dims (i.e. A's point count); that crashes
    # or silently mis-pairs whenever A and B contain different numbers of points
    r2 = tf.reshape(r2, [int(r2.shape[0]), int(r2.shape[1]), 1])
    # pairwise squared distances |a|^2 - 2<a,b> + |b|^2 -> (batch, n_a, n_b)
    t = (r - 2 * tf.matmul(A, tf.transpose(B, perm=[0, 2, 1]))
         + tf.transpose(r2, perm=[0, 2, 1]))
    # average the two directed nearest-neighbor terms
    return tf.reduce_mean((tf.reduce_min(t, axis=1) + tf.reduce_min(t, axis=2)) / 2.0)
def sg_rnn(tensor, opt):
    """Vanilla RNN layer driven by `tf.scan`.

    Args:
        tensor: input of shape (batch, time, opt.in_dim) (given by chain).
        opt: option bag with fields `in_dim`, `dim`, `bias`, `ln`,
            `init_state` and `last_only`.

    Returns:
        (batch, time, opt.dim) output sequence, or (batch, opt.dim) when
        `opt.last_only` is set.
    """
    # parameter initialize
    w = init.orthogonal('W', (opt.in_dim, opt.dim))
    u = init.identity('U', opt.dim)
    if opt.bias:
        b = init.constant('b', opt.dim)

    # layer normalization parameters (offset, scale)
    if opt.ln:
        beta = init.constant('beta', opt.dim)
        gamma = init.constant('gamma', opt.dim, value=1)

    # initial state (zeros unless the caller supplied one)
    init_h = opt.init_state if opt.init_state \
        else tf.zeros((tensor.get_shape().as_list()[0], opt.dim), dtype=tf.sg_floatx)

    # permute to time-major for the scan loop
    xx = tf.transpose(tensor, [1, 0, 2])

    # step func
    def step(h, x):

        # layer normalization over the final axis
        def ln(xx, opt):
            if opt.ln:
                # calc layer mean, variance for final axis
                mean, variance = tf.nn.moments(xx, axes=[len(xx.get_shape()) - 1])
                # apply layer normalization ( explicit broadcasting needed )
                broadcast_shape = [-1] + [1] * (len(xx.get_shape()) - 1)
                xx = (xx - tf.reshape(mean, broadcast_shape)) \
                    / tf.reshape(tf.sqrt(variance + tf.sg_eps), broadcast_shape)
                # apply offset/scale parameters
                return gamma * xx + beta
            # BUG FIX: without this branch, `gamma`/`beta` were referenced even
            # when opt.ln is False, where they are never created -> NameError
            return xx

        # h' = LN(x W + h U + b)
        y = ln(tf.matmul(x, w) + tf.matmul(h, u) + (b if opt.bias else 0), opt)
        return y

    # loop by scan
    out = tf.scan(step, xx, init_h)

    # recover batch-major dimension order
    out = tf.transpose(out, [1, 0, 2])

    # last sequence step only
    if opt.last_only:
        out = out[:, tensor.get_shape().as_list()[1] - 1, :]

    return out
def linear(input_, output_size, scope=None):
    """Affine transform: output = input_ @ W^T + b.

    Args:
        input_: a 2-D `Tensor` of shape [batch, n].
        output_size: int, second dimension of the created weight matrix.
        scope: VariableScope for the created subgraph; defaults to
            "SimpleLinear".

    Returns:
        A 2-D Tensor with shape [batch, output_size].

    Raises:
        ValueError: if `input_` is not 2-D or its second dimension is
            unknown/zero.
    """
    dims = input_.get_shape().as_list()

    # guard clauses: reject anything that is not a fully-specified 2-D input
    if len(dims) != 2:
        raise ValueError("Linear is expecting 2D arguments: %s" % str(dims))
    if not dims[1]:
        raise ValueError("Linear expects shape[1] of arguments: %s" % str(dims))

    with tf.variable_scope(scope or "SimpleLinear"):
        # weight is stored transposed ([output, input]), as in tf's rnn_cell
        weight = tf.get_variable("Matrix", [output_size, dims[1]], dtype=input_.dtype)
        bias = tf.get_variable("Bias", [output_size], dtype=input_.dtype)

    return tf.matmul(input_, tf.transpose(weight)) + bias
def get_decoded_seq(logits, seq_lens, targets):
    """Decode logits with CTC beam search and return the best hypothesis.

    Args:
        logits: batch-major logits; transposed to time-major for the CTC op.
        seq_lens: per-example sequence lengths.
        targets: unused here; kept so existing call sites keep working.

    Returns:
        The top decoding (a `SparseTensor`) from the beam search.
    """
    # CTC ops expect time-major input: (time, batch, classes)
    time_major = tf.transpose(logits, [1, 0, 2])
    decoded, _ = tf.nn.ctc_beam_search_decoder(time_major, seq_lens)
    # beam search returns a list of hypotheses; keep only the best one
    # TODO: what is the exact dimension that's returned?
    return decoded[0]
def sg_transpose(tensor, opt):
    r"""Permute `tensor`'s dimensions into the order given by `opt.perm`.

    Thin chain-style wrapper around `tf.transpose()`.

    Args:
      tensor: A `Tensor` (automatically given by chain).
      opt:
        perm: A permutation of the dimensions of `tensor`. The target shape.
        name: If provided, replace current tensor's name.

    Returns:
      A `Tensor`.
    """
    # `perm` has no sensible default here, so it must be supplied explicitly
    assert opt.perm is not None, 'perm is mandatory'
    permuted = tf.transpose(tensor, opt.perm, name=opt.name)
    return permuted
def tower_infer_enc(chars, scope, rnn_cell, dec_cell, word_emb, out_reuse_vars=False, dev='/cpu:0'):
    """Encode the "statement" half of a paragraph and return the context-LSTM state.

    Builds, per sentence, a ByteNet-style dilated-conv character encoder plus a
    quasi-RNN pooling branch and a max-pool branch, merges them through a
    highway layer, and feeds the result into `rnn_cell` inside a
    `tf.while_loop` over the first half of the sentences.

    Args:
        chars: int tensor of character ids; assumed (batch, n_sentences,
            sent_len) — it is transposed and unstacked into per-sentence
            (batch, sentlen) slices below. TODO(review): confirm with caller.
        scope: unused in this function body.
        rnn_cell: context cell; must expose `zero_state` and be callable.
        dec_cell: unused here (decoding presumably happens elsewhere).
        word_emb: pre-trained embedding matrix for `emb_word`.
        out_reuse_vars: whether this tower reuses previously created variables.
        dev: device string for embedding/conv placement.

    Returns:
        The final `rnn_cell` state after consuming the statement sentences.
    """
    out_rvars = out_reuse_vars  # NOTE(review): assigned but never used below

    # make embedding matrix for source and target
    with tf.device(dev):
        with tf.variable_scope('embatch_size', reuse=out_reuse_vars):
            # (vocab_size, latent_dim)
            emb_char = tf.sg_emb(name='emb_char', voca_size=Hp.char_vs, dim=Hp.hd, dev=dev)
            emb_word = tf.sg_emb(name='emb_word', emb=word_emb, voca_size=Hp.word_vs, dim=300, dev=dev)

    chars = tf.cast(chars, tf.int32)
    time = tf.constant(0)

    # time-major layout so each TensorArray element is one sentence across the batch
    inputs = tf.transpose(chars, perm=[1, 0, 2])
    input_ta = tensor_array_ops.TensorArray(tf.int32, size=tf.shape(chars)[1], dynamic_size=True, clear_after_read=True)
    chars_sent = input_ta.unstack(inputs)  # each element is (batch, sentlen)

    resp_steps = tf.shape(chars)[1]  # number of sentences in paragraph
    statm_steps = resp_steps // 2  # first half of the sentences are treated as statements

    rnn_state = rnn_cell.zero_state(Hp.batch_size, tf.float32)  # rnn_cell.rnn_state, rnn_cell.rnn_h
    maxdecode = 3  # NOTE(review): unused in this function

    # ------------------------------- STATEMENT ENCODING -------------------------------
    def rnn_cond_stat(time, rnn_state):
        # iterate over all statement sentences except the last
        return tf.less(time, statm_steps - 1)

    def rnn_body_stat(time, rnn_state):
        ch = chars_sent.read(time)
        # reverse each sentence; fixed length Hp.c_maxlen assumed for every row
        ch = tf.reverse_sequence(input=ch, seq_lengths=[Hp.c_maxlen] * Hp.batch_size, seq_dim=1)
        reuse_vars = out_reuse_vars

        # -------------------------- BYTENET ENCODER --------------------------
        with tf.variable_scope('encoder'):
            # embed table lookup
            enc = ch.sg_lookup(emb=emb_char)  # (batch, sentlen, latentdim)
            # stacked dilated conv blocks with rates 1/2/4/8/16
            for i in range(Hp.num_blocks):
                enc = (enc.sg_res_block(size=5, rate=1, name="enc1_%d" % (i), is_first=True, reuse_vars=reuse_vars, dev=dev)
                       .sg_res_block(size=5, rate=2, name="enc2_%d" % (i), reuse_vars=reuse_vars, dev=dev)
                       .sg_res_block(size=5, rate=4, name="enc4_%d" % (i), reuse_vars=reuse_vars, dev=dev)
                       .sg_res_block(size=5, rate=8, name="enc8_%d" % (i), reuse_vars=reuse_vars, dev=dev)
                       .sg_res_block(size=5, rate=16, name="enc16_%d" % (i), reuse_vars=reuse_vars, dev=dev))
        byte_enc = enc

        # -------------------------- QCNN + QPOOL ENCODER #1 --------------------------
        with tf.variable_scope('quazi'):
            # quasi cnn layer ZFO [batch * 3, seqlen, dim2 ]
            conv = byte_enc.sg_quasi_conv1d(is_enc=True, size=4, name="qconv_1", dev=dev, reuse_vars=reuse_vars)
            # c = f * c + (1 - f) * z, h = o*c  [batch * 4, seqlen, hd]
            pool0 = conv.sg_quasi_rnn(is_enc=False, att=False, name="qrnn_1", reuse_vars=reuse_vars, dev=dev)
            qpool_last = pool0[:, -1, :]  # last time step of the pooled sequence

        # -------------------------- MAXPOOL along time dimension --------------------------
        inpt_maxpl = tf.expand_dims(byte_enc, 1)  # [batch, 1, seqlen, channels]
        maxpool = tf.nn.max_pool(inpt_maxpl, [1, 1, Hp.c_maxlen, 1], [1, 1, 1, 1], 'VALID')
        maxpool = tf.squeeze(maxpool, [1, 2])

        # -------------------------- HIGHWAY --------------------------
        # NOTE(review): despite the name, this is an element-wise sum, not a concat
        concat = qpool_last + maxpool
        with tf.variable_scope('highway', reuse=reuse_vars):
            input_lstm = highway(concat, concat.get_shape()[-1], num_layers=1)

        # -------------------------- CONTEXT LSTM --------------------------
        input_lstm = tf.nn.dropout(input_lstm, Hp.keep_prob)
        with tf.variable_scope('contx_lstm', reuse=reuse_vars):
            output, rnn_state = rnn_cell(input_lstm, rnn_state)

        return (time + 1, rnn_state)

    loop_vars_stat = [time, rnn_state]
    time, rnn_state = tf.while_loop(rnn_cond_stat, rnn_body_stat, loop_vars_stat, swap_memory=False)

    return rnn_state
def sg_transpose(tensor, opt):
    """Chain-style alias for `tf.transpose` using `opt.perm` / `opt.name`."""
    # fail fast when the caller forgot the permutation
    assert opt.perm is not None, 'perm is mandatory'
    result = tf.transpose(tensor, opt.perm, name=opt.name)
    return result
def sg_gru(tensor, opt):
    """GRU layer driven by `tf.scan`.

    Args:
        tensor: input of shape (batch, time, opt.in_dim) (given by chain).
        opt: option bag with fields `in_dim`, `dim`, `bias`, `ln`,
            `init_state` and `last_only`.

    Returns:
        (batch, time, opt.dim) output sequence, or (batch, opt.dim) when
        `opt.last_only` is set.
    """
    # parameter initialize
    w_z = init.orthogonal('W_z', (opt.in_dim, opt.dim))
    u_z = init.identity('U_z', opt.dim)
    w_r = init.orthogonal('W_r', (opt.in_dim, opt.dim))
    u_r = init.identity('U_r', opt.dim)
    w_h = init.orthogonal('W_h', (opt.in_dim, opt.dim))
    u_h = init.identity('U_h', opt.dim)
    if opt.bias:
        b_z = init.constant('b_z', opt.dim)
        b_r = init.constant('b_r', opt.dim)
        b_h = init.constant('b_h', opt.dim)

    # layer normalization parameters (offset, scale)
    if opt.ln:
        beta = init.constant('beta', opt.dim)
        gamma = init.constant('gamma', opt.dim, value=1)

    # initial state (zeros unless the caller supplied one)
    init_h = opt.init_state if opt.init_state \
        else tf.zeros((tensor.get_shape().as_list()[0], opt.dim), dtype=tf.sg_floatx)

    # permute to time-major for the scan loop
    xx = tf.transpose(tensor, [1, 0, 2])

    # step func
    def step(h, x):

        # layer normalization over the final axis
        def ln(xx, opt):
            if opt.ln:
                # calc layer mean, variance for final axis
                mean, variance = tf.nn.moments(xx, axes=[len(xx.get_shape()) - 1])
                # apply layer normalization ( explicit broadcasting needed )
                broadcast_shape = [-1] + [1] * (len(xx.get_shape()) - 1)
                xx = (xx - tf.reshape(mean, broadcast_shape)) \
                    / tf.reshape(tf.sqrt(variance + tf.sg_eps), broadcast_shape)
                # apply offset/scale parameters
                return gamma * xx + beta
            # BUG FIX: without this branch, `gamma`/`beta` were referenced even
            # when opt.ln is False, where they are never created -> NameError
            return xx

        # update gate
        z = tf.sigmoid(ln(tf.matmul(x, w_z) + tf.matmul(h, u_z) + (b_z if opt.bias else 0), opt))
        # reset gate
        r = tf.sigmoid(ln(tf.matmul(x, w_r) + tf.matmul(h, u_r) + (b_r if opt.bias else 0), opt))
        # candidate state
        # BUG FIX: the GRU candidate activation is tanh (Cho et al., 2014);
        # sigmoid here confined the hidden state to [0, 1] and is not the
        # standard GRU recurrence
        hh = tf.tanh(ln(tf.matmul(x, w_h) + tf.matmul(r * h, u_h) + (b_h if opt.bias else 0), opt))
        # final output: convex mix of previous state and candidate
        y = (1. - z) * h + z * hh
        return y

    # loop by scan
    out = tf.scan(step, xx, init_h)

    # recover batch-major dimension order
    out = tf.transpose(out, [1, 0, 2])

    # last sequence step only
    if opt.last_only:
        out = out[:, tensor.get_shape().as_list()[1] - 1, :]

    return out
act='leaky_relu', bn=False)  # (continuation of the discriminator conv stack started above)
# final up-conv back to a single channel, linear activation: the discriminator
# reconstructs its input (energy-based / autoencoder-style discriminator)
d_p4 = ops.upconv_and_scale(d_p3, dim=1, size=size, stride=stride, act='linear', bn=False)
disc = d_p4

#
# pull-away term ( PT ) regularizer
#
sample = gen.sg_flatten()
# Gram matrix of generated samples: nom[i, j] = <sample_i, sample_j>
nom = tf.matmul(sample, tf.transpose(sample, perm=[1, 0]))
# per-sample squared norms as a column vector; NOTE(review): dividing by this
# normalizes rows only, not the symmetric |x_i||x_j| product — confirm intent
denom = tf.reduce_sum(tf.square(sample), reduction_indices=[1], keep_dims=True)
pt = tf.square(nom / denom)
pt -= tf.diag(tf.diag_part(pt))  # zero out self-similarity on the diagonal
pt = tf.reduce_sum(pt) / (batch_size * (batch_size - 1))  # mean over ordered pairs

#
# loss & train ops
#
# per-example mean squared reconstruction errors
mse = tf.reduce_mean(tf.square(disc - xx), reduction_indices=[1, 2, 3])
# split the stacked batch: first batch_size rows -> mse_real, rest -> mse_fake
mse_real, mse_fake = mse[:batch_size], mse[batch_size:]
loss_disc = mse_real + tf.maximum(margin - mse_fake, 0)  # discriminator loss (hinge on fake energy)
loss_gen = mse_fake + pt * pt_weight  # generator loss + PT regularizer
# project RNN outputs to per-frame class logits, then wire up CTC loss,
# summary and RMSProp gradient computation
with tf.name_scope('outputs'):
    # flatten (batch, time, num_hidden) -> (batch*time, num_hidden) for the matmul
    outputs = tf.reshape(outputs, [-1, num_hidden])
with tf.name_scope('weights'):
    W = tf.Variable(tf.truncated_normal([num_hidden, num_classes], stddev=0.1), name='weights')
with tf.name_scope('biases'):
    # NOTE(review): W uses tf.Variable while b uses tf.get_variable — mixed
    # styles; get_variable ignores name_scope and can collide under reuse
    b = tf.get_variable("b", initializer=tf.constant(0., shape=[num_classes]))
with tf.name_scope('logits'):
    logits = tf.matmul(outputs, W) + b
    logits = tf.reshape(logits, [batch_s, -1, num_classes])
    # tf.nn.ctc_loss expects time-major logits: (time, batch, num_classes)
    logits = tf.transpose(logits, (1, 0, 2), name="out/logits")
with tf.name_scope('loss'):
    loss = tf.nn.ctc_loss(targets, logits, seq_len, ctc_merge_repeated=True, preprocess_collapse_repeated=True)
with tf.name_scope('cost'):
    # mean CTC loss over the batch, exported as a scalar summary
    cost = tf.reduce_mean(loss)
    tf.summary.scalar("cost", cost)
with tf.name_scope('optimizer'):
    optimizer = tf.train.RMSPropOptimizer(learning_rate, decay=decay, momentum=momentum, centered=True)
    # gradients are computed here; presumably clipped/applied further down
    gvs = optimizer.compute_gradients(cost)
def LSTM(self, name, _X):
    """Dilated-convolution sequence model (despite the name, no LSTM cell).

    Runs `_X` through a small WaveNet-style stack of gated dilated 1-D convs
    with residual + skip connections, then projects the accumulated skip
    output to 5 channels: 4 box coordinates plus a confidence score.

    Args:
        name: unused in this body.
        _X: input batch; assumed (batchsize, nsteps, len_vec) per the
            original inline note — TODO(review): confirm with caller.

    Returns:
        Tuple (None, batch_pred_coords, batch_pred_confs, None), where coords
        come from the last row of the final logit tensor's first axis.
    """
    # import pdb; pdb.set_trace()
    ''' shape: (batchsize, nsteps, len_vec) '''
    _X = tf.transpose(_X, [1, 0, 2])
    # ''' shape: (nsteps, batchsize, len_vec) '''
    # _X = tf.reshape(_X, [self.nsteps * self.batchsize, self.len_vec])
    # ''' shape: n_steps * (batchsize, len_vec) '''
    # _X = tf.split(_X, num_or_size_splits=self.nsteps, axis=0)

    latent_dimensions = self.len_vec
    num_blocks = 1

    # one gated residual block: tanh filter * sigmoid gate, 1x1 projection,
    # returns (residual output, skip output)
    def res_block(tensor, size, rate, dim=latent_dimensions):
        # filter convolution
        conv_filter = tensor.sg_aconv1d(size=size, rate=rate, act='tanh', bn=True)
        # gate convolution
        conv_gate = tensor.sg_aconv1d(size=size, rate=rate, act='sigmoid', bn=True)
        # output by gate multiplying
        out = conv_filter * conv_gate
        # final 1x1 projection back to `dim` channels
        out = out.sg_conv1d(size=1, dim=dim, act='tanh', bn=True)
        # residual and skip output
        return out + tensor, out

    # expand dimension to the latent width before the dilated stack
    z = _X.sg_conv1d(size=1, dim=latent_dimensions, act='tanh', bn=True)

    # dilated conv block loop (rates 1 and 2 per block)
    skip = 0  # accumulated skip connections
    for i in range(num_blocks):
        for r in [1, 2]:
            z, s = res_block(z, size=3, rate=r)
            skip += s

    # final logit layers over the skip sum
    logit = (skip.sg_conv1d(size=1, act='tanh', bn=True).sg_conv1d(size=1, dim=5))  #5 => 4 coords + confidence

    # import pdb; pdb.set_trace()
    # dense_coords_conf = self.dnn_layers(pred[-1], (self.len_vec, 256, 32, self.len_coord+1), activation=tf.sigmoid)
    #
    # batch_pred_feats = pred[0][:, 0:self.len_feat]
    # batch_pred_coords = dense_coords_conf[:, 0:4]
    # batch_pred_confs = dense_coords_conf[:,4]
    # import pdb; pdb.set_trace()

    # take the last row of the leading axis; split into coords / confidence
    batch_pred_coords = logit[-1][:, 0:4]
    batch_pred_confs = logit[-1][:, 4]

    return None, batch_pred_coords, batch_pred_confs, None
def tower_loss_manyparams(xx, scope, reu_vars=False): # make embedding matrix for source and target reu_vars = reu_vars with tf.variable_scope('embatch_size', reuse=reu_vars): # (vocab_size, latent_dim) emb_x = tf.sg_emb(name='emb_x', voca_size=Hp.vs, dim=Hp.hd, dev=self._dev) emb_y = tf.sg_emb(name='emb_y', voca_size=Hp.vs, dim=Hp.hd, dev=self._dev) xx = tf.cast(xx, tf.int32) time = tf.constant(0) losses_int = tf.constant(0.0) inputs = tf.transpose(xx, perm=[1, 0, 2]) input_ta = tensor_array_ops.TensorArray(tf.int32, size=1, dynamic_size=True, clear_after_read=False) x_sent = input_ta.unstack(inputs) #each element is (batch, sentlen) n_steps = tf.shape(xx)[1] # number of sentences in paragraph # generate first an unconditioned sentence n_input = Hp.hd subrec1_init = subrec_zero_state(Hp.batch_size, Hp.hd) subrec2_init = subrec_zero_state(Hp.batch_size, Hp.hd) with tf.variable_scope("mem", reuse=reu_vars) as scp: rnn_cell = LSTMCell(in_dim=h, dim=Hp.hd) crnn_cell = ConvLSTMCell(seqlen=Hp.maxlen, in_dim=n_input // 2, dim=Hp.hd // 2) (rnn_state_init, rnn_h_init) = rnn_cell.zero_state(Hp.batch_size) # (batch, sentlen, latentdim/2) (crnn_state_init, crnn_h_init) = crnn_cell.zero_state(Hp.batch_size) def rnn_cond(time, subrec1, subrec2, rnn_state, rnn_h, crnn_state, crnn_h, losses): return tf.less(time, n_steps - 1) def rnn_body(time, subrec1, subrec2, rnn_state, rnn_h, crnn_state, crnn_h, losses): x = x_sent.read(time) y = x_sent.read(time + 1) # (batch, sentlen) = (16, 200) # shift target by one step for training source y_src = tf.concat([tf.zeros((Hp.batch_size, 1), tf.int32), y[:, :-1]], 1) reuse_vars = time == tf.constant(0) or reu_vars # -------------------------- BYTENET ENCODER -------------------------- # embed table lookup enc = x.sg_lookup(emb=emb_x) #(batch, sentlen, latentdim) # loop dilated conv block for i in range(num_blocks): enc = (enc.sg_res_block( size=5, rate=1, name="enc1_%d" % (i), reuse_vars=reuse_vars).sg_res_block( size=5, rate=2, 
name="enc2_%d" % (i), reuse_vars=reuse_vars).sg_res_block( size=5, rate=4, name="enc4_%d" % (i), reuse_vars=reuse_vars).sg_res_block( size=5, rate=8, name="enc8_%d" % (i), reuse_vars=reuse_vars).sg_res_block( size=5, rate=16, name="enc16_%d" % (i), reuse_vars=reuse_vars)) # -------------------------- QCNN + QPOOL ENCODER with attention #1 -------------------------- #quasi cnn layer ZFO [batch * 3, t, dim2 ] conv = enc.sg_quasi_conv1d(is_enc=True, size=3, name="qconv_1", reuse_vars=reuse_vars) #attention layer # recurrent layer # 1 + final encoder hidden state subrec1 = tf.tile((subrec1.sg_expand_dims(axis=1)), [1, Hp.maxlen, 1]) concat = conv.sg_concat(target=subrec1, axis=0) # (batch*4, sentlen, latentdim) pool = concat.sg_quasi_rnn(is_enc=True, att=True, name="qrnn_1", reuse_vars=reuse_vars) subrec1 = pool[:Hp.batch_size, -1, :] # last character in sequence # -------------------------- QCNN + QPOOL ENCODER with attention #2 -------------------------- # quazi cnn ZFO (batch*3, sentlen, latentdim) conv = pool.sg_quasi_conv1d(is_enc=True, size=2, name="qconv_2", reuse_vars=reuse_vars) # (batch, sentlen-duplicated, latentdim) subrec2 = tf.tile((subrec2.sg_expand_dims(axis=1)), [1, Hp.maxlen, 1]) # (batch*4, sentlen, latentdim) concat = conv.sg_concat(target=subrec2, axis=0) pool = concat.sg_quasi_rnn(is_enc=True, att=True, name="qrnn_2", reuse_vars=reuse_vars) subrec2 = pool[:Hp.batch_size, -1, :] # last character in sequence # -------------------------- ConvLSTM with RESIDUAL connection and MULTIPLICATIVE block -------------------------- #residual block causal = False # for encoder crnn_input = (pool[:Hp.batch_size, :, :].sg_bypass_gpus( name='relu_0', act='relu', bn=(not causal), ln=causal).sg_conv1d_gpus(name="dimred_0", size=1, dev="/cpu:0", reuse=reuse_vars, dim=Hp.hd / 2, act='relu', bn=(not causal), ln=causal)) # conv LSTM with tf.variable_scope("mem/clstm") as scp: (crnn_state, crnn_h) = crnn_cell(crnn_input, (crnn_state, crnn_h), size=5, 
reuse_vars=reuse_vars) # dimension recover and residual connection rnn_input0 = pool[:Hp.batch_size,:,:] + crnn_h\ .sg_conv1d_gpus(name = "diminc_0",size=1,dev="/cpu:0", dim=Hp.hd,reuse=reuse_vars, act='relu', bn=(not causal), ln=causal) # -------------------------- QCNN + QPOOL ENCODER with attention #3 -------------------------- # pooling for lstm input # quazi cnn ZFO (batch*3, sentlen, latentdim) conv = rnn_input0.sg_quasi_conv1d(is_enc=True, size=2, name="qconv_3", reuse_vars=reuse_vars) pool = conv.sg_quasi_rnn(is_enc=True, att=False, name="qrnn_3", reuse_vars=reuse_vars) rnn_input = pool[:Hp.batch_size, -1, :] # last character in sequence # -------------------------- LSTM with RESIDUAL connection and MULTIPLICATIVE block -------------------------- # recurrent block with tf.variable_scope("mem/lstm") as scp: (rnn_state, rnn_h) = rnn_cell(rnn_input, (rnn_state, rnn_h)) rnn_h2 = tf.tile(((rnn_h + rnn_input).sg_expand_dims(axis=1)), [1, Hp.maxlen, 1]) # -------------------------- BYTENET DECODER -------------------------- # CNN decoder dec = y_src.sg_lookup(emb=emb_y).sg_concat(target=rnn_h2, name="dec") for i in range(num_blocks): dec = (dec.sg_res_block( size=3, rate=1, causal=True, name="dec1_%d" % (i), reuse_vars=reuse_vars).sg_res_block( size=3, rate=2, causal=True, name="dec2_%d" % (i), reuse_vars=reuse_vars).sg_res_block( size=3, rate=4, causal=True, name="dec4_%d" % (i), reuse_vars=reuse_vars).sg_res_block( size=3, rate=8, causal=True, name="dec8_%d" % (i), reuse_vars=reuse_vars).sg_res_block( size=3, rate=16, causal=True, name="dec16_%d" % (i), reuse_vars=reuse_vars)) # final fully convolution layer for softmax dec = dec.sg_conv1d_gpus(size=1, dim=Hp.vs, name="out", summary=False, dev=self._dev, reuse=reuse_vars) ce_array = dec.sg_ce(target=y, mask=True, name="cross_ent_example") cross_entropy_mean = tf.reduce_mean(ce_array, name='cross_entropy') losses = tf.add_n([losses, cross_entropy_mean], name='total_loss') return (time + 1, subrec1, subrec2, 
rnn_state, rnn_h, crnn_state, crnn_h, losses)