def train(self):
    # train baseline model
    input_ph = tf.placeholder(shape=[batch_size, 28, 28, 1], dtype=tf.float32)
    label_ph = tf.placeholder(shape=[batch_size, ], dtype=tf.int32)

    predict = self.forward(input_ph)

    loss_tensor = tf.reduce_mean(predict.sg_ce(target=label_ph))

    # use to update network parameters
    optim = tf.sg_optim(loss_tensor, optim='Adam', lr=1e-3)

    # use saver to save a new model
    saver = tf.train.Saver()

    sess = tf.Session()
    with tf.sg_queue_context(sess):
        # initialize variables
        tf.sg_init(sess)

        # validation
        acc = (predict.sg_reuse(input=Mnist.valid.image)
               .sg_softmax()
               .sg_accuracy(target=Mnist.valid.label, name='validation'))

        tf.sg_train(loss=loss_tensor,
                    eval_metric=[acc],
                    max_ep=max_ep,
                    save_dir=save_dir,
                    ep_size=Mnist.train.num_batch,
                    log_interval=10)
def chamfer_loss(A, B):
    # A, B: batched point sets of shape (batch, num_points, dim)
    r = tf.reduce_sum(A * A, 2)
    r = tf.reshape(r, [int(r.shape[0]), int(r.shape[1]), 1])
    r2 = tf.reduce_sum(B * B, 2)
    r2 = tf.reshape(r2, [int(r2.shape[0]), int(r2.shape[1]), 1])
    # pairwise squared distances: |a|^2 - 2 a.b + |b|^2
    t = (r - 2 * tf.matmul(A, tf.transpose(B, perm=[0, 2, 1]))
         + tf.transpose(r2, perm=[0, 2, 1]))
    return tf.reduce_mean((tf.reduce_min(t, axis=1) + tf.reduce_min(t, axis=2)) / 2.0)
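# Usage sketch (illustrative, not from the original source): chamfer_loss expects two
# batched point sets with static shapes (batch, num_points, dim), because it calls
# int(...) on the shape. The shapes and session setup below are assumptions for the example.
import numpy as np
import tensorflow as tf

A_ph = tf.placeholder(tf.float32, [8, 1024, 3])
B_ph = tf.placeholder(tf.float32, [8, 1024, 3])
loss = chamfer_loss(A_ph, B_ph)

with tf.Session() as sess:
    val = sess.run(loss,
                   feed_dict={A_ph: np.random.rand(8, 1024, 3).astype(np.float32),
                              B_ph: np.random.rand(8, 1024, 3).astype(np.float32)})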
def sg_regularizer_loss(scale=1.0):
    r"""Get regularizer loss.

    Args:
        scale: A scalar. A weight applied to regularizer loss.
    """
    return scale * tf.reduce_mean(tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES))
def sg_summary_metric(tensor, prefix='20. metric'):
    # defaults
    prefix = '' if prefix is None else prefix + '/'
    # summary name
    name = prefix + _pretty_name(tensor)
    # summary statistics
    with tf.name_scope('summary'):
        tf.scalar_summary(name + '/avg', tf.reduce_mean(tensor))
        tf.histogram_summary(name, tensor)
def sg_summary_metric(tensor, prefix='20. metric'):
    r"""Writes the average of `tensor` (= a metric such as accuracy)."""
    # defaults
    prefix = '' if prefix is None else prefix + '/'
    # summary name
    name = prefix + _pretty_name(tensor)
    # summary statistics
    with tf.name_scope('summary'):
        tf.scalar_summary(name + '/avg', tf.reduce_mean(tensor))
        tf.histogram_summary(name, tensor)
def sg_summary_activation(tensor, prefix='30. activation'):
    # defaults
    prefix = '' if prefix is None else prefix + '/'
    # summary name
    name = prefix + _pretty_name(tensor)
    # summary statistics
    with tf.name_scope('summary'):
        tf.scalar_summary(name + '/norm', tf.global_norm([tensor]))
        tf.scalar_summary(name + '/ratio',
                          tf.reduce_mean(tf.cast(tf.greater(tensor, 0), tf.sg_floatx)))
        tf.histogram_summary(name, tensor)
def sg_mean(tensor, opt):
    r"""Computes the mean of elements across axis of a tensor.

    See `tf.reduce_mean()` in tensorflow.

    Args:
        tensor: A `Tensor` (automatically given by chain).
        opt:
            axis: A tuple/list of integers or an integer. The axis to reduce.
            keep_dims: If true, retains reduced dimensions with length 1.
            name: If provided, replace current tensor's name.

    Returns:
        A `Tensor`.
    """
    return tf.reduce_mean(tensor, axis=opt.axis, keep_dims=opt.keep_dims, name=opt.name)
def MSE(y, y_pred):
    # MSE = sg_loss.sg_mse(y, y_pred)
    # squared error
    MSE = tf.reduce_mean(tf.squared_difference(y_pred, y.sg_squeeze()))
    # print MSE
    # count = 0
    # MSE = 0
    (num, h, w, c) = y.shape
    # MSE = y_pred.sg_mse(target=y)
    # print num
    # for idx in xrange(0, num):
    #     im = (y[idx, :, :, :])
    #     MSE = MSE + tf.reduce_mean(np.square(im - (y_pred[idx, :, :])))
    #     count = count + 1
    # PSNR = -10 * log10(MSE / num)
    return MSE
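# Illustrative follow-up (not part of the original snippet): the commented-out code above
# mentions a PSNR. A minimal sketch of a PSNR derived from the MSE returned by MSE(),
# assuming pixel values are scaled to [0, 1] (peak signal value 1.0); the helper name
# `psnr_from_mse` is hypothetical.
def psnr_from_mse(mse):
    # PSNR = -10 * log10(MSE) when the peak signal value is 1.0
    return -10.0 * tf.log(mse) / tf.log(10.0)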
def sg_summary_param(tensor, prefix=None, name=None):
    r"""Register `tensor` to summary report as `parameters`.

    Args:
        tensor: A `Tensor` to log as parameters.
        prefix: A `string`. A prefix to display in the tensor board web UI.
        name: A `string`. A name to display in the tensor board web UI.

    Returns:
        None
    """
    # defaults
    prefix = '' if prefix is None else prefix + '/'
    # summary name
    name = prefix + _pretty_name(tensor) if name is None else prefix + name
    # summary statistics
    _scalar(name + '/abs', tf.reduce_mean(tf.abs(tensor)))
    _histogram(name + '/abs-h', tf.abs(tensor))
def ner_cost(tensor, opt):
    one_hot_labels = tf.one_hot(opt.target - 1, opt.num_classes, dtype=tf.float32)
    cross_entropy = one_hot_labels * tf.log(tensor)
    cross_entropy = -tf.reduce_sum(cross_entropy, reduction_indices=2)

    mask = tf.sign(tf.abs(opt.target))
    cross_entropy *= tf.cast(mask, tf.float32)

    cross_entropy = tf.reduce_sum(cross_entropy, reduction_indices=1)
    length = tf.cast(tf.reduce_sum(tf.sign(opt.target), reduction_indices=1), tf.int32)
    cross_entropy /= tf.cast(length, tf.float32)

    out = tf.reduce_mean(cross_entropy, name='ner_cost')

    # add summary
    tf.sg_summary_loss(out, name=opt.name)

    return out
def ner_cost(tensor, opt):
    one_hot_labels = tf.one_hot(opt.target - 1, opt.num_classes, dtype=tf.float32)
    cross_entropy = one_hot_labels * tf.log(tensor)
    cross_entropy = -tf.reduce_sum(cross_entropy, reduction_indices=2)

    mask = tf.sign(tf.reduce_max(tf.abs(one_hot_labels), reduction_indices=2))
    cross_entropy *= tf.cast(mask, tf.float32)

    cross_entropy = tf.reduce_sum(cross_entropy, reduction_indices=1)
    length = tf.cast(tf.reduce_sum(tf.sign(opt.target), reduction_indices=1), tf.int32)
    cross_entropy /= tf.cast(length, tf.float32)

    out = tf.reduce_mean(cross_entropy, name='ner_cost')

    # add summary
    tf.sg_summary_loss(out, name=opt.name)

    return out
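# Side note (illustrative, not from the original source): the two ner_cost variants above
# differ only in how the padding mask is built. A minimal check of both masks, under the
# assumption that padding positions are encoded as 0 in `target`:
import tensorflow as tf

target = tf.constant([[2, 1, 0, 0]])                      # one sequence, two padded steps
mask_a = tf.sign(tf.abs(target))                          # mask used by the first variant
one_hot = tf.one_hot(target - 1, 3, dtype=tf.float32)
mask_b = tf.sign(tf.reduce_max(tf.abs(one_hot), 2))       # mask used by the second variant

with tf.Session() as sess:
    print(sess.run([mask_a, mask_b]))  # both zero out the padded positions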
def sg_summary_activation(tensor, prefix=None, name=None):
    r"""Register `tensor` to summary report as `activation`.

    Args:
        tensor: A `Tensor` to log as activation.
        prefix: A `string`. A prefix to display in the tensor board web UI.
        name: A `string`. A name to display in the tensor board web UI.

    Returns:
        None
    """
    # defaults
    prefix = '' if prefix is None else prefix + '/'
    # summary name
    name = prefix + _pretty_name(tensor) if name is None else prefix + name
    # summary statistics
    _scalar(name + '/ratio',
            tf.reduce_mean(tf.cast(tf.greater(tensor, 0), tf.sg_floatx)))
    _histogram(name + '/ratio-h', tensor)
def sg_summary_gradient(tensor, gradient, prefix=None, name=None):
    r"""Register `tensor` to summary report as `gradient`.

    Args:
        tensor: A `Tensor` to log as gradient.
        gradient: A 0-D `Tensor`. A gradient to log.
        prefix: A `string`. A prefix to display in the tensor board web UI.
        name: A `string`. A name to display in the tensor board web UI.

    Returns:
        None
    """
    # defaults
    prefix = '' if prefix is None else prefix + '/'
    # summary name
    name = prefix + _pretty_name(tensor) if name is None else prefix + name
    # summary statistics
    # noinspection PyBroadException
    _scalar(name + '/grad', tf.reduce_mean(tf.abs(gradient)))
    _histogram(name + '/grad-h', tf.abs(gradient))
def testIt():
    data = raw
    positive = np.array(data.label_train) > 0

    x = tf.placeholder(tf.float32, [None, 4096])
    y = tf.placeholder(tf.float32)

    disc_real = discriminator(x)
    accuracy = tf.reduce_mean(
        tf.cast(tf.equal(tf.cast(disc_real > 0.5, "float"), y), tf.float32))

    np.set_printoptions(precision=3, suppress=True)
    with tf.Session() as sess:
        sess.run(tf.group(tf.global_variables_initializer(),
                          tf.sg_phase().assign(False)))

        # restore parameters
        tf.sg_restore(sess, tf.train.latest_checkpoint('asset/train/gan'),
                      category=['generator', 'discriminator'])

        ans = sess.run(disc_real, feed_dict={x: np.array(data.test)})
        print(np.sum(ans > 0.5))
        np.save('dm_bird.npy', ans)
def sg_summary_metric(tensor, prefix='metrics', name=None):
    r"""Register `tensor` to summary report as `metric`.

    Args:
        tensor: A `Tensor` to log as metric.
        prefix: A `string`. A prefix to display in the tensor board web UI.
        name: A `string`. A name to display in the tensor board web UI.

    Returns:
        None
    """
    # defaults
    prefix = '' if prefix is None else prefix + '/'
    # summary name
    name = prefix + _pretty_name(tensor) if name is None else prefix + name
    # summary statistics
    # noinspection PyBroadException
    try:
        tf.summary.scalar(name, tf.reduce_mean(tensor))
        tf.summary.histogram(name + '-h', tensor)
    except:
        pass
def wrapper(tensor, **kwargs):
    r"""Manages arguments of `tf.sg_opt`.

    Args:
        tensor: A `tensor` (automatically passed by decorator).
        kwargs:
            shape: A list of integers. The shape of `tensor`. Inferred if not specified.
            in_dim: An integer. The size of input dimension, which is set to the last one by default.
            dim: An integer. The size of output dimension. Has the same value as in_dim by default.
            bn: Boolean. If True, batch normalization is applied.
            ln: Boolean. If True, layer normalization is applied.
            dout: A float of range [0, 100). A dropout rate. Set to 0 by default.
            bias: Boolean. If True, biases are added. As a default, it is set to True.
            name: A name for the layer. As a default, the function name is assigned.
            act: A name of activation function. e.g., `sigmoid`, `tanh`, etc.
            reuse: `True` or `None`; if `True`, we go into reuse mode for this `layer` scope
              as well as all sub-scopes; if `None`, we just inherit the parent scope reuse.
            regularizer: A string. None, 'l1' or 'l2'. The default is None.
            summary: If True, summaries are added. The default is True.
    """
    from . import sg_initializer as init
    from . import sg_activation

    # kwargs parsing
    opt = tf.sg_opt(kwargs) + sg_get_context()

    # set default argument
    try:
        shape = tensor.get_shape().as_list()
        # batch normalization off, layer normalization off, dropout off
        opt += tf.sg_opt(shape=shape, in_dim=shape[-1], dim=shape[-1],
                         bn=False, ln=False, dout=0, summary=True)
        if opt.regularizer == 'l1':
            opt.regularizer = lambda x: tf.reduce_mean(tf.abs(x))
        elif opt.regularizer == 'l2':
            opt.regularizer = lambda x: tf.square(tf.reduce_mean(tf.square(x)))
        else:
            opt.regularizer = None

        assert not (opt.bn and opt.ln), \
            'one of batch normalization and layer normalization is available.'

        # disable bias when normalization on
        opt += tf.sg_opt(bias=not (opt.bn or opt.ln))
    finally:
        pass

    # automatic layer naming
    if opt.name is None:
        # layer function name will be used as layer name
        opt.name = func.__name__.replace('sg_', '')

        # find existing layer names
        exist_layers = []
        for t in tf.global_variables():
            scope_name = tf.get_variable_scope().name
            prefix = scope_name + '/' if len(scope_name) > 0 else ''
            i = t.name.rfind(prefix + opt.name)
            if i >= 0:
                exist_layers.append(t.name[i:].split('/')[-2])
        exist_layers = list(set(exist_layers))

        # layer name numbering
        if len(exist_layers) == 0:
            opt.name += '_1'
        else:
            opt.name += '_%d' % (max([int(n.split('_')[-1]) for n in exist_layers]) + 1)

    with tf.variable_scope(opt.name, reuse=opt.reuse) as scope:

        # call layer function
        out = func(tensor, opt)

        # apply batch normalization
        if opt.bn:
            # offset, scale parameter
            beta = init.constant('beta', opt.dim)
            gamma = init.constant('gamma', opt.dim, value=1)

            # calc batch mean, variance
            mean, variance = tf.nn.moments(out, axes=list(range(len(out.get_shape()) - 1)))

            # offset, scale parameter ( for inference )
            mean_running = init.constant('mean', opt.dim, trainable=False)
            variance_running = init.constant('variance', opt.dim, value=1, trainable=False)

            # add running mean, variance to UPDATE_OP collection
            decay = 0.99
            tf.add_to_collection(
                tf.GraphKeys.UPDATE_OPS,
                mean_running.assign(mean_running * decay + mean * (1 - decay)))
            tf.add_to_collection(
                tf.GraphKeys.UPDATE_OPS,
                variance_running.assign(variance_running * decay + variance * (1 - decay)))

            # select mean, variance by training phase
            m, v = tf.cond(_phase,
                           lambda: (mean, variance),                  # batch mean, variance
                           lambda: (mean_running, variance_running))  # saved mean, variance

            # apply batch normalization
            out = tf.nn.batch_normalization(out, m, v, beta, gamma, tf.sg_eps)

        # apply layer normalization
        if opt.ln:
            # offset, scale parameter
            beta = init.constant('beta', opt.dim)
            gamma = init.constant('gamma', opt.dim, value=1)

            # calc layer mean, variance for final axis
            mean, variance = tf.nn.moments(out, axes=[len(out.get_shape()) - 1], keep_dims=True)

            # apply normalization
            out = (out - mean) / tf.sqrt(variance + tf.sg_eps)
            # apply parameter
            out = gamma * out + beta

        # apply activation
        if opt.act:
            out = getattr(sg_activation, 'sg_' + opt.act.lower())(out)

        # apply dropout
        if opt.dout:
            out = tf.cond(_phase,
                          lambda: tf.nn.dropout(out, 1 - opt.dout),
                          lambda: out)

        # rename tensor
        out = tf.identity(out, 'out')

        # add final output summary
        tf.sg_summary_activation(out)

        # save node info for reuse
        out._sugar = tf.sg_opt(func=func, arg=tf.sg_opt(kwargs) + sg_get_context(),
                               prev=tensor, is_layer=True, name=opt.name)
        # inject reuse function
        out.sg_reuse = types.MethodType(sg_reuse, out)

    return out
#
# create discriminator & recognizer
#

# create real + fake image input
xx = tf.concat([x_real_pair, x_fake_pair], 0)

with tf.sg_context(name='discriminator', size=4, stride=2, act='leaky_relu'):
    # discriminator part
    disc = (xx.sg_conv(dim=64)
            .sg_conv(dim=128)
            .sg_flatten()
            .sg_dense(dim=1024)
            .sg_dense(dim=1, act='linear')
            .sg_squeeze())

#
# loss and train ops
#

loss_disc = tf.reduce_mean(disc.sg_bce(target=y_disc))  # discriminator loss
loss_gen = tf.reduce_mean(disc.sg_reuse(input=x_fake_pair).sg_bce(target=y))  # generator loss

train_disc = tf.sg_optim(loss_disc, lr=0.0001, category='discriminator')  # discriminator train ops
train_gen = tf.sg_optim(loss_gen, lr=0.001, category='generator')  # generator train ops

#
# training
#

# def alternate training func
@tf.sg_train_func
              .sg_flatten()
              .sg_dense(dim=1024))

    # shared recognizer part
    recog_shared = shared[batch_size:, :].sg_dense(dim=128)

    # discriminator end
    disc = shared.sg_dense(dim=1, act='linear').sg_squeeze()

    # categorical recognizer end
    recog_cat = recog_shared.sg_dense(dim=num_category, act='linear')

    # continuous recognizer end
    recog_cont = recog_shared.sg_dense(dim=num_cont, act='sigmoid')

#
# loss and train ops
#

loss_disc = tf.reduce_mean(disc.sg_bce(target=y_disc))  # discriminator loss
loss_gen = tf.reduce_mean(disc.sg_reuse(input=gen).sg_bce(target=y))  # generator loss
loss_recog = tf.reduce_mean(recog_cat.sg_ce(target=z_cat)) \
             + tf.reduce_mean(recog_cont.sg_mse(target=z_cont))  # recognizer loss

train_disc = tf.sg_optim(loss_disc + loss_recog, lr=0.0001,
                         category='discriminator')  # discriminator train ops
train_gen = tf.sg_optim(loss_gen + loss_recog, lr=0.001,
                        category='generator')  # generator train ops

#
# training
#

# def alternate training func
@tf.sg_train_func
def alt_train(sess, opt):
    shared = (xx.sg_dense(dim=280)
              .sg_dense(dim=280)
              .sg_dense(dim=70))

    # shared recognizer part
    recog_shared = shared.sg_dense(dim=128)

    # discriminator end
    disc = shared.sg_dense(dim=1, act='linear').sg_squeeze()

    # categorical recognizer end
    recog_cat = recog_shared.sg_dense(dim=num_category, act='linear')

    # continuous recognizer end
    recog_cont = recog_shared[batch_size:, :].sg_dense(dim=num_cont, act='sigmoid')

#
# loss and train ops
#

loss_disc = tf.reduce_mean(disc.sg_bce(target=y_disc))  # discriminator loss
loss_gen = tf.reduce_mean(disc.sg_reuse(input=gen).sg_bce(target=y))  # generator loss
loss_recog = tf.reduce_mean(recog_cat.sg_ce(target=label)) \
             + tf.reduce_mean(recog_cont.sg_mse(target=z_cont))  # recognizer loss

train_disc = tf.sg_optim(loss_disc + loss_recog, lr=0.0001,
                         category='discriminator')  # discriminator train ops
train_gen = tf.sg_optim(loss_gen + loss_recog, lr=0.001,
                        category='generator')  # generator train ops

#
# training
#
xx = tf.concat(0, [x_real_pair, x_fake_pair])

with tf.sg_context(name='discriminator', size=4, stride=2, act='leaky_relu'):
    # discriminator part
    disc = (xx.sg_conv(dim=64)
            .sg_conv(dim=128)
            .sg_flatten()
            .sg_dense(dim=1024)
            .sg_dense(dim=1, act='linear')
            .sg_squeeze())

#
# loss and train ops
#

loss_disc = tf.reduce_mean(disc.sg_bce(target=y_disc))  # discriminator loss
loss_gen = tf.reduce_mean(disc.sg_reuse(input=x_fake_pair).sg_bce(target=y))  # generator loss

train_disc = tf.sg_optim(loss_disc, lr=0.0001, category='discriminator')  # discriminator train ops
train_gen = tf.sg_optim(loss_gen, lr=0.001, category='generator')  # generator train ops

#
# training
#

# def alternate training func
@tf.sg_train_func
def alt_train(sess, opt):
    l_disc = sess.run([loss_disc, train_disc])[0]  # training discriminator
    l_gen = sess.run([loss_gen, train_gen])[0]  # training generator
def train(self):
    '''
    Network
    '''
    batch_pred_feats, batch_pred_coords, batch_pred_confs, self.final_state = \
        self.LSTM('lstm', self.x)

    iou_predict_truth, intersection = self.iou(batch_pred_coords, self.y[:, 0:4])

    should_exist = I = tf.cast(tf.reduce_sum(self.y[:, 0:4], axis=1) > 0., tf.float32)
    no_I = tf.ones_like(I, dtype=tf.float32) - I

    object_loss = tf.nn.l2_loss(I * (batch_pred_confs - iou_predict_truth)) * self.object_scale
    noobject_loss = tf.nn.l2_loss(no_I * (batch_pred_confs - iou_predict_truth)) * self.noobject_scale

    p_sqrt_w = tf.sqrt(tf.minimum(1.0, tf.maximum(0.0, batch_pred_coords[:, 2])))
    p_sqrt_h = tf.sqrt(tf.minimum(1.0, tf.maximum(0.0, batch_pred_coords[:, 3])))

    sqrt_w = tf.sqrt(tf.abs(self.y[:, 2]))
    sqrt_h = tf.sqrt(tf.abs(self.y[:, 3]))

    loss = (tf.nn.l2_loss(I * (batch_pred_coords[:, 0] - self.y[:, 0])) +
            tf.nn.l2_loss(I * (batch_pred_coords[:, 1] - self.y[:, 1])) +
            tf.nn.l2_loss(I * (p_sqrt_w - sqrt_w)) +
            tf.nn.l2_loss(I * (p_sqrt_h - sqrt_h))) * self.coord_scale

    # max_iou = tf.nn.l2_loss(I * (tf.ones_like(iou_predict_truth, dtype=tf.float32) - iou_predict_truth))

    total_loss = loss + object_loss + noobject_loss  # + max_iou

    '''
    Optimizer
    '''
    optimizer = tf.train.AdamOptimizer(
        learning_rate=self.learning_rate).minimize(total_loss)  # Adam Optimizer

    '''
    Summary for tensorboard analysis
    '''
    dataset_loss = -1
    dataset_loss_best = 100
    test_writer = tf.summary.FileWriter('summary/test')
    tf.summary.scalar('dataset_loss', dataset_loss)
    summary_op = tf.summary.merge_all()

    '''
    Initializing the variables
    '''
    self.saver = tf.train.Saver()
    batch_states = np.zeros((self.batchsize, 2 * self.len_vec))

    # TODO: make this a command line argument, etc.
    # training set loader
    batch_loader = BatchLoader(
        "./DATA/TRAINING/",
        seq_len=self.nsteps,
        batch_size=self.batchsize,
        step_size=1,
        folders_to_use=[
            "GOPR0005", "GOPR0006", "GOPR0008", "GOPR0008_2", "GOPR0009",
            "GOPR0009_2", "GOPR0010", "GOPR0011", "GOPR0012", "GOPR0013",
            "GOPR0014", "GOPR0015", "GOPR0016", "MVI_8607", "MVI_8609",
            "MVI_8610", "MVI_8612", "MVI_8614", "MVI_8615", "MVI_8616"
        ])
    validation_set_loader = BatchLoader(
        "./DATA/VALID/",
        seq_len=self.nsteps,
        batch_size=self.batchsize,
        step_size=1,
        folders_to_use=[
            "bbd_2017__2017-01-09-21-40-02_cam_flimage_raw",
            "bbd_2017__2017-01-09-21-44-31_cam_flimage_raw",
            "bbd_2017__2017-01-09-21-48-46_cam_flimage_raw",
            "bbd_2017__2017-01-10-16-07-49_cam_flimage_raw",
            "bbd_2017__2017-01-10-16-21-01_cam_flimage_raw",
            "bbd_2017__2017-01-10-16-31-57_cam_flimage_raw",
            "bbd_2017__2017-01-10-21-43-03_cam_flimage_raw",
            "bbd_2017__2017-01-11-20-21-32_cam_flimage_raw",
            "bbd_2017__2017-01-11-21-02-37_cam_flimage_raw"
        ])

    print("%d available training batches" % len(batch_loader.batches))
    print("%d available validation batches" % len(validation_set_loader.batches))

    '''
    Launch the graph
    '''
    with tf.Session() as sess:
        if self.restore_weights == True and os.path.isfile(self.rolo_current_save + ".index"):
            # sess.run(init)
            tf.sg_init(sess)
            self.saver.restore(sess, self.rolo_current_save)
            print("Weight loaded, finetuning")
        else:
            # sess.run(init)
            tf.sg_init(sess)
            print("Training from scratch")

        epoch_loss = []
        for self.iter_id in range(self.n_iters):
            '''
            Load training data & ground truth
            '''
            batch_id = self.iter_id - self.batch_offset
            batch_xs, batch_ys, _ = batch_loader.load_batch(batch_id)

            '''
            Update weights by back-propagation
            '''
            sess.run(optimizer, feed_dict={self.x: batch_xs, self.y: batch_ys})

            if self.iter_id % self.display_step == 0:
                '''
                Calculate batch loss
                '''
                batch_loss = sess.run(total_loss,
                                      feed_dict={self.x: batch_xs, self.y: batch_ys})
                epoch_loss.append(batch_loss)
                print("Total Batch loss for iteration %d: %.9f" % (self.iter_id, batch_loss))

            if self.iter_id % self.display_step == 0:
                '''
                Calculate batch coordinate loss
                '''
                batch_loss = sess.run(loss,
                                      feed_dict={self.x: batch_xs, self.y: batch_ys})
                print("Bounding box coord error loss for iteration %d: %.9f"
                      % (self.iter_id, batch_loss))

            if self.display_object_loss and self.iter_id % self.display_step == 0:
                '''
                Calculate batch object loss
                '''
                batch_o_loss = sess.run(object_loss,
                                        feed_dict={self.x: batch_xs, self.y: batch_ys})
                print("Object loss for iteration %d: %.9f" % (self.iter_id, batch_o_loss))

            if self.display_object_loss and self.iter_id % self.display_step == 0:
                '''
                Calculate batch no-object loss
                '''
                batch_noo_loss = sess.run(noobject_loss,
                                          feed_dict={self.x: batch_xs, self.y: batch_ys})
                print("No Object loss for iteration %d: %.9f" % (self.iter_id, batch_noo_loss))

            if self.iou_with_ground_truth and self.iter_id % self.display_step == 0:
                '''
                Calculate average IOU with ground truth
                '''
                batch_o_loss = sess.run(tf.reduce_mean(iou_predict_truth),
                                        feed_dict={self.x: batch_xs, self.y: batch_ys})
                print("Average IOU with ground for iteration %d: %.9f" % (self.iter_id, batch_o_loss))

            if self.display_coords is True and self.iter_id % self.display_step == 0:
                '''
                Calculate predicted coordinates
                '''
                coords_predict = sess.run(batch_pred_coords,
                                          feed_dict={self.x: batch_xs, self.y: batch_ys})
                print("predicted coords:" + str(coords_predict[0]))
                print("ground truth coords:" + str(batch_ys[0]))

            '''
            Save model
            '''
            if self.iter_id % self.save_step == 1:
                self.saver.save(sess, self.rolo_current_save)
                print("\n Model saved in file: %s" % self.rolo_current_save)

            '''
            Validation
            '''
            if self.validate == True and self.iter_id % self.validate_step == 0 and self.iter_id > 0:
                # Run validation set
                dataset_loss = self.test(sess, total_loss, validation_set_loader,
                                         batch_pred_feats, batch_pred_coords,
                                         batch_pred_confs, self.final_state)

                '''
                Early-stop regularization
                '''
                if dataset_loss <= dataset_loss_best:
                    dataset_loss_best = dataset_loss
                    self.saver.save(sess, self.rolo_weights_file)
                    print("\n Better Model saved in file: %s" % self.rolo_weights_file)

                '''
                Write summary for tensorboard
                '''
                summary = sess.run(summary_op,
                                   feed_dict={self.x: batch_xs, self.y: batch_ys})
                test_writer.add_summary(summary, self.iter_id)

        print("Average total loss %f" % np.mean(epoch_loss))
    return
def tower_loss2_old(xx, scope, reuse_vars=False):
    # make embedding matrix for source and target
    with tf.variable_scope('embs', reuse=reuse_vars):
        emb_x = tf.sg_emb(name='emb_x', voca_size=Hp.vs, dim=Hp.hd, dev=self._dev)
        emb_y = tf.sg_emb(name='emb_y', voca_size=Hp.vs, dim=Hp.hd, dev=self._dev)

    x_sents = tf.unstack(xx, axis=1)  # each element is (batch, sentlen)

    # generate first an unconditioned sentence
    n_input = Hp.hd

    subrec1 = subrec_zero_state(Hp.bs, Hp.hd)
    subrec2 = subrec_zero_state(Hp.bs, Hp.hd)

    rnn_cell = LSTMCell(in_dim=n_input, dim=Hp.hd)
    (rnn_state, rnn_h) = rnn_cell.zero_state(Hp.bs)

    crnn_cell = ConvLSTMCell(in_dim=n_input, dim=Hp.hd)
    (crnn_state, crnn_h) = crnn_cell.zero_state(n_input)

    for sent in range(len(x_sents) - 1):
        y = x_sents[sent + 1]
        x = x_sents[sent]  # (batch, sentlen) = (16, 200)

        # shift target by one step for training source
        y_src = tf.concat([tf.zeros((Hp.bs, 1), tf.sg_intx), y[:, :-1]], 1)

        # embed table lookup
        enc = x.sg_lookup(emb=emb_x)  # (batch, sentlen, dim1)

        # loop dilated conv block
        for i in range(num_blocks):
            enc = (enc.sg_res_block(size=5, rate=1, name="enc1_%d" % (i), reuse_vars=reuse_vars)
                   .sg_res_block(size=5, rate=2, name="enc2_%d" % (i), reuse_vars=reuse_vars)
                   .sg_res_block(size=5, rate=4, name="enc4_%d" % (i), reuse_vars=reuse_vars)
                   .sg_res_block(size=5, rate=8, name="enc8_%d" % (i), reuse_vars=reuse_vars)
                   .sg_res_block(size=5, rate=16, name="enc16_%d" % (i), reuse_vars=reuse_vars))

        # quasi rnn layer [batch * 3, t, dim2]
        conv = enc.sg_quasi_conv1d(is_enc=True, size=2, name="conv1", reuse_vars=reuse_vars)
        # attention layer
        # recurrent layer
        # 1 + final encoder hidden state
        concat = subrec1.sg_concat(target=conv, dim=0)
        subrec1 = conv.sg_quasi_rnn(is_enc=True, att=True)

        conv = pool.sg_quasi_conv1d(is_enc=True, size=2, name="conv2", reuse_vars=reuse_vars)
        concat = subrec2.sg_concat(target=conv, dim=0)
        subrec2 = conv.sg_quasi_rnn(is_enc=True, att=True)

        # conv LSTM
        (crnn_state, crnn_h) = crnn_cell(subrec2, (crnn_state, crnn_h), 5)

        # recurrent block
        (rnn_state, rnn_h) = rnn_cell(crnn_h, (rnn_state, rnn_h))

        # CNN decoder
        dec = crnn_h.sg_concat(target=y_src.sg_lookup(emb=emb_y), name="dec")

        for i in range(num_blocks):
            dec = (dec.sg_res_block(size=3, rate=1, causal=True, name="dec1_%d" % (i), reuse_vars=reuse_vars)
                   .sg_res_block(size=3, rate=2, causal=True, name="dec2_%d" % (i), reuse_vars=reuse_vars)
                   .sg_res_block(size=3, rate=4, causal=True, name="dec4_%d" % (i), reuse_vars=reuse_vars)
                   .sg_res_block(size=3, rate=8, causal=True, name="dec8_%d" % (i), reuse_vars=reuse_vars)
                   .sg_res_block(size=3, rate=16, causal=True, name="dec16_%d" % (i), reuse_vars=reuse_vars))

        # final fully convolution layer for softmax
        dec = dec.sg_conv1d_gpus(size=1, dim=Hp.vs, name="out", summary=False,
                                 dev=self._dev, reuse=reuse_vars)

        ce_array = dec.sg_ce(target=y, mask=True, name="cross_ent_example")
        cross_entropy_mean = tf.reduce_mean(ce_array, name='cross_entropy')
        tf.add_to_collection('losses', cross_entropy_mean)

    # Assemble all of the losses for the current tower only.
    losses = tf.get_collection('losses', scope)
    # Calculate the total loss for the current tower.
    total_loss = tf.add_n(losses, name='total_loss')
    return total_loss
#
# pull-away term ( PT ) regularizer
#

sample = gen.sg_flatten()
nom = tf.matmul(sample, tf.transpose(sample, perm=[1, 0]))
denom = tf.reduce_sum(tf.square(sample), reduction_indices=[1], keep_dims=True)
pt = tf.square(nom / denom)
pt -= tf.diag(tf.diag_part(pt))
pt = tf.reduce_sum(pt) / (batch_size * (batch_size - 1))

#
# loss & train ops
#

# mean squared errors
mse_real = tf.reduce_mean(tf.square(disc_real - x), reduction_indices=[1, 2, 3])
mse_fake = tf.reduce_mean(tf.square(disc_fake - gen), reduction_indices=[1, 2, 3])

# discriminator loss
loss_disc = mse_real + tf.maximum(margin - mse_fake, 0)
# generator loss + PT regularizer
loss_gen = mse_fake + pt * pt_weight

train_disc = tf.sg_optim(loss_disc, lr=0.001, category='discriminator')  # discriminator train ops
train_gen = tf.sg_optim(loss_gen, lr=0.001, category='generator')  # generator train ops

# add summary
tf.sg_summary_loss(loss_disc, name='disc')
def train_with_GP(self):
    input_ph = tf.placeholder(shape=[batch_size, 28, 28, 1], dtype=tf.float32)
    label_ph = tf.placeholder(shape=[batch_size, ], dtype=tf.int32)

    predict = self.forward(input_ph)

    loss_tensor = tf.reduce_mean(predict.sg_ce(target=label_ph))

    # use to update network parameters
    optim = tf.sg_optim(loss_tensor, optim='Adam', lr=1e-3)

    # use saver to save a new model
    saver = tf.train.Saver()

    sess = tf.Session()
    with tf.sg_queue_context(sess):
        # initialize variables
        tf.sg_init(sess)

        # train by GP guiding
        for e in range(max_ep):
            previous_loss = None
            for i in range(Mnist.train.num_batch):
                [image_array, label_array] = sess.run([Mnist.train.image, Mnist.train.label])

                if (e == 0 or e == 1):
                    # first and second epoch: train with no noisy image
                    loss = sess.run([loss_tensor, optim],
                                    feed_dict={input_ph: image_array,
                                               label_ph: label_array})[0]
                    print('Baseline loss = ', loss)
                elif (e == 2):
                    # third epoch: train with GP image and original image
                    gpIn1 = np.squeeze(image_array)
                    gpIn2 = np.zeros((28, 28))
                    image_gp = GP(gpIn1, gpIn2, seed=0.8)
                    image_gp2 = image_gp[np.newaxis, ...]
                    image_gp2 = image_gp2[..., np.newaxis]

                    loss = sess.run([loss_tensor, optim],
                                    feed_dict={input_ph: image_array,
                                               label_ph: label_array})[0]
                    print('GP without noisy loss = ', loss)

                    loss = sess.run([loss_tensor, optim],
                                    feed_dict={input_ph: image_gp2,
                                               label_ph: label_array})[0]
                    print('GP loss = ', loss)
                else:
                    # other epochs: train with GP evolution
                    gpIn1 = np.squeeze(image_array)
                    gpIn2 = np.zeros((28, 28))
                    image_gp = GP(gpIn1, gpIn2, seed=random.random())
                    image_gp2 = image_gp[np.newaxis, ...]
                    image_gp2 = image_gp2[..., np.newaxis]

                    loss = sess.run([loss_tensor, optim],
                                    feed_dict={input_ph: image_array,
                                               label_ph: label_array})[0]
                    print('GP without noisy loss = ', loss)

                    loss = sess.run([loss_tensor, optim],
                                    feed_dict={input_ph: image_gp2,
                                               label_ph: label_array})[0]
                    print('GP loss = ', loss)

                    if previous_loss is not None and loss < previous_loss:
                        for i in range(5):
                            loss = sess.run([loss_tensor, optim],
                                            feed_dict={input_ph: image_gp2,
                                                       label_ph: label_array})[0]
                            gpIn1 = image_gp2
                            image_gp2[0, :, :, 0] = GP(gpIn1[0, :, :, 0], gpIn2,
                                                       seed=random.random())
                            print('GP EV loss = ', loss)
                    previous_loss = loss

            saver.save(sess, os.path.join(save_dir, 'gp_model'), global_step=e)

    # close session
    sess.close()
#
# pull-away term ( PT ) regularizer
#

sample = gen.sg_flatten()
nom = tf.matmul(sample, tf.transpose(sample, perm=[1, 0]))
denom = tf.reduce_sum(tf.square(sample), reduction_indices=[1], keep_dims=True)
pt = tf.square(nom / denom)
pt -= tf.diag(tf.diag_part(pt))
pt = tf.reduce_sum(pt) / (batch_size * (batch_size - 1))

#
# loss & train ops
#

# mean squared errors
mse = tf.reduce_mean(tf.square(disc - xx), reduction_indices=[1, 2, 3])
mse_real, mse_fake = mse[:batch_size], mse[batch_size:]

loss_disc = mse_real + tf.maximum(margin - mse_fake, 0)  # discriminator loss
loss_gen = mse_fake + pt * pt_weight  # generator loss + PT regularizer

train_disc = tf.sg_optim(loss_disc, lr=0.001, category='discriminator')  # discriminator train ops
train_gen = tf.sg_optim(loss_gen, lr=0.001, category='generator')  # generator train ops

#
# add summary
#

tf.sg_summary_loss(tf.identity(loss_disc, name='disc'))
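# Illustrative note (not from the original source): the PT block above penalizes similarity
# between pairs of generated samples (the pull-away term used in energy-based GANs). A plain
# NumPy restatement of the same computation, useful as a sanity check; `s` is assumed to be a
# (batch, features) matrix of flattened generator outputs, and the helper name is hypothetical.
import numpy as np

def pt_numpy(s):
    nom = s @ s.T                                   # pairwise inner products
    denom = np.sum(s ** 2, axis=1, keepdims=True)   # squared row norms, shape (batch, 1)
    pt = (nom / denom) ** 2
    np.fill_diagonal(pt, 0.0)                       # drop self-similarity terms
    b = s.shape[0]
    return pt.sum() / (b * (b - 1))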
def sg_mean(tensor, opt):
    return tf.reduce_mean(tensor,
                          reduction_indices=opt.dims,
                          keep_dims=opt.keep_dims,
                          name=opt.name)
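# Usage sketch (an assumption, not from the original source): in sugartensor this older
# sg_mean variant is normally reached through tensor chaining, with the reduction axes passed
# as the `dims` option exposed via `opt`. Shapes below are illustrative only.
import sugartensor as tf

x = tf.placeholder(tf.float32, [16, 10, 10])
m = x.sg_mean(dims=[1, 2])   # assumed to reduce the last two axes, giving shape (16,)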
W_3 = tf.Variable(tf.random_normal([n_hidden_units_two, n_hidden_units_three],
                                   mean=0, stddev=sd))
b_3 = tf.Variable(tf.random_normal([n_hidden_units_three], mean=0, stddev=sd))
h_3 = tf.nn.sigmoid(tf.matmul(h_2, W_3) + b_3)

W = tf.Variable(tf.random_normal([n_hidden_units_three, num_classes],
                                 mean=0, stddev=sd))
b = tf.Variable(tf.random_normal([num_classes], mean=0, stddev=sd))

with tf.name_scope('out'):
    y_ = tf.nn.softmax(tf.matmul(h_3, W) + b, name="out")

init = tf.global_variables_initializer()

cost_function = tf.reduce_mean(
    -tf.reduce_sum(Y * tf.log(y_), reduction_indices=[1]))
# optimizer = tf.train.RMSPropOptimizer(learning_rate, decay=0.9, momentum=0.9, centered=True).minimize(cost_function)
optimizer = tf.train.GradientDescentOptimizer(learning_rate).minimize(cost_function)

correct_prediction = tf.equal(tf.argmax(y_, 1), tf.argmax(Y, 1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

cost_history = np.empty(shape=[1], dtype=float)
acc_history = np.empty(shape=[1], dtype=float)
t_cost_history = np.empty(shape=[1], dtype=float)
t_acc_history = np.empty(shape=[1], dtype=float)

y_true, y_pred = None, None
with tf.Session() as session:
    session.run(init)
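# Illustrative alternative (not part of the original snippet): the hand-written
# softmax + log cross-entropy above can underflow when y_ is near 0. A numerically safer
# sketch that reuses h_3, W, b and Y from the code above and feeds raw logits to
# TensorFlow's fused op; the variable name `stable_cost` is hypothetical.
logits = tf.matmul(h_3, W) + b
stable_cost = tf.reduce_mean(
    tf.nn.softmax_cross_entropy_with_logits(labels=Y, logits=logits))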
def rnn_body(time, subrec1, subrec2, rnn_state, rnn_h, crnn_state, crnn_h, losses):
    x = x_sent.read(time)
    y = x_sent.read(time + 1)  # (batch, sentlen) = (16, 200)

    # shift target by one step for training source
    y_src = tf.concat([tf.zeros((Hp.batch_size, 1), tf.int32), y[:, :-1]], 1)
    reuse_vars = time == tf.constant(0) or reu_vars

    # -------------------------- BYTENET ENCODER --------------------------

    # embed table lookup
    enc = x.sg_lookup(emb=emb_x)  # (batch, sentlen, latentdim)
    # loop dilated conv block
    for i in range(num_blocks):
        enc = (enc.sg_res_block(size=5, rate=1, name="enc1_%d" % (i), reuse_vars=reuse_vars)
               .sg_res_block(size=5, rate=2, name="enc2_%d" % (i), reuse_vars=reuse_vars)
               .sg_res_block(size=5, rate=4, name="enc4_%d" % (i), reuse_vars=reuse_vars)
               .sg_res_block(size=5, rate=8, name="enc8_%d" % (i), reuse_vars=reuse_vars)
               .sg_res_block(size=5, rate=16, name="enc16_%d" % (i), reuse_vars=reuse_vars))

    # -------------------------- QCNN + QPOOL ENCODER with attention #1 --------------------------

    # quasi cnn layer ZFO [batch * 3, t, dim2]
    conv = enc.sg_quasi_conv1d(is_enc=True, size=3, name="qconv_1", reuse_vars=reuse_vars)
    # attention layer
    # recurrent layer
    # 1 + final encoder hidden state
    subrec1 = tf.tile((subrec1.sg_expand_dims(axis=1)), [1, Hp.maxlen, 1])
    concat = conv.sg_concat(target=subrec1, axis=0)  # (batch*4, sentlen, latentdim)
    pool = concat.sg_quasi_rnn(is_enc=True, att=True, name="qrnn_1", reuse_vars=reuse_vars)
    subrec1 = pool[:Hp.batch_size, -1, :]  # last character in sequence

    # -------------------------- QCNN + QPOOL ENCODER with attention #2 --------------------------

    # quasi cnn ZFO (batch*3, sentlen, latentdim)
    conv = pool.sg_quasi_conv1d(is_enc=True, size=2, name="qconv_2", reuse_vars=reuse_vars)
    # (batch, sentlen-duplicated, latentdim)
    subrec2 = tf.tile((subrec2.sg_expand_dims(axis=1)), [1, Hp.maxlen, 1])
    # (batch*4, sentlen, latentdim)
    concat = conv.sg_concat(target=subrec2, axis=0)
    pool = concat.sg_quasi_rnn(is_enc=True, att=True, name="qrnn_2", reuse_vars=reuse_vars)
    subrec2 = pool[:Hp.batch_size, -1, :]  # last character in sequence

    # -------------------------- ConvLSTM with RESIDUAL connection and MULTIPLICATIVE block --------------------------

    # residual block
    causal = False  # for encoder
    crnn_input = (pool[:Hp.batch_size, :, :]
                  .sg_bypass_gpus(name='relu_0', act='relu', bn=(not causal), ln=causal)
                  .sg_conv1d_gpus(name="dimred_0", size=1, dev="/cpu:0", reuse=reuse_vars,
                                  dim=Hp.hd / 2, act='relu', bn=(not causal), ln=causal))

    # conv LSTM
    with tf.variable_scope("mem/clstm") as scp:
        (crnn_state, crnn_h) = crnn_cell(crnn_input, (crnn_state, crnn_h),
                                         size=5, reuse_vars=reuse_vars)
    # dimension recover and residual connection
    rnn_input0 = pool[:Hp.batch_size, :, :] + crnn_h \
        .sg_conv1d_gpus(name="diminc_0", size=1, dev="/cpu:0", dim=Hp.hd,
                        reuse=reuse_vars, act='relu', bn=(not causal), ln=causal)

    # -------------------------- QCNN + QPOOL ENCODER with attention #3 --------------------------

    # pooling for lstm input
    # quasi cnn ZFO (batch*3, sentlen, latentdim)
    conv = rnn_input0.sg_quasi_conv1d(is_enc=True, size=2, name="qconv_3", reuse_vars=reuse_vars)
    pool = conv.sg_quasi_rnn(is_enc=True, att=False, name="qrnn_3", reuse_vars=reuse_vars)
    rnn_input = pool[:Hp.batch_size, -1, :]  # last character in sequence

    # -------------------------- LSTM with RESIDUAL connection and MULTIPLICATIVE block --------------------------

    # recurrent block
    with tf.variable_scope("mem/lstm") as scp:
        (rnn_state, rnn_h) = rnn_cell(rnn_input, (rnn_state, rnn_h))

    rnn_h2 = tf.tile(((rnn_h + rnn_input).sg_expand_dims(axis=1)), [1, Hp.maxlen, 1])

    # -------------------------- BYTENET DECODER --------------------------

    # CNN decoder
    dec = y_src.sg_lookup(emb=emb_y).sg_concat(target=rnn_h2, name="dec")

    for i in range(num_blocks):
        dec = (dec.sg_res_block(size=3, rate=1, causal=True, name="dec1_%d" % (i), reuse_vars=reuse_vars)
               .sg_res_block(size=3, rate=2, causal=True, name="dec2_%d" % (i), reuse_vars=reuse_vars)
               .sg_res_block(size=3, rate=4, causal=True, name="dec4_%d" % (i), reuse_vars=reuse_vars)
               .sg_res_block(size=3, rate=8, causal=True, name="dec8_%d" % (i), reuse_vars=reuse_vars)
               .sg_res_block(size=3, rate=16, causal=True, name="dec16_%d" % (i), reuse_vars=reuse_vars))

    # final fully convolution layer for softmax
    dec = dec.sg_conv1d_gpus(size=1, dim=Hp.vs, name="out", summary=False,
                             dev=self._dev, reuse=reuse_vars)

    ce_array = dec.sg_ce(target=y, mask=True, name="cross_ent_example")
    cross_entropy_mean = tf.reduce_mean(ce_array, name='cross_entropy')

    losses = tf.add_n([losses, cross_entropy_mean], name='total_loss')

    return (time + 1, subrec1, subrec2, rnn_state, rnn_h, crnn_state, crnn_h, losses)
b = tf.get_variable("b", initializer=tf.constant(0., shape=[num_classes])) with tf.name_scope('logits'): logits = tf.matmul(outputs, W) + b logits = tf.reshape(logits, [batch_s, -1, num_classes]) logits = tf.transpose(logits, (1, 0, 2), name="out/logits") with tf.name_scope('loss'): loss = tf.nn.ctc_loss(targets, logits, seq_len, ctc_merge_repeated=True, preprocess_collapse_repeated=True) with tf.name_scope('cost'): cost = tf.reduce_mean(loss) tf.summary.scalar("cost", cost) with tf.name_scope('optimizer'): optimizer = tf.train.RMSPropOptimizer(learning_rate, decay=decay, momentum=momentum, centered=True) gvs = optimizer.compute_gradients(cost) def ClipIfNotNone(grad): if grad is None: return grad return tf.clip_by_value(grad, -1, 1) capped_gvs = [(ClipIfNotNone(grad), var) for grad, var in gvs] train_optimizer = optimizer.apply_gradients(capped_gvs)