def trainer():
    inp = ct.ph([None, None, None])  # image
    gt = ct.ph([10])  # labels

    x = inp - 0.5
    x = tf.expand_dims(x, axis=1)  # [NHWC] -> [N1HWC]
    gt2 = tf.expand_dims(gt, axis=1)  # [batch, dims] -> [batch, 1, dims]

    timesteps = 8  # number of timesteps to run the RNN for
    x = tf.tile(x, multiples=[1, timesteps, 1, 1, 1])
    gt2 = tf.tile(gt2, multiples=[1, timesteps, 1])

    y = gg2dclf(x)  # [batch, timesteps, 10]

    loss = mean_softmax_cross_entropy(y, gt2)  # mean of cross entropy over all timesteps
    accuracy = one_hot_accuracy(y[:, -1, :], gt)

    opt = tf.train.AdamOptimizer()
    train_step = opt.minimize(loss, var_list=gg2dclf.get_weights())

    def feed(img, lbl):
        sess = get_session()
        res = sess.run([train_step, loss, accuracy],
                       feed_dict={inp: img, gt: lbl})
        return res[1:3]

    # extract the foveal pattern from the hidden states
    set_training_state(False)  # disable training mode so the softmax is applied
    y_softmaxed, hiddens = gg2dclf(x, return_hidden_states=True)
    # [batch, timesteps, 10], [batch, num_h]
    set_training_state(True)

    hs = tf.shape(hiddens)
    hiddens = tf.reshape(hiddens, shape=[-1, hs[2]])  # [batch*time, dims]
    offsets = gg2dclf.unit.get_offset(hiddens)
    shifted_means = gg2dclf.unit.glimpse2d.shifted_means_given_offsets(offsets)
    shifted_means = tf.reshape(shifted_means, shape=[hs[0], hs[1], -1, 2])
    # [batch*time, num_receptors, 2] -> [batch, time, num_receptors, 2]
    variances = gg2dclf.unit.glimpse2d.variances()  # [num_receptors, 1]

    def test(img):
        sess = get_session()
        res = sess.run([x, y_softmaxed, shifted_means, variances],
                       feed_dict={inp: img})
        return res

    return feed, test
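# usage sketch (added for illustration, not part of the original code):
# a minimal training loop for the glimpse classifier above, using random
# stand-in data just to show the calling convention of feed() and test().
# the shapes below (28x28x1 images, 10-way one-hot labels) are assumptions.
import numpy as np

def demo_glimpse_training(steps=100, batch_size=32):
    feed, test = trainer()
    for step in range(steps):
        img = np.random.rand(batch_size, 28, 28, 1).astype('float32')  # stand-in images
        lbl = np.eye(10, dtype='float32')[np.random.randint(10, size=batch_size)]  # stand-in labels
        loss, acc = feed(img, lbl)
        if step % 10 == 0:
            print('step', step, 'loss', loss, 'accuracy', acc)
    # inspect the learned foveal pattern on a few images
    xs, probs, means, variances = test(img[:4])
    return means, variances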
def trainer():
    x, gt = ct.ph([None, None, None, 3]), ct.ph([None, None, None, 1])
    xf, gtf = tf.cast(x, tf.float32) / 255. - .5, tf.cast(gt, tf.float32) / 255.

    # s = tf.shape(gtf)
    # gtf = tf.reshape(gtf, [s[0]*s[1], s[2], s[3], s[4]])
    # gtf = MaxPool2D(k=4, std=4)(gtf)  # 96 -> 24
    # ns = tf.shape(gtf)
    # gtf = tf.reshape(gtf, [s[0], s[1], ns[1], ns[2], ns[3]])

    xf += tf.random_normal(tf.shape(xf), stddev=0.05)

    y, _ending_state = tec(xf)
    loss = ct.binary_cross_entropy_loss(y, gtf, l=0.1)  # bias against black

    lr = tf.Variable(1e-3)
    print('connecting optimizer...')
    opt = tf.train.AdamOptimizer(lr)
    train_step = opt.minimize(loss, var_list=tec.get_weights())

    def feed(xin, yin, ilr):
        sess = ct.get_session()
        res = sess.run([train_step, loss],
                       feed_dict={x: xin, gt: yin, lr: ilr})
        return res[1]  # loss

    # tf.placeholder(tf.float32, shape=[None, None])
    starting_state = ct.ph([None, None, None])  # an image of some sort
    stateful_y, ending_state = tec(xf, starting_state=starting_state)

    def stateful_predict(st, i):
        # stateful, to enable fast generation
        sess = ct.get_session()
        if st is None:  # no starting state available yet
            res = sess.run([y, _ending_state], feed_dict={x: i})
        else:
            res = sess.run([stateful_y, ending_state],
                           feed_dict={x: i, starting_state: st})
        return res

    return feed, stateful_predict
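# usage sketch (added for illustration, not part of the original code):
# the first stateful_predict() call passes st=None so the model starts from
# its default state; later calls feed the returned ending state back in,
# which is what makes step-by-step generation fast. `chunks` is a hypothetical
# iterable of arrays shaped like the x placeholder above.
def demo_stateful_generation(chunks):
    feed, stateful_predict = trainer()
    st = None
    outputs = []
    for chunk in chunks:
        y_out, st = stateful_predict(st, chunk)
        outputs.append(y_out)
    return outputs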
def gan(g, d):
    # initialize a GAN trainer.
    # this is the fastest way to train a GAN in TensorFlow:
    # the two models are updated simultaneously in one pass.
    noise = tf.random_normal(mean=0., stddev=1., shape=[batch_size, 1, 1, zed])
    real_data = ct.ph([None, None, 3])

    inl = tf.Variable(1e-11)  # instance noise level

    def noisy(i):
        return i + tf.random_normal(mean=0, stddev=inl, shape=tf.shape(i))

    generated = g(noise)
    gscore = d(noisy(generated))
    rscore = d(noisy(real_data))

    def log_eps(i):
        return tf.reduce_mean(tf.log(i + 1e-8))

    # one-sided label smoothing: replace 1.0 with 0.9
    # dloss = -(log_eps(1 - gscore) + .1 * log_eps(1 - rscore) + .9 * log_eps(rscore))
    # gloss = -log_eps(gscore)

    # least-squares losses
    dloss = tf.reduce_mean((gscore - 0)**2 + (rscore - 1)**2)
    gloss = tf.reduce_mean((gscore - 1)**2)

    Adam = tf.train.AdamOptimizer
    # Adam = tf.train.MomentumOptimizer

    lr, b1 = tf.Variable(1.2e-4), .5  # otherwise won't converge
    optimizer = Adam(lr, beta1=b1)
    # optimizer = Adam(lr)

    def l2(m):
        # small L2 penalty over all weights of a model
        l = m.get_weights()
        return tf.reduce_sum([tf.reduce_sum(i**2) * 1e-7 for i in l])

    update_wd = optimizer.minimize(dloss, var_list=d.get_weights())
    update_wg = optimizer.minimize(gloss + l2(g), var_list=g.get_weights())

    train_step = [update_wd, update_wg]
    losses = [dloss, gloss]

    def gan_feed(sess, batch_image, nl, lllr):
        # the actual GAN training function
        nonlocal train_step, losses, noise, real_data
        res = sess.run([train_step, losses],
                       feed_dict={real_data: batch_image, inl: nl, lr: lllr})
        loss_values = res[1]
        return loss_values  # [dloss, gloss]

    return gan_feed
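# usage sketch (added for illustration, not part of the original code):
# a typical loop around gan_feed(). `next_image_batch` is a hypothetical data
# helper; the instance-noise level nl is annealed over time while the learning
# rate is kept fixed, which is one common (not mandatory) schedule.
def demo_gan_training(g, d, steps=10000):
    gan_feed = gan(g, d)
    sess = ct.get_session()
    for step in range(steps):
        batch_image = next_image_batch()   # hypothetical loader
        nl = 0.1 * 0.9995**step            # anneal the instance noise
        lllr = 1.2e-4                      # constant learning rate
        dloss, gloss = gan_feed(sess, batch_image, nl, lllr)
        if step % 100 == 0:
            print('step', step, 'dloss', dloss, 'gloss', gloss)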
def trainer():
    x, gt = ct.ph([None, None, 3]), ct.ph([None, None, 1])
    xf, gtf = tf.cast(x, tf.float32) / 255. - .5, tf.cast(gt, tf.float32) / 255.

    # s = tf.shape(gtf)
    # gtf = tf.reshape(gtf, [s[0]*s[1], s[2], s[3], s[4]])
    # gtf = MaxPool2D(k=4, std=4)(gtf)  # 96 -> 24
    # ns = tf.shape(gtf)
    # gtf = tf.reshape(gtf, [s[0], s[1], ns[1], ns[2], ns[3]])

    xf += tf.random_normal(tf.shape(xf), stddev=0.05)

    y = clf(xf)
    loss = ct.mean_sigmoid_cross_entropy_loss(y, gtf)

    lr = tf.Variable(1e-3)
    print('connecting optimizer...')
    opt = tf.train.AdamOptimizer(lr)
    train_step = opt.minimize(loss, var_list=clf.get_weights())

    def feed(xin, yin, ilr):
        sess = ct.get_session()
        res = sess.run([train_step, loss],
                       feed_dict={x: xin, gt: yin, lr: ilr})
        return res[1]  # loss

    def predict(i):
        sess = ct.get_session()
        res = sess.run([y], feed_dict={x: i})
        return res[0]

    return feed, predict
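# usage sketch (added for illustration, not part of the original code):
# training with a simple two-stage learning-rate schedule, then running
# predict() for inference. x_batch / gt_batch are assumed to be uint8 arrays
# matching the placeholders above; the schedule values are arbitrary.
def demo_clf_training(x_batch, gt_batch, epochs=30):
    feed, predict = trainer()
    for epoch in range(epochs):
        ilr = 1e-3 if epoch < 20 else 1e-4  # drop the learning rate late in training
        loss = feed(x_batch, gt_batch, ilr)
        print('epoch', epoch, 'loss', loss)
    return predict(x_batch)  # raw model outputs for the batch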
def trainer():
    x, gt = ct.ph([None, None, 1]), ct.ph([None, None, 1])
    y = tec(x)

    decay = tf.reduce_mean([tf.reduce_mean(w**2) for w in tec.get_weights()]) * 1e-4
    loss = ct.binary_cross_entropy_loss(y, gt)

    lr = tf.Variable(1e-3)
    print('connecting optimizer...')
    opt = tf.train.AdamOptimizer(lr)
    train_step = opt.minimize(loss + decay, var_list=tec.get_weights())

    def feed(xin, yin, ilr):
        sess = ct.get_session()
        res = sess.run([train_step, loss],
                       feed_dict={x: xin, gt: yin, lr: ilr})
        return res[1]  # loss

    return feed
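# note (added for illustration, not part of the original code): the decay term
# above is a mean of per-tensor means, so its scale stays roughly constant as
# the number of weight tensors grows. if a conventional summed L2 penalty is
# preferred instead, a sketch using tf.nn.l2_loss might look like this:
def l2_decay(weights, coeff=1e-4):
    # coeff * sum of 0.5 * ||w||^2 over all weight tensors
    return coeff * tf.add_n([tf.nn.l2_loss(w) for w in weights])
# e.g. train_step = opt.minimize(loss + l2_decay(tec.get_weights()),
#                                var_list=tec.get_weights())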
def feed_gen(output_size=[512, 512]):
    # all the logic
    ct.set_session(K.get_session())
    # Keras loads the model variables into its own session, so we have to use that one.

    def into_variable(value):
        v = tf.Variable(initial_value=value)
        sess = ct.get_session()
        sess.run([tf.variables_initializer([v])])
        return v

    print('output size chosen:', output_size)

    # create the white_noise_image
    global white_noise_image
    white_noise_image = into_variable(
        tf.random_normal([1] + output_size + [3], stddev=1e-3))
    print('white_noise_image initialized.')

    # the model used to descend the white noise image
    global vggmodel_d, vggmodel_e
    vggmodel_d = VGG19(include_top=False,
                       weights='imagenet',
                       input_tensor=white_noise_image)
    vggmodel_d.summary()

    reference_image = ct.ph([None, None, 3])

    # the model used to extract style representations
    vggmodel_e = VGG19(include_top=False,
                       weights='imagenet',
                       input_tensor=reference_image)
    # vggmodel_e.summary()
    print('VGG models created.')

    def get_representations(vggmodel):
        # activations of selected layers: 5 layers for style capture, 1 layer for content capture.
        layer_for_styles = list(
            filter(lambda x: 'conv1' in x.name or 'block5_conv3' in x.name,
                   vggmodel.layers))
        style_activations = [i.output for i in layer_for_styles]
        layer_for_content = ['block5_conv2']
        content_activations = [
            vggmodel.get_layer(l).output for l in layer_for_content
        ]

        def gram_4d(i):
            # calculate the Gram matrix (inner product) of the feature maps,
            # where gram[n1, n2] is the correlation between two of the n features.
            # for example, if two feature maps are sensitive to tree and flower
            # respectively, then gram[tree, flower] tells you how strongly tree
            # and flower are correlated in this layer's activations,
            # i.e. how likely tree and flower are to appear together.
            # this correlation does not depend on position in the image,
            # which is why the style loss can be calculated globally:
            # we don't care about the exact position of features,
            # only how likely each of them is to appear with another.
            # assume the input is a 4d tensor of shape [1, h, w, f]
            s = tf.shape(i)
            # reshape into a feature matrix of shape [h*w, f]
            fm = tf.reshape(i, [s[1] * s[2], s[3]])
            # inner product
            gram = tf.matmul(tf.transpose(fm), fm)  # [f, f]
            # h*w*f elements went into computing the inner product,
            # so normalize the result:
            gram = gram / tf.cast((s[1] * s[2] * s[3]) * 2, tf.float32)
            return gram

        gram_matrices = [gram_4d(i) for i in style_activations]
        return gram_matrices, content_activations

    # get the gram matrices of the style reference image
    style_gram_matrices, content_activations = get_representations(vggmodel_e)

    # image shape manipulation: from HWC to NHWC
    sn = starry_night.view()
    sn.shape = (1, ) + sn.shape

    sess = ct.get_session()
    gram_ref = sess.run([style_gram_matrices],
                        feed_dict={reference_image: sn})[0]
    print('reference style gram matrices calculated.')

    # load the style references into memory
    style_references = [into_variable(gr) for gr in gram_ref]
    print('reference style gram matrices loaded into memory as variables.')

    # get the content representation of the content image
    gz = guangzhou.view()
    gz.shape = (1, ) + gz.shape
    reference_content_activations = sess.run([content_activations],
                                             feed_dict={reference_image: gz})[0]
    print('reference content representations calculated.')

    # load the content representations into memory
    reference_content_activations = [
        into_variable(rca) for rca in reference_content_activations
    ]
    print('reference content activations loaded into memory as variables.')

    # calculate losses of white_noise_image's style w.r.t. the style references
    white_gram_matrices, white_content_activations = get_representations(vggmodel_d)

    def square_loss(g1, g2):
        # difference between two gram matrices, used as the style loss
        return tf.reduce_sum((g1 - g2)**2)

    white_style_losses = [
        square_loss(white_gram_matrices[idx], style_references[idx])
        for idx, gs in enumerate(style_references)
    ]

    # calculate losses of white_noise_image's content w.r.t. the content reference
    white_content_losses = [
        tf.reduce_mean((reference_content_activations[idx] -
                        white_content_activations[idx])**2)
        for idx, _ in enumerate(reference_content_activations)
    ]

    def amplitude_penalty(tensor, ceiling=0.499, floor=-0.499):
        # penalize pixel values outside the displayable range
        p = tf.maximum(tensor - ceiling, 0) + tf.maximum(floor - tensor, 0)
        return p

    def proportional_loss(lis):
        # similar to reduce_mean, but adds a penalty if the losses are imbalanced
        mean_loss = tf.reduce_mean(lis)
        pro_loss = tf.reduce_mean([abs(l - mean_loss) for l in lis])
        return mean_loss + pro_loss * 5

    white_amplitude_penalty = amplitude_penalty(white_noise_image)

    white_loss = proportional_loss([
        proportional_loss(white_style_losses),
        tf.reduce_mean(white_content_losses) * .01
    ])
    white_loss += tf.reduce_mean(white_amplitude_penalty**2) * 10000

    # minimize the loss by gradient descent on white_noise_image
    learning_rate = tf.Variable(0.01)
    adam = tf.train.AdamOptimizer(learning_rate)
    print('connecting adam optimizer...')
    descent_step = adam.minimize(white_loss, var_list=[white_noise_image])

    slots = [
        adam.get_slot(white_noise_image, name)
        for name in adam.get_slot_names()
    ]

    # initialize white_noise_image and the optimizer slots.
    sess.run([
        tf.variables_initializer([white_noise_image] + slots +
                                 list(adam._get_beta_accumulators()))
    ])

    def feed(lr=.01):
        nonlocal white_loss, descent_step, learning_rate
        sess = ct.get_session()
        res = sess.run([descent_step, white_loss],
                       feed_dict={learning_rate: lr})
        loss = res[1]
        return loss

    print('feed function generated.')
    return feed
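# usage sketch (added for illustration, not part of the original code):
# descending the white noise image towards the style and content targets.
# the step-size decay schedule and iteration count are arbitrary assumptions.
def demo_style_transfer(iterations=300):
    feed = feed_gen([512, 512])
    sess = ct.get_session()
    for i in range(iterations):
        lr = 0.01 * 0.99**(i // 10)  # slowly decay the step size
        loss = feed(lr)
        if i % 50 == 0:
            print('iteration', i, 'white_loss', loss)
    # read the optimized image back; pixel values sit roughly in [-0.5, 0.5]
    img = sess.run(white_noise_image)[0] + 0.5
    return img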