def smooth_l1(x, name=None):
    '''Pointwise smooth absolute value (smooth L1) function.'''
    absx = tf.abs(x)
    big = tf.cast(tf.greater(absx, tf.ones_like(absx)), tf.float32)
    activation = tf.add(tf.mul(big, absx - .5),
                        tf.mul((1 - big), .5 * tf.square(x)),
                        name=name)
    return activation
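
# Usage sketch (not from the original source): assumes a pre-1.0 TensorFlow where
# tf.mul is available, matching the helper above. `pred_boxes` and `gt_boxes` are
# hypothetical placeholders used only for illustration.
def _example_smooth_l1_box_loss():
    import tensorflow as tf
    pred_boxes = tf.placeholder(tf.float32, [None, 4], name='pred_boxes')
    gt_boxes = tf.placeholder(tf.float32, [None, 4], name='gt_boxes')
    # Apply smooth_l1 to the coordinate residuals, then sum over the 4 coordinates per box.
    return tf.reduce_sum(smooth_l1(pred_boxes - gt_boxes, name='smooth_l1'), 1)
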
def createTrainingMethod(self):
    self.actionInput = tf.placeholder("float", [None, self.actions])  # one-hot mask of the action taken
    self.yInput = tf.placeholder("float", [None])                     # Bellman target for each transition
    # Q-value of the chosen action: mask out the other actions and sum over the action axis.
    Q_Action = tf.reduce_sum(tf.mul(self.QValue, self.actionInput), reduction_indices=1)
    self.cost = tf.reduce_mean(tf.square(self.yInput - Q_Action))
    self.trainStep = tf.train.AdamOptimizer(1e-6).minimize(self.cost)
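
# Companion sketch (an assumption, not part of the original class): how the
# `yInput` targets fed to the placeholders above are typically built for one DQN
# update. `QValue_batch` (Q-values of the next states), `reward_batch` and
# `terminal_batch` are hypothetical minibatch arrays.
def _example_dqn_targets(QValue_batch, reward_batch, terminal_batch, gamma=0.99):
    import numpy as np
    y_batch = []
    for i in range(len(reward_batch)):
        if terminal_batch[i]:
            y_batch.append(reward_batch[i])  # terminal state: target is the immediate reward
        else:
            y_batch.append(reward_batch[i] + gamma * np.max(QValue_batch[i]))  # Bellman target
    # Would then be fed as feed_dict={self.yInput: y_batch, self.actionInput: action_batch, ...}
    return y_batch
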
def _get_sparsity(self, weight_name):
    """Return target sparsity for the given layer/weight name."""
    target_sparsity = [
        sparsity for name, sparsity in self._weight_sparsity_map.items()
        if weight_name.find(name) != -1
    ]
    if not target_sparsity:
        return self._sparsity

    if len(target_sparsity) > 1:
        raise ValueError(
            'Multiple matches in weight_sparsity_map for weight %s' % weight_name)

    return tf.mul(self._sparsity,
                  tf.div(target_sparsity[0], self._spec.target_sparsity))
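
# Numeric sketch of the scaling above (an assumption, not from the original source):
# the per-layer sparsity follows the global sparsity schedule, rescaled so that when
# the global sparsity reaches its target, the matched layer reaches its own target.
def _example_layer_sparsity(global_sparsity, global_target=0.5, layer_target=0.9):
    # Mirrors tf.mul(self._sparsity, tf.div(layer_target, global_target)) with plain floats.
    return global_sparsity * (layer_target / global_target)

# e.g. _example_layer_sparsity(0.25) == 0.45, rising to 0.9 once global_sparsity hits 0.5.
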
def cosine_similarity(v1, v2):
    """Cosine similarity [-1, 1], `wiki <https://en.wikipedia.org/wiki/Cosine_similarity>`_.

    Parameters
    -----------
    v1, v2 : tensor of [batch_size, n_feature], with the same number of features.

    Returns
    -----------
    a tensor of [batch_size, ]
    """
    try:  ## TF1.0
        cost = tf.reduce_sum(tf.multiply(v1, v2), 1) / \
            (tf.sqrt(tf.reduce_sum(tf.multiply(v1, v1), 1)) *
             tf.sqrt(tf.reduce_sum(tf.multiply(v2, v2), 1)))
    except:  ## TF0.12
        cost = tf.reduce_sum(tf.mul(v1, v2), reduction_indices=1) / \
            (tf.sqrt(tf.reduce_sum(tf.mul(v1, v1), reduction_indices=1)) *
             tf.sqrt(tf.reduce_sum(tf.mul(v2, v2), reduction_indices=1)))
    return cost
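
# Usage sketch (not from the original source): `emb_a` and `emb_b` are hypothetical
# embedding placeholders; the feature size of 128 is arbitrary.
def _example_cosine_similarity():
    import tensorflow as tf
    emb_a = tf.placeholder(tf.float32, [None, 128], name='emb_a')
    emb_b = tf.placeholder(tf.float32, [None, 128], name='emb_b')
    # One similarity score per pair in the batch, in [-1, 1].
    return cosine_similarity(emb_a, emb_b)
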
def cross_entropy_reward_loss(logits, actions, rewards, name=None):
    """Calculate the loss for Policy Gradient Network.

    Parameters
    ----------
    logits : tensor
        The network outputs without softmax. This function implements softmax inside.
    actions : tensor/ placeholder
        The agent actions.
    rewards : tensor/ placeholder
        The rewards.

    Examples
    ----------
    >>> states_batch_pl = tf.placeholder(tf.float32, shape=[None, D])  # observation for training
    >>> network = tl.layers.InputLayer(states_batch_pl, name='input_layer')
    >>> network = tl.layers.DenseLayer(network, n_units=H, act=tf.nn.relu, name='relu1')
    >>> network = tl.layers.DenseLayer(network, n_units=3, act=tl.activation.identity, name='output_layer')
    >>> probs = network.outputs
    >>> sampling_prob = tf.nn.softmax(probs)
    >>> actions_batch_pl = tf.placeholder(tf.int32, shape=[None])
    >>> discount_rewards_batch_pl = tf.placeholder(tf.float32, shape=[None])
    >>> loss = cross_entropy_reward_loss(probs, actions_batch_pl, discount_rewards_batch_pl)
    >>> train_op = tf.train.RMSPropOptimizer(learning_rate, decay_rate).minimize(loss)
    """
    try:  ## TF1.0
        cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(
            labels=actions, logits=logits, name=name)
    except:  ## TF0.12
        cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(
            logits=logits, targets=actions)

    try:  ## TF1.0
        loss = tf.reduce_sum(tf.multiply(cross_entropy, rewards))
    except:  ## TF0.12
        loss = tf.reduce_sum(tf.mul(cross_entropy, rewards))  # element-wise mul
    return loss
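
# Companion sketch (an assumption, not part of the function above): computing the
# discounted returns that would be fed into `discount_rewards_batch_pl` in the
# docstring example. Rewards are accumulated backwards and the running sum is reset
# at nonzero (episode-ending) rewards, as in the classic Pong policy-gradient setup.
def _example_discount_rewards(rewards, gamma=0.99):
    import numpy as np
    discounted = np.zeros_like(rewards, dtype=np.float32)
    running_add = 0.0
    for t in reversed(range(len(rewards))):
        if rewards[t] != 0:
            running_add = 0.0  # reset at a game boundary (Pong-specific assumption)
        running_add = running_add * gamma + rewards[t]
        discounted[t] = running_add
    return discounted
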
def calculate_loss_function(self, predicted, groundTruth):
    """Calculate the total loss for gradient descent.

    For each ground truth object, a loss needs to be calculated. It is assumed
    that each image contains only one object.

    Predicted
        0-19  class predictions
        20-21 confidence that an object exists in bbox1 or bbox2 of the grid cell
        22-29 coordinates for bbox1, followed by those of bbox2
    Real
        0-19  class prediction (one-hot encoded)
        20-23 ground truth coordinates for that box
        24-72 cell has an object / no object (only one can be 1)
    """
    predictedParameters = np.reshape(
        predicted, [-1, self.numOfGridsIn1D, self.numOfGridsIn1D, 30])
    predictedClasses = predictedParameters[:, :, :, :20]
    predictedObjectConfidence = predictedParameters[:, :, :, 20:22]
    predictedBoxes = predictedParameters[:, :, :, 22:]
    groundTruthClasses = np.reshape(groundTruth[:, :20], [-1, 1, 1, 20])
    groundTruthBoxes = np.reshape(groundTruth[:, 20:24], [-1, 1, 1, 4])
    groundTruthGrid = np.reshape(groundTruth[:, 24:], [-1, 7, 7, 1])
    predictedFirstBoxes = predictedBoxes[:, :, :, :4]
    predictedSecondBoxes = predictedBoxes[:, :, :, 4:]

    # Squared coordinate error along the last axis, giving -1x7x7 tensors.
    lossFirstBoxes = tf.reduce_sum(
        tf.square(predictedFirstBoxes - groundTruthBoxes), 3)
    lossSecondBoxes = tf.reduce_sum(
        tf.square(predictedSecondBoxes - groundTruthBoxes), 3)

    # Determine which box (bbox1 or bbox2) is responsible for the detection.
    IOU = iou_train(predictedFirstBoxes, predictedSecondBoxes, groundTruthBoxes)
    responsibleBox = tf.greater(IOU[:, :, :, 0], IOU[:, :, :, 1])

    # Coordinate loss: difference between the responsible predicted box and the real box.
    coordinateLoss = tf.where(responsibleBox, lossFirstBoxes, lossSecondBoxes)
    coordinateLoss = tf.reshape(coordinateLoss, [-1, 7, 7, 1])
    # Count the loss only where the object is in the ground truth grid; this gives a
    # sparse -1x7x7x1 tensor with only one nonzero element per slice.
    coordinateLoss = self.lambdaCoordinate * \
        tf.multiply(groundTruthGrid, coordinateLoss)

    # Object loss: difference in object confidence; only the predicted box with the
    # higher IoU is responsible for the object.
    objectLoss = tf.square(predictedObjectConfidence - groundTruthGrid)
    objectLoss = tf.where(responsibleBox, objectLoss[:, :, :, 0],
                          objectLoss[:, :, :, 1])
    tempObjectLoss = tf.reshape(objectLoss, [-1, 7, 7, 1])
    objectLoss = tf.multiply(groundTruthGrid, tempObjectLoss)

    # Class loss: misclassification of the detected object.
    classLoss = tf.square(predictedClasses - groundTruthClasses)
    classLoss = tf.reduce_sum(tf.multiply(groundTruthGrid, classLoss),
                              reduction_indices=3)
    classLoss = tf.reshape(classLoss, [-1, 7, 7, 1])

    # No-object loss: decrease the confidence where there is no object in the ground truth.
    noObjectLoss = self.lambdaNoObject * \
        tf.multiply(1 - groundTruthGrid, tempObjectLoss)

    # Total loss.
    totalLoss = coordinateLoss + objectLoss + classLoss + noObjectLoss
    totalLoss = tf.reduce_mean(
        tf.reduce_sum(totalLoss, reduction_indices=[1, 2, 3]), reduction_indices=0)
    return totalLoss
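
# Sketch of the `iou_train` helper referenced above (an assumption about its
# interface, not the original implementation): boxes are taken as (x, y, w, h), and
# the IoUs of bbox1 and bbox2 are stacked on a new last axis so the result is
# -1x7x7x2, matching the IOU[:, :, :, 0/1] indexing above. Assumes TF >= 1.0
# (tf.stack), consistent with the tf.where/tf.multiply calls in the loss.
def _example_iou_train(boxes1, boxes2, groundTruthBoxes):
    def _iou(box, gt):
        # Convert (x, y, w, h) to corner coordinates and intersect.
        left = tf.maximum(box[:, :, :, 0] - box[:, :, :, 2] / 2,
                          gt[:, :, :, 0] - gt[:, :, :, 2] / 2)
        right = tf.minimum(box[:, :, :, 0] + box[:, :, :, 2] / 2,
                           gt[:, :, :, 0] + gt[:, :, :, 2] / 2)
        top = tf.maximum(box[:, :, :, 1] - box[:, :, :, 3] / 2,
                         gt[:, :, :, 1] - gt[:, :, :, 3] / 2)
        bottom = tf.minimum(box[:, :, :, 1] + box[:, :, :, 3] / 2,
                            gt[:, :, :, 1] + gt[:, :, :, 3] / 2)
        intersection = tf.maximum(right - left, 0.0) * tf.maximum(bottom - top, 0.0)
        union = box[:, :, :, 2] * box[:, :, :, 3] + gt[:, :, :, 2] * gt[:, :, :, 3] - intersection
        return intersection / (union + 1e-10)
    return tf.stack([_iou(boxes1, groundTruthBoxes),
                     _iou(boxes2, groundTruthBoxes)], axis=3)
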
def train(model_path, learning_rate, epoch, noisy=False):
    total_epoch = epoch
    teacher = nin()
    student = lenet()
    if noisy:
        drop_scale = 1 / Nratio
        noisy_mask = tf.nn.dropout(
            tf.constant(np.float32(np.ones((batch_size, 1))) / drop_scale),
            keep_prob=Nratio)  # (batch_size, 1)
        gaussian = tf.random_normal(shape=[batch_size, 1], mean=0.0, stddev=Nsigma)
        noisy = tf.mul(noisy_mask, gaussian)
        #noisy_add = tf.add(tf.constant(np.float32(np.ones((batch_size, 1)))), noisy)
        teacher = tf.mul(teacher, tf.tile(noisy, tf.constant([1, 10])))  # (batch_size, 10)
        #teacher = tf.add(teacher, tf.tile(noisy, tf.constant([1, 10])))
        print(bcolors.G + "prepare for training, noisy mode" + bcolors.END)
        tf_loss = tf.nn.l2_loss(teacher - student) / batch_size
    elif KD:
        # correct Hinton method at 2017.1.3
        print(bcolors.G + "prepare for training, knowledge distilling mode" + bcolors.END)
        one_hot = tf.one_hot(y, n_classes, 1.0, 0.0)
        #one_hot = tf.cast(one_hot_int, tf.float32)
        teacher_tau = tf.scalar_mul(1.0 / tau, teacher)
        student_tau = tf.scalar_mul(1.0 / tau, student)
        objective1 = tf.nn.sigmoid_cross_entropy_with_logits(student_tau, one_hot)
        objective2 = tf.scalar_mul(0.5, tf.square(student_tau - teacher_tau))
        tf_loss = (lamda * tf.reduce_sum(objective1) +
                   (1 - lamda) * tf.reduce_sum(objective2)) / batch_size
    else:
        print(bcolors.G + "prepare for training, NIPS2014 mode" + bcolors.END)
        tf_loss = tf.nn.l2_loss(teacher - student) / batch_size

    optimizer1 = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(tf_loss)
    optimizer2 = tf.train.AdamOptimizer(learning_rate=learning_rate / 10).minimize(tf_loss)

    gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.5)
    sess = tf.InteractiveSession(config=tf.ConfigProto(
        gpu_options=gpu_options, allow_soft_placement=True))
    tf.initialize_all_variables().run()
    with tf.device('/cpu:0'):
        saver = tf.train.Saver(max_to_keep=100)
        #saver.restore(sess, os.path.join(model_path, 'model-99'))

    data, label = read_cifar10('train')
    index = np.array(range(len(data)))  # index, randomly reordered each epoch
    mean = cal_mean()
    begin = time.time()
    iterations = len(data) // batch_size
    decay_step = int(total_epoch * 0.8)
    cnt = 0
    dropout_rate = dropout
    print(bcolors.G + "number of iterations (per epoch) = " + str(iterations) + bcolors.END)

    for i in range(total_epoch):
        np.random.shuffle(index)
        cost_sum = 0
        for j in range(iterations):
            batch_x = np.float32(data[index[j * batch_size:(j + 1) * batch_size]]) - mean
            batch_y = np.squeeze(np.float32(label[index[j * batch_size:(j + 1) * batch_size]]))
            if cnt // decay_step == 0:
                lr = learning_rate
                _, cost = sess.run([optimizer1, tf_loss],
                                   feed_dict={x: batch_x, y: batch_y, keep_prob: 1 - dropout_rate})
            elif cnt // decay_step == 1:
                lr = learning_rate / 10
                _, cost = sess.run([optimizer2, tf_loss],
                                   feed_dict={x: batch_x, y: batch_y, keep_prob: 1 - dropout_rate})
            cost_sum += cost
            #pdb.set_trace()
            #if j % int(iterations * 0.25) == 0:
            #    print("epoch %d-iter %d, cost = %f, avg-cost = %f" % (i, j, cost, cost / n_classes))
            #    sys.stdout.flush()
            cnt += 1
        avg_time = time.time() - begin
        print("epoch %d - avg. %f seconds in each epoch, lr = %.0e, cost = %f , avg-cost-per-logits = %f"
              % (i, avg_time / cnt, lr, cost_sum, cost_sum / iterations / n_classes))
        if np.mod(i + 1, 10) == 0:
            print("Epoch ", i + 1, " is done. Saving the model ...")
            with tf.device('/cpu:0'):
                if not os.path.exists(model_path):
                    os.makedirs(model_path)
                saver.save(sess, os.path.join(model_path, 'model'), global_step=i)
        sys.stdout.flush()
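
# Usage sketch (an assumption, not from the original script): the call below uses a
# hypothetical output directory and hyperparameters, and presumes the module-level
# names referenced above (batch_size, Nratio, Nsigma, tau, lamda, KD, dropout,
# n_classes, x, y, keep_prob, nin, lenet, read_cifar10, cal_mean, bcolors) are
# defined elsewhere in the original code.
def _example_train_call():
    train(model_path='./models/kd_run', learning_rate=1e-3, epoch=100, noisy=False)
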
def apply_gradient_clipping(gradient):
    if gradient is not None:
        # Clip the gradient magnitude into [0.1, 1.0] while preserving its sign.
        return tf.mul(tf.clip_by_value(tf.abs(gradient), 0.1, 1.), tf.sign(gradient))
    return None
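
# Usage sketch (not from the original source; `loss` and the learning rate are
# hypothetical): clip each gradient from compute_gradients before applying the
# update. None gradients pass through unchanged and are skipped by apply_gradients.
def _example_clipped_train_op(loss, learning_rate=1e-4):
    import tensorflow as tf
    optimizer = tf.train.AdamOptimizer(learning_rate)
    grads_and_vars = optimizer.compute_gradients(loss)
    clipped = [(apply_gradient_clipping(g), v) for g, v in grads_and_vars]
    return optimizer.apply_gradients(clipped)
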