Code Example #1
def smooth_l1(x, name=None):
    '''Pointwise smooth abs function'''
    absx = tf.abs(x)
    big = tf.cast(tf.greater(absx, tf.ones_like(absx)), tf.float32)
    activation = tf.add(tf.mul(big, absx - .5),
                        tf.mul((1 - big), .5 * tf.square(x)),
                        name=name)
    return activation
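A minimal usage sketch for smooth_l1, assuming import tensorflow as tf and a TensorFlow version that still provides tf.mul; the input values below are made up:

x = tf.constant([-2.0, -0.3, 0.0, 0.7, 3.0])
y = smooth_l1(x, name='smooth_l1_demo')
with tf.Session() as sess:
    # quadratic (0.5 * x^2) inside [-1, 1], linear (|x| - 0.5) outside
    print(sess.run(y))  # approx. [1.5, 0.045, 0.0, 0.245, 2.5]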
Code Example #2
    def createTrainingMethod(self):
        self.actionInput = tf.placeholder("float", [None, self.actions])
        self.yInput = tf.placeholder("float", [None])
        Q_Action = tf.reduce_sum(tf.mul(self.QValue, self.actionInput),
                                 reduction_indices=1)
        self.cost = tf.reduce_mean(tf.square(self.yInput - Q_Action))
        self.trainStep = tf.train.AdamOptimizer(1e-6).minimize(self.cost)
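A self-contained sketch (made-up numbers, assuming import tensorflow as tf and the pre-1.0 tf.mul / reduction_indices API used above) of the action-selection trick in createTrainingMethod: multiplying the Q-values by a one-hot action mask and summing along axis 1 picks out the Q-value of the action that was taken.

q_values = tf.constant([[1.0, 2.0, 3.0],
                        [4.0, 5.0, 6.0]])
actions_one_hot = tf.constant([[0.0, 1.0, 0.0],
                               [0.0, 0.0, 1.0]])
q_for_action = tf.reduce_sum(tf.mul(q_values, actions_one_hot),
                             reduction_indices=1)
with tf.Session() as sess:
    print(sess.run(q_for_action))  # [2.0, 6.0]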
Code Example #3
    def _get_sparsity(self, weight_name):
        """Return target sparsity for the given layer/weight name."""
        target_sparsity = [
            sparsity for name, sparsity in self._weight_sparsity_map.items()
            if weight_name.find(name) != -1
        ]
        if not target_sparsity:
            return self._sparsity

        if len(target_sparsity) > 1:
            raise ValueError(
                'Multiple matches in weight_sparsity_map for weight %s' %
                weight_name)
        return tf.mul(self._sparsity,
                      tf.div(target_sparsity[0], self._spec.target_sparsity))
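A worked example with made-up numbers: the method scales the base sparsity by the ratio of the per-layer target to the spec-level target, so a base sparsity of 0.5, a per-layer target of 0.9, and a spec target of 0.6 give 0.5 * 0.9 / 0.6 = 0.75.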
Code Example #4
def cosine_similarity(v1, v2):
    """Cosine similarity [-1, 1], `wiki <https://en.wikipedia.org/wiki/Cosine_similarity>`_.

    Parameters
    -----------
    v1, v2 : tensor of [batch_size, n_feature], with the same number of features.

    Returns
    -----------
    a tensor of [batch_size, ]
    """
    try:  ## TF 1.0
        cost = (tf.reduce_sum(tf.multiply(v1, v2), 1) /
                (tf.sqrt(tf.reduce_sum(tf.multiply(v1, v1), 1)) *
                 tf.sqrt(tf.reduce_sum(tf.multiply(v2, v2), 1))))
    except:  ## TF 0.12
        cost = (tf.reduce_sum(tf.mul(v1, v2), reduction_indices=1) /
                (tf.sqrt(tf.reduce_sum(tf.mul(v1, v1), reduction_indices=1)) *
                 tf.sqrt(tf.reduce_sum(tf.mul(v2, v2), reduction_indices=1))))
    return cost
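A minimal usage sketch with made-up vectors, assuming import tensorflow as tf:

v1 = tf.constant([[1.0, 0.0], [1.0, 1.0]])
v2 = tf.constant([[1.0, 0.0], [-1.0, -1.0]])
sim = cosine_similarity(v1, v2)
with tf.Session() as sess:
    print(sess.run(sim))  # approx. [1.0, -1.0]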
Code Example #5
def cross_entropy_reward_loss(logits, actions, rewards, name=None):
    """ Calculate the loss for Policy Gradient Network.

    Parameters
    ----------
    logits : tensor
        The network outputs before softmax; the softmax is applied inside
        this function.
    actions : tensor/ placeholder
        The agent actions.
    rewards : tensor/ placeholder
        The rewards.

    Examples
    ----------
    >>> states_batch_pl = tf.placeholder(tf.float32, shape=[None, D])   # observation for training
    >>> network = tl.layers.InputLayer(states_batch_pl, name='input_layer')
    >>> network = tl.layers.DenseLayer(network, n_units=H, act = tf.nn.relu, name='relu1')
    >>> network = tl.layers.DenseLayer(network, n_units=3, act = tl.activation.identity, name='output_layer')
    >>> probs = network.outputs
    >>> sampling_prob = tf.nn.softmax(probs)
    >>> actions_batch_pl = tf.placeholder(tf.int32, shape=[None])
    >>> discount_rewards_batch_pl = tf.placeholder(tf.float32, shape=[None])
    >>> loss = cross_entropy_reward_loss(probs, actions_batch_pl, discount_rewards_batch_pl)
    >>> train_op = tf.train.RMSPropOptimizer(learning_rate, decay_rate).minimize(loss)
    """

    try: # TF 1.0
        cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=actions, logits=logits, name=name)
    except:
        cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits, targets=actions)
        # cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(logits, actions)

    try: ## TF1.0
        loss = tf.reduce_sum(tf.multiply(cross_entropy, rewards))
    except: ## TF0.12
        loss = tf.reduce_sum(tf.mul(cross_entropy, rewards))   # element-wise mul
    return loss
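In effect, the returned scalar is sum_i rewards_i * (-log softmax(logits_i)[actions_i]); minimizing it increases the log-probability of actions that received positive (discounted) reward and decreases it for actions with negative reward, which is the standard policy-gradient surrogate loss.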
Code Example #6
    def calculate_loss_function(self, predicted, groundTruth):
        """
        Calculate the total loss for gradient descent.

        For each ground truth object, loss needs to be calculated.
        It is assumed that each image consists of only one object.

        Predicted
        0-19 Class prediction
        20-21 Confidence that objects exist in bbox1 or bbox2 of grid
        22-29 Coordinates for bbox1, followed by those of bbox2

        Real
        0-19 Class prediction (One-Hot Encoded)
        20-23 Ground truth coordinates for that box
        24-72 Cell has an object / no object (only one of these can be 1)
        """
        # predicted and groundTruth are graph tensors, so reshape with
        # tf.reshape rather than np.reshape
        predictedParameters = tf.reshape(
            predicted, [-1, self.numOfGridsIn1D, self.numOfGridsIn1D, 30])
        predictedClasses = predictedParameters[:, :, :, :20]
        predictedObjectConfidence = predictedParameters[:, :, :, 20:22]
        predictedBoxes = predictedParameters[:, :, :, 22:]
        groundTruthClasses = tf.reshape(groundTruth[:, :20], [-1, 1, 1, 20])
        groundTruthBoxes = tf.reshape(groundTruth[:, 20:24], [-1, 1, 1, 4])
        groundTruthGrid = tf.reshape(groundTruth[:, 24:], [-1, 7, 7, 1])
        predictedFirstBoxes = predictedBoxes[:, :, :, :4]
        # bbox2 occupies the last four of the eight coordinate channels
        predictedSecondBoxes = predictedBoxes[:, :, :, 4:]
        # Calculate loss along the 4th axis; lossFirstBoxes is -1x7x7x1
        # Think there should be a simpler method to do this
        lossFirstBoxes = tf.reduce_sum(
            tf.square(predictedFirstBoxes - groundTruthBoxes), 3)
        lossSecondBoxes = tf.reduce_sum(
            tf.square(predictedSecondBoxes - groundTruthBoxes), 3)
        # Computing which box (bbox1 or bbox2) is responsible for
        # detection
        IOU = iou_train(predictedFirstBoxes, predictedSecondBoxes,
                        groundTruthBoxes)
        responsibleBox = tf.greater(IOU[:, :, :, 0], IOU[:, :, :, 1])
        # Now that we know which IoU is greater, compute the coordinate loss
        # (the loss due to the difference between the coordinates of the
        # responsible predicted box and the ground truth box)
        coordinateLoss = tf.where(responsibleBox, lossFirstBoxes,
                                  lossSecondBoxes)
        # reshape to -1x7x7x1 so it can be masked by the ground truth grid
        coordinateLoss = tf.reshape(coordinateLoss, [-1, 7, 7, 1])
        # count the loss only if the object is in the groundTruth grid
        # gives a sparse -1x7x7x1 matrix, only one element would be nonzero in
        # each slice
        coordinateLoss = self.lambdaCoordinate * \
            tf.multiply(groundTruthGrid, coordinateLoss)
        # object loss (loss due to difference in object confidence)
        # only keep the objectLoss of the predicted box whose higher IoU
        # makes it responsible for the object
        objectLoss = tf.square(predictedObjectConfidence - groundTruthGrid)
        objectLoss = tf.where(responsibleBox, objectLoss[:, :, :, 0],
                              objectLoss[:, :, :, 1])
        tempObjectLoss = tf.reshape(objectLoss, [-1, 7, 7, 1])
        objectLoss = tf.multiply(groundTruthGrid, tempObjectLoss)
        # class loss (loss due to misjudging the class of the detected
        # object)
        classLoss = tf.square(predictedClasses - groundTruthClasses)
        classLoss = tf.reduce_sum(tf.mul(groundTruthGrid, classLoss),
                                  reduction_indices=3)
        classLoss = tf.reshape(classLoss, [-1, 7, 7, 1])
        # no-object loss, decrease the confidence where there is no
        # object in the ground truth
        noObjectLoss = self.lambdaNoObject * \
            tf.multiply(1 - groundTruthGrid, tempObjectLoss)
        # total loss
        totalLoss = coordinateLoss + objectLoss + classLoss + noObjectLoss
        totalLoss = tf.reduce_mean(
            tf.reduce_sum(totalLoss, reduction_indices=[1, 2, 3]),
            reduction_indices=0)
        return totalLoss
Code Example #7
def train(model_path, learning_rate, epoch, noisy=False):
    total_epoch = epoch
    teacher = nin()
    student = lenet()
    if noisy == True:
        drop_scale = 1 / Nratio
        noisy_mask = tf.nn.dropout(tf.constant(
            np.float32(np.ones((batch_size, 1))) / drop_scale),
                                   keep_prob=Nratio)  #(batchsize,1)
        gaussian = tf.random_normal(shape=[batch_size, 1],
                                    mean=0.0,
                                    stddev=Nsigma)
        noisy = tf.mul(noisy_mask, gaussian)
        #noisy_add = tf.add(tf.constant(np.float32(np.ones((batch_size,1)))), noisy)
        teacher = tf.mul(teacher,
                         tf.tile(noisy, tf.constant([1, 10])))  #(batchsize,10)
        #teacher = tf.add(teacher, tf.tile(noisy,tf.constant([1,10])))
        print(bcolors.G + "prepare for training, noisy mode" + bcolors.END)
        tf_loss = tf.nn.l2_loss(teacher - student) / batch_size
    elif KD == True:  # correct Hinton method at 2017.1.3
        print(bcolors.G + "prepare for training, knowledge distilling mode" +
              bcolors.END)
        one_hot = tf.one_hot(y, n_classes, 1.0, 0.0)
        #one_hot = tf.cast(one_hot_int, tf.float32)
        teacher_tau = tf.scalar_mul(1.0 / tau, teacher)
        student_tau = tf.scalar_mul(1.0 / tau, student)
        objective1 = tf.nn.sigmoid_cross_entropy_with_logits(
            student_tau, one_hot)
        objective2 = tf.scalar_mul(0.5, tf.square(student_tau - teacher_tau))
        tf_loss = (lamda * tf.reduce_sum(objective1) +
                   (1 - lamda) * tf.reduce_sum(objective2)) / batch_size
    else:
        print(bcolors.G + "prepare for training, NIPS2014 mode" + bcolors.END)
        tf_loss = tf.nn.l2_loss(teacher - student) / batch_size

    optimizer1 = tf.train.AdamOptimizer(
        learning_rate=learning_rate).minimize(tf_loss)
    optimizer2 = tf.train.AdamOptimizer(learning_rate=learning_rate /
                                        10).minimize(tf_loss)

    gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.5)
    sess = tf.InteractiveSession(config=tf.ConfigProto(
        gpu_options=gpu_options, allow_soft_placement=True))
    tf.initialize_all_variables().run()
    with tf.device('/cpu:0'):
        saver = tf.train.Saver(max_to_keep=100)
        #saver.restore(sess, os.path.join(model_path,'model-99')
    data, label = read_cifar10('train')
    index = np.array(range(len(data)))  # index randomly ordered
    mean = cal_mean()
    begin = time.time()
    iterations = len(data) // batch_size
    decay_step = int(total_epoch * 0.8)
    cnt = 0
    dropout_rate = dropout
    print(bcolors.G + "number of iterations (per epoch) =" +
          str(len(data) / batch_size) + bcolors.END)
    for i in range(total_epoch):
        np.random.shuffle(index)
        cost_sum = 0
        for j in range(iterations):
            batch_x = np.float32(
                data[index[j * batch_size:(j + 1) * batch_size]]) - mean
            batch_y = np.squeeze(
                np.float32(label[index[j * batch_size:(j + 1) * batch_size]]))
            # integer division keeps the decay schedule correct under Python 3
            if cnt // decay_step == 0:
                lr = learning_rate
                _, cost = sess.run([optimizer1, tf_loss],
                                   feed_dict={
                                       x: batch_x,
                                       y: batch_y,
                                       keep_prob: 1 - dropout_rate
                                   })
            elif cnt // decay_step == 1:
                lr = learning_rate / 10
                _, cost = sess.run([optimizer2, tf_loss],
                                   feed_dict={
                                       x: batch_x,
                                       y: batch_y,
                                       keep_prob: 1 - dropout_rate
                                   })
            cost_sum += cost
            #pdb.set_trace()
            #if (j % int(iterations*0.25) == 0):
            #    print(("epoch %d-iter %d, cost = %f , avg-cost = %f"%(i, j, cost, cost/n_classes))
            #    sys.stdout.flush()
        cnt += 1
        avg_time = time.time() - begin
        print(
            "epoch %d - avg. %f seconds in each epoch, lr = %.0e, cost = %f , avg-cost-per-logits = %f"
            % (i, avg_time / cnt, lr, cost_sum,
               cost_sum / iterations / n_classes))
        if np.mod(i + 1, 10) == 0:
            print("Epoch ", i + 1, " is done. Saving the model ...")
            with tf.device('/cpu:0'):
                if not os.path.exists(model_path):
                    os.makedirs(model_path)
                saver.save(sess,
                           os.path.join(model_path, 'model'),
                           global_step=i)
        sys.stdout.flush()
def apply_gradient_clipping(gradient):
    # clip the gradient magnitude to [0.1, 1.0] while keeping its sign
    if gradient is not None:
        return tf.mul(tf.clip_by_value(tf.abs(gradient), 0.1, 1.),
                      tf.sign(gradient))
    else:
        return None
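A hypothetical way to use the clipping helper above with an optimizer's compute/apply split; loss and learning_rate are assumed to be defined elsewhere in the training script:

optimizer = tf.train.AdamOptimizer(learning_rate)
grads_and_vars = optimizer.compute_gradients(loss)
clipped = [(apply_gradient_clipping(g), v) for g, v in grads_and_vars]
train_op = optimizer.apply_gradients(clipped)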