Example no. 1
0
    def add_training_costs(self, graph, name_scopes, output, labels, weights):
        """Add per-task training cost ops to ``graph`` and return the total loss.

        For each task, a weighted per-example cost is computed via
        ``self.cost`` and reduced to a scalar by summing and dividing by the
        batch size. The per-task scalars are summed into a single loss, and an
        optional weight-decay penalty is added.

        Parameters
        ----------
        graph: tf.Graph
            Graph the cost ops are added to.
        name_scopes: dict
            Shared name-scope bookkeeping (passed to
            ``TensorflowGraph.shared_name_scope``).
        output: list
            Per-task output tensors, indexed by task.
        labels: list
            Per-task label tensors, indexed by task.
        weights: list
            Per-task example-weight tensors, indexed by task.

        Returns
        -------
        tf.Tensor
            Scalar total training loss (sum of per-task gradient costs plus
            any weight-decay penalty).
        """
        with graph.as_default():
            gradient_costs = []  # per-task scalar costs used for the gradient

            with TensorflowGraph.shared_name_scope('costs', graph,
                                                   name_scopes):
                for task in range(self.n_tasks):
                    # Zero-pad the task index so scope names sort lexically.
                    task_str = str(task).zfill(len(str(self.n_tasks)))
                    with TensorflowGraph.shared_name_scope(
                            'cost_{}'.format(task_str), graph, name_scopes):
                        with tf.name_scope('weighted'):
                            weighted_cost = self.cost(output[task],
                                                      labels[task],
                                                      weights[task])

                        with tf.name_scope('gradient'):
                            # Note that we divide by the batch size and not the number of
                            # non-zero weight examples in the batch.  Also, instead of using
                            # tf.reduce_mean (which can put ops on the CPU) we explicitly
                            # calculate with div/sum so it stays on the GPU.
                            # NOTE(review): tf.div is deprecated in TF >= 1.x in
                            # favor of tf.math.divide — confirm the TF version
                            # pinned by this project before migrating.
                            gradient_cost = tf.div(
                                tf.reduce_sum(weighted_cost), self.batch_size)
                            gradient_costs.append(gradient_cost)

                # aggregated costs
                with TensorflowGraph.shared_name_scope('aggregated', graph,
                                                       name_scopes):
                    with tf.name_scope('gradient'):
                        loss = tf.add_n(gradient_costs)

                    # weight decay
                    if self.penalty != 0.0:
                        penalty = model_ops.weight_decay(
                            self.penalty_type, self.penalty)
                        loss += penalty

            return loss
Example no. 2
0
  def add_training_cost(self, graph, name_scopes, output, labels, weights):
    """Add per-task training cost ops to ``graph`` and return the total loss.

    For each task, a weighted per-example cost from ``self.cost`` is reduced
    to a scalar (sum divided by batch size); the per-task scalars are summed
    into one loss, and an optional weight-decay penalty is added.

    Parameters
    ----------
    graph: tf.Graph
      Graph the cost ops are added to.
    name_scopes: dict
      Shared name-scope bookkeeping for ``TensorflowGraph.shared_name_scope``.
    output: list
      Per-task output tensors, indexed by task.
    labels: list
      Per-task label tensors, indexed by task.
    weights: list
      Per-task example-weight tensors, indexed by task.

    Returns
    -------
    tf.Tensor
      Scalar total training loss (sum of per-task gradient costs plus any
      weight-decay penalty).
    """
    with graph.as_default():
      gradient_costs = []  # per-task scalar costs used for the gradient

      with TensorflowGraph.shared_name_scope('costs', graph, name_scopes):
        for task in range(self.n_tasks):
          # Zero-pad the task index so scope names sort lexically.
          task_str = str(task).zfill(len(str(self.n_tasks)))
          with TensorflowGraph.shared_name_scope('cost_{}'.format(task_str),
                                                 graph, name_scopes):
            with tf.name_scope('weighted'):
              weighted_cost = self.cost(output[task], labels[task],
                                        weights[task])

            with tf.name_scope('gradient'):
              # Note that we divide by the batch size and not the number of
              # non-zero weight examples in the batch.  Also, instead of using
              # tf.reduce_mean (which can put ops on the CPU) we explicitly
              # calculate with div/sum so it stays on the GPU.
              # NOTE(review): tf.div is deprecated in TF >= 1.x in favor of
              # tf.math.divide — confirm the pinned TF version before migrating.
              gradient_cost = tf.div(
                  tf.reduce_sum(weighted_cost), self.batch_size)
              gradient_costs.append(gradient_cost)

        # aggregated costs
        with TensorflowGraph.shared_name_scope('aggregated', graph,
                                               name_scopes):
          with tf.name_scope('gradient'):
            loss = tf.add_n(gradient_costs)

          # weight decay
          if self.penalty != 0.0:
            penalty = model_ops.weight_decay(self.penalty_type, self.penalty)
            loss += penalty

      return loss