Example #1
 def __init__(self, value=None, value_inference_type=None, log=True, add_random=None,
              use_unweighted=False, sample=False, sample_prob=None,
              dropconnect_keep_prob=None):
     self._true_counts = {}
     self._actual_counts = {}
     self._log = log
     self._add_random = add_random
     self._use_unweighted = use_unweighted
     self._sample = sample
     self._sample_prob = sample_prob
     # Create internal value generator
     if value is None:
         self._value = LogValue(
             value_inference_type, dropconnect_keep_prob=dropconnect_keep_prob)
     else:
         self._value = value
         self._log = value.log()
Example #2
File: gd.py Project: hrstoyanov/libspn
    def __init__(self,
                 root,
                 value=None,
                 value_inference_type=None,
                 dropconnect_keep_prob=None,
                 learning_task_type=LearningTaskType.SUPERVISED,
                 learning_method=LearningMethodType.DISCRIMINATIVE,
                 marginalizing_root=None,
                 name="GDLearning",
                 l1_regularize_coeff=None,
                 l2_regularize_coeff=None,
                 optimizer=None):

        if learning_task_type == LearningTaskType.UNSUPERVISED and \
                learning_method == LearningMethodType.DISCRIMINATIVE:
            raise ValueError(
                "It is not possible to do unsupervised learning discriminatively."
            )

        self._root = root
        self._marginalizing_root = marginalizing_root
        if self._turn_off_dropconnect(dropconnect_keep_prob,
                                      learning_task_type):
            self._root.set_dropconnect_keep_prob(1.0)
            if self._marginalizing_root is not None:
                self._marginalizing_root.set_dropconnect_keep_prob(1.0)

        if value is not None and isinstance(value, LogValue):
            self._log_value = value
        else:
            if value is not None:
                GDLearning.__logger.warn(
                    "{}: Value instance is ignored since the current implementation does "
                    "not support gradients with non-log inference. Using a LogValue instance "
                    "instead.".format(name))
            self._log_value = LogValue(
                value_inference_type,
                dropconnect_keep_prob=dropconnect_keep_prob)
        self._learning_task_type = learning_task_type
        self._learning_method = learning_method
        self._l1_regularize_coeff = l1_regularize_coeff
        self._l2_regularize_coeff = l2_regularize_coeff
        self._dropconnect_keep_prob = dropconnect_keep_prob
        self._optimizer = optimizer
        self._name = name
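
A minimal usage sketch for this constructor (hedged: ``root`` stands for an already assembled SPN root with a latent IVs node attached, and the optimizer choice and coefficient values are illustrative, not taken from the source):

# Illustrative sketch only; assumes the libspn graph (`root`) and the TF1 imports already exist.
learning = GDLearning(
    root,
    learning_task_type=LearningTaskType.SUPERVISED,
    learning_method=LearningMethodType.DISCRIMINATIVE,
    l2_regularize_coeff=1e-4,
    optimizer=tf.train.AdamOptimizer(learning_rate=1e-3))
train_op, loss = learning.learn()  # grouped update ops plus the loss tensor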
Example #3
    def __init__(self,
                 root,
                 value=None,
                 value_inference_type=None,
                 learning_task_type=LearningTaskType.SUPERVISED,
                 learning_method=LearningMethodType.DISCRIMINATIVE,
                 learning_rate=1e-4,
                 marginalizing_root=None,
                 name="GDLearning",
                 global_step=None,
                 linear_w_minimum=1e-2):

        if learning_task_type == LearningTaskType.UNSUPERVISED and \
                learning_method == LearningMethodType.DISCRIMINATIVE:
            raise ValueError(
                "It is not possible to do unsupervised learning discriminatively."
            )

        self._root = root
        self._marginalizing_root = marginalizing_root

        if value is not None and isinstance(value, LogValue):
            self._log_value = value
        else:
            if value is not None:
                GDLearning.__logger.warn(
                    "{}: Value instance is ignored since the current implementation does "
                    "not support gradients with non-log inference. Using a LogValue instance "
                    "instead.".format(name))
            self._log_value = LogValue(value_inference_type)
        self._learning_rate = learning_rate
        self._learning_task_type = learning_task_type
        self._learning_method = learning_method
        self._name = name
        self._global_step = global_step
        self._linear_w_minimum = linear_w_minimum
Example #4
    def cross_entropy_loss(self,
                           name="CrossEntropy",
                           reduce_fn=tf.reduce_mean):
        """Sets up the cross entropy loss, which is equivalent to -log(p(Y|X)).

        Args:
            name (str): Name of the name scope for the Ops defined here
            reduce_fn (Op): An operation that reduces the losses for all samples to a scalar.

        Returns:
            A Tensor corresponding to the cross-entropy loss.
        """
        with tf.name_scope(name):
            log_prob_data_and_labels = LogValue().get_value(self._root)
            log_prob_data = self._log_likelihood()
            return -reduce_fn(log_prob_data_and_labels - log_prob_data)
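
A hedged sketch of wiring this loss into a training step, assuming ``learning`` is a GDLearning instance built over a supervised SPN and ``feeds`` is the feed dict for its leaf placeholders (both names are illustrative):

# Illustrative only: build the cross-entropy loss and minimize it with a plain TF1 optimizer.
loss = learning.cross_entropy_loss()                      # scalar -log p(Y|X) after reduce_fn
train_op = tf.train.AdamOptimizer(learning_rate=1e-3).minimize(loss)
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    _, loss_value = sess.run([train_op, loss], feed_dict=feeds)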
Example #5
    def get_log_value(self, inference_type=None):
        """Assemble TF operations computing the log value of the SPN rooted in
        this node.

        Args:
            inference_type (InferenceType): Determines the type of inference
                that should be used. If set to ``None``, the inference type is
                specified by the ``inference_type`` flag of the node. If set to
                ``MARGINAL``, marginal inference will be used for all nodes. If
                set to ``MPE``, MPE inference will be used for all nodes.

        Returns:
            Tensor: A tensor of shape ``[None, out_size]``, where the first
            dimension corresponds to the batch size.
        """
        from libspn.inference.value import LogValue
        return LogValue(inference_type).get_value(self)
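
A small usage sketch, assuming ``root`` is the root node of a built SPN and ``feeds`` maps its leaf placeholders to data (both names are illustrative); weight initialization specifics are elided:

# Illustrative only: assemble and evaluate the log value of the SPN rooted at `root`.
log_value = root.get_log_value()          # each node falls back to its own inference_type flag
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    out = sess.run(log_value, feed_dict=feeds)   # shape [batch, out_size]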
Example #6
 def __init__(self,
              value=None,
              value_inference_type=None,
              log=True,
              use_unweighted=False,
              sample=False,
              sample_prob=None,
              matmul_or_conv=False):
     self._true_counts = {}
     self._actual_counts = {}
     self._log = log
     self._use_unweighted = use_unweighted
     self._sample = sample
     self._sample_prob = sample_prob
     # Create internal value generator
     self._value = value or LogValue(value_inference_type,
                                     matmul_or_conv=matmul_or_conv)
Example #7
File: mpe_path.py Project: jostosh/libspn
 def __init__(self,
              value=None,
              value_inference_type=None,
              log=True,
              add_random=None,
              use_unweighted=False):
     self._counts = {}
     self._log = log
     self._add_random = add_random
     self._use_unweighted = use_unweighted
     # Create internal value generator
     if value is None:
         if log:
             self._value = LogValue(value_inference_type)
         else:
             self._value = Value(value_inference_type)
     else:
         self._value = value
Example #8
 def __init__(self,
              value=None,
              value_inference_type=None,
              log=True,
              dropconnect_keep_prob=None):
     self._true_gradients = {}
     self._actual_gradients = {}
     self._log = log
     self._dropconnect_keep_prob = dropconnect_keep_prob
     # Create internal value generator
     if value is None:
         if log:
             self._value = LogValue(
                 value_inference_type,
                 dropconnect_keep_prob=dropconnect_keep_prob)
         else:
             self._value = Value(
                 value_inference_type,
                 dropconnect_keep_prob=dropconnect_keep_prob)
     else:
         self._value = value
         self._log = value.log()
Example #9
File: gd.py Project: hrstoyanov/libspn
    def cross_entropy_loss(self,
                           name="CrossEntropy",
                           reduce_fn=tf.reduce_mean,
                           dropconnect_keep_prob=None):
        """Sets up the cross entropy loss, which is equivalent to -log(p(Y|X)).

        Args:
            name (str): Name of the name scope for the Ops defined here
            reduce_fn (Op): An operation that reduces the losses for all samples to a scalar.
            dropconnect_keep_prob (float or Tensor): Keep probability for dropconnect, will
                override the value of GDLearning._dropconnect_keep_prob.

        Returns:
            A Tensor corresponding to the cross-entropy loss.
        """
        dropconnect_keep_prob = dropconnect_keep_prob if dropconnect_keep_prob is not None \
            else self._dropconnect_keep_prob
        with tf.name_scope(name):
            log_prob_data_and_labels = LogValue(
                dropconnect_keep_prob=dropconnect_keep_prob).get_value(
                    self._root)
            log_prob_data = self._log_likelihood(
                dropconnect_keep_prob=dropconnect_keep_prob)
            return -reduce_fn(log_prob_data_and_labels - log_prob_data)
Example #10
File: gd.py Project: hrstoyanov/libspn
class GDLearning:
    """Assembles TF operations performing Gradient Descent learning of an SPN.

    Args:
        value_inference_type (InferenceType): The inference type used during the
            upwards pass through the SPN. Ignored if ``value`` is given.
        learning_rate (float): Learning rate parameter used for updating SPN weights.
        learning_task_type (LearningTaskType): Learning type used while learning.
        learning_method (LearningMethodType): Learning method type, can be either generative
            (LearningMethodType.GENERATIVE) or discriminative (LearningMethodType.DISCRIMINATIVE).
        marginalizing_root (Sum, ParSums, SumsLayer): A sum node without IVs attached to it (or
            IVs with a fixed no-evidence feed). If it is omitted here, the node will be constructed
            internally once needed.
        name (str): The name given to this instance of GDLearning.
        l1_regularize_coeff (float or Tensor): The L1 regularization coefficient.
        l2_regularize_coeff (float or Tensor): The L2 regularization coefficient.
    """

    __logger = get_logger()

    def __init__(self,
                 root,
                 value=None,
                 value_inference_type=None,
                 dropconnect_keep_prob=None,
                 learning_task_type=LearningTaskType.SUPERVISED,
                 learning_method=LearningMethodType.DISCRIMINATIVE,
                 marginalizing_root=None,
                 name="GDLearning",
                 l1_regularize_coeff=None,
                 l2_regularize_coeff=None,
                 optimizer=None):

        if learning_task_type == LearningTaskType.UNSUPERVISED and \
                learning_method == LearningMethodType.DISCRIMINATIVE:
            raise ValueError(
                "It is not possible to do unsupervised learning discriminatively."
            )

        self._root = root
        self._marginalizing_root = marginalizing_root
        if self._turn_off_dropconnect(dropconnect_keep_prob,
                                      learning_task_type):
            self._root.set_dropconnect_keep_prob(1.0)
            if self._marginalizing_root is not None:
                self._marginalizing_root.set_dropconnect_keep_prob(1.0)

        if value is not None and isinstance(value, LogValue):
            self._log_value = value
        else:
            if value is not None:
                GDLearning.__logger.warn(
                    "{}: Value instance is ignored since the current implementation does "
                    "not support gradients with non-log inference. Using a LogValue instance "
                    "instead.".format(name))
            self._log_value = LogValue(
                value_inference_type,
                dropconnect_keep_prob=dropconnect_keep_prob)
        self._learning_task_type = learning_task_type
        self._learning_method = learning_method
        self._l1_regularize_coeff = l1_regularize_coeff
        self._l2_regularize_coeff = l2_regularize_coeff
        self._dropconnect_keep_prob = dropconnect_keep_prob
        self._optimizer = optimizer
        self._name = name

    def learn(self, loss=None, optimizer=None, post_gradient_ops=True):
        """Assemble TF operations performing GD learning of the SPN. This includes setting up
        the loss function (with regularization), setting up the optimizer and setting up
        post gradient-update ops.

        Args:
            loss (Tensor): The operation corresponding to the loss to minimize.
            optimizer (tf.train.Optimizer): A TensorFlow optimizer to use for minimizing the loss.
            post_gradient_ops (bool): Whether to use post-gradient ops such as normalization.

        Returns:
            A tuple of grouped update Ops and a loss Op.
        """
        if self._learning_task_type == LearningTaskType.SUPERVISED and self._root.ivs is None:
            raise StructureError(
                "{}: the SPN rooted at {} does not have a latent IVs node, so cannot setup "
                "conditional class probabilities.".format(
                    self._name, self._root))

        # If a loss function is not provided, define the loss function based
        # on learning-type and learning-method
        with tf.name_scope("Loss"):
            if loss is None:
                loss = (self.negative_log_likelihood() if self._learning_method
                        == LearningMethodType.GENERATIVE else
                        self.cross_entropy_loss())
            if self._l1_regularize_coeff is not None or self._l2_regularize_coeff is not None:
                loss += self.regularization_loss()

        # Assemble TF ops for optimizing and weights normalization
        optimizer = optimizer if optimizer is not None else self._optimizer
        if optimizer is None:
            raise ValueError("Did not specify GD optimizer")
        with tf.name_scope("ParameterUpdate"):
            minimize = optimizer.minimize(loss=loss)
            if post_gradient_ops:
                return self.post_gradient_update(minimize), loss
            else:
                return minimize, loss

    def post_gradient_update(self, update_op):
        """Constructs post-parameter update ops such as normalization of weights and clipping of
        scale parameters of GaussianLeaf nodes.

        Args:
            update_op (Tensor): A Tensor corresponding to the parameter update.

        Returns:
            An updated operation where the post-processing has been ensured by TensorFlow's control
            flow mechanisms.
        """
        with tf.name_scope("PostGradientUpdate"):

            # After applying gradients to weights, normalize weights
            with tf.control_dependencies([update_op]):
                weight_norm_ops = []

                def fun(node):
                    if node.is_param:
                        weight_norm_ops.append(node.normalize())

                    if isinstance(node, GaussianLeaf
                                  ) and node.learn_distribution_parameters:
                        weight_norm_ops.append(
                            tf.assign(
                                node.scale_variable,
                                tf.maximum(node.scale_variable,
                                           node._min_stddev)))

                with tf.name_scope("WeightNormalization"):
                    traverse_graph(self._root, fun=fun)
            return tf.group(*weight_norm_ops, name="weight_norm")

    def cross_entropy_loss(self,
                           name="CrossEntropy",
                           reduce_fn=tf.reduce_mean,
                           dropconnect_keep_prob=None):
        """Sets up the cross entropy loss, which is equivalent to -log(p(Y|X)).

        Args:
            name (str): Name of the name scope for the Ops defined here
            reduce_fn (Op): An operation that reduces the losses for all samples to a scalar.
            dropconnect_keep_prob (float or Tensor): Keep probability for dropconnect, will
                override the value of GDLearning._dropconnect_keep_prob.

        Returns:
            A Tensor corresponding to the cross-entropy loss.
        """
        dropconnect_keep_prob = dropconnect_keep_prob if dropconnect_keep_prob is not None \
            else self._dropconnect_keep_prob
        with tf.name_scope(name):
            log_prob_data_and_labels = LogValue(
                dropconnect_keep_prob=dropconnect_keep_prob).get_value(
                    self._root)
            log_prob_data = self._log_likelihood(
                dropconnect_keep_prob=dropconnect_keep_prob)
            return -reduce_fn(log_prob_data_and_labels - log_prob_data)

    def negative_log_likelihood(self,
                                name="NegativeLogLikelihood",
                                reduce_fn=tf.reduce_mean,
                                dropconnect_keep_prob=None):
        """Returns the maximum (log) likelihood estimate loss function which corresponds to
        -log(p(X)) in the case of unsupervised learning or -log(p(X,Y)) in the case of supervised
        learning.

        Args:
            name (str): The name for the name scope to use
            reduce_fn (Op): An operation that reduces the losses for all samples to a scalar.
            dropconnect_keep_prob (float or Tensor): Keep probability for dropconnect, will
                override the value of GDLearning._dropconnect_keep_prob.
        Returns:
            A Tensor corresponding to the MLE loss
        """
        with tf.name_scope(name):
            if self._learning_task_type == LearningTaskType.UNSUPERVISED:
                if self._root.ivs is not None:
                    likelihood = self._log_likelihood(
                        dropconnect_keep_prob=dropconnect_keep_prob)
                else:
                    likelihood = self._log_value.get_value(self._root)
            elif self._root.ivs is None:
                raise StructureError(
                    "Root should have IVs node when doing supervised learning."
                )
            else:
                likelihood = self._log_value.get_value(self._root)
            return -reduce_fn(likelihood)

    def _log_likelihood(self,
                        learning_task_type=None,
                        dropconnect_keep_prob=None):
        """Computes log(p(X)) by creating a copy of the root node without IVs. Also turns off
        dropconnect at the root if necessary.

        Returns:
            A Tensor of shape [batch, 1] corresponding to the log likelihood of the data.
        """
        marginalizing_root = self._marginalizing_root or Sum(
            *self._root.values, weights=self._root.weights)
        learning_task_type = learning_task_type or self._learning_task_type
        dropconnect_keep_prob = dropconnect_keep_prob or self._dropconnect_keep_prob
        if self._turn_off_dropconnect(dropconnect_keep_prob,
                                      learning_task_type):
            marginalizing_root.set_dropconnect_keep_prob(1.0)
        return self._log_value.get_value(marginalizing_root)

    def regularization_loss(self, name="Regularization"):
        """Adds regularization to the weight nodes. This can be either L1 or L2 or both, depending
        on what is specified at instantiation of GDLearning.

        Returns:
            A Tensor with the total regularization loss.
        """

        with tf.name_scope(name):
            losses = []

            def regularize_node(node):
                if node.is_param:
                    if self._l1_regularize_coeff is not None:
                        losses.append(self._l1_regularize_coeff *
                                      tf.reduce_sum(tf.abs(node.variable)))
                    if self._l2_regularize_coeff is not None:
                        losses.append(self._l2_regularize_coeff *
                                      tf.reduce_sum(tf.square(node.variable)))

            traverse_graph(self._root, fun=regularize_node)
            return tf.add_n(losses)

    @staticmethod
    def _turn_off_dropconnect(dropconnect_keep_prob, learning_task_type):
        """Determines whether to turn off dropconnect for the root node. """
        return dropconnect_keep_prob is not None and \
            (not isinstance(dropconnect_keep_prob, (int, float)) or dropconnect_keep_prob == 1.0) \
            and learning_task_type == LearningTaskType.SUPERVISED

    @property
    def value(self):
        """Value or LogValue: Computed SPN values."""
        return self._log_value
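
A hedged end-to-end sketch for this class, here taking the generative path so that ``learn()`` builds the negative log-likelihood plus the regularization term (``root`` and all concrete values are illustrative assumptions):

# Illustrative only: generative GD learning with L1 regularization.
learning = GDLearning(
    root,
    learning_task_type=LearningTaskType.SUPERVISED,
    learning_method=LearningMethodType.GENERATIVE,
    l1_regularize_coeff=1e-5,
    optimizer=tf.train.GradientDescentOptimizer(learning_rate=1e-2))
# learn() appends the weight-normalization / scale-clipping ops behind the optimizer update.
train_op, loss = learning.learn()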
Example #11
class MPEPath:
    """Assembles TF operations computing the branch counts for the MPE downward
    path through the SPN. It computes the number of times each branch was
    traveled by a complete subcircuit determined by the MPE value of the latent
    variables in the model.

    Args:
        value (Value or LogValue): Pre-computed SPN values.
        value_inference_type (InferenceType): The inference type used during the
            upwards pass through the SPN. Ignored if ``value`` is given.
        log (bool): If ``True``, calculate the value in the log space. Ignored
                    if ``value`` is given.
    """

    def __init__(self, value=None, value_inference_type=None, log=True, add_random=None,
                 use_unweighted=False, sample=False, sample_prob=None,
                 dropconnect_keep_prob=None):
        self._true_counts = {}
        self._actual_counts = {}
        self._log = log
        self._add_random = add_random
        self._use_unweighted = use_unweighted
        self._sample = sample
        self._sample_prob = sample_prob
        # Create internal value generator
        if value is None:
            self._value = LogValue(
                value_inference_type, dropconnect_keep_prob=dropconnect_keep_prob)
        else:
            self._value = value
            self._log = value.log()

    @property
    def value(self):
        """Value or LogValue: Computed SPN values."""
        return self._value

    @property
    def counts(self):
        """dict: Dictionary indexed by node, where each value is a list of tensors
        computing the branch counts, based on the true value of the SPN's latent
        variable, for the inputs of the node."""
        return MappingProxyType(self._true_counts)

    @property
    def actual_counts(self):
        """dict: Dictionary indexed by node, where each value is a list of tensors
        computing the branch counts, based on the actual value calculated by the
        SPN, for the inputs of the node."""
        return MappingProxyType(self._actual_counts)

    @property
    def log(self):
        return self._log

    def get_mpe_path(self, root):
        """Assemble TF operations computing the true branch counts for the MPE
        downward path through the SPN rooted in ``root``.

        Args:
            root (Node): The root node of the SPN graph.
        """
        def down_fun(node, parent_vals):
            self._true_counts[node] = summed = self._accumulate_parents(*parent_vals)
            basesum_kwargs = dict(
                add_random=self._add_random, use_unweighted=self._use_unweighted,
                sample=self._sample, sample_prob=self._sample_prob)
            if node.is_op:
                kwargs = basesum_kwargs if isinstance(node, BaseSum) else dict()
                # Compute for inputs
                with tf.name_scope(node.name):
                    return node._compute_log_mpe_path(
                        summed, *[self._value.values[i.node] if i else None
                                  for i in node.inputs], **kwargs)

        # Generate values if not yet generated
        if not self._value.values:
            self._value.get_value(root)

        with tf.name_scope("TrueMPEPath"):
            # Compute the tensor to feed to the root node
            graph_input = self._graph_input(self._value.values[root])

            # Traverse the graph computing counts
            self._true_counts = {}
            compute_graph_up_down(root, down_fun=down_fun, graph_input=graph_input)

    @staticmethod
    @utils.lru_cache
    def _accumulate_parents(*parent_vals):
        # Sum up all parent vals
        return tf.add_n([pv for pv in parent_vals if pv is not None], name="AccumulateParents")

    @staticmethod
    @utils.lru_cache
    def _graph_input(root_value):
        return tf.ones_like(root_value)

    def get_mpe_path_actual(self, root):
        """Assemble TF operations computing the actual branch counts for the MPE
        downward path through the SPN rooted in ``root``.

        Args:
            root (Node): The root node of the SPN graph.
        """
        def down_fun(node, parent_vals):
            self._actual_counts[node] = summed = self._accumulate_parents(*parent_vals)
            basesum_kwargs = dict(
                add_random=self._add_random, use_unweighted=self._use_unweighted,
                sample=self._sample, sample_prob=self._sample_prob)
            if node.is_op:
                # Compute for inputs
                kwargs = basesum_kwargs if isinstance(node, BaseSum) else dict()
                with tf.name_scope(node.name):
                    return node._compute_log_mpe_path(
                        summed, *[self._value.values[i.node] if i else None
                                  for i in node.inputs], **kwargs)

        # Generate values if not yet generated
        if not self._value.values:
            self._value.get_value(root)

        with tf.name_scope("ActualMPEPath"):
            graph_input = self._graph_input(self._value.values[root])

            # Traverse the graph computing counts
            self._actual_counts = {}
            compute_graph_up_down(root, down_fun=down_fun, graph_input=graph_input)
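
A brief sketch of using this class to obtain branch counts (e.g. for EM-style weight updates), assuming ``root`` is the root of an assembled SPN (illustrative name):

# Illustrative only: compute the true MPE-path counts for every node's inputs.
mpe_path = MPEPath(use_unweighted=True)
mpe_path.get_mpe_path(root)           # fills the read-only `counts` mapping, keyed by node
counts_for_root = mpe_path.counts[root]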
Example #12
class GDLearning:
    """Assembles TF operations performing Gradient Descent learning of an SPN.

    Args:
        value_inference_type (InferenceType): The inference type used during the
            upwards pass through the SPN. Ignored if ``value`` is given.
        learning_rate (float): Learning rate parameter used for updating SPN weights.
        learning_task_type (LearningTaskType): Learning type used while learning.
        learning_method (LearningMethodType): Learning method type, can be either generative
            (LearningMethodType.GENERATIVE) or discriminative (LearningMethodType.DISCRIMINATIVE).
        marginalizing_root (Sum, ParallelSums, SumsLayer): A sum node without IndicatorLeafs attached to
            it (or IndicatorLeafs with a fixed no-evidence feed). If it is omitted here, the node
            will be constructed internally once needed.
        name (str): The name given to this instance of GDLearning.
    """

    __logger = get_logger()

    def __init__(self,
                 root,
                 value=None,
                 value_inference_type=None,
                 learning_task_type=LearningTaskType.SUPERVISED,
                 learning_method=LearningMethodType.DISCRIMINATIVE,
                 learning_rate=1e-4,
                 marginalizing_root=None,
                 name="GDLearning",
                 global_step=None,
                 linear_w_minimum=1e-2):

        if learning_task_type == LearningTaskType.UNSUPERVISED and \
                learning_method == LearningMethodType.DISCRIMINATIVE:
            raise ValueError(
                "It is not possible to do unsupervised learning discriminatively."
            )

        self._root = root
        self._marginalizing_root = marginalizing_root

        if value is not None and isinstance(value, LogValue):
            self._log_value = value
        else:
            if value is not None:
                GDLearning.__logger.warn(
                    "{}: Value instance is ignored since the current implementation does "
                    "not support gradients with non-log inference. Using a LogValue instance "
                    "instead.".format(name))
            self._log_value = LogValue(value_inference_type)
        self._learning_rate = learning_rate
        self._learning_task_type = learning_task_type
        self._learning_method = learning_method
        self._name = name
        self._global_step = global_step
        self._linear_w_minimum = linear_w_minimum

    def loss(self, learning_method=None, reduce_fn=tf.reduce_mean):
        """Assembles main objective operations. In case of generative learning it will select
        the MLE objective, whereas in discriminative learning it selects the cross entropy.

        Args:
            learning_method (LearningMethodType): The learning method (can be either generative
                or discriminative).

        Returns:
            An operation to compute the main loss function.
        """
        learning_method = learning_method or self._learning_method
        if learning_method == LearningMethodType.GENERATIVE:
            return self.negative_log_likelihood(reduce_fn=reduce_fn)
        return self.cross_entropy_loss(reduce_fn=reduce_fn)

    def learn(self,
              loss=None,
              optimizer=None,
              post_gradient_ops=True,
              name="LearnGD"):
        """Assemble TF operations performing GD learning of the SPN. This includes setting up
        the loss function (with regularization), setting up the optimizer and setting up
        post gradient-update ops.

        Args:
            loss (Tensor): The operation corresponding to the loss to minimize.
            optimizer (tf.train.Optimizer): A TensorFlow optimizer to use for minimizing the loss.
            post_gradient_ops (bool): Whether to use post-gradient ops such as normalization.

        Returns:
            A tuple of grouped update Ops and a loss Op.
        """
        if self._learning_task_type == LearningTaskType.SUPERVISED and self._root.latent_indicators is None:
            raise StructureError(
                "{}: the SPN rooted at {} does not have a latent IndicatorLeaf node, so cannot "
                "setup conditional class probabilities.".format(
                    self._name, self._root))

        # If a loss function is not provided, define the loss function based
        # on learning-type and learning-method
        with tf.name_scope(name):
            with tf.name_scope("Loss"):
                if loss is None:
                    if self._learning_method == LearningMethodType.GENERATIVE:
                        loss = self.negative_log_likelihood()
                    else:
                        loss = self.cross_entropy_loss()
            # Assemble TF ops for optimizing and weights normalization
            with tf.name_scope("ParameterUpdate"):
                minimize = optimizer.minimize(loss=loss)
                if post_gradient_ops:
                    return self.post_gradient_update(minimize), loss
                else:
                    return minimize, loss

    def post_gradient_update(self, update_op):
        """Constructs post-parameter update ops such as normalization of weights and clipping of
        scale parameters of NormalLeaf nodes.

        Args:
            update_op (Tensor): A Tensor corresponding to the parameter update.

        Returns:
            An updated operation where the post-processing has been ensured by TensorFlow's control
            flow mechanisms.
        """
        with tf.name_scope("PostGradientUpdate"):

            # After applying gradients to weights, normalize weights
            with tf.control_dependencies([update_op]):
                weight_norm_ops = []

                def fun(node):
                    if node.is_param:
                        weight_norm_ops.append(
                            node.normalize(
                                linear_w_minimum=self._linear_w_minimum))

                    if isinstance(node,
                                  LocationScaleLeaf) and node._trainable_scale:
                        weight_norm_ops.append(
                            tf.assign(
                                node.scale_variable,
                                tf.maximum(node.scale_variable,
                                           node._min_scale)))

                with tf.name_scope("WeightNormalization"):
                    traverse_graph(self._root, fun=fun)
            return tf.group(*weight_norm_ops, name="weight_norm")

    def cross_entropy_loss(self,
                           name="CrossEntropy",
                           reduce_fn=tf.reduce_mean):
        """Sets up the cross entropy loss, which is equivalent to -log(p(Y|X)).

        Args:
            name (str): Name of the name scope for the Ops defined here
            reduce_fn (Op): An operation that reduces the losses for all samples to a scalar.

        Returns:
            A Tensor corresponding to the cross-entropy loss.
        """
        with tf.name_scope(name):
            log_prob_data_and_labels = LogValue().get_value(self._root)
            log_prob_data = self._log_likelihood()
            return -reduce_fn(log_prob_data_and_labels - log_prob_data)

    def negative_log_likelihood(self,
                                name="NegativeLogLikelihood",
                                reduce_fn=tf.reduce_mean):
        """Returns the maximum (log) likelihood estimate loss function which corresponds to
        -log(p(X)) in the case of unsupervised learning or -log(p(X,Y)) in the case of supervised
        learning.

        Args:
            name (str): The name for the name scope to use
            reduce_fn (function): A function that returns an operation that reduces the losses for
                all samples to a scalar.
        Returns:
            A Tensor corresponding to the MLE loss
        """
        with tf.name_scope(name):
            if self._learning_task_type == LearningTaskType.UNSUPERVISED:
                if self._root.latent_indicators is not None:
                    likelihood = self._log_likelihood()
                else:
                    likelihood = self._log_value.get_value(self._root)
            elif self._root.latent_indicators is None:
                raise StructureError(
                    "Root should have latent indicator node when doing supervised "
                    "learning.")
            else:
                likelihood = self._log_value.get_value(self._root)
            return -reduce_fn(likelihood)

    def _log_likelihood(self):
        """Computes log(p(X)) by creating a copy of the root node without latent indicators.

        Returns:
            A Tensor of shape [batch, 1] corresponding to the log likelihood of the data.
        """
        if isinstance(self._root, BaseSum):
            marginalizing_root = self._marginalizing_root or Sum(
                *self._root.values, weights=self._root.weights)
        else:
            marginalizing_root = self._marginalizing_root or BlockSum(
                self._root.values[0],
                weights=self._root.weights,
                num_sums_per_block=1)
        return self._log_value.get_value(marginalizing_root)
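
A minimal sketch for this variant, where the optimizer is supplied to ``learn()`` rather than to the constructor (``root`` and the optimizer choice are illustrative assumptions):

# Illustrative only: discriminative learning; this version takes the optimizer in learn().
learning = GDLearning(root,
                      learning_task_type=LearningTaskType.SUPERVISED,
                      learning_method=LearningMethodType.DISCRIMINATIVE)
train_op, loss = learning.learn(optimizer=tf.train.AdamOptimizer(learning_rate=1e-4))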