# Shared imports for the snippets below (hedged: library-internal names such
# as Weights, Sum, InferenceType, conf, utils and traverse_graph come from
# the surrounding SPN library and are not importable from here).
import tensorflow as tf
import numpy as np
from collections import defaultdict, OrderedDict
from itertools import chain, product


def __init__(self, child, weights=None, latent_indicators=None,
             inference_type=InferenceType.MARGINAL, sample_prob=None,
             name="BlockRootSum"):
    super().__init__(
        child=child, num_sums_per_block=1, weights=weights,
        latent_indicators=latent_indicators, inference_type=inference_type,
        sample_prob=sample_prob, name=name)

    # Take care of generating the random decompositions. As used here,
    # traverse_graph returns the first node for which ``fun`` returns True,
    # or None if no such node exists.
    randomize_node = traverse_graph(
        root=self,
        fun=lambda node: isinstance(node, BlockRandomDecompositions))
    if randomize_node is not None:
        factors = []
        traverse_graph(
            self,
            lambda n: factors.append(n.num_factors)
            if isinstance(n, BlockPermuteProduct) else None)
        randomize_node.generate_permutations(factors)

def post_gradient_update(self, update_op):
    """Constructs post-parameter update ops such as normalization of weights
    and clipping of scale parameters of GaussianLeaf nodes.

    Args:
        update_op (Tensor): A Tensor corresponding to the parameter update.

    Returns:
        An updated operation where the post-processing has been ensured by
        TensorFlow's control flow mechanisms.
    """
    with tf.name_scope("PostGradientUpdate"):
        # After applying gradients to weights, normalize weights
        with tf.control_dependencies([update_op]):
            weight_norm_ops = []

            def fun(node):
                if node.is_param:
                    weight_norm_ops.append(node.normalize())
                if isinstance(node, GaussianLeaf) and \
                        node.learn_distribution_parameters:
                    weight_norm_ops.append(tf.assign(
                        node.scale_variable,
                        tf.maximum(node.scale_variable, node._min_stddev)))

            with tf.name_scope("WeightNormalization"):
                traverse_graph(self._root, fun=fun)
            return tf.group(*weight_norm_ops, name="weight_norm")

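
# Usage sketch for post_gradient_update (hedged: assumes ``learning`` is the
# GD-learning object exposing this method, ``loss`` is a scalar Tensor, and a
# TF1-style Session; none of these names are defined in this snippet).
update_op = tf.train.GradientDescentOptimizer(0.1).minimize(loss)
train_op = learning.post_gradient_update(update_op)
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    sess.run(train_op)  # gradient step plus weight re-normalization in one go
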
def serialize_graph(root, save_param_vals=True, sess=None):
    """Convert an SPN graph rooted in ``root`` into a dictionary for
    serialization.

    The graph is converted to a dict here rather than a collection of Node
    since additional processing is done (retrieval of variable values inside
    a session) which cannot easily be done from within the JSON encoder.

    Args:
        root (Node): Root of the SPN to be serialized.
        save_param_vals (bool): If ``True``, values of parameters will be
            evaluated in a session and stored. The TF variables of parameter
            nodes must already be initialized. If a valid session cannot be
            found, the parameter values will not be retrieved.
        sess (Session): Optional. Session used to retrieve parameter values.
            If ``None``, the default session is used.

    Returns:
        dict: Dictionary with all the data to be serialized.
    """
    node_datas = []
    param_vars = {}

    def fun(node):
        data = node.serialize()
        # The nodes will not be deserialized automatically during JSON
        # decoding since they do not use the __type__ data field.
        data['node_type'] = utils.type2str(type(node))
        data_index = len(node_datas)
        node_datas.append(data)
        # Handle param variables
        if node.is_param:
            if save_param_vals:
                # Get all variables
                for k, v in data.items():
                    if isinstance(v, tf.Variable):
                        param_vars[(data_index, k)] = v
            else:
                # Ignore all variables
                for k, v in data.items():
                    if isinstance(v, tf.Variable):
                        data[k] = None

    # Check session
    if sess is None:
        sess = tf.get_default_session()
    if save_param_vals and sess is None:
        logger.debug1("No valid session found, "
                      "parameter values will not be saved!")
        save_param_vals = False
    # Serialize all nodes
    traverse_graph(root, fun=fun, skip_params=False)
    # Get and fill values of all variables
    if save_param_vals:
        param_vals = sess.run(param_vars)
        for (i, k), v in param_vals.items():
            node_datas[i][k] = v.tolist()
    return {'root': root.name, 'nodes': node_datas}

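
# Usage sketch for serialize_graph (hedged: assumes ``root`` is an SPN root
# whose weights are initialized via initialize_weights, defined later in this
# module; the returned dict is JSON-serializable once variable values have
# been converted to plain lists).
import json

with tf.Session() as sess:
    sess.run(initialize_weights(root))
    data = serialize_graph(root, save_param_vals=True, sess=sess)
with open("model.json", "w") as f:
    json.dump(data, f)
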
def set_inference_types(self, inference_type):
    """Set inference type for each node in the SPN rooted in this node.

    Args:
        inference_type (InferenceType): Inference type to set for the nodes.
    """
    def fun(node):
        node.inference_type = inference_type

    traverse_graph(self, fun=fun, skip_params=False)

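
# Usage sketch for set_inference_types (hedged: assumes InferenceType.MPE
# exists alongside InferenceType.MARGINAL used above, and that value ops are
# built only after the switch; get_value is a hypothetical value-building
# call on the root node).
root.set_inference_types(InferenceType.MPE)
mpe_value = root.get_value()
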
def _create_accumulators(self):
    def fun(node):
        if node.is_param:
            with tf.name_scope(node.name) as scope:
                if self._initial_accum_value is not None:
                    if node.mask and not all(node.mask):
                        accum = tf.Variable(
                            tf.cast(tf.reshape(node.mask, node.variable.shape),
                                    dtype=conf.dtype) *
                            self._initial_accum_value,
                            dtype=conf.dtype, collections=['em_accumulators'])
                    else:
                        accum = tf.Variable(
                            tf.ones_like(node.variable, dtype=conf.dtype) *
                            self._initial_accum_value,
                            dtype=conf.dtype, collections=['em_accumulators'])
                else:
                    accum = tf.Variable(
                        tf.zeros_like(node.variable, dtype=conf.dtype),
                        dtype=conf.dtype, collections=['em_accumulators'])
                param_node = EMLearning.ParamNode(node=node, accum=accum,
                                                  name_scope=scope)
                self._param_nodes.append(param_node)
        if isinstance(node, GaussianLeaf) and \
                node.learn_distribution_parameters:
            with tf.name_scope(node.name) as scope:
                if self._initial_accum_value is not None:
                    accum = tf.Variable(
                        tf.ones_like(node.loc_variable, dtype=conf.dtype) *
                        self._initial_accum_value,
                        dtype=conf.dtype, collections=['em_accumulators'])
                    sum_x = tf.Variable(
                        node.loc_variable * self._initial_accum_value,
                        dtype=conf.dtype, collections=['em_accumulators'])
                    sum_x2 = tf.Variable(
                        tf.square(node.loc_variable) *
                        self._initial_accum_value,
                        dtype=conf.dtype, collections=['em_accumulators'])
                else:
                    accum = tf.Variable(
                        tf.zeros_like(node.loc_variable, dtype=conf.dtype),
                        dtype=conf.dtype, collections=['em_accumulators'])
                    sum_x = tf.Variable(
                        tf.zeros_like(node.loc_variable),
                        dtype=conf.dtype, collections=['em_accumulators'])
                    sum_x2 = tf.Variable(
                        tf.zeros_like(node.loc_variable),
                        dtype=conf.dtype, collections=['em_accumulators'])
                gaussian_node = EMLearning.GaussianLeafNode(
                    node=node, accum=accum, sum_data=sum_x,
                    sum_data_squared=sum_x2, name_scope=scope)
                self._gaussian_leaf_nodes.append(gaussian_node)

    self._gaussian_leaf_nodes = []
    self._param_nodes = []
    with tf.name_scope(self._name_scope):
        traverse_graph(self._root, fun=fun)

def get_nodes(self, skip_params=False):
    """Get a list of nodes in the (sub-)graph rooted in this node.

    Args:
        skip_params (bool): If ``True``, param nodes will not be included.

    Returns:
        list of Node: List of nodes.
    """
    nodes = []
    traverse_graph(self, fun=lambda node: nodes.append(node),
                   skip_params=skip_params)
    return nodes

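
# Usage sketch for get_nodes (hedged: filters the traversal result by type;
# ``Weights`` is the parameter-node class referenced elsewhere in this
# module, and ``root`` is an already-built SPN root).
weight_nodes = [n for n in root.get_nodes() if isinstance(n, Weights)]
op_and_var_nodes = root.get_nodes(skip_params=True)
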
def _create_accumulators(self):
    def fun(node):
        if node.is_param:
            with tf.name_scope(node.name) as scope:
                if self._initial_accum_value is not None:
                    if node.mask and not all(node.mask):
                        accum = tf.Variable(
                            tf.cast(tf.reshape(node.mask, node.variable.shape),
                                    dtype=conf.dtype) *
                            self._initial_accum_value,
                            dtype=conf.dtype)
                    else:
                        accum = tf.Variable(
                            tf.ones_like(node.variable, dtype=conf.dtype) *
                            self._initial_accum_value,
                            dtype=conf.dtype)
                else:
                    accum = tf.Variable(
                        tf.zeros_like(node.variable, dtype=conf.dtype),
                        dtype=conf.dtype)
                param_node = HardEMLearning.ParamNode(
                    node=node, accum=accum, name_scope=scope)
                self._param_nodes.append(param_node)
        if isinstance(node, LocationScaleLeaf) and \
                (node.trainable_scale or node.trainable_loc):
            with tf.name_scope(node.name) as scope:
                if self._initial_accum_value is not None:
                    accum = tf.Variable(
                        tf.ones_like(node.loc_variable, dtype=conf.dtype) *
                        self._initial_accum_value,
                        dtype=conf.dtype)
                    sum_x = tf.Variable(
                        node.loc_variable * self._initial_accum_value,
                        dtype=conf.dtype)
                    sum_x2 = tf.Variable(
                        tf.square(node.loc_variable) *
                        self._initial_accum_value,
                        dtype=conf.dtype)
                else:
                    accum = tf.Variable(
                        tf.zeros_like(node.loc_variable, dtype=conf.dtype),
                        dtype=conf.dtype)
                    sum_x = tf.Variable(
                        tf.zeros_like(node.loc_variable), dtype=conf.dtype)
                    sum_x2 = tf.Variable(
                        tf.zeros_like(node.loc_variable), dtype=conf.dtype)
                loc_scale_node = HardEMLearning.LocationScaleLeafNode(
                    node=node, accum=accum, sum_data=sum_x,
                    sum_data_squared=sum_x2, name_scope=scope)
                self._loc_scale_leaf_nodes.append(loc_scale_node)

    self._loc_scale_leaf_nodes = []
    self._param_nodes = []
    with tf.name_scope(self._name_scope):
        traverse_graph(self._root, fun=fun)

def initialize_weights(root, name=None):
    """Generate an assign operation initializing all the sum weights in the
    SPN graph rooted in ``root``.

    Args:
        root (Node): The root node of the SPN graph.
        name (str): Optional name scope for the generated operations.

    Returns:
        Operation: A collective operation initializing all ``Weights`` nodes.
    """
    initialize_ops = []

    def initialize(node):
        if isinstance(node, Weights):
            initialize_ops.append(node.initialize())

    with tf.name_scope(name, "InitializeWeights"):
        # Get all assignment operations
        traverse_graph(root, fun=initialize, skip_params=False)
        # Return collective operation
        return tf.group(*initialize_ops)

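
# Usage sketch for initialize_weights (hedged: assumes ``root`` is the root
# of an already-built SPN graph and a TF1 Session).
init_op = initialize_weights(root)
with tf.Session() as sess:
    sess.run(init_op)  # runs every Weights node's initializer as one op
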
def get_num_nodes(self, skip_params=False):
    """Get the number of nodes in the SPN graph for which this node is root.

    Args:
        skip_params (bool): If ``True``, don't count param nodes.

    Returns:
        int: Number of nodes.
    """
    class Counter:
        """Mutable int."""

        def __init__(self):
            self.val = 0

        def inc(self):
            self.val += 1

    c = Counter()
    traverse_graph(self, fun=lambda node: c.inc(),
                   skip_params=skip_params)
    return c.val

def assign_weights(root, value, name=None):
    """Generate an assign operation assigning a value to all the weights in
    the SPN graph rooted in ``root``.

    Args:
        root (Node): The root node of the SPN graph.
        value: The value to assign to the weights.
        name (str): Optional name scope for the generated operations.

    Returns:
        Operation: A collective operation assigning ``value`` to all
        ``Weights`` nodes.
    """
    assign_ops = []

    def assign(node):
        if isinstance(node, Weights):
            assign_ops.append(node.assign(value))

    with tf.name_scope(name, "AssignWeights", [root, value]):
        # Get all assignment operations
        traverse_graph(root, fun=assign, skip_params=False)
        # Return a collective operation
        return tf.group(*assign_ops)

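
# Usage sketch for assign_weights (hedged: same TF1 setup; the op is built
# once and can be re-run to reset the weights, subject to each Weights
# node's own assign semantics).
reset_op = assign_weights(root, 0.25)
with tf.Session() as sess:
    sess.run(reset_op)  # assigns 0.25 to every Weights node in the graph
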
def get_num_nodes(self, skip_params=False, node_type=None):
    """Get the number of nodes in the SPN graph for which this node is root.

    Args:
        skip_params (bool): If ``True``, don't count param nodes.
        node_type: Type of node in the SPN graph to be counted. If ``None``,
            count all node types.

    Returns:
        int: Number of nodes.
    """
    class Counter:
        """Mutable int."""

        def __init__(self):
            self.val = 0

        def inc(self, node, *_):
            if node_type is None or isinstance(node, node_type):
                self.val += 1

    c = Counter()
    traverse_graph(self, fun=c.inc, skip_params=skip_params)
    return c.val

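
# Usage sketch for get_num_nodes (hedged: ``Sum`` is the sum-node class used
# elsewhere in this module; node_type may also be a tuple of types since the
# counter relies on isinstance).
total = root.get_num_nodes()
num_sums = root.get_num_nodes(node_type=Sum)
num_ops = root.get_num_nodes(skip_params=True)
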
def _create_accumulators(self):
    def fun(node):
        if node.is_param:
            with tf.name_scope(node.name) as scope:
                if self._initial_accum_value is not None:
                    accum = tf.Variable(
                        tf.ones_like(node.variable, dtype=conf.dtype) *
                        self._initial_accum_value,
                        dtype=conf.dtype, collections=['em_accumulators'])
                else:
                    accum = tf.Variable(
                        tf.zeros_like(node.variable, dtype=conf.dtype),
                        dtype=conf.dtype, collections=['em_accumulators'])
                param_node = EMLearning.ParamNode(node=node, accum=accum,
                                                  name_scope=scope)
                self._param_nodes.append(param_node)

    self._param_nodes = []
    with tf.name_scope(self._name_scope):
        traverse_graph(self._root, fun=fun)

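
# Usage sketch (hedged): the accumulators above are registered in the
# 'em_accumulators' graph collection, so they can be reset between EM epochs
# with standard TF1 collection utilities.
reset_accumulators = tf.group(
    *[tf.assign(v, tf.zeros_like(v))
      for v in tf.get_collection('em_accumulators')])
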
def regularization_loss(self, name="Regularization"):
    """Compute the total regularization loss over the weight nodes. This can
    be either L1 or L2 or both, depending on what is specified at
    instantiation of GDLearning.

    Returns:
        A Tensor with the total regularization loss.
    """
    with tf.name_scope(name):
        losses = []

        def regularize_node(node):
            if node.is_param:
                if self._l1_regularize_coeff is not None:
                    losses.append(self._l1_regularize_coeff *
                                  tf.reduce_sum(tf.abs(node.variable)))
                if self._l2_regularize_coeff is not None:
                    losses.append(self._l2_regularize_coeff *
                                  tf.reduce_sum(tf.square(node.variable)))

        traverse_graph(self._root, fun=regularize_node)
        return tf.add_n(losses)

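
# Usage sketch for regularization_loss (hedged: assumes ``learning`` is a
# GDLearning instance built with l1/l2 coefficients and ``nll`` is its
# negative log-likelihood Tensor; neither name is defined in this snippet).
total_loss = nll + learning.regularization_loss()
train_step = tf.train.AdamOptimizer(1e-2).minimize(total_loss)
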
def convert_to_layer_nodes(root):
    """At each level in the SPN rooted in the ``root`` node, model all the
    nodes as a single layer-node.

    Args:
        root (Node): The root of the SPN graph.

    Returns:
        root (Node): The root of the SPN graph, with each layer modelled as
            a single layer-node.
    """
    parents = defaultdict(list)
    depths = defaultdict(list)
    node_to_depth = OrderedDict()
    node_to_depth[root] = 1

    def get_parents(node):
        # Add to parents dict
        if node.is_op:
            for i in node.inputs:
                if (i and  # Input not empty
                        not (i.is_param or i.is_var)):
                    parents[i.node].append(node)
                    node_to_depth[i.node] = node_to_depth[node] + 1

    def permute_inputs(input_values, input_sizes):
        # For a given list of inputs and their corresponding sizes, create a
        # nested list of (input, index) pairs.
        # E.g.: input_values = [(A, [2, 5]), (B, None)]
        #       input_sizes = [2, 3]
        #       inputs = [[('A', 2), ('A', 5)],
        #                 [('B', 0), ('B', 1), ('B', 2)]]
        inputs = [list(product([inp.node], inp.indices))
                  if inp and inp.indices
                  else list(product([inp.node], list(range(inp_size))))
                  for inp, inp_size in zip(input_values, input_sizes)]

        # For a given nested list of (input, index) pairs, permute over the
        # inputs.
        # E.g.: permuted_inputs = [('A', 2), ('B', 0),
        #                          ('A', 2), ('B', 1),
        #                          ('A', 2), ('B', 2),
        #                          ('A', 5), ('B', 0),
        #                          ('A', 5), ('B', 1),
        #                          ('A', 5), ('B', 2)]
        permuted_inputs = list(product(*[inps for inps in inputs]))
        return list(chain(*permuted_inputs))

    # Create a parents dictionary of the SPN graph
    traverse_graph(root, fun=get_parents, skip_params=True)

    # Create a depth dictionary of the SPN graph
    for key, value in node_to_depth.items():
        depths[value].append(key)
    spn_depth = len(depths)

    # Iterate through each depth of the SPN, starting from the deepest
    # layer, moving up to the root node
    for depth in range(spn_depth, 1, -1):
        if isinstance(depths[depth][0], (Sum, ParallelSums)):  # A sums layer
            # Create a default SumsLayer node
            with tf.name_scope("Layer%s" % depth):
                sums_layer = SumsLayer(name="SumsLayer-%s.%s" % (depth, 1))
                # Initialize a counter for keeping track of the number of
                # sums modelled in the layer node
                layer_num_sums = 0
                # Initialize an empty list for storing sum-input-sizes of
                # sums modelled in the layer node
                num_or_size_sums = []
                # Iterate through each node at the current depth of the SPN
                for node in depths[depth]:
                    # TODO: To be replaced with node.num_sums once
                    # AbstractSums class is introduced
                    # No. of sums modelled by the current node
                    node_num_sums = (1 if isinstance(node, Sum)
                                     else node.num_sums)
                    # Add input values of the current node to the SumsLayer
                    # node
                    sums_layer.add_values(*node.values * node_num_sums)
                    # Add the sum-input-size of each sum modelled in the
                    # current node to the list
                    num_or_size_sums += \
                        [sum(node.get_input_sizes()[2:])] * node_num_sums
                    # Visit each parent of the current node
                    for parent in parents[node]:
                        try:
                            # 'values' in case parent is an Op node
                            values = list(parent.values)
                        except AttributeError:
                            # 'inputs' in case parent is a Concat node
                            values = list(parent.inputs)
                        # Iterate through each input value of the current
                        # parent node
                        for i, value in enumerate(values):
                            # If the value is the current node
                            if value.node == node:
                                # Check if it has indices
                                if value.indices is not None:
                                    # If so, just add the num-sums of the
                                    # layer-op as an offset
                                    indices = (np.asarray(value.indices) +
                                               layer_num_sums).tolist()
                                else:
                                    # If not, create a list accordingly
                                    indices = list(range(
                                        layer_num_sums,
                                        layer_num_sums + node_num_sums))
                                # Replace the previous (node) input value in
                                # the current parent node with the new
                                # layer-node value
                                values[i] = (sums_layer, indices)
                                # Once the child node is found, don't search
                                # further
                                break
                        # Reset values of the current parent node to include
                        # the new child (layer node)
                        try:
                            # Set 'values' in case parent is an Op node
                            parent.set_values(*values)
                        except AttributeError:
                            # Set 'inputs' in case parent is a Concat node
                            parent.set_inputs(*values)
                    # Increment the num-sums counter of the layer node
                    layer_num_sums += node_num_sums
                    # Disconnect the node
                    node.disconnect_inputs()

                # After all nodes at a certain depth are modelled into a
                # layer node, set the num-sums parameter accordingly
                sums_layer.set_sum_sizes(num_or_size_sums)

        elif isinstance(depths[depth][0], (Product, PermuteProducts)):
            # A products layer
            with tf.name_scope("Layer%s" % depth):
                prods_layer = ProductsLayer(name="ProductsLayer-%s.%s" %
                                            (depth, 1))
                # Initialize a counter for keeping track of the number of
                # products modelled in the layer node
                layer_num_prods = 0
                # Initialize an empty list for storing prod-input-sizes of
                # products modelled in the layer node
                num_or_size_prods = []
                # Iterate through each node at the current depth of the SPN
                for node in depths[depth]:
                    # Get input values and sizes of the product node
                    input_values = list(node.values)
                    input_sizes = list(node.get_input_sizes())
                    if isinstance(node, PermuteProducts):
                        # Permute over input values to model permuted
                        # products
                        input_values = permute_inputs(input_values,
                                                      input_sizes)
                        node_num_prods = node.num_prods
                        prod_input_size = len(input_values) // node_num_prods
                    elif isinstance(node, Product):
                        node_num_prods = 1
                        prod_input_size = int(sum(input_sizes))
                    # Add input values of the current node to the
                    # ProductsLayer node
                    prods_layer.add_values(*input_values)
                    # Add the prod-input-size of each product modelled in
                    # the current node to the list
                    num_or_size_prods += [prod_input_size] * node_num_prods
                    # Visit each parent of the current node
                    for parent in parents[node]:
                        values = list(parent.values)
                        # Iterate through each input value of the current
                        # parent node
                        for i, value in enumerate(values):
                            # If the value is the current node
                            if value.node == node:
                                # Check if it has indices
                                if value.indices is not None:
                                    # If so, just add the num-prods of the
                                    # layer-op as an offset
                                    indices = (np.asarray(value.indices) +
                                               layer_num_prods).tolist()
                                else:
                                    # If not, create a list accordingly
                                    indices = list(range(
                                        layer_num_prods,
                                        layer_num_prods + node_num_prods))
                                # Replace the previous (node) input value in
                                # the current parent node with the new
                                # layer-node value
                                values[i] = (prods_layer, indices)
                        # Reset values of the current parent node to include
                        # the new child (layer node)
                        parent.set_values(*values)
                    # Increment the num-prods counter of the layer node
                    layer_num_prods += node_num_prods
                    # Disconnect the node
                    node.disconnect_inputs()

                # After all nodes at a certain depth are modelled into a
                # layer node, set the num-prods parameter accordingly
                prods_layer.set_prod_sizes(num_or_size_prods)

        elif isinstance(depths[depth][0], (SumsLayer, ProductsLayer,
                                           Concat)):
            # Already a layer node or a Concat node
            pass
        else:
            raise StructureError("Unknown node-type: {}".format(
                depths[depth][0]))

    return root
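
# Usage sketch for convert_to_layer_nodes (hedged: assumes the graph was
# built from the node types handled above; printing node counts before and
# after makes the compaction visible).
print("nodes before:", root.get_num_nodes())
root = convert_to_layer_nodes(root)
print("layer nodes :",
      root.get_num_nodes(node_type=(SumsLayer, ProductsLayer)))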