Example #1
0
 def from_xml(self, xmlnode):
     """
     construct statistic tree from given XML element

     xmlnode must be an ElementTree element; the tree currently held in
     self.root is replaced by one rebuilt from it.

     raises StatisticError if xmlnode is not an ElementTree element
     """
     # iselement() is the public ElementTree check; the private
     # _ElementInterface class is not available on every Python version
     if not ElementTree.iselement(xmlnode):
         raise StatisticError('input must be of type xml.etree.ElementTree.Element')

     # build the replacement tree completely before swapping it in, so a
     # failure while parsing does not leave a half-built tree in self.root
     new_root = StatisticNode(None, 'root')
     new_root.from_xml(xmlnode)
     self.root = new_root

     # cached leaves point at nodes of the replaced tree; drop them
     self.leaves = []
Example #2
0
 def __init__(self, taxonomy):
     """
     initialize empty statistic for the given taxonomy

     the order of taxonomy.attributes determines the default parse order
     and the order of the per-level defaults / skip flags
     """
     self.root = StatisticNode(None, 'root', 'root')
     self.attributes = []
     self.leaves = []
     self.leaves_ordered = False
     self.taxonomy = taxonomy
     self.finalized = False
     self.default_parse_order = [x.name for x in self.taxonomy.attributes]

     # bind fresh lists on the instance: appending to the lists declared
     # on the class would share (and keep growing) them across instances
     self.defaults = []
     # one skip flag for the root level, followed by one per attribute
     self.skips = [False]
     for attr in self.taxonomy.attributes:
         self.defaults.append(attr.default)
         self.skips.append(False)
Example #3
0
class Statistics(object):
    """
    stores regional statistic of structural characteristics
    statistic are stored in tree format

    typical life cycle: add_case() accumulates counts, finalize() converts
    the counts into weights, after which the tree can be modified
    (add_branch / delete_branch / set_child_weights) and sampled
    (get_samples).
    """

    # default values for each level. used for comparison
    # NOTE: kept on the class only for backward compatibility; __init__
    #       binds a separate list on every instance
    defaults = []
    # determine whether a level of taxonomy should be skipped during evaluation
    # NOTE: same as above, every instance gets its own list in __init__
    skips = []

    @logAPICall
    def __init__(self, taxonomy):
        """
        initialize empty statistic for the given taxonomy

        the order of taxonomy.attributes determines the default parse order
        and the order of the per-level defaults / skip flags
        """
        self.root = StatisticNode(None, 'root', 'root')
        self.attributes = []
        self.leaves = []
        self.leaves_ordered = False
        self.taxonomy = taxonomy
        self.finalized = False
        self.default_parse_order = [x.name for x in self.taxonomy.attributes]

        # bind fresh lists on the instance: appending to the class-level
        # lists would share (and keep growing) them across all instances
        self.defaults = []
        # one skip flag for the root level, followed by one per attribute
        self.skips = [False]
        for attr in self.taxonomy.attributes:
            self.defaults.append(attr.default)
            self.skips.append(False)

    def __str__(self):
        """ return string representation of the underlying tree """
        return str(self.root)

    @property
    def is_valid(self):
        """ True if the tree has at least one level and its root is valid """
        if not self.root.max_level > 0:
            return False
        if not self.root.is_valid:
            return False
        return True

    @property
    def max_level(self):
        """ get depth for underlying tree """
        return self.root.max_level

    @logAPICall
    def set_attribute_skip(self, level, skip):
        """
        change skip condition for given level

        raises StatisticError if level is past the end of the skip list
        """
        # valid indices end at len - 1, so level == len is out of range too
        if level >= len(self.skips):
            raise StatisticError("index exceeds number of attributes")
        self.skips[level] = skip

    @logAPICall
    def has_node(self, node):
        """ test given node against the tree, delegates to root.matches """
        return self.root.matches(node)

    @logAPICall
    def add_case(self, taxstr, parse_order=None, parse_modifiers=True, additional_data=None, add_times=1):
        """
        add new case of the structural type (taxstr) to the distribution tree
        using given parse_order (default_parse_order is used if None)
        additional_data is aggregated at the leaf node only
        the case is added add_times times
        parse_modifiers is accepted for interface compatibility and is
        not used here

        raises StatisticError if the statistic is already finalized
        """
        # assert valid condition
        if self.finalized:
            raise StatisticError('Statistics is already finalized and cannot be modified')

        # set parse_order
        if parse_order is None:
            parse_order = self.default_parse_order

        # use a fresh dict per call; a dict literal as default argument
        # would be one object shared by every call
        if additional_data is None:
            additional_data = {}

        # parse string
        bldg_attrs = self.taxonomy.parse(taxstr)
        # update tree starting from root
        for _ in range(add_times):
            self.root.add(bldg_attrs, parse_order, 0, additional_data)

    @logAPICall
    def finalize(self):
        """
        collapse the statistic tree and create weights
        required step before sampling and modification can be performed
        NOTE: add case accumulates counts, finalize call is required to
              convert the counts into weights
        """
        # do nothing if finalized
        if self.finalized:
            return

        # collapse tree to eliminate empty levels
        self.root.eliminate_empty()
        # convert counts into weight
        self.root.calculate_weights()
        self.attributes = self.get_attributes(self.root)
        self.finalized = True

    @logAPICall
    def refresh_leaves(self, with_modifier=True, order_attributes=False):
        """
        collapse weights at all levels of tree into distribution (leaves)

        rebuilds self.leaves as a list of [value, weight, node] entries
        and returns it
        """
        self.leaves = [[val, wt, node]
                       for val, wt, node in self.root.leaves(self.taxonomy,
                                                             with_modifier,
                                                             order_attributes)]
        return self.leaves

    @logAPICall
    def find_node(self, values):
        """
        find a node following the path of given values
        return node if found, None otherwise
        """
        if len(values) == 0:
            return None
        node = self.root
        # walk down one level per value
        for value in values:
            for child in node.children:
                if value == child.value:
                    node = child
                    break
            else:
                # no child carries this value, so the path does not exist;
                # continuing from the same node would match a different path
                return None
        return node

    @logAPICall
    def delete_node(self, node):
        """
        delete given node, distribute its weight to sibling nodes equally
        throws exception if node is only child
        """
        # assert valid condition
        if not self.finalized:
            raise StatisticError('stat must be finalized before modification')

        # recursive delete, see StatisticNodes.delete_node
        parent = node.parent
        parent.delete_node(node)

    @logAPICall
    def test_repeated_value(self, dest_node, branch):
        """
        test if value in root node of branch conflict with values in dest_node's child

        raises StatisticError if a child of dest_node already has the value
        """
        for child in dest_node.children:
            if child.value == branch.value:
                raise StatisticError("Source node value [%s] already exists as destination node's children" % branch.value)

    @logAPICall
    def test_repeated_attribute(self, dest_node, branch):
        """
        test if any attribute in branch is already defined at or above dest_node

        raises StatisticError on the first repeated attribute
        """
        # work on copies so the lists handed out by the nodes are never
        # modified in place
        existing_attributes = list(dest_node.ancestor_names)
        existing_attributes.append(dest_node.name)
        attributes_to_insert = [branch.name] + list(branch.descendant_names)

        for attr in attributes_to_insert:
            # plain membership test; raising inside a try/except ValueError
            # would hide the error if StatisticError derives from ValueError
            if attr in existing_attributes:
                raise StatisticError('Repeating attribute [%s] already exists in source and destination' % attr)

    @logAPICall
    def add_branch(self, node, branch, test_repeating=True, update_stats=True):
        """
        add branch to node as child
        only limitation is that the same attribute does not appear
        multiple times along the path from top to bottom

        a clone of branch is attached, branch itself is left untouched
        weights of node's children are re-balanced if update_stats is set

        raises StatisticError if the statistic is not finalized, or if
        test_repeating is set and an attribute or value is repeated
        """
        # assert valid condition
        if not self.finalized:
            raise StatisticError('stat must be finalized before modification')

        if test_repeating:
            self.test_repeated_attribute(node, branch)
            self.test_repeated_value(node, branch)

        # no exception means no repeating attributes or repeating not checked
        # add a clone of branch to node as child
        branch_to_add = branch.clone
        branch_to_add.set_level_recursive(node.level + 1)
        branch_to_add.parent = node
        node.children.append(branch_to_add)
        # adjust weights proportionally
        if update_stats:
            node.balance_weights()

    @logAPICall
    def delete_branch(self, node):
        """
        delete node and all its children
        redistribute its weight amongst its siblings

        raises StatisticError if the statistic is not finalized
        """
        # assert valid condition
        if not self.finalized:
            raise StatisticError('stat must be finalized before modification')

        parent = node.parent
        parent.children.remove(node)
        parent.balance_weights()

    @logAPICall
    def get_attributes(self, rootnode):
        """ get name of all attributes below given rootnode """
        return rootnode.descendant_names

    @logAPICall
    def set_child_weights(self, node, weights):
        """
        change the weights for the children of a node in the tree.

        raises StatisticError if the statistic is not finalized
        """
        # assert valid condition
        if not self.finalized:
            raise StatisticError('stat must be finalized before modification')
        # recursively set weights, see StatisticNodes.set_child_weights
        node.set_child_weights(weights)

    @logAPICall
    def get_samples(self, total, method):
        """
        create samples for total buildings using statistic tree
        pre-condition: finalize() must be called first

        method is one of ExtrapolateOptions; Fraction / FractionRounded
        multiply each leaf weight by total, any other value performs a
        random walk with total draws

        returns list of (value, count, total size, total cost) tuples,
        one per sampled leaf value; empty list if the tree has no leaves
        """
        samples = {}
        if len(self.leaves) == 0:
            self.refresh_leaves(with_modifier=True, order_attributes=True)
        if len(self.leaves) == 0:
            # nothing to sample from; the random walk below needs at least
            # one leaf to return
            return []

        if method == ExtrapolateOptions.Fraction or method == ExtrapolateOptions.FractionRounded:
            # multiple weights, size and replacement cost
            for val, wt, node in self.leaves:
                t_count = wt * total
                if method == ExtrapolateOptions.FractionRounded:
                    t_count = round(t_count)
                size = node.get_additional_float(StatisticNode.AverageSize)
                cost = node.get_additional_float(StatisticNode.UnitCost)
                samples[val] = (val, t_count, t_count * size, t_count * size * cost)
        else:
            # method=ExtrapolateOptions.RandomWalk
            def get_leaf(leaves, thresh):
                # walk the cumulative weights until thresh is used up
                for val, wt, node in leaves:
                    if wt < thresh:
                        thresh -= wt
                    else:
                        return val, node
                # thresh exceeded the sum of all weights, use the last leaf
                return val, node

            for _ in range(total):
                val, node = get_leaf(self.leaves, random())
                size = node.get_additional_float(StatisticNode.AverageSize)
                cost = node.get_additional_float(StatisticNode.UnitCost)
                if val in samples:
                    _, t_count, t_size, t_cost = samples[val]
                    samples[val] = (val, t_count + 1, t_size + size, t_cost + size * cost)
                else:
                    samples[val] = (val, 1, size, size * cost)
        return list(samples.values())

    @logAPICall
    def get_tree(self):
        """ get underlying tree """
        return self.root

    @logAPICall
    def get_modifiers(self, max_level):
        """ generator for modifiers up to max_level """
        for node, idx, mod in self.root.get_modifiers(max_level):
            yield node, idx, mod

    @logAPICall
    def to_xml(self, pretty=False):
        """
        serialize underlying statistic tree into XML.
        this representation is recommended to use for storing tree to file
        """
        return self.root.to_xml(pretty)

    @logAPICall
    def from_xml(self, xmlnode):
        """
        construct statistic tree from given XML element

        xmlnode must be an ElementTree element; the tree currently held in
        self.root is replaced by one rebuilt from it.

        raises StatisticError if xmlnode is not an ElementTree element
        """
        # iselement() is the public ElementTree check; the private
        # _ElementInterface class is not available on every Python version
        if not ElementTree.iselement(xmlnode):
            raise StatisticError('input must be of type xml.etree.ElementTree.Element')

        # build the replacement tree completely before swapping it in, so a
        # failure while parsing does not leave a half-built tree in self.root
        new_root = StatisticNode(None, 'root')
        new_root.from_xml(xmlnode)
        self.root = new_root

        # cached leaves point at nodes of the replaced tree; drop them
        self.leaves = []

    @logAPICall
    def from_xml_str(self, xmlstr):
        """
        construct statistic tree from given XML string

        raises StatisticError if xmlstr is not a str
        """
        if not isinstance(xmlstr, str):
            raise StatisticError('input must be string')
        self.from_xml(ElementTree.fromstring(xmlstr))