Example #1
    def configure_tree_topology(self, root, degree=2, remove=False):
        """Configures the cluster's network topology as a tree.

        The tree consists of the specified root node and the remaining
        nodes, which form the subtrees. The children are chosen
        incrementally, i.e. sequentially in the order given in the config file.

        Arguments:
            root {integer} -- The tree's root node.

        Keyword Arguments:
            degree {integer} -- The maximum number of children (default: {2})
            remove {boolean} -- Remove the configuration (default: {False})
        """

        self.logger.info("Configuring tree topology...")
        tree = Tree()
        root_node = self.topology.get_node(root)
        tree.create_node(root_node.name, root_node.node_id)
        parent_node = root
        for nodex in self.topology.nodes:
            if nodex.node_id == root_node.node_id:
                continue
            if len(tree.children(parent_node)) >= degree:
                if parent_node == root and root != 0:
                    parent_node = 0
                elif parent_node + 1 == root:
                    parent_node += 2
                else:
                    parent_node += 1
            tree.create_node(nodex.name, nodex.node_id, parent_node)

        self.logger.info("The following tree will be configured:")
        tree.show()

        for nodex in self.topology.nodes:
            self.logger.debug("%s:", nodex.name)
            subtree = tree.subtree(nodex.node_id)
            for nodey in self.topology.nodes:
                if nodex.node_id == nodey.node_id:
                    continue
                if subtree.contains(nodey.node_id):
                    children = tree.children(nodex.node_id)
                    for child in children:
                        if (child.identifier == nodey.node_id
                                or tree.is_ancestor(child.identifier,
                                                    nodey.node_id)):
                            nodex.add_forwarding(
                                nodey,
                                self.topology.get_node(child.identifier))
                            break
                elif tree.parent(nodex.node_id) is not None:
                    nodex.add_forwarding(
                        nodey,
                        self.topology.get_node(
                            tree.parent(nodex.node_id).identifier))

        if not self.testing:
            self.topology.send_forwarding_tables(remove)
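A minimal, self-contained sketch (hypothetical node ids 0..6, not part of the original cluster code) of the same capped fan-out pattern: nodes are attached sequentially, and the parent pointer advances once it already has degree children, skipping the slot occupied by the root.

from treelib import Tree

def build_capped_tree(node_ids, root, degree=2):
    tree = Tree()
    tree.create_node(str(root), root)
    parent = root
    for nid in node_ids:
        if nid == root:
            continue
        if len(tree.children(parent)) >= degree:
            # advance to the next parent id, skipping the root's original slot
            if parent == root and root != 0:
                parent = 0
            elif parent + 1 == root:
                parent += 2
            else:
                parent += 1
        tree.create_node(str(nid), nid, parent=parent)
    return tree

build_capped_tree(range(7), root=2).show()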
Example #2
    def fit(self, X):
        '''Learn the n-gram distribution from the given dataset.

        Parameters
        ----------
        X: list of sequences
            The training set.
        '''
        tree = Tree()
        root = tree.create_node('$', data=self.Payload(count=0, freq=0))
        for seq in X:
            ptrs = deque()
            for symbol in seq:
                ptrs.append(root)
                if len(ptrs) > self.n + 1:
                    ptrs.popleft()
                for i, p in enumerate(ptrs):
                    try:
                        next, = [
                            c for c in tree.children(p.identifier)
                            if c.tag == symbol
                        ]
                        next.data.count += 1
                    except ValueError:
                        next = tree.create_node(tag=symbol,
                                                parent=p.identifier,
                                                data=self.Payload(count=1,
                                                                  freq=0))
                    ptrs[i] = next
        self.tree = tree
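A standalone sketch of the same trie construction on a toy dataset, assuming n=2 and a simple immutable Payload (the original increments a mutable Payload's count in place; here the namedtuple is replaced instead):

from collections import deque, namedtuple
from treelib import Tree

Payload = namedtuple('Payload', ['count', 'freq'])

def build_ngram_trie(X, n=2):
    tree = Tree()
    root = tree.create_node('$', data=Payload(count=0, freq=0))
    for seq in X:
        ptrs = deque()
        for symbol in seq:
            ptrs.append(root)
            if len(ptrs) > n + 1:
                ptrs.popleft()
            for i, p in enumerate(ptrs):
                matches = [c for c in tree.children(p.identifier)
                           if c.tag == symbol]
                if matches:
                    nxt = matches[0]
                    nxt.data = nxt.data._replace(count=nxt.data.count + 1)
                else:
                    nxt = tree.create_node(tag=symbol, parent=p.identifier,
                                           data=Payload(count=1, freq=0))
                ptrs[i] = nxt
    return tree

build_ngram_trie([['a', 'b', 'a', 'b']]).show()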
Example #3
def endpoint_cal(swc_p, unit, sep=","):
    """
    Generate a multi-branch tree from the SWC file.
    """
    print(unit, sep)
    coords, labels, ids, pars = coords_get(swc_p, unit, sep)
    #coords += 1
    if len(coords) == 0:
        print("{} is something wrong".format(swc_p))
        sys.exit(0)

    ftree = Tree()
    ftree.create_node(ids[0], ids[0], data=coords[0])

    for coord_, id_, par_ in zip(coords[1:], ids[1:], pars[1:]):
        #print(id_, par_)
        ftree.create_node(id_, id_, data=coord_, parent=par_)

    endpoint_coords = [x.data for x in ftree.leaves()]
    endpoint_coords.append(coords[0])

    branch_coords = [x.data for x in ftree.all_nodes() if len(ftree.children(x.identifier)) >= 2]
    
    endpoint_coords = np.array(endpoint_coords)
    branch_coords = np.array(branch_coords)
    coords = np.array(coords)
    return endpoint_coords, branch_coords, coords, ftree
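A reduced sketch of the endpoint/branch extraction above on a hand-built toy tree (no SWC parsing, so coords_get is not needed): leaves are treated as endpoints and nodes with two or more children as branch points.

import numpy as np
from treelib import Tree

toy = Tree()
toy.create_node(1, 1, data=[0.0, 0.0, 0.0])             # root / soma
toy.create_node(2, 2, data=[1.0, 0.0, 0.0], parent=1)   # branch point
toy.create_node(3, 3, data=[2.0, 1.0, 0.0], parent=2)   # endpoint
toy.create_node(4, 4, data=[2.0, -1.0, 0.0], parent=2)  # endpoint

endpoint_coords = np.array([n.data for n in toy.leaves()]
                           + [toy.get_node(toy.root).data])
branch_coords = np.array([n.data for n in toy.all_nodes()
                          if len(toy.children(n.identifier)) >= 2])
print(endpoint_coords)
print(branch_coords)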
Example #4
    def is_projective_tree(self, parse_tree: Tree):
        is_violating = [
            len(parse_tree.children(node)) > 2
            for node in parse_tree.expand_tree()
        ]
        if any(is_violating):
            print('here')
        return not any(is_violating)
Example #5
    def compare_actual_folder_with_tree(self, root: path, tree: Tree):
        root_name = tree.root
        root_path = root.joinpath(root_name)
        print(root_path)
        self.assertTrue(root_path.exists(), "The path {} should exist, but doesn't".format(root_path))
        children = tree.children(root_name)
        for child in children:
            subtree = tree.subtree(child.identifier)
            self.compare_actual_folder_with_tree(root_path, subtree)
Example #6
def tree():
    # create the category labels
    cat_tree = Tree()
    cat_tree.create_node((-1, "root"), -1)
    for i in range(10):
        cat_tree.create_node((i, str(i)), i, parent=-1)
    offset = 10
    for i in range(10):
        for j in range(5):
            cur = offset + j
            cat_tree.create_node((cur, str(cur)), cur, parent=i)
        offset += 5

    header1_list = [node.tag for node in cat_tree.children(-1)]
    header2_list = []
    for cat1 in header1_list:
        children = cat_tree.children(cat1[0])
        header2_list.append([node.tag for node in children])
Example #7
def render_org_tree(tree: Tree, node: Node, payload='') -> str:
    parent = tree.parent(node.identifier)
    if parent and parent.identifier.lower() == 'vocabulary':
        payload += render_vocabulary_word(tree, node)
    else:
        depth = tree.depth(node) + 1
        payload += ('*' * depth + ' ' + node.tag + '\n')
    for child in tree.children(node.identifier):
        payload += render_org_tree(tree, tree[child.identifier])
    return payload
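A quick usage sketch for render_org_tree on a toy outline; the tree has no 'vocabulary' parent, so the render_vocabulary_word helper (defined elsewhere in that project) is never reached.

from treelib import Tree

t = Tree()
t.create_node("Notes", "notes")
t.create_node("Ideas", "ideas", parent="notes")
t.create_node("Todo", "todo", parent="notes")
print(render_org_tree(t, t["notes"]))
# expected shape of the output:
# * Notes
# ** Ideas
# ** Todo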
Example #8
def find_nodes_that_contains_more_than_three_children(data):
    res = []
    tree = Tree()
    root = tree.create_node("root", "root")
    tree = build_tree(data=data, tree=tree, parent=root)
    nodes = tree.nodes
    for node in nodes:
        if node != "root" and len(tree.children(node)) >= 3:
            res.append(node)
    return set(res)
Example #9
def trim_excess_root(tree: Tree) -> Tree:
    # Remove any nodes from the root that have only 1 child.
    # i.e., replace A → B → (C, D) with B → (C, D)
    root_id = tree.root
    branches = tree.children(root_id)
    if len(branches) == 1:
        tree.update_node(branches[0].identifier, parent=None, bpointer=None)
        new_tree = tree.subtree(branches[0].identifier)
        return trim_excess_root(new_tree)
    else:
        return tree
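A small check (hypothetical ids) exercising trim_excess_root as defined above: the chain A -> B -> (C, D) collapses to B -> (C, D).

from treelib import Tree

t = Tree()
t.create_node("A", "A")
t.create_node("B", "B", parent="A")
t.create_node("C", "C", parent="B")
t.create_node("D", "D", parent="B")
trimmed = trim_excess_root(t)
print(trimmed.root)  # "B"
trimmed.show()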
Example #10
    def map_tree_to_program(self, tree: Tree) -> str:

        self._node_to_subprog = {}
        frontier = []  # Tree nodes that are left to be explored

        for leaf in tree.leaves():
            span = leaf.data.span
            self._node_to_subprog[span] = self._node_to_type(leaf)
            parent = tree.parent(leaf.identifier)
            if parent and parent not in frontier:
                frontier.append(tree.parent(leaf.identifier))

        while frontier:
            node = frontier.pop()
            children = tree.children(node.identifier)
            assert len(children) == 2
            # check if children were already discovered
            if not all([
                    child.data.span in self._node_to_subprog
                    for child in children
            ]):
                frontier.insert(0, node)
                continue

            child_1 = self._node_to_subprog[children[0].data.span]
            child_2 = self._node_to_subprog[children[1].data.span]
            try:
                if child_1 and not child_2:  # child_2=='NO_LABEL'
                    self._node_to_subprog[node.data.span] = child_1
                elif not child_1 and child_2:  # child_1=='NO_LABEL'
                    self._node_to_subprog[node.data.span] = child_2
                elif not child_1 and not child_2:  # Both children are assigned with 'NO_LABEL'
                    self._node_to_subprog[node.data.span] = self._node_to_type(
                        node)  # ignore children and propagate parent
                else:
                    assert child_2.is_full(
                    )  # make sure child_2 value can be formed
                    self._node_to_subprog[node.data.span] = child_1.apply(
                        child_2)
            except Exception as e:
                try:
                    self._node_to_subprog[node.data.span] = child_2.apply(
                        child_1)
                except Exception as e:
                    raise Exception('final apply_exception: {}'.format(e))

            parent = tree.parent(node.identifier)
            if parent and parent not in frontier:
                frontier.insert(0, parent)

        inner_program = self._node_to_subprog[tree.get_node(
            tree.root).data.span].get_value()  # return the root's value
        return inner_program
Example #11
    def __init__(self, holes=0):
        self.data = np.zeros((3, 3, 3, 3), dtype='int')

        element = range(3)
        order = direct_product(element, element, element, element)

        i = 0
        genTree = Tree()
        root = Node(i, 'root', data=[order[0], self.data.copy()])
        genTree.add_node(root)
        currentNode = root
        getData = lambda node: node.data[1][tuple(node.data[0])]
        while i < len(order):
            i += 1
            a, b, c, d = order[i - 1]
            numPool = pool(self.data, a, b, c, d) - set(
                map(getData, genTree.children(currentNode.identifier)))
            if numPool:
                self.data[a, b, c, d] = np.random.choice(list(numPool))
                node = Node(i, data=[order[i - 1], self.data.copy()])
                genTree.add_node(node, currentNode)
                currentNode = node
            else:
                prev = genTree.parent(currentNode.identifier)
                while len(genTree.children(prev.identifier)) == len(
                        pool(prev.data[1], *(prev.data[0]))):
                    currentNode = prev
                    prev = genTree.parent(currentNode.identifier)
                else:
                    currentNode = prev
                    self.data = currentNode.data[1].copy()
                    i = currentNode.tag
                continue

        h = np.random.choice(len(order), size=holes, replace=False)
        self._answer = self.data.copy()
        self.holes = np.array(order)[h]
        self.data[tuple(self.holes.T.tolist())] = 0
Example #12
def collapse(t1: tl.Tree, t2: tl.Tree) -> tl.Tree:
    # work with copies.
    t1 = tl.Tree(tree=t1, deep=True)
    t2 = tl.Tree(tree=t2, deep=True)

    # reset all the identifiers:
    t1 = reset_ids(t1)
    t2 = reset_ids(t2)

    # paste all the children of t2 into the root of t1
    for child in t2.children(t2.root):
        t1.paste(t1.root, t2.subtree(child.identifier))

    return t1
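A reduced sketch of the paste step above, leaving out the reset_ids helper (assumed to make identifiers unique) by choosing distinct identifiers by hand:

import treelib as tl

t1 = tl.Tree()
t1.create_node("r1", "r1")
t1.create_node("a", "a", parent="r1")

t2 = tl.Tree()
t2.create_node("r2", "r2")
t2.create_node("b", "b", parent="r2")
t2.create_node("c", "c", parent="b")

for child in t2.children(t2.root):
    t1.paste(t1.root, t2.subtree(child.identifier))
t1.show()  # r1 -> a, b; b -> c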
Example #13
def get_metadata(node: Node, tree: Tree, *,
                 include_count=True,
                 include_attrs=False,
                 include_depth=True) -> Dict:
    data = {'id': node.identifier, 'tag': node.tag}
    if include_count:
        data['childrenCount'] = len(tree.children(node.identifier) or [])

    if include_depth:
        data['depth'] = tree.depth(node)

    if include_attrs:
        data.update(get_attributes(node))

    return data
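A small usage sketch for get_metadata; include_attrs stays off because the get_attributes helper is not shown here.

from treelib import Tree

t = Tree()
t.create_node("root", "root")
t.create_node("left", "left", parent="root")
t.create_node("right", "right", parent="root")
print(get_metadata(t["left"], t))
# e.g. {'id': 'left', 'tag': 'left', 'childrenCount': 0, 'depth': 1}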
Example #14
def get_intersection_tree(T1, T2):
    T = Tree(tree=T1, deep=True)
    T1_bfs = [n for n in T1.expand_tree(mode=1)]
    T2_bfs = [n for n in T2.expand_tree(mode=1)]
    for nid in T1_bfs:
        X = set(get_leaf_node_ids_for_node(T, nid))
        diff = min([len(X.symmetric_difference(set( \
                    get_leaf_node_ids_for_node(T2,i)))) \
                    for i in T2_bfs])
        if diff != 0:
            par = T.parent(nid).identifier
            for c in T.children(nid):
                T.move_node(c.identifier, par)
            T.remove_subtree(nid)
    return T
Example #15
    def _build_tree(self, scores: ndarray, bin_edges: ndarray) -> Tree:

        # Build tree with specified number of children at each level
        tree = Tree()
        tree.add_node(Node())  # root node
        nodes_prev = [tree.get_node(tree.root)]
        for level in range(self.depth):
            nodes_current = []
            for node in nodes_prev:
                children = []
                for _ in range(self.n_children[level]):
                    child = Node()
                    tree.add_node(child, parent=node)
                    children.append(child)
                nodes_current.extend(children)
            nodes_prev = nodes_current

        assignments = np.digitize(scores, bin_edges) - 1

        # Store instance ids in leaves
        leaves = tree.leaves()
        for k, node in enumerate(leaves):
            instance_ids = np.where(assignments == k)[0]
            if instance_ids.size == 0:
                tree.remove_node(node.identifier)
            else:
                node.data = instance_ids

        # Prune empty leaves
        check_for_empty_leaves = True
        while check_for_empty_leaves:
            check_for_empty_leaves = False
            leaves = tree.leaves()
            for node in leaves:
                if node.data is None and len(node.successors(
                        tree.identifier)) == 0:
                    # Node is empty and has no siblings
                    tree.remove_node(node.identifier)
                    check_for_empty_leaves = True

        # Simplify tree: remove nodes that only have one child
        for nid in tree.expand_tree(mode=tree.WIDTH):
            children = tree.children(nid)
            if len(children) == 1:
                tree.link_past_node(nid)

        return tree
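A tiny, self-contained sketch (toy ids) of the simplification step used at the end of _build_tree: link_past_node removes a single-child node and reattaches its child to the grandparent. The root is skipped here because link_past_node cannot remove the root.

from treelib import Tree

t = Tree()
t.create_node("root", "root")
t.create_node("only", "only", parent="root")
t.create_node("leaf", "leaf", parent="only")
for nid in list(t.expand_tree(mode=Tree.WIDTH)):
    if nid != t.root and len(t.children(nid)) == 1:
        t.link_past_node(nid)
t.show()  # root -> leaf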
Example #16
def create_dummy_download_folder(root: path, tree: Tree) -> path:
    root_name = tree.root
    root_path = root.joinpath(root_name)

    if not root_path.exists():
        print("Creating {}".format(root_path))
        if root_name.endswith(".mp3"):
            root_path.touch()
        else:
            root_path.mkdir()
        time.sleep(0.01)  # sleep to ensure that the created folders don't have the same ctime

    children = tree.children(root_name)
    for child in children:
        subtree = tree.subtree(child.identifier)
        create_dummy_download_folder(root_path, subtree)
    return root_path
Example #17
    def map_tree_to_program(self, tree: Tree) -> str:

        self._node_to_subprog = {}

        frontier = []  # Tree nodes that are left to be explored

        for leaf in tree.leaves():
            span = leaf.data.span
            self._node_to_subprog[span] = self._node_to_type(leaf)
            parent = tree.parent(leaf.identifier)
            if parent and parent not in frontier:
                frontier.append(tree.parent(leaf.identifier))

        while frontier:
            node = frontier.pop()
            children = tree.children(node.identifier)
            assert len(children) in [2, 3]
            # check if children were already discovered
            if not all([
                    child.data.span in self._node_to_subprog
                    for child in children
            ]):
                frontier.insert(0, node)
                continue

            if len(children) == 2:
                child_1 = self._node_to_subprog[children[0].data.span]
                child_2 = self._node_to_subprog[children[1].data.span]
                self._node_to_subprog[node.data.span] = self.merge_children(
                    child_1, child_2, node)
            else:
                children.sort(key=lambda c: c.data.span[0])
                child_1 = self._node_to_subprog[children[0].data.span]
                child_2 = self._node_to_subprog[children[1].data.span]
                child_3 = self._node_to_subprog[children[2].data.span]
                intermediate = self.merge_children(child_1, child_3, node)
                self._node_to_subprog[node.data.span] = self.merge_children(
                    child_2, intermediate, node)
            parent = tree.parent(node.identifier)
            if parent and parent not in frontier:
                frontier.insert(0, parent)

        inner_program = self._node_to_subprog[tree.get_node(
            tree.root).data.span].get_value()  # return the root's value
        return 'answer ( {} )'.format(inner_program)
Example #18
def get_service_tree():
    s = service.service()

    column = [
        'code', 'name', 'location_code', 'create_time', 'parent_code',
        'parent_name', 'address'
    ]
    result = result_to_dic.to_dic(s, column)
    tree = Tree()
    tree.create_node('root', 'root')

    for x in result:
        if not x['parent_code']:
            tree.create_node(str(x['code']),
                             str(x['code']),
                             parent='root',
                             data=x['name'])
        else:
            tree.create_node(str(x['code']),
                             str(x['code']),
                             parent=x['parent_code'],
                             data=x['name'])

    def transfer(code):
        if not tree.children(code):
            struct = {
                'id': code,
                'label': tree.nodes[code].data,
            }
            return struct
        struct = {'id': code, 'label': tree.nodes[code].data, 'children': []}
        for node in tree.children(code):
            struct['children'].append(transfer(node.tag))

        return struct

    result = []
    for x in tree.children('root'):
        result.append(transfer(x.identifier))
    service.db_close()
    return result
Example #19
from treelib import Tree

tree = Tree()

tree.create_node("a", "a", data={"v": 0})
tree.create_node("b", "b", data={"v": 7}, parent="a")
tree.create_node("c", "c", data={"v": 4}, parent="b")
tree.create_node("d", "d", data={"v": 3}, parent="b")
tree.create_node("f", "f", data={"v": 0}, parent="b")
tree.create_node("e", "e", data={"v": 3}, parent="a")

print(tree)


def func_1(node):
    v = 0
    children = tree.children(node.identifier)
    for child in children:
        if child.data["v"] == 0 and len(tree.children(child.identifier)) != 0:
            child.data["v"] = func_1(child)
        v += child.data["v"]

    return v


for node in tree.all_nodes():
    if node.data["v"] == 0 and len(tree.children(node.identifier)) != 0:
        node.data["v"] = func_1(node)

for node in tree.all_nodes():
    print("node: " + node.tag + " value: {}".format(node.data["v"]))
Example #20
class verifier(object):
    '''
    This verifier object is intended to receive an Enfragmo instance file,
    and then to determine the original formula corresponding to that file.
    Each subformula which is an atom will be given as a lower-case letter,
    and nesting of formulas will be indicated by appropriate bracketing.
    The symbols used to represent operators are as follows:
        And           &
        Or            v
        Not           ~
        Implication   ->
        Box           box
        Diamond       dia
    Unary operators applied directly to atoms are dropped.
    
    The format of an instance file is assumed to be as follows:
    
        TYPE  Subformula [ 1.. n]
        TYPE World [1..m]
        PREDICATE Atom
        
        ...
        
        PREDICATE And
        ...
        
        PREDICATE Or
        ...
        
        PREDICATE Not
        ...
        
        PREDICATE Implication
        ...
        
        PREDICATE Box
        ...
        
        PREDICATE Diamond
        ...
        
        PREDICATE SameAtom
        
    Where "..." indicates that either singletons, pairs, or triples will occupy
    the lines below the current PREDICATE delimiter and the next, signifying
    the appropriate relationship between the subformulas. For example,
    
        PREDICATE And
        (1, 2, 3)
        
    indicates that the main connective of subformula 1 is conjunction, and that
    subformulas 2 and 3 are its two operands.
    '''
    
    def __init__(self, filename):
        '''
        Receives the name of the instance file to be verified, then uses this
        to initialize the corresponding tree structure 
        '''
        self.SameAtomList = unionfind.UnionFind()

        self.filename = filename      
        
    def readProblemInstanceFile(self):
        '''
        This method assumes that the instance file exists and is correctly
        formatted.
        
        Subformulas are labeled in pre-order DFS traversal fashion, so as to
        allow the numbering to reflect the operator/operand relationship.
        '''
        self.instanceFileLines = [line.strip() for line in open(self.filename) if line != '\n']
    
    def parseProblemInstanceFile(self):
        self.countNumTreeNodes()
        #self.countNumTreeLeaves()
        #self.countNumAtoms()
        self.setUpSameAtomList()
        self.buildTree()

    def numWorlds(self):
        return int(self.instanceFileLines[1][-2])

    def countNumTreeNodes(self):
        '''
        The number of tree nodes is inherent in the number of subformulas,
        which is given on the first line of a well-formed problem instance
        file. In this way, the given formula is considered to be the first
        subformula, where its main connective labels the root node, and its
        '''
        self.numTreeNodes = int(self.instanceFileLines[0].split(" ")[-1].split("]")[0])
        
    def countNumTreeLeaves(self):
        '''
        The number of tree leaves is simply the number of singletons that 
        satisfy Atom, including duplicates.
        '''
        self.numTreeLeaves = \
            int((self.instanceFileLines.index("PREDICATE SameAtom") - 1)- \
             self.instanceFileLines.index("PREDICATE Atom"))
    
    def countNumAtoms(self):
        '''
        Since multiple subformulas can refer to the same atom, need to subtract
        the count of pairs satisfying SameAtom from the total count of
        subformulas satisfying Atom. This can be done by counting the number of
        entries between each of the appropriate PREDICATE identifiers (given a 
        well-formed instance file).
        '''
        self.numAtoms = int((self.instanceFileLines.index("PREDICATE And")- \
                            (self.instanceFileLines.index("PREDICATE Atom")+1) - \
                           (len(self.instanceFileLines) - \
                           (self.instanceFileLines.index("PREDICATE SameAtom")+1))))
        return self.numAtoms        
      
    def assignSymbol(self, label):
        if label == "And":
            return "&"
        elif label == "Or":
            return "v"
        elif label == "Not":
            return "~"
        elif label == "Implication":
            return "->"
        elif label == "Biconditional":
            return "<->"
        elif label == "Box":
            return "box"
        elif label == "Diamond":
            return "dia"
    
    def assignAtom(self, i):
        for atomEquivClass in self.SameAtomList.get_sets():
            if str(i) in atomEquivClass:
                return self.SameAtomList.get_leader(str(i))
            
        self.SameAtomList.insert(str(i))
        return self.SameAtomList.get_leader(str(i))
            
    def setUpSameAtomList(self):
        '''
        Using the Union-Find data structure, I will keep track of the equivalence
        classes of SameAtoms and then supply a label based on the index of the
        subset in which a subformula corresponding with an atom is contained.
        '''
        
        startIndexSameAtoms = findInFile(self.instanceFileLines, lambda x: "PREDICATE SameAtom" in x) + 1
        sameAtomPairs = self.instanceFileLines[startIndexSameAtoms:]
        
        for pair in sameAtomPairs:
            tmp = pair.split(",")
            label1 = tmp[0].split("(")[1]
            label2 = tmp[1].split(")")[0]
            self.SameAtomList.insert(label1, label2)
            
    def determineConnective(self, i):
        '''
        Each subformula label is guaranteed to be the first argument of some
        tuple; either it will correspond with an atom, the only argument,
        or it will correspond with the main connective of a unary 
        (i.e. negation, box, or diamond) or binary (i.e. conjunction or 
        disjunction) subformula.
        '''
        SiThing=findInFile(self.instanceFileLines, lambda x: "("+str(i)+")" in x)
        if SiThing:
            for k in range(SiThing, 0, -1): # go back in the file until you can find out what predicate we're dealing with
                if self.instanceFileLines[k].split(" ")[0] == "PREDICATE":
                    if self.instanceFileLines[k].split(" ")[1] == "Falsum":
                        return "false"
            return self.assignAtom(i)
        if findInFile(self.instanceFileLines, lambda x: "("+str(i)+"," in x):
            SiAsMainConnective = findInFile(self.instanceFileLines, lambda x: "("+str(i)+"," in x)
            for j in range(SiAsMainConnective, 0, -1): # go back in the file until you can find out what predicate we're dealing with
                if self.instanceFileLines[j].split(" ")[0] == "PREDICATE":
                    if self.instanceFileLines[j].split(" ")[1] == "SameAtom": # if there are no tuples under SameAtom, then this isn't reached due to SiAsMainConnective
                        return self.assignAtom(i)
                    else:
                        return self.assignSymbol(self.instanceFileLines[j].split(" ")[1]) # if the predicate refers to an operator, then we need to find out which one!
    
    def nodeCreation(self, predicate, SiConnective, i):        
        SiAsOperand = findInFile(self.instanceFileLines, predicate)
        ParentOfSi = str(self.instanceFileLines[SiAsOperand].split(",")[0].split("(")[1])
        self.syntaxTree.create_node(SiConnective, str(i), parent=ParentOfSi)        
        
    def makeSyntaxTreeNode(self, SiConnective, i):  
        if findInFile(self.instanceFileLines, lambda x: ","+str(i)+"," in x): #find where subformula i appears as second operand, and 
            self.nodeCreation(lambda x: ","+str(i)+"," in x, SiConnective, i)
        elif findInFile(self.instanceFileLines, lambda x: ","+str(i)+")" in x):                   
            self.nodeCreation(lambda x: ","+str(i)+")" in x, SiConnective, i) 
        else:
            self.syntaxTree.create_node(SiConnective,str(i))
                
    def buildTree(self):
        '''
        To build the syntax tree for the formula as laid out in the instance
        file, we need to delve into the formula by means of stripping off the
        main connective of each subformula (starting with the main connective
        of the formula itself) and labeling a tree node with the symbol
        corresponding with that connective. Note that each subformula appears
        exactly once as the first argument of a tuple, and can appear at most
        once as a second (or third, for binary operators) argument in a tuple.             
        '''
        self.syntaxTree = Tree()
        for i in range(1, self.numTreeNodes+1):
            SiConnective = self.determineConnective(i)
            self.makeSyntaxTreeNode(SiConnective, i)
          
    def myShowTree(self, tree, root):
        '''
        In-order depth-first traversal of the syntax tree using recursion: the
        first call receives the root of the tree, and each recursive call
        receives the left or right child as the root of its subtree, with the
        node itself printed in between.
        '''
        rootNID = root.identifier
        x=self.syntaxTree.children(rootNID)
        
        if len(self.syntaxTree.children(rootNID)) == 2:
            print("(", end=" ")
            self.myShowTree(self.syntaxTree, self.syntaxTree.children(rootNID)[0])
            
        print(self.syntaxTree.get_node(rootNID).tag, end=" ")
        
        if len(self.syntaxTree.children(rootNID)) >= 1:
            if len(self.syntaxTree.children(rootNID)) == 1:
                print("(", end=" ")
                self.myShowTree(self.syntaxTree, self.syntaxTree.children(rootNID)[0])
            else:
                self.myShowTree(self.syntaxTree, self.syntaxTree.children(rootNID)[1])
            print(")", end=" ")   
Example #21
class ParentChildEvaluate:
    """
	Class to perform intrinsic evaluation of embeddings using the hierarchical relation of parent/child domains

	1) parse ParentChildTreeFile.txt from interpro
	2)	for each child of root
			nn = ask embeddings model to give M nearest neighbors
		calculate_precision_atM(child.descendants, nn)
		calculate_recall_atN(child.descendants, nn)
	3) plot histogram of precision and recall

	#Credits: https://medium.com/@m_n_malaeb/recall-and-precision-at-k-for-recommender-systems-618483226c54
	"""
    def __init__(self, data_path):
        """
		ParentChildEvaluate class init

		Parameters
		----------
		data_path : str
			full data path

		Returns
		-------
		None
		"""
        print("ParentChildEvaluate")
        self.data_path = data_path
        self.tree = Tree()

    def get_model_name(self):
        """
		Get embedding model name

		Parameters
		----------

		Returns
		-------
		str
			embedding model name
		"""
        return ntpath.basename(self.model_file)

    def load_emb_model(self, model_file, is_model_binary):
        """
		Load embedding model

		Parameters
		----------
		model_file : str
			model file name
		is_model_binary : bool
			model is saved in binary format (True), otherwise (False)

		Returns
		-------
		None
		"""
        self.model_file = model_file
        self.emb_model = KeyedVectors.load_word2vec_format(
            model_file, binary=is_model_binary)

    def parse_parent_child_file(self,
                                parent_child_file_name,
                                out_path,
                                output_file_name,
                                save_parsed_tree=False):
        """
		Parse the parent child file

		Parameters
		----------
		parent_child_file_name : str
			parent child file name
		out_path : str
			output data path
		output_file_name : str
			output file name
		save_parsed_tree : bool
			after parsing save parsed tree (True), otherwise (False)

		Returns
		-------
		None
		"""
        previous_num_minus_signs = 0
        last_interpro_id = None

        self.tree.create_node("INTERPRO", "INTERPRO")
        current_parent = "INTERPRO"
        with open(parent_child_file_name, 'r') as parent_child_file:
            for line in parent_child_file:
                line = line.strip()
                current_num_minus_signs = line[0:line.find("IPR")].count("--")
                double_colon_split = line.strip("--").split("::")
                interpro_id = double_colon_split[0]
                assert interpro_id[
                    0:
                    3] == "IPR", "AssertionError: {} \n interpro id should start with IPR and has length of 9.".format(
                        interpro_id)
                if current_num_minus_signs == 0:
                    # assert child not in the tree
                    current_parent = "INTERPRO"
                    self.tree.create_node(interpro_id,
                                          interpro_id,
                                          parent=current_parent)
                else:
                    # check if you are still with current parent or you need to create a new one
                    if current_num_minus_signs == previous_num_minus_signs:  # same level as last parent
                        self.tree.create_node(interpro_id,
                                              interpro_id,
                                              parent=current_parent)
                    elif current_num_minus_signs > previous_num_minus_signs:  # one level down from last parent -> create new parent
                        current_parent = last_interpro_id
                        self.tree.create_node(interpro_id,
                                              interpro_id,
                                              parent=current_parent)
                    else:  # one level up from last parent -> get parent of the current parent
                        if current_parent == "INTERPRO":  # if one level up is the root then your papa is the root
                            papa = "INTERPRO"
                        else:  # if one level up is not the root then get the parent of your parent (papa)
                            papa = self.tree[current_parent].bpointer
                        self.tree.create_node(interpro_id,
                                              interpro_id,
                                              parent=papa)
                        current_parent = papa
                previous_num_minus_signs = current_num_minus_signs
                last_interpro_id = interpro_id

        # quick test
        # for interpro_node in self.tree.children("IPR000549"):
        #	print(interpro_node.identifier)
        # self.tree.show()
        if save_parsed_tree:
            self.tree.save2file(
                filename=os.path.join(out_path, output_file_name))

    def get_nn_calculate_precision_recall_atN(self, N, plot_histograms,
                                              save_diagnostics):
        """
		Get the nearest domain vectors for each domain and calculate precision and recall based on the ground truth (parsed tree)

		Parameters
		----------
		N : int
			number of nearest domain vector,
			if N==100 then retrieve as many as the children of a domain in the parsed tree
		plot_histograms : bool
			plot histograms for performance metrics (True), otherwise (False)
		save_diagnostics : bool
			save diagnostic plots for domain with low recall

		Returns
		-------
		None
		"""
        print("Get NN and calculate precision and recall at {}".format(N))
        recalls_n = []
        precisions_n = []
        interpros_recall0 = []
        interpros_num_children_recall0 = []

        if N == 100:
            retrieve_all_children = True
        else:
            retrieve_all_children = False

        for interpro_node in self.tree.children("INTERPRO"):
            recall_n = 0.0
            precision_n = 0.0
            all_children = self.tree.subtree(
                interpro_node.identifier).all_nodes()
            assert interpro_node in all_children, "AssertionError: parent {} is not in the set of all children.".format(
                interpro_node.identifier)
            all_children.remove(interpro_node)
            if retrieve_all_children:
                N = len(all_children)
            if self.emb_model.__contains__(interpro_node.identifier):
                nearest_neighbor_ids = set([
                    nn[0] for nn in self.emb_model.most_similar(
                        positive=interpro_node.identifier, topn=N)
                ])
            else:
                print("Model does not contain this id.")
                continue
            true_positives = set([child.identifier for child in all_children
                                  ]).intersection(nearest_neighbor_ids)
            assert len(all_children) > 0 and len(
                nearest_neighbor_ids
            ) == N, "AssertionError: For parent {} all children should be > 0 and nearest neighbors should be equal to N.".format(
                interpro_node.identifier)
            recall_n = len(true_positives) / len(all_children)
            precision_n = len(true_positives) / len(nearest_neighbor_ids)
            assert 0.0 <= recall_n <= 1.0 and 0.0 <= precision_n <= 1.0, "AssertionError: For parent {} recall or precision is not at (0,1]".format(
                interpro_node.identifier)
            recalls_n.append(recall_n)
            precisions_n.append(precision_n)
            if recall_n == 0.0:
                interpros_recall0.append(interpro_node.identifier)
                interpros_num_children_recall0.append(len(all_children))
        if retrieve_all_children:  # for printing in title
            N = 100
        if plot_histograms:
            if retrieve_all_children:
                self.plot_histogram(recalls_n, "Recall", "Recall",
                                    "Number of Interpro domains", "recall")
            else:
                self.plot_histogram(recalls_n, "Recall@{}".format(N), "Recall",
                                    "Number of Interpro domains",
                                    "recall_{}".format(N))
                self.plot_histogram(precisions_n, "Precision@{}".format(N),
                                    "Precision", "Number of Interpro domains",
                                    "precision_{}".format(N))
        if retrieve_all_children:
            avg_recall = sum(recalls_n) / len(recalls_n)
            print("Average recall at 100: {:.3f}".format(avg_recall))
        if save_diagnostics:
            self.save_diagnostics_recall0(interpros_recall0,
                                          interpros_num_children_recall0)

    def save_diagnostics_recall0(self, interpros_recall0,
                                 interpros_num_children_recall0):
        """
		Save diagnostics histogram for domains with recall of 0

		Parameters
		----------
		interpros_recall0 : list of str
			interpro ids with recall 0
		interpros_num_children_recall0 : list of str
			number of children of each interpro id, found from the parsed tree, with recall 0
		Returns
		-------
		None
		"""
        print("Saving diagnostics for intepro domains with recall 0")
        with open(
                os.path.join(
                    self.data_path,
                    self.get_model_name() + "_interpros_recall0" + ".txt"),
                "w") as interpros_recall0_file:
            # write file with names of interpro having recall 0
            interpros_recall0_file.write("\n".join(interpros_recall0))
        # plot histogram of number of children for interpro parents with recall 0
        self.plot_histogram(interpros_num_children_recall0, None,
                            "Number of Intepro domains", "Number of children",
                            "hist")

    def plot_histogram(self, performance_N, title, xlabel, ylabel, out_suffix):
        """
		Plot histogram for performance metric and also for the number of children

		Parameters
		----------
		performance_N : list of float
			performance metric value per parent domain
		title : str
			histogram title (if not None)
		xlabel : str
			label x
		ylabel : str
			label y
		out_suffix : str
			histogram output file name suffix

		Returns
		-------
		None
		"""
        # plot the histogram of lengths
        fig = plt.figure()
        plt.hist(performance_N,
                 color='g',
                 align='left',
                 edgecolor='k',
                 alpha=0.8)
        plt.xlabel(xlabel, fontsize=14)
        plt.ylabel(ylabel, fontsize=14)
        if title is not None:
            plt.title(title, fontsize=14)
        plt.xticks(np.arange(0, 1.1, 0.1))
        hist_name = self.get_model_name() + "_" + out_suffix + ".png"
        fig.savefig(os.path.join(self.data_path, hist_name),
                    bbox_inches='tight',
                    dpi=600)
Example #22
class Bftree:
    def __init__(self,
                 max_depth=2,
                 min_size=30,
                 n_features=500,
                 criterion=None):
        self.max_depth = max_depth
        self.min_size = min_size
        self.n_features = n_features
        self.tree = Tree()
        self.criterion = criterion

    def fit(self, X, y):
        """Fits a Trainingset 

        Calculates initial best scoring feature for root node
        Recurively adds child features to root node
        
        """
        root = utils.get_split(data=X,
                               targets=y,
                               tree=self.tree,
                               criterion=self.criterion,
                               n_features=self.n_features)

        n_tag = "%s=%d (%f)" % (root['index'], root['attr'],
                                round(root['score'], 2))
        n_dict = {"attr": root['attr'], "score": root['score'], "child": 0}
        self.tree.create_node(n_tag, root['index'], data=n_dict, parent=None)
        self.split(root, 1)

    def split(self, res, current_depth):
        """Splits a node into l/r child
        Adds left/right node if split partitions exist
        Split partitions are added as child nodes if score child > score root
        Halts execution if termination criteria are met:
                    1) get_split returns empty partitions
                    2) current_depth >= max_depth
                    3) partition (l/r) size <= min_size (rows)
        Returns the current tree if a termination criterion is met
        """
        # Check if partition(s) exist.
        if res['groups'] is not None:
            left, right = res['groups'][0]  # L/R feature partitions
            left_y, right_y = res['groups'][1]  # L/R scoring partitions
        else:
            return self.tree
        # Return tree if max_depth reached.
        if current_depth >= self.max_depth:
            return self.tree

        # Only split the left feature partition if it has at least min_size rows.
        if left.shape[0] >= self.min_size:
            # Calculate split score for left feature partition.
            current_left = utils.get_split(data=left,
                                           targets=left_y,
                                           tree=self.tree,
                                           criterion=self.criterion,
                                           n_features=self.n_features)
            # Check if feature is returned or None when not improving.
            if (current_left['index'] is not None):
                # Add new left child node
                n_tag = "%s=%d(L) (%f)" % (current_left['index'],
                                           current_left['attr'],
                                           round(current_left['score'], 2))
                n_dict = {
                    "attr": current_left['attr'],
                    "score": current_left['score'],
                    "child": 1
                }
                self.tree.create_node(tag=n_tag,
                                      identifier=current_left['index'],
                                      data=n_dict,
                                      parent=res['index'])
                self.split(current_left, current_depth + 1)

        # Only split the right feature partition if it has at least min_size rows.
        if right.shape[0] >= self.min_size:
            # Calculate split score for right feature partition.
            current_right = utils.get_split(data=right,
                                            targets=right_y,
                                            tree=self.tree,
                                            criterion=self.criterion,
                                            n_features=self.n_features)
            # Check if feature is returned or None when not improving.
            if (current_right['index'] is not None):
                # Add new right child node
                n_tag = "%s=%d(R) (%f)" % (current_right['index'],
                                           current_right['attr'],
                                           round(current_right['score'], 2))
                n_dict = {
                    "attr": current_right['attr'],
                    "score": current_right['score'],
                    "child": 2
                }
                self.tree.create_node(tag=n_tag,
                                      identifier=current_right['index'],
                                      data=n_dict,
                                      parent=res['index'])
                self.split(current_right, current_depth + 1)

    def getChild(self, feature, site):
        """Helper to return the L/R child of a specific node based on a
        Node's dict.
        L_child: 1
        R_child: 2         
        """
        children = self.tree.children(feature)
        if len(children) > 0:
            for ch in children:
                if ch.data['child'] == site:
                    return ch
        else:
            return None

    def predict(self, feature, dataset):
        """"Predicts the "Class" of a feature set (Pandas series).
        Left branches will be classified as class 0 (no-benedit)
        Right branches will be classified as class 1 (benedit)
        """
        current_node = self.tree.get_node(feature)
        if (dataset[feature].item() == current_node.data['attr']):
            # Root match. Check if left child (site=1) exists.
            if self.getChild(feature, 1) is not None:
                # Recursive prediction for child node.
                return (self.predict(
                    self.getChild(feature, 1).identifier, dataset))
            else:
                # Stop iteration
                return (1)
        else:
            # Root match. Check if right child (site=2) exists.
            if self.getChild(feature, 2) is not None:
                # Recursive prediction for child node.
                return (self.predict(
                    self.getChild(feature, 2).identifier, dataset))
            else:
                # Stop iteration
                return (0)
Example #23
class MCTS_better():
    def __init__(self,
                 confidence=CONFIDENCE,
                 time=MAX_CAL_TIME,
                 max_actions=1000):

        self.max_cal_time = float(time)
        self.max_actions = max_actions

        self.confidence = confidence

        self.max_depth = 0

    def get_move(self, board, player):
        self.board = board

        self.player = player  # The chess color [BLACK or WHITE] represent the player
        empty_set, a, b = self.board.get_board_item()
        if len(a) == 0 and len(b) == 0:
            return (MIDDLE, MIDDLE)
        if len(empty_set) == 1:
            return (empty_set[0][0], empty_set[0][1])
        if len(empty_set) == 0:
            print("No place to play")
            return None
        self.MCTS_tree = Tree()
        self.HeadNode = Node('HeadNode', 0)
        self.MCTS_tree.add_node(self.HeadNode)

        self.plays = {}
        self.wins = {}
        simulations = 0
        start = time.time()
        while time.time() - start < (self.max_cal_time - 0.5):
            board_for_MCTS = copy.deepcopy(self.board)
            player_for_MCTS = self.player

            self.run_simulation(board_for_MCTS, player_for_MCTS)
            simulations += 1

        print("total simuations = ", simulations)
        move = self.select_best_move()

        print("MCTS move:", move[0], move[1])

        return move

    def run_simulation(self, board, player):

        tree = self.MCTS_tree
        node = self.HeadNode
        availables = board.get_k_dist_empty_tuple(2)

        visited_states = set()
        winner = -1
        expand = True

        # Simulation Start
        for t in range(1, self.max_actions + 1):
            availables = board.get_k_dist_empty_tuple(2)
            children = tree.children(node.identifier)
            self.plays = {}
            self.wins = {}
            plays = self.plays
            wins = self.wins
            for n in children:
                plays[n.tag] = n.data[0]
                wins[n.tag] = n.data[1]
            # Selection

            noused_set = self.select_noused_node(availables, player)

            if len(noused_set) == 0:
                #print("ok")

                #print(sum(plays[(player,move)] for move in availables))
                log_total = log(
                    sum(plays[(player, move)] for move in availables))
                value, move = max(
                    ((wins[(player, move)] / plays[(player, move)]) +
                     sqrt(self.confidence * log_total / plays[(player, move)]),
                     move) for move in availables)
                #print(move)

                for n in children:
                    if n.tag == (player, move):
                        node = n

            else:
                #print('good')
                random.shuffle(noused_set)
                move = noused_set.pop()
                new_node = Node(tag=(player, move), data=[0, 0])
                tree.add_node(new_node, parent=node)
                node = new_node

            board.draw_xy(move[0], move[1], player)

            #Expand
            # if expand and (player,move,t) not in plays:
            # 	expand = False
            # 	plays[(player,move,t)] = 0
            # 	wins[(player,move,t)] = 0
            # 	if t > self.max_depth:
            # 		self.max_depth = t

            #visited_states.add((player,move))
            availables = board.get_k_dist_empty_tuple(2)
            is_full = not len(availables)
            winner = board.anyone_win(move[0], move[1])

            if winner != EMPTY or is_full:
                #print(str(move) + '----' + str(winner) + '-----' + str(player))
                break

            player = self.player_change(player)

        while not node.is_root():
            if winner == self.player:
                node.data[1] += 1
            node.data[0] += 1
            node = tree.parent(node.identifier)

    def select_best_move(self):
        empty_set = self.board.get_k_dist_empty_tuple(2)
        # for move in empty_set:
        # 	ratio = (self.wins.get((self.player,move),0)/
        # 		self.plays.get((self.player,move),1))
        # 	print(move)
        # 	print(ratio)

        #print(empty_set)
        self.plays = {}
        self.wins = {}
        plays = self.plays
        wins = self.wins
        children = self.MCTS_tree.children(0)

        for n in children:
            plays[n.tag] = n.data[0]
            wins[n.tag] = n.data[1]
        # print(plays)
        # print(wins)
        ratio_to_win, move = max(
            (self.wins.get((self.player, move), 0) /
             self.plays.get((self.player, move), 1) + self.closest_value(move),
             move) for move in empty_set)
        print(ratio_to_win)
        return move

    def closest_value(self, move):
        x = move[0]
        y = move[1]
        return (abs((x - N_LINE + 1) * x) + abs((y - N_LINE + 1) * y)) * 0.0001

    def player_change(self, player):
        if player == BLACK:
            player = WHITE
        elif player == WHITE:
            player = BLACK

        return player

    def select_noused_node(self, availables, player):
        noused_set = []

        for move in availables:
            if not self.plays.get((player, move)):
                noused_set.append(move)

        return noused_set
Example #24
class StepParse:
    def __init__(self):
        pass

    def load_step(self, step_filename):

        self.nauo_lines = []
        self.prod_def_lines = []
        self.prod_def_form_lines = []
        self.prod_lines = []
        self.filename = os.path.splitext(step_filename)[0]

        line_hold = ''
        line_type = ''

        # Find all search lines
        with open(step_filename) as f:
            for line in f:
                # TH: read pointer of lines as they are read, so if the file has text wrap it will notice and add it to the following lines
                index = re.search("#(.*)=", line)
                if index:
                    # TH: if not none then it is the start of a line so read it
                    # want to hold line until it has checked next line
                    # if next line is a new indexed line then save previous line
                    if line_hold:
                        if line_type == 'nauo':
                            self.nauo_lines.append(line_hold)
                        elif line_type == 'prod_def':
                            self.prod_def_lines.append(line_hold)
                        elif line_type == 'prod_def_form':
                            self.prod_def_form_lines.append(line_hold)
                        elif line_type == 'prod':
                            self.prod_lines.append(line_hold)
                        line_hold = ''
                        line_type = ''

                    prev_index = True  # TH remember previous line had an index
                    if 'NEXT_ASSEMBLY_USAGE_OCCURRENCE' in line:
                        line_hold = line.rstrip()
                        line_type = 'nauo'
                    elif ('PRODUCT_DEFINITION ' in line
                          or 'PRODUCT_DEFINITION(' in line):
                        line_hold = line.rstrip()
                        line_type = 'prod_def'
                    elif 'PRODUCT_DEFINITION_FORMATION' in line:
                        line_hold = line.rstrip()
                        line_type = 'prod_def_form'
                    elif ('PRODUCT ' in line or 'PRODUCT(' in line):
                        line_hold = line.rstrip()
                        line_type = 'prod'
                else:
                    prev_index = False
                    #TH: if end of file and previous line was held
                    if 'ENDSEC;' in line:
                        if line_hold:
                            if line_type == 'nauo':
                                self.nauo_lines.append(line_hold)
                            elif line_type == 'prod_def':
                                self.prod_def_lines.append(line_hold)
                            elif line_type == 'prod_def_form':
                                self.prod_def_form_lines.append(line_hold)
                            elif line_type == 'prod':
                                self.prod_lines.append(line_hold)
                            line_hold = ''
                            line_type = ''
                    else:
                        #TH: if not end of file
                        line_hold = line_hold + line.rstrip()

        self.nauo_refs = []
        self.prod_def_refs = []
        self.prod_def_form_refs = []
        self.prod_refs = []

        # TH: added 'replace(","," ")' to replace ',' with a space to make the split easier if there are no spaces in between the words
        # Find all (# hashed) line references and product names
        # TH: it might be worth finding a different way of extracting data we do want rather than fixes to get rid of the data we don't
        for j, el_ in enumerate(self.nauo_lines):
            self.nauo_refs.append([
                el.rstrip(',')
                for el in el_.replace(",", " ").replace("=", " ").split()
                if el.startswith('#')
            ])
        for j, el_ in enumerate(self.prod_def_lines):
            self.prod_def_refs.append([
                el.rstrip(',')
                for el in el_.replace(",", " ").replace("=", " ").split()
                if el.startswith('#')
            ])
        for j, el_ in enumerate(self.prod_def_form_lines):
            self.prod_def_form_refs.append([
                el.rstrip(',')
                for el in el_.replace(",", " ").replace("=", " ").split()
                if el.startswith('#')
            ])
        for j, el_ in enumerate(self.prod_lines):
            self.prod_refs.append([
                el.strip(',') for el in el_.replace(",", " ").replace(
                    "(", " ").replace("=", " ").split() if el.startswith('#')
            ])
            self.prod_refs[j].append(el_.split("'")[1])

        # Get first two items in each sublist (as third is shape ref)
        #
        # First item is 'PRODUCT_DEFINITION' ref
        # Second item is 'PRODUCT_DEFINITION_FORMATION <etc>' ref
        self.prod_all_refs = [el[:2] for el in self.prod_def_refs]

        # Match up all references down to level of product name
        for j, el_ in enumerate(self.prod_all_refs):

            # Add 'PRODUCT_DEFINITION' ref
            for i, el in enumerate(self.prod_def_form_refs):
                if el[0] == el_[1]:
                    el_.append(el[1])
                    break

            # Add names from 'PRODUCT' lines
            for i, el in enumerate(self.prod_refs):
                if el[0] == el_[2]:
                    el_.append(el[2])
                    break

        # Find all parent and child relationships (2nd and 3rd item in each sublist)
        self.parent_refs = [el[1] for el in self.nauo_refs]
        self.child_refs = [el[2] for el in self.nauo_refs]

        # Find distinct parts and assemblies via set operations; sets, so no repetition of items
        self.all_type_refs = set(self.child_refs) | set(self.parent_refs)
        self.ass_type_refs = set(self.parent_refs)
        self.part_type_refs = set(self.child_refs) - set(self.parent_refs)
        #TH: find root node
        self.root_type_refs = set(self.parent_refs) - set(self.child_refs)
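        # e.g. with hypothetical refs parent_refs = ['#10', '#20'] and
        # child_refs = ['#20', '#30', '#40']:
        #   ass_type_refs  -> {'#10', '#20'}   (anything that is a parent)
        #   part_type_refs -> {'#30', '#40'}   (children that are never parents)
        #   root_type_refs -> {'#10'}          (parents that are never children)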

        # Create simple parts dictionary (ref + label)
        self.part_dict = {el[0]: el[3] for el in self.prod_all_refs}
#        self.part_dict_inv = {el[3]:el[0] for el in self.prod_all_refs}

    def show_values(self):
        # TH: basic testing, if needed these could be split up
        print(self.nauo_lines)
        print(self.prod_def_lines)
        print(self.prod_def_form_lines)
        print(self.prod_lines)
        print(self.nauo_refs)
        print(self.prod_def_refs)
        print(self.prod_def_form_refs)
        print(self.prod_refs)

#    HR: "create_dict" replaced by list comprehension elsewhere
#
#    def create_dict(self):
#
#        # TH: links nauo number with a name and creates dict
#        self.part_dict  = {}
#        for part in self.all_type_refs:
#            for sublist in self.prod_def_refs:
#                if sublist[0] == part:
#                    prod_loc = '#' + re.findall('\d+',sublist[1])[0]
#                    pass
#            for sublist in self.prod_def_form_refs:
#                if sublist[0] == prod_loc:
#                    prod_loc = '#' + str(re.findall('\d+',sublist[1])[0])
#                    pass
#            for sublist in self.prod_refs:
#                if sublist[0] == prod_loc:
#                    part_name = sublist[2]
#
#            self.part_dict[part] = part_name

    def create_tree(self):

        #TH: create tree diagram in newick format
        #TH: find root node

        self.tree = Tree()
        #TH: check if there are any parts to make a tree from, if not don't bother
        if self.part_dict == {}:
            return

        root_node_ref = list(self.root_type_refs)[0]
        # HR added part reference as data for later use
        self.tree.create_node(self.part_dict[root_node_ref],
                              0,
                              data={'ref': root_node_ref})

        #TH: root node created, now fill in the next layer
        #TH: create dict for tree, as each node needs a unique name
        i = [0]  # Iterates through nodes
        self.tree_dict = {}
        self.tree_dict[i[0]] = root_node_ref

        def tree_next_layer(self, parent):
            root_node = self.tree_dict[i[0]]
            for line in self.nauo_refs:
                if line[1] == root_node:
                    i[0] += 1
                    self.tree_dict[i[0]] = str(line[2])
                    # HR added part reference as data for later use
                    self.tree.create_node(self.part_dict[line[2]],
                                          i[0],
                                          parent=parent,
                                          data={'ref': str(line[2])})
                    tree_next_layer(self, i[0])

        tree_next_layer(self, 0)
        self.appended = False

        self.get_levels()

    def get_levels(self):

        # Initialise dict and get first level (leaves)
        self.levels = {}
        self.levels_set_p = set()
        self.levels_set_a = set()
        self.leaf_ids = [el.identifier for el in self.tree.leaves()]
        self.all_ids = [el for el in self.tree.nodes]
        self.non_leaf_ids = set(self.all_ids) - set(self.leaf_ids)

        self.part_level = 1

        def do_level(self, tree_level):
            # Get all nodes within this level
            node_ids = [
                el for el in self.tree.nodes
                if self.tree.level(el) == tree_level
            ]
            for el in node_ids:
                # If leaf, then n_p = 1 and n_a = 1
                if el in self.leaf_ids:
                    self.levels[el] = {}
                    self.levels[el]['n_p'] = self.part_level
                    self.levels[el]['n_a'] = self.part_level
                # If assembly, then get all children and sum all parts + assemblies
                else:
                    # Get all children of node and sum levels
                    child_ids = self.tree.is_branch(el)
                    child_sum_p = 0
                    child_sum_a = 0
                    for el_ in child_ids:
                        child_sum_p += self.levels[el_]['n_p']
                        child_sum_a += self.levels[el_]['n_a']
                    self.levels[el] = {}
                    self.levels[el]['n_p'] = child_sum_p
                    self.levels[el]['n_a'] = child_sum_a + 1
                    self.levels_set_p.add(child_sum_p)
                    self.levels_set_a.add(child_sum_a + 1)
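                    # e.g. a sub-assembly with two leaf parts gets
                    # n_p = 1 + 1 = 2 and n_a = (1 + 1) + 1 = 3
                    # (the '+ 1' counts the assembly node itself)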

        # Go up through tree levels and populate lattice level dict
        for i in range(self.tree.depth(), -1, -1):
            do_level(self, i)

        self.create_lattice()

        self.levels_p_sorted = sorted(list(self.levels_set_p))
        self.levels_a_sorted = sorted(list(self.levels_set_a))

        # Function to return dictionary of item IDs for each lattice level
        def get_levels_inv(list_in, key):

            #Initialise
            levels_inv = {}
            levels_inv[self.part_level] = []
            for el in list_in:
                levels_inv[el] = []
            for k, v in self.levels.items():
                levels_inv[v[key]].append(k)

            return levels_inv

        self.levels_p_inv = get_levels_inv(self.levels_p_sorted, 'n_p')
        self.levels_a_inv = get_levels_inv(self.levels_a_sorted, 'n_a')

    def get_all_children(self, id_):

        # Collect all descendants of the given node, level by level
        descendants = [el.identifier for el in self.tree.children(id_)]
        parents = list(descendants)
        while parents:
            children = []
            for parent in parents:
                children.extend(el.identifier
                                for el in self.tree.children(parent))
            descendants.extend(children)
            parents = children
        return descendants

    def create_lattice(self):

        # Create lattice
        self.g = nx.DiGraph()
        self.default_colour = 'r'
        # Get root node and set parent to -1 to maintain data type of "parent"
        # Set position to top/middle
        node_id = self.tree.root
        label_text = self.tree.get_node(node_id).tag
        self.g.add_node(node_id,
                        parent=-1,
                        label=label_text,
                        colour=self.default_colour)

        # Do nodes from treelib "nodes" dictionary
        for key in self.tree.nodes:
            # Exclude root
            if key != self.tree.root:
                parent_id = self.tree.parent(key).identifier
                label_text = self.tree.get_node(key).tag
                # Node IDs same as for tree
                self.g.add_node(key,
                                parent=parent_id,
                                label=label_text,
                                colour=self.default_colour)

        # Do edges from nodes
        for key in self.tree.nodes:
            # Exclude root
            if key != self.tree.root:
                parent_id = self.tree.parent(key).identifier
                self.g.add_edge(key, parent_id)

        # Escape if only one node
        # HR 6/3/20 QUICK BUG FIX: SINGLE-NODE TREE DOES NOT PLOT
        # IMPROVE LATER; SHOULD BE PART OF A GENERAL METHOD
        if self.tree.size() == 1:
            id_ = [el.identifier for el in self.tree.leaves()]
            self.g.nodes[id_[-1]]['pos'] = (0, 0)
            return

        # Get set of parents of leaf nodes
        leaf_parents = set(
            [self.tree.parent(el).identifier for el in self.leaf_ids])

        # For each leaf_parent, set position of leaf nodes sequentially
        i = 0
        no_leaves = len(self.tree.leaves())
        for el in leaf_parents:
            for el_ in self.tree.is_branch(el):
                if el_ in self.leaf_ids:
                    self.g.nodes[el_]['pos'] = (i / no_leaves, 1)
                    i += 1

        # To set plot positions of nodes from lattice levels
        # ---
        # Traverse upwards from leaves
        for el in sorted(list(self.levels_set_a)):
            # Get all nodes at that level
            node_ids = [k for k, v in self.levels.items() if v['n_a'] == el]
            # Get all positions of children of that node
            # and set position as mean value of them
            for el_ in node_ids:
                child_ids = self.tree.is_branch(el_)
                pos_sum = 0
                for el__ in child_ids:
                    pos_ = self.g.nodes[el__]['pos'][0]
                    pos_sum += pos_
                pos_sum = pos_sum / len(child_ids)
                self.g.nodes[el_]['pos'] = (pos_sum, el)

    def print_tree(self):

        try:
            self.tree.show()
        except Exception:
            self.create_tree()
            self.tree.show()

    def tree_to_json(self, save_to_file=False, filename='file', path=''):

        #TH: return json format tree, can also save to file
        if self.tree.size() != 0:
            data = self.tree.to_json()
            j = json.loads(data)
            if save_to_file:
                if path:
                    file_path = os.path.join(path, filename)
                else:
                    file_path = filename

                with open(file_path + '.json', 'w') as outfile:
                    json.dump(j, outfile)

            return data
        else:
            print("no tree to print")
            return
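
# A minimal, self-contained sketch (hypothetical part names and refs) of the
# treelib pattern used by create_tree() and tree_to_json() above: each node
# stores its STEP reference in its `data` payload and the whole assembly tree
# is serialised with treelib's to_json().
from treelib import Tree
import json

demo_tree = Tree()
demo_tree.create_node('car', 0, data={'ref': '#10'})                # root assembly
demo_tree.create_node('wheel', 1, parent=0, data={'ref': '#20'})    # child part
demo_tree.create_node('door', 2, parent=0, data={'ref': '#30'})     # child part
print(json.dumps(json.loads(demo_tree.to_json()), indent=2))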
Example #25
0
class LuaDec:
    def __init__(self, fileName, format = "luadec"):
        self.format = format
        self.ptr = 0
        self.pc = 0
        self.tree = Tree()
        self.readFile(fileName)
        self.readHeader()
        self.readFunction()
        #self.tree.show()

    def readFile(self, fileName):
        with open(fileName, "rb") as f:
            self.fileBuf = f.read()

    def readUInt32(self):
        result = struct.unpack("<I", self.fileBuf[self.ptr:self.ptr + 4])[0]
        self.ptr += 4
        return result

    def readUInt64(self):
        result = struct.unpack("<Q", self.fileBuf[self.ptr:self.ptr + 8])[0]
        self.ptr += 8
        return result

    def formatValue(self, val):
        if type(val) == str:
            return "\"{}\"".format(val)
        elif type(val) == bool:
            if val:
                return "true"
            else:
                return "false"
        elif val is None:
            return "nil"
        elif type(val) == float and int(val) == val:
            return int(val)
        else:
            return val

    def processUpvalue(self, i, funcName):
        if i[0] == 1:
            if funcName == "root":
                return "G"
            return "UR{}".format(i[1])
        elif i[0] == 0:
            pNode = self.tree.parent(funcName)
            result = self.processUpvalue(pNode.data['upvalues'][i[1]], pNode.identifier)
            if result[-1] != "G":
                return "U" + result
            else:
                return result
        else:
            raise Exception("Unexpected upvalue {}".format(i[0]))

    def readHeader(self):
        magic = self.fileBuf[:4]
        if magic != b"\x1bLua":
            raise Exception("Unknown magic: {0}".format(magic.hex()))

        version = self.fileBuf[4]
        if version != 82:
            raise Exception("This program support ONLY Lua 5.2")

        lua_tail = self.fileBuf[12:18]
        if lua_tail != b"\x19\x93\r\n\x1a\n":
            raise Exception("Unexcepted lua_tail value: {0}".format(lua_tail.hex()))
        self.ptr = 18

    def readFunction(self, parent=None):
        #Handle the tree: derive this function's name from its position in it
        if parent:
            funcName = "function"
            funcSuffix = []
            #Python has no do...while, so this is a little awkward
            #Don't ask how this works; it just works!!
            pNode = self.tree.get_node(parent).identifier
            funcSuffix.append("_{0}".format(len(self.tree.children(pNode))))
            while self.tree.parent(pNode):
                pNode = self.tree.parent(pNode).identifier
                funcSuffix.append("_{0}".format(len(self.tree.children(pNode)) - 1))
            
            funcSuffix.reverse()
            for i in funcSuffix:
                funcName += i
        else:
            funcName = "root"
        #self.tree.show()

        #ProtoHeader
        protoheader = struct.unpack("<IIccc", self.fileBuf[self.ptr:self.ptr + 11])
        self.ptr += 11
        lineDefined     = protoheader[0]
        lastLineDefined = protoheader[1]
        numParams       = ord(protoheader[2])
        is_vararg       = ord(protoheader[3])
        maxStackSize    = ord(protoheader[4])
        
        #Code
        sizeCode = self.readUInt32()
        instructions = []
        #print("Code total size: {0}".format(sizeCode))
        for i in range(sizeCode):
            ins = self.readUInt32()
            instructions.append(ins)
            #self.processInstruction(ins)
            #print("Instruction: {0}".format(hex(ins)))

        #Constants
        sizeConstants = self.readUInt32()
        constants = []
        #print("Constants total size: {0}".format(sizeConstants))
        for i in range(sizeConstants):
            const_type = self.fileBuf[self.ptr]
            self.ptr += 1
            if const_type == const.LUA_DATATYPE['LUA_TNIL']:
                const_val = None
                const_type = "nil"
            elif const_type == const.LUA_DATATYPE['LUA_TNUMBER']:
                #Lua numbers are doubles (8 bytes)
                const_val = struct.unpack("<d", self.fileBuf[self.ptr:self.ptr + 8])[0]
                self.ptr += 8
                const_type = "number"
            elif const_type == const.LUA_DATATYPE['LUA_TBOOLEAN']:
                const_val = bool(self.fileBuf[self.ptr])
                self.ptr += 1
                const_type = "bool"
            elif const_type == const.LUA_DATATYPE['LUA_TSTRING']:
                str_len = self.readUInt32()
                buf = self.fileBuf[self.ptr:self.ptr + str_len - 1]
                try:
                    const_val = str(buf, encoding="utf8")
                except UnicodeDecodeError:
                    const_val = ""
                    for i in buf:
                        const_val += "\\{}".format(i)
                self.ptr += str_len
                const_type = "string"
                if self.fileBuf[self.ptr - 1] != 0:
                    raise Exception("Bad string")
            else:
                raise Exception("Undefined constant type {0}.".format(hex(const_type)))
            constants.append([const_val, const_type])
            #print("Constant: {0}".format(const_val))

        #Skip Protos
        ptrBackupStart = self.ptr #Back up the protos position; process the upvalues etc. that follow first
        sizeProtos = self.readUInt32()
        for i in range(sizeProtos):
            self.skipFunction()

        #Upvalue
        sizeUpvalue = self.readUInt32()
        upvalues = []
        #print("Upvalue total size: {0}".format(sizeUpvalue))
        for i in range(sizeUpvalue):
            instack = self.fileBuf[self.ptr]
            idx     = self.fileBuf[self.ptr + 1]
            self.ptr += 2
            upvalues.append([instack, idx])
            #print("Upvalue: {0} {1}".format(instack, idx))

        #srcName
        sizeSrcName = self.readUInt32()
        #print("srcName size: {0}".format(sizeSrcName))
        if sizeSrcName > 0:
            srcName = str(self.fileBuf[self.ptr:self.ptr + sizeSrcName], encoding="utf8")
            self.ptr += sizeSrcName
            #print("srcName: " + srcName)

        #Lines
        sizeLines = self.readUInt32()
        self.ptr += sizeLines

        #LocVars
        sizeLocVars = self.readUInt32()
        #for i in sizeLocVars:
        #    varname_size = 
        #TODO: handle the case where sizeLocVars is non-zero (bytecode not stripped)

        #UpvalNames
        sizeUpvalNames = self.readUInt32()

        #Write the collected data into the tree
        data = {
            "instructions": instructions,
            "constants":    constants,
            "upvalues":     upvalues,
        }
        self.tree.create_node(funcName, funcName, parent=parent, data=data)
        
        if self.format == "luaasm":
            print("\n.fn(R{}{})".format(numParams, ", __va_args__" if is_vararg else ""))
        print("; {:<20s}{}".format("Function", funcName))
        print("; {:<20s}{}".format("Defined from line", lineDefined))
        print("; {:<20s}{}".format("Defined to line", lastLineDefined))
        print("; {:<20s}{}".format("#Upvalues", sizeUpvalue))
        print("; {:<20s}{}".format("#Parameters", numParams))
        print("; {:<20s}{}".format("Is_vararg", is_vararg))
        if self.format == "luaasm":
            print("; {:<20s}{}".format("Max Stack Size", maxStackSize))
        else:
            print("; {:<20s}{}\n".format("Max Stack Size", maxStackSize))
        
        #Build a lookup table of formatted upvalue and constant values
        fmtVals = {}
        count = 0
        for i in data['constants']:
            fmtVals["K{}".format(count)] = self.formatValue(i[0])
            count += 1
        count = 0
        for i in data['upvalues']:
            fmtVals["U{}".format(count)] = self.processUpvalue(i, funcName)
            count += 1

        if self.format == "luadec":
            #Process each instruction
            self.pc = 0
            self.currFunc = funcName
            self.fmtVals = fmtVals
            for i in data['instructions']:
                self.processInstruction(i)
                self.pc += 1
        
        if self.format == "luadec":
            print("\n")

        if self.format == "luaasm":
            print("\n.instruction")
        #Process each instruction
        self.pc = 0
        self.currFunc = funcName
        self.fmtVals = fmtVals
        for i in data['instructions']:
            self.processInstruction(i)
            self.pc += 1

        if self.format == "luaasm":
            print("\n.const")
        else:
            print("\n; Constants")
        count = 0
        for i in data['constants']:
            print("K{:<5s} = {}".format(str(count), self.formatValue(i[0])))
            count += 1

        if self.format == "luaasm":
            print("\n.upvalue")
        else:
            print("\n; Upvalues")
        count = 0
        for i in data['upvalues']:
            if self.format == "luaasm":
                print("U{:<5s} = L{} R{}".format(str(count), i[0], i[1]))
            else:
                print("{:>5s}\t{}\t{}".format(str(count), i[0], i[1]))
            count += 1

        #Proto
        ptrBackupEnd = self.ptr
        self.ptr = ptrBackupStart
        sizeProtos = self.readUInt32()
        #print("Protos total size: {0}".format(sizeProtos))
        for i in range(sizeProtos):
            self.readFunction(parent=funcName)
        self.ptr = ptrBackupEnd

        if self.format == "luaasm":
            print(".endfn\n")


    #Skip over a function; used when the pointer position after it is needed
    def skipFunction(self):
        #print("Start skipping Proto, current ptr at {0}".format(hex(self.ptr)))
        #ProtoHeader
        self.ptr += 11

        #Code
        sizeCode = self.readUInt32()
        for i in range(sizeCode):
            self.ptr += 4

        #Constants
        sizeConstants = self.readUInt32()
        for i in range(sizeConstants):
            const_type = self.fileBuf[self.ptr]
            self.ptr += 1
            if const_type == const.LUA_DATATYPE['LUA_TNIL']:
                pass
            elif const_type == const.LUA_DATATYPE['LUA_TNUMBER']:
                self.ptr += 8
            elif const_type == const.LUA_DATATYPE['LUA_TBOOLEAN']:
                self.ptr += 1
            elif const_type == const.LUA_DATATYPE['LUA_TSTRING']:
                str_len = self.readUInt32()
                self.ptr += str_len
            else:
                raise Exception("Undefined constant type {0}.".format(hex(const_type)))

        #Protos
        sizeProtos = self.readUInt32()
        for i in range(sizeProtos):
            self.skipFunction()

        #Upvalue
        sizeUpvalue = self.readUInt32()
        for i in range(sizeUpvalue):
            self.ptr += 2

        #srcName
        sizeSrcName = self.readUInt32()
        if sizeSrcName > 0:
            self.ptr += sizeSrcName

        #Lines
        sizeLines = self.readUInt32()
        self.ptr += sizeLines

        #LocVars
        sizeLocVars = self.readUInt32()
        #for i in sizeLocVars:
        #    varname_size = 
        #TODO: handle the case where sizeLocVars is non-zero (bytecode not stripped)

        #UpvalNames
        sizeUpvalNames = self.readUInt32()
        #print("End skipping Proto. Current ptr at {0}".format(hex(self.ptr)))
    def getExtraArg(self):
        next_ins = self.tree.get_node(self.currFunc).data['instructions'][self.pc + 1]
        opCode = next_ins % (1 << 6)
        if const.opCode[opCode] == "OP_EXTRAARG":
            Ax = (next_ins >> 6)
            return True, Ax
        else:
            return False, "ERROR: C == 0 but no OP_EXTRAARG followed."
            
    def processInstruction(self, ins):
        opCode = ins % (1 << 6)
        opMode = const.opMode[opCode]
        A = 0
        B = 0
        C = 0

        if opMode[4] == "iABC":
            A   = (ins >> 6 ) % (1 << 8)
            B   = (ins >> 23)#% (1 << 9)
            C   = (ins >> 14) % (1 << 9)
        elif opMode[4] == "iABx":
            A   = (ins >> 6 ) % (1 << 8)
            B   = (ins >> 14)#% (1 << 18)
        elif opMode[4] == "iAsBx":
            A   = (ins >> 6 ) % (1 << 8)
            B   = (ins >> 14) - (1 << 17) + 1
        elif opMode[4] == "iAx":
            A   = (ins >> 6 )#% (1 << 26)
        else:
            raise Exception("Unknown opMode {0}".format(opMode[4]))

        #format A
        if opMode[1] == 1:
            parsedA = "R{0}".format(A)
        elif opMode[1] == 0:
            if const.opCode[opCode] == "OP_SETTABUP":
                parsedA = "U{0}".format(A)
            elif const.opCode[opCode] in ["OP_EQ", "OP_LT", "OP_LE"]:
                parsedA = A
            else:
                parsedA = "R{0}".format(A)
        else:
            raise Exception("Unknown A Mode {0}".format(opMode[1]))

        #format B
        if opMode[2] == 1:
            if const.opCode[opCode].find("UP") >= 0:
                parsedB = "U{0}".format(B)
            else:
                parsedB = "{0}".format(B)
        elif opMode[2] == 0:
            parsedB = ""
        elif opMode[2] == 2 or opMode[2] == 3:
            if opMode[4] == "iAsBx":
                #When B is an sBx operand it can only be an immediate value, never a register
                parsedB = "{0}".format(B)
            elif const.opCode[opCode] == "OP_LOADK":
                #LOADK always reads Kx, never Rx
                parsedB = "K{0}".format(B)
            elif B < 0x100:
                parsedB = "R{0}".format(B)
            else:
                parsedB = "K{0}".format(B - 0x100)
                B -= 0x100
        else:
            raise Exception("Unknown B Mode {0}".format(opMode[2]))

        #format C
        if opMode[3] == 1:
            if const.opCode[opCode].find("UP") >= 0:
                parsedC = "U{0}".format(C)
            else:
                parsedC = "{0}".format(C)
        elif opMode[3] == 0:
            parsedC = ""
        elif opMode[3] == 2 or opMode[3] == 3:
            if C < 0x100:
                parsedC = "R{0}".format(C)
            else:
                parsedC = "K{0}".format(C - 0x100)
                C -= 0x100
        else:
            raise Exception("Unknown C Mode {0}".format(opMode[3]))

        # parse comment
        #First assemble the comment from the pseudo-code template
        if len(parsedB) > 0 and (parsedB[0] == 'K' or parsedB[0] == 'U'):
            parsedB_ = "{{{}}}".format(parsedB)
        else:
            parsedB_ = parsedB
        if len(parsedC) > 0 and (parsedC[0] == 'K' or parsedC[0] == 'U'):
            parsedC_ = "{{{}}}".format(parsedC)
        else:
            parsedC_ = parsedC
        comment = const.pseudoCode[opCode].format(A=A,B=B,C=C,PB=parsedB_,PC=parsedC_)

        #Preprocessing
        #if BForceK:
        #    comment = comment.replace("R{}".format(B), "K{}".format(B))
        #if const.opCode[opCode] == "OP_SETTABLE" and CForceK:
        #    comment = comment.replace("R{}".format(C), "{{K{}}}".format(C))

        #Then substitute in the upvalues and constants
        comment = comment.format(**self.fmtVals)

        #Special handling for opcodes that need it
        if const.opCode[opCode] == "OP_LOADBOOL":
            #Convert 0/1 to false/true
            comment = comment[:-1]
            if B:
                comment += "true"
            else:
                comment += "false"
            #Handle the jump
            if C:
                comment += "; goto {0}".format(self.pc + 2)
        elif const.opCode[opCode] == "OP_LOADNIL":
            comment = ""
            for i in range(B + 1):
                comment += "R{0}, ".format(A + i)
            comment = comment[:-2]
            comment += " := nil"
        elif const.opCode[opCode] == "OP_SELF":
            comment = "R{}".format(A+1) + comment[2:]
        elif const.opCode[opCode] == "OP_JMP":
            comment += " (goto {0})".format(self.pc + 1 + B)
        elif const.opCode[opCode] in ["OP_EQ", "OP_LT", "OP_LE", "OP_TEST", "OP_TESTSET"]:
            if A:
                if const.opCode[opCode] == "OP_EQ":
                    comment = comment.replace("==", "~=")
                elif const.opCode[opCode] == "OP_LT":
                    comment = comment.replace("<", ">=")
                elif const.opCode[opCode] == "OP_LE":
                    comment = comment.replace("<=", ">")
            comment += " goto {0} else goto {1}".format(self.pc + 2, self.pc + 1)
            if C == 0:
                comment = comment.replace("not ", "")
        elif const.opCode[opCode] == "OP_CALL":
            comment = ""
            for i in range(C - 1):
                comment += "R{}, ".format(A + i)
            if C > 1:
                comment = comment[:-2] + " := R{}(".format(A)
            elif C == 1:
                comment += " := R{}(".format(A)
            else:
                comment = "R{} to top := R{}(".format(A, A)
            
            for i in range(B - 1):
                comment += "R{}, ".format(A + i + 1)
            if B > 1:
                comment = comment[:-2] + ")"
            elif B == 1:
                comment += ")"
            else:
                comment += "R{} to top)".format(C)
        elif const.opCode[opCode] == "OP_TAILCALL":
            comment = "R{} to top := R{}(".format(A, A)
            for i in range(B - 1):
                comment += "R{}, ".format(A + i + 1)
            if B > 1:
                comment = comment[:-2] + ")"
            else:
                comment = comment + ")"
        elif const.opCode[opCode] == "OP_RETURN":
            for i in range(B - 1):
                comment += "R{}, ".format(A + i)
            if B > 1:
                comment = comment[:-2]
            elif B == 0:
                comment += "R{} to top".format(A)
        elif const.opCode[opCode] == "OP_FORLOOP":
            comment = comment.replace("RD", "R{}".format(A + 1))
            comment = comment.replace("RE", "R{}".format(A + 2))
            comment = comment.replace("RF", "R{}".format(A + 3))
            comment += "goto {} end".format(self.pc + B + 1)
        elif const.opCode[opCode] == "OP_FORPREP":
            comment = comment.replace("RD", "R{}".format(A + 2))
            comment += "(goto {})".format(self.pc + B + 1)
        elif const.opCode[opCode] == "OP_TFORCALL":
            comment = comment.replace("RD", "R{}".format(A + 1))
            comment = comment.replace("RE", "R{}".format(A + 2))
            comment = comment.replace("RF", "R{}".format(A + 3))
            comment = comment.replace("RG", "R{}".format(A + 4))
        elif const.opCode[opCode] == "OP_TFORLOOP":
            comment = comment.replace("RD", "R{}".format(A + 1))
            comment += " (goto {}))".format(self.pc + B + 1)
        elif const.opCode[opCode] == "OP_CLOSURE":
            if self.currFunc == "root":
                comment += "function_{})".format(B)
            else:
                comment += self.currFunc + "_{})".format(B)
        elif const.opCode[opCode] == "OP_SETLIST":
            real_c = C
            err = False
            if C == 0:
                success, result = self.getExtraArg()
                if success:
                    real_c = result
                else:
                    comment += result
                    err = True
                
            if not err:
                LFIELDS_PER_FLUSH = 50
                start_index = (real_c - 1) * LFIELDS_PER_FLUSH
                if B == 0:
                    comment += "R{}[{}] to R{}[top] := R{} to top".format(A, start_index, A, A + 1)
                elif B == 1:
                    comment += "R{}[{}] := R{}".format(A, start_index, A + 1)
                else:
                    comment += "R{}[{}] to R{}[{}] := R{} to R{}".format(A, start_index, A, start_index + B - 1, A + 1, A + B)
                if C == 0:
                    comment += "; CONTAINS EXTRAARG"
        elif const.opCode[opCode] == "OP_LOADKX":
            success, result = self.getExtraArg()
            if success:
                Ax = result
                comment += "R{} := {{K{}}}".format(A, Ax).format(**self.fmtVals)
            else:
                comment += result

        seq = []
        for i in [parsedA, parsedB, parsedC]:
            if i != "":
                seq.append(str(i))
        regsFmt = " ".join(seq)
        if self.format == "luaasm":
            print("{:<10s}{:<13s} ; {:>5s} {}".format(const.opCode[opCode][3:], regsFmt, "[{}]".format(str(self.pc)), comment))
        else:
            print("{:>5s} [-]: {:<10s}{:<13s}; {}".format(str(self.pc), const.opCode[opCode][3:], regsFmt, comment))
Example #26
0
class RIAC(AbstractTeacher):
    def __init__(self,
                 mins,
                 maxs,
                 seed,
                 env_reward_lb,
                 env_reward_ub,
                 max_region_size=200,
                 alp_window_size=None,
                 nb_split_attempts=50,
                 sampling_in_leaves_only=False,
                 min_region_size=None,
                 min_dims_range_ratio=1 / 6,
                 discard_ratio=1 / 4):

        AbstractTeacher.__init__(self, mins, maxs, env_reward_lb,
                                 env_reward_ub, seed)

        # Maximal number of (task, reward) pairs a region can hold before splitting
        self.maxlen = max_region_size

        self.alp_window = self.maxlen if alp_window_size is None else alp_window_size

        # Initialize Regions' tree
        self.tree = Tree()
        self.regions_bounds = [Box(self.mins, self.maxs, dtype=np.float32)]
        self.regions_alp = [0.]
        self.tree.create_node('root',
                              'root',
                              data=Region(maxlen=self.maxlen,
                                          r_t_pairs=[
                                              deque(maxlen=self.maxlen + 1),
                                              deque(maxlen=self.maxlen + 1)
                                          ],
                                          bounds=self.regions_bounds[-1],
                                          alp=self.regions_alp[-1]))
        self.nb_dims = len(mins)
        self.nb_split_attempts = nb_split_attempts

        # Whether task sampling uses parent and child regions (False) or only child regions (True)
        self.sampling_in_leaves_only = sampling_in_leaves_only

        # Additional tricks to original RIAC, enforcing splitting rules

        # 1 - Minimum population required for both children when splitting --> set to 1 to cancel
        self.minlen = self.maxlen / 20 if min_region_size is None else min_region_size

        # 2 - minimum children region size (compared to initial range of each dimension)
        # Set min_dims_range_ratio to 1/np.inf to cancel
        self.dims_ranges = self.maxs - self.mins
        self.min_dims_range_ratio = min_dims_range_ratio

        # 3 - If after nb_split_attempts, no split is valid, flush oldest points of parent region
        # If 1- and 2- are canceled, this will be canceled since any split will be valid
        self.discard_ratio = discard_ratio

        # book-keeping
        self.sampled_tasks = []
        self.all_boxes = []
        self.all_alps = []
        self.update_nb = -1
        self.split_iterations = []

        self.hyperparams = locals()

    def compute_alp(self, sub_region):
        if len(sub_region[0]) > 2:
            cp_window = min(len(sub_region[0]),
                            self.alp_window)  # window may not be full yet
            half = int(cp_window / 2)
            # print(str(cp_window) + 'and' + str(half))
            first_half = np.array(sub_region[0])[-cp_window:-half]
            snd_half = np.array(sub_region[0])[-half:]
            diff = first_half.mean() - snd_half.mean()
            cp = np.abs(diff)
        else:
            cp = 0
        alp = np.abs(cp)
        return alp

    def split(self, nid):
        # Try nb_split_attempts splits on region corresponding to node <nid>
        reg = self.tree.get_node(nid).data
        best_split_score = 0
        best_bounds = None
        best_sub_regions = None
        is_split = False
        for i in range(self.nb_split_attempts):
            sub_reg1 = [
                deque(maxlen=self.maxlen + 1),
                deque(maxlen=self.maxlen + 1)
            ]
            sub_reg2 = [
                deque(maxlen=self.maxlen + 1),
                deque(maxlen=self.maxlen + 1)
            ]

            # repeat until both sub-regions contain at least minlen points from the mother region
            while len(sub_reg1[0]) < self.minlen or len(
                    sub_reg2[0]) < self.minlen:
                # decide on dimension
                dim = self.random_state.choice(range(self.nb_dims))
                threshold = reg.bounds.sample()[dim]
                bounds1 = Box(reg.bounds.low,
                              reg.bounds.high,
                              dtype=np.float32)
                bounds1.high[dim] = threshold
                bounds2 = Box(reg.bounds.low,
                              reg.bounds.high,
                              dtype=np.float32)
                bounds2.low[dim] = threshold
                bounds = [bounds1, bounds2]
                valid_bounds = True

                if np.any(bounds1.high - bounds1.low < self.dims_ranges *
                          self.min_dims_range_ratio):
                    valid_bounds = False
                if np.any(bounds2.high - bounds2.low < self.dims_ranges *
                          self.min_dims_range_ratio):
                    valid_bounds = False

                # perform split in sub regions
                sub_reg1 = [
                    deque(maxlen=self.maxlen + 1),
                    deque(maxlen=self.maxlen + 1)
                ]
                sub_reg2 = [
                    deque(maxlen=self.maxlen + 1),
                    deque(maxlen=self.maxlen + 1)
                ]
                for i, task in enumerate(reg.r_t_pairs[1]):
                    if bounds1.contains(task):
                        sub_reg1[1].append(task)
                        sub_reg1[0].append(reg.r_t_pairs[0][i])
                    else:
                        sub_reg2[1].append(task)
                        sub_reg2[0].append(reg.r_t_pairs[0][i])
                sub_regions = [sub_reg1, sub_reg2]

            # compute alp
            alp = [self.compute_alp(sub_reg1), self.compute_alp(sub_reg2)]

            # compute score
            split_score = len(sub_reg1) * len(sub_reg2) * np.abs(alp[0] -
                                                                 alp[1])
            if split_score >= best_split_score and valid_bounds:
                is_split = True
                best_split_score = split_score
                best_sub_regions = sub_regions
                best_bounds = bounds

        if is_split:
            # add new nodes to tree
            for i, (r_t_pairs,
                    bounds) in enumerate(zip(best_sub_regions, best_bounds)):
                self.tree.create_node(identifier=self.tree.size(),
                                      parent=nid,
                                      data=Region(self.maxlen,
                                                  r_t_pairs=r_t_pairs,
                                                  bounds=bounds,
                                                  alp=alp[i]))
        else:
            assert len(reg.r_t_pairs[0]) == (self.maxlen + 1)
            reg.r_t_pairs[0] = deque(
                islice(reg.r_t_pairs[0], int(self.maxlen * self.discard_ratio),
                       self.maxlen + 1))
            reg.r_t_pairs[1] = deque(
                islice(reg.r_t_pairs[1], int(self.maxlen * self.discard_ratio),
                       self.maxlen + 1))

        return is_split

    def add_task_reward(self, node, task, reward):
        reg = node.data
        nid = node.identifier
        if reg.bounds.contains(task):  # task falls within region
            self.nodes_to_recompute.append(nid)
            children = self.tree.children(nid)
            for n in children:  # if task in region, task is in one sub-region
                self.add_task_reward(n, task, reward)

            need_split = reg.add(task, reward, children == [])  # COPY ALL MODE
            if need_split:
                self.nodes_to_split.append(nid)

    def episodic_update(self, task, reward, is_success):
        self.update_nb += 1

        # Add new (task, reward) to regions nodes
        self.nodes_to_split = []
        self.nodes_to_recompute = []
        new_split = False
        root = self.tree.get_node('root')
        self.add_task_reward(
            root, task, reward)  # Will update self.nodes_to_split if needed
        assert len(self.nodes_to_split) <= 1

        # Split a node if needed
        need_split = len(self.nodes_to_split) == 1
        if need_split:
            new_split = self.split(self.nodes_to_split[0])  # Execute the split
            if new_split:
                # Update list of regions_bounds
                if self.sampling_in_leaves_only:
                    self.regions_bounds = [
                        n.data.bounds for n in self.tree.leaves()
                    ]
                else:
                    self.regions_bounds = [
                        n.data.bounds for n in self.tree.all_nodes()
                    ]

        # Recompute ALPs of modified nodes
        for nid in self.nodes_to_recompute:
            node = self.tree.get_node(nid)
            reg = node.data
            reg.alp = self.compute_alp(reg.r_t_pairs)

        # Collect regions data (regions' ALP and regions' (task, reward) pairs)
        all_nodes = self.tree.all_nodes(
        ) if not self.sampling_in_leaves_only else self.tree.leaves()
        self.regions_alp = []
        self.r_t_pairs = []
        for n in all_nodes:
            self.regions_alp.append(n.data.alp)
            self.r_t_pairs.append(n.data.r_t_pairs)

        # Book-keeping
        if new_split:
            self.all_boxes.append(copy.copy(self.regions_bounds))
            self.all_alps.append(copy.copy(self.regions_alp))
            self.split_iterations.append(self.update_nb)
        assert len(self.regions_alp) == len(self.regions_bounds)

        return new_split, None

    def sample_random_task(self):
        return self.regions_bounds[0].sample()  # First region is root region

    def sample_task(self):
        mode = self.random_state.rand()
        if mode < 0.1:  # "mode 3" (10%) -> sample on regions and then mutate lowest-performing task in region
            if len(self.sampled_tasks) == 0:
                self.sampled_tasks.append(self.sample_random_task())
            else:
                self.sampled_tasks.append(
                    self.non_exploratory_task_sampling()["task"])

        elif mode < 0.3:  # "mode 2" (20%) -> random task
            self.sampled_tasks.append(self.sample_random_task())

        else:  # "mode 1" (70%) -> proportional sampling on regions based on ALP and then random task in selected region
            region_id = proportional_choice(self.regions_alp,
                                            self.random_state,
                                            eps=0.0)
            self.sampled_tasks.append(self.regions_bounds[region_id].sample())

        return self.sampled_tasks[-1].astype(np.float32)

    def non_exploratory_task_sampling(self):
        # 1 - Sample region proportionally to its ALP
        region_id = proportional_choice(self.regions_alp,
                                        self.random_state,
                                        eps=0.0)

        # 2 - Retrieve (task, reward) pair with lowest reward
        worst_task_idx = np.argmin(self.r_t_pairs[region_id][0])

        # 3 - Mutate task by a small amount (using Gaussian centered on task, with 0.1 std)
        task = self.random_state.normal(
            self.r_t_pairs[region_id][1][worst_task_idx].copy(), 0.1)
        # clip to stay within region (add small epsilon to avoid falling in multiple regions)
        task = np.clip(task, self.regions_bounds[region_id].low + 1e-5,
                       self.regions_bounds[region_id].high - 1e-5)
        return {
            "task": task,
            "infos": {
                "bk_index": len(self.all_boxes) - 1,
                "task_infos": region_id
            }
        }

    def dump(self, dump_dict):
        dump_dict['all_boxes'] = self.all_boxes
        dump_dict['split_iterations'] = self.split_iterations
        dump_dict['all_alps'] = self.all_alps
        # dump_dict['riac_params'] = self.hyperparams
        return dump_dict

    @property
    def nb_regions(self):
        return len(self.regions_bounds)

    @property
    def get_regions(self):
        return self.regions_bounds
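
# Standalone sketch of the absolute learning progress (ALP) measure used by
# compute_alp() above: a region's recent rewards are split into two halves and
# ALP is the absolute difference of their means. Reward values and window size
# here are hypothetical.
import numpy as np

def alp_from_rewards(rewards, window=200):
    if len(rewards) <= 2:
        return 0.0
    cp_window = min(len(rewards), window)
    half = cp_window // 2
    first_half = np.array(rewards)[-cp_window:-half]
    snd_half = np.array(rewards)[-half:]
    return float(np.abs(first_half.mean() - snd_half.mean()))

print(alp_from_rewards([0.1, 0.2, 0.6, 0.7]))  # ~0.5, i.e. clear progress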
Example #27
0
class PathList:
    def __init__(self, disk):
        self._tree = Tree()
        self._disk = disk
        self._tree.create_node(tag='root', identifier='root')
        self.depth = 3

    def update_path_list(self, file_id='root', depth=None, is_fid=True):
        if depth is None:
            depth = self.depth
        if not is_fid:
            file_id = self.get_path_fid(file_id, auto_update=False)
        file_list = self._disk.get_file_list(file_id)
        if 'items' not in file_list:
            return False
        for i in file_list['items']:
            if i['type'] == 'file':
                file_info = FileInfo(name=i['name'], id=i['file_id'], pid=i['parent_file_id'], type=True,
                                     ctime=time.strptime(i['created_at'], '%Y-%m-%dT%H:%M:%S.%fZ'),
                                     update_time=time.strptime(i['updated_at'], '%Y-%m-%dT%H:%M:%S.%fZ'),
                                     hidden=i['hidden'],
                                     category=i['category'], size=i['size'], content_hash_name=i['content_hash_name'],
                                     content_hash=i['content_hash'], download_url=i['download_url'])
            else:
                file_info = FileInfo(name=i['name'], id=i['file_id'], pid=i['parent_file_id'], type=False,
                                     ctime=time.strptime(i['created_at'], '%Y-%m-%dT%H:%M:%S.%fZ'),
                                     update_time=time.strptime(i['updated_at'], '%Y-%m-%dT%H:%M:%S.%fZ'),
                                     hidden=i['hidden'])
            if self._tree.get_node(file_info.id):
                self._tree.update_node(file_info.id, data=file_info)
            else:
                self._tree.create_node(tag=file_info.name, identifier=file_info.id, data=file_info, parent=file_id)
            if not file_info.type and depth:
                self.update_path_list(file_id=file_info.id, depth=depth - 1)
        return True

    def tree(self, path='root', auto_update=True):
        file_id = self.get_path_fid(path, auto_update=auto_update)
        if not file_id:
            raise Exception('No such file or directory')
        self._tree.show(file_id)

    def get_path_list(self, path, auto_update=True):
        file_id = self.get_path_fid(path, auto_update=auto_update)
        return self.get_fid_list(file_id, auto_update=auto_update)

    def get_fid_list(self, file_id, auto_update=True):
        self.auto_update_path_list(auto_update)
        if not file_id:
            raise Exception('No such file or directory')
        if file_id != 'root' and self._tree.get_node(file_id).data.type:
            return [self._tree.get_node(file_id).data]
        return [i.data for i in self._tree.children(file_id)]

    def get_path_fid(self, path, file_id='root', auto_update=True):
        self.auto_update_path_list(auto_update)
        path = Path(path)
        if str(path) in ('', '/', '\\', '.', 'root'):
            return 'root'
        for i in filter(None, path.as_posix().split('/')):
            flag = False
            for j in self._tree.children(file_id):
                if i == j.tag:
                    flag = True
                    file_id = j.identifier
                    break
            # Stop as soon as a path component cannot be matched, otherwise a
            # later component could match under the wrong node
            if not flag:
                return False
        return file_id

    def get_path_node(self, path, auto_update=True):
        file_id = self.get_path_fid(path, auto_update=auto_update)
        if file_id:
            return self._tree.get_node(file_id)
        return False

    def get_path_parent_node(self, path, auto_update=True):
        file_id = self.get_path_fid(path, auto_update=auto_update)
        if file_id:
            node = self._tree.parent(file_id)
            if node:
                return node
        return False

    def auto_update_path_list(self, auto_update=True):
        if auto_update and len(self._tree) == 1:
            return self.update_path_list()
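
# Minimal sketch of the tag-based path lookup performed by get_path_fid()
# above, on a purely local treelib tree (no disk client involved); the file
# names and identifiers here are hypothetical.
from treelib import Tree

demo = Tree()
demo.create_node(tag='root', identifier='root')
demo.create_node(tag='docs', identifier='d1', parent='root')
demo.create_node(tag='readme.md', identifier='f1', parent='d1')

def path_to_fid(tree, path, start='root'):
    node_id = start
    for part in filter(None, path.split('/')):
        for child in tree.children(node_id):
            if child.tag == part:
                node_id = child.identifier
                break
        else:
            return False  # component not found under the current node
    return node_id

print(path_to_fid(demo, 'docs/readme.md'))  # -> 'f1'
print(path_to_fid(demo, 'docs/missing'))    # -> False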
Example #28
0
class PathList:
    def __init__(self, disk):
        self._tree = Tree()
        self._disk = disk
        self._tree.create_node(tag='root', identifier='root')

    def update_path_list(self, path='root', depth=3):
        for i in self._disk.get_file_list(path)['items']:
            if i['type'] == 'file':
                file_info = FileInfo(name=i['name'], id=i['file_id'], pid=i['parent_file_id'], type=True,
                                     ctime=time.strptime(i['created_at'], '%Y-%m-%dT%H:%M:%S.%fZ'),
                                     update_time=time.strptime(i['updated_at'], '%Y-%m-%dT%H:%M:%S.%fZ'),
                                     hidden=i['hidden'],
                                     category=i['category'], size=i['size'], content_hash_name=i['content_hash_name'],
                                     content_hash=i['content_hash'], download_url=i['download_url'])
            else:
                file_info = FileInfo(name=i['name'], id=i['file_id'], pid=i['parent_file_id'], type=False,
                                     ctime=time.strptime(i['created_at'], '%Y-%m-%dT%H:%M:%S.%fZ'),
                                     update_time=time.strptime(i['updated_at'], '%Y-%m-%dT%H:%M:%S.%fZ'),
                                     hidden=i['hidden'])
            self._tree.create_node(tag=file_info.name, identifier=file_info.id, data=file_info, parent=path)
            if not file_info.type and depth:
                self.update_path_list(path=file_info.id, depth=depth - 1)

    def tree(self, path):
        if len(self._tree) == 1:
            self.update_path_list()
        elif len(self._tree) > 1:
            self.__init__(self._disk)
            self.update_path_list()
        file_id = self.get_path_fid(path)
        if not file_id:
            raise Exception('No such file or directory')
        self._tree.show(file_id)

    def get_path_list(self, path):
        file_id = self.get_path_fid(path)
        if not file_id:
            raise Exception('No such file or directory')
        if file_id != 'root' and self._tree.get_node(file_id).data.type:
            return [self._tree.get_node(file_id).data]
        return [i.data for i in self._tree.children(file_id)]

    def get_path_fid(self, path, file_id='root'):
        if len(self._tree) == 1:
            self.update_path_list()
        elif len(self._tree) > 1:
            self.__init__(self._disk)
            self.update_path_list()
        if path == '/' or path == '' or path == 'root':
            return 'root'
        for i in filter(None, path.split('/')):
            flag = False
            for j in self._tree.children(file_id):
                if i == j.tag:
                    flag = True
                    file_id = j.identifier
                    break
            # Stop as soon as a path component cannot be matched, otherwise a
            # later component could match under the wrong node
            if not flag:
                return False
        return file_id
Example #29
0
class MonteCarlo:
    N_THREADS = 1
    PERCENTILE = 100

    def __init__(self, engine=None, hero=None):
        # self.last_ev = 0
        # self.rolling_10 = deque(maxlen=10)
        # self.rolling_40 = deque(maxlen=40)
        self.ev_history = {}
        self.time_start = None
        self.duration = None
        self.queue = None
        self.leaf_path = None

        if not engine:
            # logger.info('engine not given, loading from file...')
            self.engine_checksum = None
            self.load_engine(hero)
        else:
            # logger.info('engine given')
            self.init(engine, hero)

    @property
    def current_actions(self):
        return [(c.data['action'], c.data['ev'], c.data['traversed'])
                for c in self.tree.children(self.tree.root)]

    def is_time_left(self):
        return time.time() - self.time_start < self.duration

    @retrace.retry(on_exception=(EOFError, KeyError), interval=0.1, limit=None)
    def load_engine(self, hero):
        with shelve.open(Engine.FILE) as shlv:
            if shlv['hash'] != self.engine_checksum:
                # logger.info('loading engine from file...')
                self.engine_checksum = shlv['hash']
                self.init(shlv['engine'], hero)

    def init(self, engine, hero):
        # logger.info('init state')
        self.engine = engine
        self.hero = hero or self.engine.q[0][0]
        self.hero_pocket = self.engine.data[self.hero]['hand']
        for s in self.engine.data:
            self.ev_history[s] = deque(maxlen=50)
        # logger.info('HERO is at seat {} with {}'.format(self.hero, self.hero_pocket))

        self.watched = False
        self.init_tree()

    def init_tree(self):
        """create the tree. Add a root; available action will add the first level of children"""
        # self.traversed_ceiling = 1
        self.tree = Tree()
        root = self.tree.create_node('root', identifier='root', data={'traversed': 0, 'ev': 0, 'stats': 1, 'cum_stats': 1})
        # # logger.info('tree:\n{}'.format(self.tree.show()))
        # input('new tree')

    def watch(self):
        """Runs when engine file changes. Just kicks off run for 3s sprints"""
        # logger.info('Monte Carlo watching every {}s...'.format(self.timeout))
        while True:

            # loads new engine file if checksum changed
            self.load_engine()

            # do not analyze if game finished
            if self.engine.phase in [self.engine.PHASE_SHOWDOWN, self.engine.PHASE_GG]:
                if not self.watched:
                    # logger.error('game is finished')
                    self.watched = True
                time.sleep(3)
                continue

            # do not analyze if hero does not have pocket
            if self.hero_pocket in [['__', '__'], ['  ', '  ']]:
                if not self.watched:
                    # logger.error('hero does not have a pocket')
                    self.watched = True
                time.sleep(0.5)
                continue

            # do not analyze if hero is not to play
            if self.hero != self.engine.q[0][0]:
                if not self.watched:
                    # logger.error('hero is not to act')
                    self.watched = True
                time.sleep(0.5)
                continue

            if self.is_complete:
                if not self.watched:
                    # logger.error('mc is complete')
                    self.watched = True
                time.sleep(2)
                continue

            # run a few sims
            # logger.debug('running now with timeout {}'.format(self.timeout))
            self.run()
            self.timeout += 0.1

    def run(self, duration):
        """Run simulations
        For x:
         - clone engine
         - start at root
          -- iterate and find next unprocessed node
          -- action engine to that node parent
          -- process that node
         - keep processing
         - with return EV

         Levelling:
            extremely huge iterations when many players. So
            do the most probably actions only till all done.

        Handling close action approximations:
        """
        # logger.info('Monte Carlo started')
        total_traversions_start = sum(a[2] for a in self.current_actions)

        # cannot run if engine in showdown or gg
        if self.engine.phase in [self.engine.PHASE_SHOWDOWN, self.engine.PHASE_GG]:
            logger.warning('cannot run mc with no actions')
            return

        self.duration = duration
        self.time_start = time.time()

        self.queue = PriorityQueue()
        # threads = []
        # for _ in range(self.N_THREADS):
        #     t = MCWorker(self)
        #     # t.start()
        #     threads.append(t)

        # self.traversed_focus = 0
        leaves = self.tree.paths_to_leaves()
        # logger.debug('leaves from tree: {}'.format(len(leaves)))
        # leaves.sort(key=lambda lp: len(lp) + sum(int(lpn.split('_')[0]) for lpn in lp), reverse=True)
        # # logger.debug('{} leaves are now sorted by formula'.format(len(leaves)))
        # logger.debug('{}'.format(json.dumps(leaves[:3], indent=4, default=str)))
        # leaves.sort(key=len)
        # logger.debug('{} leaves are now sorted by length'.format(len(leaves)))
        # logger.debug('{}'.format(json.dumps(leaves[:3], indent=4, default=str)))
        # leaves.sort(key=lambda lp: int(lp[-1][:3]), reverse=True)
        # logger.debug('{} leaves are now sorted by rank'.format(len(leaves)))
        # logger.error(json.dumps(leaves, indent=4, default=str))
        # input('>>')
        for leaf_path in leaves:
            node = self.tree[leaf_path[-1]]
            item = (
                1 - node.data['cum_stats'],
                leaf_path,
            )
            self.queue.put(item)

        # for t in threads:
        #     t.start()
        #
        # for t in threads:
        #     t.join()
        #     if t.error:
        #         raise Exception().with_traceback(t.error[2])

        while self.is_time_left() and not self.queue.empty():
            priority, self.leaf_path = self.queue.get_nowait()
            self.run_item(self.leaf_path)

        if self.queue.empty():
            logger.info(f'Everything was processed in queue!')

        total_traversions_end = sum(a[2] for a in self.current_actions)
        if total_traversions_end <= total_traversions_start:
            logger.warning(f'No new traversals added to {total_traversions_start}')

    def run_item(self, path):
        # logger.debug('running this path: {}'.format(path))
        e = deepcopy(self.engine)
        e.mc = True
        """To calculate the investment for the loss EV, the total amounts used till end is required. Cannot
         use final player balance on engine as that might have winnings allocated to it by the engine. Instead
         the difference from all the new matched bets from the current matched bets will be used.
         Need to add current contrib
        """
        e.matched_start = e.data[self.hero]['matched'] + e.data[self.hero]['contrib']
        # logger.info('hero starting with matched = {} from {} + {}'.format(
        #     e.matched_start, e.data[self.hero]['matched'], e.data[self.hero]['contrib']))

        # self.tree.show()
        self.fast_forward(e, path)
        # logger.info('{}'.format('-' * 200))
        # input('check item')

    def show_best_action(self):
        """Calculates best action on root"""
        # logger.error("\n\n")
        sum_traversed = 0
        delta = 0
        max_ev = float('-inf')
        action = None
        amount = None
        for nid in self.tree[self.tree.root].fpointer:
            child = self.tree[nid]
            # logger.debug('{} {}'.format(child.tag, child.data))
            dat = child.data
            sum_traversed += dat['traversed']
            # logger.error('{} @{} => {}'.format(dat['action'], dat['traversed'], round(dat['ev'], 4)))

            # delta += abs(1 - (self.convergence.get(dat['action'], 1) / dat['ev'] if dat['ev'] else 1))
            # self.convergence[dat['action']] = dat['ev']

            if dat['ev'] > max_ev:
                max_ev = dat['ev']
                action = dat['action']
                if action.startswith('bet') or action.startswith('raise') or action.startswith('allin'):
                    amount = dat['amount']

        best_action = '{}{}'.format(action, ' with {}'.format(amount) if amount else '')

        # self.convergence['deq'].append(round(delta, 1))
        self.convergence['deq'].append(best_action)
        # # logger.error('deq: {}'.format(list(self.convergence['deq'])))

        # logger.error('')
        # logger.error('Timeout: {}'.format(round(self.timeout, 1)))
        # logger.error('Traversed: {}'.format(sum_traversed))
        deq_cnts = Counter(list(self.convergence['deq']))
        # # logger.error('deq: {}'.format(deq_cnts.most_common()))

        # logger.error('{}% for {}'.format(
            # 100 * sum(dq == deq_list[-1] for dq in deq_list[:-1]) // (len(deq_list) - 1)
            # 100 * (deq_cnts.most_common()[0][1] - deq_cnts.most_common()[1][1]) // self.convergence_size
            # if len(deq_cnts) > 1 else 100 * len(self.convergence['deq']) // self.convergence_size,
            # deq_cnts.most_common()[0][0]
        # ))

    def fast_forward(self, e, path):
        """Do actions on engine till the leaf is reached. Need to do available_actions before
        every DO

        First check if the leave is already processed, then skip this path. When the leaf is reached
        then process from that node.

        Remember to send through only the first letter for the action.

        Then update the nodes from this leaf back up the tree
        """
        # logger.info('Fast forwarding {} nodes'.format(len(path)))

        if len(path) == 1:
            # logger.info('processing root for first time')
            self.process_node(e, self.tree[path[0]])
            return

        leaf_node = self.tree[path[-1]]
        # logger.debug('checking if last node has been processed:')
        # logger.debug('last node leaf {} has node data {}'.format(leaf_node.tag, leaf_node.data))
        if leaf_node.data['traversed']:
            # logger.info('This leaf node ({}) above focus level {}'.format(leaf_node.tag, self.traversed_focus))
            # can happen as all actions are added, but then one was chosen to continue on
            # and that path for that action wasn't removed from the queue
            return

        for nid in path[1:]:
            node = self.tree[nid]
            # logger.debug('fast forwarding action for node {}'.format(node.tag))
            e.available_actions()
            cmd = [node.data['action'][0]]
            if 'amount' in node.data:
                cmd.append(node.data['amount'])
                # logger.debug('Adding bet value of {}'.format(node.data['amount']))
            # logger.debug('Executing path action {} for {}'.format(cmd, node.tag))
            # logger.debug('Executing path action {} with data {}'.format(cmd, node.data))
            e.do(cmd)

            if node.is_leaf():
                # logger.debug('{} is a leaf node, processing next...'.format(node.tag))
                self.process_node(e, node)

                logger.info('nodes processed, now updating nodes that were fast forwarded')
                for processed_nid in reversed(path[1:]):
                    processed_node = self.tree[processed_nid]
                    self.update_node(processed_node)

        self.ev_history[self.engine.s].append(sum(a[1] for a in self.current_actions))

    def process_node(self, e, n):
        """Process node
        Get actions available for node
        Pick action to traverse with UCT
        Process action selected
        Return EV
        """
        # logger.info('processing node {} with data {}'.format(n.tag, n.data))

        # this node is the hero folding (to prevent it being processed as a leaf)
        # it was created along with the other children (but was not the most probable at the time, so not processed as a child)
        # if the hero is folding, make this node a leaf node with fold equity
        # exiting before adding children avoids having to remove them again immediately afterwards
        # bug: cannot use engine.q as it has already rotated after taking the action that led here
        if not n.is_root() and n.data['action'] == 'fold' and self.hero == n.data['seat']:
            winnings, losses = self.net(e)
            result = {
                'ev': losses,
                'traversed': 1,
            }
            # logger.info('hero has folded this node given: {}'.format(result))
            n.data.update(result)
            # logger.info('node data after fold: {}'.format(n.data))
            return

        # add the children of the node
        if not n.fpointer:
            self.add_actions(e, n)

        # this node is a leaf (no more actions to take!)
        # either the game finished and we have winner and pot
        # or we have to use pokereval.winners
        if n.is_leaf():
            # logger.info('node {} is the final action in the game'.format(n.tag))
            # winner given (easy resolution)
            if e.winner:
                # logger.debug('engine gave winner {}'.format(e.winner))
                winnings, losses = self.net(e)
                ev = winnings if self.hero in e.winner else losses
            # else if the winner is unknown
            # then calculate winners and use
            # percentage of hero as amt
            else:
                if 'in' not in e.data[self.hero]['status']:
                    # hero fold is handled before in method
                    # and thus for equities calc it is just 0
                    # logger.debug('Hero {} is not in game'.format(self.hero))
                    ev = 0
                else:
                    winnings, losses = self.net(e)
                    equities = PE.showdown_equities(e)
                    # equities = self.get_showdown_equities(e)
                    ev_pos = winnings * equities[self.hero]
                    # logger.debug('ev_pos = {} from winnings {} * eq {}'.format(ev_pos, winnings, equities[self.hero]))
                    ev_neg = losses * (1 - equities[self.hero])
                    # logger.debug('ev_neg = {} from losses {} * -eq {}'.format(ev_neg, losses, (1 - equities[self.hero])))
                    ev = ev_pos + ev_neg
                    logger.info('Net EV: {} from {} + {}'.format(ev, ev_pos, ev_neg))
            result = {
                'ev': ev,
                'traversed': 1,
            }
            # logger.info('{} leaf has result {}'.format(n.tag, result))
            n.data.update(result)
            return

        # node is all good (not leaf (has children) and not hero folding)
        # get child actions and process most probable action
        a_node = self.most_probable_action(n)
        action = a_node.data['action']
        # logger.info('taking next child node action {}'.format(action))

        # if it is hero and he folds,
        # it is not necessarily an immediate ZERO equity
        # since my previous contrib needs to be added to the pot (i.e. contribs after starting mc)
        # i.e. make this a leaf node implicitly
        # no child nodes to remove for fold
        if action == 'fold' and self.hero == a_node.data['seat']:
            winnings, losses = self.net(e)
            result = {
                'ev': losses,
                'traversed': 1,
            }
            # logger.info('hero has folded the child node selected: {}'.format(result))
            a_node.data.update(result)
            # logger.info('a_node data after: {}'.format(a_node.data))

        # else we must process the node
        else:
            # logger.info('taking action {} and processing that node'.format(action))
            cmd = [action[0]]
            if 'amount' in a_node.data:
                cmd.append(a_node.data['amount'])
                # logger.debug('Adding bet value of {}'.format(a_node.data['amount']))
            e.do(cmd)
            self.process_node(e, a_node)

        # action node has been processed, now update node
        self.update_node(n)

    def update_node(self, node):
        """Update the node's data

        If leaf, then it was already calculated during processing, and now
        do not change it: the ev is the ev

        Minimax applied, hero pick best and foe picks min after p

        Traversed will stay the traversed_focus level for leaves, but for parent nodes
        the traversed will be the number of leaves reached from that node.
        """
        is_hero = node.data.get('seat') == self.hero
        # logger.debug('is hero? {}'.format(is_hero))

        # it will traverse back up to the root
        # root can be skipped
        if node.is_root():
            # input('hero {} node data {}'.format(self.hero, node.data.get('seat')))
            # if is_hero:
            #     self.rolling_10.append(abs(self.last_ev))
            #     self.rolling_40.append(abs(self.last_ev))
            #     logger.debug('Added {} ev to collection'.format(self.last_ev))
            #     input('Added {} ev to collection'.format(self.last_ev))
            # logger.debug('reached the root')
            # self.update_ev_change()
            return

        # fast forwarding will send here, just ignore node if leaf
        if node.is_leaf():
            # logger.debug('not updating {}: it is final game result (no leaf nodes)'.format(node.tag))
            # logger.debug('not updating {}: final data {}'.format(node.tag, node.data))
            return

        depth = self.tree.depth(node)
        # logger.info('updating node {} at depth {}'.format(node.tag, depth))
        # logger.info('node has {} before update'.format(node.data))

        if not len(node.fpointer):
            # logger.error('node {} with {} as no children...'.format(node.tag, node.data))
            raise Exception('not necessary to process leaves')
        # logger.debug('extracting data from {} children nodes...'.format(len(node.fpointer)))

        n_ev = float('-inf') if is_hero else 0
        n_traversed = 0
        for child_nid in node.fpointer:
            child_node = self.tree[child_nid]
            # logger.debug('child node {} has {}'.format(child_node.tag, child_node.data))
            dat = child_node.data
            if not dat['traversed']:
                # logger.debug('skipping untraversed {}'.format(child_node.tag))
                continue

            # get max for hero
            if is_hero:
                # todo is this +ev dampening necessary
                # todo this should be fixed when setting for hand range
                # equities = PE.showdown_equities(self.engine)
                # n_ev = max(n_ev, dat['ev'] * equities.get(self.hero, 0))
                n_ev = max(n_ev, dat['ev'])

            # foe: accumulate a stats-weighted average over traversed children (the plain min is commented out below)
            else:
                # ev_adj = dat['ev'] * dat['stats']
                # logger.debug('foe min between {} and {}'.format(n_ev, ev_adj))
                # n_ev = min(n_ev, ev_adj)
                n_ev += dat['ev'] * dat['stats'] / dat['divider']

            n_traversed += dat['traversed']
            # logger.debug('added {} traversed: now have {} so far'.format(dat['traversed'], n_traversed))

        self.last_ev = node.data['ev'] - n_ev
        node.data.update({
            'ev': n_ev,
            'traversed': n_traversed,
        })
        # logger.info('now node has {} ev~{} after {}'.format(node.tag, round(n_ev, 3), n_traversed))

        if not node.data['traversed']:
            raise Exception('node cannot be untraversed')

    def net(self, e):
        """Stored the balance at the start of sim.
        Now calculate difference as player total matched contrib.
        Winnings will be less initial starting contrib.
        """
        e.gather_the_money()
        p = e.players[self.hero]
        d = e.data[self.hero]

        matched_diff = d['matched'] - e.matched_start
        # logger.debug('matched diff = {} from {} - {}'.format(matched_diff, d['matched'], e.matched_start))

        winnings = int(e.pot - matched_diff)
        # logger.debug('winnings diff = {} from pot {} less matched {}'.format(winnings, e.pot, matched_diff))

        losses = int(-matched_diff)
        # logger.info('Winnings = {} and losses = {}'.format(winnings, losses))
        return winnings, losses

    def most_probable_action(self, parent):
        """All nodes will be processed once at least but it will never happen. Just return
        the most probable node for most accurate play. Using stats fields on data
        There should not be any untraversed nodes. So first get untraversed, then sort
        and pop first one"""
        # logger.info('getting most probable action after {}'.format(parent.tag))
        children = self.tree.children(parent.identifier)
        children = [c for c in children if not c.data['traversed']]
        if not children:
            raise MonteCarloError('Cannot choose most probable action when all nodes are traversed')
        children.sort(key=lambda c: c.data['stats'], reverse=True)
        child = children[0]
        # logger.debug('{} is untraversed, returning that node for actioning'.format(child.tag))
        self.leaf_path.append(child.identifier)
        return child

    def add_actions(self, e, parent):
        """Add actions available to this node
        If in GG phase then no actions possible, ever.
        Remove 'hand'
        Bets:
            - preflop are 2-4x BB
            - postflop are 40-100% pot
        Raise:
            - always double
        Allin:
            - only on river
            - if out of money then converted to allin

        Scale non-fold probabilities even though it should not have an effect.
        """
        # logger.info('adding actions to {}'.format(parent.tag))
        actions = e.available_actions()
        s, p = e.q[0]
        d = e.data[s]
        balance_left = p['balance'] - d['contrib']

        if not actions:
            # logger.warn('no actions to add to node')
            return

        if 'gg' in actions:
            # logger.debug('no actions available, got gg')
            return

        actions.remove('hand')

        # remove fold if player can check
        if 'check' in actions:
            actions.remove('fold')
            # # logger.debug('removed fold when check available')

        # remove fold for hero
        # if s == self.hero and 'fold' in actions:
        #     actions.remove('fold')
        #     # logger.debug('removed fold from hero')

        # remove raise if player has already been aggressive
        if 'raise' in actions and any(pa['action'] in 'br' for pa in d[e.phase]):
            actions.remove('raise')
            # # logger.debug('removed raise as player has already been aggressive')

        # remove allin, but add it later with final stats (if increased from bet/raised)
        if 'allin' in actions:
            actions.remove('allin')
        # logger.debug('removed allin by default')

        # load stats (codes with counts)
        stats = ES.player_stats(e, s)
        max_contrib = max(pd['contrib'] for pd in e.data.values())
        # contrib_short = max_contrib - d['contrib']

        # allin needs to be the doc count
        # where bets and raises result in allin, add those prob dists to this
        # that will give proper probability
        go_allin = stats['actions'].get('a', 0)

        # # logger.info('filtered actions: {}'.format(actions))
        # ev is 0 instead of None: when not every child is traversed, summing at the root would otherwise error
        action_nodes = []
        for a in actions:
            node_data = {
                'stats': stats['actions'].get(ACTIONS_TO_ABBR[a], 0.01),
                'divider': 1,
                'action': a,
                'phase': e.phase,
                'seat': s,
                'name': p['name'],
                'traversed': 0,
                'ev': 0,
            }

            if a in ['bet', 'raise']:
                btps_and_amts = []
                total_pot = sum(pd['contrib'] for pd in e.data.values()) + e.pot

                # for preflop only do 2x and 3x
                if e.phase == e.PHASE_PREFLOP:
                    btps_and_amts.append(('double', e.bb_amt * 2))
                    btps_and_amts.append(('triple', e.bb_amt * 3))
                # else do half and full pots
                else:
                    btps_and_amts.append(('half_pot', total_pot * 0.50))
                    btps_and_amts.append(('full_pot', total_pot * 1.00))
                    # round bets up to a BB
                    # btps_and_amts = [(btp, -(amt // -e.bb_amt) * e.bb_amt)
                    #                  for btp, amt in btps_and_amts]

                betting_info = []
                amts_seen = []
                for btp, amt in btps_and_amts:
                    if amt in amts_seen:
                        # logger.debug('already using {}, skipping duplicate'.format(amt))
                        continue
                    if a == 'bet' and amt < e.bb_amt:
                        # logger.debug('bet cannot be less than BB {}'.format(e.bb_amt))
                        continue
                    if a == 'raise' and amt < (max_contrib * 2):
                        # logger.debug('raise cannot be less than 2x contrib  of {}'.format(max_contrib * 2))
                        continue
                    betting_info.append((btp, amt))
                    amts_seen.append(amt)

                # change raises that cause allin
                betting_info_final = []
                for btp, amt in betting_info:
                    # if amt is more than player balance, it is an allin
                    if amt >= balance_left:
                        go_allin += node_data['stats'] / len(betting_info)
                    else:
                        betting_info_final.append((btp, amt))

                # all good, can have this bet as option
                for btp, amt in betting_info_final:
                    node_data_copy = deepcopy(node_data)
                    node_data_copy['divider'] = len(betting_info_final)
                    node_data_copy['action'] = f'{a}_{btp}'
                    node_data_copy['amount'] = amt
                    action_nodes.append(node_data_copy)

            else:
                action_nodes.append(node_data)

        # allin will have doc counts (from stat, maybe from bets, maybe from raise)
        if go_allin:
            node_data = {
                'stats': go_allin,
                'divider': 1,
                'action': 'allin',
                'phase': e.phase,
                'seat': s,
                'name': p['name'],
                'traversed': 0,
                'ev': 0,
                'amount': balance_left,
            }
            action_nodes.append(node_data)
            # logger.debug('added allin to actions with stat {}'.format(node_data['stats']))

        # scale the stats (it is currently term counts aka histogram) and it is required to be
        # a probability distribution (p~1)
        # Also, certain actions like fold can be removed, and the total stats is not 1
        total_stats = sum(an['stats'] / an['divider'] for an in action_nodes)
        for action_node in action_nodes:
            action_node['stats'] = max(0.01, action_node['stats'] / action_node['divider'] / total_stats)
            action_node['cum_stats'] = parent.data['cum_stats'] * action_node['stats']
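            # cum_stats is the product of the per-node stats along the path from the root,
            # i.e. the approximate probability of reaching this new node; it is also used
            # as the priority key when the extended path is queued below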
            node_tag = f'{action_node["action"]}_{s}_{e.phase}'
            identifier = f'{node_tag}_{str(uuid.uuid4())[:8]}'
            self.tree.create_node(identifier=identifier, tag=node_tag, parent=parent.identifier, data=action_node)
            # logger.debug('new {} for {} with data {}'.format(node_tag, s, action_node))
            item = (
                1 - action_node['cum_stats'],
                self.leaf_path + [identifier]
            )
            self.queue.put(item)
            # logger.debug('new {} for {} with data {}'.format(node_tag, s, action_node))
        # logger.info('{} node actions added'.format(len(action_nodes)))

    def analyze_tree(self):
        """Analyze tree to inspect best action from ev"""
        # self.tree.show()

        # check all finished paths
        for path in self.tree.paths_to_leaves():

            # skip untraversed end
            last_node = self.tree[path[-1]]
            if not last_node.data['traversed']:
                logger.debug('skipping untraversed endpoint {}'.format(last_node.tag))
                continue

            # show all actions
            for nid in path:
                node = self.tree[nid]
                d = node.data
                logger.info('Node: {} ev={}'.format(node.tag, d['ev']))


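        # deliberate crash (division by zero) to halt execution here while the tree is inspected during debugging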
        0/0
        input('$ check tree')

    def get_showdown_equities(self, e):
        """instead of using pokereval, use hs from se"""
        hss = {}
        for s, d in e.data.items():
            if 'in' in d['status']:
                hss[s] = ES.showdown_hs(e, s, percentile=self.PERCENTILE)
        # calculate for hero
        if self.hero in hss:
            d = e.data[self.hero]
            hss[self.hero] = PE.hand_strength(d['hand'], e.board, e.rivals)
        # normalize
        total = sum(hs for hs in hss.values())
        equities = {s: hs / total for s, hs in hss.items()}
        return equities
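
A minimal, standalone sketch (not part of the example above; the node names and values are illustrative) of the ordering trick used in run() and add_actions(): each leaf path is queued with priority 1 - cum_stats, and because PriorityQueue pops the smallest value first, the most probable paths are simulated first.

from queue import PriorityQueue
from treelib import Tree

tree = Tree()
tree.create_node('root', 'root', data={'cum_stats': 1.0})
tree.create_node('call', 'call', parent='root', data={'cum_stats': 0.6})
tree.create_node('raise', 'raise', parent='root', data={'cum_stats': 0.3})
tree.create_node('fold', 'fold', parent='root', data={'cum_stats': 0.1})

queue = PriorityQueue()
for path in tree.paths_to_leaves():
    leaf = tree[path[-1]]
    queue.put((1 - leaf.data['cum_stats'], path))

while not queue.empty():
    priority, path = queue.get_nowait()
    print(path[-1], round(1 - priority, 2))
# prints: call 0.6, then raise 0.3, then fold 0.1
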
Example #30
0
class RMQueue(object):
    __metaclass__ = Singleton

    def __init__(self):
        self.tree = Tree()
        self.MAX_METRIC_COUNT = 12
        self.CAL_INTERVAL_IN_SECOND = 2 * 60 * 60
        self.conf = conf.Config("./conf/config.json")

    def set_stat_interval(self, interval):
        self.CAL_INTERVAL_IN_SECOND = interval

    def set_system_memory(self, size):
        root = self.get_root()
        root.data.set_abs_memory(float(size))

    def create_queue(self, name=None, parent=None):
        data = QueueData()
        self.tree.create_node(name, name, parent, data)

    def display(self):
        self.tree.show()

    def display_score(self, queue=None, depth=0, table=None, printer=None):
        flag = False
        if queue is None:
            queue = self.get_root()
            flag = True
            table = PrettyTable([
                "QUEUE", "PENDING AVG", "PENDING DIV", "MEMORY USAGE AVG(Q)",
                "MEMORY USAGE AVG(C)", "MEMORY USAGE DIV", "ABS CAPACITY"
            ])
        if table is not None:
            table.add_row([
                queue.tag, 0 if queue.data.get_pending() == 0 else "%.3f" %
                queue.data.get_pending(),
                0 if queue.data.get_pending_div() == 0 else "%.3f" %
                queue.data.get_pending_div(),
                0 if queue.data.get_mem_usage() == 0 else "%.3f" %
                queue.data.get_mem_usage(),
                0 if queue.data.cal_queue_memory_usage() == 0 else "%.3f" %
                queue.data.cal_queue_memory_usage(),
                0 if queue.data.get_mem_usage_div() == 0 else "%.3f" %
                queue.data.get_mem_usage_div(),
                str(0 if queue.data.get_abs_capacity() == 0 else "%.3f" %
                    queue.data.get_abs_capacity()) + " %"
            ])
        if not self.is_leaf(queue.tag):
            children = self.tree.children(queue.tag)
            for child in children:
                self.display_score(child, depth + 1, table)
        if flag:
            if printer is None:
                print('------------' + utils.get_str_time() +
                      ' SCORE ----------')
                print(table)
            else:
                printer.write('\n------------' + utils.get_str_time() +
                              ' SCORE ----------\n')
                printer.write(str(table))

    def display_prediction(self,
                           queue=None,
                           depth=0,
                           table=None,
                           printer=None):
        flag = False
        if queue is None:
            queue = self.get_root()
            flag = True
            table = PrettyTable([
                "QUEUE", "DESIRED CAPACITY(Q)", "DESIRED CAPACITY(C)",
                "ABS CAPACITY"
            ])
        if table is not None:
            table.add_row([
                queue.tag,
                str(0 if queue.data.wish.capacity == 0 else "%.3f" %
                    (100 * queue.data.wish.capacity)) + " %",
                0 if queue.data.wish.abs_capacity == 0 else "%.3f" %
                queue.data.wish.abs_capacity,
                str(0 if queue.data.config.abs_capacity == 0 else "%.3f" %
                    queue.data.config.abs_capacity) + " %"
            ])
        if not self.is_leaf(queue.tag):
            children = self.tree.children(queue.tag)
            for child in children:
                self.display_prediction(child, depth + 1, table)
        if flag:
            if printer is None:
                print('------------' + utils.get_str_time() +
                      ' PREDICTION ----------')
                print(table)
            else:
                printer.write('\n------------' + utils.get_str_time() +
                              ' PREDICTION ----------\n')
                printer.write(str(table))

    def write_score(self, path):
        FileOperator.touch(path)
        with open(path, 'a') as f:
            self.display_score(printer=f)

    def request_score(self, queue=None):
        if queue is None:
            queue = self.get_root()
        postData = {
            'queue': queue.tag,
            'pending': queue.data.get_pending(),
            'pending_div': queue.data.get_pending_div(),
            'memory_usage': queue.data.get_mem_usage(),
            'memory_usage_div': queue.data.get_mem_usage_div(),
            'abs_capacity': queue.data.get_abs_capacity()
        }
        requests.post(str(self.conf.es_rest_address) +
                      str(self.conf.es_index) + "score",
                      data=json.dumps(postData))
        if not self.is_leaf(queue.tag):
            children = self.tree.children(queue.tag)
            for child in children:
                self.request_score(child)

    def request_prediction(self, queue=None):
        if queue is None:
            queue = self.get_root()
        postData = {
            'queue': queue.tag,
            'wish_capacity': queue.data.wish.capacity,
            'wish_abs_capacity': queue.data.wish.abs_capacity,
            'abs_capacity': queue.data.config.abs_capacity
        }
        requests.post(str(self.conf.es_rest_address) +
                      str(self.conf.es_index) + "prediction",
                      data=json.dumps(postData))
        if not self.is_leaf(queue.tag):
            children = self.tree.children(queue.tag)
            for child in children:
                self.request_prediction(child)

    def write_prediction(self, path):
        FileOperator.touch(path)
        with open(path, 'a') as f:
            self.display_prediction(printer=f)

    def add_job(self, job, qname):
        queue = self.tree.get_node(qname)
        if queue.is_leaf():
            queue.data.add_job(job)
        else:
            print("Cannot add jobs to parent queue", queue.tag,
                  queue.identifier)

    def add_metric(self, qname):
        queue = self.tree.get_node(qname)
        queue.data.add_metric(queue.data.cur_metric)
        if len(queue.data.metrics) > self.MAX_METRIC_COUNT:
            del queue.data.metrics[0]

    def remove_queue(self, qname):
        self.tree.remove_node(qname)

    def move_queue(self, src, dest):
        self.tree.move_node(src, dest)

    def get_queue(self, qname):
        return self.tree.get_node(qname)

    def get_root(self):
        return self.get_queue('root')

    def is_leaf(self, qname):
        queue = self.tree.get_node(qname)
        return queue.is_leaf()

    def cal_slowdown(self, queue=None):
        if queue is None:
            queue = self.get_root()

        avg_slowdown = 0.0
        if queue.is_leaf():
            job_count = len(queue.data.jobs)
            for i in list(range(job_count)):
                job = queue.data.jobs[i]
                slowdown = (job.wait_time + job.run_time) / job.run_time
                avg_slowdown += slowdown / job_count
            queue.data.set_job_count(job_count)
            queue.data.cur_metric.slowdown = avg_slowdown
        else:
            children = self.tree.children(queue.tag)
            for child in children:
                self.cal_slowdown(child)

            job_count = 0
            for child in children:
                job_count += child.data.get_job_count()
            queue.data.set_job_count(job_count)

            if job_count == 0:
                queue.data.cur_metric.slowdown = avg_slowdown
                return avg_slowdown

            for child in children:
                avg_slowdown += child.data.get_job_count(
                ) * child.data.get_slowdown() / job_count
            queue.data.cur_metric.slowdown = avg_slowdown
        return queue.data.get_slowdown()

    def cal_pending(self, queue=None):
        if queue is None:
            queue = self.get_root()

        if queue.is_leaf():
            if len(queue.data.pendings) > 0:
                queue.data.cur_metric.pending = np.mean(queue.data.pendings)
        else:
            children = self.tree.children(queue.tag)
            for child in children:
                self.cal_pending(child)
                queue.data.cur_metric.pending += child.data.get_pending()

        return queue.data.get_pending()

    def cal_pending_division(self, queue=None):
        if queue is None:
            queue = self.get_root()

        division = 0.0
        if self.is_leaf(queue.tag):
            return division
        else:
            children = self.tree.children(queue.tag)
            for child in children:
                self.cal_pending_division(child)

            count = len(children)
            avg_pending = queue.data.get_pending() * 1.0 / count
            square_sum = 0.0
            for child in children:
                square_sum += np.square(child.data.get_pending() - avg_pending)

            division = np.sqrt(square_sum / count)
            queue.data.cur_metric.pending_div = division
            return division

    def cal_slowdown_division(self, queue=None):
        if queue is None:
            queue = self.get_root()

        division = 0.0
        if self.is_leaf(queue.tag):
            return division
        else:
            children = self.tree.children(queue.tag)
            for child in children:
                self.cal_slowdown_division(child)

            square_sum = 0.0
            count = len(children)
            for child in children:
                square_sum += np.square(child.data.get_slowdown() -
                                        queue.data.get_slowdown())

            division = np.sqrt(square_sum / count)
            queue.data.cur_metric.slowdown_div = division
            return division

    def cal_memory_usage(self, queue=None):
        if queue is None:
            queue = self.get_root()

        if queue.is_leaf():
            capacity = queue.data.get_abs_capacity()
            memory_usage = 0.0
            if capacity != 0:
                memory_usage = 100.0 * queue.data.cal_queue_memory_usage(
                ) / capacity
            queue.data.set_mem_usage(memory_usage)
        else:
            children = self.tree.children(queue.tag)
            for child in children:
                self.cal_memory_usage(child)

            abs_memory_usage = 0
            for child in children:
                abs_memory_usage += child.data.get_abs_memory_usage()

            queue.data.set_mem_usage(100.0 * abs_memory_usage /
                                     queue.data.get_abs_capacity())
        return queue.data.get_mem_usage()

    def cal_mem_usage_division(self, queue=None):
        if queue is None:
            queue = self.get_root()

        std_division = 0.0
        if self.is_leaf(queue.tag):
            queue.data.cur_metric.mem_usage_div = std_division
            return std_division
        else:
            children = self.tree.children(queue.tag)
            for child in children:
                self.cal_mem_usage_division(child)

            count = len(children)
            total_mem_usage = 0
            for child in children:
                total_mem_usage += child.data.get_mem_usage()
            avg_mem_usage = total_mem_usage / count

            square_sum = 0
            for child in children:
                square_sum += np.square(child.data.get_mem_usage() -
                                        avg_mem_usage)
            std_division = np.sqrt(square_sum / count)
            queue.data.cur_metric.mem_usage_div = std_division
            return std_division

    def cal_abs_capacity_bottom_up(self, queue=None):
        if queue is None:
            queue = self.get_root()

        if self.is_leaf(queue.tag):
            return
        else:
            children = self.tree.children(queue.tag)
            abs_capacity = 0.0
            for child in children:
                self.cal_abs_capacity_bottom_up(child)
                abs_capacity += child.data.get_abs_capacity()
            queue.data.set_abs_capacity(abs_capacity)

    def cal_desired_abs_capacity_bottom_up(self, queue=None, delim=None):
        if queue is None:
            queue = self.get_root()
            delim = 1
        if self.is_leaf(queue.tag):
            queue.data.wish.capacity = queue.data.wish.abs_capacity / delim
        else:
            children = self.tree.children(queue.tag)
            abs_capacity = 0.0
            for child in children:
                self.cal_desired_abs_capacity_bottom_up(
                    child, queue.data.config.abs_capacity * delim / 100)
                abs_capacity += child.data.wish.abs_capacity

            queue.data.wish.capacity = abs_capacity / delim

    def cal_abs_capacity_top_down(self, queue=None):
        if queue is None:
            queue = self.get_root()
            queue.data.set_abs_capacity(100.0)

        if self.is_leaf(queue.tag):
            return
        else:
            children = self.tree.children(queue.tag)
            for child in children:
                child.data.set_abs_capacity(queue.data.get_abs_capacity() *
                                            child.data.get_capacity() / 100.0)
                self.cal_abs_capacity_top_down(child)

    def cal_desired_capacity_top_down(self, queue=None):
        if queue is None:
            queue = self.get_root()
            queue.data.wish.capacity = 100.0

        if self.is_leaf(queue.tag):
            return
        else:
            children = self.tree.children(queue.tag)
            abs_capacity = queue.data.wish.abs_capacity
            for child in children:
                child.data.wish.capacity = child.data.config.capacity
                if abs_capacity == 0:
                    child.data.wish.capacity = 0
                else:
                    child.data.wish.capacity = child.data.wish.abs_capacity / abs_capacity * 100.0
                self.cal_desired_capacity_top_down(child)

    def cal_capacity_top_down(self, queue=None):
        if queue is None:
            queue = self.get_root()

        if self.is_leaf(queue.tag):
            return
        else:
            children = self.tree.children(queue.tag)
            abs_capacity = queue.data.get_abs_capacity()
            for child in children:
                if abs_capacity == 0:
                    child.data.set_capacity(0)
                else:
                    child.data.set_capacity(child.data.get_abs_capacity() /
                                            abs_capacity * 100)
                self.cal_capacity_top_down(child)

    def cal_abs_memory_top_down(self, queue=None):
        if queue is None:
            queue = self.get_root()
            queue.data.cal_totalMb_mean()

        if self.is_leaf(queue.tag):
            return
        else:
            children = self.tree.children(queue.tag)
            for child in children:
                child.data.set_abs_memory(queue.data.get_abs_memory() *
                                          child.data.get_capacity() / 100)
                self.cal_abs_memory_top_down(child)

    def clear_mus_top_down(self, queue=None):
        if queue is None:
            queue = self.get_root()

        if self.is_leaf(queue.tag):
            queue.data.clear_queue_memory_usage()
        else:
            children = self.tree.children(queue.tag)
            for child in children:
                self.clear_mus_top_down(child)

    def clear_jobs_top_down(self, queue=None):
        if queue is None:
            queue = self.get_root()

        if self.is_leaf(queue.tag):
            queue.data.clear_jobs()
        else:
            children = self.tree.children(queue.tag)
            for child in children:
                self.clear_jobs_top_down(child)

    def clear_pendings_top_down(self, queue=None):
        if queue is None:
            queue = self.get_root()

        if self.is_leaf(queue.tag):
            queue.data.clear_pendings()
        else:
            children = self.tree.children(queue.tag)
            for child in children:
                self.clear_pendings_top_down(child)

    def score(self):
        # self.cal_abs_capacity_bottom_up()
        # self.cal_capacity_top_down()
        # self.cal_abs_memory_top_down()
        # self.cal_slowdown()
        # self.cal_slowdown_division()
        self.cal_pending()
        self.cal_pending_division()
        self.cal_memory_usage()
        self.cal_mem_usage_division()
        # self.clear_jobs_top_down()
        # self.clear_pendings_top_down()
        # self.clear_mus_top_down()

    def predict(self):
        self.cal_desired_abs_capacity_bottom_up()
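
A standalone sketch (illustrative queue names, not taken from the example above) of the bottom-up aggregation pattern behind cal_abs_capacity_bottom_up() and cal_pending(): leaves keep their own value and every parent is recomputed as the sum of its children.

from treelib import Tree

tree = Tree()
tree.create_node('root', 'root', data={'capacity': 0.0})
tree.create_node('batch', 'batch', parent='root', data={'capacity': 40.0})
tree.create_node('interactive', 'interactive', parent='root', data={'capacity': 0.0})
tree.create_node('adhoc', 'adhoc', parent='interactive', data={'capacity': 25.0})
tree.create_node('bi', 'bi', parent='interactive', data={'capacity': 35.0})

def roll_up(nid='root'):
    # a leaf returns its own value; a parent becomes the sum of its children
    node = tree.get_node(nid)
    children = tree.children(nid)
    if not children:
        return node.data['capacity']
    node.data['capacity'] = sum(roll_up(c.identifier) for c in children)
    return node.data['capacity']

roll_up()
print(tree.get_node('interactive').data['capacity'])  # 60.0
print(tree.get_node('root').data['capacity'])         # 100.0
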
Example #31
0
class PathList:
    def __init__(self, disk):
        self._tree = Tree()
        self._disk = disk
        self._tree.create_node(tag='root', identifier='root')
        self.depth = 3

    def update_path_list(self, file_id='root', depth=None, is_fid=True):
        if depth is None:
            depth = self.depth
        if not is_fid:
            file_id = self.get_path_fid(file_id, update=False)
        file_list = self._disk.get_file_list(file_id)
        if not file_list:
            return False
        for i in file_list:
            if i['type'] == 'file':
                file_info = FileInfo(
                    name=i['name'],
                    id=i['file_id'],
                    pid=i['parent_file_id'],
                    type=True,
                    ctime=time.strptime(i['created_at'],
                                        '%Y-%m-%dT%H:%M:%S.%fZ'),
                    update_time=time.strptime(i['updated_at'],
                                              '%Y-%m-%dT%H:%M:%S.%fZ'),
                    hidden=i['hidden'],
                    category=i['category'],
                    content_type=i['content_type'],
                    size=i['size'],
                    content_hash_name=i['content_hash_name'],
                    content_hash=i['content_hash'],
                    download_url=i['download_url']
                    if 'download_url' in i else '')
            else:
                file_info = FileInfo(
                    name=i['name'],
                    id=i['file_id'],
                    pid=i['parent_file_id'],
                    type=False,
                    ctime=time.strptime(i['created_at'],
                                        '%Y-%m-%dT%H:%M:%S.%fZ'),
                    update_time=time.strptime(i['updated_at'],
                                              '%Y-%m-%dT%H:%M:%S.%fZ'),
                    hidden=i['hidden'])
            if self._tree.get_node(file_info.id):
                self._tree.update_node(file_info.id, data=file_info)
            else:
                self._tree.create_node(tag=file_info.name,
                                       identifier=file_info.id,
                                       data=file_info,
                                       parent=file_id)
            if not file_info.type and depth:
                self.update_path_list(file_id=file_info.id, depth=depth - 1)
        return True

    def tree(self, path='root'):
        file_id = self.get_path_fid(path, update=False)
        self.update_path_list(file_id)
        if not file_id:
            raise FileNotFoundError(path)
        self._tree.show(file_id)

    def get_path_list(self, path, update=True):
        file_id = self.get_path_fid(path, update=update)
        return self.get_fid_list(file_id, update=update)

    def get_fid_list(self, file_id, update=True):
        if not file_id:
            raise FileNotFoundError(file_id)
        self.auto_update_path_list(update, file_id)
        if file_id != 'root' and self._tree.get_node(file_id).data.type:
            return [self._tree.get_node(file_id).data]
        return [i.data for i in self._tree.children(file_id)]

    def get_path_fid(self, path, file_id='root', update=True):
        path = PurePosixPath(Path(path).as_posix())
        if str(path) in ('', '/', '\\', '.', 'root'):
            return 'root'
        flag = False
        path_list = list(filter(None, str(path).split('/')))
        if path_list[0] == 'root':
            path_list = path_list[1:]
        for i in path_list:
            flag = False
            node_list = self._tree.children(file_id)
            if not node_list:
                self.auto_update_path_list(update, file_id)
                node_list = self._tree.children(file_id)
            for j in node_list:
                if i == j.tag:
                    flag = True
                    file_id = j.identifier
                    break
            if not flag:
                return False
        if flag:
            return file_id
        return False

    def get_path_node(self, path, update=True):
        file_id = self.get_path_fid(path, update=update)
        if file_id:
            return self._tree.get_node(file_id)
        return False

    def get_path_parent_node(self, path, update=True):
        file_id = self.get_path_fid(path, update=update)
        if file_id:
            node = self._tree.parent(file_id)
            if node:
                return node
        return False

    def auto_update_path_list(self, update=True, file_id=None):
        if not update and file_id:
            return self.update_path_list(file_id, depth=0)
        elif update and len(self._tree) == 1:
            return self.update_path_list()
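
A standalone sketch (hypothetical tags and ids) of the path-resolution walk used in PathList.get_path_fid(): split the path into parts and descend one tree level per part by matching it against the children's tags.

from treelib import Tree

tree = Tree()
tree.create_node(tag='root', identifier='root')
tree.create_node(tag='docs', identifier='fid-1', parent='root')
tree.create_node(tag='notes.txt', identifier='fid-2', parent='fid-1')

def resolve(path, start='root'):
    fid = start
    for part in filter(None, path.strip('/').split('/')):
        # descend into the child whose tag matches this path component
        match = next((c for c in tree.children(fid) if c.tag == part), None)
        if match is None:
            return None
        fid = match.identifier
    return fid

print(resolve('/docs/notes.txt'))  # fid-2
print(resolve('/docs/missing'))    # None
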
Example #32
0
class SAGG_BRIAC():
    def __init__(self, min, max, temperature=20):  # example --> min: [-1,-1] max: [1,1]

        assert len(min) == len(max)
        self.maxlen = 200
        self.window_cp = 200
        self.minlen = self.maxlen / 20
        self.maxregions = 80

        # init regions' tree
        self.tree = Tree()
        self.regions_bounds = [Box(min, max, dtype=np.float32)]
        self.interest = [0.]
        self.tree.create_node('root','root',data=Region(maxlen=self.maxlen,
                                                        cps_gs=[deque(maxlen=self.maxlen + 1), deque(maxlen=self.maxlen + 1)],
                                                        bounds=self.regions_bounds[-1], interest=self.interest[-1]))
        self.nb_dims = len(min)
        self.temperature = temperature
        self.nb_split_attempts = 50
        self.max_difference = 0.2
        self.init_size = max - min
        self.ndims = len(min)
        self.mode_3_noise = 0.1

        # book-keeping
        self.sampled_tasks = []
        self.all_boxes = []
        self.all_interests = []
        self.update_nb = 0
        self.split_iterations = []

    def compute_interest(self, sub_region):
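        # interest = absolute difference between the mean competence of the older half and
        # the most recent half of the window, i.e. an absolute learning-progress estimate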
        if len(sub_region[0]) > self.minlen:  # TRICK NB 4
            cp_window = min(len(sub_region[0]), self.window_cp)  # not completely window
            half = int(cp_window / 2)
            # print(str(cp_window) + 'and' + str(half))
            first_half = np.array(sub_region[0])[-cp_window:-half]
            snd_half = np.array(sub_region[0])[-half:]
            diff = first_half.mean() - snd_half.mean()
            cp = np.abs(diff)
        else:
            cp = 0
        interest = np.abs(cp)
        return interest

    def split(self, nid):
        # try nb_split_attempts splits
        reg = self.tree.get_node(nid).data
        best_split_score = 0
        best_abs_interest_diff = 0
        best_bounds = None
        best_sub_regions = None
        is_split = False
        for i in range(self.nb_split_attempts):
            sub_reg1 = [deque(maxlen=self.maxlen + 1), deque(maxlen=self.maxlen + 1)]
            sub_reg2 = [deque(maxlen=self.maxlen + 1), deque(maxlen=self.maxlen + 1)]

            # repeat until the two sub regions contain at least minlen of the mother region TRICK NB 1
            while len(sub_reg1[0]) < self.minlen or len(sub_reg2[0]) < self.minlen:
                # decide on dimension
                dim = np.random.choice(range(self.nb_dims))
                threshold = reg.bounds.sample()[dim]
                bounds1 = Box(reg.bounds.low, reg.bounds.high, dtype=np.float32)
                bounds1.high[dim] = threshold
                bounds2 = Box(reg.bounds.low, reg.bounds.high, dtype=np.float32)
                bounds2.low[dim] = threshold
                bounds = [bounds1, bounds2]
                valid_bounds = True
                if np.any(bounds1.high - bounds1.low < self.init_size / 15):  # to enforce not too small boxes TRICK NB 2
                    valid_bounds = False
                if np.any(bounds2.high - bounds2.low < self.init_size / 15):
                    valid_bounds = False

                # perform split in sub regions
                sub_reg1 = [deque(maxlen=self.maxlen + 1), deque(maxlen=self.maxlen + 1)]
                sub_reg2 = [deque(maxlen=self.maxlen + 1), deque(maxlen=self.maxlen + 1)]
                for i, task in enumerate(reg.cps_gs[1]):
                    if bounds1.contains(task):
                        sub_reg1[1].append(task)
                        sub_reg1[0].append(reg.cps_gs[0][i])
                    else:
                        sub_reg2[1].append(task)
                        sub_reg2[0].append(reg.cps_gs[0][i])
                sub_regions = [sub_reg1, sub_reg2]

            # compute interest
            interest = [self.compute_interest(sub_reg1), self.compute_interest(sub_reg2)]

            # compute score
            split_score = len(sub_reg1) * len(sub_reg2) * np.abs(interest[0] - interest[1])
            if split_score >= best_split_score and valid_bounds: # TRICK NB 3, max diff #and np.abs(interest[0] - interest[1]) >= self.max_difference / 8
                is_split = True
                best_abs_interest_diff = np.abs(interest[0] - interest[1])
                best_split_score = split_score
                best_sub_regions = sub_regions
                best_bounds = bounds

        if is_split:
            if best_abs_interest_diff > self.max_difference:
                self.max_difference = best_abs_interest_diff
            # add new nodes to tree
            for i, (cps_gs, bounds) in enumerate(zip(best_sub_regions, best_bounds)):
                self.tree.create_node(parent=nid, data=Region(self.maxlen, cps_gs=cps_gs, bounds=bounds, interest=interest[i]))
        else:
            #print("abort mission")
            # TRICK NB 6, remove old stuff if can't find split
            assert len(reg.cps_gs[0]) == (self.maxlen + 1)
            reg.cps_gs[0] = deque(islice(reg.cps_gs[0], int(self.maxlen / 4), self.maxlen + 1))
            reg.cps_gs[1] = deque(islice(reg.cps_gs[1], int(self.maxlen / 4), self.maxlen + 1))

        return is_split

    def merge(self, all_nodes):
        # get a list of children pairs
        parent_children = []
        for n in all_nodes:
            if not n.is_leaf():  # if node is a parent
                children = self.tree.children(n.identifier)
                if children[0].is_leaf() and children[1].is_leaf():  # both children must be leaves for an easy remove
                    parent_children.append([n, children])  # [parent, [child1, child2]]

        # sort the pairs of children by the absolute difference of their interests (most similar first)
        parent_children.sort(key=lambda x: np.abs(x[1][0].data.interest - x[1][1].data.interest), reverse=False)

        # remove useless pair
        child1 = parent_children[0][1][0]
        child2 = parent_children[0][1][1]
        # print("just removed {} and {}, daddy is: {}, childs: {}".format(child1.identifier, child2.identifier,
        #                                                                 parent_children[0][0].identifier,
        #                                                                 self.tree.children(
        #
        # print("bef")  #                                                               parent_children[0][0].identifier)))
        # print([n.identifier for n in self.tree.all_nodes()])
        self.tree.remove_node(child1.identifier)
        self.tree.remove_node(child2.identifier)
        # print("aff remove {} and {}".format(child1.identifier), child2.identifier)
        # print([n.identifier for n in self.tree.all_nodes()])

        # remove 1/4 of parent to avoid falling in a splitting-merging loop
        dadta = parent_children[0][0].data  # hahaha!
        dadta.cps_gs[0] = deque(islice(dadta.cps_gs[0], int(self.maxlen / 4), self.maxlen + 1))
        dadta.cps_gs[1] = deque(islice(dadta.cps_gs[1], int(self.maxlen / 4), self.maxlen + 1))
        self.nodes_to_recompute.append(parent_children[0][0].identifier)

        # remove children from the recompute list if they were touched when adding the current task
        if child1.identifier in self.nodes_to_recompute:
            self.nodes_to_recompute.pop(self.nodes_to_recompute.index(child1.identifier))
        if child2.identifier in self.nodes_to_recompute:
            self.nodes_to_recompute.pop(self.nodes_to_recompute.index(child2.identifier))




    def add_task_comp(self, node, task, comp):
        reg = node.data
        nid = node.identifier
        if reg.bounds.contains(task): # task falls within region
            self.nodes_to_recompute.append(nid)
            children = self.tree.children(nid)
            for n in children: # if task in region, task is in one sub-region
                self.add_task_comp(n, task, comp)

            need_split = reg.add(task, comp, children == []) # COPY ALL MODE
            if need_split:
                self.nodes_to_split.append(nid)


    def update(self, task, continuous_competence, all_raw_rewards):
        # add new (task, competence) to regions nodes
        self.nodes_to_split = []
        self.nodes_to_recompute = []
        new_split = False
        root = self.tree.get_node('root')
        self.add_task_comp(root, task, continuous_competence)
        #print(self.nodes_to_split)
        assert len(self.nodes_to_split) <= 1

        # split a node if needed
        need_split = len(self.nodes_to_split) == 1
        if need_split:
            new_split = self.split(self.nodes_to_split[0])
            if new_split:
                self.update_nb += 1
                #print(self.update_nb)
                # update list of regions_bounds
                all_nodes = self.tree.all_nodes()
                if len(all_nodes) > self.maxregions:  # too many regions, lets merge one of them
                    self.merge(all_nodes)
                    all_nodes = self.tree.all_nodes()
                self.regions_bounds = [n.data.bounds for n in all_nodes]

        # recompute interests of touched nodes
        #print(self.nodes_to_recompute)
        for nid in self.nodes_to_recompute:
            #print(nid)
            node = self.tree.get_node(nid)
            reg = node.data
            reg.interest = self.compute_interest(reg.cps_gs)

        # collect new interests and new [comp, tasks] lists
        all_nodes = self.tree.all_nodes()
        self.interest = []
        self.cps_gs = []
        for n in all_nodes:
            self.interest.append(n.data.interest)
            self.cps_gs.append(n.data.cps_gs)

        # bk-keeping
        self.all_boxes.append(copy.copy(self.regions_bounds))
        self.all_interests.append(copy.copy(self.interest))
        self.split_iterations.append(self.update_nb)
        assert len(self.interest) == len(self.regions_bounds)

        return new_split, None

    def draw_random_task(self):
        return self.regions_bounds[0].sample()  # first region is root region

    def sample_task(self, args):
        mode = np.random.rand()
        if mode < 0.1:  # "mode 3" (10%) -> sample on regions and then mutate lowest-performing task in region
            if len(self.sampled_tasks) == 0:
                self.sampled_tasks.append(self.draw_random_task())
            else:
                region_id = proportional_choice(self.interest, eps=0.0)
                worst_task_idx = np.argmin(self.cps_gs[region_id][0])
                # mutate task by a small amount (i.e a gaussian scaled to the regions range)
                task = np.random.normal(self.cps_gs[region_id][1][worst_task_idx].copy(), 0.1)
                # clip to stay within region (add small epsilon to avoid falling in multiple regions)
                task = np.clip(task, self.regions_bounds[region_id].low + 1e-5, self.regions_bounds[region_id].high - 1e-5)
                self.sampled_tasks.append(task)

        elif mode < 0.3:  # "mode 2" (20%) -> random task
            self.sampled_tasks.append(self.draw_random_task())

        else:  # "mode 1" (70%) -> sampling on regions and then random task in selected region
            region_id = proportional_choice(self.interest, eps=0.0)
            self.sampled_tasks.append(self.regions_bounds[region_id].sample())


        # # sample region
        # if np.random.rand() < 0.2:
        #     region_id = np.random.choice(range(self.nb_regions))
        # else:
        #     region_id = np.random.choice(range(self.nb_regions), p=np.array(self.probas))

        # # sample task
        # self.sampled_tasks.append(self.regions_bounds[region_id].sample())
        #
        # return self.sampled_tasks[-1].tolist()
        # sample region
        # region_id = proportional_choice(self.interest, eps=0.2)
        # # sample task
        # self.sampled_tasks.append(self.regions_bounds[region_id].sample())

        return self.sampled_tasks[-1]

    def dump(self, dump_dict):
        dump_dict['all_boxes'] = self.all_boxes
        dump_dict['split_iterations'] = self.split_iterations
        dump_dict['all_interests'] = self.all_interests
        return dump_dict

    @property
    def nb_regions(self):
        return len(self.regions_bounds)

    @property
    def get_regions(self):
        return self.regions_bounds
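
A standalone sketch (hypothetical interest values) of the merge step in SAGG_BRIAC.merge(): find the parent whose two leaf children have the most similar interest and prune both leaves, so the parent becomes a leaf again.

from treelib import Tree

tree = Tree()
tree.create_node('root', 'root', data={'interest': 0.0})
tree.create_node('left', 'left', parent='root', data={'interest': 0.30})
tree.create_node('right', 'right', parent='root', data={'interest': 0.32})

# collect parents whose two children are both leaves
pairs = []
for node in tree.all_nodes():
    children = tree.children(node.identifier)
    if len(children) == 2 and all(c.is_leaf() for c in children):
        pairs.append((node, children))

# most similar pair of interests first, then prune both leaves
pairs.sort(key=lambda pc: abs(pc[1][0].data['interest'] - pc[1][1].data['interest']))
parent, (child1, child2) = pairs[0]
tree.remove_node(child1.identifier)
tree.remove_node(child2.identifier)
print([n.identifier for n in tree.all_nodes()])  # ['root']
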
class RMQueue(metaclass=Singleton):
    MAX_METRIC_COUNT = 12
    CAL_INTERVAL_IN_SECOND = 2 * 60 * 60  # 2hours

    def __init__(self):
        self.tree = Tree()

    def set_stat_interval(self, interval):
        RMQueue.CAL_INTERVAL_IN_SECOND = interval

    def set_system_memory(self, size):
        root = self.get_root()
        root.data.set_abs_memory(float(size))

    def create_queue(self, name=None, parent=None):
        data = QueueData()
        self.tree.create_node(name, name, parent, data)

    def display(self):
        self.tree.show()

    def display_score_old(self, queue=None, depth=0):
        if queue is None:
            queue = self.get_root()
            print('------------' + utils.get_str_time() + ' SCORE ----------')
            print(24 * ' ' + '     SLOWDOWN                MEMORY USAGE ')
            print('QUEUE NAME' + 16 * ' ' +
                  ' AVG        DIV            AVG        DIV')

        if depth >= 0:
            print(queue.tag + (22 - len(queue.tag))*' ' + \
                               '%8.3f' % queue.data.get_slowdown(),  \
                               '  %8.3f    ' % queue.data.get_slowdown_div(), \
                               '  %8.3f' % queue.data.get_mem_usage(), \
                               '  %8.3f' % queue.data.get_mem_usage_div())
            """                                              
             print(queue.tag, '(slowdown: %.3f' % queue.data.get_slowdown(), \
                              'div: %.3f)' % queue.data.get_slowdown_div(), \
                              '(mem usage: %.3f' % queue.data.get_mem_usage(), \
                              'div: %.3f)' % queue.data.get_mem_usage_div())
             """
        else:
            print('-'*depth + queue.tag, '(slowdown: %.3f' % queue.data.get_slowdown(), \
                             'div: %.3f)' % queue.data.get_slowdown_div(), \
                             '(mem usage: %.3f' % queue.data.get_mem_usage(), \
                             'div: %.3f)' % queue.data.get_mem_usage_div())

        if not self.is_leaf(queue.tag):
            children = self.tree.children(queue.tag)
            for child in children:
                self.display_score_old(child, depth + 2)

    def display_score(self, queue=None, depth=0):
        if queue is None:
            queue = self.get_root()
            print('------------' + utils.get_str_time() + ' SCORE ----------')
            print(24 * ' ' + '     PENDING                 MEMORY USAGE ')
            print('QUEUE NAME' + 16 * ' ' +
                  ' AVG        DIV            AVG        DIV')

        if depth >= 0:
            print(queue.tag + (22 - len(queue.tag))*' ' + \
                               '%8.3f' % queue.data.get_pending(),  \
                               '  %8.3f    ' % queue.data.get_pending_div(), \
                               '  %8.3f' % queue.data.get_mem_usage(), \
                               '  %8.3f' % queue.data.get_mem_usage_div())
        else:
            print('-'*depth + queue.tag, '(slowdown: %.3f' % queue.data.get_slowdown(), \
                             'div: %.3f)' % queue.data.get_slowdown_div(), \
                             '(mem usage: %.3f' % queue.data.get_mem_usage(), \
                             'div: %.3f)' % queue.data.get_mem_usage_div())

        if not self.is_leaf(queue.tag):
            children = self.tree.children(queue.tag)
            for child in children:
                self.display_score(child, depth + 2)

    def display_prediction(self, queue=None, depth=0):
        if queue is None:
            queue = self.get_root()
            print('------------' + utils.get_str_time() +
                  ' PREDICTION ----------')
            print('QUEUE NAME            DESIRED CAPACITY')

        if depth >= 0:
            print(queue.tag + (22 - len(queue.tag)) * ' ',
                  ' %8.3f' % queue.data.wish.capacity)
            # print(queue.tag, 'desired capacity: %.3f' % queue.data.wish.capacity)
        else:
            print('-' * depth + queue.tag,
                  'desired capacity: %.3f' % queue.data.wish.capacity)

        if not self.is_leaf(queue.tag):
            children = self.tree.children(queue.tag)
            for child in children:
                self.display_prediction(child, depth + 2)

    def write_score(self, path):
        with open(path, 'a') as f:
            self.write_score_top_down(output=f)

    def write_score_top_down_old(self, queue=None, depth=0, output=None):
        if queue is None:
            queue = self.get_root()
            output.writelines(
                ('\n---------', utils.get_str_time(), '  SCORE ---------\n'))
            output.writelines(24 * ' ' +
                              '     SLOWDOWN                MEMORY USAGE\n')
            output.writelines('QUEUE NAME' + 16 * ' ' +
                              ' AVG        DIV            AVG        DIV\n')

        if depth >= 0:
            output.writelines(queue.tag + (22 - len(queue.tag))*' ' + \
                                '%8.3f' % queue.data.get_slowdown() +   \
                                '  %8.3f    ' % queue.data.get_slowdown_div() + \
                                '  %8.3f' % queue.data.get_mem_usage() + \
                                '  %8.3f' % queue.data.get_mem_usage_div() + '\n')
            """
            output.writelines( (queue.tag, ' (slowdown: %.3f' % queue.data.get_slowdown(), \
                              ' div: %.3f)' % queue.data.get_slowdown_div(), \
                              ' (mem usage: %.3f' % queue.data.get_mem_usage(), \
                              ' div: %.3f)' % queue.data.get_mem_usage_div(), '\n'))
            """
        else:
            output.writelines(('-'*depth + queue.tag, ' (slowdown: %.3f' % queue.data.get_slowdown(), \
                              ' div: %.3f)' % queue.data.get_slowdown_div(), \
                              ' (mem usage: %.3f' % queue.data.get_mem_usage(), \
                              ' div: %.3f)' % queue.data.get_mem_usage_div(), '\n'))

        if not self.is_leaf(queue.tag):
            children = self.tree.children(queue.tag)
            for child in children:
                self.write_score_top_down_old(child, depth + 2, output)

    def write_score_top_down(self, queue=None, depth=0, output=None):
        if queue is None:
            queue = self.get_root()
            output.writelines(
                ('\n---------', utils.get_str_time(), '  SCORE ---------\n'))
            output.writelines(24 * ' ' +
                              '     PENDING                 MEMORY USAGE\n')
            output.writelines('QUEUE NAME' + 16 * ' ' +
                              ' AVG        DIV            AVG        DIV\n')

        output.writelines(queue.tag + (22 - len(queue.tag))*' ' + \
                            '%8.3f' % queue.data.get_pending() +   \
                            '  %8.3f    ' % queue.data.get_pending_div() + \
                            '  %8.3f' % queue.data.get_mem_usage() + \
                            '  %8.3f' % queue.data.get_mem_usage_div() + '\n')

        if not self.is_leaf(queue.tag):
            children = self.tree.children(queue.tag)
            for child in children:
                self.write_score_top_down(child, depth + 2, output)

    def write_prediction(self, path):
        with open(path, 'a') as f:
            self.write_prediction_top_down(output=f)

    def write_prediction_top_down(self, queue=None, depth=0, output=None):
        if queue is None:
            queue = self.get_root()
            output.writelines(('\n---------', utils.get_str_time(),
                               '  PREDICTION---------\n'))
            output.writelines('QUEUE NAME            DESIRED CAPACITY\n')

        if depth >= 0:
            output.writelines(queue.tag + (22 - len(queue.tag)) * ' ' +
                              ' %8.3f' % queue.data.wish.capacity + '\n')
            # output.writelines( (queue.tag, ' desired capacity: %.3f' % queue.data.wish.capacity, '\n'))
        else:
            output.writelines(('-'*depth + queue.tag, \
                                ' desired capacity: %.3f' % queue.data.wish.capacity, '\n'))

        if not self.is_leaf(queue.tag):
            children = self.tree.children(queue.tag)
            for child in children:
                self.write_prediction_top_down(child, depth + 2, output)

    def add_job(self, job, qname):
        queue = self.tree.get_node(qname)
        if queue.is_leaf():
            queue.data.add_job(job)
        else:
            print("Cannot add jobs to parent queue", queue.tag,
                  queue.identifier)

    def add_metric(self, qname):
        queue = self.tree.get_node(qname)
        queue.data.add_metric(queue.data.cur_metric)
        if len(queue.data.metrics) > RMQueue.MAX_METRIC_COUNT:
            del queue.data.metrics[0]

    def remove_queue(self, qname):
        """
        Remove a queue indicated by 'qname'; all the successors are
        removed as well.
        Return the number of removed nodes.
        """
        return self.tree.remove_node(qname)

    def move_queue(self, src, dest):
        """
        Move a queue indicated by @src parameter to be a child of
        @dest.
        """
        self.tree.move_node(src, dest)

    def get_queue(self, qname):
        return self.tree.get_node(qname)

    def get_root(self):
        return self.get_queue('root')

    def is_leaf(self, qname):
        queue = self.tree.get_node(qname)
        return queue.is_leaf()

    def cal_slowdown(self, queue=None):
        """
        if current queue is a leaf queue:
            calculate the average slowdown of its jobs.
        else:
            calculate the average slowdown of its children;
            calculate the average slowdown of the current queue from its children's
            average slowdowns, weighted by job count.
        """
        if queue is None:
            queue = self.get_root()

        avg_slowdown = 0.0
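        # Worked example (illustrative, not from the original source): a leaf with
        # jobs (wait, run) = (10, 10) and (0, 20) has slowdowns 2.0 and 1.0, so its
        # avg_slowdown is 1.5. A parent whose children hold 2 and 6 jobs with
        # slowdowns 1.5 and 3.0 ends up with (2 * 1.5 + 6 * 3.0) / 8 = 2.625.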
        if queue.is_leaf():
            job_count = len(queue.data.jobs)
            for job in queue.data.jobs:
                slowdown = (job.wait_time + job.run_time) / job.run_time
                avg_slowdown += slowdown / job_count
            queue.data.set_job_count(job_count)
            queue.data.cur_metric.slowdown = avg_slowdown
        else:  # parent queue
            # First, get all of its children queues and call each child's cal_slowdown
            children = self.tree.children(queue.tag)
            for child in children:
                self.cal_slowdown(child)

            # Second, get the job count
            job_count = 0
            for child in children:
                job_count += child.data.get_job_count()
            queue.data.set_job_count(job_count)

            # Finally, calculate the average slowdown of the queue
            if job_count == 0:
                queue.data.cur_metric.slowdown = avg_slowdown
                return avg_slowdown

            for child in children:
                avg_slowdown += child.data.get_job_count(
                ) * child.data.get_slowdown() / job_count
            queue.data.cur_metric.slowdown = avg_slowdown
        return queue.data.get_slowdown()

    def cal_pending(self, queue=None):
        """
        if current queue is a leaf queue:
            calculate the average pending count from its pending records.
        else:
            calculate the pending of its children;
            calculate the pending of the current queue as the sum of its children's pending.
        """
        if queue is None:
            queue = self.get_root()
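        # Worked example (illustrative): leaves average their own pending records;
        # a parent whose children report pending 2.0 and 5.0 ends up with 7.0.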

        if queue.is_leaf():
            queue.data.cal_leaf_pending()
        else:  # parent queue
            # First, get all of its children queues and call each child's cal_pending
            # Second, accumulate the sum of all its children's pending
            children = self.tree.children(queue.tag)
            for child in children:
                self.cal_pending(child)
                queue.data.cur_metric.pending += child.data.get_pending()

        return queue.data.get_pending()

    def cal_pending_division(self, queue=None):
        """
        if current queue is a leaf queue:
            the standard deviation is zero.
        else:
            calculate the standard deviation of its children;
            calculate the standard deviation of the current queue from its children's average pending.
        """
        if queue is None:
            queue = self.get_root()

        division = 0.0
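        # Worked example (illustrative): children with pending 2, 4 and 6 give the
        # parent pending 12 (see cal_pending), so avg_pending = 4 and the standard
        # deviation is sqrt(((2-4)^2 + (4-4)^2 + (6-4)^2) / 3) = sqrt(8/3) ~= 1.63.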
        if self.is_leaf(queue.tag):
            return division
        else:  # parent queue
            children = self.tree.children(queue.tag)
            # First, get all of its children queues and call each child's cal_pending_division
            for child in children:
                self.cal_pending_division(child)

            # Second, calculate the sum of squared deviations
            count = len(children)
            avg_pending = queue.data.get_pending() * 1.0 / count
            squareSum = 0.0
            for child in children:
                squareSum += np.square(child.data.get_pending() - avg_pending)

            # Finally, calculate the standard deviation of the queue
            # if count == 0:
            #    queue.data.cur_metric.slowdown_div = division
            #    return division
            division = np.sqrt(squareSum / count)
            queue.data.cur_metric.pending_div = division
            return division

    def cal_slowdown_division(self, queue=None):
        """
        if current queue is a leaf queue:
            the standard deviation is zero.
        else:
            calculate the standard deviation of its children;
            calculate the standard deviation of the current queue from its children's average slowdown.
        """
        if queue is None:
            queue = self.get_root()

        division = 0.0
        if self.is_leaf(queue.tag):
            return division
        else:  # parent queue
            children = self.tree.children(queue.tag)
            # First, get all of its children queues and call each child's cal_slowdown_division
            for child in children:
                self.cal_slowdown_division(child)

            # Second, calculate the sum of squared deviations
            squareSum = 0.0
            count = len(children)
            for child in children:
                squareSum += np.square(child.data.get_slowdown() -
                                       queue.data.get_slowdown())

            # Finally, calculate the standard deviation of the queue
            # if count == 0:
            #    queue.data.cur_metric.slowdown_div = division
            #    return division
            division = np.sqrt(squareSum / count)
            queue.data.cur_metric.slowdown_div = division
            return division

    def cal_memory_usage_old(self, queue=None):
        """
        if current queue is a leaf queue:
            MemoryUsage is (sum of job memorySeconds) / (self.absMemory * CAL_INTERVAL_IN_SECOND)
            absUsedMemory is obtained as self.memoryUsage * self.absMemory
        else:
            calculate the memory usage of its children;
            calculate the absolute used memory of the queue.
            MemoryUsage = absUsedMemory / absMemory
        """
        if queue is None:
            queue = self.get_root()

        memory_usage = 0.0
        if queue.is_leaf():
            total_memory_seconds = queue.data.cal_leaf_mem_second()
            total_memory_capacity = queue.data.get_abs_memory(
            ) * RMQueue.CAL_INTERVAL_IN_SECOND
            memory_usage = 1.0 * total_memory_seconds / total_memory_capacity
            queue.data.set_mem_usage(memory_usage)
            queue.data.cal_abs_used_memory()
        else:  # parent queue
            # First, get all of its children queues and call each child's cal_memory_usage_old
            children = self.tree.children(queue.tag)
            for child in children:
                self.cal_memory_usage_old(child)

            # Second, calculate the absUsedMemory of current queue
            abs_used_memory = 0
            for child in children:
                abs_used_memory += child.data.get_abs_used_memory()
            queue.data.set_abs_used_memory(abs_used_memory)

            # Finally, calculate the memory usage of the queue
            queue.data.set_mem_usage(1.0 * queue.data.get_abs_used_memory() /
                                     queue.data.get_abs_memory())
        return queue.data.get_mem_usage()

    def cal_memory_usage(self, queue=None):
        """
        if current queue is a leaf queue:
            MemoryUsage is abs_memory_usage / abs_capacity (as a percentage)
        else:
            calculate the memory usage of its children;
            calculate the absolute memory usage of the queue.
            MemoryUsage = absUsedMemory / absMemory
        """
        if queue is None:
            queue = self.get_root()

        memory_usage = 0.0
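        # Worked example (illustrative): a leaf that used 30 memory units against an
        # absolute capacity of 50 reports mem_usage = 100.0 * 30 / 50 = 60.0 (%).
        # A parent sums its children's absolute usage and divides by its own
        # absolute capacity in the same way.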
        if queue.is_leaf():
            abs_memory_usage = queue.data.cal_queue_memory_usage()
            abs_memory_capacity = queue.data.get_abs_capacity()
            memory_usage = 100.0 * abs_memory_usage / abs_memory_capacity
            queue.data.set_mem_usage(memory_usage)
            queue.data.set_abs_memory_usage(abs_memory_usage)
        else:  # parent queue
            # First, get all of its children queues and call each child's cal_memory_usage
            children = self.tree.children(queue.tag)
            for child in children:
                self.cal_memory_usage(child)

            # Second, calculate the absUsedMemoryUsage of current queue
            abs_memory_usage = 0
            for child in children:
                abs_memory_usage += child.data.get_abs_memory_usage()
            queue.data.set_abs_memory_usage(abs_memory_usage)

            # Finally, calculate the memory usage of the queue
            queue.data.set_mem_usage(100.0 *
                                     queue.data.get_abs_memory_usage() /
                                     queue.data.get_abs_capacity())
        return queue.data.get_mem_usage()

    def cal_mem_usage_division(self, queue=None):
        """
        if current queue is a leaf queue:
            the memory usage standard deviation is zero.
        else:
            calculate the standard deviation of its children;
            calculate the standard deviation of the current queue from its children's average memory usage.
        """
        if queue is None:
            queue = self.get_root()

        std_division = 0.0
        if self.is_leaf(queue.tag):
            queue.data.cur_metric.mem_usage_div = std_division
            return std_division
        else:  # parent queue
            # First, get all of its children queues and call each child's cal_mem_usage_division
            children = self.tree.children(queue.tag)
            for child in children:
                self.cal_mem_usage_division(child)

            # Second, calculate the average memory usage of all its children
            count = len(children)
            total_mem_usage = 0
            for child in children:
                total_mem_usage += child.data.get_mem_usage()
                # print(child.data.get_mem_usage())
            avg_mem_usage = total_mem_usage / count

            # Finally, calculate the standard deviation of the queue
            squareSum = 0
            for child in children:
                squareSum += np.square(child.data.get_mem_usage() -
                                       avg_mem_usage)
            std_division = np.sqrt(squareSum / count)
            queue.data.cur_metric.mem_usage_div = std_division
            return std_division

    def cal_abs_capacity_bottom_up(self, queue=None):
        if queue is None:
            queue = self.get_root()

        if self.is_leaf(queue.tag):
            return
        else:
            children = self.tree.children(queue.tag)
            abs_capacity = 0
            for child in children:
                # print("Queue name: %s, abs_capacity: %.2f" %(child.tag, child.data.get_abs_capacity()))
                self.cal_abs_capacity_bottom_up(child)
                abs_capacity += child.data.get_abs_capacity()
            queue.data.set_abs_capacity(abs_capacity)

    def cal_desired_abs_capacity_bottom_up(self, queue=None):
        if queue is None:
            queue = self.get_root()

        if self.is_leaf(queue.tag):
            return
        else:
            children = self.tree.children(queue.tag)
            abs_capacity = 0.0
            fixed_capacity = 0.0
            for child in children:
                self.cal_desired_abs_capacity_bottom_up(child)
                if child.data.config.fixed:
                    # print("FIXED")
                    # print(child.data.config.capacity)
                    # print(child.data.config.abs_capacity)
                    fixed_capacity += child.data.config.capacity
                else:
                    abs_capacity += child.data.wish.abs_capacity

            for child in children:
                if child.data.config.fixed:
                    child.data.wish.abs_capacity = abs_capacity / (
                        100.0 - fixed_capacity) * child.data.config.capacity
            queue.data.wish.abs_capacity = abs_capacity * 100.0 / (
                100.0 - fixed_capacity)

    def clear_desired_abs_capacity(self, queue=None):
        if queue is None:
            queue = self.get_root()

        queue.data.wish.abs_capacity = 0
        if self.is_leaf(queue.tag):
            return
        else:
            queue.data.cur_metric.pending = 0.0
            children = self.tree.children(queue.tag)
            for child in children:
                self.clear_desired_abs_capacity(child)

    def cal_abs_capacity_top_down(self, queue=None):
        """
        Calculates the absolute capacity of each queue from its configured capacity.
        This function should only be called once, at start time.
        """
        if queue is None:
            queue = self.get_root()
            queue.data.set_abs_capacity(100.0)
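        # Worked example (illustrative): the root is pinned at an absolute capacity
        # of 100; a child configured at 40% gets 100 * 40 / 100 = 40, and a
        # grandchild configured at 50% of that child gets 40 * 50 / 100 = 20.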

        if self.is_leaf(queue.tag):
            return
        else:
            children = self.tree.children(queue.tag)
            for child in children:
                child.data.set_abs_capacity(queue.data.get_abs_capacity() *
                                            child.data.get_capacity() / 100.0)
                # print(child.data.get_abs_capacity())
                self.cal_abs_capacity_top_down(child)

    def cal_desired_capacity_top_down(self, queue=None):
        if queue is None:
            queue = self.get_root()
            queue.data.wish.capacity = 100.0

        if self.is_leaf(queue.tag):
            return
        else:
            children = self.tree.children(queue.tag)
            abs_capacity = queue.data.wish.abs_capacity
            for child in children:
                if child.data.config.fixed:
                    child.data.wish.capacity = child.data.config.capacity
                elif abs_capacity == 0:
                    child.data.wish.capacity = 0
                else:
                    child.data.wish.capacity = child.data.wish.abs_capacity / abs_capacity * 100.0
                self.cal_desired_capacity_top_down(child)

    def cal_capacity_top_down(self, queue=None):
        if queue is None:
            queue = self.get_root()

        if self.is_leaf(queue.tag):
            return
        else:
            children = self.tree.children(queue.tag)
            abs_capacity = queue.data.get_abs_capacity()
            for child in children:
                if abs_capacity == 0:
                    child.data.set_capacity(0)
                else:
                    child.data.set_capacity(child.data.get_abs_capacity() /
                                            abs_capacity * 100)
                self.cal_capacity_top_down(child)

    def cal_abs_memory_top_down(self, queue=None):
        if queue is None:
            queue = self.get_root()
            queue.data.cal_totalMb_mean()
            queue.data.clear_totalMb()

        if self.is_leaf(queue.tag):
            return
        else:
            children = self.tree.children(queue.tag)
            for child in children:
                child.data.set_abs_memory(queue.data.get_abs_memory() *
                                          child.data.get_capacity() / 100)
                self.cal_abs_memory_top_down(child)

    def clear_mus_top_down(self, queue=None):
        if queue is None:
            queue = self.get_root()

        if self.is_leaf(queue.tag):
            queue.data.clear_queue_memory_usage()
        else:
            children = self.tree.children(queue.tag)
            for child in children:
                self.clear_mus_top_down(child)

    def clear_jobs_top_down(self, queue=None):
        if queue is None:
            queue = self.get_root()

        if self.is_leaf(queue.tag):
            queue.data.clear_jobs()
        else:
            children = self.tree.children(queue.tag)
            for child in children:
                self.clear_jobs_top_down(child)

    def clear_pendings_top_down(self, queue=None):
        if queue is None:
            queue = self.get_root()

        if self.is_leaf(queue.tag):
            queue.data.clear_pendings()
        else:
            children = self.tree.children(queue.tag)
            for child in children:
                self.clear_pendings_top_down(child)

    def before_scoring(self):
        self.cal_abs_capacity_bottom_up()
        self.cal_capacity_top_down()
        self.cal_abs_memory_top_down()

    def after_scoring(self):
        self.clear_jobs_top_down()
        self.clear_pendings_top_down()
        self.clear_mus_top_down()

    def score(self):
        self.before_scoring()
        # self.cal_slowdown()
        # self.cal_slowdown_division()
        self.cal_pending()
        self.cal_pending_division()
        self.cal_memory_usage()
        self.cal_mem_usage_division()
        self.after_scoring()

    def before_predict(self):
        self.cal_desired_abs_capacity_bottom_up()

    def after_predict(self):
        self.clear_desired_abs_capacity()

    def predict(self):
        self.before_predict()
        self.cal_desired_capacity_top_down()
        self.after_predict()
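
# A minimal usage sketch for RMQueue above (not part of the original example).
# It assumes the QueueData class and the Singleton metaclass defined alongside
# RMQueue are available, and it only exercises the tree-building helpers.
def _rmqueue_demo():
    q = RMQueue()
    q.create_queue('root')                  # root queue
    q.create_queue('dev', parent='root')    # two children under root
    q.create_queue('prod', parent='root')
    q.create_queue('dev-a', parent='dev')   # a leaf queue under 'dev'
    q.display()                             # prints the queue tree via treelib
    return q
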
Example #34
0
def main():
    """ Solution to Advent of Code Day 7 """
    filename = "puzzle_test.txt" if len(sys.argv) < 2 else sys.argv[1]
    rules = open(filename).read().splitlines()

    # parse input file into a dictionary
    # (bag count, bag name)

    rules = [
        re.sub(r"bags|bag|\.| |contain no other ", "", rule) for rule in rules
    ]

    luggage_dict = dict()

    for rule in rules:
        relations = rule.split('contain')
        if len(relations) > 1:
            members = relations[1].replace(',', ' ')
            data = re.findall(r"(\d+)(\w+)", members)
            luggage_dict[relations[0]] = data
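
    # For example (illustrative), the rule
    #   "light red bags contain 1 bright white bag, 2 muted yellow bags."
    # becomes "lightredcontain1brightwhite,2mutedyellow" after the re.sub above,
    # and ends up as luggage_dict["lightred"] = [('1', 'brightwhite'), ('2', 'mutedyellow')].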

    # generate a dictionary of trees mapping each luggage relationship
    # (bag count, bag name)

    tree_dict = dict()

    for data in luggage_dict:
        tree_dict[data] = Tree()
        root_node = Node(tag=('1', data), data=data)
        tree_dict[data].add_node(root_node)
        parent_id = root_node.identifier
        for bag in luggage_dict[data]:
            tree_dict[data].create_node(tag=bag, data=data, parent=parent_id)

    # count every tree that contains a shiny gold bag
    print(
        "part #1:",
        sum([(search_luggage(luggage_dict, key, "shinygold"))
             for key in luggage_dict]) - 1)

    # build tree of luggage containing shiny gold

    tree = Tree()
    node = Node('shinygold', data=int(1))
    items = [node]
    tree.add_node(node, parent=None)

    while items:
        parent_node = items.pop()
        parent_id = parent_node.identifier
        if parent_node.tag not in luggage_dict:
            continue
        for data in luggage_dict[parent_node.tag]:
            node = Node(tag=data[1], data=int(data[0]))
            tree.add_node(node, parent=parent_id)
            items.append(node)

    # preorder traversal to distribute the luggage multiplier
    stack = [tree.get_node(tree.root)]
    output = []

    while stack:
        node = stack.pop()
        for child_node in tree.children(node.identifier):
            child_node.data *= node.data
        output.insert(0, node.data)

        if tree.children(node.identifier):
            stack += reversed(tree.children(node.identifier))

    # produce bag total
    print("part #2:", sum(output) - 1)
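
# A tiny illustration of the part-2 multiplier step above, on hand-built data
# rather than the puzzle input: a shiny gold bag holding 2 red bags, each of
# which holds 3 blue bags, contains 2 + 2 * 3 = 8 bags in total. The node data
# values after the multiplier pass are 1 (gold), 2 (red) and 2 * 3 = 6 (blue),
# so summing them and subtracting the gold bag itself gives 9 - 1 = 8.
def _multiplier_demo():
    demo = Tree()
    gold = Node('shinygold', data=1)
    demo.add_node(gold)
    red = Node('mutedred', data=2)             # 2 red bags inside gold
    demo.add_node(red, parent=gold.identifier)
    blue = Node('dottedblue', data=3)          # 3 blue bags inside each red
    demo.add_node(blue, parent=red.identifier)

    total = 0
    stack = [demo.get_node(demo.root)]
    while stack:
        node = stack.pop()
        for child in demo.children(node.identifier):
            child.data *= node.data            # propagate the multiplier downwards
        total += node.data
        stack += demo.children(node.identifier)
    return total - 1                           # 8 bags inside the shiny gold bag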