def conversation_regarding_language(cursor):
    """Build one reply tree per stored conversation and list the tweet ids in them.

    Conversation indexes 0 .. ``find_conversation_number(cursor)`` (inclusive)
    are loaded one by one. Within a conversation, every row whose column 2 is
    ``None`` and whose column 5 is ``True`` is inserted as a root node and
    expanded through ``build_conversation_lang``. Trees that end up without
    any node are dropped.

    NOTE(review): column 2 is presumably the "in reply to" id and column 5 a
    language flag -- confirm against the query in ``postgres_queries``.

    The depth / size histograms are filled while the trees are built but are
    neither returned nor printed by this function.

    Returns:
        ``(tweet_ids, trees)``: the node tags of every kept tree converted to
        ``int``, and the list of kept trees.
    """
    total = postgres_queries.find_conversation_number(cursor)
    trees = []
    depth_counts = {}
    long_depth_counts = {}
    short_depth_counts = {}
    size_counts = {}
    rows_seen = 0

    for index in range(total + 1):
        tree = Tree()
        rows = postgres_queries.find_conversation(index, cursor)
        rows_seen += len(rows)

        for row in rows:
            # Only rows flagged as conversation starters open a tree.
            if row[2] is not None or row[5] is not True:
                continue

            tree.create_node(row[0], row[0])
            build_conversation_lang(row[0], rows, tree, [])

            depth = tree.depth() + 1
            size = len(tree.all_nodes())

            # Conversations of 20 tweets or more count as "long".
            bucket = long_depth_counts if size >= 20 else short_depth_counts
            bucket[depth] = bucket.get(depth, 0) + 1
            size_counts[size] = size_counts.get(size, 0) + 1
            depth_counts[depth] = depth_counts.get(depth, 0) + 1

        # Skip conversations for which no root row was found.
        if tree.all_nodes():
            trees.append(tree)

    tweet_ids = [int(node.tag) for kept in trees for node in kept.all_nodes()]
    return tweet_ids, trees
class Chain:
    """Chain of blocks stored as a tree whose first node (tag 0, id 0) is the genesis block."""

    def __init__(self):
        self.root = Tree()
        self.root.create_node(0, 0)  # Genesis block

    def add_block(self, block):
        """Add ``block`` to the chain.

        The new node is tagged with ``block.epoch``, identified by
        ``hash(block)`` and attached under the node whose identifier is
        ``block.hash``; the block itself is stored as the node's data.
        """
        self.root.create_node(block.epoch, hash(block), block.hash).data = block

    def print_chain(self):
        """Print the chain as a tree."""
        self.root.show()

    def return_nodes_data(self):
        """Return the data of every node except the first one, or ``[0]`` if there is none."""
        blocks = [node.data for node in self.root.all_nodes()[1:]]
        return blocks or [0]

    def leaves(self):
        """Return the identifiers of the leaves of the tree."""
        return [leaf.identifier for leaf in self.root.leaves(nid=None)]

    # A former ``return_nodes`` helper (the tags of all nodes) is disabled.

    def block_max_epoch(self):
        """Return the data of the node with the greatest tag (epoch).

        Only tags strictly greater than 0 are considered and the first node
        wins on ties; ``0`` is returned when no node qualifies.
        """
        best_epoch, best_block = 0, 0
        for node in self.root.all_nodes():
            if node.tag > best_epoch:
                best_epoch, best_block = node.tag, node.data
        return best_block
Beispiel #3
0
def expand_night_nodes(tree: treelib.Tree):
    """
    Add one child per night outcome under every node that is not expanded yet.

    The first three items of a node's data are read as
    ``(path, game_state, expanded)``. Nodes that are already expanded, or
    whose game state has ``time == 0``, are skipped. Nodes whose game state
    already has a winner (``winner(gs) != 0``) get no children. Every other
    node receives one child per entry of ``night_outcomes(gs)``, with data
    ``(key, val, False, 0.0)``. In both of the last two cases the node's data
    is replaced by ``(path, gs, True)``.

    The tree is modified in place; nothing is returned.
    """
    for listed in tree.all_nodes():
        parent_id = listed.identifier
        parent = tree[parent_id]

        path, gs, expanded = parent.data[0:3]
        if expanded or gs.time == 0:
            continue

        if winner(gs) == 0:
            for key, val in night_outcomes(gs).items():
                # Label: first three key items, plus the last one for longer keys.
                label = "".join(map(to_string, key[0:3]))
                if len(key) > 3:
                    label += str(key[-1])
                tree.create_node(f"{label} {val!s}",
                                 parent_id + "_" + label,
                                 parent_id,
                                 data=(key, val, False, 0.0))

        # Mark as handled so later calls skip this node.
        parent.data = (path, gs, True)
Beispiel #4
0
def endpoint_cal(swc_p, unit, sep=","):
    """
    Build a multi-branch tree from an SWC file and collect its key points.

    ``coords_get(swc_p, unit, sep)`` supplies the coordinates, labels, ids and
    parent ids of the entries. The first entry becomes the root; each
    remaining entry is attached under its parent id with its coordinate as
    node data. If no coordinates are returned, a message is printed and the
    process exits.

    Returns:
        ``(endpoint_coords, branch_coords, coords, ftree)`` where
        ``endpoint_coords`` holds the data of all leaves followed by the first
        coordinate, ``branch_coords`` the data of every node with at least two
        children, ``coords`` all coordinates (all three as numpy arrays) and
        ``ftree`` the tree itself.
    """
    print(unit, sep)
    coords, labels, ids, pars = coords_get(swc_p, unit, sep)
    if len(coords) == 0:
        print("{} is something wrong".format(swc_p))
        sys.exit(0)

    ftree = Tree()
    ftree.create_node(ids[0], ids[0], data=coords[0])

    remaining = zip(coords[1:], ids[1:], pars[1:])
    for point, node_id, parent_id in remaining:
        ftree.create_node(tag=node_id, identifier=node_id,
                          parent=parent_id, data=point)

    # Leaves first, then the root coordinate.
    tips = [leaf.data for leaf in ftree.leaves()]
    tips.append(coords[0])

    forks = []
    for node in ftree.all_nodes():
        if len(ftree.children(node.tag)) >= 2:
            forks.append(node.data)

    return np.array(tips), np.array(forks), np.array(coords), ftree
Beispiel #5
0
 def __init__(self, mol, atom0, atom1, depth=5):
     """Grow a tree of atoms outward from the bond ``atom0``-``atom1``.

     ``atom0`` is the root and ``atom1`` its only initial child. For
     ``depth`` rounds, every current leaf gets one child per neighbouring
     atom, except the atom it was reached from. Node tags are
     ``[atomic_number, bond_order]``, where the bond order is that of the
     bond to the parent (for the two initial nodes: the atom0-atom1 bond),
     and node data is the atom object.

     Node identifiers are atom indices; when an atom appears more than once
     its identifier is shifted by multiples of the atom count until unused.

     Args:
         mol: molecule providing ``GetBondBetweenAtoms`` and ``GetAtoms``.
         atom0: atom used as the root of the tree.
         atom1: atom bonded to ``atom0``, used as its first child.
         depth: number of expansion rounds.
     """
     self.mol = mol
     tree = Tree()
     bond_order = mol.GetBondBetweenAtoms(
         atom0.GetIdx(), atom1.GetIdx()).GetBondTypeAsDouble()
     tree.create_node(tag=[atom0.GetAtomicNum(), bond_order],
                      identifier=atom0.GetIdx(),
                      data=atom0)
     tree.create_node(tag=[atom1.GetAtomicNum(), bond_order],
                      identifier=atom1.GetIdx(),
                      data=atom1,
                      parent=atom0.GetIdx())
     n_atoms = len(mol.GetAtoms())
     for _ in range(depth):
         # all_nodes() is a snapshot, so nodes added in this round are
         # only expanded in the next one.
         for node in tree.all_nodes():
             if not node.is_leaf():
                 continue
             # Compare against the parent's *atom index*, not its node
             # identifier: identifiers of repeated atoms are shifted, so the
             # identifier alone would let the walk step back onto the parent.
             parent = tree.parent(node.identifier)
             parent_idx = None if parent is None else parent.data.GetIdx()
             for atom in node.data.GetNeighbors():
                 if atom.GetIdx() == parent_idx:
                     continue
                 order = mol.GetBondBetweenAtoms(
                     atom.GetIdx(),
                     node.data.GetIdx()).GetBondTypeAsDouble()
                 identifier = atom.GetIdx()
                 while tree.get_node(identifier) is not None:
                     identifier += n_atoms
                 tree.create_node(tag=[atom.GetAtomicNum(), order],
                                  identifier=identifier,
                                  data=atom,
                                  parent=node.identifier)
     self.tree = tree
def sum_orbits(orbital_tree: Tree) -> int:
    """Return the sum of ``orbital_tree.depth(node)`` over all nodes of the tree."""
    return sum(orbital_tree.depth(node) for node in orbital_tree.all_nodes())
Beispiel #7
0
def reset_ids(t: tl.Tree) -> tl.Tree:
    """Re-identify every node of ``t`` in place and return ``t``.

    Leaves get their own tag as identifier (the tag is passed along
    unchanged); every other node gets a fresh identifier from ``uid()``.
    """
    for node in t.all_nodes():
        if not node.is_leaf():
            t.update_node(node.identifier, identifier=uid())
            continue
        t.update_node(node.identifier, tag=node.tag, identifier=node.tag)

    return t
Beispiel #8
0
class BlockChain:
    """Blockchain kept in a tree; the first node (tag 0, id 0) is the genesis block."""

    def __init__(self):
        self.root = Tree()
        self.root.create_node(0, 0)  # Genesis block

    def add_block(self, block):
        """Insert ``block`` as a node tagged ``block.epoch`` with id ``hash(block)``.

        The node is attached under the node identified by ``block.hash`` and
        carries the block object as its data.
        """
        created = self.root.create_node(block.epoch, hash(block), block.hash)
        created.data = block

    def print_chain(self):
        """Show the blockchain tree."""
        self.root.show()

    def return_nodes_data(self):
        """Return the blocks stored in all nodes after the first one.

        When there is no such node the list contains the single value ``0``.
        """
        _first, *others = self.root.all_nodes()
        stored = [node.data for node in others]
        if not stored:
            stored.append(0)
        return stored

    def leaves(self):
        """Return the identifiers of the tree's leaves."""
        return [node.identifier for node in self.root.leaves(nid=None)]

    def block_max_epoch(self):
        """Return the block (node data) whose epoch (node tag) is the largest.

        Tags must exceed 0 to be considered; ``0`` is returned otherwise.
        On equal tags the earliest node is kept.
        """
        winner_epoch = 0
        winner_block = 0
        for node in self.root.all_nodes():
            if node.tag <= winner_epoch:
                continue
            winner_epoch = node.tag
            winner_block = node.data
        return winner_block
Beispiel #9
0
def types_of_conversation():
    conversation_amount = postgres_queries.find_annotated_conversation_number()
    conversation_list = list()
    depth_dict = dict()
    depth_dict_long = dict()
    depth_dict_short = dict()
    number_of_tweets_dict = dict()
    for i in range (0, conversation_amount + 1, 1):
        conversation_tree = Tree()
        converastion = postgres_queries.find_conversation(i)
        for tweet in converastion:
            if tweet[1] is None:
                conversation_tree.create_node(tweet[0], tweet[0])
                build_conversation(tweet[0], converastion, conversation_tree)
                depth = conversation_tree.depth() + 1
                number_of_tweets = len(conversation_tree.all_nodes())
                #short/long
                if number_of_tweets >=20:
                    if depth in depth_dict_long:
                        depth_dict_long[depth] += 1
                    else:
                        depth_dict_long[depth] = 1
                else:
                    if depth in depth_dict_short:
                        depth_dict_short[depth] += 1
                    else:
                        depth_dict_short[depth] = 1

                if number_of_tweets in number_of_tweets_dict:
                    number_of_tweets_dict[number_of_tweets] += 1
                else:
                     number_of_tweets_dict[number_of_tweets] = 1
                if depth in depth_dict:
                    depth_dict[depth] += 1
                else:
                    depth_dict[depth] = 1
        conversation_list.append(conversation_tree)
    #print depth_dict
    print 'Depth of a conversation'
    for depth, count in depth_dict.iteritems():
        print depth, '\t', count
    print 'Number of tweets in a conversation'
    for number, count in number_of_tweets_dict.iteritems():
        print number, '\t', count
    print 'Depth of a long conversation'
    for depth, count in depth_dict_long.iteritems():
        print depth, '\t', count
    print 'Depth of a short conversation'
    for depth, count in depth_dict_short.iteritems():
        print depth, '\t', count

    return conversation_list
Beispiel #10
0
class DecisionTreeClassifier(object):
    """Decision tree over categorical attributes, stored in a ``Tree``.

    Each tree node carries a ``DecisionTreeNode`` as data; a child's tag is
    the attribute value that leads to it from its parent.
    """

    def __init__(self):
        self.tree = Tree()

    def split(self, node, data, attr):
        """Recursively split ``node`` on ``attr`` until every subset is pure.

        Args:
            node: tree node whose children are created.
            data: rows belonging to ``node``.
            attr: attribute on which ``node`` is split.
        """
        # One child per value of ``attr`` found in ``data``.
        for val in get_values(data, attr):
            subset = data[data[attr] == val]
            next_attr = get_best_attribute(subset)  # attribute the child splits on
            child = self.tree.create_node(val,
                                          len(self.tree.all_nodes()),
                                          node.identifier,
                                          data=DecisionTreeNode(data=subset,
                                                                attribute=next_attr))
            if not is_pure(subset):
                self.split(child, subset, next_attr)

    def fit(self, X, y):
        """Build the decision tree from the training set ``(X, y)``."""
        data = pd.concat([X, y], axis=1)
        attr = get_best_attribute(data)
        # Operate on this instance (the original referenced an undefined
        # global ``clas`` here).
        root = self.tree.create_node("Root", "root",
                                     data=DecisionTreeNode(data, attr))
        self.split(root, root.data.data, attr)

    def predict(self, X):
        """Predict a class value for every row of ``X``.

        Each row is walked from the node with id ``'root'`` down to a leaf,
        following at each step the child whose tag equals the row's value for
        the current node's attribute.

        Returns:
            A list with one prediction (``get_pred()`` of the reached leaf's
            data) per row.

        Raises:
            ValueError: if a row holds a value for which the current node has
                no child (previously this case looped forever).
        """
        preds = []
        for row in X.itertuples():
            current = self.tree['root']

            while not current.is_leaf():
                attr = current.data.attribute
                val = getattr(row, attr)

                for child_id in current.fpointer:
                    child = self.tree[child_id]
                    if child.tag == val:
                        current = child
                        break
                else:
                    raise ValueError(
                        "no branch for value %r of attribute %r" % (val, attr))

            preds.append(current.data.get_pred())

        return preds
Beispiel #11
0
  def crossOver(individualA, individualB):
    """Create a child individual by grafting a subtree of B into a copy of A.

    A random node of A's tree is removed together with its subtree and a
    random subtree of B's tree is pasted in its place; when the removed node
    is A's root, the subtree of B becomes the whole tree. Both parents are
    deep-copied first, so they are left untouched. The draw is repeated until
    the resulting tree is no deeper than ``TREE_MAX_DEPTH``.

    Returns:
        A new ``Individual`` wrapping the resulting tree.
    """
    tree = None

    # tree.depth() is the depth of the whole tree. The previous check asked
    # for the level of the root node, which is always 0, so the limit was
    # never enforced.
    while tree is None or tree.depth() > TREE_MAX_DEPTH:
      treeA = Tree(tree = individualA.tree, deep=True)
      treeB = Tree(tree = individualB.tree, deep=True)
      # Fresh identifiers so that pasted nodes cannot clash with treeA's.
      regenerate_ids(treeA)
      regenerate_ids(treeB)
      removedNode = random.choice(treeA.all_nodes())
      addedNode = random.choice(treeB.all_nodes())

      addedSubtree = Tree(tree = treeB.subtree(addedNode.identifier), deep=True)

      if treeA.root == removedNode.identifier:
        tree = addedSubtree

      else:
        parent = treeA.parent(removedNode.identifier)
        treeA.remove_subtree(removedNode.identifier)
        treeA.paste(parent.identifier, addedSubtree)
        tree = treeA

    return Individual(tree)
def conversation_regarding_language():  # with width and depth
    """Build a tree per annotated conversation and list the tags of all nodes.

    Conversation indexes 0 .. ``find_annotated_conversation_number()``
    (inclusive) are loaded. Each row whose column 1 is ``None`` and whose
    column 5 is ``True`` is inserted as a root and expanded with
    ``build_conversation_lang``. Every tree is kept, including empty ones.

    The depth / size histograms and the running totals are computed but are
    neither returned nor printed by this function.

    Returns:
        ``(tweet_ids, trees)``: the tags of all nodes of all trees, and the
        list of trees.
    """
    total = postgres_queries.find_annotated_conversation_number()
    trees = []
    depth_counts = {}
    long_depth_counts = {}
    short_depth_counts = {}
    size_counts = {}
    rows_seen = 0

    for index in range(total + 1):
        tree = Tree()
        rows = postgres_queries.find_conversation(index)
        rows_seen += len(rows)
        for row in rows:
            if row[1] is not None or row[5] is not True:
                continue
            tree.create_node(row[0], row[0])
            build_conversation_lang(row[0], rows, tree)

            depth = tree.depth() + 1
            size = len(tree.all_nodes())

            # 20 tweets or more makes a conversation "long".
            bucket = long_depth_counts if size >= 20 else short_depth_counts
            bucket[depth] = bucket.get(depth, 0) + 1
            size_counts[size] = size_counts.get(size, 0) + 1
            depth_counts[depth] = depth_counts.get(depth, 0) + 1
        trees.append(tree)

    node_total = 0
    tweet_ids = []
    for built in trees:
        tweet_ids.extend(node.tag for node in built.all_nodes())
        node_total += len(built.all_nodes())

    return tweet_ids, trees
Beispiel #13
0
def createTree(showDescript=False):
    """Build the category / rule tree from ``savedFiles/rulesTxt/<fileName>.txt``.

    Lines that do not start with ``V-`` declare a category as
    ``"<name>,<parent>"``; a parent of ``NONE`` means the category has no
    parent. Lines starting with ``V-`` are rules, parsed by ``parseRule`` and
    attached to the most recently declared category.

    Args:
        showDescript: when true, rule nodes are tagged
            ``"<rule number>: <description>"`` instead of the rule number only.

    Returns:
        The tree, with surrounding whitespace stripped from every node tag.
    """
    tree = Tree()
    path = 'savedFiles/rulesTxt/%s.txt' % fileName
    with open(path, 'r') as fin:
        lines = fin.readlines()

    category = None
    for line in lines:
        if line.startswith('V-'):
            # A rule: goes under the current category.
            ruleNum, ruleDescript = parseRule(line)  # TODO
            if showDescript:
                label = "%s: %s" % (ruleNum, ruleDescript)
            else:
                label = ruleNum
            tree.create_node(label, ruleNum, parent=category)
            continue

        # A category: becomes the parent of the rules that follow.
        name, parent = [part.strip() for part in line.split(',')]
        category = name
        if parent == 'NONE':
            parent = None
        tree.create_node(name, name, parent=parent)

    # Tags may still end with the newline of the line they came from.
    for node in tree.all_nodes():
        node.tag = node.tag.strip()
    return tree
Beispiel #14
0
  def __call__(self, inputs,words,dep,is_train=True):
      """
      Build the dependency tree described by ``dep`` and evaluate every input on it.

      :param inputs: sequence of per-position inputs; item ``idx`` is passed to
          ``expr_for_tree`` as ``xt`` together with the tree node whose
          identifier is ``idx + 1``
      :param words: sequence of tokens; node ``n`` is tagged ``words[n - 1]``
      :param dep: string that evaluates to a sequence of arcs, where
          ``arc[1]`` is the parent node id and ``arc[2]`` the node id; the
          first arc defines the root
      :param is_train: forwarded unchanged to ``expr_for_tree``
      :return: ``(H, root_index)`` -- the list of ``expr_for_tree`` results and
          the zero-based index of the root word
      :raises ValueError: if some node of ``dep`` cannot be attached to the
          tree (previously this case looped forever)
      """
      # NOTE(review): eval() executes arbitrary code. If ``dep`` can come from
      # an untrusted source, ast.literal_eval would be the safer parser --
      # confirm the input format before switching.
      arcs = eval(dep)
      tree = Tree()
      root_index = 0

      # Arcs may be listed before their parent exists, so keep sweeping until
      # a full pass adds nothing new.
      progressed = True
      while progressed:
          progressed = False
          for position, arc in enumerate(arcs):
              parentIdx = arc[1]
              nodeIdx = arc[2]
              if tree.contains(nid=nodeIdx):
                  continue
              if position == 0:
                  tree.create_node(words[nodeIdx-1], identifier=nodeIdx)
                  root_index = nodeIdx-1
                  progressed = True
              elif tree.contains(nid=parentIdx):
                  tree.create_node(words[nodeIdx-1], identifier=nodeIdx, parent=parentIdx)
                  progressed = True

      unattached = [arc[2] for arc in arcs if not tree.contains(nid=arc[2])]
      if unattached:
          raise ValueError(
              "dependency arcs do not form a single tree; unattached nodes: %r"
              % (unattached,))

      H = []
      for idx in range(0, len(inputs)):
          h = self.expr_for_tree(xt=inputs[idx], tree=tree,
                                 node=tree.get_node(idx+1), is_train=is_train)
          H.append(h)

      return H, root_index
Beispiel #15
0
def build_tree(arg):
    # read parameters
    start = time.time()
    dist_matrix_file = arg[0]
    cls_file = arg[1]
    tree_dir = arg[2]
    ksize = arg[3]
    params = arg[4]
    alpha_ratio = params[0]
    minsize = params[1]
    maxsize = params[2]
    max_cls_size = params[3]

    # save genomes info
    fna_seq = bidict.bidict()  # : 1
    fna_path = {}

    # read dist matrix (represented by similarity: 1-dist)
    # output: dist, fna_path, fna_seq
    f = open(dist_matrix_file, "r")
    lines = f.readlines()
    f.close()
    index = 0
    d = lines[0].rstrip().split("\t")[1:]
    bac_label = 0
    for i in lines[0].rstrip().split("\t")[1:]:
        temp = i[i.rfind('/') + 1:].split(".")[0]
        fna_seq[temp] = index
        fna_path[index] = i
        index += 1
    dist = []
    for line in lines[1:]:
        dist.append(
            [np.array(list(map(float,
                               line.rstrip().split("\t")[1:])))])
    dist = np.concatenate(dist)

    # read initial clustering results. fna_mapping, from 1 for indexing
    f = open(cls_file, 'r')
    lines = f.readlines()
    f.close()
    fna_mapping = defaultdict(set)
    for line in lines:
        temp = line.rstrip().split("\t")
        for i in temp[2].split(","):
            fna_mapping[int(temp[0])].add(fna_seq[i])
    if (len(lines) == 1):
        tree = Tree()
        kmer_sta = defaultdict(int)
        T0 = Node(identifier=list(fna_mapping.keys())[0])
        tree.add_node(T0)
        kmer_sta = defaultdict(int)
        kmer_index_dict = bidict.bidict()
        kmer_index = 1
        alpha_ratio = 1
        Lv = set()
        for i in fna_mapping[T0.identifier]:
            for seq_record in SeqIO.parse(fna_path[i], "fasta"):
                temp = str(seq_record.seq)
                for k in range(0, len(temp) - ksize):
                    forward = temp[k:k + ksize]
                    reverse = seqpy.revcomp(forward)
                    for kmer in [forward, reverse]:
                        try:
                            kmer_sta[kmer_index_dict[kmer]] += 1
                        except KeyError:
                            kmer_index_dict[kmer] = kmer_index
                            kmer_sta[kmer_index] += 1
                            kmer_index += 1
        alpha = len(fna_mapping[T0.identifier]) * alpha_ratio
        for x in kmer_sta:
            if (kmer_sta[x] >= alpha):
                Lv.add(x)
        print(T0.identifier, len(Lv))
        # save2file
        kmerlist = set()
        pkl.dump(tree, open(tree_dir + '/tree.pkl', 'wb'))
        f = open(tree_dir + "/tree_structure.txt", "w")
        os.system("mkdir " + tree_dir + "/kmers")
        os.system("mkdir " + tree_dir + "/overlapping_info")
        f.write("%d\t" % T0.identifier)
        f.close()
        os.system(f'cp {cls_file} {tree_dir}/')
        f = open(tree_dir + "/reconstructed_nodes.txt", "w")
        f.close()
        if (len(Lv) > maxsize):
            Lv = set(random.sample(Lv, maxsize))
        kmerlist = Lv
        length = len(Lv)
        f = open(tree_dir + "/kmers/" + str(T0.identifier), "w")
        for j in Lv:
            f.write("%d " % j)
        f.close()
        f = open(tree_dir + "/node_length.txt", "w")
        f.write("%d\t%d\n" % (T0.identifier, length))
        kmer_mapping = {}
        index = 0
        f = open(tree_dir + "/kmer.fa", "w")
        for i in kmerlist:
            f.write(">1\n")
            f.write(kmer_index_dict.inv[i])
            kmer_mapping[i] = index
            index += 1
            f.write("\n")
        f.close()

        # change index
        files = os.listdir(tree_dir + "/kmers")
        for i in files:
            f = open(tree_dir + "/kmers/" + i, "r")
            lines = f.readlines()
            if (len(lines) == 0):
                continue
            d = lines[0].rstrip().split(" ")
            d = map(int, d)
            f = open(tree_dir + "/kmers/" + i, "w")
            for j in d:
                f.write("%d " % kmer_mapping[j])
            f.close()
        end = time.time()
        print(
            '- The total running time of tree-based indexing struture building is ',
            str(end - start), ' s\n')
        return
    # initially build tree
    cls_dist, mapping, tree, depths, depths_mapping = hierarchy(
        fna_mapping, dist)

    # initially extract k-mers
    kmer_index_dict = bidict.bidict()
    kmer_index = 1
    Lv = defaultdict(set)
    spec = defaultdict(set)  # k-mers <= alpha
    leaves = tree.leaves()
    for i in leaves:
        kmer_index = extract_kmers(fna_mapping[i.identifier], fna_path, ksize,
                                   kmer_index_dict, kmer_index, Lv, spec,
                                   tree_dir, alpha_ratio, i.identifier)
    end = time.time()
    print('- The total running time of k-mer extraction is ', str(end - start),
          ' s\n')
    start = time.time()

    # leaf nodes check
    recls_label = 0

    leaves_check = []
    check_waitlist = reversed(leaves)
    while (True):
        if (recls_label):
            cls_dist, mapping, tree, depths, depths_mapping = hierarchy(
                fna_mapping, dist)
            leaves = tree.leaves()
            temp = {}
            temp2 = []
            for i in check_waitlist:
                if (i in fna_mapping):
                    temp2.append(i)
            check_waitlist = temp2.copy()
            for i in check_waitlist:
                temp[tree.get_node(i)] = depths[tree.get_node(i)]
            check_waitlist = []
            a = sorted(temp.items(), key=lambda x: x[1], reverse=True)
            for i in a:
                check_waitlist.append(i[0])
            for i in fna_mapping:
                if (i not in Lv):
                    kmer_index = extract_kmers(fna_mapping[i], fna_path, ksize,
                                               kmer_index_dict, kmer_index, Lv,
                                               spec, tree_dir, alpha_ratio, i)
        higher_union = defaultdict(set)
        for i in check_waitlist:
            diff, diff_nodes = get_leaf_union(depths[i], higher_union,
                                              depths_mapping, Lv, spec, i)
            kmer_t = Lv[i.identifier] - diff
            for j in diff_nodes:
                kmer_t = kmer_t - Lv[j.identifier]
            for j in diff_nodes:
                kmer_t = kmer_t - spec[j.identifier]
            print(str(i.identifier) + " checking", end="\t")
            print(len(kmer_t))
            if (len(kmer_t) < minsize):
                leaves_check.append(i)
        if (len(leaves_check) > 0):
            recls_label = 1
        else:
            break
        # re-clustering
        check_waitlist = []
        while (recls_label == 1):
            cluster_id = max(list(fna_mapping.keys())) + 1
            check_waitlist.append(cluster_id)
            leaf_a = leaves_check[0].identifier
            row_index = mapping[leaf_a]
            column_index = cls_dist[row_index].argmax()
            leaf_b = mapping.inv[column_index]  # (leaf_a, leaf_b)
            temp2 = fna_mapping[leaf_a] | fna_mapping[leaf_b]
            print(cluster_id, leaf_a, leaf_b, temp2)
            del fna_mapping[leaf_a], fna_mapping[leaf_b]
            if (leaf_a in Lv):
                del Lv[leaf_a], spec[leaf_a]
            if (leaf_b in Lv):
                del Lv[leaf_b], spec[leaf_b]
            del leaves_check[0]
            if (tree.get_node(leaf_b) in leaves_check):
                leaves_check.remove(tree.get_node(leaf_b))
            temp1 = [
                np.concatenate([[cls_dist[row_index]],
                                [cls_dist[column_index]]]).max(axis=0)
            ]
            cls_dist = np.concatenate([cls_dist, temp1], axis=0)
            temp1 = np.append(temp1, -1)
            temp1 = np.vstack(temp1)
            cls_dist = np.concatenate([cls_dist, temp1], axis=1)
            cls_dist = np.delete(cls_dist, [row_index, column_index], axis=0)
            cls_dist = np.delete(cls_dist, [row_index, column_index], axis=1)
            # change mapping
            del mapping[leaf_a], mapping[leaf_b]
            pending = list(fna_mapping.keys())
            pending.sort()
            for i in pending:
                if (mapping[i] > min([row_index, column_index])
                        and mapping[i] < max([row_index, column_index])):
                    mapping[i] -= 1
                elif (mapping[i] > max([row_index, column_index])):
                    mapping[i] -= 2
            fna_mapping[cluster_id] = temp2
            mapping[cluster_id] = len(cls_dist) - 1
            if (len(leaves_check) == 0):
                break
    del higher_union

    # rebuild identifiers
    all_nodes = tree.all_nodes()
    all_leaves_id = set([])
    leaves = set(tree.leaves())
    for i in leaves:
        all_leaves_id.add(i.identifier)
    id_mapping = bidict.bidict()
    index = 1
    index_internal = len(leaves) + 1
    for i in all_nodes:
        if (recls_label == 0):
            id_mapping[i.identifier] = i.identifier
        elif (i in leaves):
            id_mapping[i.identifier] = index
            index += 1
        else:
            id_mapping[i.identifier] = index_internal
            index_internal += 1
    leaves_identifier = list(range(1, len(leaves) + 1))
    all_identifier = list(id_mapping.values())
    all_identifier.sort()

    # save2file
    f = open(tree_dir + "/tree_structure.txt", "w")
    os.system("mkdir " + tree_dir + "/kmers")
    os.system("mkdir " + tree_dir + "/overlapping_info")
    for nn in all_identifier:
        i = id_mapping.inv[nn]
        f.write("%d\t" % id_mapping[i])
        if (i == all_nodes[0].identifier):
            f.write("N\t")
        else:
            f.write("%d\t" % id_mapping[tree.parent(i).identifier])
        if (nn in leaves_identifier):
            f.write("N\t")
        else:
            [child_a, child_b] = tree.children(i)
            f.write("%d %d\t" % (id_mapping[child_a.identifier],
                                 id_mapping[child_b.identifier]))
        if (len(fna_mapping[i]) == 1):
            temp = list(fna_mapping[i])[0]
            temp = fna_seq.inv[temp]
            f.write("%s" % temp)
        f.write("\n")
    f.close()
    f = open(tree_dir + "/hclsMap_95_recls.txt", "w")
    for nn in leaves_identifier:
        i = id_mapping.inv[nn]
        f.write("%d\t%d\t" % (nn, len(fna_mapping[i])))
        temp1 = list(fna_mapping[i])
        for j in temp1:
            temp = fna_seq.inv[j]
            if (j == temp1[-1]):
                f.write("%s\n" % temp)
            else:
                f.write("%s," % temp)
    f.close()
    end = time.time()
    print('- The total running time of re-clustering is ', str(end - start),
          ' s\n')
    start = time.time()

    # build indexing structure
    kmerlist = set([])  # all kmers used
    length = {}
    overload_label = 0
    if (len(tree.leaves()) > max_cls_size):
        overload_label = 1
    # from bottom to top (unique k-mers)
    uniq_temp = defaultdict(set)
    rebuilt_nodes = []
    descendant = defaultdict(set)  # including itself
    ancestor = defaultdict(set)
    descendant_leaves = defaultdict(set)
    ancestor[all_nodes[0].identifier].add(all_nodes[0].identifier)
    for i in all_nodes[1:]:
        ancestor[i.identifier] = ancestor[tree.parent(
            i.identifier).identifier].copy()
        ancestor[i.identifier].add(i.identifier)
    for i in reversed(all_nodes):
        print(str(id_mapping[i.identifier]) + " k-mer removing...")
        if (i in leaves):
            uniq_temp[i.identifier] = Lv[i.identifier]
            descendant_leaves[i.identifier].add(i.identifier)
        else:
            (child_a, child_b) = tree.children(i.identifier)
            descendant[i.identifier] = descendant[
                child_a.identifier] | descendant[child_b.identifier]
            descendant_leaves[i.identifier] = descendant_leaves[
                child_a.identifier] | descendant_leaves[child_b.identifier]
            uniq_temp[i.identifier] = uniq_temp[
                child_a.identifier] & uniq_temp[child_b.identifier]
            uniq_temp[child_a.identifier] = uniq_temp[
                child_a.identifier] - uniq_temp[i.identifier]
            uniq_temp[child_b.identifier] = uniq_temp[
                child_b.identifier] - uniq_temp[i.identifier]
        descendant[i.identifier].add(i.identifier)
    all_nodes_id = set(id_mapping.keys())
    # remove overlapping
    for i in reversed(all_nodes):
        print(str(id_mapping[i.identifier]) + " k-mer set building...")
        # no difference with sibling, subtree and ancestors
        if (i == all_nodes[0]):
            kmer_t = uniq_temp[i.identifier]
        else:
            diff = {}
            temp = all_nodes_id - descendant[i.identifier] - set([
                tree.siblings(i.identifier)[0].identifier
            ]) - ancestor[i.identifier]
            for j in temp:
                diff[j] = len(uniq_temp[j])
            a = sorted(diff.items(), key=lambda x: x[1], reverse=True)
            kmer_t = uniq_temp[i.identifier]
            for j in a:
                k = j[0]
                kmer_t = kmer_t - uniq_temp[k]
            # remove special k-mers
            temp = all_leaves_id - descendant_leaves[i.identifier]
            diff = {}
            for j in temp:
                diff[j] = len(spec[j])
            a = sorted(diff.items(), key=lambda x: x[1], reverse=True)
            for j in a:
                k = j[0]
                kmer_t = kmer_t - spec[k]
        if (len(kmer_t) < minsize and overload_label == 0):
            rebuilt_nodes.append(i)
            print("%d waiting for reconstruction..." %
                  id_mapping[i.identifier])
        else:
            if (len(kmer_t) > maxsize):
                kmer_t = set(random.sample(kmer_t, maxsize))
            f = open(tree_dir + "/kmers/" + str(id_mapping[i.identifier]), "w")
            for j in kmer_t:
                f.write("%d " % j)
            f.close()
            length[i] = len(kmer_t)
            kmerlist = kmerlist | kmer_t
    del uniq_temp

    # rebuild nodes
    overlapping = defaultdict(dict)
    intersection = defaultdict(set)
    higher_union = defaultdict(set)
    del_label = {}
    for i in leaves:
        del_label[i.identifier] = [0, 0]
    for i in rebuilt_nodes:
        print(str(id_mapping[i.identifier]) + " k-mer set rebuilding...")
        kmer_t = get_intersect(intersection, descendant_leaves[i.identifier],
                               Lv, del_label, i.identifier)
        diff = get_diff(higher_union, descendant_leaves, depths, all_nodes, i,
                        Lv, spec, del_label)
        for j in diff:
            kmer_t = kmer_t - j
        lower_leaves = set([])
        for j in leaves:
            if (depths[j] < depths[i]):
                lower_leaves.add(j)
        if (len(kmer_t) > maxsize):
            kmer_overlapping_sta = defaultdict(int)
            for j in lower_leaves:
                kmer_o = Lv[j.identifier] & kmer_t
                for k in kmer_o:
                    kmer_overlapping_sta[k] += 1
            temp = sorted(kmer_overlapping_sta.items(),
                          key=lambda kv: (kv[1], kv[0]))
            kmer_t = set([])
            for j in range(0, maxsize):
                kmer_t.add(temp[j][0])
        nkmer = {}
        f = open(tree_dir + "/kmers/" + str(id_mapping[i.identifier]), "w")
        index = 0
        for j in kmer_t:
            f.write("%d " % j)
            nkmer[j] = index
            index += 1
        length[i] = len(kmer_t)
        kmerlist = kmerlist | kmer_t
        # save overlapping info
        for j in lower_leaves:
            temp = Lv[j.identifier] & kmer_t
            if (len(temp) > 0):
                ii = id_mapping[i.identifier]
                jj = id_mapping[j.identifier]
                overlapping[jj][ii] = set([])
                for k in temp:
                    overlapping[jj][ii].add(nkmer[k])
        delete(Lv, spec, del_label)

    for i in overlapping:
        f = open(tree_dir + "/overlapping_info/" + str(i), "w")
        f1 = open(tree_dir + "/overlapping_info/" + str(i) + "_supple", "w")
        count = -1
        for j in overlapping[i]:
            if (len(overlapping[i]) != 0):
                f.write("%d\n" % j)
                for k in overlapping[i][j]:
                    f.write("%d " % k)
                f.write("\n")
                count += 2
                f1.write("%d %d\n" % (j, count))
        f.close()
        f1.close()

    # final saving
    f = open(tree_dir + "/reconstructed_nodes.txt", "w")
    for i in rebuilt_nodes:
        f.write("%d\n" % id_mapping[i.identifier])
    f.close()

    f = open(tree_dir + "/node_length.txt", "w")
    for nn in all_identifier:
        i = id_mapping.inv[nn]
        f.write("%d\t%d\n" % (nn, length[tree[i]]))
    f.close()

    kmer_mapping = {}
    index = 0
    f = open(tree_dir + "/kmer.fa", "w")
    for i in kmerlist:
        f.write(">1\n")
        f.write(kmer_index_dict.inv[i])
        kmer_mapping[i] = index
        index += 1
        f.write("\n")
    f.close()

    # change index
    files = os.listdir(tree_dir + "/kmers")
    for i in files:
        f = open(tree_dir + "/kmers/" + i, "r")
        lines = f.readlines()
        if (len(lines) == 0):
            continue
        d = lines[0].rstrip().split(" ")
        d = map(int, d)
        f = open(tree_dir + "/kmers/" + i, "w")
        for j in d:
            f.write("%d " % kmer_mapping[j])
        f.close()

    end = time.time()
    print(
        '- The total running time of tree-based indexing struture building is ',
        str(end - start), ' s\n')
Beispiel #16
0
class Blockchain(object):
    """Tree of blocks rooted at a genesis block.

    Blocks are stored as nodes of a treelib ``Tree``. A node's identifier is
    the block's ``proofOfWork`` value and its parent is the node identified by
    the block's ``prevBlockHash``, so competing forks appear as sibling
    branches of the same tree.
    """

    def __init__(self, genesis):
        """Create the chain and insert ``genesis`` as its root node."""
        # TODO: figure out if genesis should be passed in or created here
        self.blockCount = 0
        self.blockchain = Tree()
        self.genesis = genesis
        self.addGenesisBlock(genesis)  # Add the genesis block to chain

    def addGenesisBlock(self, genesis):
        """Insert ``genesis`` as the root, labelled with the first 12 characters of its proof of work."""
        self.blockchain.create_node(
            "Genesis Block" + " ID: " + genesis.proofOfWork[:12],
            genesis.proofOfWork,
            data=genesis)

    def printBlockchain(self):
        """Print the whole block tree."""
        self.blockchain.show()

    def addBlock(self, block):
        """Attach ``block`` below the node identified by ``block.prevBlockHash``.

        The block counter is only advanced once the node has been inserted,
        so a rejected block (unknown parent, duplicate identifier, ...) does
        not leave a gap in the numbering of later blocks.
        """
        # TODO: run proof of work verification before adding block
        # Add block to chain & return true if POW valid
        # Else return false
        nextCount = self.blockCount + 1
        self.blockchain.create_node(
            "Block " + str(nextCount) + " ID: " + block.proofOfWork[:12],
            block.proofOfWork,
            parent=block.prevBlockHash,
            data=block)
        self.blockCount = nextCount

    def getGenesisID(self):
        """Return the identifier of the root (genesis) node."""
        return self.blockchain.root

    def getLongestChainBlocks(self):
        """Return ``(count, nodes)`` for the nodes lying at maximum depth.

        ``nodes`` holds every node whose depth equals the depth of the tree,
        i.e. the tips of the longest branch(es); ``count`` is its length.
        """
        treeDepth = self.blockchain.depth()
        longestPathLeaves = [
            node for node in self.blockchain.all_nodes()
            if self.blockchain.depth(node) == treeDepth
        ]
        return len(longestPathLeaves), longestPathLeaves

    def blockchainLength(self):
        """Return the depth of the tree, i.e. the length of the longest chain."""
        return self.blockchain.depth()

    def numBlocks(self):
        """Return the total number of nodes in the tree."""
        return self.blockchain.size()

    def printChain(self, chain):
        """Print ``chain`` showing each node's ``humanID`` data property."""
        chain.show(data_property="humanID")

    def tailBlocks(self, chain):
        """Print the number of leaves of ``chain`` followed by the leaves themselves."""
        leaves = chain.leaves()
        print("Num leaves" + str(len(leaves)))
        print(leaves)

    def checkBlock(self):
        """Placeholder for proof-of-work validation; currently only prints a message."""
        # Check the proof of work
        # return true if proof of work is valid
        # else return false
        print("printing block")

    def _exportGraphviz(self, outfilename):
        """Write the tree as Graphviz source to ``<outfilename>.gv`` and return that path."""
        gvPath = outfilename + '.gv'
        self.blockchain.to_graphviz(filename=gvPath,
                                    shape=u'box',
                                    graph=u'digraph')
        return gvPath

    def createBlockchainGraph(self, outfilename):
        """Export the tree to ``<outfilename>.gv`` and render it with the default output format."""
        print("creating graph")
        g = Source.from_file(self._exportGraphviz(outfilename))
        g.render()

    def createBlockchainImg(self, outfilename):
        """Export the tree to ``<outfilename>.gv`` and render it as a PNG image.

        The Graphviz source that was just written is what gets loaded; the
        PNG is requested through the render format rather than by trying to
        read a ``.png`` file as Graphviz source.
        """
        print("creating graph")
        g = Source.from_file(self._exportGraphviz(outfilename), format='png')
        g.render()
class BasicTree:
    """Enumerate vehicle passing orders as root-to-leaf paths of a tree.

    ``vehsInfo`` maps a vehicle id to a ``(lane, position)`` tuple. Every
    node tag is the "-"-terminated sequence of vehicle ids scheduled so far
    (e.g. ``"a-b-"``); the node identifier is the same string without the
    trailing dash.
    """

    def __init__(self, vehsInfo):
        self.tree = Tree()
        self.root = self.tree.create_node("Root", "root")    # root node
        self.vehsInfo = vehsInfo
        self.vehList = list(vehsInfo.keys())
        self.i = 1

    def _build(self, currentNode, vehList):
        '''
        Recursively append every not-yet-scheduled vehicle below ``currentNode``.

        :param vehList: Iterable of vehicle ids to schedule
        :param currentNode: The current node in the tree
        :return: None
        '''
        # Stop once every vehicle id already occurs in this node's tag.
        if all(vid in currentNode.tag for vid in vehList):
            return
        # Children of the root start from an empty prefix. Comparing against
        # the tree's root identifier replaces the old ``currentNode.is_root``
        # test, which referenced the method without calling it and was
        # therefore always truthy.
        if currentNode.identifier == self.tree.root:
            prefix = ""
        else:
            prefix = currentNode.tag
        for vehId in vehList:
            if vehId not in currentNode.tag:
                self.tree.create_node(prefix + vehId + "-", prefix + vehId, parent=currentNode)
        for node in self.tree.all_nodes():
            if node.is_leaf():
                self._build(currentNode=node, vehList=vehList)

    def _prune(self):
        """Remove orders in which a vehicle passes before the front vehicle of its own lane."""
        laneIds = {value[0] for value in self.vehsInfo.values()}
        sortedList = []
        for lane in laneIds:
            lane_info = {k: v[1] for k, v in self.vehsInfo.items() if v[0] == lane}
            # Vehicles in front are at the front of the lane
            sortedList.append([vid[0] for vid in sorted(lane_info.items(), key=itemgetter(1), reverse=True)])
        pruneList = [sublist for sublist in sortedList if len(sublist) > 1]
        for subList in pruneList:
            for index in range(1, len(subList)):
                # first, prune the subtree which begins with an illegal vehicle id
                self.tree.remove_subtree(subList[index])
                # second, delete the nodes which match the illegal pattern;
                # ids are escaped so characters such as "." match literally
                pattern = re.compile(re.escape(subList[index]) + ".*" + re.escape(subList[0]))
                for node in self.tree.all_nodes():
                    # A node may already be gone because one of its ancestors
                    # was removed earlier in this loop.
                    if pattern.search(node.tag) and self.tree.contains(node.identifier):
                        self.tree.remove_node(node.identifier)

    def build(self):
        """Build the full order tree, then prune the illegal orders."""
        self._build(self.root, self.vehList)
        self._prune()

    def show(self):
        """Print the tree."""
        self.tree.show()

    def _leaves(self):
        '''
        :return: All the plan for vehicle passing currently.
        '''
        return [node for node in self.tree.all_nodes() if node.is_leaf()]

    def legal_orders(self):
        """Return the vehicle-id sequences of all leaves that schedule ``tree.depth()`` vehicles."""
        orders = []
        for leaf in self._leaves():
            # upToRight.1-leftToBelow.18-belowToRight.2-belowToRight.3-
            tmp = leaf.tag.split("-")
            # The trailing "-" of a tag leaves one empty field behind.
            if '' in tmp:
                tmp.remove('')
            if len(tmp) == self.tree.depth():
                orders.append(tmp)
        return orders
Beispiel #18
0
                             identifier=json_data[0].get("mid"),
                             data=Info(json_data[0], param=param))
            uu = 1000 if len(json_data) >= 1000 else len(json_data)
            # print('uu:',uu)
            for j in range(1, uu):
                try:
                    tree.create_node(tag=json_data[j].get("mid"),
                                     identifier=json_data[j].get("mid"),
                                     parent=json_data[j].get("parent"),
                                     data=Info(json_data[j], param=param))
                except:
                    pass
            # 单颗传播树构建完成
            # 对传播树进行简化
            print('简化前:tree_depth:', tree.depth(), 'tree_nodes:',
                  len(tree.all_nodes()))
            tree = simplify_tree(tree)
            print('***********简化后:tree_depth:', tree.depth(), 'tree_nodes:',
                  len(tree.all_nodes()))
            trees.append(tree)
            # print(eid,"---trees simplified")
            # avg_tree_size += tree.size()
        # avg_trees_size.append(int(avg_tree_size/data_array.shape[0]))

    # plt.plot(params, avg_trees_size,'ro-')
    # plt.xlabel('alpha')
    # plt.ylabel('trees avg size')
    # plt.show()
    # plt.savefig('./img/img_1.jpg')
    # # 提取新闻原文的相关特征,并写入文件
    # feature = extract_features(Info(json_data[0]))
Beispiel #19
0
    parent_check[right] = left
    parent_check_reverse[left] = right
    nodeids.append(left)
    nodeids.append(right)

# Pick the root: a node id that never appears as a key of parent_check
# (keys are the child side of each child -> parent entry). If several ids
# qualify, the last one encountered wins.
for nodeid in nodeids:
    if nodeid in parent_check:
        continue
    else:
        rootid = nodeid

unique_ids = set(nodeids)
tree.create_node(tag=rootid,identifier=rootid)


# Repeatedly sweep the child -> parent map, attaching every child whose
# parent is already in the tree, until the tree holds every distinct id.
# NOTE(review): this never terminates if some id cannot be reached from
# rootid (e.g. a second root or a cycle) -- confirm the input guarantees a
# single connected tree.
while len(unique_ids) != len(tree.all_nodes()):
    for rightNode in list(parent_check.keys()):
        if tree.get_node(parent_check[rightNode]) is not None and tree.get_node(rightNode) is None:
            tree.create_node(tag=rightNode, identifier=rightNode,
                             parent=tree.get_node(parent_check[rightNode]))

# Accumulate the depth of every node.
# NOTE(review): `sum` must have been initialised before this excerpt and
# shadows the builtin of the same name.
for node in tree.all_nodes():
    sum += tree.depth(node)
print(sum)


def intersection(lst1, lst2):
    """Return the items of ``lst1`` that also occur in ``lst2``.

    The order and the duplicates of ``lst1`` are preserved.
    """
    common = []
    for item in lst1:
        if item in lst2:
            common.append(item)
    return common

sanIndex = 0
Beispiel #20
0
def tree_build_from_list(containers):
    """
    Build a tree based on a unsorted list.

    Build a tree of containers based on an unsorted list of containers.
    A root node named ``Tenant`` is always created; every container is then
    attached below the node named by its ``parentName`` as soon as that
    parent exists in the tree. Containers whose parent never appears (or
    whose name is already taken) are left out instead of blocking the build.

    Example:
    --------
        >>> containers = [
            {
                "childContainerKey": null,
                "configlets": [],
                "devices": [],
                "imageBundle": "",
                "key": "root",
                "name": "Tenant",
                "parentName": null
            },
            {
                "childContainerKey": null,
                "configlets": [
                    "veos3-basic-configuration"
                ],
                "devices": [
                    "veos-1"
                ],
                "imageBundle": "",
                "key": "container_43_840035860469981",
                "name": "staging",
                "parentName": "Tenant"
            }]
        >>> print(tree_build_from_list(containers=containers))
            {"Tenant": {"children": ["staging"]}}
    Parameters
    ----------
    containers : list of dict
        Container topology; each entry provides at least the ``name`` and
        ``parentName`` keys (``parentName`` is None for the root container).

    Returns
    -------
    json
        tree topology
    """
    # Create tree object
    tree = Tree()  # Create the base node
    # Create root node to mimic CVP behavior
    tree.create_node("Tenant", "Tenant")
    # Iterate for first level of containers directly attached under root.
    for cvp_container in containers:
        if cvp_container['parentName'] == 'Tenant':
            tree.create_node(cvp_container['name'],
                             cvp_container['name'],
                             parent=cvp_container['parentName'])
    # Containers arrive in no particular order, so keep sweeping the list
    # until every entry is placed or a sweep places nothing new.
    while len(tree.all_nodes()) < len(containers):
        attached = False
        for cvp_container in containers:
            name = cvp_container['name']
            parent = cvp_container['parentName']
            if tree.contains(parent) and not tree.contains(name):
                tree.create_node(name, name, parent=parent)
                attached = True
        if not attached:
            # Only orphaned or duplicated containers remain; another sweep
            # could never attach them, so stop instead of looping forever.
            break
    return tree.to_json()
Beispiel #21
0
class SAGG_BRIAC():
    """Region-tree task sampler driven by per-region interest.

    The task space is recursively split into box-shaped regions stored in a
    treelib ``Tree``. Each node carries a ``Region`` holding its bounds, its
    interest and ``cps_gs``: a pair of deques where index 0 stores
    competences and index 1 stores the matching tasks. Tasks are sampled
    from regions with probability driven by their interest.
    """

    def __init__(self, min, max, temperature=20):  # example --> min: [-1,-1] max: [1,1]
        """Create the sampler with a single root region spanning ``[min, max]``.

        :param min: Lower bound of the task space, one entry per dimension
        :param max: Upper bound of the task space, same length as ``min``
        :param temperature: Stored on the instance; not used inside this class
        """
        assert len(min) == len(max)
        self.maxlen = 200
        self.window_cp = 200
        self.minlen = self.maxlen / 20
        self.maxregions = 80

        # init regions' tree
        self.tree = Tree()
        self.regions_bounds = [Box(min, max, dtype=np.float32)]
        self.interest = [0.]
        self.tree.create_node('root', 'root', data=Region(maxlen=self.maxlen,
                                                          cps_gs=[deque(maxlen=self.maxlen + 1), deque(maxlen=self.maxlen + 1)],
                                                          bounds=self.regions_bounds[-1], interest=self.interest[-1]))
        self.nb_dims = len(min)
        self.temperature = temperature
        self.nb_split_attempts = 50
        self.max_difference = 0.2
        self.init_size = max - min
        self.ndims = len(min)
        self.mode_3_noise = 0.1

        # book-keeping
        self.sampled_tasks = []
        self.all_boxes = []
        self.all_interests = []
        self.update_nb = 0
        self.split_iterations = []

    def compute_interest(self, sub_region):
        """Return the absolute change in mean competence over the recent window.

        :param sub_region: Pair ``[competences, tasks]``; only the
            competences (index 0) are read
        :return: ``|mean(older half) - mean(newer half)|`` of the last
            ``window_cp`` competences, or 0 when the region holds no more
            than ``minlen`` samples
        """
        competences = sub_region[0]
        if len(competences) > self.minlen:  # TRICK NB 4
            cp_window = min(len(competences), self.window_cp)  # not completely window
            half = int(cp_window / 2)
            values = np.array(competences)
            first_half = values[-cp_window:-half]
            snd_half = values[-half:]
            cp = first_half.mean() - snd_half.mean()
        else:
            cp = 0
        return np.abs(cp)

    def split(self, nid):
        """Try to split region ``nid`` in two along a random dimension.

        ``nb_split_attempts`` candidate splits are drawn and the valid one
        with the highest score is kept; its two sub-regions become children
        of ``nid``. When no valid split is found, the oldest quarter of the
        region's samples is discarded instead.

        :param nid: Identifier of the tree node to split
        :return: True when two child regions were created
        """
        reg = self.tree.get_node(nid).data
        best_split_score = 0
        best_abs_interest_diff = 0
        best_bounds = None
        best_sub_regions = None
        best_interests = None
        is_split = False
        # Sub-regions narrower than this in any dimension are rejected (TRICK NB 2)
        min_extent = self.init_size / 15
        for _ in range(self.nb_split_attempts):
            sub_reg1 = [deque(maxlen=self.maxlen + 1), deque(maxlen=self.maxlen + 1)]
            sub_reg2 = [deque(maxlen=self.maxlen + 1), deque(maxlen=self.maxlen + 1)]

            # repeat until the two sub regions contain at least minlen of the mother region TRICK NB 1
            while len(sub_reg1[0]) < self.minlen or len(sub_reg2[0]) < self.minlen:
                # decide on dimension
                dim = np.random.choice(range(self.nb_dims))
                threshold = reg.bounds.sample()[dim]
                bounds1 = Box(reg.bounds.low, reg.bounds.high, dtype=np.float32)
                bounds1.high[dim] = threshold
                bounds2 = Box(reg.bounds.low, reg.bounds.high, dtype=np.float32)
                bounds2.low[dim] = threshold
                bounds = [bounds1, bounds2]
                valid_bounds = not (np.any(bounds1.high - bounds1.low < min_extent)
                                    or np.any(bounds2.high - bounds2.low < min_extent))

                # perform split in sub regions
                sub_reg1 = [deque(maxlen=self.maxlen + 1), deque(maxlen=self.maxlen + 1)]
                sub_reg2 = [deque(maxlen=self.maxlen + 1), deque(maxlen=self.maxlen + 1)]
                for idx, task in enumerate(reg.cps_gs[1]):
                    if bounds1.contains(task):
                        sub_reg1[1].append(task)
                        sub_reg1[0].append(reg.cps_gs[0][idx])
                    else:
                        sub_reg2[1].append(task)
                        sub_reg2[0].append(reg.cps_gs[0][idx])
                sub_regions = [sub_reg1, sub_reg2]

            # compute interest
            interest = [self.compute_interest(sub_reg1), self.compute_interest(sub_reg2)]

            # compute score
            # NOTE(review): len(sub_reg1) is the length of the two-deque
            # container (always 2), not the number of samples -- confirm
            # whether len(sub_reg1[0]) was intended.
            split_score = len(sub_reg1) * len(sub_reg2) * np.abs(interest[0] - interest[1])
            if split_score >= best_split_score and valid_bounds:  # TRICK NB 3, max diff
                is_split = True
                best_abs_interest_diff = np.abs(interest[0] - interest[1])
                best_split_score = split_score
                best_sub_regions = sub_regions
                best_bounds = bounds
                # Keep the interests that belong to this candidate, so the
                # new regions are not labelled with those of a later attempt.
                best_interests = interest

        if is_split:
            if best_abs_interest_diff > self.max_difference:
                self.max_difference = best_abs_interest_diff
            # add new nodes to tree
            for idx, (cps_gs, bounds) in enumerate(zip(best_sub_regions, best_bounds)):
                self.tree.create_node(parent=nid, data=Region(self.maxlen, cps_gs=cps_gs, bounds=bounds,
                                                              interest=best_interests[idx]))
        else:
            # TRICK NB 6, remove old stuff if can't find split
            assert len(reg.cps_gs[0]) == (self.maxlen + 1)
            reg.cps_gs[0] = deque(islice(reg.cps_gs[0], int(self.maxlen / 4), self.maxlen + 1))
            reg.cps_gs[1] = deque(islice(reg.cps_gs[1], int(self.maxlen / 4), self.maxlen + 1))

        return is_split

    def merge(self, all_nodes):
        """Remove the pair of sibling leaves whose interests differ the least.

        :param all_nodes: Nodes of the region tree to consider
        """
        # get a list of children pairs
        parent_children = []
        for n in all_nodes:
            if not n.is_leaf():  # if node is a parent
                children = self.tree.children(n.identifier)
                if children[0].is_leaf() and children[1].is_leaf():  # both children must be leaves for an easy remove
                    parent_children.append([n, children])  # [parent, [child1, child2]]

        # sort the pairs by the absolute difference of their children's interests
        parent_children.sort(key=lambda x: np.abs(x[1][0].data.interest - x[1][1].data.interest), reverse=False)

        # remove useless pair
        parent, (child1, child2) = parent_children[0][0], parent_children[0][1][:2]
        self.tree.remove_node(child1.identifier)
        self.tree.remove_node(child2.identifier)

        # remove 1/4 of parent to avoid falling in a splitting-merging loop
        parent_data = parent.data
        parent_data.cps_gs[0] = deque(islice(parent_data.cps_gs[0], int(self.maxlen / 4), self.maxlen + 1))
        parent_data.cps_gs[1] = deque(islice(parent_data.cps_gs[1], int(self.maxlen / 4), self.maxlen + 1))
        self.nodes_to_recompute.append(parent.identifier)

        # remove child from recompute list if they where touched when adding the current task
        if child1.identifier in self.nodes_to_recompute:
            self.nodes_to_recompute.pop(self.nodes_to_recompute.index(child1.identifier))
        if child2.identifier in self.nodes_to_recompute:
            self.nodes_to_recompute.pop(self.nodes_to_recompute.index(child2.identifier))

    def add_task_comp(self, node, task, comp):
        """Record ``(task, comp)`` in ``node`` and in every descendant whose bounds contain ``task``.

        Touched nodes are queued in ``nodes_to_recompute``; a node whose
        region reports that it needs splitting is queued in ``nodes_to_split``.
        """
        reg = node.data
        nid = node.identifier
        if reg.bounds.contains(task):  # task falls within region
            self.nodes_to_recompute.append(nid)
            children = self.tree.children(nid)
            for n in children:  # if task in region, task is in one sub-region
                self.add_task_comp(n, task, comp)

            need_split = reg.add(task, comp, children == [])  # COPY ALL MODE
            if need_split:
                self.nodes_to_split.append(nid)

    def update(self, task, continuous_competence, all_raw_rewards):
        """Feed one ``(task, competence)`` observation into the region tree.

        :param task: The task that was attempted
        :param continuous_competence: Competence obtained on ``task``
        :param all_raw_rewards: Not used by this method
        :return: ``(new_split, None)`` where ``new_split`` tells whether a
            region was split during this update
        """
        # add new (task, competence) to regions nodes
        self.nodes_to_split = []
        self.nodes_to_recompute = []
        new_split = False
        root = self.tree.get_node('root')
        self.add_task_comp(root, task, continuous_competence)
        assert len(self.nodes_to_split) <= 1

        # split a node if needed
        need_split = len(self.nodes_to_split) == 1
        if need_split:
            new_split = self.split(self.nodes_to_split[0])
            if new_split:
                self.update_nb += 1
                # update list of regions_bounds
                all_nodes = self.tree.all_nodes()
                if len(all_nodes) > self.maxregions:  # too many regions, lets merge one of them
                    self.merge(all_nodes)
                    all_nodes = self.tree.all_nodes()
                self.regions_bounds = [n.data.bounds for n in all_nodes]

        # recompute interests of touched nodes
        for nid in self.nodes_to_recompute:
            node = self.tree.get_node(nid)
            reg = node.data
            reg.interest = self.compute_interest(reg.cps_gs)

        # collect new interests and new [comp, tasks] lists
        all_nodes = self.tree.all_nodes()
        self.interest = []
        self.cps_gs = []
        for n in all_nodes:
            self.interest.append(n.data.interest)
            self.cps_gs.append(n.data.cps_gs)

        # bk-keeping
        self.all_boxes.append(copy.copy(self.regions_bounds))
        self.all_interests.append(copy.copy(self.interest))
        self.split_iterations.append(self.update_nb)
        assert len(self.interest) == len(self.regions_bounds)

        return new_split, None

    def draw_random_task(self):
        """Sample a task from the first entry of ``regions_bounds``."""
        return self.regions_bounds[0].sample()  # first region is root region

    def sample_task(self, args):
        """Sample the next task, append it to ``sampled_tasks`` and return it.

        One of three modes is drawn at random: mutate the lowest-competence
        task of an interest-selected region (10%), draw a random task (20%),
        or draw a random task inside an interest-selected region (70%).

        :param args: Not used by this method
        """
        mode = np.random.rand()
        if mode < 0.1:  # "mode 3" (10%) -> sample on regions and then mutate lowest-performing task in region
            if len(self.sampled_tasks) == 0:
                self.sampled_tasks.append(self.draw_random_task())
            else:
                region_id = proportional_choice(self.interest, eps=0.0)
                worst_task_idx = np.argmin(self.cps_gs[region_id][0])
                # mutate task by a small amount of gaussian noise
                task = np.random.normal(self.cps_gs[region_id][1][worst_task_idx].copy(), self.mode_3_noise)
                # clip to stay within region (add small epsilon to avoid falling in multiple regions)
                task = np.clip(task, self.regions_bounds[region_id].low + 1e-5, self.regions_bounds[region_id].high - 1e-5)
                self.sampled_tasks.append(task)

        elif mode < 0.3:  # "mode 2" (20%) -> random task
            self.sampled_tasks.append(self.draw_random_task())

        else:  # "mode 1" (70%) -> sampling on regions and then random task in selected region
            region_id = proportional_choice(self.interest, eps=0.0)
            self.sampled_tasks.append(self.regions_bounds[region_id].sample())

        return self.sampled_tasks[-1]

    def dump(self, dump_dict):
        """Store the recorded boxes, split iterations and interests in ``dump_dict`` and return it."""
        dump_dict['all_boxes'] = self.all_boxes
        dump_dict['split_iterations'] = self.split_iterations
        dump_dict['all_interests'] = self.all_interests
        return dump_dict

    @property
    def nb_regions(self):
        """Number of entries currently held in ``regions_bounds``."""
        return len(self.regions_bounds)

    @property
    def get_regions(self):
        """The current list of region bounds."""
        return self.regions_bounds
Beispiel #22
0
        json_data = json.load(load_f)


        #构建新闻事件传播树
        tree = Tree()
        tree.create_node(tag=json_data[0].get("mid"),identifier=json_data[0].get("mid"),data=Info(json_data[0],param = 10))
        uu = 1000 if len(json_data) >=1000 else len(json_data)
        # print('uu:',uu)
        for j in range(1,uu):
            try:
                tree.create_node(tag=json_data[j].get("mid"),identifier=json_data[j].get("mid"),parent=json_data[j].get("parent"),data=Info(json_data[j]))
            except:
                pass
        #单颗传播树构建完成
        #对传播树进行简化
        print('简化前:tree_depth:',tree.depth(),'tree_nodes:',len(tree.all_nodes()))
        tree = simplify_tree(tree)
        print('简化后:tree_depth:', tree.depth(),'tree_nodes:',len(tree.all_nodes()))
        trees.append(tree)
        # print(eid,"---trees simplified")




    #所有新闻事件的传播树构建结束



    #3折交叉法
    pd_data = pd.read_csv('id_label.txt', sep='\t', header=None)
    # wb_data = pd_data.as_matrix()[0:20,]
Beispiel #23
0
def unexpanded_nodes(game: treelib.Tree):
    """Count the nodes of ``game`` whose ``data[2]`` entry is falsy."""
    count = 0
    for node in game.all_nodes():
        if not node.data[2]:
            count += 1
    return count
Beispiel #24
0
class PrePruneTree:
    """Enumerate vehicle passing orders, pruning illegal branches while building.

    ``vehsInfo`` maps a vehicle id to a ``(lane, position)`` tuple. Every
    node tag is the "-"-terminated sequence of vehicle ids scheduled so far;
    the node identifier is the same string without the trailing dash.
    """

    def __init__(self, vehsInfo):
        self.tree = Tree()
        self.root = self.tree.create_node("Root", "root")    # root node
        self.vehsInfo = vehsInfo
        self.vehList = list(vehsInfo.keys())
        self.pruneList = None

    @tail_call_optimized
    def _build(self, currentNode, vehList):
        '''
        Recursively append every not-yet-scheduled vehicle below ``currentNode``.

        :param vehList: Iterable of vehicle ids to schedule
        :param currentNode: The current node in the tree
        '''
        # Stop once every vehicle already occurs in the tag, or the tag is illegal.
        if all(vid in currentNode.tag for vid in vehList) or self._testIllegel(currentNode.tag):
            return
        # Children of the root start from an empty prefix. Comparing against
        # the tree's root identifier replaces the old ``currentNode.is_root``
        # test, which referenced the method without calling it and was
        # therefore always truthy.
        if currentNode.identifier == self.tree.root:
            prefix = ""
        else:
            prefix = currentNode.tag
        for vehId in vehList:
            if vehId not in currentNode.tag:
                self.tree.create_node(prefix + vehId + "-", prefix + vehId, parent=currentNode)
        for node in self.tree.all_nodes():
            if node.is_leaf() and not self._testPrePrune(node.tag):
                self._build(currentNode=node, vehList=vehList)

    @staticmethod
    def _scheduled(tag):
        """Split a node tag into vehicle ids, dropping the first empty field left by the trailing dash."""
        vehs = tag.split("-")
        if '' in vehs:
            vehs.remove('')
        return vehs

    def _testIllegel(self, tag):
        '''
        test whether need to stop recursion
        :param tag: Node tag
        :return: boolean (if true, the recursion will stop)
        '''
        # upToRight.1-leftToBelow.18-belowToRight.2-belowToRight.3-
        tmp = self._scheduled(tag)
        if len(tmp) < len(self.vehsInfo) - 1:
            return False
        # NOTE(review): past the guard above at most one vehicle is usually
        # left unscheduled, so the pairwise comparison below rarely has two
        # distinct vehicles to compare -- confirm the intended threshold.
        surplusVeh = set(self.vehList) - set(tmp)
        unscheduled = set(self.vehsInfo.keys()) - set(tmp)
        for veh1 in surplusVeh:
            for veh2 in unscheduled:
                for subList in self.pruneList:
                    # Compare the individual vehicle (veh1), not the whole
                    # surplus collection, against the lane ordering.
                    if veh1 in subList and veh2 in subList:
                        if subList.index(veh2) > subList.index(veh1):
                            return True
        return False

    def _testPrePrune(self, tag):
        '''
        test whether need to preprune
        :param tag: Node tag
        :return: boolean (true when a scheduled vehicle has an unscheduled
                 vehicle ahead of it in the same lane ordering)
        '''
        vehs = self._scheduled(tag)
        unscheduled = set(self.vehsInfo.keys()) - set(vehs)
        for veh1 in vehs:
            for veh2 in unscheduled:
                for subList in self.pruneList:
                    if veh1 in subList and veh2 in subList:
                        if subList.index(veh2) < subList.index(veh1):
                            return True
        return False

    def obtainPruneList(self):
        """Return, for every lane holding more than one vehicle, its ids sorted by descending position."""
        laneIds = {value[0] for value in self.vehsInfo.values()}
        sortedList = []
        for lane in laneIds:
            lane_info = {k: v[1] for k, v in self.vehsInfo.items() if v[0] == lane}
            # Vehicles in front are at the front of the lane
            sortedList.append([vid[0] for vid in sorted(lane_info.items(), key=itemgetter(1), reverse=True)])
        return [sublist for sublist in sortedList if len(sublist) > 1]

    def build(self):
        """Build the pruned order tree on a worker thread with an enlarged stack.

        The call blocks until the tree is complete, and an exception raised
        while building is re-raised in the caller.
        """
        threading.stack_size(20000000)
        self.pruneList = self.obtainPruneList()
        errors = []

        def _run():
            try:
                self._build(self.root, self.vehList)
            except Exception as exc:  # handed back to the calling thread below
                errors.append(exc)

        # Pass the callable itself: calling _build inline would run it on the
        # current thread and hand its None result to Thread as the target.
        thread = threading.Thread(target=_run)
        thread.start()
        thread.join()
        if errors:
            raise errors[0]

    def show(self):
        """Print the tree."""
        self.tree.show()

    def _leaves(self):
        '''
        :return: All the plan for vehicle passing currently.
        '''
        return [node for node in self.tree.all_nodes() if node.is_leaf()]

    def legal_orders(self):
        """Return the vehicle-id sequences of all leaves that schedule ``tree.depth()`` vehicles."""
        orders = []
        for leaf in self._leaves():
            # upToRight.1-leftToBelow.18-belowToRight.2-belowToRight.3-
            tmp = self._scheduled(leaf.tag)
            if len(tmp) == self.tree.depth():
                orders.append(tmp)
        return orders
Beispiel #25
0
def unexpanded_night_nodes(game: treelib.Tree):
    """Count the nodes whose ``data[2]`` is falsy and whose ``data[1].time`` equals 1.

    Presumably ``data[2]`` is an "expanded" flag and ``time == 1`` marks the
    night phase -- confirm against the code that fills ``node.data``.
    """
    count = 0
    for node in game.all_nodes():
        payload = node.data
        if not payload[2] and payload[1].time == 1:
            count += 1
    return count
Beispiel #26
0
class TreeT(object):
    """Wrapper around a ``Tree`` that converts between three representations:
    a tokenised PTB-style bracketing, the tree itself, and per-leaf tag
    sequences ("paths").

    Node payloads are ``TreeData`` objects; this class reads and writes their
    ``leaves``, ``height``, ``word``, ``comb_side`` and ``miss_side`` fields.
    """

    def __init__(self, max_id=0):
        # NOTE(review): max_id is accepted but not used here.
        self.tree = Tree()

    def from_ptb_to_tree(self, line, max_id=0, leaf_id=1, parent_id=None):
        """Recursively add the bracketed constituent starting at ``line[0]``.

        :param line: token sequence; ``line[0]`` is '(' and ``line[1]`` the tag.
        :param max_id: identifier given to the next non-root constituent node.
        :param leaf_id: identifier given to the next word node.
        :param parent_id: identifier of the parent node; ``None`` marks the
            outermost call, whose node gets identifier 0.
        :return: ``(tokens consumed, updated max_id, updated leaf_id)``.
        """
        # starts by ['(', 'pos']
        pos_tag = line[1]
        if parent_id is None:
            pos_id = 0
        else:
            pos_id = max_id
            max_id += 1

        self.tree.create_node(pos_tag, pos_id, parent_id, TreeData())

        parent_id = pos_id
        # '(' and the tag have been consumed so far
        total_offset = 2

        if line[2] != '(':
            # sub-tree is leaf
            # line[0:4] = ['(', 'pos', 'word', ')']
            word_tag = line[2]
            self.tree.create_node(word_tag, leaf_id, parent_id, TreeData())
            return 4, max_id, leaf_id + 1

        line = line[2:]

        # Consume child constituents until this constituent's closing bracket
        while line[0] != ')':
            offset, max_id, leaf_id = self.from_ptb_to_tree(
                line, max_id, leaf_id, parent_id)
            total_offset += offset
            line = line[offset:]

        # +1 accounts for the closing ')'
        return total_offset + 1, max_id, leaf_id

    def add_height(self, tree_dep):
        """Fill ``data.height`` of every leaf and ``data.leaves`` of the nodes.

        :param tree_dep: indexable by leaf identifier; ``tree_dep[lid]`` is
            compared with node identifiers (presumably the dependency head of
            each word -- TODO confirm with the caller).
        :return: always ``True``.
        """

        # Start from a clean slate on every node
        for n in self.tree.all_nodes():
            n.data.leaves = []

        for leaf in self.tree.leaves():
            lid = leaf.identifier
            hid = tree_dep[lid]
            if hid == self.tree.root:
                # Leaf attached to the root: its height is its depth in the
                # tree and it is recorded on every node of its root-to-leaf path
                self.tree[lid].data.height = self.tree.depth(self.tree[lid])
                for cid in [
                        p for p in self.tree.paths_to_leaves() if lid in p
                ][0]:
                    self.tree[cid].data.leaves += [lid]
            else:
                # Climb from the leaf towards the root, recording the leaf on
                # each visited node, for as long as the tree_dep values of the
                # other leaves below the next ancestor are themselves leaves
                # below that ancestor.
                height = -1
                cid = lid
                cond = True
                while cond:
                    self.tree[cid].data.leaves += [lid]
                    height += 1
                    cid = self.tree.parent(cid).identifier
                    cid_leaves = [l.identifier for l in self.tree.leaves(cid)]
                    cid_l_dep = [tree_dep[l] for l in cid_leaves if l != lid]
                    cond = set(cid_l_dep).issubset(set(cid_leaves))
                self.tree[lid].data.height = height

        # Nodes that no leaf claimed above
        x_nodes = [
            n.identifier for n in self.tree.all_nodes() if n.data.leaves == []
        ]
        # Processed in reverse all_nodes() order: each one borrows the first
        # recorded leaf of its child with the smallest height and raises that
        # leaf's height by one.
        for x_node in x_nodes[::-1]:
            min_id = min(self.tree.children(x_node),
                         key=lambda c: c.data.height)
            _lid = min_id.data.leaves[0]
            self.tree[_lid].data.height += 1
            self.tree[x_node].data.leaves += [_lid]

        return True

    def _from_tree_to_ptb(self, nid):
        """Render the subtree rooted at ``nid`` as a bracketed string.

        A leaf becomes `` (tag word)``; an inner node becomes `` (tag`` followed
        by its children in ascending identifier order and a closing ``)``.
        """
        nid = self.tree.subtree(nid).root
        if self.tree[nid].is_leaf():
            return ' (' + self.tree[nid].tag + ' ' + self.tree[
                nid].data.word + ')'

        res = ' (' + self.tree[nid].tag

        for c_nid in sorted(self.tree.children(nid),
                            key=lambda x: x.identifier):
            res += self._from_tree_to_ptb(c_nid.identifier)

        return res + ')'

    def from_tree_to_ptb(self):
        """Render the whole tree as a bracketed string (see ``_from_tree_to_ptb``)."""
        return self._from_tree_to_ptb(self.tree.root)

    def from_tag_to_tree(self, tag, word, pos_id=0):
        """Build the tree described by one tag sequence.

        :param tag: iterable of ``tag_nodes`` sequences. A leading ``CL``/``CR``
            element is stored as ``comb_side`` and dropped; the first remaining
            element becomes a node chained below the previous one, and every
            further element becomes a child whose ``miss_side`` is its first
            character and whose tag is the rest.
        :param word: stored on the first leaf whose ``miss_side`` is ``''``.
        :param pos_id: identifier of the first created node.
        :return: the next unused identifier.
        """
        parent_id = None
        for tag_nodes in tag:
            if tag_nodes[0] in [CL, CR]:
                c_side = tag_nodes[0]
                # Only a side marker and nothing else: use an empty tag
                _tag_nodes = tag_nodes[1:] if len(tag_nodes) > 1 else ['']
            else:
                c_side = ''
                _tag_nodes = tag_nodes
            self.tree.create_node(_tag_nodes[0],
                                  pos_id,
                                  parent=parent_id,
                                  data=TreeData(comb_side=c_side))

            parent_id = pos_id
            pos_id += 1
            for tag_node in _tag_nodes[1:]:
                self.tree.create_node(tag_node[1:],
                                      pos_id,
                                      parent=parent_id,
                                      data=TreeData(miss_side=tag_node[0]))
                pos_id += 1
        for l in self.tree.leaves():
            if l.data.miss_side == '':
                l.data.word = word
                break
        return pos_id

    @memoize
    def is_combine_to(self, side):
        """Return whether the root's ``comb_side`` equals ``side``."""
        return self.tree[self.tree.root].data.comb_side == side

    @memoize
    def is_combine_right(self):
        """Return whether the root's ``comb_side`` is ``CR``."""
        return self.is_combine_to(CR)

    @memoize
    def is_combine_left(self):
        """Return whether the root's ``comb_side`` is ``CL``."""
        return self.is_combine_to(CL)

    @memoize
    def is_complete_tree(self):
        """Return whether every node has an empty ``miss_side``."""
        return all([n.data.miss_side == '' for n in self.tree.all_nodes()])

    @memoize
    def get_missing_leaves_to(self, miss_val, side):
        """Return identifiers of leaves with ``miss_side == side`` and tag ``miss_val``."""
        return [
            l.identifier for l in self.tree.leaves(self.tree.root)
            if l.data.miss_side == side and l.tag == miss_val
        ]

    @memoize
    def get_missing_leaves_left(self, miss_val):
        """Return identifiers of leaves tagged ``miss_val`` whose ``miss_side`` is ``L``."""
        return self.get_missing_leaves_to(miss_val, L)

    @memoize
    def get_missing_leaves_right(self, miss_val):
        """Return identifiers of leaves tagged ``miss_val`` whose ``miss_side`` is ``R``."""
        return self.get_missing_leaves_to(miss_val, R)

    @memoize
    def root_tag(self):
        """Return the tag of the root node."""
        return self.tree[self.tree.root].tag

    @memoize
    def is_no_missing_leaves(self):
        """Return whether every leaf has an empty ``miss_side``."""
        return all(
            [l.data.miss_side == '' for l in self.tree.leaves(self.tree.root)])

    # NOTE(review): this method mutates self.tree yet is memoized; whether a
    # repeated call with equal arguments is skipped depends on ``memoize``,
    # which is defined outside this block -- confirm this is intended.
    @memoize
    def combine_tree(self, _tree, comb_leaf):
        """Paste ``_tree.tree`` below ``comb_leaf``, then call
        ``link_past_node(comb_leaf)`` on the result; returns ``self``.
        """
        self.tree.paste(comb_leaf, _tree.tree)
        self.tree.link_past_node(comb_leaf)
        return self

    def tree_to_path(self, nid, path):
        """Fill ``path`` with one list per leaf below ``nid``.

        :param nid: identifier of the subtree root to process.
        :param path: dict updated in place, keyed by leaf identifier.
        :return: ``(leaf identifier, remaining height)`` of the child path
            that continues through ``nid``.
        """

        # Stop condition
        if self.tree[nid].is_leaf():
            path[nid] = []
            return nid, self.tree[nid].data.height

        # Recursion
        flag = CR
        for child in self.tree.children(nid):
            cid = child.identifier
            leaf_id, height = self.tree_to_path(cid, path)

            if (height == 0):
                # Reached end of path can add flag
                path[leaf_id].insert(0, flag)
                # path[leaf_id].append(flag)

            if height > 0:
                path[leaf_id].insert(0, nid)
                # only single child will have height>0
                # and its value will be the one that is returned
                # to the parent
                ret_leaf_id, ret_height = leaf_id, height - 1

                # once we reached a height>0, it means that
                # this path includes the parent, and thus flag
                # direction should flip
                flag = CL

        # NOTE(review): ret_leaf_id/ret_height are only bound when some child
        # reported height > 0; otherwise this line raises UnboundLocalError.
        return ret_leaf_id, ret_height

    def path_to_tags(self, path):
        """Turn each path (as built by ``tree_to_path``) into a list of tags.

        A leading ``CL``/``CR`` marker is copied through. For every node on the
        path except the last, its tag is emitted followed by the tags of its
        children that are not the next path element, prefixed with ``R`` when
        the child's identifier is greater than the next element's and ``L``
        otherwise. The tag of the last element closes the list.
        """
        tags = []
        for p in path:
            _res = []
            # Work on a copy: elements are popped below
            _p = copy.copy(p)
            if _p[0] in [CL, CR]:
                _res.append(_p[0])
                _p = _p[1:]
            # Loop while more than one element is left
            while _p[:-1]:
                el_p = _p.pop(0)
                _res.append(self.tree[el_p].tag)
                for c in self.tree.children(el_p):
                    if c.identifier != _p[0]:
                        _res.append(R + c.tag if c.identifier > _p[0] else L +
                                    c.tag)
            _res.append(self.tree[_p[0]].tag)
            tags.append(_res)
        return tags

    def path_to_words(self, path):
        """Return the tag of each node whose identifier is in ``path``."""
        return [self.tree[k].tag for k in path]

    def from_tree_to_tag(self):
        """Return ``{'tags': ..., 'words': ...}`` computed from the current tree."""
        path = {}
        self.tree_to_path(self.tree.root, path)
        return {
            'tags': self.path_to_tags(path.values()),
            'words': self.path_to_words(path.keys())
        }

    def from_ptb_to_tag(self, line, max_id, depend):
        """Parse ``line`` into the tree, compute heights from ``depend`` and
        return the tag lists produced by ``path_to_tags``.
        """
        self.from_ptb_to_tree(line, max_id)
        self.add_height(depend)
        path = {}
        self.tree_to_path(self.tree.root, path)
        return self.path_to_tags(path.values())
Beispiel #27
0
def level_size(tree: treelib.Tree, level=1):
    """Return how many nodes of ``tree`` report ``tree.level(...) == level``."""
    count = 0
    for node in tree.all_nodes():
        if tree.level(node.identifier) == level:
            count += 1
    return count
Beispiel #28
0
from treelib import Tree

# Demo tree: "a" is the root with children "b" and "e"; "b" has the children
# "c", "d" and "f". Every node carries a value under data["v"].
tree = Tree()

tree.create_node(tag="a", identifier="a", data={"v": 0})
tree.create_node(tag="b", identifier="b", parent="a", data={"v": 7})
tree.create_node(tag="c", identifier="c", parent="b", data={"v": 4})
tree.create_node(tag="d", identifier="d", parent="b", data={"v": 3})
tree.create_node(tag="f", identifier="f", parent="b", data={"v": 0})
tree.create_node(tag="e", identifier="e", parent="a", data={"v": 3})

print(tree)


def func_1(node):
    """Return the sum of the ``v`` values of ``node``'s direct children.

    A child whose value is 0 and that has children of its own is first given
    the sum computed recursively for it (its ``data["v"]`` is updated in place).
    Reads the module-level ``tree``.
    """
    total = 0
    for kid in tree.children(node.identifier):
        payload = kid.data
        if payload["v"] == 0 and tree.children(kid.identifier):
            payload["v"] = func_1(kid)
        total += payload["v"]
    return total


# Give every inner node that still has value 0 the sum of its children
for node in tree.all_nodes():
    if node.data["v"] != 0 or not tree.children(node.identifier):
        continue
    node.data["v"] = func_1(node)

# Report the final value of every node
for node in tree.all_nodes():
    print("".join(["node: ", node.tag, " value: {}".format(node.data["v"])]))
Beispiel #29
0
class HFE(FeatureTransformChoice):
    """Feature transform that aggregates OTU columns along a taxonomy tree
    (the "HFE algorithm") and keeps the tree nodes that survive three filters.

    ``taxonomy`` is indexed with ``['OTU']`` and ``['Taxonomy']``; each taxonomy
    string is a ';'-separated lineage. After ``fit`` the identifiers of the
    retained nodes are stored in ``self.valid_ids``.
    """

    def __init__(self, dataset_shape, taxonomy=None):
        self.dataset_shape = dataset_shape
        self.taxonomy = taxonomy

    @staticmethod
    def _otu_columns(X):
        """Return the column names of ``X`` that start with 'otu' (any case)."""
        return [a for a in X.columns.values if a.lower().startswith('otu')]

    def fit(self, hyperparameters, X, y):
        """Build the taxonomy tree from ``X`` and select the final nodes.

        :param hyperparameters: unused by this transform.
        :param X: DataFrame whose OTU columns are named 'otu...'.
        :param y: class labels, one per row of ``X``.
        """

        # Initialize new tree
        self.tree = Tree()

        if self.taxonomy is None:
            print(
                'Warning: Could not execute HFE algorithm no taxonomy provided'
            )
            return

        # Add otus to the internal tree structure. _add_otu_to_tree copies the
        # column it needs, so X itself can be passed without copying it.
        for otu in self._otu_columns(X):
            self._add_otu_to_tree(self.tree, otu, X)

        # Perform filtering then determine which nodes in the tree will act as the final features
        self._correlation_filter()
        self._path_ig_filter(y)
        self._leaf_ig_filter(y)
        self.valid_ids = [
            x.identifier for x in self.tree.all_nodes() if x.data['valid']
        ]

    def transform(self, X, y=None):
        """Return a DataFrame with one column per node retained by ``fit``.

        Column ``i`` holds the aggregated feature vector of ``valid_ids[i]``
        computed on ``X``.
        """

        # Build a new tree (Note: The self.tree is built on training data, so we need a new tree for unseen data)
        new_tree = Tree()
        for otu in self._otu_columns(X):
            self._add_otu_to_tree(new_tree, otu, X)

        # Extract the final features and store in dataframe
        result = pd.DataFrame()

        for i, current_id in enumerate(self.valid_ids):
            result[i] = new_tree[current_id].data['feature_vector']

        return result

    def get_name(self):
        return 'HFE'

    def hyperparameter_grid(self):
        return None

    # Description: Populates tree structure
    def _add_otu_to_tree(self, tree, otu_name, otu_table):
        """Add the lineage of ``otu_name`` to ``tree``.

        Every non-empty taxonomic level becomes a node (created on first use)
        whose feature vector is the sum of the columns of all OTUs below it.
        """

        raw_string = self.taxonomy['Taxonomy'][otu_name.lower() == np.array(
            [x.lower() for x in self.taxonomy['OTU']])].values[0]
        # Strip brackets and digits from the taxonomy string
        taxonomic_string = re.sub(r'[()0-9]', '', raw_string)
        taxonomic_levels = taxonomic_string.split(';')

        feature_vector = otu_table.loc[:, otu_name].values.copy()

        # Last level that was not skipped; it is the parent of the next node.
        # Indexing taxonomic_levels[i - 1] instead would point at a skipped
        # empty level, which is never a node in the tree.
        parent = None

        for level in taxonomic_levels:

            # Some of the levels might be empty if there an extra ; in the taxonomy file or two back to back. These should be skipped
            if level == '':
                continue

            if tree.contains(level):
                # Increment node OTU vector with argument OTU vector (ie node vector + new otu vector)
                tree[level].data['feature_vector'] += feature_vector.copy()

            else:

                # Create new node with dictionary of OTU vector and boolean flag indicating valid node
                tree.create_node(
                    tag=level,
                    identifier=level,
                    parent=parent,
                    data={
                        'feature_vector': feature_vector.copy(),
                        'valid': True
                    })

            parent = level

    def _info_gain(self, node_id, labels, cache):
        """Return ``mutual_info_classif`` of one node's vector against ``labels``.

        ``cache`` maps node id to an already computed result; the call is
        deterministic (``random_state=0``) and feature vectors do not change
        while filtering, so a node shared by several paths is scored once.
        """
        if node_id not in cache:
            vector = self.tree[node_id].data['feature_vector']
            cache[node_id] = mutual_info_classif(
                X=vector.reshape(len(labels), 1), y=labels, random_state=0)
        return cache[node_id]

    @staticmethod
    def _running_mean(mean, count, value):
        """Return the mean after adding ``value`` to ``count`` earlier samples."""
        if count == 0:
            return value
        return mean * (count / (count + 1)) + (value / (count + 1))

    # Description: Removes all child nodes whose feature vector is correlated with their parent node by switching the valid flag for that node to False
    def _correlation_filter(self, threshold=0.80):
        """Invalidate nodes that correlate above ``threshold`` with their path neighbour.

        NOTE(review): paths are walked from the last element backwards and the
        element nearer the start of the path (``path[-1 - i]``) is the one
        invalidated. If ``paths_to_leaves`` lists nodes root-first, that is the
        ancestor rather than the child named in the description -- confirm
        the intended direction.
        """

        for path in self.tree.paths_to_leaves():
            for i in range(1, len(path)):
                later_id = path[-i]
                earlier_id = path[-1 - i]
                current_correlation = pearsonr(
                    self.tree[later_id].data['feature_vector'],
                    self.tree[earlier_id].data['feature_vector'])[0]

                if current_correlation > threshold:
                    self.tree[earlier_id].data['valid'] = False

    # Description: Filters nodes based on average path information gain (IG)
    def _path_ig_filter(self, labels):
        """Invalidate nodes whose IG is below the mean IG of their path.

        The mean is taken over the nodes of the path that are still valid.
        Nodes whose feature vector sums to zero are invalidated as well, and a
        path without any valid node is invalidated entirely.
        """

        gains = {}

        for path in self.tree.paths_to_leaves():

            added = 0
            avg_path_IG = None

            for bacteria in path:
                # For thoses nodes that passed the correlation filter compute the running IG average
                if self.tree[bacteria].data['valid']:
                    avg_path_IG = self._running_mean(
                        avg_path_IG, added,
                        self._info_gain(bacteria, labels, gains))
                    added += 1

            # If a node in the path is less than the average or it is uninformative (ie all zeros) remove the node
            for bacteria in path:
                node_data = self.tree[bacteria].data

                if (avg_path_IG is None
                        or self._info_gain(bacteria, labels, gains) < avg_path_IG
                        or sum(node_data['feature_vector']) == 0):
                    node_data['valid'] = False

    # Description: Filter leaf nodes in incomplete paths based on global information gain (IG)
    def _leaf_ig_filter(self, labels):
        """Invalidate leaves of incomplete paths whose IG is zero or below the
        tree-wide mean IG of the still-valid nodes.

        A node that lies on several paths contributes once per path to the mean.
        """

        # Returns whether a path is incomplete
        def incomplete_path(path):
            return any(not self.tree[bacteria].data['valid']
                       for bacteria in path)

        paths = self.tree.paths_to_leaves()
        gains = {}
        avg_tree_IG = None
        added = 0

        # Compute global avg IG
        for path in paths:
            for bacteria in path:
                # Only the remaining nodes contribute to global avg IG
                if self.tree[bacteria].data['valid']:
                    avg_tree_IG = self._running_mean(
                        avg_tree_IG, added,
                        self._info_gain(bacteria, labels, gains))
                    added += 1

        # No valid node is left anywhere, so every leaf is already invalid and
        # there is no mean to compare against (comparing with None would raise).
        if avg_tree_IG is None:
            return

        # The leaf nodes are the final element in the path. If the leaf node IG is zero or less than global IG and is a part of an incomplete path it should be removed
        for path in paths:

            leaf_node_IG = self._info_gain(path[-1], labels, gains)

            if incomplete_path(path) and ((leaf_node_IG == 0) or
                                          (leaf_node_IG < avg_tree_IG)):
                self.tree[path[-1]].data['valid'] = False
Beispiel #30
0
class DMNSPolicy(Policy):
    """
    Representation of a deterministic markovian non-stationary policy.

    Deterministic markovian means that the act method returns only an action, and that the relevant history consists
    only in the current state.

    Attributes
    ----------
    policy: dict
        A dictionary with the deterministic markovian policy, keyed by ``(time, state)``.

    tree: treelib.Tree
        a tree object for visualization purposes. Only present once ``add_policy`` was called with an initial state.
    """
    def __init__(self, space):
        super().__init__(space)

    def __repr__(self):
        """Return the textual rendering of the visualization tree.

        ``Tree.show()`` prints and does not return the text, so the string is
        taken from ``str(tree)`` instead. Without a tree the inherited
        representation is used.
        """
        tree = getattr(self, 'tree', None)
        if tree is None:
            return super().__repr__()
        return str(tree)

    def _create_tree(self, initial_state):
        """
        A tree object for visualization purposes is created.

        Nodes are identified by ``(time:state:action)`` strings; the children
        of a node are the states returned by ``self.Q(state, action)``.

        Parameters
        ----------
        initial_state: state
            An initial state
        """
        self.tree = Tree()

        self.tree.add_node(
            Node(f'({0}:{initial_state}:{self.policy[(0, initial_state)]})',
                 f'({0}:{initial_state}:{self.policy[(0, initial_state)]})'))

        def add_sons(s, t):
            a = self.policy[(t, s)]
            # NOTE(review): the recursion below only reaches t <= T - 1, so
            # this branch can run only when T == 0 and nothing is added for
            # t == T - 1. One of the two bounds looks off by one -- confirm
            # the intended horizon before changing it.
            if t == self.T:
                for st in self.Q(s, a):
                    n = Node(f'({t + 1}:{st})', f'({t + 1}:{st})')
                    self.tree.add_node(node=n, parent=f'({t}:{s}:{a})')

            elif t < self.T - 1:
                for st in self.Q(s, a):
                    at = self.policy[(t + 1, st)]
                    n = Node(f'({t + 1}:{st}:{at})', f'({t + 1}:{st}:{at})')

                    # A (time, state, action) triple is drawn once, under the
                    # first parent that reaches it
                    if not self.tree.contains(n.identifier):
                        self.tree.add_node(node=n, parent=f'({t}:{s}:{a})')
                        add_sons(st, t + 1)

        add_sons(initial_state, 0)

    def act(self, history):
        """
        Return the action the policy prescribes.

        Parameters
        ----------
        history
            A ``(time, state)`` pair.

        Returns
        -------
        The action stored in the policy for that time and state.
        """
        time, state = history
        return self.policy[(time, state)]

    def add_action(self, time, state, action):
        """
        Add an action to the policy.

        Parameters
        ----------
        time: int
            Time
        state: State
            State
        action: Action
            Action
        """
        # assert state in self.S and action in self.A
        self.policy[time, state] = action

    def add_policy(self, policy, initial_state=None):
        """
        Adds a complete policy

        Parameters
        ----------
        policy: dict
            Policy to add
        initial_state: state
            (Optional)
            If the initial state is given, a tree for visualization purposes is created.
        """
        self.policy = policy
        if initial_state is not None:
            self._create_tree(initial_state)
Beispiel #31
0
def tree_build_from_dict(containers=None):
    """
    Build a tree based on a unsorted dictConfig(config).

    Build a tree of containers based on an unsorted dict of containers.
    Containers whose parent can never be reached from ``Tenant`` (unknown
    parent or a parent cycle) are left out of the result.

    Example:
    --------
        >>> containers = {'Fabric': {'parent_container': 'Tenant'},
            'Leaves': {'configlets': ['container_configlet'],
                        'devices': ['veos01'],
                        'images': ['4.22.0F'],
                        'parent_container': 'Fabric'},
            'MLAG01': {'configlets': ['container_configlet'],
                        'devices': ['veos01'],
                        'images': ['4.22.0F'],
                        'parent_container': 'Leaves'},
            'MLAG02': {'configlets': ['container_configlet'],
                        'devices': ['veos01'],
                        'images': ['4.22.0F'],
                        'parent_container': 'Leaves'},
            'Spines': {'configlets': ['container_configlet'],
                        'devices': ['veos01'],
                        'images': ['4.22.0F'],
                        'parent_container': 'Fabric'}}
        >>> print(tree_build_from_dict(containers=containers))
            {"Tenant": {"children": [{"Fabric": {"children": [{"Leaves": {"children": ["MLAG01", "MLAG02"]}}, "Spines"]}}]}}
    Parameters
    ----------
    containers : dict, optional
        Container topology to create on CVP, by default None. ``None`` or an
        empty dict yields a tree that only holds the ``Tenant`` root.

    Returns
    -------
    json
        tree topology
    """
    # Create tree object
    tree = Tree()
    # Create root node to mimic CVP behavior
    tree.create_node("Tenant", "Tenant")
    if not containers:
        return tree.to_json()

    # First level: containers directly attached under root. Everything else
    # waits until its parent exists.
    pending = []
    for container_name, container_info in containers.items():
        if container_info['parent_container'] in ['Tenant']:
            tree.create_node(container_name,
                             container_name,
                             parent=container_info['parent_container'])
        else:
            pending.append(container_name)

    # The dict is unsorted, so a child may be listed before its parent: keep
    # sweeping over the unplaced containers until all are attached.
    while pending:
        still_pending = []
        for container_name in pending:
            parent = containers[container_name]['parent_container']
            if tree.contains(parent) and not tree.contains(container_name):
                tree.create_node(container_name, container_name, parent=parent)
            else:
                still_pending.append(container_name)
        # A sweep that attaches nothing means the rest can never be attached;
        # stop instead of looping forever.
        if len(still_pending) == len(pending):
            break
        pending = still_pending
    return tree.to_json()
Beispiel #32
0
class RIAC(AbstractTeacher):
    """Teacher that keeps a tree of task-space regions and samples tasks from
    regions in proportion to their absolute learning progress (ALP).

    Each tree node holds a ``Region`` with its bounds, its ALP and a pair of
    deques ``r_t_pairs = [rewards, tasks]``. A region that reports it is full
    is split into two child regions along one dimension.
    """

    def __init__(self,
                 mins,
                 maxs,
                 seed,
                 env_reward_lb,
                 env_reward_ub,
                 max_region_size=200,
                 alp_window_size=None,
                 nb_split_attempts=50,
                 sampling_in_leaves_only=False,
                 min_region_size=None,
                 min_dims_range_ratio=1 / 6,
                 discard_ratio=1 / 4):

        AbstractTeacher.__init__(self, mins, maxs, env_reward_lb,
                                 env_reward_ub, seed)

        # Maximal number of (task, reward) pairs a region can hold before splitting
        self.maxlen = max_region_size

        self.alp_window = self.maxlen if alp_window_size is None else alp_window_size

        # Initialize Regions' tree
        self.tree = Tree()
        self.regions_bounds = [Box(self.mins, self.maxs, dtype=np.float32)]
        self.regions_alp = [0.]
        self.tree.create_node('root',
                              'root',
                              data=Region(maxlen=self.maxlen,
                                          r_t_pairs=[
                                              deque(maxlen=self.maxlen + 1),
                                              deque(maxlen=self.maxlen + 1)
                                          ],
                                          bounds=self.regions_bounds[-1],
                                          alp=self.regions_alp[-1]))
        self.nb_dims = len(mins)
        self.nb_split_attempts = nb_split_attempts

        # Whether task sampling uses parent and child regions (False) or only child regions (True)
        self.sampling_in_leaves_only = sampling_in_leaves_only

        # Additional tricks to original RIAC, enforcing splitting rules

        # 1 - Minimum population required for both children when splitting --> set to 1 to cancel
        self.minlen = self.maxlen / 20 if min_region_size is None else min_region_size

        # 2 - minimum children region size (compared to initial range of each dimension)
        # Set min_dims_range_ratio to 1/np.inf to cancel
        self.dims_ranges = self.maxs - self.mins
        self.min_dims_range_ratio = min_dims_range_ratio

        # 3 - If after nb_split_attempts, no split is valid, flush oldest points of parent region
        # If 1- and 2- are canceled, this will be canceled since any split will be valid
        self.discard_ratio = discard_ratio

        # book-keeping
        self.sampled_tasks = []
        self.all_boxes = []
        self.all_alps = []
        self.update_nb = -1
        self.split_iterations = []

        self.hyperparams = locals()

    def _empty_pairs(self):
        """Return a fresh ``[rewards, tasks]`` pair of bounded deques."""
        return [deque(maxlen=self.maxlen + 1), deque(maxlen=self.maxlen + 1)]

    def compute_alp(self, sub_region):
        """Return the ALP of a ``[rewards, tasks]`` pair.

        The last ``alp_window`` rewards (or all of them if fewer) are cut in
        two halves; the result is the absolute difference of the halves' means.
        With two rewards or fewer the result is 0.
        """
        if len(sub_region[0]) > 2:
            cp_window = min(len(sub_region[0]),
                            self.alp_window)  # not completely window
            half = int(cp_window / 2)
            first_half = np.array(sub_region[0])[-cp_window:-half]
            snd_half = np.array(sub_region[0])[-half:]
            cp = np.abs(first_half.mean() - snd_half.mean())
        else:
            cp = 0
        return np.abs(cp)

    def split(self, nid):
        """Try to split the region of node ``nid`` into two child regions.

        ``nb_split_attempts`` random axis-aligned cuts are scored; the best
        valid one, if any, is added to the tree as two children. When no cut
        is valid, the oldest ``discard_ratio`` share of the region's pairs is
        dropped instead.

        :return: ``True`` if two children were created, else ``False``.
        """
        reg = self.tree.get_node(nid).data
        best_split_score = 0
        best_bounds = None
        best_sub_regions = None
        best_alps = None
        is_split = False
        # Smallest extent a child region may have along each dimension
        min_extent = self.dims_ranges * self.min_dims_range_ratio
        for _ in range(self.nb_split_attempts):
            # repeat until the two sub regions contain at least minlen of the mother region
            # NOTE(review): this can spin forever if no cut satisfies minlen
            # (e.g. all stored tasks are identical) -- unchanged from before.
            while True:
                # decide on dimension
                dim = self.random_state.choice(range(self.nb_dims))
                threshold = reg.bounds.sample()[dim]
                bounds1 = Box(reg.bounds.low,
                              reg.bounds.high,
                              dtype=np.float32)
                bounds1.high[dim] = threshold
                bounds2 = Box(reg.bounds.low,
                              reg.bounds.high,
                              dtype=np.float32)
                bounds2.low[dim] = threshold
                bounds = [bounds1, bounds2]
                valid_bounds = not (
                    np.any(bounds1.high - bounds1.low < min_extent)
                    or np.any(bounds2.high - bounds2.low < min_extent))

                # perform split in sub regions
                sub_reg1 = self._empty_pairs()
                sub_reg2 = self._empty_pairs()
                for idx, task in enumerate(reg.r_t_pairs[1]):
                    target = sub_reg1 if bounds1.contains(task) else sub_reg2
                    target[1].append(task)
                    target[0].append(reg.r_t_pairs[0][idx])
                sub_regions = [sub_reg1, sub_reg2]

                if (len(sub_reg1[0]) >= self.minlen
                        and len(sub_reg2[0]) >= self.minlen):
                    break

            # compute alp
            alp = [self.compute_alp(sub_reg1), self.compute_alp(sub_reg2)]

            # compute score
            # NOTE(review): len(sub_reg1) is the length of the
            # [rewards, tasks] pair (always 2), not the population of the
            # sub-region, so the score only depends on the ALP gap. Kept as
            # is; confirm whether len(sub_reg1[0]) was meant.
            split_score = len(sub_reg1) * len(sub_reg2) * np.abs(alp[0] -
                                                                 alp[1])
            if split_score >= best_split_score and valid_bounds:
                is_split = True
                best_split_score = split_score
                best_sub_regions = sub_regions
                best_bounds = bounds
                # Remember the ALPs of this split: the children must not be
                # given the values of whichever attempt happened to run last.
                best_alps = alp

        if is_split:
            # add new nodes to tree
            for r_t_pairs, bounds, region_alp in zip(best_sub_regions,
                                                     best_bounds, best_alps):
                self.tree.create_node(identifier=self.tree.size(),
                                      parent=nid,
                                      data=Region(self.maxlen,
                                                  r_t_pairs=r_t_pairs,
                                                  bounds=bounds,
                                                  alp=region_alp))
        else:
            # No valid split: flush the oldest pairs of the region
            assert len(reg.r_t_pairs[0]) == (self.maxlen + 1)
            first_kept = int(self.maxlen * self.discard_ratio)
            reg.r_t_pairs[0] = deque(
                islice(reg.r_t_pairs[0], first_kept, self.maxlen + 1))
            reg.r_t_pairs[1] = deque(
                islice(reg.r_t_pairs[1], first_kept, self.maxlen + 1))

        return is_split

    def add_task_reward(self, node, task, reward):
        """Record ``(task, reward)`` in ``node`` and in every descendant whose
        bounds contain the task; queue nodes that report they need a split.
        """
        reg = node.data
        nid = node.identifier
        if reg.bounds.contains(task):  # task falls within region
            self.nodes_to_recompute.append(nid)
            children = self.tree.children(nid)
            for n in children:  # if task in region, task is in one sub-region
                self.add_task_reward(n, task, reward)

            need_split = reg.add(task, reward, children == [])  # COPY ALL MODE
            if need_split:
                self.nodes_to_split.append(nid)

    def episodic_update(self, task, reward, is_success):
        """Feed one finished episode to the teacher.

        Stores the pair in the region tree, splits at most one region,
        refreshes the ALP of the touched regions and rebuilds the sampling
        lists.

        :return: ``(new_split, None)`` where ``new_split`` tells whether a
            region was split during this update.
        """
        self.update_nb += 1

        # Add new (task, reward) to regions nodes
        self.nodes_to_split = []
        self.nodes_to_recompute = []
        new_split = False
        root = self.tree.get_node('root')
        self.add_task_reward(
            root, task, reward)  # Will update self.nodes_to_split if needed
        assert len(self.nodes_to_split) <= 1

        # Split a node if needed
        need_split = len(self.nodes_to_split) == 1
        if need_split:
            new_split = self.split(self.nodes_to_split[0])  # Execute the split
            if new_split:
                # Update list of regions_bounds
                if self.sampling_in_leaves_only:
                    self.regions_bounds = [
                        n.data.bounds for n in self.tree.leaves()
                    ]
                else:
                    self.regions_bounds = [
                        n.data.bounds for n in self.tree.all_nodes()
                    ]

        # Recompute ALPs of modified nodes
        for nid in self.nodes_to_recompute:
            node = self.tree.get_node(nid)
            reg = node.data
            reg.alp = self.compute_alp(reg.r_t_pairs)

        # Collect regions data (regions' ALP and regions' (task, reward) pairs)
        all_nodes = self.tree.all_nodes(
        ) if not self.sampling_in_leaves_only else self.tree.leaves()
        self.regions_alp = []
        self.r_t_pairs = []
        for n in all_nodes:
            self.regions_alp.append(n.data.alp)
            self.r_t_pairs.append(n.data.r_t_pairs)

        # Book-keeping
        if new_split:
            self.all_boxes.append(copy.copy(self.regions_bounds))
            self.all_alps.append(copy.copy(self.regions_alp))
            self.split_iterations.append(self.update_nb)
        assert len(self.regions_alp) == len(self.regions_bounds)

        return new_split, None

    def sample_random_task(self):
        """Sample a task uniformly from the first region in ``regions_bounds``."""
        return self.regions_bounds[0].sample()  # First region is root region

    def sample_task(self):
        """Sample the next task with one of three strategies and return it as float32."""
        mode = self.random_state.rand()
        if mode < 0.1:  # "mode 3" (10%) -> sample on regions and then mutate lowest-performing task in region
            if len(self.sampled_tasks) == 0:
                self.sampled_tasks.append(self.sample_random_task())
            else:
                self.sampled_tasks.append(
                    self.non_exploratory_task_sampling()["task"])

        elif mode < 0.3:  # "mode 2" (20%) -> random task
            self.sampled_tasks.append(self.sample_random_task())

        else:  # "mode 1" (70%) -> proportional sampling on regions based on ALP and then random task in selected region
            region_id = proportional_choice(self.regions_alp,
                                            self.random_state,
                                            eps=0.0)
            self.sampled_tasks.append(self.regions_bounds[region_id].sample())

        return self.sampled_tasks[-1].astype(np.float32)

    def non_exploratory_task_sampling(self):
        """Pick a region by ALP and return a mutated copy of its worst task.

        :return: dict with the ``"task"`` and an ``"infos"`` dict holding the
            book-keeping index and the chosen region id.
        """
        # 1 - Sample region proportionally to its ALP
        region_id = proportional_choice(self.regions_alp,
                                        self.random_state,
                                        eps=0.0)

        # 2 - Retrieve (task, reward) pair with lowest reward
        worst_task_idx = np.argmin(self.r_t_pairs[region_id][0])

        # 3 - Mutate task by a small amount (using Gaussian centered on task, with 0.1 std)
        task = self.random_state.normal(
            self.r_t_pairs[region_id][1][worst_task_idx].copy(), 0.1)
        # clip to stay within region (add small epsilon to avoid falling in multiple regions)
        task = np.clip(task, self.regions_bounds[region_id].low + 1e-5,
                       self.regions_bounds[region_id].high - 1e-5)
        return {
            "task": task,
            "infos": {
                "bk_index": len(self.all_boxes) - 1,
                "task_infos": region_id
            }
        }

    def dump(self, dump_dict):
        """Add the split history (boxes, iterations, ALPs) to ``dump_dict`` and return it."""
        dump_dict['all_boxes'] = self.all_boxes
        dump_dict['split_iterations'] = self.split_iterations
        dump_dict['all_alps'] = self.all_alps
        # dump_dict['riac_params'] = self.hyperparams
        return dump_dict

    @property
    def nb_regions(self):
        """Number of regions currently available for sampling."""
        return len(self.regions_bounds)

    @property
    def get_regions(self):
        """Bounds of the regions currently available for sampling."""
        return self.regions_bounds
Beispiel #33
0
def _build_hypernym_tree(path):
    """Build a tree rooted at "100001740" from the (child, parent) ids in ``path``.

    Every line of the file is expected to contain exactly two runs of digits:
    the first is used as the node id/tag, the second as its parent id
    (presumably WordNet Prolog ``hyp(child,parent).`` facts -- confirm against
    the data file). Lines that do not contain exactly two ids are skipped.

    Edges are inserted in repeated passes, in file order, until a pass adds no
    node, because a child may be listed before its parent. The tree size is
    printed after every pass.
    """
    # Parse once; the file content does not change between passes.
    with open(path, "r") as f:
        pending = []
        for line in f:
            ids = re.findall(r'\d+', line)
            if len(ids) == 2:
                pending.append((ids[0], ids[1]))

    tree = Tree()
    tree.create_node("100001740", "100001740")
    while True:
        added = 0
        rejected = []
        for child, parent in pending:
            try:
                tree.create_node(child, child, parent=parent)
            except Exception:
                # Best effort: the tree refuses the edge (presumably parent not
                # inserted yet, or child id already present). Retry it on the
                # next pass, exactly as re-reading the file would.
                rejected.append((child, parent))
            else:
                added += 1
        print(tree.size())
        # Edges that were inserted can never succeed again, so only the
        # rejected ones need to be retried.
        pending = rejected
        if added == 0:
            return tree


def _singular_or_same(word):
    """Return the singular form of ``word`` from ``p.singular_noun``, else ``word``.

    ``p.singular_noun`` signals "no singular form" by returning ``False``.
    """
    singular = p.singular_noun(word)
    return word if singular is False else singular


def _label_pairs(data, word2syn, related, output, forward, backward):
    """Append relation lines for the pairs in ``data`` and return the unlabeled pairs.

    For each pair ``d`` the singularised words are mapped to synset ids through
    ``word2syn``. For every id combination ``(i, j)``: if ``j`` is in
    ``related[i]`` the line ``d[0] <TAB> forward <TAB> d[1]`` is emitted, else if
    ``i`` is in ``related[j]`` the same line with ``backward`` is emitted. A line
    is appended to ``output`` (in place) only if it is not already there, and
    only then does the pair count as labeled.

    Returns:
        list: pairs that were not labeled, de-duplicated, in first-seen order.
    """
    labeled = set()
    unchanged = []
    unchanged_seen = set()  # O(1) duplicate check for ``unchanged``
    for d in data:
        d0 = _singular_or_same(d[0])
        d1 = _singular_or_same(d[1])
        for i in word2syn[d0]:
            for j in word2syn[d1]:
                if j in related[i]:
                    symbol = forward
                elif i in related[j]:
                    symbol = backward
                else:
                    continue
                line = d[0] + "\t" + symbol + "\t" + d[1]
                if line not in output:
                    output.append(line)
                    labeled.add(d)
        if d not in labeled and d not in unchanged_seen:
            unchanged_seen.add(d)
            unchanged.append(d)
    return unchanged


def use_hyp(word2syn, output, data):
    """Label word pairs with hypernym and sibling relations from the hypernym tree.

    The tree is loaded from ``datapath + "wn_hyp.pl"``. Two phases follow:

    1. Descendant phase: for a pair ``d``, emit ``d[0] > d[1]`` when a synset
       of ``d[1]`` is a descendant of a synset of ``d[0]`` in the tree, or
       ``d[0] < d[1]`` for the opposite direction.
    2. Sibling phase (only pairs left unlabeled by phase 1): emit
       ``d[0] | d[1]`` when their synsets are siblings in the tree.

    Fields of an emitted line are tab-separated. A ``"\\n"`` entry is appended
    to ``output`` after each phase, and progress counts are printed.

    Args:
        word2syn: mapping from a (singular) word to an iterable of synset ids.
        output: list of output lines; extended in place.
        data: iterable of hashable 2-item word pairs (they are stored in sets).

    Returns:
        tuple: ``(output, un_change)`` where ``un_change`` lists the pairs that
        neither phase labeled.
    """
    dic = _build_hypernym_tree(datapath + "wn_hyp.pl")

    # Phase 1: node tag -> tags of all its descendants. Sets are used because
    # the mapping is only ever queried for membership.
    entail = defaultdict(set)
    for n in dic.all_nodes():
        for m in dic.subtree(n.tag).all_nodes():
            if m.tag != n.tag:
                entail[n.tag].add(m.tag)
    un_change = _label_pairs(data, word2syn, entail, output, ">", "<")
    print("before single: " + str(len(data)) + " after: " +
          str(len(un_change)))
    output += ["\n"]
    del entail  # release the large map before building the next one

    # Phase 2: node tag -> tags of its siblings, applied to the leftovers.
    data = un_change
    alter = defaultdict(set)
    for n in dic.all_nodes():
        for m in dic.siblings(n.tag):
            # Defensive filter kept from the original: skip the node itself
            # and its parent.
            if m.tag != n.tag and n.bpointer != m.tag:
                alter[n.tag].add(m.tag)
    un_change = _label_pairs(data, word2syn, alter, output, "|", "|")
    del alter
    print("before single: " + str(len(data)) + " after: " +
          str(len(un_change)))
    output += ["\n"]
    return output, un_change