Example #1
    def et_define_branch(self, xml_node, tree, tree_parent, a_dict):
        """
        Parses a branching data structure by calling ``parse_et_fork``.
        """

        subtree = Tree()
        parent = subtree.root

        for e in xml_node:
            self.parse_et_fork(e, subtree, parent, a_dict)

        if subtree.size() == 0:
            raise RuntimeError('Event tree branch contains no data')

        a_dict[xml_node.get('name')] = subtree
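
The method above relies on a treelib-style Tree (a `root` attribute, `size()`, and nodes added by the not-shown `parse_et_fork`). A minimal, self-contained sketch of the same pattern, with a plain `create_node` call standing in for `parse_et_fork` and an illustrative XML fragment, might look like:

from xml.etree import ElementTree as ET
from treelib import Tree

xml_node = ET.fromstring('<branch name="b1"><fork id="f1"/><fork id="f2"/></branch>')

a_dict = {}
subtree = Tree()
subtree.create_node('b1', 'b1')  # stand-in for the structure parse_et_fork would build
for e in xml_node:
    subtree.create_node(e.get('id'), e.get('id'), parent='b1')

if subtree.size() == 0:
    raise RuntimeError('Event tree branch contains no data')

a_dict[xml_node.get('name')] = subtree  # keyed by the branch's 'name' attribute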
Example #2
class RIAC(AbstractTeacher):
    def __init__(self,
                 mins,
                 maxs,
                 seed,
                 env_reward_lb,
                 env_reward_ub,
                 max_region_size=200,
                 alp_window_size=None,
                 nb_split_attempts=50,
                 sampling_in_leaves_only=False,
                 min_region_size=None,
                 min_dims_range_ratio=1 / 6,
                 discard_ratio=1 / 4):

        AbstractTeacher.__init__(self, mins, maxs, env_reward_lb,
                                 env_reward_ub, seed)

        # Maximal number of (task, reward) pairs a region can hold before splitting
        self.maxlen = max_region_size

        self.alp_window = self.maxlen if alp_window_size is None else alp_window_size

        # Initialize Regions' tree
        self.tree = Tree()
        self.regions_bounds = [Box(self.mins, self.maxs, dtype=np.float32)]
        self.regions_alp = [0.]
        self.tree.create_node('root',
                              'root',
                              data=Region(maxlen=self.maxlen,
                                          r_t_pairs=[
                                              deque(maxlen=self.maxlen + 1),
                                              deque(maxlen=self.maxlen + 1)
                                          ],
                                          bounds=self.regions_bounds[-1],
                                          alp=self.regions_alp[-1]))
        self.nb_dims = len(mins)
        self.nb_split_attempts = nb_split_attempts

        # Whether task sampling uses all regions, parents included (False), or only leaf regions (True)
        self.sampling_in_leaves_only = sampling_in_leaves_only

        # Additional tricks to original RIAC, enforcing splitting rules

        # 1 - Minimum population required for both children when splitting --> set to 1 to cancel
        self.minlen = self.maxlen / 20 if min_region_size is None else min_region_size

        # 2 - minimum children region size (compared to initial range of each dimension)
        # Set min_dims_range_ratio to 1/np.inf to cancel
        self.dims_ranges = self.maxs - self.mins
        self.min_dims_range_ratio = min_dims_range_ratio

        # 3 - If after nb_split_attempts, no split is valid, flush oldest points of parent region
        # If 1- and 2- are canceled, this will be canceled since any split will be valid
        self.discard_ratio = discard_ratio

        # book-keeping
        self.sampled_tasks = []
        self.all_boxes = []
        self.all_alps = []
        self.update_nb = -1
        self.split_iterations = []

        self.hyperparams = locals()

    def compute_alp(self, sub_region):
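        # ALP (absolute learning progress): the absolute difference between the mean
        # reward of the older half and the newer half of the region's recent rewards
        # (sub_region[0]), computed over a window of at most alp_window points.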
        if len(sub_region[0]) > 2:
            cp_window = min(len(sub_region[0]),
                            self.alp_window)  # cap the window at the available history
            half = int(cp_window / 2)
            # print(str(cp_window) + 'and' + str(half))
            first_half = np.array(sub_region[0])[-cp_window:-half]
            snd_half = np.array(sub_region[0])[-half:]
            diff = first_half.mean() - snd_half.mean()
            cp = np.abs(diff)
        else:
            cp = 0
        alp = np.abs(cp)
        return alp

    def split(self, nid):
        # Try nb_split_attempts splits on region corresponding to node <nid>
        reg = self.tree.get_node(nid).data
        best_split_score = 0
        best_bounds = None
        best_sub_regions = None
        is_split = False
        for i in range(self.nb_split_attempts):
            sub_reg1 = [
                deque(maxlen=self.maxlen + 1),
                deque(maxlen=self.maxlen + 1)
            ]
            sub_reg2 = [
                deque(maxlen=self.maxlen + 1),
                deque(maxlen=self.maxlen + 1)
            ]

            # repeat until both sub-regions contain at least minlen points of the parent region
            while len(sub_reg1[0]) < self.minlen or len(
                    sub_reg2[0]) < self.minlen:
                # decide on dimension
                dim = self.random_state.choice(range(self.nb_dims))
                threshold = reg.bounds.sample()[dim]
                bounds1 = Box(reg.bounds.low,
                              reg.bounds.high,
                              dtype=np.float32)
                bounds1.high[dim] = threshold
                bounds2 = Box(reg.bounds.low,
                              reg.bounds.high,
                              dtype=np.float32)
                bounds2.low[dim] = threshold
                bounds = [bounds1, bounds2]
                valid_bounds = True

                if np.any(bounds1.high - bounds1.low < self.dims_ranges *
                          self.min_dims_range_ratio):
                    valid_bounds = False
                if np.any(bounds2.high - bounds2.low < self.dims_ranges *
                          self.min_dims_range_ratio):
                    valid_bounds = False

                # perform split in sub regions
                sub_reg1 = [
                    deque(maxlen=self.maxlen + 1),
                    deque(maxlen=self.maxlen + 1)
                ]
                sub_reg2 = [
                    deque(maxlen=self.maxlen + 1),
                    deque(maxlen=self.maxlen + 1)
                ]
                for i, task in enumerate(reg.r_t_pairs[1]):
                    if bounds1.contains(task):
                        sub_reg1[1].append(task)
                        sub_reg1[0].append(reg.r_t_pairs[0][i])
                    else:
                        sub_reg2[1].append(task)
                        sub_reg2[0].append(reg.r_t_pairs[0][i])
                sub_regions = [sub_reg1, sub_reg2]

            # compute alp
            alp = [self.compute_alp(sub_reg1), self.compute_alp(sub_reg2)]

            # compute score
            split_score = len(sub_reg1) * len(sub_reg2) * np.abs(alp[0] -
                                                                 alp[1])
            if split_score >= best_split_score and valid_bounds:
                is_split = True
                best_split_score = split_score
                best_sub_regions = sub_regions
                best_bounds = bounds

        if is_split:
            # add new nodes to tree
            for i, (r_t_pairs,
                    bounds) in enumerate(zip(best_sub_regions, best_bounds)):
                self.tree.create_node(identifier=self.tree.size(),
                                      parent=nid,
                                      data=Region(self.maxlen,
                                                  r_t_pairs=r_t_pairs,
                                                  bounds=bounds,
                                                  alp=alp[i]))
        else:
            assert len(reg.r_t_pairs[0]) == (self.maxlen + 1)
            reg.r_t_pairs[0] = deque(
                islice(reg.r_t_pairs[0], int(self.maxlen * self.discard_ratio),
                       self.maxlen + 1))
            reg.r_t_pairs[1] = deque(
                islice(reg.r_t_pairs[1], int(self.maxlen * self.discard_ratio),
                       self.maxlen + 1))

        return is_split

    def add_task_reward(self, node, task, reward):
        reg = node.data
        nid = node.identifier
        if reg.bounds.contains(task):  # task falls within region
            self.nodes_to_recompute.append(nid)
            children = self.tree.children(nid)
            for n in children:  # if task in region, task is in one sub-region
                self.add_task_reward(n, task, reward)

            need_split = reg.add(task, reward, children == [])  # COPY ALL MODE
            if need_split:
                self.nodes_to_split.append(nid)

    def episodic_update(self, task, reward, is_success):
        self.update_nb += 1

        # Add new (task, reward) to regions nodes
        self.nodes_to_split = []
        self.nodes_to_recompute = []
        new_split = False
        root = self.tree.get_node('root')
        self.add_task_reward(
            root, task, reward)  # Will update self.nodes_to_split if needed
        assert len(self.nodes_to_split) <= 1

        # Split a node if needed
        need_split = len(self.nodes_to_split) == 1
        if need_split:
            new_split = self.split(self.nodes_to_split[0])  # Execute the split
            if new_split:
                # Update list of regions_bounds
                if self.sampling_in_leaves_only:
                    self.regions_bounds = [
                        n.data.bounds for n in self.tree.leaves()
                    ]
                else:
                    self.regions_bounds = [
                        n.data.bounds for n in self.tree.all_nodes()
                    ]

        # Recompute ALPs of modified nodes
        for nid in self.nodes_to_recompute:
            node = self.tree.get_node(nid)
            reg = node.data
            reg.alp = self.compute_alp(reg.r_t_pairs)

        # Collect regions data (regions' ALP and regions' (task, reward) pairs)
        all_nodes = self.tree.all_nodes(
        ) if not self.sampling_in_leaves_only else self.tree.leaves()
        self.regions_alp = []
        self.r_t_pairs = []
        for n in all_nodes:
            self.regions_alp.append(n.data.alp)
            self.r_t_pairs.append(n.data.r_t_pairs)

        # Book-keeping
        if new_split:
            self.all_boxes.append(copy.copy(self.regions_bounds))
            self.all_alps.append(copy.copy(self.regions_alp))
            self.split_iterations.append(self.update_nb)
        assert len(self.regions_alp) == len(self.regions_bounds)

        return new_split, None

    def sample_random_task(self):
        return self.regions_bounds[0].sample()  # First region is root region

    def sample_task(self):
        mode = self.random_state.rand()
        if mode < 0.1:  # "mode 3" (10%) -> sample on regions and then mutate lowest-performing task in region
            if len(self.sampled_tasks) == 0:
                self.sampled_tasks.append(self.sample_random_task())
            else:
                self.sampled_tasks.append(
                    self.non_exploratory_task_sampling()["task"])

        elif mode < 0.3:  # "mode 2" (20%) -> random task
            self.sampled_tasks.append(self.sample_random_task())

        else:  # "mode 1" (70%) -> proportional sampling on regions based on ALP and then random task in selected region
            region_id = proportional_choice(self.regions_alp,
                                            self.random_state,
                                            eps=0.0)
            self.sampled_tasks.append(self.regions_bounds[region_id].sample())

        return self.sampled_tasks[-1].astype(np.float32)

    def non_exploratory_task_sampling(self):
        # 1 - Sample region proportionally to its ALP
        region_id = proportional_choice(self.regions_alp,
                                        self.random_state,
                                        eps=0.0)

        # 2 - Retrieve (task, reward) pair with lowest reward
        worst_task_idx = np.argmin(self.r_t_pairs[region_id][0])

        # 3 - Mutate task by a small amount (using Gaussian centered on task, with 0.1 std)
        task = self.random_state.normal(
            self.r_t_pairs[region_id][1][worst_task_idx].copy(), 0.1)
        # clip to stay within region (add small epsilon to avoid falling in multiple regions)
        task = np.clip(task, self.regions_bounds[region_id].low + 1e-5,
                       self.regions_bounds[region_id].high - 1e-5)
        return {
            "task": task,
            "infos": {
                "bk_index": len(self.all_boxes) - 1,
                "task_infos": region_id
            }
        }

    def dump(self, dump_dict):
        dump_dict['all_boxes'] = self.all_boxes
        dump_dict['split_iterations'] = self.split_iterations
        dump_dict['all_alps'] = self.all_alps
        # dump_dict['riac_params'] = self.hyperparams
        return dump_dict

    @property
    def nb_regions(self):
        return len(self.regions_bounds)

    @property
    def get_regions(self):
        return self.regions_bounds
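
The Region and Box types, the ALP bookkeeping and AbstractTeacher above are project-specific; only the treelib part of the pattern is sketched below, with a plain dict as illustrative node data. It shows the three calls RIAC leans on: a named root node, children attached with parent= when a region is split, and sampling either over all regions or over leaves only.

from treelib import Tree

tree = Tree()
tree.create_node('root', 'root', data={'bounds': (0.0, 1.0), 'alp': 0.0})

# a "split": two child regions attached under the parent region
tree.create_node(identifier=tree.size(), parent='root',
                 data={'bounds': (0.0, 0.4), 'alp': 0.1})
tree.create_node(identifier=tree.size(), parent='root',
                 data={'bounds': (0.4, 1.0), 'alp': 0.3})

# sample over every region (parents included) or over leaf regions only
all_regions = [n.data for n in tree.all_nodes()]
leaf_regions = [n.data for n in tree.leaves()]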
Example #3
class Blockchain(object):
    def __init__(self, genesis):
        # TODO: figure out if genesis should be passed in or created here
        # self.tinput = tinput
        self.blockCount = 0
        self.blockchain = Tree()
        self.genesis = genesis
        self.addGenesisBlock(genesis)  #Add the genesis block to chain

    def addGenesisBlock(self, genesis):
        self.blockchain.create_node("Genesis Block" + " ID: " +
                                    genesis.proofOfWork[:12],
                                    genesis.proofOfWork,
                                    data=genesis)

    def printBlockchain(self):
        self.blockchain.show()

    def addBlock(self, block):
        # TODO: run proof of work verification before adding block
        # Add block to chain & return true if POW valid
        # Else return false
        self.blockCount += 1
        self.blockchain.create_node("Block " + str(self.blockCount) + " ID: " +
                                    block.proofOfWork[:12],
                                    block.proofOfWork,
                                    parent=block.prevBlockHash,
                                    data=block)

    def getGenesisID(self):
        return self.blockchain.root

    def getLongestChainBlocks(self):
        allNodes = self.blockchain.all_nodes()
        forkNum = 0  # number of leaves at the longest branch
        treeDepth = self.blockchain.depth()
        # Will hold leaves at depth == treeDepth, i.e. the longest branch(es)
        longestPathLeaves = []
        for node in allNodes:
            currentDepth = self.blockchain.depth(node)
            if (currentDepth == treeDepth):
                forkNum += 1
                longestPathLeaves.append(node)

        return forkNum, longestPathLeaves

    def blockchainLength(self):
        # returns the depth of the tree ie the length of
        #  the longest chain
        return self.blockchain.depth()

    def numBlocks(self):
        return self.blockchain.size()

    def printChain(self, chain):
        chain.show(data_property="humanID")

    def tailBlocks(self, chain):
        leaves = chain.leaves()
        print("Num leaves" + str(len(leaves)))
        print(leaves)

    def checkBlock(self):
        # Check the proof of work
        # return true if proof of work is valid
        # else return false
        print("printing block")

    def createBlockchainGraph(self, outfilename):
        print("creating graph")
        self.blockchain.to_graphviz(filename=outfilename + '.gv',
                                    shape=u'box',
                                    graph=u'digraph')
        g = Source.from_file(outfilename + '.gv')
        g.render()

    def createBlockchainImg(self, outfilename):
        print("creating graph")
        self.blockchain.to_graphviz(filename=outfilename + '.gv',
                                    shape=u'box',
                                    graph=u'digraph')
        g = Source.from_file(outfilename + '.gv')
        g.render(format='png')
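
A hedged usage sketch of the class above. The real Block type is not shown in the example, so a namedtuple with only the two attributes the tree actually needs (proofOfWork and prevBlockHash) stands in for it here; treelib's Tree is assumed to be imported as in the original module.

from collections import namedtuple

FakeBlock = namedtuple('FakeBlock', ['proofOfWork', 'prevBlockHash'])

genesis = FakeBlock(proofOfWork='0000aaaa11112222', prevBlockHash=None)
chain = Blockchain(genesis)

b1 = FakeBlock(proofOfWork='0000bbbb33334444', prevBlockHash=genesis.proofOfWork)
b2 = FakeBlock(proofOfWork='0000cccc55556666', prevBlockHash=genesis.proofOfWork)  # a fork off genesis
chain.addBlock(b1)
chain.addBlock(b2)

chain.printBlockchain()                      # treelib's show()
forks, tips = chain.getLongestChainBlocks()  # both b1 and b2 sit at the maximum depth
print(forks, chain.blockchainLength(), chain.numBlocks())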
Example #4
class StepParse:
    def __init__(self):
        pass

    def load_step(self, step_filename):

        self.nauo_lines = []
        self.prod_def_lines = []
        self.prod_def_form_lines = []
        self.prod_lines = []
        self.filename = os.path.splitext(step_filename)[0]

        line_hold = ''
        line_type = ''

        # Find all search lines
        with open(step_filename) as f:
            for line in f:
                # TH: check whether each line starts a new indexed entry; if the file wraps a
                # record across several lines, the continuation is appended to the held line
                index = re.search("#(.*)=", line)
                if index:
                    # TH: if not none then it is the start of a line so read it
                    # want to hold line until it has checked next line
                    # if next line is a new indexed line then save previous line
                    if line_hold:
                        if line_type == 'nauo':
                            self.nauo_lines.append(line_hold)
                        elif line_type == 'prod_def':
                            self.prod_def_lines.append(line_hold)
                        elif line_type == 'prod_def_form':
                            self.prod_def_form_lines.append(line_hold)
                        elif line_type == 'prod':
                            self.prod_lines.append(line_hold)
                        line_hold = ''
                        line_type = ''

                    prev_index = True  # TH remember previous line had an index
                    if 'NEXT_ASSEMBLY_USAGE_OCCURRENCE' in line:
                        line_hold = line.rstrip()
                        line_type = 'nauo'
                    elif ('PRODUCT_DEFINITION ' in line
                          or 'PRODUCT_DEFINITION(' in line):
                        line_hold = line.rstrip()
                        line_type = 'prod_def'
                    elif 'PRODUCT_DEFINITION_FORMATION' in line:
                        line_hold = line.rstrip()
                        line_type = 'prod_def_form'
                    elif ('PRODUCT ' in line or 'PRODUCT(' in line):
                        line_hold = line.rstrip()
                        line_type = 'prod'
                else:
                    prev_index = False
                    #TH: if end of file and previous line was held
                    if 'ENDSEC;' in line:
                        if line_hold:
                            if line_type == 'nauo':
                                self.nauo_lines.append(line_hold)
                            elif line_type == 'prod_def':
                                self.prod_def_lines.append(line_hold)
                            elif line_type == 'prod_def_form':
                                self.prod_def_form_lines.append(line_hold)
                            elif line_type == 'prod':
                                self.prod_lines.append(line_hold)
                            line_hold = ''
                            line_type = ''
                    else:
                        #TH: if not end of file
                        line_hold = line_hold + line.rstrip()

        self.nauo_refs = []
        self.prod_def_refs = []
        self.prod_def_form_refs = []
        self.prod_refs = []

        # TH: added 'replace(",", " ")' to replace ',' with a space, making the split easier if there are no spaces between the words
        # Find all (# hashed) line references and product names
        # TH: it might be worth finding a different way of extracting data we do want rather than fixes to get rid of the data we don't
        for j, el_ in enumerate(self.nauo_lines):
            self.nauo_refs.append([
                el.rstrip(',')
                for el in el_.replace(",", " ").replace("=", " ").split()
                if el.startswith('#')
            ])
        for j, el_ in enumerate(self.prod_def_lines):
            self.prod_def_refs.append([
                el.rstrip(',')
                for el in el_.replace(",", " ").replace("=", " ").split()
                if el.startswith('#')
            ])
        for j, el_ in enumerate(self.prod_def_form_lines):
            self.prod_def_form_refs.append([
                el.rstrip(',')
                for el in el_.replace(",", " ").replace("=", " ").split()
                if el.startswith('#')
            ])
        for j, el_ in enumerate(self.prod_lines):
            self.prod_refs.append([
                el.strip(',') for el in el_.replace(",", " ").replace(
                    "(", " ").replace("=", " ").split() if el.startswith('#')
            ])
            self.prod_refs[j].append(el_.split("'")[1])

        # Get first two items in each sublist (as third is shape ref)
        #
        # First item is 'PRODUCT_DEFINITION' ref
        # Second item is 'PRODUCT_DEFINITION_FORMATION <etc>' ref
        self.prod_all_refs = [el[:2] for el in self.prod_def_refs]

        # Match up all references down to level of product name
        for j, el_ in enumerate(self.prod_all_refs):

            # Add 'PRODUCT_DEFINITION' ref
            for i, el in enumerate(self.prod_def_form_refs):
                if el[0] == el_[1]:
                    el_.append(el[1])
                    break

            # Add names from 'PRODUCT_DEFINITION' lines
            for i, el in enumerate(self.prod_refs):
                if el[0] == el_[2]:
                    el_.append(el[2])
                    break

        # Find all parent and child relationships (3rd and 2nd item in each sublist)
        self.parent_refs = [el[1] for el in self.nauo_refs]
        self.child_refs = [el[2] for el in self.nauo_refs]

        # Find distinct parts and assemblies via set operations (sets, so no repetition of items)
        self.all_type_refs = set(self.child_refs) | set(self.parent_refs)
        self.ass_type_refs = set(self.parent_refs)
        self.part_type_refs = set(self.child_refs) - set(self.parent_refs)
        #TH: find root node
        self.root_type_refs = set(self.parent_refs) - set(self.child_refs)

        # Create simple parts dictionary (ref + label)
        self.part_dict = {el[0]: el[3] for el in self.prod_all_refs}
#        self.part_dict_inv = {el[3]:el[0] for el in self.prod_all_refs}

    def show_values(self):
        # TH: basic testing; if needed these could be split up
        print(self.nauo_lines)
        print(self.prod_def_lines)
        print(self.prod_def_form_lines)
        print(self.prod_lines)
        print(self.nauo_refs)
        print(self.prod_def_refs)
        print(self.prod_def_form_refs)
        print(self.prod_refs)

#    HR: "create_dict" replaced by list comprehension elsewhere
#
#    def create_dict(self):
#
#        # TH: links nauo number with a name and creates dict
#        self.part_dict  = {}
#        for part in self.all_type_refs:
#            for sublist in self.prod_def_refs:
#                if sublist[0] == part:
#                    prod_loc = '#' + re.findall('\d+',sublist[1])[0]
#                    pass
#            for sublist in self.prod_def_form_refs:
#                if sublist[0] == prod_loc:
#                    prod_loc = '#' + str(re.findall('\d+',sublist[1])[0])
#                    pass
#            for sublist in self.prod_refs:
#                if sublist[0] == prod_loc:
#                    part_name = sublist[2]
#
#            self.part_dict[part] = part_name

    def create_tree(self):

        #TH: create tree diagram in newick format
        #TH: find root node

        self.tree = Tree()
        #TH: check if there are any parts to make a tree from, if not don't bother
        if self.part_dict == {}:
            return

        root_node_ref = list(self.root_type_refs)[0]
        # HR added part reference as data for later use
        self.tree.create_node(self.part_dict[root_node_ref],
                              0,
                              data={'ref': root_node_ref})

        #TH: created root node now fill in next layer
        #TH: create dict for tree, as each node needs a unique name
        i = [0]  # Iterates through nodes
        self.tree_dict = {}
        self.tree_dict[i[0]] = root_node_ref

        def tree_next_layer(self, parent):
            root_node = self.tree_dict[i[0]]
            for line in self.nauo_refs:
                if line[1] == root_node:
                    i[0] += 1
                    self.tree_dict[i[0]] = str(line[2])
                    # HR added part reference as data for later use
                    self.tree.create_node(self.part_dict[line[2]],
                                          i[0],
                                          parent=parent,
                                          data={'ref': str(line[2])})
                    tree_next_layer(self, i[0])

        tree_next_layer(self, 0)
        self.appended = False

        self.get_levels()

    def get_levels(self):

        # Initialise dict and get first level (leaves)
        self.levels = {}
        self.levels_set_p = set()
        self.levels_set_a = set()
        self.leaf_ids = [el.identifier for el in self.tree.leaves()]
        self.all_ids = [el for el in self.tree.nodes]
        self.non_leaf_ids = set(self.all_ids) - set(self.leaf_ids)

        self.part_level = 1

        def do_level(self, tree_level):
            # Get all nodes within this level
            node_ids = [
                el for el in self.tree.nodes
                if self.tree.level(el) == tree_level
            ]
            for el in node_ids:
                # If leaf, then n_p = 1 and n_a = 1
                if el in self.leaf_ids:
                    self.levels[el] = {}
                    self.levels[el]['n_p'] = self.part_level
                    self.levels[el]['n_a'] = self.part_level
                # If assembly, then get all children and sum all parts + assemblies
                else:
                    # Get all children of node and sum levels
                    child_ids = self.tree.is_branch(el)
                    child_sum_p = 0
                    child_sum_a = 0
                    for el_ in child_ids:
                        child_sum_p += self.levels[el_]['n_p']
                        child_sum_a += self.levels[el_]['n_a']
                    self.levels[el] = {}
                    self.levels[el]['n_p'] = child_sum_p
                    self.levels[el]['n_a'] = child_sum_a + 1
                    self.levels_set_p.add(child_sum_p)
                    self.levels_set_a.add(child_sum_a + 1)

        # Go up through tree levels and populate lattice level dict
        for i in range(self.tree.depth(), -1, -1):
            do_level(self, i)

        self.create_lattice()

        self.levels_p_sorted = sorted(list(self.levels_set_p))
        self.levels_a_sorted = sorted(list(self.levels_set_a))

        # Function to return dictionary of item IDs for each lattice level
        def get_levels_inv(list_in, key):

            #Initialise
            levels_inv = {}
            levels_inv[self.part_level] = []
            for el in list_in:
                levels_inv[el] = []
            for k, v in self.levels.items():
                levels_inv[v[key]].append(k)

            return levels_inv

        self.levels_p_inv = get_levels_inv(self.levels_p_sorted, 'n_p')
        self.levels_a_inv = get_levels_inv(self.levels_a_sorted, 'n_a')

    def get_all_children(self, id_):

        # Gather all descendants of id_ by walking down the tree level by level
        ancestors = [el.identifier for el in self.tree.children(id_)]
        parents = list(ancestors)
        while parents:
            children = []
            for parent in parents:
                children.extend(
                    el.identifier for el in self.tree.children(parent))
            ancestors.extend(children)
            parents = children
        return ancestors

    def create_lattice(self):

        # Create lattice
        self.g = nx.DiGraph()
        self.default_colour = 'r'
        # Get root node and set parent to -1 to maintain data type of "parent"
        # Set position to top/middle
        node_id = self.tree.root
        label_text = self.tree.get_node(node_id).tag
        self.g.add_node(node_id,
                        parent=-1,
                        label=label_text,
                        colour=self.default_colour)

        # Do nodes from treelib "nodes" dictionary
        for key in self.tree.nodes:
            # Exclude root
            if key != self.tree.root:
                parent_id = self.tree.parent(key).identifier
                label_text = self.tree.get_node(key).tag
                # Node IDs same as for tree
                self.g.add_node(key,
                                parent=parent_id,
                                label=label_text,
                                colour=self.default_colour)

        # Do edges from nodes
        for key in self.tree.nodes:
            # Exclude root
            if key != self.tree.root:
                parent_id = self.tree.parent(key).identifier
                self.g.add_edge(key, parent_id)

        # Escape if only one node
        # HR 6/3/20 QUICK BUG FIX: SINGLE-NODE TREE DOES NOT PLOT
        # IMPROVE LATER; SHOULD BE PART OF A GENERAL METHOD
        if self.tree.size() == 1:
            id_ = [el.identifier for el in self.tree.leaves()]
            self.g.nodes[id_[-1]]['pos'] = (0, 0)
            return

        # Get set of parents of leaf nodes
        leaf_parents = set(
            [self.tree.parent(el).identifier for el in self.leaf_ids])

        # For each leaf_parent, set position of leaf nodes sequentially
        i = 0
        no_leaves = len(self.tree.leaves())
        for el in leaf_parents:
            for el_ in self.tree.is_branch(el):
                child_ids = [el.identifier for el in self.tree.leaves()]
                if el_ in child_ids:
                    self.g.nodes[el_]['pos'] = ((i / (no_leaves)), 1)
                    i += 1

        # To set plot positions of nodes from lattice levels
        # ---
        # Traverse upwards from leaves
        for el in sorted(list(self.levels_set_a)):
            # Get all nodes at that level
            node_ids = [k for k, v in self.levels.items() if v['n_a'] == el]
            # Get all positions of children of that node
            # and set position as mean value of them
            for el_ in node_ids:
                child_ids = self.tree.is_branch(el_)
                pos_sum = 0
                for el__ in child_ids:
                    pos_ = self.g.nodes[el__]['pos'][0]
                    pos_sum += pos_
                pos_sum = pos_sum / len(child_ids)
                self.g.nodes[el_]['pos'] = (pos_sum, el)

    def print_tree(self):

        try:
            self.tree.show()
        except:
            self.create_tree()
            self.tree.show()

    def tree_to_json(self, save_to_file=False, filename='file', path=''):

        #TH: return json format tree, can also save to file
        if self.tree.size() != 0:
            data = self.tree.to_json()
            j = json.loads(data)
            if save_to_file == True:
                if path:
                    file_path = os.path.join(path, filename)
                else:
                    file_path = filename

                with open(file_path + '.json', 'w') as outfile:
                    json.dump(j, outfile)

            return data
        else:
            print("no tree to print")
            return
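
The level and lattice code above leans on a handful of treelib calls (leaves, depth, level, is_branch, parent). A minimal, runnable sketch of those calls on a toy assembly (the part names are illustrative, not a real STEP product structure):

from treelib import Tree

t = Tree()
t.create_node('car', 0)
t.create_node('body', 1, parent=0)
t.create_node('wheel', 2, parent=0)
t.create_node('door', 3, parent=1)

print([n.identifier for n in t.leaves()])             # leaf ids, e.g. [2, 3]
print(t.depth())                                      # 2
print([nid for nid in t.nodes if t.level(nid) == 1])  # ids at tree level 1
print(t.is_branch(1))                                 # children ids of node 1 -> [3]
print(t.parent(3).identifier)                         # 1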
Example #5
def pytree(start_path: str = '.',
           include_files: bool = True,
           include_sizes: bool = False,
           include_counts: bool = False,
           specific_extension: str or None = None,
           force_absolute_ids: bool = True
           ) -> None:
    """
    Returns a `treelib.Tree` representing the filesystem under `start_path`.
    You can then print the `Tree` object using `tree.show()`
    :param start_path: String. Represents an absolute or relative path.
    :param include_files: Boolean. Indicates whether to also include the files in the tree.
    :param include_sizes: Boolean. Indicates whether or not tree should display file and folder sizes, in megabytes.
    :param include_counts: Boolean. Indicates whether or not tree should display file and folder counts.
    :param specific_extension: String. Represents a specific file extension to be searched.
    :param force_absolute_ids: Boolean. Indicates whether ids should be absolute. They will
    be relative if start_path is relative, and absolute otherwise.
    """
    # creating tree instance
    tree = Tree()
    first = True

    # getting dirs and files
    all_files_and_folders = os.walk(start_path)

    # starting dirs, files and size count
    total_dirs_num = 0
    total_files_num = 0
    total_disk_size = 0

    # iterating over dirs and files
    for root, _, files in all_files_and_folders:
        p_root = Path(root)
        if first:
            parent_id = None
            first = False
        else:
            parent = p_root.parent
            parent_id = parent.absolute() if force_absolute_ids else parent

        # getting absolute path
        abs_path = p_root.absolute()

        # getting root id
        p_root_id = abs_path if force_absolute_ids else p_root

        # getting dir name
        dir_name = (p_root.name if p_root.name != "" else ".")
        dir_name += '/'

        # coloring dir string
        colored_text_string = f"\033[0;34;42m{dir_name}"

        # recoloring to white so that it doesn't affect other nodes
        colored_text_string += f"\033[0;37;40m"

        # getting number of files and folders inside directory
        current_dir_file_and_folder_count = get_number_of_files_inside_folder(path_to_folder=abs_path)

        # adding count to dir name
        if include_counts:
            colored_text_string += f' [{current_dir_file_and_folder_count}]'

        # adding dir size to name
        if include_sizes:
            dir_size_in_bytes = get_folder_size_in_bytes(path_to_folder=abs_path)
            adjusted_dir_size = get_adjusted_file_size(file_size_in_bytes=dir_size_in_bytes)
            colored_text_string += f' ({adjusted_dir_size})'

        # creating folder node
        tree.create_node(tag=colored_text_string,
                         identifier=p_root_id,
                         parent=parent_id)

        # increasing total dirs count
        total_dirs_num += 1

        # iterating over files
        for file in files:
            # getting file name
            f_id = p_root_id / file
            file_name = f_id.name

            # checking if user has passed specific extension
            if specific_extension is not None:
                # checking if current file is of specified extension
                if not file.endswith(specific_extension):
                    continue

            # adding file size to name
            if include_sizes:
                file_size_in_bytes = get_file_size_in_bytes(file_path=f_id)
                adjusted_file_size = get_adjusted_file_size(file_size_in_bytes=file_size_in_bytes)
                file_name += f' ({adjusted_file_size})'

            # creating file node
            if include_files:
                tree.create_node(tag=file_name,
                                 identifier=f_id,
                                 parent=p_root_id)

            # increasing total files count
            total_files_num += 1

    # getting dirs and files string

    # checking dirs num
    if total_dirs_num == 1:
        dirs_string = 'directory'
    else:
        dirs_string = 'directories'

    # checking files num
    if total_files_num == 1:
        files_string = 'file'
    else:
        files_string = 'files'

    # defining dirs and files string
    dirs_and_files_string = f'{total_dirs_num - 1} {dirs_string}, {total_files_num} {files_string}'

    # adding full size
    if include_sizes:
        full_size = get_folder_size_in_bytes(path_to_folder=start_path)
        adjusted_full_size = get_adjusted_file_size(file_size_in_bytes=full_size)
        full_size_string = f', {adjusted_full_size}'
        dirs_and_files_string += full_size_string

    # getting tree size
    size = tree.size()

    # checking if tree is empty
    if size == 0:
        # printing invalid input message
        print('Invalid input. Must be a directory.\nPlease check input and try again.')
    else:
        # displaying tree
        print(tree)
        print(dirs_and_files_string)
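
A hedged usage sketch; the helpers referenced above (get_folder_size_in_bytes, get_adjusted_file_size, get_file_size_in_bytes, get_number_of_files_inside_folder) and the imports (os, pathlib.Path, treelib.Tree) are assumed to be defined elsewhere in that project.

pytree('.')                                              # folders and files, no sizes or counts
pytree('/tmp', include_sizes=True, include_counts=True)  # annotate each node with size and count
pytree('src', specific_extension='.py', force_absolute_ids=False)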
Example #6
class Match_base:
    def __init__(self):
        self.token_list = None
        self.index = 0
        self.token = ''
        self.token_node = None
        self.tree = Tree()
        self.anls_proc = []
        self.res = True
        # self.info = []
        self.info = ''

    def set_tokenList(self, token_list):
        self.token_list = token_list
        self.index = 0
        self.token = self.token_list[self.index].tag
        self.token_node = self.token_list[self.index]
        self.tree = Tree()
        self.anls_proc = []
        self.res = True
        # self.info = []
        self.info = ''

    def get_next(self, parent):
        tmp = self.index - len(self.anls_proc)
        if tmp < 0:
            tmp = 0
            self.index += 1
        for i in range(tmp + 1):
            if self.index - tmp + i < len(self.token_list):
                self.anls_proc.append(self.token_list[self.index - tmp +
                                                      i].tag)
        if self.token is not None:
            self.tree.create_node(tag=self.token,
                                  identifier=str(uuid.uuid1()),
                                  parent=parent)

        if self.index >= len(self.token_list) - 1:
            self.index += 1
            self.token = '#'
            self.anls_proc.append(self.token)
            return self.token
        else:
            self.index += 1
            self.token = self.token_list[self.index].tag
            self.token_node = self.token_list[self.index]
            return self.token

    def reset_token(self, re_num=-1):
        if re_num == -1:
            self.index = 0
            self.anls_proc.clear()
            self.token = self.token_list[self.index].tag
            self.token_node = self.token_list[self.index]
        else:
            self.index -= re_num
            for i in range(re_num):
                self.anls_proc.pop(len(self.anls_proc) - 1)
            self.token = self.token_list[self.index].tag
            self.token_node = self.token_list[self.index]

    def creat_node(self, name, parent):
        iid = str(uuid.uuid1())
        if self.tree.size() == 0:
            self.tree.create_node(tag='{}'.format(name), identifier=iid)
        else:
            self.tree.create_node(tag='{}'.format(name),
                                  identifier=iid,
                                  parent=parent)
        return iid

    def func_main(self, parent):
        return False

    def is_var(self):
        res = self.token.isidentifier()
        if self.token in {
                "void", "main", "short", "long", "int", "double", "float",
                "while", "if", "else", "for", "break", "return"
        }:
            res = False
        return res

    def is_const(self):
        return self.token.isdigit()

    def run(self, flag):
        self.res = self.func_main('root')
        if self.res is True:
            if len(self.token_list) > len(self.anls_proc):
                self.info = 'error: {}, token: {}, row: {}, col: {}\n'.format(
                    'unmatched char', self.token_node.tag, self.token_node.row,
                    self.token_node.col)
                if flag:
                    self.res = False
        if self.index == 0:
            self.index += 1
        if len(self.info) == 0:
            self.info = 'all ok'
        return self.res, self.index - 1, self.tree, self.info

    def create_dotPic(self, root_dir):
        if not os.path.exists(root_dir):
            os.makedirs(root_dir)
        self.tree.to_graphviz(filename='{}/tree.dot'.format(root_dir))
        string = open('{}/tree.dot'.format(root_dir)).read()
        dot = graphviz.Source(string)
        dot.render('{}/tree'.format(root_dir), format='png')
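
A minimal sketch of the node-creation pattern used throughout the matcher above: every node gets a fresh uuid as its identifier, so repeated token tags (which are common in a parse tree) never collide. The token values below are illustrative.

import uuid
from treelib import Tree

tree = Tree()
root_id = str(uuid.uuid1())
tree.create_node(tag='expr', identifier=root_id)

for tok in ['a', '+', 'a']:  # duplicate tags are fine, identifiers stay unique
    tree.create_node(tag=tok, identifier=str(uuid.uuid1()), parent=root_id)

tree.show()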
Example #7
class StepParse:
    def __init__(self):
        pass

    def load_step(self, step_filename):
        self.nauo_lines = []
        self.prod_def_lines = []
        self.prod_def_form_lines = []
        self.prod_lines = []
        self.filename = os.path.splitext(step_filename)[0]

        line_hold = ''
        line_type = ''

        # Find all search lines
        with open(step_filename) as f:
            for line in f:
                # TH: check whether each line starts a new indexed entry; if the file wraps a
                # record across several lines, the continuation is appended to the held line
                index = re.search("#(.*)=", line)
                if index:
                    # TH: if not none then it is the start of a line so read it
                    # want to hold line until it has checked next line
                    # if next line is a new indexed line then save previous line
                    if line_hold:
                        if line_type == 'nauo':
                            self.nauo_lines.append(line_hold)
                        elif line_type == 'prod_def':
                            self.prod_def_lines.append(line_hold)
                        elif line_type == 'prod_def_form':
                            self.prod_def_form_lines.append(line_hold)
                        elif line_type == 'prod':
                            self.prod_lines.append(line_hold)
                        line_hold = ''
                        line_type = ''

                    prev_index = True  # TH remember previous line had an index
                    if 'NEXT_ASSEMBLY_USAGE_OCCURRENCE' in line:
                        line_hold = line.rstrip()
                        line_type = 'nauo'
                    elif ('PRODUCT_DEFINITION ' in line
                          or 'PRODUCT_DEFINITION(' in line):
                        line_hold = line.rstrip()
                        line_type = 'prod_def'
                    elif 'PRODUCT_DEFINITION_FORMATION' in line:
                        line_hold = line.rstrip()
                        line_type = 'prod_def_form'
                    elif ('PRODUCT ' in line or 'PRODUCT(' in line):
                        line_hold = line.rstrip()
                        line_type = 'prod'
                else:
                    prev_index = False
                    #TH: if end of file and previous line was held
                    if 'ENDSEC;' in line:
                        if line_hold:
                            if line_type == 'nauo':
                                self.nauo_lines.append(line_hold)
                            elif line_type == 'prod_def':
                                self.prod_def_lines.append(line_hold)
                            elif line_type == 'prod_def_form':
                                self.prod_def_form_lines.append(line_hold)
                            elif line_type == 'prod':
                                self.prod_lines.append(line_hold)
                            line_hold = ''
                            line_type = ''
                    else:
                        #TH: if not end of file
                        line_hold = line_hold + line.rstrip()

        self.nauo_refs = []
        self.prod_def_refs = []
        self.prod_def_form_refs = []
        self.prod_refs = []

        # TH: added 'replace(",", " ")' to replace ',' with a space, making the split easier if there are no spaces between the words

        # Find all (# hashed) line references and product names
        # TH: it might be worth finding a different way of extracting data we do want rather than fixes to get rid of the data we don't
        for j in range(len(self.nauo_lines)):
            self.nauo_refs.append([
                el.rstrip(',')
                for el in self.nauo_lines[j].replace(",", " ").replace(
                    "=", " ").split() if el.startswith('#')
            ])
        for j in range(len(self.prod_def_lines)):
            self.prod_def_refs.append([
                el.rstrip(',') for el in self.prod_def_lines[j].replace(
                    ",", " ").replace("=", " ").split() if el.startswith('#')
            ])
        for j in range(len(self.prod_def_form_lines)):
            self.prod_def_form_refs.append([
                el.rstrip(',') for el in self.prod_def_form_lines[j].replace(
                    ",", " ").replace("=", " ").split() if el.startswith('#')
            ])
        for j in range(len(self.prod_lines)):
            self.prod_refs.append([
                el.strip(',')
                for el in self.prod_lines[j].replace(",", " ").replace(
                    "(", " ").replace("=", " ").split() if el.startswith('#')
            ])
            self.prod_refs[j].append(self.prod_lines[j].split("'")[1])

        # Get first two items in each sublist (as third is shape ref)
        #
        # First item is 'PRODUCT_DEFINITION' ref
        # Second item is 'PRODUCT_DEFINITION_FORMATION <etc>' ref
        self.prod_all_refs = [el[:2] for el in self.prod_def_refs]

        # Match up all references down to level of product name
        for j in range(len(self.prod_all_refs)):

            # Add 'PRODUCT_DEFINITION' ref
            for i in range(len(self.prod_def_form_refs)):
                if self.prod_def_form_refs[i][0] == self.prod_all_refs[j][1]:
                    self.prod_all_refs[j].append(self.prod_def_form_refs[i][1])
                    break

            # Add names from 'PRODUCT_DEFINITION' lines
            for i in range(len(self.prod_refs)):
                if self.prod_refs[i][0] == self.prod_all_refs[j][2]:
                    self.prod_all_refs[j].append(self.prod_refs[i][2])
                    break

        # Find all parent and child relationships (3rd and 2nd item in each sublist)
        self.parent_refs = [el[1] for el in self.nauo_refs]
        self.child_refs = [el[2] for el in self.nauo_refs]

        # Find distinct parts and assemblies via set operations (sets, so no repetition of items)
        self.all_type_refs = set(self.child_refs) | set(self.parent_refs)
        self.ass_type_refs = set(self.parent_refs)
        self.part_type_refs = set(self.child_refs) - set(self.parent_refs)

        #TH: find root node
        self.root_type_refs = set(self.parent_refs) - set(self.child_refs)
        self.create_dict()

    def show_values(self):
        # TH: basic testing; if needed these could be split up
        print(self.nauo_lines)
        print(self.prod_def_lines)
        print(self.prod_def_form_lines)
        print(self.prod_lines)
        print(self.nauo_refs)
        print(self.prod_def_refs)
        print(self.prod_def_form_refs)
        print(self.prod_refs)

    def create_dict(self):
        # TH: links nauo number with a name and creates dict
        self.part_dict = {}
        for part in self.all_type_refs:
            for sublist in self.prod_def_refs:
                if sublist[0] == part:
                    prod_loc = '#' + re.findall(r'\d+', sublist[1])[0]
                    pass
            for sublist in self.prod_def_form_refs:
                if sublist[0] == prod_loc:
                    prod_loc = '#' + str(re.findall(r'\d+', sublist[1])[0])
                    pass
            for sublist in self.prod_refs:
                if sublist[0] == prod_loc:
                    part_name = sublist[2]

            self.part_dict[part] = part_name

    def create_tree(self):
        #TH: create tree diagram in newick format
        #TH: find root node

        self.tree = Tree()
        #TH: check if there are any parts to make a tree from, if not don't bother
        if self.part_dict == {}:
            return

        root_node_ref = list(self.root_type_refs)[0]
        self.tree.create_node(self.part_dict[root_node_ref], 0)

        #TH: created root node now fill in next layer
        #TH: create dict for tree, as each node needs a unique name
        i = [0]  # iterates through nodes
        self.tree_dict = {}
        self.tree_dict[i[0]] = root_node_ref

        def tree_next_layer(self, parent):
            root_node = self.tree_dict[i[0]]
            for line in self.nauo_refs:
                if line[1] == root_node:
                    i[0] += 1
                    self.tree_dict[i[0]] = str(line[2])
                    self.tree.create_node(self.part_dict[line[2]],
                                          i[0],
                                          parent=parent)
                    tree_next_layer(self, i[0])

        tree_next_layer(self, 0)

    def print_tree(self):
        try:
            self.tree.show()
        except:
            self.create_tree()
            self.tree.show()

    def tree_to_json(self, save_to_file=False, filename='file', path=''):
        #TH: return json format tree, can also save to file
        if self.tree.size() != 0:
            data = self.tree.to_json()
            j = json.loads(data)
            if save_to_file == True:
                if path:
                    file_path = os.path.join(path, filename)
                else:
                    file_path = filename

                with open(file_path + '.json', 'w') as outfile:
                    json.dump(j, outfile)

            return data
        else:
            print("no tree to print")
            return
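
A minimal, runnable sketch of the to_json round trip that tree_to_json above performs (the two-node tree and the output filename are illustrative):

import json
from treelib import Tree

t = Tree()
t.create_node('assembly', 0)
t.create_node('part', 1, parent=0)

data = t.to_json()       # JSON string, e.g. {"assembly": {"children": ["part"]}}
j = json.loads(data)
with open('assembly_tree.json', 'w') as outfile:
    json.dump(j, outfile)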
                       "Select extra knowledge to load mirror infomation")
    if filename is not None:
        with open(filename, 'r') as load_f:
            load_dict = json.load(load_f)
    mirror = get_mirror(root.startEA)
    procs = get_all_procs()
    tree.create_node(fname,
                     hex(root.startEA),
                     data=Xref_node(fname, hex(root.startEA), XType.code,
                                    mirror))
    if mirror == fname:
        add_xrefs(root.startEA, XType.code)
    Message("Reference Tree:\n\n")
    tree.show(line_type="ascii-em", idhidden=False, data_property='mirror')
    Message("Unique references:\n")
    for node in tree.all_nodes_itr():
        if type(node.data.mirror) is str:
            print(node.identifier)
    #hierarchical output
    for level in range(1, tree.depth()):
        Message("\nLevel %d: %d\n" % (level, tree.size(level)))
        for node in tree.all_nodes():
            if tree.level(node.identifier) == level and type(
                    node.data.mirror) is str:
                print(node.identifier)
    Message("\n%d subroutines in routine %s need transplanting.\n" %
            (Xref_node.xrefTrans - 1, fname))
    conn.close()
else:
    Warning("No function found at location %x" % here())
Example #9
class Parser_analyzer:
    """
    语句LL(1)文法:
    NEED:expr, 各种终止符
    NOTE:int_t为无法解决:
        A -> B int
        B -> int b | ϵ
    类型的回溯问题采用的特殊方案, 出现在int_t main()位置。
    """
    def __init__(self):
        self.Vn = []  # non-terminal symbols
        self.Vt = []  # terminal symbols
        self.table = None  # predictive parsing table
        self.stack_anls = []
        self.stack_toke = []

        self.err_info = []

        self.AST_Tree = Tree()
        self.AST_Tree_root = None
        self.parent_uid = None
        self.node_parent_dict = None

        self.current_anal_scope = 0

    def load_analyzer(self, prod_path, ff_path):
        prod_set = {}
        prod_set_ori = open(prod_path, 'r', encoding='utf-8').readlines()
        temp_prod = ''
        for item in prod_set_ori:
            item = item.strip()
            if item[0] != '|':
                temp = item.split(' ')
                temp_prod = temp[0]
                res = ''
                for ii in temp[2:]:
                    res += '{} '.format(ii)
                res = res.strip()
                prod_set[temp_prod] = []
                prod_set[temp_prod].append(res)
                if temp_prod not in self.Vn:
                    self.Vn.append(temp_prod)
            else:
                temp = item.split(' ')
                res = ''
                for ii in temp[1:]:
                    res += '{} '.format(ii)
                res = res.strip()
                prod_set[temp_prod].append(res)

        ff_set = {}
        ff_set_ori = open(ff_path, 'r', encoding='utf-8').readlines()
        for item in ff_set_ori:
            item = item.replace('\n', '')
            item = item.split('\t')

            end_symbol = item[0]
            eps_flag = item[1]
            fi_set = item[2].split(' ')
            if len(item) == 4:
                fo_set = item[3].split(' ')
            else:
                fo_set = []

            ff_set[end_symbol] = {
                'eps_flag': eps_flag,
                'fi_set': fi_set,
                'fo_set': fo_set
            }

        self.table = [[] for row in range(len(self.Vn))]  # predictive parsing table

        for item in self.Vn:
            item_prod = prod_set[item]
            item_ff = ff_set[item]

            if item_ff['eps_flag'] == 'true':
                item_ff['fi_set'].remove('eps')
            for non in item_ff['fi_set']:
                if non not in self.Vt:
                    self.Vt.append(non)
                    for n in range(len(self.Vn)):
                        self.table[n].append('')
                aim_prod = None
                aim2_prod = None
                for temp_prod in item_prod:
                    temp_shit = temp_prod.split(' ')
                    temp_first = temp_shit[0]
                    if temp_first == 'eps' and len(temp_shit) > 1:
                        aim2_prod = temp_prod
                    if non == temp_first:
                        aim_prod = temp_prod
                        break
                    elif temp_first in ff_set:
                        if non in ff_set[temp_first]['fi_set'] or ff_set[
                                temp_first]['eps_flag'] == 'true':
                            aim_prod = temp_prod
                            break

                if aim_prod is None:
                    aim_prod = aim2_prod
                self.table[self.Vn.index(item)][self.Vt.index(non)] = aim_prod
            if item_ff['eps_flag'] == 'true':
                for non in item_ff['fo_set']:
                    if non not in self.Vt:
                        self.Vt.append(non)
                        for n in range(len(self.Vn)):
                            self.table[n].append('')
                    self.table[self.Vn.index(item)][self.Vt.index(non)] = 'eps'

    def load_stack(self, token_list, start):
        self.stack_anls = []
        self.stack_anls.append('#')
        self.stack_anls.append(start)

        self.stack_toke = []
        self.stack_toke.append('#')
        temp = list(reversed(token_list))
        self.stack_toke.extend(temp)

        self.err_info = []

        self.node_parent_dict = {start: [None]}

    def table_show(self):
        res = ''
        # print(self.Vt)
        res += "{}\n".format(str(self.Vt))
        idx = 0
        for item in self.table:
            # print('{}'.format(self.Vn[idx]), end='\t')
            res += "{}\t".format(self.Vn[idx])
            idx2 = 0
            for jt in item:
                # print('\'{}\'({})'.format(jt, self.Vt[idx2]), end=' ')
                res += "'{}'({}) ".format(jt, self.Vt[idx2])
                idx2 += 1
            # print()
            res += '\n'
            idx += 1
        return res

    def ans_show(self):
        print(self.stack_anls)
        print(self.stack_toke)
        print()

    def creat_node(self, tag, parent, data):
        if self.AST_Tree.size() == 0:
            node = self.AST_Tree.create_node(tag='{}'.format(tag), data=data)
            self.AST_Tree_root = node
        else:
            node = self.AST_Tree.create_node(tag='{}'.format(tag),
                                             parent=parent,
                                             data=data)
        return node.identifier

    def create_dotPic(self, root_dir):
        # root_dir = './treePic'
        self.AST_Tree.to_graphviz(filename='{}/tree.dot'.format(root_dir))
        string = open('{}/tree.dot'.format(root_dir)).read()
        dot = graphviz.Source(string)
        dot.render('{}/tree'.format(root_dir), format='png')

    def run(self, log=False):
        anlsRes = ''
        anlsLog = ''
        toke = self.stack_toke.pop(-1)
        symbol = self.stack_anls.pop(-1)
        while symbol != '#':
            if symbol in [toke.tag, toke.type]:
                # update the current scope
                if symbol == '{':
                    self.current_anal_scope += 1
                elif symbol == '}':
                    self.current_anal_scope -= 1
                else:
                    toke.set_scope(self.current_anal_scope)
                # record the token's literal value
                if toke.type == 'num':
                    toke.set_value(toke.tag)
                # create the AST node and attach it
                self.creat_node(symbol, self.node_parent_dict[symbol][-1],
                                toke)
                self.node_parent_dict[symbol].pop(-1)
                if len(self.node_parent_dict[symbol]) == 0:
                    self.node_parent_dict.pop(symbol)
                toke = self.stack_toke.pop(-1)
                if log:
                    # print('\t*HIT: {}\t<-\t{}'.format(symbol, toke))
                    anlsLog += "\t*HIT: {}\t<-\t{}\n".format(symbol, toke)
                if toke == '#':
                    break
            elif symbol in self.Vn:
                if toke.type in ['var', 'num']:  # identifiers and numbers are looked up by type
                    table_item = self.table[self.Vn.index(symbol)][
                        self.Vt.index(toke.type)]
                else:
                    table_item = self.table[self.Vn.index(symbol)][
                        self.Vt.index(toke.tag)]
                table_item = table_item.split(' ')
                if table_item[0] == '':  # error handling
                    # print('\t*ERROR: {}\t<-\t{}'.format(symbol, toke))
                    anlsLog += "\t*ERROR: {}\t<-\t{}\n".format(symbol, toke)
                    self.err_info.append(
                        "row: {}, col: {}, token: '{}' cont match '{}'\n".
                        format(toke.row, toke.col, toke, symbol))
                elif table_item[0] == 'eps':  # entry starts with eps
                    if len(table_item) > 1:  # valid derivation
                        temp = list(reversed(table_item))[0:-1]
                        self.stack_anls.extend(temp)
                        # update the node-to-parent hash table
                        for item in temp:
                            if item not in self.node_parent_dict:
                                self.node_parent_dict[item] = []
                            self.node_parent_dict[item].append(self.parent_uid)
                else:  # valid derivation
                    temp = list(reversed(table_item))
                    self.stack_anls.extend(temp)
                    # create the AST node and attach it
                    self.parent_uid = self.creat_node(
                        symbol, self.node_parent_dict[symbol][-1], symbol)
                    self.node_parent_dict[symbol].pop(-1)
                    if len(self.node_parent_dict[symbol]) == 0:
                        self.node_parent_dict.pop(symbol)
                    # update the node-to-parent hash table
                    for item in temp:
                        if item not in self.node_parent_dict:
                            self.node_parent_dict[item] = []
                        self.node_parent_dict[item].append(self.parent_uid)
                    if log:
                        # print()
                        # print("symb:\'{}\'----stack:{}".format(symbol, list(reversed(self.stack_anls))))
                        # print("toke:{}----stack:{}".format(toke, list(reversed(self.stack_toke))))
                        anlsLog += "\n"
                        anlsLog += "symb:\'{}\'----stack:{}\n".format(
                            symbol, list(reversed(self.stack_anls)))
                        anlsLog += "toke:{}----stack:{}\n".format(
                            toke, list(reversed(self.stack_toke)))
            symbol = self.stack_anls.pop(-1)
        self.node_parent_dict.clear()
        # self.ans_show()
        if len(self.err_info) == 0:
            # print('match complete!')
            anlsRes += "match complete!\n"
        for item in self.err_info:
            anlsRes += "{}".format(item)
        return anlsRes, anlsLog
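
The run() method above assumes each entry popped from stack_toke exposes tag, type, row and col attributes plus set_scope() and set_value() methods; that token class is not part of this example. A minimal stand-in, purely as an assumption about the lexer's interface, might look like this:

class Token:
    # Hypothetical token stub -- the real token class used by this parser is not shown here.
    def __init__(self, tag, type_, row=0, col=0):
        self.tag = tag        # the lexeme, e.g. '{', 'x' or '42'
        self.type = type_     # the token class, e.g. 'var' or 'num'
        self.row, self.col = row, col
        self.scope = None
        self.value = None

    def set_scope(self, scope):
        self.scope = scope

    def set_value(self, value):
        self.value = value

    def __str__(self):
        return self.tag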
Example #10
0
def use_hyp(word2syn, output, data):
    un_change = []
    dic = Tree()
    dic.create_node("100001740", "100001740")
    add = -1
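    # keep re-reading the hypernym pairs until a full pass adds no new nodes:
    # a child can only be attached once its parent is already in the tree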
    while add != 0:
        add = 0
        with open(datapath + "wn_hyp.pl", "r") as f:
            for line in f:
                l, r = re.findall(r'\d+', line)
                try:
                    dic.create_node(l, l, parent=r)
                    add += 1
                except Exception:
                    # the parent is not in the tree yet; retry on a later pass
                    pass
        print(dic.size())
    entail = defaultdict(list)
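    # every synset entails all synsets in its subtree (its descendants)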
    for n in dic.all_nodes():
        for m in dic.subtree(n.tag).all_nodes():
            if m.tag != n.tag:
                entail[n.tag].append(m.tag)
    label = set()
    for d in data:
        d0 = d[0]
        d1 = d[1]
        if p.singular_noun(d[0]) != False:
            d0 = p.singular_noun(d[0])
        if p.singular_noun(d[1]) != False:
            d1 = p.singular_noun(d[1])
        for i in word2syn[d0]:
            for j in word2syn[d1]:
                if j in entail[i]:
                    if d[0] + "\t" + ">" + "\t" + d[1] not in output:
                        output += [d[0] + "\t" + ">" + "\t" + d[1]]
                        label.add(d)
                elif i in entail[j]:
                    if d[0] + "\t" + "<" + "\t" + d[1] not in output:
                        output += [d[0] + "\t" + "<" + "\t" + d[1]]
                        label.add(d)
        if d not in un_change and d not in label:
            un_change += [d]
    print("before single: " + str(len(data)) + " after: " +
          str(len(un_change)))
    output += ["\n"]
    del entail
    data = un_change
    del un_change
    un_change = []
    alter = defaultdict(list)
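    # sibling synsets (same parent) are recorded as alternatives ('|')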
    for n in dic.all_nodes():
        for m in dic.siblings(n.tag):
            if m.tag != n.tag and n.bpointer != m.tag:
                alter[n.tag].append(m.tag)
    label = set()
    for d in data:
        d0 = d[0]
        d1 = d[1]
        if p.singular_noun(d[0]) != False:
            d0 = p.singular_noun(d[0])
        if p.singular_noun(d[1]) != False:
            d1 = p.singular_noun(d[1])
        for i in word2syn[d0]:
            for j in word2syn[d1]:
                if j in alter[i]:
                    if d[0] + "\t" + "|" + "\t" + d[1] not in output:
                        output += [d[0] + "\t" + "|" + "\t" + d[1]]
                        label.add(d)
                elif i in alter[j]:
                    if d[0] + "\t" + "|" + "\t" + d[1] not in output:
                        output += [d[0] + "\t" + "|" + "\t" + d[1]]
                        label.add(d)
        if d not in un_change and d not in label:
            un_change += [d]
    del alter
    print("before single: " + str(len(data)) + " after: " +
          str(len(un_change)))
    output += ["\n"]
    return output, un_change
Example #11
0
def count_of_all_distributions_of_linux(data):
    tree = Tree()
    root = tree.create_node('root', 'root')
    tree = build_tree(data=data["Linux"], tree=tree, parent=root)
    return tree.size() - 1
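
build_tree is not included in this example. A minimal sketch, assuming data["Linux"] is a nested dict that maps each distribution name to a dict of its derivatives (the nested-dict layout is an assumption; the parameter names follow the call above):

def build_tree(data, tree, parent):
    # Hypothetical helper, not part of the original example: attach every key of a
    # nested dict as a child of `parent`, then recurse into its sub-dict.
    for name, children in data.items():
        node = tree.create_node(tag=name, parent=parent)
        if isinstance(children, dict) and children:
            build_tree(data=children, tree=tree, parent=node)
    return tree

Under that assumption, tree.size() - 1 is the total number of distributions, since the artificial 'root' node is not counted.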
Example #12
0
tablestocount = list(set(kimTables_tables))
for i in tablestocount:
    table_counts.append(kimTables_tables.count(i))
kimcount = dict(zip(tablestocount, table_counts))
count_list = list()
sheetFrame_new = list()
sheetFrame_new = list(dicttree.keys())
sheet_do = list()
for k in sheetFrame_new:
    addedTree = list()
    count = 0
    d_list = dicttree[k]
    tree = Tree()
    tree, addedTree = treeBuildParent(k, fieldsIdentifierdict, addedTree)
    tree, addedTree, newAdded = treeBuild(k, dicttree, fieldsIdentifierdict, tree, addedTree)
    depth.append(tree.size())
    depth1.append(len(addedTree))
    sheet.append(k)
    for i in addedTree:
        if i in tablestocount:
            count += kimcount[i]
    count_list.append(count)
    sheet_do.append(k)
treedataframe = {'Name': sheet_do, 'Count': count_list, 'Depth': depth}
treedataframe = pd.DataFrame(data=treedataframe)
treedataframe.to_csv('tree.csv', index=None)
max_value = max(depth)
max_index = depth.index(max_value)

addedTree = list()
k = 'ANADJP'
class RST_DT:
    def load(self, path2file):
        self.id_EDUs = []
        self.EDU = {}
        self.treeNS = Tree()
        self.tree = Tree()
        # maximum number of leading spaces, used to size id_parents
        with open(path2file, "r") as f:
            max_space = 0
            nb_line = 0
            for i, line in enumerate(f):
                nb_space = 0
                for c in line:
                    if c == " ":
                        nb_space += 1
                    else:
                        break
                if nb_space > max_space:
                    max_space = nb_space
                nb_line += 1
        with open(path2file, "r") as f:
            id_parents = [0] * max_space
            NS_parents = [0] * max_space
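            # id_parents[level] / NS_parents[level] keep the most recent node seen at
            # each indentation level; a line's parent is the entry one level above it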
            for i, line in enumerate(f):
                # the number of leading spaces determines the parent
                nb_space = 0
                for c in line:
                    if c == " ":
                        nb_space += 1
                    else:
                        break
                space = nb_space // 2  # integer division: two spaces per depth level
                id_parents[space] = i
                parent = id_parents[space - 1]
                reg = "\(([\w\-\[\]]+)|(_!.+!_)"  # récupération du contenu
                match = re.findall(reg, line)[0]
                if match[0] == "":
                    content = match[1]  # EDU leaf
                    self.id_EDUs.append(i)
                    # print content
                    self.EDU[i] = re.findall("_!(.*)!_", content)
                else:
                    content = match[0]
                    reg2 = "\[(N|S)\]"  # récupération NS
                    match2 = re.findall(reg2, content)
                    NS_parents[space] = match2  # ['N','S']
                # create the node
                if i == 0:
                    self.tree.create_node(content, 0)
                    self.treeNS.create_node("Root", 0)
                else:
                    id_NS = len(self.tree.is_branch(parent))  # 0 or 1, since the tree is binary
                    self.tree.create_node(content, i, parent=parent)
                    self.treeNS.create_node(NS_parents[space - 1][id_NS], i, parent=parent)

    def toDEP(self):

        ###############################
        # Step 1: build the head_tree

        # breadth-first traversal of tree to collect each id_node
        # at each depth (starting from 0), _! not counting !_ the leaves (EDUs)

        nodes_depth = [-1] * self.tree.size()
        for i in range(self.tree.size()):
            id_nodes = [0]
            depth = [999] * self.tree.size()
            while id_nodes:  # False if empty
                id_node = id_nodes.pop(0)
                node = self.tree.get_node(id_node)
                if node.bpointer != None:
                    node_parent = self.tree.get_node(node.bpointer)
                    depth[node.identifier] = depth[node_parent.identifier] + 1
                else:
                    depth[node.identifier] = 0
                if id_node == i:
                    # print('node', i, 'at depth', depth[node.identifier])
                    if node.fpointer:
                        nodes_depth[i] = depth[i]
                    break
                if node.fpointer:
                    id_nodes.append(node.fpointer[0])
                    id_nodes.append(node.fpointer[1])
        # print nodes_depth

        id_nodes_depth = []
        for d in range(self.tree.depth()):
            id_nodes_depth.append([])
            for i in range(self.tree.size()):
                if nodes_depth[i] == d:
                    id_nodes_depth[d].append(i)
        # print id_nodes_depth

        #
        # build the head_tree

        head_tree = [-1] * self.treeNS.size()
        # for each (non-EDU/leaf) node, starting from the greatest depth in the tree
        for d in range(len(id_nodes_depth) - 1, -1, -1):
            for id_node in id_nodes_depth[d]:
                node = self.treeNS.get_node(id_node)
                node_left = self.treeNS.get_node(node.fpointer[0])
                node_right = self.treeNS.get_node(node.fpointer[1])
                if node_left.tag == "N":
                    if head_tree[node_left.identifier] == -1:
                        identifier = node_left.identifier
                    else:
                        identifier = head_tree[node_left.identifier]
                else:
                    if head_tree[node_right.identifier] == -1:
                        identifier = node_right.identifier
                    else:
                        identifier = head_tree[node_right.identifier]
                head_tree[id_node] = identifier
        # print head_tree

        ###############################
        # Step 2: build the DEP

        #
        # build the DEP

        # init
        # the root is the first node of head
        # every EDU initially gets the root as its parent in the DEP
        dep_tree = Tree()
        id_root = head_tree[0]
        root = self.tree.get_node(id_root)
        # dep_tree.create_node(root.tag, root.identifier)
        dep_tree.create_node(root.tag, root.identifier)
        for id_EDU in range(len(head_tree)):
            if head_tree[id_EDU] == -1 and id_EDU != id_root:
                node = self.tree.get_node(id_EDU)
                # dep_tree.create_node(node.tag, node.identifier, parent=id_root)
                # dep_tree.create_node(str(id_EDU), node.identifier, parent=id_root)
                dep_tree.create_node(node.tag, node.identifier, parent=id_root)

        # print '//////////////////////'
        # print 'EDU', id_root
        # for each EDU
        for id_EDU in range(len(head_tree)):
            if head_tree[id_EDU] == -1 and id_EDU != id_root:

                EDU_NS = self.treeNS.get_node(id_EDU)
                # print '.......................'
                # print 'EDU', id_EDU
                # print 'TAG', EDU_NS.tag

                if EDU_NS.tag == "N":
                    # breadth-first search until an S with a head is found, i.e. one that is not an EDU
                    id_nodes = [EDU_NS.identifier]
                    visited = [False] * self.treeNS.size()
                    while id_nodes:
                        id_node = id_nodes.pop(0)
                        EDU = self.tree.get_node(id_node)
                        # print 'visited EDU', EDU.identifier
                        visited[EDU.identifier] = True
                        # stopping condition
                        head_EDU = head_tree[EDU.identifier] == -1
                        head_EDU = False
                        node_tag = self.treeNS.get_node(EDU.identifier).tag
                        # print '  head_EDU', head_EDU
                        # print '  node_tag', node_tag
                        if not head_EDU and node_tag == "S":
                            break
                        if EDU.bpointer:
                            if not visited[EDU.bpointer]:
                                id_nodes.append(EDU.bpointer)
                        if EDU.fpointer:  # safety check
                            if not visited[EDU.fpointer[0]]:
                                id_nodes.append(EDU.fpointer[0])
                            if not visited[EDU.fpointer[1]]:
                                id_nodes.append(EDU.fpointer[1])

                    # then add it to the DEP as a child of the head of the S node's parent
                    id_head = head_tree[EDU.bpointer]

                # if the parent is S
                else:
                    # walk up the ancestors until one with a head is found
                    parent = self.treeNS.get_node(EDU_NS.bpointer)
                    id_head = head_tree[parent.identifier]

                # then add it to the DEP as a child of that head
                if id_EDU != id_head:
                    dep_tree.move_node(id_EDU, id_head)
                EDU = self.tree.get_node(id_EDU)
                # print('---- adding', EDU.identifier, 'to', id_head)
                # if id_EDU == id_head:
                # dep_tree.show()

        return dep_tree
        # showDepth(dep_tree, 4)
        # dep_tree.show()

        # node = dep_tree.

    def toString(self):
        """ affiche comme la sortie de Hilda """
        showDepth(self.tree, 0)
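
showDepth is referenced by toString (and in the commented-out lines of toDEP) but is not defined in this snippet. A minimal sketch, assuming it simply prints every node of a treelib Tree indented by its level, starting at min_depth:

def showDepth(tree, min_depth):
    # Hypothetical helper, not part of the original class: print each node of a
    # treelib Tree indented by its depth, skipping nodes shallower than `min_depth`.
    for node in tree.all_nodes():
        level = tree.depth(node)
        if level >= min_depth:
            print("  " * level + str(node.tag))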