Beispiel #1
0
    def split(self, source_file, target_file_pattern, paths_to_files):
        """
        Save selected parts of an XML file into different target files.

        The file numbers stored in ``paths_to_files`` are replaced *in place*
        by target file names of the form
        ``<target_file_pattern>.<filenumber>.xml``. The very same mapping
        object is kept on ``self.path_to_files`` and is what gets returned.

        :param source_file: original xml file, handed to ``self.walk_tree``
        :param target_file_pattern: prefix used to build the target file names
        :param paths_to_files: mapping from a path selecting a part of the
            original document to a set of integer file numbers
        :return: ``self.path_to_files`` as it stands after the tree walk
        """
        self.path_to_files = paths_to_files

        # Validate the whole mapping before any entry is rewritten.
        for path in paths_to_files:
            filenums = paths_to_files[path]
            assert isinstance(filenums, set)
            for filenum in filenums:
                assert isinstance(filenum, int)

        # Turn every file number into its full target file name.
        for path in paths_to_files:
            paths_to_files[path] = {
                "%s.%s.xml" % (target_file_pattern, filenum)
                for filenum in paths_to_files[path]
            }

        self.walk_tree(file_path=source_file)
        self.post_actions()
        self.close()
        Logger.info("splitting %s completed" % source_file)
        return self.path_to_files
Beispiel #2
0
    def check_for_references(self, **kwargs):
        """
        FastXMLWalker callback function.

        Every attribute value of the current element (its own ``ID``
        attribute excluded) is looked up in ``self.IDs``. For each hit, the
        element itself and then its ancestors are searched for the nearest
        ``ID`` attribute, and a reference from that ID to the matched ID is
        added to ``self.Refs``.

        No reference is recorded when no enclosing ``ID`` exists (previously
        this produced an entry under the key ``None``) or when source and
        target are the same ID.

        :param kwargs: FastXMLWalker kwargs; only ``element`` is read
        :return: None
        """
        element = kwargs["element"]

        # The enclosing ID depends only on the element, not on the attribute
        # being inspected, so it is resolved lazily and at most once.
        source_id = None
        source_resolved = False

        for attrib_name, target_id in element.attrib.items():
            if attrib_name == "ID" or target_id not in self.IDs:
                continue

            if not source_resolved:
                node = element
                while node is not None:
                    if "ID" in node.attrib:
                        source_id = node.attrib["ID"]
                        break
                    node = node.getparent()
                source_resolved = True

            # Skip references without an owner and self-references.
            if source_id is None or source_id == target_id:
                continue

            self.Refs[source_id].add(target_id)
            if len(self.Refs) % 10000 == 0:
                Logger.info("%s Refs" % len(self.Refs))
Beispiel #3
0
    def _node_start(self, walker, element, **kwargs):
        """
        Handle the start of a node while splitting.

        Flushes the delayed element parts, pushes ``element`` onto
        ``self.node_stack``, collects the target files registered in
        ``self.path_to_files`` for the node's exact path and its ancestor
        paths, and then writes start tag and text of every stacked element
        to those files that have not received it yet.

        :param walker: walker providing ``exact_path`` of the current node
        :param element: element whose start is being processed
        :param kwargs: further walker kwargs (unused here)
        :return: None
        """
        # Writing is delayed so that nothing is written before it has been
        # parsed; flush what is pending from the previous event first.
        self.write_delayed_element_parts()

        current_path = walker.exact_path
        parent_xpath = current_path.rsplit("/", 1)[0]
        Logger.debug("write_node_start: element %s" % element)

        self.node_stack.append(element)
        Logger.debug("write_node_start node_stack: %s" % self.node_stack)

        # Walk from the node's own path upwards, merging the wanted files.
        # NOTE(review): the step count comes from self.exact_path, not from
        # walker.exact_path - presumably both are the same value; confirm.
        wanted_files = set()
        for _ in range(self.exact_path.count('/')):
            if current_path in self.path_to_files:
                wanted_files |= self.path_to_files[current_path]
            current_path = current_path.rsplit("/", 1)[0]

        # The merged set is stored under the parent's path.
        self.path_to_files[parent_xpath] = wanted_files
        if not wanted_files:
            return

        for open_elem in self.node_stack:
            if open_elem in self.node_written:
                already_written = self.node_written[open_elem]
            else:
                already_written = set()
            missing_files = wanted_files - already_written
            if not missing_files:
                continue
            self.write_start_tag(missing_files, open_elem)
            self.write_text(missing_files, open_elem)
            self.node_written[open_elem] |= missing_files
Beispiel #4
0
 def reset_relative_tree(TREEID, **kwargs):
     """
     Reset one relative interest tree of the walker to its root.

     :param TREEID: key into ``walker.relative_interests_trees``
     :param kwargs: callback kwargs; only ``walker`` is read
     :return: None
     """
     Logger.debug("Reset relative tree %s" % TREEID)
     tree_state = kwargs["walker"].relative_interests_trees[TREEID]
     # Jump back to the root of the tree and start counting depth anew.
     tree_state.current_node = tree_state.current_node.root
     tree_state.current_tree_depth = 0
Beispiel #5
0
    def write_end_tag(self, files, element):
        """
        Queue the end tag of an element for delayed writing.

        Nothing is written here; an ``"end_tag"`` entry is appended to
        ``self.delay_element_parts``.

        :param files: target files the end tag should be written into
        :param element: LXML element whose end tag should be written
        :return: None
        """
        Logger.debug("write_end_tag %s" % element)
        pending_end_tag = self.delayed_element(
            files=files,
            element=element,
            part="end_tag",
        )
        self.delay_element_parts.append(pending_end_tag)
Beispiel #6
0
 def add_split_node(self, **kwargs):
     """
     FastXMLWalker callback function.

     Should be fired on split nodes. The exact path of the current node
     (taken from the walker) is appended to ``self.SplitNodes[<interest>]``.
     Progress is logged whenever the total number of collected split nodes
     is a multiple of 10000.

     :param kwargs: FastXMLWalker kwargs; ``interest`` and ``walker`` are read
     :return: None
     """
     nodes_for_interest = self.SplitNodes[kwargs["interest"]]
     nodes_for_interest.append(kwargs["walker"].exact_path)

     total = sum(len(nodes) for nodes in self.SplitNodes.values())
     if total % 10000 == 0:
         Logger.info("%s SplitNodes" % total)
Beispiel #7
0
 def add_source(self, **kwargs):
     """
     FastXMLWalker callback function.

     Should be fired on an element that is the source of a reference. The
     element's text is appended to the ``"source_id"`` list of the relation
     identified by the exact path two levels above the current node.

     :param kwargs: FastXMLWalker kwargs; ``element`` and ``walker`` are read
     :return: None
     """
     source_uid = kwargs["element"].text
     # Strip the last two path steps to get the relation node's exact path.
     path_parts = kwargs["walker"].exact_path.rsplit("/", 2)
     relation_path = path_parts[0]
     Logger.debug("addSource %s to %s" % (source_uid, relation_path))
     self.relations[relation_path]["source_id"].append(source_uid)
Beispiel #8
0
 def add_id(self, **kwargs):
     """
     FastXMLWalker callback function.

     Should fire on a node carrying an ``ID`` attribute. The attribute value
     is added to ``self.IDs``; progress is logged each time the number of
     known IDs reaches a multiple of 10000.

     :param kwargs: FastXMLWalker kwargs; only ``element`` is read
     :return: None
     """
     new_id = kwargs["element"].attrib["ID"]
     if new_id in self.IDs:
         return

     self.IDs.add(new_id)
     known = len(self.IDs)
     if known % 10000 == 0:
         Logger.info("%s IDs" % known)
Beispiel #9
0
 def add_split_node(self, **kwargs):
     """
     FastXMLWalker callback function.

     Should fire on a split node. The node's ``ID`` attribute is appended to
     ``self.SplitNodes[<interest>]`` and the walker's exact path is recorded
     for that ID in ``self.IDs2Exact``. Progress is logged whenever the
     total number of collected split nodes is a multiple of 10000.

     :param kwargs: FastXMLWalker kwargs; ``element``, ``interest`` and
         ``walker`` are read
     :return: None
     """
     node_id = kwargs["element"].attrib["ID"]
     self.SplitNodes[kwargs['interest']].append(node_id)

     exact_path = kwargs["walker"].exact_path
     self.IDs2Exact[node_id].add(exact_path)

     total = sum(len(nodes) for nodes in self.SplitNodes.values())
     if total % 10000 == 0:
         Logger.info("%s SplitNodes identified" % total)
Beispiel #10
0
 def add_split_node_id(self, **kwargs):
     """
     FastXMLWalker callback function.

     Should be fired on the id node of a split node. The call is first
     forwarded to ``self.add_id``; then the element's text is appended to
     ``self.ExactPathIDs2SplitNodes`` under the exact path of the parent
     node, and finally the ID is added to ``self.IDs`` if still missing.

     :param kwargs: FastXMLWalker kwargs; ``element`` and ``walker`` are read
     :return: None
     """
     split_node_id = kwargs["element"].text
     self.add_id(**kwargs)

     # The id node is a child of the split node: drop the last path step.
     split_node_path = kwargs["walker"].exact_path.rsplit("/", 1)[0]
     self.ExactPathIDs2SplitNodes[split_node_path].append(split_node_id)

     # NOTE(review): presumably add_id has already registered the ID, which
     # would make the rest a no-op - confirm before removing.
     if split_node_id in self.IDs:
         return
     self.IDs.add(split_node_id)
     known = len(self.IDs)
     if known % 10000 == 0:
         Logger.info("%s IDs identified" % known)
Beispiel #11
0
 def add_id(self, **kwargs):
     """
     FastXMLWalker callback function.

     Should be fired on nodes whose text is an ID. The exact path of the
     parent node is recorded for the ID in ``self.IDs2ExactPaths`` and the
     ID is added to ``self.IDs``; progress is logged each time the number
     of known IDs reaches a multiple of 10000.

     :param kwargs: FastXMLWalker kwargs; ``element`` and ``walker`` are read
     :return: None
     """
     new_id = kwargs["element"].text
     # The ID node is a child of the node it identifies: drop the last step.
     owner_path = kwargs["walker"].exact_path.rsplit("/", 1)[0]
     self.IDs2ExactPaths[new_id].add(owner_path)

     if new_id in self.IDs:
         return
     self.IDs.add(new_id)
     known = len(self.IDs)
     if known % 10000 == 0:
         Logger.info("%s IDs" % known)
Beispiel #12
0
    def _node_end(self, **kwargs):
        """
        Handle the end of a node while splitting.

        Flushes the delayed element parts, queues end tag and tail of the
        element for every file recorded for it in ``self.node_written``,
        pops the node stack and drops the bookkeeping entries kept for this
        element and its exact path.

        :param kwargs: walker kwargs; ``element`` and ``walker`` are read
        :return: None
        """
        self.write_delayed_element_parts()
        element = kwargs['element']
        exact_path = kwargs['walker'].exact_path
        Logger.debug("write_node_end: element %s" % element)

        # Only elements that were opened in some file need to be closed.
        written_to = self.node_written[element]
        if written_to:
            self.write_end_tag(written_to, element)
            self.write_tail(written_to, element)

        self.node_stack.pop()
        # The node is finished: forget its per-element and per-path state.
        self.node_written.pop(element, None)
        self.path_to_files.pop(exact_path, None)
Beispiel #13
0
    def search_genif2(self, genif_file, split_path_node_restriction_tuples=None):
        """
        Search connected sets in genif2 files.

        Walks the file once with callbacks that collect IDs, split nodes and
        relations, resolves indirect references, calculates the connected
        sets and hands them to a ``NodeDistributor``.

        :param genif_file: path to a genif2 file
        :param split_path_node_restriction_tuples: optional iterable of
            ``(split node path, node restriction)`` tuples; when empty or
            omitted, the built-in ``item`` / ``asset`` defaults are used
        :return: result of ``NodeDistributor.distribute()`` for the
            connected sets
        """
        fx = FastXMLCallbackWalker()
        if not split_path_node_restriction_tuples:
            split_path_node_restriction_tuples = [
                ("/{http://www.media-saturn.com/msx}data/{http://www.media-saturn.com/msx}item", 1),
                ("/{http://www.media-saturn.com/msx}data/{http://www.media-saturn.com/msx}asset", 2),
            ]

        # Fixed interests: relation sources/targets, relation end, every ID.
        interests = {Interest(
            interest="/{http://www.media-saturn.com/msx}data/{http://www.media-saturn.com/msx}relation/{http://www.media-saturn.com/msx}source/{http://www.media-saturn.com/msx}uniqueID",
            callback=self.add_source
        ), Interest(
            interest="/{http://www.media-saturn.com/msx}data/{http://www.media-saturn.com/msx}relation/{http://www.media-saturn.com/msx}target/{http://www.media-saturn.com/msx}uniqueID",
            callback=self.add_target
        ), Interest(
            interest="//{http://www.media-saturn.com/msx}relation",
            callback=self.relation_to_ref,
            event='end'
        ), Interest(
            interest="//{http://www.media-saturn.com/msx}uniqueID",
            callback=self.add_id
        )}

        # Per split path: one interest for the split node itself and one for
        # its uniqueID child.
        for split_path_node_restriction_tuple in split_path_node_restriction_tuples:
            split_path = split_path_node_restriction_tuple[0]
            node_restriction = split_path_node_restriction_tuple[1]
            interests.add(
                SplitPath(
                    interest=split_path,
                    callback=self.add_split_node,
                    node_restriction=node_restriction
                )
            )
            interests.add(
                Interest(
                    interest="%s/{http://www.media-saturn.com/msx}uniqueID" % split_path,
                    callback=self.add_split_node_id,
                )
            )
        fx.register_interests(
            interests
        )
        for _uuid in fx._relative_interests_trees:
            Logger.debug(RenderTree(fx._relative_interests_trees[_uuid].interest_tree))
        fx.walk_tree(genif_file)
        Logger.debug("ids: %s" % self.IDs)
        Logger.debug("split_nodes: %s" % self.SplitNodes)
        Logger.info("%s IDs, %s split nodes, %s direct references identified" % (len(self.IDs), sum([len(self.SplitNodes[x]) for x in self.SplitNodes]), len(self.Refs)))

        Logger.debug("IDsSplitNodes %s" % self.ExactPathIDs2SplitNodes)
        Logger.debug("direct: %s" % self.Refs)
        self.calc_splitnode_ids()

        idr = IndirectIDResovler(self.Refs, self.SplitNodes)
        idr.resolve_indirect()
        Logger.debug("indirect: %s" % idr.refs)
        Logger.info("indirect reference calculation completed")

        connected_sets = self.calc_connected_sets(self.SplitNodes)

        Logger.debug("connected_sets: %s" % connected_sets)
        Logger.info("connected set calculation completed")

        nd = NodeDistributor(connected_sets)
        distribution_to_files = nd.distribute()

        Logger.debug("distribution to files: %s" % distribution_to_files)
        Logger.info("distribution to files completed")

        return distribution_to_files
Beispiel #14
0
    def search_stepxml(self, myfile, split_path_node_size_tuples=None):
        """
        Search for connected sets in a StepXML file.

        The file is walked twice: the first pass collects all IDs and the
        split nodes, the second pass checks every element for references to
        the collected IDs. Afterwards indirect references are resolved, the
        connected sets are calculated, their IDs are translated to exact
        paths via ``self.IDs2Exact`` and the result is handed to a
        ``NodeDistributor``.

        :param myfile: Path to StepXML file
        :param split_path_node_size_tuples: optional list of
            ``(path to splitnode ID, node restriction)`` tuples, e.g.
            ``[("//{http://www.stibosystems.com/step}Product/@ID", 10)]``,
            which is also the default when empty or omitted
        :return: result of ``NodeDistributor.distribute()`` for the
            connected sets expressed as exact paths
        """
        fx = FastXMLCallbackWalker()

        interests = {
            Interest(
                interest="//@ID",
                callback=self.add_id
            )
        }
        if not split_path_node_size_tuples:
            split_path_node_size_tuples = [("//{http://www.stibosystems.com/step}Product/@ID", 10)]
        for splitnode_path, node_restriction in split_path_node_size_tuples:
            interests.add(SplitPath(interest=splitnode_path, callback=self.add_split_node, node_restriction=node_restriction))

        fx.register_interests(
            interests
        )
        fx.walk_tree(myfile)
        Logger.debug("IDs: %s" % self.IDs)
        Logger.debug("split_nodes: %s" % self.SplitNodes)
        # Fixed: the original passed two arguments to len() and used the
        # invalid "%S" conversion, so this line always raised.
        Logger.info("%s IDs found, %s SplitNodes found" % (len(self.IDs), len(self.SplitNodes)))

        # Second pass: every element start is checked for references.
        fx2 = FastXMLCallbackWalker()
        fx2.register_event_callback("start", self.check_for_references)
        fx2.walk_tree(myfile)

        Logger.debug("direct: %s" % self.Refs)
        Logger.info("%s direct dependencies found" % (len(self.Refs)))

        idr = IndirectIDResovler(self.Refs, self.SplitNodes)
        idr.resolve_indirect()
        Logger.debug("indirect: %s" % idr.refs)
        Logger.info("%s indirect dependencies found" % (len(idr.refs)))

        connected_sets = self.calc_connected_sets(self.Refs, self.SplitNodes)
        Logger.debug("connected_sets: %s" % connected_sets)
        Logger.info("connected sets calculation completed")

        # Translate every connected set of IDs into a set of exact paths.
        connected_sets2 = {}
        for path in connected_sets:
            exact_path_sets = []
            for id_set in connected_sets[path]:
                exact_paths = set()
                for item in id_set:
                    exact_paths.update(self.IDs2Exact[item])
                exact_path_sets.append(exact_paths)
            connected_sets2[path] = exact_path_sets
        nd = NodeDistributor(connected_sets2)
        distribution_to_files = nd.distribute()

        Logger.debug("distribution to files: %s" % distribution_to_files)
        Logger.info("distribution to files completed")

        return distribution_to_files
Beispiel #15
0
 def _pre_detach(self, parent):
     """
     Remove this node's name from the parent's ``children_names``.

     Nodes contained in ``self.garbage`` are skipped. Presumably this is
     the tree library's pre-detach hook - confirm against the base class.

     :param parent: node this node is about to be detached from
     :return: None
     """
     is_garbage = self in self.garbage
     if not is_garbage:
         del parent.children_names[self.name]
     Logger.debug("_pre_detach %s" % parent)