Example #1
0
def process(fs, xmldata):
    """
    Update the mcmc chain length and the alignment sequences of an xml file.

    Every 'mcmc' element whose id equals fs.mcmc_id gets its 'chainLength'
    attribute set to fs.nsteps.  Every 'taxon' child of a 'sequence' child of
    an 'alignment' element whose id equals fs.alignment_id is handed to
    modify_taxon_sequence together with the start and stop positions.
    @param fs: a fieldstorage-like object
    @param xmldata: contents of an xml file as a multiline string
    @return: contents of the new xml file as produced by etree.tostring
    """
    # pull the user input out of the fieldstorage up front
    start_pos = fs.start_pos
    stop_pos = fs.stop_pos
    nsteps = fs.nsteps
    alignment_id = fs.alignment_id
    tree = etree.parse(StringIO(xmldata))
    # set the requested number of mcmc steps
    for _, mcmc in etree.iterwalk(tree, tag='mcmc'):
        if mcmc.get('id') == fs.mcmc_id:
            mcmc.set('chainLength', str(nsteps))
    # rewrite each taxon sequence of the selected alignment
    for _, alignment in etree.iterwalk(tree, tag='alignment'):
        if alignment.get('id') != alignment_id:
            continue
        for sequence in alignment:
            if sequence.tag != 'sequence':
                continue
            for taxon in sequence:
                if taxon.tag == 'taxon':
                    modify_taxon_sequence(taxon, start_pos, stop_pos)
    return etree.tostring(tree)
Example #2
0
 def __init__(self, xml):
     """
     Import the contents of an XML document and close records missing from it.

     The document is walked once ("start" events).  Per tag:
       * "einsatz"     - start a new EinsatzKlasse and remember its "id"
       * "einsatznr"   - store the text on the current EinsatzKlasse and save it
       * "dispo"       - start a new DispoKlasse linked to the current einsatz id
       * "disponame", "zeitdispo", "zeitalarm", "zeitaus", "zeitein"
                       - store the text on the current DispoKlasse
       * "hintergrund" - store the text on the current DispoKlasse and save it
       * anything else with text (except "root") - store it on the EinsatzKlasse

     Afterwards every EinsatzModel row with abgeschlossen=False and
     selbst_erstellt=False whose ``einsatz`` value does not match the id of
     any "einsatz" element in the document is closed via
     EinsatzKlasse.closeeinsatz.

     The import is best effort: any exception is logged and suppressed.

     @param xml: the XML document, as accepted by etree.fromstring
     """
     import logging

     einsatz_id = 0
     # Ids of all "einsatz" elements, gathered during the single walk so the
     # document does not have to be re-parsed for every open record.
     xml_einsatz_ids = set()
     try:
         xml_root = etree.fromstring(xml)
         context = etree.iterwalk(xml_root, events=("start",))

         # Walk over every element of the XML document
         for action, elem in context:
             tag = elem.tag
             if tag == "root":
                 continue
             if tag == "einsatz":
                 einsatzobj = EinsatzKlasse()
                 einsatz_id = elem.get("id")
                 xml_einsatz_ids.add(einsatz_id)
                 setattr(einsatzobj, tag, einsatz_id)
             elif tag == "einsatznr":
                 setattr(einsatzobj, tag, elem.text)
                 einsatzobj.save()
             elif tag == "dispo":
                 dispoobj = DispoKlasse()
                 setattr(dispoobj, "einsatz", einsatz_id)
                 setattr(dispoobj, tag, elem.get("id"))
             elif tag in ("disponame", "zeitdispo", "zeitalarm",
                          "zeitaus", "zeitein"):
                 setattr(dispoobj, tag, elem.text)
             elif tag == "hintergrund":
                 setattr(dispoobj, tag, elem.text)
                 dispoobj.save()
             elif elem.text:
                 setattr(einsatzobj, tag, elem.text)

         # Look for records that are still open and close them if necessary
         unabgeschl = EinsatzModel.objects.filter(abgeschlossen=False, selbst_erstellt=False)
         for unab in unabgeschl:
             # An open record that no longer appears in the XML gets closed
             if unab.einsatz not in xml_einsatz_ids:
                 closeeins = EinsatzKlasse()
                 closeeins.closeeinsatz(unab.einsatz)
     except Exception:
         # Keep the import best effort, but leave a trace instead of
         # swallowing the failure silently.
         logging.getLogger(__name__).exception("XML import failed")
Example #3
0
    def to_xml(self, path, *args, **kwargs):
        """Write xml file to path (adds .xml extension if none provided).

        Positional ``args`` select the schema: they are forwarded to
        ``Section``; when none are given ``Root()`` is used.  Keyword
        ``kwargs`` are forwarded to ``etree.tostring``.  The file always
        starts with a UTF-8 XML declaration line.
        """

        schema = Section(*args) if args else Root()

        entries = self.get_content(schema)
        xml = self.content_to_xml(schema.to_xml(), entries)

        # Bilingual fields are structured extremely weird and have to be post-processed
        for _, field_xml in etree.iterwalk(xml, tag="field"):
            child = field_xml[0]
            if child.get("type") != "Bilingual":
                continue

            english = child[0].text
            french = child[1].text

            child.remove(child[1])
            child.remove(child[0])
            child.text = english

            # Fill the freshly created element directly.  Addressing it as
            # field_xml[1] is only right when the field had exactly one child.
            bilingual = etree.SubElement(field_xml, "bilingual")
            etree.SubElement(bilingual, "french").text = french
            etree.SubElement(bilingual, "english").text = english

        if not path.endswith(".xml"):
            path = path + ".xml"

        with open(path, 'wb') as f:
            f.write(b'<?xml version="1.0" encoding="UTF-8"?>\n')
            f.write(etree.tostring(xml, **kwargs))
Example #4
0
    def __init__(self, xml_path=None, language="english"):
        """Create an empty store and optionally import entries from an XML file.

        When ``xml_path`` is given, the file is parsed and every "section"
        element is mapped to a ``Section`` by id: container sections go
        through ``get_container``; sections that are neither containers nor
        dependent have their parsed content added with ``add_content``.
        """
        self._index = {}
        self._content = {}

        self.language = language
        self.log = logging.getLogger("CCV")

        if xml_path is None:
            return

        with open(xml_path, 'rb') as xml_file:
            raw = xml_file.read()
        xml = etree.XML(raw)

        self.log.info('# Importing existing entries from "%s" #', xml_path)

        # Re-mapping existing sections according to specified schema
        for _, section_xml in etree.iterwalk(xml, tag="section"):
            # If any parents have fields, do not move
            section = Section(XML(section_xml, language).id)

            if section.is_container:
                self.get_container(section)
            elif not section.is_dependent:
                self.add_content(self.parse_xml(section_xml), section)

        self.log.info('# Finished importing #')
Example #5
0
def render_xml(path, template_name, remove_empty, **nfe):
    """Render a Jinja2 template to XML, optionally pruning empty elements.

    The keyword arguments are passed through ``recursively_normalize`` and
    used as the template context.  The rendered text is parsed with blank
    text and comments removed and CDATA sections preserved.

    Returns the parsed root element when ``remove_empty`` is truthy (after
    detaching every element for which ``recursively_empty`` is true), and
    ``etree.tostring(root)`` otherwise.
    """
    nfe = recursively_normalize(nfe)
    env = Environment(
        loader=FileSystemLoader(path), extensions=['jinja2.ext.with_'])

    env.filters["normalize"] = filters.strip_line_feed
    env.filters["normalize_str"] = filters.normalize_str
    env.filters["format_percent"] = filters.format_percent
    env.filters["format_datetime"] = filters.format_datetime
    env.filters["format_date"] = filters.format_date
    env.filters["comma"] = filters.format_with_comma

    template = env.get_template(template_name)

    xml = template.render(**nfe)
    parser = etree.XMLParser(remove_blank_text=True, remove_comments=True,
                             strip_cdata=False)
    root = etree.fromstring(xml, parser=parser)
    if not remove_empty:
        return etree.tostring(root)

    for _, elem in etree.iterwalk(root):
        parent = elem.getparent()
        # The root has no parent and cannot be detached; skipping it avoids
        # an AttributeError when the whole document turns out to be empty.
        if parent is not None and recursively_empty(elem):
            parent.remove(elem)
    return root
Example #6
0
def render_xml(path, template_name, remove_empty, **nfe):
    """Render a Jinja2 template to XML, optionally pruning empty elements.

    The keyword arguments are used as the template context.  The rendered
    text is parsed with blank text and comments removed and CDATA sections
    preserved.

    Returns the parsed root element when ``remove_empty`` is truthy (after
    detaching every element for which ``recursively_empty`` is true), and
    ``etree.tostring(root)`` otherwise.
    """
    env = Environment(loader=FileSystemLoader(path),
                      extensions=['jinja2.ext.with_'])

    env.filters["normalize"] = filters.strip_line_feed
    env.filters["normalize_str"] = filters.normalize_str
    env.filters["format_percent"] = filters.format_percent
    env.filters["format_datetime"] = filters.format_datetime
    env.filters["format_date"] = filters.format_date

    template = env.get_template(template_name)

    xml = template.render(**nfe)
    parser = etree.XMLParser(remove_blank_text=True,
                             remove_comments=True,
                             strip_cdata=False)
    root = etree.fromstring(xml, parser=parser)
    if not remove_empty:
        return etree.tostring(root)

    for _, elem in etree.iterwalk(root):
        parent = elem.getparent()
        # The root has no parent and cannot be detached; skipping it avoids
        # an AttributeError when the whole document turns out to be empty.
        if parent is not None and recursively_empty(elem):
            parent.remove(elem)
    return root
Example #7
0
def salesforce_encoding(xdoc):
    """Serialize ``xdoc`` by hand, starting from ``xml_encoding``.

    If ``SF_NS`` occurs in the root tag, an ``xmlns`` attribute with that
    value is set on the root first.  Tags are written without their
    ``{namespace}`` prefix, element text is escaped (including both quote
    characters), elements for which ``has_content`` is false are written as
    self-closing tags, and an element without a tail gets a newline after its
    closing tag.  Comments are emitted via ``str`` followed by their tail.
    """

    def local_name(tag):
        # "{namespace}name" -> "name"
        return tag.split("}")[1] if "}" in tag else tag

    root = xdoc.getroot()
    if SF_NS in root.tag:
        root.attrib["xmlns"] = SF_NS

    pieces = [xml_encoding]
    walker = ET.iterwalk(
        xdoc, events=("start", "end", "start-ns", "end-ns", "comment"))
    for action, elem in walker:
        # "start-ns" is ignored on purpose:
        # handle this nicely if SF starts using multiple namespaces
        if action == "start":
            tag = local_name(elem.tag)
            attrs = "".join([f' {k}="{v}"' for k, v in elem.attrib.items()])
            if has_content(elem):
                if elem.text is not None:
                    text = escape(elem.text, {"'": "&apos;", '"': "&quot;"})
                else:
                    text = ""
                pieces.append(f"<{tag}{attrs}>{text}")
            else:
                pieces.append(f"<{tag}{attrs}/>")
        elif action == "end":
            if has_content(elem):
                tag = local_name(elem.tag)
                tail = elem.tail if elem.tail else "\n"
                pieces.append(f"</{tag}>{tail}")
        elif action == "comment":
            pieces.append(str(elem) + (elem.tail if elem.tail else ""))
    return "".join(pieces)
Example #8
0
 def clearEmptyElements(self, root):
     """ Detach every element under root that isRecursivelyEmpty reports as empty.

     Elements without a parent are never removed.
     """
     for _, node in ET.iterwalk(root):
         owner = node.getparent()
         if owner is None:
             continue
         if self.isRecursivelyEmpty(node):
             owner.remove(node)
Example #9
0
def xml2simpleconfig(flt_ctxt, in_obj):
    """Mapping (almost bijective) XML -> simpleconfig

    Inverse mapping cannot be generally lossless, as XML cannot contain
    repeated attributes, which seems/is valid with simpleconfig.
    See `simpleconfig` docstring for details about the target representation.

    Each element becomes a ``[tag, sorted-attribute-tuple, children]`` list,
    where ``children`` is ``None`` or a list of such lists.
    """
    # Explicit stack of the elements that are currently open; the walk is
    # iterative rather than recursive.
    stack = []
    walker = etree.iterwalk(in_obj('etree'), events=('start', 'end'))
    for action, e in walker:
        if action == 'start':
            stack.append([e.tag, tuple(sorted(iter_items(e.attrib))), None])
            continue
        if action != 'end':
            continue
        if len(stack) == 1:
            # only the outermost element is left open: we are done
            break
        finished = stack.pop()
        parent = stack[-1]
        if parent[2] is None:
            parent[2] = []
        parent[2].append(finished)
    return ('struct', stack[-1])
Example #10
0
    def get_email(self):
        """Return the rendered (subject, body) pair for the current message.

        Walks the "block" elements of ``self.template``.  A block applies when
        its "onstate" attribute equals ``self.status``, or when its
        "afterstate" equals ``self.status`` and its "time" equals
        ``self.time_string``; the last applicable block wins.  Every visited
        block is cleared.  A missing subject or body falls back to a default
        built from ``self.message`` and a warning is logged.
        """
        def get_node_content(element):
            # element text followed by the serialized child elements
            leading = element.text
            return (leading + "".join(map(etree.tostring, element))).strip()

        subject, body = None, None
        for _, block in etree.iterwalk(self.template, tag="block"):
            applies = (
                block.get("onstate", None) == self.status
                or (block.get("afterstate", None) == self.status
                    and block.get("time", None) == self.time_string))
            if applies:
                subject = get_node_content(block.find("subject"))
                body = get_node_content(block.find("body"))
            block.clear()
        # get defaults
        if not subject:
            subject = "Item Notification: %s" % (self.message["title"])
            log.warn("Missing subject template for %s:%s. Using default" %
                     (self.message["type"], self.message["status"]))
        if not body:
            body = "Item, %s, type: %s, status: %s" % (
                self.message["title"], self.message["type"],
                self.message["status"]["value"])
            log.warn("Missing body template for %s:%s. Using default" %
                     (self.message["type"], self.message["status"]))
        subject_template = EmailTemplate()
        body_template = EmailTemplate()
        subject_template.write(subject)
        body_template.write(body)
        rendered_subject = subject_template(item=Item(self.message))
        rendered_body = body_template(item=Item(self.message))
        return (rendered_subject, rendered_body)
Example #11
0
    def create_svg(self, template, file_out, bone):
        """Fill the elements of an SVG template according to ``bone`` and save it.

        The template is read from ``'svg/' + template``.  For each element
        whose id (after ``self.clear_id``) is a key of ``bone`` with a value
        that is not None and not negative, the element's style is set to the
        fill colour for that value, and every white fill in its children's
        styles is replaced by the same colour.  The result is written to
        ``file_out + '.svg'`` and then loaded with ``svg2rlg``.
        """
        colors = {
            0: 'fill:#ffffff',
            1: 'fill:#e1e1e1',
            2: 'fill:#808080',
            3: 'fill:#4b4b4b',
            4: 'fill:#000000',
        }

        doc = etree.parse('svg/' + template)
        for _, el in etree.iterwalk(doc):
            element_id = el.attrib.get('id')
            if element_id is not None:
                element_id = self.clear_id(element_id)
            if element_id not in bone:
                continue
            value = bone[element_id]
            if value is None or not value >= 0:
                continue
            fill = colors[value]
            el.attrib["style"] = fill + ";fill-opacity:1"
            for item in el:
                # replace() leaves the style untouched when no white fill is present
                item.attrib["style"] = item.attrib["style"].replace(
                    'fill:#ffffff', fill)

        svg_path = file_out + '.svg'
        serialized = etree.tostring(doc, pretty_print=True).decode('utf-8')
        with open(svg_path, 'w') as f:
            f.write('<?xml version="1.0" encoding="UTF-8"?>\n' + serialized)

        drawing = svg2rlg(file_out + '.svg')
Example #12
0
 def check(self):
     """Raise ElectionTreeError for unnamed contests and shared contest names.

     Every "Contest" element (tag compared after ``strip_schema_ns``) must
     have a non-empty <Name>; a name used by more than one contest is
     reported together with the objectIds involved.
     """
     # Contest name -> objectIds of the contests carrying it.
     # Ideally 1 objectId per name; duplicates show up as longer lists.
     contests_by_name = {}
     errors = []
     for _, element in etree.iterwalk(self.election_tree):
         if self.strip_schema_ns(element) != "Contest":
             continue
         object_id = element.get("objectId", None)
         name = element.find("Name")
         if name is not None and name.text:
             contests_by_name.setdefault(name.text, []).append(object_id)
             continue
         error_message = "Contest {0} is missing a <Name> ".format(
             object_id)
         errors.append(base.ErrorLogEntry(
             element.sourceline, error_message))
     for name, contests in contests_by_name.items():
         if len(contests) <= 1:
             continue
         error_message = ("Contest name '{0}' appears in following {1} contests: {2}".format(
             name, len(contests), ", ".join(contests)))
         errors.append(base.ErrorLogEntry(None, error_message))
     if errors:
         raise base.ElectionTreeError(
             "The Election File contains duplicate contest names.", errors)
Example #13
0
def xml2simpleconfig(flt_ctxt, in_obj):
    """Mapping (almost bijective) XML -> simpleconfig

    Inverse mapping cannot be generally lossless, as XML cannot contain
    repeated attributes, which seems/is valid with simpleconfig.
    See `simpleconfig` docstring for details about the target representation.

    Each element becomes a ``[tag, sorted-attribute-tuple, children]`` list,
    where ``children`` is ``None`` or a list of such lists.
    """
    # Explicit stack of the elements that are currently open; the walk is
    # iterative rather than recursive.
    stack = []
    walker = etree.iterwalk(in_obj('etree'), events=('start', 'end'))
    for action, e in walker:
        if action == 'start':
            stack.append([e.tag, tuple(sorted(iter_items(e.attrib))), None])
            continue
        if action != 'end':
            continue
        if len(stack) == 1:
            # only the outermost element is left open: we are done
            break
        finished = stack.pop()
        parent = stack[-1]
        if parent[2] is None:
            parent[2] = []
        parent[2].append(finished)
    return ('struct', stack[-1])
Example #14
0
def main():
    """Apply an XSLT to an XML report, fix up file paths, and print the result.

    ``sys.argv[1]`` is the XML input and ``sys.argv[2]`` the stylesheet.
    For every "file" element of the transformed tree, the classname and name
    of its first child are passed to ``get_file_path``; a non-empty result is
    stored in the element's "path" attribute.
    """
    xml_input = sys.argv[1]  # build/gtestresults.xml
    xsl_templ = sys.argv[2]  # sonar_test.xsl

    with open(xml_input) as xml_file:
        xmldoc = etree.parse(xml_file)

    with open(xsl_templ) as xslt_file:
        transform = etree.XSLT(etree.XML(xslt_file.read()))

    result_tree = transform(xmldoc)

    for _, file_elem in etree.iterwalk(result_tree, tag="file"):
        testcase = file_elem[0]
        real_file_path = get_file_path(testcase.attrib["classname"],
                                       testcase.attrib["name"])
        if len(real_file_path) > 0:
            file_elem.attrib["path"] = real_file_path

    print(result_tree)
    def get_email(self):
        """Return the rendered (subject, body) pair for the current message.

        Walks the "block" elements of ``self.template``.  A block applies when
        its "onstate" attribute equals ``self.status``, or when its
        "afterstate" equals ``self.status`` and its "time" equals
        ``self.time_string``; the last applicable block wins.  Every visited
        block is cleared.  A missing subject or body falls back to a default
        built from ``self.message`` and a warning is logged.
        """
        def get_node_content(element):
            # element text followed by the serialized child elements
            leading = element.text
            return (leading + "".join(map(etree.tostring, element))).strip()

        subject, body = None, None
        for _, block in etree.iterwalk(self.template, tag="block"):
            applies = (
                block.get("onstate", None) == self.status
                or (block.get("afterstate", None) == self.status
                    and block.get("time", None) == self.time_string))
            if applies:
                subject = get_node_content(block.find("subject"))
                body = get_node_content(block.find("body"))
            block.clear()
        # get defaults
        if not subject:
            subject = "Item Notification: %s" % (self.message["title"])
            log.warn("Missing subject template for %s:%s. Using default" %
                     (self.message["type"], self.message["status"]))
        if not body:
            body = "Item, %s, type: %s, status: %s" % (
                self.message["title"], self.message["type"],
                self.message["status"]["value"])
            log.warn("Missing body template for %s:%s. Using default" %
                     (self.message["type"], self.message["status"]))
        subject_template = EmailTemplate()
        body_template = EmailTemplate()
        subject_template.write(subject)
        body_template.write(body)
        rendered_subject = subject_template(item=Item(self.message))
        rendered_body = body_template(item=Item(self.message))
        return (rendered_subject, rendered_body)
Example #16
0
    def to_pahdb_dict(self, validate=False):
        """Parses the XML, with or without validation.

        Args:
            validate (bool). Validation runs when either this argument or
            self.validate is true.

        Note:
            Sets the attribute self.library when successful.

        Returns: library (dict): Dictionary, with the UIDs as keys,
            containing the transitions, geometry data, as well as UID
            metadata, references, comments, and laboratory.

        """
        events = ("start", "end")

        if self.validate or validate:
            # verify against the schema, then walk the tree held in self._tree
            self.verify_schema()
            self._context = etree.iterwalk(self._tree, events=events)
        else:
            # no validation requested: parse incrementally from the file
            self._context = etree.iterparse(self.filename, events=events)

        self.library = self._tree_to_pahdb_dict()

        return self.library
Example #17
0
  def check_rules(self):
    """Parses the election file and runs every registered rule against it.

    A parse failure is printed and counted as an ElectionError.  Rules
    registered under "tree" are checked once without arguments; rules
    registered under an element class are checked for each matching element
    as the walk leaves it.  ElectionExceptions go to self.exception_handler.
    """
    try:
      self.election_tree = etree.parse(self.election_file)
    except etree.LxmlError as e:
      print("Fatal Error. XML file could not be parsed. {}".format(e))
      self.exception_counts[ElectionError] += 1
      self.total_count += 1
      return
    self.register_rules()

    def run(rule, *args):
      # Run one rule and route its ElectionException to the handler.
      try:
        rule.check(*args)
      except ElectionException as e:
        self.exception_handler(rule, e)

    for tree_rule in self.registry.get("tree", []):
      run(tree_rule)

    for _, element in etree.iterwalk(self.election_tree, events=("end",)):
      tag = self.get_element_class(element)
      if tag and tag in self.registry:
        for element_rule in self.registry[tag]:
          run(element_rule, element)
Example #18
0
def iter_types(ctx: Context, html: bytes) -> Iterator[ApiType]:
    """Collect API types from an HTML page into ``ctx.types_repository``.

    Every "h4" element is offered to an ``ApiTypeSignature``.  When the
    signature answers ``SIGN_NEXT``, the heading's following siblings are fed
    to it until it reports ``SIGN_MATCHED`` (the type is extracted, stored
    under its name, and the signature is cleared) or ``SIGN_STOP``.

    Returns the values view of ``ctx.types_repository``.
    """
    headings = etree.iterwalk(etree.HTML(html), events=("start",), tag="h4")
    sign = ApiTypeSignature()

    for _, heading in headings:
        if sign.consume(heading) != SIGN_NEXT:
            continue

        for sibling in heading.itersiblings():
            outcome = sign.consume(sibling)

            if outcome == SIGN_MATCHED:
                ty = extract_type(ctx, sign)
                ctx.types_repository[ty.name] = ty
                sign.clear()
                break

            if outcome == SIGN_STOP:
                break

    return ctx.types_repository.values()
Example #19
0
 def text(self):
     """Return the text of the transcript page held in self.xml_tree().

     Paragraph text is included unless the paragraph starts with a
     runningHead child, or its text is shorter than 20 characters and matches
     TranscriptPageJoiner.ignore_p; the end of an included paragraph adds a
     blank line.  Speaker names are wrapped in a "speaker" span, and the text
     and tail of the evidence/exhibit elements are appended as they close.
     """
     # TODO: this blob won't allow exact phrase matches across transcript pages.
     # It might be extended a few words into either adjacent page to allow that.
     pieces = []
     walker = etree.iterwalk(self.xml_tree(), events=('start', 'end'))
     for event, element in walker:
         tag = element.tag
         if tag == 'p':
             if len(element) and element[0].tag == 'runningHead':
                 continue
             if event != 'start':
                 pieces.append('\n\n')
                 continue
             content = element.text
             if not content:
                 continue
             if len(content) < 20 and TranscriptPageJoiner.ignore_p.match(
                     content):
                 continue
             pieces.append(content)
         elif event != 'end':
             continue
         elif tag == 'spkr':
             if element.text:
                 pieces.append('<span class="speaker">{}</span> '.format(
                     element.text))
             if element.tail:
                 pieces.append(element.tail)
         elif tag in ('evidenceFileDoc', 'exhibitDocDef', 'exhibitDocPros'):
             if element.text:
                 pieces.append(element.text)
             if element.tail:
                 pieces.append(element.tail)
     return ''.join(pieces)
def parseMethod(method_elem, nt):
    """Collect the elements tagged ``srcml_tags[nt]`` inside ``method_elem``.

    Returns three parallel lists: the matching elements, their zero-based
    source lines (``sourceline - 1``), and a depth counter per element.  The
    counter goes up when a start event directly follows another start event
    and down when an end event directly follows another end event.
    """
    starts = []
    instances = []
    depths = []

    depth = 0
    previous = 'start'
    walker = etree.iterwalk(method_elem,
                            events=('start', 'end'),
                            tag=srcml_tags[nt])
    for action, elem in walker:
        if action == 'start':
            starts.append(elem.sourceline - 1)
            instances.append(elem)
            if previous == 'start':
                depth += 1
            depths.append(depth)
        elif action == 'end' and previous == 'end':
            depth -= 1

        previous = action

    return (instances, starts, depths)
Example #21
0
def _find_tag_limits(root):
    """Locate the ' __START_x__ ' and ' __END_x__ ' markers below ``root``.

    The text and the tail of every element are searched.  Returns two lists
    of ``_TagPosition`` (start markers, end markers); each records the
    element, the marker name, the match offset and length, whether the match
    was found in the tail, and a ``dfs_number`` of -1.
    """
    START_RE = re.compile(r' __START_(\w+)__ ')
    END_RE = re.compile(r' __END_(\w+)__ ')

    starts = []
    ends = []
    searches = ((START_RE, starts), (END_RE, ends))

    for _, element in etree.iterwalk(root, events=('start', )):
        for text, is_tail in ((element.text, False), (element.tail, True)):
            if not text:
                continue
            for regexp, storage in searches:
                storage.extend(
                    _TagPosition(element=element,
                                 tag=match.group(1),
                                 position=match.start(),
                                 length=match.end() - match.start(),
                                 is_tail=is_tail,
                                 dfs_number=-1)
                    for match in regexp.finditer(text))

    return starts, ends
Example #22
0
def render_xml(path, template_name, remove_empty, **nfe):
    """Render a Jinja2 template to XML, optionally pruning empty elements.

    The keyword arguments are passed through ``recursively_normalize`` and
    used as the template context.  The rendered text is parsed with blank
    text and comments removed and CDATA sections preserved.

    Returns the parsed root element when ``remove_empty`` is truthy (after
    detaching every element for which ``recursively_empty`` is true).
    Otherwise whitespace-only element text is dropped and the document is
    returned serialized as ``str``.
    """
    nfe = recursively_normalize(nfe)
    env = Environment(loader=FileSystemLoader(path),
                      extensions=['jinja2.ext.with_'])

    env.filters["normalize"] = filters.strip_line_feed
    env.filters["normalize_str"] = filters.normalize_str
    env.filters["format_percent"] = filters.format_percent
    env.filters["format_datetime"] = filters.format_datetime
    env.filters["format_date"] = filters.format_date
    env.filters["comma"] = filters.format_with_comma

    template = env.get_template(template_name)
    xml = template.render(**nfe)
    parser = etree.XMLParser(remove_blank_text=True,
                             remove_comments=True,
                             strip_cdata=False)
    root = etree.fromstring(xml, parser=parser)
    if remove_empty:
        for _, elem in etree.iterwalk(root):
            parent = elem.getparent()
            # The root has no parent and cannot be detached; skipping it
            # avoids an AttributeError when the whole document is empty.
            if parent is not None and recursively_empty(elem):
                parent.remove(elem)
        return root
    for element in root.iter("*"):  # drop whitespace-only text
        if element.text is not None and not element.text.strip():
            element.text = None
    return etree.tostring(root, encoding=str)
Example #23
0
def html_to_template_text(unsafe_html, context=None):
    """
    Parse html and turn it into template text.

    The html is sanitized and parsed into fragments that are gathered in a
    'div' container.  Every element whose tag has entries in ``tag_handlers``
    is passed, with ``context``, to those handlers in order until one returns
    False.  The result is ``tag_imports``, the escaped leading text and the
    serialized top-level elements joined together and run through
    ``sanitize_final``.
    """
    # TODO: factor out parsing/serializing
    safe_html = sanitize_intermediate(unsafe_html)
    top_level_elements = fragments_fromstring(safe_html)

    # put top level elements in container; a leading non-element fragment
    # becomes the container's text
    container = etree.Element('div')
    if top_level_elements and not hasattr(top_level_elements[0], 'tag'):
        container.text = top_level_elements.pop(0)
    container.extend(top_level_elements)

    # walk over all elements
    for _, elem in etree.iterwalk(container, events=('end',)):
        if elem.tag not in tag_handlers:
            continue
        for handler in tag_handlers[elem.tag]:
            if handler(elem, context) is False:
                break

    template_bits = [etree.tostring(child, encoding='UTF-8')
                     for child in container]
    pieces = tag_imports + [escape(container.text or '')] + template_bits
    return sanitize_final(''.join(pieces))
Example #24
0
def xml2nodes(xml_content: Union[str, bytes]):
    """Summarize the elements of an XML document as "key:value|..." strings.

    For each element the "bounds" attribute (four integers expected) is
    replaced by a "size" tuple of (width, height) and "index" is dropped.
    Elements whose "bounds" does not hold exactly four numbers are skipped,
    as are elements with none of "text", "resource-id" or "content-desc".
    The remaining attributes are sorted by name and joined with "|".
    """
    if isinstance(xml_content, str):
        xml_content = xml_content.encode("utf-8")

    root = etree.fromstring(xml_content)
    identifying = ("text", "resource-id", "content-desc")
    nodes = []
    for _, node in etree.iterwalk(root):
        attrib = dict(node.attrib)
        if "bounds" in attrib:
            numbers = re.findall(r"(\d+)", attrib.pop("bounds"))
            if len(numbers) != 4:
                continue
            lx, ly, rx, ry = map(int, numbers)
            attrib['size'] = (rx - lx, ry - ly)
        attrib.pop("index", None)

        if not any(name in attrib for name in identifying):
            continue
        nodes.append('|'.join(
            key + ":" + str(value) for key, value in sorted(attrib.items())))
    return nodes
Example #25
0
  def check_rules(self):
    """Parses the schema and election files and runs every registered rule.

    A parse failure is reported as an ElectionFatal through the exceptions
    wrapper.  Rules registered under "tree" are checked once without
    arguments; rules registered under an element class are checked for each
    matching element as the walk leaves it.  ElectionExceptions are passed to
    the exceptions wrapper together with the rule's class name.
    """
    try:
      self.schema_tree = etree.parse(self.schema_file)
      self.election_tree = etree.parse(self.election_file)
    except etree.LxmlError as e:
      fatal = loggers.ElectionFatal.from_message(
          "Fatal Error. XML file could not be parsed. {}".format(e))
      self.exceptions_wrapper.exception_handler(fatal)
      return
    self.register_rules()

    def run(rule, *args):
      # Run one rule and route its ElectionException to the wrapper.
      try:
        rule.check(*args)
      except loggers.ElectionException as e:
        self.exceptions_wrapper.exception_handler(e, rule.__class__.__name__)

    for tree_rule in self.registry.get("tree", []):
      run(tree_rule)

    for _, element in etree.iterwalk(self.election_tree, events=("end",)):
      tag = self.get_element_class(element)
      if tag and tag in self.registry:
        for element_rule in self.registry[tag]:
          run(element_rule, element)
    def check_rules(self):
        """Checks all rules.

        Rules registered under "tree" are checked once without arguments;
        rules registered under an element class are checked for each matching
        element as the walk leaves it.  ElectionExceptions are passed to
        self.exception_handler.

        Returns:
            1 if the election file cannot be parsed or self.total_count is
            non-zero after all checks, 0 otherwise.
        """
        try:
            election_tree = etree.parse(self.election_file)
        except etree.LxmlError as e:
            print("Fatal Error. XML file could not be parsed. {}".format(e))
            return 1
        self.register_rules(election_tree)

        def run(rule, *args):
            # Run one rule and route its ElectionException to the handler.
            try:
                rule.check(*args)
            except ElectionException as e:
                self.exception_handler(rule, e)

        for tree_rule in self.registry.get("tree", []):
            run(tree_rule)

        for _, element in etree.iterwalk(election_tree, events=("end", )):
            tag = self.get_element_class(element)
            if tag and tag in self.registry.keys():
                for element_rule in self.registry[tag]:
                    run(element_rule, element)

        return 0 if self.total_count == 0 else 1
Example #27
0
def _find_tag_limits(root):
    """Locate the ' __START_x__ ' and ' __END_x__ ' markers below ``root``.

    The text and the tail of every element are searched.  Returns two lists
    of ``_TagPosition`` (start markers, end markers); each records the
    element, the marker name, the match offset and length, whether the match
    was found in the tail, and a ``dfs_number`` of -1.
    """
    START_RE = re.compile(r' __START_(\w+)__ ')
    END_RE = re.compile(r' __END_(\w+)__ ')

    starts = []
    ends = []
    searches = ((START_RE, starts), (END_RE, ends))

    for _, element in etree.iterwalk(root, events=('start',)):
        for text, is_tail in ((element.text, False), (element.tail, True)):
            if not text:
                continue
            for regexp, storage in searches:
                storage.extend(
                    _TagPosition(element=element,
                                 tag=match.group(1),
                                 position=match.start(),
                                 length=match.end() - match.start(),
                                 is_tail=is_tail,
                                 dfs_number=-1)
                    for match in regexp.finditer(text))

    return starts, ends
Example #28
0
def html_to_template_text(unsafe_html):
    """
    Parse html and turn it into template text.

    The html is sanitized and parsed into fragments that are gathered in a
    'div' container.  Every element whose tag has entries in ``tag_handlers``
    is passed to those handlers in order until one returns False.  The result
    is ``tag_imports``, the leading text and the serialized top-level
    elements joined together and run through ``sanitize_final``.
    """
    safe_html = sanitize_intermediate(unsafe_html)
    top_level_elements = fragments_fromstring(safe_html)

    # put top level elements in container; a leading non-element fragment
    # becomes the container's text
    container = etree.Element('div')
    if top_level_elements and not hasattr(top_level_elements[0], 'tag'):
        container.text = top_level_elements.pop(0)
    container.extend(top_level_elements)

    # walk over all elements
    for _, elem in etree.iterwalk(container, events=('end', )):
        if elem.tag not in tag_handlers:
            continue
        for handler in tag_handlers[elem.tag]:
            if handler(elem) is False:
                break

    template_bits = [
        etree.tostring(child, encoding='utf-8') for child in container
    ]
    pieces = tag_imports + [container.text or ''] + template_bits
    return sanitize_final(''.join(pieces))
Example #29
0
def extract_rich_text_from_node(element, url):
    """Flatten ``element`` into a list of text fragments.

    Walks the subtree with start/end events and collects element text and
    non-blank tail text, inserting newline or space separators around line
    breaks, images, tags listed in ``TITLE_PARAGRAPH_TAGS``, table cells
    and table rows.  Nodes whose tag is not a string are skipped.

    ``url`` is only forwarded to ``extract_image_from_node``.
    Returns the fragments in the order they were collected.
    """
    fragments = []
    for event, node in etree.iterwalk(element, events=("start", "end")):
        tag = node.tag
        if not isinstance(tag, basestring):
            continue

        if event == "start":
            if tag == "br":    # line break
                fragments.append("\n")
            elif tag == "img":
                # extract the image
                img_url = extract_image_from_node(node, url)
                if img_url is not None:
                    fragments.extend(
                        ["\n", '<img src="%s"/>' % img_url, "\n"])
            # text of script/style elements is never collected
            if tag not in ("script", "style") and node.text is not None:
                fragments.append(node.text)
        elif event == "end":
            if tag in TITLE_PARAGRAPH_TAGS:    # paragraph boundary
                fragments.append("\n")
            if tag == "td":
                fragments.append(" ")
            if tag == "tr":
                fragments.append("\n")

            # keep tail text only when it is more than whitespace
            tail = node.tail
            if tail is not None and tail.strip():
                fragments.append(tail)

    return fragments
Example #30
0
def to_wkt_list(doc):
    '''Convert every KML Polygon found in ``doc`` to Well-Known Text.

    ``doc`` is walked with lxml's ``iterwalk``; its elements must support
    attribute-style child access (``elem.outerBoundaryIs.LinearRing``).
    Each Polygon contributes one ``POLYGON ((outer), (inner), ...)`` string
    made of its outer boundary followed by its inner boundaries.

    Returns the list of WKT strings in document order.
    '''
    from lxml import etree

    polygon_tag = '{http://www.opengis.net/kml/2.2}Polygon'
    inner_tag = '{http://www.opengis.net/kml/2.2}innerBoundaryIs'

    def ring_coords_to_wkt(ring):
        '''converts LinearRing coordinates to WKT style coordinates'''
        # KML separates tuples with whitespace and the values inside a
        # tuple with commas; WKT wants the opposite.  split() copes with
        # newlines and repeated blanks, which would otherwise show up as
        # empty ", , " entries in the output.
        tuples = ring.coordinates.text.split()
        return ', '.join(item.replace(',', ' ') for item in tuples)

    ring_wkt_list = []
    for _action, elem in etree.iterwalk(doc, events=("start", )):
        # Only Polygon elements are converted; a MultiPolygon wrapper
        # yields nothing itself.
        if elem.tag != polygon_tag:
            continue

        # outer boundary first, then every inner boundary
        ringlist = [
            '({0})'.format(
                ring_coords_to_wkt(elem.outerBoundaryIs.LinearRing))
        ]
        for obj in elem.findall(inner_tag):
            ringlist.append('({0})'.format(
                ring_coords_to_wkt(obj.LinearRing)))

        ring_wkt_list.append(
            'POLYGON ({rings})'.format(rings=', '.join(ringlist)))
    return ring_wkt_list
Example #31
0
        def _merge_previous(snippet, hooks, elem, children):
            """Graft previously produced pieces into ``snippet`` in place.

            ``hooks`` is indexed by element tag (or ``'*'``) and yields
            ``(index_history, mix)`` keys; ``children`` maps descendants of
            ``elem`` to objects offering ``getroot()``.  Nothing is
            returned; ``snippet`` is modified.
            """
            # snippet, an original preprocessed "piece of template puzzle",
            # has some of its subelements substituted as per hooks that
            # together with elem traversal and children dict decides which
            # parts (of previously processed symbols) will be grabbed
            scheduled = OrderedDict()  # XXX to keep the law and order
            # Phase 1: for every descendant of elem that has an entry in
            # children, decide which hook(s) its root gets attached to.
            for _, c_elem in etree.iterwalk(elem, events=('start',)):
                if c_elem is elem:
                    continue
                if c_elem in children:
                    # climb towards elem until a hooked tag is met or the
                    # node directly below elem is reached
                    c_up = c_elem
                    while not c_up.tag in hooks and c_up.getparent() != elem:
                        c_up = c_up.getparent()
                    # fall back to the wildcard hook when no tag matched
                    target_tag = c_up.tag if c_up.tag in hooks else '*'
                    if c_up.tag in hooks or '*' in hooks:
                        for h in hooks[target_tag]:
                            l = scheduled.setdefault(h, [])
                            l.append(children[c_elem].getroot())

            # Phase 2: perform the scheduled substitutions, hook by hook.
            for (index_history, mix), substitutes in iter_items(scheduled):
                # follow index_history from snippet, one subscript per
                # step, to reach the hook element
                tag = reduce(lambda x, y: x[y], index_history, snippet)
                parent = tag.getparent()
                # index is referenced only by the commented-out slice
                # assignment further down
                index = parent.index(tag)

                for s in substitutes:
                    #assert s.tag == namespaced(CLUFTER_NS, 'snippet')
                    log.debug("before extension: {0}".format(etree.tostring(s)))
                    if s.tag == namespaced(CLUFTER_NS, 'snippet'):
                        # only single root "detached" supported (first == last)
                        dst = parent
                        # cannot use dict.update(dict) because of losing order
                        for k in s.attrib:
                            dst.attrib[k] = s.attrib[k]
                        #dst[index:index] = s
                        # extend() iterates s, so the hook receives the
                        # snippet's children rather than the wrapper
                        tag.extend(s)
                    elif mix:
                        tag.extend(s)
                    else:
                        # required by obfuscate
                        tag.append(s)
                    log.debug("as extended contains: {0}".format(etree.tostring(tag)))

                # mix == 1 with an explicit 'at' attribute adds an
                # apply-templates restricted to that selection; mix == 2
                # adds a copy element wrapping an apply-templates over
                # "@*|node()"
                at = tag.attrib.get('at', '*')
                if mix == 1 and at != '*':  #and elem.getparent() is None:
                    e = nselem(XSL_NS, 'apply-templates', select=".//{0}".format(at))
                    tag.append(e)
                elif mix == 2:
                    e = nselem(XSL_NS, 'copy')
                    e.append(nselem(XSL_NS, 'apply-templates', select="@*|node()"))
                    tag.append(e)

            # Phase 3: replace every descent/descent-mix element by its own
            # children, keeping their position in the parent.
            cl = snippet.xpath("//clufter:descent|//clufter:descent-mix",
                                 namespaces={'clufter': CLUFTER_NS})
            # remove these remnants so cleanup_namespaces works well
            for e in cl:
                parent = e.getparent()
                index = parent.index(e)
                # insert the children just before e, then drop e itself
                parent[index:index] = e.getchildren()
                e.getparent().remove(e)
Example #32
0
 def getListOfElementsByTagName(self, name):
     """Collect the elements of ``self.xmlTree`` named ``name``.

     Tags are compared after being passed through
     ``BhpDocumentParser.stripNSFromTagName``.  Returns the matching
     elements in 'end'-event order, or None when nothing matches.
     """
     matches = [
         node
         for _event, node in etree.iterwalk(self.xmlTree, events=('end', ))
         if name == BhpDocumentParser.stripNSFromTagName(node.tag)
     ]
     return matches if matches else None
Example #33
0
 def extract_exhibit_codes(self):
     """Return a label for every exhibit element in ``self.xml_tree()``.

     ``exhibitDocPros`` elements yield ``'Prosecution <n>'``;
     ``exhibitDocDef`` elements yield ``'<def> <n>'``, with
     ``'Unknown Defendant'`` standing in for a missing or empty ``def``.
     """
     labels = []
     for _event, node in etree.iterwalk(self.xml_tree()):
         kind = node.tag
         if kind == 'exhibitDocPros':
             label = 'Prosecution {}'.format(node.get('n'))
         elif kind == 'exhibitDocDef':
             owner = node.get('def') or 'Unknown Defendant'
             label = '{} {}'.format(owner, node.get('n'))
         else:
             continue
         labels.append(label)
     return labels
Example #34
0
 def parse_doc(self, field, wrapper=None):
     """Use for retrieving document-level (as opposed to job-level) tags.

     Returns the text of the first element met while walking ``self.doc``
     whose tag equals ``field``, passed through ``wrapper`` when one is
     given.  Returns None when no element matches.
     """
     for _event, node in etree.iterwalk(self.doc):
         if node.tag != field:
             continue
         value = node.text
         return wrapper(value) if wrapper else value
     return None
Example #35
0
    def text(self, page=None, merge_verticals=False):
        """Get the text items.

        If `page` is supplied, it should be a Page (as returned by
        self.pages()); only that page's items are yielded and
        `merge_verticals` is ignored.

        If `merge_verticals` is supplied, the vertical offsets of items will be
        adjusted such that pages following the first appear immediately after
        the content of the preceding page.  Whitespace-only items are
        dropped in this mode, and a page without any printable item takes
        up no vertical space.

        """
        def text_for_page(page):
            # one Text per <text> element, paired with its font spec
            for event, text in etree.iterwalk(page.element, tag='text'):
                yield Text(text, page, self.fontspec(text.attrib['font']))

        if page is not None:
            for item in text_for_page(page):
                yield item
            return

        if not merge_verticals:
            for event, page_element in etree.iterwalk(self.tree, tag='page'):
                for item in text_for_page(Page(page_element)):
                    yield item
            return

        offset = 0
        for event, page_element in etree.iterwalk(self.tree, tag='page'):
            visible = [item for item in text_for_page(Page(page_element))
                       if item.text.strip() != '']
            if not visible:
                # Nothing printable here: skipping keeps the running
                # offset intact instead of folding in bounds that were
                # never measured.
                continue
            ymin = min(item.top for item in visible)
            ymax = max(item.bottom for item in visible)

            for item in visible:
                # shift so this page's first line lands at the offset
                item.yoffset = offset - ymin
                yield item
            offset += ymax - ymin
    def text(self, page=None, merge_verticals=False):
        """Get the text items.

        If `page` is supplied, it should be a Page (as returned by
        self.pages()); only that page's items are yielded and
        `merge_verticals` is ignored.

        If `merge_verticals` is supplied, the vertical offsets of items will be
        adjusted such that pages following the first appear immediately after
        the content of the preceding page.  Whitespace-only items are
        dropped in this mode, and a page without any printable item takes
        up no vertical space.

        """
        def text_for_page(page):
            # one Text per <text> element, paired with its font spec
            for event, text in etree.iterwalk(page.element, tag='text'):
                yield Text(text, page, self.fontspec(text.attrib['font']))

        if page is not None:
            for item in text_for_page(page):
                yield item
            return

        if not merge_verticals:
            for event, page_element in etree.iterwalk(self.tree, tag='page'):
                for item in text_for_page(Page(page_element)):
                    yield item
            return

        offset = 0
        for event, page_element in etree.iterwalk(self.tree, tag='page'):
            visible = [item for item in text_for_page(Page(page_element))
                       if item.text.strip() != '']
            if not visible:
                # Nothing printable here: skipping keeps the running
                # offset intact instead of folding in bounds that were
                # never measured.
                continue
            ymin = min(item.top for item in visible)
            ymax = max(item.bottom for item in visible)

            for item in visible:
                # shift so this page's first line lands at the offset
                item.yoffset = offset - ymin
                yield item
            offset += ymax - ymin
Example #37
0
 def __findFeatures__(self, ifdef):
     """Concatenate the text of every ``name`` element below ``ifdef``.

     The part of each tag after the first ``}`` is compared with "name".
     Returns the joined text, or "" when there is no such element.
     """
     collected = ""
     for event, node in etree.iterwalk(ifdef):
         if event != "end":
             continue
         local_name = node.tag.split('}')[1]
         if local_name == "name":
             collected += node.text
     return collected
Example #38
0
 def elements(self):
     """List the names of schema elements declared with ``minOccurs="0"``.

     Parses ``self.schema_file`` and keeps the ``name`` attribute of every
     node that ``self.strip_schema_ns`` reports as "element" and whose
     ``minOccurs`` attribute is "0".
     """
     tree = etree.parse(self.schema_file)
     return [
         node.get("name")
         for _event, node in etree.iterwalk(tree)
         if self.strip_schema_ns(node) == "element"
         and node.get("minOccurs") == "0"
     ]
Example #39
0
 def __findFeatures__(self, ifdef):
     """Concatenate the text of every ``name`` element below ``ifdef``.

     The part of each tag after the first ``}`` is compared with "name".
     Returns the joined text, or "" when there is no such element.
     """
     joined = ""
     for event, node in etree.iterwalk(ifdef):
         if event != "end":
             continue
         local_name = node.tag.split("}")[1]
         if local_name == "name":
             joined += node.text
     return joined
Example #40
0
	def parseWards(self):
		"""Register every ward listed in the image map with ID "map".

		For each ``area`` element below that map, the ``alt`` attribute is
		printed and appended to ``self.wards`` and a ``CouncillorInfo`` is
		created from the ``href`` attribute.
		"""
		# Find the map element with the ID "map"
		mapNode = self.dom.get_element_by_id('map')

		for action, elem in etree.iterwalk(mapNode, tag='area'):
			ward = elem.get('alt')
			# A parenthesised single argument prints identically as a
			# Python 2 statement and as a Python 3 function call.
			print("Downloading %s" % ward)
			self.wards.append(ward)
			CouncillorInfo(elem.get('href'))
Example #41
0
 def remove_empty(self, tree):
     """Remove the nodes of ``tree`` that are recursively empty.

     Every node visited by the walk is tested with
     ``self._recursively_empty`` and, when empty, detached from its
     parent.  A node without a parent (the root) is left in place because
     there is nothing to detach it from.

     Returns ``tree``, modified in place.
     """
     for _action, node in et.iterwalk(tree):
         parent = node.getparent()
         if parent is not None and self._recursively_empty(node):
             parent.remove(node)
     return tree
    def filter_xml(self, filename):
        """Strip ZPL-managed objects from the XML file ``filename``.

        The file is walked with start/end events while ``path`` tracks the
        ``id`` attributes of the enclosing ``object`` and ``tomanycont``
        elements.  When an ``object`` ends, its path is looked up through
        ``self.dmd.getObjByPath``; objects with a truthy ``zpl_managed``
        attribute are removed, along with a directly preceding comment
        that spells out the same path as a tuple.

        Afterwards the file is deleted when the root has no children left,
        rewritten when something was pruned, and left alone otherwise.
        Any failure is logged and swallowed; nothing is returned.
        """
        pruned = 0
        try:
            tree = etree.parse(filename)

            path = []
            context = etree.iterwalk(tree, events=('start', 'end'))
            for action, elem in context:
                if elem.tag == 'object':
                    if action == 'start':
                        path.append(elem.attrib.get('id'))

                    elif action == 'end':
                        obj_path = '/'.join(path)
                        try:
                            obj = self.dmd.getObjByPath(obj_path)
                            if getattr(obj, 'zpl_managed', False):
                                self.LOG.debug("Removing {} from {}".format(
                                    obj_path, filename))
                                pruned += 1

                                # if there's a comment before it with the
                                # primary path of the object, remove that first.
                                prev = elem.getprevious()
                                if '<!-- ' + repr(
                                        tuple('/'.join(path).split(
                                            '/'))) + ' -->' == repr(prev):
                                    elem.getparent().remove(prev)

                                # Remove the ZPL-managed object
                                elem.getparent().remove(elem)

                        except Exception:
                            # best effort: one object that cannot be
                            # resolved must not stop the rest of the file
                            self.LOG.warning(
                                "Unable to postprocess {} in {}".format(
                                    obj_path, filename))

                        # leave this object's level whether or not it
                        # could be processed
                        path.pop()

                if elem.tag == 'tomanycont':
                    if action == 'start':
                        path.append(elem.attrib.get('id'))
                    elif action == 'end':
                        path.pop()

            if len(tree.getroot()) == 0:
                self.LOG.info("Removing {}".format(filename))
                os.remove(filename)
            elif pruned:
                self.LOG.info("Pruning {} objects from {}".format(
                    pruned, filename))
                with open(filename, 'w') as f:
                    f.write(etree.tostring(tree))
            else:
                self.LOG.debug("Leaving {} unchanged".format(filename))

        # "as" form: accepted by Python 2.6+ and Python 3 alike
        except Exception as e:
            self.LOG.error("Unable to postprocess {}: {}".format(filename, e))
Example #43
0
 def remove_unlikely_nodes(self):
     """Drop the elements of ``self.html`` that look unlikely to be content.

     An element is selected when its class and id, concatenated, match
     ``REGEXPS['unlikelyNodes']`` but not ``REGEXPS['okMaybeItsANode']``
     and it is not the ``body``.  Selected elements are dropped after the
     walk has finished; an ``html`` element is never dropped.
     """
     doomed = []
     for _event, node in iterwalk(self.html):
         signature = "%s%s" % (node.get('class', ''), node.get('id', ''))
         if not REGEXPS['unlikelyNodes'].search(signature):
             continue
         if REGEXPS['okMaybeItsANode'].search(signature):
             continue
         if node.tag == 'body':
             continue
         logger.debug("Removing unlikely node - %s" % signature)
         doomed.append(node)

     for node in doomed:
         if node.tag != 'html':
             node.drop_tree()
    def __init__(self, fd):
        """Parse ``fd`` and index its ``fontspec`` elements.

        The input is read with an HTML parser using the utf8 encoding and
        kept in ``self.tree``.  ``self.fontspecs`` maps each fontspec's
        integer ``id`` to a ``FontSpec``; ids are asserted to be unique.
        """
        self.fontspecs = {}
        parser = etree.HTMLParser(encoding='utf8')
        self.tree = etree.parse(fd, parser)

        for _event, node in etree.iterwalk(self.tree, tag='fontspec'):
            attributes = node.attrib
            key = int(attributes['id'])
            assert key not in self.fontspecs
            self.fontspecs[key] = FontSpec(attributes, key)
Example #45
0
def ViewParsePage(page):
	"""Parse a page element into ``['page', attributes, text, children]``.

	``children`` holds the result of the matching ``_TAGPARSERS`` entry for
	every ``field`` or ``delimiter`` element met below ``page`` that has a
	parent.
	"""
	parsed_children = []
	walker = etree.iterwalk(
		page,
		events=('start', 'end', 'start-ns', 'end-ns'),
		tag=('field', 'delimiter'))
	for event, node in walker:
		if event != 'start':
			continue
		# NOTE(review): only the presence of a parent is required here,
		# whereas ViewParseViews also compares the parent's tag - confirm
		# whether nested fields are meant to be collected.
		if node.getparent() is not None:
			parsed_children.append(_TAGPARSERS[node.tag](node))
	return ['page', dict(page.items()), page.text, parsed_children]
Example #46
0
 def _fast_iter_episode(self, sele, function):
     """Call ``function`` on every ``episode`` element below ``sele``.

     Each element is handed over on its 'end' event and then cleared,
     together with the siblings that precede it, to release memory.
     """
     walker = etree.iterwalk(sele, events=('end',), tag='episode')
     for _event, episode in walker:
         function(episode)
         # Release memory: empty the processed element, then delete the
         # first child of its parent for as long as the element still has
         # a preceding sibling.
         episode.clear()
         while episode.getprevious() is not None:
             parent = episode.getparent()
             del parent[0]
     del walker
 def elements(self):
     """List the names of schema elements typed as IDREF or IDREFS.

     Parses ``self.schema_file`` and keeps the ``name`` attribute of every
     node that ``self.strip_schema_ns`` reports as "element" and whose
     ``type`` attribute is "xs:IDREF" or "xs:IDREFS".
     """
     reference_types = ("xs:IDREF", "xs:IDREFS")
     tree = etree.parse(self.schema_file)
     return [
         node.get("name")
         for _event, node in etree.iterwalk(tree)
         if self.strip_schema_ns(node) == "element"
         and node.get("type") in reference_types
     ]
Example #48
0
def ViewParseViews(view):
	"""Parse a view element into ``[tag, attributes, text, children]``.

	``children`` holds the result of the matching ``_TAGPARSERS`` entry for
	every element with a tag in ``_ELTAGS`` whose parent carries the same
	tag as ``view``.
	"""
	parsed_children = []
	walker = etree.iterwalk(
		view,
		events=('start', 'end', 'start-ns', 'end-ns'),
		tag=_ELTAGS)
	for event, node in walker:
		if event != 'start':
			continue
		parent = node.getparent()
		if parent is None or parent.tag != view.tag:
			continue
		parsed_children.append(_TAGPARSERS[node.tag](node))

	return [view.tag, dict(view.items()), view.text, parsed_children]
Example #49
0
    def remove_unlikely_candidates(self):
        """Drop the elements of ``self.html`` that look unlikely to be content.

        An element is dropped, during the walk, when its class and id,
        concatenated, match ``REGEXES['unlikelyCandidatesRe']`` but not
        ``REGEXES['okMaybeItsACandidateRe']`` and it is not the ``body``.
        Every concatenated class/id string is passed to ``self.debug``.
        """
        for _event, node in iterwalk(self.html):
            signature = "%s%s" % (node.get('class', ''), node.get('id', ''))
            self.debug(signature)
            if not REGEXES['unlikelyCandidatesRe'].search(signature):
                continue
            if REGEXES['okMaybeItsACandidateRe'].search(signature):
                continue
            if node.tag == 'body':
                continue
            self.debug("Removing unlikely candidate - %s" % (signature,))
            node.drop_tree()
Example #50
0
def _match_one(tree, nodes, selector):
    elements = [] 
    prefix = selector[0]
    sc = selector[1:]
    #find by attr
    if prefix == ".": 
        parts = sc.split("-")
        if parts:
            cat = parts[0]
            value = "-".join(parts[1:])
        else:
            cat = "class",
            value = sc 
        selector = re.compile(value)
        for node in nodes: 
            v = node.attrib.get(cat, "") 
            if selector.findall(v):
                elements.append(node) 
    #find by line number
    elif prefix == ">":
        if "-" in selector:
            smin, smax = [int(x) for x in sc.split("-")]
        else:        
            smin = smax = int(sc)
        for _, node in etree.iterwalk(tree, tag="*", events=("start", )):
            line = node.sourceline
            if line >= smin and line <= smax:
                elements.append(node)
    #find by text
    elif prefix == "-": 
        for _, node in etree.iterwalk(tree, tag="*", events=("start", )): 
            if node.text and re.findall(sc.decode("utf-8"), node.text, re.UNICODE):
                elements.append(node) 
    #find by xpath
    elif prefix == ",":
        elements.extend(tree.xpath(sc))
    #find by tag
    else:
        for node in nodes:
            if selector == node.tag:
                elements.append(node)
    return elements 
 def elements(self):
     """List the names of schema elements typed ``InternationalizedText``.

     Parses ``self.schema_file`` and collects, without duplicates and in
     walk order, the ``name`` attribute of every node that
     ``self.strip_schema_ns`` reports as "element" and whose ``type``
     attribute is "InternationalizedText".
     """
     tree = etree.parse(self.schema_file)
     names = []
     for _event, node in etree.iterwalk(tree):
         if self.strip_schema_ns(node) != "element":
             continue
         if node.get("type", None) != "InternationalizedText":
             continue
         if node.get("name") not in names:
             names.append(node.get("name"))
     return names
Example #52
0
def verify(collection, urn, current_wikipages_list, force_update=False):
    """Updates a urn's indexing info and returns the set of its recursive links.

    `collection`: the mongo collection to use as returned by ``get_indexing_mongo_database()``.
    `urn`: the urn to update the index for, starting with "urn:".
    `current_wikipages_list`: the urls pointing to `urn`; stored sorted.
    `force_update`: set to True to update the index even if `urn` is already in the index (defaults to ``False``).
    """
    if not force_update:
        # Already indexed: answer from the stored document.
        cached = collection.find_one({"urn": urn}, {"recursive_links": 1})
        if cached:
            try:
                cached_links = cached["recursive_links"]
            except KeyError:
                return set()
            return set(cached_links)

    resource_database = get_resource_database()

    try:
        tree = resource_database.get_xml_tree(urn)
    except UnexpectedHeader:
        # it must be a blob
        perform_upsert(collection, urn, {"fqn": None})
        return set()

    xlink_href = '{http://www.w3.org/1999/xlink}href'
    parents_tag = '{http://ductus.us/ns/2009/ductus}parents'

    # Direct links: xlink:href values with the expected urn prefix, except
    # those sitting directly under a <parents> element.
    links = set()
    for _event, node in etree.iterwalk(tree):
        if xlink_href not in node.attrib:
            continue
        if node.getparent().tag == parents_tag:
            continue
        href = node.attrib[xlink_href]
        if href.startswith('urn:%s:' % hash_name):
            links.add(href)

    # Recursive links: the direct ones plus everything they reach.
    recursive_links = set(links)
    for link in links:
        recursive_links.update(verify(collection, link, []))

    resource = resource_database.get_resource_object(urn)

    assert resource.fqn is not None
    obj = {
        "fqn": resource.fqn,
        "links": list(links),
        "recursive_links": sorted(recursive_links),
        "current_wikipages": sorted(current_wikipages_list),
    }
    # Resources lacking parents or tags are stored without those keys.
    try:
        obj["parents"] = sorted([parent.href for parent in resource.common.parents])
        obj["tags"] = sorted([tag.value for tag in resource.tags])
    except AttributeError:
        pass
    perform_upsert(collection, urn, obj)

    return recursive_links
Example #53
0
def query_element(tree, selector):
    """Return the elements of ``tree`` matched by ``selector``.

    A selector starting with "[" is a comma-separated list of selectors
    whose matches are concatenated in order; any other selector is matched
    as a single one.  Matching itself is delegated to ``_match_one``.
    """
    nodes = [
        node
        for _, node in etree.iterwalk(tree, tag="*", events=("start", ))
    ]

    if selector.startswith("["):
        parts = selector.strip("[").strip("]").split(",")
        selectors = [part.strip() for part in parts]
    else:
        selectors = [selector]

    matched = []
    for single in selectors:
        matched.extend(_match_one(tree, nodes, single))
    return matched
def enumerate_paths(element, prefixes):
    """Enumerate the keys that describe ``element`` and its direct children.

    ``prefixes`` maps a namespace (as returned by ``lxml_tag``) to the
    prefix handed to ``path_element``.

    Returns a tuple ``(added, indices, star_index)``:

    * ``added`` - set of ``("parent-attr", name)`` keys, one per attribute
      of ``element``, and ``("element", el, position, att_test,
      has_children)`` keys for its direct children
    * ``indices`` - number of direct children seen per path element
    * ``star_index`` - total number of direct children
    """
    added = set()
    indices = {}
    star_index = 0

    context = etree.iterwalk(element, events=("start", "end"))
    it = iter(context)
    # The first event belongs to ``element`` itself.  The next() builtin
    # works on Python 2.6+ and Python 3, unlike the .next() method.
    the_element = next(it)
    for (k, v) in the_element[1].attrib.items():
        added.add(("parent-attr", k))

    # nesting depth below ``element``; 1 means a direct child
    skip = 0

    paths = []
    has_children = False

    for action, elem in it:
        log("%s %s", action, elem)
        if action == "start":
            skip += 1
            if skip == 1:
                has_children = False
                star_index += 1
                paths.append(("*", star_index, None))
                namespace, tag = lxml_tag(elem.tag)
                log("namespace=%r prefixes=%r", namespace, prefixes)
                prefix = prefixes[namespace]
                el = path_element((prefix, tag))
                indices.setdefault(el, 0)
                indices[el] += 1
                # the child by name, by name and position, and by name
                # and each attribute/value pair
                paths.append((el, None, None))
                paths.append((el, indices[el], None))
                for (k, v) in elem.attrib.items():
                    paths.append((el, None, (k, v)))
            else:
                # something is nested inside the current direct child
                has_children = True
        elif action == "end":
            if skip == 1:
                # the direct child is complete: has_children is final now
                for p in paths:
                    el, position, att_test = p
                    added.add(("element", el, position, att_test, has_children))
                paths = []
            skip -= 1

    log("indices=%r", indices)

    for x in added:
        log("x=%r", x)

    return added, indices, star_index
    def getConfig(self,configstr):
        """Collect the text of the configuration elements named ``configstr``.

        For "excluded_interfaces" the ``interface`` elements are scanned
        instead, keeping those whose ``excluded`` attribute equals "YES"
        (case-insensitively), and the list of texts is returned.

        For any other name the return value depends on how many elements
        matched: the list when there are two or more (``self.configvar``
        is then set to the second text), the single text when there is
        exactly one, and an IndexError when there is none.
        """
        self.methodname = "getConfig(self," + configstr + ")"
        self.configstr = configstr
        self.configarr = []
        self.configvar = ''

        log = logger("xml_config_parser")

        if self.verbose:
            log.debug("launched "+self.methodname+" of "+self.classname+".")

        wants_excluded = self.configstr == "excluded_interfaces"
        wanted_tag = "interface" if wants_excluded else str(self.configstr)

        configwalk = etree.iterwalk(self.abyle_config,
                                    events=("start", "end"),
                                    tag=wanted_tag)
        for action, elem in configwalk:
            if action != 'start':
                continue
            if wants_excluded:
                excluded = str(elem.attrib.get("excluded")).upper()
                if excluded != "YES":
                    continue
            if self.verbose:
                log.debug(elem.text)
            self.configarr.append(elem.text)

        if wants_excluded:
            return self.configarr

        try:
            self.configvar = self.configarr[1]
        except IndexError:
            # fewer than two matches: hand back the only one
            return self.configarr[0]
        return self.configarr
	def print_policy(self, tree):
		"""Render the policy elements below ``tree`` as English clauses.

		A clause is started at each ``object-policy`` or
		``attribute-policy`` element and extended by every
		``attribute-match`` with the current logical operator ("and" or
		"or", taken from the most recently opened ``and-match`` /
		``or-match``).  A clause is emitted, minus its last word, when an
		``object-criteria`` or ``attribute-criteria`` element ends, or
		when a policy element ends without any comparison.

		Returns the list of clauses.
		"""
		logical_elements = ["and-match","or-match"]
		comparison_elements = ["attribute-match"]

		criteria_walk = etree.iterwalk(tree,
		events=("start","end"))
		# Discard the first event; next() is portable across Python 2.6+
		# and Python 3.
		next(criteria_walk)

		logic_stack = []
		obj_policy_set = []

		top = "and"
		for action, element in criteria_walk:
			if action == "start":
				if element.tag == "object-policy":
					policy_clause = "This experiment may %s a %s if" % (element.get("allow"), tree.get("for"))
					sub_count = 0
				elif element.tag == "attribute-policy":
					policy_clause = "This experiment may %s a %s of a %s if" % (element.get("allow"),
					element.getparent().get("type"), tree.get("for"))
					sub_count = 0
				elif element.tag in logical_elements:
					if "and" in element.tag:
						top = "and"
					elif "or" in element.tag:
						top = "or"
					# lists have no push(); append() is the stack push
					logic_stack.append(top)
				elif element.tag in comparison_elements:
					policy_clause = "%s its %s matches %s %s" % (policy_clause, element.get("match"),
					element.get("on_object"), top)
					sub_count += 1

			elif action == "end":
				closes_criteria = element.tag in ("object-criteria", "attribute-criteria")
				closes_policy = element.tag in ("object-policy", "attribute-policy")
				# sub_count is only consulted for policy elements
				if closes_criteria or (closes_policy and sub_count == 0):
					# drop the trailing word (a dangling "if" or operator)
					policy_clause = policy_clause.rsplit(" ",1)[0]
					obj_policy_set.append(policy_clause)
				elif element.tag in logical_elements:
					# NOTE(review): ``top`` is not reset to the enclosing
					# operator after the pop - confirm this is intended.
					logic_stack.pop()
		return obj_policy_set
 def __init__(self, election_tree, schema_file):
     super(ValidIDREF, self).__init__(election_tree, schema_file)
     for event, element in etree.iterwalk(
             self.election_tree, events=("end",)):
         if "objectId" not in element.attrib:
             continue
         else:
             obj_id = element.get("objectId")
             if not obj_id:
                 continue
             else:
                 self.all_object_ids.add(obj_id)