Python ElementTree.iterparse Examples, defusedxml.ElementTree.iterparse Python Examples

Example #1

0

Show file

File: xlsx.py Project: cshanahan/plaso

    def _GetSheetRows(self, filename):
        """Parses the contents of the first sheet of an XLSX document.

        Cells without a value element are recorded as an empty string instead
        of inheriting the value of the previously parsed cell.

        Args:
          filename (str): The file path of the XLSX document to parse.

        Returns:
          list[list[str]]: A list of lists representing the rows of the first
              sheet.

        Raises:
          ValueError: if the sheet cannot be found, or a string cannot be read.
        """
        rows = []
        with zipfile.ZipFile(filename) as zip_file:
            # Read the archive listing once; it is consulted twice below.
            member_names = zip_file.namelist()
            if self._SHEET1 not in member_names:
                # Fail if we cannot find the expected first sheet.
                raise ValueError(
                    'Unable to locate expected sheet: {0:s}'.format(
                        self._SHEET1))

            # Generate a reference table of shared strings if available.
            strings = []
            if self._SHARED_STRINGS in member_names:
                with zip_file.open(self._SHARED_STRINGS) as zip_file_object:
                    for _, element in ElementTree.iterparse(zip_file_object):
                        if element.tag.endswith(self._SHARED_STRING_TAG):
                            strings.append(element.text)

            value_tags = (self._VALUE_STRING_TAG, self._SHARED_STRING_TAG)
            row = []
            value = ''
            with zip_file.open(self._SHEET1) as zip_file_object:
                for _, element in ElementTree.iterparse(zip_file_object):
                    tag = element.tag
                    if tag.endswith(value_tags):
                        value = element.text

                    if tag.endswith(self._COLUMN_TAG):
                        is_shared_string = element.attrib.get(
                            self._TYPE_ATTRIBUTE) == self._SHARED_STRING_TYPE
                        # Grab value from shared string reference table if
                        # type shared string. A cell that carried no value
                        # has nothing to dereference and stays empty.
                        if strings and is_shared_string and value != '':
                            try:
                                value = strings[int(value)]
                            except (IndexError, TypeError,
                                    ValueError) as exception:
                                # TypeError covers an empty value element,
                                # whose text is None.
                                raise ValueError(
                                    'Unable to successfully dereference shared string.'
                                ) from exception

                        row.append(value)
                        # Reset so the next cell does not inherit this value.
                        value = ''

                    # If we see the end tag of the row, record row in rows and
                    # reset.
                    if tag.endswith(self._ROW_TAG):
                        rows.append(row)
                        row = []

        return rows

Example #2

0

Show file

    def is_svg(self, f):
        """
        Check if provided file is svg

        ``f`` is either a filesystem path (``str``) or a seekable file object.
        A path is opened here and always closed again, even when reading
        fails; a file object passed by the caller is left open.

        Returns True only when the first element of the document is an
        ``svg`` element in the SVG namespace. Input that cannot be parsed as
        XML yields False.
        """

        # When is the temporary_file_path
        f_is_path = isinstance(f, str)
        fio = open(f, 'rb') if f_is_path else f

        tag = None
        try:
            fio.seek(0)
            try:
                # Only the first 'start' event is needed: it carries the root.
                for _event, el in ElementTree.iterparse(fio, ('start',)):
                    tag = el.tag
                    break
            except ElementTree.ParseError:
                # Not well-formed XML, therefore not an SVG.
                pass
        finally:
            # Close only what was opened here.
            if f_is_path:
                fio.close()

        return tag == '{http://www.w3.org/2000/svg}svg'

Example #3

0

Show file

File: labelme_format.py Project: openvinotoolkit/datumaro

    def detect(cls, context: FormatDetectionContext) -> None:
        """Probe every ``**/*.xml`` file for the LabelMe annotation layout.

        Returns as soon as a ``polygon`` or ``segm`` element is found directly
        inside ``annotation/object``. Raises a bare ``Exception`` inside the
        probe context when the root element is not ``annotation`` or when an
        ``object`` closes without having contained either of those elements.
        """
        for annot_path in context.require_files('**/*.xml'):
            with context.probe_text_file(
                annot_path, "must be a LabelMe annotation file",
            ) as f:
                # Tags of the elements that are currently open, outermost
                # first.
                open_tags = []
                events = ElementTree.iterparse(f, events=('start', 'end'))

                for kind, node in events:
                    if kind == 'start':
                        if not open_tags and node.tag != 'annotation':
                            raise Exception

                        inside_object = open_tags == ['annotation', 'object']
                        if inside_object and node.tag in {'polygon', 'segm'}:
                            return

                        open_tags.append(node.tag)
                        continue

                    if kind == 'end':
                        open_tags.pop()

                        # An object that closed without a polygon or a mask:
                        # probably the wrong format.
                        if node.tag == 'object' and open_tags == ['annotation']:
                            raise Exception

Example #4

0

Show file

File: extractor.py Project: openvinotoolkit/datumaro

    def detect(cls, context: FormatDetectionContext) -> None:
        """Require a single ``*.xml`` file whose root element is ``annotations``.

        Raises a bare ``Exception`` inside the probe context when the first
        element of the document has any other tag.
        """
        annot_file = context.require_file('*.xml')
        probe = context.probe_text_file(
            annot_file,
            "must be an XML file with an \"annotations\" root element",
        )

        with probe as f:
            # The first 'start' event always belongs to the document root.
            start_events = ElementTree.iterparse(f, events=('start', ))
            first_event = next(start_events)
            root_elem = first_event[1]
            if root_elem.tag != 'annotations':
                raise Exception

Example #5

0

Show file

def read_keymap(filename):
    """Read key mappings from an XML keymap file.

    The document is expected to nest three levels below an outer element:
    context -> device -> mapping. Mappings without text are skipped.

    Args:
        filename: Path of the XML file to read.

    Returns:
        A list of ``(context, action, key)`` tuples, all lower-cased, where
        ``key`` is the mapping's ``id`` attribute or, failing that, its tag.
    """
    ret = []
    # Open in binary mode so the XML parser decodes the bytes using the
    # encoding declared in the document, not the locale's default encoding.
    with open(filename, 'rb') as xml:
        # Every completed element is tried as the outer element; only one with
        # three levels of descendants contributes anything.
        for _, keymap in ET.iterparse(xml):
            for context in keymap:
                for device in context:
                    for mapping in device:
                        action = mapping.text
                        if not action:
                            continue
                        key = mapping.get('id') or mapping.tag
                        ret.append(
                            (context.tag.lower(), action.lower(), key.lower()))
    return ret

Example #6

0

Show file

 def is_svg(self, f):
     """
     Tell whether the seekable file object ``f`` holds an SVG document.

     The stream is rewound and only its first element is inspected. Content
     that cannot be parsed as XML counts as "not SVG".
     """
     f.seek(0)
     start_events = ElementTree.iterparse(f, ('start', ))
     try:
         first = next(start_events, None)
     except ElementTree.ParseError:
         first = None

     root_tag = None if first is None else first[1].tag
     return root_tag == '{http://www.w3.org/2000/svg}svg'

Example #7

0

Show file

 def _iter_remote_project_links(self, response):
     """Yield each ``<a>`` element with an ``href`` found in ``response.text``.

     A parse error ends the iteration and is logged through ``threadlog``;
     elements yielded before the error stay yielded.
     """
     # wrap in one outer tag for ElementTree to be happy (HTML vs XML)
     document = StringIO(''.join(('<html>', response.text, '</html>')))
     try:
         for _, node in ElementTree.iterparse(document):
             if node.tag == 'a' and 'href' in node.attrib:
                 yield node
     except ElementTree.ParseError:
         threadlog.exception("Error parsing remote project list")

Example #8

0

Show file

File: extractor.py Project: openvinotoolkit/datumaro

    def detect(cls, context: FormatDetectionContext) -> None:
        """Require a single ``*.xml`` file that opens with the expected tags.

        The first two elements started in the document must be
        ``boost_serialization`` followed by ``tracklets``; otherwise a bare
        ``Exception`` is raised inside the probe context.
        """
        annot_file = context.require_file('*.xml')

        with context.probe_text_file(
            annot_file, "must be a KITTI-like annotation file",
        ) as f:
            parser = ET.iterparse(f, events=('start',))

            # Check the leading 'start' events against the expected tags, in
            # document order.
            for expected_tag in ('boost_serialization', 'tracklets'):
                _, elem = next(parser)
                if elem.tag != expected_tag:
                    raise Exception

Example #9

0

Show file

    def _get_subsets_from_anno(path):
        """Read the subset names from the ``subsets`` element of an XML file.

        Args:
            path: File name or file object accepted by
                ``ElementTree.iterparse``.

        Returns:
            The text of the first non-empty ``subsets`` element split on
            newlines, or ``[DEFAULT_SUBSET_NAME]`` when the ``meta`` element
            (or the document) ends before one is found.
        """
        context = ElementTree.iterparse(path, events=("end", ))

        for _, el in context:
            # The text is read on the 'end' event: on 'start' the parser only
            # guarantees the attributes, so the text may be missing or
            # incomplete there.
            if el.tag == 'subsets' and el.text is not None:
                return el.text.split('\n')
            if el.tag == 'meta':
                return [DEFAULT_SUBSET_NAME]
            # Release children and text of elements that are already handled.
            el.clear()
        return [DEFAULT_SUBSET_NAME]

Example #10

0

Show file

File: xml2tsv.py Project: YichunQ/machine_wonder

    # Probe "<args.xml>-<cnt as 3 digits>.txt" names, incrementing cnt until
    # one is found that does not exist on disk yet; that name is kept in
    # `outfilename`. (`cnt` is initialised outside this excerpt.)
    while True:
        outfilename = "{}-{:03d}.txt".format(args.xml, cnt)
        if not os.path.exists(outfilename):
            break
        else:
            cnt += 1

# Open the output file and write the header row: all field names joined by
# the configured separator, terminated with CRLF.
# NOTE(review): `args.speparator` is spelled this way wherever `args` is
# defined (outside this excerpt); keep both in sync if it is ever renamed.
# NOTE(review): no close() for tsvOut is visible in this excerpt — confirm
# it is closed further down.
tsvOut = open(outfilename, "w")
tsvOut.write(args.speparator.join(allfields))
tsvOut.write("\r\n")
tsvOut.flush()

# tagstack collects the tag of every element opened so far; index counts the
# rows written.
tagstack = []
index = 0
for event, node in DET.iterparse(args.xml,
                                 parser=parser,
                                 events=["start", "end"]):
    if event == 'start':
        # Record the opened element's tag; the rest of the loop body only
        # runs for 'end' events.
        tagstack.append(node.tag)
        continue

    # print(event, node.tag)
    # An element at the configured depth has just ended.
    if args.tagdepth == len(tagstack):
        # write one row from the buffered field values
        # (buflistGlobal is filled outside this excerpt)
        tsvOut.write(args.speparator.join(buflistGlobal))
        tsvOut.write("\r\n")

        index += 1
        # Progress line, rewritten in place every 2048 rows.
        if index % 2048 == 0:
            print("\rrunning {} {}".format(index, tagstack[-1]).ljust(64),
                  end="")

Example #11

0

Show file

File: extractor.py Project: openvinotoolkit/datumaro

    def _parse(cls, path):
        """Parse the XML annotation file at ``path`` into per-frame items.

        ``item`` elements are nested: an ``item`` opened while no track is in
        progress starts a track, and an ``item`` opened inside a track starts
        a shape of that track. ``attribute`` elements (with ``name`` and
        ``value`` children) attach to the current shape if there is one,
        otherwise to the current track.

        Returns:
            tuple: ``(items, categories)``. ``items`` maps a frame number
            (``track['start_frame'] + i`` for the i-th annotation produced by
            ``cls._parse_track``) to ``{'annotations': [...]}``.
            ``categories`` maps ``AnnotationType.label`` to the label
            categories, taken from the meta file when ``has_meta_file(path)``
            is true and otherwise built from the labels seen in the tracks.

        Raises:
            ValueError: if an attribute ``name`` element is empty.
            AssertionError: if an ``item`` ends without an open track, or a
                track's ``count`` differs from its number of shapes.
            Exception: if a track, shape or attribute is still open when the
                document ends.
        """
        tracks = []
        # Parser state: the track, shape and attribute currently being built
        # (None when nothing of that kind is open).
        track = None
        shape = None
        attr = None
        # label name -> set of attribute names seen on its tracks and shapes
        labels = {}
        point_tags = {'tx', 'ty', 'tz', 'rx', 'ry', 'rz'}

        # Can fail with "XML declaration not well-formed" on documents with
        # <?xml ... standalone="true"?>
        #                       ^^^^
        # (like the original Kitti dataset), while
        # <?xml ... standalone="yes"?>
        #                       ^^^
        # works.
        tree = ET.iterparse(path, events=("start", "end"))
        for ev, elem in tree:
            if ev == "start":
                if elem.tag == 'item':
                    if track is None:
                        # Outer item: a new track.
                        track = {
                            'shapes': [],
                            'scale': {},
                            'label': None,
                            'attributes': {},
                            'start_frame': None,
                            'length': None,
                        }
                    else:
                        # Item inside a track: a new shape.
                        shape = {
                            'points': {},
                            'attributes': {},
                            'occluded': None,
                            'occluded_kf': False,
                            'truncated': None,
                        }

                elif elem.tag == 'attribute':
                    attr = {}

            elif ev == "end":
                if elem.tag == 'item':
                    assert track is not None

                    if shape:
                        # End of a shape item: attach it to the open track.
                        track['shapes'].append(shape)
                        shape = None
                    else:
                        # End of the track item itself.
                        assert track['length'] == len(track['shapes'])

                        if track['label']:
                            # Collect every attribute name used under this
                            # label, from the track and from its shapes.
                            labels.setdefault(track['label'], set())

                            for a in track['attributes']:
                                labels[track['label']].add(a)

                            for s in track['shapes']:
                                for a in s['attributes']:
                                    labels[track['label']].add(a)

                        tracks.append(track)
                        track = None

                # track tags
                elif track and elem.tag == 'objectType':
                    track['label'] = elem.text
                elif track and elem.tag in {'h', 'w', 'l'}:
                    track['scale'][elem.tag] = float(elem.text)
                elif track and elem.tag == 'first_frame':
                    track['start_frame'] = int(elem.text)
                # NOTE(review): the trailing `and track` repeats the first
                # operand and has no effect.
                elif track and elem.tag == 'count' and track:
                    track['length'] = int(elem.text)

                # pose tags
                elif shape and elem.tag in point_tags:
                    shape['points'][elem.tag] = float(elem.text)
                elif shape and elem.tag == 'occlusion':
                    shape['occluded'] = OcclusionStates(int(elem.text))
                elif shape and elem.tag == 'occlusion_kf':
                    shape['occluded_kf'] = elem.text == '1'
                elif shape and elem.tag == 'truncation':
                    shape['truncated'] = TruncationStates(int(elem.text))

                # common tags
                elif attr is not None and elem.tag == 'name':
                    if not elem.text:
                        raise ValueError("Attribute name can't be empty")
                    attr['name'] = elem.text
                elif attr is not None and elem.tag == 'value':
                    attr['value'] = elem.text or ''
                elif attr is not None and elem.tag == 'attribute':
                    # End of the attribute: store it on the shape if one is
                    # open, otherwise on the track.
                    if shape:
                        shape['attributes'][attr['name']] = attr['value']
                    else:
                        track['attributes'][attr['name']] = attr['value']
                    attr = None

        # Anything still open here means the document ended mid-structure.
        if track is not None or shape is not None or attr is not None:
            raise Exception("Failed to parse anotations from '%s'" % path)

        special_attrs = KittiRawPath.SPECIAL_ATTRS
        common_attrs = ['occluded']

        if has_meta_file(path):
            categories =  { AnnotationType.label: LabelCategories.
                from_iterable(parse_meta_file(path).keys()) }
        else:
            # Build the label list from the parsed tracks, sorted by label
            # name; special attributes are excluded from each label.
            label_cat = LabelCategories(attributes=common_attrs)
            for label, attrs in sorted(labels.items(), key=lambda e: e[0]):
                label_cat.add(label, attributes=set(attrs) - special_attrs)

            categories = {AnnotationType.label: label_cat}

        items = {}
        for idx, track in enumerate(tracks):
            # Track ids are 1-based positions in document order.
            track_id = idx + 1
            for i, ann in enumerate(
                    cls._parse_track(track_id, track, categories)):
                frame_desc = items.setdefault(track['start_frame'] + i,
                    {'annotations': []})
                frame_desc['annotations'].append(ann)

        return items, categories

Example #12

0

Show file

def load(file_object, annotations):
    """Stream an XML annotation document into ``annotations``.

    The document is read with ``defusedxml``'s ``iterparse``. Tracks, shapes
    and tags are built while their elements are open and handed over when
    they close, via ``annotations.add_track``, ``annotations.add_shape`` and
    ``annotations.add_tag``. Nothing is returned.

    Args:
        file_object: Source passed to ``ElementTree.iterparse``.
        annotations: Receiver object; this function uses its ``Track``,
            ``TrackedShape``, ``LabeledShape``, ``Tag``, ``Attribute``,
            ``abs_frame_id`` and ``add_*`` members.
    """
    from defusedxml import ElementTree
    context = ElementTree.iterparse(file_object, events=("start", "end"))
    context = iter(context)
    # Consume the first event (the start of the root element); it is not
    # processed by the loop below.
    ev, _ = next(context)

    supported_shapes = ('box', 'polygon', 'polyline', 'points', 'cuboid')

    # Parser state: what is currently open in the document.
    track = None
    shape = None
    tag = None
    image_is_opened = False
    # Attribute list of the shape or tag being built; shared with that
    # object's 'attributes' entry.
    attributes = None
    for ev, el in context:
        if ev == 'start':
            if el.tag == 'track':
                track = annotations.Track(
                    label=el.attrib['label'],
                    group=int(el.attrib.get('group_id', 0)),
                    source=el.attrib.get('source', 'manual'),
                    shapes=[],
                )
            elif el.tag == 'image':
                image_is_opened = True
                # Resolve the frame id for this image; the shapes and tags
                # inside the image reuse it.
                frame_id = annotations.abs_frame_id(
                    match_dm_item(DatasetItem(
                        id=osp.splitext(el.attrib['name'])[0],
                        attributes={'frame': el.attrib['id']},
                        image=el.attrib['name']),
                                  task_data=annotations))
            elif el.tag in supported_shapes and (track is not None
                                                 or image_is_opened):
                attributes = []
                shape = {
                    'attributes': attributes,
                    'points': [],
                }
            elif el.tag == 'tag' and image_is_opened:
                attributes = []
                tag = {
                    'frame': frame_id,
                    'label': el.attrib['label'],
                    'group': int(el.attrib.get('group_id', 0)),
                    'attributes': attributes,
                    'source': str(el.attrib.get('source', 'manual'))
                }
        elif ev == 'end':
            if el.tag == 'attribute' and attributes is not None:
                attributes.append(
                    annotations.Attribute(
                        name=el.attrib['name'],
                        value=el.text or "",
                    ))
            if el.tag in supported_shapes:
                if track is not None:
                    # Shape inside a track: frame and flags come from the
                    # shape element itself.
                    shape['frame'] = el.attrib['frame']
                    shape['outside'] = el.attrib['outside'] == "1"
                    shape['keyframe'] = el.attrib['keyframe'] == "1"
                else:
                    # Shape inside an image: frame comes from the image.
                    shape['frame'] = frame_id
                    shape['label'] = el.attrib['label']
                    shape['group'] = int(el.attrib.get('group_id', 0))
                    shape['source'] = str(el.attrib.get('source', 'manual'))

                shape['type'] = 'rectangle' if el.tag == 'box' else el.tag
                shape['occluded'] = el.attrib['occluded'] == '1'
                shape['z_order'] = int(el.attrib.get('z_order', 0))

                # NOTE(review): box and cuboid coordinates are appended as
                # the raw attribute strings, while the generic branch below
                # converts to float — confirm the consumers accept both.
                if el.tag == 'box':
                    shape['points'].append(el.attrib['xtl'])
                    shape['points'].append(el.attrib['ytl'])
                    shape['points'].append(el.attrib['xbr'])
                    shape['points'].append(el.attrib['ybr'])
                elif el.tag == 'cuboid':
                    shape['points'].append(el.attrib['xtl1'])
                    shape['points'].append(el.attrib['ytl1'])
                    shape['points'].append(el.attrib['xbl1'])
                    shape['points'].append(el.attrib['ybl1'])
                    shape['points'].append(el.attrib['xtr1'])
                    shape['points'].append(el.attrib['ytr1'])
                    shape['points'].append(el.attrib['xbr1'])
                    shape['points'].append(el.attrib['ybr1'])

                    shape['points'].append(el.attrib['xtl2'])
                    shape['points'].append(el.attrib['ytl2'])
                    shape['points'].append(el.attrib['xbl2'])
                    shape['points'].append(el.attrib['ybl2'])
                    shape['points'].append(el.attrib['xtr2'])
                    shape['points'].append(el.attrib['ytr2'])
                    shape['points'].append(el.attrib['xbr2'])
                    shape['points'].append(el.attrib['ybr2'])
                else:
                    # 'points' attribute holds ';'-separated "x,y" pairs.
                    for pair in el.attrib['points'].split(';'):
                        shape['points'].extend(map(float, pair.split(',')))

                if track is not None:
                    # Only keyframe shapes are stored on the track.
                    if shape["keyframe"]:
                        track.shapes.append(annotations.TrackedShape(**shape))
                else:
                    annotations.add_shape(annotations.LabeledShape(**shape))
                shape = None

            elif el.tag == 'track':
                annotations.add_track(track)
                track = None
            elif el.tag == 'image':
                image_is_opened = False
            elif el.tag == 'tag':
                annotations.add_tag(annotations.Tag(**tag))
                tag = None
            # Drop the finished element's content.
            el.clear()

Example #13

0

Show file

File: extractor.py Project: zz202/cvat

    def _parse(cls, path):
        """Parse the XML annotation file at ``path`` into per-frame items.

        The header is consumed by ``cls._parse_meta``; the rest of the
        document is streamed. Shapes and tags found inside ``track`` or
        ``image`` elements are converted with ``cls._parse_shape_ann`` and
        ``cls._parse_tag_ann`` and grouped by frame.

        Attribute values are stored as ``bool`` for the texts ``true`` and
        ``false``, as ``float`` when the text parses as a number, and as the
        text itself otherwise. An ``attribute`` element without text is
        stored as an empty string.

        Returns:
            tuple: ``(items, categories)`` where ``items`` is an
            ``OrderedDict`` mapping a frame id to a dict with an
            ``'annotations'`` list (plus ``'name'``, ``'height'`` and
            ``'width'`` for frames that come from ``image`` elements).
        """
        context = ElementTree.iterparse(path, events=("start", "end"))
        context = iter(context)

        categories, frame_size = cls._parse_meta(context)

        items = OrderedDict()

        # Parser state: what is currently open in the document.
        track = None
        shape = None
        tag = None
        # Attribute dict of the shape or tag being built; shared with that
        # object's 'attributes' entry.
        attributes = None
        image = None
        for ev, el in context:
            if ev == 'start':
                if el.tag == 'track':
                    track = {
                        'id': el.attrib['id'],
                        'label': el.attrib.get('label'),
                        'group': int(el.attrib.get('group_id', 0)),
                        'height': frame_size[0],
                        'width': frame_size[1],
                    }
                elif el.tag == 'image':
                    image = {
                        'name': el.attrib.get('name'),
                        'frame': el.attrib['id'],
                        'width': el.attrib.get('width'),
                        'height': el.attrib.get('height'),
                    }
                elif el.tag in cls._SUPPORTED_SHAPES and (track or image):
                    attributes = {}
                    shape = {
                        'type': None,
                        'attributes': attributes,
                    }
                    # Inherit the fields of the enclosing track or image.
                    if track:
                        shape.update(track)
                        shape['track_id'] = int(track['id'])
                    if image:
                        shape.update(image)
                elif el.tag == 'tag' and image:
                    attributes = {}
                    tag = {
                        'frame': image['frame'],
                        'attributes': attributes,
                        'group': int(el.attrib.get('group_id', 0)),
                        'label': el.attrib['label'],
                    }
            elif ev == 'end':
                if el.tag == 'attribute' and attributes is not None:
                    # An empty element has text None; fall back to '' so the
                    # float() attempt below cannot raise TypeError.
                    attr_value = el.text or ''
                    if el.text in ['true', 'false']:
                        attr_value = attr_value == 'true'
                    else:
                        try:
                            attr_value = float(attr_value)
                        except ValueError:
                            # Not numeric: keep the text as is.
                            pass
                    attributes[el.attrib['name']] = attr_value
                elif el.tag in cls._SUPPORTED_SHAPES:
                    if track is not None:
                        shape['frame'] = el.attrib['frame']
                        shape['outside'] = (el.attrib.get('outside') == '1')
                        shape['keyframe'] = (el.attrib.get('keyframe') == '1')
                    if image is not None:
                        shape['label'] = el.attrib.get('label')
                        shape['group'] = int(el.attrib.get('group_id', 0))

                    shape['type'] = el.tag
                    shape['occluded'] = (el.attrib.get('occluded') == '1')
                    shape['z_order'] = int(el.attrib.get('z_order', 0))

                    if el.tag == 'box':
                        shape['points'] = list(map(float, [
                            el.attrib['xtl'], el.attrib['ytl'],
                            el.attrib['xbr'], el.attrib['ybr'],
                        ]))
                    else:
                        # 'points' attribute holds ';'-separated "x,y" pairs.
                        shape['points'] = []
                        for pair in el.attrib['points'].split(';'):
                            shape['points'].extend(map(float, pair.split(',')))

                    frame_desc = items.get(shape['frame'], {'annotations': []})
                    frame_desc['annotations'].append(
                        cls._parse_shape_ann(shape, categories))
                    items[shape['frame']] = frame_desc
                    shape = None

                elif el.tag == 'tag':
                    frame_desc = items.get(tag['frame'], {'annotations': []})
                    frame_desc['annotations'].append(
                        cls._parse_tag_ann(tag, categories))
                    items[tag['frame']] = frame_desc
                    tag = None
                elif el.tag == 'track':
                    track = None
                elif el.tag == 'image':
                    # Record the image's own properties on its frame entry,
                    # creating the entry if the image had no annotations.
                    frame_desc = items.get(image['frame'], {'annotations': []})
                    frame_desc.update({
                        'name': image.get('name'),
                        'height': image.get('height'),
                        'width': image.get('width'),
                    })
                    items[image['frame']] = frame_desc
                    image = None
                # Drop the finished element's content.
                el.clear()

        return items, categories

Example #14

0

Show file

# -*- coding: utf-8 -*-
import xml.etree.ElementTree as badET
import defusedxml.ElementTree as goodET

# Small sample document parsed by both module aliases below.
xmlString = "<note>\n<to>Tove</to>\n<from>Jani</from>\n<heading>Reminder</heading>\n<body>Don't forget me this weekend!</body>\n</note>"

# unsafe
# Every call in this group goes through the standard library module
# (imported as badET).
tree = badET.fromstring(xmlString)
print(tree)
# NOTE(review): the file name suggests the file is absent, so this call
# presumably raises if the script is actually executed.
badET.parse("filethatdoesntexist.xml")
badET.iterparse("filethatdoesntexist.xml")
a = badET.XMLParser()

# safe
# The same calls, made through defusedxml (imported as goodET).
tree = goodET.fromstring(xmlString)
print(tree)
goodET.parse("filethatdoesntexist.xml")
goodET.iterparse("filethatdoesntexist.xml")
a = goodET.XMLParser()

Example #15

0

Show file

File: xml_etree_elementtree.py Project: jelly/bandit

import xml.etree.ElementTree as badET
import defusedxml.ElementTree as goodET

# Small sample document parsed by both module aliases below.
xmlString = "<note>\n<to>Tove</to>\n<from>Jani</from>\n<heading>Reminder</heading>\n<body>Don't forget me this weekend!</body>\n</note>"

# unsafe
# Every call in this group goes through the standard library module
# (imported as badET).
tree = badET.fromstring(xmlString)
# Python 2 print statement: this variant of the script is not valid Python 3.
print tree
# NOTE(review): the file name suggests the file is absent, so this call
# presumably raises if the script is actually executed.
badET.parse('filethatdoesntexist.xml')
badET.iterparse('filethatdoesntexist.xml')
a = badET.XMLParser()

# safe
# The same calls, made through defusedxml (imported as goodET).
tree = goodET.fromstring(xmlString)
print tree
goodET.parse('filethatdoesntexist.xml')
goodET.iterparse('filethatdoesntexist.xml')
a = goodET.XMLParser()

Example #16

0

Show file

File: xml_etree_elementtree.py Project: PyCQA/bandit

import xml.etree.ElementTree as badET
import defusedxml.ElementTree as goodET

# Small sample document parsed by both module aliases below.
xmlString = "<note>\n<to>Tove</to>\n<from>Jani</from>\n<heading>Reminder</heading>\n<body>Don't forget me this weekend!</body>\n</note>"

# unsafe
# Every call in this group goes through the standard library module
# (imported as badET).
tree = badET.fromstring(xmlString)
print(tree)
# NOTE(review): the file name suggests the file is absent, so this call
# presumably raises if the script is actually executed.
badET.parse('filethatdoesntexist.xml')
badET.iterparse('filethatdoesntexist.xml')
a = badET.XMLParser()

# safe
# The same calls, made through defusedxml (imported as goodET).
tree = goodET.fromstring(xmlString)
print(tree)
goodET.parse('filethatdoesntexist.xml')
goodET.iterparse('filethatdoesntexist.xml')
a = goodET.XMLParser()