def _GetSheetRows(self, filename):
    """Parses the contents of the first sheet of an XLSX document.

    Args:
      filename (str): The file path of the XLSX document to parse.

    Returns:
      list[list[str]]: A list of lists representing the rows of the first
          sheet. A cell without a value element yields an empty string.

    Raises:
      ValueError: if the sheet cannot be found, or a string cannot be read.
    """
    rows = []
    with zipfile.ZipFile(filename) as zip_file:
        member_names = zip_file.namelist()

        if self._SHEET1 not in member_names:
            # Fail if we cannot find the expected first sheet.
            raise ValueError(
                'Unable to locate expected sheet: {0:s}'.format(
                    self._SHEET1))

        # Generate a reference table of shared strings if available.
        strings = []
        if self._SHARED_STRINGS in member_names:
            with zip_file.open(self._SHARED_STRINGS) as zip_file_object:
                for _, element in ElementTree.iterparse(zip_file_object):
                    if element.tag.endswith(self._SHARED_STRING_TAG):
                        strings.append(element.text)

        row = []
        value = ''
        with zip_file.open(self._SHEET1) as zip_file_object:
            # Only "end" events are produced, so a cell's value element is
            # always seen before the cell (column) element that contains it.
            for _, element in ElementTree.iterparse(zip_file_object):
                if element.tag.endswith(
                        (self._VALUE_STRING_TAG, self._SHARED_STRING_TAG)):
                    value = element.text

                if element.tag.endswith(self._COLUMN_TAG):
                    # Grab value from shared string reference table if type
                    # shared string.
                    if strings and element.attrib.get(
                            self._TYPE_ATTRIBUTE) == self._SHARED_STRING_TYPE:
                        try:
                            value = strings[int(value)]
                        except (IndexError, TypeError, ValueError) as exception:
                            # TypeError covers an empty value element, whose
                            # text is None.
                            raise ValueError(
                                'Unable to successfully dereference shared '
                                'string.') from exception

                    row.append(value)
                    # Reset so that a following cell without a value element
                    # does not inherit this cell's value.
                    value = ''

                # If we see the end tag of the row, record row in rows and
                # reset.
                if element.tag.endswith(self._ROW_TAG):
                    rows.append(row)
                    row = []

    return rows
def is_svg(self, f):
    """Check whether the provided file is an SVG document.

    Only the first start tag is inspected: the file counts as SVG when its
    root element is ``svg`` in the ``http://www.w3.org/2000/svg`` namespace.

    Args:
        f: either a filesystem path (``str``), which is opened in binary
            mode here, or an already opened seekable file object, which is
            rewound to its start but left open for the caller.

    Returns:
        bool: True if the root element is an SVG root, False otherwise
        (including when the content is not well-formed XML).
    """
    # A str is treated as a path (the original comment mentions
    # "temporary_file_path"; presumably an upload's temp file - confirm).
    f_is_path = isinstance(f, str)
    fio = open(f, 'rb') if f_is_path else f

    try:
        fio.seek(0)

        tag = None
        try:
            # Stop after the very first start event: that is the root element.
            for _event, el in ElementTree.iterparse(fio, ('start',)):
                tag = el.tag
                break
        except ElementTree.ParseError:
            # Not XML at all, hence not SVG.
            pass

        return tag == '{http://www.w3.org/2000/svg}svg'
    finally:
        # Close only what was opened here, and do so even when an
        # unexpected error (e.g. an I/O error) interrupts parsing.
        if f_is_path:
            fio.close()
def detect(cls, context: FormatDetectionContext) -> None:
    """Probe every ``**/*.xml`` file for LabelMe-style annotation content.

    A file must have ``annotation`` as its root element. Detection returns
    as soon as an ``object`` directly under the root is found to contain a
    ``polygon`` or ``segm`` child. An ``object`` that closes without one
    raises, as does a file with a different root element.
    """
    for xml_path in context.require_files('**/*.xml'):
        with context.probe_text_file(
            xml_path, "must be a LabelMe annotation file",
        ) as stream:
            # Tags of the elements that are currently open, outermost first.
            open_tags = []
            events = ElementTree.iterparse(stream, events=('start', 'end'))

            for kind, node in events:
                if kind == 'start':
                    if not open_tags and node.tag != 'annotation':
                        raise Exception

                    inside_object = open_tags == ['annotation', 'object']
                    if inside_object and node.tag in {'polygon', 'segm'}:
                        return

                    open_tags.append(node.tag)
                elif kind == 'end':
                    open_tags.pop()

                    if node.tag == 'object' and open_tags == ['annotation']:
                        # An object closed without a polygon or a mask, so
                        # this is probably the wrong format.
                        raise Exception
def detect(cls, context: FormatDetectionContext) -> None:
    """Require a single ``*.xml`` file whose root element is ``annotations``.

    Only the first start event is read, so the rest of the document is
    never parsed. A different root tag raises inside the probe context.
    """
    xml_path = context.require_file('*.xml')
    requirement = "must be an XML file with an \"annotations\" root element"

    with context.probe_text_file(xml_path, requirement) as stream:
        first_event = next(ElementTree.iterparse(stream, events=('start', )))
        root = first_event[1]
        if root.tag != 'annotations':
            raise Exception
def read_keymap(filename):
    """Read key mappings from a keymap XML file.

    The document is walked as root -> context -> device -> mapping. Every
    mapping element with non-empty text contributes one entry.

    Args:
        filename: path of the XML file to read.

    Returns:
        list of ``(context, action, key)`` tuples, all lower-cased, in
        document order. ``key`` is the mapping's ``id`` attribute or, when
        that is missing or empty, the mapping's tag name.
    """
    ret = []
    # Parse once and walk down from the real root. Iterating over iterparse()
    # events instead would visit *every* element and treat each one as if it
    # were the root, producing bogus entries for deeper subtrees. Letting
    # ElementTree open the file also honours the encoding declared in the XML.
    keymap = ET.parse(filename).getroot()
    for context in keymap:
        for device in context:
            for mapping in device:
                action = mapping.text
                if action:
                    key = mapping.get('id') or mapping.tag
                    ret.append(
                        (context.tag.lower(), action.lower(), key.lower()))
    return ret
def is_svg(self, f):
    """Tell whether the seekable file object ``f`` holds an SVG document.

    The stream is rewound and only its first start tag is examined. Content
    that cannot be parsed as XML is reported as "not SVG".
    """
    svg_root_tag = '{http://www.w3.org/2000/svg}svg'

    f.seek(0)
    try:
        # The first start event belongs to the root element.
        _, root = next(ElementTree.iterparse(f, ('start', )))
    except (ElementTree.ParseError, StopIteration):
        return False
    return root.tag == svg_root_tag
def _iter_remote_project_links(self, response):
    """Yield each ``<a>`` element carrying an ``href`` from ``response.text``.

    The text is parsed with ElementTree. A parse error ends the iteration
    and is logged through ``threadlog``. Links seen before the error have
    already been yielded.
    """
    # Wrap everything in one outer tag so ElementTree sees a single root
    # element (HTML vs XML).
    wrapped = StringIO(''.join(('<html>', response.text, '</html>')))

    try:
        for _, node in ElementTree.iterparse(wrapped):
            if node.tag == 'a' and 'href' in node.attrib:
                yield node
    except ElementTree.ParseError:
        threadlog.exception("Error parsing remote project list")
def detect(cls, context: FormatDetectionContext) -> None:
    """Require a ``*.xml`` file that opens with the expected two elements.

    The first two start tags in the document must be ``boost_serialization``
    followed by ``tracklets``. Any other tag raises inside the probe context.
    """
    xml_path = context.require_file('*.xml')

    with context.probe_text_file(
        xml_path, "must be a KITTI-like annotation file",
    ) as stream:
        start_events = ET.iterparse(stream, events=('start',))
        # Check the opening tags one by one, in document order.
        for expected_tag in ('boost_serialization', 'tracklets'):
            _, node = next(start_events)
            if node.tag != expected_tag:
                raise Exception
def _get_subsets_from_anno(path):
    """Read the subset names stored in an annotation file.

    Args:
        path: file path or file object accepted by ``ElementTree.iterparse``.

    Returns:
        list[str]: the text of the first ``subsets`` element that has text,
        split on newlines. ``[DEFAULT_SUBSET_NAME]`` is returned when the
        ``meta`` element closes (or the document ends) before such an
        element was found.
    """
    # The text of an element is only guaranteed to be available on its "end"
    # event; on "start" it may still be unset, depending on how the parser
    # happened to chunk the input. So only "end" events are requested.
    for _, el in ElementTree.iterparse(path, events=("end",)):
        if el.tag == 'subsets' and el.text is not None:
            return el.text.split('\n')
        if el.tag == 'meta':
            # Stop at the end of the metadata section.
            return [DEFAULT_SUBSET_NAME]
        # Release the content of elements that are no longer needed.
        el.clear()
    return [DEFAULT_SUBSET_NAME]
# NOTE(review): this fragment relies on names defined outside the visible
# code (args, cnt, os, DET, parser, allfields, buflistGlobal). The nesting
# below is reconstructed from a whitespace-collapsed source - confirm it
# against the original file, in particular whether "index += 1" and the
# progress print belong inside the "if args.tagdepth == ..." branch.

# Find the first "<xml>-NNN.txt" name that does not exist yet.
while True:
    outfilename = "{}-{:03d}.txt".format(args.xml, cnt)
    if not os.path.exists(outfilename):
        break
    else:
        cnt += 1

# Write the header row: all field names joined by the configured separator.
# ("speparator" is the attribute name used throughout this fragment.)
# NOTE(review): tsvOut is not closed anywhere in the visible code.
tsvOut = open(outfilename, "w")
tsvOut.write(args.speparator.join(allfields))
tsvOut.write("\r\n")
tsvOut.flush()

tagstack = []  # tags pushed on every "start" event
index = 0  # counter used to throttle the progress output
for event, node in DET.iterparse(args.xml, parser=parser, events=["start", "end"]):
    if event == 'start':
        tagstack.append(node.tag)
        continue
    # Only "end" events get past this point.
    # print(event, node.tag)
    if args.tagdepth == len(tagstack):
        # write
        tsvOut.write(args.speparator.join(buflistGlobal))
        tsvOut.write("\r\n")
    index += 1
    # Print a progress line every 2048 counted events.
    if index % 2048 == 0:
        print("\rrunning {} {}".format(index, tagstack[-1]).ljust(64), end="")
def _parse(cls, path):
    """Parse a tracklet annotation XML file into per-frame annotations.

    The file is streamed with ``iterparse``. An ``item`` element opened
    while no track is open starts a track; an ``item`` opened inside a track
    starts a shape of that track. ``attribute`` elements (with ``name`` and
    ``value`` children) are attached to the current shape, or to the track
    when no shape is open.

    Args:
        path: path of the XML file. It is also passed to ``has_meta_file``
            and ``parse_meta_file``.

    Returns:
        tuple: ``(items, categories)``. ``items`` maps a frame number
        (``track['start_frame'] + i``) to ``{'annotations': [...]}``;
        ``categories`` maps ``AnnotationType.label`` to the label categories.

    Raises:
        ValueError: if an attribute has an empty name.
        Exception: if a track, shape or attribute is still open when the
            document ends.
        AssertionError: if an ``item`` closes without an open track, or a
            track's ``count`` differs from its number of shapes.
    """
    tracks = []
    track = None  # track currently being read, if any
    shape = None  # shape (nested item) currently being read, if any
    attr = None  # attribute currently being read, if any
    labels = {}  # label name -> set of attribute names seen for it
    point_tags = {'tx', 'ty', 'tz', 'rx', 'ry', 'rz'}

    # Can fail with "XML declaration not well-formed" on documents with
    # <?xml ... standalone="true"?>
    #                       ^^^^
    # (like the original Kitti dataset), while
    # <?xml ... standalone="yes"?>
    #                       ^^^
    # works.
    tree = ET.iterparse(path, events=("start", "end"))
    for ev, elem in tree:
        if ev == "start":
            if elem.tag == 'item':
                if track is None:
                    # Outer item: a new track.
                    track = {
                        'shapes': [],
                        'scale': {},
                        'label': None,
                        'attributes': {},
                        'start_frame': None,
                        'length': None,
                    }
                else:
                    # Item inside a track: a new shape.
                    shape = {
                        'points': {},
                        'attributes': {},
                        'occluded': None,
                        'occluded_kf': False,
                        'truncated': None,
                    }
            elif elem.tag == 'attribute':
                attr = {}
        elif ev == "end":
            if elem.tag == 'item':
                assert track is not None
                if shape:
                    # End of a shape: attach it to the open track.
                    track['shapes'].append(shape)
                    shape = None
                else:
                    # End of the track itself.
                    assert track['length'] == len(track['shapes'])
                    if track['label']:
                        # Collect every attribute name used by this label,
                        # on the track and on each of its shapes.
                        labels.setdefault(track['label'], set())
                        for a in track['attributes']:
                            labels[track['label']].add(a)
                        for s in track['shapes']:
                            for a in s['attributes']:
                                labels[track['label']].add(a)
                    tracks.append(track)
                    track = None
            # track tags
            elif track and elem.tag == 'objectType':
                track['label'] = elem.text
            elif track and elem.tag in {'h', 'w', 'l'}:
                track['scale'][elem.tag] = float(elem.text)
            elif track and elem.tag == 'first_frame':
                track['start_frame'] = int(elem.text)
            elif track and elem.tag == 'count' and track:
                track['length'] = int(elem.text)
            # pose tags
            elif shape and elem.tag in point_tags:
                shape['points'][elem.tag] = float(elem.text)
            elif shape and elem.tag == 'occlusion':
                shape['occluded'] = OcclusionStates(int(elem.text))
            elif shape and elem.tag == 'occlusion_kf':
                shape['occluded_kf'] = elem.text == '1'
            elif shape and elem.tag == 'truncation':
                shape['truncated'] = TruncationStates(int(elem.text))
            # common tags
            elif attr is not None and elem.tag == 'name':
                if not elem.text:
                    raise ValueError("Attribute name can't be empty")
                attr['name'] = elem.text
            elif attr is not None and elem.tag == 'value':
                attr['value'] = elem.text or ''
            elif attr is not None and elem.tag == 'attribute':
                # End of an attribute: store it on the shape if one is open,
                # otherwise on the track.
                if shape:
                    shape['attributes'][attr['name']] = attr['value']
                else:
                    track['attributes'][attr['name']] = attr['value']
                attr = None

    # Anything still open here means the document ended unexpectedly.
    if track is not None or shape is not None or attr is not None:
        raise Exception("Failed to parse anotations from '%s'" % path)

    special_attrs = KittiRawPath.SPECIAL_ATTRS
    common_attrs = ['occluded']

    if has_meta_file(path):
        # Labels come from the meta file when one is available.
        categories = { AnnotationType.label: LabelCategories.
            from_iterable(parse_meta_file(path).keys()) }
    else:
        # Otherwise build them from the labels collected while parsing,
        # sorted by label name, without the special attributes.
        label_cat = LabelCategories(attributes=common_attrs)
        for label, attrs in sorted(labels.items(), key=lambda e: e[0]):
            label_cat.add(label, attributes=set(attrs) - special_attrs)
        categories = {AnnotationType.label: label_cat}

    # Group the annotations of all tracks by frame. The i-th annotation of a
    # track is stored under frame start_frame + i; track ids start at 1.
    items = {}
    for idx, track in enumerate(tracks):
        track_id = idx + 1
        for i, ann in enumerate(
                cls._parse_track(track_id, track, categories)):
            frame_desc = items.setdefault(track['start_frame'] + i,
                {'annotations': []})
            frame_desc['annotations'].append(ann)

    return items, categories
def load(file_object, annotations):
    """Stream an annotation XML document into ``annotations``.

    Elements are handled as they are parsed: ``track`` elements become
    tracks, shapes inside an ``image`` become labeled shapes, keyframe shapes
    inside a ``track`` become tracked shapes, and ``tag`` elements inside an
    ``image`` become tags. ``attribute`` children are collected for the
    shape or tag that was opened most recently.

    Args:
        file_object: source handed to ``ElementTree.iterparse``.
        annotations: object that receives the result through ``add_shape``,
            ``add_track`` and ``add_tag``. It also provides the ``Track``,
            ``TrackedShape``, ``LabeledShape``, ``Tag`` and ``Attribute``
            constructors and ``abs_frame_id``.
    """
    from defusedxml import ElementTree
    context = ElementTree.iterparse(file_object, events=("start", "end"))
    context = iter(context)
    # Consume the first event before the main loop (presumably the start of
    # the root element - confirm).
    ev, _ = next(context)

    supported_shapes = ('box', 'polygon', 'polyline', 'points', 'cuboid')

    # Parser state: what is currently open.
    track = None
    shape = None
    tag = None
    image_is_opened = False
    attributes = None  # attribute list of the shape/tag opened last
    for ev, el in context:
        if ev == 'start':
            if el.tag == 'track':
                track = annotations.Track(
                    label=el.attrib['label'],
                    group=int(el.attrib.get('group_id', 0)),
                    source=el.attrib.get('source', 'manual'),
                    shapes=[],
                )
            elif el.tag == 'image':
                image_is_opened = True
                # Resolve the frame of this image; the result is used by the
                # shapes and tags that follow inside the image.
                frame_id = annotations.abs_frame_id(
                    match_dm_item(DatasetItem(
                        id=osp.splitext(el.attrib['name'])[0],
                        attributes={'frame': el.attrib['id']},
                        image=el.attrib['name']),
                    task_data=annotations))
            elif el.tag in supported_shapes and (track is not None or image_is_opened):
                attributes = []
                shape = {
                    'attributes': attributes,
                    'points': [],
                }
            elif el.tag == 'tag' and image_is_opened:
                attributes = []
                tag = {
                    'frame': frame_id,
                    'label': el.attrib['label'],
                    'group': int(el.attrib.get('group_id', 0)),
                    'attributes': attributes,
                    'source': str(el.attrib.get('source', 'manual'))
                }
        elif ev == 'end':
            if el.tag == 'attribute' and attributes is not None:
                attributes.append(
                    annotations.Attribute(
                        name=el.attrib['name'],
                        value=el.text or "",
                    ))
            if el.tag in supported_shapes:
                if track is not None:
                    # Shape of a track: frame and flags come from the shape.
                    shape['frame'] = el.attrib['frame']
                    shape['outside'] = el.attrib['outside'] == "1"
                    shape['keyframe'] = el.attrib['keyframe'] == "1"
                else:
                    # Shape of an image: frame comes from the open image.
                    shape['frame'] = frame_id
                    shape['label'] = el.attrib['label']
                    shape['group'] = int(el.attrib.get('group_id', 0))
                    shape['source'] = str(el.attrib.get('source', 'manual'))

                # "box" elements are stored with the type name "rectangle".
                shape['type'] = 'rectangle' if el.tag == 'box' else el.tag
                shape['occluded'] = el.attrib['occluded'] == '1'
                shape['z_order'] = int(el.attrib.get('z_order', 0))

                # NOTE(review): box and cuboid coordinates are appended as
                # the raw attribute strings, while the generic branch below
                # converts to float - confirm this is intended.
                if el.tag == 'box':
                    shape['points'].append(el.attrib['xtl'])
                    shape['points'].append(el.attrib['ytl'])
                    shape['points'].append(el.attrib['xbr'])
                    shape['points'].append(el.attrib['ybr'])
                elif el.tag == 'cuboid':
                    shape['points'].append(el.attrib['xtl1'])
                    shape['points'].append(el.attrib['ytl1'])
                    shape['points'].append(el.attrib['xbl1'])
                    shape['points'].append(el.attrib['ybl1'])
                    shape['points'].append(el.attrib['xtr1'])
                    shape['points'].append(el.attrib['ytr1'])
                    shape['points'].append(el.attrib['xbr1'])
                    shape['points'].append(el.attrib['ybr1'])

                    shape['points'].append(el.attrib['xtl2'])
                    shape['points'].append(el.attrib['ytl2'])
                    shape['points'].append(el.attrib['xbl2'])
                    shape['points'].append(el.attrib['ybl2'])
                    shape['points'].append(el.attrib['xtr2'])
                    shape['points'].append(el.attrib['ytr2'])
                    shape['points'].append(el.attrib['xbr2'])
                    shape['points'].append(el.attrib['ybr2'])
                else:
                    # "x1,y1;x2,y2;..." -> flat list of floats.
                    for pair in el.attrib['points'].split(';'):
                        shape['points'].extend(map(float, pair.split(',')))

                if track is not None:
                    # Only keyframe shapes are kept on a track.
                    if shape["keyframe"]:
                        track.shapes.append(annotations.TrackedShape(**shape))
                else:
                    annotations.add_shape(annotations.LabeledShape(**shape))
                shape = None

            elif el.tag == 'track':
                annotations.add_track(track)
                track = None
            elif el.tag == 'image':
                image_is_opened = False
            elif el.tag == 'tag':
                annotations.add_tag(annotations.Tag(**tag))
                tag = None
            # Drop the content of the finished element.
            el.clear()
def _parse(cls, path):
    """Parse an annotation XML file into per-frame items and categories.

    The header is consumed by ``cls._parse_meta``; the remaining events are
    streamed. Shapes found inside a ``track`` or an ``image`` and tags found
    inside an ``image`` are converted with ``cls._parse_shape_ann`` /
    ``cls._parse_tag_ann`` and grouped by frame.

    Attribute values are converted on the fly: ``'true'``/``'false'`` become
    booleans, numeric text becomes ``float``, anything else stays a string.
    An attribute element without text yields an empty string.

    Args:
        path: file path or file object accepted by ``ElementTree.iterparse``.

    Returns:
        tuple: ``(items, categories)``, where ``items`` is an ``OrderedDict``
        mapping a frame id to a dict with an ``'annotations'`` list (plus
        ``'name'``, ``'height'`` and ``'width'`` for frames that had an
        ``image`` element), and ``categories`` is whatever
        ``cls._parse_meta`` returned.
    """
    context = ElementTree.iterparse(path, events=("start", "end"))
    context = iter(context)

    categories, frame_size = cls._parse_meta(context)

    items = OrderedDict()

    # Parser state: what is currently open.
    track = None
    shape = None
    tag = None
    attributes = None  # attribute dict of the shape/tag opened last
    image = None
    for ev, el in context:
        if ev == 'start':
            if el.tag == 'track':
                track = {
                    'id': el.attrib['id'],
                    'label': el.attrib.get('label'),
                    'group': int(el.attrib.get('group_id', 0)),
                    'height': frame_size[0],
                    'width': frame_size[1],
                }
            elif el.tag == 'image':
                image = {
                    'name': el.attrib.get('name'),
                    'frame': el.attrib['id'],
                    'width': el.attrib.get('width'),
                    'height': el.attrib.get('height'),
                }
            elif el.tag in cls._SUPPORTED_SHAPES and (track or image):
                attributes = {}
                shape = {
                    'type': None,
                    'attributes': attributes,
                }
                # A shape inherits the fields of its enclosing track/image.
                if track:
                    shape.update(track)
                    shape['track_id'] = int(track['id'])
                if image:
                    shape.update(image)
            elif el.tag == 'tag' and image:
                attributes = {}
                tag = {
                    'frame': image['frame'],
                    'attributes': attributes,
                    'group': int(el.attrib.get('group_id', 0)),
                    'label': el.attrib['label'],
                }
        elif ev == 'end':
            if el.tag == 'attribute' and attributes is not None:
                # An empty element has text None; float(None) would raise a
                # TypeError that the ValueError handler below does not catch.
                attr_value = el.text or ''
                if el.text in ['true', 'false']:
                    attr_value = attr_value == 'true'
                else:
                    try:
                        attr_value = float(attr_value)
                    except ValueError:
                        # Not a number: keep the text as is.
                        pass
                attributes[el.attrib['name']] = attr_value
            elif el.tag in cls._SUPPORTED_SHAPES:
                if track is not None:
                    shape['frame'] = el.attrib['frame']
                    shape['outside'] = (el.attrib.get('outside') == '1')
                    shape['keyframe'] = (el.attrib.get('keyframe') == '1')
                if image is not None:
                    shape['label'] = el.attrib.get('label')
                    shape['group'] = int(el.attrib.get('group_id', 0))

                shape['type'] = el.tag
                shape['occluded'] = (el.attrib.get('occluded') == '1')
                shape['z_order'] = int(el.attrib.get('z_order', 0))

                if el.tag == 'box':
                    shape['points'] = list(map(float, [
                        el.attrib['xtl'], el.attrib['ytl'],
                        el.attrib['xbr'], el.attrib['ybr'],
                    ]))
                else:
                    # "x1,y1;x2,y2;..." -> flat list of floats.
                    shape['points'] = []
                    for pair in el.attrib['points'].split(';'):
                        shape['points'].extend(map(float, pair.split(',')))

                frame_desc = items.get(shape['frame'], {'annotations': []})
                frame_desc['annotations'].append(
                    cls._parse_shape_ann(shape, categories))
                items[shape['frame']] = frame_desc
                shape = None

            elif el.tag == 'tag':
                frame_desc = items.get(tag['frame'], {'annotations': []})
                frame_desc['annotations'].append(
                    cls._parse_tag_ann(tag, categories))
                items[tag['frame']] = frame_desc
                tag = None
            elif el.tag == 'track':
                track = None
            elif el.tag == 'image':
                # Record the image's own properties on its frame entry.
                frame_desc = items.get(image['frame'], {'annotations': []})
                frame_desc.update({
                    'name': image.get('name'),
                    'height': image.get('height'),
                    'width': image.get('width'),
                })
                items[image['frame']] = frame_desc
                image = None
            # Drop the content of the finished element.
            el.clear()

    return items, categories
# -*- coding: utf-8 -*- import xml.etree.ElementTree as badET import defusedxml.ElementTree as goodET xmlString = "<note>\n<to>Tove</to>\n<from>Jani</from>\n<heading>Reminder</heading>\n<body>Don't forget me this weekend!</body>\n</note>" # unsafe tree = badET.fromstring(xmlString) print(tree) badET.parse("filethatdoesntexist.xml") badET.iterparse("filethatdoesntexist.xml") a = badET.XMLParser() # safe tree = goodET.fromstring(xmlString) print(tree) goodET.parse("filethatdoesntexist.xml") goodET.iterparse("filethatdoesntexist.xml") a = goodET.XMLParser()
# Side-by-side calls into two ElementTree implementations: the standard
# library's xml.etree.ElementTree (imported as badET) and
# defusedxml.ElementTree (imported as goodET). The same four entry points
# are used on each: fromstring(), parse(), iterparse() and XMLParser().
# This variant uses the Python 2 print statement.
# NOTE(review): this looks like a fixture for a static security check, where
# the sequence of statements matters - confirm before restructuring.
import xml.etree.ElementTree as badET
import defusedxml.ElementTree as goodET

# Small well-formed sample document.
xmlString = "<note>\n<to>Tove</to>\n<from>Jani</from>\n<heading>Reminder</heading>\n<body>Don't forget me this weekend!</body>\n</note>"

# unsafe: standard library parser
tree = badET.fromstring(xmlString)
print tree
# NOTE(review): the file name suggests the file is absent, in which case
# this call raises when the script is actually executed.
badET.parse('filethatdoesntexist.xml')
badET.iterparse('filethatdoesntexist.xml')
a = badET.XMLParser()

# safe: defusedxml counterparts of the calls above
tree = goodET.fromstring(xmlString)
print tree
goodET.parse('filethatdoesntexist.xml')
goodET.iterparse('filethatdoesntexist.xml')
a = goodET.XMLParser()
# Side-by-side calls into two ElementTree implementations: the standard
# library's xml.etree.ElementTree (imported as badET) and
# defusedxml.ElementTree (imported as goodET). The same four entry points
# are used on each: fromstring(), parse(), iterparse() and XMLParser().
# NOTE(review): this looks like a fixture for a static security check, where
# the sequence of statements matters - confirm before restructuring.
import xml.etree.ElementTree as badET
import defusedxml.ElementTree as goodET

# Small well-formed sample document.
xmlString = "<note>\n<to>Tove</to>\n<from>Jani</from>\n<heading>Reminder</heading>\n<body>Don't forget me this weekend!</body>\n</note>"

# unsafe: standard library parser
tree = badET.fromstring(xmlString)
print(tree)
# NOTE(review): the file name suggests the file is absent, in which case
# this call raises when the script is actually executed.
badET.parse('filethatdoesntexist.xml')
badET.iterparse('filethatdoesntexist.xml')
a = badET.XMLParser()

# safe: defusedxml counterparts of the calls above
tree = goodET.fromstring(xmlString)
print(tree)
goodET.parse('filethatdoesntexist.xml')
goodET.iterparse('filethatdoesntexist.xml')
a = goodET.XMLParser()