def getHistory():
    """Return the list of text entries from the GPaste history XML file.

    Reads the data file directly (using the GPaste CLI would be way too
    slow) and returns each text entry as UTF-8 encoded bytes.
    """
    history = []
    # Load data from the xml file (using the GPaste CLI would be way too slow)
    for child in parseXML(dataFile).getroot():
        # Ignore non-text entries
        if child.attrib['kind'] == 'Text':
            # Encode, then undo the '&gt;' entity ourselves, since etree
            # fails to decode it in GPaste's invalid(?) XML 1.0 format.
            # (The original code's replace('>', '>') was a no-op; the intended
            # replacement was '&gt;' -> '>', per the comment.)
            # child[0] replaces the deprecated child.getchildren()[0].
            history.append(
                child[0].text.encode('utf-8').replace(b'&gt;', b'>')
            )
    return history
def upgradeResourcesXML(resourcesFilePath):
    """
    Convert the old XML format to the twext.who.xml format

    @param resourcesFilePath: the file to convert
    @type resourcesFilePath: L{FilePath}
    """
    # Read and parse the legacy file; bail out (with a logged error) on
    # I/O or parse failures rather than raising.
    try:
        with resourcesFilePath.open() as fh:
            try:
                etree = parseXML(fh)
            except XMLParseError:
                log.error("Cannot parse {path}", path=resourcesFilePath.path)
                return
    except (OSError, IOError):
        # Can't read the file
        log.error("Cannot read {path}", path=resourcesFilePath.path)
        return

    accountsNode = etree.getroot()
    if accountsNode.tag != "accounts":
        # Not the old format (or already converted); nothing to do.
        return

    # Old-format element name -> new-format element name(s).
    # A single legacy "guid" feeds both "guid" and "uid".
    tagMap = {
        "uid": ("short-name",),
        "guid": ("guid", "uid"),
        "name": ("full-name",),
    }
    log.info("Converting resources.xml")
    directoryNode = XMLElement("directory")
    directoryNode.set("realm", accountsNode.get("realm"))
    for sourceNode in accountsNode:
        recordType = sourceNode.tag
        destNode = XMLElement("record")
        destNode.set("type", recordType)
        for sourceFieldNode in sourceNode:
            tags = tagMap.get(sourceFieldNode.tag, None)
            if tags:
                for tag in tags:
                    destFieldNode = XMLElement(tag)
                    value = sourceFieldNode.text
                    try:
                        # Normalize UUID values to uppercase
                        value = str(uuid.UUID(value)).upper()
                    except (TypeError, ValueError):
                        # TypeError covers value=None (empty element text);
                        # ValueError covers non-UUID strings. Either way,
                        # keep the original value unchanged.
                        pass
                    destFieldNode.text = value
                    destNode.append(destFieldNode)
        directoryNode.append(destNode)
    resourcesFilePath.setContent(etreeToString(directoryNode, "utf-8"))
def getLastRssFeedEntry(uri):
    """Return the URL of the first 'audio/mp3' enclosure of the newest feed
    item, or None when the feed is unavailable or has no such enclosure.

    Path through the document: rss > channel[0] > item[0] > enclosure[].
    """
    r = get(uri)
    if r.status_code != 200:
        logging.info("Not a successful request.")
        return None
    root = parseXML(r.text)
    # find() returns None when the element is absent; guard both levels so a
    # malformed feed logs instead of raising AttributeError.
    channel = root.find('channel')
    item = channel.find('item') if channel is not None else None
    if item is None:
        logging.info("No channel/item found in feed")
        return None
    for enclosure in item.findall('enclosure'):
        if enclosure.get('type') == 'audio/mp3':
            return enclosure.get('url')
    logging.info("No mp3 enclosure found")
    return None
def upgradeResourcesXML(resourcesFilePath):
    """
    Convert the old XML format to the twext.who.xml format

    @param resourcesFilePath: the file to convert
    @type resourcesFilePath: L{FilePath}
    """
    # Read and parse the legacy file; bail out quietly (after logging) on
    # I/O or parse errors.
    try:
        with resourcesFilePath.open() as fh:
            try:
                etree = parseXML(fh)
            except XMLParseError:
                log.error("Cannot parse {path}", path=resourcesFilePath.path)
                return
    except (OSError, IOError):
        # Can't read the file
        log.error("Cannot read {path}", path=resourcesFilePath.path)
        return

    accountsNode = etree.getroot()
    if accountsNode.tag != "accounts":
        # Not the old format; nothing to convert.
        return

    # Legacy element name -> replacement element name(s); a single legacy
    # "guid" element becomes both "guid" and "uid" in the new format.
    fieldNameMap = {
        "uid": ("short-name",),
        "guid": ("guid", "uid"),
        "name": ("full-name",),
    }
    log.info("Converting resources.xml")

    directoryNode = XMLElement("directory")
    directoryNode.set("realm", accountsNode.get("realm"))

    for recordNode in accountsNode:
        newRecord = XMLElement("record")
        newRecord.set("type", recordNode.tag)
        for fieldNode in recordNode:
            # Unmapped legacy fields are dropped (empty tuple default).
            for newTag in fieldNameMap.get(fieldNode.tag, ()):
                newField = XMLElement(newTag)
                newField.text = fieldNode.text
                newRecord.append(newField)
        directoryNode.append(newRecord)

    resourcesFilePath.setContent(etreeToString(directoryNode, "utf-8"))
def loadRecords(self, loadNow=False, stat=True):
    """
    Load records from L{self.filePath}.

    Does nothing if a successful refresh has happened within the
    last L{self.refreshInterval} seconds.

    @param loadNow: If true, load now (ignoring L{self.refreshInterval})
    @type loadNow: L{bool}

    @param stat: If true, check file metadata and don't reload if unchanged.
    @type stat: L{bool}
    """
    #
    # Punt if we've read the file recently
    #
    now = time()
    if not loadNow and now - self._lastRefresh <= self.refreshInterval:
        return

    #
    # Punt if we've read the file and it's still the same.
    #
    if stat:
        self.filePath.restat()
        # (mtime, size) pair used as a cheap change-detection token.
        cacheTag = (self.filePath.getModificationTime(), self.filePath.getsize())
        if cacheTag == self._cacheTag:
            return
    else:
        cacheTag = None

    #
    # Open and parse the file
    #
    try:
        fh = self.filePath.open()
        try:
            etree = parseXML(fh)
        except XMLParseError as e:
            # Re-raise XML syntax errors as this service's ParseError.
            raise ParseError(e)
    finally:
        fh.close()

    #
    # Pull data from DOM
    #
    directoryNode = etree.getroot()
    if directoryNode.tag != self.element.directory.value:
        raise ParseError("Incorrect root element: %s" % (directoryNode.tag,))

    # NOTE(review): encoding to UTF-8 bytes here suggests Python 2-era
    # code; confirm against callers before changing.
    realmName = directoryNode.get(self.attribute.realm.value, "").encode("utf-8")
    if not realmName:
        raise ParseError("No realm name.")

    unknownRecordTypes = set()
    unknownFieldElements = set()

    records = set()

    for recordNode in directoryNode:
        try:
            records.add(self.parseRecordNode(recordNode, unknownFieldElements))
        except UnknownRecordTypeError as e:
            # Remember the unrecognized record-type token but keep
            # loading the remaining records.
            unknownRecordTypes.add(e.token)

    #
    # Store results
    #
    # Build the in-memory index: fieldName -> value -> set of records.
    index = {}
    for fieldName in self.indexedFields:
        index[fieldName] = {}

    for record in records:
        for fieldName in self.indexedFields:
            values = record.fields.get(fieldName, None)
            if values is not None:
                if not BaseFieldName.isMultiValue(fieldName):
                    # Wrap single values so the indexing loop below is uniform.
                    values = (values,)
                for value in values:
                    index[fieldName].setdefault(value, set()).add(record)

    # Only mutate instance state after the whole file parsed successfully.
    self._realmName = realmName

    self._unknownRecordTypes = unknownRecordTypes
    self._unknownFieldElements = unknownFieldElements

    self._cacheTag = cacheTag
    self._lastRefresh = now

    self.index = index

    return etree
def replaceEvents(inp, out, inputEvents):
    """Rewrite the openMSX replay read from C{inp}, replacing its event log
    with C{inputEvents}, and write the result to C{out}.

    @param inp: file-like object containing the base replay XML
    @param out: binary file-like object the new replay is written to
    @param inputEvents: sequence of (time, row, press, release) tuples
    """
    doc = parseXML(inp)

    # Set the serialization date to now.
    rootElem = doc.getroot()
    rootElem.attrib['date_time'] = \
        datetime.now().strftime('%a %b %d %H:%M:%S %Y')
    rootElem.attrib['openmsx_version'] = 'txt2omr'
    rootElem.attrib['platform'] = platform.system().lower()

    # Remove snapshots except the one at timestamp 0.
    snapshots = doc.find('replay/snapshots')
    if snapshots is None:
        print('Base replay lacks snapshots', file=stderr)
    else:
        seenInitialSnapshot = False
        for snapshot in snapshots.findall('item'):
            timeElem = snapshot.find('scheduler/currentTime/time')
            time = int(timeElem.text)
            if time == 0:
                seenInitialSnapshot = True
            else:
                snapshots.remove(snapshot)
        if not seenInitialSnapshot:
            print('No snapshot found with timestamp 0', file=stderr)

    # Replace event log.
    eventsElem = doc.find('replay/events')
    if eventsElem is None:
        # BUGFIX: previously the code fell through and still called
        # SubElement(eventsElem, ...) with eventsElem = None, crashing.
        # Event insertion must be skipped entirely when the tag is absent.
        print('No events tag found; cannot insert events', file=stderr)
    else:
        tail = eventsElem.tail
        eventsElem.clear()
        eventsElem.text = '\n'
        eventsElem.tail = tail

        # IDs must be unique for the entire document. We look for the highest
        # in-use ID and generate new IDs counting up from there.
        baseID = max(
            (int(elem.attrib['id']) for elem in doc.iterfind('.//*[@id]')),
            # default covers documents that carry no id attributes at all
            default=0
        ) + 1

        def createEvent(i, time):
            # One <item> per event; nested time element matches the
            # openMSX serialization layout.
            itemElem = SubElement(
                eventsElem, 'item',
                id=str(baseID + i), type='KeyMatrixState'
            )
            itemElem.tail = '\n'
            stateChangeElem = SubElement(itemElem, 'StateChange')
            timeElem1 = SubElement(stateChangeElem, 'time')
            timeElem2 = SubElement(timeElem1, 'time')
            timeElem2.text = str(time)
            return itemElem

        for i, (time, row, press, release) in enumerate(inputEvents):
            itemElem = createEvent(i, time)
            SubElement(itemElem, 'row').text = str(row)
            SubElement(itemElem, 'press').text = str(press)
            SubElement(itemElem, 'release').text = str(release)
        # Terminate the log at the time of the last event (or 0 if none).
        endTime = inputEvents[-1][0] if inputEvents else 0
        createEvent(len(inputEvents), endTime).attrib['type'] = 'EndLog'

    # Reset re-record count.
    reRecordCount = doc.find('replay/reRecordCount')
    if reRecordCount is not None:
        reRecordCount.text = '0'

    # Reset the current time.
    currentTime = doc.find('replay/currentTime/time')
    if currentTime is not None:
        currentTime.text = '0'

    # Emit the prolog manually so the DOCTYPE line is included.
    out.write(b'<?xml version="1.0" ?>\n')
    out.write(b"<!DOCTYPE openmsx-serialize SYSTEM 'openmsx-serialize.dtd'>\n")
    doc.write(out, encoding='utf-8', xml_declaration=False)
def replaceEvents(inp, out, inputEvents):
    """Rewrite the openMSX replay read from C{inp}, replacing its event log
    with C{inputEvents}, and write the result to C{out}.

    @param inp: file-like object containing the base replay XML
    @param out: binary file-like object the new replay is written to
    @param inputEvents: sequence of (time, row, press, release) tuples
    """
    doc = parseXML(inp)

    # Set the serialization date to now.
    rootElem = doc.getroot()
    rootElem.attrib['date_time'] = \
        datetime.now().strftime('%a %b %d %H:%M:%S %Y')
    rootElem.attrib['openmsx_version'] = 'txt2omr'
    rootElem.attrib['platform'] = platform.system().lower()

    # Remove snapshots except the one at timestamp 0.
    snapshots = doc.find('replay/snapshots')
    if snapshots is None:
        print('Base replay lacks snapshots', file=stderr)
    else:
        seenInitialSnapshot = False
        for snapshot in snapshots.findall('item'):
            timeElem = snapshot.find('scheduler/currentTime/time')
            time = int(timeElem.text)
            if time == 0:
                seenInitialSnapshot = True
            else:
                snapshots.remove(snapshot)
        if not seenInitialSnapshot:
            print('No snapshot found with timestamp 0', file=stderr)

    # Replace event log.
    eventsElem = doc.find('replay/events')
    if eventsElem is None:
        # BUGFIX: previously execution continued past this error and called
        # SubElement(eventsElem, ...) with eventsElem = None, crashing.
        # All event insertion now lives inside the else branch.
        print('No events tag found; cannot insert events', file=stderr)
    else:
        tail = eventsElem.tail
        eventsElem.clear()
        eventsElem.text = '\n'
        eventsElem.tail = tail

        # IDs must be unique for the entire document. We look for the highest
        # in-use ID and generate new IDs counting up from there.
        baseID = max(
            (int(elem.attrib['id']) for elem in doc.iterfind('.//*[@id]')),
            # default covers documents that carry no id attributes at all
            default=0
        ) + 1

        def createEvent(i, time):
            # One <item> per event; nested time element matches the
            # openMSX serialization layout.
            itemElem = SubElement(
                eventsElem, 'item',
                id=str(baseID + i), type='KeyMatrixState'
            )
            itemElem.tail = '\n'
            stateChangeElem = SubElement(itemElem, 'StateChange')
            timeElem1 = SubElement(stateChangeElem, 'time')
            timeElem2 = SubElement(timeElem1, 'time')
            timeElem2.text = str(time)
            return itemElem

        for i, (time, row, press, release) in enumerate(inputEvents):
            itemElem = createEvent(i, time)
            SubElement(itemElem, 'row').text = str(row)
            SubElement(itemElem, 'press').text = str(press)
            SubElement(itemElem, 'release').text = str(release)
        # Terminate the log at the time of the last event (or 0 if none).
        endTime = inputEvents[-1][0] if inputEvents else 0
        createEvent(len(inputEvents), endTime).attrib['type'] = 'EndLog'

    # Reset re-record count.
    reRecordCount = doc.find('replay/reRecordCount')
    if reRecordCount is not None:
        reRecordCount.text = '0'

    # Reset the current time.
    currentTime = doc.find('replay/currentTime/time')
    if currentTime is not None:
        currentTime.text = '0'

    # Emit the prolog manually so the DOCTYPE line is included.
    out.write(b'<?xml version="1.0" ?>\n')
    out.write(b"<!DOCTYPE openmsx-serialize SYSTEM 'openmsx-serialize.dtd'>\n")
    doc.write(out, encoding='utf-8', xml_declaration=False)
def loadRecords(self, loadNow=False, stat=True):
    """
    Load records from L{self.filePath}.

    Does nothing if a successful refresh has happened within the
    last L{self.refreshInterval} seconds.

    @param loadNow: If true, load now (ignoring L{self.refreshInterval})
    @type loadNow: L{bool}

    @param stat: If true, check file metadata and don't reload if unchanged.
    @type stat: L{bool}
    """
    #
    # Punt if we've read the file recently
    #
    now = time()
    if not loadNow and now - self._lastRefresh <= self.refreshInterval:
        return

    #
    # Punt if we've read the file and it's still the same.
    #
    if stat:
        try:
            self.filePath.restat()
        except (OSError, IOError):
            # Can't read the file
            self.flush()
            raise DirectoryAvailabilityError(
                "Can't open {}".format(self.filePath)
            )
        # (mtime, size) pair used as a cheap change-detection token.
        cacheTag = (
            self.filePath.getModificationTime(),
            self.filePath.getsize()
        )
        if cacheTag == self._cacheTag:
            return
    else:
        cacheTag = None

    #
    # Open and parse the file
    #
    try:
        with self.filePath.open() as fh:
            lines = fh.read().splitlines()
            # NOTE(review): keeps everything above the "<directory" line,
            # presumably so it can be re-emitted when the file is written
            # back — confirm against the writer code.
            self.filePreamble = "\n".join(itertools.takewhile(lambda x: not x.startswith("<directory"), lines))
        with self.filePath.open() as fh:
            try:
                etree = parseXML(fh)
            except XMLParseError as e:
                # Re-raise XML syntax errors as this service's ParseError.
                raise ParseError(e)
    except (OSError, IOError):
        # Can't read the file
        self.flush()
        raise DirectoryAvailabilityError(
            "Can't open {}".format(self.filePath)
        )

    #
    # Pull data from DOM
    #
    directoryNode = etree.getroot()
    if directoryNode.tag != self.element.directory.value:
        raise ParseError(
            "Incorrect root element: {0}".format(directoryNode.tag)
        )

    # NOTE(review): unicode()/u"" indicate Python 2-era code.
    realmName = unicode(directoryNode.get(
        self.attribute.realm.value, u""
    ))
    if not realmName:
        raise ParseError("No realm name.")

    unknownRecordTypes = set()
    unknownFieldElements = set()

    records = set()

    for recordNode in directoryNode:
        try:
            records.add(
                self.parseRecordNode(recordNode, unknownFieldElements)
            )
        except UnknownRecordTypeError as e:
            # Remember the unrecognized record-type token but keep
            # loading the remaining records.
            unknownRecordTypes.add(e.token)

    #
    # Store results
    #
    # Discard previously cached state, then rebuild the index from the
    # freshly parsed records before updating the bookkeeping attributes.
    self.flush()
    self.indexRecords(records)

    self._realmName = realmName

    self._unknownRecordTypes = unknownRecordTypes
    self._unknownFieldElements = unknownFieldElements

    self._cacheTag = cacheTag
    self._lastRefresh = now

    return etree