Example #1
0
    def build_etree(self, xml_string):
        try:
            etree = ET.fromstring(self.re_encode_string(xml_string))
        except ET.ParseError as e:
            logger = RequiredFeature('logger').request()
            logger.error("Building ETree from XML failed: %s. Offending string follows ..." % e.message)
            logger.error(xml_string)
            raise ValueError("Building ETree Failed")

        return etree
Example #2
0
    def build_etree(self, xml_string):
        try:
            etree = ET.fromstring(self.re_encode_string(xml_string))
        except ET.ParseError as e:
            logger = RequiredFeature('logger').request()
            logger.error(
                "Building ETree from XML failed: %s. Offending string follows ..."
                % e.message)
            logger.error(xml_string)
            raise ValueError("Building ETree Failed")

        return etree
Example #3
0
    def re_encode_string(self, xml_string):
        logger = RequiredFeature('logger').request()
        regex = re.compile('UTF-\d{1,2}')

        specified_encoding = regex.search(xml_string)

        logger.info("Trying to decode as: %s" % 'ASCII')
        try:
            xml_string = xml_string.decode(encoding='ascii')
        except UnicodeDecodeError as e:
            logger.info("Decoding as %s failed, trying as %s" %
                        ('ASCII', 'UTF-8'))
            try:
                xml_string = xml_string.decode(encoding='UTF-8')
            except UnicodeDecodeError as e:
                logger.info("Decoding as %s failed, trying as %s" %
                            ('UTF-8', 'UTF-16'))
                try:
                    xml_string = xml_string.decode(encoding='UTF-16')
                except UnicodeDecodeError as e:
                    logger.error(
                        "Decoding as UTF-16 failed, this was the last attempt. Offending string follows ..."
                    )
                    logger.error(xml_string)
                    raise ValueError("String Decode Failed")

        if specified_encoding is not None:
            try:
                logger.info("Trying to encode as specified in XML: %s" %
                            specified_encoding.group(0))
                xml_string = xml_string.encode(
                    encoding=specified_encoding.group(0))
            except UnicodeEncodeError as e:
                new_encode_setting = 'UTF-16' if specified_encoding.group(
                    0) == 'UTF-8' else 'UTF-8'
                logger.info("Encoding as %s failed, trying as %s" %
                            (specified_encoding.group(0), new_encode_setting))
                try:
                    xml_string = xml_string.encode(encoding=new_encode_setting)
                except UnicodeEncodeError as e:
                    logger.error(
                        "Encoding as %s failed, this was the last attempt. Offending string follows ..."
                        % new_encode_setting)
                    logger.error(xml_string)
                    raise ValueError("String Encode Failed")

            return xml_string
        else:
            logger.info("RegExp couldn't find a match in the XML string ...")
            try:
                logger.info("Trying to encode as: UTF-8")
                xml_string = xml_string.encode(encoding='UTF-8')
            except UnicodeEncodeError as e:
                logger.info("Encoding as UTF-8 failed, trying as UTF-16")
                try:
                    xml_string = xml_string.encode(encoding='UTF-16')
                except UnicodeEncodeError as e:
                    logger.error(
                        "Encoding as UTF-16 failed, this was the last attempt. Offending string follows ..."
                    )
                    logger.error(xml_string)
                    raise ValueError("String Encode Failed")

            return xml_string
Example #4
0
    def re_encode_string(self, xml_string):
        logger = RequiredFeature('logger').request()
        regex = re.compile('UTF-\d{1,2}')

        specified_encoding = regex.search(xml_string)

        logger.info("Trying to decode as: %s" % 'ASCII')
        try:
            xml_string = xml_string.decode(encoding='ascii')
        except UnicodeDecodeError as e:
            logger.info("Decoding as %s failed, trying as %s" % ('ASCII', 'UTF-8'))
            try:
                xml_string = xml_string.decode(encoding='UTF-8')
            except UnicodeDecodeError as e:
                logger.info("Decoding as %s failed, trying as %s" % ('UTF-8', 'UTF-16'))
                try:
                    xml_string = xml_string.decode(encoding='UTF-16')
                except UnicodeDecodeError as e:
                    logger.error("Decoding as UTF-16 failed, this was the last attempt. Offending string follows ...")
                    logger.error(xml_string)
                    raise ValueError("String Decode Failed")

        if specified_encoding is not None:
            try:
                logger.info("Trying to encode as specified in XML: %s" % specified_encoding.group(0))
                xml_string = xml_string.encode(encoding=specified_encoding.group(0))
            except UnicodeEncodeError as e:
                new_encode_setting = 'UTF-16' if specified_encoding.group(0) == 'UTF-8' else 'UTF-8'
                logger.info("Encoding as %s failed, trying as %s" % (specified_encoding.group(0), new_encode_setting))
                try:
                    xml_string = xml_string.encode(encoding=new_encode_setting)
                except UnicodeEncodeError as e:
                    logger.error(
                        "Encoding as %s failed, this was the last attempt. Offending string follows ..." %
                        new_encode_setting)
                    logger.error(xml_string)
                    raise ValueError("String Encode Failed")

            return xml_string
        else:
            logger.info("RegExp couldn't find a match in the XML string ...")
            try:
                logger.info("Trying to encode as: UTF-8")
                xml_string = xml_string.encode(encoding='UTF-8')
            except UnicodeEncodeError as e:
                logger.info("Encoding as UTF-8 failed, trying as UTF-16")
                try:
                    xml_string = xml_string.encode(encoding='UTF-16')
                except UnicodeEncodeError as e:
                    logger.error("Encoding as UTF-16 failed, this was the last attempt. Offending string follows ...")
                    logger.error(xml_string)
                    raise ValueError("String Encode Failed")

            return xml_string