Exemple #1
0
    def test_validate_xml_against_xsd(self):
        """
        Validate generated DataCite XML for all public records

        Walks every record id in the ``reclist`` of the collection named
        'zenodo'.  For each record it checks that every related identifier
        uses a lower-case scheme and, when the record has a DOI, formats the
        record as 'dcite', parses the output and asserts it valid against
        ``self.schema``.

        On any failure the record id (and the generated XML, if formatting
        got that far) is printed before the exception is re-raised.
        """
        from invenio.websearch_model import Collection
        from invenio.bibformat import format_record
        from invenio.bibfield import get_record

        etree.clear_error_log()

        collection = Collection.query.filter_by(name='zenodo').first()
        for recid in collection.reclist:
            # Reset per record so the failure report never shows stale XML.
            xml = None
            try:
                record = get_record(recid)
                for identifier in record.get('related_identifiers', []):
                    scheme = identifier['scheme']
                    if scheme != scheme.lower():
                        raise Exception(
                            "Record %s has problem with upper-case scheme %s" %
                            (recid, scheme))
                if record.get('doi', None):
                    xml = StringIO(format_record(recid, 'dcite'))
                    self.schema.assertValid(etree.parse(xml))
            except Exception:
                print(recid)
                if xml:
                    print(xml.getvalue())
                # A bare raise re-raises with the original traceback intact;
                # ``raise e`` would restart it from this line on Python 2.
                raise
Exemple #2
0
def loadUtr(modelManager):
    """Build a dictionary of item types that are constrained by the UTR.

    Parses the Unit Type Registry document (located through the controller's
    web cache) and stores the result on ``modelManager.utrDict`` as
    ``{itemType: {unit "id" attribute: registry entry}}``.  A registry entry
    is a plain list laid out as::

        [unitId, nsUnit,                              # 0, 1
         nsNumeratorItemType, numeratorItemType,      # 2, 3
         nsDenominatorItemType, denominatorItemType,  # 4, 5
         nsItemType]                                  # 6, often None

    I/O and lxml errors are written to the controller log instead of being
    raised; entries stored before the failure remain in ``utrDict``.
    """
    utrNs = "{http://www.xbrl.org/2009/utr}"

    def childText(unitElt, localName):
        # Text of the named UTR child element, or None when it is absent.
        return unitElt.findtext(utrNs + localName)

    modelManager.utrDict = {}  # This attribute is unbound on modelManager until this function is called.
    utrUrl = "http://www.xbrl.org/utr/utr.xml"
    #utrUrl = os.path.join(modelManager.cntlr.configDir, "utr.xml")
    modelManager.cntlr.showStatus(_("Loading Unit Type Registry"))
    try:
        xmldoc = etree.parse(modelManager.cntlr.webCache.getfilename(utrUrl))
        for unitElt in xmldoc.iter(tag=utrNs + "unit"):
            entryId = unitElt.get("id")
            itemType = childText(unitElt, "itemType")
            # a RegEntry is just an array.
            regEntry = [
                childText(unitElt, "unitId"),                 # 0
                childText(unitElt, "nsUnit"),                 # 1
                childText(unitElt, "nsNumeratorItemType"),    # 2
                childText(unitElt, "numeratorItemType"),      # 3
                childText(unitElt, "nsDenominatorItemType"),  # 4
                childText(unitElt, "denominatorItemType"),    # 5
                childText(unitElt, "nsItemType"),             # 6 often None
            ]
            # TO DO: This indexing scheme assumes that there are no name clashes in item types of the registry.
            modelManager.utrDict.setdefault(itemType, {})[entryId] = regEntry
    except (EnvironmentError,
            etree.LxmlError) as err:
        modelManager.cntlr.addToLog("Unit Type Registry Import error: {0}".format(err))
        etree.clear_error_log()
    def validate(self):
        """
        Check that the configured schema file loads as an XML Schema.

        Returns:
           False when ``self.schemafile`` is set but is either not parsable
           XML or not a valid XML Schema document; the entries of the
           exception's error log are logged at INFO level in both cases.

        NOTE(review): the original summary described validating the data
        file against the schema, but the code visible here returns nothing
        (None) once the schema compiles or when no schema file is set.
        Confirm whether the data-validation step is missing.
        """
        # clear any previous xml errors
        clear_error_log()
        if self.schemafile is None:
            return

        try:
            # Attempt parsing the schema file
            schema_tree = parse(self.schemafile)
        except XMLSyntaxError as err:
            # The schema was not parsable XML
            logging.warning('The schema XML file could not be parsed.')
            for entry in err.error_log:
                logging.info(entry)
            return False

        try:
            XMLSchema(schema_tree)
        except XMLSchemaParseError as err:
            # The schema document is XML, but it's not a schema
            logging.warning('The schema XML file was parsed, but it does not appear to be a valid XML Schema document.')
            for entry in err.error_log:
                logging.info(entry)
            return False
Exemple #4
0
def test(xhtml_file: Path, dtd: DTD, schematron: Schematron) -> bool:
    """
    Check an XHTML file against a DTD, a Schematron and the link/image tests.

    The file is parsed with DTD validation enabled, then validated against
    ``dtd`` and ``schematron`` in that order; the first failing stage is
    reported and ends the check.  An unreadable file is reported on stderr;
    the other failures are reported through the error-log helpers.

    :param xhtml_file: the XHTML file to test
    :param dtd: the DTD
    :param schematron: the Schematron
    :return: True if the file passes every stage
    """
    if settings.verbose:
        print(xhtml_file)

    clear_error_log()

    xhtml_parser = XHTMLParser(dtd_validation=True, ns_clean=True)
    try:
        root = parse(source=str(xhtml_file), parser=xhtml_parser).getroot()
    except IOError as error:
        print(f"{xhtml_file}: {error.strerror}", file=stderr)
        return False
    except XMLSyntaxError:
        print_error_log(xhtml_parser.error_log)
        return False

    if not dtd.validate(root):
        print_error_log(dtd.error_log)
        return False

    if not schematron.validate(root):
        print_schematron_error_log(root, schematron)
        return False

    # Links are checked first; images only when the links pass.
    return test_links(xhtml_file, root) and test_images(xhtml_file, root)
Exemple #5
0
    def transformIterable(self, result, encoding):
        """Apply the theme transform to ``result`` if required.

        Returns None when ``parseTree`` yields nothing or when no transform
        could be set up; otherwise returns the parsed result, whose tree is
        replaced by the transformed tree when the transform produced one.

        In development mode lxml errors are captured instead of raised and a
        debug trace is appended to the page; the trace is also produced when
        the request carries a truthy ``diazo.debug`` value.  ``encoding`` is
        not used by the code shown here.
        """
        result = self.parseTree(result)
        if result is None:
            return None

        dev_mode = Globals.DevelopmentMode
        # Only consult the request flag when development mode is on.
        runtrace = dev_mode
        if runtrace:
            debug_flag = self.request.get('diazo.debug', '').lower()
            runtrace = debug_flag in ('1', 'y', 'yes', 't', 'true')

        try:
            etree.clear_error_log()

            settings = self.getSettings()
            if settings.doctype:
                result.doctype = settings.doctype
                if not result.doctype.endswith('\n'):
                    result.doctype += '\n'

            transform = self.setupTransform(runtrace=runtrace)
            if transform is None:
                return None

            # The parameter cache is bypassed in development mode.
            cache = None if dev_mode else getCache(settings)

            parameterExpressions = settings.parameterExpressions or {}
            context = findContext(self.request)
            params = prepareThemeParameters(
                context, self.request, parameterExpressions, cache)

            transformed = transform(result.tree, **params)
            error_log = transform.error_log
            if transformed is not None:
                # Transformed worked, swap content with result
                result.tree = transformed
        except etree.LxmlError as e:
            if not dev_mode:
                raise
            # Development mode: keep going and show the errors in the trace.
            error_log = e.error_log
            runtrace = True

        if not runtrace:
            return result

        from diazo.runtrace import generate_debug_html
        # Add debug information to end of body
        # NOTE(review): insert(-1, ...) places the markup before the body's
        # last child rather than after it - confirm this is intended.
        body = result.tree.xpath('/html/body')[0]
        resource_url = (findContext(self.request).portal_url() +
                        '/++resource++diazo-debug')
        debug_html = generate_debug_html(
            resource_url,
            rules=settings.rules,
            rules_parser=getParser('rules', settings.readNetwork),
            error_log=error_log,
        )
        body.insert(-1, debug_html)

        return result
    def test_validate_xml_against_xsd(self):
        """
        Validate generated DataCite XML for all public records

        Iterates over the ``reclist`` of the collection named 'zenodo'.
        Every record must use lower-case schemes in its related identifiers;
        records that have a DOI are additionally formatted as 'dcite', parsed
        and asserted valid against ``self.schema``.

        When a record fails, its id and any XML generated so far are printed
        and the exception is re-raised.
        """
        from invenio.websearch_model import Collection
        from invenio.bibformat import format_record
        from invenio.bibfield import get_record

        etree.clear_error_log()

        zenodo = Collection.query.filter_by(name='zenodo').first()
        for recid in zenodo.reclist:
            # Cleared for each record so a failure never prints another
            # record's XML.
            xml = None
            try:
                record = get_record(recid)
                for identifier in record.get('related_identifiers', []):
                    scheme = identifier['scheme']
                    if scheme != scheme.lower():
                        raise Exception("Record %s has problem with upper-case scheme %s" % (recid, scheme))
                if record.get('doi', None):
                    xml = StringIO(format_record(recid, 'dcite'))
                    self.schema.assertValid(etree.parse(xml))
            except Exception:
                print(recid)
                if xml:
                    print(xml.getvalue())
                # Bare raise preserves the traceback of the real failure;
                # ``raise e`` would reset it to this line on Python 2.
                raise
Exemple #7
0
def loadUtr(modelManager): # Build a dictionary of item types that are constrained by the UTR
    """Load the Unit Type Registry into the disclosure system.

    Opens ``modelManager.disclosureSystem.utrUrl`` through
    ``openXmlFileStream``, builds one ``UtrEntry`` per ``unit`` element and
    groups them as ``{itemType: {unit id: UtrEntry}}``.  The mapping is
    assigned to ``modelManager.disclosureSystem.utrItemTypeEntries`` only
    when the whole document was processed without error.

    I/O and lxml errors are written to the controller log rather than
    raised.  The stream is closed on every exit path.
    """
    utrNs = "{http://www.xbrl.org/2009/utr}"
    utrItemTypeEntries = defaultdict(dict)
    # print('UTR LOADED FROM '+utrUrl);
    modelManager.cntlr.showStatus(_("Loading Unit Type Registry"))
    utrStream = None
    try:
        from arelle.FileSource import openXmlFileStream
        # normalize any relative paths to config directory
        utrStream = openXmlFileStream(modelManager.cntlr, modelManager.disclosureSystem.utrUrl, stripDeclaration=True)[0]
        xmldoc = etree.parse(utrStream)
        for unitElt in xmldoc.iter(tag=utrNs + "unit"):
            u = UtrEntry()
            u.id = unitElt.get("id")
            u.unitId = unitElt.findtext(utrNs + "unitId")
            u.nsUnit = (unitElt.findtext(utrNs + "nsUnit") or None) # None if empty entry
            u.itemType = unitElt.findtext(utrNs + "itemType")
            u.nsItemType = unitElt.findtext(utrNs + "nsItemType")
            u.numeratorItemType = unitElt.findtext(utrNs + "numeratorItemType")
            u.nsNumeratorItemType = unitElt.findtext(utrNs + "nsNumeratorItemType")
            u.denominatorItemType = unitElt.findtext(utrNs + "denominatorItemType")
            u.nsDenominatorItemType = unitElt.findtext(utrNs + "nsDenominatorItemType")
            u.isSimple = u.numeratorItemType is None and u.denominatorItemType is None
            # TO DO: This indexing scheme assumes that there are no name clashes in item types of the registry.
            utrItemTypeEntries[u.itemType][u.id] = u
        modelManager.disclosureSystem.utrItemTypeEntries = utrItemTypeEntries
    except (EnvironmentError,
            etree.LxmlError) as err:
        modelManager.cntlr.addToLog("Unit Type Registry Import error: {0}".format(err))
        etree.clear_error_log()
    finally:
        # Close even when an exception other than the ones handled above
        # propagates, so the stream is never leaked.
        if utrStream is not None:
            utrStream.close()
Exemple #8
0
def trim_xml(
    xml_file_in: str, xml_file_out: str
) -> Tuple[bool, Optional[str], Optional[etree.ElementTree]]:
    """Re-serialise an XML file pretty-printed, with blank text removed.

    Parses ``xml_file_in`` using a parser that drops ignorable whitespace
    and writes the root element to ``xml_file_out`` as pretty-printed UTF-8.

    Returns a ``(ok, error_kind, document)`` triple:
      * ``(True, None, doc)`` on success, where ``doc`` is the parsed tree;
      * ``(False, 'IO', None)`` when reading or writing raises IOError;
      * ``(False, 'Syntax', None)`` when the input is not well-formed.
    """
    # clear global error log for lxml
    etree.clear_error_log()

    blank_stripping_parser = etree.XMLParser(remove_blank_text=True)

    try:
        tree = etree.parse(xml_file_in, blank_stripping_parser)
    except IOError:
        return False, 'IO', None
    except etree.XMLSyntaxError:
        return False, 'Syntax', None

    try:
        with open(xml_file_out, 'wb') as out:
            serialised = etree.tostring(
                tree.getroot(), pretty_print=True, encoding='UTF-8')
            out.write(serialised)
    except IOError:
        return False, 'IO', None

    return True, None, tree
Exemple #9
0
    def validate(self):
        """
        Check that the configured schema file loads as an XML Schema.

        @return: False when ``self.schemafile`` is set but cannot be parsed
                 as XML, or parses but is not a valid XML Schema document.
                 The error log entries are logged at INFO level either way.

        NOTE(review): the original summary described validating the data
        file against the schema, but the code visible here returns nothing
        (None) once the schema compiles or when no schema file is set.
        Confirm whether the data-validation step is missing.
        """

        # clear any previous xml errors
        clear_error_log()
        if self.schemafile is None:
            return

        try:
            # Attempt parsing the schema file
            schema_tree = parse(self.schemafile)
        except XMLSyntaxError as err:
            # The schema was not parsable XML
            logging.warning('The schema XML file could not be parsed.')
            for entry in err.error_log:
                logging.info(entry)
            return False

        try:
            XMLSchema(schema_tree)
        except XMLSchemaParseError as err:
            # The schema document is XML, but it's not a schema
            logging.warning(
                'The schema XML file was parsed, but it does not appear to be a valid XML Schema document.'
            )
            for entry in err.error_log:
                logging.info(entry)
            return False
Exemple #10
0
def quickDumpFunctionStereotypeInfo(xmlDocument, xsltDocument,):

    try:
        resultingDoc = executeTransform(xmlDocument, xsltDocument)
        print >> sys.stderr,  et.tostring(resultingDoc)
        if len(xsltDocument.error_log) >0:
            print >> sys.stderr, xsltDocument.error_log
            et.clear_error_log()
    except:
        print "Failed to execute transformation"
        raise

    matches = []
    try:
        matches = resultingDoc.xpath(
            "//src:function[preceding-sibling::*[1][self::src:comment]]",
            namespaces=xmlNamespaces
        )
        print >> sys.stderr, "Number of Functions located: {0}".format(len(matches))

    except:
        print >> sys.stderr,  "Failed to test stereotype data"
        # print "transformed document"
        # print et.tostring(resultingDoc)

        # print "\n\n\nMatches: "
        # for m in matches:
        #     print et.tostring(m)
        raise
Exemple #11
0
    def validate(self, filepath, expected=None):
        """Check that ``filepath`` is well-formed XML and record the outcome.

        A ``Validation`` row is stored for every outcome:
          * on a syntax error, one failed row per entry in the exception's
            error log (message ``"<line>: <message>"``), bulk-created, after
            which ``ValidationError`` is raised with those messages;
          * on any other exception, a single failed row carrying
            ``str(exception)``, after which the exception is re-raised;
          * on success, a single passed row.

        ``expected`` is accepted but not used by this method.
        """
        logger.debug('Validating syntax of {xml}'.format(xml=filepath))

        etree.clear_error_log()
        started = timezone.now()
        validator_name = self.__class__.__name__

        try:
            etree.parse(filepath)
        except etree.XMLSyntaxError as e:
            msg = 'Syntax validation of {xml} failed'.format(xml=filepath)
            logger.exception(msg)
            done = timezone.now()
            validation_objs = [
                Validation(
                    passed=False,
                    validator=validator_name,
                    filename=filepath,
                    message='{line}: {msg}'.format(line=error.line,
                                                   msg=error.message),
                    time_started=started,
                    time_done=done,
                    information_package_id=self.ip,
                    task=self.task,
                )
                for error in e.error_log
            ]

            Validation.objects.bulk_create(validation_objs, 100)
            raise ValidationError(msg,
                                  errors=[o.message for o in validation_objs])
        except Exception as e:
            logger.exception(
                'Unknown error during syntax validation of {xml}'.format(
                    xml=filepath))
            done = timezone.now()
            Validation.objects.create(
                passed=False,
                validator=validator_name,
                filename=filepath,
                message=str(e),
                time_started=started,
                time_done=done,
                information_package_id=self.ip,
                task=self.task,
            )
            raise

        # Reaching this point means the parse succeeded.
        Validation.objects.create(
            passed=True,
            validator=validator_name,
            filename=filepath,
            time_started=started,
            time_done=timezone.now(),
            information_package_id=self.ip,
            task=self.task,
        )
        logger.info(
            "Successful syntax validation of {xml}".format(xml=filepath))
Exemple #12
0
    def loadStandardTaxonomiesDict(self):
        """Load the standard-taxonomy lookup tables from the configured files.

        Does nothing unless ``self.selection`` is set.  Otherwise resets
        ``standardTaxonomiesDict``, ``standardLocalHrefs`` and
        ``standardAuthorities`` and, when ``standardTaxonomiesUrl`` is set,
        reads every ``Loc`` element from that document (located through the
        web cache) and from ``xbrlschemafiles.xml`` in the controller's
        config directory.

        For a ``Loc`` that has an ``Href``:
          * if it also has a ``Namespace`` and its ``AttType`` is "SCH" or
            "ENT", the namespace is mapped to ``(href, localHref)`` (first
            entry wins), its authority is recorded, and a "BASE" family
            namespace is added to ``baseTaxonomyNamespaces``;
          * the href itself is mapped to ``"Allowed" + attType`` (first
            entry wins);
          * a ``LocalHref`` is added to ``standardLocalHrefs``.
        A ``Loc`` without ``Href`` whose AttType is "SCH" and family is
        "BASE" only contributes its namespace to ``baseTaxonomyNamespaces``.

        I/O and lxml errors are logged through the controller, not raised.
        """
        if not self.selection:
            return
        self.standardTaxonomiesDict = {}
        self.standardLocalHrefs = set()
        self.standardAuthorities = set()
        if not self.standardTaxonomiesUrl:
            return
        basename = os.path.basename(self.standardTaxonomiesUrl)
        cntlr = self.modelManager.cntlr
        cntlr.showStatus(_("parsing {0}").format(basename))
        try:
            taxonomyFiles = (
                cntlr.webCache.getfilename(self.standardTaxonomiesUrl),
                os.path.join(cntlr.configDir, "xbrlschemafiles.xml"),
            )
            for taxonomyFile in taxonomyFiles:
                xmldoc = etree.parse(taxonomyFile)
                for locElt in xmldoc.iter(tag="Loc"):
                    # Collect child values by tag; a repeated tag keeps the
                    # last value.  ``text`` is None for an empty element, so
                    # fall back to "" instead of failing on None.strip().
                    fields = {}
                    for childElt in locElt.iterchildren():
                        fields[childElt.tag] = (childElt.text or "").strip()
                    href = fields.get("Href")
                    localHref = fields.get("LocalHref")
                    namespaceUri = fields.get("Namespace")
                    attType = fields.get("AttType")
                    family = fields.get("Family")

                    if href:
                        if namespaceUri and attType in ("SCH", "ENT"):
                            if namespaceUri not in self.standardTaxonomiesDict:
                                self.standardTaxonomiesDict[
                                    namespaceUri] = (href, localHref)
                            authority = UrlUtil.authority(namespaceUri)
                            self.standardAuthorities.add(authority)
                            if family == "BASE":
                                self.baseTaxonomyNamespaces.add(
                                    namespaceUri)
                        if href not in self.standardTaxonomiesDict:
                            # NOTE(review): still raises TypeError when a Loc
                            # has an Href but no AttType child - confirm what
                            # value such an entry should get.
                            self.standardTaxonomiesDict[
                                href] = "Allowed" + attType
                        if localHref:
                            self.standardLocalHrefs.add(localHref)
                    elif attType == "SCH" and family == "BASE":
                        self.baseTaxonomyNamespaces.add(namespaceUri)

        except (EnvironmentError, etree.LxmlError) as err:
            cntlr.addToLog(
                "{0}: import error: {1}".format(basename, err))
            etree.clear_error_log()
Exemple #13
0
 def _validate_xhtml(func_name, *args, **kwargs):
     """Assert that the current page is well-formed when it is XHTML strict.

     Pages whose source does not mention "xhtml1-strict.dtd" are skipped.
     A syntax error is re-raised as a TwillAssertionError carrying the
     formatted lxml error log.  The arguments are not used.
     """
     page = b.get_html()
     if "xhtml1-strict.dtd" in page:
         etree.clear_error_log()
         try:
             etree.parse(StringIO(page), base_url=b.get_url())
         except etree.XMLSyntaxError as e:
             report = _format_error_log(page, e.error_log)
             raise twill.errors.TwillAssertionError(report)
Exemple #14
0
    def validate(self, xml_data):
        """Check that ``xml_data`` parses as XML.

        Resets ``self.errors``.  When ``self.additional_root_element`` is
        set, the data is first wrapped in an element of that name (on its
        own lines).  A syntax error is handed to ``raiseValidationError``
        together with the (possibly wrapped) data and the lxml error log.
        """
        self.errors = []
        wrapper = self.additional_root_element
        if wrapper:
            xml_data = u"<%(are)s>\n%(data)s\n</%(are)s>" % {"are": wrapper, "data": xml_data}

        etree.clear_error_log()
        try:
            etree.parse(StringIO(xml_data))
        except etree.XMLSyntaxError as e:
            self.raiseValidationError(xml_data, e.error_log)
    def transformIterable(self, result, encoding):
        """Apply the theme transform to ``result`` if required.

        Returns None when ``parseTree`` yields nothing or when no transform
        could be set up; otherwise returns the parsed result, whose tree is
        swapped for the transformed tree when the transform produced one.

        Outside development mode lxml errors propagate.  In development mode
        they are captured and shown in a debug trace added to the page body;
        the trace is also added when the request has a truthy ``diazo.debug``
        value.  ``encoding`` is not used by the code shown here.
        """
        result = self.parseTree(result)
        if result is None:
            return None

        dev_mode = Globals.DevelopmentMode
        # The request flag is only looked at in development mode.
        runtrace = dev_mode
        if runtrace:
            debug_flag = self.request.get('diazo.debug', '').lower()
            runtrace = debug_flag in ('1', 'y', 'yes', 't', 'true')

        try:
            etree.clear_error_log()

            settings = self.getSettings()
            if settings.doctype:
                result.doctype = settings.doctype
                if not result.doctype.endswith('\n'):
                    result.doctype += '\n'

            transform = self.setupTransform(runtrace=runtrace)
            if transform is None:
                return None

            # No parameter cache while developing.
            cache = None if dev_mode else getCache(settings)

            parameterExpressions = settings.parameterExpressions or {}
            context = findContext(self.request)
            params = prepareThemeParameters(context, self.request, parameterExpressions, cache)

            transformed = transform(result.tree, **params)
            error_log = transform.error_log
            if transformed is not None:
                # Transformed worked, swap content with result
                result.tree = transformed
        except etree.LxmlError as e:
            if not dev_mode:
                raise
            # Development mode: continue and surface the errors in the trace.
            error_log = e.error_log
            runtrace = True

        if not runtrace:
            return result

        from diazo.runtrace import generate_debug_html
        # Add debug information to end of body
        # NOTE(review): insert(-1, ...) places the markup before the body's
        # last child rather than after it - confirm this is intended.
        body = result.tree.xpath('/html/body')[0]
        resource_url = findContext(self.request).portal_url() + '/++resource++diazo-debug'
        debug_html = generate_debug_html(
            resource_url,
            rules=settings.rules,
            rules_parser=getParser('rules', settings.readNetwork),
            error_log=error_log,
        )
        body.insert(-1, debug_html)

        return result
    def _validate_xml(self):
        """Check the 'inner_xml' request parameter by assigning it to a Record.

        The stripped parameter value is assigned to ``inner_xml`` on a fresh
        ``Record``.  If that assignment raises an XML syntax error, the last
        entry of the error log is turned into a ``ValidationError`` giving
        message, line and column.
        """
        inner_xml = self.require_unique_param('inner_xml')

        record = Record()
        etree.clear_error_log()
        try:
            record.inner_xml = inner_xml.strip()
        except etree.XMLSyntaxError as e:
            last = e.error_log.last_error
            # NOTE(review): the reported line is one less than lxml's; this
            # presumably compensates for a wrapper line added by Record -
            # confirm.
            position = (last.message, last.line - 1, last.column)
            raise ValidationError("Invalid XML supplied: %s, "
                                  "at line %d, character %d" % position)
Exemple #17
0
 def loadMappings(self):
     """Read file and path mappings from the document at ``self.mappingsUrl``.

     Each ``mapFile`` element adds a ``from -> to`` entry to
     ``self.mappedFiles``; each ``mapPath`` element appends a
     ``(from, to)`` pair to ``self.mappedPaths``.  I/O and lxml errors are
     logged through the controller rather than raised.
     """
     basename = os.path.basename(self.mappingsUrl)
     cntlr = self.modelManager.cntlr
     cntlr.showStatus(_("parsing {0}").format(basename))
     try:
         xmldoc = etree.parse(self.mappingsUrl)
         for fileElt in xmldoc.iter(tag="mapFile"):
             self.mappedFiles[fileElt.get("from")] = fileElt.get("to")
         for pathElt in xmldoc.iter(tag="mapPath"):
             pathPair = (pathElt.get("from"), pathElt.get("to"))
             self.mappedPaths.append(pathPair)
     except (EnvironmentError, etree.LxmlError) as err:
         message = "{0}: import error: {1}".format(basename, err)
         cntlr.addToLog(message)
         etree.clear_error_log()
Exemple #18
0
 def loadMappings(self):
     """Read file and path mappings from the document at ``self.mappingsUrl``.

     ``mapFile`` elements populate ``self.mappedFiles`` (``from`` attribute
     as key, ``to`` attribute as value); ``mapPath`` elements are appended
     to ``self.mappedPaths`` as ``(from, to)`` pairs.  I/O and lxml errors
     are logged through the controller rather than raised.
     """
     basename = os.path.basename(self.mappingsUrl)
     cntlr = self.modelManager.cntlr
     cntlr.showStatus(_("parsing {0}").format(basename))
     try:
         xmldoc = etree.parse(self.mappingsUrl)
         for fileElt in xmldoc.iter(tag="mapFile"):
             self.mappedFiles[fileElt.get("from")] = fileElt.get("to")
         for pathElt in xmldoc.iter(tag="mapPath"):
             pathPair = (pathElt.get("from"), pathElt.get("to"))
             self.mappedPaths.append(pathPair)
     except (EnvironmentError, etree.LxmlError) as err:
         message = "{0}: import error: {1}".format(basename, err)
         cntlr.addToLog(message)
         etree.clear_error_log()
    def loadStandardTaxonomiesDict(self):
        """Load the standard-taxonomy lookup tables from the configured files.

        Does nothing unless ``self.selection`` is set.  Otherwise resets
        ``standardTaxonomiesDict``, ``standardLocalHrefs`` and
        ``standardAuthorities`` and, when ``standardTaxonomiesUrl`` is set,
        reads every ``Loc`` element from that document (located through the
        web cache) and from ``xbrlschemafiles.xml`` in the controller's
        config directory.

        For a ``Loc`` that has an ``Href``:
          * if it also has a ``Namespace`` and its ``AttType`` is "SCH" or
            "ENT", the namespace is mapped to ``(href, localHref)`` (first
            entry wins), its authority is recorded, and a "BASE" family
            namespace is added to ``baseTaxonomyNamespaces``;
          * the href itself is mapped to ``"Allowed" + attType`` (first
            entry wins);
          * a ``LocalHref`` is added to ``standardLocalHrefs``.
        A ``Loc`` without ``Href`` whose AttType is "SCH" and family is
        "BASE" only contributes its namespace to ``baseTaxonomyNamespaces``.

        I/O and lxml errors are logged through the controller, not raised.
        """
        if not self.selection:
            return
        self.standardTaxonomiesDict = {}
        self.standardLocalHrefs = set()
        self.standardAuthorities = set()
        if not self.standardTaxonomiesUrl:
            return
        basename = os.path.basename(self.standardTaxonomiesUrl)
        cntlr = self.modelManager.cntlr
        cntlr.showStatus(_("parsing {0}").format(basename))
        try:
            taxonomyFiles = (
                cntlr.webCache.getfilename(self.standardTaxonomiesUrl),
                os.path.join(cntlr.configDir, "xbrlschemafiles.xml"),
            )
            for taxonomyFile in taxonomyFiles:
                xmldoc = etree.parse(taxonomyFile)
                for locElt in xmldoc.iter(tag="Loc"):
                    # Collect child values by tag; a repeated tag keeps the
                    # last value.  ``text`` is None for an empty element, so
                    # fall back to "" instead of failing on None.strip().
                    fields = {}
                    for childElt in locElt.iterchildren():
                        fields[childElt.tag] = (childElt.text or "").strip()
                    href = fields.get("Href")
                    localHref = fields.get("LocalHref")
                    namespaceUri = fields.get("Namespace")
                    attType = fields.get("AttType")
                    family = fields.get("Family")

                    if href:
                        if namespaceUri and attType in ("SCH", "ENT"):
                            if namespaceUri not in self.standardTaxonomiesDict:
                                self.standardTaxonomiesDict[namespaceUri] = (href, localHref)
                            authority = UrlUtil.authority(namespaceUri)
                            self.standardAuthorities.add(authority)
                            if family == "BASE":
                                self.baseTaxonomyNamespaces.add(namespaceUri)
                        if href not in self.standardTaxonomiesDict:
                            # NOTE(review): still raises TypeError when a Loc
                            # has an Href but no AttType child - confirm what
                            # value such an entry should get.
                            self.standardTaxonomiesDict[href] = "Allowed" + attType
                        if localHref:
                            self.standardLocalHrefs.add(localHref)
                    elif attType == "SCH" and family == "BASE":
                        self.baseTaxonomyNamespaces.add(namespaceUri)

        except (EnvironmentError,
                etree.LxmlError) as err:
            cntlr.addToLog("{0}: import error: {1}".format(basename,err))
            etree.clear_error_log()
Exemple #20
0
def html_to_article(content, language):
    """Extract the main article markup from an HTML string.

    Parses ``content`` with the parser of a ``NewspaperConfig`` set to
    ``language``, cleans the document, picks the best content node and
    returns it converted to HTML, stripped and unescaped.

    Returns '' when the input is empty after stripping, when the parser
    yields no document, or when no best node can be determined.
    """
    content = content.strip()
    if not content:
        return ''

    config = NewspaperConfig()
    config.language = language

    doc = config.get_parser().fromstring(content)
    if doc is None:
        return ''

    # Split block-level elements with newlines
    for tag in _BLOCKLEVEL_TAGS:
        if tag in _MEANINGLESS_TAGS:
            continue
        for node in doc.xpath('//{}'.format(tag)):
            for _unused in range(2):
                node.append(etree.Element('br'))

    # Initial cleanup
    cleaner = _NewspaperCleaner(config)
    doc = cleaner.clean(doc)

    # Best node estimation
    extractor = NewspaperExtractor(config)
    top = extractor.calculate_best_node(doc)
    if top is None:
        # Drop the references to the parsed tree before clearing the log.
        del doc, cleaner, extractor
        etree.clear_error_log()

        return ''

    top = extractor.post_cleanup(top)

    # Cleanup dummy nodes used for estimation
    for dummy in top.xpath("//p[@newspaper='dummy']"):
        dummy.getparent().remove(dummy)

    # Custom formatting to avoid unnecessary computations
    formatter = NewspaperFormatter(config)
    formatter.top_node = top
    formatter.remove_negativescores_nodes()
    article = unescape(str(formatter.convert_to_html()).strip())

    # Drop the references to the parsed tree before clearing the log.
    del doc, top, cleaner, extractor, formatter
    etree.clear_error_log()

    return article
Exemple #21
0
 def _validate_xhtml(func_name, *args, **kwargs):
     """Assert that the current page is well-formed when it is XHTML strict.

     Pages whose source does not mention "xhtml1-strict.dtd" are skipped.
     A syntax error is re-raised as a TwillAssertionError carrying the
     formatted lxml error log.  The arguments are not used.
     """
     page = b.get_html()
     if "xhtml1-strict.dtd" in page:
         etree.clear_error_log()
         try:
             # lxml will try to convert the URL to unicode by itself,
             # this won't work for non-ascii URLs, so decode it up front
             base_url = b.get_url()
             if isinstance(base_url, str):
                 base_url = unicode(base_url, "latin1")
             etree.parse(StringIO(page), base_url=base_url)
         except etree.XMLSyntaxError as e:
             report = _format_error_log(page, e.error_log)
             raise twill.errors.TwillAssertionError(report)
Exemple #22
0
 def _validate_xhtml(func_name, *args, **kwargs):
     """Assert that the current page is well-formed when it is XHTML strict.

     Nothing is checked unless the page source mentions
     "xhtml1-strict.dtd".  A syntax error becomes a TwillAssertionError
     whose message is the formatted lxml error log.  The arguments are not
     used.
     """
     page = b.get_html()
     if "xhtml1-strict.dtd" in page:
         etree.clear_error_log()
         try:
             # lxml will try to convert the URL to unicode by itself,
             # this won't work for non-ascii URLs, so decode it up front
             base_url = b.get_url()
             if isinstance(base_url, str):
                 base_url = unicode(base_url, 'latin1')
             etree.parse(StringIO(page), base_url=base_url)
         except etree.XMLSyntaxError as e:
             report = _format_error_log(page, e.error_log)
             raise twill.errors.TwillAssertionError(report)
Exemple #23
0
 def validate(file_path):
     """Parse file_path and report fatal XML syntax errors.

     Loads and compiles 'minimalSchema.xsd' from the project's
     documents/schema directory, then parses file_path.  When parsing
     fails, returns a list of "<message> on line <line>" strings, one per
     error log entry of FATAL level.

     NOTE(review): the original docstring spoke of the Daisy Pipeline
     validator and of returning an empty string on success.  The code
     visible here uses lxml only, never applies the compiled schema and
     returns None when parsing succeeds - confirm whether the remainder of
     the function is missing."""

     schema_path = join(
         settings.PROJECT_DIR, 'documents', 'schema', 'minimalSchema.xsd')
     xmlschema = etree.XMLSchema(etree.parse(schema_path))

     etree.clear_error_log()
     try:
         doc = etree.parse(file_path)
     except etree.XMLSyntaxError as e:
         fatal_entries = e.error_log.filter_from_level(etree.ErrorLevels.FATAL)
         return ["%s on line %s" % (entry.message, entry.line)
                 for entry in fatal_entries]
Exemple #24
0
def validateXML(xml_string):
    """Parse ``xml_string`` and return either the tree or a list of errors.

    Returns the parsed root element on success.  On an XML syntax error
    returns a list of ``(line, message)`` tuples taken from the error log;
    on any other exception returns ``[(0, "unknown error <text>")]``.
    Callers have to tell the two result kinds apart themselves.
    """
    try:
        etree.clear_error_log()
        return etree.fromstring(xml_string, etree.XMLParser())
    except etree.XMLSyntaxError as e:
        return [(error.line, error.message) for error in e.error_log]
    except Exception as e:
        return [(0, "unknown error " + str(e))]
Exemple #25
0
def update_pmode(obj, mode="add"):
    """Add a gateway party to, or remove it from, the pmode XML file.

    Parses ``PMODE_FILE``.  When ``mode`` is 'add' and no ``party`` named
    ``obj.gateway_party_name`` exists, a ``party`` element (with its
    ``identifier`` child) and a matching ``initiatorParty`` are created.
    In every other case the matching ``party`` and ``initiatorParty``
    elements are removed.  The tree is then written back to ``PMODE_FILE``.

    Returns None on success.  On any exception returns
    ``{'success': False, 'msg': ...}`` instead of raising.

    NOTE(review): the removal branch also runs for mode 'add' when the
    party already exists, which deletes it - confirm this is intended.
    """
    etree.clear_error_log()
    # parse the pmode xml template
    try:
        pmode = etree.parse(PMODE_FILE)

        parties = pmode.find("/businessProcesses/parties")
        init_parties = pmode.find("//initiatorParties")
        party_exists = pmode.xpath("//party[@name='{}']".format(
            obj.gateway_party_name))
        if not party_exists and mode == 'add':
            new_party = etree.SubElement(
                parties, 'party', {
                    'name': obj.gateway_party_name,
                    'endpoint': obj.endpoint,
                    'allowChunking': 'false'
                })
            etree.SubElement(new_party, 'identifier', {
                'partyId': obj.gateway_party_id,
                'partyIdType': 'partyTypeUrn'
            })
            # add to initiator parties
            etree.SubElement(init_parties, 'initiatorParty',
                             {'name': obj.gateway_party_name})
        else:
            # get party to remove
            for party in parties.xpath(".//party[@name='{}']".format(
                    obj.gateway_party_name)):
                party.getparent().remove(party)
            # also remove from initiatorParties
            for init_party in init_parties.xpath(
                    ".//initiatorParty[@name='{}']".format(
                        obj.gateway_party_name)):
                init_party.getparent().remove(init_party)
        # Serialise before opening the file: opening with 'w' truncates it,
        # so a serialisation failure must not leave an empty pmode file.
        serialized = etree.tostring(pmode,
                                    pretty_print=True,
                                    xml_declaration=True,
                                    encoding='UTF-8')
        # write the result tree back to xml; the context manager closes the
        # file even when the write fails
        with open(PMODE_FILE, 'w') as f:
            f.write(serialized)
    except Exception as e:
        return {
            'success': False,
            'msg': "Could not parse pmode template: {}".format(e.message)
        }
def validateXML(xml_doc, schema, Resultfile2):
    """Validate a parsed document against a schema and log any errors.

    When validation fails, every message in ``schema.error_log`` is appended
    (one per line) to ``Resultfile2 + "XSDVALIDATION.log"``. The root element
    of ``xml_doc`` is cleared before returning.

    :param xml_doc: parsed document exposing ``getroot()``.
    :param schema: validator exposing ``validate()`` and ``error_log``.
    :param Resultfile2: path prefix used to build the log file name.
    """
    etree.clear_error_log()
    root_new = xml_doc.getroot()
    print("Parsing Done")
    if schema.validate(xml_doc):
        print("File is valid against the given Schema: ")
    else:
        # NOTE(review): ``file1`` is not a parameter of this function; it is
        # resolved from an enclosing scope that is not visible here.
        print(file1 + " is not valid against Schema")
        #logpath = filepath.split(".")[0]+".log"
        logpath = Resultfile2 + "XSDVALIDATION.log"
        # The context manager closes the log even if a write fails.
        with open(logpath, "a+") as fs:
            for error in schema.error_log:
                fs.write(error.message)
                fs.write("\n")
    root_new.clear()
Exemple #27
0
def loadUtr(
    modelManager
):  # Build a dictionary of item types that are constrained by the UTR.
    """Populate ``modelManager.utrDict`` from the Unit Type Registry.

    The result maps item type -> unit ``id`` -> registry entry, where an
    entry is a seven-element list (see ``regEntryTags`` for the order).
    Environment and lxml errors are logged through the controller instead of
    being raised.
    """
    # This attribute is unbound on modelManager until this function is called.
    modelManager.utrDict = {}
    registry = modelManager.utrDict
    utrUrl = "http://www.xbrl.org/utr/utr.xml"
    #utrUrl = os.path.join(modelManager.cntlr.configDir, "utr.xml")
    modelManager.cntlr.showStatus(_("Loading Unit Type Registry"))
    # Child elements making up a RegEntry, in positional order.
    regEntryTags = (
        "{http://www.xbrl.org/2009/utr}unitId",  # 0
        "{http://www.xbrl.org/2009/utr}nsUnit",  # 1
        "{http://www.xbrl.org/2009/utr}nsNumeratorItemType",  # 2
        "{http://www.xbrl.org/2009/utr}numeratorItemType",  # 3
        "{http://www.xbrl.org/2009/utr}nsDenominatorItemType",  # 4
        "{http://www.xbrl.org/2009/utr}denominatorItemType",  # 5
        "{http://www.xbrl.org/2009/utr}nsItemType",  # 6 often None
    )
    try:
        registryDoc = etree.parse(
            modelManager.cntlr.webCache.getfilename(utrUrl))
        for unitElt in registryDoc.iter(
                tag="{http://www.xbrl.org/2009/utr}unit"):
            unitKey = unitElt.get("id")
            itemType = unitElt.findtext(
                "{http://www.xbrl.org/2009/utr}itemType")
            # TO DO: This indexing scheme assumes that there are no name clashes in item types of the registry.
            if itemType not in registry:
                registry[itemType] = {}
            # a RegEntry is just an array.
            registry[itemType][unitKey] = [
                unitElt.findtext(tag) for tag in regEntryTags
            ]
    except (EnvironmentError, etree.LxmlError) as err:
        modelManager.cntlr.addToLog(
            "Unit Type Registry Import error: {0}".format(err))
        etree.clear_error_log()
Exemple #28
0
 def loadMappings(self):
     """Read ``mapFile`` and ``mapPath`` entries from the mappings document.

     ``mapFile`` elements fill ``self.mappedFiles`` (from -> to) and
     ``mapPath`` elements are appended to ``self.mappedPaths`` as
     ``(from, to)`` tuples. Environment and lxml errors are logged through
     the controller rather than raised.
     """
     mappingsFileName = os.path.basename(self.mappingsUrl)
     cntlr = self.modelManager.cntlr
     cntlr.showStatus(_("parsing {0}").format(mappingsFileName))
     try:
         mappingsDoc = etree.parse(self.mappingsUrl)
         mappingsDoc.xinclude()
         for mapFileElt in mappingsDoc.iter(tag="mapFile"):
             source = mapFileElt.get("from")
             self.mappedFiles[source] = mapFileElt.get("to")
         for mapPathElt in mappingsDoc.iter(tag="mapPath"):
             pathPair = (mapPathElt.get("from"), mapPathElt.get("to"))
             self.mappedPaths.append(pathPair)
     except (EnvironmentError, etree.LxmlError) as err:
         logArgs = {"error": str(err), "name": self.name, "importFile": mappingsFileName}
         cntlr.addToLog(
             _("Disclosure System \"%(name)s\" import %(importFile)s, error: %(error)s"),
             messageCode="arelle:disclosureSystemImportError",
             messageArgs=logArgs,
             level=logging.ERROR)
         etree.clear_error_log()
Exemple #29
0
 def loadMappings(self):
     """Load file and path redirections from ``self.mappingsUrl``.

     Each ``mapFile`` element is stored in ``self.mappedFiles`` keyed by its
     ``from`` attribute; each ``mapPath`` element is appended to
     ``self.mappedPaths`` as a ``(from, to)`` tuple. Environment and lxml
     errors are logged through the controller rather than raised.
     """
     importFile = os.path.basename(self.mappingsUrl)
     controller = self.modelManager.cntlr
     controller.showStatus(_("parsing {0}").format(importFile))
     try:
         tree = etree.parse(self.mappingsUrl)
         tree.xinclude()
         for fileElt in tree.iter(tag="mapFile"):
             fromFile, toFile = fileElt.get("from"), fileElt.get("to")
             self.mappedFiles[fromFile] = toFile
         for pathElt in tree.iter(tag="mapPath"):
             fromPath, toPath = pathElt.get("from"), pathElt.get("to")
             self.mappedPaths.append((fromPath, toPath))
     except (EnvironmentError, etree.LxmlError) as err:
         controller.addToLog(
             _("Disclosure System \"%(name)s\" import %(importFile)s, error: %(error)s"),
             messageCode="arelle:disclosureSystemImportError",
             messageArgs={"error": str(err), "name": self.name, "importFile": importFile},
             level=logging.ERROR)
         etree.clear_error_log()
Exemple #30
0
def check_syntax(
    xml_file: str
) -> Tuple[bool, Optional[str], Optional[str], Optional[etree.ElementTree]]:
    """Parse ``xml_file`` and report whether it is well-formed.

    Returns a 4-tuple ``(ok, error_kind, error_detail, document)``:

    * ``(True, None, None, doc)`` when parsing succeeds;
    * ``(False, 'IO', 'Invalid File', None)`` when the file cannot be read;
    * ``(False, 'Syntax', <error log text>, None)`` on a syntax error.
    """
    # Reset lxml's global error log before parsing.
    etree.clear_error_log()

    try:
        parsed = etree.parse(xml_file)
    except IOError:
        outcome = (False, 'IO', 'Invalid File', None)
    except etree.XMLSyntaxError as syntax_err:
        outcome = (False, 'Syntax', str(syntax_err.error_log), None)  #pylint: disable=no-member
    else:
        outcome = (True, None, None, parsed)
    return outcome
Exemple #31
0
def loadUtr(
    modelManager
):  # Build a dictionary of item types that are constrained by the UTR
    """Load the Unit Type Registry into the disclosure system.

    ``modelManager.disclosureSystem.utrItemTypeEntries`` is replaced by a
    ``defaultdict(dict)`` mapping item type -> unit id -> ``UtrEntry``.
    Environment and lxml errors are logged through the controller instead of
    being raised. The registry stream is closed on every exit path.
    """
    modelManager.disclosureSystem.utrItemTypeEntries = utrItemTypeEntries = defaultdict(
        dict)
    # print('UTR LOADED FROM '+utrUrl);
    # skip status message as it hides prior activity during which this might have just obtained symbols
    # modelManager.cntlr.showStatus(_("Loading Unit Type Registry"))
    utrFile = None
    try:
        from arelle.FileSource import openXmlFileStream
        # normalize any relative paths to config directory
        utrFile = openXmlFileStream(modelManager.cntlr,
                                    modelManager.disclosureSystem.utrUrl,
                                    stripDeclaration=True)[0]
        xmldoc = etree.parse(utrFile)
        for unitElt in xmldoc.iter(tag="{http://www.xbrl.org/2009/utr}unit"):
            u = UtrEntry()
            u.id = unitElt.get("id")
            u.unitId = unitElt.findtext("{http://www.xbrl.org/2009/utr}unitId")
            u.nsUnit = (
                unitElt.findtext("{http://www.xbrl.org/2009/utr}nsUnit")
                or None)  # None if empty entry
            u.itemType = unitElt.findtext(
                "{http://www.xbrl.org/2009/utr}itemType")
            u.nsItemType = unitElt.findtext(
                "{http://www.xbrl.org/2009/utr}nsItemType")
            u.numeratorItemType = unitElt.findtext(
                "{http://www.xbrl.org/2009/utr}numeratorItemType")
            u.nsNumeratorItemType = unitElt.findtext(
                "{http://www.xbrl.org/2009/utr}nsNumeratorItemType")
            u.denominatorItemType = unitElt.findtext(
                "{http://www.xbrl.org/2009/utr}denominatorItemType")
            u.nsDenominatorItemType = unitElt.findtext(
                "{http://www.xbrl.org/2009/utr}nsDenominatorItemType")
            u.isSimple = u.numeratorItemType is None and u.denominatorItemType is None
            u.symbol = unitElt.findtext("{http://www.xbrl.org/2009/utr}symbol")
            # TO DO: This indexing scheme assumes that there are no name clashes in item types of the registry.
            (utrItemTypeEntries[u.itemType])[u.id] = u
    except (EnvironmentError, etree.LxmlError) as err:
        modelManager.cntlr.addToLog(
            "Unit Type Registry Import error: {0}".format(err))
        etree.clear_error_log()
    finally:
        # Close the stream even when an exception not handled above escapes.
        if utrFile is not None:
            utrFile.close()
Exemple #32
0
def fragment_to_text(html):
    """Render an HTML fragment as plain text.

    Comments are dropped, tags listed in ``_MEANINGLESS_TAGS`` are replaced
    by a line break, tags listed in ``_BLOCKLEVEL_TAGS`` are surrounded by
    two line breaks on each side, and whitespace is collapsed so that at
    most one blank line separates paragraphs. Returns ``''`` for blank input.
    """
    if not html.strip():
        return ''

    soup = BeautifulSoup(html, 'lxml')
    if soup is None:
        raise ValueError('Can\'t build DOM tree with LXML')

    def is_comment(txt):
        return isinstance(txt, Comment)

    def has_newline(string):
        return '\n' in string

    # Strip comments out of the tree
    for comment_node in soup(text=is_comment):
        comment_node.extract()

    # Tags carrying no textual meaning collapse to a single <br>
    for useless in soup(_MEANINGLESS_TAGS):
        useless.replace_with(soup.new_tag('br'))

    # Two <br> before and two after every block-level tag
    for block in soup(_BLOCKLEVEL_TAGS):
        for _unused in range(2):
            block.insert_before(soup.new_tag('br'))
        for _unused in range(2):
            block.insert_after(soup.new_tag('br'))

    # Newlines inside text nodes are rendered as spaces (as browser does)
    for text_node in soup(string=has_newline):
        flattened = text_node.string.replace('\n', ' ')
        text_node.string.replace_with(flattened)

    # Every <br> becomes a real newline
    for line_break in soup('br'):
        line_break.replace_with('\n')

    # Normalise whitespace in the extracted text
    text = str(soup.getText()).strip()
    text = re.sub(' {2,}', ' ', text)
    text = text.replace(' \n', '\n')
    text = text.replace('\n ', '\n')
    text = re.sub('\n{3,}', '\n\n', text)

    # Release the tree and lxml's global error log
    soup.decompose()
    etree.clear_error_log()
    del soup

    return text
Exemple #33
0
    def insert_entries(self, entries_xml, taxids):
        """Parse UniProt XML and insert every child entry, then commit.

        ``entries_xml`` is handed to ``etree.fromstringlist``; each child of
        the resulting root is passed to ``self.insert_entry`` together with
        ``taxids`` and cleared afterwards to release memory.
        """
        # to avoid memory leak reload of etree is necessary
        # NOTE(review): the key checked is 'etree', not 'lxml.etree' -
        # confirm this condition can actually be true.
        if 'etree' in sys.modules:
            importlib.reload(etree)

        xml_parser = etree.XMLParser(collect_ids=False)
        root = etree.fromstringlist(entries_xml, xml_parser)

        for uniprot_entry in root:
            self.insert_entry(uniprot_entry, taxids)
            # drop the element's content as soon as it has been stored
            uniprot_entry.clear()
            del uniprot_entry

        etree.clear_error_log()
        del root

        self.session.commit()
Exemple #34
0
def run(xhtml_files: List[Path], dtd_file: Path, images: bool,
        links: bool) -> bool:
    """Check every XHTML file against the DTD and report overall success.

    :param xhtml_files: files to check.
    :param dtd_file: path of the DTD used for validation.
    :param images: forwarded to ``test``.
    :param links: forwarded to ``test``.
    :return: ``False`` if the DTD cannot be parsed or any file fails
        ``test``; ``True`` otherwise (including for an empty file list).
    """
    try:
        # Parse the DTD once up front so a broken DTD is reported only once.
        DTD(str(dtd_file))
    except DTDParseError as e:
        print(e.error_log, file=stderr)
        clear_error_log()
        return False

    success = True
    for xhtml_file in xhtml_files:
        # if you reuse the parser on too many documents it gets confused
        parser = XHTMLParser(dtd_validation=True, ns_clean=True)
        dtd = DTD(str(dtd_file))
        if settings.verbose:
            # The original printed an undefined name here (NameError).
            print(xhtml_file)
        if not test(xhtml_file, parser, dtd, images, links):
            success = False
    return success
Exemple #35
0
    def validate_cib(self, new_cib_elem):
        """Validate a CIB element against the Relax-NG schema.

        For a local schema the file is taken from ``self.local_dir``;
        otherwise ``self.tmp_schema_f()`` supplies a temporary schema file
        whose directory is removed again before returning or raising.

        :param new_cib_elem: element to validate; it is re-parsed from its
            serialised form before validation.
        :return: ``(is_valid, detail_msg)`` where ``detail_msg`` holds one
            ``"<level>: <message>"`` line per schema error (empty if valid).
        :raises PacemakerError: if the schema cannot be expanded or parsed,
            or the CIB XML cannot be parsed.
        """
        tmp_f = None
        if self.is_local:
            schema_f = os.path.join(self.local_dir, self.schema_filename)
        else:
            try:
                tmp_f = self.tmp_schema_f()
            except EnvironmentError as msg:
                raise PacemakerError("Cannot expand the Relax-NG schema: " + str(msg))
            if tmp_f is None:
                raise PacemakerError("Cannot expand the Relax-NG schema")
            schema_f = tmp_f

        try:
            try:
                cib_elem = etree.fromstring(etree.tostring(new_cib_elem))
            except etree.Error as msg:
                raise PacemakerError("Failed to parse the CIB XML: " + str(msg))

            try:
                schema = etree.RelaxNG(file=schema_f)
            except etree.Error as msg:
                raise PacemakerError("Failed to parse the Relax-NG schema: " + str(msg))

            # Best effort only: a failure to reset the log must not abort
            # validation.
            try:
                etree.clear_error_log()
            except Exception:
                pass

            is_valid = schema.validate(cib_elem)
            detail_msg = ""
            if not is_valid:
                detail_msg = "".join(
                    error_entry.level_name + ": " + error_entry.message + "\n"
                    for error_entry in schema.error_log)
        finally:
            # Remove the temporary schema directory on every exit path,
            # including the PacemakerError ones above.
            if tmp_f is not None:
                try:
                    delete_dir(os.path.dirname(tmp_f))
                except Exception:
                    pass

        return (is_valid, detail_msg)
Exemple #36
0
def test(xhtml_file: Path, parser: XHTMLParser, dtd: DTD, images: bool,
         links: bool) -> bool:
    """Parse and DTD-validate one XHTML file, then run the optional checks.

    I/O, syntax and validation problems are printed to ``stderr`` and make
    the function return ``False``. When the document is valid, the image
    check (if ``images``) and the link check (if ``links``) are applied; the
    link check is skipped once the image check has failed. lxml's global
    error log is cleared on every exit path.
    """
    try:
        try:
            root = parse(source=str(xhtml_file), parser=parser).getroot()
            dtd.assertValid(root)
        except IOError as io_err:
            print(f"{xhtml_file}: {io_err.strerror}", file=stderr)
            return False
        except (XMLSyntaxError, DocumentInvalid) as lxml_err:
            print(str(lxml_err.error_log), file=stderr)
            return False

        passed = True
        if images:
            passed = passed and test_images(xhtml_file, root)
        if links:
            passed = passed and test_links(xhtml_file, root)
        return passed
    finally:
        clear_error_log()
Exemple #37
0
def validate_xml(xml_doc: etree.ElementTree,
                 xsd_file: str) -> Tuple[bool, Optional[str], Optional[str]]:
    """Validate ``xml_doc`` against the XSD stored in ``xsd_file``.

    Returns a 3-tuple ``(ok, error_kind, error_detail)``:

    * ``(True, None, None)`` when the document is valid;
    * ``(False, 'IO', 'XSD file I/O error')`` when the XSD cannot be read;
    * ``(False, 'Syntax', <error log text>)`` when the XSD is not well-formed;
    * ``(False, 'Schema', <error log text>)`` when the document is invalid.
    """
    # Reset lxml's global error log before doing any work.
    etree.clear_error_log()

    try:
        validator = etree.XMLSchema(etree.parse(xsd_file))
    except IOError:
        return (False, 'IO', 'XSD file I/O error')
    except etree.XMLSyntaxError as parse_err:
        return (False, 'Syntax', str(parse_err.error_log))  #pylint: disable=no-member

    try:
        validator.assertValid(xml_doc)
    except etree.DocumentInvalid as invalid:
        return (False, 'Schema', str(invalid.error_log))  #pylint: disable=no-member
    return (True, None, None)
Exemple #38
0
    def parse_schedule(xml, filename):
        """
        Builds a schedule node from an XML schedule definition, validating
        the XML against the enarksh XSD while parsing.

        :param str xml: The XML with a schedule definition
        :param str filename: Name used in the error message when the schedule
                             fails its own validation.

        :rtype: enarksh.xml_reader.node.ScheduleNode
        """
        xsd_path = os.path.join(C.HOME, 'etc/enarksh.xsd')
        with open(xsd_path, 'rb') as handle:
            xsd_source = handle.read()

        etree.clear_error_log()
        xsd_schema = etree.XMLSchema(etree.XML(xsd_source))
        validating_parser = etree.XMLParser(schema=xsd_schema, encoding='utf8')
        try:
            document_root = etree.fromstring(bytes(xml, 'utf8'), validating_parser)

            # Only a 'Schedule' element is accepted as document root.
            if document_root.tag != 'Schedule':
                raise Exception("Root element must be 'Schedule' but '{0!s}' was found.".format(document_root.tag))

            schedule = create_node('Schedule')
            schedule.read_xml(document_root)
            problems = schedule.validate()
            if problems:
                raise Exception(
                    "File '{0!s}' is not a valid schedule configuration file.\n{1!s}".format(filename, problems))

            # Compute recursion and dependency levels.
            schedule.set_levels()
        except etree.XMLSyntaxError as syntax_error:
            # Log warnings and worse from the parser, then propagate.
            relevant = syntax_error.error_log.filter_from_level(etree.ErrorLevels.WARNING)
            logging.getLogger('enarksh').error(relevant)
            raise syntax_error

        return schedule
Exemple #39
0
def check_html(filename, html_lines, html_hints, quiet):
    """Validates the given HTML (as XHTML actually)

    The page is rebuilt from `html_lines` with Jinja2 expressions removed
    and an XHTML doctype prepended (plus any missing html/head/body
    wrappers), then parsed with lxml.

    :param filename: name used as prefix in the printed messages
    :param html_lines: indexable sequence of ``(linenum, line)`` pairs
    :param html_hints: list of ``(linenum, hint)`` pairs; an error whose
                       message contains the applicable hint is reported as
                       ignored. The list is consumed (entries are deleted).
    :param quiet: when false, the source lines are echoed as well
    :return: 0 if the page parses, otherwise the number of errors that were
             not ignored through a hint
    """
    global etree
    print("\n# -- HTML check for '%s'" % filename)
    # re-build the page content, replacing the DTD with the XHTML DTD,
    # or adding it if missing. Jinja2 expressions are removed.
    opened_braces = 0
    normalized_lines = []
    has_html_elt = has_head_elt = has_body_elt = False
    for linenum, line in html_lines:
        has_html_elt = has_html_elt or '<html>' in line
        has_head_elt = has_head_elt or '<head>' in line
        has_body_elt = has_body_elt or '<body>' in line
        # the HTML5 doctype line is dropped; XHTML_DOCTYPE is added below
        if line.strip() != '<!DOCTYPE html>':
            normalized, opened_braces = remove_jinja_exprs(
                linenum, line, opened_braces)
            normalized_lines.append(normalized)
    is_xml = html_lines[0][1].startswith('<?xml ')
    if not is_xml:
        # wrappers are added to the first/last line rather than as new
        # lines, so the line count of the page is left unchanged
        if not has_body_elt:
            normalized_lines[0] = '<body>' + normalized_lines[0]
            normalized_lines[-1] = normalized_lines[-1] + '</body>'
        if not has_head_elt:
            normalized_lines[0] = '<head><title/></head>' + normalized_lines[0]
        if not has_html_elt:
            normalized_lines[0] = '<html>' + normalized_lines[0]
            normalized_lines[-1] = normalized_lines[-1] + '</html>'
        normalized_lines[0] = XHTML_DOCTYPE + normalized_lines[0]
    page = '\n'.join(normalized_lines)
    ## print('LINES %s' % ''.join("%5d: %s" % l for l in html_lines)) # DEBUG
    ## print('PAGE %s' %
    ##       '\n'.join("%5d: %s" % l for l in enumerate(normalized_lines)))
    ## print('HINTS', repr(html_hints)) # DEBUG
    etree.clear_error_log()
    try:
        # lxml will try to convert the URL to unicode by itself,
        # this won't work for non-ascii URLs, so help him
        etree.parse(StringIO(page), base_url='.')  #  base_url ??
        if not quiet:
            for lineinfo in html_lines:
                # NOTE(review): the trailing comma looks like the Python 2
                # "no newline" print idiom - confirm the target version
                print('%5d %s' % lineinfo),
        return 0
    except etree.XMLSyntaxError as e:
        # collect (line, column, message) for every entry in the error log
        errors = []
        for entry in e.error_log:
            errors.append((entry.line, entry.column, entry.message))
        real_errors = []

        def process_error(linenum, col, msg):
            # Report one error, marking it as ignored when the applicable
            # hint text occurs in the message.
            hint_linenum = hint = None
            # discard hints located before this error's line, always
            # keeping the last remaining hint as a candidate
            while html_hints:
                hint_linenum, hint = html_hints[0]
                if hint_linenum >= linenum or len(html_hints) == 1:
                    break
                del html_hints[0]
            if hint and hint in msg:
                del html_hints[0]
                ignored = ' (IGNORED "%s")' % hint
            else:
                real_errors.append(linenum)
                ignored = ''
            print('%s:%s:%s: %s%s' % (filename, linenum, col, msg, ignored))

        # interleave the source lines with the errors reported for them
        for linenum, line in html_lines:
            if not quiet:
                print('%5d %s' % (linenum, line)),
            while errors and errors[0][0] == linenum:
                err = errors[0]
                del errors[0]
                process_error(*err)
        # in case some errors haven't been flushed at this point...
        for err in errors:
            process_error(*err)
        return len(real_errors)
Exemple #40
0
def loadUtr(modelXbrl): # Build a dictionary of item types that are constrained by the UTR
    """Load the Unit Type Registry and report inconsistent entries.

    ``disclosureSystem.utrItemTypeEntries`` of the model's manager is
    replaced by a ``defaultdict(dict)`` mapping item type -> unit id ->
    ``UtrEntry``; only entries whose status is ``"REC"`` are indexed.
    Duplicated or incomplete definitions, as well as environment and lxml
    errors, are reported through ``modelXbrl.error``. The registry stream is
    closed on every exit path.
    """
    modelManager = modelXbrl.modelManager
    modelManager.disclosureSystem.utrItemTypeEntries = utrItemTypeEntries = defaultdict(dict)
    # print('UTR LOADED FROM '+utrUrl);
    # skip status message as it hides prior activity during which this might have just obtained symbols
    # modelManager.cntlr.showStatus(_("Loading Unit Type Registry"))
    utrFile = None
    try:
        from arelle.FileSource import openXmlFileStream
        # normalize any relative paths to config directory
        unitDupCheck = set()
        utrFile = openXmlFileStream(modelManager.cntlr, modelManager.disclosureSystem.utrUrl, stripDeclaration=True)[0]
        xmldoc = etree.parse(utrFile)
        for unitElt in xmldoc.iter(tag="{http://www.xbrl.org/2009/utr}unit"):
            u = UtrEntry()
            u.id = unitElt.get("id")
            u.unitId = unitElt.findtext("{http://www.xbrl.org/2009/utr}unitId")
            u.nsUnit = (unitElt.findtext("{http://www.xbrl.org/2009/utr}nsUnit") or None) # None if empty entry
            u.itemType = unitElt.findtext("{http://www.xbrl.org/2009/utr}itemType")
            u.nsItemType = unitElt.findtext("{http://www.xbrl.org/2009/utr}nsItemType")
            u.numeratorItemType = unitElt.findtext("{http://www.xbrl.org/2009/utr}numeratorItemType")
            u.nsNumeratorItemType = unitElt.findtext("{http://www.xbrl.org/2009/utr}nsNumeratorItemType")
            u.denominatorItemType = unitElt.findtext("{http://www.xbrl.org/2009/utr}denominatorItemType")
            u.nsDenominatorItemType = unitElt.findtext("{http://www.xbrl.org/2009/utr}nsDenominatorItemType")
            u.isSimple = all(e is None for e in (u.numeratorItemType, u.nsNumeratorItemType, u.denominatorItemType, u.nsDenominatorItemType))
            u.symbol = unitElt.findtext("{http://www.xbrl.org/2009/utr}symbol")
            u.status = unitElt.findtext("{http://www.xbrl.org/2009/utr}status")
            if u.status == "REC":
                # TO DO: This indexing scheme assumes that there are no name clashes in item types of the registry.
                (utrItemTypeEntries[u.itemType])[u.id] = u
            unitDupKey = (u.unitId, u.nsUnit, u.status)
            if unitDupKey in unitDupCheck:
                modelXbrl.error("arelleUtrLoader:entryDuplication",
                                "Unit Type Registry entry duplication: id %(id)s unit %(unitId)s nsUnit %(nsUnit)s status %(status)s",
                                modelObject=modelXbrl, id=u.id, unitId=u.unitId, nsUnit=u.nsUnit, status=u.status)
            unitDupCheck.add(unitDupKey)
            if u.isSimple:
                if not u.itemType:
                    modelXbrl.error("arelleUtrLoader:simpleDefMissingField",
                                    "Unit Type Registry simple unit definition missing item type: id %(id)s unit %(unitId)s nsUnit %(nsUnit)s status %(status)s",
                                    modelObject=modelXbrl, id=u.id, unitId=u.unitId, nsUnit=u.nsUnit, status=u.status)
                # NOTE(review): isSimple is only true when all four fields
                # are None, so this condition cannot hold here; kept as-is.
                if u.numeratorItemType or u.denominatorItemType or u.nsNumeratorItemType or u.nsDenominatorItemType:
                    modelXbrl.error("arelleUtrLoader",
                                    "Unit Type Registry simple unit definition may not have complex fields: id %(id)s unit %(unitId)s nsUnit %(nsUnit)s status %(status)s",
                                    modelObject=modelXbrl, id=u.id, unitId=u.unitId, nsUnit=u.nsUnit, status=u.status)
            else:
                if u.symbol:
                    modelXbrl.error("arelleUtrLoader:complexDefSymbol",
                                    "Unit Type Registry complex unit definition may not have symbol: id %(id)s unit %(unitId)s nsUnit %(nsUnit)s status %(status)s",
                                    modelObject=modelXbrl, id=u.id, unitId=u.unitId, nsUnit=u.nsUnit, status=u.status)
                if not u.numeratorItemType or not u.denominatorItemType:
                    modelXbrl.error("arelleUtrLoader:complexDefMissingField",
                                    "Unit Type Registry complex unit definition must have numerator and denominator fields: id %(id)s unit %(unitId)s nsUnit %(nsUnit)s status %(status)s",
                                    modelObject=modelXbrl, id=u.id, unitId=u.unitId, nsUnit=u.nsUnit, status=u.status)
    except (EnvironmentError,
            etree.LxmlError) as err:
        # Report on the model passed in, like every other error above,
        # rather than on whatever model the manager currently references.
        modelXbrl.error("arelleUtrLoader:error",
                        "Unit Type Registry Import error: %(error)s",
                        modelObject=modelXbrl, error=err)
        etree.clear_error_log()
    finally:
        # Close the stream even when an exception not handled above escapes.
        if utrFile is not None:
            utrFile.close()
Exemple #41
0
    def loadStandardTaxonomiesDict(self):
        """Rebuild the standard-taxonomy lookup tables of the disclosure system.

        Does nothing unless ``self.selection`` is set. The lookup containers
        are reset first; if ``self.standardTaxonomiesUrl`` is empty the
        method stops there. Otherwise the standard taxonomies document and
        ``xbrlschemafiles.xml`` from the config directory are parsed (with
        XInclude) and every ``Loc`` element is recorded. Environment and
        lxml errors are logged through the controller rather than raised.
        """
        if self.selection:
            self.standardTaxonomiesDict = defaultdict(set)
            self.familyHrefs = defaultdict(set)
            self.standardLocalHrefs = defaultdict(set)
            self.standardAuthorities = set()
            self.standardPrefixes = {}
            if not self.standardTaxonomiesUrl:
                return
            basename = os.path.basename(self.standardTaxonomiesUrl)
            self.modelManager.cntlr.showStatus(_("parsing {0}").format(basename))
            try:
                from arelle.FileSource import openXmlFileStream
                for filepath in (self.standardTaxonomiesUrl, 
                                 os.path.join(self.modelManager.cntlr.configDir,"xbrlschemafiles.xml")):
                    xmldoc = etree.parse(filepath) # must open with file path for xinclude to know base of file
                    xmldoc.xinclude() # to include elements below root use xpointer(/*/*)
                    # only the first Erxl element is inspected for a version
                    for erxlElt in xmldoc.iter(tag="Erxl"):
                        v = erxlElt.get("version")
                        if v and re.match(r"[0-9]+([.][0-9]+)*$", v):
                            vSplit = v.split('.') # at least 3 digits always!
                            self.version = tuple(int(n) for n in vSplit) + tuple(0 for n in range(3 - len(vSplit)))
                        break
                    for locElt in xmldoc.iter(tag="Loc"):
                        href = None
                        localHref = None
                        namespaceUri = None
                        prefix = None
                        attType = None
                        family = None
                        elements = None
                        version = None
                        for childElt in locElt.iterchildren():
                            ln = childElt.tag
                            # .text is None for an empty element; treat it
                            # as an empty value instead of raising an
                            # AttributeError that is not handled below.
                            value = (childElt.text or "").strip()
                            if ln == "Href":
                                href = value
                            elif ln == "LocalHref":
                                localHref = value
                            elif ln == "Namespace":
                                namespaceUri = value
                            elif ln == "Prefix":
                                prefix = value
                            elif ln == "AttType":
                                attType = value
                            elif ln == "Family":
                                family = value
                            elif ln == "Elements":
                                elements = value
                            elif ln == "Version":
                                version = value
                        if href:
                            if namespaceUri and (attType == "SCH" or attType == "ENT"):
                                self.standardTaxonomiesDict[namespaceUri].add(href)
                                if localHref:
                                    self.standardLocalHrefs[namespaceUri].add(localHref)
                                authority = UrlUtil.authority(namespaceUri)
                                self.standardAuthorities.add(authority)
                                if family == "BASE":
                                    self.baseTaxonomyNamespaces.add(namespaceUri)
                                if prefix:
                                    self.standardPrefixes[namespaceUri] = prefix
                            if href not in self.standardTaxonomiesDict:
                                self.standardTaxonomiesDict[href] = "Allowed" + attType
                            if family:
                                self.familyHrefs[family].add(ErxlLoc(family, version, href, attType, elements, namespaceUri))
                        elif attType == "SCH" and family == "BASE":
                            self.baseTaxonomyNamespaces.add(namespaceUri)

            except (EnvironmentError,
                    etree.LxmlError) as err:
                self.modelManager.cntlr.addToLog(_("Disclosure System \"%(name)s\" import %(importFile)s, error: %(error)s"),
                                                 messageCode="arelle:disclosureSystemImportError", 
                                                 messageArgs={"error": str(err), "name": self.name, "importFile": basename}, 
                                                 level=logging.ERROR)
                etree.clear_error_log()
    def process_response(self, request, response):
        """Apply the theme transform to the response content.

        The response is returned untouched when there are no settings, the
        theme is not enabled for this request/response, the content cannot
        be parsed into a tree, or no transform can be set up. Otherwise
        ``response.content`` is replaced by the serialised result and, if
        the ``update_content_length`` setting is on, ``Content-Length`` is
        refreshed.

        Outside ``DevelopmentMode`` an lxml error raised by the transform
        propagates; in ``DevelopmentMode`` it is captured and debug output
        is added to the page instead.
        """
        settings = getSettings()

        if settings is None or not isThemeEnabled(request, response, settings):
            return response

        result = parseTree(response)
        if result is None:
            return response

        # tracing is requested through the diazo.debug query parameter and
        # only honoured in development mode
        runtrace = (DevelopmentMode and
                    request.GET.get(u'diazo.debug', u'').lower() in TRUE)

        try:
            etree.clear_error_log()

            if settings.get('doctype'):
                result.doctype = settings.get('doctype')
                # make sure the doctype ends with a newline
                if not result.doctype.endswith('\n'):
                    result.doctype += '\n'

            transform = setupTransform(request, response, runtrace)
            if transform is None:
                return response

            parameterExpressions = settings.get('parameter_expressions') or {}
            params = prepareThemeParameters(request, parameterExpressions)

            transformed = transform(result.tree, **params)
            error_log = transform.error_log
            if transformed is not None:
                # Transformed worked, swap content with result
                result.tree = transformed
        except etree.LxmlError as e:
            if not(DevelopmentMode):
                raise
            # in development mode keep the untransformed tree and force the
            # debug trace so the error log is shown on the page
            error_log = e.error_log
            runtrace = True

        if runtrace:
            from diazo.runtrace import generate_debug_html
            # Add debug information to end of body
            # base URL = absolute URI with the request path stripped off
            base_url = request.build_absolute_uri()[:-len(request.path)]
            body = result.tree.xpath('/html/body')
            if body:
                body = body[0]
            else:
                # no <body> present: create one under the first /html node
                html = result.tree.xpath('/html')[0]
                body = etree.Element('body')
                html.append(body)
            # NOTE(review): insert(-1, ...) presumably places the node
            # before the last existing child rather than appending - confirm
            body.insert(-1, generate_debug_html(
                base_url + '/diazo-debug',
                rules=settings.get('rules'),
                rules_parser=getParser('rules', settings.get('read_network')),
                error_log=error_log,
            ))

        response.content = str(result)
        if settings.get('update_content_length'):
            response['Content-Length'] = str(len(response.content))
        return response
Exemple #43
0
    def parse(self, doc, options):
        """Collect XML diagnostics for ``doc`` into ``doc.diagnostics``.

        The file at ``doc.data_path`` is read and checked in three steps:
        well-formedness, validity as a schema when the document itself is an
        XSD or RelaxNG schema, and validity against a schema referenced from
        the document (located by ``self.look_for_schema``). Every problem is
        appended to ``doc.diagnostics`` via ``self.format_error``; nothing is
        returned. ``options`` is not used in this method.
        """
        doc.diagnostics = []

        doc_type = 'XML'
        etree.clear_error_log()

        with open(doc.data_path) as f:
            source = f.read()

        try:
            # parse the XML for errors
            if os.path.isabs(doc.path):
                doc_schema = self.get_schema(doc.path, doc.path, source)
                xml = doc_schema['xml']

                if doc_schema['type'] != None:
                    doc_type = doc_schema['type']
            else:
                xml = etree.fromstring(source)

            # if the doc is a schema itself, parse it for schema errors
            try:
                if doc_type == "XSD":
                    etree.XMLSchema(xml)
                elif doc_type == "RelaxNG":
                    etree.RelaxNG(xml)

            except (etree.RelaxNGError, etree.XMLSchemaParseError) as e:
                for error in e.error_log:
                    doc.diagnostics.append(
                        self.format_error(doc_type + " parsing error", error))

            except Exception as e:
                doc.diagnostics.append(
                    self.format_error(doc_type + " parsing error", e))

            # parse XML comments in document for a reference to a schema
            try:
                (schema_ref, schema_location,
                 comment_line) = self.look_for_schema(doc.path, xml)

                if schema_ref != None:
                    try:
                        # NOTE(review): `schema` stays unbound when the type
                        # is neither "XSD" nor "RelaxNG"; the resulting
                        # NameError is reported by the generic handler below
                        if schema_ref['type'] == "XSD":
                            schema = etree.XMLSchema(schema_ref['xml'])
                        elif schema_ref['type'] == "RelaxNG":
                            schema = etree.RelaxNG(schema_ref['xml'])

                        schema.assertValid(xml)

                    except (etree.DocumentInvalid, etree.RelaxNGValidateError,
                            etree.XMLSchemaValidateError):
                        # document does not conform to the referenced schema
                        for error in schema.error_log:
                            doc.diagnostics.append(
                                self.format_error(
                                    schema_ref['type'] + " validation error",
                                    error))

                    except (etree.RelaxNGError, etree.XMLSchemaParseError):
                        # the referenced schema itself could not be compiled
                        doc.diagnostics.append(
                            self.format_error(
                                schema_ref['type'] + " error",
                                "Schema is invalid " + schema_location,
                                comment_line))

                    except Exception as e:
                        doc.diagnostics.append(
                            self.format_error(schema_ref['type'] + " error",
                                              e))

            # NOTE(review): both handlers below use schema_location and
            # comment_line, which are unbound if look_for_schema() itself
            # raised; the NameError would then be swallowed by the bare
            # except at the end of this method - confirm
            except etree.XMLSyntaxError as e:
                doc.diagnostics.append(
                    self.format_error(
                        "Schema error",
                        "Unable to parse schema XML " + schema_location,
                        comment_line))

            except Exception as e:
                doc.diagnostics.append(
                    self.format_error("Schema error", e, comment_line))

        # handle XML parse errors
        except etree.XMLSyntaxError as e:
            for error in e.error_log:
                doc.diagnostics.append(
                    self.format_error("XML parsing error", error))

        # ignore other exceptions
        except:
            pass
Exemple #44
0
 def _test(self, sample):
     """Actual test entry: time lxml.html parsing of the sample's content.

     The lxml error log is cleared first, then ``sample['content']`` is
     parsed through ``self.timeit``.  Returns ``{'time': elapsed}``.
     """
     etree.clear_error_log()
     markup = sample['content']

     def parse_markup():
         return lxml.html.fromstring(markup)

     return {'time': self.timeit(parse_markup)}
    def transformIterable(self, result, encoding):
        """Run the theme transform over ``result`` when theming applies.

        Returns ``None`` when there are no theme settings, the theme is not
        enabled, ``parseTree`` yields nothing, or no transform could be set
        up.  Otherwise returns the parsed result, carrying the transformed
        tree if the transform produced one.  An lxml error is re-raised
        unless debug mode is on; in debug mode it switches the runtrace
        output on instead, and the debug HTML is inserted into /html/body.
        """
        policy = theming_policy(self.request)
        settings = policy.getSettings()
        # Short-circuit: isThemeEnabled() is only asked when settings exist.
        if settings is None or not policy.isThemeEnabled():
            return None

        result = self.parseTree(result)
        if result is None:
            return None

        debug_mode = getConfiguration().debug_mode
        runtrace = self.debug_theme()

        try:
            etree.clear_error_log()

            if settings.doctype:
                result.doctype = settings.doctype
                if not result.doctype.endswith('\n'):
                    result.doctype += '\n'

            transform = self.setupTransform(runtrace=runtrace)
            if transform is None:
                return None

            # The policy cache is only consulted outside debug mode.
            cache = None if debug_mode else policy.getCache()

            expressions = settings.parameterExpressions or {}
            context = findContext(self.request)
            params = prepareThemeParameters(
                context, self.request, expressions, cache)

            new_tree = transform(result.tree, **params)
            error_log = transform.error_log
            if new_tree is not None:
                # The transform worked, so its output replaces the tree.
                result.tree = new_tree
        except etree.LxmlError as exc:
            if not debug_mode:
                raise
            error_log = exc.error_log
            runtrace = True

        if not runtrace:
            return result

        from diazo.runtrace import generate_debug_html
        # Attach the debug information inside the page body.
        body = result.tree.xpath('/html/body')[0]
        portal_url = findContext(self.request).portal_url()
        debug_url = portal_url + '/++resource++diazo-debug'
        debug_node = generate_debug_html(
            debug_url,
            rules=settings.rules,
            rules_parser=getParser('rules', settings.readNetwork),
            error_log=error_log,
        )
        body.insert(-1, debug_node)
        return result
Exemple #46
0
            else:
                schema_f = tmp_f

        try:
            cib_elem = etree.fromstring(etree.tostring(new_cib_elem))
        except etree.Error, msg:
            raise PacemakerError("Failed to parse the CIB XML: " + str(msg))

        try:
            schema = etree.RelaxNG(file=schema_f)

        except etree.Error, msg:
            raise PacemakerError("Failed to parse the Relax-NG schema: " +
                                 str(msg))
        try:
            etree.clear_error_log()
        except:
            pass

        is_valid = schema.validate(cib_elem)
        if not is_valid:
            for error_entry in schema.error_log:
                detail_msg += error_entry.level_name + ": " + error_entry.message + "\n"

        if not self.is_local:
            try:
                delete_dir(os.path.dirname(tmp_f))
            except:
                pass

        return (is_valid, detail_msg)
Exemple #47
0
    def transformIterable(self, result, encoding):
        """Apply the theme transform to ``result`` if theming is active.

        ``None`` is returned when the policy has no settings, the theme is
        disabled, the response does not parse into a tree, or
        ``setupTransform`` gives no transform.  Otherwise the parsed result
        is returned, with its tree replaced when the transform returned one.
        Outside debug mode lxml errors propagate; in debug mode the error
        log is kept and runtrace output is forced, which inserts the debug
        HTML into /html/body.
        """
        policy = theming_policy(self.request)
        settings = policy.getSettings()
        # isThemeEnabled() is evaluated only once settings are known to exist.
        if settings is None or not policy.isThemeEnabled():
            return None

        result = self.parseTree(result)
        if result is None:
            return None

        debug_mode = getConfiguration().debug_mode
        runtrace = self.debug_theme()

        try:
            etree.clear_error_log()

            if settings.doctype:
                result.doctype = settings.doctype
                if not result.doctype.endswith('\n'):
                    result.doctype += '\n'

            transform = self.setupTransform(runtrace=runtrace)
            if transform is None:
                return None

            # Debug mode skips the policy cache entirely.
            cache = None if debug_mode else policy.getCache()

            expressions = settings.parameterExpressions or {}
            theme_context = findContext(self.request)
            params = prepareThemeParameters(
                theme_context, self.request, expressions, cache)

            themed_tree = transform(result.tree, **params)
            error_log = transform.error_log
            if themed_tree is not None:
                # Swap the original content for the transformed tree.
                result.tree = themed_tree
        except etree.LxmlError as exc:
            if not debug_mode:
                raise
            error_log = exc.error_log
            runtrace = True

        if not runtrace:
            return result

        from diazo.runtrace import generate_debug_html
        # Attach the debug information inside the page body.
        body = result.tree.xpath('/html/body')[0]
        portal_url = findContext(self.request).portal_url()
        debug_url = portal_url + '/++resource++diazo-debug'
        debug_html = generate_debug_html(
            debug_url,
            rules=settings.rules,
            rules_parser=getParser('rules', settings.readNetwork),
            error_log=error_log,
        )
        body.insert(-1, debug_html)
        return result
Exemple #48
0
    def select(self, name):
        """Select the disclosure system called ``name`` and load its settings.

        ``self.clear()`` is always called first.  A falsy ``name`` returns
        True straight away.  Otherwise each URL in ``self.urls`` is parsed and
        its ``DisclosureSystem`` elements searched for one whose ``names``
        attribute (a ``|``-separated list) contains ``name``; the first match
        has its attributes copied onto ``self`` and ends the search.  The
        mappings and standard taxonomies are then loaded and the controller's
        log filters are set.

        Returns True when a matching element was found, False when none was
        found or when an ``EnvironmentError`` / ``etree.LxmlError`` occurred
        (both cases are written to the controller log).
        """
        self.clear()
        if not name:
            return True # nothing to load
        result = False
        status = _("loading disclosure system and mappings")
        try:
            # name is always truthy here because of the early return above
            if name:
                isSelected = False
                for url in self.urls: # urls in reverse order, last plugin first
                    xmldoc = etree.parse(url)
                    for dsElt in xmldoc.iter(tag="DisclosureSystem"):
                        namesStr = dsElt.get("names")
                        if namesStr:
                            names = namesStr.split("|")
                            if name in names:
                                # the first entry of the names list becomes the primary name
                                self.names = names
                                self.name = self.names[0]
                                self.validationType = dsElt.get("validationType")
                                self.exclusiveTypesPattern = compileAttrPattern(dsElt,"exclusiveTypesPattern", patternIfNoAttr=None)
                                # built-in type flags are only set when the validation type
                                # is not one of self.pluginTypes
                                if self.validationType not in self.pluginTypes:
                                    self.EFM = self.validationType == "EFM"
                                    self.GFM = self.validationType == "GFM"
                                    self.EFMorGFM = self.EFM or self.GFM
                                    self.HMRC = self.validationType == "HMRC"
                                    self.SBRNL = self.validationType == "SBR.NL"
                                # each plugin yields (typeName, attribute name) pairs; the named
                                # attribute is set to whether this validation type equals typeName
                                for pluginXbrlMethod in pluginClassMethods("DisclosureSystem.Types"):
                                    for typeName, typeTestVariable in pluginXbrlMethod(self):
                                        setattr(self, typeTestVariable, self.validationType == typeName)
                                self.validateFileText = dsElt.get("validateFileText") == "true"
                                if dsElt.get("allowedExternalHrefPattern"):
                                    self.allowedExternalHrefPattern = re.compile(dsElt.get("allowedExternalHrefPattern"))
                                self.blockDisallowedReferences = dsElt.get("blockDisallowedReferences") == "true"
                                # a missing (None) or non-numeric attribute falls back to 0
                                try:
                                    self.maxSubmissionSubdirectoryEntryNesting = int(dsElt.get("maxSubmissionSubdirectoryEntryNesting"))
                                except (ValueError, TypeError):
                                    self.maxSubmissionSubdirectoryEntryNesting = 0
                                self.defaultXmlLang = dsElt.get("defaultXmlLang")
                                if dsElt.get("defaultXmlEncoding", default=None) is not None: # don't reset from utf-8 unless supplied with a value
                                    self.defaultXmlEncoding = dsElt.get("defaultXmlEncoding") # may be an empty string
                                self.xmlLangPattern = compileAttrPattern(dsElt,"xmlLangPattern")
                                self.defaultLanguage = dsElt.get("defaultLanguage")
                                # URL attributes are normalized with the defining file's url as second argument
                                self.standardTaxonomiesUrl = self.modelManager.cntlr.webCache.normalizeUrl(
                                                 dsElt.get("standardTaxonomiesUrl"),
                                                 url)
                                if dsElt.get("mappingsUrl"):
                                    self.mappingsUrl = self.modelManager.cntlr.webCache.normalizeUrl(
                                                 dsElt.get("mappingsUrl"),
                                                 url)
                                if dsElt.get("utrUrl"): # may be mapped by mappingsUrl entries, see below
                                    self.utrUrl = self.modelManager.cntlr.webCache.normalizeUrl(
                                                 dsElt.get("utrUrl"),
                                                 url)
                                self.identifierSchemePattern = compileAttrPattern(dsElt,"identifierSchemePattern")
                                self.identifierValuePattern = compileAttrPattern(dsElt,"identifierValuePattern")
                                self.identifierValueName = dsElt.get("identifierValueName")
                                self.contextElement = dsElt.get("contextElement")
                                self.roleDefinitionPattern = compileAttrPattern(dsElt,"roleDefinitionPattern")
                                self.labelCheckPattern = compileAttrPattern(dsElt,"labelCheckPattern", re.DOTALL)
                                self.labelTrimPattern = compileAttrPattern(dsElt,"labelTrimPattern", re.DOTALL)
                                self.deiNamespacePattern = compileAttrPattern(dsElt,"deiNamespacePattern")
                                self.deiAmendmentFlagElement = dsElt.get("deiAmendmentFlagElement")
                                self.deiCurrentFiscalYearEndDateElement = dsElt.get("deiCurrentFiscalYearEndDateElement")
                                self.deiDocumentFiscalYearFocusElement = dsElt.get("deiDocumentFiscalYearFocusElement")
                                self.deiDocumentPeriodEndDateElement = dsElt.get("deiDocumentPeriodEndDateElement")
                                self.deiFilerIdentifierElement = dsElt.get("deiFilerIdentifierElement")
                                self.deiFilerNameElement = dsElt.get("deiFilerNameElement")
                                self.logLevelFilter = dsElt.get("logLevelFilter")
                                self.logCodeFilter = dsElt.get("logCodeFilter")
                                self.standardTaxonomyDatabase = dsElt.get("standardTaxonomyDatabase")
                                self.standardTaxonomyUrlPattern = compileAttrPattern(dsElt, "standardTaxonomyUrlPattern")

                                self.selection = self.name
                                isSelected = True
                                result = True
                                break
                    # the inner break only leaves the element loop; this one stops scanning urls
                    if isSelected:
                        break
            # the following runs whether or not a matching element was found
            self.loadMappings()
            self.utrUrl = self.mappedUrl(self.utrUrl) # utr may be mapped, change to its mapped entry
            self.loadStandardTaxonomiesDict()
            self.utrTypeEntries = None # clear any prior loaded entries
            # set log level filters (including resetting prior disclosure systems values if no such filter)
            self.modelManager.cntlr.setLogLevelFilter(self.logLevelFilter)  # None or "" clears out prior filter if any
            self.modelManager.cntlr.setLogCodeFilter(self.logCodeFilter)
            if result:
                status = _("loaded")
            else:
                status = _("unable to load disclosure system {}").format(name)
                self.modelManager.cntlr.addToLog(_("Disclosure System \"%(name)s\" not recognized (a plug-in may be needed)."),
                                                 messageCode="arelle:disclosureSystemName", 
                                                 messageArgs={"name": name}, level=logging.ERROR)
                
        except (EnvironmentError,
                etree.LxmlError) as err:
            # file access and lxml failures are logged and reported as a failed selection
            status = _("exception during loading")
            result = False
            self.modelManager.cntlr.addToLog(_("Disclosure System \"%(name)s\" loading error: %(error)s"),
                                             messageCode="arelle:disclosureSystemLoadingError", 
                                             messageArgs={"error": str(err), "name": name}, level=logging.ERROR)
            etree.clear_error_log()
        # NOTE(review): 3500 is presumably a display duration in milliseconds -- confirm against showStatus
        self.modelManager.cntlr.showStatus(_("Disclosure system and mappings {0}: {1}").format(status,name), 3500)
        return result
Exemple #49
0
 def select(self, name):
     """Select the disclosure system called ``name`` and load its settings.

     ``self.clear()`` is always called first.  When ``name`` is truthy,
     ``self.url`` is parsed and its ``DisclosureSystem`` elements are
     searched for one whose ``names`` attribute (a ``|``-separated list)
     contains ``name``; the first match has its attributes copied onto
     ``self``.  Mappings and standard taxonomies are then loaded and the
     controller's log filters are set.

     Returns True unless an ``EnvironmentError`` or ``etree.LxmlError``
     occurred, in which case the error is logged and False is returned.
     """
     self.clear()
     status = _("loading disclosure system and mappings")
     try:
         if name:
             xmldoc = etree.parse(self.url)
             for dsElt in xmldoc.iter(tag="DisclosureSystem"):
                 namesStr = dsElt.get("names")
                 if namesStr:
                     names = namesStr.split("|")
                     if name in names:
                         # the first entry of the names list becomes the primary name
                         self.names = names
                         self.name = self.names[0]
                         self.validationType = dsElt.get("validationType")
                         self.EFM = self.validationType == "EFM"
                         self.GFM = self.validationType == "GFM"
                         self.EFMorGFM = self.EFM or self.GFM
                         self.HMRC = self.validationType == "HMRC"
                         self.SBRNL = self.validationType == "SBR-NL"
                         self.validateFileText = dsElt.get("validateFileText") == "true"
                         self.blockDisallowedReferences = dsElt.get("blockDisallowedReferences") == "true"
                         # a missing (None) or non-numeric attribute falls back to 0
                         try:
                             self.maxSubmissionSubdirectoryEntryNesting = int(dsElt.get("maxSubmissionSubdirectoryEntryNesting"))
                         except (ValueError, TypeError):
                             self.maxSubmissionSubdirectoryEntryNesting = 0
                         self.defaultXmlLang = dsElt.get("defaultXmlLang")
                         self.xmlLangPattern = compileAttrPattern(dsElt,"xmlLangPattern")
                         self.defaultLanguage = dsElt.get("defaultLanguage")
                         # URL attributes are normalized with self.url as second argument
                         self.standardTaxonomiesUrl = self.modelManager.cntlr.webCache.normalizeUrl(
                                          dsElt.get("standardTaxonomiesUrl"),
                                          self.url)
                         if dsElt.get("mappingsUrl"):
                             self.mappingsUrl = self.modelManager.cntlr.webCache.normalizeUrl(
                                          dsElt.get("mappingsUrl"),
                                          self.url)
                         if dsElt.get("utrUrl"): # may be mapped by mappingsUrl entries, see below
                             self.utrUrl = self.modelManager.cntlr.webCache.normalizeUrl(
                                          dsElt.get("utrUrl"),
                                          self.url)
                         self.identifierSchemePattern = compileAttrPattern(dsElt,"identifierSchemePattern")
                         self.identifierValuePattern = compileAttrPattern(dsElt,"identifierValuePattern")
                         self.identifierValueName = dsElt.get("identifierValueName")
                         self.contextElement = dsElt.get("contextElement")
                         self.roleDefinitionPattern = compileAttrPattern(dsElt,"roleDefinitionPattern")
                         self.labelCheckPattern = compileAttrPattern(dsElt,"labelCheckPattern", re.DOTALL)
                         self.labelTrimPattern = compileAttrPattern(dsElt,"labelTrimPattern", re.DOTALL)
                         self.deiNamespacePattern = compileAttrPattern(dsElt,"deiNamespacePattern")
                         self.deiAmendmentFlagElement = dsElt.get("deiAmendmentFlagElement")
                         self.deiCurrentFiscalYearEndDateElement = dsElt.get("deiCurrentFiscalYearEndDateElement")
                         self.deiDocumentFiscalYearFocusElement = dsElt.get("deiDocumentFiscalYearFocusElement")
                         self.deiDocumentPeriodEndDateElement = dsElt.get("deiDocumentPeriodEndDateElement")
                         self.deiFilerIdentifierElement = dsElt.get("deiFilerIdentifierElement")
                         self.deiFilerNameElement = dsElt.get("deiFilerNameElement")
                         self.logLevelFilter = dsElt.get("logLevelFilter")
                         self.logCodeFilter = dsElt.get("logCodeFilter")
                         self.selection = self.name
                         # only the first matching element is used
                         break
         # the following runs whether or not a matching element was found
         self.loadMappings()
         self.utrUrl = self.mappedUrl(self.utrUrl) # utr may be mapped, change to its mapped entry
         self.loadStandardTaxonomiesDict()
         self.utrTypeEntries = None # clear any prior loaded entries
         # set log level filters (including resetting prior disclosure systems values if no such filter)
         self.modelManager.cntlr.setLogLevelFilter(self.logLevelFilter)  # None or "" clears out prior filter if any
         self.modelManager.cntlr.setLogCodeFilter(self.logCodeFilter)
         # NOTE(review): result is set to True even when no element matched name -- confirm this is intended
         status = _("loaded")
         result = True
     except (EnvironmentError,
             etree.LxmlError) as err:
         # file access and lxml failures are logged and reported as a failed selection
         status = _("exception during loading")
         result = False
         self.modelManager.cntlr.addToLog("disclosuresystems.xml: import error: {0}".format(err))
         etree.clear_error_log()
     # NOTE(review): 3500 is presumably a display duration in milliseconds -- confirm against showStatus
     self.modelManager.cntlr.showStatus(_("Disclosure system and mappings {0}: {1}").format(status,name), 3500)
     return result
Exemple #50
0
    def loadStandardTaxonomiesDict(self):
        """Rebuild the standard-taxonomy lookup tables for the current selection.

        Does nothing when ``self.selection`` is falsy.  Otherwise the lookup
        containers are reset, and, if ``self.standardTaxonomiesUrl`` is set,
        the ``Loc`` entries of that file and of ``xbrlschemafiles.xml`` in the
        controller's config directory are read into them.

        ``EnvironmentError`` and ``etree.LxmlError`` are written to the
        controller log instead of being raised.  Returns None.
        """
        if self.selection:
            self.standardTaxonomiesDict = defaultdict(set)
            self.familyHrefs = defaultdict(set)
            self.standardLocalHrefs = defaultdict(set)
            self.standardAuthorities = set()
            self.standardPrefixes = {}
            if not self.standardTaxonomiesUrl:
                return
            basename = os.path.basename(self.standardTaxonomiesUrl)
            self.modelManager.cntlr.showStatus(_("parsing {0}").format(basename))
            try:
                from arelle.FileSource import openXmlFileStream
                for filepath in (self.standardTaxonomiesUrl,
                                 os.path.join(self.modelManager.cntlr.configDir,"xbrlschemafiles.xml")):
                    stream = openXmlFileStream(self.modelManager.cntlr, filepath, stripDeclaration=True)[0]
                    try:
                        xmldoc = etree.parse(stream)
                    finally:
                        # release the stream whether parsing succeeded or raised
                        stream.close()
                    for locElt in xmldoc.iter(tag="Loc"):
                        href = None
                        localHref = None
                        namespaceUri = None
                        prefix = None
                        attType = None
                        family = None
                        elements = None
                        version = None
                        for childElt in locElt.iterchildren():
                            ln = childElt.tag
                            # lxml reports .text as None for an empty element;
                            # treat that as an empty value instead of failing
                            value = (childElt.text or "").strip()
                            if ln == "Href":
                                href = value
                            elif ln == "LocalHref":
                                localHref = value
                            elif ln == "Namespace":
                                namespaceUri = value
                            elif ln == "Prefix":
                                prefix = value
                            elif ln == "AttType":
                                attType = value
                            elif ln == "Family":
                                family = value
                            elif ln == "Elements":
                                elements = value
                            elif ln == "Version":
                                version = value
                        if href:
                            if namespaceUri and (attType == "SCH" or attType == "ENT"):
                                self.standardTaxonomiesDict[namespaceUri].add(href)
                                if localHref:
                                    self.standardLocalHrefs[namespaceUri].add(localHref)
                                authority = UrlUtil.authority(namespaceUri)
                                self.standardAuthorities.add(authority)
                                if family == "BASE":
                                    self.baseTaxonomyNamespaces.add(namespaceUri)
                                if prefix:
                                    self.standardPrefixes[namespaceUri] = prefix
                            # hrefs are also keyed directly, mapped to "Allowed" + AttType
                            # NOTE(review): a Loc with an Href but no AttType child makes
                            # this concatenation fail with TypeError -- confirm inputs
                            if href not in self.standardTaxonomiesDict:
                                self.standardTaxonomiesDict[href] = "Allowed" + attType
                            if family:
                                self.familyHrefs[family].add(ErxlLoc(family, version, href, attType, elements, namespaceUri))
                        elif attType == "SCH" and family == "BASE":
                            # entries without an Href can still declare a base taxonomy namespace
                            self.baseTaxonomyNamespaces.add(namespaceUri)

            except (EnvironmentError,
                    etree.LxmlError) as err:
                self.modelManager.cntlr.addToLog("{0}: import error: {1}".format(basename,err))
                etree.clear_error_log()
Exemple #51
0
    def parse(self):
        """
        Traverses through the XML document and parses the data, applying it to the
        model specified in the :py:class:`~feedmapper.models.Mapping`.

        The outcome is recorded on ``self.mapping`` (``parse_attempted``,
        ``parse_succeeded``, ``parse_log``), which is then saved.  lxml errors
        and ``IOError`` are recorded as a failed parse.  ``ValueError`` (invalid
        model string) and ``UserWarning`` (no identifier while purging is off)
        propagate to the caller, and the mapping is not saved in that case.
        When the parse failed and the mapping has notification recipients,
        ``notify_failure`` is called.
        """
        self.mapping.parse_attempted = now()
        try:
            tree = etree.parse(self.data_source)
            root = tree.getroot()

            model_mappings = self.mapping.data_map['models']
            purge_filter = self.mapping.data_map.get('purge_filter')
            for model_string, configuration in model_mappings.items():
                baseurl = configuration.get('baseurl')
                if not self.validate_model_format(model_string):
                    raise ValueError("Invalid model format in JSON mapping: %s" % model_string)
                identifier = configuration.get('identifier')
                identifier_fieldname = configuration.get('identifier_fieldname')
                if not identifier and not self.mapping.purge:
                    raise UserWarning("Purging is off and the JSON mapping doesn't supply an identifier.")
                model = get_model(*model_string.split('.'))
                # nodePath is written with dots in the mapping; XPath needs slashes
                node_path = configuration['nodePath'].replace('.', '/')
                fields = configuration['fields']
                nodes = root.xpath(node_path, namespaces=self.nsmap)

                if self.mapping.purge:
                    # remove existing items
                    existing_items = model.objects.all()
                    if purge_filter:
                        filter_kwargs = self.generate_filter_kwargs(purge_filter)
                        if filter_kwargs:
                            existing_items = existing_items.filter(**filter_kwargs)
                    existing_items.delete()

                for node in nodes:
                    if self.mapping.purge:
                        instance = model()
                    else:
                        # purge is turned off, retrieve an existing instance
                        identifier_value = node.find(identifier, namespaces=self.nsmap).text.strip()
                        try:
                            kw = {identifier_fieldname: identifier_value}
                            instance = model.objects.get(**kw)
                        except model.DoesNotExist:
                            instance = model()
                    for field, target in fields.items():
                        extra_args = {}
                        # Only a dict target can carry an "extra" section.  A
                        # bare ``in`` test is a substring match on string
                        # targets (e.g. "extras/name") and a membership test
                        # on list targets, after which target['extra'] fails.
                        if isinstance(target, dict) and "extra" in target:
                            extra_args.update(**target['extra'])
                        if baseurl:
                            extra_args['baseurl'] = baseurl
                        # Start from None so a target of an unsupported type
                        # cannot reuse the value of the previous field.
                        value = None
                        if isinstance(target, basestring):
                            if target == "feed_label":
                                # provides the feed label
                                value = self.mapping.label
                            else:
                                # maps one model field to one feed node
                                value = self.get_value(node, target)
                        elif isinstance(target, list):
                            # maps one model field to multiple feed nodes
                            value = self.join_fields(node, target)
                        elif isinstance(target, dict):
                            if 'transformer' in target:
                                # maps one model field to a transformer method
                                transformer = getattr(instance, target['transformer'])
                                text_list = [self.get_value(node, target_field) for target_field in target['fields']]
                                value = transformer(*text_list, **extra_args)
                            if 'default' in target and not value:
                                # maps one model field to a default value
                                value = target['default']
                        if isinstance(value, basestring):
                            value = value.strip()
                        setattr(instance, field, value)
                    instance.save()
            self.mapping.parse_succeeded = True
            self.mapping.parse_log = ""
        except etree.Error as e:
            self.mapping.parse_succeeded = False
            self.mapping.parse_log = str(e.error_log)
        except IOError as e:
            self.mapping.parse_succeeded = False
            self.mapping.parse_log = e.args[0]
        finally:
            # clear the lxml error log so errors don't compound, including
            # when a ValueError/UserWarning is propagating to the caller
            etree.clear_error_log()
        self.mapping.save()

        # notify the authorities if a failure occurred
        if not self.mapping.parse_succeeded and self.mapping.notification_recipients:
            self.notify_failure()
    def parse(self):
        """
        Traverses through the XML document and parses the data, applying it to the
        model specified in the :py:class:`~feedmapper.models.Mapping`.
        """
        self.mapping.parse_attempted = datetime.now()
        try:
            tree = etree.parse(self.data_source)
            root = tree.getroot()

            model_mappings = self.mapping.data_map['models']
            purge_filter = self.mapping.data_map.get('purge_filter')
            for model_string, configuration in model_mappings.items():
                if not self.validate_model_format(model_string):
                    raise ValueError("Invalid model format in JSON mapping: %s" % model_string)
                identifier = configuration.get('identifier')

                # allow transformation of identifiers
                identifier_transformer = None
                if isinstance(identifier, dict):
                    identifier_transformer = identifier["transformer"]
                    identifier = identifier["field"]

                if not identifier and not self.mapping.purge:
                    raise UserWarning("Purging is off and the JSON mapping doesn't supply an identifier.")
                model = get_model(*model_string.split('.'))
                node_path = configuration['nodePath'].replace('.', '/')
                fields = configuration['fields']
                nodes = root.xpath(node_path, namespaces=self.nsmap)

                if self.mapping.purge:
                    # remove existing items
                    existing_items = model.objects.all()
                    if purge_filter:
                        filter_kwargs = self.generate_filter_kwargs(purge_filter)
                        if filter_kwargs:
                            existing_items = existing_items.filter(**filter_kwargs)
                    existing_items.delete()

                for node in nodes:
                    if self.mapping.purge:
                        instance = model()
                    else:
                        # purge is turned off, retrieve an existing instance
                        identifier_value = node.find(identifier, namespaces=self.nsmap).text
                        if identifier_transformer:
                            identifier_value = getattr(model, identifier_transformer)(identifier_value, parser=self)

                        kwargs = {identifier: identifier_value}
                        try:
                            instance = model.objects.get(**kwargs)
                        except model.DoesNotExist:
                            instance = model(**kwargs)

                    many_to_many = {}
                    for field, target in fields.items():

                        transformer = getattr(instance, "parse_%s" % field, None)

                        if not transformer:
                            if isinstance(target, basestring):
                                # maps one model field to one feed node
                                value = self.get_value(node, target)
                            elif isinstance(target, list):
                                # maps one model field to multiple feed nodes
                                value = self.join_fields(node, target)

                        elif transformer or isinstance(target, dict):
                            # we may have a transformer (parse_fieldname method) or an extended definition
                            value = None
                            if 'transformer' in target:
                                # maps one model field to a transformer method
                                transformer = getattr(instance, target['transformer'])
                            else:
                                # we've got a single field definition with an implicit transformer
                                target = {"fields": [target]}

                            if transformer:
                                transformer_args = []

                                field_is_m2m = False
                                if len(target["fields"]) == 1 and target["fields"][0].endswith("*"):
                                    # we've hit a many2many relation
                                    transformer_args = self.get_value(node, target["fields"][0][:-1], as_text=False)
                                    field_is_m2m = True

                                else:
                                    for target_field in target["fields"]:

                                        if target_field.endswith("*"):
                                            raise ValueError(u"M2m fields can only contain one target field")
                                        else:
                                            transformer_args.append(self.get_value(node, target_field))

                                if field_is_m2m:
                                    many_to_many[field] = (
                                        transformer, transformer_args, {"parser": self}
                                    )
                                    continue
                                else:
                                    value = transformer(*transformer_args, parser=self)

                            if 'default' in target and not value:
                                # maps one model field to a default value
                                value = target['default']
                        setattr(instance, field, value)
                    instance.save()

                    # handle m2m
                    for transformer, args, kwargs in many_to_many.values():
                        transformer(*args, **kwargs)

            self.mapping.parse_succeeded = True
            self.mapping.parse_log = ""
        except etree.Error as e:
            self.mapping.parse_succeeded = False
            self.mapping.parse_log = str(e.error_log)
        except IOError as e:
            self.mapping.parse_succeeded = False
            self.mapping.parse_log = e.args[0]
        # clear the lxml error log so errors don't compound
        etree.clear_error_log()
        self.mapping.save()

        # notify the authorities if a failure occured
        if not self.mapping.parse_succeeded and self.mapping.notification_recipients:
            self.notify_failure()
Exemple #53
0
    def select(self, name):
        """Select the disclosure system called ``name`` and load its settings.

        The current state is reset with ``self.clear()`` first.  Each document
        in ``self.urls`` is then parsed and its ``DisclosureSystem`` elements
        are scanned; the first element whose pipe-separated ``names``
        attribute contains ``name`` wins, and its attributes are copied onto
        ``self``.  Whether or not a match was found, the mappings and the
        standard-taxonomies dictionary are (re)loaded and the controller's
        log-level and log-code filters are set from the resulting state.

        :param name: one of the names of the disclosure system to select; a
            falsy value means there is nothing to load.
        :returns: ``True`` when ``name`` is falsy or a matching disclosure
            system was found; ``False`` when no match was found or when an
            ``EnvironmentError`` / ``etree.LxmlError`` occurred while loading
            (both cases are logged at ERROR level through the controller).
        """
        self.clear()
        if not name:
            return True  # nothing to load
        result = False
        status = _("loading disclosure system and mappings")
        try:
            # NOTE: always true here, because a falsy name returned above.
            if name:
                isSelected = False
                for url in self.urls:  # urls in reverse order, last plugin first
                    xmldoc = etree.parse(url)
                    for dsElt in xmldoc.iter(tag="DisclosureSystem"):
                        namesStr = dsElt.get("names")
                        if namesStr:
                            names = namesStr.split("|")
                            if name in names:
                                # the first listed name becomes the canonical one
                                self.names = names
                                self.name = self.names[0]
                                self.validationType = dsElt.get(
                                    "validationType")
                                self.exclusiveTypesPattern = compileAttrPattern(
                                    dsElt,
                                    "exclusiveTypesPattern",
                                    patternIfNoAttr=None)
                                # built-in type flags are only assigned when the
                                # validation type is not listed in self.pluginTypes
                                if self.validationType not in self.pluginTypes:
                                    self.EFM = self.validationType == "EFM"
                                    self.GFM = self.validationType == "GFM"
                                    self.EFMorGFM = self.EFM or self.GFM
                                    self.HMRC = self.validationType == "HMRC"
                                    self.SBRNL = self.validationType == "SBR.NL"
                                # each "DisclosureSystem.Types" plug-in method yields
                                # (typeName, typeTestVariable) pairs; the named
                                # attribute is set to whether this system has that type
                                for pluginXbrlMethod in pluginClassMethods(
                                        "DisclosureSystem.Types"):
                                    for typeName, typeTestVariable in pluginXbrlMethod(
                                            self):
                                        setattr(
                                            self, typeTestVariable,
                                            self.validationType == typeName)
                                self.validateFileText = dsElt.get(
                                    "validateFileText") == "true"
                                if dsElt.get("allowedExternalHrefPattern"):
                                    self.allowedExternalHrefPattern = re.compile(
                                        dsElt.get(
                                            "allowedExternalHrefPattern"))
                                self.blockDisallowedReferences = dsElt.get(
                                    "blockDisallowedReferences") == "true"
                                # a missing attribute (int(None) -> TypeError) or a
                                # non-numeric one (ValueError) falls back to 0
                                try:
                                    self.maxSubmissionSubdirectoryEntryNesting = int(
                                        dsElt.get(
                                            "maxSubmissionSubdirectoryEntryNesting"
                                        ))
                                except (ValueError, TypeError):
                                    self.maxSubmissionSubdirectoryEntryNesting = 0
                                self.defaultXmlLang = dsElt.get(
                                    "defaultXmlLang")
                                if dsElt.get(
                                        "defaultXmlEncoding", default=None
                                ) is not None:  # don't reset from utf-8 unless supplied with a value
                                    self.defaultXmlEncoding = dsElt.get(
                                        "defaultXmlEncoding"
                                    )  # may be an empty string
                                self.xmlLangPattern = compileAttrPattern(
                                    dsElt, "xmlLangPattern")
                                # inheritable unless the attribute is present and not "true"
                                self.xmlLangIsInheritable = dsElt.get(
                                    "xmlLangIsInheritable", "true") == "true"
                                self.defaultLanguage = dsElt.get(
                                    "defaultLanguage")
                                # URL attributes are normalized against the url of the
                                # document that declared this disclosure system
                                if dsElt.get("standardTaxonomiesUrl"):
                                    self.standardTaxonomiesUrl = self.modelManager.cntlr.webCache.normalizeUrl(
                                        dsElt.get("standardTaxonomiesUrl"),
                                        url)
                                if dsElt.get("mappingsUrl"):
                                    self.mappingsUrl = self.modelManager.cntlr.webCache.normalizeUrl(
                                        dsElt.get("mappingsUrl"), url)
                                if dsElt.get(
                                        "utrUrl"
                                ):  # may be mapped by mappingsUrl entries, see below
                                    self.utrUrl = self.modelManager.cntlr.webCache.normalizeUrl(
                                        dsElt.get("utrUrl"), url)
                                self.identifierSchemePattern = compileAttrPattern(
                                    dsElt, "identifierSchemePattern")
                                self.identifierValuePattern = compileAttrPattern(
                                    dsElt, "identifierValuePattern")
                                self.identifierValueName = dsElt.get(
                                    "identifierValueName")
                                self.contextElement = dsElt.get(
                                    "contextElement")
                                self.roleDefinitionPattern = compileAttrPattern(
                                    dsElt, "roleDefinitionPattern")
                                self.labelCheckPattern = compileAttrPattern(
                                    dsElt, "labelCheckPattern", re.DOTALL)
                                self.labelTrimPattern = compileAttrPattern(
                                    dsElt, "labelTrimPattern", re.DOTALL)
                                self.deiNamespacePattern = compileAttrPattern(
                                    dsElt, "deiNamespacePattern")
                                self.deiAmendmentFlagElement = dsElt.get(
                                    "deiAmendmentFlagElement")
                                self.deiCurrentFiscalYearEndDateElement = dsElt.get(
                                    "deiCurrentFiscalYearEndDateElement")
                                self.deiDocumentFiscalYearFocusElement = dsElt.get(
                                    "deiDocumentFiscalYearFocusElement")
                                self.deiDocumentPeriodEndDateElement = dsElt.get(
                                    "deiDocumentPeriodEndDateElement")
                                self.deiFilerIdentifierElement = dsElt.get(
                                    "deiFilerIdentifierElement")
                                self.deiFilerNameElement = dsElt.get(
                                    "deiFilerNameElement")
                                self.logLevelFilter = dsElt.get(
                                    "logLevelFilter")
                                self.logCodeFilter = dsElt.get("logCodeFilter")
                                self.standardTaxonomyDatabase = dsElt.get(
                                    "standardTaxonomyDatabase")
                                self.standardTaxonomyUrlPattern = compileAttrPattern(
                                    dsElt, "standardTaxonomyUrlPattern")

                                self.selection = self.name
                                isSelected = True
                                result = True
                                break
                    # stop scanning further urls once a system has been selected
                    if isSelected:
                        break
            # the steps below run even when no disclosure system matched
            self.loadMappings()
            self.utrUrl = self.mappedUrl(
                self.utrUrl)  # utr may be mapped, change to its mapped entry
            self.loadStandardTaxonomiesDict()
            self.utrTypeEntries = None  # clear any prior loaded entries
            # set log level filters (including resetting prior disclosure systems values if no such filter)
            self.modelManager.cntlr.setLogLevelFilter(
                self.logLevelFilter
            )  # None or "" clears out prior filter if any
            self.modelManager.cntlr.setLogCodeFilter(self.logCodeFilter)
            if result:
                status = _("loaded")
            else:
                status = _("unable to load disclosure system {}").format(name)
                self.modelManager.cntlr.addToLog(
                    _("Disclosure System \"%(name)s\" not recognized (a plug-in may be needed)."
                      ),
                    messageCode="arelle:disclosureSystemName",
                    messageArgs={"name": name},
                    level=logging.ERROR)

        except (EnvironmentError, etree.LxmlError) as err:
            # file access or XML problems are logged rather than propagated
            status = _("exception during loading")
            result = False
            self.modelManager.cntlr.addToLog(
                _("Disclosure System \"%(name)s\" loading error: %(error)s"),
                messageCode="arelle:disclosureSystemLoadingError",
                messageArgs={
                    "error": str(err),
                    "name": name
                },
                level=logging.ERROR)
            etree.clear_error_log()
        # the final status is shown on every path that reaches this point
        self.modelManager.cntlr.showStatus(
            _("Disclosure system and mappings {0}: {1}").format(status, name),
            3500)
        return result
Exemple #54
0
    def loadStandardTaxonomiesDict(self):
        """Rebuild the standard-taxonomy lookup tables for the selected system.

        Does nothing when no disclosure system is selected
        (``self.selection`` is falsy).  Otherwise the lookup containers
        (``standardTaxonomiesDict``, ``familyHrefs``, ``standardLocalHrefs``,
        ``standardAuthorities``, ``standardPrefixes``) are reset, and, when
        ``self.standardTaxonomiesUrl`` is set, they are filled from the ``Loc``
        elements of that document and of ``xbrlschemafiles.xml`` in the
        controller's config directory.  The ``version`` attribute of the first
        ``Erxl`` element, when well formed, is stored in ``self.version`` as a
        tuple of at least three integers.

        ``EnvironmentError`` and ``etree.LxmlError`` raised while reading the
        documents are logged at ERROR level through the controller instead of
        being propagated.

        :returns: ``None``
        """
        if not self.selection:
            return
        self.standardTaxonomiesDict = defaultdict(set)
        self.familyHrefs = defaultdict(set)
        self.standardLocalHrefs = defaultdict(set)
        self.standardAuthorities = set()
        self.standardPrefixes = {}
        if not self.standardTaxonomiesUrl:
            return
        basename = os.path.basename(self.standardTaxonomiesUrl)
        self.modelManager.cntlr.showStatus(
            _("parsing {0}").format(basename))
        try:
            for filepath in (self.standardTaxonomiesUrl,
                             os.path.join(
                                 self.modelManager.cntlr.configDir,
                                 "xbrlschemafiles.xml")):
                # must open with file path for xinclude to know base of file
                xmldoc = etree.parse(filepath)
                # to include elements below root use xpointer(/*/*)
                xmldoc.xinclude()
                for erxlElt in xmldoc.iter(tag="Erxl"):
                    v = erxlElt.get("version")
                    if v and re.match(r"[0-9]+([.][0-9]+)*$", v):
                        vSplit = v.split('.')
                        # zero-pad so the version always has at least 3 parts
                        self.version = tuple(
                            int(n) for n in vSplit) + tuple(
                                0 for n in range(3 - len(vSplit)))
                    break  # only the first Erxl element is consulted
                for locElt in xmldoc.iter(tag="Loc"):
                    # Collect the stripped text of every child, keyed by tag;
                    # a repeated tag keeps its last value.  An empty element
                    # has text None in lxml and is treated as "not set"
                    # rather than raising AttributeError on .strip().
                    locValues = {}
                    for childElt in locElt.iterchildren():
                        text = childElt.text
                        locValues[childElt.tag] = (
                            text.strip() if text is not None else None)
                    href = locValues.get("Href")
                    localHref = locValues.get("LocalHref")
                    namespaceUri = locValues.get("Namespace")
                    prefix = locValues.get("Prefix")
                    attType = locValues.get("AttType")
                    family = locValues.get("Family")
                    elements = locValues.get("Elements")
                    version = locValues.get("Version")
                    if href:
                        if namespaceUri and attType in ("SCH", "ENT"):
                            self.standardTaxonomiesDict[namespaceUri].add(
                                href)
                            if localHref:
                                self.standardLocalHrefs[namespaceUri].add(
                                    localHref)
                            authority = UrlUtil.authority(namespaceUri)
                            self.standardAuthorities.add(authority)
                            if family == "BASE":
                                self.baseTaxonomyNamespaces.add(
                                    namespaceUri)
                            if prefix:
                                self.standardPrefixes[
                                    namespaceUri] = prefix
                        if href not in self.standardTaxonomiesDict:
                            # NOTE(review): raises TypeError when the Loc has
                            # no AttType; kept as in the original - confirm
                            # whether such entries can occur.
                            self.standardTaxonomiesDict[
                                href] = "Allowed" + attType
                        if family:
                            self.familyHrefs[family].add(
                                ErxlLoc(family, version, href, attType,
                                        elements, namespaceUri))
                    elif attType == "SCH" and family == "BASE":
                        self.baseTaxonomyNamespaces.add(namespaceUri)

        except (EnvironmentError, etree.LxmlError) as err:
            self.modelManager.cntlr.addToLog(
                _("Disclosure System \"%(name)s\" import %(importFile)s, error: %(error)s"
                  ),
                messageCode="arelle:disclosureSystemImportError",
                messageArgs={
                    "error": str(err),
                    "name": self.name,
                    "importFile": basename
                },
                level=logging.ERROR)
            etree.clear_error_log()
Exemple #55
0
def loadUtr(modelXbrl):
    """Build a dictionary of item types that are constrained by the UTR.

    The registry document at ``modelManager.disclosureSystem.utrUrl`` is
    parsed and every ``unit`` element is turned into a ``UtrEntry``.  Entries
    whose status is ``"REC"`` are stored in
    ``modelManager.disclosureSystem.utrItemTypeEntries`` as
    ``entries[itemType][id]``.  Duplicate ``(unitId, nsUnit, status)`` keys
    and inconsistent simple/complex definitions are reported through
    ``modelXbrl.error``.

    ``EnvironmentError`` and ``etree.LxmlError`` raised while loading are
    reported through ``modelXbrl.error`` rather than propagated.  The
    registry stream is closed on every exit path.

    :param modelXbrl: the model whose model manager / disclosure system
        receives the loaded entries and whose ``error`` method is used for
        reporting.
    :returns: ``None``
    """
    modelManager = modelXbrl.modelManager
    modelManager.disclosureSystem.utrItemTypeEntries = utrItemTypeEntries = defaultdict(
        dict)
    # No status message is shown here: it would hide prior activity during
    # which this might have just obtained symbols.
    utrNs = "{http://www.xbrl.org/2009/utr}"
    utrFile = None
    try:
        from arelle.FileSource import openXmlFileStream
        unitDupCheck = set()
        utrFile = openXmlFileStream(modelManager.cntlr,
                                    modelManager.disclosureSystem.utrUrl,
                                    stripDeclaration=True)[0]
        xmldoc = etree.parse(utrFile)
        for unitElt in xmldoc.iter(tag=utrNs + "unit"):
            u = UtrEntry()
            u.id = unitElt.get("id")
            u.unitId = unitElt.findtext(utrNs + "unitId")
            # None if empty entry
            u.nsUnit = unitElt.findtext(utrNs + "nsUnit") or None
            u.itemType = unitElt.findtext(utrNs + "itemType")
            u.nsItemType = unitElt.findtext(utrNs + "nsItemType")
            u.numeratorItemType = unitElt.findtext(
                utrNs + "numeratorItemType")
            u.nsNumeratorItemType = unitElt.findtext(
                utrNs + "nsNumeratorItemType")
            u.denominatorItemType = unitElt.findtext(
                utrNs + "denominatorItemType")
            u.nsDenominatorItemType = unitElt.findtext(
                utrNs + "nsDenominatorItemType")
            # a unit is simple when none of the numerator/denominator
            # elements is present at all
            u.isSimple = all(
                e is None
                for e in (u.numeratorItemType, u.nsNumeratorItemType,
                          u.denominatorItemType, u.nsDenominatorItemType))
            u.symbol = unitElt.findtext(utrNs + "symbol")
            u.status = unitElt.findtext(utrNs + "status")
            if u.status == "REC":
                # TO DO: This indexing scheme assumes that there are no name clashes in item types of the registry.
                (utrItemTypeEntries[u.itemType])[u.id] = u
            # keyword arguments shared by every per-entry error report
            entryArgs = dict(modelObject=modelXbrl,
                             id=u.id,
                             unitId=u.unitId,
                             nsUnit=u.nsUnit,
                             status=u.status)
            unitDupKey = (u.unitId, u.nsUnit, u.status)
            if unitDupKey in unitDupCheck:
                modelXbrl.error(
                    "arelleUtrLoader:entryDuplication",
                    "Unit Type Registry entry duplication: id %(id)s unit %(unitId)s nsUnit %(nsUnit)s status %(status)s",
                    **entryArgs)
            unitDupCheck.add(unitDupKey)
            if u.isSimple:
                if not u.itemType:
                    modelXbrl.error(
                        "arelleUtrLoader:simpleDefMissingField",
                        "Unit Type Registry simple unit definition missing item type: id %(id)s unit %(unitId)s nsUnit %(nsUnit)s status %(status)s",
                        **entryArgs)
                # NOTE(review): cannot fire as written - isSimple is only
                # true when all four of these fields are None.  Kept
                # unchanged pending a decision on the intended check.
                if u.numeratorItemType or u.denominatorItemType or u.nsNumeratorItemType or u.nsDenominatorItemType:
                    modelXbrl.error(
                        "arelleUtrLoader",
                        "Unit Type Registry simple unit definition may not have complex fields: id %(id)s unit %(unitId)s nsUnit %(nsUnit)s status %(status)s",
                        **entryArgs)
            else:
                if u.symbol:
                    modelXbrl.error(
                        "arelleUtrLoader:complexDefSymbol",
                        "Unit Type Registry complex unit definition may not have symbol: id %(id)s unit %(unitId)s nsUnit %(nsUnit)s status %(status)s",
                        **entryArgs)
                if not u.numeratorItemType or not u.denominatorItemType:
                    modelXbrl.error(
                        "arelleUtrLoader:complexDefMissingField",
                        "Unit Type Registry complex unit definition must have numerator and denominator fields: id %(id)s unit %(unitId)s nsUnit %(nsUnit)s status %(status)s",
                        **entryArgs)
    except (EnvironmentError, etree.LxmlError) as err:
        # Report on the model that was passed in, like every other error in
        # this function, instead of on modelManager.modelXbrl (which is not
        # guaranteed here to be this model, or to be set at all).
        modelXbrl.error(
            "arelleUtrLoader:error",
            "Unit Type Registry Import error: %(error)s",
            modelObject=modelXbrl,
            error=err)
        etree.clear_error_log()
    finally:
        # close the registry stream even when an unexpected exception escapes
        if utrFile:
            utrFile.close()
    def run(self):
        """Check the source document and schedule the finish callback.

        Steps, each reporting problems through ``self.addError``:

        1. Obtain the parsed document, either via ``self.getSchema`` (when
           ``self.source_file`` is not ``'<unknown>'``) or by parsing
           ``self.source_contents`` directly.
        2. If the document is itself an XSD or RelaxNG schema, compile it to
           surface schema errors.
        3. Ask ``self.lookForSchema`` for a schema referenced by the document
           and, when one is found, validate the document against it.

        XML syntax errors in the document are reported entry by entry; any
        other exception is deliberately ignored.  Finally, unless
        ``self.cancelled`` is set, ``self.finish_in_idle`` is scheduled with
        ``GLib.idle_add`` while holding ``self.clock``.
        """
        docType = 'XML'

        etree.clear_error_log()

        try:
            # parse the XML for errors
            if self.source_file != '<unknown>':
                docSchema = self.getSchema(self.source_file, self.source_contents)
                xml = docSchema['xml']

                if docSchema['type'] is not None:
                    docType = docSchema['type']
            else:
                xml = etree.fromstring(self.source_contents)

            # if the doc is a schema itself, parse it for schema errors
            try:
                if docType == "XSD":
                    etree.XMLSchema(xml)
                elif docType == "RelaxNG":
                    etree.RelaxNG(xml)

            except (etree.RelaxNGError, etree.XMLSchemaParseError) as e:
                for error in e.error_log:
                    self.addError(docType + " parsing error", error)

            except Exception as e:
                self.addError(docType + " parsing error", e)

            # parse XML comments in document for a reference to a schema

            # Pre-bind these: when lookForSchema itself raises, the tuple
            # below is never unpacked, and the handlers further down used to
            # fail with a NameError that the outer catch-all then swallowed,
            # so the schema problem was never reported.
            schemaLocation = None
            commentLine = None

            try:
                (schemaRef, schemaLocation, commentLine) = self.lookForSchema(xml)

                if schemaRef is not None:
                    try:
                        if schemaRef['type'] == "XSD":
                            schema = etree.XMLSchema(schemaRef['xml'])
                        elif schemaRef['type'] == "RelaxNG":
                            schema = etree.RelaxNG(schemaRef['xml'])

                        schema.assertValid(xml)

                    except (etree.DocumentInvalid, etree.RelaxNGValidateError, etree.XMLSchemaValidateError):
                        for error in schema.error_log:
                            self.addError(schemaRef['type'] + " validation error", error)

                    except (etree.RelaxNGError, etree.XMLSchemaParseError):
                        self.addError(schemaRef['type'] + " error", "Schema is invalid " + schemaLocation, commentLine)

                    except Exception as e:
                        self.addError(schemaRef['type'] + " error", e)

            except etree.XMLSyntaxError as e:
                if schemaLocation is None:
                    # location unknown: report the parser error itself
                    self.addError("Schema error", e)
                else:
                    self.addError("Schema error", "Unable to parse schema XML " + schemaLocation, commentLine)

            except Exception as e:
                if commentLine is None:
                    self.addError("Schema error", e)
                else:
                    self.addError("Schema error", e, commentLine)

        # handle XML parse errors
        except etree.XMLSyntaxError as e:
            for error in e.error_log:
                self.addError("XML parsing error", error)

        # ignore other exceptions (best effort: the finish callback below
        # must still be scheduled)
        except Exception:
            pass

        self.clock.acquire()
        try:
            if not self.cancelled:
                self.idle_finish = GLib.idle_add(self.finish_in_idle)
        finally:
            # never leave the lock held, even if scheduling fails
            self.clock.release()
    def parse(self, doc, options):
        """Check the XML document ``doc`` and collect diagnostics on it.

        ``doc.diagnostics`` is reset to an empty list and then filled with the
        results of ``self.format_error`` for:

        * XML syntax errors in the document read from ``doc.data_path``;
        * schema compilation errors when the document is itself an XSD or
          RelaxNG schema;
        * validation errors against a schema that ``self.look_for_schema``
          finds referenced by the document.

        Any other exception raised while checking is deliberately ignored.

        :param doc: document object providing ``data_path`` and ``path`` and
            receiving ``diagnostics``.
        :param options: not used by this method.
        :returns: ``None``
        """
        doc.diagnostics = []

        doc_type = 'XML'
        etree.clear_error_log()

        with open(doc.data_path) as f:
            source = f.read()

        try:
            # parse the XML for errors
            if os.path.isabs(doc.path):
                doc_schema = self.get_schema(doc.path, doc.path, source)
                xml = doc_schema['xml']

                if doc_schema['type'] is not None:
                    doc_type = doc_schema['type']
            else:
                xml = etree.fromstring(source)

            # if the doc is a schema itself, parse it for schema errors
            try:
                if doc_type == "XSD":
                    etree.XMLSchema(xml)
                elif doc_type == "RelaxNG":
                    etree.RelaxNG(xml)

            except (etree.RelaxNGError, etree.XMLSchemaParseError) as e:
                for error in e.error_log:
                    doc.diagnostics.append(self.format_error(doc_type + " parsing error", error))

            except Exception as e:
                doc.diagnostics.append(self.format_error(doc_type + " parsing error", e))

            # parse XML comments in document for a reference to a schema

            # Pre-bind these: when look_for_schema itself raises, the tuple
            # below is never unpacked, and the handlers further down used to
            # fail with a NameError that the outer catch-all then swallowed,
            # so the schema problem never became a diagnostic.
            schema_location = None
            comment_line = None

            try:
                (schema_ref, schema_location, comment_line) = self.look_for_schema(doc.path, xml)

                if schema_ref is not None:
                    try:
                        if schema_ref['type'] == "XSD":
                            schema = etree.XMLSchema(schema_ref['xml'])
                        elif schema_ref['type'] == "RelaxNG":
                            schema = etree.RelaxNG(schema_ref['xml'])

                        schema.assertValid(xml)

                    except (etree.DocumentInvalid, etree.RelaxNGValidateError, etree.XMLSchemaValidateError):
                        for error in schema.error_log:
                            doc.diagnostics.append(self.format_error(schema_ref['type'] + " validation error", error))

                    except (etree.RelaxNGError, etree.XMLSchemaParseError):
                        doc.diagnostics.append(self.format_error(schema_ref['type'] + " error", "Schema is invalid " + schema_location, comment_line))

                    except Exception as e:
                        doc.diagnostics.append(self.format_error(schema_ref['type'] + " error", e))

            except etree.XMLSyntaxError as e:
                if schema_location is None:
                    # location unknown: report the parser error itself
                    doc.diagnostics.append(self.format_error("Schema error", e))
                else:
                    doc.diagnostics.append(self.format_error("Schema error", "Unable to parse schema XML " + schema_location, comment_line))

            except Exception as e:
                if comment_line is None:
                    doc.diagnostics.append(self.format_error("Schema error", e))
                else:
                    doc.diagnostics.append(self.format_error("Schema error", e, comment_line))

        # handle XML parse errors
        except etree.XMLSyntaxError as e:
            for error in e.error_log:
                doc.diagnostics.append(self.format_error("XML parsing error", error))

        # ignore other exceptions (best effort: diagnostics gathered so far
        # are kept)
        except Exception:
            pass
Exemple #58
0
                raise PacemakerError("Cannot expand the Relax-NG schema")
            else:
                schema_f = tmp_f

        try:
            cib_elem = etree.fromstring(etree.tostring(new_cib_elem))
        except etree.Error, msg:
            raise PacemakerError("Failed to parse the CIB XML: " + str(msg))

        try:
            schema = etree.RelaxNG(file=schema_f)

        except etree.Error, msg:
            raise PacemakerError("Failed to parse the Relax-NG schema: " + str(msg))
        try:
            etree.clear_error_log()
        except:
            pass

        is_valid = schema.validate(cib_elem)
        if not is_valid:
            for error_entry in schema.error_log:
                detail_msg += error_entry.level_name + ": " + error_entry.message + "\n"

        if not self.is_local:
            try:
                delete_dir(os.path.dirname(tmp_f))
            except:
                pass

        return (is_valid, detail_msg)
Exemple #59
0
    def do_ConfirmReservation(self, elem, *args, **kw):
        log.debug("=============== XenBEEClient2BrokerProtocol::do_ConfirmReservation")
        try:
            confirm = message.MessageBuilder.from_xml(elem.getroottree())
        except Exception, e:
            return message.BrokerError(confirm.uuid(), errcode.ILLEGAL_REQUEST, str(e))
        ticket = TicketStore.getInstance().lookup(confirm.ticket())
        if ticket is None:
            return message.BrokerError(confirm.uuid(), errcode.TICKET_INVALID, confirm.ticket())
        log.debug("got confirmation with ticket %s" % confirm.ticket())

        xbed = XBEDaemon.getInstance()
        jsdl_doc = jsdl.JsdlDocument(schema_map=xbed.schema_map)
        try:
            if hasattr(etree, 'clearErrorLog'): etree.clearErrorLog()
            if hasattr(etree, 'clear_error_log'): etree.clear_error_log()
            parsed_jsdl = jsdl_doc.parse(confirm.jsdl())
        except etree.DocumentInvalid, e:
            log.info("got invalid document: %s" % str(e.error_log))
#            TaskManager.getInstance().removeTask(ticket.task)
#            del ticket.task
#            TicketStore.getInstance().release(ticket)
            return message.BrokerError(confirm.uuid(), errcode.ILLEGAL_REQUEST, "JSDL document is invalid: %s" % (e.error_log,))

        try:
            # does the job have our InstanceDescription element?
            # otherwise drop the job
            jsdl_doc.lookup_path(
                "JobDefinition/JobDescription/Resources/"+
                "InstanceDefinition/InstanceDescription")
        except Exception, e: