def validateFootnote(modelXbrl, footnote):
    """Check the XHTML content of a footnote and report EFM 6.5.34 problems.

    The footnote content is copied beneath a fresh ``<body>`` element,
    validated with ``edbodyDTD`` and then scanned for:

    * ``javascript:`` inside ``<a href>`` / ``<img src>`` values,
    * http/https/ftp references (``<a>`` links that start with
      ``http://www.sec.gov/Archives/edgar/data/`` are accepted),
    * ``<img src>`` graphics whose name does not end in ``.jpg``/``.gif``,
      whose detected content (per ``validateGraphicFile``) does not match
      the extension, or which cannot be opened,
    * ``<table>`` elements that have a ``<table>`` ancestor.

    Findings are reported through ``modelXbrl.error``; nothing is returned.

    :param modelXbrl: object providing the ``error`` reporting method.
    :param footnote: footnote element; its ``xlink:label`` attribute is used
        to identify it in messages.
    """
    loadDTD(modelXbrl)
    xlinkLabel = footnote.get("{http://www.w3.org/1999/xlink}label")
    checkedGraphicsFiles = set()  # only check any graphics file reference once per footnote

    def report(code, message, **details):
        # Every message shares the same model object and label arguments.
        modelXbrl.error(code, message, modelObject=footnote, xlinkLabel=xlinkLabel, **details)

    try:
        footnoteHtml = XML("<body/>")
        copyHtml(footnote, footnoteHtml)
        if not edbodyDTD.validate(footnoteHtml):
            report("EFM.6.05.34.dtdError",
                   _("Footnote %(xlinkLabel)s causes the XML error %(error)s"),
                   error=', '.join(e.message for e in edbodyDTD.error_log.filter_from_errors()))
        for elt in footnoteHtml.iter():
            eltTag = elt.tag
            for attrTag, attrValue in elt.items():
                isLink = attrTag == "href" and eltTag == "a"
                isImage = attrTag == "src" and eltTag == "img"
                if not (isLink or isImage):
                    continue
                if "javascript:" in attrValue:
                    report("EFM.6.05.34.activeContent",
                           _("Footnote %(xlinkLabel)s has javascript in '%(attribute)s' for <%(element)s>"),
                           attribute=attrTag, element=eltTag)
                elif isLink and attrValue.startswith("http://www.sec.gov/Archives/edgar/data/"):
                    pass  # links into the EDGAR archive are permitted
                elif "http:" in attrValue or "https:" in attrValue or "ftp:" in attrValue:
                    report("EFM.6.05.34.externalReference",
                           _("Footnote %(xlinkLabel)s has an invalid external reference in '%(attribute)s' for <%(element)s>: %(value)s"),
                           attribute=attrTag, element=eltTag, value=attrValue)
                # NOTE(review): SOURCE's indentation was lost; the graphics check is
                # assumed to sit inside the href/src branch (i.e. <img src> only).
                if isImage and attrValue not in checkedGraphicsFiles:
                    if attrValue.lower()[-4:] not in ('.jpg', '.gif'):
                        report("EFM.6.05.34.graphicFileType",
                               _("Footnote %(xlinkLabel)s references a graphics file which isn't .gif or .jpg '%(attribute)s' for <%(element)s>"),
                               attribute=attrValue, element=eltTag)
                    else:  # test file contents
                        try:
                            if validateGraphicFile(footnote, attrValue) != attrValue.lower()[-3:]:
                                report("EFM.6.05.34.graphicFileContent",
                                       _("Footnote %(xlinkLabel)s references a graphics file which doesn't have expected content '%(attribute)s' for <%(element)s>"),
                                       attribute=attrValue, element=eltTag)
                        except IOError as err:
                            report("EFM.6.05.34.graphicFileError",
                                   _("Footnote %(xlinkLabel)s references a graphics file which isn't openable '%(attribute)s' for <%(element)s>, error: %(error)s"),
                                   attribute=attrValue, element=eltTag, error=err)
                    checkedGraphicsFiles.add(attrValue)
            if eltTag == "table" and any(a is not None for a in elt.iterancestors("table")):
                report("EFM.6.05.34.nestedTable",
                       _("Footnote %(xlinkLabel)s has nested <table> elements."))
    except (XMLSyntaxError, UnicodeDecodeError) as err:
        # Report the exception that was actually raised; the DTD error log is
        # unrelated to a parse/decode failure and may hold stale entries.
        report("EFM.6.05.34",
               _("Footnote %(xlinkLabel)s causes the XML error %(error)s"),
               error=err)
def validateFootnote(modelXbrl, footnote):
    """Check the XHTML content of a footnote and report EFM 6.5.34 problems.

    The footnote content is copied beneath a fresh ``<body>`` element and
    validated with ``edbodyDTD``.  ``<a href>`` and ``<img src>`` values are
    then checked for ``javascript:`` and for http/https/ftp references;
    ``<a>`` links starting with ``http://www.sec.gov/Archives/edgar/data/``
    are accepted.

    Findings are reported through ``modelXbrl.error`` under code
    ``EFM.6.05.34``; nothing is returned.

    :param modelXbrl: object providing the ``error`` reporting method.
    :param footnote: footnote element; its ``xlink:label`` attribute is used
        to identify it in messages.
    """
    loadDTD(modelXbrl)
    xlinkLabel = footnote.get("{http://www.w3.org/1999/xlink}label")

    def report(message, **details):
        # Every message shares the same code, model object and label.
        modelXbrl.error("EFM.6.05.34", message,
                        modelObject=footnote, xlinkLabel=xlinkLabel, **details)

    try:
        footnoteHtml = XML("<body/>")
        copyHtml(footnote, footnoteHtml)
        if not edbodyDTD.validate(footnoteHtml):
            report(_("Footnote %(xlinkLabel)s causes the XML error %(error)s"),
                   error=', '.join(e.message for e in edbodyDTD.error_log.filter_from_errors()))
        for elt in footnoteHtml.iter():
            eltTag = elt.tag
            for attrTag, attrValue in elt.items():
                isLink = attrTag == "href" and eltTag == "a"
                isImage = attrTag == "src" and eltTag == "img"
                if not (isLink or isImage):
                    continue
                if "javascript:" in attrValue:
                    report(_("Footnote %(xlinkLabel)s has javascript in '%(attribute)s' for <%(element)s>"),
                           attribute=attrTag, element=eltTag)
                elif isLink and attrValue.startswith("http://www.sec.gov/Archives/edgar/data/"):
                    pass  # links into the EDGAR archive are permitted
                elif "http:" in attrValue or "https:" in attrValue or "ftp:" in attrValue:
                    report(_("Footnote %(xlinkLabel)s has an invalid external reference in '%(attribute)s' for <%(element)s>: %(value)s"),
                           attribute=attrTag, element=eltTag, value=attrValue)
    except (XMLSyntaxError, UnicodeDecodeError) as err:
        # Report the exception that was actually raised; the DTD error log is
        # unrelated to a parse/decode failure and may hold stale entries.
        report(_("Footnote %(xlinkLabel)s causes the XML error %(error)s"),
               error=err)
def validateFootnote(modelXbrl, footnote):
    """Validate a footnote's XHTML and report problems as ``EFM.6.05.34``.

    Steps performed:

    1. copy the footnote content under a new ``<body>`` element and validate
       it with ``edbodyDTD``;
    2. for every ``<a href>`` and ``<img src>`` value, reject ``javascript:``
       and http/https/ftp references, except ``<a>`` links that start with
       ``http://www.sec.gov/Archives/edgar/data/``.

    Errors go to ``modelXbrl.error``; the function returns ``None``.

    :param modelXbrl: object providing the ``error`` reporting method.
    :param footnote: footnote element; its ``xlink:label`` attribute is used
        to identify it in messages.
    """
    loadDTD(modelXbrl)
    label = footnote.get("{http://www.w3.org/1999/xlink}label")
    try:
        footnoteHtml = XML("<body/>")
        copyHtml(footnote, footnoteHtml)
        if not edbodyDTD.validate(footnoteHtml):
            dtdMessages = [e.message for e in edbodyDTD.error_log.filter_from_errors()]
            modelXbrl.error(
                "EFM.6.05.34",
                _("Footnote %(xlinkLabel)s causes the XML error %(error)s"),
                modelObject=footnote,
                xlinkLabel=label,
                error=", ".join(dtdMessages),
            )
        for elt in footnoteHtml.iter():
            eltTag = elt.tag
            for attrTag, attrValue in elt.items():
                onAnchor = eltTag == "a" and attrTag == "href"
                onImage = eltTag == "img" and attrTag == "src"
                if not onAnchor and not onImage:
                    continue
                if "javascript:" in attrValue:
                    modelXbrl.error(
                        "EFM.6.05.34",
                        _("Footnote %(xlinkLabel)s has javascript in '%(attribute)s' for <%(element)s>"),
                        modelObject=footnote,
                        xlinkLabel=label,
                        attribute=attrTag,
                        element=eltTag,
                    )
                    continue
                if onAnchor and attrValue.startswith("http://www.sec.gov/Archives/edgar/data/"):
                    continue  # links into the EDGAR archive are permitted
                if "http:" in attrValue or "https:" in attrValue or "ftp:" in attrValue:
                    modelXbrl.error(
                        "EFM.6.05.34",
                        _(
                            "Footnote %(xlinkLabel)s has an invalid external reference in '%(attribute)s' for <%(element)s>: %(value)s"
                        ),
                        modelObject=footnote,
                        xlinkLabel=label,
                        attribute=attrTag,
                        element=eltTag,
                        value=attrValue,
                    )
    except (XMLSyntaxError, UnicodeDecodeError) as err:
        # Report the exception that was actually raised; the DTD error log is
        # unrelated to a parse/decode failure and may hold stale entries.
        modelXbrl.error(
            "EFM.6.05.34",
            _("Footnote %(xlinkLabel)s causes the XML error %(error)s"),
            modelObject=footnote,
            xlinkLabel=label,
            error=err,
        )
def validateTextBlockFacts(modelXbrl):
    """Validate the escaped HTML of every text-block fact in ``modelXbrl.facts``.

    A fact is examined when it is not nil, has a concept flagged
    ``isTextBlock`` and its value matches ``XMLpattern``.  For such a fact:

    * every named entity found by ``namedEntityPattern`` must be in
      ``xhtmlEntities``;
    * the value, and each CDATA section found by ``CDATApattern``, is wrapped
      in ``<body>`` (after ``removeEntities``), parsed and validated with
      ``edbodyDTD``;
    * ``<a href>`` / ``<img src>`` values are checked for ``javascript:`` and
      http/https/ftp references (``<a>`` links starting with
      ``http://www.sec.gov/Archives/edgar/data/`` are accepted);
    * ``<img src>`` graphics must end in ``.jpg``/``.gif``, be openable and
      have content matching the extension (per ``validateGraphicFile``);
    * a ``<table>`` must not have a ``<table>`` ancestor.

    Findings are reported through ``modelXbrl.error``; nothing is returned.

    :param modelXbrl: model whose ``facts`` are validated and whose ``error``
        method receives the findings.
    """
    loadDTD(modelXbrl)
    checkedGraphicsFiles = set()  # only check any graphics file reference once per fact

    def report(fact, codes, message, **details):
        # Every message carries the same fact-identifying arguments.
        modelXbrl.error(codes, message, modelObject=fact,
                        fact=fact.qname, contextID=fact.contextID, **details)

    for f1 in modelXbrl.facts:
        concept = f1.concept
        if (f1.xsiNil == "true" or concept is None or not concept.isTextBlock
                or not XMLpattern.match(f1.value)):
            continue

        # test encoded entity tags
        for match in namedEntityPattern.finditer(f1.value):
            entity = match.group()
            if entity not in xhtmlEntities:
                report(f1, ("EFM.6.05.16", "GFM.1.2.15"),
                       _("Fact %(fact)s contextID %(contextID)s has disallowed entity %(entity)s"),
                       entity=entity, error=entity)

        # test html: the whole value first, then each embedded CDATA section
        for xmltext in [f1.value] + CDATApattern.findall(f1.value):
            xmlBodyWithoutEntities = "<body>\n{0}\n</body>\n".format(removeEntities(xmltext))
            try:
                textblockXml = XML(xmlBodyWithoutEntities)
                if not edbodyDTD.validate(textblockXml):
                    errors = edbodyDTD.error_log.filter_from_errors()
                    htmlError = any(e.type_name in ("DTD_INVALID_CHILD", "DTD_UNKNOWN_ATTRIBUTE")
                                    for e in errors)
                    report(f1,
                           "EFM.6.05.16" if htmlError else ("EFM.6.05.15.dtdError", "GFM.1.02.14"),
                           _("Fact %(fact)s contextID %(contextID)s has text which causes the XML error %(error)s"),
                           error=', '.join(e.message for e in errors))
                for elt in textblockXml.iter():
                    eltTag = elt.tag
                    for attrTag, attrValue in elt.items():
                        isLink = attrTag == "href" and eltTag == "a"
                        isImage = attrTag == "src" and eltTag == "img"
                        if not (isLink or isImage):
                            continue
                        if "javascript:" in attrValue:
                            report(f1, "EFM.6.05.16.activeContent",
                                   _("Fact %(fact)s of context %(contextID)s has javascript in '%(attribute)s' for <%(element)s>"),
                                   attribute=attrTag, element=eltTag)
                        elif isLink and attrValue.startswith("http://www.sec.gov/Archives/edgar/data/"):
                            pass  # links into the EDGAR archive are permitted
                        elif "http:" in attrValue or "https:" in attrValue or "ftp:" in attrValue:
                            report(f1, "EFM.6.05.16.externalReference",
                                   _("Fact %(fact)s of context %(contextID)s has an invalid external reference in '%(attribute)s' for <%(element)s>"),
                                   attribute=attrTag, element=eltTag)
                        # NOTE(review): SOURCE's indentation was lost; the graphics check is
                        # assumed to sit inside the href/src branch (i.e. <img src> only).
                        if isImage and attrValue not in checkedGraphicsFiles:
                            if attrValue.lower()[-4:] not in ('.jpg', '.gif'):
                                report(f1, "EFM.6.05.16.graphicFileType",
                                       _("Fact %(fact)s of context %(contextID)s references a graphics file which isn't .gif or .jpg '%(attribute)s' for <%(element)s>"),
                                       attribute=attrValue, element=eltTag)
                            else:  # test file contents
                                try:
                                    if validateGraphicFile(f1, attrValue) != attrValue.lower()[-3:]:
                                        report(f1, "EFM.6.05.16.graphicFileContent",
                                               _("Fact %(fact)s of context %(contextID)s references a graphics file which doesn't have expected content '%(attribute)s' for <%(element)s>"),
                                               attribute=attrValue, element=eltTag)
                                except IOError as err:
                                    report(f1, "EFM.6.05.16.graphicFileError",
                                           _("Fact %(fact)s of context %(contextID)s references a graphics file which isn't openable '%(attribute)s' for <%(element)s>, error: %(error)s"),
                                           attribute=attrValue, element=eltTag, error=err)
                            checkedGraphicsFiles.add(attrValue)
                    if eltTag == "table" and any(a is not None for a in elt.iterancestors("table")):
                        report(f1, "EFM.6.05.16.nestedTable",
                               _("Fact %(fact)s of context %(contextID)s has nested <table> elements."))
            except (XMLSyntaxError, UnicodeDecodeError) as err:
                report(f1, ("EFM.6.05.15", "GFM.1.02.14"),
                       _("Fact %(fact)s contextID %(contextID)s has text which causes the XML error %(error)s"),
                       error=err)

        # NOTE(review): placement reconstructed from the "once per fact" remark on
        # the set above -- the cache is reset after each fact has been processed.
        checkedGraphicsFiles.clear()
def validateTextBlockFacts(modelXbrl):
    """Validate the escaped HTML of every text-block fact in ``modelXbrl.facts``.

    A fact is examined when it is not nil, has a concept flagged
    ``isTextBlock`` and its value matches ``XMLpattern``.  For such a fact:

    * every entity found by ``entityPattern`` must be in ``xhtmlEntities``;
    * the value, and each CDATA section found by ``CDATApattern``, is wrapped
      in ``<body>`` (after ``removeEntities``), parsed and validated with
      ``edbodyDTD``;
    * ``<a href>`` / ``<img src>`` values are checked for ``javascript:`` and
      http/https/ftp references (``<a>`` links starting with
      ``http://www.sec.gov/Archives/edgar/data/`` are accepted).

    Findings are reported through ``modelXbrl.error``; nothing is returned.

    :param modelXbrl: model whose ``facts`` are validated and whose ``error``
        method receives the findings.
    """
    loadDTD(modelXbrl)

    def report(fact, codes, message, **details):
        # Every message carries the same fact-identifying arguments.
        modelXbrl.error(codes, message, modelObject=fact,
                        fact=fact.qname, contextID=fact.contextID, **details)

    for f1 in modelXbrl.facts:
        concept = f1.concept
        if (f1.xsiNil == "true" or concept is None or not concept.isTextBlock
                or not XMLpattern.match(f1.value)):
            continue

        # test encoded entity tags
        for match in entityPattern.finditer(f1.value):
            entity = match.group()
            if entity not in xhtmlEntities:
                report(f1, ("EFM.6.05.16", "GFM.1.2.15"),
                       _("Fact %(fact)s contextID %(contextID)s has disallowed entity %(entity)s"),
                       entity=entity)

        # test html: the whole value first, then each embedded CDATA section
        for xmltext in [f1.value] + CDATApattern.findall(f1.value):
            try:
                textblockXml = XML("<body>\n{0}\n</body>\n".format(removeEntities(xmltext)))
                if not edbodyDTD.validate(textblockXml):
                    errors = edbodyDTD.error_log.filter_from_errors()
                    htmlError = any(e.type_name in ("DTD_INVALID_CHILD", "DTD_UNKNOWN_ATTRIBUTE")
                                    for e in errors)
                    report(f1,
                           "EFM.6.05.16" if htmlError else ("EFM.6.05.15", "GFM.1.02.14"),
                           _("Fact %(fact)s contextID %(contextID)s has text which causes the XML error %(error)s"),
                           error=", ".join(e.message for e in errors))
                for elt in textblockXml.iter():
                    eltTag = elt.tag
                    for attrTag, attrValue in elt.items():
                        isLink = attrTag == "href" and eltTag == "a"
                        isImage = attrTag == "src" and eltTag == "img"
                        if not (isLink or isImage):
                            continue
                        if "javascript:" in attrValue:
                            # The format key previously lacked its "s" conversion
                            # ("%(contextID) has"), which Python reads as "% ha"
                            # and so garbled the message.
                            report(f1, "EFM.6.05.16",
                                   _("Fact %(fact)s of context %(contextID)s has javascript in '%(attribute)s' for <%(element)s>"),
                                   attribute=attrTag, element=eltTag)
                        elif isLink and attrValue.startswith("http://www.sec.gov/Archives/edgar/data/"):
                            pass  # links into the EDGAR archive are permitted
                        elif "http:" in attrValue or "https:" in attrValue or "ftp:" in attrValue:
                            report(f1, "EFM.6.05.16",
                                   _("Fact %(fact)s of context %(contextID)s has an invalid external reference in '%(attribute)s' for <%(element)s>"),
                                   attribute=attrTag, element=eltTag)
            except (XMLSyntaxError, UnicodeDecodeError) as err:
                report(f1, ("EFM.6.05.15", "GFM.1.02.14"),
                       _("Fact %(fact)s contextID %(contextID)s has text which causes the XML error %(error)s"),
                       error=err)
def createTargetInstance(modelXbrl, targetUrl, targetDocumentSchemaRefs, filingFiles, baseXmlLang=None, defaultXmlLang=None):
    """Build an XBRL instance from the facts and footnotes of an inline XBRL model.

    A new instance is created at ``targetUrl`` and populated, in order, with
    the ix target root attributes, role/arcrole refs, contexts, units, facts
    (tuples recursively) and footnote links of ``modelXbrl``.

    :param modelXbrl: loaded inline XBRL model to extract from.
    :param targetUrl: URL given to the newly created instance document.
    :param targetDocumentSchemaRefs: schema references for the new instance.
    :param filingFiles: set that receives the paths of locally referenced
        files (``<a href>`` / ``<img src>`` in text blocks and footnotes), or
        ``None`` to skip that collection.
    :param baseXmlLang: ``xml:lang`` written to the root element when the ix
        target root does not carry one.
    :param defaultXmlLang: language against which footnote ``xml:lang`` values
        are compared; defaults to the root ``xml:lang`` or ``baseXmlLang``.
    :returns: the created target instance.
    """
    XML_LANG = "{http://www.w3.org/XML/1998/namespace}lang"
    HREF = "{http://www.w3.org/1999/xlink}href"

    def addLocallyReferencedFile(elt, filingFiles):
        # Record the file behind a relative <a href> / <img src> if it exists.
        if elt.tag not in ("a", "img"):
            return
        for attrTag, attrValue in elt.items():
            if attrTag in ("href", "src") and not isHttpUrl(attrValue) and not os.path.isabs(attrValue):
                attrValue = attrValue.partition('#')[0]  # remove anchor
                if attrValue:  # ignore anchor references to base document
                    attrValue = os.path.normpath(attrValue)  # change url path separators to host separators
                    # NOTE(review): sourceDir is not assigned in this function;
                    # presumably it is defined at module level -- confirm.
                    file = os.path.join(sourceDir, attrValue)
                    if modelXbrl.fileSource.isInArchive(file, checkExistence=True) or os.path.exists(file):
                        filingFiles.add(file)

    targetInstance = ModelXbrl.create(modelXbrl.modelManager,
                                      newDocumentType=Type.INSTANCE,
                                      url=targetUrl,
                                      schemaRefs=targetDocumentSchemaRefs,
                                      isEntry=True,
                                      discover=False)  # don't attempt to load DTS
    ixTargetRootElt = modelXbrl.ixTargetRootElements[getattr(modelXbrl, "ixdsTarget", None)]
    langIsSet = False
    # copy ix resources target root attributes
    for attrName, attrValue in ixTargetRootElt.items():
        if attrName != "target":  # ix:references target is not mapped to xbrli:xbrl
            targetInstance.modelDocument.xmlRootElement.set(attrName, attrValue)
        if attrName == XML_LANG:
            langIsSet = True
            defaultXmlLang = attrValue
        if attrName.startswith("{"):
            # make sure the attribute's namespace prefix is declared in the target
            ns = attrName[1:].rpartition("}")[0]
            if ns:
                prefix = xmlnsprefix(ixTargetRootElt, ns)
                if prefix not in (None, "xml"):
                    setXmlns(targetInstance.modelDocument, prefix, ns)

    if not langIsSet and baseXmlLang:
        targetInstance.modelDocument.xmlRootElement.set(XML_LANG, baseXmlLang)
        if defaultXmlLang is None:
            defaultXmlLang = baseXmlLang  # allows facts/footnotes to override baseXmlLang
    ValidateXbrlDimensions.loadDimensionDefaults(targetInstance)  # need dimension defaults
    # roleRef and arcroleRef (of each inline document)
    for sourceRefs in (modelXbrl.targetRoleRefs, modelXbrl.targetArcroleRefs):
        for roleRefElt in sourceRefs.values():
            addChild(targetInstance.modelDocument.xmlRootElement, roleRefElt.qname,
                     attributes=roleRefElt.items())

    # contexts may come from multiple IXDS files, hence the objectIndex ordering
    for context in sorted(modelXbrl.contexts.values(), key=lambda c: c.objectIndex):
        targetInstance.createContext(
            context.entityIdentifier[0],
            context.entityIdentifier[1],
            'instant' if context.isInstantPeriod else
            'duration' if context.isStartEndPeriod else 'forever',
            context.startDatetime,
            context.endDatetime,
            None,
            context.qnameDims, [], [],
            id=context.id)
    # units may come from multiple IXDS files
    for unit in sorted(modelXbrl.units.values(), key=lambda u: u.objectIndex):
        measures = unit.measures
        targetInstance.createUnit(measures[0], measures[1], id=unit.id)

    modelXbrl.modelManager.showStatus(_("Creating and validating facts"))
    newFactForOldObjId = {}

    def createFacts(facts, parent):
        # Copy facts under `parent`, recursing into tuples.
        for fact in facts:
            if fact.isItem:  # HF does not de-duplicate, which is currently-desired behavior
                attrs = {"contextRef": fact.contextID}
                if fact.id:
                    attrs["id"] = fact.id
                if fact.isNumeric:
                    attrs["unitRef"] = fact.unitID
                    if fact.get("decimals"):
                        attrs["decimals"] = fact.get("decimals")
                    if fact.get("precision"):
                        attrs["precision"] = fact.get("precision")
                if fact.isNil:
                    attrs[XbrlConst.qnXsiNil] = "true"
                    text = None
                else:
                    text = fact.xValue if fact.xValid else fact.textValue
                for attrName, attrValue in fact.items():
                    if attrName.startswith("{"):
                        # using qname allows setting prefix in extracted instance
                        attrs[qname(attrName, fact.nsmap)] = attrValue
                newFact = targetInstance.createFact(fact.qname, attributes=attrs, text=text, parent=parent)
                # if fact.isFraction, create numerator and denominator
                newFactForOldObjId[fact.objectIndex] = newFact
                # Check for img and other filing references so that referenced
                # files are included in the zip.  A nil fact has text None,
                # which the CDATA regex cannot scan, so only strings qualify.
                if (filingFiles is not None and fact.concept is not None
                        and fact.concept.isTextBlock and isinstance(text, str)):
                    for xmltext in [text] + CDATApattern.findall(text):
                        try:
                            for elt in XML("<body>\n{0}\n</body>\n".format(xmltext)).iter():
                                addLocallyReferencedFile(elt, filingFiles)
                        except (XMLSyntaxError, UnicodeDecodeError):
                            pass  # TODO: Why ignore UnicodeDecodeError?
            elif fact.isTuple:
                attrs = {}
                if fact.id:
                    attrs["id"] = fact.id
                if fact.isNil:
                    attrs[XbrlConst.qnXsiNil] = "true"
                for attrName, attrValue in fact.items():
                    if attrName.startswith("{"):
                        attrs[qname(attrName, fact.nsmap)] = attrValue
                newTuple = targetInstance.createFact(fact.qname, attributes=attrs, parent=parent)
                newFactForOldObjId[fact.objectIndex] = newTuple
                createFacts(fact.modelTupleFacts, newTuple)

    createFacts(modelXbrl.facts, None)
    modelXbrl.modelManager.showStatus(_("Creating and validating footnotes and relationships"))
    footnoteLinks = defaultdict(list)
    footnoteIdCount = {}
    for linkKey, linkPrototypes in modelXbrl.baseSets.items():
        arcrole, linkrole, linkqname, arcqname = linkKey
        if (linkrole and linkqname and arcqname and  # fully specified roles
                arcrole != "XBRL-footnotes" and
                any(lP.modelDocument.type == Type.INLINEXBRL for lP in linkPrototypes)):
            for linkPrototype in linkPrototypes:
                if linkPrototype not in footnoteLinks[linkrole]:
                    footnoteLinks[linkrole].append(linkPrototype)

    for linkrole in sorted(footnoteLinks.keys()):
        for linkPrototype in footnoteLinks[linkrole]:
            newLink = addChild(targetInstance.modelDocument.xmlRootElement,
                               linkPrototype.qname,
                               attributes=linkPrototype.attributes)
            for linkChild in linkPrototype:
                attributes = linkChild.attributes
                if isinstance(linkChild, LocPrototype):
                    if HREF not in linkChild.attributes:
                        linkChild.attributes[HREF] = \
                            "#" + elementFragmentIdentifier(newFactForOldObjId[linkChild.dereference().objectIndex])
                    addChild(newLink, linkChild.qname, attributes=attributes)
                elif isinstance(linkChild, ArcPrototype):
                    addChild(newLink, linkChild.qname, attributes=attributes)
                elif isinstance(linkChild, ModelInlineFootnote):
                    idUseCount = footnoteIdCount.get(linkChild.footnoteID, 0) + 1
                    if idUseCount > 1:  # if footnote with id in other links bump the id number
                        attributes = linkChild.attributes.copy()
                        attributes["id"] = "{}_{}".format(attributes["id"], idUseCount)
                    footnoteIdCount[linkChild.footnoteID] = idUseCount
                    newChild = addChild(newLink, linkChild.qname, attributes=attributes)
                    xmlLang = linkChild.xmlLang
                    if xmlLang is not None and xmlLang != defaultXmlLang:  # default
                        newChild.set(XML_LANG, xmlLang)
                    copyIxFootnoteHtml(linkChild, newChild,
                                       targetModelDocument=targetInstance.modelDocument,
                                       withText=True)
                    # "is not None" (as for facts above) so that an initially
                    # empty filingFiles set is still populated.
                    if filingFiles is not None and linkChild.textValue:
                        footnoteHtml = XML("<body/>")
                        copyIxFootnoteHtml(linkChild, footnoteHtml)
                        for elt in footnoteHtml.iter():
                            addLocallyReferencedFile(elt, filingFiles)
    return targetInstance
def saveTargetDocument(modelXbrl, targetDocumentFilename, targetDocumentSchemaRefs, outputZip=None, filingFiles=None, *args, **kwargs):
    """Extract an XBRL instance from an inline XBRL model and save it.

    The target URL is ``targetDocumentFilename`` normalized against the source
    document's path, with ``_extracted`` inserted before the last ``.``.
    Role/arcrole refs, contexts, units, facts (tuples recursively) and
    footnote links are copied into a new instance, which is then saved.

    :param modelXbrl: loaded inline XBRL model to extract from.
    :param targetDocumentFilename: file name the target URL is derived from.
    :param targetDocumentSchemaRefs: schema references for the new instance.
    :param outputZip: passed through to ``saveInstance``.
    :param filingFiles: set that receives relative ``<a href>`` / ``<img src>``
        values found in text blocks and footnotes, or ``None`` to skip.
    :param args: accepted and ignored.
    :param kwargs: accepted and ignored.
    """
    targetUrl = modelXbrl.modelManager.cntlr.webCache.normalizeUrl(
        targetDocumentFilename, modelXbrl.modelDocument.filepath)
    targetUrlParts = targetUrl.rpartition(".")
    targetUrl = targetUrlParts[0] + "_extracted." + targetUrlParts[2]
    modelXbrl.modelManager.showStatus(_("Extracting instance ") + os.path.basename(targetUrl))
    targetInstance = ModelXbrl.create(modelXbrl.modelManager,
                                      newDocumentType=Type.INSTANCE,
                                      url=targetUrl,
                                      schemaRefs=targetDocumentSchemaRefs,
                                      isEntry=True)
    ValidateXbrlDimensions.loadDimensionDefaults(targetInstance)  # need dimension defaults

    def addReferencedFiles(bodyElt):
        # Collect relative href/src values of every <a>/<img> below bodyElt.
        for elt in bodyElt.iter():
            if elt.tag not in ("a", "img"):
                continue
            for attrTag, attrValue in elt.items():
                if (attrTag in ("href", "src") and not isHttpUrl(attrValue)
                        and not os.path.isabs(attrValue)):
                    filingFiles.add(attrValue)

    # roleRef and arcroleRef (of each inline document)
    for sourceRefs in (modelXbrl.targetRoleRefs, modelXbrl.targetArcroleRefs):
        for roleRefElt in sourceRefs.values():
            addChild(targetInstance.modelDocument.xmlRootElement, roleRefElt.qname,
                     attributes=roleRefElt.items())

    # contexts
    for context in modelXbrl.contexts.values():
        targetInstance.createContext(
            context.entityIdentifier[0],
            context.entityIdentifier[1],
            'instant' if context.isInstantPeriod else
            'duration' if context.isStartEndPeriod else 'forever',
            context.startDatetime,
            context.endDatetime,
            None,
            context.qnameDims, [], [],
            id=context.id)
    for unit in modelXbrl.units.values():
        measures = unit.measures
        targetInstance.createUnit(measures[0], measures[1], id=unit.id)

    modelXbrl.modelManager.showStatus(_("Creating and validating facts"))
    newFactForOldObjId = {}

    def createFacts(facts, parent):
        # Copy facts under `parent`, recursing into tuples.
        for fact in facts:
            if fact.isItem:
                attrs = {"contextRef": fact.contextID}
                if fact.id:
                    attrs["id"] = fact.id
                if fact.isNumeric:
                    attrs["unitRef"] = fact.unitID
                    if fact.get("decimals"):
                        attrs["decimals"] = fact.get("decimals")
                    if fact.get("precision"):
                        attrs["precision"] = fact.get("precision")
                if fact.isNil:
                    attrs[XbrlConst.qnXsiNil] = "true"
                    text = None
                else:
                    text = fact.xValue if fact.xValid else fact.textValue
                newFact = targetInstance.createFact(fact.qname, attributes=attrs, text=text, parent=parent)
                newFactForOldObjId[fact.objectIndex] = newFact
                # Check for img and other filing references.  A nil fact has
                # text None, which the CDATA regex cannot scan, so only
                # strings qualify.
                if (filingFiles is not None and fact.concept is not None
                        and fact.concept.isTextBlock and isinstance(text, str)):
                    for xmltext in [text] + CDATApattern.findall(text):
                        try:
                            addReferencedFiles(XML("<body>\n{0}\n</body>\n".format(xmltext)))
                        except (XMLSyntaxError, UnicodeDecodeError):
                            pass  # unparsable text blocks contribute no file references
            elif fact.isTuple:
                newTuple = targetInstance.createFact(fact.qname, parent=parent)
                newFactForOldObjId[fact.objectIndex] = newTuple
                createFacts(fact.modelTupleFacts, newTuple)

    createFacts(modelXbrl.facts, None)

    # footnote links
    footnoteIdCount = {}
    modelXbrl.modelManager.showStatus(_("Creating and validating footnotes & relationships"))
    HREF = "{http://www.w3.org/1999/xlink}href"
    footnoteLinks = defaultdict(list)
    for linkKey, linkPrototypes in modelXbrl.baseSets.items():
        arcrole, linkrole, linkqname, arcqname = linkKey
        if (linkrole and linkqname and arcqname and  # fully specified roles
                arcrole != "XBRL-footnotes" and
                any(lP.modelDocument.type == Type.INLINEXBRL for lP in linkPrototypes)):
            for linkPrototype in linkPrototypes:
                if linkPrototype not in footnoteLinks[linkrole]:
                    footnoteLinks[linkrole].append(linkPrototype)

    for linkrole in sorted(footnoteLinks.keys()):
        for linkPrototype in footnoteLinks[linkrole]:
            newLink = addChild(targetInstance.modelDocument.xmlRootElement,
                               linkPrototype.qname,
                               attributes=linkPrototype.attributes)
            for linkChild in linkPrototype:
                attributes = linkChild.attributes
                if isinstance(linkChild, LocPrototype):
                    if HREF not in linkChild.attributes:
                        linkChild.attributes[HREF] = \
                            "#" + elementFragmentIdentifier(newFactForOldObjId[linkChild.dereference().objectIndex])
                    addChild(newLink, linkChild.qname, attributes=attributes)
                elif isinstance(linkChild, ArcPrototype):
                    addChild(newLink, linkChild.qname, attributes=attributes)
                elif isinstance(linkChild, ModelInlineFootnote):
                    idUseCount = footnoteIdCount.get(linkChild.footnoteID, 0) + 1
                    if idUseCount > 1:  # if footnote with id in other links bump the id number
                        attributes = linkChild.attributes.copy()
                        attributes["id"] = "{}_{}".format(attributes["id"], idUseCount)
                    footnoteIdCount[linkChild.footnoteID] = idUseCount
                    newChild = addChild(newLink, linkChild.qname, attributes=attributes)
                    copyIxFootnoteHtml(linkChild, newChild,
                                       targetModelDocument=targetInstance.modelDocument,
                                       withText=True)
                    # "is not None" so that an initially empty set is still populated
                    if filingFiles is not None and linkChild.textValue:
                        footnoteHtml = XML("<body/>")
                        copyIxFootnoteHtml(linkChild, footnoteHtml)
                        addReferencedFiles(footnoteHtml)

    targetInstance.saveInstance(overrideFilepath=targetUrl, outputZip=outputZip)
    if getattr(modelXbrl, "isTestcaseVariation", False):
        modelXbrl.extractedInlineInstance = True  # for validation comparison
    modelXbrl.modelManager.showStatus(_("Saved extracted instance"), 5000)
def validateTextBlockFacts(modelXbrl):
    """Validate the escaped HTML of every text-block fact in ``modelXbrl.facts``.

    A fact is examined when it is not nil, has a concept flagged
    ``isTextBlock`` and its value matches ``XMLpattern``.  For such a fact:

    * every named entity found by ``namedEntityPattern`` must be in
      ``xhtmlEntities``;
    * the value, and each CDATA section found by ``CDATApattern``, is wrapped
      in a body template (with an extra ``<div>`` when ``isInlineDTD``),
      parsed and validated with ``edbodyDTD``;
    * when ``isInlineDTD``, elements in ``efmBlockedInlineHtmlElements`` and
      attributes in ``efmBlockedInlineHtmlElementAttributes`` are rejected;
    * ``<a href>`` / ``<img src>`` values are checked for ``javascript:`` and
      for http/https/ftp schemes; an ``<a>`` value is accepted when no
      ``allowedExternalHrefPattern`` is configured or when it matches it;
    * ``<img src>`` must not be a ``data:`` URL, must end in ``.jpg``/``.gif``,
      be openable and have content matching the extension (per
      ``validateGraphicFile``);
    * a ``<table>`` must not have a ``<table>`` ancestor.

    Findings are reported through ``modelXbrl.error``; nothing is returned.

    :param modelXbrl: model whose ``facts`` are validated and whose ``error``
        method receives the findings.
    """
    loadDTD(modelXbrl)
    checkedGraphicsFiles = set()  # only check any graphics file reference once per fact
    allowedExternalHrefPattern = modelXbrl.modelManager.disclosureSystem.allowedExternalHrefPattern
    if isInlineDTD:
        htmlBodyTemplate = "<body><div>\n{0}\n</div></body>\n"
    else:
        htmlBodyTemplate = "<body>\n{0}\n</body>\n"
    _xhtmlNs = "{{{}}}".format(xhtml)
    _xhtmlNsLen = len(_xhtmlNs)

    def report(fact, codes, message, **details):
        # Every fact-level message carries the same identifying arguments.
        modelXbrl.error(codes, message, modelObject=fact,
                        fact=fact.qname, contextID=fact.contextID, **details)

    for f1 in modelXbrl.facts:
        concept = f1.concept
        if (f1.xsiNil == "true" or concept is None or not concept.isTextBlock
                or not XMLpattern.match(f1.value)):
            continue

        # test encoded entity tags
        for match in namedEntityPattern.finditer(f1.value):
            entity = match.group()
            if entity not in xhtmlEntities:
                report(f1, ("EFM.6.05.16", "GFM.1.2.15"),
                       _("Fact %(fact)s contextID %(contextID)s has disallowed entity %(entity)s"),
                       entity=entity, error=entity)

        # test html: the whole value first, then each embedded CDATA section
        for xmltext in [f1.value] + CDATApattern.findall(f1.value):
            xmlBodyWithoutEntities = htmlBodyTemplate.format(removeEntities(xmltext))
            try:
                textblockXml = XML(xmlBodyWithoutEntities)
                if not edbodyDTD.validate(textblockXml):
                    errors = edbodyDTD.error_log.filter_from_errors()
                    htmlError = any(e.type_name in ("DTD_INVALID_CHILD", "DTD_UNKNOWN_ATTRIBUTE")
                                    for e in errors)
                    report(f1,
                           "EFM.6.05.16" if htmlError else ("EFM.6.05.15.dtdError", "GFM.1.02.14"),
                           _("Fact %(fact)s contextID %(contextID)s has text which causes the XML error %(error)s"),
                           error=', '.join(e.message for e in errors),
                           messageCodes=("EFM.6.05.16", "EFM.6.05.15.dtdError", "GFM.1.02.14"))
                for elt in textblockXml.iter():
                    # Reduce the tag to its local name when it is in the xhtml namespace.
                    if isinstance(elt, ModelObject) and elt.namespaceURI == xhtml:
                        eltTag = elt.localName
                    elif isinstance(elt, (_ElementTree, _Comment, _ProcessingInstruction)):
                        continue  # comment or other non-parsed element
                    else:
                        eltTag = elt.tag
                        if eltTag.startswith(_xhtmlNs):
                            eltTag = eltTag[_xhtmlNsLen:]
                    if isInlineDTD and eltTag in efmBlockedInlineHtmlElements:
                        modelXbrl.error("EFM.5.02.05.disallowedElement",
                            _("%(validatedObjectLabel)s has disallowed element <%(element)s>"),
                            modelObject=elt, validatedObjectLabel=f1.qname,
                            element=eltTag)
                    for attrTag, attrValue in elt.items():
                        if isInlineDTD and attrTag in efmBlockedInlineHtmlElementAttributes.get(eltTag, ()):
                            # The label is the fact's qname, as for disallowedElement
                            # above; the name used here before was never bound.
                            modelXbrl.error("EFM.5.02.05.disallowedAttribute",
                                _("%(validatedObjectLabel)s has disallowed attribute on element <%(element)s>: %(attribute)s=\"%(value)s\""),
                                modelObject=elt, validatedObjectLabel=f1.qname,
                                element=eltTag, attribute=attrTag, value=attrValue)
                        isLink = attrTag == "href" and eltTag == "a"
                        isImage = attrTag == "src" and eltTag == "img"
                        if not (isLink or isImage):
                            continue
                        if "javascript:" in attrValue:
                            report(f1, "EFM.6.05.16.activeContent",
                                   _("Fact %(fact)s of context %(contextID)s has javascript in '%(attribute)s' for <%(element)s>"),
                                   attribute=attrTag, element=eltTag)
                        elif eltTag == "a" and (not allowedExternalHrefPattern or allowedExternalHrefPattern.match(attrValue)):
                            pass  # no restriction configured, or the link is an allowed one
                        elif scheme(attrValue) in ("http", "https", "ftp"):
                            report(f1, "EFM.6.05.16.externalReference",
                                   _("Fact %(fact)s of context %(contextID)s has an invalid external reference in '%(attribute)s' for <%(element)s>"),
                                   attribute=attrTag, element=eltTag)
                        # NOTE(review): SOURCE's indentation was lost; the graphics check is
                        # assumed to sit inside the href/src branch (i.e. <img src> only).
                        if isImage and attrValue not in checkedGraphicsFiles:
                            if scheme(attrValue) == "data":
                                report(f1, "EFM.6.05.16.graphicDataUrl",
                                       _("Fact %(fact)s of context %(contextID)s references a graphics data URL which isn't accepted '%(attribute)s' for <%(element)s>"),
                                       attribute=attrValue[:32], element=eltTag)
                            elif attrValue.lower()[-4:] not in ('.jpg', '.gif'):
                                report(f1, "EFM.6.05.16.graphicFileType",
                                       _("Fact %(fact)s of context %(contextID)s references a graphics file which isn't .gif or .jpg '%(attribute)s' for <%(element)s>"),
                                       attribute=attrValue, element=eltTag)
                            else:  # test file contents
                                try:
                                    if validateGraphicFile(f1, attrValue) != attrValue.lower()[-3:]:
                                        report(f1, "EFM.6.05.16.graphicFileContent",
                                               _("Fact %(fact)s of context %(contextID)s references a graphics file which doesn't have expected content '%(attribute)s' for <%(element)s>"),
                                               attribute=attrValue, element=eltTag)
                                except IOError as err:
                                    report(f1, "EFM.6.05.16.graphicFileError",
                                           _("Fact %(fact)s of context %(contextID)s references a graphics file which isn't openable '%(attribute)s' for <%(element)s>, error: %(error)s"),
                                           attribute=attrValue, element=eltTag, error=err)
                            checkedGraphicsFiles.add(attrValue)
                    if eltTag == "table" and any(a is not None for a in elt.iterancestors("table")):
                        report(f1, "EFM.6.05.16.nestedTable",
                               _("Fact %(fact)s of context %(contextID)s has nested <table> elements."))
            except (XMLSyntaxError, UnicodeDecodeError) as err:
                report(f1, ("EFM.6.05.15", "GFM.1.02.14"),
                       _("Fact %(fact)s contextID %(contextID)s has text which causes the XML error %(error)s"),
                       error=err)

        # NOTE(review): placement reconstructed from the "once per fact" remark on
        # the set above -- the cache is reset after each fact has been processed.
        checkedGraphicsFiles.clear()
def createTargetInstance(modelXbrl, targetUrl, targetDocumentSchemaRefs, filingFiles, baseXmlLang=None, defaultXmlLang=None):
    """Create a target XBRL instance holding the content of an inline XBRL model.

    Copies role/arcrole references, contexts, units, facts (tuples are copied
    recursively) and footnote links from ``modelXbrl`` into a newly created
    instance model and returns it.  Nothing is written to disk here.

    :param modelXbrl: source model whose contexts, units, facts and base sets are copied.
    :param targetUrl: URL given to the new instance document.
    :param targetDocumentSchemaRefs: schema references passed to ``ModelXbrl.create``.
    :param filingFiles: collection handed to ``addLocallyReferencedFile`` for every
        element found in text-block facts (when not None) and in footnotes
        (when truthy); pass None to skip the scan.
    :param baseXmlLang: when truthy, set as xml:lang on the new root element.
    :param defaultXmlLang: language that facts and footnotes do not need to repeat;
        falls back to ``baseXmlLang`` when that is set and this is None.
    :returns: the newly created target instance model.
    """
    XML_LANG = "{http://www.w3.org/XML/1998/namespace}lang"
    targetInstance = ModelXbrl.create(modelXbrl.modelManager,
                                      newDocumentType=Type.INSTANCE,
                                      url=targetUrl,
                                      schemaRefs=targetDocumentSchemaRefs,
                                      isEntry=True,
                                      discover=False)  # don't attempt to load DTS
    targetRoot = targetInstance.modelDocument.xmlRootElement
    if baseXmlLang:
        targetRoot.set(XML_LANG, baseXmlLang)
        if defaultXmlLang is None:
            defaultXmlLang = baseXmlLang  # allows facts/footnotes to override baseXmlLang
    ValidateXbrlDimensions.loadDimensionDefaults(targetInstance)  # need dimension defaults

    # roleRef and arcroleRef (of each inline document)
    for sourceRefs in (modelXbrl.targetRoleRefs, modelXbrl.targetArcroleRefs):
        for roleRefElt in sourceRefs.values():
            addChild(targetRoot, roleRefElt.qname, attributes=roleRefElt.items())

    # contexts may come from multiple IXDS files, so order them by objectIndex
    for context in sorted(modelXbrl.contexts.values(), key=lambda c: c.objectIndex):
        if context.isInstantPeriod:
            periodType = 'instant'
        elif context.isStartEndPeriod:
            periodType = 'duration'
        else:
            periodType = 'forever'
        targetInstance.createContext(context.entityIdentifier[0],
                                     context.entityIdentifier[1],
                                     periodType,
                                     context.startDatetime,
                                     context.endDatetime,
                                     None,
                                     context.qnameDims, [], [],
                                     id=context.id)
    # units may come from multiple IXDS files as well
    for unit in sorted(modelXbrl.units.values(), key=lambda u: u.objectIndex):
        measures = unit.measures
        targetInstance.createUnit(measures[0], measures[1], id=unit.id)

    modelXbrl.modelManager.showStatus(_("Creating and validating facts"))
    newFactForOldObjId = {}  # source fact objectIndex -> fact created in the target

    def createFacts(facts, parent):
        for fact in facts:
            if fact.isItem:  # HF does not de-duplicate, which is currently-desired behavior
                attrs = {"contextRef": fact.contextID}
                if fact.id:
                    attrs["id"] = fact.id
                if fact.isNumeric:
                    attrs["unitRef"] = fact.unitID
                    if fact.get("decimals"):
                        attrs["decimals"] = fact.get("decimals")
                    if fact.get("precision"):
                        attrs["precision"] = fact.get("precision")
                if fact.isNil:
                    attrs[XbrlConst.qnXsiNil] = "true"
                    text = None
                else:
                    text = fact.xValue if fact.xValid else fact.textValue
                    if fact.concept is not None and fact.concept.baseXsdType in ("string", "normalizedString"):
                        # only emit xml:lang when it differs from the default
                        xmlLang = fact.xmlLang
                        if xmlLang is not None and xmlLang != defaultXmlLang:
                            attrs[XML_LANG] = xmlLang
                newFact = targetInstance.createFact(fact.qname, attributes=attrs, text=text, parent=parent)
                # if fact.isFraction, create numerator and denominator
                newFactForOldObjId[fact.objectIndex] = newFact
                # A nil fact has text None, which has no markup to scan (and would
                # make CDATApattern.findall raise), so it is skipped here.
                if (filingFiles is not None and text is not None and
                        fact.concept is not None and fact.concept.isTextBlock):
                    # check for img and other filing references so that referenced files are included in the zip.
                    for xmltext in [text] + CDATApattern.findall(text):
                        try:
                            for elt in XML("<body>\n{0}\n</body>\n".format(xmltext)).iter():
                                addLocallyReferencedFile(elt, filingFiles)
                        except (XMLSyntaxError, UnicodeDecodeError):
                            pass  # TODO: Why ignore UnicodeDecodeError?
            elif fact.isTuple:
                newTuple = targetInstance.createFact(fact.qname, parent=parent)
                newFactForOldObjId[fact.objectIndex] = newTuple
                createFacts(fact.modelTupleFacts, newTuple)

    createFacts(modelXbrl.facts, None)

    modelXbrl.modelManager.showStatus(_("Creating and validating footnotes and relationships"))
    HREF = "{http://www.w3.org/1999/xlink}href"
    footnoteLinks = defaultdict(list)
    footnoteIdCount = {}
    for linkKey, linkPrototypes in modelXbrl.baseSets.items():
        arcrole, linkrole, linkqname, arcqname = linkKey
        if (linkrole and linkqname and arcqname and  # fully specified roles
                arcrole != "XBRL-footnotes" and
                any(lP.modelDocument.type == Type.INLINEXBRL for lP in linkPrototypes)):
            for linkPrototype in linkPrototypes:
                if linkPrototype not in footnoteLinks[linkrole]:
                    footnoteLinks[linkrole].append(linkPrototype)

    for linkrole in sorted(footnoteLinks.keys()):
        for linkPrototype in footnoteLinks[linkrole]:
            newLink = addChild(targetRoot,
                               linkPrototype.qname,
                               attributes=linkPrototype.attributes)
            for linkChild in linkPrototype:
                attributes = linkChild.attributes
                if isinstance(linkChild, LocPrototype):
                    if HREF not in linkChild.attributes:
                        # point the locator at the fact created in the target instance
                        linkChild.attributes[HREF] = \
                            "#" + elementFragmentIdentifier(newFactForOldObjId[linkChild.dereference().objectIndex])
                    addChild(newLink, linkChild.qname, attributes=attributes)
                elif isinstance(linkChild, ArcPrototype):
                    addChild(newLink, linkChild.qname, attributes=attributes)
                elif isinstance(linkChild, ModelInlineFootnote):
                    idUseCount = footnoteIdCount.get(linkChild.footnoteID, 0) + 1
                    if idUseCount > 1:  # if footnote with id in other links bump the id number
                        attributes = linkChild.attributes.copy()
                        attributes["id"] = "{}_{}".format(attributes["id"], idUseCount)
                    footnoteIdCount[linkChild.footnoteID] = idUseCount
                    newChild = addChild(newLink, linkChild.qname, attributes=attributes)
                    xmlLang = linkChild.xmlLang
                    if xmlLang is not None and xmlLang != defaultXmlLang:  # default
                        newChild.set(XML_LANG, xmlLang)
                    copyIxFootnoteHtml(linkChild, newChild, targetModelDocument=targetInstance.modelDocument, withText=True)

                    if filingFiles and linkChild.textValue:
                        footnoteHtml = XML("<body/>")
                        copyIxFootnoteHtml(linkChild, footnoteHtml)
                        for elt in footnoteHtml.iter():
                            addLocallyReferencedFile(elt, filingFiles)
    return targetInstance
def saveTargetDocument(modelXbrl, targetDocumentFilename, targetDocumentSchemaRefs, outputZip=None, filingFiles=None):
    """Extract the facts of an inline XBRL model into a saved instance document.

    The target file name is ``targetDocumentFilename`` normalized against the
    source document's path, with ``_extracted`` inserted before the extension.
    Role/arcrole references, contexts, units, facts (tuples recursively) and
    footnote links are copied into a new instance, which is then saved.

    :param modelXbrl: source model to extract from.
    :param targetDocumentFilename: file name the target URL is derived from.
    :param targetDocumentSchemaRefs: schema references passed to ``ModelXbrl.create``.
    :param outputZip: passed through to ``saveInstance``.
    :param filingFiles: optional set receiving the relative ``href``/``src``
        values of ``<a>``/``<img>`` elements found in text-block facts and
        footnotes.
    """
    targetUrl = modelXbrl.modelManager.cntlr.webCache.normalizeUrl(targetDocumentFilename, modelXbrl.modelDocument.filepath)
    targetUrlParts = targetUrl.rpartition(".")
    targetUrl = targetUrlParts[0] + "_extracted." + targetUrlParts[2]
    modelXbrl.modelManager.showStatus(_("Extracting instance ") + os.path.basename(targetUrl))
    targetInstance = ModelXbrl.create(modelXbrl.modelManager,
                                      newDocumentType=Type.INSTANCE,
                                      url=targetUrl,
                                      schemaRefs=targetDocumentSchemaRefs,
                                      isEntry=True)
    targetRoot = targetInstance.modelDocument.xmlRootElement
    ValidateXbrlDimensions.loadDimensionDefaults(targetInstance)  # need dimension defaults

    def addReferencedFiles(elt):
        # Record non-http, non-absolute href/src values of <a> and <img> elements.
        # The previous inline code read attrValue before the loop that assigns it
        # and misspelled it as "attrvalue", so it failed as soon as it ran.
        if elt.tag in ("a", "img"):
            for attrTag, attrValue in elt.items():
                if attrTag in ("href", "src") and not isHttpUrl(attrValue) and not os.path.isabs(attrValue):
                    filingFiles.add(attrValue)

    # roleRef and arcroleRef (of each inline document)
    for sourceRefs in (modelXbrl.targetRoleRefs, modelXbrl.targetArcroleRefs):
        for roleRefElt in sourceRefs.values():
            addChild(targetRoot, roleRefElt.qname, attributes=roleRefElt.items())

    # contexts
    for context in modelXbrl.contexts.values():
        if context.isInstantPeriod:
            periodType = 'instant'
        elif context.isStartEndPeriod:
            periodType = 'duration'
        else:
            periodType = 'forever'
        targetInstance.createContext(context.entityIdentifier[0],
                                     context.entityIdentifier[1],
                                     periodType,
                                     context.startDatetime,
                                     context.endDatetime,
                                     None,
                                     context.qnameDims, [], [],
                                     id=context.id)
    for unit in modelXbrl.units.values():
        measures = unit.measures
        targetInstance.createUnit(measures[0], measures[1], id=unit.id)

    modelXbrl.modelManager.showStatus(_("Creating and validating facts"))
    newFactForOldObjId = {}  # source fact objectIndex -> fact created in the target

    def createFacts(facts, parent):
        for fact in facts:
            if fact.isItem:
                attrs = {"contextRef": fact.contextID}
                if fact.id:
                    attrs["id"] = fact.id
                if fact.isNumeric:
                    attrs["unitRef"] = fact.unitID
                    if fact.get("decimals"):
                        attrs["decimals"] = fact.get("decimals")
                    if fact.get("precision"):
                        attrs["precision"] = fact.get("precision")
                if fact.isNil:
                    attrs[XbrlConst.qnXsiNil] = "true"
                    text = None
                else:
                    text = fact.xValue if fact.xValid else fact.textValue
                newFact = targetInstance.createFact(fact.qname, attributes=attrs, text=text, parent=parent)
                newFactForOldObjId[fact.objectIndex] = newFact
                # "is not None" so that an initially empty set can be filled;
                # nil facts (text None) have no markup to scan.
                if (filingFiles is not None and text is not None and
                        fact.concept is not None and fact.concept.isTextBlock):
                    # check for img and other filing references
                    for xmltext in [text] + CDATApattern.findall(text):
                        try:
                            # .iter() so nested elements are found too, as in the footnote scan
                            for elt in XML("<body>\n{0}\n</body>\n".format(xmltext)).iter():
                                addReferencedFiles(elt)
                        except (XMLSyntaxError, UnicodeDecodeError):
                            pass  # best effort: unparsable text blocks are not scanned
            elif fact.isTuple:
                newTuple = targetInstance.createFact(fact.qname, parent=parent)
                newFactForOldObjId[fact.objectIndex] = newTuple
                createFacts(fact.modelTupleFacts, newTuple)

    createFacts(modelXbrl.facts, None)

    # footnote links
    footnoteIdCount = {}
    modelXbrl.modelManager.showStatus(_("Creating and validating footnotes & relationships"))
    HREF = "{http://www.w3.org/1999/xlink}href"
    footnoteLinks = defaultdict(list)
    for linkKey, linkPrototypes in modelXbrl.baseSets.items():
        arcrole, linkrole, linkqname, arcqname = linkKey
        if (linkrole and linkqname and arcqname and  # fully specified roles
                arcrole != "XBRL-footnotes" and
                any(lP.modelDocument.type == Type.INLINEXBRL for lP in linkPrototypes)):
            for linkPrototype in linkPrototypes:
                if linkPrototype not in footnoteLinks[linkrole]:
                    footnoteLinks[linkrole].append(linkPrototype)

    for linkrole in sorted(footnoteLinks.keys()):
        for linkPrototype in footnoteLinks[linkrole]:
            newLink = addChild(targetRoot,
                               linkPrototype.qname,
                               attributes=linkPrototype.attributes)
            for linkChild in linkPrototype:
                attributes = linkChild.attributes
                if isinstance(linkChild, LocPrototype):
                    if HREF not in linkChild.attributes:
                        # point the locator at the fact created in the target instance
                        linkChild.attributes[HREF] = \
                            "#" + elementFragmentIdentifier(newFactForOldObjId[linkChild.dereference().objectIndex])
                    addChild(newLink, linkChild.qname, attributes=attributes)
                elif isinstance(linkChild, ArcPrototype):
                    addChild(newLink, linkChild.qname, attributes=attributes)
                elif isinstance(linkChild, ModelInlineFootnote):
                    idUseCount = footnoteIdCount.get(linkChild.footnoteID, 0) + 1
                    if idUseCount > 1:  # if footnote with id in other links bump the id number
                        attributes = linkChild.attributes.copy()
                        attributes["id"] = "{}_{}".format(attributes["id"], idUseCount)
                    footnoteIdCount[linkChild.footnoteID] = idUseCount
                    newChild = addChild(newLink, linkChild.qname, attributes=attributes)
                    copyIxFootnoteHtml(linkChild, newChild, withText=True)

                    if filingFiles and linkChild.textValue:
                        footnoteHtml = XML("<body/>")
                        copyIxFootnoteHtml(linkChild, footnoteHtml)
                        for elt in footnoteHtml.iter():
                            addReferencedFiles(elt)

    targetInstance.saveInstance(overrideFilepath=targetUrl, outputZip=outputZip)
    modelXbrl.modelManager.showStatus(_("Saved extracted instance"), 5000)
class CoverletterCreator(QtWidgets.QMainWindow, mainWindow.Ui_MainWindow):
    """Main window for editing a cover-letter project and generating output.

    Form widgets are stored under XML tags equal to their object names:
    ``generate_root`` serializes them that way and ``load_file`` looks widgets
    up by tag.  The collected data can be rendered through ``PdfCreator`` or
    ``TextCreator``.
    """

    def __init__(self, parent=None):
        """Build the UI, restore settings, load the last project and wire signals."""
        super(CoverletterCreator, self).__init__(parent)
        self.setupUi(self)
        self.mainTitle = "Coverletter Creator"
        self.config = QSettings()
        self.settings = SettingsHandler(parent=self, settings=self.config)
        self.clipboard = QtWidgets.QApplication.clipboard()

        self.actionNew.triggered.connect(self.new_project)
        self.actionSave.triggered.connect(self.save_project)
        self.actionSave_As.triggered.connect(self.saveas_project)
        self.actionOpen.triggered.connect(self.open_project)
        self.actionExit.triggered.connect(self.close)
        self.actionSettings.triggered.connect(self.settings.show)

        # Set default values; readSettings() may replace the default filename
        self.filename = "Examples/example_project.xml"
        self.file_dirty = False
        self.readSettings()
        self.load_file(self.filename)

        self.pb_browsePhoto.clicked.connect(self.browse_photo)
        self.pb_generatePdf.clicked.connect(self.generate_pdf)
        self.pb_generateText.clicked.connect(self.generate_text)
        self.connect_all_fields()
        self.connect_mandatory_fields()

        # Connect all labels, checkboxes and the two combo boxes to click
        # handlers that copy the widget's accessible name to the clipboard
        for child in self.centralwidget.findChildren(QtWidgets.QLabel):
            child.mousePressEvent = functools.partial(self.label_clicked,
                                                      source=child)
        for child in self.centralwidget.findChildren(QtWidgets.QCheckBox):
            child.mousePressEvent = functools.partial(self.checkbox_clicked,
                                                      source=child)
        self.RECEIPIENTGENDER.mousePressEvent = functools.partial(
            self.combobox_clicked, source=self.RECEIPIENTGENDER)
        self.RECEIPIENTSALUTATION.mousePressEvent = functools.partial(
            self.combobox_clicked, source=self.RECEIPIENTSALUTATION)
        # Keep the short company name in step with the full name
        self.COMPANYNAME.editingFinished.connect(
            lambda: self.COMPANYSHORTNAME.setText(self.COMPANYNAME.text()))

    def connect_all_fields(self):
        """Mark the project as unsaved whenever any input widget changes."""
        central = self.centralwidget
        for child in central.findChildren(QtWidgets.QLineEdit):
            child.textChanged.connect(self.setWindowTitleUnsaved)
        for child in central.findChildren(QtWidgets.QPlainTextEdit):
            child.textChanged.connect(self.setWindowTitleUnsaved)
        for child in central.findChildren(QtWidgets.QCheckBox):
            child.clicked.connect(self.setWindowTitleUnsaved)
        for child in central.findChildren(SpellTextEdit):
            child.textChanged.connect(self.setWindowTitleUnsaved)
        for child in central.findChildren(QtWidgets.QComboBox):
            child.currentIndexChanged.connect(self.setWindowTitleUnsaved)

    def connect_mandatory_fields(self):
        """Enable the generate buttons only while every mandatory field is filled.

        The previous per-field lambdas all captured the loop variable, so in
        practice only the last field (COMPANYNAME) was ever inspected.
        """
        mandatory_fields = [
            self.FIRSTNAME, self.LASTNAME, self.MOBILE, self.EMAIL,
            self.COMPANYNAME
        ]

        def update_generate_buttons(*_signal_args):
            # Look at all fields at call time instead of one captured widget
            ready = all(field.text() != "" for field in mandatory_fields)
            self.pb_generatePdf.setEnabled(ready)
            self.pb_generateText.setEnabled(ready)

        for field in mandatory_fields:
            field.textChanged[str].connect(update_generate_buttons)

    def label_clicked(self, event, source):
        """Copy the clicked label's accessible name to the clipboard."""
        var_code = source.accessibleName()
        self.clipboard.setText(str(var_code))
        event.accept()

    def checkbox_clicked(self, event, source):
        """Copy the checkbox's accessible name to the clipboard and toggle it."""
        var_code = source.accessibleName()
        self.clipboard.setText(str(var_code))
        source.toggle()

    def combobox_clicked(self, event, source):
        """Copy the combo box's accessible name to the clipboard and open it."""
        var_code = source.accessibleName()
        self.clipboard.setText(str(var_code))
        source.showPopup()

    def setWindowTitleUnsaved(self):
        """Flag the project as modified and show a trailing ``*`` in the title."""
        self.file_dirty = True
        _, fname = os.path.split(self.filename)
        self.setWindowTitle(self.mainTitle + " - " + fname + "*")

    def setWindowTitleSaved(self):
        """Flag the project as saved and show its file name in the title."""
        self.file_dirty = False
        _, fname = os.path.split(self.filename)
        self.setWindowTitle(self.mainTitle + " - " + fname)

    def new_project(self):
        """Ask for a file name, clear every field and start an unsaved project."""
        filename, _ = QFileDialog.getSaveFileName(self, "New Project", "./",
                                                  "XML Files (*.xml)")
        if not filename:
            return
        if ".xml" not in filename:
            filename = filename + '.xml'
        self.reset_all_fields()
        self.filename = filename
        self.setWindowTitleUnsaved()

    def reset_all_fields(self):
        """Clear all text inputs, uncheck all checkboxes and drop the photo."""
        central = self.centralwidget
        for child in central.findChildren(QtWidgets.QLineEdit):
            child.clear()
        for child in central.findChildren(QtWidgets.QPlainTextEdit):
            child.clear()
        for child in central.findChildren(QtWidgets.QCheckBox):
            child.setChecked(False)
        for child in central.findChildren(SpellTextEdit):
            child.clear()
        self.label_pic.clear()

    def save_project(self):
        """Write the project to ``self.filename``, asking for a new name on failure.

        The XML is built before any file is opened, and the file is written in
        a single ``with`` block, so no handle is leaked and an existing file is
        not truncated before the new content is ready.
        """
        root = self.generate_root()
        payload = tostring(root, pretty_print=True)

        filename = self.filename
        if ".xml" not in filename:
            filename = filename + '.xml'
        try:
            with open(filename, 'wb') as f:
                f.write(payload)
        except OSError:
            # Current location is not writable: let the user pick another one
            filename, _ = QFileDialog.getSaveFileName(self, "Save Project",
                                                      "./",
                                                      "XML Files (*.xml)")
            if not filename:
                return
            if ".xml" not in filename:
                filename = filename + '.xml'
            with open(filename, 'wb') as f:
                f.write(payload)

        self.root = root
        self.filename = filename
        self.setWindowTitleSaved()

    def generate_root(self):
        """Serialize the form into an XML tree and return its root element.

        Each widget becomes a child element named after the widget's object
        name (or a fixed tag for the multi-line text widgets).
        """
        root = Element('root')

        personal_info = Element('personal_info')
        root.append(personal_info)
        for qW in [
                self.FIRSTNAME, self.LASTNAME, self.MOBILE, self.EMAIL,
                self.HOMEPAGE, self.GITHUBNAME, self.LINKEDINNAME
        ]:
            child = Element(qW.objectName())
            child.text = qW.text()
            personal_info.append(child)
        personal_address = Element('PERSONALADDRESS')
        personal_address.text = self.PERSONALADDRESS.toPlainText()
        personal_info.append(personal_address)

        company_info = Element('company_info')
        root.append(company_info)
        for qW in [
                self.COMPANYNAME, self.COMPANYSHORTNAME, self.DEPARTMENT,
                self.LETTERTITLE, self.JOBTITLE, self.JOBREFID,
                self.RECEIPIENTNAME
        ]:
            child = Element(qW.objectName())
            child.text = qW.text()
            company_info.append(child)
        company_address = Element('COMPANYADDRESS')
        company_address.text = self.COMPANYADDRESS.toPlainText()
        company_info.append(company_address)
        recipient_gender = Element('RECEIPIENTGENDER')
        recipient_gender.text = str(self.RECEIPIENTGENDER.currentText())
        company_info.append(recipient_gender)
        recipient_salutation = Element('RECEIPIENTSALUTATION')
        recipient_salutation.text = str(
            self.RECEIPIENTSALUTATION.currentText())
        company_info.append(recipient_salutation)

        about_me = Element('TEXTABOUTME')
        about_me.text = self.TEXTABOUTME.toPlainText()
        root.append(about_me)
        why_firm = Element('TEXTWHYTHISFIRM')
        why_firm.text = self.TEXTWHYTHISFIRM.toPlainText()
        root.append(why_firm)
        why_you = Element('TEXTWHYYOU')
        why_you.text = self.TEXTWHYYOU.toPlainText()
        root.append(why_you)

        misc = Element('misc')
        root.append(misc)
        for qW in [
                self.CLOSINGSALUTATION, self.ENCLOSINGPREFIX, self.PHOTOPATH
        ]:
            child = Element(qW.objectName())
            child.text = qW.text()
            misc.append(child)
        # Checkbox states are stored as the strings 'True' / 'False'
        for qW in [
                self.CERTIFICATESATTACHED,
                self.CVATTACHED,
                self.REFLETTERSATTACHED,
                self.TRANSCRIPTSATTACHED,
        ]:
            child = Element(qW.objectName())
            child.text = str(qW.isChecked())
            misc.append(child)
        return root

    def saveas_project(self):
        """Write the project to a user-chosen file and switch to that file."""
        filename, _ = QFileDialog.getSaveFileName(self, "Save Project As",
                                                  "./", "XML Files (*.xml)")
        if not filename:
            return
        if ".xml" not in filename:
            filename = filename + '.xml'
        with open(filename, 'wb') as f:
            f.write(tostring(self.generate_root(), pretty_print=True))
        self.load_file(filename)

    def open_project(self):
        """Ask for a project file and load it."""
        filename, _ = QFileDialog.getOpenFileName(self, "Open Project", "./",
                                                  "XML Files (*.xml)")
        if not filename:
            return
        if ".xml" not in filename:
            filename = filename + '.xml'
        self.load_file(filename)

    def _set_widget_value(self, name, text):
        """Put ``text`` into the first widget called ``name``, trying each kind in turn."""
        widget = self.findChild(QtWidgets.QLineEdit, name)
        if widget is not None:
            widget.setText(text)
            return
        widget = self.findChild(QtWidgets.QPlainTextEdit, name)
        if widget is not None:
            widget.setPlainText(text)
            return
        widget = self.findChild(QtWidgets.QComboBox, name)
        if widget is not None:
            index = widget.findText(text, QtCore.Qt.MatchFixedString)
            if index >= 0:
                widget.setCurrentIndex(index)
            elif text.isdigit():
                # value was stored as an index rather than as item text
                widget.setCurrentIndex(int(text))
            else:
                widget.setCurrentText(text)
            return
        widget = self.findChild(QtWidgets.QCheckBox, name)
        if widget is not None:
            widget.setChecked(text == 'True')
            return
        widget = self.findChild(SpellTextEdit, name)
        if widget is not None:
            # Text widget (generate_root reads it with toPlainText), so it is
            # filled with setPlainText; the old setChecked call would fail.
            widget.setPlainText(text)

    def load_file(self, filename):
        """Load a project XML file into the form.

        A missing file starts an empty, clean "untitled.xml" project; an
        unparsable file shows an error dialog and leaves the form as is.
        """
        try:
            # Read bytes so the XML parser decides the encoding itself
            with open(filename, 'rb') as f:
                self.root = XML(f.read())
            self.reset_all_fields()
            for element in self.root.iter():
                if element.text is None:
                    continue  # nothing to put into a widget
                self._set_widget_value(str(element.tag), str(element.text))
            self.filename = filename
            self.get_photo(self.PHOTOPATH.text())
            self.setWindowTitleSaved()
        except FileNotFoundError:
            # Warning: File not found!
            self.filename = "untitled.xml"
            self.setWindowTitleUnsaved()
            self.file_dirty = False
        except XMLSyntaxError:
            QtWidgets.QMessageBox.critical(
                self, "XML Read Failed",
                "Cannot read xml file %s. \n\nMake sure the xml file is not blank "
                % filename)

    def browse_photo(self):
        """Let the user pick a profile photo and display it."""
        fname, _ = QFileDialog.getOpenFileName(self, 'Open profile photo',
                                               './',
                                               "Image files (*.jpg *.png)")
        if fname:
            self.get_photo(fname)

    def get_photo(self, fname):
        """Show the image at ``fname`` as a preview, or report that it cannot load."""
        image = QtGui.QImage(fname)
        if image.isNull():
            QtWidgets.QMessageBox.information(self, "Image Viewer",
                                              "Cannot load %s." % fname)
            return
        self.PHOTOPATH.setText(fname)
        self.label_pic.setPixmap(
            QtGui.QPixmap(fname).scaled(160, 160, QtCore.Qt.KeepAspectRatio,
                                        QtCore.Qt.FastTransformation))

    def generate_pdf(self):
        """Render the current form data to a PDF via the configured LaTeX compiler."""
        pdfcreator = PdfCreator(data=self.generate_root(), parent=self)
        pdfcreator.read_template(template=self.settings.latex_template)
        pdfcreator.convert_to_dict()
        pdfcreator.render_template()
        filename = (self.COMPANYSHORTNAME.text() + '_' +
                    self.JOBREFID.text() + '_Coverletter')
        # Drop characters that are not allowed in file names
        filename = "".join(i for i in filename
                           if i not in ".\\/:*?<>|").replace(r' ', '_')
        self.pb_generatePdf.setEnabled(False)
        try:
            pdfcreator.compile_xelatex(
                compiler=self.settings.get_latex_compiler(),
                pdfname=filename + ".pdf",
                outputDir=self.settings.latex_dir,
                open_pdf=self.settings.open_pdf,
                keep_tex=self.settings.keep_tex)
        except FileNotFoundError as e:
            QtWidgets.QMessageBox.critical(
                self, "PDF Compilation Failed: " + str(e),
                "Cannot complete command {}.".format(
                    self.settings.get_latex_compiler()))
        finally:
            # Re-enable the button even if compilation raised something else
            self.pb_generatePdf.setEnabled(True)

    def generate_text(self):
        """Render the current form data to a plain-text file."""
        textcreator = TextCreator(data=self.generate_root())
        try:
            textcreator.read_template(template=self.settings.text_template)
        except FileNotFoundError as e:
            QtWidgets.QMessageBox.critical(
                self, "Error: " + repr(e),
                "Cannot find template file {}.\n".format(
                    self.settings.text_template))
            return  # no template was read, so there is nothing to render
        textcreator.convert_to_dict()
        textcreator.render_template()
        filename = (self.COMPANYSHORTNAME.text() + '_' +
                    self.JOBREFID.text() + '_Coverletter')
        filename = "".join(i for i in filename
                           if i not in ".\\/: *?<>|").replace(r' ', '_')
        self.pb_generateText.setEnabled(False)
        try:
            textcreator.compile_text(textname=filename + ".txt",
                                     outputDir=self.settings.text_dir,
                                     open_text=self.settings.open_text)
        finally:
            self.pb_generateText.setEnabled(True)

    def writeSettings(self):
        """Persist window geometry and, if the project is clean, its file name."""
        self.config.beginGroup("MainWindow")
        self.config.setValue("size", self.size())
        self.config.setValue("pos", self.pos())
        self.config.endGroup()
        if not self.file_dirty:
            self.config.beginGroup("Project")
            self.config.setValue("filename", str(self.filename))
            self.config.endGroup()
        self.config.sync()

    def readSettings(self):
        """Restore window geometry and the last project file name."""
        self.config.beginGroup("MainWindow")
        self.resize(self.config.value("size", QtCore.QSize(616, 466)))
        self.move(self.config.value("pos", QtCore.QPoint(200, 200)))
        self.config.endGroup()
        self.config.beginGroup("Project")
        self.filename = str(self.config.value("filename", self.filename))
        self.config.endGroup()

    # event : QCloseEvent
    def closeEvent(self, event):
        """Offer to save a modified project before the window closes."""
        if self.file_dirty:
            choice = QtWidgets.QMessageBox.question(
                self, 'Project not saved', "Save Project before exit?",
                QtWidgets.QMessageBox.Yes | QtWidgets.QMessageBox.No
                | QtWidgets.QMessageBox.Cancel)
            if choice == QtWidgets.QMessageBox.Cancel:
                event.ignore()
                return
            if choice == QtWidgets.QMessageBox.Yes:
                self.save_project()
        self.writeSettings()
        event.accept()
def validateTextBlockFacts(modelXbrl):
    """Validate the HTML content of every non-nil text-block fact.

    For each fact whose concept is a text block and whose value matches
    ``XMLpattern``, this reports through ``modelXbrl.error``:

    * entities not listed in ``xhtmlEntities``;
    * DTD validation failures of the value (and of each CDATA section in it)
      wrapped in a ``<body>`` element;
    * ``javascript:`` or external http/https/ftp references in ``<a href>``
      and ``<img src>`` (links into the SEC EDGAR archive are allowed on ``<a>``);
    * XML syntax or decoding errors raised while parsing.

    :param modelXbrl: model whose ``facts`` are checked and which receives the errors.
    """
    #handler = TextBlockHandler(modelXbrl)
    loadDTD(modelXbrl)

    for f1 in modelXbrl.facts:
        concept = f1.concept
        if (f1.xsiNil == "true" or concept is None or
                not concept.isTextBlock or not XMLpattern.match(f1.value)):
            continue

        # test encoded entity tags
        for match in entityPattern.finditer(f1.value):
            entity = match.group()
            if entity not in xhtmlEntities:
                modelXbrl.error(
                    ("EFM.6.05.16", "GFM.1.2.15"),
                    _("Fact %(fact)s contextID %(contextID)s has disallowed entity %(entity)s"),
                    modelObject=f1, fact=f1.qname, contextID=f1.contextID,
                    entity=entity)

        # test html: the whole value plus each CDATA section inside it
        for xmltext in [f1.value] + CDATApattern.findall(f1.value):
            try:
                textblockXml = XML("<body>\n{0}\n</body>\n".format(
                    removeEntities(xmltext)))
                if not edbodyDTD.validate(textblockXml):
                    errors = edbodyDTD.error_log.filter_from_errors()
                    # structural HTML problems get the 6.5.16 code, the rest 6.5.15
                    htmlError = any(
                        e.type_name in ("DTD_INVALID_CHILD",
                                        "DTD_UNKNOWN_ATTRIBUTE")
                        for e in errors)
                    modelXbrl.error(
                        "EFM.6.05.16" if htmlError else
                        ("EFM.6.05.15", "GFM.1.02.14"),
                        _("Fact %(fact)s contextID %(contextID)s has text which causes the XML error %(error)s"),
                        modelObject=f1, fact=f1.qname, contextID=f1.contextID,
                        error=', '.join(e.message for e in errors))
                for elt in textblockXml.iter():
                    eltTag = elt.tag
                    for attrTag, attrValue in elt.items():
                        isLink = attrTag == "href" and eltTag == "a"
                        isImage = attrTag == "src" and eltTag == "img"
                        if not (isLink or isImage):
                            continue
                        if "javascript:" in attrValue:
                            # %(contextID)s: the missing "s" used to garble this message
                            modelXbrl.error(
                                "EFM.6.05.16",
                                _("Fact %(fact)s of context %(contextID)s has javascript in '%(attribute)s' for <%(element)s>"),
                                modelObject=f1, fact=f1.qname,
                                contextID=f1.contextID,
                                attribute=attrTag, element=eltTag)
                        elif attrValue.startswith(
                                "http://www.sec.gov/Archives/edgar/data/"
                        ) and eltTag == "a":
                            pass  # links into the EDGAR archive are permitted
                        elif "http:" in attrValue or "https:" in attrValue or "ftp:" in attrValue:
                            modelXbrl.error(
                                "EFM.6.05.16",
                                _("Fact %(fact)s of context %(contextID)s has an invalid external reference in '%(attribute)s' for <%(element)s>"),
                                modelObject=f1, fact=f1.qname,
                                contextID=f1.contextID,
                                attribute=attrTag, element=eltTag)
            except (XMLSyntaxError, UnicodeDecodeError) as err:
                #if not err.endswith("undefined entity"):
                modelXbrl.error(
                    ("EFM.6.05.15", "GFM.1.02.14"),
                    _("Fact %(fact)s contextID %(contextID)s has text which causes the XML error %(error)s"),
                    modelObject=f1, fact=f1.qname, contextID=f1.contextID,
                    error=err)
def saveTargetDocument(modelXbrl, targetDocumentFilename, targetDocumentSchemaRefs, outputZip=None, filingFiles=None, *args, **kwargs):
    """Extract the facts of an inline XBRL model into a saved instance document.

    The target file name is ``targetDocumentFilename`` normalized against the
    source document's path, with ``_extracted`` inserted before the extension.
    Role/arcrole references, contexts, units, facts (tuples recursively) and
    footnote links are copied into a new instance, which is then saved.

    :param modelXbrl: source model to extract from.
    :param targetDocumentFilename: file name the target URL is derived from.
    :param targetDocumentSchemaRefs: schema references passed to ``ModelXbrl.create``.
    :param outputZip: passed through to ``saveInstance``.
    :param filingFiles: optional set receiving the paths of local files that
        ``<a href>`` / ``<img src>`` in text-block facts and footnotes refer to
        and that exist in the archive or on disk.
    :param args: accepted and ignored.
    :param kwargs: accepted and ignored.
    """
    XML_LANG = "{http://www.w3.org/XML/1998/namespace}lang"
    targetUrl = modelXbrl.modelManager.cntlr.webCache.normalizeUrl(
        targetDocumentFilename, modelXbrl.modelDocument.filepath)
    # Relative references are resolved against the source document's directory.
    # This name was read by the helper below without ever being assigned here.
    sourceDir = os.path.dirname(modelXbrl.modelDocument.filepath)

    def addLocallyReferencedFile(elt, filingFiles):
        if elt.tag in ("a", "img"):
            for attrTag, attrValue in elt.items():
                if attrTag in ("href", "src") and not isHttpUrl(
                        attrValue) and not os.path.isabs(attrValue):
                    attrValue = attrValue.partition('#')[0]  # remove anchor
                    if attrValue:  # ignore anchor references to base document
                        # change url path separators to host separators
                        attrValue = os.path.normpath(attrValue)
                        file = os.path.join(sourceDir, attrValue)
                        if modelXbrl.fileSource.isInArchive(
                                file,
                                checkExistence=True) or os.path.exists(file):
                            filingFiles.add(file)

    targetUrlParts = targetUrl.rpartition(".")
    targetUrl = targetUrlParts[0] + "_extracted." + targetUrlParts[2]
    modelXbrl.modelManager.showStatus(
        _("Extracting instance ") + os.path.basename(targetUrl))
    rootElt = modelXbrl.modelDocument.xmlRootElement
    # take baseXmlLang from <html> or <base>
    baseXmlLang = rootElt.get(XML_LANG) or rootElt.get("lang")
    for ixElt in modelXbrl.modelDocument.xmlRootElement.iterdescendants(
            tag="{http://www.w3.org/1999/xhtml}body"):
        # NOTE(review): the second lookup reads "lang" from rootElt, not from
        # ixElt; possibly meant ixElt.get("lang") - confirm before changing.
        baseXmlLang = ixElt.get(XML_LANG) or rootElt.get("lang") or baseXmlLang
    targetInstance = ModelXbrl.create(
        modelXbrl.modelManager,
        newDocumentType=Type.INSTANCE,
        url=targetUrl,
        schemaRefs=targetDocumentSchemaRefs,
        isEntry=True,
        discover=False)  # don't attempt to load DTS
    targetRoot = targetInstance.modelDocument.xmlRootElement
    if baseXmlLang:
        targetRoot.set(XML_LANG, baseXmlLang)
    ValidateXbrlDimensions.loadDimensionDefaults(
        targetInstance)  # need dimension defaults

    # roleRef and arcroleRef (of each inline document)
    for sourceRefs in (modelXbrl.targetRoleRefs, modelXbrl.targetArcroleRefs):
        for roleRefElt in sourceRefs.values():
            addChild(targetRoot, roleRefElt.qname,
                     attributes=roleRefElt.items())

    # contexts
    for context in sorted(modelXbrl.contexts.values(),
                          key=lambda c: elementChildSequence(c)):
        if context.isInstantPeriod:
            periodType = 'instant'
        elif context.isStartEndPeriod:
            periodType = 'duration'
        else:
            periodType = 'forever'
        targetInstance.createContext(
            context.entityIdentifier[0],
            context.entityIdentifier[1],
            periodType,
            context.startDatetime,
            context.endDatetime,
            None,
            context.qnameDims, [], [],
            id=context.id)
    for unit in modelXbrl.units.values():
        measures = unit.measures
        targetInstance.createUnit(measures[0], measures[1], id=unit.id)

    modelXbrl.modelManager.showStatus(_("Creating and validating facts"))
    newFactForOldObjId = {}  # source fact objectIndex -> fact created in the target

    def createFacts(facts, parent):
        for fact in facts:
            if fact.isItem:  # HF does not de-duplicate, which is currently-desired behavior
                attrs = {"contextRef": fact.contextID}
                if fact.id:
                    attrs["id"] = fact.id
                if fact.isNumeric:
                    attrs["unitRef"] = fact.unitID
                    if fact.get("decimals"):
                        attrs["decimals"] = fact.get("decimals")
                    if fact.get("precision"):
                        attrs["precision"] = fact.get("precision")
                if fact.isNil:
                    attrs[XbrlConst.qnXsiNil] = "true"
                    text = None
                else:
                    text = fact.xValue if fact.xValid else fact.textValue
                    if fact.concept is not None and fact.concept.baseXsdType in (
                            "string", "normalizedString"):
                        # only emit xml:lang when it differs from the default
                        xmlLang = fact.xmlLang
                        if xmlLang is not None and xmlLang != baseXmlLang:
                            attrs[XML_LANG] = xmlLang
                newFact = targetInstance.createFact(fact.qname,
                                                    attributes=attrs,
                                                    text=text,
                                                    parent=parent)
                # if fact.isFraction, create numerator and denominator
                newFactForOldObjId[fact.objectIndex] = newFact
                # A nil fact has text None, which has no markup to scan (and
                # would make CDATApattern.findall raise), so it is skipped.
                if (filingFiles is not None and text is not None and
                        fact.concept is not None and fact.concept.isTextBlock):
                    # check for img and other filing references so that referenced files are included in the zip.
                    for xmltext in [text] + CDATApattern.findall(text):
                        try:
                            for elt in XML("<body>\n{0}\n</body>\n".format(
                                    xmltext)).iter():
                                addLocallyReferencedFile(elt, filingFiles)
                        except (XMLSyntaxError, UnicodeDecodeError):
                            pass  # TODO: Why ignore UnicodeDecodeError?
            elif fact.isTuple:
                newTuple = targetInstance.createFact(fact.qname, parent=parent)
                newFactForOldObjId[fact.objectIndex] = newTuple
                createFacts(fact.modelTupleFacts, newTuple)

    createFacts(modelXbrl.facts, None)

    modelXbrl.modelManager.showStatus(
        _("Creating and validating footnotes and relationships"))
    HREF = "{http://www.w3.org/1999/xlink}href"
    footnoteLinks = defaultdict(list)
    footnoteIdCount = {}
    for linkKey, linkPrototypes in modelXbrl.baseSets.items():
        arcrole, linkrole, linkqname, arcqname = linkKey
        if (linkrole and linkqname and arcqname and  # fully specified roles
                arcrole != "XBRL-footnotes" and any(
                    lP.modelDocument.type == Type.INLINEXBRL
                    for lP in linkPrototypes)):
            for linkPrototype in linkPrototypes:
                if linkPrototype not in footnoteLinks[linkrole]:
                    footnoteLinks[linkrole].append(linkPrototype)

    for linkrole in sorted(footnoteLinks.keys()):
        for linkPrototype in footnoteLinks[linkrole]:
            newLink = addChild(targetRoot,
                               linkPrototype.qname,
                               attributes=linkPrototype.attributes)
            for linkChild in linkPrototype:
                attributes = linkChild.attributes
                if isinstance(linkChild, LocPrototype):
                    if HREF not in linkChild.attributes:
                        # point the locator at the fact created in the target instance
                        linkChild.attributes[HREF] = \
                            "#" + elementFragmentIdentifier(newFactForOldObjId[linkChild.dereference().objectIndex])
                    addChild(newLink, linkChild.qname, attributes=attributes)
                elif isinstance(linkChild, ArcPrototype):
                    addChild(newLink, linkChild.qname, attributes=attributes)
                elif isinstance(linkChild, ModelInlineFootnote):
                    idUseCount = footnoteIdCount.get(linkChild.footnoteID, 0) + 1
                    if idUseCount > 1:  # if footnote with id in other links bump the id number
                        attributes = linkChild.attributes.copy()
                        attributes["id"] = "{}_{}".format(
                            attributes["id"], idUseCount)
                    footnoteIdCount[linkChild.footnoteID] = idUseCount
                    newChild = addChild(newLink,
                                        linkChild.qname,
                                        attributes=attributes)
                    xmlLang = linkChild.xmlLang
                    if xmlLang is not None and xmlLang != baseXmlLang:  # default
                        newChild.set(XML_LANG, xmlLang)
                    copyIxFootnoteHtml(
                        linkChild,
                        newChild,
                        targetModelDocument=targetInstance.modelDocument,
                        withText=True)

                    if filingFiles and linkChild.textValue:
                        footnoteHtml = XML("<body/>")
                        copyIxFootnoteHtml(linkChild, footnoteHtml)
                        for elt in footnoteHtml.iter():
                            addLocallyReferencedFile(elt, filingFiles)

    targetInstance.saveInstance(overrideFilepath=targetUrl,
                                outputZip=outputZip)
    if getattr(modelXbrl, "isTestcaseVariation", False):
        modelXbrl.extractedInlineInstance = True  # for validation comparison
    modelXbrl.modelManager.showStatus(_("Saved extracted instance"), 5000)
def validateTextBlockFacts(modelXbrl):
    """Validate the embedded HTML of each text block fact in ``modelXbrl``.

    A fact is examined when it is not nil, its concept is a text block, its
    namespace is not in ``ixbrlAll`` and ``XMLpattern`` matches its value.
    For such a fact:

    * every ``namedEntityPattern`` match must be in ``xhtmlEntities``;
    * the value, and each ``CDATApattern`` match found in it, is wrapped in
      ``<body>``, parsed, validated against ``edbodyDTD`` and scanned for
      javascript, external references, graphics file problems and nested
      ``<table>`` elements.

    All findings are reported through ``modelXbrl.error``; nothing is
    returned.
    """
    loadDTD(modelXbrl)
    # graphics references already checked; emptied after each parsed text
    seenGraphics = set()

    def report(fact, codes, message, **details):
        # every finding carries the same fact-identifying arguments
        modelXbrl.error(codes, message,
                        modelObject=fact, fact=fact.qname,
                        contextID=fact.contextID, **details)

    for f1 in modelXbrl.facts:
        concept = f1.concept
        if f1.xsiNil == "true" or concept is None or not concept.isTextBlock:
            continue
        if f1.namespaceURI in ixbrlAll or not XMLpattern.match(f1.value):
            continue

        # named entities in the text must be known XHTML entities
        for entityMatch in namedEntityPattern.finditer(f1.value):
            entity = entityMatch.group()
            if entity not in xhtmlEntities:
                report(f1, ("EFM.6.05.16", "GFM.1.2.15"),
                       _("Fact %(fact)s contextID %(contextID)s has disallowed entity %(entity)s"),
                       entity=entity, error=entity)

        # the whole value first, then each CDATA section found inside it
        htmlTexts = [f1.value]
        htmlTexts += CDATApattern.findall(f1.value)
        for xmltext in htmlTexts:
            # (a disabled xml.sax-based check used to sit here as a string
            # literal; the parse + DTD validation below is what actually runs)
            bodyMarkup = "<body>\n{0}\n</body>\n".format(removeEntities(xmltext))
            try:
                textblockXml = XML(bodyMarkup)
                if not edbodyDTD.validate(textblockXml):
                    dtdErrors = edbodyDTD.error_log.filter_from_errors()
                    if any(e.type_name in ("DTD_INVALID_CHILD", "DTD_UNKNOWN_ATTRIBUTE")
                           for e in dtdErrors):
                        dtdCodes = "EFM.6.05.16"
                    else:
                        dtdCodes = ("EFM.6.05.15.dtdError", "GFM.1.02.14")
                    report(f1, dtdCodes,
                           _("Fact %(fact)s contextID %(contextID)s has text which causes the XML error %(error)s"),
                           error=', '.join(e.message for e in dtdErrors),
                           messageCodes=("EFM.6.05.16", "EFM.6.05.15.dtdError", "GFM.1.02.14"))

                for elt in textblockXml.iter():
                    eltTag = elt.tag
                    for attrTag, attrValue in elt.items():
                        isAnchorHref = attrTag == "href" and eltTag == "a"
                        isImageSrc = attrTag == "src" and eltTag == "img"
                        if not (isAnchorHref or isImageSrc):
                            continue

                        if "javascript:" in attrValue:
                            report(f1, "EFM.6.05.16.activeContent",
                                   _("Fact %(fact)s of context %(contextID)s has javascript in '%(attribute)s' for <%(element)s>"),
                                   attribute=attrTag, element=eltTag)
                        elif eltTag == "a" and attrValue.startswith(
                                "http://www.sec.gov/Archives/edgar/data/"):
                            pass  # anchors into the EDGAR archive are allowed
                        elif any(scheme in attrValue
                                 for scheme in ("http:", "https:", "ftp:")):
                            report(f1, "EFM.6.05.16.externalReference",
                                   _("Fact %(fact)s of context %(contextID)s has an invalid external reference in '%(attribute)s' for <%(element)s>"),
                                   attribute=attrTag, element=eltTag)

                        if not isImageSrc or attrValue in seenGraphics:
                            continue
                        suffix = attrValue.lower()[-4:]
                        if suffix not in ('.jpg', '.gif'):
                            report(f1, "EFM.6.05.16.graphicFileType",
                                   _("Fact %(fact)s of context %(contextID)s references a graphics file which isn't .gif or .jpg '%(attribute)s' for <%(element)s>"),
                                   attribute=attrValue, element=eltTag)
                        else:
                            # detected content type must agree with the extension
                            try:
                                if validateGraphicFile(f1, attrValue) != suffix[1:]:
                                    report(f1, "EFM.6.05.16.graphicFileContent",
                                           _("Fact %(fact)s of context %(contextID)s references a graphics file which doesn't have expected content '%(attribute)s' for <%(element)s>"),
                                           attribute=attrValue, element=eltTag)
                            except IOError as err:
                                report(f1, "EFM.6.05.16.graphicFileError",
                                       _("Fact %(fact)s of context %(contextID)s references a graphics file which isn't openable '%(attribute)s' for <%(element)s>, error: %(error)s"),
                                       attribute=attrValue, element=eltTag, error=err)
                        seenGraphics.add(attrValue)

                    # a table with any table ancestor is a nested table
                    if eltTag == "table" and any(
                            True for ancestor in elt.iterancestors("table")):
                        report(f1, "EFM.6.05.16.nestedTable",
                               _("Fact %(fact)s of context %(contextID)s has nested <table> elements."))
            except (XMLSyntaxError, UnicodeDecodeError) as err:
                report(f1, ("EFM.6.05.15", "GFM.1.02.14"),
                       _("Fact %(fact)s contextID %(contextID)s has text which causes the XML error %(error)s"),
                       error=err)

            seenGraphics.clear()
def saveTargetDocument(modelXbrl, targetDocumentFilename, targetDocumentSchemaRefs, outputZip=None, filingFiles=None, *args, **kwargs):
    """Extract an inline XBRL model into a new instance document and save it.

    Role/arcrole refs, contexts, units, facts (items and tuples) and the
    footnote links found in inline documents are copied from ``modelXbrl``
    into a freshly created instance, which is then saved.

    Args:
        modelXbrl: source model whose inline content is extracted.
        targetDocumentFilename: normalized against the source document's file
            path; ``_extracted`` is inserted before the final extension to
            form the output URL.
        targetDocumentSchemaRefs: passed as ``schemaRefs`` when the target
            instance is created.
        outputZip: passed through to ``saveInstance``.
        filingFiles: optional set. When it is not ``None`` (an empty set
            counts), local files referenced by ``<a href>`` / ``<img src>``
            in text block facts and in footnotes are added to it.
        *args, **kwargs: accepted and ignored.

    Returns:
        None. Sets ``modelXbrl.extractedInlineInstance`` when the model is a
        testcase variation.
    """
    targetUrl = modelXbrl.modelManager.cntlr.webCache.normalizeUrl(
        targetDocumentFilename, modelXbrl.modelDocument.filepath)

    def addLocallyReferencedFile(elt, filingFiles):
        """Add the local file referenced by an <a>/<img> element, if it exists."""
        if elt.tag not in ("a", "img"):
            return
        for attrTag, attrValue in elt.items():
            if attrTag in ("href", "src") and not isHttpUrl(attrValue) and not os.path.isabs(attrValue):
                attrValue = attrValue.partition('#')[0]  # remove anchor
                if attrValue:  # ignore anchor references to base document
                    # change url path separators to host separators
                    attrValue = os.path.normpath(attrValue)
                    # NOTE(review): sourceDir is not bound in this function;
                    # presumably a module-level name -- confirm it exists.
                    filePath = os.path.join(sourceDir, attrValue)
                    if modelXbrl.fileSource.isInArchive(filePath, checkExistence=True) or os.path.exists(filePath):
                        filingFiles.add(filePath)

    targetUrlParts = targetUrl.rpartition(".")
    targetUrl = targetUrlParts[0] + "_extracted." + targetUrlParts[2]
    modelXbrl.modelManager.showStatus(_("Extracting instance ") + os.path.basename(targetUrl))

    rootElt = modelXbrl.modelDocument.xmlRootElement
    # take baseXmlLang from <html> or <base>
    baseXmlLang = rootElt.get("{http://www.w3.org/XML/1998/namespace}lang") or rootElt.get("lang")
    for ixElt in modelXbrl.modelDocument.xmlRootElement.iterdescendants(tag="{http://www.w3.org/1999/xhtml}body"):
        # NOTE(review): the middle term reads the root element's "lang", not
        # the body's -- looks like it may have been meant as ixElt.get("lang").
        baseXmlLang = ixElt.get("{http://www.w3.org/XML/1998/namespace}lang") or rootElt.get("lang") or baseXmlLang

    targetInstance = ModelXbrl.create(modelXbrl.modelManager,
                                      newDocumentType=Type.INSTANCE,
                                      url=targetUrl,
                                      schemaRefs=targetDocumentSchemaRefs,
                                      isEntry=True,
                                      discover=False)  # don't attempt to load DTS
    if baseXmlLang:
        targetInstance.modelDocument.xmlRootElement.set("{http://www.w3.org/XML/1998/namespace}lang", baseXmlLang)
    ValidateXbrlDimensions.loadDimensionDefaults(targetInstance)  # need dimension defaults

    # roleRef and arcroleRef (of each inline document)
    for sourceRefs in (modelXbrl.targetRoleRefs, modelXbrl.targetArcroleRefs):
        for roleRefElt in sourceRefs.values():
            addChild(targetInstance.modelDocument.xmlRootElement, roleRefElt.qname,
                     attributes=roleRefElt.items())

    # contexts, in the order given by elementChildSequence
    for context in sorted(modelXbrl.contexts.values(), key=lambda c: elementChildSequence(c)):
        targetInstance.createContext(context.entityIdentifier[0],
                                     context.entityIdentifier[1],
                                     'instant' if context.isInstantPeriod else
                                     'duration' if context.isStartEndPeriod
                                     else 'forever',
                                     context.startDatetime,
                                     context.endDatetime,
                                     None,
                                     context.qnameDims, [], [],
                                     id=context.id)
    for unit in modelXbrl.units.values():
        measures = unit.measures
        targetInstance.createUnit(measures[0], measures[1], id=unit.id)

    modelXbrl.modelManager.showStatus(_("Creating and validating facts"))
    newFactForOldObjId = {}  # source fact objectIndex -> fact created in target

    def createFacts(facts, parent):
        """Recursively copy items and tuples into the target under ``parent``."""
        for fact in facts:
            if fact.isItem:  # HF does not de-duplicate, which is currently-desired behavior
                attrs = {"contextRef": fact.contextID}
                if fact.id:
                    attrs["id"] = fact.id
                if fact.isNumeric:
                    attrs["unitRef"] = fact.unitID
                    if fact.get("decimals"):
                        attrs["decimals"] = fact.get("decimals")
                    if fact.get("precision"):
                        attrs["precision"] = fact.get("precision")
                if fact.isNil:
                    attrs[XbrlConst.qnXsiNil] = "true"
                    text = None
                else:
                    text = fact.xValue if fact.xValid else fact.textValue
                    if fact.concept is not None and fact.concept.baseXsdType in ("string", "normalizedString"):  # default
                        xmlLang = fact.xmlLang
                        if xmlLang is not None and xmlLang != baseXmlLang:
                            attrs["{http://www.w3.org/XML/1998/namespace}lang"] = xmlLang
                newFact = targetInstance.createFact(fact.qname, attributes=attrs, text=text, parent=parent)
                # if fact.isFraction, create numerator and denominator
                newFactForOldObjId[fact.objectIndex] = newFact
                # A nil fact has text None: there is nothing to scan, and
                # handing None to the regex would raise TypeError.
                if (filingFiles is not None and text is not None
                        and fact.concept is not None and fact.concept.isTextBlock):
                    # check for img and other filing references so that referenced files are included in the zip.
                    for xmltext in [text] + CDATApattern.findall(text):
                        try:
                            for elt in XML("<body>\n{0}\n</body>\n".format(xmltext)).iter():
                                addLocallyReferencedFile(elt, filingFiles)
                        except (XMLSyntaxError, UnicodeDecodeError):
                            pass  # TODO: Why ignore UnicodeDecodeError?
            elif fact.isTuple:
                newTuple = targetInstance.createFact(fact.qname, parent=parent)
                newFactForOldObjId[fact.objectIndex] = newTuple
                createFacts(fact.modelTupleFacts, newTuple)

    createFacts(modelXbrl.facts, None)

    modelXbrl.modelManager.showStatus(_("Creating and validating footnotes and relationships"))
    HREF = "{http://www.w3.org/1999/xlink}href"
    footnoteLinks = defaultdict(list)  # linkrole -> link prototypes, no duplicates
    footnoteIdCount = {}  # footnoteID -> number of times emitted so far
    for linkKey, linkPrototypes in modelXbrl.baseSets.items():
        arcrole, linkrole, linkqname, arcqname = linkKey
        if (linkrole and linkqname and arcqname and  # fully specified roles
                arcrole != "XBRL-footnotes" and
                any(lP.modelDocument.type == Type.INLINEXBRL for lP in linkPrototypes)):
            for linkPrototype in linkPrototypes:
                if linkPrototype not in footnoteLinks[linkrole]:
                    footnoteLinks[linkrole].append(linkPrototype)

    for linkrole in sorted(footnoteLinks):
        for linkPrototype in footnoteLinks[linkrole]:
            newLink = addChild(targetInstance.modelDocument.xmlRootElement,
                               linkPrototype.qname,
                               attributes=linkPrototype.attributes)
            for linkChild in linkPrototype:
                attributes = linkChild.attributes
                if isinstance(linkChild, LocPrototype):
                    if HREF not in linkChild.attributes:
                        # written into the prototype's own dict, which
                        # `attributes` refers to as well
                        linkChild.attributes[HREF] = \
                            "#" + elementFragmentIdentifier(newFactForOldObjId[linkChild.dereference().objectIndex])
                    addChild(newLink, linkChild.qname, attributes=attributes)
                elif isinstance(linkChild, ArcPrototype):
                    addChild(newLink, linkChild.qname, attributes=attributes)
                elif isinstance(linkChild, ModelInlineFootnote):
                    idUseCount = footnoteIdCount.get(linkChild.footnoteID, 0) + 1
                    if idUseCount > 1:  # if footnote with id in other links bump the id number
                        attributes = linkChild.attributes.copy()
                        attributes["id"] = "{}_{}".format(attributes["id"], idUseCount)
                    footnoteIdCount[linkChild.footnoteID] = idUseCount
                    newChild = addChild(newLink, linkChild.qname, attributes=attributes)
                    xmlLang = linkChild.xmlLang
                    if xmlLang is not None and xmlLang != baseXmlLang:  # default
                        newChild.set("{http://www.w3.org/XML/1998/namespace}lang", xmlLang)
                    copyIxFootnoteHtml(linkChild, newChild,
                                       targetModelDocument=targetInstance.modelDocument,
                                       withText=True)

                    # `is not None` rather than truthiness: an empty set
                    # still has to receive the footnote's referenced files,
                    # as it does for text block facts.
                    if filingFiles is not None and linkChild.textValue:
                        footnoteHtml = XML("<body/>")
                        copyIxFootnoteHtml(linkChild, footnoteHtml)
                        for elt in footnoteHtml.iter():
                            addLocallyReferencedFile(elt, filingFiles)

    targetInstance.saveInstance(overrideFilepath=targetUrl, outputZip=outputZip)
    if getattr(modelXbrl, "isTestcaseVariation", False):
        modelXbrl.extractedInlineInstance = True  # for validation comparison
    modelXbrl.modelManager.showStatus(_("Saved extracted instance"), 5000)