def checklink(xrefelement):
    """Rewrite stale link text so it matches the title of the linked topic.

    Link destinations are handled as follows:

    1. External (the element carries a ``scope`` attribute): not checked.
    2. Internal, within the current document (``href="#topicid"``): the
       topic is looked up in the module-level ``root``.
    3. Internal, in another file of the same folder
       (``href="file.xml#topicid"``): the file is loaded with
       ``getxmlroot(foldername + file)`` and searched there.

    Only when exactly one ``topic`` with the requested id is found is the
    text compared with that topic's ``<title>`` text.  On a mismatch the text
    of an ``xref`` element (or of the ``linktext`` child of a ``link``
    element) is replaced by the title text, ``outputclass`` is set to
    ``'fm:'`` and the module-level flag ``changemade`` is set to 1.

    Elements that cannot be resolved (no ``href``, no ``#`` fragment, zero or
    several matching topics, no ``<title>``, a ``link`` without ``linktext``)
    are left untouched.

    :param xrefelement: element carrying the ``href`` attribute.
    :returns: None; the element is modified in place.
    """
    # Declared exactly once, before any assignment.  Repeating the
    # declaration after an assignment is a SyntaxError in Python 3.
    global changemade

    if 'scope' in xrefelement.attrib:
        # 1. external -- for now we are not checking external links
        return

    href = xrefelement.attrib.get('href')
    if href is None:
        return

    parts = href.split('#')
    if len(parts) < 2:
        # No topic id in the href, so there is no title to compare against.
        return

    if parts[0] == '':
        searchroot = root  # 2. within this file
    else:
        searchroot = getxmlroot(foldername + parts[0])  # 3. within this folder
    topics = searchroot.findall(".//topic[@id='" + parts[1] + "']")

    if len(topics) != 1:
        # Broken or ambiguous links are not this function's concern.
        return

    title = topics[0].find('title')
    if title is None:
        return

    if xrefelement.tag == 'xref':
        if xrefelement.text != title.text:
            xrefelement.text = title.text
            xrefelement.attrib['outputclass'] = 'fm:'
            changemade = 1
    elif xrefelement.tag == 'link':
        linktextelement = xrefelement.find('linktext')
        if linktextelement is None:
            # Nothing to compare or rewrite; creating a linktext child would
            # change the document structure, so the link is left alone.
            return
        if linktextelement.text != title.text:
            xrefelement.attrib['outputclass'] = 'fm:'
            linktextelement.text = title.text
            changemade = 1
def checklink(xrefelement):
    """Report broken links and link text that differs from the target title.

    Link destinations are handled as follows:

    1. External (the element carries a ``scope`` attribute): not checked.
    2. Internal, within the current document (``href="#topicid"``): the
       topic is looked up in the module-level ``root``.
    3. Internal, in another file of the same folder
       (``href="file.xml#topicid"``): the file ``foldername + file`` must
       exist; it is loaded with ``getxmlroot`` and searched there.

    Findings are passed to ``logerrormessage`` under these keys:

    * ``'filenotfound'`` -- the destination file does not exist.
    * ``'linkbreak'``    -- no ``topic`` has the requested id.
    * ``'mulitpleids'``  -- more than one ``topic`` has the requested id.
    * ``'stickytext'``   -- the link text differs from the cleaned title.

    An href without a ``#`` fragment only gets the file-exists check, since
    there is no topic id to resolve.  Elements without an ``href`` are
    ignored.

    :param xrefelement: element carrying the ``href`` attribute.
    :returns: None.
    """
    attributes = xrefelement.attrib
    if 'scope' in attributes:
        # 1. external -- for now we are not checking external links
        return

    href = attributes.get('href')
    if href is None:
        return

    parts = href.split('#')
    # A file-only href ("file.xml") has no fragment; indexing parts[1]
    # unconditionally would raise IndexError.
    topicid = parts[1] if len(parts) > 1 else None

    if parts[0] == '':
        # 2. within this file
        if topicid is None:
            return
        topics = root.findall(".//topic[@id='" + topicid + "']")
    else:
        # 3. within this folder: first check that the destination exists
        file_path = foldername + parts[0]
        if not os.path.exists(file_path):
            logerrormessage('filenotfound', file_path, getxreftext(xrefelement))
            return
        if topicid is None:
            return
        otherroot = getxmlroot(file_path)
        topics = otherroot.findall(".//topic[@id='" + topicid + "']")

    if not topics:
        logerrormessage('linkbreak', getxreftext(xrefelement),
                        "Pointing to:\n" + topicid)
    elif len(topics) > 1:
        logerrormessage('mulitpleids', topicid)
    else:
        # The link resolves; now check for sticky (out-of-date) link text.
        title = topics[0].find('title')
        if title is None:
            # NOTE(review): a topic without <title> is skipped rather than
            # reported, because no matching logerrormessage key is visible.
            return
        xreftext = getxreftext(xrefelement)
        titletext = cleanstring(title.text)
        if xreftext != titletext:
            logerrormessage('stickytext', "Link text\n" + xreftext,
                            "Title text:\n" + titletext)
def iteratetree(currentelement):
    """Walk the subtree below *currentelement* depth-first.

    Every descendant element is handed to ``checkhref`` before its own
    children are visited.  *currentelement* itself is not checked.
    """
    for child in currentelement.findall("*"):
        checkhref(child)
        iteratetree(child)


# Folder whose *.xml files are checked.
foldername = 'C:/aaWork/p4v_ws/theoden/depot/EN/Docs/TechComm Suite/2015/fm/scripting/'
xmlfiles = glob.glob(foldername + '*.xml')
root = None  # root element of the file currently being checked
logger = FileWriter('dita-link-checker.log')

for xmlfile in xmlfiles:
    # The error counters start from zero for every file.
    NoOfErrors = NoOfLinkErrors = NoOfTextErrors = 0

    print(xmlfile)
    logger.WriteLine('Start: ' + xmlfile)

    # Load the document and visit every element below its root.
    root = getxmlroot(xmlfile)
    iteratetree(root)

    # Per-file summary.
    logger.WriteLine('Number of Link Errors: ' + str(NoOfLinkErrors))
    logger.WriteLine('Number of Text Errors: ' + str(NoOfTextErrors))
    logger.WriteLine('DONE: ' + xmlfile)
    logger.LineSeparator()

logger.Close()
print("DONE. You can find the results in:\n" + logger.LogFileName())
def iteratetree(currentelement):
    """Walk the subtree below *currentelement* depth-first.

    Every descendant element is handed to ``checkhref`` before its own
    children are visited.  *currentelement* itself is not checked.
    """
    for child in currentelement.findall("*"):
        checkhref(child)
        iteratetree(child)


# Folder whose *.xml files are processed.
foldername = 'C:/aaWork/p4v_ws/theoden/depot/EN/Docs/TechComm Suite/2015/rh/user-guide/'
xmlfiles = glob.glob(foldername + '*.xml')
root = None  # root element of the file currently being processed
logger = FileWriter('dita-stickytext-fixer-upper.log')
changemade = 0  # set to 1 by checklink when it rewrites link text

for xmlfile in xmlfiles:
    # Reset the flag for every file.  It used to be cleared only once before
    # the loop, so after the first modified file every later file was
    # written back even when nothing in it had changed.
    changemade = 0

    print(xmlfile)
    logger.WriteLine('Start: ' + xmlfile)

    # Load the document and visit every element below its root.
    root = getxmlroot(xmlfile)
    iteratetree(root)

    if changemade == 1:
        # NOTE(review): `tree` is not assigned anywhere in this section;
        # presumably getxmlroot() publishes the parsed tree as a module
        # global.  If so, a getxmlroot() call made for another file while
        # resolving a cross-file link could leave `tree` pointing at the
        # wrong document -- confirm before relying on this write-back.
        writexmlback(xmlfile, tree)

    logger.WriteLine('DONE: ' + xmlfile)

logger.Close()
print("DONE. You can find the results in:\n" + logger.LogFileName())