def singleStyleApplication(datafile, styles, styleTransform, styleParams,
                           dstFile, rule, dirObj, dataTree=None):
    """
    Run one stylesheet transformation over one data file.

    datafile       -- name of the XML file to transform
    styles         -- list with the stylesheet file names; they are recorded
                      as dependencies and shown in the error message
    styleTransform -- callable invoked as styleTransform(dataTree,
                      **styleParams)
    styleParams    -- dictionary with the keyword parameters for
                      styleTransform
    dstFile        -- name of the file being produced
    rule, dirObj   -- handed to adagio.logInfo and xsltprocEquivalent;
                      dirObj.option_files are added to the dependencies
    dataTree       -- datafile already parsed, or None to parse it here

    When dstFile is up to date the function returns dataTree untouched and
    does nothing else. If the transformation raises etree.XSLTApplyError, the
    error is printed and the process terminates with exit status 1.
    """

    # Everything the destination depends on: stylesheets, data and options
    prerequisites = set(styles + [datafile])
    prerequisites.update(dirObj.option_files)
    dependency.update(dstFile, prerequisites)

    # Nothing to do when the destination is newer than its prerequisites
    if dependency.isUpToDate(dstFile):
        print(i18n.get("file_uptodate").format(os.path.basename(dstFile)))
        return dataTree

    print(i18n.get("producing").format(os.path.basename(dstFile)))

    # Parse lazily: only when the caller did not hand over a tree
    if dataTree is None:
        adagio.logInfo(rule, dirObj, "Parsing " + datafile)
        dataTree = treecache.findOrAddTree(datafile, True)

    xsltprocEquivalent(rule, dirObj, styleParams, datafile, dstFile)
    try:
        # NOTE(review): the transformation output is bound here but nothing
        # in this function writes or returns it -- confirm this is intended.
        result = styleTransform(dataTree, **styleParams)
    except etree.XSLTApplyError as applyError:
        print("")
        print(i18n.get("error_applying_xslt").format("\n".join(styles),
                                                     datafile, rule))
        print(" ".join(["Error:", str(applyError)]))
        sys.exit(1)
def obtainXincludes(files):
    """
    Collect the files pulled in through XInclude by the given XML files.

    files -- list with the names of the XML files to scan. Relative names are
             turned into absolute ones. The list is extended IN PLACE with
             every included file located in the same directory as the file
             including it, so that those files are scanned as well.

    Only xi:include elements that have an href attribute and whose parse
    attribute is either absent or equal to "xml" are considered.

    Returns a set with the absolute names of the included files that are
    located in a directory different from the one of the including file.

    If an included file cannot be resolved, a message is printed and the
    process terminates with exit status 1.
    """

    # Constant for every file, so build them only once
    includePath = '//{http://www.w3.org/2001/XInclude}include'
    baseAttr = '{http://www.w3.org/XML/1998/namespace}base'

    # Remember the old current directory because we are going to modify it
    old_cwd = os.getcwd()

    result = set()
    # Names already scanned. Without this guard two files in the same
    # directory including each other keep extending "files" forever.
    scanned = set()

    # "files" grows while it is traversed; the loop reaches the new entries
    for fileName in files:
        # We only accept absolute paths
        if not os.path.isabs(fileName):
            fileName = os.path.abspath(fileName)

        scanKey = os.path.normpath(fileName)
        if scanKey in scanned:
            continue
        scanned.add(scanKey)

        # Get the directory where the file is located and change cwd
        fDir = os.path.dirname(fileName)
        os.chdir(fDir)
        try:
            # Get the file parsed without expanding the xincludes
            root = treecache.findOrAddTree(fileName, False)

            # Obtain the included files (xml:base, if given, is prepended)
            includeFiles = set()
            for element in root.findall(includePath):
                href = element.attrib.get('href')
                if href is None:
                    continue
                if element.attrib.get('parse') not in (None, 'xml'):
                    continue
                includeFiles.add(
                    os.path.join(element.attrib.get(baseAttr, ''), href))

            # Traverse all the include files
            for includeFile in includeFiles:
                locatedFile = treecache.xml_resolver.resolve_file(includeFile)

                # If not found, notify and terminate
                if locatedFile is None:
                    print(i18n.get('file_not_found').format(includeFile))
                    print('%s %s' % (i18n.get('included_from'), fileName))
                    sys.exit(1)

                locatedAbs = os.path.abspath(locatedFile)
                if os.path.dirname(locatedAbs) == fDir:
                    # If it is in the same dir, prepare to traverse
                    files.append(locatedFile)
                else:
                    # If in another dir, append to the result
                    result.add(locatedAbs)
        finally:
            # Restore the original cwd even when parsing or resolving fails
            os.chdir(old_cwd)

    return result
def getIncludes(fName):
    """
    Find the files referenced from an XML file through xsl:import,
    xsl:include, xi:include and (XHTML namespace) rss elements.

    Returns a set. For import/include elements the value stored is whatever
    treecache.xml_resolver.resolve_file returns for the href attribute
    (references it cannot resolve are skipped). For rss elements the value is
    the absolute path obtained by joining the directory of fName with the
    "file" attribute.
    """

    absName = os.path.abspath(fName)
    baseDir = os.path.dirname(absName)

    # Parse the document (second argument False, as in the other callers
    # that need the includes left unexpanded)
    tree = treecache.findOrAddTree(absName, False)

    # One findall per element type instead of a single xpath expression with
    # "self::" tests: the xpath version would only work when lxml is
    # installed.
    includePaths = (
        '//{http://www.w3.org/1999/XSL/Transform}import',
        '//{http://www.w3.org/1999/XSL/Transform}include',
        '//{http://www.w3.org/2001/XInclude}include',
    )
    referencing = set()
    for path in includePaths:
        referencing.update(tree.findall(path))

    found = set()

    # Includes and imports of XML and XSL: resolve the href
    for node in referencing:
        if 'href' not in node.attrib:
            continue
        resolved = treecache.xml_resolver.resolve_file(node.attrib.get('href'))
        if resolved is not None:
            found.add(resolved)

    # rss elements in the HTML namespace: the file is relative to the document
    for node in set(tree.findall('//{http://www.w3.org/1999/xhtml}rss')):
        if 'file' in node.attrib:
            found.add(os.path.abspath(os.path.join(baseDir,
                                                   node.attrib['file'])))

    return found
def doGetShuffledFiles(fname):
    """
    Given an XML file, return the names of the files that will contain its
    permutations.

    One name is produced per productnumber element found in the sectioninfo
    child of the root. When there is no sectioninfo, or it has no
    productnumber, a single name is returned, because main() still creates
    one permutation (with a generated seed) in that case.

    The names are built by inserting "_1", "_2", ... between the base name
    and the extension of fname, and are returned as a list.
    """

    sourceTree = treecache.findOrAddTree(fname, True)
    root = sourceTree.getroot()

    # Number of 'productnumber' elements, with a minimum of 1
    n = 1
    sectionInfo = root.find('sectioninfo')
    if sectionInfo is not None:
        n = max(len(sectionInfo.findall('productnumber')), 1)

    # Create the names of the raw files that will be produced
    (head, tail) = os.path.splitext(fname)
    return [head + '_' + str(i) + tail for i in range(1, n + 1)]
def _shuffleWriteLine(stream, *items):
    """Write the items to stream separated by single spaces, then a newline."""
    stream.write(' '.join(['%s' % (item,) for item in items]) + '\n')


def main(sourceFile, pout = None):
    """
    Given a Docbook file containing a qandaset with a set of qandadivs,
    create as many permutations as productnumber elements are found in the
    sectioninfo child of the root. The text of each productnumber is the
    integer seed of one permutation; when there is none, one seed is derived
    from the current time.

    sourceFile -- name of the XML file to shuffle
    pout       -- object with a write method receiving the progress messages
                  (sys.stdout when None)

    Permutation number i is written to a file named after sourceFile with
    "_i" inserted before the extension. Both the qandadivs in the qandaset
    and the qandaentries within each qandadiv are shuffled; the qandaentries
    keep the child positions they occupied in their qandadiv.

    Returns the number of permutations created (zero means error). The
    process terminates with exit status 1 if a qandadiv holding several
    qandaentries has no id attribute.
    """

    if pout is None:
        pout = sys.stdout

    # For notifying steps through pout
    stepCount = 1

    _shuffleWriteLine(pout, 'Step', stepCount, 'Check file permissions')
    stepCount += 1

    # If the given file is not present, return.
    if not os.path.isfile(sourceFile):
        _shuffleWriteLine(pout, 'File', sourceFile, 'cannot be accessed.')
        return 0

    _shuffleWriteLine(pout, 'Step', stepCount,
                      'Create the XML document manager')
    stepCount += 1

    # Parse the source tree.
    sourceTree = treecache.findOrAddTree(sourceFile, True)
    root = sourceTree.getroot()

    # Get all product numbers from the source document (they are the seeds)
    seedList = []
    sectionInfo = root.find("sectioninfo")
    if sectionInfo is not None:
        seedList = [copy.deepcopy(pnumber)
                    for pnumber in sectionInfo.findall("productnumber")]

    # Count the seeds through seedList: it is always bound, even when the
    # document has no sectioninfo at all.
    _shuffleWriteLine(pout, 'Step', stepCount, 'Read', len(seedList),
                      'seeds in document.')
    stepCount += 1

    _shuffleWriteLine(pout, 'Step', stepCount,
                      'Fetch the qandadiv elements to shuffle')
    stepCount += 1

    # If no product number is given, create one for shuffling
    if not seedList:
        pnumber = etree.Element('productnumber')
        pnumber.text = str(int(time.time()))
        seedList.append(pnumber)

    # Compare with None explicitly: the truth value of an element depends on
    # its children, not on whether it was found.
    qandaset = root.find('qandaset')
    if qandaset is None:
        _shuffleWriteLine(pout, 'No element qandaset found under root')
        return 0

    qandadivs = qandaset.findall('qandadiv')
    if not qandadivs:
        _shuffleWriteLine(pout,
                          'No qandadiv elements found. Nothing to shuffle.')
        return 0

    _shuffleWriteLine(pout, 'Step', stepCount, 'Creating hash for',
                      len(qandadivs), 'qandadivs')

    # List the units being shuffled in document order: every qandaentry of a
    # qandadiv with several of them, or the qandadiv itself otherwise.
    originalOrder = []
    for qandadiv in qandadivs:
        qandaentries = qandadiv.findall('qandaentry')
        if len(qandaentries) > 1:
            originalOrder.extend(qandaentries)
        else:
            originalOrder.append(qandadiv)

    # Dump all the IDs being processed (a single line, space separated)
    pout.write('IDs: ')
    for el in originalOrder:
        if el.tag == 'qandadiv':
            label = el.get('id')
        else:
            parent = el.getparent()
            idStr = parent.get('id')
            if idStr is None:
                print('Anomaly while shuffling. Quandadiv with no id attribute')
                print(etree.tostring(parent))
                sys.exit(1)
            label = idStr + '_' + str(parent.findall('qandaentry').index(el))
        pout.write(' %s' % (label,))
    pout.write('\n')

    _shuffleWriteLine(pout, 'Step', stepCount,
                      'Create the permutation vectors')

    # The output names only differ in the index
    (head, tail) = os.path.splitext(sourceFile)

    # Loop over the elements in the seedList
    for index, seedElement in enumerate(seedList, 1):
        result = copy.deepcopy(root)

        seed = int(seedElement.text)
        random.seed(seed)

        # Set the status and insert a new section info as first child
        result.set("id", "AdaShuffle")
        result.set('status', str(seed))
        newInfo = etree.Element('sectioninfo')
        # NOTE(review): extend() adds the children of seedElement, not
        # seedElement itself -- confirm whether append() was intended.
        newInfo.extend(seedElement)
        result.insert(0, newInfo)

        # Get the qandadivs of the copy
        shuffledSet = result.find('qandaset')
        shuffledDivs = shuffledSet.findall('qandadiv')

        # Create a list with all the qandaentries in the given order
        originalOrder = []
        for qandadiv in shuffledDivs:
            originalOrder.extend(qandadiv.findall('qandaentry'))

        # Remove the qandadivs (this is to replace them by a shuffled
        # version). Plain loop: map() would be lazy on Python 3.
        for qandadiv in shuffledDivs:
            shuffledSet.remove(qandadiv)

        # Permute the list of qandadivs and reattach them in the new order
        random.shuffle(shuffledDivs)
        shuffledSet.extend(shuffledDivs)

        # Traverse the qandadivs and shuffle those with more than one
        # qandaentry. We need to preserve their positions within the qandadiv.
        for qandadiv in shuffledDivs:
            qandaentries = qandadiv.findall('qandaentry')
            if len(qandaentries) == 1:
                continue

            # Child positions occupied by the qandaentries in the qandadiv
            children = list(qandadiv)
            slots = [children.index(entry) for entry in qandaentries]

            # Remove the qandaentries from the qandadiv
            for entry in qandaentries:
                qandadiv.remove(entry)

            # Refill the slots in ascending order with randomly drawn entries
            for slot in slots:
                entry = qandaentries.pop(
                    random.randint(0, len(qandaentries) - 1))
                qandadiv.insert(slot, entry)

        etree.ElementTree(result).write(head + '_' + str(index) + tail,
                                        encoding = 'UTF-8',
                                        xml_declaration = True,
                                        pretty_print = True)

        # Get again the qandadivs to dump the permutation array: for each
        # qandaentry in the new order, its position in the original order
        positions = [originalOrder.index(qandaentry)
                     for qandadiv in shuffledSet.findall('qandadiv')
                     for qandaentry in qandadiv.findall('qandaentry')]
        _shuffleWriteLine(pout, 'V00:', *positions)

    return len(seedList)