Beispiel #1
0
def singleStyleApplication(datafile, styles, styleTransform, styleParams, dstFile, rule, dirObj, dataTree=None):
    """
    Apply a transformation to a file with a dictionary
    """

    # Check for dependencies!
    sources = set(styles + [datafile])
    sources.update(dirObj.option_files)
    dependency.update(dstFile, sources)

    # If the destination file is up to date, skip the execution
    if dependency.isUpToDate(dstFile):
        print i18n.get("file_uptodate").format(os.path.basename(dstFile))
        return dataTree

    # Proceed with the execution of xslt
    print i18n.get("producing").format(os.path.basename(dstFile))

    # Parse the data file if needed
    if dataTree == None:
        adagio.logInfo(rule, dirObj, "Parsing " + datafile)
        dataTree = treecache.findOrAddTree(datafile, True)

    # Apply the transformation
    xsltprocEquivalent(rule, dirObj, styleParams, datafile, dstFile)
    try:
        result = styleTransform(dataTree, **styleParams)
    except etree.XSLTApplyError, e:
        print
        print i18n.get("error_applying_xslt").format("\n".join(styles), datafile, rule)
        print "Error:", str(e)
        sys.exit(1)
Beispiel #2
0
def obtainXincludes(files):
    """
    Obtain the files included using xinclude in the given file. Return a list
    with the absolute filenames
    """

    # Remember the old current directory because we are going to modify it
    old_cwd = os.getcwd()

    result = set([])
    for fileName in files:
        # We only accept absolute paths
        if not os.path.isabs(fileName):
            fileName = os.path.abspath(fileName)

        # Get the directory where the file is located and change cwd
        fDir = os.path.dirname(fileName)
	os.chdir(fDir)

        # Get the file parsed without expanding the xincludes
        root = treecache.findOrAddTree(fileName, False)

        # Path to locate the includes and dir of the given file
        includePath = '//{http://www.w3.org/2001/XInclude}include'

        # Obtain the included files
        includeFiles = \
            set([os.path.join(
                    x.attrib.get('{http://www.w3.org/XML/1998/namespace}base',
                                 ''),
                    x.attrib.get('href'))
                 for x in root.findall(includePath)
                 if x.attrib.get('href') != None and \
                     (x.attrib.get('parse') == None or
                      x.attrib.get('parse') == 'xml')])

        # Traverse all the include files
        for includeFile in includeFiles:
            # Locate the file applying Adagio search rules
            # locatedFile = dependency.locateFile(includeFile, [fDir])
            locatedFile = treecache.xml_resolver.resolve_file(includeFile)

            # If not found, notify and terminate
            if locatedFile == None:
                print i18n.get('file_not_found').format(includeFile)
                print i18n.get('included_from'), fileName
                sys.exit(1)

            if os.path.dirname(os.path.abspath(locatedFile)) == fDir:
                # If it is in the same dir, prepare to traverse
                files.append(locatedFile)
            else:
                # If in another dir, append to the result
                result.add(os.path.abspath(locatedFile))

    # restore the original cwd
    os.chdir(old_cwd)

    return result
Beispiel #3
0
def getIncludes(fName):
    """
    Collect the files referenced from an XML file through xsl:import,
    xsl:include, xi:include and rss elements in the XHTML namespace.

    fName -- name of the XML file to inspect

    Returns the set of file names obtained. The href values of imports and
    includes are passed through the XML resolver and dropped when it returns
    None; the 'file' attribute of rss elements is made absolute with respect
    to the directory of fName.
    """

    # Absolute name of the file and the directory containing it
    absName = os.path.abspath(fName)
    baseDir = os.path.dirname(absName)

    # Parse the document without expanding the xincludes
    tree = treecache.findOrAddTree(absName, False)

    # An xpath expression selecting these three elements at once would be
    # equivalent, but then the package would only work with lxml installed.
    includePaths = ('//{http://www.w3.org/1999/XSL/Transform}import',
                    '//{http://www.w3.org/1999/XSL/Transform}include',
                    '//{http://www.w3.org/2001/XInclude}include')
    includeNodes = set()
    for path in includePaths:
        includeNodes.update(tree.findall(path))

    # Resolve the href of every include/import of XML and XSL
    found = set()
    for node in includeNodes:
        if 'href' not in node.attrib:
            continue
        resolved = treecache.xml_resolver.resolve_file(node.attrib.get('href'))
        if resolved is not None:
            found.add(resolved)

    # rss elements in the HTML namespace point to a file relative to the
    # directory of the document
    rssNodes = set(tree.findall('//{http://www.w3.org/1999/xhtml}rss'))
    for node in rssNodes:
        if 'file' in node.attrib:
            rssFile = os.path.join(baseDir, node.attrib['file'])
            found.add(os.path.abspath(rssFile))

    return found
Beispiel #4
0
def doGetShuffledFiles(fname):
    """
    Function that given an XML file, checks the presence of productnumber
    elements in the section info and returns the names of the files which will
    contain the permutations.

    fname -- name of the XML file to inspect

    Returns a list with one name per productnumber element found in the
    sectioninfo child of the root, built as <name>_<i><extension> with i
    starting at 1. If there is no sectioninfo, or it contains no
    productnumber, a single name (index 1) is returned.
    """

    sourceTree = treecache.findOrAddTree(fname, True)
    root = sourceTree.getroot()

    # Get the number of 'productnumber' elements. If none, set it to 1.
    # findall returns an empty list (not None) when nothing matches, so the
    # count has to be clamped explicitly.
    n = 1
    sectionInfo = root.find('sectioninfo')
    if sectionInfo is not None:
        n = max(len(sectionInfo.findall('productnumber')), 1)

    # Create the names of the raw files that will be produced
    (h, t) = os.path.splitext(fname)
    return [h + '_' + str(x) + t for x in range(1, n + 1)]
Beispiel #5
0
def main(sourceFile, pout = None):
    """
    Function that given a Docbook file containing a quandaset with a set of
    quandadivs creates as many permutations as specified in a specific element
    within the document.
    
    Returns the number of permutations created (zero means error)
    """

    if pout == None:
	pout = sys.stdout

    # For notifying steps through stdout
    stepCount = 1

    print >> pout, 'Step', stepCount, 'Check file permissions'
    stepCount += 1

    # If the given file is not present, return.
    if not os.path.isfile(sourceFile):
        print >> pout, 'File', sourceFile, 'cannot be accessed.'
        return 0

    print >> pout, 'Step', stepCount, 'Create the XML document manager'
    stepCount += 1
    
    # Parse the source tree.
    sourceTree = treecache.findOrAddTree(sourceFile, True)
    root = sourceTree.getroot()

    # Get all product numbers from the source document (they are the seeds)
    seedList = []
    sectionInfo = root.find("sectioninfo")
    if sectionInfo != None:
        pnumbers = sectionInfo.findall("productnumber")
        if pnumbers != None:
            for pnumber in pnumbers:
                seedList.append(copy.deepcopy(pnumber))
        print >> pout, 'Step', stepCount, 'Read', len(pnumbers), \
            'seeds in document.'
        stepCount += 1
    
    print >> pout, 'Step', stepCount, 'Fetch the qandadiv elements to shuffle'
    stepCount += 1
    
    # If no product number is given, create one for shuffling
    if seedList == []:
        pnumber = etree.Element('productnumber')
        pnumber.text = str(int(time.time()))
        seedList.append(pnumber)

    qandaset = root.find('qandaset')
    if qandaset == None:
        print >> pout, 'No element qandaset found under root'
        return 0

    qandadivs = qandaset.findall('qandadiv')
    if qandadivs == []:
        print >> pout, 'No qandadiv elements found. Nothing to shuffle.'
        return 0
    
    print >> pout, 'Step', stepCount, 'Creating hash for', len(qandadivs), \
        'qandadivs'

    # Create a dictionary with all the qandaentries hashed by the index
    # (starting at 1
    originalOrder = []
    for qandadiv in qandadivs:
        # Get all the entries
        qandaentries = qandadiv.findall('qandaentry')
        # If there is more than one, means several questions inside the same div
        if len(qandaentries) > 1:
            originalOrder.extend(qandaentries)
        # Only one question in the qandadiv
        else:
            originalOrder.append(qandadiv)
    
    # Dump all the IDs being processed
    print >> pout, 'IDs: ',
    for el in originalOrder:
        if el.tag == 'qandadiv':
            print >> pout, el.get('id'),
        else:
            p = el.getparent()
            idStr = p.get('id')
            if idStr == None:
                print 'Anomaly while shuffling. Quandadiv with no id attribute'
                print etree.tostring(p)
                sys.exit(1)
            print >> pout, idStr + '_' + str(p.findall('qandaentry').index(el)),
    print >> pout

    print >> pout, 'Step', stepCount, 'Create the permutation vectors'

    # Loop over the elements in the seedList
    permutations = []
    index = 1
    for seedElement in seedList:
        permutation = []
        result = copy.deepcopy(root)
        
        seed = long(seedElement.text)
        random.seed(seed)

        # Set the status and replace section info
        result.set("id", "AdaShuffle")
        result.set('status', str(seed))
        sectionInfo = etree.Element('sectioninfo')
        sectionInfo.extend(seedElement)
        result.insert(0, sectionInfo)

        # Get the qandadivs
        qandaset = result.find('qandaset')
        qandadivs = qandaset.findall('qandadiv')
        size = len(list(qandadivs))
        
        # Create a list with all the qandaentries in the given order
        originalOrder = []
        for qandadiv in qandadivs:
            # Get all the entries
            originalOrder.extend(qandadiv.findall('qandaentry'))
    
        # Remove the qandadivs (this is to replace them by a shuffled version)
        # There is something wrong with this function! It bombs out in some cases
        # ABEL: FIX
        map(lambda x: qandaset.remove(x), qandadivs)
        

        # Get a list representing a permutation of the indices of the list
        random.shuffle(qandadivs)

        # Reattach the qandadivs
        qandaset.extend(qandadivs)

        # Traverse the qandadivs and shuffle those with more than one
        # qandaentry. We need to preserve their positions within the quandadiv.
        for qandadiv in qandadivs:
            if len(qandadiv.findall('qandaentry')) == 1:
                continue

            # Shuffle the qandaentries inside the qandadiv
            qandaentries = qandadiv.findall('qandaentry')

            # Get the indeces of the qandaentries in the qandadiv
            children = qandadiv.getchildren()
            indeces = [children.index(x) for x in qandaentries]

            # Remove the qandaentries from the qandadiv
            map(lambda x: qandadiv.remove(x), qandaentries)

            for idx in range(0, len(indeces)):
                entry = qandaentries.pop(random.randint(0, len(qandaentries) - 1))
                qandadiv.insert(indeces[idx], entry)
                
        result = etree.ElementTree(result)
        (head, tail) = os.path.splitext(sourceFile)
        result.write(head + '_' + str(index) + tail,
                     encoding = 'UTF-8', xml_declaration = True, 
                     pretty_print = True)

        # Get again the qandadivs to dump the permutation array
        qandadivs = qandaset.findall('qandadiv')
        print >> pout, 'V00:',
        for qandadiv in qandadivs:
            for qandaentry in qandadiv.findall('qandaentry'):
                print >> pout, originalOrder.index(qandaentry),
        print >> pout

        index += 1

    return len(seedList)