Example #1
0
    def importFile(self, text):
        """Convert from XMLSpy format

        The two leading processing instructions (stylesheet, then xmlspysps),
        the ``schemaLocation`` attribute and the XMLSchema-instance namespace
        declaration are stripped from the parsed document.  The namespaces
        that remain select a doctype from the ``DTD`` table, which is emitted
        in front of the UTF-8 serialization of the first remaining top-level
        node.

        Returns the XML declaration, doctype and serialized body joined by
        newlines.

        Raises ValueError if the text cannot be parsed, if either expected
        processing instruction is missing, or if no doctype is registered for
        the document's namespaces.
        """

        # Fixup invalid PI prefix. Naughty Altova, PIs can't start with 'xml'
        text = text.replace('<?xmlspy', '<?authentic')
        try:
            doc = XMLService.parseString(text)
        except XMLService.XMLError:
            raise ValueError("Could not perform XMLSpy/Authentic import: unable to parse file")

        # The parsed document is released on every exit path from here on,
        # including the doctype-lookup failure near the end.
        try:
            # Get rid of Authentic goop: the first two top-level nodes must be
            # the stylesheet PI followed by the xmlspysps PI.
            for expected in ('stylesheet', 'xmlspysps'):
                pi = doc.children
                if pi.type != 'pi':
                    raise ValueError(
                        "Could not perform XMLSpy/Authentic import: missing %s PI" % expected)
                pi.unlinkNode()
                pi.freeNode()

            rootNode = doc.children
            attr = rootNode.properties
            while attr:
                if attr.name == 'schemaLocation':
                    attr.removeProp()
                    break
                attr = attr.next

            schema_ns = rootNode.removeNsDef(
                "http://www.w3.org/2001/XMLSchema-instance")
            schema_ns.freeNode()

            ns = XMLService.listDocNamespaces(doc)
            # Get rid of namespaces we don't care about.  Each one is removed
            # on its own so that a missing first entry cannot skip the second.
            for unwanted in ('http://bibtexml.sf.net/',
                             'http://cnx.rice.edu/mdml/0.4'):
                try:
                    ns.remove(unwanted)
                except ValueError:
                    pass

            ns.sort()
            try:
                doctype = DTD[tuple(ns)]
            except KeyError:
                raise ValueError("Cannot determine CNXML version from provided file")

            body = rootNode.serialize(encoding='utf-8')
            return '\n'.join(
                ['<?xml version="1.0" encoding="utf-8"?>', doctype, body])
        finally:
            doc.freeDoc()
    def importFile(self, text):
        """Convert from XMLSpy format

        The two leading processing instructions (stylesheet, then xmlspysps),
        the ``schemaLocation`` attribute and the XMLSchema-instance namespace
        declaration are stripped from the parsed document.  The namespaces
        that remain select a doctype from the ``DTD`` table, which is emitted
        in front of the UTF-8 serialization of the first remaining top-level
        node.

        Returns the XML declaration, doctype and serialized body joined by
        newlines.

        Raises ValueError if the text cannot be parsed, if either expected
        processing instruction is missing, or if no doctype is registered for
        the document's namespaces.
        """

        # Fixup invalid PI prefix. Naughty Altova, PIs can't start with 'xml'
        text = text.replace('<?xmlspy', '<?authentic')
        try:
            doc = XMLService.parseString(text)
        except XMLService.XMLError:
            raise ValueError("Could not perform XMLSpy/Authentic import: unable to parse file")

        # The parsed document is released on every exit path from here on,
        # including the doctype-lookup failure near the end.
        try:
            # Get rid of Authentic goop: the first two top-level nodes must be
            # the stylesheet PI followed by the xmlspysps PI.
            for expected in ('stylesheet', 'xmlspysps'):
                pi = doc.children
                if pi.type != 'pi':
                    raise ValueError(
                        "Could not perform XMLSpy/Authentic import: missing %s PI" % expected)
                pi.unlinkNode()
                pi.freeNode()

            rootNode = doc.children
            attr = rootNode.properties
            while attr:
                if attr.name == 'schemaLocation':
                    attr.removeProp()
                    break
                attr = attr.next

            schema_ns = rootNode.removeNsDef(
                "http://www.w3.org/2001/XMLSchema-instance")
            schema_ns.freeNode()

            ns = XMLService.listDocNamespaces(doc)
            # Get rid of namespaces we don't care about.  Each one is removed
            # on its own so that a missing first entry cannot skip the second.
            for unwanted in ('http://bibtexml.sf.net/',
                             'http://cnx.rice.edu/mdml/0.4'):
                try:
                    ns.remove(unwanted)
                except ValueError:
                    pass

            ns.sort()
            try:
                doctype = DTD[tuple(ns)]
            except KeyError:
                raise ValueError("Cannot determine CNXML version from provided file")

            body = rootNode.serialize(encoding='utf-8')
            return '\n'.join(
                ['<?xml version="1.0" encoding="utf-8"?>', doctype, body])
        finally:
            doc.freeDoc()
Example #3
0
def rendercnxml(source, prestyles=()):
    """Parse `source` and run it through `prestyles` followed by CNXML_RENDER_XSL.

    `prestyles` is any iterable of stylesheets to apply before the render
    stylesheet.  The pipeline is invoked with the ``wrapper`` parameter set
    to 0, and whatever XMLService.xsltPipeline returns is handed back.
    """
    pipeline = list(prestyles) + [CNXML_RENDER_XSL]
    parsed = XMLService.parseString(source)
    return XMLService.xsltPipeline(parsed, pipeline, wrapper=0)
Example #4
0
 def abstract_text(self):
     """Return the content's abstract/summary run through CNXML_SEARCHABLE_XSL.

     The raw abstract comes from the context's ``getRawAbstract()`` when that
     exists and yields something truthy, otherwise from its ``abstract``
     attribute.  An empty abstract gives the empty string.
     """
     ctx = self.context
     getter = getattr(ctx, 'getRawAbstract', None)
     raw = None
     if getter:
         raw = getter()
     if not raw:
         raw = ctx.abstract
     if not raw:
         return ''

     # Wrap the fragment in an element that declares every namespace prefix
     # listed below, so the fragment can be parsed on its own.
     wrapped = """<md:abstract xmlns="http://cnx.rice.edu/cnxml"
                                 xmlns:bib="http://bibtexml.sf.net/"
                                 xmlns:m="http://www.w3.org/1998/Math/MathML"
                                 xmlns:md="http://cnx.rice.edu/mdml"
                                 xmlns:q="http://cnx.rice.edu/qml/1.0">%s</md:abstract>""" % raw
     parsed = XMLService.parseString(wrapped)
     return XMLService.xsltPipeline(parsed, [CNXML_SEARCHABLE_XSL])
Example #5
0
    def exportFile(self, text):
        """Convert to XMLSpy format

        The normalized text is searched with HEADER_REGEX and the match is
        expanded through MATH_XMLSPY_TEMPLATE when the captured doctype
        mentions 'MathML', otherwise through XMLSPY_TEMPLATE.

        Raises ValueError when the header cannot be matched or expanded.
        """
        failure = "Could not perform XMLSpy/Authentic export"
        normalized = XMLService.normalize(text)
        match = HEADER_REGEX.search(normalized)

        # Presumably a failed search leaves ``match`` as None, which shows up
        # here as AttributeError - verify against HEADER_REGEX.
        try:
            doctype = match.groupdict()['doctype']
        except (AttributeError, KeyError):
            raise ValueError(failure)

        if 'MathML' not in doctype:
            template = XMLSPY_TEMPLATE
        else:
            template = MATH_XMLSPY_TEMPLATE

        try:
            return match.expand(template)
        except (AttributeError, KeyError):
            raise ValueError(failure)
    def exportFile(self, text):
        """Convert to XMLSpy format

        The normalized text is searched with HEADER_REGEX and the match is
        expanded through MATH_XMLSPY_TEMPLATE when the captured doctype
        mentions 'MathML', otherwise through XMLSPY_TEMPLATE.

        Raises ValueError when the header cannot be matched or expanded.
        """
        failure = "Could not perform XMLSpy/Authentic export"
        normalized = XMLService.normalize(text)
        match = HEADER_REGEX.search(normalized)

        # Presumably a failed search leaves ``match`` as None, which shows up
        # here as AttributeError - verify against HEADER_REGEX.
        try:
            doctype = match.groupdict()['doctype']
        except (AttributeError, KeyError):
            raise ValueError(failure)

        if 'MathML' not in doctype:
            template = XMLSPY_TEMPLATE
        else:
            template = MATH_XMLSPY_TEMPLATE

        try:
            return match.expand(template)
        except (AttributeError, KeyError):
            raise ValueError(failure)
 def testDoubleTransform(self):
     """xsltPipeline must correctly perform double transform"""
     doc = XMLService.parseDoc(noNS)
     # Two stylesheets applied in sequence must yield the literal 'Success'.
     self.assertEqual(XMLService.xsltPipeline(doc, ['step1.xsl', 'step2.xsl']), 'Success')
 def testSingleTransform(self):
     """xsltPipeline must correctly perform single transform"""
     doc = XMLService.parseDoc(noNS)
     # A one-stylesheet pipeline must yield the literal 'Success'.
     self.assertEqual(XMLService.xsltPipeline(doc, ['test.xsl']), 'Success')
 def testEmptyPipeline(self):
     """xsltPipeline must serialize original doc if pipeline is empty"""
     doc = XMLService.parseDoc(noNS)
     # With no stylesheets the parsed fixture is expected back unchanged.
     self.assertEqual(XMLService.xsltPipeline(doc, []), '<tag>Hello</tag>')
 def testParseDocument(self):
     """return of parseDoc must serialize correctly"""
     doc = XMLService.parseDoc(noNS)
     # Round-trip: serializing the parsed fixture must give the expected markup.
     self.assertEqual(etree.tostring(doc), '<tag>Hello</tag>')
Example #11
0
if __name__ == "__main__":
    strZipFile = sys.argv[1]
    strInputOOoXmlFile = sys.argv[2]
    strOutputMassageOOoXmlFileBase = sys.argv[3]
    strOutputCnxmlFile = sys.argv[4]
    #print 'arg1 is \n' + sys.argv[1]
    #print 'arg2 is \n' + sys.argv[2]
    #print 'arg3 is \n' + sys.argv[3]
    #print 'arg4 is \n' + sys.argv[4]

    fileInputXml = open(strInputOOoXmlFile)
    strInputOOoXml = fileInputXml.read()
    fileInputXml.close()

    strOOoXml = strInputOOoXml
    doc = XMLService.parseString(strOOoXml)

    objZipFile = zipfile.ZipFile(strZipFile,
                                 'r')  # no 'rb' since 'b' => binary?

    #
    # Pass #1 - OOo Xml to OOo Xml xform - change one entry table & remove empty <text:p>
    #
    try:
        styles_xml = objZipFile.read('styles.xml')
        (tmpsfile, tmpsname) = tempfile.mkstemp('.OOo')
        os.write(tmpsfile, styles_xml)
        os.close(tmpsfile)
        stylesPath = tmpsname

        strOutputMassageOOoXml = XMLService.transform(strOOoXml,
    def convert(self, data, outdata, **kwargs):
        """Input is a zip file. Output is idata, with getData being index.cnxml and subObjects being other siblings.

        `data` is the raw zip content and `outdata` is the object that is
        filled in and returned.  `kwargs` is forwarded to the word converter;
        the LaTeX branch reads kwargs['user_name'] directly.

        Raises CNXImportError when the archive holds more than one
        transformable file (index.cnxml, word-processor files, LaTeX files).
        """
        fakefile = StringIO(data)
        # NOTE: this local shadows any module-level name `zipfile` for the
        # rest of the method.
        zipfile = ZipFile(fakefile, 'r')

        # Work out the directory prefix shared by every entry so that names
        # can be handled relative to it.
        prefix = ''
        namelist = zipfile.namelist()
        lenlist = len(namelist)
        if lenlist > 1:
            prefix = os.path.commonprefix(namelist)
            # commonprefix works per character, so cut back to the last '/'.
            lastslash = prefix.rfind("/")
            if lastslash != -1: prefix = prefix[:lastslash+1]
            else: prefix = ''
        elif lenlist == 1:
            name = namelist[0]
            lastslash = name.rfind("/")
            if lastslash != -1: prefix = name[:lastslash+1]
        namelist = [name[len(prefix):] for name in namelist] # Strip prefix from namelist entries

        zLOG.LOG("Sword Transform", zLOG.INFO, "files in zip=%s" % namelist)
        meta = outdata.getMetadata()
        meta['properties'] = {}
        objects = {}

        # Count the candidates for becoming the module body.
        containsIndexCnxml = ('index.cnxml' in namelist)
        wordfiles = len([True for m in namelist for e in \
                                ('.odt', '.sxw', '.docx', \
                                '.rtf', '.doc') if m.endswith(e)])
        latexfiles = len([True for m in namelist if m.endswith('.tex')])

        if sum([int(containsIndexCnxml), wordfiles, latexfiles]) > 1:
            # The upload contains more than one transformable file, ie
            # it has a index.cnxml and latex/word content, or it has both latex
            # and word content, or more than one latex or word file.
            raise CNXImportError(
                "Import has more than one transformable file. It has "
                "%d index.cnxml files, %d word files and "
                "%d LaTeX files" % (containsIndexCnxml, wordfiles, latexfiles))

        for modname in namelist:
            if not modname:               # some zip programs show directories by themselves
              continue
            isubdir = modname.find('/')
            if isubdir != -1:             # subdirs, incl. especially 'stylesheets', not imported
              continue
            # The archive is keyed by the full entry name, so put the prefix back.
            unzipfile = zipfile.read(prefix + modname)
            if modname == "mets.xml":
                # Write metadata
                zLOG.LOG("Sword Transform", zLOG.INFO, "starting...")
                # Two XSLT passes (SWORD2RME_XSL, then XML2JSON_XSL) produce
                # text that is decoded and stored as the 'properties' metadata.
                simplified = XMLService.transform(unzipfile, SWORD2RME_XSL)
                jsonstr = XMLService.transform(simplified, XML2JSON_XSL)
                m = json.decode(jsonstr)
                meta['properties'] = m
            elif modname == "index.cnxml":
                # hook here for featured links
                # elaborate the metadata returned in order to add the featured links.
                meta['featured_links'] = []
                if unzipfile:
                    outdata.setData(StringIO(unzipfile))
                    # NOTE(review): parseString looks like a DOM parser
                    # (getElementsByTagName / firstChild / toxml) - confirm
                    # against the file's imports.
                    dom = parseString(unzipfile)
                    groups = dom.getElementsByTagName('link-group')
                    links = meta.get('featured_links', [])
                    for group in groups:
                        group_type = group.getAttribute('type').encode(self.encoding)
                        for link in group.getElementsByTagName('link'):
                            # NOTE(review): firstChild is dereferenced without
                            # a check; an empty <link/> would fail here if
                            # firstChild is None in that case - confirm.
                            title = link.firstChild.toxml().encode(
                                self.encoding)
                            url = link.getAttribute('url').encode(
                                self.encoding)
                            strength = link.getAttribute('strength').encode(
                                self.encoding)
                            links.append({'url':url,
                                          'title':title,
                                          'type':group_type,
                                          'strength':strength
                                         }
                            )
                        meta['featured_links'] = links
            else:
                if not containsIndexCnxml:
                    # Without an index.cnxml, a word-processor or LaTeX file
                    # is converted into the module body via `outdata`.
                    if [True for e in ('.odt', '.sxw', '.docx', \
                        '.rtf', '.doc') if modname.endswith(e)]:
                        # This is a word file
                        oo_to_cnxml().convert(unzipfile, outdata, **kwargs)
                    elif modname.endswith('.tex'):
                        # This is LaTeX
                        latex_to_folder().convert(unzipfile, outdata,
                                        original_file_name='sword-import-file.tex',
                                        user_name=kwargs['user_name'])
                        # LaTeX transform returns straight text, make it
                        # a file object
                        outdata.setData(StringIO(outdata.getData()))
                    else:
                        objects[modname] = unzipfile
                else:
                    # An index.cnxml is present: every other file is kept
                    # as a plain sub-object.
                    objects[modname] = unzipfile

        # NOTE(review): these closes are skipped if anything above raises.
        zipfile.close()
        fakefile.close()

        meta = outdata.getMetadata()

        # Add attribution note to the cnxml
        props = meta['properties']
        params = {}
        # Only these three properties are passed to the attribution
        # stylesheet; unicode values are encoded to UTF-8 first.
        for key in ('journal', 'year', 'url'):
          if unicode(key) in props:
            value = props[unicode(key)]
            if isinstance(value, unicode):
              value = value.encode('utf-8')
            params[key] = value

        zLOG.LOG("Sword Transform", zLOG.INFO, "attribution dict=%s" % params)
        data = outdata.getData()

        # getvalue() is called on the data, so it must be a StringIO-like
        # object by this point.
        if data and len(data.getvalue()) > 0:
          attributed = XMLService.transform(data.getvalue(), SWORD_INSERT_ATTRIBUTION_XSL, **params)
          outdata.setData(StringIO(unicode(attributed,'utf-8')))
        else:
          zLOG.LOG("Sword Transform", zLOG.INFO, "Skipping adding attributions because no cnxml was generated...")

        #meta['subdirs'] = subdirs.keys()

        # Sub-objects already set on outdata win over same-named zip entries,
        # because they are merged in last.
        objects.update(outdata.getSubObjects())
        outdata.setSubObjects(objects)

        return outdata
Example #13
0
# Fragment of a script body: `stylesheet`, `source`, `params` and `context`
# are defined outside the lines visible here.
#CNXML_XSL = 'http://cnx.rice.edu/technology/cnxml/stylesheet/unibrowser.xsl'
CNXML_XSL = CNXML_RENDER_XSL

# Fall back to the default render stylesheet when none was supplied.
if not stylesheet:
    stylesheet = CNXML_XSL
stylesheets = [stylesheet]

### for old CNXML (< 0.5) ###
# NOTE(review): the test is "doctype does not contain '0.5'", so any doctype
# naming another version also gets the upgrade stylesheet - confirm intent.
doctype = getattr(context, 'doctype', None)
if doctype and doctype.find('0.5') == -1:
    from Products.CNXMLDocument import CNXML_UPGRADE_XSL
    # The upgrade stylesheet goes first so it runs before the render one.
    stylesheets.insert(0, CNXML_UPGRADE_XSL)
### /upgrade ###

# Parse the source and grab the namespaces
doc = XMLService.parseString(source)
sourceNs = XMLService.listDocNamespaces(doc)

# Figure out our content types
has_math = MATHML_NS in sourceNs
# content_type_decide returns three values; the first two become pipeline
# parameters and the third is kept in `ns`.
params['doctype'], params['mimetype'], ns = context.content_type_decide(
    has_math=has_math)

# Transform source

result = XMLService.xsltPipeline(doc, stylesheets, **params)

# Set content-type
context.REQUEST.RESPONSE.setHeader('Content-Type',
                                   "%s; charset=utf-8" % params['mimetype'])
## Script (Python) "onEditChangeSet"
##bind container=container
##bind context=context
##bind namespace=
##bind script=script
##bind subpath=traverse_subpath
##parameters=
##title=Compute object differences
##
from Products.CNXMLDocument.XMLService import XMLError
from Products.CNXMLDocument import XMLService

diffs = context.getDiffs()
if not diffs:
    return "no changes"

return XMLService.transform(diffs[0].htmlDiff(), "/home/simon/xml/cnxml/style/unibrowser.xsl")
# Fragment of a script body: `stylesheet`, `source`, `params` and `context`
# are defined outside the lines visible here.
#CNXML_XSL = 'http://cnx.rice.edu/technology/cnxml/stylesheet/unibrowser.xsl'
CNXML_XSL = CNXML_RENDER_XSL

# Fall back to the default render stylesheet when none was supplied.
if not stylesheet:
    stylesheet = CNXML_XSL
stylesheets = [stylesheet]

### for old CNXML (< 0.5) ###
# NOTE(review): the test is "doctype does not contain '0.5'", so any doctype
# naming another version also gets the upgrade stylesheet - confirm intent.
doctype = getattr(context, 'doctype', None)
if doctype and doctype.find('0.5') == -1:
    from Products.CNXMLDocument import CNXML_UPGRADE_XSL
    # The upgrade stylesheet goes first so it runs before the render one.
    stylesheets.insert(0, CNXML_UPGRADE_XSL)
### /upgrade ###

# Parse the source and grab the namespaces
doc = XMLService.parseString(source)
sourceNs = XMLService.listDocNamespaces(doc)

# Figure out our content types
has_math = MATHML_NS in sourceNs
# content_type_decide returns three values; the first two become pipeline
# parameters and the third is kept in `ns`.
params['doctype'], params['mimetype'], ns = context.content_type_decide(has_math=has_math)

# Transform source

result = XMLService.xsltPipeline(doc, stylesheets, **params)

# Set content-type
context.REQUEST.RESPONSE.setHeader('Content-Type', "%s; charset=utf-8" % params['mimetype'])

# Prepend doctype
# The header is built from the doctype chosen above; how it is combined with
# `result` is outside the lines visible here.
header = context.xmlheader(params['doctype'])
    def toCnxml(self, strXml, objZipFile):
        """Convert OOo XML text to tidied CNXML text.

        `strXml` is the OOo XML to convert and `objZipFile` is the open
        archive it came from; 'styles.xml' is read from it and it is handed
        to addMathML.

        The pipeline is: OO2OO_XSL clean-up, addSectionTags, addMathML,
        OO2CNXML_XSL, symbolReplace, autoIds, validation, CNXMLTIDY_XSL.
        Each of the first three steps falls back to its own input when it
        fails or returns nothing.  If the result does not validate and
        sections or MathML were added, the conversion is retried once from
        the untouched `strXml`.

        Raises OOoImportError when the generated CNXML is still invalid.
        """
        # stow styles.xml in a tempfile
        styles_xml = objZipFile.read('styles.xml')
        (tmpsfile, tmpsname) = tempfile.mkstemp('.OOo')
        os.write(tmpsfile, styles_xml)
        os.close(tmpsfile)
        stylesPath=tmpsname

        #
        # not strictly required.  this xform removes empty paragraphs.
        # makes other oo 2 cnxml xforms possible.
        #
        # NOTE(review): the bare excepts in this method also swallow
        # non-error exceptions such as KeyboardInterrupt.
        try:
            strOOoXml = XMLService.transform(strXml, OO2OO_XSL, stylesPath=stylesPath)
            if len(strOOoXml) == 0:
                zLOG.LOG("OOo2CNXML Transform", zLOG.INFO, "OOo to OOo XSL transform failed.");
                strOOoXml = strXml
        except:
            zLOG.LOG("OOo2CNXML Transform", zLOG.INFO, "OOo to OOo XSL transform failed.");
            strOOoXml = strXml

        # Clean up styles.xml tempfile
        os.remove(tmpsname)

        #
        # addSectionTags() calls the SAX parser.parse() which expects a file argument
        # thus we force the xml string into being a file object
        #
        try:
            strSectionedXml = addSectionTags(StringIO(strOOoXml))
            if len(strSectionedXml) > 0:
                bAddedSections = True
            else:
                zLOG.LOG("OOo2CNXML Transform", zLOG.INFO, "Failed to add sections.");
                strSectionedXml = strOOoXml
                bAddedSections = False
        except:
            zLOG.LOG("OOo2CNXML Transform", zLOG.INFO, "Failed to add sections.");
            strSectionedXml = strOOoXml
            bAddedSections = False

        #
        # add external MathML as child of <draw:object> via SAX parser.
        #
        try:
            strMathedXml = addMathML(StringIO(strSectionedXml), objZipFile)
            if len(strMathedXml) > 0:
                bAddedMath = True
            else:
                zLOG.LOG("OOo2CNXML Transform", zLOG.INFO, "Failed to add MathML.");
                strMathedXml = strSectionedXml
                bAddedMath = False
        except:
            zLOG.LOG("OOo2CNXML Transform", zLOG.INFO, "Failed to add MathML.");
            strMathedXml = strSectionedXml
            bAddedMath = False

        #
        # oo 2 cnxml via xsl transform.
        #
        # NOTE: bTransformed is assigned here but never read in this method.
        try:
            strCnxml = XMLService.transform(strMathedXml, OO2CNXML_XSL)
            bTransformed = True
        except:
            zLOG.LOG("OOo2CNXML Transform", zLOG.INFO, "OOo to CNXML XSL transform failed.");
            # set strCnxml to invalid CNXML ...
            strCnxml = '<>'
            bTransformed = False

        #
        # Replace Word Symbol Font with correct entity
        #
        strCnxml = symbolReplace(strCnxml, UNICODE_DICTIONARY)

        #
        # Global id generation
        #
        strCnxml = autoIds(strCnxml, prefix='oo-')

        #
        # Error handling
        #
        errors = XMLService.validate(strCnxml)
        if errors:

            if bAddedSections or bAddedMath:
                zLOG.LOG("OOo2CNXML Transform", zLOG.INFO, "Invalid CNXML generated. Trying w/o sections and MathML. Errors were \n" + str([str(e) for e in errors]))

                # The retry starts from the original strXml (not the
                # OO2OO-cleaned text) and skips symbolReplace.
                try:
                    strCnxml = XMLService.transform(strXml, OO2CNXML_XSL)
                    strCnxml = autoIds(strCnxml, prefix='oo-')
                except:
                    zLOG.LOG("OOo2CNXML Transform", zLOG.INFO, "OOo to CNXML XSL transform failed again with the undoctored OOo Xml.");
                    strCnxml = '<>'

                errors = XMLService.validate(strCnxml)
                if errors:
                    zLOG.LOG("OOo2CNXML Transform", zLOG.INFO, "Still...invalid CNXML. errors were \n" + str(errors))
                    raise OOoImportError, "Generated CNXML is invalid"
            else:
                # Nothing optional was added, so there is no simpler input to
                # retry with.
                zLOG.LOG("OOo2CNXML Transform", zLOG.INFO, "Invalid CNXML generated. errors were \n" + str(errors))
                raise OOoImportError, "Generated CNXML is invalid"

        #
        # Tidy up the CNXML
        #
        docCnxmlClean = XMLService.transform(strCnxml, CNXMLTIDY_XSL)

        return str(docCnxmlClean)
Example #17
0
 def mdml2json(self, content):
     """Return `content` transformed with the MDML2JSON_XSL stylesheet."""
     converted = XMLService.transform(content, MDML2JSON_XSL)
     return converted
Example #18
0
 def cnxml2json(self, content):
     """Return `content` transformed with the CNXML2JSON_XSL stylesheet."""
     converted = XMLService.transform(content, CNXML2JSON_XSL)
     return converted
    def convert(self, data, outdata, **kwargs):
        """Input is a zip file. Output is idata, with getData being index.cnxml and subObjects being other siblings.

        `data` is the raw zip content; `outdata` is filled in and returned.
        A directory prefix shared by all entries is stripped from their
        names.  Entries in subdirectories are not imported (only the
        top-level directory name is recorded), 'index_auto_generated.cnxml'
        and an empty 'index.cnxml' are recorded as ignored, and every other
        file becomes a sub-object.  A non-empty 'index.cnxml' becomes the
        data and its metadata is extracted via MDML2JSON_XSL.

        The outdata metadata receives 'subdirs', 'ignored' and 'metadata'.
        """
        fakefile = StringIO(data)
        archive = ZipFile(fakefile, 'r')

        subdirs = {}
        ignored = []
        objects = {}
        mdata = {}
        # The archive and its backing buffer are closed even when reading or
        # metadata extraction fails part-way through.
        try:
            namelist = archive.namelist()
            lenlist = len(namelist)
            prefix = ''
            # rfind() gives -1 when there is no '/', which makes the slice
            # below collapse to the empty prefix.
            if lenlist > 1:
                prefix = os.path.commonprefix(namelist)
                prefix = prefix[:prefix.rfind("/") + 1]
            elif lenlist == 1:
                name = namelist[0]
                prefix = name[:name.rfind("/") + 1]

            preflen = len(prefix)
            for name in namelist:
                modname = name[preflen:]
                if not modname:  # some zip programs store directories by themselves
                    continue
                isubdir = modname.find('/')
                if isubdir != -1:  # subdirs, incl. especially 'stylesheets', not imported
                    subdirs[modname[:isubdir]] = 1
                    continue
                ## index.xhtml / README filtering is disabled until we get a
                ## better handle on "viewable" export...
                if modname == 'index_auto_generated.cnxml':  # do not import autogenerated cnxml
                    ignored.append('index_auto_generated.cnxml')
                    continue
                unzipfile = archive.read(name)
                if modname != "index.cnxml":
                    objects[modname] = unzipfile
                    continue
                if not unzipfile:
                    ignored.append('index.cnxml')
                    continue

                outdata.setData(unzipfile)
                # Parse out the mdml for trusted import
                jsonstr = XMLService.transform(unzipfile, MDML2JSON_XSL)
                metadict = demjson.decode(jsonstr)

                # First, direct copies
                for k in ('abstract', 'title', 'language'):
                    val = metadict.get(k)
                    if isinstance(val, unicode):
                        val = val.encode('UTF-8')
                    mdata[k] = val or ''

                # Now, unwrap one level of dict for lists
                for k in ('subjectlist', 'keywordlist'):
                    listdict = metadict.get(k)
                    if listdict:
                        lkey = list(listdict)[0]  # should only be one
                        mlist = listdict[lkey]
                        # A lone entry arrives as a bare string, not a list.
                        if isinstance(mlist, basestring):
                            listdict[lkey] = [mlist]
                        mdata.update(listdict)

                # Rename
                if 'content-id' in metadict:
                    mdata['objectId'] = metadict['content-id'].encode('UTF-8')
                if 'license' in metadict:
                    license_info = metadict['license']
                    if 'url' in license_info:
                        mdata['license'] = license_info['url'].encode('UTF-8')
                    else:
                        mdata['license'] = license_info['href'].encode('UTF-8')

                # DateTime strings
                for k in ('created', 'revised'):
                    if k in metadict:
                        mdata[k] = DateTime(metadict[k])

                # And the trickiest, unwrap and split roles (userids must be str, not unicode)
                if 'roles' in metadict:
                    roles = metadict['roles']['role']
                    # NOTE(review): presumably a single role can arrive as one
                    # dict rather than a list, like the lists above - wrap it
                    # so the loops below see role dicts either way.
                    if isinstance(roles, dict):
                        roles = [roles]
                    mdata.update(
                        dict([(r['type'] + 's', str(r['_text']).split())
                              for r in roles]))
                    #FIXME need to do collaborators here, as well - untested below
                    mdata['collaborators'] = {}.fromkeys(' '.join([
                        r['_text'] for r in roles
                    ]).encode('UTF-8').split()).keys()
        finally:
            archive.close()
            fakefile.close()

        meta = outdata.getMetadata()
        meta['subdirs'] = list(subdirs)
        meta['ignored'] = ignored
        meta['metadata'] = mdata

        outdata.setSubObjects(objects)
        return outdata
    def convert(self, data, outdata, **kwargs):
        """Input is a zip file. Output is idata, with getData being index.cnxml and subObjects being other siblings.

        `data` is the raw zip content; `outdata` is filled in and returned.
        A directory prefix shared by all entries is stripped from their
        names.  Entries in subdirectories are not imported (only the
        top-level directory name is recorded), 'index_auto_generated.cnxml'
        and an empty 'index.cnxml' are recorded as ignored, and every other
        file becomes a sub-object.  A non-empty 'index.cnxml' becomes the
        data and its metadata is extracted via MDML2JSON_XSL.

        The outdata metadata receives 'subdirs', 'ignored' and 'metadata'.
        """
        fakefile = StringIO(data)
        archive = ZipFile(fakefile, 'r')

        subdirs = {}
        ignored = []
        objects = {}
        mdata = {}
        # The archive and its backing buffer are closed even when reading or
        # metadata extraction fails part-way through.
        try:
            namelist = archive.namelist()
            lenlist = len(namelist)
            prefix = ''
            # rfind() gives -1 when there is no '/', which makes the slice
            # below collapse to the empty prefix.
            if lenlist > 1:
                prefix = os.path.commonprefix(namelist)
                prefix = prefix[:prefix.rfind("/") + 1]
            elif lenlist == 1:
                name = namelist[0]
                prefix = name[:name.rfind("/") + 1]

            preflen = len(prefix)
            for name in namelist:
                modname = name[preflen:]
                if not modname:  # some zip programs store directories by themselves
                    continue
                isubdir = modname.find('/')
                if isubdir != -1:  # subdirs, incl. especially 'stylesheets', not imported
                    subdirs[modname[:isubdir]] = 1
                    continue
                ## index.xhtml / README filtering is disabled until we get a
                ## better handle on "viewable" export...
                if modname == 'index_auto_generated.cnxml':  # do not import autogenerated cnxml
                    ignored.append('index_auto_generated.cnxml')
                    continue
                unzipfile = archive.read(name)
                if modname != "index.cnxml":
                    objects[modname] = unzipfile
                    continue
                if not unzipfile:
                    ignored.append('index.cnxml')
                    continue

                outdata.setData(unzipfile)
                # Parse out the mdml for trusted import
                jsonstr = XMLService.transform(unzipfile, MDML2JSON_XSL)
                metadict = demjson.decode(jsonstr)

                # First, direct copies
                for k in ('abstract', 'title', 'language'):
                    val = metadict.get(k)
                    if isinstance(val, unicode):
                        val = val.encode('UTF-8')
                    mdata[k] = val or ''

                # Now, unwrap one level of dict for lists
                for k in ('subjectlist', 'keywordlist'):
                    listdict = metadict.get(k)
                    if listdict:
                        lkey = list(listdict)[0]  # should only be one
                        mlist = listdict[lkey]
                        # A lone entry arrives as a bare string, not a list.
                        if isinstance(mlist, basestring):
                            listdict[lkey] = [mlist]
                        mdata.update(listdict)

                # Rename
                if 'content-id' in metadict:
                    mdata['objectId'] = metadict['content-id'].encode('UTF-8')
                if 'license' in metadict:
                    license_info = metadict['license']
                    if 'url' in license_info:
                        mdata['license'] = license_info['url'].encode('UTF-8')
                    else:
                        mdata['license'] = license_info['href'].encode('UTF-8')

                # DateTime strings
                for k in ('created', 'revised'):
                    if k in metadict:
                        mdata[k] = DateTime(metadict[k])

                # And the trickiest, unwrap and split roles (userids must be str, not unicode)
                if 'roles' in metadict:
                    roles = metadict['roles']['role']
                    # NOTE(review): presumably a single role can arrive as one
                    # dict rather than a list, like the lists above - wrap it
                    # so the loops below see role dicts either way.
                    if isinstance(roles, dict):
                        roles = [roles]
                    mdata.update(
                        dict([(r['type'] + 's', str(r['_text']).split())
                              for r in roles]))
                    #FIXME need to do collaborators here, as well - untested below
                    mdata['collaborators'] = {}.fromkeys(' '.join([
                        r['_text'] for r in roles
                    ]).encode('UTF-8').split()).keys()
        finally:
            archive.close()
            fakefile.close()

        meta = outdata.getMetadata()
        meta['subdirs'] = list(subdirs)
        meta['ignored'] = ignored
        meta['metadata'] = mdata

        outdata.setSubObjects(objects)
        return outdata
if __name__ == "__main__":
     strZipFile                     = sys.argv[1]
     strInputOOoXmlFile             = sys.argv[2]
     strOutputMassageOOoXmlFileBase = sys.argv[3]
     strOutputCnxmlFile             = sys.argv[4]
     #print 'arg1 is \n' + sys.argv[1]
     #print 'arg2 is \n' + sys.argv[2]
     #print 'arg3 is \n' + sys.argv[3]
     #print 'arg4 is \n' + sys.argv[4]

     fileInputXml = open(strInputOOoXmlFile)
     strInputOOoXml = fileInputXml.read()
     fileInputXml.close()

     strOOoXml = strInputOOoXml
     doc = XMLService.parseString(strOOoXml)

     objZipFile = zipfile.ZipFile(strZipFile, 'r') # no 'rb' since 'b' => binary?

     #
     # Pass #1 - OOo Xml to OOo Xml xform - change one entry table & remove empty <text:p>
     #
     try:
         styles_xml = objZipFile.read('styles.xml')
         (tmpsfile, tmpsname) = tempfile.mkstemp('.OOo')
         os.write(tmpsfile, styles_xml)
         os.close(tmpsfile)
         stylesPath=tmpsname

         strOutputMassageOOoXml = XMLService.transform(strOOoXml, OO2OO_XSL, stylesPath=stylesPath)
         if len(strOutputMassageOOoXml) > 0:
##bind container=container
##bind context=context
##bind namespace=
##bind script=script
##bind subpath=traverse_subpath
##parameters=content=None
##title=
from Products.RhaptosModuleEditor import MODULE_EIP_XSL
from Products.CNXMLDocument.XMLService import XMLError
from Products.CNXMLDocument import XMLService

request = context.REQUEST

if content is None:
    try:
        content = request['BODY']
    except KeyError:
        raise TypeError, "No content provided"

results = XMLService.validate(
    content,
    url="http://cnx.rice.edu/technology/cnxml/schema/rng/0.7/cnxml-fragment.rng"
)

if results:
    request.RESPONSE.setStatus(400, "Invalid CNXML")
    return results

else:
    return context.cnxml_transform(content, stylesheet=MODULE_EIP_XSL)
## Script (Python) "eip_transform"
##bind container=container
##bind context=context
##bind namespace=
##bind script=script
##bind subpath=traverse_subpath
##parameters=content=None
##title=
from Products.RhaptosModuleEditor import MODULE_EIP_XSL
from Products.CNXMLDocument.XMLService import XMLError
from Products.CNXMLDocument import XMLService

request = context.REQUEST

versioninfo = context.rmeVersionInfo()
cnxmlvers = versioninfo['cnxmlvers'] or '0.7'
if content is None:
    try:
        content = request['BODY']
    except KeyError:
        raise TypeError, "No content provided"

results = XMLService.validate(content, url="http://cnx.rice.edu/technology/cnxml/schema/rng/%s/cnxml-fragment.rng" % cnxmlvers)

if results:
    request.RESPONSE.setStatus(400, "Invalid CNXML")
    return results

else:
    return context.cnxml_transform(content, stylesheet=MODULE_EIP_XSL)
 def SearchableText(self):
     """Return the text of the module for searching.

     The source of the default file is passed through
     XMLService.transform() together with ``baretext`` and the result is
     returned unchanged.
     """
     source = self.getDefaultFile().getSource()
     return XMLService.transform(source, baretext)
 def cnxml2json(self, content):
     """Return ``content`` transformed with CNXML2JSON_XSL."""
     converted = XMLService.transform(content, CNXML2JSON_XSL)
     return converted
 def mdml2json(self, content):
     """Return ``content`` transformed with MDML2JSON_XSL."""
     converted = XMLService.transform(content, MDML2JSON_XSL)
     return converted
 def testParseDocumentReturnsXMLDoc(self):
     """parseDoc must return an etree._ElementTree instance"""
     parsed = XMLService.parseDoc(noNS)
     is_element_tree = isinstance(parsed, etree._ElementTree)
     self.failUnless(is_element_tree)
    def toCnxml(self, strXml, objZipFile):
        """Convert OpenOffice.org XML into validated, tidied CNXML.

        The input is passed through a chain of best-effort steps; each
        optional step that fails is logged and skipped, and the pipeline
        continues with the output of the previous step:

          1. OO2OO_XSL clean-up transform (given the path of a temporary
             copy of styles.xml)
          2. addSectionTags()
          3. addMathML()
          4. OO2CNXML_XSL transform, symbolReplace() and autoIds()

        If the result does not validate and sections or MathML were added,
        the original strXml is transformed again without steps 1-3.

        Arguments:
          strXml     -- the OOo document XML as a string
          objZipFile -- source of 'styles.xml' (via read()); also handed
                        to addMathML()

        Returns the output of the CNXMLTIDY_XSL transform as a str.

        Raises OOoImportError when no valid CNXML could be generated.
        """
        # Stow styles.xml in a tempfile so its path can be handed to the
        # OOo-to-OOo stylesheet.  The nested try/finally blocks guarantee the
        # descriptor is closed and the file removed even if writing fails.
        styles_xml = objZipFile.read('styles.xml')
        (tmpsfile, tmpsname) = tempfile.mkstemp('.OOo')
        try:
            try:
                os.write(tmpsfile, styles_xml)
            finally:
                os.close(tmpsfile)

            #
            # not strictly required.  this xform removes empty paragraphs.
            # makes other oo 2 cnxml xforms possible.
            #
            try:
                strOOoXml = XMLService.transform(strXml, OO2OO_XSL, stylesPath=tmpsname)
            except Exception:
                # Treated like an empty result below.
                strOOoXml = ''
        finally:
            # Clean up styles.xml tempfile
            os.remove(tmpsname)

        if not strOOoXml:
            zLOG.LOG("OOo2CNXML Transform", zLOG.INFO, "OOo to OOo XSL transform failed.")
            strOOoXml = strXml

        #
        # addSectionTags() calls the SAX parser.parse() which expects a file argument
        # thus we force the xml string into being a file object
        #
        try:
            strSectionedXml = addSectionTags(StringIO(strOOoXml))
        except Exception:
            strSectionedXml = ''
        bAddedSections = bool(strSectionedXml)
        if not bAddedSections:
            zLOG.LOG("OOo2CNXML Transform", zLOG.INFO, "Failed to add sections.")
            strSectionedXml = strOOoXml

        #
        # add external MathML as child of <draw:object> via SAX parser.
        #
        try:
            strMathedXml = addMathML(StringIO(strSectionedXml), objZipFile)
        except Exception:
            strMathedXml = ''
        bAddedMath = bool(strMathedXml)
        if not bAddedMath:
            zLOG.LOG("OOo2CNXML Transform", zLOG.INFO, "Failed to add MathML.")
            strMathedXml = strSectionedXml

        #
        # oo 2 cnxml via xsl transform.
        #
        try:
            strCnxml = XMLService.transform(strMathedXml, OO2CNXML_XSL)
        except Exception:
            zLOG.LOG("OOo2CNXML Transform", zLOG.INFO, "OOo to CNXML XSL transform failed.")
            # set strCnxml to invalid CNXML so that validation below fails
            # and the retry / error path is taken.
            strCnxml = '<>'

        #
        # Replace Word Symbol Font with correct entity
        #
        strCnxml = symbolReplace(strCnxml, UNICODE_DICTIONARY)

        #
        # Global id generation
        #
        strCnxml = autoIds(strCnxml, prefix='oo-')

        #
        # Error handling
        #
        errors = XMLService.validate(strCnxml)
        if errors:
            if not (bAddedSections or bAddedMath):
                # Nothing optional was applied, so there is nothing to retry without.
                zLOG.LOG("OOo2CNXML Transform", zLOG.INFO, "Invalid CNXML generated. errors were \n" + str(errors))
                raise OOoImportError("Generated CNXML is invalid")

            zLOG.LOG("OOo2CNXML Transform", zLOG.INFO, "Invalid CNXML generated. Trying w/o sections and MathML. Errors were \n" + str([str(e) for e in errors]))

            # Retry from the untouched input, skipping the optional steps.
            # NOTE(review): unlike the first attempt, this retry does not run
            # symbolReplace() -- confirm whether that is intentional.
            try:
                strCnxml = XMLService.transform(strXml, OO2CNXML_XSL)
                strCnxml = autoIds(strCnxml, prefix='oo-')
            except Exception:
                zLOG.LOG("OOo2CNXML Transform", zLOG.INFO, "OOo to CNXML XSL transform failed again with the undoctored OOo Xml.")
                strCnxml = '<>'

            errors = XMLService.validate(strCnxml)
            if errors:
                zLOG.LOG("OOo2CNXML Transform", zLOG.INFO, "Still...invalid CNXML. errors were \n" + str(errors))
                raise OOoImportError("Generated CNXML is invalid")

        #
        # Tidy up the CNXML
        #
        docCnxmlClean = XMLService.transform(strCnxml, CNXMLTIDY_XSL)

        return str(docCnxmlClean)
 def SearchableText(self):
     """Return the text of the module for searching.

     The source of the default file is passed through
     XMLService.transform() together with ``baretext`` and the result is
     returned unchanged.
     """
     source = self.getDefaultFile().getSource()
     return XMLService.transform(source, baretext)