Example #1
0
 def __init__(self):
     """Initialise file names, the font data source and output font defaults."""
     # names kept as instance-level constants
     self.CONTENTXML, self.STYLESXML = 'content.xml', 'styles.xml'
     # output defaults: font "ABC-TEXT-05", no explicit font size
     self.outputFontSize = None
     self.outputFont = "ABC-TEXT-05"
     # font data source, plus the unicode data looked up for the default font
     fontData = FontData()
     self.fd = fontData
     self.data = fontData.unicodeData(self.outputFont)
Example #2
0
def convertTxtFile(inputFile, outputFile, outputFont):
    """
    Create a legacy-encoded plain text file from a Khmer Unicode text file.

    inputFile: path of the text file to read; each line is decoded as UTF-8.
    outputFile: path of the file to write; must differ from inputFile.
    outputFont: legacy output font, validated with FontData.isConvertable().

    Raises TypeError when both paths are equal or the font is unknown, and
    IOError when one of the files cannot be opened.
    """

    if (inputFile == outputFile):
        raise TypeError('Input file and output file must be different!')

    fd = FontData()
    if (not fd.isConvertable(outputFont)):
        raise TypeError('Unknown output font ' + outputFont + ' !')

    try:
        fileIn = open(inputFile, 'r')
    except IOError:
        raise IOError('Cannot open file "' + inputFile + '" for reading!')

    # try/finally guarantees both handles are released, even when the output
    # file cannot be opened or the conversion of a line fails.
    try:
        try:
            fileOut = open(outputFile, 'w')
        except IOError:
            raise IOError('Cannot open file "' + outputFile + '" for writing!')

        try:
            data = fd.unicodeData(outputFont)

            # reading line by line from the input file, until end of file.
            for line in fileIn:
                result = line.decode('utf-8')
                result = legacyReorder.reorder(result)
                result = legacyConverter.converter(result, data)
                fileOut.write(result)
        finally:
            fileOut.close()
    finally:
        fileIn.close()
def convertTxtFile(inputFile, outputFile, outputFont):
    """
    Create a legacy-encoded plain text file from a Khmer Unicode text file.

    inputFile: path of the text file to read; each line is decoded as UTF-8.
    outputFile: path of the file to write; must differ from inputFile.
    outputFont: legacy output font, validated with FontData.isConvertable().

    Raises TypeError when both paths are equal or the font is unknown, and
    IOError when one of the files cannot be opened.
    """

    if (inputFile == outputFile):
        raise TypeError('Input file and output file must be different!')

    fd = FontData()
    if (not fd.isConvertable(outputFont)):
        raise TypeError('Unknown output font ' + outputFont + ' !')

    try:
        fileIn = open(inputFile, 'r')
    except IOError:
        raise IOError('Cannot open file "' +  inputFile + '" for reading!')

    # The nested try/finally blocks make sure neither file handle leaks when
    # opening the output file or converting a line raises.
    try:
        try:
            fileOut = open(outputFile, 'w')
        except IOError:
            raise IOError('Cannot open file "' +  outputFile + '" for writing!')

        try:
            data = fd.unicodeData(outputFont)

            # reading line by line from the input file, until end of file.
            for line in fileIn:
                result = line.decode('utf-8')
                result = legacyReorder.reorder(result)
                result = legacyConverter.converter(result, data)
                fileOut.write(result)
        finally:
            fileOut.close()
    finally:
        fileIn.close()
def convertTxtFile(inputFileName, outputFileName, fontType, encoding):
    """
    Convert a Khmer legacy plain text file into a Unicode (UTF-8) text file.

    inputFileName: legacy plain text file to read
    outputFileName: Khmer Unicode plain text file to write; must differ from
        inputFileName
    fontType: type "abc" or font name "ABC-TEXT-5"
    encoding: encoding of the input file, e.g. cp1252, utf-8, iso-8859-1;
        checked with FontData.canDecode()

    Raises TypeError when both file names are equal or the encoding is not
    accepted, and IOError when one of the files cannot be opened.
    """
    if (inputFileName == outputFileName):
        raise TypeError('input file and output file must not be the same!')

    fd = FontData()
    if (not fd.canDecode(encoding)):
        raise TypeError('unknow encoding!')

    try:
        fin = open(inputFileName, "r")
    except IOError:
        raise IOError('Cannot open file "' +  inputFileName + '" for reading!')

    # try/finally releases both handles even if opening the output file or
    # converting a line fails part-way through.
    try:
        try:
            fout = open(outputFileName, "w")
        except IOError:
            raise IOError('Cannot open file "' +  outputFileName + '" for writing!')

        try:
            data = fd.legacyData(fontType)
            # reading line by line from the input file, until end of file.
            for line in fin:
                sin = fd.changeEncoding(line, encoding)
                result = unicodeProcess.process(sin, data)
                bufout = unicodeReorder.reorder(result)
                fout.write(bufout.encode('utf-8'))
        finally:
            fout.close()
    finally:
        fin.close()
 def __init__(self):
     """Initialise file names, style bookkeeping and output font defaults."""
     # names kept as instance-level constants
     self.CONTENTXML, self.STYLESXML = 'content.xml', 'styles.xml'
     # both style registries start out empty
     self.convertibleStyle = dict()
     self.nonConvertibleStyle = dict()
     # output defaults: font "Khmer OS", no explicit font size
     self.outputFont, self.outputFontSize = "Khmer OS", None
     # font data source
     self.fd = FontData()
class TestFontData(unittest.TestCase):
    """Spot-check the tables FontData returns after reading fontdata.xml."""

    dataClass = FontData()

    def setUp(self):
        # the shared FontData instance re-reads the XML before every test
        self.dataClass.readXML("fontdata.xml")

    def testABCDataLegacy(self):
        # index 1 of the legacy data maps a byte value to a unicode character
        legacyTable = self.dataClass.legacyData("abc")[1]
        self.assertEqual(legacyTable[0xb2], unichr(0x201c))
        self.assertEqual(legacyTable[0xb3], unichr(0x201d))

    def testABCDataUnicode(self):
        # [0][0] of the unicode data maps a unicode character to a legacy char
        unicodeTable = self.dataClass.unicodeData("abc")[0][0]
        expected = (
            (unichr(0x201c), 0xb2),
            (unichr(0x201d), 0xb3),
            (u'«', 0xb2),
            (u'»', 0xb3),
        )
        for character, byteValue in expected:
            self.assertEqual(unicodeTable[character], chr(byteValue))

    def testLimonDataLegacy(self):
        legacyTable = self.dataClass.legacyData("limon")[1]
        self.assertEqual(legacyTable[0x7b], unichr(0x201c))
        self.assertEqual(legacyTable[0x7d], unichr(0x201d))

    def testLimonDataUnicode(self):
        unicodeTable = self.dataClass.unicodeData("limon")[0][0]
        expected = (
            (unichr(0x201c), 0x7b),
            (unichr(0x201d), 0x7d),
            (u'«', 0x7b),
            (u'»', 0x7d),
            (u'ឲ', 0x5b),
        )
        for character, byteValue in expected:
            self.assertEqual(unicodeTable[character], chr(byteValue))
 def __init__(self):
     """Set default file names, empty style tables and output font settings."""
     self.CONTENTXML, self.STYLESXML = 'content.xml', 'styles.xml'
     # no style has been classified yet
     self.convertibleStyle, self.nonConvertibleStyle = {}, {}
     # font data source
     self.fd = FontData()
     # default output font is "Khmer OS"; font size stays unset
     self.outputFontSize = None
     self.outputFont = "Khmer OS"
Example #8
0
class legacyConvertOdt:
    def __init__(self):
        self.CONTENTXML = 'content.xml'
        self.STYLESXML = 'styles.xml'
        self.fd = FontData()
        self.outputFont = "ABC-TEXT-05"
        self.outputFontSize = None
        self.data = self.fd.unicodeData(self.outputFont)

    def convertOdtFile(self, inputFileName, outputFileName, outputFont, outputFontSize = None):
        """This function converts OpenOffice.org Writer file.
        inputFileName : name of input file to convert
        outputFileName : name of output file. Default value is converted-inputFileName.
        outputFont : legacy output font name. Default depends on the font type.
        outputFontSize : force the font size the output file will use. value = None to ignore.
        """
        if (not self.fd.isConvertable(outputFont)):
            raise TypeError('unknown output font ' + outputFont + '!')
    
        if (inputFileName == outputFileName):
            raise TypeError('input file and output file must be different!')
    
        try:
            # read zip file (.odt)
            zipIn = zipfile.ZipFile(inputFileName, "r")
        except IOError:
            raise IOError('Cannot open file "' +  inputFileName + '" for reading!')
    
        if (not (self.CONTENTXML and self.STYLESXML) in zipIn.namelist()):        
            raise TypeError('Input file' + inputFileName + 'is not an odt file!')
    
        try:
            # create new zip file (.odt)
            zipOut = zipfile.ZipFile(outputFileName, "w", DEFLATED)
        except IOError:               
            raise IOError('Cannot open file "' +  outputFileName + '" for writing!')
        
        # get data for the font
        self.outputFont = self.fd.defaultFont(outputFont)
        self.data = self.fd.unicodeData(self.outputFont)
        if (outputFontSize):
            self.outputFontSize = str(outputFontSize) + 'pt'
        
        for file in zipIn.namelist():
            fdata = zipIn.read(file)
            # do the converting for content.xml only
            if (file == self.CONTENTXML):
                fdata = self.processContent(fdata)
                # TODO: do we need to test the type? When do we not want to encode in UTF-8 ?
                if (type(fdata) == unicode):
                    fdata = fdata.encode('utf-8')
            elif (file == self.STYLESXML):
                fdata = self.processStyle(fdata)
                # TODO: do we need to test the type? When do we not want to encode in UTF-8 ?
                if (type(fdata) == unicode):
                    fdata = fdata.encode('utf-8')
            zipOut.writestr(file, fdata)
        zipOut.close()
        zipIn.close()
    
    def processContent(self, xmlData):
        """
        input: xml data in unicode string
        return: xml data string in legacy encoding where text is converted
        """
        self.xmldoc = minidom.parseString(xmlData)
        
        officeNode = self.xmldoc.getElementsByTagName('office:text')
        officeAutoStylesNode = self.xmldoc.getElementsByTagName('office:automatic-styles')[0]
        officeFontFaceDecls = self.xmldoc.getElementsByTagName('office:font-face-decls')[0]
        # add font information
        self.addFontInfo(officeAutoStylesNode, officeFontFaceDecls)
        # go through office node and convert to legacy.
        self.goThru(officeNode, self.convertIfUnicode)
        return self.xmldoc.toxml()
    
    def processStyle(self, xmldata):
        """change font name and size, convert data to legacy in xmldata
        @param xmldata: xml string to parse."""
        self.xmldoc = minidom.parseString(xmldata)
        officeAutoStylesNode = self.xmldoc.getElementsByTagName('office:automatic-styles')[0]
        officeFontFaceDecls = self.xmldoc.getElementsByTagName('office:font-face-decls')[0]
        officeMasterStylesNode = self.xmldoc.getElementsByTagName('office:master-styles')
        # go through node, replace font, and convert data to legacy.
        self.addFontInfo(officeAutoStylesNode, officeFontFaceDecls)
        self.goThru(officeMasterStylesNode, self.convertIfUnicode)
        return self.xmldoc.toxml('utf-8')
    
    def goThru (self, nodelist, function):
        """go through nodelist and call function with child node as argument.
        @param nodelist: dom's node list.
        @param function: function to call, child argument will be provided by goThru."""
        for node in nodelist:
            if node.hasChildNodes():
                for child in node.childNodes:
                    function(child)
                self.goThru (node.childNodes, function)
    
    def addFontInfo(self, autoStyleNode, declsNode):
        """add "style:style" to node."""
        # add font declaration
        styleFontFaceNode = self.xmldoc.createElement('style:font-face')
        styleFontFaceNode.setAttribute('style:name', self.outputFont)
        styleFontFaceNode.setAttribute('svg:font-family', self.outputFont)
        declsNode.appendChild(styleFontFaceNode)
    
        # add font style
        styleNode = self.xmldoc.createElement('style:style')
        styleNode.setAttribute('style:family', 'text')
        styleNode.setAttribute('style:name', KHMERSTYLE)
        styleTextPropNode = self.xmldoc.createElement('style:text-properties')
        styleTextPropNode.setAttribute('style:font-name', self.outputFont)
        if (self.outputFontSize):
            styleTextPropNode.setAttribute('fo:font-size', self.outputFontSize)
        styleNode.appendChild(styleTextPropNode)
        autoStyleNode.appendChild(styleNode)
    
    def convertIfUnicode(self, node):
        """
        take Khmer Unicode data out of current node, convert it and put
        it in a new node which mark as khmerConverter_DefaultStyle.
        """
        if not node.nodeValue:
            return node
        sin = node.data
        newNode = self.xmldoc.createDocumentFragment()
        cursor = 0
        charCount = len(sin)
        while (cursor < charCount):
            khmStr = u''
            othStr = u''
            while (cursor < charCount):
                val = ord(sin[cursor])
                # in khmer range
                if ((val >= MINUNIC) and (val <= MAXUNIC)) or (STARTKHMER.find(unichr(val)) != -1) or (len(khmStr) > 0 and INKHMER.find(unichr(val)) != -1):
                    if (othStr):
                        break
                    khmStr += sin[cursor]
                # in other range
                else:
                    if (khmStr):
                        break
                    othStr += sin[cursor]
                cursor += 1
            # end of while (khmer string or other string found)
            if (khmStr):
                # convert khmer text
                khmStr = legacyReorder.reorder(khmStr)
                khmStr = legacyConverter.converter(khmStr, self.data)
                khmStr = khmStr.decode('cp1252')
                # add new khmer node
                khmNode = self.xmldoc.createElement('text:span')
                khmNode.setAttribute('text:style-name', KHMERSTYLE)
                # add data
                txtNode = self.xmldoc.createTextNode(khmStr)
                khmNode.appendChild(txtNode)
                newNode.appendChild(khmNode)
            elif (othStr):
                txtNode = self.xmldoc.createTextNode(othStr)
                newNode.appendChild(txtNode)
                
        node.parentNode.replaceChild(newNode, node)
class unicodeConvertOdt:
    def __init__(self):
        self.CONTENTXML = 'content.xml'
        self.STYLESXML = 'styles.xml'
        self.convertibleStyle = {}
        self.nonConvertibleStyle = {}
        self.fd = FontData()
        self.outputFont = "Khmer OS"
        self.outputFontSize = None

    def convertOdtFile(self,
                       inputFileName,
                       outputFileName,
                       outputFont=None,
                       outputFontSize=None):
        """This function convert OpenOffice.Org writer file
        inputFileName: the name of file you want to convert. 
        outputFileName: the result file name. Default value is converted-inputFileName
        outputFont: font name to override. default value is Khmer OS.
        outputFontSize: a value to override font size in odt file, value = None to ignore."""

        self.outputFont = outputFont
        if (outputFontSize):
            self.outputFontSize = str(outputFontSize) + 'pt'

        if (inputFileName == outputFileName):
            raise TypeError('input file and output file must be different!')

        try:
            # read zip file (.odt)
            zipIn = zipfile.ZipFile(inputFileName, "r")
        except IOError:
            raise IOError('Cannot open file "' + inputFileName +
                          '" for reading!')

        if (not (self.CONTENTXML and self.STYLESXML) in zipIn.namelist()):
            raise TypeError('Input file' + inputFileName +
                            'is not an odt file!')

        try:
            # create new zip file (.odt)
            zipOut = zipfile.ZipFile(outputFileName, "w", DEFLATED)
        except IOError:
            raise IOError('Cannot open file "' + outputFileName +
                          '" for writing!')

        zipOut.debug = 3
        for file in zipIn.namelist():
            fdata = zipIn.read(file)
            if (file == self.CONTENTXML):
                # read data to contentXml for later processing.
                contentXml = fdata
                continue
            elif (file == self.STYLESXML):
                fdata = self.processStyle(fdata)
            zipOut.writestr(file, fdata)

        # process the content.xml only after already read the styles.xml.
        fdata = self.processContent(contentXml)
        zipOut.writestr(self.CONTENTXML, fdata)
        zipOut.close()
        zipIn.close()

    def processContent(self, xmldata):
        """change font name and size, convert data to unicode in xmldata
        @param xmldata: xml string to parse."""
        self.xmldoc = minidom.parseString(xmldata)
        officeNode = self.xmldoc.getElementsByTagName('office:text')
        officeDocContentNode = self.xmldoc.getElementsByTagName(
            'office:document-content')
        # go through node, replace font, and convert data to unicode.
        self.goThru(officeDocContentNode, self.replaceFont)
        self.goThru(officeNode, self.convertIfLegacy)
        return self.xmldoc.toxml('utf-8')

    def processStyle(self, xmldata):
        """change font name and size, convert data to unicode in xmldata
        @param xmldata: xml string to parse."""
        self.xmldoc = minidom.parseString(xmldata)
        officeDocStylesNode = self.xmldoc.getElementsByTagName(
            'office:document-styles')
        # go through node, replace font, and convert data to unicode.
        self.goThru(officeDocStylesNode, self.replaceFont)
        self.goThru(officeDocStylesNode, self.convertIfLegacy)
        return self.xmldoc.toxml('utf-8')

    def goThru(self, nodelist, function):
        """go through nodelist and call function with child node as argument.
        @param nodelist: dom's node list.
        @param function: function to call, child argument will be provided by goThru."""
        for node in nodelist:
            if node.hasChildNodes():
                for child in node.childNodes:
                    function(child)
                self.goThru(node.childNodes, function)

    def replaceFont(self, node):
        """look for node which has "style:font-name" attribute and change its value to fontName."""
        if (not hasattr(node, "getAttribute")):
            return
        fontName = node.getAttribute('style:font-name')
        fontType = None
        if (fontName):
            try:
                fontType = self.fd.typeForFontname(fontName)
            except:
                pass
        if (fontType and hasattr(node.parentNode, "getAttribute")):
            # add name to convertible list
            self.convertibleStyle[unicode(
                node.parentNode.getAttribute('style:name'))] = fontType
            node.removeAttribute('style:font-name')
            node.setAttribute('style:font-name-complex', self.outputFont)
            if (self.outputFontSize):
                node.setAttribute('style:font-size-complex',
                                  self.outputFontSize)

        styleName = node.getAttribute('style:name')
        if (styleName):
            # if node's parent style is also convertible, node is also convertible.
            # search in child if child also has style:font-name (which will override parent)
            # then will not add to convertible list.
            if node.hasChildNodes():
                for child in node.childNodes:
                    if (child.hasAttribute('style:font-name')) and (hasattr(
                            child, "getAttribute")):
                        fontName = child.getAttribute('style:font-name')
                        if fontName:
                            try:
                                fontType = self.fd.typeForFontname(fontName)
                            except:
                                self.nonConvertibleStyle[styleName] = True
                                return

            parentStyleName = node.getAttribute('style:parent-style-name')
            if parentStyleName and self.convertibleStyle.has_key(
                    parentStyleName):
                self.convertibleStyle[styleName] = self.convertibleStyle[
                    parentStyleName]
                node.setAttribute('style:name', self.outputFont)
                node.setAttribute('svg:font-family', self.outputFont)
            try:
                fontType = self.fd.typeForFontname(styleName)
            except:
                return
            self.convertibleStyle[styleName] = fontType
            node.setAttribute('style:name', self.outputFont)
            node.setAttribute('svg:font-family', self.outputFont)

    def convertIfLegacy(self, node):
        """look the node for information of legacy font and convert to unicode, otherwise return False.
        @param node: node to look to and convert if necessary."""

        if (not node.nodeValue):
            return False

        if (not (hasattr(node, "parentNode")
                 or hasattr(node.parentNode, "getAttribute")
                 or hasattr(node.parentNode, "parentNode")
                 or hasattr(node.parentNode.parentNode, "getAttribute"))):
            return False

        # if font is not specified on node, but node is under a parent that is
        # in the convertible list, convert the node.
        styleName = node.parentNode.getAttribute(u'text:style-name')
        parentStyleName = node.parentNode.parentNode.getAttribute(
            u'text:style-name')

        if (styleName in self.convertibleStyle):
            style = styleName
        elif (styleName in self.nonConvertibleStyle):
            return False
            style = parentStyleName
        else:
            return False

        # legacy font data's referal.
        fontname = self.convertibleStyle[style]
        sin = node.data
        try:
            sin = sin.encode('cp1252')
        except UnicodeEncodeError:
            result = u''
            part = ''
            for char in sin:
                try:
                    tmpChar = char.encode('cp1252')
                except UnicodeEncodeError:
                    if (part):
                        part = unicodeProcess.process(
                            part, self.fd.legacyData(fontname))
                        result += unicodeReorder.reorder(part)
                        part = ''
                    result += char
                else:
                    part += tmpChar
            if (part):
                part = unicodeProcess.process(part,
                                              self.fd.legacyData(fontname))
                result += unicodeReorder.reorder(part)
            sin = result
        else:
            sin = unicodeProcess.process(sin, self.fd.legacyData(fontname))
            sin = unicodeReorder.reorder(sin)
        newtext = self.xmldoc.createTextNode(sin)  # create text of Node
        node.parentNode.replaceChild(newtext, node)
Example #10
0
    def __init__(self, parent=None):
        """Build the main window: set up the generated UI, fill the combo
        boxes of both tabs and connect the widget signals to their slots.

        @param parent: parent passed on to QtGui.QMainWindow.__init__.
        """
        QtGui.QMainWindow.__init__(self, parent)
        self.ui = Ui_kconvert()
        self.ui.setupUi(self)

        # initial state; presumably "Leg" identifies the legacy tab and
        # originExt the extension of the current input - confirm in the slots
        self.tab = "Leg"
        self.originExt = ".txt"
        settingOrg = "KhmerConverter"
        settingApp = "Khmer Converter"
        # settings are kept in INI format at user scope
        self.settings = QtCore.QSettings(QtCore.QSettings.IniFormat,
                                         QtCore.QSettings.UserScope,
                                         settingOrg, settingApp)

        # window title is the application name followed by the version
        self.setWindowTitle(settingApp + ' ' + __version__.ver)

        self.connect(self.ui.tabWidget, QtCore.SIGNAL("currentChanged(int)"),
                     self.tabChanged)

        #---------------------------------------Legacy to Unicode------------------------------------------
        # legacy to Unicode --  signals of the browse buttons
        self.connect(self.ui.btnBrowseInL, QtCore.SIGNAL("clicked()"),
                     self.openDialog)
        self.connect(self.ui.btnBrowseOutL, QtCore.SIGNAL("clicked()"),
                     self.saveDialog)
        # NOTE: both document-type combo boxes are connected here, before
        # any item is added to them below.
        self.connect(self.ui.cmbDocTypeL,
                     QtCore.SIGNAL("currentIndexChanged(QString)"),
                     self.docTypeChangedL)
        self.connect(self.ui.cmbDocTypeU,
                     QtCore.SIGNAL("currentIndexChanged(QString)"),
                     self.docTypeChangedU)

        # add the document types to the legacy tab's document-type combo box
        self.typeOdt = "OpenOffice.org Writer (*.odt)"
        self.typeText = "Plain Text"
        self.typeHtml = "Web page, HTML"
        self.docTypes = [self.typeOdt, self.typeText, self.typeHtml]
        for doctType in self.docTypes:
            self.ui.cmbDocTypeL.addItem(self.tr(doctType))

        # add every font type, followed by its indented font names, to the
        # legacy tab's input-font combo box
        for font in FontData().listFontTypes():
            self.ui.cmbFontInputL.addItem(font)
            for fontName in FontData().listFontNamesForType(font):
                self.ui.cmbFontInputL.addItem("  " + fontName)

        # unicode fonts offered as output font on the legacy tab
        self.unicodeFontList = [
            'Khmer OS', 'Khmer OS Bokor', 'Khmer OS Battambang',
            'Khmer OS Content', 'Khmer OS Fasthand', 'Khmer OS Freehand',
            'Khmer OS Metal Chrieng', 'Khmer OS Muol', 'Khmer OS Muol Light',
            'Khmer OS Muol Pali', 'Khmer OS SiemReap', 'Khmer OS System'
        ]

        for i in self.unicodeFontList:
            self.ui.cmbFontOutputL.addItem(i)

        self.defaultUniFont()

        # display label -> codec name for the legacy tab's encoding combo box
        self.encodings = {
            "Plain Text (cp1252)": 'cp1252',
            "Plain Text (latin-1/iso-8859-1)": 'iso-8859-1',
            "Unicode (utf-8)": 'utf-8'
        }
        for key in self.encodings:
            self.ui.cmbEncodingL.addItem(self.tr(key))

        self.connect(self.ui.lineInputL, QtCore.SIGNAL("textChanged(QString)"),
                     self.detectDocType)
        self.connect(self.ui.chbOverrideSizeL,
                     QtCore.SIGNAL("stateChanged(int)"), self.toggleSize)

        #---------------------------------------Unicode to Legacy------------------------------------------
        # unicode to legacy --  signals of the browse buttons
        self.connect(self.ui.btnBrowseInU, QtCore.SIGNAL("clicked()"),
                     self.openDialog)
        self.connect(self.ui.btnBrowseOutU, QtCore.SIGNAL("clicked()"),
                     self.saveDialog)

        # add the document types to the unicode tab's document-type combo box
        for doctType in self.docTypes:
            self.ui.cmbDocTypeU.addItem(self.tr(doctType))

        # add every font type and its indented font names to the unicode
        # tab's output-font combo box; legacyFontList keeps the same entries
        # without the indentation
        self.legacyFontList = []
        for font in FontData().listFontTypes():
            self.ui.cmbFontOutputU.addItem(font)
            self.legacyFontList.append(font)
            for fontName in FontData().listFontNamesForType(font):
                self.ui.cmbFontOutputU.addItem("  " + fontName)
                self.legacyFontList.append(fontName)

        self.defaultLegFont()

        self.connect(self.ui.lineInputU, QtCore.SIGNAL("textChanged(QString)"),
                     self.detectDocType)
        self.connect(self.ui.chbOverrideSizeU,
                     QtCore.SIGNAL("stateChanged(int)"), self.toggleSize)

        # menubar
        self.connect(self.ui.actionQuit, QtCore.SIGNAL("triggered()"),
                     QtCore.SLOT("close()"))
        self.connect(self.ui.actionAboutQt, QtCore.SIGNAL("triggered()"),
                     QtGui.qApp, QtCore.SLOT("aboutQt()"))

        # Help menu
        self.aboutKConvert = AboutKConvert(self)
        self.connect(self.ui.actionAboutKConverter,
                     QtCore.SIGNAL("triggered()"),
                     self.aboutKConvert.showDialog)

        # set default value
        self.defaultValue()

        # instance of mime type
        self.mt = MimeTypes()

        # the convert button runs the conversion, reset restores the defaults
        self.connect(self.ui.btnConvert, QtCore.SIGNAL("clicked()"),
                     self.convert)
        self.connect(self.ui.btnReset, QtCore.SIGNAL("clicked()"),
                     self.defaultValue)
class unicodeConvertOdt:
    def __init__(self):
        self.CONTENTXML = 'content.xml'
        self.STYLESXML = 'styles.xml'
        self.convertibleStyle = {}
        self.nonConvertibleStyle = {}
        self.fd = FontData()
        self.outputFont = "Khmer OS"
        self.outputFontSize = None

    def convertOdtFile(self, inputFileName, outputFileName, outputFont = None, outputFontSize = None):
        """This function convert OpenOffice.Org writer file
        inputFileName: the name of file you want to convert. 
        outputFileName: the result file name. Default value is converted-inputFileName
        outputFont: font name to override. default value is Khmer OS.
        outputFontSize: a value to override font size in odt file, value = None to ignore."""
        
        self.outputFont = outputFont
        if (outputFontSize):
            self.outputFontSize = str(outputFontSize) + 'pt'
        
        if (inputFileName == outputFileName):
            raise TypeError('input file and output file must be different!')

        try:
            # read zip file (.odt)
            zipIn = zipfile.ZipFile(inputFileName, "r")
        except IOError:        
            raise IOError('Cannot open file "' +  inputFileName + '" for reading!')
    
        if (not (self.CONTENTXML and self.STYLESXML) in zipIn.namelist()):        
            raise TypeError('Input file' + inputFileName + 'is not an odt file!')
        
        try:
            # create new zip file (.odt)
            zipOut = zipfile.ZipFile(outputFileName, "w", DEFLATED)
        except IOError:        
            raise IOError('Cannot open file "' +  outputFileName + '" for writing!')
    
        zipOut.debug = 3
        for file in zipIn.namelist():
            fdata = zipIn.read(file)
            if (file == self.CONTENTXML):
                # read data to contentXml for later processing.
                contentXml = fdata
                continue
            elif (file == self.STYLESXML):
                fdata = self.processStyle(fdata)
            zipOut.writestr(file, fdata)
    
        # process the content.xml only after already read the styles.xml.
        fdata = self.processContent(contentXml)
        zipOut.writestr(self.CONTENTXML, fdata)
        zipOut.close()
        zipIn.close()
    
    def processContent(self, xmldata):
        """change font name and size, convert data to unicode in xmldata
        @param xmldata: xml string to parse."""
        self.xmldoc = minidom.parseString(xmldata)
        officeNode = self.xmldoc.getElementsByTagName('office:text')
        officeDocContentNode = self.xmldoc.getElementsByTagName('office:document-content')
        # go through node, replace font, and convert data to unicode.
        self.goThru(officeDocContentNode, self.replaceFont)
        self.goThru(officeNode, self.convertIfLegacy)
        return self.xmldoc.toxml('utf-8')
    
    def processStyle(self, xmldata):
        """change font name and size, convert data to unicode in xmldata
        @param xmldata: xml string to parse."""
        self.xmldoc = minidom.parseString(xmldata)
        officeDocStylesNode = self.xmldoc.getElementsByTagName('office:document-styles')
        # go through node, replace font, and convert data to unicode.
        self.goThru(officeDocStylesNode, self.replaceFont)
        self.goThru(officeDocStylesNode, self.convertIfLegacy)
        return self.xmldoc.toxml('utf-8')
    
    def goThru (self, nodelist, function):
        """go through nodelist and call function with child node as argument.
        @param nodelist: dom's node list.
        @param function: function to call, child argument will be provided by goThru."""
        for node in nodelist:
            if node.hasChildNodes():
                for child in node.childNodes:
                    function(child)
                self.goThru (node.childNodes, function)
    
    def replaceFont(self, node):
        """look for node which has "style:font-name" attribute and change its value to fontName."""
        if (not hasattr(node, "getAttribute")):
            return
        fontName = node.getAttribute('style:font-name')
        fontType = None
        if (fontName):
            try:
                fontType = self.fd.typeForFontname(fontName)
            except:
                pass
        if (fontType and hasattr(node.parentNode, "getAttribute")):
            # add name to convertible list
            self.convertibleStyle[unicode(node.parentNode.getAttribute('style:name'))] = fontType
            node.removeAttribute('style:font-name')
            node.setAttribute('style:font-name-complex', self.outputFont)
            if (self.outputFontSize):
                node.setAttribute('style:font-size-complex', self.outputFontSize)
        
        styleName = node.getAttribute('style:name')
        if (styleName):
            # if node's parent style is also convertible, node is also convertible.
            # search in child if child also has style:font-name (which will override parent)
            # then will not add to convertible list.
            if node.hasChildNodes():
                for child in node.childNodes:
                    if (child.hasAttribute('style:font-name')) and (hasattr(child, "getAttribute")):
                        fontName = child.getAttribute('style:font-name')
                        if fontName:
                            try:
                                fontType = self.fd.typeForFontname(fontName)
                            except:
                                self.nonConvertibleStyle[styleName] = True
                                return
            
            parentStyleName = node.getAttribute('style:parent-style-name')
            if parentStyleName and self.convertibleStyle.has_key(parentStyleName):
                self.convertibleStyle[styleName] = self.convertibleStyle[parentStyleName]
                node.setAttribute('style:name', self.outputFont)
                node.setAttribute('svg:font-family', self.outputFont)
            try:
                fontType = self.fd.typeForFontname(styleName)
            except:
                return
            self.convertibleStyle[styleName] = fontType
            node.setAttribute('style:name', self.outputFont)
            node.setAttribute('svg:font-family', self.outputFont)

    def convertIfLegacy(self, node):
        """look the node for information of legacy font and convert to unicode, otherwise return False.
        @param node: node to look to and convert if necessary."""
        
        if (not node.nodeValue):
            return False
        
        if (not (hasattr(node, "parentNode") or 
                 hasattr(node.parentNode, "getAttribute") or
                 hasattr(node.parentNode, "parentNode") or
                 hasattr(node.parentNode.parentNode, "getAttribute"))):
            return False
        
        # if font is not specified on node, but node is under a parent that is
        # in the convertible list, convert the node.
        styleName = node.parentNode.getAttribute(u'text:style-name')
        parentStyleName = node.parentNode.parentNode.getAttribute(u'text:style-name')
        
        if (styleName in self.convertibleStyle):
            style = styleName
        elif (styleName in self.nonConvertibleStyle):
            return False
            style = parentStyleName
        else:
            return False
        
        # legacy font data's referal.
        fontname = self.convertibleStyle[style]
        sin = node.data
        try:
            sin = sin.encode('cp1252')
        except UnicodeEncodeError:
            result = u''
            part = ''
            for char in sin:
                try:
                    tmpChar = char.encode('cp1252')
                except UnicodeEncodeError:
                    if (part):
                        part = unicodeProcess.process(part, self.fd.legacyData(fontname))
                        result += unicodeReorder.reorder(part)
                        part = ''
                    result += char
                else:
                    part += tmpChar
            if (part):
                part = unicodeProcess.process(part, self.fd.legacyData(fontname))
                result += unicodeReorder.reorder(part)
            sin = result
        else:
            sin = unicodeProcess.process(sin, self.fd.legacyData(fontname))
            sin = unicodeReorder.reorder(sin)
        newtext = self.xmldoc.createTextNode(sin) # create text of Node
        node.parentNode.replaceChild(newtext, node)