Exemple #1
0
    def process(self, form):
        """Convert the uploaded office document to CNXML and move on.

        The original file is converted to ODT when its extension is not
        ``.odt``, transformed to CNXML, saved into ``self.save_dir`` and
        validated.

        Returns the conversion-error page on ``ConversionError``, the generic
        error page (after writing the traceback via
        ``write_traceback_to_zipfile``) on any other exception, and otherwise
        an ``HTTPFound`` redirect to the route named by ``self.nextStep()``.
        """
        try:
            # Convert from other office format to odt if needed
            stem, suffix = os.path.splitext(self.original_filename)
            odt_path = str(stem) + '.odt'
            if suffix != '.odt':
                self._convert_to_odt(stem)

            # Convert and save all the resulting files.
            tree, files, _errors = transform(odt_path)
            cnxml = clean_cnxml(etree.tostring(tree))
            save_cnxml(self.save_dir, cnxml, files.items())

            # now validate with jing
            validate_cnxml(cnxml)
        except ConversionError as conversion_error:
            return render_conversionerror(self.request, conversion_error.msg)
        except Exception:
            trace_text = traceback.format_exc()
            self.write_traceback_to_zipfile(trace_text, form)
            # Drop the stale title before showing the error page.
            if 'title' in self.request.session:
                del self.request.session['title']
            return render_to_response(
                'templates/error.pt',
                {'traceback': trace_text},
                request=self.request)
        else:
            self.request.session.flash(self.message)
            next_route = self.nextStep()
            return HTTPFound(location=self.request.route_url(next_route))
    def process(self):
        """Run the office-document to CNXML conversion for this upload.

        Converts ``self.original_filename`` to ODT when it is not already an
        ``.odt`` file, transforms the ODT to CNXML, saves the CNXML plus its
        extracted files into ``self.save_dir`` and validates the result.

        Returns the conversion-error response for ``ConversionError``, the
        rendered error template (with the traceback) for any other exception,
        or an ``HTTPFound`` redirect to ``self.nextStep()`` on success.
        """
        try:
            # Convert from other office format to odt if needed
            base_name, ext = os.path.splitext(self.original_filename)
            odt_filename = str(base_name) + '.odt'
            needs_conversion = ext != '.odt'
            if needs_conversion:
                self._convert_to_odt(base_name)

            # Convert and save all the resulting files.
            tree, extracted, _errors = transform(odt_filename)
            cnxml = clean_cnxml(etree.tostring(tree))
            save_cnxml(self.save_dir, cnxml, extracted.items())

            # now validate with jing
            validate_cnxml(cnxml)
        except ConversionError as err:
            return render_conversionerror(self.request, err.msg)
        except Exception:
            formatted_tb = traceback.format_exc()
            self.write_traceback_to_zipfile(formatted_tb)
            if 'title' in self.request.session:
                del self.request.session['title']
            error_context = {'traceback': formatted_tb}
            return render_to_response('templates/error.pt', error_context,
                                      request=self.request)
        else:
            self.request.session.flash(self.message)
            target = self.request.route_url(self.nextStep())
            return HTTPFound(location=target)
Exemple #3
0
    def test_odt(self):
        """Compare the CNXML generated from each test ``.odt`` file with its reference.

        Every ``.odt`` file in ``test_folder_name + 'odt/'`` is transformed,
        cleaned and passed to ``validate_cnxml``; the result is written to
        ``<name>.tmp``, post-processed by ``remove_ids`` and compared with
        ``<name>.cnxml`` using the external ``diff`` program.  Output of
        ``diff`` goes to ``<name>.diff``; anything it reports on stderr goes to
        ``<name>.err``.
        """
        odt_dir = test_folder_name + 'odt/'
        # Find only .odt files in the testing folder for odt
        odt_files = [name for name in os.listdir(odt_dir)
                     if os.path.splitext(name)[1] == '.odt']

        for f in odt_files:
            original_filename = odt_dir + f
            filename, extension = os.path.splitext(original_filename)

            valid_filename = filename + '.cnxml'
            output_filename = filename + '.tmp'
            odt_filename = original_filename
            diff_filename = filename + '.diff'
            err_filename = filename + '.err'

            # Probe for the reference file; the handle is closed right away.
            try:
                with open(valid_filename, 'r'):
                    pass
            except IOError:
                print('Missing valid file (' + valid_filename +
                      ') for testing ' + original_filename)
                # NOTE(review): this stops the whole test, so later files are
                # not checked; `continue` may have been intended - confirm.
                return

            tree, files, errors = transform(odt_filename)
            cnxml = clean_cnxml(etree.tostring(tree))
            validate_cnxml(cnxml)
            with open(output_filename, 'w') as output:
                output.write(cnxml)
            remove_ids(output_filename)

            # stderr must be piped as well, otherwise communicate() always
            # returns None for it and the error branch below can never run.
            process = subprocess.Popen(
                ['diff', valid_filename, output_filename],
                shell=False,
                stdout=subprocess.PIPE,
                stderr=subprocess.PIPE)
            diff_text, diff_errors = process.communicate()

            # communicate() hands back raw bytes here (no text mode was
            # requested), so the report files are written in binary mode.
            if diff_text:
                with open(diff_filename, 'wb') as diff_output:
                    diff_output.write(diff_text)
                print(
                    'Differences in the testing of ' + original_filename +
                    ', information on those differences has been placed in ' +
                    diff_filename)
            elif diff_errors:
                with open(err_filename, 'wb') as err_output:
                    err_output.write(diff_errors)
                print(
                    'Error(s) occurred while attempting to test for differences in CNXML output of '
                    + original_filename +
                    ', information on these errors are in ' + err_filename)
    def convert(self, data, outdata, **kwargs):
        """Convert an uploaded office document to CNXML and store it on ``outdata``.

        The input is written to disk with ``writeToGood``, converted by
        ``convertWordToOOo`` and transformed with ``odt2cnxml.transform``.
        The serialized CNXML is set as the data of ``outdata`` and the files
        returned by the transform are set as its sub-objects.  On every
        failure path the stored input file is handed to ``moveToBad``.

        Required keyword arguments: ``original_file_name`` and ``user_name``.

        Returns ``outdata``.

        Raises ``OOoImportError`` when the input is a ``.xml`` file, when the
        office conversion returns no data, or when the transform raises
        ``OOoImportError``.  Exceptions from ``convertWordToOOo`` are
        re-raised unchanged.
        """
        ### JCC TODO: all the back and forth about whether the data is a
        ###           file or data should be streamlined, if possible

        strOriginalFileName = kwargs['original_file_name']
        strUserName = kwargs['user_name']
        zLOG.LOG("OOo2CNXML Transform", zLOG.INFO,
                 "Original file name is : \"" + strOriginalFileName + "\". User is : \"" + strUserName + "\"")

        # write the file to disk; attempt to harvest to central location else put in /tmp
        strFileName = self.writeToGood(data, strUserName, strOriginalFileName)
        if strOriginalFileName.endswith('.xml'):
            zLOG.LOG("OOo2CNXML Transform", zLOG.INFO, "Input file is a .xml file.  Terminate import.")
            # importing .xml file sometime blows up the OOo server and lacks a use case so we punt.
            self.moveToBad(strFileName)
            # Call-style raise is valid on both Python 2 and Python 3.
            raise OOoImportError("Could not convert .xml file.  Please try another file type.")

        # OOo convert a doc file into an XML file embedded in a zip file.
        try:
            binOOoData = self.convertWordToOOo(strFileName)
        except:
            # Deliberately bare: whatever went wrong is always re-raised after
            # the input file has been moved out of the way.
            self.moveToBad(strFileName)
            raise

        if len(binOOoData) == 0:
            zLOG.LOG("OOo2CNXML Transform", zLOG.INFO, "Open Office does not return anything.  The Open Office server may not be running.")
            # don't know for sure if the conversion failed, so do we leave
            # the harvested word file in the GOOD directory or do we leave
            # the word file in the BAD directory?  Choosing to keep the GOOD
            # as pristine as possible at the current time.
            self.moveToBad(strFileName)
            raise OOoImportError("Could not convert file")

        fileOOo = StringIO(binOOoData)
        try:
            elCnxml, filesDict, errors = odt2cnxml.transform(fileOOo)
            from lxml import etree
            strCnxml = etree.tostring(elCnxml, pretty_print=True)
        except OOoImportError:
            # toCnxml() wrote log messages
            self.moveToBad(strFileName)
            raise OOoImportError("Generated CNXML is invalid")

        fileCnxmlClean = StringIO(strCnxml)
        outdata.setData(fileCnxmlClean)

        # Add images
        objects = filesDict
        outdata.setSubObjects(objects)

        self.cleanup(strFileName)

        return outdata
    def convert(self, data, outdata, **kwargs):
        """Turn an uploaded office file into CNXML and attach it to ``outdata``.

        Steps: store the input on disk (``writeToGood``), convert it with
        ``convertWordToOOo``, transform the result with
        ``odt2cnxml.transform``, then set the serialized CNXML as the data of
        ``outdata`` and the transform's files as its sub-objects.  Whenever a
        step fails the stored input file is passed to ``moveToBad``.

        Required keyword arguments: ``original_file_name`` and ``user_name``.

        Returns ``outdata``.

        Raises ``OOoImportError`` for ``.xml`` input, for an empty conversion
        result, and when the transform itself raises ``OOoImportError``.
        Exceptions raised by ``convertWordToOOo`` propagate unchanged.
        """
        ### JCC TODO: all the back and forth about whether the data is a
        ###           file or data should be streamlined, if possible

        strOriginalFileName = kwargs['original_file_name']
        strUserName = kwargs['user_name']
        zLOG.LOG("OOo2CNXML Transform", zLOG.INFO,
                 "Original file name is : \"" + strOriginalFileName + "\". User is : \"" + strUserName + "\"")

        # write the file to disk; attempt to harvest to central location else put in /tmp
        strFileName = self.writeToGood(data, strUserName, strOriginalFileName)
        if strOriginalFileName.endswith('.xml'):
            zLOG.LOG("OOo2CNXML Transform", zLOG.INFO, "Input file is a .xml file.  Terminate import.")
            # importing .xml file sometime blows up the OOo server and lacks a use case so we punt.
            self.moveToBad(strFileName)
            # Exception instances are raised in call form so the statement
            # parses on Python 3 as well as Python 2.
            raise OOoImportError("Could not convert .xml file.  Please try another file type.")

        # OOo convert a doc file into an XML file embedded in a zip file.
        try:
            binOOoData = self.convertWordToOOo(strFileName)
        except:
            # Bare on purpose: the error is re-raised as-is once the input
            # file has been moved aside.
            self.moveToBad(strFileName)
            raise

        if len(binOOoData) == 0:
            zLOG.LOG("OOo2CNXML Transform", zLOG.INFO, "Open Office does not return anything.  The Open Office server may not be running.")
            # don't know for sure if the conversion failed, so do we leave
            # the harvested word file in the GOOD directory or do we leave
            # the word file in the BAD directory?  Choosing to keep the GOOD
            # as pristine as possible at the current time.
            self.moveToBad(strFileName)
            raise OOoImportError("Could not convert file")

        fileOOo = StringIO(binOOoData)
        try:
            elCnxml, filesDict, errors = odt2cnxml.transform(fileOOo)
            from lxml import etree
            strCnxml = etree.tostring(elCnxml, pretty_print=True)
        except OOoImportError:
            # toCnxml() wrote log messages
            self.moveToBad(strFileName)
            raise OOoImportError("Generated CNXML is invalid")

        fileCnxmlClean = StringIO(strCnxml)
        outdata.setData(fileCnxmlClean)

        # Add images
        objects = filesDict
        outdata.setSubObjects(objects)

        self.cleanup(strFileName)

        return outdata
    def test_doc(self):
        """Compare the CNXML generated from each test ``.doc`` file with its reference.

        Every ``.doc`` file in ``test_folder_name + 'doc/'`` is first saved as
        ``<name>.odt`` through the soffice ``SaveAsOOO`` macro, then
        transformed, cleaned and passed to ``validate_cnxml``.  The result is
        written to ``<name>.tmp``, post-processed by ``remove_ids`` and
        compared with ``<name>.cnxml`` using the external ``diff`` program.
        Output of ``diff`` goes to ``<name>.diff``; anything it reports on
        stderr goes to ``<name>.err``.
        """
        doc_dir = test_folder_name + 'doc/'
        # Find only .doc files in the testing folder for doc
        doc_files = [name for name in os.listdir(doc_dir)
                     if os.path.splitext(name)[1] == '.doc']

        for f in doc_files:
            original_filename = doc_dir + f
            filename, extension = os.path.splitext(original_filename)

            valid_filename = filename + '.cnxml'
            output_filename = filename + '.tmp'
            diff_filename = filename + '.diff'
            err_filename = filename + '.err'

            odt_filename = filename + '.odt'
            command = '/usr/bin/soffice --headless --nologo --nofirststartwizard "macro:///Standard.Module1.SaveAsOOO(' + os.getcwd()+'/'+original_filename + ',' + os.getcwd()+'/'+odt_filename + ')"'
            os.system(command)

            # Probe for the reference file; the handle is closed right away.
            try:
                with open(valid_filename, 'r'):
                    pass
            except IOError:
                print('Missing valid file ('+valid_filename+') for testing '+original_filename)
                # NOTE(review): this stops the whole test, so later files are
                # not checked; `continue` may have been intended - confirm.
                return

            tree, files, errors = transform(odt_filename)
            cnxml = clean_cnxml(etree.tostring(tree))
            validate_cnxml(cnxml)
            with open(output_filename, 'w') as output:
                output.write(cnxml)
            remove_ids(output_filename)

            # stderr must be piped as well, otherwise communicate() always
            # returns None for it and the error branch below can never run.
            process = subprocess.Popen(['diff', valid_filename, output_filename],
                                       shell=False,
                                       stdout=subprocess.PIPE,
                                       stderr=subprocess.PIPE)
            diff_text, diff_errors = process.communicate()

            # communicate() hands back raw bytes here (no text mode was
            # requested), so the report files are written in binary mode.
            if diff_text:
                with open(diff_filename, 'wb') as diff_output:
                    diff_output.write(diff_text)
                print('Differences in the testing of '+original_filename+', information on those differences has been placed in '+diff_filename)
            elif diff_errors:
                with open(err_filename, 'wb') as err_output:
                    err_output.write(diff_errors)
                print('Error(s) occurred while attempting to test for differences in CNXML output of '+original_filename+', information on these errors are in '+err_filename)
    def test_bolditalic(self):
        """Check that ``bolditalic.odt`` transforms to the reference CNXML.

        Newlines, whitespace between tags and ``id="..."`` attributes are
        stripped from the reference file, and ``id="..."`` attributes are
        stripped from the generated CNXML, before the two are compared.
        """
        odtfile = os.path.join(dirname, 'data', 'bolditalic.odt')
        reference_path = os.path.join(dirname, 'data', 'bolditalic.cnxml')
        # Read through a context manager so the handle is closed promptly.
        with open(reference_path) as reference:
            validcnxml = reference.read()
        # remove newlines and indentation and ids
        validcnxml = validcnxml.replace('\n', '')
        # Raw strings: '\s' in a plain literal is an invalid escape sequence.
        validcnxml = re.sub(r'>\s+<', '><', validcnxml)
        validcnxml = re.sub(r'id=".*?"', '', validcnxml)

        cnxml, images, errors = transform(odtfile)
        cnxml = etree.tostring(cnxml)
        # strip ids
        cnxml = re.sub(r'id=".*?"', '', cnxml)
        self.assertEqual(cnxml, validcnxml)
    def test_odt(self):
        """Check the ODT to CNXML transform against the stored reference files.

        Each ``.odt`` file found in ``test_folder_name + 'odt/'`` is
        transformed, cleaned and passed to ``validate_cnxml``.  The output is
        written to ``<name>.tmp``, run through ``remove_ids`` and diffed
        against ``<name>.cnxml`` with the external ``diff`` program.  Diff
        output is stored in ``<name>.diff``; stderr output of ``diff`` is
        stored in ``<name>.err``.
        """
        odt_dir = test_folder_name + 'odt/'
        # Find only .odt files in the testing folder for odt
        odt_files = [name for name in os.listdir(odt_dir)
                     if os.path.splitext(name)[1] == '.odt']

        for f in odt_files:
            original_filename = odt_dir + f
            filename, extension = os.path.splitext(original_filename)

            valid_filename = filename + '.cnxml'
            output_filename = filename + '.tmp'
            odt_filename = original_filename
            diff_filename = filename + '.diff'
            err_filename = filename + '.err'

            # Only checks that the reference file can be opened; the handle is
            # released immediately instead of being leaked.
            try:
                with open(valid_filename, 'r'):
                    pass
            except IOError:
                print('Missing valid file ('+valid_filename+') for testing '+original_filename)
                # NOTE(review): returning here skips every remaining file;
                # `continue` may have been intended - confirm.
                return

            tree, files, errors = transform(odt_filename)
            cnxml = clean_cnxml(etree.tostring(tree))
            validate_cnxml(cnxml)
            with open(output_filename, 'w') as output:
                output.write(cnxml)
            remove_ids(output_filename)

            # Without stderr=PIPE the second element of communicate() is
            # always None, which made the error branch below unreachable.
            process = subprocess.Popen(['diff', valid_filename, output_filename],
                                       shell=False,
                                       stdout=subprocess.PIPE,
                                       stderr=subprocess.PIPE)
            diff_text, diff_errors = process.communicate()

            # The pipes deliver raw bytes (no text mode requested), hence the
            # binary mode for the report files.
            if diff_text:
                with open(diff_filename, 'wb') as diff_output:
                    diff_output.write(diff_text)
                print('Differences in the testing of '+original_filename+', information on those differences has been placed in '+diff_filename)
            elif diff_errors:
                with open(err_filename, 'wb') as err_output:
                    err_output.write(diff_errors)
                print('Error(s) occurred while attempting to test for differences in CNXML output of '+original_filename+', information on these errors are in '+err_filename)
Exemple #9
0
def process_import(save_dir_path, original_filepath, filename, download_url):
    """Convert an office document into a zipped HTML preview.

    The document at ``original_filepath`` is converted to
    ``<save_dir_path>/<filename>.odt`` by the JOD converter, transformed to
    CNXML and then to an HTML preview.  The preview (as ``index.html``) and
    the files returned by the transform are written to
    ``<save_dir_path>/<filename>.zip``; an existing archive of that name is
    first renamed with a trailing ``~``.

    Returns ``download_url`` unchanged.

    Raises whatever the converter raises, and ``IOError`` when the converted
    ODT file cannot be opened afterwards.
    """
    # convert from other office format to odt
    odt_filename = '%s.odt' % filename
    odt_filepath = str(os.path.join(save_dir_path, odt_filename))

    # run jod service; failures propagate unchanged (a catch-and-`raise e`
    # wrapper would only truncate the traceback on Python 2)
    converter = JOD.DocumentConverterClient()
    converter.convert(original_filepath, 'odt', odt_filepath)

    # check file existed: opening raises IOError when the converter produced
    # nothing, and the handle is closed immediately
    with open(odt_filepath, 'r'):
        pass

    # convert to cnxml
    tree, files, errors = transform(odt_filepath)
    cnxml = clean_cnxml(etree.tostring(tree))

    # convert to html
    html = cnxml_to_htmlpreview(cnxml)

    # produce zipfile in memory; close the archive even if an entry fails
    ram = StringIO()
    zip_archive = zipfile.ZipFile(ram, 'w')
    try:
        zip_archive.writestr('index.html', html)
        for fname, fdata in files.items():
            zip_archive.writestr(fname, fdata)
    finally:
        zip_archive.close()

    # save zipfile, keeping any previous archive as a "~" backup
    zip_file_path = os.path.join(save_dir_path, '%s.zip' % filename)
    if os.path.exists(zip_file_path):
        os.rename(zip_file_path, zip_file_path + '~')
    with open(zip_file_path, 'wb') as f:
        f.write(ram.getvalue())

    return download_url
def office_to_cnxml(pathToOfficeFile, verbose=True):
    """Transform an office document into CNXML.

    Files whose name does not end in ``.odt`` (case-insensitive) are first
    saved as ``/tmp/temp.odt`` through the soffice ``SaveAsOOO`` macro.

    ``verbose`` is accepted but not used by this function.

    Returns a ``(cnxml, files)`` tuple: the serialized CNXML tree and the
    files returned by ``odt2cnxml.transform``.
    """
    import os

    # Get absolute path to file
    source_path = os.path.abspath(pathToOfficeFile)

    if source_path.lower().endswith(".odt"):
        odt_path = source_path
    else:
        # Convert to ODT via the soffice macro
        odt_path = "/tmp/temp.odt"
        macro_args = escape_system(source_path)[1:-1] + "," + odt_path
        command = (
            '/usr/bin/soffice -headless -nologo -nofirststartwizard "macro:///Standard.Module1.SaveAsOOO('
            + macro_args
            + ')"'
        )
        os.system(command)

    # Convert to CNXML
    xml, files, _errors = odt2cnxml.transform(odt_path)
    serialized = etree.tostring(xml)
    return serialized, files
    quit()

filename=sys.argv[1]
name, extension = os.path.splitext(filename)
if(extension == '.odt' or extension == '.doc'):

    if(extension == '.doc'):
        doc_folder = os.getcwd()+'/'+os.path.dirname(name)
        os.system('./converters/doc2odt -o '+doc_folder+' '+os.getcwd()+'/'+filename)
        #command = '/usr/bin/soffice --headless --nologo --nofirststartwizard "macro:///Standard.Module1.SaveAsOOO(' + os.getcwd()+'/'+filename + ',' + os.getcwd()+'/'+name+'.odt' + ')"'
        #os.system(command)
        filename=name+'.odt'


    valid_filename=name+'.cnxml'
    tree, files, errors = transform(filename)
    cnxml = clean_cnxml(etree.tostring(tree))
    output=open(valid_filename,'w')
    output.write(cnxml)
    output.close()
    remove_ids(valid_filename)
    if(extension == '.doc'):
        os.remove(os.getcwd()+'/'+name+'.odt')

elif(extension == '.tex'):
    valid_filename=name+'.cnxml'
    fp = open(filename, 'r')
    latex_archive = fp.read()
    fp.close()

    # LaTeX 2 CNXML transformation
Exemple #12
0
    def test_doc(self):
        """Check the DOC to CNXML pipeline against the stored reference files.

        Each ``.doc`` file found in ``test_folder_name + 'doc/'`` is saved as
        ``<name>.odt`` through the soffice ``SaveAsOOO`` macro, transformed,
        cleaned and passed to ``validate_cnxml``.  The output is written to
        ``<name>.tmp``, run through ``remove_ids`` and diffed against
        ``<name>.cnxml`` with the external ``diff`` program.  Diff output is
        stored in ``<name>.diff``; stderr output of ``diff`` is stored in
        ``<name>.err``.
        """
        doc_dir = test_folder_name + 'doc/'
        # Find only .doc files in the testing folder for doc
        doc_files = [name for name in os.listdir(doc_dir)
                     if os.path.splitext(name)[1] == '.doc']

        for f in doc_files:
            original_filename = doc_dir + f
            filename, extension = os.path.splitext(original_filename)

            valid_filename = filename + '.cnxml'
            output_filename = filename + '.tmp'
            diff_filename = filename + '.diff'
            err_filename = filename + '.err'

            odt_filename = filename + '.odt'
            command = '/usr/bin/soffice --headless --nologo --nofirststartwizard "macro:///Standard.Module1.SaveAsOOO(' + os.getcwd(
            ) + '/' + original_filename + ',' + os.getcwd(
            ) + '/' + odt_filename + ')"'
            os.system(command)

            # Only checks that the reference file can be opened; the handle is
            # released immediately instead of being leaked.
            try:
                with open(valid_filename, 'r'):
                    pass
            except IOError:
                print('Missing valid file (' + valid_filename +
                      ') for testing ' + original_filename)
                # NOTE(review): returning here skips every remaining file;
                # `continue` may have been intended - confirm.
                return

            tree, files, errors = transform(odt_filename)
            cnxml = clean_cnxml(etree.tostring(tree))
            validate_cnxml(cnxml)
            with open(output_filename, 'w') as output:
                output.write(cnxml)
            remove_ids(output_filename)

            # Without stderr=PIPE the second element of communicate() is
            # always None, which made the error branch below unreachable.
            process = subprocess.Popen(
                ['diff', valid_filename, output_filename],
                shell=False,
                stdout=subprocess.PIPE,
                stderr=subprocess.PIPE)
            diff_text, diff_errors = process.communicate()

            # The pipes deliver raw bytes (no text mode requested), hence the
            # binary mode for the report files.
            if diff_text:
                with open(diff_filename, 'wb') as diff_output:
                    diff_output.write(diff_text)
                print(
                    'Differences in the testing of ' + original_filename +
                    ', information on those differences has been placed in ' +
                    diff_filename)
            elif diff_errors:
                with open(err_filename, 'wb') as err_output:
                    err_output.write(diff_errors)
                print(
                    'Error(s) occurred while attempting to test for differences in CNXML output of '
                    + original_filename +
                    ', information on these errors are in ' + err_filename)
def choose_view(request):
    check_login(request)

    templatePath = 'templates/choose.pt'

    form = Form(request, schema=UploadSchema)
    field_list = [('upload', 'File')]

    # clear the session
    if 'transformerror' in request.session:
        del request.session['transformerror']
    if 'title' in request.session:
        del request.session['title']

    # Check for successful form completion
    if form.validate():
        try: # Catch-all exception block
            # Create a directory to do the conversions
            now_string = datetime.datetime.now().strftime('%Y%m%d-%H%M%S')
            # TODO: This has a good chance of being unique, but even so...
            temp_dir_name = '%s-%s' % (request.session['username'], now_string)
            save_dir = os.path.join(
                request.registry.settings['transform_dir'],
                temp_dir_name
                )
            os.mkdir(save_dir)

            # Keep the info we need for next uploads.  Note that this
            # might kill the ability to do multiple tabs in parallel,
            # unless it gets offloaded onto the form again.
            request.session['upload_dir'] = temp_dir_name
            if form.data['upload'] is not None:
                request.session['filename'] = form.data['upload'].filename

            # Google Docs Conversion
            # if we have a Google Docs ID and Access token.
            if form.data['gdocs_resource_id']:
                gdocs_resource_id = form.data['gdocs_resource_id']
                gdocs_access_token = form.data['gdocs_access_token']

                form.data['gdocs_resource_id'] = None
                form.data['gdocs_access_token'] = None
                
                (request.session['title'], request.session['filename']) = \
                    process_gdocs_resource(save_dir, \
                                           gdocs_resource_id, \
                                           gdocs_access_token)

            # HTML URL Import:
            elif form.data.get('url_text'):
                url = form.data['url_text']

                form.data['url_text'] = None

                # Build a regex for Google Docs URLs
                regex = re.compile("^https:\/\/docs\.google\.com\/.*document\/[^\/]\/([^\/]+)\/")
                r = regex.search(url)

                # Take special action for Google Docs URLs
                if r:
                    gdocs_resource_id = r.groups()[0]
                    (request.session['title'], request.session['filename']) = \
                        process_gdocs_resource(save_dir, "document:" + gdocs_resource_id)
                else:
                    # download html:
                    #html = urllib2.urlopen(url).read() 
                    # Simple urlopen() will fail on mediawiki websites like e.g. Wikipedia!
                    import_opener = urllib2.build_opener()
                    import_opener.addheaders = [('User-agent', 'Mozilla/5.0')]
                    try:
                        import_request = import_opener.open(url)
                        html = import_request.read()

                        # transformation            
                        cnxml, objects, html_title = htmlsoup_to_cnxml(
                        html, bDownloadImages=True, base_or_source_url=url)
                        request.session['title'] = html_title

                        cnxml = clean_cnxml(cnxml)
                        save_cnxml(save_dir, cnxml, objects.items())

                        # Keep the info we need for next uploads.  Note that
                        # this might kill the ability to do multiple tabs in
                        # parallel, unless it gets offloaded onto the form
                        # again.
                        request.session['filename'] = "HTML Document"

                        validate_cnxml(cnxml)

                    except urllib2.URLError, e:
                        request['errors'] = ['The URL %s could not be opened' %url,]
                        response = {
                            'form': FormRenderer(form),
                            }
                        return render_to_response(templatePath, response, request=request)

            # Office, CNXML-ZIP or LaTeX-ZIP file
            else:
                # Save the original file so that we can convert, plus keep it.
                original_filename = os.path.join(
                    save_dir,
                    form.data['upload'].filename.replace(os.sep, '_'))
                saved_file = open(original_filename, 'wb')
                input_file = form.data['upload'].file
                shutil.copyfileobj(input_file, saved_file)
                saved_file.close()
                input_file.close()

                form.data['upload'] = None

                # Check if it is a ZIP file with at least index.cnxml or a LaTeX file in it
                try:
                    zip_archive = zipfile.ZipFile(original_filename, 'r')
                    is_zip_archive = ('index.cnxml' in zip_archive.namelist())
                    
                    # Do we have a latex file?
                    if not is_zip_archive:
                        # incoming latex.zip must contain a latex.tex file, where "latex" is the base name.
                        (latex_head, latex_tail) = os.path.split(original_filename)
                        (latex_root, latex_ext)  = os.path.splitext(latex_tail)
                        latex_basename = latex_root
                        latex_filename = latex_basename + '.tex'
                        is_latex_archive = (latex_filename in zip_archive.namelist())

                except zipfile.BadZipfile:
                    is_zip_archive = False
                    is_latex_archive = False

                # ZIP package from previous conversion
                if is_zip_archive:
                    # Unzip into transform directory
                    zip_archive.extractall(path=save_dir)

                    # Rename ZIP file so that the user can download it again
                    os.rename(original_filename, os.path.join(save_dir, 'upload.zip'))

                    # Read CNXML
                    with open(os.path.join(save_dir, 'index.cnxml'), 'rt') as fp:
                        cnxml = fp.read()

                    # Convert the CNXML to XHTML for preview
                    html = cnxml_to_htmlpreview(cnxml)
                    with open(os.path.join(save_dir, 'index.xhtml'), 'w') as index:
                        index.write(html)

                    cnxml = clean_cnxml(cnxml)
                    validate_cnxml(cnxml)
                
                # LaTeX
                elif is_latex_archive:
                    f = open(original_filename)
                    latex_archive = f.read()

                    # LaTeX 2 CNXML transformation
                    cnxml, objects = latex_to_cnxml(latex_archive, original_filename)

                    cnxml = clean_cnxml(cnxml)
                    save_cnxml(save_dir, cnxml, objects.items())
                    validate_cnxml(cnxml)

                # OOo / MS Word Conversion
                else:
                    # Convert from other office format to odt if needed
                    odt_filename = original_filename
                    filename, extension = os.path.splitext(original_filename)
                    if(extension != '.odt'):
                        odt_filename= '%s.odt' % filename
                        command = '/usr/bin/soffice -headless -nologo -nofirststartwizard "macro:///Standard.Module1.SaveAsOOO(' + escape_system(original_filename)[1:-1] + ',' + odt_filename + ')"'
                        os.system(command)
                        try:
                            fp = open(odt_filename, 'r')
                            fp.close()
                        except IOError as io:
                            raise ConversionError("%s not found" %
                                                  original_filename)
                    # Convert and save all the resulting files.

                    tree, files, errors = transform(odt_filename)
                    cnxml = clean_cnxml(etree.tostring(tree))
                    save_cnxml(save_dir, cnxml, files.items())

                    # now validate with jing
                    validate_cnxml(cnxml)
Exemple #14
0
    quit()

filename = sys.argv[1]
name, extension = os.path.splitext(filename)
if (extension == '.odt' or extension == '.doc'):

    if (extension == '.doc'):
        doc_folder = os.getcwd() + '/' + os.path.dirname(name)
        os.system('./converters/doc2odt -o ' + doc_folder + ' ' + os.getcwd() +
                  '/' + filename)
        #command = '/usr/bin/soffice --headless --nologo --nofirststartwizard "macro:///Standard.Module1.SaveAsOOO(' + os.getcwd()+'/'+filename + ',' + os.getcwd()+'/'+name+'.odt' + ')"'
        #os.system(command)
        filename = name + '.odt'

    valid_filename = name + '.cnxml'
    tree, files, errors = transform(filename)
    cnxml = clean_cnxml(etree.tostring(tree))
    output = open(valid_filename, 'w')
    output.write(cnxml)
    output.close()
    remove_ids(valid_filename)
    if (extension == '.doc'):
        os.remove(os.getcwd() + '/' + name + '.odt')

elif (extension == '.tex'):
    valid_filename = name + '.cnxml'
    fp = open(filename, 'r')
    latex_archive = fp.read()
    fp.close()

    # LaTeX 2 CNXML transformation