def process(self):
    """Convert the LaTeX archive at ``self.original_filename`` to CNXML.

    The archive contents are passed to ``latex_to_cnxml``; the resulting
    CNXML is cleaned, saved into ``self.save_dir`` together with the
    extracted objects, and then validated.

    Returns:
        * the result of ``render_conversionerror`` when a
          ``ConversionError`` is raised during the conversion;
        * the rendered ``templates/error.pt`` page (carrying the traceback)
          when any other exception is raised;
        * an ``HTTPFound`` redirect to the route named by
          ``self.nextStep()`` on success.
    """
    try:
        # Use a context manager so the file handle is released even when
        # read() or any later conversion step raises.
        # NOTE(review): the archive is read in the default text mode, as
        # before -- confirm whether binary mode ('rb') is wanted here.
        with open(self.original_filename) as f:
            latex_archive = f.read()

        # LaTeX 2 CNXML transformation
        cnxml, objects = latex_to_cnxml(latex_archive, self.original_filename)
        cnxml = clean_cnxml(cnxml)
        save_cnxml(self.save_dir, cnxml, objects.items())
        validate_cnxml(cnxml)
    except ConversionError as e:
        return render_conversionerror(self.request, e.msg)
    except Exception:
        # Catch-all boundary: keep the traceback for later inspection and
        # show it on the generic error page instead of propagating.
        tb = traceback.format_exc()
        self.write_traceback_to_zipfile(tb)
        templatePath = 'templates/error.pt'
        response = {'traceback': tb}
        # Drop the title stored in the session before rendering the error.
        if 'title' in self.request.session:
            del self.request.session['title']
        return render_to_response(templatePath, response, request=self.request)

    # Success path: queue the flash message and redirect to the next step.
    self.request.session.flash(self.message)
    return HTTPFound(location=self.request.route_url(self.nextStep()))
def process(self, form):
    """Convert the LaTeX archive at ``self.original_filename`` to CNXML.

    The archive contents are passed to ``latex_to_cnxml``; the resulting
    CNXML is cleaned, saved into ``self.save_dir`` together with the
    extracted objects, and then validated.

    Args:
        form: passed through unchanged to ``write_traceback_to_zipfile``
            when an unexpected exception is recorded; not otherwise used
            here.

    Returns:
        * the result of ``render_conversionerror`` when a
          ``ConversionError`` is raised during the conversion;
        * the rendered ``templates/error.pt`` page (carrying the traceback)
          when any other exception is raised;
        * an ``HTTPFound`` redirect to the route named by
          ``self.nextStep()`` on success.
    """
    try:
        # Use a context manager so the file handle is released even when
        # read() or any later conversion step raises.
        # NOTE(review): the archive is read in the default text mode, as
        # before -- confirm whether binary mode ('rb') is wanted here.
        with open(self.original_filename) as f:
            latex_archive = f.read()

        # LaTeX 2 CNXML transformation
        cnxml, objects = latex_to_cnxml(latex_archive, self.original_filename)
        cnxml = clean_cnxml(cnxml)
        save_cnxml(self.save_dir, cnxml, objects.items())
        validate_cnxml(cnxml)
    except ConversionError as e:
        return render_conversionerror(self.request, e.msg)
    except Exception:
        # Catch-all boundary: keep the traceback for later inspection and
        # show it on the generic error page instead of propagating.
        tb = traceback.format_exc()
        self.write_traceback_to_zipfile(tb, form)
        templatePath = 'templates/error.pt'
        response = {'traceback': tb}
        # Drop the title stored in the session before rendering the error.
        if 'title' in self.request.session:
            del self.request.session['title']
        return render_to_response(templatePath, response, request=self.request)

    # Success path: queue the flash message and redirect to the next step.
    self.request.session.flash(self.message)
    return HTTPFound(location=self.request.route_url(self.nextStep()))
def choose_view(request): check_login(request) templatePath = 'templates/choose.pt' form = Form(request, schema=UploadSchema) field_list = [('upload', 'File')] # clear the session if 'transformerror' in request.session: del request.session['transformerror'] if 'title' in request.session: del request.session['title'] # Check for successful form completion if form.validate(): try: # Catch-all exception block # Create a directory to do the conversions now_string = datetime.datetime.now().strftime('%Y%m%d-%H%M%S') # TODO: This has a good chance of being unique, but even so... temp_dir_name = '%s-%s' % (request.session['username'], now_string) save_dir = os.path.join( request.registry.settings['transform_dir'], temp_dir_name ) os.mkdir(save_dir) # Keep the info we need for next uploads. Note that this # might kill the ability to do multiple tabs in parallel, # unless it gets offloaded onto the form again. request.session['upload_dir'] = temp_dir_name if form.data['upload'] is not None: request.session['filename'] = form.data['upload'].filename # Google Docs Conversion # if we have a Google Docs ID and Access token. 
if form.data['gdocs_resource_id']: gdocs_resource_id = form.data['gdocs_resource_id'] gdocs_access_token = form.data['gdocs_access_token'] form.data['gdocs_resource_id'] = None form.data['gdocs_access_token'] = None (request.session['title'], request.session['filename']) = \ process_gdocs_resource(save_dir, \ gdocs_resource_id, \ gdocs_access_token) # HTML URL Import: elif form.data.get('url_text'): url = form.data['url_text'] form.data['url_text'] = None # Build a regex for Google Docs URLs regex = re.compile("^https:\/\/docs\.google\.com\/.*document\/[^\/]\/([^\/]+)\/") r = regex.search(url) # Take special action for Google Docs URLs if r: gdocs_resource_id = r.groups()[0] (request.session['title'], request.session['filename']) = \ process_gdocs_resource(save_dir, "document:" + gdocs_resource_id) else: # download html: #html = urllib2.urlopen(url).read() # Simple urlopen() will fail on mediawiki websites like e.g. Wikipedia! import_opener = urllib2.build_opener() import_opener.addheaders = [('User-agent', 'Mozilla/5.0')] try: import_request = import_opener.open(url) html = import_request.read() # transformation cnxml, objects, html_title = htmlsoup_to_cnxml( html, bDownloadImages=True, base_or_source_url=url) request.session['title'] = html_title cnxml = clean_cnxml(cnxml) save_cnxml(save_dir, cnxml, objects.items()) # Keep the info we need for next uploads. Note that # this might kill the ability to do multiple tabs in # parallel, unless it gets offloaded onto the form # again. request.session['filename'] = "HTML Document" validate_cnxml(cnxml) except urllib2.URLError, e: request['errors'] = ['The URL %s could not be opened' %url,] response = { 'form': FormRenderer(form), } return render_to_response(templatePath, response, request=request) # Office, CNXML-ZIP or LaTeX-ZIP file else: # Save the original file so that we can convert, plus keep it. 
original_filename = os.path.join( save_dir, form.data['upload'].filename.replace(os.sep, '_')) saved_file = open(original_filename, 'wb') input_file = form.data['upload'].file shutil.copyfileobj(input_file, saved_file) saved_file.close() input_file.close() form.data['upload'] = None # Check if it is a ZIP file with at least index.cnxml or a LaTeX file in it try: zip_archive = zipfile.ZipFile(original_filename, 'r') is_zip_archive = ('index.cnxml' in zip_archive.namelist()) # Do we have a latex file? if not is_zip_archive: # incoming latex.zip must contain a latex.tex file, where "latex" is the base name. (latex_head, latex_tail) = os.path.split(original_filename) (latex_root, latex_ext) = os.path.splitext(latex_tail) latex_basename = latex_root latex_filename = latex_basename + '.tex' is_latex_archive = (latex_filename in zip_archive.namelist()) except zipfile.BadZipfile: is_zip_archive = False is_latex_archive = False # ZIP package from previous conversion if is_zip_archive: # Unzip into transform directory zip_archive.extractall(path=save_dir) # Rename ZIP file so that the user can download it again os.rename(original_filename, os.path.join(save_dir, 'upload.zip')) # Read CNXML with open(os.path.join(save_dir, 'index.cnxml'), 'rt') as fp: cnxml = fp.read() # Convert the CNXML to XHTML for preview html = cnxml_to_htmlpreview(cnxml) with open(os.path.join(save_dir, 'index.xhtml'), 'w') as index: index.write(html) cnxml = clean_cnxml(cnxml) validate_cnxml(cnxml) # LaTeX elif is_latex_archive: f = open(original_filename) latex_archive = f.read() # LaTeX 2 CNXML transformation cnxml, objects = latex_to_cnxml(latex_archive, original_filename) cnxml = clean_cnxml(cnxml) save_cnxml(save_dir, cnxml, objects.items()) validate_cnxml(cnxml) # OOo / MS Word Conversion else: # Convert from other office format to odt if needed odt_filename = original_filename filename, extension = os.path.splitext(original_filename) if(extension != '.odt'): odt_filename= '%s.odt' % filename 
command = '/usr/bin/soffice -headless -nologo -nofirststartwizard "macro:///Standard.Module1.SaveAsOOO(' + escape_system(original_filename)[1:-1] + ',' + odt_filename + ')"' os.system(command) try: fp = open(odt_filename, 'r') fp.close() except IOError as io: raise ConversionError("%s not found" % original_filename) # Convert and save all the resulting files. tree, files, errors = transform(odt_filename) cnxml = clean_cnxml(etree.tostring(tree)) save_cnxml(save_dir, cnxml, files.items()) # now validate with jing validate_cnxml(cnxml)