コード例 #1
0
    def process(self):
        """Convert the uploaded LaTeX archive to CNXML and go to the next step.

        Reads the file at ``self.original_filename``, passes its contents to
        ``latex_to_cnxml``, cleans the resulting CNXML, saves it together with
        the extracted objects into ``self.save_dir`` and validates it.

        Returns:
            * the result of ``render_conversionerror`` when a
              ``ConversionError`` is raised;
            * the rendered ``templates/error.pt`` page (carrying the
              traceback) when any other exception is raised;
            * otherwise an ``HTTPFound`` redirect to the route named by
              ``self.nextStep()``, after flashing ``self.message``.
        """
        try:
            # Use a context manager so the file handle is released even when
            # reading fails; the original left the handle open.
            with open(self.original_filename) as archive_file:
                latex_archive = archive_file.read()

            # LaTeX 2 CNXML transformation
            cnxml, objects = latex_to_cnxml(latex_archive, self.original_filename)

            cnxml = clean_cnxml(cnxml)
            save_cnxml(self.save_dir, cnxml, objects.items())
            validate_cnxml(cnxml)

        except ConversionError as e:
            return render_conversionerror(self.request, e.msg)

        except Exception:
            # Top-level boundary: record the traceback and show the generic
            # error page instead of letting the exception propagate.
            tb = traceback.format_exc()
            self.write_traceback_to_zipfile(tb)
            templatePath = 'templates/error.pt'
            response = {'traceback': tb}
            # Drop the 'title' entry from the session, if one was stored.
            if 'title' in self.request.session:
                del self.request.session['title']
            return render_to_response(templatePath, response, request=self.request)

        self.request.session.flash(self.message)
        return HTTPFound(location=self.request.route_url(self.nextStep()))
コード例 #2
0
    def process(self, form):
        """Convert the uploaded LaTeX archive to CNXML and go to the next step.

        Reads the file at ``self.original_filename``, passes its contents to
        ``latex_to_cnxml``, cleans the resulting CNXML, saves it together with
        the extracted objects into ``self.save_dir`` and validates it.

        Args:
            form: Only forwarded to ``self.write_traceback_to_zipfile`` when
                an unexpected exception is handled.

        Returns:
            * the result of ``render_conversionerror`` when a
              ``ConversionError`` is raised;
            * the rendered ``templates/error.pt`` page (carrying the
              traceback) when any other exception is raised;
            * otherwise an ``HTTPFound`` redirect to the route named by
              ``self.nextStep()``, after flashing ``self.message``.
        """
        try:
            # Use a context manager so the file handle is released even when
            # reading fails; the original left the handle open.
            with open(self.original_filename) as archive_file:
                latex_archive = archive_file.read()

            # LaTeX 2 CNXML transformation
            cnxml, objects = latex_to_cnxml(latex_archive,
                                            self.original_filename)

            cnxml = clean_cnxml(cnxml)
            save_cnxml(self.save_dir, cnxml, objects.items())
            validate_cnxml(cnxml)

        except ConversionError as e:
            return render_conversionerror(self.request, e.msg)

        except Exception:
            # Top-level boundary: record the traceback and show the generic
            # error page instead of letting the exception propagate.
            tb = traceback.format_exc()
            self.write_traceback_to_zipfile(tb, form)
            templatePath = 'templates/error.pt'
            response = {'traceback': tb}
            # Drop the 'title' entry from the session, if one was stored.
            if 'title' in self.request.session:
                del self.request.session['title']
            return render_to_response(templatePath,
                                      response,
                                      request=self.request)

        self.request.session.flash(self.message)
        return HTTPFound(location=self.request.route_url(self.nextStep()))
コード例 #3
0
def choose_view(request):
    check_login(request)

    templatePath = 'templates/choose.pt'

    form = Form(request, schema=UploadSchema)
    field_list = [('upload', 'File')]

    # clear the session
    if 'transformerror' in request.session:
        del request.session['transformerror']
    if 'title' in request.session:
        del request.session['title']

    # Check for successful form completion
    if form.validate():
        try: # Catch-all exception block
            # Create a directory to do the conversions
            now_string = datetime.datetime.now().strftime('%Y%m%d-%H%M%S')
            # TODO: This has a good chance of being unique, but even so...
            temp_dir_name = '%s-%s' % (request.session['username'], now_string)
            save_dir = os.path.join(
                request.registry.settings['transform_dir'],
                temp_dir_name
                )
            os.mkdir(save_dir)

            # Keep the info we need for next uploads.  Note that this
            # might kill the ability to do multiple tabs in parallel,
            # unless it gets offloaded onto the form again.
            request.session['upload_dir'] = temp_dir_name
            if form.data['upload'] is not None:
                request.session['filename'] = form.data['upload'].filename

            # Google Docs Conversion
            # if we have a Google Docs ID and Access token.
            if form.data['gdocs_resource_id']:
                gdocs_resource_id = form.data['gdocs_resource_id']
                gdocs_access_token = form.data['gdocs_access_token']

                form.data['gdocs_resource_id'] = None
                form.data['gdocs_access_token'] = None
                
                (request.session['title'], request.session['filename']) = \
                    process_gdocs_resource(save_dir, \
                                           gdocs_resource_id, \
                                           gdocs_access_token)

            # HTML URL Import:
            elif form.data.get('url_text'):
                url = form.data['url_text']

                form.data['url_text'] = None

                # Build a regex for Google Docs URLs
                regex = re.compile("^https:\/\/docs\.google\.com\/.*document\/[^\/]\/([^\/]+)\/")
                r = regex.search(url)

                # Take special action for Google Docs URLs
                if r:
                    gdocs_resource_id = r.groups()[0]
                    (request.session['title'], request.session['filename']) = \
                        process_gdocs_resource(save_dir, "document:" + gdocs_resource_id)
                else:
                    # download html:
                    #html = urllib2.urlopen(url).read() 
                    # Simple urlopen() will fail on mediawiki websites like e.g. Wikipedia!
                    import_opener = urllib2.build_opener()
                    import_opener.addheaders = [('User-agent', 'Mozilla/5.0')]
                    try:
                        import_request = import_opener.open(url)
                        html = import_request.read()

                        # transformation            
                        cnxml, objects, html_title = htmlsoup_to_cnxml(
                        html, bDownloadImages=True, base_or_source_url=url)
                        request.session['title'] = html_title

                        cnxml = clean_cnxml(cnxml)
                        save_cnxml(save_dir, cnxml, objects.items())

                        # Keep the info we need for next uploads.  Note that
                        # this might kill the ability to do multiple tabs in
                        # parallel, unless it gets offloaded onto the form
                        # again.
                        request.session['filename'] = "HTML Document"

                        validate_cnxml(cnxml)

                    except urllib2.URLError, e:
                        request['errors'] = ['The URL %s could not be opened' %url,]
                        response = {
                            'form': FormRenderer(form),
                            }
                        return render_to_response(templatePath, response, request=request)

            # Office, CNXML-ZIP or LaTeX-ZIP file
            else:
                # Save the original file so that we can convert, plus keep it.
                original_filename = os.path.join(
                    save_dir,
                    form.data['upload'].filename.replace(os.sep, '_'))
                saved_file = open(original_filename, 'wb')
                input_file = form.data['upload'].file
                shutil.copyfileobj(input_file, saved_file)
                saved_file.close()
                input_file.close()

                form.data['upload'] = None

                # Check if it is a ZIP file with at least index.cnxml or a LaTeX file in it
                try:
                    zip_archive = zipfile.ZipFile(original_filename, 'r')
                    is_zip_archive = ('index.cnxml' in zip_archive.namelist())
                    
                    # Do we have a latex file?
                    if not is_zip_archive:
                        # incoming latex.zip must contain a latex.tex file, where "latex" is the base name.
                        (latex_head, latex_tail) = os.path.split(original_filename)
                        (latex_root, latex_ext)  = os.path.splitext(latex_tail)
                        latex_basename = latex_root
                        latex_filename = latex_basename + '.tex'
                        is_latex_archive = (latex_filename in zip_archive.namelist())

                except zipfile.BadZipfile:
                    is_zip_archive = False
                    is_latex_archive = False

                # ZIP package from previous conversion
                if is_zip_archive:
                    # Unzip into transform directory
                    zip_archive.extractall(path=save_dir)

                    # Rename ZIP file so that the user can download it again
                    os.rename(original_filename, os.path.join(save_dir, 'upload.zip'))

                    # Read CNXML
                    with open(os.path.join(save_dir, 'index.cnxml'), 'rt') as fp:
                        cnxml = fp.read()

                    # Convert the CNXML to XHTML for preview
                    html = cnxml_to_htmlpreview(cnxml)
                    with open(os.path.join(save_dir, 'index.xhtml'), 'w') as index:
                        index.write(html)

                    cnxml = clean_cnxml(cnxml)
                    validate_cnxml(cnxml)
                
                # LaTeX
                elif is_latex_archive:
                    f = open(original_filename)
                    latex_archive = f.read()

                    # LaTeX 2 CNXML transformation
                    cnxml, objects = latex_to_cnxml(latex_archive, original_filename)

                    cnxml = clean_cnxml(cnxml)
                    save_cnxml(save_dir, cnxml, objects.items())
                    validate_cnxml(cnxml)

                # OOo / MS Word Conversion
                else:
                    # Convert from other office format to odt if needed
                    odt_filename = original_filename
                    filename, extension = os.path.splitext(original_filename)
                    if(extension != '.odt'):
                        odt_filename= '%s.odt' % filename
                        command = '/usr/bin/soffice -headless -nologo -nofirststartwizard "macro:///Standard.Module1.SaveAsOOO(' + escape_system(original_filename)[1:-1] + ',' + odt_filename + ')"'
                        os.system(command)
                        try:
                            fp = open(odt_filename, 'r')
                            fp.close()
                        except IOError as io:
                            raise ConversionError("%s not found" %
                                                  original_filename)
                    # Convert and save all the resulting files.

                    tree, files, errors = transform(odt_filename)
                    cnxml = clean_cnxml(etree.tostring(tree))
                    save_cnxml(save_dir, cnxml, files.items())

                    # now validate with jing
                    validate_cnxml(cnxml)