def process(self):
        """Turn the uploaded office document into CNXML and redirect onward.

        Uploads whose extension is not ``.odt`` are first handed to
        ``self._convert_to_odt``.  The ``.odt`` path derived from the upload
        name is then transformed, cleaned, saved into ``self.save_dir`` and
        validated.

        Returns the ``render_conversionerror`` response when a
        ``ConversionError`` is raised, the rendered ``templates/error.pt``
        page (after the traceback has been passed to
        ``write_traceback_to_zipfile``) for any other exception, and
        otherwise an ``HTTPFound`` redirect to the route named by
        ``self.nextStep()``.
        """
        try:
            base_name, ext = os.path.splitext(self.original_filename)
            odt_path = str(base_name) + '.odt'

            # Other office formats go through an ODT conversion first.
            if ext != '.odt':
                self._convert_to_odt(base_name)

            # Transform the ODT; the third result is not used here.
            tree, files, _errors = transform(odt_path)
            serialized = etree.tostring(tree)
            cnxml = clean_cnxml(serialized)

            save_cnxml(self.save_dir, cnxml, files.items())

            # Validation (jing) runs last, after the files are saved.
            validate_cnxml(cnxml)

        except ConversionError as e:
            return render_conversionerror(self.request, e.msg)

        except Exception:
            trace_text = traceback.format_exc()
            self.write_traceback_to_zipfile(trace_text)
            session = self.request.session
            if 'title' in session:
                del session['title']
            return render_to_response('templates/error.pt',
                                      {'traceback': trace_text},
                                      request=self.request)

        self.request.session.flash(self.message)
        target = self.request.route_url(self.nextStep())
        return HTTPFound(location=target)
    def process_gdocs_resource(self, save_dir, gdocs_resource_id, gdocs_access_token=None):
        """Fetch a Google Docs document and store it as CNXML in ``save_dir``.

        ``gdocs_resource_id`` is passed to the client's ``GetDoc``.  When
        ``gdocs_access_token`` is truthy it is wrapped in an
        ``AuthSubToken`` and used for both the entry lookup and the content
        download; otherwise ``None`` is passed instead.

        Returns a ``(title, filename)`` tuple where the title is the entry's
        title text and the filename is the fixed string
        ``"Google Document"``.
        """
        # Log in to gdocs and get a client object.
        client = getAuthorizedGoogleDocsClient()

        # Only build an AuthSub token when a token string was supplied.
        if gdocs_access_token:
            token = gdata.gauth.AuthSubToken(gdocs_access_token)
        else:
            token = None

        # Look up the entry, then download the content it points to.
        entry = client.GetDoc(gdocs_resource_id, None, token)
        content_url = entry.content.src
        html = client.get_file_content(content_url, token)

        # Transform (downloading images), clean, save and validate.
        raw_cnxml, objects = gdocs_to_cnxml(html, bDownloadImages=True)
        cnxml = clean_cnxml(raw_cnxml)
        save_cnxml(save_dir, cnxml, objects.items())
        validate_cnxml(cnxml)

        # An older note warns that returning this filename might kill the
        # ability to do multiple tabs in parallel, unless it gets offloaded
        # onto the form again.
        return (entry.title.text, "Google Document")
 def _process_gdocs_resource(klass, save_dir, html):
     """Convert Google Docs HTML to CNXML, save it in ``save_dir`` and validate it.

     Returns the fixed string ``"Google Document"``.
     """
     # Transform the HTML, asking the converter to download images too.
     raw_cnxml, extracted = gdocs_to_cnxml(html, bDownloadImages=True)
     cleaned = clean_cnxml(raw_cnxml)
     save_cnxml(save_dir, cleaned, extracted.items())
     validate_cnxml(cleaned)
     return "Google Document"
    def process(self):
        """Convert the uploaded LaTeX archive to CNXML and redirect onward.

        Reads ``self.original_filename``, transforms its content with
        ``latex_to_cnxml``, then cleans, saves (into ``self.save_dir``) and
        validates the resulting CNXML.

        Returns the ``render_conversionerror`` response when a
        ``ConversionError`` is raised, the rendered ``templates/error.pt``
        page (after the traceback has been passed to
        ``write_traceback_to_zipfile``) for any other exception, and
        otherwise an ``HTTPFound`` redirect to the route named by
        ``self.nextStep()``.
        """
        try:
            # The context manager closes the upload even when reading
            # fails; the file was previously left open.
            with open(self.original_filename) as f:
                latex_archive = f.read()

            # LaTeX 2 CNXML transformation
            cnxml, objects = latex_to_cnxml(latex_archive, self.original_filename)

            cnxml = clean_cnxml(cnxml)
            save_cnxml(self.save_dir, cnxml, objects.items())
            validate_cnxml(cnxml)

        except ConversionError as e:
            return render_conversionerror(self.request, e.msg)

        except Exception:
            tb = traceback.format_exc()
            self.write_traceback_to_zipfile(tb)
            templatePath = 'templates/error.pt'
            response = {'traceback': tb}
            # Drop the title stored in the session before showing the error.
            if 'title' in self.request.session:
                del self.request.session['title']
            return render_to_response(templatePath, response, request=self.request)

        self.request.session.flash(self.message)
        return HTTPFound(location=self.request.route_url(self.nextStep()))
Exemple #5
0
    def process(self, form):
        """Turn the uploaded office document into CNXML and redirect onward.

        Uploads whose extension is not ``.odt`` are first handed to
        ``self._convert_to_odt``.  The ``.odt`` path derived from the upload
        name is then transformed, cleaned, saved into ``self.save_dir`` and
        validated.  ``form`` is only used when an unexpected error occurs:
        it is passed on to ``write_traceback_to_zipfile``.

        Returns the ``render_conversionerror`` response when a
        ``ConversionError`` is raised, the rendered ``templates/error.pt``
        page for any other exception, and otherwise an ``HTTPFound``
        redirect to the route named by ``self.nextStep()``.
        """
        try:
            stem, suffix = os.path.splitext(self.original_filename)
            odt_path = str(stem) + '.odt'

            # Other office formats go through an ODT conversion first.
            if suffix != '.odt':
                self._convert_to_odt(stem)

            # Transform the ODT; the third result is not used here.
            tree, files, _errors = transform(odt_path)
            markup = etree.tostring(tree)
            cnxml = clean_cnxml(markup)

            save_cnxml(self.save_dir, cnxml, files.items())

            # Validation (jing) runs last, after the files are saved.
            validate_cnxml(cnxml)

        except ConversionError as e:
            return render_conversionerror(self.request, e.msg)

        except Exception:
            trace_text = traceback.format_exc()
            self.write_traceback_to_zipfile(trace_text, form)
            session = self.request.session
            if 'title' in session:
                del session['title']
            return render_to_response('templates/error.pt',
                                      {'traceback': trace_text},
                                      request=self.request)

        self.request.session.flash(self.message)
        destination = self.request.route_url(self.nextStep())
        return HTTPFound(location=destination)
Exemple #6
0
    def process(self, form):
        """Convert the uploaded LaTeX archive to CNXML and redirect onward.

        Reads ``self.original_filename``, transforms its content with
        ``latex_to_cnxml``, then cleans, saves (into ``self.save_dir``) and
        validates the resulting CNXML.  ``form`` is only used when an
        unexpected error occurs: it is passed on to
        ``write_traceback_to_zipfile``.

        Returns the ``render_conversionerror`` response when a
        ``ConversionError`` is raised, the rendered ``templates/error.pt``
        page for any other exception, and otherwise an ``HTTPFound``
        redirect to the route named by ``self.nextStep()``.
        """
        try:
            # The context manager closes the upload even when reading
            # fails; the file was previously left open.
            with open(self.original_filename) as f:
                latex_archive = f.read()

            # LaTeX 2 CNXML transformation
            cnxml, objects = latex_to_cnxml(latex_archive,
                                            self.original_filename)

            cnxml = clean_cnxml(cnxml)
            save_cnxml(self.save_dir, cnxml, objects.items())
            validate_cnxml(cnxml)

        except ConversionError as e:
            return render_conversionerror(self.request, e.msg)

        except Exception:
            tb = traceback.format_exc()
            self.write_traceback_to_zipfile(tb, form)
            templatePath = 'templates/error.pt'
            response = {'traceback': tb}
            # Drop the title stored in the session before showing the error.
            if 'title' in self.request.session:
                del self.request.session['title']
            return render_to_response(templatePath,
                                      response,
                                      request=self.request)

        self.request.session.flash(self.message)
        return HTTPFound(location=self.request.route_url(self.nextStep()))
Exemple #7
0
 def _process_gdocs_resource(klass, save_dir, html, kix=None):
     """Convert Google Docs HTML to CNXML, save it in ``save_dir`` and validate it.

     ``kix`` is forwarded to the converter as ``kixcontent``.  Returns the
     fixed string ``"Google Document"``.
     """
     # Transform the HTML, asking the converter to download images too.
     conversion = gdocs_to_cnxml(html, kixcontent=kix, bDownloadImages=True)
     raw_cnxml, extracted = conversion
     cleaned = clean_cnxml(raw_cnxml)
     save_cnxml(save_dir, cleaned, extracted.items())
     validate_cnxml(cleaned)
     return "Google Document"
    def process(self):
        """Import the document at the submitted URL as CNXML and redirect onward.

        The URL is read from ``self.form.data['url_text']``.  URLs matching
        the Google Docs pattern are delegated to
        ``self.process_gdocs_resource``; any other URL is downloaded,
        converted with ``htmlsoup_to_cnxml``, cleaned, saved into
        ``self.save_dir`` and validated.  In both cases ``title`` and
        ``filename`` are stored in the session.

        Returns the ``render_conversionerror`` response when a
        ``ConversionError`` is raised, the rendered ``templates/error.pt``
        page (after the traceback has been passed to
        ``write_traceback_to_zipfile``) for any other exception, and
        otherwise an ``HTTPFound`` redirect to the route named by
        ``self.nextStep()``.
        """
        try:
            url = self.form.data['url_text']

            # Build a regex for Google Docs URLs.  Raw string: the
            # backslashes are regex escapes, not string escapes.
            regex = re.compile(
                r"^https:\/\/docs\.google\.com\/.*document\/[^\/]\/([^\/]+)\/")
            r = regex.search(url)

            # Take special action for Google Docs URLs
            if r:
                gdocs_resource_id = r.groups()[0]
                doc_id = "document:" + gdocs_resource_id
                title, filename = self.process_gdocs_resource(self.save_dir,
                                                              doc_id)

                self.request.session['title'] = title
                self.request.session['filename'] = filename
            else:
                # download html:
                # Simple urlopen() will fail on mediawiki websites eg. Wikipedia!
                import_opener = urllib2.build_opener()
                import_opener.addheaders = [('User-agent', 'Mozilla/5.0')]
                import_request = import_opener.open(url)
                try:
                    html = import_request.read()
                finally:
                    # Always release the connection, even if read() fails.
                    import_request.close()

                # transformation
                cnxml, objects, html_title = htmlsoup_to_cnxml(
                        html, bDownloadImages=True, base_or_source_url=url)
                self.request.session['title'] = html_title

                cnxml = clean_cnxml(cnxml)
                save_cnxml(self.save_dir, cnxml, objects.items())

                # Keep the info we need for next uploads.  Note that
                # this might kill the ability to do multiple tabs in
                # parallel, unless it gets offloaded onto the form
                # again.
                self.request.session['filename'] = "HTML Document"

                validate_cnxml(cnxml)

        except ConversionError as e:
            return render_conversionerror(self.request, e.msg)

        except Exception:
            tb = traceback.format_exc()
            self.write_traceback_to_zipfile(tb)
            templatePath = 'templates/error.pt'
            response = {'traceback': tb}
            # Drop the title stored in the session before showing the error.
            if 'title' in self.request.session:
                del self.request.session['title']
            return render_to_response(templatePath, response, request=self.request)

        self.request.session.flash(self.message)
        return HTTPFound(location=self.request.route_url(self.nextStep()))
Exemple #9
0
    def process(self, zip_filename, form=None):
        """Unpack an uploaded CNXML zip, build its preview and redirect onward.

        The archive is extracted into ``self.save_dir`` and then moved to
        ``upload.zip`` inside that directory.  ``index.cnxml`` is read, an
        XHTML preview is written to ``index.xhtml``, and the cleaned CNXML
        is validated.

        ``form`` is optional and only used on unexpected errors, where it is
        passed to ``write_traceback_to_zipfile``.  The handler previously
        referenced a ``form`` name that was never defined in this method,
        which would raise ``NameError`` in place of the real error.  When no
        form is given, ``self.form`` is used if the instance has one
        (NOTE(review): confirm which form object the traceback writer
        expects here).

        Returns the ``render_conversionerror`` response when a
        ``ConversionError`` is raised, the rendered ``templates/error.pt``
        page for any other exception, and otherwise an ``HTTPFound``
        redirect to the route named by ``self.nextStep()``.
        """
        try:
            self.zip_archive = zipfile.ZipFile(zip_filename, 'r')

            # Unzip into transform directory
            self.zip_archive.extractall(path=self.save_dir)

            # Rename ZIP file so that the user can download it again
            os.rename(zip_filename, os.path.join(self.save_dir, 'upload.zip'))

            # Read CNXML
            with open(os.path.join(self.save_dir, 'index.cnxml'), 'rt') as fp:
                cnxml = fp.read()

            # Convert the CNXML to XHTML for preview
            html = cnxml_to_htmlpreview(cnxml)
            with open(os.path.join(self.save_dir, 'index.xhtml'),
                      'w') as index:
                index.write(html)

            cnxml = clean_cnxml(cnxml)
            validate_cnxml(cnxml)

        except ConversionError as e:
            return render_conversionerror(self.request, e.msg)

        except Exception:
            tb = traceback.format_exc()
            if form is None:
                form = getattr(self, 'form', None)
            self.write_traceback_to_zipfile(tb, form)
            templatePath = 'templates/error.pt'
            response = {'traceback': tb}
            # Drop the title stored in the session before showing the error.
            if 'title' in self.request.session:
                del self.request.session['title']
            return render_to_response(templatePath,
                                      response,
                                      request=self.request)

        self.request.session.flash(self.message)
        return HTTPFound(location=self.request.route_url(self.nextStep()))
    def process(self, zip_filename):
        """Unpack an uploaded CNXML zip, build its preview and redirect onward.

        The archive is extracted into ``self.save_dir`` and then moved to
        ``upload.zip`` inside that directory.  ``index.cnxml`` is read, an
        XHTML preview is written to ``index.xhtml``, and the cleaned CNXML
        is validated.

        Returns the ``render_conversionerror`` response when a
        ``ConversionError`` is raised, the rendered ``templates/error.pt``
        page (after the traceback has been passed to
        ``write_traceback_to_zipfile``) for any other exception, and
        otherwise an ``HTTPFound`` redirect to the route named by
        ``self.nextStep()``.
        """
        try:
            # Open the upload and unpack it into the transform directory.
            self.zip_archive = zipfile.ZipFile(zip_filename, 'r')
            self.zip_archive.extractall(path=self.save_dir)

            # Keep the ZIP next to its contents so the user can download
            # it again.
            kept_zip = os.path.join(self.save_dir, 'upload.zip')
            os.rename(zip_filename, kept_zip)

            cnxml_path = os.path.join(self.save_dir, 'index.cnxml')
            with open(cnxml_path, 'rt') as source:
                cnxml = source.read()

            # The preview is generated from the CNXML exactly as read.
            preview = cnxml_to_htmlpreview(cnxml)
            preview_path = os.path.join(self.save_dir, 'index.xhtml')
            with open(preview_path, 'w') as target:
                target.write(preview)

            cleaned = clean_cnxml(cnxml)
            validate_cnxml(cleaned)

        except ConversionError as e:
            return render_conversionerror(self.request, e.msg)

        except Exception:
            trace_text = traceback.format_exc()
            self.write_traceback_to_zipfile(trace_text)
            session = self.request.session
            if 'title' in session:
                del session['title']
            return render_to_response('templates/error.pt',
                                      {'traceback': trace_text},
                                      request=self.request)

        self.request.session.flash(self.message)
        destination = self.request.route_url(self.nextStep())
        return HTTPFound(location=destination)
    def process(self, form):
        """Import the document at the submitted URL as CNXML and redirect onward.

        The URL is read from ``form.data['url_text']``.  For URLs matching
        the Google Docs pattern the document's HTML export is requested; on
        a 2xx status the work is handed to
        ``GoogleDocProcessor.process_gdocs_resource`` and its result is
        returned, otherwise a failure message is flashed and the user is
        redirected to the ``choose`` route.  Any other URL is downloaded,
        converted with ``htmlsoup_to_cnxml``, cleaned, saved into
        ``self.save_dir`` and validated.

        Returns the ``render_conversionerror`` response when a
        ``ConversionError`` is raised, the rendered ``templates/error.pt``
        page for any other exception, and otherwise an ``HTTPFound``
        redirect to the route named by ``self.nextStep()``.
        """
        try:
            url = form.data['url_text']

            # Build a regex for Google Docs URLs.  Raw string: the
            # backslashes are regex escapes, not string escapes.
            regex = re.compile(
                r"^https:\/\/docs\.google\.com\/.*document\/[^\/]\/([^\/]+)\/")
            r = regex.search(url)

            # Take special action for Google Docs URLs
            if r is not None:
                gdocs_resource_id = r.groups()[0]
                http = httplib2.Http()
                http.follow_redirects = False
                try:
                    resp, html = http.request(
                        'https://docs.google.com/document/d/%s/export?format=html&confirm=no_antivirus' % gdocs_resource_id)
                except HttpError:
                    # Not swallowed silently: control falls through to the
                    # failure flash and redirect below.
                    pass
                else:
                    # Check that status was OK, google docs sends a redirect to a login
                    # page if not.  Floor division keeps this a status-class
                    # test even where "/" is true division.
                    if resp.status // 100 == 2:
                        # Get the title (does not work anymore)
                        title = 'Untitled Google Document'

                        # Process it
                        P = GoogleDocProcessor(self.request)
                        return P.process_gdocs_resource(html, title, form)
                self.request.session.flash('Failed to convert google document')
                return HTTPFound(location=self.request.route_url('choose'))
            else:
                # download html:
                # Simple urlopen() will fail on mediawiki websites eg. Wikipedia!
                import_opener = urllib2.build_opener()
                import_opener.addheaders = [('User-agent', 'Mozilla/5.0')]
                import_request = import_opener.open(url)
                try:
                    html = import_request.read()
                finally:
                    # Always release the connection, even if read() fails.
                    import_request.close()

                # transformation
                cnxml, objects, html_title = htmlsoup_to_cnxml(
                        html, bDownloadImages=True, base_or_source_url=url)
                self.request.session['title'] = html_title

                cnxml = clean_cnxml(cnxml)
                save_cnxml(self.save_dir, cnxml, objects.items())

                # Keep the info we need for next uploads.  Note that
                # this might kill the ability to do multiple tabs in
                # parallel, unless it gets offloaded onto the form
                # again.
                self.request.session['filename'] = "HTML Document"

                validate_cnxml(cnxml)

        except ConversionError as e:
            return render_conversionerror(self.request, e.msg)

        except Exception:
            tb = traceback.format_exc()
            self.write_traceback_to_zipfile(tb, form)
            templatePath = 'templates/error.pt'
            response = {'traceback': tb}
            # Drop the title stored in the session before showing the error.
            if 'title' in self.request.session:
                del self.request.session['title']
            return render_to_response(templatePath, response, request=self.request)

        self.request.session.flash(self.message)
        return HTTPFound(location=self.request.route_url(self.nextStep()))
Exemple #12
0
    def process(self, form):
        """Import the document at the submitted URL as CNXML and redirect onward.

        The URL is read from ``form.data['url_text']``.  For URLs matching
        the Google Docs pattern both the HTML export and the ``kix`` export
        are requested; when the HTML request has a 2xx status the work is
        handed to ``GoogleDocProcessor.process_gdocs_resource`` and its
        result is returned, otherwise a failure message is flashed and the
        user is redirected to the ``choose`` route.  Any other URL is
        downloaded, converted with ``htmlsoup_to_cnxml``, cleaned, saved
        into ``self.save_dir`` and validated.

        Returns the ``render_conversionerror`` response when a
        ``ConversionError`` is raised, the rendered ``templates/error.pt``
        page for any other exception, and otherwise an ``HTTPFound``
        redirect to the route named by ``self.nextStep()``.
        """
        try:
            url = form.data['url_text']

            # Build a regex for Google Docs URLs.  Raw string: the
            # backslashes are regex escapes, not string escapes.
            regex = re.compile(
                r"^https:\/\/docs\.google\.com\/.*document\/[^\/]\/([^\/]+)\/")
            r = regex.search(url)

            # Take special action for Google Docs URLs
            if r is not None:
                gdocs_resource_id = r.groups()[0]
                http = httplib2.Http()
                http.follow_redirects = False
                try:
                    resp, html = http.request(
                        'https://docs.google.com/document/d/%s/export?format=html&confirm=no_antivirus'
                        % gdocs_resource_id)
                    # NOTE(review): the status of this second response is
                    # never inspected; only the HTML response is checked
                    # below.  Confirm whether a failed kix export should
                    # fall back to kix=None.
                    resp2, kix = http.request(
                        'https://docs.google.com/feeds/download/documents/export/Export?id=%s&exportFormat=kix'
                        % gdocs_resource_id)
                except HttpError:
                    # Not swallowed silently: control falls through to the
                    # failure flash and redirect below.
                    pass
                else:
                    # Check that status was OK, google docs sends a redirect to a login
                    # page if not.  Floor division keeps this a status-class
                    # test even where "/" is true division.
                    if resp.status // 100 == 2:
                        # Get the title
                        title = 'Untitled Google Document'

                        # Process it
                        P = GoogleDocProcessor(self.request)
                        return P.process_gdocs_resource(html, title, form, kix)
                self.request.session.flash('Failed to convert google document')
                return HTTPFound(location=self.request.route_url('choose'))
            else:
                # download html:
                # Simple urlopen() will fail on mediawiki websites eg. Wikipedia!
                import_opener = urllib2.build_opener()
                import_opener.addheaders = [('User-agent', 'Mozilla/5.0')]
                import_request = import_opener.open(url)
                try:
                    html = import_request.read()
                finally:
                    # Always release the connection, even if read() fails.
                    import_request.close()

                # transformation
                cnxml, objects, html_title = htmlsoup_to_cnxml(
                    html, bDownloadImages=True, base_or_source_url=url)
                self.request.session['title'] = html_title

                cnxml = clean_cnxml(cnxml)
                save_cnxml(self.save_dir, cnxml, objects.items())

                # Keep the info we need for next uploads.  Note that
                # this might kill the ability to do multiple tabs in
                # parallel, unless it gets offloaded onto the form
                # again.
                self.request.session['filename'] = "HTML Document"

                validate_cnxml(cnxml)

        except ConversionError as e:
            return render_conversionerror(self.request, e.msg)

        except Exception:
            tb = traceback.format_exc()
            self.write_traceback_to_zipfile(tb, form)
            templatePath = 'templates/error.pt'
            response = {'traceback': tb}
            # Drop the title stored in the session before showing the error.
            if 'title' in self.request.session:
                del self.request.session['title']
            return render_to_response(templatePath,
                                      response,
                                      request=self.request)

        self.request.session.flash(self.message)
        return HTTPFound(location=self.request.route_url(self.nextStep()))