def save_cnxml(save_dir, cnxml, files):
    # write CNXML output
    save_and_backup_file(save_dir, 'index.cnxml', cnxml)

    # write files
    for filename, content in files:
        filename = os.path.join(save_dir, filename)
        f = open(filename, 'wb') # write binary, important!
        f.write(content)
        f.close()

    # we generate the preview and save the error
    conversionerror = None
    try:
        htmlpreview = cnxml_to_htmlpreview(cnxml)
    except libxml2.parserError:
        conversionerror = traceback.format_exc()

    # Zip up all the files. This is done now, since we have all the files
    # available, and it also allows us to post a simple download link.
    # Note that we cannot use zipfile as context manager, as that is only
    # available from python 2.7
    # TODO: Do a filesize check xxxx
    if conversionerror is None:
        save_and_backup_file(save_dir, 'index.html', htmlpreview)
        save_zip(save_dir, cnxml, htmlpreview, files)
    else:
        save_zip(save_dir, cnxml, None, files)
        raise ConversionError(conversionerror)
예제 #2
0
    def _download_module(self, module_url):
        request = self.request
        session = request.session
        conn = sword2cnx.Connection(session['login'].service_document_url,
                                    user_name=session['login'].username,
                                    user_pass=session['login'].password,
                                    always_authenticate=True,
                                    download_service_document=False)

        parts = urlparse.urlsplit(module_url)
        path = parts.path.split('/')
        path = path[:path.index('sword')]
        module_url = '%s://%s%s' % (parts.scheme, parts.netloc, '/'.join(path))

        # example: http://cnx.org/Members/user001/m17222/sword/editmedia
        zip_file = conn.get_cnx_module(module_url = module_url,
                                       packaging = 'zip')
         
        save_dir = get_save_dir(request)
        if save_dir is None:
            request.session['upload_dir'], save_dir = create_save_dir(request)
        extract_to_save_dir(zip_file, save_dir)

        cnxml_file = open(os.path.join(save_dir, 'index.cnxml'), 'rb')
        cnxml = cnxml_file.read()
        cnxml_file.close()
        conversionerror = None
        try:
            htmlpreview = cnxml_to_htmlpreview(cnxml)
            save_and_backup_file(save_dir, 'index.html', htmlpreview)
            files = get_files(save_dir)
            save_zip(save_dir, cnxml, htmlpreview, files)
        except libxml2.parserError:
            conversionerror = traceback.format_exc()
            raise ConversionError(conversionerror)
예제 #3
0
def process_import(save_dir_path, original_filepath, filename, download_url):
    # convert from other office format to odt
    odt_filename = '%s.odt' % filename
    odt_filepath = str(os.path.join(save_dir_path, odt_filename))
    # run jod service
    converter = JOD.DocumentConverterClient()
    try:
        converter.convert(original_filepath, 'odt', odt_filepath)
    except Exception as e:
        raise e

    # check file existed
    try:
        fp = open(odt_filepath, 'r')
        fp.close()
    except IOError as io:
        # TODO: raise exception
        raise io

    # convert to cnxml
    tree, files, errors = transform(odt_filepath)
    cnxml = clean_cnxml(etree.tostring(tree))

    # convert to html
    html = cnxml_to_htmlpreview(cnxml)

    # produce zipfile
    ram = StringIO()
    zip_archive = zipfile.ZipFile(ram, 'w')
    zip_archive.writestr('index.html', html)
    for fname, fdata in files.items():
        zip_archive.writestr(fname, fdata)
    zip_archive.close()

    # save zipfile
    zip_file_path = os.path.join(save_dir_path, '%s.zip' % filename)
    if os.path.exists(zip_file_path):
        os.rename(zip_file_path, zip_file_path + '~')
    f = open(zip_file_path, 'wb')
    f.write(ram.getvalue())
    f.close()

    return download_url
예제 #4
0
    def process(self, zip_filename):
        try:
            self.zip_archive = zipfile.ZipFile(zip_filename, 'r')

            # Unzip into transform directory
            self.zip_archive.extractall(path=self.save_dir)

            # Rename ZIP file so that the user can download it again
            os.rename(zip_filename, os.path.join(self.save_dir, 'upload.zip'))

            # Read CNXML
            with open(os.path.join(self.save_dir, 'index.cnxml'), 'rt') as fp:
                cnxml = fp.read()

            # Convert the CNXML to XHTML for preview
            html = cnxml_to_htmlpreview(cnxml)
            with open(os.path.join(self.save_dir, 'index.xhtml'),
                      'w') as index:
                index.write(html)

            cnxml = clean_cnxml(cnxml)
            validate_cnxml(cnxml)

        except ConversionError as e:
            return render_conversionerror(self.request, e.msg)

        except Exception:
            tb = traceback.format_exc()
            self.write_traceback_to_zipfile(tb, form)
            templatePath = 'templates/error.pt'
            response = {'traceback': tb}
            if ('title' in self.request.session):
                del self.request.session['title']
            return render_to_response(templatePath,
                                      response,
                                      request=self.request)

        self.request.session.flash(self.message)
        return HTTPFound(location=self.request.route_url(self.nextStep()))
def save_cnxml(save_dir, cnxml, files):
    # write CNXML output
    save_and_backup_file(save_dir, 'index.cnxml', cnxml)

    # write files
    for filename, content in files:
        filename = os.path.join(save_dir, filename)
        f = open(filename, 'wb') # write binary, important!
        f.write(content)
        f.close()

    # we generate the preview and save the error 
    conversionerror = None
    try:
        htmlpreview = cnxml_to_htmlpreview(cnxml)
    except libxml2.parserError:
        conversionerror = traceback.format_exc()     

    # Zip up all the files. This is done now, since we have all the files
    # available, and it also allows us to post a simple download link.
    # Note that we cannot use zipfile as context manager, as that is only
    # available from python 2.7
    # TODO: Do a filesize check xxxx
    ram = StringIO()
    zip_archive = zipfile.ZipFile(ram, 'w')
    zip_archive.writestr('index.cnxml', cnxml)
    if not conversionerror:
        save_and_backup_file(save_dir, 'index.xhtml', htmlpreview)
        zip_archive.writestr('index.xhtml', htmlpreview)
    for filename, fileObj in files:
        zip_archive.writestr(filename, fileObj)
    zip_archive.close()

    zip_filename = os.path.join(save_dir, 'upload.zip')
    save_and_backup_file(save_dir, zip_filename, ram.getvalue(), mode='wb')

    if conversionerror:
        raise ConversionError(conversionerror)
    def process(self, zip_filename):
        try:
            self.zip_archive = zipfile.ZipFile(zip_filename, 'r')

            # Unzip into transform directory
            self.zip_archive.extractall(path=self.save_dir)

            # Rename ZIP file so that the user can download it again
            os.rename(zip_filename,
                      os.path.join(self.save_dir, 'upload.zip'))

            # Read CNXML
            with open(os.path.join(self.save_dir, 'index.cnxml'), 'rt') as fp:
                cnxml = fp.read()

            # Convert the CNXML to XHTML for preview
            html = cnxml_to_htmlpreview(cnxml)
            with open(os.path.join(self.save_dir, 'index.xhtml'), 'w') as index:
                index.write(html)

            cnxml = clean_cnxml(cnxml)
            validate_cnxml(cnxml)

        except ConversionError as e:
            return render_conversionerror(self.request, e.msg)

        except Exception:
            tb = traceback.format_exc()
            self.write_traceback_to_zipfile(tb)
            templatePath = 'templates/error.pt'
            response = {'traceback': tb}
            if('title' in self.request.session):
                del self.request.session['title']
            return render_to_response(templatePath, response, request=self.request)

        self.request.session.flash(self.message)
        return HTTPFound(location=self.request.route_url(self.nextStep()))
def choose_view(request):
    check_login(request)

    templatePath = 'templates/choose.pt'

    form = Form(request, schema=UploadSchema)
    field_list = [('upload', 'File')]

    # clear the session
    if 'transformerror' in request.session:
        del request.session['transformerror']
    if 'title' in request.session:
        del request.session['title']

    # Check for successful form completion
    if form.validate():
        try: # Catch-all exception block
            # Create a directory to do the conversions
            now_string = datetime.datetime.now().strftime('%Y%m%d-%H%M%S')
            # TODO: This has a good chance of being unique, but even so...
            temp_dir_name = '%s-%s' % (request.session['username'], now_string)
            save_dir = os.path.join(
                request.registry.settings['transform_dir'],
                temp_dir_name
                )
            os.mkdir(save_dir)

            # Keep the info we need for next uploads.  Note that this
            # might kill the ability to do multiple tabs in parallel,
            # unless it gets offloaded onto the form again.
            request.session['upload_dir'] = temp_dir_name
            if form.data['upload'] is not None:
                request.session['filename'] = form.data['upload'].filename

            # Google Docs Conversion
            # if we have a Google Docs ID and Access token.
            if form.data['gdocs_resource_id']:
                gdocs_resource_id = form.data['gdocs_resource_id']
                gdocs_access_token = form.data['gdocs_access_token']

                form.data['gdocs_resource_id'] = None
                form.data['gdocs_access_token'] = None
                
                (request.session['title'], request.session['filename']) = \
                    process_gdocs_resource(save_dir, \
                                           gdocs_resource_id, \
                                           gdocs_access_token)

            # HTML URL Import:
            elif form.data.get('url_text'):
                url = form.data['url_text']

                form.data['url_text'] = None

                # Build a regex for Google Docs URLs
                regex = re.compile("^https:\/\/docs\.google\.com\/.*document\/[^\/]\/([^\/]+)\/")
                r = regex.search(url)

                # Take special action for Google Docs URLs
                if r:
                    gdocs_resource_id = r.groups()[0]
                    (request.session['title'], request.session['filename']) = \
                        process_gdocs_resource(save_dir, "document:" + gdocs_resource_id)
                else:
                    # download html:
                    #html = urllib2.urlopen(url).read() 
                    # Simple urlopen() will fail on mediawiki websites like e.g. Wikipedia!
                    import_opener = urllib2.build_opener()
                    import_opener.addheaders = [('User-agent', 'Mozilla/5.0')]
                    try:
                        import_request = import_opener.open(url)
                        html = import_request.read()

                        # transformation            
                        cnxml, objects, html_title = htmlsoup_to_cnxml(
                        html, bDownloadImages=True, base_or_source_url=url)
                        request.session['title'] = html_title

                        cnxml = clean_cnxml(cnxml)
                        save_cnxml(save_dir, cnxml, objects.items())

                        # Keep the info we need for next uploads.  Note that
                        # this might kill the ability to do multiple tabs in
                        # parallel, unless it gets offloaded onto the form
                        # again.
                        request.session['filename'] = "HTML Document"

                        validate_cnxml(cnxml)

                    except urllib2.URLError, e:
                        request['errors'] = ['The URL %s could not be opened' %url,]
                        response = {
                            'form': FormRenderer(form),
                            }
                        return render_to_response(templatePath, response, request=request)

            # Office, CNXML-ZIP or LaTeX-ZIP file
            else:
                # Save the original file so that we can convert, plus keep it.
                original_filename = os.path.join(
                    save_dir,
                    form.data['upload'].filename.replace(os.sep, '_'))
                saved_file = open(original_filename, 'wb')
                input_file = form.data['upload'].file
                shutil.copyfileobj(input_file, saved_file)
                saved_file.close()
                input_file.close()

                form.data['upload'] = None

                # Check if it is a ZIP file with at least index.cnxml or a LaTeX file in it
                try:
                    zip_archive = zipfile.ZipFile(original_filename, 'r')
                    is_zip_archive = ('index.cnxml' in zip_archive.namelist())
                    
                    # Do we have a latex file?
                    if not is_zip_archive:
                        # incoming latex.zip must contain a latex.tex file, where "latex" is the base name.
                        (latex_head, latex_tail) = os.path.split(original_filename)
                        (latex_root, latex_ext)  = os.path.splitext(latex_tail)
                        latex_basename = latex_root
                        latex_filename = latex_basename + '.tex'
                        is_latex_archive = (latex_filename in zip_archive.namelist())

                except zipfile.BadZipfile:
                    is_zip_archive = False
                    is_latex_archive = False

                # ZIP package from previous conversion
                if is_zip_archive:
                    # Unzip into transform directory
                    zip_archive.extractall(path=save_dir)

                    # Rename ZIP file so that the user can download it again
                    os.rename(original_filename, os.path.join(save_dir, 'upload.zip'))

                    # Read CNXML
                    with open(os.path.join(save_dir, 'index.cnxml'), 'rt') as fp:
                        cnxml = fp.read()

                    # Convert the CNXML to XHTML for preview
                    html = cnxml_to_htmlpreview(cnxml)
                    with open(os.path.join(save_dir, 'index.xhtml'), 'w') as index:
                        index.write(html)

                    cnxml = clean_cnxml(cnxml)
                    validate_cnxml(cnxml)
                
                # LaTeX
                elif is_latex_archive:
                    f = open(original_filename)
                    latex_archive = f.read()

                    # LaTeX 2 CNXML transformation
                    cnxml, objects = latex_to_cnxml(latex_archive, original_filename)

                    cnxml = clean_cnxml(cnxml)
                    save_cnxml(save_dir, cnxml, objects.items())
                    validate_cnxml(cnxml)

                # OOo / MS Word Conversion
                else:
                    # Convert from other office format to odt if needed
                    odt_filename = original_filename
                    filename, extension = os.path.splitext(original_filename)
                    if(extension != '.odt'):
                        odt_filename= '%s.odt' % filename
                        command = '/usr/bin/soffice -headless -nologo -nofirststartwizard "macro:///Standard.Module1.SaveAsOOO(' + escape_system(original_filename)[1:-1] + ',' + odt_filename + ')"'
                        os.system(command)
                        try:
                            fp = open(odt_filename, 'r')
                            fp.close()
                        except IOError as io:
                            raise ConversionError("%s not found" %
                                                  original_filename)
                    # Convert and save all the resulting files.

                    tree, files, errors = transform(odt_filename)
                    cnxml = clean_cnxml(etree.tostring(tree))
                    save_cnxml(save_dir, cnxml, files.items())

                    # now validate with jing
                    validate_cnxml(cnxml)