Python bodyfinder Examples, Products.PortalTransforms.libtransforms.utils.bodyfinder Python Examples

Example #1

0

Show file

File: office_com.py Project: plone/Products.PortalTransforms

 def html(self):
     """Return the body extracted from the scrubbed ``.htm`` file.

     Reads ``self.fullname + '.htm'``, passes the text through
     ``SafeHTML().scrub_html`` and returns what ``bodyfinder`` yields
     for the scrubbed markup.
     """
     # The context manager closes the file even when read() raises.
     with open(self.fullname + '.htm', 'r') as htmlfile:
         html = htmlfile.read()
     html = SafeHTML().scrub_html(html)
     return bodyfinder(html)

Example #2

0

Show file

File: office_com.py Project: pabo3000/Products.PortalTransforms

 def html(self):
     """Return the body extracted from the scrubbed ``.htm`` file.

     The file ``self.fullname + '.htm'`` is read in full, cleaned with
     ``SafeHTML().scrub_html`` and handed to ``bodyfinder``; the value
     returned by ``bodyfinder`` is the result.
     """
     # Use a context manager so the handle is released on any error.
     with open(self.fullname + '.htm', 'r') as htmlfile:
         html = htmlfile.read()
     html = SafeHTML().scrub_html(html)
     return bodyfinder(html)

Example #3

0

Show file

File: office_wvware.py Project: kkdhanesh/NBADEMO

 def html(self):
     """Return the body extracted from the generated HTML file.

     Reads ``<self.tmpdir>/<self.__name__>.html``, cleans the text with
     ``scrubHTMLNoRaise`` and returns the result of ``bodyfinder``.
     """
     # The context manager closes the file even when read() raises.
     with open("%s/%s.html" % (self.tmpdir, self.__name__), 'r') as htmlfile:
         html = htmlfile.read()
     html = scrubHTMLNoRaise(html)
     return bodyfinder(html)

Example #4

0

Show file

File: office_com.py Project: CGTIC/Plone_SP

 def html(self):
     """Return the body extracted from the scrubbed ``.htm`` file.

     Reads ``self.fullname + '.htm'``, cleans the text with
     ``scrubHTMLNoRaise`` and returns the result of ``bodyfinder``.
     """
     # Close the file deterministically, including on a failed read().
     with open(self.fullname + '.htm', 'r') as htmlfile:
         html = htmlfile.read()
     html = scrubHTMLNoRaise(html)
     return bodyfinder(html)

Example #5

0

Show file

File: office_uno.py Project: yuanpli/erp5

 def html(self):
     """Return the body extracted from the converted output file.

     Reads ``self.outputfile``, cleans the text with ``scrubHTML`` and
     returns the result of ``bodyfinder``.
     """
     # The context manager closes the file even when read() raises.
     with open(self.outputfile, 'r') as htmlfile:
         html = htmlfile.read()
     html = scrubHTML(html)
     return bodyfinder(html)

Example #6

0

Show file

 def html(self):
     """Return the body extracted from the scrubbed ``.htm`` file.

     The content of ``self.fullname + '.htm'`` is cleaned with
     ``scrubHTMLNoRaise`` and then passed to ``bodyfinder``, whose
     return value is handed back to the caller.
     """
     # ``with`` guarantees the handle is released if read() fails.
     with open(self.fullname + '.htm', 'r') as htmlfile:
         html = htmlfile.read()
     html = scrubHTMLNoRaise(html)
     return bodyfinder(html)

Example #7

0

Show file

File: office_wvware.py Project: enfold-josh/Products.PortalTransforms

 def html(self):
     """Return the body extracted from the generated HTML file.

     Reads ``<self.tmpdir>/<self.__name__>.html``, cleans the text with
     ``scrubHTML`` and returns the result of ``bodyfinder``.
     """
     # Close the file deterministically, including on a failed read().
     with open("%s/%s.html" % (self.tmpdir, self.__name__), 'r') as htmlfile:
         html = htmlfile.read()
     html = scrubHTML(html)
     return bodyfinder(html)

Example #8

0

Show file

File: office_uno.py Project: pigaov10/plone4.3

 def html(self):
     """Return the body extracted from the converted output file.

     Reads ``self.outputfile``, cleans the text with
     ``scrubHTMLNoRaise`` and returns the result of ``bodyfinder``.
     """
     # ``with`` guarantees the handle is released if read() fails.
     with open(self.outputfile, "r") as htmlfile:
         html = htmlfile.read()
     html = scrubHTMLNoRaise(html)
     return bodyfinder(html)

Example #9

0

Show file

File: office_com.py Project: kiorky/Products.PortalTransforms

 def html(self):
     """Return the body extracted from the scrubbed ``.htm`` file.

     Reads ``self.fullname + ".htm"``, cleans the text with
     ``scrubHTML`` and returns the result of ``bodyfinder``.
     """
     # The context manager closes the file even when read() raises.
     with open(self.fullname + ".htm", "r") as htmlfile:
         html = htmlfile.read()
     html = scrubHTML(html)
     return bodyfinder(html)

Example #10

0

Show file

File: office_wvware_gen.py Project: starzel/Products.AROfficeTransforms

 def _html(self):
     """Return ``xmltag`` followed by the body of the generated HTML.

     Reads ``<self.tmpdir>/<self.__name__>.html``, runs ``bodyfinder``
     on the raw text and prepends the module-level ``xmltag`` value.
     """
     # The context manager closes the file even when read() raises.
     with open(pjoin(self.tmpdir, self.__name__+".html"), 'r') as htmlfile:
         html = htmlfile.read()
     # NOTE: the scrubbing step is disabled here; the call is kept for
     # reference only.
     #html = scrubHTML(html)
     body = bodyfinder(html)
     return xmltag + body

Example #11

0

Show file

File: office_wvware_gen.py Project: collective/Products.AROfficeTransforms

 def _html(self):
     """Return ``xmltag`` followed by the body of the generated HTML.

     The file ``<self.tmpdir>/<self.__name__>.html`` is read in full,
     passed unscrubbed to ``bodyfinder``, and the module-level
     ``xmltag`` value is prepended to the result.
     """
     html_path = pjoin(self.tmpdir, self.__name__ + ".html")
     # ``with`` guarantees the handle is released if read() fails.
     with open(html_path, 'r') as htmlfile:
         html = htmlfile.read()
     # NOTE: the scrubbing step is disabled here; the call is kept for
     # reference only.
     #html = scrubHTML(html)
     body = bodyfinder(html)
     return xmltag + body

Example #12

0

Show file

File: powerpoint_ppthtml.py Project: hermancaldara/buildout_portal_iff_plone3

 def _html(self):
     """Return the body extracted from the generated HTML file.

     Reads ``<self.tmpdir>/<self.__name__>.html``; when the module-level
     ``process_double_encoding`` flag is truthy the text first goes
     through ``noDoubleEncoding``. The text is then cleaned with
     ``scrubHTML`` and the result of ``bodyfinder`` is returned.
     """
     # Close the file as soon as it is read, so that an error in the
     # post-processing below (or in read() itself) cannot leak the handle.
     with open(pjoin(self.tmpdir, self.__name__+".html"), 'r') as htmlfile:
         html = htmlfile.read()
     if process_double_encoding:
         html = noDoubleEncoding(html)
     html = scrubHTML(html)
     return bodyfinder(html)

Example #13

0

Show file

 def _html(self):
     """Return the body extracted from the generated HTML file.

     The file ``<self.tmpdir>/<self.__name__>.html`` is read in full.
     If the module-level ``process_double_encoding`` flag is truthy the
     text is passed through ``noDoubleEncoding``; it is then cleaned
     with ``scrubHTML`` and handed to ``bodyfinder``.
     """
     html_path = pjoin(self.tmpdir, self.__name__+".html")
     # The handle is released right after reading, even on error, rather
     # than being held open across the encoding fix-up.
     with open(html_path, 'r') as htmlfile:
         html = htmlfile.read()
     if process_double_encoding:
         html = noDoubleEncoding(html)
     html = scrubHTML(html)
     return bodyfinder(html)

Example #14

0

Show file

File: tika.py Project: collective/collective.tika

class document(commandtransform):
    """Command transform that runs a ``tika`` binary on a document.

    The document is copied into a temporary work directory, the binary's
    standard output is captured into an ``.html`` file there, and
    ``html()`` returns the sanitized body of that file as UTF-8 bytes.
    """

    # Suffix appended to the document name when it does not already end
    # with it; empty by default.
    file_ext = ''

    def __init__(self, name, data, exec_prefix=None):
        """ Initialization: create tmp work directory and copy the
        document into a file.

        When *exec_prefix* is given, ``<exec_prefix>/tika-bin`` is tried
        first, then ``<exec_prefix>/tika``; if neither exists a warning
        is logged and the bare command name ``tika`` is used.
        """
        tika_path = 'tika'
        if exec_prefix is not None:
            tika_path = os.path.join(exec_prefix, 'tika-bin')
            if not os.path.exists(tika_path):
                tika_path = os.path.join(exec_prefix, 'tika')
                if not os.path.exists(tika_path):
                    log.warn('no tika-bin or tika found in exec-prefix: %s' %
                             tika_path)
                    tika_path = 'tika'

        commandtransform.__init__(self, name, binary=tika_path)
        name = self.name()
        if not name.endswith(self.file_ext):
            name = name + self.file_ext
        self.tmpdir, self.fullname = self.initialize_tmpdir(data,
                                                            filename=name)

    def convert(self):
        "Convert the document"
        tmpdir = self.tmpdir
        html_path = "%s/%s.html" % (self.tmpdir, self.__name__)

        # for windows, install wvware from GnuWin32 at C:\Program Files\GnuWin32\bin
        # you can use:
        # wvware.exe -c ..\share\wv\wvHtml.xml --charset=utf-8 -d d:\temp d:\temp\test.doc > test.html

        # The output file is always created (and left empty when the
        # command is not run); the context manager closes it even if an
        # unexpected exception such as OSError escapes check_call.
        with open(html_path, 'w') as htmlfile:
            if os.name == 'posix':
                try:
                    subprocess.check_call([self.binary, self.fullname],
                                          stdout=htmlfile,
                                          cwd=tmpdir)
                except subprocess.CalledProcessError as cpe:
                    log.warn('Could not transform %s: %s' %
                             (self.fullname, cpe))

    def html(self):
        """Return the sanitized body of the generated HTML, UTF-8 encoded.

        The text is converted with ``safe_unicode`` and sanitized with
        ``laundryutils.sanitize``; if sanitizing fails for any reason an
        empty document is used instead.
        """
        html_path = "%s/%s.html" % (self.tmpdir, self.__name__)
        with open(html_path, 'r') as htmlfile:
            html = htmlfile.read()
        html = safe_unicode(html)
        try:
            html = laundryutils.sanitize(html, HTMLCleaner)
        except Exception:
            # Deliberate best effort: fall back to an empty document
            # rather than failing the whole transform.
            html = ''
        # scrubHTML is EVIL, takes ages!
        #html = scrubHTML(html)
        body = bodyfinder(html)
        body = body.encode('utf-8')
        return body

Example #15

0

Show file

File: excel_to_html.py Project: collective/collective.transform.ppt_xl

 def convert(self, data, cache, **kwargs):
     """Run ``xlhtml`` on *data* and store the resulting body in *cache*.

     *data* is written to a named temporary file whose path is passed
     to the ``xlhtml`` command. The command's standard output is handed
     to ``bodyfinder`` and the result is stored with ``cache.setData``.
     Returns *cache*.
     """
     # The context manager closes (and thereby removes) the temporary
     # file as soon as the command has finished, even on error.
     with NamedTemporaryFile() as tmp:
         tmp.write(data)
         tmp.flush()
         cmd = ['xlhtml', tmp.name]
         p = Popen(cmd, stdout=PIPE, stderr=PIPE)
         # stderr is captured so it does not reach the console, but it
         # is not used.
         stdout, _stderr = p.communicate()
     body = bodyfinder(stdout)
     cache.setData(body)
     return cache

Example #16

0

Show file

    def convert(self, data, cache, **kwargs):
        """Convert *data* to HTML and store the body and images in *cache*.

        The data is written to a temporary directory under the file name
        ``unknown.pdf``, ``self.invokeCommand`` produces the HTML, and
        any images reported by ``self.subObjects`` are collected through
        ``self.fixImages``. The result of ``bodyfinder`` on the HTML is
        stored with ``cache.setData`` and the collected objects with
        ``cache.setSubObjects``. Returns *cache*.
        """
        kwargs['filename'] = 'unknown.pdf'

        tmpdir, fullname = self.initialize_tmpdir(data, **kwargs)
        objects = {}
        try:
            html = self.invokeCommand(tmpdir, fullname)
            path, images = self.subObjects(tmpdir)
            if images:
                self.fixImages(path, images, objects)
        finally:
            # Remove the temporary directory even when the command or the
            # image handling fails, so failed conversions leave nothing
            # behind.
            self.cleanDir(tmpdir)
        cache.setData(bodyfinder(html))
        cache.setSubObjects(objects)
        return cache

Example #17

0

Show file

File: excel_xlhtml.py Project: hermancaldara/buildout_portal_iff_plone3

 def _html(self):
     """Return the body of the generated HTML, or ``""`` if unreadable.

     Reads ``<self.tmpdir>/<self.__name__>.html``; an ``IOError`` while
     opening or reading yields an empty string. When the module-level
     ``process_double_encoding`` flag is truthy the text goes through
     ``noDoubleEncoding`` before ``bodyfinder`` is applied.
     """
     try:
         # ``with`` closes the handle even when read() raises, which the
         # early return below would otherwise skip.
         with open(pjoin(self.tmpdir, self.__name__+".html"), 'r') as htmlfile:
             html = htmlfile.read()
     except IOError:
         return ""
     if process_double_encoding :
         html = noDoubleEncoding(html)
     # xlhtml gives very complex html; scrubHTML takes far too long, so
     # scrubbing is disabled here.
     #html = scrubHTML(html)
     return bodyfinder(html)

Example #18

0

Show file

File: rtf_to_html.py Project: CGTIC/Plone_SP

    def convert(self, data, cache, **kwargs):
        """Convert *data* to HTML and store the body and images in *cache*.

        The data is written to a temporary directory under the file name
        ``unknow.rtf``, ``self.invokeCommand`` produces the HTML, and
        any images reported by ``self.subObjects`` are collected through
        ``self.fixImages``. The result of ``bodyfinder`` on the HTML is
        stored with ``cache.setData`` and the collected objects with
        ``cache.setSubObjects``. Returns *cache*.
        """
        kwargs['filename'] = 'unknow.rtf'

        tmpdir, fullname = self.initialize_tmpdir(data, **kwargs)
        objects = {}
        try:
            html = self.invokeCommand(tmpdir, fullname)
            path, images = self.subObjects(tmpdir)
            if images:
                self.fixImages(path, images, objects)
        finally:
            # Remove the temporary directory even when the command or the
            # image handling fails, so failed conversions leave nothing
            # behind.
            self.cleanDir(tmpdir)
        cache.setData(bodyfinder(html))
        cache.setSubObjects(objects)
        return cache

Example #19

0

Show file

def get_text_from_view(view_name):
    """
    Return the text that ``bodyfinder`` extracts from a rendered view.

    The view named *view_name* is looked up on the portal with the
    portal's ``REQUEST``. Its ``index()`` output is passed to
    ``bodyfinder``, stripped, and decoded from UTF-8 when it is not
    already a ``text_type``. An empty string is returned when the portal
    has no request or the view lookup yields ``None``.
    """
    portal = api.portal.get()
    request = getattr(portal, "REQUEST", None)
    if request is None:
        return ""

    view = api.content.get_view(name=view_name, context=portal, request=request)
    if view is None:
        return ""

    text = bodyfinder(view.index()).strip()
    if isinstance(text, text_type):
        return text
    return text.decode("utf-8")

Example #20

0

Show file

 def _html(self):
     """Return the body of the generated HTML, or ``""`` on failure.

     Reads ``<self.tmpdir>/<self.__name__>.html``; an ``IOError`` while
     opening or reading yields an empty string. When the module-level
     ``process_double_encoding`` flag is truthy the text goes through
     ``noDoubleEncoding``, and a ``MemoryError`` raised there also
     yields an empty string. Otherwise the result of ``bodyfinder`` is
     returned.
     """
     try:
         # ``with`` closes the handle even when read() raises, which the
         # early return below would otherwise skip.
         with open(pjoin(self.tmpdir, self.__name__ + ".html"), 'r') as htmlfile:
             html = htmlfile.read()
     except IOError:
         return ""
     if process_double_encoding:
         # This operation can be very memory-consuming ...
         try:
             html = noDoubleEncoding(html)
         except MemoryError:
             return ""
     # xlhtml gives very complex html; scrubHTML takes far too long, so
     # scrubbing is disabled here.
     #html = scrubHTML(html)
     return bodyfinder(html)

Example #21

0

Show file

File: excel_xlhtml.py Project: starzel/Products.AROfficeTransforms

 def _html(self):
     """Return the body of the generated HTML, or ``""`` on failure.

     The file ``<self.tmpdir>/<self.__name__>.html`` is read in full; an
     ``IOError`` while opening or reading gives an empty string. If the
     module-level ``process_double_encoding`` flag is truthy the text is
     passed through ``noDoubleEncoding``; a ``MemoryError`` raised there
     also gives an empty string. Otherwise ``bodyfinder`` is applied and
     its result returned.
     """
     html_path = pjoin(self.tmpdir, self.__name__+".html")
     try:
         # The context manager releases the handle on a failed read(),
         # which a separate close() call after the try block would miss.
         with open(html_path, 'r') as htmlfile:
             html = htmlfile.read()
     except IOError:
         return ""
     if process_double_encoding :
         # This operation can be very memory-consuming ...
         try:
             html = noDoubleEncoding(html)
         except MemoryError:
             return ""
     # xlhtml gives very complex html; scrubHTML takes far too long, so
     # scrubbing is disabled here.
     #html = scrubHTML(html)
     return bodyfinder(html)

Example #22

0

Show file

def get_default_text(context):
    """
    Return the text that ``bodyfinder`` extracts from the
    ``default_gdpr_text`` view.

    The view is looked up on the portal with the portal's ``REQUEST``;
    *context* is not used. The view's ``index()`` output is passed to
    ``bodyfinder``, stripped, and decoded from UTF-8 when it is not
    already ``unicode``. An empty unicode string is returned when the
    portal has no request or the view lookup yields ``None``.
    """
    portal = api.portal.get()
    request = getattr(portal, 'REQUEST', None)
    if request is None:
        return u''

    view = api.content.get_view(
        name='default_gdpr_text',
        context=portal,
        request=request
    )
    if view is None:
        return u''

    text = bodyfinder(view.index()).strip()
    if isinstance(text, unicode):
        return text
    return text.decode("utf-8")

Example #23

0

Show file

File: pdf_to_html.py Project: CGTIC/Plone_SP

 def getData(self, couterr):
     """Return ``bodyfinder`` applied to everything read from *couterr*."""
     raw_output = couterr.read()
     return bodyfinder(raw_output)

Example #24

0

Show file

 def getData(self, couterr):
     """Read *couterr* to the end and return ``bodyfinder`` of that text."""
     command_output = couterr.read()
     return bodyfinder(command_output)

Example #25

0

Show file

File: html_body.py Project: pigaov10/plone4.3

 def convert(self, orig, data, **kwargs):
     """Store ``bodyfinder(orig)`` on *data* and return *data*.

     Extra keyword arguments are accepted but not used.
     """
     data.setData(bodyfinder(orig))
     return data

Example #26

0

Show file

File: html_body.py Project: kkdhanesh/NBADEMO

 def convert(self, orig, data, **kwargs):
     """Set the result of ``bodyfinder(orig)`` on *data* via ``setData``.

     Returns *data*; extra keyword arguments are ignored.
     """
     extracted = bodyfinder(orig)
     data.setData(extracted)
     return data