Esempio n. 1
0
 def testUnicodeHTML1(self):
     """Check that html_slimmer returns unicode for decoded EUC-JP input.

     Reads the 'euc-jp.html' fixture located next to this test module,
     decodes it as EUC-JP and passes the text through html_slimmer.
     """
     here = os.path.dirname(__file__)
     # The context manager closes the fixture file; the original left the
     # handle open until garbage collection.
     with codecs.open(os.path.join(here, 'euc-jp.html'),
                      'r', 'euc-jp') as fixture:
         before = fixture.read()
     assert isinstance(before, unicode)
     after = slimmer.html_slimmer(before)
     assert isinstance(after, unicode)
Esempio n. 2
0
 def testUnicodeHTML1(self):
     """Check that html_slimmer returns str for decoded EUC-JP input.

     Reads the 'euc-jp.html' fixture located next to this test module,
     decodes it as EUC-JP and passes the text through html_slimmer.
     """
     here = os.path.dirname(__file__)
     # Close the fixture deterministically instead of leaking the handle.
     with codecs.open(os.path.join(here, 'euc-jp.html'),
                      'r', 'euc-jp') as fixture:
         before = fixture.read()
     assert isinstance(before, str)
     after = slimmer.html_slimmer(before)
     assert isinstance(after, str)
Esempio n. 3
0
 def testUnicodeHTML2(self):
     """Check html_slimmer output for the UTF-8 fixture.

     Reads 'utf-8.html' from the directory of this test module, slims it
     and compares the result against the expected unicode markup.
     """
     here = os.path.dirname(__file__)
     # The context manager closes the fixture file; the original left the
     # handle open until garbage collection.
     with codecs.open(os.path.join(here, 'utf-8.html'),
                      'r', 'utf-8') as fixture:
         before = fixture.read()
     assert isinstance(before, unicode)
     after = slimmer.html_slimmer(before)
     assert isinstance(after, unicode)
     expect = u'<html><p>\u0e2a\u0e27\u0e31\u0e2a\u0e14\u0e35\u0e04\u0e23\u0e31\u0e1a</p></html>'
     assert after == expect
Esempio n. 4
0
 def testUnicodeHTML2(self):
     """Check html_slimmer output for the UTF-8 fixture.

     Reads 'utf-8.html' from the directory of this test module, slims it
     and compares the result against the expected markup.
     """
     here = os.path.dirname(__file__)
     # Close the fixture deterministically instead of leaking the handle.
     with codecs.open(os.path.join(here, 'utf-8.html'),
                      'r', 'utf-8') as fixture:
         before = fixture.read()
     assert isinstance(before, str)
     after = slimmer.html_slimmer(before)
     assert isinstance(after, str)
     expect = '<html><p>\u0e2a\u0e27\u0e31\u0e2a\u0e14\u0e35\u0e04\u0e23\u0e31\u0e1a</p></html>'
     assert after == expect
Esempio n. 5
0
def main(url, language):
    from newsman.processor import simplr

    title, content, images = simplr.convert(url, language)
    #import re
    #a = re.sub(">\s+<", "><", unicode(content))
    from slimmer import html_slimmer

    content = html_slimmer(content)
    print "--------------------------------------------------------------------"
    print str(content)
Esempio n. 6
0
def main(url, language):
    from newsman.processor import simplr

    title, content, images = simplr.convert(url, language)
    #import re
    #a = re.sub(">\s+<", "><", unicode(content))
    from slimmer import html_slimmer

    content = html_slimmer(content)
    print "--------------------------------------------------------------------"
    print str(content)
Esempio n. 7
0
def write_page(path, template, title, **args):
    """Render ``template`` inside 'index.html' and write the slimmed page.

    path     -- destination handed to open_out()
    template -- name of the template rendered as the page content
    title    -- page-specific title; a falsy value yields the bare site name
    **args   -- passed as a single dict to the content template's render()
    """
    title = 'Under The Radar: ' + title if title else 'Under The Radar'

    with open_out(path) as f:
        # Look up the outer layout first, then render the inner content,
        # matching the evaluation order of the nested call form.
        layout = templates.get_template('index.html')
        inner = templates.get_template(template).render(args)
        page = layout.render(content=inner, title=title)
        f.write(slimmer.html_slimmer(page))
Esempio n. 8
0
 def render(self, context):
     """Render the child nodes and slim the output according to self.format.

     When self.format is not one of the explicitly handled formats, the
     syntax is guessed from the rendered text and stored on self.format.
     The rendered text is returned unchanged if no format can be guessed
     or the guessed format has no slimmer in the chain below.
     """
     code = self.nodelist.render(context)
     fmt = self.format
     if fmt not in ('css', 'js', 'javascript', 'html'):
         # Guess once and dispatch on the text already rendered. The
         # previous version recursed into render(), which rendered the
         # nodelist a second time and never terminated when the guess was
         # a format this chain does not handle.
         guessed = guessSyntax(code)
         if not guessed:
             return code
         self.format = fmt = guessed

     if fmt == 'css':
         return css_slimmer(code)
     if fmt in ('js', 'javascript'):
         return js_slimmer(code)
     if fmt == 'html':
         return html_slimmer(code)

     return code
Esempio n. 9
0
    def render(self, context):
        """Render the child nodes and slim the output according to self.format.

        An unhandled self.format triggers a single syntax guess on the
        rendered text; the guess is stored on self.format. If nothing can
        be guessed, or the guess has no matching slimmer here, the
        rendered text is returned as is.
        """
        code = self.nodelist.render(context)
        fmt = self.format
        if fmt not in ('css', 'js', 'javascript', 'html'):
            # Dispatch directly on the guess instead of calling render()
            # again: the recursion re-rendered the nodelist and looped
            # forever when the guessed format was not handled below.
            guessed = guessSyntax(code)
            if not guessed:
                return code
            self.format = fmt = guessed

        if fmt == 'css':
            return css_slimmer(code)
        if fmt in ('js', 'javascript'):
            return js_slimmer(code)
        if fmt == 'html':
            return html_slimmer(code)

        return code
Esempio n. 10
0
 def render(self, context):
     """Render the child nodes and slim the output according to self.format.

     Returns the rendered text untouched when the slimmer module is not
     available (``slimmer is None``) or when no slimmer matches the
     format. An unknown self.format is replaced by a guess based on the
     rendered text.
     """
     code = self.nodelist.render(context)
     if slimmer is None:
         return code

     # 'javascript' is accepted by the dispatch below, so it has to count
     # as a known format here; otherwise an explicit 'javascript' would be
     # overwritten by the guess.
     if self.format not in ('css', 'js', 'javascript', 'html', 'xhtml'):
         self.format = guessSyntax(code)

     if self.format == 'css':
         return css_slimmer(code)
     elif self.format in ('js', 'javascript'):
         return js_slimmer(code)
     elif self.format == 'xhtml':
         return xhtml_slimmer(code)
     elif self.format == 'html':
         return html_slimmer(code)

     return code
Esempio n. 11
0
    def render(self, context):
        """Render the child nodes and slim the output according to self.format.

        Returns the rendered text untouched when the slimmer module is
        not available (``slimmer is None``). An unknown self.format is
        replaced by a guess based on the rendered text.

        Raises TemplateSyntaxError when the (possibly guessed) format has
        no matching slimmer.
        """
        code = self.nodelist.render(context)
        if slimmer is None:
            return code

        # 'javascript' is accepted by the dispatch below, so it has to
        # count as known here; otherwise an explicit 'javascript' would
        # be overwritten by the guess.
        if self.format not in ('css', 'js', 'javascript', 'html', 'xhtml'):
            self.format = slimmer.guessSyntax(code)

        if self.format == 'css':
            return slimmer.css_slimmer(code)
        if self.format in ('js', 'javascript'):
            return slimmer.js_slimmer(code)
        if self.format == 'xhtml':
            return slimmer.xhtml_slimmer(code)
        if self.format == 'html':
            return slimmer.html_slimmer(code)

        # Every handled format has returned above; the former trailing
        # "return code" could never be reached and was removed.
        raise TemplateSyntaxError("Unrecognized format for slimming content")
Esempio n. 12
0
    def render(self, context):
        """Render the child nodes and slim the output according to self.format.

        If the slimmer module is unavailable (``slimmer is None``) the
        rendered text is returned as is. When self.format is not a known
        format it is replaced by slimmer.guessSyntax() of the rendered
        text.

        Raises TemplateSyntaxError if no slimmer matches the resulting
        format.
        """
        code = self.nodelist.render(context)
        if slimmer is None:
            return code

        # Keep "javascript" in the known set: the dispatch below handles
        # it, and without it an explicit "javascript" would be discarded
        # in favour of a guess.
        if self.format not in ("css", "js", "javascript", "html", "xhtml"):
            self.format = slimmer.guessSyntax(code)

        if self.format == "css":
            return slimmer.css_slimmer(code)
        if self.format in ("js", "javascript"):
            return slimmer.js_slimmer(code)
        if self.format == "xhtml":
            return slimmer.xhtml_slimmer(code)
        if self.format == "html":
            return slimmer.html_slimmer(code)

        # All handled formats returned above, so the old trailing
        # "return code" was dead code and has been dropped.
        raise TemplateSyntaxError("Unrecognized format for slimming content")
Esempio n. 13
0
    def render(self, context):
        """Slim the rendered child nodes with the slimmer matching self.format.

        The rendered text is returned unmodified when ``slimmer is None``.
        A self.format outside the known set is overwritten with the
        result of slimmer.guessSyntax() on the rendered text.

        Raises TemplateSyntaxError when the final format is not handled.
        """
        code = self.nodelist.render(context)
        if slimmer is None:
            return code

        # An explicit 'javascript' must be treated as known, since the
        # dispatch below supports it; previously it was replaced by a
        # guess before reaching that branch.
        if self.format not in ('css', 'js', 'javascript', 'html', 'xhtml'):
            self.format = slimmer.guessSyntax(code)

        if self.format == 'css':
            return slimmer.css_slimmer(code)
        if self.format in ('js', 'javascript'):
            return slimmer.js_slimmer(code)
        if self.format == 'xhtml':
            return slimmer.xhtml_slimmer(code)
        if self.format == 'html':
            return slimmer.html_slimmer(code)

        # Unreachable "return code" after this point was removed: each
        # branch above returns and this line raises.
        raise TemplateSyntaxError("Unrecognized format for slimming content")
Esempio n. 14
0
def find_images(content=None, referer=None):
    """
    Find all images in an HTML string and normalize them.

    Every <img> tag that has a ``src`` is passed to find_image() together
    with ``referer``. When the normalized result carries a truthy
    'original_url', the tag's ``src`` is rewritten to the result's 'url'.

    Returns a tuple ``(normalized_images, content)``. ``content`` is only
    re-serialized (and slimmed) when at least one ``src`` was replaced.
    Returns ``(None, content)`` for void input, non-string input, or when
    any exception occurs (the exception is logged, not raised).
    """
    if not content:
        logger.error('Content/HTML is found VOID!')
        return None, content

    try:
        if not isinstance(content, basestring):
            logger.info("Wrong format %s" % content)
            return None, content

        # Only byte strings need decoding. Calling .decode() on a unicode
        # object makes Python 2 encode it as ASCII first, which raises on
        # any non-ASCII character and used to abort the whole function.
        if isinstance(content, unicode):
            markup = content
        else:
            markup = content.decode('utf-8', 'ignore')
        soup = BeautifulSoup(markup)

        normalized_images = []
        element_replaced = False
        for image in soup.findAll('img'):
            src = image.get('src')
            if not src:
                continue
            normalized_image = find_image(src, referer)
            if not normalized_image:
                continue
            # replace original image link with clean and (local) copy
            if 'original_url' in normalized_image and \
                    normalized_image['original_url']:
                image['src'] = str(normalized_image['url'])
                element_replaced = True
            normalized_images.append(normalized_image)

        content_new = soup.prettify(encoding='utf-8')
        if element_replaced and content_new:
            content = str(
                html_slimmer(urllib2.unquote(
                    hparser.unescape(content_new))))
        return normalized_images, content
    except Exception as k:
        logger.error("Problem [%s] Source [%s]" % (str(k), content))
        return None, content
Esempio n. 15
0
def find_images(content=None, referer=None):
    """
    Collect normalized image records from an HTML string.

    Each <img> tag with a ``src`` is handed to find_image() along with
    ``referer``. If the returned record has a truthy 'original_url', the
    tag's ``src`` is replaced by the record's 'url'.

    Returns ``(normalized_images, content)``; ``content`` is rebuilt from
    the parsed document and slimmed only if some ``src`` was replaced.
    Void input, non-string input and any exception all yield
    ``(None, content)``; exceptions are logged rather than propagated.
    """
    if not content:
        logger.error('Content/HTML is found VOID!')
        return None, content

    try:
        if not isinstance(content, basestring):
            logger.info("Wrong format %s" % content)
            return None, content

        # Decode byte strings only: unicode.decode() in Python 2 implicitly
        # encodes to ASCII first and fails on non-ASCII text, which made
        # unicode input with such characters always end in the except
        # branch.
        if isinstance(content, unicode):
            markup = content
        else:
            markup = content.decode('utf-8', 'ignore')
        soup = BeautifulSoup(markup)

        normalized_images = []
        element_replaced = False
        for image in soup.findAll('img'):
            src = image.get('src')
            if not src:
                continue
            normalized_image = find_image(src, referer)
            if not normalized_image:
                continue
            # replace original image link with clean and (local) copy
            if 'original_url' in normalized_image and \
                    normalized_image['original_url']:
                image['src'] = str(normalized_image['url'])
                element_replaced = True
            normalized_images.append(normalized_image)

        content_new = soup.prettify(encoding='utf-8')
        if element_replaced and content_new:
            content = str(
                html_slimmer(
                    urllib2.unquote(hparser.unescape(content_new))))
        return normalized_images, content
    except Exception as k:
        logger.error("Problem [%s] Source [%s]" % (str(k), content))
        return None, content
Esempio n. 16
0
def convert(language="en",
            title=None,
            link=None,
            updated=None,
            feed=None,
            transcoder="chengdujin",
            relative_path=None,
            stdout=False):
    """
    Transcode the page behind ``link`` and, unless ``stdout`` is set,
    embed it in the template and save it.

    Steps: preprocess the link, select transcoders, transcode, slim the
    content, compose it with the template and save it.

    Returns, when ``stdout`` is false:
      * ``(web_path, local_path, content, images)`` on success
      * ``(link_clean, None, None, None)`` when no content or no title
        could be obtained (the original link stands in for the path)
      * ``(None, None, None, None)`` on any other failure
    Returns, when ``stdout`` is true (nothing is saved to disk):
      * ``(title, content)`` on success
      * ``(None, None)`` on any failure

    Exceptions are logged and converted into the failure result.
    """
    # Shape of the "nothing produced" result; it depends only on stdout.
    failure = (None, None) if stdout else (None, None, None, None)

    if not language or not link:
        logger.error('Method malformed! language: %s link: %s' %
                     (language, link))
        return failure

    try:
        link_clean = _preprocess(link)
        if not link_clean:
            logger.error('Link [clean %s] [original %s] cannot be parsed' %
                         (link_clean, link))
            return failure

        transcoders = _organize_transcoders(transcoder)
        title_new, content, images = _transcode(link_clean, transcoders,
                                                language)
        # remove null content
        content = content.strip() if content else None

        # in case no title is found from feed information
        if not title:
            title = title_new

        if not (content and title):
            if not content:
                logger.info('Transcoder %s failed for %s' %
                            (transcoder, link_clean))
            else:
                logger.info('Cannot find title for %s' % link_clean)

            if stdout:
                return failure
            # original link is returned as transcoded path; the message
            # previously lacked its argument and logged a literal "%s"
            logger.info('Original link %s is used as transcoded path' %
                        link_clean)
            return link_clean, None, None, None

        # slimmer the content
        content = html_slimmer(content)
        if stdout:
            return title, content

        # embed content in template
        news = _compose(language, title, updated, feed,
                        _sanitize(content), images)
        if not news:
            logger.error('Cannot combine content with the template!')
            return failure

        # create web/local path
        web_path, local_path = _save(news, relative_path)
        if web_path:
            # the FINAL return
            return web_path, local_path, content, images
        return failure
    except Exception as k:
        logger.error(str(k))
        return failure
    for script in soup.findAll('script'):
        src = script.get('src')
        if (src != None):
            all_script += urllib.urlopen(src).read()
        else:
            all_script += script.text
        html = html.replace(str(script), '')

    total_size += len(all_script.encode('utf-8'))
    all_script += '</script>'
    all_minified_script = jsmin(all_script)

    html = html.replace('</body>', '%s</body>' % all_minified_script)

# Minify the resulting html
minified_html = html_slimmer(html)

# Create cpp header string.
# The include-guard macro must be spelled identically in #ifndef and
# #define; the previous '#define _webpage_h__' never defined the macro the
# guard tests, so the guard had no effect.
cpp_string = '#ifndef __webpage_h__\n'
cpp_string += '#define __webpage_h__\n'
# Newlines are stripped so the page is stored as one raw string literal.
cpp_string += 'PROGMEM extern const String html = R"~(' + minified_html.replace(
    '\n', '') + ')~";\n'
cpp_string += '#endif'
absolute_output_path = os.path.join(current_directory, output_path)
amount_of_bytes = 0
with open(absolute_output_path, 'w+') as output_file:
    # Write string to header file
    output_file.write(cpp_string)
    # Flush before measuring so the on-disk size includes what was written
    output_file.flush()
    # Calculate the size of our file
    amount_of_bytes = os.path.getsize(absolute_output_path)
Esempio n. 18
0
 def generate(self, *args, **kw):
     """Generate the wrapped template and return its slimmed HTML.

     All positional and keyword arguments are forwarded unchanged to
     ``self.template.generate``.
     """
     rendered = self.template.generate(*args, **kw)
     return html_slimmer(rendered)
Esempio n. 19
0
File: deploy.py Progetto: jccode/NHW
def minify_html_proc(content):
    """Minify ``content`` with htmlmin, falling back to html_slimmer.

    Returns the UTF-8 encoded htmlmin result. If htmlmin (or the encoding
    step) raises, ``content`` is stripped, its newlines, tabs and carriage
    returns are replaced by spaces, and the result of html_slimmer on
    that text is returned instead.
    """
    try:
        return htmlmin.minify(content, remove_comments=True, remove_empty_space=True).encode('utf-8')
    except Exception:
        # Catch Exception rather than using a bare except so that
        # KeyboardInterrupt and SystemExit still propagate.
        return html_slimmer(content.strip().replace('\n',' ').replace('\t',' ').replace('\r',' '))
Esempio n. 20
0
def convert(language="en", title=None, link=None, updated=None, feed=None,
            transcoder="chengdujin", relative_path=None, stdout=False):
    """
    Transcode the page behind ``link``; unless ``stdout`` is set, also
    combine it with the template and save it.

    Steps: preprocess the link, organize the transcoders, transcode, slim
    the content, compose the news page and save it.

    With ``stdout`` false the result is a 4-tuple:
      * ``(web_path, local_path, content, images)`` on success
      * ``(link_clean, None, None, None)`` if content or title is missing
        (the original link is used as the transcoded path)
      * ``(None, None, None, None)`` for every other failure
    With ``stdout`` true nothing is saved and the result is a 2-tuple:
      * ``(title, content)`` on success
      * ``(None, None)`` for every failure

    Any exception is logged and turned into the failure result.
    """
    # The "nothing produced" result only varies with stdout, so build it
    # once instead of repeating the branch at every exit.
    failure = (None, None) if stdout else (None, None, None, None)

    if not language or not link:
        logger.error('Method malformed! language: %s link: %s' %
                     (language, link))
        return failure

    try:
        link_clean = _preprocess(link)
        if not link_clean:
            logger.error(
                'Link [clean %s] [original %s] cannot be parsed' % (
                    link_clean, link))
            return failure

        transcoders = _organize_transcoders(transcoder)
        title_new, content, images = _transcode(
            link_clean, transcoders, language)
        # remove null content
        content = content.strip() if content else None

        # in case no title is found from feed information
        if not title:
            title = title_new

        if not (content and title):
            if not content:
                logger.info('Transcoder %s failed for %s' %
                            (transcoder, link_clean))
            else:
                logger.info('Cannot find title for %s' % link_clean)

            if stdout:
                return failure
            # original link is returned as transcoded path; the format
            # argument was missing before, so "%s" was logged verbatim
            logger.info('Original link %s is used as transcoded path' %
                        link_clean)
            return link_clean, None, None, None

        # slimmer the content
        content = html_slimmer(content)
        if stdout:
            return title, content

        # embed content in template
        news = _compose(
            language, title, updated, feed, _sanitize(content),
            images)
        if not news:
            logger.error(
                'Cannot combine content with the template!')
            return failure

        # create web/local path
        web_path, local_path = _save(news, relative_path)
        if web_path:
            # the FINAL return
            return web_path, local_path, content, images
        return failure
    except Exception as k:
        logger.error(str(k))
        return failure
Esempio n. 21
0
 def __slimmer(self, htmlFile):
     """Drop CSS-link and script-src lines, then slim the joined HTML.

     ``htmlFile`` is iterated item by item; items beginning with
     '<link href="/css' or '<script src' are discarded. The remaining
     items are concatenated and passed to html_slimmer with True as the
     second argument.
     """
     kept = [
         line for line in htmlFile
         if line[0:16] != '<link href="/css' and line[0:11] != '<script src'
     ]
     return html_slimmer("".join(kept), True)
Esempio n. 22
0
			# try to handle additional types unknown to mimetypes.guess_type()			
			if fileext == ".tpl":
				mimetype = "text/html"
			else:
				mimetype = "application/octet-stream"
				
		# get raw file data
		with open(filepath, "rb") as fr:
			filedata = fr.read()
		
		oldfilesize = len(filedata)
		
		# can I remove CR, LF, Tabs?
		if do_slimmer:			
			if fileext in [".tpl", ".html", ".htm"]:
				filedata = slimmer.html_slimmer(filedata)
			elif fileext in [".css"]:
				filedata = slimmer.css_slimmer(filedata)
			elif fileext in [".js"]:
				filedata = slimmer.js_slimmer(filedata)			

		print "Adding {} mimetype = ({}) size = {}  reduced size = {}".format(filename, mimetype, oldfilesize, len(filedata))
				
		# filename length, mime tpye length, file content length
		fw.write(struct.pack("<BBH", len(filename) + 1, len(mimetype) + 1, len(filedata)))
				
		# filename data
		fw.write(struct.pack(str(len(filename)) + "sB", filename, 0x00))
		
		# mime type data
		fw.write(struct.pack(str(len(mimetype)) + "sB", mimetype, 0x00))
Esempio n. 23
0
import os,sys
import json
import datetime
import time
import calendar
import arrow
import os
#Y-m-d H:i:s ms tz 
#print datetime.timedelta(3600*8)

#print result
html=""
for line in open("test.html"):
    html+=line
#print html


import slimmer

html=slimmer.js_slimmer(html)
html=slimmer.html_slimmer(html)
print html



Esempio n. 24
0
            # try to handle additional types unknown to mimetypes.guess_type()
            if fileext == ".tpl":
                mimetype = "text/html"
            else:
                mimetype = "application/octet-stream"

        # get raw file data
        with open(filepath, "rb") as fr:
            filedata = fr.read()

        oldfilesize = len(filedata)

        # can I remove CR, LF, Tabs?
        if do_slimmer:
            if fileext in [".tpl", ".html", ".htm"]:
                filedata = slimmer.html_slimmer(filedata)
            elif fileext in [".css"]:
                filedata = slimmer.css_slimmer(filedata)
            elif fileext in [".js"]:
                filedata = slimmer.js_slimmer(filedata)

        print "Adding {} mimetype = ({}) size = {}  reduced size = {}".format(
            filename, mimetype, oldfilesize, len(filedata))

        # flags
        fw.write(struct.pack("B", 0))

        # filename length, mime tpye length, file content length
        fw.write(
            struct.pack("<BBH",
                        len(filename) + 1,
Esempio n. 25
0
import slimmer
import sys

# Slim every file named on the command line in place, choosing the slimmer
# by file extension (.html, .css, .js).
if __name__ == '__main__':
    for fpath in sys.argv[1:]:
        with open(fpath, 'r') as fh:
            txt = fh.read()

        if fpath.endswith('.html'):
            txt = slimmer.html_slimmer(txt)
        elif fpath.endswith('.css'):
            txt = slimmer.css_slimmer(txt)
        elif fpath.endswith('.js'):
            txt = slimmer.js_slimmer(txt)
        else:
            print('unknown format of', fpath)
            # Nothing was slimmed, so leave the file untouched instead of
            # truncating and rewriting it with the same text.
            continue

        with open(fpath, 'w') as fh:
            fh.write(txt)