Ejemplo n.º 1
0
def html_minify(html_code, ignore_comments=True):
    """Collapse an HTML document onto a single line.

    Non-empty ``<script>`` elements are swapped for placeholders built from
    ``SCRIPT_PATTERN`` before the whitespace pass and substituted back
    afterwards, so script bodies are not touched by the line stripping
    (wherever the serialised element occurs literally in the input).

    Args:
        html_code: The markup to minify; it is passed through
            ``force_decode`` first.
        ignore_comments: When true, HTML comment nodes are removed from the
            parsed tree before it is serialised.

    Returns:
        The markup with every line stripped of surrounding whitespace and
        joined without separators. ``<!DOCTYPE html>`` is prepended when the
        text ``DOCTYPE`` does not occur anywhere in the result.
    """
    html_code = force_decode(html_code)
    soup = BeautifulSoup(html_code)

    # Serialised form of every script element that actually has content.
    scripts = [str(script) for script in soup.findAll(name='script') if script.text]

    # Hide the scripts behind indexed placeholders so stripping cannot
    # alter them.
    for index, script in enumerate(scripts):
        html_code = html_code.replace(script, SCRIPT_PATTERN % index)

    # Re-parse the markup now that the scripts have been masked.
    soup = BeautifulSoup(html_code)

    if ignore_comments:
        comments = soup.findAll(text=lambda text: isinstance(text, Comment))
        for comment in comments:
            comment.extract()

    content = "".join(line.strip() for line in str(soup).split('\n'))

    # Put the original scripts back in place of their placeholders.
    for index, script in enumerate(scripts):
        content = content.replace(SCRIPT_PATTERN % index, script)

    if "DOCTYPE" not in content:
        content = "<!DOCTYPE html>%s" % content

    return content
Ejemplo n.º 2
0
def html_minify(html_code, ignore_comments=True):
    """Collapse an HTML document onto a single line.

    For every tag name in ``EXCLUDE_TAGS``, each non-empty element is
    swapped for a placeholder built from ``TAGS_PATTERN % (tag, index)``
    before the whitespace pass and substituted back afterwards, so the
    content of those elements is not touched by the line stripping
    (wherever the serialised element occurs literally in the input).

    Args:
        html_code: The markup to minify; it is passed through
            ``force_decode`` first.
        ignore_comments: When true, HTML comment nodes are removed from the
            parsed tree before it is serialised.

    Returns:
        The markup with every line stripped of surrounding whitespace and
        joined without separators. ``<!DOCTYPE html>`` is prepended when the
        text ``DOCTYPE`` does not occur anywhere in the result.
    """
    html_code = force_decode(html_code)
    soup = BeautifulSoup(html_code)
    exclude_tags = {}

    for tag in EXCLUDE_TAGS:
        # Serialised form of every element of this tag that has content.
        exclude_tags[tag] = [
            str(element) for element in soup.findAll(name=tag) if element.text
        ]

        # Hide those elements behind indexed, per-tag placeholders.
        for index, markup in enumerate(exclude_tags[tag]):
            html_code = html_code.replace(markup, TAGS_PATTERN % (tag, index))

    # Re-parse the markup now that the excluded elements have been masked.
    soup = BeautifulSoup(html_code)

    if ignore_comments:
        comments = soup.findAll(text=lambda text: isinstance(text, Comment))
        for comment in comments:
            comment.extract()

    content = "".join(line.strip() for line in str(soup).split('\n'))

    # Put the original elements back in place of their placeholders.
    for tag in EXCLUDE_TAGS:
        for index, markup in enumerate(exclude_tags[tag]):
            content = content.replace(TAGS_PATTERN % (tag, index), markup)

    if "DOCTYPE" not in content:
        content = "<!DOCTYPE html>%s" % content

    return content
Ejemplo n.º 3
0
def html_minify(html_code, ignore_comments=True):
    """Collapse an HTML document onto a single line.

    For every tag name in ``EXCLUDE_TAGS``, each non-empty element is
    swapped for a placeholder built from ``TAGS_PATTERN % (tag, index)``
    before the whitespace pass and substituted back afterwards (wherever
    the serialised element occurs literally in the input).

    Args:
        html_code: The markup to minify; it is passed through
            ``force_decode`` first.
        ignore_comments: When true, HTML comment nodes are removed from the
            parsed tree, and ``/* ... */`` comments are removed from the
            final text (empty ``/**/`` comments are kept).

    Returns:
        The markup with every line stripped and joined without separators;
        a line is prefixed with one space when ``between_two_tags`` reports
        a falsy value for it.
    """
    html_code = force_decode(html_code)
    soup = HtmlMinifyParser(html_code)
    exclude_tags = {}

    for tag in EXCLUDE_TAGS:
        # Serialised form of every element of this tag that has content.
        exclude_tags[tag] = [
            str(element) for element in soup.findAll(name=tag) if element.text
        ]

        for index, markup in enumerate(exclude_tags[tag]):
            # ``str`` is a byte string on Python 2 and must be decoded
            # before searching the decoded document; on Python 3 it is
            # already text and has no ``decode`` method.
            needle = markup.decode('utf-8') if isinstance(markup, bytes) else markup
            html_code = html_code.replace(needle, TAGS_PATTERN % (tag, index))

    # Re-parse the markup now that the excluded elements have been masked.
    soup = HtmlMinifyParser(html_code)

    if ignore_comments:
        comments = soup.findAll(text=lambda text: isinstance(text, Comment))
        for comment in comments:
            comment.extract()

    html_code = str(soup)
    lines = html_code.split('\n')
    minified_lines = []

    for index, line in enumerate(lines):
        minified_line = line.strip()

        # not in between two tags: keep a separating space in front
        # NOTE(review): between_two_tags is defined elsewhere; it receives
        # the lines emitted so far, so this loop must stay sequential.
        if not between_two_tags(minified_line, minified_lines, index):
            minified_line = ' %s' % minified_line

        minified_lines.append(str(minified_line))

    content = "".join(minified_lines)

    # Put the original elements back in place of their placeholders.
    for tag in EXCLUDE_TAGS:
        for index, markup in enumerate(exclude_tags[tag]):
            content = content.replace(TAGS_PATTERN % (tag, index), markup)

    if ignore_comments:
        # remove css comments
        # Empty comments (with any whitespace before them) are parked
        # behind a sentinel so the general removal below leaves them alone,
        # then restored as a bare "/**/". This runs over the whole output,
        # restored excluded elements included.
        content = re.sub(r'\s*/\*\s*\*/', "$$HACK1$$", content)
        content = re.sub(r'/\*[\s\S]*?\*/', "", content)
        content = content.replace("$$HACK1$$", '/**/')

    return content
Ejemplo n.º 4
0
def html_minify(html_code, ignore_comments=True):
    """Collapse an HTML document onto a single line.

    For every tag name in ``EXCLUDE_TAGS``, each non-empty element is
    swapped for a placeholder built from ``TAGS_PATTERN % (tag, index)``
    before the whitespace pass and substituted back afterwards (wherever
    the serialised element occurs literally in the input).

    Args:
        html_code: The markup to minify; it is passed through
            ``force_decode`` first.
        ignore_comments: When true, HTML comment nodes are removed from the
            parsed tree before it is serialised.

    Returns:
        The markup with every line stripped and joined without separators;
        a line is prefixed with one space when ``between_two_tags`` reports
        a falsy value for it.
    """
    html_code = force_decode(html_code)
    soup = HtmlMinifyParser(html_code)
    exclude_tags = {}

    for tag in EXCLUDE_TAGS:
        # Serialised form of every element of this tag that has content.
        exclude_tags[tag] = [
            str(element) for element in soup.findAll(name=tag) if element.text
        ]

        for index, markup in enumerate(exclude_tags[tag]):
            # ``str`` is a byte string on Python 2 and must be decoded
            # before searching the decoded document; on Python 3 it is
            # already text and has no ``decode`` method.
            needle = markup.decode("utf-8") if isinstance(markup, bytes) else markup
            html_code = html_code.replace(needle, TAGS_PATTERN % (tag, index))

    # Re-parse the markup now that the excluded elements have been masked.
    soup = HtmlMinifyParser(html_code)

    if ignore_comments:
        comments = soup.findAll(text=lambda text: isinstance(text, Comment))
        for comment in comments:
            comment.extract()

    html_code = str(soup)
    lines = html_code.split("\n")
    minified_lines = []

    for index, line in enumerate(lines):
        minified_line = line.strip()

        # not in between two tags: keep a separating space in front
        # NOTE(review): between_two_tags is defined elsewhere; it receives
        # the lines emitted so far, so this loop must stay sequential.
        if not between_two_tags(minified_line, minified_lines, index):
            minified_line = " %s" % minified_line

        minified_lines.append(str(minified_line))

    content = "".join(minified_lines)

    # Put the original elements back in place of their placeholders.
    for tag in EXCLUDE_TAGS:
        for index, markup in enumerate(exclude_tags[tag]):
            content = content.replace(TAGS_PATTERN % (tag, index), markup)

    return content