def __init__(self, srcdir, verbose=False, silent=False, minify=True, prettify=False):
    self.srcdir = srcdir
    if verbose and silent:
        raise ValueError(
            'Parameters "verbose" and "silent" are mutually exclusive options')
    self.verbose = verbose
    self.silent = silent
    self.config = self.read_global_configuration()
    # Populate scss namespace with variables from global configuration
    namespace = scss.namespace.Namespace()
    for name, value in self.config.items():
        converted_value = convert_to_scss_variable(value)
        namespace.set_variable(f'${name}', converted_value)
    self.scss_compiler = scss.compiler.Compiler(
        search_path=list(self.asset_dirs('stylesheets')),
        import_static_css=True,
        output_style='compressed',
        namespace=namespace)
    self.html_minifier = htmlmin.Minifier(
        remove_comments=True, remove_empty_space=True) if minify else None
    self.html_prettifier = Prettifier() if prettify else None
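# convert_to_scss_variable is not shown above. A minimal sketch of what such
# a helper could look like, assuming pyScss's value types; this mapping is an
# assumption, not the original implementation:
import numbers

import scss.types

def convert_to_scss_variable(value):
    # Check bool before Number, since bool is a subclass of int.
    if isinstance(value, bool):
        return scss.types.Boolean(value)
    if isinstance(value, numbers.Number):
        return scss.types.Number(value)
    # Fall back to an SCSS string for everything else.
    return scss.types.String(str(value))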
def __init__(self, site):
    super(HtmlMinPlugin, self).__init__(site)
    import htmlmin

    # Filter out all the settings that are not relevant to htmlmin.
    module_settings = dict(self.settings)
    self.minifier = htmlmin.Minifier(**module_settings)
def minify_html_file(name, dry_run=False):
    minifier = htmlmin.Minifier(remove_comments=True,
                                remove_empty_space=True,
                                reduce_boolean_attributes=True)
    # Explicit newline and encoding are needed on Windows.
    with open(name, 'r+', newline='\n', encoding='utf-8') as f:
        minifier.input(f.read())
        f.seek(0)
        if dry_run:
            print(f'Would write file {name}.')
        else:
            f.truncate()
            f.write(minifier.finalize())
def minimize_html(output_dir: str) -> None:
    html_minimizer = htmlmin.Minifier(
        remove_comments=True,
        remove_empty_space=True,
        remove_all_empty_space=True,
        reduce_boolean_attributes=True,
    )
    for file in get_files(output_dir, ".html"):
        text = file.read_text()
        minimized = html_minimizer.minify(text)
        # Walk through the minified document and hand each inline
        # <script> block to the JS minifier helper.
        position = 0
        while True:
            script_start = minimized.find("<script>\n", position)
            if script_start == -1:
                break
            position = script_end = minimized.find("</script>", script_start)
            minimized = extract_js_script_and_minimize(minimized, script_start, script_end)
        file.write_text(minimized)
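# extract_js_script_and_minimize is not shown above; this is a hypothetical
# sketch of such a helper, assuming the rjsmin package is available. The
# indices follow the convention used in the loop: script_start points at
# "<script>\n" and script_end at the matching "</script>".
import rjsmin

def extract_js_script_and_minimize(html: str, script_start: int, script_end: int) -> str:
    body_start = script_start + len("<script>\n")
    # Minify only the JavaScript between the tags; keep the markup intact.
    minimized_js = rjsmin.jsmin(html[body_start:script_end])
    return html[:body_start] + minimized_js + html[script_end:]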
def setUp(self):
    HTMLMinTestCase.setUp(self)
    self.minifier = htmlmin.Minifier()
    self.minify = self.minifier.minify
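# A hypothetical test method to go with the setUp above, illustrating
# htmlmin's default behaviour of collapsing runs of whitespace into a single
# space. The method name and strings are assumptions, not part of the
# original suite.
def test_collapses_whitespace(self):
    self.assertEqual(
        self.minify('<html>  <body>hello</body>  </html>'),
        '<html> <body>hello</body> </html>')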
def write_index(entries, dictionary_name, title, stream, respect_re_restr=True, default_index=VOCAB_INDEX):
    # References:
    # http://www.mobipocket.com/dev/article.asp?basefolder=prcgen&file=indexing.htm
    # http://kindlegen.s3.amazonaws.com/AmazonKindlePublishingGuidelines.pdf
    # http://www.klokan.cz/projects/stardict-lingea/tab2opf.py

    # Sort entries alphabetically
    entries.sort(key=sort_function)
    prev_section = None
    dictionary_file_name = dictionary_name.replace(' ', '_')
    stream = None
    sections = []
    section_streams = {}
    for entry in entries:
        section = entry.section
        if section != prev_section:
            try:
                stream = section_streams[section]
            except KeyError:
                sections.append(section)
                filename = 'entry-%s-%s.html' % (dictionary_file_name, section)
                stream = open(filename, 'wt', encoding='UTF-8')
                section_streams[section] = stream
                write_index_header(stream)
            prev_section = section
        # scriptable="yes" is needed; otherwise the results are cut off,
        # or results after the actual result are also displayed.
        if default_index is not None:
            if entry.entry_type == VOCAB_ENTRY:
                stream.write('<idx:entry name="v" scriptable="yes">\n')
            elif entry.entry_type == NAME_ENTRY:
                stream.write('<idx:entry name="n" scriptable="yes">\n')
            else:
                print(f"Not implemented entry type: {entry.entry_type}")
        else:
            stream.write('<idx:entry scriptable="yes">\n')
        assert entry.readings
        if respect_re_restr:
            special_readings = {}
            readings = []
            for reading in entry.readings:
                if reading.re_restr:
                    if reading.re_restr not in special_readings:
                        special_readings[reading.re_restr] = []
                    special_readings[reading.re_restr].append(reading)
                readings.append(format_pronunciations(reading))
            label = ";".join(readings)
            if entry.kanjis:
                label += '【' + ';'.join(
                    [escape(kanji.keb, quote=False) for kanji in entry.kanjis]) + '】'
            stream.write(' <p class=lab>' + label + '</p>\n')
            if len(special_readings) > 0:
                for kanji in special_readings:
                    readings = []
                    for reading in special_readings[kanji]:
                        readings.append(format_pronunciations(reading))
                    label = ";".join(readings)
                    label += '【' + escape(kanji, quote=False) + '】'
                    stream.write(' <p class=lab>' + label + '</p>\n')
        else:
            label = ';'.join([reading.reb for reading in entry.readings])
            if entry.kanjis:
                label += '【' + ';'.join([kanji.keb for kanji in entry.kanjis]) + '】'
            stream.write(' <p class=lab>' + label + '</p>\n')
        assert entry.senses
        if len(entry.senses) > 0:
            stream.write(' <ul>\n')
            for sense in entry.senses:
                stream.write(' <li>')
                if sense.pos or sense.dial or sense.misc:
                    stream.write('<span class=pos>' +
                                 ','.join(sense.pos + sense.dial + sense.misc) +
                                 '</span> ')
                stream.write(escape('; '.join(sense.gloss), quote=False))
                stream.write('</li>\n')
            stream.write(' </ul>\n')
        if entry.entry_type == VOCAB_ENTRY and len(entry.sentences) > 0:
            stream.write('<div class=ex>\n')
            stream.write(' <span class="exh">Examples:</span>\n')
            # Show the best example sentences first
            entry.sentences.sort(reverse=True,
                                 key=lambda sentence: sentence.good_sentence)
            for sentence in entry.sentences:
                stream.write(' <div class="sen">\n')
                stream.write(' <span>' + sentence.japanese + '</span>\n')
                stream.write(' <br>\n')
                stream.write(' <span>' + sentence.english + '</span>\n')
                stream.write(' </div>\n')
            stream.write('</div>\n')
        for ortho in entry.orthos:
            stream.write(' <idx:orth value="%s"' % escape(ortho.value, quote=True))
            if ortho.inflgrps:
                stream.write('>\n')
                for inflgrp in list(ortho.inflgrps.values()):
                    assert inflgrp
                    stream.write(' <idx:infl>\n')
                    iforms = list(inflgrp)
                    iforms.sort()
                    for iform in iforms:
                        stream.write(' <idx:iform value="%s"/>\n' %
                                     escape(iform, quote=True))
                    stream.write(' </idx:infl>\n')
                stream.write(' </idx:orth>\n')
            else:
                stream.write('/>\n')
        stream.write('</idx:entry>\n')
        stream.write('<hr/>\n')
    for stream in list(section_streams.values()):
        write_index_footer(stream)
        stream.close()

    # Create the cover image
    createCover(dictionary_name, title, 768, 1024)

    # Minify the generated HTML files in place
    minifier = htmlmin.Minifier(remove_empty_space=True)
    for i in range(len(sections)):
        section = sections[i]
        with open('entry-%s-%s.html' % (dictionary_file_name, section), 'r+',
                  encoding='UTF-8') as f:
            content = f.read()
            content = minifier.minify(content)
            f.seek(0)
            f.write(content)
            f.truncate()

    # Write the OPF
    stream = open('%s.opf' % dictionary_file_name, 'wt', encoding='UTF-8')
    stream.write('<?xml version="1.0" encoding="utf-8"?>\n')
    stream.write('<package unique-identifier="uid">\n')
    stream.write(' <metadata>\n')
    stream.write(' <dc-metadata xmlns:dc="http://purl.org/metadata/dublin_core">\n')
    stream.write(' <dc:Identifier id="uid">%s</dc:Identifier>\n' %
                 hex(hash(title)).split('x')[1])
    stream.write(' <dc:Title><h2>%s</h2></dc:Title>\n' % title)
    stream.write(' <dc:Language>ja</dc:Language>\n')
    stream.write(' <dc:Creator>Electronic Dictionary Research &amp; Development Group</dc:Creator>\n')
    stream.write(' <dc:Date>2019-05-08</dc:Date>\n')
    stream.write(' <dc:Copyrights>2013 Electronic Dictionary Research &amp; Development Group</dc:Copyrights>\n')
    stream.write(' </dc-metadata>\n')
    stream.write(' <x-metadata>\n')
    stream.write(' <output encoding="UTF-8" flatten-dynamic-dir="yes"/>\n')
    stream.write(' <DictionaryInLanguage>ja</DictionaryInLanguage>\n')
    stream.write(' <DictionaryOutLanguage>en</DictionaryOutLanguage>\n')
    # Declare which idx:entry name serves as the default lookup index
    if default_index == VOCAB_INDEX:
        stream.write(' <DefaultLookupIndex>v</DefaultLookupIndex>\n')
    elif default_index == NAME_INDEX:
        stream.write(' <DefaultLookupIndex>n</DefaultLookupIndex>\n')
    stream.write(' </x-metadata>\n')
    stream.write(' </metadata>\n')
    stream.write(' <manifest>\n')
    stream.write(' <item id="cover" href="%s-cover.jpg" media-type="image/jpeg" properties="cover-image"/>\n' %
                 dictionary_file_name)
    stream.write(' <item id="css" href="style.css" media-type="text/css"/>\n')
    stream.write(' <item id="frontmatter" href="%s-frontmatter.html" media-type="text/x-oeb1-document"/>\n' %
                 dictionary_file_name)
    for i in range(len(sections)):
        section = sections[i]
        stream.write(' <item id="entry-%u" href="entry-%s-%s.html" media-type="text/x-oeb1-document"/>\n' %
                     (i, dictionary_file_name, escape(section, quote=True)))
    stream.write(' </manifest>\n')
    stream.write('\n')
    stream.write(' <spine>\n')
    stream.write(' <itemref idref="frontmatter"/>\n')
    for i in range(len(sections)):
        stream.write(' <itemref idref="entry-%u"/>\n' % i)
    stream.write(' </spine>\n')
    stream.write(' <tours/>\n')
    stream.write(' <guide/>\n')
    stream.write('</package>\n')
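# The OPF produced above is the input for Amazon's kindlegen tool (see the
# reference URLs at the top of write_index). A minimal sketch of invoking it
# from Python, assuming the kindlegen binary is on PATH; the invocation is
# an assumption, not part of the original build script:
import subprocess

def build_mobi(dictionary_file_name: str) -> None:
    # kindlegen compiles the OPF and the HTML it references into a .mobi file.
    subprocess.run(['kindlegen', '%s.opf' % dictionary_file_name], check=True)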
from multiprocessing import Pool

import psycopg2
import requests
import urllib3
import htmlmin

urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)

from bs4 import BeautifulSoup

from src.data.websites import website as website_helper
from src.visualization.console import StatusVisualization

'''
Some initialization
'''
minifier = htmlmin.Minifier(remove_comments=True,
                            remove_all_empty_space=True,
                            reduce_boolean_attributes=True,
                            remove_empty_space=True)


def crawl_article(article):
    index, (article_url, source_name) = article
    videos = []
    try:
        res = requests.get(article_url, headers={"user-agent": "Mozilla"})
        if res.status_code >= 300:
            status = str(res.status_code)
        else:
            status = "Success"
        bs = BeautifulSoup(res.text, features="lxml")
        # Find video iframes and get their src attributes
        videos = list(website_helper.get_video_sources_bs(bs))
        if len(videos) > 0:
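# crawl_article is truncated above, and the use of Pool falls outside this
# excerpt. A minimal sketch of how the function could be driven in parallel;
# the pool size and article list are assumptions:
def crawl_all(articles):
    # Each work item matches crawl_article's expected shape:
    # (index, (article_url, source_name)).
    with Pool(8) as pool:
        pool.map(crawl_article, enumerate(articles))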
import htmlmin
from pathlib import Path

input_file = Path('index.html').read_text()
minified = htmlmin.Minifier().minify(input_file)
with open("index.min.html", 'w') as out:
    out.write(minified)
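# For a one-off conversion like this, htmlmin also exposes a module-level
# shortcut that constructs a Minifier internally; with default options the
# two lines below should be equivalent to the Minifier-based version above:
minified = htmlmin.minify(input_file)
Path('index.min.html').write_text(minified)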