def __init__(self,
                 srcdir,
                 verbose=False,
                 silent=False,
                 minify=True,
                 prettify=False):
        self.srcdir = srcdir

        if verbose and silent:
            raise ValueError(
                'Parameters "verbose" and "silent" are mutually exclusive options'
            )
        self.verbose = verbose
        self.silent = silent

        self.config = self.read_global_configuration()

        # Populate scss namespace with variables from global configuration
        namespace = scss.namespace.Namespace()
        for name, value in self.config.items():
            converted_value = convert_to_scss_variable(value)
            namespace.set_variable(f'${name}', converted_value)

        self.scss_compiler = scss.compiler.Compiler(
            search_path=list(self.asset_dirs('stylesheets')),
            import_static_css=True,
            output_style='compressed',
            namespace=namespace)

        self.html_minifier = htmlmin.Minifier(
            remove_comments=True, remove_empty_space=True) if minify else None
        self.html_prettifier = Prettifier() if prettify else None
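
A minimal sketch of how the minifier and prettifier configured above might be applied when emitting a page; the render_page name and the Prettifier.prettify call are assumptions, not part of the original class:

# Hypothetical method: run rendered HTML through the optional
# minifier/prettifier configured in __init__.
def render_page(self, html):
    if self.html_minifier is not None:
        html = self.html_minifier.minify(html)
    if self.html_prettifier is not None:
        html = self.html_prettifier.prettify(html)  # assumed Prettifier API
    return html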
Example #2
    def __init__(self, site):
        super(HtmlMinPlugin, self).__init__(site)
        import htmlmin

        # Pass the plugin settings through to htmlmin as keyword options.
        module_settings = dict(self.settings)

        self.minifier = htmlmin.Minifier(**module_settings)
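
htmlmin.Minifier takes its behavior switches as keyword arguments, which is why a plain settings dict can be splatted straight in. A small self-contained sketch (remove_comments and remove_empty_space are real Minifier options):

import htmlmin

settings = {'remove_comments': True, 'remove_empty_space': True}
minifier = htmlmin.Minifier(**settings)
print(minifier.minify('<p> hello <!-- a comment --> </p>'))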
def minify_html_file(name, dry_run=False):
    minifier = htmlmin.Minifier(remove_comments=True,
                                remove_empty_space=True,
                                reduce_boolean_attributes=True)
    with open(name, 'r+', newline='\n',
              encoding='utf-8') as f:  # encoding is needed on Windows
        minifier.input(f.read())
        f.seek(0)
        if dry_run:
            print(f'Would write file {name}.')
        else:
            f.truncate()
            f.write(minifier.finalize())
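
A usage sketch, assuming an index.html exists in the current directory:

# Report what would change, then minify the file in place.
minify_html_file('index.html', dry_run=True)
minify_html_file('index.html')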
Example #4
def minimize_html(output_dir: str) -> None:
    html_minimizer = htmlmin.Minifier(
        remove_comments=True,
        remove_empty_space=True,
        remove_all_empty_space=True,
        reduce_boolean_attributes=True,
    )

    for file in get_files(output_dir, ".html"):
        text = file.read_text()
        minimized = html_minimizer.minify(text)

        # Minify each embedded <script> block in turn, scanning forward.
        position = 0

        while True:
            script_start = minimized.find("<script>\n", position)
            if script_start == -1:
                break

            position = script_end = minimized.find("</script>", script_start)
            minimized = extract_js_script_and_minimize(minimized, script_start,
                                                       script_end)

        file.write_text(minimized)
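
The extract_js_script_and_minimize helper is not shown in this example. A loudly hypothetical sketch of what it might do, using the real rjsmin package for the JavaScript minification step:

import rjsmin  # real package; its use here is an assumption


# Hypothetical reconstruction: minify the JS between script_start and
# script_end and splice it back into the surrounding HTML.
def extract_js_script_and_minimize(html: str, script_start: int,
                                   script_end: int) -> str:
    body_start = script_start + len("<script>")
    minified_js = rjsmin.jsmin(html[body_start:script_end])
    return html[:script_start] + "<script>" + minified_js + html[script_end:]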
Example #5
def setUp(self):
    HTMLMinTestCase.setUp(self)
    self.minifier = htmlmin.Minifier()
    self.minify = self.minifier.minify
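
A minimal sketch of a test this setUp enables; the method name, input, and expected output are illustrative assumptions (htmlmin collapses runs of whitespace to a single space by default):

# Hypothetical test using the self.minify shorthand bound in setUp.
def test_minify_collapses_whitespace(self):
    self.assertEqual(self.minify('<p>a \n   b</p>'), '<p>a b</p>')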
def write_index(entries,
                dictionary_name,
                title,
                stream,
                respect_re_restr=True,
                default_index=VOCAB_INDEX):
    # http://www.mobipocket.com/dev/article.asp?basefolder=prcgen&file=indexing.htm
    # http://kindlegen.s3.amazonaws.com/AmazonKindlePublishingGuidelines.pdf
    # http://www.klokan.cz/projects/stardict-lingea/tab2opf.py

    # Sort entries alphabetically
    entries.sort(key=sort_function)

    prev_section = None
    dictionary_file_name = dictionary_name.replace(' ', '_')

    # The 'stream' parameter is effectively unused: output goes to the
    # per-section streams opened below.
    stream = None

    sections = []
    section_streams = {}

    for entry in entries:
        section = entry.section

        if section != prev_section:
            try:
                stream = section_streams[section]
            except KeyError:
                sections.append(section)
                filename = 'entry-%s-%s.html' % (dictionary_file_name, section)
                stream = open(filename, 'wt', encoding='UTF-8')
                section_streams[section] = stream
                write_index_header(stream)

            prev_section = section

        # scriptable="yes" is needed; otherwise results are cut off, or
        # results after the actual match are also displayed.
        if default_index is not None:
            if entry.entry_type == VOCAB_ENTRY:
                stream.write('<idx:entry name="v" scriptable="yes">\n')
            elif entry.entry_type == NAME_ENTRY:
                stream.write('<idx:entry name="n" scriptable="yes">\n')
            else:
                print(f"Not implemented entry type: {entry.entry_type}")
        else:
            stream.write('<idx:entry scriptable="yes">\n')

        assert entry.readings
        if respect_re_restr:
            special_readings = {}
            readings = []
            for reading in entry.readings:
                if reading.re_restr:
                    if reading.re_restr not in special_readings:
                        special_readings[reading.re_restr] = []
                    special_readings[reading.re_restr].append(reading)
                readings.append(format_pronunciations(reading))
            label = ";".join(readings)
            if entry.kanjis:
                label += '【' + ';'.join(
                    [escape(kanji.keb, quote=False)
                     for kanji in entry.kanjis]) + '】'

            stream.write(' <p class=lab>' + label + '</p>\n')

            if special_readings:
                for kanji in special_readings:
                    label = ""
                    readings = []
                    for reading in special_readings[kanji]:
                        readings.append(format_pronunciations(reading))
                    label = ";".join(readings)
                    label += '【' + escape(kanji, quote=False) + '】'
                    stream.write(' <p class=lab>' + label + '</p>\n')
        else:
            label = ';'.join([reading.reb for reading in entry.readings])
            if entry.kanjis:
                label += '【' + ';'.join(
                    [escape(kanji.keb, quote=False)
                     for kanji in entry.kanjis]) + '】'

            stream.write(' <p class=lab>' + label + '</p>\n')

        assert entry.senses

        if entry.senses:
            stream.write(' <ul>\n')
            for sense in entry.senses:
                stream.write(' <li>')
                if sense.pos or sense.dial or sense.misc:
                    stream.write('<span class=pos>' +
                                 ','.join(sense.pos + sense.dial +
                                          sense.misc) + '</span> ')
                stream.write(escape('; '.join(sense.gloss), quote=False))
                stream.write('</li>\n')
            stream.write(' </ul>\n')

        if entry.entry_type == VOCAB_ENTRY and entry.sentences:
            stream.write('<div class=ex>\n')
            stream.write(' <span class="exh">Examples:</span>\n')
            entry.sentences.sort(reverse=True,
                                 key=lambda sentence: sentence.good_sentence)
            for sentence in entry.sentences:
                stream.write(' <div class="sen">\n')
                stream.write('  <span>' + sentence.japanese + '</span>\n')
                stream.write('  <br>\n')
                stream.write('  <span>' + sentence.english + '</span>\n')
                stream.write(' </div>\n')
            stream.write('</div>\n')

        for ortho in entry.orthos:
            stream.write(' <idx:orth value="%s"' %
                         escape(ortho.value, quote=True))
            if ortho.inflgrps:
                stream.write('>\n')
                for inflgrp in list(ortho.inflgrps.values()):
                    assert inflgrp
                    stream.write('  <idx:infl>\n')
                    iforms = list(inflgrp)
                    iforms.sort()
                    for iform in iforms:
                        stream.write('   <idx:iform value="%s"/>\n' %
                                     escape(iform, quote=True))
                    stream.write('  </idx:infl>\n')
                stream.write(' </idx:orth>\n')
            else:
                stream.write('/>\n')

        stream.write('</idx:entry>\n')

        stream.write('<hr/>\n')

    for stream in list(section_streams.values()):
        write_index_footer(stream)
        stream.close()

    #create cover
    createCover(dictionary_name, title, 768, 1024)

    # minify html
    minifier = htmlmin.Minifier(remove_empty_space=True)
    for section in sections:
        with open('entry-%s-%s.html' % (dictionary_file_name, section),
                  'r+',
                  encoding='UTF-8') as f:
            content = f.read()
            content = minifier.minify(content)
            f.seek(0)
            f.write(content)
            f.truncate()

    # Write the OPF
    stream = open('%s.opf' % dictionary_file_name, 'wt', encoding='UTF-8')
    stream.write('<?xml version="1.0" encoding="utf-8"?>\n')
    stream.write('<package unique-identifier="uid">\n')
    stream.write('  <metadata>\n')
    stream.write(
        '    <dc-metadata xmlns:dc="http://purl.org/metadata/dublin_core">\n')
    stream.write('      <dc:Identifier id="uid">%s</dc:Identifier>\n' %
                 (hex(hash(title)).split('x')[1]))
    stream.write('      <dc:Title><h2>%s</h2></dc:Title>\n' % title)
    stream.write('      <dc:Language>ja</dc:Language>\n')
    stream.write(
        '      <dc:Creator>Electronic Dictionary Research &amp; Development Group</dc:Creator>\n'
    )
    stream.write('      <dc:Date>2019-05-08</dc:Date>\n')
    stream.write(
        '      <dc:Copyrights>2013 Electronic Dictionary Research &amp; Development Group</dc:Copyrights>\n'
    )
    stream.write('    </dc-metadata>\n')
    stream.write('    <x-metadata>\n')
    stream.write(
        '      <output encoding="UTF-8" flatten-dynamic-dir="yes"/>\n')
    stream.write('      <DictionaryInLanguage>ja</DictionaryInLanguage>\n')
    stream.write('      <DictionaryOutLanguage>en</DictionaryOutLanguage>\n')
    if default_index == VOCAB_INDEX:
        stream.write('      <DefaultLookupIndex>v</DefaultLookupIndex>\n')
    elif default_index == NAME_INDEX:
        stream.write('      <DefaultLookupIndex>n</DefaultLookupIndex>\n')
    stream.write('    </x-metadata>\n')
    stream.write('  </metadata>\n')
    stream.write('  <manifest>\n')
    stream.write(
        '    <item id="cover" href="%s-cover.jpg" media-type="image/jpeg" properties="cover-image"/>\n'
        % dictionary_file_name)
    stream.write(
        '    <item id="css" href="style.css" media-type="text/css"/>\n')
    stream.write(
        '    <item id="frontmatter" href="%s-frontmatter.html" media-type="text/x-oeb1-document"/>\n'
        % dictionary_file_name)
    for i, section in enumerate(sections):
        stream.write(
            '    <item id="entry-%u" href="entry-%s-%s.html" media-type="text/x-oeb1-document"/>\n'
            % (i, dictionary_file_name, escape(section, quote=True)))
    stream.write('  </manifest>\n')
    stream.write('\n')
    stream.write('  <spine>\n')
    stream.write('    <itemref idref="frontmatter"/>\n')
    for i in range(len(sections)):
        stream.write('    <itemref idref="entry-%u"/>\n' % i)
    stream.write('  </spine>\n')
    stream.write('  <tours/>\n')
    stream.write('  <guide/>\n')
    stream.write('</package>\n')
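
Once write_index has emitted the per-section HTML files and the OPF, the .mobi dictionary is typically compiled with Amazon's kindlegen tool, per the guidelines linked above. A hedged sketch, assuming kindlegen is on PATH; 'JMdict' stands in for the actual dictionary_file_name:

import subprocess

# Compile the generated OPF into a .mobi dictionary.
subprocess.run(['kindlegen', 'JMdict.opf'], check=True)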
from multiprocessing import Pool

import psycopg2
import requests
import urllib3
import htmlmin

urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
from bs4 import BeautifulSoup
from src.data.websites import website as website_helper
from src.visualization.console import StatusVisualization

# Some initialization

minifier = htmlmin.Minifier(remove_comments=True, remove_all_empty_space=True, reduce_boolean_attributes=True,
                            remove_empty_space=True)


def crawl_article(article):
    index, (article_url, source_name) = article
    videos = []
    try:
        res = requests.get(article_url, headers={"user-agent": "Mozilla"})
        if res.status_code >= 300:
            status = str(res.status_code)
        else:
            status = "Success"
            bs = BeautifulSoup(res.text, features="lxml")
            # find video iframes and get their src attributes
            videos = list(website_helper.get_video_sources_bs(bs))
            if len(videos) > 0:
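
A hedged sketch, not the original continuation, of how the shared module-level minifier could be applied to the fetched page:

# Hypothetical: minify the fetched article HTML before storing it.
minified_html = minifier.minify(res.text)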
Example #8
import htmlmin
from pathlib import Path

input_file = Path('index.html').read_text()

minified = htmlmin.Minifier().minify(input_file)

with open("index.min.html", 'w') as out:
    out.write(minified)
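
When no per-call configuration is needed, htmlmin also exposes a module-level convenience function:

import htmlmin

# One-shot minification without constructing a Minifier.
minified = htmlmin.minify('<p>   hello   world   </p>')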