def insert(dry_run=False, impatient=False):
    """Primary function.

    Unpack the songsearch-autocomplete zip into the static content root
    (only when its contents actually changed), then inject the autocomplete
    JS/CSS snippets into the cached blog-post HTML and (re)generate the
    .gz/.br compressed companions.

    :param dry_run: if True, report what would change but write nothing.
    :param impatient: if True, skip waiting for the template file to appear.
    :raises SongsearchAutocompleteError: on inconsistent marker comments or
        a corrupted injection result.
    """
    # Unzip and zopfli if the content has changed.
    autocompleteroot = os.path.join(settings.BASE_DIR, "songsearch-autocomplete")
    contentroot = os.path.join(settings.BASE_DIR, "peterbecom-static-content")
    assert os.path.isdir(autocompleteroot)
    zip_path = os.path.join(autocompleteroot, "songsearch-autocomplete.zip")
    assert os.path.isfile(zip_path)
    with tempfile.TemporaryDirectory() as tmpdir:
        with open(zip_path, "rb") as f:
            zf = zipfile.ZipFile(f)
            zf.extractall(tmpdir)
        assert os.listdir(tmpdir)
        source = os.path.join(tmpdir, "songsearch-autocomplete")
        assert os.path.isdir(source), source
        destination = os.path.join(contentroot, "songsearch-autocomplete")
        # Only swap the directory in when the extracted tree differs.
        different = not _are_dir_trees_equal(source, destination)
        if different:
            shutil.rmtree(destination)
            shutil.move(source, destination)
            print("MOVED", source, "TO", destination)

    assert os.path.isdir(contentroot)
    # Exactly one hashed CSS bundle is expected; unpacking enforces that.
    csspath, = glob(os.path.join(contentroot, "songsearch-autocomplete/css/*.css"))
    jspaths = glob(os.path.join(contentroot, "songsearch-autocomplete/js/*.js"))
    jspaths = [x.replace(contentroot + "/", "") for x in jspaths]
    with open(csspath) as f:
        csspayload = f.read()
    # Strip the sourceMappingURL comment; the .map file isn't served.
    csspayload = re.sub(r"\/\*# sourceMappingURL=.*?\*\/", "", csspayload)
    csspayload = csspayload.strip()
    js_block = "\n".join(
        (JS_BLOCK.replace("{cdn}", CDN).replace("{jspath}", jspath)).strip()
        for jspath in jspaths
    )
    css_block = (
        CSS_BLOCK.replace("{cdn}", CDN).replace("{csspayload}", csspayload)
    ).strip()

    template = os.path.join(contentroot, "_FSCACHE/plog/blogitem-040601-1/index.html")
    if not impatient:
        patient_isfile_check(template)
    assert os.path.isfile(template), template
    with open(template) as f:
        original_content = content = f.read()

    # The assumption is that the HTML has been CSS minified. Only after that has
    # been done can we insert (or not insert) the autocomplete snippets.
    # The simplest way to check is if there's a `<link rel="preload" href="*.css"`
    # tag and a big blob of <style>
    if not has_been_css_minified(content):
        print("WARNING! The HTML file hasn't been CSS minified yet.")
        return

    # Inject the JS code between the marker comments (replacing any previous
    # injection), or before </body> on first run.
    js_header = "<!-- songsearch-autocomplete -->"
    start = content.find(js_header)
    js_footer = "<!-- /songsearch-autocomplete -->"
    end = content.find(js_footer)
    if start > -1:
        content = content[:start] + js_header + "\n" + js_block + "\n" + content[end:]
    else:
        if js_footer in content and js_header not in content:
            raise SongsearchAutocompleteError(
                "Only footer is in the HTML but not the header"
            )
        content = content.replace(
            "</body>", "{}\n{}\n{}\n</body>".format(js_header, js_block, js_footer)
        )

    # Inject the CSS code — same dance, but inside </head>.
    css_header = "<!-- songsearch-autocomplete-css -->"
    start = content.find(css_header)
    css_footer = "<!-- /songsearch-autocomplete-css -->"
    end = content.find(css_footer)
    if start > -1:
        content = content[:start] + css_header + "\n" + css_block + "\n" + content[end:]
    else:
        if css_footer in content and css_header not in content:
            raise SongsearchAutocompleteError(
                "Only footer is in the HTML but not the header"
            )
        content = content.replace(
            "</head>", "{}\n{}\n{}\n</head>".format(css_header, css_block, css_footer)
        )

    # Paranoia, because it has happened in the past
    js_files = re.findall(
        r"/songsearch-autocomplete/js/main.[a-f0-9]{8}.chunk.js", content
    )
    if len(js_files) != 1:
        os.remove(template)
        raise SongsearchAutocompleteError(
            "Incorrect number of js paths! Should have been just one, not: "
            "{}".format(js_files)
        )

    # When it's done it should only be exactly 1 of these bits of strings
    # in the HTML (actually, it's inside the <style> tag)
    css_bit = "License for minified and inlined CSS originally belongs to Semantic UI"
    if content.count(css_bit) != 1:
        print(content)
        raise SongsearchAutocompleteError(
            "There is not exactly 1 ({} instead) CSS license strings".format(
                content.count(css_bit)
            )
        )

    if original_content != content:
        if dry_run:
            print("DRY RUN! ...BUT WILL WRITE NEW CONTENT TO FILE")
        else:
            with open(template, "w") as f:
                f.write(content)
            # Regenerate the compressed companions from the new content,
            # removing stale ones first.
            if os.path.isfile(template + ".gz"):
                os.remove(template + ".gz")
            _zopfli(template)
            if os.path.isfile(template + ".br"):
                # Bugfix: this used to remove the ".gz" file (again) instead
                # of the stale ".br" file, leaving an outdated .br behind.
                os.remove(template + ".br")
            _brotli(template)
            print("Updated {} with new content.".format(template))
    else:
        print("Nothing changed in the content. No write.")
        if not os.path.isfile(template + ".gz"):
            print("Going to zopfli a new index.html")
            _zopfli(template)
        if not os.path.isfile(template + ".br"):
            print("Going to brotli a new index.html")
            _brotli(template)

    # The zopfli file should always be younger than the not-zopflied file.
    age_html = os.stat(template).st_mtime
    if os.path.isfile(template + ".gz"):
        age_gz = os.stat(template + ".gz").st_mtime
        if age_html > age_gz:
            os.remove(template + ".gz")
            raise SongsearchAutocompleteError(
                "The index.html.gz file was older than the index.html file"
            )
    if os.path.isfile(template + ".br"):
        age_br = os.stat(template + ".br").st_mtime
        if age_html > age_br:
            os.remove(template + ".br")
            raise SongsearchAutocompleteError(
                "The index.html.br file was older than the index.html file"
            )
def _post_process_template(template, impatient, js_block, css_block, dry_run=False):
    """Inject the songsearch-autocomplete JS/CSS blocks into one template.

    :param template: pathlib.Path to the cached index.html file.
    :param impatient: if True, don't wait for the file to appear on disk.
    :param js_block: ready-made HTML snippet with the <script> tags.
    :param css_block: ready-made HTML snippet with the inline CSS.
    :param dry_run: if True, compute everything but write nothing.
    :raises CSSMinifiedCheckError: if the minified-CSS check itself blows up.
    :raises SongsearchAutocompleteError: on inconsistent marker comments or a
        corrupted result (wrong number of js chunk paths / CSS license strings).
    """
    if not template.is_file():
        print("WARNING! {} does not exist".format(template))
        return
    # more convenient this way. Also, mostly due to Python 3.5 and legacy
    template = str(template)
    if not impatient:
        patient_isfile_check(template)
    with open(template) as f:
        original_content = content = f.read()

    # The assumption is that the HTML has been CSS minified. Only after that has
    # been done can we insert (or not insert) the autocomplete snippets.
    # The simplest way to check is if there's a `<link rel="preload" href="*.css"`
    # tag and a big blob of <style>
    try:
        if not has_been_css_minified(content):
            print("WARNING! The HTML file hasn't been CSS minified yet.")
            return
    except ValueError as exception:
        # Chain the original error so the traceback shows the root cause.
        raise CSSMinifiedCheckError(
            "Template with problem: {}".format(template)
        ) from exception

    # Inject the JS code between the marker comments (replacing any previous
    # injection), or before </body> on first run.
    js_header = "<!-- songsearch-autocomplete -->"
    start = content.find(js_header)
    js_footer = "<!-- /songsearch-autocomplete -->"
    end = content.find(js_footer)
    if start > -1:
        content = content[:start] + js_header + "\n" + js_block + "\n" + content[end:]
    else:
        if js_footer in content and js_header not in content:
            raise SongsearchAutocompleteError(
                "Only footer is in the HTML but not the header"
            )
        content = content.replace(
            "</body>", "{}\n{}\n{}\n</body>".format(js_header, js_block, js_footer)
        )

    # Inject the CSS code — same dance, but inside </head>.
    css_header = "<!-- songsearch-autocomplete-css -->"
    start = content.find(css_header)
    css_footer = "<!-- /songsearch-autocomplete-css -->"
    end = content.find(css_footer)
    if start > -1:
        content = content[:start] + css_header + "\n" + css_block + "\n" + content[end:]
    else:
        if css_footer in content and css_header not in content:
            raise SongsearchAutocompleteError(
                "Only footer is in the HTML but not the header"
            )
        content = content.replace(
            "</head>", "{}\n{}\n{}\n</head>".format(css_header, css_block, css_footer)
        )

    # Paranoia, because it has happened in the past
    js_files = re.findall(
        r"/songsearch-autocomplete/js/main.[a-f0-9]{8}.chunk.js", content
    )
    if len(js_files) != 1:
        os.remove(template)
        raise SongsearchAutocompleteError(
            "Incorrect number of js paths! Should have been just one, not: "
            "{}".format(js_files)
        )

    # When it's done it should only be exactly 1 of these bits of strings
    # in the HTML (actually, it's inside the <style> tag)
    css_bit = "License for minified and inlined CSS originally belongs to Semantic UI"
    if content.count(css_bit) != 1:
        print(content)
        raise SongsearchAutocompleteError(
            "There is not exactly 1 ({} instead) CSS license strings".format(
                content.count(css_bit)
            )
        )

    if original_content != content:
        if dry_run:
            print("DRY RUN! ...BUT WILL WRITE NEW CONTENT TO FILE")
        else:
            with open(template, "w") as f:
                f.write(content)
            # Regenerate the compressed companions from the new content,
            # removing stale ones first.
            if os.path.isfile(template + ".gz"):
                os.remove(template + ".gz")
            _zopfli(template)
            if os.path.isfile(template + ".br"):
                # Bugfix: this used to remove the ".gz" file (again) instead
                # of the stale ".br" file, leaving an outdated .br behind.
                os.remove(template + ".br")
            _brotli(template)
            print("Updated {} with new content.".format(template))
    else:
        print("Nothing changed in the content. No write.")
        if not os.path.isfile(template + ".gz"):
            print("Going to zopfli a new index.html")
            _zopfli(template)
        if not os.path.isfile(template + ".br"):
            print("Going to brotli a new index.html")
            _brotli(template)
def _post_process_cached_html(filepath, url, postprocessing, original_url):
    """Optimize a freshly cached HTML file on disk, in place.

    Runs mincss_html over the file (retrying up to 3 times on timeout or a
    None result), then either triggers the songsearch-autocomplete insertion
    (for /plog/blogitem-040601-1 pages) or minifies and Zopfli/Brotli
    compresses the result, and finally queues the URL for CDN purging.

    :param filepath: path (str) to the cached .html file.
    :param url: public URL the file was cached for; must be single-line.
    :param postprocessing: progress notes are appended to its ``.notes``
        list.  # assumes a record/model object — TODO confirm at call site
    :param original_url: optional URL preferred over ``url`` when calling
        mincss_html.
    """
    if "\n" in url:
        raise ValueError("URL can't have a linebreak in it ({!r})".format(url))
    if url.startswith("http://testserver"):
        # do nothing. testing.
        return
    if not os.path.exists(filepath):
        # The file can legitimately disappear between caching and
        # post-processing; note it instead of raising.
        postprocessing.notes.append("{} no longer exists".format(filepath))
        return
        # raise ValueError(
        #     "{!r} does not exist and can't be post-processed".format(filepath)
        # )
    attempts = 0
    with open(filepath) as f:
        html = f.read()
    if has_been_css_minified(html):
        # This function has a lock decorator on it. That essentially makes sure,
        # if fired concurrently, at the same time'ish, by two threads, only one
        # of them will run at a time. In serial. The second thread will still
        # get to run. This check is to see if it's no point running now.
        msg = "HTML ({}) already post processed".format(filepath)
        postprocessing.notes.append(msg)
        return

    # Squeezing every little byte out of it!
    # That page doesn't need the little minimalcss stats block.
    # Otherwise, the default is to include it.
    include_minimalcss_stats = "/plog/blogitem-040601-1" not in url

    optimized_html = html
    # Retry loop: succeeds at most once (break); retried (max 3 attempts)
    # when mincss_html times out or returns None. Skipped for "/awspa" URLs.
    while True and not url.endswith("/awspa"):
        t0 = time.perf_counter()
        try:
            print("CALLING mincss_html FOR", original_url or url)
            optimized_html = mincss_html(
                html,
                original_url or url,
                include_minimalcss_stats=include_minimalcss_stats,
            )
            t1 = time.perf_counter()
            if optimized_html is None:
                postprocessing.notes.append(
                    "At attempt number {} the optimized HTML "
                    "became None (Took {:.1f}s)".format(attempts + 1, t1 - t0)
                )
            else:
                postprocessing.notes.append(
                    "Took {:.1f}s mincss_html HTML from {} to {}".format(
                        t1 - t0, len(html), len(optimized_html)
                    )
                )
        except ReadTimeout as exception:
            postprocessing.notes.append(
                "Timeout on mincss_html() ({})".format(exception)
            )
            optimized_html = None
        # created = False
        attempts += 1
        if optimized_html is None:
            postprocessing.notes.append(
                "WARNING! mincss_html returned None for {} ({})".format(filepath, url)
            )
            if attempts < 3:
                print("Will try again!")
                time.sleep(1)
                continue
            postprocessing.notes.append("Gave up after {} attempts".format(attempts))
            return
        # Keep the pre-optimization HTML around as a ".original" sibling.
        try:
            shutil.move(filepath, filepath + ".original")
        except FileNotFoundError:
            postprocessing.notes.append(
                "Can't move to .original {} no longer exists".format(filepath)
            )
            return
        with open(filepath, "w") as f:
            f.write(optimized_html)
        print("mincss optimized {}".format(filepath))
        break

    # Extract a page number from URLs ending in "/p2", "/p3", etc.
    try:
        page, = re.findall(r"/p(\d+)$", url)
        page = int(page)
    except ValueError:
        # No page suffix: unpacking the empty findall() result raises
        # ValueError, so default to page 1.
        page = 1
    if "/plog/blogitem-040601-1" in url:
        songsearch_autocomplete.insert(page=page)
    else:
        t0 = time.perf_counter()
        minified_html = _minify_html(filepath, url)
        t1 = time.perf_counter()
        if not minified_html:
            postprocessing.notes.append("Calling minify_html() failed")
        postprocessing.notes.append("Took {:.1f}s to minify HTML".format(t1 - t0))
        t0 = time.perf_counter()
        # Compress the minified HTML if minification worked, otherwise the
        # mincss-optimized HTML.
        _zopfli_html(minified_html and minified_html or optimized_html, filepath, url)
        t1 = time.perf_counter()
        postprocessing.notes.append("Took {:.1f}s to Zopfli HTML".format(t1 - t0))
        t0 = time.perf_counter()
        _brotli_html(minified_html and minified_html or optimized_html, filepath, url)
        t1 = time.perf_counter()
        postprocessing.notes.append("Took {:.1f}s to Brotli HTML".format(t1 - t0))

    CDNPurgeURL.add(url)
def _post_process_template(template, impatient, js_block, css_block, dry_run=False):
    """Inject the songsearch-autocomplete JS/CSS blocks into one template.

    :param template: pathlib.Path to the cached index.html file.
    :param impatient: if True, don't wait for the file to appear on disk.
    :param js_block: ready-made HTML snippet with the <script> tags.
    :param css_block: ready-made HTML snippet with the inline CSS.
    :param dry_run: if True, compute everything but write nothing.
    :raises CSSMinifiedCheckError: if the minified-CSS check itself blows up.
    :raises SongsearchAutocompleteError: on inconsistent marker comments or a
        corrupted result (wrong number of js chunk paths / CSS license strings).
    """
    if not template.is_file():
        print("WARNING! {} does not exist".format(template))
        return
    # more convenient this way. Also, mostly due to Python 3.5 and legacy
    template = str(template)
    if not impatient:
        patient_isfile_check(template)
    with open(template) as f:
        original_content = content = f.read()

    # The assumption is that the HTML has been CSS minified. Only after that has
    # been done can we insert (or not insert) the autocomplete snippets.
    # The simplest way to check is if there's a `<link rel="preload" href="*.css"`
    # tag and a big blob of <style>
    try:
        if not has_been_css_minified(content):
            print("WARNING! The HTML file hasn't been CSS minified yet.")
            return
    except ValueError as exception:
        # Chain the original error so the traceback shows the root cause.
        raise CSSMinifiedCheckError(
            "Template with problem: {}".format(template)
        ) from exception

    # Inject the JS code between the marker comments (replacing any previous
    # injection), or before </body> on first run.
    js_header = "<!-- songsearch-autocomplete -->"
    start = content.find(js_header)
    js_footer = "<!-- /songsearch-autocomplete -->"
    end = content.find(js_footer)
    if start > -1:
        content = content[:start] + js_header + "\n" + js_block + "\n" + content[end:]
    else:
        if js_footer in content and js_header not in content:
            raise SongsearchAutocompleteError(
                "Only footer is in the HTML but not the header"
            )
        content = content.replace(
            "</body>", "{}\n{}\n{}\n</body>".format(js_header, js_block, js_footer)
        )

    # Inject the CSS code — same dance, but inside </head>.
    css_header = "<!-- songsearch-autocomplete-css -->"
    start = content.find(css_header)
    css_footer = "<!-- /songsearch-autocomplete-css -->"
    end = content.find(css_footer)
    if start > -1:
        content = content[:start] + css_header + "\n" + css_block + "\n" + content[end:]
    else:
        if css_footer in content and css_header not in content:
            raise SongsearchAutocompleteError(
                "Only footer is in the HTML but not the header"
            )
        content = content.replace(
            "</head>", "{}\n{}\n{}\n</head>".format(css_header, css_block, css_footer)
        )

    # Paranoia, because it has happened in the past
    js_files = re.findall(
        r"/songsearch-autocomplete/js/main.[a-f0-9]{8}.chunk.js", content
    )
    if len(js_files) != 1:
        os.remove(template)
        raise SongsearchAutocompleteError(
            "Incorrect number of js paths! Should have been just one, not: "
            "{}".format(js_files)
        )

    # When it's done it should only be exactly 1 of these bits of strings
    # in the HTML (actually, it's inside the <style> tag)
    css_bit = "License for minified and inlined CSS originally belongs to Semantic UI"
    if content.count(css_bit) != 1:
        print(content)
        raise SongsearchAutocompleteError(
            "There is not exactly 1 ({} instead) CSS license strings".format(
                content.count(css_bit)
            )
        )

    if original_content != content:
        if dry_run:
            print("DRY RUN! ...BUT WILL WRITE NEW CONTENT TO FILE")
        else:
            with open(template, "w") as f:
                f.write(content)
            # Regenerate the compressed companions from the new content,
            # removing stale ones first.
            if os.path.isfile(template + ".gz"):
                os.remove(template + ".gz")
            _zopfli(template)
            if os.path.isfile(template + ".br"):
                # Bugfix: this used to remove the ".gz" file (again) instead
                # of the stale ".br" file, leaving an outdated .br behind.
                os.remove(template + ".br")
            _brotli(template)
            print("Updated {} with new content.".format(template))
    else:
        print("Nothing changed in the content. No write.")
        if not os.path.isfile(template + ".gz"):
            print("Going to zopfli a new index.html")
            _zopfli(template)
        if not os.path.isfile(template + ".br"):
            print("Going to brotli a new index.html")
            _brotli(template)
def _post_process_cached_html(filepath, url, postprocessing, original_url):
    """Optimize a freshly cached HTML file on disk, in place.

    Runs mincss_html over the file (retrying up to 3 times on timeout or a
    None result), then either triggers the songsearch-autocomplete insertion
    (for /plog/blogitem-040601-1 pages) or minifies and Zopfli/Brotli
    compresses the result, and finally purges the URL from the CDN.

    :param filepath: path (str) to the cached .html file.
    :param url: public URL the file was cached for; must be single-line.
    :param postprocessing: progress notes are appended to its ``.notes``
        list.  # assumes a record/model object — TODO confirm at call site
    :param original_url: optional URL preferred over ``url`` when calling
        mincss_html.
    """
    if "\n" in url:
        raise ValueError("URL can't have a linebreak in it ({!r})".format(url))
    if url.startswith("http://testserver"):
        # do nothing. testing.
        return
    if not os.path.exists(filepath):
        # The file can legitimately disappear between caching and
        # post-processing; note it instead of raising.
        postprocessing.notes.append("{} no longer exists".format(filepath))
        return
        # raise ValueError(
        #     "{!r} does not exist and can't be post-processed".format(filepath)
        # )
    attempts = 0
    with open(filepath) as f:
        html = f.read()
    if has_been_css_minified(html):
        # This function has a lock decorator on it. That essentially makes sure,
        # if fired concurrently, at the same time'ish, by two threads, only one
        # of them will run at a time. In serial. The second thread will still
        # get to run. This check is to see if it's no point running now.
        msg = "HTML ({}) already post processed".format(filepath)
        postprocessing.notes.append(msg)
        return

    # Squeezing every little byte out of it!
    # That page doesn't need the little minimalcss stats block.
    # Otherwise, the default is to include it.
    include_minimalcss_stats = "/plog/blogitem-040601-1" not in url

    optimized_html = html
    # Retry loop: succeeds at most once (break); retried (max 3 attempts)
    # when mincss_html times out or returns None. Skipped for "/awspa" URLs.
    while True and not url.endswith("/awspa"):
        t0 = time.perf_counter()
        try:
            print("CALLING mincss_html FOR", original_url or url)
            optimized_html = mincss_html(
                html,
                original_url or url,
                include_minimalcss_stats=include_minimalcss_stats,
            )
            t1 = time.perf_counter()
            if optimized_html is None:
                postprocessing.notes.append(
                    "At attempt number {} the optimized HTML "
                    "became None (Took {:.1f}s)".format(attempts + 1, t1 - t0)
                )
            else:
                postprocessing.notes.append(
                    "Took {:.1f}s mincss_html HTML from {} to {}".format(
                        t1 - t0, len(html), len(optimized_html)
                    )
                )
        except ReadTimeout as exception:
            postprocessing.notes.append(
                "Timeout on mincss_html() ({})".format(exception)
            )
            optimized_html = None
        # created = False
        attempts += 1
        if optimized_html is None:
            postprocessing.notes.append(
                "WARNING! mincss_html returned None for {} ({})".format(filepath, url)
            )
            if attempts < 3:
                print("Will try again!")
                time.sleep(1)
                continue
            postprocessing.notes.append("Gave up after {} attempts".format(attempts))
            return
        # Keep the pre-optimization HTML around as a ".original" sibling.
        try:
            shutil.move(filepath, filepath + ".original")
        except FileNotFoundError:
            postprocessing.notes.append(
                "Can't move to .original {} no longer exists".format(filepath)
            )
            return
        with open(filepath, "w") as f:
            f.write(optimized_html)
        print("mincss optimized {}".format(filepath))
        break

    # Extract a page number from URLs ending in "/p2", "/p3", etc.
    try:
        page, = re.findall(r"/p(\d+)$", url)
        page = int(page)
    except ValueError:
        # No page suffix: unpacking the empty findall() result raises
        # ValueError, so default to page 1.
        page = 1
    if "/plog/blogitem-040601-1" in url:
        songsearch_autocomplete.insert(page=page)
    else:
        t0 = time.perf_counter()
        minified_html = _minify_html(filepath, url)
        t1 = time.perf_counter()
        if not minified_html:
            postprocessing.notes.append("Calling minify_html() failed")
        postprocessing.notes.append("Took {:.1f}s to minify HTML".format(t1 - t0))
        t0 = time.perf_counter()
        # Compress the minified HTML if minification worked, otherwise the
        # mincss-optimized HTML.
        _zopfli_html(minified_html and minified_html or optimized_html, filepath, url)
        t1 = time.perf_counter()
        postprocessing.notes.append("Took {:.1f}s to Zopfli HTML".format(t1 - t0))
        t0 = time.perf_counter()
        _brotli_html(minified_html and minified_html or optimized_html, filepath, url)
        t1 = time.perf_counter()
        postprocessing.notes.append("Took {:.1f}s to Brotli HTML".format(t1 - t0))

    purge_cdn_urls([url])
def _post_process_cached_html(filepath, url, postprocessing):
    """Optimize a freshly cached HTML file on disk, in place.

    Runs mincss_html over the file (retrying up to 3 times on timeout or a
    None result), then either triggers the songsearch-autocomplete insertion
    (for the /plog/blogitem-040601-1 page) or minifies and Zopfli/Brotli
    compresses the result.

    :param filepath: path (str) to the cached .html file.
    :param url: public URL the file was cached for; must be single-line.
    :param postprocessing: progress notes are appended to its ``.notes``
        list.  # assumes a record/model object — TODO confirm at call site
    :raises ValueError: if the URL contains a linebreak or the file is gone.
    """
    if "\n" in url:
        raise ValueError("URL can't have a linebreak in it ({!r})".format(url))
    if url.startswith("http://testserver"):
        # do nothing. testing.
        return
    if not os.path.exists(filepath):
        raise ValueError(
            "{!r} does not exist and can't be post-processed".format(filepath))
    attempts = 0
    with open(filepath) as f:
        html = f.read()
    if has_been_css_minified(html):
        # This function has a lock decorator on it. That essentially makes sure,
        # if fired concurrently, at the same time'ish, by two threads, only one
        # of them will run at a time. In serial. The second thread will still
        # get to run. This check is to see if it's no point running now.
        msg = "HTML ({}) already post processed".format(filepath)
        postprocessing.notes.append(msg)
        return
    optimized_html = html
    # Retry loop: succeeds at most once (break); retried (max 3 attempts)
    # when mincss_html times out or returns None. Skipped for "/awspa" URLs.
    while True and not url.endswith("/awspa"):
        t0 = time.perf_counter()
        try:
            optimized_html = mincss_html(html, url)
            t1 = time.perf_counter()
            if optimized_html is None:
                postprocessing.notes.append(
                    "At attempt number {} the optimized HTML "
                    "became None (Took {:.1f}s)".format(attempts + 1, t1 - t0))
            else:
                postprocessing.notes.append(
                    "Took {:.1f}s mincss_html HTML from {} to {}".format(
                        t1 - t0, len(html), len(optimized_html)))
        except ReadTimeout as exception:
            postprocessing.notes.append(
                "Timeout on mincss_html() ({})".format(exception))
            optimized_html = None
        # created = False
        attempts += 1
        if optimized_html is None:
            postprocessing.notes.append(
                "WARNING! mincss_html returned None for {} ({})".format(
                    filepath, url))
            if attempts < 3:
                print("Will try again!")
                time.sleep(1)
                continue
            postprocessing.notes.append(
                "Gave up after {} attempts".format(attempts))
            return
        # Keep the pre-optimization HTML around as a ".original" sibling.
        shutil.move(filepath, filepath + ".original")
        with open(filepath, "w") as f:
            f.write(optimized_html)
        print("mincss optimized {}".format(filepath))
        break
    if url.endswith("/plog/blogitem-040601-1"):
        songsearch_autocomplete.insert()
    else:
        t0 = time.perf_counter()
        minified_html = _minify_html(filepath, url)
        t1 = time.perf_counter()
        if not minified_html:
            postprocessing.notes.append("Calling minify_html() failed")
        postprocessing.notes.append("Took {:.1f}s to minify HTML".format(t1 - t0))
        t0 = time.perf_counter()
        # Compress the minified HTML if minification worked, otherwise the
        # mincss-optimized HTML.
        _zopfli_html(minified_html and minified_html or optimized_html, filepath, url)
        t1 = time.perf_counter()
        postprocessing.notes.append("Took {:.1f}s to Zopfli HTML".format(t1 - t0))
        t0 = time.perf_counter()
        _brotli_html(minified_html and minified_html or optimized_html, filepath, url)
        t1 = time.perf_counter()
        postprocessing.notes.append("Took {:.1f}s to Brotli HTML".format(t1 - t0))