def test_detect_root_breakout(self):
    """An absolute second argument escaping the base must raise.

    The optional ``(.*?:)?`` prefix absorbs a Windows drive letter in
    the resolved target (e.g. ``C:/test``); without it this test fails
    on Windows.
    """
    with self.assertRaisesRegex(
            RuntimeError,
            "Attempted to break out of output directory to "
            "(.*?:)?/test"):
        utils.sanitised_join(
            "/foo/bar",
            "/test"
        )
def test_detect_root_breakout(self):
    """An absolute second argument escaping the base must raise.

    Uses ``six.assertRaisesRegex`` for Python 2/3 compatibility.  The
    optional ``(.*?:)?`` prefix absorbs a Windows drive letter in the
    resolved target (e.g. ``C:/test``); without it this test fails on
    Windows.
    """
    with six.assertRaisesRegex(
            self,
            RuntimeError,
            "Attempted to break out of output directory to "
            "(.*?:)?/test"):
        utils.sanitised_join(
            "/foo/bar",
            "/test"
        )
def test_detect_root_breakout(self):
    """Joining an absolute path outside the base directory must raise."""
    # The optional "(.*?:)?" group accounts for a Windows drive prefix
    # in the resolved path (e.g. "C:/test").
    expected = ("Attempted to break out of output directory to "
                "(.*?:)?/test")
    with self.assertRaisesRegex(RuntimeError, expected):
        utils.sanitised_join("/foo/bar", "/test")
def compile_css_files(pelican_object):
    """Compile the configured less files into CSS files using lesscpy.

    Configuration
    -------------
    generator.settings['LESS_CSS_FILES']:
        Dictionary with keys indicating output files
        values should be a tuple (input,output)
    """
    logger.info("Generating css with lesscpy")
    for key, value in pelican_object.settings["LESS_CSS_FILES"].items():
        input_rel, output_rel = value
        logger.info("Generating %s from %s using lesscpy",
                    output_rel, input_rel)
        # Refuse to read a source file from outside the working tree.
        try:
            input_path = sanitised_join(os.getcwd(), input_rel)
        except JoinError:
            logger.error(
                "Skipping: file %r would be read outside output path",
                input_rel,
            )
            continue
        # Refuse to write the compiled file outside the output tree.
        try:
            output_path = sanitised_join(
                pelican_object.settings["OUTPUT_PATH"], output_rel
            )
        except JoinError:
            logger.error(
                "Skipping: file %r would be written outside output path",
                output_rel,
            )
            continue
        out_dir = os.path.dirname(output_path)
        # exist_ok=True avoids the check-then-create race the previous
        # os.path.exists() guard was subject to.
        try:
            os.makedirs(out_dir, exist_ok=True)
        except Exception:
            logger.error(
                "Error creating containing directory %r",
                out_dir,
            )
            raise
        try:
            with open(output_path, "w") as f:
                compile_css_file(input_path, f)
        except Exception:
            logger.error(
                "Error compiling %r as less file",
                input_rel,
            )
            raise
def valid_save_as(self):
    """Return true if save_as doesn't write outside output path, false
    otherwise."""
    try:
        output_path = self.settings["OUTPUT_PATH"]
    except KeyError:
        # No output path configured -- nothing to validate.
        return True

    # sanitised_join raises RuntimeError when the target would escape
    # the output directory.
    try:
        sanitised_join(output_path, self.save_as)
        return True
    except RuntimeError:
        return False
def write_feed(self, elements, context, path=None, url=None,
               feed_type='atom', override_output=False, feed_title=None):
    """Generate a feed with the list of articles provided

    Return the feed. If no path or output_path is specified, just
    return the feed object.

    :param elements: the articles to put on the feed.
    :param context: the context to get the feed metadata.
    :param path: the path to output.
    :param url: the publicly visible feed URL; if None, path is used
        instead
    :param feed_type: the feed type to use (atom or rss)
    :param override_output: boolean telling if we can override previous
        output with the same name (and if next files written with the
        same name should be skipped to keep that one)
    :param feed_title: the title of the feed.
    """
    if not is_selected_for_writing(self.settings, path):
        return

    self.site_url = context.get('SITEURL',
                                path_to_url(get_relative_path(path)))
    self.feed_domain = context.get('FEED_DOMAIN')
    self.feed_url = self.urljoiner(self.feed_domain, url if url else path)

    feed = self._create_new_feed(feed_type, feed_title, context)

    max_items = len(elements)
    if self.settings['FEED_MAX_ITEMS']:
        max_items = min(self.settings['FEED_MAX_ITEMS'], max_items)
    for i in range(max_items):
        self._add_item_to_the_feed(feed, elements[i])

    signals.feed_generated.send(context, feed=feed)
    if path:
        # Refuse to write the feed outside the output directory.
        complete_path = sanitised_join(self.output_path, path)
        try:
            os.makedirs(os.path.dirname(complete_path))
        except OSError:
            # Directory already exists; a real creation failure will
            # surface in the open below.
            pass
        encoding = 'utf-8' if six.PY3 else None
        with self._open_w(complete_path, encoding, override_output) as fp:
            feed.write(fp, 'utf-8')
            logger.info('Writing %s', complete_path)
        signals.feed_written.send(complete_path, context=context, feed=feed)
    return feed
def test_pass_deep_subpaths(self):
    """A relative sub-path joins cleanly under the base directory."""
    joined = utils.sanitised_join("/foo/bar", "test")
    self.assertEqual(joined, os.path.join("/foo/bar", "test"))
def _has_valid_save_as(self):
    """Return true if save_as doesn't write outside output path, false
    otherwise."""
    try:
        output_path = self.settings["OUTPUT_PATH"]
    except KeyError:
        # No output path configured -- we cannot check, assume valid.
        return True

    # sanitised_join raises RuntimeError when the target would escape
    # the output directory.
    try:
        sanitised_join(output_path, self.save_as)
        return True
    except RuntimeError:
        logger.error(
            "Skipping %s: file %r would be written outside output path",
            self,
            self.save_as,
        )
        return False
def write_feed(self, elements, context, path=None, url=None,
               feed_type='atom', override_output=False, feed_title=None):
    """Generate a feed with the list of articles provided

    Return the feed. If no path or output_path is specified, just
    return the feed object.

    :param elements: the articles to put on the feed.
    :param context: the context to get the feed metadata.
    :param path: the path to output.
    :param url: the publicly visible feed URL; if None, path is used
        instead
    :param feed_type: the feed type to use (atom or rss)
    :param override_output: boolean telling if we can override previous
        output with the same name (and if next files written with the
        same name should be skipped to keep that one)
    :param feed_title: the title of the feed.
    """
    if not is_selected_for_writing(self.settings, path):
        return

    self.site_url = context.get(
        'SITEURL', path_to_url(get_relative_path(path)))
    self.feed_domain = context.get('FEED_DOMAIN')
    self.feed_url = self.urljoiner(self.feed_domain, url if url else path)

    feed = self._create_new_feed(feed_type, feed_title, context)

    max_items = len(elements)
    if self.settings['FEED_MAX_ITEMS']:
        max_items = min(self.settings['FEED_MAX_ITEMS'], max_items)
    for i in range(max_items):
        self._add_item_to_the_feed(feed, elements[i])

    signals.feed_generated.send(context, feed=feed)
    if path:
        # Refuse to write the feed outside the output directory.
        complete_path = sanitised_join(self.output_path, path)
        try:
            os.makedirs(os.path.dirname(complete_path))
        except OSError:
            # Directory already exists; a real creation failure will
            # surface in the open below.
            pass
        encoding = 'utf-8' if six.PY3 else None
        with self._open_w(complete_path, encoding, override_output) as fp:
            feed.write(fp, 'utf-8')
            logger.info('Writing %s', complete_path)
        signals.feed_written.send(
            complete_path, context=context, feed=feed)
    return feed
def _write_file(output_path, name, override):
    """Serialise the lunr index and its documents into the js file.

    NOTE(review): this function references ``self`` and ``context``
    that are not parameters -- presumably it is a closure nested
    inside a method where both are in scope; confirm against the
    enclosing code.
    """
    # Refuse to write outside the output directory.
    path = sanitised_join(output_path, name)
    # 'out_dir' instead of 'dir', which shadowed the builtin;
    # exist_ok=True avoids the check-then-create race.
    out_dir = os.path.dirname(path)
    os.makedirs(out_dir, exist_ok=True)

    idx = lunr(
        ref="ref",
        fields=[
            {"field_name": "title", "boost": 10},
            {"field_name": "summary", "boost": 2},
            "body",
            "tags",
        ],
        documents=context["index_data"],
    )

    with self._open_w(path, "utf-8", override=override) as handle:
        handle.write("const lunrSerializedIdx = ")
        handle.write(json.dumps(idx.serialize()))
        handle.write(";\n")
        handle.write("const lunrDocuments = ")

        def remove_body():
            # The full body is only needed for indexing; strip it from
            # the documents shipped to the client.
            for doc in context["index_data"]:
                cleaned_doc = dict(doc)
                del cleaned_doc["body"]
                yield cleaned_doc

        handle.write(json.dumps([doc for doc in remove_body()]))
        handle.write(";\n")

    logger.info("Writing %s", path)
    # Send a signal to say we're writing a file with some specific
    # local context.
    signals.content_written.send(path, context=context)
def _write_file(template, localcontext, output_path, name, override): """Render the template write the file.""" # set localsiteurl for context so that Contents can adjust links if localcontext['localsiteurl']: context['localsiteurl'] = localcontext['localsiteurl'] output = template.render(localcontext) path = sanitised_join(output_path, name) try: os.makedirs(os.path.dirname(path)) except Exception: pass with self._open_w(path, 'utf-8', override=override) as f: f.write(output) logger.info('Writing %s', path) # Send a signal to say we're writing a file with some specific # local context. signals.content_written.send(path, context=localcontext)
def _publish_readable_copy(generator, person, work, copy, doc):
    """Write a transformed TEI document for one work copy and record its URL.

    Shared tail of both provider branches in ``add_readable_works``:
    tags are collected from the TEI text, a content-addressed filename
    is derived from the work title and the copy's data URL, and the
    pretty-printed XML is written under the person's output directory.
    """
    determine_tags(doc.xpath('/tei:TEI/tei:text', namespaces=NS)[0])
    # 'digest' instead of 'hash', which shadowed the builtin.
    digest = sha256()
    digest.update(work['title'].encode('utf-8'))
    digest.update(b'$$')
    digest.update(copy['data']['value'].encode('utf-8'))
    copy['read_url'] = {
        'value': '{0}/{1}.tei'.format(person.url[:-5], digest.hexdigest()),
        'label': None
    }
    # Both joins refuse paths escaping the output directory.
    target_dir = sanitised_join(
        generator.settings['OUTPUT_PATH'], person.url[:-5])
    os.makedirs(target_dir, exist_ok=True)
    out_path = sanitised_join(
        target_dir, '{0}.tei'.format(digest.hexdigest()))
    with open(out_path, 'wb') as out_f:
        out_f.write(etree.tostring(doc, pretty_print=True))


def add_readable_works(generator):
    """Fetch, transform, and publish readable TEI copies of each person's works.

    For every work copy with a known provider (textgridrep.org or the
    Deutsches Textarchiv), the cached source is fetched, transformed via
    the provider-specific XSLT pipeline, and published through
    ``_publish_readable_copy``.
    """
    if isinstance(generator, PersonGenerator):
        for person in generator.people:
            if 'work' in person.metadata:
                for work in person.metadata['work']:
                    for copy in work['copies'].values():
                        if 'provider' not in copy or 'data' not in copy:
                            continue
                        if copy['provider']['value'] == \
                                'https://textgridrep.org':
                            content = fetch_cached_work(
                                copy['data']['value'])
                            if content:
                                doc = etree.XML(content)
                                # TextGrid sources need an extra text
                                # extraction step before the DTA sheet.
                                doc = DTA_STYLESHEET(
                                    transform(
                                        TG_EXTRACT_TEXT_STYLESHEET(doc)))
                                _publish_readable_copy(
                                    generator, person, work, copy, doc)
                        elif copy['provider']['label'] == \
                                'Deutsches Textarchiv':
                            content = fetch_cached_work(
                                copy['data']['value'])
                            if content:
                                doc = etree.XML(content)
                                doc = DTA_STYLESHEET(transform(doc))
                                _publish_readable_copy(
                                    generator, person, work, copy, doc)
        # NOTE(review): debug dump of tags seen but not classified.
        # The flattened source leaves its exact nesting ambiguous --
        # placed here (once per PersonGenerator run); confirm against
        # the original plugin.
        tmp = list(TAGS - KNOWN_TAGS - IGNORED_TAGS)
        tmp.sort()
        for tag in tmp:
            print(tag)
def get_css_names(generator):
    """
    Gets list of specified CSS files to be generated by lesscpy

    Configuration
    -------------
    generator.settings['LESS_INTEGRITY']: List
    generator.settings['LESS_CSS_FILES']:
        Dictionary with keys indicating output files
        values should be a tuple (input,output)

    Output
    ------
    generator.context['compiled_css']:
        Dictionary with keys corresponding to the configurations keys
        and values are filepaths relative to the output root
    """
    if "LESS_CSS_FILES" not in generator.settings:
        return
    hashes = generator.settings.get("LESS_INTEGRITY", [])
    versioned = generator.settings.get("VERSIONED_CSS", False)
    compiled_files = {}
    logger.info("Filling environment with lesscpy output file names")
    for key, value in generator.settings["LESS_CSS_FILES"].items():
        input_rel, output_rel = value
        hash_vals = []
        ver_string = ""
        # Lazy %-args instead of eager str.format in the log call.
        logger.info(
            "Generating data for css file key=%s: (%s)", key, input_rel
        )
        if hashes or versioned:
            try:
                input_path = sanitised_join(os.getcwd(), input_rel)
            except RuntimeError:
                # The input is *read* here; the previous message
                # incorrectly said "written".
                logger.error(
                    "Skipping: file %r would be read outside output path",
                    input_rel,
                )
                continue
            tmpio = io.StringIO()
            compile_css_file(input_path, tmpio)
            if versioned:
                # Short content hash used as a cache-busting query string.
                ver_string = (
                    "?"
                    + hashlib.sha256(
                        tmpio.getvalue().encode("utf-8")
                    ).hexdigest()[:6]
                )
            for h in hashes:
                if h not in hash_funcs:
                    logger.error("Skipping generation of unknown hash %s", h)
                    continue
                # Subresource-integrity value: "<alg>-<base64 digest>".
                hash_vals.append(
                    "{h}-{dgst}".format(
                        h=h,
                        dgst=base64.b64encode(
                            hash_funcs[h](
                                tmpio.getvalue().encode("utf-8")
                            ).digest()
                        ).decode(),
                    )
                )
            tmpio.close()
        compiled_files[key] = {
            "css_file": output_rel + ver_string,
            "integrity": " ".join(hash_vals),
        }
    generator.context["compiled_css"] = compiled_files