def needles(self):
    """Yield ``(name, value)`` needle pairs describing this file.

    ``path`` is always yielded. ``ext`` follows when the path has an
    extension, with the leading period removed. An image file additionally
    gets ``raw_data``: the base64 encoding of its contents, which are first
    encoded as UTF-8 if the file contains text. A file that is neither an
    image nor text is marked with ``is_binary``.

    """
    yield "path", self.path

    _, suffix = splitext(self.path)
    if suffix:
        # Drop the leading period.
        yield "ext", suffix[1:]

    if is_image(self.path):
        # Textual contents must be turned into bytes before base64-encoding;
        # anything else is handed over untouched.
        if self.contains_text():
            payload = self.contents.encode("utf-8")
        else:
            payload = self.contents
        yield "raw_data", b64encode(payload)
    elif not self.contains_text():
        # Binary, but not an image.
        yield "is_binary", True
def needles(self):
    """Yield ``(name, value)`` needle pairs describing this file.

    A symlink first yields ``link``: its fully resolved target, expressed
    relative to the tree's source folder. ``path`` is always yielded, and
    ``ext`` follows when the path has an extension (leading period removed).
    An image file additionally gets ``raw_data``: the base64 encoding of its
    contents, which are first encoded as UTF-8 if the file contains text. A
    file that is neither an image nor text is marked with ``is_binary``.

    """
    if self.is_link():
        # realpath keeps following symlinks until it reaches the 'real'
        # thing, so chains of links collapse to their final target.
        resolved_target = realpath(self.absolute_path())
        yield 'link', relpath(resolved_target, self.tree.source_folder)

    yield 'path', self.path

    _, suffix = splitext(self.path)
    if suffix:
        # Drop the leading period.
        yield 'ext', suffix[1:]

    if is_image(self.path):
        # Textual contents must be turned into bytes before base64-encoding;
        # anything else is handed over untouched.
        if self.contains_text():
            payload = self.contents.encode('utf-8')
        else:
            payload = self.contents
        yield 'raw_data', b64encode(payload)
    elif not self.contains_text():
        # Binary, but not an image.
        yield 'is_binary', True
def _browse_file(tree, path, line_docs, file_doc, config, date=None, contents=None):
    """Render the page that displays a single source file.

    Image paths are rendered with ``image_file.html``. Anything else is
    treated as text and rendered with ``text_file.html``, combining the refs,
    regions, annotations and sidebar links stored in the index with those
    produced by the enabled plugins' skimmers.

    :arg string tree: name of the tree the file belongs to
    :arg string path: path of the file, relative to the tree root
    :arg list line_docs: LINE documents as defined in the mapping of core.py,
        with the `content` field dereferenced
    :arg file_doc: the FILE document as defined in core.py
    :arg config: TreeConfig object of this tree
    :arg date: formatted string giving the generated date; when omitted, the
        current UTC time is used
    :arg string contents: full contents of the source file; when omitted, it
        is rebuilt by joining the `content` field of every line doc

    """
    # "Now" cannot be the parameter default: defaults are evaluated only once,
    # so every call would end up sharing the same timestamp.
    date = date or datetime.utcnow().strftime("%a, %d %b %Y %H:%M:%S +0000")

    common = _build_common_file_template(tree, path, date, config)
    links = file_doc.get('links', [])

    if is_image(path):
        return render_template('image_file.html', **common)

    # Binary files can't be browsed, so from here on this must be a text file.
    # The lines are concretized into a list because they are walked more than
    # once below.
    lines = [line_doc['content'] for line_doc in line_docs]
    # Without explicit contents, stitch the lines back together.
    contents = contents or ''.join(lines)
    offsets = cumulative_sum(imap(len, lines))
    tree_config = config.trees[tree]

    # One skimmer per enabled plugin that defines a file_to_skim class.
    skimmers = []
    for plugin in tree_config.enabled_plugins:
        if plugin.file_to_skim:
            skimmers.append(plugin.file_to_skim(path,
                                                contents,
                                                plugin.name,
                                                tree_config,
                                                file_doc,
                                                line_docs))
    skim_links, refses, regionses, annotationses = skim_file(skimmers,
                                                             len(line_docs))

    # Refs and regions stored with the line docs, lazily converted to triples.
    stored_refs = chain.from_iterable(
        line_doc.get('refs', []) for line_doc in line_docs)
    index_refs = (Ref.es_to_triple(ref, tree_config) for ref in stored_refs)
    stored_regions = chain.from_iterable(
        line_doc.get('regions', []) for line_doc in line_docs)
    index_regions = (Region.es_to_triple(region) for region in stored_regions)

    # Skimmer-provided refs/regions come first, followed by the indexed ones.
    all_refs = chain(chain.from_iterable(refses), index_refs)
    all_regions = chain(chain.from_iterable(regionses), index_regions)
    tags = finished_tags(lines, all_refs, all_regions)

    # Someday it would be great to stream this rather than concretize the
    # whole thing in RAM; the template would first have to stop looping
    # through it 3 times.
    rendered_lines = [
        (html_line(line_doc['content'], line_tags, offset),
         line_doc.get('annotations', []) + skim_annotations)
        for line_doc, line_tags, offset, skim_annotations
        in izip(line_docs, tags_per_line(tags), offsets, annotationses)]

    # Sidebar sections are sorted by order, resolving ties by heading.
    sections = sorted(links + skim_links,
                      key=lambda section: (section['order'],
                                           section['heading']))

    return render_template(
        'text_file.html',
        **merge(common, {'lines': rendered_lines,
                         'is_text': True,
                         'sections': sections}))