def rev(tree, revision, path):
    """Render the file at ``path`` as it existed at ``revision``.

    The contents are fetched from version control rather than from the
    index. Raises NotFound when version control has nothing for that path
    at that revision.

    """
    dxr_config = current_app.dxr_config
    tree_settings = dxr_config.trees[tree]
    raw_contents = file_contents_at_rev(
        join(tree_settings.source_folder, path), revision)
    if raw_contents is None:
        raise NotFound

    image_rev = None
    if is_binary_image(path):
        # Binary images are never shown as text; the page links to the raw
        # image at this revision instead.
        is_text = False
        text = ''
        image_rev = revision
    else:
        is_text, text = decode_data(raw_contents,
                                    tree_settings.source_encoding)
        if not is_text:
            text = ''
        elif is_textual_image(path):
            image_rev = revision

    # Wrap each line in a dict to mimic the JSON returned by an ES lines
    # query.
    line_docs = [{'content': line} for line in split_content_lines(text)]
    return _browse_file(tree,
                        path,
                        line_docs,
                        {},
                        dxr_config,
                        not is_text,
                        contents=text,
                        image_rev=image_rev)
def raw_rev(tree, revision, path):
    """Send the raw bytes of an image at ``path`` as of ``revision``.

    Only paths recognized as binary or textual images are served; anything
    else, or a path with no contents at that revision, raises NotFound.

    """
    if not (is_binary_image(path) or is_textual_image(path)):
        raise NotFound

    source_folder = current_app.dxr_config.trees[tree].source_folder
    data = file_contents_at_rev(source_folder, path, revision)
    if data is None:
        raise NotFound

    # send_file wants a file-like object, so wrap the data in one.
    return send_file(StringIO(data), mimetype=guess_type(path)[0])
def raw(tree, path):
    """Send the indexed raw data of an image at ``path`` in ``tree``.

    Only paths recognized as binary or textual images are served. Raises
    NotFound for any other path, or when the search returns no entry for
    the image.

    """
    if not (is_binary_image(path) or is_textual_image(path)):
        raise NotFound

    path_filter = {'filter': {'term': {'path': path}}}
    response = current_app.es.search(path_filter,
                                     index=es_alias_or_not_found(tree),
                                     doc_type=FILE,
                                     size=1)
    try:
        # There should be exactly 1 result, so take index 0 explicitly.
        encoded = response['hits']['hits'][0]['_source']['raw_data'][0]
    except IndexError:
        # Couldn't find the image.
        raise NotFound

    # The stored raw data is base64-encoded; decode it before sending.
    return send_file(StringIO(encoded.decode('base64')),
                     mimetype=guess_type(path)[0])
def needles(self):
    """Yield the path-related needles (and raw image data) for this file."""
    if self.is_link():
        # realpath keeps following symlinks until it reaches the 'real'
        # thing.
        link_target = realpath(self.absolute_path())
        yield 'link', relpath(link_target, self.tree.source_folder)

    yield 'path', self.path

    suffix = splitext(self.path)[1]
    if suffix:
        # Drop the leading period.
        yield 'ext', suffix[1:]

    # The contents of textual images are stored twice so that they can both
    # show up in searches and be previewed in the browser.
    if is_binary_image(self.path) or is_textual_image(self.path):
        if self.contains_text():
            payload = self.contents.encode('utf-8')
        else:
            payload = self.contents
        yield 'raw_data', b64encode(payload)
    elif not self.contains_text():
        # Binary, but not an image.
        yield 'is_binary', True
def needles(self):
    """Yield the path, image-data and modification-time needles for this
    file.

    """
    if self.is_link():
        # realpath keeps following symlinks until it reaches the 'real'
        # thing.
        link_target = realpath(self.absolute_path())
        yield 'link', relpath(link_target, self.tree.source_folder)

    display_path = unicode_for_display(self.path)
    yield 'path', display_path
    yield 'file_name', basename(display_path)

    suffix = splitext(display_path)[1]
    if suffix:
        # Drop the leading period.
        yield 'ext', suffix[1:]

    # The contents of textual images are stored twice so that they can both
    # show up in searches and be previewed in the browser.
    if is_binary_image(self.path) or is_textual_image(self.path):
        # Contents are None when the file was binary, so read them here.
        if self.contents is None:
            with open(self.absolute_path(), 'rb') as image_file:
                self.contents = image_file.read()
        if self.contains_text():
            payload = self.contents.encode('utf-8')
        else:
            payload = self.contents
        yield 'raw_data', b64encode(payload)
    elif not self.contains_text():
        # Binary, but not an image.
        yield 'is_binary', True

    # Prefer the last-modified time from version control; fall back to the
    # timestamp from stat'ing the file when that isn't available.
    modified = None
    if self.vcs:
        path_in_vcs = relpath(self.absolute_path(), self.vcs.get_root_dir())
        try:
            modified = self.vcs.last_modified_date(path_in_vcs)
        except NotImplementedError:
            pass
    if modified is None:
        mtime = stat(self.absolute_path()).st_mtime
        modified = datetime.utcfromtimestamp(mtime)
    yield 'modified', modified
def raw(tree, path):
    """Send the indexed raw data of an image at ``path`` in ``tree``.

    Raises NotFound when ``path`` is not recognized as a binary or textual
    image, or when the search returns no entry for it.

    """
    servable = is_binary_image(path) or is_textual_image(path)
    if not servable:
        raise NotFound

    term_query = {'filter': {'term': {'path': path}}}
    found = current_app.es.search(term_query,
                                  index=es_alias_or_not_found(tree),
                                  doc_type=FILE,
                                  size=1)
    try:
        # There should be exactly 1 result, so take index 0 explicitly.
        first_hit = found['hits']['hits'][0]
        encoded = first_hit['_source']['raw_data'][0]
    except IndexError:
        # Couldn't find the image.
        raise NotFound

    # The stored raw data is base64-encoded; decode it into a file-like
    # object for send_file.
    image_file = StringIO(encoded.decode('base64'))
    mimetype = guess_type(path)[0]
    return send_file(image_file, mimetype=mimetype)
def _browse_file(tree, path, line_docs, file_doc, config, is_binary,
                 date=None, contents=None, image_rev=None):
    """Render the page that displays a single source file.

    :arg string tree: name of the tree the file belongs to
    :arg string path: path of the file, relative to the tree root
    :arg list line_docs: LINE documents as defined in the mapping of
        core.py, where the `content` field is dereferenced
    :arg file_doc: the FILE document as defined in core.py
    :arg config: configuration object; the tree's own settings are looked
        up as ``config.trees[tree]``
    :arg is_binary: whether the file is binary
    :arg date: formatted string for the generated date; defaults to now
    :arg string contents: the contents of the source file; defaults to the
        `content` fields of all line_docs joined together
    :arg image_rev: revision of a textual or binary image, for images
        displayed at a certain rev

    """
    def process_link_templates(sections):
        """Copy every link href containing {{line}} onto a 'template'
        field, then strip the placeholder from the href itself.

        """
        for section in sections:
            for item in section['items']:
                href = item['href']
                if '{{line}}' in href:
                    item['template'] = href
                    item['href'] = href.replace('{{line}}', '')

    def sidebar_links(sections):
        """Return the data structure the nav sidebar is built from.

        ::

            [('Section Name', [{'icon': ..., 'title': ..., 'href': ...}])]

        """
        def order_then_heading(section):
            # Sort by order, resolving ties by section name.
            return section['order'], section['heading']

        process_link_templates(sections)
        return sorted(sections, key=order_then_heading)

    if not date:
        # Assume the file is generated now. This can't be the parameter's
        # default value: defaults are evaluated only once, so the same time
        # would be used forever.
        date = datetime.utcnow().strftime("%a, %d %b %Y %H:%M:%S +0000")

    common = _build_common_file_template(tree, path, is_binary, date, config)
    links = file_doc.get('links', [])

    if is_binary_image(path):
        image_context = {'sections': sidebar_links(links),
                         'revision': image_rev}
        return render_template('image_file.html',
                               **merge(common, image_context))

    if is_binary:
        binary_context = {'lines': [],
                          'is_binary': True,
                          'sections': sidebar_links(links)}
        return render_template('text_file.html',
                               **merge(common, binary_context))

    # Concretize the lines into a list, since they are iterated over
    # multiple times.
    lines = [doc['content'] for doc in line_docs]
    if not contents:
        # No contents given: reconstruct them by stitching the lines
        # together.
        contents = ''.join(lines)
    offsets = build_offset_map(lines)
    tree_config = config.trees[tree]

    if is_textual_image(path) and image_rev:
        # Add a link to view textual images on revs.
        view_url = url_for('.raw_rev',
                           tree=tree_config.name,
                           path=path,
                           revision=image_rev)
        links.extend(dictify_links(
            [(4, 'Image', [('svgview', 'View', view_url)])]))

    # Build a skimmer for every enabled plugin that defines a file_to_skim
    # class.
    skimmers = []
    for plugin in tree_config.enabled_plugins:
        if plugin.file_to_skim:
            skimmers.append(plugin.file_to_skim(path,
                                                contents,
                                                plugin.name,
                                                tree_config,
                                                file_doc,
                                                line_docs))
    skim_links, refses, regionses, annotationses = skim_file(
        skimmers, len(line_docs))

    stored_refs = chain.from_iterable(
        doc.get('refs', []) for doc in line_docs)
    index_refs = (Ref.es_to_triple(ref, tree_config) for ref in stored_refs)
    stored_regions = chain.from_iterable(
        doc.get('regions', []) for doc in line_docs)
    index_regions = (Region.es_to_triple(region)
                     for region in stored_regions)
    tags = finished_tags(lines,
                         chain(chain.from_iterable(refses), index_refs),
                         chain(chain.from_iterable(regionses), index_regions))

    # Someday, it would be great to stream this and not concretize the whole
    # thing in RAM. The template will have to quit looping through the whole
    # thing 3 times.
    rendered_lines = [
        (html_line(doc['content'], line_tags, offset),
         doc.get('annotations', []) + extra_annotations)
        for doc, line_tags, offset, extra_annotations
        in izip(line_docs, tags_per_line(tags), offsets, annotationses)]
    sections = sidebar_links(links + skim_links)
    return render_template(
        'text_file.html',
        **merge(common, {
            'lines': rendered_lines,
            'sections': sections,
            'query': request.args.get('q', ''),
            'bubble': request.args.get('redirect_type')}))
def _browse_file(tree, path, line_docs, file_doc, config, date=None,
                 contents=None):
    """Render the page that displays a single source file.

    :arg string tree: name of the tree the file belongs to
    :arg string path: path of the file, relative to the tree root
    :arg list line_docs: LINE documents as defined in the mapping of
        core.py, where the `content` field is dereferenced
    :arg file_doc: the FILE document as defined in core.py
    :arg config: configuration object; the tree's own settings are looked
        up as ``config.trees[tree]``
    :arg date: formatted string for the generated date; defaults to now
    :arg string contents: the contents of the source file; defaults to the
        `content` fields of all line_docs joined together

    """
    def sidebar_links(sections):
        """Return the data structure the nav sidebar is built from.

        ::

            [('Section Name', [{'icon': ..., 'title': ..., 'href': ...}])]

        """
        def order_then_heading(section):
            # Sort by order, resolving ties by section name.
            return section['order'], section['heading']

        return sorted(sections, key=order_then_heading)

    if not date:
        # Assume the file is generated now. This can't be the parameter's
        # default value: defaults are evaluated only once, so the same time
        # would be used forever.
        date = datetime.utcnow().strftime("%a, %d %b %Y %H:%M:%S +0000")

    common = _build_common_file_template(tree, path, date, config)
    links = file_doc.get('links', [])

    if is_binary_image(path):
        return render_template('image_file.html', **common)

    # Browsing binary files isn't allowed, so from here on this must be a
    # text file.

    # Concretize the lines into a list, since they are iterated over
    # multiple times.
    lines = [doc['content'] for doc in line_docs]
    if not contents:
        # No contents given: reconstruct them by stitching the lines
        # together.
        contents = ''.join(lines)
    offsets = cumulative_sum(imap(len, lines))
    tree_config = config.trees[tree]

    # Build a skimmer for every enabled plugin that defines a file_to_skim
    # class.
    skimmers = []
    for plugin in tree_config.enabled_plugins:
        if plugin.file_to_skim:
            skimmers.append(plugin.file_to_skim(path,
                                                contents,
                                                plugin.name,
                                                tree_config,
                                                file_doc,
                                                line_docs))
    skim_links, refses, regionses, annotationses = skim_file(
        skimmers, len(line_docs))

    stored_refs = chain.from_iterable(
        doc.get('refs', []) for doc in line_docs)
    index_refs = (Ref.es_to_triple(ref, tree_config) for ref in stored_refs)
    stored_regions = chain.from_iterable(
        doc.get('regions', []) for doc in line_docs)
    index_regions = (Region.es_to_triple(region)
                     for region in stored_regions)
    tags = finished_tags(lines,
                         chain(chain.from_iterable(refses), index_refs),
                         chain(chain.from_iterable(regionses), index_regions))

    # Someday, it would be great to stream this and not concretize the whole
    # thing in RAM. The template will have to quit looping through the whole
    # thing 3 times.
    rendered_lines = [
        (html_line(doc['content'], line_tags, offset),
         doc.get('annotations', []) + extra_annotations)
        for doc, line_tags, offset, extra_annotations
        in izip(line_docs, tags_per_line(tags), offsets, annotationses)]
    return render_template(
        'text_file.html',
        **merge(common, {
            'lines': rendered_lines,
            'is_text': True,
            'sections': sidebar_links(links + skim_links)}))
def _browse_file(tree, path, line_docs, file_doc, config, date=None,
                 contents=None):
    """Return the rendered page for one source file.

    :arg string tree: name of the tree on which the file is found
    :arg string path: the file's path, relative to the tree root
    :arg list line_docs: LINE documents as defined in the mapping of
        core.py, where the `content` field is dereferenced
    :arg file_doc: the FILE document as defined in core.py
    :arg config: configuration object; per-tree settings are read from
        ``config.trees[tree]``
    :arg date: a formatted string representing the generated date;
        defaults to now
    :arg string contents: the contents of the source file; when omitted
        they are rebuilt by joining the `content` field of all line_docs

    """
    def sidebar_links(sections):
        """Return the data structure the nav sidebar is built from.

        ::

            [('Section Name', [{'icon': ..., 'title': ..., 'href': ...}])]

        """
        def sort_key(section):
            # Sort by order, resolving ties by section name.
            return (section['order'], section['heading'])

        return sorted(sections, key=sort_key)

    if not date:
        # Assume the file is generated now. "Now" can't be the default
        # parameter value because defaults are only evaluated once, so the
        # same time would always be used.
        now = datetime.utcnow()
        date = now.strftime("%a, %d %b %Y %H:%M:%S +0000")

    common = _build_common_file_template(tree, path, date, config)
    links = file_doc.get('links', [])

    if is_binary_image(path):
        return render_template('image_file.html', **common)

    # Binary files can't be browsed, so past this point the file is text.

    # The lines are needed more than once, so materialize them as a list.
    lines = [line_doc['content'] for line_doc in line_docs]
    if not contents:
        # Contents weren't provided; stitch the lines back together.
        contents = ''.join(lines)
    offsets = cumulative_sum(imap(len, lines))
    tree_config = config.trees[tree]

    # Construct skimmer objects for all enabled plugins that define a
    # file_to_skim class.
    skimming_plugins = [plugin for plugin in tree_config.enabled_plugins
                        if plugin.file_to_skim]
    skimmers = [plugin.file_to_skim(path,
                                    contents,
                                    plugin.name,
                                    tree_config,
                                    file_doc,
                                    line_docs)
                for plugin in skimming_plugins]
    line_count = len(line_docs)
    skim_links, refses, regionses, annotationses = skim_file(skimmers,
                                                             line_count)

    index_refs = (
        Ref.es_to_triple(ref, tree_config)
        for ref in chain.from_iterable(doc.get('refs', [])
                                       for doc in line_docs))
    index_regions = (
        Region.es_to_triple(region)
        for region in chain.from_iterable(doc.get('regions', [])
                                          for doc in line_docs))
    all_refs = chain(chain.from_iterable(refses), index_refs)
    all_regions = chain(chain.from_iterable(regionses), index_regions)
    tags = finished_tags(lines, all_refs, all_regions)

    # Someday, it would be great to stream this and not concretize the whole
    # thing in RAM. The template will have to quit looping through the whole
    # thing 3 times.
    page_lines = [
        (html_line(doc['content'], tags_in_line, offset),
         doc.get('annotations', []) + skim_annotations)
        for doc, tags_in_line, offset, skim_annotations
        in izip(line_docs, tags_per_line(tags), offsets, annotationses)]
    text_context = {'lines': page_lines,
                    'is_text': True,
                    'sections': sidebar_links(links + skim_links)}
    return render_template('text_file.html', **merge(common, text_context))