def links(self):
    """Yield a link to the sibling file in the same folder whose
    extension pairs with this file's (when such a file exists and is
    not excluded from the index).
    """
    def paired_exts(ext):
        # Return the _TitledExts group opposite the one containing ext,
        # or an empty group when ext belongs to neither.
        first, second = self.ext_pairings[0], self.ext_pairings[1]
        if ext in first.exts:
            return second
        if ext in second.exts:
            return first
        return _TitledExts((), '')

    def indexed(candidate):
        # A path is indexed unless an ignore pattern matches its
        # basename or its slash-rooted, forward-slashed full path.
        if any(fnmatchcase(basename(candidate), pattern)
               for pattern in self.tree.ignore_filenames):
            return False
        slashed = '/' + candidate.replace(os.sep, '/')
        return not any(fnmatchcase(slashed, pattern)
                       for pattern in self.tree.ignore_paths)

    stem, ext = splitext(self.path)
    partners = paired_exts(ext)
    for partner_ext in partners.exts:
        partner = stem + partner_ext
        if (isfile(join(self.tree.source_folder, partner))
                and indexed(partner)):
            yield (4,
                   partners.title,
                   [(icon(partner),
                     unicode_for_display(basename(partner)),
                     browse_file_url(self.tree.name,
                                     unicode_for_display(partner)))])
            # Todo? this 'break' breaks handling of multiple extension
            # pairings on the same basename.
            break
def docs(): """Yield documents for bulk indexing. Big Warning: docs also clears the contents of all elements of needles_by_line because they will no longer be used. """ # Index a doc of type 'file' so we can build folder listings. # At the moment, we send to ES in the same worker that does the # indexing. We could interpose an external queueing system, but I'm # willing to potentially sacrifice a little speed here for the easy # management of self-throttling. file_info = stat(path) folder_name, file_name = split(rel_path) # Hard-code the keys that are hard-coded in the browse() # controller. Merge with the pluggable ones from needles: doc = dict(# Some non-array fields: folder=unicode_for_display(folder_name), name=unicode_for_display(file_name), size=file_info.st_size, is_folder=False, # And these, which all get mashed into arrays: **needles) links = dictify_links(chain.from_iterable(linkses)) if links: doc['links'] = links yield es.index_op(doc, doc_type=FILE) # Index all the lines. if index_by_line: for total, annotations_for_this_line, tags in izip( needles_by_line, annotations_by_line, es_lines(finished_tags(lines, chain.from_iterable(refses), chain.from_iterable(regionses)))): # Duplicate the file-wide needles into this line: total.update(needles) # We bucket tags into refs and regions for ES because later at # request time we want to be able to merge them individually # with those from skimmers. refs_and_regions = bucket(tags, lambda index_obj: "regions" if isinstance(index_obj['payload'], basestring) else "refs") if 'refs' in refs_and_regions: total['refs'] = refs_and_regions['refs'] if 'regions' in refs_and_regions: total['regions'] = refs_and_regions['regions'] if annotations_for_this_line: total['annotations'] = annotations_for_this_line yield es.index_op(total) # Because needles_by_line holds a reference, total is not # garbage collected. Since we won't use it again, we can clear # the contents, saving substantial memory on long files. total.clear()
def docs():
    """Yield documents for bulk indexing.

    Big Warning: as a memory optimization, each element of
    needles_by_line is cleared once its doc has been yielded, so
    nothing may use those dicts afterward.
    """
    # A single doc of type 'file' comes first, so folder listings can
    # be built. We send to ES in the same worker that indexes; an
    # external queue could be interposed, but self-throttling is easier
    # to manage this way, at a possible small cost in speed.
    folder_name, file_name = split(rel_path)
    file_doc = dict(  # Non-array fields, with the same keys the
                      # browse() controller hard-codes:
                    folder=unicode_for_display(folder_name),
                    name=unicode_for_display(file_name),
                    size=stat(path).st_size,
                    is_folder=False,
                    # Pluggable needles, which all get mashed into
                    # arrays:
                    **needles)
    file_links = dictify_links(chain.from_iterable(linkses))
    if file_links:
        file_doc['links'] = file_links
    yield es.index_op(file_doc, doc_type=FILE)

    if not index_by_line:
        return

    def ref_or_region(tag):
        # String payloads are regions; anything else is a ref.
        return ("regions" if isinstance(tag['payload'], basestring)
                else "refs")

    # One doc per line.
    tag_stream = es_lines(finished_tags(lines,
                                        chain.from_iterable(refses),
                                        chain.from_iterable(regionses)))
    for total, line_annotations, tags in izip(needles_by_line,
                                              annotations_by_line,
                                              tag_stream):
        # Copy the file-wide needles into this line's doc:
        total.update(needles)
        # Bucket tags into refs and regions so that, at request time,
        # each can be merged individually with those from skimmers.
        buckets = bucket(tags, ref_or_region)
        if 'refs' in buckets:
            total['refs'] = buckets['refs']
        if 'regions' in buckets:
            total['regions'] = buckets['regions']
        if line_annotations:
            total['annotations'] = line_annotations
        yield es.index_op(total)
        # needles_by_line still references total, so it would not be
        # garbage collected; clearing it here saves substantial memory
        # on long files.
        total.clear()
def links(self):
    """Yield sidebar link groups for this file: a VCS permalink (or an
    'Untracked file' notice when no VCS covers it) and, for textual
    images, a raw-view link.
    """
    if not self.vcs:
        yield 5, 'Untracked file', []
    else:
        rel_to_vcs_root = relpath(self.absolute_path(),
                                  self.vcs.get_root_dir())
        heading = '%s (%s)' % (self.vcs.get_vcs_name(),
                               self.vcs.display_rev(rel_to_vcs_root))
        permalink = url_for('.rev',
                            tree=self.tree.name,
                            revision=self.vcs.revision,
                            path=unicode_for_display(self.path))
        yield 5, heading, [('permalink', 'Permalink', permalink)]

    if is_textual_image(self.path):
        raw_url = url_for('.raw',
                          tree=self.tree.name,
                          path=unicode_for_display(self.path))
        yield 4, 'Image', [('svgview', 'View', raw_url)]
def needles(self):
    """Return the needles for a folder doc: path, parent folder, and
    name, all converted from bags of bytes to unicode, which ES demands
    and the web likes.
    """
    display_path = unicode_for_display(
        relpath(self.path, self.tree.source_folder))
    parent_path, folder_name = split(display_path)
    return [
        # 'path' is an array for consistency with non-folder file docs:
        ('path', [display_path]),
        ('folder', parent_path),
        ('name', folder_name)
    ]
def needles(self):
    """Fill out path (and path.trigrams)."""
    if self.is_link():
        # realpath keeps following symlinks until it reaches the 'real'
        # target.
        yield 'link', relpath(realpath(self.absolute_path()),
                              self.tree.source_folder)

    display_path = unicode_for_display(self.path)
    yield 'path', display_path
    yield 'file_name', basename(display_path)
    extension = splitext(display_path)[1]
    if extension:
        yield 'ext', extension[1:]  # drop the leading period

    if is_binary_image(self.path) or is_textual_image(self.path):
        # Textual images get stored twice — once as text so they show
        # up in searches, and once as raw data so the browser can
        # preview them.
        if self.contents is None:
            # Binary file: contents weren't read earlier, so read them
            # here.
            with open(self.absolute_path(), 'rb') as image_file:
                self.contents = image_file.read()
        if self.contains_text():
            raw_bytes = self.contents.encode('utf-8')
        else:
            raw_bytes = self.contents
        yield 'raw_data', b64encode(raw_bytes)
    elif not self.contains_text():
        # Binary, but not an image:
        yield 'is_binary', True

    # Prefer the last-modified time from version control; fall back to
    # the timestamp from stat'ing the file.
    modified = None
    if self.vcs:
        rel_to_vcs_root = relpath(self.absolute_path(),
                                  self.vcs.get_root_dir())
        try:
            modified = self.vcs.last_modified_date(rel_to_vcs_root)
        except NotImplementedError:
            pass
    if modified is None:
        modified = datetime.utcfromtimestamp(
            stat(self.absolute_path()).st_mtime)
    yield 'modified', modified