Example #1
    def links(self):
        """Add a link from this file to another in the same folder with a
        matching extension, when such a file exists.
        """
        def dual_exts_for(ext):
            if ext in self.ext_pairings[0].exts:
                return self.ext_pairings[1]
            if ext in self.ext_pairings[1].exts:
                return self.ext_pairings[0]
            return _TitledExts((), '')

        def is_indexed(path):
            if any(fnmatchcase(basename(path), e)
                   for e in self.tree.ignore_filenames):
                return False
            if any(fnmatchcase('/' + path.replace(os.sep, '/'), e)
                   for e in self.tree.ignore_paths):
                return False
            return True

        path_no_ext, ext = splitext(self.path)
        dual_exts = dual_exts_for(ext)
        for dual_ext in dual_exts.exts:
            dual_path = path_no_ext + dual_ext
            if (isfile(join(self.tree.source_folder, dual_path)) and
                is_indexed(dual_path)):
                yield (4,
                       dual_exts.title,
                       [(icon(dual_path),
                        unicode_for_display(basename(dual_path)),
                        browse_file_url(self.tree.name,
                                        unicode_for_display(dual_path)))])
                # TODO? This 'break' breaks handling of multiple extension
                # pairings on the same basename.
                break
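The snippet assumes `self.ext_pairings` holds exactly two `_TitledExts` groups, neither of which is defined here. A minimal sketch of how the pairing lookup behaves, with hypothetical extension lists and titles:

from collections import namedtuple

_TitledExts = namedtuple('_TitledExts', ['exts', 'title'])

# Hypothetical pairing: implementations on one side, headers on the other.
ext_pairings = (_TitledExts(('.c', '.cc', '.cpp'), 'Implementations'),
                _TitledExts(('.h', '.hh', '.hpp'), 'Headers'))

def dual_exts_for(ext):
    """Return the group paired with ext, or an empty group, as above."""
    if ext in ext_pairings[0].exts:
        return ext_pairings[1]
    if ext in ext_pairings[1].exts:
        return ext_pairings[0]
    return _TitledExts((), '')

assert dual_exts_for('.c').title == 'Headers'
assert dual_exts_for('.txt').exts == ()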
Example #2
File: build.py Project: vck/dxr
    def docs():
        """Yield documents for bulk indexing.

        Big Warning: docs also clears the contents of all elements of
        needles_by_line because they will no longer be used.
        """
        # Index a doc of type 'file' so we can build folder listings.
        # At the moment, we send to ES in the same worker that does the
        # indexing. We could interpose an external queueing system, but I'm
        # willing to potentially sacrifice a little speed here for the easy
        # management of self-throttling.
        file_info = stat(path)
        folder_name, file_name = split(rel_path)
        # Hard-code the keys that are hard-coded in the browse()
        # controller. Merge with the pluggable ones from needles:
        doc = dict(  # Some non-array fields:
            folder=unicode_for_display(folder_name),
            name=unicode_for_display(file_name),
            size=file_info.st_size,
            is_folder=False,

            # And these, which all get mashed into arrays:
            **needles)
        links = dictify_links(chain.from_iterable(linkses))
        if links:
            doc['links'] = links
        yield es.index_op(doc, doc_type=FILE)

        # Index all the lines.
        if index_by_line:
            for total, annotations_for_this_line, tags in izip(
                    needles_by_line,
                    annotations_by_line,
                    es_lines(finished_tags(lines,
                                           chain.from_iterable(refses),
                                           chain.from_iterable(regionses)))):
                # Duplicate the file-wide needles into this line:
                total.update(needles)

                # We bucket tags into refs and regions for ES because later at
                # request time we want to be able to merge them individually
                # with those from skimmers.
                refs_and_regions = bucket(tags, lambda index_obj: "regions" if
                                          isinstance(index_obj['payload'], basestring) else
                                          "refs")
                if 'refs' in refs_and_regions:
                    total['refs'] = refs_and_regions['refs']
                if 'regions' in refs_and_regions:
                    total['regions'] = refs_and_regions['regions']
                if annotations_for_this_line:
                    total['annotations'] = annotations_for_this_line
                yield es.index_op(total)

                # Because needles_by_line holds a reference, total is not
                # garbage collected. Since we won't use it again, we can clear
                # the contents, saving substantial memory on long files.
                total.clear()
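`bucket` is not defined in this snippet. A minimal stand-in that groups an iterable into a dict of lists by a key function, which is the behavior the code above relies on (DXR's actual helper may differ in detail; `str` stands in for the Python 2 `basestring` used above):

from collections import defaultdict

def bucket(items, key):
    """Group items into a dict of lists keyed by key(item)."""
    groups = defaultdict(list)
    for item in items:
        groups[key(item)].append(item)
    return dict(groups)

# Region tags carry string payloads; ref tags carry richer objects.
tags = [{'payload': 'k'}, {'payload': ('ref', 'data')}]
grouped = bucket(tags, lambda t: 'regions'
                 if isinstance(t['payload'], str) else 'refs')
assert sorted(grouped) == ['refs', 'regions']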
Example #3
File: core.py Project: na-g/dxr
    def links(self):
        if self.vcs:
            vcs_relative_path = relpath(self.absolute_path(),
                                        self.vcs.get_root_dir())
            yield (5,
                   '%s (%s)' % (self.vcs.get_vcs_name(),
                                self.vcs.display_rev(vcs_relative_path)),
                   [('permalink', 'Permalink', url_for('.rev',
                                                       tree=self.tree.name,
                                                       revision=self.vcs.revision,
                                                       path=unicode_for_display(self.path)))])
        else:
            yield 5, 'Untracked file', []

        if is_textual_image(self.path):
            yield (4,
                   'Image',
                   [('svgview', 'View', url_for('.raw',
                                                tree=self.tree.name,
                                                path=unicode_for_display(self.path)))])
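Both `links()` implementations above yield `(priority, heading, [(icon, title, href), ...])` groups, which `dictify_links` in Example #2 flattens onto the document. A stand-in sketch assuming that shape; the field names and the real helper's output format are assumptions:

def dictify_links(link_groups):
    """Hypothetical stand-in for DXR's dictify_links: turn
    (priority, heading, items) groups into storable dicts."""
    return [{'order': order,
             'heading': heading,
             'items': [{'icon': icon, 'title': title, 'href': href}
                       for icon, title, href in items]}
            for order, heading, items in link_groups]

groups = [(5, 'Untracked file', []),
          (4, 'Image', [('svgview', 'View', '/raw/path.svg')])]
assert dictify_links(groups)[1]['items'][0]['title'] == 'View'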
Example #4
File: core.py Project: na-g/dxr
    def needles(self):
        rel_path = relpath(self.path, self.tree.source_folder)
        # Convert from bag of bytes to unicode, which ES demands and the web
        # likes:
        rel_path = unicode_for_display(rel_path)
        superfolder_path, folder_name = split(rel_path)
        return [
            ('path', [rel_path]),  # array for consistency with non-folder file docs
            ('folder', superfolder_path),
            ('name', folder_name)
        ]
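Needles are plain `(key, value)` pairs, so the indexer can turn them into the mapping that `docs()` in Example #2 merges into the document. A minimal illustration with hypothetical values:

needles = [('path', [u'src/util']),  # hypothetical folder inside a tree
           ('folder', u'src'),
           ('name', u'util')]
doc = dict(needles)
assert doc['name'] == u'util'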
Example #5
File: core.py Project: na-g/dxr
    def needles(self):
        """Fill out path (and path.trigrams)."""
        if self.is_link():
            # realpath will keep following symlinks until it gets to the
            # 'real' thing.
            yield 'link', relpath(realpath(self.absolute_path()),
                                  self.tree.source_folder)
        unicode_path = unicode_for_display(self.path)
        yield 'path', unicode_path
        yield 'file_name', basename(unicode_path)
        extension = splitext(unicode_path)[1]
        if extension:
            yield 'ext', extension[1:]  # skip the period
        # We store the contents of textual images twice so that they can
        # both show up in searches and be previewed in the browser.
        if is_binary_image(self.path) or is_textual_image(self.path):
            # If the file was binary, then contents are None, so read it here.
            if self.contents is None:
                with open(self.absolute_path(), 'rb') as image_file:
                    self.contents = image_file.read()
            bytestring = (self.contents.encode('utf-8') if self.contains_text()
                          else self.contents)
            yield 'raw_data', b64encode(bytestring)
        # Binary, but not an image:
        elif not self.contains_text():
            yield 'is_binary', True
        # Find the last modified time from version control if possible;
        # otherwise fall back to the timestamp from stat'ing the file.
        modified = None
        if self.vcs:
            vcs_relative_path = relpath(self.absolute_path(),
                                        self.vcs.get_root_dir())
            try:
                modified = self.vcs.last_modified_date(vcs_relative_path)
            except NotImplementedError:
                pass
        if modified is None:
            file_info = stat(self.absolute_path())
            modified = datetime.utcfromtimestamp(file_info.st_mtime)
        yield 'modified', modified
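The `raw_data` needle is base64-encoded so raw image bytes can travel inside a JSON document to ES. A minimal round-trip sketch using only the standard library:

from base64 import b64encode, b64decode

image_bytes = b'\x89PNG\r\n\x1a\n'  # the first bytes of a PNG, as an example
encoded = b64encode(image_bytes)    # safe to embed in a JSON document
assert b64decode(encoded) == image_bytes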