Example #1
0
    def create_from_data(self,
                         repository,
                         diff_file_name,
                         diff_file_contents,
                         parent_diff_file_name=None,
                         parent_diff_file_contents=None,
                         diffset_history=None,
                         basedir=None,
                         request=None,
                         base_commit_id=None,
                         check_existence=True,
                         validate_only=False,
                         **kwargs):
        """Create a DiffSet from raw diff data.

        This parses a diff and optional parent diff covering one or more files,
        validates, and constructs :py:class:`DiffSets
        <reviewboard.diffviewer.models.DiffSet>` and :py:class:`FileDiffs
        <reviewboard.diffviewer.models.FileDiff>` representing the diff.

        This can optionally validate the diff without saving anything to the
        database. In this case, no value will be returned. Instead, callers
        should take any result as success.

        Args:
            repository (reviewboard.scmtools.models.Repository):
                The repository the diff applies to.

            diff_file_name (unicode):
                The filename of the main diff file.

            diff_file_contents (bytes):
                The contents of the main diff file.

            parent_diff_file_name (unicode, optional):
                The filename of the parent diff, if one is provided.

            parent_diff_file_contents (bytes, optional):
                The contents of the parent diff, if one is provided.

            diffset_history (reviewboard.diffviewer.models.DiffSetHistory, optional):
                The history object to associate the DiffSet with. This is
                not required if using ``validate_only=True``.

            basedir (unicode, optional):
                The base directory to prepend to all file paths in the diff.

            request (django.http.HttpRequest, optional):
                The current HTTP request, if any. This will result in better
                logging.

            base_commit_id (unicode, optional):
                The ID of the commit that the diff is based upon. This is
                needed by some SCMs or hosting services to properly look up
                files, if the diffs represent blob IDs instead of commit IDs
                and the service doesn't support those lookups.

            check_existence (bool, optional):
                Whether to check for file existence as part of the validation
                process. This defaults to ``True``.

            validate_only (bool, optional):
                Whether to just validate and not save. If ``True``, then this
                won't populate the database at all and will return ``None``
                upon success. This defaults to ``False``.

            **kwargs (dict):
                Additional keyword arguments. Only the deprecated ``save``
                argument is consulted: when present, it overrides
                ``validate_only`` with ``not save`` and a
                :py:exc:`DeprecationWarning` is emitted.

        Returns:
            reviewboard.diffviewer.models.DiffSet:
            The resulting DiffSet stored in the database, if processing
            succeeded and ``validate_only=False``. ``None`` is returned when
            only validating.

        Raises:
            reviewboard.diffviewer.errors.DiffParserError:
                There was an error parsing the main diff or parent diff.

            reviewboard.diffviewer.errors.EmptyDiffError:
                The provided diff file did not contain any file changes.

            reviewboard.scmtools.core.FileNotFoundError:
                A file specified in the diff could not be found in the
                repository.

            reviewboard.scmtools.core.SCMError:
                There was an error talking to the repository when validating
                the existence of a file.

            reviewboard.scmtools.git.ShortSHA1Error:
                A SHA1 specified in the diff was in the short form, which
                could not be used to look up the file. This is applicable only
                to Git.
        """
        from reviewboard.diffviewer.diffutils import convert_to_unicode
        from reviewboard.diffviewer.models import FileDiff

        if 'save' in kwargs:
            # stacklevel=2 attributes the warning to the caller that passed
            # the deprecated argument rather than to this method.
            warnings.warn(
                'The save parameter to '
                'DiffSet.objects.create_from_data is deprecated. '
                'Please set validate_only instead.',
                DeprecationWarning,
                stacklevel=2)
            validate_only = not kwargs['save']

        tool = repository.get_scmtool()
        parser = tool.get_parser(diff_file_contents)

        # Parse the main diff. When a parent diff is supplied, existence
        # checks are performed while processing the parent diff instead.
        files = list(
            self._process_files(parser,
                                basedir,
                                repository,
                                base_commit_id,
                                request,
                                check_existence=check_existence
                                and not parent_diff_file_contents))

        if not files:
            raise EmptyDiffError(_("The diff file is empty"))

        # Sort the files so that header files come before implementation.
        files.sort(cmp=self._compare_files, key=lambda f: f.origFile)

        # Files from the parent diff, keyed by their new filename.
        parent_files = {}

        # This is used only for tools like Mercurial that use atomic changeset
        # IDs to identify all file versions but not individual file version
        # IDs.
        parent_commit_id = None

        if parent_diff_file_contents:
            diff_filenames = set(f.origFile for f in files)

            parent_parser = tool.get_parser(parent_diff_file_contents)

            # If the user supplied a base diff, we need to parse it and
            # later apply each of the files that are in the main diff
            for f in self._process_files(parent_parser,
                                         basedir,
                                         repository,
                                         base_commit_id,
                                         request,
                                         check_existence=check_existence,
                                         limit_to=diff_filenames):
                parent_files[f.newFile] = f

            # This will return a non-None value only for tools that use
            # commit IDs to identify file versions as opposed to file revision
            # IDs.
            parent_commit_id = parent_parser.get_orig_commit_id()

        diffset = self.model(name=diff_file_name,
                             revision=0,
                             basedir=basedir,
                             history=diffset_history,
                             repository=repository,
                             diffcompat=DiffCompatVersion.DEFAULT,
                             base_commit_id=base_commit_id)

        if not validate_only:
            diffset.save()

        encoding_list = repository.get_encoding_list()
        filediffs = []

        for f in files:
            orig_rev = None
            parent_content = b''

            # The main diff's original file is matched against the parent
            # diff's new file of the same name.
            parent_file = parent_files.get(f.origFile)

            if parent_file is not None:
                parent_content = parent_file.data
                orig_rev = parent_file.origInfo

            # If there is a parent file there is not necessarily an original
            # revision for the parent file in the case of a renamed file in
            # git.
            if not orig_rev:
                if parent_commit_id and f.origInfo != PRE_CREATION:
                    orig_rev = parent_commit_id
                else:
                    orig_rev = f.origInfo

            # convert_to_unicode() returns an (encoding, text) pair; only the
            # text is needed here. (Deliberately not unpacked into ``_``,
            # which is the translation function in this scope.)
            orig_file = convert_to_unicode(f.origFile, encoding_list)[1]
            dest_file = convert_to_unicode(f.newFile, encoding_list)[1]

            if f.deleted:
                status = FileDiff.DELETED
            elif f.moved:
                status = FileDiff.MOVED
            elif f.copied:
                status = FileDiff.COPIED
            else:
                status = FileDiff.MODIFIED

            filediff = FileDiff(
                diffset=diffset,
                source_file=parser.normalize_diff_filename(orig_file),
                dest_file=parser.normalize_diff_filename(dest_file),
                source_revision=smart_unicode(orig_rev),
                dest_detail=f.newInfo,
                binary=f.binary,
                status=status)

            filediff.extra_data = {
                'is_symlink': f.is_symlink,
            }

            # Flag parent diffs that only moved/copied the file without
            # changing any lines.
            if (parent_file and (parent_file.moved or parent_file.copied)
                    and parent_file.insert_count == 0
                    and parent_file.delete_count == 0):
                filediff.extra_data['parent_moved'] = True

            if not validate_only:
                # This state all requires making modifications to the database.
                # We only want to do this if we're saving.
                filediff.diff = f.data
                filediff.parent_diff = parent_content

                filediff.set_line_counts(raw_insert_count=f.insert_count,
                                         raw_delete_count=f.delete_count)

                filediffs.append(filediff)

        if validate_only:
            return None

        if filediffs:
            FileDiff.objects.bulk_create(filediffs)

        return diffset
Example #2
0
    def create_from_data(self,
                         repository,
                         diff_file_name,
                         diff_file_contents,
                         parent_diff_file_name,
                         parent_diff_file_contents,
                         diffset_history,
                         basedir,
                         request,
                         base_commit_id=None,
                         save=True):
        """Build a DiffSet and its FileDiffs from raw diff contents.

        ``diff_file_contents`` and ``parent_diff_file_contents`` hold the
        actual diff data. The main diff is parsed into per-file entries,
        each of which becomes a FileDiff attached to a new DiffSet. When a
        parent diff is given, its matching entries supply the parent diff
        data and source revision for the corresponding FileDiffs.

        The DiffSet and FileDiffs are only saved when ``save`` is true.
        The DiffSet is returned in either case.

        An ``EmptyDiffError`` is raised if the main diff yields no files.
        """
        from reviewboard.diffviewer.diffutils import convert_to_unicode
        from reviewboard.diffviewer.models import FileDiff

        scmtool = repository.get_scmtool()
        parser = scmtool.get_parser(diff_file_contents)
        has_parent_diff = bool(parent_diff_file_contents)

        # Existence is only checked against the main diff when there is no
        # parent diff; otherwise the parent diff pass below checks it.
        parsed_files = list(
            self._process_files(parser,
                                basedir,
                                repository,
                                base_commit_id,
                                request,
                                check_existence=not has_parent_diff))

        if not parsed_files:
            raise EmptyDiffError(_("The diff file is empty"))

        # Order the entries so header files precede implementation files.
        parsed_files.sort(cmp=self._compare_files,
                          key=lambda entry: entry.origFile)

        # Parent diff entries, keyed by their new filename.
        parents_by_name = {}

        # Only set for tools (such as Mercurial) that identify file versions
        # by an atomic changeset ID rather than per-file revision IDs.
        parent_commit_id = None

        if has_parent_diff:
            wanted_names = set(entry.origFile for entry in parsed_files)
            parent_parser = scmtool.get_parser(parent_diff_file_contents)

            # Parse the parent diff, restricted to files that also appear
            # in the main diff.
            parents_by_name = dict(
                (parent.newFile, parent)
                for parent in self._process_files(parent_parser,
                                                  basedir,
                                                  repository,
                                                  base_commit_id,
                                                  request,
                                                  check_existence=True,
                                                  limit_to=wanted_names))

            # Non-None only for tools using commit IDs to identify file
            # versions.
            parent_commit_id = parent_parser.get_orig_commit_id()

        new_diffset = self.model(name=diff_file_name,
                                 revision=0,
                                 basedir=basedir,
                                 history=diffset_history,
                                 repository=repository,
                                 diffcompat=DiffCompatVersion.DEFAULT,
                                 base_commit_id=base_commit_id)

        if save:
            new_diffset.save()

        encodings = repository.get_encoding_list()

        for entry in parsed_files:
            parent_entry = parents_by_name.get(entry.origFile)

            if parent_entry is None:
                parent_diff_data = b''
                source_rev = None
            else:
                parent_diff_data = parent_entry.data
                source_rev = parent_entry.origInfo

            # A parent entry does not always carry an original revision
            # (e.g. a renamed file in git), so fall back as needed.
            if not source_rev:
                if parent_commit_id and entry.origInfo != PRE_CREATION:
                    source_rev = parent_commit_id
                else:
                    source_rev = entry.origInfo

            # Only the decoded text of each (encoding, text) pair is used.
            source_name = convert_to_unicode(entry.origFile, encodings)[1]
            dest_name = convert_to_unicode(entry.newFile, encodings)[1]

            file_status = FileDiff.MODIFIED

            if entry.deleted:
                file_status = FileDiff.DELETED
            elif entry.moved:
                file_status = FileDiff.MOVED
            elif entry.copied:
                file_status = FileDiff.COPIED

            filediff = FileDiff(
                diffset=new_diffset,
                source_file=parser.normalize_diff_filename(source_name),
                dest_file=parser.normalize_diff_filename(dest_name),
                source_revision=smart_unicode(source_rev),
                dest_detail=entry.newInfo,
                diff=entry.data,
                parent_diff=parent_diff_data,
                binary=entry.binary,
                status=file_status)

            # Mark parent entries that moved/copied the file without any
            # line changes.
            if (parent_entry
                    and (parent_entry.moved or parent_entry.copied)
                    and parent_entry.insert_count == 0
                    and parent_entry.delete_count == 0):
                filediff.extra_data = {'parent_moved': True}

            filediff.set_line_counts(raw_insert_count=entry.insert_count,
                                     raw_delete_count=entry.delete_count)

            if save:
                filediff.save()

        return new_diffset
def _prepare_file_list(diff_file_contents, parent_diff_file_contents,
                       repository, request, basedir, check_existence,
                       get_file_exists=None, base_commit_id=None):
    """Extract the list of files from the diff.

    Args:
        diff_file_contents (bytes):
            The contents of the diff.

        parent_diff_file_contents (bytes):
            The contents of the parent diff, if any.

        repository (reviewboard.scmtools.models.Repository):
            The repository against which the diff was created.

        request (django.http.HttpRequest):
            The current HTTP request.

        basedir (unicode):
            The base directory to prepend to all file paths in the diff.

        check_existence (bool):
            Whether or not existence checks should be performed against
            the upstream repository.

        get_file_exists (callable, optional):
            A callable to use to determine if a file exists in the repository.

            This argument must be provided if ``check_existence`` is ``True``.

        base_commit_id (unicode, optional):
            The ID of the commit that the diff is based upon. This is
            needed by some SCMs or hosting services to properly look up
            files, if the diffs represent blob IDs instead of commit IDs
            and the service doesn't support those lookups.

    Returns:
        tuple:
        A tuple of the following:

        * The files in the diff. (:py:class:`list` of
          :py:class:`ParsedDiffFile`)
        * The diff parser.
          (:py:class:`reviewboard.diffviewer.parser.DiffParser`)
        * The parent commit ID or ``None`` if not applicable.
          (:py:class:`unicode`)
        * A dictionary of files in the parent diff. (:py:class:`dict`)

    Raises:
        reviewboard.diffviewer.errors.EmptyDiffError:
            The diff contains no files.

        ValueError:
            ``check_existence`` was ``True`` but ``get_file_exists`` was not
            provided.
    """
    if check_existence and get_file_exists is None:
        raise ValueError('Must provide get_file_exists when check_existence '
                         'is True')

    tool = repository.get_scmtool()
    parser = tool.get_parser(diff_file_contents)
    files = list(_process_files(
        parser=parser,
        basedir=basedir,
        repository=repository,
        base_commit_id=base_commit_id,
        request=request,
        check_existence=(check_existence and
                         not parent_diff_file_contents),
        get_file_exists=get_file_exists))

    if len(files) == 0:
        raise EmptyDiffError(_('The diff is empty.'))

    # Sort the files so that header files come before implementation
    # files.
    files.sort(cmp=_compare_files, key=lambda f: f.origFile)

    parent_files = {}

    # This is used only for tools like Mercurial that use atomic changeset
    # IDs to identify all file versions. but not individual file version
    # IDs.
    parent_commit_id = None

    if parent_diff_file_contents:
        diff_filenames = {f.origFile for f in files}
        parent_parser = tool.get_parser(parent_diff_file_contents)

        # If the user supplied a base diff, we need to parse it and later
        # apply each of the files that are in main diff.
        parent_files = {
            f.newFile: f
            for f in _process_files(
                get_file_exists=get_file_exists,
                parser=parent_parser,
                basedir=basedir,
                repository=repository,
                base_commit_id=base_commit_id,
                request=request,
                check_existence=check_existence,
                limit_to=diff_filenames)
        }

        # This will return a non-None value only for tools that use commit
        # IDs to identify file versions as opposed to file revision IDs.
        parent_commit_id = parent_parser.get_orig_commit_id()

    return files, parser, parent_commit_id, parent_files
Example #4
0
    def create_from_data(self,
                         repository,
                         diff_file_name,
                         diff_file_contents,
                         parent_diff_file_name,
                         parent_diff_file_contents,
                         diffset_history,
                         basedir,
                         request,
                         base_commit_id=None,
                         save=True):
        """Create a DiffSet from raw diff data.

        The diff_file_contents and parent_diff_file_contents parameters are
        strings with the actual diff contents. Both are decoded to Unicode
        before parsing, and each file's diff data is encoded again (using
        the encoding detected for the main diff) when building FileDiffs.

        Args:
            repository:
                The repository the diff applies to. It provides the SCMTool
                and the list of encodings to try.

            diff_file_name:
                The filename of the main diff, stored as the DiffSet's name.

            diff_file_contents:
                The contents of the main diff.

            parent_diff_file_name:
                The filename of the parent diff. This is not used by this
                method.

            parent_diff_file_contents:
                The contents of the parent diff, or a false value if there
                is none.

            diffset_history:
                The history object to associate the DiffSet with.

            basedir:
                The base directory, joined with each file's new filename
                to form the destination filename.

            request:
                The current HTTP request, passed through to file
                processing.

            base_commit_id (optional):
                The ID of the commit that the diff is based upon.

            save (bool, optional):
                Whether to save the DiffSet and FileDiffs to the database.
                If ``False``, the objects are built but nothing is written.

        Returns:
            The resulting DiffSet. It is only saved if ``save`` is true.

        Raises:
            EmptyDiffError:
                The main diff did not contain any files.
        """
        from reviewboard.diffviewer.diffutils import convert_to_unicode
        from reviewboard.diffviewer.models import FileDiff

        tool = repository.get_scmtool()

        # Remember the detected encoding so that the parent diff is decoded,
        # and the per-file data re-encoded, consistently with the main diff.
        encoding, diff_text = convert_to_unicode(
            diff_file_contents, repository.get_encoding_list())
        parser = tool.get_parser(diff_text)

        # Parse the main diff. When a parent diff is supplied, existence
        # checks happen while processing the parent diff instead.
        files = list(
            self._process_files(
                parser,
                basedir,
                repository,
                base_commit_id,
                request,
                check_existence=(not parent_diff_file_contents)))

        if not files:
            raise EmptyDiffError(_("The diff file is empty"))

        # Sort the files so that header files come before implementation.
        files.sort(cmp=self._compare_files, key=lambda f: f.origFile)

        # Files from the parent diff, keyed by their original filename.
        parent_files = {}

        # This is used only for tools like Mercurial that use atomic changeset
        # IDs to identify all file versions but not individual file version
        # IDs.
        parent_commit_id = None

        if parent_diff_file_contents:
            diff_filenames = set(f.origFile for f in files)

            parent_parser = tool.get_parser(
                convert_to_unicode(parent_diff_file_contents, [encoding])[1])

            # If the user supplied a base diff, we need to parse it and
            # later apply each of the files that are in the main diff
            for f in self._process_files(parent_parser,
                                         basedir,
                                         repository,
                                         base_commit_id,
                                         request,
                                         check_existence=True,
                                         limit_to=diff_filenames):
                parent_files[f.origFile] = f

            # This will return a non-None value only for tools that use
            # commit IDs to identify file versions as opposed to file revision
            # IDs.
            parent_commit_id = parent_parser.get_orig_commit_id()

        # Build the DiffSet without touching the database. Using the
        # manager's create() here would persist it immediately, which
        # defeats save=False and causes a redundant second save otherwise.
        diffset = self.model(name=diff_file_name,
                             revision=0,
                             basedir=basedir,
                             history=diffset_history,
                             repository=repository,
                             diffcompat=DiffCompatVersion.DEFAULT,
                             base_commit_id=base_commit_id)

        if save:
            diffset.save()

        for f in files:
            if f.origFile in parent_files:
                parent_file = parent_files[f.origFile]
                parent_content = parent_file.data.encode(encoding)
                source_rev = parent_file.origInfo
            else:
                parent_content = b""

                if parent_commit_id and f.origInfo != PRE_CREATION:
                    source_rev = parent_commit_id
                else:
                    source_rev = f.origInfo

            # Stored paths always use forward slashes, regardless of the
            # separator os.path.join() produces.
            dest_file = os.path.join(basedir, f.newFile).replace("\\", "/")

            if f.deleted:
                status = FileDiff.DELETED
            elif f.moved:
                status = FileDiff.MOVED
            elif f.copied:
                status = FileDiff.COPIED
            else:
                status = FileDiff.MODIFIED

            filediff = FileDiff(diffset=diffset,
                                source_file=f.origFile,
                                dest_file=dest_file,
                                source_revision=smart_unicode(source_rev),
                                dest_detail=f.newInfo,
                                diff=f.data.encode(encoding),
                                parent_diff=parent_content,
                                binary=f.binary,
                                status=status)
            filediff.set_line_counts(raw_insert_count=f.insert_count,
                                     raw_delete_count=f.delete_count)

            if save:
                filediff.save()

        return diffset
def _prepare_diff_info(diff_file_contents,
                       parent_diff_file_contents,
                       repository,
                       request,
                       basedir,
                       check_existence,
                       get_file_exists=None,
                       base_commit_id=None):
    """Extract information and files from a diff.

    Args:
        diff_file_contents (bytes):
            The contents of the diff.

        parent_diff_file_contents (bytes):
            The contents of the parent diff, if any.

        repository (reviewboard.scmtools.models.Repository):
            The repository against which the diff was created.

        request (django.http.HttpRequest):
            The current HTTP request.

        basedir (unicode):
            The base directory to prepend to all file paths in the diff.

        check_existence (bool):
            Whether or not existence checks should be performed against
            the upstream repository.

        get_file_exists (callable, optional):
            A callable to use to determine if a file exists in the repository.

            This argument must be provided if ``check_existence`` is ``True``.

        base_commit_id (unicode, optional):
            The ID of the commit that the diff is based upon. This is
            needed by some SCMs or hosting services to properly look up
            files, if the diffs represent blob IDs instead of commit IDs
            and the service doesn't support those lookups.

    Returns:
        dict:
        A dictionary of information about the diff and parser. This contains
        the following keys:

        ``files`` (:py:class:`list` of
        :py:class:`reviewboard.diffviewer.parser.ParsedDiffFile):
            All parsed files in the diff.

        ``parent_commit_id`` (:py:class:`unicode`):
            The ID of the parent commit, if any.

        ``parent_files`` (:py:class:`dict`):
            A mapping of modified filenames from ``files`` (:py:class:`bytes`)
            to :py:class:`reviewboard.diffviewer.parser.ParsedDiffFile`
            instances.

        ``parsed_diff`` (:py:class:`ParsedDiff`):
            The parsed diff file.

        ``parsed_parent_diff`` (:py:class:`ParsedDiff`):
            The parsed diff file for the parent diff.

        ``parser`` (:py:class:`BaseDiffParser`):
            The parent diff file.

    Raises:
        reviewboard.diffviewer.errors.EmptyDiffError:
            The diff contains no files.

        ValueError:
            ``check_existence`` was ``True`` but ``get_file_exists`` was not
            provided.
    """
    if check_existence and get_file_exists is None:
        raise ValueError('Must provide get_file_exists when check_existence '
                         'is True')

    tool = repository.get_scmtool()
    parsed_diff = _parse_diff(tool, diff_file_contents)

    files = list(
        _process_files(parsed_diff=parsed_diff,
                       basedir=basedir,
                       repository=repository,
                       base_commit_id=base_commit_id,
                       request=request,
                       check_existence=(check_existence
                                        and not parent_diff_file_contents),
                       get_file_exists=get_file_exists))

    if len(files) == 0:
        raise EmptyDiffError(_('The diff is empty.'))

    # Sort the files so that header files come before implementation
    # files.
    files.sort(key=cmp_to_key(_compare_files))

    parsed_parent_diff = None
    parent_files = {}

    if parent_diff_file_contents:
        diff_filenames = {f.orig_filename for f in files}
        parsed_parent_diff = _parse_diff(tool, parent_diff_file_contents)

        # If the user supplied a base diff, we need to parse it and later
        # apply each of the files that are in main diff.
        parent_files = {
            f.modified_filename: f
            for f in _process_files(get_file_exists=get_file_exists,
                                    parsed_diff=parsed_parent_diff,
                                    basedir=basedir,
                                    repository=repository,
                                    base_commit_id=base_commit_id,
                                    request=request,
                                    check_existence=check_existence,
                                    limit_to=diff_filenames)
        }

    return {
        'files': files,
        'parent_files': parent_files,
        'parsed_diff': parsed_diff,
        'parsed_parent_diff': parsed_parent_diff,
        'parser': parsed_diff.parser,
    }