Example #1
File: lib.py Project: pacospace/glyph
def classify_by_tag(
    path: str, start_tag: str, end_tag: Optional[str] = None, model: Optional[MLModel] = None
) -> List[str]:
    """Classify messages for the given repo based on tags."""
    repo_path = os.path.join(path, ".git")
    if os.path.exists(repo_path):
        repo = Repository(repo_path)
    else:
        raise RepositoryNotFoundException

    start_tag = repo.revparse_single("refs/tags/" + start_tag)

    if end_tag is None:
        end_tag = repo.revparse_single("refs/heads/master")
    else:
        end_tag = repo.revparse_single("refs/tags/" + end_tag)

    orig_messages = []
    walker = repo.walk(end_tag.id, GIT_SORT_TOPOLOGICAL)
    walker.hide(start_tag.id)

    for commit in walker:
        orig_messages.append(commit.message.lower())

    return classify_messages(orig_messages, model)
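An illustrative call for the helper above; the repository path, tag names, and the omitted MLModel are hypothetical:

# Classify commit messages between two (hypothetical) release tags.
labels = classify_by_tag("/srv/myrepo", start_tag="v1.0.0", end_tag="v1.1.0")
for label in labels:
    print(label)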
Example #2
def main(args):
    repo = Repository(os.getcwd())

    start_commit = repo.revparse_single(getattr(args, "start-commit"))
    end_commit = repo.revparse_single(getattr(args, "end-commit"))
    formatter = getattr(args, "formatter")

    for commit, heading, changes in get_changes_for_commits(
            repo, start_commit, end_commit, formatter):
        print(f"{commit}: {heading}")
        print(indent(changes))
Example #3
def save_git_info(path):
    repo = Repository(
        os.path.join(os.path.dirname(os.path.abspath(__file__)), "../../.."))
    info = {
        "branch": repo.head.shorthand,
        "commit_hex": repo.revparse_single('HEAD').hex,
        "commit_message": repo.revparse_single('HEAD').message,
    }

    with open(path, 'w') as f:
        json.dump(info, f)
Example #4
def test_commit_to_head(git_repo: pygit2.Repository, tmp_path: Path):
    commit = git_repo.revparse_single('HEAD~3')

    call_args = [
        'git-to-freshports-xml', f'--path={git_repo.workdir}',
        f'--commit={commit.hex}', f'--output={tmp_path}',
        f'--spool={tmp_path}', '--repo=ports'
    ]

    with patch.object(sys, 'argv', call_args):
        MODULE_UNDER_TEST.main()

    result_files = list(tmp_path.iterdir())
    assert len(result_files) == 3, "Must be exactly three commits"
    assert git_repo.revparse_single('HEAD~2').hex in result_files[0].name
Example #5
def sync_handler(fork_from: str, from_sha: str, repo_name: str,
                 ticket_id: int, pr_url: str):
    output_path = '{}.txt'.format(pr_url.split('/', 3)[3].rsplit('/', 2)[0])
    output_path = os.path.join(WORK_DIR, output_path.replace('/', '_'))
    work_tree = os.path.join(WORK_DIR, fork_from)
    parent_path = os.path.dirname(work_tree)
    if not os.path.exists(parent_path):
        os.makedirs(parent_path)
    if not os.path.exists(work_tree):
        repo = clone_repository(
            '{0}{1}.git'.format(GITHUB_URL, fork_from), work_tree)
    else:
        repo = Repository(work_tree)

    remote_name = repo_name.split('/')[0]
    update_remote(work_tree, repo, repo_name, remote_name)

    if remote_name == 'origin':
        commit = repo.revparse_single(from_sha)
        repo.checkout_tree(commit, strategy=GIT_CHECKOUT_FORCE)
    else:
        ref_name = 'refs/pull/{0}/head'.format(ticket_id)
        try:
            repo.create_reference(ref_name, from_sha)
        except ValueError:
            pass
        ref = repo.lookup_reference(ref_name)
        repo.checkout(ref, strategy=GIT_CHECKOUT_FORCE)
    cwd = os.getcwd()
    os.chdir(work_tree)
    subprocess.call(
        '{} . --output-file={}'.format(FLAKE8_EXECUTABLE, output_path),
        shell=True)
    os.chdir(cwd)
    return output_path
Example #6
def get_docset_ids(config: Dict, manifest: Manifest,
                   only: Optional[List[str]]) -> Dict[str, str]:
    """Obtain the ids for all requested docsets.

    Args:
        config: Cache configuration.
        manifest: Repository manifest.
        only: Only consider these docsets (None to consider all).

    Returns:
        Dictionary mapping each docset name to its computed ID.
    """

    ids = dict()

    for entry in config:
        if only and entry["docset"] not in only:
            continue

        id = ""
        for name in entry["projects"]:
            p = next((p for p in manifest.projects if p.name == name), None)
            assert p, f"Project {name} not in manifest"

            repo = Repository(Path(p.topdir) / p.path)
            id += repo.revparse_single("HEAD").id.hex

        ids[entry["docset"]] = hashlib.sha256(id.encode("utf-8")).hexdigest()

    return ids
Example #7
def new_branch(working_dir, branch_name):
    repo = Repository(working_dir + '.git')
    commit = repo.revparse_single('HEAD')
    print(working_dir)
    print('sssssssssssssssssssss')
    new_branch = repo.branches.create(branch_name, commit, force=False)
    print('new branch ' + branch_name)
Example #8
File: test_git.py Project: lobostome/jool
def test_blame_mapper(gitrepo):
    directory, g = gitrepo
    repo = Repository("%s/jool/.git" % directory)
    commit = repo.revparse_single('9aeaac94cda9eb7e32d6f861079ab793a0311983')
    mapper = BugMap()
    assert mapper.map(repo, 'c/d.py', 5,
                      commit) == '903b2d2c65f291492f30467edc6442b74a78ab92'
Example #9
File: tweet.py Project: ajkxyz/veles-blog
def get_new_articles():
    blog = PelicanBlog()
    content_dir = blog.get_content_directory()
    repo = Repository(os.path.abspath(os.path.dirname(__file__)))
    diff = repo.revparse_single("HEAD").tree.diff_to_tree()
    existing_articles = set(os.path.relpath(obj.old_file_path, content_dir)
                            for obj in diff
                            if obj.old_file_path.startswith(content_dir))
    all_articles = set(blog.get_posts())
    new_articles = {art for art in all_articles - existing_articles
                    if blog.get_post_lang(art) in (TWITTER_LANGUAGE, "")}
    new_titles = []
    repo.index.read()
    for newart in new_articles:
        title = blog.get_post_title(newart)
        yield Article(title, blog.get_post_url(newart),
                      blog.get_post_authors(newart))
        new_titles.append(title)
        repo.index.add(os.path.join(content_dir, newart))
    blogger = Signature(repo.config["user.name"], repo.config["user.email"])
    repo.create_commit("HEAD", blogger, blogger,
                       "[BLOG] %s" % ", ".join(new_titles),
                       repo.index.write_tree(), [repo.head.peel().oid])
    repo.index.write()
    # TODO(v.markovtsev): implement git push using pygit2
    subprocess.call(("git", "push", "origin", repo.head.shorthand))
Example #10
def get_docset_props(docset: str, config: Dict, manifest: Manifest) -> Tuple[str, bool]:
    """Obtain the id and dirty status of the given docset.

    Args:
        docset: Docset name.
        config: Cache configuration.
        manifest: Repository manifest.

    Returns:
        Tuple of the docset ID and its dirty status.
    """

    for entry in config:
        if entry["docset"] != docset:
            continue

        id = ""
        dirty = False
        for name in entry["projects"]:
            p = next((p for p in manifest.projects if p.name == name), None)
            assert p, f"Project {name} not in manifest"

            repo = Repository(Path(p.topdir) / p.path)
            id += repo.revparse_single("HEAD").id.hex
            dirty = dirty or bool(repo.status())

        return hashlib.sha256(id.encode("utf-8")).hexdigest(), dirty

    raise ValueError(f"Docset {docset} not in configuration file")
Example #11
def commit_range(repo: pygit2.Repository, commit_range: str):
    start_commit_ref, end_commit_ref = commit_range.split('..')
    start_commit = repo.revparse_single(start_commit_ref)
    end_commit = repo.revparse_single(end_commit_ref)

    walker = repo.walk(end_commit.oid)
    walker.simplify_first_parent(
    )  # Avoid wandering off to merged branches. Same as 'git log --first-parent'

    result = []
    for commit in walker:
        if commit == start_commit:
            break
        result.append(commit)

    return list(reversed(result))
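A usage sketch for commit_range; the repository path and revision range below are hypothetical:

# Walk first-parent history from HEAD back to (but not including) v1.0.0.
import pygit2

repo = pygit2.Repository("/srv/myrepo")
for commit in commit_range(repo, "v1.0.0..HEAD"):
    print(commit.short_id, commit.message.split("\n", 1)[0])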
Example #12
def add_version_commit():
    repo = Repository(script_dir)
    create_commit(repo, 'Update to {}'.format(ver_str))
    config = Config.get_global_config()
    author = Signature(config['user.name'], config['user.email'])
    repo.create_tag('v{}'.format(ver_str),
                    repo.revparse_single('HEAD').id, GIT_OBJ_COMMIT, author,
                    'v{}'.format(ver_str))
Example #13
def makeInstanceBranch(repo: Repository, name: str) -> None:
    # headCommit = repo.revparse_single('origin/master')
    headCommit = repo.revparse_single('HEAD')
    # branch = repo.creare_branch(name, headCommit, force = True)
    print('creating branch ' + name)
    branch = repo.branches.local.create(name, headCommit, force=True)
    print('checkouting to ' + name)
    repo.checkout(branch)
Example #14
File: git_fns.py Project: aecay/annotald2
def file_at_revision(repo: pygit2.Repository, filepath: str, revision: str):
    tree = repo.revparse_single(revision).tree
    dirname = os.path.dirname(filepath)
    if dirname != "":
        for component in dirname.split("/"):
            tree = repo[tree[component].oid]

    return repo[tree[os.path.basename(filepath)].id].data.decode("utf-8")
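An illustrative call; the repository path, file path, and revision are examples only:

# Read a file as it existed two commits ago.
import pygit2

repo = pygit2.Repository("/srv/myrepo")
text = file_at_revision(repo, "docs/readme.md", "HEAD~2")
print(text)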
Example #15
def new_switch_branch(working_dir, branch_name):
    repo = Repository(working_dir + '.git')
    commit = repo.revparse_single('HEAD')
    new_branch = repo.branches.create(branch_name, commit, force=False)
    print('refs/heads/' + new_branch.branch_name)
    print(working_dir)
    print('ddddddddddddddddd')
    repo.checkout('refs/heads/' + new_branch.branch_name)
    print('switch to a new branch: ' + branch_name)
Example #16
def _pygit2_commits(commit, repository):
    from pygit2 import Repository, GIT_SORT_TOPOLOGICAL
    g = Repository(repository)

    if '..' in commit:
        tail, head = commit.split('..', 2)
        head = head or 'HEAD'
    else:
        head = commit
        tail = commit + '^'

    walker = g.walk(g.revparse_single(head).oid, GIT_SORT_TOPOLOGICAL)

    try:
        walker.hide(g.revparse_single(tail).oid)
    except KeyError:
        pass

    return walker
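A sketch of how the returned walker might be consumed; the revision range and repository path are hypothetical:

# Iterate commits selected by a 'tail..head' style range.
for commit in _pygit2_commits("v1.0..HEAD", "/srv/myrepo"):
    print(commit.hex, commit.message.split("\n", 1)[0])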
Example #17
def test_commit_range(git_repo: pygit2.Repository, tmp_path: Path):
    first_commit = git_repo.revparse_single('HEAD~3')
    second_commit = git_repo.revparse_single('HEAD~1')

    call_args = [
        'git-to-freshports-xml', f'--path={git_repo.workdir}',
        f'--commit-range={first_commit.hex}..{second_commit.hex}',
        f'--output={tmp_path}', f'--spool={tmp_path}', '--repo=ports'
    ]

    with patch.object(sys, 'argv', call_args):
        MODULE_UNDER_TEST.main()

    result_files = list(tmp_path.iterdir())
    assert len(result_files) == 2, "Must be exactly two commits"

    commit_filenames = [x.name for x in result_files]
    assert git_repo.revparse_single('HEAD~2').hex in commit_filenames[0]
    assert second_commit.hex in commit_filenames[1]
Example #18
def get_git_description():
    try:
        from pygit2 import Repository
        try:
            repo = Repository('.')
            return "'{0}_{1}'".format(repo.head.shorthand, repo.revparse_single('HEAD').short_id)
        except:
            return 'No_.git_dir'
    except ImportError:
        return 'pygit2_not_installed'
Example #19
def getCommitInfo(obj, commid_id):
    repo = Repository(obj.vcsDir)
    commit = repo.revparse_single(commid_id)
    msg = commit.message
    return {
        "epoch": commit.author.time,
        "author": f"{commit.author.name} <{commit.author.email}>",
        "shortHash": commit.id.hex[:8],  # or commit.short_id.hex
        "summary": msg.split("\n")[0],
        "description": msg,
    }
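An illustrative call for the function above; SimpleNamespace merely stands in for whatever object carries a vcsDir attribute, and the path is hypothetical:

from types import SimpleNamespace

info = getCommitInfo(SimpleNamespace(vcsDir="/srv/myrepo"), "HEAD")
print(info["shortHash"], info["summary"])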
Example #20
def getShortStat(obj, prevId, thisId):
    """
	returns (files_changed, insertions, deletions)
	"""
    repo = Repository(obj.vcsDir)
    diff = repo.diff(
        a=repo.revparse_single(prevId).id.hex,
        b=repo.revparse_single(thisId).id.hex,
    )
    stats = diff.stats
    return (stats.files_changed, stats.insertions, stats.deletions)
Example #21
def revparse(repo: git.Repository, revision: str) -> git.Oid:
    try:
        return repo.revparse_single(revision).oid

    except KeyError as e:
        fatal(f"Commit not found in {repo.path}: {e}")

    except ValueError as e:
        fatal(f"Bad revision: {e}")

    except Exception as e:
        fatal(f"Unexpected error: {type(e).__name__}: {e}")
Example #22
def getCommitShortStat(obj, commit_id):
    """
	returns (files_changed, insertions, deletions)
	"""
    repo = Repository(obj.vcsDir)
    commit = repo.revparse_single(commit_id)
    diff = repo.diff(
        a=commit.parent_ids[0].hex,
        b=commit.id.hex,
    )
    stats = diff.stats
    return (stats.files_changed, stats.insertions, stats.deletions)
Example #23
def get_current_timestamp(path_to_repository):
    """Utility method for getting the timestamp of the last commit from a Git repository.

        Args:
            path_to_repository (str): Path to the Git repository

        Returns:
            int: The Unix timestamp (seconds since epoch) of the last commit in the provided repository.
    """
    repo = Repository(path_to_repository)
    head = repo.revparse_single('HEAD')
    return head.commit_time
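commit_time is an epoch value; a sketch of converting it into a readable form, with a hypothetical repository path:

from datetime import datetime, timezone

ts = get_current_timestamp("/srv/myrepo")
print(datetime.fromtimestamp(ts, timezone.utc).isoformat())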
Example #24
    def _fast_forward(self, local_path, merge_target, branch):
        # fast-forward all the branches.
        # pygit2 repo
        repo = Repository(discover_repository(local_path))

        # convert merge_target from hex into oid.
        fetch_head = repo.revparse_single(merge_target)

        # try to resolve a common ancestor between fetched and local
        try:
            head = repo.revparse_single(branch)
        except KeyError:
            # Doesn't exist.  Create and done.
            repo.create_reference(branch, fetch_head.oid)
            return True, 'Created new branch: %s' % branch

        if head.oid == fetch_head.oid:
            return True, 'Source and target are identical.'

        # raises KeyError if no merge bases found.
        common_oid = repo.merge_base(head.oid, fetch_head.oid)

        # Three different outcomes between the remaining cases.
        if common_oid.hex not in (head.oid.hex, fetch_head.oid.hex):
            # common ancestor is beyond both of these, not going to
            # attempt a merge here and will assume this:
            return False, 'Branch will diverge.'
        elif common_oid.hex == fetch_head.oid.hex:
            # Remote is also the common ancestor, so nothing to do.
            return True, 'No new changes found.'

        # This case remains: common_oid.hex == head.oid.hex, meaning
        # this local repository is the ancestor of further changes
        # fetched from the remote - remote newer, so fast-forward.
        ref = repo.lookup_reference(branch)
        ref.delete()

        repo.create_reference(branch, fetch_head.oid)

        return True, 'Fast-forwarded branch: %s' % branch
Example #25
File: git.py Project: cholin/gix
class GitObjectRenderer:
    def __init__(self, reponame, cache, revspec, path):
        self.repo = Repository(reponame)
        self.revspec = revspec
        self.path = path
        self.object_map = {
            Blob : self._render_blob,
            Commit : self._render_commit,
            Tag : self._render_tag
        }
        self.cache = cache


    def _render_blob(self, rev):
        data = rev.data.decode('utf-8', errors='ignore')
        formatter = HtmlFormatter(linenos='inline')
        lexer = guess_lexer(data)
        blob = highlight(data, lexer, formatter)
        return render_template("objects/blob.html",
                  repo = self.repo,
                  revspec = 'master', blob = blob, path = self.path)

    def _render_commit(self, rev):
        entries = self.repo[rev.tree[self.path].oid] if self.path else rev.tree
        cached = []
        for entry in entries:
            cache_id = "{0}-commit-{1}".format(self.repo.get_name(), entry.hex)
            hit = self.cache.get(cache_id)
            if hit is not None:
                cached.append((entry, hit))
            else:
                cached.append((entry, None))

        files = sorted(cached, key=lambda x: x[0].filemode)
        return render_template("objects/tree.html",
                  repo = self.repo,
                  revspec = self.revspec, rev = rev, files = files,
                  path = self.path)

    def _render_tag(self, rev):
        obj = self.repo[rev.target]
        url = url_for('obj', reponame=self.repo.get_name(), revspec = obj.hex)
        return redirect(url)

    def render_obj(self):
        rev = self.repo.revparse_single(self.revspec)
        return self.object_map[type(rev)](rev)
Example #26
def revparse(repo: pygit2.Repository, target_id: str, *, strict: bool = True):
    assert isinstance(repo, pygit2.Repository)
    try:
        ref = repo.revparse_single(target_id)
        short_id = ref.short_id
        message = getattr(ref, 'message', None)
    except (KeyError, pygit2.GitError, pygit2.InvalidSpecError):
        if strict:
            raise
        else:
            # Use the given target id and an unknown message
            short_id = target_id
            message = None
    if message is None or not message or message.isspace():
        if strict:
            raise ValueError(f"Invalid message for {short_id}: {message!r}")
        else:
            message = "<UNKNOWN MESSAGE>"
    # Keep only the first line of the message as the summary
    summary = message.split('\n', 1)[0]
    return DevCommit(short_id, summary, message)
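A hedged usage sketch; DevCommit comes from the surrounding project, and the unknown revision is hypothetical:

import pygit2

repo = pygit2.Repository(".")
head = revparse(repo, "HEAD")                          # strict: raises on unknown revisions
loose = revparse(repo, "no-such-rev", strict=False)    # falls back to the raw id and a placeholder message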
Example #27
File: git.py Project: kompotkot/locust
def revision_file(
    repository: pygit2.Repository, revision: Optional[str], filepath: str
) -> Optional[str]:
    """
    Returns the source from the file at the given filepath on the given revision. If revision is
    None, returns the bytes from the filepath in the current working tree.

    Filepath is expected to be an absolute, normalized path.
    """
    repo_path = os.path.normpath(repository.workdir)

    content = bytes()

    if revision is None:
        assert (
            os.path.commonpath([repo_path, filepath]) == repo_path
        ), f"File ({filepath}) is not contained in repository ({repo_path})"
        with open(filepath, "rb") as ifp:
            content = ifp.read()
    else:
        relative_path = os.path.relpath(filepath, repo_path)
        dirname, basename = os.path.split(relative_path)
        components: List[str] = [basename]
        while dirname:
            dirname, basename = os.path.split(dirname)
            components.append(basename)
        components.reverse()

        commit = repository.revparse_single(revision)
        current_tree = commit.tree
        for component in components:
            try:
                current_tree = current_tree[component]
            except KeyError:
                return None
        content = current_tree.data

    return content.decode(errors="ignore")
Example #28
class GitRepo:
    """A class that manages a git repository.

    This class enables versioning via git for a repository.
    You can stage and commit files and checkout different commits of the repository.
    """

    path = ''
    pathspec = []
    repo = None
    callback = None
    author_name = 'QuitStore'
    author_email = '*****@*****.**'
    gcProcess = None

    def __init__(self, path, origin=None, gc=False):
        """Initialize a new repository from an existing directory.

        Args:
            path: A string containing the path to the repository.
            origin: The remote URL where to clone and fetch from and push to
        """
        logger = logging.getLogger('quit.core.GitRepo')
        logger.debug('GitRepo, init, Create an instance of GitStore')
        self.path = path
        self.gc = gc

        if not exists(path):
            try:
                makedirs(path)
            except OSError as e:
                raise Exception('Can\'t create path in filesystem:', path, e)

        try:
            self.repo = Repository(path)
        except KeyError:
            pass
        except AttributeError:
            pass

        if origin:
            self.callback = QuitRemoteCallbacks()

        if self.repo:
            if self.repo.is_bare:
                raise QuitGitRepoError('Bare repositories not supported, yet')

            if origin:
                # set remote
                self.addRemote('origin', origin)
        else:
            if origin:
                # clone
                self.repo = self.cloneRepository(origin, path, self.callback)
            else:
                self.repo = init_repository(path=path, bare=False)

    def cloneRepository(self, origin, path, callback):
        try:
            repo = clone_repository(url=origin,
                                    path=path,
                                    bare=False,
                                    callbacks=callback)
            return repo
        except Exception as e:
            raise QuitGitRepoError(
                "Could not clone from: {} origin. {}".format(origin, e))

    def addall(self):
        """Add all (newly created|changed) files to index."""
        self.repo.index.read()
        self.repo.index.add_all(self.pathspec)
        self.repo.index.write()

    def addfile(self, filename):
        """Add a file to the index.

        Args:
            filename: A string containing the path to the file.
        """
        index = self.repo.index
        index.read()

        try:
            index.add(filename)
            index.write()
        except Exception as e:
            logger.info(
                "GitRepo, addfile, Could not add file  {}.".format(filename))
            logger.debug(e)

    def addRemote(self, name, url):
        """Add a remote.

        Args:
            name: A string containing the name of the remote.
            url: A string containing the url to the remote.
        """
        try:
            self.repo.remotes.create(name, url)
            logger.info("Successfully added remote: {} - {}".format(name, url))
        except Exception as e:
            logger.info("Could not add remote: {} - {}".format(name, url))
            logger.debug(e)

        try:
            self.repo.remotes.set_push_url(name, url)
            self.repo.remotes.set_url(name, url)
        except Exception as e:
            logger.info("Could not set push/fetch urls: {} - {}".format(
                name, url))
            logger.debug(e)

    def checkout(self, commitid):
        """Checkout a commit by a commit id.

        Args:
            commitid: A string containing a commitid.
        """
        try:
            commit = self.repo.revparse_single(commitid)
            self.repo.set_head(commit.oid)
            self.repo.reset(commit.oid, GIT_RESET_HARD)
            logger.info("Checked out commit: {}".format(commitid))
        except Exception as e:
            logger.info("Could not check out commit: {}".format(commitid))
            logger.debug(e)

    def commit(self, message=None):
        """Commit staged files.

        Args:
            message: A string for the commit message.
        Raises:
            Exception: If no files in staging area.
        """
        if self.isstagingareaclean():
            # nothing to commit
            return

        index = self.repo.index
        index.read()
        tree = index.write_tree()

        try:
            author = Signature(self.author_name, self.author_email)
            comitter = Signature(self.author_name, self.author_email)

            if len(self.repo.listall_reference_objects()) == 0:
                # Initial Commit
                if message is None:
                    message = 'Initial Commit from QuitStore'
                self.repo.create_commit('HEAD', author, comitter, message,
                                        tree, [])
            else:
                if message is None:
                    message = 'New Commit from QuitStore'
                self.repo.create_commit('HEAD', author, comitter, message,
                                        tree,
                                        [self.repo.head.get_object().hex])
            logger.info('Updates committed')
        except Exception as e:
            logger.info('Nothing to commit')
            logger.debug(e)

        if self.gc:
            self.garbagecollection()

    def commitexists(self, commitid):
        """Check if a commit id is part of the repository history.

        Args:
            commitid: String of a Git commit id.
        Returns:
            True, if commitid is part of commit log
            False, else.
        """
        if commitid in self.getids():
            return True
        else:
            return False

    def garbagecollection(self):
        """Start garbage collection.

        Args:
            commitid: A string cotaining a commitid.
        """
        try:
            # Check if the garbage collection process is still running
            if self.gcProcess is None or self.gcProcess.poll() is not None:
                # Start garbage collection with "--auto" option,
                # which terminates immediately if it is not necessary
                self.gcProcess = Popen(["git", "gc", "--auto", "--quiet"],
                                       cwd=self.path)
                logger.debug('Spawn garbage collection')
        except Exception as e:
            logger.debug('Git garbage collection failed to spawn')
            logger.debug(e)

    def getpath(self):
        """Return the path of the git repository.

        Returns:
            A string containing the path to the directory of git repo
        """
        return self.path

    def getcommits(self):
        """Return meta data about exitsting commits.

        Returns:
            A list containing dictionaries with commit meta data
        """
        commits = []
        if len(self.repo.listall_reference_objects()) > 0:
            for commit in self.repo.walk(self.repo.head.target,
                                         GIT_SORT_REVERSE):
                commits.append({
                    'id':
                    str(commit.oid),
                    'message':
                    str(commit.message),
                    'commit_date':
                    datetime.fromtimestamp(
                        commit.commit_time).strftime('%Y-%m-%dT%H:%M:%SZ'),
                    'author_name':
                    commit.author.name,
                    'author_email':
                    commit.author.email,
                    'parents': [c.hex for c in commit.parents],
                })
        return commits

    def getids(self):
        """Return meta data about exitsting commits.

        Returns:
            A list containing dictionaries with commit meta data
        """
        ids = []
        if len(self.repo.listall_reference_objects()) > 0:
            for commit in self.repo.walk(self.repo.head.target,
                                         GIT_SORT_REVERSE):
                ids.append(str(commit.oid))
        return ids

    def isgarbagecollectionon(self):
        """Return if gc is activated or not.

        Returns:
            True, if activated
            False, if not
        """
        return self.gc

    def isstagingareaclean(self):
        """Check if staging area is clean.

        Returns:
            True, if staging area is clean
            False, else.
        """
        status = self.repo.status()

        for filepath, flags in status.items():
            if flags != GIT_STATUS_CURRENT:
                return False

        return True

    def pull(self, remote='origin', branch='master'):
        """Pull if possible.

        Return:
            True: If successful.
            False: If merge not possible or no updates from remote.
        """
        try:
            self.repo.remotes[remote].fetch()
        except Exception as e:
            logger.info("Can not pull:  Remote {} not found.".format(remote))
            logger.debug(e)

        ref = 'refs/remotes/' + remote + '/' + branch
        remoteid = self.repo.lookup_reference(ref).target
        analysis, _ = self.repo.merge_analysis(remoteid)

        if analysis & GIT_MERGE_ANALYSIS_UP_TO_DATE:
            # Already up-to-date
            pass
        elif analysis & GIT_MERGE_ANALYSIS_FASTFORWARD:
            # fastforward
            self.repo.checkout_tree(self.repo.get(remoteid))
            master_ref = self.repo.lookup_reference('refs/heads/master')
            master_ref.set_target(remoteid)
            self.repo.head.set_target(remoteid)
        elif analysis & GIT_MERGE_ANALYSIS_NORMAL:
            self.repo.merge(remoteid)
            tree = self.repo.index.write_tree()
            msg = 'Merge from ' + remote + ' ' + branch
            author = Signature(self.author_name, self.author_email)
            comitter = Signature(self.author_name, self.author_email)
            self.repo.create_commit('HEAD', author, comitter, msg, tree,
                                    [self.repo.head.target, remoteid])
            self.repo.state_cleanup()
        else:
            logger.debug('Can not pull. Unknown merge analysis result')

    def push(self, remote='origin', branch='master'):
        """Push if possible.

        Return:
            True: If successful.
            False: If diverged or nothing to push.
        """
        ref = ['refs/heads/' + branch]

        try:
            remo = self.repo.remotes[remote]
        except Exception as e:
            logger.info(
                "Can not push. Remote: {} does not exist.".format(remote))
            logger.debug(e)
            return

        try:
            remo.push(ref, callbacks=self.callback)
        except Exception as e:
            logger.info("Can not push to {} with ref {}".format(
                remote, str(ref)))
            logger.debug(e)

    def getRemotes(self):
        remotes = {}

        try:
            for remote in self.repo.remotes:
                remotes[remote.name] = [remote.url, remote.push_url]
        except Exception as e:
            logger.info('No remotes found.')
            logger.debug(e)
            return {}

        return remotes
Example #29
class PyGitEngine(GitContentDatabaseEngine):
    def __init__(self, config):
        super(PyGitEngine, self).__init__(config)
        self.repo = None

    def connect(self):
        """Create content directory"""
        if not isdir(self.content_path):
            init_repository(self.content_path, bare=True)
            self.repo = Repository(self.content_path)
            self.create_initial_commit()
        else:
            self.repo = Repository(self.content_path)

    @staticmethod
    def do_put(content_path, object_hashes, content, filename):
        """Perform put operation. This is used in the distributed wrapper"""
        content_hash = Repository(content_path).create_blob(content)
        result = object_hashes[filename] = str(content_hash)
        return result

    def put_attr(self, content, filename):
        """Return attributes for the do_put operation"""
        filename = self._inc_name(filename)
        return (self.content_path, self.object_hashes, content, filename)

    def put(self, content, filename="generic"):  # pylint: disable=method-hidden
        """Put content in the content database"""
        return self.do_put(*self.put_attr(content, filename))

    def get(self, content_hash):  # pylint: disable=method-hidden
        """Get content from the content database"""
        return_data = self.repo[content_hash].data
        return return_data

    def find_subhash(self, content_hash):
        """Find hash in git"""
        try:
            blob = self.repo.revparse_single(content_hash)
            return str(blob.id)
        except KeyError:
            return None

    def create_initial_commit(self):
        """Create the initial commit of the git repository"""
        empty_tree = self.repo.TreeBuilder().write()
        self.create_commit_object(self._initial_message, empty_tree)

    def create_commit_object(self, message, tree):
        """Create a commit object"""
        references = list(self.repo.references)

        master_ref = self.repo.lookup_reference(
            self._commit_ref) if len(references) > 0 else None

        parents = []
        if master_ref is not None:
            parents = [master_ref.peel().id]

        author = Signature(self._commit_name, self._commit_email)
        return self.repo.create_commit(self._commit_ref, author, author,
                                       message, tree, parents)

    def new_tree(self, parent):
        """Create new git tree"""
        return self.repo.TreeBuilder()

    def insert_blob(self, tree, basename, value):
        """Insert blob into tree"""
        tree.insert(basename, value, GIT_FILEMODE_BLOB)

    def insert_tree(self, tree, basename, value):
        """Insert tree into tree"""
        tree.insert(basename, value, GIT_FILEMODE_TREE)

    def write_tree(self, tree):
        """Write tree to git directory"""
        return tree.write()
Example #30
def main():
    git_repo = Path(sys.argv[1]).resolve(strict=True)
    modules_dir = Path(sys.argv[2]).resolve(strict=True)
    collections_dir = Path(sys.argv[3]).resolve(strict=True)
    reference = sys.argv[4]
    canonical_file = Path(sys.argv[5]).resolve(strict=True)
    repo = Repository(git_repo)

    canonical_list = json.load(canonical_file.open())

    canonical_mapping = {}

    for bookslug in reversed(canonical_list):
        collection = collections_dir / f'{bookslug}.collection.xml'
        col_tree = etree.parse(str(collection))
        col_modules = col_tree.xpath("//col:module/@document",
                                     namespaces={"col": NS_COLLXML})
        col_uuid = col_tree.xpath("//md:uuid", namespaces={"md":
                                                           NS_MDML})[0].text
        for module in col_modules:
            canonical_mapping[module] = col_uuid

    # For the time being, we're going to parse the timestamp of the HEAD
    # commit and use that as the revised time for all module pages.
    commit = repo.revparse_single('HEAD')
    revised_time = datetime.fromtimestamp(commit.commit_time,
                                          timezone.utc).isoformat()
    book_version = determine_book_version(reference, repo, commit)

    # Get list of module files while filtering orphans using canonical_mapping
    module_files = [
        mf.resolve(strict=True) for mf in modules_dir.glob("**/*")
        if mf.is_file() and mf.name == "index.cnxml"
        and mf.parent.name in canonical_mapping
    ]

    collection_files = [
        cf.resolve(strict=True) for cf in collections_dir.glob("*.xml")
    ]

    for module_file in module_files:
        cnxml_doc = etree.parse(str(module_file))

        remove_metadata_entries(cnxml_doc, ["canonical-book-uuid"], NS_CNXML)

        new_metadata = {
            "revised": revised_time,
            "canonical-book-uuid": canonical_mapping[module_file.parent.name]
        }
        add_metadata_entries(cnxml_doc, new_metadata, NS_CNXML)

        with open(module_file, "wb") as f:
            cnxml_doc.write(f, encoding="utf-8", xml_declaration=False)

    for collection_file in collection_files:
        collection_doc = etree.parse(str(collection_file))
        new_metadata = {"revised": revised_time, "version": book_version}
        add_metadata_entries(collection_doc, new_metadata, NS_COLLXML)

        with open(collection_file, "wb") as f:
            collection_doc.write(f, encoding="utf-8", xml_declaration=False)
Example #31
File: git.py Project: hitigon/warehouse
class GitRepo(object):

    ''' git repo class '''

    def __init__(self, path):
        try:
            self.__repo = Repository(path)
        except Exception as e:
            self.__repo = None
            print(e)

    def get_info(self):
        if not self.__repo:
            return None
        signature = self.__repo.default_signature
        result = {
            'path': self.__repo.path,
            'workdir': self.__repo.workdir,
            'bare': self.__repo.is_bare,
            'empty': self.__repo.is_empty,
            'name': signature.name,
            'email': signature.email,
            'time': signature.time,
            'offset': signature.offset,
        }
        return result

    def get_all_references(self):
        return self.__repo.listall_references()

    def get_reference(self, name):
        if not self.__repo:
            return None
        ref = None
        try:
            ref = self.__repo.lookup_reference(name)
        except Exception as e:
            print(e)
        return ref

    def get_all_branches(self, branch_type=None):
        if not self.__repo:
            return None
        if branch_type:
            return self.__repo.listall_branches(branch_type)
        r = self.__repo.listall_branches(GIT_BRANCH_LOCAL | GIT_BRANCH_REMOTE)
        return r

    def get_branch(self, name, branch_type=GIT_BRANCH_LOCAL):
        if not self.__repo:
            return None
        return self.__repo.lookup_branch(name, branch_type)

    def check_branch(self, name, branch_type=None):
        if not branch_type:
            if '/' in name:
                branch_type = GIT_BRANCH_REMOTE
            else:
                branch_type = GIT_BRANCH_LOCAL
        try:
            result = self.get_branch(name, branch_type)
            return result
        except Exception as e:
            print(e)
            return False

    def get_current_commit(self):
        if not self.__repo:
            return None
        commit = self.__repo.revparse_single('HEAD')
        return self.get_commit(commit)

    def get_commit_by_branch(self, branch):
        if not self.__repo:
            return None
        query = 'refs/'
        if hasattr(branch, 'remote_name'):
            query += 'remotes/'
        else:
            query += 'heads/'
        query += branch.branch_name
        try:
            ref = self.get_reference(query)
            commit = ref.target
            return self.get_commit(commit)
        except Exception as e:
            print(e)
            return None

    def get_commit_by_tag(self, tag):
        if self.__repo is None:
            return None
        if tag:
            commit = tag.get_object()
            return self.get_commit(commit)
        return None

    def get_commit(self, oid_or_commit):
        ''' return a commit w/ json '''
        if not self.__repo or not oid_or_commit:
            return None
        try:
            commit = oid_or_commit
            if not isinstance(oid_or_commit, Commit):
                commit = self.__repo.get(oid_or_commit)
            if commit and commit.type == GIT_OBJ_COMMIT:
                # t1 = self.__repo.revparse_single('HEAD^')
                # t2 = self.__repo.revparse_single('HEAD^^')
                # patches = self.__repo.diff(t1, t2)
                # for p in patches:
                #     print(p.new_file_path)
                result = {
                    'id': str(commit.id),
                    'author': commit.author.name,
                    'commiter': commit.committer.name,
                    'message': commit.message,
                    'message_encoding': commit.message_encoding,
                    'tree': str(commit.tree_id),
                    'parent': [str(pid) for pid in commit.parent_ids],
                    'time': str(commit.commit_time),
                    'time_offset': str(commit.commit_time_offset),
                }
                return result
        except Exception as e:
            print(e)
        return None

    def get_commits(self, depth=10, oid_or_commit=None):
        result = []
        if depth == 0:
            return result
        if oid_or_commit:
            commit = self.get_commit(oid_or_commit)
        else:
            commit = self.get_current_commit()
        if not commit:
            return result
        # TODO: starting from a commit or its parent
        # TODO: author
        result.append(commit)
        depth -= 1
        if commit and commit['parent']:
            for parent in commit['parent']:
                result.extend(self.get_commits(depth, parent))
        return result

    def get_commits_by_branch(self, name, path=None):
        if not self.__repo:
            return None
        if self.check_branch(name):
            ref = self.get_reference('refs/heads/' + name)
            if ref:
                commit = ref.target
                commits = self.get_commits(commit)
                result = {}
                for key, val in commits.items():
                    if self.check_commit_by_path(val, path):
                        result[key] = val
                return result
        return None

    def check_tag(self, name):
        try:
            ref = self.get_reference('refs/tags/' + name)
            return ref
        except Exception:
            return False

    def get_commits_by_tag(self, tag, path=None):
        if not self.__repo:
            return None
        if tag:
            commit = tag.target
            commits = self.get_commits(commit)
            result = {}
            for key, val in commits.items():
                if self.check_commit_by_path(val, path):
                    result[key] = val
            return result
        return None

    def check_commit_by_path(self, commit, path):
        if not commit:
            return False
        if path is None or len(path) == 0:
            return True
        result = self.get_tree(commit['tree'])

        if not isinstance(path, list):
            path = path.strip().split('/')

        for name in path:
            name = name.strip()
            if name in result:
                oid = result[name]
                result = self.get_tree(oid)

                if not result:
                    result = self.get_blob(oid)
        return result is not None

    def get_tree(self, oid, ppath=None):
        if not self.__repo:
            return None
        try:
            tree = self.__repo.get(oid)
            if tree and tree.type == GIT_OBJ_TREE:
                result = {}
                for entry in tree:
                    item = {
                        'id': str(entry.id)
                    }
                    obj = self.__repo.get(entry.id)
                    if obj.type == GIT_OBJ_BLOB:
                        item['type'] = 'blob'
                    elif obj.type == GIT_OBJ_TREE:
                        item['type'] = 'tree'
                    item['ppath'] = ppath
                    result[entry.name] = item
                return result
        except Exception as e:
            print(e)
        return None

    def get_tree_by_commit(self, commit, path=None):
        if not commit:
            return None
        result = self.get_tree(commit['tree'])
        if not path:
            return result

        # if not isinstance(path, list):
        #     path = path.strip().split('/')

        try:
            for name in path:
                oid = result[name]['id']
                p = result[name]['ppath']
                p = name if not p else p + '/' + name
                result = self.get_tree(oid, p)
                if not result:
                    break
        except Exception as e:
            print(e)
            result = None
        return result

    def get_current_root(self):
        tree = self.get_current_commit()
        if tree:
            return self.get_tree(tree['tree'])
        return None

    def get_whole_tree(self, oid):
        ''' tree w/ json '''
        if not self.__repo:
            return None
        result = tree_walker(self.__repo, oid)
        return result

    def get_blob(self, oid):
        ''' blob w/ json '''
        if not self.__repo or not oid:
            return None
        try:
            blob = self.__repo.get(oid)
            if blob and blob.type == GIT_OBJ_BLOB:
                content = blob.is_binary and None or blob.data.decode(
                    'utf8', 'ignore')
                result = {
                    'id': str(blob.id),
                    'content': content,
                    'size': blob.size,
                }
                return result
        except Exception as e:
            print(e)
        return None

    def get_blob_by_commit(self, commit, path=None):

        try:
            tree = self.get_tree_by_commit(commit, path[:-1])
            oid = tree[path[-1]]['id']
            result = self.get_blob(oid)
            return result
        except Exception as e:
            print(e)
            return None

    def get_tag(self, oid):
        ''' blob w/ json '''
        if not self.__repo or not oid:
            return None
        try:
            tag = self.__repo.get(oid)
            if tag and tag.type == GIT_OBJ_TAG:
                result = {
                    'id': str(oid),
                    'name': tag.name,
                    'target': str(tag.target.id),
                    'tagger': tag.tagger,
                    'message': tag.message,
                }
                return result
        except Exception as e:
            print(e)
        return None

    def get_patches(self, a=None, b=None):
        try:
            if not a:
                a = 'HEAD'
            if not b:
                b = a + '^'
            t1 = self.__repo.revparse_single(a)
            t2 = self.__repo.revparse_single(b)
            patches = self.__repo.diff(t1, t2)
            result = []
            for patch in patches:
                p = {
                    'old_file_path': patch.old_file_path,
                    'new_file_path': patch.new_file_path,
                    'old_oid': str(patch.old_oid),
                    'new_oid': str(patch.new_oid),
                    'status': patch.status,
                    'similarity': patch.similarity,
                    'additions': patch.additions,
                    'deletions': patch.deletions,
                    'binary': patch.is_binary,
                    'hunks': [],
                }
                for hunk in patch.hunks:
                    h = {
                        'old_start': hunk.old_start,
                        'old_lines': hunk.old_lines,
                        'new_start': hunk.new_start,
                        'new_lines': hunk.new_lines,
                        'lines': hunk.lines,
                    }
                    p['hunks'].append(h)
                result.append(p)
            return result
        except Exception as e:
            print(e)
        return None
Example #32
logging.info('Checking if test-metric recording needs to be enabled')
if 'TEST_METRICS_URL' in os.environ:
    import datetime
    import getpass
    import json
    import socket
    from timeit import default_timer as timer

    from pygit2 import Repository

    from tests.cook import util

    repository_path = os.path.abspath(f'{os.path.dirname(os.path.abspath(__file__))}/../..')
    repo = Repository(repository_path)
    head = repo.head
    commit = repo.revparse_single('HEAD')
    git_branch = head.name.replace('refs/heads/', '')
    git_commit_hex = commit.hex
    elastic_search_url = os.getenv('TEST_METRICS_URL').rstrip('/')
    logging.info(f'Sending test metrics to {elastic_search_url}')


    @pytest.fixture()
    def record_test_metric(request):
        start = timer()
        yield
        try:
            end = timer()
            now = datetime.datetime.utcnow()
            index = f'cook-tests-{now.strftime("%Y%m%d")}'
            request_node = request.node
Example #33
class GitOperations(object):
    def __init__(self, repo, no_cache=False):
        self.repo = repo
        self._gitrepo = os.path.join(repo, '.git')
        self._pygit = Repository(repo)
        self._commands = {}
        self._trees = {}
        self._trees_filled = {}
        self._sizes = {}
        self._refs = {}
        self._commits_iterator = None
        self.cache = not no_cache
        self.years = range(self._first_year(), self._last_year() + 1)

    def cached_command(self, list, return_exit_code=False, silent=False):
        """
        Executes the specified git command and returns its result.
        Subsequent executions of the same command return the cached result.
        If return_exit_code is set, then the return value is True or False
        depending on whether the command exited with 0 or not.
        If silent is true then failed executions return None,
        without displaying an error.
        """

        list = ['git', '--git-dir', self._gitrepo] + list
        command = " ".join(list)
        if command in self._commands:
            return self._commands[command]
        else:
            try:
                # print(command)
                out = check_output(list)
                if return_exit_code:
                    out = True
            except CalledProcessError as e:
                if return_exit_code:
                    out = False
                elif silent:
                    out = None
                else:
                    message = "Error calling %s: %s" % (command, str(e))
                    sys.stderr.write(message)
                    out = None
            if self.cache:
                self._commands[command] = out
            return out

    def _get_entry(self, commit, path=None, return_tree=False):
        try:
            if path:
                obj = self._pygit[commit].tree[path]
            elif path == '':
                obj = self._pygit[commit].tree
            else:
                obj = self._pygit[commit]
        except KeyError as e:
            raise GitOperError("pygit entry does not exist\n%s" % (str(e)))

        if return_tree:
            return obj

        return self._pygit[obj.id]

    def _fill_trees(self, commit, contents):
        if not commit in self._trees:
            self._trees[commit] = set()

        trees = []
        for cont in contents:
            if cont[1] == "tree" and cont[0] not in self._trees[commit]:
                trees.append(cont[0])

        self._trees[commit].update(trees)

    def _get_tree(self, commit, path):
        if not path:
            tree = self._get_entry(commit, '')
        else:
            path += "/"
            try:
                tree = self._get_entry(commit, path)
            except KeyError:
                return []

        return [(c.name, c.type) for c in tree]

    def _cache_tree(self, commit, path):
        tree = self._get_tree(commit, path)
        paths_and_names = [(os.path.join(path, c[0]), c[1]) for c in tree]
        self._fill_trees(commit, paths_and_names)
        if not commit in self._trees_filled:
            self._trees_filled[commit] = set()
        self._trees_filled[commit].update([path])

    def _first_year(self):
        """
        Returns the year of the repo's first commit(s).
        Not implemented using pygit2 because it's faster
        to get the year via a shell command, and it creates
        only one process at boot time.
        """

        first_years = self.cached_command(
            ['log', '--max-parents=0', '--date=format:%Y',
             '--pretty=%ad']).splitlines()
        return int(sorted(first_years)[0])

    def _last_year(self):
        """
        Returns the year of the repo's last commit
        """
        return int(
            self.cached_command([
                'log', '-n', '1', '--all', '--date=format:%Y', '--pretty=%ad'
            ]))

    def refs(self, refs):
        """
        Returns the specified refs in the form:
        <commit_hash> refs/{heads,remotes,tags}/<branchname>
        """
        refs = self.cached_command(
            ['for-each-ref', '--format=%(objectname) %(refname)'] +
            refs).splitlines()
        return [ref.strip() for ref in refs]

    def _get_commits_iterator(self, command):
        return StringIO.StringIO(self.cached_command(command))

    def commits_by_date(self, y, m, d):
        """
        Returns a list of commit hashes for the given year, month, day
        """
        start = datetime.date(y, m, d)
        end = start + datetime.timedelta(days=1)
        # T00:00:00 is at the start of the specified day
        command = [
            'log', '--after',
            '%04d-%02d-%02dT00:00:00' % (start.year, start.month, start.day),
            '--before',
            '%04d-%02d-%02dT00:00:00' % (end.year, end.month, end.day),
            '--all', '--pretty=%H'
        ]
        commits = self._get_commits_iterator(command)
        for commit in commits:
            yield commit.strip()

    def all_commits(self, prefix=""):
        """
        Returns a list of all commit hashes
        """
        command = ['log', '--all', '--pretty=%H']
        commits = self._get_commits_iterator(command)

        if prefix:
            commits = iter([c for c in commits if c.startswith(prefix)])

        for commit in commits:
            yield commit.strip()

    def _get_commit_from_ref(self, ref):
        commit = self._pygit.revparse_single(ref)
        if isinstance(commit, Commit):
            return commit

        if hasattr(commit, "target"):
            return self._pygit[commit.target]

        return None

    def commit_of_ref(self, ref):
        """
        Returns the last commit of a ref.
        """
        # Check cache
        if ref in self._refs:
            return self._refs[ref]

        commit = self._get_commit_from_ref(ref)
        self._refs[ref] = ""
        if commit:
            self._refs[ref] = str(commit.id)

        return self._refs[ref]

    def commit_parents(self, commit):
        """
        Returns commit parents
        """
        parents = self._get_entry(commit).parents
        return [str(p.id) for p in parents]

    def commit_descendants(self, commit):
        """
        Returns commit descendants
        """
        return []

    def commit_names(self, commit):
        """
        Returns names associated with commit
        """
        return []

    def get_commit_time(self, commit):
        return self._get_entry(commit).commit_time

    def get_author_time(self, commit):
        return self._get_entry(commit).author.time

    def directory_contents(self, commit, path):
        """
        Returns the contents of the directory
        specified by `path`
        """

        tree = self._get_tree(commit, path)
        return [c[0] for c in tree]

    def is_symlink(self, commit, path):
        # the root of the repository can't be a symlink
        if not path:
            return False

        entry = self._get_entry(commit, path, return_tree=True)
        if entry.filemode == GIT_FILEMODE_LINK:
            return True

        return False

    def is_dir(self, commit, path):
        if commit in self._trees and path in self._trees[commit]:
            return True

        if commit not in self._trees:
            self._trees[commit] = set([''])
            self._trees_filled[commit] = set([''])
            self._cache_tree(commit, '')

        elements = path.split("/")
        for i in range(len(elements) - 1):
            subpath = "/".join(elements[:i + 1])
            if subpath in self._trees[
                    commit] and subpath not in self._trees_filled[commit]:
                self._cache_tree(commit, subpath)

        return path in self._trees[commit]

    def file_contents(self, commit, path):
        try:
            return self._get_entry(commit, path).data
        except KeyError:
            return ""

    def file_size(self, commit, path):
        if not commit in self._sizes:
            self._sizes[commit] = {}

        if path in self._sizes[commit]:
            return self._sizes[commit][path]

        try:
            size = self._get_entry(commit, path).size
        except KeyError:
            size = 0

        self._sizes[commit][path] = size
        return size

    def author(self, commit):
        return self._get_entry(commit).author.name

    def author_email(self, commit):
        return self._get_entry(commit).author.email
Example #34
#    print commit.hex
#    print base.revparse_single(commit.hex).message
#    print commit.commit_time
#    print commit.commit_time_offset

git = sh.git.bake(_cwd='/home/heather/research/spoon-knife')

for point in history:
    git.checkout(point)
#    print subprocess.check_output(['ohcount', 'spoon-knife'])

git.checkout(history[0])

i = 0
while i < len(history) - 2:
    t0 = base.revparse_single(history[i])
    t1 = base.revparse_single(history[i+1])
    diff = base.diff(t0,t1)
    patches = [p for p in diff]
    for patch in patches:
        print 'NUM HUNKS: ' + str(len(patch.hunks))
        for hunk in patch.hunks:
#            print hunk.lines
            totesLines = 0
            totesMods = 0
            for line in hunk.lines:
                totesLines += 1
                if line[0] == '-' or line[0] == '+':
                    totesMods += 1
                    print(line)
            print('TOTAL LINES: ' + str(totesLines))
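
For newer pygit2 releases, where hunk.lines yields DiffLine objects rather than (origin, content) tuples, the same counting can be sketched as follows (assuming `base` is the pygit2 Repository and `history` the list of commit hashes built above):

i = 0
while i < len(history) - 1:
    t0 = base.revparse_single(history[i])
    t1 = base.revparse_single(history[i + 1])
    diff = base.diff(t0, t1)
    for patch in diff:
        print('NUM HUNKS: ' + str(len(patch.hunks)))
        for hunk in patch.hunks:
            # DiffLine.origin is '+', '-', or ' ' (context line)
            totesLines = len(hunk.lines)
            totesMods = sum(1 for line in hunk.lines
                            if line.origin in ('+', '-'))
            print('TOTAL LINES: ' + str(totesLines))
            print('TOTAL MODS:  ' + str(totesMods))
    i += 1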
Example #35
0
class prototype:
    repo = ""  # Path to a given repository
    name = ""  # Name of a repository
    base = ""  # Repository as defined in pygit2

    # Initialization. Clones the given repository, placing it in the current directory,
    # and changes to the repository directory.
    def init(self, repository):
        self.repo = repository

        # Use regular expressions to match the last instance of a forward slash
        # followed by the name of the repository, which we wish to extract, followed
        # by ".git". 
        m = re.search('/([^/]+).git$', repository)
        if m:
            self.name = m.group(1)

        if not os.path.isdir(self.name):
            os.system('git clone ' + self.repo) # Get the repository from GitHub

        self.base = Repository(self.name)
        self.base.checkout('HEAD')

    # Destruction. Remove the given repository from memory.
    def destroy(self):
        # A `cd` in its own os.system() call does not change this process's
        # working directory, so remove the clone directly by name.
        os.system('rm -rf ' + self.name)

    # Get total LOC by given repository. 
    def totalRepoLOC(self):
        loc = countDirLOC(self.name)
        return loc

    # Get total commits by a given repository
    def totalRepoCommits(self):
        commits = 0
        for commit in self.base.walk(self.base.head.target, GIT_SORT_TOPOLOGICAL):
            commits = commits + 1
        return commits

    # Get a list of LOC changed per commit
    def locPerCommit(self):
        loc = []
        oldPath = os.popen('pwd')
        os.chdir(self.name)
        sha1 = 0
        sha2 = 0

        start = 1
        total = self.totalRepoCommits()

        # For each commit within the repository
        for commit in self.base.walk(self.base.head.target, GIT_SORT_TOPOLOGICAL):

            print('\r', start, '/', total, end='')
            start += 1

            # Based on the SHA, use git to show the patch for that commit
            sha1 = sha2
            sha2 = commit.hex
            if sha1 != 0:
                p = os.popen('git diff --shortstat ' + sha1 + ' ' + sha2)
                line = p.readline()

                # line contains "# file changed, # insertions(+), # deletions(-)"
                # Use regular expressions to find the number of additions and deletions.
                # Additions are found after ", " and before " insertion". Deletions are
                # found after "(+), " and before " deletion".
                m = re.search(', (.*) insertion', line)
                additions = 0
                deletions = 0
                if m:
                    additions = m.group(1)
                m = re.search(r'\(\+\), (.*) deletion', line)
                if m:
                    deletions = m.group(1)

                # Get the total and append to array
                modifications = int(additions) + int(deletions)
                loc.append(modifications)

        os.chdir('..')
        return loc


    # Get a list containing the total number of line additions and deletions (including
    # whitespace and comments) contained within each hunk that was changed over t
    def locPerHunk(self):
        loc = []
        history = []

        # Get the hex number for each commit within the repository
        for commit in self.base.walk(self.base.head.target, GIT_SORT_TOPOLOGICAL):
            sha = commit.hex
            history.append(sha)

        # Compare each revision in the history of the repository with the previous rev.
        i = 0
        while i < len(history) - 1:
            t0 = self.base.revparse_single(history[i])
            t1 = self.base.revparse_single(history[i+1])
            diff = self.base.diff(t0,t1)
            patches = [p for p in diff]
            for patch in patches:
                for hunk in patch.hunks:
                   
                    # Check the first character in each hunk line. Only those that have
                    # been modified will contain a '+' (insertion) or '-' (deletion)
                    totalModifications = 0
                    for line in hunk.lines:
                        if line[0] == '-' or line[0] == '+':
                            totalModifications +=1
                    loc.append(totalModifications)
            i += 1
        return loc

    # Get the total number of lines contained within a hunk, including additions, deletions,
    # and surrounding non-changed lines
    def locInHunk(self):
        loc = []
        history = []

        # Get the hex number for each commit within the repository
        for commit in self.base.walk(self.base.head.target, GIT_SORT_TOPOLOGICAL):
            sha = commit.hex
            history.append(sha)

        # Compare each revision in the history of the repository with the previous rev.
        i = 0
        while i < len(history) - 1:
            t0 = self.base.revparse_single(history[i])
            t1 = self.base.revparse_single(history[i+1])
            diff = self.base.diff(t0,t1)
            patches = [p for p in diff]
            for patch in patches:
                for hunk in patch.hunks:
                    totalLines = 0
                    for line in hunk.lines:
                       totalLines += 1
                    loc.append(totalLines)
            i += 1
        return loc

    # Perform a diff between all commits starting from oldest to newest
    #  and compile temp files comprised of only modified lines.
    #  Run cloc on temp files to get sloc for each diff set.
    def slocPerDiff(self):
        # Storage for commit history hashes
        history = []
        
        # Store all slocs
        slocPerDiffs = []

        # Move through the system history from newest to oldest commit
        for commit in self.base.walk(self.base.head.target, GIT_SORT_TOPOLOGICAL | GIT_SORT_REVERSE):
            history.append(commit)

        i = 0
        while i < len(history) - 2:
            sloc = 0
            t0 = self.base.revparse_single(history[i].hex)
            t1 = self.base.revparse_single(history[i+1].hex)
            try:
                diff = self.base.diff(t0,t1)
            except ValueError:
                print "Caught value error."
                i += 1
                continue

            patches = [p for p in diff]
            for patch in patches:
                print(patch.new_file_path)
                hunkfile = open("tmp", 'w') 
                for hunk in patch.hunks:
                    totesLines = 0
                    totesMods = 0
                    for line in hunk.lines:
                        totesLines += 1
                        if line[0] == '-' or line[0] == '+':
                            totesMods += 1
                            hunkfile.write(line[1])
                hunkfile.close()
            
                output = subprocess.Popen('cloc ' + patch.new_file_path + ' --by-file --csv', shell=True, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
                start = False
                for line in output.stdout.readlines():
                    if line[0] == 'l':
                        start = True
                        continue
                    if start:
                        temp = line.split(',')
                        sloc += int(temp[4].replace('\n', ''))
                        retval = output.wait()
                os.remove("tmp")                        
            i += 1
            slocPerDiffs.append(int(sloc))
        
        return slocPerDiffs

    # Get a list containing the number of hunks changed per commit
    def hunksPerCommit(self):
        hunks = []
        history = []

        start = 1
        total = self.totalRepoCommits()

        # Get the hex number for each commit within the repository
        for commit in self.base.walk(self.base.head.target, GIT_SORT_TOPOLOGICAL):
            sha = commit.hex
            history.append(sha)

        # Compare each revision in the history of the repository with the previous rev.
        i = 0
        while i < len(history) - 1:
            print('\r', start, '/', total, end='')
            start += 1

            t0 = self.base.revparse_single(history[i])
            t1 = self.base.revparse_single(history[i+1])

            try:
                diff = self.base.diff(t0,t1)
            except ValueError:
                print "Caught value error."
                i += 1
                continue

            patches = [p for p in diff]
            for patch in patches:
                hunks.append(len(patch.hunks))
            i += 1

        return hunks


    # Get a list of the number of files changed per commit
    def filesPerCommit(self):
        files = []
        oldPath = os.popen('pwd')
        os.chdir(self.name)
        sha1 = 0
        sha2 = 0

        start = 1
        total = self.totalRepoCommits()

        # For each commit within the repository
        for commit in self.base.walk(self.base.head.target, GIT_SORT_TOPOLOGICAL):

            print('\r', start, '/', total, end='')
            start += 1

            # Based on the SHA, use git to show the patch for that commit
            sha1 = sha2
            sha2 = commit.hex
            if sha1 != 0:
                p = os.popen('git diff --shortstat ' + sha1 + ' ' + sha2)
                line = p.readline()

                # line contains "# file changed, # insertions(+), # deletions(-)"
                # Use regular expressions to find the number of files modified, which
                # are contained first on the line followed by " file"
                m = re.search(' (.*) file', line)
                if m:
                    numFilesChanged = int(m.group(1))
                    files.append(numFilesChanged)

        os.chdir('..')
        return files

    # Print out all stats for the repository
    def printStats(self):
        f = open(self.name + '-results.txt', 'w')
        f.write(("-----------" + self.name + "-----------\n"))

        # Stats on entire repository
        repoLOC = self.totalRepoLOC()
        repoCommits = self.totalRepoCommits()

        # Lists by commit
        locPerCommit   = self.locPerCommit()
        #slocPerDiff    = self.slocPerDiff()
        hunksPerCommit = self.hunksPerCommit()
        filesPerCommit = self.filesPerCommit()
        
        # Stats for LOC
        xsmall = 0
        small  = 0
        medium = 0
        large  = 0
        xlarge = 0
        for item in locPerCommit:
            if (item >= 0 and item <= 5):
                xsmall += 1
            if (item >= 6 and item <= 46):
                small += 1
            if (item >= 47 and item <= 106):
                medium += 1
            if (item >= 107 and item <= 166):
                large += 1
            if (item >= 167):
                xlarge += 1

        f.write("Number of Modified Lines:\n")
        f.write("x-small: " + str(xsmall) + "\n")
        f.write("small:   " + str(small) + "\n")
        f.write("medium:  " + str(medium) + "\n")
        f.write("large:   " + str(large) + "\n")
        f.write("x-large: " + str(xlarge) + "\n")
        

        '''
        # Stats for SLOC
        xsmall = 0
        small  = 0
        medium = 0
        large  = 0
        xlarge = 0
        for item in slocPerDiff:
            if (item >= 0 and item <= 5):
                xsmall += 1
            if (item >= 6 and item <= 46):
                small += 1
            if (item >= 47 and item <= 106):
                medium += 1
            if (item >= 107 and item <= 166):
                large += 1
            if (item >= 167):
                xlarge += 1

        f.write("Number of Modified SLOC: \n")
        f.write("x-small: " + str(xsmall) + "\n")
        f.write("small:   " + str(small) + "\n")
        f.write("medium:  " + str(medium) + "\n")
        f.write("large:   " + str(large) + "\n")
        f.write("x-large: " + str(xlarge) + "\n")

        '''
        # Print stats for modified files
        xsmall = 0
        small  = 0
        medium = 0
        large  = 0
        xlarge = 0
        for item in filesPerCommit:
            if (item == 1):
                xsmall += 1
            if (item >= 2 and item <= 4):
                small += 1
            if (item >= 5 and item <= 7):
                medium += 1
            if (item >= 8 and item <= 10):
                large += 1
            if (item >= 11):
                xlarge += 1

        f.write("Number of modified files: \n")
        f.write("x-small: " + str(xsmall) + "\n")
        f.write("small:   " + str(small) + "\n")
        f.write("medium:  " + str(medium) + "\n")
        f.write("large:   " + str(large) + "\n")
        f.write("x-large: " + str(xlarge) + "\n")

        # Prints stats for hunks
        xsmall = 0
        small  = 0
        medium = 0
        large  = 0
        xlarge = 0
        for item in hunksPerCommit:
            if (item >= 0 and item <= 1):
                xsmall += 1
            if (item >= 2 and item <= 8):
                small += 1
            if (item >= 9 and item <= 17):
                medium += 1
            if (item >= 18 and item <= 26):
                large += 1
            if (item >= 27):
                xlarge += 1

        f.write("Number of hunks per commit: \n")
        f.write("x-small: " + str(xsmall) + "\n")
        f.write("small:   " + str(small) + "\n")
        f.write("medium:  " + str(medium) + "\n")
        f.write("large:   " + str(large) + "\n")
        f.write("x-large: " + str(xlarge) + "\n")

        f.close()
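
A minimal usage sketch for the prototype class above (the repository URL is only an example; the instance clones into the current directory):

p = prototype()
p.init('https://github.com/octocat/Spoon-Knife.git')
print('total LOC:     ' + str(p.totalRepoLOC()))
print('total commits: ' + str(p.totalRepoCommits()))
p.printStats()   # writes <name>-results.txt
p.destroy()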
Example #36
0
class GitStorage(BaseStorage):

    _backend = None

    def __init__(self, context, repo_path=None):
        self.context = context
        rp = IStorageInfo(context).path

        try:
            self.repo = Repository(discover_repository(rp))
        except KeyError:
            # discover_repository may have failed.
            raise PathNotFoundError('repository does not exist at path')

        self.checkout()  # defaults to HEAD.

    @property
    def empty_root(self):
        return {'': '_empty_root'}

    def _get_empty_root(self):
        return self.empty_root

    def _get_obj(self, path, cls=None):
        if path == '' and self._commit is None:
            # special case
            return self._get_empty_root()

        if self._commit is None:
            raise PathNotFoundError('repository is empty')

        root = self._commit.tree
        try:
            breadcrumbs = []
            fragments = list(reversed(path.split('/')))
            node = root
            oid = None
            while fragments:
                fragment = fragments.pop()
                if not fragment == '':
                    # no empty string entries, also skips over '//' and
                    # leaves the final node (if directory) as the tree.
                    oid = node[fragment].oid
                    node = self.repo.get(oid)
                breadcrumbs.append(fragment)
                if node is None:
                    # strange.  Looks like it's either submodules only
                    # have entry nodes or pygit2 doesn't fully support
                    # this.  Try to manually resolve the .gitmodules
                    # file.
                    if cls is None:
                        # Only return this if a specific type was not
                        # expected.
                        submods = parse_gitmodules(self.repo.get(
                            root[GIT_MODULE_FILE].oid).data)
                        submod = submods.get('/'.join(breadcrumbs))
                        if submod:
                            fragments.reverse()
                            return {
                                '': '_subrepo',
                                'location': submod,
                                'path': '/'.join(fragments),
                                'rev': oid.hex,
                            }

            if node and (cls is None or isinstance(node, cls)):
                return node
        except KeyError:
            # can't find what is needed in repo, raised by pygit2
            raise PathNotFoundError('path not found')

        # not what we were looking for.
        if cls == Tree:
            raise PathNotDirError('path not dir')
        elif cls == Blob:
            raise PathNotFileError('path not file')
        raise PathNotFoundError('path not found')

    @property
    def _commit(self):
        return self.__commit

    @property
    def rev(self):
        if self.__commit:
            return self.__commit.hex
        return None

    @property
    def shortrev(self):
        # TODO this is an interim solution.
        if self.rev:
            return self.rev[:12]

    def basename(self, name):
        return name.split('/')[-1]

    def checkout(self, rev=None):
        # None maps to the default revision.
        if rev is None:
            rev = 'HEAD'

        try:
            self.__commit = self.repo.revparse_single(rev)
        except KeyError:
            if rev == 'HEAD':
                # probably a new repo.
                self.__commit = None
                return
            # otherwise a RevisionNotFoundError should be raised.
            raise RevisionNotFoundError('revision %s not found' % rev)

    def files(self):
        def _files(tree, current_path=None):
            results = []
            for node in tree:
                if current_path:
                    name = '/'.join([current_path, node.name])
                else:
                    name = node.name

                obj = self.repo.get(node.oid)
                if isinstance(obj, Blob):
                    results.append(name)
                elif isinstance(obj, Tree):
                    results.extend(_files(obj, name))
            return results

        if not self._commit:
            return []
        results = _files(self._commit.tree)
        return results

    def file(self, path):
        return self._get_obj(path, Blob).data

    def listdir(self, path):
        if path:
            tree = self._get_obj(path, Tree)
        else:
            if self._commit is None:
                return []
            tree = self._commit.tree

        return [entry.name for entry in tree]

    def format(self, **kw):
        # XXX backwards compatibility??
        return kw

    def log(self, start, count, branch=None, shortlog=False):
        """
        start and branch are literally the same thing.
        """

        def _log(iterator):
            for pos, commit in iterator:
                if pos == count:
                    return  # PEP 479: returning ends the generator cleanly
                yield {
                    'author': commit.committer.name,
                    'email': commit.committer.email,
                    'date': self.strftime(committer_dt(commit.committer)),
                    'node': commit.hex,
                    'rev': commit.hex,
                    'desc': commit.message
                }

        if start is None:
            # assumption.
            start = 'HEAD'
            try:
                self.repo.revparse_single(start)
            except KeyError:
                return []

        try:
            rev = self.repo.revparse_single(start).hex
        except KeyError:
            raise RevisionNotFoundError('revision %s not found' % start)

        iterator = enumerate(self.repo.walk(rev, GIT_SORT_TIME))

        return list(_log(iterator))

    def pathinfo(self, path):
        obj = self._get_obj(path)
        if isinstance(obj, Blob):
            return self.format(**{
                'type': 'file',
                'basename': self.basename(path),
                'size': obj.size,
                'date': self.strftime(committer_dt(self._commit.committer)),
            })
        elif isinstance(obj, dict):
            # special cases are represented as dict.
            if obj[''] == '_subrepo':
                return self.format(**{
                    'type': 'subrepo',
                    'date': '',
                    'size': 0,
                    'basename': self.basename(path),
                    # extra field.
                    'obj': obj,
                })

            elif obj[''] == '_empty_root':
                return self.format(**{
                    'type': 'folder',
                    'date': '',
                    'size': 0,
                    'basename': self.basename(path),
                })

        # Assume this is a Tree.
        return self.format(**{
            'basename': self.basename(path),
            'size': 0,
            'type': 'folder',
            'date': '',
        })

    def branches(self):
        return tuple(
            (b, self.repo.lookup_branch(b).target.hex)
            for b in self.repo.listall_branches()
        )

    def tags(self):
        return tuple(
            (b[10:], self.repo.lookup_reference(b).target.hex)
            for b in self.repo.listall_references()
            if b.startswith('refs/tags')
        )
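
A minimal usage sketch, assuming the GitStorage above has been constructed with a context whose IStorageInfo adapter points at the repository path (the context is application specific):

storage = GitStorage(context)
storage.checkout('HEAD')            # or any rev accepted by revparse_single
print(storage.rev, storage.shortrev)
print(storage.listdir(''))          # top-level entries of the checked-out tree
print(storage.files())              # all file paths, recursively
for entry in storage.log('HEAD', 5):
    print(entry['rev'][:12], entry['desc'])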
Example #37
0
class CollectGit(object):
    """
    Small Helper class for small repositories.
    This does not scale because we hold a lot of data in memory.
    """

    _regex_comment = re.compile(
        r"(//[^\"\n\r]*(?:\"[^\"\n\r]*\"[^\"\n\r]*)*[\r\n]|/\*([^*]|\*(?!/))*?\*/)(?=[^\"]*(?:\"[^\"]*\"[^\"]*)*$)"
    )
    _regex_jdoc_line = re.compile(r"(- |\+)\s*(\*|/\*).*")

    def __init__(self, path):
        if not path.endswith('.git'):
            if not path.endswith('/'):
                path += '/'
            path += '.git'
        self._log = logging.getLogger(self.__class__.__name__)
        self._path = path
        self._repo = Repository(self._path)
        self._hunks = {}

        self._file_actions = {}
        self._bugfix = {}
        self._msgs = {}
        self._days = {}
        self._cdays = {}
        self._branches = {}
        self._tags = {}

        self._dopts = GIT_DIFF_FIND_RENAMES | GIT_DIFF_FIND_COPIES
        self._SIMILARITY_THRESHOLD = 50
        self._graph = nx.DiGraph()

    @classmethod
    def clone_repo(cls, uri, local_path):
        project_name = uri.split('/')[-1].split('.git')[0]
        repo_path = local_path + '/' + project_name + '/'

        if os.path.isdir(repo_path):
            c = subprocess.run(['git', 'fetch'],
                               cwd=repo_path,
                               stdout=subprocess.PIPE,
                               stderr=subprocess.PIPE)
            if c.returncode != 0:
                err = 'Error fetching repository {} in {}'.format(
                    uri, repo_path)
                raise Exception(err)
        else:
            os.mkdir(repo_path)
            c = subprocess.run(['git', 'clone', uri, repo_path],
                               cwd=repo_path,
                               stdout=subprocess.PIPE,
                               stderr=subprocess.PIPE)
            if c.returncode != 0:
                err = 'Error cloning repository {} to {}'.format(
                    uri, repo_path)
                raise Exception(err)
        return repo_path

    def _changed_lines(self, hunk):
        added_lines = []
        deleted_lines = []

        del_line = hunk['old_start']
        add_line = hunk['new_start']

        for line in hunk['content'].split('\n'):

            tmp = line[1:].strip()
            # is_comment = tmp.startswith('//') or tmp.startswith('/*') or tmp.startswith('*')

            if line.startswith('+'):
                added_lines.append((add_line, tmp))
                del_line -= 1
            if line.startswith('-'):
                deleted_lines.append((del_line, tmp))
                add_line -= 1

            del_line += 1
            add_line += 1

        return added_lines, deleted_lines

    def _comment_only_change(self, content):
        content = content + '\n'  # required for regex to drop comments
        content = re.sub(self._regex_comment, "", content)
        removed = ''
        added = ''
        for line in content.split('\n'):
            line = re.sub(
                r"\s+", " ", line, flags=re.UNICODE
            )  # replace any run of whitespace with a single space
            if not re.match(self._regex_jdoc_line, line):
                if line.startswith('-'):
                    removed += line[1:].strip()
                elif line.startswith('+'):
                    added += line[1:].strip()
        return removed == added

    def _blame_lines(self,
                     revision_hash,
                     filepath,
                     strategy,
                     ignore_lines=False,
                     validated_bugfix_lines=False):
        """We want to find changed lines for one file in one commit (from the previous commit).

        For this we are iterating over the diff and counting the lines that are deleted (changed) from the original file.
        We ignore all added lines.

        ignore_lines is already specific to all changed hunks of the file for which blame_lines is called
        """
        c = self._repo.revparse_single('{}'.format(revision_hash))
        self._hunks[revision_hash] = self._get_hunks(c)

        changed_lines = []
        if revision_hash not in self._hunks.keys(
        ) or not self._hunks[revision_hash]:
            return changed_lines

        for h in self._hunks[revision_hash]:
            if h['new_file'] != filepath:
                continue

            # only whitespace or comment changes in the hunk, ignore
            if strategy == 'code_only' and self._comment_only_change(
                    h['content']):
                self._log.debug(
                    'detected whitespace or comment only change in {} for {}'.
                    format(revision_hash, filepath))
                continue

            added, deleted = self._changed_lines(h)
            for dt in deleted:
                if dt not in changed_lines and dt[1]:
                    if strategy == 'code_only' and dt[1].startswith(
                        ('//', '/*', '*')):
                        continue

                    # we may only want validated lines
                    if validated_bugfix_lines is not False:
                        if dt[0] not in validated_bugfix_lines:
                            continue

                    # we may ignore lines, e.g., refactorings
                    if ignore_lines:
                        ignore = False
                        for start_line, end_line in ignore_lines:
                            if start_line <= dt[0] <= end_line:
                                ignore = True
                                break

                        # if we hit the line in our ignore list we continue to the next
                        if ignore:
                            # self._log.warn('ignore line {} in file {} in commit {} because of refactoring detection'.format(dt[0], filepath, revision_hash))
                            continue

                    changed_lines.append(dt)

        return changed_lines

    def blame(self,
              revision_hash,
              filepath,
              strategy='code_only',
              ignore_lines=False,
              validated_bugfix_lines=False):
        """Collect a list of commits where the given revision and file were last changed.

        Uses git blame.

        :param str revision_hash: Commit for which we want to collect blame commits.
        :param str filepath: File for which we want to collect blame commits.
        :rtype: list
        :returns: A list of tuples of blame commits and the original file for the given parameters.
        """
        commits = []

        # - ignore if commit is not in graph
        if revision_hash not in self._graph:
            return []

        # # - ignore package-info.java
        # if strategy == 'code_only' and filepath.lower().endswith('package-info.java'):
        #     self._log.debug('skipping blame on revision: {} for file {} because it is package-info.java'.format(revision_hash, filepath))
        #     return []

        # # - ignore test/ /test/ example/ examples/
        # if strategy == 'code_only' and re.match(self._regex_test_example, filepath):
        #     self._log.debug('skipping blame on revision: {} for file {} because it is a test or an example'.format(revision_hash, filepath))
        #     return []

        # bail on multiple parents
        parents = list(self._graph.predecessors(revision_hash))
        if len(parents) > 1:
            self._log.debug(
                'skipping blame on revision: {} because it is a merge commit'.
                format(revision_hash))
            return []

        changed_lines = self._blame_lines(revision_hash, filepath, strategy,
                                          ignore_lines, validated_bugfix_lines)
        parent_commit = self._repo.revparse_single('{}^'.format(revision_hash))

        blame = self._repo.blame(filepath,
                                 flags=GIT_BLAME_TRACK_COPIES_SAME_FILE,
                                 newest_commit=parent_commit.hex)
        for lineno, line in changed_lines:
            # returns blamehunk for specific line
            try:
                bh = blame.for_line(lineno)
            except IndexError:
                # this happens when we have the wrong parent node; `bh` was never
                # assigned in that case, so it cannot appear in the message
                msg = 'tried to get file: {}, line: {}, revision: {}'.format(
                    filepath, lineno, revision_hash)
                self._log.error(msg)
                raise  # this is critical

            inducing_commit = self._repo.revparse_single(str(
                bh.orig_commit_id))

            # start = bh.orig_start_line_number
            # lines = bh.lines_in_hunk
            # final_start = bh.final_start_line_number
            # print(revision_hash, '->', inducing_commit.hex)
            # print('original: {}: {}'.format(lineno, line))
            # print('{},{}: {},{}'.format(start, lines, final_start, lines))

            # blame_lines = []
            # for hunk in self._hunks[inducing_commit.hex]:
            #     if hunk['new_file'] != bh.orig_path:
            #         continue
            #     ls = final_start
            #     for i, blame_line in enumerate(hunk['content'].split('\n')):
            #         if blame_line[1:].strip() and line[1:].strip() and blame_line[1:] == line[1:]:
            #             print('blame: {}:{}'.format(ls, blame_line))
            #         ls += 1
            commits.append((inducing_commit.hex, bh.orig_path))

        # make unique
        return list(set(commits))

    def commit_information(self, revision_hash):
        obj = self._repo.get(revision_hash)

        return {
            'author_name':
            obj.author.name,
            'author_email':
            obj.author.email,
            'committer_name':
            obj.committer.name,
            'committer_email':
            obj.committer.email,
            'committer_date_utc':
            datetime.fromtimestamp(obj.commit_time, tz=timezone.utc),
            'committer_date':
            obj.commit_time,
            'committer_date_offset':
            obj.commit_time_offset,
            'message':
            obj.message,
            'file_actions':
            self._file_actions[revision_hash]
        }

    def file_actions(self, revision_hash):
        return self._file_actions[revision_hash]

    def all_files(self, revision_hash):
        # 1. checkout repo
        self._checkout_revision(revision_hash)

        # 2. list files
        return self._list_files()

    def first_occurence(self, filename):
        # file rename tracking is not possible currently in libgit, see:
        # https://github.com/libgit2/libgit2/issues/3041

        # find first occurence of file with git cli

        # git log --follow --diff-filter=A --find-renames=40% foo.js
        path = self._path.replace('.git', '')
        c = subprocess.run([
            'git', 'log', '--all', '--pretty=tformat:"%H %ci"', '--follow',
            '--diff-filter=A', '--find-renames=80%', '--', filename
        ],
                           cwd=path,
                           stdout=subprocess.PIPE,
                           stderr=subprocess.PIPE)
        if c.returncode != 0:
            err = 'Error finding first occurrence of file: {}'.format(filename)
            self._log.error(err)
            self._log.error(c.stderr)
            raise Exception(err)

        full = c.stdout.decode('utf-8')
        try:
            first_line = full.split('\n')[-2]
        except IndexError:
            if not full:
                print('no git log for file {}'.format(filename))
            print(full)
            raise
        first_date = ' '.join(first_line.split(' ')[1:]).replace('"', '')
        dt = datetime.strptime(
            first_date, '%Y-%m-%d %H:%M:%S %z'
        )  # we can do this here because we control the input format, %z does not cover +01:00 just +100 (at least in 3.6)
        return dt

    def tags(self):
        regex = re.compile('^refs/tags')
        ret = []
        for tagref in filter(lambda r: regex.match(r),
                             self._repo.listall_references()):
            tag = self._repo.lookup_reference(tagref)
            target = self._repo.lookup_reference(tagref).peel()
            ret.append({
                'name': tag.name.replace('refs/tags/', ''),
                'revision_hash': target.id
            })
        return ret

    def _checkout_revision(self, revision):
        """Checkout via shell, we ignore stdout output."""
        path = self._path.replace('.git', '')
        c = subprocess.run(['git', 'checkout', '-q', '-f', revision],
                           cwd=path,
                           stdout=subprocess.PIPE)
        return c.returncode == 0

    def _list_files(self):
        """The slower list_files"""
        path = self._path.replace('.git', '')

        ret = []
        for root, dirs, files in os.walk(path):
            for file in files:
                filepath = os.path.join(root, file)
                relative_filepath = filepath.replace(path, '')
                ret.append(relative_filepath)
        return ret

    def _list_files2(self):
        """The faster list_files (relies on find command)"""
        path = self._path.replace('.git', '')
        lines = subprocess.check_output(['find', '.', '-iname', '*.java'],
                                        cwd=path)

        files = []
        for f in lines.decode('utf-8').split('\n'):
            if f.lower().endswith('.java'):
                files.append(f.replace('./', ''))

        return files

    def _get_hunks(self, commit):
        diffs = []
        hunks = []

        # for initial commit (or orphan commits) pygit2 needs some special attention
        initial = False
        if not commit.parents:
            initial = True
            diffs.append((None,
                          commit.tree.diff_to_tree(context_lines=0,
                                                   interhunk_lines=1)))

        # we may have multiple parents (merge commit)
        for parent in commit.parents:
            # we need all information from each parent because in a merge each parent may add different files
            tmp = self._repo.diff(parent,
                                  commit,
                                  context_lines=0,
                                  interhunk_lines=1)
            tmp.find_similar(self._dopts, self._SIMILARITY_THRESHOLD,
                             self._SIMILARITY_THRESHOLD)
            diffs.append((parent.hex, tmp))

        for parent, diff in diffs:
            checked_paths = set()
            for patch in diff:
                if patch.delta.new_file.path in checked_paths:
                    self._log.warn('already have {} in checked_paths'.format(
                        patch.delta.new_file.path))
                    continue
                mode = 'X'
                if patch.delta.status == 1:
                    mode = 'A'
                elif patch.delta.status == 2:
                    mode = 'D'
                elif patch.delta.status == 3:
                    mode = 'M'
                elif patch.delta.status == 4:
                    mode = 'R'
                elif patch.delta.status == 5:
                    mode = 'C'
                elif patch.delta.status == 6:
                    mode = 'I'
                elif patch.delta.status == 7:
                    mode = 'U'
                elif patch.delta.status == 8:
                    mode = 'T'

                # diff to tree gives D for initial commit otherwise
                if initial:
                    mode = 'A'

                # we may have hunks to add
                if patch.hunks and commit.hex not in self._hunks.keys():
                    self._hunks[commit.hex] = []

                # add hunks
                for hunk in patch.hunks:
                    # initial is special case
                    if initial:
                        content = ''.join(
                            ['+' + l.content for l in hunk.lines])
                        hunks.append({
                            'header': hunk.header,
                            'new_file': patch.delta.new_file.path,
                            'new_start': hunk.old_start,
                            'new_lines': hunk.old_lines,
                            'old_start': hunk.new_start,
                            'old_lines': hunk.new_lines,
                            'content': content
                        })
                    else:
                        content = ''.join(
                            [l.origin + l.content for l in hunk.lines])
                        hunks.append({
                            'header': hunk.header,
                            'new_file': patch.delta.new_file.path,
                            'new_start': hunk.new_start,
                            'new_lines': hunk.new_lines,
                            'old_start': hunk.old_start,
                            'old_lines': hunk.old_lines,
                            'content': content
                        })
        return hunks

    def _changed_files(self, commit):
        changed_files = []
        diffs = []

        # for initial commit (or orphan commits) pygit2 needs some special attention
        initial = False
        if not commit.parents:
            initial = True
            diffs.append((None,
                          commit.tree.diff_to_tree(context_lines=0,
                                                   interhunk_lines=1)))

        # we may have multiple parents (merge commit)
        for parent in commit.parents:
            # we need all information from each parent because in a merge each parent may add different files
            tmp = self._repo.diff(parent,
                                  commit,
                                  context_lines=0,
                                  interhunk_lines=1)
            tmp.find_similar(self._dopts, self._SIMILARITY_THRESHOLD,
                             self._SIMILARITY_THRESHOLD)
            diffs.append((parent.hex, tmp))

        for parent, diff in diffs:
            checked_paths = set()
            for patch in diff:
                if patch.delta.new_file.path in checked_paths:
                    self._log.warn('already have {} in checked_paths'.format(
                        patch.delta.new_file.path))
                    continue
                mode = 'X'
                if patch.delta.status == 1:
                    mode = 'A'
                elif patch.delta.status == 2:
                    mode = 'D'
                elif patch.delta.status == 3:
                    mode = 'M'
                elif patch.delta.status == 4:
                    mode = 'R'
                elif patch.delta.status == 5:
                    mode = 'C'
                elif patch.delta.status == 6:
                    mode = 'I'
                elif patch.delta.status == 7:
                    mode = 'U'
                elif patch.delta.status == 8:
                    mode = 'T'

                # diff to tree gives D for initial commit otherwise
                if initial:
                    mode = 'A'

                # we may have hunks to add
                if patch.hunks and commit.hex not in self._hunks.keys():
                    self._hunks[commit.hex] = []

                # add hunks
                for hunk in patch.hunks:
                    # initial is special case
                    if initial:
                        content = ''.join(
                            ['+' + l.content for l in hunk.lines])
                        self._hunks[commit.hex].append({
                            'header':
                            hunk.header,
                            'new_file':
                            patch.delta.new_file.path,
                            'new_start':
                            hunk.old_start,
                            'new_lines':
                            hunk.old_lines,
                            'old_start':
                            hunk.new_start,
                            'old_lines':
                            hunk.new_lines,
                            'content':
                            content
                        })
                    else:
                        content = ''.join(
                            [l.origin + l.content for l in hunk.lines])
                        self._hunks[commit.hex].append({
                            'header':
                            hunk.header,
                            'new_file':
                            patch.delta.new_file.path,
                            'new_start':
                            hunk.new_start,
                            'new_lines':
                            hunk.new_lines,
                            'old_start':
                            hunk.old_start,
                            'old_lines':
                            hunk.old_lines,
                            'content':
                            content
                        })

                # collect line stats
                if initial:
                    fa = {
                        'lines_added': patch.line_stats[2],
                        'lines_deleted': patch.line_stats[1],
                        'changeset_size': len(diff),
                        'parent': None
                    }
                else:
                    fa = {
                        'lines_added': patch.line_stats[1],
                        'lines_deleted': patch.line_stats[2],
                        'changeset_size': len(diff),
                        'parent': parent
                    }

                #if mode == 'R':
                #    print('R {} -> {}, sim: {}'.format(patch.delta.old_file.path, patch.delta.new_file.path, patch.delta.similarity))

                if mode in ['C', 'R']:
                    changed_file = [
                        mode, patch.delta.new_file.path,
                        patch.delta.old_file.path, fa
                    ]
                else:
                    changed_file = [mode, patch.delta.new_file.path, None, fa]

                checked_paths.add(patch.delta.new_file.path)
                changed_files.append(changed_file)
        return changed_files

    def collect(self):
        # list all branches
        for branch in list(self._repo.branches):
            self._collect_branch(branch)

        # list all tags
        for obj in self._repo:
            tag = self._repo[obj]
            if tag.type == GIT_OBJ_TAG:
                self._collect_branch(tag, is_tag=True)

        return self._graph

    def _collect_branch(self, branch, is_tag=False):
        if type(branch) == str:
            branch = self._repo.branches[branch]

        # add nodes to graph
        try:
            for c in self._repo.walk(branch.target):
                self._graph.add_node(c.hex)

                # branch stuff, used for traversing backwards for tags in svn->git conversions
                # if c.hex not in self._branches.keys():
                #     self._branches[c.hex] = []

                # what about tags which are also on branches?
                # if is_tag:
                #     self._tags[c.hex] = branch.name
                # else:
                #     self._branches[c.hex].append(branch.branch_name)

                # add msg
                # self._msgs[c.hex] = c.message

                # add days, we use this later for lookup
                # day = str(datetime.fromtimestamp(c.commit_time, tz=timezone.utc).date())
                # if day not in self._days.keys():
                #     self._days[day] = []
                # self._days[day].append(c.hex)

                # add for convenience for OntdekBaanBfs
                # self._cdays[c.hex] = day

                # add changed files per node
                # if c.hex not in self._file_actions.keys():
                #     self._file_actions[c.hex] = self._changed_files(c)

                # still too expensive
                # self._create_hunks(c)

            # add edges to graph
            for c in self._repo.walk(branch.target):
                for p in c.parents:
                    self._graph.add_edge(p.hex, c.hex)
        except ValueError as e:
            pass
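
A minimal usage sketch for CollectGit (the local path is only an example, and the commit hash and file in the blame call are hypothetical):

cg = CollectGit('/tmp/commons-math')
graph = cg.collect()   # builds the commit DAG over all branches and tags
print(graph.number_of_nodes(), 'commits in graph')

# SZZ-style blame for one bug-fixing commit and file:
# inducing = cg.blame('0123abcd...', 'src/main/java/Foo.java', strategy='code_only')
# for commit_hex, orig_path in inducing:
#     print(commit_hex, orig_path)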
Example #38
0
    def get(self, request, *args, **kw):
        # Process any get params that you may need
        # If you don't need to process get params,
        # you can skip this part
        repo = self.kwargs['resource_id']
        try:
            specific_repo = repo_model.objects.get(id=repo)
        except:
            return Response({'detail': 'Repo not found'},
                            status=status.HTTP_404_NOT_FOUND)
        this_repo = Repository(specific_repo.get_repo_path())
        directory = ""
        if 'directories' in self.kwargs:
            directory = self.kwargs['directories']
        dir_path = path.join(specific_repo.get_repo_path(), directory)
        try:
            os.chdir(dir_path)
        except:
            # returning Response() and raise NotFound is the same
            # return Response(status=status.HTTP_404_NOT_FOUND)
            raise NotFound(detail="No such dir or file", code=404)

        index_tree = this_repo.index
        tuplet = []
        time2 = None
        user = request.user
        is_owner = False
        is_editor = False
        if specific_repo.owner == user:
            is_owner = True
        if user.is_superuser:
            is_owner = True
        for editor in specific_repo.editors.all():
            if editor.id == user.id:
                is_editor = True
        empty = False
        if this_repo.is_empty:
            empty = True
        try:
            commit = this_repo.revparse_single('HEAD')
            tree = commit.tree
            folders = []
            if directory != "":
                item = tree.__getitem__(str(directory))
                index_tree.read_tree(item.id)
                for entry in index_tree:
                    name = entry.path
                    filemode = index_tree[entry.path].mode
                    type = ""
                    if filemode == '33188':
                        type = "tree"
                        if name in folders:
                            continue
                        folders.append(name)
                    else:
                        type = "blob"
                    if "/" in entry.path:
                        name = entry.path.split("/")[0]
                        filemode = '100644'
                        type = "tree"
                        if name in folders:
                            continue
                        folders.append(name)

                    tuplet.append({
                        'name': name,
                        'id': entry.hex,
                        'type': type,
                        'filemode': filemode
                    })
            else:
                for entry in tree:
                    tuplet.append({
                        'name': entry.name,
                        'id': entry.id.hex,
                        'type': entry.type,
                        'filemode': entry.filemode
                    })
            date_handler = lambda obj: (  # noqa: E731
                obj.isoformat()
                if isinstance(obj,
                              (datetime.datetime, datetime.date)) else None)
            time2 = json.dumps(datetime.datetime.fromtimestamp(
                commit.commit_time),
                               default=date_handler)
            dir_hier = directory

            main_list = {
                'files': tuplet,
                'hex': commit.hex,
                'message': commit.message,
                'author': commit.author.name,
                'committer': commit.committer.name,
                'time': time2,
                'branches': list(this_repo.branches),
                'is_owner': is_owner,
                'is_empty': empty,
                'dir_hier': dir_hier,
                'is_editor': is_editor
            }

        except:
            # no files, no initial commit so no head hex
            main_list = {
                'files': tuplet,
                'hex': None,
                'message': None,
                'author': None,
                'committer': None,
                'time': None,
                'branches': [],
                'is_owner': is_owner,
                'is_empty': empty,
                'is_editor': is_editor,
                'dir_hier': None
            }

        return Response(main_list, status=status.HTTP_200_OK)
Example #39
0
File: git.py Project: cantsin/git-tracker
class GitMixin(object):

    tag_or_remote_regex = re.compile('^refs/(tags|remotes)/(.*)')

    def __init__(self):
        where = GitOperations.get_repository_location(self.user, self.name)
        self.ondisk = Repository(where)

    def refresh(self):
        creds = GitOperations.get_credentials(self.git_user, self.user)
        for remote in self.ondisk.remotes:
            remote.credentials = creds
            remote.fetch()
        # update current reference
        master_ref = self.ondisk.lookup_reference('refs/heads/master')
        remote_ref = self.ondisk.lookup_reference('refs/remotes/origin/master')
        master_ref.set_target(remote_ref.target)

    def filter_references(self, regex):
        return [ref for ref in self.ondisk.listall_references()
                if regex.match(ref)]

    def get_commit_time(self, name):
        ref = self.ondisk.revparse_single(name)
        if isinstance(ref, Tag):
            return ref.get_object().commit_time
        if isinstance(ref, Commit):
            return ref.commit_time
        raise GitException('invalid reference: commit time could not be found.') # pragma: no cover

    def get_latest_refs(self, count=None):
        info = self.filter_references(GitMixin.tag_or_remote_regex)
        refs = list(zip(info, map(self.get_commit_time, info)))
        refs.sort(key=itemgetter(1), reverse=True)
        def ref_info(info):
            (ref, commit_time) = info
            what, name = GitMixin.tag_or_remote_regex.findall(ref)[0]
            return (what, name, commit_time)
        refs = map(ref_info, refs)
        if not count:
            return refs
        return islice(refs, count)

    def filter_commits(self, flags=0):
        all_commits = self.ondisk.walk(self.ondisk.head.target, flags)
        emails = [ue.email for ue in self.user.emails.all()]
        return filter(lambda commit: commit.author.email in emails, all_commits)

    def get_commits(self, count=None):
        all_commits = self.filter_commits(GIT_SORT_TOPOLOGICAL)
        if not count:
            return all_commits
        return islice(all_commits, count)

    def get_commit_count(self):
        return len(list(self.filter_commits()))

    def get_shorthand_of_branch(self, branch):
        commit = self.ondisk.lookup_branch(branch)
        if commit:
            return commit.shorthand
        return '(none)'

    def get_sha1_of_branch(self, branch):
        commit = self.ondisk.lookup_branch(branch)
        if commit:
            return str(commit.get_object().id)[:6]
        return '(none)'

    def get_numstat(self, commit):
        diff = None
        try:
            previous_commit = self.ondisk.revparse_single(str(commit.id) + '^')
            diff = self.ondisk.diff(previous_commit, commit)
        except KeyError:
            # likely we hit the very first commit.
            diff = commit.tree.diff_to_tree(swap=True)
        additions, deletions = 0, 0
        for patch in diff:
            additions += patch.additions
            deletions += patch.deletions
        return (len(diff), additions, deletions)

    def get_first_updated(self):
        all_commits = self.ondisk.walk(self.ondisk.head.target,
                                       GIT_SORT_TIME | GIT_SORT_REVERSE)
        first_commit = next(all_commits)
        return first_commit.commit_time

    def get_last_updated(self):
        all_commits = self.ondisk.walk(self.ondisk.head.target,
                                       GIT_SORT_TIME)
        last_commit = next(all_commits)
        return last_commit.commit_time

    def get_file_count(self):
        diff = self.ondisk.head.get_object().tree.diff_to_tree()
        return len([patch.old_file_path for patch in diff])

    def get_line_count(self):
        diff = self.ondisk.head.get_object().tree.diff_to_tree()
        return sum([patch.deletions for patch in diff])

    def get_author_count(self):
        commits = self.filter_commits()
        return len(set([commit.author.email for commit in commits]))

    def commits_between(self, start, end):
        all_commits = self.filter_commits(GIT_SORT_TIME | GIT_SORT_REVERSE)
        starting = dropwhile(lambda obj: obj.commit_time < start, all_commits)
        return takewhile(lambda obj: obj.commit_time <= end, starting)

    @staticmethod
    def by_day(obj):
        # we want to group our commit times by the day. so convert
        # timestamp -> date -> timestamp
        new_date = date.fromtimestamp(obj.commit_time)
        new_date += timedelta(days=1)
        return timegm(new_date.timetuple())

    @staticmethod
    def group_by(series):
        result = groupby(series, GitMixin.by_day)
        return [{'date': commit_date,
                 'value': len(list(commits))}
                for commit_date, commits in result]

    def histogram(self, start, end):
        series = self.commits_between(start, end)
        return GitMixin.group_by(series)
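
The mixin above expects the consuming class to provide `user`, `name`, and `git_user` attributes before its __init__ runs; a minimal, hypothetical host class might look like this (model and variable names are assumptions):

class TrackedRepository(GitMixin):
    def __init__(self, user, name, git_user):
        self.user = user
        self.name = name
        self.git_user = git_user
        super().__init__()   # opens the on-disk repository via GitOperations

# repo = TrackedRepository(user, 'my-project', git_user)
# print(repo.get_commit_count())
# print(repo.get_shorthand_of_branch('master'))
# print(repo.histogram(start_timestamp, end_timestamp))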
Example #40
0
def process(repo, history):
	# GET A REPO ON DISK
	base = Repository(repo)
	base.checkout('HEAD')

	file_xsmall = 0
	file_small = 0
	file_medium = 0
	file_large = 0
	file_xlarge = 0
		
	hunk_xsmall = 0
	hunk_small = 0
	hunk_medium = 0
	hunk_large = 0
	hunk_xlarge = 0

	line_xsmall = 0
	line_small = 0
	line_medium = 0
	line_large = 0
	line_xlarge = 0 
	
	i = 0
	while i < len(history) - 1:
		print('\rDiff#: ' + str(i + 1) + ' of ' + str(len(history)-1), end='')

		t0 = base.revparse_single(history[i].hex)
		t1 = base.revparse_single(history[i+1].hex)
		
		try:
			diff = base.diff(t0,t1)
		except ValueError:
			print('')
			print('Value Error')
			print('')
			i += 1
			continue
		
		files = [p for p in diff]
		
		if len(files) == 1:
			file_xsmall += 1
		if len(files) >= 2 and len(files) <= 4:
			file_small += 1
		if len(files) >= 5 and len(files) <= 7:
			file_medium += 1
		if len(files) >= 8 and len(files) <= 10:
			file_large += 1
		if len(files) >= 11:
			file_xlarge += 1
		
		hunksInCommit = 0
		linesInCommit = 0

		for modfile in files:
			hunksInCommit += len(modfile.hunks)
			for hunk in modfile.hunks:
				for line in hunk.lines:
					if line[0] == '-' or line[0] == '+':
						linesInCommit += 1


		if hunksInCommit <= 1:
			hunk_xsmall += 1
		if hunksInCommit >= 2 and hunksInCommit <= 8:
			hunk_small += 1
		if hunksInCommit >= 9 and hunksInCommit <= 17:
			hunk_medium += 1
		if hunksInCommit >= 18 and hunksInCommit <= 26:
			hunk_large += 1
		if hunksInCommit >= 27:
			hunk_xlarge += 1

		if linesInCommit <= 5:
			line_xsmall += 1
		if linesInCommit >= 6 and linesInCommit <= 46:
			line_small += 1
		if linesInCommit >= 47 and linesInCommit <= 106:
			line_medium += 1
		if linesInCommit >= 107 and linesInCommit <= 166:
			line_large += 1
		if linesInCommit >= 167:
			line_xlarge += 1

		i += 1
	print('')

	ts = time.time()
	st = datetime.datetime.fromtimestamp(ts).strftime('-%Y-%m-%d.%H.%M.%S')
	name = repo.replace('/.git', '') + st + '.txt'
	output = open(name,'w')

	output.write('--------- ' + repo + ' ----------' + '\n')
	output.write('Number of Lines Modified:' + '\n')
	output.write('x-small: ' + str(line_xsmall) + '\n')
	output.write('small: ' + str(line_small) + '\n')
	output.write('medium: ' + str(line_medium) + '\n')
	output.write('large: ' + str(line_large) + '\n')
	output.write('x-large: ' + str(line_xlarge) + '\n')

	output.write('Number of Files Modified:' + '\n')
	output.write('x-small: ' + str(file_xsmall) + '\n')
	output.write('small: ' + str(file_small) + '\n')
	output.write('medium: ' + str(file_medium) + '\n')
	output.write('large: ' + str(file_large) + '\n')
	output.write('x-large: ' + str(file_xlarge) + '\n')

	output.write('Number of Hunks Per Commit' + '\n')
	output.write('x-small: ' + str(hunk_xsmall) + '\n')
	output.write('small: ' + str(hunk_small) + '\n')
	output.write('medium: ' + str(hunk_medium) + '\n')
	output.write('large: ' + str(hunk_large) + '\n')
	output.write('x-large: ' + str(hunk_xlarge) + '\n')

	output.close()
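
A minimal driver sketch for process() above (the repository path is only an example); history must run oldest-to-newest so that history[i] and history[i+1] are consecutive revisions:

if __name__ == '__main__':
    from pygit2 import Repository, GIT_SORT_TOPOLOGICAL, GIT_SORT_REVERSE

    repo_path = '/home/user/spoon-knife/.git'
    base = Repository(repo_path)
    # walk the history from the first commit to HEAD
    history = list(base.walk(base.head.target,
                             GIT_SORT_TOPOLOGICAL | GIT_SORT_REVERSE))
    process(repo_path, history)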