Ejemplo n.º 1
0
def diff_status():
    """Build, cache and return the module diff map of every project vs. Greenfield.

    The latest commit of the ``terraform`` repository is used to populate a
    state map (via ``diff_utils.set_state_map``). For every module path used by
    the ``cognitedata-greenfield`` project, the commit Greenfield points at is
    compared with the commit each other project points at for the same
    folder/module, and the result is recorded through
    ``diff_utils.set_diff_module_map``.

    Returns:
        The diff module map serialized as a JSON string. The same map is also
        stored in the cache under the key ``'diff_module_map'``.
    """
    # get latest commits of repos
    git_folder = app.config['GIT_REPOS_FOLDER']
    rep = Repo(git_folder + '/terraform/')
    lastfcommit = next(iter(rep.get_walker(max_entries=1))).commit

    modules_repo = Repo(git_folder + '/terraform-cognite-modules')

    all_files = porcelain.ls_files(rep)
    state_map = {}
    diff_module_map = {}

    diff_utils.set_state_map(state_map, all_files, lastfcommit)
    count = 0

    # For now, we are comparing everything against Greenfield. We assume
    # Greenfield is the most up to date project.
    for folder_path, modules in state_map['cognitedata-greenfield'].items():
        for module, module_info in modules.items():
            for full_module_path, git_ref in module_info.items():
                dirfnm = full_module_path.encode('utf-8')
                repowlkr_subpath = modules_repo.get_walker(paths=[dirfnm])
                repowlkr = modules_repo.get_walker()

                # NOTE(review): the full-history list is rebuilt for every
                # module path; it looks loop-invariant, but it is left here
                # because the diff_utils helpers may mutate it -- confirm
                # before hoisting.
                all_commits = []
                subpath_commits = []
                diff_utils.create_all_commits_list(iter(repowlkr), all_commits)
                diff_utils.create_subpath_commits_list(iter(repowlkr_subpath),
                                                       subpath_commits)

                greenfield_commit = diff_utils.get_commit_in_subpath(
                    git_ref, all_commits, subpath_commits)

                for project in state_map:
                    if project == 'cognitedata-greenfield':
                        continue
                    # Only projects using the same module in the same folder
                    # can be compared.
                    if folder_path not in state_map[project]:
                        continue
                    if module not in state_map[project][folder_path]:
                        continue

                    project_commit = diff_utils.get_commit_in_subpath(
                        state_map[project][folder_path][module][full_module_path],
                        all_commits, subpath_commits)
                    if project_commit is None:
                        continue

                    diff_utils.set_diff_module_map(
                        project_commit, project, greenfield_commit, folder_path,
                        module, full_module_path, subpath_commits,
                        diff_module_map, count)
                    count += 1

    cache.set('diff_module_map', diff_module_map)
    return json.dumps(diff_module_map)
Ejemplo n.º 2
0
    def list_all_contributors (self):
        """Return the unique ``(author, email)`` pairs found on ``master``.

        Every repository path in ``self.repos`` is opened and the history
        reachable from ``refs/heads/master`` is walked. A progress line is
        written to stderr for each repository.

        Returns:
            A list of ``(author, email)`` tuples without duplicates, in order
            of first appearance. ``email`` is ``""`` when the author field
            contains no ``<``.
        """
        tot = len(self.repos)
        seen = set()        # O(1) membership test for the de-duplication
        contributors = []   # keeps first-appearance order
        for index, repo_path in enumerate(self.repos, 1):
            sys.stderr.write("[%d/%d Analyzing %s]\n" % (index, tot, repo_path))
            repo = Repo(repo_path)
            master = repo.get_refs()['refs/heads/master']
            for i in repo.get_walker ([master]):
                raw_author = i.commit.author
                if "<" in raw_author:
                    split = raw_author.split("<")
                    author = split[0].strip ()
                    # drop the trailing ">" of "<address>"
                    email = split[1].strip ()[:-1]
                else:
                    author = raw_author
                    email = ""

                contributor = (author, email)
                if contributor not in seen:
                    seen.add(contributor)
                    contributors.append(contributor)

        return contributors
Ejemplo n.º 3
0
def get_first_parents(repo_path: str) -> List[str]:
    """Collect commit ids along the first-parent line of the repository.

    The history is walked in topological order. Until the first merge commit
    is met, every commit and its first parent are recorded. From the first
    merge onwards, a commit's first parent is only recorded when the commit
    itself was already recorded, so side branches are not followed.

    Args:
        repo_path: Path of the repository to open.

    Returns:
        The recorded commit ids, without duplicates, in order of discovery.
    """
    repo = Repo(repo_path)
    first_parents: List[str] = []
    # Mirror of ``first_parents`` so membership tests are O(1), not O(n).
    recorded = set()
    on_merge = False

    def remember(commit_id):
        if commit_id not in recorded:
            recorded.add(commit_id)
            first_parents.append(commit_id)

    for entry in repo.get_walker(order=dulwich.walk.ORDER_TOPO):
        commit = entry.commit
        commit_id = commit.sha().hexdigest()
        # In order to properly work on python 2 and 3 we need some utf magic
        parents = [_to_str(i) for i in commit.parents]
        if not parents:
            # root commit
            remember(commit_id)
        elif not on_merge:
            if len(parents) > 1:
                on_merge = True
            remember(commit_id)
            remember(parents[0])
        elif commit_id in recorded:
            remember(parents[0])

    return first_parents
Ejemplo n.º 4
0
class Git():
    """
    object that holds the git repository
    """
    def __init__(self):
        """
        opens the repository in the user data directory; when there is none
        yet, it is created and an initial commit is made
        """
        self.repo_path = user_data_dir(appname, appauthor)
        self.files_under_version_controll = ['config.json', 'data.json']
        # initialize repo if it doesn't exist
        try:
            self.repo = Repo(self.repo_path)
        except NotGitRepository:
            # create repo
            if not os.path.exists(self.repo_path):
                try:
                    os.makedirs(self.repo_path)
                except OSError as exc:  # Guard against race condition
                    if exc.errno != errno.EEXIST:
                        raise
            # Repo.init returns the new repository, no need to reopen it
            self.repo = Repo.init(self.repo_path)
            self.commit('initial commit')

    def commit(self, message):
        """
        commits the current status of files_under_version_controll
        :param message: str; commit message
        """
        self.repo.stage(self.files_under_version_controll)
        self.repo.do_commit(str.encode(message), str.encode('nextSongs'))

    def get_current_head(self):
        """
        get sha as bytes of current head
        :return: bytes; sha1 checksum of current head
        """
        return self.repo.head()

    def get_commits(self):
        """
        generates a list of all commits, newest author time first
        :return: list-of-dulwich.objects.Commit
        """
        commits = [entry.commit for entry in self.repo.get_walker()]
        # Sort on the raw timestamp: naive local datetimes are not monotonic
        # around DST changes. Sorting ascending and reversing keeps the tie
        # order of the previous reversed(sorted(...)) implementation, and a
        # real list (as documented) can be iterated more than once.
        commits.sort(key=lambda commit: commit.author_time)
        commits.reverse()
        return commits

    def restore(self, commit):
        """
        does a hard reset to a given commit, commits the restored state and
        reloads the configuration
        :param commit: dulwich.objects.Commit; commit to reset to
        """
        porcelain.reset(self.repo, 'hard',
                        str.encode(commit.sha().hexdigest()))
        self.commit("Restored setting and data.")
        Config.read_config()
Ejemplo n.º 5
0
def get_commit(repo: Repo, path: Path) -> Tuple[float, str]:
    """Return ``(author_time, author_name)`` of the first walked commit for *path*.

    The walker is asked to follow renames and to yield entries in reverse
    order. When *path* has no final component (``path.name`` is empty) the
    history is not restricted to any path. The author name is the author
    field with everything from ``" <"`` onwards removed.

    Returns ``(0.0, "")`` when the walk yields nothing or raises ``KeyError``.
    """
    try:
        if path.name:
            wanted = [bytes(path)]
        else:
            wanted = None
        history = repo.get_walker(paths=wanted, follow=True, reverse=True)
        found = next(iter(history)).commit
        author_name = re.sub(" <.*", "", found.author.decode())
        return (found.author_time, author_name)
    except (KeyError, StopIteration):
        return (0.0, "")
Ejemplo n.º 6
0
def get_children_per_parent(repo_path: str) -> DefaultDict[str, Set[str]]:
    """Map each parent commit id to the set of hex SHAs of its children.

    The repository at *repo_path* is walked in topological order; every
    commit is registered as a child of each of its parents. Commits without
    children never appear as a key.
    """
    children_per_parent: DefaultDict[str, Set[str]] = defaultdict(set)
    walker = Repo(repo_path).get_walker(order=dulwich.walk.ORDER_TOPO)

    for entry in walker:
        child = entry.commit
        parent_ids = [_to_str(parent) for parent in child.parents]
        if not parent_ids:
            continue
        child_id = child.sha().hexdigest()
        for parent_id in parent_ids:
            children_per_parent[parent_id].add(child_id)

    return children_per_parent
Ejemplo n.º 7
0
def branch_has_change(branch, change, repo_path):
    """Tell whether a commit carrying ``Change-Id: <change>`` is on *branch*.

    *branch* may be given with or without the ``refs/heads/`` prefix. The
    history reachable from the branch tip is walked until a commit message
    containing a line break followed by ``Change-Id: <change>`` is found.

    Returns ``True`` on the first match, ``False`` otherwise.
    """
    repo = Repo(repo_path)
    if branch.startswith('refs/heads/'):
        ref_name = branch
    else:
        ref_name = 'refs/heads/' + branch
    tip = repo.refs[ref_name]
    needle = '\nChange-Id: ' + change
    for entry in repo.get_walker(include=[tip]):
        if needle in entry.commit.message:
            return True
    return False
Ejemplo n.º 8
0
 def _find_date_boundaries (self):
     """Widen ``self.date_oldest`` / ``self.date_newest`` to cover all commits.

     Walks the history reachable from ``refs/heads/master`` of every
     repository in ``self.repos`` and keeps the smallest and largest
     ``commit_time`` seen. Existing non-None boundaries are only ever
     extended, never narrowed.
     """
     for repo_path in self.repos:
         repo = Repo(repo_path)
         master = repo.get_refs()['refs/heads/master']
         for i in repo.get_walker ([master]):
             commit_time = i.commit.commit_time
             if self.date_oldest is None or self.date_oldest > commit_time:
                 self.date_oldest = commit_time
             if self.date_newest is None or self.date_newest < commit_time:
                 self.date_newest = commit_time
Ejemplo n.º 9
0
    def _git_commit_list(self):
        """
        Return the ids of all commits of the repository at
        ``self.config['top_dir']``, in the order the date-ordered walker
        yields them
        """
        repository = Repo(self.config['top_dir'])
        history = repository.get_walker(order=walk.ORDER_DATE)
        return [item.commit.id for item in history]
Ejemplo n.º 10
0
    def _git_commit_list(self):
        """
        Build the list of commit ids found by a date-ordered walk of the
        repository located at ``self.config['top_dir']``
        """
        top_dir = self.config['top_dir']
        commit_ids = []
        commit_ids.extend(
            step.commit.id
            for step in Repo(top_dir).get_walker(order=walk.ORDER_DATE)
        )
        return commit_ids
Ejemplo n.º 11
0
def branch_has_change(branch, change, repo_path):
    """Return True when some commit on *branch* mentions ``Change-Id: <change>``.

    A missing ``refs/heads/`` prefix is added to *branch* before the ref is
    looked up. Commit messages are searched for a newline followed by
    ``Change-Id: `` and *change*; the walk stops at the first hit.
    """
    repo = Repo(repo_path)
    prefix = 'refs/heads/'
    if not branch.startswith(prefix):
        branch = prefix + branch
    head = repo.refs[branch]
    marker = '\nChange-Id: ' + change
    return any(
        marker in step.commit.message
        for step in repo.get_walker(include=[head])
    )
Ejemplo n.º 12
0
    def analyze(self):
        """Run the tests on every ``.py`` file below ``self.path``, then delete it.

        First the files as found on disk are tested. When ``self.previous``
        is set and ``self.path`` can be opened as a repository, the older
        versions of each file are tested as well. ``self.job.meta`` is
        updated with the file currently being processed.

        The directory ``self.path`` is removed at the end, including when the
        repository cannot be opened (the previous early ``return`` skipped
        that cleanup).
        """
        # ToDo add support for older file versions
        for root, dirs, files in os.walk(self.path):
            for f in files:

                # maybe support other files in the future
                if not f.endswith('.py'):
                    continue

                full_path = os.path.join(root, f)
                cleaned_path = full_path.replace(self.path, '')
                self.job.meta['current_file'] = cleaned_path
                self.job.save()

                with open(full_path, 'r') as source_file:
                    self.run_tests(source_file.read(), cleaned_path)

        if self.previous:
            try:
                r = Repo(self.path)
            except Exception:
                # Not a usable repository: skip the history pass, but still
                # fall through to the cleanup below.
                r = None

            if r is not None:
                for root, dirs, files in os.walk(self.path):
                    for f in files:

                        if not f.endswith('.py'):
                            continue

                        cleaned_path = os.path.join(root, f).replace(self.path, '')
                        self.job.meta['current_file'] = cleaned_path
                        self.job.save()

                        # path without its leading separator
                        relative_path = cleaned_path[1:]
                        commits = iter(r.get_walker(paths=[relative_path]))

                        # Skip the first walked commit (presumably the version
                        # already tested from disk above).
                        next(commits, None)
                        for commit in commits:
                            try:
                                source = self.get_file(
                                    r, r[commit.commit.id].tree, relative_path)
                                self.run_tests(source, cleaned_path, True,
                                               commit.commit.id)
                            except KeyError:
                                # known dulwich error
                                # @FixThis
                                pass
        shutil.rmtree(self.path)
Ejemplo n.º 13
0
def load_git(path, revision):
    """Load every JSON entry of the ``menu`` tree at *revision*.

    *revision* is ASCII-encoded and compared as a prefix against the ids of
    the walked commits; the first matching commit id replaces it. When no
    commit matches, the encoded value is passed on unchanged. Each entry of
    the ``menu`` object is decoded as UTF-8, passed to ``check_indents`` and
    parsed as JSON.

    Returns a dict mapping entry name to the parsed JSON value.
    """
    # Thanks to Jelmer Vernooij for spelling this one out for me :-D
    repo = Repo(path)
    prefix = revision.encode("ascii")
    rev = next(
        (entry.commit.id for entry in repo.get_walker()
         if entry.commit.id.startswith(prefix)),
        prefix,
    )
    menu = porcelain.get_object_by_path(repo, "menu", rev)
    loaded = {}
    for name, _mode, object_id in menu.iteritems():
        text = str(repo[object_id].data, "utf-8")
        check_indents(text, name)
        loaded[name] = json.loads(text)

    return loaded
Ejemplo n.º 14
0
class ManifestWalker:
    """
    Walks the remote branches of a manifest repository, oldest commits
    first, and hands back key information plus the changed file's
    contents for each single-change commit
    """
    def __init__(self, manifest_dir, latest_sha):
        """Open the repository and UTF-8 encode the already-seen SHAs"""

        self.repo = Repo(manifest_dir)
        self.latest_sha = [sha.encode('utf-8') for sha in latest_sha]

    def walk(self):
        """
        Yield ``((path, commit), contents)`` for every commit reachable
        from a ``refs/remotes`` ref but not from ``self.latest_sha``,
        walking history forward. Commits that do not have exactly one
        change are skipped.
        """

        remote_tips = set()
        for ref in self.repo.refs.keys():
            if ref.startswith(b'refs/remotes'):
                tip = self.repo.get_object(self.repo.refs[ref])
                remote_tips.add(tip.id)

        history = self.repo.get_walker(include=list(remote_tips),
                                       exclude=self.latest_sha,
                                       reverse=True)

        for entry in history:
            changes = entry.changes()

            # Exactly one change is required: zero means a merge commit,
            # more than one means a multi-file commit.
            if len(changes) == 1:
                new_side = changes[0].new
                contents = self.repo.get_object(new_side.sha).as_pretty_string()
                yield ((new_side.path, entry.commit), contents)
Ejemplo n.º 15
0
def commit_history(repo_key, branch):
    """Render the commit history of *branch*, with commits grouped by day.

    The repository is looked up in ``settings.REPOS`` by *repo_key*. The
    template receives the commits as a list of per-day lists (in walker
    order), the names of all local branches without their prefix, and the
    selected branch.
    """
    repo = Repo(settings.REPOS[repo_key])

    # Names of the local branches, prefix stripped. The selected branch is
    # passed to the template separately.
    references = [
        ref.replace(LOCAL_BRANCH_PREFIX, '', 1)
        for ref, sha in repo.get_refs().iteritems()
        if LOCAL_BRANCH_PREFIX in ref
    ]

    branch_head = repo.get_refs()[LOCAL_BRANCH_PREFIX+branch]
    walker = repo.get_walker(include=[branch_head])

    # Lists are used instead of a dict because the walker output is already
    # ordered, so no sorting is needed afterwards.
    grouped = []
    bucket = []
    last_day = None
    for entry in walker:
        commit = entry.commit
        day = filters.convert_unix_time_filter(commit.commit_time, '%d %b %Y')

        if last_day is not None and day != last_day:
            # a new day starts: store the finished group and open a new one
            grouped.append(bucket)
            bucket = [commit]
        else:
            bucket.append(commit)

        last_day = day

    # store the last (possibly empty) group
    grouped.append(bucket)
    return render_template('commit-history.html', commits=grouped,
                           repo_key=repo_key, references=references,
                           selected_branch=branch)
Ejemplo n.º 16
0
    def build_stats_by_periods (self, periods, filter_fn=None):
        """Count commit authors per period over ``master`` of every repository.

        Args:
            periods: non-empty list of ints; it is sorted in place. Commits
                whose ``commit_time`` lies outside ``[min(periods),
                max(periods)]`` are ignored.
            filter_fn: optional callable receiving the walker entry; the
                commit is skipped when it returns a false value.

        Returns:
            A dict mapping each period to the result of
            ``self._plain_to_count`` applied to the list of author names
            (text before ``<``, stripped) that ``self._find_period``
            assigned to that period.
        """
        assert len(periods) > 0
        assert all(isinstance(p, int) for p in periods)

        periods.sort()
        period_keys = sorted(set(periods))
        # One list PER period: dict.fromkeys(periods, []) made every key
        # share a single list, so each period ended up with all authors.
        periods = dict((key, []) for key in period_keys)
        lower = period_keys[0]
        upper = period_keys[-1]

        tot = len(self.repos)
        for index, repo_path in enumerate(self.repos, 1):
            sys.stderr.write("[%d/%d Analyzing %s]\n" % (index, tot, repo_path))
            repo = Repo(repo_path)
            master = repo.get_refs()['refs/heads/master']
            for i in repo.get_walker ([master]):
                commit_time = i.commit.commit_time
                if commit_time < lower or commit_time > upper:
                    continue
                if filter_fn is not None and not filter_fn (i):
                    continue

                period = self._find_period (periods.keys(), commit_time)
                author = i.commit.author.split("<")[0].strip()

                periods[period].append(author)

        for period in period_keys:
            periods[period] = self._plain_to_count(periods[period])

        return periods
Ejemplo n.º 17
0
def get_latest_commits(git_folder, module_state_map):
    """Return the module state map, building it from /terraform on a cache miss.

    The map is looked up in the cache under ``'module_state_map'``. Only when
    it is missing is the ``terraform`` repository opened: its latest commit
    and tracked files are then passed to ``diff_utils.set_module_state_map``
    to fill a fresh map.

    Args:
        git_folder: A string of the folder containing the repos
        module_state_map: Ignored; kept for backward compatibility. The map
            always comes from the cache or is built from scratch.

    Returns:
        A nested dictionary of all terraform modules being used by all projects,
        path name within that project and path to that specific module.
        See diff_utils.set_module_state_map for example
    """
    state_map = cache.get('module_state_map')
    if state_map is None:
        # Only touch the repository when the map has to be rebuilt.
        rep = Repo(git_folder + '/terraform/')
        lastfcommit = next(iter(rep.get_walker(max_entries=1))).commit
        all_files = porcelain.ls_files(rep)
        state_map = {}
        diff_utils.set_module_state_map(state_map, all_files, lastfcommit)
    # Stored on a hit too, as before (presumably refreshes the cache entry --
    # confirm against the cache backend).
    cache.set('module_state_map', state_map)
    return state_map
Ejemplo n.º 18
0
class DiffWalker:
    """
    Handles determining which new commits occurred between two successive
    builds, taking into account possibly having no previous build
    """
    def __init__(self, repo_dir):
        """Initialize the repository connection"""

        # Making the assumption the repo is already checked out
        # at this location from previous steps
        self.repo = Repo(str(repo_dir.resolve()))

    def walk(self, old_shas, new_shas):
        """
        Walk through the set of commits between the sets of given SHAs
        to determine the new commits and return the list of the commits

        Commits reachable from ``new_shas`` but not from ``old_shas`` are
        returned in walker order. A dulwich ``MissingCommitError`` is
        re-raised as this module's ``MissingCommitError``, with the
        original exception kept as its cause.
        """

        try:
            walker = self.repo.get_walker(include=new_shas, exclude=old_shas)
            # Consume the walker inside the try block: missing commits can
            # also surface while iterating, not only when it is created.
            return [entry.commit for entry in walker]
        except dulwich.errors.MissingCommitError as exc:
            raise MissingCommitError(exc) from exc
Ejemplo n.º 19
0
#!/usr/bin/python
# Example printing the last author of a specified file.
#
# Usage: <script> filename
# Opens the repository in the current directory and reports the most recent
# commit that touched the given path.

import sys
import time
from dulwich.repo import Repo

if len(sys.argv) < 2:
    print("usage: %s filename" % (sys.argv[0],))
    sys.exit(1)

r = Repo(".")

# dulwich works with byte paths; on Python 2 sys.argv already holds bytes.
path = sys.argv[1]
if not isinstance(path, bytes):
    path = path.encode('utf-8')

w = r.get_walker(paths=[path], max_entries=1)
try:
    # next(iter(...)) works on Python 2 and 3, unlike iterator.next()
    c = next(iter(w)).commit
except StopIteration:
    print("No file %s anywhere in history." % sys.argv[1])
else:
    # The arguments follow the message: "... at <time> by <author> ...".
    print("%s was last changed at %s by %s (commit %s)" % (sys.argv[1], time.ctime(c.author_time), c.author, c.id))
Ejemplo n.º 20
0
"""Example printing the last author of a specified file.

Usage: <script> filename

Opens the repository in the current directory and reports the most recent
commit that touched the given path.
"""

import sys
import time

from dulwich.repo import Repo

if len(sys.argv) < 2:
    print("usage: %s filename" % (sys.argv[0], ))
    sys.exit(1)

r = Repo(".")

# dulwich expects paths as bytes
path = sys.argv[1].encode('utf-8')

w = r.get_walker(paths=[path], max_entries=1)
try:
    c = next(iter(w)).commit
except StopIteration:
    print("No file %s anywhere in history." % sys.argv[1])
else:
    # author and id are bytes; decode them so the output does not show
    # b'...' reprs.
    author = c.author.decode('utf-8', 'replace')
    commit_id = c.id.decode('ascii')
    print("%s was last changed by %s at %s (commit %s)" %
          (sys.argv[1], author, time.ctime(c.author_time), commit_id))
Ejemplo n.º 21
0
def display_diff():
    """Return, as JSON, the module commits of every project next to Greenfield's.

    The result starts from the cached ``'ret'`` value (or an empty dict) and
    gets:

    * ``ret["all_subpath_commits"][module]["module_commits"]``: committer,
      description and sha of every commit touching the module path,
    * ``ret["all_subpath_commits"][module][<project>]``: the commit id each
      project (including ``cognitedata-greenfield``) points at,
    * ``ret[module]["greenfield"]`` and ``ret[module][<project>]``: the same
      ids for every project sharing the folder/module with Greenfield.

    Projects whose commit cannot be resolved in the module's history are
    skipped.
    """
    git_folder = app.config['GIT_REPOS_FOLDER']
    rep = Repo(git_folder + '/terraform/')
    lastfcommit = next(iter(rep.get_walker(max_entries=1))).commit

    modules_repo = Repo(git_folder + '/terraform-cognite-modules')

    all_files = porcelain.ls_files(rep)
    state_map = {}

    ret = cache.get('ret')
    if ret is None:
        ret = {}

    diff_utils.set_state_map(state_map, all_files, lastfcommit)
    ret["all_subpath_commits"] = {}

    for folder_path, modules in state_map['cognitedata-greenfield'].items():
        for module, module_info in modules.items():
            module_summary = {"module_commits": []}
            ret["all_subpath_commits"][module] = module_summary
            for full_module_path, git_ref in module_info.items():
                dirfnm = full_module_path.encode('utf-8')
                repowlkr_subpath = modules_repo.get_walker(paths=[dirfnm])
                repowlkr = modules_repo.get_walker()

                all_commits = []
                subpath_commits = []
                diff_utils.create_all_commits_list(iter(repowlkr), all_commits)
                diff_utils.create_subpath_commits_list(iter(repowlkr_subpath),
                                                       subpath_commits)

                for commit in subpath_commits:
                    parsed_commit = commit.as_pretty_string().decode().split(
                        '\n')
                    description = parsed_commit[-2]
                    # sometimes the commit description has 2 new lines at
                    # the end
                    if len(description) == 0:
                        description = parsed_commit[-3]
                    module_summary["module_commits"].append({
                        "committer": parsed_commit[2],
                        "description": description,
                        "sha_commit": commit.id.decode(),
                    })

                greenfield_commit = diff_utils.get_commit_in_subpath(
                    git_ref, all_commits, subpath_commits)
                module_summary[
                    "cognitedata-greenfield"] = greenfield_commit.id.decode()

                for project in state_map:
                    if project == 'cognitedata-greenfield':
                        continue
                    if folder_path not in state_map[project]:
                        continue
                    if module not in state_map[project][folder_path]:
                        continue

                    project_commit = diff_utils.get_commit_in_subpath(
                        state_map[project][folder_path][module]
                        [full_module_path], all_commits, subpath_commits)
                    if project_commit is None:
                        # The project's git ref is invalid: check this
                        # BEFORE reading ``.id``, which used to fail here.
                        continue

                    project_sha = project_commit.id.decode()
                    module_summary[project] = project_sha
                    if module not in ret:
                        ret[module] = {}
                    ret[module]["greenfield"] = greenfield_commit.id.decode()
                    ret[module][project] = project_sha
    return jsonify(ret)
Ejemplo n.º 22
0
class Repo(object):
    """
    Wrapper around a git ``Repository`` object that knows:

    * How to get all the files in the repository
    * How to get the oid of HEAD
    * How to get the commit times of the files we want commit times for

    It's written with speed in mind, given the constraints of making
    performant code in python!

    NOTE(review): ``Repository``, ``PrefixTree``, ``empty`` and ``log`` are
    defined elsewhere in this module; ``empty`` is presumably an empty
    frozenset -- confirm.
    """
    def __init__(self, root_folder):
        """Open the repository located at ``root_folder``"""
        self.git = Repository(root_folder)

    def all_files(self):
        """Return a set of all the files under git control (decoded index paths)"""
        return set([entry.decode() for entry, _ in self.git.open_index().items()])

    @property
    def first_commit(self):
        """Return the oid of HEAD, decoded to a string"""
        return self.git.head().decode()

    def file_commit_times(self, use_files_paths, debug=False):
        """
        Traverse the commits in the repository, starting from HEAD until we have
        found the commit times for all the files we care about.

        Yield ``(commit sha hexdigest, commit_time, [paths])`` for each commit in
        which at least one wanted file is found to be changed. Each file is
        reported once: it is removed from the set of wanted paths when found.

        ``use_files_paths`` is used to fill a ``PrefixTree`` of wanted paths.

        NOTE(review): ``debug`` is not used anywhere in this method; the
        commits/second logging it used to describe is not implemented here.
        """
        prefixes = PrefixTree()
        prefixes.fill(use_files_paths)

        for entry in self.git.get_walker():
            # Commit time taking into account the timezone
            commit_time = entry.commit.commit_time - entry.commit.commit_timezone

            # Get us the two different tree structures between parents and current
            # (the empty tuple is the root prefix)
            cf_and_pf, changes = self.tree_structures_for(()
                , entry.commit.tree
                , [self.git.get_object(oid).tree for oid in entry.commit.parents]
                , prefixes
                )

            # Deep dive into any differences
            difference = []
            if changes:
                # Work queue of (current/parent entries, changed entries) pairs,
                # processed first-in first-out
                cfs_and_pfs = [(cf_and_pf, changes)]
                while cfs_and_pfs:
                    nxt, changes = cfs_and_pfs.pop(0)
                    for thing, changes, is_path in self.differences_between(nxt[0], nxt[1], changes, prefixes):
                        if is_path:
                            # A changed file: report it only if it was still wanted
                            found = prefixes.remove(thing[:-1], thing[-1])
                            if found:
                                difference.append('/'.join(thing))
                        else:
                            # A changed subtree: queue it for a deeper comparison
                            cfs_and_pfs.append((thing, changes))

            # Only yield if there was a difference
            if difference:
                yield entry.commit.sha().hexdigest(), commit_time, difference

            # If nothing remains, then break!
            if not prefixes:
                break

    def entries_in_tree_oid(self, prefix, tree_oid):
        """
        Find the tree at this oid and return its entries, prefixed with
        ``prefix``, as a frozenset.

        When the object cannot be found, a warning is logged and ``empty`` is
        returned.
        """
        try:
            tree = self.git.get_object(tree_oid)
        except KeyError:
            log.warning("Couldn't find object {0}".format(tree_oid))
            return empty
        else:
            return frozenset(self.entries_in_tree(prefix, tree))

    def entries_in_tree(self, prefix, tree):
        """
        Traverse the entries in this tree and yield (prefix, is_tree, oid)

        Where prefix is a tuple of the given prefix and the name of the entry,
        and is_tree tells whether the entry's mode is a directory.
        """
        for entry in tree.items():
            if prefix:
                new_prefix = prefix + (entry.path.decode(), )
            else:
                new_prefix = (entry.path.decode(), )

            yield (new_prefix, stat.S_ISDIR(entry.mode), entry.sha)

    def tree_structures_for(self, prefix, current_oid, parent_oids, prefixes):
        """
        Return ``((current_files, parent_files), changes)`` for the tree at
        ``current_oid`` compared with the trees at ``parent_oids``, where
        changes is the difference between the two (current_files - parent_files)

        Returns ``(empty, empty)`` without reading anything when ``prefix`` is
        not in the non-empty ``prefixes``.
        """
        if prefix and prefixes and prefix not in prefixes:
            return empty, empty

        # Union of the entries of all parent trees
        parent_files = set()
        for oid in parent_oids:
            parent_files.update(self.entries_in_tree_oid(prefix, oid))

        current_files = self.entries_in_tree_oid(prefix, current_oid)
        return (current_files, parent_files), (current_files - parent_files)

    def differences_between(self, current_files, parent_files, changes, prefixes):
        """
        yield (thing, changes, is_path)

        If is_path is true, changes is None and thing is the path as a tuple.

        If is_path is false, thing is the current_files and parent_files for
        that changed treeentry and changes is the difference between current_files
        and parent_files.

        Changed trees whose path is not in ``prefixes`` are skipped.

        The code here is written to squeeze as much performance as possible out
        of this operation.
        """
        parent_oid = None

        # Parent oids are only needed when at least one changed entry is a tree
        if any(is_tree for _, is_tree, _ in changes):
            if len(changes) == 1:
                # Single change: look up the parent trees at that path directly
                wanted_path = list(changes)[0][0]
                parent_oid = frozenset([oid for path, is_tree, oid in parent_files if path == wanted_path and is_tree])
            else:
                # Several changes: index the parent-only trees by path
                parent_values = defaultdict(set)
                parent_changes = parent_files - current_files
                for path, is_tree, oid in parent_changes:
                    if is_tree:
                        parent_values[path].add(oid)

        for path, is_tree, oid in changes:
            if is_tree and path not in prefixes:
                continue

            if not is_tree:
                yield path, None, True
            else:
                # parent_values only exists when parent_oid is None, which is
                # exactly when it is read here
                parent_oids = parent_oid if parent_oid is not None else parent_values.get(path, empty)
                cf_and_pf, changes = self.tree_structures_for(path, oid, parent_oids, prefixes)
                if changes:
                    yield cf_and_pf, changes, False
Ejemplo n.º 23
0
    def _get_changesets(alias, org_repo, org_rev, other_repo, other_rev):
        """
        Returns lists of changesets that can be merged from org_repo@org_rev
        to other_repo@other_rev
        ... and the other way
        ... and the ancestors that would be used for merge

        :param alias: repository type, ``'hg'`` or ``'git'``; anything else
            raises an Exception (unless both revisions are equal)
        :param org_repo: repo object, that is most likely the original repo we forked from
        :param org_rev: the revision we want our compare to be made
        :param other_repo: repo object, most likely the fork of org_repo. It has
            all changesets that we need to obtain
        :param other_rev: revision we want out compare to be made on other_repo
        :return: tuple ``(other_changesets, org_changesets, ancestors)``;
            ``ancestors`` is None when both revisions are equal, and
            ``org_changesets`` is always empty for git
        """
        ancestors = None
        if org_rev == other_rev:
            org_changesets = []
            other_changesets = []

        elif alias == 'hg':
            # case two independent repos
            if org_repo != other_repo:
                hgrepo = mercurial.unionrepo.makeunionrepository(
                    other_repo.baseui, safe_bytes(other_repo.path),
                    safe_bytes(org_repo.path))
                # all ancestors of other_rev will be in other_repo and
                # rev numbers from hgrepo can be used in other_repo - org_rev ancestors cannot

            # no remote compare do it on the same repository
            else:
                hgrepo = other_repo._repo

            ancestors = [
                ascii_str(hgrepo[ancestor].hex()) for ancestor in hgrepo.revs(
                    b"id(%s) & ::id(%s)", ascii_bytes(other_rev),
                    ascii_bytes(org_rev))
            ]
            if ancestors:
                log.debug("shortcut found: %s is already an ancestor of %s",
                          other_rev, org_rev)
            else:
                log.debug("no shortcut found: %s is not an ancestor of %s",
                          other_rev, org_rev)
                ancestors = [
                    ascii_str(hgrepo[ancestor].hex())
                    for ancestor in hgrepo.revs(b"heads(::id(%s) & ::id(%s))",
                                                ascii_bytes(org_rev),
                                                ascii_bytes(other_rev))
                ]  # FIXME: expensive!

            other_changesets = [
                other_repo.get_changeset(rev) for rev in hgrepo.revs(
                    b"ancestors(id(%s)) and not ancestors(id(%s)) and not id(%s)",
                    ascii_bytes(other_rev), ascii_bytes(org_rev),
                    ascii_bytes(org_rev))
            ]
            org_changesets = [
                org_repo.get_changeset(ascii_str(hgrepo[rev].hex()))
                for rev in hgrepo.revs(
                    b"ancestors(id(%s)) and not ancestors(id(%s)) and not id(%s)",
                    ascii_bytes(org_rev), ascii_bytes(other_rev),
                    ascii_bytes(other_rev))
            ]

        elif alias == 'git':
            if org_repo != other_repo:
                from dulwich.repo import Repo
                from dulwich.client import SubprocessGitClient

                # Both repositories fetch from each other so that each has
                # the other's objects; they are closed in ``finally`` so a
                # failing fetch or walk does not leak them.
                gitrepo = Repo(org_repo.path)
                gitrepo_remote = None
                try:
                    SubprocessGitClient(thin_packs=False).fetch(
                        other_repo.path, gitrepo)

                    gitrepo_remote = Repo(other_repo.path)
                    SubprocessGitClient(thin_packs=False).fetch(
                        org_repo.path, gitrepo_remote)

                    revs = [
                        ascii_str(x.commit.id)
                        for x in gitrepo_remote.get_walker(
                            include=[ascii_bytes(other_rev)],
                            exclude=[ascii_bytes(org_rev)])
                    ]
                    other_changesets = [
                        other_repo.get_changeset(rev) for rev in reversed(revs)
                    ]
                    if other_changesets:
                        ancestors = [other_changesets[0].parents[0].raw_id]
                    else:
                        # no changesets from other repo, ancestor is the other_rev
                        ancestors = [other_rev]
                finally:
                    gitrepo.close()
                    if gitrepo_remote is not None:
                        gitrepo_remote.close()

            else:
                so = org_repo.run_git_command([
                    'log', '--reverse', '--pretty=format:%H', '-s',
                    '%s..%s' % (org_rev, other_rev)
                ])
                other_changesets = [
                    org_repo.get_changeset(cs)
                    for cs in re.findall(r'[0-9a-fA-F]{40}', so)
                ]
                so = org_repo.run_git_command(
                    ['merge-base', org_rev, other_rev])
                ancestors = [re.findall(r'[0-9a-fA-F]{40}', so)[0]]
            org_changesets = []

        else:
            raise Exception('Bad alias only git and hg is allowed')

        return other_changesets, org_changesets, ancestors
Ejemplo n.º 24
0
class backend():
    """Small convenience wrapper around one local git repository.

    All git work is delegated to ``p`` (presumably ``dulwich.porcelain`` --
    confirm against the file's imports) and ``DulwichRepo``.  Most methods
    report on stdout instead of returning values.  ``repo`` and
    ``current_dir`` only exist after ``local_init`` or ``load_repo`` ran.
    """

    def __init__(self):
        self.username = ""
        self.email = ""
        self.activity = ""
        self.repo_path = ""
        self.repo_name = ""
        # update_local() only pulls when isaclone == 1, from cloned_from.
        self.isaclone = 0
        self.cloned_from = ""

    def set_authorinfo(self, username, email):
        """Remember the author's name and e-mail address."""
        self.username = username
        self.email = email

    def local_init(self, repo_name, activity):
        """Create repository ``repo_name`` below the current directory.

        Failures are reported on stdout rather than raised.
        """
        self.activity = activity
        self.repo_name = repo_name
        try:
            self.repo = p.init(repo_name)
            self.current_dir = os.getcwd()
            self.repo_path = self.current_dir + '/' + self.repo_name
            print(self.repo_path)
            print("Local Repo Created")
        except Exception:
            # Still best-effort, but KeyboardInterrupt/SystemExit now pass through.
            print("Repo already exist, delete it first")

    def load_repo(self, repo_name):
        """Open the existing repository ``repo_name`` (relative to the cwd)."""
        self.repo_name = repo_name
        self.repo = DulwichRepo(self.repo_name)
        self.current_dir = os.getcwd()
        self.repo_path = self.current_dir + '/' + self.repo_name

    def create_file(self, name, content):
        """Write ``content`` to ``name`` inside the repository directory.

        Failures are reported on stdout rather than raised.
        """
        try:
            # ``with`` closes the handle even when write() fails.
            with open(os.path.join(self.repo_path, name), 'w') as handle:
                handle.write(content)
        except Exception:
            print('Unable to create README, does it already exist?')

    def edit_readme(self, name, content):
        """Overwrite ``name`` inside the repository directory with ``content``."""
        with open(os.path.join(self.repo_path, name), 'w') as handle:
            handle.write(content)

    def add(self, a):
        """Stage ``a``, which is either one path or a list of paths."""
        print(self.repo_name)
        print(self.repo)
        if isinstance(a, list):
            for item in a:
                p.add(self.repo, item)
        else:
            p.add(self.repo, a)

    def get_status(self):
        """Print the repository path and its status, if the path exists."""
        if os.path.exists(self.repo_path):
            print(self.repo_path)
            print(p.status(self.repo_path))
        else:
            print("Repo does not exist")

    def commit(self, message):
        """Commit the staged changes with ``message``."""
        p.commit(self.repo, message)

    def get_commit_history(self):
        """Print every walker entry (and its commit) that touched README."""
        print(self.repo_path)
        walker = self.repo.get_walker(paths=["README"], max_entries=None)
        for count, entry in enumerate(walker, 1):
            print("%s %s" % (count, entry))
            print(entry.commit)

    def clone_local(self, clone_repo_name):
        """Clone this (local) repository into ``clone_repo_name``."""
        p.clone(self.repo_path, clone_repo_name)

    def clone_remote(self, remote_repo_name, clone_repo_name):
        """Clone the remote ``remote_repo_name`` into ``clone_repo_name``.

        ``self`` was missing from the signature, so calling this on an
        instance with both arguments raised TypeError.
        """
        p.clone(remote_repo_name, clone_repo_name)

    def commit_logs(self):
        """Print the commit log, or a short notice when it is unavailable."""
        try:
            if os.path.exists(self.repo_path):
                print(p.log(self.repo))
            else:
                print("Repo does not exist")
        except Exception:
            print("No commits yet")

    """
    #Some issues - have to be rectified asap

    def revert_to_commit(self):
        print self.repo_path
        r = self.repo
        f = "README"
        w = r.get_walker(paths=[], max_entries=None)
        count = 0
        for i in iter(w):
            count += 1
            print count,
            print type(i)
            print i
            print i.commit.id
            a = i.commit.id
            #a = a[0:8]
            #print i.commit.get_sha_for()
        print a
        p.reset(self.repo, "hard", a)
    """

    def get_diff(self):
        """Print the trees of the commits touching README and diff two of them.

        The first walked tree is diffed against the fourth one when it
        exists, otherwise against the last one available.  With fewer than
        two trees a notice is printed and nothing is diffed.
        """
        trees = []
        for entry in self.repo.get_walker(paths=["README"], max_entries=None):
            trees.append(entry.commit.tree)
            print(entry.commit.tree)
        print(len(trees))

        if len(trees) < 2:
            print("Not enough commits to diff")
            return
        # The index used to be hard-coded to 3, which raised IndexError for
        # any file with fewer than four commits.
        other = trees[min(3, len(trees) - 1)]
        p.diff_tree(self.repo, trees[0], other)

    def update_local(self):
        """Pull from ``cloned_from`` when this repository is marked as a clone."""
        if self.isaclone == 1:
            try:
                p.pull(self.repo, self.cloned_from)
            except Exception:
                print("Error")
        else:
            print("Can not update")
Ejemplo n.º 25
0
#!/usr/bin/python
# Example printing the last author of a specified file

import sys
import time
from dulwich.repo import Repo

if len(sys.argv) < 2:
    print("usage: %s filename" % (sys.argv[0], ))
    sys.exit(1)

r = Repo(".")

# dulwich compares the walker's path filter against tree paths, which are
# bytes; a text argument would never match, so encode it first.
path = sys.argv[1]
if not isinstance(path, bytes):
    path = path.encode('utf-8')

# max_entries=1: only the most recent commit touching the path is needed.
w = r.get_walker(paths=[path], max_entries=1)
try:
    c = next(iter(w)).commit
except StopIteration:
    # The walker produced nothing: no commit ever touched this path.
    print("No file %s anywhere in history." % sys.argv[1])
else:
    print("%s was last changed by %s at %s (commit %s)" %
          (sys.argv[1], c.author, time.ctime(c.author_time), c.id))
Ejemplo n.º 26
0
#!/usr/bin/python
# Example printing the last author of a specified file

import sys
import time
from dulwich.repo import Repo

# Exactly one positional argument (the file name) is required.
if not sys.argv[1:]:
    print("usage: %s filename" % (sys.argv[0], ))
    sys.exit(1)

r = Repo(".")

# The walker is given the path as UTF-8 bytes.
path = sys.argv[1].encode('utf-8')

w = r.get_walker(paths=[path], max_entries=1)
# Only the first (most recent) entry matters; an empty walk means the file
# never appeared in history.
_no_entry = object()
_first = next(iter(w), _no_entry)
if _first is _no_entry:
    print("No file %s anywhere in history." % sys.argv[1])
else:
    c = _first.commit
    print("%s was last changed by %s at %s (commit %s)" % (
        sys.argv[1], c.author, time.ctime(c.author_time), c.id))
Ejemplo n.º 27
0
    def _get_changesets(alias, org_repo, org_rev, other_repo, other_rev):
        """
        Returns lists of changesets that can be merged from org_repo@org_rev
        to other_repo@other_rev
        ... and the other way
        ... and the ancestor that would be used for merge

        :param alias: repository type; 'hg' and 'git' are handled, any other
            value raises Exception (unless both revisions are equal, in
            which case alias is not inspected at all)
        :param org_repo: repo object, that is most likely the original repo we forked from
        :param org_rev: the revision we want our compare to be made
        :param other_repo: repo object, most likely the fork of org_repo. It has
            all changesets that we need to obtain
        :param other_rev: revision we want our compare to be made on other_repo
        :returns: tuple ``(other_changesets, org_changesets, ancestor)``;
            for git, org_changesets is always an empty list
        """
        ancestor = None
        if org_rev == other_rev:
            # identical revisions: nothing to merge in either direction
            org_changesets = []
            other_changesets = []
            ancestor = org_rev

        elif alias == 'hg':
            # case: two independent repos - query them through a union repo
            if org_repo != other_repo:
                hgrepo = unionrepo.unionrepository(other_repo.baseui,
                                                   other_repo.path,
                                                   org_repo.path)
                # all ancestors of other_rev will be in other_repo and
                # rev numbers from hgrepo can be used in other_repo - org_rev ancestors cannot

            # no remote compare: do it on the same repository
            else:
                hgrepo = other_repo._repo

            if org_repo.EMPTY_CHANGESET in (org_rev, other_rev):
                # work around unexpected behaviour in Mercurial < 3.4
                ancestor = org_repo.EMPTY_CHANGESET
            else:
                ancestors = hgrepo.revs("ancestor(id(%s), id(%s))", org_rev, other_rev)
                if ancestors:
                    # FIXME: picks arbitrary ancestor - but there is usually only one
                    try:
                        ancestor = hgrepo[ancestors.first()].hex()
                    except AttributeError:
                        # removed in hg 3.2
                        ancestor = hgrepo[ancestors[0]].hex()

            # revisions reachable from other_rev but not from org_rev
            other_revs = hgrepo.revs("ancestors(id(%s)) and not ancestors(id(%s)) and not id(%s)",
                                     other_rev, org_rev, org_rev)
            other_changesets = [other_repo.get_changeset(rev) for rev in other_revs]
            # ... and the mirror image: reachable from org_rev but not from other_rev
            org_revs = hgrepo.revs("ancestors(id(%s)) and not ancestors(id(%s)) and not id(%s)",
                                   org_rev, other_rev, other_rev)

            # org changesets are looked up by hex id rather than by hgrepo rev number
            org_changesets = [org_repo.get_changeset(hgrepo[rev].hex()) for rev in org_revs]

        elif alias == 'git':
            if org_repo != other_repo:
                from dulwich.repo import Repo
                from dulwich.client import SubprocessGitClient

                # fetch in both directions so each repo holds the other's objects
                gitrepo = Repo(org_repo.path)
                SubprocessGitClient(thin_packs=False).fetch(safe_str(other_repo.path), gitrepo)

                gitrepo_remote = Repo(other_repo.path)
                SubprocessGitClient(thin_packs=False).fetch(safe_str(org_repo.path), gitrepo_remote)

                # commits reachable from other_rev, excluding those reachable from org_rev
                revs = []
                for x in gitrepo_remote.get_walker(include=[other_rev],
                                                   exclude=[org_rev]):
                    revs.append(x.commit.id)

                # walker order is reversed before the changesets are loaded
                other_changesets = [other_repo.get_changeset(rev) for rev in reversed(revs)]
                if other_changesets:
                    # NOTE(review): assumes the first changeset has at least
                    # one parent; parents[0] would fail otherwise - confirm
                    ancestor = other_changesets[0].parents[0].raw_id
                else:
                    # no changesets from other repo, ancestor is the other_rev
                    ancestor = other_rev

            else:
                # same repository: let the git command line list org_rev..other_rev
                so, se = org_repo.run_git_command(
                    ['log', '--reverse', '--pretty=format:%H',
                     '-s', '%s..%s' % (org_rev, other_rev)]
                )
                # every 40-hex-digit token in the output is taken as a commit id
                other_changesets = [org_repo.get_changeset(cs)
                              for cs in re.findall(r'[0-9a-fA-F]{40}', so)]
                so, se = org_repo.run_git_command(
                    ['merge-base', org_rev, other_rev]
                )
                ancestor = re.findall(r'[0-9a-fA-F]{40}', so)[0]
            # the reverse direction is not computed for git
            org_changesets = []

        else:
            raise Exception('Bad alias only git and hg is allowed')

        return other_changesets, org_changesets, ancestor
Ejemplo n.º 28
0
class backend():
    """Small convenience wrapper around one local git repository.

    All git work is delegated to ``p`` (presumably ``dulwich.porcelain`` --
    confirm against the file's imports) and ``DulwichRepo``.  ``repo`` and
    ``current_dir`` only exist after ``local_init`` or ``load_repo`` ran.
    """

    def __init__(self):
        self.username = ""
        self.email = ""
        self.activity = ""
        self.repo_path = ""
        self.repo_name = ""
        # update_local() only pulls when isaclone == 1.
        self.isaclone = 0
        # NOTE(review): the host part of this URL looks mangled by e-mail
        # obfuscation in the published source - confirm the real remote.
        self.cloned_from = "ssh://[email protected]/vikramahuja1001/TurtleCodes"
        # File that get_diff() inspects; set via set_current_file_name().
        self.current_file_name = ""

    def set_authorinfo(self, username, email):
        """Remember the author's name and e-mail address."""
        self.username = username
        self.email = email

    def local_init(self, repo_name, activity):
        """Create repository ``repo_name`` below the current directory.

        Failures are reported on stdout rather than raised.
        """
        self.activity = activity
        self.repo_name = repo_name
        try:
            self.repo = p.init(repo_name)
            self.current_dir = os.getcwd()
            self.repo_path = self.current_dir + '/' + self.repo_name
            print(self.repo_path)
            print("Local Repo Created")
        except Exception:
            # Still best-effort, but KeyboardInterrupt/SystemExit now pass through.
            print("Repo already exist, delete it first")

    def load_repo(self, repo_name):
        """Open the existing repository ``repo_name`` (relative to the cwd)."""
        self.repo_name = repo_name
        self.repo = DulwichRepo(self.repo_name)
        self.current_dir = os.getcwd()
        self.repo_path = self.current_dir + '/' + self.repo_name

    def create_file(self, name, content):
        """Write ``content`` to ``name`` inside the repository directory.

        Failures are reported on stdout rather than raised.
        """
        try:
            # ``with`` closes the handle even when write() fails.
            with open(os.path.join(self.repo_path, name), 'w') as handle:
                handle.write(content)
        except Exception:
            print('Unable to create README, does it already exist?')

    def edit_file(self, name, content):
        """Overwrite ``name`` inside the repository directory with ``content``."""
        with open(os.path.join(self.repo_path, name), 'w') as handle:
            handle.write(content)

    def add(self, a):
        """Stage ``a``, which is either one path or a list of paths."""
        print(self.repo_name)
        print(self.repo)
        if isinstance(a, list):
            for item in a:
                p.add(self.repo, item)
        else:
            p.add(self.repo, a)

    def get_status(self):
        """Return the repository status, or None (after a notice) if the path is missing."""
        if os.path.exists(self.repo_path):
            print(self.repo_path)
            return p.status(self.repo_path)
        else:
            print("Repo does not exist")

    def commit(self, message):
        """Commit the staged changes with ``message``."""
        p.commit(self.repo, message)

    def get_commit_history(self, f):
        """Return the commits touching ``f``, each followed by a ``'\\n'`` item."""
        print(self.repo_path)
        history = []
        for entry in self.repo.get_walker(paths=[f], max_entries=None):
            history.append(entry.commit)
            print(type(entry))
            print(type(entry.commit))
            history.append('\n')
        return history

    def get_commit_id_and_message(self, f):
        """Return ``"<id>_<message>"`` per commit touching ``f``, each followed by ``'\\n'``."""
        print(self.repo_path)
        lines = []
        for entry in self.repo.get_walker(paths=[f], max_entries=None):
            lines.append(entry.commit.id + '_' + entry.commit.message)
            print(type(entry))
            print(type(entry.commit))
            lines.append('\n')
        return lines

    def clone_local(self, clone_repo_name):
        """Clone this (local) repository into ``clone_repo_name``."""
        p.clone(self.repo_path, clone_repo_name)

    def clone_remote(self, clone_repo_name):
        """Clone ``self.cloned_from`` into ``clone_repo_name`` and mark this as a clone."""
        self.isaclone = 1
        p.clone(self.cloned_from, clone_repo_name)

    def commit_logs(self):
        """Return the result of ``p.log`` for the repository, or a short notice string."""
        try:
            if os.path.exists(self.repo_path):
                return p.log(self.repo)
            else:
                return "Repo does not exist"
        except Exception:
            return "No commits yet"

    def set_current_file_name(self, f):
        """Select the file that ``get_diff`` works on."""
        self.current_file_name = f

    """
    #Some issues - have to be rectified asap

    def revert_to_commit(self):
        print self.repo_path
        r = self.repo
        f = "README"
        w = r.get_walker(paths=[], max_entries=None)
        count = 0
        for i in iter(w):
            count += 1
            print count,
            print type(i)
            print i
            print i.commit.id
            a = i.commit.id
            #a = a[0:8]
            #print i.commit.get_sha_for()
        print a
        p.reset(self.repo, "hard", a)
    """

    def get_diff(self):
        """Return the trees of the commits touching the current file, then one
        ``p.diff_tree`` result per pair of consecutive trees.

        NOTE(review): whether ``p.diff_tree`` returns the diff or only
        writes it to a stream is not visible here - confirm what ends up
        in the list.
        """
        walker = self.repo.get_walker(paths=[self.current_file_name], max_entries=None)
        trees = [entry.commit.tree for entry in walker]
        result = list(trees)
        # Pair every tree with the one walked right after it.
        for first, second in zip(trees, trees[1:]):
            result.append(p.diff_tree(self.repo, first, second))
            print("")
        return result

    def update_local(self):
        """Pull from ``cloned_from`` when this repository is marked as a clone."""
        if self.isaclone == 1:
            try:
                p.pull(self.repo, self.cloned_from)
            except Exception:
                print("Error")
        else:
            print("Can not update")

    def push(self):
        """Point refs/heads/master at HEAD and push it to ``cloned_from``.

        Errors are printed rather than raised.
        """
        try:
            refs_path = b"refs/heads/master"
            new_id = self.repo[b'HEAD'].id
            #self.assertNotEqual(new_id, ZERO_SHA)
            self.repo.refs[refs_path] = new_id
            p.push(self.repo.path, self.cloned_from, b"HEAD:" + refs_path)
        except Exception as e:
            print(e)
            print("Error")
Ejemplo n.º 29
0
    def _get_changesets(alias, org_repo, org_rev, other_repo, other_rev):
        """
        Returns lists of changesets that can be merged from org_repo@org_rev
        to other_repo@other_rev
        ... and the other way
        ... and the ancestor that would be used for merge

        :param alias: repository type; 'hg' and 'git' are handled, any other
            value raises Exception (unless the first shortcut branch applies,
            in which case alias is not inspected at all)
        :param org_repo: repo object, that is most likely the original repo we forked from
        :param org_rev: the revision we want our compare to be made
        :param other_repo: repo object, most likely the fork of org_repo. It has
            all changesets that we need to obtain
        :param other_rev: revision we want our compare to be made on other_repo
        :returns: tuple ``(other_changesets, org_changesets, ancestor)``;
            for git, org_changesets is always an empty list
        """
        ancestor = None
        # shortcut: identical revisions, or either side is the empty changeset
        if org_rev == other_rev or org_repo.EMPTY_CHANGESET in (org_rev,
                                                                other_rev):
            org_changesets = []
            other_changesets = []
            ancestor = org_rev

        elif alias == 'hg':
            # case: two independent repos - query them through a union repo
            if org_repo != other_repo:
                hgrepo = unionrepo.unionrepository(other_repo.baseui,
                                                   other_repo.path,
                                                   org_repo.path)
                # all ancestors of other_rev will be in other_repo and
                # rev numbers from hgrepo can be used in other_repo - org_rev ancestors cannot

            # no remote compare: do it on the same repository
            else:
                hgrepo = other_repo._repo

            ancestors = hgrepo.revs("ancestor(id(%s), id(%s))", org_rev,
                                    other_rev)
            if ancestors:
                # FIXME: picks arbitrary ancestor - but there is usually only one
                try:
                    ancestor = hgrepo[ancestors.first()].hex()
                except AttributeError:
                    # removed in hg 3.2
                    ancestor = hgrepo[ancestors[0]].hex()

            # revisions reachable from other_rev but not from org_rev
            other_revs = hgrepo.revs(
                "ancestors(id(%s)) and not ancestors(id(%s)) and not id(%s)",
                other_rev, org_rev, org_rev)
            other_changesets = [
                other_repo.get_changeset(rev) for rev in other_revs
            ]
            # ... and the mirror image: reachable from org_rev but not from other_rev
            org_revs = hgrepo.revs(
                "ancestors(id(%s)) and not ancestors(id(%s)) and not id(%s)",
                org_rev, other_rev, other_rev)

            # org changesets are looked up by hex id rather than by hgrepo rev number
            org_changesets = [
                org_repo.get_changeset(hgrepo[rev].hex()) for rev in org_revs
            ]

        elif alias == 'git':
            if org_repo != other_repo:
                from dulwich.repo import Repo
                from dulwich.client import SubprocessGitClient

                # fetch in both directions so each repo holds the other's objects
                gitrepo = Repo(org_repo.path)
                SubprocessGitClient(thin_packs=False).fetch(
                    other_repo.path, gitrepo)

                gitrepo_remote = Repo(other_repo.path)
                SubprocessGitClient(thin_packs=False).fetch(
                    org_repo.path, gitrepo_remote)

                # commits reachable from other_rev, excluding those reachable from org_rev
                revs = []
                for x in gitrepo_remote.get_walker(include=[other_rev],
                                                   exclude=[org_rev]):
                    revs.append(x.commit.id)

                # walker order is reversed before the changesets are loaded
                other_changesets = [
                    other_repo.get_changeset(rev) for rev in reversed(revs)
                ]
                if other_changesets:
                    # NOTE(review): assumes the first changeset has at least
                    # one parent; parents[0] would fail otherwise - confirm
                    ancestor = other_changesets[0].parents[0].raw_id
                else:
                    # no changesets from other repo, ancestor is the other_rev
                    ancestor = other_rev

            else:
                # same repository: let the git command line list org_rev..other_rev
                # NOTE(review): the revisions are interpolated into a command
                # string - confirm run_git_command does not hand it to a
                # shell unescaped
                so, se = org_repo.run_git_command(
                    'log --reverse --pretty="format: %%H" -s %s..%s' %
                    (org_rev, other_rev))
                # every 40-hex-digit token in the output is taken as a commit id
                other_changesets = [
                    org_repo.get_changeset(cs)
                    for cs in re.findall(r'[0-9a-fA-F]{40}', so)
                ]
                so, se = org_repo.run_git_command('merge-base %s %s' %
                                                  (org_rev, other_rev))
                ancestor = re.findall(r'[0-9a-fA-F]{40}', so)[0]
            # the reverse direction is not computed for git
            org_changesets = []

        else:
            raise Exception('Bad alias only git and hg is allowed')

        return other_changesets, org_changesets, ancestor
Ejemplo n.º 30
0
class backend():
    """Small convenience wrapper around one local git repository.

    All git work is delegated to ``p`` (presumably ``dulwich.porcelain`` --
    confirm against the file's imports) and ``DulwichRepo``.  Most methods
    report on stdout instead of returning values.  ``repo`` and
    ``current_dir`` only exist after ``local_init`` or ``load_repo`` ran.
    """

    def __init__(self):
        self.username = ""
        self.email = ""
        self.activity = ""
        self.repo_path = ""
        self.repo_name = ""
        # update_local() only pulls when isaclone == 1, from cloned_from.
        self.isaclone = 0
        self.cloned_from = ""

    def set_authorinfo(self, username, email):
        """Remember the author's name and e-mail address."""
        self.username = username
        self.email = email

    def local_init(self, repo_name, activity):
        """Create repository ``repo_name`` below the current directory.

        Failures are reported on stdout rather than raised.
        """
        self.activity = activity
        self.repo_name = repo_name
        try:
            self.repo = p.init(repo_name)
            self.current_dir = os.getcwd()
            self.repo_path = self.current_dir + '/' + self.repo_name
            print(self.repo_path)
            print("Local Repo Created")
        except Exception:
            # Still best-effort, but KeyboardInterrupt/SystemExit now pass through.
            print("Repo already exist, delete it first")

    def load_repo(self, repo_name):
        """Open the existing repository ``repo_name`` (relative to the cwd)."""
        self.repo_name = repo_name
        self.repo = DulwichRepo(self.repo_name)
        self.current_dir = os.getcwd()
        self.repo_path = self.current_dir + '/' + self.repo_name

    def create_file(self, name, content):
        """Write ``content`` to ``name`` inside the repository directory.

        Failures are reported on stdout rather than raised.
        """
        try:
            # ``with`` closes the handle even when write() fails.
            with open(os.path.join(self.repo_path, name), 'w') as handle:
                handle.write(content)
        except Exception:
            print('Unable to create README, does it already exist?')

    def edit_readme(self, name, content):
        """Overwrite ``name`` inside the repository directory with ``content``."""
        with open(os.path.join(self.repo_path, name), 'w') as handle:
            handle.write(content)

    def add(self, a):
        """Stage ``a``, which is either one path or a list of paths."""
        print(self.repo_name)
        print(self.repo)
        if isinstance(a, list):
            for item in a:
                p.add(self.repo, item)
        else:
            p.add(self.repo, a)

    def get_status(self):
        """Print the repository path and its status, if the path exists."""
        if os.path.exists(self.repo_path):
            print(self.repo_path)
            print(p.status(self.repo_path))
        else:
            print("Repo does not exist")

    def commit(self, message):
        """Commit the staged changes with ``message``."""
        p.commit(self.repo, message)

    def get_commit_history(self):
        """Print every walker entry (and its commit) that touched README."""
        print(self.repo_path)
        walker = self.repo.get_walker(paths=["README"], max_entries=None)
        for count, entry in enumerate(walker, 1):
            print("%s %s" % (count, entry))
            print(entry.commit)

    def clone_local(self, clone_repo_name):
        """Clone this (local) repository into ``clone_repo_name``."""
        p.clone(self.repo_path, clone_repo_name)

    def clone_remote(self, remote_repo_name, clone_repo_name):
        """Clone the remote ``remote_repo_name`` into ``clone_repo_name``.

        ``self`` was missing from the signature, so calling this on an
        instance with both arguments raised TypeError.
        """
        p.clone(remote_repo_name, clone_repo_name)

    def commit_logs(self):
        """Print the commit log, or a short notice when it is unavailable."""
        try:
            if os.path.exists(self.repo_path):
                print(p.log(self.repo))
            else:
                print("Repo does not exist")
        except Exception:
            print("No commits yet")

    """
    #Some issues - have to be rectified asap

    def revert_to_commit(self):
        print self.repo_path
        r = self.repo
        f = "README"
        w = r.get_walker(paths=[], max_entries=None)
        count = 0
        for i in iter(w):
            count += 1
            print count,
            print type(i)
            print i
            print i.commit.id
            a = i.commit.id
            #a = a[0:8]
            #print i.commit.get_sha_for()
        print a
        p.reset(self.repo, "hard", a)
    """

    def get_diff(self):
        """Print the trees of the commits touching README and diff two of them.

        The first walked tree is diffed against the fourth one when it
        exists, otherwise against the last one available.  With fewer than
        two trees a notice is printed and nothing is diffed.
        """
        trees = []
        for entry in self.repo.get_walker(paths=["README"], max_entries=None):
            trees.append(entry.commit.tree)
            print(entry.commit.tree)
        print(len(trees))

        if len(trees) < 2:
            print("Not enough commits to diff")
            return
        # The index used to be hard-coded to 3, which raised IndexError for
        # any file with fewer than four commits.
        other = trees[min(3, len(trees) - 1)]
        p.diff_tree(self.repo, trees[0], other)

    def update_local(self):
        """Pull from ``cloned_from`` when this repository is marked as a clone."""
        if self.isaclone == 1:
            try:
                p.pull(self.repo, self.cloned_from)
            except Exception:
                print("Error")
        else:
            print("Can not update")
Ejemplo n.º 31
0
    def find_git_mtimes(self, context, silent_build):
        """
        Use git to find the mtimes of the files we care about

        Returns a dict keyed by file path relative to ``context.parent_dir``
        whose values are the greatest ``author_time`` seen among the walked
        commits that changed the file.  An empty dict is returned when
        ``context.use_git_timestamps`` is falsy; a shallow clone raises
        HarpoonError.
        """
        if not context.use_git_timestamps:
            return {}

        parent_dir = context.parent_dir
        root_folder = context.git_root

        # In a shallow clone every file would just get the timestamp of the
        # latest commit, so refuse to continue
        if context.use_git_timestamps:
            if os.path.exists(os.path.join(root_folder, ".git", "shallow")):
                raise HarpoonError("Can't get git timestamps from a shallow clone",
                                   directory=parent_dir)

        repo = Repo(root_folder)
        tracked = set(repo.open_index())
        mtimes = {}

        selected = set()
        for filename in tracked:
            relpath = os.path.relpath(os.path.join(root_folder, filename),
                                      context.parent_dir)

            # Files outside of parent_dir are never considered
            if relpath.startswith("../"):
                continue

            # When use_git_timestamps is something other than a bool it is
            # iterated as fnmatch patterns and the file has to match one
            if context.use_git_timestamps and type(
                    context.use_git_timestamps) is not bool:
                if not any(fnmatch.fnmatch(relpath, line)
                           for line in context.use_git_timestamps):
                    continue

            # Selected by default when there is an exclude list, or when
            # there is neither an exclude nor an include list
            matched = bool(context.exclude) or not context.include

            # Matching an exclude pattern deselects the file ...
            if context.exclude and any(
                    fnmatch.fnmatch(relpath, line) for line in context.exclude):
                matched = False

            # ... and matching an include pattern (re)selects it
            if context.include and any(
                    fnmatch.fnmatch(relpath, line) for line in context.include):
                matched = True

            if matched:
                selected.add(filename)

        if not silent_build:
            log.info(
                "Finding modified times for %s/%s git controlled files in %s",
                len(selected), len(tracked), root_folder)

        for entry in repo.get_walker(paths=selected):
            date = entry.commit.author_time
            for changes in entry.changes():
                # A single change is handled like a one-element list
                change_list = changes if type(changes) is list else [changes]
                for change in change_list:
                    path = change.new.path
                    if not (root_folder and path and context.parent_dir):
                        continue
                    new_relpath = os.path.relpath(
                        os.path.join(root_folder, path), context.parent_dir)
                    is_newer = (path in selected
                                and mtimes.get(new_relpath, 0) < date
                                and not new_relpath.startswith("../"))
                    if is_newer:
                        mtimes[new_relpath] = date

            # Stop walking once no selected name is missing from mtimes
            if not (selected - set(mtimes)):
                break

        return mtimes
Ejemplo n.º 32
0
class Gittle(object):
    """Wrapper around a dulwich repository.

    All paths used in Gittle external methods must be paths relative to the git repository
    """
    # Defaults
    DEFAULT_COMMIT = 'HEAD'
    DEFAULT_BRANCH = 'master'
    DEFAULT_REMOTE = 'origin'
    DEFAULT_MESSAGE = '**No Message**'
    DEFAULT_USER_INFO = {
        'name': None,
        'email': None,
    }

    # Available diff implementations, keyed by diff type name
    # ('changes' maps to the same function as 'dict')
    DIFF_FUNCTIONS = {
        'classic': utils.git.classic_tree_diff,
        'dict': utils.git.dict_tree_diff,
        'changes': utils.git.dict_tree_diff
    }
    DEFAULT_DIFF_TYPE = 'dict'

    # Regexes for paths that are always hidden; __init__ copies this list
    # and extends the copy with the ignore regexes
    HIDDEN_REGEXES = [
        # Hide git directory
        r'.*\/\.git\/.*',
    ]

    # References (prefixes joined with a name by the _format_ref_* helpers)
    REFS_BRANCHES = 'refs/heads/'
    REFS_REMOTES = 'refs/remotes/'
    REFS_TAGS = 'refs/tags/'

    # Name pattern truths
    # Used for detecting if files are :
    # - deleted
    # - added
    # - changed
    PATTERN_ADDED = (False, True)
    PATTERN_REMOVED = (True, False)
    PATTERN_MODIFIED = (True, True)

    # Permissions
    MODE_DIRECTORY = 0o40000  # Used to tell if a tree entry is a directory

    # Tree depth
    MAX_TREE_DEPTH = 1000

    # Acceptable Root paths
    ROOT_PATHS = (os.path.curdir, os.path.sep)

    def __init__(self, repo_or_path, origin_uri=None, auth=None, report_activity=None, *args, **kwargs):
        """Wrap a repository given as a dulwich repo, another Gittle or a path string.

        Any other type is logged and rejected with ``Exception``.  When no
        ``auth`` is supplied, the extra ``*args`` / ``**kwargs`` are passed
        to ``self.auth`` to build the authenticator.
        """
        if isinstance(repo_or_path, DulwichRepo):
            repo = repo_or_path
        elif isinstance(repo_or_path, Gittle):
            repo = DulwichRepo(repo_or_path.path)
        elif isinstance(repo_or_path, basestring):
            repo = DulwichRepo(os.path.abspath(repo_or_path))
        else:
            logging.warning('Repo is of type %s' % type(repo_or_path))
            raise Exception('Gittle must be initialized with either a dulwich repository or a string to the path')
        self.repo = repo

        # Repository path, remote url and optional client-activity callback
        self.path = self.repo.path
        self.origin_uri = origin_uri
        self._report_activity = report_activity

        # Ignore filter: the always-hidden patterns plus the ignore regexes
        self.hidden_regexes = copy.copy(self.HIDDEN_REGEXES)
        self.hidden_regexes.extend(self._get_ignore_regexes())
        self.ignore_filter = utils.paths.path_filter_regex(self.hidden_regexes)
        self.filters = [self.ignore_filter]

        # Authenticator: take the supplied one, otherwise create it
        if not auth:
            self.auth(*args, **kwargs)
        else:
            self.authenticator = auth

    def report_activity(self, *args, **kwargs):
        """Forward the call to the activity callback, if one was configured."""
        callback = self._report_activity
        if callback:
            return callback(*args, **kwargs)
        return None

    def _format_author(self, name, email):
        return "%s <%s>" % (name, email)

    def _format_userinfo(self, userinfo):
        name = userinfo.get('name')
        email = userinfo.get('email')
        if name and email:
            return self._format_author(name, email)
        return None

    def _format_ref(self, base, extra):
        return ''.join([base, extra])

    def _format_ref_branch(self, branch_name):
        return self._format_ref(self.REFS_BRANCHES, branch_name)

    def _format_ref_remote(self, remote_name):
        return self._format_ref(self.REFS_REMOTES, remote_name)

    def _format_ref_tag(self, tag_name):
        return self._format_ref(self.REFS_TAGS, tag_name)

    @property
    def head(self):
        """Return SHA of the current HEAD
        """
        return self.repo.head()

    @property
    def is_bare(self):
        """Bare repositories have no working directories or indexes
        """
        return self.repo.bare

    @property
    def is_working(self):
        return not(self.is_bare)

    def has_index(self):
        """Return what the underlying repo reports for ``has_index()`` (the opposite of is_bare)."""
        repo = self.repo
        return repo.has_index()

    @property
    def has_commits(self):
        """
        If the repository has no HEAD we consider that is has no commits
        """
        try:
            self.repo.head()
        except KeyError:
            return False
        return True

    def ref_walker(self, ref=None):
        """
        Yield the commit of every walker entry, starting from ``ref``
        ('HEAD' when ``ref`` is falsy).
        """
        if not ref:
            ref = 'HEAD'
        start_sha = self._commit_sha(ref)
        walker = self.repo.get_walker(start_sha)
        for item in walker:
            yield item.commit

    def branch_walker(self, branch):
        """Walk the commits of ``branch``; a falsy value selects the active branch."""
        if not branch:
            branch = self.active_branch
        return self.ref_walker(self._format_ref_branch(branch))

    def commit_info(self, start=0, end=None, branch=None):
        """Return a list of commits with all their attached information

        One ``utils.git.commit_info`` record is built per commit of
        ``branch``, and the slice ``[start:end]`` of that list is returned.
        A falsy ``end`` means "up to the last commit".  A repository without
        commits yields an empty list.
        """
        if not self.has_commits:
            return []
        commits = [utils.git.commit_info(entry) for entry in self.branch_walker(branch)]
        # Slice unconditionally: ``start`` used to be ignored whenever
        # ``end`` was omitted.  ``end or None`` keeps a falsy end open-ended.
        return commits[start:end or None]


    @funky.uniquify
    def recent_contributors(self, n=None, branch=None):
        """Pluck the 'author' of the first ``n`` commit records of ``branch``.

        ``n`` falls back to 10 when falsy.
        """
        limit = n or 10
        recent = self.commit_info(end=limit, branch=branch)
        return funky.pluck(recent, 'author')

    @property
    def commit_count(self):
        try:
            return len(self.ref_walker())
        except KeyError:
            return 0

    def commits(self):
        """Return the 'sha' of every record produced by ``commit_info()``."""
        shas = []
        for info in self.commit_info():
            shas.append(info['sha'])
        return shas

    @property
    def git_dir(self):
        return self.repo.controldir()

    def auth(self, *args, **kwargs):
        """Build a GittleAuth from the arguments, store it as the authenticator and return it."""
        authenticator = GittleAuth(*args, **kwargs)
        self.authenticator = authenticator
        return authenticator

    # Generate a branch selector (used for pushing)
    def _wants_branch(self, branch_name=None):
        branch_name = branch_name or self.active_branch
        refs_key = self._format_ref_branch(branch_name)
        sha = self.branches[branch_name]

        def wants_func(old):
            refs_key = self._format_ref_branch(branch_name)
            return {
                refs_key: sha
            }
        return wants_func

    def _get_ignore_regexes(self):
        gitignore_filename = os.path.join(self.path, '.gitignore')
        if not os.path.exists(gitignore_filename):
            return []
        lines = open(gitignore_filename).readlines()
        globers = map(lambda line: line.rstrip(), lines)
        return utils.paths.globers_to_regex(globers)

    # Get the absolute path for a file in the git repo
    def abspath(self, repo_file):
        """Return the absolute path of ``repo_file``, joined onto the repository path."""
        joined = os.path.join(self.path, repo_file)
        return os.path.abspath(joined)

    # Get the relative path from the absolute path
    def relpath(self, abspath):
        """Return ``abspath`` expressed relative to the repository path."""
        base = self.path
        return os.path.relpath(abspath, base)

    @property
    def last_commit(self):
        return self[self.repo.head()]

    @property
    def index(self):
        return self.repo.open_index()

    @classmethod
    def init(cls, path, bare=None, *args, **kwargs):
        """Initialize a repository at ``path`` and wrap it.

        A truthy ``bare`` uses ``DulwichRepo.init_bare``, otherwise
        ``DulwichRepo.init``.  The remaining arguments are passed on to
        the class constructor.
        """
        mkdir_safe(path)

        # Pick the dulwich constructor, build the repo, then wrap it
        constructor = DulwichRepo.init_bare if bare else DulwichRepo.init
        return cls(constructor(path), *args, **kwargs)

    @classmethod
    def init_bare(cls, *args, **kwargs):
        kwargs.setdefault('bare', True)
        return cls.init(*args, **kwargs)

    @classmethod
    def is_repo(cls, path):
        """Returns True if path is a git repository, False if it is not

        Decided by constructing a Gittle for ``path``; only
        NotGitRepository is treated as "not a repository".
        """
        try:
            Gittle(path)
        except NotGitRepository:
            return False
        return True

    def get_client(self, origin_uri=None, **kwargs):
        """Return `(client, remote_path)` for talking to a remote.

        `origin_uri` falls back to `self.origin_uri`; when both are falsy
        `InvalidRemoteUrl` is raised. The client options are, from lowest to
        highest precedence: the authenticator's kwargs, the caller's `kwargs`,
        and finally `report_activity=self.report_activity`.
        """
        remote_url = origin_uri or self.origin_uri

        # No remote URL to work with
        if not remote_url:
            raise InvalidRemoteUrl()

        client_kwargs = {}
        layers = (
            self.authenticator.kwargs(),
            kwargs,
            {'report_activity': self.report_activity},
        )
        for layer in layers:
            client_kwargs.update(layer)

        client, remote_path = get_transport_and_path(remote_url, **client_kwargs)
        return client, remote_path

    def push_to(self, origin_uri, branch_name=None, progress=None):
        """Send one branch to `origin_uri` and return `send_pack`'s result.

        The refs to update come from `self._wants_branch(branch_name)` and the
        pack contents from the object store's `generate_pack_contents`.
        """
        wanted_refs = self._wants_branch(branch_name=branch_name)
        client, remote_path = self.get_client(origin_uri)
        pack_generator = self.repo.object_store.generate_pack_contents
        return client.send_pack(remote_path, wanted_refs, pack_generator, progress=progress)

    # Like: git push
    def push(self, origin_uri=None, branch_name=None, progress=None):
        """Like `git push`: forwards all three arguments to `push_to`."""
        pushed = self.push_to(origin_uri, branch_name, progress)
        return pushed

    # Not recommended at ALL ... !!!
    def dirty_pull_from(self, origin_uri, branch_name=None):
        """Throw the local repository away and pull a fresh copy.

        Deletes the whole `self.path` directory, recreates it, initialises a
        new dulwich repository there and then delegates to `pull_from`.
        """
        repo_dir = self.path

        # Wipe everything that was there before
        rmtree(repo_dir)
        mkdir_safe(repo_dir)
        self.repo = DulwichRepo.init(repo_dir)

        # Fetch brand new copy from remote
        return self.pull_from(origin_uri, branch_name)

    def pull_from(self, origin_uri, branch_name=None):
        """Fetch from `origin_uri` via `self.fetch` and return its result.

        `branch_name` is accepted but not used.
        """
        fetched = self.fetch(origin_uri)
        return fetched

    # Like: git pull
    def pull(self, origin_uri=None, branch_name=None):
        """Like `git pull`: forwards both arguments to `pull_from`."""
        pulled = self.pull_from(origin_uri, branch_name)
        return pulled

    def fetch_remote(self, origin_uri=None):
        """Fetch from the remote into `self.repo`.

        Returns whatever the client's `fetch` call returns (the remote refs).
        """
        client, remote_path = self.get_client(origin_uri=origin_uri)
        return client.fetch(remote_path, self.repo)


    def _setup_fetched_refs(self, refs, origin, bare):
        """Record freshly fetched remote refs in the local repository.

        Heads are imported under `refs/remotes/<origin>` (or `refs/heads` when
        `bare` is truthy), tags under `refs/tags`, and finally every cleaned
        ref is assigned on `self` by name.
        """
        # Split the remote refs by kind, then filter them
        branch_refs = utils.git.clean_refs(utils.git.subrefs(refs, 'refs/heads'))
        tag_refs = utils.git.clean_refs(utils.git.subrefs(refs, 'refs/tags'))

        # Where the imported branches live locally
        heads_base = 'refs/remotes/' + origin
        if bare:
            heads_base = 'refs/heads'

        # Import branches, then tags
        self.import_refs(heads_base, branch_refs)
        self.import_refs('refs/tags', tag_refs)

        # Update HEAD (and every other cleaned ref)
        for ref_name, sha in utils.git.clean_refs(refs).items():
            self[ref_name] = sha


    def fetch(self, origin_uri=None, bare=None, origin=None):
        """Fetch from a remote and bring local refs and files up to date.

        `origin` defaults to `self.DEFAULT_REMOTE`. When the remote reports no
        refs (empty repository) nothing else happens. Otherwise refs are set
        up and then either the working directory is checked out (non-bare
        repository that has commits) or the server info is updated.
        Always returns None.
        """
        if not bare:
            bare = False
        if not origin:
            origin = self.DEFAULT_REMOTE

        remote_refs = self.fetch_remote(origin_uri)

        # Empty remote repository: nothing to record
        if not remote_refs:
            return

        # Update refs (branches, tags, HEAD)
        self._setup_fetched_refs(remote_refs, origin, bare)

        if bare or not self.has_commits:
            self.update_server_info()
        else:
            # Checkout working directories
            self.checkout_all()


    @classmethod
    def clone(cls, origin_uri, local_path, auth=None, mkdir=True, bare=False, *args, **kwargs):
        """Clone a remote repository into `local_path`.

        The local repository is initialised (bare or not), wrapped in a new
        `cls` instance bound to `origin_uri`/`auth`, fetched into, and then the
        remote is registered under the name 'origin'. `mkdir` is accepted but
        not consulted; the directory is always prepared with `mkdir_safe`.
        """
        mkdir_safe(local_path)

        # Initialize the local repository
        initializer = cls.init_bare if bare else cls.init
        local_repo = initializer(local_path)

        repo = cls(local_repo, origin_uri=origin_uri, auth=auth, *args, **kwargs)
        repo.fetch(bare=bare)

        # Add origin
        repo.add_remote('origin', origin_uri)
        return repo

    @classmethod
    def clone_bare(cls, *args, **kwargs):
        """Same as `.clone`, except `bare` defaults to True unless supplied."""
        if 'bare' not in kwargs:
            kwargs['bare'] = True
        return cls.clone(*args, **kwargs)

    def _commit(self, committer=None, author=None, message=None, files=None, tree=None, *args, **kwargs):

        if not tree:
            # If no tree then stage files
            modified_files = files or self.modified_files
            logging.info("STAGING : %s" % modified_files)
            self.repo.stage(modified_files)

        # Messages
        message = message or self.DEFAULT_MESSAGE
        author_msg = self._format_userinfo(author)
        committer_msg = self._format_userinfo(committer)

        return self.repo.do_commit(
            message=message,
            author=author_msg,
            committer=committer_msg,
            encoding='UTF-8',
            tree=tree,
            *args, **kwargs
        )

    def _tree_from_structure(self, structure):
        """Store a flat list of files as blobs plus one tree; return the tree id.

        Each item of `structure` is a mapping with `name`, `data` and `mode`.
        `name` and `data` are encoded as ASCII; an item that is missing a key
        or fails to encode is skipped instead of aborting the whole tree.
        """
        # TODO : Support directories
        tree = Tree()
        object_store = self.repo.object_store

        for file_info in structure:
            # str only
            try:
                data = file_info['data'].encode('ascii')
                name = file_info['name'].encode('ascii')
                mode = file_info['mode']
            except Exception:
                # Skip the file on lookup/encoding errors, but let
                # KeyboardInterrupt and SystemExit propagate.
                continue

            blob = Blob()
            blob.data = data

            # Store file's contents
            object_store.add_object(blob)

            # Add blob entry
            tree.add(name, mode, blob.id)

        # Store tree
        object_store.add_object(tree)

        return tree.id

    # Like: git commit -a
    def commit(self, name=None, email=None, message=None, files=None, *args, **kwargs):
        """Commit `files` with the same identity as author and committer.

        `name` and `email` are bundled into one dict that is passed to
        `_commit` for both roles; everything else is forwarded unchanged.
        """
        identity = dict(name=name, email=email)
        return self._commit(
            committer=identity,
            author=identity,
            message=message,
            files=files,
            *args,
            **kwargs
        )

    def commit_structure(self, name=None, email=None, message=None, structure=None, *args, **kwargs):
        """Commit a tree built directly from `structure`.

        Main use is committing straight to bare repositories, for example an
        initial commit so the repo can be cloned and worked on right away.
        Returns None without committing when `structure` is empty.
        """
        if not structure:
            return

        tree_id = self._tree_from_structure(structure)
        identity = dict(name=name, email=email)

        return self._commit(
            committer=identity,
            author=identity,
            message=message,
            tree=tree_id,
            *args,
            **kwargs
        )

    # Push all local commits
    # and pull all remote commits
    def sync(self, origin_uri=None):
        """Push local commits, then pull; returns the result of the pull."""
        self.push(origin_uri)
        pulled = self.pull(origin_uri)
        return pulled

    def lookup_entry(self, relpath, trackable_files=frozenset()):
        """Return `(sha, mode)` describing a working-tree file as a git blob.

        Raises `KeyError` when `relpath` is not in `trackable_files` (with the
        default empty set that is always the case). The SHA is the hex SHA-1
        of the git blob header (``blob <size>`` plus a NUL byte) followed by
        the file's bytes; the mode is `st_mode` from `os.stat`.
        """
        if relpath not in trackable_files:
            raise KeyError(relpath)

        abspath = self.abspath(relpath)

        with open(abspath, 'rb') as git_file:
            data = git_file.read()

        # hashlib only accepts bytes on Python 3, so encode the header
        header = ("blob %u\0" % len(data)).encode('ascii')
        s = sha1()
        s.update(header)
        s.update(data)
        return (s.hexdigest(), os.stat(abspath).st_mode)

    @property
    @funky.transform(set)
    def tracked_files(self):
        """Entries yielded by iterating the repository index.

        The body builds a list; the `funky.transform(set)` decorator is
        expected to turn it into a set.
        """
        return [entry for entry in self.index]

    @property
    @funky.transform(set)
    def raw_files(self):
        """Everything `utils.paths.subpaths` finds under `self.path`."""
        root = self.path
        return utils.paths.subpaths(root)

    @property
    @funky.transform(set)
    def ignored_files(self):
        """Sub-paths of `self.path` selected by `self.filters`."""
        root = self.path
        return utils.paths.subpaths(root, filters=self.filters)

    @property
    @funky.transform(set)
    def trackable_files(self):
        """`raw_files` with the `ignored_files` taken out."""
        everything = self.raw_files
        return everything - self.ignored_files

    @property
    @funky.transform(set)
    def untracked_files(self):
        """`trackable_files` that are not among the `tracked_files`."""
        candidates = self.trackable_files
        return candidates - self.tracked_files

    """
    @property
    @funky.transform(set)
    def modified_staged_files(self):
        "Checks if the file has changed since last commit"
        timestamp = self.last_commit.commit_time
        index = self.index
        return [
            f
            for f in self.tracked_files
            if index[f][1][0] > timestamp
        ]
    """

    # Return a list of tuples
    # representing the changed elements in the git tree
    def _changed_entries(self, ref=None):
        ref = ref or self.DEFAULT_COMMIT
        if not self.has_commits:
            return []
        obj_sto = self.repo.object_store
        tree_id = self[ref].tree
        names = self.trackable_files

        lookup_func = partial(self.lookup_entry, trackable_files=names)

        # Format = [((old_name, new_name), (old_mode, new_mode), (old_sha, new_sha)), ...]
        tree_diff = changes_from_tree(names, lookup_func, obj_sto, tree_id, want_unchanged=False)
        return list(tree_diff)

    @funky.transform(set)
    def _changed_entries_by_pattern(self, pattern):
        """Paths of changed entries whose (old, new) name presence matches.

        `pattern` is compared with `tuple(map(bool, names))` of every changed
        entry; the first truthy name of each match is collected. For
        `self.PATTERN_MODIFIED` an entry is kept only if both SHAs are equal.
        """
        changed_entries = self._changed_entries()
        # NOTE(review): for modified files one would expect the two SHAs to
        # differ; the equality test below is reproduced as found -- confirm.
        needs_equal_sha = (self.PATTERN_MODIFIED == pattern)

        selected = []
        for names, modes, sha in changed_entries:
            if tuple(map(bool, names)) != pattern:
                continue
            path = funky.first_true(names)
            if not path:
                continue
            if needs_equal_sha and not (sha[0] == sha[1]):
                continue
            selected.append(path)
        return selected

    @property
    @funky.transform(set)
    def removed_files(self):
        """Changed entries matching `PATTERN_REMOVED`, minus ignored files."""
        matches = self._changed_entries_by_pattern(self.PATTERN_REMOVED)
        return matches - self.ignored_files

    @property
    @funky.transform(set)
    def added_files(self):
        """Changed entries matching `PATTERN_ADDED`, minus ignored files."""
        matches = self._changed_entries_by_pattern(self.PATTERN_ADDED)
        return matches - self.ignored_files

    @property
    @funky.transform(set)
    def modified_files(self):
        """Changed entries matching `PATTERN_MODIFIED`, minus ignored files."""
        matches = self._changed_entries_by_pattern(self.PATTERN_MODIFIED)
        return matches - self.ignored_files

    @property
    @funky.transform(set)
    def modified_unstaged_files(self):
        """Tracked files whose on-disk mtime is later than the last commit.

        The comparison is `os.stat(...).st_mtime` against the `commit_time`
        of `self.last_commit`.
        """
        commit_time = self.last_commit.commit_time
        touched = []
        for tracked in self.tracked_files:
            if os.stat(self.abspath(tracked)).st_mtime > commit_time:
                touched.append(tracked)
        return touched

    @property
    def pending_files(self):
        """
        All files that could possibly be staged: the union of the modified,
        added and removed files.
        """
        pending = self.modified_files | self.added_files
        return pending | self.removed_files

    @property
    def pending_files_by_state(self):
        """Map every pending path to 'modified', 'added' or 'removed'.

        States are applied in that order, so a path present in several sets
        ends up with the last state that lists it.
        """
        paths_by_state = (
            ('modified', self.modified_files),
            ('added', self.added_files),
            ('removed', self.removed_files),
        )

        # "Flip" the mapping: state -> paths becomes path -> state
        state_by_path = {}
        for state, paths in paths_by_state:
            for path in paths:
                state_by_path[path] = state
        return state_by_path

    """
    @property
    @funky.transform(set)
    def modified_files(self):
        return self.modified_staged_files | self.modified_unstaged_files
    """

    # Like: git add
    @funky.arglist_method
    def stage(self, files):
        """Stage `files` through the underlying repository's `stage`."""
        repo = self.repo
        return repo.stage(files)

    def add(self, *args, **kwargs):
        """Alias of `stage`: forwards every argument and returns its result."""
        staged = self.stage(*args, **kwargs)
        return staged

    # Like: git rm
    @funky.arglist_method
    def rm(self, files, force=False):
        """Remove `files` from the index and write the index back.

        Files that are not in the index are ignored. `force` is accepted but
        not used. Returns the result of the index's `write()`.
        """
        # `self.index` opens a new index object on every access, so every
        # deletion must go through this one instance; deleting from
        # `self.index[...]` would be lost before the write below.
        index = self.index
        for tracked in [f for f in files if f in index]:
            del index[tracked]
        return index.write()

    def mv_fs(self, file_pair):
        """Rename a file on disk; `file_pair` is `(old_name, new_name)`."""
        source, destination = file_pair
        os.rename(source, destination)

    # Like: git mv
    @funky.arglist_method
    def mv(self, files_pair):
        """Like `git mv` for a list of `(old_name, new_name)` pairs.

        Only pairs whose old name is in the index are processed: each file is
        renamed on disk, the new names are staged and the old names removed.
        Returns None.
        """
        index = self.index
        # Materialise the selection: a lazy filter() is exhausted after one
        # pass on Python 3, and a lazy map() would never perform the renames.
        files_in_index = [pair for pair in files_pair if pair[0] in index]
        for pair in files_in_index:
            self.mv_fs(pair)
        old_files = [funky.first(pair) for pair in files_in_index]
        new_files = [funky.last(pair) for pair in files_in_index]
        self.add(new_files)
        self.rm(old_files)
        # NOTE(review): the old names are staged again after removal,
        # presumably so the now-missing files are recorded as deleted -- confirm.
        self.add(old_files)
        return

    @working_only
    def _checkout_tree(self, tree):
        """Rebuild the index from `tree` using `build_index_from_tree`.

        The repository path, its index path and its object store are passed
        along; the helper's return value is returned unchanged.
        """
        repo = self.repo
        return build_index_from_tree(repo.path, repo.index_path(), repo.object_store, tree)

    def checkout_all(self, commit_sha=None):
        """Check out the tree of `commit_sha` (defaults to `self.head`)."""
        target = commit_sha or self.head
        # Rebuild index from the tree of that commit
        return self._checkout_tree(self._commit_tree(target))

    def checkout(self, ref):
        """Checkout a given ref or SHA.

        HEAD is first made a symbolic ref to `ref`, then the tree of `ref` is
        checked out.
        """
        refs = self.repo.refs
        refs.set_symbolic_ref('HEAD', ref)
        # Rebuild index from the tree of that ref
        tree = self._commit_tree(ref)
        return self._checkout_tree(tree)

    @funky.arglist_method
    def reset(self, files, commit='HEAD'):
        # Placeholder: accepts `files` and `commit` but is not implemented yet;
        # the body does nothing and returns None.
        pass

    def rm_all(self):
        """Empty the index and write it; returns the result of `write()`."""
        # The `index` property rebuilds the index object on every access, so
        # clear() and write() must be called on one and the same instance.
        cached_index = self.index
        cached_index.clear()
        return cached_index.write()

    def _to_commit(self, commit_obj):
        """Allows methods to accept both SHA's or dulwich Commit objects.

        Strings are looked up in `self.repo`; anything else is returned as is.
        """
        if not isinstance(commit_obj, basestring):
            return commit_obj
        return self.repo[commit_obj]

    def _commit_sha(self, commit_obj):
        """Extracts a Dulwich commit's SHA.

        A value that already is a SHA is returned unchanged, a string is
        resolved through `dwim_reference`, and for objects the `id` is used.
        """
        if utils.git.is_sha(commit_obj):
            return commit_obj
        if isinstance(commit_obj, basestring):
            # Can't use self[commit_obj] to avoid infinite recursion
            full_ref = self.dwim_reference(commit_obj)
            commit_obj = self.repo[full_ref]
        return commit_obj.id

    def dwim_reference(self, ref):
        """Dwim resolves a short reference to a full reference.

        The candidate formats are tried in order and the first one that is
        contained in `self.repo` is returned. Raises `Exception` when none of
        them resolves.
        """

        # Formats of refs we want to try in order
        formats = [
            "%s",
            "refs/%s",
            "refs/tags/%s",
            "refs/heads/%s",
            "refs/remotes/%s",
            "refs/remotes/%s/HEAD",
        ]

        for f in formats:
            try:
                fullref = f % ref
                if fullref in self.repo:
                    return fullref
            except Exception:
                # A failing candidate just means "try the next one"; unlike a
                # bare except this lets KeyboardInterrupt/SystemExit through.
                continue

        raise Exception("Could not resolve ref")

    def blob_data(self, sha):
        """Return a blob's content (its `data` attribute) for a given SHA."""
        blob = self[sha]
        return blob.data

    # Get the nth parent back for a given commit
    def get_parent_commit(self, commit, n=None):
        """Follow first parents `n` times (default 1) and return the SHA reached.

        The walk stops early at a commit without parents.
        Warning: Remember that parents aren't the previous commits
        """
        remaining = 1 if n is None else n
        current = self._to_commit(commit)

        # Step to the first parent until the count runs out or history ends
        while remaining > 0 and current.parents:
            first_parent_sha = current.parents[0]
            current = self._to_commit(self[first_parent_sha])
            remaining -= 1

        # Return a SHA
        return self._commit_sha(current)

    def get_previous_commit(self, commit_ref, n=None):
        """Return the commit `n` positions (default 1) after `commit_ref`
        in `self.commits()`, as selected by `funky.next`; the resolved SHA of
        `commit_ref` itself is the fallback value.
        """
        start_sha = self._parse_reference(commit_ref)
        steps = n or 1
        history = self.commits()
        return funky.next(history, start_sha, n=steps, default=start_sha)

    def _parse_reference(self, ref_string):
        # COMMIT_REF~x
        if '~' in ref_string:
            ref, count = ref_string.split('~')
            count = int(count)
            commit_sha = self._commit_sha(ref)
            return self.get_previous_commit(commit_sha, count)
        return self._commit_sha(ref_string)

    def _commit_tree(self, commit_sha):
        """Return the tree object for a given commit
        """
        return self[commit_sha].tree

    def diff(self, commit_sha, compare_to=None, diff_type=None, filter_binary=True):
        """Diff `commit_sha` against `compare_to`.

        `compare_to` defaults to `get_previous_commit(commit_sha)`.
        `diff_type` selects the implementation from `self.DIFF_FUNCTIONS`
        and defaults to `self.DEFAULT_DIFF_TYPE` (an unknown type raises
        `KeyError`). `filter_binary` is forwarded to the diff function.
        """
        diff_type = diff_type or self.DEFAULT_DIFF_TYPE
        diff_func = self.DIFF_FUNCTIONS[diff_type]

        if not compare_to:
            compare_to = self.get_previous_commit(commit_sha)

        # Forward filter_binary; it used to be dropped here, so callers
        # passing False still got the default filtering.
        return self._diff_between(
            compare_to,
            commit_sha,
            diff_function=diff_func,
            filter_binary=filter_binary
        )

    def diff_working(self, ref=None, filter_binary=True):
        """Diff between the current working directory and `ref`.

        The changed entries for `ref` (see `_changed_entries` for its
        default) are handed to `utils.git.diff_changes_paths`.
        """
        object_store = self.repo.object_store
        root = self.path
        changed = self._changed_entries(ref=ref)
        return utils.git.diff_changes_paths(object_store, root, changed, filter_binary=filter_binary)

    def get_commit_files(self, commit_sha, parent_path=None, is_tree=None, paths=None):
        """Collect the files of a commit, recursing into directories.

        Returns a dict keyed by path (joined onto `parent_path`):
            {
                "directory/filename.txt": {
                    'name': 'filename.txt',
                    'path': "directory/filename.txt",
                    'sha': <entry sha>,
                    'data': <blob content>,
                    'mode': <entry mode>,
                },
                ...
            }
        With `is_tree` truthy, `commit_sha` is taken to be a tree SHA already.
        When `paths` is not None only files whose path is in it are included.
        """
        parent_path = parent_path or ''

        tree_sha = commit_sha if is_tree else self._commit_tree(commit_sha)
        tree = self[tree_sha]

        files = {}
        for entry in tree.items():
            entry_path = os.path.join(parent_path, entry.path)

            # Directories are expanded recursively
            if entry.mode == self.MODE_DIRECTORY:
                files.update(
                    self.get_commit_files(entry.sha, parent_path=entry_path, is_tree=True, paths=paths)
                )
                continue

            # Only add the files we want
            if paths is not None and entry_path not in paths:
                continue

            files[entry_path] = {
                'name': entry.path,
                'path': entry_path,
                'mode': entry.mode,
                'sha': entry.sha,
                'data': self.blob_data(entry.sha),
            }
        return files

    def file_versions(self, path):
        """Return the distinct versions of `path` found across `commit_info()`.

        Commits are visited in the order `commit_info()` yields them; a commit
        that does not contain the file, or whose blob SHA was already seen, is
        skipped. The result is the list of file-info dicts (as produced by
        `get_commit_files`), one per distinct blob SHA.
        """
        versions = []
        commits_info = self.commit_info()
        seen_shas = set()

        for commit in commits_info:
            try:
                files = self.get_commit_files(commit['sha'], paths=[path])
                # dict views cannot be indexed on Python 3; go through a list
                # so an empty result still raises IndexError.
                file_path, file_data = list(files.items())[0]
            except IndexError:
                continue

            file_sha = file_data['sha']

            if file_sha in seen_shas:
                continue
            seen_shas.add(file_sha)

            # Add file info
            # NOTE(review): the commit dict is annotated but the file info is
            # what gets collected -- confirm which one callers expect.
            commit['file'] = file_data
            versions.append(file_data)
        return versions

    def _diff_between(self, old_commit_sha, new_commit_sha, diff_function=None, filter_binary=True):
        """Internal method for getting a diff between two commits
            Please use .diff method unless you have very specific needs
        """

        # If commit is first commit (new_commit_sha == old_commit_sha)
        # then compare to an empty tree
        if new_commit_sha == old_commit_sha:
            old_tree = Tree()
        else:
            old_tree = self._commit_tree(old_commit_sha)

        new_tree = self._commit_tree(new_commit_sha)

        return diff_function(self.repo.object_store, old_tree, new_tree, filter_binary=filter_binary)

    def changes(self, *args, **kwargs):
        """List of changes between two SHAs.

        Same arguments as `.diff`, with `diff_type` forced to 'changes'.
        Returns a list of lists of tuples :
            [
                [
                    (oldpath, newpath), (oldmode, newmode), (oldsha, newsha)
                ],
                ...
            ]
        """
        forwarded = dict(kwargs, diff_type='changes')
        return self.diff(*args, **forwarded)

    def changes_count(self, *args, **kwargs):
        """Number of entries returned by `.changes` for the same arguments."""
        change_list = self.changes(*args, **kwargs)
        return len(change_list)

    def _refs_by_pattern(self, pattern):
        refs = self.refs

        def item_filter(key_value):
            """Filter only concered refs"""
            key, value = key_value
            return key.startswith(pattern)

        def item_map(key_value):
            """Rewrite keys"""
            key, value = key_value
            new_key = key[len(pattern):]
            return (new_key, value)

        return dict(
            map(item_map,
                filter(
                    item_filter,
                    refs.items()
                )
            )
        )

    @property
    def refs(self):
        """All refs as returned by the underlying repository's `get_refs()`."""
        repo = self.repo
        return repo.get_refs()

    def set_refs(self, refs_dict):
        """Assign every `name -> value` pair of `refs_dict` on `self.repo`."""
        # `self` was missing from the signature, which made the method
        # uncallable on an instance and left `self` undefined in the body.
        for k, v in refs_dict.items():
            self.repo[k] = v

    def import_refs(self, base, other):
        """Import the refs in `other` under `base` via the repo's refs container."""
        refs = self.repo.refs
        return refs.import_refs(base, other)

    @property
    def branches(self):
        """Refs under `self.REFS_BRANCHES`, keyed by branch name."""
        prefix = self.REFS_BRANCHES
        return self._refs_by_pattern(prefix)

    @property
    def active_branch(self):
        """Returns the name of the active branch, or None, if HEAD is detached.

        None is also returned when HEAD is a symbolic ref that does not point
        below `self.REFS_BRANCHES`.
        """
        head_value = self.repo.refs.read_ref('HEAD')
        if not head_value.startswith(SYMREF):
            return None

        target = head_value[len(SYMREF):]
        branch_prefix = self.REFS_BRANCHES
        if not target.startswith(branch_prefix):
            return None
        return target[len(branch_prefix):]

    @property
    def active_sha(self):
        """Deprecated equivalent to the `head` property."""
        current_head = self.head
        return current_head

    @property
    def remote_branches(self):
        """Refs under `self.REFS_REMOTES`, keyed by the rest of the name."""
        prefix = self.REFS_REMOTES
        return self._refs_by_pattern(prefix)

    @property
    def tags(self):
        """Refs under `self.REFS_TAGS`, keyed by tag name."""
        prefix = self.REFS_TAGS
        return self._refs_by_pattern(prefix)

    @property
    def remotes(self):
        """ Dict of remotes, read from the repository config
        {
            'origin': 'http://friendco.de/some_user/repo.git',
            ...
        }
        Only config sections whose first key element is 'remote' are used;
        the second element is the remote's name.
        """
        config = self.repo.get_config()
        urls = {}
        for section, values in config.items():
            if section[0] != 'remote':
                continue
            urls[section[1]] = values['url']
        return urls

    def add_remote(self, remote_name, remote_url):
        """Register a remote in the repository config and save the config.

        Sets the `url` and a `fetch` refspec in the ('remote', name) section.
        Returns `remote_name`.
        """
        config = self.repo.get_config()
        section = ('remote', remote_name)
        fetch_spec = "+refs/heads/*:refs/remotes/%s/*" % remote_name

        # Add new entries for remote
        config.set(section, 'url', remote_url)
        config.set(section, 'fetch', fetch_spec)

        # Write to disk
        config.write_to_path()

        return remote_name

    def add_ref(self, new_ref, old_ref):
        """Set `new_ref` to `old_ref` in the repo refs, then update server info."""
        refs = self.repo.refs
        refs[new_ref] = old_ref
        self.update_server_info()

    def remove_ref(self, ref_name):
        """Delete `ref_name` from the repo refs.

        Returns False (and changes nothing) if the ref doesn't exist,
        otherwise deletes it, updates the server info and returns True.
        """
        refs = self.repo.refs
        if ref_name not in refs:
            return False
        del refs[ref_name]
        self.update_server_info()
        return True

    def create_branch(self, base_branch, new_branch, tracking=None):
        """Create `new_branch` from `base_branch`.

        A local branch named `base_branch` is preferred as the starting
        point; otherwise the remote branch `<tracking>/<base_branch>` is used,
        where `tracking` defaults to `self.DEFAULT_REMOTE`.

        Returns the full ref name of the new branch. Raises `Exception` when
        `new_branch` already exists or no base branch can be found.
        """

        # The remote to track: honour the caller's choice, else the default
        # (the argument used to be overwritten unconditionally).
        tracking = tracking or self.DEFAULT_REMOTE

        # Already exists
        if new_branch in self.branches:
            raise Exception("branch %s already exists" % new_branch)

        # Ref names always use '/', whatever the OS path separator is
        remote_branch = '/'.join([tracking, base_branch])

        # Fork Local
        if base_branch in self.branches:
            base_ref = self._format_ref_branch(base_branch)
        # Fork remote
        elif remote_branch in self.remote_branches:
            base_ref = self._format_ref_remote(remote_branch)
            # TODO : track
        else:
            raise Exception("Can not find the branch named '%s' to fork either locally or in '%s'" % (base_branch, tracking))

        # Reference of new branch
        new_ref = self._format_ref_branch(new_branch)

        # Copy reference to create branch
        self.add_ref(new_ref, base_ref)

        return new_ref

    def create_orphan_branch(self, new_branch, empty_index=None):
        """ Point HEAD at a branch that has no commits yet.

        Technically HEAD just becomes a symbolic ref to a non-existent branch;
        the branch itself only appears once something is committed. This is
        equivalent to:

            git checkout --orphan <new_branch>,

        In a working repository the file-tree is always emptied, and with
        `empty_index` set to True the index is emptied as well. Against a
        clean working tree, that is equivalent to:

            git checkout --orphan <new_branch>
            git reset --merge

        Returns the full ref name; raises `Exception` if the branch exists.
        """
        if new_branch in self.branches:
            raise Exception("branch %s already exists" % new_branch)

        orphan_ref = self._format_ref_branch(new_branch)
        self.repo.refs.set_symbolic_ref('HEAD', orphan_ref)

        if not self.is_working:
            return orphan_ref

        if empty_index:
            self.rm_all()
        self.clean_working()
        return orphan_ref

    def remove_branch(self, branch_name):
        """Remove the ref of `branch_name`; returns what `remove_ref` returns."""
        return self.remove_ref(self._format_ref_branch(branch_name))

    def switch_branch(self, branch_name, tracking=None, create=None):
        """Changes the current branch.

        A branch that does not exist yet is created first (through
        `create_branch`, using `branch_name` as its own base). HEAD is then
        pointed at the branch and, in a working repository, the working
        files are cleaned and checked out again. `create` is accepted but
        does not influence any of this.
        """
        # Create the branch on demand
        if branch_name not in self.branches:
            self.create_branch(branch_name, branch_name, tracking=tracking)

        # Change main branch
        branch_ref = self._format_ref_branch(branch_name)
        self.repo.refs.set_symbolic_ref('HEAD', branch_ref)

        if not self.is_working:
            return

        # Remove all files, then add the files of the current branch
        self.clean_working()
        self.checkout_all()

    def create_tag(self, tag_name, target):
        """Create tag `tag_name` pointing at the SHA that `target` resolves to."""
        tag_ref = self._format_ref_tag(tag_name)
        target_sha = self._parse_reference(target)
        return self.add_ref(tag_ref, target_sha)

    def remove_tag(self, tag_name):
        """Remove the ref of tag `tag_name`; returns what `remove_ref` returns."""
        return self.remove_ref(self._format_ref_tag(tag_name))

    def clean(self, force=None, directories=None):
        """Delete every untracked file and return the collection of them.

        `self.untracked_files` is read once, each of its entries is passed to
        `os.remove`, and that same collection is returned. `force` and
        `directories` are accepted but not used.
        """
        untracked_files = self.untracked_files
        # Explicit loop: map() is lazy on Python 3, so the previous
        # `map(os.remove, ...)` never deleted anything there.
        for untracked in untracked_files:
            os.remove(untracked)
        return untracked_files

    def clean_working(self):
        """Purge the working files by delegating to `clean()`.

        Called before a checkout when switching branches; returns whatever
        `clean()` returns.
        """
        removed = self.clean()
        return removed

    def _get_fs_structure(self, tree_sha, depth=None, parent_sha=None):
        tree = self[tree_sha]
        structure = {}
        if depth is None:
            depth = self.MAX_TREE_DEPTH
        elif depth == 0:
            return structure
        for entry in tree.items():
            # tree
            if entry.mode == self.MODE_DIRECTORY:
                # Recur
                structure[entry.path] = self._get_fs_structure(entry.sha, depth=depth - 1, parent_sha=tree_sha)
            # commit
            else:
                structure[entry.path] = entry.sha
        structure['.'] = tree_sha
        structure['..'] = parent_sha or tree_sha
        return structure

    def _get_fs_structure_by_path(self, tree_sha, path):
        """Structure of the sub-directory `path` inside the tree `tree_sha`.

        The tree is expanded one level deeper than `path` has components, and
        `funky.subkey` then picks the nested entry for those components.
        """
        components = path.split(os.path.sep)
        structure = self._get_fs_structure(tree_sha, depth=len(components) + 1)
        return funky.subkey(structure, components)

    def commit_ls(self, ref, subpath=None):
        """List a "directory" for a given commit using the tree of that commit.

        An empty `subpath`, or one listed in `self.ROOT_PATHS`, lists the root
        one level deep; any other path is looked up inside the tree.
        """
        tree_sha = self._commit_tree(ref)

        is_root = subpath in self.ROOT_PATHS or not subpath
        if not is_root:
            # Any other path
            return self._get_fs_structure_by_path(tree_sha, subpath)
        return self._get_fs_structure(tree_sha, depth=1)

    def commit_file(self, ref, path):
        """Return info on a given file for a given commit.

        The info dict is the one `get_commit_files` builds for `path`.
        Raises `IndexError` when the commit does not contain the file.
        """
        files = self.get_commit_files(ref, paths=[path])
        # dict views cannot be indexed on Python 3; go through a list, which
        # also keeps IndexError as the "not found" signal.
        name, info = list(files.items())[0]
        return info

    def commit_tree(self, ref, *args, **kwargs):
        """Structure of the tree of `ref`; extra arguments go to `_get_fs_structure`."""
        return self._get_fs_structure(self._commit_tree(ref), *args, **kwargs)

    def update_server_info(self):
        """Call the module-level `update_server_info` on the repo.

        Only bare repositories are touched; otherwise this is a no-op.
        """
        if self.is_bare:
            update_server_info(self.repo)

    def _is_fast_forward(self):
        # Placeholder: not implemented yet, the body does nothing and
        # returns None.
        pass

    def _merge_fast_forward(self):
        # Placeholder: not implemented yet, the body does nothing and
        # returns None.
        pass

    def __hash__(self):
        """This is required otherwise the memoize function will just mess it up
        """
        return hash(self.path)

    def __getitem__(self, key):
        try:
            sha = self._parse_reference(key)
        except:
            raise KeyError(key)
        return self.repo[sha]

    def __setitem__(self, key, value):
        try:
            key = self.dwim_reference(key)
        except:
            pass
        self.repo[key] = value

    def __contains__(self, key):
        try:
            key = self.dwim_reference(key)
        except:
            pass
        return key in self.repo

    def __delitem__(self, key):
        try:
            key = self.dwim_reference(key)
        except:
            raise KeyError(key)
        self.remove_ref(key)


    # Aliases: alternative names bound to existing methods of this class
    # (`commit_info` and `recent_contributors` are defined earlier in it).
    fork = clone_bare
    log = commit_info
    diff_count = changes_count
    contributors = recent_contributors
Ejemplo n.º 33
0
    def find_git_mtimes(self, context, silent_build):
        """
        Use git to find the mtimes of the files we care about

        Returns a dict mapping file paths (relative to ``context.parent_dir``)
        to the newest commit ``author_time`` found for them while walking the
        history of the repository at ``context.git_root``.

        Returns an empty dict when ``context.use_git_timestamps`` is falsy and
        raises ``HarpoonError`` when the repository is a shallow clone.
        Results are combined with, and written back through, the
        ``get_cached_mtimes`` / ``set_cached_mtimes`` helpers.
        """
        if not context.use_git_timestamps:
            return {}

        parent_dir = context.parent_dir
        root_folder = context.git_root

        # Can't use git timestamps if it's just a shallow clone
        # Otherwise all the files get the timestamp of the latest commit
        if context.use_git_timestamps and os.path.exists(os.path.join(root_folder, ".git", "shallow")):
            raise HarpoonError("Can't get git timestamps from a shallow clone", directory=parent_dir)

        git = Repo(root_folder)
        mtimes = {}
        # Every path currently in the git index
        all_files = set(git.open_index())

        # use_files holds the index entries we want, use_files_relpaths the
        # same files expressed relative to parent_dir
        use_files = set()
        use_files_relpaths = set()
        for filename in all_files:
            relpath = os.path.relpath(os.path.join(root_folder, filename.decode('utf-8')), context.parent_dir)

            # Only include files under the parent_dir
            if relpath.startswith("../"):
                continue

            # Ignore files that we don't want git_timestamps from
            # (use_git_timestamps may be a collection of glob patterns instead of a bool)
            if context.use_git_timestamps and type(context.use_git_timestamps) is not bool:
                match = False
                for line in context.use_git_timestamps:
                    if fnmatch.fnmatch(relpath, line):
                        match = True
                        break
                if not match:
                    continue

            # Matched is true by default if
            # * Have context.exclude
            # * No context.exclude and no context.include
            matched = context.exclude or not any([context.exclude, context.include])

            # Anything not matching exclude gets included
            if context.exclude:
                for line in context.exclude:
                    if fnmatch.fnmatch(relpath, line):
                        matched = False

            # Anything matching include gets included
            if context.include:
                for line in context.include:
                    if fnmatch.fnmatch(relpath, line):
                        matched = True
                        break

            # Either didn't match any exclude or matched an include
            if matched:
                use_files.add(filename)
                use_files_relpaths.add(relpath)

        if not silent_build: log.info("Finding modified times for %s/%s git controlled files in %s", len(use_files), len(all_files), root_folder)

        # first_commit remembers the id of the first commit the walker yields
        first_commit = None
        cached_commit, cached_mtimes = self.get_cached_mtimes(root_folder, use_files_relpaths)
        for entry in git.get_walker():
            if first_commit is None:
                first_commit = entry.commit.id.decode('utf-8')

            # Reached the commit the cache was built at: the cache covers the
            # rest of the history, values found so far take precedence
            if cached_commit and entry.commit.id.decode('utf-8') == cached_commit:
                new_mtimes = cached_mtimes
                new_mtimes.update(mtimes)
                mtimes = new_mtimes
                break

            date = entry.commit.author_time
            added = False
            for changes in entry.changes():
                # changes() may yield single changes or lists of them
                if type(changes) is not list:
                    changes = [changes]
                for change in changes:
                    path = change.new.path
                    if root_folder and change.new.path and context.parent_dir:
                        if path in use_files:
                            new_relpath = os.path.relpath(os.path.join(root_folder, change.new.path.decode('utf-8')), context.parent_dir).encode('utf-8')
                            if not new_relpath.decode('utf-8').startswith("../"):
                                # Keep the largest author_time seen for this path
                                if mtimes.get(new_relpath, 0) < date:
                                    mtimes[new_relpath] = date
                                    added = True

            # Stop walking once every wanted file has a time
            # NOTE(review): use_files holds index paths while mtimes is keyed by
            # encoded paths relative to parent_dir; the two presumably only
            # coincide when parent_dir is the git root -- confirm.
            if added:
                if len(use_files - set(mtimes)) == 0:
                    break

        # Normalise keys to text before caching and returning
        mtimes = dict((fn.decode('utf-8') if hasattr(fn, "decode") else fn, mtime) for fn, mtime in mtimes.items())
        if first_commit != cached_commit:
            self.set_cached_mtimes(root_folder, first_commit, mtimes, use_files_relpaths)
        return mtimes
Ejemplo n.º 34
0
            raise SyntaxError("Bad mix of whitespace on %s line %d" %
                              (fn, num + 1))


# Parsed menu documents keyed by file name (working tree) or tree entry name
# (when a revision is given).
all = {}
if not args.revision:
    # No revision requested: read every file below ./menu from disk
    for p, _, flist in os.walk("menu"):
        for fn in flist:
            # Context manager so each file is closed as soon as it is read
            with open(os.path.join(p, fn), "r", encoding="utf-8") as menu_file:
                text = menu_file.read()
            check_indents(text, fn)
            all[fn] = json.loads(text)
else:
    # Thanks to Jelmer Vernooij for spelling this one out for me :-D
    repo = Repo('.')
    rev = args.revision.encode("ascii")
    # Expand an abbreviated revision to the first commit id starting with it
    for r in repo.get_walker():
        if r.commit.id.startswith(rev):
            rev = r.commit.id
            break
    menu = porcelain.get_object_by_path(repo, "menu", rev)
    for name, mode, object_id in menu.iteritems():
        text = str(repo[object_id].data, "utf-8")
        check_indents(text, name)
        all[name] = json.loads(text)

if args.weeks:
    dates = [
        datetime.datetime.strptime(e["start"], "%Y-%m-%d")
        for e in all.values()
    ]
    # Using max(dates) instead of just today's date so we're a
Ejemplo n.º 35
0
class Analyser(object):
    """Collect commit statistics per author and per file from a repository.

    The repository is opened with ``Repo(repo_name)`` in ``__init__`` and
    walked with ``get_walker()`` in ``do_analyse``.  Results accumulate in
    ``self.authors`` and ``self.file_paths`` and are printed or exported by
    the ``report_*`` methods.
    """

    #########################
    ## STATIC CLASS MEMBER ##
    #########################

    # Names of the change types the change-parsing code distinguishes.
    # NOTE(review): this tuple is not referenced by the methods shown here.
    CHANGE_TYPES = (
        'add',
        'modify',
        'delete',
    )

    ####################
    ## PUBLIC METHODS ##
    ####################

    def __init__(
        self,
        repo_name,
        searching_paths,
        allowed_endings,
        exclude_patters,
        exclude_paths,
    ):
        """Open the repository and start with empty statistics.

        :param repo_name: value handed to ``Repo(...)`` to open the repository
        :param searching_paths: stored as ``SEARCHING_PATHS``; path prefixes
            consulted when deciding whether a file is taken into account
        :param allowed_endings: stored as ``ALLOWED_ENDINGS``; file endings
            that are taken into account
        :param exclude_patters: stored as ``EXCLUDE_PATTERNS``; regular
            expressions matched against the start of a path
        :param exclude_paths: stored as ``EXCLUDE_PATHS``; path prefixes that
            are skipped
        """

        # The repository that gets analysed
        self.repo_name = repo_name
        self.repo = Repo(repo_name)

        # Filter configuration: where to look and what to leave out
        self.SEARCHING_PATHS, self.ALLOWED_ENDINGS = (searching_paths,
                                                      allowed_endings)
        self.EXCLUDE_PATTERNS, self.EXCLUDE_PATHS = (exclude_patters,
                                                     exclude_paths)

        # Collected results: files seen, files deleted, authors, commit count
        self.file_paths, self.deleted_paths = {}, {}
        self.authors = {}
        self.commits = 0

    def do_analyse(self):
        """
        """

        for change_tree in self.repo.get_walker():
            author_name = change_tree.commit.author

            if not author_name in self.authors:
                self.authors[author_name] = Author.Author(name=author_name)

            self.authors[author_name].commits.append(change_tree.commit)

            for tree_change in change_tree.changes():
                # Save tree data
                self._save_tree_data(change_tree=change_tree,
                                     tree_change=tree_change)

    def report_file_endings(self):
        """Generate the file-ending report and export it as a chart.

        The report is built from ``self.file_paths`` and exported with a
        ``ChartExporter`` of the type registered under ``'PIE'``.
        """

        report = FileEndingReport(paths=self.file_paths)
        report.generate()

        pie_exporter = ChartExporter(type=ChartExporter.EXPORT_TYPE['PIE'])
        report.report(exporter=pie_exporter)

    def report_authors_commits(self):
        """
        """

        print("############################################")

        for author_name in self.authors:
            author_commit_count = len(self.authors[author_name].commits)
            print("%s has %s commits" % (author_name, author_commit_count))

        print("############################################")

    def report_commits_per_file(self):

        print("############################################")

        for file_path in self.file_paths:
            repo_file = self.file_paths[file_path]
            file_commit_count = len(repo_file.commits)
            print("%s is in %s commits" % (repo_file.path, file_commit_count))

        print("############################################")

    def report_top_10_commited_files(self):

        print("############################################")

        top_ten = []

        for file_path in self.file_paths:
            top_ten.append(self.file_paths[file_path])

        # Sort the files by the number of commits
        sorted_top_ten = sorted(top_ten,
                                key=lambda repo_file: len(repo_file.commits),
                                reverse=True)

        for idx, repo_file in enumerate(sorted_top_ten):

            # To have correct index we need to check here
            if idx is 10:
                break

            file_commit_count = len(repo_file.commits)
            print("%s is in %s commits" % (repo_file.path, file_commit_count))

        print("############################################")

    def report_for_all_authors(self):
        """Generate the commit report over all authors and export it.

        The report is built from ``self.authors`` and exported with a
        ``ChartExporter`` of the type registered under ``'SPLINE'``.
        """

        report = AuthorsCommitReport(authors=self.authors)
        report.generate()

        spline_exporter = ChartExporter(
            type=ChartExporter.EXPORT_TYPE['SPLINE'])
        report.report(exporter=spline_exporter)

    def report_for_author(self, name):
        """
        """

        author = self.authors[name]

        print("############################################")
        print("Author: %s" % author.name)

        # year
        years = {}

        for commit in author.commits:

            author_commit_time = time.localtime(commit.author_time)

            commit_year = str(author_commit_time.tm_year)

            if not commit_year in years:
                years[commit_year] = {}

            this_year = years[commit_year]
            commit_month = author_commit_time.tm_mon

            if not commit_month in this_year:
                this_year[commit_month] = {}

            this_month = this_year[commit_month]
            commit_day = author_commit_time.tm_mday

            if not commit_day in this_month:
                this_month[commit_day] = 0

            this_month[commit_day] += 1

            # author_commit_time = time.ctime(commit.author_time)
            # print("%s : %s" % (author_commit_time, commit.sha))

        reverse_sort_order = True

        sorted_years = sorted(years, reverse=reverse_sort_order)

        for year in sorted_years:
            print("Year %s:" % year)

            year_dict = years[year]
            sorted_year_dict = sorted(year_dict, reverse=reverse_sort_order)

            for month in sorted_year_dict:
                print('     Month %s:' % month)

                month_dict = year_dict[month]
                sorted_month_dict = sorted(month_dict,
                                           reverse=reverse_sort_order)

                for day in sorted_month_dict:
                    if day < 10:
                        print('             Day  %s: %s' %
                              (day, month_dict[day]))
                    else:
                        print('             Day %s: %s' %
                              (day, month_dict[day]))

        print("############################################")

    #####################
    ## PRIVATE METHODS ##
    #####################

    def _is_matching_exclude_pattern(self, path):
        """
        """

        for pattern in self.EXCLUDE_PATTERNS:

            p = re.compile(pattern)

            if p.match(path) is not None:
                return True

        return False

    def _in_exclude_path(self, path):
        """
        """

        for exclude_path in self.EXCLUDE_PATHS:

            if path.startswith(exclude_path):
                return True

        return False

    def _is_allowed_path(self, path):
        """
        """

        try:
            file_ending = File.get_ending(file_path=path)

            for search_path in self.SEARCHING_PATHS:

                # Looks if path in exclude path
                if self._in_exclude_path(path):
                    return False

                # Looks if path matches the excluding pattern
                if self._is_matching_exclude_pattern(path):
                    return False

                # TODO: Check out if this is logical
                # Looks if path starts not in a searching path
                if not self._is_in_search_path(path=path,
                                               search_path=search_path):
                    return False

            if file_ending not in self.ALLOWED_ENDINGS or self._has_repo_file(
                    file_path=path):
                return False

        except Exception as err:
            print(err)

        return True

    def _save_tree_data(self, change_tree, tree_change):

        # Check if is list
        if type(tree_change) is list:

            for change in tree_change:
                self._parse_change_tree(change_tree=change_tree,
                                        tree_change=change)

        else:
            self._parse_change_tree(change_tree=change_tree,
                                    tree_change=tree_change)

    def _parse_change_tree(self, change_tree, tree_change):

        change_type = tree_change.type

        if change_type is 'add' or change_type is 'modify':

            new_tree_sha = tree_change.new.sha
            new_tree_value = self.repo[new_tree_sha]
            new_tree_data = new_tree_value.data
            file_path = tree_change.new.path

            # Check if the file has not been later being deleted
            if file_path in self.deleted_paths:
                return

            file_ending = File.get_ending(file_path=file_path)

            # Try to get repo file
            try:
                repo_file = self._get_repo_file(file_path=file_path)
                # Add commit to repo file
                repo_file.commits.append(change_tree.commit)
            except:
                pass

            # Check if file is in allowed path
            if not self._is_allowed_path(path=file_path):
                return

            counted_lines = new_tree_data.count('\n')

            # Get repo file
            repo_file = self._create_repo_file(file_path=file_path)
            # Set repo file data
            repo_file.code_lines = counted_lines
            repo_file.ending = file_ending
            repo_file.commits.append(change_tree.commit)

        elif change_type is 'delete':
            file_path = tree_change.old.path

            self.deleted_paths[file_path] = True

    def _is_in_search_path(self, path, search_path):
        """
        """

        if not path.startswith('/'):
            path = '/' + path

        if search_path == '':
            search_path = '/'
        elif not search_path.startswith('/'):
            search_path = '/' + search_path

        return path.startswith(search_path)

    def _create_repo_file(self, file_path):
        """
        """

        if not self._has_repo_file(file_path=file_path):

            file = File(path=file_path)
            self.file_paths[file_path] = file

            return file

        else:
            return None

    def _get_repo_file(self, file_path):
        """
        """

        file = self.file_paths[file_path]
        return file

    def _has_repo_file(self, file_path):
        """
        """

        if file_path in self.file_paths:
            return True
        else:
            return False