Example #1
def transform_data(input):
    repo = Repository(get_git_dir())
    commits = {
        commit.id.hex: commit
        for commit in repo.walk(repo.branches.get("master").target)
    }

    output = []
    for row in input["results"]:
        if row["command"].endswith("[ERROR]"):
            continue
        dir, commit_hash = re.match("^([^ ]+/)?([0-9a-f]+)",
                                    row["command"]).groups()
        try:
            commit = commits[commit_hash]
        except KeyError:
            print(f"Skipping commit {commit_hash}", file=sys.stderr)
            continue
        for time in row["times"]:
            output_row = row.copy()
            output_row["commit"] = f"{commit.commit_time}-{commit_hash}"
            output_row["message"] = commit.message
            del output_row["times"]
            output_row["time"] = time

            output.append(output_row)

    return output
class GitAccessor(ScmAccessor):
    def __init__(self, repo_path, start_rev=None):
        super().__init__(repo_path=repo_path, start_rev=start_rev)
        self._scm = Repository(path.join(repo_path))

    def get_log(self):
        data = []

        for c in self._scm.walk(self._scm.head.target, GIT_SORT_TIME):
            print("Processing commit %s" % c.id)

            diff = self._scm.diff(c, c.parents[0]).stats.format(
                GIT_DIFF_STATS_FULL, 1) if c.parents else ""

            diff = diff.splitlines()
            if len(diff) >= 1:
                diff = diff[:-1]

            stripped_diff = [d.split("|")[0].strip() for d in diff]

            e = LogEntry()
            e.id = c.id
            e.msg = c.message.strip("\n")
            e.author = c.committer.name
            e.email = c.committer.email
            e.time = datetime.fromtimestamp(c.commit_time)
            e.diff = stripped_diff
            data.append(e)

            if self._start_rev and c.id.hex == self._start_rev:
                break

        return data
def try_commit_and_push(name, version, bundle_version):
    repo = Repository("headers/.git")
    new_commit_message = name + " " + version + " (" + bundle_version + ")"

    # Already committed this version?
    for commit in repo.walk(repo.head.target,
                            GIT_SORT_TIME | GIT_SORT_REVERSE):
        if commit.message == new_commit_message:
            return False

    index = repo.index
    index.add_all()
    index.write()

    # print(index.diff_to_workdir().stats.files_changed)
    # if index.diff_to_workdir().stats.files_changed == 0:
    #     return False

    print("Commiting...")

    user = repo.default_signature
    tree = index.write_tree()
    ref = "refs/heads/master"
    repo.create_commit(ref, user, user, new_commit_message, tree,
                       [repo.head.get_object().hex])

    push(repo, ref)
    return True
def get_bug_commit_ratio_per_file(git_repo, output_file, git_folder=".git/"):
    result = []
    exec_dir = os.getcwd()
    repo = Repository(os.path.join(git_repo, git_folder))

    os.chdir(git_repo)

    for commit in repo.walk(repo.head.target):
        touched_files = get_touched_files(commit)
        # bug_related is not defined in the original snippet; it presumably comes
        # from a helper such as is_bug_related(), used in the cross-referencing
        # example further below.
        bug_related = is_bug_related(commit)

        for file in touched_files:
            file_data = [f for f in result if f['file_name'] == file]

            if file_data:
                file_data = file_data[0]
                file_data['commit_num'] += 1
                if bug_related:
                    file_data['bug_commit_num'] += 1
            else:
                result.append({'file_name': file,
                               'commit_num': 1,
                               'bug_commit_num': 1 if bug_related else 0})

    os.chdir(exec_dir)

    for entry in result:
        entry['bug_commit_ratio'] = entry['bug_commit_num'] / entry['commit_num']

    with open(output_file, "w", newline='') as output:
        writer = csv.DictWriter(output, csv_header)
        writer.writeheader()
        writer.writerows(result)
Example #5
def git_is_clean(srcdir, project):
    repo = Repository(os.path.join(srcdir, project.workspace_path, ".git"))
    for _, b in iteritems(repo.status()):
        if b != GIT_STATUS_IGNORED and b != GIT_STATUS_CURRENT:
            return False, "has uncommitted changes"
    if repo.head_is_detached:
        return False, "has detached HEAD"
    origin = get_origin(repo, project)
    if not origin:
        return False, "has no upstream remote"
    remote_refs = []
    local_refs = {}
    for refname in repo.listall_references():
        if refname.startswith("refs/remotes/%s/" % origin.name):
            ref = repo.lookup_reference(refname)
            if ref.type == GIT_REF_OID:
                remote_refs.append(ref.target)
        elif not refname.startswith("refs/remotes/"):
            ref = repo.lookup_reference(refname)
            if ref.type == GIT_REF_OID:
                local_refs[ref.peel().id] = refname
    if not remote_refs:
        return False, "has no upstream remote branches"
    if not local_refs:
        return False, "has no local branches"
    if not repo.lookup_branch("%s/%s" % (origin.name, project.master_branch), GIT_BRANCH_REMOTE):
        return False, "has no upstream master branch"
    for remote_ref in remote_refs:
        for commit in repo.walk(remote_ref):
            if commit.id in local_refs:
                del local_refs[commit.id]
    if local_refs:
        return False, "has local commits: %s" % ", ".join(["'%s'" % name for _, name in iteritems(local_refs)])
    return True, ""
def extract_commits(repos_root, output_path):
    # Uncomment code to generate a separate file for each commit.

    try:
        os.makedirs(output_path)
    except FileExistsError:
        pass

    exec_dir = os.getcwd()

    for git_repo in get_immediate_subdirectories(repos_root):
        os.chdir(git_repo)
        repo = Repository(os.path.join(git_repo, git_folder))
        root = etree.Element("commits")

        repo_name = os.path.basename(os.path.normpath(git_repo))

        print("\n> project: " + repo_name + " extraction started")

        for commit in repo.walk(repo.head.target):
            stats = get_commit_stats(commit.id)
            commit_xml = commit_to_xml(commit, stats)
            root.append(commit_xml)

            # print(".", end=" ")
            print("> project: " + repo_name + ", commit " + str(commit.id) + " processed")

        output_xml = xml_to_string(root)

        os.chdir(exec_dir)

        with open(os.path.join(output_path, repo_name + "_" + output_commit_file), "w") as file:
            file.write(output_xml)

        print("\n> project: " + repo_name + " extraction finished")
def _plot_chord_diagram_for_raw_bugs(project_name: str,
                                     project_repo: pygit2.Repository,
                                     bug_set: tp.FrozenSet[PygitBug],
                                     szz_tool: str) -> gob.FigureWidget:
    """Creates a chord diagram representing relations between introducing/fixing
    commits for a given set of RawBugs."""

    # maps commit -> node id
    map_commit_to_id: tp.Dict[pygit2.Commit,
                              int] = _map_commits_to_nodes(project_repo)
    commit_type: tp.Dict[pygit2.Commit, NodeType] = {}
    commit_count = len(map_commit_to_id.keys())

    edge_colors = ['#d4daff', '#84a9dd', '#5588c8', '#6d8acf']

    for commit in project_repo.walk(project_repo.head.target,
                                    pygit2.GIT_SORT_TIME):
        commit_type[commit] = NodeType.DEFAULT

    # if less than 2 commits, no graph can be drawn!
    if commit_count < 2:
        raise PlotDataEmpty

    commit_coordinates = _compute_node_placement(commit_count)

    # draw relations and preprocess commit types
    lines = _generate_line_data(bug_set, commit_coordinates, map_commit_to_id,
                                commit_type, edge_colors)
    nodes = _generate_node_data(project_repo, commit_coordinates,
                                map_commit_to_id, commit_type)

    data = nodes + lines
    layout = _create_layout(f'{szz_tool} {project_name}')
    return gob.FigureWidget(data=data, layout=layout)
Example #8
def classify_by_date(
    path: str, start: Optional[str] = None, end: Optional[str] = None, model: Optional[MLModel] = None
) -> List[str]:
    """Classify commits by date."""
    start_time = 0
    end_time = sys.maxsize

    if start is not None:
        start_time = int(time.mktime(datetime.datetime.strptime(start, "%Y-%m-%d").timetuple()))

    if end is not None:
        end_time = int(time.mktime(datetime.datetime.strptime(end, "%Y-%m-%d").timetuple()))

    repo_path = os.path.join(path, ".git")
    if os.path.exists(repo_path):
        repo = Repository(repo_path)
    else:
        raise RepositoryNotFoundException

    orig_messages = []
    for commit in repo.walk(repo.head.target, GIT_SORT_TOPOLOGICAL):
        if start_time < commit.commit_time < end_time:
            orig_messages.append(commit.message.lower())

    return classify_messages(orig_messages, model)
Example #9
def getCommitList(obj,
                  startJd=None,
                  endJd=None,
                  branch="") -> List[Tuple[int, str]]:
    """
	returns a list of (epoch, commit_id) tuples

	this function is optimized for recent commits
		i.e. endJd is either None or recent
	"""
    if not branch:
        branch = "main"
    startEpoch = None
    endEpoch = None
    if startJd is not None:
        startEpoch = getEpochFromJd(startJd)
    if endJd is not None:
        endEpoch = getEpochFromJd(endJd)
    repo = Repository(obj.vcsDir)
    data = []  # type: List[Tuple[int, str]]
    # items of data are (epochTime, commitHash)
    target = repo.branches[branch].target
    for commit in repo.walk(target, GIT_SORT_TIME):
        tm = commit.author.time
        if endEpoch is not None and tm > endEpoch:
            continue
        if startEpoch is not None and tm < startEpoch:
            break
        data.append((
            tm,
            commit.id.hex,
        ))
    data.reverse()
    return data
def _bug_data_diff_plot(project_name: str, project_repo: pygit2.Repository,
                        bugs_left: tp.FrozenSet[PygitBug],
                        bugs_right: tp.FrozenSet[PygitBug]) -> gob.Figure:
    """Creates a chord diagram representing the diff between two sets of bugs as
    relation between introducing/fixing commits."""
    commits_to_nodes_map = _map_commits_to_nodes(project_repo)
    commit_occurrences: tp.Dict[pygit2.Commit, DiffOccurrence] = {}
    commit_count = len(commits_to_nodes_map.keys())
    commit_coordinates = _compute_node_placement(commit_count)

    for commit in project_repo.walk(project_repo.head.target.hex,
                                    pygit2.GIT_SORT_TIME):
        commit_occurrences[commit] = DiffOccurrence.NONE

    lines: tp.List[gob.Scatter] = _generate_diff_line_data(
        _diff_raw_bugs(bugs_left, bugs_right), commits_to_nodes_map,
        commit_coordinates, commit_occurrences)

    commit_types = {
        commit: __DIFF_TO_NODE_TYPE[do]
        for commit, do in commit_occurrences.items()
    }

    nodes: tp.List[gob.Scatter] = _generate_node_data(project_repo,
                                                      commit_coordinates,
                                                      commits_to_nodes_map,
                                                      commit_types)
    data = lines + nodes
    layout = _create_layout(f'szz_diff {project_name}')
    return gob.Figure(data=data, layout=layout)
def _generate_node_data(
        project_repo: pygit2.Repository,
        commit_coordinates: tp.List[npt.NDArray[np.float64]],
        map_commit_to_id: tp.Dict[pygit2.Commit, int],
        commit_type: tp.Dict[pygit2.Commit, NodeType]) -> tp.List[gob.Scatter]:
    nodes = []

    for commit in project_repo.walk(project_repo.head.target,
                                    pygit2.GIT_SORT_TIME):
        # draw commit nodes using preprocessed commit types
        commit_id = map_commit_to_id[commit]

        if commit.id == project_repo.head.target:
            commit_type[commit] = NodeType.FIXING_HEAD if commit_type[
                commit] == NodeType.FIX else NodeType.HEAD

        # set node data according to commit type
        node_size = 10 if commit_type[commit] == NodeType.HEAD or commit_type[
            commit] == NodeType.FIXING_HEAD else 8
        displayed_message = commit.message.partition('\n')[0]
        node_label = f'Type: {commit_type[commit]}<br>' \
                     f'Hash: {commit.hex}<br>' \
                     f'Author: {commit.author.name}<br>' \
                     f'Date: {datetime.fromtimestamp(commit.commit_time)}<br>' \
                     f'Message: {displayed_message}'
        node_color = commit_type[commit].color

        node_scatter = _create_node(commit_coordinates[commit_id], node_color,
                                    node_size, node_label)

        nodes.append(node_scatter)

    return nodes
Example #12
    def report():
        max_name_length = 0
        max_email_length = 0

        authors = dict()
        repo = Repository('%s/.git' % find_toplevel(os.getcwd()))
        for commit in repo.walk(repo.head.target, GIT_SORT_TOPOLOGICAL | GIT_SORT_REVERSE):
            if commit.author.email not in authors.keys():
                authors[commit.author.email] = dict()
                authors[commit.author.email]['author'] = commit.author
                authors[commit.author.email]['commits'] = 1
                if len(commit.author.name) > max_name_length:
                    max_name_length = len(commit.author.name)
                if len(commit.author.email) > max_email_length:
                    max_email_length = len(commit.author.email)
            else:
                authors[commit.author.email]['commits'] += 1

        print(
            'Name'.ljust(max_name_length), '\t',
            'Email'.ljust(max_email_length), '\t',
            'Commits'
        )

        print(
            '_' * max_name_length, '\t',
            '_' * max_email_length, '\t',
            '_' * 7
        )
        for email, author in authors.items():
            print(
                author['author'].name.ljust(max_name_length), '\t',
                author['author'].email.ljust(max_email_length), '\t',
                author['commits']
            )
Example #13
def classify_by_tag(
    path: str, start_tag: str, end_tag: Optional[str] = None, model: Optional[MLModel] = None
) -> List[str]:
    """Classify messages for the given repo based on tags."""
    repo_path = os.path.join(path, ".git")
    if os.path.exists(repo_path):
        repo = Repository(repo_path)
    else:
        raise RepositoryNotFoundException

    start_tag = repo.revparse_single("refs/tags/" + start_tag)

    if end_tag is None:
        end_tag = repo.revparse_single("refs/heads/master")
    else:
        end_tag = repo.revparse_single("refs/tags/" + end_tag)

    orig_messages = []
    walker = repo.walk(end_tag.id, GIT_SORT_TOPOLOGICAL)
    walker.hide(start_tag.id)

    for commit in walker:
        orig_messages.append(commit.message.lower())

    return classify_messages(orig_messages, model)
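The walker.hide() call above is the pygit2 idiom for walking a commit range such as start_tag..end_tag: the walk starts from the end revision and every ancestor of the hidden revision is excluded. A minimal standalone sketch of the same pattern (the repository path and tag names are illustrative, not taken from the original project):

from pygit2 import Repository, GIT_SORT_TOPOLOGICAL

repo = Repository("/path/to/repo/.git")           # illustrative path
newer = repo.revparse_single("refs/tags/v2.0")    # illustrative tag names
older = repo.revparse_single("refs/tags/v1.0")

walker = repo.walk(newer.id, GIT_SORT_TOPOLOGICAL)
walker.hide(older.id)                             # drop ancestors of v1.0

# commits in the range v1.0..v2.0, as in classify_by_tag above;
# assumes the tags resolve to commits
messages = [c.message.strip() for c in walker]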
    def get_commit_activity(self, project):
        from datetime import date, timedelta
        from pygit2 import Repository
        from pygit2 import GIT_SORT_TIME
        repo = Repository(project.gitrepo)

        weeks = self.get_weeks()
        for commit in repo.walk(repo.head.oid, GIT_SORT_TIME):
            commit_time = date.fromtimestamp(commit.commit_time)
            commit_week = commit_time - timedelta(days=commit_time.weekday())

            if commit_week not in weeks:
                continue

            weeks[commit_week]['mine'] += 1

        counts = []
        max = 0
        for k in sorted(weeks):
            counts.append({
                "week":     k.isoformat(),
                "mine":     weeks[k]['mine'],
                "others":   weeks[k]['others'],
            })
        return counts
Example #15
def get_git_info(git_working_tree_dir):
    repository_path = discover_repository(git_working_tree_dir)
    assert repository_path is not None

    repo = Repository(repository_path)
    commits = list(repo.walk(repo.head.target, GIT_SORT_NONE))
    head_commit = commits[0]
    diff = repo.diff()

    git_info = {
        'head_commit': {
            'hash': head_commit.hex,
            'message': head_commit.message,
            'author': head_commit.author.name
        },
        'branch': {
            'name': repo.head.shorthand
        },
        'stats': {
            'files_changed': diff.stats.files_changed,
        },
        'num_commits': len(commits)
    }

    return git_info
Example #16
class RepositoryProcessor(object):

    def __init__(self, repository_path):
        self.repo = GitRepository(repository_path + '/.git')
        self.users = {}

    def get_badges_processors_for_user(self, email):
        if email in self.users:
            return self.users[email]
        self.users[email] = []
        for badge_class in initialize_badge_classes():
            logging.info(u'Initializing badge class [%s] for user [%s]' % (str(badge_class), email))
            self.users[email].append(badge_class(email))
        return self.users[email]

    def process(self):
        # returns the json of the collaborators
        for commit in [c for c in self.repo.walk(self.repo.head.oid, GIT_SORT_TIME)][::-1]:
            for badge in self.get_badges_processors_for_user(commit.author.email):
                badge.process_commit(commit, datetime.fromtimestamp(commit.commit_time))
        result = []
        for user_email, badges in self.users.items():
            user = {"email": user_email, "badges": []}
            result.append(user)
            for badge in badges:
                if isinstance(badge, Badge):
                    if badge.award_this():
                        user['badges'].append({"badge_slug": badge.slug})
                else:
                    user.update(badge.update_data())
            user.update(count_modifications_by_user(user_email, self.repo.path))
            print(user)
        return result
def get_labels(repo_path, branch, pair_file, last_commit):
    """
    Get the labels from a file produced by the SZZ algorithm. It contains
    bug-fixing commits and their respective bug-introducing commits.
    """
    repo = Repository(repo_path)
    head = repo.references.get(branch)

    commits = []
    for commit in list(
            repo.walk(head.target, GIT_SORT_TOPOLOGICAL | GIT_SORT_REVERSE)):
        commits.append(commit)
        if commit.hex == last_commit:
            break

    commits = list(reversed(commits))

    pairs = {}
    with open(pair_file, 'r') as inp:
        pairs = json.load(inp)

    unique_pairs = set([p[1] for p in pairs])
    labels = []

    for commit in tqdm(commits):
        label = [commit.hex, "1" if commit.hex in unique_pairs else "0"]
        labels.append(label)

    return labels
def save_label_distribution(repo_path, branch, labels, res_path):
    """
    Save a distribution of the labels over time.
    """
    ldict = set()
    for label in labels:
        if label[1] == "1":
            ldict.add(label[0])

    repo = Repository(repo_path)
    head = repo.references.get(branch)

    commits = list(repo.walk(head.target, GIT_SORT_TOPOLOGICAL))

    start_year = dat.fromtimestamp(commits[-1].commit_time).year
    end_year = dat.fromtimestamp(commits[0].commit_time).year

    num_years = end_year - start_year
    year_dist = [0 for y in range(num_years + 1)]
    years = [y for y in range(start_year, end_year + 1)]

    for commit in commits:
        if commit.hex in ldict:
            commit_year = dat.fromtimestamp(commit.commit_time).year
            year_dist[commit_year - start_year] += 1

    fig = plt.figure()
    plt.bar(years, year_dist)
    plt.xticks(years)
    plt.xlim(xmin=years[0] - 1, xmax=years[-1] + 1)
    fig.autofmt_xdate()
    plt.savefig(res_path)
Example #19
def parse_diffusion_features(pid, repo_path, branch, start, stop=-1):
    """
    Function to extract diffusion features from a set of commits.
    """
    repo = Repository(repo_path)

    head = repo.references.get(branch)
    commits = list(
        repo.walk(head.target, GIT_SORT_TOPOLOGICAL | GIT_SORT_REVERSE))

    start = start - 1 if (start > 0) else start
    commits = commits[start:stop] if (stop != -1) else commits[start:]

    features = [[] for c in range(len(commits))]
    for i, commit in enumerate(tqdm(commits[1:], position=pid)):
        diff = repo.diff(commits[i], commit)

        patches = [p for p in diff]

        # Extract all different subsystems that have been modified
        modules = set([])
        subsystems_mapping = {}
        entropy_change = 0

        file_changes = []
        total_change = 0
        for patch in patches:
            # Skip binary files
            if patch.delta.is_binary:
                continue
            _, addition, deletions = patch.line_stats
            total_change = total_change + (addition + deletions)
            file_changes.append(addition + deletions)

            # Store all subsystems
            fpath = patch.delta.new_file.path
            subsystems = fpath.split('/')[:-1]

            root = subsystems_mapping
            for system in subsystems:
                if system not in root:
                    root[system] = {}
                root = root[system]
            if len(subsystems) > 0:
                modules.add(subsystems[0])

        # Check how many subsystems that have been touched
        modified_systems = count_diffing_subsystems(subsystems_mapping)

        # Calculate the entropy for the commit
        entropy_change = count_entropy(file_changes, total_change)

        # Add all features
        features[i].append(str(commit.hex))
        features[i].append(str(float(modified_systems)))
        features[i].append(str(float(len(modules))))
        features[i].append(str(float(entropy_change)))

    RES[pid] = features
Example #20
def getHist(repo):
	base = Repository(repo)
	base.checkout('HEAD')
	history = []
	for commit in base.walk(base.head.target, GIT_SORT_TOPOLOGICAL | GIT_SORT_REVERSE):
		history.append(commit)
	
	return history
Example #21
def get_and_update_repo_cache(repo_path):
    cache_filename = '%s-stats.cache' % repo_path
    if os.path.exists(cache_filename):
        with open(cache_filename) as f:
            data = load(f)
    else:
        data = {
            'author_to_month_to_additions': defaultdict(defaultdict_int),
            'author_to_month_to_deletions': defaultdict(defaultdict_int),
            'author_to_month_to_commits': defaultdict(defaultdict_int),
            'day_to_count': defaultdict(defaultdict_int),
            'latest_sha': None,
        }

    repo = Repository(repo_path)

    count = 0
    for commit in repo.walk(repo.head.target, GIT_SORT_TOPOLOGICAL):
        count += 1
        if commit.type == GIT_OBJ_COMMIT:
            if data['latest_sha'] == commit.hex:
                break
        
            if not commit.message.lower().startswith('merge'):
                try:
                    d = repo.diff('%s^' % commit.hex, commit)
                except KeyError:
                    # First commit!
                    break
                patches = list(d)
                additions = sum([p.additions for p in patches])
                deletions = sum([p.deletions for p in patches])

                author = author_aliases.get(commit.author.email, commit.author.email)

                day = date.fromtimestamp(commit.commit_time)
                data['day_to_count']['Lines'][day] += additions
                data['day_to_count']['Lines'][day] -= deletions

                if additions > 1000 and deletions < 5 and commit.hex not in whitelist_commits:
                    if commit.hex not in blacklist_commits:
                        print('WARNING: ignored %s looks like an embedding of a lib (message: %s)' % (commit.hex, commit.message))
                    continue
                if (additions > 3000 or deletions > 3000) and commit.hex not in whitelist_commits:
                    if commit.hex not in blacklist_commits and additions != deletions:  # Guess that if additions == deletions it's a big rename of files
                        print('WARNING: ignored %s because it is bigger than 3k lines. Put this commit in the whitelist or the blacklist (message: %s)' % (commit.hex, commit.message))
                    continue
                month = date(day.year, day.month, 1)
                data['author_to_month_to_additions'][author][month] += additions
                data['author_to_month_to_deletions'][author][month] += deletions
                data['author_to_month_to_commits'][author][month] += 1
                if data['latest_sha'] is None:
                    data['latest_sha'] = commit.hex

    with open(cache_filename, 'w') as f:
        dump(data, f)

    return data
def save_history_features_graph(repo_path, branch, graph_path):
    """
    Track the number of developers that have worked in a repository and save the
    results in a graph for later use.
    """
    repo = Repository(repo_path)
    head = repo.references.get(branch)

    commits = list(
        repo.walk(head.target, GIT_SORT_TOPOLOGICAL | GIT_SORT_REVERSE))
    current_commit = repo.head.target

    start_time = time.time()

    all_files = {}
    current_commit = repo.get(str(current_commit))
    files = get_files_in_tree(current_commit.tree, repo)

    for (_, name) in tqdm(files):
        all_files[name] = {}
        all_files[name]['lastcommit'] = current_commit.hex
        all_files[name][current_commit.hex] = {}
        all_files[name][current_commit.hex]["prevcommit"] = ""
        all_files[name][current_commit.hex]["authors"] = [
            current_commit.committer.name
        ]

    for i, commit in enumerate(tqdm(commits[1:])):
        files = get_diffing_files(commit, commits[i], repo)
        for (_, name, _) in files:
            if name not in all_files:
                all_files[name] = {}

            last_commit = ""
            if 'lastcommit' not in all_files[name]:
                all_files[name]['lastcommit'] = commit.hex
            else:
                last_commit = all_files[name]['lastcommit']

            all_files[name][commit.hex] = {}
            all_files[name][commit.hex]["prevcommit"] = last_commit

            authors = set([commit.committer.name])
            if last_commit:
                authors.update(all_files[name][last_commit]["authors"])
            all_files[name][commit.hex]["authors"] = authors

            all_files[name]['lastcommit'] = commit.hex

    with open(graph_path, 'w') as output:
        json.dump(all_files, output, default=set_to_list)

    end_time = time.time()

    print("Done")
    print("Overall processing time {}".format(end_time - start_time))
Example #23
def get_commits():
    global commit_list
    global work_list
    global commit_complexities

    repo = Repository(REPO_PATH)
    for commit in repo.walk(repo.head.target):
        commit_list.append(str(commit.id))
        work_list.append(str(commit.id))
        commit_complexities.append(0.0)
Example #24
    def gets(cls, path, max_count=100, order=GIT_SORT_TIME):
        """gets commits from a git repository.

        :param path: The normalized path to the git repository.
        :param max_count: max count of commits.
        :param order: order commits list."""
        repo = Repository(path)
        return [cls(c.hex, [p.hex for p in c.parents])
                for c in islice(repo.walk(repo.head.target, order),
                                max_count)]
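A hedged usage sketch for the classmethod above; the owning class name (Commit) and the repository path are assumptions, since the snippet only shows the method itself:

# Assuming the classmethod lives on a class named Commit (illustrative):
recent = Commit.gets("/path/to/repo/.git", max_count=10)
for c in recent:
    print(c)  # each item wraps a commit hex plus the hexes of its parents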
Example #25
def test_003_init_in_branch(data_dir: pathlib.Path,
                            root_repo: pygit2.Repository) -> None:
    dev_branch = root_repo.branches.local.create(
        name='dev', commit=next(root_repo.walk(root_repo.head.target)))
    root_repo.checkout(refname=dev_branch, strategy=pygit2.GIT_CHECKOUT_FORCE)

    core.init()

    assert config.Config.load(
        path=pathlib.Path('wok.yml')) == config.Config.load(path=data_dir /
                                                            '003_wok.yml')
Example #26
    def gets(cls, path, max_count=100, order=GIT_SORT_TIME):
        """gets commits from a git repository.

        :param path: The normalized path to the git repository.
        :param max_count: max count of commits.
        :param order: order commits list."""
        repo = Repository(path)
        return [
            cls(c.hex, [p.hex for p in c.parents])
            for c in islice(repo.walk(repo.head.target, order), max_count)
        ]
def _map_commits_to_nodes(
        project_repo: pygit2.Repository) -> tp.Dict[pygit2.Commit, int]:
    """Maps commit hex -> node id."""
    commits_to_nodes_map: tp.Dict[pygit2.Commit, int] = {}
    commit_count = 0
    for commit in project_repo.walk(project_repo.head.target.hex,
                                    pygit2.GIT_SORT_TIME):
        # node ids are sorted by time
        commits_to_nodes_map[commit] = commit_count
        commit_count += 1
    return commits_to_nodes_map
Example #28
def get_time(path):
    """Function to find the commits done on each hour"""
    times = {}
    repo = Repository(path)
    for commit in repo.walk(repo.head.target, GIT_SORT_TOPOLOGICAL):
        time = datetime.datetime.fromtimestamp(commit.commit_time).strftime('%-H')
        number = times.get(time, 0)
        number += 1
        times[time] = number
    for time in range(0, 24):
        print("%d hour has %d commits" % (time, times.get(str(time), 0)))
Example #29
def generate_walkers(
    repo: Repository,
    branch_names: Iterable[str],
    simplify_first_parent: bool,
    sorting: int,
) -> Iterable[Walker]:
    walkers = tuple(
        repo.walk(repo.branches[branch_name].peel().id, sorting)
        for branch_name in branch_names)
    for walker in walkers if simplify_first_parent else tuple():
        walker.simplify_first_parent()
    yield from walkers
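A hedged usage sketch for generate_walkers above; the repository path and branch names are illustrative:

from pygit2 import Repository, GIT_SORT_TOPOLOGICAL

repo = Repository("/path/to/repo/.git")
for walker in generate_walkers(repo, ["main", "dev"],
                               simplify_first_parent=True,
                               sorting=GIT_SORT_TOPOLOGICAL):
    for commit in walker:
        print(commit.id)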
Example #30
    def get_authors(self, repo_path):
        try:
            if self.args.verbose:
                Helpers().print_success("Collecting authors in {}".format(repo_path))
            authors_set = set()
            repo = Repository(repo_path)
            for commit in repo.walk(repo.head.target, GIT_SORT_TOPOLOGICAL):
                authors_set.add(Author(commit.author.name, commit.author.email))
            return authors_set
        except Exception as e:
            Helpers().print_error("{}: Could not collect authors".format(repo_path))
            return None
Example #31
def apply_patches(repo_name, comments, patch_path):
    """
        Re-apply the patches, adding back a reference to the previous commit id.

        - assumes the repo is a sibling directory of the current one
    """
    repo_realpath = os.path.join(repo_name, '.git')

    repo = Repository(repo_realpath)

    ht_comments = dict(zip(comments, comments))

    base_patch = os.path.basename(patch_path)

    cnt = 0
    prev_tree = None
    prev_k = None
    prev_message = None

    with io.StringIO() as fh:

        for commit in repo.walk(repo.head.target,
                                GIT_SORT_TOPOLOGICAL | GIT_SORT_REVERSE):
            k = str(commit.oid)
            if prev_k and prev_k in ht_comments:
                # print(prev_message)
                patch_fname = os.path.join(base_patch, prev_k + ".patch")
                """
                FIXME: how to create image files, eg. png
                """
                fh.write("patch -p1 -i ../" + patch_fname + "\n")
                # assume the current directory is a git repository
                fh.write("git add . \n")
                # handle the commit message
                """
                git commit -F- <<EOF
Message

goes
here
EOF
                """
                fh.write("git commit -F- <<EOF\n")
                for m in prev_message.split('\n'):
                    fh.write(m.strip() + "\n")
                fh.write("EOF\n")
                pass
            prev_tree = commit.tree
            prev_message = commit.message
            prev_k = k

        return fh.getvalue()
Example #32
def main():
    contrib_data_dir = sys.argv[1]
    git_repo = sys.argv[2]
    output_file = sys.argv[3]

    result = []
    exec_dir = os.getcwd()

    for contrib_file in os.listdir(contrib_data_dir):

        release = contrib_file[len(contrib_file_prefix):][:-len(contrib_file_extension)]
        contrib_file = os.path.join(contrib_data_dir, contrib_file)

        top_devs = {}

        with open(contrib_file, newline="") as csv_file:
            for row in csv.DictReader(csv_file):
                top_dev = row['top_single_dev_contribution_knowledge'].split(
                    ":")[0]

                if top_dev in top_devs:
                    top_devs[top_dev] += 1
                else:
                    top_devs[top_dev] = 1

        os.chdir(git_repo)
        call(["git", "checkout", "tags/" + release])
        os.chdir(exec_dir)

        for top_dev in top_devs:
            author_commit_count = 0
            commit_count = 0

            repo = Repository(os.path.join(git_repo, git_folder))
            for commit in repo.walk(repo.head.target):
                commit_count += 1
                if commit.author.name == top_dev:
                    author_commit_count += 1

            result.append({
                'release': release,
                'release_commits': commit_count,
                'top_dev': top_dev,
                'files_owned': top_devs[top_dev],
                'dev_commits': author_commit_count
            })

    with open(output_file, 'w', newline='') as output:
        writer = csv.DictWriter(output, csv_header)
        writer.writeheader()
        writer.writerows(result)
def get_churn_per_commit(git_repo, output_file):
    touched_files = []
    exec_dir = os.getcwd()
    repo = Repository(os.path.join(git_repo, git_folder))
    os.chdir(git_repo)

    for commit in repo.walk(repo.head.target):
        touched_files = get_touched_files(commit)

    with open(output_file, "w", newline='') as output:
        writer = csv.DictWriter(output, csv_header)
        writer.writeheader()
        writer.writerows(touched_files)
def obtain_cloned_repos(settings, logger):
    """
        Obtains information (e.g. number of commits) of the cloned repositories
    """
    input_filename = settings.get('results-repos-output-file')
    with open(input_filename, newline='', encoding='utf-8') as input_file:
        repos = json.load(input_file)

    # Obtain earliest todo-issue (discard all other data)
    repos = map(lambda kv: (kv[0],
        datetime.datetime.fromisoformat(
            min(kv[1].get('issues'), key=lambda y: y.get('created_at'), default=None).get('created_at')
            # Read dates are in UTC
            ).replace(tzinfo=datetime.timezone.utc).timestamp()
        ),
        repos.items())
    repos = dict(repos)

    cloned_repo_lst = []

    path = settings.get("download-output-path-repo")
    with os.scandir(path) as it:
        for entry in it:
            if entry.is_dir():
                # Iterate over repo folders (of a single author)
                with os.scandir(os.path.join(path, entry.name)) as it2:
                    for repo in it2:
                        if repo.is_dir():
                            repo_name = entry.name + "/" + repo.name
                            print("Handling " + repo_name)
                            repo_path = os.path.join(path, entry.name, repo.name)
                            r = Repository(repo_path)
                            earliest_todo_issue = repos.get(repo_name)
                            total_commits = 0
                            pre_commits = 0
                            if earliest_todo_issue is not None:
                                repos[repo_name] = [0, 0]
                                for commit in r.walk(r.head.target, GIT_SORT_TIME | GIT_SORT_REVERSE):
                                    if commit.commit_time < earliest_todo_issue:
                                        pre_commits += 1
                                    total_commits += 1

                            cloned_repo_lst.append( {
                                "repo": repo_name,
                                "cloned": True,
                                "total_commits": total_commits,
                                "earliest_todo_issue": earliest_todo_issue,
                                "pre_earliest_issue_commits": pre_commits,
                            })
    df_cloned_repos = pd.DataFrame(cloned_repo_lst, columns=["repo", "cloned", "total_commits", "earliest_todo_issue", "pre_earliest_issue_commits"])
    df_cloned_repos.to_csv(settings.get('results-clone-info-output-file'), index=False)
Example #35
def authors(path):
    """Function to find the commits done on each authors, with their name and mail id"""
    info = {}
    repo = Repository(path)
    for commit in repo.walk(repo.head.target, GIT_SORT_TOPOLOGICAL):
        author = commit.author.name
        email = commit.author.email
        key = "{0} <{1}>".format(author, email)
        number = info.get(key, 0)
        number += 1
        info[key] = number
    for author, number in info.items():
        msg = "{0} has {1} commits".format(author, number)
        print(msg)
Example #36
def shift(amount, repo_path):
    repo = Repository(repo_path)
    head = repo.lookup_reference('HEAD').resolve()
    adder = partial(add, amount=amount)
    changelog = dict()
    reference = REF_FMT.format(time=time(), pid=getpid())
    for commit in repo.walk(head.oid, GIT_SORT_REVERSE | GIT_SORT_TOPOLOGICAL):
        newmsg, nsubs = ISSUE_RE.subn(adder, commit.message)
        if nsubs != 0 or any(pnt.oid in changelog for pnt in commit.parents):
            parents = [changelog.get(c.oid, c.oid) for c in commit.parents]
            new_oid = repo.create_commit(reference, commit.author,
                    commit.committer, newmsg, commit.tree.oid, parents)
            changelog[commit.oid] = new_oid
    return changelog, reference
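shift above rewrites every commit whose message matches ISSUE_RE, recording the rewritten history under a throw-away reference instead of moving any branch; the returned changelog maps old commit ids to their rewritten ids. A hedged usage sketch (the repository path is illustrative; REF_FMT, ISSUE_RE and add come from the surrounding module):

changelog, ref_name = shift(amount=100, repo_path="/path/to/repo/.git")
print("rewritten history is reachable from", ref_name)
for old_oid, new_oid in changelog.items():
    print(old_oid, "->", new_oid)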
Example #37
def walk_repository(path):

    # load our requested repository
    repo = Repository(path)

    # walk the repository and check which authors are there
    blobs = {}
    old_blobs = set()
    old_date = -1

    for commit in repo.walk(repo.head.target,
                            GIT_SORT_TIME | GIT_SORT_REVERSE):

        date = commit.commit_time
        root_tree = commit.tree

        these_blobs = load_blobs_for_root_tree(root_tree, repo)

        for blob, filename in these_blobs:
            if blob not in blobs:
                blobs[blob] = {}
                blobs[blob]["start"] = date
                blobs[blob]["start_commit"] = str(commit.id)
                blobs[blob]["filename"] = filename

        blob_diff = old_blobs - these_blobs
        for blob, filename in blob_diff:
            if blob not in blobs:
                print "wat"
            else:
                blobs[blob]['end'] = date
                blobs[blob]['end_commit'] = str(commit.id)
                blobs[blob]['difference'] = (
                    date - blobs[blob]['start']) / SECONDS_PER_DAY

        old_date = date
        old_blobs = these_blobs

    # mark blobs that are still present in the current worktree as "still here"
    for blob in blobs:
        if "end" not in blobs[blob]:

            blobs[blob]['end'] = time.mktime(
                datetime.datetime.now().timetuple())
            blobs[blob]['end_commit'] = str(commit.id)
            blobs[blob]['difference'] = (
                blobs[blob]['end'] - blobs[blob]['start']) / SECONDS_PER_DAY
            blobs[blob]['spicy'] = True

    return blobs
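walk_repository above returns a dict keyed by blob, where each entry records the filename, the commits in which the blob first and last appeared, and its lifetime in days. A hedged usage sketch (the path is illustrative):

blobs = walk_repository("/path/to/repo/.git")
for blob_id, info in blobs.items():
    print(info["filename"], info["start_commit"], info["end_commit"],
          round(info["difference"], 1), "days")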
def get_history_features(graph, repo_path, branch):
    """
    Function that extracts the history features from a git repository.
    They are the total number of authors, the total age and the total
    number of unique changes.
    """
    repo = Repository(repo_path)
    head = repo.references.get(branch)

    commits = list(
        repo.walk(head.target, GIT_SORT_TOPOLOGICAL | GIT_SORT_REVERSE))

    features = []

    commit_feat = []
    commit_feat.append(str(commits[0].hex))
    commit_feat.append(str(1.0))
    commit_feat.append(str(0.0))
    commit_feat.append(str(0.0))
    features.append(commit_feat)

    for i, commit in enumerate(tqdm(commits[1:])):
        files = get_diffing_files(commit, commits[i], repo)

        total_number_of_authors = set()
        total_age = []
        total_unique_changes = set()

        for (_, name, _) in files:
            sub_graph = graph[name][commit.hex]
            total_number_of_authors.update(sub_graph['authors'])

            prev_commit = sub_graph['prevcommit']
            if prev_commit:
                total_unique_changes.add(prev_commit)

                prev_commit_obj = repo.get(prev_commit)

                total_age.append(commit.commit_time -
                                 prev_commit_obj.commit_time)

        total_age = float(sum(total_age)) / len(total_age) if total_age else 0

        commit_feat = []
        commit_feat.append(str(commit.hex))
        commit_feat.append(str(float(len(total_number_of_authors))))
        commit_feat.append(str(float(total_age)))
        commit_feat.append(str(float(len(total_unique_changes))))
        features.append(commit_feat)
    return features
Example #39
def log(dir):
    repo = Repository(dir)
    last = repo[repo.head.target]
    c_list = []
    c_item = []
    c_mes, c_name, c_time = [], [], []
    for commit in repo.walk(last.id, pygit2.GIT_SORT_TIME):
        a, b, c = commit.message, commit.committer.name, time_change(
            commit.author.time)
        c_item.append(a)
        c_item.append(b)
        c_item.append(c)
        c_list.append(c_item)
        c_item = []
    return c_list
def main():
    contrib_data_dir = sys.argv[1]
    git_repo = sys.argv[2]
    output_file = sys.argv[3]

    result = []
    exec_dir = os.getcwd()

    for contrib_file in os.listdir(contrib_data_dir):

        release = contrib_file[len(contrib_file_prefix):][:-len(contrib_file_extension)]
        contrib_file = os.path.join(contrib_data_dir, contrib_file)

        top_devs = {}

        with open(contrib_file, newline="") as csv_file:
            for row in csv.DictReader(csv_file):
                top_dev = row['top_single_dev_contribution_knowledge'].split(":")[0]

                if top_dev in top_devs:
                    top_devs[top_dev] += 1
                else:
                    top_devs[top_dev] = 1

        os.chdir(git_repo)
        call(["git", "checkout", "tags/" + release])
        os.chdir(exec_dir)

        for top_dev in top_devs:
            author_commit_count = 0
            commit_count = 0

            repo = Repository(os.path.join(git_repo, git_folder))
            for commit in repo.walk(repo.head.target):
                commit_count += 1
                if commit.author.name == top_dev:
                    author_commit_count += 1

            result.append({'release': release,
                           'release_commits': commit_count,
                           'top_dev': top_dev,
                           'files_owned': top_devs[top_dev],
                           'dev_commits': author_commit_count})

    with open(output_file, 'w', newline='') as output:
        writer = csv.DictWriter(output, csv_header)
        writer.writeheader()
        writer.writerows(result)
Example #41
def export_patches(repo_name, comments, target_path, path_list=[]):
    """
        Read the relevant commits (comments) from the repo and write their patches
        to target_path, which defaults to .target
            - comments are already sorted in commit order
    """
    repo_realpath = os.path.join(repo_name, '.git')

    repo = Repository(repo_realpath)

    ht_comments = dict(zip(comments, comments))

    cnt = 0
    prev_tree = None
    prev_k = None

    for commit in repo.walk(repo.head.target,
                            GIT_SORT_TOPOLOGICAL | GIT_SORT_REVERSE):
        k = str(commit.oid)
        if prev_k and prev_k in ht_comments:
            """
                Temporary approach:
                1. write the diff to a temporary folder named after the commit
                2. apply the patch and commit it again
            """
            changes = prev_tree.diff_to_tree(commit.tree)
            print("commit: ", prev_k, "=====================================")
            if True:
                patch_fname = os.path.join(target_path, prev_k + ".patch")
                with open(patch_fname, 'w') as fh:
                    for c in changes:
                        c_delta = c.delta
                        b_emit_patch = False
                        # related to a source file or to a modified file
                        print(c_delta.new_file.path)
                        if has_related_file(c_delta.new_file.path, path_list) \
                            or has_related_file(c_delta.old_file.path, path_list):
                            b_emit_patch = True

                        if b_emit_patch:
                            fh.write(c.text)
                            # for h in c.hunks:
                            #    # print(h.header)
                            #    pass
                            # exit(0)
                            # ()
            pass
        prev_tree = commit.tree
        prev_k = k
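export_patches above is the counterpart of apply_patches from Example #31: one writes a .patch file per selected commit into target_path, the other emits a shell script that re-applies those patches and re-commits them with the original messages. A hedged usage sketch (the repository name, commit ids and paths are illustrative):

selected = ["<sha-of-first-commit>", "<sha-of-second-commit>"]
export_patches("myrepo", selected, "patches")            # writes patches/<sha>.patch
script = apply_patches("myrepo", selected, "patches")    # shell script re-applying them
print(script)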
def get_commit_churn(repo_path):
    exec_dir = os.getcwd()
    repo = Repository(os.path.join(repo_path, git_folder))
    os.chdir(repo_path)

    debt_commits = get_debt_commits(repo_path) #commit, commit_ts, fiel_path
    file_churn = []

    for commit in repo.walk(repo.head.target):
        print(commit.id)
        curr_commit_ts = int(str(get_unix_timestamp(str(commit.id))).replace("\n", ""))
        debt_commit_flag = [f for f in debt_commits if f['commit'] == str(commit.id)]
        #print (str(debt_commit_flag))
        is_debt_commit = 0
        if debt_commit_flag:
            is_debt_commit = 1

        touched_files = get_touched_files(commit)
        for strFileChurn in touched_files:
            added_lines, deleted_lines, file_name = strFileChurn.split("\t")

            file_commit_flag = [f for f in debt_commits if f['file_path'] == file_name]
            is_file_debt = 0
            if file_commit_flag:
                file_commit_flag = file_commit_flag[0]
                debt_commit_commit_ts = int(file_commit_flag['commit_ts'])
                
                if curr_commit_ts >= debt_commit_commit_ts:
                    is_file_debt = 1

            try:
                file_churn.append({
                    'commit':str(commit.id),
                    'commit_ts':curr_commit_ts,
                    'file_name':file_name,
                    'added_lines':added_lines,
                    'deleted_lines':deleted_lines,
                    'commit_debt':is_debt_commit,
                    'file_debt':is_file_debt,
                    })
            except AttributeError:
                continue

    os.chdir(exec_dir)
    with open(output_file, "w", newline='') as output:
        writer = csv.DictWriter(output, csv_header)
        writer.writeheader()
        writer.writerows(file_churn)
def cross_reference_commits_with_bug_reports():
    repos_root = sys.argv[1]#"/home/kevin/Desktop/eclipse-platform"#
    bug_reports_file = sys.argv[2]#"/home/kevin/Downloads/eclipse-bugs.csv"#

    with open(bug_reports_file, newline="") as csv_file:
        bug_reports = [{"id": bug["id"],
                        "creation_time": datetime.strptime(bug["creation_time"], bug_date_format),
                        "closed_time": datetime.strptime(bug["closed_time"], bug_date_format)}
                       for bug in csv.DictReader(csv_file)]

    os.makedirs(output_root_path)

    for git_repo in get_immediate_subdirectories(repos_root):
        repo_name = os.path.basename(os.path.normpath(git_repo))
        repo = Repository(os.path.join(git_repo, git_folder))
        bug_related_commits = [commit for commit in repo.walk(repo.head.target) if is_bug_related(commit)]

        root = etree.Element("commits")
        count = 0

        for bug_report in bug_reports:

            # This may actually hurt the detection
            bug_related_commits_within_bug_life = \
                [c for c in bug_related_commits
                 if bug_report['creation_time'] <= datetime.fromtimestamp(c.commit_time) <= bug_report['closed_time']]

            # for commit in bug_related_commits:
            for commit in bug_related_commits_within_bug_life:
                if are_related(commit, bug_report):
                    commit_xml = commit_to_xml(commit)
                    commit_xml.set("related_bug", bug_report["id"])
                    root.append(commit_xml)
                    count += 1

            print("repo: " + repo_name + ", bug: " + bug_report["id"] + " processed")

            # if count > 10:
            #     break

        root.set("count", str(count))
        output_xml = xml_to_string(root)

        with open(os.path.join(output_root_path, repo_name + "_" + output_commit_file), "w") as file:
            file.write(output_xml)
Example #44
def _pygit2_commits(commit, repository):
    from pygit2 import Repository, GIT_SORT_TOPOLOGICAL
    g = Repository(repository)

    if '..' in commit:
        tail, head = commit.split('..', 2)
        head = head or 'HEAD'
    else:
        head = commit
        tail = commit + '^'

    walker = g.walk(g.revparse_single(head).oid, GIT_SORT_TOPOLOGICAL)

    try:
        walker.hide(g.revparse_single(tail).oid)
    except KeyError:
        pass

    return walker
def main():
    package = sys.argv[1]

    repo = Repository('.git')

    stripv = re.compile("v(\d+\.\d+\.\d+.*)")

    checktag = True

    log = """{package} ({version}) unstable; urgency=low

  * {message}

 -- {author_name} <{author_email}>  {time}
"""
    for commit in repo.walk(repo.head.target, GIT_SORT_TOPOLOGICAL):
        if checktag:
            try:
                version = git("describe", "--tags", commit.id).strip()
            except sh.ErrorReturnCode_128:
                version = '0.0.0-0-g%s' % str(commit.id)[0:7]
                checktag = False
        else:
            version = '0.0.0-0-g%s' % str(commit.id)[0:7]

        stripr = stripv.search(version)
        if stripr is not None:
            version = stripr.group(1)
        message = commit.message.encode("ascii", errors="replace").strip()
        messages = ["  %s" % line for line in message.split("\n")]
        messages[0] = messages[0].strip()
        message = "\n".join(messages)
        print(log.format(**dict(
            package=package,
            version=version,
            message=message,
            author_name=commit.author.name.encode("ascii", errors="replace"),
            author_email=commit.author.email.encode("ascii", errors="replace"),
            time=datetime.datetime.fromtimestamp(commit.commit_time).strftime("%a, %d %b %Y %H:%M:%S -0000")
        )))
def processGitDiff(commitsNum):
    counter = commitsNum
    repositoryName = "../git-repos/postgres"
    repo = Repository(repositoryName + "/" + ".git")
    childCommitNumber = ""
    for commit in repo.walk(repo.head.target, GIT_SORT_TIME):
        counter -= 1
        if counter < 0:
            break
        currentCommitNumber = commit.oid.hex
        if childCommitNumber != "":
            diff = repo.diff(currentCommitNumber, childCommitNumber)
            fileChanges = 0
            for p in diff:
                print(p.old_file_path)
                # print(p.old_oid)
                print(p.new_file_path)
                # print(p.new_oid)
                # print(p.additions)
                addLines = 0
                deleteLines = 0
                for hunk in p.hunks:
                    # print(hunk.old_start)
                    # print(hunk.old_lines)
                    # print(hunk.new_start)
                    # print(hunk.new_lines)
                    for line in hunk.lines:
                        if line[0] == "+":
                            addLines += 1
                        if line[0] == "-":
                            deleteLines += 1
                print("lines added" + str(addLines))
                print("lines deleted" + str(deleteLines))
            fileChanges += 1
            print("file changed" + str(fileChanges))
        childCommitNumber = commit.oid.hex
class GitStorage(BaseStorage):

    _backend = None

    def __init__(self, context, repo_path=None):
        self.context = context
        rp = IStorageInfo(context).path

        try:
            self.repo = Repository(discover_repository(rp))
        except KeyError:
            # discover_repository may have failed.
            raise PathNotFoundError('repository does not exist at path')

        self.checkout()  # defaults to HEAD.

    @property
    def empty_root(self):
        return {'': '_empty_root'}

    def _get_empty_root(self):
        return self.empty_root

    def _get_obj(self, path, cls=None):
        if path == '' and self._commit is None:
            # special case
            return self._get_empty_root()

        if self._commit is None:
            raise PathNotFoundError('repository is empty')

        root = self._commit.tree
        try:
            breadcrumbs = []
            fragments = list(reversed(path.split('/')))
            node = root
            oid = None
            while fragments:
                fragment = fragments.pop()
                if not fragment == '':
                    # no empty string entries, also skips over '//' and
                    # leaves the final node (if directory) as the tree.
                    oid = node[fragment].oid
                    node = self.repo.get(oid)
                breadcrumbs.append(fragment)
                if node is None:
                    # strange.  Looks like it's either submodules only
                    # have entry nodes or pygit2 doesn't fully support
                    # this.  Try to manually resolve the .gitmodules
                    # file.
                    if cls is None:
                        # Only return this if a specific type was not
                        # expected.
                        submods = parse_gitmodules(self.repo.get(
                            root[GIT_MODULE_FILE].oid).data)
                        submod = submods.get('/'.join(breadcrumbs))
                        if submod:
                            fragments.reverse()
                            return {
                                '': '_subrepo',
                                'location': submod,
                                'path': '/'.join(fragments),
                                'rev': oid.hex,
                            }

            if node and (cls is None or isinstance(node, cls)):
                return node
        except KeyError:
            # can't find what is needed in repo, raised by pygit2
            raise PathNotFoundError('path not found')

        # not what we were looking for.
        if cls == Tree:
            raise PathNotDirError('path not dir')
        elif cls == Blob:
            raise PathNotFileError('path not file')
        raise PathNotFoundError('path not found')

    @property
    def _commit(self):
        return self.__commit

    @property
    def rev(self):
        if self.__commit:
            return self.__commit.hex
        return None

    @property
    def shortrev(self):
        # TODO this is an interim solution.
        if self.rev:
            return self.rev[:12]

    def basename(self, name):
        return name.split('/')[-1]

    def checkout(self, rev=None):
        # None maps to the default revision.
        if rev is None:
            rev = 'HEAD'

        try:
            self.__commit = self.repo.revparse_single(rev)
        except KeyError:
            if rev == 'HEAD':
                # probably a new repo.
                self.__commit = None
                return
            raise RevisionNotFoundError('revision %s not found' % rev)
            # otherwise a RevisionNotFoundError should be raised.

    def files(self):
        def _files(tree, current_path=None):
            results = []
            for node in tree:
                if current_path:
                    name = '/'.join([current_path, node.name])
                else:
                    name = node.name

                obj = self.repo.get(node.oid)
                if isinstance(obj, Blob):
                    results.append(name)
                elif isinstance(obj, Tree):
                    results.extend(_files(obj, name))
            return results

        if not self._commit:
            return []
        results = _files(self._commit.tree)
        return results

    def file(self, path):
        return self._get_obj(path, Blob).data

    def listdir(self, path):
        if path:
            tree = self._get_obj(path, Tree)
        else:
            if self._commit is None:
                return []
            tree = self._commit.tree

        return [entry.name for entry in tree]

    def format(self, **kw):
        # XXX backwards compatibility??
        return kw

    def log(self, start, count, branch=None, shortlog=False):
        """
        ``start`` and ``branch`` refer to the same thing here; ``branch``
        (and ``shortlog``) are accepted for interface compatibility but are
        not used.
        """

        def _log(iterator):
            for pos, commit in iterator:
                if pos == count:
                    # Stop after ``count`` entries; a plain return ends the
                    # generator (raising StopIteration here would be an
                    # error under PEP 479).
                    return
                yield {
                    'author': commit.committer.name,
                    'email': commit.committer.email,
                    'date': self.strftime(committer_dt(commit.committer)),
                    'node': commit.hex,
                    'rev': commit.hex,
                    'desc': commit.message
                }

        if start is None:
            # Default to HEAD; if it cannot be resolved (e.g. an empty,
            # freshly initialized repository) there is nothing to log.
            start = 'HEAD'
            try:
                self.repo.revparse_single(start)
            except KeyError:
                return []

        try:
            rev = self.repo.revparse_single(start).hex
        except KeyError:
            raise RevisionNotFoundError('revision %s not found' % start)

        iterator = enumerate(self.repo.walk(rev, GIT_SORT_TIME))

        return list(_log(iterator))

    def pathinfo(self, path):
        obj = self._get_obj(path)
        if isinstance(obj, Blob):
            return self.format(**{
                'type': 'file',
                'basename': self.basename(path),
                'size': obj.size,
                'date': self.strftime(committer_dt(self._commit.committer)),
            })
        elif isinstance(obj, dict):
            # special cases are represented as dict.
            if obj[''] == '_subrepo':
                return self.format(**{
                    'type': 'subrepo',
                    'date': '',
                    'size': 0,
                    'basename': self.basename(path),
                    # extra field.
                    'obj': obj,
                })

            elif obj[''] == '_empty_root':
                return self.format(**{
                    'type': 'folder',
                    'date': '',
                    'size': 0,
                    'basename': self.basename(path),
                })

        # Assume this is a Tree.
        return self.format(**{
            'basename': self.basename(path),
            'size': 0,
            'type': 'folder',
            'date': '',
        })

    def branches(self):
        return tuple(
            (b, self.repo.lookup_branch(b).target.hex)
            for b in self.repo.listall_branches()
        )

    def tags(self):
        return tuple(
            (b[10:], self.repo.lookup_reference(b).target.hex)
            for b in self.repo.listall_references()
            if b.startswith('refs/tags')
        )
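
The log() method above caps a walk by pairing enumerate() with an early return once count commits have been yielded. Below is a minimal standalone sketch of the same pattern; the function name last_n_commits and the '.' repository path are illustrative only.

from pygit2 import GIT_SORT_TIME, Repository

def last_n_commits(repo_path, count):
    # Return up to `count` of the most recent commits, newest first.
    repo = Repository(repo_path)
    try:
        head = repo.revparse_single('HEAD')
    except KeyError:
        # Empty repository: nothing to walk.
        return []
    commits = []
    for pos, commit in enumerate(repo.walk(head.id, GIT_SORT_TIME)):
        if pos == count:
            break
        commits.append(commit)
    return commits

for commit in last_n_commits('.', 5):
    print(str(commit.id)[:12], commit.message.strip())
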
Exemple #48
0
# -*- coding: utf-8 -*-
import json
import sys

from pygit2 import Repository
from pygit2 import GIT_SORT_TOPOLOGICAL, GIT_SORT_REVERSE

repo = Repository('.git')

# Walk the history oldest-first and print messages from the listed authors.
for commit in repo.walk(repo.head.target, GIT_SORT_TOPOLOGICAL | GIT_SORT_REVERSE):
    if commit.author.name in ("ozawaseijiro", "ozawa.seijiro", "tf-s.ozawa"):
        print(commit.message)


class GitLogConverter(object):
    def __init__(self, path):
        self.repo = Repository('%s/.git' % path)

    def get_commits(self):
        return self.repo.walk(
            self.repo.head.target,
            GIT_SORT_TOPOLOGICAL
        )

    def commits_as_dicts(self):
        return (self.commit_to_dict(commit) for commit in self.get_commits())

    def commit_to_dict(self, commit):
        commit_dict = {
            "id": str(commit.id),
            "type": commit.type,
            "author_name": commit.author.name,
            "author_email": commit.author.email,
            "author_time": commit.author.time,
            "author_time_offset": commit.author.offset,
            "committer_name": commit.committer.name,
            "committer_email": commit.committer.email,
            "committer_time": commit.committer.time,
            "committer_time_offset": commit.committer.offset,
            "message": commit.message,
            "message_encoding": commit.message_encoding,
            "patches": [],
            "parent_ids": [str(id) for id in commit.parent_ids],
            "commit_time": commit.commit_time,
            "commit_time_offset": commit.commit_time_offset,
        }
        patches = commit_dict["patches"]
        diffs = [
            commit.tree.diff_to_tree(parent.tree)
            for parent in commit.parents
        ]
        merged_diff = None
        for diff in diffs:
            if merged_diff is None:
                merged_diff = diff
            else:
                merged_diff.merge(diff)
        if merged_diff is not None:
            for patch in merged_diff:
                patch_dict = {
                    "old_file_path": patch.old_file_path,
                    "new_file_path": patch.new_file_path,
                    "is_binary": patch.is_binary,
                    "old_id": str(patch.old_id),
                    "new_id": str(patch.new_id),
                    "status": patch.status,
                    "similarity": patch.similarity,
                    "additions": patch.additions,
                    "deletions": patch.deletions,
                }
                patches.append(patch_dict)
        return commit_dict

    def print_commits_as_json(self, file=sys.stdout):
        try:
            for commit_dict in self.commits_as_dicts():
                print(json.dumps(commit_dict), file=file, flush=True)
        except (BrokenPipeError, KeyboardInterrupt):
            pass
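
A hedged usage sketch for GitLogConverter: the module name git_log_converter below is an assumption made for illustration, and simply stands for wherever the class above is defined.

import argparse

from git_log_converter import GitLogConverter  # hypothetical module name

if __name__ == '__main__':
    parser = argparse.ArgumentParser(
        description='Dump a repository history as one JSON object per line.')
    parser.add_argument('path', help='working tree containing a .git directory')
    parser.add_argument('--out', help='write to this file instead of stdout')
    args = parser.parse_args()

    converter = GitLogConverter(args.path)
    if args.out:
        with open(args.out, 'w') as fh:
            converter.print_commits_as_json(file=fh)
    else:
        converter.print_commits_as_json()
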
Exemple #50
0
import json
import sys
from pygit2 import Repository, Oid, GIT_SORT_TOPOLOGICAL

nil="0000000000000000000000000000000000000000"

payload = dict(zip(('before', 'after', 'ref'), sys.stdin.read().split()))

payload['created'] = True if payload['before'] == nil else False
payload['deleted'] = True if payload['after'] == nil else False

if not payload['created'] and not payload['deleted']:

    repo = Repository('.')

    log = repo.walk(Oid(hex=payload['after']), GIT_SORT_TOPOLOGICAL)
    log.hide(Oid(hex=payload['before']))
    payload['commits'] = []
    for commit in log:
        info = {}
        info['id'] = commit.hex
        info['message'] = commit.message

        author = {}
        author['name'] = commit.author.name
        author['email'] = commit.author.email
        author['timestamp'] = commit.author.time
        info['author'] = author

        committer = {}
        committer['name'] = commit.committer.name
    shutil.make_archive(tarname, "gztar", root_dir=dirname)
    # run dh_make
    os.system("cd %s; dh_make -s -c gpl2 --createorig -y -a -e %s" %
              (dirname, email))
    # remove extra files and copy source files
    os.system("cd %s; rm -rf *.ex *.EX README*" % debname)
    shutil.copy2("deb_control", debname+"control")
    shutil.copy2("deb_copyright", debname+"copyright")
    # make the dirs and install files
    os.system("echo '%s' > %sdirs" % (installdir, debname))
    os.system("echo 'gnome-keyring.so %s' > %sinstall"
              % (installdir, debname))
    # write the changelog
    changelog = open(debname + "changelog", "w")
    repo = Repository(".")
    for commit in repo.walk(repo.head.target, GIT_SORT_TIME):
        changelog.write("%s (%s) %s; urgency=low\n\n" %
                        (basename, package_version_str, ubuntuname))
        for commit_line in commit.message.split("\n"):
            if len(commit_line) > 0:
                changelog.write("  " + commit_line + "\n")
        changelog.write("\n")
        date = time.strftime("%a, %d %b %Y %X",
                             time.gmtime(commit.commit_time))
        offset = "%+0.04d" % (commit.commit_time_offset / 60 * 1000)
        changelog.write(" -- %s <%s>  %s %s\n" %
                        (author_name, email, date, offset))
    changelog.close()
    # call debuild
    os.system("cd %s; debuild -S -sa" % dirname)
Exemple #52
0
import os
import sh
import subprocess
import time

from pygit2 import Repository, clone_repository
from pygit2 import GIT_SORT_TOPOLOGICAL, GIT_SORT_REVERSE

repo_url = 'https://github.com/octocat/Spoon-Knife.git'
repo_path = 'spoon-knife'

if not os.path.exists(repo_path):
    repo = clone_repository(repo_url, repo_path)

base = Repository(repo_path + '/.git')
base.checkout('HEAD')

history = []
# Display Commits Newest to Oldest
for commit in base.walk(base.head.target, GIT_SORT_TOPOLOGICAL):
    # print(commit.hex)
    # print(commit.message)
    print(time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(commit.commit_time)))
    history.append(commit.hex)

#print '-----------------------------------------------------------'

# Display Commits Oldest to Newest
for commit in base.walk(base.head.target, GIT_SORT_TOPOLOGICAL | GIT_SORT_REVERSE):
    pass
#    print commit.hex
#    print base.revparse_single(commit.hex).message
#    print commit.commit_time
#    print commit.commit_time_offset
Exemple #53
0
        repo_id = result[0][0]
    else:
        # The repo isn't in the database yet, so we add it
        sql = 'INSERT INTO repo (repo_id, name) VALUES (NULL, :repo_name)'
        c.execute(sql, {"repo_name": repo_name})
        conn.commit()

        # Retrieve the repo_id value generated by the database for the above insert
        repo_id = c.lastrowid

    # Loop around for each branch, adding their commits to the database
    for branch_name in repo.listall_branches(GIT_BRANCH_REMOTE):

        # Starting with the oldest commit in the branch, add its commits to the database
        branch = repo.lookup_reference('refs/remotes/' + branch_name)
        for commit in repo.walk(branch.target, GIT_SORT_TIME | GIT_SORT_REVERSE):

            # If requested, display the commit info for debugging purposes
            if debug == 1:
                print "commit {0}".format(commit.hex)
                print "Author: {0} <{1}>".format(unicode(commit.author.name).encode("utf-8"), commit.author.email)
                print datetime.utcfromtimestamp(commit.commit_time).strftime('Date:   %a %b %d %H:%M:%S %Y +0000\n')
                print "   {0}".format(unicode(commit.message).encode("utf-8"))

            # Check if the commit already exists in the database.  Don't add it if it's already there
            sql = 'SELECT commit_id, hash FROM commits WHERE repo = :repo AND hash = :hash'
            c.execute(sql, {"repo": repo_id, "hash": commit.hex})
            result = c.fetchall()
            if len(result) > 0:

                # If requested, show debugging info
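
The fragment above is cut off, but its core pattern is: walk every remote branch oldest-first and record each commit once in a SQLite database. Below is a self-contained sketch of that pattern; the '.git' path, the commits.db file name and the table layout are illustrative, not from the original.

import sqlite3

from pygit2 import GIT_BRANCH_REMOTE, GIT_SORT_REVERSE, GIT_SORT_TIME, Repository

repo = Repository('.git')
conn = sqlite3.connect('commits.db')
c = conn.cursor()
c.execute('CREATE TABLE IF NOT EXISTS commits '
          '(hash TEXT PRIMARY KEY, branch TEXT, author TEXT, message TEXT)')

for branch_name in repo.listall_branches(GIT_BRANCH_REMOTE):
    branch = repo.lookup_reference('refs/remotes/' + branch_name)
    # Oldest commit first, mirroring the fragment above.
    for commit in repo.walk(branch.target, GIT_SORT_TIME | GIT_SORT_REVERSE):
        # INSERT OR IGNORE skips commits that were already recorded.
        c.execute('INSERT OR IGNORE INTO commits VALUES (?, ?, ?, ?)',
                  (str(commit.id), branch_name,
                   commit.author.name, commit.message))

conn.commit()
conn.close()
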
class GitStorage(Storage):
    """ Git file storage backend. """

    def __init__(self, path):
        """
            Initialize repository.

            :param path: Absolute path to the existing Git repository.
            :type path: str
        """

        super(GitStorage, self).__init__()

        self.repo = Repository(path)
        self.index = self.repo.index
        self.index.read()

    @classmethod
    def create_storage(cls, path):
        """
            Create repository, and return GitStorage object on it

            :param path: Absolute path to the Git repository to create.
            :type path: str
            :returns: GitStorage
        """

        init_repository(path, False)

        return cls(path)

    def commit(self, user, message):
        """
            Save previous changes in a new commit.

            :param user: The commit author/committer.
            :type user: django.contrib.auth.models.User
            :param message: The commit message.
            :type message: unicode
            :returns: pygit2.Commit
        """

        # Refresh index before committing
        index = self.repo.index
        index.read()

        # Check the status of the repository
        status = self.repo.status()

        for filename, flags in status.items():
            # the file was deleted
            if flags in (GIT_STATUS_INDEX_DELETED, GIT_STATUS_WT_DELETED):
                # remove it from the tree
                del index[filename]

            # or the file was modified/added
            elif flags in (GIT_STATUS_INDEX_MODIFIED, GIT_STATUS_INDEX_NEW,
                           GIT_STATUS_WT_MODIFIED, GIT_STATUS_WT_NEW):
                # add it to the tree
                index.add(filename)

        treeid = index.write_tree()

        # Now make the commit

        author = Signature(u'{0} {1}'.format(
            user.first_name,
            user.last_name).encode('utf-8'),
            user.email.encode('utf-8')
        )
        committer = author

        try:
            parents = [self.repo.head.oid]

        except GitError:
            parents = []

        commit = self.repo.create_commit(
            'refs/heads/master',
            author, committer, message,
            treeid,
            parents
        )

        # Write changes to disk
        index.write()
        # and refresh index.
        self.index.read()

        # Return commit object
        return self.repo[commit]

    def log(self, name=None, limit=10):
        """
            Get history of the repository, or of a file if name is not None.

            :param name: File name within the repository.
            :type name: unicode or None
            :param limit: Maximum number of commits to return (default: 10); use a negative number to get all.
            :type limit: int
            :returns: list of pygit2.Commit
        """

        commits = []

        if not name:
            # Look for `limit` commits
            for commit in self.repo.walk(self.repo.head.oid, GIT_SORT_TIME):
                commits.append(commit)

                limit = limit - 1

                if limit == 0:
                    break

        else:
            # For each commit
            for commit in self.repo.walk(self.repo.head.oid, GIT_SORT_TIME):
                # Check the presence of the file in the tree

                if commit.parents:
                    # If the commit has parents, check if the file is present
                    # in the diff

                    diff = commit.tree.diff(commit.parents[0].tree)

                    for patch in diff:
                        # If the filename is the patch's filename...
                        if name.encode('utf-8') == patch.new_file_path:
                            # ... then we can add the commit to the list
                            # and leave the loop

                            commits.append(commit)

                            limit = limit - 1
                            break

                else:
                    # But if the commit has no parents (root commit)
                    # Simply check in its tree

                    try:
                        commit.tree[name]

                        # no error raised, it means the entry exists, so add the
                        # commit to the list
                        commits.append(commit)

                        limit = limit - 1

                    # If the file is not in the tree, then it raises a KeyError,
                    # so, just ignore it.
                    except KeyError:
                        pass

                # If the limit is reached, leave the loop
                if limit == 0:
                    break

        return commits

    def diffs(self, name=None, limit=10):
        """
            Get diffs between commits.

            Return the following dict:

                {"diffs": [
                    {
                        "msg": unicode(<commit message>),
                        "date": datetime.fromtimestamp(<commit date>),
                        "author": unicode(<author name>),
                        "sha": unicode(<commit SHA>),
                        "parent_sha": unicode(<parent commit SHA>), # optional
                    },
                    # ...
                ]}

            :param name: File name within the repository.
            :type name: unicode or None
            :param limit: Maximum number of diffs to return (default: 10); use a negative number to get all.
            :type limit: int
            :returns: dict
        """

        commits = self.log(name=name, limit=limit)

        diffs = {'diffs': []}

        # For each commit
        for commit in commits:
            # Create a dict with information about the commit
            diff = {
                'msg': commit.message,
                'date': datetime.datetime.fromtimestamp(commit.commit_time),
                'author': commit.author.name,
                'sha': commit.hex,
            }

            if commit.parents:
                diff['parent_sha'] = commit.parents[0].hex

            # The SHA and parent SHA will be used to get the diff via AJAX.

            diffs['diffs'].append(diff)

        return diffs

    def diff(self, asha, bsha, name=None):
        """
            Get diff between two commits.

            :param asha: SHA of commit A.
            :type asha: unicode
            :param bsha: SHA of commit B.
            :type bsha: unicode
            :param name: File name within the repository.
            :type name: unicode or None
            :returns: unicode
        """

        c1 = self.repo[asha]
        c2 = self.repo[bsha]

        d = c1.tree.diff(c2.tree)

        if name:
            diff = u''

            # For each patch in the diff
            for patch in d:
                # Check if the patch is our file
                if name.encode('utf-8') == patch.new_file_path:
                    # Format the patch
                    for hunk in patch.hunks:
                        p = u'\n'.join(hunk.lines)

                        # And add the diff to the final diff
                        diff = u'{0}{1}'.format(diff, p)

            return diff

        # For a global diff, just return the full patch
        else:
            return d.patch

    def search(self, pattern, exclude=None):
        """
            Search for a pattern in the Git repository.

            :param pattern: Pattern to search for.
            :type pattern: unicode
            :param exclude: Regex excluding matching files from the search results.
            :type exclude: regex
            :returns: list of tuples, each containing a filename and the list of matched lines.
        """

        entries = []

        self.index.read()

        # For each file in the index
        for ientry in self.index:
            # If the filename matches the exclude regex, ignore it
            if exclude and re.match(exclude, ientry.path.decode('utf-8')):
                continue

            # Get the associated blob
            blob = self.repo[ientry.oid]

            # Create entry
            entry = (ientry.path.decode('utf-8'), [])

            # Add matched lines to the entry
            for line in blob.data.decode('utf-8').splitlines():
                if pattern in line:
                    entry[1].append(line)

            # If the entry has no matched lines, ignore it
            if entry[1]:
                entries.append(entry)

        return entries

    def is_dir(self, name):
        """
            Check if name refers to a directory.

            :param name: File name within the repository.
            :type name: unicode
            :returns: True, False
        """

        # Check if the path exists; if not, return the default value.
        if not self.exists(name):
            return False

        # Get the TreeEntry associated to name
        tentry = self.repo.head.tree[name]

        # Convert it to its pygit2 representation
        obj = tentry.to_object()

        # If it's a Tree, then we can return True
        if isinstance(obj, Tree):
            return True

        # The instance is a Blob, so it's a file, return False
        else:
            return False

    def mimetype(self, name):
        """
            Get the mimetype of a file.

            :param name: File name within the repository.
            :type name: unicode
            :returns: str
        """

        # If the file is a directory
        if self.is_dir(name):
            return 'inode/directory'

        # Or doesn't exist
        elif not self.exists(name):
            return 'unknown'

        # The file exists, check its mimetype
        else:
            import urllib
            import mimetypes

            url = urllib.pathname2url(name.encode('utf-8'))

            return mimetypes.guess_type(url)[0] or 'unknown'

    def walk(self):
        """
            Walk through the repository.
        """

        self.index.read()

        for entry in self.index:
            yield entry

    # Storage API

    def accessed_time(self, name):
        """
            Get last accessed time of a file.

            :param name: File name within the repository.
            :type name: unicode
            :returns: datetime
            :raises: IOError
        """

        if not self.exists(name):
            raise IOError(u"{0}: Not found in repository".format(name))

        abspath = os.path.join(self.repo.workdir, name)
        stats = os.stat(abspath)

        return datetime.datetime.fromtimestamp(stats.st_atime)

    def created_time(self, name):
        """
            Get creation time of a file.

            :param name: File name within the repository.
            :type name: unicode
            :returns: datetime
            :raises: IOError
        """

        if not self.exists(name):
            raise IOError(u"{0}: Not found in repository".format(name))

        abspath = os.path.join(self.repo.workdir, name)
        stats = os.stat(abspath)

        return datetime.datetime.fromtimestamp(stats.st_ctime)

    def modified_time(self, name):
        """
            Get last modified time of a file.

            :param name: File name within the repository.
            :type name: unicode
            :returns: datetime
            :raises: IOError
        """

        if not self.exists(name):
            raise IOError(u"{0}: Not found in repository".format(name))

        abspath = os.path.join(self.repo.workdir, name)
        stats = os.stat(abspath)

        return datetime.datetime.fromtimestamp(stats.st_mtime)

    def size(self, name):
        """
            Get file's size.

            :param name: File name within the repository.
            :type name: unicode
            :returns: int
            :raises: IOError
        """

        if not self.exists(name):
            raise IOError(u"{0}: Not found in repository".format(name))

        e = self.index[name]
        blob = self.repo[e.oid]

        return blob.size

    def exists(self, path):
        """
            Check if ``path`` exists in the Git repository.

            :param path: Path within the repository of the file to check.
            :type path: unicode
            :returns: True if the file exists, False if the name is available for a new file.
        """

        # If the head is orphaned (does not point to any commit), return False
        # because there is nothing in the repository.
        if self.repo.head_is_orphaned:
            return False

        # Try getting the path via the tree
        try:
            entry = self.repo.head.tree[path]

            return True

        # If it raises a KeyError, then the path doesn't exist
        except KeyError:
            return False

    def listdir(self, path=None):
        """
            Lists the contents of the specified path.

            :param path: Path of the directory to list (or None to list the root).
            :type path: unicode or None
            :returns: a 2-tuple of lists; the first item being directories, the second item being files.
        """

        abspath = os.path.join(self.repo.workdir, path) if path else self.repo.workdir

        dirs = []
        files = []

        for e in os.listdir(abspath):
            entry_fullpath = os.path.join(abspath, e)

            if os.path.isdir(entry_fullpath):
                if e != '.git':
                    dirs.append(e.decode('utf-8'))

            else:
                files.append(e.decode('utf-8'))

        return (dirs, files)

    def open(self, name, mode='rb'):
        """
            Opens the file given by name.

            :param name: Name of the file to open.
            :type name: unicode
            :param mode: Flags for opening the file (see builtin ``open`` function).
            :type mode: str
            :returns: GitFile
        """

        abspath = os.path.join(self.repo.workdir, name)
        dirname = os.path.dirname(abspath)

        if 'w' in mode and not os.path.exists(dirname):
            os.makedirs(dirname)

        return GitFile(open(abspath, mode))

    def path(self, name):
        """
            Return the absolute path of the file ``name`` within the repository.

            :param name: Name of the file within the repository.
            :type name: unicode
            :returns: str
            :raises: IOError
        """

        if not self.exists(name):
            raise IOError(u"{0}: Not found in repository".format(name))

        e = self.index[name]

        return os.path.join(self.repo.workdir, e.path).decode('utf-8')

    def save(self, name, content):
        """
            Saves a new file using the storage system, preferably with the name
            specified. If there already exists a file with this name, the
            storage system may modify the filename as necessary to get a unique
            name. The actual name of the stored file will be returned.

            :param name: Name of the new file within the repository.
            :type name: unicode
            :param content: Content to save.
            :type content: django.core.files.File
            :returns: str
        """

        new_name = self.get_available_name(name)
        abspath = os.path.join(self.repo.workdir, new_name)

        dirname = os.path.dirname(abspath)

        if not os.path.exists(dirname):
            os.makedirs(dirname)

        with open(abspath, 'wb') as f:
            for chunk in content.chunks():
                f.write(chunk)

        return new_name

    def delete(self, name):
        """
            Deletes the file referenced by name.

            :param name: Name of the file within the repository to delete
            :type name: unicode
            :raises: IOError
        """

        if not self.exists(name):
            raise IOError(u"{0}: Not found in repository".format(name))

        abspath = os.path.join(self.repo.workdir, name)
        os.remove(abspath)
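
A rough usage sketch for GitStorage, assuming the class above is importable as git_storage.GitStorage (a hypothetical module name) and that the pygit2/Django versions match the API the class was written against. Since commit() only reads first_name, last_name and email from its user argument, a namedtuple stand-in is enough here.

from collections import namedtuple

from git_storage import GitStorage  # hypothetical module name

FakeUser = namedtuple('FakeUser', 'first_name last_name email')

# Create a fresh repository-backed storage and add a file to it.
storage = GitStorage.create_storage('/tmp/wiki-repo')
with storage.open('pages/home.txt', 'wb') as fh:
    fh.write(b'hello world\n')

# Record the change, then show the most recent history entries.
storage.commit(FakeUser('Jane', 'Doe', 'jane@example.org'), u'Add home page')
for entry in storage.diffs(limit=5)['diffs']:
    print(entry['sha'][:8], entry['msg'].strip())
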
Exemple #55
0
import os

from pygit2 import Repository, clone_repository, GIT_SORT_TOPOLOGICAL

repo_url = 'https://github.com/octocat/Spoon-Knife.git'
urlChunks = repo_url.split('/')
repo_path = urlChunks[-1].replace('.git', '').lower()

#file = open(repo_path + '.csv', 'wb')
#csvWriter = csv.writer(file)

if not os.path.exists(repo_path):
    repo = clone_repository(repo_url, repo_path)

base = Repository(repo_path + '/.git')
base.checkout('HEAD')

history = []
# Display Commits Newest to Oldest
for commit in base.walk(base.head.target, GIT_SORT_TOPOLOGICAL):
    #print commit.hex
    #print commit.message
    #print commit.commit_time
    #print commit.commit_time_offset
    history.append(commit.hex)
'''
print '-----------------------------------------------------------'

# Display Commits Oldest to Newest
for commit in base.walk(base.head.target, GIT_SORT_TOPOLOGICAL | GIT_SORT_REVERSE):
    print commit.hex
    print base.revparse_single(commit.hex).message
    print commit.commit_time
    print commit.commit_time_offset
'''
Exemple #56
0
from pygit2 import Repository

repo = Repository('.git')
diff = repo.diff

prev_commit = None

consolidate = True

if consolidate:
    matrix = {}
else:
    matrix = []

for commit in repo.walk(repo.head.target):
    print(commit.message)

    if prev_commit is not None:
        # get the diff info
        diff = repo.diff(commit, prev_commit)

        # Get the string with changed info and split it
        changes = diff.patch.split('\n')[5:]
        try:
            a = changes[3].split()
            b = changes[4].split()
        except IndexError:
            # Short patches (e.g. the last diff) don't have these lines.
            print("last one? and I am too lazy to dump that one so here we go")
        else:
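
The example above is truncated mid-loop; below is a compact sketch of the same idea (diffing each commit against the previously visited one while walking) that reads the per-commit numbers from diff.stats instead of parsing diff.patch by hand. The '.git' path is illustrative.

from pygit2 import Repository

repo = Repository('.git')

prev_commit = None
changes = []  # (short sha, files changed, insertions, deletions)

# walk() yields newest first, so prev_commit is always the newer of the pair.
for commit in repo.walk(repo.head.target):
    if prev_commit is not None:
        stats = repo.diff(commit, prev_commit).stats
        changes.append((str(prev_commit.id)[:8], stats.files_changed,
                        stats.insertions, stats.deletions))
    prev_commit = commit

for sha, files, insertions, deletions in changes:
    print(sha, files, insertions, deletions)
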
Exemple #57
0
def count_contribs(repo_str,num_months):
    repo = Repository((repo_str).strip())
    last = repo[repo.head.target]
    now = date.fromtimestamp(time.time())
    res_emp = list()
    res_vol = list()
    res_tot = list()
    res_emp_cont = list()
    res_vol_cont = list()
    res_tot_cont = list()
    res_vol_perc = list()
    res_leverage_people = list()
    res_leverage_patches = list()
    emails = list()
    authors = list()
    commits = list()
    for i in xrange(num_months):
        time_key = add_months(i, now)
        print time_key
        employee_authors = set()
        volunteer_authors = set()
        emp_contributions = 0
        vol_contributions = 0
        for commit in repo.walk(last.id, GIT_SORT_TIME):
            tpin = get_month_key(commit.commit_time)
            if tpin != time_key:
                continue
            authors.append(commit.author)
            commits.append(commit)
            # All entries in authors.keys and emails should be unique at this
            # point.
            author = commit.author
            if (author.email in employees) or "@mozilla." in author.email:
                employee_authors.add(author.email)
                emp_contributions += 1
            else:
                volunteer_authors.add(author.email)
                vol_contributions += 1

        res_emp.append(len(employee_authors))
        res_vol.append(len(volunteer_authors))
        res_tot.append(len(employee_authors) + len(volunteer_authors))
        res_emp_cont.append(emp_contributions)
        res_vol_cont.append(vol_contributions)
        res_tot_cont.append(emp_contributions + vol_contributions)
        if 0 not in (emp_contributions, len(employee_authors), vol_contributions):
            res_vol_perc.append(100 * vol_contributions
                                / (emp_contributions + vol_contributions))
            res_leverage_people.append(float(len(volunteer_authors))
                                       / float(len(employee_authors)))
            res_leverage_patches.append(float(vol_contributions)
                                        / float(len(employee_authors)))

    print 'done: ' + repo_str
    print 'employees: ' + str(res_emp) 
    print  'volunteers: ' + str(res_vol)

    filename = "HTML_OUTPUT/" + (repo_str).strip() + '.html' 
    html= open(filename, 'w')

    html.write( '''<!doctype html>
<html>
<head>
<title>Are We Everyone Yet?</title>
<link href="index.css" rel="stylesheet" type="text/css">
</head>
<body>
<script src="./src/Chart.js"></script>
<div class="section header">
<div id="banner">
Are We Everyone Yet?</div>
</div>
<div class="projects"><center>
''')

    for p in repos:
        html.write( '''<a href="%s.html">%s</a> ''' % (p.strip(), p.strip() ))

    html.write('''
</center></div>
<div class="section">
    <center>
    <p><span id="blue">Total</span>, <span id="yellow">Employees</span>, and <span id="red">Volunteer</span> contributors per Firefox release.</p><br/>
    <span id="slogan">Active contributors and employees per release.</span><br/>
    <p><canvas id="contribChart" width="800" height="400"></canvas></p><br/>
    <br/>
    <span id="slogan">Patches from volunteer contributors and employees per release.</span><br/>
    <p><canvas id="contribPercent" width="800" height="400"></canvas></p><br/>
    <span id="slogan">Leverage: Contributers Per Employee</span><br/>
    <p><canvas id="leveragePeopleOverall" width="800" height="400"></canvas></p>
    <span id="slogan">Leverage: Contributor Patches Per Employee</span><br/>
    <p><canvas id="leveragePatchesOverall" width="800" height="400"></canvas></p>
    <span id="slogan">Volunteer Commit Percentage Overall</span><br/>
    <p><canvas id="contribPercentOverall" width="800" height="400"></canvas></p>
</center>
<script>


var myColor = {
        red : "rgba(230, 118, 39 , 1)",
        yellow : "rgba(255, 230, 17 , 1)",
        blue : "rgba(4,174,225, 1)", 
        green : "rgba(57, 181, 17 , 1)"
    } 

   
    document.getElementById("blue").style.color=myColor.blue
    document.getElementById("yellow").style.color=myColor.yellow
    document.getElementById("red").style.color=myColor.red
    
    var participation_data = { 
''' ) 
    html.write('versions: ' + str(list(range( 0 - num_months, 0))) + ',\n' + \
                'employees: ' + str(res_emp) + ',\n' + \
                'volunteers: ' + str(res_vol) + ',\n' + \
                'total: ' + str(res_tot) + ',\n' + \
                'employee_commits: ' + str(res_emp_cont) + ',\n' + \
                'volunteer_commits: ' + str(res_vol_cont) + ',\n' + \
                'total_commits: ' + str(res_tot_cont) + ',\n' + \
                'volunteer_commit_percentage: ' + str(res_vol_perc) + ',\n' + \
                'leverage_people: ' + str(res_leverage_people) + ',\n' + \
                'leverage_patches: ' + str(res_leverage_patches)  )
    html.write( '''
} 
// Volunteers, up first.

var lineGraphData = {
        labels : participation_data.versions,
        datasets : [
                {
        fillColor : "rgba(0,0,0,0)",// : "rgba(0,0,0,0)", //myColor.blue,
        strokeColor :myColor.blue,
        pointColor :myColor.blue,
                        pointStrokeColor : "#fff",
                        data : participation_data.total
                },
                {
        fillColor : "rgba(0,0,0,0)",// :"rgba(0,0,0,0)", //myColor.yellow,
      strokeColor :  myColor.yellow,
      pointColor :  myColor.yellow,
                        pointStrokeColor : "#fff",
                        data : participation_data.employees
                },
                {
        fillColor : "rgba(0,0,0,0)",// : "rgba(0,0,0,0)", //gmyColor.red,
      strokeColor : myColor.red,
      pointColor : myColor.red,
                        pointStrokeColor : "#fff",
                        data : participation_data.volunteers
  }
  ]     
}

var lineGraphParams = { scaleOverride: true, scaleSteps: 15, scaleStepWidth: 40, scaleStepStart: 0, scaleBeginAtZero: true } 

var lineGraph  = new Chart(document.getElementById("contribChart").getContext("2d")).Line(lineGraphData, lineGraphParams);

// Next up, contributor commits.

var commitGraphData = {
        labels : participation_data.versions,
        datasets : [
                {
        fillColor : "rgba(0,0,0,0)",// : myColor.blue,
        strokeColor : myColor.blue,
        pointColor : myColor.blue,
                        pointStrokeColor : "#fff",
                        data : participation_data.total_commits
                },
                {
        fillColor : "rgba(0,0,0,0)",// : myColor.yellow,
        strokeColor : myColor.yellow,
        pointColor : myColor.yellow,
                        pointStrokeColor : "#fff",
                        data : participation_data.employee_commits
                },
                {
        fillColor : "rgba(0,0,0,0)",// : myColor.red,
        strokeColor : myColor.red,
        pointColor : myColor.red,
                        pointStrokeColor : "#fff",
                        data : participation_data.volunteer_commits
  }
  ]     
}

var commitGraphParams = { scaleOverride: true, scaleSteps: 20 , scaleStepWidth: 400, scaleStepsStart: 0, scaleBeginAtZero: true} 

var commitGraph = new Chart(document.getElementById("contribPercent").getContext("2d")).Line(commitGraphData, commitGraphParams);



var percentGraphData = {
        labels : participation_data.versions,
        datasets : [
                {
        fillColor : "rgba(0,0,0,0)",// : myColor.blue, 
        strokeColor : myColor.blue,
        pointColor : myColor.blue,
                        pointStrokeColor : "#fff",
                        data : participation_data.volunteer_commit_percentage
  }
  ]     
}

var percentGraphParams = { scaleOverride: true, scaleSteps: 10 , scaleStepWidth: 10, scaleStepsStart: 0, scaleBeginAtZero: true} 

var percentGraph  = new Chart(document.getElementById("contribPercentOverall").getContext("2d")).Line(percentGraphData, percentGraphParams);

var leveragePeopleGraphData= {
        labels : participation_data.versions,
        datasets : [
                {
        fillColor : "rgba(0,0,0,0)",// : myColor.blue, 
        strokeColor : myColor.blue,
        pointColor : myColor.blue,
                        pointStrokeColor : "#fff",
                        data : participation_data.leverage_people
  }
  ]     
}

var leveragePeopleGraphParams = { scaleOverride: true, scaleSteps: 10 , scaleStepWidth: 0.2, scaleStepsStart: 0, scaleBeginAtZero: true} 

var leveragePeopleGraph= new Chart(document.getElementById("leveragePeopleOverall").getContext("2d")).Line(leveragePeopleGraphData, leveragePeopleGraphParams);

var leveragePatchesGraphData = {
        labels : participation_data.versions,
        datasets : [
                {
        fillColor : "rgba(0,0,0,0)",// : myColor.blue, 
        strokeColor : myColor.blue,
        pointColor : myColor.blue,
                        pointStrokeColor : "#fff",
                        data : participation_data.leverage_patches
  }
  ]     
}

var leveragePatchesGraphParams = { scaleOverride: true, scaleSteps: 10 , scaleStepWidth: 2 , scaleStepsStart: 0, scaleBeginAtZero: true} 

var leveragePatchesGraph  = new Chart(document.getElementById("leveragePatchesOverall").getContext("2d")).Line(leveragePatchesGraphData, leveragePatchesGraphParams);


</script>

</div>
</body>
</html> ''' )
Exemple #58
0
  return resp


def get_attachement(num, attachment):
  url = TRAC_HTTP + "/raw-attachment/ticket/%d/%s" % (num, attachment)
  print(url)
  resp = requests.get(url)
  print(resp.status_code)
  if resp.status_code == 200:
    return resp.text
  return None


repo = Repository(GIT_REPO)

for commit in repo.walk(repo.head.oid, GIT_SORT_TIME):
  try:
    tickets = re.findall('#[0-9]+', commit.message)
    if tickets:
      if len(commit.parents) != 1:
        continue
      base = commit.parents[0].hex
      head = commit.hex
      title = commit.message
      ticket = tickets[0][1:]
      body = "PR for issue #%s" % (ticket)
      trac_ticket = trac._tracserver.ticket.get(ticket)
      num, updated, created, props = trac_ticket
      if props['status'] != 'closed':
        continue
      print(commit.message)
Exemple #59
0
def get_and_update_repo_cache(repo_path, repo_name):
    cache_filename = '%s-stats.cache' % repo_name
    if os.path.exists(cache_filename):
        with open(cache_filename) as f:
            data = load(f)
    else:
        data = {
            'author_to_month_to_additions': defaultdict(defaultdict_int),
            'author_to_month_to_deletions': defaultdict(defaultdict_int),
            'author_to_month_to_changes': defaultdict(defaultdict_int),
            'author_to_month_to_commits': defaultdict(defaultdict_int),
            'day_to_count': defaultdict(defaultdict_int),
            'change_count_by_file': defaultdict(int),
            'latest_sha': None,
        }

    repo = Repository(repo_path)

    ignored_commits = []

    count = 0
    for commit in repo.walk(repo.head.target, GIT_SORT_TOPOLOGICAL):
        count += 1
        if commit.type == GIT_OBJ_COMMIT:
            if data['latest_sha'] == commit.hex:
                break

            try:
                d = repo.diff('%s^' % commit.hex, commit)
            except KeyError:
                print "Commits without parent: ", commit.hex
                continue
            additions = d.stats.insertions
            deletions = d.stats.deletions

            author = author_aliases.get(commit.author.email, commit.author.email)

            day = date.fromtimestamp(commit.commit_time)
            data['day_to_count']['Lines'][day] += additions
            data['day_to_count']['Lines'][day] -= deletions

            if additions > 1000 and deletions < 5 and commit.hex not in whitelist_commits:
                if commit.hex not in blacklist_commits:
                    ignored_commits.append(commit.hex)
                    # print 'WARNING: ignored %s looks like an embedding of a lib (message: %s)' % (commit.hex, commit.message)
                continue
            if (additions > 3000 or deletions > 3000) and commit.hex not in whitelist_commits:
                if commit.hex not in blacklist_commits:
                    ignored_commits.append(commit.hex)
                    # print 'WARNING: ignored %s because it is bigger than 3k lines. Put this commit in the whitelist or the blacklist (message: %s)' % (commit.hex, commit.message)
                continue
            month = date(day.year, day.month, 1)
            data['author_to_month_to_additions'][author][month] += additions
            data['author_to_month_to_deletions'][author][month] += deletions
            data['author_to_month_to_changes'][author][month] += additions + deletions
            data['author_to_month_to_commits'][author][month] += 1
            if data['latest_sha'] is None:
                data['latest_sha'] = commit.hex

            if d.patch:
                for changed_path in [x for x in d.patch.split('\n') if x.startswith('+++ ') and '/dev/null' not in x]:
                    data['change_count_by_file'][changed_path[len('+++ ') + 1:]] += 1

    with open(cache_filename, 'w') as f:
        dump(data, f)

    with open(repo_name + '-ignored-commits.txt', 'w') as f:
        f.writelines('%s\n' % x for x in ignored_commits)

    return data
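
A hypothetical driver for get_and_update_repo_cache() above: the repository path and name are placeholders, and the module-level tables the function reads (author_aliases, whitelist_commits, blacklist_commits, defaultdict_int) must exist as in the original script. The snippet just aggregates the returned per-month commit counts into an author ranking.

data = get_and_update_repo_cache('/path/to/repo', 'myrepo')

# Rank authors by their total number of commits across all months.
totals = {}
for author, by_month in data['author_to_month_to_commits'].items():
    totals[author] = sum(by_month.values())

for author in sorted(totals, key=totals.get, reverse=True)[:10]:
    print('%6d  %s' % (totals[author], author))
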
Exemple #60
0
class prototype:
    repo = ""  # Path to a given repository
    name = ""  # Name of a repository
    base = ""  # Repository as defined in pygit2

    # Initialization. Clones the given repository, placing it in the current directory,
    # and changes to the repository directory.
    def init(self, repository):
        self.repo = repository

        # Use regular expressions to match the last instance of a forward slash
        # followed by the name of the repository, which we wish to extract, followed
        # by ".git". 
        m = re.search('/([^/]+).git$', repository)
        if m:
            self.name = m.group(1)

        if not os.path.isdir(self.name):
            os.system('git clone ' + self.repo) # Get the repository from GitHub

        self.base = Repository(self.name)
        self.base.checkout('HEAD')

    # Destruction. Remove the given repository from memory.
    def destroy(self):
        os.system('cd ' + self.name)
        os.system('rm -rf ' + self.name)

    # Get total LOC by given repository. 
    def totalRepoLOC(self):
        loc = countDirLOC(self.name)
        return loc

    # Get total commits by a given repository
    def totalRepoCommits(self):
        commits = 0
        for commit in self.base.walk(self.base.head.target, GIT_SORT_TOPOLOGICAL):
            commits = commits + 1
        return commits

    # Get a list of LOC changed per commit
    def locPerCommit(self):
        loc = []
        oldPath = os.popen('pwd')
        os.chdir(self.name)
        sha1 = 0
        sha2 = 0

        start = 1
        total = self.totalRepoCommits()

        # For each commit within the repository
        for commit in self.base.walk(self.base.head.target, GIT_SORT_TOPOLOGICAL):

            print '\r', start, '/', total,
            start += 1

            # Based on the SHA, use git to show the patch for that commit
            sha1 = sha2
            sha2 = commit.hex
            if sha1 != 0:
                p = os.popen('git diff --shortstat ' + sha1 + ' ' + sha2)
                line = p.readline()

                # line contains "# file changed, # insertions(+), # deletions(-)
                # Use regular expressions to find the number of additions and deletions.
                # Additions are found after ", " and before " insertion". Deletions are
                # found after "(+), " and before " deletion".
                m = re.search(', (.*) insertion', line)
                additions = 0
                deletions = 0
                if m:
                    additions = m.group(1)
                m = re.search('\(\+\), (.*) deletion', line)
                if m:
                    deletions = m.group(1)

                # Get the total and append to array
                modifications = int(additions) + int(deletions)
                loc.append(modifications)

        os.chdir('..')
        return loc


    # Get a list containing the total number of line additions and deletions (including
    # whitespace and comments) contained within each hunk that was changed over t
    def locPerHunk(self):
        loc = []
        history = []

        # Get the hex number for each commit within the repository
        for commit in self.base.walk(self.base.head.target, GIT_SORT_TOPOLOGICAL):
            sha = commit.hex
            history.append(sha)

        # Compare each revision in the history of the repository with the previous rev.
        i = 0
        while i < len(history) - 1:
            t0 = self.base.revparse_single(history[i])
            t1 = self.base.revparse_single(history[i+1])
            diff = self.base.diff(t0,t1)
            patches = [p for p in diff]
            for patch in patches:
                for hunk in patch.hunks:
                   
                    # Check the first character in each hunk line. Only those that have
                    # been modified will contain a '+' (insertion) or '-' (deletion)
                    totalModifications = 0
                    for line in hunk.lines:
                        if line[0] == '-' or line[0] == '+':
                            totalModifications +=1
                    loc.append(totalModifications)
            i += 1
        return loc

    # Get the total number of lines contained within a hunk, including additions, deletions,
    # and surrounding non-changed lines
    def locInHunk(self):
        loc = []
        history = []

        # Get the hex number for each commit within the repository
        for commit in self.base.walk(self.base.head.target, GIT_SORT_TOPOLOGICAL):
            sha = commit.hex
            history.append(sha)

        # Compare each revision in the history of the repository with the previous rev.
        i = 0
        while i < len(history) - 1:
            t0 = self.base.revparse_single(history[i])
            t1 = self.base.revparse_single(history[i+1])
            diff = self.base.diff(t0,t1)
            patches = [p for p in diff]
            for patch in patches:
                for hunk in patch.hunks:
                    totalLines = 0
                    for line in hunk.lines:
                       totalLines += 1
                    loc.append(totalLines)
            i += 1
        return loc

    # Perform a diff between all commits starting from oldest to newest
    #  and compile temp files comprised of only modified lines.
    #  Run cloc on temp files to get sloc for each diff set.
    def slocPerDiff(self):
        # Storage for commit history hashes
        history = []
        
        # Store all slocs
        slocPerDiffs = []

        # Move through the system history from newest to oldest commit
        for commit in self.base.walk(self.base.head.target, GIT_SORT_TOPOLOGICAL | GIT_SORT_REVERSE):
            history.append(commit)

        i = 0
        while i < len(history) - 2:
            sloc = 0
            t0 = self.base.revparse_single(history[i].hex)
            t1 = self.base.revparse_single(history[i+1].hex)
            try:
                diff = self.base.diff(t0,t1)
            except ValueError:
                print "Caught value error."
                i += 1
                continue

            patches = [p for p in diff]
            for patch in patches:
                print patch.new_file_path
                hunkfile = open("tmp", 'w') 
                for hunk in patch.hunks:
                    totesLines = 0
                    totesMods = 0
                    for line in hunk.lines:
                        totesLines += 1
                        if line[0] == '-' or line[0] == '+':
                            totesMods += 1
                            hunkfile.write(line[1])
                hunkfile.close()
            
                output = subprocess.Popen('cloc ' + patch.new_file_path + ' --by-file --csv', shell=True, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
                start = False
                for line in output.stdout.readlines():
                    if line[0] == 'l':
                        start = True
                        continue
                    if start:
                        temp = line.split(',')
                        sloc += int(temp[4].replace('\n', ''))
                        retval = output.wait()
                os.remove("tmp")                        
            i += 1
            slocPerDiffs.append(int(sloc))
        
        return slocPerDiffs

    # Get a list containing the number of hunks changed per commit
    def hunksPerCommit(self):
        hunks = []
        history = []

        start = 1
        total = self.totalRepoCommits()

        # Get the hex number for each commit within the repository
        for commit in self.base.walk(self.base.head.target, GIT_SORT_TOPOLOGICAL):
            sha = commit.hex
            history.append(sha)

        # Compare each revision in the history of the repository with the previous rev.
        i = 0
        while i < len(history) - 1:
            print '\r', start, '/', total,
            start += 1

            t0 = self.base.revparse_single(history[i])
            t1 = self.base.revparse_single(history[i+1])

            try:
                diff = self.base.diff(t0,t1)
            except ValueError:
                print "Caught value error."
                i += 1
                continue

            patches = [p for p in diff]
            for patch in patches:
                hunks.append(len(patch.hunks))
            i += 1

        return hunks


    # Get a list of the number of files changed per commit
    def filesPerCommit(self):
        files = []
        oldPath = os.popen('pwd')
        os.chdir(self.name)
        sha1 = 0
        sha2 = 0

        start = 1
        total = self.totalRepoCommits()

        # For each commit within the repository
        for commit in self.base.walk(self.base.head.target, GIT_SORT_TOPOLOGICAL):

            print '\r', start, '/', total,
            start += 1

            # Based on the SHA, use git to show the patch for that commit
            sha1 = sha2
            sha2 = commit.hex
            if sha1 != 0:
                p = os.popen('git diff --shortstat ' + sha1 + ' ' + sha2)
                line = p.readline()

                # line contains "# file changed, # insertions(+), # deletions(-)
                # Use regular expressions to find the number of files modified, which
                # are contained first on the line followed by " file"
                m = re.search(' (.*) file', line)
                if m:
                    numFilesChanged = int(m.group(1))
                    files.append(numFilesChanged)

        os.chdir('..')
        return files

    # Print out all stats for the repository
    def printStats(self):
        f = open(self.name + '-results.txt', 'w')
        f.write(("-----------" + self.name + "-----------\n"))

        # Stats on entire repository
        repoLOC = self.totalRepoLOC()
        repoCommits = self.totalRepoCommits()

        # Lists by commit
        locPerCommit   = self.locPerCommit()
        #slocPerDiff    = self.slocPerDiff()
        hunksPerCommit = self.hunksPerCommit()
        filesPerCommit = self.filesPerCommit()
        
        # Stats for LOC
        xsmall = 0
        small  = 0
        medium = 0
        large  = 0
        xlarge = 0
        for item in locPerCommit:
            if (item >= 0 and item <= 5):
                xsmall += 1
            if (item >= 6 and item <= 46):
                small += 1
            if (item >= 47 and item <= 106):
                medium += 1
            if (item >= 107 and item <= 166):
                large += 1
            if (item >= 167):
                xlarge += 1

        f.write("Number of Modified Lines:\n")
        f.write("x-small: " + str(xsmall) + "\n")
        f.write("small:   " + str(small) + "\n")
        f.write("medium:  " + str(medium) + "\n")
        f.write("large:   " + str(large) + "\n")
        f.write("x-large: " + str(xlarge) + "\n")
        

        '''
        # Stats for SLOC
        xsmall = 0
        small  = 0
        medium = 0
        large  = 0
        xlarge = 0
        for item in slocPerDiff:
            if (item >= 0 and item <= 5):
                xsmall += 1
            if (item >= 6 and item <= 46):
                small += 1
            if (item >= 47 and item <= 106):
                medium += 1
            if (item >= 107 and item <= 166):
                large += 1
            if (item >= 167):
                xlarge += 1

        f.write("Number of Modified SLOC: \n")
        f.write("x-small: " + str(xsmall) + "\n")
        f.write("small:   " + str(small) + "\n")
        f.write("medium:  " + str(medium) + "\n")
        f.write("large:   " + str(large) + "\n")
        f.write("x-large: " + str(xlarge) + "\n")

        '''
        # Print stats for modified files
        xsmall = 0
        small  = 0
        medium = 0
        large  = 0
        xlarge = 0
        for item in filesPerCommit:
            if (item == 1):
                xsmall += 1
            if (item >= 2 and item <= 4):
                small += 1
            if (item >= 5 and item <= 7):
                medium += 1
            if (item >= 8 and item <= 10):
                large += 1
            if (item >= 11):
                xlarge += 1

        f.write("Number of modified files: \n")
        f.write("x-small: " + str(xsmall) + "\n")
        f.write("small:   " + str(small) + "\n")
        f.write("medium:  " + str(medium) + "\n")
        f.write("large:   " + str(large) + "\n")
        f.write("x-large: " + str(xlarge) + "\n")

        # Prints stats for hunks
        xsmall = 0
        small  = 0
        medium = 0
        large  = 0
        xlarge = 0
        for item in hunksPerCommit:
            if (item >= 0 and item <= 1):
                xsmall += 1
            if (item >= 2 and item <= 8):
                small += 1
            if (item >= 9 and item <= 17):
                medium += 1
            if (item >= 18 and item <= 26):
                large += 1
            if (item >= 27):
                xlarge += 1

        f.write("Number of hunks per commit: \n")
        f.write("x-small: " + str(xsmall) + "\n")
        f.write("small:   " + str(small) + "\n")
        f.write("medium:  " + str(medium) + "\n")
        f.write("large:   " + str(large) + "\n")
        f.write("x-large: " + str(xlarge) + "\n")

        f.close()
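
A short, hypothetical driver for the prototype class above, assuming it is defined in the same file and that the helper countDirLOC used by totalRepoLOC exists, as in the original script. The print calls are written so they run under either Python 2 or 3, although printStats itself is Python 2 code.

stats = prototype()
stats.init('https://github.com/octocat/Spoon-Knife.git')

print('LOC: %d' % stats.totalRepoLOC())
print('commits: %d' % stats.totalRepoCommits())

# Write the size-category breakdowns to <name>-results.txt, then clean up.
stats.printStats()
stats.destroy()
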