def sequential(lines, model_args):
    """
    Entry point for the sequential algorithm.

    See the description in the file docs.

    model_args[0] is read as the knowledge churn constant (a float).

    Yields FileData objects as tsv lines, with dev_uniq and
    tot_knowledge fields filled in.
    """
    churn_constant = float(model_args[0])
    for raw_line in lines:
        file_data = FileData(raw_line)
        uniq, knowledge = sequential_estimate_uniq(file_data, churn_constant)
        file_data.dev_uniq = uniq
        file_data.tot_knowledge = knowledge
        yield file_data.as_line()
# Example #2 (score: 0)
def sequential(lines, model_args):
    """
    Entry point for the sequential algorithm.

    See the description in the file docs.

    The first model argument is the knowledge churn constant.

    Yields FileData objects as tsv lines, with dev_uniq and
    tot_knowledge fields filled in.
    """
    constant = float(model_args[0])
    for tsv_line in lines:
        data = FileData(tsv_line)
        # estimate and attach both knowledge fields before re-encoding
        data.dev_uniq, data.tot_knowledge = sequential_estimate_uniq(
            data, constant)
        yield data.as_line()
# Example #3 (score: 0)
def gen_stats(root, project, interesting, not_interesting, options):
    """
    root: the root svn url of the project we are generating stats for
    (does not need to be the root of the svn repo).  Must be a url,
    not a checkout path.

    project: the project identifier.

    interesting: regular expressions that indicate an interesting path
    if they match

    not_interesting: regular expressions that trump interesting and
    indicate a path is not interesting.

    options: currently unused, options from gen_file_stats.py's main.

    Yields FileData objects encoded as tsv lines.  Only the fname,
    dev_experience and cnt_lines fields are filled in.
    """
    client = pysvn.Client()

    # svn ls reports paths relative to the repo root, not our project
    # root, so resolve the repo root up front.
    repo_root = client.root_url_from_path(root)

    # keep only plain files whose repo path matches the filters
    wanted_paths = []
    for item in client.list(root, recurse=True):
        entry = item[0]
        if (is_interesting(entry.repos_path, interesting, not_interesting)
                and entry.kind == pysvn.node_kind.file):
            wanted_paths.append(entry.repos_path)

    for path in wanted_paths:
        experience = parse_dev_experience(path, client, repo_root)
        if not experience:
            continue
        data = FileData(':'.join([project, path]))
        # drop revisions with 0 lines added and 0 removed (e.g.
        # property-only changes)
        data.dev_experience = [(dev, added, removed)
                               for dev, added, removed in experience
                               if added or removed]
        data.cnt_lines = count_lines(path, client, repo_root)
        encoded = data.as_line()
        if encoded.strip():
            yield encoded
# Example #4 (score: 0)
def summarize(lines, departed_devs):
    """
    Aggregate the FileData in lines, considering all devs in
    departed_devs to be hit by a bus.
    """

    aggs = {}

    # aggregation key specs, in order:
    # - valtype + top-level object (index page)
    # - project-first (projects pages)
    # - dev-group-first (devs pages, groups of 1 or more)
    # - fname-first (files pages)
    agg_specs = (
        (a_valtype, a_dev),
        (a_valtype, a_project),
        (a_valtype, a_fname),
        (a_project, a_valtype, a_fname),
        (a_project, a_valtype, a_dev),
        (a_dev, a_valtype, a_fname),
        (a_dev, a_valtype, a_project),
        (a_fname, a_valtype, a_dev),
    )
    for spec in agg_specs:
        create_agg(aggs, spec)

    for raw in lines:
        fd = FileData(raw)

        # risk attributable to departed devs is dropped: that risk has
        # already turned out to be real and the knowledge is gone.
        present_risk, _ = split_out_dev_vals(fd.dev_risk, departed_devs)
        for devs, risk in present_risk:
            agg_all(aggs, Dat('risk', fd, devs, risk))

        present_uniq, orphaned_uniq = split_out_dev_vals(
            fd.dev_uniq, departed_devs)
        for devs, uniq in present_uniq:
            agg_all(aggs, Dat('unique knowledge', fd, devs, uniq))
            # hack: to surface the devs with the most shared knowledge
            # on the devs pages, explode the group and aggregate it
            # pairwise under a distinct valtype that only the devs
            # pages use.
            for first in devs:
                for second in devs:
                    # ordered comparison so each pair counts once
                    if first < second:
                        agg_all(
                            aggs,
                            Dat('shared knowledge (devs still present)', fd,
                                [first, second], uniq))

        # knowledge unique to a group made up entirely of departed
        # devs is orphaned.
        for devs, orphaned in orphaned_uniq:
            agg_all(aggs, Dat('orphaned knowledge', fd, devs, orphaned))

    return aggs
def estimate_file_risks(lines, bus_risks, def_bus_risk):
    """
    Estimate the risk in the file as:

    sum(knowledge unique to a group of 1 or more devs * the
    probability that all devs in the group will be hit by a bus)

    We use a simple joint probability and assume that all bus killings
    are independently likely.
    """
    for raw in lines:
        fd = FileData(raw)
        risks = []
        for devs, shared in fd.dev_uniq:
            # fold each dev's bus probability into the shared value;
            # a group's risk is the joint probability times the
            # knowledge unique to it.
            joint = shared
            for dev in devs:
                joint = float(joint) * get_bus_risk(dev, bus_risks,
                                                    def_bus_risk)
            risks.append((devs, joint))
        fd.dev_risk = risks
        yield fd.as_line()
def estimate_file_risks(lines, bus_risks, def_bus_risk):
    """
    Estimate the risk in the file as:

    sum(knowledge unique to a group of 1 or more devs * the
    probability that all devs in the group will be hit by a bus)

    We use a simple joint probability and assume that all bus killings
    are independently likely.
    """
    for tsv_line in lines:
        file_data = FileData(tsv_line)
        estimates = []
        for group, unique_knowledge in file_data.dev_uniq:
            value = unique_knowledge
            # multiply in every group member's bus probability
            for member in group:
                value = float(value) * get_bus_risk(member, bus_risks,
                                                    def_bus_risk)
            estimates.append((group, value))
        file_data.dev_risk = estimates
        yield file_data.as_line()
def gen_stats(root, project, interesting, not_interesting, options):
    """
    root: the root svn url of the project we are generating stats for
    (does not need to be the root of the svn repo).  Must be a url,
    not a checkout path.

    project: the project identifier.

    interesting: regular expressions that indicate an interesting path
    if they match

    not_interesting: regular expressions that trump interesting and
    indicate a path is not interesting.

    options: currently unused, options from gen_file_stats.py's main.

    Yields FileData objects encoded as tsv lines.  Only the fname,
    dev_experience and cnt_lines fields are filled in.
    """
    client = pysvn.Client()

    # paths from svn ls come back relative to the repo root rather
    # than our project root, so we need the repo root too
    repo_root = client.root_url_from_path(root)

    listing = (entry[0] for entry in client.list(root, recurse=True))
    candidates = [e.repos_path for e in listing
                  if is_interesting(e.repos_path, interesting,
                                    not_interesting)
                  and e.kind == pysvn.node_kind.file]

    for fname in candidates:
        experience = parse_dev_experience(fname, client, repo_root)
        if experience:
            fd = FileData(':'.join([project, fname]))
            # exclude revisions that touched no lines (e.g. property
            # changes): 0 added and 0 removed
            fd.dev_experience = [rec for rec in experience
                                 if rec[1] or rec[2]]
            fd.cnt_lines = count_lines(fname, client, repo_root)
            line = fd.as_line()
            if line.strip():
                yield line
def gen_stats(root, project, interesting, not_interesting, options):
    """
    root: the path a local, git controlled-directory that is the root
    of this project

    project: the name of the project

    interesting: regular expressions that indicate an interesting path
    if they match

    not_interesting: regular expressions that trump interesting and
    indicate a path is not interesting.

    options: from gen_file_stats.py's main, currently only uses
    git_exe.

    Yields FileData objects encoded as tsv lines.  Only the fname,
    dev_experience and cnt_lines fields are filled in.
    """
    git_exe = options.git_exe

    # git only works from inside a git-controlled path, so move into
    # one before running any commands
    prepare(root, git_exe)

    for path in git_ls(root, git_exe):
        if not is_interesting(path, interesting, not_interesting):
            continue
        experience = parse_dev_experience(path, git_exe)
        if not experience:
            continue
        fd = FileData(':'.join([project, path]))
        fd.dev_experience = experience
        fd.cnt_lines = count_lines(path)
        encoded = fd.as_line()
        if encoded.strip():
            yield encoded
def gen_stats(root, project, interesting, not_interesting, options):
    """
    root: the path a local, git controlled-directory that is the root
    of this project

    project: the name of the project

    interesting: regular expressions that indicate an interesting path
    if they match

    not_interesting: regular expressions that trump interesting and
    indicate a path is not interesting.

    options: from gen_file_stats.py's main, currently only uses
    git_exe.

    Yields FileData objects encoded as tsv lines.  Only the fname,
    dev_experience and cnt_lines fields are filled in.
    """
    git_exe = options.git_exe

    # git commands require the cwd to be under git control, so get
    # there first
    prepare(root, git_exe)

    tracked = git_ls(root, git_exe)
    wanted = (f for f in tracked
              if is_interesting(f, interesting, not_interesting))

    for fname in wanted:
        experience = parse_dev_experience(fname, git_exe)
        if experience:
            data = FileData(':'.join([project, fname]))
            data.dev_experience = experience
            data.cnt_lines = count_lines(fname)
            tsv = data.as_line()
            if tsv.strip():
                yield tsv