Exemple #1
0
def plot_overwrites_matrix(args, repo, people, matrix):
    """Draw the overwrites matrix as a heat map, or dump it as JSON.

    If ``args.output`` ends with ".json", ``repo``, ``people`` and ``matrix``
    are serialized together with ``"type": "overwrites_matrix"`` and nothing is
    plotted. Otherwise the matrix is rendered with ``matshow`` and passed on to
    ``deploy_plot``.

    :param args: options object; ``output``, ``mode``, ``backend``, ``style``,
                 ``background``, ``font_size`` and ``size`` are read.
    :param repo: repository name, only used in the interactive plot title.
    :param people: row labels; must be a list because it is concatenated with
                   ``["Unidentified"]`` to build the column labels.
    :param matrix: 2-D array handed to ``matshow``.
    """
    save_path = args.output

    if save_path and save_path.endswith(".json"):
        payload = {
            "repo": repo,
            "people": people,
            "matrix": matrix,
            "type": "overwrites_matrix",
        }
        if args.mode == "all":
            json_path = get_plot_path(save_path, "matrix")
        else:
            json_path = save_path
        with open(json_path, "w") as fout:
            json.dump(payload, fout, sort_keys=True, default=default_json)
        return

    matplotlib, pyplot = import_pyplot(args.backend, args.style)

    n_rows = matrix.shape[0]
    n_cols = matrix.shape[1]
    # The figure is square and grows with the number of columns.
    side = 4 + n_cols * 0.3
    fig = pyplot.figure(figsize=(side, side))
    ax = fig.add_subplot(111)
    ax.xaxis.set_label_position("top")
    ax.matshow(matrix, cmap=pyplot.cm.OrRd)

    # Major ticks sit on the cell centres and carry the labels.
    ax.set_xticks(numpy.arange(0, n_cols))
    ax.set_yticks(numpy.arange(0, n_rows))
    ax.set_yticklabels(people, va="center")
    ax.set_xticklabels(
        ["Unidentified"] + people,
        rotation=45,
        ha="left",
        va="bottom",
        rotation_mode="anchor",
    )
    # Minor ticks sit on the cell borders; only they get grid lines.
    ax.set_xticks(numpy.arange(0.5, n_cols + 0.5), minor=True)
    ax.set_yticks(numpy.arange(0.5, n_rows + 0.5), minor=True)
    ax.grid(False)
    ax.grid(which="minor")
    apply_plot_style(fig, ax, None, args.background, args.font_size, args.size)

    if save_path:
        # FIXME(vmarkovtsev): otherwise the title is screwed in savefig()
        title = ""
        if args.mode == "all":
            destination = get_plot_path(save_path, "matrix")
        else:
            destination = save_path
    else:
        # Interactive display: shift and shrink the axes a little.
        box = ax.get_position()
        ax.set_position(
            (box.x0 + 0.15, box.y0 - 0.1, box.width * 0.9, box.height * 0.9)
        )
        title = "%s %d developers overwrite" % (repo, n_rows)
        destination = save_path
    deploy_plot(title, destination, args.background)
def show_devs_parallel(args, name, start_date, end_date, devs):
    """Plot one smooth curve per developer across five ranking axes.

    For every value in ``devs`` the attributes ``commits_rank``,
    ``lines_rank``, ``ownership_rank``, ``couples_index`` and
    ``commit_coocc_index`` are placed at x = 1..5, divided by ``len(devs)`` and
    joined by cubic segments. ``name``, ``start_date`` and ``end_date`` are
    not used.

    :param args: options object; ``backend``, ``style``, ``output`` and
                 ``background`` are read.
    :param devs: mapping whose values expose the five attributes above.
    """
    matplotlib, pyplot = import_pyplot(args.backend, args.style)
    from matplotlib.collections import LineCollection

    def cubic_through(xa, ya, xb, yb):
        # Coefficients of the cubic that passes through (xa, ya) and (xb, yb)
        # with a zero derivative at both ends.
        denom = (xa - xb)**3
        a = 2 * (yb - ya) / denom
        b = 3 * (ya - yb) * (xa + xb) / denom
        c = 6 * (yb - ya) * xa * xb / denom
        d = ya - a * xa**3 - b * xa**2 - c * xa
        return a, b, c, d

    # biggest = {k: max(getattr(d, k) for d in devs.values())
    #            for k in ("commits", "lines", "ownership")}
    dev_count = len(devs)
    for dev in devs.values():
        knots = numpy.array(
            [
                (1, dev.commits_rank),
                (2, dev.lines_rank),
                (3, dev.ownership_rank),
                (4, dev.couples_index),
                (5, dev.commit_coocc_index),
            ],
            dtype=float,
        )
        knots[:, 1] = knots[:, 1] / dev_count

        pieces = []
        for idx, (left, right) in enumerate(zip(knots[:-1], knots[1:])):
            a, b, c, d = cubic_through(*left, *right)
            xs = numpy.linspace(idx + 1, idx + 2, 100)
            ys = a * xs**3 + b * xs**2 + c * xs + d
            pieces.append(numpy.array([xs, ys]).T.reshape(-1, 1, 2))

        curve = numpy.concatenate(pieces)
        # Each segment joins two consecutive samples of the curve.
        segments = numpy.concatenate([curve[:-1], curve[1:]], axis=1)
        collection = LineCollection(segments)
        collection.set_array(numpy.linspace(0, 0.1, segments.shape[0]))
        pyplot.gca().add_collection(collection)

    pyplot.xlim(0, 6)
    pyplot.ylim(-0.1, 1.1)
    deploy_plot("Developers", args.output, args.background)
Exemple #3
0
def show_old_vs_new(
    args: Namespace,
    name: str,
    start_date: int,
    end_date: int,
    people: List[str],
    days: Dict[int, Dict[int, DevDay]],
) -> None:
    """Plot smoothed daily added lines against removed plus changed lines.

    ``days`` maps a day offset (used directly as an array index counted from
    ``start_date``) to a mapping of per-developer stats exposing ``Added``,
    ``Removed`` and ``Changed``. Offsets that fall outside the plotted range
    are ignored instead of raising ``IndexError`` or wrapping around to the
    end of the array. ``name`` and ``people`` are not used.

    :param args: options object; ``backend``, ``style``, ``font_size``,
                 ``mode``, ``output`` and ``background`` are read.
    :param start_date: UNIX timestamp of the first day; truncated to midnight.
    :param end_date: UNIX timestamp of the last day; truncated to midnight.
    :param days: per-day, per-developer statistics as described above.
    """
    from scipy.signal import convolve, slepian

    start_date = datetime.fromtimestamp(start_date)
    start_date = datetime(start_date.year, start_date.month, start_date.day)
    end_date = datetime.fromtimestamp(end_date)
    end_date = datetime(end_date.year, end_date.month, end_date.day)
    new_lines = numpy.zeros((end_date - start_date).days + 2)
    old_lines = numpy.zeros_like(new_lines)
    size = len(new_lines)
    for day, devs in days.items():
        # Same guard as the sibling plots: skip days outside the date range.
        if not 0 <= day < size:
            continue
        for stats in devs.values():
            new_lines[day] += stats.Added
            old_lines[day] += stats.Removed + stats.Changed
    resolution = 32
    # The window length scales with the range but is never shorter than 1.
    window = slepian(max(size // resolution, 1), 0.5)
    new_lines = convolve(new_lines, window, "same")
    old_lines = convolve(old_lines, window, "same")
    matplotlib, pyplot = import_pyplot(args.backend, args.style)
    plot_x = [start_date + timedelta(days=i) for i in range(size)]
    pyplot.fill_between(plot_x, new_lines, color="#8DB843", label="Changed new lines")
    pyplot.fill_between(
        plot_x, old_lines, color="#E14C35", label="Changed existing lines"
    )
    pyplot.legend(loc=2, fontsize=args.font_size)
    for tick in chain(
        pyplot.gca().xaxis.get_major_ticks(), pyplot.gca().yaxis.get_major_ticks()
    ):
        # label1 is the primary tick label; used here instead of the
        # ``label`` alias.
        tick.label1.set_fontsize(args.font_size)
    if args.mode == "all" and args.output:
        output = get_plot_path(args.output, "old_vs_new")
    else:
        output = args.output
    deploy_plot("Additions vs changes", output, args.background)
Exemple #4
0
def plot_ownership(args, repo, names, people, date_range, last):
    """Plot code ownership over time as a stacked area chart, or dump JSON.

    If ``args.output`` ends with ".json", all parameters except ``args`` are
    serialized together with ``"type": "ownership"`` and nothing is plotted.

    With ``args.relative`` every column of ``people`` (one time point) is
    scaled to sum to 1 *before* plotting, so the stack really fills the 0..1
    range. The scaling works on a float copy: the caller's array is left
    untouched, integer input is accepted, and columns that sum to zero stay
    zero instead of turning into NaN.

    :param args: options object; ``output``, ``mode``, ``backend``, ``style``,
                 ``start_date``, ``end_date``, ``relative``, ``font_size``,
                 ``background`` and ``size`` are read.
    :param repo: repository name used in the plot title.
    :param names: legend labels, one per row of ``people``; a trailing
                  "others" entry gets a hatched band.
    :param people: 2-D array, rows are stacked bands, columns follow
                   ``date_range``.
    :param date_range: x values of the stack plot.
    :param last: fallback for the right x limit when ``args.end_date`` is unset
                 (passed to ``parse_date``).
    """
    if args.output and args.output.endswith(".json"):
        # Snapshot taken first so that it holds exactly the parameters.
        data = locals().copy()
        del data["args"]
        data["type"] = "ownership"
        if args.mode == "all" and args.output:
            output = get_plot_path(args.output, "people")
        else:
            output = args.output
        with open(output, "w") as fout:
            json.dump(data, fout, sort_keys=True, default=default_json)
        return

    matplotlib, pyplot = import_pyplot(args.backend, args.style)

    if args.relative:
        # Normalize before stackplot(): it stacks its input into a new array,
        # so changing ``people`` afterwards would not affect the drawing.
        people = numpy.asarray(people, dtype=float)
        totals = people.sum(axis=0)
        people = numpy.divide(
            people, totals, out=numpy.zeros_like(people), where=totals != 0
        )

    polys = pyplot.stackplot(date_range, people, labels=names)
    if len(names) > 0 and names[-1] == "others":
        polys[-1].set_hatch("/")
    pyplot.xlim(parse_date(args.start_date, date_range[0]),
                parse_date(args.end_date, last))

    if args.relative:
        pyplot.ylim(0, 1)
        legend_loc = 3
    else:
        legend_loc = 2
    ncol = 1 if len(names) < 15 else 2
    legend = pyplot.legend(loc=legend_loc, fontsize=args.font_size, ncol=ncol)
    apply_plot_style(pyplot.gcf(), pyplot.gca(), legend, args.background,
                     args.font_size, args.size)
    if args.mode == "all" and args.output:
        output = get_plot_path(args.output, "people")
    else:
        output = args.output
    deploy_plot("%s code ownership through time" % repo, output,
                args.background)
Exemple #5
0
def plot_burndown(
    args: Namespace,
    target: str,
    name: str,
    matrix: numpy.ndarray,
    date_range_sampling: 'DatetimeIndex',
    labels: List[int],
    granularity: int,
    sampling: int,
    resample: str,
) -> None:
    """Plot a burndown matrix as a stacked area chart, or dump it as JSON.

    If ``args.output`` ends with ".json", all parameters except ``args`` are
    serialized together with ``"type": "burndown"`` and nothing is plotted.

    With ``args.relative`` every column of ``matrix`` (one time point) is
    scaled to sum to 1 *before* plotting. The scaling works on a float copy:
    the caller's array is left untouched and columns that sum to zero stay
    zero instead of turning into NaN.

    :param args: options object; ``output``, ``mode``, ``backend``, ``style``,
                 ``relative``, ``font_size``, ``background``, ``size``,
                 ``start_date`` and ``end_date`` are read.
    :param target: when equal to "project" the output file is named
                   "project" (or ``args.output`` itself in "project" mode).
    :param name: used in the title and, unless ``target`` is "project", in
                 the output path.
    :param matrix: 2-D array, rows are stacked bands, columns follow
                   ``date_range_sampling``.
    :param date_range_sampling: x values; the first and last elements must
                                provide ``.date()``.
    :param labels: legend labels, one per row of ``matrix``.
    :param granularity: only reported in the title / JSON.
    :param sampling: only reported in the title / JSON.
    :param resample: if it does not contain "M", yearly x ticks are used.
    """
    if args.output and args.output.endswith(".json"):
        # Snapshot taken first so that it holds exactly the parameters
        # (including the original ``name``, which may be rebound below).
        data = locals().copy()
        del data["args"]
        data["type"] = "burndown"
        if args.mode == "project" and target == "project":
            output = args.output
        else:
            if target == "project":
                name = "project"
            output = get_plot_path(args.output, name)
        with open(output, "w") as fout:
            json.dump(data, fout, sort_keys=True, default=default_json)
        return

    matplotlib, pyplot = import_pyplot(args.backend, args.style)

    if args.relative:
        # Normalize before stackplot(): it stacks its input into a new array,
        # so changing ``matrix`` afterwards would not affect the drawing.
        matrix = numpy.asarray(matrix, dtype=float)
        totals = matrix.sum(axis=0)
        matrix = numpy.divide(
            matrix, totals, out=numpy.zeros_like(matrix), where=totals != 0
        )
    pyplot.stackplot(date_range_sampling, matrix, labels=labels)
    if args.relative:
        pyplot.ylim(0, 1)
        legend_loc = 3
    else:
        legend_loc = 2
    legend = pyplot.legend(loc=legend_loc, fontsize=args.font_size)
    pyplot.ylabel("Lines of code")
    pyplot.xlabel("Time")
    apply_plot_style(pyplot.gcf(), pyplot.gca(), legend, args.background,
                     args.font_size, args.size)
    pyplot.xlim(
        parse_date(args.start_date, date_range_sampling[0]),
        parse_date(args.end_date, date_range_sampling[-1]),
    )
    locator = pyplot.gca().xaxis.get_major_locator()
    # set the optimal xticks locator
    if "M" not in resample:
        pyplot.gca().xaxis.set_major_locator(matplotlib.dates.YearLocator())
    locs = pyplot.gca().get_xticks().tolist()
    if len(locs) >= 16:
        pyplot.gca().xaxis.set_major_locator(matplotlib.dates.YearLocator())
        locs = pyplot.gca().get_xticks().tolist()
        if len(locs) >= 16:
            # Still too many ticks: fall back to the locator we started with.
            pyplot.gca().xaxis.set_major_locator(locator)
    # Drop a leading tick that lies left of the visible range; an empty tick
    # list is left alone.
    if locs and locs[0] < pyplot.xlim()[0]:
        del locs[0]
    # Add explicit ticks at the range ends when the nearest regular tick is
    # more than half a tick step away from them.
    endindex = -1
    if len(locs) >= 2 and pyplot.xlim()[1] - locs[-1] > (locs[-1] -
                                                         locs[-2]) / 2:
        locs.append(pyplot.xlim()[1])
        endindex = len(locs) - 1
    startindex = -1
    if len(locs) >= 2 and locs[0] - pyplot.xlim()[0] > (locs[1] - locs[0]) / 2:
        locs.append(pyplot.xlim()[0])
        startindex = len(locs) - 1
    pyplot.gca().set_xticks(locs)
    # hacking time!
    # The extra ticks get the exact boundary dates as text; set_text is then
    # replaced by a no-op on those label objects so later calls cannot
    # overwrite it.
    tick_labels = pyplot.gca().get_xticklabels()
    if startindex >= 0:
        tick_labels[startindex].set_text(date_range_sampling[0].date())
        tick_labels[startindex].set_text = lambda _: None
        tick_labels[startindex].set_rotation(30)
        tick_labels[startindex].set_ha("right")
    if endindex >= 0:
        tick_labels[endindex].set_text(date_range_sampling[-1].date())
        tick_labels[endindex].set_text = lambda _: None
        tick_labels[endindex].set_rotation(30)
        tick_labels[endindex].set_ha("right")
    title = "%s %d x %d (granularity %d, sampling %d)" % (
        (name, ) + matrix.shape + (granularity, sampling))
    output = args.output
    if output:
        if args.mode == "project" and target == "project":
            output = args.output
        else:
            if target == "project":
                name = "project"
            output = get_plot_path(args.output, name)
    deploy_plot(title, output, args.background)
Exemple #6
0
def show_devs_efforts(
    args: Namespace,
    name: str,
    start_date: int,
    end_date: int,
    people: List[str],
    days: Dict[int, Dict[int, DevDay]],
    max_people: int,
) -> None:
    """Plot per-developer effort (added + removed + changed lines) over time.

    The cumulative effort is stacked above the x axis and the daily effort,
    rescaled to the same peak, is mirrored below it. At most ``max_people``
    developers with the largest total effort get their own band; everybody
    else is summed into a trailing "others" band. ``name`` is not used.

    :param args: options object; ``backend``, ``style``, ``font_size``,
                 ``background``, ``size``, ``mode`` and ``output`` are read.
    :param start_date: UNIX timestamp of the first day; truncated to midnight.
    :param end_date: UNIX timestamp of the last day; truncated to midnight.
    :param people: developer names indexed by the developer keys in ``days``.
    :param days: day offset -> developer key -> stats exposing ``Added``,
                 ``Removed`` and ``Changed``.
    :param max_people: maximum number of individually plotted developers.
    """
    from scipy.signal import convolve, slepian

    start_date = datetime.fromtimestamp(start_date)
    start_date = datetime(start_date.year, start_date.month, start_date.day)
    end_date = datetime.fromtimestamp(end_date)
    end_date = datetime(end_date.year, end_date.month, end_date.day)

    efforts_by_dev = defaultdict(int)
    for devs in days.values():
        for dev, stats in devs.items():
            efforts_by_dev[dev] += stats.Added + stats.Removed + stats.Changed
    # One descending sort serves both the top-N cut and the plotting order.
    chosen_efforts = sorted(
        ((effort, dev) for dev, effort in efforts_by_dev.items()), reverse=True
    )
    if len(chosen_efforts) > max_people:
        chosen_efforts = chosen_efforts[:max_people]
        print("Warning: truncated people to the most active %d" % max_people)
    chosen_order = {dev: i for i, (_, dev) in enumerate(chosen_efforts)}

    # One row per chosen developer plus a last row for everybody else.
    efforts = numpy.zeros(
        (len(chosen_order) + 1, (end_date - start_date).days + 1),
        dtype=numpy.float32,
    )
    others_row = len(chosen_order)
    for day, devs in days.items():
        # Days outside the plotted range are ignored.
        if 0 <= day < efforts.shape[1]:
            for dev, stats in devs.items():
                row = chosen_order.get(dev, others_row)
                efforts[row][day] += stats.Added + stats.Removed + stats.Changed
    efforts_cum = numpy.cumsum(efforts, axis=1)
    window = slepian(10, 0.5)
    window /= window.sum()
    for e in (efforts, efforts_cum):
        for i in range(e.shape[0]):
            # Keep the raw tail: the smoothed values of the last
            # 2 * len(window) samples are replaced by the originals.
            ending = e[i][-len(window) * 2 :].copy()
            e[i] = convolve(e[i], window, "same")
            e[i][-len(ending) :] = ending
    matplotlib, pyplot = import_pyplot(args.backend, args.style)
    plot_x = [start_date + timedelta(days=i) for i in range(efforts.shape[1])]

    people = [people[k] for _, k in chosen_efforts] + ["others"]
    for i, label in enumerate(people):
        if len(label) > 40:
            people[i] = label[:37] + "..."

    polys = pyplot.stackplot(plot_x, efforts_cum, labels=people)
    if len(polys) == max_people + 1:
        polys[-1].set_hatch("/")
    # Mirror the daily efforts below the axis, scaled to the cumulative peak.
    # With no effort at all the scale would be 0 / 0, so it is skipped.
    peak = efforts.max()
    if peak:
        mirrored = -efforts * efforts_cum.max() / peak
    else:
        mirrored = -efforts
    polys = pyplot.stackplot(plot_x, mirrored)
    if len(polys) == max_people + 1:
        polys[-1].set_hatch("/")
    # Only the non-negative major tick positions are kept on the y axis.
    yticks = [loc for loc in pyplot.gca().get_yticks() if loc >= 0]
    pyplot.gca().yaxis.set_ticks(yticks)
    legend = pyplot.legend(loc=2, ncol=2, fontsize=args.font_size)
    apply_plot_style(
        pyplot.gcf(),
        pyplot.gca(),
        legend,
        args.background,
        args.font_size,
        args.size or "16,10",
    )
    if args.mode == "all" and args.output:
        output = get_plot_path(args.output, "efforts")
    else:
        output = args.output
    deploy_plot("Efforts through time (changed lines of code)", output, args.background)
Exemple #7
0
def show_devs(
    args: Namespace,
    name: str,
    start_date: int,
    end_date: int,
    people: List[str],
    days: Dict[int, Dict[int, DevDay]],
    max_people: int = 50,
) -> None:
    """Plot one smoothed activity time series per developer, stacked in rows.

    Developers are ordered by ``order_commits`` and colored by the cluster
    returned from ``hdbscan_cluster_routed_series``; negative cluster ids are
    drawn in grey. Each row shows the developer name on the left and a
    three-column summary taken from ``devstats`` on the right.

    :param args: options object; ``backend``, ``style``, ``font_size``,
                 ``background``, ``output`` and ``mode`` are read.
    :param name: used in the title when the plot is shown interactively.
    :param start_date: UNIX timestamp of the first day; truncated to midnight.
    :param end_date: UNIX timestamp of the last day; truncated to midnight.
    :param people: developer names indexed by the developer keys in ``days``.
    :param days: day offset -> developer key -> stats exposing ``Commits``.
    :param max_people: if ``people`` is longer, only this many developers with
                       the most commits are kept.
    """
    from scipy.signal import convolve, slepian

    if len(people) > max_people:
        print("Picking top %s developers by commit count" % max_people)
        # pick top N developers by commit count
        commits = defaultdict(int)
        for devs in days.values():
            for dev, stats in devs.items():
                commits[dev] += stats.Commits
        commits = sorted(((v, k) for k, v in commits.items()), reverse=True)
        chosen_people = {people[k] for _, k in commits[:max_people]}
    else:
        chosen_people = set(people)
    dists, devseries, devstats, route = order_commits(chosen_people, days, people)
    # route_map: position in devseries -> row in the final plot.
    route_map = {v: i for i, v in enumerate(route)}
    # determine clusters
    clusters = hdbscan_cluster_routed_series(dists, route)
    keys = list(devseries.keys())
    route = [keys[node] for node in route]
    print("Plotting")
    # smooth time series
    start_date = datetime.fromtimestamp(start_date)
    start_date = datetime(start_date.year, start_date.month, start_date.day)
    end_date = datetime.fromtimestamp(end_date)
    end_date = datetime(end_date.year, end_date.month, end_date.day)
    size = (end_date - start_date).days + 1
    plot_x = [start_date + timedelta(days=i) for i in range(size)]
    resolution = 64
    # Clamp to 1 so that ranges shorter than ``resolution`` days do not ask
    # for an empty smoothing window.
    window = slepian(max(size // resolution, 1), 0.5)
    final = numpy.zeros((len(devseries), size), dtype=numpy.float32)
    for i, s in enumerate(devseries.values()):
        # s is transposed into two rows: arr[0] is used as day indexes and
        # arr[1] as the values stored at those days.
        arr = numpy.array(s).transpose()
        full_history = numpy.zeros(size, dtype=numpy.float32)
        mask = arr[0] < size
        full_history[arr[0][mask]] = arr[1][mask]
        final[route_map[i]] = convolve(full_history, window, "same")

    matplotlib, pyplot = import_pyplot(args.backend, args.style)
    pyplot.rcParams["figure.figsize"] = (32, 16)
    pyplot.rcParams["font.size"] = args.font_size
    prop_cycle = pyplot.rcParams["axes.prop_cycle"]
    colors = prop_cycle.by_key()["color"]
    fig, axes = pyplot.subplots(final.shape[0], 1)
    # With a single row subplots() returns a bare Axes instead of an array;
    # normalize so the iteration and indexing below work for one developer.
    axes = numpy.atleast_1d(axes)
    backgrounds = (
        ("#C4FFDB", "#FFD0CD") if args.background == "white" else ("#05401C", "#40110E")
    )
    max_cluster = numpy.max(clusters)
    for ax, series, cluster, dev_i in zip(axes, final, clusters, route):
        if cluster >= 0:
            color = colors[cluster % len(colors)]
            i = 1
            # Grey is reserved for outliers; pick another cycle color.
            while color == "#777777":
                color = colors[(max_cluster + i) % len(colors)]
                i += 1
        else:
            # outlier
            color = "#777777"
        ax.fill_between(plot_x, series, color=color)
        ax.set_axis_off()
        author = people[dev_i]
        ax.text(
            0.03,
            0.5,
            # Names longer than 36 characters are cut and get an ellipsis.
            author[:36] + (author[36:] and "..."),
            horizontalalignment="right",
            verticalalignment="center",
            transform=ax.transAxes,
            fontsize=args.font_size,
            color="black" if args.background == "white" else "white",
        )
        ds = devstats[dev_i]
        # Columns match the " cmts    delta  changed" header drawn below.
        stats = "%5d %8s %8s" % (
            ds[0],
            _format_number(ds[1] - ds[2]),
            _format_number(ds[3]),
        )
        ax.text(
            0.97,
            0.5,
            stats,
            horizontalalignment="left",
            verticalalignment="center",
            transform=ax.transAxes,
            fontsize=args.font_size,
            family="monospace",
            # First background when ds[1] > ds[2], second otherwise.
            backgroundcolor=backgrounds[ds[1] <= ds[2]],
            color="black" if args.background == "white" else "white",
        )
    axes[0].text(
        0.97,
        1.75,
        " cmts    delta  changed",
        horizontalalignment="left",
        verticalalignment="center",
        transform=axes[0].transAxes,
        fontsize=args.font_size,
        family="monospace",
        color="black" if args.background == "white" else "white",
    )
    # Only the bottom row keeps a (date) x axis.
    axes[-1].set_axis_on()
    target_num_labels = 12
    num_months = (
        (end_date.year - start_date.year) * 12 + end_date.month - start_date.month
    )
    # At least 1: a range within a single month would otherwise yield a zero
    # interval for MonthLocator.
    interval = max(1, int(numpy.ceil(num_months / target_num_labels)))
    if interval >= 8:
        interval = int(numpy.ceil(num_months / (12 * target_num_labels)))
        # NOTE(review): interval is already expressed in years here, so
        # ``interval // 12`` makes the base fall back to 1 for any realistic
        # range - confirm whether ``base=interval`` was intended.
        axes[-1].xaxis.set_major_locator(
            matplotlib.dates.YearLocator(base=max(1, interval // 12))
        )
        axes[-1].xaxis.set_major_formatter(matplotlib.dates.DateFormatter("%Y"))
    else:
        axes[-1].xaxis.set_major_locator(
            matplotlib.dates.MonthLocator(interval=interval)
        )
        axes[-1].xaxis.set_major_formatter(matplotlib.dates.DateFormatter("%Y-%m"))
    for tick in axes[-1].xaxis.get_major_ticks():
        # label1 is the primary tick label; used here instead of the
        # ``label`` alias.
        tick.label1.set_fontsize(args.font_size)
    axes[-1].spines["left"].set_visible(False)
    axes[-1].spines["right"].set_visible(False)
    axes[-1].spines["top"].set_visible(False)
    axes[-1].get_yaxis().set_visible(False)
    # Fully transparent face so the bottom row does not cover its neighbours.
    axes[-1].set_facecolor((1.0,) * 3 + (0.0,))

    title = ("%s commits" % name) if not args.output else ""
    if args.mode == "all" and args.output:
        output = get_plot_path(args.output, "time_series")
    else:
        output = args.output
    deploy_plot(title, output, args.background)
Exemple #8
0
def show_sentiment_stats(args, name, resample, start_date, data):
    """Plot smoothed comment sentiment over time.

    Every value in ``data`` is mapped to a mood ``(0.5 - Value) * 2``. The
    smoothed mood is filled green where positive and red where negative, and
    a wide moving average is drawn on top. The title reports the summed
    positive and negative moods and their difference.

    :param args: options object; ``backend``, ``style``, ``font_size``,
                 ``background``, ``size``, ``start_date``, ``end_date``,
                 ``mode`` and ``output`` are read.
    :param name: prefix of the plot title.
    :param resample: if it does not contain "M", yearly x ticks are used.
    :param start_date: UNIX timestamp of day 0.
    :param data: non-empty mapping of day offset (used as an array index) to
                 an object exposing ``Value``.
    """
    from scipy.signal import convolve, slepian

    matplotlib, pyplot = import_pyplot(args.backend, args.style)

    start_date = datetime.fromtimestamp(start_date)
    data = sorted(data.items())
    # The timeline spans day 0 up to the largest day offset present.
    mood = numpy.zeros(data[-1][0] + 1, dtype=numpy.float32)
    timeline = numpy.array(
        [start_date + timedelta(days=i) for i in range(mood.shape[0])]
    )
    for d, val in data:
        mood[d] = (0.5 - val.Value) * 2
    resolution = 32
    # Clamp to 1 so that short timelines do not ask for an empty window.
    window = slepian(max(len(timeline) // resolution, 1), 0.5)
    window /= window.sum()
    mood_smooth = convolve(mood, window, "same")
    pos = mood_smooth.copy()
    pos[pos < 0] = 0
    neg = mood_smooth.copy()
    neg[neg >= 0] = 0
    resolution = 4
    # Moving average over a quarter of the timeline, at least one sample.
    window = numpy.ones(max(len(timeline) // resolution, 1))
    window /= window.sum()
    avg = convolve(mood, window, "same")
    pyplot.fill_between(timeline, pos, color="#8DB843", label="Positive")
    pyplot.fill_between(timeline, neg, color="#E14C35", label="Negative")
    pyplot.plot(timeline, avg, color="grey", label="Average", linewidth=5)
    legend = pyplot.legend(loc=1, fontsize=args.font_size)
    pyplot.ylabel("Comment sentiment")
    pyplot.xlabel("Time")
    apply_plot_style(
        pyplot.gcf(), pyplot.gca(), legend, args.background, args.font_size, args.size
    )
    pyplot.xlim(
        parse_date(args.start_date, timeline[0]),
        parse_date(args.end_date, timeline[-1]),
    )
    locator = pyplot.gca().xaxis.get_major_locator()
    # set the optimal xticks locator
    if "M" not in resample:
        pyplot.gca().xaxis.set_major_locator(matplotlib.dates.YearLocator())
    locs = pyplot.gca().get_xticks().tolist()
    if len(locs) >= 16:
        pyplot.gca().xaxis.set_major_locator(matplotlib.dates.YearLocator())
        locs = pyplot.gca().get_xticks().tolist()
        if len(locs) >= 16:
            # Still too many ticks: fall back to the locator we started with.
            pyplot.gca().xaxis.set_major_locator(locator)
    # Drop a leading tick that lies left of the visible range; an empty tick
    # list is left alone.
    if locs and locs[0] < pyplot.xlim()[0]:
        del locs[0]
    # Add explicit ticks at the range ends when the nearest regular tick is
    # more than half a tick step away from them.
    endindex = -1
    if len(locs) >= 2 and pyplot.xlim()[1] - locs[-1] > (locs[-1] - locs[-2]) / 2:
        locs.append(pyplot.xlim()[1])
        endindex = len(locs) - 1
    startindex = -1
    if len(locs) >= 2 and locs[0] - pyplot.xlim()[0] > (locs[1] - locs[0]) / 2:
        locs.append(pyplot.xlim()[0])
        startindex = len(locs) - 1
    pyplot.gca().set_xticks(locs)
    # hacking time!
    # The extra ticks get the exact boundary dates as text; set_text is then
    # replaced by a no-op on those label objects so later calls cannot
    # overwrite it.
    labels = pyplot.gca().get_xticklabels()
    if startindex >= 0:
        labels[startindex].set_text(timeline[0].date())
        labels[startindex].set_text = lambda _: None
        labels[startindex].set_rotation(30)
        labels[startindex].set_ha("right")
    if endindex >= 0:
        labels[endindex].set_text(timeline[-1].date())
        labels[endindex].set_text = lambda _: None
        labels[endindex].set_rotation(30)
        labels[endindex].set_ha("right")
    # Values below 0.5 count as positive mood, values above as negative.
    overall_pos = sum(2 * (0.5 - d[1].Value) for d in data if d[1].Value < 0.5)
    overall_neg = sum(2 * (d[1].Value - 0.5) for d in data if d[1].Value > 0.5)
    title = "%s sentiment +%.1f -%.1f δ=%.1f" % (
        name,
        overall_pos,
        overall_neg,
        overall_pos - overall_neg,
    )
    if args.mode == "all" and args.output:
        output = get_plot_path(args.output, "sentiment")
    else:
        output = args.output
    deploy_plot(title, output, args.background)