def plot_overwrites_matrix(args, repo, people, matrix):
    """Draw the developer-overwrites matrix, or dump it as JSON.

    If ``args.output`` ends with ``.json`` the inputs (``repo``, ``people``,
    ``matrix``) are serialized to that file together with a ``type`` marker
    and nothing is plotted. Otherwise the matrix is rendered with
    ``matshow`` and handed to ``deploy_plot``.

    :param args: parsed command-line options; reads ``output``, ``mode``, \
                 ``backend``, ``style``, ``background``, ``font_size``, ``size``.
    :param repo: repository name used in the plot title.
    :param people: list of developer names labelling the rows.
    :param matrix: 2-D array; columns are labelled "Unidentified" + ``people``.
    """
    saving = bool(args.output)
    if saving and args.output.endswith(".json"):
        payload = {
            "repo": repo,
            "people": people,
            "matrix": matrix,
            "type": "overwrites_matrix",
        }
        json_path = (
            get_plot_path(args.output, "matrix")
            if args.mode == "all"
            else args.output
        )
        with open(json_path, "w") as fout:
            json.dump(payload, fout, sort_keys=True, default=default_json)
        return

    _, pyplot = import_pyplot(args.backend, args.style)
    n_rows = matrix.shape[0]
    n_cols = matrix.shape[1]

    # The figure is square and grows with the number of columns.
    side = 4 + n_cols * 0.3
    figure = pyplot.figure(figsize=(side, side))
    axes = figure.add_subplot(111)
    axes.xaxis.set_label_position("top")
    axes.matshow(matrix, cmap=pyplot.cm.OrRd)

    # Major ticks sit on the cell centers and carry the labels; minor ticks
    # sit on the cell borders and carry the grid.
    axes.set_xticks(numpy.arange(0, n_cols))
    axes.set_yticks(numpy.arange(0, n_rows))
    axes.set_yticklabels(people, va="center")
    axes.set_xticks(numpy.arange(0.5, n_cols + 0.5), minor=True)
    axes.set_xticklabels(
        ["Unidentified"] + people,
        rotation=45,
        ha="left",
        va="bottom",
        rotation_mode="anchor",
    )
    axes.set_yticks(numpy.arange(0.5, n_rows + 0.5), minor=True)
    axes.grid(False)
    axes.grid(which="minor")
    apply_plot_style(figure, axes, None, args.background, args.font_size, args.size)

    if not saving:
        # Interactive display: shift and shrink the axes a little.
        box = axes.get_position()
        axes.set_position(
            (box.x0 + 0.15, box.y0 - 0.1, box.width * 0.9, box.height * 0.9)
        )

    destination = (
        get_plot_path(args.output, "matrix")
        if args.mode == "all" and args.output
        else args.output
    )
    caption = "%s %d developers overwrite" % (repo, n_rows)
    if saving:
        # FIXME(vmarkovtsev): otherwise the title is screwed in savefig()
        caption = ""
    deploy_plot(caption, destination, args.background)
def show_devs_parallel(args, name, start_date, end_date, devs):
    """Plot one smooth five-point curve per developer.

    Each developer contributes the points (1, commits_rank), (2, lines_rank),
    (3, ownership_rank), (4, couples_index), (5, commit_coocc_index), with the
    y values divided by the number of developers. Neighbouring points are
    joined by cubic pieces and drawn as a ``LineCollection``.

    :param args: parsed command-line options; reads ``backend``, ``style``, \
                 ``output`` and ``background``.
    :param name: unused here.
    :param start_date: unused here.
    :param end_date: unused here.
    :param devs: mapping whose values expose the five rank/index attributes.
    """
    matplotlib, pyplot = import_pyplot(args.backend, args.style)
    from matplotlib.collections import LineCollection

    def cubic_between(x1, y1, x2, y2):
        # Coefficients of the cubic through (x1, y1) and (x2, y2) whose
        # derivative, proportional to (x - x1) * (x - x2), vanishes at both ends.
        xcube = (x1 - x2)**3
        a = 2 * (y2 - y1) / xcube
        b = 3 * (y1 - y2) * (x1 + x2) / xcube
        c = 6 * (y2 - y1) * x1 * x2 / xcube
        d = y1 - a * x1**3 - b * x1**2 - c * x1
        return a, b, c, d

    dev_count = len(devs)
    for dev in devs.values():
        anchors = numpy.array(
            [
                (1, dev.commits_rank),
                (2, dev.lines_rank),
                (3, dev.ownership_rank),
                (4, dev.couples_index),
                (5, dev.commit_coocc_index),
            ],
            dtype=float,
        )
        anchors[:, 1] = anchors[:, 1] / dev_count

        pieces = []
        for idx, (left, right) in enumerate(zip(anchors[:-1], anchors[1:])):
            a, b, c, d = cubic_between(*left, *right)
            xs = numpy.linspace(idx + 1, idx + 2, 100)
            ys = a * xs**3 + b * xs**2 + c * xs + d
            pieces.append(numpy.array([xs, ys]).T.reshape(-1, 1, 2))

        curve = numpy.concatenate(pieces)
        # Pair every sample with its successor to form the line segments.
        segments = numpy.concatenate([curve[:-1], curve[1:]], axis=1)
        collection = LineCollection(segments)
        collection.set_array(numpy.linspace(0, 0.1, segments.shape[0]))
        pyplot.gca().add_collection(collection)

    pyplot.xlim(0, 6)
    pyplot.ylim(-0.1, 1.1)
    deploy_plot("Developers", args.output, args.background)
def show_old_vs_new(
    args: Namespace,
    name: str,
    start_date: int,
    end_date: int,
    people: List[str],
    days: Dict[int, Dict[int, DevDay]],
) -> None:
    """Plot smoothed daily "new lines" against "changed existing lines".

    For every day the ``Added`` counters of all developers are summed into one
    series and ``Removed + Changed`` into another; both are convolved with the
    same window and drawn with ``fill_between``.

    :param args: parsed command-line options; reads ``backend``, ``style``, \
                 ``font_size``, ``mode``, ``output`` and ``background``.
    :param name: unused here.
    :param start_date: timestamp of the first day (fed to ``fromtimestamp``).
    :param end_date: timestamp of the last day (fed to ``fromtimestamp``).
    :param people: unused here.
    :param days: day index -> {developer -> stats}; the day index is used \
                 directly as an array position.
    """
    from scipy.signal import convolve

    try:
        from scipy.signal import slepian
    except ImportError:
        # slepian() was deprecated in favour of dpss() and is missing from
        # recent SciPy releases; fall back so the plot keeps working there.
        # NOTE(review): NW = M * width / 4 is intended to reproduce the old
        # window shape - confirm against the SciPy sources.
        from scipy.signal.windows import dpss

        def slepian(M, width):
            return dpss(M, M * width / 4)

    # Truncate both timestamps to midnight of their (local) calendar day.
    start_date = datetime.fromtimestamp(start_date)
    start_date = datetime(start_date.year, start_date.month, start_date.day)
    end_date = datetime.fromtimestamp(end_date)
    end_date = datetime(end_date.year, end_date.month, end_date.day)

    new_lines = numpy.zeros((end_date - start_date).days + 2)
    old_lines = numpy.zeros_like(new_lines)
    for day, devs in days.items():
        for stats in devs.values():
            new_lines[day] += stats.Added
            old_lines[day] += stats.Removed + stats.Changed

    resolution = 32
    # max(..., 1) keeps the window non-empty for histories shorter than
    # `resolution` days.
    window = slepian(max(len(new_lines) // resolution, 1), 0.5)
    new_lines = convolve(new_lines, window, "same")
    old_lines = convolve(old_lines, window, "same")

    matplotlib, pyplot = import_pyplot(args.backend, args.style)
    plot_x = [start_date + timedelta(days=i) for i in range(len(new_lines))]
    pyplot.fill_between(plot_x, new_lines, color="#8DB843", label="Changed new lines")
    pyplot.fill_between(
        plot_x, old_lines, color="#E14C35", label="Changed existing lines"
    )
    pyplot.legend(loc=2, fontsize=args.font_size)
    for tick in chain(
        pyplot.gca().xaxis.get_major_ticks(), pyplot.gca().yaxis.get_major_ticks()
    ):
        # `Tick.label` was only an alias of `label1` and no longer exists in
        # current Matplotlib; `label1` works in old and new releases alike.
        tick.label1.set_fontsize(args.font_size)

    if args.mode == "all" and args.output:
        output = get_plot_path(args.output, "old_vs_new")
    else:
        output = args.output
    deploy_plot("Additions vs changes", output, args.background)
def plot_ownership(args, repo, names, people, date_range, last):
    """Draw the code-ownership stack plot, or dump its inputs as JSON.

    If ``args.output`` ends with ``.json`` the inputs are serialized there
    (un-normalized) and nothing is plotted.

    :param args: parsed command-line options; reads ``output``, ``mode``, \
                 ``backend``, ``style``, ``start_date``, ``end_date``, \
                 ``relative``, ``font_size``, ``background`` and ``size``.
    :param repo: repository name used in the plot title.
    :param names: legend labels, one per row of ``people``; a trailing \
                  "others" entry gets a hatched fill.
    :param people: 2-D array, one row per label and one column per date.
    :param date_range: x values for the stack plot.
    :param last: passed to ``parse_date`` together with ``args.end_date`` for \
                 the right x limit.
    """
    if args.output and args.output.endswith(".json"):
        data = {
            "repo": repo,
            "names": names,
            "people": people,
            "date_range": date_range,
            "last": last,
            "type": "ownership",
        }
        if args.mode == "all" and args.output:
            output = get_plot_path(args.output, "people")
        else:
            output = args.output
        with open(output, "w") as fout:
            json.dump(data, fout, sort_keys=True, default=default_json)
        return

    matplotlib, pyplot = import_pyplot(args.backend, args.style)

    if args.relative:
        # Normalize every column to sum to 1 BEFORE plotting: stackplot works
        # on its own copy of the data, so scaling afterwards never reaches the
        # picture. A fresh array is built so the caller's data is left alone,
        # and all-zero columns stay zero instead of becoming NaN.
        totals = people.sum(axis=0)
        people = numpy.divide(
            people,
            totals,
            out=numpy.zeros(people.shape, dtype=float),
            where=totals != 0,
        )

    polys = pyplot.stackplot(date_range, people, labels=names)
    if names[-1] == "others":
        polys[-1].set_hatch("/")
    pyplot.xlim(
        parse_date(args.start_date, date_range[0]), parse_date(args.end_date, last)
    )

    if args.relative:
        pyplot.ylim(0, 1)
        legend_loc = 3
    else:
        legend_loc = 2
    # Long legends are split into two columns.
    ncol = 1 if len(names) < 15 else 2
    legend = pyplot.legend(loc=legend_loc, fontsize=args.font_size, ncol=ncol)
    apply_plot_style(
        pyplot.gcf(), pyplot.gca(), legend, args.background, args.font_size, args.size
    )

    if args.mode == "all" and args.output:
        output = get_plot_path(args.output, "people")
    else:
        output = args.output
    deploy_plot("%s code ownership through time" % repo, output, args.background)
def plot_burndown(
    args: Namespace,
    target: str,
    name: str,
    matrix: numpy.ndarray,
    date_range_sampling: 'DatetimeIndex',
    labels: List[int],
    granularity: int,
    sampling: int,
    resample: str,
) -> None:
    """Draw a burndown stack plot, or dump its inputs as JSON.

    If ``args.output`` ends with ``.json`` the inputs are serialized
    (un-normalized) and nothing is plotted.

    :param args: parsed command-line options; reads ``output``, ``mode``, \
                 ``backend``, ``style``, ``relative``, ``font_size``, \
                 ``background``, ``size``, ``start_date`` and ``end_date``.
    :param target: when equal to "project" the output is named "project" \
                   (or written straight to ``args.output`` in project mode).
    :param name: name used in the title and, for other targets, in the \
                 output path.
    :param matrix: 2-D array, one row per band and one column per sample.
    :param date_range_sampling: x values; the first and last elements must \
                                offer ``.date()``.
    :param labels: legend labels, one per row of ``matrix``.
    :param granularity: shown in the title only.
    :param sampling: shown in the title only.
    :param resample: resampling code; a yearly tick locator is preferred \
                     when it does not contain "M".
    """
    if args.output and args.output.endswith(".json"):
        data = {
            "target": target,
            "name": name,
            "matrix": matrix,
            "date_range_sampling": date_range_sampling,
            "labels": labels,
            "granularity": granularity,
            "sampling": sampling,
            "resample": resample,
            "type": "burndown",
        }
        if args.mode == "project" and target == "project":
            output = args.output
        else:
            if target == "project":
                name = "project"
            output = get_plot_path(args.output, name)
        with open(output, "w") as fout:
            json.dump(data, fout, sort_keys=True, default=default_json)
        return

    matplotlib, pyplot = import_pyplot(args.backend, args.style)

    if args.relative:
        # Normalize every column to sum to 1 BEFORE plotting: stackplot works
        # on its own copy of the data, so scaling afterwards never reaches the
        # picture. A fresh array is built so the caller's data is left alone,
        # and all-zero columns stay zero instead of becoming NaN.
        totals = matrix.sum(axis=0)
        matrix = numpy.divide(
            matrix,
            totals,
            out=numpy.zeros(matrix.shape, dtype=float),
            where=totals != 0,
        )

    pyplot.stackplot(date_range_sampling, matrix, labels=labels)
    if args.relative:
        pyplot.ylim(0, 1)
        legend_loc = 3
    else:
        legend_loc = 2
    legend = pyplot.legend(loc=legend_loc, fontsize=args.font_size)
    pyplot.ylabel("Lines of code")
    pyplot.xlabel("Time")
    apply_plot_style(
        pyplot.gcf(), pyplot.gca(), legend, args.background, args.font_size, args.size
    )
    pyplot.xlim(
        parse_date(args.start_date, date_range_sampling[0]),
        parse_date(args.end_date, date_range_sampling[-1]),
    )

    locator = pyplot.gca().xaxis.get_major_locator()
    # set the optimal xticks locator
    if "M" not in resample:
        pyplot.gca().xaxis.set_major_locator(matplotlib.dates.YearLocator())
    locs = pyplot.gca().get_xticks().tolist()
    if len(locs) >= 16:
        pyplot.gca().xaxis.set_major_locator(matplotlib.dates.YearLocator())
        locs = pyplot.gca().get_xticks().tolist()
        if len(locs) >= 16:
            # Still too crowded: go back to the locator we started with.
            pyplot.gca().xaxis.set_major_locator(locator)
    if locs and locs[0] < pyplot.xlim()[0]:
        del locs[0]

    # Add explicit ticks at the plot edges when the nearest regular tick is
    # more than half a tick step away from them.
    endindex = -1
    if len(locs) >= 2 and pyplot.xlim()[1] - locs[-1] > (locs[-1] - locs[-2]) / 2:
        locs.append(pyplot.xlim()[1])
        endindex = len(locs) - 1
    startindex = -1
    if len(locs) >= 2 and locs[0] - pyplot.xlim()[0] > (locs[1] - locs[0]) / 2:
        locs.append(pyplot.xlim()[0])
        startindex = len(locs) - 1
    pyplot.gca().set_xticks(locs)

    # hacking time!
    # The edge labels get the exact first/last date; set_text is then replaced
    # with a no-op so that later calls cannot overwrite that text.
    labels = pyplot.gca().get_xticklabels()
    if startindex >= 0:
        labels[startindex].set_text(date_range_sampling[0].date())
        labels[startindex].set_text = lambda _: None
        labels[startindex].set_rotation(30)
        labels[startindex].set_ha("right")
    if endindex >= 0:
        labels[endindex].set_text(date_range_sampling[-1].date())
        labels[endindex].set_text = lambda _: None
        labels[endindex].set_rotation(30)
        labels[endindex].set_ha("right")

    title = "%s %d x %d (granularity %d, sampling %d)" % (
        (name, ) + matrix.shape + (granularity, sampling))
    output = args.output
    if output:
        if args.mode == "project" and target == "project":
            output = args.output
        else:
            if target == "project":
                name = "project"
            output = get_plot_path(args.output, name)
    deploy_plot(title, output, args.background)
def show_devs_efforts(
    args: Namespace,
    name: str,
    start_date: int,
    end_date: int,
    people: List[str],
    days: Dict[int, Dict[int, DevDay]],
    max_people: int,
) -> None:
    """Plot per-developer effort (Added + Removed + Changed) through time.

    The cumulative efforts are stacked above the x axis and the scaled,
    negated daily efforts below it. Only the ``max_people`` most active
    developers get their own band; everybody else is summed into "others".

    :param args: parsed command-line options; reads ``backend``, ``style``, \
                 ``font_size``, ``background``, ``size``, ``mode``, ``output``.
    :param name: unused here.
    :param start_date: timestamp of the first day (fed to ``fromtimestamp``).
    :param end_date: timestamp of the last day (fed to ``fromtimestamp``).
    :param people: developer names, indexed by the developer keys of ``days``.
    :param days: day index -> {developer index -> stats}.
    :param max_people: maximum number of individually plotted developers.
    """
    from scipy.signal import convolve

    try:
        from scipy.signal import slepian
    except ImportError:
        # slepian() was deprecated in favour of dpss() and is missing from
        # recent SciPy releases; fall back so the plot keeps working there.
        # NOTE(review): NW = M * width / 4 is intended to reproduce the old
        # window shape - confirm against the SciPy sources.
        from scipy.signal.windows import dpss

        def slepian(M, width):
            return dpss(M, M * width / 4)

    # Truncate both timestamps to midnight of their (local) calendar day.
    start_date = datetime.fromtimestamp(start_date)
    start_date = datetime(start_date.year, start_date.month, start_date.day)
    end_date = datetime.fromtimestamp(end_date)
    end_date = datetime(end_date.year, end_date.month, end_date.day)

    efforts_by_dev = defaultdict(int)
    for devs in days.values():
        for dev, stats in devs.items():
            efforts_by_dev[dev] += stats.Added + stats.Removed + stats.Changed

    # (effort, developer) pairs, most active first.
    chosen_efforts = sorted(
        ((effort, dev) for dev, effort in efforts_by_dev.items()), reverse=True
    )
    if len(chosen_efforts) > max_people:
        chosen_efforts = chosen_efforts[:max_people]
        print("Warning: truncated people to the most active %d" % max_people)
    chosen_order = {dev: i for i, (_, dev) in enumerate(chosen_efforts)}

    # One row per chosen developer plus a final row that collects the rest.
    efforts = numpy.zeros(
        (len(chosen_efforts) + 1, (end_date - start_date).days + 1),
        dtype=numpy.float32,
    )
    for day, devs in days.items():
        if day < efforts.shape[1]:
            for dev, stats in devs.items():
                row = chosen_order.get(dev, len(chosen_order))
                efforts[row][day] += stats.Added + stats.Removed + stats.Changed
    efforts_cum = numpy.cumsum(efforts, axis=1)

    window = slepian(10, 0.5)
    window /= window.sum()
    for e in (efforts, efforts_cum):
        for i in range(e.shape[0]):
            # Smooth the row but restore its unsmoothed tail afterwards.
            ending = e[i][-len(window) * 2 :].copy()
            e[i] = convolve(e[i], window, "same")
            e[i][-len(ending) :] = ending

    matplotlib, pyplot = import_pyplot(args.backend, args.style)
    plot_x = [start_date + timedelta(days=i) for i in range(efforts.shape[1])]
    people = [people[dev] for _, dev in chosen_efforts] + ["others"]
    for i, label in enumerate(people):
        if len(label) > 40:
            people[i] = label[:37] + "..."

    polys = pyplot.stackplot(plot_x, efforts_cum, labels=people)
    if len(polys) == max_people + 1:
        polys[-1].set_hatch("/")

    # Mirror the daily efforts below the axis, scaled to the cumulative peak.
    # When nothing was recorded the peak is 0; skip the 0/0 scaling then.
    peak = efforts.max()
    if peak > 0:
        mirrored = -efforts * efforts_cum.max() / peak
    else:
        mirrored = -efforts
    polys = pyplot.stackplot(plot_x, mirrored)
    if len(polys) == max_people + 1:
        polys[-1].set_hatch("/")

    # Keep only the non-negative y ticks. Axis.iter_ticks() is no longer
    # available in current Matplotlib, so read the tick locations directly
    # (major first, then minor - the order iter_ticks() used).
    yaxis = pyplot.gca().yaxis
    tick_locs = list(yaxis.get_majorticklocs()) + list(yaxis.get_minorticklocs())
    yaxis.set_ticks([loc for loc in tick_locs if loc >= 0])

    legend = pyplot.legend(loc=2, ncol=2, fontsize=args.font_size)
    apply_plot_style(
        pyplot.gcf(),
        pyplot.gca(),
        legend,
        args.background,
        args.font_size,
        args.size or "16,10",
    )
    if args.mode == "all" and args.output:
        output = get_plot_path(args.output, "efforts")
    else:
        output = args.output
    deploy_plot("Efforts through time (changed lines of code)", output, args.background)
def show_devs(
    args: Namespace,
    name: str,
    start_date: int,
    end_date: int,
    people: List[str],
    days: Dict[int, Dict[int, DevDay]],
    max_people: int = 50,
) -> None:
    """Plot one smoothed activity series per developer, stacked vertically.

    Developers are ordered by ``order_commits`` and coloured by the clusters
    returned from ``hdbscan_cluster_routed_series``; each row shows the
    author name on one side and a short statistics string on the other.

    :param args: parsed command-line options; reads ``backend``, ``style``, \
                 ``font_size``, ``background``, ``mode`` and ``output``.
    :param name: used in the title when the plot is shown interactively.
    :param start_date: timestamp of the first day (fed to ``fromtimestamp``).
    :param end_date: timestamp of the last day (fed to ``fromtimestamp``).
    :param people: developer names, indexed by developer key.
    :param days: day index -> {developer index -> stats}.
    :param max_people: if there are more developers than this, only the top \
                       ones by commit count are kept.
    """
    from scipy.signal import convolve

    try:
        from scipy.signal import slepian
    except ImportError:
        # slepian() was deprecated in favour of dpss() and is missing from
        # recent SciPy releases; fall back so the plot keeps working there.
        # NOTE(review): NW = M * width / 4 is intended to reproduce the old
        # window shape - confirm against the SciPy sources.
        from scipy.signal.windows import dpss

        def slepian(M, width):
            return dpss(M, M * width / 4)

    if len(people) > max_people:
        print("Picking top %s developers by commit count" % max_people)
        # pick top N developers by commit count
        commits = defaultdict(int)
        for devs in days.values():
            for dev, stats in devs.items():
                commits[dev] += stats.Commits
        commits = sorted(((v, k) for k, v in commits.items()), reverse=True)
        chosen_people = {people[k] for _, k in commits[:max_people]}
    else:
        chosen_people = set(people)
    dists, devseries, devstats, route = order_commits(chosen_people, days, people)
    route_map = {v: i for i, v in enumerate(route)}
    # determine clusters
    clusters = hdbscan_cluster_routed_series(dists, route)
    keys = list(devseries.keys())
    route = [keys[node] for node in route]
    print("Plotting")

    # smooth time series
    start_date = datetime.fromtimestamp(start_date)
    start_date = datetime(start_date.year, start_date.month, start_date.day)
    end_date = datetime.fromtimestamp(end_date)
    end_date = datetime(end_date.year, end_date.month, end_date.day)
    size = (end_date - start_date).days + 1
    plot_x = [start_date + timedelta(days=i) for i in range(size)]
    resolution = 64
    # max(..., 1) keeps the window non-empty for histories shorter than
    # `resolution` days (same guard as show_old_vs_new).
    window = slepian(max(size // resolution, 1), 0.5)
    final = numpy.zeros((len(devseries), size), dtype=numpy.float32)
    for i, s in enumerate(devseries.values()):
        arr = numpy.array(s).transpose()
        full_history = numpy.zeros(size, dtype=numpy.float32)
        # Ignore samples that fall beyond the plotted period.
        mask = arr[0] < size
        full_history[arr[0][mask]] = arr[1][mask]
        final[route_map[i]] = convolve(full_history, window, "same")

    matplotlib, pyplot = import_pyplot(args.backend, args.style)
    pyplot.rcParams["figure.figsize"] = (32, 16)
    pyplot.rcParams["font.size"] = args.font_size
    prop_cycle = pyplot.rcParams["axes.prop_cycle"]
    colors = prop_cycle.by_key()["color"]
    # squeeze=False always returns a 2-D array of axes, so a single developer
    # no longer yields a bare Axes object that cannot be iterated or indexed.
    fig, axes = pyplot.subplots(final.shape[0], 1, squeeze=False)
    axes = axes[:, 0]
    backgrounds = (
        ("#C4FFDB", "#FFD0CD") if args.background == "white" else
        ("#05401C", "#40110E")
    )
    max_cluster = numpy.max(clusters)
    for ax, series, cluster, dev_i in zip(axes, final, clusters, route):
        if cluster >= 0:
            color = colors[cluster % len(colors)]
            # "#777777" is reserved for outliers: pick another cycle colour.
            i = 1
            while color == "#777777":
                color = colors[(max_cluster + i) % len(colors)]
                i += 1
        else:
            # outlier
            color = "#777777"
        ax.fill_between(plot_x, series, color=color)
        ax.set_axis_off()
        author = people[dev_i]
        ax.text(
            0.03,
            0.5,
            author[:36] + (author[36:] and "..."),
            horizontalalignment="right",
            verticalalignment="center",
            transform=ax.transAxes,
            fontsize=args.font_size,
            color="black" if args.background == "white" else "white",
        )
        ds = devstats[dev_i]
        stats = "%5d %8s %8s" % (
            ds[0],
            _format_number(ds[1] - ds[2]),
            _format_number(ds[3]),
        )
        ax.text(
            0.97,
            0.5,
            stats,
            horizontalalignment="left",
            verticalalignment="center",
            transform=ax.transAxes,
            fontsize=args.font_size,
            family="monospace",
            # Index 0 when ds[1] > ds[2], index 1 otherwise.
            backgroundcolor=backgrounds[ds[1] <= ds[2]],
            color="black" if args.background == "white" else "white",
        )
    axes[0].text(
        0.97,
        1.75,
        " cmts delta changed",
        horizontalalignment="left",
        verticalalignment="center",
        transform=axes[0].transAxes,
        fontsize=args.font_size,
        family="monospace",
        color="black" if args.background == "white" else "white",
    )

    # Only the bottom row keeps a (date) x axis.
    axes[-1].set_axis_on()
    target_num_labels = 12
    num_months = (
        (end_date.year - start_date.year) * 12 + end_date.month - start_date.month
    )
    interval = int(numpy.ceil(num_months / target_num_labels))
    if interval >= 8:
        interval = int(numpy.ceil(num_months / (12 * target_num_labels)))
        axes[-1].xaxis.set_major_locator(
            matplotlib.dates.YearLocator(base=max(1, interval // 12))
        )
        axes[-1].xaxis.set_major_formatter(matplotlib.dates.DateFormatter("%Y"))
    else:
        # A period within a single calendar month gives interval == 0;
        # clamp it so the locator always gets a positive step.
        axes[-1].xaxis.set_major_locator(
            matplotlib.dates.MonthLocator(interval=max(1, interval))
        )
        axes[-1].xaxis.set_major_formatter(matplotlib.dates.DateFormatter("%Y-%m"))
    for tick in axes[-1].xaxis.get_major_ticks():
        # `Tick.label` was only an alias of `label1` and no longer exists in
        # current Matplotlib; `label1` works in old and new releases alike.
        tick.label1.set_fontsize(args.font_size)
    axes[-1].spines["left"].set_visible(False)
    axes[-1].spines["right"].set_visible(False)
    axes[-1].spines["top"].set_visible(False)
    axes[-1].get_yaxis().set_visible(False)
    axes[-1].set_facecolor((1.0,) * 3 + (0.0,))

    title = ("%s commits" % name) if not args.output else ""
    if args.mode == "all" and args.output:
        output = get_plot_path(args.output, "time_series")
    else:
        output = args.output
    deploy_plot(title, output, args.background)
def show_sentiment_stats(args, name, resample, start_date, data):
    """Plot smoothed comment sentiment through time.

    Every sample is mapped to ``(0.5 - Value) * 2``. The smoothed series is
    split into its non-negative ("Positive") and negative ("Negative") parts,
    and a wider moving average is drawn on top.

    :param args: parsed command-line options; reads ``backend``, ``style``, \
                 ``font_size``, ``background``, ``size``, ``start_date``, \
                 ``end_date``, ``mode`` and ``output``.
    :param name: used in the plot title.
    :param resample: resampling code; a yearly tick locator is preferred \
                     when it does not contain "M".
    :param start_date: timestamp of day 0 (fed to ``fromtimestamp``).
    :param data: non-empty mapping day index -> object with a ``Value`` \
                 attribute.
    """
    from scipy.signal import convolve

    try:
        from scipy.signal import slepian
    except ImportError:
        # slepian() was deprecated in favour of dpss() and is missing from
        # recent SciPy releases; fall back so the plot keeps working there.
        # NOTE(review): NW = M * width / 4 is intended to reproduce the old
        # window shape - confirm against the SciPy sources.
        from scipy.signal.windows import dpss

        def slepian(M, width):
            return dpss(M, M * width / 4)

    matplotlib, pyplot = import_pyplot(args.backend, args.style)

    start_date = datetime.fromtimestamp(start_date)
    data = sorted(data.items())
    # The series spans day 0 .. the largest day index present in `data`.
    mood = numpy.zeros(data[-1][0] + 1, dtype=numpy.float32)
    timeline = numpy.array(
        [start_date + timedelta(days=i) for i in range(mood.shape[0])]
    )
    for d, val in data:
        mood[d] = (0.5 - val.Value) * 2

    resolution = 32
    # max(..., 1) keeps the window non-empty for timelines shorter than
    # `resolution` days (same guard as show_old_vs_new); an empty window
    # would make the convolution output unusable for plotting.
    window = slepian(max(len(timeline) // resolution, 1), 0.5)
    window /= window.sum()
    mood_smooth = convolve(mood, window, "same")
    pos = mood_smooth.copy()
    pos[pos < 0] = 0
    neg = mood_smooth.copy()
    neg[neg >= 0] = 0

    resolution = 4
    window = numpy.ones(max(len(timeline) // resolution, 1))
    window /= window.sum()
    avg = convolve(mood, window, "same")

    pyplot.fill_between(timeline, pos, color="#8DB843", label="Positive")
    pyplot.fill_between(timeline, neg, color="#E14C35", label="Negative")
    pyplot.plot(timeline, avg, color="grey", label="Average", linewidth=5)
    legend = pyplot.legend(loc=1, fontsize=args.font_size)
    pyplot.ylabel("Comment sentiment")
    pyplot.xlabel("Time")
    apply_plot_style(
        pyplot.gcf(), pyplot.gca(), legend, args.background, args.font_size, args.size
    )
    pyplot.xlim(
        parse_date(args.start_date, timeline[0]),
        parse_date(args.end_date, timeline[-1]),
    )

    locator = pyplot.gca().xaxis.get_major_locator()
    # set the optimal xticks locator
    if "M" not in resample:
        pyplot.gca().xaxis.set_major_locator(matplotlib.dates.YearLocator())
    locs = pyplot.gca().get_xticks().tolist()
    if len(locs) >= 16:
        pyplot.gca().xaxis.set_major_locator(matplotlib.dates.YearLocator())
        locs = pyplot.gca().get_xticks().tolist()
        if len(locs) >= 16:
            # Still too crowded: go back to the locator we started with.
            pyplot.gca().xaxis.set_major_locator(locator)
    if locs and locs[0] < pyplot.xlim()[0]:
        del locs[0]

    # Add explicit ticks at the plot edges when the nearest regular tick is
    # more than half a tick step away from them.
    endindex = -1
    if len(locs) >= 2 and pyplot.xlim()[1] - locs[-1] > (locs[-1] - locs[-2]) / 2:
        locs.append(pyplot.xlim()[1])
        endindex = len(locs) - 1
    startindex = -1
    if len(locs) >= 2 and locs[0] - pyplot.xlim()[0] > (locs[1] - locs[0]) / 2:
        locs.append(pyplot.xlim()[0])
        startindex = len(locs) - 1
    pyplot.gca().set_xticks(locs)

    # hacking time!
    # The edge labels get the exact first/last date; set_text is then replaced
    # with a no-op so that later calls cannot overwrite that text.
    labels = pyplot.gca().get_xticklabels()
    if startindex >= 0:
        labels[startindex].set_text(timeline[0].date())
        labels[startindex].set_text = lambda _: None
        labels[startindex].set_rotation(30)
        labels[startindex].set_ha("right")
    if endindex >= 0:
        labels[endindex].set_text(timeline[-1].date())
        labels[endindex].set_text = lambda _: None
        labels[endindex].set_rotation(30)
        labels[endindex].set_ha("right")

    overall_pos = sum(2 * (0.5 - d[1].Value) for d in data if d[1].Value < 0.5)
    overall_neg = sum(2 * (d[1].Value - 0.5) for d in data if d[1].Value > 0.5)
    title = "%s sentiment +%.1f -%.1f δ=%.1f" % (
        name,
        overall_pos,
        overall_neg,
        overall_pos - overall_neg,
    )
    if args.mode == "all" and args.output:
        output = get_plot_path(args.output, "sentiment")
    else:
        output = args.output
    deploy_plot(title, output, args.background)