Example #1
0
def create_no_open_issues_dev(df, sys_name):
    """Plot the number of open issues over ticket creation time.

    Draws a scatter plot of the ``no_open_iss`` column against the
    ``created`` column, overlays the linear regression line returned by
    ``compute_lin_reg`` and saves the figure to
    ``data/output/<first three characters of sys_name>_no_open_issues.png``.

    For the systems "cassandra" and "gaffer" the regression values quoted
    in the paper are additionally printed. For any other ``sys_name`` that
    report is skipped (previously this raised ``UnboundLocalError``).

    Args:
        df: DataFrame with a ``created`` column. It is first passed to
            ``compute_no_open_issues``, whose return value is discarded
            (presumably it adds ``no_open_iss`` in place; confirm).
        sys_name: Name of the analysed system.

    Returns:
        Tuple ``(fig, slope, intercept)`` with the matplotlib figure and
        the regression coefficients from ``compute_lin_reg``.
    """
    compute_no_open_issues(df)

    fig, axs = plt.subplots(figsize=(6, 3))

    x_col = "created"
    y_col = "no_open_iss"
    df.plot.scatter(x=x_col, y=y_col, s=1, ax=axs, rot=0)

    axs.set_ylabel("Number of Open Issues")
    axs.set_xlabel("Ticket Creation Time")

    slope, intercept, xf_dates, yf = compute_lin_reg(df, x_col, y_col)
    axs.plot(xf_dates, yf, c="orange")
    fig.savefig(
        f"data/output/{sys_name[:3]}_no_open_issues.png", bbox_inches="tight"
    )

    # This code is only to generate the values used in the paper's text.
    # Only systems with a known start date are reported; other systems
    # still get their figure and return value.
    paper_start_dates = {"cassandra": "2010-01-01", "gaffer": "2016-01-01"}
    dt_a = paper_start_dates.get(sys_name)
    if dt_a is not None:
        dt_b = "2021-01-01"
        # Evaluate the regression line at both dates on the numeric
        # datetime axis produced by pd.to_numeric.
        ya = (
            slope * pd.to_numeric(pd.Series([pd.to_datetime(dt_a)]))[0]
            + intercept
        )
        yb = (
            slope * pd.to_numeric(pd.Series([pd.to_datetime(dt_b)]))[0]
            + intercept
        )
        # The factor rescales the slope from per-nanosecond to per 365-day
        # year (assumes the regression ran on nanosecond timestamps).
        print(
            f"increase in open_iss/year: {slope * (10 ** 9) * 60 * 60 * 24 * 365}",
            f"no open issues {dt_a}: {ya}",
            f"no open issues {dt_b}: {yb}",
        )

    return fig, slope, intercept
Example #2
0
def compute_lead_t_dev(df, sys_name):
    """Plot ticket lead time in hours over ticket close time.

    Filters ``df`` with ``filter_iqr_outliers`` on ``t_lead_s``, converts
    the remaining lead times to hours, draws them as a scatter plot over
    the ``resolved`` column, overlays the linear regression line returned
    by ``compute_lin_reg`` and saves the figure to
    ``data/output/<first three characters of sys_name>_lead_t.png``.

    For the systems "cassandra" and "gaffer" the regression values quoted
    in the paper are additionally printed. For any other ``sys_name`` that
    report is skipped (previously this raised ``UnboundLocalError``).

    Args:
        df: DataFrame with ``t_lead_s`` and ``resolved`` columns.
        sys_name: Name of the analysed system.

    Returns:
        Tuple ``(fig, slope)`` with the matplotlib figure and the
        regression slope from ``compute_lin_reg``.
    """
    df_small = filter_iqr_outliers(df, "t_lead_s")

    # assign() returns a new frame, so the hours column is never written
    # into a slice/view of the caller's DataFrame.
    df_small = df_small.assign(
        t_lead_h=df_small.t_lead_s / 60 / 60  # Convert it to hours
    )
    fig, axs = plt.subplots(figsize=(6, 3))

    x_col = "resolved"
    y_col = "t_lead_h"
    df_small.plot.scatter(x=x_col, y=y_col, s=1, ax=axs, rot=0)

    axs.set_ylabel("$t_{lead}$ in hours")
    axs.set_xlabel("Ticket Close Time")

    slope, intercept, xf_dates, yf = compute_lin_reg(df_small, x_col, y_col)
    axs.plot(xf_dates, yf, c="orange")

    fig.savefig(f"data/output/{sys_name[:3]}_lead_t.png", bbox_inches="tight")

    # This code is only to generate the values used in the paper's text.
    # Only systems with a known start date are reported; other systems
    # still get their figure and return value.
    paper_start_dates = {"cassandra": "2010-01-01", "gaffer": "2016-01-01"}
    dt_a = paper_start_dates.get(sys_name)
    if dt_a is not None:
        dt_b = "2021-01-01"
        # Evaluate the regression line at both dates on the numeric
        # datetime axis produced by pd.to_numeric.
        ya = (
            slope * pd.to_numeric(pd.Series([pd.to_datetime(dt_a)]))[0]
            + intercept
        )
        yb = (
            slope * pd.to_numeric(pd.Series([pd.to_datetime(dt_b)]))[0]
            + intercept
        )
        # The factor rescales the slope from per-nanosecond to per 365-day
        # year (assumes the regression ran on nanosecond timestamps).
        print(
            f"increase in h/year: {slope * (10 ** 9) * 60 * 60 * 24 * 365}",
            f"closing time {dt_a}: {ya}",
            f"closing time {dt_b}: {yb}",
        )

    return fig, slope
Example #3
0
def create_no_contributors_dev(sys_name):
    """Plot the number of contributors per release date.

    Obtains the data from ``compute_no_contributors(sys_name)``, draws
    ``no_contributors`` over ``release_date`` as a dotted line with circle
    markers, overlays the linear regression line returned by
    ``compute_lin_reg`` and saves the figure to
    ``data/output/<first three characters of sys_name>_no_contributors.png``.

    For the systems "cassandra" and "gaffer" the regression values quoted
    in the paper are additionally printed. For any other ``sys_name`` that
    report is skipped (previously this raised ``UnboundLocalError``).

    Args:
        sys_name: Name of the analysed system.

    Returns:
        Tuple ``(fig, slope, intercept)`` with the matplotlib figure and
        the regression coefficients from ``compute_lin_reg``.
    """
    df = compute_no_contributors(sys_name)

    fig, axs = plt.subplots(figsize=(6, 3))

    x_col = "release_date"
    y_col = "no_contributors"

    df.plot(
        x=x_col,
        y=y_col,
        ax=axs,
        linestyle=":",
        marker="o",
        rot=45,
        legend=False,
    )

    axs.set_ylabel("Number of Contributors")
    axs.set_xlabel("Release Date")

    slope, intercept, xf_dates, yf = compute_lin_reg(df, x_col, y_col)
    axs.plot(xf_dates, yf, c="orange")
    fig.savefig(
        f"data/output/{sys_name[:3]}_no_contributors.png", bbox_inches="tight"
    )

    # This code is only to generate the values used in the paper's text.
    # Only systems with a known start date are reported; other systems
    # still get their figure and return value.
    paper_start_dates = {"cassandra": "2010-01-01", "gaffer": "2016-01-01"}
    dt_a = paper_start_dates.get(sys_name)
    if dt_a is not None:
        dt_b = "2021-01-01"
        # Evaluate the regression line at both dates on the numeric
        # datetime axis produced by pd.to_numeric.
        ya = (
            slope * pd.to_numeric(pd.Series([pd.to_datetime(dt_a)]))[0]
            + intercept
        )
        yb = (
            slope * pd.to_numeric(pd.Series([pd.to_datetime(dt_b)]))[0]
            + intercept
        )
        # The factor rescales the slope from per-nanosecond to per 365-day
        # year (assumes the regression ran on nanosecond timestamps).
        print(
            f"increase in contr/year: {slope * (10 ** 9) * 60 * 60 * 24 * 365}",
            f"no contributors {dt_a}: {ya}",
            f"no contributors {dt_b}: {yb}",
        )

    return fig, slope, intercept
Example #4
0
def compute_contribcompl_dev2(df, sys_name):
    """Plot the monthly average contribution complexity with a trend line.

    Scatter-plots the ``avg_contrib_compl`` column of ``df`` over its
    ``month`` column, overlays the linear regression line returned by
    ``compute_lin_reg`` and labels the y ticks 1..5 with the complexity
    levels. The regression slope is printed.

    Args:
        df: DataFrame with ``month`` and ``avg_contrib_compl`` columns.
        sys_name: Name of the analysed system. Currently unused because
            the figure is not written to disk by this function.

    Returns:
        Tuple ``(fig, slope)`` with the matplotlib figure and the
        regression slope from ``compute_lin_reg``.
    """
    month_col, compl_col = "month", "avg_contrib_compl"
    complexity_levels = ["low", "moderate", "medium", "elevate", "high"]

    fig, axs = plt.subplots(figsize=(6, 3))
    df.plot.scatter(x=month_col, y=compl_col, s=1, ax=axs, rot=0)
    axs.set_xlabel("Resolved Date")
    axs.set_ylabel("Avg. Contribution Complexity per Month")

    # Only the slope and the fitted line are used; the intercept is not.
    slope, _, fit_x, fit_y = compute_lin_reg(df, month_col, compl_col)
    axs.plot(fit_x, fit_y, c="orange")

    # One tick per complexity level, numbered from 1.
    tick_positions = list(range(1, len(complexity_levels) + 1))
    axs.set_yticks(tick_positions)
    axs.set_yticklabels(complexity_levels, rotation=90)

    print("Increase in ? per ?", slope)
    fig.tight_layout()
    # Saving is disabled here; the call that was used is kept for reference:
    # fig.savefig(
    #     f"data/output/{sys_name[:3]}_contribcompl.png", bbox_inches="tight"
    # )
    return fig, slope
Example #5
0
def compute_contribcompl_dev(df, sys_name):
    """Plot contribution complexity over ticket resolution date.

    Scatter-plots the ``contrib_complexity`` column of ``df`` over its
    ``resolved`` column, overlays the linear regression line returned by
    ``compute_lin_reg``, labels the y ticks 1..5 with the complexity levels
    and saves the figure to
    ``data/output/<first three characters of sys_name>_contribcompl.png``.

    For the systems "cassandra" and "gaffer" the regression values quoted
    in the paper are additionally printed. For any other ``sys_name`` that
    report is skipped (previously this raised ``UnboundLocalError`` before
    the figure was saved).

    Args:
        df: DataFrame with ``resolved`` and ``contrib_complexity`` columns.
        sys_name: Name of the analysed system.

    Returns:
        Tuple ``(fig, slope)`` with the matplotlib figure and the
        regression slope from ``compute_lin_reg``.
    """
    fig, axs = plt.subplots(figsize=(6, 3))

    x_col = "resolved"
    y_col = "contrib_complexity"
    df.plot.scatter(x=x_col, y=y_col, s=1, ax=axs, rot=0)

    axs.set_ylabel("Contribution Complexity")
    axs.set_xlabel("Resolved Date")

    slope, intercept, xf_dates, yf = compute_lin_reg(df, x_col, y_col)
    axs.plot(xf_dates, yf, c="orange")

    axs.set_yticks(list(range(1, 6)))
    axs.set_yticklabels(
        ["low", "moderate", "medium", "elevate", "high"], rotation=45
    )

    # This code is only to generate the values used in the paper's text.
    # Only systems with a known start date are reported; other systems
    # still get their figure saved and returned.
    paper_start_dates = {"cassandra": "2010-01-01", "gaffer": "2016-01-01"}
    dt_a = paper_start_dates.get(sys_name)
    if dt_a is not None:
        dt_b = "2021-01-01"
        # Evaluate the regression line at both dates on the numeric
        # datetime axis produced by pd.to_numeric.
        ya = (
            slope * pd.to_numeric(pd.Series([pd.to_datetime(dt_a)]))[0]
            + intercept
        )
        yb = (
            slope * pd.to_numeric(pd.Series([pd.to_datetime(dt_b)]))[0]
            + intercept
        )
        # The factor rescales the slope from per-nanosecond to per 365-day
        # year (assumes the regression ran on nanosecond timestamps).
        print(
            f"increase in cc/year: {slope * (10 ** 9) * 60 * 60 * 24 * 365}",
            f"cc {dt_a}: {ya}",
            f"cc {dt_b}: {yb}",
        )

    fig.tight_layout()
    fig.savefig(
        f"data/output/{sys_name[:3]}_contribcompl.png", bbox_inches="tight"
    )
    return fig, slope