# Example #1
# 0
def add_company_column_for_issues(org, repo):
    """Export the repo's issues to TSV with an added employer/company column.

    Writes ``<org>_<repo>_<issue_file_suffix>_with_employer`` as a
    tab-separated file; returns nothing.
    """
    users = c.get_issue_authors_with_company(org, repo)
    issues = c.get_issues(org, repo)
    issues_with_company = merge_issues_with_company_column(issues, users)
    # Fix: add the missing "_" separator so the file name follows the
    # "_with_<qualifier>" pattern used by every other export in this file
    # (previously produced "...<suffix>with_employer").
    issues_with_company.to_csv(
        org + "_" + repo + "_" + c.issue_file_suffix + "_with_employer",
        sep='\t')
def calculate_issue_processing_time(org, repo):
    """Compute per-issue processing time (created_at -> closed_at) and
    dump one TSV per devstats mode (True and False)."""
    issue_df = c.get_issues(org, repo)
    for devstats_mode in (True, False):
        timed_df = p.calculate_issue_time_difference(
            org, repo, issue_df, "created_at", "closed_at", devstats_mode)
        timed_df.rename(columns={'time_difference': 'processing_time'},
                        inplace=True)
        out_name = (org + "_" + repo + "_" + c.issue_file_suffix +
                    "_with_processing_time_based_on_devstats_" +
                    str(devstats_mode))
        timed_df.to_csv(out_name, sep='\t')
def print_logistic_regression_for_pr_acceptance_rate(
        org, repo, based_on_devstats_data=False):
    """Fit a logit model of PR merge state and print/save the result.

    Builds the regression frame by enriching the repo's pull requests with
    issue priority/kind, author company, a merge-state dummy, controlling
    variables, and the prepared company variable, then fits the formula
    produced by ``_ols_formula`` and reports company representation.
    """
    user_df = p.get_users(org, repo, based_on_devstats_data)
    issue_df = c.get_issues(org, repo)
    pull_df = c.get_pulls(org, repo)

    # Enrich the pulls step by step with every variable the model needs.
    pull_df = p.merge_pulls_with_issue_priority_and_kind(pull_df, issue_df)
    pull_df = _merge_pulls_with_company_column(pull_df, user_df,
                                               based_on_devstats_data)
    pull_df = p.add_dummy_column_for_pr_merge_state(pull_df)
    pull_df = _add_controlling_variables(pull_df)
    pull_df = _prepare_independent_company_variable(pull_df,
                                                    based_on_devstats_data)

    formula = _ols_formula("pr_is_merged", based_on_devstats_data)
    fit_result = sm.logit(formula=formula, data=pull_df).fit()
    _print_company_representation_in_data(pull_df)
    _print_and_save_result(fit_result)
def calculate_issue_reponse_time(org, repo):
    """Compute per-issue response time (created_at -> commented_at) and
    dump one TSV per devstats mode (True and False).

    NOTE(review): "reponse" in the function name is a typo; kept as-is to
    avoid breaking existing callers.
    """
    commented_df = c.get_issues_with_comments(org, repo)

    # No pre-merged issues-with-comments data available: build the merged
    # frame from the raw issues and their first comments instead.
    if commented_df.empty:
        issue_df = c.get_issues(org, repo)
        comment_df = c.get_issue_comments(org, repo)
        first_comments = p.extract_first_comment_per_issue(comment_df)
        commented_df = p.merge_issues_with_issue_comments(
            issue_df, first_comments)

    for devstats_mode in (True, False):
        timed_df = p.calculate_issue_time_difference(
            org, repo, commented_df, "created_at", "commented_at",
            devstats_mode)
        timed_df.rename(columns={'time_difference': 'response_time'},
                        inplace=True)
        timed_df.to_csv(
            org + "_" + repo + "_" + c.issue_file_suffix +
            "_with_response_time_based_on_devstats_" + str(devstats_mode),
            sep='\t')
def calculate_avg_issue_response_time_by_company(org, repo):
    """Aggregate the average issue response time (creation -> first comment)
    per employer/company.

    Prints each company's average response time (in seconds) and issue count,
    and returns the ``companies`` mapping enriched with ``response_time``,
    ``issue_count`` and ``avg_response_time`` entries.
    """
    issues = c.get_issues(org, repo)
    issue_comments = c.get_issue_comments(org, repo)
    issues_w_comments = p.merge_issues_with_issue_comments(
        issues, issue_comments)

    companies = c.get_companies(org, repo)
    for employer in companies:
        companies[employer]["response_time"] = timedelta(0)
        companies[employer]["issue_count"] = 0

    time_format = "%Y-%m-%d %H:%M:%S"
    for _, issue in issues_w_comments.iterrows():
        employer = p.get_employer(
            issue.user_login_x, org,
            repo)  # TODO make more generic / change merge behavior
        # Fix: isinstance() also recognises float subclasses (e.g. a
        # numpy.float64 NaN produced by the merge), which the previous
        # `type(...) is float` check silently missed.
        open_issue = isinstance(issue.created_at_y, float) and math.isnan(
            issue.created_at_y)
        if employer is None or open_issue:
            continue
        companies[employer]["response_time"] += p.determine_processing_time(
            issue.created_at_y, issue.created_at_x,
            time_format)  # TODO make more generic / change merge behavior
        companies[employer]["issue_count"] += 1

    for employer in companies:
        issue_count = companies[employer]["issue_count"]
        # Guard against division by zero for companies without closed issues.
        companies[employer]["avg_response_time"] = (
            companies[employer]["response_time"].total_seconds() / issue_count
            if issue_count else 0)
        print(
            str(employer) + " - avg_response_time: " +
            str(companies[employer]["avg_response_time"]))
        print(
            str(employer) + " - issue_count: " +
            str(companies[employer]["issue_count"]))

    return companies
def calculate_avg_issue_processing_time_by_company(org, repo):
    """Aggregate the average issue processing time (creation -> close)
    per employer/company.

    Prints each company's average processing time (in seconds) and issue
    count, and returns the ``companies`` mapping enriched with
    ``processing_time``, ``issue_count`` and ``avg_processing_time`` entries.
    """
    # note that PRs are included here as they are a special type of an issue
    time_format = "%Y-%m-%d %H:%M:%S"
    companies = c.get_companies(org, repo)
    for employer in companies:
        companies[employer]["processing_time"] = timedelta(0)
        companies[employer]["issue_count"] = 0

    issues = c.get_issues(org, repo)

    print("Start iterating over issues...")
    for _, issue in issues.iterrows():
        employer = p.get_employer(issue.user_login, org, repo)
        # Fix: isinstance() also recognises float subclasses (e.g. a
        # numpy.float64 NaN for a missing closed_at), which the previous
        # `type(...) is float` check silently missed.
        still_open = isinstance(issue.closed_at, float) and math.isnan(
            issue.closed_at)
        if employer is None or still_open:
            continue

        print(issue.title)
        companies[employer]["processing_time"] += p.determine_processing_time(
            issue.created_at, issue.closed_at, time_format)
        companies[employer]["issue_count"] += 1

    for employer in companies:
        issue_count = companies[employer]["issue_count"]
        # Guard against division by zero for companies without closed issues.
        companies[employer]["avg_processing_time"] = (
            companies[employer]["processing_time"].total_seconds() /
            issue_count if issue_count else 0)
        print(
            str(employer) + " - avg_processing_time: " +
            str(companies[employer]["avg_processing_time"]))
        print(
            str(employer) + " - issue_count: " +
            str(companies[employer]["issue_count"]))

    return companies
# Example #7
# 0
def determine_company_share_of_issues_based_on_devstats_data(org, repo):
    """Count the repo's issues per company, using devstats-derived user data.

    Returns a ``Counter`` mapping each company name to its number of issues.
    """
    issue_df = c.get_issues(org, repo)
    user_df = get_formatted_devstats_user()
    merged_df = merge_issues_with_company_column(issue_df, user_df)
    return Counter(merged_df.company.values)