Example #1
0
def blasphemy_ccp_cochange_by_dev_num_group(repo_file_quality_per_year,
                                            repo_file_blasphemy_per_year):
    """
        Co-change analysis of corrective_commits_ratio versus
        blasphemy_hit_rates, run separately for the 'small', 'medium' and
        'large' values of dev_num_group.

        Each input is turned into a per-year data frame by
        build_repo_per_year_df; the two frames are joined on repo_name,
        year and dev_num_group and the result is handed to
        cochange_analysis_by_value.

        Note that the quality metric used is the bug hit ratio
        (corrective_commits_ratio) and not ccp.
        For change analysis it doesn't matter.
    :param repo_file_quality_per_year: quality input, forwarded as is to
        build_repo_per_year_df (presumably a per-year stats file - verify
        against the caller).
    :param repo_file_blasphemy_per_year: blasphemy input, forwarded as is
        to build_repo_per_year_df (same assumption).
    :return: None, the result of cochange_analysis_by_value is not returned.
    """
    key = 'repo_name'
    fixed_variable = 'dev_num_group'
    control_variables = [fixed_variable]

    quality_df = build_repo_per_year_df(repo_file_quality_per_year,
                                        key=key,
                                        control_variables=control_variables)
    blasphemy_df = build_repo_per_year_df(repo_file_blasphemy_per_year,
                                          key=key,
                                          control_variables=control_variables)

    # Inner join (pd.merge default): only repo/year/group rows present in
    # both frames are analysed.
    join_columns = [key, 'year'] + control_variables
    per_year_df = pd.merge(quality_df, blasphemy_df, on=join_columns)

    cochange_analysis_by_value(
        per_year_df,
        first_metric='corrective_commits_ratio',
        second_metric='blasphemy_hit_rates',
        first_the_higher_the_better=False,
        second_the_higher_the_better=False,
        first_sig_threshold=0.1,
        second_sig_threshold=0.01,
        fixed_variable=fixed_variable,
        fixed_values=['small', 'medium', 'large'],
        key=key,
        control_variables=control_variables)
Example #2
0
def speed_ccp_cochange_by_var(commits_per_user_file, fixed_variable,
                              fixed_values):
    """
        Co-change analysis of corrective_commits_ratio versus
        commits_per_above11_users, run separately for each of fixed_values
        of fixed_variable.

        The commits-per-user CSV is restricted to years after
        EARLIEST_ANALYZED_YEAR and joined on repo_name with the frame
        returned by get_valid_repos.

        Note that the quality metric used is the bug hit ratio
        (corrective_commits_ratio) and not ccp.
        For change analysis it doesn't matter.
    :param commits_per_user_file: CSV file read with pd.read_csv; it must
        provide at least the year and repo_name columns used here.
    :param fixed_variable: name of the variable whose values split the
        analysis; it is also the only control variable.
    :param fixed_values: values of fixed_variable to analyse, forwarded to
        cochange_analysis_by_value.
    :return: None, the result of cochange_analysis_by_value is not returned.
    """
    key = 'repo_name'
    control_variables = [fixed_variable]

    valid_repos = get_valid_repos()
    commits_per_user = pd.read_csv(commits_per_user_file)

    # Strictly later than the earliest analyzed year.
    after_earliest_year = commits_per_user.year > EARLIEST_ANALYZED_YEAR
    per_year_df = pd.merge(commits_per_user[after_earliest_year],
                           valid_repos,
                           on='repo_name')

    cochange_analysis_by_value(
        per_year_df,
        first_metric='corrective_commits_ratio',
        second_metric='commits_per_above11_users',
        first_the_higher_the_better=False,
        second_the_higher_the_better=True,
        first_sig_threshold=0.1,
        second_sig_threshold=10,
        fixed_variable=fixed_variable,
        fixed_values=fixed_values,
        key=key,
        control_variables=control_variables)
def churn_ccp_cochange_by_age(repo_file_quality_per_year,
                              repo_file_churn_per_year):
    """
        Co-change analysis of ccp versus continuing_developers_ratio, run
        separately for the 'old', 'medium' and 'young' values of age_group.

        Each input is turned into a per-year data frame by
        build_repo_per_year_df. Churn rows are kept only when
        base_year_developers is above 9, then joined with the quality rows
        on repo_name, year and age_group. The ccp column is derived from
        corrective_commits_ratio with ccp_estimator.estimate_positives.
    :param repo_file_quality_per_year: quality input, forwarded as is to
        build_repo_per_year_df (presumably a per-year stats file - verify
        against the caller).
    :param repo_file_churn_per_year: churn input, forwarded as is to
        build_repo_per_year_df (same assumption).
    :return: None, the result of cochange_analysis_by_value is not returned.
    """
    key = 'repo_name'
    fixed_variable = 'age_group'
    control_variables = [fixed_variable]

    quality_df = build_repo_per_year_df(repo_file_quality_per_year,
                                        key=key,
                                        control_variables=control_variables)
    churn_df = build_repo_per_year_df(repo_file_churn_per_year,
                                      key=key,
                                      control_variables=control_variables)

    # Keep only rows with at least 10 developers in the base year.
    enough_base_developers = churn_df.base_year_developers > 9
    churn_df = churn_df[enough_base_developers]

    join_columns = [key, 'year'] + control_variables
    per_year_df = pd.merge(quality_df, churn_df, on=join_columns)
    per_year_df['ccp'] = per_year_df.corrective_commits_ratio.map(
        ccp_estimator.estimate_positives)

    cochange_analysis_by_value(
        per_year_df,
        first_metric='ccp',
        second_metric='continuing_developers_ratio',
        first_the_higher_the_better=False,
        second_the_higher_the_better=True,
        first_sig_threshold=0.1,
        second_sig_threshold=0.1,
        fixed_variable=fixed_variable,
        fixed_values=['old', 'medium', 'young'],
        key=key,
        control_variables=control_variables)
Example #4
0
def onboarding_ccp_cochange_by_lang(repo_file_quality_per_year,
                                    repo_file_onboarding_per_year):
    """
        Co-change analysis of ccp versus comming_involved_developers_ratio,
        run separately for each language in lang_name.

        Each input is turned into a per-year data frame by
        build_repo_per_year_df. Onboarding rows are kept only when
        comming_developers is above 9 and the language is listed in
        lang_name, then joined with the quality rows on repo_name, year and
        language. The ccp column is derived from corrective_commits_ratio
        with ccp_estimator.estimate_positives.
    :param repo_file_quality_per_year: quality input, forwarded as is to
        build_repo_per_year_df (presumably a per-year stats file - verify
        against the caller).
    :param repo_file_onboarding_per_year: onboarding input, forwarded as is
        to build_repo_per_year_df (same assumption).
    :return: None, the result of cochange_analysis_by_value is not returned.
    """
    key = 'repo_name'
    fixed_variable = 'language'
    control_variables = [fixed_variable]

    quality_df = build_repo_per_year_df(repo_file_quality_per_year,
                                        key=key,
                                        control_variables=control_variables)
    onboarding_df = build_repo_per_year_df(
        repo_file_onboarding_per_year,
        key=key,
        control_variables=control_variables)

    # Keep only rows with at least 10 arriving developers, written in one
    # of the analysed languages.
    enough_newcomers = onboarding_df.comming_developers > 9
    analysed_language = onboarding_df.language.isin(lang_name)
    onboarding_df = onboarding_df[enough_newcomers & analysed_language]

    join_columns = [key, 'year'] + control_variables
    per_year_df = pd.merge(quality_df, onboarding_df, on=join_columns)
    per_year_df['ccp'] = per_year_df.corrective_commits_ratio.map(
        ccp_estimator.estimate_positives)

    cochange_analysis_by_value(
        per_year_df,
        first_metric='ccp',
        second_metric='comming_involved_developers_ratio',
        first_the_higher_the_better=False,
        second_the_higher_the_better=True,
        first_sig_threshold=0.1,
        second_sig_threshold=0.1,
        fixed_variable=fixed_variable,
        fixed_values=lang_name,
        key=key,
        control_variables=control_variables)
Example #5
0
def blasphemy_ccp_cochange_by_lang(repo_file_quality_per_year,
                                   repo_file_blasphemy_per_year):
    """
        Co-change analysis of corrective_commits_ratio versus
        blasphemy_hit_rates, run separately for each language in lang_name.

        Each input is turned into a per-year data frame by
        build_repo_per_year_df. Blasphemy rows are kept only when the
        language is listed in lang_name, then joined with the quality rows
        on repo_name, year and language.

        Note that the quality metric used is the bug hit ratio
        (corrective_commits_ratio) and not ccp.
        For change analysis it doesn't matter.
    :param repo_file_quality_per_year: quality input, forwarded as is to
        build_repo_per_year_df (presumably a per-year stats file - verify
        against the caller).
    :param repo_file_blasphemy_per_year: blasphemy input, forwarded as is
        to build_repo_per_year_df (same assumption).
    :return: None, the result of cochange_analysis_by_value is not returned.
    """
    key = 'repo_name'
    fixed_variable = 'language'
    control_variables = [fixed_variable]

    quality_df = build_repo_per_year_df(repo_file_quality_per_year,
                                        key=key,
                                        control_variables=control_variables)
    blasphemy_df = build_repo_per_year_df(repo_file_blasphemy_per_year,
                                          key=key,
                                          control_variables=control_variables)

    # Restrict to the analysed languages before joining.
    analysed_language = blasphemy_df.language.isin(lang_name)
    blasphemy_df = blasphemy_df[analysed_language]

    join_columns = [key, 'year'] + control_variables
    per_year_df = pd.merge(quality_df, blasphemy_df, on=join_columns)

    cochange_analysis_by_value(
        per_year_df,
        first_metric='corrective_commits_ratio',
        second_metric='blasphemy_hit_rates',
        first_the_higher_the_better=False,
        second_the_higher_the_better=False,
        first_sig_threshold=0.1,
        second_sig_threshold=0.01,
        fixed_variable=fixed_variable,
        fixed_values=lang_name,
        key=key,
        control_variables=control_variables)
def cochange_analysis_by_value(per_year_df
                            , metrics_dict
                            , fixed_variable
                            , fixed_values
                            , keys
                            , control_variables
                            , expected
                         ):
    """
        Call cochange_analysis_by_value with the first six arguments and
        assert that the returned value equals expected.

        NOTE(review): this looks like a parametrized test wrapper whose
        name lost a prefix (presumably test_cochange_analysis_by_value
        around an imported cochange_analysis_by_value) - confirm and
        rename. As written here the called name is the name of this very
        function, and the call passes six positional arguments while the
        signature requires seven, so unless the name is rebound elsewhere
        a call raises TypeError before the assert is reached.
    :param per_year_df: forwarded unchanged to the analysis call.
    :param metrics_dict: forwarded unchanged to the analysis call.
    :param fixed_variable: forwarded unchanged to the analysis call.
    :param fixed_values: forwarded unchanged to the analysis call.
    :param keys: forwarded unchanged to the analysis call.
    :param control_variables: forwarded unchanged to the analysis call.
    :param expected: value the analysis result is compared to with ==.
    :return: None
    :raises AssertionError: when the analysis result differs from expected.
    """

    # Positional call with everything except `expected`.
    actual = cochange_analysis_by_value(per_year_df
                           , metrics_dict
                           , fixed_variable
                           , fixed_values
                           , keys
                           , control_variables)

    assert actual == expected