def main(args):
    """
    Run univariate regression for each of the available metrics.

    Every (category, project) pair taken from the module-level
    ``categories`` and ``projects`` is loaded through
    ``get_metric_results``; pairs for which it returns ``None`` are
    skipped. When ``args.split_paradigm_score`` is truthy, the loaded
    results are handed to ``split_paradigm_score`` and each
    ``(paradigm, scores)`` pair it yields is regressed on its own, under
    the project name with the paradigm appended. Afterwards the matching
    ``summarise_*`` helper is invoked for the 'univariate' results.

    :param args: argument namespace (presumably from argparse -- confirm
        against the caller); ``folder`` and ``split_paradigm_score`` are
        read here and the object itself is forwarded to the helpers.
    """
    # Only RuntimeWarnings attributed to sklearn modules are silenced.
    warnings.filterwarnings("ignore", category=RuntimeWarning,
                            module="sklearn")

    out_dir = (f'{args.folder}/split-regression/univariate/'
               if args.split_paradigm_score else
               f'{args.folder}/regression/univariate/')

    # Fixed seeds keep the model and the fold assignment reproducible.
    model = LogisticRegression(class_weight='balanced', random_state=42)
    folds = StratifiedKFold(n_splits=10, shuffle=True, random_state=42)

    for category in categories:
        for path, name in projects.items():
            results = get_metric_results(args.folder, path, category)
            if results is None:
                # Nothing available for this project/category pair.
                continue
            if not args.split_paradigm_score:
                univariate(results, out_dir, category, name, model, folds,
                           args)
                continue
            parts = split_paradigm_score(results, args.folder, path, category)
            for paradigm, scores in parts:
                univariate(scores, out_dir, category, name + paradigm, model,
                           folds, args)

    summarise = (summarise_split_directory
                 if args.split_paradigm_score else summarise_directory)
    summarise(args, 'univariate')
# Example #2
# 0
def main(args):
    """
    Calculate statistics of the available fault data.

    For every category a fresh, empty table is created and then extended
    by ``fault_statistics`` once per project (or once per paradigm part
    of a project when ``args.split_paradigm_score`` is truthy). Projects
    for which ``get_metric_results`` returns ``None`` are skipped. A table
    that is still empty after all projects is not written out.

    :param args: argument namespace (presumably from argparse -- confirm
        against the caller); ``folder`` and ``split_paradigm_score`` are
        read here.
    """
    out_dir = (f'{args.folder}/split-regression/fault-statistics/'
               if args.split_paradigm_score else
               f'{args.folder}/regression/fault-statistics/')

    for category in categories:
        table = pd.DataFrame(columns=[
            'name',
            'rows',
            'faulty_rows',
            'non_faulty_rows',
            'percentage_faulty',
        ])
        for path, name in projects.items():
            results = get_metric_results(args.folder, path, category)
            if results is None:
                continue
            if not args.split_paradigm_score:
                table = fault_statistics(results, table, category, name)
                continue
            parts = split_paradigm_score(results, args.folder, path, category)
            for paradigm, scores in parts:
                # fault_statistics returns the table to carry forward.
                table = fault_statistics(scores, table, category,
                                         name + paradigm)
        if table.empty:
            continue
        # NOTE(review): meaning of the trailing False is defined by
        # save_dataframe, which is not visible here.
        save_dataframe(table, out_dir, category, False)
# Example #3
# 0
def main(args):
    """
    Run multivariate regression on all metrics together.

    For every category an empty results table is created and then
    extended by ``multivatiate`` once per project (or once per paradigm
    part of a project when ``args.split_paradigm_score`` is truthy).
    Projects for which ``get_metric_results`` returns ``None`` are
    skipped, and a table that stays empty is not saved.

    :param args: argument namespace (presumably from argparse -- confirm
        against the caller); ``folder`` and ``split_paradigm_score`` are
        read here and the object itself is forwarded to the helper.
    """
    # Only ConvergenceWarnings attributed to sklearn modules are silenced.
    warnings.filterwarnings("ignore", category=ConvergenceWarning,
                            module="sklearn")

    out_dir = (f'{args.folder}/split-regression/multivariate/'
               if args.split_paradigm_score else
               f'{args.folder}/regression/multivariate/')

    # Fixed seeds keep the model and the fold assignment reproducible.
    model = LogisticRegression(class_weight='balanced', random_state=42)
    folds = StratifiedKFold(n_splits=10, shuffle=True, random_state=42)

    for category in categories:
        table = pd.DataFrame(columns=[
            'name',
            'tn',
            'fp',
            'fn',
            'tp',
            'r2',
            'precision',
            'recall',
            'mcc',
        ])
        for path, name in projects.items():
            results = get_metric_results(args.folder, path, category)
            if results is None:
                continue
            if not args.split_paradigm_score:
                table = multivatiate(results, table, category, name, model,
                                     folds, args)
                continue
            parts = split_paradigm_score(results, args.folder, path, category)
            for paradigm, scores in parts:
                # The helper returns the table to carry forward.
                table = multivatiate(scores, table, category,
                                     name + paradigm, model, folds, args)
        if table.empty:
            continue
        # NOTE(review): meaning of the trailing False is defined by
        # save_dataframe, which is not visible here.
        save_dataframe(table, out_dir, category, False)
def main(args):
    """
    Calculate statistics of the fault data per metric.

    Each (category, project) pair is loaded with ``get_metric_results``
    and, unless that returns ``None``, passed to
    ``fault_metric_statistics``. When ``args.split_paradigm_score`` is
    truthy the results are first divided by ``split_paradigm_score`` and
    every part is processed under the project name with the paradigm
    appended. The matching ``summarise_*`` helper is called at the end.

    :param args: argument namespace (presumably from argparse -- confirm
        against the caller); ``folder`` and ``split_paradigm_score`` are
        read here and the object itself is forwarded to the helpers.
    """
    out_dir = (f'{args.folder}/split-regression/fault-metric-statistics/'
               if args.split_paradigm_score else
               f'{args.folder}/regression/fault-metric-statistics/')

    for category in categories:
        for path, name in projects.items():
            results = get_metric_results(args.folder, path, category)
            if results is None:
                continue
            if not args.split_paradigm_score:
                fault_metric_statistics(out_dir, results, category, name,
                                        args)
                continue
            parts = split_paradigm_score(results, args.folder, path, category)
            for paradigm, scores in parts:
                fault_metric_statistics(out_dir, scores, category,
                                        name + paradigm, args)

    summarise = (summarise_split_directory
                 if args.split_paradigm_score else summarise_directory)
    # NOTE(review): the list is presumably the set of columns to
    # summarise -- confirm against the summarise helpers.
    summarise(args, 'fault-metric-statistics',
              ['name', 'percentage_faulty', 'percentage_total_faults'])
def main(args):
    """
    Run univariate and multivariate baseline regression on a control metric.

    The control metric is the expected values with a small probability of
    being incorrect.

    Input is always read from the 'baseline' results through
    ``get_metric_results``, while output goes below ``args.folder``.
    Project/category pairs without baseline results (``None``) are
    skipped. ``summarise_directory`` is called once all pairs are done.

    :param args: argument namespace (presumably from argparse -- confirm
        against the caller); ``folder`` is read here and the object itself
        is forwarded to the helpers.
    """
    # Only ConvergenceWarnings attributed to sklearn modules are silenced.
    warnings.filterwarnings("ignore", category=ConvergenceWarning,
                            module="sklearn")

    out_dir = f'{args.folder}/regression/multivariate-baseline-control/'

    # Fixed seeds keep the model and the fold assignment reproducible.
    model = LogisticRegression(class_weight='balanced', random_state=42)
    folds = StratifiedKFold(n_splits=10, shuffle=True, random_state=42)

    for category in categories:
        for path, name in projects.items():
            baseline = get_metric_results('baseline', path, category)
            if baseline is None:
                continue
            multivariate_baseline_control(baseline, out_dir, category, name,
                                          model, folds, args)

    summarise_directory(args, 'multivariate-baseline-control')
def main(args):
    """
    For each of the available metrics, run multivariate regression
    on the baseline metric set with one of the metrics added.

    Baseline results are read from 'baseline' and metric results from
    ``args.folder``. If no metric results exist for a category whose name
    contains 'object', the lookup is retried with the base category
    'objectResultsBriand' (when the name contains 'Briand') or
    'objectResultsLandkroon' (otherwise). The regression only runs when
    both the baseline and the metric results are available.

    :param args: argument namespace (presumably from argparse -- confirm
        against the caller); ``folder`` is read here and the object itself
        is forwarded to the helpers.
    """
    # Only ConvergenceWarnings attributed to sklearn modules are silenced.
    warnings.filterwarnings("ignore", category=ConvergenceWarning,
                            module="sklearn")

    out_dir = f'{args.folder}/regression/multivariate-baseline/'

    # Fixed seeds keep the model and the fold assignment reproducible.
    model = LogisticRegression(class_weight='balanced', random_state=42)
    folds = StratifiedKFold(n_splits=10, shuffle=True, random_state=42)

    for category in categories:
        for path, name in projects.items():
            baseline = get_metric_results('baseline', path, category)
            metrics = get_metric_results(args.folder, path, category)

            if metrics is None and 'object' in category:
                # Fall back to the base object-level category.
                if 'Briand' in category:
                    fallback = 'objectResultsBriand'
                else:
                    fallback = 'objectResultsLandkroon'
                metrics = get_metric_results(args.folder, path, fallback)

            if baseline is None or metrics is None:
                continue
            multivariate_baseline(baseline, metrics, out_dir, category, name,
                                  model, folds, args)

    summarise_directory(args, 'multivariate-baseline')
def main(args):
    """
    Calculate descriptive statistics of the metric values.

    Each (category, project) pair is loaded with ``get_metric_results``
    and, unless that returns ``None``, passed to ``descriptive``. When
    ``args.split_paradigm_score`` is truthy the results are first divided
    by ``split_paradigm_score`` and every part is processed under the
    project name with the paradigm appended. The matching ``summarise_*``
    helper is called at the end.

    :param args: argument namespace (presumably from argparse -- confirm
        against the caller); ``folder`` and ``split_paradigm_score`` are
        read here and the object itself is forwarded to the helpers.
    """
    out_dir = (f'{args.folder}/split-regression/descriptive/'
               if args.split_paradigm_score else
               f'{args.folder}/regression/descriptive/')

    for category in categories:
        for path, name in projects.items():
            results = get_metric_results(args.folder, path, category)
            if results is None:
                continue
            if not args.split_paradigm_score:
                descriptive(results, out_dir, category, name, args)
                continue
            parts = split_paradigm_score(results, args.folder, path, category)
            for paradigm, scores in parts:
                descriptive(scores, out_dir, category, name + paradigm, args)

    summarise = (summarise_split_directory
                 if args.split_paradigm_score else summarise_directory)
    # NOTE(review): the list is presumably the set of columns to
    # summarise -- confirm against the summarise helpers.
    summarise(args, 'descriptive', ['name', 'mean', 'std'])