Example 1
def pictures_from_stats(args):
    # plot_pareto_fronts()

    logger = logging.getLogger(__name__)
    logger.debug("pictures from stats")

    boot_size = int(args["--bootstrap"])
    results_dir = args["--dir"]
    plots_dir = Path(args["-o"])

    results = collections.defaultdict(list)
    with log_time(process_time, logger, "Preparing data done in {time_res:.3f}"):
        for problem_name, problem_mod, algorithms in serialization.each_result(
            BudgetResultsExtractor(), results_dir
        ):
            for algo_name, budgets in algorithms:
                for result in budgets:
                    # The first analysis entry is the cost metric; each data
                    # point is a zero-argument callable, so call to materialize.
                    _, _, cost_data = next(result["analysis"])
                    cost_data = list(x() for x in cost_data)
                    cost_analysis = yield_analysis(cost_data, boot_size)

                    budget = cost_analysis["btstrpd"]["metrics"]
                    budget_err = cost_analysis["stdev"]

                    for metric_name, metric_name_long, data_process in result[
                        "analysis"
                    ]:
                        if metric_name in best_func:
                            if metric_name == "dst from pareto":
                                metric_name = "dst"
                            data_process = list(x() for x in data_process)

                            data_analysis = yield_analysis(data_process, boot_size)

                            score = data_analysis["btstrpd"]["metrics"]
                            score_err = data_analysis["stdev"]

                            keys = [
                                (problem_name, algo_name, metric_name, group)
                                for group in algos_groups[algo_name]
                            ]
                            value = (budget, budget_err, score, score_err)
                            print("PLOT: " + str(value))

                            for key in keys:
                                results[key].append(value)
    plot_results(results, plots_dir, (500, 4500))
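The plotting code above reads only two fields of each `yield_analysis` result: the bootstrapped metric value at `["btstrpd"]["metrics"]` and its spread at `["stdev"]`. A minimal stand-in with that shape can be handy for exercising these functions in isolation; the resampling scheme below is an assumption for illustration, not the project's implementation.

import random
import statistics

def yield_analysis_stub(data, boot_size):
    # Hypothetical stand-in for yield_analysis: bootstrap the mean of `data`
    # and expose only the keys the callers above actually read.
    means = [
        statistics.mean(random.choices(data, k=len(data)))
        for _ in range(boot_size)
    ]
    return {
        "btstrpd": {"metrics": statistics.mean(means)},
        "stdev": statistics.stdev(means) if len(means) > 1 else 0.0,
    }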
Example 2
def pictures_summary(args):
    logger = logging.getLogger(__name__)
    logger.debug("pictures_summary")

    selected = set(args["--selected"].upper().split(","))
    boot_size = int(args["--bootstrap"])
    results_dir = args["--dir"]
    plots_dir = Path(args["-o"])

    logger.debug("Plotting summary with selected algos: " + ",".join(selected))

    scoring = collections.defaultdict(lambda: collections.defaultdict(dict))
    problems = set()

    with log_time(process_time, logger, "Preparing data done in {time_res:.3f}"):
        for problem_name, problem_mod, algorithms in serialization.each_result(
            BudgetResultsExtractor(), results_dir
        ):
            problems.add(problem_name)
            problem_score = collections.defaultdict(list)
            algos = list(algorithms)
            for algo_name, results in algos:
                max_result = find_acceptable_result_for_budget(list(results), boot_size)
                if max_result:
                    print(
                        "{}, {} , budget={}".format(
                            problem_name, algo_name, max_result["budget"]
                        )
                    )
                    for metric_name, metric_name_long, data_process in max_result[
                        "analysis"
                    ]:
                        if metric_name in ranking.best_func:
                            data_process = list(x() for x in data_process)
                            data_analysis = yield_analysis(data_process, boot_size)

                            score = math.log(
                                math.fabs(data_analysis["btstrpd"]["metrics"]) + 1.0
                            )

                            scoring[metric_name][algo_name][problem_name] = score
                            problem_score[metric_name].append((algo_name, score))
                else:
                    print("{}, {}, NO BUDGET".format(problem_name, algo_name))

            for metric_name in scoring:
                if metric_name != "pdi":

                    max_score = (
                        max(x for algo, x in problem_score[metric_name]) + 0.0001
                    )
                    for algo_name, _ in algos:
                        if (
                            algo_name in scoring[metric_name]
                            and problem_name in scoring[metric_name][algo_name]
                        ):
                            scoring[metric_name][algo_name][problem_name] /= max_score

    plot_results_summary(problems, scoring, selected, plots_dir)
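The per-problem scores above are compressed with log(|value| + 1) and then rescaled by that problem's maximum plus a small epsilon, so every normalized score lands strictly inside (0, 1). The arithmetic in miniature, with made-up metric values:

import math

raw = {"NSGAII": 0.042, "SPEA2": 0.110, "IBEA": 0.007}  # hypothetical metrics

# Log-compress, as in pictures_summary above.
scores = {algo: math.log(math.fabs(v) + 1.0) for algo, v in raw.items()}

# Rescale by the per-problem maximum plus a small epsilon.
max_score = max(scores.values()) + 0.0001
normalized = {algo: s / max_score for algo, s in scores.items()}

print(normalized)  # the largest raw value maps closest to 1.0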
Example 3
def rank(args):
    # plot_pareto_fronts()

    logger = logging.getLogger(__name__)
    logger.debug("ranking")

    boot_size = int(args["--bootstrap"])

    scoring = collections.defaultdict(list)

    with log_time(process_time, logger,
                  "Preparing data done in {time_res:.3f}"):
        for problem_name, problem_mod, algorithms in serialization.each_result(
                BudgetResultsExtractor(), RESULTS_DIR):
            for algo_name, results in algorithms:
                max_budget_result = find_acceptable_result_for_budget(
                    list(results), boot_size)
                if max_budget_result:
                    for (
                            metric_name,
                            metric_name_long,
                            data_process,
                    ) in max_budget_result["analysis"]:
                        if metric_name in best_func:

                            data_process = list(x() for x in data_process)
                            data_analysis = yield_analysis(
                                data_process, boot_size)

                            score = data_analysis["btstrpd"]["metrics"]
                            scoring[(problem_name, metric_name)].append(
                                (algo_name, score))

    global_scoring = collections.defaultdict(collections.Counter)

    print("Problem ranking\n################")
    for problem_name, metric_name in scoring:
        metric_scoring = scoring[(problem_name, metric_name)]
        algo_win, score = best_func[metric_name](metric_scoring,
                                                 key=lambda x: x[1])
        print("{}, {} : {}".format(problem_name, metric_name, algo_win))

        weak_winners = get_weak_winners(metric_scoring, (algo_win, score),
                                        winner_tolerance[metric_name])
        # if not weak_winners:
        #     global_scoring[metric_name].update([algo_win])
        if not weak_winners:
            global_scoring[metric_name].update([algo_win, algo_win])
        else:
            global_scoring[metric_name].update(
                [algo_win] + [algo for algo, score in weak_winners])

    print("\nGlobal ranking\n##############")
    for metric_name in global_scoring:
        print("{} : ".format(metric_name) + ", ".join(
            "{} ({})".format(score[0], score[1])
            for score in global_scoring[metric_name].most_common()))
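The tally above awards a strong winner two points and otherwise one point each to the winner and its weak winners, the same scheme spelled out in a comment in the table_rank example below. A self-contained sketch of that scoring step, with a hypothetical get_weak_winners stand-in that treats anything within a tolerance of the winning score as a weak win (the project's real get_weak_winners is not shown here):

import collections

def get_weak_winners_stub(scoring, winner, tolerance):
    # Hypothetical: non-winners whose score is within `tolerance` of the win.
    win_algo, win_score = winner
    return [(algo, score) for algo, score in scoring
            if algo != win_algo and abs(score - win_score) <= tolerance]

metric_scoring = [("NSGAII", 0.91), ("SPEA2", 0.90), ("IBEA", 0.40)]
algo_win, score = max(metric_scoring, key=lambda x: x[1])

global_scoring = collections.Counter()
weak_winners = get_weak_winners_stub(metric_scoring, (algo_win, score), 0.05)
if not weak_winners:
    global_scoring.update([algo_win, algo_win])  # strong win: 2 points
else:
    # Winner and each weak winner get 1 point apiece.
    global_scoring.update([algo_win] + [algo for algo, _ in weak_winners])

print(global_scoring.most_common())  # [('NSGAII', 1), ('SPEA2', 1)]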
Example 4
def pictures_from_stats(args, queue):
    # plot_pareto_fronts()

    logger = logging.getLogger(__name__)
    logger.debug("pictures from stats")

    boot_size = int(args['--bootstrap'])

    results = collections.defaultdict(list)
    with log_time(process_time, logger, "Preparing data done in {time_res:.3f}"):
        for problem_name, problem_mod, algorithms in RunResult.each_result(RESULTS_DIR):
            for algo_name, budgets in algorithms:
                for result in budgets:
                    _, _, cost_data = next(result["analysis"])
                    cost_data = list(x() for x in cost_data)
                    cost_analysis = yield_analysis(cost_data, boot_size)

                    budget = cost_analysis["btstrpd"]["metrics"]
                    budget_err = cost_analysis["stdev"]

                    for metric_name, metric_name_long, data_process in result["analysis"]:
                        if metric_name in best_func:
                            if metric_name == 'dst from pareto':
                                metric_name = 'dst'
                            data_process = list(x() for x in data_process)

                            data_analysis = yield_analysis(data_process, boot_size)

                            score = data_analysis["btstrpd"]["metrics"]
                            score_err = data_analysis["stdev"]

                            keys = [(problem_name, algo_name, metric_name, group) for group in
                                    algos_groups[algo_name]]
                            value = (budget, budget_err, score, score_err)

                            for key in keys:
                                results[key].append(value)
    plot_results(results)
Example 5
def detailed_rank(args):
    # plot_pareto_fronts()

    logger = logging.getLogger(__name__)
    logger.debug("detailed ranking")

    boot_size = int(args["--bootstrap"])

    for result_set in result_dirs:
        print("***{}***".format(result_set))
        scoring = collections.defaultdict(list)

        with log_time(process_time, logger,
                      "Preparing data done in {time_res:.3f}"):
            for problem_name, problem_mod, algorithms in serialization.each_result(
                    BudgetResultsExtractor(), result_set):
                for algo_name, results in algorithms:
                    for result in results:
                        if validate_cost(result, boot_size):
                            for metric_name, metric_name_long, data_process in result[
                                    "analysis"]:
                                if metric_name in best_func:
                                    data_process = list(x()
                                                        for x in data_process)
                                    data_analysis = yield_analysis(
                                        data_process, boot_size)

                                    score = data_analysis["btstrpd"]["metrics"]
                                    scoring[(problem_name, result["budget"],
                                             metric_name)].append(
                                                 (algo_name, score))

        global_scoring = collections.defaultdict(collections.Counter)

        for problem_name, budget, metric_name in scoring:
            algo_win, score = best_func[metric_name](scoring[(problem_name,
                                                              budget,
                                                              metric_name)],
                                                     key=lambda x: x[1])
            global_scoring[(budget, metric_name)].update([algo_win])

        for budget, metric_name in sorted(global_scoring):
            print("{} {} : ".format(budget, metric_name) +
                  ", ".join("{} ({})".format(score[0], score[1])
                            for score in global_scoring[
                                (budget, metric_name)].most_common()))
Example 6
def pictures_summary(args, queue):
    logger = logging.getLogger(__name__)
    logger.debug("pictures_summary")

    selected = set(args['--selected'].upper().split(','))
    boot_size = int(args['--bootstrap'])

    logger.debug('Plotting summary with selected algos: ' + ','.join(selected))

    scoring = collections.defaultdict(lambda: collections.defaultdict(dict))
    problems = set()

    with log_time(process_time, logger, "Preparing data done in {time_res:.3f}"):
        for problem_name, problem_mod, algorithms in RunResult.each_result(RESULTS_DIR):
            problems.add(problem_name)
            problem_score = collections.defaultdict(list)
            algos = list(algorithms)
            for algo_name, results in algos:
                max_result = find_acceptable_result_for_budget(list(results), boot_size)
                if max_result:
                    print('{}, {} , budget={}'.format(problem_name, algo_name, max_result['budget']))
                    for metric_name, metric_name_long, data_process in max_result["analysis"]:
                        if metric_name in ranking.best_func:
                            data_process = list(x() for x in data_process)
                            data_analysis = yield_analysis(data_process, boot_size)

                            score = math.log(math.fabs(data_analysis["btstrpd"]["metrics"]) + 1.0)

                            scoring[metric_name][algo_name][problem_name] = score
                            problem_score[metric_name].append((algo_name, score))
                else:
                    print('{}, {}, NO BUDGET'.format(problem_name, algo_name))

            for metric_name in scoring:
                if metric_name != 'pdi':

                    max_score = max(x for algo, x in problem_score[metric_name]) + 0.0001
                    for algo_name, _ in algos:
                        if algo_name in scoring[metric_name] and problem_name in scoring[metric_name][algo_name]:
                            scoring[metric_name][algo_name][problem_name] /= max_score

    plot_results_summary(problems, scoring, selected)
Example 7
def pictures_time(args):
    logger = logging.getLogger(__name__)
    logger.debug("pictures from stats")

    boot_size = int(args["--bootstrap"])
    results_dir = args["--dir"]
    plots_dir = Path(args["-o"])

    plot_data = collections.defaultdict(list)
    with log_time(process_time, logger, "Preparing data done in {time_res:.3f}"):
        for problem_name, problem_mod, algorithms in serialization.each_result(
            TimeResultsExtractor(), results_dir
        ):
            for algo_name, results in algorithms:
                for result in results:
                    time = result["time"]

                    for metric_name, metric_name_long, data_process in result[
                        "analysis"
                    ]:
                        if metric_name in best_func:
                            if metric_name == "dst from pareto":
                                metric_name = "dst"
                            data_process = list(x() for x in data_process)

                            data_analysis = yield_analysis(data_process, boot_size)

                            score = data_analysis["btstrpd"]["metrics"]
                            score_err = data_analysis["stdev"]

                            keys = [
                                (problem_name, algo_name, metric_name, group)
                                for group in algos_groups[algo_name]
                            ]
                            value = (time, 0, score, score_err)
                            for key in keys:
                                plot_data[key].append(value)
    # Tuples compare element-wise, so this picks the largest time value from
    # the first plotted series to bound the x-axis.
    max_time = max(list(plot_data.values())[0])[0]
    plot_results(plot_data, plots_dir, (0, max_time))
Example 8
def table_rank(args):
    logger = logging.getLogger(__name__)
    logger.debug("table ranking")

    boot_size = int(args["--bootstrap"])

    results = collections.defaultdict(
        lambda: collections.defaultdict(collections.Counter))

    for result_set in result_dirs:
        print("***{}***".format(result_set))
        with log_time(process_time, logger,
                      "Preparing data done in {time_res:.3f}"):
            for problem_name, problem_mod, algorithms in serialization.each_result(
                    BudgetResultsExtractor(), result_set):
                print(result_set, problem_name)
                scoring = collections.defaultdict(list)
                for algo_name, results_data in algorithms:
                    results_data = list(results_data)

                    for i in range(len(results_data)):
                        original_budget = results_data[i]["budget"]
                        if original_budget == 40:
                            result = results_data[i]
                            # result = find_acceptable_result_for_budget(
                            #     results_data[: i + 1], boot_size
                            # )
                            if result:
                                print("{} {} {} -> {}".format(
                                    problem_name,
                                    algo_name,
                                    original_budget,
                                    result["budget"],
                                ))
                                for metric_name, metric_name_long, data_process in result[
                                        "analysis"]:
                                    if metric_name in best_func:
                                        data_process = list(
                                            x() for x in data_process)
                                        data_analysis = yield_analysis(
                                            data_process, boot_size)

                                        score = data_analysis["btstrpd"][
                                            "metrics"]
                                        scoring[(original_budget,
                                                 metric_name)].append(
                                                     (algo_name, score))
                print("****{}****".format(problem_name))
                for budget, metric_name in sorted(scoring):
                    metric_scoring = scoring[(budget, metric_name)]
                    algo_win, score = best_func[metric_name](
                        metric_scoring, key=lambda x: x[1])
                    weak_winners = get_weak_winners(
                        metric_scoring, (algo_win, score),
                        winner_tolerance[metric_name])

                    # # Only strong
                    # if not weak_winners:
                    #     results[(budget, metric_name)][result_set].update([algo_win])

                    # Strong = 2 points, Weak or Winner = 1 point
                    if not weak_winners:
                        results[(budget, metric_name)][result_set].update(
                            [algo_win, algo_win])
                    else:
                        results[(budget, metric_name)][result_set].update(
                            [algo_win] +
                            [algo for algo, score in weak_winners])

                    # print('{} {} {}'.format(budget, metric_name, scoring[(budget, metric_name)]))
                    print("*****{} {}".format(budget, metric_name))
                    if not weak_winners:
                        print("*****Strong winner: {} :{}".format(
                            algo_win, score))
                    else:
                        print("*****Winner: {} :{}".format(algo_win, score))
                        print("*****Weak winners: {}".format(weak_winners))

    print("""\\begin{table}[ht]
  \\centering
    \\caption{Final results}
    \\label{tab:results}
    \\resizebox{\\textwidth}{!}{%
    \\begin{tabular}{  r@{ }l | c | c | c | }
          \\multicolumn{2}{c}{}
        & $K_0$
        & $K_1$
        & $K_2$
      \\\\ \\hline""")

    previous_budget = None
    for budget, metric_name in sorted(
            sorted(results.keys(), key=lambda x: metrics_order.index(x[1])),
            key=lambda x: x[0],
    ):
        budget_label = str(budget) + " "
        if previous_budget and previous_budget != budget:
            print("\\hdashline")
        elif previous_budget:
            budget_label = ""

        score_str = ""
        for result_set in result_dirs:
            results_counter = results[(budget, metric_name)][result_set]
            algo_ranking = results_counter.most_common(2)
            values = list(results_counter.values())
            if len(algo_ranking) == 2:
                winner = format_result(algo_ranking[0], values, 2)
                second = format_result(
                    algo_ranking[1],
                    values,
                    1 if algo_ranking[0][1] != algo_ranking[1][1] else 2,
                )
                score_str += "& {}, {}".format(winner, second)
            elif len(algo_ranking) == 1:
                winner = format_result(algo_ranking[0], values, 2)
                score_str += "& {}".format(winner)
            else:
                score_str += "& "
        print("{}& {} {}\\\\".format(budget_label, metric_name, score_str))
        previous_budget = budget

    print("""    \\end{tabular}}\n\\end{table}""")
Example 9
def force_data(args):
    # Packed single argument so the function can be mapped over a task list.
    boot_size, (metric_name, metric_name_long, data_process) = args

    # Materialize the lazy data points, then bootstrap-analyze them.
    data_process = list(x() for x in data_process)
    force_analysis = yield_analysis(data_process, boot_size)
    return metric_name, metric_name_long, data_process, force_analysis
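Packing boot_size and the analysis tuple into a single args argument is the shape multiprocessing.Pool.map expects, which suggests force_data is fanned out over a task list. A sequential sketch of that call pattern, assuming force_data and the yield_analysis stub from Example 1 are in scope, and using picklable callables for the lazy data points:

import functools

boot_size = 100
analysis = [  # hypothetical (name, long name, lazy data) entries
    ("dst", "distance from Pareto front",
     [functools.partial(float, 0.1), functools.partial(float, 0.2)]),
    ("igd", "inverted generational distance",
     [functools.partial(float, 0.3), functools.partial(float, 0.4)]),
]

tasks = [(boot_size, entry) for entry in analysis]

# Plain map here; the packed-tuple signature equally fits
# multiprocessing.Pool().map(force_data, tasks).
for metric_name, _, data, result in map(force_data, tasks):
    print(metric_name, data, result["btstrpd"]["metrics"])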