def create_comparison_dict_hybrid_over_transitive() -> dict:
    """
    Creates a dictionary containing dataset names as keys and tuples as values of the technique definition
    of the best direct and hybrid techniques. This dictionary can be used to calculate the gain between
    techniques within metric tables.
    :return:
    """
    comparison_dict = {}
    for dataset in DATASET_COLUMN_ORDER:
        comparison_dict[dataset] = (
            get_best_transitive_technique(dataset),
            get_best_hybrid_technique(dataset),
        )
    return comparison_dict
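
For context, a hypothetical sketch of how this dictionary is presumably consumed; the call shape is borrowed from calculate_gain_between_techniques in Example #5 below, and the populated metric_table is elided:

# Hypothetical usage sketch (call shape assumed from Example #5 below);
# assumes metric_table is a MetricTable already populated with the metrics
# of both techniques for each dataset.
comparison_dict = create_comparison_dict_hybrid_over_transitive()
gain_table = metric_table.calculate_gain_between_techniques(comparison_dict)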
Example #2
from sklearn.preprocessing import minmax_scale

from api.tracer import Tracer
from experiments.evaluate_paths import change_paths_in_technique
from utilities.constants import PATH_TO_EXPLORATORY
from utilities.technique_extractors import (
    get_best_direct_technique,
    get_best_hybrid_technique,
    get_best_transitive_technique,
)

if __name__ == "__main__":
    dataset_name = "Drone"
    tracer = Tracer()

    direct_technique = get_best_direct_technique(dataset_name)
    transitive_technique = get_best_transitive_technique(dataset_name)
    hybrid_technique = get_best_hybrid_technique(dataset_name)
    new_path = ["0", "2", "1"]
    techniques = [direct_technique, transitive_technique, hybrid_technique]
    techniques = [change_paths_in_technique(t, new_path) for t in techniques]
    matrices = [
        tracer.get_technique_data(dataset_name, t).similarity_matrix
        for t in techniques
    ]
    matrices = list(map(minmax_scale, matrices))

    def get_group(percentile):
        if percentile < 1 / 3:
            return "low"
        elif percentile < 2 / 3:
            return "medium"
        else:
            return "high"
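
As an aside, a minimal self-contained sketch of the scaling and bucketing above; note that sklearn.preprocessing.minmax_scale rescales each column to [0, 1] by default, which may or may not be the intended axis for a similarity matrix:

import numpy as np
from sklearn.preprocessing import minmax_scale

def get_group(percentile):  # same thirds-based bucketing as above
    if percentile < 1 / 3:
        return "low"
    elif percentile < 2 / 3:
        return "medium"
    return "high"

matrix = np.array([[0.2, 0.9], [0.4, 0.3]])
scaled = minmax_scale(matrix)  # each column rescaled to [0, 1]
print(scaled)  # [[0. 1.] [1. 0.]]
print([get_group(p) for p in (0.1, 0.5, 0.9)])  # ['low', 'medium', 'high']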
Example #3

import numpy as np

from api.tracer import Tracer
from utilities.technique_extractors import (
    get_best_direct_technique,
    get_best_hybrid_technique,
    get_best_transitive_technique,
)

if __name__ == "__main__":
    tracer = Tracer()
    d_name = "EasyClinic"
    direct_technique = get_best_direct_technique(d_name)
    transitive_technique = get_best_transitive_technique(d_name)
    hybrid_technique = get_best_hybrid_technique(d_name)
    """
    Direct
    """
    direct_score = tracer.get_metrics(d_name, direct_technique)[0].ap
    direct_individual_metrics = tracer.get_metrics(d_name,
                                                   direct_technique,
                                                   summary_metrics=False)
    direct_scores = [m.ap for m in direct_individual_metrics]
    print(f"Direct: {direct_score}:{np.mean(direct_scores)}")
    """
    Transitive
    """
    transitive_score = tracer.get_metrics(d_name, transitive_technique)[0].ap
    transitive_individual_metrics = tracer.get_metrics("EasyClinic",
                                                       transitive_technique,
                                                       summary_metrics=False)
    transitive_scores = [m.ap for m in transitive_individual_metrics]
    print(f"Transitive: {transitive_score}:{np.mean(transitive_scores)}")
    """
    Hybrid
    """
    hybrid_score = tracer.get_metrics(d_name, hybrid_technique)[0].ap
    hybrid_individual_metrics = tracer.get_metrics(d_name,
                                                   hybrid_technique,
                                                   summary_metrics=False)
    hybrid_scores = [m.ap for m in hybrid_individual_metrics]
    print(f"Hybrid: {hybrid_score}:{np.mean(hybrid_scores)}")
Example #4

    def run(self) -> Table:
        """
        Returns a metric table containing all of the metrics calculated for each technique in df
        :return: metric table with single query metrics for each technique applied to specified dataset in row
        """
        dataset_name = prompt_for_dataset()

        """
        Find best techniques
        """
        direct_best_definition = get_best_direct_technique(dataset_name)
        transitive_best_definition = get_best_transitive_technique(dataset_name)
        combined_best_definition = get_best_hybrid_technique(dataset_name)

        """
        Calculate metrics for individual queries on dataset
        """
        tracer = Tracer()
        metric_table = MetricTable()

        direct_metrics: List[Metrics] = tracer.get_metrics(
            dataset_name, direct_best_definition, summary_metrics=False
        )
        metric_table.add(
            direct_metrics, other={TECHNIQUE_TYPE_COLNAME: DIRECT_ID}, create_index=True
        )

        transitive_metrics: List[Metrics] = tracer.get_metrics(
            dataset_name, transitive_best_definition, summary_metrics=False
        )
        metric_table.add(
            transitive_metrics,
            other={TECHNIQUE_TYPE_COLNAME: TRANSITIVE_ID},
            create_index=True,
        )

        combined_metrics: List[Metrics] = tracer.get_metrics(
            dataset_name, combined_best_definition, summary_metrics=False
        )
        metric_table.add(
            combined_metrics,
            other={TECHNIQUE_TYPE_COLNAME: HYBRID_ID},
            create_index=True,
        )

        """
        Export individual run
        """
        export_path = os.path.join(PATH_TO_INDIVIDUAL_QUERIES, dataset_name + ".csv")
        metric_table.sort(DATASET_COLUMN_ORDER).save(export_path)
        self.export_paths.append(export_path)

        """
        Update aggregate
        """

        # Build and export the melted (long-format) aggregate; this chain is used
        # only for its save() side effect.
        (
            MetricTable(
                Table.aggregate_intermediate_files(PATH_TO_INDIVIDUAL_QUERIES).table
            )
            .create_lag_norm_inverted(drop_old=True)
            .melt_metrics(metric_value_col_name=METRIC_SCORE_COLNAME)
            .sort(DATASET_COLUMN_ORDER)
            .col_values_to_upper(METRIC_COLNAME)
            .to_title_case(exclude=METRIC_COLNAME)
            .save(PATH_TO_INDIVIDUAL_QUERIES_AGG)
        )

        # Build and export the unmelted aggregate; this is the table returned below.
        individual_queries_aggregate = (
            MetricTable(
                Table.aggregate_intermediate_files(PATH_TO_INDIVIDUAL_QUERIES).table
            )
            .create_lag_norm_inverted(drop_old=True)
            .sort(DATASET_COLUMN_ORDER)
            .save(PATH_TO_INDIVIDUAL_QUERIES_UNMELTED)
        )

        # Record the melted aggregate's export path
        self.export_paths.append(PATH_TO_INDIVIDUAL_QUERIES_AGG)

        return individual_queries_aggregate
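
For intuition, melt_metrics presumably reshapes the wide per-metric columns into long format; a rough pandas equivalent, with toy values and with the column names ("dataset", "metric", "metric_score") assumed rather than taken from the repository:

import pandas as pd

# Toy wide table: one column per metric.
wide = pd.DataFrame({
    "dataset": ["EasyClinic", "Drone"],
    "ap": [0.61, 0.42],
    "auc": [0.88, 0.79],
})
# Melt into long format: one (dataset, metric, score) row per cell.
long = wide.melt(id_vars="dataset", var_name="metric", value_name="metric_score")
print(long)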
Example #5
    def run(self) -> Table:
        tracer = Tracer()

        def get_metrics(d_name, t_def: str):
            return tracer.get_metrics(d_name, t_def)

        def add_metrics(d_name, t_def: str, t_type: str, p_name: str):
            t_metrics = get_metrics(d_name, t_def)
            metric_table.add(
                t_metrics,
                {
                    DATASET_COLNAME: d_name,
                    "path": p_name,
                    "type": t_type,
                    NAME_COLNAME: t_def,
                },
            )

        aggregate_gain = None
        aggregate_metric = None
        for path in POSSIBLE_PATHS:
            metric_table = MetricTable()
            comparison_dict = {}
            path_name = path_to_str(path)

            for dataset_name in DATASET_COLUMN_ORDER:
                source_index = str(path[0])
                intermediate_index = str(path[1])
                target_index = str(path[2])

                new_path = [source_index, intermediate_index, target_index]

                # direct
                direct_technique_def = change_paths_in_technique(
                    get_best_direct_technique(dataset_name), new_path)
                add_metrics(
                    dataset_name,
                    direct_technique_def,
                    DIRECT_ID,
                    path_name,
                )

                # transitive
                transitive_technique_def = change_paths_in_technique(
                    get_best_transitive_technique(dataset_name), new_path)
                add_metrics(
                    dataset_name,
                    transitive_technique_def,
                    TRANSITIVE_ID,
                    path_name,
                )

                # hybrid
                hybrid_technique_definition = change_paths_in_technique(
                    get_best_hybrid_technique(dataset_name), new_path)
                add_metrics(
                    dataset_name,
                    hybrid_technique_definition,
                    HYBRID_ID,
                    path_name,
                )
                comparison_dict[dataset_name] = (
                    direct_technique_def,
                    hybrid_technique_definition,
                )
            gain_table = metric_table.calculate_gain_between_techniques(
                comparison_dict)
            gain_table.table["path"] = path_name

            aggregate_gain = (
                gain_table.table
                if aggregate_gain is None
                else pd.concat([gain_table.table, aggregate_gain])
            )

            aggregate_metric = (
                metric_table.table
                if aggregate_metric is None
                else pd.concat([metric_table.table, aggregate_metric])
            )

            # Save the running aggregates after each path; the same export files are
            # overwritten on every iteration, so only the final write persists, and
            # the export paths are appended repeatedly.
            MetricTable(aggregate_metric).create_lag_norm_inverted(
                drop_old=True).melt_metrics().save(METRIC_TABLE_EXPORT_PATH)
            self.export_paths.append(METRIC_TABLE_EXPORT_PATH)

            MetricTable(aggregate_gain).melt_metrics().save(
                GAIN_TABLE_EXPORT_PATH)
            self.export_paths.append(GAIN_TABLE_EXPORT_PATH)
        return aggregate_gain
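
For reference, a minimal sketch of what calculate_gain_between_techniques plausibly computes; the relative-gain formula, the toy AP values, and the column names are assumptions, not the repository's actual implementation:

import pandas as pd

def relative_gain(baseline: float, improved: float) -> float:
    # Assumed formula: gain of the improved technique over the baseline.
    return (improved - baseline) / baseline

# dataset -> (baseline metric, improved metric); toy values for illustration.
comparison = {"Drone": (0.42, 0.55), "EasyClinic": (0.61, 0.64)}
rows = [{"dataset": d, "gain": relative_gain(b, i)} for d, (b, i) in comparison.items()]
print(pd.DataFrame(rows))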