def run(self) -> Table:
        """
        Collect single-query metrics for every dataset and export them as one aggregate table.

        For each dataset in DATASET_COLUMN_ORDER the metrics of BEST_OVERALL_TECHNIQUE
        (tagged HYBRID_ID) and of the dataset's best direct technique (tagged DIRECT_ID)
        are added to a shared metric table, which is then post-processed and saved.

        :return: the table returned by the final ``save`` call of the post-processing chain
        """
        tracer = Tracer()
        metric_table = MetricTable()

        def record_queries(dataset, technique_definition, technique_type):
            # summary_metrics=False is passed so metrics are requested per query
            # rather than as a summary.
            per_query_metrics: List[Metrics] = tracer.get_metrics(
                dataset, technique_definition, summary_metrics=False)
            identifying_columns = {
                DATASET_COLNAME: dataset,
                TECHNIQUE_TYPE_COLNAME: technique_type,
            }
            metric_table.add(
                per_query_metrics, other=identifying_columns, create_index=True)

        for dataset_name in DATASET_COLUMN_ORDER:
            # Hybrid first, then direct: the best direct technique is only looked up
            # after the hybrid rows have been added.
            record_queries(dataset_name, BEST_OVERALL_TECHNIQUE, HYBRID_ID)
            record_queries(
                dataset_name, get_best_direct_technique(dataset_name), DIRECT_ID)

        # Post-process step by step; each call's result feeds the next one.
        lag_inverted = metric_table.create_lag_norm_inverted(drop_old=True)
        melted = lag_inverted.melt_metrics(
            metric_value_col_name=METRIC_SCORE_COLNAME)
        ordered = melted.sort(DATASET_COLUMN_ORDER)
        upper_cased = ordered.col_values_to_upper(METRIC_COLNAME)
        individual_queries_aggregate = upper_cased.save(EXPORT_PATH)

        # NOTE(review): the table is saved to EXPORT_PATH but the path recorded here is
        # PATH_TO_INDIVIDUAL_QUERIES_AGG -- confirm both refer to the same file.
        self.export_paths.append(PATH_TO_INDIVIDUAL_QUERIES_AGG)

        return individual_queries_aggregate
Example #2
0
def calculate_technique_metric_table(dataset: str) -> Table:
    """
    Builds a metric table holding, for every technique yielded by RetrievalTechniques,
    the technique's identifying entry together with the metrics the Tracer engine
    returns for it on the given dataset.
    :param dataset: the name of the dataset
    :return: MetricTable - one ``add`` call per technique (metrics plus identifying entry)
    """
    tracer = Tracer()
    results = MetricTable()
    all_techniques = RetrievalTechniques()

    # The loading bar wraps the technique collection; iterating the wrapper yields
    # the same (name, entry) pairs the loop below unpacks.
    loading_bar = create_loading_bar(
        EXPERIMENT_LOADING_MESSAGE,
        all_techniques,
        length=len(all_techniques),
    )
    with loading_bar as progress:
        for technique_name, technique_entry in progress:
            # Tag the entry with the technique name before it is stored with the metrics.
            technique_entry.update({NAME_COLNAME: technique_name})
            results.add(
                tracer.get_metrics(dataset, technique_name), technique_entry)

    return results
Example #3
0
from api.tables.metric_table import MetricTable
from api.tracer import Tracer

if __name__ == "__main__":
    # Demo script: compute metrics for three technique definitions on one dataset
    # and print the resulting table.
    dataset_name = "EasyClinic"

    # Technique definitions; the hybrid one embeds the other two.
    direct_technique = "(. (LSI NT) (0 2))"
    transitive_technique = "(x (PCA GLOBAL) ((. (LSI NT) (0 1)) (. (LSI NT) (1 2))))"
    hybrid_technique = f"(o (MAX) ({direct_technique} {transitive_technique}))"

    # Insertion-ordered mapping of display name -> technique definition.
    technique_definitions = {
        "direct": direct_technique,
        "transitive": transitive_technique,
        "hybrid": hybrid_technique,
    }

    metric_table = MetricTable()
    tracer = Tracer()

    for t_name, t_def in technique_definitions.items():
        metric_table.add(tracer.get_metrics(dataset_name, t_def), {"name": t_name})

    print(metric_table.table)
    def run(self) -> Table:
        """
        Compute single-query metrics for the best direct, transitive and hybrid techniques
        of one dataset, export them, and regenerate the aggregate files.

        The dataset name comes from ``prompt_for_dataset``. The per-dataset table is saved
        as ``<dataset_name>.csv`` under PATH_TO_INDIVIDUAL_QUERIES; afterwards the
        intermediate files in that folder are aggregated twice, once melted (saved to
        PATH_TO_INDIVIDUAL_QUERIES_AGG) and once unmelted (saved to
        PATH_TO_INDIVIDUAL_QUERIES_UNMELTED).

        :return: the result of saving the unmelted aggregate table
        """
        dataset_name = prompt_for_dataset()

        # Find the best technique of each type up front, before any metrics are computed.
        best_definitions = [
            (DIRECT_ID, get_best_direct_technique(dataset_name)),
            (TRANSITIVE_ID, get_best_transitive_technique(dataset_name)),
            (HYBRID_ID, get_best_hybrid_technique(dataset_name)),
        ]

        # Calculate metrics for individual queries on the dataset.
        tracer = Tracer()
        metric_table = MetricTable()
        for technique_type, technique_definition in best_definitions:
            query_metrics = tracer.get_metrics(
                dataset_name, technique_definition, summary_metrics=False
            )
            metric_table.add(
                query_metrics,
                other={TECHNIQUE_TYPE_COLNAME: technique_type},
                create_index=True,
            )

        # Export this individual run.
        export_path = os.path.join(PATH_TO_INDIVIDUAL_QUERIES, dataset_name + ".csv")
        metric_table.sort(DATASET_COLUMN_ORDER).save(export_path)
        self.export_paths.append(export_path)

        # Update the melted aggregate. Its return value is not kept: only the file
        # written by ``save`` is used from this pass.
        melted_source = Table.aggregate_intermediate_files(PATH_TO_INDIVIDUAL_QUERIES)
        melted = MetricTable(melted_source.table)
        melted = melted.create_lag_norm_inverted(drop_old=True)
        melted = melted.melt_metrics(metric_value_col_name=METRIC_SCORE_COLNAME)
        melted = melted.sort(DATASET_COLUMN_ORDER)
        melted = melted.col_values_to_upper(METRIC_COLNAME)
        melted = melted.to_title_case(exclude=METRIC_COLNAME)
        melted.save(PATH_TO_INDIVIDUAL_QUERIES_AGG)

        # Update the unmelted aggregate from a fresh read of the intermediate files.
        unmelted_source = Table.aggregate_intermediate_files(PATH_TO_INDIVIDUAL_QUERIES)
        unmelted = MetricTable(unmelted_source.table)
        unmelted = unmelted.create_lag_norm_inverted(drop_old=True)
        unmelted = unmelted.sort(DATASET_COLUMN_ORDER)
        individual_queries_aggregate = unmelted.save(PATH_TO_INDIVIDUAL_QUERIES_UNMELTED)

        # NOTE(review): only the melted aggregate path is recorded, while the value
        # returned below comes from the unmelted save -- confirm this is intended.
        self.export_paths.append(PATH_TO_INDIVIDUAL_QUERIES_AGG)

        return individual_queries_aggregate