def fmax(param_dict):
            """Score one (support, confidence) candidate.

            A CBA model is fitted on ``txns`` with the candidate thresholds,
            its rules are wrapped in an IDS classifier, and that classifier is
            evaluated on ``quant_df_test``.

            Args:
                param_dict: mapping with ``"support"`` and ``"confidence"``
                    entries; each value is divided by 1000 before it is
                    passed to ``CBA``.

            Returns:
                The value of ``score_auc`` on ``quant_df_test`` when
                ``is_solution_interpretable`` accepts the metrics; otherwise
                the negated ``solution_interpretability_distance``.
            """
            print(param_dict)

            min_support = param_dict["support"] / 1000
            min_confidence = param_dict["confidence"] / 1000
            print({"support": min_support, "confidence": min_confidence})

            cba_model = CBA(support=min_support, confidence=min_confidence)
            cba_model.fit(txns)
            cba_classifier = cba_model.clf

            # Re-use the CBA rules inside an IDS classifier so the IDS scoring
            # methods can be applied to them.
            ids_model = IDS()
            converted_rules = IDSRuleSet.from_cba_rules(cba_classifier.rules)
            classifier = IDSClassifier(converted_rules.ruleset)
            classifier.quant_dataframe_train = quant_df
            classifier.calculate_default_class()
            ids_model.clf = classifier

            interpretability = ids_model.score_interpretability_metrics(
                quant_df_test)

            if is_solution_interpretable(interpretability):
                auc_value = ids_model.score_auc(quant_df_test)
                print(auc_value)
                return auc_value

            # Non-interpretable candidates are scored by their negated
            # distance instead of their AUC.
            penalty = -solution_interpretability_distance(interpretability)
            print(penalty)
            return penalty
            def fmax(lambda_dict):
                """Fit IDS with the given lambda weights and return its test AUC.

                Args:
                    lambda_dict: mapping whose values, in iteration order,
                        are passed to ``IDS.fit`` as ``lambda_array``.

                Returns:
                    The value of ``score_auc`` on ``quant_df_test``.
                """
                print(lambda_dict)

                model = IDS(algorithm=algorithm)
                lambda_values = list(lambda_dict.values())
                model.fit(
                    class_association_rules=cars,
                    quant_dataframe=quant_df,
                    lambda_array=lambda_values,
                )

                test_auc = model.score_auc(quant_df_test)
                print(test_auc)
                return test_auc
예제 #3
0
        def fmax(param_dict):
            """Return the test AUC of CBA rules mined with the given thresholds.

            Args:
                param_dict: mapping with ``"support"`` and ``"confidence"``
                    entries; each value is divided by 1000 before it is
                    passed to ``CBA``.

            Returns:
                The value of ``score_auc`` on ``quant_df_test`` for an IDS
                classifier built from the CBA rules.
            """
            print(param_dict)

            min_support = param_dict["support"] / 1000
            min_confidence = param_dict["confidence"] / 1000
            print({"support": min_support, "confidence": min_confidence})

            cba_model = CBA(support=min_support, confidence=min_confidence)
            cba_model.fit(txns)
            cba_classifier = cba_model.clf

            # Wrap the CBA rules in an IDS classifier so IDS scoring applies.
            ids_model = IDS()
            converted_rules = IDSRuleSet.from_cba_rules(cba_classifier.rules)
            classifier = IDSClassifier(converted_rules.ruleset)
            classifier.quant_dataframe_train = quant_df
            classifier.calculate_default_class()
            ids_model.clf = classifier

            auc_value = ids_model.score_auc(quant_df_test)
            print(auc_value)
            return auc_value
예제 #4
0
def fmax(lambda_dict):
    """Fit RUSM-based IDS with the given lambdas and score it on ``quant_df``.

    Args:
        lambda_dict: mapping whose values, in iteration order, are passed to
            ``IDS.fit`` as ``lambda_array``.

    Returns:
        ``0`` when ``is_solution_interpretable`` rejects the metrics computed
        on ``quant_df``; otherwise the value of ``score_auc`` on ``quant_df``.
    """
    print(lambda_dict)

    model = IDS(algorithm="RUSM")
    model.fit(
        class_association_rules=cars,
        quant_dataframe=quant_df,
        lambda_array=list(lambda_dict.values()),
    )

    interpretability = model.score_interpretability_metrics(quant_df)

    if is_solution_interpretable(interpretability):
        auc_value = model.score_auc(quant_df)
        print(auc_value)
        return auc_value

    # Non-interpretable solutions get a flat score of zero (nothing printed).
    return 0
예제 #5
0
    quant_df = QuantitativeDataFrame(df)

    # Time ``IDS.fit`` for every optimisation algorithm, repeating each
    # measurement ``time_estimation_iterations`` times and averaging.
    for algorithm in ["DLS", "SLS", "DUSM", "RUSM"]:
        # Fit times for this algorithm, one entry per repetition.
        durations = []

        for iteration in range(time_estimation_iterations):
            print(iteration)
            lambda_array = generate_lambda_array()

            print(f"data count: {data_count}")
            print(f"algorithm: {algorithm}")
            print(f"using lambda: {lambda_array}")

            # Rule mining happens before the clock starts, so only the
            # ``fit`` call is measured.
            cars = mine_CARs(df, rule_cutoff=rule_cutoff)

            ids = IDS(algorithm=algorithm)
            # perf_counter() is monotonic and high-resolution, so the
            # measured interval cannot be distorted by system clock changes.
            start = time.perf_counter()
            ids.fit(class_association_rules=cars,
                    quant_dataframe=quant_df,
                    lambda_array=lambda_array)
            duration = time.perf_counter() - start

            # This is a single-run timing; the average is printed once
            # after the inner loop.
            print(f"duration: {duration}")

            durations.append(duration)

        duration = np.mean(durations)

        print(f"avg duration: {duration}")

        benchmark_data.append(
                                     ternary_search_precision=5,
                                     max_iterations=1,
                                     extension_precision=-1,
                                     func_args_extension=dict(support=0,
                                                              confidence=0))

        # Run the parameter search; the result is indexed positionally below
        # (element 0 -> support, element 1 -> confidence).
        best_pars = coord_asc.fit()

        print("best_pars:", best_pars)
        # Both values are divided by 1000 before being passed to CBA.
        support, confidence = best_pars[0] / 1000, best_pars[1] / 1000

        # Refit CBA on the transactions with the selected thresholds.
        cba = CBA(support=support, confidence=confidence)
        cba.fit(txns)
        cba_clf = cba.clf

        # Wrap the CBA rules in an IDS classifier so the IDS scoring methods
        # can be applied to them.
        ids = IDS()
        ids_clf = IDSClassifier(
            IDSRuleSet.from_cba_rules(cba_clf.rules).ruleset)
        ids_clf.quant_dataframe_train = quant_df
        ids_clf.calculate_default_class()

        ids.clf = ids_clf

        # One benchmark record: identifying fields plus AUC on the test frame.
        data = dict(dataset_name=dataset_name,
                    algorithm="pyARC",
                    auc=ids.score_auc(quant_df_test, order_type="cba"),
                    rule_cutoff=rule_cutoff)

        # Add the interpretability metrics to the same record.
        data.update(ids.score_interpretability_metrics(quant_df_test))

        print(data)
예제 #7
0
    # Benchmark IDS fit time for the current rule count ``i``.
    rule_count = i

    # NOTE(review): ``rules`` is not used in the visible code and the same
    # call is repeated for ``cars`` below — confirm whether it can be dropped.
    rules = mine_CARs(df, rule_count)

    quant_df = QuantitativeDataFrame(df)

    cars = mine_CARs(df, rule_count)
    print(len(cars))

    for algorithm in ["DLS", "SLS"]:
        # Elapsed times of the 10 repetitions for this algorithm.
        times = []

        for _ in range(10):
            start_time = time.time()

            # The timed region covers both constructing and fitting the model.
            ids = IDS(algorithm=algorithm)
            ids.fit(quant_dataframe=quant_df, class_association_rules=cars)

            total_time = time.time() - start_time
            times.append(total_time)

        # One record per (rule_count, algorithm) with the mean elapsed time.
        benchmark_data = dict(rule_count=rule_count,
                              duration=np.mean(times),
                              algorithm=algorithm)

        print(benchmark_data)

        iris_benchmark.append(benchmark_data)

    times = []
예제 #8
0
import pandas as pd
import numpy as np
from pyids.algorithms.ids import IDS
from pyids.algorithms import mine_CARs

from pyarc.qcba.data_structures import QuantitativeDataFrame

import random
import time

# Demo: mine class association rules from the iris sample, fit IDS with the
# RUSM algorithm, then print the fit time, the selected rules and the AUC.
df = pd.read_csv("../../../data/iris0.csv")
cars = mine_CARs(df, 15, sample=False)

quant_dataframe = QuantitativeDataFrame(df)

# perf_counter() is monotonic and high-resolution, which makes it the right
# clock for measuring an elapsed interval (time.time() can jump when the
# system clock is adjusted).
start = time.perf_counter()
ids = IDS(algorithm="RUSM")
ids.fit(class_association_rules=cars,
        quant_dataframe=quant_dataframe,
        random_seed=None,
        lambda_array=[1, 1, 1, 1, 1, 1, 1000000000])
end = time.perf_counter()

# Elapsed seconds for constructing and fitting the model.
print(end - start)

for r in ids.clf.rules:
    print(r)

# The AUC is computed on the same frame the model was fitted on.
auc = ids.score_auc(quant_dataframe)

print("AUC", auc)
                                         func_args_ranges=dict(l1=(1, 1000),
                                                               l2=(1, 1000),
                                                               l3=(1, 1000),
                                                               l4=(1, 1000),
                                                               l5=(1, 1000),
                                                               l6=(1, 1000),
                                                               l7=(1, 1000)),
                                         ternary_search_precision=50,
                                         max_iterations=2)

            # Run the lambda search; its result is used as ``lambda_array``
            # for every fit below.
            best_lambda = coord_asc.fit()

            # One entry per repeated fit: AUC, metrics and interpretability.
            scores = []

            # Fit and score the model 10 times with the same lambdas.
            for _ in range(10):
                ids = IDS(algorithm=algorithm)
                ids.fit(quant_df, cars, lambda_array=best_lambda)
                auc = ids.score_auc(quant_df_test)
                metrics = ids.score_interpretability_metrics(quant_df_test)

                interpretable = is_solution_interpretable(metrics)

                scores.append(
                    dict(auc=auc, metrics=metrics,
                         interpretable=interpretable))

            # Descending sort on (interpretable, auc): assuming
            # ``interpretable`` is a bool, interpretable runs come first and,
            # within each group, higher AUC comes first.
            scores.sort(key=lambda x: (x["interpretable"], x["auc"]),
                        reverse=True)

            data = dict(dataset_name=dataset_name,
                        algorithm=algorithm,
예제 #10
0
    # Load the train and test splits; both directories use the same filename.
    df_train = pd.read_csv(os.path.join(dataset_path_train, dataset_filename))
    df_test = pd.read_csv(os.path.join(dataset_path_test, dataset_filename))

    # NOTE(review): ``txns_test`` is not used in the visible code — confirm
    # whether it is needed.
    txns_train = TransactionDB.from_DataFrame(df_train)
    txns_test = TransactionDB.from_DataFrame(df_test)

    quant_df_train = QuantitativeDataFrame(df_train)
    quant_df_test = QuantitativeDataFrame(df_test)

    # Fit CBA with fixed thresholds on the training transactions.
    cba = CBA(support=0.1, confidence=0.1)
    cba.fit(txns_train)

    rules = cba.clf.rules
    ids_ruleset = IDSRuleSet.from_cba_rules(rules)

    # Put the CBA rules into an IDS classifier so the IDS interpretability
    # metrics can be computed for them; the default class is copied from the
    # CBA classifier rather than recalculated.
    ids = IDS()
    ids.clf = IDSClassifier(ids_ruleset.ruleset)
    ids.clf.default_class = cba.clf.default_class

    metrics_dict = ids.score_interpretability_metrics(quant_df_test)

    # One benchmark record per dataset file: identifying fields plus metrics.
    benchmark_dict = dict(dataset_filename=dataset_filename, algorithm="cba")

    benchmark_dict.update(metrics_dict)
    print(benchmark_dict)

    benchmark_list.append(benchmark_dict)

# Collect the per-dataset records into one table and write it out as CSV
# without the index column.
benchmark_df = pd.DataFrame(benchmark_list)
benchmark_df.to_csv("output_data/cba_interpretability_benchmark.csv",
                    index=False)
예제 #11
0
        # Record identifying this run: file, rule count and algorithm.
        acc_score_dict = dict()
        acc_score_dict["file"] = train_file
        acc_score_dict["rule_count"] = rule_count
        acc_score_dict["algorithm"] = algorithm

        # Independent shallow copy with the same identifying fields.
        auc_score_dict = dict(acc_score_dict)

        df_train = pd.read_csv(os.path.join(train_directory, train_file))
        quant_df_train = QuantitativeDataFrame(df_train)

        # The test split is looked up under the same filename as the train
        # split, in ``test_directory``.
        df_test = pd.read_csv(os.path.join(test_directory, train_file))
        quant_df_test = QuantitativeDataFrame(df_test)

        cars = mine_CARs(df_train, rule_count)

        ids = IDS(algorithm=algorithm)
        ids.fit(quant_dataframe=quant_df_train, class_association_rules=cars)

        # Score the fitted model on the test frame.
        acc = ids.score(quant_df_test)

        accuracy_dict[train_file] = acc

        acc_score_dict["accuracy"] = acc
        """
        print("training multi")
        ids_multi = IDSOneVsAll(algorithm=algorithm)
        ids_multi.fit(quant_dataframe=quant_df_train, rule_cutoff=rule_count)

        auc = ids_multi.score_auc(quant_df_test)

        auc_score_dict["auc"] = auc
    # File lists for the train and test splits of this dataset; they are
    # paired positionally by ``zip`` below.
    train_files = get_dataset_files(train_path, dataset_name)
    test_files = get_dataset_files(test_path, dataset_name)
    # Benchmark every optimisation algorithm on every train/test file pair.
    for algorithm in ["SLS", "DLS", "DUSM", "RUSM"]:
        for train_file, test_file in zip(train_files, test_files):
            dataset_path = os.path.join(train_path, train_file)
            dataset_test_path = os.path.join(test_path, test_file)

            df = pd.read_csv(dataset_path)
            quant_df = QuantitativeDataFrame(df)

            df_test = pd.read_csv(dataset_test_path)
            quant_df_test = QuantitativeDataFrame(df_test)

            # Rules are mined from the training frame only.
            cars = mine_CARs(df, rule_cutoff)

            ids = IDS(algorithm=algorithm)
            ids.fit(quant_df, cars)

            # Both scores are computed on the test frame.
            auc = ids.score_auc(quant_df_test)
            metrics = ids.score_interpretability_metrics(quant_df_test)

            # One benchmark record per (algorithm, file pair).
            data = dict(dataset_name=dataset_name,
                        algorithm=algorithm,
                        auc=auc,
                        rule_cutoff=rule_cutoff)

            data.update(metrics)

            print(data)

            benchmark_data.append(data)
예제 #13
0
from pyids.algorithms.ids import IDS
from pyids.algorithms import mine_CARs
from pyarc.qcba.data_structures import QuantitativeDataFrame
from pyids.model_selection.coordinate_ascent import CoordinateAscent

# pandas is required for the read_csv calls below but was missing from this
# script's import block.
import pandas as pd

# Demo: fit IDS with the DUSM algorithm on the iris training split using a
# fixed lambda array and print the resulting AUC.

# NOTE(review): ``lambda_dict`` is not referenced anywhere below; only
# ``lambda_array`` is passed to ``fit``. Confirm whether it can be removed.
lambda_dict = {
    'l1': 124.16415180612711,
    'l2': 38.896662094192955,
    'l3': 557.0996799268405,
    'l4': 638.188385916781,
    'l5': 136.48056698673983,
    'l6': 432.1760402377687,
    'l7': 452.1563786008231
}
lambda_array = [
    665.9341563786008, 271.7242798353909, 212.34156378600824,
    20.489711934156375, 648.5761316872428, 911, 560
]

df = pd.read_csv("C:/code/python/machine_learning/assoc_rules/train/iris0.csv")
quant_df = QuantitativeDataFrame(df)
quant_df_test = QuantitativeDataFrame(
    pd.read_csv("C:/code/python/machine_learning/assoc_rules/test/iris0.csv"))

cars = mine_CARs(df, 20)

ids = IDS(algorithm="DUSM")
ids.fit(quant_df, cars, lambda_array=lambda_array)

# NOTE(review): the AUC is printed for the training frame; ``quant_df_test``
# is built above but not used — confirm which frame was intended.
print(ids.score_auc(quant_df))