Exemple #1
0
    def test_model_fitting(self):
        """Fit IDS on the Titanic data and sanity-check the resulting AUC.

        The CSV is read through a repository-relative path, so the test has
        to be started from the project root (the directory holding ``data/``).
        """
        # Relative path instead of a machine-specific absolute one, so the
        # test also runs on checkouts other than the original author's.
        df = pd.read_csv("data/titanic.csv")
        cars = mine_CARs(df, rule_cutoff=40)

        quant_df = QuantitativeDataFrame(df)
        ids = IDS()
        ids.fit(quant_df, cars, debug=False)
        auc = ids.score_auc(quant_df)

        # Without a check the test could only fail by raising.  An AUC lies
        # in [0, 1]; NaN fails the comparison as well.
        assert 0.0 <= auc <= 1.0, "AUC out of range: %r" % (auc,)
Exemple #2
0
    def test_random_seed(self):
        """Check that repeated mining and repeated seeded fitting agree.

        Rules are mined from the iris data several times and IDS is fitted
        several times with ``random_seed=2``.  Within each batch every result
        is compared against the first one using ``_all_rules_same``; all
        comparisons must be truthy.
        """
        n_runs = 10
        n_cars = 10

        df = pd.read_csv("data/iris0.csv")
        quant_df = QuantitativeDataFrame(df)

        # ---- repeated mining -------------------------------------------
        mined_runs = []
        for _ in range(n_runs):
            # Start every run from the same ClassAssocationRule.id
            # (presumably a rule-id counter -- confirm in pyarc).
            ClassAssocationRule.id = 0
            cars = mine_CARs(df, n_cars)
            mined_runs.append(cars)

        mining_matches = [
            _all_rules_same(mined_runs[0], candidate)
            for candidate in mined_runs
        ]
        self.assertTrue(np.all(mining_matches))

        # ---- repeated fitting ------------------------------------------
        # ``cars`` is the rule list produced by the last mining run above.
        fitted_rule_sets = []
        for _ in range(n_runs):
            model = IDS().fit(quant_dataframe=quant_df,
                              class_association_rules=cars,
                              debug=False,
                              random_seed=2)
            fitted_rule_sets.append(model.clf.rules)

        fitting_matches = [
            _all_rules_same(fitted_rule_sets[0], candidate)
            for candidate in fitted_rule_sets
        ]
        self.assertTrue(np.all(fitting_matches))
Exemple #3
0
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import random

from sklearn.metrics import accuracy_score, auc, roc_auc_score

from pyids.ids_rule import IDSRule
from pyids.ids_ruleset import IDSRuleSet
from pyids.ids_objective_function import ObjectiveFunctionParameters, IDSObjectiveFunction
from pyids.ids_optimizer import RSOptimizer, SLSOptimizer
from pyids.ids_cacher import IDSCacher
from pyids.ids_classifier import IDS, mine_CARs

from pyids.visualization import IDSVisualization

from pyarc.qcba import *

# Fit IDS on the iris sample and visualize the fitted model.
# The CSV is addressed relative to the project root (run the script from
# there); the previous absolute Windows path only existed on one machine.
df = pd.read_csv("./data/iris0.csv")
cars = mine_CARs(df, 20)

quant_df = QuantitativeDataFrame(df)

ids = IDS()
ids.fit(quant_df, cars, debug=False)

# The visualization is built from the fitted model and the same data frame.
viz = IDSVisualization(ids, quant_df)

viz.visualize_dataframe()
Exemple #4
0
# Benchmark: mean IDS fitting time for each sampled data size.
# Each record appended to ``benchmark_data`` has the keys "duration"
# (mean seconds over PY_IDS_DURATION_ITERATIONS fits) and "data_size".
benchmark_data = []

# Not passed to fit() below -- use a separate hyperparameter search routine
# to tune the lambdas.  Kept outside the loops so that building it is not
# part of the measured interval.
lambda_array = [1.0] * 7

for data_size in data_size_quantiles:
    current_cars = cars[:RULE_COUNT_TO_USE]
    current_df = df.sample(data_size)
    current_quant_df = QuantitativeDataFrame(current_df)

    times = []

    for _ in range(PY_IDS_DURATION_ITERATIONS):
        # perf_counter() is a monotonic, high-resolution clock meant for
        # measuring intervals; time.time() can jump when the system clock
        # is adjusted.
        time1 = time.perf_counter()
        ids = IDS()
        ids.fit(class_association_rules=current_cars,
                quant_dataframe=current_quant_df,
                debug=False)
        time2 = time.perf_counter()

        duration = time2 - time1
        times.append(duration)

    benchmark_data.append(dict(duration=np.mean(times), data_size=data_size))

# In[32]:

# Turn the collected benchmark records into a DataFrame.
benchmark_dataframe_pyids = pd.DataFrame(benchmark_data)

# Bare expression, apparently left over from a notebook cell (see the
# "# In[..]" marker): it displays the frame in a notebook and has no effect
# when the file is run as a plain script.
benchmark_dataframe_pyids
import pandas as pd
from collections import Counter

from pyarc.qcba.data_structures import QuantitativeDataFrame

from pyids.ids_classifier import IDS, mine_CARs
# Fit IDS on the Titanic data, then print score_auc() twice:
# first with confidence_based=False, then with confidence_based=True.
df = pd.read_csv("./data/titanic.csv")
cars = mine_CARs(df, 20)
quant_dataframe = QuantitativeDataFrame(df)

ids = IDS()
ids.fit(quant_dataframe=quant_dataframe,
        class_association_rules=cars,
        debug=False)

for confidence_based in (False, True):
    print(ids.score_auc(quant_dataframe, confidence_based=confidence_based))
# Compare three IDS models that are fitted on the same training data and
# rules but with different lambda arrays, then collect ruleset statistics
# for each of them on the test data.
quant_df_train = QuantitativeDataFrame(df_train)
quant_df_test = QuantitativeDataFrame(df_test)

# Lambda arrays; the names suggest they come from a coordinate-ascent and a
# random-search run respectively (not verifiable from this snippet).
lambda_array_coordinate_ascent = [18] * 7
lambda_array_random_search = [
    510.6938775510204,
    694.1836734693878,
    918.4489795918367,
    286.42857142857144,
    490.30612244897964,
    694.1836734693878,
    62.163265306122454,
]

ids_ascent, ids_random, ids_basic = IDS(), IDS(), IDS()

# Fit in the order ascent -> random -> basic; the basic model uses all-ones
# lambdas.
for model, lambdas in (
    (ids_ascent, lambda_array_coordinate_ascent),
    (ids_random, lambda_array_random_search),
    (ids_basic, [1] * 7),
):
    model.fit(quant_df_train, cars, debug=False, lambda_array=lambdas)

ascent_metrics, random_metrics, basic_metrics = [
    calculate_ruleset_statistics(IDSRuleSet(fitted.clf.rules), quant_df_test)
    for fitted in (ids_ascent, ids_random, ids_basic)
]

# Only the ascent metrics are extended with the test-set AUC here.
ascent_auc = ids_ascent.score_auc(quant_df_test)
ascent_metrics.update({"auc": ascent_auc})
Exemple #7
0
import pandas as pd
from pyids.ids_classifier import IDS, mine_CARs

from pyarc.qcba.data_structures import QuantitativeDataFrame


# Fit IDS (debug output enabled) on the Titanic data and print the AUC
# obtained on that same data.
df = pd.read_csv("./data/titanic.csv")
cars = mine_CARs(df, rule_cutoff=80)
quant_dataframe = QuantitativeDataFrame(df)

ids = IDS()
ids.fit(quant_dataframe, cars, debug=True)

auc = ids.score_auc(quant_dataframe)
print(auc)

import random
from collections import Counter

from pyarc.qcba import *

from pyarc.algorithms import createCARs, top_rules
from pyarc import TransactionDB

from pyids.ids_classifier import IDS, mine_IDS_ruleset, mine_CARs
from pyids.ids_cacher import IDSCacher
from pyids.ids_ruleset import IDSRuleSet
from pyids.ids_rule import IDSRule
from pyids.model_selection import encode_label, mode, RandomSearchOptimizer

# Fit IDS with algorithm="DLS" on the Titanic data, then print the AUC and
# the rules held by the fitted classifier.
df = pd.read_csv("./data/titanic.csv")

# Turn the "Died" column into strings with a trailing underscore.
died_as_text = df["Died"].astype(str)
df["Died"] = died_as_text + "_"

cars = mine_CARs(df, 100)
quant_dataframe = QuantitativeDataFrame(df)

ids = IDS()
ids.fit(quant_dataframe, cars, algorithm="DLS")

# NOTE: despite its name, ``acc`` holds the value returned by score_auc().
acc = ids.score_auc(quant_dataframe)

print(acc)
print(ids.clf.rules)
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import random

from sklearn.metrics import accuracy_score, auc, roc_auc_score

from pyids.ids_rule import IDSRule
from pyids.ids_ruleset import IDSRuleSet
from pyids.ids_objective_function import ObjectiveFunctionParameters, IDSObjectiveFunction
from pyids.ids_optimizer import RSOptimizer, SLSOptimizer
from pyids.ids_cacher import IDSCacher
from pyids.ids_classifier import IDS, mine_CARs

from pyids.visualization import IDSVisualization

from pyarc.qcba import *

# Fit IDS (debug output enabled) on the iris sample and visualize the model.
# The CSV is addressed relative to the project root (run the script from
# there); the previous absolute Windows path only existed on one machine.
df = pd.read_csv("./data/iris0.csv")
cars = mine_CARs(df, 20)

quant_df = QuantitativeDataFrame(df)

ids = IDS()
ids.fit(quant_df, cars, debug=True)

# The visualization is built from the fitted model and the same data frame.
viz = IDSVisualization(ids, quant_df)

viz.visualize_dataframe()
Exemple #10
0
import pandas as pd
from collections import Counter

from pyarc.qcba.data_structures import QuantitativeDataFrame

from pyids.ids_classifier import IDS, mine_CARs

# Fit IDS with algorithm="SLS" on the Titanic data and keep the value
# returned by score() on that same data in ``acc``.
df = pd.read_csv("./data/titanic.csv")
cars = mine_CARs(df, 20)
quant_dataframe = QuantitativeDataFrame(df)

fit_options = dict(debug=False, algorithm="SLS")

ids = IDS()
ids.fit(quant_dataframe, cars, **fit_options)

acc = ids.score(quant_dataframe)