Esempio n. 1
0
    def test_model_fitting(self):
        """Fit an IDS model on the Titanic data and check that the AUC is a valid score.

        The original test computed the AUC but asserted nothing, so it could
        never fail on a scoring regression.
        """
        # NOTE(review): absolute Windows path — assumes a fixed local data
        # layout; TODO confirm / move to a relative path like the other tests.
        df = pd.read_csv(
            "C:/code/python/interpretable_decision_sets/data/titanic.csv")
        cars = mine_CARs(df, rule_cutoff=40)

        quant_df = QuantitativeDataFrame(df)
        ids = IDS()
        ids.fit(quant_df, cars, debug=False)
        auc = ids.score_auc(quant_df)

        # A valid AUC must lie in [0, 1].
        self.assertTrue(0.0 <= auc <= 1.0)
Esempio n. 2
0
    def test_optimization(self):
        """Run coordinate-ascent lambda optimization and verify it yields a result.

        The original test ran the optimizer but asserted nothing, so a broken
        optimizer returning None would still pass.
        """
        # NOTE(review): absolute Windows path — assumes a fixed local data layout.
        df = pd.read_csv("C:/code/python/interpretable_decision_sets/data/titanic.csv")
        ids_ruleset = mine_IDS_ruleset(df, rule_cutoff=40)

        quant_df = QuantitativeDataFrame(df)
        # Same dataframe used for training and validation — acceptable for a
        # smoke test of the optimization loop.
        ascent = CoordinateAscentOptimizer(IDS(), maximum_consecutive_iterations=1)
        lambdas = ascent.fit(ids_ruleset, quant_df, quant_df)

        # At minimum the optimizer must produce a lambda vector.
        self.assertIsNotNone(lambdas)
Esempio n. 3
0
    def test_random_seed(self):
        """Rule mining and IDS fitting with a fixed seed must be reproducible."""
        replications_n = 10
        cars_to_mine = 10

        df = pd.read_csv("data/iris0.csv")
        quant_df = QuantitativeDataFrame(df)

        # Mine the same rule set several times.  The global rule-id counter is
        # reset before each run so that rule identity is comparable across runs.
        mined_cars_runs = []
        for _ in range(replications_n):
            ClassAssocationRule.id = 0
            cars = mine_CARs(df, cars_to_mine)
            mined_cars_runs.append(cars)

        mining_matches = [
            _all_rules_same(mined_cars_runs[0], mined_cars_runs[run_idx])
            for run_idx in range(replications_n)
        ]

        self.assertTrue(np.all(mining_matches))

        # Fit IDS repeatedly with a fixed random seed on the last mined rule
        # set; every fit must select the same rules.
        fitted_rule_sets = []
        for _ in range(replications_n):
            ids = IDS()
            ids = ids.fit(quant_dataframe=quant_df,
                          class_association_rules=cars,
                          debug=False,
                          random_seed=2)
            fitted_rule_sets.append(ids.clf.rules)

        fitting_matches = [
            _all_rules_same(fitted_rule_sets[0], fitted_rule_sets[run_idx])
            for run_idx in range(replications_n)
        ]

        self.assertTrue(np.all(fitting_matches))
Esempio n. 4
0
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import random

from sklearn.metrics import accuracy_score, auc, roc_auc_score

from pyids.ids_rule import IDSRule
from pyids.ids_ruleset import IDSRuleSet
from pyids.ids_objective_function import ObjectiveFunctionParameters, IDSObjectiveFunction
from pyids.ids_optimizer import RSOptimizer, SLSOptimizer
from pyids.ids_cacher import IDSCacher
from pyids.ids_classifier import IDS, mine_CARs

from pyids.visualization import IDSVisualization

from pyarc.qcba import *

# Load the iris dataset, mine 20 class association rules, fit an IDS model
# on the full data (no train/test split in this example), and visualize it.
# NOTE(review): absolute Windows path — assumes a fixed local data layout.
df = pd.read_csv("C:/code/python/interpretable_decision_sets/data/iris0.csv")
cars = mine_CARs(df, 20)

quant_df = QuantitativeDataFrame(df)

ids = IDS()
ids.fit(quant_df, cars, debug=False)

# Render the dataframe view of the fitted rule set.
viz = IDSVisualization(ids, quant_df)

viz.visualize_dataframe()
Esempio n. 5
0
# In[31]:

benchmark_data = []

# The rule slice does not depend on the sampled data size, so take it once
# instead of re-slicing on every iteration.
current_cars = cars[:RULE_COUNT_TO_USE]

# Time IDS fitting at each data-size quantile, averaging over several runs.
for data_size in data_size_quantiles:
    current_df = df.sample(data_size)
    current_quant_df = QuantitativeDataFrame(current_df)

    times = []

    for _ in range(PY_IDS_DURATION_ITERATIONS):
        # Lambdas are left at their defaults here; a separate hyperparameter
        # search routine tunes them.  The old `lambda_array` local was built
        # inside the timed section but never passed to fit(), so it has been
        # removed (it also skewed the measured duration).
        time1 = time.time()
        ids = IDS()
        ids.fit(class_association_rules=current_cars,
                quant_dataframe=current_quant_df,
                debug=False)

        time2 = time.time()

        duration = time2 - time1
        times.append(duration)

    benchmark_data.append(dict(duration=np.mean(times), data_size=data_size))

# In[32]:

benchmark_dataframe_pyids = pd.DataFrame(benchmark_data)
Esempio n. 6
0
from pyarc.qcba import *

from pyarc.algorithms import createCARs, top_rules
from pyarc import TransactionDB


from pyids.ids_classifier import IDS, mine_IDS_ruleset
from pyids.ids_cacher import IDSCacher
from pyids.ids_ruleset import IDSRuleSet
from pyids.ids_rule import IDSRule
from pyids.model_selection import encode_label, mode, RandomSearchOptimizer

# Titanic example: random search over the IDS lambda hyperparameters.
df = pd.read_csv("./data/titanic.csv")
# Suffix the class column values with "_" — presumably to keep class labels
# distinct from other attribute values; TODO(review): confirm.
df["Died"] = df["Died"].astype(str) + "_"


ids_ruleset = mine_IDS_ruleset(df, 40)

quant_dataframe = QuantitativeDataFrame(df)




ids = IDS()

# NOTE: the same dataframe is used for both training and validation here.
random_search = RandomSearchOptimizer(ids, debug=False, maximum_score_estimation_iterations=3, maximum_iterations=500)
result = random_search.fit(ids_ruleset, quant_dataframe, quant_dataframe)


# Best lambda vector found by the random search.
print(result)
import pandas as pd
from collections import Counter

from pyarc.qcba.data_structures import QuantitativeDataFrame

from pyids.ids_classifier import IDS, mine_CARs
# Fit IDS on 20 mined rules and compare the two AUC scoring modes.
df = pd.read_csv("./data/titanic.csv")

cars = mine_CARs(df, 20)

quant_dataframe = QuantitativeDataFrame(df)

ids = IDS()

ids.fit(quant_dataframe, class_association_rules=cars, debug=False)

# AUC from hard predictions vs. confidence-based scoring.
print(ids.score_auc(quant_dataframe, confidence_based=False))
print(ids.score_auc(quant_dataframe, confidence_based=True))
from pyids.ids_classifier import IDS, mine_IDS_ruleset
from pyids.ids_ruleset import IDSRuleSet
from pyids.model_selection import RandomSearchOptimizer, train_test_split_pd
from pyids.rule_mining import RuleMiner


# Random search over IDS lambdas on a Titanic train/test split; persist the
# best lambda vector and the full score/parameter history to text files.
df = pd.read_csv("../../data/titanic.csv")

rm = RuleMiner()
cars = rm.mine_rules(df, minsup=0.005)

ids_ruleset = IDSRuleSet.from_cba_rules(cars)

# Hold out 25% of the rows for scoring candidate lambda vectors.
df_train, df_test = train_test_split_pd(df, prop=0.25)
quant_df_train, quant_df_test = QuantitativeDataFrame(df_train), QuantitativeDataFrame(df_test)



random_optimizer = RandomSearchOptimizer(IDS(), maximum_score_estimation_iterations=5, maximum_iterations=500)
lambda_array = random_optimizer.fit(ids_ruleset, quant_df_train, quant_df_test)
all_params = random_optimizer.score_params_dict

print(lambda_array)

with open("results/random_search_lambda_array.txt", "w") as file:
    file.write(str(lambda_array))

# Fix: `all_params` was assigned above but the original re-read the attribute
# here; use the captured local (same object, clearer intent).
with open("results/random_search_all_score_params.txt", "w") as file:
    file.write(str(all_params))

Esempio n. 9
0
# Compare IDS models fitted with lambdas from coordinate ascent, random
# search, and a uniform baseline, on a 25% train/test split of Titanic.
df = pd.read_csv("../../data/titanic.csv")

cars = mine_CARs(df, rule_cutoff=40)

df_train, df_test = train_test_split_pd(df, prop=0.25)
quant_df_train, quant_df_test = QuantitativeDataFrame(
    df_train), QuantitativeDataFrame(df_test)

# Lambda vectors hard-coded from earlier tuning runs.
lambda_array_coordinate_ascent = [18, 18, 18, 18, 18, 18, 18]
lambda_array_random_search = [
    510.6938775510204, 694.1836734693878, 918.4489795918367,
    286.42857142857144, 490.30612244897964, 694.1836734693878,
    62.163265306122454
]

ids_ascent = IDS()
ids_random = IDS()
ids_basic = IDS()

ids_ascent.fit(quant_df_train,
               cars,
               debug=False,
               lambda_array=lambda_array_coordinate_ascent)
ids_random.fit(quant_df_train,
               cars,
               debug=False,
               lambda_array=lambda_array_random_search)
# Baseline: all seven objective weights set to 1.
ids_basic.fit(quant_df_train, cars, debug=False, lambda_array=7 * [1])

# Interpretability statistics of the coordinate-ascent model on the test set.
ascent_metrics = calculate_ruleset_statistics(IDSRuleSet(ids_ascent.clf.rules),
                                              quant_df_test)
Esempio n. 10
0
import pandas as pd
from pyids.ids_classifier import IDS, mine_CARs

from pyarc.qcba.data_structures import QuantitativeDataFrame


# Fit IDS on 80 mined rules with debug output enabled and report the AUC.
df = pd.read_csv("./data/titanic.csv")

cars = mine_CARs(df, rule_cutoff=80)

quant_dataframe = QuantitativeDataFrame(df)


ids = IDS()
ids.fit(class_association_rules=cars, quant_dataframe=quant_dataframe, debug=True)

# AUC is measured on the same data the model was fitted on.
auc = ids.score_auc(quant_dataframe)
print(auc)

Esempio n. 11
0
from pyids.rule_mining import RuleMiner
import pandas as pd

from pyids.ids_classifier import IDS
from pyarc.qcba.data_structures import QuantitativeDataFrame
from pyids.ids_ruleset import IDSRuleSet

rm = RuleMiner()

df = pd.read_csv("./data/titanic.csv")

# Very low support threshold, so the candidate rule set can be large.
cars = rm.mine_rules(df, minsup=0.001)
print(len(cars))

quant_df = QuantitativeDataFrame(df)

ids = IDS()
ids.fit(quant_df, cars, debug=False)

# Interpretability metrics of the fitted rule set.
metrics = ids.score_interpretable_metrics(quant_df)

print(metrics)
Esempio n. 12
0
import random
from collections import Counter

from pyarc.qcba import *

from pyarc.algorithms import createCARs, top_rules
from pyarc import TransactionDB

from pyids.ids_classifier import IDS, mine_IDS_ruleset, mine_CARs
from pyids.ids_cacher import IDSCacher
from pyids.ids_ruleset import IDSRuleSet
from pyids.ids_rule import IDSRule
from pyids.model_selection import encode_label, mode, RandomSearchOptimizer

df = pd.read_csv("./data/titanic.csv")
# Suffix the class column values with "_" — presumably to keep class labels
# distinct from other attribute values; TODO(review): confirm.
df["Died"] = df["Died"].astype(str) + "_"

cars = mine_CARs(df, 100)

quant_dataframe = QuantitativeDataFrame(df)

ids = IDS()

# Fit using the "DLS" algorithm variant instead of the default optimizer.
ids.fit(class_association_rules=cars,
        quant_dataframe=quant_dataframe,
        algorithm="DLS")

# NOTE(review): despite the name, this holds the AUC, not accuracy.
acc = ids.score_auc(quant_dataframe)

print(acc)
print(ids.clf.rules)
from pyids.ids_classifier import IDS, mine_IDS_ruleset
from pyids.ids_ruleset import IDSRuleSet
from pyids.model_selection import RandomSearchOptimizer, train_test_split_pd
from pyids.rule_mining import RuleMiner

# Random search over IDS lambdas (lower minsup variant) on a Titanic
# train/test split; persist the best lambda vector and the score history.
df = pd.read_csv("../../data/titanic.csv")

rm = RuleMiner()
cars = rm.mine_rules(df, minsup=0.001)

ids_ruleset = IDSRuleSet.from_cba_rules(cars)

# Hold out 25% of the rows for scoring candidate lambda vectors.
df_train, df_test = train_test_split_pd(df, prop=0.25)
quant_df_train, quant_df_test = QuantitativeDataFrame(
    df_train), QuantitativeDataFrame(df_test)

random_optimizer = RandomSearchOptimizer(IDS(),
                                         maximum_score_estimation_iterations=5,
                                         maximum_iterations=500)
lambda_array = random_optimizer.fit(ids_ruleset, quant_df_train, quant_df_test)
all_params = random_optimizer.score_params_dict

print(lambda_array)

with open("results/random_search_lambda_array.txt", "w") as file:
    file.write(str(lambda_array))

# Fix: `all_params` was assigned above but the original re-read the attribute
# here; use the captured local (same object, clearer intent).
with open("results/random_search_all_score_params.txt", "w") as file:
    file.write(str(all_params))
Esempio n. 14
0
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import random
from collections import Counter

from pyarc.qcba import *

from pyarc.algorithms import createCARs, top_rules
from pyarc import TransactionDB

from pyids.ids_classifier import IDS, mine_IDS_ruleset
from pyids.ids_cacher import IDSCacher
from pyids.ids_ruleset import IDSRuleSet
from pyids.ids_rule import IDSRule
from pyids.model_selection import encode_label, mode, CoordinateAscentOptimizer

# Tune the IDS lambdas with coordinate ascent on Titanic.  The same
# dataframe is used for both training and validation in this example.
df = pd.read_csv("./data/titanic.csv")

ids_ruleset = mine_IDS_ruleset(df, rule_cutoff=40)

quant_dataframe = QuantitativeDataFrame(df)

coordinate_ascent = CoordinateAscentOptimizer(
    IDS(),
    debug=True,
    maximum_delta_between_iterations=200,
    maximum_score_estimation_iterations=3,
    ternary_search_precision=20)
coordinate_ascent.fit(ids_ruleset, quant_dataframe, quant_dataframe)
Esempio n. 15
0
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import random

from sklearn.metrics import accuracy_score, auc, roc_auc_score

from pyids.ids_rule import IDSRule
from pyids.ids_ruleset import IDSRuleSet
from pyids.ids_objective_function import ObjectiveFunctionParameters, IDSObjectiveFunction
from pyids.ids_optimizer import RSOptimizer, SLSOptimizer
from pyids.ids_cacher import IDSCacher
from pyids.ids_classifier import IDS, mine_CARs

from pyids.visualization import IDSVisualization

from pyarc.qcba import *

# Load the iris dataset, mine 20 class association rules, fit IDS with
# debug output enabled, and render the dataframe visualization.
# NOTE(review): absolute Windows path — assumes a fixed local data layout.
df = pd.read_csv("C:/code/python/interpretable_decision_sets/data/iris0.csv")
cars = mine_CARs(df, 20)

quant_df = QuantitativeDataFrame(df)

ids = IDS()
ids.fit(quant_df, cars, debug=True)

viz = IDSVisualization(ids, quant_df)

viz.visualize_dataframe()
Esempio n. 16
0
import pandas as pd
from collections import Counter

from pyarc.qcba.data_structures import QuantitativeDataFrame

from pyids.ids_classifier import IDS, mine_CARs

# Fit IDS using the "SLS" algorithm option and score it on the same data.
df = pd.read_csv("./data/titanic.csv")

cars = mine_CARs(df, 20)

quant_dataframe = QuantitativeDataFrame(df)

ids = IDS()

ids.fit(quant_dataframe,
        class_association_rules=cars,
        debug=False,
        algorithm="SLS")

# Accuracy on the training data (no held-out split in this example).
acc = ids.score(quant_dataframe)