Esempio n. 1
0
    def test_predict_probability(self):
        """Smoke test: ``predict_probability`` runs on a CBA fitted with ``algorithm="m2"``.

        No value is asserted; the test only fails if one of the calls raises.
        """
        classifier = CBA(algorithm="m2")

        frame = pd.read_csv(dataset_file, sep=",")

        # Fit on the whole file, then query with just the first two rows.
        training_txns = TransactionDB.from_DataFrame(frame)
        probe_txns = TransactionDB.from_DataFrame(frame[:2])

        classifier.fit(training_txns)
        classifier.predict_probability(probe_txns)
Esempio n. 2
0
    def test_predict_probability_works(self):
        """Check each predicted probability equals the matched rule's confidence.

        Uses a CBA built with ``algorithm="m1"``, fitted on the whole dataset
        and queried with its first two rows.
        """
        classifier = CBA(algorithm="m1")

        frame = pd.read_csv(dataset_file, sep=",")
        training_txns = TransactionDB.from_DataFrame(frame)
        probe_txns = TransactionDB.from_DataFrame(frame[:2])

        classifier.fit(training_txns)

        predicted = classifier.predict_probability(probe_txns)
        matched = classifier.predict_matched_rules(probe_txns)

        # The i-th probability must equal the confidence of the i-th matched rule.
        for position, probability in enumerate(predicted):
            self.assertEqual(probability, matched[position].confidence)
Esempio n. 3
0
    def test_accuracy(self):
        """Check ``rule_model_accuracy`` on the first two rows is 0.5 (3 places).

        The model is a CBA built with ``algorithm="m2"`` and fitted on the
        whole dataset.
        """
        classifier = CBA(algorithm="m2")

        frame = pd.read_csv(dataset_file, sep=",")
        training_txns = TransactionDB.from_DataFrame(frame)
        probe_txns = TransactionDB.from_DataFrame(frame[:2])

        classifier.fit(training_txns)

        self.assertAlmostEqual(
            classifier.rule_model_accuracy(probe_txns),
            0.5,
            places=3,
        )
Esempio n. 4
0
    def fit(self, rule_cutoff):
        """Fit and score ``self.classifier`` once per prepared train/test pair.

        For every pair yielded by ``self._prepare_dataframes()``, rules are
        mined from the training frame, the first ``rule_cutoff`` CARs are kept,
        the classifier is fitted on them, and it is then scored on the test
        frame.

        Args:
            rule_cutoff: Number of leading CARs passed to the classifier.

        Returns:
            A list with one score per train/test pair. Scores come from
            ``score_auc`` when ``self.score_auc`` is truthy, otherwise from
            ``score``.
        """
        collected_scores = []

        for train_frame, test_frame in self._prepare_dataframes():
            train_txns = TransactionDB.from_DataFrame(train_frame)
            mined_rules = top_rules(
                train_txns.string_representation,
                appearance=train_txns.appeardict,
            )
            selected_cars = createCARs(mined_rules)[:rule_cutoff]

            quant_train = QuantitativeDataFrame(train_frame)
            quant_test = QuantitativeDataFrame(test_frame)

            self.classifier.fit(quant_train, selected_cars, debug=self.debug)

            # Choose the scoring method after fitting, exactly once per pair.
            if self.score_auc:
                scorer = self.classifier.score_auc
            else:
                scorer = self.classifier.score

            collected_scores.append(scorer(quant_test))

        return collected_scores
Esempio n. 5
0
    def fit(self,
            quant_dataframe,
            cars=None,
            rule_cutoff=30,
            lambda_array=None,
            class_name=None,
            debug=False,
            algorithm="SLS"):
        """Fit every per-class classifier stored in ``self.ids_classifiers``.

        After ``self._prepare`` runs, each entry of ``self.ids_classifiers`` is
        visited: rules are mined from that entry's own quantitative dataframe,
        sorted in descending order, truncated to ``rule_cutoff`` and passed to
        the entry's classifier.

        Args:
            quant_dataframe: Training data; stored on
                ``self.quant_dataframe_train`` and handed to ``self._prepare``.
            cars: Accepted but never read; rules are always mined per class
                inside the loop.
            rule_cutoff: Number of top-sorted CARs given to each classifier.
            lambda_array: Forwarded to each classifier's ``fit``. Defaults to
                a fresh ``7 * [1]`` list on every call.
            class_name: Forwarded to ``self._prepare``.
            debug: Forwarded to each classifier's ``fit``.
            algorithm: Forwarded to each classifier's ``fit``.
        """
        # Build the default per call: a list literal in the signature would be
        # a single object shared (and possibly mutated) across all calls.
        if lambda_array is None:
            lambda_array = 7 * [1]

        self.quant_dataframe_train = quant_dataframe

        self._prepare(quant_dataframe, class_name)

        for class_, clf_dict in self.ids_classifiers.items():
            print("training class:", class_)

            clf = clf_dict["clf"]
            # Distinct local names so the parameters are not shadowed.
            class_quant_dataframe = clf_dict["quant_dataframe"]
            pandas_dataframe = class_quant_dataframe.dataframe

            txns = TransactionDB.from_DataFrame(pandas_dataframe)
            rules = top_rules(txns.string_representation,
                              appearance=txns.appeardict)
            class_cars = createCARs(rules)
            class_cars.sort(reverse=True)

            clf.fit(class_quant_dataframe,
                    class_cars[:rule_cutoff],
                    lambda_array=lambda_array,
                    debug=debug,
                    algorithm=algorithm)
Esempio n. 6
0
    def mine_frequent_itemsets(self, pandas_df, minsup):
        """Mine frequent itemsets from all columns of ``pandas_df`` but the last.

        Args:
            pandas_df: Source frame; its last column is dropped before mining
                (presumably the class column - confirm with callers).
            minsup: Minimum support; multiplied by 100 before being passed to
                ``fim.apriori`` as ``supp``.

        Returns:
            The result of ``fim.apriori`` called with ``report="s"``.
        """
        feature_columns = pandas_df.iloc[:, :-1]
        classless_txns = TransactionDB.from_DataFrame(feature_columns)

        return fim.apriori(
            classless_txns.string_representation,
            supp=minsup * 100,
            report="s",
        )
Esempio n. 7
0
    def test_fitting(self):
        """Smoke test: a default-configured CBA fits the dataset without raising."""
        classifier = CBA()

        frame = pd.read_csv(dataset_file, sep=",")
        training_txns = TransactionDB.from_DataFrame(frame)

        classifier.fit(training_txns)
Esempio n. 8
0
def mine_CARs(df, rule_cutoff, sample=False):
    """Mine class association rules from ``df`` and return at most ``rule_cutoff``.

    Args:
        df: Frame converted with ``TransactionDB.from_DataFrame`` before mining.
        rule_cutoff: Maximum number of rules to return.
        sample: When true, return a random sample of the mined rules instead of
            the first ``rule_cutoff`` of them.

    Returns:
        A list of CARs: the leading slice, or a random sample when ``sample``
        is true. If fewer than ``rule_cutoff`` rules were mined, all of them
        are returned in either mode.
    """
    txns = TransactionDB.from_DataFrame(df)
    rules = top_rules(txns.string_representation, appearance=txns.appeardict)
    cars = createCARs(rules)

    if sample:
        # random.sample raises ValueError when asked for more items than the
        # population holds; clamp so sampling degrades like slicing does.
        return random.sample(cars, min(rule_cutoff, len(cars)))

    return cars[:rule_cutoff]
Esempio n. 9
0
    def test_target_class_works(self):
        """Check the first rule's consequent uses the explicit ``target`` column.

        Transactions are built with ``target="Gender"`` and fitted with a CBA
        using ``algorithm="m2"``.
        """
        classifier = CBA(algorithm="m2")

        frame = pd.read_csv(dataset_file, sep=",")
        training_txns = TransactionDB.from_DataFrame(frame, target="Gender")

        classifier.fit(training_txns)

        first_rule = classifier.clf.rules[0]

        self.assertEqual(first_rule.consequent[0], "Gender")
Esempio n. 10
0
    def test_rule_class_label_works(self):
        """Check the first rule's consequent names the frame's last column.

        No ``target`` is passed when building the transactions; the CBA uses
        ``algorithm="m2"``.
        """
        classifier = CBA(algorithm="m2")

        frame = pd.read_csv(dataset_file, sep=",")
        training_txns = TransactionDB.from_DataFrame(frame)

        classifier.fit(training_txns)

        first_rule = classifier.clf.rules[0]
        last_column = frame.columns.values[-1]

        self.assertEqual(first_rule.consequent[0], last_column)
Esempio n. 11
0
    def test_inspect(self):
        """Check the shape and last row of the frame returned by ``clf.inspect()``.

        The result must be exactly a ``pd.DataFrame``, hold one row more than
        the classifier has rules, and end with a row whose ``lhs`` is ``"{}"``
        (presumably the default rule - confirm against ``inspect``).
        """
        classifier = CBA()

        frame = pd.read_csv(dataset_file, sep=";")
        classifier.fit(TransactionDB.from_DataFrame(frame))

        fitted = classifier.clf
        report = fitted.inspect()

        self.assertEqual(type(report), pd.DataFrame)
        self.assertEqual(len(report), len(fitted.rules) + 1)

        last_lhs = report["lhs"].iloc[-1]
        self.assertEqual(last_lhs, "{}")
Esempio n. 12
0
def mine_CARs(df,
              rule_cutoff,
              sample=False,
              random_seed=None,
              **top_rules_kwargs):
    """Mine class association rules from ``df`` and return at most ``rule_cutoff``.

    Args:
        df: Frame converted with ``TransactionDB.from_DataFrame`` before mining.
        rule_cutoff: Maximum number of rules to return.
        sample: When true, return a random sample of the mined rules instead of
            the first ``rule_cutoff`` of them.
        random_seed: When not ``None``, seeds both ``random`` and
            ``numpy.random`` before mining. ``0`` is a valid seed.
        **top_rules_kwargs: Extra keyword arguments forwarded to ``top_rules``.

    Returns:
        A list of CARs: the leading slice, or a random sample when ``sample``
        is true. If fewer than ``rule_cutoff`` rules were mined, all of them
        are returned in either mode.
    """
    # Compare against None: a plain truthiness test would skip seeding for
    # seed 0 and leave the run non-reproducible.
    if random_seed is not None:
        random.seed(random_seed)
        np.random.seed(random_seed)

    txns = TransactionDB.from_DataFrame(df)
    rules = top_rules(txns.string_representation,
                      appearance=txns.appeardict,
                      **top_rules_kwargs)
    cars = createCARs(rules)

    if sample:
        # random.sample raises ValueError when asked for more items than the
        # population holds; clamp so sampling degrades like slicing does.
        return random.sample(cars, min(rule_cutoff, len(cars)))

    return cars[:rule_cutoff]

# Benchmark driver: for every dataset, load each train/test file pair and
# define an objective over CBA support/confidence.
# NOTE(review): nothing in the visible lines appends to this list; its use is
# presumably further down - confirm.
benchmark_data = []

# dataset_rulenum maps a dataset name to a rule cutoff; it is defined outside
# this excerpt.
for dataset_name, rule_cutoff in dataset_rulenum.items():
    print(dataset_name)

    train_files = get_dataset_files(train_path, dataset_name)
    test_files = get_dataset_files(test_path, dataset_name)
    # Train and test files are paired positionally; the trailing [:] takes a
    # full copy of the pair list (i.e. every pair is processed).
    for train_file, test_file in list(zip(train_files, test_files))[:]:
        dataset_path = os.path.join(train_path, train_file)
        dataset_test_path = os.path.join(test_path, test_file)

        # Training data in pandas, quantitative and transaction form.
        df = pd.read_csv(dataset_path)
        quant_df = QuantitativeDataFrame(df)
        txns = TransactionDB.from_DataFrame(df)

        # Test data in the same three forms.
        df_test = pd.read_csv(dataset_test_path)
        quant_df_test = QuantitativeDataFrame(df_test)
        txns_test = TransactionDB.from_DataFrame(df_test)

        # Objective for a parameter search (presumably maximised - confirm).
        # It closes over txns from the enclosing loop iteration.
        # NOTE(review): the definition appears to be cut off after cba.fit in
        # this excerpt; no return value is visible.
        def fmax(param_dict):
            print(param_dict)

            # Both parameters are divided by 1000 before being given to CBA.
            support, confidence = param_dict["support"] / 1000, param_dict[
                "confidence"] / 1000
            print(dict(support=support, confidence=confidence))

            cba = CBA(support=support, confidence=confidence)
            cba.fit(txns)
Esempio n. 14
0
import numpy as np

from pyarc.qcba.data_structures import QuantitativeDataFrame
import time

from pyids.algorithms.ids_classifier import mine_CARs
from pyids.algorithms.ids import IDS
from pyarc.data_structures import TransactionDB
from pyarc.algorithms import M1Algorithm

#logging.basicConfig(level=logging.DEBUG)

# Benchmark script: mine an increasing number of rules from one iris file and
# iterate over the "DLS" and "SLS" algorithms for each rule count.
# NOTE(review): pd is used below but no pandas import is visible in this
# excerpt - confirm it is imported above.
iris_file = "c:/code/python/machine_learning/assoc_rules/train/iris0.csv"

df = pd.read_csv(iris_file)
# NOTE(review): txns is not read anywhere in the visible lines.
txns = TransactionDB.from_DataFrame(df)

iris_benchmark = []

# Rule counts 10, 20, ..., 100.
for i in range(10, 110, 10):
    rule_count = i

    # NOTE(review): rules and cars below come from two identical mine_CARs
    # calls; rules is not read in the visible lines - confirm whether the
    # second mining pass is needed.
    rules = mine_CARs(df, rule_count)

    quant_df = QuantitativeDataFrame(df)

    cars = mine_CARs(df, rule_count)
    print(len(cars))

    for algorithm in ["DLS", "SLS"]:
        # Reset for each algorithm; the code that fills it is not visible in
        # this excerpt.
        times = []
Esempio n. 15
0
# Benchmark script: fit CBA on each dataset's "0" file, then wrap the resulting
# rules and default class in an IDS classifier.
# datasets is defined outside this excerpt.
dataset_files = [f"{dataset_name}0.csv" for dataset_name in datasets]

dataset_path = "C:/code/python/machine_learning/assoc_rules/"
dataset_path_train = os.path.join(dataset_path, "train")
# NOTE(review): the test directory climbs three levels above dataset_path,
# while the train directory sits directly inside it - confirm this is intended.
dataset_path_test = os.path.join(dataset_path, "../../../test")

# NOTE(review): nothing in the visible lines appends to this list.
benchmark_list = []

for dataset_filename in dataset_files:
    print(dataset_filename)

    # The same file name is looked up in both the train and test directories.
    df_train = pd.read_csv(os.path.join(dataset_path_train, dataset_filename))
    df_test = pd.read_csv(os.path.join(dataset_path_test, dataset_filename))

    txns_train = TransactionDB.from_DataFrame(df_train)
    txns_test = TransactionDB.from_DataFrame(df_test)

    quant_df_train = QuantitativeDataFrame(df_train)
    quant_df_test = QuantitativeDataFrame(df_test)

    cba = CBA(support=0.1, confidence=0.1)
    cba.fit(txns_train)

    # Convert the rules of the fitted CBA classifier into an IDS rule set.
    rules = cba.clf.rules
    ids_ruleset = IDSRuleSet.from_cba_rules(rules)

    # Assemble the IDS model by hand rather than calling a fit method: its
    # classifier is built from the converted rules and inherits CBA's default
    # class.
    ids = IDS()
    ids.clf = IDSClassifier(ids_ruleset.ruleset)
    ids.clf.default_class = cba.clf.default_class