def fmax(param_dict):
    """Optimization objective: build an IDS classifier from CBA rules mined at
    the candidate thresholds and return its AUC, or a negative interpretability
    distance when the solution violates the interpretability constraints.

    Relies on module/closure-level names: ``txns``, ``quant_df``,
    ``quant_df_test``, ``is_solution_interpretable``,
    ``solution_interpretability_distance``.
    """
    print(param_dict)

    # parameters arrive as integers in [1, 999]; rescale to fractions
    support = param_dict["support"] / 1000
    confidence = param_dict["confidence"] / 1000
    print(dict(support=support, confidence=confidence))

    # mine class-association rules at the candidate thresholds
    cba = CBA(support=support, confidence=confidence)
    cba.fit(txns)

    # wrap the mined rules in an IDS classifier
    classifier = IDSClassifier(
        IDSRuleSet.from_cba_rules(cba.clf.rules).ruleset)
    classifier.quant_dataframe_train = quant_df
    classifier.calculate_default_class()

    ids = IDS()
    ids.clf = classifier

    # non-interpretable candidates are penalized by their (negated) distance
    # from the interpretability constraints
    metrics = ids.score_interpretability_metrics(quant_df_test)
    if not is_solution_interpretable(metrics):
        distance = solution_interpretability_distance(metrics)
        print(-distance)
        return -distance

    auc = ids.score_auc(quant_df_test)
    print(auc)
    return auc
예제 #2
0
def run_cba(Xtr,
            Ytr,
            Xt,
            Yt,
            lb,
            support=0.20,
            confidence=0.5,
            k=None,
            log=None):
    """Train a CBA (M1) classifier and log evaluation/interpretability metrics.

    Parameters
    ----------
    Xtr, Ytr : training features / labels (pandas objects, concatenated column-wise).
    Xt, Yt : test features / labels.
    lb : fitted label binarizer used to transform labels for the AUC computation.
    support, confidence : CBA rule-mining thresholds.
    k : optional int — truncate the learned rule list to the first ``k`` rules.
    log : logging callable ``log(key, value[, index])``; when ``None`` the
        project-level ``logger.log`` is imported and used.
    """
    txns_train = TransactionDB.from_DataFrame(pd.concat([Xtr, Ytr], axis=1))
    txns_test = TransactionDB.from_DataFrame(pd.concat([Xt, Yt], axis=1))
    cba = CBA(support=support, confidence=confidence, algorithm="m1")
    cba.fit(txns_train)

    if k is not None:
        cba.clf.rules = cba.clf.rules[:k]

    Y_pred = [int(i) for i in cba.predict(txns_test)]

    # annotate each rule with the indices of the training rows it covers
    # (set comprehension instead of set([...]))
    for r in cba.clf.rules:
        r.covered = {i for i, rd in enumerate(txns_train) if r.antecedent <= rd}

    if log is None:
        from logger import log
    log('cba-k', len(cba.clf.rules))
    log('cba-rules', str(cba.clf.rules))
    # plain loop — a list comprehension used purely for side effects is an
    # anti-idiom and allocates a throwaway list
    for i, r in enumerate(cba.clf.rules):
        log('cba-nconds', len(r), i)
    log('cba-auc', roc_auc_score(lb.transform(Yt.values),
                                 lb.transform(Y_pred)))
    log('cba-bacc', balanced_accuracy_score(Yt, Y_pred))
    log('cba-disp', dispersion_(cba.clf.rules, average=True))
    log('cba-overlap', overlap(cba.clf.rules))
    print(confusion_matrix(Yt, Y_pred))
예제 #3
0
    def test_fitting(self):
        """CBA.fit should run end-to-end on the CSV test dataset."""
        frame = pd.read_csv(dataset_file, sep=",")
        txns = TransactionDB.from_DataFrame(frame)
        CBA().fit(txns)
예제 #4
0
    def test_predict_probablity(self):
        """predict_probability_all should run on the data the model was fit on."""
        header = ["A", "B", "Y"]
        rows = [
            [1, 1, 0],
            [1, 1, 0],
            [1, 1, 1],
            [0, 0, 0],
            [0, 0, 1],
            [0, 0, 1],
        ]
        txns = TransactionDB(rows, header)

        cba = CBA()
        cba.fit(txns)

        cba.clf.predict_probability_all(txns)
예제 #5
0
    def test_default_rule_correct(self):
        """Default class and its support/confidence must be valid for both algorithms.

        Fix: the original constructed ``cba_m2`` identically to ``cba``
        (plain ``CBA(support=0.9)``), so the M2 algorithm it is named after
        was never exercised; it now passes ``algorithm="m2"``, matching the
        convention used by the other tests in this file.
        """
        cba = CBA(support=0.9)
        cba_m2 = CBA(support=0.9, algorithm="m2")

        header1 = ["A", "B", "Y"]
        rows1 = [
            [1, 1, 0],
            [0, 0, 1],
        ]

        transactions = TransactionDB(rows1, header1)

        cba.fit(transactions)
        cba_m2.fit(transactions)

        # the default class must be one of the two observed labels
        self.assertTrue(cba.clf.default_class in ["0", "1"])
        self.assertTrue(cba_m2.clf.default_class in ["0", "1"])

        # support and confidence of the default rule are proportions in [0, 1]
        self.assertTrue(0 <= cba.clf.default_class_support <= 1)
        self.assertTrue(0 <= cba_m2.clf.default_class_support <= 1)
        self.assertTrue(0 <= cba.clf.default_class_confidence <= 1)
        self.assertTrue(0 <= cba_m2.clf.default_class_confidence <= 1)
예제 #6
0
    def test_target_class_works(self):
        """Rules mined with an explicit target must have that target as consequent."""
        frame = pd.read_csv(dataset_file, sep=",")
        txns = TransactionDB.from_DataFrame(frame, target="Gender")

        cba = CBA(algorithm="m2")
        cba.fit(txns)

        first_rule = cba.clf.rules[0]
        self.assertEqual(first_rule.consequent[0], "Gender")
예제 #7
0
    def test_accuracy(self):
        """rule_model_accuracy should reproduce the known value on a 2-row test set."""
        cba = CBA(algorithm="m2")

        frame = pd.read_csv(dataset_file, sep=",")
        train_txns = TransactionDB.from_DataFrame(frame)
        test_txns = TransactionDB.from_DataFrame(frame[:2])

        cba.fit(train_txns)

        self.assertAlmostEqual(
            cba.rule_model_accuracy(test_txns), 0.5, places=3)
예제 #8
0
    def test_rule_class_label_works(self):
        """Without an explicit target, the last column becomes the class label."""
        frame = pd.read_csv(dataset_file, sep=",")
        txns = TransactionDB.from_DataFrame(frame)

        cba = CBA(algorithm="m2")
        cba.fit(txns)

        first_rule = cba.clf.rules[0]
        self.assertEqual(first_rule.consequent[0], frame.columns.values[-1])
예제 #9
0
    def test_inspect(self):
        """inspect() returns a DataFrame: one row per rule plus the default rule."""
        frame = pd.read_csv(dataset_file, sep=";")
        txns = TransactionDB.from_DataFrame(frame)

        cba = CBA()
        cba.fit(txns)

        report = cba.clf.inspect()

        self.assertEqual(type(report), pd.DataFrame)
        self.assertEqual(len(report), len(cba.clf.rules) + 1)

        # the final row is the default rule, whose antecedent is empty
        self.assertEqual(report["lhs"].iloc[-1], "{}")
예제 #10
0
def misuse(train_df, test_df):
    """Train a CBA misuse detector on attack-only traffic and label the test set.

    Parameters
    ----------
    train_df, test_df : pandas DataFrames; ``train_df`` must have a 'Label'
        column used as the mining target.

    Returns
    -------
    A copy of ``test_df`` with an added 'predict' column of CBA predictions.
    """
    print("\nMisuse Data")
    train = train_df.copy()
    test = test_df.copy()

    # keep only attack rows for rule mining
    # (fix: the redundant `== True` on the boolean mask was removed)
    train = train[train['Label'] != 'Benign']

    txns_train = TransactionDB.from_DataFrame(train, target="Label")
    txns_test = TransactionDB.from_DataFrame(test)

    print("Association Rule Generation")
    # very low thresholds: keep as many rules as possible
    cba = CBA(support=0.01, confidence=0.01)
    cba.fit(txns_train)

    predict = cba.predict(txns_test)
    test['predict'] = predict

    return test
예제 #11
0
        def fmax(param_dict):
            """Optimizer objective: AUC of an IDS classifier built from CBA
            rules mined at the candidate support/confidence thresholds.

            Uses enclosing-scope names ``txns``, ``quant_df``, ``quant_df_test``.
            """
            print(param_dict)

            # integer search parameters in [1, 999] rescaled to fractions
            support = param_dict["support"] / 1000
            confidence = param_dict["confidence"] / 1000
            print(dict(support=support, confidence=confidence))

            cba = CBA(support=support, confidence=confidence)
            cba.fit(txns)

            # convert the CBA rule list into an IDS classifier
            classifier = IDSClassifier(
                IDSRuleSet.from_cba_rules(cba.clf.rules).ruleset)
            classifier.quant_dataframe_train = quant_df
            classifier.calculate_default_class()

            ids = IDS()
            ids.clf = classifier

            auc = ids.score_auc(quant_df_test)
            print(auc)
            return auc
def train_model(data, support, confidence, rule_length):
    """Mine CBA (M1) rules from ``data``, score each rule's semantic
    coherence, append them to the module-level ``mined_rules``, and print
    the resulting model and its accuracy.

    Parameters
    ----------
    data : pandas DataFrame of the training examples.
    support, confidence : CBA mining thresholds.
    rule_length : maximum antecedent length (``maxlen``).
    """
    print('Mining rules...')
    columns = data.columns
    txns = TransactionDB.from_DataFrame(data)

    cba = CBA(support=support,
              confidence=confidence,
              maxlen=rule_length,
              algorithm="m1")
    cba.fit(txns)
    # training-set accuracy (evaluated on the same data the model was fit on)
    accuracy = cba.rule_model_accuracy(txns)

    print('Determining semantic coherence...')
    semantics.precompute_attributes(columns)

    for rule in cba.clf.rules:
        antecedent_items = list(rule.antecedent.itemset.keys())
        mined_rules.append((f'{rule_to_em_rule(rule)}',
                            rule.confidence,
                            semantics.get_rule_coherence(antecedent_items),
                            rule.consequent.value))

    print_model()
    print(f'Model accuracy: {round(accuracy * 100, 2)} %')
    print(f'Model support: {cba.support}')
예제 #13
0
    def test_predict_probability(self):
        """predict_probability should run on unseen transactions."""
        cba = CBA(algorithm="m2")

        frame = pd.read_csv(dataset_file, sep=",")
        train_txns = TransactionDB.from_DataFrame(frame)
        test_txns = TransactionDB.from_DataFrame(frame[:2])

        cba.fit(train_txns)
        cba.predict_probability(test_txns)
예제 #14
0
    def test_parameter_validation(self):
        """Invalid constructor arguments and None inputs must raise."""
        cba = CBA()

        # out-of-range hyperparameters are rejected at construction time
        for kwargs in (dict(maxlen=-1),
                       dict(confidence=-1),
                       dict(confidence=12),
                       dict(support=-1),
                       dict(support=12)):
            self.assertRaises(Exception, CBA, **kwargs)

        # None is not a valid transaction database for any entry point
        for method in (cba.predict, cba.rule_model_accuracy, cba.fit):
            self.assertRaises(Exception, method, None)
예제 #15
0
    def test_predict_probability_works(self):
        """Each predicted probability must equal the matched rule's confidence.

        Fix: iterate with ``enumerate`` instead of ``range(len(...))``
        (indexing ``matched_rules[idx]`` is kept so a length mismatch still
        surfaces as an IndexError, exactly like the original).
        """
        cba = CBA(algorithm="m1")

        test_dataframe = pd.read_csv(dataset_file, sep=",")

        transactions = TransactionDB.from_DataFrame(test_dataframe)
        transactions_test = TransactionDB.from_DataFrame(test_dataframe[:2])

        cba.fit(transactions)

        probabilities = cba.predict_probability(transactions_test)
        matched_rules = cba.predict_matched_rules(transactions_test)

        for idx, probability in enumerate(probabilities):
            self.assertEqual(probability, matched_rules[idx].confidence)
                                     func_args_ranges=dict(
                                         support=(1, 999),
                                         confidence=(1, 999),
                                     ),
                                     ternary_search_precision=5,
                                     max_iterations=1,
                                     extension_precision=-1,
                                     func_args_extension=dict(support=0,
                                                              confidence=0))

        best_pars = coord_asc.fit()

        print("best_pars:", best_pars)
        support, confidence = best_pars[0] / 1000, best_pars[1] / 1000

        cba = CBA(support=support, confidence=confidence)
        cba.fit(txns)
        cba_clf = cba.clf

        ids = IDS()
        ids_clf = IDSClassifier(
            IDSRuleSet.from_cba_rules(cba_clf.rules).ruleset)
        ids_clf.quant_dataframe_train = quant_df
        ids_clf.calculate_default_class()

        ids.clf = ids_clf

        data = dict(dataset_name=dataset_name,
                    algorithm="pyARC",
                    auc=ids.score_auc(quant_df_test, order_type="cba"),
                    rule_cutoff=rule_cutoff)
예제 #17
0
import pandas as pd
from pyarc import CBA, TransactionDB
from sklearn.model_selection import train_test_split

# Script: mine class-association rules from total_data.csv and report
# the model's accuracy on a held-out split.

print("")
print("Rule Generation")
data = pd.read_csv('total_data.csv')  #disc_data/Known_attack_data
print("Data Size : %s" % str(data.shape))

# split into train/test sets (fixed seed for reproducibility)
train, test = train_test_split(data, test_size=0.2, random_state=123)

txns_train = TransactionDB.from_DataFrame(train, target="class")
txns_test = TransactionDB.from_DataFrame(test)

print("Association Rule Generation")
cba = CBA(support=0.1)
# Fix: fit once and print the captured return value — the original called
# cba.fit(txns_train) a second time just to print it, training the model twice.
fit_result = cba.fit(txns_train)
print(fit_result)

print("\nRULES : ({})".format(len(cba.clf.rules)))
for i in cba.clf.rules:
    print(i)

accuracy = cba.rule_model_accuracy(txns_test)
print("")
print(accuracy)
예제 #18
0
benchmark_list = []

# Benchmark loop: for every dataset file, mine CBA rules, wrap them in an
# IDS classifier, and print its interpretability metrics on the test split.
# `dataset_files`, `dataset_path_train`, `dataset_path_test` are defined
# elsewhere in the file.
for dataset_filename in dataset_files:
    print(dataset_filename)

    df_train = pd.read_csv(os.path.join(dataset_path_train, dataset_filename))
    df_test = pd.read_csv(os.path.join(dataset_path_test, dataset_filename))

    # transactional view for CBA mining, quantitative view for IDS scoring
    txns_train = TransactionDB.from_DataFrame(df_train)
    txns_test = TransactionDB.from_DataFrame(df_test)

    quant_df_train = QuantitativeDataFrame(df_train)
    quant_df_test = QuantitativeDataFrame(df_test)

    cba = CBA(support=0.1, confidence=0.1)
    cba.fit(txns_train)

    rules = cba.clf.rules
    ids_ruleset = IDSRuleSet.from_cba_rules(rules)

    # build an IDS classifier from the CBA rules, reusing CBA's default class
    ids = IDS()
    ids.clf = IDSClassifier(ids_ruleset.ruleset)
    ids.clf.default_class = cba.clf.default_class

    metrics_dict = ids.score_interpretability_metrics(quant_df_test)

    benchmark_dict = dict(dataset_filename=dataset_filename, algorithm="cba")

    benchmark_dict.update(metrics_dict)
    print(benchmark_dict)
    # NOTE(review): benchmark_list is never appended to in this view —
    # presumably the loop body continues past this chunk; confirm.