Beispiel #1
0
def run_cba(Xtr,
            Ytr,
            Xt,
            Yt,
            lb,
            support=0.20,
            confidence=0.5,
            k=None,
            log=None):
    """Fit a CBA classifier, predict on the test split and log its metrics.

    Args:
        Xtr: Training features; concatenated column-wise with ``Ytr``.
        Ytr: Training labels; appended after the feature columns.
        Xt: Test features; concatenated column-wise with ``Yt``.
        Yt: Test labels; must expose ``.values``.
        lb: Object whose ``transform`` is applied to the true and predicted
            labels before the AUC is computed (presumably a fitted label
            binarizer -- confirm against the caller).
        support: Forwarded to ``CBA`` as ``support``.
        confidence: Forwarded to ``CBA`` as ``confidence``.
        k: When not ``None``, only the first ``k`` learned rules are kept
            before predicting.
        log: Callable invoked as ``log(key, value)`` or
            ``log(key, value, index)``. When ``None``, ``log`` is imported
            from the local ``logger`` module.

    Returns:
        None. Metrics are emitted through ``log`` and the confusion matrix
        is printed to stdout.
    """
    # No explicit target is passed to from_DataFrame; presumably the last
    # column (the label) is taken as the class -- confirm against pyarc.
    txns_train = TransactionDB.from_DataFrame(pd.concat([Xtr, Ytr], axis=1))
    txns_test = TransactionDB.from_DataFrame(pd.concat([Xt, Yt], axis=1))
    cba = CBA(support=support, confidence=confidence, algorithm="m1")
    cba.fit(txns_train)

    # Truncate the rule list *before* predicting so the predictions reflect
    # the reduced classifier.
    if k is not None:
        cba.clf.rules = cba.clf.rules[:k]

    Y_pred = [int(i) for i in cba.predict(txns_test)]

    # Attach to every rule the indices of the training transactions for which
    # ``antecedent <= transaction`` holds; the dispersion/overlap helpers
    # below presumably read this attribute.
    for r in cba.clf.rules:
        r.covered = {
            i for i, rd in enumerate(txns_train) if r.antecedent <= rd
        }

    if log is None:
        from logger import log
    log('cba-k', len(cba.clf.rules))
    log('cba-rules', str(cba.clf.rules))
    # Plain loop: the log calls are side effects, no list is needed.
    for i, r in enumerate(cba.clf.rules):
        log('cba-nconds', len(r), i)
    log('cba-auc', roc_auc_score(lb.transform(Yt.values),
                                 lb.transform(Y_pred)))
    log('cba-bacc', balanced_accuracy_score(Yt, Y_pred))
    log('cba-disp', dispersion_(cba.clf.rules, average=True))
    log('cba-overlap', overlap(cba.clf.rules))
    print(confusion_matrix(Yt, Y_pred))
def misuse(train_df, test_df):
    """Train CBA on the non-benign training rows and label the test set.

    Args:
        train_df: DataFrame with a ``Label`` column. Rows whose label equals
            ``'Benign'`` are excluded from training. Not modified.
        test_df: DataFrame to classify. Not modified; a copy is returned.

    Returns:
        A copy of ``test_df`` with an added ``predict`` column holding the
        output of ``cba.predict``.
    """
    print("\nMisuse Data")
    test = test_df.copy()

    # Boolean indexing already yields a new frame, so no prior copy of
    # train_df is needed, and the mask needs no ``== True`` comparison.
    train = train_df[train_df['Label'] != 'Benign']

    txns_train = TransactionDB.from_DataFrame(train, target="Label")
    # NOTE(review): no ``target`` is given here, unlike for the training
    # set -- presumably the last column of ``test`` is then treated as the
    # class; confirm this matches the layout of test_df.
    txns_test = TransactionDB.from_DataFrame(test)

    print("Association Rule Generation")
    cba = CBA(support=0.01, confidence=0.01)
    cba.fit(txns_train)

    test['predict'] = cba.predict(txns_test)

    return test