def MLEM2_STAT(FILENAME, iter1, iter2):
    """Induce MLEM2 rules for one cross-validation fold and append their
    statistics (rule count, mean length, mean support) to MLEM2_STAT.csv."""
    # Load cached rules if a pickle exists; otherwise induce and cache them.
    rules_path = f"{DIR_UCI}/{FILENAME}/rules/rules_{iter1}-{iter2}.pkl"
    if os.path.isfile(rules_path):
        rules = mlem2.loadPickleRules(rules_path)
    else:
        rules = mlem2.getRulesByMLEM2(FILENAME, iter1, iter2)
        mlem2.savePickleRules(rules, rules_path)

    # Rule statistics: count, mean length, mean support.
    num = len(rules)
    leng = mlem2.getMeanLength(rules)
    support = mlem2.getMeanSupport(rules)

    # Append one CSV row with the statistics.
    savepath = f"{DIR_UCI}/{FILENAME}/MLEM2_STAT.csv"
    with open(savepath, "a") as f:
        f.write(f"MLEM2_STAT,1,{FILENAME},{iter1},{iter2},{num},{leng},{support}\n")

    return 0
Exemple #2
0
def MLEM2_OnlyK_Identified(FILENAME, iter1, iter2, k, p):
    """Filter MLEM2 rules down to those supported by at least k objects and
    append the per-identified-class measure for ratio p to
    Identify_MLEM2_OnlyK.csv."""
    # Load cached rules if a pickle exists; otherwise induce and cache them.
    rules_path = f"{DIR_UCI}/{FILENAME}/rules/rules_{iter1}-{iter2}.pkl"
    if os.path.isfile(rules_path):
        rules = mlem2.loadPickleRules(rules_path)
    else:
        rules = mlem2.getRulesByMLEM2(FILENAME, iter1, iter2)
        mlem2.savePickleRules(rules, rules_path)

    # Only-k filter: keep rules with support >= k (cached the same way).
    onlyk_path = f"{DIR_UCI}/{FILENAME}/rules_onlyK/rules-{k}_{iter1}-{iter2}.pkl"
    if os.path.isfile(onlyk_path):
        rules = mlem2.loadPickleRules(onlyk_path)
    else:
        rules = [r for r in rules if len(r.getSupport()) >= k]
        mlem2.savePickleRules(rules, onlyk_path)

    # Per-identified-class measure of the filtered rule set.
    ans = mlem2.getPerIdentifiedClass(rules, p)

    savepath = f"{DIR_UCI}/{FILENAME}/Identify_MLEM2_OnlyK.csv"
    with open(savepath, "a") as f:
        f.write(f"Identify_MLEM2_OnlyK,{k},{p},{FILENAME},{iter1},{iter2},{ans}\n")

    return ans
Exemple #3
0
def MLEM2_LERS(FILENAME, iter1, iter2):
    """Induce MLEM2 rules, classify the test fold with LERS, and append the
    accuracy plus rule statistics to fairness/00_normal/MLEM2_LERS.csv."""
    # Rule induction with pickle cache.
    rules_path = f"{DIR_UCI}/{FILENAME}/rules/rules_{iter1}-{iter2}.pkl"
    if os.path.isfile(rules_path):
        rules = mlem2.loadPickleRules(rules_path)
    else:
        rules = mlem2.getRulesByMLEM2(FILENAME, iter1, iter2)
        mlem2.savePickleRules(rules, rules_path)

    # Classify the test fold with LERS and score it.
    decision_table_test, decision_class = getData(FILENAME, iter1, iter2, T="test")
    list_judgeNominal = getJudgeNominal(decision_table_test, FILENAME)
    predictions = LERS.predictByLERS(rules, decision_table_test, list_judgeNominal)
    accuracy = accuracy_score(decision_class, predictions)

    # Rule statistics; support/confidence and acc/recall are computed on the
    # training fold (decision_class is intentionally rebound here).
    num = len(rules)
    num_class = strNumClassRules(rules)
    mean_length = mlem2.getMeanLength(rules)
    decision_table_train, decision_class = getData(FILENAME, iter1, iter2, T="train")
    list_judgeNominal = getJudgeNominal(decision_table_train, FILENAME)
    mean_support, mean_conf = LERS.getSupportConfidenceRules(
        rules, decision_table_train, list_judgeNominal)
    acc_recall = LERS.getAccurayRecall(rules, decision_table_train, list_judgeNominal)

    # Append one CSV row with all metrics.
    savepath = f"{DIR_UCI}/{FILENAME}/fairness/00_normal/MLEM2_LERS.csv"
    with open(savepath, "a") as f:
        f.write(
            f"MLEM2_LERS,{FILENAME},{iter1},{iter2},{accuracy},{num},{num_class},"
            f"{mean_length},{mean_support},{mean_conf},{strAccRecall(rules, acc_recall)}\n"
        )
    return 0
def MLEM2_OnlyK_LERS(FILENAME, iter1, iter2, k):
    """Induce MLEM2 rules, keep only those with support >= k, classify the
    test fold with LERS, and append the accuracy to MLEM2_OnlyK_LERS.csv."""
    print(f"START iter1 iter2 k : {iter1},{iter2},{k}")

    # Rule induction with pickle cache.
    rules_path = f"{DIR_UCI}/{FILENAME}/rules/rules_{iter1}-{iter2}.pkl"
    if os.path.isfile(rules_path):
        rules = mlem2.loadPickleRules(rules_path)
    else:
        rules = mlem2.getRulesByMLEM2(FILENAME, iter1, iter2)
        mlem2.savePickleRules(rules, rules_path)

    # Only-k filter with its own cache.
    onlyk_path = f"{DIR_UCI}/{FILENAME}/rules_onlyK/rules-{k}_{iter1}-{iter2}.pkl"
    if os.path.isfile(onlyk_path):
        rules = mlem2.loadPickleRules(onlyk_path)
    else:
        rules = [r for r in rules if len(r.getSupport()) >= k]
        mlem2.savePickleRules(rules, onlyk_path)

    # Test fold: drop rows with missing values; the last column is the class.
    test_path = f"{DIR_UCI}/{FILENAME}/{FILENAME}-test{iter1}-{iter2}.tsv"
    decision_table_test = mlem2.getDecisionTable(test_path).dropna()
    decision_class = decision_table_test[decision_table_test.columns[-1]].values.tolist()

    # Nominal-attribute judges for the test table.
    nominal_path = f"{DIR_UCI}/{FILENAME}/{FILENAME}.nominal"
    list_nominal = mlem2.getNominalList(nominal_path)
    list_judgeNominal = mlem2.getJudgeNominal(decision_table_test, list_nominal)

    # Classify with LERS and score.
    predictions = LERS.predictByLERS(rules, decision_table_test, list_judgeNominal)
    accuracy = accuracy_score(decision_class, predictions)

    savepath = f"{DIR_UCI}/{FILENAME}/MLEM2_OnlyK_LERS.csv"
    with open(savepath, "a") as f:
        f.write(f"MLEM2_OnlyK_LERS,{k},{FILENAME},{iter1},{iter2},{accuracy}\n")

    return accuracy
def MLEM2_RuleClusteringByConsistentExceptMRule_STAT(FILENAME, iter1, iter2, k,
                                                     m):
    """Cluster MLEM2 rules with the consistent-except-m-rule strategy
    (parameters k, m) and append rule statistics to
    MLEM2_RuleClusteringByConsistentExceptMRule_STAT.csv."""
    # Rule induction with pickle cache.
    rules_path = f"{DIR_UCI}/{FILENAME}/rules/rules_{iter1}-{iter2}.pkl"
    if os.path.isfile(rules_path):
        rules = mlem2.loadPickleRules(rules_path)
    else:
        rules = mlem2.getRulesByMLEM2(FILENAME, iter1, iter2)
        mlem2.savePickleRules(rules, rules_path)

    # Training table, column names, and nominal judges needed by clustering.
    train_path = f"{DIR_UCI}/{FILENAME}/{FILENAME}-train{iter1}-{iter2}.tsv"
    decision_table = mlem2.getDecisionTable(train_path)
    colnames = mlem2.getColNames(decision_table)

    nominal_path = f"{DIR_UCI}/{FILENAME}/{FILENAME}.nominal"
    list_nominal = mlem2.getNominalList(nominal_path)
    list_judgeNominal = mlem2.getJudgeNominal(decision_table, list_nominal)

    # Clustering with its own cache.
    cluster_path = (f"{DIR_UCI}/{FILENAME}/rules_cluster_consistent_except_mrule/"
                    f"rules-{k}_{iter1}-{iter2}.pkl")
    if os.path.isfile(cluster_path):
        rules = mlem2.loadPickleRules(cluster_path)
    else:
        rules = clustering.getRuleClusteringByConsistentExceptMRule(
            rules, colnames, list_judgeNominal, k=k, m=m)
        mlem2.savePickleRules(rules, cluster_path)

    # Rule statistics: count, mean length, mean support.
    num = len(rules)
    leng = mlem2.getMeanLength(rules)
    support = mlem2.getMeanSupport(rules)

    # Append one CSV row with the statistics.
    savepath = f"{DIR_UCI}/{FILENAME}/MLEM2_RuleClusteringByConsistentExceptMRule_STAT.csv"
    with open(savepath, "a") as f:
        f.write(
            f"MLEM2_RuleClusteringByConsistentExceptMRule_STAT,{k},{FILENAME},"
            f"{iter1},{iter2},{num},{leng},{support}\n"
        )

    return 0
def MLEM2_RuleClusteringBySameCondition_LERS(FILENAME, iter1, iter2, k):
    """Induce MLEM2 rules, cluster them by identical conditions (parameter k),
    classify the test fold with LERS, and append the accuracy to
    MLEM2_RuleClusteringBySameCondition_LERS.csv."""
    # Rule induction with pickle cache.
    rules_path = f"{DIR_UCI}/{FILENAME}/rules/rules_{iter1}-{iter2}.pkl"
    if os.path.isfile(rules_path):
        rules = mlem2.loadPickleRules(rules_path)
    else:
        rules = mlem2.getRulesByMLEM2(FILENAME, iter1, iter2)
        mlem2.savePickleRules(rules, rules_path)

    # Same-condition clustering with its own cache.
    cluster_path = (f"{DIR_UCI}/{FILENAME}/rules_cluster_same_condition/"
                    f"rules-{k}_{iter1}-{iter2}.pkl")
    if os.path.isfile(cluster_path):
        rules = mlem2.loadPickleRules(cluster_path)
    else:
        rules = clustering.getRuleClusteringBySameCondition(rules, k=k)
        mlem2.savePickleRules(rules, cluster_path)

    # Test fold: drop rows with missing values; the last column is the class.
    test_path = f"{DIR_UCI}/{FILENAME}/{FILENAME}-test{iter1}-{iter2}.tsv"
    decision_table_test = mlem2.getDecisionTable(test_path).dropna()
    decision_class = decision_table_test[decision_table_test.columns[-1]].values.tolist()

    # Nominal-attribute judges for the test table.
    nominal_path = f"{DIR_UCI}/{FILENAME}/{FILENAME}.nominal"
    list_nominal = mlem2.getNominalList(nominal_path)
    list_judgeNominal = mlem2.getJudgeNominal(decision_table_test, list_nominal)

    # Classify with LERS and score.
    predictions = LERS.predictByLERS(rules, decision_table_test, list_judgeNominal)
    accuracy = accuracy_score(decision_class, predictions)

    savepath = f"{DIR_UCI}/{FILENAME}/MLEM2_RuleClusteringBySameCondition_LERS.csv"
    with open(savepath, "a") as f:
        f.write(
            f"MLEM2_RuleClusteringBySameCondition_LERS,{k},{FILENAME},"
            f"{iter1},{iter2},{accuracy}\n"
        )

    return accuracy
def MLEM2_LERS(FILENAME, iter1, iter2):
    """Evaluate plain MLEM2 + LERS on one fold: induce (or load cached) rules,
    score the test fold, gather rule statistics on the training fold, and
    append everything to fairness/00_normal/MLEM2_LERS.csv."""
    # Rules: load the cached pickle when present, otherwise induce and cache.
    rules_path = DIR_UCI + '/' + FILENAME + '/rules/rules_' + str(iter1) + '-' + str(iter2) + '.pkl'
    if os.path.isfile(rules_path):
        rules = mlem2.loadPickleRules(rules_path)
    else:
        rules = mlem2.getRulesByMLEM2(FILENAME, iter1, iter2)
        mlem2.savePickleRules(rules, rules_path)

    # Accuracy on the test fold via LERS.
    decision_table_test, decision_class = getData(FILENAME, iter1, iter2, T="test")
    list_judgeNominal = getJudgeNominal(decision_table_test, FILENAME)
    predictions = LERS.predictByLERS(rules, decision_table_test, list_judgeNominal)
    accuracy = accuracy_score(decision_class, predictions)

    # Rule statistics; support/confidence and acc/recall use the training fold
    # (decision_class is intentionally rebound here).
    num = len(rules)
    num_class = strNumClassRules(rules)
    mean_length = mlem2.getMeanLength(rules)
    decision_table_train, decision_class = getData(FILENAME, iter1, iter2, T="train")
    list_judgeNominal = getJudgeNominal(decision_table_train, FILENAME)
    mean_support, mean_conf = LERS.getSupportConfidenceRules(rules, decision_table_train, list_judgeNominal)
    acc_recall = LERS.getAccurayRecall(rules, decision_table_train, list_judgeNominal)

    # Append one CSV row with all metrics.
    row = ','.join(str(v) for v in (
        'MLEM2_LERS', FILENAME, iter1, iter2, accuracy, num, num_class,
        mean_length, mean_support, mean_conf, strAccRecall(rules, acc_recall)))
    savepath = DIR_UCI + '/' + FILENAME + '/fairness/00_normal/MLEM2_LERS.csv'
    with open(savepath, "a") as f:
        f.write(row + "\n")
    return 0
Exemple #8
0
def MLEM2_RuleClusteringByConsistentSim_Identified(FILENAME, iter1, iter2, k,
                                                   p):
    """Cluster MLEM2 rules by consistent similarity (parameter k) and append
    the per-identified-class measure for ratio p to
    Identify_MLEM2_RuleClusteringByConsistentSim.csv."""
    # Rule induction with pickle cache.
    rules_path = f"{DIR_UCI}/{FILENAME}/rules/rules_{iter1}-{iter2}.pkl"
    if os.path.isfile(rules_path):
        rules = mlem2.loadPickleRules(rules_path)
    else:
        rules = mlem2.getRulesByMLEM2(FILENAME, iter1, iter2)
        mlem2.savePickleRules(rules, rules_path)

    # Training table, column names, and nominal judges needed by clustering.
    train_path = f"{DIR_UCI}/{FILENAME}/{FILENAME}-train{iter1}-{iter2}.tsv"
    decision_table = mlem2.getDecisionTable(train_path)
    colnames = mlem2.getColNames(decision_table)

    nominal_path = f"{DIR_UCI}/{FILENAME}/{FILENAME}.nominal"
    list_nominal = mlem2.getNominalList(nominal_path)
    list_judgeNominal = mlem2.getJudgeNominal(decision_table, list_nominal)

    # Consistent-similarity clustering with its own cache.
    cluster_path = (f"{DIR_UCI}/{FILENAME}/rules_cluster_consistent_sim/"
                    f"rules-{k}_{iter1}-{iter2}.pkl")
    if os.path.isfile(cluster_path):
        rules = mlem2.loadPickleRules(cluster_path)
    else:
        rules = clustering.getRuleClusteringByConsistentSimilarity(
            rules, colnames, list_judgeNominal, k=k)
        mlem2.savePickleRules(rules, cluster_path)

    # Per-identified-class measure of the clustered rule set.
    ans = mlem2.getPerIdentifiedClass(rules, p)

    savepath = f"{DIR_UCI}/{FILENAME}/Identify_MLEM2_RuleClusteringByConsistentSim.csv"
    with open(savepath, "a") as f:
        f.write(
            f"Identify_MLEM2_RuleClusteringByConsistentSim,{k},{p},{FILENAME},"
            f"{iter1},{iter2},{ans}\n"
        )

    return ans
Exemple #9
0
def MLEM2_PerN(FILENAME, n):
    """Average the per-n-support measure of MLEM2 rules over the 10x10
    cross-validation folds.

    Args:
        FILENAME: dataset directory name under DIR_UCI.
        n: threshold passed through to mlem2.getPerNSupport.

    Returns:
        The mean per-n-support over all 100 folds. (Bug fix: the original
        computed and printed this value but returned None; callers can now
        use the result. Printing behavior is unchanged.)
    """
    ans = 0.0
    for iter1, iter2 in product(range(1, 11), range(1, 11)):
        # Load cached rules when available; otherwise induce them. Note: this
        # function deliberately does not write the cache (matches original).
        fullpath_filename = DIR_UCI + '/' + FILENAME + '/rules/' + 'rules_' + str(
            iter1) + '-' + str(iter2) + '.pkl'
        rules = mlem2.loadPickleRules(fullpath_filename) if os.path.isfile(
            fullpath_filename) else mlem2.getRulesByMLEM2(
                FILENAME, iter1, iter2)

        # Accumulate the per-fold measure.
        per_n_support = mlem2.getPerNSupport(rules, n)
        ans += per_n_support
        print(per_n_support)
    ans /= 100  # 10 x 10 folds
    print(ans)
    return ans
def MLEM2_OnlyK_STAT(FILENAME, iter1, iter2, k):
    """Filter MLEM2 rules to those with support >= k and append rule
    statistics (count, mean length, mean support) to MLEM2_OnlyK_STAT.csv."""
    print(f"START iter1 iter2 k : {iter1},{iter2},{k}")

    # Rule induction with pickle cache.
    rules_path = f"{DIR_UCI}/{FILENAME}/rules/rules_{iter1}-{iter2}.pkl"
    if os.path.isfile(rules_path):
        rules = mlem2.loadPickleRules(rules_path)
    else:
        rules = mlem2.getRulesByMLEM2(FILENAME, iter1, iter2)
        mlem2.savePickleRules(rules, rules_path)

    # Only-k filter with its own cache.
    onlyk_path = f"{DIR_UCI}/{FILENAME}/rules_onlyK/rules-{k}_{iter1}-{iter2}.pkl"
    if os.path.isfile(onlyk_path):
        rules = mlem2.loadPickleRules(onlyk_path)
    else:
        rules = [r for r in rules if len(r.getSupport()) >= k]
        mlem2.savePickleRules(rules, onlyk_path)

    # Rule statistics: count, mean length, mean support.
    num = len(rules)
    leng = mlem2.getMeanLength(rules)
    support = mlem2.getMeanSupport(rules)

    # Append one CSV row with the statistics.
    savepath = f"{DIR_UCI}/{FILENAME}/MLEM2_OnlyK_STAT.csv"
    with open(savepath, "a") as f:
        f.write(f"MLEM2_OnlyK_STAT,{k},{FILENAME},{iter1},{iter2},{num},{leng},{support}\n")

    return 0
Exemple #11
0
def MLEM2_RuleClusteringBySameCondition_Identified(FILENAME, iter1, iter2, k,
                                                   p):
    """Cluster MLEM2 rules by identical conditions (parameter k) and append
    the per-identified-class measure for ratio p to
    Identify_MLEM2_SameCondition.csv."""
    # Rule induction with pickle cache.
    rules_path = f"{DIR_UCI}/{FILENAME}/rules/rules_{iter1}-{iter2}.pkl"
    if os.path.isfile(rules_path):
        rules = mlem2.loadPickleRules(rules_path)
    else:
        rules = mlem2.getRulesByMLEM2(FILENAME, iter1, iter2)
        mlem2.savePickleRules(rules, rules_path)

    # Same-condition clustering with its own cache.
    cluster_path = (f"{DIR_UCI}/{FILENAME}/rules_cluster_same_condition/"
                    f"rules-{k}_{iter1}-{iter2}.pkl")
    if os.path.isfile(cluster_path):
        rules = mlem2.loadPickleRules(cluster_path)
    else:
        rules = clustering.getRuleClusteringBySameCondition(rules, k=k)
        mlem2.savePickleRules(rules, cluster_path)

    # Per-identified-class measure of the clustered rule set.
    ans = mlem2.getPerIdentifiedClass(rules, p)

    savepath = f"{DIR_UCI}/{FILENAME}/Identify_MLEM2_SameCondition.csv"
    with open(savepath, "a") as f:
        f.write(f"Identify_MLEM2_SameCondition,{k},{p},{FILENAME},{iter1},{iter2},{ans}\n")

    return ans
Exemple #12
0
def MLEM2_delEAlphaRule_LERS(FILENAME, iter1, iter2, DELFUN, CLASS,
                             ATTRIBUTE_VALUE, alpha):
    """Edit alpha-discriminatory rules with DELFUN, evaluate with LERS, and
    append accuracy plus rule statistics to a fairness CSV.

    Args:
        FILENAME: dataset directory name under DIR_UCI.
        iter1, iter2: cross-validation fold indices.
        DELFUN: callable(rules, attr, value, table, judges, alpha) -> rules;
            removes discriminatory elementary conditions or whole rules.
        CLASS: decision class whose rules are edited, or "ALL" for every rule.
        ATTRIBUTE_VALUE: mapping of sensitive attribute -> iterable of values.
        alpha: discrimination threshold passed through to DELFUN.

    Returns:
        0 (results are appended to the CSV as a side effect).
    """
    # Progress log: timestamp, fold, strategy name, target attributes, alpha.
    print(datetime.now().strftime('%Y/%m/%d %H:%M:%S') + ' ' + FILENAME + ' ' +
          str(iter1) + ' ' + str(iter2) + ' ' + DELFUN.__name__ + ' ' +
          strAttributeValue(ATTRIBUTE_VALUE) + ' ' + str(alpha) + ' ' +
          "START")

    # rule induction and rule save (pickle cache)
    fullpath_filename = DIR_UCI + '/' + FILENAME + '/rules/' + 'rules_' + str(
        iter1) + '-' + str(iter2) + '.pkl'
    rules = mlem2.loadPickleRules(fullpath_filename) if os.path.isfile(
        fullpath_filename) else mlem2.getRulesByMLEM2(FILENAME, iter1, iter2)
    if not os.path.isfile(fullpath_filename):
        mlem2.savePickleRules(rules, fullpath_filename)

    # train data setup (also reused for the statistics at the bottom)
    decision_table_train, decision_class = getData(FILENAME,
                                                   iter1,
                                                   iter2,
                                                   T="train")
    list_judgeNominal = getJudgeNominal(decision_table_train, FILENAME)

    # Delete elementary conditions of alpha-discriminatory rules, or delete
    # the rules themselves (DELFUN decides which).
    if CLASS != "ALL":
        # Only rules of the target class are edited; the rest pass through.
        rules_target = mlem2.getRulesClass(rules, CLASS)
        rules_nontarget = mlem2.getRulesClass(rules, CLASS, judge=False)
        for attr in ATTRIBUTE_VALUE:
            for e in ATTRIBUTE_VALUE[attr]:
                rules_target = DELFUN(rules_target, attr, e,
                                      decision_table_train, list_judgeNominal,
                                      alpha)
        rules_target.extend(rules_nontarget)
        rules = rules_target
    else:
        for attr in ATTRIBUTE_VALUE:
            for e in ATTRIBUTE_VALUE[attr]:
                rules = DELFUN(rules, attr, e, decision_table_train,
                               list_judgeNominal, alpha)

    # Progress log after rule editing.
    print(datetime.now().strftime('%Y/%m/%d %H:%M:%S') + ' ' + FILENAME + ' ' +
          str(iter1) + ' ' + str(iter2) + ' ' + DELFUN.__name__ + ' ' +
          strAttributeValue(ATTRIBUTE_VALUE) + ' ' + str(alpha) + ' ' +
          "RULES")

    # test data setup (decision_class is intentionally rebound here)
    decision_table_test, decision_class = getData(FILENAME,
                                                  iter1,
                                                  iter2,
                                                  T="test")
    list_judgeNominal = getJudgeNominal(decision_table_test, FILENAME)

    # predict by LERS
    predictions = LERS.predictByLERS(rules, decision_table_test,
                                     list_judgeNominal)

    # Accuracy on the test fold.
    accuracy = accuracy_score(decision_class, predictions)
    # Number of rules.
    num = len(rules)
    # Number of rules per class.
    num_class = strNumClassRules(rules)
    # Mean rule length.
    mean_length = mlem2.getMeanLength(rules)
    # Mean support and mean confidence, computed on the training fold.
    list_judgeNominal = getJudgeNominal(decision_table_train, FILENAME)
    mean_support, mean_conf = LERS.getSupportConfidenceRules(
        rules, decision_table_train, list_judgeNominal)
    # Accuracy and recall per rule, computed on the training fold.
    acc_recall = LERS.getAccurayRecall(rules, decision_table_train,
                                       list_judgeNominal)

    # Append one CSV row with all metrics.
    savepath = DIR_UCI + '/' + FILENAME + '/fairness/02_alpha_preserve/MLEM2_delEAlphaRule_LERS.csv'
    with open(savepath, "a") as f:
        f.writelines(
            'MLEM2_delEAlphaRule_LERS,{DELFUN},{CLASS},{FILENAME},{ATTRIBUTE_VALUE},{alpha},{iter1},{iter2},{acc},{num},{num_class},{mean_length},{mean_support},{mean_conf},{acc_recall}'
            .format(DELFUN=DELFUN.__name__,
                    CLASS=CLASS,
                    FILENAME=FILENAME,
                    ATTRIBUTE_VALUE=strAttributeValue(ATTRIBUTE_VALUE),
                    alpha=alpha,
                    iter1=iter1,
                    iter2=iter2,
                    acc=accuracy,
                    num=num,
                    num_class=num_class,
                    mean_length=mean_length,
                    mean_support=mean_support,
                    mean_conf=mean_conf,
                    acc_recall=strAccRecall(rules, acc_recall)) + "\n")
    # Final progress log.
    print(datetime.now().strftime('%Y/%m/%d %H:%M:%S') + ' ' + FILENAME + ' ' +
          str(iter1) + ' ' + str(iter2) + ' ' + DELFUN.__name__ + ' ' +
          strAttributeValue(ATTRIBUTE_VALUE) + ' ' + str(alpha) + ' ' + "END")

    return (0)
Exemple #13
0
    return (predictions)


# ========================================
# main
# ========================================
if __name__ == "__main__":
    # Demo: induce rules for one fold of the German-credit data and classify
    # the matching test fold with LERS.
    FILENAME = 'german_credit_categorical'
    #FILENAME = 'hayes-roth'
    iter1 = 1
    iter2 = 1

    rules = mlem2.getRulesByMLEM2(FILENAME, iter1, iter2)

    # Test fold: drop incomplete rows; the last column holds the class label.
    test_path = f"/mnt/data/uci/{FILENAME}/{FILENAME}-test{iter1}-{iter2}.tsv"
    decision_table_test = mlem2.getDecisionTable(test_path).dropna()
    decision_class = decision_table_test[decision_table_test.columns[-1]].values.tolist()

    # Nominal-attribute judges for the test table.
    nominal_path = f"/mnt/data/uci/{FILENAME}/{FILENAME}.nominal"
    list_nominal = mlem2.getNominalList(nominal_path)
    list_judgeNominal = mlem2.getJudgeNominal(decision_table_test, list_nominal)

    # predict by LERS
    predictions = predictByLERS(rules, decision_table_test, list_judgeNominal)
def MLEM2_OnlyK_LERS(FILENAME, iter1, iter2, k):
    """Keep only MLEM2 rules with support >= k, classify the test fold with
    LERS, and append the accuracy to MLEM2_OnlyK_LERS.csv."""
    def _load_or_build(path, build):
        # Reuse a pickled result when present; otherwise build and cache it.
        if os.path.isfile(path):
            return mlem2.loadPickleRules(path)
        result = build()
        mlem2.savePickleRules(result, path)
        return result

    print(f"START iter1 iter2 k : {iter1},{iter2},{k}")

    base = f"{DIR_UCI}/{FILENAME}"
    # Rule induction, then the only-k filter, each behind its own cache.
    rules = _load_or_build(
        f"{base}/rules/rules_{iter1}-{iter2}.pkl",
        lambda: mlem2.getRulesByMLEM2(FILENAME, iter1, iter2))
    rules = _load_or_build(
        f"{base}/rules_onlyK/rules-{k}_{iter1}-{iter2}.pkl",
        lambda: [r for r in rules if len(r.getSupport()) >= k])

    # Test fold: remove rows with missing values; final column is the class.
    decision_table_test = mlem2.getDecisionTable(
        f"{base}/{FILENAME}-test{iter1}-{iter2}.tsv").dropna()
    decision_class = decision_table_test[decision_table_test.columns[-1]].values.tolist()

    list_nominal = mlem2.getNominalList(f"{base}/{FILENAME}.nominal")
    list_judgeNominal = mlem2.getJudgeNominal(decision_table_test, list_nominal)

    # Classify with LERS and score.
    predictions = LERS.predictByLERS(rules, decision_table_test, list_judgeNominal)
    accuracy = accuracy_score(decision_class, predictions)

    with open(f"{base}/MLEM2_OnlyK_LERS.csv", "a") as f:
        f.write(f"MLEM2_OnlyK_LERS,{k},{FILENAME},{iter1},{iter2},{accuracy}\n")

    return accuracy
def MLEM2_delEAlphaRule_LERS(FILENAME, iter1, iter2, DELFUN, CLASS, ATTRIBUTE_VALUE, alpha):
    """Remove alpha-discriminatory conditions/rules via DELFUN, classify the
    test fold with LERS, and append accuracy plus rule statistics to
    fairness/02_alpha_preserve/MLEM2_delEAlphaRule_LERS.csv."""
    def _stamp(tag):
        # Progress line: timestamp, fold, strategy, target attributes, alpha.
        print(f"{datetime.now().strftime('%Y/%m/%d %H:%M:%S')} {FILENAME} "
              f"{iter1} {iter2} {DELFUN.__name__} "
              f"{strAttributeValue(ATTRIBUTE_VALUE)} {alpha} {tag}")

    _stamp("START")

    # Rule induction with pickle cache.
    rules_path = f"{DIR_UCI}/{FILENAME}/rules/rules_{iter1}-{iter2}.pkl"
    if os.path.isfile(rules_path):
        rules = mlem2.loadPickleRules(rules_path)
    else:
        rules = mlem2.getRulesByMLEM2(FILENAME, iter1, iter2)
        mlem2.savePickleRules(rules, rules_path)

    # Training fold (used both for rule editing and the statistics below).
    decision_table_train, decision_class = getData(FILENAME, iter1, iter2, T="train")
    list_judgeNominal = getJudgeNominal(decision_table_train, FILENAME)

    # Drop alpha-discriminatory elementary conditions or whole rules (DELFUN
    # decides). When CLASS is not "ALL", only that class's rules are edited.
    if CLASS == "ALL":
        for attr in ATTRIBUTE_VALUE:
            for e in ATTRIBUTE_VALUE[attr]:
                rules = DELFUN(rules, attr, e, decision_table_train, list_judgeNominal, alpha)
    else:
        rules_target = mlem2.getRulesClass(rules, CLASS)
        rules_nontarget = mlem2.getRulesClass(rules, CLASS, judge=False)
        for attr in ATTRIBUTE_VALUE:
            for e in ATTRIBUTE_VALUE[attr]:
                rules_target = DELFUN(rules_target, attr, e, decision_table_train, list_judgeNominal, alpha)
        rules_target.extend(rules_nontarget)
        rules = rules_target

    _stamp("RULES")

    # Test-fold accuracy via LERS (decision_class intentionally rebound).
    decision_table_test, decision_class = getData(FILENAME, iter1, iter2, T="test")
    list_judgeNominal = getJudgeNominal(decision_table_test, FILENAME)
    predictions = LERS.predictByLERS(rules, decision_table_test, list_judgeNominal)
    accuracy = accuracy_score(decision_class, predictions)

    # Rule statistics on the training fold.
    num = len(rules)
    num_class = strNumClassRules(rules)
    mean_length = mlem2.getMeanLength(rules)
    list_judgeNominal = getJudgeNominal(decision_table_train, FILENAME)
    mean_support, mean_conf = LERS.getSupportConfidenceRules(rules, decision_table_train, list_judgeNominal)
    acc_recall = LERS.getAccurayRecall(rules, decision_table_train, list_judgeNominal)

    # Append one CSV row with all metrics.
    savepath = f"{DIR_UCI}/{FILENAME}/fairness/02_alpha_preserve/MLEM2_delEAlphaRule_LERS.csv"
    with open(savepath, "a") as f:
        f.write(
            f"MLEM2_delEAlphaRule_LERS,{DELFUN.__name__},{CLASS},{FILENAME},"
            f"{strAttributeValue(ATTRIBUTE_VALUE)},{alpha},{iter1},{iter2},"
            f"{accuracy},{num},{num_class},{mean_length},{mean_support},"
            f"{mean_conf},{strAccRecall(rules, acc_recall)}\n"
        )

    _stamp("END")

    return 0
Exemple #16
0
    pool = Pool(proc)
    multiargs = []
    for FILENAME, iter1, iter2 in product(FILENAMES, range(1,11), range(1,11)):    
        multiargs.append((FILENAME,iter1,iter2))
  
    #results = pool.starmap(MLEM2_LERS, multiargs)
    return(results)
      
# ========================================
# main
# ========================================
if __name__ == "__main__":
    # NOTE(review): FILENAMES, multi_main, and getEvalMeanVar are referenced
    # below but are not defined in this excerpt -- confirm they exist in the
    # full module before running.

    #FILENAMES = ['hayes-roth']
    #rules =
    rules = mlem2.getRulesByMLEM2('hayes-roth', 2, 2)


    # Run as a single process
    #for FILENAME, iter1, iter2 in product(FILENAMES, range(1,11), range(1,11)):
    #    print('(unknown) {i1} {i2}'.format(filename=FILENAME, i1=iter1, i2=iter2))
    #    print(MLEM2_LERS(FILENAME, iter1, iter2))

    # Parallel execution
    proc=4
    freeze_support()
    results = multi_main(proc, FILENAMES)

    # Mean and variance of the evaluation results
    print(getEvalMeanVar(results))
Exemple #17
0
            
        rules_new.extend(target_rules)
        
    return(rules_new)

        
# ========================================
# main
# ========================================
if __name__ == "__main__":

    # Demo: induce rules for fold (5, 4) of the hayes-roth dataset and build
    # the inputs required by the rule-clustering functions.
    FILENAME = 'hayes-roth'
    iter1 = 5
    iter2 = 4

    rules = mlem2.getRulesByMLEM2(FILENAME, iter1, iter2)

    # Training decision table and its column names.
    filepath = '/mnt/data/uci/'+FILENAME+'/'+FILENAME+'-train'+str(iter1)+'-'+str(iter2)+'.tsv'
    decision_table = mlem2.getDecisionTable(filepath)
    colnames = mlem2.getColNames(decision_table)

    # Nominal-attribute list and per-column nominal judges.
    filepath = '/mnt/data/uci/'+FILENAME+'/'+FILENAME+'.nominal'
    list_nominal = mlem2.getNominalList(filepath)
    list_judgeNominal = mlem2.getJudgeNominal(decision_table, list_nominal)

    # Rule clustering (all variants left disabled; uncomment one to run it).
    #rules_new = getRuleClusteringBySimilarity(rules, colnames, list_judgeNominal, k=3)
    #rules_new = getRuleClusteringByRandom(rules, k=3)
    #rules_new = getRuleClusteringBySameCondition(rules, k=3)
    #rules_new = getRuleClusteringByConsistentSimilarity(rules, colnames, list_judgeNominal, k=3)
    #rules_new = getRuleClusteringByConsistentSimilarityExceptMRule(rules, colnames, list_judgeNominal, k=3, m=3)
def MLEM2_RuleClusteringBySameCondition_LERS(FILENAME, iter1, iter2, k):
    """Same-condition rule clustering (parameter k) followed by LERS
    evaluation; appends the test-fold accuracy to
    MLEM2_RuleClusteringBySameCondition_LERS.csv."""
    def _load_or_build(path, build):
        # Reuse a pickled result when present; otherwise build and cache it.
        if os.path.isfile(path):
            return mlem2.loadPickleRules(path)
        result = build()
        mlem2.savePickleRules(result, path)
        return result

    base = f"{DIR_UCI}/{FILENAME}"
    # Rule induction, then same-condition clustering, each behind a cache.
    rules = _load_or_build(
        f"{base}/rules/rules_{iter1}-{iter2}.pkl",
        lambda: mlem2.getRulesByMLEM2(FILENAME, iter1, iter2))
    rules = _load_or_build(
        f"{base}/rules_cluster_same_condition/rules-{k}_{iter1}-{iter2}.pkl",
        lambda: clustering.getRuleClusteringBySameCondition(rules, k=k))

    # Test fold: remove rows with missing values; final column is the class.
    decision_table_test = mlem2.getDecisionTable(
        f"{base}/{FILENAME}-test{iter1}-{iter2}.tsv").dropna()
    decision_class = decision_table_test[decision_table_test.columns[-1]].values.tolist()

    list_nominal = mlem2.getNominalList(f"{base}/{FILENAME}.nominal")
    list_judgeNominal = mlem2.getJudgeNominal(decision_table_test, list_nominal)

    # Classify with LERS and score.
    predictions = LERS.predictByLERS(rules, decision_table_test, list_judgeNominal)
    accuracy = accuracy_score(decision_class, predictions)

    with open(f"{base}/MLEM2_RuleClusteringBySameCondition_LERS.csv", "a") as f:
        f.write(
            f"MLEM2_RuleClusteringBySameCondition_LERS,{k},{FILENAME},"
            f"{iter1},{iter2},{accuracy}\n"
        )

    return accuracy