def MLEM2_STAT(FILENAME, iter1, iter2):
    """Append rule-set statistics of one MLEM2 run to ``MLEM2_STAT.csv``.

    The rules are loaded from the pickle cache under
    ``DIR_UCI/<FILENAME>/rules/`` when that file exists; otherwise they are
    induced with ``mlem2.getRulesByMLEM2`` and written to the cache.

    Args:
        FILENAME: Dataset name; used as a directory name below ``DIR_UCI``
            and passed to the rule inducer.
        iter1: First run index; part of the cache filename and passed to
            the rule inducer.
        iter2: Second run index; used the same way as ``iter1``.

    Returns:
        Always ``0``.
    """
    # rule induction: load the cached rules, or induce them and cache them
    rules_path = '{}/{}/rules/rules_{}-{}.pkl'.format(
        DIR_UCI, FILENAME, iter1, iter2)
    if os.path.isfile(rules_path):
        rules = mlem2.loadPickleRules(rules_path)
    else:
        rules = mlem2.getRulesByMLEM2(FILENAME, iter1, iter2)
        mlem2.savePickleRules(rules, rules_path)

    # number of rules
    num = len(rules)
    # mean rule length
    leng = mlem2.getMeanLength(rules)
    # mean support
    support = mlem2.getMeanSupport(rules)

    # save to file (append one CSV row)
    row = 'MLEM2_STAT,1,{FILENAME},{iter1},{iter2},{num},{leng},{support}'.format(
        FILENAME=FILENAME, iter1=iter1, iter2=iter2,
        num=num, leng=leng, support=support)
    savepath = '{}/{}/MLEM2_STAT.csv'.format(DIR_UCI, FILENAME)
    with open(savepath, "a") as f:
        # write(), not writelines(): the row is a single string
        f.write(row + "\n")

    return 0
def MLEM2_OnlyK_Identified(FILENAME, iter1, iter2, k, p):
    """Evaluate ``mlem2.getPerIdentifiedClass`` on the only-k filtered rules.

    Base rules and filtered rules are each backed by a pickle cache below
    ``DIR_UCI/<FILENAME>/``; a cache file is read when present and written
    after computing otherwise. The result is appended as one row to
    ``Identify_MLEM2_OnlyK.csv``.

    Args:
        FILENAME: Dataset name; directory name below ``DIR_UCI``.
        iter1: First run index; part of the cache filenames.
        iter2: Second run index; part of the cache filenames.
        k: Minimum support size; rules with ``len(r.getSupport()) >= k``
            are kept.
        p: Second argument forwarded to ``mlem2.getPerIdentifiedClass``.

    Returns:
        The value returned by ``mlem2.getPerIdentifiedClass(rules, p)``.
    """
    # rule induction: load the cached rules, or induce them and cache them
    rules_path = '{}/{}/rules/rules_{}-{}.pkl'.format(
        DIR_UCI, FILENAME, iter1, iter2)
    if os.path.isfile(rules_path):
        rules = mlem2.loadPickleRules(rules_path)
    else:
        rules = mlem2.getRulesByMLEM2(FILENAME, iter1, iter2)
        mlem2.savePickleRules(rules, rules_path)

    # only-k rule filter, cached per (k, iter1, iter2)
    only_k_path = '{}/{}/rules_onlyK/rules-{}_{}-{}.pkl'.format(
        DIR_UCI, FILENAME, k, iter1, iter2)
    if os.path.isfile(only_k_path):
        rules = mlem2.loadPickleRules(only_k_path)
    else:
        rules = [r for r in rules if len(r.getSupport()) >= k]
        mlem2.savePickleRules(rules, only_k_path)

    # compute PerIdentifiedClass
    ans = mlem2.getPerIdentifiedClass(rules, p)

    # save (append one CSV row)
    row = 'Identify_MLEM2_OnlyK,{k},{p},{FILENAME},{iter1},{iter2},{ans}'.format(
        FILENAME=FILENAME, k=k, p=p, iter1=iter1, iter2=iter2, ans=ans)
    savepath = '{}/{}/Identify_MLEM2_OnlyK.csv'.format(DIR_UCI, FILENAME)
    with open(savepath, "a") as f:
        # write(), not writelines(): the row is a single string
        f.write(row + "\n")

    return ans
def MLEM2_LERS(FILENAME, iter1, iter2):
    """Evaluate MLEM2 rules with LERS and append the metrics to a CSV file.

    Rules are loaded from the pickle cache under
    ``DIR_UCI/<FILENAME>/rules/`` when present, otherwise induced and
    cached. Accuracy is computed on the test data; support, confidence and
    the Acc/Recall values are computed on the training data. One row is
    appended to ``fairness/00_normal/MLEM2_LERS.csv``.

    Args:
        FILENAME: Dataset name; directory name below ``DIR_UCI``.
        iter1: First run index; forwarded to ``getData`` and the inducer.
        iter2: Second run index; forwarded the same way.

    Returns:
        Always ``0``.
    """
    # rule induction and rule save
    rules_path = '{}/{}/rules/rules_{}-{}.pkl'.format(
        DIR_UCI, FILENAME, iter1, iter2)
    if os.path.isfile(rules_path):
        rules = mlem2.loadPickleRules(rules_path)
    else:
        rules = mlem2.getRulesByMLEM2(FILENAME, iter1, iter2)
        mlem2.savePickleRules(rules, rules_path)

    # test data setup
    decision_table_test, decision_class = getData(FILENAME, iter1, iter2, T="test")
    list_judgeNominal = getJudgeNominal(decision_table_test, FILENAME)

    # predict by LERS
    predictions = LERS.predictByLERS(rules, decision_table_test, list_judgeNominal)

    # accuracy on the test data
    accuracy = accuracy_score(decision_class, predictions)
    # number of rules
    num = len(rules)
    # number of rules per class
    num_class = strNumClassRules(rules)
    # mean rule length
    mean_length = mlem2.getMeanLength(rules)

    # mean support and mean confidence (on the training data);
    # the training class labels returned by getData are not needed here
    decision_table_train, _ = getData(FILENAME, iter1, iter2, T="train")
    list_judgeNominal = getJudgeNominal(decision_table_train, FILENAME)
    mean_support, mean_conf = LERS.getSupportConfidenceRules(
        rules, decision_table_train, list_judgeNominal)

    # Acc and Recall
    acc_recall = LERS.getAccurayRecall(rules, decision_table_train, list_judgeNominal)

    # save to file (append one CSV row)
    row = ('MLEM2_LERS,{FILENAME},{iter1},{iter2},{acc},{num},{num_class},'
           '{mean_length},{mean_support},{mean_conf},{acc_recall}').format(
        FILENAME=FILENAME, iter1=iter1, iter2=iter2, acc=accuracy,
        num=num, num_class=num_class, mean_length=mean_length,
        mean_support=mean_support, mean_conf=mean_conf,
        acc_recall=strAccRecall(rules, acc_recall))
    savepath = '{}/{}/fairness/00_normal/MLEM2_LERS.csv'.format(DIR_UCI, FILENAME)
    with open(savepath, "a") as f:
        # write(), not writelines(): the row is a single string
        f.write(row + "\n")

    return 0
def MLEM2_OnlyK_LERS(FILENAME, iter1, iter2, k):
    """Classify the test data with only-k filtered rules and log accuracy.

    Base rules and filtered rules are each backed by a pickle cache below
    ``DIR_UCI/<FILENAME>/``. The accuracy is appended as one row to
    ``MLEM2_OnlyK_LERS.csv``.

    Args:
        FILENAME: Dataset name; directory name and file prefix below
            ``DIR_UCI``.
        iter1: First run index; part of the test-file and cache filenames.
        iter2: Second run index; part of the test-file and cache filenames.
        k: Minimum support size; rules with ``len(r.getSupport()) >= k``
            are kept.

    Returns:
        The value of ``accuracy_score(decision_class, predictions)``.
    """
    print("START iter1 iter2 k : {},{},{}".format(iter1, iter2, k))

    # rule induction: load the cached rules, or induce them and cache them
    rules_path = '{}/{}/rules/rules_{}-{}.pkl'.format(
        DIR_UCI, FILENAME, iter1, iter2)
    if os.path.isfile(rules_path):
        rules = mlem2.loadPickleRules(rules_path)
    else:
        rules = mlem2.getRulesByMLEM2(FILENAME, iter1, iter2)
        mlem2.savePickleRules(rules, rules_path)

    # only-k rule filter, cached per (k, iter1, iter2)
    only_k_path = '{}/{}/rules_onlyK/rules-{}_{}-{}.pkl'.format(
        DIR_UCI, FILENAME, k, iter1, iter2)
    if os.path.isfile(only_k_path):
        rules = mlem2.loadPickleRules(only_k_path)
    else:
        rules = [r for r in rules if len(r.getSupport()) >= k]
        mlem2.savePickleRules(rules, only_k_path)

    # test data setup; rows with missing values are dropped and the last
    # column is taken as the decision class
    test_path = '{0}/{1}/{1}-test{2}-{3}.tsv'.format(
        DIR_UCI, FILENAME, iter1, iter2)
    decision_table_test = mlem2.getDecisionTable(test_path).dropna()
    decision_class = decision_table_test[
        decision_table_test.columns[-1]].values.tolist()
    nominal_path = '{0}/{1}/{1}.nominal'.format(DIR_UCI, FILENAME)
    list_nominal = mlem2.getNominalList(nominal_path)
    list_judgeNominal = mlem2.getJudgeNominal(decision_table_test, list_nominal)

    # predict by LERS
    predictions = LERS.predictByLERS(rules, decision_table_test, list_judgeNominal)

    # accuracy
    accuracy = accuracy_score(decision_class, predictions)

    # save (append one CSV row)
    row = 'MLEM2_OnlyK_LERS,{k},{FILENAME},{iter1},{iter2},{acc}'.format(
        FILENAME=FILENAME, k=k, iter1=iter1, iter2=iter2, acc=accuracy)
    savepath = '{}/{}/MLEM2_OnlyK_LERS.csv'.format(DIR_UCI, FILENAME)
    with open(savepath, "a") as f:
        # write(), not writelines(): the row is a single string
        f.write(row + "\n")

    return accuracy
def MLEM2_RuleClusteringByConsistentExceptMRule_STAT(FILENAME, iter1, iter2, k, m):
    """Append statistics of the "consistent except m-rule" clustered rules.

    Base rules and clustered rules are each backed by a pickle cache below
    ``DIR_UCI/<FILENAME>/``. One row with the rule count, mean length and
    mean support is appended to
    ``MLEM2_RuleClusteringByConsistentExceptMRule_STAT.csv``.

    NOTE(review): the cluster cache filename and the CSV row contain ``k``
    but not ``m``, so calls that differ only in ``m`` share one cache file
    and produce indistinguishable rows -- confirm this is intended.

    Args:
        FILENAME: Dataset name; directory name and file prefix below
            ``DIR_UCI``.
        iter1: First run index; part of the train-file and cache filenames.
        iter2: Second run index; part of the train-file and cache filenames.
        k: Forwarded as ``k`` to the clustering function.
        m: Forwarded as ``m`` to the clustering function.

    Returns:
        Always ``0``.
    """
    # rule induction: load the cached rules, or induce them and cache them
    rules_path = '{}/{}/rules/rules_{}-{}.pkl'.format(
        DIR_UCI, FILENAME, iter1, iter2)
    if os.path.isfile(rules_path):
        rules = mlem2.loadPickleRules(rules_path)
    else:
        rules = mlem2.getRulesByMLEM2(FILENAME, iter1, iter2)
        mlem2.savePickleRules(rules, rules_path)

    # inputs for rule clustering (read even when the cluster cache exists,
    # as in the original flow)
    train_path = '{0}/{1}/{1}-train{2}-{3}.tsv'.format(
        DIR_UCI, FILENAME, iter1, iter2)
    decision_table = mlem2.getDecisionTable(train_path)
    colnames = mlem2.getColNames(decision_table)
    nominal_path = '{0}/{1}/{1}.nominal'.format(DIR_UCI, FILENAME)
    list_nominal = mlem2.getNominalList(nominal_path)
    list_judgeNominal = mlem2.getJudgeNominal(decision_table, list_nominal)

    # rule clustering, cached per (k, iter1, iter2)
    cluster_path = '{}/{}/rules_cluster_consistent_except_mrule/rules-{}_{}-{}.pkl'.format(
        DIR_UCI, FILENAME, k, iter1, iter2)
    if os.path.isfile(cluster_path):
        rules = mlem2.loadPickleRules(cluster_path)
    else:
        rules = clustering.getRuleClusteringByConsistentExceptMRule(
            rules, colnames, list_judgeNominal, k=k, m=m)
        mlem2.savePickleRules(rules, cluster_path)

    # number of rules
    num = len(rules)
    # mean rule length
    leng = mlem2.getMeanLength(rules)
    # mean support
    support = mlem2.getMeanSupport(rules)

    # save to file (append one CSV row)
    row = ('MLEM2_RuleClusteringByConsistentExceptMRule_STAT,{k},{FILENAME},'
           '{iter1},{iter2},{num},{leng},{support}').format(
        FILENAME=FILENAME, k=k, iter1=iter1, iter2=iter2,
        num=num, leng=leng, support=support)
    savepath = '{}/{}/MLEM2_RuleClusteringByConsistentExceptMRule_STAT.csv'.format(
        DIR_UCI, FILENAME)
    with open(savepath, "a") as f:
        # write(), not writelines(): the row is a single string
        f.write(row + "\n")

    return 0
def MLEM2_RuleClusteringBySameCondition_LERS(FILENAME, iter1, iter2, k):
    """Classify the test data with same-condition clustered rules.

    Base rules and clustered rules are each backed by a pickle cache below
    ``DIR_UCI/<FILENAME>/``. The accuracy is appended as one row to
    ``MLEM2_RuleClusteringBySameCondition_LERS.csv``.

    Args:
        FILENAME: Dataset name; directory name and file prefix below
            ``DIR_UCI``.
        iter1: First run index; part of the test-file and cache filenames.
        iter2: Second run index; part of the test-file and cache filenames.
        k: Forwarded as ``k`` to
            ``clustering.getRuleClusteringBySameCondition``.

    Returns:
        The value of ``accuracy_score(decision_class, predictions)``.
    """
    # rule induction: load the cached rules, or induce them and cache them
    rules_path = '{}/{}/rules/rules_{}-{}.pkl'.format(
        DIR_UCI, FILENAME, iter1, iter2)
    if os.path.isfile(rules_path):
        rules = mlem2.loadPickleRules(rules_path)
    else:
        rules = mlem2.getRulesByMLEM2(FILENAME, iter1, iter2)
        mlem2.savePickleRules(rules, rules_path)

    # rule clustering, cached per (k, iter1, iter2)
    cluster_path = '{}/{}/rules_cluster_same_condition/rules-{}_{}-{}.pkl'.format(
        DIR_UCI, FILENAME, k, iter1, iter2)
    if os.path.isfile(cluster_path):
        rules = mlem2.loadPickleRules(cluster_path)
    else:
        rules = clustering.getRuleClusteringBySameCondition(rules, k=k)
        mlem2.savePickleRules(rules, cluster_path)

    # test data setup; rows with missing values are dropped and the last
    # column is taken as the decision class
    test_path = '{0}/{1}/{1}-test{2}-{3}.tsv'.format(
        DIR_UCI, FILENAME, iter1, iter2)
    decision_table_test = mlem2.getDecisionTable(test_path).dropna()
    decision_class = decision_table_test[
        decision_table_test.columns[-1]].values.tolist()
    nominal_path = '{0}/{1}/{1}.nominal'.format(DIR_UCI, FILENAME)
    list_nominal = mlem2.getNominalList(nominal_path)
    list_judgeNominal = mlem2.getJudgeNominal(decision_table_test, list_nominal)

    # predict by LERS
    predictions = LERS.predictByLERS(rules, decision_table_test, list_judgeNominal)

    # accuracy
    accuracy = accuracy_score(decision_class, predictions)

    # save (append one CSV row)
    row = ('MLEM2_RuleClusteringBySameCondition_LERS,{k},{FILENAME},'
           '{iter1},{iter2},{acc}').format(
        FILENAME=FILENAME, k=k, iter1=iter1, iter2=iter2, acc=accuracy)
    savepath = '{}/{}/MLEM2_RuleClusteringBySameCondition_LERS.csv'.format(
        DIR_UCI, FILENAME)
    with open(savepath, "a") as f:
        # write(), not writelines(): the row is a single string
        f.write(row + "\n")

    return accuracy
def MLEM2_LERS(FILENAME, iter1, iter2):
    """Evaluate MLEM2 rules with LERS and append the metrics to a CSV file.

    Rules are loaded from the pickle cache under
    ``DIR_UCI/<FILENAME>/rules/`` when present, otherwise induced and
    cached. Accuracy is computed on the test data; support, confidence and
    the Acc/Recall values are computed on the training data. One row is
    appended to ``fairness/00_normal/MLEM2_LERS.csv``.

    Args:
        FILENAME: Dataset name; directory name below ``DIR_UCI``.
        iter1: First run index; forwarded to ``getData`` and the inducer.
        iter2: Second run index; forwarded the same way.

    Returns:
        Always ``0``.
    """
    # rule induction and rule save
    rules_path = "{}/{}/rules/rules_{}-{}.pkl".format(DIR_UCI, FILENAME, iter1, iter2)
    if os.path.isfile(rules_path):
        rules = mlem2.loadPickleRules(rules_path)
    else:
        rules = mlem2.getRulesByMLEM2(FILENAME, iter1, iter2)
        mlem2.savePickleRules(rules, rules_path)

    # test data setup
    decision_table_test, decision_class = getData(FILENAME, iter1, iter2, T="test")
    list_judgeNominal = getJudgeNominal(decision_table_test, FILENAME)

    # predict by LERS
    predictions = LERS.predictByLERS(rules, decision_table_test, list_judgeNominal)

    # accuracy on the test data
    accuracy = accuracy_score(decision_class, predictions)
    # number of rules
    num = len(rules)
    # number of rules per class
    num_class = strNumClassRules(rules)
    # mean rule length
    mean_length = mlem2.getMeanLength(rules)

    # mean support and mean confidence (on the training data);
    # the training class labels returned by getData are not needed here
    decision_table_train, _ = getData(FILENAME, iter1, iter2, T="train")
    list_judgeNominal = getJudgeNominal(decision_table_train, FILENAME)
    mean_support, mean_conf = LERS.getSupportConfidenceRules(
        rules, decision_table_train, list_judgeNominal
    )

    # Acc and Recall
    acc_recall = LERS.getAccurayRecall(rules, decision_table_train, list_judgeNominal)

    # save to file (append one CSV row)
    row = (
        "MLEM2_LERS,{FILENAME},{iter1},{iter2},{acc},{num},{num_class},"
        "{mean_length},{mean_support},{mean_conf},{acc_recall}"
    ).format(
        FILENAME=FILENAME,
        iter1=iter1,
        iter2=iter2,
        acc=accuracy,
        num=num,
        num_class=num_class,
        mean_length=mean_length,
        mean_support=mean_support,
        mean_conf=mean_conf,
        acc_recall=strAccRecall(rules, acc_recall),
    )
    savepath = "{}/{}/fairness/00_normal/MLEM2_LERS.csv".format(DIR_UCI, FILENAME)
    with open(savepath, "a") as f:
        # write(), not writelines(): the row is a single string
        f.write(row + "\n")

    return 0
def MLEM2_RuleClusteringByConsistentSim_Identified(FILENAME, iter1, iter2, k, p):
    """Evaluate ``mlem2.getPerIdentifiedClass`` on consistent-similarity clusters.

    Base rules and clustered rules are each backed by a pickle cache below
    ``DIR_UCI/<FILENAME>/``. The result is appended as one row to
    ``Identify_MLEM2_RuleClusteringByConsistentSim.csv``.

    Args:
        FILENAME: Dataset name; directory name and file prefix below
            ``DIR_UCI``.
        iter1: First run index; part of the train-file and cache filenames.
        iter2: Second run index; part of the train-file and cache filenames.
        k: Forwarded as ``k`` to
            ``clustering.getRuleClusteringByConsistentSimilarity``.
        p: Second argument forwarded to ``mlem2.getPerIdentifiedClass``.

    Returns:
        The value returned by ``mlem2.getPerIdentifiedClass(rules, p)``.
    """
    # rule induction: load the cached rules, or induce them and cache them
    rules_path = '{}/{}/rules/rules_{}-{}.pkl'.format(
        DIR_UCI, FILENAME, iter1, iter2)
    if os.path.isfile(rules_path):
        rules = mlem2.loadPickleRules(rules_path)
    else:
        rules = mlem2.getRulesByMLEM2(FILENAME, iter1, iter2)
        mlem2.savePickleRules(rules, rules_path)

    # inputs for rule clustering (read even when the cluster cache exists,
    # as in the original flow)
    train_path = '{0}/{1}/{1}-train{2}-{3}.tsv'.format(
        DIR_UCI, FILENAME, iter1, iter2)
    decision_table = mlem2.getDecisionTable(train_path)
    colnames = mlem2.getColNames(decision_table)
    nominal_path = '{0}/{1}/{1}.nominal'.format(DIR_UCI, FILENAME)
    list_nominal = mlem2.getNominalList(nominal_path)
    list_judgeNominal = mlem2.getJudgeNominal(decision_table, list_nominal)

    # rule clustering, cached per (k, iter1, iter2)
    cluster_path = '{}/{}/rules_cluster_consistent_sim/rules-{}_{}-{}.pkl'.format(
        DIR_UCI, FILENAME, k, iter1, iter2)
    if os.path.isfile(cluster_path):
        rules = mlem2.loadPickleRules(cluster_path)
    else:
        rules = clustering.getRuleClusteringByConsistentSimilarity(
            rules, colnames, list_judgeNominal, k=k)
        mlem2.savePickleRules(rules, cluster_path)

    # compute PerIdentifiedClass
    ans = mlem2.getPerIdentifiedClass(rules, p)

    # save (append one CSV row)
    row = ('Identify_MLEM2_RuleClusteringByConsistentSim,{k},{p},{FILENAME},'
           '{iter1},{iter2},{ans}').format(
        FILENAME=FILENAME, k=k, p=p, iter1=iter1, iter2=iter2, ans=ans)
    savepath = '{}/{}/Identify_MLEM2_RuleClusteringByConsistentSim.csv'.format(
        DIR_UCI, FILENAME)
    with open(savepath, "a") as f:
        # write(), not writelines(): the row is a single string
        f.write(row + "\n")

    return ans
def MLEM2_PerN(FILENAME, n):
    """Print and return the mean of ``mlem2.getPerNSupport`` over all runs.

    Iterates over every ``(iter1, iter2)`` pair in ``1..10 x 1..10``. For
    each pair the rules are loaded from the pickle cache under
    ``DIR_UCI/<FILENAME>/rules/`` when present, otherwise induced (newly
    induced rules are not written back to the cache here). The per-run
    value is printed, then the mean over all runs.

    Args:
        FILENAME: Dataset name; directory name below ``DIR_UCI``.
        n: Second argument forwarded to ``mlem2.getPerNSupport``.

    Returns:
        The mean of the per-run values (previously only printed).
    """
    total = 0.0
    runs = 0
    for iter1, iter2 in product(range(1, 11), range(1, 11)):
        # rule induction (cache lookup, falling back to induction)
        rules_path = '{}/{}/rules/rules_{}-{}.pkl'.format(
            DIR_UCI, FILENAME, iter1, iter2)
        if os.path.isfile(rules_path):
            rules = mlem2.loadPickleRules(rules_path)
        else:
            rules = mlem2.getRulesByMLEM2(FILENAME, iter1, iter2)

        # n per support
        per_n_support = mlem2.getPerNSupport(rules, n)
        total += per_n_support
        runs += 1
        print(per_n_support)

    # divide by the number of runs actually performed rather than a
    # hard-coded 100 that has to be kept in sync with the loop bounds
    ans = total / runs
    print(ans)
    return ans
def MLEM2_OnlyK_STAT(FILENAME, iter1, iter2, k):
    """Append statistics of the only-k filtered rules to ``MLEM2_OnlyK_STAT.csv``.

    Base rules and filtered rules are each backed by a pickle cache below
    ``DIR_UCI/<FILENAME>/``; a cache file is read when present and written
    after computing otherwise.

    Args:
        FILENAME: Dataset name; directory name below ``DIR_UCI``.
        iter1: First run index; part of the cache filenames.
        iter2: Second run index; part of the cache filenames.
        k: Minimum support size; rules with ``len(r.getSupport()) >= k``
            are kept.

    Returns:
        Always ``0``.
    """
    print("START iter1 iter2 k : {},{},{}".format(iter1, iter2, k))

    # rule induction: load the cached rules, or induce them and cache them
    rules_path = '{}/{}/rules/rules_{}-{}.pkl'.format(
        DIR_UCI, FILENAME, iter1, iter2)
    if os.path.isfile(rules_path):
        rules = mlem2.loadPickleRules(rules_path)
    else:
        rules = mlem2.getRulesByMLEM2(FILENAME, iter1, iter2)
        mlem2.savePickleRules(rules, rules_path)

    # only-k rule filter, cached per (k, iter1, iter2)
    only_k_path = '{}/{}/rules_onlyK/rules-{}_{}-{}.pkl'.format(
        DIR_UCI, FILENAME, k, iter1, iter2)
    if os.path.isfile(only_k_path):
        rules = mlem2.loadPickleRules(only_k_path)
    else:
        rules = [r for r in rules if len(r.getSupport()) >= k]
        mlem2.savePickleRules(rules, only_k_path)

    # number of rules
    num = len(rules)
    # mean rule length
    leng = mlem2.getMeanLength(rules)
    # mean support
    support = mlem2.getMeanSupport(rules)

    # save to file (append one CSV row)
    row = 'MLEM2_OnlyK_STAT,{k},{FILENAME},{iter1},{iter2},{num},{leng},{support}'.format(
        FILENAME=FILENAME, k=k, iter1=iter1, iter2=iter2,
        num=num, leng=leng, support=support)
    savepath = '{}/{}/MLEM2_OnlyK_STAT.csv'.format(DIR_UCI, FILENAME)
    with open(savepath, "a") as f:
        # write(), not writelines(): the row is a single string
        f.write(row + "\n")

    return 0
def MLEM2_RuleClusteringBySameCondition_Identified(FILENAME, iter1, iter2, k, p):
    """Evaluate ``mlem2.getPerIdentifiedClass`` on same-condition clusters.

    Base rules and clustered rules are each backed by a pickle cache below
    ``DIR_UCI/<FILENAME>/``. The result is appended as one row to
    ``Identify_MLEM2_SameCondition.csv``.

    Args:
        FILENAME: Dataset name; directory name below ``DIR_UCI``.
        iter1: First run index; part of the cache filenames.
        iter2: Second run index; part of the cache filenames.
        k: Forwarded as ``k`` to
            ``clustering.getRuleClusteringBySameCondition``.
        p: Second argument forwarded to ``mlem2.getPerIdentifiedClass``.

    Returns:
        The value returned by ``mlem2.getPerIdentifiedClass(rules, p)``.
    """
    # rule induction: load the cached rules, or induce them and cache them
    rules_path = '{}/{}/rules/rules_{}-{}.pkl'.format(
        DIR_UCI, FILENAME, iter1, iter2)
    if os.path.isfile(rules_path):
        rules = mlem2.loadPickleRules(rules_path)
    else:
        rules = mlem2.getRulesByMLEM2(FILENAME, iter1, iter2)
        mlem2.savePickleRules(rules, rules_path)

    # rule clustering, cached per (k, iter1, iter2)
    cluster_path = '{}/{}/rules_cluster_same_condition/rules-{}_{}-{}.pkl'.format(
        DIR_UCI, FILENAME, k, iter1, iter2)
    if os.path.isfile(cluster_path):
        rules = mlem2.loadPickleRules(cluster_path)
    else:
        rules = clustering.getRuleClusteringBySameCondition(rules, k=k)
        mlem2.savePickleRules(rules, cluster_path)

    # compute PerIdentifiedClass
    ans = mlem2.getPerIdentifiedClass(rules, p)

    # save (append one CSV row)
    row = 'Identify_MLEM2_SameCondition,{k},{p},{FILENAME},{iter1},{iter2},{ans}'.format(
        FILENAME=FILENAME, k=k, p=p, iter1=iter1, iter2=iter2, ans=ans)
    savepath = '{}/{}/Identify_MLEM2_SameCondition.csv'.format(DIR_UCI, FILENAME)
    with open(savepath, "a") as f:
        # write(), not writelines(): the row is a single string
        f.write(row + "\n")

    return ans
def MLEM2_delEAlphaRule_LERS(FILENAME, iter1, iter2, DELFUN, CLASS, ATTRIBUTE_VALUE, alpha):
    """Apply ``DELFUN`` to the MLEM2 rules, evaluate with LERS, log to CSV.

    Rules are loaded from the pickle cache under
    ``DIR_UCI/<FILENAME>/rules/`` when present, otherwise induced and
    cached. ``DELFUN`` is then applied once per ``(attr, e)`` pair from
    ``ATTRIBUTE_VALUE``. Accuracy is computed on the test data; support,
    confidence and the Acc/Recall values on the training data. One row is
    appended to ``fairness/02_alpha_preserve/MLEM2_delEAlphaRule_LERS.csv``
    and timestamped START / RULES / END lines are printed.

    Args:
        FILENAME: Dataset name; directory name below ``DIR_UCI``.
        iter1: First run index; forwarded to ``getData`` and the inducer.
        iter2: Second run index; forwarded the same way.
        DELFUN: Callable invoked as ``DELFUN(rules, attr, e,
            decision_table_train, list_judgeNominal, alpha)``; its return
            value replaces the rule list. Its ``__name__`` is logged.
        CLASS: ``"ALL"`` to apply ``DELFUN`` to every rule; otherwise only
            the rules selected by ``mlem2.getRulesClass(rules, CLASS)`` are
            processed and the remaining rules are appended unchanged.
        ATTRIBUTE_VALUE: Mapping from attribute to an iterable of values.
        alpha: Threshold forwarded to ``DELFUN``.

    Returns:
        Always ``0``.
    """

    def log_stage(stage):
        # One timestamped progress line; same layout for every stage.
        print(' '.join([
            datetime.now().strftime('%Y/%m/%d %H:%M:%S'),
            FILENAME, str(iter1), str(iter2), DELFUN.__name__,
            strAttributeValue(ATTRIBUTE_VALUE), str(alpha), stage]))

    log_stage("START")

    # rule induction and rule save
    rules_path = '{}/{}/rules/rules_{}-{}.pkl'.format(
        DIR_UCI, FILENAME, iter1, iter2)
    if os.path.isfile(rules_path):
        rules = mlem2.loadPickleRules(rules_path)
    else:
        rules = mlem2.getRulesByMLEM2(FILENAME, iter1, iter2)
        mlem2.savePickleRules(rules, rules_path)

    # train data setup (the training class labels are not needed here)
    decision_table_train, _ = getData(FILENAME, iter1, iter2, T="train")
    list_judgeNominal = getJudgeNominal(decision_table_train, FILENAME)

    # delete elementary conditions of alpha-discriminatory rules, or delete
    # the rules themselves (which one depends on DELFUN)
    if CLASS != "ALL":
        rules_target = mlem2.getRulesClass(rules, CLASS)
        rules_nontarget = mlem2.getRulesClass(rules, CLASS, judge=False)
        for attr in ATTRIBUTE_VALUE:
            for e in ATTRIBUTE_VALUE[attr]:
                rules_target = DELFUN(rules_target, attr, e,
                                      decision_table_train, list_judgeNominal, alpha)
        # rules of the other classes are kept untouched
        rules_target.extend(rules_nontarget)
        rules = rules_target
    else:
        for attr in ATTRIBUTE_VALUE:
            for e in ATTRIBUTE_VALUE[attr]:
                rules = DELFUN(rules, attr, e,
                               decision_table_train, list_judgeNominal, alpha)

    log_stage("RULES")

    # test data setup
    decision_table_test, decision_class = getData(FILENAME, iter1, iter2, T="test")
    list_judgeNominal = getJudgeNominal(decision_table_test, FILENAME)

    # predict by LERS
    predictions = LERS.predictByLERS(rules, decision_table_test, list_judgeNominal)

    # accuracy on the test data
    accuracy = accuracy_score(decision_class, predictions)
    # number of rules
    num = len(rules)
    # number of rules per class
    num_class = strNumClassRules(rules)
    # mean rule length
    mean_length = mlem2.getMeanLength(rules)

    # mean support and mean confidence (on the training data)
    list_judgeNominal = getJudgeNominal(decision_table_train, FILENAME)
    mean_support, mean_conf = LERS.getSupportConfidenceRules(
        rules, decision_table_train, list_judgeNominal)

    # Acc and Recall
    acc_recall = LERS.getAccurayRecall(rules, decision_table_train, list_judgeNominal)

    # save to file (append one CSV row)
    row = ('MLEM2_delEAlphaRule_LERS,{DELFUN},{CLASS},{FILENAME},{ATTRIBUTE_VALUE},'
           '{alpha},{iter1},{iter2},{acc},{num},{num_class},{mean_length},'
           '{mean_support},{mean_conf},{acc_recall}').format(
        DELFUN=DELFUN.__name__, CLASS=CLASS, FILENAME=FILENAME,
        ATTRIBUTE_VALUE=strAttributeValue(ATTRIBUTE_VALUE), alpha=alpha,
        iter1=iter1, iter2=iter2, acc=accuracy, num=num, num_class=num_class,
        mean_length=mean_length, mean_support=mean_support,
        mean_conf=mean_conf, acc_recall=strAccRecall(rules, acc_recall))
    savepath = '{}/{}/fairness/02_alpha_preserve/MLEM2_delEAlphaRule_LERS.csv'.format(
        DIR_UCI, FILENAME)
    with open(savepath, "a") as f:
        # write(), not writelines(): the row is a single string
        f.write(row + "\n")

    log_stage("END")

    return 0
return (predictions) # ======================================== # main # ======================================== if __name__ == "__main__": FILENAME = 'german_credit_categorical' #FILENAME = 'hayes-roth' iter1 = 1 iter2 = 1 # rule induction rules = mlem2.getRulesByMLEM2(FILENAME, iter1, iter2) filepath = '/mnt/data/uci/' + FILENAME + '/' + FILENAME + '-test' + str( iter1) + '-' + str(iter2) + '.tsv' decision_table_test = mlem2.getDecisionTable(filepath) decision_table_test = decision_table_test.dropna() decision_class = decision_table_test[ decision_table_test.columns[-1]].values.tolist() filepath = '/mnt/data/uci/' + FILENAME + '/' + FILENAME + '.nominal' list_nominal = mlem2.getNominalList(filepath) list_judgeNominal = mlem2.getJudgeNominal(decision_table_test, list_nominal) # predict by LERS predictions = predictByLERS(rules, decision_table_test, list_judgeNominal)
def MLEM2_OnlyK_LERS(FILENAME, iter1, iter2, k):
    """Classify the test data with only-k filtered rules and log accuracy.

    Base rules and filtered rules are each backed by a pickle cache below
    ``DIR_UCI/<FILENAME>/``. The accuracy is appended as one row to
    ``MLEM2_OnlyK_LERS.csv``.

    Args:
        FILENAME: Dataset name; directory name and file prefix below
            ``DIR_UCI``.
        iter1: First run index; part of the test-file and cache filenames.
        iter2: Second run index; part of the test-file and cache filenames.
        k: Minimum support size; rules with ``len(r.getSupport()) >= k``
            are kept.

    Returns:
        The value of ``accuracy_score(decision_class, predictions)``.
    """
    print("START iter1 iter2 k : {},{},{}".format(iter1, iter2, k))

    # rule induction: load the cached rules, or induce them and cache them
    rules_path = '{}/{}/rules/rules_{}-{}.pkl'.format(DIR_UCI, FILENAME, iter1, iter2)
    if os.path.isfile(rules_path):
        rules = mlem2.loadPickleRules(rules_path)
    else:
        rules = mlem2.getRulesByMLEM2(FILENAME, iter1, iter2)
        mlem2.savePickleRules(rules, rules_path)

    # only-k rule filter, cached per (k, iter1, iter2)
    only_k_path = '{}/{}/rules_onlyK/rules-{}_{}-{}.pkl'.format(
        DIR_UCI, FILENAME, k, iter1, iter2)
    if os.path.isfile(only_k_path):
        rules = mlem2.loadPickleRules(only_k_path)
    else:
        rules = [r for r in rules if len(r.getSupport()) >= k]
        mlem2.savePickleRules(rules, only_k_path)

    # test data setup; rows with missing values are dropped and the last
    # column is taken as the decision class
    test_path = '{0}/{1}/{1}-test{2}-{3}.tsv'.format(DIR_UCI, FILENAME, iter1, iter2)
    decision_table_test = mlem2.getDecisionTable(test_path).dropna()
    decision_class = decision_table_test[decision_table_test.columns[-1]].values.tolist()
    nominal_path = '{0}/{1}/{1}.nominal'.format(DIR_UCI, FILENAME)
    list_nominal = mlem2.getNominalList(nominal_path)
    list_judgeNominal = mlem2.getJudgeNominal(decision_table_test, list_nominal)

    # predict by LERS
    predictions = LERS.predictByLERS(rules, decision_table_test, list_judgeNominal)

    # accuracy
    accuracy = accuracy_score(decision_class, predictions)

    # save (append one CSV row)
    row = 'MLEM2_OnlyK_LERS,{k},{FILENAME},{iter1},{iter2},{acc}'.format(
        FILENAME=FILENAME, k=k, iter1=iter1, iter2=iter2, acc=accuracy)
    savepath = '{}/{}/MLEM2_OnlyK_LERS.csv'.format(DIR_UCI, FILENAME)
    with open(savepath, "a") as f:
        # write(), not writelines(): the row is a single string
        f.write(row + "\n")

    return accuracy
def MLEM2_delEAlphaRule_LERS(FILENAME, iter1, iter2, DELFUN, CLASS, ATTRIBUTE_VALUE, alpha):
    """Apply ``DELFUN`` to the MLEM2 rules, evaluate with LERS, log to CSV.

    Rules are loaded from the pickle cache under
    ``DIR_UCI/<FILENAME>/rules/`` when present, otherwise induced and
    cached. ``DELFUN`` is then applied once per ``(attr, e)`` pair from
    ``ATTRIBUTE_VALUE``. Accuracy is computed on the test data; support,
    confidence and the Acc/Recall values on the training data. One row is
    appended to ``fairness/02_alpha_preserve/MLEM2_delEAlphaRule_LERS.csv``
    and timestamped START / RULES / END lines are printed.

    Args:
        FILENAME: Dataset name; directory name below ``DIR_UCI``.
        iter1: First run index; forwarded to ``getData`` and the inducer.
        iter2: Second run index; forwarded the same way.
        DELFUN: Callable invoked as ``DELFUN(rules, attr, e,
            decision_table_train, list_judgeNominal, alpha)``; its return
            value replaces the rule list. Its ``__name__`` is logged.
        CLASS: ``"ALL"`` to apply ``DELFUN`` to every rule; otherwise only
            the rules selected by ``mlem2.getRulesClass(rules, CLASS)`` are
            processed and the remaining rules are appended unchanged.
        ATTRIBUTE_VALUE: Mapping from attribute to an iterable of values.
        alpha: Threshold forwarded to ``DELFUN``.

    Returns:
        Always ``0``.
    """

    def log_stage(stage):
        # One timestamped progress line; same layout for every stage.
        print(
            " ".join(
                [
                    datetime.now().strftime("%Y/%m/%d %H:%M:%S"),
                    FILENAME,
                    str(iter1),
                    str(iter2),
                    DELFUN.__name__,
                    strAttributeValue(ATTRIBUTE_VALUE),
                    str(alpha),
                    stage,
                ]
            )
        )

    log_stage("START")

    # rule induction and rule save
    rules_path = "{}/{}/rules/rules_{}-{}.pkl".format(DIR_UCI, FILENAME, iter1, iter2)
    if os.path.isfile(rules_path):
        rules = mlem2.loadPickleRules(rules_path)
    else:
        rules = mlem2.getRulesByMLEM2(FILENAME, iter1, iter2)
        mlem2.savePickleRules(rules, rules_path)

    # train data setup (the training class labels are not needed here)
    decision_table_train, _ = getData(FILENAME, iter1, iter2, T="train")
    list_judgeNominal = getJudgeNominal(decision_table_train, FILENAME)

    # delete elementary conditions of alpha-discriminatory rules, or delete
    # the rules themselves (which one depends on DELFUN)
    if CLASS != "ALL":
        rules_target = mlem2.getRulesClass(rules, CLASS)
        rules_nontarget = mlem2.getRulesClass(rules, CLASS, judge=False)
        for attr in ATTRIBUTE_VALUE:
            for e in ATTRIBUTE_VALUE[attr]:
                rules_target = DELFUN(
                    rules_target, attr, e, decision_table_train, list_judgeNominal, alpha
                )
        # rules of the other classes are kept untouched
        rules_target.extend(rules_nontarget)
        rules = rules_target
    else:
        for attr in ATTRIBUTE_VALUE:
            for e in ATTRIBUTE_VALUE[attr]:
                rules = DELFUN(rules, attr, e, decision_table_train, list_judgeNominal, alpha)

    log_stage("RULES")

    # test data setup
    decision_table_test, decision_class = getData(FILENAME, iter1, iter2, T="test")
    list_judgeNominal = getJudgeNominal(decision_table_test, FILENAME)

    # predict by LERS
    predictions = LERS.predictByLERS(rules, decision_table_test, list_judgeNominal)

    # accuracy on the test data
    accuracy = accuracy_score(decision_class, predictions)
    # number of rules
    num = len(rules)
    # number of rules per class
    num_class = strNumClassRules(rules)
    # mean rule length
    mean_length = mlem2.getMeanLength(rules)

    # mean support and mean confidence (on the training data)
    list_judgeNominal = getJudgeNominal(decision_table_train, FILENAME)
    mean_support, mean_conf = LERS.getSupportConfidenceRules(
        rules, decision_table_train, list_judgeNominal
    )

    # Acc and Recall
    acc_recall = LERS.getAccurayRecall(rules, decision_table_train, list_judgeNominal)

    # save to file (append one CSV row)
    row = (
        "MLEM2_delEAlphaRule_LERS,{DELFUN},{CLASS},{FILENAME},{ATTRIBUTE_VALUE},"
        "{alpha},{iter1},{iter2},{acc},{num},{num_class},{mean_length},"
        "{mean_support},{mean_conf},{acc_recall}"
    ).format(
        DELFUN=DELFUN.__name__,
        CLASS=CLASS,
        FILENAME=FILENAME,
        ATTRIBUTE_VALUE=strAttributeValue(ATTRIBUTE_VALUE),
        alpha=alpha,
        iter1=iter1,
        iter2=iter2,
        acc=accuracy,
        num=num,
        num_class=num_class,
        mean_length=mean_length,
        mean_support=mean_support,
        mean_conf=mean_conf,
        acc_recall=strAccRecall(rules, acc_recall),
    )
    savepath = "{}/{}/fairness/02_alpha_preserve/MLEM2_delEAlphaRule_LERS.csv".format(
        DIR_UCI, FILENAME
    )
    with open(savepath, "a") as f:
        # write(), not writelines(): the row is a single string
        f.write(row + "\n")

    log_stage("END")

    return 0
pool = Pool(proc) multiargs = [] for FILENAME, iter1, iter2 in product(FILENAMES, range(1,11), range(1,11)): multiargs.append((FILENAME,iter1,iter2)) #results = pool.starmap(MLEM2_LERS, multiargs) return(results) # ======================================== # main # ======================================== if __name__ == "__main__": #FILENAMES = ['hayes-roth'] #rules = rules = mlem2.getRulesByMLEM2('hayes-roth', 2, 2) # シングルプロセスで実行 #for FILENAME, iter1, iter2 in product(FILENAMES, range(1,11), range(1,11)): # print('{filename} {i1} {i2}'.format(filename=FILENAME, i1=iter1, i2=iter2)) # print(MLEM2_LERS(FILENAME, iter1, iter2)) # 並列実行 proc=4 freeze_support() results = multi_main(proc, FILENAMES) # 平均と分散 print(getEvalMeanVar(results))
rules_new.extend(target_rules) return(rules_new) # ======================================== # main # ======================================== if __name__ == "__main__": FILENAME = 'hayes-roth' iter1 = 5 iter2 = 4 rules = mlem2.getRulesByMLEM2(FILENAME, iter1, iter2) filepath = '/mnt/data/uci/'+FILENAME+'/'+FILENAME+'-train'+str(iter1)+'-'+str(iter2)+'.tsv' decision_table = mlem2.getDecisionTable(filepath) colnames = mlem2.getColNames(decision_table) filepath = '/mnt/data/uci/'+FILENAME+'/'+FILENAME+'.nominal' list_nominal = mlem2.getNominalList(filepath) list_judgeNominal = mlem2.getJudgeNominal(decision_table, list_nominal) # ルールクラスタリング #rules_new = getRuleClusteringBySimilarity(rules, colnames, list_judgeNominal, k=3) #rules_new = getRuleClusteringByRandom(rules, k=3) #rules_new = getRuleClusteringBySameCondition(rules, k=3) #rules_new = getRuleClusteringByConsistentSimilarity(rules, colnames, list_judgeNominal, k=3) #rules_new = getRuleClusteringByConsistentSimilarityExceptMRule(rules, colnames, list_judgeNominal, k=3, m=3)
def MLEM2_RuleClusteringBySameCondition_LERS(FILENAME, iter1, iter2, k):
    """Classify the test data with same-condition clustered rules.

    Base rules and clustered rules are each backed by a pickle cache below
    ``DIR_UCI/<FILENAME>/``. The accuracy is appended as one row to
    ``MLEM2_RuleClusteringBySameCondition_LERS.csv``.

    Args:
        FILENAME: Dataset name; directory name and file prefix below
            ``DIR_UCI``.
        iter1: First run index; part of the test-file and cache filenames.
        iter2: Second run index; part of the test-file and cache filenames.
        k: Forwarded as ``k`` to
            ``clustering.getRuleClusteringBySameCondition``.

    Returns:
        The value of ``accuracy_score(decision_class, predictions)``.
    """
    # rule induction: load the cached rules, or induce them and cache them
    rules_path = '{}/{}/rules/rules_{}-{}.pkl'.format(DIR_UCI, FILENAME, iter1, iter2)
    if os.path.isfile(rules_path):
        rules = mlem2.loadPickleRules(rules_path)
    else:
        rules = mlem2.getRulesByMLEM2(FILENAME, iter1, iter2)
        mlem2.savePickleRules(rules, rules_path)

    # rule clustering, cached per (k, iter1, iter2)
    cluster_path = '{}/{}/rules_cluster_same_condition/rules-{}_{}-{}.pkl'.format(
        DIR_UCI, FILENAME, k, iter1, iter2)
    if os.path.isfile(cluster_path):
        rules = mlem2.loadPickleRules(cluster_path)
    else:
        rules = clustering.getRuleClusteringBySameCondition(rules, k=k)
        mlem2.savePickleRules(rules, cluster_path)

    # test data setup; rows with missing values are dropped and the last
    # column is taken as the decision class
    test_path = '{0}/{1}/{1}-test{2}-{3}.tsv'.format(DIR_UCI, FILENAME, iter1, iter2)
    decision_table_test = mlem2.getDecisionTable(test_path).dropna()
    decision_class = decision_table_test[decision_table_test.columns[-1]].values.tolist()
    nominal_path = '{0}/{1}/{1}.nominal'.format(DIR_UCI, FILENAME)
    list_nominal = mlem2.getNominalList(nominal_path)
    list_judgeNominal = mlem2.getJudgeNominal(decision_table_test, list_nominal)

    # predict by LERS
    predictions = LERS.predictByLERS(rules, decision_table_test, list_judgeNominal)

    # accuracy
    accuracy = accuracy_score(decision_class, predictions)

    # save (append one CSV row)
    row = ('MLEM2_RuleClusteringBySameCondition_LERS,{k},{FILENAME},'
           '{iter1},{iter2},{acc}').format(
        FILENAME=FILENAME, k=k, iter1=iter1, iter2=iter2, acc=accuracy)
    savepath = '{}/{}/MLEM2_RuleClusteringBySameCondition_LERS.csv'.format(
        DIR_UCI, FILENAME)
    with open(savepath, "a") as f:
        # write(), not writelines(): the row is a single string
        f.write(row + "\n")

    return accuracy