def getData(FILENAME, iter1, iter2, T="test"):
    """Load one split of a dataset together with its class labels.

    The table is read from
    ``DIR_UCI/<FILENAME>/<FILENAME>-<T><iter1>-<iter2>.tsv`` through
    ``mlem2.getDecisionTable`` and ``dropna()`` is applied to the result.

    Args:
        FILENAME: dataset name; used both as directory and file-name prefix.
        iter1: first split identifier embedded in the file name.
        iter2: second split identifier embedded in the file name.
        T: split kind inserted before ``iter1`` (defaults to ``"test"``).

    Returns:
        A ``(decision_table, decision_class)`` tuple, where
        ``decision_class`` is the last column of the table as a list.
    """
    tsv_name = FILENAME + '-' + T + str(iter1) + '-' + str(iter2) + '.tsv'
    tsv_path = '/'.join([DIR_UCI, FILENAME, tsv_name])

    table = mlem2.getDecisionTable(tsv_path).dropna()

    # The last column holds the class label of every row.
    label_column = table.columns[-1]
    labels = table[label_column].values.tolist()
    return (table, labels)
def Apriori_LERS(FILENAME, iter1, iter2, minsup, minconf):
    """Evaluate Apriori rules with the LERS classifier on one test split.

    Rules are loaded from a pickle under ``/data/uci/<FILENAME>/apriori/``
    when it exists; otherwise they are induced with
    ``apriori.getRulesByApriori`` and pickled. The test split is then
    classified with ``LERS.predictByLERS`` and the score returned by
    ``accuracy_score`` is logged and returned.

    Args:
        FILENAME: dataset name (directory and file-name prefix).
        iter1: first split identifier.
        iter2: second split identifier.
        minsup: minimum support passed to the rule induction.
        minconf: minimum confidence passed to the rule induction.

    Returns:
        The value returned by ``accuracy_score``.
    """
    dataset_dir = '/data/uci/' + FILENAME
    split_tag = str(iter1) + '-' + str(iter2)

    # rule induction (reuse the pickled rules when they are already there)
    rules_pkl = (dataset_dir + '/apriori/' + 'rules_' + split_tag
                 + '-' + str(minsup) + '-' + str(minconf) + '.pkl')
    if os.path.isfile(rules_pkl):
        rules = mlem2.loadPickleRules(rules_pkl)
    else:
        rules = apriori.getRulesByApriori(FILENAME, iter1, iter2, minsup, minconf)

    # rule save
    if not os.path.isfile(rules_pkl):
        mlem2.savePickleRules(rules, rules_pkl)

    # test data setup
    test_path = dataset_dir + '/' + FILENAME + '-test' + split_tag + '.tsv'
    test_table = mlem2.getDecisionTable(test_path)
    test_table = test_table.dropna()
    true_classes = test_table[test_table.columns[-1]].values.tolist()
    nominal_path = dataset_dir + '/' + FILENAME + '.nominal'
    nominal_list = mlem2.getNominalList(nominal_path)
    judge_nominal = mlem2.getJudgeNominal(test_table, nominal_list)

    # predict by LERS
    predictions = LERS.predictByLERS(rules, test_table, judge_nominal)

    # compute the accuracy; the true labels are compared as strings
    accuracy = accuracy_score([str(label) for label in true_classes], predictions)

    # NOTE(review): "__file__" is a string literal here, so the log file is
    # placed relative to the current working directory, not next to this
    # module. Also, logging.basicConfig only configures the root logger on
    # its first effective call, so later calls with another FILENAME keep
    # writing to the first log file. Confirm both are intended.
    log_dir = os.path.dirname(os.path.abspath("__file__"))
    logging.basicConfig(filename=log_dir + '/' + FILENAME + '.log',
                        format='%(asctime)s,%(message)s',
                        level=logging.DEBUG)
    message = 'Apriori_LERS,{FILENAME},{iter1},{iter2},{acc},{minsup},{minconf}'.format(
        FILENAME=FILENAME, iter1=iter1, iter2=iter2,
        acc=accuracy, minsup=minsup, minconf=minconf)
    logging.info(message)

    return accuracy
def MLEM2_LERS(FILENAME, iter1, iter2):
    """Evaluate plain MLEM2 rules with the LERS classifier on one test split.

    Rules come from ``DIR_UCI/<FILENAME>/rules/rules_<iter1>-<iter2>.pkl``
    when that pickle exists; otherwise they are induced with
    ``mlem2.getRulesByMLEM2`` and pickled. One result row is appended to
    ``DIR_UCI/<FILENAME>/MLEM2_LERS.csv``.

    Args:
        FILENAME: dataset name (directory and file-name prefix).
        iter1: first split identifier.
        iter2: second split identifier.

    Returns:
        The value returned by ``accuracy_score``.
    """
    dataset_dir = DIR_UCI + '/' + FILENAME
    split_tag = str(iter1) + '-' + str(iter2)

    # rule induction (reuse the pickled rules when they are already there)
    rules_pkl = dataset_dir + '/rules/' + 'rules_' + split_tag + '.pkl'
    if os.path.isfile(rules_pkl):
        rules = mlem2.loadPickleRules(rules_pkl)
    else:
        rules = mlem2.getRulesByMLEM2(FILENAME, iter1, iter2)

    # rule save
    if not os.path.isfile(rules_pkl):
        mlem2.savePickleRules(rules, rules_pkl)

    # test data setup
    test_path = dataset_dir + '/' + FILENAME + '-test' + split_tag + '.tsv'
    test_table = mlem2.getDecisionTable(test_path)
    test_table = test_table.dropna()
    true_classes = test_table[test_table.columns[-1]].values.tolist()
    nominal_path = dataset_dir + '/' + FILENAME + '.nominal'
    nominal_list = mlem2.getNominalList(nominal_path)
    judge_nominal = mlem2.getJudgeNominal(test_table, nominal_list)

    # predict by LERS
    predictions = LERS.predictByLERS(rules, test_table, judge_nominal)

    # compute the accuracy
    accuracy = accuracy_score(true_classes, predictions)

    # append one result row to the per-dataset CSV
    result_csv = dataset_dir + '/MLEM2_LERS.csv'
    with open(result_csv, "a") as out:
        row = 'MLEM2_LERS,1,{FILENAME},{iter1},{iter2},{acc}'.format(
            FILENAME=FILENAME, iter1=iter1, iter2=iter2, acc=accuracy)
        out.write(row + "\n")

    return accuracy
def MLEM2_RuleClusteringByConsistentTimesSimExceptMRule_LERS(FILENAME, iter1, iter2, k, m):
    """Cluster MLEM2 rules (consistent-times-similarity, except m-rule) and score them with LERS.

    Steps: load or induce the MLEM2 rules, load or compute the clustered
    rules with
    ``clustering.getRuleClusteringByConsistentTimesSimilarityExceptMRule``,
    classify the test split with ``LERS.predictByLERS`` and append one row to
    ``DIR_UCI/<FILENAME>/MLEM2_RuleClusteringByConsistentTimesSimExceptMRule_LERS.csv``.

    Args:
        FILENAME: dataset name (directory and file-name prefix).
        iter1: first split identifier.
        iter2: second split identifier.
        k: clustering parameter forwarded as ``k=k``.
        m: clustering parameter forwarded as ``m=m``.

    Returns:
        The value returned by ``accuracy_score``.
    """
    dataset_dir = DIR_UCI + '/' + FILENAME
    split_tag = str(iter1) + '-' + str(iter2)
    nominal_path = dataset_dir + '/' + FILENAME + '.nominal'

    # rule induction (reuse the pickled rules when they are already there)
    base_pkl = dataset_dir + '/rules/' + 'rules_' + split_tag + '.pkl'
    if os.path.isfile(base_pkl):
        rules = mlem2.loadPickleRules(base_pkl)
    else:
        rules = mlem2.getRulesByMLEM2(FILENAME, iter1, iter2)

    # rule save
    if not os.path.isfile(base_pkl):
        mlem2.savePickleRules(rules, base_pkl)

    # rule clustering: the inputs are derived from the training split
    train_path = dataset_dir + '/' + FILENAME + '-train' + split_tag + '.tsv'
    train_table = mlem2.getDecisionTable(train_path)
    colnames = mlem2.getColNames(train_table)
    train_nominal = mlem2.getNominalList(nominal_path)
    train_judge = mlem2.getJudgeNominal(train_table, train_nominal)

    # NOTE(review): the cache file name contains k but not m, so a pickle
    # produced with one m is reused for every other m with the same k.
    cluster_pkl = (dataset_dir + '/rules_cluster_consistent_times_sim_except_mrule/'
                   + 'rules-' + str(k) + '_' + split_tag + '.pkl')
    if os.path.isfile(cluster_pkl):
        rules = mlem2.loadPickleRules(cluster_pkl)
    else:
        rules = clustering.getRuleClusteringByConsistentTimesSimilarityExceptMRule(
            rules, colnames, train_judge, k=k, m=m)

    # rule save
    if not os.path.isfile(cluster_pkl):
        mlem2.savePickleRules(rules, cluster_pkl)

    # test data setup
    test_path = dataset_dir + '/' + FILENAME + '-test' + split_tag + '.tsv'
    test_table = mlem2.getDecisionTable(test_path)
    test_table = test_table.dropna()
    true_classes = test_table[test_table.columns[-1]].values.tolist()
    test_nominal = mlem2.getNominalList(nominal_path)
    test_judge = mlem2.getJudgeNominal(test_table, test_nominal)

    # predict by LERS
    predictions = LERS.predictByLERS(rules, test_table, test_judge)

    # compute the accuracy
    accuracy = accuracy_score(true_classes, predictions)

    # append one result row to the per-dataset CSV
    result_csv = dataset_dir + '/MLEM2_RuleClusteringByConsistentTimesSimExceptMRule_LERS.csv'
    with open(result_csv, "a") as out:
        row = 'MLEM2_RuleClusteringByConsistentTimesSimExceptMRule_LERS,{k},{FILENAME},{iter1},{iter2},{acc}'.format(
            FILENAME=FILENAME, k=k, iter1=iter1, iter2=iter2, acc=accuracy)
        out.write(row + "\n")

    return accuracy
def MLEM2_OnlyK_LERS(FILENAME, iter1, iter2, k):
    """Keep only MLEM2 rules whose support has at least ``k`` elements and score them with LERS.

    The base rules are loaded from (or induced and saved to)
    ``DIR_UCI/<FILENAME>/rules/``; the filtered rules are cached under
    ``DIR_UCI/<FILENAME>/rules_onlyK/``. One result row is appended to
    ``DIR_UCI/<FILENAME>/MLEM2_OnlyK_LERS.csv``.

    Args:
        FILENAME: dataset name (directory and file-name prefix).
        iter1: first split identifier.
        iter2: second split identifier.
        k: minimum ``len(rule.getSupport())`` a rule needs to be kept.

    Returns:
        The value returned by ``accuracy_score``.
    """
    print("START iter1 iter2 k : " + ",".join([str(iter1), str(iter2), str(k)]))

    dataset_dir = DIR_UCI + '/' + FILENAME
    split_tag = str(iter1) + '-' + str(iter2)

    # rule induction (reuse the pickled rules when they are already there)
    base_pkl = dataset_dir + '/rules/' + 'rules_' + split_tag + '.pkl'
    if os.path.isfile(base_pkl):
        rules = mlem2.loadPickleRules(base_pkl)
    else:
        rules = mlem2.getRulesByMLEM2(FILENAME, iter1, iter2)

    # rule save
    if not os.path.isfile(base_pkl):
        mlem2.savePickleRules(rules, base_pkl)

    # only-k rule filter
    filtered_pkl = dataset_dir + '/rules_onlyK/' + 'rules-' + str(k) + '_' + split_tag + '.pkl'
    if os.path.isfile(filtered_pkl):
        rules = mlem2.loadPickleRules(filtered_pkl)
    else:
        kept = []
        for rule in rules:
            if len(rule.getSupport()) >= k:
                kept.append(rule)
        rules = kept

    # rule save
    if not os.path.isfile(filtered_pkl):
        mlem2.savePickleRules(rules, filtered_pkl)

    # test data setup
    test_path = dataset_dir + '/' + FILENAME + '-test' + split_tag + '.tsv'
    test_table = mlem2.getDecisionTable(test_path)
    test_table = test_table.dropna()
    true_classes = test_table[test_table.columns[-1]].values.tolist()
    nominal_path = dataset_dir + '/' + FILENAME + '.nominal'
    nominal_list = mlem2.getNominalList(nominal_path)
    judge_nominal = mlem2.getJudgeNominal(test_table, nominal_list)

    # predict by LERS
    predictions = LERS.predictByLERS(rules, test_table, judge_nominal)

    # compute the accuracy
    accuracy = accuracy_score(true_classes, predictions)

    # append one result row to the per-dataset CSV
    result_csv = dataset_dir + '/MLEM2_OnlyK_LERS.csv'
    with open(result_csv, "a") as out:
        row = 'MLEM2_OnlyK_LERS,{k},{FILENAME},{iter1},{iter2},{acc}'.format(
            FILENAME=FILENAME, k=k, iter1=iter1, iter2=iter2, acc=accuracy)
        out.write(row + "\n")

    return accuracy
def MLEM2_RuleClusteringByConsistentExceptMRule_STAT(FILENAME, iter1, iter2, k, m):
    """Record size statistics of the rules clustered by the consistent / except-m-rule method.

    Loads or induces the MLEM2 rules, loads or computes the clustered rules
    with ``clustering.getRuleClusteringByConsistentExceptMRule`` and appends
    the rule count, ``mlem2.getMeanLength`` and ``mlem2.getMeanSupport`` to
    ``DIR_UCI/<FILENAME>/MLEM2_RuleClusteringByConsistentExceptMRule_STAT.csv``.

    Args:
        FILENAME: dataset name (directory and file-name prefix).
        iter1: first split identifier.
        iter2: second split identifier.
        k: clustering parameter forwarded as ``k=k``.
        m: clustering parameter forwarded as ``m=m``.

    Returns:
        Always ``0``.
    """
    dataset_dir = DIR_UCI + '/' + FILENAME
    split_tag = str(iter1) + '-' + str(iter2)

    # rule induction (reuse the pickled rules when they are already there)
    base_pkl = dataset_dir + '/rules/' + 'rules_' + split_tag + '.pkl'
    if os.path.isfile(base_pkl):
        rules = mlem2.loadPickleRules(base_pkl)
    else:
        rules = mlem2.getRulesByMLEM2(FILENAME, iter1, iter2)

    # rule save
    if not os.path.isfile(base_pkl):
        mlem2.savePickleRules(rules, base_pkl)

    # rule clustering: the inputs are derived from the training split
    train_path = dataset_dir + '/' + FILENAME + '-train' + split_tag + '.tsv'
    train_table = mlem2.getDecisionTable(train_path)
    colnames = mlem2.getColNames(train_table)
    nominal_path = dataset_dir + '/' + FILENAME + '.nominal'
    nominal_list = mlem2.getNominalList(nominal_path)
    judge_nominal = mlem2.getJudgeNominal(train_table, nominal_list)

    # NOTE(review): the cache file name contains k but not m, so a pickle
    # produced with one m is reused for every other m with the same k.
    cluster_pkl = (dataset_dir + '/rules_cluster_consistent_except_mrule/'
                   + 'rules-' + str(k) + '_' + split_tag + '.pkl')
    if os.path.isfile(cluster_pkl):
        rules = mlem2.loadPickleRules(cluster_pkl)
    else:
        rules = clustering.getRuleClusteringByConsistentExceptMRule(
            rules, colnames, judge_nominal, k=k, m=m)

    # rule save
    if not os.path.isfile(cluster_pkl):
        mlem2.savePickleRules(rules, cluster_pkl)

    # number of rules
    num = len(rules)
    # mean rule length
    leng = mlem2.getMeanLength(rules)
    # mean support
    support = mlem2.getMeanSupport(rules)

    # save to file
    result_csv = dataset_dir + '/MLEM2_RuleClusteringByConsistentExceptMRule_STAT.csv'
    with open(result_csv, "a") as out:
        row = 'MLEM2_RuleClusteringByConsistentExceptMRule_STAT,{k},{FILENAME},{iter1},{iter2},{num},{leng},{support}'.format(
            FILENAME=FILENAME, k=k, iter1=iter1, iter2=iter2,
            num=num, leng=leng, support=support)
        out.write(row + "\n")

    return 0
def MLEM2_RuleClusteringBySameCondition_LERS(FILENAME, iter1, iter2, k):
    """Cluster MLEM2 rules by same condition and score them with LERS.

    Loads or induces the MLEM2 rules, loads or computes the clustered rules
    with ``clustering.getRuleClusteringBySameCondition``, classifies the test
    split with ``LERS.predictByLERS`` and appends one row to
    ``DIR_UCI/<FILENAME>/MLEM2_RuleClusteringBySameCondition_LERS.csv``.

    Args:
        FILENAME: dataset name (directory and file-name prefix).
        iter1: first split identifier.
        iter2: second split identifier.
        k: clustering parameter forwarded as ``k=k``.

    Returns:
        The value returned by ``accuracy_score``.
    """
    dataset_dir = DIR_UCI + '/' + FILENAME
    split_tag = str(iter1) + '-' + str(iter2)

    # rule induction (reuse the pickled rules when they are already there)
    base_pkl = dataset_dir + '/rules/' + 'rules_' + split_tag + '.pkl'
    if os.path.isfile(base_pkl):
        rules = mlem2.loadPickleRules(base_pkl)
    else:
        rules = mlem2.getRulesByMLEM2(FILENAME, iter1, iter2)

    # rule save
    if not os.path.isfile(base_pkl):
        mlem2.savePickleRules(rules, base_pkl)

    # rule clustering
    cluster_pkl = (dataset_dir + '/rules_cluster_same_condition/'
                   + 'rules-' + str(k) + '_' + split_tag + '.pkl')
    if os.path.isfile(cluster_pkl):
        rules = mlem2.loadPickleRules(cluster_pkl)
    else:
        rules = clustering.getRuleClusteringBySameCondition(rules, k=k)

    # rule save
    if not os.path.isfile(cluster_pkl):
        mlem2.savePickleRules(rules, cluster_pkl)

    # test data setup
    test_path = dataset_dir + '/' + FILENAME + '-test' + split_tag + '.tsv'
    test_table = mlem2.getDecisionTable(test_path)
    test_table = test_table.dropna()
    true_classes = test_table[test_table.columns[-1]].values.tolist()
    nominal_path = dataset_dir + '/' + FILENAME + '.nominal'
    nominal_list = mlem2.getNominalList(nominal_path)
    judge_nominal = mlem2.getJudgeNominal(test_table, nominal_list)

    # predict by LERS
    predictions = LERS.predictByLERS(rules, test_table, judge_nominal)

    # compute the accuracy
    accuracy = accuracy_score(true_classes, predictions)

    # append one result row to the per-dataset CSV
    result_csv = dataset_dir + '/MLEM2_RuleClusteringBySameCondition_LERS.csv'
    with open(result_csv, "a") as out:
        row = 'MLEM2_RuleClusteringBySameCondition_LERS,{k},{FILENAME},{iter1},{iter2},{acc}'.format(
            FILENAME=FILENAME, k=k, iter1=iter1, iter2=iter2, acc=accuracy)
        out.write(row + "\n")

    return accuracy
def Apriori_LERS(FILENAME, iter1, iter2, minsup, minconf):
    """Score Apriori-induced rules on one test split using LERS.

    The rule pickle under ``/data/uci/<FILENAME>/apriori/`` is reused when
    present; otherwise ``apriori.getRulesByApriori`` is called and its result
    is pickled. The resulting accuracy is written to the log via
    ``logging.info`` and returned.

    Args:
        FILENAME: dataset name (directory and file-name prefix).
        iter1: first split identifier.
        iter2: second split identifier.
        minsup: minimum support passed to the rule induction.
        minconf: minimum confidence passed to the rule induction.

    Returns:
        The value returned by ``accuracy_score``.
    """
    root = '/data/uci/' + FILENAME
    split = str(iter1) + '-' + str(iter2)

    # rule induction
    pickle_path = root + '/apriori/' + 'rules_' + split + '-' + str(minsup) + '-' + str(minconf) + '.pkl'
    have_pickle = os.path.isfile(pickle_path)
    if have_pickle:
        rules = mlem2.loadPickleRules(pickle_path)
    else:
        rules = apriori.getRulesByApriori(FILENAME, iter1, iter2, minsup, minconf)

    # rule save (existence is checked again, exactly as before)
    if not os.path.isfile(pickle_path):
        mlem2.savePickleRules(rules, pickle_path)

    # test data setup
    table = mlem2.getDecisionTable(root + '/' + FILENAME + '-test' + split + '.tsv')
    table = table.dropna()
    expected = table[table.columns[-1]].values.tolist()
    nominal_names = mlem2.getNominalList(root + '/' + FILENAME + '.nominal')
    judge = mlem2.getJudgeNominal(table, nominal_names)

    # predict by LERS
    predicted = LERS.predictByLERS(rules, table, judge)

    # compute the accuracy; the true labels are compared as strings
    expected_as_text = [str(value) for value in expected]
    accuracy = accuracy_score(expected_as_text, predicted)

    # NOTE(review): "__file__" is a string literal, so the log directory is
    # resolved from the current working directory; and logging.basicConfig
    # only takes effect on its first effective call. Confirm both are intended.
    log_path = os.path.dirname(os.path.abspath("__file__")) + '/' + FILENAME + '.log'
    logging.basicConfig(
        filename=log_path,
        format='%(asctime)s,%(message)s',
        level=logging.DEBUG,
    )
    logging.info(
        'Apriori_LERS,{FILENAME},{iter1},{iter2},{acc},{minsup},{minconf}'.format(
            FILENAME=FILENAME,
            iter1=iter1,
            iter2=iter2,
            acc=accuracy,
            minsup=minsup,
            minconf=minconf,
        )
    )
    return accuracy
def MLEM2_RuleClusteringByConsistentSim_Identified(FILENAME, iter1, iter2, k, p):
    """Compute ``mlem2.getPerIdentifiedClass`` for consistent-similarity clustered rules.

    Loads or induces the MLEM2 rules, loads or computes the clustered rules
    with ``clustering.getRuleClusteringByConsistentSimilarity`` and appends
    the result of ``mlem2.getPerIdentifiedClass(rules, p)`` to
    ``DIR_UCI/<FILENAME>/Identify_MLEM2_RuleClusteringByConsistentSim.csv``.

    Args:
        FILENAME: dataset name (directory and file-name prefix).
        iter1: first split identifier.
        iter2: second split identifier.
        k: clustering parameter forwarded as ``k=k``.
        p: second argument of ``mlem2.getPerIdentifiedClass``.

    Returns:
        The value returned by ``mlem2.getPerIdentifiedClass``.
    """
    dataset_dir = DIR_UCI + '/' + FILENAME
    split_tag = str(iter1) + '-' + str(iter2)

    # rule induction (reuse the pickled rules when they are already there)
    base_pkl = dataset_dir + '/rules/' + 'rules_' + split_tag + '.pkl'
    if os.path.isfile(base_pkl):
        rules = mlem2.loadPickleRules(base_pkl)
    else:
        rules = mlem2.getRulesByMLEM2(FILENAME, iter1, iter2)

    # rule save
    if not os.path.isfile(base_pkl):
        mlem2.savePickleRules(rules, base_pkl)

    # rule clustering: the inputs are derived from the training split
    train_path = dataset_dir + '/' + FILENAME + '-train' + split_tag + '.tsv'
    train_table = mlem2.getDecisionTable(train_path)
    colnames = mlem2.getColNames(train_table)
    nominal_path = dataset_dir + '/' + FILENAME + '.nominal'
    nominal_list = mlem2.getNominalList(nominal_path)
    judge_nominal = mlem2.getJudgeNominal(train_table, nominal_list)

    cluster_pkl = (dataset_dir + '/rules_cluster_consistent_sim/'
                   + 'rules-' + str(k) + '_' + split_tag + '.pkl')
    if os.path.isfile(cluster_pkl):
        rules = mlem2.loadPickleRules(cluster_pkl)
    else:
        rules = clustering.getRuleClusteringByConsistentSimilarity(
            rules, colnames, judge_nominal, k=k)

    # rule save
    if not os.path.isfile(cluster_pkl):
        mlem2.savePickleRules(rules, cluster_pkl)

    # compute PerIdentifiedClass
    ans = mlem2.getPerIdentifiedClass(rules, p)

    # save
    result_csv = dataset_dir + '/Identify_MLEM2_RuleClusteringByConsistentSim.csv'
    with open(result_csv, "a") as out:
        row = 'Identify_MLEM2_RuleClusteringByConsistentSim,{k},{p},{FILENAME},{iter1},{iter2},{ans}'.format(
            FILENAME=FILENAME, k=k, p=p, iter1=iter1, iter2=iter2, ans=ans)
        out.write(row + "\n")

    return ans
# NOTE(review): the return statement below is the tail of a function whose
# header lies outside this view; it is kept unchanged.
    return(rules_new)

# ========================================
# main
# ========================================
# Manual smoke run: induces MLEM2 rules for one hard-coded split of the
# 'hayes-roth' dataset and feeds them to two of the clustering functions.
if __name__ == "__main__":

    FILENAME = 'hayes-roth'
    iter1 = 5
    iter2 = 4

    rules = mlem2.getRulesByMLEM2(FILENAME, iter1, iter2)

    # Column names are taken from the training split.
    filepath = '/mnt/data/uci/'+FILENAME+'/'+FILENAME+'-train'+str(iter1)+'-'+str(iter2)+'.tsv'
    decision_table = mlem2.getDecisionTable(filepath)
    colnames = mlem2.getColNames(decision_table)

    filepath = '/mnt/data/uci/'+FILENAME+'/'+FILENAME+'.nominal'
    list_nominal = mlem2.getNominalList(filepath)
    list_judgeNominal = mlem2.getJudgeNominal(decision_table, list_nominal)

    # rule clustering (alternative methods are left commented out)
    #rules_new = getRuleClusteringBySimilarity(rules, colnames, list_judgeNominal, k=3)
    #rules_new = getRuleClusteringByRandom(rules, k=3)
    #rules_new = getRuleClusteringBySameCondition(rules, k=3)
    #rules_new = getRuleClusteringByConsistentSimilarity(rules, colnames, list_judgeNominal, k=3)
    #rules_new = getRuleClusteringByConsistentSimilarityExceptMRule(rules, colnames, list_judgeNominal, k=3, m=3)
    #rules_new = getRuleClusteringByConsistentTimesSimilarityExceptMRule(rules, colnames, list_judgeNominal, k=3, m=3)
    # Both calls below run; the second assignment overwrites the first result.
    rules_new = getRuleClusteringBySimilarityExceptMRule(rules, colnames, list_judgeNominal, k=3, m=3)
    rules_new = getRuleClusteringByConsistentExceptMRule(rules, colnames, list_judgeNominal, k=3, m=3)
def getRulesByApriori(FILENAME, iter1, iter2, minsup, minconf):
    """Induce rules from one training split with a level-wise (Apriori-style) search.

    Reads ``/data/uci/<FILENAME>/<FILENAME>-train<iter1>-<iter2>.tsv``,
    applies ``dropna()`` and renumbers the index, then grows frequent
    itemsets of attribute-value pairs one level at a time. Itemsets that
    contain an item whose ``getIdx()`` equals the number of columns
    (presumably the decision attribute - TODO confirm the index base) are
    kept, filtered by ``getConfidence`` and converted with
    ``createRuleFromItems``.

    Each candidate itemset is generated only once per level. The same
    c-itemset is the union of several pairs of (c-1)-itemsets; without
    de-duplication every copy is carried to the next level, which multiplies
    the work at each level and yields duplicate rules.

    Args:
        FILENAME: dataset name (directory and file-name prefix).
        iter1: first split identifier.
        iter2: second split identifier.
        minsup: minimum size of an itemset's support (``len(...) >= minsup``).
        minconf: minimum value of ``getConfidence`` for an itemset to be kept.

    Returns:
        A list of the objects returned by ``createRuleFromItems``.
    """
    # Local import keeps this function self-contained.
    from itertools import combinations

    # read data
    filepath = '/data/uci/'+FILENAME+'/'+FILENAME+'-train'+str(iter1)+'-'+str(iter2)+'.tsv'
    decision_table = mlem2.getDecisionTable(filepath)
    decision_table = decision_table.dropna()
    decision_table.index = range(decision_table.shape[0])
    num_columns = decision_table.shape[1]

    # AttributeValuePair
    attributeValuePair = getAttributeValuePairs(decision_table)

    # frequent itemsets, keyed by itemset size
    dict_frequent_itemset = defaultdict(list)

    # 1 frequent itemset
    dict_frequent_itemset[1] = [{avp} for avp in attributeValuePair if len(avp.getSupport()) >= minsup]

    # 2 ~ frequent itemset
    for c in range(2, num_columns+1):
        # Candidates are unions of two frequent (c-1)-itemsets that have
        # exactly c items; every distinct candidate is kept once, in the
        # order it is first produced.
        list_candidate_item = []
        seen_candidates = set()
        for left, right in combinations(dict_frequent_itemset[c-1], 2):
            candidate_item = left.union(right)
            if len(candidate_item) != c:
                continue
            key = frozenset(candidate_item)
            if key in seen_candidates:
                continue
            seen_candidates.add(key)
            list_candidate_item.append(candidate_item)

        # candidates that satisfy minsup become the next frequent itemsets
        dict_frequent_itemset[c] = [ci for ci in list_candidate_item if len(getAllSupport(ci)) >= minsup]

    print('{iter1},{iter2},frequent item done'.format(iter1=iter1, iter2=iter2))

    # keep only the frequent patterns that contain a class item
    list_target = []
    for c in range(2, num_columns+1):
        for items in dict_frequent_itemset[c]:
            list_items = list(items)
            list_idx = [item.getIdx() for item in list_items]
            if num_columns in list_idx:
                list_target.append(list_items)
        print(c)

    # number of rule candidates
    print(len(list_target))

    # keep only the patterns whose confidence reaches minconf
    list_target = [items for items in list_target if getConfidence(items, decision_table) >= minconf]

    # create the rules
    rules = [createRuleFromItems(items, decision_table) for items in list_target]

    # END
    return rules
# ========================================
# main
# ========================================
# Manual run: induces Apriori rules for one hard-coded split of 'hayes-roth',
# classifies the matching test split with LERS and prints the accuracy.
if __name__ == "__main__":

    FILENAME = 'hayes-roth'
    iter1, iter2 = 4, 5
    minsup, minconf = 10, 1.0

    rules = getRulesByApriori(FILENAME, iter1, iter2, minsup, minconf)

    # test data setup
    filepath = '/data/uci/' + '/'.join([FILENAME, FILENAME]) + '-test' + str(iter1) + '-' + str(iter2) + '.tsv'
    decision_table_test = mlem2.getDecisionTable(filepath).dropna()
    decision_class = decision_table_test[decision_table_test.columns[-1]].values.tolist()
    filepath = '/data/uci/' + '/'.join([FILENAME, FILENAME]) + '.nominal'
    list_nominal = mlem2.getNominalList(filepath)
    list_judgeNominal = mlem2.getJudgeNominal(decision_table_test, list_nominal)

    # predict by LERS
    predictions = LERS.predictByLERS(rules, decision_table_test, list_judgeNominal)

    # compute the accuracy; the true labels are compared as strings
    accuracy = accuracy_score([str(label) for label in decision_class], predictions)
    print(accuracy)
def getData(FILENAME, iter1, iter2, T="test"):
    """Read one dataset split and return the table plus its class column.

    The split is read from
    ``DIR_UCI/<FILENAME>/<FILENAME>-<T><iter1>-<iter2>.tsv`` with
    ``mlem2.getDecisionTable``; ``dropna()`` is applied before the class
    values are extracted.

    Args:
        FILENAME: dataset name; used both as directory and file-name prefix.
        iter1: first split identifier embedded in the file name.
        iter2: second split identifier embedded in the file name.
        T: split kind inserted before ``iter1`` (defaults to ``"test"``).

    Returns:
        A ``(decision_table, decision_class)`` tuple, where
        ``decision_class`` is the last column of the table as a list.
    """
    pieces = [DIR_UCI, "/", FILENAME, "/", FILENAME, "-", T,
              str(iter1), "-", str(iter2), ".tsv"]
    source_path = "".join(pieces)

    loaded = mlem2.getDecisionTable(source_path)
    cleaned = loaded.dropna()

    # The class label is stored in the last column.
    class_values = cleaned[cleaned.columns[-1]].values.tolist()
    return (cleaned, class_values)