def strNumClassRules(rules):
    """Return a comma-separated string of rule counts per estimated class.

    :param rules: list of rule objects
    :return: one count per consequent class (order given by
        mlem2.getEstimatedClass), joined by commas
    """
    # Idiom fix: build the string directly instead of a manual append-loop.
    return ",".join(
        str(len(mlem2.getRulesClass(rules, consequent)))
        for consequent in mlem2.getEstimatedClass(rules)
    )
def strNumClassRules(rules):
    """Build a CSV string holding, per estimated class, the number of rules
    whose consequent is that class."""
    counts = [
        len(mlem2.getRulesClass(rules, cls))
        for cls in mlem2.getEstimatedClass(rules)
    ]
    return ",".join(map(str, counts))
def getRuleClusteringBySimilarity(rules, colnames, list_judgeNominal, k=3) : rules_new = list() # 結論部別 for cls in mlem2.getEstimatedClass(rules) : target_rules = [r for r in rules if r.getConsequent() == cls] # ルール群のサポート値の最小値がk以下のルールがある内は繰り返す min_support = mlem2.getMinSupport(target_rules) while min_support < k : # target_rules が 1つなら if len(target_rules) == 1 : target_rules.pop() break # merge対象ルールを見つける merged_rules = [r for r in target_rules if len(r.getSupport()) == min_support] merged_rule = merged_rules[0] target_rules.remove(merged_rule) # merged_rule との類似度を求める list_similarities = [getSimilarity2(merged_rule, r, colnames, list_judgeNominal) for r in target_rules] # 最も類似度が大きいルールを見つける max_similarity = np.max(list_similarities) max_rules = [target_rules[i] for i,s in enumerate(list_similarities) if s == max_similarity] #print("First : " + str(len(max_rules))) # 一意でなければ、条件部を構成する属性数で判断 if len(max_rules) > 1 : list_count_same_conditions = [getCountSameCondition(merged_rule, r) for r in max_rules] max_count = np.max(list_count_same_conditions) max_rules = [max_rules[i] for i,c in enumerate(list_count_same_conditions) if c == max_count] #print("Second : " + str(len(max_rules))) # 一意でなければ、supportの小ささで判断 if len(max_rules) > 1 : list_supports = [len(r.getSupport()) for r in max_rules] min_support = np.min(list_supports) max_rules = [max_rules[i] for i,s in enumerate(list_supports) if s == min_support] #print("Third : " + str(len(max_rules))) # 先頭のルールでmerge merge_rule = mergeRule(merged_rule, max_rules[0]) target_rules.remove(max_rules[0]) # 新しいルールを追加 target_rules.append(merge_rule) # min_support 更新 min_support = mlem2.getMinSupport(target_rules) print(min_support) rules_new.extend(target_rules) return(rules_new)
def getMRulesFUN(list_rules, attr, v, target_cls, DELFUN, m = 0) :
    """Keep or prune rules containing the elementary condition (attr, v),
    depending on whether target_cls is m-protected.

    Counts rules of target_cls that include (attr, v) against the average
    count over the other classes.  If the target's share exceeds m the
    rule set is returned unchanged (m-protected); otherwise DELFUN is
    applied to delete/weaken the condition.

    :param list_rules: rule objects to inspect
    :param attr: attribute name of the elementary condition
    :param v: attribute value of the elementary condition
    :param target_cls: the (sensitive) consequent class to protect
    :param DELFUN: callable (list_rules, attr, v) -> list_rules applied
        when the class is NOT m-protected
    :param m: protection threshold on the target's share
    :return: list of rules (unchanged, or the result of DELFUN)
    """
    num_target_cls = 0
    list_num_other_cls = []
    for cls in mlem2.getEstimatedClass(list_rules) :
        if cls == target_cls :
            num_target_cls = getNumRulesClassIncludeE(list_rules, attr, v, cls)
        else :
            list_num_other_cls.append(getNumRulesClassIncludeE(list_rules, attr, v, cls))
    # Average over the other classes, so multi-class data is handled.
    # Bug fix: the original divided by len(list_num_other_cls) and by the
    # combined count unconditionally, raising ZeroDivisionError when there
    # is no other class or no rule includes (attr, v) at all.
    num_other_cls = (sum(list_num_other_cls) / len(list_num_other_cls)
                     if list_num_other_cls else 0)
    total = num_target_cls + num_other_cls
    if total == 0 :
        # Share is 0 (never > m for m >= 0): fall through to DELFUN,
        # consistent with the limit of the original ratio test.
        return(DELFUN(list_rules, attr, v))
    if (num_target_cls / total) > m :  # m-protected: keep the rules as-is
        return(list_rules)
    else :
        return(DELFUN(list_rules, attr, v))
def getRuleClusteringBySameCondition(rules, k=3): rules_new = list() # 結論部別 for cls in mlem2.getEstimatedClass(rules): target_rules = [r for r in rules if r.getConsequent() == cls] # ルール群のサポート値の最小値がk以下のルールがある内は繰り返す min_support = mlem2.getMinSupport(target_rules) while min_support < k: # target_rules が 1つなら if len(target_rules) == 1: target_rules.pop() break # merge対象ルールを見つける merged_rules = [ r for r in target_rules if len(r.getSupport()) == min_support ] merged_rule = merged_rules[0] target_rules.remove(merged_rule) # 同じ条件属性の数でマージするルールを決定 list_count_same_conditions = [ getCountSameCondition(merged_rule, r) for r in target_rules ] max_count = np.max(list_count_same_conditions) max_rules = [ target_rules[i] for i, c in enumerate(list_count_same_conditions) if c == max_count ] # 先頭のルールでmerge merge_rule = mergeRule(merged_rule, max_rules[0]) target_rules.remove(max_rules[0]) # 新しいルールを追加 target_rules.append(merge_rule) # min_support 更新 min_support = mlem2.getMinSupport(target_rules) print(min_support) rules_new.extend(target_rules) return (rules_new)
def getAccurayRecall(rules, decision_table, list_judgeNominal):
    """Compute per-class (accuracy, recall) of the rule set on a table.

    For each estimated class, collects every object matched by at least
    one rule of that class, then measures accuracy (matched objects whose
    actual class agrees) and recall (agreeing matches over all objects of
    that class).

    :param rules: rule objects
    :param decision_table: pandas DataFrame; last column is the decision class
    :param list_judgeNominal: nominal flags forwarded to isExplainRule
    :return: list of (accuracy, recall) tuples, one per estimated class
    """
    result = []
    # Last column holds the actual decision class; hoisted out of the loop.
    class_column = decision_table[decision_table.columns[-1]]
    for consequent in mlem2.getEstimatedClass(rules):
        target_rules = mlem2.getRulesClass(rules, consequent)
        target_objects = []
        for rule in target_rules:
            match_objects = decision_table.apply(
                lambda obj: isExplainRule(obj, rule, list_judgeNominal), axis=1)
            target_objects.extend(decision_table[match_objects].index.tolist())
        # De-duplicate objects matched by several rules.
        target_objects = list(set(target_objects))
        # Bug fix: DataFrame.ix was removed in pandas 1.0 -> use label-based .loc.
        estimated_classes = class_column.loc[target_objects]
        hits = sum(estimated_classes == consequent)
        # Robustness: avoid ZeroDivisionError when no object matched or the
        # class does not occur in the table.
        accuracy = hits / len(target_objects) if target_objects else 0.0
        actual = sum(class_column == consequent)
        recall = hits / actual if actual else 0.0
        result.append((accuracy, recall))
    return(result)
def getAccurayRecall(rules, decision_table, list_judgeNominal):
    """Return per-class (accuracy, recall) tuples for a rule set.

    Accuracy: fraction of objects matched by the class's rules whose
    actual class equals the consequent.  Recall: agreeing matches over
    all objects actually belonging to the consequent class.

    :param rules: rule objects
    :param decision_table: pandas DataFrame; last column is the decision class
    :param list_judgeNominal: nominal flags forwarded to isExplainRule
    :return: list of (accuracy, recall) tuples, one per estimated class
    """
    result = []
    # The decision class lives in the last column; look it up once.
    class_column = decision_table[decision_table.columns[-1]]
    for consequent in mlem2.getEstimatedClass(rules):
        target_rules = mlem2.getRulesClass(rules, consequent)
        target_objects = []
        for rule in target_rules:
            match_objects = decision_table.apply(
                lambda obj: isExplainRule(obj, rule, list_judgeNominal),
                axis=1)
            index_objects = decision_table[match_objects].index.tolist()
            target_objects.extend(index_objects)
        # Objects matched by multiple rules are counted once.
        target_objects = list(set(target_objects))
        # Bug fix: .ix was removed from pandas (>=1.0); .loc is the
        # label-based replacement.
        estimated_classes = class_column.loc[target_objects]
        hits = sum(estimated_classes == consequent)
        # Robustness: guard both denominators against zero.
        accuracy = hits / len(target_objects) if target_objects else 0.0
        actual = sum(class_column == consequent)
        recall = hits / actual if actual else 0.0
        result.append((accuracy, recall))
    return (result)
def getRuleClusteringByRandom(rules, k=3): rules_new = list() # 結論部別 for cls in mlem2.getEstimatedClass(rules): target_rules = [r for r in rules if r.getConsequent() == cls] # ルール群のサポート値の最小値がk以下のルールがある内は繰り返す min_support = mlem2.getMinSupport(target_rules) while min_support < k: # target_rules が 1つなら if len(target_rules) == 1: target_rules.pop() break # merge対象ルールを見つける merged_rules = [ r for r in target_rules if len(r.getSupport()) == min_support ] merged_rule = merged_rules[0] target_rules.remove(merged_rule) # ランダムにもう一つのルールを求める random_rule = choiceRandomRule(target_rules) # 先頭のルールでmerge merge_rule = mergeRule(merged_rule, random_rule) target_rules.remove(random_rule) # 新しいルールを追加 target_rules.append(merge_rule) # min_support 更新 min_support = mlem2.getMinSupport(target_rules) print(min_support) rules_new.extend(target_rules) return (rules_new)
def getRuleClusteringBySameCondition(rules, k=3) : rules_new = list() # 結論部別 for cls in mlem2.getEstimatedClass(rules) : target_rules = [r for r in rules if r.getConsequent() == cls] # ルール群のサポート値の最小値がk以下のルールがある内は繰り返す min_support = mlem2.getMinSupport(target_rules) while min_support < k : # target_rules が 1つなら if len(target_rules) == 1 : target_rules.pop() break # merge対象ルールを見つける merged_rules = [r for r in target_rules if len(r.getSupport()) == min_support] merged_rule = merged_rules[0] target_rules.remove(merged_rule) # 同じ条件属性の数でマージするルールを決定 list_count_same_conditions = [getCountSameCondition(merged_rule, r) for r in target_rules] max_count = np.max(list_count_same_conditions) max_rules = [target_rules[i] for i,c in enumerate(list_count_same_conditions) if c == max_count] # 先頭のルールでmerge merge_rule = mergeRule(merged_rule, max_rules[0]) target_rules.remove(max_rules[0]) # 新しいルールを追加 target_rules.append(merge_rule) # min_support 更新 min_support = mlem2.getMinSupport(target_rules) print(min_support) rules_new.extend(target_rules) return(rules_new)
def getRuleClusteringByRandom(rules, k=3) : rules_new = list() # 結論部別 for cls in mlem2.getEstimatedClass(rules) : target_rules = [r for r in rules if r.getConsequent() == cls] # ルール群のサポート値の最小値がk以下のルールがある内は繰り返す min_support = mlem2.getMinSupport(target_rules) while min_support < k : # target_rules が 1つなら if len(target_rules) == 1 : target_rules.pop() break # merge対象ルールを見つける merged_rules = [r for r in target_rules if len(r.getSupport()) == min_support] merged_rule = merged_rules[0] target_rules.remove(merged_rule) # ランダムにもう一つのルールを求める random_rule = choiceRandomRule(target_rules) # 先頭のルールでmerge merge_rule = mergeRule(merged_rule, random_rule) target_rules.remove(random_rule) # 新しいルールを追加 target_rules.append(merge_rule) # min_support 更新 min_support = mlem2.getMinSupport(target_rules) print(min_support) rules_new.extend(target_rules) return(rules_new)
def getRuleClusteringBySimilarity(rules, colnames, list_judgeNominal, k=3): rules_new = list() # 結論部別 for cls in mlem2.getEstimatedClass(rules): target_rules = [r for r in rules if r.getConsequent() == cls] # ルール群のサポート値の最小値がk以下のルールがある内は繰り返す min_support = mlem2.getMinSupport(target_rules) while min_support < k: # target_rules が 1つなら if len(target_rules) == 1: target_rules.pop() break # merge対象ルールを見つける merged_rules = [ r for r in target_rules if len(r.getSupport()) == min_support ] merged_rule = merged_rules[0] target_rules.remove(merged_rule) # merged_rule との類似度を求める list_similarities = [ getSimilarity2(merged_rule, r, colnames, list_judgeNominal) for r in target_rules ] # 最も類似度が大きいルールを見つける max_similarity = np.max(list_similarities) max_rules = [ target_rules[i] for i, s in enumerate(list_similarities) if s == max_similarity ] #print("First : " + str(len(max_rules))) # 一意でなければ、条件部を構成する属性数で判断 if len(max_rules) > 1: list_count_same_conditions = [ getCountSameCondition(merged_rule, r) for r in max_rules ] max_count = np.max(list_count_same_conditions) max_rules = [ max_rules[i] for i, c in enumerate(list_count_same_conditions) if c == max_count ] #print("Second : " + str(len(max_rules))) # 一意でなければ、supportの小ささで判断 if len(max_rules) > 1: list_supports = [len(r.getSupport()) for r in max_rules] min_support = np.min(list_supports) max_rules = [ max_rules[i] for i, s in enumerate(list_supports) if s == min_support ] #print("Third : " + str(len(max_rules))) # 先頭のルールでmerge merge_rule = mergeRule(merged_rule, max_rules[0]) target_rules.remove(max_rules[0]) # 新しいルールを追加 target_rules.append(merge_rule) # min_support 更新 min_support = mlem2.getMinSupport(target_rules) print(min_support) rules_new.extend(target_rules) return (rules_new)
def getRuleClusteringByConsistentExceptMRule(rules, colnames, list_judgeNominal, k=3, m=3) : rules_new = list() # 結論部別 for cls in mlem2.getEstimatedClass(rules) : cls_rules = [r for r in rules if r.getConsequent() == cls] # m未満のルールとm以上のルールに分ける target_rules = [r for r in cls_rules if len(r.getSupport()) < m] no_target_rules = [r for r in cls_rules if r not in target_rules] # ルール群のサポート値の最小値がk以下のルールがある内は繰り返す min_support = mlem2.getMinSupport(target_rules) if target_rules else 0 while min_support < k and target_rules: # target_rules が 1つならそれを削除して繰り返しを抜ける if len(target_rules) == 1 : target_rules.pop() break # merge対象ルールを見つける merged_rules = [r for r in target_rules if len(r.getSupport()) == min_support] merged_rule = merged_rules[0] target_rules.remove(merged_rule) # 斜め発生度合いを計算 list_inconsistency = [getElementDiscernibieRule(r, merged_rule) for r in target_rules] # もっとも斜めが発生しないルールに絞る max_inconsistency = np.max(list_inconsistency) max_rules = [target_rules[i] for i,c in enumerate(list_inconsistency) if c == max_inconsistency] # 一意でなければ、条件部を構成する属性数で判断 if len(max_rules) > 1 : list_count_same_conditions = [getCountSameCondition(merged_rule, r) for r in max_rules] max_count = np.max(list_count_same_conditions) max_rules = [max_rules[i] for i,c in enumerate(list_count_same_conditions) if c == max_count] #print("Second : " + str(len(max_rules))) # 一意でなければ、supportの小ささで判断 if len(max_rules) > 1 : list_supports = [len(r.getSupport()) for r in max_rules] min_support = np.min(list_supports) max_rules = [max_rules[i] for i,s in enumerate(list_supports) if s == min_support] #print("Third : " + str(len(max_rules))) # 先頭のルールでmerge merge_rule = mergeRule(merged_rule, max_rules[0]) target_rules.remove(max_rules[0]) # 新しいルールを追加 target_rules.append(merge_rule) # min_support 更新 min_support = mlem2.getMinSupport(target_rules) print(min_support) rules_new.extend(target_rules) rules_new.extend(no_target_rules) return(rules_new)
accuracy_score(decision_class, predictions) # rules の数を求める num = len(rules) # 平均の長さを求める mean_length = mlem2.getMeanLength(rules) # train data setup decision_table_train, decision_class = getData(FILENAME, iter1, iter2, T = "train") list_judgeNominal = getJudgeNominal(decision_table_train, FILENAME) # 平均支持度と平均確信度を求める mean_support, mean_conf = LERS.getSupportConfidenceRules(rules, decision_table_train, list_judgeNominal) # AccとRecallを求める acc_recall = LERS.getAccurayRecall(rules, decision_table_train, list_judgeNominal) for i,c in enumerate(mlem2.getEstimatedClass(rules)): print(str(acc_recall[i][0])+","+str(acc_recall[i][1])) ###### 公正配慮のテスト # 基本条件を含むルールセット rules_sex_2 = mlem2.getRulesIncludeE(rules, "Sex_Marital_Status", "2.0") rules_sex_4 = mlem2.getRulesIncludeE(rules, "Sex_Marital_Status", "4.0") # 条件を含まないルールセット rules_exclude_sex = mlem2.getRulesExcludeAttr(rules, "Sex_Marital_Status") # 基本条件を含まないルールセット rules_exclude_sex_1 = mlem2.getRulesExcludeE(rules, "Sex_Marital_Status", "1.0") # 条件を削除したルールセット rules_del_value = mlem2.getRulesDelAttr(rules, "Value_Savings_Stocks") # 基本条件を削除したルールセット rules_del_value_1 = mlem2.getRulesDelE(rules, "Value_Savings_Stocks", "1.0")
def strAccRecall(rules, acc_recall):
    """Return all per-class "accuracy,recall" pairs as one CSV string.

    :param rules: rule objects; only used to enumerate estimated classes
    :param acc_recall: sequence of (accuracy, recall) pairs aligned
        index-wise with mlem2.getEstimatedClass(rules)
    :return: comma-separated string "acc0,rec0,acc1,rec1,..."
    """
    # Idiom fix: join over a generator instead of a manual append-loop.
    return ",".join(
        str(acc_recall[i][0]) + "," + str(acc_recall[i][1])
        for i in range(len(mlem2.getEstimatedClass(rules)))
    )
def strAccRecall(rules, acc_recall):
    """Serialize the per-class (accuracy, recall) pairs into a single
    comma-separated string, in estimated-class order."""
    labels = mlem2.getEstimatedClass(rules)
    pieces = ["%s,%s" % (acc_recall[idx][0], acc_recall[idx][1])
              for idx in range(len(labels))]
    return ",".join(pieces)
def getRuleClusteringByConsistentExceptMRule(rules, colnames, list_judgeNominal, k=3, m=3): rules_new = list() # 結論部別 for cls in mlem2.getEstimatedClass(rules): cls_rules = [r for r in rules if r.getConsequent() == cls] # m未満のルールとm以上のルールに分ける target_rules = [r for r in cls_rules if len(r.getSupport()) < m] no_target_rules = [r for r in cls_rules if r not in target_rules] # ルール群のサポート値の最小値がk以下のルールがある内は繰り返す min_support = mlem2.getMinSupport(target_rules) if target_rules else 0 while min_support < k and target_rules: # target_rules が 1つならそれを削除して繰り返しを抜ける if len(target_rules) == 1: target_rules.pop() break # merge対象ルールを見つける merged_rules = [ r for r in target_rules if len(r.getSupport()) == min_support ] merged_rule = merged_rules[0] target_rules.remove(merged_rule) # 斜め発生度合いを計算 list_inconsistency = [ getElementDiscernibieRule(r, merged_rule) for r in target_rules ] # もっとも斜めが発生しないルールに絞る max_inconsistency = np.max(list_inconsistency) max_rules = [ target_rules[i] for i, c in enumerate(list_inconsistency) if c == max_inconsistency ] # 一意でなければ、条件部を構成する属性数で判断 if len(max_rules) > 1: list_count_same_conditions = [ getCountSameCondition(merged_rule, r) for r in max_rules ] max_count = np.max(list_count_same_conditions) max_rules = [ max_rules[i] for i, c in enumerate(list_count_same_conditions) if c == max_count ] #print("Second : " + str(len(max_rules))) # 一意でなければ、supportの小ささで判断 if len(max_rules) > 1: list_supports = [len(r.getSupport()) for r in max_rules] min_support = np.min(list_supports) max_rules = [ max_rules[i] for i, s in enumerate(list_supports) if s == min_support ] #print("Third : " + str(len(max_rules))) # 先頭のルールでmerge merge_rule = mergeRule(merged_rule, max_rules[0]) target_rules.remove(max_rules[0]) # 新しいルールを追加 target_rules.append(merge_rule) # min_support 更新 min_support = mlem2.getMinSupport(target_rules) print(min_support) rules_new.extend(target_rules) rules_new.extend(no_target_rules) return (rules_new)