Code example #1
0
 def growth(self, tree, postNodes):
     """Recursively mine frequent patterns from an FP-tree.

     :param tree: FPTree instance to mine (project-declared type).
     :param postNodes: list of item names forming the current suffix
         pattern accumulated by outer recursion levels.

     Appends each frequent pattern found to ``self.fp`` as a tuple
     ``([item, ...], support)``.
     """
     if tree.isUniquePath():
         # Single-path tree: every combination of the path's nodes is a
         # candidate pattern; its support is the minimum node support.
         nodeCombinations = []
         tree.getCombinationFromPath(nodeCombinations)
         for combination in nodeCombinations:
             support = self._getMinSupport(combination)
             # Keep only combinations with support >= minsup.
             if support is None or support < self.minsup:
                 continue
             # Generate pattern: combination items plus the current suffix.
             pattern = ([], support)
             for node in combination:
                 pattern[0].append(node["name"])
             for node in postNodes:
                 pattern[0].append(node)
             if len(pattern[0]) > 1:
                 self.fp.append(pattern)
     else:
         for item in tree.itemTable:
             # Generate pattern: header-table item plus the current suffix.
             pattern = ([], tree.itemTable[item][0])
             pattern[0].append(item)
             for node in postNodes:
                 pattern[0].append(node)
             # BUG FIX: use >= so this threshold matches the unique-path
             # branch above, which keeps patterns with support >= minsup
             # (the original used a strict > here, silently dropping
             # patterns whose support equals minsup).
             if len(pattern[0]) > 1 and pattern[1] >= self.minsup:
                 self.fp.append(pattern)
             # Construct the conditional pattern base for this item and
             # recurse on the conditional FP-tree built from it.
             baseSet = []
             tree.getConditionalPatternBase(item, baseSet)
             tmpTree = FPTree.FPTree(baseSet, minsup=self.minsup)
             tmpTree.build()
             if not tmpTree.isEmpty():
                 self.growth(tmpTree, pattern[0])
Code example #2
0
File: rule.py  Project: qmh1234567/Tibet-analysis
def get_rules(keywordfile):
    """Mine association rules from a keyword file and write them to disk.

    Loads transactions from *keywordfile*, builds an FP-tree with
    minsup=2, runs FP-growth, sorts the mined patterns by support in
    descending order and writes one rule per line to
    ``rule_<rule_type>_TF_IDF.txt`` (``rule_type`` is a module global).
    """
    print("getting rules...")
    transactions = load_data(keywordfile)
    fp_tree = FPTree.FPTree(transactions, minsup=2)
    fp_tree.build()
    miner = FPGrowth1(minsup=2)
    miner.growth(fp_tree, [])
    ranked = sorted(miner.fp, key=lambda entry: entry[1], reverse=True)
    out_path = "rule_" + rule_type + "_TF_IDF.txt"
    with open(out_path, 'w', encoding='utf-8') as out_file:
        out_file.writelines(str(rule) + "\n" for rule in ranked)
    print("关联规则写入文件成功")
Code example #3
0
def test():
    """Run FP-growth on a small hard-coded transaction set and print rules."""
    transactions = [
        [["a", "b"], 1],
        [["b", "c", "d"], 1],
        [["a", "c", "d", "e"], 1],
        [["a", "d", "e"], 1],
        [["a", "b", "c"], 1],
        [["a", "b", "c", "d"], 1],
        [["a"], 1],
        [["a", "b", "c"], 1],
        [["a", "b", "d"], 1],
        [["b", "c", "e"], 1],
    ]
    fp_tree = FPTree.FPTree(transactions, minsup=2)
    fp_tree.build()
    miner = FPGrowth(minsup=2)
    miner.growth(fp_tree, [])
    # Print the mined patterns, highest support first.
    for rule in sorted(miner.fp, key=lambda entry: entry[1], reverse=True):
        print(rule)
Code example #4
0
    def tjms_tree(paths, rect1, rect_dict):
        """Build a conditional FP-tree from the given prefix paths.

        Each path's first element is its condition node (an (item, count)
        pair); the remaining items are filtered to those present in
        *rect1* and sorted by their rank in *rect_dict* before insertion.
        NOTE: mutates each path in *paths* by popping its head element.
        """
        cond_tree = FPTree.FPTree()
        for path in paths:
            head = path.pop(0)
            items = sorted(
                (v for v in path if v in rect1),
                key=lambda item: rect_dict[item],
            )
            current = cond_tree.root
            for item in items:
                child = current.search(item)
                if child:
                    # Item already on this branch: accumulate its count.
                    child.count += head[1]
                else:
                    # New node for this branch; register it in the
                    # tree's header routes as well.
                    child = FPNode.FPNode(cond_tree, item, head[1])
                    current.add(child)
                    cond_tree._update_route(child)
                current = child
        return cond_tree
Code example #5
0
def find_frequent_items(data_set, n, include_support=False):
    """Yield frequent itemsets mined from *data_set* via FP-growth.

    :param data_set: iterable of transactions, each an iterable of items.
    :param n: total number of transactions; used to scale the 0.01
        support ratio into an absolute count.
    :param include_support: when True, yield ``(itemset, support)``
        tuples instead of bare itemsets.
    """
    # n = 20000
    # Count the 1-itemsets and their support.
    def get_item1(data_set):
        rect = {}
        for line in data_set:
            for item in line:
                rect[item] = rect.get(item, 0) + 1
        return rect

    # Rank items by descending support: returns {item: rank}, 1-based.
    def descending(rect):
        tp_list = []
        tp_list.extend(rect.keys())  # copy the dict keys into a list
        tp_list.sort(key=lambda x: rect[x], reverse=True)
        tp_dict = {}
        i = 1
        for item in tp_list:
            tp_dict[item] = i
            i += 1
        return tp_dict

    # Replace ranks with raw counts for items present in rect.
    # NOTE(review): mutates tp_dict in place and also returns it.
    def count_order(rect, tp_dict):
        for elem in tp_dict:
            if elem in rect:
                tp_dict[elem] = rect[elem]
        return tp_dict

    # 2. Get the 1-itemset counts.
    ys_item1 = get_item1(data_set)
    # 3. Sort items by count, largest first.
    # NOTE(review): this duplicates the body of descending() above.
    tp_list = []
    tp_list.extend(ys_item1.keys())
    tp_list.sort(key=lambda x: ys_item1[x], reverse=True)
    tp_dict = {}
    i = 1
    for item in tp_list:
        tp_dict[item] = i
        i += 1
    ys_item1_order = count_order(rect=ys_item1, tp_dict=tp_dict)

    # 4. Split items into the frequent (big) and infrequent (small) tables.
    #####################
    min_sup = 0.01
    big_table = {}
    small_table = {}
    for item in tp_list:
        if ys_item1[item] >= min_sup * n:
            big_table[item] = ys_item1[item]
        else:
            small_table[item] = ys_item1[item]
    # 5. Build the FP-tree from big_table only.
    tree = FPTree.FPTree()
    for line in data_set:
        line = list(filter(lambda v: v in big_table, line))
        line.sort(key=lambda x: big_table[x], reverse=True)
        tree.add(line)
    # ========================== mine frequent itemsets ==========================
    def pf_item(paths, min_sup):
        # Aggregate item counts along the prefix paths (each path's head
        # is its condition node), keep items meeting min_sup, and return
        # them together with their descending-rank map.
        # NOTE(review): pop(0) mutates the caller's path lists.
        rect = {}
        for path in paths:
            condition_node = path.pop(0)
            for item in path:
                rect[item] = rect.get(item, 0) + condition_node[1]
        rect1 = {}
        for item in rect:
            if rect[item] >= min_sup:
                rect1[item] = rect[item]
        rect_dict = descending(rect1)
        return rect1, rect_dict

    def tjms_tree(paths, rect1, rect_dict):
        # Build the conditional FP-tree from the prefix paths.
        treep = FPTree.FPTree()
        for path in paths:
            condition_node = path.pop(0)
            path = list(filter(lambda v: v in rect1, path))
            path.sort(key=lambda x: rect_dict[x])
            point = treep.root
            for item in path:
                next_point = point.search(item)
                if next_point:
                    next_point.count += condition_node[1]
                else:
                    next_point = FPNode.FPNode(treep, item, condition_node[1])
                    point.add(next_point)
                    treep._update_route(next_point)
                point = next_point

        return treep

        # Find itemsets ending with the given suffix.

    def find_with_suffix(tree, suffix):
        for item, node in tree.items():
            support = sum(n.count for n in node)
            # NOTE(review): this compares against the raw ratio min_sup
            # (0.01), while the recursive pf_item call below scales it by
            # n — confirm which threshold is actually intended here.
            if support >= min_sup and item not in suffix:  # item is not already in the suffix
                found_set = [item] + suffix  # prepend item to the suffix
                yield (found_set, support) if include_support else found_set

                # Build the conditional tree and recursively mine it.
                rect1, rect_dict = pf_item(tree.prefix_paths(item),
                                           min_sup * n)
                cond_tree = tjms_tree(tree.prefix_paths(item), rect1,
                                      rect_dict)
                for s in find_with_suffix(cond_tree, found_set):
                    yield s

    for itemset in find_with_suffix(tree, []):
        yield itemset
Code example #6
0
    def get_recommend_based_on_enemies(self, enemy, min_support):
        """Recommend heroes that historically beat the given enemy.

        Restricts the win/lose frames to matches where *enemy* appears on
        the losing side, mines frequent winning heroes with FP-growth,
        keeps the single-hero itemsets, and builds one candidate
        AssociationRule (enemy -> hero) per surviving hero.

        :param enemy:           (str) name of one enemy in current enemies
        :param min_support:     (float) support ratio; converted below to an
                                absolute count over the filtered matches
        :return: list of AssociationRule objects with computed metrics
        """
        df_lose = self.df_lose
        df_win = self.df_win
        df_match = self.df_match

        # Rows where the enemy appears anywhere on the losing side.
        indexes = \
            df_lose.index[df_lose.isin([enemy]).any(axis=1)].tolist()

        # print("indexes {}".format(indexes))

        df_lose = df_lose.iloc[indexes, :]
        df_win = df_win.iloc[indexes, :]

        heroes_win = df_win.reset_index(drop=True).values.tolist()

        print("current heroes_win len {}".format(len(heroes_win)))

        # Convert the support ratio into an absolute support count.
        min_support = int(round(len(heroes_win) * min_support))
        fp_tree = FPTree(heroes_win, min_support)
        freq_heroes = fp_tree.gen_freq_itemsets()

        print("win_heroes len: {}".format(len(freq_heroes)))

        # Keep only 1-itemsets: individual heroes frequent against the enemy.
        candidates = \
            list(filter(lambda hero_set: len(hero_set) == 1, freq_heroes))

        print("enemies freq hero num: {}".format(len(candidates)))

        all_candidate_rules = list()
        for hero in candidates:
            # print("hero: {}, enemy: {}".format(hero, enemy))
            # radiant_win_indexes = \
            #     (df_match["winner"] == 1) & \
            #     (df_match.loc[:, "radiant_hero_1":"radiant_hero_5"].isin(hero).any(axis=1)) & \
            #     (df_match.loc[:, "dire_hero_1":"dire_hero_5"].isin([enemy]).any(axis=1))
            #
            # dire_win_indexes = \
            #     (df_match["winner"] == -1) & \
            #     (df_match.loc[:, "radiant_hero_1":"radiant_hero_5"].isin([enemy]).any(axis=1)) & \
            #     (df_match.loc[:, "dire_hero_1":"dire_hero_5"].isin(hero).any(axis=1))
            #
            # print("radiant_win_indexes sum : {}".format(radiant_win_indexes.sum()))
            # print("dire_win_indexes sum {}".format(dire_win_indexes.sum()))
            # df_match = df_match.loc[(radiant_win_indexes | dire_win_indexes)]

            # print("df_match==== {}".format(df_match))
            # Skip heroes already picked by either team.
            hero = hero[0].strip(" ")
            if hero in (self.allies + self.enemies):
                continue

            rule = AssociationRule(lhs=[enemy], rhs=[hero], rule_type="enemies")

            rule.compute_metrics(df_win=df_win, df_lose=df_lose, df_match=df_match)

            all_candidate_rules.append(rule)

        return all_candidate_rules
Code example #7
0
    # NOTE(review): this span is the interior of a function whose `def` line
    # is outside the visible chunk; code is left untouched.
    df_radiant_win_dire_heroes = pd.read_csv("../data/processed_data/radiant_win_dire_heros.csv")

    # Stack both orientations of losing-side hero rows into one frame.
    df_lose_heroes = pd.concat([df_dire_win_radiant_heroes,
                                df_radiant_win_dire_heroes],
                               axis=0,
                               sort=False)

    df_lose_heroes = df_lose_heroes.reset_index(drop=True)
    # df_lose_heroes.to_csv("./inter_data/df_lose_heroes.csv")

    # All hero rows (winners and losers) become the FP-growth transactions.
    df_all_heroes = pd.concat([df_win_heroes, df_lose_heroes], axis=0)
    # df_all_heroes.to_csv("./inter_data/df_all_heroes.csv")

    all_heroes = df_all_heroes.reset_index(drop=True).values.tolist()

    # Scale the support ratio into an absolute count over all transactions.
    fp_tree = FPTree(transactions=all_heroes,
                     min_support_count=int(round(min_support_allies * len(all_heroes))))

    freq_allies = fp_tree.gen_freq_itemsets()

    df_radiant_win_match = pd.read_csv("../data/processed_data/radiant_win_match.csv")
    df_dire_win_match = pd.read_csv("../data/processed_data/dire_win_match.csv")

    # Combine radiant-win and dire-win match records into one frame.
    df_match = pd.concat([df_radiant_win_match,
                          df_dire_win_match],
                         axis=0,
                         sort=False)

    df_match = df_match.reset_index(drop=True)
    # df_match.to_csv("./inter_data/df_match.csv")
    print("df_match : {}".format(df_match))