def relatives(list_name, data, relatives_num):
    """Collect, for every name, the '药物' entries that contain it.

    :param list_name: iterable of names; one group is built per name, in order
    :param data: table exposing ``shape`` and a '药物' column that is read by
        row labels ``0 .. shape[0] - 1`` (e.g. a DataFrame with the default
        index)
    :param relatives_num: passed through to ``utils.cut_by_num``; presumably
        the maximum size of each group -- confirm against ``utils``
    :return: whatever ``utils.cut_by_num`` returns for the list of groups
    """
    relatives_list = []  # relatives group ("亲友团") of each name
    length = data.shape[0]
    for item in list_name:
        # Fetch the column once per name instead of once per row. An empty
        # table never touches the column, exactly as before.
        column = data['药物'] if length else None
        matches = []
        for i in range(length):
            words = column[i]
            if item in words:
                matches.append(words)
        relatives_list.append(matches)
    return utils.cut_by_num(relatives_list, relatives_num)
# Example no. 2
# 0
def cluster_main2(relatives_list, list_name):
    """Run the grouping pipeline once for every group size from 3 to 8.

    For each size the numeric relatives lists are cut with
    ``utils.cut_by_num``, filtered with ``del_by_correlation``, turned into
    pairs with ``create_double_set`` and handed to ``merge_loop`` together
    with the path ``data/group<size>.csv``. The ratio ``max_num / size`` is
    printed for each size. Nothing is returned.

    :param relatives_list: relatives groups, passed to ``duplicate_removal``
    :param list_name: the word list used by the helpers to map words to
        numbers and back
    """
    deduplicated = duplicate_removal(relatives_list, list_name)
    # Replace the items in the lists with numbers
    numeric_groups = utils.word_2_num(list_name, deduplicated)
    for group_num in range(3, 9):
        trimmed = utils.cut_by_num(numeric_groups, group_num)
        filtered = del_by_correlation(trimmed)
        # NOTE(review): the result of this call is never used; the call is
        # kept in case num_2_word has side effects -- confirm and remove.
        utils.num_2_word(list_name, filtered)
        # Build the pairs ("二元组")
        pairs = create_double_set(filtered)
        output_path = 'data/group' + str(group_num) + '.csv'
        max_num, _best_set = merge_loop(pairs, list_name, output_path)
        # Information utilisation rate
        print(max_num, '/', group_num, '=', max_num / group_num)
def relatives_2(list_name, data, relatives_num):
    """
    Build the relatives group of every item from mutual-information pairs.

    Each row's pair is appended to the group of its first element and to the
    group of its second element, in row order.

    :param list_name: list of all words; a word's group is stored at the
        position of that word in this list
    :param data: DataFrame, {组合 (combination), 关联度系数 (correlation
        coefficient)}; only the '组合' column is read here, by row labels
        ``0 .. shape[0] - 1``
    :param relatives_num: limit on the number of relatives, passed to
        ``utils.cut_by_num``
    :return: [[]] the relatives groups of all items, as returned by
        ``utils.cut_by_num``
    :raises ValueError: from ``list_name.index`` when an element of a pair is
        not in ``list_name``
    """
    relatives_list = [[] for _ in range(len(list_name))]
    length = data.shape[0]
    # Fetch the column once instead of once per row. An empty table never
    # touches the column, exactly as before.
    column = data['组合'] if length else None
    for i in range(length):
        words = column[i]
        pre_index = list_name.index(words[0])
        relatives_list[pre_index].append(words)
        suf_index = list_name.index(words[1])
        relatives_list[suf_index].append(words)
    return utils.cut_by_num(relatives_list, relatives_num)