def makeTreeFull(satisfy_lists, datasets, myTree, labels_full, parentClass, default):
    """Add the branches that are missing from a decision tree, in place.

    For the feature at the root of ``myTree``, every value listed in
    ``labels_full`` that has no branch yet gets a leaf. The function then
    recurses into every branch that is itself a sub-tree (a ``dict``).

    :param satisfy_lists: branch values taken from the top of the tree down
        to this node. It is not modified; each recursive call receives a new
        list extended with the branch value being descended into.
    :param datasets: passed through unchanged to ``most_class_computes``.
    :param myTree: the generated tree, ``{feature: {value: leaf_or_subtree}}``.
        Missing branches are inserted into it directly.
    :param labels_full: mapping from each feature to all of its values.
    :param parentClass: class used for the leaves added at this node. The
        added leaf is ``parentClass + "(虚)"``; the suffix marks a leaf that
        has no training samples behind it.
    :param default: leaf value used when ``parentClass`` is ``None``.
    :return: None.
    """
    print("--------------进入makeTreeFull函数-----------------")
    print("1入口satisfy_lists=", satisfy_lists)
    print("-------------------------------")
    print("prarentClass=", parentClass)
    print("-------------------------------")

    # The first key of the node is the feature it splits on.
    root_key = list(myTree.keys())[0]
    # The value under that key maps each feature value to a leaf or sub-tree.
    sub_tree = myTree[root_key]
    # A string here means the node is a leaf: nothing to complete.
    if isinstance(sub_tree, str):
        return

    # Add a leaf for every feature value that has no branch yet.
    for label in labels_full[root_key]:
        if label not in sub_tree:
            if parentClass is not None:
                sub_tree[label] = parentClass + "(虚)"
            else:
                sub_tree[label] = default

    # Kept only for the debug output below; it is never filled in.
    current_class = []
    print("current_class=", current_class)
    print("myTree=", myTree)
    print("collections.Counter(current_class)=", collections.Counter(current_class))

    print("2入口satisfy_lists=", satisfy_lists)
    if satisfy_lists:
        # Only the second element of the result is used; presumably it is the
        # majority class of the samples matching the path - confirm against
        # most_class_computes.
        _, most_class = most_class_computes(datasets, satisfy_lists)
    else:
        # At the top of the tree there is no path, hence no class to hand down.
        most_class = None
    print("离开2入口satisfy_lists=", satisfy_lists)
    print("most_class=", most_class)

    # Walk every branch of the current feature and recurse into sub-trees.
    for sub_key in sub_tree.keys():
        print("----------------进入for循环--------------")
        if isinstance(sub_tree[sub_key], dict):
            print("--------------递归调用前--------------")
            print("satisfy_lists=", satisfy_lists)
            # Build a fresh path for the child so that neither this frame's
            # list nor the caller's list is modified.
            makeTreeFull(satisfy_lists + [sub_key], datasets,
                         myTree=sub_tree[sub_key], labels_full=labels_full,
                         parentClass=most_class, default=default)
def makeTreeFull(satisfy_lists, datasets, myTree, labels_full, parentClass, default):
    """Add the branches that are missing from a decision tree, in place.

    Each node is a dict holding the feature it splits on and, optionally, an
    ``'Entropy='`` entry, which is skipped when looking for the feature key.
    For that feature, every value listed in ``labels_full`` that has no
    branch yet gets a leaf. The function then recurses into every branch
    that is itself a sub-tree (a ``dict``).

    :param satisfy_lists: branch values taken from the top of the tree down
        to this node. It is not modified; each recursive call receives a new
        list extended with the branch value being descended into.
    :param datasets: passed through unchanged to ``most_class_computes``.
    :param myTree: the generated tree. Missing branches are inserted into it
        directly.
    :param labels_full: mapping from each feature to all of its values.
    :param parentClass: only printed. It is overwritten with the class
        computed for this node before any leaf is added, so the value passed
        in never reaches the tree.
    :param default: leaf value used when no class is available, which is
        always the case when ``satisfy_lists`` is empty.
    :return: None.
    """
    print("--------------进入makeTreeFull函数-----------------")
    print("1入口satisfy_lists=", satisfy_lists)
    print("-------------------------------")
    print("myTree=", myTree)
    print("prarentClass=", parentClass)
    print("-------------------------------")

    # The feature key is the last key that is not the 'Entropy=' entry.
    root_key = ''
    for item in myTree:
        if item != 'Entropy=':
            root_key = item
    # The value under that key maps each feature value to a leaf or sub-tree.
    sub_tree = myTree[root_key]
    # A string here means the node is a leaf: nothing to complete.
    if isinstance(sub_tree, str):
        print("如果认为是叶子节点")
        return

    if satisfy_lists:
        print("-----------------进入most_class_computes------------------------")
        print("satisfy_lists=", satisfy_lists)
        # Only the second element of the result is used; presumably it is the
        # majority class of the samples matching the path - confirm against
        # most_class_computes.
        _, most_class = most_class_computes(datasets, satisfy_lists)
        print("most_class=", most_class)
        print("-----------------离开most_class_computes-----------------------")
    else:
        # At the top of the tree there is no path, hence no class to use.
        most_class = None
    print("离开2入口satisfy_lists=", satisfy_lists)
    print("most_class=", most_class)
    # Leaves added below use the class computed for this node, not the
    # argument handed in by the caller.
    parentClass = most_class

    # Add a leaf for every feature value that has no branch yet.
    for label in labels_full[root_key]:
        if label not in sub_tree:
            print("查看是否存在该标签=", label)
            print("parentClass=", parentClass)
            if parentClass is not None:
                sub_tree[label] = parentClass
            else:
                sub_tree[label] = default
    print("离开2入口satisfy_lists=", satisfy_lists)
    print("most_class=", most_class)

    # Walk every branch of the current feature and recurse into sub-trees.
    for sub_key in sub_tree.keys():
        print("树枝sub_key=", sub_key)
        print("most_class=", most_class)
        print("sub_tree[sub_key]=", sub_tree[sub_key])
        print("----------------进入for循环☆--------------")
        if isinstance(sub_tree[sub_key], dict):
            print("--------------递归调用前--------------")
            print("satisfy_lists=", satisfy_lists)
            # Build a fresh path for the child so that neither this frame's
            # list nor the caller's list is modified.
            makeTreeFull(satisfy_lists + [sub_key], datasets,
                         myTree=sub_tree[sub_key], labels_full=labels_full,
                         parentClass=most_class, default=default)