Esempio n. 1
0
 def handle_startendtag(self, tag, attrs):
     """
     Handle a directly closed tag such as ``<br/>``.

     Creates a node for the tag, fills in its tag name, parent, index,
     path and attributes, marks it as a ``"startendtag"`` node and
     appends it to the children of the current node (``self.now``).

     :param tag: name of the tag reported by the parser
     :param attrs: iterable of ``(name, value)`` attribute pairs
     :return: None
     """
     # Let the parent class process the tag and its attributes first.
     # NOTE(review): per the html.parser documentation the stock
     # implementation calls self.handle_starttag() and then
     # self.handle_endtag(); if this class overrides those to build nodes,
     # the element may end up recorded twice -- confirm against the rest
     # of the class.
     HTMLParser.handle_startendtag(self, tag, attrs)

     # Choose the specialised node class for the tag, falling back to the
     # generic WebNode. Exactly one node is instantiated (previously a
     # WebNode was always created and then discarded for known tags).
     node_classes = {
         "br": wn.BrNode,
         "hr": wn.HrNode,
         "meta": wn.MetaNode,
         "img": wn.ImgNode,
     }
     node = node_classes.get(tag, wn.WebNode)()

     # A directly closed tag is assumed to always have a parent node, so
     # self.now is dereferenced without a None check.
     parent = self.now
     node.tag = tag
     node.father = parent
     node.index = parent.index + 1
     # The path extends the parent's path with "tag[n]", where n is this
     # node's 1-based position among all of the parent's children.
     node.path = parent.path + "-" + "{0}[{1}]".format(tag, len(parent.children) + 1)

     # Copy the parsed attributes onto the node.
     for name, value in attrs:
         node.attr[name] = value

     # Mark the node with its special type.
     node.type = "startendtag"

     # Register the node in its parent's list of children.
     parent.children.append(node)
Esempio n. 2
0
 def handle_startendtag(self, tag, attrs):
     """
     Forward a directly closed tag (e.g. ``<br/>``) to HTMLParser.

     :param tag: tag name, passed through unchanged
     :param attrs: attribute list, passed through unchanged
     """
     # Per the html.parser documentation, the base implementation simply
     # calls handle_starttag() followed by handle_endtag().
     HTMLParser.handle_startendtag(self, tag, attrs)