def parse_update_date(self):
    u"""
    Parse the most recent update date and store it.

    Takes the first ``div.zg-gray-normal span.time`` node found under
    ``self.node``, reads its content through ``Tag.get_content``, converts
    that text with ``Match.parse_date`` and saves the result as the
    ``update_date`` attribute.

    :return:
    :rtype: None
    :raises IndexError: if the selector matches nothing (the ``[0]``
        lookup is not guarded)
    """
    time_nodes = self.node.select(u"div.zg-gray-normal span.time")
    date_text = Tag.get_content(time_nodes[0])
    self.set_attr("update_date", Match.parse_date(date_text))
def parse_follower_count(self):
    u"""
    Parse the follower count and store it.

    Reads the content of the first followers-count anchor under
    ``self.node`` and saves it as the ``follower_count`` attribute. The
    value is stored exactly as ``Tag.get_content`` returns it; no numeric
    conversion is done here.

    :return:
    :rtype: None
    :raises IndexError: if the selector matches nothing (the ``[0]``
        lookup is not guarded)
    """
    followers_links = self.node.select(
        u'div.zg-gray-normal a[data-za-l="collection_followers_count"]'
    )
    count_text = Tag.get_content(followers_links[0])
    self.set_attr("follower_count", count_text)
def parse_collection_id(self):
    u"""
    Parse the collection id and store it.

    The id is derived from the ``href`` of the first followers-count
    anchor under ``self.node``: the link is passed to
    ``Match.match_collection_id`` and the result is saved as the
    ``collection_id`` attribute.

    :return:
    :rtype: None
    :raises IndexError: if the selector matches nothing (the ``[0]``
        lookup is not guarded)
    """
    followers_link = self.node.select(
        u'div.zg-gray-normal a[data-za-l="collection_followers_count"]'
    )[0]
    href = Tag.get_attr(followers_link, u"href")
    self.set_attr("collection_id", Match.match_collection_id(href))
def parse_creator_name(self, dom):
    u"""
    Parse the creator's user name and store it as ``name``.

    dom => div#zh-single-answer-author-info

    :param dom: user-info node
    :type dom: bs4.Tag
    :return:
    :rtype: None
    :raises IndexError: if ``dom`` has no ``h2.zm-list-content-title a``
        node (the ``[0]`` lookup is not guarded)
    """
    title_links = dom.select(u"h2.zm-list-content-title a")
    user_name = Tag.get_content(title_links[0])
    self.set_attr("name", user_name)
def parse_creator_profile_id(self, dom):
    u"""
    Parse the creator's profile id and store it as ``profile_id``.

    dom => div#zh-single-answer-author-info

    The ``href`` of the first ``h2.zm-list-content-title a`` node is
    handed to ``Match.match_author_id``; whatever that returns is stored.

    :param dom: user-info node
    :type dom: bs4.Tag
    :return:
    :rtype: None
    :raises IndexError: if ``dom`` has no matching anchor (the ``[0]``
        lookup is not guarded)
    """
    title_link = dom.select(u"h2.zm-list-content-title a")[0]
    href = Tag.get_attr(title_link, u"href")
    self.set_attr("profile_id", Match.match_author_id(href))
def parse_raw_creator_avatar(self):
    u"""
    Parse the creator's avatar URL and store both raw and formatted forms.

    dom => self.node

    The ``src`` of the avatar image is stored as ``raw_avatar`` first;
    it is then passed through ``Match.format_avatar`` and that result is
    stored as ``avatar``.

    NOTE(review): a method with this same name and body is defined again
    further down in this file -- confirm whether both are needed.

    :return:
    :rtype: None
    :raises IndexError: if either selector matches nothing (the ``[0]``
        lookups are not guarded)
    """
    avatar_link = self.node.select(
        u"div.zm-side-section-inner a.zm-list-avatar-link"
    )[0]
    avatar_img = avatar_link.select(u".zm-list-avatar-medium")[0]
    raw_url = Tag.get_attr(avatar_img, u"src")
    # Store the untouched URL before formatting, as the original does.
    self.set_attr("raw_avatar", raw_url)
    self.set_attr("avatar", Match.format_avatar(raw_url))
def parse_creator_headline(self, dom):
    u"""
    Parse the creator's headline (signature) and store it as ``headline``.

    dom => div#zh-single-answer-author-info

    :param dom: user-info node
    :type dom: bs4.Tag
    :return:
    :rtype: None
    """
    # Per the original author the headline tag is always present, which is
    # why the [0] lookup is left unguarded.
    headline_nodes = dom.select(u"div.zg-gray-normal")
    headline_text = Tag.get_content(headline_nodes[0])
    self.set_attr("headline", headline_text)
def parse_raw_creator_avatar(self):
    u"""
    Parse the creator's avatar URL; store it raw and formatted.

    dom => self.node

    Writes two attributes: ``raw_avatar`` (the image ``src`` as read) and
    ``avatar`` (that ``src`` after ``Match.format_avatar``).

    NOTE(review): an identical definition of this method appears earlier
    in this file -- confirm whether both are needed.

    :return:
    :rtype: None
    :raises IndexError: if either selector matches nothing (the ``[0]``
        lookups are not guarded)
    """
    link_nodes = self.node.select(
        u"div.zm-side-section-inner a.zm-list-avatar-link")
    image_nodes = link_nodes[0].select(u".zm-list-avatar-medium")
    image_src = Tag.get_attr(image_nodes[0], u"src")
    # The raw value is stored before the formatting helper is called.
    self.set_attr("raw_avatar", image_src)
    formatted_src = Match.format_avatar(image_src)
    self.set_attr("avatar", formatted_src)
def parse_creator_hash_id(self, dom):
    u"""
    Parse the creator's hash id and store it as ``hash_id``.

    dom => div#zh-single-answer-author-info

    The id is read from the ``data-id`` attribute of the first
    ``button.zm-rich-follow-btn`` node. When no such button exists the
    method returns without setting anything.

    :param dom: user-info node
    :type dom: bs4.Tag
    :return:
    :rtype: None
    """
    try:
        follow_btn = dom.select(u"button.zm-rich-follow-btn")[0]
    except IndexError:
        # No follow button: this is the user's own collection.
        return
    else:
        self.set_attr("hash_id", Tag.get_attr(follow_btn, u"data-id"))
def set_up(self):
    u"""
    Store the content of ``self.node`` as the ``title`` attribute.

    :return:
    :rtype: None
    """
    node_text = Tag.get_content(self.node)
    self.set_attr(u"title", node_text)
def set_up(self):
    u"""
    Work out whether the entry is already followed; store ``is_followed``.

    Reads the ``class`` attribute of the first ``a.zu-entry-focus-button``
    node under ``self.node``. ``is_followed`` is true when that value
    contains ``zg-btn-white``.

    :return:
    :rtype: None
    :raises IndexError: if the selector matches nothing (the ``[0]``
        lookup is not guarded)
    """
    focus_buttons = self.node.select(u"a.zu-entry-focus-button")
    button_classes = Tag.get_attr(focus_buttons[0], u"class")
    self.set_attr(u"is_followed", u"zg-btn-white" in button_classes)