def parse_author_id(self):  # TODO: duplicates parse_creator_id; the shared logic could be factored out
    u"""
    Extract the blog author's id and store it in ``self.info['author_id']``.

    The profile link is read from the second anchor matched by the CSS
    selector ``div.blognavInfo span a``. When the anchor, its ``href`` or
    the id inside the link cannot be found, a debug message is logged and
    ``self.info`` is left untouched.

    :return: None
    """
    anchors = self.dom.select('div.blognavInfo span a')
    # anchors[0] is the home-page link, so the profile link is anchors[1];
    # guard against pages that expose fewer than two anchors.
    if not anchors or len(anchors) < 2:
        Debug.logger.debug(u"没有找到author_id")
        return

    author_id_href = ParserTools.get_attr(anchors[1], 'href')
    if not author_id_href:
        Debug.logger.debug(u"没有找到author_id")
        return

    # NOTE(review): Match.SinaBlog_profile presumably returns a regex match
    # object, or None when the href is not a profile link -- confirm.
    result = Match.SinaBlog_profile(author_id_href)
    if not result:
        Debug.logger.debug(u"没有找到author_id")
        return

    self.info['author_id'] = result.group('SinaBlog_people_id')
def parse_creator_id(self):
    u"""
    Extract the blog creator's id and store it in ``self.info['creator_id']``.

    The profile link is read from the second anchor matched by the CSS
    selector ``div.blognavInfo span a``. When the anchor, its ``href`` or
    the id inside the link cannot be found, a debug message is logged and
    ``self.info`` is left untouched.

    :return: None
    """
    anchors = self.dom.select('div.blognavInfo span a')
    # anchors[0] is the home-page link, so the profile link is anchors[1];
    # guard against pages that expose fewer than two anchors.
    if not anchors or len(anchors) < 2:
        Debug.logger.debug(u"没有找到creator_id")
        return

    creator_id_href = ParserTools.get_attr(anchors[1], 'href')
    if not creator_id_href:
        Debug.logger.debug(u"没有找到creator_id")
        return

    # NOTE(review): Match.SinaBlog_profile presumably returns a regex match
    # object, or None when the href is not a profile link -- confirm.
    result = Match.SinaBlog_profile(creator_id_href)
    if not result:
        Debug.logger.debug(u"没有找到creator_id")
        return

    self.info['creator_id'] = result.group('SinaBlog_people_id')