def get_retweet_info(self):
    """Parse the forwarded (retweeted) post embedded in the current post.

    Looks for the forwarded-content marker between ``self.pos`` and
    ``self.end``.  Three outcomes are possible:

    * No forwarded block is present: nothing is stored and the cursor is
      left alone.
    * The original author's nick marker is present: the cursor is moved to
      it and the individual ``get_retweet_*`` / ``get_weibopu`` extractors
      are run in order.
    * The nick marker is absent (per the original notes this is a deleted
      original post, or a forward containing only text): the text that
      follows the ``WB_deltxt`` marker is stored in ``self.weibomsg['rmc']``.

    In the third case every lookup is checked.  If ``WB_deltxt``, the
    opening delimiter, or the closing-div marker cannot be found inside the
    window, nothing is stored and ``self.pos`` is not moved, instead of
    slicing from a position derived from a failed ``find()``.
    """
    forward_pos = self.content.find(
        'feed_list_forwardContent', self.pos, self.end)
    if forward_pos == -1:
        # No forwarded block at all: every retweet field stays unset.
        return

    # The forwarded-content marker is also present for deleted posts, so the
    # following searches are anchored at it to stay inside this block.
    nick_pos = self.content.find('feed_list_originNick', forward_pos, self.end)
    if nick_pos != -1:
        # Normal case: a full original post is embedded.
        self.pos = nick_pos
        self.get_retweet_un()
        self.get_retweet_mc()
        self.get_weibopu()
        self.get_retweet_rc()
        self.get_retweet_cc()
        self.get_retweet_page()
        self.get_retweet_pt()
        return

    # Deleted original post, or a forward that carries only text.
    notice_pos = self.content.find('WB_deltxt', forward_pos, self.end)
    if notice_pos == -1:
        # A -1 start offset would be interpreted by find() as "from the last
        # character", escaping the search window; bail out instead.
        return

    open_tag = '">'
    # Literal text is <\/div> (backslash included); the backslash is written
    # escaped so the literal is not an invalid escape sequence.
    close_tag = '<\\/div>'

    open_pos = self.content.find(open_tag, notice_pos, self.end)
    if open_pos == -1:
        return
    text_start = open_pos + len(open_tag)

    text_end = self.content.find(close_tag, text_start, self.end)
    if text_end == -1:
        return

    text = self.content[text_start:text_end]
    text = self.eraseTag(text).replace('\\', '')
    self.weibomsg['rmc'] = utility.clearSpace(text)
    self.pos = text_end
def get_retweet_info(self):
    """Collect the retweeted post's data for the post under the cursor.

    The search window is ``self.content[self.pos:self.end]``.

    * Without a forwarded-content marker the method does nothing.
    * With the original-nick marker, the cursor jumps to it and the
      per-field extractors (``get_retweet_un``, ``get_retweet_mc``,
      ``get_weibopu``, ``get_retweet_rc``, ``get_retweet_cc``,
      ``get_retweet_page``, ``get_retweet_pt``) run in that order.
    * Otherwise (original notes: the source post was deleted, or only text
      is present) the text after the ``WB_deltxt`` marker becomes
      ``self.weibomsg['rmc']`` and the cursor moves to the end of that text.

    If any delimiter needed for the last case is missing, the method
    returns without storing anything or moving the cursor.  The previous
    code fed a failed ``find()`` result back in as an offset, which sliced
    unrelated text and could leave ``self.pos`` at -1.
    """
    content, end = self.content, self.end

    anchor = content.find('feed_list_forwardContent', self.pos, end)
    if anchor == -1:
        # Retweet information is entirely absent.
        return

    # Deleted posts still carry the forwarded-content marker, so every later
    # lookup starts at the anchor to keep the search range bounded.
    origin = content.find('feed_list_originNick', anchor, end)
    if origin != -1:
        # Regular retweet.
        self.pos = origin
        self.get_retweet_un()
        self.get_retweet_mc()
        self.get_weibopu()
        self.get_retweet_rc()
        self.get_retweet_cc()
        self.get_retweet_page()
        self.get_retweet_pt()
        return

    # Deleted source post / text-only forward.
    opener = '">'
    closer = '<\\/div>'  # runtime text is <\/div>, backslash included

    notice = content.find('WB_deltxt', anchor, end)
    # Only look for the opener when the notice marker exists; passing -1 as
    # a start offset would make find() count from the end of the string.
    opener_at = content.find(opener, notice, end) if notice != -1 else -1
    if opener_at == -1:
        return

    begin = opener_at + len(opener)
    finish = content.find(closer, begin, end)
    if finish == -1:
        return

    cleaned = self.eraseTag(content[begin:finish]).replace('\\', '')
    self.weibomsg['rmc'] = utility.clearSpace(cleaned)
    self.pos = finish
def get_retweet_mc(self):
    """Store the retweeted post's message text in ``self.weibomsg['rmc']``.

    The text is taken from between the ``feed_list_reason`` opening marker
    and the next closing-div marker, searching only between ``self.pos``
    and ``self.end``.  Markup is removed with ``self.eraseTag``, backslashes
    are dropped, and the result is passed through ``utility.clearSpace``.
    On success ``self.pos`` is moved to the closing marker.

    If either marker is missing, ``'rmc'`` is set to an empty string and
    ``self.pos`` is left unchanged.  Previously a failed ``find()`` was
    hidden by adding the marker length to -1, which yielded unrelated text
    and could set the cursor to -1.
    """
    open_tag = 'node-type="feed_list_reason">'
    # Literal text is <\/div> (backslash included); written with an escaped
    # backslash so it is not an invalid escape sequence.
    close_tag = '<\\/div>'

    text_end = -1
    open_pos = self.content.find(open_tag, self.pos, self.end)
    if open_pos != -1:
        text_start = open_pos + len(open_tag)
        text_end = self.content.find(close_tag, text_start, self.end)

    if text_end == -1:
        # Keep the key present for consumers, but never slice on a miss.
        self.weibomsg['rmc'] = ''
        return

    raw = self.content[text_start:text_end]
    self.pos = text_end
    self.weibomsg['rmc'] = utility.clearSpace(
        self.eraseTag(raw).replace('\\', ''))
def get_retweet_mc(self):
    """Extract the retweeted message body into ``self.weibomsg['rmc']``.

    Searches ``self.content`` between ``self.pos`` and ``self.end`` for the
    ``feed_list_reason`` opening marker and the closing-div marker that
    follows it.  The enclosed text has its tags removed by
    ``self.eraseTag``, its backslashes deleted, and is then handed to
    ``utility.clearSpace``.  The cursor ends up on the closing marker.

    A missing marker no longer corrupts state: ``'rmc'`` becomes ``''`` and
    the cursor stays where it was, rather than being derived from a ``-1``
    returned by ``find()``.
    """
    content, end = self.content, self.end
    opener = 'node-type="feed_list_reason">'
    closer = '<\\/div>'  # runtime text is <\/div>, backslash included

    opener_at = content.find(opener, self.pos, end)
    if opener_at == -1:
        self.weibomsg['rmc'] = ''
        return

    begin = opener_at + len(opener)
    finish = content.find(closer, begin, end)
    if finish == -1:
        # Slicing with -1 would swallow nearly the whole buffer.
        self.weibomsg['rmc'] = ''
        return

    self.pos = finish
    stripped = self.eraseTag(content[begin:finish]).replace('\\', '')
    self.weibomsg['rmc'] = utility.clearSpace(stripped)
def get_weibomc(self):
    """Store the post's own message text in ``self.weibomsg['mc']``.

    Locates the ``feed_list_content`` attribute between ``self.pos`` and
    ``self.end``, skips to the ``>`` that follows it, and takes everything
    up to the next closing-div marker.  Markup is removed with
    ``self.eraseTag``, backslashes are dropped, and the result is passed
    through ``utility.clearSpace``.  On success ``self.pos`` is moved to the
    closing marker.

    If the attribute, the ``>`` or the closing marker is missing, ``'mc'``
    is set to an empty string and ``self.pos`` is left unchanged.
    Previously the cursor was overwritten with ``find() + len(tag)`` even
    when ``find()`` had returned -1.
    """
    tag = 'node-type="feed_list_content"'
    open_tag = '>'
    # Literal text is <\/div> (backslash included); written with an escaped
    # backslash so it is not an invalid escape sequence.
    close_tag = '<\\/div>'

    text_end = -1
    tag_pos = self.content.find(tag, self.pos, self.end)
    if tag_pos != -1:
        open_pos = self.content.find(open_tag, tag_pos + len(tag), self.end)
        if open_pos != -1:
            text_start = open_pos + len(open_tag)
            text_end = self.content.find(close_tag, text_start, self.end)

    if text_end == -1:
        # Keep the key present for consumers, but never slice on a miss.
        self.weibomsg['mc'] = ''
        return

    text = self.content[text_start:text_end]
    text = self.eraseTag(text).replace('\\', '')
    self.weibomsg['mc'] = utility.clearSpace(text)
    self.pos = text_end
def get_weibomc(self):
    """Extract the main message body into ``self.weibomsg['mc']``.

    Within ``self.content[self.pos:self.end]`` the method finds the
    ``feed_list_content`` attribute, then the first ``>`` after it, then
    the closing-div marker.  The text in between is cleaned
    (``self.eraseTag``, backslash removal, ``utility.clearSpace``) and
    stored; the cursor finishes on the closing marker.

    Each lookup is verified.  When one fails, ``'mc'`` is set to ``''`` and
    the cursor is not touched, so a ``-1`` from ``find()`` can no longer
    leak into ``self.pos`` or into the slice bounds.
    """
    content, end = self.content, self.end
    marker = 'node-type="feed_list_content"'
    closer = '<\\/div>'  # runtime text is <\/div>, backslash included

    def _missing():
        # Single place for the "could not parse" result.
        self.weibomsg['mc'] = ''

    marker_at = content.find(marker, self.pos, end)
    if marker_at == -1:
        return _missing()

    bracket_at = content.find('>', marker_at + len(marker), end)
    if bracket_at == -1:
        return _missing()

    begin = bracket_at + 1  # length of the '>' delimiter
    finish = content.find(closer, begin, end)
    if finish == -1:
        return _missing()

    stripped = self.eraseTag(content[begin:finish]).replace('\\', '')
    self.weibomsg['mc'] = utility.clearSpace(stripped)
    self.pos = finish
def get_retweet_cc(self):
    """Store the retweeted post's comment count in ``self.weibomsg['rcc']``.

    Looks between ``self.pos`` and ``self.end`` for the comment label
    followed by the closing-anchor marker.  If nothing sits between them the
    count is ``'0'``.  Otherwise the text in between is passed through
    ``utility.clearSpace`` and its first and last characters are dropped
    (presumably the brackets around the number -- confirm against real
    pages).  The count is stored as a string and ``self.pos`` is moved to
    the closing marker.

    If the label or the closing marker is missing, the count is ``'0'`` and
    ``self.pos`` is left unchanged.  Previously a failed ``find()`` was
    hidden by adding the label length to -1, which produced a garbage count
    and could set the cursor to -1.
    """
    label = '评论'
    # Literal text is <\/a> (backslash included); written with an escaped
    # backslash so it is not an invalid escape sequence.
    close_tag = '<\\/a>'

    count_end = -1
    label_pos = self.content.find(label, self.pos, self.end)
    if label_pos != -1:
        count_start = label_pos + len(label)
        count_end = self.content.find(close_tag, count_start, self.end)

    if count_end == -1:
        # Nothing parseable: report zero comments and keep the cursor.
        self.weibomsg['rcc'] = '0'
        return

    if count_end == count_start:
        # Label immediately followed by the closing marker: no count shown.
        self.weibomsg['rcc'] = '0'
    else:
        slug = utility.clearSpace(self.content[count_start:count_end])
        self.weibomsg['rcc'] = slug[1:-1]
    self.pos = count_end
def get_retweet_cc(self):
    """Read the retweeted post's comment count into ``self.weibomsg['rcc']``.

    The count is whatever lies between the comment label and the next
    closing-anchor marker inside ``self.content[self.pos:self.end]``:

    * nothing in between -> ``'0'``;
    * otherwise the text is run through ``utility.clearSpace`` and its
      first and last characters are removed (likely enclosing brackets;
      verify against sample pages).

    The cursor is advanced to the closing marker on success.  If the label
    or the marker cannot be found, ``'rcc'`` is set to ``'0'`` and the
    cursor is left alone, instead of computing offsets from a ``-1``
    returned by ``find()``.
    """
    content, end = self.content, self.end
    label = '评论'
    closer = '<\\/a>'  # runtime text is <\/a>, backslash included

    label_at = content.find(label, self.pos, end)
    if label_at == -1:
        self.weibomsg['rcc'] = '0'
        return

    begin = label_at + len(label)
    finish = content.find(closer, begin, end)
    if finish == -1:
        # Without a terminator the count cannot be delimited.
        self.weibomsg['rcc'] = '0'
        return

    if finish == begin:
        count = '0'
    else:
        count = utility.clearSpace(content[begin:finish])[1:-1]
    self.weibomsg['rcc'] = count
    self.pos = finish