def comment_map(node, cookies, soup):
    """Build a plain dict describing one comment from its HTML node.

    Args:
        node: Parsed element for a single comment. It is indexed by
            attribute name (``node['comment_id']``) and queried with CSS
            selectors through ``node.select``.
        cookies: Not used by this function.
        soup: Not used by this function.

    Returns:
        dict with the keys ``comment_id``, ``comment_uface``,
        ``comment_uid``, ``comment_text``, ``comment_time`` and
        ``comment_like``.

    Raises:
        IndexError: If one of the expected sub-elements is not found.
    """
    comment_id = node['comment_id']

    avatar = node.select('.WB_face img')[0]
    face_url = avatar['src']
    # Drops the first three characters of the usercard attribute
    # (presumably an "id=" prefix -- confirm against the page markup).
    user_id = avatar['usercard'][3:]

    raw_html = Utils.get_innerhtml(node.select('.WB_text')[0])
    separator = ':'
    # Keep only what follows the first separator. str.find returns -1 when
    # the separator is absent, which makes the slice start 0 and therefore
    # keeps the whole text.
    body = raw_html[raw_html.find(separator) + len(separator):]

    posted_at = Utils.parse_time_text(node.select('.WB_from')[0].string)

    like_label = node.select(
        '.WB_handle span[node-type="like_status"] em')[0]
    like_count = Utils.parse_num(like_label.string)

    return {
        'comment_id': comment_id,
        'comment_uface': face_url,
        'comment_uid': user_id,
        'comment_text': body,
        'comment_time': posted_at,
        'comment_like': like_count,
    }
def person_map(node, cookies, soup):
    """Convert one feed item from a user's page into a ``Weibo`` record.

    Fills in the post's own fields, then fetches its comments page by page
    and, when the post embeds another post (a ``.WB_feed_expand`` element),
    the fields of that embedded post under ``'inner'``.

    Args:
        node: Parsed element for a single feed item. It is indexed by
            attribute name (``node['mid']``, ``node['tbinfo']``) and queried
            with CSS selectors through ``node.select``.
        cookies: Passed to ``WeiboUser`` when fetching comments.
        soup: Not used by this function.

    Returns:
        The populated ``Weibo`` object.

    Raises:
        IndexError: If one of the expected sub-elements is not found.
    """
    # Assumes Weibo() pre-populates 'comment_list' with a list and 'inner'
    # with a dict, since both are used below without being created -- confirm.
    weibo_temp = Weibo()

    # this weibo
    weibo_temp['mid'] = node['mid']
    weibo_temp['uid'] = Utils.get_uid_from_tfinfo(node['tbinfo'])
    WeiboUser.logger.debug('PERSON_MAP uid %s mid %s' % (weibo_temp['uid'], weibo_temp['mid']))
    weibo_temp['text'] = Utils.get_innerhtml(node.select('.WB_detail > .WB_text')[0])
    weibo_temp['time'] = Utils.parse_time(
        node.select('.WB_detail > .WB_from a[node-type="feed_list_item_date"]')[0]['title'])
    weibo_temp['device'] = node.select('.WB_detail > .WB_from a[rel="nofollow"]')[0].string
    weibo_temp['comment'] = Utils.parse_num(
        node.select('.WB_feed_handle ul span[node-type="comment_btn_text"]')[0].string)
    weibo_temp['forward'] = Utils.parse_num(
        node.select('.WB_feed_handle ul span[node-type="forward_btn_text"]')[0].string)
    weibo_temp['like'] = Utils.parse_num(
        node.select('.WB_feed_handle ul span[node-type="like_status"] em')[0].string)

    # comments: request successive pages until one comes back empty.
    # NOTE(review): unlike search_map, this loop has no page cap and no
    # delay between requests -- confirm that this is intentional.
    cindex = 1
    while True:
        # The second element of the result is not used here.
        (this_clist, end) = WeiboUser(cookies).get_comment(weibo_temp['mid'], cindex)
        if not this_clist:
            break
        weibo_temp['comment_list'].extend(this_clist)
        cindex += 1

    # inner weibo
    expanded = node.select('.WB_feed_expand')
    if expanded:
        inner_wb = expanded[0]
        inner = weibo_temp['inner']

        origin_nick = inner_wb.select('.WB_info a[node-type="feed_list_originNick"]')[0]
        # Drops the first three characters of the usercard attribute
        # (presumably an "id=" prefix -- confirm against the page markup).
        inner['uid'] = origin_nick['usercard'][3:]
        inner['nickname'] = origin_nick['title']
        inner['text'] = Utils.get_innerhtml(inner_wb.select('.WB_text')[0])
        inner['mid'] = inner_wb.select('.WB_func .WB_handle')[0]['mid']

        for a in inner_wb.select('.WB_func .WB_handle ul a'):
            # Tag.get replaces the deprecated Tag.has_key; links without
            # the tracking attribute are skipped.
            track = a.get('suda-uatrack')
            if track is None:
                continue
            if 'transfer' in track:
                inner['forward'] = Utils.parse_num(a.string)
            elif 'comment' in track:
                inner['comment'] = Utils.parse_num(a.string)

        inner['time'] = Utils.parse_time(
            inner_wb.select('.WB_func .WB_from a[node-type="feed_list_item_date"]')[0]['title'])
        inner['device'] = inner_wb.select('.WB_func .WB_from a[rel="nofollow"]')[0].string

    return weibo_temp
def search_map(node, cookies, soup):
    """Convert one search-result item into a ``Weibo`` record.

    Fills in the post's fields from the result markup, then fetches its
    comments page by page, pausing between requests and stopping once the
    next page index exceeds ``Utils.task['max_comment']``.

    Args:
        node: Parsed element for a single search result. It is indexed by
            attribute name (``node['mid']``) and queried with CSS selectors
            through ``node.select``.
        cookies: Passed to ``WeiboUser`` when fetching comments.
        soup: Not used by this function.

    Returns:
        The populated ``Weibo`` object. ``'time'`` holds the result of
        calling ``.isoformat()`` on the parsed time.

    Raises:
        IndexError: If one of the expected sub-elements is not found.
    """
    # Assumes Weibo() pre-populates 'comment_list' with a list, since it is
    # extended below without being created -- confirm.
    weibo_temp = Weibo()
    weibo_temp['mid'] = node['mid']

    usernode = node.select('img.W_face_radius')[0]
    uhref = node.select('.face a')[0]['href']
    # The uid is whatever follows the last "/" of the profile link.
    weibo_temp['uid'] = uhref[uhref.rfind("/") + 1:]
    WeiboUser.logger.debug('SEARCH_MAP uid %s mid %s' % (weibo_temp['uid'], weibo_temp['mid']))
    # Disabled alternative kept for reference: take the uid from the avatar's
    # usercard attribute, between 'id=' and the following '&'.
    weibo_temp['uface'] = usernode['src']
    weibo_temp['nickname'] = usernode['alt']
    weibo_temp['text'] = Utils.get_innerhtml(node.select('p.comment_txt')[0])
    weibo_temp['time'] = Utils.parse_time(
        node.select('.WB_feed_detail a[node-type="feed_list_item_date"]')[0]['title']).isoformat()

    # some 'from' don't have rel="nofollow"
    source_links = node.select('a[rel="nofollow"]')
    if source_links:
        weibo_temp['device'] = source_links[0].string
    else:
        alltxt = Utils.get_text(node.select('.feed_from')[0]).strip()
        marker_pos = alltxt.find(u'来自')
        if marker_pos == -1:
            # Without the marker there is nothing to cut after; slicing at
            # -1 + 4 would store arbitrary truncated text as the device.
            weibo_temp['device'] = ''
        else:
            # NOTE(review): the marker is two characters long but four are
            # skipped; the original offset is kept -- confirm that two
            # separator characters always follow the marker.
            weibo_temp['device'] = alltxt[marker_pos + 4:].strip()

    forwardstr = node.select('a[action-type="feed_list_forward"] em')[0].string
    comment_counters = node.select('a[action-type="feed_list_comment"] em')
    # The count is read only when exactly one counter element matched.
    commentstr = comment_counters[0].string if len(comment_counters) == 1 else ''
    weibo_temp['forward'] = Utils.parse_num(forwardstr)
    weibo_temp['comment'] = Utils.parse_num(commentstr)
    weibo_temp['like'] = Utils.parse_num(
        node.select('a[action-type="feed_list_like"] em')[0].string)

    # comments: request successive pages until one comes back empty or the
    # configured page limit is passed.
    cindex = 1
    while True:
        # The second element of the result is not used here.
        (this_clist, end) = WeiboUser(cookies).get_comment(weibo_temp['mid'], cindex)
        if not this_clist:
            break
        weibo_temp['comment_list'].extend(this_clist)
        cindex += 1
        if cindex > Utils.task['max_comment']:
            break
        # Pause before requesting the next page (presumably a randomized
        # delay between the two bounds -- confirm in Utils.sleep).
        Utils.sleep(15, 25)

    return weibo_temp