def __inner_weibo_parse(weibo_driver):
    """Build a Weibo from an element that wraps a nested post.

    weibo_parse calls this with the ``feed_list_forwardContent`` element
    of a post. The author link is the first ``<a>`` inside the ``WB_info``
    element, the body is the ``WB_text`` element, and the three counters
    are read from the last three ``<li>`` elements (forward, comment,
    like, in that order).

    All text values are stored UTF-8 encoded. A counter attribute is only
    assigned when its text is non-empty. ``is_forward`` is always set to
    False on the returned object.

    Raises:
        IndexError: if fewer than three ``<li>`` elements are found.
        Any exception raised by the element lookups propagates unchanged.
    """
    author_anchor = weibo_driver.find_element_by_class_name('WB_info')
    author_anchor = author_anchor.find_element_by_tag_name('a')
    # The first character of the link text is dropped
    # (presumably a leading "@" -- confirm against the page markup).
    author_name = author_anchor.text[1:].encode('utf-8')
    author_url = author_anchor.get_attribute('href')

    body_text = weibo_driver.find_element_by_class_name('WB_text').text
    body_text = body_text.encode('utf-8')

    li_items = weibo_driver.find_elements_by_tag_name('li')
    # The forward and comment texts lose their first three characters
    # (presumably a label prefix -- confirm); the like text is kept whole.
    counters = (
        ('forward_num', li_items[-3].text[3:].encode('utf-8')),
        ('comment_num', li_items[-2].text[3:].encode('utf-8')),
        ('good_num', li_items[-1].text.encode('utf-8')),
    )

    parsed = Weibo()
    parsed.author_name = author_name
    parsed.author_url = author_url
    parsed.comment = body_text
    for attr_name, value in counters:
        if value:
            setattr(parsed, attr_name, value)
    parsed.is_forward = False
    return parsed
def __inner_weibo_parse(weibo_driver):
    """Parse the element of a nested post into a Weibo object.

    Reads the author link (first ``<a>`` under ``WB_info``), the body text
    (``WB_text``) and the forward / comment / like counters, which are
    taken from the third-last, second-last and last ``<li>`` elements
    found under ``weibo_driver``.

    Text values are UTF-8 encoded before being stored. Each counter is
    assigned only when its text is non-empty, and ``is_forward`` is set
    to False on the result.

    Raises:
        IndexError: if fewer than three ``<li>`` elements are found.
        Any exception raised by the element lookups propagates unchanged.
    """
    link = weibo_driver.find_element_by_class_name(
        'WB_info').find_element_by_tag_name('a')
    # Skip the first character of the link text
    # (presumably a leading "@" -- confirm against the page markup).
    name_bytes = link.text[1:].encode('utf-8')
    href = link.get_attribute('href')

    text_element = weibo_driver.find_element_by_class_name('WB_text')
    content = text_element.text.encode('utf-8')

    entries = weibo_driver.find_elements_by_tag_name('li')
    forward_entry = entries[-3]
    # Forward and comment texts drop their first three characters
    # (presumably a label prefix -- confirm); the like text is used as is.
    forwards = forward_entry.text[3:].encode('utf-8')
    comment_entry = entries[-2]
    comments = comment_entry.text[3:].encode('utf-8')
    like_entry = entries[-1]
    likes = like_entry.text.encode('utf-8')

    result = Weibo()
    result.author_name = name_bytes
    result.author_url = href
    result.comment = content
    attr_names = ('forward_num', 'comment_num', 'good_num')
    for attr_name, value in zip(attr_names, (forwards, comments, likes)):
        if not value:
            continue
        setattr(result, attr_name, value)
    result.is_forward = False
    return result
def weibo_parse_simple(weibo_driver):
    """Parse only the counters and the timestamp of one post element.

    The forward, comment and like texts and the date link are located by
    their ``node-type`` attributes under ``weibo_driver``. The returned
    Weibo has ``time`` set to ``time_parse`` of the UTF-8 encoded date
    text and ``weibo_driver`` set to the element that was passed in.
    Each counter is stored as a UTF-8 encoded string, and only when its
    text is non-empty.

    Any exception raised by the element lookups propagates unchanged.
    """
    def text_of(xpath):
        # Visible text of the first element matching the relative XPath.
        return weibo_driver.find_element_by_xpath(xpath).text

    # Forward and comment texts drop their first three characters
    # (presumably a label prefix -- confirm against the page markup).
    forwards = text_of(
        ".//span[@node-type='forward_btn_text']")[3:].encode('utf-8')
    comments = text_of(
        ".//span[@node-type='comment_btn_text']")[3:].encode('utf-8')
    likes = text_of(".//span[@node-type='like_status']").encode('utf-8')
    posted_at = text_of(
        ".//a[@node-type='feed_list_item_date']").encode('utf-8')

    parsed = Weibo()
    parsed.time = time_parse(posted_at)
    parsed.weibo_driver = weibo_driver
    counters = (
        ('forward_num', forwards),
        ('comment_num', comments),
        ('good_num', likes),
    )
    for attr_name, value in counters:
        if value:
            setattr(parsed, attr_name, value)
    return parsed
def weibo_parse_simple(weibo_driver):
    """Extract the counters and timestamp of a post without its content.

    Looks up four elements under ``weibo_driver`` by ``node-type``: the
    forward, comment and like texts and the date link. The result keeps a
    reference to ``weibo_driver`` and has ``time`` set from
    ``time_parse``. Counters are stored as UTF-8 encoded strings and are
    assigned only when their text is non-empty.

    Any exception raised by the element lookups propagates unchanged.
    """
    find = weibo_driver.find_element_by_xpath

    forward_element = find(".//span[@node-type='forward_btn_text']")
    # Forward and comment texts drop their first three characters
    # (presumably a label prefix -- confirm against the page markup).
    forward_text = forward_element.text[3:].encode('utf-8')
    comment_element = find(".//span[@node-type='comment_btn_text']")
    comment_text = comment_element.text[3:].encode('utf-8')
    like_element = find(".//span[@node-type='like_status']")
    like_text = like_element.text.encode('utf-8')
    date_element = find(".//a[@node-type='feed_list_item_date']")
    date_text = date_element.text.encode('utf-8')

    result = Weibo()
    result.time = time_parse(date_text)
    result.weibo_driver = weibo_driver
    attr_names = ('forward_num', 'comment_num', 'good_num')
    texts = (forward_text, comment_text, like_text)
    for attr_name, value in zip(attr_names, texts):
        if not value:
            continue
        setattr(result, attr_name, value)
    return result
def weibo_parse(weibo_driver):
    """Parse one feed item element into a populated Weibo object.

    Reads the author link, the post content (passed through
    ``__comment_text_filter``), the forward / comment / like counters
    found under the ``WB_feed_handle`` element, the date link (passed
    through ``time_parse``) and, when a ``feed_list_forwardContent``
    element exists, the nested post via ``__inner_weibo_parse``.

    Args:
        weibo_driver: element wrapping a single post; it is also stored
            on the result as ``weibo_driver``.

    Returns:
        A Weibo. Counters are stored as ``int`` and only assigned when
        their text is non-empty. ``is_forward`` is set to True only when
        the nested post parsed successfully. ``fordward_weibo`` holds the
        nested Weibo, or None.

    Raises:
        ValueError: if a non-empty counter text is not a valid integer.
        Any exception raised by the mandatory element lookups propagates
        unchanged.
    """
    print('解析一条微博')
    try:
        # The user's own posts do not carry an author block.
        weibo_info = weibo_driver.find_element_by_class_name(
            'WB_info').find_element_by_tag_name('a')
        weibo_author = weibo_info.text.encode('utf-8')
        weibo_href = weibo_info.get_attribute('href')
    except Exception:
        # Best effort: a missing author block is expected. Catching
        # Exception rather than using a bare except lets
        # KeyboardInterrupt and SystemExit propagate.
        weibo_author = ''
        weibo_href = ''

    comment = weibo_driver.find_element_by_xpath(
        ".//div[@node-type='feed_list_content']").text.encode('utf-8')
    comment_handled = __comment_text_filter(comment)

    nums_driver = weibo_driver.find_element_by_class_name('WB_feed_handle')
    # Forward and comment texts drop their first three characters
    # (presumably a label prefix -- confirm against the page markup).
    forward_num = nums_driver.find_element_by_xpath(
        ".//span[@node-type='forward_btn_text']").text[3:].encode('utf-8')
    comment_num = nums_driver.find_element_by_xpath(
        ".//span[@node-type='comment_btn_text']").text[3:].encode('utf-8')
    good_num = nums_driver.find_element_by_xpath(
        ".//span[@node-type='like_status']").text.encode('utf-8')

    forward_weibo_list = weibo_driver.find_elements_by_xpath(
        ".//div[@node-type='feed_list_forwardContent']")
    weibo_time = weibo_driver.find_element_by_xpath(
        ".//a[@node-type='feed_list_item_date']").text.encode('utf-8')

    weibo = Weibo()
    forward_weibo = None
    if forward_weibo_list:
        try:
            forward_weibo = __inner_weibo_parse(forward_weibo_list[0])
            weibo.is_forward = True
        except Exception as e:
            # A failed nested parse is reported and the outer post is
            # still returned. print(e) prints the same on Python 2 and 3.
            print(e)
            forward_weibo = None

    weibo.author_name = weibo_author
    weibo.author_url = weibo_href
    weibo.comment = comment_handled
    weibo.time = time_parse(weibo_time)
    weibo.weibo_driver = weibo_driver
    if forward_num:
        weibo.forward_num = int(forward_num)
    if comment_num:
        weibo.comment_num = int(comment_num)
    if good_num:
        weibo.good_num = int(good_num)
    # Attribute name kept as spelled in the original; other code may read it.
    weibo.fordward_weibo = forward_weibo
    return weibo
def weibo_parse(weibo_driver):
    """Parse one feed item element into a populated Weibo object.

    Reads the author link, the post content (passed through
    ``__comment_text_filter``), the forward / comment / like counters
    found under the ``WB_feed_handle`` element, the date link (passed
    through ``time_parse``) and, when a ``feed_list_forwardContent``
    element exists, the nested post via ``__inner_weibo_parse``.

    Args:
        weibo_driver: element wrapping a single post; it is also stored
            on the result as ``weibo_driver``.

    Returns:
        A Weibo. Counters are stored as ``int`` and only assigned when
        their text is non-empty. ``is_forward`` is set to True only when
        the nested post parsed successfully. ``fordward_weibo`` holds the
        nested Weibo, or None.

    Raises:
        ValueError: if a non-empty counter text is not a valid integer.
        Any exception raised by the mandatory element lookups propagates
        unchanged.
    """
    print('解析一条微博')
    try:
        # The user's own posts do not carry an author block.
        weibo_info = weibo_driver.find_element_by_class_name(
            'WB_info').find_element_by_tag_name('a')
        weibo_author = weibo_info.text.encode('utf-8')
        weibo_href = weibo_info.get_attribute('href')
    except Exception:
        # Best effort: a missing author block is expected. Catching
        # Exception rather than using a bare except lets
        # KeyboardInterrupt and SystemExit propagate.
        weibo_author = ''
        weibo_href = ''

    comment = weibo_driver.find_element_by_xpath(
        ".//div[@node-type='feed_list_content']").text.encode('utf-8')
    comment_handled = __comment_text_filter(comment)

    nums_driver = weibo_driver.find_element_by_class_name('WB_feed_handle')
    # Forward and comment texts drop their first three characters
    # (presumably a label prefix -- confirm against the page markup).
    forward_num = nums_driver.find_element_by_xpath(
        ".//span[@node-type='forward_btn_text']").text[3:].encode('utf-8')
    comment_num = nums_driver.find_element_by_xpath(
        ".//span[@node-type='comment_btn_text']").text[3:].encode('utf-8')
    good_num = nums_driver.find_element_by_xpath(
        ".//span[@node-type='like_status']").text.encode('utf-8')

    forward_weibo_list = weibo_driver.find_elements_by_xpath(
        ".//div[@node-type='feed_list_forwardContent']")
    weibo_time = weibo_driver.find_element_by_xpath(
        ".//a[@node-type='feed_list_item_date']").text.encode('utf-8')

    weibo = Weibo()
    forward_weibo = None
    if forward_weibo_list:
        try:
            forward_weibo = __inner_weibo_parse(forward_weibo_list[0])
            weibo.is_forward = True
        except Exception as e:
            # A failed nested parse is reported and the outer post is
            # still returned. print(e) prints the same on Python 2 and 3.
            print(e)
            forward_weibo = None

    weibo.author_name = weibo_author
    weibo.author_url = weibo_href
    weibo.comment = comment_handled
    weibo.time = time_parse(weibo_time)
    weibo.weibo_driver = weibo_driver
    if forward_num:
        weibo.forward_num = int(forward_num)
    if comment_num:
        weibo.comment_num = int(comment_num)
    if good_num:
        weibo.good_num = int(good_num)
    # Attribute name kept as spelled in the original; other code may read it.
    weibo.fordward_weibo = forward_weibo
    return weibo