Example #1
0
def get_weibo_list(html):
    """
    Build the list of weibo info parsed from a feed page.

    :param html: feed page HTML; falsy input yields an empty list
    :return: list of weibo data objects; for a forwarded weibo the
             original weibo's data is appended right after it
    """
    if not html:
        return []
    page = BeautifulSoup(html, "html.parser")
    items = page.find_all(attrs={'action-type': 'feed_list_item'})
    results = []
    for item in items:
        parsed = get_weibo_info_detail(item, html)
        if parsed is None:
            continue
        weibo = parsed[0]
        # In accurate mode, replace truncated text with the full content
        # when the detail fetch returns something non-empty.
        if parsed[1] == 0 and CRAWLING_MODE == 'accurate':
            full_cont = status.get_cont_of_weibo(weibo.weibo_id)
            if full_cont:
                weibo.weibo_cont = full_cont
        results.append(weibo)

        # A non-origin weibo is a forward; also parse the forwarded weibo.
        if weibo.is_origin == 0:
            forward_parsed = get_weibo_forward_info_detail(
                weibo.weibo_forward_id, item, html)
            if forward_parsed is None:
                continue
            forward = forward_parsed[0]
            if forward_parsed[1] == 0 and CRAWLING_MODE == 'accurate':
                forward_cont = status.get_cont_of_weibo(forward.weibo_id)
                if forward_cont:
                    forward.weibo_cont = forward_cont
            results.append(forward)

    return results
Example #2
0
def get_weibo_list(html):
    """
    Get the list of weibo info together with the attached pictures.

    :param html: feed page HTML; may be empty or None
    :return: tuple (weibo_datas, weibo_pics) — both lists are empty when
             *html* is falsy
    """
    if not html:
        # Bug fix: keep the return shape consistent with the normal path
        # (a 2-tuple) so callers that unpack two values don't crash; the
        # original returned a bare empty list here.
        return [], []
    soup = BeautifulSoup(html, "html.parser")
    feed_list = soup.find_all(attrs={'action-type': 'feed_list_item'})
    weibo_datas = []
    weibo_pics = []
    for data in feed_list:
        r = get_weibo_info_detail(data, html)
        if r is not None:
            wb_data = r[0]
            if r[1] == 0:
                # Bug fix: keep the truncated text when the detail fetch
                # returns nothing instead of clobbering it with a falsy
                # value (matches the guarded pattern of the other parsers).
                weibo_cont = status.get_cont_of_weibo(wb_data.weibo_id)
                if weibo_cont:
                    wb_data.weibo_cont = weibo_cont

            # If pictures are present, collect them
            if r[2]:
                weibo_pics.extend(r[2])
            weibo_datas.append(wb_data)
    return weibo_datas, weibo_pics
Example #3
0
def get_search_info(html):
    """
    Extract search results from a search-result page.

    :param html: raw search page content
    :return: list of parsed weibo data; empty when the page can't be decoded
    """
    # Search results come back in two forms: plain HTML or an encoded
    # payload that must be extracted first (the marker '举报' only appears
    # in the plain form).
    content = _search_page_parse(html) if '举报' not in html else html

    if content == '':
        return list()

    # todo BeautifulSoup fails to parse some pages (see ../tests/fail.html);
    # an xpath-based parser would help but is not implemented yet due to cost.
    soup = BeautifulSoup(
        content.encode('utf-8', 'ignore').decode('utf-8'), "html.parser")

    feed_list = soup.find_all(attrs={'action-type': 'feed_list_item'})
    search_list = []
    for each in feed_list:
        r = get_weibo_info(each, html)
        if r is not None:
            wb_data = r[0]
            if r[1] == 0:
                # Bug fix: keep the truncated text when the detail fetch
                # returns a falsy value instead of overwriting the content
                # with None (consistent with the guarded variant below).
                weibo_cont = status.get_cont_of_weibo(wb_data.weibo_id)
                if weibo_cont:
                    wb_data.weibo_cont = weibo_cont
            search_list.append(wb_data)
    return search_list
Example #4
0
def get_search_info(html):
    """
    Parse the search-result page fetched while logged in.

    :param html: response content for search with login
    :return: list of parsed weibo data
    """
    # Logged-in search pages arrive as plain HTML, so no extra decode
    # step is required before parsing.
    content = html

    if content == '':
        return []
    # todo BeautifulSoup drops some nodes on certain pages
    # (see ../tests/fail.html); an xpath parser would fix this but is
    # postponed for cost reasons.
    normalized = content.encode('utf-8', 'ignore').decode('utf-8')
    soup = BeautifulSoup(normalized, "html.parser")
    results = []
    for node in soup.find_all(attrs={'action-type': 'feed_list_item'}):
        parsed = get_weibo_info_1(node, html)
        if parsed is None:
            continue
        weibo = parsed[0]
        # In accurate mode, swap in the full content when available.
        if parsed[1] == 0 and CRAWLING_MODE == 'accurate':
            full_cont = status.get_cont_of_weibo(weibo.weibo_id)
            if full_cont:
                weibo.weibo_cont = full_cont
        results.append(weibo)
    return results
Example #5
0
 def test_get_weibo_detail_cont(self):
     """
     Smoke test: fetch and print the full content of one known weibo.
     :return:
     """
     from page_get import status
     cont = status.get_cont_of_weibo('4129510280252577')
     print(cont)
Example #6
0
 def test_get_weibo_detail_cont(self):
     """
     Smoke test: fetch one known weibo's full content and print it.
     :return:
     """
     from page_get import status
     full_content = status.get_cont_of_weibo('4129510280252577')
     print(full_content)
Example #7
0
def get_search_info(html):
    """
    Extract search results from a search-result page.

    :param html: raw search page content
    :return: list of parsed weibo data; empty when the page can't be decoded
    """
    # The page comes back either as plain HTML or as an encoded payload;
    # the marker '举报' only appears in the plain form.
    if '举报' not in html:
        content = _search_page_parse(html)
    else:
        content = html

    if content == '':
        return []
    # todo BeautifulSoup cannot parse some pages (see ../tests/fail.html);
    # switching to xpath would help but is postponed for cost reasons.
    cleaned = content.encode('utf-8', 'ignore').decode('utf-8')
    soup = BeautifulSoup(cleaned, "html.parser")

    results = []
    for node in soup.find_all(attrs={'action-type': 'feed_list_item'}):
        parsed = get_weibo_info(node, html)
        if parsed is None:
            continue
        weibo = parsed[0]
        # In accurate mode, replace truncated text with the full content
        # when the detail fetch returns something non-empty.
        if parsed[1] == 0 and CRAWLING_MODE == 'accurate':
            full_cont = status.get_cont_of_weibo(weibo.weibo_id)
            if full_cont:
                weibo.weibo_cont = full_cont
        results.append(weibo)
    return results
Example #8
0
def get_weibo_list(html):
    """
    Parse a feed page into a list of weibo data objects.

    :param html: feed page HTML
    :return: list of parsed weibo data; empty when *html* is falsy
    """
    if not html:
        return []
    document = BeautifulSoup(html, "html.parser")
    entries = document.find_all(attrs={'action-type': 'feed_list_item'})
    parsed_weibos = []
    for entry in entries:
        info = get_weibo_info_detail(entry, html)
        if info is None:
            continue
        weibo = info[0]
        # In accurate mode, swap in the full content when the detail
        # fetch returns something non-empty.
        if info[1] == 0 and CRAWLING_MODE == 'accurate':
            full_cont = status.get_cont_of_weibo(weibo.weibo_id)
            if full_cont:
                weibo.weibo_cont = full_cont
        parsed_weibos.append(weibo)
    return parsed_weibos