Example #1
0
def test_parse_home_info(url, is_login, is_ajax, cookies, session):
    """Download a home page and check that the matching parser yields data.

    :param url: address of the page to download
    :param is_login: when equal to 1 the page is fetched through ``session``;
        any other value uses ``requests.get`` with ``cookies``
    :param is_ajax: falsy selects ``home.get_data``, truthy selects
        ``home.get_ajax_data``
    :param cookies: cookies sent with the non-session request
    :param session: object whose ``get(url)`` result exposes ``.text``
    """
    if is_login == 1:
        response = session.get(url)
    else:
        response = requests.get(url, cookies=cookies)
    page_text = response.text

    # Exactly one parser runs per call, chosen by the page flavour.
    parse = home.get_ajax_data if is_ajax else home.get_data
    assert len(parse(page_text)) > 0

    # Sleep for REQUEST_INTERNAL seconds (presumably request throttling).
    time.sleep(REQUEST_INTERNAL)
def test_crawl_first_home_page():
    """Fetch one user's first home page and its two ajax continuations.

    The main page must contain the ``['islogin']`` marker. Each ajax response
    must not contain ``Sina Visitor System`` and must yield at least one
    record from ``get_ajax_data``. The test sleeps ``REQUEST_INTERNAL``
    seconds after every request.
    """
    from page_parse.home import get_ajax_data

    url = 'http://weibo.com/u/1800822823?is_ori=1&is_tag=0&profile_ftype=1&page=1'
    page_text = get_page(url, auth_level=1)
    assert "['islogin']" in page_text
    time.sleep(REQUEST_INTERNAL)

    # Both ajax URLs are built up front from one millisecond timestamp; the
    # second differs from the first only by a +100 offset on that value.
    cur_time = int(time.time() * 1000)
    ajax_urls = [
        HOME_AJAX_URL.format('100505', 0, '100505', '1800822823', 1, 1, cur_time + offset)
        for offset in (0, 100)
    ]

    for ajax_url in ajax_urls:
        ajax_text = get_page(ajax_url, auth_level=1, is_ajax=True)
        assert 'Sina Visitor System' not in ajax_text
        assert len(get_ajax_data(ajax_text)) > 0
        time.sleep(REQUEST_INTERNAL)
Example #3
0
def crawl_ajax_page(url, auth_level):
    """
    Fetch a user home ajax page and hand its parsed records to WbDataOper.

    :param url: user home ajax url
    :param auth_level: 1 stands for no login but need fake cookies, 2 stands for login
    :return: the fetched page text, or '' when no records were parsed from it
    """
    page_text = get_page(url, auth_level, is_ajax=True)
    records = get_ajax_data(page_text)

    if records:
        WbDataOper.add_all(records)
        return page_text

    # Nothing was parsed: signal that with an empty string.
    return ''
Example #4
0
def crawl_ajax_page(url, auth_level):
    """
    Fetch a user home ajax page and store the records not older than the cutoff.

    The records are scanned in order; at the first one whose ``create_time``
    is earlier than ``get_time_after()``, that record and everything after it
    are dropped. Only the leading part is passed to ``WbDataOper.add_all``.

    :param url: user home ajax url
    :param auth_level: 1 stands for no login but need fake cookies, 2 stands for login
    :return: the fetched page text, or '' when no records were parsed from it
    """
    page_text = get_page(url, auth_level, is_ajax=True)
    records = get_ajax_data(page_text)
    if not records:
        return ''

    cutoff = time.mktime(time.strptime(get_time_after(), '%Y-%m-%d %H:%M:%S'))

    kept = records
    for position, record in enumerate(records):
        posted_at = time.mktime(time.strptime(record.create_time, '%Y-%m-%d %H:%M'))
        if posted_at < cutoff:
            # Keep only the records seen before the first too-old one.
            kept = records[:position]
            break

    WbDataOper.add_all(kept)
    return page_text
Example #5
0
def crawl_ajax_page(url, auth_level):
    """
    Fetch a user home ajax page and store the records accepted by ``determine``.

    :param url: user home ajax url
    :param auth_level: 1 stands for no login but need fake cookies, 2 stands for login
    :return: the fetched page text, or '' when no records were parsed from it
    """
    page_text = get_page(url, auth_level, is_ajax=True)
    records = get_ajax_data(page_text)
    if not records:
        return ''

    # Turn the configured 'time after' string into an epoch timestamp.
    cutoff_struct = time.strptime(get_time_after(), '%Y-%m-%d %H:%M:%S')
    cutoff = time.mktime(cutoff_struct)

    # A record is kept when determine(record, cutoff) is truthy.
    accepted = [record for record in records if determine(record, cutoff)]

    WbDataOper.add_all(accepted)
    return page_text
Example #6
0
def crawl_ajax_page(url, auth_level):
    """
    Download a user home ajax page and save the records that pass ``determine``.

    :param url: user home ajax url
    :param auth_level: 1 stands for no login but need fake cookies, 2 stands for login
    :return: the downloaded page text, or '' when parsing produced no records
    """
    html = get_page(url, auth_level, is_ajax=True)
    parsed = get_ajax_data(html)
    if not parsed:
        return ''

    # Epoch timestamp of the configured 'time after' threshold.
    threshold_struct = time.strptime(get_time_after(), '%Y-%m-%d %H:%M:%S')
    threshold = time.mktime(threshold_struct)

    # Only entries for which determine(entry, threshold) is truthy are saved.
    selected = [entry for entry in parsed if determine(entry, threshold)]

    WbDataOper.add_all(selected)
    return html