Beispiel #1
0
def get_product_list(keys, sorttype, num=0):
    """Fetch one page of Taobao search results, store it, then fetch the next page.

    The search endpoint is queried as JSONP. When the response looks like a
    JSONP payload, the pager information and the auction list are extracted,
    the auctions are handed to ``insert_product_list`` and, unless the last
    page was reached, the function recurses with the offset of the next page.
    When the response is not JSONP, or anything raises, the proxy and headers
    are swapped and ``callback`` is invoked (presumably to retry -- its
    definition is outside this block).

    Args:
        keys: Search keyword, substituted into the ``q`` query parameter.
        sorttype: Sort mode, substituted into the ``sort`` query parameter.
        num: Result offset, substituted into the ``s`` query parameter.

    Side effects:
        Rebinds the module-level ``headers``, ``currentPage`` and ``pageSize``
        and mutates the module-level ``proxies`` mapping.
    """
    global proxies, headers, currentPage, pageSize
    # Seconds to wait for the server before giving up. Without a timeout a
    # dead proxy would block forever and the rotation logic below would never
    # get a chance to run.
    request_timeout = 10
    # URL of the product-list search endpoint.
    rooturl = 'https://s.taobao.com/search?ajax=true&callback=%s&tab=all&style=list&q=%s&sort=%s&s=%d' % (
        get_random(), keys, sorttype, num)
    logging.info(rooturl)
    print(rooturl)

    try:
        res = requests.get(rooturl, proxies=proxies, headers=headers,
                           timeout=request_timeout).text

        # Characters 2..6 of a normal response spell 'jsonp'; anything else is
        # treated as an abnormal response.
        jsonp_str = res[2:7]
        if jsonp_str == 'jsonp':
            # Drop the fixed-length prefix (12 chars) and the 2-char suffix.
            # NOTE(review): assumes the wrapper is always exactly that long,
            # e.g. two leading characters + 'jsonpNNNN(' ... ');' -- confirm.
            json_str = res[12:][:-2]
            data = json.loads(json_str)
            pager = data['mods']['pager']['data']
            totalPage = pager['totalPage']  # total number of pages
            currentPage = pager['currentPage']  # current page
            pageSize = pager['pageSize']  # items per page

            # Parse and store the data of the current page.
            products = data['mods']['itemlist']['data']['auctions']
            insert_product_list(products, currentPage, keys, sorttype)
            print(currentPage, ':', totalPage)
            # Stop on the last page, otherwise move on to the next one.
            if currentPage != totalPage:
                get_product_list(keys, sorttype, currentPage * pageSize)
        else:
            # Abnormal response: switch to another proxy and header set, then
            # hand over to callback.
            proxies['http'] = proxy.get_random_proxy()
            headers = headersobj.get_random_header()
            callback('1', currentPage, pageSize, keys, sorttype)
    except Exception as e:
        # Every failure ends up here: network errors, timeouts, malformed JSON,
        # missing keys. A separate `except TimeoutError` clause used to follow
        # this one, but TimeoutError is a subclass of Exception, so that clause
        # could never run (and it called callback with one argument fewer than
        # the other call sites). It was removed; timeouts keep being handled
        # by this branch exactly as before.
        proxies['http'] = proxy.get_random_proxy()
        headers = headersobj.get_random_header()
        logging.debug(e)
        print(e)
        callback('2', currentPage, pageSize, keys, sorttype)
Beispiel #2
0
# Data storage: MySQL connection and cursor for the 'taobao' database.
# The credentials are masked in this listing.
conn = mysql.connector.connect(user='******',
                               password='******',
                               database='taobao')
cursor = conn.cursor()
# Open the browser. The bare string below is the original (Chinese) note for
# this step; it is an expression statement with no effect at runtime.
'''
	打开浏览器
'''
browser = webdriver.Chrome()
# Wait helper bound to the browser; 5 is the timeout passed to WebDriverWait
# (seconds, per Selenium's API).
wait = WebDriverWait(browser, 5)
browser.maximize_window()

# Proxy mapping (the 'http' entry starts out empty) and the request headers
# obtained from headersobj.get_random_header(). Both are module-level state
# that get_product_list declares as global.
proxies = {"http": ''}
headers = headersobj.get_random_header()


# 1. Query the product list by keyword and sort order
def get_product_list(keys, sorttype, num=0):
    global proxies, headers
    # 获取商品列表信息的url
    rooturl = 'https://s.taobao.com/search?ajax=true&callback=%s&tab=all&style=list&q=%s&sort=%s&s=%d' % (
        get_random(), keys, sorttype, num)
    logging.info(rooturl)
    print(rooturl)

    currentPage = 1
    pageSize = 44
    try:
        browser.get(rooturl)