def get_brand_from_parser(asin, debug_log):
    """Download the product page for *asin* and return its brand string.

    Builds the goods URL and referer from the ASIN, fetches the page with
    a fresh user agent (no proxy, no cookies), and hands the HTML to the
    goods parser to extract the brand.
    """
    # Derive (url, referer) for this ASIN.
    urls = BaseCrawler.make_url(asin)
    url = urls[0]
    refer = urls[1]
    # Pull a fresh user agent; this request carries no cookies and no proxy.
    ua = UaPond.get_new_ua()
    cookies = ''
    ip = ''
    ipQ = ''
    # Downloader call: returns (html, cookie, error-flag); only the HTML
    # is consumed here.
    html_data, cookie, is_error = get_html_useRequest(
        url, ua, ip, cookies, debug_log, refer, ipQ,
        url_type='goods', asin=asin,
    )
    # Parse the brand out of the fetched goods page.
    brand = GoodsParser(html_data)._get_brand()
    return brand
# NOTE(review): this line is a newline-stripped chunk that fuses three things:
# (1) the tail of a captcha-solving function (its `def` is not visible from
# here — presumably it returns the cracked captcha string), (2) a closing `"""`
# for a triple-quoted comment block whose opening is also outside this view,
# and (3) an `if __name__ == '__main__':` scratch/benchmark section. Because
# the original line breaks are lost, the boundaries of the `#` comments inside
# the script section cannot be recovered (e.g. whether `proxy = dict(...)` and
# the `UA = UaPond...` assignment were live code or commented out — `print(UA)`
# suggests the latter was once live). TODO: restore from version control rather
# than guessing the original formatting.
img = get_img(url, ua, ip, cookie, debug_log, referer, ipQ) if img: print('img: ', len(img), type(img)) img_str_list= amazon_captcha_crack.main(img) img_string = ''.join(img_str_list) return img_string """ if __name__ == '__main__': time1 = time.time() # from pprint import pprint # ip = '192.126.168.2:3128' proxy = dict(https='https://%s' % (ip), ) print(proxy) # html = get_html('https://www.amazon.com/dp/B01C4N6IBA', ip, proxy=proxy) # print(type(html)) # print(len(html)) # parser = HtmlParser() # pprint(parser.parser_goods(html, 'B01C4N6IBA')) time2 = time.time() print(time2 - time1) # from conf.setting import DB_CONFIG, REDIS_CONFIG, BASE_DIR # UA = UaPond.get_new_ua() # print(DB_CONFIG) # print(REDIS_CONFIG) # print(BASE_DIR) print(UA)
def get_kw_ua(self):
    """Return a fresh user agent for keyword pages, drawn from the UA pool."""
    # Pure delegation to the pool; no per-instance state is consulted.
    return UaPond.get_kw_new_ua()