Example #1
def get_brand_from_parser(asin, debug_log):
    # Build the request URL and referer from the ASIN
    urls = BaseCrawler.make_url(asin)
    url = urls[0]
    refer = urls[1]
    # Get a user agent
    ua = UaPond.get_new_ua()
    cookies = ''
    ip = ''
    ipQ = ''
    # Call the downloader to fetch the page data
    html_data, cookie, is_error = get_html_useRequest(url,
                                                      ua,
                                                      ip,
                                                      cookies,
                                                      debug_log,
                                                      refer,
                                                      ipQ,
                                                      url_type='goods',
                                                      asin=asin)
    # Use the goods parser to extract the brand information from the page
    brand = GoodsParser(html_data)._get_brand()
    return brand
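
For reference, a minimal sketch of how this helper might be called, assuming debug_log is a standard logging.Logger and reusing the sample ASIN that appears in Example #2's test block; both choices are illustrative placeholders, not part of the example itself:

import logging

# Hypothetical call; the ASIN and logger name are placeholders
debug_log = logging.getLogger('amazon_crawler')
brand = get_brand_from_parser('B01C4N6IBA', debug_log)
print(brand)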
Example #2
    img = get_img(url, ua, ip, cookie, debug_log, referer, ipQ)
    img_string = ''
    if img:
        print('img: ', len(img), type(img))
        # Crack the captcha image and join the decoded characters into a string
        img_str_list = amazon_captcha_crack.main(img)
        img_string = ''.join(img_str_list)
    return img_string

"""

if __name__ == '__main__':
    time1 = time.time()
    # from pprint import pprint
    #
    ip = '192.126.168.2:3128'
    proxy = {'https': 'https://%s' % ip}
    print(proxy)
    # html = get_html('https://www.amazon.com/dp/B01C4N6IBA', ip, proxy=proxy)
    # print(type(html))
    # print(len(html))
    # parser = HtmlParser()
    # pprint(parser.parser_goods(html, 'B01C4N6IBA'))
    time2 = time.time()
    print(time2 - time1)
    # from conf.setting import DB_CONFIG, REDIS_CONFIG, BASE_DIR
    #
    UA = UaPond.get_new_ua()
    # print(DB_CONFIG)
    # print(REDIS_CONFIG)
    # print(BASE_DIR)
    print(UA)
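
The proxy dict built above follows the proxies mapping that the requests library expects; a minimal sketch of passing it along, assuming requests is the HTTP client (the URL, UA string, and timeout are illustrative placeholders):

import requests

proxies = {'https': 'https://192.126.168.2:3128'}  # same shape as the proxy dict above
ua = 'Mozilla/5.0'  # placeholder; the examples obtain this via UaPond.get_new_ua()
resp = requests.get('https://www.amazon.com/dp/B01C4N6IBA',
                    headers={'User-Agent': ua},
                    proxies=proxies,
                    timeout=10)
print(resp.status_code, len(resp.text))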
Example #3
    def get_kw_ua(self):
        # Fetch a fresh user agent for keyword requests from the UA pool
        return UaPond.get_kw_new_ua()
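
The examples only call into UaPond (get_new_ua, get_kw_new_ua); its implementation is not shown here. Below is a minimal sketch of what such a user-agent pool could look like, assuming it simply draws a random UA string from fixed lists; the class body and UA strings are assumptions, only the method names come from the examples:

import random

class UaPondSketch:
    # Illustrative pools; a production UA pond would hold many more, regularly refreshed strings
    _GOODS_UAS = [
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/96.0 Safari/537.36',
        'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/15.0 Safari/605.1.15',
    ]
    _KW_UAS = [
        'Mozilla/5.0 (X11; Linux x86_64; rv:95.0) Gecko/20100101 Firefox/95.0',
    ]

    @classmethod
    def get_new_ua(cls):
        # Random UA for product-page requests
        return random.choice(cls._GOODS_UAS)

    @classmethod
    def get_kw_new_ua(cls):
        # Random UA for keyword/search-page requests
        return random.choice(cls._KW_UAS)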