'pdct_img_main_url'] and brm.find_brand( pdt['pdct_name_on_eretailer'])['brand'] in mh_brands: print(pdt['pdct_name_on_eretailer'] + "." + pdt['pdct_img_main_url'].split('.')[-1]) response = requests.get(pdt['pdct_img_main_url'], stream=True, verify=False, headers=headers) # response.raw.decode_content = True tmp_file_path = '/tmp/' + shop_id + 'mhers_tmp_{}.imgtype'.format( abs(hash(pdt['pdct_img_main_url']))) img_path = img_path_namer(shop_id, pdt['pdct_name_on_eretailer']) with open(tmp_file_path, 'wb') as out_file: shutil.copyfileobj(response.raw, out_file) if imghdr.what(tmp_file_path) is not None: img_path = img_path.split('.')[0] + '.' + imghdr.what( '/tmp/' + shop_id + 'mhers_tmp_{}.imgtype'.format( abs(hash(pdt['pdct_img_main_url'])))) shutil.copyfile( '/tmp/' + shop_id + 'mhers_tmp_{}.imgtype'.format( abs(hash(pdt['pdct_img_main_url']))), img_path) products[url].update({ 'img_path': img_path, 'img_hash': file_hash(img_path) }) create_csvs(products, categories, searches, shop_id, fpath_namer(shop_id, 'raw_csv'), COLLECTION_DATE) validate_raw_files(fpath_namer(shop_id, 'raw_csv')) driver.quit()
# NOTE(review): `d` and `url` are bound outside the visible part of the file
# (presumably by an enclosing loop over product pages). The original
# indentation was lost, so confirm the nesting of this first section against
# the full script before merging.
print(d['pdct_name_on_eretailer'], d['volume'])
url_mod = clean_url(url, root_url=root_url)
fpath = fpath_namer(shop_id, 'pdct', d['pdct_name_on_eretailer'], 0)
if not op.exists(fpath):
    # Only drive the browser when no saved copy of the page exists at fpath.
    driver.get(url_mod)
    sleep(2)
    driver.save_page(fpath, scroll_to_bottom=True)
# The saved page is parsed whether it was just fetched or already on disk.
products = pdct_parsing(fpath, url, products)
print(products[url])

######################################
# # Download images        ###########
######################################
from ers import download_img

# Everything from here on runs under try/finally so that the driver is shut
# down even when an image download, the CSV export or a validation step raises.
try:
    for url, pdt in products.items():
        img_url = pdt.get('pdct_img_main_url')
        if not img_url:
            # No (or empty) main image URL for this product: nothing to fetch.
            continue
        if brm.find_brand(pdt['pdct_name_on_eretailer'])['brand'] not in mh_brands:
            # Images are only downloaded for products whose detected brand is
            # listed in mh_brands.
            continue

        print(pdt['pdct_name_on_eretailer'] + "." + img_url.split('.')[-1])
        orig_img_path = img_path_namer(shop_id, pdt['pdct_name_on_eretailer'])
        img_path = download_img(img_url, orig_img_path, shop_id=shop_id,
                                decode_content=False, gzipped=False, debug=False)
        if img_path:
            # A falsy return from download_img leaves the product entry untouched.
            products[url].update({
                'img_path': img_path,
                'img_hash': file_hash(img_path),
            })

    raw_csv_dir = fpath_namer(shop_id, 'raw_csv')
    create_csvs(products, categories, searches, shop_id, raw_csv_dir, COLLECTION_DATE)
    validate_raw_files(fpath_namer(shop_id, 'raw_csv'))
    check_products_detection(shop_id, fpath_namer(shop_id, 'raw_csv'), shop_inventory_lw_csv)
finally:
    driver.quit()