import imghdr
import os
import shutil

import requests

# Download images
for url, pdt in products.items():
    if 'pdct_img_main_url' in pdt and pdt['pdct_img_main_url'] and \
            brm.find_brand(pdt['pdct_name_on_eretailer'])['brand'] in mh_brands:
        print(pdt['pdct_name_on_eretailer'] + "." + pdt['pdct_img_main_url'].split('.')[-1])
        # Stream the image to a temporary file first; the final extension is
        # decided later from the file's actual contents, not from the URL.
        response = requests.get(pdt['pdct_img_main_url'], stream=True,
                                verify=False, headers=headers)
        # response.raw.decode_content = True
        tmp_file_path = '/tmp/' + shop_id + 'mhers_tmp_{}.imgtype'.format(
            abs(hash(pdt['pdct_img_main_url'])))
        img_path = img_path_namer(shop_id, pdt['pdct_name_on_eretailer'])
        with open(tmp_file_path, 'wb') as out_file:
            shutil.copyfileobj(response.raw, out_file)
        # Sniff the real image type from the file contents (note: imghdr is
        # deprecated since Python 3.11 and removed in 3.13).
        img_type = imghdr.what(tmp_file_path)
        if img_type is not None:
            img_path = os.path.splitext(img_path)[0] + '.' + img_type
            shutil.copyfile(tmp_file_path, img_path)
            products[url].update({
                'img_path': img_path,
                'img_hash': file_hash(img_path),
            })

create_csvs(products, categories, searches, shop_id,
            fpath_namer(shop_id, 'raw_csv'), COLLECTION_DATE)
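# The loop above relies on a file_hash() helper that is defined elsewhere in
# this codebase. A minimal sketch of what such a helper plausibly looks like,
# assuming an MD5 hex digest of the file contents is wanted (the real
# implementation may differ):
import hashlib

def file_hash(fpath, chunk_size=8192):
    """Return the MD5 hex digest of the file at fpath, read in chunks."""
    h = hashlib.md5()
    with open(fpath, 'rb') as f:
        for chunk in iter(lambda: f.read(chunk_size), b''):
            h.update(chunk)
    return h.hexdigest()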
products[url].update({
    # ... (preceding fields truncated in the source)
    'ctg_denom_txt': ' '.join(' '.join(
        tree.xpath('//ul[@id="breadcrumb"]//text()')).split()),
})
print(products[url])

# Download images
for url, pdt in products.items():
    if 'pdct_img_main_url' in pdt and pdt['pdct_img_main_url'] and \
            brm.find_brand(pdt['pdct_name_on_eretailer'])['brand'] in mh_brands:
        print(pdt['pdct_name_on_eretailer'] + "." + pdt['pdct_img_main_url'].split('.')[-1])
        print(pdt['pdct_img_main_url'])
        # Name the local file after the product, keeping the URL's extension.
        img_path = img_path_namer(
            shop_id,
            pdt['pdct_name_on_eretailer'] + "." + pdt['pdct_img_main_url'].split('.')[-1])
        r = requests.get(pdt['pdct_img_main_url'])
        with open(img_path, 'wb') as out_file:
            out_file.write(r.content)
        products[url].update({
            'img_path': img_path,
            'img_hash': file_hash(img_path),
        })

# Dump everything to raw CSVs, sanity-check them, and shut down the browser.
create_csvs(products, categories, searches, shop_id,
            fpath_namer(shop_id, 'raw_csv'), COLLECTION_DATE)
validate_raw_files(fpath_namer(shop_id, 'raw_csv'))
driver.quit()
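# img_path_namer() and fpath_namer() are likewise external helpers. A
# hypothetical sketch of img_path_namer, assuming it only slugifies the
# product name into a per-shop image directory (the actual helper, its
# directory layout, and its naming scheme may differ):
import os
import re

def img_path_namer(shop_id, name):
    """Build a filesystem-safe image path for a product of the given shop."""
    safe = re.sub(r'[^A-Za-z0-9._-]+', '_', name).strip('_')
    folder = os.path.join('imgs', shop_id)  # assumed output directory
    os.makedirs(folder, exist_ok=True)
    return os.path.join(folder, safe)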