from create_csvs import create_csvs
from ers import all_keywords_uk as keywords
from ers import fpath_namer, mh_brands, clean_url, headers
from matcher import BrandMatcher
from ers import COLLECTION_DATE, file_hash, img_path_namer
import shutil
from custom_browser import CustomDriver

# Init variables and assets
shop_id = 'sainsbury'
root_url = 'https://www.sainsburys.co.uk'
# NOTE(review): `requests_cache` and `parse` are used below but are not
# imported in the visible part of this script - presumably the imports sit
# above the visible excerpt; confirm.
requests_cache.install_cache(fpath_namer(shop_id, 'requests_cache'))
country = 'UK'
searches, categories, products = {}, {}, {}
driver = CustomDriver(headless=True, download_images=False)


def getprice(pricestr):
    """Convert a scraped price label into an integer amount of pence.

    Commas and surrounding whitespace are removed first, then two layouts
    are tried in order:

    * ``£<pound>.<pence>`` -> ``pound * 100 + pence``
    * ``<pence>p``         -> ``pence``

    Args:
        pricestr: Raw price text taken from the page.

    Returns:
        The price as an ``int``; the empty string unchanged when
        ``pricestr`` is ``''``; ``None`` when the text fits neither layout.
    """
    if pricestr == '':
        return pricestr
    pricestr = pricestr.replace(',', '').strip()

    price = parse('£{pound:d}.{pence:d}', pricestr)
    if price is not None:
        return price.named['pound'] * 100 + price.named['pence']

    price = parse('{pence:d}p', pricestr)
    if price is not None:
        return price.named['pence']

    # Neither layout matched. Previously this path dereferenced ``None`` and
    # raised AttributeError; report "no price" explicitly instead.
    return None


# NOTE(review): the visible source stops here, right after the opening of
#     urls_ctgs_dict = {
# None of the dictionary's entries are visible, so it is not reconstructed.
from validators import validate_raw_files
from create_csvs import create_csvs
from ers import all_keywords_jp as keywords, fpath_namer, mh_brands, clean_url, headers
from ers import TEST_PAGES_FOLDER_PATH
from matcher import BrandMatcher
from custom_browser import CustomDriver
from parse import parse
import re
from ers import clean_xpathd_text

# Init variables and assets
shop_id = "seijoishii"
root_url = "https://www.seijoishii.com/"
country = "JP"
searches, categories, products = {}, {}, {}
driver = CustomDriver(headless=False)


def getprice(pricestr):
    """Turn a scraped price label into an integer scaled by 100.

    Every character that is not an ASCII digit is dropped, the remaining
    digits are read as one integer, and that integer is multiplied by 100.

    Args:
        pricestr: Raw price text taken from the page.

    Returns:
        ``digits * 100`` as an ``int``; the empty string unchanged when
        ``pricestr`` is ``''``; ``None`` when no digits remain to parse.
    """
    if pricestr == '':
        return pricestr
    digits_only = re.sub("[^0-9]", "", pricestr)
    parsed = parse('{pound:d}', digits_only)
    # NOTE(review): the x100 scaling presumably expresses the amount in
    # hundredths of the currency unit - confirm against the consumers.
    return parsed.named['pound'] * 100 if parsed else None


###################
# # CTG page xpathing #
###################
# NOTE(review): the visible source is cut off in the middle of the next
# statement, which begins:
#     exple_ctg_page_path = op.join(TEST_PAGES_FOLDER_PATH, "seijoishii",
# Its remaining arguments are not visible, so it is not reconstructed here.
from ers import all_keywords_usa as keywords, fpath_namer, mh_brands, clean_url, shop_inventory_lw_csv
from matcher import BrandMatcher
from ers import COLLECTION_DATE, file_hash, img_path_namer, TEST_PAGES_FOLDER_PATH
from custom_browser import CustomDriver
from parse import parse
from ers import clean_xpathd_text

# Init variables and assets
shop_id = 'jimmy_brings'
root_url = 'https://jimmybrings.com.au/'
# NOTE(review): `requests_cache` is not imported in the visible part of this
# script - presumably the import sits above the visible excerpt; confirm.
requests_cache.install_cache(fpath_namer(shop_id, 'requests_cache'))
country = 'AUS'
searches, categories, products = {}, {}, {}
driver = CustomDriver(headless=True)
brm = BrandMatcher()


def getprice(pricestr):
    """Convert a scraped price label into an integer amount of hundredths.

    Commas and surrounding whitespace are removed first, then three layouts
    are tried in order:

    * ``$<dol>.<pence>`` -> ``dol * 100 + pence``
    * ``$<dol>``         -> ``dol * 100``
    * ``<pence>p``       -> ``pence``

    Args:
        pricestr: Raw price text taken from the page.

    Returns:
        The price as an ``int``; the empty string unchanged when
        ``pricestr`` is ``''``; ``None`` when the text fits no layout.
    """
    if pricestr == '':
        return pricestr
    pricestr = pricestr.replace(',', '').strip()

    price = parse('${dol:d}.{pence:d}', pricestr)
    if price is not None:
        # NOTE(review): the visible original had no return for this match
        # (it fell through and yielded None); the excerpt may simply have
        # been cut off before that branch - confirm against the full file.
        return price.named['dol'] * 100 + price.named['pence']

    price = parse('${dol:d}', pricestr)
    if price is not None:
        return price.named['dol'] * 100

    price = parse('{pence:d}p', pricestr)
    if price is not None:
        return price.named['pence']

    # No layout matched. Previously this path dereferenced ``None`` and
    # raised AttributeError; report "no price" explicitly instead.
    return None
from validators import validate_raw_files from create_csvs import create_csvs from custom_browser import CustomDriver from ers import all_keywords_uk as keywords, fpath_namer, mh_brands, clean_url, headers from matcher import BrandMatcher from ers import COLLECTION_DATE, file_hash, img_path_namer import shutil from parse import parse # Init variables and assets shop_id = 'harrods' root_url = 'https://www.harrods.com/' requests_cache.install_cache(fpath_namer(shop_id, 'requests_cache')) country = 'UK' searches, categories, products = {}, {}, {} driver = CustomDriver(headless=True, firefox=True, download_images=True) urls_ctgs_dict = { 'champagne': 'https://www.harrods.com/en-gb/food-and-wine/wine-and-spirits/champagne-and-sparkling?view=Product&list=List&viewAll=False&pageNumber={page}', 'sparkling': 'https://www.harrods.com/en-gb/food-and-wine/wine-and-spirits/champagne-and-sparkling?view=Product&list=List&viewAll=False&pageNumber={page}', 'still_wines': 'https://www.harrods.com/en-gb/food-and-wine/wine-and-spirits/white-wine?view=Product&list=List&viewAll=False&pageNumber={page}', 'whisky': 'https://www.harrods.com/en-gb/food-and-wine/wine-and-spirits/spirits?view=Product&list=List&viewAll=False&pageNumber={page}&categoryFilterIds=34605', 'cognac': 'https://www.harrods.com/en-gb/food-and-wine/wine-and-spirits/spirits?view=Product&list=List&viewAll=False&pageNumber={page}&categoryFilterIds=23692', 'vodka': 'https://www.harrods.com/en-gb/food-and-wine/wine-and-spirits/spirits?view=Product&list=List&viewAll=False&pageNumber={page}&categoryFilterIds=23688', 'white_wine':
import requests_cache, imghdr
from validators import validate_raw_files
from create_csvs import create_csvs
from ers import all_keywords_fr as keywords, fpath_namer, mh_brands, clean_url, headers
from matcher import BrandMatcher
from ers import COLLECTION_DATE, file_hash, img_path_namer
import shutil
from custom_browser import CustomDriver
import re

shop_id = 'vicampo'
root_url = 'https://www.vicampo.de'
# ('GET') without a trailing comma is just the string 'GET'; pass a real
# one-element tuple so the cached-method check is a membership test on a
# sequence of method names rather than a substring test.
requests_cache.install_cache(allowable_methods=('GET',))
country = 'DE'
searches, categories, products = {}, {}, {}
driver = CustomDriver(firefox=True)
from parse import parse


def getprice(pricestr):
    """Convert a scraped euro price label into an integer amount of hundredths.

    Everything except ASCII digits, ``,`` and ``€`` is stripped first. The
    remaining text is then matched against ``€<dol>,<pence>`` and, failing
    that, ``<dol>,<pence>€``; either way the result is ``dol * 100 + pence``.

    Args:
        pricestr: Raw price text taken from the page.

    Returns:
        The price as an ``int``, or ``None`` when nothing is left after
        stripping or when the text fits neither layout.
    """
    pricestr = re.sub("[^0-9,€]", "", pricestr)
    if pricestr == '':
        return None
    # Console trace of the normalised text, kept so the script's existing
    # output does not change.
    print(pricestr)

    # The former '{pence:d}p' fallback could never match: the substitution
    # above removes every 'p', so that path always ended in AttributeError
    # on a None result. Try the symbol on either side of the amount instead.
    for layout in ('€{dol:d},{pence:d}', '{dol:d},{pence:d}€'):
        price = parse(layout, pricestr)
        if price is not None:
            return price.named['dol'] * 100 + price.named['pence']
    return None
from create_csvs import create_csvs
from ers import all_keywords_usa as keywords, fpath_namer, mh_brands, clean_url, headers
from matcher import BrandMatcher
from ers import COLLECTION_DATE, file_hash, img_path_namer
import shutil
import re
from custom_browser import CustomDriver

# Init variables and assets
shop_id = 'b21'
root_url = 'https://www.b-21.com/'
# NOTE(review): `requests_cache` is not imported in the visible part of this
# script - presumably the import sits above the visible excerpt; confirm.
requests_cache.install_cache(fpath_namer(shop_id, 'requests_cache'))
country = 'USA'
searches, categories, products = {}, {}, {}
tmp_searches, tmp_categories = {}, {}
driver = CustomDriver(headless=False)  # WARNING !! PLEASE use Chrome for this one
from parse import parse


def getprice(pricestr):
    """Convert a scraped dollar price label into an integer amount of hundredths.

    Everything except ASCII digits, ``.`` and ``$`` is stripped first, which
    also removes thousands separators, so ``$1,234.56`` is read as
    ``$1234.56``. The remaining text must have the form ``$<pound>.<pence>``
    and yields ``pound * 100 + pence``.

    Args:
        pricestr: Raw price text taken from the page.

    Returns:
        The price as an ``int``, or ``None`` when ``pricestr`` is empty/falsy
        or the stripped text does not fit the layout.
    """
    if not pricestr:
        return None
    pricestr = re.sub("[^0-9.$]", "", pricestr)

    price = parse('${pound:d}.{pence:d}', pricestr)
    if price is None:
        # The former '${th:d},{pound:d}.{pence:d}' fallback could never
        # match: the substitution above removes every comma, so that path
        # always ended in AttributeError on a None result.
        return None
    return price.named['pound'] * 100 + price.named['pence']