Exemple #1
0
from create_csvs import create_csvs

from ers import all_keywords_uk as keywords
from ers import fpath_namer, mh_brands, clean_url, headers
from matcher import BrandMatcher
from ers import COLLECTION_DATE, file_hash, img_path_namer
import shutil
from custom_browser import CustomDriver

# Init variables and assets: module-level configuration for this shop's script.
shop_id = 'sainsbury'  # shop identifier; also passed to fpath_namer below
root_url = 'https://www.sainsburys.co.uk'
# Install a requests cache named by fpath_namer(shop_id, 'requests_cache').
# NOTE(review): no `import requests_cache` is visible in this excerpt - confirm it is imported.
requests_cache.install_cache(fpath_namer(shop_id, 'requests_cache'))
country = 'UK'
# Three independent empty dicts; presumably filled with scraped results later - TODO confirm.
searches, categories, products = {}, {}, {}
# Browser wrapper from custom_browser, constructed with headless=True and download_images=False.
driver = CustomDriver(headless=True, download_images=False)


def getprice(pricestr):
    """Convert a sterling price string into an integer number of pence.

    Two formats are recognised after commas and surrounding whitespace are
    removed: ``£<pounds>.<pence>`` (e.g. ``'£1.50'`` -> ``150``) and
    ``<pence>p`` (e.g. ``'75p'`` -> ``75``).

    Args:
        pricestr: Raw price text.

    Returns:
        The price in pence as an int; the empty string unchanged when
        ``pricestr`` is ``''``; ``None`` when the text matches neither format.
    """
    if pricestr == '':
        return pricestr
    pricestr = pricestr.replace(',', '').strip()

    price = parse('£{pound:d}.{pence:d}', pricestr)
    if price is not None:
        return price.named['pound'] * 100 + price.named['pence']

    price = parse('{pence:d}p', pricestr)
    if price is None:
        # parse() returns None on a mismatch; previously this fell through to
        # `price.named` and raised AttributeError on unrecognised text.
        return None
    return price.named['pence']


urls_ctgs_dict = {
from validators import validate_raw_files
from create_csvs import create_csvs
from ers import all_keywords_jp as keywords, fpath_namer, mh_brands, clean_url, headers
from ers import TEST_PAGES_FOLDER_PATH
from matcher import BrandMatcher
from custom_browser import CustomDriver
from parse import parse
import re
from ers import clean_xpathd_text

# Init variables and assets: module-level configuration for this shop's script.
shop_id = "seijoishii"  # shop identifier
root_url = "https://www.seijoishii.com/"
country = "JP"
# Three independent empty dicts; presumably filled with scraped results later - TODO confirm.
searches, categories, products = {}, {}, {}
# Browser wrapper from custom_browser, constructed with headless=False.
driver = CustomDriver(headless=False)


def getprice(pricestr):
    """Turn a price string into an integer amount scaled by 100.

    Every character that is not an ASCII digit is dropped, the remaining
    digits are read as one integer, and that integer is multiplied by 100.

    Args:
        pricestr: Raw price text.

    Returns:
        The scaled integer; the empty string unchanged when ``pricestr`` is
        ``''``; ``None`` when ``parse`` finds no integer in the cleaned text.
    """
    if pricestr == '':
        return pricestr

    digits_only = re.sub("[^0-9]", "", pricestr)
    parsed = parse('{pound:d}', digits_only)
    if not parsed:
        return None
    return parsed.named['pound'] * 100

###################
# # CTG page xpathing #
###################
exple_ctg_page_path = op.join(TEST_PAGES_FOLDER_PATH, "seijoishii",
Exemple #3
0
from ers import all_keywords_usa as keywords, fpath_namer, mh_brands, clean_url, shop_inventory_lw_csv

from matcher import BrandMatcher
from ers import COLLECTION_DATE, file_hash, img_path_namer, TEST_PAGES_FOLDER_PATH
from custom_browser import CustomDriver
from parse import parse
from ers import clean_xpathd_text

# Init variables and assets: module-level configuration for this shop's script.
shop_id = 'jimmy_brings'  # shop identifier; also passed to fpath_namer below
root_url = 'https://jimmybrings.com.au/'
# Install a requests cache named by fpath_namer(shop_id, 'requests_cache').
requests_cache.install_cache(fpath_namer(shop_id, 'requests_cache'))
# NOTE(review): the keywords imported for this script are all_keywords_usa
# while country is 'AUS' - confirm this pairing is intended.
country = 'AUS'

# Three independent empty dicts; presumably filled with scraped results later - TODO confirm.
searches, categories, products = {}, {}, {}
# Browser wrapper from custom_browser, constructed with headless=True.
driver = CustomDriver(headless=True)
# Brand matcher instance from the matcher module, built with default arguments.
brm = BrandMatcher()


def getprice(pricestr):
    """Convert a dollar price string into an integer number of cents.

    After commas and surrounding whitespace are removed, three formats are
    tried in order: ``$<dollars>.<cents>`` (``'$12.50'`` -> ``1250``),
    ``$<dollars>`` (``'$12'`` -> ``1200``) and ``<cents>p`` (``'75p'`` -> ``75``).

    Args:
        pricestr: Raw price text.

    Returns:
        The price in cents as an int; the empty string unchanged when
        ``pricestr`` is ``''``; ``None`` when no format matches.
    """
    if pricestr == '':
        return pricestr
    pricestr = pricestr.replace(',', '').strip()

    price = parse('${dol:d}.{pence:d}', pricestr)
    if price is not None:
        # This success path previously had no return, so a well-formed
        # '$D.CC' price fell off the end of the function and yielded None.
        return price.named['dol'] * 100 + price.named['pence']

    price = parse('${dol:d}', pricestr)
    if price is not None:
        return price.named['dol'] * 100

    price = parse('{pence:d}p', pricestr)
    if price is None:
        # parse() returns None on a mismatch; avoid AttributeError on `.named`.
        return None
    return price.named['pence']
from validators import validate_raw_files
from create_csvs import create_csvs
from custom_browser import CustomDriver
from ers import all_keywords_uk as keywords, fpath_namer, mh_brands, clean_url, headers
from matcher import BrandMatcher
from ers import COLLECTION_DATE, file_hash, img_path_namer
import shutil
from parse import parse

# Init variables and assets: module-level configuration for this shop's script.
shop_id = 'harrods'  # shop identifier; also passed to fpath_namer below
root_url = 'https://www.harrods.com/'
# Install a requests cache named by fpath_namer(shop_id, 'requests_cache').
# NOTE(review): no `import requests_cache` is visible in this excerpt - confirm it is imported.
requests_cache.install_cache(fpath_namer(shop_id, 'requests_cache'))
country = 'UK'
# Three independent empty dicts; presumably filled with scraped results later - TODO confirm.
searches, categories, products = {}, {}, {}
# Browser wrapper from custom_browser, constructed with headless=True, firefox=True
# and download_images=True.
driver = CustomDriver(headless=True, firefox=True, download_images=True)

urls_ctgs_dict = {
    'champagne':
    'https://www.harrods.com/en-gb/food-and-wine/wine-and-spirits/champagne-and-sparkling?view=Product&list=List&viewAll=False&pageNumber={page}',
    'sparkling':
    'https://www.harrods.com/en-gb/food-and-wine/wine-and-spirits/champagne-and-sparkling?view=Product&list=List&viewAll=False&pageNumber={page}',
    'still_wines':
    'https://www.harrods.com/en-gb/food-and-wine/wine-and-spirits/white-wine?view=Product&list=List&viewAll=False&pageNumber={page}',
    'whisky':
    'https://www.harrods.com/en-gb/food-and-wine/wine-and-spirits/spirits?view=Product&list=List&viewAll=False&pageNumber={page}&categoryFilterIds=34605',
    'cognac':
    'https://www.harrods.com/en-gb/food-and-wine/wine-and-spirits/spirits?view=Product&list=List&viewAll=False&pageNumber={page}&categoryFilterIds=23692',
    'vodka':
    'https://www.harrods.com/en-gb/food-and-wine/wine-and-spirits/spirits?view=Product&list=List&viewAll=False&pageNumber={page}&categoryFilterIds=23688',
    'white_wine':
import requests_cache, imghdr

from validators import validate_raw_files
from create_csvs import create_csvs
from ers import all_keywords_fr as keywords, fpath_namer, mh_brands, clean_url, headers
from matcher import BrandMatcher
from ers import COLLECTION_DATE, file_hash, img_path_namer
import shutil
from custom_browser import CustomDriver
import re
# Init variables and assets: module-level configuration for this shop's script.
shop_id = 'vicampo'  # shop identifier
root_url = 'https://www.vicampo.de'
# allowable_methods expects a collection of method names. ('GET') without a
# trailing comma is just the string 'GET', so pass a real one-element tuple.
requests_cache.install_cache(allowable_methods=('GET',))
# NOTE(review): the keywords imported for this script are all_keywords_fr
# while country is 'DE' - confirm this pairing is intended.
country = 'DE'
# Three independent empty dicts; presumably filled with scraped results later - TODO confirm.
searches, categories, products = {}, {}, {}
# Browser wrapper from custom_browser, constructed with firefox=True.
driver = CustomDriver(firefox=True)
from parse import parse


def getprice(pricestr):
    """Convert a euro price string into an integer number of cents.

    Every character other than digits, ``,`` and ``€`` is removed first; the
    cleaned text must then look like ``€<euros>,<cents>``
    (``'€12,99'`` -> ``1299``).

    Args:
        pricestr: Raw price text.

    Returns:
        The price in cents as an int, or ``None`` when nothing is left after
        cleaning or the cleaned text does not match the expected format.
    """
    pricestr = re.sub("[^0-9,€]", "", pricestr)
    if pricestr == '':
        return None
    print(pricestr)
    price = parse('€{dol:d},{pence:d}', pricestr)
    if price is None:
        # The old '{pence:d}p' fallback could never match: the regex above
        # strips every 'p', so it always ended in AttributeError on None.
        return None
    return price.named['dol'] * 100 + price.named['pence']
Exemple #6
0
from create_csvs import create_csvs
from ers import all_keywords_usa as keywords, fpath_namer, mh_brands, clean_url, headers
from matcher import BrandMatcher
from ers import COLLECTION_DATE, file_hash, img_path_namer
import shutil
import re
from custom_browser import CustomDriver

# Init variables and assets: module-level configuration for this shop's script.
shop_id = 'b21'  # shop identifier; also passed to fpath_namer below
root_url = 'https://www.b-21.com/'
# Install a requests cache named by fpath_namer(shop_id, 'requests_cache').
# NOTE(review): no `import requests_cache` is visible in this excerpt - confirm it is imported.
requests_cache.install_cache(fpath_namer(shop_id, 'requests_cache'))
country = 'USA'
# Three independent empty dicts; presumably filled with scraped results later - TODO confirm.
searches, categories, products = {}, {}, {}
# Two more empty dicts; by their names, temporary counterparts of the above - TODO confirm usage.
tmp_searches, tmp_categories = {}, {}
# Browser wrapper from custom_browser, constructed with headless=False.
driver = CustomDriver(headless=False) # WARNING !! PLEASE use Chrome for this one


from parse import parse

def getprice(pricestr):
    """Convert a dollar price string into an integer number of cents.

    Every character other than digits, ``.`` and ``$`` is removed first, so
    thousands separators disappear (``'$1,234.56'`` becomes ``'$1234.56'``).
    The cleaned text must then look like ``$<dollars>.<cents>``.

    Args:
        pricestr: Raw price text; may be empty or ``None``.

    Returns:
        The price in cents as an int, or ``None`` when ``pricestr`` is falsy
        or the cleaned text does not match the expected format.
    """
    if not pricestr:
        return None
    pricestr = re.sub("[^0-9.$]", "", pricestr)
    price = parse('${pound:d}.{pence:d}', pricestr)
    if not price:
        # The old '${th:d},{pound:d}.{pence:d}' fallback could never match:
        # commas are stripped by the regex above, so it always ended in
        # AttributeError on None. Thousands are already covered by the
        # comma-free pattern.
        return None
    return price.named['pound'] * 100 + price.named['pence']