コード例 #1
0
from matcher import BrandMatcher
from ers import COLLECTION_DATE, file_hash, img_path_namer
import shutil
from custom_browser import CustomDriver
from parse import parse

# Init variables and assets
shop_id = "wine_com"
root_url = "http://www.wine.com/"
requests_cache.install_cache(fpath_namer(shop_id, 'requests_cache'))
country = "USA"

searches, categories, products = {}, {}, {}
# If necessary
driver = CustomDriver(headless=True)


def getprice(pricestr):
    """Turn a displayed dollar price into an integer amount.

    The empty string is returned unchanged. Otherwise everything except
    digits, "." and "$" is discarded before the text is matched.

    NOTE(review): this listing appears to stop mid-function -- no return is
    visible for the case where the first pattern matches -- confirm against
    the complete file.
    """
    print(pricestr)
    if pricestr == '':
        return pricestr
    # Keep only digits, "." and "$".
    pricestr = re.sub("[^0-9.$]", "", pricestr)
    price = parse('${pound:d}.{pence:d}', pricestr)
    if price is None:
        # NOTE(review): "p" was removed by the substitution above, so this
        # "<digits>p" pattern presumably never matches -- confirm.
        price = parse('{pence:d}p', pricestr)
        if price is None:
            # Whole-dollar amount: "$12" -> 1200.
            # NOTE(review): if this parse also fails, the next line would
            # raise AttributeError on None -- confirm intended handling.
            price = parse('${dol:d}', pricestr)
            return price.named['dol'] * 100
        else:
            return price.named['pence']
コード例 #2
0
from ers import all_keywords_fr as keywords
from ers import clean_xpathd_text
from ers import fpath_namer, mh_brands, clean_url
from matcher import BrandMatcher
from validators import validate_raw_files, check_products_detection

parser = etree.HTMLParser()

# Init variables and assets
shop_id = 'auchan_drive'
root_url = 'https://www.auchandrive.fr'
requests_cache.install_cache(fpath_namer(shop_id, 'requests_cache'))
country = 'FR'

searches, categories, products = {}, {}, {}
driver = CustomDriver(headless=True, download_images=True)
brm = BrandMatcher()


def getprice(pricestr):
    """Convert a euro price such as "12,50 €" into integer cents.

    Args:
        pricestr: Raw price text; may be empty or None.

    Returns:
        ``euro * 100 + cent`` when the text, reduced to digits, "," and "€",
        has the form ``<digits>,<digits>€``; otherwise None.
    """
    if pricestr:
        cleaned = re.sub("[^0-9,€]", "", pricestr)
        result = parse('{euro:d},{cent:d}€', cleaned)
        if result is not None:
            fields = result.named
            return fields['euro'] * 100 + fields['cent']
    return None


###################
# # CTG page xpathing #
###################
コード例 #3
0
from validators import validate_raw_files
from create_csvs import create_csvs
from ers import all_keywords_jp as keywords, fpath_namer, mh_brands, clean_url, headers
from ers import TEST_PAGES_FOLDER_PATH
from matcher import BrandMatcher
from custom_browser import CustomDriver
from parse import parse
import re
from ers import clean_xpathd_text

# Init variables and assets
shop_id = "seijoishii"
root_url = "https://www.seijoishii.com/"
country = "JP"
searches, categories, products = {}, {}, {}
driver = CustomDriver(headless=False)


def getprice(pricestr):
    """Convert a price string into an integer amount (digits * 100).

    Args:
        pricestr: Raw price text.

    Returns:
        The empty string unchanged when ``pricestr`` is ``''``; otherwise the
        number formed by all digits in the text multiplied by 100, or None
        when the pattern does not match.
    """
    if pricestr != '':
        # Drop everything that is not a digit before matching.
        digits = re.sub("[^0-9]", "", pricestr)
        result = parse('{pound:d}', digits)
        return result.named['pound'] * 100 if result else None
    return pricestr


###################
# # CTG page xpathing #
###################
exple_ctg_page_path = op.join(TEST_PAGES_FOLDER_PATH, "seijoishii",
コード例 #4
0
import shutil
from custom_browser import CustomDriver
from parse import parse
from validators import validate_raw_files
from create_csvs import create_csvs

# Init variables and assets
shop_id = "mondovino"
root_url = "https://www.mondovino.ch"
requests_cache.install_cache(fpath_namer(shop_id, 'requests_cache'))
country = "CH"

searches, categories, products = {}, {}, {}

# If necessary
driver = CustomDriver(headless=False, download_images=True)


def getprice(pricestr):
    """Convert a Swiss-franc price such as "CHF 12.50" into integer centimes.

    The text is lower-cased and reduced to digits, "." and the letters
    "c", "h", "f" before matching.

    Args:
        pricestr: Raw price text.

    Returns:
        ``francs * 100 + centimes`` when the cleaned text is exactly
        ``chf<digits>.<digits>``; the empty string unchanged when
        ``pricestr`` is ``''``; None for any other text.
    """
    if pricestr == '':
        return pricestr
    cleaned = re.sub(r"[^0-9.chf]", "", pricestr.lower())
    # A "<digits>p" form cannot occur here because "p" is stripped above, so
    # anything that is not chfD.D is reported as None instead of raising.
    match = re.fullmatch(r"chf(\d+)\.(\d+)", cleaned)
    if match is None:
        return None
    return int(match.group(1)) * 100 + int(match.group(2))


urls_ctgs_dict = {
コード例 #5
0
from validators import validate_raw_files
from create_csvs import create_csvs
from custom_browser import CustomDriver
from ers import all_keywords_uk as keywords, fpath_namer, mh_brands, clean_url, headers
from matcher import BrandMatcher
from ers import COLLECTION_DATE, file_hash, img_path_namer
import shutil
from parse import parse

# Init variables and assets
shop_id = 'harrods'
root_url = 'https://www.harrods.com/'
requests_cache.install_cache(fpath_namer(shop_id, 'requests_cache'))
country = 'UK'
searches, categories, products = {}, {}, {}
driver = CustomDriver(headless=True, firefox=True, download_images=True)

urls_ctgs_dict = {
    'champagne':
    'https://www.harrods.com/en-gb/food-and-wine/wine-and-spirits/champagne-and-sparkling?view=Product&list=List&viewAll=False&pageNumber={page}',
    'sparkling':
    'https://www.harrods.com/en-gb/food-and-wine/wine-and-spirits/champagne-and-sparkling?view=Product&list=List&viewAll=False&pageNumber={page}',
    'still_wines':
    'https://www.harrods.com/en-gb/food-and-wine/wine-and-spirits/white-wine?view=Product&list=List&viewAll=False&pageNumber={page}',
    'whisky':
    'https://www.harrods.com/en-gb/food-and-wine/wine-and-spirits/spirits?view=Product&list=List&viewAll=False&pageNumber={page}&categoryFilterIds=34605',
    'cognac':
    'https://www.harrods.com/en-gb/food-and-wine/wine-and-spirits/spirits?view=Product&list=List&viewAll=False&pageNumber={page}&categoryFilterIds=23692',
    'vodka':
    'https://www.harrods.com/en-gb/food-and-wine/wine-and-spirits/spirits?view=Product&list=List&viewAll=False&pageNumber={page}&categoryFilterIds=23688',
    'white_wine':
コード例 #6
0
import shutil
from custom_browser import CustomDriver
from parse import parse
import re


# Init variables and assets
shop_id = "pogos_wine_spirits"
root_url = "https://www.pogoswine.com/" 
requests_cache.install_cache(fpath_namer(shop_id, 'requests_cache'))
country = "USA"


searches, categories, products = {}, {}, {}
# If necessary
driver = CustomDriver(headless=True)


def getprice(pricestr):
    """Convert a dollar price such as "$1,234.50" into integer cents.

    When the text contains two or more "$" signs, only the first amount is
    used.

    Args:
        pricestr: Raw price text.

    Returns:
        ``dollars * 100 + cents`` when the cleaned text is exactly
        ``$<digits>.<digits>``; the empty string unchanged when
        ``pricestr`` is ``''``; None for any other text.
    """
    if pricestr == '':
        return pricestr
    if pricestr.count('$') >= 2:
        # Keep the text between the first and second "$".
        pricestr = "$" + pricestr.split('$')[1]
    cleaned = re.sub(r"[^0-9.$]", "", pricestr)
    # A "<digits>p" form cannot occur here because "p" is stripped above, so
    # anything that is not $D.D is reported as None instead of raising.
    match = re.fullmatch(r"\$(\d+)\.(\d+)", cleaned)
    if match is None:
        return None
    return int(match.group(1)) * 100 + int(match.group(2))
コード例 #7
0
import requests_cache, imghdr

from validators import validate_raw_files
from create_csvs import create_csvs
from ers import all_keywords_fr as keywords, fpath_namer, mh_brands, clean_url, headers
from matcher import BrandMatcher
from ers import COLLECTION_DATE, file_hash, img_path_namer
import shutil
from custom_browser import CustomDriver
import re
shop_id = 'vicampo'
root_url = 'https://www.vicampo.de'
requests_cache.install_cache(allowable_methods=('GET'))
country = 'DE'
searches, categories, products = {}, {}, {}
driver = CustomDriver(firefox=True)
from parse import parse


def getprice(pricestr):
    """Convert a euro price written as "€12,50" into integer cents.

    Args:
        pricestr: Raw price text (must be a string).

    Returns:
        ``euros * 100 + cents`` when the text, reduced to digits, "," and
        "€", is exactly ``€<digits>,<digits>``; None when nothing is left
        after cleaning or the text has any other shape.
    """
    cleaned = re.sub(r"[^0-9,€]", "", pricestr)
    if cleaned == '':
        return None
    print(cleaned)
    # A "<digits>p" form cannot occur here because "p" is stripped above, so
    # anything that is not €D,D is reported as None instead of raising.
    match = re.fullmatch(r"€(\d+),(\d+)", cleaned)
    if match is None:
        return None
    return int(match.group(1)) * 100 + int(match.group(2))
コード例 #8
0
from ers import COLLECTION_DATE, file_hash, img_path_namer
import shutil
from custom_browser import CustomDriver
from parse import parse


# Init variables and assets
shop_id = "selfridges"
root_url = "http://www.selfridges.com/GB/en/"
requests_cache.install_cache(fpath_namer(shop_id, 'requests_cache'))
country = "UK"


searches, categories, products = {}, {}, {}
# If necessary
driver = CustomDriver(headless=False)


def getprice(pricestr):
    """Convert a sterling price such as "£1,234.50" into integer pence.

    Args:
        pricestr: Raw price text.

    Returns:
        ``pounds * 100 + pence`` when the text, reduced to digits, "." and
        "£", is exactly ``£<digits>.<digits>``; the empty string unchanged
        when ``pricestr`` is ``''``; None for any other text.
    """
    if pricestr == '':
        return pricestr
    cleaned = re.sub(r"[^0-9.£]", "", pricestr)
    # A "<digits>p" form cannot occur here because "p" is stripped above, so
    # anything that is not £D.D is reported as None instead of raising.
    match = re.fullmatch(r"£(\d+)\.(\d+)", cleaned)
    if match is None:
        return None
    return int(match.group(1)) * 100 + int(match.group(2))


urls_ctgs_dict = {
コード例 #9
0
ファイル: bevmo.py プロジェクト: maker-project-1/webscrapping
#
from matcher import BrandMatcher
from ers import COLLECTION_DATE, file_hash, img_path_namer
import shutil
from custom_browser import CustomDriver
from parse import parse

# Init variables and assets
shop_id = "bevmo"
root_url = "http://shop.bevmo.com/"
requests_cache.install_cache(fpath_namer(shop_id, 'requests_cache'))
country = "USA"

searches, categories, products = {}, {}, {}
# If necessary
driver = CustomDriver(headless=False, download_images=True)


def getprice(pricestr):
    """Convert a dollar price such as "$19.99" into integer cents.

    Args:
        pricestr: Raw price text (must be a string).

    Returns:
        ``dollars * 100 + cents`` when the text, reduced to digits, "." and
        "$", is exactly ``$<digits>.<digits>``; the empty string when nothing
        is left after cleaning; None for any other text.
    """
    cleaned = re.sub(r"[^0-9.$]", "", pricestr)
    if cleaned == '':
        return cleaned
    # A "<digits>p" form cannot occur here because "p" is stripped above, so
    # anything that is not $D.D is reported as None instead of raising.
    match = re.fullmatch(r"\$(\d+)\.(\d+)", cleaned)
    if match is None:
        return None
    return int(match.group(1)) * 100 + int(match.group(2))


def init_bevmo(driver):
コード例 #10
0
from create_csvs import create_csvs

from ers import all_keywords_aus as keywords, fpath_namer, mh_brands, clean_url, headers
from matcher import BrandMatcher
from ers import COLLECTION_DATE, file_hash, img_path_namer
import shutil
from parse import parse
from custom_browser import CustomDriver

# Init variables and assets
shop_id = 'goodygoody'
root_url = 'https://www.goodygoody.com/'
requests_cache.install_cache(fpath_namer(shop_id, 'requests_cache'))
country = 'AUS'
searches, categories, products = {}, {}, {}
driver = CustomDriver(headless=True, download_images=True)


def getprice(pricestr):
    """Convert a price string into an integer number of minor units.

    Commas and surrounding whitespace are removed first. Two shapes are
    recognised: ``$<dollars>.<cents>`` and ``<n>p``.

    Args:
        pricestr: Raw price text.

    Returns:
        ``dollars * 100 + cents`` or the bare ``<n>`` value; the empty
        string unchanged when ``pricestr`` is ``''``; None when the text
        fits neither shape.
    """
    if pricestr == '':
        return pricestr
    pricestr = pricestr.replace(',', '').strip()
    price = parse('${dol:d}.{pence:d}', pricestr)
    if price is not None:
        return price.named['dol'] * 100 + price.named['pence']
    price = parse('{pence:d}p', pricestr)
    if price is None:
        # parse() yields None on a mismatch; report that instead of failing
        # with AttributeError on ``None.named``.
        return None
    return price.named['pence']


def init_goodygoody(driver):
コード例 #11
0
from ers import COLLECTION_DATE, file_hash, img_path_namer
import shutil
from custom_browser import CustomDriver
from parse import parse


# Init variables and assets
shop_id = "dan_murphy"
root_url = "http://www.danmurphys.com.au/" 
requests_cache.install_cache(fpath_namer(shop_id, 'requests_cache'))
country = "AUS"


searches, categories, products = {}, {}, {}
# If necessary
driver = CustomDriver(headless=True)


def getprice(pricestr):
    """Convert a dollar price such as "$1,234.50" into integer cents.

    Args:
        pricestr: Raw price text.

    Returns:
        ``dollars * 100 + cents`` when the text, reduced to digits, "." and
        "$", is exactly ``$<digits>.<digits>``; the empty string unchanged
        when ``pricestr`` is ``''``; None for any other text.
    """
    if pricestr == '':
        return pricestr
    cleaned = re.sub(r"[^0-9.$]", "", pricestr)
    # A "<digits>p" form cannot occur here because "p" is stripped above, so
    # anything that is not $D.D is reported as None instead of raising.
    match = re.fullmatch(r"\$(\d+)\.(\d+)", cleaned)
    if match is None:
        return None
    return int(match.group(1)) * 100 + int(match.group(2))


コード例 #12
0
from ers import all_keywords_fr as keywords, fpath_namer, mh_brands, clean_url, headers
from matcher import BrandMatcher
from ers import COLLECTION_DATE, file_hash, img_path_namer
import shutil
from custom_browser import CustomDriver
from parse import parse


# Init variables and assets
shop_id = 'vinatis'
root_url = 'https://www.vinatis.com'
requests_cache.install_cache(fpath_namer(shop_id, 'requests_cache'))
country = 'FR'
searches, categories, products = {}, {}, {}
driver = CustomDriver(headless=True, download_images=False)


def getprice(pricestr):
    """Convert a euro price such as "12,50 €" into integer cents.

    Spaces are removed first. Two shapes are recognised:
    ``<euros>,<cents>€`` and ``<n>p``.

    Args:
        pricestr: Raw price text (must be a string).

    Returns:
        ``euros * 100 + cents`` or the bare ``<n>`` value; the empty string
        when nothing but spaces was given; None when the text fits neither
        shape.
    """
    pricestr = pricestr.replace(' ', '')
    if pricestr == '':
        return pricestr
    price = parse('{dol:d},{pence:d}€', pricestr)
    if price is not None:
        return price.named['dol'] * 100 + price.named['pence']
    price = parse('{pence:d}p', pricestr)
    if price is None:
        # parse() yields None on a mismatch; report that instead of failing
        # with AttributeError on ``None.named``.
        return None
    return price.named['pence']


urls_ctgs_dict = {
コード例 #13
0
ファイル: asda.py プロジェクト: maker-project-1/webscrapping
from create_csvs import create_csvs
from ers import all_keywords_uk as keywords
from ers import fpath_namer, mh_brands, clean_url, headers
from matcher import BrandMatcher
from ers import COLLECTION_DATE, file_hash, img_path_namer
import shutil
from custom_browser import CustomDriver
import re

# Init variables and assets
shop_id = 'asda'
root_url = 'https://groceries.asda.com/'
requests_cache.install_cache(fpath_namer(shop_id, 'requests_cache'))
country = 'UK'
searches, categories, products = {}, {}, {}
driver = CustomDriver(headless=False, download_images=True)


def getprice(pricestr):
    """Convert a sterling price ("£40.00" or "75p") into integer pence.

    Everything except digits, ".", "£" and a lower-case "p" is discarded
    before matching.

    Args:
        pricestr: Raw price text.

    Returns:
        ``pounds * 100 + pence`` for ``£<digits>.<digits>``; the bare number
        for ``<digits>p``; the empty string unchanged when ``pricestr`` is
        ``''``; None when the cleaned text fits neither shape.
    """
    if pricestr == '':
        return pricestr
    cleaned = re.sub(r"[^0-9.£p]", "", pricestr)
    pounds = re.fullmatch(r"£(\d+)\.(\d+)", cleaned)
    if pounds is not None:
        return int(pounds.group(1)) * 100 + int(pounds.group(2))
    pence = re.fullmatch(r"(\d+)p", cleaned)
    if pence is not None:
        return int(pence.group(1))
    # Neither shape matched: report None rather than raising.
    return None


print(getprice('£40.00'))
コード例 #14
0
from validators import validate_raw_files
from create_csvs import create_csvs
from ers import all_keywords_fr as keywords, fpath_namer, mh_brands, clean_url, headers
from matcher import BrandMatcher
from ers import COLLECTION_DATE, file_hash, img_path_namer
import shutil
from custom_browser import CustomDriver
from parse import parse

# Init variables and assets
shop_id = 'grandsvins_prives'
root_url = 'https://grandsvins-prives.com'
requests_cache.install_cache(fpath_namer(shop_id, 'requests_cache'))
country = 'FR'
searches, categories, products = {}, {}, {}
driver = CustomDriver(headless=True, download_images=False)

urls_ctgs_dict = {
    'champagne':
    'https://grandsvins-prives.com/10-les-bulles#region=0&aoc=CHAMPAGNE&producteur=0&prix=0&couleur=0',
    'sparkling':
    'https://grandsvins-prives.com/10-les-bulles#region=0&aoc=CRÉMANT-DE-BORDEAUX&producteur=0&prix=0&couleur=0',
    'still_wines':
    'https://grandsvins-prives.com/18-nos-blancs/',
    'red_wine':
    'https://grandsvins-prives.com/20-bordeaux-grands-crus',
    'whisky':
    'https://grandsvins-prives.com/recherche?controller=search&orderby=position&orderway=desc&search_query=whisky&submit_search=',
    'cognac':
    'https://grandsvins-prives.com/83-spiritueux#region=0&aoc=COGNAC&producteur=0&prix=0&couleur=0',
    'vodka':
コード例 #15
0
from matcher import BrandMatcher
from ers import COLLECTION_DATE, file_hash, img_path_namer
import shutil
from custom_browser import CustomDriver
from parse import parse

# Init variables and assets
shop_id = "argonaut"
requests_cache.install_cache(fpath_namer(shop_id, 'requests_cache'))
root_url = "https://www.argonautliquor.com/"
country = "USA"

searches, categories, products = {}, {}, {}
# If necessary
driver = CustomDriver(headless=True, download_images=True)


def getprice(pricestr):
    """Convert a price string into an integer number of minor units.

    The text is matched as given (no cleaning). Two shapes are recognised:
    ``$<dollars>.<cents>`` and ``<n>p``.

    Args:
        pricestr: Raw price text.

    Returns:
        ``dollars * 100 + cents`` or the bare ``<n>`` value; the empty
        string unchanged when ``pricestr`` is ``''``; None when the text
        fits neither shape.
    """
    if pricestr == '':
        return pricestr
    price = parse('${pound:d}.{pence:d}', pricestr)
    if price is not None:
        return price.named['pound'] * 100 + price.named['pence']
    price = parse('{pence:d}p', pricestr)
    if price is None:
        # parse() yields None on a mismatch; report that instead of failing
        # with AttributeError on ``None.named``.
        return None
    return price.named['pence']


urls_ctgs_dict = {'vodka': 'https://www.argonautliquor.com/search/result_size/96/categories/Vodka/page/{page}',
コード例 #16
0
from ers import all_keywords_usa as keywords, fpath_namer, mh_brands
from matcher import BrandMatcher
from ers import COLLECTION_DATE, file_hash, img_path_namer
import shutil
from custom_browser import CustomDriver
from parse import parse


# Init variables and assets
shop_id = 'kroger'
root_url = 'https://www.kroger.com/'
requests_cache.install_cache(fpath_namer(shop_id, 'requests_cache'))
country = 'USA'
searches, categories, products = {}, {}, {}
driver = CustomDriver(headless=False, download_images=True, firefox=True)
brm = BrandMatcher()

urls_ctgs_dict = {
            'champagne': 'https://www.kroger.com/pl/champagne/0818000004?page={page}&tab=0',
            'sparkling': 'https://www.kroger.com/pl/sparkling-wine/0818000003?page={page}&tab=0',
            'still_wines': 'https://www.kroger.com/pl/white-wine/08122?page={page}&tab=0',
            'whisky': 'https://www.kroger.com/pl/scotch-whiskey/0812100579?page={page}&tab=0',
            'cognac': 'https://www.kroger.com/pl/brandy-cognac/0812100572?page={page}&tab=0',
            'vodka': 'https://www.kroger.com/pl/vodka/0812100582?page={page}&tab=0',
            'red_wine': 'https://www.kroger.com/pl/red-wine/08120?page={page}&tab=0',
            'white_wine': 'https://www.kroger.com/pl/white-wine/08122?page={page}&tab=0',
            'gin': 'https://www.kroger.com/pl/gin/0812100575?page={page}&tab=0',
            'tequila': 'https://www.kroger.com/pl/tequila/0812100581?page={page}&tab=0',
            'brandy': 'https://www.kroger.com/pl/brandy-cognac/0812100572?page={page}&tab=0',
            'rum': 'https://www.kroger.com/pl/rum/0812100578?page={page}&tab=0',
コード例 #17
0
from ers import all_keywords_aus as keywords, mh_brands, headers
from matcher import BrandMatcher
from ers import COLLECTION_DATE, file_hash, img_path_namer, fpath_namer
import shutil
from helpers.random_user_agent import randomua
import requests
from custom_browser import CustomDriver
from time import sleep


# Init variables and assets
shop_id = 'my_bottle_shop'
root_url = 'https://www.mybottleshop.com.au'
session = requests_cache.CachedSession(fpath_namer(shop_id, 'requests_cache'))
session.headers = {'User-Agent': randomua()}
driver = CustomDriver(headless=False, download_images=True)
with session.cache_disabled():
    session.get('https://www.mybottleshop.com.au/directory/currency/switch/currency/AUD/uenc/')
# print(session.cookies)
country = 'AUS'
searches, categories, products = {}, {}, {}
from parse import parse


def getprice(pricestr):
    """Parse a price string into an integer amount (whole units * 100).

    Returns None for empty or None input.

    NOTE(review): this listing appears to stop right after the second
    parse() call; how that result is used is not visible here -- confirm
    against the complete file.
    """
    if not pricestr:
        return
    # Whole-number price, e.g. "25" -> 2500.
    price = parse('{pound:d}', pricestr)
    if price:
        return price.named['pound'] * 100
    # Otherwise try the "<units>.<fraction>" shape.
    price = parse('{pound:d}.{pence:d}', pricestr)
コード例 #18
0
from ers import all_keywords_de as keywords, fpath_namer, mh_brands, clean_url, headers
from matcher import BrandMatcher
from ers import COLLECTION_DATE, file_hash, img_path_namer
import shutil
from custom_browser import CustomDriver
import requests

# Init variables and assets
shop_id = 'hawesko'
root_url = 'https://www.hawesko.de'
country = 'DE'
requests_cache.install_cache(fpath_namer(shop_id, 'requests_cache'))

searches, categories, products = {}, {}, {}
driver = CustomDriver(headless=True, download_images=False)

from parse import parse


def getprice(pricestr):
    """Parse an integer price string and scale it by 100.

    Args:
        pricestr: Text expected to be a bare integer; may be empty or None.

    Returns:
        The parsed integer multiplied by 100, or None when the input is
        empty/None or is not a bare integer.
    """
    if not pricestr:
        return
    price = parse('{poundandcent:d}', pricestr)
    if price is None:
        # parse() yields None on a mismatch; report that instead of failing
        # with AttributeError on ``None.named``.
        return None
    return price.named['poundandcent'] * 100


def getpromoprice(pricestr):
    """Parse a promotional price string.

    Spaces are removed first; if nothing remains, the (now empty) string is
    returned.

    NOTE(review): this listing appears to stop here; the actual parsing is
    not visible -- confirm against the complete file.
    """
    pricestr = pricestr.replace(' ', '')
    if pricestr == '':
        return pricestr
コード例 #19
0
from validators import validate_raw_files
from create_csvs import create_csvs
from ers import all_keywords_jp as keywords, fpath_namer, mh_brands, clean_url
from ers import TEST_PAGES_FOLDER_PATH
from matcher import BrandMatcher
from custom_browser import CustomDriver
from parse import parse
from ers import clean_xpathd_text
import re

# Init variables and assets
shop_id = "aeon_dewine"
root_url = "https://www.aeondewine.com"
country = "JP"
searches, categories, products = {}, {}, {}
driver = CustomDriver(headless=False)


def getprice(pricestr):
    """Convert a price string into an integer amount (digits * 100).

    Args:
        pricestr: Raw price text.

    Returns:
        The empty string unchanged when ``pricestr`` is ``''``; otherwise the
        number formed by all digits in the text multiplied by 100, or None
        when the pattern does not match.
    """
    if pricestr != '':
        # Drop everything that is not a digit before matching.
        digits = re.sub("[^0-9]", "", pricestr)
        result = parse('{pound:d}', digits)
        return result.named['pound'] * 100 if result else None
    return pricestr


###################
# # CTG page xpathing #
###################
exple_ctg_page_path = op.join(TEST_PAGES_FOLDER_PATH, "aeon_dewine",
コード例 #20
0
from ers import all_keywords_uk as keywords
from ers import fpath_namer, mh_brands, clean_url, headers
from matcher import BrandMatcher
from ers import COLLECTION_DATE, file_hash, img_path_namer
import shutil
from custom_browser import CustomDriver


# Init variables and assets
shop_id = 'waitrose'
root_url = 'https://www.waitrose.com'
requests_cache.install_cache(fpath_namer(shop_id, 'requests_cache'))
country = 'UK'
searches, categories, products = {}, {}, {}
driver = CustomDriver(headless=False, download_images=False)


def getprice(pricestr):
    """Convert a sterling price ("£2.50" or "85p") into integer pence.

    Args:
        pricestr: Raw price text.

    Returns:
        ``pounds * 100 + pence`` for ``£<digits>.<digits>``; the bare number
        for ``<digits>p``; None when ``pricestr`` is ``''`` or the cleaned
        text fits neither shape.
    """
    if pricestr == '':
        return None
    # Remove the label first: its "p" would otherwise survive the cleaning
    # below, which deliberately keeps "p" for pence amounts.
    pricestr = pricestr.replace('Itemprice', '')
    cleaned = re.sub(r"[^0-9.£p]", "", pricestr)
    pounds = re.fullmatch(r"£(\d+)\.(\d+)", cleaned)
    if pounds is not None:
        return int(pounds.group(1)) * 100 + int(pounds.group(2))
    # Not a pounds-and-pence amount: log it, then try the pence-only shape.
    print("?? price", cleaned)
    pence = re.fullmatch(r"(\d+)p", cleaned)
    if pence is None:
        # Neither shape matched: report None rather than raising.
        return None
    return int(pence.group(1))
コード例 #21
0
from create_csvs import create_csvs

from ers import all_keywords_uk as keywords
from ers import fpath_namer, mh_brands, clean_url, headers
from matcher import BrandMatcher
from ers import COLLECTION_DATE, file_hash, img_path_namer
import shutil
from custom_browser import CustomDriver

# Init variables and assets
shop_id = 'waitrose_cellar'
root_url = 'http://www.waitrosecellar.com'
requests_cache.install_cache(fpath_namer(shop_id, 'requests_cache'))
country = 'UK'
searches, categories, products = {}, {}, {}
driver = CustomDriver(headless=True, download_images=False)


def getprice(pricestr):
    """Convert a sterling price string into integer pence.

    Commas and surrounding whitespace are removed first. Two shapes are
    recognised: ``£<pounds>.<pence>`` and ``<n>p``.

    Args:
        pricestr: Raw price text.

    Returns:
        ``pounds * 100 + pence`` or the bare ``<n>`` value; the empty string
        unchanged when ``pricestr`` is ``''``; None when the text fits
        neither shape.
    """
    if pricestr == '':
        return pricestr
    pricestr = pricestr.replace(',', '').strip()
    price = parse('£{pound:d}.{pence:d}', pricestr)
    if price is not None:
        return price.named['pound'] * 100 + price.named['pence']
    price = parse('{pence:d}p', pricestr)
    if price is None:
        # parse() yields None on a mismatch; report that instead of failing
        # with AttributeError on ``None.named``.
        return None
    return price.named['pence']


urls_ctgs_dict = {
コード例 #22
0
from matcher import BrandMatcher
from ers import COLLECTION_DATE, file_hash, img_path_namer
import shutil
from custom_browser import CustomDriver
from parse import parse

# Init variables and assets
shop_id = "nicks_wine_merchant"
root_url = "https://www.nicks.com.au/"
requests_cache.install_cache(fpath_namer(shop_id, 'requests_cache'))
country = "AUS"

searches, categories, products = {}, {}, {}
# If necessary
driver = CustomDriver(headless=True)


def getprice(pricestr):
    """Convert a dollar price such as "$1,234.50" into integer cents.

    Args:
        pricestr: Raw price text.

    Returns:
        ``dollars * 100 + cents`` when the text, reduced to digits, "." and
        "$", is exactly ``$<digits>.<digits>``; the empty string unchanged
        when ``pricestr`` is ``''``; None for any other text.
    """
    if pricestr == '':
        return pricestr
    cleaned = re.sub(r"[^0-9.$]", "", pricestr)
    # A "<digits>p" form cannot occur here because "p" is stripped above, so
    # anything that is not $D.D is reported as None instead of raising.
    match = re.fullmatch(r"\$(\d+)\.(\d+)", cleaned)
    if match is None:
        return None
    return int(match.group(1)) * 100 + int(match.group(2))


urls_ctgs_dict = {
コード例 #23
0
from create_csvs import create_csvs
from ers import all_keywords_usa as keywords, fpath_namer, mh_brands, clean_url, headers
from matcher import BrandMatcher
from ers import COLLECTION_DATE, file_hash, img_path_namer
import shutil
from custom_browser import CustomDriver
from parse import parse


# Init variables and assets
shop_id = 'twin_liquors'
root_url = 'http://www.twinliquors.com'
requests_cache.install_cache(fpath_namer(shop_id, 'requests_cache'))
country = 'USA'
searches, categories, products = {}, {}, {}
driver = CustomDriver(headless=True, download_images=False)

# Category name -> listing/search URL on twinliquors.com.
urls_ctgs_dict = {
    'champagne': 'http://twinliquors.com/shop/catalogsearch/result/?q=champagne',
    'sparkling': 'http://twinliquors.com/shop/catalogsearch/result/?q=Sparkling+wine',
    'still_wines': 'http://twinliquors.com/shop/wine.html',
    'whisky': 'http://twinliquors.com/shop/catalogsearch/result/?q=whisky',
    'cognac': 'http://twinliquors.com/shop/catalogsearch/result/?q=cognac',
    'vodka': 'http://twinliquors.com/shop/catalogsearch/result/?q=vodka',
    'red_wine': 'http://twinliquors.com/shop/catalogsearch/result/?q=red+wine',
    # Search for white wine here; this entry must not reuse the red-wine query.
    'white_wine': 'http://twinliquors.com/shop/catalogsearch/result/?q=white+wine',
    'tequila': 'http://twinliquors.com/shop/catalogsearch/result/?q=tequila',
    'gin': 'http://twinliquors.com/shop/catalogsearch/result/?q=gin',
    'rum': 'http://twinliquors.com/shop/catalogsearch/result/?q=rum',
    'liquor': 'http://twinliquors.com/shop/catalogsearch/result/?q=liquor',
}
コード例 #24
0
from create_csvs import create_csvs

from ers import all_keywords_usa as keywords, fpath_namer, mh_brands, clean_url, headers
from matcher import BrandMatcher
from ers import COLLECTION_DATE, file_hash, img_path_namer
import shutil
from parse import parse
from custom_browser import CustomDriver

# Init variables and assets
shop_id = 'reservebar'
root_url = "https://www.reservebar.com"
requests_cache.install_cache(fpath_namer(shop_id, 'requests_cache'))
country = 'USA'
searches, categories, products = {}, {}, {}
driver = CustomDriver(headless=True, download_images=True)


def getprice(pricestr):
    """Convert a price string into an integer number of minor units.

    Commas and surrounding whitespace are removed first. Two shapes are
    recognised: ``$<dollars>.<cents>`` and ``<n>p``.

    Args:
        pricestr: Raw price text.

    Returns:
        ``dollars * 100 + cents`` or the bare ``<n>`` value; the empty
        string unchanged when ``pricestr`` is ``''``; None when the text
        fits neither shape.
    """
    if pricestr == '':
        return pricestr
    pricestr = pricestr.replace(',', '').strip()
    price = parse('${dol:d}.{pence:d}', pricestr)
    if price is not None:
        return price.named['dol'] * 100 + price.named['pence']
    price = parse('{pence:d}p', pricestr)
    if price is None:
        # parse() yields None on a mismatch; report that instead of failing
        # with AttributeError on ``None.named``.
        return None
    return price.named['pence']


urls_ctgs_dict = {
コード例 #25
0
from matcher import BrandMatcher
from ers import COLLECTION_DATE, file_hash, img_path_namer
import shutil
from custom_browser import CustomDriver
from parse import parse

# Init variables and assets
shop_id = "freshdirect"
root_url = "http://www.freshdirect.com"
requests_cache.install_cache(fpath_namer(shop_id, 'requests_cache'))
country = "USA"

searches, categories, products = {}, {}, {}
# If necessary
driver = CustomDriver(headless=False)


def getprice(pricestr):
    """Turn a displayed dollar price into an integer amount.

    The empty string is returned unchanged.

    NOTE(review): this listing appears to stop mid-function, right after the
    whole-dollar fallback parse; no return statement for a parsed price is
    visible -- confirm against the complete file.
    """
    if pricestr == '':
        return pricestr
    # Drop a per-item suffix: keep only the text before "/ea".
    if "/ea" in pricestr:
        pricestr = pricestr.split('/ea')[0]
    print(pricestr.count('$'), pricestr)
    # With exactly two "$" signs, keep only the text before the first space.
    if pricestr.count('$') == 2:
        pricestr = pricestr.split(' ')[0]
    print(pricestr)
    # Keep only digits, "." and "$".
    pricestr = re.sub("[^0-9.$]", "", pricestr)
    price = parse('${pound:d}.{pence:d}', pricestr)
    if price is None:
        # Fall back to a whole-dollar amount such as "$12".
        price = parse('${pound:d}', pricestr)
コード例 #26
0
from lxml import etree

parser = etree.HTMLParser()
from custom_browser import CustomDriver
import random
# Set up the browser and start an endless random walk over leparisien.fr.
driver = CustomDriver(headless=False, firefox=True, download_images=True)
random.choice([1, 2, 3])

count = 0
driver.get('https://www.leparisien.fr')
while True:
    print('Looping', count)
    # Collect the href of every link on the current page.
    hrefs = [
        anchor.get_attribute('href')
        for anchor in driver.driver.find_elements_by_xpath("//a[@href]")
    ]
    on_site = [href for href in hrefs if "www.leparisien.fr" in href]
    php_pages = [href for href in on_site if '.php' in href]
    # Prefer on-site ".php" links; otherwise any on-site link.
    candidates = php_pages or on_site
    if candidates:
        url = random.choice(candidates)
        count += 1
        print(count, url)
        driver.get(url)
    else:
        # No on-site link found: go back to the home page.
        driver.get('https://www.leparisien.fr')
コード例 #27
0
from ers import COLLECTION_DATE, file_hash, img_path_namer
import shutil
from custom_browser import CustomDriver
from parse import parse
from validators import validate_raw_files
from create_csvs import create_csvs

# Init variables and assets
shop_id = "bodeboca"
root_url = "https://bodeboca.com"
requests_cache.install_cache(fpath_namer(shop_id, 'requests_cache'))
country = "ES"

searches, categories, products = {}, {}, {}
# If necessary
driver = CustomDriver(headless=False, download_images=True, firefox=True)


def getprice(pricestr):
    """Convert a euro price such as "12,50 €" into integer cents.

    When the text holds several "€"-terminated amounts, only the first one
    is used.

    Args:
        pricestr: Raw price text.

    Returns:
        ``euros * 100 + cents`` when the first amount, reduced to digits and
        ",", is exactly ``<digits>,<digits>``; the empty string unchanged
        when ``pricestr`` is ``''``; None for any other text.
    """
    if pricestr == '':
        return pricestr
    cleaned = re.sub(r"[^0-9,€]", "", pricestr)
    # Keep the text up to the first "€" and re-append the sign.
    first_amount = cleaned.split('€')[0] + '€'
    # A "<digits>p" form cannot occur here because "p" is stripped above, so
    # anything that is not D,D€ is reported as None instead of raising.
    match = re.fullmatch(r"(\d+),(\d+)€", first_amount)
    if match is None:
        return None
    return int(match.group(1)) * 100 + int(match.group(2))

コード例 #28
0
from ers import COLLECTION_DATE, file_hash, img_path_namer
import shutil
from custom_browser import CustomDriver
from parse import parse


# Init variables and assets
shop_id = "wallys"
root_url = "http://www.wallywine.com/" 
requests_cache.install_cache(fpath_namer(shop_id, 'requests_cache'))
country = "USA"


searches, categories, products = {}, {}, {}
# If necessary
driver = CustomDriver(headless=True)


def getprice(pricestr):
    """Convert a dollar price such as "$1,234.50" into integer cents.

    Args:
        pricestr: Raw price text.

    Returns:
        ``dollars * 100 + cents`` when the text, reduced to digits, "." and
        "$", is exactly ``$<digits>.<digits>``; the empty string unchanged
        when ``pricestr`` is ``''``; None for any other text.
    """
    if pricestr == '':
        return pricestr
    cleaned = re.sub(r"[^0-9.$]", "", pricestr)
    # A "<digits>p" form cannot occur here because "p" is stripped above, so
    # anything that is not $D.D is reported as None instead of raising.
    match = re.fullmatch(r"\$(\d+)\.(\d+)", cleaned)
    if match is None:
        return None
    return int(match.group(1)) * 100 + int(match.group(2))

urls_ctgs_dict = {
    "vodka": "https://www.wallywine.com/spirits-brews/spirits/type/vodka.html", 
コード例 #29
0
from create_csvs import create_csvs

from ers import all_keywords_uk as keywords
from ers import fpath_namer, mh_brands, clean_url, headers
from matcher import BrandMatcher
from ers import COLLECTION_DATE, file_hash, img_path_namer
import shutil
from custom_browser import CustomDriver

# Init variables and assets
shop_id = 'sainsbury'
root_url = 'https://www.sainsburys.co.uk'
requests_cache.install_cache(fpath_namer(shop_id, 'requests_cache'))
country = 'UK'
searches, categories, products = {}, {}, {}
driver = CustomDriver(headless=True, download_images=False)


def getprice(pricestr):
    """Convert a sterling price string into integer pence.

    Commas and surrounding whitespace are removed first. Two shapes are
    recognised: ``£<pounds>.<pence>`` and ``<n>p``.

    Args:
        pricestr: Raw price text.

    Returns:
        ``pounds * 100 + pence`` or the bare ``<n>`` value; the empty string
        unchanged when ``pricestr`` is ``''``; None when the text fits
        neither shape.
    """
    if pricestr == '':
        return pricestr
    pricestr = pricestr.replace(',', '').strip()
    price = parse('£{pound:d}.{pence:d}', pricestr)
    if price is not None:
        return price.named['pound'] * 100 + price.named['pence']
    price = parse('{pence:d}p', pricestr)
    if price is None:
        # parse() yields None on a mismatch; report that instead of failing
        # with AttributeError on ``None.named``.
        return None
    return price.named['pence']


urls_ctgs_dict = {
コード例 #30
0
from ers import COLLECTION_DATE, file_hash, img_path_namer
import shutil
from custom_browser import CustomDriver
from parse import parse
from validators import validate_raw_files
from create_csvs import create_csvs

# Init variables and assets
shop_id = "lavinia_es"
root_url = "http://www.lavinia.es"
requests_cache.install_cache(fpath_namer(shop_id, 'requests_cache'))
country = "ES"

searches, categories, products = {}, {}, {}
# If necessary
driver = CustomDriver(headless=False)


def getprice(pricestr):
    """Convert a euro price such as "1.234,50 €" into integer cents.

    Args:
        pricestr: Raw price text.

    Returns:
        ``euros * 100 + cents`` when the text, reduced to digits, "," and
        "€", is exactly ``<digits>,<digits>€``; the empty string unchanged
        when ``pricestr`` is ``''``; None for any other text.
    """
    if pricestr == '':
        return pricestr
    cleaned = re.sub(r"[^0-9,€]", "", pricestr)
    # A "<digits>p" form cannot occur here because "p" is stripped above, so
    # anything that is not D,D€ is reported as None instead of raising.
    match = re.fullmatch(r"(\d+),(\d+)€", cleaned)
    if match is None:
        return None
    return int(match.group(1)) * 100 + int(match.group(2))


urls_ctgs_dict = {