예제 #1
0
def get_product_info(response):
    """Scrape the product-level fields from a product page.

    The page is parsed with ``get_soup`` (presumably into a BeautifulSoup
    tree -- confirm against its definition). Every field falls back to an
    empty value when its markup is missing, so an incomplete page yields
    partial data instead of raising.

    Args:
        response: Whatever ``get_soup`` accepts for a product page.

    Returns:
        A 7-tuple ``(variant_links, product_name, product_title,
        product_description, product_images, stock_status, meta)``:

        * ``variant_links`` -- ``base_url`` + href of each
          ``a.tealium-skuLinkPgroup`` found in a ``tr.basic-info`` row.
        * ``product_name`` -- stripped ``<h1>`` text of ``div.prod-intro``.
        * ``product_title`` -- always ``''`` for this page layout.
        * ``product_description`` -- stripped text of ``p.prod-desc``.
        * ``product_images`` -- ``base_url`` + src of the ``<img>`` in each
          ``div.prod-gallery-item``.
        * ``stock_status`` -- stripped ``<strong>`` text of
          ``div.prod-intro``.
        * ``meta`` -- ``content`` of ``<meta name="description">``.
    """
    soup = get_soup(response)

    # Look the intro block up once; both the name and the stock status
    # live inside it.
    intro = soup.find("div", {"class": "prod-intro"})
    heading = intro.h1 if intro is not None else None
    product_name = heading.text.strip() if heading is not None else ''
    stock_tag = intro.strong if intro is not None else None
    stock_status = stock_tag.text.strip() if stock_tag is not None else ''

    product_title = ''

    # A document without <head> has soup.head == None.
    head = soup.head
    meta_tag = (head.find("meta", {"name": "description"})
                if head is not None else None)
    meta = meta_tag.attrs.get('content', '') if meta_tag is not None else ''

    desc_tag = soup.find("p", {"class": "prod-desc"})
    product_description = (desc_tag.text.strip()
                           if desc_tag is not None else '')

    product_images = []
    for image_div in soup.find_all("div", {"class": "prod-gallery-item"}):
        img = image_div.find('img')
        src = img.get('src') if img is not None else None
        if src is not None:
            product_images.append(base_url + src)

    variant_links = []
    for row in soup.find_all("tr", {"class": "basic-info"}):
        anchor = row.find("a", {"class": "tealium-skuLinkPgroup"})
        href = anchor.get('href') if anchor is not None else None
        if href is not None:
            variant_links.append(base_url + href)

    return (variant_links, product_name, product_title, product_description,
            product_images, stock_status, meta)
예제 #2
0
def get_product_type_2_name_image_description(response):
    """Scrape name, image, description and sub-type links of a type-2 page.

    The page is parsed with ``get_soup``. Missing markup produces empty
    values rather than exceptions.

    Args:
        response: Whatever ``get_soup`` accepts for a category page.

    Returns:
        A 4-tuple ``(sub_type_links, type_name, type_image,
        type_description)``:

        * ``sub_type_links`` -- for each ``li.col-6.category-item`` inside
          ``div.col-lg-8.category-items``: ``base_url`` + the href of its
          first ``<a>`` + ``'?pageSize=All'``.
        * ``type_name`` -- stripped ``<h1>`` text of the
          ``container container-with-padding`` div, or ``''``.
        * ``type_image`` -- ``base_url`` + src of the ``<img>`` in the
          primary category item, or ``''``.
        * ``type_description`` -- stripped ``<span>`` text of the primary
          category item, or ``''``.
    """
    soup = get_soup(response)

    container = soup.find("div",
                          {"class": "container container-with-padding"})
    heading = container.h1 if container is not None else None
    type_name = heading.text.strip() if heading is not None else ''

    # The image and the description share one parent element; fetch it once.
    primary_item = soup.find("div", {
        "class": "col-lg-4 category-primary-item row align-content-start"
    })
    image_tag = primary_item.img if primary_item is not None else None
    image_src = image_tag.get('src') if image_tag is not None else None
    type_image = base_url + image_src if image_src is not None else ''
    span_tag = primary_item.span if primary_item is not None else None
    type_description = span_tag.text.strip() if span_tag is not None else ''

    items_div = soup.find("div", {"class": "col-lg-8 category-items"})
    items = (items_div.find_all("li", {"class": "col-6 category-item"})
             if items_div is not None else [])

    sub_type_links = []
    for li in items:
        # Guard the anchor, not the <li>: a found tag is always truthy, so
        # the failure mode is a list item without a link.
        anchor = li.a
        href = anchor.get('href') if anchor is not None else None
        if href is not None:
            sub_type_links.append(base_url + href + '?pageSize=All')

    return sub_type_links, type_name, type_image, type_description
예제 #3
0
def get_product_urls(response):
    """Return the absolute URL of every product listed on the page.

    The page is parsed with ``get_soup``. Each ``div`` whose class is
    ``product-details span6`` contributes ``base_url`` plus the href of the
    ``<a>`` inside its ``<h5>``, in document order.

    Args:
        response: Whatever ``get_soup`` accepts for a listing page.

    Returns:
        A list of URL strings; empty when no product block is found.
    """
    listing = get_soup(response)
    cards = listing.find_all('div', {'class': 'product-details span6'})
    return [base_url + card.h5.a['href'] for card in cards]
예제 #4
0
def get_product_type_3_name_image_description(response):
    """Scrape the name and product links of a type-3 (shop) page.

    The page is parsed with ``get_soup``. Like the type-1 and type-2
    variants of this function, a missing heading yields ``''`` instead of
    raising, and wrappers without a usable link are skipped.

    Args:
        response: Whatever ``get_soup`` accepts for a shop page.

    Returns:
        A 4-tuple ``(product_links, type_name, type_image,
        type_description)``:

        * ``product_links`` -- for each ``div.product-wrapper``:
          ``base_url`` + the href of its first ``<a>`` + ``'?pageSize=All'``.
        * ``type_name`` -- stripped ``<h1>`` text of the
          ``container container-with-padding shop-page`` div, or ``''``.
        * ``type_image`` -- always ``None`` for this page layout.
        * ``type_description`` -- always ``''`` for this page layout.
    """
    soup = get_soup(response)

    container = soup.find(
        "div", {"class": "container container-with-padding shop-page"})
    heading = container.h1 if container is not None else None
    type_name = heading.text.strip() if heading is not None else ''

    type_image = None
    type_description = ''

    product_links = []
    for wrapper in soup.find_all("div", {"class": "product-wrapper"}):
        anchor = wrapper.a
        href = anchor.get('href') if anchor is not None else None
        if href is not None:
            product_links.append(base_url + href + '?pageSize=All')

    return product_links, type_name, type_image, type_description
예제 #5
0
def get_product_type_1_name_image_description(response):
    """Scrape the name and sub-type links of a type-1 category page.

    The page is parsed with ``get_soup``. Missing markup produces empty
    values rather than exceptions.

    Args:
        response: Whatever ``get_soup`` accepts for a category page.

    Returns:
        A 4-tuple ``(sub_type_links, type_name, type_image,
        type_description)``:

        * ``sub_type_links`` -- for each
          ``row category-wrapper has-margin-top`` div: ``base_url`` + the
          href of the ``<a>`` inside its category title ``<h2>``.
        * ``type_name`` -- stripped ``<h1>`` text of the
          ``container container-with-padding`` div, or ``''``.
        * ``type_image`` -- always ``None`` for this page layout.
        * ``type_description`` -- always ``''`` for this page layout.
    """
    soup = get_soup(response)

    container = soup.find("div",
                          {"class": "container container-with-padding"})
    heading = container.h1 if container is not None else None
    type_name = heading.text.strip() if heading is not None else ''

    type_image = None
    type_description = ''

    title_attrs = {
        "class": "is-bold category-primary-title d-none d-lg-block"
    }
    sub_type_links = []
    for wrapper in soup.find_all(
            "div", {"class": "row category-wrapper has-margin-top"}):
        # One lookup per wrapper; skip wrappers with no title or no link.
        title = wrapper.find("h2", title_attrs)
        anchor = title.a if title is not None else None
        href = anchor.get('href') if anchor is not None else None
        if href is not None:
            sub_type_links.append(base_url + href)

    return sub_type_links, type_name, type_image, type_description
예제 #6
0
def get_variant_info(response):
    """Scrape the details of a single product variant (SKU) page.

    The page is parsed with ``get_soup``. Every field degrades to an empty
    value when its markup is missing or shorter than expected, so one odd
    page does not abort the scrape.

    Args:
        response: Whatever ``get_soup`` accepts for a variant page.

    Returns:
        An 8-tuple ``(title, description, variant_images, item_code,
        availability, standard_pack, pricing, specifications)``:

        * ``title`` -- stripped ``<h1>`` text of ``div.prod-intro``.
        * ``description`` -- stripped text of ``p.prod-desc``.
        * ``variant_images`` -- ``base_url`` + src of the ``<img>`` in each
          ``div.prod-gallery-item``.
        * ``item_code`` -- text of the 2nd ``<span>`` in ``p.prod-meta``.
        * ``availability`` -- text of the 3rd ``<span>`` in ``p.prod-meta``.
        * ``standard_pack`` -- the 2nd ``<span>`` of
          ``div#broadleaf-sku-details`` as an ``int``; ``0`` when absent or
          not numeric.
        * ``pricing`` -- ``{'quantity': [...], 'unit_price': [...]}`` with
          one entry per row of the ``table sku-price-table`` body.
        * ``specifications`` -- ``{header text: cell text}`` for each row of
          the table in ``div.section.has-essentra-row``; the key gets a
          ``'metric'`` / ``'imperial'`` suffix when the row carries the
          ``attr-dim-METRIC`` / ``attr-dim-IMPERIAL`` class.
    """
    soup = get_soup(response)

    intro = soup.find("div", {"class": "prod-intro"})
    heading = intro.h1 if intro is not None else None
    title = heading.text.strip() if heading is not None else ''

    desc_tag = soup.find("p", {"class": "prod-desc"})
    description = desc_tag.text.strip() if desc_tag is not None else ''

    variant_images = []
    for image_div in soup.find_all("div", {"class": "prod-gallery-item"}):
        img = image_div.find('img')
        src = img.get('src') if img is not None else None
        if src is not None:
            variant_images.append(base_url + src)

    # Item code and availability are positional spans of the same <p>.
    # Check the length, not just non-emptiness, before indexing.
    meta_p = soup.find("p", {"class": "prod-meta"})
    meta_spans = meta_p.find_all("span") if meta_p is not None else []
    item_code = meta_spans[1].text.strip() if len(meta_spans) > 1 else ''
    availability = meta_spans[2].text.strip() if len(meta_spans) > 2 else ''

    sku_details = soup.find("div", {"id": "broadleaf-sku-details"})
    pack_spans = (sku_details.find_all("span")
                  if sku_details is not None else [])
    try:
        standard_pack = int(pack_spans[1].text.strip())
    except (IndexError, ValueError):
        # Span missing, or its text is not a plain integer.
        standard_pack = 0

    quantities = []
    unit_prices = []
    pricing_table = soup.find("table", {"class": "table sku-price-table"})
    pricing_body = pricing_table.tbody if pricing_table is not None else None
    pricing_rows = (pricing_body.find_all("tr")
                    if pricing_body is not None else [])
    for tr in pricing_rows:
        cells = tr.find_all("td")
        if len(cells) < 2:
            # Skip malformed rows so both lists stay index-aligned.
            continue
        # The quantity label is the first two <b> texts concatenated.
        bolds = cells[0].find_all("b")
        quantities.append(''.join(b.text.strip() for b in bolds[:2]))
        unit_prices.append(cells[1].text.strip())
    pricing = {'quantity': quantities, 'unit_price': unit_prices}

    specifications = {}
    spec_section = soup.find("div", {"class": "section has-essentra-row"})
    spec_table = spec_section.table if spec_section is not None else None
    spec_body = spec_table.tbody if spec_table is not None else None
    spec_rows = spec_body.find_all("tr") if spec_body is not None else []
    for row in spec_rows:
        if row.th is None or row.td is None:
            continue
        key = row.th.text.replace('\n', '')
        # Rows without a class attribute are plain (unit-less) specs.
        row_classes = row.attrs.get('class', [])
        if 'attr-dim-METRIC' in row_classes:
            key += 'metric'
        if 'attr-dim-IMPERIAL' in row_classes:
            key += 'imperial'
        specifications[key] = row.td.text.replace('\n', '')

    return (title, description, variant_images, item_code, availability,
            standard_pack, pricing, specifications)
예제 #7
0
def get_product_info(response):
    """Scrape a product page together with its table of variants.

    The page is parsed with ``get_soup``. Missing or short markup yields
    empty values instead of exceptions.

    Args:
        response: Whatever ``get_soup`` accepts for a product page.

    Returns:
        An 8-tuple ``(variants, product_name, product_title,
        product_description, product_images, product_documents,
        stock_status, meta)``:

        * ``variants`` -- one dict per ``tr.product-item`` in the body of
          ``table#product-items`` with the keys ``title``, ``descripiton``
          (key spelling kept as-is for existing consumers),
          ``variant_images``, ``item_code``, ``availability``,
          ``standard_pack``, ``pricing`` and ``specifications``.
        * ``product_name`` / ``product_title`` -- stripped ``<h1>`` text of
          ``div#product`` (both carry the same value).
        * ``product_description`` -- stripped text of ``div#overview``.
        * ``product_images`` -- at most one URL: ``base_url`` + src of the
          ``<img>`` in ``div#product-page-html``.
        * ``product_documents`` -- ``'https:' + href + '@' + text`` for each
          ``<li>`` in ``div#documents`` that contains a link.
        * ``stock_status`` -- always ``''`` for this page layout.
        * ``meta`` -- ``content`` of ``<meta name="description">``.
    """
    soup = get_soup(response)

    product_div = soup.find('div', {'id': 'product'})
    heading = product_div.h1 if product_div is not None else None
    product_name = heading.text.strip() if heading is not None else ''
    product_title = product_name

    head = soup.head
    meta_tag = (head.find("meta", {"name": "description"})
                if head is not None else None)
    meta = meta_tag.attrs.get('content', '') if meta_tag is not None else ''
    stock_status = ''

    # Product image.
    product_images = []
    page_html = soup.find('div', {'id': 'product-page-html'})
    img = page_html.img if page_html is not None else None
    src = img.get('src') if img is not None else None
    img_url = base_url + src if src is not None else None
    if img_url:
        product_images.append(img_url)

    # Product documents, encoded as "<url>@<label>".
    product_documents = []
    documents_div = soup.find('div', {'id': 'documents'})
    doc_items = (documents_div.find_all('li')
                 if documents_div is not None else [])
    for doc in doc_items:
        link = doc.a
        href = link.get('href') if link is not None else None
        if href is not None:
            product_documents.append('https:' + href + '@' +
                                     doc.text.strip())

    # Product description.
    overview_div = soup.find('div', {'id': 'overview'})
    product_description = (overview_div.text.strip()
                           if overview_div is not None else '')

    # Product variants.
    variants = []
    variants_table = soup.find('table', {'id': 'product-items'})
    table_body = variants_table.tbody if variants_table is not None else None
    rows = (table_body.find_all('tr', {'class': 'product-item'})
            if table_body is not None else [])

    # The header row is the same for every variant, so read it once. The
    # first two and last two columns are not specification columns.
    table_head = variants_table.thead if variants_table is not None else None
    header_cells = table_head.find_all('th') if table_head is not None else []
    spec_keys = [th.text.strip() for th in header_cells[2:-2]]

    for tr in rows:
        tds = tr.find_all('td')

        name_div = (tds[1].find('div', {'class': 'product-item-name'})
                    if len(tds) > 1 else None)
        sku_div = tr.find('div', {'itemprop': 'sku'})

        # Price text looks like "<unit price>/<quantity>"; fall back to
        # '0' for whichever part is missing.
        price_div = tr.find('div', {'itemprop': 'price'})
        price_text = price_div.text.strip() if price_div is not None else None
        if price_text:
            price_parts = price_text.split('/')
            unit_price = price_parts[0]
            quantity = price_parts[1] if len(price_parts) > 1 else '0'
        else:
            unit_price = quantity = '0'

        # Spec values start at the third cell; zip stops at the shorter of
        # the two, so a short row cannot raise IndexError.
        specifications = dict(
            zip(spec_keys, (td.text.strip() for td in tds[2:])))

        variants.append({
            'title': (name_div.text.strip()
                      if name_div is not None else product_title),
            'descripiton': '',
            'variant_images': [],
            'item_code': (sku_div.text.strip()
                          if sku_div is not None else None),
            'availability': '',
            'standard_pack': 0,
            'pricing': {
                'quantity': [quantity],
                'unit_price': [unit_price],
            },
            'specifications': specifications,
        })

    # Test page noted by the original author (presumably):
    # https://www.epsi.com/hksc-series-sheet-and-pipe-suspender-hook

    return (variants, product_name, product_title, product_description,
            product_images, product_documents, stock_status, meta)