def get_product_info(response):
    """Extract product-level fields and variant links from a product page.

    The page is parsed with ``get_soup``. Any element that is missing from
    the page yields an empty value for its field instead of raising.

    NOTE(review): another ``get_product_info`` is defined later in this
    source; if both live in the same module the later one replaces this
    one -- confirm which is intended.

    Args:
        response: Whatever ``get_soup`` accepts (passed through untouched).

    Returns:
        tuple: ``(variant_links, product_name, product_title,
        product_description, product_images, stock_status, meta)``.
        ``product_title`` is always ``''``.
    """
    soup = get_soup(response)

    # The intro block holds both the name (<h1>) and stock status (<strong>);
    # look it up once and guard each child separately.
    intro = soup.find("div", {"class": "prod-intro"})
    name_tag = intro.h1 if intro is not None else None
    stock_tag = intro.strong if intro is not None else None
    product_name = name_tag.text.strip() if name_tag is not None else ''
    stock_status = stock_tag.text.strip() if stock_tag is not None else ''
    product_title = ''

    head = soup.head
    meta_tag = None
    if head is not None:
        meta_tag = head.find("meta", {"name": "description"})
    meta = meta_tag.get('content', '') if meta_tag is not None else ''

    description_tag = soup.find("p", {"class": "prod-desc"})
    product_description = ''
    if description_tag is not None:
        product_description = description_tag.text.strip()

    product_images = []
    for image_div in soup.find_all("div", {"class": "prod-gallery-item"}):
        img = image_div.find('img')
        src = img.get('src') if img is not None else None
        if src is not None:
            product_images.append(base_url + src)

    variant_links = []
    for row in soup.find_all("tr", {"class": "basic-info"}):
        link = row.find("a", {"class": "tealium-skuLinkPgroup"})
        href = link.get('href') if link is not None else None
        if href is not None:
            variant_links.append(base_url + href)

    return (variant_links, product_name, product_title, product_description,
            product_images, stock_status, meta)
def get_product_type_2_name_image_description(response):
    """Extract name, image, description and sub-type links from a category page.

    Every element is optional: a missing container or missing child tag
    produces an empty value rather than an exception.

    Args:
        response: Whatever ``get_soup`` accepts (passed through untouched).

    Returns:
        tuple: ``(sub_type_links, type_name, type_image, type_description)``.
        Each sub-type link is ``base_url + href + '?pageSize=All'``.
        ``type_image`` and ``type_description`` are ``''`` when absent.
    """
    soup = get_soup(response)

    container = soup.find("div", {"class": "container container-with-padding"})
    heading = container.h1 if container is not None else None
    type_name = heading.text.strip() if heading is not None else ''

    # Image and description both live in the primary-item column.
    primary = soup.find(
        "div",
        {"class": "col-lg-4 category-primary-item row align-content-start"})
    image_tag = primary.img if primary is not None else None
    span_tag = primary.span if primary is not None else None
    image_src = image_tag.get('src') if image_tag is not None else None
    type_image = base_url + image_src if image_src is not None else ''
    type_description = span_tag.text.strip() if span_tag is not None else ''

    items_div = soup.find("div", {"class": "col-lg-8 category-items"})
    items = []
    if items_div is not None:
        items = items_div.find_all("li", {"class": "col-6 category-item"})

    sub_type_links = []
    for li in items:
        # A list item without a usable link is skipped instead of raising.
        anchor = li.a
        href = anchor.get('href') if anchor is not None else None
        if href is not None:
            sub_type_links.append(base_url + href + '?pageSize=All')

    return sub_type_links, type_name, type_image, type_description
def get_product_urls(response):
    """Collect absolute product URLs from a listing page.

    For every ``div`` with class ``product-details span6`` the ``href`` of
    the link nested in its ``h5`` is prefixed with ``base_url``.

    Args:
        response: Whatever ``get_soup`` accepts (passed through untouched).

    Returns:
        list: One URL string per matching ``div``, in document order.

    Raises:
        Nothing is guarded here: a matching ``div`` lacking the nested
        ``h5``/``a``/``href`` propagates the resulting lookup error.
    """
    page = get_soup(response)
    return [
        base_url + card.h5.a['href']
        for card in page.find_all('div', {'class': 'product-details span6'})
    ]
def get_product_type_3_name_image_description(response):
    """Extract the heading and product links from a shop-style category page.

    Args:
        response: Whatever ``get_soup`` accepts (passed through untouched).

    Returns:
        tuple: ``(product_links, type_name, type_image, type_description)``
        where ``type_image`` is always ``None`` and ``type_description`` is
        always ``''``. Each link is ``base_url + href + '?pageSize=All'``.

    Raises:
        No lookups are guarded: a missing shop-page container, heading, or
        link inside a product wrapper propagates the lookup error.
    """
    page = get_soup(response)

    # Resolve the heading first so a missing container fails before any
    # links are collected, exactly as the lookups are ordered here.
    shop_container = page.find(
        "div", {"class": "container container-with-padding shop-page"})
    heading_text = shop_container.h1.text.strip()

    links = [
        base_url + wrapper.a['href'] + '?pageSize=All'
        for wrapper in page.find_all("div", {"class": "product-wrapper"})
    ]
    return links, heading_text, None, ''
def get_product_type_1_name_image_description(response):
    """Extract the heading and sub-type links from a top-level category page.

    Missing elements produce empty values rather than exceptions; category
    wrappers without a usable title link are skipped.

    Args:
        response: Whatever ``get_soup`` accepts (passed through untouched).

    Returns:
        tuple: ``(sub_type_links, type_name, type_image, type_description)``
        where ``type_image`` is always ``None`` and ``type_description`` is
        always ``''``.
    """
    soup = get_soup(response)

    container = soup.find("div", {"class": "container container-with-padding"})
    heading = container.h1 if container is not None else None
    type_name = heading.text.strip() if heading is not None else ''
    type_image = None
    type_description = ''

    sub_type_links = []
    wrappers = soup.find_all(
        "div", {"class": "row category-wrapper has-margin-top"})
    for wrapper in wrappers:
        title = wrapper.find(
            "h2",
            {"class": "is-bold category-primary-title d-none d-lg-block"})
        anchor = title.a if title is not None else None
        href = anchor.get('href') if anchor is not None else None
        if href is not None:
            sub_type_links.append(base_url + href)

    return sub_type_links, type_name, type_image, type_description
def get_variant_info(response):
    """Extract the details of a single variant (SKU) page.

    Every element is optional. A missing element, or one with fewer child
    tags than expected, produces an empty/default value instead of raising.

    Args:
        response: Whatever ``get_soup`` accepts (passed through untouched).

    Returns:
        tuple: ``(title, description, variant_images, item_code,
        availability, standard_pack, pricing, specifications)`` where

        * ``standard_pack`` is an ``int`` (``0`` when absent or not numeric),
        * ``pricing`` is ``{'quantity': [...], 'unit_price': [...]}`` with
          parallel lists, one entry per pricing-table row,
        * ``specifications`` maps row header text to row value text, with
          ``'metric'`` / ``'imperial'`` appended to the key for rows that
          carry the ``attr-dim-METRIC`` / ``attr-dim-IMPERIAL`` class.
    """
    soup = get_soup(response)

    intro = soup.find("div", {"class": "prod-intro"})
    heading = intro.h1 if intro is not None else None
    title = heading.text.strip() if heading is not None else ''

    description_tag = soup.find("p", {"class": "prod-desc"})
    description = ''
    if description_tag is not None:
        description = description_tag.text.strip()

    variant_images = []
    for image_div in soup.find_all("div", {"class": "prod-gallery-item"}):
        img = image_div.find('img')
        src = img.get('src') if img is not None else None
        if src is not None:
            variant_images.append(base_url + src)

    # Item code and availability are read from the 2nd and 3rd <span> of the
    # meta paragraph; check the length so a short paragraph cannot raise.
    meta_paragraph = soup.find("p", {"class": "prod-meta"})
    meta_spans = []
    if meta_paragraph is not None:
        meta_spans = meta_paragraph.find_all("span")
    item_code = meta_spans[1].text.strip() if len(meta_spans) > 1 else ''
    availability = meta_spans[2].text.strip() if len(meta_spans) > 2 else ''

    # Standard pack is read from the 2nd <span> of the SKU details block.
    sku_details = soup.find("div", {"id": "broadleaf-sku-details"})
    pack_spans = []
    if sku_details is not None:
        pack_spans = sku_details.find_all("span")
    standard_pack = 0
    if len(pack_spans) > 1:
        try:
            standard_pack = int(pack_spans[1].text.strip())
        except ValueError:
            # Non-numeric text: fall back to 0.
            standard_pack = 0

    quantities = []
    unit_prices = []
    pricing_table = soup.find("table", {"class": "table sku-price-table"})
    pricing_body = pricing_table.tbody if pricing_table is not None else None
    pricing_rows = []
    if pricing_body is not None:
        pricing_rows = pricing_body.find_all("tr")
    for tr in pricing_rows:
        cells = tr.find_all("td")
        if len(cells) < 2:
            # Skip malformed rows so the two lists stay aligned.
            continue
        # Quantity is the concatenation of the first two <b> texts.
        bolds = cells[0].find_all("b")
        quantities.append(''.join(b.text.strip() for b in bolds[:2]))
        unit_prices.append(cells[1].text.strip())
    pricing = {'quantity': quantities, 'unit_price': unit_prices}

    specifications = {}
    spec_section = soup.find("div", {"class": "section has-essentra-row"})
    spec_table = spec_section.table if spec_section is not None else None
    spec_body = spec_table.tbody if spec_table is not None else None
    spec_rows = spec_body.find_all("tr") if spec_body is not None else []
    for row in spec_rows:
        header_cell = row.th
        value_cell = row.td
        if header_cell is None or value_cell is None:
            continue
        key = header_cell.text.replace('\n', '')
        # Rows without a class attribute simply get no unit suffix.
        row_classes = row.get('class', [])
        if 'attr-dim-METRIC' in row_classes:
            key += 'metric'
        if 'attr-dim-IMPERIAL' in row_classes:
            key += 'imperial'
        specifications[key] = value_cell.text.replace('\n', '')

    return (title, description, variant_images, item_code, availability,
            standard_pack, pricing, specifications)
def get_product_info(response):
    """Extract product fields, documents and per-row variants from a product page.

    Missing elements produce empty/default values instead of raising, and
    malformed variant rows are tolerated (short rows simply yield fewer
    specification entries).

    NOTE(review): an earlier function with this same name appears in this
    source; if both live in the same module this definition replaces it --
    confirm that is intended.

    Args:
        response: Whatever ``get_soup`` accepts (passed through untouched).

    Returns:
        tuple: ``(variants, product_name, product_title,
        product_description, product_images, product_documents,
        stock_status, meta)``. ``stock_status`` is always ``''``. Each
        entry of ``variants`` is a dict with the keys ``title``,
        ``descripiton``, ``variant_images``, ``item_code``,
        ``availability``, ``standard_pack``, ``pricing`` and
        ``specifications``. Each document entry is
        ``'https:' + href + '@' + link text``.
    """
    soup = get_soup(response)

    # Name and title are both taken from the same <h1>.
    product_div = soup.find('div', {'id': 'product'})
    heading = product_div.h1 if product_div is not None else None
    product_name = heading.text.strip() if heading is not None else ''
    product_title = product_name

    head = soup.head
    meta_tag = None
    if head is not None:
        meta_tag = head.find("meta", {"name": "description"})
    meta = meta_tag.get('content', '') if meta_tag is not None else ''
    stock_status = ''

    # Product image: first <img> inside the product-page-html block.
    product_images = []
    html_div = soup.find('div', {'id': 'product-page-html'})
    img = html_div.img if html_div is not None else None
    src = img.get('src') if img is not None else None
    if src is not None:
        img_url = base_url + src
        if img_url:
            product_images.append(img_url)

    # Product documents: one entry per <li> that actually carries a link.
    product_documents = []
    documents_div = soup.find('div', {'id': 'documents'})
    document_items = []
    if documents_div is not None:
        document_items = documents_div.find_all('li')
    for doc in document_items:
        anchor = doc.a
        href = anchor.get('href') if anchor is not None else None
        if href is not None:
            product_documents.append('https:' + href + '@' + doc.text.strip())

    # Product description: full text of the overview block.
    overview_div = soup.find('div', {'id': 'overview'})
    product_description = ''
    if overview_div is not None:
        product_description = overview_div.text.strip()

    # Product variants: one per 'product-item' row of the items table.
    variants = []
    variants_table = soup.find('table', {'id': 'product-items'})
    table_head = variants_table.thead if variants_table is not None else None
    table_body = variants_table.tbody if variants_table is not None else None

    # Specification names come from the header, skipping the first two and
    # last two columns. They are the same for every row, so read them once.
    header_cells = table_head.find_all('th') if table_head is not None else []
    spec_keys = [th.text.strip() for th in header_cells[2:-2]]

    rows = []
    if table_body is not None:
        rows = table_body.find_all('tr', {'class': 'product-item'})

    for tr in rows:
        tds = tr.find_all('td')

        name_div = None
        if len(tds) > 1:
            name_div = tds[1].find('div', {'class': 'product-item-name'})
        sku_div = tr.find('div', {'itemprop': 'sku'})
        price_div = tr.find('div', {'itemprop': 'price'})

        # Price text is split on '/': the part before is the unit price and
        # the part after is the quantity. Either falls back to '0'.
        price_text = price_div.text.strip() if price_div is not None else None
        unit_price = '0'
        quantity = '0'
        if price_text:
            price_parts = price_text.split('/')
            unit_price = price_parts[0]
            if len(price_parts) > 1:
                quantity = price_parts[1]

        variants.append({
            'title': (name_div.text.strip()
                      if name_div is not None else product_title),
            # Key spelling kept as-is; changing it would change the output.
            'descripiton': '',
            'variant_images': [],
            'item_code': sku_div.text.strip() if sku_div is not None else None,
            'availability': '',
            'standard_pack': 0,
            'pricing': {'quantity': [quantity], 'unit_price': [unit_price]},
            # Spec values start at the third cell, matching the header offset;
            # zip stops early when a row has fewer cells than the header.
            'specifications': {
                key: cell.text.strip()
                for key, cell in zip(spec_keys, tds[2:])
            },
        })

    # test
    # https: // www.epsi.com / hksc - series - sheet - and -pipe - suspender - hook
    return (variants, product_name, product_title, product_description,
            product_images, product_documents, stock_status, meta)