def get_manufacturer(self, url, pk):
    """Fetch the brand listing at ``url`` and store every brand not yet known.

    The page is fetched through ``self.http``.  Anchors are read from the
    ``div`` with class ``body clear brands-list``; if that yields no anchors,
    the ``panel-grey body clear brands-list`` variant is tried instead.  For
    each anchor text, a ``Manufacturer`` row is created (name plus slugified
    name) unless a row with that exact name already exists.

    :param url: address of the brand listing page.
    :param pk: not used by this method; kept so existing callers keep working.
    :returns: None.
    """
    _status, response = self.http(url)

    soup = BeautifulSoup(
        response,
        parseOnlyThese=SoupStrainer('div', {'class': 'body clear brands-list'}))
    if not soup.findAll('a'):
        # Fall back to the alternative container class.
        soup = BeautifulSoup(
            response,
            parseOnlyThese=SoupStrainer(
                'div', {'class': 'panel-grey body clear brands-list'}))

    for anchor in soup.findAll('a'):
        raw_name = anchor.string
        if raw_name is None:
            # ``.string`` is None when the anchor has no single text child
            # (e.g. it wraps an image); there is no name to store.
            continue

        # NOTE(review): the entity literals were unreadable in the reviewed
        # source; '&amp;' and '&#39;' are the presumed originals -- confirm.
        name = raw_name.replace('&amp;', '&').replace('&#39;', "'")

        # NOTE(review): the original indentation was lost; assumed that only
        # missing manufacturers are created and saved -- confirm.
        try:
            Manufacturer.objects.get(name=name)
        except Manufacturer.DoesNotExist:
            manufacturer = Manufacturer()
            manufacturer.name = name
            manufacturer.slug = slugify(name)
            manufacturer.save()
def get_product_content(self, category_pk, **kwargs):
    """Scrape one product page and create or update the matching product.

    The product is looked up by ``slugify(kwargs['title'])``.  When it does
    not exist yet, a new ``Products`` object is filled with name, category,
    manufacturer, description, main image and slug, and afterwards its
    gallery images and colours are stored as well.  Prices are (re)written
    and the product is saved in both cases, and the variant tables are
    handed to ``self.get_product_variant``.

    :param category_pk: value assigned to ``category_id`` of a new product.
    :param kwargs: must contain ``href`` (product page URL), ``title``,
        ``src`` (main image URL or None), ``price`` (text such as
        ``"10,50"`` or ``"10 - 20"``) and ``old_price`` (same format, or
        None when there is no old price).
    :raises KeyError: if one of the required ``kwargs`` entries is missing.
    :raises AttributeError: if a price text contains no digit at all.
    :returns: None.
    """
    print(kwargs['href'])
    _status, response = self.http(kwargs['href'])

    # The manufacturer name is the text of the breadcrumb link whose href
    # contains 'manufacturer-r'; if several match, the last one wins.
    manufacturer_name = None
    breadcrumb = BeautifulSoup(
        response, parseOnlyThese=SoupStrainer('div', {'class': 'breadcrumb'}))
    for link in breadcrumb.findAll('a'):
        # .get() so that anchors without an href do not raise KeyError.
        if 'manufacturer-r' in link.get('href', ''):
            manufacturer_name = link.string

    # Gallery image URLs: the first single-quoted string of every
    # ';'-separated statement inside the teaser panel's <script> tags.
    teaser = BeautifulSoup(
        response,
        parseOnlyThese=SoupStrainer(
            'div',
            {'id': 'ctl00_ctl00_cph1_cphLeft_ctrlProductDescription_teaserPanel'}))
    products_img = []
    for script in teaser.findAll('script'):
        for statement in script.text.split(';'):
            pieces = statement.split("'")
            if len(pieces) > 1:
                products_img.append(pieces[1])

    # Description markup, with every src="..." value replaced by the local
    # '/media/' path returned by self.get_image.
    description_soup = BeautifulSoup(
        response, parseOnlyThese=SoupStrainer('div', {'class': 'description'}))
    description_div = description_soup.find('div')
    if description_div is not None:
        description = str(description_div)
        for src in re.findall('src="([^"]+)"', description):
            local_path = self.get_image(self.domain + src)
            description = description.replace(
                src, '/media/' + local_path.encode('utf-8'))
    else:
        description = ''

    img_path = None
    if kwargs['src'] is not None:
        img_path = self.get_image(kwargs['src'])

    slug = slugify(kwargs['title'])
    try:
        p = Products.objects.get(slug=slug)
        created = False
    except Products.DoesNotExist:
        created = True
        p = Products()
        p.name = kwargs['title']
        p.category_id = category_pk
        manufacturer = self._resolve_manufacturer(manufacturer_name)
        if manufacturer is not None:
            p.manufacturer = manufacturer
        p.description = description
        p.image = img_path
        p.slug = slug
        p.checked = True

    if kwargs['old_price'] is not None:
        p.from_old_price, p.to_old_price = self._parse_price_range(
            kwargs['old_price'])
    p.from_price, p.to_price = self._parse_price_range(kwargs['price'])
    p.save()

    if created:
        for image_url in products_img:
            ProductsImage(products=p, image=self.get_image(image_url)).save()

    variant_soup = BeautifulSoup(
        response,
        parseOnlyThese=SoupStrainer(
            'div', {'id': 'ctl00_ctl00_cph1_cphLeft_ProductVariantList_pnlMain'}))
    pvt_name = variant_soup.find('div', {'class': 'tabs'})
    tables = variant_soup.findAll('table')
    if len(tables) > 1:
        # With more than one table the last one is not passed on.
        tables = tables[0:-1]
    # NOTE(review): the original indentation was lost; assumed this call is
    # unconditional rather than part of the branch above -- confirm.
    self.get_product_variant(tables, p, pvt_name)

    if created:
        self.get_product_color(response, p)

def _resolve_manufacturer(self, name):
    """Return the ``Manufacturer`` for ``name``, creating it when unknown.

    Lookup is by exact name first, then by ``slugify(name)``.  Returns None
    when ``name`` is empty or None (no manufacturer link was found on the
    page), so the caller can leave the field unset instead of failing with
    a KeyError as the previous inline code did.
    """
    if not name:
        return None
    try:
        return Manufacturer.objects.get(name=name)
    except Manufacturer.DoesNotExist:
        pass
    slug = slugify(name)
    try:
        return Manufacturer.objects.get(slug=slug)
    except Manufacturer.DoesNotExist:
        manufacturer = Manufacturer(name=name, slug=slug)
        manufacturer.save()
        return manufacturer

def _parse_price_range(self, text):
    """Return ``(from_price, to_price)`` strings parsed from ``text``.

    Text that splits into exactly two parts on '-' is treated as a range;
    anything else is a single price used for both ends.
    """
    parts = text.split('-')
    if len(parts) == 2:
        return self._parse_price(parts[0]), self._parse_price(parts[1])
    price = self._parse_price(text)
    return price, price

def _parse_price(self, text):
    """Return the first number in ``text`` as ``str(float(...))``.

    Spaces inside the number are dropped and ',' is read as the decimal
    mark, e.g. ``"1 299,50 zl"`` -> ``"1299.5"``.  The previous pattern
    ``[0-9]+`` stopped at the first space or comma, which truncated such a
    price to ``"1.0"``.  Raises AttributeError when ``text`` has no digit.
    """
    number = re.search(r'[0-9][0-9 ,.]*', text).group(0)
    number = number.replace(' ', '').replace(',', '.').rstrip('.')
    whole, dot, fraction = number.rpartition('.')
    if dot:
        # Only the last separator is the decimal mark; earlier ones are
        # treated as grouping marks, so that float() cannot fail.
        number = whole.replace('.', '') + '.' + fraction
    return str(float(number))