Code example #1
File: parser.py  Project: webdish/e-shop
    def get_manufacturer(self, url, pk):
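        # Fetch the page and parse only the <div> that holds the brand links.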
        status, response = self.http(url)
        soup = BeautifulSoup(response,
                             parseOnlyThese=SoupStrainer('div',
                                                         {'class': 'body clear brands-list'}))

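        # Some pages use a grey panel variant of the container; fall back to it when no links were found.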
        if not soup.findAll('a'):
            soup = BeautifulSoup(response,
                                 parseOnlyThese=SoupStrainer('div',
                                                             {'class': 'panel-grey body clear brands-list'}))

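        # Create a Manufacturer record for every brand name that is not in the database yet.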
        for a in soup.findAll('a'):
            # Decode the HTML entities that appear in brand names.
            name = a.string.replace('&amp;', '&').replace('&#39;', "'")
            try:
                p = Manufacturer.objects.get(name=name)
            except Manufacturer.DoesNotExist:
                p = Manufacturer()
                p.name = name
                p.slug = slugify(name)
                p.save()
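
This snippet (like the next one) uses the BeautifulSoup 3 API (parseOnlyThese, findAll). For reference, a minimal sketch of the same <div> filtering with BeautifulSoup 4, assuming bs4 is installed; the markup below is a hypothetical stand-in for the page fetched by self.http(url):

from bs4 import BeautifulSoup, SoupStrainer

# Hypothetical markup standing in for the fetched response.
html = '<div class="body clear brands-list"><a>Acme</a><a>Contoso</a></div>'

# parse_only replaces BeautifulSoup 3's parseOnlyThese keyword.
soup = BeautifulSoup(html, 'html.parser',
                     parse_only=SoupStrainer('div', {'class': 'body clear brands-list'}))

for a in soup.find_all('a'):  # find_all replaces findAll
    print(a.string)
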
Code example #2
File: parser.py  Project: webdish/e-shop
    def get_product_content(self, category_pk, **kwargs):
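        # Log the product URL being processed and fetch the page.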
        print kwargs['href']
        status, response = self.http(kwargs['href'])
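        # The manufacturer name comes from the breadcrumb link whose href contains 'manufacturer-r'.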
        soup = BeautifulSoup(response,
                             parseOnlyThese=SoupStrainer('div',
                                                         {'class': 'breadcrumb'}))
        pr_det = {}
        for x in soup.findAll('a'):
            if 'manufacturer-r' in x['href']:
                pr_det['manufacturer'] = x.string

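        # Gallery image URLs are embedded in inline <script> blocks inside the teaser panel.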
        soup = BeautifulSoup(response,
                             parseOnlyThese=SoupStrainer('div',
                                                         {'id': 'ctl00_ctl00_cph1_cphLeft_ctrlProductDescription_teaserPanel'}))
        products_img = []
        for img in soup.findAll('script'):
            for img_url in img.text.split(';'):
                try:
                    products_img.append(img_url.split("'")[1])
                except IndexError:
                    pass

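        # Mirror images referenced in the description locally and rewrite their src attributes to /media/.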
        soup = BeautifulSoup(response,
                             parseOnlyThese=SoupStrainer('div',
                                                         {'class': 'description'}))
        if soup.find('div'):
            description = str(soup.find('div'))
            for i in re.findall('src="([^"]+)"', description):
                img_path = self.get_image(self.domain + i)
                description = description.replace(i, '/media/' + img_path.encode('utf-8'))
        else:
            description = ''

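        # Download the main product image when a source URL was supplied.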
        img_path = None
        if kwargs['src'] is not None:
            img_path = self.get_image(kwargs['src'])

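        # Matches the first run of digits in a price string.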
        patt = re.compile(r'[0-9]+')

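        # Create the product when its slug is new; otherwise load the existing record for update.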
        if not Products.objects.filter(slug=slugify(kwargs['title'])).exists():
            created = True
            p = Products()
            p.name = kwargs['title']
            p.category_id = category_pk

            try:
                p.manufacturer = Manufacturer.objects.get(name=pr_det['manufacturer'])
            except (Manufacturer.DoesNotExist, KeyError):
                try:
                    p.manufacturer = Manufacturer.objects.get(slug=slugify(pr_det['manufacturer']))
                except (Manufacturer.DoesNotExist, KeyError):
                    m = Manufacturer(name=pr_det['manufacturer'],
                                     slug=slugify(pr_det['manufacturer']))
                    m.save()
                    p.manufacturer = m

            p.description = description
            p.image = img_path
            p.slug = slugify(kwargs['title'])
            p.checked = True
        else:
            created = False
            p = Products.objects.get(slug=slugify(kwargs['title']))

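        # Prices may be a single value or a 'from - to' range; both bounds are stored either way.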
        if kwargs['old_price'] is not None:
            if len(kwargs['old_price'].split('-')) == 2:
                pr = patt.search(kwargs['old_price'].split('-')[0]).group(0)
                p.from_old_price = str(float(pr.replace(',', '.').replace(' ', '')))
                pr = patt.search(kwargs['old_price'].split('-')[1]).group(0)
                p.to_old_price = str(float(pr.replace(',', '.').replace(' ', '')))
            else:
                pr = patt.search(kwargs['old_price'].split('-')[0]).group(0)
                p.from_old_price = str(float(pr.replace(',', '.').replace(' ', '')))
                p.to_old_price = str(float(pr.replace(',', '.').replace(' ', '')))
        if len(kwargs['price'].split('-')) == 2:
            pr = patt.search(kwargs['price'].split('-')[0]).group(0)
            p.from_price = str(float(pr.replace(',', '.').replace(' ', '')))
            pr = patt.search(kwargs['price'].split('-')[1]).group(0)
            p.to_price = str(float(pr.replace(',', '.').replace(' ', '')))
        else:
            pr = patt.search(kwargs['price']).group(0)
            p.from_price = str(float(pr.replace(',', '.').replace(' ', '')))
            p.to_price = str(float(pr.replace(',', '.').replace(' ', '')))
        p.save()

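        # Gallery images are only attached when the product was just created.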
        if created:
            for x in products_img:
                ProductsImage(products=p, image=self.get_image(x)).save()

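        # Product variants live in their own panel; when several tables are present the last one is dropped.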
        soup = BeautifulSoup(response,
                             parseOnlyThese=SoupStrainer('div',
                                                         {'id': 'ctl00_ctl00_cph1_cphLeft_ProductVariantList_pnlMain'}))
        pvt_name = soup.find('div', {'class': 'tabs'})
        soup = soup.findAll('table')
        if len(soup) > 1:
            soup = soup[0:-1]

        self.get_product_variant(soup, p, pvt_name)
        if created:
            self.get_product_color(response, p)
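
The keyword arguments consumed above suggest the caller passes href, src, title, price and old_price for each product. A hypothetical invocation, shown only to illustrate the expected keys; the Parser class name and all of the values are assumptions, not taken from the project:

parser = Parser()  # hypothetical instance of the scraper class these methods belong to
parser.get_product_content(
    category_pk=1,
    href='http://example.com/product/123',    # product page to fetch
    src='http://example.com/images/123.jpg',  # main image URL, may be None
    title='Sample product',
    price='1234 - 2345',                      # single value or 'from - to' range
    old_price=None,                           # optional previous price
)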