Exemple #1
0
 def getDetails(self, url):
     """Fetch one product detail page and return its scraped fields.

     ``self.pathDetails`` is prepended to ``url`` when it does not already
     occur in it. For the request itself ``self.urlDomain`` is prepended
     as well, unless the resulting URL is an absolute http(s) URL.

     Returns a dict with the keys ``title``, ``thumbnail``, ``price``,
     ``stock``, ``category``, ``description``, ``upc`` and ``url``.
     ``url`` holds the path-normalised value that was requested (without
     the domain unless the caller passed an absolute URL).
     ``description`` is ``''`` when no matching paragraph is found.

     Nothing is caught here: a page lacking any other expected element
     surfaces as the AttributeError / IndexError / KeyError raised by the
     corresponding lookup chain.
     """
     if self.pathDetails not in url:
         url = self.pathDetails + url

     # Only a leading scheme makes the URL absolute. A plain substring
     # test would misfire on relative paths that merely contain "http".
     isAbsolute = url.startswith(('http://', 'https://'))
     detail = Client.getUrl(url if isAbsolute else self.urlDomain + url)
     bsDetail = BeautifulSoup(detail.text, 'lxml')
     article = bsDetail.article

     # Look the description paragraph up once and reuse the result; the
     # guard is needed because find() yields None when nothing matches.
     descriptionTag = article.find('p', {'class': ''})
     description = descriptionTag.text.strip() if descriptionTag else ''

     # The image src is relative ('../' segments), so strip those and
     # anchor it on the site domain.
     thumbnailSrc = article.find('div', {
         'class': 'thumbnail'
     }).div.div.img['src']

     return {
         'title': article.find('div', {'class': 'product_main'}).h1.text,
         'thumbnail': self.urlDomain + thumbnailSrc.replace('../', ''),
         'price': article.find('p', {'class': 'price_color'}).text,
         'stock': article.find('p', {'class': 'availability'}).text.strip(),
         # Third breadcrumb entry is used as the category.
         'category': bsDetail.find('ul', {
             'class': 'breadcrumb'
         }).find_all('li')[2].text.strip(),
         'description': description,
         # First cell of the striped product table.
         'upc': article.find('table', {
             'class': 'table table-striped'
         }).td.text,
         'url': url,
     }
Exemple #2
0
 def process(self):
     """Scrape the listing page at ``self.url`` and follow its next link.

     Steps, in order:
       1. call ``DB().get_connection()``;
       2. fetch ``self.url`` and parse it with BeautifulSoup;
       3. when ``self.isNext`` is falsy, hand the ``side_categories``
          blocks to ``self.category``;
       4. hand the first ``<section>`` to ``self.article``;
       5. if the object returned by ``self.article`` contains a pager with
          a ``next`` entry, build the next page URL and run
          ``Process(nextUrl, True).process()`` on it.

     Returns None. Errors are reported with ``print`` and not re-raised.
     """
     try:
         DB().get_connection()
         site = Client.getUrl(self.url)
         bsObject = BeautifulSoup(site.text, 'lxml')
         if not self.isNext:
             self.category(
                 bsObject.find_all('div', {'class': 'side_categories'}))
         # NOTE(review): self.article is assumed to return a BeautifulSoup
         # node (it must expose find()) - confirm against its definition.
         article = self.article(bsObject.find('section'))

         # find() returns None when the page has no pager; treat that as
         # "no further page" instead of letting the chained lookup raise
         # AttributeError and be reported as a generic error below.
         pager = article.find('ul', {'class': 'pager'})
         nextItem = None
         if pager is not None:
             nextItem = pager.find('li', {'class': 'next'})

         if nextItem is not None:
             urlNext = nextItem.a['href']
             print(urlNext)
             # Prefix the details path only when the link lacks it.
             prefix = '' if self.pathDetails in urlNext else self.pathDetails
             Process(self.urlDomain + prefix + urlNext, True).process()
     except HTTPError as eh:
         print(self.i18n.get('error_proceso'), eh)
     except Exception as e:
         print(self.i18n.get('error'), e)