Example #1
0
    def products_for_url(cls, url, category=None, extra_args=None):
        """Scrape the product page at ``url`` into a one-element list.

        Fetches the page with a browser-like User-Agent, reads prices,
        stock icon, SKU, name, description and pictures from the markup
        and wraps them in a single ``Product`` priced in CLP.
        """
        session = session_with_proxy(extra_args)
        session.headers['User-Agent'] = \
            'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 ' \
            '(KHTML, like Gecko) Chrome/62.0.3202.62 Safari/537.36'

        page = session.get(url, timeout=30)
        soup = BeautifulSoup(page.text, 'html.parser')

        # The 'textOtrosPrecios' divs are consumed by position:
        # [0] normal price, [1] stock icon, [2] SKU.
        info_cells = soup.findAll('div', 'textOtrosPrecios')
        normal_price = Decimal(remove_words(info_cells[0].text))

        # Only these two icons map to stock -1; any other icon maps to 0.
        # NOTE(review): -1 presumably means "available, quantity unknown"
        # -- confirm against Product.
        stock_icon = info_cells[1].find('img')['src']
        in_stock_icons = (
            'images/imagenes/ico_normal.jpg',
            'images/imagenes/ico_bajo.jpg',
        )
        stock = -1 if stock_icon in in_stock_icons else 0

        sku = info_cells[2].text.strip()
        name = soup.find('div', 'textTituloProducto').text.strip()
        cash_price_tag = soup.find('div', 'textPrecioContado')
        offer_price = Decimal(remove_words(cash_price_tag.text))

        description = html_to_markdown(str(soup.find('div', 'p7TPcontent')))

        tables = soup.findAll('table', {'id': 'table20'})
        main_picture = tables[1].findAll('img')[2]['src']
        gallery = [tag['src'] for tag in soup.findAll('img', 'Imagen')]

        # Each path carries the image id after its last '='; rebuild the
        # picture URL from that id and escape spaces.
        picture_base = 'http://www.ttchile.cl/images/imgproductos/' \
                       'imgImagenMarco.php?imagen='
        picture_urls = [
            (picture_base + path.split('=')[-1]).replace(' ', '%20')
            for path in [main_picture] + gallery
        ]

        product = Product(
            name,
            cls.__name__,
            category,
            url,
            url,
            sku,
            stock,
            normal_price,
            offer_price,
            'CLP',
            sku=sku,
            description=description,
            picture_urls=picture_urls,
        )

        return [product]
Example #2
0
    def products_for_url(cls, url, category=None, extra_args=None):
        """Scrape the product page at ``url`` into a one-element list.

        The same price is used for both price slots of the ``Product``.
        When the page shows a struck-through price, the price is 90% of
        that value rounded to an integer; otherwise the displayed price is
        used as is.
        """
        print(url)
        session = session_with_proxy(extra_args)
        soup = BeautifulSoup(session.get(url).text, 'html.parser')

        name = soup.find('div', 'product-info__description').text.strip()
        title_tag = soup.find('div', 'product-info__title')
        # The title span is split on ':' and the second piece is the SKU.
        sku = title_tag.find('span').text.split(':')[1].strip()

        # The quantity input's 'max' attribute is taken as the stock; a
        # page without that input yields stock 0.
        quantity_input = soup.find('input', {'id': 'producto_cantidad'})
        stock = int(quantity_input['max']) if quantity_input else 0

        price_tag = soup.find('span', 'price-box__new')
        struck_price_tag = price_tag.find('s')

        if not struck_price_tag:
            price = Decimal(remove_words(price_tag.text))
        else:
            listed_price = Decimal(remove_words(struck_price_tag.text))
            price = (listed_price * Decimal('0.9')).quantize(0)

        description = html_to_markdown(
            str(soup.find('div', 'tab-content')), 'http://www.eglo.cl')

        slides = soup.findAll('a', 'swiper-slide')

        if slides:
            picture_urls = [slide.find('img')['src'] for slide in slides]
        else:
            # No slider on the page: fall back to the single main image.
            main_image = soup.find('div', 'product-main-image__item')
            picture_urls = [main_image.img['src']]

        product = Product(
            name,
            cls.__name__,
            category,
            url,
            url,
            sku,
            stock,
            price,
            price,
            'CLP',
            sku=sku,
            description=description,
            picture_urls=picture_urls,
        )

        return [product]
Example #3
0
    def products_for_url(cls, url, category=None, extra_args=None):
        """Scrape the product page at ``url`` into a one-element list.

        The part number doubles as the positional key passed to
        ``Product``; ``picture_urls`` is ``None`` when no usable picture
        link is found.
        """
        session = session_with_proxy(extra_args)

        soup = BeautifulSoup(session.get(url).text, 'html.parser')
        # The 'texto-precio-ahorro' cells are read by position:
        # [1] holds the name, [2] holds the part number.
        info_cells = soup.findAll('td', 'texto-precio-ahorro')
        name = info_cells[1].text.strip()

        out_of_stock_icon = soup.find(
            'img', {'src': 'images/ficha/ico_sin_stock.gif'})
        if out_of_stock_icon:
            stock = 0
        else:
            stock_cell = soup.find('td', 'stock-product')
            stock = int(stock_cell.text.split()[0])
        sku = soup.find('td', 'sku').text.split()[-1]

        part_number_text = info_cells[2].find('td').text
        part_number = part_number_text.split(':')[1].strip()

        low_price_cell = soup.find('td', 'lowPrice')

        # Prices are the text between the '$' sign and the 'IVA' marker.
        offer_text = low_price_cell.contents[0].split('$')[1]
        offer_text = offer_text.split('IVA')[0]
        offer_price = Decimal(remove_words(offer_text))

        normal_cell = low_price_cell.parent.parent.find('td', 'price-normal')
        normal_text = normal_cell.contents[0].split('$')[1].split('IVA')[0]
        normal_price = Decimal(remove_words(normal_text))

        picture_urls = []
        for link in soup.findAll('a', {'rel': 'lightbox[roadtrip]'}):
            image = link.find('img')
            if not image:
                continue
            picture_url = image['src'].replace(' ', '%20')
            # A bare photos directory URL carries no actual picture.
            if picture_url != 'http://www.clie.cl/photos/':
                picture_urls.append(picture_url)

        picture_urls = picture_urls or None

        product = Product(
            name,
            cls.__name__,
            category,
            url,
            url,
            part_number,
            stock,
            normal_price,
            offer_price,
            'CLP',
            sku=sku,
            part_number=part_number,
            picture_urls=picture_urls,
        )

        return [product]
Example #4
0
    def products_for_url(cls, url, category=None, extra_args=None):
        """Scrape the product page at ``url``.

        Returns an empty list when the price paragraph has no text,
        otherwise a one-element list with the scraped ``Product``.
        """
        print(url)
        session = session_with_proxy(extra_args)
        session.headers['User-Agent'] = \
            'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 ' \
            '(KHTML, like Gecko) Chrome/62.0.3202.62 Safari/537.36'
        soup = BeautifulSoup(session.get(url).text, 'html.parser')

        name = soup.find('h2', 'product_title').text.strip()
        sku = soup.find('span', 'sku').text.strip()

        # The stock label is either 'Agotado' or starts with the quantity.
        stock_label = soup.find('span', 'stock').text.strip()
        if stock_label == 'Agotado':
            stock = 0
        else:
            stock = int(stock_label.split(' ')[0])

        price_tag = soup.find('p', 'price')

        if not price_tag.text.strip():
            return []

        # <ins> carries the offer price and <del> the normal price.
        offer_tag = price_tag.find('ins').find('span')
        offer_price = Decimal(remove_words(offer_tag.text))
        normal_tag = price_tag.find('del').find('span')
        normal_price = Decimal(remove_words(normal_tag.text))

        picture_urls = []
        for thumbnail in soup.findAll('div', 'img-thumbnail'):
            try:
                picture_urls.append(thumbnail.find('img')['content'])
            except KeyError:
                # Images without a 'content' attribute are skipped.
                continue

        description = html_to_markdown(
            str(soup.find('div', {'id': 'tab-description'})))

        product = Product(
            name,
            cls.__name__,
            category,
            url,
            url,
            sku,
            stock,
            normal_price,
            offer_price,
            'CLP',
            sku=sku,
            description=description,
            picture_urls=picture_urls,
        )

        return [product]
Example #5
0
    def products_for_url(cls, url, category=None, extra_args=None):
        """Scrape the product page at ``url``.

        The request URL gets a random ``_`` query parameter appended.
        Returns an empty list on a 404 or an empty body, otherwise a
        one-element list with the scraped ``Product``.
        """
        session = session_with_proxy(extra_args)
        # NOTE(review): the random suffix looks like a cache buster --
        # confirm.
        request_url = '{}?_={}'.format(url, random.randint(1, 1000))
        print(request_url)
        response = session.get(request_url)

        if response.status_code == 404 or not response.text:
            return []

        soup = BeautifulSoup(response.text, 'html5lib')

        name = soup.find('span', {'itemprop': 'name'}).text.strip()
        part_number = soup.find('div', {'itemprop': 'sku'}).text.strip()
        final_price_box = soup.find('div', 'price-final_price')
        sku = final_price_box['data-product-id'].strip()

        add_to_cart = soup.find('button', {'id': 'product-addtocart-button'})
        stock = -1 if add_to_cart else 0

        price_box = soup.find('div', 'product-info-price')
        price_tags = price_box.findAll('span', 'price')
        # First price span is the normal price, last one the offer price.
        normal_price = Decimal(remove_words(price_tags[0].string))
        offer_price = Decimal(remove_words(price_tags[-1].string))

        description_parts = []
        for panel_id in ('product.info.description', 'additional'):
            panel = soup.find('div', {'id': panel_id})
            if panel:
                description_parts.append(
                    html_to_markdown(str(panel)) + '\n\n')
        description = ''.join(description_parts)

        picture_urls = []
        for thumb in soup.findAll('a', 'mt-thumb-switcher'):
            image_url = thumb.get('data-image')
            if image_url:
                picture_urls.append(image_url)

        product = Product(
            name,
            cls.__name__,
            category,
            url,
            url,
            sku,
            stock,
            normal_price,
            offer_price,
            'CLP',
            sku=sku,
            part_number=part_number,
            description=description,
            picture_urls=picture_urls,
        )

        return [product]
Example #6
0
    def products_for_url(cls, url, category=None, extra_args=None):
        """Scrape the product page at ``url`` into a one-element list.

        ``part_number`` and ``picture_urls`` are ``None`` when the page
        lacks the corresponding elements.
        """
        print(url)
        session = session_with_proxy(extra_args)
        soup = BeautifulSoup(session.get(url).text, 'html.parser')

        name = soup.find('h1', 'rm-product-page__title').text
        sku = soup.find('div', 'rm-product__id').h3.text

        mpn_tag = soup.find('p', 'rm-product__mpn')
        if mpn_tag:
            # The part number is whatever follows the last ':'.
            part_number = mpn_tag.text.split(':')[-1].strip()
        else:
            part_number = None

        stock_tag = soup.find('div', 'rm-producto-stock-message')
        no_web_sale = 'Sin disponibilidad para venta web'

        if not stock_tag or stock_tag.text == no_web_sale:
            stock = 0
        else:
            # Otherwise the message starts with the available quantity.
            stock = int(stock_tag.text.split(' ')[0])

        cash_price_box = soup.find('div', 'rm-product__price--cash')
        offer_price = Decimal(remove_words(cash_price_box.h3.text))

        normal_price_box = soup.find('div', 'rm-product__price--normal')
        normal_price = Decimal(remove_words(normal_price_box.h3.text))

        description = html_to_markdown(
            str(soup.find('div', {'id': 'tab-description'})))

        thumbnails = soup.find('ul', 'thumbnails')
        # Only the first link of the thumbnail list is used.
        picture_urls = [thumbnails.a['href']] if thumbnails else None

        product = Product(
            name,
            cls.__name__,
            category,
            url,
            url,
            sku,
            stock,
            normal_price,
            offer_price,
            'CLP',
            sku=sku,
            description=description,
            picture_urls=picture_urls,
            part_number=part_number,
        )

        return [product]
Example #7
0
    def products_for_url(cls, url, category=None, extra_args=None):
        """Scrape the product page at ``url`` into a one-element list.

        The part number is taken from the page's meta description and is
        discarded (``None``) when that text is 50 characters or longer or
        the meta tag is missing.
        """
        session = session_with_proxy(extra_args)
        soup = BeautifulSoup(session.get(url).text, 'html.parser')

        name = soup.find('h1').text.strip()
        sku = soup.find('input', {'name': 'id_product'})['value'].strip()

        part_number = None
        meta_description = soup.find('meta', {'name': 'description'})
        if meta_description:
            candidate = meta_description['content'].strip()
            if len(candidate) < 50:
                part_number = candidate

        availability = soup.find('link', {'itemprop': 'availability'})
        in_stock = availability and \
            availability['href'] == 'http://schema.org/InStock'
        stock = -1 if in_stock else 0

        offer_tag = soup.find('span', {'id': 'our_price_display'})
        offer_price = Decimal(remove_words(offer_tag.string))

        old_price_box = soup.find('p', {'id': 'old_price'})
        normal_tag = old_price_box.find('span', 'price')
        normal_price = Decimal(remove_words(normal_tag.string))

        description = html_to_markdown(
            str(soup.find('section', 'page-product-box')))

        picture_urls = []
        for link in soup.findAll('a', 'fancybox'):
            picture_urls.append(link['href'])

        product = Product(
            name,
            cls.__name__,
            category,
            url,
            url,
            sku,
            stock,
            normal_price,
            offer_price,
            'CLP',
            sku=sku,
            part_number=part_number,
            description=description,
            picture_urls=picture_urls,
        )

        return [product]
Example #8
0
    def products_for_url(cls, url, category=None, extra_args=None):
        """Scrape the product page at ``url`` into a one-element list.

        The request is sent with curl-like headers and a 10 second
        timeout. The same integer-rounded price fills both price slots of
        the ``Product``.
        """
        session = session_with_proxy(extra_args)
        session.headers.update({
            'Host': 'www.ledshop.cl',
            'User-Agent': 'curl/7.52.1',
            'Accept': '*/*',
        })

        print(url)

        page = session.get(url, timeout=10)
        soup = BeautifulSoup(page.text, 'html.parser')
        name = soup.find('h2').text.strip()
        sku = soup.find('input', {'name': 'product_id'})['value'].strip()

        buy_button = soup.find('input', 'wpsc_buy_button')
        stock = -1 if buy_button else 0

        price_box = soup.find('div', 'wpsc_product_price')
        try:
            price = Decimal(remove_words(price_box.span.text))
        except InvalidOperation:
            # The first span did not parse as a number; use the second.
            second_span = price_box.findAll('span')[1]
            price = Decimal(remove_words(second_span.text))

        price = price.quantize(0)

        description = html_to_markdown(
            str(soup.find('div', 'wpsc_description')))

        picture_urls = []
        for link in soup.findAll('a', 'thickbox'):
            picture_urls.append(link['href'].replace(' ', '%20'))

        product = Product(
            name,
            cls.__name__,
            category,
            url,
            url,
            sku,
            stock,
            price,
            price,
            'CLP',
            sku=sku,
            description=description,
            picture_urls=picture_urls,
        )

        return [product]
Example #9
0
    def products_for_url(cls, url, category=None, extra_args=None):
        """Scrape the product page at ``url`` into a one-element list.

        The SKU comes from the ``sku`` query parameter of ``url``. When
        the page shows no price, the "Configurar y cotizar" link is
        followed and the price is looked up there. If that page has no
        price either, the product is returned with stock 0 and price 0.
        """
        session = session_with_proxy(extra_args)
        soup = BeautifulSoup(session.get(url).text, 'html.parser')

        name = soup.find('h1').text.strip()
        price_tag = soup.find('span', 'pricing_retail_nodiscount_price')
        stock = -1

        parsed_url = urllib.parse.urlparse(url)
        sku = urllib.parse.parse_qs(parsed_url.query)['sku'][0]

        description = html_to_markdown(
            str(soup.find('div', {'id': 'cntTabsCnt'})))
        main_content = soup.find('div', {'id': 'maincontentcnt'})
        # The second image inside the main content is used as the picture.
        picture_urls = [main_content.findAll('img')[1]['src']]

        if not price_tag:
            # No price on the product page: try the configuration page.
            configure_image = soup.find(
                'img', {'alt': 'Configurar y cotizar'})
            configure_link = configure_image.parent['href']
            configure_soup = BeautifulSoup(
                session.get(configure_link).text, 'html.parser')
            price_tag = configure_soup.find(
                'span', 'pricing_retail_nodiscount_price')

        if price_tag:
            price = Decimal(remove_words(price_tag.string.split('$')[1]))
        else:
            stock = 0
            price = Decimal(0)

        product = Product(
            name,
            cls.__name__,
            category,
            url,
            url,
            sku,
            stock,
            price,
            price,
            'CLP',
            sku=sku,
            description=description,
            picture_urls=picture_urls,
        )

        return [product]
Example #10
0
    def products_for_url(cls, url, category=None, extra_args=None):
        """Scrape the product page at ``url`` into a one-element list.

        The normal price is read from the 'old-price' paragraph and the
        offer price from the 'special-price' paragraph.
        """
        session = session_with_proxy(extra_args)
        session.headers['User-Agent'] = \
            'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/' \
            '537.36 (KHTML, like Gecko) Chrome/59.0.3071.115 ' \
            'Safari/537.36'

        soup = BeautifulSoup(session.get(url).text, 'html.parser')

        name = soup.find('h1').text.strip()
        sku = soup.find('input', {'name': 'product'})['value'].strip()

        add_to_cart = soup.find('button', {'id': 'product-addtocart-button'})
        stock = -1 if add_to_cart else 0

        old_price_tag = soup.find('p', 'old-price').find('span', 'price')
        normal_price = Decimal(remove_words(old_price_tag.string))

        special_price_tag = soup.find('p', 'special-price').find(
            'span', 'price')
        offer_price = Decimal(remove_words(special_price_tag.string))

        description = html_to_markdown(
            str(soup.find('div', 'short-description')))

        picture_urls = []
        for link in soup.findAll('a', 'ig_lightbox2'):
            picture_urls.append(link['href'])

        product = Product(
            name,
            cls.__name__,
            category,
            url,
            url,
            sku,
            stock,
            normal_price,
            offer_price,
            'CLP',
            sku=sku,
            description=description,
            picture_urls=picture_urls,
        )

        return [product]
Example #11
0
    def products_for_url(cls, url, category=None, extra_args=None):
        """Scrape the product page at ``url``.

        Returns an empty list when the server answers with status 500,
        otherwise a one-element list with the scraped ``Product``. The
        normal price is raised to the offer price when it is lower.
        """
        session = session_with_proxy(extra_args)
        response = session.get(url)

        if response.status_code == 500:
            return []

        soup = BeautifulSoup(response.text, 'html5lib')

        name = soup.find('h2', 'title-product').text.strip()
        sku = soup.find('span', 'text-stock').text.strip()

        # A check-circle icon inside the branch stock box maps to stock -1.
        branch_stock = soup.find('div', 'sucursales-stock')
        has_check = branch_stock and branch_stock.find('i', 'fa-check-circle')
        stock = -1 if has_check else 0

        # Price paragraphs are read by position: [0] offer, [2] normal.
        price_tags = soup.findAll('p', 'precio')
        offer_price = Decimal(remove_words(price_tags[0].text.strip()))
        normal_price = Decimal(remove_words(price_tags[2].text.strip()))

        normal_price = \
            offer_price if normal_price < offer_price else normal_price

        description = html_to_markdown(
            str(soup.find('div', {'id': 'description'})))

        picture_urls = []
        for image in soup.findAll('img', 'primary-img'):
            picture_urls.append(image['src'])

        product = Product(
            name,
            cls.__name__,
            category,
            url,
            url,
            sku,
            stock,
            normal_price,
            offer_price,
            'CLP',
            sku=sku,
            description=description,
            picture_urls=picture_urls,
        )

        return [product]
Example #12
0
    def products_for_url(cls, url, category=None, extra_args=None):
        """Scrape the product page at ``url``.

        Returns an empty list on a 404, or when the page has no ``<body>``
        or no product-name heading. Stock is obtained from a separate POST
        to the store's stock validation endpoint. The condition is
        refurbished when the description contains 'seminuevo', else new.
        """
        session = session_with_proxy(extra_args)
        session.headers['user-agent'] = 'python-requests/2.21.0'
        response = session.get(url)

        if response.status_code == 404:
            return []

        soup = BeautifulSoup(response.text, 'html.parser')

        name_tag = soup.find('h1', {'id': 'nombre-producto'})
        if not soup.find('body') or not name_tag:
            return []

        name = name_tag.text.strip()
        sku = soup.find('div', {'itemprop': 'sku'}).text.strip()

        # The stock lookup uses its own session with AJAX-style headers.
        ajax_session = session_with_proxy(extra_args)
        ajax_session.headers.update({
            'user-agent': 'python-requests/2.21.0',
            'x-requested-with': 'XMLHttpRequest',
            'content-type': 'application/x-www-form-urlencoded',
        })

        stock_response = ajax_session.post(
            'https://catalogo.movistar.cl/fullprice/stockproducto/validar/',
            'sku=' + sku)
        stock = json.loads(stock_response.text)['respuesta']['cantidad']

        price_tag = soup.find('span', 'special-price').find('p')
        price = Decimal(remove_words(price_tag.text))

        description = html_to_markdown(
            str(soup.find('div', 'detailed-desktop')))

        condition = (
            'https://schema.org/RefurbishedCondition'
            if 'seminuevo' in description
            else 'https://schema.org/NewCondition'
        )

        og_image = soup.find('meta', {'property': 'og:image'})
        picture_urls = [og_image['content']]

        product = Product(
            name,
            cls.__name__,
            category,
            url,
            url,
            sku,
            stock,
            price,
            price,
            'CLP',
            condition=condition,
            sku=sku,
            description=description,
            picture_urls=picture_urls,
        )

        return [product]
Example #13
0
    def products_for_url(cls, url, category=None, extra_args=None):
        """Scrape the product page at ``url`` into a one-element list.

        The displayed price is multiplied by 1.19 and rounded to an
        integer; stock is always reported as -1.
        """
        session = session_with_proxy(extra_args)
        soup = BeautifulSoup(session.get(url).text, 'html.parser')

        name = soup.find('h1', 'entry-title').text.strip()
        sku = soup.find('input', {'name': 'product_id'})['value'].strip()
        description = html_to_markdown(
            str(soup.find('div', 'product_description')))

        picture_urls = []
        for link in soup.findAll('a', 'thickbox'):
            picture_urls.append(link['href'])

        listed_price = Decimal(
            remove_words(soup.find('span', 'currentprice').text))
        # NOTE(review): the 1.19 factor presumably adds 19% tax to a net
        # price -- confirm.
        price = (listed_price * Decimal('1.19')).quantize(0)

        product = Product(
            name,
            cls.__name__,
            category,
            url,
            url,
            sku,
            -1,
            price,
            price,
            'CLP',
            sku=sku,
            description=description,
            picture_urls=picture_urls,
        )

        return [product]
Example #14
0
    def products_for_url(cls, url, category=None, extra_args=None):
        """Scrape the product page at ``url`` into a one-element list.

        The first ``<h3>`` in the price list is the normal price and the
        second, when present, the offer price. The offer price is capped
        at the normal price.
        """
        print(url)
        session = session_with_proxy(extra_args)
        session.headers['User-Agent'] = 'curl/7.54.0'
        soup = BeautifulSoup(session.get(url).text, 'html.parser')

        name = soup.find('h5', 'ttvproduct-title').text.strip()
        sku = soup.find('input', {'name': 'product_id'})['value']

        stock_tag = soup.find('span', 'ttvproduct-stock-status')
        stock = int(stock_tag.text.strip())

        price_list = soup.find('ul', 'product-price-and-shipping')
        price_tags = price_list.findAll('h3')
        normal_price = Decimal(remove_words(price_tags[0].text))

        offer_price = normal_price
        if len(price_tags) > 1:
            offer_price = Decimal(remove_words(price_tags[1].text))

        if normal_price < offer_price:
            offer_price = normal_price

        description = html_to_markdown(
            str(soup.find('div', {'id': 'tab-description'})))

        zoom_image = soup.find('img', {'id': 'img_zoom'})
        picture_urls = [zoom_image['data-zoom-image']] if zoom_image else []

        product = Product(
            name,
            cls.__name__,
            category,
            url,
            url,
            sku,
            stock,
            normal_price,
            offer_price,
            'CLP',
            sku=sku,
            description=description,
            picture_urls=picture_urls,
        )

        return [product]
Example #15
0
    def products_for_url(cls, url, category=None, extra_args=None):
        """Scrape the product page at ``url``.

        Args:
            url: Address of the product page to fetch.
            category: Passed through to ``Product`` unchanged.
            extra_args: Passed to ``session_with_proxy``.

        Returns:
            An empty list when the page contains the text
            'Producto no disponible' or shows neither a 'text_web' nor an
            'oferta' price span; otherwise a one-element list with the
            scraped ``Product``. The same price fills both price slots
            and stock is always -1.

        Raises:
            AttributeError: If the page has no ``ecomm_prodid`` marker or
                lacks one of the expected elements.
        """
        print(url)
        session = session_with_proxy(extra_args)
        page_source = session.get(url).text

        # Bail out before parsing: unavailable pages need no soup at all.
        if 'Producto no disponible' in page_source:
            return []

        soup = BeautifulSoup(page_source, 'html.parser')

        name = soup.find('h1').text.strip()
        # Raw string so that '\d' reaches the regex engine untouched
        # instead of being an invalid string escape sequence.
        sku = re.search(r'ecomm_prodid: (\d+)', page_source).group(1)

        price_container = soup.find('span', 'text_web')

        if price_container:
            # The price is the text node right after the <p> inside
            # <strong>.
            price = remove_words(
                price_container.find('strong').find('p').nextSibling)
        else:
            price_container = soup.find('span', 'oferta')
            if not price_container:
                return []
            price = remove_words(price_container.find('b').text)

        price = Decimal(price)

        description = html_to_markdown(
            str(soup.find('div', {'id': 'box-descripcion'})))
        carousel = soup.find('div', 'owl-carousel')
        picture_urls = [
            tag['href'] for tag in carousel.findAll('a', 'fancybox')
        ]

        p = Product(name,
                    cls.__name__,
                    category,
                    url,
                    url,
                    sku,
                    -1,
                    price,
                    price,
                    'CLP',
                    sku=sku,
                    description=description,
                    picture_urls=picture_urls)

        return [p]
Example #16
0
    def products_for_url(cls, url, category=None, extra_args=None):
        """Return the products published under one of the two known URLs.

        ``cls.prepago_url`` yields a single fixed prepaid-plan product.
        ``cls.equipos_url`` yields one 'Cell' product per published entry
        of the ``catalog`` JSON embedded in the page. Any other URL raises
        ``Exception``.
        """
        if url == cls.prepago_url:
            # Plan Prepago
            # NOTE(review): the key reads 'Claro Prepago' while the name
            # is 'GTD Prepago' -- confirm this is intended before changing
            # it, since stored keys may depend on the current value.
            prepaid_plan = Product(
                'GTD Prepago',
                cls.__name__,
                category,
                url,
                url,
                'Claro Prepago',
                -1,
                Decimal(0),
                Decimal(0),
                'CLP',
            )
            return [prepaid_plan]

        if url != cls.equipos_url:
            raise Exception('Invalid URL: ' + url)

        session = session_with_proxy(extra_args)
        body = session.get(url).text
        # Capture the rest of the 'var catalog = ' line and drop its last
        # character before decoding it as JSON.
        catalog_match = re.search(r'var catalog = (.+)', body)
        catalog = json.loads(catalog_match.groups()[0][:-1])

        products = []
        for entry in catalog['products']:
            if not entry['published']:
                continue

            name = entry['name']
            sku = entry['id']
            price = Decimal(remove_words(entry['leasing_price']))
            description = html_to_markdown(entry['description'])

            picture_urls = []
            for image in entry['images']:
                picture_urls.append(
                    'https://nuevo.gtdmanquehue.com' +
                    image['options']['url'])

            products.append(Product(
                name,
                cls.__name__,
                'Cell',
                url,
                url,
                sku,
                -1,
                price,
                price,
                'CLP',
                sku=sku,
                cell_plan_name='GTD Prepago',
                description=description,
                picture_urls=picture_urls,
            ))

        return products
Example #17
0
    def products_for_url(cls, url, category=None, extra_args=None):
        """Scrape the product page at ``url`` into a one-element list.

        Stock is the integer found after the 'STOCK WEB:' label. The
        offer price is capped at the normal price.
        """
        print(url)
        session = session_with_proxy(extra_args)
        soup = BeautifulSoup(session.get(url).text, 'html.parser')

        pricing_container = soup.find('div', {'id': 'product'}).parent
        name = pricing_container.find('h1').text.strip()
        sku = soup.find('input', {'name': 'product_id'})['value']

        stock_label = soup.find('b', text='STOCK WEB:')
        # Two steps past the <b> tag lands on the node holding the number.
        stock = int(stock_label.next.next)

        price_anchor = pricing_container.find('img', {'align': 'absmiddle'})
        price_tags = price_anchor.parent.findAll('h2')

        # The <h2> tags are read by position: [1] normal, [2] offer.
        normal_price = Decimal(remove_words(price_tags[1].text))
        offer_price = Decimal(remove_words(price_tags[2].text))

        if offer_price > normal_price:
            offer_price = normal_price

        description = html_to_markdown(
            str(soup.find('div', {'id': 'tab-description'})))

        picture_urls = []
        for link in soup.findAll('a', 'thumbnail'):
            if link['href']:
                picture_urls.append(link['href'].replace(' ', '%20'))

        product = Product(
            name,
            cls.__name__,
            category,
            url,
            url,
            sku,
            stock,
            normal_price,
            offer_price,
            'CLP',
            sku=sku,
            description=description,
            picture_urls=picture_urls,
        )

        return [product]
Example #18
0
    def products_for_url(cls, url, category=None, extra_args=None):
        """Build one 'Cell' product per phone and plan from a JSON feed.

        The body at ``url`` is decoded as a JSON list of phones. Every
        plan of every phone becomes a ``Product`` keyed by
        '<phone name> <plan name>', priced at the plan's 'pieEquipo' and
        carrying the plan's monthly payment.
        """
        session = session_with_proxy(extra_args)
        phones = json.loads(session.get(url).text)

        # Maps the feed's plan type to the plan name given to Product.
        plan_names = {
            'Plan Libre': 'Plus Libre Cod_OAM_Porta cuotas',
            'Plan XL': 'Plus XL Cod_OAN_Porta cuotas',
            'Plan L': 'Plus L Cod_OAO_Porta cuotas',
            'Plan M': 'Plus M Cod_OAP_Porta cuotas',
        }

        products = []
        for phone in phones:
            name = phone['telefono']
            picture_url = 'https://ww2.movistar.cl/movistarone/' + \
                phone['imagenUrl'].replace(' ', '%20')
            picture_urls = [picture_url]

            for plan in phone['planes']:
                cell_plan_name = plan_names[plan['tipoPlan']]

                price = Decimal(remove_words(plan['pieEquipo']))
                cell_monthly_payment = Decimal(
                    remove_words(plan['cuotaMensualEquipo']))
                key = '{} {}'.format(name, cell_plan_name)

                product = Product(
                    name,
                    cls.__name__,
                    'Cell',
                    'https://ww2.movistar.cl/movistarone/',
                    url,
                    key,
                    -1,
                    price,
                    price,
                    'CLP',
                    picture_urls=picture_urls,
                    cell_plan_name=cell_plan_name,
                    cell_monthly_payment=cell_monthly_payment,
                )
                products.append(product)

        return products
Example #19
0
    def products_for_url(cls, url, category=None, extra_args=None):
        """Scrape the product page at ``url``.

        Args:
            url: Address of the product page to fetch.
            category: Passed through to ``Product`` unchanged.
            extra_args: Passed to ``session_with_proxy``.

        Returns:
            An empty list when the server answers 404, otherwise a
            one-element list with the scraped ``Product``. The same price
            fills both price slots. Stock is 0 when the
            'availability_value' span has text and -1 when it is empty.
        """
        print(url)
        session = session_with_proxy(extra_args)
        # NOTE(review): verify=False turns off TLS certificate
        # verification; kept as in the original -- confirm it is still
        # required for this host.
        response = session.get(url, verify=False)

        if response.status_code == 404:
            return []

        # Parse the response that was just status-checked rather than
        # issuing a second, redundant request for the same URL.
        soup = BeautifulSoup(response.text, 'html.parser')

        name = soup.find('h1', {'itemprop': 'name'}).text.strip()
        sku = soup.find('input', {'name': 'id_product'})['value'].strip()
        part_number = soup.find('span', {'itemprop': 'sku'}).text.strip()

        # Any text inside the availability span marks the product as
        # unavailable.
        unavailable_text = soup.find('span', {
            'id': 'availability_value'
        }).string

        if unavailable_text:
            stock = 0
        else:
            stock = -1

        price_container = soup.find('span', {'id': 'our_price_display'})

        # The amount is the text that follows the '$' sign.
        price = price_container.string.split('$')[1]
        price = Decimal(remove_words(price))

        condition = soup.find('link', {'itemprop': 'itemCondition'})['href']

        description = html_to_markdown(
            str(soup.find('div', 'page-product-box')))

        picture_urls = [tag['href'] for tag in soup.findAll('a', 'fancybox')]

        p = Product(name,
                    cls.__name__,
                    category,
                    url,
                    url,
                    sku,
                    stock,
                    price,
                    price,
                    'CLP',
                    sku=sku,
                    part_number=part_number,
                    description=description,
                    picture_urls=picture_urls,
                    condition=condition)

        return [p]
Example #20
0
    def products_for_url(cls, url, category=None, extra_args=None):
        """Scrape one product page and return its data as a ``Product``.

        Args:
            url: Address of the product page to fetch.
            category: Passed through unchanged to ``Product``.
            extra_args: Forwarded to ``session_with_proxy``.

        Returns:
            An empty list when the page has no ``product-price`` div,
            otherwise a list containing a single ``Product``.
        """
        session = session_with_proxy(extra_args)
        page = BeautifulSoup(session.get(url).text, 'html.parser')

        name = page.find("h1", "title").text

        price_box = page.find("div", "product-price")
        if price_box is None:
            return []

        # The SKU is the first run of digits in the price box's id.
        sku = re.search(r'(\d+)', price_box['id']).group(1)

        price_text = price_box.find('div', 'PricesalesPrice').span.text
        price = Decimal(remove_words(price_text))

        # The description is the short text followed by the long block.
        short_description = html_to_markdown(
            str(page.find('div', 's_desc').text))
        long_description = html_to_markdown(str(page.find('div', 'desc')))
        description = short_description + '\n\n' + long_description

        # Drop the "_<width>x<height>" marker from the thumbnail's file
        # name and point at the product image directory instead.
        thumbnail_src = page.find('ul', 'pagination2').img['src']
        thumbnail_name = thumbnail_src.split('/')[-1]
        size_marker = re.search(r'(_\d+x\d+)', thumbnail_name).group(1)
        picture_name = thumbnail_name.replace(size_marker, '')

        picture_urls = ['http://www.airecenter.cl/images/stories/'
                        'virtuemart/product/' + picture_name]

        # NOTE(review): -1 is presumably the "available, quantity unknown"
        # stock marker -- confirm against Product.
        product = Product(
            name,
            cls.__name__,
            category,
            url,
            url,
            sku,
            -1,
            price,
            price,
            'CLP',
            sku=sku,
            description=description,
            picture_urls=picture_urls
        )

        return [product]
Example #21
0
    def products_for_url(cls, url, category=None, extra_args=None):
        """Scrape one product page and return its data as a ``Product``.

        Args:
            url: Address of the product page; it is also printed to stdout.
            category: Passed through unchanged to ``Product``.
            extra_args: Forwarded to ``session_with_proxy``.

        Returns:
            An empty list when the page has no ``<p class="stock">``
            element, otherwise a list containing a single ``Product``.
        """
        print(url)
        session = session_with_proxy(extra_args)
        page = BeautifulSoup(session.get(url).text, 'html.parser')

        name = page.find('h1', 'product_title').text.strip()
        # The SKU is whatever follows the first '=' of the shortlink href.
        shortlink = page.find('link', {'rel': 'shortlink'})['href']
        sku = shortlink.split('=')[1]

        stock_tag = page.find('p', 'stock')
        if not stock_tag:
            return []

        # NOTE(review): -1 is presumably the "available, quantity unknown"
        # stock marker -- confirm against Product.
        stock = -1 if stock_tag.text == 'Hay existencias' else 0

        # Use the <ins> price when one is present, else the whole block.
        price_block = page.find('p', 'price')
        price_tag = price_block.find('ins') or price_block
        price = Decimal(remove_words(price_tag.text))

        picture_urls = []
        for holder in page.findAll(
                'div', 'woocommerce-product-gallery__image'):
            if holder['data-thumb']:
                picture_urls.append(holder.find('img')['src'])

        description = html_to_markdown(
            str(page.find('div', 'woocommerce-Tabs-panel--description')))

        product = Product(name,
                          cls.__name__,
                          category,
                          url,
                          url,
                          sku,
                          stock,
                          price,
                          price,
                          'CLP',
                          sku=sku,
                          description=description,
                          picture_urls=picture_urls)

        return [product]
Example #22
0
    def products_for_url(cls, url, category=None, extra_args=None):
        """Scrape one product page and return its data as a ``Product``.

        Args:
            url: Address of the product page to fetch.
            category: Passed through unchanged to ``Product``.
            extra_args: Forwarded to ``session_with_proxy``.

        Returns:
            A list containing a single ``Product``.
        """
        session = session_with_proxy(extra_args)
        # Certificate verification is disabled for this request.
        html = session.get(url, verify=False).text
        page = BeautifulSoup(html, 'html.parser')

        name = page.find('h1').text.strip()
        shop_box = page.find('div', 'wrap-product-shop')

        # The first <p> of the box reads "<label>: <sku>".
        sku = shop_box.find('p').text.split(':')[1].strip()

        price_tag = shop_box.find('p', 'special-price').find('span', 'price')
        listed_price = Decimal(remove_words(price_tag.contents[0]))

        # NOTE(review): the 1.19 factor presumably adds 19% tax to a net
        # price -- confirm. The result is rounded to a whole number.
        normal_price = (listed_price * Decimal('1.19')).quantize(0)
        offer_price = normal_price

        # Join whichever of the known description tabs exist on the page.
        tabs = (
            page.find('div', {'id': tab_id})
            for tab_id in (
                'tab-descripcion', 'tab-adicional', 'tab-ficha_tecnica')
        )
        description = '\n\n'.join(
            html_to_markdown(str(tab)) for tab in tabs if tab)

        picture_urls = []
        for anchor in page.findAll('a', 'colorbox'):
            picture_urls.append(anchor['href'])

        product = Product(name,
                          cls.__name__,
                          category,
                          url,
                          url,
                          sku,
                          -1,
                          normal_price,
                          offer_price,
                          'CLP',
                          sku=sku,
                          description=description,
                          picture_urls=picture_urls)

        return [product]
Example #23
0
    def products_for_url(cls, url, category=None, extra_args=None):
        """Scrape one product page and return its data as a ``Product``.

        Args:
            url: Address of the product page; it is also printed to stdout.
            category: Passed through unchanged to ``Product``.
            extra_args: Forwarded to ``session_with_proxy``.

        Returns:
            A list containing a single ``Product``.
        """
        print(url)
        session = session_with_proxy(extra_args)
        # Certificate verification is disabled for this request.
        raw_html = session.get(url, verify=False).text
        page = BeautifulSoup(raw_html, 'html.parser')

        name = page.find('h1', {'itemprop': 'name'}).text.strip()
        sku = page.find('p', 'titulo-atributo-ficha').find('span').text.strip()

        shop_box = page.find('div', 'product-shop')
        # Prefer the regular price; fall back to the special price block.
        price_box = shop_box.find('span', 'regular-price')

        # The part number is only available inside an inline script call.
        match = re.search(r'ccs_cc_args.push\(\[\'pn\', \'(.+)\'\]\);',
                          raw_html)
        if match:
            part_number = match.group(1).strip()
        else:
            part_number = None

        if not price_box:
            price_box = shop_box.find('p', 'special-price')

        price_text = price_box.find('span', 'price').text
        price = Decimal(remove_words(price_text))

        description = html_to_markdown(
            str(page.find('div', 'product-description')))

        picture_urls = []
        for anchor in page.findAll('a', 'lightbox'):
            picture_urls.append(anchor['href'])

        # Available when there is an add-to-cart button or at least one
        # "tienda-disponible" paragraph. NOTE(review): -1 is presumably
        # the "available, quantity unknown" marker -- confirm.
        has_cart_button = page.find('button', 'btn-cart')
        stock = -1 if (
            has_cart_button or page.findAll('p', 'tienda-disponible')
        ) else 0

        product = Product(name,
                          cls.__name__,
                          category,
                          url,
                          url,
                          sku,
                          stock,
                          price,
                          price,
                          'CLP',
                          sku=sku,
                          part_number=part_number,
                          description=description,
                          picture_urls=picture_urls)

        return [product]
Example #24
0
    def products_for_url(cls, url, category=None, extra_args=None):
        """Scrape one product page and return its data as a ``Product``.

        Args:
            url: Address of the product page; it must contain a
                ``/p/<digits>/`` segment, from which the SKU is taken.
                It is also printed to stdout.
            category: Passed through unchanged to ``Product``.
            extra_args: Forwarded to ``session_with_proxy``.

        Returns:
            A list containing a single ``Product``.
        """
        print(url)
        session = session_with_proxy(extra_args)
        html = session.get(url).text
        page = BeautifulSoup(html, 'html.parser')

        sku = re.search(r'/p/(\d+)/', url).group(1)
        model = page.find('h1', 'producto-nombre').text.strip()
        part_number = page.find('div', 'producto-subtitulo').text.strip()
        name = '{} ({})'.format(model, part_number)

        # The stock box, when present, carries an explicit unit count.
        stock_box = page.find('div', 'producto-stock')
        stock = 0
        if stock_box:
            units = re.search(r'STOCK: (\d+)', stock_box.text).group(1)
            stock = int(units)

        # Only the text before the first '(' is parsed as the price.
        price_text = page.find('div', 'producto-precio').text.split('(')[0]
        price = Decimal(remove_words(price_text))

        description = html_to_markdown(
            str(page.find('table', 'producto-ficha-tabla')))

        # Gallery items without a link are skipped.
        gallery_links = (
            item.find('a')
            for item in page.findAll('div', 'producto-galeria-imagenes-item')
        )
        picture_urls = [link['href'] for link in gallery_links if link]

        product = Product(name,
                          cls.__name__,
                          category,
                          url,
                          url,
                          sku,
                          stock,
                          price,
                          price,
                          'CLP',
                          sku=sku,
                          part_number=part_number,
                          description=description,
                          picture_urls=picture_urls)

        return [product]
Example #25
0
    def products_for_url(cls, url, category=None, extra_args=None):
        """Scrape one product page and return its data as a ``Product``.

        Args:
            url: Address of the product page; it is also printed to stdout.
            category: Passed through unchanged to ``Product``.
            extra_args: Forwarded to ``session_with_proxy``.

        Returns:
            An empty list when the server answers with HTTP 404, otherwise
            a list containing a single ``Product``.
        """
        print(url)
        session = session_with_proxy(extra_args)
        response = session.get(url)

        if response.status_code == 404:
            return []

        page = BeautifulSoup(response.text, 'html.parser')

        name = page.find('h1', 'product-info__name').text

        # The SKU is the numeric path segment of the og:image URL.
        image_url = page.find('meta', {'property': 'og:image'})['content']
        sku = re.search(r'/ultimate-gamer-store/(\d+)/', image_url).group(1)

        # NOTE(review): -1 is presumably the "available, quantity unknown"
        # stock marker -- confirm against Product.
        availability = page.find(
            'meta', {'property': 'product:availability'})['content']
        stock = -1 if availability == 'instock' else 0

        price_text = page.find('span', 'product-info__price-current').text
        price = Decimal(remove_words(price_text).strip())

        description = html_to_markdown(
            str(page.find('section', {'id': 'product-description'})))

        picture_urls = []
        for image in page.findAll('img', 'product-slider__block-image'):
            picture_urls.append(image['src'])

        product = Product(name,
                          cls.__name__,
                          category,
                          url,
                          url,
                          sku,
                          stock,
                          price,
                          price,
                          'CLP',
                          sku=sku,
                          picture_urls=picture_urls,
                          description=description)

        return [product]
Example #26
0
    def _plans(cls, url, extra_args):
        """Scrape the plan listing at ``url`` into ``CellPlan`` products.

        Each ``mb-parrilla_col`` container on the page yields four
        ``Product`` entries, one per combination of portability suffix and
        "cuota de arriendo" suffix, all sharing the container's price.

        Args:
            url: Address of the plan listing page.
            extra_args: Forwarded to ``session_with_proxy``.

        Returns:
            A list of ``Product`` objects; empty when the page has no plan
            containers.
        """
        session = session_with_proxy(extra_args)
        session.headers['user-agent'] = 'python-requests/2.21.0'
        soup = BeautifulSoup(session.get(url, timeout=30).text, 'html5lib')

        # The name suffixes are the same for every plan, so build the four
        # combinations once instead of on every loop iteration.
        name_suffixes = [
            portability_suffix + cuota_suffix
            for portability_suffix in ('', ' Portabilidad')
            for cuota_suffix in (
                ' (sin cuota de arriendo)',
                ' (con cuota de arriendo)'
            )
        ]

        products = []

        for plan_container in soup.findAll('div', 'mb-parrilla_col'):
            plan_link = plan_container.find('a')
            plan_url = plan_link['href']

            base_plan_name = 'Plan ' + plan_link.find('h3').text.strip()
            base_plan_name = base_plan_name.replace('&nbsp;', '')

            # Only the first whitespace-separated token of the price text
            # is parsed as the amount.
            price_text = plan_container.find('div', 'mb-parrilla_price').find(
                'p', 'price').text
            price = Decimal(remove_words(price_text.split()[0]))

            for name_suffix in name_suffixes:
                plan_name = base_plan_name + name_suffix

                products.append(Product(
                    plan_name,
                    cls.__name__,
                    'CellPlan',
                    plan_url,
                    url,
                    plan_name,
                    -1,
                    price,
                    price,
                    'CLP'
                ))

        return products
Example #27
0
    def products_for_url(cls, url, category=None, extra_args=None):
        """Scrape one product page and return its data as a ``Product``.

        Args:
            url: Address of the product page to fetch.
            category: Passed through unchanged to ``Product``.
            extra_args: Forwarded to ``session_with_proxy``.

        Returns:
            A list containing a single ``Product``.
        """
        session = session_with_proxy(extra_args)
        # Certificate verification is disabled for this request.
        html = session.get(url, verify=False).text
        page = BeautifulSoup(html, 'html.parser')

        model = page.find('h1').text.strip()

        # Prefix the model with the brand when the page has a "Marca:"
        # label; the brand text sits three parse steps after that label.
        brand_label = page.find('span', text='Marca:')
        if not brand_label:
            name = model
        else:
            brand = brand_label.next.next.next.text.strip()
            name = '{} {}'.format(brand, model)

        # The SKU text sits two parse steps after the "SKU:" label.
        sku = page.find('span', text='SKU:').next.next.strip()

        description = html_to_markdown(
            str(page.find('div', {'id': 'tab-description'})))

        picture_urls = []
        for anchor in page.findAll('a', 'colorbox'):
            picture_urls.append(anchor['href'].replace(' ', '%20'))

        # The price is the part after the first ':' of the price block,
        # rounded to a whole number.
        price_text = page.find('div', 'price').text.split(':')[1]
        price = Decimal(remove_words(price_text)).quantize(0)

        product = Product(name,
                          cls.__name__,
                          category,
                          url,
                          url,
                          sku,
                          -1,
                          price,
                          price,
                          'CLP',
                          sku=sku,
                          description=description,
                          picture_urls=picture_urls)

        return [product]
Example #28
0
    def _plans(cls, url, extra_args):
        """Scrape the plan listing at ``url`` into ``CellPlan`` products.

        Each ``box-vertical-fijo`` row yields four ``Product`` entries:
        two portability variants (each with its own price attribute) times
        two "cuota de arriendo" name suffixes.

        Args:
            url: Address of the plan listing page.
            extra_args: Forwarded to ``session_with_proxy``.

        Returns:
            A list of ``Product`` objects; empty when the page has no rows.
        """
        session = session_with_proxy(extra_args)
        page = BeautifulSoup(session.get(url).text, 'html.parser')

        # (name suffix, attribute of the price div holding that price)
        portability_variants = (
            ('', 'data-not-ported'),
            (' Portabilidad', 'data-ported'),
        )
        cuota_variants = (
            ' (sin cuota de arriendo)',
            ' (con cuota de arriendo)',
        )

        products = []

        for row in page.findAll('div', 'box-vertical-fijo'):
            # Collapse every run of whitespace in the title to one space.
            title_text = row.find('li', 'gbs_text').text
            base_plan_name = ' '.join(title_text.split())
            price_box = row.find('div', 'price')

            for portability_suffix, price_attribute in portability_variants:
                price = Decimal(remove_words(price_box[price_attribute]))

                for cuota_suffix in cuota_variants:
                    plan_name = (
                        base_plan_name + portability_suffix + cuota_suffix)

                    products.append(Product(
                        plan_name,
                        cls.__name__,
                        'CellPlan',
                        url,
                        url,
                        plan_name,
                        -1,
                        price,
                        price,
                        'CLP',
                    ))

        return products
Example #29
0
    def products_for_url(cls, url, category=None, extra_args=None):
        """Scrape one product page and return its data as a ``Product``.

        Args:
            url: Address of the product page to fetch.
            category: Passed through unchanged to ``Product``.
            extra_args: Forwarded to ``session_with_proxy``.

        Returns:
            A list containing a single ``Product``.
        """
        session = session_with_proxy(extra_args)
        page = BeautifulSoup(session.get(url).content, 'html.parser')

        name = page.find('h1').text.strip()
        # The code block reads "<label>: <sku>".
        sku = page.find('div', 'codigo-producto').text.split(':')[1].strip()

        # An add-to-basket link marks the product as available.
        # NOTE(review): -1 is presumably the "available, quantity unknown"
        # stock marker -- confirm against Product.
        stock = -1 if page.find('a', 'btnAddBasketHome') else 0

        price_label = page.find('div', 'precio').find('label')
        listed_price = Decimal(remove_words(price_label.string))

        # NOTE(review): the 1.19 factor presumably adds 19% tax to a net
        # price -- confirm. The result is rounded to a whole number.
        price = (listed_price * Decimal('1.19')).quantize(0)

        panel_markdown = []
        for panel in page.findAll('section', 'page_product_box'):
            panel_markdown.append(
                html_to_markdown(str(panel), 'https://www.dled.cl'))
        description = '\n\n'.join(panel_markdown)

        # Gallery hrefs are prefixed with the site root.
        picture_urls = []
        for anchor in page.findAll('a', 'cloud-zoom-gallery'):
            picture_urls.append('https://www.dled.cl' + anchor['href'])

        product = Product(
            name,
            cls.__name__,
            category,
            url,
            url,
            sku,
            stock,
            price,
            price,
            'CLP',
            sku=sku,
            description=description,
            picture_urls=picture_urls
        )

        return [product]
Example #30
0
    def products_for_url(cls, url, category=None, extra_args=None):
        """Scrape one product page and return its data as a ``Product``.

        Args:
            url: Address of the product page to fetch.
            category: Passed through unchanged to ``Product``.
            extra_args: Forwarded to ``session_with_proxy``.

        Returns:
            A list containing a single ``Product``. When the page shows no
            price element the product is reported with stock 0 and a price
            of 0.
        """
        session = session_with_proxy(extra_args)
        soup = BeautifulSoup(session.get(url).text, 'html.parser')

        name = soup.find('div', 'titulo').text.strip()

        identifier = soup.find('div', 'identif').text.strip()
        # Raw string: a plain literal would contain the invalid escape
        # sequences "\d" and "\|", which newer Python versions warn about.
        sku, part_number = \
            re.search(r'Código Rhona: (\d+) \| Código Fabricante: (.+)',
                      identifier).groups()

        price = soup.find('span', 'verde')

        # A missing price element is treated as "not available".
        # NOTE(review): -1 is presumably the "available, quantity unknown"
        # stock marker -- confirm against Product.
        if not price:
            stock = 0
            price = Decimal(0)
        else:
            stock = -1
            price = Decimal(remove_words(price.string))

        description = html_to_markdown(str(soup.find('ul', {'id': 'tab1'})))

        picture_urls = [
            tag['href'] for tag in soup.find('div', 'masFotos').findAll('a')
        ]

        p = Product(name,
                    cls.__name__,
                    category,
                    url,
                    url,
                    sku,
                    stock,
                    price,
                    price,
                    'CLP',
                    sku=sku,
                    part_number=part_number,
                    description=description,
                    picture_urls=picture_urls)

        return [p]