Python AliceItem 예제들, alice.items.AliceItem Python 예제들

예제 #1

0

파일 보기

파일: linio_spider.py 프로젝트: chwlibre/alice-odum

    def parse_product(self, response):
        """Build one AliceItem for the "linio" store from a product page.

        Title, picture and price are mandatory: when one of their nodes is
        missing the ``[0]`` lookup raises IndexError and no item is yielded.
        The five category tags are optional and default to "".

        :param response: page response exposing ``url`` and ``xpath()``.
        :returns: generator yielding a single populated item.
        """
        item = AliceItem()
        item['url'] = response.url
        item['title'] = response.xpath(
            '//*[@id="title-product-detail"]/h1/text()').extract()[0]
        # NOTE(review): positional meta[14] depends on the exact order of the
        # <meta> tags in the page head - confirm it is still valid.
        item['picture'] = response.xpath(
            '/html/head/meta[14]/@content').extract()[0]
        price_text = response.xpath(
            '//*[@id="product-special-price"]'
            '/span[@property="gr:hasCurrencyValue"]/text()').re(r'\d.+\d')[0]
        # Dots are removed before int(), so "1.990" becomes 1990.
        item['price'] = int(price_text.replace(".", ""))
        # The brand is not scraped for this store.
        item['brand'] = ""
        item['store'] = "linio"
        item['id_store'] = 16

        tags = response.xpath(
            '//*[@id="category-navigation-breadcrumbs"]/li/a/text()').extract()
        # The first breadcrumb entry is skipped; tag1..tag5 take the next five
        # entries and are padded with "" when the trail is shorter.
        categories = (tags[1:6] + [""] * 5)[:5]
        for number, tag in enumerate(categories, 1):
            item['tag%d' % number] = tag
        yield item

예제 #2

0

파일 보기

파일: sodimac_spider.py 프로젝트: chwlibre/alice-odum

    def parse_products(self, response):
        """Build one AliceItem for the "sodimac" store from a product page.

        Title, picture and price are mandatory (a missing node raises
        IndexError). Brand and the five category tags default to ''.

        :param response: page response exposing ``url`` and ``xpath()``.
        :returns: generator yielding a single populated item.
        """
        try:
            item = AliceItem()
            item['url'] = response.url
            item['title'] = response.xpath(
                '//meta[@property="og:title"]/@content').extract()[0].replace(
                    ' - Sodimac.com', '')
            item['picture'] = response.xpath(
                '//meta[@property="og:image"]/@content').extract()[0]

            # The price is looked up in two places; dots are stripped before
            # int(), so "1.990" becomes 1990.
            try:
                price_text = response.xpath(
                    '//div[@class="precio1-1"]/div/text()').re(r'\d\S*')[0]
                item['price'] = int(price_text.replace('.', ''))
            except (IndexError, ValueError):
                price_text = response.xpath(
                    '//div[@id="skuPrice"]//div[@class="precio1-1"]/text()'
                ).re(r'\d\S*')[0]
                item['price'] = int(price_text.replace('.', ''))

            # Previously a missing brand ended up stored as an empty list.
            brands = response.xpath(
                '//div[@class="marca"]/text()').re(r'\S.+\S')
            item['brand'] = brands[0] if brands else ''

            item['store'] = "sodimac"
            item['id_store'] = 11

            tags = response.xpath('//div[@id="ruta"]//span/text()').re(
                r'\S.+\S')
            # The first breadcrumb entry is dropped; slicing (instead of
            # pop(0)) keeps an empty trail from raising.
            categories = (tags[1:6] + [''] * 5)[:5]
            for number, tag in enumerate(categories, 1):
                item['tag%d' % number] = tag
            yield item
        except IOError as err:
            # The old handler printed an undefined name and therefore raised
            # NameError instead of reporting the failure.
            print('cannot open %s: %s' % (response.url, err))

예제 #3

0

파일 보기

파일: buscato_spider.py 프로젝트: chwlibre/alice-odum

    def parse_item(self, response):
        """Build one AliceItem for the "pcfactory" store from a product page.

        The title is assembled from the text fragments of the title spans; its
        first fragment is also stored as the brand. Title, picture and price
        are mandatory (IndexError when missing); category tags default to "".

        :param response: page response exposing ``url`` and ``xpath()``.
        :returns: generator yielding a single populated item.
        """
        try:
            item = AliceItem()
            item['url'] = response.url
            title = response.xpath(
                '//tr/td/strong/span[@class="main_titulo_ficha_bold"]/text()'
            ).re(r'\S.+\S')
            # Drop the third fragment, or the second one when only two exist.
            # A single-fragment title is kept as is (it used to raise).
            if len(title) > 2:
                del title[2]
            elif len(title) > 1:
                del title[1]
            item['title'] = " ".join(title).title().encode('ascii', 'ignore')
            # self.dom is prepended to the image src found on the page.
            item['picture'] = self.dom + response.xpath(
                '//div[@id="loadarea"]/a[@href]/img/@src').extract()[0]

            # Try the first price node, then fall back to the second; dots
            # are stripped before int().
            try:
                price_text = response.xpath(
                    '//span[@class="main_precio_efectivo"]/strong/text()'
                ).re(r'\d\S*')[0]
                item['price'] = int(price_text.replace('.', ''))
            except (IndexError, ValueError):
                price_text = response.xpath(
                    '//div[@class="precio_lg"]/text()')[0].re(r'\d\S*')[0]
                item['price'] = int(price_text.replace('.', ''))

            # First title fragment doubles as the brand; raises IndexError
            # when no title fragment was found at all.
            item['brand'] = title[0]
            item['store'] = "pcfactory"
            item['id_store'] = 6

            tags = response.xpath(
                '//a[@class="main_ruta_link"]/text()').extract()
            # The first link is skipped; tag1..tag5 are padded with "".
            categories = (tags[1:6] + [""] * 5)[:5]
            for number, tag in enumerate(categories, 1):
                item['tag%d' % number] = tag
            yield item
        except IOError as err:
            # The old handler printed an undefined name and therefore raised
            # NameError instead of reporting the failure.
            print('cannot open %s: %s' % (response.url, err))

예제 #4

0

파일 보기

    def parse_item(self, response):
        """Build one AliceItem for the "corona" store from a product page.

        The title comes from the last breadcrumb text node, falling back to
        the page heading and finally to "". Picture and price are mandatory
        (IndexError when missing); category tags default to "".

        :param response: page response exposing ``url`` and ``xpath()``.
        :returns: generator yielding a single populated item.
        """
        try:
            item = AliceItem()
            item['url'] = response.url

            # The previous fallbacks called str() on selector objects, which
            # stored their repr instead of the heading text.
            try:
                title = response.xpath('//*[@id="breadcrum"]/text()')[-1].re(
                    r'\S.+\S')[0].replace('| ', '')
            except IndexError:
                headings = response.xpath(
                    '//*[@id="cont_ficha_right"]/h1/text()').extract()
                title = headings[0] if headings else ""
            # Keep a byte-string title as before, but drop non-ASCII
            # characters instead of failing on them.
            item['title'] = title.encode('ascii', 'ignore')

            item['picture'] = "http://www.corona.cl" + response.xpath(
                '//img[@id="img_zoom"]/@src')[0].extract()

            # Commas are stripped from the price text before int().
            try:
                price_text = response.xpath(
                    '//div[@id="cont_ficha_precio"]'
                    '/span[@class="precio_internet"]/text()').re(r'\d\S*')[0]
                item['price'] = int(price_text.replace(',', ''))
            except (IndexError, ValueError):
                price_text = response.xpath(
                    '//div[@id="cont_ficha_precio"]/span/text()').re(
                        r'\d\S*')[0]
                item['price'] = int(price_text.replace(',', ''))

            item['brand'] = ""
            item['store'] = "corona"
            item['id_store'] = 13

            tags = response.xpath('//*[@id="breadcrum"]/a/text()').re(
                r'\S.+\S')
            # The first breadcrumb link is dropped; slicing (instead of
            # pop(0)) keeps an empty trail from raising.
            categories = (tags[1:6] + [""] * 5)[:5]
            for number, tag in enumerate(categories, 1):
                item['tag%d' % number] = tag

            yield item
        except IOError as err:
            # The old handler printed an undefined name and therefore raised
            # NameError instead of reporting the failure.
            print('cannot open %s: %s' % (response.url, err))

예제 #5

0

파일 보기

    def parse_products(self, response):
        """Build one AliceItem for the "falabella" store from a product page.

        Picture, price, brand and title are mandatory (a missing node raises
        IndexError); the five category tags default to "".

        :param response: page response exposing ``url`` and ``xpath()``.
        :returns: generator yielding a single populated item.
        """
        try:
            item = AliceItem()
            item['url'] = response.url
            item['picture'] = response.xpath(
                '//*[@id="contenedor1PP"]/meta/@content').extract()[0]

            # Two alternative price nodes are tried in order; dots are
            # stripped before int().
            try:
                price_text = response.xpath(
                    '//*[@id="skuPrice"]/div[@class="precio1"]/text()[2]'
                ).extract()[0]
                item['price'] = int(price_text.replace(".", ""))
            except (IndexError, ValueError):
                price_text = response.xpath(
                    '//*[@id="skuPrice"]/div[@class="precio1"]/span[2]/text()'
                ).extract()[0]
                item['price'] = int(price_text.replace(".", ""))

            item['brand'] = response.xpath(
                '//*[@id="productBrand"]/text()').extract()[0].title()
            item['title'] = response.xpath(
                '//*[@id="skuPrice"]/meta[@name="twitter:title"]/@content'
            ).extract()[0].title().encode('ascii', 'ignore')
            item['store'] = "falabella"
            item['id_store'] = 1

            tags = response.xpath('//div[@id="ruta"]//text()').re(r'\S.+\S')
            # The first breadcrumb entry is skipped; tag1..tag5 are padded
            # with "" when the trail is shorter.
            categories = (tags[1:6] + [""] * 5)[:5]
            for number, tag in enumerate(categories, 1):
                item['tag%d' % number] = tag

            yield item
        except IOError as err:
            # The old handler printed two undefined names and therefore
            # raised NameError instead of reporting the failure.
            print('error %s: %s' % (response.url, err))

예제 #6

0

파일 보기

파일: buscato_spider.py 프로젝트: chwlibre/alice-odum

    def parse_item(self, response):
        """Build one AliceItem for the "casaximena" store from a product page.

        Title, picture and price are mandatory (a missing node raises
        IndexError); the five category tags default to "".

        :param response: page response exposing ``url`` and ``xpath()``.
        :returns: generator yielding a single populated item.
        """
        try:
            item = AliceItem()
            item['url'] = response.url

            item['title'] = response.xpath(
                '//*[@id="main"]/div[@class="container-data-product"]/h3[@class="title-product"]/text()'
            ).re(r'\S.+\S')[0].title()

            # Relative image paths are rewritten into absolute site URLs.
            image_src = response.xpath('//img[@id]/@src')[0].extract()
            item['picture'] = image_src.replace(
                '..', 'https://www.casaximena.cl').replace(
                    'public/img/productos',
                    'https://www.casaximena.cl/new/public/img/productos')

            # Dots are stripped before int(), so "1.990" becomes 1990.
            price_text = response.xpath(
                '//*[@id="main"]/div[2]/h3[2]/span/text()').re(r'\d\S*')[0]
            item['price'] = int(price_text.replace('.', ''))
            item['brand'] = ""
            item['store'] = "casaximena"
            item['id_store'] = 14

            tags = response.xpath('//*[@id="main"]/ul/li/a/text()').re(
                r'\S.+\S')
            # The first link is dropped; slicing (instead of pop(0)) keeps an
            # empty list from raising.
            categories = (tags[1:6] + [""] * 5)[:5]
            for number, tag in enumerate(categories, 1):
                item['tag%d' % number] = tag

            yield item
        except IOError as err:
            # The old handler printed an undefined name and therefore raised
            # NameError instead of reporting the failure.
            print('cannot open %s: %s' % (response.url, err))

예제 #7

0

파일 보기

    def parse_item(self, response):
        """Build one AliceItem for the "ripley" store from a product page.

        Title, picture and price are mandatory (a missing node raises
        IndexError). tag1..tag3 come from fixed positions of the breadcrumb
        text nodes, tag4/tag5 from the first two comma-separated entries of
        the "keyword" meta tag; every missing tag is stored as "".

        :param response: page response exposing ``url`` and ``xpath()``.
        :returns: generator yielding a single populated item.
        """
        item = AliceItem()
        item['url'] = response.url
        # The former fallback called .title() on a list and could only raise,
        # so the name is simply required here.
        item['title'] = response.xpath(
            '//span[@itemprop="name"]/text()').re(r'(\S.+\S)')[0].title()
        item['picture'] = "http://www.ripley.cl/ripley-chile" + str(
            response.xpath('//img[@id="imagen-mini"]/@src').extract()[0])
        # Dots are stripped before int(), so "1.990" becomes 1990.
        price_text = response.xpath('//p[@class="ofomp"]/text()').re(
            r'\d\S*')[0]
        item['price'] = int(price_text.replace(".", ""))
        # The brand is not scraped for this store.
        item['brand'] = ""
        item['store'] = "ripley"
        item['id_store'] = 2

        # Evaluate the breadcrumb once; categories sit at text nodes 5, 8, 11.
        crumbs = response.xpath('//*[@id="breadcrumb"]//text()').extract()
        for number, position in enumerate((5, 8, 11), 1):
            if position < len(crumbs):
                item['tag%d' % number] = crumbs[position].title()
            else:
                # Previously the field was left unset, unlike tag5.
                item['tag%d' % number] = ""

        keyword_meta = response.xpath(
            '//meta[@name="keyword"]/@content').extract()
        keywords = keyword_meta[0].split(',') if keyword_meta else []
        item['tag4'] = keywords[0].title() if keywords else ""
        item['tag5'] = keywords[1].title() if len(keywords) > 1 else ""

        yield item

예제 #8

0

파일 보기

    def parse_item(self, response):
        """Build one AliceItem for the "Paris" store from a product page.

        Title, picture and price are mandatory (a missing node raises
        IndexError). Category tags are title-cased breadcrumb entries; tag1
        falls back to a placeholder label and tag2..tag5 to "".

        :param response: page response exposing ``url`` and ``xpath()``.
        :returns: generator yielding a single populated item.
        """
        item = AliceItem()
        item['url'] = response.url
        item['title'] = response.xpath(
            '//meta[@property="og:description"]/@content').extract()[0].title(
            ).encode('ascii', 'ignore')
        item['picture'] = response.xpath(
            '//meta[@property="og:image"]/@content').extract()[0]
        # Dots are stripped before int(), so "1.990" becomes 1990.
        price_text = response.xpath(
            '//div[@class="price offerPrice bold"]/text()').re(r'\d\S*')[0]
        item['price'] = int(price_text.replace(".", ""))
        # The brand is not scraped for this store.
        item['brand'] = ""
        item['store'] = "Paris"
        item['id_store'] = 3

        tags = response.xpath(
            '//*[@id="WC_BreadCrumbTrailDisplay_div_1"]//text()').re(r'\w.+')
        # The last breadcrumb entry is dropped; slicing (instead of pop())
        # keeps an empty trail from raising.
        categories = [tag.title() for tag in tags[:-1][:5]]
        categories += [""] * (5 - len(categories))

        # An empty or missing first category gets the placeholder label.
        item['tag1'] = categories[0] or "Sin categoría"
        for number, tag in enumerate(categories[1:], 2):
            item['tag%d' % number] = tag
        yield item

예제 #9

0

파일 보기

    def parse_product(self, response):
        """Build one AliceItem for the "dafiti" store from a product page.

        Title, picture and price are mandatory (a missing node raises
        IndexError); the five category tags default to "".

        :param response: page response exposing ``url`` and ``xpath()``.
        :returns: generator yielding a single populated item.
        """
        item = AliceItem()
        item['url'] = response.url
        item['title'] = response.xpath(
            '/html/head/meta[@property="og:title"]/@content').extract()[0]
        item['picture'] = response.xpath(
            '/html/head/meta[@property="og:image"]/@content').extract()[0]
        # Only the first run of digits of the price attribute is used.
        item['price'] = int(
            response.xpath('//meta[@itemprop="price"]/@content').re(r'\d+')[0])
        # The brand is not scraped for this store.
        item['brand'] = ""
        item['store'] = "dafiti"
        item['id_store'] = 17

        tags = response.xpath('//li[@class="prs"]/a/@title').extract()
        # The first entry is skipped; tag1..tag5 take the next five entries
        # and are padded with "" when fewer are present.
        categories = (tags[1:6] + [""] * 5)[:5]
        for number, tag in enumerate(categories, 1):
            item['tag%d' % number] = tag
        yield item

예제 #10

0

파일 보기

    def parse_product(self, response):
        """Build one AliceItem for the "zmart" store from a product page.

        Title, picture and price are mandatory (a missing node raises
        IndexError). The category is fixed: tag1 is a constant label and
        tag2..tag5 are always "".

        :param response: page response exposing ``url`` and ``xpath()``.
        :returns: generator yielding a single populated item.
        """
        item = AliceItem()
        item['url'] = response.url
        item['title'] = response.xpath(
            '//*[@id="producto"]/h1/a/text()').extract()[0]
        # The image src on the page is site-relative, so the host is prepended.
        item['picture'] = "http://www.zmart.cl" + response.xpath(
            '//*[@id="imagen_producto"]/img/@src').extract()[0]
        # Dots are stripped before int(), so "1.990" becomes 1990.
        price_text = response.xpath('//div[@id="PriceProduct"]//text()').re(
            r'\d.*\d')[0]
        item['price'] = int(price_text.replace('.', ''))
        # The brand is not scraped for this store.
        item['brand'] = ""
        item['store'] = "zmart"
        item['id_store'] = 15

        # The unused meta[22] lookup was removed: its value was never read,
        # yet a page without that tag raised IndexError and lost the item.
        item['tag1'] = "Videojuegos y Consolas"
        for number in range(2, 6):
            item['tag%d' % number] = ""
        yield item

예제 #11

0

파일 보기

    def parse_product(self, response):
        """Build one AliceItem for the "lapolar" store from a product page.

        Title and price are mandatory (a missing node raises IndexError).
        A missing picture is stored as "" and the five category tags default
        to "".

        :param response: page response exposing ``url`` and ``xpath()``.
        :returns: generator yielding a single populated item.
        """
        try:
            item = AliceItem()
            item['url'] = response.url

            # The former title and price fallbacks called str methods on a
            # list and could only raise, so both fields are simply required.
            item['title'] = response.xpath(
                '//*[@class="titulo1 descrip_jq"]/text()').extract(
                )[0].encode('ascii', 'ignore')

            # NOTE(review): positional meta[3] depends on the order of the
            # <meta> tags in the page head - confirm it is still valid.
            pictures = response.xpath('/html/head/meta[3]/@content').extract()
            # Previously a missing picture was stored as an empty list.
            item['picture'] = pictures[0] if pictures else ""

            # Dots are stripped before int(), so "1.990" becomes 1990.
            price_text = response.xpath(
                '//*[@class="precio precio_jq"]/text()').re(r'\d\S*')[0]
            item['price'] = int(price_text.replace('.', ''))

            item['brand'] = ""
            item['store'] = "lapolar"
            item['id_store'] = 4

            tags = response.xpath(
                '//tr[not(@id)]/td[@valign="top"]/div[@width]/a/text()'
            ).extract()
            # tag1..tag5 take the first five links, padded with "".
            categories = (tags[:5] + [""] * 5)[:5]
            for number, tag in enumerate(categories, 1):
                item['tag%d' % number] = tag
            yield item
        except IOError as err:
            # The old handler printed an undefined name and therefore raised
            # NameError instead of reporting the failure.
            print('cannot open %s: %s' % (response.url, err))

예제 #12

0

파일 보기

    def parse_product(self, response):
        """Build one AliceItem for the "easy" store from a product page.

        Pages whose <title> contains "Problemas" are reported and skipped.
        Otherwise exactly one item is yielded. The price is mandatory; a
        missing title or picture is stored as ""; tag1 falls back to a
        placeholder label and tag2..tag5 to "".

        :param response: page response exposing ``url`` and ``xpath()``.
        :returns: generator yielding at most one populated item.
        """
        page_title = response.xpath('//title/text()').extract()[0]
        if "Problemas" in page_title:
            print("Tecnical Problem on Server")
            return

        try:
            item = AliceItem()
            item['url'] = response.url

            # Previously a missing title was stored as an empty list.
            titles = response.xpath(
                '/html/head/meta[@property="og:title"]/@content').extract()
            item['title'] = titles[0] if titles else ""

            # The old fallback read og:title here and stored a list.
            pictures = response.xpath(
                '/html/head/meta[@property="og:image"]/@content').extract()
            item['picture'] = pictures[0] if pictures else ""

            # First numeric token across all price nodes; when that is absent
            # or not an integer, use the second price node instead. Dots are
            # stripped before int() in both cases.
            price_nodes = response.xpath(
                '//*[starts-with(@class,"txt_info_precio_")]/text()')
            try:
                item['price'] = int(
                    price_nodes.re(r'\d\S*')[0].replace('.', ''))
            except (IndexError, ValueError):
                item['price'] = int(
                    price_nodes[1].re(r'\d\S*')[0].replace('.', ''))

            item['brand'] = ""
            item['store'] = "easy"
            item['id_store'] = 10

            tags = response.xpath(
                '//div[@class="ruta_ubicacion_lista" and not(@style)]//text()'
            ).re(r'\w.+')
            # The first two breadcrumb entries are dropped; slicing (instead
            # of two pop(0) calls) keeps a short trail from raising.
            categories = [tag.title() for tag in tags[2:7]]
            categories += [""] * (5 - len(categories))

            # An empty or missing first category gets the placeholder label.
            item['tag1'] = categories[0] or "Sin categoría"
            for number, tag in enumerate(categories[1:], 2):
                item['tag%d' % number] = tag

            # Yield once: the duplicated yield emitted every item twice.
            yield item
        except IOError as err:
            # The old handler used an undefined name and therefore raised
            # NameError instead of reporting the failure.
            print('cannot open %s: %s' % (response.url, err))
            logging.warning(err)

예제 #13

0

파일 보기

    def parse_product(self, response):
        """Build one AliceItem for the "hites" store from a product page.

        Title, price and brand are mandatory (a missing node raises
        IndexError). A missing picture is stored as "" and the five category
        tags default to "".

        :param response: page response exposing ``url`` and ``xpath()``.
        :returns: generator yielding a single populated item.
        """
        try:
            item = AliceItem()
            item['url'] = response.url

            # The former title and price fallbacks called str methods on a
            # list and could only raise, so both fields are simply required.
            item['title'] = response.xpath(
                '//meta[@name="description"]/@content').extract()[0].title(
                ).encode('ascii', 'ignore')

            # The old fallback queried og:title through a malformed '///'
            # path; a missing image now simply yields "".
            pictures = response.xpath(
                '//meta[@property="og:image"]/@content').extract()
            item['picture'] = pictures[0] if pictures else ""

            # Dots are stripped before int(), so "1.990" becomes 1990.
            price_text = response.xpath(
                '//span[@id="our_price_display"]/strong/text()').re(
                    r'\d\S*')[0]
            item['price'] = int(price_text.replace('.', ''))

            item['brand'] = response.xpath(
                '//*[@id="pb-left-column"]/h1/text()').extract()[0].title()
            item['store'] = "hites"
            item['id_store'] = 12

            tags = response.xpath(
                '//*[@id="center_column"]/div[@class="breadcrumb"]/a/text()'
            ).extract()
            # The first breadcrumb link is dropped; slicing (instead of
            # pop(0)) keeps an empty trail from raising.
            categories = (tags[1:6] + [""] * 5)[:5]
            for number, tag in enumerate(categories, 1):
                item['tag%d' % number] = tag

            yield item
        except IOError as err:
            # The old handler printed an undefined name and therefore raised
            # NameError instead of reporting the failure.
            print('cannot open %s: %s' % (response.url, err))

예제 #14

0

파일 보기

파일: abcdin_spider.py 프로젝트: chwlibre/alice-odum

    def parse_item(self, response):
        """Build one AliceItem for the "abcdin" store from a product page.

        When the page carries an unavailable-product notice containing
        "no se encuentra", a message is printed and nothing is yielded.
        Otherwise one item is yielded. Title, picture and price are
        mandatory; tag1 falls back to a placeholder label and tag2..tag5
        to "".

        :param response: page response exposing ``url`` and ``xpath()``.
        :returns: generator yielding at most one populated item.
        """
        notice = response.xpath(
            '//*[@id="producto-no-disponible-texto"]/div/text()').extract()
        if notice and "no se encuentra" in notice[0]:
            print("Product not found")
            return

        try:
            item = AliceItem()
            item['url'] = response.url

            # Prefer the description meta tag, else the catalog link text.
            descriptions = response.xpath(
                '//html/head/meta[@name="description"]/@content').extract()
            if descriptions:
                item['title'] = descriptions[0].title()
            else:
                item['title'] = response.xpath(
                    '//*[@id="catalog_link"]/text()').extract()[0].title()

            # The image src on the page is site-relative, so the host is
            # prepended.
            item['picture'] = "http://www.abcdin.cl" + response.xpath(
                '//img[@id="productMainImage"]/@src').extract()[0]
            # Dots are stripped before int(), so "1.990" becomes 1990.
            price_text = response.xpath(
                '//td[@class="offerprice"]/text()').re(r'\d\S*')[0]
            item['price'] = int(price_text.replace(".", ""))
            # The brand is not scraped for this store.
            item['brand'] = ""
            item['store'] = "abcdin"
            item['id_store'] = 5

            tags = response.xpath(
                '//div[@class="breadcrumb_links"]//text()').re(r'\w.+')
            # The first breadcrumb entry is dropped; slicing (instead of
            # pop(0)) keeps an empty trail from raising.
            categories = [tag.title() for tag in tags[1:6]]
            categories += [""] * (5 - len(categories))

            # An empty or missing first category gets the placeholder label.
            item['tag1'] = categories[0] or "Sin categoría"
            for number, tag in enumerate(categories[1:], 2):
                item['tag%d' % number] = tag
            yield item
        except IOError as err:
            # The old handler printed an undefined name and therefore raised
            # NameError instead of reporting the failure.
            print('cannot open %s: %s' % (response.url, err))