def parse_item(self, response):
        """Refresh the stored image columns for the product at ``response.url``.

        Extracts the main image and the gallery links from the page, maps each
        URL through ``sha1FileName`` and, when a ``products`` row with the same
        ``link`` exists, updates its ``image`` / ``images`` columns and commits.
        Every collected image URL is then passed to ``downloadImageFromUrl``.

        :param response: response of the product page being parsed.
        """
        link = response.url
        pil = ProductItemLoader(item=ProductItem(), response=response)
        pil.add_xpath('image', '//*[@id="image"]/@src')
        product = pil.load_item()

        # Product detail (gallery) images, resolved against the page URL.
        gallery = Selector(response).xpath(
            '//*[@class="more-views"]/ul[1]/li/a/@href')
        image_urls = [response.urljoin(img.extract()) for img in gallery]
        dataImages = [sha1FileName(url) for url in image_urls]

        # The main image is downloaded as well, but is not listed in ``images``.
        image_urls.append(product['image'])

        product['image'] = sha1FileName(product['image'])
        product['images'] = ','.join(dataImages)

        query = "SELECT * FROM products WHERE link = %s"
        # The parameters must be a sequence: ``(link)`` is just the bare
        # string, ``(link,)`` is the one-element tuple the driver expects.
        self.cursor.execute(query, (link,))
        result = self.cursor.fetchone()

        if result:
            productId = result['id']
            sql = "UPDATE products SET image = %s, images = %s WHERE id = %s"
            self.cursor.execute(
                sql, (product['image'], product['images'], productId))
            self.conn.commit()

        # Download and push to bucket
        for image in image_urls:
            # NOTE(review): neither value is used further in the visible code;
            # the upload step presumably follows elsewhere -- confirm.
            imageName = sha1FileName(image)
            thumbs = downloadImageFromUrl(image)
    def parse_item(self, response):
        """Refresh the stored image columns for the product at ``response.url``.

        Extracts the main image and the gallery links from the page, maps each
        URL through ``sha1FileName`` and, when a ``products`` row with the same
        ``link`` exists, updates its ``image`` / ``images`` columns and commits.
        Every collected image URL is then passed to ``downloadImageFromUrl``.

        :param response: response of the product page being parsed.
        """
        link = response.url
        pil = ProductItemLoader(item=ProductItem(), response=response)
        pil.add_xpath('image', '//*[@id="image"]/@src')
        product = pil.load_item()

        # Product detail (gallery) images, resolved against the page URL.
        gallery = Selector(response).xpath(
            '//*[@class="more-views"]/ul[1]/li/a/@href')
        image_urls = [response.urljoin(img.extract()) for img in gallery]
        dataImages = [sha1FileName(url) for url in image_urls]

        # The main image is downloaded as well, but is not listed in ``images``.
        image_urls.append(product['image'])

        product['image'] = sha1FileName(product['image'])
        product['images'] = ','.join(dataImages)

        query = "SELECT * FROM products WHERE link = %s"
        # The parameters must be a sequence: ``(link)`` is just the bare
        # string, ``(link,)`` is the one-element tuple the driver expects.
        self.cursor.execute(query, (link,))
        result = self.cursor.fetchone()

        if result:
            productId = result['id']
            sql = "UPDATE products SET image = %s, images = %s WHERE id = %s"
            self.cursor.execute(
                sql, (product['image'], product['images'], productId))
            self.conn.commit()

        # Download and push to bucket
        for image in image_urls:
            # NOTE(review): neither value is used further in the visible code;
            # the upload step presumably follows elsewhere -- confirm.
            imageName = sha1FileName(image)
            thumbs = downloadImageFromUrl(image)
    def parse_product_detail(self, response):
        """Bundle the scraped name/price with the request's merchant context.

        Loads ``name``, ``price`` and ``link`` into a ``ProductItem`` and wraps
        it in a dict together with the ``merchant`` and ``product`` entries
        carried in ``response.meta``. When ``self.env`` is ``'dev'`` the dict
        is only printed and nothing is yielded; otherwise it is yielded.
        """
        loader = ProductItemLoader(item=ProductItem(), response=response)
        loader.add_xpath('name', '//h1[@id="detail_product_name"]/text()')
        loader.add_xpath(
            'price',
            '//div[@id="detail_product_price"]//b[@class="product_price"]/text()')
        loader.add_value('link', response.url)
        scraped = loader.load_item()

        merchant = response.meta['merchant']
        item = {
            "merchant": merchant,
            "product": response.meta['product'],
            "price_item": scraped,
            "image_links": merchant["image_links"],
        }

        if self.env == 'dev':
            # Development runs only dump the payload.
            print(item)
        else:
            yield item
	def parse_detail_content(self, response):
		"""Yield a ``ProductPriceItem`` scraped from a product detail page.

		Title, price and brand are read via XPath; ``source`` is the host of
		the page URL. After loading, ``brand`` is overwritten with the first
		space-separated word of the title.
		"""
		page_url = response.url
		host = urlparse(page_url).netloc
		stamp_format = "%Y-%m-%d %H:%M:%S"

		loader = ProductItemLoader(item=ProductPriceItem(), response=response)
		loader.add_xpath('title', '//*[@id="product_addtocart_form"]//h1/text()')
		loader.add_xpath('price', '//*[@id="price"]')
		loader.add_xpath('brand', '//*[@class="brand"]/a[1]/text()')
		loader.add_value('source', host)
		loader.add_value('link', page_url)
		loader.add_value('created_at', strftime(stamp_format))
		loader.add_value('updated_at', strftime(stamp_format))

		item = loader.load_item()

		# The brand is taken from the leading word of the title.
		title = loader.get_value(item['title'])
		item['brand'] = title.split(" ")[0]

		yield item
    def parse_detail_content(self, response):
        """Build a ``ProductItem`` from a product detail page and yield it.

        Name, main image, spec, price and brand are read via XPath. Gallery
        thumbnails are resolved against the page URL with ``Thumbs`` replaced
        by ``Originals``. Image fields are stored as ``<sha1 of URL>.jpg``,
        while the full URLs are kept in ``image_urls``.

        :param response: response of the product detail page.
        """
        def sha1_jpg(url):
            # hashlib works on bytes; text URLs are encoded explicitly so that
            # non-ASCII characters do not raise. ASCII URLs hash as before.
            if not isinstance(url, bytes):
                url = url.encode('utf-8')
            return hashlib.sha1(url).hexdigest() + '.jpg'

        pil = ProductItemLoader(item=ProductItem(), response=response)
        pil.add_xpath(
            'name',
            '//*[@class="detail-top-left"]//h1[@class="detail-name"]//text()')
        pil.add_xpath(
            'image',
            '//*[@class="detail-main-image left"]//img[@id="default_image"]/@src'
        )
        pil.add_xpath('spec', '//*[@class="detail-main-specification"]')
        pil.add_xpath('price',
                      '//*[@class="detail-current-price"]/strong[1]//text()')
        pil.add_xpath('brand', '//*[@class="breadcrumb"]/li[3]/a/text()')

        # Product detail images
        thumbs = Selector(response).xpath(
            '//*[@class="detail-thumbnail clearfix"]/a/img/@data-original')

        dataImage = []
        image_urls = []
        for img in thumbs:
            imgLink = response.urljoin(img.extract())
            imgLink = imgLink.replace('Thumbs', 'Originals')
            image_urls.append(imgLink)
            dataImage.append(sha1_jpg(imgLink))

        product = pil.load_item()

        # NOTE(review): assumes the extracted src is a site-relative path.
        product['image'] = 'http://fptshop.com.vn' + product['image']
        image_urls.append(pil.get_value(product['image']))

        # Price: keep the digits only.
        price = pil.get_value(product['price'].encode('utf-8'))
        price = re.sub(r'\D', '', price)

        now = strftime("%Y-%m-%d %H:%M:%S")

        product['price'] = price
        product['link'] = response.url
        product['image_urls'] = image_urls
        product['image'] = sha1_jpg(pil.get_value(product['image']))
        product['images'] = ','.join(dataImage)
        product['hash_name'] = hashlib.md5(
            pil.get_value(product['name']).encode('utf-8')).hexdigest()
        product['brand'] = pil.get_value(product['brand'])
        product['typ'] = 'product'
        product['created_at'] = now
        product['updated_at'] = now

        yield product
    def parse_detail_content(self, response):
        """Build a ``ProductItem`` from a product detail page and yield it.

        Name, main image, spec, price and brand are read via XPath; gallery
        images come from the ``data-src`` attributes under
        ``#characteristics``. Image fields are stored as ``<sha1 of URL>.jpg``,
        while the full URLs are kept in ``image_urls``.

        :param response: response of the product detail page.
        """
        def sha1_jpg(url):
            # hashlib works on bytes; text URLs are encoded explicitly so that
            # non-ASCII characters do not raise. ASCII URLs hash as before.
            if not isinstance(url, bytes):
                url = url.encode('utf-8')
            return hashlib.sha1(url).hexdigest() + '.jpg'

        pil = ProductItemLoader(item=ProductItem(), response=response)
        pil.add_xpath('name', '//*[@class="rowtop"]/h1//text()')
        pil.add_xpath('image', '//*[@class="boxright"]/aside[1]/img/@src')
        pil.add_xpath('spec', '//*[@class="parameter"]')
        pil.add_xpath('price', '//*[@class="price_sale"]/strong[1]/text()')
        pil.add_xpath('brand',
                      '//*[@class="breadcrumb"]/li[@class="brand"]/a/text()')

        # Product detail images
        gallery = Selector(response).xpath(
            '//*[@id="characteristics"]/div/a/img/@data-src')

        dataImage = []
        image_urls = []
        for img in gallery:
            imgLink = response.urljoin(img.extract())
            image_urls.append(imgLink)
            dataImage.append(sha1_jpg(imgLink))

        product = pil.load_item()

        image_urls.append(pil.get_value(product['image']))

        # Price: keep the digits only.
        price = pil.get_value(product['price'].encode('utf-8'))
        price = re.sub(r'\D', '', price)

        now = strftime("%Y-%m-%d %H:%M:%S")

        product['price'] = price
        product['link'] = response.url
        product['image_urls'] = image_urls
        product['image'] = sha1_jpg(pil.get_value(product['image']))
        product['images'] = ','.join(dataImage)
        product['hash_name'] = hashlib.md5(
            pil.get_value(product['name']).encode('utf-8')).hexdigest()
        product['brand'] = pil.get_value(product['brand'])
        product['typ'] = 'product'
        product['created_at'] = now
        product['updated_at'] = now

        yield product
    def get_product(self, response):
        """Assemble a ``ProductItem`` from a detail page using per-site config.

        The XPaths and the static attributes (ids, type flags, category) come
        from ``response.meta['link_item']``. Image fields are stored as the
        result of ``sha1FileName``; the full URLs are kept in ``image_links``.

        :param response: response of the product detail page.
        :return: the populated item (returned, not yielded).
        """
        link = response.url
        linkItem = response.meta['link_item']

        pil = ProductItemLoader(item=ProductItem(), response=response)

        pil.add_xpath('image', linkItem['image'])
        pil.add_xpath('name', linkItem['meta_xpath_name'])
        pil.add_xpath('price', linkItem['meta_xpath_price'])
        pil.add_value('source_id', linkItem['site_id'])
        pil.add_value('brand_id', linkItem['brand_id'])
        pil.add_xpath('spec', linkItem['meta_xpath_spec'])
        pil.add_value('link', link)
        pil.add_value('is_laptop', linkItem['is_laptop'])
        pil.add_value('is_mobile', linkItem['is_mobile'])
        pil.add_value('is_tablet', linkItem['is_tablet'])
        pil.add_value('is_camera', linkItem['is_camera'])
        pil.add_value('type', linkItem['type'])
        pil.add_value('category_id', linkItem['category_id'])

        # Product detail (gallery) images, resolved against the page URL.
        gallery = Selector(response).xpath(linkItem['images'])
        image_urls = [response.urljoin(img.extract()) for img in gallery]
        dataImage = [sha1FileName(url) for url in image_urls]

        product = pil.load_item()

        parseUrlImage = urlparse(product['image'])

        if parseUrlImage.scheme == '':
            if parseUrlImage.netloc:
                # Protocol-relative URL ("//host/path"): keep the established
                # normalisation to http://host/path.
                product['image'] = urljoin('http://' + parseUrlImage.netloc,
                                           parseUrlImage.path)
            else:
                # Plain relative path: there is no host to prepend, so joining
                # onto "http://" would give "http:///path". Resolve against
                # the page URL instead.
                product['image'] = response.urljoin(product['image'])

        image_urls.append(product['image'])

        # Price: keep the digits only ("0" when no price was extracted).
        price = pil.get_value(product.get('price', "0").encode('utf-8'))
        price = re.sub(r'\D', '', price)

        now = strftime("%Y-%m-%d %H:%M:%S")

        product['name'] = product['name'].strip()
        product['image'] = sha1FileName(product['image'])
        product['images'] = ','.join(dataImage)
        product['image_links'] = image_urls
        product['hash_name'] = md5(product['name'].encode('utf-8'))
        product['price'] = price
        product['min_price'] = price
        product['created_at'] = now
        product['updated_at'] = now

        return product
    def parse_detail_content(self, response):
        """Yield a ``ProductPriceItem`` scraped from a product detail page.

        Title, price and brand are read via XPath; ``source`` is the host of
        the page URL. After loading, ``brand`` is overwritten with the first
        space-separated word of the title.
        """
        page_url = response.url
        host = urlparse(page_url).netloc
        stamp_format = "%Y-%m-%d %H:%M:%S"

        loader = ProductItemLoader(item=ProductPriceItem(), response=response)
        loader.add_xpath('title', '//*[@id="product_addtocart_form"]//h1/text()')
        loader.add_xpath('price', '//*[@id="price"]')
        loader.add_xpath('brand', '//*[@class="brand"]/a[1]/text()')
        loader.add_value('source', host)
        loader.add_value('link', page_url)
        loader.add_value('created_at', strftime(stamp_format))
        loader.add_value('updated_at', strftime(stamp_format))

        item = loader.load_item()

        # The brand is taken from the leading word of the title.
        title = loader.get_value(item['title'])
        item['brand'] = title.split(" ")[0]

        yield item
Beispiel #9
0
    def getProductJson(self, response):
        """Build a ``ProductItem`` from a JSON record carried in the request meta.

        ``response.meta['item']`` is the record and ``response.meta['linkItem']``
        names the record keys to read and supplies the static attributes.

        :return: the populated item (returned, not yielded).
        """
        linkItem = response.meta['linkItem']
        site = response.meta['site']  # looked up but not used below
        record = response.meta['item']

        link = record.get(linkItem['xpath_link_detail'])

        loader = ProductItemLoader(item=ProductItem(), response=response)

        # Values read from the JSON record: (item field, config key).
        for field, config_key in (('name', 'meta_xpath_name'),
                                  ('image', 'image'),
                                  ('price', 'meta_xpath_price')):
            loader.add_value(field, record.get(linkItem[config_key]))

        loader.add_value('site_id', linkItem['site_id'])
        loader.add_value('brand_id', linkItem['brand_id'])
        loader.add_value('link', link)

        # Values copied verbatim from the link configuration.
        for field in ('is_laptop', 'is_mobile', 'is_tablet', 'type'):
            loader.add_value(field, linkItem[field])

        product = loader.load_item()

        # Price: keep the digits only ("0" when no price was loaded).
        raw_price = loader.get_value(product.get('price', "0").encode('utf-8'))
        digits = re.sub(r'\D', '', raw_price)

        product['name'] = product['name'].strip(' \t\n\r').strip()
        product['image'] = sha1FileName(product['image'])

        product['price'] = digits
        product['min_price'] = digits
        product['hash_name'] = md5(product['name'].encode('utf-8'))

        stamp_format = "%Y-%m-%d %H:%M:%S"
        product['created_at'] = strftime(stamp_format)
        product['updated_at'] = strftime(stamp_format)

        return product
    def parse_detail_content(self, response):
        """Yield a ``ProductPriceItem`` built from ``self.configs`` XPaths.

        ``self.configs`` supplies the title/price XPaths and the source
        name/id; the brand and device-type flags are all loaded as ``0``.
        """
        link = response.url
        url_parts = urlparse(link)  # parsed but not used below

        loader = ProductItemLoader(item=ProductPriceItem(), response=response)
        loader.add_xpath('title', self.configs['title'])
        loader.add_xpath('price', self.configs['price'])
        loader.add_value('source', self.configs['source'])
        loader.add_value('source_id', self.configs['source_id'])
        for flag in ('brand_id', 'is_phone', 'is_tablet', 'is_laptop'):
            loader.add_value(flag, 0)
        loader.add_value('link', link)

        product = loader.load_item()

        # Price: keep the digits only ("0" when no price was extracted).
        raw_price = loader.get_value(product.get('price', "0").encode('utf-8'))
        digits = re.sub(r'\D', '', raw_price)

        # A former step that removed category words (e.g. 'Laptop') from the
        # title, and one that derived the brand from the title, are disabled.
        product['title'] = product['title'].strip(' \t\n\r').strip()
        product['name'] = product['title']

        product['price'] = digits
        stamp_format = "%Y-%m-%d %H:%M:%S"
        for field in ('created_at', 'updated_at', 'crawled_at'):
            product[field] = strftime(stamp_format)

        yield product
Beispiel #11
0
    def parse(self, response):
        """Scrape title and price from a product page and yield the item.

        The price is reduced to its digits and the three timestamp fields are
        set before the ``ProductPriceItem`` is yielded.

        :param response: response of the product page.
        """
        link = response.url

        pil = ProductItemLoader(item=ProductPriceItem(), response=response)
        pil.add_xpath(
            'title', '//*[@id="topdetail"]//div[@class="rowtop"]/h1[1]/text()')
        pil.add_xpath(
            'price',
            '//*[@id="topdetail"]//*[@class="boxright"]//*[@class="price_sale"]/strong/text() | //*[@id="topdetail"]//*[@class="boxright"]//strong[@class="pricesell"]/text()'
        )

        pil.add_value('source', 'www.thegioididong.com')
        pil.add_value('source_id', 1)
        pil.add_value('brand_id', 0)
        pil.add_value('is_phone', 0)
        pil.add_value('is_laptop', 0)
        pil.add_value('is_tablet', 0)
        pil.add_value('link', link)

        product = pil.load_item()

        # A leftover debug print/return used to stop here, which made the
        # cleanup below and the final yield unreachable.

        # Price: keep the digits only ("0" when no price was extracted).
        price = pil.get_value(product.get('price', "0").encode('utf-8'))
        price = re.sub(r'\D', '', price)

        now = strftime("%Y-%m-%d %H:%M:%S")

        product['price'] = price
        product['created_at'] = now
        product['updated_at'] = now
        product['crawled_at'] = now

        yield product
    def parse(self, response):
        """Scrape title and price from a product page and yield the item.

        The price is reduced to its digits and the three timestamp fields are
        set before the ``ProductPriceItem`` is yielded.

        :param response: response of the product page.
        """
        link = response.url

        pil = ProductItemLoader(item=ProductPriceItem(), response=response)
        pil.add_xpath(
            'title', '//*[@id="topdetail"]//div[@class="rowtop"]/h1[1]/text()')
        pil.add_xpath(
            'price',
            '//*[@id="topdetail"]//*[@class="boxright"]//*[@class="price_sale"]/strong/text() | //*[@id="topdetail"]//*[@class="boxright"]//strong[@class="pricesell"]/text()'
        )

        pil.add_value('source', 'www.thegioididong.com')
        pil.add_value('source_id', 1)
        pil.add_value('brand_id', 0)
        pil.add_value('is_phone', 0)
        pil.add_value('is_laptop', 0)
        pil.add_value('is_tablet', 0)
        pil.add_value('link', link)

        product = pil.load_item()

        # A leftover debug print/return used to stop here, which made the
        # cleanup below and the final yield unreachable.

        # Price: keep the digits only ("0" when no price was extracted).
        price = pil.get_value(product.get('price', "0").encode('utf-8'))
        price = re.sub(r'\D', '', price)

        now = strftime("%Y-%m-%d %H:%M:%S")

        product['price'] = price
        product['created_at'] = now
        product['updated_at'] = now
        product['crawled_at'] = now

        yield product
    def parse_detail_content(self, response):
        """Yield a ``ProductPriceItem`` from data passed in ``response.meta``.

        The title and price come from ``response.meta['product']`` rather than
        from the page; the source is fixed to ``www.iservice.vn`` (id 105) and
        the brand and device-type flags are all loaded as ``0``.
        """
        link = response.url
        meta_product = response.meta['product']

        loader = ProductItemLoader(item=ProductPriceItem(), response=response)
        loader.add_value('title', meta_product['name'])
        loader.add_value('price', meta_product['price'])
        loader.add_value('source', 'www.iservice.vn')
        loader.add_value('source_id', 105)
        for flag in ('brand_id', 'is_phone', 'is_laptop', 'is_tablet'):
            loader.add_value(flag, 0)
        loader.add_value('link', link)

        product = loader.load_item()

        # Price: keep the digits only ("0" when no price was loaded).
        raw_price = loader.get_value(product.get('price', "0").encode('utf-8'))
        digits = re.sub(r'\D', '', raw_price)

        product['title'] = product['title'].strip(' \t\n\r').strip()
        product['name'] = product['title']

        product['price'] = digits
        stamp_format = "%Y-%m-%d %H:%M:%S"
        for field in ('created_at', 'updated_at', 'crawled_at'):
            product[field] = strftime(stamp_format)

        yield product
    def parse_detail_content(self, response):
        """Build a ``ProductItem`` from a product detail page and yield it.

        Name, main image, spec, price and brand are read via XPath; gallery
        images come from the ``data-src`` attributes under
        ``#characteristics``. Image fields are stored as ``<sha1 of URL>.jpg``,
        while the full URLs are kept in ``image_urls``.

        :param response: response of the product detail page.
        """
        def sha1_jpg(url):
            # hashlib works on bytes; text URLs are encoded explicitly so that
            # non-ASCII characters do not raise. ASCII URLs hash as before.
            if not isinstance(url, bytes):
                url = url.encode('utf-8')
            return hashlib.sha1(url).hexdigest() + '.jpg'

        pil = ProductItemLoader(item=ProductItem(), response=response)
        pil.add_xpath('name', '//*[@class="rowtop"]/h1//text()')
        pil.add_xpath('image', '//*[@class="boxright"]/aside[1]/img/@src')
        pil.add_xpath('spec', '//*[@class="parameter"]')
        pil.add_xpath('price', '//*[@class="price_sale"]/strong[1]/text()')
        pil.add_xpath('brand',
                      '//*[@class="breadcrumb"]/li[@class="brand"]/a/text()')

        # Product detail images
        gallery = Selector(response).xpath(
            '//*[@id="characteristics"]/div/a/img/@data-src')

        dataImage = []
        image_urls = []
        for img in gallery:
            imgLink = response.urljoin(img.extract())
            image_urls.append(imgLink)
            dataImage.append(sha1_jpg(imgLink))

        product = pil.load_item()

        image_urls.append(pil.get_value(product['image']))

        # Price: keep the digits only.
        price = pil.get_value(product['price'].encode('utf-8'))
        price = re.sub(r'\D', '', price)

        now = strftime("%Y-%m-%d %H:%M:%S")

        product['price'] = price
        product['link'] = response.url
        product['image_urls'] = image_urls
        product['image'] = sha1_jpg(pil.get_value(product['image']))
        product['images'] = ','.join(dataImage)
        product['hash_name'] = hashlib.md5(
            pil.get_value(product['name']).encode('utf-8')).hexdigest()
        product['brand'] = pil.get_value(product['brand'])
        product['typ'] = 'product'
        product['created_at'] = now
        product['updated_at'] = now

        yield product
Beispiel #15
0
    def parse_detail_content(self, response):
        """Build a ``ProductItem`` from a product detail page and yield it.

        Name, main image, spec, gallery and price are read from the page. A
        missing price is treated as ``'0'`` and the brand is the first
        space-separated word of the name. Image fields are stored as
        ``<sha1 of URL>.jpg``, while the full URLs are kept in ``image_urls``.

        :param response: response of the product detail page.
        """
        def sha1_jpg(url):
            # hashlib works on bytes; text URLs are encoded explicitly so that
            # non-ASCII characters do not raise. ASCII URLs hash as before.
            if not isinstance(url, bytes):
                url = url.encode('utf-8')
            return hashlib.sha1(url).hexdigest() + '.jpg'

        pil = ProductItemLoader(item=ProductItem(), response=response)
        pil.add_xpath(
            'name',
            '//*[@class="product-big-right"]/div[@class="product-name"]/h1/text()')
        pil.add_xpath('image', '//*[@id="image"]/@src')
        pil.add_css('spec', '.content-thongso > ul')
        # Loaded here, but overwritten below with the hashed file names.
        pil.add_xpath('images', '//*[@class="more-views"]/ul[1]/li/a/@href')
        pil.add_xpath('price', '//*[@id="price"]//text()')

        # Product detail images
        gallery = Selector(response).xpath(
            '//*[@class="more-views"]/ul[1]/li/a/@href')

        dataImage = []
        image_urls = []
        for img in gallery:
            imgLink = response.urljoin(img.extract())
            image_urls.append(imgLink)
            dataImage.append(sha1_jpg(imgLink))

        product = pil.load_item()

        image_urls.append(pil.get_value(product['image']))

        # Price: keep the digits only; a missing price counts as '0'.
        price = pil.get_value(product.get('price', '0').encode('utf-8'))
        price = re.sub(r'\D', '', price)

        now = strftime("%Y-%m-%d %H:%M:%S")

        product['price'] = price
        product['link'] = response.url
        product['image_urls'] = image_urls
        product['image'] = sha1_jpg(pil.get_value(product['image']))
        product['images'] = ','.join(dataImage)
        product['hash_name'] = hashlib.md5(
            pil.get_value(product['name']).encode('utf-8')).hexdigest()
        product['brand'] = pil.get_value(product['name']).split(" ")[0]
        product['typ'] = 'product'
        product['created_at'] = now
        product['updated_at'] = now

        yield product
    def parse_detail_content(self, response):
        """Build a ``ProductItem`` from a product detail page and yield it.

        Name, main image, spec, price and brand are read via XPath. Gallery
        thumbnails are resolved against the page URL with ``Thumbs`` replaced
        by ``Originals``. Image fields are stored as ``<sha1 of URL>.jpg``,
        while the full URLs are kept in ``image_urls``.

        :param response: response of the product detail page.
        """
        def sha1_jpg(url):
            # hashlib works on bytes; text URLs are encoded explicitly so that
            # non-ASCII characters do not raise. ASCII URLs hash as before.
            if not isinstance(url, bytes):
                url = url.encode('utf-8')
            return hashlib.sha1(url).hexdigest() + '.jpg'

        pil = ProductItemLoader(item=ProductItem(), response=response)
        pil.add_xpath(
            'name',
            '//*[@class="detail-top-left"]//h1[@class="detail-name"]//text()')
        pil.add_xpath(
            'image',
            '//*[@class="detail-main-image left"]//img[@id="default_image"]/@src'
        )
        pil.add_xpath('spec', '//*[@class="detail-main-specification"]')
        pil.add_xpath('price',
                      '//*[@class="detail-current-price"]/strong[1]//text()')
        pil.add_xpath('brand', '//*[@class="breadcrumb"]/li[3]/a/text()')

        # Product detail images
        thumbs = Selector(response).xpath(
            '//*[@class="detail-thumbnail clearfix"]/a/img/@data-original')

        dataImage = []
        image_urls = []
        for img in thumbs:
            imgLink = response.urljoin(img.extract())
            imgLink = imgLink.replace('Thumbs', 'Originals')
            image_urls.append(imgLink)
            dataImage.append(sha1_jpg(imgLink))

        product = pil.load_item()

        # NOTE(review): assumes the extracted src is a site-relative path.
        product['image'] = 'http://fptshop.com.vn' + product['image']
        image_urls.append(pil.get_value(product['image']))

        # Price: keep the digits only.
        price = pil.get_value(product['price'].encode('utf-8'))
        price = re.sub(r'\D', '', price)

        now = strftime("%Y-%m-%d %H:%M:%S")

        product['price'] = price
        product['link'] = response.url
        product['image_urls'] = image_urls
        product['image'] = sha1_jpg(pil.get_value(product['image']))
        product['images'] = ','.join(dataImage)
        product['hash_name'] = hashlib.md5(
            pil.get_value(product['name']).encode('utf-8')).hexdigest()
        product['brand'] = pil.get_value(product['brand'])
        product['typ'] = 'product'
        product['created_at'] = now
        product['updated_at'] = now

        yield product
Beispiel #17
0
    def parse_detail_content(self, response):
        """Yield a ``ProductPriceItem`` from data passed in ``response.meta``.

        The title and price come from ``response.meta['product']`` rather than
        from the page; the source is fixed to ``www.iservice.vn`` (id 105) and
        the brand and device-type flags are all loaded as ``0``.
        """
        link = response.url
        meta_product = response.meta['product']

        loader = ProductItemLoader(item=ProductPriceItem(), response=response)
        loader.add_value('title', meta_product['name'])
        loader.add_value('price', meta_product['price'])
        loader.add_value('source', 'www.iservice.vn')
        loader.add_value('source_id', 105)
        for flag in ('brand_id', 'is_phone', 'is_laptop', 'is_tablet'):
            loader.add_value(flag, 0)
        loader.add_value('link', link)

        product = loader.load_item()

        # Price: keep the digits only ("0" when no price was loaded).
        raw_price = loader.get_value(product.get('price', "0").encode('utf-8'))
        digits = re.sub(r'\D', '', raw_price)

        product['title'] = product['title'].strip(' \t\n\r').strip()
        product['name'] = product['title']

        product['price'] = digits
        stamp_format = "%Y-%m-%d %H:%M:%S"
        for field in ('created_at', 'updated_at', 'crawled_at'):
            product[field] = strftime(stamp_format)

        yield product
Beispiel #18
0
    def get_product(self, response):
        """Assemble a ``ProductItem`` from a detail page using per-site config.

        The XPaths and the static attributes (ids, type flags, category) come
        from ``response.meta['link_item']``. Image fields are stored as the
        result of ``sha1FileName``; the full URLs are kept in ``image_links``.

        :param response: response of the product detail page.
        :return: the populated item (returned, not yielded).
        """
        link = response.url
        linkItem = response.meta['link_item']

        pil = ProductItemLoader(item=ProductItem(), response=response)

        pil.add_xpath('image', linkItem['image'])
        pil.add_xpath('name', linkItem['meta_xpath_name'])
        pil.add_xpath('price', linkItem['meta_xpath_price'])
        pil.add_value('source_id', linkItem['site_id'])
        pil.add_value('brand_id', linkItem['brand_id'])
        pil.add_xpath('spec', linkItem['meta_xpath_spec'])
        pil.add_value('link', link)
        pil.add_value('is_laptop', linkItem['is_laptop'])
        pil.add_value('is_mobile', linkItem['is_mobile'])
        pil.add_value('is_tablet', linkItem['is_tablet'])
        pil.add_value('is_camera', linkItem['is_camera'])
        pil.add_value('type', linkItem['type'])
        pil.add_value('category_id', linkItem['category_id'])

        # Product detail (gallery) images, resolved against the page URL.
        gallery = Selector(response).xpath(linkItem['images'])
        image_urls = [response.urljoin(img.extract()) for img in gallery]
        dataImage = [sha1FileName(url) for url in image_urls]

        product = pil.load_item()

        parseUrlImage = urlparse(product['image'])

        if parseUrlImage.scheme == '':
            if parseUrlImage.netloc:
                # Protocol-relative URL ("//host/path"): keep the established
                # normalisation to http://host/path.
                product['image'] = urljoin('http://' + parseUrlImage.netloc,
                                           parseUrlImage.path)
            else:
                # Plain relative path: there is no host to prepend, so joining
                # onto "http://" would give "http:///path". Resolve against
                # the page URL instead.
                product['image'] = response.urljoin(product['image'])

        image_urls.append(product['image'])

        # Price: keep the digits only ("0" when no price was extracted).
        price = pil.get_value(product.get('price', "0").encode('utf-8'))
        price = re.sub(r'\D', '', price)

        now = strftime("%Y-%m-%d %H:%M:%S")

        product['name'] = product['name'].strip()
        product['image'] = sha1FileName(product['image'])
        product['images'] = ','.join(dataImage)
        product['image_links'] = image_urls
        product['hash_name'] = md5(product['name'].encode('utf-8'))
        product['price'] = price
        product['min_price'] = price
        product['created_at'] = now
        product['updated_at'] = now

        return product
    def getProductJson(self, response):
        """Build a ``ProductItem`` from a JSON record carried in the request meta.

        ``response.meta['item']`` is the record and ``response.meta['linkItem']``
        names the record keys to read and supplies the static attributes.

        :return: the populated item (returned, not yielded).
        """
        linkItem = response.meta['linkItem']
        site = response.meta['site']  # looked up but not used below
        record = response.meta['item']

        link = record.get(linkItem['xpath_link_detail'])

        loader = ProductItemLoader(item=ProductItem(), response=response)

        # Values read from the JSON record: (item field, config key).
        for field, config_key in (('name', 'meta_xpath_name'),
                                  ('image', 'image'),
                                  ('price', 'meta_xpath_price')):
            loader.add_value(field, record.get(linkItem[config_key]))

        loader.add_value('site_id', linkItem['site_id'])
        loader.add_value('brand_id', linkItem['brand_id'])
        loader.add_value('link', link)

        # Values copied verbatim from the link configuration.
        for field in ('is_laptop', 'is_mobile', 'is_tablet', 'type'):
            loader.add_value(field, linkItem[field])

        product = loader.load_item()

        # Price: keep the digits only ("0" when no price was loaded).
        raw_price = loader.get_value(product.get('price', "0").encode('utf-8'))
        digits = re.sub(r'\D', '', raw_price)

        product['name'] = product['name'].strip(' \t\n\r').strip()
        product['image'] = sha1FileName(product['image'])

        product['price'] = digits
        product['min_price'] = digits
        product['hash_name'] = md5(product['name'].encode('utf-8'))

        stamp_format = "%Y-%m-%d %H:%M:%S"
        product['created_at'] = strftime(stamp_format)
        product['updated_at'] = strftime(stamp_format)

        return product