Ejemplo n.º 1
0
    def parse(self, response):
        """Store the product title and price found in ``response``.

        A response with status 404 is recorded through ``insertErrorLink``
        and nothing else is stored. Otherwise the title and price are read
        with CSS selectors and handed to ``insertData`` together with
        ``self.name``. The stock status is always reported as ``'Unknown'``.

        A missing price element is stored as ``None`` instead of raising.
        """
        db = DataBaseConnection()
        if response.status == 404:
            db.insertErrorLink(response.url)
            return

        stockStatus = 'Unknown'
        title = response.css('h2.pdp-mod-section-title::text').extract_first()
        priceString = response.css('span.pdp-price::text').extract_first()
        # extract_first() yields None when nothing matches; slicing None
        # would raise TypeError, so a missing price is stored as None.
        price = None
        if priceString is not None:
            # Drop the first character (presumably a currency symbol).
            price = priceString[1:]
        db.insertData(title, price, stockStatus, self.name)
Ejemplo n.º 2
0
 def parse(self, response):
     """Store the product title and price found in ``response``.

     A response with status 404 is recorded through ``insertErrorLink``
     and nothing else is stored. Otherwise the title and the second text
     node matched by the price selector (with commas removed) are handed
     to ``insertData`` together with ``self.name``. The stock status is
     always reported as ``'Unknown'``.

     When fewer than two price text nodes match, the price is stored as
     ``None`` instead of raising ``IndexError``.
     """
     db = DataBaseConnection()
     if response.status == 404:
         db.insertErrorLink(response.url)
         return
     stockStatus = 'Unknown'
     title = response.css('h1.product_title::text').extract_first()
     priceParts = response.css(
         'p.price span.electro-price span::text').extract()
     price = None
     # The price value is the second matched text node; guard against
     # pages where it is absent.
     if len(priceParts) > 1:
         price = priceParts[1].replace(',', '')
     db.insertData(title, price, stockStatus, self.name)
Ejemplo n.º 3
0
    def parse(self, response):
        """Collect the title and price text nodes and store them.

        A 404 response is logged via ``insertErrorLink`` and skipped.
        Stock status is always ``'Unknown'``.
        """
        storage = DataBaseConnection()
        if response.status == 404:
            storage.insertErrorLink(response.url)
            return

        title_container = response.css(
            'div.pd_content div.pd_details.pd_title div.row div')
        # NOTE(review): both values below are lists of every matched text
        # node (extract(), not extract_first()) -- confirm insertData
        # accepts lists.
        title_texts = title_container.css('div::text').extract()
        price_texts = response.css('div.pd_price::text').extract()
        storage.insertData(title_texts, price_texts, 'Unknown', self.name)
Ejemplo n.º 4
0
    def parse(self, response):
        """Store the product title and price found in ``response``.

        A response with status 404 is recorded through ``insertErrorLink``
        and nothing else is stored. Otherwise the title and the price text
        (with commas removed) are handed to ``insertData`` together with
        ``self.name``. The stock status is always reported as ``'Unknown'``.

        A missing price element is stored as ``None`` instead of raising.
        """
        db = DataBaseConnection()
        if response.status == 404:
            db.insertErrorLink(response.url)
            return

        stockStatus = 'Unknown'
        title = response.css(
            'div.summary-container h2.product_title::text').extract_first()
        priceString = response.css(
            'div.summary-container p.price span.amount::text'
        ).extract_first()
        # extract_first() yields None when nothing matches; calling
        # .replace() on None would raise AttributeError.
        price = None
        if priceString is not None:
            price = priceString.replace(',', '')
        db.insertData(title, price, stockStatus, self.name)
Ejemplo n.º 5
0
    def parse(self, response):
        """Read the title and the price ``content`` attribute and store them.

        A 404 response is logged via ``insertErrorLink`` and skipped.
        Stock status is always ``'Unknown'``.
        """
        storage = DataBaseConnection()
        if response.status == 404:
            storage.insertErrorLink(response.url)
            return

        title_query = 'div.product-name h1::text'
        price_query = 'div.price-box span.price::attr("content")'
        product_title = response.css(title_query).extract_first()
        product_price = response.css(price_query).extract_first()
        storage.insertData(product_title, product_price, 'Unknown', self.name)
Ejemplo n.º 6
0
    def parse(self, response):
        """Read the title and the second price text node and store them.

        A 404 response is logged via ``insertErrorLink`` and skipped.
        The price is ``None`` unless at least two price text nodes match.
        Stock status is always ``'Unknown'``.
        """
        storage = DataBaseConnection()
        if response.status == 404:
            storage.insertErrorLink(response.url)
            return

        product_title = response.css('h1::text').extract_first()
        price_texts = response.css('span.productdetail_price::text').extract()
        if len(price_texts) > 1:
            # Second text node, commas removed, surrounding whitespace cut.
            product_price = price_texts[1].replace(',', '').strip()
        else:
            product_price = None
        storage.insertData(product_title, product_price, 'Unknown', self.name)
Ejemplo n.º 7
0
    def parse(self, response):
        """Read the title and price from the middle section and store them.

        A 404 response is logged via ``insertErrorLink`` and skipped.
        The price is ``None`` when its cell is not found.
        Stock status is always ``'Unknown'``.
        """
        storage = DataBaseConnection()
        if response.status == 404:
            storage.insertErrorLink(response.url)
            return

        title_query = 'div[id="middle-section"] h1[id="h1productname"]::text'
        price_query = (
            'div[id="middle-section"] div[id="product_details_right"] '
            'td.price_maroon::text'
        )
        product_title = response.css(title_query).extract_first()
        raw_price = response.css(price_query).extract_first()
        # Trim whitespace, then drop commas.
        product_price = (
            None if raw_price is None else raw_price.strip().replace(',', '')
        )
        storage.insertData(product_title, product_price, 'Unknown', self.name)
Ejemplo n.º 8
0
    def parse(self, response):
        """Read availability, title and price and store them.

        A 404 response is logged via ``insertErrorLink`` and skipped.
        Stock status is ``'Unknown'`` when no availability link is found,
        ``'In Stock'`` when its href contains ``InStock`` and
        ``'Out of Stock'`` otherwise.
        """
        storage = DataBaseConnection()
        if response.status == 404:
            storage.insertErrorLink(response.url)
            return

        price_box = 'div.product-shop div.price-stock div.price-box'
        availability = response.css(
            price_box + ' link[itemprop="availability"]::attr("href")'
        ).extract_first()
        if availability is None:
            stock = 'Unknown'
        elif 'InStock' in availability:
            stock = 'In Stock'
        else:
            stock = 'Out of Stock'

        product_title = response.css(
            'div.product-shop div.product-name h1::text').extract_first()
        product_price = response.css(
            price_box + ' meta[itemprop="price"]::attr("content")'
        ).extract_first()
        storage.insertData(product_title, product_price, stock, self.name)
Ejemplo n.º 9
0
    def parse(self, response):
        """Read the title and price and store them.

        A 404 response is logged via ``insertErrorLink`` and skipped.
        The product counts as ``'In Stock'`` exactly when a price element
        is found; otherwise it is ``'Out of Stock'`` with price ``None``.
        """
        storage = DataBaseConnection()
        if response.status == 404:
            storage.insertErrorLink(response.url)
            return

        product_title = response.css(
            'div.detail-container div.product-title h1.product-name::text'
        ).extract_first()
        raw_price = response.css(
            'div.detail-container div.buy-now div.product-srp-price span[itemprop="price"]::text'
        ).extract_first()
        if raw_price is None:
            product_price, stock = None, 'Out of Stock'
        else:
            # Skip the first two characters, then drop commas.
            product_price, stock = raw_price[2:].replace(',', ''), 'In Stock'
        storage.insertData(product_title, product_price, stock, self.name)
Ejemplo n.º 10
0
    def parse(self, response):
        """Read stock info, title and price and store them.

        A 404 response is logged via ``insertErrorLink`` and skipped.
        The product is ``'In Stock'`` when any of the matched paragraph
        texts contains ``Available Stocks:``, else ``'Out of Stock'``.
        """
        storage = DataBaseConnection()
        if response.status == 404:
            storage.insertErrorLink(response.url)
            return

        stock_lines = response.css(
            'div.desc_con span.variant_hidden p::text').extract()
        has_stock = any("Available Stocks:" in line for line in stock_lines)
        stock = 'In Stock' if has_stock else 'Out of Stock'

        product_title = response.css('div.title_page h1::text').extract_first()
        product_price = response.css(
            'div.desc_con span.variant_hidden div.dim_and_w::attr("data-price")'
        ).extract_first()
        storage.insertData(product_title, product_price, stock, self.name)
Ejemplo n.º 11
0
    def parse(self, response):
        """Read availability, title and price and store them.

        A 404 response is logged via ``insertErrorLink`` and skipped.
        The product is ``'In Stock'`` only when an availability link is
        found and its href contains ``InStock``; otherwise
        ``'Out of Stock'``.
        """
        storage = DataBaseConnection()
        if response.status == 404:
            storage.insertErrorLink(response.url)
            return

        availability = response.css(
            'div.summary div.single-product-price link[itemprop="availability"]::attr("href")'
        ).extract_first()
        if availability is not None and 'InStock' in availability:
            stock = 'In Stock'
        else:
            stock = 'Out of Stock'

        product_title = response.css(
            'div.summary h1.product-title::text').extract_first()
        product_price = response.css(
            'div.summary div.single-product-price meta[itemprop="price"]::attr("content")'
        ).extract_first()
        storage.insertData(product_title, product_price, stock, self.name)
Ejemplo n.º 12
0
    def parse(self, response):
        """Store the product title, price and stock status from ``response``.

        A response with status 404 is recorded through ``insertErrorLink``
        and nothing else is stored. Otherwise:

        * stock status is ``'Unknown'`` when no stock-count element is
          found, ``'In Stock'`` when the count is above zero and
          ``'Out of Stock'`` otherwise;
        * the price text loses its first character and all commas;
        * everything is handed to ``insertData`` together with
          ``self.name``.

        A missing price element is stored as ``None`` instead of raising.

        Raises:
            ValueError: if the stock-count text is not an integer
                (unchanged from the previous behaviour).
        """
        db = DataBaseConnection()
        if response.status == 404:
            db.insertErrorLink(response.url)
            return

        stockStatusField = response.css(
            'div.product-main-info div.stocks-qty div.stocks span.stock-count::text'
        ).extract_first()
        stockStatus = 'Unknown'
        if stockStatusField is not None:
            if int(stockStatusField) > 0:
                stockStatus = 'In Stock'
            else:
                stockStatus = 'Out of Stock'
        title = response.css(
            'h1.shopee-product-info__header__text::text').extract_first()
        priceString = response.css(
            'div.shopee-product-info__header__real-price::text'
        ).extract_first()
        # extract_first() yields None when nothing matches; slicing None
        # would raise TypeError, so a missing price is stored as None.
        price = None
        if priceString is not None:
            # Drop the first character (presumably a currency symbol).
            price = priceString[1:].replace(',', '')
        db.insertData(title, price, stockStatus, self.name)
Ejemplo n.º 13
0
    def parse(self, response):
        """Read the title and price, each with a fallback selector.

        A 404 response is logged via ``insertErrorLink`` and skipped.
        The title falls back to ``h1.product-title`` when the primary
        selector finds nothing. The price comes from the last
        ``div.productpageprice`` text node, or from ``span#finalPrice``
        when there is none; it is ``None`` if both are missing.
        Stock status is always ``'Unknown'``.
        """
        storage = DataBaseConnection()
        if response.status == 404:
            storage.insertErrorLink(response.url)
            return

        product_title = response.css('h1.productname span::text').extract_first()
        if product_title is None:
            product_title = response.css(
                'h1.product-title::text').extract_first()

        product_price = None
        price_texts = response.css('div.productpageprice::text').extract()
        if price_texts:
            # Last text node: skip two leading characters, trim, drop commas.
            product_price = price_texts[-1][2:].strip().replace(',', '')
        else:
            final_price = response.css(
                'span[id="finalPrice"]::text').extract_first()
            if final_price is not None:
                product_price = final_price.replace(',', '')
        storage.insertData(product_title, product_price, 'Unknown', self.name)
Ejemplo n.º 14
0
    def parse(self, response):
        """Read stock presence, title and price and store them.

        A 404 response is logged via ``insertErrorLink`` and skipped.
        The product is ``'In Stock'`` when a ``span.stock`` element is
        present, else ``'Out of Stock'``. The price is ``None`` when its
        element is not found.
        """
        storage = DataBaseConnection()
        if response.status == 404:
            storage.insertErrorLink(response.url)
            return

        stock_element = response.css(
            'div.product-top-right div.product-quantity span.stock'
        ).extract_first()
        stock = 'In Stock' if stock_element is not None else 'Out of Stock'

        product_title = response.css(
            'div.product-title h1::text').extract_first()
        raw_price = response.css(
            'div.product-top-right span[id="price-num"]::text'
        ).extract_first()
        if raw_price is None:
            product_price = None
        else:
            # Skip the first character, then drop commas.
            product_price = raw_price[1:].replace(',', '')
        storage.insertData(product_title, product_price, stock, self.name)
Ejemplo n.º 15
0
    def parse(self, response):
        """Read the stock count, title and price and store them.

        A 404 response is logged via ``insertErrorLink`` and skipped.
        Stock status is ``'Unknown'`` when no stock-count element is found,
        ``'In Stock'`` for a count above zero and ``'Out of Stock'``
        otherwise.
        """
        storage = DataBaseConnection()
        if response.status == 404:
            storage.insertErrorLink(response.url)
            return

        stock_count = response.css(
            'div.product-main-info div.stocks-qty div.stocks span.stock-count::text'
        ).extract_first()
        if stock_count is None:
            stock = 'Unknown'
        else:
            stock = 'In Stock' if int(stock_count) > 0 else 'Out of Stock'

        product_title = response.css(
            'div.product-main-info h1.product-title::text').extract_first()
        product_price = response.css(
            'div.product-main-info div.price-detail span.price span[id="finalPrice"]::text'
        ).extract_first()
        storage.insertData(product_title, product_price, stock, self.name)
Ejemplo n.º 16
0
    def parse(self, response):
        """Read the stock label, title and price and store them.

        A 404 response is logged via ``insertErrorLink`` and skipped.
        Stock status is ``'Unknown'`` when no stock label is found,
        ``'In Stock'`` when the label contains ``In stock`` and
        ``'Out of Stock'`` otherwise.
        """
        storage = DataBaseConnection()
        if response.status == 404:
            storage.insertErrorLink(response.url)
            return

        stock_label = response.css(
            'div.product-info-main div.stock span::text').extract_first()
        if stock_label is None:
            stock = 'Unknown'
        elif 'In stock' in stock_label:
            stock = 'In Stock'
        else:
            stock = 'Out of Stock'

        product_title = response.css(
            'div.product-info-main div.title-wrapper h1.page-title span::text'
        ).extract_first()
        product_price = response.css(
            'div.product-info-main div.title-wrapper div.price-box span.price-wrapper::attr("data-price-amount")'
        ).extract_first()
        storage.insertData(product_title, product_price, stock, self.name)
Ejemplo n.º 17
0
    def parse(self, response):
        """Store the product title, price and stock status from ``response``.

        A response with status 404 is recorded through ``insertErrorLink``
        and nothing else is stored. Otherwise:

        * the product is ``'Out of Stock'`` when a ``p.pempty`` element is
          present in the summary, else ``'In Stock'``;
        * the price is cut out of the last script text that the XPath
          query returns: it starts 13 characters after the beginning of
          ``sell_price`` and ends before the next double quote;
        * everything is handed to ``insertData`` together with
          ``self.name``.

        The price is ``None`` when no such script exists, or when either
        the ``sell_price`` marker or the closing quote cannot be found in
        the chosen script text.
        """
        db = DataBaseConnection()
        if response.status == 404:
            db.insertErrorLink(response.url)
            return

        if response.css('div.summary p.pempty').extract_first() is not None:
            stockStatus = 'Out of Stock'
        else:
            stockStatus = 'In Stock'

        priceScript = response.xpath(
            '//script[contains(text(), "sell_price")]/text()').extract()
        price = None
        if priceScript:
            script = priceScript[-1]
            keyIndex = script.find("sell_price")
            # str.find returns -1 on a miss; without these checks the
            # slice below would silently yield an unrelated fragment.
            if keyIndex >= 0:
                # 13 = len("sell_price") + 3 separator characters
                # (presumably `":"` -- confirm against the page source).
                priceStartIndex = keyIndex + 13
                priceEndIndex = script.find('"', priceStartIndex)
                if priceEndIndex >= 0:
                    price = script[priceStartIndex:priceEndIndex]
        title = response.css('div.summary h2::text').extract_first()
        db.insertData(title, price, stockStatus, self.name)
Ejemplo n.º 18
0
    def parse(self, response):
        """Read availability, title and price and store them.

        A 404 response is logged via ``insertErrorLink`` and skipped.
        Stock status is ``'Unknown'`` when no availability text is found,
        ``'In Stock'`` when it contains ``In Stock`` and ``'Out of Stock'``
        otherwise. The price is ``None`` when its element is not found.
        """
        storage = DataBaseConnection()
        if response.status == 404:
            storage.insertErrorLink(response.url)
            return

        availability = response.xpath(
            '//ul[@class="list-unstyled"]/li[contains(text(), "Availability:")]/span/text()'
        ).extract_first()
        if availability is None:
            stock = 'Unknown'
        elif 'In Stock' in availability:
            stock = 'In Stock'
        else:
            stock = 'Out of Stock'

        product_title = response.css('h1.product-name::text').extract_first()
        raw_price = response.css(
            'ul.price-product li span::text').extract_first()
        # Skip the first character, then drop commas.
        product_price = (
            None if raw_price is None else raw_price[1:].replace(',', '')
        )
        storage.insertData(product_title, product_price, stock, self.name)
Ejemplo n.º 19
0
from siteInfo.ScrapperLazada import LazadaSpider
from siteInfo.ScrapperKimstore import KimstoreSpider
from siteInfo.ScrapperGoods import GoodsSpider
from siteInfo.ScrapperGalleon import GalleonSpider
from siteInfo.ScrapperExpansys import ExpansysSpider
from siteInfo.ScrapperElnstore import ElnstoreSpider
from siteInfo.ScrapperBigmk import BigmkSpider
from siteInfo.ScrapperBigbenta import BigbentaSpider
from siteInfo.ScrapperAsianic import AsianicSpider
from siteInfo.ScrapperAdobomall import AdobomallSpider
from siteInfo.ScrapperEmcor import EmcorSpider

# Links are read from the database; the file-based variant is left
# commented out:
# with open('links.json') as file:
#     data = json.load(file)
db = DataBaseConnection()
data = db.getUrls()

# start with an empty list for each site
abenson = []
argomall = []
anson = []
asianic = []
bigbenta = []
bigmk = []
elnstore = []
expansys = []
galleon = []
goods = []
kimstore = []
lazada = []