Example #1
0
 def processItem(self, response):
     """Yield one ``OlxItem`` holding the title and price of a listing page.

     The title is the text of the ``<span>`` with id ``titletextonly`` and the
     price is the text of the ``<span>`` whose class attribute is ``price``.
     Each value is read with ``get()``, i.e. only the first match is used.
     """
     heading = response.xpath('//span[@id="titletextonly"]/text()').get()
     amount = response.xpath('//span[@class="price"]/text()').get()

     listing = OlxItem()
     listing['title'], listing['price'] = heading, amount
     yield listing
Example #2
0
 def parse_detail_page(self, response):
     """Yield an ``OlxItem`` with the title, price and URL of a detail page.

     Both values are taken from the first match of their selector by direct
     indexing, so a page without a matching node raises ``IndexError``.
     """
     headings = response.css('h1::text').extract()
     heading = headings[0].strip()
     amount = response.css('.pricelabel > strong::text').extract()[0]

     listing = OlxItem()
     listing['title'] = heading
     listing['price'] = amount
     listing['url'] = response.url
     yield listing
Example #3
0
    def parse_detail_page(self, response):
        """Scrape a detail page into a single ``OlxItem`` and yield it.

        Fields written: ``title`` (first ``h1`` text, stripped), ``price``
        (first text under ``.pricelabel > strong``) and ``url``.
        Indexing ``[0]`` raises ``IndexError`` when a selector matches nothing.
        """
        first_heading = response.css("h1::text").extract()[0].strip()
        first_price = response.css(".pricelabel > strong::text").extract()[0]

        item = OlxItem()
        for field, value in (
            ("title", first_heading),
            ("price", first_price),
            ("url", response.url),
        ):
            item[field] = value
        yield item
Example #4
0
    def parse_detail_page(self, response):
        """Yield an ``OlxItem`` with the title, price and URL of a detail page.

        Progress is reported through the spider logger at DEBUG level instead
        of ad-hoc ``print`` calls, so it can be filtered with the log level.

        Raises:
            IndexError: if either selector matches nothing on the page.
        """
        self.logger.debug('Parsing detail page %s', response.url)

        title = response.css('h1::text').extract()[0].strip()
        # NOTE(review): this selector has no ``::text``, so the stored price is
        # the serialized element rather than its text -- confirm that is wanted.
        price = response.css('.pricelabel > .xxxx-large').extract()[0]

        item = OlxItem()
        item['title'] = title
        item['price'] = price
        item['url'] = response.url

        self.logger.debug('Parsed detail page %s', response.url)
        yield item
Example #5
0
    def parse_item(self, response):
        """Build one ``OlxItem`` from an offer page, count it and yield it.

        ``OlxSpider.count`` is incremented for every item produced; once it
        equals ``OlxSpider.TARGET`` the next call stops the crawl.

        Raises:
            CloseSpider: when the item target has been reached.
        """
        if OlxSpider.TARGET == OlxSpider.count:  # stop once the target is reached
            raise CloseSpider('Exceeded maximum items')

        # The price label is split on its last space: amount before, currency after.
        cost = response.css('strong.pricelabel__value::text').get()
        split_at = cost.rfind(' ')
        price = cost[:split_at]
        currency = cost[split_at + 1:]

        # Pair each parameter caption with its value, in document order.
        params = dict(zip(
            response.css('a.offer-details__param--url span::text').getall(),
            response.css('a.offer-details__param--url strong::text').getall(),
        ))

        # NOTE(review): the replace() calls target the two-character sequences
        # backslash+n / backslash+r / backslash+t, not real control characters
        # -- confirm this is intended.
        res_desc = ''.join(
            chunk.strip().replace('\\n', ' ').replace('\\r', ' ').replace('\\t', ' ')
            for chunk in response.xpath('//div[@id="textContent"]//text()').extract()
        )

        # Prefer the gallery links; fall back to the single description image.
        # Both branches now produce a list of URL strings.
        pics = response.css('ul#descGallery li a::attr(href)').getall()
        if not pics:
            pics = response.css('div#descImage img::attr(src)').getall()

        has_badge = response.css('span.olx-delivery-badge-icon-wrapper').get() is not None
        delivery = 'Есть Доставка OLX' if has_badge else 'Нет Доставка OLX'

        item = OlxItem()
        item['product_url'] = response.url
        # The slice drops the first 9 and last 3 characters of the heading
        # (presumably surrounding whitespace -- verify against a live page).
        item['name'] = (response.css('div h1::text').get())[9:-3]
        item['category'] = response.css('td.middle ul li span::text').getall()
        item['price'] = float(price)
        item['price_currency'] = currency
        item['date_of_creation'] = response.css('em strong::text').get()
        item['count_views'] = int(response.css('span.offer-bottombar__counter strong::text').get())
        item['prod_id'] = response.css('ul.offer-bottombar__items li>strong::text').get()
        item['seller_name'] = response.css('div.offer-user__actions a::text').get().strip()
        item['seller_adress'] = response.css('address p::text').get()
        item['state'] = params.get('Состояние', 'Not Value')
        item['description'] = res_desc
        item['pics_urls'] = pics
        item['olx_delivery'] = delivery
        OlxSpider.count += 1

        yield item
Example #6
0
 def parse_detail_page(self, response):
     """Yield an ``OlxItem`` with title, price, URL and first image of a page.

     Title and price are read by indexing the first match (``IndexError`` when
     absent); the image is read with ``extract_first()``.
     """
     print("#############################Page Detail######################")

     title_texts = response.css('h1[data-aut-id="itemTitle"]::text').extract()
     price_texts = response.css('span[data-aut-id="itemPrice"]::text').extract()
     heading = title_texts[0].strip()
     amount = price_texts[0]
     picture = response.css('figure *::attr("src")').extract_first()

     listing = OlxItem()
     listing['title'] = heading
     listing['price'] = amount
     listing['url'] = response.url
     listing['image'] = picture
     yield listing
Example #7
0
    def parse_item_page(self, response):
        """Return an ``OlxItem`` describing the ad shown on ``response``.

        ``uid`` is the last dash-separated piece of the URL path,
        ``description`` is every text node under ``#textContent`` joined by
        spaces, and ``has_backyard`` is true when that description contains
        the substring ``'curte'``.
        """
        price = response.xpath(
            "//strong[contains(@class, 'pricelabel')]//text()").extract_first()
        text_nodes = response.xpath("//div[@id='textContent']//text()").extract()
        description = ' '.join(text_nodes)

        uid = urlparse(response.url).path.split('-')[-1]
        title = response.xpath('//title/text()').extract_first()

        return OlxItem(
            uid=uid,
            title=title,
            price=price,
            url=response.url,
            description=description,
            has_backyard='curte' in description,
        )
Example #8
0
    def parse(self, response):
        """Yield one detail-page request per ad row of a listing page.

        For every ``table[summary="Anunt"]`` row a partially filled ``OlxItem``
        (id, title, price, url) is built and attached to the request as
        ``meta['item']``; the request is handled by ``parse_detail_page``.
        """
        for row in response.css('table[summary="Anunt"]'):
            # Regular ads and promoted ads use different link classes.
            raw_title = row.css('h3>a.detailsLink>strong::text').extract_first()
            if not raw_title:
                raw_title = row.css(
                    'h3>a.detailsLinkPromoted>strong::text').extract_first()
            title = raw_title.strip()

            price = row.css('.price > strong::text').extract_first().strip()
            href = row.css(
                'h3>a.detailsLinkPromoted::attr(href),a.detailsLink::attr(href)'
            ).extract_first()
            # The ad id is the last dash-separated token of the link, up to
            # the first dot.
            ad_id = href.split('-')[-1].split('.')[0]

            listing = OlxItem()
            listing['id'] = ad_id
            listing['title'] = title
            listing['price'] = price
            listing['url'] = href

            yield scrapy.Request(
                href,
                callback=self.parse_detail_page,
                meta={'item': listing},
            )
Example #9
0
	def parse_detail_page(self, response):
		"""Yield an ``OlxItem`` with title, discount, old/new price and URL.

		Every value is the stripped first match of its selector; a selector
		that matches nothing raises ``IndexError``. The scraped values are
		also printed between ``begin`` and ``end`` markers.
		"""
		def first_text(selector):
			# First matching text node, without surrounding whitespace.
			return response.css(selector).extract()[0].strip()

		title = first_text('.product-productname ::text')
		old_price = first_text('.old-price > .price ::text')
		new_price = first_text('.special-price > .price ::text')
		discount = first_text('.view-percent-price > strong ::text')

		item = OlxItem()
		item['title'] = title
		item['percentage'] = discount
		item['old_price'] = old_price
		item['new_price'] = new_price

		for line in ("begin", title, discount, old_price, new_price, "end"):
			print(line)

		item['url'] = response.url
		yield item
Example #10
0
    def grab(self, response):
        """Scrape a car ad page into one ``OlxItem`` and yield it.

        Single-valued attributes fall back to the string ``'NaN'`` when their
        table row is absent; multi-valued attributes are a list of stripped
        strings, or ``'NaN'`` when the list would be empty. Title, address and
        price have no fallback and raise if their node is missing.
        """
        def text_or_nan(selector):
            # Stripped first match, or 'NaN' when nothing matches.
            return response.css(selector).extract_first('NaN').strip()

        def list_or_nan(selector):
            # Every stripped match, or 'NaN' when nothing matches.
            values = [text.strip() for text in response.css(selector).extract()]
            return values if values else 'NaN'

        item = OlxItem()

        item['title'] = response.css(
            'div.offer-titlebox h1::text').extract_first().strip()
        item['address'] = response.css(
            'a.show-map-link > strong::text').extract_first().strip()

        # The publication date is the second-to-last comma-separated piece of
        # the concatenated <em> text.
        em_text = ''.join(response.css('em::text').extract()).strip()
        item['pub_date'] = em_text.split(',')[-2].strip()

        item['mark'] = text_or_nan('table.item > tr:contains("Марка") a::text')
        item['model'] = text_or_nan('table.item > tr:contains("Модель") a::text')
        item['year'] = text_or_nan(
            'table.item > tr:contains("Год выпуска") strong::text')

        # Mileage and engine volume drop their last token (the trailing
        # token is presumably the unit -- verify against a live page).
        mileage_tokens = text_or_nan(
            'table.item > tr:contains("Пробег") strong::text').split(' ')
        item['mileage'] = ''.join(mileage_tokens[:-1])

        item['body_type'] = text_or_nan(
            'table.item > tr:contains("Тип кузова") strong > a::text')
        item['color'] = text_or_nan(
            'table.item > tr:contains("Цвет") strong > a::text')
        item['add_opt'] = list_or_nan(
            'table.item > tr:contains("Доп. опции") strong > a::text')
        item['fuel'] = text_or_nan(
            'table.item > tr:contains("Вид топлива") strong > a::text')

        volume_tokens = text_or_nan(
            'table.item > tr:contains("Объем двигателя") strong::text').split()
        item['engine_vol'] = ''.join(volume_tokens[:-1])

        item['gearbox'] = text_or_nan(
            'table.item > tr:contains("Коробка передач") strong > a::text')
        item['condition'] = list_or_nan(
            'table.item > tr:contains("Состояние машины") strong > a::text')
        item['cleared'] = text_or_nan(
            'table.item > tr:contains("Растаможена") strong > a::text')
        item['multimedia'] = list_or_nan(
            'table.item > tr:contains("Мультимедиа") strong > a::text')
        item['security'] = list_or_nan(
            'table.item > tr:contains("Безопасность") strong > a::text')
        item['other'] = list_or_nan(
            'table.item > tr:contains("Прочее") strong > a::text')

        note_parts = [
            text.strip() for text in response.css('#textContent::text').extract()
        ]
        item['owner_note'] = ' '.join(note_parts)
        item['views'] = text_or_nan('div.pdingtop10 > strong::text')

        # The price label's last token is the currency; the rest is the amount.
        price_tokens = response.css(
            'strong.xxxx-large::text').extract_first().strip().split()
        item['price'] = ''.join(price_tokens[:-1])
        item['currency'] = price_tokens[-1]

        yield item
Example #11
0
    def get_item_data(self, response):
        """Collect the fields of one ad page into an ``OlxItem``.

        If a phone token and contact id can be read from the page, a follow-up
        request to the phone endpoint is yielded first, with the item passed
        to ``get_phone_numbers`` as ``item_obj``. The item itself is always
        yielded at the end.

        NOTE(review): when the phone request is scheduled, the same item
        object is also yielded here -- confirm the duplicate is intended.

        When no phone number is set, ``find_phone_number`` is run over the
        concatenated user URL, user name and description as a fallback.
        """
        item = OlxItem()

        try:
            # 18 == len("var phoneToken = '"); the slice keeps only the token.
            token = re.search("var phoneToken = '[a-zA-Z0-9]+",
                              response.text).group(0)[18:]
            data = response.xpath(
                '//ul[@id="contact_methods_below"]/li/@class').get()
            # str.strip() removes any of these *characters* from both ends,
            # leaving the JSON-like payload embedded in the class attribute.
            uid = data.strip(
                'link-phone clr rel  atClickTracking contact-a activated')
            uid = json.loads(uid.replace("'", '"'))['id']
        except (AttributeError, KeyError, TypeError, ValueError):
            # No token, no contact element, or an unparsable payload.
            item['phone_number'] = None
        else:
            phone_url = f'https://www.olx.ua/uk/ajax/misc/contact/phone/{uid}/?pt={token}'
            yield scrapy.Request(url=phone_url,
                                 callback=self.get_phone_numbers,
                                 cb_kwargs=dict(item_obj=item))

        item['photo_urls'] = [
            entry.xpath('./a/@href').get()
            for entry in response.xpath('//ul[@id="descGallery"]/li')
        ]

        # Price, user name and user URL each have a secondary location.
        price = get_item_or_none(
            response.xpath(
                '//strong[@class="pricelabel__value arranged"]/text()').get())
        if not price:
            price = get_item_or_none(
                response.xpath(
                    '//div[@class="offer-titlebox__price"]/div/strong/text()'
                ).get())
        item['price'] = price

        user_name = get_item_or_none(
            response.xpath(
                '//div[@class="offer-user__actions"]/h4/a/text()').get())
        if not user_name:
            user_name = get_item_or_none(
                response.xpath(
                    '//div[@class="offer-user__actions"]/h4/text()').get())
        item['user_name'] = user_name

        user_url = get_item_or_none(
            response.xpath(
                '//div[@class="offer-user__actions"]/h4/a/@href').get())
        if not user_url:
            user_url = get_item_or_none(
                response.xpath('//ul[@id="contact_methods"]/li/a/@href').get())
        item['user_url'] = user_url

        item['ad_url'] = response.url
        item['description'] = get_item_or_none(
            response.xpath('//div[@class="clr lheight20 large"]/text()').get())
        item['title'] = get_item_or_none(
            response.xpath('//div[@class="offer-titlebox"]/h1/text()').get())
        item['address'] = get_item_or_none(
            response.xpath(
                '//div[@class="offer-user__address"]/address/p/text()').get())

        date_time = get_item_or_none(
            response.xpath(
                '//li[@class="offer-bottombar__item"]/em/strong/text()').get())
        # Drop the two-character prefix only when a value was found; slicing
        # a missing value used to raise TypeError.
        item['date_time'] = date_time[2:] if date_time else date_time

        item['ad_number'] = get_item_or_none(
            response.xpath(
                '//li[@class="offer-bottombar__item"]/strong/text()').get())

        if 'phone_number' not in item or not item['phone_number']:
            # Any of the three sources may be missing; skip those instead of
            # failing on ``str + None``.
            all_text_data = ''.join(
                part for part in (item['user_url'], item['user_name'],
                                  item['description']) if part)
            numbers = find_phone_number(all_text_data)
            item['phone_number'] = list(numbers) if numbers else None

        yield item
Example #12
0
    def parse_detail(self, response):
        """Extract one real-estate ad into an ``OlxItem`` and yield it.

        Reads URL, title, insertion date, price, description, the labelled
        rows of the "details" and "location" boxes, and the ad id. Only rows
        whose label is known are stored; other rows are ignored.

        The insertion date on the page carries day and month only, so the
        current year is used. The month name is translated through
        ``self.converteMes``.

        Raises:
            IndexError: if the insertion date is not found on the page.
            KeyError: if the month name is not in ``self.converteMes``.
        """
        def only_digits(text):
            # Keep digits only (drops separators and units) and convert.
            return int(re.sub('[^0-9]', '', text))

        def term_value_pairs(rows):
            # Yield the (label, value) text of each row of a details box.
            for row in rows:
                term = row.xpath(
                    'normalize-space(.//span[contains(@class, "term")]/text())'
                ).extract_first()
                value = row.xpath(
                    'normalize-space(.//strong[contains(@class, "description")]/text())'
                ).extract_first()
                yield term, value

        imovel = OlxItem()

        imovel['url'] = response.url
        imovel['titulo'] = response.xpath(
            'normalize-space(//h1[contains(@id,"ad_title")]//.)'
        ).extract_first()

        data = response.xpath(
            'normalize-space(//div[contains(@class,"OLXad-date")]//p)'
        ).re(r"Inserido em: (\d*) (\w*)")
        imovel['data'] = date(date.today().year, self.converteMes[data[1]],
                              int(data[0]))

        preco = response.xpath(
            'normalize-space(//span[contains(@class,"actual-price")])'
        ).re(r"R\$ (.*)")
        # A missing price, or one without any digit, is stored as 0.
        digitos = re.sub('[^0-9]', '', preco[0]) if preco else ''
        imovel['preco'] = int(digitos) if digitos else 0

        imovel['descricao'] = response.xpath(
            'normalize-space(//div[contains(@class,"OLXad-description")]//p)'
        ).extract_first()

        # Page label -> (item field, optional converter).
        campos_detalhe = {
            'Tipo:': ('tipo', None),
            'Área útil:': ('area_util', only_digits),
            'Área construída:': ('area_construida', only_digits),
            'Quartos:': ('n_quartos', None),
            'Vagas na garagem:': ('vagas_garagem', None),
            'Condomínio:': ('condominio', None),
        }
        detalhes = response.xpath(
            '//div[contains(@class, "OLXad-details")]//li[contains(@class, "item")]'
        )
        for atributo, valor in term_value_pairs(detalhes):
            if atributo in campos_detalhe:
                campo, converte = campos_detalhe[atributo]
                imovel[campo] = converte(valor) if converte else valor

        # Page label -> item field.
        campos_local = {
            'Município:': 'municipio',
            'CEP do imóvel:': 'cep',
            'Bairro:': 'bairro',
        }
        localizacao = response.xpath(
            '//div[contains(@class, "OLXad-location")]//li[contains(@class, "item")]'
        )
        for atributo, valor in term_value_pairs(localizacao):
            if atributo in campos_local:
                imovel[campos_local[atributo]] = valor

        imovel['id'] = response.xpath(
            'normalize-space(//div[contains(@class, "OLXad-id")]//p//strong)'
        ).extract_first()

        yield imovel
Example #13
0
class MySpider(CrawlSpider):
    """Spider for OLX Mumbai real-estate listings.

    ``parse`` walks the listing pages and schedules ``parse1`` for every
    offer categorised as Apartments, Shops or Houses; ``parse1`` turns one
    offer page into an ``OlxItem`` and attaches four quality scores.
    """
    name = "olxMumbai"
    allowed_domains = ['www.olx.in']
    start_urls = ['https://www.olx.in/mumbai/real-estate/']

    # Retained so the class attribute keeps existing. It is no longer
    # mutated: parse1 fills a fresh item per response, because re-yielding
    # one shared object lets a later page overwrite an item that is still
    # travelling through the pipelines.
    item = OlxItem()

    # Offer categories that are followed from the listing page.
    _WANTED_CATEGORIES = ('Apartments', 'Shops', 'Houses')

    # Static defaults applied to every item before page data is read.
    _DEFAULTS = (
        ('Selling_price', '0'),
        ('Monthly_Rent', '0'),
        ('lat', '0'),
        ('longt', '0'),
        ('Bua_sqft', '0'),
        ('carpet_area', '0'),
        ('price_per_sqft', '0'),
        ('management_by_landlord', 'None'),
        ('areacode', 'None'),
        ('mobile_lister', 'None'),
        ('google_place_id', 'None'),
        ('Launch_date', 'None'),
        ('Possession', '0'),
        ('age', 'None'),
        ('address', 'None'),
        ('price_on_req', 'false'),
        ('sublocality', 'None'),
        ('config_type', 'None'),
        ('txn_type', 'None'),
        ('property_type', 'None'),
        ('Building_name', 'None'),
        ('locality', 'None'),
        ('Status', 'None'),
        ('listing_by', 'None'),
        ('name_lister', 'None'),
        ('Details', 'None'),
        ('city', 'mumbai'),
        ('platform', 'olx'),
    )

    def parse(self, response):
        """Yield a request per wanted offer, then one for the next page."""
        offers = response.xpath(
            '//*[@id="offers_table"]/tbody/tr/td[contains(@class,"offer")]')

        for offer in offers:
            # A row without category text is skipped instead of aborting
            # the whole page.
            category = offer.xpath(
                'table/tbody/tr/td[@valign="top"]/p/small/text()'
            ).extract_first() or ''
            if not any(kind in category for kind in self._WANTED_CATEGORIES):
                continue

            url = offer.xpath(
                'table/tbody/tr/td[@valign="top"]/h3/a/@href').extract_first()
            if url:
                yield Request(url, callback=self.parse1, dont_filter=True)

        # The last page has no such label; testing ``in None`` used to raise
        # TypeError there.
        pager_label = response.xpath(
            '//div[@class="pager rel clr"]/span[last()]/a/span/text()'
        ).extract_first()
        if pager_label and 'Next page' in pager_label:
            next_url = response.xpath(
                '//div[@class="pager rel clr"]/span[last()]/a/@href'
            ).extract_first()
            yield Request(next_url, callback=self.parse)

    @staticmethod
    def _midnight(day):
        """Format ``day`` as non-padded ``M/D/YYYY 00:00:00``."""
        return '%d/%d/%d 00:00:00' % (day.month, day.day, day.year)

    @staticmethod
    def _score_quality(item):
        """Set ``quality1`` .. ``quality4`` on ``item`` from its fields.

        A field counts as present when it differs from its default marker
        (``'0'`` or ``'None'``).
        """
        has_rent = item['Monthly_Rent'] != '0'
        has_sale = item['Selling_price'] != '0'
        has_rate = item['price_per_sqft'] != '0'
        has_area = item['Bua_sqft'] != '0'
        has_building = item['Building_name'] != 'None'
        has_lat = item['lat'] != '0'

        if (has_rent or has_sale or has_rate) and has_area and has_building and has_lat:
            item['quality4'] = 1
        elif ((has_rate and has_building and has_lat)
              or ((has_sale or has_rent) and has_area
                  and (has_lat or has_building))):
            item['quality4'] = 0.5
        else:
            item['quality4'] = 0

        if (has_building
                and item['listing_date'] != '0'
                and item['txn_type'] != 'None'
                and item['property_type'] != 'None'
                and (has_sale or has_rent)):
            item['quality1'] = 1
        else:
            item['quality1'] = 0

        # NOTE(review): 'Launch_date' defaults to 'None' and is compared with
        # '0', so this condition holds for every item as written -- confirm
        # the intended marker.
        if item['Launch_date'] != '0' or item['Possession'] != '0':
            item['quality2'] = 1
        else:
            item['quality2'] = 0

        if (item['mobile_lister'] != 'None'
                or item['listing_by'] != 'None'
                or item['name_lister'] != 'None'):
            item['quality3'] = 1
        else:
            item['quality3'] = 0

    def parse1(self, response):
        """Build one ``OlxItem`` from an offer page and yield it.

        Raises on pages that lack the id, map container, locality,
        breadcrumb or date nodes (the first match is used unguarded).
        """
        item = OlxItem()
        for field, value in self._DEFAULTS:
            item[field] = value
        item['listing_date'] = dt.now().strftime('%m/%d/%Y %H:%M:%S')
        item['updated_date'] = item['listing_date']

        item['data_id'] = response.xpath(
            '//span[@class="rel inlblk"]/text()').extract_first().strip()

        # Coordinates are embedded in the map container's class attribute.
        lat_long = response.xpath(
            '//div[@id="mapcontainer"]/@class').extract_first()
        item['lat'] = re.findall(" lat: '([0-9.]+)'", lat_long)[0]
        item['longt'] = re.findall(" lon: '([0-9.]+)'", lat_long)[0]

        item['locality'] = response.xpath(
            '//strong[@class="c2b small"]/text()').extract_first().strip()

        # Third breadcrumb: sale vs. rent ('ent' wins when both match).
        txn_label = response.xpath(
            '//*[@id="breadcrumbTop"]/tr/td/ul/li[3]/a/span/text()'
        ).extract_first().strip()
        if 'ale' in txn_label:
            item['txn_type'] = 'Sale'
        if 'ent' in txn_label:
            item['txn_type'] = 'Rent'

        # The single price label goes to the field matching the transaction
        # type; the other field keeps its '0' default.
        if item['txn_type'] != 'None':
            price_text = response.xpath(
                '//strong[@class="xxxx-large margintop7 inlblk not-arranged"]/text()'
            ).extract_first()
            if item['txn_type'] == 'Sale':
                item['Selling_price'] = price_text
            else:
                item['Monthly_Rent'] = price_text

        # Fourth breadcrumb: property category.
        category = response.xpath(
            '//*[@id="breadcrumbTop"]/tr/td/ul/li[4]/a/span/text()'
        ).extract_first().strip()
        if 'Apartments' in category or 'Houses' in category:
            item['property_type'] = 'Residential'
        if 'Shops' in category:
            item['property_type'] = 'Commercial'

        # Room count: first digit of the "room" link, else of the "more" link.
        for link_xpath in ('//a[contains(@title,"room")]/text()',
                           '//a[contains(@title,"more")]/text()'):
            label = response.xpath(link_xpath).extract_first()
            digits = re.findall('[0-9]', label) if label else []
            if digits:
                item['config_type'] = digits[0] + 'BHK'
                break
        else:
            print('No config ' + ' -->>' + str(response.url))
            item['config_type'] = 'None'

        # Normalise the first non-empty date label: collapse whitespace and
        # drop the filler words around the date.
        date_texts = response.xpath(
            '//span[@class="pdingleft10 brlefte5"]/text()').extract()
        raw_date = [text for text in date_texts if re.search(r'\w', text)][0]
        date1 = ' '.join(re.findall(r'\S+', raw_date))
        for filler in (',', 'on ', 'at ', 'Added '):
            date1 = date1.replace(filler, '')

        today = d.today()
        if 'terday' in date1:
            item['listing_date'] = self._midnight(today - timedelta(days=1))
        elif 'oday' in date1:
            item['listing_date'] = self._midnight(today)
        elif ' am' in date1 or ' pm' in date1:
            clock = date1.replace(' am', '').replace(' pm', '')
            item['listing_date'] = '%d/%d/%d %s:00' % (
                today.month, today.day, today.year, clock)
        else:
            # "<day> <month abbreviation>": the current year is assumed.
            item['listing_date'] = dt.strftime(
                dt.strptime(date1, '%d %b'), '%m/%d') + '/' + str(
                    today.year) + ' 00:00:00'

        item['updated_date'] = item['listing_date']

        # Built-up area: first number of the first label mentioning ' ft'.
        area_texts = [
            text for text in
            response.xpath('//strong[@class="block"]/text()').extract()
            if ' ft' in text
        ]
        if area_texts:
            numbers = re.findall('[0-9,]+', area_texts[0].strip())
            if numbers:
                item['Bua_sqft'] = numbers[0].replace(',', '')
            else:
                print('No Sqft -->>' + str(response.url))

        item['scraped_time'] = dt.now().strftime('%m/%d/%Y %H:%M:%S')

        self._score_quality(item)

        yield item
Example #14
0
    def parse(self, response):
        """Yield an ``OlxItem`` per product tile, then follow pagination.

        Each item gets ``title``, ``url`` and ``new_price``; tiles that show
        a discount flag also get ``percentage`` and ``old_price``. Prices
        are stored as the first two digit groups of the price text joined
        together; a price with a single digit group is stored as that group.

        The last pagination link is followed unless it equals
        ``self.prev_page``, which is what stops the crawl on the final page.

        Raises:
            IndexError: if a tile has no link, or a price element has fewer
                than three text nodes.
        """
        def digits_of(price_text):
            # Slicing instead of indexing keeps prices below 1,000 (one
            # digit group) from raising IndexError.
            return ''.join(re.findall(r'\d+', price_text)[:2])

        for ref in response.css('.sku.-gallery'):
            item = OlxItem()
            item['title'] = '-'.join(ref.css('.link>.title ::text').extract())
            item['url'] = ref.css('.link ::attr(href)').extract()[0]

            percent_texts = ref.css(
                '.link>.price-container.clearfix>.sale-flag-percent ::text'
            ).extract()
            if percent_texts:
                item['percentage'] = percent_texts[0]
                # The third text node of the price element holds the amount.
                old_price_text = ref.css(
                    '.link>.price-container.clearfix>.price-box.ri>.price.-old ::text'
                ).extract()[2].strip()
                item['old_price'] = digits_of(old_price_text)

            new_price_text = ref.css(
                '.link>.price-container.clearfix>.price-box.ri>.price ::text'
            ).extract()[2].strip()
            item['new_price'] = digits_of(new_price_text)

            self.logger.debug('Scraped listing %s', item['url'])
            yield item

        page_links = response.css(
            '.pagination>.osh-pagination.-horizontal ::attr(href)').extract()
        if not page_links:
            # No pagination on this page: nothing left to follow.
            return

        next_page = page_links[-1]
        self.logger.debug('Next page candidate: %s', next_page)
        if self.prev_page != next_page:
            self.prev_page = next_page
            yield scrapy.Request(next_page, self.parse, dont_filter=True)