Beispiel #1
0
    def parse_property_info(self, response):
        """Scrape one property detail page and yield a populated ``BuyItem``.

        Extraction is best-effort: a field whose markup is missing or has an
        unexpected shape keeps its default (``0`` for numbers, ``""`` for
        text) so that one bad field never discards the whole listing.

        :param response: page response; must provide ``xpath()`` and ``url``.
        :returns: generator that yields exactly one ``BuyItem``.
        """
        # Errors that merely mean "this field is absent or oddly formatted".
        soft_errors = (IndexError, ValueError)
        value_query = 'div[@class="dataVal"]//text()'

        def joined(node, query, sep=''):
            """Concatenate every string that ``query`` selects below ``node``."""
            return sep.join(node.xpath(query).extract())

        def flat(text):
            """Strip the newlines and tabs that come from HTML indentation."""
            return text.replace('\n', '').replace('\t', '')

        def number_in(text):
            """Build a float from the digits and dots found in ``text``.

            Raises ``ValueError`` when nothing numeric is left.
            """
            return float(
                ''.join(ch for ch in text if ch.isdigit() or ch == '.'))

        item = BuyItem()

        price = price_per_sqft = min_area = max_area = 0
        bathrooms = bedrooms = SuperBuiltupArea = is_resale = 0
        is_price_fix = 1
        address = city = location = age_of_property = status = ""
        agent_name = agent_type = launch_date = ""
        # These are written to the item unconditionally, so they need a
        # default even when their markup is missing from the page.
        description = posted_on_date = code = ""
        speciality = {}
        more_info = []

        # Headline price, scaled from Indian units to plain rupees.
        try:
            full_price = joined(response, '//div[@class="nActualAmt"]//text()')
            price = float(full_price.split()[1])
            if "Lac" in full_price:
                price *= 100000  # 1 lakh = 10^5
            if "Cr" in full_price:
                price *= 10000000  # 1 crore = 10^7
        except soft_errors:
            pass

        try:
            SuperBuiltupArea = float(
                joined(response, '//span[@id="coveredAreaDisplay"]//text()'))
        except ValueError:
            pass

        try:
            min_area = max_area = float(
                joined(response, '//span[@id="carpetAreaDisplay"]//text()'))
        except ValueError:
            pass

        # "More details" table: one label/value pair per row.
        for data in response.xpath(
                "//div[@class='nMoreListData']/div[@class='nDataRow']"):
            labels = data.xpath('div[@class="dataLabel"]//text()').extract()
            if not labels:
                continue
            label = labels[0]

            if "Price" in label:
                # Only consulted when the headline price was not found.
                if price == 0:
                    try:
                        price = number_in(data.xpath(
                            'div[@class="dataVal"]/span[contains(@class,"fBold")]//text()'
                        ).extract()[0].split()[-1])
                    except soft_errors:
                        pass

                # Parse into temporaries so a half-finished parse can never
                # leave a raw string in ``price_per_sqft``.
                try:
                    rate_text = data.xpath(
                        'div[@class="dataVal"]/span[@class="light"]/text()'
                    ).extract()[1].split()[1]
                    rate_unit = data.xpath(
                        'div[@class="dataVal"]/span[@class="light"]//text()'
                    ).extract()[1].split()[3].strip()
                    rate = number_in(rate_text)
                    if "sqyrd" in rate_unit:
                        # 1 sq yd = 9 sq ft, so a per-sq-yd rate is nine
                        # times the per-sq-ft rate.
                        rate /= 9
                    price_per_sqft = rate
                except soft_errors:
                    pass

            if "Address" in label:
                address = flat(joined(data, value_query))

            if "Water Availability" in label:
                speciality[label] = flat(joined(data, value_query))

            if "Status of Electricity" in label:
                speciality[label] = flat(joined(data, value_query))

            if "Flooring" in label:
                speciality[label] = flat(
                    joined(data, 'div[contains(@class,"dataVal")]//text()'))

        # Address / locality, each with progressively weaker fallbacks.
        if address == "":
            address = flat(joined(response, '//div[@class="nProjNmLoc"]//text()'))

        location = joined(
            response, '//span[@itemprop="streetAddress"]//text()'
        ).replace(',', ' ')
        city = joined(
            response, '//span[@itemprop="addressLocality"]//text()'
        ).replace(',', ' ')

        if address == "":
            address = location + ' ' + city

        address_parts = address.split(',')
        if city == '':
            city = address_parts[-1]
        if location == '' and len(address_parts) >= 2:
            location = address_parts[-2]

        # Summary block: configuration, transaction type, status, extras.
        for data in response.xpath(
                '//div[@class="nInfoDataBlock"]/div[@class="nDataRow"]'):
            label = joined(data, 'div[@class="dataLabel"]//text()')

            if "Configuration" in label:
                try:
                    bedrooms = int(data.xpath(
                        'div[@class="dataVal"]/span//text()'
                    ).extract()[0].split()[0])
                except soft_errors:
                    pass
                try:
                    extras = joined(data, 'div[@class="dataVal"]/text()')
                    for info in extras.split(","):
                        info = info.replace('\n', ' ')
                        if "Bathroom" in info:
                            info = info.strip().split()[0]
                            bathrooms = int(info)
                        if "Room" in info:
                            speciality['addional_room'] = info.strip()
                except soft_errors:
                    pass

            if "Transaction" in label:
                if "Resale" in joined(data, value_query):
                    is_resale = 1

            if "Status" in label:
                status = joined(data, value_query).replace('\n', '')
                if status == '':
                    status = joined(
                        data, 'li/div[@class="dataVal"]//text()'
                    ).replace('\n', '')

            if "Age" in label:
                age_of_property = joined(data, value_query).replace('\n', '')
                if age_of_property == '':
                    age_of_property = joined(
                        data, 'li/div[@class="dataVal"]//text()'
                    ).replace('\n', '')

            if "Furnish" in label:
                speciality['furnishing'] = joined(data, value_query).strip()

            if "Car Parking" in label:
                speciality['parking'] = joined(data, value_query).strip()

        description = joined(
            response, "//span[@class='dDetail']//text()").replace('\n', '')
        if description == "":
            description = flat(
                joined(response, "//div[@class='nAboutBrf']//text()"))

        # The header text is split on "|" and ":"; the first two date tokens
        # are swapped and the second is mapped through month.find_month.
        try:
            stamp = response.xpath(
                '//div[@class="propIDnPDate"]//text()').extract()[0]
            parts = stamp.split('|')[1].split(':')[1].strip().replace(
                ',', ' ').split()
            parts[0], parts[1] = parts[1], month.find_month(parts[0])
            posted_on_date = ' '.join(parts)
        except Exception:
            # find_month is defined elsewhere, so its failure modes are
            # unknown here; any problem simply leaves the date empty.
            pass

        try:
            code = response.xpath(
                '//div[@class="propIDnPDate"][1]//text()'
            ).extract()[0].split("|")[0].split(":")[1].strip()
        except IndexError:
            pass

        # The last text node is the name; earlier ones hold 'Contact'.
        agent_texts = response.xpath(
            '//div[@class="agntName"]//text()').extract()
        if agent_texts:
            agent_name = agent_texts[-1]

        amenities = joined(
            response,
            '//div[@id="normalAminities"]//li[not(@class="notAvail")]/span[@class="ameLabel"]/text()',
            ',')

        item['price'] = price
        item['price_per_sqft'] = price_per_sqft
        item['is_price_fix'] = is_price_fix
        item['address'] = address.encode('utf8')
        item['city'] = city.encode('utf8')
        item['location'] = location.encode('utf8')
        item['min_area'] = min_area
        item['max_area'] = max_area
        item['bathrooms'] = bathrooms
        item['bedrooms'] = bedrooms
        item['SuperBuiltupArea'] = SuperBuiltupArea
        item['age_of_property'] = age_of_property.encode('utf8')
        item['launch_date'] = launch_date.encode('utf8')
        item['posted_on'] = posted_on_date.encode('utf8')
        item['possession_status'] = status.encode('utf8')
        item['agent_name'] = agent_name.encode('utf8')
        item['agent_type'] = agent_type.encode('utf8')
        item['amenities'] = amenities.encode('utf8')
        item['speciality'] = speciality
        item['more_info'] = more_info
        item['is_resale'] = is_resale
        item['url'] = response.url
        item['code'] = code.encode('utf8')
        item['description'] = description.encode('utf8')

        yield item
Beispiel #2
0
    def parse_property_info(self, response):
        """Scrape one property page with XPath and yield a ``NewSpdItem``.

        Every field is extracted independently and best-effort: missing or
        malformed markup leaves that field at its default (``-1`` / ``-1.0``
        for numbers, ``""`` for text) instead of aborting the page.

        :param response: page response; must provide ``xpath()`` and ``url``.
        :returns: generator yielding one ``NewSpdItem``. If the item cannot
            be populated at all, a fresh empty ``NewSpdItem`` is yielded.
        """
        # Errors that merely mean "this field is absent or oddly formatted".
        soft_errors = (IndexError, ValueError)

        def joined(query):
            """Concatenate every string that ``query`` selects on the page."""
            return ''.join(response.xpath(query).extract())

        def first_number(text):
            """Return the first number in ``text`` as a float.

            Thousands separators are dropped and a decimal part is kept, so
            "1,250.5 sq ft" gives 1250.5. Raises ``IndexError`` if ``text``
            holds no digits.
            """
            return float(
                re.findall(r'\d+(?:\.\d+)?', text.replace(',', ''))[0])

        def single_spaced(text):
            """Collapse each run of spaces into a single space."""
            return re.sub(' +', ' ', text)

        maintainance = posted_by_details = posted_on_date = project_name = ""
        price_per_unit = location = address = city = ""
        carpet_area = super_built_area = price = -1.0
        washroom = bedrooms = -1

        try:
            super_built_area = first_number(
                joined('//i[@id="superbuiltupArea_span"]//text()'))
        except soft_errors:
            pass

        # The built-up element is tried first; the carpet-area element is
        # used only when the first yields no number.
        for area_query in ('//i[@id="builtupArea_span"]//text()',
                           '//i[@id="carpetArea_span"]//text()'):
            try:
                carpet_area = first_number(joined(area_query))
                break
            except soft_errors:
                continue

        # Price, scaled from Indian units to plain rupees.
        try:
            price_text = joined('//span[@class="redPd b"]/text()').lower()
            price = first_number(price_text)
            if 'lac' in price_text:
                price *= 100000  # 1 lakh = 10^5
            if 'cr' in price_text:
                price *= 10000000  # 1 crore = 10^7
        except soft_errors:
            pass

        # First list entry labelled "Maintenance" that also carries a value.
        for entry in response.xpath('//div[@class="mb10"]//li'):
            if "Maintenance" not in ''.join(entry.xpath('i//text()').extract()):
                continue
            amounts = entry.xpath('em/text()').extract()
            if amounts:
                maintainance = single_spaced(amounts[-1].replace('\n', ''))
                break

        # City and location are the 2nd- and 3rd-last comma-separated parts.
        address = single_spaced(
            joined('//div[@id="AddTuplePd"]//text()').replace(
                'Address:', '').replace('\n', ''))
        address_parts = address.split(',')
        if len(address_parts) >= 2:
            city = address_parts[-2]
        if len(address_parts) >= 3:
            location = address_parts[-3]

        # Both counts are committed together, so a failure on either one
        # leaves both at the integer default rather than a raw string.
        try:
            bath_text = response.xpath(
                '//div[@class="lf"]/b//text()').extract()[0]
            bed_text = response.xpath(
                '//div[@id="bedroom_numLabel"]/b//text()   ').extract()[-1]
            washroom, bedrooms = (int(bath_text.replace(':', '')),
                                  int(bed_text.replace(':', '')))
        except soft_errors:
            washroom = bedrooms = -1

        names = response.xpath(
            '//span[@class="addPdElip lf"]//text()').extract()
        if names:
            project_name = single_spaced(names[0].replace('\n', ''))

        # The first two tokens are swapped, the former first token is mapped
        # through find_month, and the result is joined in reverse with "-".
        # NOTE(review): presumably "<month> <day> <year>" in, so that the
        # zero-padded token is the day - confirm against a live page.
        try:
            parts = joined(
                '//span[contains(@class,"PostdByPd")]//text()'
            ).replace('\n', '').replace('Posted on:', '').replace(
                ',', '').split()
            parts[0], parts[1] = parts[1].zfill(2), find_month(parts[0])
            posted_on_date = re.sub(' +', '', '-'.join(parts[::-1]))
        except Exception:
            # find_month is defined elsewhere, so its failure modes are
            # unknown here; any problem simply leaves the date empty.
            pass

        # Drop the 'Contact' prefix from the poster's link text.
        posted_by_details = single_spaced(
            joined('//a[@id="ContactPdBody"]/text()').replace(
                'Contact', '').replace('\n', ''))

        is_price_fixed = "Negotiable" not in joined('//em/text()')

        item = NewSpdItem()
        try:
            item['Price'] = price
            item['PricePerUnit'] = price_per_unit.encode('utf8')
            item['maintainance'] = maintainance.encode('utf8')
            item['is_price_fixed'] = is_price_fixed

            item['SuperBuiltupArea'] = super_built_area
            item['CarpetArea'] = carpet_area

            item['city'] = city.encode('utf8')
            item['address'] = address.encode('utf8')
            item['Location'] = location.encode('utf8')

            item['Washroom'] = washroom

            item['PostedBy'] = posted_by_details.encode('utf8')
            item['PostingDate'] = posted_on_date.encode('utf8')
            item['ProjectName'] = project_name.encode('utf8')

            item['Bedrooms'] = bedrooms
            item['URL'] = response.url
            item['website'] = (response.url).split('/')[2]
        except Exception:
            # Keep the established contract: an unpopulated item is emitted
            # rather than a half-filled one.
            item = NewSpdItem()

        yield item
Beispiel #3
0
	def parse_property_info(self, response):
		"""Load a property page in the Selenium driver and yield a ``BuyItem``.

		The URL is opened with ``self.driver``; once the price container is
		present the rendered page source is re-parsed and scraped with XPath.
		Extraction is best-effort: a field whose markup is missing keeps its
		default (``0`` / ``0.0`` for numbers, ``""`` for text).

		:param response: response whose ``url`` is loaded in the driver.
		:returns: generator yielding one ``BuyItem``, or nothing at all when
			the price container does not appear within 10 seconds.
		"""
		# Errors that merely mean "this field is absent or oddly formatted".
		soft_errors = (IndexError, ValueError)

		item = BuyItem()
		self.driver.get(response.url)
		try:
			# The locator has to resolve to an element, not a text node.
			# NOTE(review): pages that only have the fallback markup used
			# further down (e.g. span#pdPrice) are dropped here - confirm
			# whether the wait should accept that layout too.
			WebDriverWait(self.driver, 10).until(EC.presence_of_element_located((By.XPATH, '//div[@class="npPrice"]')))
		except TimeoutException:
			return
		response = TextResponse(url=self.driver.current_url, body=self.driver.page_source, encoding='utf-8')

		def joined(query, sep=''):
			"""Concatenate every string ``query`` selects on the rendered page."""
			return sep.join(response.xpath(query).extract())

		def area_in_sqft(text):
			"""Read the leading number of ``text``; acres become square feet."""
			size = float(text.split()[0])
			if "acres" in text:
				size *= 43560  # square feet per acre
			return size

		is_resale = price = bedrooms = bathrooms = price_per_sqft = 0
		is_price_fix = 1

		# Price, scaled from Indian units to plain rupees.
		try:
			full_price = joined('//div[@class="npPrice"]//text()', ',')
			price = float(full_price.split(',')[3])
			if 'Cr' in full_price:
				price *= 10000000  # 1 crore = 10^7
			if "Lac" in full_price:
				price *= 100000  # 1 lakh = 10^5
		except soft_errors:
			pass

		if price == 0:
			try:
				full_price = joined('//span[@id="pdPrice"]//text()', ' ')
				price = float(full_price.split()[0])
				if 'Cr' in full_price:
					price *= 10000000
				if "Lac" in full_price:
					price *= 100000
			except soft_errors:
				pass

		try:
			price_per_sqft = float(response.xpath('//div[@class="npBasePrice"]/span/text()').extract()[3])
		except soft_errors:
			pass

		if price_per_sqft == 0:
			try:
				price_per_sqft = float(joined('//div[@id="pricePerUnitArea"]/text()').split()[1])
			except soft_errors:
				pass

		# Address: the project-location block first, span#address second.
		city = address = location = ""
		for address_query in ('//div[@class="project-location"]/span//text()', '//span[@id="address"]/text()'):
			try:
				address = joined(address_query).replace('\n', '')
				city = address.split(',')[-2]
				location = response.xpath('//a[@class="ttlLink"]/text()').extract()[1]
			except IndexError:
				pass
			if address != "":
				break

		status = ""
		min_area = max_area = 0.0
		try:
			status = response.xpath('//div[@class="npPossessionDate"]/text()').extract()[2]
		except IndexError:
			pass

		if status == "":
			status = joined('//div[@class="pdDetailInfoOther"]/div[3]/span/text()')

		# Area range: first number is the minimum, second (if any) the maximum.
		try:
			sizes = [float(tok) for tok in joined('//div[@class="npAreaPrice"]/span[1]/text()').split() if tok.isdigit()]
			if sizes:
				min_area = sizes[0]
				max_area = sizes[1] if len(sizes) > 1 else min_area
		except ValueError:
			pass

		SuperBuiltupArea = 0.0
		try:
			SuperBuiltupArea = area_in_sqft(joined('//div[@class="npPrjArea"]/span//text()', ' '))
		except soft_errors:
			pass

		if min_area == 0.0:
			try:
				min_area = float(joined('//span[@id="superbuiltupArea_span"]/text()'))
				max_area = min_area
			except ValueError:
				pass

			try:
				SuperBuiltupArea = area_in_sqft(joined('//div[@id="socAreaOccupied"]/text()'))
			except soft_errors:
				pass

		launch_date = posted_on = ''

		# NOTE(review): posted_on and Description are extracted but never
		# stored on the item - confirm whether BuyItem should carry them.
		try:
			parts = joined('//span[@class="pdPropDate"]/text()').replace(',', '').split()
			parts[0], parts[1] = parts[1], find_month(parts[0])
			posted_on = ' '.join(parts)
		except Exception:
			# find_month is defined elsewhere; its failure modes are unknown.
			pass

		Description = joined('//div[@id = "description"]//text()').replace('\n', '')

		speciality = {}

		amenities = joined('//div[@id="amenitiesSection"]/div/div[2]/div/div/div//text()', ',')
		if amenities == "":
			amenities = joined('//div[@id="features"]/div/div//text()', ',')

		# One header/value pair per fact row.
		for spec in response.xpath('//div[@class=" pdOtherFacts responsive"]/div'):
			header = ''.join(spec.xpath('span[1]//text()').extract())
			speciality[header] = ''.join(spec.xpath('span[2]//text()').extract())

		agent_name = agent_type = ""
		try:
			agent_type = joined('//div[@id="QryFormPd"]//span[@class="dealerWidgetHeading"]//text()').replace('Details', '')
			agent_name = joined('//div[@id="QryFormPd"]//div[@class="c2dInfo"]//text()', ',').split()[0]
		except IndexError:
			pass

		if agent_name == "":
			agent_name = joined('//div[@id="QryFormPd"]//div[@class="c2dRunCaptionAbtDev "]//span[@class="spanBold"]//text()', ' ').replace('About ', '')

		# Substring test, so surrounding whitespace in the node is harmless.
		if 'Resale' in joined('//span[@id="transactionType"]//text()'):
			is_resale = 1

		try:
			bedrooms = int(joined('//div[@id="bedRoomNum"]//text()').split()[0])
		except soft_errors:
			pass

		try:
			bathrooms = int(joined('//div[@id="bathroomNum"]//text()').split()[0])
		except soft_errors:
			pass

		age_of_property = joined('//div[@id="agePossessionLbl"]//text()')

		additional_rooms = joined('//div[@id="additionalRooms"]//text()')
		amenities += (", " + additional_rooms)

		more_info = {}

		item['price'] = price
		item['price_per_sqft'] = price_per_sqft
		item['is_price_fix'] = is_price_fix
		item['address'] = address.encode('utf8')
		item['city'] = city.encode('utf8')
		item['location'] = location.encode('utf8')
		item['min_area'] = min_area
		item['max_area'] = max_area
		item['bathrooms'] = bathrooms
		item['bedrooms'] = bedrooms
		item['SuperBuiltupArea'] = SuperBuiltupArea
		item['age_of_property'] = age_of_property.encode('utf8')
		item['launch_date'] = launch_date.encode('utf8')
		item['possession_status'] = status.encode('utf8')
		item['agent_name'] = agent_name.encode('utf8')
		item['agent_type'] = agent_type.encode('utf8')
		item['amenities'] = amenities.encode('utf8')
		item['speciality'] = speciality
		item['more_info'] = more_info
		item['is_resale'] = is_resale
		item['url'] = response.url

		yield item
Beispiel #4
0
    def parse_property_info(self, response):
        """Scrape one rental listing page with XPath into a ``NewSpdItem``.

        Every field is extracted independently and best-effort: missing or
        malformed markup leaves that field at its default (``-1`` / ``-1.0``
        for numbers, ``""`` for text) instead of aborting the page.

        :param response: page response; must provide ``xpath()`` and ``url``.
        :returns: generator yielding the populated ``NewSpdItem``, or a
            single ``None`` when the project name is missing or the item
            cannot be populated.
        """
        # Errors that merely mean "this field is absent or oddly formatted".
        soft_errors = (IndexError, ValueError)

        def joined(node, query):
            """Concatenate every string that ``query`` selects below ``node``."""
            return ''.join(node.xpath(query).extract())

        def number_in(text):
            """Build a float from the digits and dots found in ``text``.

            Raises ``ValueError`` when nothing numeric is left.
            """
            return float(
                ''.join(ch for ch in text if ch.isdigit() or ch == '.'))

        def area_in_sqft(value_query, unit_query):
            """Parse the area at ``value_query`` and normalise it to sq ft.

            Thousands separators are dropped and a decimal part is kept.
            Raises ``IndexError`` when the element holds no number.
            """
            text = joined(response, value_query).replace(',', '').replace(
                '\n', '')
            area = float(re.findall(r'\d+(?:\.\d+)?', text)[0])
            if "yrd" in joined(response, unit_query):
                area *= 9  # 1 sq yd = 9 sq ft
            return area

        maintainance = posted_by_details = posted_on_date = project_name = ""
        location = address = city = ""
        carpet_area = super_built_area = -1.0
        washroom = bedrooms = -1
        is_price_fixed = True
        price = price_per_unit = -1.0

        try:
            super_built_area = area_in_sqft(
                '//span[@id="coveredAreaDisplay"]//text()',
                '//div[@id="coveredAreaUnit"]//text()')
        except soft_errors:
            pass

        try:
            carpet_area = area_in_sqft(
                '//span[@id="carpetAreaDisplay"]//text()',
                '//div[@id="carpetAreaUnit"]//text()')
        except soft_errors:
            pass

        # "More details" table: one label/value pair per row.
        for data in response.xpath(
                "//div[@class='nMoreListData']/div[@class='nDataRow']"):
            labels = data.xpath('div[@class="dataLabel"]//text()').extract()
            if not labels:
                continue
            label = labels[0]

            if "Rent" in label:
                try:
                    price = number_in(data.xpath(
                        'div[@class="dataVal"]/span[contains(@class,"fBold")]//text()'
                    ).extract()[0].split()[-1])
                except soft_errors:
                    price = -1.0

                try:
                    rate = number_in(data.xpath(
                        'div[@class="dataVal"]/span[@class="light"]/text()'
                    ).extract()[1].split()[1])
                    rate_unit = data.xpath(
                        'div[@class="dataVal"]/span[@class="light"]//text()'
                    ).extract()[1].split()[3].strip()
                    if "sqyrd" in rate_unit:
                        # A per-sq-yd rate is nine times the per-sq-ft rate;
                        # this matches the sq-ft normalisation of the areas.
                        rate /= 9
                    price_per_unit = rate
                except soft_errors:
                    price_per_unit = -1.0

            if "Address" in label:
                address = joined(
                    data, 'div[@class="dataVal"]/text()').replace('\n', '')

        location = joined(
            response, '//span[@itemprop="streetAddress"]//text()'
        ).replace(',', ' ')
        city = joined(
            response, '//span[@itemprop="addressLocality"]//text()'
        ).replace(',', ' ')

        if address == '':
            address = location + city

        # Summary block: only the "Configuration" row is used here.
        for data in response.xpath(
                '//div[@class="nInfoDataBlock"]/div[@class="nDataRow"]'):
            if "Configuration" not in joined(
                    data, 'div[@class="dataLabel"]//text()'):
                continue

            try:
                bedrooms = int(data.xpath(
                    'div[@class="dataVal"]/span//text()'
                ).extract()[0].split()[0])
            except soft_errors:
                pass

            extras = joined(data, 'div[@class="dataVal"]/text()')
            for info in extras.split(","):
                if "Bathroom" not in info:
                    continue
                try:
                    washroom = int(
                        info.replace('\n', ' ').strip().split()[0])
                except soft_errors:
                    pass

        # The first two tokens are swapped, the former first token is mapped
        # through find_month, and the result is joined in reverse with "-".
        # NOTE(review): presumably "<month> <day> <year>" in, so that the
        # zero-padded token is the day - confirm against a live page.
        try:
            stamp = response.xpath(
                '//div[@class="propIDnPDate"]//text()').extract()[0]
            parts = stamp.split('|')[1].split(':')[1].replace(
                ',', ' ').replace('\'', ' ').split()
            parts[0], parts[1] = parts[1].zfill(2), find_month(parts[0])
            posted_on_date = '-'.join(parts[::-1])
        except Exception:
            # find_month is defined elsewhere, so its failure modes are
            # unknown here; any problem simply leaves the date empty.
            pass

        project_name = joined(
            response, '//div[@class="nProjNmLoc"]/a//text()'
        ).replace('\n', '')

        # Drop the 'Contact ' prefix from the agent button text.
        contacts = response.xpath(
            '//a[contains(@id,"agentBtn")]//text()').extract()
        if contacts:
            posted_by_details = contacts[0].replace('Contact ', '')

        item = NewSpdItem()
        try:
            item['URL'] = response.url
            item['website'] = (response.url).split('/')[2]
            item['Price'] = price
            item['PricePerUnit'] = price_per_unit
            item['maintainance'] = maintainance.encode('utf8')
            item['is_price_fixed'] = is_price_fixed

            item['SuperBuiltupArea'] = super_built_area
            item['CarpetArea'] = carpet_area

            item['city'] = city.encode('utf8')
            item['address'] = address.encode('utf8')
            item['Location'] = location.encode('utf8')

            item['Washroom'] = washroom
            item['Bedrooms'] = bedrooms

            item['PostedBy'] = posted_by_details.encode('utf8')
            item['PostingDate'] = posted_on_date.encode('utf8')
            item['ProjectName'] = project_name.encode('utf8')
        except Exception:
            print("error1")
            yield
            return

        if project_name == '':
            # Listings without a project name are reported and not emitted.
            print(response.url)
            print("\n\n\nproject name missing\n\n\n")
            yield
        else:
            pprint(item)
            yield item
Beispiel #5
0
    def parse_property_info(self, response):
        """Scrape a property-detail page into a ``BuyItem`` and yield it.

        Extraction is best-effort: a field whose markup is missing or cannot
        be parsed keeps its default (``0`` for numbers, ``""`` for text)
        instead of aborting the page.

        Args:
            response: page response; only ``xpath()`` and ``url`` are used.

        Yields:
            The populated ``BuyItem``.
        """
        lakh = 100000
        crore = 10000000
        sqft_per_sqyd = 9
        value_query = 'div[@class="dataVal"]//text()'

        def text_of(node, query, sep=''):
            # Join every string matched by ``query`` under ``node``.
            return sep.join(node.xpath(query).extract())

        def squash(text):
            # Drop the newlines and tabs the page layout leaves in values.
            return text.replace('\n', '').replace('\t', '')

        def digits_of(text):
            # Keep only the characters that can form a decimal number.
            return ''.join(ch for ch in text if ch.isdigit() or ch == '.')

        item = BuyItem()

        price = price_per_sqft = min_area = max_area = 0
        bathrooms = bedrooms = SuperBuiltupArea = is_resale = 0
        is_price_fix = 1
        address = city = location = age_of_property = ""
        agent_name = agent_type = launch_date = status = ""
        # Bound up front so a failed extraction leaves an empty string
        # instead of an unbound name when the item is filled in below.
        description = posted_on_date = code = ""
        speciality = {}
        more_info = []

        # Headline price, e.g. "<currency> 45 Lac" -> absolute amount.
        try:
            full_price = text_of(
                response, '//div[@class="nActualAmt"]//text()')
            price = float(full_price.split()[1])
            if "Lac" in full_price:
                price *= lakh
            if "Cr" in full_price:
                price *= crore
        except (IndexError, ValueError):
            pass

        try:
            SuperBuiltupArea = float(text_of(
                response, '//span[@id="coveredAreaDisplay"]//text()'))
        except ValueError:
            pass

        try:
            min_area = max_area = float(text_of(
                response, '//span[@id="carpetAreaDisplay"]//text()'))
        except ValueError:
            pass

        # "More details" table: one label/value pair per row.
        for row in response.xpath(
                "//div[@class='nMoreListData']/div[@class='nDataRow']"):
            labels = row.xpath('div[@class="dataLabel"]//text()').extract()
            if not labels:
                continue
            label = labels[0]

            if "Price" in label:
                if price == 0.0:
                    try:
                        bold = row.xpath(
                            'div[@class="dataVal"]/span[contains(@class,"fBold")]//text()'
                        ).extract()
                        raw_price = bold[0].split()[-1].replace(
                            ',', '').lower()
                        # Assign only after a successful conversion so a
                        # bad value never leaves ``price`` as a string.
                        price = float(digits_of(raw_price))
                    except (IndexError, ValueError):
                        pass

                try:
                    raw_rate = row.xpath(
                        'div[@class="dataVal"]/span[@class="light"]/text()'
                    ).extract()[1].split()[1]
                    unit = row.xpath(
                        'div[@class="dataVal"]/span[@class="light"]//text()'
                    ).extract()[1].split()[3].strip()
                    rate = float(digits_of(raw_rate))
                    if "sqyrd" in unit:
                        # One square yard is nine square feet, so a
                        # per-square-yard rate is divided, not multiplied.
                        rate /= sqft_per_sqyd
                    price_per_sqft = rate
                except (IndexError, ValueError):
                    pass

            if "Address" in label:
                address = squash(text_of(row, value_query))

            if ("Water Availability" in label
                    or "Status of Electricity" in label):
                speciality[label] = squash(text_of(row, value_query))

            if "Flooring" in label:
                speciality[label] = squash(text_of(
                    row, 'div[contains(@class,"dataVal")]//text()'))

        if address == "":
            address = squash(text_of(
                response, '//div[@class="nProjNmLoc"]//text()'))

        location = text_of(
            response,
            '//span[@itemprop="streetAddress"]//text()').replace(',', ' ')
        city = text_of(
            response,
            '//span[@itemprop="addressLocality"]//text()').replace(',', ' ')

        if address == "":
            address = location + ' ' + city

        # Fall back to the trailing comma-separated parts of the address.
        if city == '':
            city = address.split(',')[-1]
        if location == '':
            try:
                location = address.split(',')[-2]
            except IndexError:
                pass

        # "Info" table: configuration, transaction type, status, and so on.
        for row in response.xpath(
                '//div[@class="nInfoDataBlock"]/div[@class="nDataRow"]'):
            label = text_of(row, 'div[@class="dataLabel"]//text()')

            if "Configuration" in label:
                try:
                    bedrooms = int(row.xpath(
                        'div[@class="dataVal"]/span//text()'
                    ).extract()[0].split()[0])
                except (IndexError, ValueError):
                    pass
                try:
                    extras = text_of(
                        row, 'div[@class="dataVal"]/text()').split(",")
                    for extra in extras:
                        extra = extra.replace('\n', ' ')
                        if "Bathroom" in extra:
                            extra = extra.strip().split()[0]
                            bathrooms = int(extra)
                        if "Room" in extra:
                            # Key spelling kept as-is: it is part of the
                            # emitted data.
                            speciality['addional_room'] = extra.strip()
                except (IndexError, ValueError):
                    pass

            if "Transaction" in label:
                if "Resale" in text_of(row, value_query).strip():
                    is_resale = 1

            if "Status" in label:
                status = text_of(row, value_query).replace('\n', '')
                if status == '':
                    status = text_of(
                        row,
                        'li/div[@class="dataVal"]//text()').replace('\n', '')

            if "Age" in label:
                age_of_property = text_of(
                    row, value_query).replace('\n', '')
                if age_of_property == '':
                    age_of_property = text_of(
                        row,
                        'li/div[@class="dataVal"]//text()').replace('\n', '')

            if "Furnish" in label:
                speciality['furnishing'] = text_of(row, value_query).strip()

            if "Car Parking" in label:
                speciality['parking'] = text_of(row, value_query).strip()

        description = text_of(
            response, "//span[@class='dDetail']//text()").replace('\n', '')
        if description == "":
            description = squash(text_of(
                response, "//div[@class='nAboutBrf']//text()"))

        # Header line is split as "<...>: <code> | <...>: <date>"; the first
        # two date tokens are swapped and the month goes through
        # ``month.find_month``.
        try:
            header = response.xpath(
                '//div[@class="propIDnPDate"]//text()').extract()[0]
            date_parts = header.split('|')[1].split(':')[1].strip().replace(
                ',', ' ').split()
            date_parts[0], date_parts[1] = date_parts[1], date_parts[0]
            date_parts[1] = month.find_month(date_parts[1])
            posted_on_date = ' '.join(date_parts)
        except Exception:
            # ``find_month`` is an external helper whose failure modes are
            # not visible here; keep the date empty rather than crash.
            pass

        try:
            code = response.xpath(
                '//div[@class="propIDnPDate"][1]//text()'
            ).extract()[0].split("|")[0].split(":")[1].strip()
        except IndexError:
            pass

        try:
            agent_name = response.xpath(
                '//div[@class="agntName"]//text()').extract()[-1]
        except IndexError:
            pass

        amenities = text_of(
            response,
            '//div[@id="normalAminities"]//li[not(@class="notAvail")]/span[@class="ameLabel"]/text()',
            ',')

        item['price'] = price
        item['price_per_sqft'] = price_per_sqft
        item['is_price_fix'] = is_price_fix
        item['address'] = address.encode('utf8')
        item['city'] = city.encode('utf8')
        item['location'] = location.encode('utf8')
        item['min_area'] = min_area
        item['max_area'] = max_area
        item['bathrooms'] = bathrooms
        item['bedrooms'] = bedrooms
        item['SuperBuiltupArea'] = SuperBuiltupArea
        item['age_of_property'] = age_of_property.encode('utf8')
        item['launch_date'] = launch_date.encode('utf8')
        item['posted_on'] = posted_on_date.encode('utf8')
        item['possession_status'] = status.encode('utf8')
        item['agent_name'] = agent_name.encode('utf8')
        item['agent_type'] = agent_type.encode('utf8')
        item['amenities'] = amenities.encode('utf8')
        item['speciality'] = speciality
        item['more_info'] = more_info
        item['is_resale'] = is_resale
        item['url'] = response.url
        item['code'] = code.encode('utf8')
        item['description'] = description.encode('utf8')

        yield item
Beispiel #6
0
    def parse_property_info(self, response):
        """Load a listing in the Selenium driver and scrape it into a ``BuyItem``.

        The URL is opened with ``self.driver``; once the price block is
        present the rendered page source is re-wrapped as a ``TextResponse``
        and scraped with XPath. Extraction is best-effort: a field that is
        missing or unparsable keeps its default value.

        Args:
            response: the response whose ``url`` is loaded in the driver.

        Yields:
            The populated ``BuyItem``. Nothing is yielded when the price
            block does not appear within 10 seconds.
        """
        lakh = 100000
        crore = 10000000
        sqft_per_acre = 43560

        def text_of(node, query, sep=''):
            # Join every string matched by ``query`` under ``node``.
            return sep.join(node.xpath(query).extract())

        def area_in_sqft(text):
            # Leading number of ``text``; scaled when the unit is acres.
            size = float(text.split()[0])
            if "acres" in text:
                size *= sqft_per_acre
            return size

        def area_range(text, low):
            # Integer tokens of ``text`` as (min, max); ``low`` is kept when
            # no token is found and max mirrors min when only one is.
            sizes = [float(tok) for tok in text.split() if tok.isdigit()]
            if sizes:
                low = sizes[0]
            high = sizes[1] if len(sizes) > 1 else low
            return low, high

        item = BuyItem()
        self.driver.get(response.url)
        try:
            # Selenium locators must resolve to an element, so wait for the
            # price <div> itself rather than for its text nodes.
            WebDriverWait(self.driver, 10).until(
                EC.presence_of_element_located(
                    (By.XPATH, '//div[@class="npPrice"]')))
        except TimeoutException:
            return
        response = TextResponse(url=self.driver.current_url,
                                body=self.driver.page_source,
                                encoding='utf-8')

        is_resale = price = bedrooms = bathrooms = price_per_sqft = 0
        is_price_fix = 1
        # Single-argument form prints the same text under the Python 2
        # print statement the file otherwise uses.
        print("\n%s \n" % response.url)

        # Debug prints are kept out of the parsing ``try`` blocks: an
        # encoding error while printing would silently abort the parse.
        try:
            full_price = text_of(
                response, '//div[@class="npPrice"]//text()', ',')
            price = float(full_price.split(',')[3])
            if 'Cr' in full_price:
                price *= crore
            if "Lac" in full_price:
                price *= lakh
        except (IndexError, ValueError):
            pass

        if price == 0:
            try:
                full_price = text_of(
                    response, '//span[@id="pdPrice"]//text()', ' ')
                price = float(full_price.split()[0])
                if 'Cr' in full_price:
                    price *= crore
                if "Lacs" in full_price:
                    price *= lakh
            except (IndexError, ValueError):
                pass
        print(price)

        try:
            price_per_sqft = float(response.xpath(
                '//div[@class="npBasePrice"]/span/text()').extract()[3])
        except (IndexError, ValueError):
            pass

        try:
            # The extracted list is joined before splitting; calling
            # ``split`` on the list itself always raised.
            price_per_sqft = float(text_of(
                response,
                '//div[@id="pricePerUnitArea"]/text()').split()[1])
        except (IndexError, ValueError):
            pass

        city = address = location = ""
        try:
            address = text_of(
                response,
                '//div[@class="project-location"]/span//text()'
            ).replace('\n', '')
            city = address.split(',')[-2]
            location = response.xpath(
                '//a[@class="ttlLink"]/text()').extract()[1]
        except IndexError:
            pass

        if address == "":
            try:
                address = text_of(
                    response,
                    '//span[@id="address"]/text()').replace('\n', '')
                city = address.split(',')[-2]
                location = response.xpath(
                    '//a[@class="ttlLink"]/text()').extract()[1]
            except IndexError:
                pass

        status = ""
        min_area = max_area = 0.0
        try:
            status = response.xpath(
                '//div[@class="npPossessionDate"]/text()').extract()[2]
        except IndexError:
            pass

        if status == "":
            status = text_of(
                response,
                '//div[@class="pdDetailInfoOther"]/div[3]/span/text()')

        min_area, max_area = area_range(
            text_of(response, '//div[@class="npAreaPrice"]/span[1]/text()'),
            min_area)

        SuperBuiltupArea = 0.0
        try:
            # The "\t" reproduces the tab embedded in the original query.
            SuperBuiltupArea = area_in_sqft(text_of(
                response, '//div[@class="npPrjArea"]\t/span//text()', ' '))
        except (IndexError, ValueError):
            pass

        if min_area == 0.0:
            try:
                min_area = float(text_of(
                    response, '//span[@id="superbuiltupArea_span"]/text()'))
                max_area = min_area
            except ValueError:
                pass

            try:
                # Converted before assignment so a failed parse cannot
                # replace an earlier numeric value with a raw string.
                SuperBuiltupArea = area_in_sqft(text_of(
                    response, '//div[@id="socAreaOccupied"]/text()'))
            except (IndexError, ValueError):
                pass

        launch_date = posted_on = ''

        # NOTE(review): the posting date and the description below are
        # extracted but never stored on the item; kept so the selectors are
        # not lost. Confirm whether BuyItem should carry them.
        try:
            date_parts = text_of(
                response,
                '//span[@class="pdPropDate"]/text()').replace(',', '').split()
            date_parts[0], date_parts[1] = date_parts[1], date_parts[0]
            date_parts[1] = find_month(date_parts[1])
            posted_on = ' '.join(date_parts)
        except Exception:
            # ``find_month`` is an external helper whose failure modes are
            # not visible here.
            pass

        age_of_property = ''
        speciality = {}

        Description = text_of(
            response, '//div[@id = "description"]//text()').replace('\n', '')

        amenities = text_of(
            response,
            '//div[@id="amenitiesSection"]/div/div[2]/div/div/div//text()',
            ',')
        if amenities == "":
            amenities = text_of(
                response, '//div[@id="features"]/div/div//text()', ',')

        # One header/value pair per fact row; each row is queried on its
        # own instead of on the whole selector list.
        for fact in response.xpath(
                '//div[@class=" pdOtherFacts responsive"]/div'):
            header = text_of(fact, 'span[1]//text()')
            speciality[header] = text_of(fact, 'span[2]//text()')

        agent_name = agent_type = ""
        try:
            agent_type = text_of(
                response,
                '//div[@id="QryFormPd"]//span[@class="dealerWidgetHeading"]//text()'
            ).replace('Details', '')
            agent_name = text_of(
                response,
                '//div[@id="QryFormPd"]//div[@class="c2dInfo"]//text()',
                ',').split()[0]
        except IndexError:
            pass

        if agent_name == "":
            agent_name = text_of(
                response,
                '//div[@id="QryFormPd"]//div[@class="c2dRunCaptionAbtDev "]//span[@class="spanBold"]//text()',
                ' ').replace('About ', '')

        # Substring test on the joined text; list membership only matched a
        # text node that was exactly "Resale".
        if 'Resale' in text_of(
                response, '//span[@id="transactionType"]//text()'):
            is_resale = 1

        try:
            bedrooms = int(text_of(
                response, '//div[@id="bedRoomNum"]//text()').split()[0])
        except (IndexError, ValueError):
            pass

        try:
            bathrooms = int(text_of(
                response, '//div[@id="bathroomNum"]//text()').split()[0])
        except (IndexError, ValueError):
            pass

        age_of_property = text_of(
            response, '//div[@id="agePossessionLbl"]//text()')

        additional_rooms = text_of(
            response, '//div[@id="additionalRooms"]//text()')
        amenities += (", " + additional_rooms)

        more_info = {}

        item['price'] = price
        item['price_per_sqft'] = price_per_sqft
        item['is_price_fix'] = is_price_fix
        item['address'] = address.encode('utf8')
        item['city'] = city.encode('utf8')
        item['location'] = location.encode('utf8')
        item['min_area'] = min_area
        item['max_area'] = max_area
        item['bathrooms'] = bathrooms
        item['bedrooms'] = bedrooms
        item['SuperBuiltupArea'] = SuperBuiltupArea
        item['age_of_property'] = age_of_property.encode('utf8')
        item['launch_date'] = launch_date.encode('utf8')
        item['possession_status'] = status.encode('utf8')
        item['agent_name'] = agent_name.encode('utf8')
        item['agent_type'] = agent_type.encode('utf8')
        item['amenities'] = amenities.encode('utf8')
        item['speciality'] = speciality
        item['more_info'] = more_info
        item['is_resale'] = is_resale
        item['url'] = response.url

        yield item
Beispiel #7
0
    def parse_property_info(self, response):
        """Scrape a project or resale listing page into a ``BuyItem``.

        Extraction is best-effort: a field that is missing or unparsable
        keeps its default value. Pages whose URL contains ``"resale"`` get
        an extra pass that fills the resale-only fields.

        Args:
            response: page response; only ``xpath()`` and ``url`` are used.

        Yields:
            The populated ``BuyItem``.
        """
        lakh = 100000
        crore = 10000000
        sqft_per_acre = 43560
        overview_query = '//div[@id="overview-card"]//span[@class="entity"]'
        info_query = '//div[@class="project-info-container"]/div'

        def text_of(node, query, sep=''):
            # Join every string matched by ``query`` under ``node``.
            return sep.join(node.xpath(query).extract())

        def leading_int(text):
            # First whitespace-separated token, thousands commas removed.
            return int(text.split()[0].replace(',', ''))

        item = BuyItem()

        is_resale = price = bedrooms = bathrooms = price_per_sqft = 0
        is_price_fix = 1
        try:
            price = int(text_of(
                response, '//span[@class="price-info"]/@data-value'))
        except ValueError:
            # The flag actually emitted is ``is_price_fix``; the misspelt
            # ``is_price_fixed`` assigned here before was never read.
            is_price_fix = 0

        try:
            price_per_sqft = leading_int(text_of(
                response, '//div[@class="pp-container"]/span/text()'))
        except (IndexError, ValueError):
            pass

        city = address = location = ""
        try:
            address = text_of(
                response,
                '//div[@class="location-info"]//text()').replace('\n', '')
            city = address.split(',')[-1]
            location = response.xpath(
                '//a[@data-category="search"]/span/text()').extract()[5]
        except IndexError:
            pass

        status = ""
        min_area = max_area = 0.0
        for info in response.xpath(info_query):
            description = text_of(
                info, 'div[@class="info-description"]//text()')
            value = text_of(info, 'div[@class="info-value"]//text()')
            if "Possession" in description:
                status = value.replace('\n', '')

            if ("Sizes" in description) or ("area" in description):
                sizes = [float(tok) for tok in value.split()
                         if tok.isdigit()]
                if sizes:
                    min_area = sizes[0]
                # A single size is used for both ends of the range.
                max_area = sizes[1] if len(sizes) > 1 else min_area

        launch_date = ''
        SuperBuiltupArea = 0.0
        for entry in response.xpath(overview_query):
            label = text_of(entry, 'span/span[@class="text"]//text()')
            value = text_of(entry, 'span/span[@class="value"]//text()')

            if "Area" in label:
                try:
                    SuperBuiltupArea = float(value.split()[0])
                    if "Acres" in value:
                        SuperBuiltupArea = SuperBuiltupArea * sqft_per_acre
                except (IndexError, ValueError):
                    pass

            if "Launch" in label:
                try:
                    date_parts = value.strip().replace('\n', '').replace(
                        ',', '').split()
                    date_parts[0] = find_month(date_parts[0])
                    # Assigned only once complete, so a failure cannot leave
                    # ``launch_date`` as a list that breaks ``.encode``.
                    launch_date = ' '.join(date_parts)
                except Exception:
                    # ``find_month`` is an external helper whose failure
                    # modes are not visible here.
                    pass

        age_of_property = ''
        speciality = {}

        # NOTE(review): extracted but never stored on the item; kept so the
        # selector is not lost. Confirm whether BuyItem should carry it.
        Description = text_of(response, '//p[@class="desc-para"]//text()')

        amenities = text_of(
            response,
            '//span[@class="amenity-entity"]//span[@class="text"]//text()',
            ',')

        # One header/text pair per block; each block is queried on its own
        # instead of on the whole selector list.
        for spec in response.xpath('//div[@class="amenity-entity"]'):
            header = text_of(spec, 'span[@class="header"]//text()')
            speciality[header] = text_of(
                spec, 'span[@class="texts"]//text()')

        agent_name = text_of(response, '//*[@class="name"]//text()')
        agent_type = text_of(
            response, '//div[@class="info"]/div[@class="type"]//text()')

        # Unit list: a "BHK" row sets the bedroom count that applies to the
        # size/price rows following it.
        more_info = []
        bhk = 0
        for row in response.xpath(
                '//div[@class="nsv-list-item-container"]/div'):
            try:
                # ".//" keeps the query inside this row; a bare "//" is
                # evaluated against the whole document.
                header = text_of(row, './/text()')
                if "BHK" in header:
                    # Convert the first token; converting the token list
                    # itself always raised.
                    bhk = int(text_of(row, 'span/span//text()').split()[0])
                else:
                    size = float(text_of(
                        row,
                        'div/div[@class="list-heading"]//text()').split()[0])
                    full_rate = text_of(
                        row, 'div/div[@class="list-price"]//span/text()')
                    rate = float(full_rate.split()[0])
                    if 'Lacs' in full_rate:
                        rate *= lakh
                    if "Cr" in full_rate:
                        rate *= crore
                    more_info.append((bhk, size, rate))
            except (IndexError, ValueError):
                continue

        if "resale" in response.url:
            is_resale = 1
            try:
                location = address.split(',')[-2]
            except IndexError:
                pass

            try:
                price_per_sqft = leading_int(text_of(
                    response,
                    '//div[@class="emi-sub-container"]/span/text()'))
            except (IndexError, ValueError):
                pass

            for entry in response.xpath(overview_query):
                label = text_of(entry, 'span/span[@class="text"]//text()')
                value = text_of(entry, 'span/span[@class="value"]//text()')

                if "Price" in label and "negotiable" in value:
                    is_price_fix = 0

                if "Added" in label:
                    try:
                        date_parts = value.replace('\n', '').replace(
                            ',', '').split()
                        # Keep only the digits of the first token.
                        date_parts[0] = ''.join(
                            ch for ch in date_parts[0] if ch.isdigit())
                        date_parts[1] = find_month(date_parts[1])
                        launch_date = ' '.join(date_parts)
                    except Exception:
                        # ``find_month`` is external; see above.
                        pass

                if "Bedrooms" in label:
                    try:
                        bedrooms = int(value.split()[0])
                    except (IndexError, ValueError):
                        pass

                if "Bathrooms" in label:
                    try:
                        bathrooms = int(value.split()[0])
                    except (IndexError, ValueError):
                        pass

            for info in response.xpath(info_query):
                description = text_of(
                    info, 'div[@class="info-description"]//text()')
                if "Age of property" in description:
                    age_of_property = text_of(
                        info,
                        'div[@class="info-value"]//text()').replace('\n', '')

        item['price'] = price
        item['price_per_sqft'] = price_per_sqft
        item['is_price_fix'] = is_price_fix
        item['address'] = address.encode('utf8')
        item['city'] = city.encode('utf8')
        item['location'] = location.encode('utf8')
        item['min_area'] = min_area
        item['max_area'] = max_area
        item['bathrooms'] = bathrooms
        item['bedrooms'] = bedrooms
        item['SuperBuiltupArea'] = SuperBuiltupArea
        item['age_of_property'] = age_of_property.encode('utf8')
        item['launch_date'] = launch_date.encode('utf8')
        item['possession_status'] = status.encode('utf8')
        item['agent_name'] = agent_name.encode('utf8')
        item['agent_type'] = agent_type.encode('utf8')
        item['amenities'] = amenities.encode('utf8')
        item['speciality'] = speciality
        item['more_info'] = more_info
        item['is_resale'] = is_resale
        item['url'] = response.url

        yield item
Beispiel #8
0
	def parse_property_info(self, response):
		"""Scrape a single property page into a ``BuyItem`` and yield it.

		Every field is extracted on a best-effort basis: when a page element
		is missing or cannot be parsed, the field keeps its default (``0`` /
		``0.0`` for numbers, ``""`` for text, empty containers for
		``speciality`` and ``more_info``) instead of aborting the item.

		Pages whose URL contains ``"resale"`` are flagged ``is_resale = 1``
		and additionally get ``bedrooms``, ``bathrooms``, ``age_of_property``,
		a price-negotiable check, and a location / price-per-sqft / date taken
		from the resale-specific elements.

		Args:
			response: page response exposing ``url`` and ``xpath()``.

		Yields:
			The populated ``BuyItem``.
		"""

		def text_of(node, query, sep=''):
			"""Join every string matched by ``query`` under ``node``."""
			return sep.join(node.xpath(query).extract())

		def first_int(text, strip_commas=False):
			"""Parse the first whitespace-separated token as int, or None."""
			try:
				token = text.split()[0]
				if strip_commas:
					token = token.replace(',', '')
				return int(token)
			except (IndexError, ValueError):
				return None

		item = BuyItem()
		is_resale = 1 if "resale" in response.url else 0

		# --- Price -----------------------------------------------------------
		price = 0
		is_price_fix = 1
		try:
			price = int(text_of(response, '//span[@class="price-info"]/@data-value'))
		except ValueError:
			# No parsable price on the page: treat the price as not fixed.
			# (The original assigned a misspelled name here, so the flag was
			# never actually cleared.)
			is_price_fix = 0

		# Parse into a temporary first so a failed conversion can never leave
		# a raw string in price_per_sqft.
		price_per_sqft = 0
		parsed_rate = first_int(
			text_of(response, '//div[@class="pp-container"]/span/text()'),
			strip_commas=True)
		if parsed_rate is not None:
			price_per_sqft = parsed_rate
		if is_resale:
			# Resale pages carry the rate in a different container; it wins
			# over the generic one when it parses.
			parsed_rate = first_int(
				text_of(response, '//div[@class="emi-sub-container"]/span/text()'),
				strip_commas=True)
			if parsed_rate is not None:
				price_per_sqft = parsed_rate

		# --- Address / city / location ---------------------------------------
		address = text_of(response, '//div[@class="location-info"]//text()').replace('\n', '')
		address_parts = address.split(',')
		city = address_parts[-1]
		location = ""
		search_links = response.xpath('//a[@data-category="search"]/span/text()').extract()
		if len(search_links) > 5:
			location = search_links[5]
		if is_resale and len(address_parts) >= 2:
			# On resale pages the second-to-last address component is used.
			location = address_parts[-2]

		# --- Project info rows: possession status, area range, age -----------
		status = age_of_property = ""
		min_area = max_area = 0.0
		for row in response.xpath('//div[@class="project-info-container"]/div'):
			description = text_of(row, 'div[@class="info-description"]//text()')
			value = text_of(row, 'div[@class="info-value"]//text()')

			if "Possession" in description:
				status = value.replace('\n', '')

			if "Sizes" in description or "area" in description:
				sizes = [float(token) for token in value.split() if token.isdigit()]
				if sizes:
					min_area = sizes[0]
				# A single size (or none) collapses the range onto min_area.
				max_area = sizes[1] if len(sizes) > 1 else min_area

			if is_resale and "Age of property" in description:
				age_of_property = value.replace('\n', '')

		# --- Overview card: area, dates, and resale-only details -------------
		SuperBuiltupArea = 0.0
		bedrooms = bathrooms = 0
		launch_date = ''
		added_on = None  # resale "Added" date; overrides launch_date if parsed
		for entity in response.xpath('//div[@id="overview-card"]//span[@class="entity"]'):
			label = text_of(entity, 'span/span[@class="text"]//text()')
			value = text_of(entity, 'span/span[@class="value"]//text()')

			if "Area" in label:
				try:
					area = float(value.split()[0])
				except (IndexError, ValueError):
					pass
				else:
					# 43560 square feet per acre.
					SuperBuiltupArea = area * 43560 if "Acres" in value else area

			if "Launch" in label:
				parts = value.strip().replace('\n', '').replace(',', '').split()
				try:
					parts[0] = find_month(parts[0])
					# Assign only after every step succeeded, so launch_date
					# is always a string when it is encoded below.
					launch_date = ' '.join(parts)
				except Exception:
					# find_month is defined elsewhere; any failure in it (or a
					# missing token) just leaves the date unset.
					pass

			if not is_resale:
				continue

			if "Price" in label and "negotiable" in value:
				is_price_fix = 0

			if "Added" in label:
				parts = value.replace('\n', '').replace(',', '').split()
				try:
					# Keep only the digits of the first token.
					parts[0] = ''.join(ch for ch in parts[0] if ch.isdigit())
					parts[1] = find_month(parts[1])
					added_on = ' '.join(parts)
				except Exception:
					pass

			if "Bedrooms" in label:
				count = first_int(value)
				if count is not None:
					bedrooms = count

			if "Bathrooms" in label:
				count = first_int(value)
				if count is not None:
					bathrooms = count

		if added_on is not None:
			launch_date = added_on

		# --- Amenities and specialities --------------------------------------
		amenities = text_of(
			response,
			'//span[@class="amenity-entity"]//span[@class="text"]//text()',
			sep=',')

		speciality = {}
		for spec in response.xpath('//div[@class="amenity-entity"]'):
			# Query the current element, not the whole selector list, so each
			# header maps to its own text.
			header = text_of(spec, 'span[@class="header"]//text()')
			speciality[header] = text_of(spec, 'span[@class="texts"]//text()')

		# --- Agent -----------------------------------------------------------
		agent_name = text_of(response, '//*[@class="name"]//text()')
		agent_type = text_of(response, '//div[@class="info"]/div[@class="type"]//text()')

		# --- Per-configuration (bhk, size, rate) rows ------------------------
		more_info = []
		bhk = 0
		for row in response.xpath('//div[@class="nsv-list-item-container"]/div'):
			try:
				# './/text()' keeps the lookup relative to this row; a bare
				# '//text()' would match the text of the entire document.
				if "BHK" in text_of(row, './/text()'):
					bhk = int(text_of(row, 'span/span//text()').split()[0])
					continue
				size = float(text_of(row, 'div/div[@class="list-heading"]//text()').split()[0])
				full_rate = text_of(row, 'div/div[@class="list-price"]//span/text()')
				rate = float(full_rate.split()[0])
			except (IndexError, ValueError):
				continue
			# Expand Indian-numbering units: 1 Lac = 1e5, 1 Cr = 1e7.
			if 'Lacs' in full_rate:
				rate *= 100000
			if "Cr" in full_rate:
				rate *= 10000000
			more_info.append((bhk, size, rate))

		item['price'] = price
		item['price_per_sqft'] = price_per_sqft
		item['is_price_fix'] = is_price_fix
		item['address'] = address.encode('utf8')
		item['city'] = city.encode('utf8')
		item['location'] = location.encode('utf8')
		item['min_area'] = min_area
		item['max_area'] = max_area
		item['bathrooms'] = bathrooms
		item['bedrooms'] = bedrooms
		item['SuperBuiltupArea'] = SuperBuiltupArea
		item['age_of_property'] = age_of_property.encode('utf8')
		item['launch_date'] = launch_date.encode('utf8')
		item['possession_status'] = status.encode('utf8')
		item['agent_name'] = agent_name.encode('utf8')
		item['agent_type'] = agent_type.encode('utf8')
		item['amenities'] = amenities.encode('utf8')
		item['speciality'] = speciality
		item['more_info'] = more_info
		item['is_resale'] = is_resale
		item['url'] = response.url

		yield item