Esempio n. 1
0
    def __get_house_details(self, url):
        """Scrape a single listing page and write out the extracted record.

        The page at ``url`` is downloaded with ``self.fetchData`` and parsed
        with BeautifulSoup (``lxml`` parser). Every field is optional: a
        ``House`` attribute is only assigned when its element is found on the
        page, so one missing element never discards the whole record.

        On success the record is passed to ``self._write_house`` (as
        ``house.getHouseString()``) and ``url`` is passed to
        ``self._write_on_mf``.

        Args:
            url: Absolute URL of the listing page.

        Returns:
            ``None`` when the record was written or when no data could be
            fetched; ``False`` when an unexpected exception occurred.
        """
        try:
            print('House URL: ' + url)
            data = self.fetchData(url)
            if not data:
                print('No data grabbed.')
                return

            soup = BeautifulSoup(data, 'lxml')
            if not soup:
                print('Invalid bs4 object.')
                return

            house = House()
            house.Url = url

            state = soup.select_one('#region-state a')
            if state is not None:
                house.State = state.text.strip()

            city = soup.select_one('#region-city a')
            if city is not None:
                house.City = city.text.strip()

            # TODO: breadcrumb selector still needs to be verified.
            info = soup.select_one('ol.zsg-breadcrumbs')
            if info is not None:
                # Count the <li> items themselves: len(tag) counts every
                # direct child node (whitespace text included), which is not
                # a safe guard for indexing the fifth breadcrumb.
                crumbs = info.select('li')
                if len(crumbs) >= 5:
                    address = crumbs[4]
                    # Only accept the crumb when it wraps at least one nested
                    # tag, as the original per-child loop required.
                    if address.findChildren():
                        house.PropertyAddress = address.text.strip()

            # TODO: price selector still needs to be verified.
            price = soup.select_one('.main-row.home-summary-row span')
            if price is not None:
                house.Price = re.sub('[^0-9]', '', price.text.strip())

            # TODO: company-name selector still needs to be verified.
            companyName = soup.select_one('.snl.company-name')
            if companyName is not None:
                house.CompanyName = companyName.text.strip()

            # TODO: agent-name selector still needs to be verified.
            agentName = soup.select_one('.profile-name-link')
            if agentName is not None:
                house.AgentName = agentName.text.strip()
                # A link without href must not abort the whole record.
                href = agentName.get('href')
                if href:
                    house.AgentProfile = self._base_url + href
                # NOTE: the licence number lives one level deeper (on the
                # agent profile page) and is not fetched here.

            # TODO: phone selector still needs to be verified.
            phoneNumber = soup.select_one('.snl.phone')
            if phoneNumber is not None:
                house.PhoneNumber = phoneNumber.text.strip()

            # TODO: fact-list selector (MLS number) still needs verification.
            for li in soup.select('ul.zsg-sm-1-1.hdp-fact-list li'):
                name_tag = li.select_one('.hdp-fact-name')
                value_tag = li.select_one('.hdp-fact-value')
                factName = name_tag.text.strip() if name_tag is not None else ''
                factValue = (value_tag.text.strip()
                             if value_tag is not None else '')

                if factName == 'MLS #:':
                    house.MLSNumber = factValue
                elif factName == 'Days on Zillow:':
                    # Derive the publication date by counting back from
                    # today. A malformed value only skips this fact instead
                    # of aborting the remaining ones.
                    try:
                        days_to_subtract = int(
                            re.sub('[^0-9]', '', factValue))
                        published_date = datetime.today() - timedelta(
                            days=days_to_subtract)
                    except (ValueError, OverflowError):
                        print('Error when getting house details')
                        continue
                    house.Date = '{0.month}/{0.day}/{0.year}'.format(
                        published_date)

            self._write_house(house.getHouseString())

            self._write_on_mf(url)

        except Exception:
            # Top-level boundary for one listing: report and signal failure
            # so a single bad page does not stop the caller.
            print('Error when getting house details')
            return False