Esempio n. 1
0
    def all_listings_page(self, page_no):
        """Build a DataFrame describing every listing on one search-results page.

        Page 1 is read from the already-loaded ``self.soup``; any other page is
        obtained through ``self._get_soup(page_no)``.

        Args:
            page_no: Number of the results page to parse.

        Returns:
            A pandas DataFrame with one row per matched ``<li>`` listing element
            and the columns ``listing_id``, ``listing_price``,
            ``price_modifier``, ``address``, ``summary``, ``num_beds``,
            ``num_baths``, ``num_receptions``, ``description``,
            ``listing_date``, ``estate_agent``, ``station1``, ``distance1``,
            ``station2``, ``distance2`` and ``date_generated`` (the time at
            which the frame was built). ``distance1``/``distance2`` are left
            empty (``None``) for a listing that has fewer station entries than
            the requested position.
        """

        soup = self.soup if page_no == 1 else self._get_soup(page_no)
        # Two class strings are matched; the second is the premium/branded
        # variant. Results of the first search come first in the frame.
        listings = soup.find_all(name='li', attrs={'class': 'srp clearfix '}) + \
            soup.find_all(name='li', attrs={'class': 'srp clearfix premium-listing premium-listing--branded '})

        # The 'clearfix' <li> entries feed four columns; look them up once per
        # listing instead of once per column.
        stations = [i.find_all('li', attrs={'class': 'clearfix'}) for i in listings]

        def station_distance(entries, position):
            # A listing may carry fewer entries than requested; return None
            # instead of raising IndexError so one sparse listing does not
            # abort parsing of the whole page.
            # NOTE(review): assumes get_station_name tolerates a missing index
            # in the same way -- confirm against its definition.
            if position < len(entries):
                return dist_to_num(entries[position])
            return None

        df = pd.DataFrame()
        df['listing_id'] = [i['data-listing-id'] for i in listings]
        # Only the first line of the price link text is converted.
        df['listing_price'] = [currency_to_num(
            i.find(name='a',
                   attrs={'class': 'listing-results-price text-price'}).text.strip().split('\n')[0]) for i in listings]
        df['price_modifier'] = [text_or_none(i.find(name='span', attrs={'class': 'price-modifier'})) for i in listings]
        df['address'] = [i.find(name='a', attrs={'class': 'listing-results-address'}).text for i in listings]
        df['summary'] = [i.find(name='a', attrs={'style': 'text-decoration:underline;'}).text for i in listings]
        df['num_beds'] = [text_or_none(i.find(name='span', attrs={'class': 'num-icon num-beds'}), data_type=str)
                          for i in listings]
        df['num_baths'] = [text_or_none(i.find(name='span', attrs={'class': 'num-icon num-baths'}), data_type=str)
                           for i in listings]
        df['num_receptions'] = [text_or_none(
            i.find(name='span', attrs={'class': 'num-icon num-reception'}), data_type=str) for i in listings]
        df['description'] = [i.find(name='p').text.strip() for i in listings]
        # The date is the text following 'Listed on \n' in the last <small> tag.
        df['listing_date'] = [to_datetime(
            i.find_all(name='small')[-1].text.split('Listed on \n')[1].split('\n')[0].strip()) for i in listings]
        df['estate_agent'] = [i.find(name='p', attrs={'class': 'top-half listing-results-marketed'}).span.text
                              for i in listings]
        df['station1'] = [get_station_name(entries, 0) for entries in stations]
        df['distance1'] = [station_distance(entries, 0) for entries in stations]
        df['station2'] = [get_station_name(entries, 1) for entries in stations]
        df['distance2'] = [station_distance(entries, 1) for entries in stations]
        df['date_generated'] = datetime.now()

        return df
Esempio n. 2
0
    def sales_history(self, dataframe=False):
        """Collect the history entries found in ``self.soup``.

        Historical listings are available at
        https://www.zoopla.co.uk/property-history/{listing_id}

        Args:
            dataframe: When truthy, return a pandas DataFrame instead of a dict.

        Returns:
            A dict (or a DataFrame built from it) with the keys ``'date'`` and
            ``'status'`` (raw span text), ``'price'`` (span text with
            ``'View listing'`` removed, passed through ``currency_to_num``) and
            ``'listing_id'`` (last path segment of the price span's link, or
            ``np.nan`` when the span has no ``<a>`` child).
        """
        def spans(css_class):
            # All <span> elements carrying the given class attribute.
            return self.soup.find_all(name='span', attrs={'class': css_class})

        def linked_listing_id(price_span):
            # `price_span.a` is None when there is no link, so subscripting it
            # raises TypeError; that case is recorded as NaN.
            try:
                return price_span.a['href'].split('/')[-1]
            except TypeError:
                return np.nan

        history = {
            'date': [tag.text for tag in spans('pdp-history__date')],
            'status': [tag.text for tag in spans('pdp-history__status')],
            'price': [
                currency_to_num(tag.text.replace('View listing', ''))
                for tag in spans('pdp-history__price')
            ],
            'listing_id': [
                linked_listing_id(tag) for tag in spans('pdp-history__price')
            ],
        }

        if dataframe:
            return pd.DataFrame(history)
        return history
Esempio n. 3
0
    def _price_history(self):
        """Collect the price-history entries found in ``self.soup``.

        Returns:
            dict with three lists, each in the order ``find_all`` yields the
            matching spans: ``'date'`` (raw span text), ``'price'`` (span text
            passed through ``currency_to_num``) and ``'detail'`` (stripped span
            text).
        """
        def spans(css_class):
            # All <span> elements carrying the given class attribute.
            return self.soup.find_all(name='span', attrs={'class': css_class})

        return {
            'date': [tag.text for tag in spans('dp-price-history__item-date')],
            'price': [currency_to_num(tag.text) for tag in spans('dp-price-history__item-price')],
            'detail': [tag.text.strip() for tag in spans('dp-price-history__item-detail')],
        }
Esempio n. 4
0
    def property_value(self):
        """Read the value estimates, their ranges and the confidence rating.

        Returns:
            dict of the form ``{'buy': {...}, 'rent': {...}, 'confidence': x}``
            where each inner dict has ``'value'``, ``'lower_bound'`` and
            ``'upper_bound'``. The first estimate found is reported under
            ``'buy'`` and the second under ``'rent'``; entries that are not
            present on the page are filled with ``np.nan``. ``'confidence'`` is
            the number before the ``%`` sign in the confidence text, or
            ``np.nan`` when that element is missing.
        """
        estimates = [
            currency_to_num(tag.text)
            for tag in self.soup.find_all(name='p',
                                          attrs={'class': 'pdp-estimate__price'})
        ]
        # Each range paragraph is "<7-character prefix><low> - <high>"; the
        # prefix is dropped and both ends are converted, giving a flat list.
        bounds = [
            currency_to_num(part)
            for tag in self.soup.find_all(name='p',
                                          attrs={'class': 'pdp-estimate__range'})
            for part in tag.text[7:].split(' - ')
        ]

        try:
            rating_text = self.soup.find(
                name='span',
                attrs={'class': 'pdp-confidence-rating__copy'}).text
            confidence = float(rating_text.strip().split('%')[0])
        except AttributeError:
            # find() returned None: the page has no confidence rating.
            confidence = np.nan

        # Pad so that indices 0-1 of `estimates` and 0-3 of `bounds` exist.
        if not estimates:
            estimates.extend([np.nan, np.nan])
            bounds.extend([np.nan, np.nan, np.nan, np.nan])
        elif len(estimates) == 1:
            estimates.append(np.nan)
            bounds.extend([np.nan, np.nan])

        buy = {
            'value': estimates[0],
            'lower_bound': bounds[0],
            'upper_bound': bounds[1]
        }
        rent = {
            'value': estimates[1],
            'lower_bound': bounds[2],
            'upper_bound': bounds[3]
        }
        return {'buy': buy, 'rent': rent, 'confidence': confidence}
Esempio n. 5
0
    def value_change(self):
        """Tabulate the value changes listed in ``self.soup``.

        Returns:
            A pandas DataFrame with the columns ``period`` (label text),
            ``value``, ``value_change`` (span text passed through
            ``currency_to_num``) and ``perc_change`` (span text without ``%``,
            as float). ``value`` is the current ``'buy'`` estimate from
            ``property_value()`` divided by ``1 + perc_change / 100`` and then
            passed through ``myround`` -- presumably the estimate at the start
            of each period.
        """
        def spans(css_class):
            # All <span> elements carrying the given class attribute.
            return self.soup.find_all(name='span', attrs={'class': css_class})

        labels = [tag.text for tag in spans('pdp-value-change__label')]
        amounts = [currency_to_num(tag.text)
                   for tag in spans('pdp-value-change__value')]
        percentages = [float(tag.text.replace('%', ''))
                       for tag in spans('pdp-value-change__difference')]

        # zip() stops at the shortest list, so rows are only built for
        # positions present in all three lists.
        rows = list(zip(labels, amounts, percentages))
        table = pd.DataFrame(rows,
                             columns=['period', 'value_change', 'perc_change'])

        current_value = self.property_value()['buy']['value']
        growth_factor = 1 + table['perc_change'] / 100
        table['value'] = current_value / growth_factor
        table['value'] = table['value'].apply(myround)

        return table[['period', 'value', 'value_change', 'perc_change']]