def all_listings_page(self, page_no):
    """Return the search-result details for a single results page.

    Page 1 is read from the already-loaded ``self.soup``; any other page
    number is loaded with ``self._get_soup(page_no)``.

    Args:
        page_no: Number of the results page to scrape.

    Returns:
        A ``pandas.DataFrame`` with one row per listing ``<li>`` element found
        on the page (standard listings first, then branded premium ones) and
        the columns ``listing_id``, ``listing_price``, ``price_modifier``,
        ``address``, ``summary``, ``num_beds``, ``num_baths``,
        ``num_receptions``, ``description``, ``listing_date``,
        ``estate_agent``, ``station1``, ``distance1``, ``station2``,
        ``distance2`` and ``date_generated`` (``datetime.now()`` at the time
        the frame is built).

    A listing that carries fewer than two ``<li class="clearfix">`` entries
    gets NaN for the missing distance instead of raising ``IndexError``.
    """
    soup = self.soup if page_no == 1 else self._get_soup(page_no)
    listings = (
        soup.find_all(name='li', attrs={'class': 'srp clearfix '})
        + soup.find_all(name='li', attrs={
            'class': 'srp clearfix premium-listing premium-listing--branded '})
    )

    def icon_text(listing, css_class):
        # Text of one of the "num-icon" spans, or whatever text_or_none
        # yields when the span is absent.
        return text_or_none(
            listing.find(name='span', attrs={'class': css_class}),
            data_type=str)

    def station_distance(stations, position):
        # find_all() returns an empty/short result when entries are missing,
        # so guard the index rather than let one sparse listing abort the page.
        if position < len(stations):
            return dist_to_num(stations[position])
        return float('nan')

    df = pd.DataFrame()
    df['listing_id'] = [i['data-listing-id'] for i in listings]
    df['listing_price'] = [
        currency_to_num(
            i.find(name='a', attrs={'class': 'listing-results-price text-price'})
            .text.strip().split('\n')[0])
        for i in listings
    ]
    df['price_modifier'] = [
        text_or_none(i.find(name='span', attrs={'class': 'price-modifier'}))
        for i in listings
    ]
    df['address'] = [
        i.find(name='a', attrs={'class': 'listing-results-address'}).text
        for i in listings
    ]
    df['summary'] = [
        i.find(name='a', attrs={'style': 'text-decoration:underline;'}).text
        for i in listings
    ]
    df['num_beds'] = [icon_text(i, 'num-icon num-beds') for i in listings]
    df['num_baths'] = [icon_text(i, 'num-icon num-baths') for i in listings]
    df['num_receptions'] = [
        icon_text(i, 'num-icon num-reception') for i in listings
    ]
    df['description'] = [i.find(name='p').text.strip() for i in listings]
    # The listing date is the text that follows "Listed on" in the last
    # <small> element of the listing.
    df['listing_date'] = [
        to_datetime(
            i.find_all(name='small')[-1]
            .text.split('Listed on \n')[1].split('\n')[0].strip())
        for i in listings
    ]
    df['estate_agent'] = [
        i.find(name='p', attrs={'class': 'top-half listing-results-marketed'})
        .span.text
        for i in listings
    ]

    # Look the li.clearfix entries up once per listing and reuse the result
    # for all four station columns.
    nearby = [i.find_all('li', attrs={'class': 'clearfix'}) for i in listings]
    df['station1'] = [get_station_name(stations, 0) for stations in nearby]
    df['distance1'] = [station_distance(stations, 0) for stations in nearby]
    df['station2'] = [get_station_name(stations, 1) for stations in nearby]
    df['distance2'] = [station_distance(stations, 1) for stations in nearby]

    df['date_generated'] = datetime.now()
    return df
def sales_history(self, dataframe=False):
    """Collect the history entries shown on the page held in ``self.soup``.

    Args:
        dataframe: When truthy, return a ``pandas.DataFrame`` built from the
            collected lists; otherwise return the plain dict.

    Returns:
        A mapping (or DataFrame) with the keys/columns ``date``, ``status``,
        ``price`` and ``listing_id``. ``listing_id`` is the last path segment
        of the link inside each price span, or NaN when the span has no link.
    """
    # historical listings available at https://www.zoopla.co.uk/property-history/{listing_id}

    def spans(css_class):
        return self.soup.find_all(name='span', attrs={'class': css_class})

    dates = [tag.text for tag in spans('pdp-history__date')]
    statuses = [tag.text for tag in spans('pdp-history__status')]
    prices = [
        currency_to_num(tag.text.replace('View listing', ''))
        for tag in spans('pdp-history__price')
    ]

    listing_ids = []
    for price_tag in spans('pdp-history__price'):
        try:
            listing_id = price_tag.a['href'].split('/')[-1]
        except TypeError:
            # price_tag.a is None when the span carries no link.
            listing_id = np.nan
        listing_ids.append(listing_id)

    history = {
        'date': dates,
        'status': statuses,
        'price': prices,
        'listing_id': listing_ids,
    }
    if dataframe:
        return pd.DataFrame(history)
    return history
def _price_history(self):
    """Collect the price-history items shown on the page in ``self.soup``.

    Returns:
        A dict with three lists: ``date`` (raw span text), ``price`` (span
        text passed through ``currency_to_num``) and ``detail`` (span text
        with surrounding whitespace stripped).
    """
    def item_spans(suffix):
        return self.soup.find_all(
            name='span', attrs={'class': 'dp-price-history__item-' + suffix})

    history = {}
    history['date'] = [tag.text for tag in item_spans('date')]
    history['price'] = [currency_to_num(tag.text) for tag in item_spans('price')]
    history['detail'] = [tag.text.strip() for tag in item_spans('detail')]
    return history
def property_value(self):
    """Read the value estimates shown on the page held in ``self.soup``.

    Returns:
        A dict of the form::

            {'buy':  {'value': ..., 'lower_bound': ..., 'upper_bound': ...},
             'rent': {'value': ..., 'lower_bound': ..., 'upper_bound': ...},
             'confidence': ...}

        The first estimate found is reported as ``buy`` and the second as
        ``rent``. When only one (or no) estimate is present, the missing
        slots are filled with NaN. ``confidence`` is the number preceding
        ``%`` in the confidence-rating span, or NaN when that span is absent.
    """
    soup = self.soup

    values = [
        currency_to_num(tag.text)
        for tag in soup.find_all(name='p', attrs={'class': 'pdp-estimate__price'})
    ]

    # Each range paragraph is "<7-char prefix><low> - <high>"; drop the prefix
    # and flatten all bounds into one list (low, high, low, high, ...).
    # NOTE(review): the prefix is presumably a label such as "Range: " — confirm.
    bound_texts = list(chain.from_iterable([
        tag.text[7:].split(' - ')
        for tag in soup.find_all(name='p', attrs={'class': 'pdp-estimate__range'})
    ]))
    ranges = [currency_to_num(text) for text in bound_texts]

    try:
        rating = self.soup.find(
            name='span', attrs={'class': 'pdp-confidence-rating__copy'})
        percent_text = rating.text.strip().split('%')[0]
        conf = float(percent_text)
    except AttributeError:
        # find() returned None: the page shows no confidence rating.
        conf = np.nan

    # Pad up to two estimates, adding two bounds for every estimate padded.
    missing = 2 - len(values)
    if missing > 0:
        values.extend([np.nan] * missing)
        ranges.extend([np.nan] * (2 * missing))

    def estimate(slot):
        return {
            'value': values[slot],
            'lower_bound': ranges[2 * slot],
            'upper_bound': ranges[2 * slot + 1],
        }

    return {'buy': estimate(0), 'rent': estimate(1), 'confidence': conf}
def value_change(self):
    """Tabulate the value changes shown on the page held in ``self.soup``.

    Returns:
        A ``pandas.DataFrame`` with the columns ``period``, ``value``,
        ``value_change`` and ``perc_change`` (in that order), one row per
        label/value/difference triple found. ``value`` is derived from the
        current buy estimate (``self.property_value()['buy']['value']``) as
        ``estimate / (1 + perc_change / 100)`` and then passed through
        ``myround``.
    """
    def spans(suffix):
        return self.soup.find_all(
            name='span', attrs={'class': 'pdp-value-change__' + suffix})

    labels = [tag.text for tag in spans('label')]
    amounts = [currency_to_num(tag.text) for tag in spans('value')]
    percents = [float(tag.text.replace('%', '')) for tag in spans('difference')]

    # zip() stops at the shortest list, so only complete triples become rows.
    rows = list(zip(labels, amounts, percents))
    df = pd.DataFrame(rows, columns=['period', 'value_change', 'perc_change'])

    current_estimate = self.property_value()['buy']['value']
    growth_factor = 1 + df['perc_change'] / 100
    df['value'] = current_estimate / growth_factor
    df['value'] = df['value'].apply(myround)

    return df[['period', 'value', 'value_change', 'perc_change']]