def housing_ownership_information(self):
    """
    Retrieve and parse the housing ownership history page from Finn.no to dict

    Texts from the "definition-list" elements are paired up as key / value
    entries, and the rows of the ownership history table are stored as a
    dict-converted DataFrame under the "historikk" key.

    Returns
    -------
    out     : dict
              parsed ownership information, or None when an AttributeError is
              raised while parsing (for instance when the history table is
              missing from the page)

    """
    LOGGER.info(
        "trying to retrieve 'housing_ownership_information' for -> '{}'".format(self.finn_code))

    labels, contents = [], []
    columns = None
    records = []

    try:
        soup = BeautifulSoup(self.ownership_response().content, "lxml")

        # The text of each definition list is split on newlines; non-empty
        # fragments at odd positions are keys, at even positions values.
        for definition_list in soup.find_all("dl", attrs={"class": "definition-list u-mb32"}):
            for position, fragment in enumerate(definition_list.text.split("\n")):
                if not fragment:
                    continue
                if position % 2:
                    labels.append(fragment.strip().lower().replace("å", "a"))
                else:
                    contents.append(fragment.strip())

        # A missing table makes find() return None, so the attribute access
        # below raises the AttributeError handled at the bottom.
        history_table = soup.find("table", attrs={"class": "data-table u-mb32"})
        for table_row in history_table.find_all("tr"):
            if not columns:
                # keep looking until a row with header cells has been seen
                columns = [header.text for header in table_row.find_all("th")]

            cells = []
            for cell in table_row.find_all("td"):
                cell_text = cell.text.strip()
                if cell_text:
                    cells.append(cell_text.replace(",-", " kr"))
            if cells:
                records.append(cells)

        info = dict(zip(labels, contents))
        history = DataFrame(records, columns=columns)

        # "Pris" is derived from the last column of the history table.
        price = history.iloc[:, -1].str.replace(",", " kr").str.replace("\u2212", "")
        info["historikk"] = history.assign(Pris=price).to_dict()

        LOGGER.success("'housing_ownership_information' successfully retrieved")
        return info
    except AttributeError as parse_error:
        LOGGER.debug("[{}] No ownership history found!, exited with '{}'".format(
            self.__class__.__name__, parse_error))
        return None
def housing_stat_information(self):
    """
    Retrieve and parse housing ad statistics from Finn.no to dict

    The statistics are read from the JSON payload embedded in the
    <script type="application/json"> element of the statistics page and are
    then handed to the extract_* / calculate_* helper methods, whose results
    are merged into one dict.

    Returns
    -------
    out     : dict
              collected statistics, or None when any exception is raised
              while parsing (the exception is logged at debug level)

    """
    LOGGER.info(
        "trying to retrieve 'housing_stat_information' for -> '{}'".format(
            self.finn_code))
    stat_page = asyncio.run(self.stat_response())

    try:
        info = {}
        payload_tag = BeautifulSoup(stat_page, "lxml").find(
            "script", attrs={"type": "application/json"})
        stat_data = json.loads(payload_tag.contents[0])

        # Order matters: every helper receives the dict built so far.
        extractors = (
            self.extract_sqm_price,
            self.extract_images,
            self.extract_view_statistics,
            self.extract_published_statistics,
            self.extract_area_sales_statistics,
        )
        for extract in extractors:
            info.update(extract(stat_data, info))
        info.update(self.calculate_sqm_price_areas(info))

        LOGGER.success("'housing_stat_information' successfully retrieved")
        return info
    except Exception as stat_exception:
        # Best effort: any failure is reported as "no statistics".
        LOGGER.debug(
            "[{}] No housing statistics found!, exited with '{}'".format(
                self.__class__.__name__, stat_exception))
        return None
def community_stat_information(self):
    """
    Retrieve and parse community (nabolag) statistics from Finn.no to dict

    The data is read from the JSON payload in the page's "__NEXT_DATA__"
    script element, under props -> initialState -> nabolag -> data.

    Returns
    -------
    out     : dict
              {"nabolag": <community data>}, or None when the payload is
              missing, empty or cannot be parsed (the reason is logged at
              debug level)

    """
    LOGGER.info(
        "trying to retrieve 'community_stat_information' for -> '{}'".format(self.finn_code))
    response = self.community_stat_response()

    try:
        community_stat_soup = BeautifulSoup(response.content, "lxml")
        next_data = json.loads(
            community_stat_soup.find("script", attrs={"id": "__NEXT_DATA__"}).contents[0])
        nabolag = next_data["props"]["initialState"]["nabolag"]["data"]
        if not nabolag:
            raise AttributeError("empty community data")
    except (AttributeError, KeyError, IndexError, TypeError, ValueError) as \
            no_community_statistics_exception:
        # Every way the payload can be absent or malformed is treated as
        # "no statistics": missing script tag (AttributeError), empty tag
        # (IndexError), invalid JSON (ValueError), missing key (KeyError) or
        # an unexpected node type along the key path (TypeError).
        LOGGER.debug("[{}] No community statistics found!, exited with '{}'".format(
            self.__class__.__name__, no_community_statistics_exception))
        return None
    else:
        LOGGER.success("'community_stat_information' successfully retrieved")
        return {"nabolag": nabolag}