Exemple #1
0
    def housing_ownership_information(self):
        """
        Collect the ownership details and the ownership history of the housing ad into a dict

        Every non-empty line of the "definition-list u-mb32" lists is read as either a label
        (odd line position; lower-cased, "å" replaced by "a") or a value (even line position),
        and labels and values are paired up in order. The rows of the "data-table u-mb32"
        table are stored as a DataFrame dict under the key "historikk", where the column
        "Pris" is the last table column with "," replaced by " kr" and the minus sign
        (U+2212) removed.

        Returns
        -------
        out     : dict
                  ownership information, or None (after a debug log entry) when an
                  AttributeError occurs, e.g. because no history table was found

        """
        LOGGER.info(
            f"trying to retrieve 'housing_ownership_information' for -> '{self.finn_code}'")

        try:
            soup = BeautifulSoup(self.ownership_response().content, "lxml")

            # label / value pairs alternate line by line inside each definition list
            labels, contents = [], []
            for definition_list in soup.find_all(
                    "dl", attrs={"class": "definition-list u-mb32"}):
                for position, line in enumerate(definition_list.text.split("\n")):
                    if not line:
                        continue
                    if position % 2:
                        labels.append(line.strip().lower().replace("å", "a"))
                    else:
                        contents.append(line.strip())

            # a missing table makes find() return None -> AttributeError -> handled below
            ownership_table = soup.find("table", attrs={"class": "data-table u-mb32"})
            column_names = None
            records = []
            for table_row in ownership_table.find_all("tr"):
                if not column_names:
                    column_names = [header.text for header in table_row.find_all("th")]
                cells = []
                for cell in table_row.find_all("td"):
                    cell_text = cell.text.strip()
                    if cell_text:
                        cells.append(cell_text.replace(",-", " kr"))
                if cells:
                    records.append(cells)

            info = dict(zip(labels, contents))
            history = DataFrame(records, columns=column_names)
            last_column = history.iloc[:, -1]
            prices = last_column.str.replace(",", " kr").str.replace("\u2212", "")
            info["historikk"] = history.assign(Pris=prices).to_dict()

            LOGGER.success("'housing_ownership_information' successfully retrieved")
            return info
        except AttributeError as missing_history_error:
            LOGGER.debug(
                f"[{self.__class__.__name__}] No ownership history found!, "
                f"exited with '{missing_history_error}'")
            return None
Exemple #2
0
    def housing_stat_information(self):
        """
        Retrieve and parse the statistics of the housing ad to dict

        The awaited result of ``stat_response`` is parsed, the JSON payload of its
        ``<script type="application/json">`` tag is decoded, and the results of the
        ``extract_*`` helpers and ``calculate_sqm_price_areas`` are merged into one dict.

        Returns
        -------
        out     : dict
                  housing statistics, or None (after a debug log entry) when any exception
                  is raised while parsing or extracting

        """
        LOGGER.info(f"trying to retrieve 'housing_stat_information' for -> '{self.finn_code}'")
        raw_page = asyncio.run(self.stat_response())
        try:
            info = {}
            script_tag = BeautifulSoup(raw_page, "lxml").find(
                "script", attrs={"type": "application/json"})
            payload = json.loads(script_tag.contents[0])

            # each extractor gets the payload and the dict collected so far,
            # and its result is merged before the next extractor runs
            extractors = (
                self.extract_sqm_price,
                self.extract_images,
                self.extract_view_statistics,
                self.extract_published_statistics,
                self.extract_area_sales_statistics,
            )
            for extract in extractors:
                info.update(extract(payload, info))
            info.update(self.calculate_sqm_price_areas(info))

            LOGGER.success("'housing_stat_information' successfully retrieved")

            return info
        except Exception as stat_exception:
            LOGGER.debug(
                f"[{self.__class__.__name__}] No housing statistics found!, "
                f"exited with '{stat_exception}'")
            return None
Exemple #3
0
    def community_stat_information(self):
        """
        Retrieve and parse community (nabolag) statistics to dict

        The data is read from the JSON payload embedded in the ``__NEXT_DATA__`` script tag
        of the community statistics response, at ``props -> initialState -> nabolag -> data``.

        Returns
        -------
        out     : dict
                  ``{"nabolag": <community data>}`` on success. None is returned (after a
                  debug log entry) when the script tag, one of the JSON keys or the data
                  itself is missing, or when the payload is not valid JSON.

        """
        LOGGER.info(
            "trying to retrieve 'community_stat_information' for -> '{}'".format(self.finn_code))
        response = self.community_stat_response()
        try:
            community_stat_soup = BeautifulSoup(response.content, "lxml")

            # find() gives None when the tag is absent; the attribute access on it then
            # raises AttributeError, which is handled below like every other "no data" case
            next_data_tag = community_stat_soup.find("script", attrs={"id": "__NEXT_DATA__"})
            nabolag_soup = json.loads(next_data_tag.contents[0])

            nabolag = nabolag_soup["props"]["initialState"]["nabolag"]["data"]
            if not nabolag:
                raise AttributeError("empty community data")

            info = {"nabolag": nabolag}

            LOGGER.success("'community_stat_information' successfully retrieved")
            return info

        # AttributeError: tag not found / empty data, LookupError: missing JSON key or empty
        # script tag, TypeError: unexpected JSON structure, ValueError: invalid JSON
        except (AttributeError, LookupError, TypeError,
                ValueError) as no_community_statistics_exception:
            LOGGER.debug("[{}] No community statistics found!, exited with '{}'".format(
                self.__class__.__name__, no_community_statistics_exception))
            return None