def ssb_interest_rates(self):
    """
    Fetch the interest rate information from SSB table nr. 10748

    Returns
    -------
    out     : dict
              lower-cased "Rentebinding" category labels mapped to the
              corresponding entries of the response "value" list, as strings

    """
    try:
        LOGGER.info("trying to retrieve '{}'".format(
            self.ssb_interest_rates.__name__))

        payload = self.response().json()
        labels = payload["dimension"]["Rentebinding"]["category"]["label"].values()
        rates = payload["value"]

        LOGGER.success("'{}' successfully retrieved".format(
            self.ssb_interest_rates.__name__))

        # labels and values are paired positionally
        paired = dict(zip(labels, rates))
        interest_rates = {}
        for label, rate in paired.items():
            interest_rates[label.lower()] = str(rate)
        return interest_rates
    except Exception as ssb_interest_rates_exception:
        LOGGER.exception(ssb_interest_rates_exception)
        raise ssb_interest_rates_exception
def ownership_response(self):
    """
    Request the housing ownership history for the Finn code from Finn.no

    Returns
    -------
    out     : requests.models.Response
              response with housing ownership information

    Raises
    ------
    TimeOutError        when the request raises ConnectTimeout
    NoConnectionError   when the request raises ConnectError

    """
    try:
        try:
            began = time()
            url = FINN_OWNER_URL + "{}".format(self.finn_code)
            reply = requests.get(url, timeout=TIMEOUT)
            code = reply.status_code
            duration = self.elapsed_time(began)

            template = "HTTP status code -> OWNERSHIP HISTORY: [{}: {}] -> elapsed: {}"
            LOGGER.info(template.format(code, responses[code], duration))
            return reply
        except ConnectTimeout as finn_owner_timeout_error:
            timeout_message = ("Timeout occurred - please try again later or "
                               "contact system administrator, exited with '{}'")
            raise TimeOutError(timeout_message.format(finn_owner_timeout_error))
    except ConnectError as finn_owner_response_error:
        connection_message = (
            "Failed HTTP request - please insure that internet access is provided to the "
            "client or contact system administrator, exited with '{}'")
        raise NoConnectionError(connection_message.format(finn_owner_response_error))
def community_stat_response(self):
    """
    Request the community statistics for the Finn code from Finn.no

    Returns
    -------
    out     : requests.models.Response
              response with housing statistics information

    Raises
    ------
    TimeOutError        when the request raises ConnectTimeout
    NoConnectionError   when the request raises ConnectError

    """
    try:
        try:
            began = time()
            url = FINN_COMMUNITY_URL + "{}".format(self.finn_code)
            reply = requests.get(url, timeout=TIMEOUT)
            code = reply.status_code
            duration = self.elapsed_time(began)

            template = "HTTP status code -> COMMUNITY: [{}: {}] -> elapsed: {}"
            LOGGER.info(template.format(code, responses[code], duration))
            return reply
        except ConnectTimeout as finn_community_stat_timeout_error:
            timeout_message = ("Timeout occurred - please try again later or contact system "
                               "administrator, exited with '{}'")
            raise TimeOutError(
                timeout_message.format(finn_community_stat_timeout_error))
    except ConnectError as finn_community_stat_response_error:
        connection_message = (
            "Failed HTTP request - please insure that internet access is provided to the "
            "client or contact system administrator, exited with '{}'")
        raise NoConnectionError(
            connection_message.format(finn_community_stat_response_error))
def format_number(self):
    """
    Format the phone number according to the norwegian standard: the "+47"
    prefix followed by the number split into space separated groups of two
    characters

    Returns
    -------
    out     : str
              formatted phone number

    Raises
    ------
    NotPossibleError    when the number (after prefix removal) does not
                        match the accepted pattern

    References
    -------
    https://begrep.difi.no/Felles/mobiltelefonnummer

    """
    try:
        country_code = "+47"
        stripped = self.remove_prefix(self.number)

        # guard clause: reject anything that is not 8-20 digits/spaces/dashes
        if not re.compile("^\\+?[- 0-9]{8,20}$").search(stripped):
            raise NotPossibleError(
                "'{}' is an invalid phone number".format(stripped))

        pairs = [stripped[pos:pos + 2] for pos in range(0, len(stripped), 2)]
        formatted = country_code + " " + " ".join(pairs)
        LOGGER.info("format number '{}' to -> '{}'".format(stripped, formatted))
        return formatted
    except Exception as format_number_error:
        LOGGER.exception(format_number_error)
        raise format_number_error
def ad_response(self):
    """
    Request the housing advert for the Finn code from Finn.no. When the
    first request answers 404, the request is repeated against the same
    url with 'homes' replaced by 'newbuildings'.

    Returns
    -------
    out     : requests.models.Response
              response with housing ad information

    Raises
    ------
    TimeOutError        when the request raises ConnectTimeout
    NoConnectionError   when the request raises ConnectError

    """
    try:
        try:
            began = time()
            reply = requests.get(FINN_AD_URL + "{}".format(self.finn_code),
                                 timeout=TIMEOUT)
            code = reply.status_code

            if code == 404:
                # second attempt against the 'newbuildings' variant of the url
                fallback_url = FINN_AD_URL.replace('homes', 'newbuildings') + \
                    "{}".format(self.finn_code)
                reply = requests.get(fallback_url, timeout=TIMEOUT)
                code = reply.status_code

            duration = self.elapsed_time(began)
            template = "HTTP status code -> ADVERT: [{}: {}] -> elapsed: {}"
            LOGGER.info(template.format(code, responses[code], duration))
            return reply
        except ConnectTimeout as finn_ad_timeout_error:
            timeout_message = ("Timeout occurred - please try again later or contact system "
                               "administrator, exited with '{}'")
            raise TimeOutError(timeout_message.format(finn_ad_timeout_error))
    except ConnectError as finn_ad_response_error:
        connection_message = (
            "Failed HTTP request - please insure that internet access is provided to the "
            "client or contact system administrator, exited with '{}'")
        raise NoConnectionError(connection_message.format(finn_ad_response_error))
def __init__(self, utlanstype: list = None, sektor: list = None,
             rentebinding: list = None, tid: list = None):
    """
    Constructor / Instantiate the class

    Every argument must be a list or None. A falsy argument (None or an
    empty list) is replaced by its default.

    Parameters
    ----------
    utlanstype      : list
                      type of loan, default ["70"]
    sektor          : list
                      sektor, default is ["04b"]
    rentebinding    : list
                      type of interest rate, default is
                      ["08", "12", "10", "11", "06"]
    tid             : list
                      time frame, default is the result of
                      self._updated_table_date()

    """
    try:
        LOGGER.info("trying to create '{}'".format(
            self.__class__.__name__))

        # one type spec per argument, so that 'tid' is validated as well
        # (previously only three specs were supplied for four arguments)
        arguments = [utlanstype, sektor, rentebinding, tid]
        Assertor.assert_data_types(
            arguments, [(type(None), list) for _ in arguments])

        self.utlanstype = ["70"] if not utlanstype else utlanstype
        self.sektor = ["04b"] if not sektor else sektor
        self.rentebinding = ["08", "12", "10", "11", "06"
                             ] if not rentebinding else rentebinding
        self.tid = self._updated_table_date() if not tid else tid
        LOGGER.success("created {}".format(self.__class__.__name__))
    except Exception as ssb_payload_exception:
        LOGGER.exception(ssb_payload_exception)
        raise ssb_payload_exception
def sifo_base_expenses(self, include_id: bool = False):
    """
    Collect the SIFO base expenses for the family from the "utgifter"
    section of the SIFO response

    Parameters
    ----------
    include_id  : bool
                  when True, the family id is added under the '_id' key

    Returns
    -------
    out         : dict
                  dictionary with SIFO expenses, all amounts as strings

    """
    LOGGER.info("trying to retrieve '{}'".format(
        self.sifo_base_expenses.__name__))

    expenses_json = self.response().json()["utgifter"]

    # individual items, their sum, household items, their sum and the total
    collected = {}
    collected.update(expenses_json['individspesifikke'])
    collected['sumindivid'] = expenses_json['sumindivid']
    collected.update(expenses_json['husholdsspesifikke'])
    collected['sumhusholdning'] = expenses_json['sumhusholdning']
    collected['totalt'] = expenses_json['totalt']

    stringified = {name: str(amount) for name, amount in collected.items()}

    if include_id:
        stringified['_id'] = self.family.id_

    LOGGER.success("'{}' successfully retrieved".format(
        self.sifo_base_expenses.__name__))
    return stringified
def update(self, db_name: str, col_name: str, query: dict, new_value: dict):
    """
    Update every document matching the query in a collection

    Parameters
    ----------
    db_name     : str
                  db name to look for collection (lower-cased before use)
    col_name    : str
                  collection name to apply update (lower-cased before use)
    query       : dict
                  document to query
    new_value   : dict
                  new values to apply in document

    """
    try:
        start_message = "trying to '{}' document '{}' with value '{}'"
        LOGGER.info(start_message.format(self.update.__name__, query, new_value))

        Assertor.assert_data_types([db_name, col_name], [str, str])

        database = getattr(self._client, db_name.lower())
        target = database[col_name.lower()]
        target.update_many(query, new_value)

        LOGGER.success("'{}' successfully completed".format(
            self.update.__name__))
    except Exception as exp:
        LOGGER.exception(exp)
        raise exp
def response(self):
    """
    Request the posten page for the postal code

    Returns
    -------
    out     : requests.models.Response
              response for the postal code lookup

    Raises
    ------
    TimeOutError        when the request raises ConnectTimeout
    NoConnectionError   when the request raises ConnectError

    """
    try:
        try:
            began = time()
            url = POSTEN_URL + "{}".format(self.postal_code)
            reply = requests.get(url, timeout=TIMEOUT)
            code = reply.status_code
            duration = self.elapsed_time(began)

            template = "HTTP status code -> POSTEN: [{}: {}] -> elapsed: {}"
            LOGGER.info(template.format(code, responses[code], duration))
            return reply
        except ConnectTimeout as posten_timeout_error:
            timeout_message = ("Timeout occurred - please try again later or contact system "
                               "administrator, exited with '{}'")
            raise TimeOutError(timeout_message.format(posten_timeout_error))
    except ConnectError as posten_response_error:
        connection_message = (
            "Failed HTTP request - please insure that internet access is provided to the "
            "client or contact system administrator, exited with '{}'")
        raise NoConnectionError(connection_message.format(posten_response_error))
async def stat_response(self):
    """
    Asynchronously request the housing statistics for the Finn code from
    Finn.no

    Returns
    -------
    out     : body read from the response content stream
              (presumably bytes - confirm against the ClientSession in use)

    Raises
    ------
    TimeOutError        when TError is raised during the request
    NoConnectionError   when ClientConnectionError is raised

    """
    try:
        try:
            began = time()
            session_timeout = ClientTimeout(TIMEOUT)
            async with ClientSession(timeout=session_timeout) as session:
                url = FINN_STAT_URL + "{}".format(self.finn_code)
                async with session.get(url) as reply:
                    code = reply.status
                    duration = self.elapsed_time(began)

                    template = "HTTP status code -> STATISTICS: [{}: {}] -> elapsed: {}"
                    LOGGER.info(template.format(code, responses[code], duration))

                    # the body is read while the response is still open
                    return await reply.content.read()
        except TError:
            raise TimeOutError(
                "Timeout occurred - please try again later or contact system administrator"
            )
    except ClientConnectionError as finn_stat_response_error:
        connection_message = (
            "Failed HTTP request - please insure that internet access is provided to the "
            "client or contact system administrator, exited with '{}'")
        raise NoConnectionError(connection_message.format(finn_stat_response_error))
def postal_code_info(self):
    """
    Parse the postal code information out of the response table

    The response is expected to contain exactly two table rows. The <th>
    cells give the (lower-cased) keys and the <td> cells of the second row
    give the (upper-cased) values. Empty values are left out.

    Returns
    -------
    out     : dict
              dictionary with postal code information

    Raises
    ------
    NotFoundError   when the response does not contain exactly two rows

    """
    LOGGER.info("trying to retrieve 'postal_code_info' for -> '{}'".format(
        self.postal_code))

    soup = BeautifulSoup(self.response().content, "lxml")
    rows = soup.find_all("tr")

    if len(rows) != 2:
        raise NotFoundError("'{}' is an invalid postal code".format(
            self.postal_code))

    header = [cell.text.strip().lower() for cell in soup.find_all('th')]

    values = []
    for position, cell in enumerate(rows[1].find_all('td')):
        text = cell.text.strip().upper()
        if position == 4:
            # the cell at index 4 drops its last space separated token
            text = text.rsplit(' ', 1)[0]
        values.append(text)

    LOGGER.success("'postal_code_info' successfully retrieved")

    paired = dict(zip(header, values))
    return {name: content for name, content in paired.items() if content}
def mortgage_offers(self):
    """
    Retrieve finansportalen.no's mortgage base data (boliglån grunndata)
    xml feed and return its entries as a dict

    Returns
    -------
    out     : dict
              entry number (starting at 1) mapped to a dict of
              tag name -> stripped text for every child with text

    """
    try:
        LOGGER.info("trying to retrieve '{}'".format(
            self.mortgage_offers.__name__))

        offers = {}
        xml_text = self.response().content.decode("windows-1252")
        soup = BeautifulSoup(xml_text, "xml")
        root = Et.fromstring(soup.prettify())

        # strips the "{namespace}" part in front of each tag name
        remove_url_re = '{[^>]+}'

        for number, entry in enumerate(root.findall(PORTALEN_ENTRY), start=1):
            offers[number] = {
                re.sub(remove_url_re, '', field.tag): field.text.strip()
                for field in entry if field.text
            }

        LOGGER.success("'{}' successfully retrieved".format(
            self.mortgage_offers.__name__))
        return offers
    except Exception as mortgage_offers_exception:
        LOGGER.exception(mortgage_offers_exception)
        raise mortgage_offers_exception
def response(self):
    """
    Post the payload to self.url and return the response

    Returns
    -------
    out     : requests.models.Response
              response for the posted payload

    Raises
    ------
    TimeOutError        when the request raises ConnectTimeout
    NoConnectionError   when the request raises ConnectError

    """
    try:
        try:
            reply = requests.post(url=self.url, data=self.payload(),
                                  timeout=TIMEOUT)
            code = reply.status_code
            LOGGER.info("HTTP status code -> [{}: {}]".format(
                code, responses[code]))
            return reply
        except ConnectTimeout as ssb_timeout_error:
            timeout_message = (
                "Timeout occurred - please try again later or contact system administrator, "
                "exited with '{}'")
            raise TimeOutError(timeout_message.format(ssb_timeout_error))
    except ConnectError as ssb_response_error:
        connection_message = (
            "Failed HTTP request - please insure that internet access is provided to the "
            "client or contact system administrator, exited with '{}'")
        raise NoConnectionError(connection_message.format(ssb_response_error))
def mortgage_offers(self):
    """
    Retrieve finansportalen.no's mortgage base data (boliglån grunndata)
    xml feed and return its entries as a dict

    Returns
    -------
    out     : dict
              entry number (starting at 1) mapped to a dict of
              tag name -> stripped text for every child with text

    Raises
    ------
    NotFoundError   when the portalen response is falsy

    """
    LOGGER.info("trying to retrieve '{}'".format(
        self.mortgage_offers.__name__))

    reply = self.portalen_response()
    if not reply:
        raise NotFoundError("No 'mortgage_offers' received")

    decoded = reply.content.decode("windows-1252")
    entries = Et.fromstring(decoded).findall(PORTALEN_ENTRY)

    offers = {}
    for number, entry in enumerate(entries, start=1):
        # the substitution strips the "{namespace}" part of each tag name
        offers[number] = {
            re.sub("{[^>]+}", "", field.tag): field.text.strip()
            for field in entry if field.text
        }

    LOGGER.success("'{}' successfully retrieved".format(
        self.mortgage_offers.__name__))
    return offers
def response(self):
    """
    Post the family's SIFO properties, appended to the SIFO url as
    'key=value&' pairs, and return the response

    Returns
    -------
    out     : requests.Response
              response with expenses information

    Raises
    ------
    TimeOutError        when a URLError reading "<urlopen error timed out>"
                        is raised
    NoConnectionError   when any other URLError is raised

    """
    # NOTE(review): the request is made with requests, while the handler
    # catches urllib's URLError - confirm this handler can actually trigger
    try:
        began = time()
        query = "".join(
            key + '=' + item + '&'
            for key, item in self.family.sifo_properties().items())
        parsed_sifo_url = SIFO_URL + query

        reply = requests.post(url=parsed_sifo_url, timeout=TIMEOUT)
        code = reply.status_code
        duration = self.elapsed_time(began)

        template = "HTTP status code -> SIFO: [{}: {}] -> elapsed: {}"
        LOGGER.info(template.format(code, responses[code], duration))
        return reply
    except URLError as sifo_response_error:
        timed_out = str(sifo_response_error) == "<urlopen error timed out>"
        if timed_out:
            timeout_message = ("Timeout occurred - please try again later or contact system "
                               "administrator, exited with '{}'")
            raise TimeOutError(timeout_message.format(sifo_response_error))
        connection_message = (
            "Failed HTTP request - please insure that internet access is provided to the "
            "client or contact system administrator, exited with '{}'")
        raise NoConnectionError(connection_message.format(sifo_response_error))
def read(self, db_name: str, col_name: str):
    """
    Read all documents in a collection

    Parameters
    ----------
    db_name     : str
                  db name to lookup (lower-cased before use)
    col_name    : str
                  collection name to lookup (lower-cased before use)

    Returns
    -------
    out         : list
                  all documents in collection

    """
    try:
        start_message = \
            "trying to '{}' all documents in collection: '{}' from db: '{}'"
        LOGGER.info(start_message.format(self.read.__name__, col_name, db_name))

        Assertor.assert_data_types([db_name, col_name], [str, str])

        database = self._client[db_name.lower()]
        cursor = getattr(database, col_name.lower()).find()
        documents = list(cursor)

        done_message = "'{}' successfully completed - '{}' document(s) found"
        LOGGER.success(done_message.format(self.read.__name__, len(documents)))
        return documents
    except Exception as exp:
        LOGGER.exception(exp)
        raise exp
def sifo_expenses(self):
    """
    Parse the SIFO expenses for the family out of the xml response

    Returns
    -------
    out     : dict
              the family id under '_id' plus one entry per child element of
              the xml root: tag -> stripped text with every "." removed

    """
    try:
        LOGGER.info("trying to retrieve '{}'".format(
            self.sifo_expenses.__name__))

        soup = BeautifulSoup(self.response(), "xml")
        root = Et.fromstring(soup.prettify())

        expenses = {'_id': self.family.id_str}
        for element in root:
            expenses[element.tag] = element.text.strip().replace(".", "")

        LOGGER.success("'{}' successfully retrieved".format(
            self.sifo_expenses.__name__))
        return expenses
    except Exception as sifo_expenses_exception:
        LOGGER.exception(sifo_expenses_exception)
        raise sifo_expenses_exception
def delete(self, db_name: str, col_name: str):
    """
    Delete all documents in a collection by dropping the collection.

    The db and collection names are lower-cased before use, so that a
    collection is addressed by the same name that the other methods in
    SOURCE which take these names (create / read / update) use.

    Parameters
    ----------
    db_name     : str
                  name of db
    col_name    : str
                  name of collection to be deleted

    """
    # NOTE(review): the original docstring states that the db the collection
    # is in is deleted as well - presumably when this was its last
    # collection; confirm against the MongoDB deployment
    try:
        LOGGER.info(
            "trying to '{}' all documents from collection: '{}' in db: '{}'"
            .format(self.delete.__name__, col_name, db_name))

        Assertor.assert_data_types([db_name, col_name], [str, str])

        # lower-case after validation, as the sibling methods do; without
        # this a mixed-case name addresses a different collection
        collection = getattr(self._client, db_name.lower())[col_name.lower()]

        # NOTE(review): Collection.count() is deprecated in newer PyMongo
        # releases - verify the installed version before upgrading
        count = collection.count()
        collection.drop()

        LOGGER.success(
            "'{}' successfully completed - '{}' document(s) deleted".
            format(self.delete.__name__, count))
    except Exception as exp:
        LOGGER.exception(exp)
        raise exp
def portalen_response(self):
    """
    Post an authenticated request to the finansportalen.no xml feed

    Returns
    -------
    out     : requests.models.Response
              response with mortgage information

    Raises
    ------
    TimeOutError        when the request raises ReadTimeout
    NoConnectionError   when the request raises ConnectError

    """
    try:
        try:
            reply = requests.post(PORTALEN_URL, auth=PORTALEN_CRED,
                                  timeout=TIMEOUT)
            code = reply.status_code
            LOGGER.info("HTTP status code -> [{}: {}]".format(
                code, responses[code]))
            return reply
        except ReadTimeout as portalen_timeout_error:
            timeout_message = (
                "Timeout occurred - please try again later or contact system administrator, "
                "exited with '{}'")
            raise TimeOutError(timeout_message.format(portalen_timeout_error))
    except ConnectError as portalen_response_error:
        connection_message = (
            "Failed HTTP request - please insure that internet access is provided to the "
            "client or contact system administrator, exited with '{}'")
        raise NoConnectionError(
            connection_message.format(portalen_response_error))
def __init__(self):
    """
    Constructor / Instantiate the class.

    The only state set here is the id, a uuid4 stored as a string.

    """
    class_name = self.__class__.__name__
    LOGGER.info("trying to create '{}'".format(class_name))
    self._id_str = str(uuid4())
def start_process(cls):
    """
    Start logging and profiling of the process: logs the start, stores the
    start time on the class and attaches a fresh profiling configuration

    """
    process_name = cls.__name__
    LOGGER.info("starting '{}'".format(process_name))

    # the start time is read back when the process is ended
    cls.start = time()
    cls.profiling = profiling_config()
def __init__(self):
    """
    Set up the shared state: a Browser with robots and refresh handling
    switched off, and a uuid4 id stored as a string

    NOTE(review): the original docstring describes the class as abstract
    and not instantiable - nothing in this constructor enforces that

    """
    class_name = self.__class__.__name__
    LOGGER.info("trying to create '{}'".format(class_name))
    super().__init__()

    browser = Browser()
    self._browser = browser
    browser.set_handle_robots(False)
    browser.set_handle_refresh(False)

    self._id = str(uuid4())
def response(self):
    """
    Response from the Finn.no housing search

    Logs the status code and reason of the object held in self._browser
    and returns that object.

    Returns
    -------
    out     : the object stored in self._browser
              (presumably response-like, it must expose status_code and
              reason - confirm against the subclass that sets it)

    """
    current = self._browser
    status_line = "HTTP status code -> [{}: {}]".format(
        current.status_code, current.reason)
    LOGGER.info(status_line)
    return current
def response(self):
    """
    Response from the finansportalen.no xml feed

    Logs the status code and reason of the object held in self._browser
    and returns that object.

    Returns
    -------
    out     : the object stored in self._browser
              (presumably response-like, it must expose status_code and
              reason - confirm against the subclass that sets it)

    """
    current = self._browser
    status_line = "HTTP status code -> [{}: {}]".format(
        current.status_code, current.reason)
    LOGGER.info(status_line)
    return current
def response(self):
    """
    Post the payload as json to the SSB url and return the response

    Returns
    -------
    out     : requests.models.Response
              response with interest rate information

    """
    request_body = self._payload.payload()
    reply = requests.post(url=SSB_URL, json=request_body)

    status_line = "HTTP status code -> [{}: {}]".format(
        reply.status_code, reply.reason)
    LOGGER.info(status_line)
    return reply
def __init__(self):
    """
    Set up the shared state: a Browser with robots and refresh handling
    switched off, and a uuid4 id stored as a string. Any exception raised
    on the way is logged and re-raised.

    NOTE(review): the original docstring describes the class as abstract
    and not instantiable - nothing in this constructor enforces that

    """
    try:
        class_name = self.__class__.__name__
        LOGGER.info("trying to create '{}'".format(class_name))
        super().__init__()

        browser = Browser()
        self._browser = browser
        browser.set_handle_robots(False)
        browser.set_handle_refresh(False)

        self._id_str = str(uuid4())
    except Exception as scraper_exception:
        LOGGER.exception(scraper_exception)
        raise scraper_exception
def housing_ownership_information(self):
    """
    Retrieve and parse housing ownership history information from the
    Finn.no ownership search into a dict

    Returns
    -------
    out     : dict
              key / value pairs read from the definition lists, plus the
              ownership history table (as a DataFrame dict) under
              "historikk". When an AttributeError occurs while parsing,
              a debug message is logged and nothing is returned (None).

    """
    LOGGER.info(
        "trying to retrieve 'housing_ownership_information' for -> '{}'".format(self.finn_code))
    history_headers = None
    history_results = []
    keys = []
    values = []
    try:
        owner_soup = BeautifulSoup(self.ownership_response().content, "lxml")

        # definition lists: split the text on newlines; non-empty lines at
        # odd positions become keys, the other non-empty lines become values
        for geo_val in owner_soup.find_all("dl", attrs={"class": "definition-list u-mb32"}):
            for i, val in enumerate(geo_val.text.split("\n")):
                if i % 2 != 0 and val:
                    keys.append(val.strip().lower().replace("å", "a"))
                elif val:
                    values.append(val.strip())

        # history table: calling .find_all on the result of .find raises
        # AttributeError when no matching table exists (presumably because
        # find returns None) - that is handled by the except clause below
        for table_row in owner_soup.find(
                "table", attrs={"class": "data-table u-mb32"}).find_all("tr"):
            # headers are taken from the first row that yields any <th> cells
            if not history_headers:
                history_headers = [head.text for head in table_row.find_all("th")]
            # non-empty <td> cells of the row, with ",-" rewritten to " kr"
            row = [tab_row.text.strip().replace(",-", " kr") for tab_row in
                   table_row.find_all("td") if tab_row.text.strip()]
            if row:
                history_results.append(row)

        info = dict(zip(keys, values))
        historic_df = DataFrame(history_results, columns=history_headers)

        # the "Pris" column is (re)built from the last table column: commas
        # become " kr" and the U+2212 minus sign is removed
        info.update(
            {"historikk": historic_df.assign(Pris=historic_df.iloc[:, -1].str.replace(
                ",", " kr").str.replace("\u2212", "")).to_dict()})
        LOGGER.success("'housing_ownership_information' successfully retrieved")
        return info
    except AttributeError as no_ownership_history_exception:
        # no re-raise here: the method falls through and returns None
        LOGGER.debug("[{}] No ownership history found!, exited with '{}'".format(
            self.__class__.__name__, no_ownership_history_exception))
def __init__(self):
    """
    Constructor / Instantiate the class.

    Creates the MongoClient for the configured connection string and
    leaves the db and collection handles unset. Any exception raised on
    the way is logged and re-raised.

    """
    try:
        class_name = self.__class__.__name__
        LOGGER.info("trying to create '{}'".format(class_name))

        self._id_str = str(uuid4())
        self._client = MongoClient(DB_STRING)
        self._db = None
        self._collection = None

        # self.id_str is presumably a property exposing _id_str
        created_message = "created '{}', with id: [{}]"
        LOGGER.success(created_message.format(
            self.__class__.__name__, self.id_str))
    except Exception as dao_exception:
        LOGGER.exception(dao_exception)
        raise dao_exception
def end_process(cls):
    """
    End logging and profiling of the process

    Appends the summary rows (total, speedup and total without speedup) to
    the profiling table and logs the table. Times are reported in
    milliseconds, rounded to 7 digits.

    """
    digits = 7
    elapsed = round((time() - cls.start) * 1000, digits)
    speedup = round(cls.elapsed - elapsed, digits)

    summary_rows = [
        ["-----------", "", "", ""],
        ["total", "", "", str(elapsed) + "ms"],
        ["", "", "", ""],
        ["speedup", "", "", str(speedup) + "ms"],
        ["(total without speedup)", "", "",
         str(round(cls.elapsed, digits)) + "ms"],
    ]
    for summary_row in summary_rows:
        cls.profiling.add_row(summary_row)

    LOGGER.success("ending '{}'".format(cls.__name__))

    report_header = "reporting profiling results -> \n\n profiling: '{}' \n\n".format(
        cls.__name__)
    LOGGER.info(report_header + str(cls.profiling) + "\n")
def create(self, db_name: str, col_name: str, document: (dict, list)):
    """
    Create posts / documents (dict or list) in a collection in the database

    The db and collection handles are resolved from the given names on
    every call, so consecutive calls with different db names write to the
    db that was actually asked for.

    Parameters
    ----------
    db_name     : str
                  db to insert record (lower-cased before use)
    col_name    : str
                  collection to insert record (lower-cased before use)
    document    : dict, list
                  record(s) to insert

    Raises
    ------
    TypeError   when document is neither a dict nor a list

    """
    try:
        LOGGER.info(
            "trying to '{}' document(s) in collection: '{}' in db: '{}'".
            format(self.create.__name__, col_name, db_name))

        Assertor.assert_data_types([db_name, col_name, document],
                                   [str, str, (dict, list)])

        if not isinstance(document, (dict, list)):
            raise TypeError("expected type '{}', got '{}' "
                            "instead".format(
                                (dict.__name__, list.__name__),
                                type(document).__name__))

        # always resolve the handles from the arguments: the previous
        # 'if not self._db' check kept the first db forever (ignoring later
        # db_name values) and relied on truth-testing a Database object
        self._db = self._client[db_name.lower()]
        self._collection = self._db[col_name.lower()]

        # insert_one / insert_many replace the deprecated Collection.insert
        if isinstance(document, list):
            count = len(document)
            self._collection.insert_many(document)
        else:
            count = 1
            self._collection.insert_one(document)

        LOGGER.success(
            "'{}' successfully completed - '{}' document(s)".format(
                self.create.__name__, count))
    except Exception as exp:
        LOGGER.exception(exp)
        raise exp