def _get_flats_from_page(self, url):
    """Parse one listing page and return the flats advertised on it.

    The page is fetched through ``self._url_to_bs4page`` (presumably a
    BeautifulSoup document -- confirm against that helper). Every
    ``<article>`` element carrying a ``data-adid`` attribute is turned into
    a ``Flat``.

    Args:
        url: Object describing the page; must provide ``get_url()`` and
            ``get_id()``.

    Returns:
        A list of ``Flat`` objects, empty when the page could not be loaded
        or contains no matching articles. Articles lacking the item link or
        the price node are skipped with a warning.

    Raises:
        ValueError: If the text found in the price node is not an integer.
        KeyError: If the item link has no ``title`` attribute.
    """

    def _css_classes(tag):
        # Missing "class" attribute is treated as "no classes" instead of
        # raising KeyError from inside the search predicate.
        return tag.attrs.get("class") or ()

    def _is_ad_article(tag):
        return tag.name == "article" and "data-adid" in tag.attrs

    def _is_item_link(tag):
        return (tag.name == "a"
                and "href" in tag.attrs
                and "item-link" in _css_classes(tag))

    def _is_price_row(tag):
        return tag.name == "div" and "price-row" in _css_classes(tag)

    flats_from_page_list = []
    page_bs = self._url_to_bs4page(url.get_url())
    if page_bs is None:
        return flats_from_page_list

    for article in page_bs.find_all(_is_ad_article):
        flat_id = article["data-adid"]

        a_tag = article.find(_is_item_link)
        if a_tag is None:
            logger.warning(
                "Article %s has no item link; skipping", flat_id)
            continue
        href = self._DOMAIN_NAME + a_tag["href"]
        title = a_tag["title"]

        price_row = article.find(_is_price_row)
        price_span = price_row.find("span") if price_row is not None else None
        if price_span is None or not price_span.contents:
            logger.warning(
                "Article %s has no price information; skipping", flat_id)
            continue
        price = int(price_span.contents[0])

        logger.info(
            "Article: flat_id = {}, href = {}, price = {}".format(
                flat_id, href, price))

        # Every scraped flat starts out flagged as EMAIL_SENT (value 2
        # according to the original author's note). A later step checks
        # whether the flat is new or its price went down, and only in those
        # cases a notification is sent.
        flat = Flat(None, flat_id, url.get_id(), datetime.datetime.now(),
                    href, title, price, EmailStatus.EMAIL_SENT.value)
        flats_from_page_list.append(flat)

    return flats_from_page_list
def get_flats_from_url(self, url):
    """Collect the flats from ``url`` and from every following page.

    Starting at ``url``, each page is logged and parsed with
    ``self._get_flats_from_page``; the next page is then obtained from
    ``self._get_next_page_url``. Iteration stops as soon as the current
    page value is falsy.

    Args:
        url: The first page to process.

    Returns:
        A single list with the flats of all visited pages, in visit order.
    """
    collected = []
    current_page = url
    while current_page:
        logger.info("Processing the url: {}".format(str(current_page)))
        page_flats = self._get_flats_from_page(current_page)
        collected.extend(page_flats)
        current_page = self._get_next_page_url(current_page)
    return collected
def update_price(self, flat_list):
    """Run the price-update statement for every flat and commit once.

    For each flat the statement ``self._UPDATE_PRICE_TEMPLATE`` is executed
    with the parameters (price, email status, flat id), in that order. The
    module-level connection ``_conn`` is committed after the whole list has
    been processed.

    Args:
        flat_list: Iterable of flats exposing ``get_price()``,
            ``get_email_status()`` and ``get_flat_id()``.
    """
    cursor = _conn.cursor()
    for item in flat_list:
        logger.info(item)
        params = (
            item.get_price(),
            item.get_email_status(),
            item.get_flat_id(),
        )
        cursor.execute(self._UPDATE_PRICE_TEMPLATE, params)
    _conn.commit()
def update(self, url):
    """Execute the update statement for a single url and commit.

    ``self._UPDATE_TEMPLATE`` is executed on the module-level connection
    ``_conn`` with the parameters (url alias, url, first-request-done flag,
    id), in that order.

    Args:
        url: Object exposing ``get_url_alias()``, ``get_url()``,
            ``get_first_req_done()`` and ``get_id()``.
    """
    logger.info("Updating the url: {}".format(str(url)))
    cursor = _conn.cursor()
    params = (
        url.get_url_alias(),
        url.get_url(),
        url.get_first_req_done(),
        url.get_id(),
    )
    cursor.execute(self._UPDATE_TEMPLATE, params)
    _conn.commit()
def insert_list(self, flat_list):
    """Run the insert statement for every flat and commit once.

    For each flat the statement ``self._INSERT_LIST_TEMPLATE`` is executed
    with the parameters (flat id, url id, announcement date, href, title,
    price, email status), in that order. The module-level connection
    ``_conn`` is committed after the whole list has been processed.

    Args:
        flat_list: Iterable of flats exposing the corresponding ``get_*``
            accessors.
    """
    cursor = _conn.cursor()
    for item in flat_list:
        logger.info(item)
        params = (
            item.get_flat_id(),
            item.get_url_id(),
            item.get_announcement_date(),
            item.get_href(),
            item.get_title(),
            item.get_price(),
            item.get_email_status(),
        )
        cursor.execute(self._INSERT_LIST_TEMPLATE, params)
    _conn.commit()