def __serialize_listing(self, listing_element: PageElement):
    """Build a ``BarnstormersClassifiedListing`` from one listing element.

    The title, price and body nodes are looked up with ``find_next``
    starting at *listing_element*, and their contents are copied onto a
    freshly created listing object.

    Args:
        listing_element: Element from which the ``listing_header``,
            ``price`` and ``body`` nodes are searched.

    Returns:
        A listing with ``title``, ``price``, ``description`` and ``url``
        set. ``price`` is the integer formed by the ASCII digits found in
        the price text, or ``0`` when that text contains no digits.
        ``url`` is ``self.base_url`` followed by the title node's ``href``.
    """
    title_element = listing_element.find_next(class_="listing_header")
    title = title_element.text

    price_text = listing_element.find_next(class_="price").text
    # Strip everything except digits. A price text without any digit leaves
    # an empty string, and int("") raises ValueError, so fall back to 0
    # instead of aborting on such a listing.
    price_digits = re.sub("[^0-9]", "", price_text)
    price = int(price_digits) if price_digits else 0

    body = listing_element.find_next(class_="body").text

    listing = BarnstormersClassifiedListing()
    listing.title = title
    listing.price = price
    listing.description = body
    listing.url = self.base_url + title_element["href"]
    return listing
def __parse_result_item(self, result_item: PageElement):
    """Convert one search-result element into a ``TradeAPlaneListing``.

    Args:
        result_item: Element from which the ``id="title"``, ``txt-price``
            and ``description`` nodes are searched with ``find_next``.

    Returns:
        A ``TradeAPlaneListing`` whose title and description are the
        stripped node texts, whose price is the integer made of the digits
        in the price text (``0`` when there are none), and whose url is
        ``self.base_url`` followed by the title node's ``href``.
    """
    title_node = result_item.find_next(id='title')

    # Drop every non-digit character from the displayed price.
    raw_price = result_item.find_next(class_='txt-price').text
    digits = re.sub("[^0-9]", "", raw_price)

    summary = result_item.find_next(class_='description').text.strip()
    link = self.base_url + title_node['href']

    return TradeAPlaneListing(
        title=title_node.text.strip(),
        # An empty digit string means no numeric price was shown.
        price=int(digits) if digits else 0,
        description=summary,
        url=link,
    )
def _format_vacancy(item: PageElement):
    """Collect the fields of one work.ua vacancy element into a dict.

    Args:
        item: Element from which all nodes are searched with ``find_next``.

    Returns:
        A dict with the keys ``site_id``, ``title``, ``company``, ``desc``,
        ``salary``, ``city`` and ``link``. ``salary`` is ``None`` unless
        the first ``<b>`` text contains "грн".
    """
    # The vacancy id is the second-to-last path segment of the link.
    href = item.find_next("a", {"class": "no-decoration"})['href']
    site_id = int(href.split("/")[-2])

    title = item.find_next("a").get_text()

    # The same info block supplies both the company name and the cities.
    info_block = item.find_next("div", {"class": "add-top-xs"})
    company = info_block.find_next("b").get_text()

    # NFKD-normalise, then collapse every whitespace run to a single space.
    raw_desc = item.find_next("p").get_text()
    desc = " ".join(unicodedata.normalize("NFKD", raw_desc).split())

    # The first <b> counts as the salary only when it mentions "грн".
    first_bold = item.find_next("b").get_text()
    if "грн" in first_bold:
        salary = unicodedata.normalize("NFKD", first_bold)
    else:
        salary = None

    # Everything after the first "·" separator is joined with "|".
    city_parts = info_block.get_text().split("·")[1:]
    city = "|".join(part.replace('\xa0', ' ') for part in city_parts)

    return {
        "site_id": site_id,
        "title": title,
        "company": company,
        "desc": desc,
        "salary": salary,
        "city": city,
        "link": "https://work.ua" + href,
    }
def _format_vacancy(item: PageElement):
    """Collect the fields of one robota.ua vacancy element into a dict.

    Args:
        item: Element carrying a ``data-vacancy-id`` attribute; the other
            nodes are searched from it with ``find_next``.

    Returns:
        A dict with the keys ``site_id``, ``title``, ``company``, ``desc``,
        ``salary``, ``city`` and ``link``. ``salary`` is ``None`` when the
        salary span's text is empty.
    """
    site_id = int(item["data-vacancy-id"])

    # One anchor provides both the title text and the relative link.
    title_link = item.find_next("a", {"class": "ga_listing"})
    title = title_link.get_text().replace("\n", "")

    company = item.find_next("a", {"class": "company-profile-name"}).get_text()
    desc = item.find_next("div", {"class": "card-description"}).get_text()

    # An empty salary text is reported as None rather than "".
    salary = item.find_next("span", {"class": "salary"}).get_text() or None

    city = item.find_next("span", {"class": "location"}).get_text()
    link = "https://robota.ua" + title_link['href']

    return {
        "site_id": site_id,
        "title": title,
        "company": company,
        "desc": desc,
        "salary": salary,
        "city": city,
        "link": link,
    }
def _format_vacancy(item: PageElement):
    """Collect the fields of one vacancy element into a dict.

    Args:
        item: Element carrying an ``_id`` attribute; the other nodes are
            searched from it with ``find_next``.

    Returns:
        A dict with the keys ``site_id``, ``title``, ``company``, ``desc``,
        ``salary``, ``city`` and ``link``. ``salary`` is ``None`` when no
        salary span is found.
    """
    site_id = int(item["_id"])

    # One anchor provides both the title text and the link.
    title_link = item.find_next("a", {"class": "vt"})
    title = title_link.get_text()

    company_text = item.find_next("a", {"class": "company"}).get_text()
    company = company_text.replace('\xa0', '')

    # NFKD-normalise, then collapse every whitespace run to a single space.
    raw_desc = item.find_next("div", {"class": "sh-info"}).get_text()
    desc = " ".join(unicodedata.normalize("NFKD", raw_desc).split())

    # NOTE(review): find_next keeps searching in document order, so when
    # this vacancy has no salary span a later one may be returned instead
    # - confirm against the page markup.
    salary_span = item.find_next("span", {"class": "salary"})
    if salary_span:
        salary = unicodedata.normalize("NFKD", salary_span.get_text())
    else:
        salary = None

    city = item.find_next("span", {"class": "cities"}).get_text()
    link = title_link['href']

    return {
        "site_id": site_id,
        "title": title,
        "company": company,
        "desc": desc,
        "salary": salary,
        "city": city,
        "link": link,
    }