def parse_posts(self, posts):
    """Convert listing elements into ``Flat`` comparison dicts on ``self.flats``.

    For each element the locality, layout, floor area and price are read from
    the listing markup, the detail page is parsed via ``self.parse_post`` and
    ``Flat(...).get_cmp_dict()`` is appended to ``self.flats``.

    Args:
        posts: Iterable of listing elements exposing ``find(name, class_=...)``
            (presumably BeautifulSoup tags -- confirm against the caller).

    Raises:
        AttributeError, IndexError, ValueError, ZeroDivisionError: Nothing is
            caught here, so a single malformed listing aborts the whole loop.
    """
    for listing in posts:
        locality = listing.find('strong').text

        # The note is expected to hold "<layout>, <area>" once the fixed
        # prefix and the unit are removed.
        note = listing.find("p", class_='product__note').text.strip()
        fields = note.replace("Prodej bytu", "").replace("m²", "").split(',')
        layout = fields[0].strip()
        area = int(fields[1].strip())

        raw_price = listing.find("strong", class_="product__value").text.strip()
        price = int(raw_price.replace("Kč", "").replace(".", "").strip())
        price_per_meter = price / area

        # "N+kk" layouts count as N rooms, anything else as N + 0.5.
        base_rooms = int(layout.split('+')[0])
        if "kk" in layout:
            extra_rooms = 0.0
        else:
            extra_rooms = 0.5
        room_coeff = base_rooms + extra_rooms

        url = "https://bezrealitky.cz" + listing.find(
            "a", class_="product__link")['href']
        floor, penb, state = self.parse_post(url)

        record = Flat(price=price,
                      title=locality,
                      link=url,
                      size=room_coeff,
                      meters=area,
                      price_per_meter=price_per_meter,
                      floor=floor,
                      penb=penb,
                      state=state)
        self.flats.append(record.get_cmp_dict())
def parse_posts(self, posts):
    """Parse search-result posts into ``Flat`` comparison dicts on ``self.flats``.

    Posts whose price reads "Info o ceně u RK" are skipped. For the rest the
    layout, area and price are read from the post, the detail page is parsed
    via ``self.parse_post`` and ``Flat(...).get_cmp_dict()`` is appended to
    ``self.flats``. ``IndexError``/``ValueError`` raised while parsing the
    area or the detail page are printed and the post is dropped.

    Args:
        posts: Iterable of elements exposing ``find(name, class_=...)``
            (presumably BeautifulSoup tags -- confirm against the caller).
    """
    for post in posts:
        location = post.find("span", class_="locality").text.strip()
        price = post.find("span", class_="norm-price").text.strip()
        heading = post.find("span", class_="name").text.strip()
        heading = heading.replace("Prodej bytu ", "")
        # Dropping non-ASCII characters removes "²" and non-breaking spaces.
        heading = heading.encode("ascii", errors="ignore").decode()

        rooms = heading.split(' ')[0]
        try:
            room_base_coeff = int(rooms.split('+')[0])
        except ValueError:
            # Layouts without a leading number fall back to zero rooms.
            room_base_coeff = 0.0
        room_addons_coeff = 0.0 if "kk" in rooms else 0.5
        room_coeff = room_base_coeff + room_addons_coeff

        link = "https://sreality.cz" + post.find("a", class_="title")['href']

        if price == "Info o ceně u RK":
            continue
        price = price.replace("Kč", "")
        price = price.encode("ascii", errors="ignore").decode()
        price = int(price.replace(" ", ""))

        try:
            area_text = heading.replace('m', '').strip()
            tokens = area_text.split()
            if tokens and tokens[-1].isdigit():
                # Whole trailing number, so areas of 100 m² and more are
                # no longer cut down to their last two digits.
                meters = int(tokens[-1])
            else:
                # Area glued to the preceding text: keep the legacy
                # "last two characters" heuristic.
                meters = int(area_text[-2:])
            price_per_meter = price / meters

            floor, penb, state = self.parse_post(link)
            flat = Flat(title=location,
                        size=room_coeff,
                        price=price,
                        price_per_meter=price_per_meter,
                        meters=meters,
                        link=link,
                        floor=floor,
                        penb=penb,
                        state=state)
            self.flats.append(flat.get_cmp_dict())
        except IndexError as ie:
            print('error', heading, str(ie))
        except ValueError as ve:
            print('error', heading, str(ve))
def parse_posts(self, posts):
    """Parse search-result posts into ``Flat`` objects appended to ``self.flats``.

    Posts whose price reads "Info o ceně u RK" are skipped. For the rest the
    layout, area and price are read from the post and the detail page is
    parsed via ``self.parse_post``. ``IndexError``/``ValueError`` raised while
    parsing the area or the detail page are printed and the post is dropped.
    The time spent on every non-skipped post is printed.

    Args:
        posts: Iterable of elements exposing ``find(name, class_=...)``
            (presumably BeautifulSoup tags -- confirm against the caller).

    Raises:
        ValueError: If the layout or the price is not numeric; those two
            conversions run outside the guarded section.
    """
    for post in posts:
        start = time.time()
        location = post.find("span", class_="locality").text.strip()
        price = post.find("span", class_="norm-price").text.strip()
        heading = post.find("span", class_="name").text.strip()
        heading = heading.replace("Prodej bytu ", "")
        # Dropping non-ASCII characters removes "²" and non-breaking spaces.
        heading = heading.encode("ascii", errors="ignore").decode()

        rooms = heading.split(' ')[0]
        room_base_coeff = int(rooms.split('+')[0])
        room_addons_coeff = 0.0 if "kk" in rooms else 0.5
        room_coeff = room_base_coeff + room_addons_coeff

        link = "https://sreality.cz" + post.find("a", class_="title")['href']

        if price == "Info o ceně u RK":
            continue
        price = price.replace("Kč", "")
        price = price.encode("ascii", errors="ignore").decode()
        price = int(price.replace(" ", ""))

        try:
            area_text = heading.replace('m', '').strip()
            tokens = area_text.split()
            if tokens and tokens[-1].isdigit():
                # Whole trailing number, so areas of 100 m² and more are
                # no longer cut down to their last two digits.
                meters = int(tokens[-1])
            else:
                # Area glued to the preceding text: keep the legacy
                # "last two characters" heuristic.
                meters = int(area_text[-2:])
            price_per_meter = price / meters

            floor, penb, state, desc = self.parse_post(link)
            # The last URL path segment serves as the listing identifier.
            flat_id = link.split('/')[-1]
            flat = Flat(id=flat_id,
                        title=location,
                        size=room_coeff,
                        price=price,
                        price_per_meter=price_per_meter,
                        meters=meters,
                        link=link,
                        floor=floor,
                        penb=penb,
                        state=state,
                        description=desc)
            self.flats.append(flat)
        except IndexError as ie:
            print('error', ie)
        except ValueError as ve:
            print('error', ve)

        end = time.time()
        duration = end - start
        print('post parsed in ', duration)
def parse_posts(self, posts):
    """Collect listings passing the layout, area and floor filters.

    A listing is appended to ``self.flats`` as a ``Flat`` only when its room
    coefficient is at most 3.5, its area is at least 50 and the floor
    returned by ``self.parse_post`` is at least 1. Any exception raised while
    handling one listing is printed together with the listing, and the loop
    moves on to the next one.

    Args:
        posts: Iterable of elements exposing ``find(name, class_=...)``
            (presumably BeautifulSoup tags -- confirm against the caller).
    """
    for item in posts:
        try:
            price_text = item.find(
                "p", class_="c-list-products__price").text.strip()
            price = int(price_text.replace("Kč", "").replace(" ", ""))

            locality = item.find(
                "p", class_="c-list-products__info").text.strip()

            title = item.find(
                "h2", class_="c-list-products__title").text.strip()
            title = title.replace("\n", "").replace("prodejbytu", "")
            fields = title.split(',')

            # The area may carry a fractional part; it is truncated to int.
            area = int(float(fields[1].replace("m²", "").strip()))

            layout = fields[0]
            base_rooms = int(layout.split('+')[0])
            if "kk" in layout:
                room_coeff = base_rooms + 0.0
            else:
                room_coeff = base_rooms + 0.5

            price_per_meter = price / area

            # Cheap filters first, so the detail page is parsed only for
            # candidates that can still qualify.
            if room_coeff > 3.5 or area < 50:
                continue

            href = item.find("a", class_="c-list-products__link")['href']
            url = ("https://reality.idnes.cz" + href).split('?')[0]
            flat_id = url.split('/')[-2]

            floor, penb, state, desc = self.parse_post(url)
            if floor < 1:
                continue

            self.flats.append(
                Flat(id=flat_id,
                     title=locality,
                     size=room_coeff,
                     price=price,
                     price_per_meter=price_per_meter,
                     meters=area,
                     link=url,
                     floor=floor,
                     penb=penb,
                     state=state,
                     description=desc,
                     interest_level=5))
        except Exception as exc:
            print(exc)
            print(item)
def parse_posts(self, posts):
    """Convert listings into ``Flat`` comparison dicts on ``self.flats``.

    A "price on request" label is replaced by the sentinel 999999999. An
    unparsable area falls back to 1 and an unparsable room count to 0.0;
    both failures are reported on stdout. The detail page is parsed via
    ``self.parse_post``.

    Args:
        posts: Iterable of elements exposing ``find(name, class_=...)``
            (presumably BeautifulSoup tags -- confirm against the caller).

    Raises:
        ValueError: If the cleaned price text is not an integer.
    """
    price_substitutions = (
        ("Kč", ""),
        (" ", ""),
        ("Cenanavyžádání", "999999999"),
    )
    for item in posts:
        price_text = item.find(
            "p", class_="c-list-products__price").text.strip()
        for old, new in price_substitutions:
            price_text = price_text.replace(old, new)
        price = int(price_text)

        locality = item.find("p", class_="c-list-products__info").text.strip()

        title = item.find("h2", class_="c-list-products__title").text.strip()
        title = title.replace("\n", "").replace("prodejbytu", "")

        try:
            area = int(title.replace("m²", "").strip().split(" ")[1])
        except Exception as exc:
            print(f"Cannot parse title {title}, error: {exc!r}")
            area = 1

        layout = title.split(',')[0]
        try:
            base_rooms = int(layout.split('+')[0])
        except Exception as exc:
            base_rooms = 0.0
            print(f"Cannot parse post {item}, error: {exc!r}")
        if "kk" in layout:
            room_coeff = base_rooms + 0.0
        else:
            room_coeff = base_rooms + 0.5

        price_per_meter = price / area

        href = item.find("a", class_="c-list-products__link")['href']
        url = ("https://reality.idnes.cz" + href).split('?')[0]
        floor, penb, state = self.parse_post(url)

        record = Flat(title=locality,
                      size=room_coeff,
                      price=price,
                      price_per_meter=price_per_meter,
                      meters=area,
                      link=url,
                      floor=floor,
                      penb=penb,
                      state=state)
        self.flats.append(record.get_cmp_dict())
def parse_posts(self, posts):
    """Collect listings as ``Flat`` objects on ``self.flats``.

    Listings are skipped when the floor returned by ``self.parse_post`` is
    below 1 or when the reported state is "před rekonstrukcí".

    Args:
        posts: Iterable of elements exposing ``find(name, class_=...)``
            (presumably BeautifulSoup tags -- confirm against the caller).

    Raises:
        AttributeError, IndexError, ValueError, ZeroDivisionError: Nothing is
            caught here, so a malformed listing aborts the whole loop.
    """
    for div in posts:
        # The locality is used verbatim as the title; it is not split on "-",
        # so localities without a hyphen no longer raise IndexError.
        location = div.find('strong').text

        heading = div.find("p", class_='product__note').text.strip()
        details = heading.replace("Prodej bytu", "").replace("m²", "").split(',')
        rooms = details[0].strip()
        size = int(details[1].strip())

        price_text = div.find("strong", class_="product__value").text.strip()
        price = int(price_text.replace("Kč", "").replace(".", "").strip())
        price_per_meter = price / size

        room_base_coeff = int(rooms.split('+')[0])
        room_addons_coeff = 0.0 if "kk" in rooms else 0.5
        room_coeff = room_base_coeff + room_addons_coeff

        link = "https://bezrealitky.cz" + div.find(
            "a", class_="product__link")['href']
        # The listing id is the part of the last URL segment before the
        # first hyphen.
        flat_id = link.split('/')[-1].split('-')[0]

        floor, penb, state, desc = self.parse_post(link)
        if floor < 1:
            continue
        if state == "před rekonstrukcí":
            continue

        flat = Flat(id=flat_id,
                    price=price,
                    title=location,
                    link=link,
                    size=room_coeff,
                    meters=size,
                    price_per_meter=price_per_meter,
                    floor=floor,
                    penb=penb,
                    state=state,
                    description=desc)
        self.flats.append(flat)
def parse_post(self, div):
    """Parse one listing element, build a ``Flat`` and print its comparison dict.

    Args:
        div: Listing element exposing ``find(name, class_=...)`` (presumably
            a BeautifulSoup tag -- confirm against the caller).

    Returns:
        ``False`` when ``self.parse_details`` fails or the description marks
        the flat as an investment or as reserved; ``None`` otherwise, after
        the flat's comparison dict has been printed.

    Raises:
        Exception: If the price, area or room count in the title is not
            numeric (the message is that of the underlying ``ValueError``),
            or if the description mentions "panel" or "ateliér".
    """
    anchor = div.find('h3').find('a')
    location = anchor.text

    price = div.find("span", class_="price").text.replace("Kč", "").strip()
    # Dropping non-ASCII characters removes non-breaking spaces.
    price = price.encode("ascii", errors="ignore").decode()
    try:
        price = int(price)
    except ValueError as e:
        raise Exception(e) from e

    try:
        title_parts = location.split(",")
        # Validates that the title ends with a numeric area; the area that is
        # actually used is re-read from the heading further down.
        size = int(title_parts[-1].replace("m2", "").strip())
        rooms = title_parts[0].replace("Prodej bytu", "").strip()
        room_base_coeff = int(rooms.split('+')[0])
    except ValueError as e:
        raise Exception(e) from e
    room_addons_coeff = 0.0 if "kk" in rooms else 0.5
    room_coeff = room_base_coeff + room_addons_coeff

    desc = div.find('p', class_="hidden-sm").text.strip()
    if "panel" in desc or "ateliér" in desc:
        raise Exception("not wanted - panel or atelier")

    heading = div.find("h3", class_='list').text.strip()
    heading_parts = heading.replace("Prodej bytu", "").replace("m2", "").split(',')
    size = int(heading_parts[-1].strip())
    price_per_meter = price / size

    link = "https://www.bydlisnami.cz" + anchor['href']
    try:
        floor, penb, state = self.parse_details(link, desc)
    except Exception:
        return False
    if "investice do" in desc or "rezervováno" in desc.lower():
        return False

    # NOTE(review): no listing id was ever derived here, so the builtin
    # ``id`` function ended up in ``Flat``. The last URL path segment is
    # used instead -- confirm it matches the id scheme expected downstream.
    flat_id = link.rstrip('/').split('/')[-1]
    flat = Flat(id=flat_id,
                price=price,
                title=location,
                link=link,
                size=room_coeff,
                meters=size,
                price_per_meter=price_per_meter,
                floor=floor,
                penb=penb,
                state=state,
                description=desc)
    print(flat.get_cmp_dict())
def parse_posts(self, posts):
    """Collect listings located on floor 1 or higher as ``Flat`` objects.

    Elements that raise ``AttributeError`` while being read are treated as
    adverts and skipped silently. Other failures are skipped silently when
    their message contains "Cena" or "Rezerv"; anything else is reported on
    stdout with a traceback, the element and the link known so far.

    Args:
        posts: Iterable of elements exposing ``find(name, class_=...)``
            (presumably BeautifulSoup tags -- confirm against the caller).
    """
    for item in posts:
        link = ""  # stays empty in the error report if the link was not reached
        try:
            headline = item.find("h2").text.strip()
            headline = headline.replace("Prodej bytu,", "").replace(" ", "")
            fields = headline.split(',')

            layout = fields[0]
            base_rooms = int(layout.split('+')[0])
            if "kk" in layout:
                room_coeff = base_rooms + 0.0
            else:
                room_coeff = base_rooms + 0.5

            area = int(fields[1].replace("m²", "").strip())

            price_text = item.find(
                "span",
                class_="advert-list-items__content-price-price").text.strip()
            price_text = price_text.replace("Kč", "")
            price_text = price_text.encode("ascii", errors="ignore").decode()
            price = int(price_text.replace(" ", "").strip())
            price_per_meter = price / area

            locality = item.find(
                "p", class_="advert-list-items__content-address").text.strip()

            link = item.find("a", class_="advert-list-items__images")["href"]
            # The id is the text after the last hyphen, before ".html".
            flat_id = link.split('.html')[0].split('-')[-1]

            floor, penb, state, desc = self.parse_post(link)
            if floor < 1:
                continue

            self.flats.append(
                Flat(id=flat_id,
                     price=price,
                     title=locality,
                     link=link,
                     size=room_coeff,
                     meters=area,
                     price_per_meter=price_per_meter,
                     floor=floor,
                     penb=penb,
                     state=state,
                     description=desc))
        except AttributeError:
            # this is an advert
            continue
        except Exception as exc:
            message = str(exc)
            if "Cena" in message or "Rezerv" in message:
                continue
            print(
                "Uncaught Exception occurred in post-----------------------------"
            )
            print(exc.__class__.__name__, exc)
            print_exc()
            print(item)
            print(link)
def parse_posts(self, posts):
    """Convert listings into ``Flat`` comparison dicts on ``self.flats``.

    Parsing is best-effort: a listing that raises anywhere along the way is
    skipped silently. A non-numeric price is replaced by the sentinel
    1000000000 instead of dropping the listing.

    Args:
        posts: Iterable of elements exposing ``find(name, class_=...)``
            (presumably BeautifulSoup tags -- confirm against the caller).
    """
    for post in posts:
        try:
            heading = post.find("h2").text.strip()
            heading = heading.replace("Prodej bytu,", "").replace(" ", "")
            fields = heading.split(',')

            rooms = fields[0]
            room_base_coeff = int(rooms.split('+')[0])
            room_addons_coeff = 0.0 if "kk" in rooms else 0.5
            room_coeff = room_base_coeff + room_addons_coeff

            meters = int(fields[1].replace("m²", "").strip())

            price = post.find(
                "div",
                class_="advert-list-items__content-price").span.text.strip()
            price = price.replace("Kč", "")
            price = price.encode("ascii", errors="ignore").decode()
            try:
                price = int(price.replace(" ", "").strip())
            except ValueError:
                price = 1000000000

            # Both operands are ints here, so only a zero area can fail; that
            # is handled by the outer handler like any other bad listing.
            price_per_meter = price / meters

            location = post.find(
                "p", class_="advert-list-items__content-address").text.strip()

            # Try the known anchor classes in order. A missing anchor gives
            # TypeError (None is not subscriptable), a missing attribute
            # gives KeyError.
            for css_class in ("form-price", "advert-list-items__content"):
                try:
                    link = post.find("a", class_=css_class)["href"]
                    break
                except (TypeError, KeyError):
                    continue
            else:
                link = post.find(
                    "a", class_="advert-list-items__images").get("href")

            floor, penb, state = self.parse_post(link)
            flat = Flat(price=price,
                        title=location,
                        link=link,
                        size=room_coeff,
                        meters=meters,
                        price_per_meter=price_per_meter,
                        floor=floor,
                        penb=penb,
                        state=state)
            self.flats.append(flat.get_cmp_dict())
        except AttributeError:
            # this is an advert
            continue
        except Exception:
            # Deliberately silent: every other failure drops the listing
            # without any output.
            continue