def parse_posts(self, posts):
        """Turn listing tiles into comparison dicts stored on ``self.flats``.

        For every element of ``posts`` the method reads the locality from the
        first ``<strong>``, the layout and floor area from the
        ``product__note`` paragraph ("<layout>, <area>" once the
        "Prodej bytu" prefix and the "m²" unit are removed) and the price from
        the ``product__value`` element.  The detail page behind the tile is
        fetched through ``self.parse_post`` for floor, PENB and state.

        Nothing is caught here: a tile that lacks an element or carries a
        non-numeric value raises and stops the whole loop.
        """
        for listing in posts:
            locality = listing.find('strong').text

            note = listing.find("p", class_='product__note').text.strip()
            layout_and_area = note.replace("Prodej bytu", "").replace("m²", "")
            fields = layout_and_area.split(',')
            layout = fields[0].strip()
            area = int(fields[1].strip())

            price_tag = listing.find("strong", class_="product__value")
            price_text = price_tag.text.strip().replace("Kč", "").replace(".", "")
            amount = int(price_text.strip())

            per_meter = amount / area

            # "N+kk" layouts count as N rooms, anything else as N + 0.5.
            whole_rooms = int(layout.split('+')[0])
            layout_score = whole_rooms + (0.0 if "kk" in layout else 0.5)

            href = listing.find("a", class_="product__link")['href']
            url = "https://bezrealitky.cz" + href

            floor, penb, state = self.parse_post(url)

            record = Flat(price=amount,
                          title=locality,
                          link=url,
                          size=layout_score,
                          meters=area,
                          price_per_meter=per_meter,
                          floor=floor,
                          penb=penb,
                          state=state)
            self.flats.append(record.get_cmp_dict())
Esempio n. 2
0
    def parse_posts(self, posts):
        """Parse listing tiles and append one comparison dict per flat.

        Each element of ``posts`` must offer ``find(tag, class_=...)``.  The
        locality, price and heading are read from the ``locality``,
        ``norm-price`` and ``name`` spans; the link from the ``title`` anchor
        (prefixed with ``https://sreality.cz``).  Floor, PENB and state come
        from ``self.parse_post(link)``.

        Tiles whose price reads "Info o ceně u RK" are skipped.  An
        ``IndexError`` or ``ValueError`` raised while parsing the size,
        fetching the detail page or building the flat is printed and the tile
        is skipped.  Any other exception propagates.
        """
        for post in posts:
            location = post.find("span", class_="locality").text.strip()
            price = post.find("span", class_="norm-price").text.strip()
            heading = post.find("span", class_="name").text.strip()
            # Collapse every whitespace run (str.split() also treats non-ASCII
            # spaces as separators) into one plain space.  Without this, a
            # non-ASCII space between layout and size is deleted by the ASCII
            # filter below and the two numbers fuse together.
            heading = " ".join(heading.split())
            heading = heading.replace("Prodej bytu ", "")
            heading = heading.encode("ascii", errors="ignore").decode()
            rooms = heading.split(' ')[0]
            try:
                room_base_coeff = int(rooms.split('+')[0])
            except ValueError:
                # Non-numeric layout (e.g. a descriptive word): count 0 rooms.
                room_base_coeff = 0.0
            room_addons_coeff = 0.0 if "kk" in rooms else 0.5
            room_coeff = room_base_coeff + room_addons_coeff

            link = post.find("a", class_="title")['href']
            link = "https://sreality.cz" + link
            if price == "Info o ceně u RK":
                continue
            price = price.replace("Kč", "")
            price = price.encode("ascii", errors="ignore").decode()
            price = int(price.replace(" ", ""))

            try:
                size_text = heading.replace('m', '').strip()
                last_token = size_text.split()[-1]
                # Use the whole trailing number so sizes of 100 m² and more are
                # not truncated; fall back to the last two characters when the
                # size is still glued to other text.
                if last_token.isdigit():
                    meters = int(last_token)
                else:
                    meters = int(size_text[-2:])
                price_per_meter = price / meters

                floor, penb, state = self.parse_post(link)

                flat = Flat(title=location,
                            size=room_coeff,
                            price=price,
                            price_per_meter=price_per_meter,
                            meters=meters,
                            link=link,
                            floor=floor,
                            penb=penb,
                            state=state
                            )
                self.flats.append(flat.get_cmp_dict())
            except IndexError as ie:
                print('error', heading, str(ie))
            except ValueError as ve:
                print('error', heading, str(ve))
Esempio n. 3
0
    def parse_posts(self, posts):
        """Parse listing tiles, append a ``Flat`` per tile and time each one.

        Each element of ``posts`` must offer ``find(tag, class_=...)``.  The
        locality, price and heading are read from the ``locality``,
        ``norm-price`` and ``name`` spans; the link from the ``title`` anchor
        (prefixed with ``https://sreality.cz``).  Floor, PENB, state and
        description come from ``self.parse_post(link)``; the flat id is the
        last path segment of the link.

        Tiles whose price reads "Info o ceně u RK" are skipped (no timing line
        is printed for them).  An ``IndexError`` or ``ValueError`` raised
        while parsing the size, fetching the detail page or building the flat
        is printed and the tile is skipped.  A non-numeric room count raises
        ``ValueError`` out of the method.
        """
        for post in posts:

            start = time.time()

            location = post.find("span", class_="locality").text.strip()
            price = post.find("span", class_="norm-price").text.strip()
            heading = post.find("span", class_="name").text.strip()
            # Collapse every whitespace run (str.split() also treats non-ASCII
            # spaces as separators) into one plain space.  Without this, a
            # non-ASCII space between layout and size is deleted by the ASCII
            # filter below and the two numbers fuse together.
            heading = " ".join(heading.split())
            heading = heading.replace("Prodej bytu ", "")
            heading = heading.encode("ascii", errors="ignore").decode()
            rooms = heading.split(' ')[0]
            room_base_coeff = int(rooms.split('+')[0])
            room_addons_coeff = 0.0 if "kk" in rooms else 0.5
            room_coeff = room_base_coeff + room_addons_coeff

            link = post.find("a", class_="title")['href']

            link = "https://sreality.cz" + link
            if price == "Info o ceně u RK":
                continue
            price = price.replace("Kč", "")
            price = price.encode("ascii", errors="ignore").decode()
            price = int(price.replace(" ", ""))

            try:
                size_text = heading.replace('m', '').strip()
                last_token = size_text.split()[-1]
                # Use the whole trailing number so sizes of 100 m² and more are
                # not truncated; fall back to the last two characters when the
                # size is still glued to other text.
                if last_token.isdigit():
                    meters = int(last_token)
                else:
                    meters = int(size_text[-2:])
                price_per_meter = price / meters

                floor, penb, state, desc = self.parse_post(link)

                # Named flat_id so the builtin id() is not shadowed.
                flat_id = link.split('/')[-1]

                flat = Flat(id=flat_id,
                            title=location,
                            size=room_coeff,
                            price=price,
                            price_per_meter=price_per_meter,
                            meters=meters,
                            link=link,
                            floor=floor,
                            penb=penb,
                            state=state,
                            description=desc)
                self.flats.append(flat)
            except IndexError as ie:
                print('error', ie)
            except ValueError as ve:
                print('error', ve)

            end = time.time()

            duration = end - start

            print('post parsed in ', duration)
Esempio n. 4
0
    def parse_posts(self, posts):
        """Collect ``Flat`` objects from listing tiles, applying fixed filters.

        For each tile the price, locality and title are read from the
        ``c-list-products__price``, ``c-list-products__info`` and
        ``c-list-products__title`` elements.  The title is expected to read
        "<layout>,<area>m²".  Tiles are dropped when the room coefficient is
        above 3.5, the area is below 50, or the floor reported by
        ``self.parse_post`` is below 1.  Surviving flats are appended to
        ``self.flats`` with ``interest_level=5``.

        Any exception raised for a tile is printed together with the tile and
        the loop moves on.
        """
        for post in posts:
            try:
                price_tag = post.find("p", class_="c-list-products__price")
                price_text = price_tag.text.strip().replace("Kč", "")
                price = int(price_text.replace(" ", ""))

                info_tag = post.find("p", class_="c-list-products__info")
                location = info_tag.text.strip()

                title_tag = post.find("h2", class_="c-list-products__title")
                title = title_tag.text.strip().replace("\n", "")
                title = title.replace("prodejbytu", "")
                title_parts = title.split(',')

                # The area may be written with decimals; keep the integer part.
                area_text = title_parts[1].replace("m²", "").strip()
                size = int(float(area_text))

                rooms = title_parts[0]
                whole_rooms = int(rooms.split('+')[0])
                room_coeff = whole_rooms + (0.0 if "kk" in rooms else 0.5)

                price_per_meter = price / size

                if room_coeff > 3.5 or size < 50:
                    continue

                href = post.find("a", class_="c-list-products__link")['href']
                # Drop the query string; the id is the second-to-last segment.
                link = ("https://reality.idnes.cz" + href).split('?')[0]
                flat_id = link.split('/')[-2]

                floor, penb, state, desc = self.parse_post(link)
                if floor < 1:
                    continue

                self.flats.append(Flat(id=flat_id,
                                       title=location,
                                       size=room_coeff,
                                       price=price,
                                       price_per_meter=price_per_meter,
                                       meters=size,
                                       link=link,
                                       floor=floor,
                                       penb=penb,
                                       state=state,
                                       description=desc,
                                       interest_level=5))
            except Exception as e:
                print(e)
                print(post)
Esempio n. 5
0
    def parse_posts(self,posts):
        """Convert listing tiles into comparison dicts stored on ``self.flats``.

        Price, locality and title are read from the
        ``c-list-products__price``, ``c-list-products__info`` and
        ``c-list-products__title`` elements.  A price text of
        "Cenanavyžádání" (after spaces are removed) is replaced by 999999999.
        When the area cannot be parsed it falls back to 1, and when the room
        count cannot be parsed it falls back to 0.0; both cases print a
        message.  Floor, PENB and state come from ``self.parse_post(link)``.
        """
        for post in posts:
            price_text = post.find("p", class_="c-list-products__price").text.strip()
            substitutions = (("Kč", ""), (" ", ""), ("Cenanavyžádání", "999999999"))
            for old, new in substitutions:
                price_text = price_text.replace(old, new)
            price = int(price_text)

            location = post.find("p", class_="c-list-products__info").text.strip()

            title_tag = post.find("h2", class_="c-list-products__title")
            title = title_tag.text.strip().replace("\n", "").replace("prodejbytu", "")

            # The area is the second space-separated token of the title.
            try:
                area_tokens = title.replace("m²", "").strip().split(" ")
                size = int(area_tokens[1])
            except Exception as err:
                print(f"Cannot parse title {title}, error: {repr(err)}")
                size = 1

            rooms = title.split(',')[0]
            try:
                whole_rooms = int(rooms.split('+')[0])
            except Exception as err:
                whole_rooms = 0.0
                print(f"Cannot parse post {post}, error: {repr(err)}")
            room_coeff = whole_rooms + (0.0 if "kk" in rooms else 0.5)

            price_per_meter = price / size

            href = post.find("a", class_="c-list-products__link")['href']
            # Keep only the part of the URL before the query string.
            link = ("https://reality.idnes.cz" + href).split('?')[0]

            floor, penb, state = self.parse_post(link)

            record = Flat(title=location,
                          size=room_coeff,
                          price=price,
                          price_per_meter=price_per_meter,
                          meters=size,
                          link=link,
                          floor=floor,
                          penb=penb,
                          state=state)
            self.flats.append(record.get_cmp_dict())
Esempio n. 6
0
    def parse_posts(self, posts):
        """Parse listing tiles and append a ``Flat`` for each acceptable one.

        For every element of ``posts`` the locality is read from the first
        ``<strong>``, layout and area from the ``product__note`` paragraph
        ("<layout>, <area>" once "Prodej bytu" and "m²" are removed) and the
        price from the ``product__value`` element.  The flat id is the part
        of the link's last path segment before the first dash.  Floor, PENB,
        state and description come from ``self.parse_post(link)``.

        Tiles are skipped when the floor is below 1 or the state equals
        "před rekonstrukcí".  Parsing errors are not caught and abort the
        loop.
        """
        for div in posts:
            location = div.find('strong').text
            heading = div.find("p", class_='product__note').text.strip()
            meters = heading.replace("Prodej bytu", "").replace("m²", "")
            parts = meters.split(',')
            rooms = parts[0].strip()
            size = int(parts[1].strip())
            price = div.find("strong", class_="product__value").text.strip()
            price = int(price.replace("Kč", "").replace(".", "").strip())

            price_per_meter = price / size
            room_base_coeff = int(rooms.split('+')[0])
            room_addons_coeff = 0.0 if "kk" in rooms else 0.5
            room_coeff = room_base_coeff + room_addons_coeff
            # The former unused `suburb` and `price_per_room` locals were
            # removed: they were never read, yet could raise (IndexError for a
            # locality without a dash, ZeroDivisionError for a zero room
            # coefficient) and abort the whole batch.

            link = div.find("a", class_="product__link")['href']
            link = "https://bezrealitky.cz" + link

            # Named flat_id so the builtin id() is not shadowed.
            flat_id = link.split('/')[-1].split('-')[0]
            floor, penb, state, desc = self.parse_post(link)

            if floor < 1:
                continue

            if state == "před rekonstrukcí":
                continue

            flat = Flat(
                id=flat_id,
                price=price,
                title=location,
                link=link,
                size=room_coeff,
                meters=size,
                price_per_meter=price_per_meter,
                floor=floor,
                penb=penb,
                state=state,
                description=desc
            )
            self.flats.append(flat)
    def parse_post(self, div):
        """Parse one listing tile, build a ``Flat`` and print its comparison dict.

        ``div`` must offer ``find(tag, class_=...)``.  The title is read from
        the anchor inside the first ``<h3>``, the price from the ``price``
        span and the description from the ``hidden-sm`` paragraph.

        Returns:
            ``False`` when ``self.parse_details`` raises, or when the
            description contains "investice do" or (case-insensitively)
            "rezervováno".  Otherwise the method falls off the end and
            returns ``None`` after printing; the ``Flat`` is neither returned
            nor stored here.

        Raises:
            Exception: wrapping a ``ValueError`` from the first price/size/room
                parse, or with the message "not wanted - panel or atelier"
                when the description mentions "panel" or "ateliér".
            ValueError: from the second size/price parse, which is not wrapped.
        """
        # print(div)
        location = div.find('h3').find('a').text
        price = div.find("span", class_="price").text.replace("Kč", "").strip()
        # price2 = price.replace("\\xa0790","")
        # Dropping non-ASCII characters removes any non-ASCII separators.
        price = price.encode("ascii", errors="ignore").decode()
        try:
            price = int(price)
        except ValueError as e:
            raise Exception(e)
        # print(div)
        # NOTE(review): `suburb` is only mentioned in commented-out code below.
        suburb = ""
        # suburb = location.split('-')[1].split(',')[0].strip()
        try:
            # Title is treated as "<prefix + layout>, ..., <area>m2".
            location_splitted = location.split(",")
            size = int(location_splitted[-1].replace("m2", "").strip())
            rooms = location_splitted[0].replace("Prodej bytu", "").strip()
            room_base_coeff = int(rooms.split('+')[0])
            # "N+kk" layouts count as N rooms, anything else as N + 0.5.
            room_addons_coeff = 0.0 if "kk" in rooms else 0.5
            room_coeff = room_base_coeff + room_addons_coeff
            price_per_meter = price / size

            desc = div.find('p', class_="hidden-sm").text.strip()
        except ValueError as e:

            raise Exception(e)

        if "panel" in desc or "ateliér" in desc:
            #print("panel")
            raise Exception("not wanted - panel or atelier")

        #print(location, suburb, size, rooms, room_coeff, price, price_per_meter, desc)

        # Size and price are parsed a second time below, now from the
        # `h3.list` text and with "." and " " stripped from the price; these
        # values overwrite the ones computed above.
        heading = div.find("h3", class_='list').text.strip()
        meters = heading.replace("Prodej bytu", "").replace("m2", "")

        splitted = meters.split(',')
        size = splitted[len(splitted) - 1].strip()

        size = int(size)
        price = div.find("span", class_="price").text.strip().replace(
            "Kč", "").replace(".", "").replace(" ", "").strip()
        price = price.encode("ascii", errors="ignore").decode()
        price = int(price)

        price_per_meter = price / size

        # NOTE(review): `price_per_room` is not read anywhere in this method.
        price_per_room = price / room_coeff

        link = "https://www.bydlisnami.cz" + div.find("h3").find("a")['href']

        try:
            floor, penb, state = self.parse_details(link, desc)
        except Exception as e:
            # Any failure while reading the detail page rejects the tile.
            return False
        if "investice do" in desc or "rezervováno" in desc.lower():
            return False

        # NOTE(review): no local `id` is assigned in this method, so unless a
        # name outside this block defines it, the builtin `id` function is
        # what gets passed here -- confirm the intended identifier.
        flat = Flat(id=id,
                    price=price,
                    title=location,
                    link=link,
                    size=room_coeff,
                    meters=size,
                    price_per_meter=price_per_meter,
                    floor=floor,
                    penb=penb,
                    state=state,
                    description=desc)
        print(flat.get_cmp_dict())
Esempio n. 8
0
    def parse_posts(self, posts):
        """Parse listing tiles and append a ``Flat`` for each usable one.

        The ``<h2>`` heading is expected to read
        "Prodej bytu, <layout>, <area> m²"; the price comes from the
        ``advert-list-items__content-price-price`` span, the address from the
        ``advert-list-items__content-address`` paragraph and the link from
        the ``advert-list-items__images`` anchor.  The flat id is the text
        after the last dash of the link, before ".html".  Floor, PENB, state
        and description come from ``self.parse_post(link)``; tiles with a
        floor below 1 are dropped.

        An ``AttributeError`` (a tile lacking the expected elements, treated
        as an advert) is ignored silently, as is any exception whose message
        contains "Cena" or "Rezerv".  Every other exception is reported with
        its traceback, the tile and the link parsed so far.
        """
        for post in posts:
            link = ""  # printed by the error report when parsing fails early
            try:
                raw_heading = post.find("h2").text.strip()
                heading = raw_heading.replace("Prodej bytu,", "")
                heading = heading.replace(" ", "")
                heading_parts = heading.split(',')

                rooms = heading_parts[0]
                whole_rooms = int(rooms.split('+')[0])
                room_coeff = whole_rooms + (0.0 if "kk" in rooms else 0.5)

                meters = int(heading_parts[1].replace("m²", "").strip())

                price_tag = post.find(
                    "span", class_="advert-list-items__content-price-price")
                price_text = price_tag.text.strip().replace("Kč", "")
                # Dropping non-ASCII characters removes non-ASCII separators.
                price_text = price_text.encode("ascii", errors="ignore").decode()
                price = int(price_text.replace(" ", "").strip())

                price_per_meter = price / meters

                address_tag = post.find(
                    "p", class_="advert-list-items__content-address")
                location = address_tag.text.strip()

                image_anchor = post.find(
                    "a", class_="advert-list-items__images")
                link = image_anchor["href"]
                flat_id = link.split('.html')[0].split('-')[-1]

                floor, penb, state, desc = self.parse_post(link)
                if floor < 1:
                    continue

                self.flats.append(Flat(id=flat_id,
                                       price=price,
                                       title=location,
                                       link=link,
                                       size=room_coeff,
                                       meters=meters,
                                       price_per_meter=price_per_meter,
                                       floor=floor,
                                       penb=penb,
                                       state=state,
                                       description=desc))
            except AttributeError:
                continue  # this is an advert
            except Exception as e:
                message = str(e)
                if "Cena" in message or "Rezerv" in message:
                    continue
                print(
                    "Uncaught Exception occurred in post-----------------------------"
                )
                print(e.__class__.__name__, e)
                print_exc()
                print(post)
                print(link)
    def parse_posts(self, posts):
        """Parse listing tiles and append one comparison dict per flat.

        The ``<h2>`` heading is expected to read
        "Prodej bytu, <layout>, <area> m²"; the price is the first ``<span>``
        inside the ``advert-list-items__content-price`` div (an unparseable
        price becomes 1000000000) and the address comes from the
        ``advert-list-items__content-address`` paragraph.  The link is taken
        from the first anchor that yields an ``href`` among the classes
        ``form-price``, ``advert-list-items__content`` and
        ``advert-list-items__images``.  Floor, PENB and state come from
        ``self.parse_post(link)``.

        This method is deliberately best-effort: a tile that raises
        ``AttributeError`` (treated as an advert) or any other ``Exception``
        is skipped without output.
        """
        for post in posts:
            try:
                heading = post.find("h2").text.strip()
                heading = heading.replace("Prodej bytu,", "").replace(" ", "")
                heading_parts = heading.split(',')
                rooms = heading_parts[0]
                room_base_coeff = int(rooms.split('+')[0])
                room_addons_coeff = 0.0 if "kk" in rooms else 0.5
                room_coeff = room_base_coeff + room_addons_coeff
                meters = int(heading_parts[1].replace("m²", "").strip())

                price = post.find("div",
                                  class_="advert-list-items__content-price"
                                  ).span.text.strip()
                price = price.replace("Kč", "")
                price = price.encode("ascii", errors="ignore").decode()
                try:
                    price = int(price.replace(" ", "").strip())
                except ValueError:
                    # Non-numeric price text: use a sentinel price instead.
                    price = 1000000000

                # Both operands are ints here, so the only possible failure is
                # a zero area; that ZeroDivisionError is handled by the
                # generic handler below and the tile is skipped, as before.
                price_per_meter = price / meters

                location = post.find(
                    "p",
                    class_="advert-list-items__content-address").text.strip()

                # Try the preferred anchors first.  A missing anchor makes
                # find() return None (TypeError on subscripting); an anchor
                # without href raises KeyError.
                for anchor_class in ("form-price",
                                     "advert-list-items__content"):
                    try:
                        link = post.find("a", class_=anchor_class)["href"]
                        break
                    except (TypeError, KeyError):
                        continue
                else:
                    # Last resort; a missing anchor raises AttributeError
                    # here and the tile is treated as an advert.
                    link = post.find(
                        "a",
                        class_="advert-list-items__images").get("href")

                floor, penb, state = self.parse_post(link)

                flat = Flat(price=price,
                            title=location,
                            link=link,
                            size=room_coeff,
                            meters=meters,
                            price_per_meter=price_per_meter,
                            floor=floor,
                            penb=penb,
                            state=state)
                self.flats.append(flat.get_cmp_dict())
            except AttributeError:
                pass  # this is an advert
            except Exception:
                # Intentionally silent: the original handler ignored every
                # failure (including "Cena"/"Rezerv" messages raised while
                # reading the detail page) and simply skipped the tile.
                pass