def get_home_parameters(driver):
        """Read the bed, bath and footage values from the listing header.

        The ``h3`` inside the header container is searched for three spans.
        Each span that exists has its text parsed with
        ``ZillowUtil.parse_home_property_value_from_string`` and stored under
        ``"beds"``, ``"baths"`` or ``"footage"``.

        :param driver: object used to locate the header container.
        :return: dict containing only the keys whose span was found.
        """
        summary = driver.find_element_by_class_name(
            ZillowUtil.HEADER_CONTAINER_CLASS_NAME).find_element_by_tag_name(
                "h3")

        # (result key, XPath of the span relative to the summary heading)
        span_xpaths = (
            ("beds", ZillowUtil.BED_ROOMS_SPAN_XPATH),
            ("baths", ZillowUtil.BATH_ROOMS_SPAN_XPATH),
            ("footage", ZillowUtil.FOOTAGE_XPATH),
        )

        parameters = {}
        for key, xpath in span_xpaths:
            if not SeleniumUtil.is_element_exists_by_xpath(summary, xpath):
                continue
            span = summary.find_element_by_xpath(xpath)
            parameters[
                key] = ZillowUtil.parse_home_property_value_from_string(
                    span.text)
        return parameters
    def init_control_button(driver):
        """Inject the start button into the page and arm its click handler.

        If the parsing-progress element is present it is replaced by the
        start-button template; otherwise the template is appended right
        after the ``disabled-map-cover`` element.  The injected
        ``start_parsing_button`` then gets a click listener that, when the
        page URL contains ``_zm`` at an index greater than 0, adds the
        ``start_parsing_launched`` class to the button and removes itself.

        :param driver: Selenium driver showing the page to modify.
        """
        # The template is still embedded as a single-quoted JS literal, so it
        # must not contain unescaped single quotes or raw newlines.
        template_literal = (
            "'" + ZillowUtil.START_BUTTON_CONTAINER_TEMPLATE + "'")

        if SeleniumUtil.is_element_exists_by_id(
                driver, ZillowUtil.PARSING_PROGRESS_ID):
            target = driver.find_element_by_id(ZillowUtil.PARSING_PROGRESS_ID)
            script = "arguments[0].outerHTML = " + template_literal + ";"
        else:
            # Only this branch needs the map cover, so look it up here; a page
            # without it no longer breaks the progress-bar branch above.
            target = driver.find_element_by_id("disabled-map-cover")
            # Read the element's markup inside the browser instead of pasting
            # it into the script text: page markup may contain apostrophes,
            # newlines or backslashes that would corrupt a JS string literal.
            script = ("arguments[0].outerHTML = arguments[0].outerHTML + " +
                      template_literal + ";")
        driver.execute_script(script, target)

        start_parsing_button = SeleniumUtil.get_element_after_loading(
            driver, "start_parsing_button")
        driver.execute_script(
            """\
          arguments[0].addEventListener("click", handle_start_button_click);

          function handle_start_button_click() {
              if (window.location.href.indexOf("_zm") > 0) {
                  this.classList.add('start_parsing_launched');
                  this.removeEventListener('click', handle_start_button_click)
              }
          }
          """, start_parsing_button)
 def get_history_price_change_from_cell(cell):
     """Return the price-change text shown in a price-history cell.

     The text comes from the ``span`` inside the cell's ``delta-value``
     element.  When either element is missing,
     ``ZillowUtil.DEFAULT_PRICE_CHANGE_VALUE`` is returned instead.

     :param cell: element to search for the ``delta-value`` element.
     """
     if not SeleniumUtil.is_element_exists_by_class_name(cell, "delta-value"):
         return ZillowUtil.DEFAULT_PRICE_CHANGE_VALUE

     delta = cell.find_element_by_class_name("delta-value")
     if not SeleniumUtil.is_element_exists_by_tag_name(delta, "span"):
         return ZillowUtil.DEFAULT_PRICE_CHANGE_VALUE

     return delta.find_element_by_tag_name("span").text
 def init_progress_bar(driver):
     """Show the progress bar in place of, or in addition to, page controls.

     If ``start_button_container`` exists it is replaced by the progress-bar
     template; otherwise the template is appended right after the
     ``disabled-map-cover`` element.

     :param driver: Selenium driver showing the page to modify.
     """
     # The template is still embedded as a single-quoted JS literal, so it
     # must not contain unescaped single quotes or raw newlines.
     template_literal = "'" + ZillowUtil.PROGRESS_BAR_TEMPLATE + "'"

     if SeleniumUtil.is_element_exists_by_id(driver,
                                             "start_button_container"):
         target = SeleniumUtil.get_element_after_loading(
             driver, "start_button_container")
         script = "arguments[0].outerHTML = " + template_literal + ";"
     else:
         target = driver.find_element_by_id("disabled-map-cover")
         # Read the element's markup inside the browser instead of pasting it
         # into the script text: page markup may contain apostrophes,
         # newlines or backslashes that would corrupt a JS string literal.
         script = ("arguments[0].outerHTML = arguments[0].outerHTML + " +
                   template_literal + ";")
     driver.execute_script(script, target)
    def get_starting_positions(driver):
        """Read the x/y start offsets from the ``x_start``/``y_start`` fields.

        A coordinate is ``0`` when its field is absent from the page or when
        its ``value`` is missing, empty or not an integer.  The invalid-value
        case previously raised; it now falls back to the default in the same
        way ``get_radius_value`` does.

        :param driver: Selenium driver showing the page with the fields.
        :return: tuple ``(x_val, y_val)`` of ints.
        """

        def read_position(element_id):
            """Return the integer value of the field, or 0 if unusable."""
            if not SeleniumUtil.is_element_exists_by_id(driver, element_id):
                return 0
            raw_value = driver.find_element_by_id(element_id).get_attribute(
                "value")
            try:
                return int(raw_value)
            except (TypeError, ValueError):
                # None (no value attribute), "" or non-numeric text.
                return 0

        x_val = read_position("x_start")
        y_val = read_position("y_start")
        return x_val, y_val
 def get_radius_value(driver):
     """Return the radius typed into the ``parsing_radius`` field.

     Falls back to ``ZillowUtil.DEFAULT_RADIUS_VALUE`` when the field is not
     on the page, is empty, or fails the ``isInt`` check.

     :param driver: Selenium driver showing the page with the field.
     """
     if not SeleniumUtil.is_element_exists_by_id(driver, "parsing_radius"):
         return ZillowUtil.DEFAULT_RADIUS_VALUE

     raw_radius = driver.find_element_by_id("parsing_radius").get_attribute(
         "value")
     if raw_radius == "" or not isInt(raw_radius):
         return ZillowUtil.DEFAULT_RADIUS_VALUE
     return int(raw_radius)
 def is_price_history_filled(tbody):
     """Tell whether the price-history table body holds real rows.

     Returns ``False`` when there is no ``tr`` at all, or when the only
     ``tr`` has exactly one ``td`` (presumably a "no data" placeholder row;
     confirm against the page markup).  Any other content counts as filled.

     :param tbody: the table body element to inspect.
     """
     if not SeleniumUtil.is_element_exists_by_tag_name(tbody, "tr"):
         return False
     if len(tbody.find_elements_by_tag_name("tr")) != 1:
         return True

     only_row = tbody.find_element_by_tag_name("tr")
     return len(only_row.find_elements_by_tag_name("td")) != 1
    def handle_full_data_page(driver, full_data_page_url):
        """Open a home's full page, parse it and save the result.

        After loading the URL the method waits
        ``ZillowUtil.POP_UP_APPEARING_DELAY_IN_SEC`` seconds and scrolls the
        page down.  If the content pop-up is displayed, nothing is parsed and
        a diagnostic line is printed instead.  A ``NoSuchElementException``
        raised anywhere in that sequence is reported on stdout and swallowed.

        :param driver: Selenium driver used to load the page.
        :param full_data_page_url: URL of the home's full data page.
        """
        try:
            driver.get(full_data_page_url)
            sleep(ZillowUtil.POP_UP_APPEARING_DELAY_IN_SEC)
            SeleniumUtil.scroll_page_down(driver)

            if not ZillowUtil.is_content_pop_up_displayed(driver):
                ZillowUtil.save_home_data_to_file(
                    ZillowUtil.parse_home_data(driver))
            else:
                print("Test: Full Page Popup")

        except NoSuchElementException as error:
            print("Can't parse url ", full_data_page_url)
            print(error, "\n", format_exception())
 def is_content_pop_up_displayed(driver):
     """Tell whether the ``search-detail-lightbox`` pop-up is focused.

     :param driver: Selenium driver showing the page to inspect.
     :return: ``True`` when the lightbox element exists and its ``class``
         attribute contains ``yui3-lightbox-focused``; ``False`` otherwise.
     """
     if not SeleniumUtil.is_element_exists_by_id(driver,
                                                 "search-detail-lightbox"):
         return False

     lightbox = driver.find_element_by_id("search-detail-lightbox")
     # get_attribute() yields None when the element has no class attribute;
     # treat that as "no classes" instead of failing on `in None`.
     classes = lightbox.get_attribute("class") or ""
     return "yui3-lightbox-focused" in classes
 def get_amount_for_not_sold_status(driver):
     """Return the amount shown next to the for-sale status element.

     The amount is the text of the ``div`` that follows the status element
     as a sibling, passed through ``remove_dollar_sign``.  When the status
     element is absent the integer ``0`` is returned.

     :param driver: Selenium driver showing the home page.
     """
     status_class = ZillowUtil.FOR_SALE_STATUS_CLASS_NAME
     if not SeleniumUtil.is_element_exists_by_class_name(driver, status_class):
         return 0

     status = driver.find_element_by_class_name(status_class)
     amount = status.find_element_by_xpath(".//following-sibling::div")
     return remove_dollar_sign(amount.text)
    def parse_price_history(driver):
        """Parse the price-history table into a list of event dicts.

        Returns ``None`` when the price-history container is missing or its
        table body is not filled.  Otherwise the table is maximized and every
        ``tr`` becomes one dict; ids count up from
        ``ZillowUtil.START_EVENT_ID``.  The third cell of a row feeds the
        price, price change, rental flag and listing-removed flag.

        :param driver: Selenium driver showing the home page.
        :return: list of dicts, or ``None``.
        """
        if not SeleniumUtil.is_element_exists_by_id(
                driver, ZillowUtil.PRICE_HISTORY_ID):
            return None

        container = driver.find_element_by_id(ZillowUtil.PRICE_HISTORY_ID)
        tbody = SeleniumUtil.get_element_after_loading(
            container, "tbody", SeleniumUtil.WAIT_ELEMENT_TAG_NAME_TYPE)
        if not ZillowUtil.is_price_history_filled(tbody):
            return None

        ZillowUtil.maximize_price_history_display(container, tbody)
        rows = tbody.find_elements_by_tag_name("tr")

        events = []
        for event_id, row in enumerate(rows, ZillowUtil.START_EVENT_ID):
            cells = row.find_elements_by_tag_name("td")
            events.append({
                "id": event_id,
                "event_date": cells[0].text,
                "event": cells[1].text,
                "price": ZillowUtil.get_history_price_from_cell(cells[2]),
                "price_change":
                ZillowUtil.get_history_price_change_from_cell(cells[2]),
                "agents": ZillowUtil.get_agents_name(container, cells),
                "rental": ZillowUtil.check_home_rental_for_event(cells[2]),
                "listing_removed": ZillowUtil.check_listing_removed(
                    event_id, cells[2], cells[1].text),
            })
        return events
 def get_full_page_url(driver):
     """Return the URL of the full page for the currently opened home.

     Waits for the ``zsg-subfooter-content`` element first.  If the expand
     button link is present its ``href`` is returned as-is.  Otherwise the
     first ``a`` inside ``hdp-popout-menu`` is used, and
     ``ZillowUtil.MAIN_PAGE_URL`` is prepended when the ``href`` does not
     already contain it.

     :param driver: Selenium driver showing the home pop-up.
     """
     SeleniumUtil.get_element_after_loading(
         driver, "zsg-subfooter-content",
         SeleniumUtil.WAIT_ELEMENT_CLASS_TYPE)

     if SeleniumUtil.is_element_exists_by_xpath(
             driver, ZillowUtil.EXPAND_BUTTON_LINK_XPATH):
         return driver.find_element_by_xpath(
             ZillowUtil.EXPAND_BUTTON_LINK_XPATH).get_attribute("href")

     popout_menu = SeleniumUtil.get_element_after_loading(
         driver, "hdp-popout-menu", SeleniumUtil.WAIT_ELEMENT_CLASS_TYPE)
     href = popout_menu.find_element_by_tag_name("a").get_attribute("href")
     if ZillowUtil.MAIN_PAGE_URL in href:
         return href
     return ZillowUtil.MAIN_PAGE_URL + href
 def parse_home_facts(driver):
     """Collect the "at a glance" facts of a home into a dict.

     Each direct ``div`` child of the ``zsg-g_gutterless`` grid that contains
     one of ``ZillowUtil.FACT_ICONS_CLASSES`` contributes one entry.  The key
     is the heading text, lower-cased with spaces replaced by underscores.
     The value is the fact's text, or ``None`` when it equals
     ``ZillowUtil.FACTS_NO_DATA_VALUE`` ignoring case.

     :param driver: Selenium driver showing the home page.
     :return: dict of facts; empty when the facts container is missing.
     """
     facts = {}
     if not SeleniumUtil.is_element_exists_by_class_name(
             driver, ZillowUtil.FACTS_CONTAINER_CLASS_NAME):
         return facts

     glance_grid = driver.find_element_by_class_name(
         ZillowUtil.FACTS_CONTAINER_CLASS_NAME).find_element_by_class_name(
             "zsg-g_gutterless")

     for fact in glance_grid.find_elements_by_xpath("./div"):
         for icon_class in ZillowUtil.FACT_ICONS_CLASSES:
             if not SeleniumUtil.is_element_exists_by_class_name(
                     fact, icon_class):
                 continue
             heading = fact.find_element_by_class_name(
                 "hdp-fact-ataglance-heading").text
             value = fact.find_element_by_class_name(
                 "hdp-fact-ataglance-value").text
             if value.lower() == ZillowUtil.FACTS_NO_DATA_VALUE.lower():
                 value = None
             facts[heading.replace(" ", "_").lower()] = value
     return facts
    def parse_home_data(driver):
        """Assemble the full record for the home shown by ``driver``.

        Waits for the ``zsg-subfooter-content`` element, then gathers city,
        address, sale status, amount, room parameters, facts, URL and price
        history through the ``ZillowUtil`` helpers.  When the status data has
        no amount, ``get_amount_for_not_sold_status`` supplies it.

        :param driver: Selenium driver showing the home's full page.
        :return: dict describing the home.
        """
        SeleniumUtil.get_element_after_loading(
            driver, "zsg-subfooter-content",
            SeleniumUtil.WAIT_ELEMENT_CLASS_TYPE)
        print("TEST: getting city")
        city = ZillowUtil.get_city_name(driver)
        address = ZillowUtil.get_street_address(driver)
        status_data = ZillowUtil.get_status_data(driver)

        amount = status_data.get("amount")
        if amount is None:
            amount = ZillowUtil.get_amount_for_not_sold_status(driver)

        home_parameters = ZillowUtil.get_home_parameters(driver)
        home_facts = ZillowUtil.parse_home_facts(driver)

        # Keys are inserted in the same order as the record layout.
        home_data = {
            "unique_id": ZillowUtil.get_unique_id(city, address),
            "full_address": ZillowUtil.get_full_address(driver),
            "sale_status": status_data.get("status"),
            "amount": amount,
            "zestimate": ZillowUtil.get_zestimate_value(driver),
        }
        for key in ("beds", "baths", "footage"):
            home_data[key] = home_parameters.get(key)
        for key in ("type", "year_built", "heating", "cooling", "parking"):
            home_data[key] = home_facts.get(key)
        home_data["mls"] = home_facts.get("mls#")
        home_data["url"] = ZillowUtil.get_url(driver)
        home_data["price_history"] = ZillowUtil.parse_price_history(driver)
        return home_data
Beispiel #15
0
    def test_login_pc(self):
        """Open the PC login page in a fresh driver and print the URL reached.

        The driver is disposed afterwards even if opening the page fails.
        """
        selenium = None
        try:
            selenium = SeleniumUtil()

            # Initialize the driver.
            selenium.build_pc_driver()

            login_page = LoginPagePC(selenium)
            login_page.open()
            current_url = selenium.driver().current_url
            print(current_url)

        finally:
            # Still None when the SeleniumUtil constructor itself raised.
            if selenium:
                selenium.dispose_driver()
Beispiel #16
0
def handler(event, context):
    """Open the PC login page and return the URL the driver ends up on.

    Entry point with an ``(event, context)`` signature; neither argument is
    used.  The driver is disposed before returning, also on failure.

    :return: ``current_url`` of the driver after the login page was opened.
    """
    selenium = None
    try:
        selenium = SeleniumUtil()

        # Initialize the driver.
        selenium.build_pc_driver()

        login_page = LoginPagePC(selenium)
        login_page.open()
        return selenium.driver().current_url

    finally:
        # Still None when the SeleniumUtil constructor itself raised.
        if selenium:
            selenium.dispose_driver()
 def is_control_button_displayed(driver):
     """Report whether an element with id ``start_parsing_button`` exists.

     :param driver: Selenium driver showing the page to inspect.
     """
     button_exists = SeleniumUtil.is_element_exists_by_id(
         driver, "start_parsing_button")
     return button_exists
 def get_city_name(driver):
     """Return the text of the first link inside the region/city container.

     The container is located by ``ZillowUtil.REGION_CITY_CONTAINER_ID``
     through ``SeleniumUtil.get_element_after_loading``.

     :param driver: Selenium driver showing the home page.
     """
     region_city = SeleniumUtil.get_element_after_loading(
         driver, ZillowUtil.REGION_CITY_CONTAINER_ID)
     city_link = region_city.find_element_by_tag_name('a')
     return city_link.text
 def is_price_history_minimized(price_history_container, tbody):
     """Tell whether the price-history table is currently collapsed.

     The table counts as minimized when its ``class`` attribute contains
     ``yui3-toggle-content-minimized`` and ``tbody`` contains an element
     with class ``minimize``.

     :param price_history_container: element holding the history ``table``.
     :param tbody: body of that table.
     """
     table = price_history_container.find_element_by_tag_name("table")
     # get_attribute() yields None when the table has no class attribute;
     # treat that as "no classes" instead of failing on `in None`.
     table_classes = table.get_attribute("class") or ""
     if "yui3-toggle-content-minimized" not in table_classes:
         return False
     return SeleniumUtil.is_element_exists_by_class_name(tbody, "minimize")
 def get_map_points_layer(driver):
     """Return the points-layer element of the search map.

     The map container is awaited by ``ZillowUtil.SEARCH_MAP_CONTAINER_ID``.
     The layer is then awaited inside it by
     ``ZillowUtil.POINTS_LAYER_CLASS_NAME``.

     :param driver: Selenium driver showing the map page.
     """
     search_map = SeleniumUtil.get_element_after_loading(
         driver, ZillowUtil.SEARCH_MAP_CONTAINER_ID)
     points_layer = SeleniumUtil.get_element_after_loading(
         search_map, ZillowUtil.POINTS_LAYER_CLASS_NAME,
         SeleniumUtil.WAIT_ELEMENT_CLASS_TYPE)
     return points_layer
    def handle_selected_map_area(driver, current_x_position,
                                 current_y_position):
        """Sweep the map's points layer on a grid and scrape each home found.

        The points layer is clicked every ``ZillowUtil.STEP_SIZE_IN_PX``
        pixels, with x decreasing within a row and y decreasing between rows.
        A ``current_y_position`` of ``0`` starts one step inside the layer's
        height; a non-positive ``current_x_position`` starts one step inside
        its width.  When a click opens a home pop-up whose URL has not been
        handled during this sweep, the home's full page is loaded and parsed
        in a second driver.  The pop-up is then closed, or the start URL is
        reloaded when it cannot be closed.

        The second driver is always closed, even when the sweep aborts with
        an exception.

        :param driver: Selenium driver showing the map search page.
        :param current_x_position: x offset to resume from.
        :param current_y_position: y offset to resume from.
        """
        step = ZillowUtil.STEP_SIZE_IN_PX
        start_url = driver.current_url
        visited_urls = set()

        def reload_map():
            """Reopen the start URL and return the fresh points layer."""
            driver.get(start_url)
            return ZillowUtil.get_map_points_layer(driver)

        def drifted_from_start():
            """Tell whether under 85% of the start URL still matches.

            Characters are compared position by position against the
            driver's current URL.
            """
            matching = sum(
                1 for expected, actual in zip(start_url, driver.current_url)
                if expected == actual)
            return matching / len(start_url) < .85

        full_page_window = SeleniumUtil.get_driver()
        try:
            points_layer = ZillowUtil.get_map_points_layer(driver)
            width = get_digits_from_string(
                points_layer.value_of_css_property("width"))
            height = get_digits_from_string(
                points_layer.value_of_css_property("height"))

            if current_y_position == 0:
                current_y_position = height - step

            while current_y_position > 0:
                if current_x_position <= 0:
                    current_x_position = width - step

                try:
                    while current_x_position > 0:
                        if drifted_from_start():
                            points_layer = reload_map()

                        SeleniumUtil.click_on_position(
                            driver, points_layer, current_x_position,
                            current_y_position)
                        print("TEST: Clicked")
                        sleep(ZillowUtil.POP_UP_APPEARING_DELAY_IN_SEC)

                        if (start_url != driver.current_url and
                                ZillowUtil.is_content_pop_up_displayed(
                                    driver)):
                            # Scrape only homes not handled in this sweep.
                            popup_url = driver.current_url
                            if popup_url not in visited_urls:
                                ZillowUtil.handle_full_data_page(
                                    full_page_window,
                                    ZillowUtil.get_full_page_url(driver))
                                visited_urls.add(popup_url)

                            # Reset on every pass so a failed lookup can
                            # neither leave the name unbound nor reuse a
                            # stale button from an earlier pop-up.
                            close_button = None
                            try:
                                close_button = (
                                    driver.find_element_by_class_name(
                                        ZillowUtil.CLOSE_POP_UP_BUTTON_CLICK))
                            except Exception:
                                # Best effort: any lookup failure falls back
                                # to reloading the map below.  Catching
                                # Exception rather than using a bare except
                                # keeps Ctrl-C working.
                                print("Test: closeElement Except")

                            if close_button is None:
                                points_layer = reload_map()
                                print("Refreshed")
                            else:
                                try:
                                    close_button.click()
                                except Exception:
                                    print("Test: no closeElement.click()")
                                    points_layer = reload_map()
                        else:
                            print("x:", current_x_position, "y: ",
                                  current_y_position, "doesn't have homes")

                        current_x_position -= step

                        print("Increment x:", current_x_position, "y: ",
                              current_y_position)

                    current_y_position -= step

                except StaleElementReferenceException:
                    # The row is retried from the current x after a reload.
                    print(
                        "The page was realoded. Updating link to points layer")
                    points_layer = reload_map()
        finally:
            # Release the second browser even when the sweep aborts.
            SeleniumUtil.close_driver(full_page_window)
 def get_agents_name(driver, row_cells):
     """Return the agents text of a history row, or ``""`` if there is none.

     The fourth cell's text is used when an element with class ``ph-agents``
     exists under ``driver``; otherwise the empty string is returned.

     :param driver: driver or element searched for ``ph-agents``.
     :param row_cells: cells of the history row.
     """
     has_agents_column = SeleniumUtil.is_element_exists_by_class_name(
         driver, "ph-agents")
     return row_cells[3].text if has_agents_column else ""
 def is_parsing_start_button_clicked(driver):
     """Report whether an element with class ``start_parsing_launched`` exists.

     :param driver: Selenium driver showing the page to inspect.
     """
     launched = SeleniumUtil.is_element_exists_by_class_name(
         driver, "start_parsing_launched")
     return launched