Ejemplo n.º 1
0
def deal_with_feature(features: dict, x_request_id: str,
                      residence_id: int) -> dict:
    """
    Function responsible for sending every feature
        of a residence to be created, one
        create_residence_features call per key/value pair.

    Parameters:
        features: dict
            feature key mapped to its feature value
        x_request_id: str
            id forwarded to every call
        residence_id: int
            residence the features belong to

    Returns:
        No value is returned explicitly, so the result is None
            even though the signature is annotated with dict.
    """
    try:
        for feature_key, feature_value in features.items():
            create_residence_features(
                x_request_id=x_request_id,
                residence_id=residence_id,
                residence_feature_key=feature_key,
                residence_feature_value=feature_value,
            )
    except (
            AttributeError,
            IndexError,
            NotImplementedError,
            SyntaxError,
    ) as failure:
        # Any of the listed failures is handed over to error_handler.
        error_handler(
            x_request_id=x_request_id,
            exception=failure,
            _msg="Exception occurred in deal with feature.",
        )
def get_furniture_flag(x_request_id: str, driver) -> bool:
    """
    Function responsible for telling whether the residence
        page reports furniture.

    Parameters:
            x_request_id: unique id
            driver: google chrome instance, queried by XPath
    Returns:
        bool: False when the lower-cased element text contains "sem",
            True otherwise. The result is None when the element is
            falsy, or after a handled exception if error_handler returns.
    """
    send_log(x_request_id=x_request_id,
             message="Searching for a furniture flag...")
    sleep(number=2)
    try:
        furniture_element = driver.find_element_by_xpath(
            "/html/body/div[1]/div/main/section/div/div[1]/div/div[3]/div/div[7]/div/div/span"
        )
        if not furniture_element:
            return None

        send_log(
            x_request_id=x_request_id,
            message="Found information about furniture in the residence...",
        )
        # "sem" is Portuguese for "without".
        furniture_text = furniture_element.text.lower()
        return "sem" not in furniture_text
    except (AttributeError, NoSuchElementException) as failure:
        error_handler(x_request_id=x_request_id, exception=failure)
def get_link_of_resident_block(x_request_id, div_number_row: int,
                               div_number_column: int, driver) -> classmethod:
    """
    Function responsible for locating the anchor of one residence
        block in the QuintoAndar homepage grid.

        Parameters:
            x_request_id: UniqueId
            div_number_row: Number of the block in row in the page
            div_number_column: Number of the block in column in page
            driver: Google Chrome instance

        Returns
            The object returned by driver.find_element_by_xpath for the
                anchor, or None when that object is falsy. The result is
                also None after a handled exception if error_handler
                returns.
    """
    log_text = (f"Getting link of a respective residence base on row "
                f"{div_number_row} and column {div_number_column}...")
    send_log(x_request_id=x_request_id, message=log_text)
    try:
        # Row and column select the cell inside the results grid.
        anchor_xpath = (
            "/html/body/div[1]/main"
            "/section[2]/div[2]/div"
            f"/div[1]/div[{div_number_row}]/div[{div_number_column}]/div/a")
        anchor = driver.find_element_by_xpath(anchor_xpath)
        if anchor:
            return anchor
        return None
    except (AttributeError, NoSuchElementException) as failure:
        error_handler(
            x_request_id=x_request_id,
            _msg="Exception occurred get_link_of_resident_block",
            exception=failure,
        )
def pet_flag(x_request_id: str, driver) -> bool:
    """
        Function responsible for telling whether the residence
            page allows pets.

        Parameters:
                x_request_id: unique id
                driver: google chrome instance
    Returns:
        bool: False when the element text contains "Não" or "Nao",
            True otherwise. The result is None when the element is
            falsy, or after a handled exception if error_handler returns.
    """
    send_log(x_request_id=x_request_id, message="Searching for pet flag...")
    sleep(number=2)
    try:
        pet_element = driver.find_element_by_xpath(
            "/html/body/div[1]/div/main/section/div/div[1]/div/div[3]/div/div[6]/div/div/span"
        )
        if not pet_element:
            return None

        send_log(
            x_request_id=x_request_id,
            message="Found information about pet flag...",
        )

        pet_flag_text = pet_element.text

        send_log(
            x_request_id=x_request_id,
            message=f"Pet flag informatio is {pet_flag_text}",
        )

        # Both the accented and the unaccented spelling count as "no".
        return "Não" not in pet_flag_text and "Nao" not in pet_flag_text
    except (AttributeError, NoSuchElementException) as failure:
        error_handler(x_request_id=x_request_id, exception=failure)
def scroll_quinto_andar_page(x_request_id: str, div_number_row: int,
                             driver) -> None:
    """
    Function responsible for scrolling the quinto andar page by
        moving to the div located two rows after div_number_row.

    Parameters:
            x_request_id: Unique Id.
            div_number_row: Number of the block in row in the page
            driver: google chrome instance

    Returns:
            None
    """
    try:
        target_row = div_number_row + 2
        target = driver.find_element_by_xpath(
            f"/html/body/div[1]/main/section[2]/div[2]/div/div[1]/div[{target_row}]"
        )
        sleep(number=3)
        if not target:
            return None

        scroll_action = ActionChains(driver)
        scroll_action.move_to_element(target)
        scroll_action.perform()
    except (ElementClickInterceptedException, AttributeError) as failure:
        error_handler(x_request_id=x_request_id, exception=failure)
Ejemplo n.º 6
0
def get_metro_flag(x_request_id: str, driver) -> bool:
    """
        Function responsible for identify if has metro close to the residence.

        Parameters:
                x_request_id: unique id
                driver: google chrome instance
    Returns:
        bool: False when the element text contains "Não", True otherwise.
            The result is None when the element is falsy, or after a
            handled exception if error_handler returns.
    """
    send_log(x_request_id=x_request_id, message="Searching for subway flag...")
    sleep(number=2)
    try:
        metro_flag_data = driver.find_element_by_xpath(
            "/html/body/div[1]/div/main/section/div/div[1]/div/div[3]/div/div[8]/div/div/span"
        )
        if not metro_flag_data:
            return None

        send_log(
            x_request_id=x_request_id,
            message="Found information about subway...",
        )

        metro_flag_text = metro_flag_data.text

        # str.find() returns -1 (truthy) on a miss and 0 (falsy) only for
        # a match at the very start, so bool(find(...)) answered True
        # whenever "Não" appeared anywhere after index 0. A membership
        # test gives the intended answer wherever the word sits.
        return "Não" not in metro_flag_text
    except (AttributeError, NoSuchElementException) as exception:
        error_handler(x_request_id=x_request_id, exception=exception)
Ejemplo n.º 7
0
def receive_messages(queue: any, max_number: int, wait_time: int):
    """
    Fetch one batch of messages from an SQS queue in a single request.
    Parameters:
            queue: any The queue to read from; its receive_messages
                method is called.
            max_number: any Upper bound on the number of messages
                requested. Fewer may come back.
            wait_time: any Maximum number of seconds to wait before
                returning. A value above zero enables long polling.
    Returns:
            Whatever queue.receive_messages returns (the received
                Message objects). The result is None after a handled
                ClientError if error_handler returns.
    """
    try:
        # All message attributes are requested together with the body.
        return queue.receive_messages(
            MessageAttributeNames=["All"],
            MaxNumberOfMessages=max_number,
            WaitTimeSeconds=wait_time,
        )
    except ClientError as failure:
        error_handler(exception=failure)
Ejemplo n.º 8
0
def get_type_residence(x_request_id: str, driver) -> str:
    """
        Function responsible for classifying the residence from
            the page heading.

        Parameters:
                x_request_id: unique id
                driver: google chrome instance
    Returns:
        str: "house" when the lower-cased heading contains "casa",
            "apartment" otherwise. The result is None when the heading
            element is falsy, or after a handled exception if
            error_handler returns.
    """
    send_log(
        x_request_id=x_request_id,
        message="Searching for the type of residence...",
    )
    sleep(number=2)
    try:
        heading = driver.find_element_by_xpath(
            "/html/body/div[1]/div/main/section/div/div[1]/div/div[2]/div[1]/h1"
        )
        if not heading:
            return None

        send_log(
            x_request_id=x_request_id,
            message="Found the type of residence...",
        )

        heading_text = heading.text.lower()
        return "house" if "casa" in heading_text else "apartment"
    except (AttributeError, NoSuchElementException) as failure:
        error_handler(x_request_id=x_request_id, exception=failure)
Ejemplo n.º 9
0
def dealing_with_empty_queue(queue: any) -> None:
    """
    Function responsible for publishing the default
        scraper event when the SQS Queue
        has no messages.

    Parameters:
        queue: AWS SQS Queue
    Returns:
        None
    """
    try:
        # Default event: quinto-andar scraper for the "default" consumer,
        # sent with an empty request id.
        default_event = {
            "x-request-id": "",
            "events": ["quintoAndarScraper"],
            "data": {
                "consumer_name": "default",
                "type_scraper": "quinto-andar",
            },
        }
        serialized_event = json.dumps(default_event)
        send_message(
            x_request_id="",
            queue=queue,
            message_body=serialized_event,
            message_attributes={},
        )
    except (AttributeError, ClientError) as failure:
        error_handler(
            _msg="Exception occurred in dealing_with_empty_queue",
            exception=failure,
        )
Ejemplo n.º 10
0
def scraper_flow(x_request_id: str, driver: any):
    """
        Function responsible for running the QuintoAndar scraper flow:
            log the start time, run recursive_scraper_logic with the
            values read from quinto_andar, wait and log the end.

    Parameters:
        x_request_id: Unique id.
        driver: Google Chrome instance

    Returns:
        void
    """
    try:
        started_at = time.time()
        send_log(
            x_request_id=x_request_id,
            message=f"Initiating the flow of scraper. Time: {started_at}",
        )
        # Starting grid position and scraping limit come from quinto_andar.
        recursive_scraper_logic(
            x_request_id=x_request_id,
            div_number_row=quinto_andar["div_number_row_initiator"],
            div_number_column=quinto_andar["div_number_column_initiator"],
            limit_scraper=quinto_andar["limit_scraper"],
            timeout_start=started_at,
            driver=driver,
        )
        sleep(10)
        send_log(x_request_id=x_request_id,
                 message="Finished the flow of scraper.")
    except (WebDriverException, ElementNotInteractableException) as failure:
        error_handler(
            x_request_id=x_request_id,
            _msg="Exception occurred on scraper_flow",
            exception=failure,
        )
Ejemplo n.º 11
0
def number_of_rooms(x_request_id: str, driver) -> int:
    """
        Function responsible for reading the number of rooms
            from the residence page.

        Parameters:
                x_request_id: unique id
                driver: google chrome instance
    Returns:
        int: first run of digits found in the element text, or 0 when
            verification_string_has_digit is falsy for that text. The
            result is None when the element is falsy, or after a
            handled exception if error_handler returns.
    """
    send_log(x_request_id=x_request_id,
             message="Searching for number of rooms...")
    sleep(number=2)
    try:
        rooms_element = driver.find_element_by_xpath(
            "/html/body/div[1]/div/main/section/div/div[1]/div/div[3]/div/div[2]/div/div"
        )
        if not rooms_element:
            return None

        send_log(
            x_request_id=x_request_id,
            message="Found information about number of rooms...",
        )

        rooms_text = rooms_element.text

        # Without any digit in the text there is nothing to parse.
        if not verification_string_has_digit(x_request_id=x_request_id,
                                             text=rooms_text):
            return 0
        return int(re.findall(r"\d+", rooms_text)[0])
    except (AttributeError, NoSuchElementException) as failure:
        error_handler(x_request_id=x_request_id, exception=failure)
Ejemplo n.º 12
0
def get_residence_id(x_request_id: str, driver: any) -> int:
    """
    Function responsible for reading the residence id from the
        breadcrumb link of the residence page.

    Parameters:
            x_request_id: unique id
            driver: google chrome instance
    Returns:
        int: first run of digits found in the link text, or 0 when
            verification_string_has_digit is falsy for that text. The
            result is None when the element is falsy, or after a
            handled exception if error_handler returns.
    """
    send_log(x_request_id=x_request_id,
             message="Searching for the residence id...")
    sleep(number=2)
    try:
        id_element = driver.find_element_by_xpath(
            "/html/body/div[1]/div/main/section/div/div[1]/nav/ol/li[5]/a")
        if not id_element:
            return None

        send_log(
            x_request_id=x_request_id,
            message="Found id of residence...",
        )

        id_text = id_element.text

        # Without any digit in the text there is nothing to parse.
        if not verification_string_has_digit(x_request_id=x_request_id,
                                             text=id_text):
            return 0
        return int(re.findall(r"\d+", id_text)[0])
    except (AttributeError, NoSuchElementException) as failure:
        error_handler(x_request_id=x_request_id, exception=failure)
Ejemplo n.º 13
0
def residence_size(x_request_id: str, driver) -> int:
    """
        Function responsible for reading the size of the residence
            from the residence page.

        Parameters:
                x_request_id: unique id
                driver: google chrome instance
    Returns:
        int: first run of digits found in the element text, or 0 when
            verification_string_has_digit is falsy for that text. The
            result is None when the element is falsy, or after a
            handled exception if error_handler returns.
    """
    # NOTE(review): the log messages below mention bedrooms although the
    # function reads the size; looks like a copy/paste leftover - confirm
    # before rewording, since the log text may be consumed elsewhere.
    send_log(
        x_request_id=x_request_id,
        message="Searching for the number of bedrooms...",
    )
    sleep(number=2)
    try:
        size_element = driver.find_element_by_xpath(
            "/html/body/div[1]/div/main/section/div/div[1]/div/div[3]/div/div[1]/div/div/span"
        )
        if not size_element:
            return None

        send_log(
            x_request_id=x_request_id,
            message="Found information about bedrooms...",
        )

        size_text = size_element.text

        # Without any digit in the text there is nothing to parse.
        if not verification_string_has_digit(x_request_id=x_request_id,
                                             text=size_text):
            return 0
        return int(re.findall(r"\d+", size_text)[0])
    except (AttributeError, NoSuchElementException) as failure:
        error_handler(x_request_id=x_request_id, exception=failure)
def resident_localization_data(x_request_id: str, driver) -> list:
    """
        Function responsible for reading the address line
            of a specific residence.

        Parameters:
                x_request_id: unique id
                driver: google chrome instance
    Returns:
        list: the address text split on ",", or None when that text is
            empty. The result is also None when the element is falsy,
            or after a handled exception if error_handler returns.
    """
    send_log(
        x_request_id=x_request_id,
        message="Searching for address of residence...",
    )
    sleep(number=7)
    try:
        address_element = driver.find_element_by_xpath(
            "/html/body/div[1]/div/main/section/div/div[1]/div/div[2]/div[2]/p"
        )
        if not address_element:
            return None

        send_log(
            x_request_id=x_request_id,
            message="Found information about address...",
        )

        address_text = address_element.text
        if not address_text:
            return None
        return address_text.split(",")
    except (AttributeError, NoSuchElementException) as failure:
        error_handler(x_request_id=x_request_id, exception=failure)
Ejemplo n.º 15
0
def get_rent_values(x_request_id: str, driver) -> dict:
    """
        Function responsible for reading every amount that composes
            the rent of the residence.

        Parameters:
                x_request_id: unique id
                driver: google chrome instance
    Returns:
        dict: keys rent_without_taxes, condominium_tax, house_tax,
            fire_insurance, service_tax and total_rent_value. When
            exactly six amounts are matched each key holds the matched
            string, in that order; otherwise every key still holds the
            int placeholder. The result is None when the element or its
            text is falsy, or after a handled exception if
            error_handler returns.
    """
    send_log(
        x_request_id=x_request_id,
        message="Searching for a values of the rent...",
    )
    try:
        sleep(number=2)
        rent_list_element = driver.find_element_by_xpath(
            "/html/body/div[1]/div/main/section/div/div[2]/section/div/ul")
        if not rent_list_element:
            return None

        send_log(x_request_id=x_request_id, message="Found the values...")

        field_names = (
            "rent_without_taxes",
            "condominium_tax",
            "house_tax",
            "fire_insurance",
            "service_tax",
            "total_rent_value",
        )
        # NOTE(review): the placeholder is the int type object itself, so
        # callers receive type objects whenever the match count is not 6;
        # confirm what callers expect before changing it.
        rent_values_dict = dict.fromkeys(field_names, int)

        raw_text = rent_list_element.text
        if not raw_text:
            return None

        # "Incluso" entries are counted as a zero amount.
        amounts = re.findall(r"(?<![.,])\d+[,.]{0,1}\d*",
                             raw_text.replace("Incluso", "0"))

        # Amounts are only mapped when all six of them were found.
        if len(amounts) == 6:
            rent_values_dict.update(zip(field_names, amounts))

        return rent_values_dict
    except (AttributeError, NoSuchElementException) as failure:
        error_handler(x_request_id=x_request_id, exception=failure)
Ejemplo n.º 16
0
def create(x_request_id: str, data: dict, table_name: str) -> dict:
    """
    Function responsible for create data
        scraped, by posting it to the backoffice API.

    Parameters:
        x_request_id: str
        data: dict
            record to be stored
        table_name: str
            sent as "tableName" in the request body

    Returns:
        The "data" entry of the decoded response. When a handled
            exception occurs, whatever error_handler returns.
    """
    body = {"data": data, "tableName": table_name}
    # NOTE(review): the endpoint is hard-coded below; a config key named
    # "wmh_backoffice_endpoint" appears to have been intended - confirm.
    token = config.get("wmh_backoffice_token", None)
    try:
        response = api_integration(
            x_request_id=x_request_id,
            url="https://wmhbackoffice-prod.onrender.com/v1/wmh/update-data",
            token=token,
            body=body,
        )
        # api_integration hands back the requests response object.
        # Iterating that object yields raw byte chunks, so dict(response)
        # cannot decode the body; use its JSON decoder when available and
        # keep dict() for plain mapping-like results.
        if hasattr(response, "json"):
            payload = response.json()
        else:
            payload = dict(response)
        return payload["data"]
    except (
            AssertionError,
            AttributeError,
            IndexError,
            KeyError,
            TypeError,
            ValueError,
    ) as exception:
        # TypeError/ValueError cover a missing response and a body that
        # is not valid JSON or not a mapping.
        return error_handler(
            x_request_id=x_request_id,
            _msg="Exception occurred in create service.",
            exception=exception,
        )
Ejemplo n.º 17
0
def scraper_initiator(x_request_id: str, properties: str, driver: any) -> None:
    """
        Function responsible for starting the scraper selected
            by properties. Only "quinto-andar" is handled; any other
            value does nothing.

    Parameters:
        x_request_id: unique id
        driver: google chrome instance
        properties: type of scraper that going to initiate

    Returns:
        None
    """
    try:
        if properties != "quinto-andar":
            return None
        homepage(x_request_id=x_request_id, driver=driver)
    except AttributeError as failure:
        error_handler(x_request_id=x_request_id, exception=failure)
Ejemplo n.º 18
0
def send_message(
    x_request_id: str,
    queue,
    message_body,
    message_attributes=None,
    thread_number: int = 0,
) -> None:
    """
    Publish one message to an Amazon SQS queue and log its body.

    Parameters:
        x_request_id: unique id
        queue: The queue that receives the message.
        message_body: The message body; it is parsed with json.loads
            after sending, for the log entry.
        message_attributes: any
            replaced by an empty dict when falsy
        thread_number: int
            used to build the MessageGroupId
                ("wmh_scraper_<thread_number>")

    Returns:
        None
    """
    message_attributes = message_attributes or {}
    try:
        queue.send_message(
            MessageBody=message_body,
            MessageAttributes=message_attributes,
            MessageDeduplicationId=f"wmh_scraper_{random_number(10000)}",
            MessageGroupId=f"wmh_scraper_{thread_number}",
        )

        # From here on message_body is the decoded object; the failure
        # message below therefore shows whichever form is current.
        message_body = json.loads(message_body)

        send_log(
            message=f"Sending the follow msg to SQS QUEUE {message_body}",
            x_request_id=x_request_id,
        )
    except (ClientError, TypeError) as failure:
        error_handler(
            x_request_id=x_request_id,
            _msg=f"Send message failed: {message_body}",
            exception=failure,
        )
Ejemplo n.º 19
0
def delete_message(x_request_id: str, message) -> None:
    """
    Remove a message from its queue and log the success.

    Parameters:
        x_request_id: Unique id str
        message: The message to remove; its delete() method is called.

    Returns:
        None
    """
    success_text = "Message have been deleted with success."
    try:
        message.delete()
        send_log(x_request_id=x_request_id, message=success_text)
    except (ClientError, AttributeError) as failure:
        error_handler(x_request_id=x_request_id, exception=failure)
def creation_residence_data(
    x_request_id: str, residence_data: QuintoAndarSchema
) -> None:
    """
    Function responsible for
        create all data from residence: address,
        residence, rent values and feature flags, in that order.

    Parameters:
        x_request_id: str
        residence_data: QuintoAndarSchema

    Returns:
        None on success. When a handled exception occurs,
            whatever error_handler returns.

    Notes:
        Function deal_with_feature is responsible for
            receive each different features and create
            respectively to each residence.
    """
    try:
        send_log(
            message=f"Going to create the follow data {residence_data}",
            x_request_id=x_request_id,
        )

        # Each step feeds the id it obtains into the next one.
        residence_address_id = create_residence_address(
            x_request_id=x_request_id, residence_data=residence_data
        )
        residence_id = create_residence(
            x_request_id=x_request_id,
            residence_address_id=residence_address_id,
            residence_data=residence_data,
        )
        create_residence_values(
            x_request_id=x_request_id,
            residence_id=residence_id,
            residence_data=residence_data,
        )
        features = {
            "petFlag": residence_data.pet_flag,
            "metroFlag": residence_data.metro_flag,
            "furnitureFlag": residence_data.furniture_flag,
        }

        deal_with_feature(
            features=features,
            x_request_id=x_request_id,
            residence_id=residence_id,
        )
    except (SyntaxError, AttributeError, AssertionError) as exception:
        # Pass the request id along, as the sibling functions do, so the
        # failure can be traced back to its request.
        return error_handler(
            x_request_id=x_request_id,
            exception=exception,
            _msg="Exception occurred in create_residence_flow",
        )
def consumer_message_handler(x_request_id: str, message: any,
                             driver: any) -> None:
    """
    Function responsible for validating one SQS message and
        handing its consumer name and scraper type to executor.

    Parameters:
        x_request_id: id unique
        message: sqs message instance
        driver: google chrome instance

    Returns:
        None
    """
    try:
        validated = validate_message_data(x_request_id=x_request_id,
                                          message=message)
        consumer_name = validated.get("data").get("consumer_name")
        scraper_type = validated.get("data").get("type_scraper")
        executor(
            x_request_id=x_request_id,
            consumer=consumer_name,
            properties=scraper_type,
            driver=driver,
        )
    except AttributeError as failure:
        # Raised, for instance, when the "data" entry is missing.
        error_handler(x_request_id=x_request_id, exception=failure)
Ejemplo n.º 22
0
def main(driver: any, queue: any) -> None:
    """
    Consumer responsible for receive messages from SQS Queue.
        Loops forever: polls one message at a time, runs the scraper
        flow for it and deletes it. When nothing is received it waits
        and then publishes the default event.

    Parameters:
        driver: any
        queue: any

    Returns:
        None
    """
    try:
        while True:
            messages = receive_messages(queue=queue, max_number=1, wait_time=0)
            # receive_messages gives None after it has handled a
            # ClientError, and len(None) would raise TypeError here;
            # a truthiness test treats None like an empty batch.
            if not messages:
                send_log(
                    x_request_id="",
                    message="QUEUE with 0 messages, going to send default event in 30 minutes...",
                )
                sleep(number=1800)
                dealing_with_empty_queue(queue=queue)
                continue

            for message in messages:
                x_request_id = request_handler(message=message.body)
                send_log(
                    x_request_id=x_request_id,
                    message="Receive message going to start scraper flow...",
                )
                consumer_message_handler(
                    message=message.body,
                    x_request_id=x_request_id,
                    driver=driver,
                )
                # The message is deleted once its handler has returned.
                delete_message(x_request_id=x_request_id, message=message)
    except AttributeError as exception:
        error_handler(exception=exception)
Ejemplo n.º 23
0
def create_residence_values(
    x_request_id: str, residence_id: int, residence_data: QuintoAndarSchema
) -> None:
    """
    Function responsible for storing the rent values
        of a residence through create.

    Parameters:
        x_request_id: str
        residence_id: int
        residence_data: QuintoAndarSchema
            its price fields are converted with float()

    Returns:
        None on success. When a handled exception occurs,
            whatever error_handler returns.
    """
    try:
        tables = TableNameSchema()
        payload = {
            "ResidenceId": residence_id,
            "price": float(residence_data.rent_price_without_tax),
            "condominiumTax": float(residence_data.condominium_tax),
            "houseTax": float(residence_data.house_tax),
            "fireInsurence": float(residence_data.fire_insurance),
            "serviceTax": float(residence_data.service_tax),
            "totalRentPrice": float(residence_data.total_rent_price),
        }

        created_values = create(
            x_request_id=x_request_id,
            data=payload,
            table_name=tables.residence_values,
        )
        send_log(
            x_request_id=x_request_id,
            message=f"Inserted in database the follow residence values {created_values}...",
        )
    except (
        TimeoutError,
        SyntaxError,
        IndexError,
        AttributeError,
    ) as failure:
        return error_handler(
            x_request_id=x_request_id,
            exception=failure,
            _msg="Exception occurred in create_residence_value",
        )
Ejemplo n.º 24
0
def api_integration(x_request_id: str, url: str, token: str,
                    body: dict) -> json:
    """
    Function responsible for posting body, encoded as JSON,
        to url and logging the resulting status code.

    Parameters:
        x_request_id: str
        url: str
        token: str
            sent as the Authorization header
        body: dict

    Returns:
        The object returned by requests.post. When a timeout is
            handled, whatever error_handler returns.
    """
    headers = {"Content-Type": "application/json", "Authorization": token}
    try:
        send_log(
            x_request_id=x_request_id,
            message=f"Sending request to follow path: {url} with follow data: {body}",
        )
        # The request is abandoned after 25 seconds.
        response = requests.post(
            url=url,
            data=json.dumps(body),
            headers=headers,
            timeout=25,
        )
        send_log(
            x_request_id=x_request_id,
            message=f"Request finish with status: {response.status_code}",
        )
        return response
    except (
            requests.exceptions.Timeout,
            requests.exceptions.ReadTimeout,
    ) as failure:
        return error_handler(
            x_request_id=x_request_id,
            _msg="Exception occurred in api service.",
            exception=failure,
        )