def _get_order_info(
            self, order_info_element: WebElement
    ) -> Tuple[str, float, datetime.date]:
        """
        Extract the order id, price and date from one order-info element.

        The texts of all descendants with class ``value`` are collected and
        interpreted purely by position (see the layout comment below).

        :param order_info_element: element holding the ``value`` fields of an order
        :returns: the OrderID, price and date
        :raises IndexError: if fewer than three ``value`` fields are found
        """
        order_info_list: List[str] = [
            info_field.text for info_field in
            order_info_element.find_elements_by_class_name('value')
        ]

        # value tags have only generic class names, so a constant order is assumed:
        #   [date, price, recipient_address, order_id]
        # With fewer than four fields the order id is taken from index 2; the
        # short layout is presumably [date, price, order_id], since index 1 is
        # always read as the price below -- TODO confirm against the page.
        if len(order_info_list) < 4:
            order_id = order_info_list[2]
        else:
            order_id = order_info_list[3]

        # price is usually formatted as 'EUR x,xx' but special cases such as
        # 'Audible Guthaben' are possible as well; those are reported as 0.0
        order_price_str = order_info_list[1]
        if 'EUR' in order_price_str:
            order_price = self._price_str_to_float(order_price_str)
        else:
            # float literal keeps the return value consistent with the annotation
            order_price = 0.0

        date = ut.str_to_date(order_info_list[0])
        return order_id, order_price, date
Example #2
0
 def iterate_react_select_options(self,
                                  select: WebElement) -> t.List[WebElement]:
     """Open a react-select dropdown and return its option elements.

     Clicks the ``Select-arrow`` inside ``select``, hands an
     ``element_to_be_clickable`` condition for ``Select-option`` to
     ``self.wait`` and then returns every ``Select-option`` found under
     ``select`` so that callers can ``.click()`` one of them.
     """
     # related: https://github.com/JedWatson/react-select/issues/603#issuecomment-157888562 # noqa
     arrow = select.find_element_by_class_name('Select-arrow')
     arrow.click()

     option_locator = (By.CLASS_NAME, 'Select-option')
     self.wait(EC.element_to_be_clickable(option_locator))

     options = select.find_elements_by_class_name('Select-option')
     return options
Example #3
0
    def is_trash_story(cls, story_container: WebElement) -> bool:
        """Tell whether a story container should be discarded.

        Returns True when the container holds at least one
        ``story__placeholder`` element, or when the container has gone stale
        while being inspected; returns False otherwise.
        """
        try:
            placeholders = story_container.find_elements_by_class_name(
                "story__placeholder")
        except StaleElementReferenceException:
            # the element is no longer attached, so it is treated as trash
            return True

        return bool(placeholders)
Example #4
0
    def get_link_from_item(friend_item: WebElement) -> str:
        """Return a normalised profile URL for a friend-list item.

        The ``href`` of the ``<a>`` inside the first ``fsl`` element is read.
        Links containing ``profile.php`` are rebuilt from the first numeric
        ``id=`` value found in them; any other link is returned with its
        query string (everything from the first ``?``) removed.
        """
        name_block = friend_item.find_elements_by_class_name("fsl")[0]
        anchor = name_block.find_element_by_tag_name("a")
        user_link = anchor.get_attribute("href")

        is_profile_php = user_link.find("profile.php") >= 0
        if is_profile_php:
            user_id = re.findall(r'(?<=id=)\d+', user_link)[0]
            return f"https://www.facebook.com/profile.php?id={user_id}"

        return user_link.split('?')[0]
    def _get_comment_issue_diff(self, mr_comment_shadow: WebElement) -> Optional[str]:
        """Return the text of the comment's ``issue-diff`` element, if any.

        Returns None when the comment has no ``issue-diff`` element and the
        element's text when there is exactly one. Raises ScrapeException
        when more than one is present.
        """
        found = mr_comment_shadow.find_elements_by_class_name('issue-diff')
        count = len(found)

        if count > 1:
            raise ScrapeException('More than one issue-diff found in a comment.')
        if count == 1:
            return found[0].text
        return None
 def parse_results_for_bulk_expired_domains_search(self, element: WebElement) ->[]:
     """Turn a bulk-check result element into a list of DomainType.

     Returns None when ``element`` is None. Otherwise every product element
     under ``element`` yields one ``DomainType(domain, available)``, where
     ``domain`` is the text of the product's domain-name element and
     ``available`` is ``GoDaddy.checkAvailability`` applied to the text of
     its message element.
     """
     if element is None:
         return None

     product_class = BulkCheckElements.get_element_by_class(BulkCheckElements.class_product).target
     filtered = []
     for product in element.find_elements_by_class_name(product_class):
         name_class = BulkCheckElements.get_element_by_class(BulkCheckElements.class_domain_name).target
         domain = product.find_element_by_class_name(name_class).text

         msg_class = BulkCheckElements.get_element_by_class(BulkCheckElements.class_msg).target
         msg_text = product.find_element_by_class_name(msg_class).text
         available = GoDaddy.checkAvailability(msg_text)

         filtered.append(DomainType(domain, available))
     return filtered
Example #7
0
    def __init__(self, element: WebElement) -> None:
        """Wrap an accordion section that represents a Ward.

        After the parent initialiser has run, the section title must start
        with ``"Ward"``; otherwise an Exception is raised. Every
        ``tiles__tile`` element inside ``element`` is then wrapped in a
        ``NeighbourhoodElement`` and exposed through ``self.neighbourhoods``.
        """
        super().__init__(element)

        is_ward = self.title.startswith("Ward")
        if not is_ward:
            raise Exception("Accordion section is not a Ward")

        tiles = element.find_elements_by_class_name("tiles__tile")
        self.__neighbourhood_elements = tiles

        wrapped = []
        for tile in self.__neighbourhood_elements:
            wrapped.append(NeighbourhoodElement(tile))
        self.neighbourhoods = wrapped
Example #8
0
    def parse_image_links(story_element: WebElement) -> str:
        """Collect the image URLs of a story and return them as a JSON array string.

        For every ``story-block_type_image`` block the ``src`` attribute of
        its ``<img>`` is used; when ``src`` is None the ``data-src``
        attribute is used instead. The links keep the order of the blocks.

        Returns:
            ``json.dumps`` of the list of links (a ``str``, not a list).
        """
        links = []
        for image_block in story_element.find_elements_by_class_name("story-block_type_image"):
            img = image_block.find_element_by_tag_name('img')
            link = img.get_attribute('src')
            if link is None:
                # no ``src`` attribute -> fall back to ``data-src``
                link = img.get_attribute('data-src')

            # TODO: check in future versions [3.11.19]
            # aiogram can't send .webp as image, so links ending in '.webp'
            # may have to be skipped here.
            links.append(link)

        return json.dumps(links)
 def get_elements_static(target_element: WebElement, verifier: SelecElement) ->[]:
     """Find elements under ``target_element`` using the selector in ``verifier``.

     ``verifier.element_type`` chooses the lookup (id, class name, CSS
     selector or name) and ``verifier.target`` is the value searched for.
     Any exception, including the ``ValueError`` raised for an unsupported
     selector type, is caught here: its type and args are printed and
     ``None`` is returned.
     """
     try:
         kind = verifier.element_type
         if kind == NavEleType.IsId:
             finder = target_element.find_elements_by_id
         elif kind == NavEleType.IsClass:
             finder = target_element.find_elements_by_class_name
         elif kind == NavEleType.IsCssSelector:
             finder = target_element.find_elements_by_css_selector
         elif kind == NavEleType.IsName:
             finder = target_element.find_elements_by_name
         else:
             raise ValueError("Selector not Supported")
         return finder(verifier.target)
     except Exception as inst:
         print(type(inst))
         print(inst.args)
         return None
 def parse_results_for_bulk_expired_domains_search(
         self, element: WebElement) -> []:
     """Parse a bulk expired-domains result element into DomainType objects.

     Returns None when ``element`` is None. Otherwise returns a list with
     one ``DomainType(domain, available)`` per product element: ``domain``
     is the text of the product's domain-name element, ``available`` is the
     result of ``GoDaddy.checkAvailability`` on the product's message text.
     """
     if element is None:
         return None

     product_selector = BulkCheckElements.get_element_by_class(
         BulkCheckElements.class_product)
     products = element.find_elements_by_class_name(product_selector.target)

     filtered = []
     for product in products:
         name_selector = BulkCheckElements.get_element_by_class(
             BulkCheckElements.class_domain_name)
         domain = product.find_element_by_class_name(name_selector.target).text

         msg_selector = BulkCheckElements.get_element_by_class(
             BulkCheckElements.class_msg)
         message = product.find_element_by_class_name(msg_selector.target).text

         filtered.append(DomainType(domain, GoDaddy.checkAvailability(message)))
     return filtered
Example #11
0
def assert_start_tiles(we: WebElement) -> None:
    """Check the two start-page tiles and their expected content.

    Args:
        we: WebElement containing the start page content to check.
    """
    # exactly the two start tiles are expected as <section> elements
    assert_element_by_classes(
        we, 'section', ['tile', 'start-tile'], None, True, 2
    )
    tiles = we.find_elements_by_class_name('start-tile')

    # first tile: "What is Myaku?"
    about_tile = tiles[0]
    assert_element_by_classes(
        about_tile, 'h4', 'main-tile-header', 'What is Myaku?'
    )
    assert_element_by_classes(about_tile, 'span', 'key-word', True, True, 2)
    assert_element_by_classes(about_tile, 'ol', 'myaku-ol')
    assert_element_by_tag(about_tile, 'li', True, True, 3)

    # second tile: "Getting Started"
    start_tile = tiles[1]
    assert_element_by_classes(
        start_tile, 'h4', 'main-tile-header', 'Getting Started'
    )
    # NOTE(review): this check runs against ``we`` rather than the second
    # tile, unlike the matching 'ol' check for the first tile -- confirm
    # whether that is intended.
    assert_element_by_classes(we, 'ul', 'myaku-ul')
    assert_element_by_tag(start_tile, 'li', True, True, 4)
    assert_element_by_tag(start_tile, 'a', True, True, 4)
 def get_elements_static(target_element: WebElement,
                         verifier: SelecElement) -> []:
     """Look up elements below ``target_element`` as described by ``verifier``.

     The lookup method is picked from ``verifier.element_type`` (id, class
     name, CSS selector or name) and is called with ``verifier.target``.
     Every exception is caught, including the ``ValueError`` raised for an
     unsupported selector type; its type and args are printed and ``None``
     is returned instead.
     """
     try:
         selector_type = verifier.element_type
         if selector_type == NavEleType.IsId:
             return target_element.find_elements_by_id(verifier.target)
         if selector_type == NavEleType.IsClass:
             return target_element.find_elements_by_class_name(verifier.target)
         if selector_type == NavEleType.IsCssSelector:
             return target_element.find_elements_by_css_selector(verifier.target)
         if selector_type == NavEleType.IsName:
             return target_element.find_elements_by_name(verifier.target)
         raise ValueError("Selector not Supported")
     except Exception as inst:
         print(type(inst))
         print(inst.args)
         return None
Example #13
0
    def get_job_info(self, job: WebElement, category: str):
        """Read the fields of a single job listing.

        Args:
            job(WebElement): Element of one job listing.
            category(str): The category the job fits into.

        Returns:
            info(list): ``[company_name, title, job_type, region, category]``.
            The first two ``company`` elements supply company name and job
            type; ``region`` is an empty string when the region element is
            not found.
        """
        company_elems = job.find_elements_by_class_name('company')
        company_name = company_elems[0].get_attribute('textContent')
        job_type = company_elems[1].get_attribute('textContent')

        title_elem = job.find_element_by_class_name('title')
        title = title_elem.get_attribute('textContent')

        try:
            region_elem = job.find_element_by_css_selector(
                'section.jobs article ul li span.region.company')
            region = region_elem.get_attribute('textContent')
        except NoSuchElementException:
            region = ''

        info = [company_name, title, job_type, region, category]
        return info
Example #14
0
    def parse_text(story_element: WebElement) -> str:
        """Join the texts of all ``story-block_type_text`` blocks with newlines."""
        text_blocks = story_element.find_elements_by_class_name("story-block_type_text")
        paragraphs = [block_.text for block_ in text_blocks]
        return "\n".join(paragraphs)
Example #15
0
 def get_name_from_item(friend_item: WebElement) -> str:
     """Return the text of the first ``fsl`` element inside ``friend_item``."""
     name_blocks = friend_item.find_elements_by_class_name("fsl")
     first_block = name_blocks[0]
     return first_block.text
Example #16
0
    def parse_tags(story_element: WebElement):
        """Return the set of texts of all ``tags__tag`` elements in the story."""
        unique_tags = set()
        for tag_element in story_element.find_elements_by_class_name("tags__tag"):
            unique_tags.add(tag_element.text)
        return unique_tags
Example #17
0
    def parse_results(self,
                      search_results: WebElement,
                      driver,
                      get_b64_images=False):
        """Build one ``OfferRow`` per ``holiday-cottage-item`` in ``search_results``.

        Args:
            search_results: element whose ``holiday-cottage-item`` descendants
                are parsed.
            driver: browser driver; used for ``current_url``,
                ``execute_script`` and as the ``WebDriverWait`` target.
            get_b64_images: when truthy, additionally capture each cottage
                image through an in-page canvas and store the bytes on
                ``offer.image``.

        Returns:
            The list of populated ``OfferRow`` objects, in the order the
            cottages were found.

        NOTE(review): only the ``tag`` lookup is guarded against
        ``NoSuchElementException`` and only the weekly prices against
        ``ValueError``; every other lookup/``int()`` here is unguarded, so a
        cottage that lacks one of those pieces presumably aborts the whole
        call -- confirm this is intended.
        """
        offers = []
        for cottage in search_results.find_elements_by_class_name(
                'holiday-cottage-item'):
            offer = OfferRow()
            # 'mapLatLong' text is split on ',' into exactly two parts,
            # which are stored as strings
            offer.lat, offer.lon = cottage.find_element_by_class_name(
                'mapLatLong').get_attribute('textContent').strip().split(',')

            title_info = cottage.find_element_by_css_selector('h3>a')
            offer.title = title_info.text

            offer.url = title_info.get_attribute('href')
            # ref: everything after the last '-ref' in the URL
            offer.ref = offer.url.split('-ref')[-1]
            # slug: last path segment, cut at its first '-ref'
            offer.slug = offer.url.split('/')[-1].split('-ref')[0]

            offer.location = cottage.find_element_by_css_selector(
                '.loc-container>a').text
            offer.description = cottage.find_element_by_css_selector(
                '.cottage-img .para').text

            # Price text with '£' and ' all year' removed; a range is split on
            # ' to ' into low/high. Without ' to ' both ends are the same string.
            regular_price = cottage.find_element_by_css_selector(
                '.price-from-sec>.price').text.replace('£', '').replace(
                    ' all year', '')
            try:
                offer.weekly_low = int(regular_price.split(' to ')[0])
            except ValueError:
                # Failed because price is text
                offer.weekly_low = 0
            try:
                offer.weekly_high = int(regular_price.split(' to ')[-1])
            except ValueError:
                # Failed because price is text
                offer.weekly_high = 0

            # First line of the lower-cased 'products-meta' text, split on '|'.
            # Positions 0-4 are read as sleeps, bedrooms, dog, child, wifi;
            # the three flags are True unless the field is exactly 'no'.
            meta = cottage.find_element_by_class_name(
                'products-meta').text.lower().strip().split('\n')[0].split('|')
            offer.sleeps = meta[0].strip()
            offer.bedrooms = meta[1].strip()
            offer.dog = meta[2].strip() != 'no'
            offer.child = meta[3].strip() != 'no'
            offer.wifi = meta[4].strip() != 'no'

            # Late offer
            offer_details = cottage.find_element_by_css_selector(
                '.lao-strip .offer-block')
            try:
                offer.late_savings_tag = offer_details.find_element_by_class_name(
                    'tag').text
            except NoSuchElementException:
                offer.late_savings_tag = ""
            # Offer text with the savings tag removed; the price is taken from
            # after the last ', £' and the night count from between the last
            # ' for ' and the following ' night'.
            offer.late_offer = offer_details.text.replace(
                offer.late_savings_tag, '')
            offer.late_price = int(offer.late_offer.split(', £')[-1])
            offer.late_nights = int(
                offer.late_offer.split(' for ')[-1].split(' night')[0])

            # Get image url
            img = cottage.find_element_by_css_selector(f'#img-{offer.ref} img')
            base_url = urlparse(driver.current_url).netloc
            # assumes 'data-src' holds a path starting with '/' that is
            # relative to the current host -- TODO confirm
            img_url = img.get_attribute('data-src')
            offer.img_url = "https://" + base_url + img_url

            if get_b64_images:
                # Image
                # The script scrolls the image into view and, from its load
                # handler, draws it onto a canvas and stores the JPEG data URL
                # in the 'dataurl' attribute of a new div with id
                # 'output-<ref>'. Doubled braces are str.format escapes.
                # NOTE(review): insertBefore() is given `output.nextSibling`
                # while `output` is not yet attached -- presumably
                # `img.nextSibling` was meant; confirm.
                js = """
                let canvas = document.createElement('canvas');
                let img = document.querySelector('#img-{ref} img');
                img.scrollIntoView();
                canvas.id = 'canvas-{ref}';
                
                // wait for image to load before adding it to canvas
                img.onload = function() {{
                    // using canvas to generate a b64 dataurl without having to request the image a second time to download
                    canvas.width = img.width;
                    canvas.height = img.height;
                    document.body.appendChild(canvas);
                    let ctx = canvas.getContext('2d');
                    ctx.drawImage(img, 0, 0);
                    let data = canvas.toDataURL('image/jpeg', 1.0);
                    let output = document.createElement('div');
                    output.id = 'output-{ref}';
                    output.setAttribute('dataurl', data);
                    img.parentNode.insertBefore(output, output.nextSibling);
                }};
                // if image has already loaded before attaching the listener, manually fire it
                if (img.complete && img.naturalHeight !== 0) {{
                    let evt = document.createEvent('Event');
                    evt.initEvent('load', false, false);
                    img.dispatchEvent(evt);
                }}
                """.format(ref=offer.ref)
                driver.execute_script(js)

                # Download image as base64
                # Wait up to 5 seconds for the output div created by the script;
                # a timeout, or a data URL equal to `empty_b64_jpg` (defined
                # outside this method), counts as "no image".
                try:
                    data_elem = WebDriverWait(driver, 5).until(
                        EC.presence_of_element_located(
                            (By.ID, f'output-{offer.ref}')))
                    dataurl = data_elem.get_attribute('dataurl')
                    if dataurl == empty_b64_jpg:
                        # image wasn't loaded properly
                        dataurl = None
                except TimeoutException:
                    dataurl = None

                # offer.image is only assigned when a usable data URL exists
                if dataurl:
                    response = urllib.request.urlopen(dataurl)
                    offer.image = response.read()

            offers.append(offer)
        return offers
 def _is_deleted_comment(self, div_under_comment_header: WebElement) -> bool:
     """Return True when the div holds at least one ``deleted-comment-notice`` element."""
     notices = div_under_comment_header.find_elements_by_class_name('deleted-comment-notice')
     return bool(notices)
 def _get_comment_author_roles(self, div_under_comment_header: WebElement) -> List[str]:
     """Return the texts of all ``role-label`` elements in the div, in order."""
     return [
         role_elem.text
         for role_elem in div_under_comment_header.find_elements_by_class_name('role-label')
     ]