Beispiel #1
0
 def parse_row(self, obj: Element) -> dict:
     """Extract the listing fields from one result row.

     Args:
         obj: Row element to read the item information from.

     Returns:
         A dict with the keys ``name``, ``media_type``, ``category``,
         ``seeders``, ``leachers``, ``magnet_link``, ``file_size``,
         ``vip_status``, ``trusted``, ``href``, ``uploader`` and
         ``comment_count``.
     """
     name = get_first_element(obj, self.ITEM_NAME).text

     # One cell carries both links: index 0 is the media type, index 1 the
     # category.
     type_links = get_first_element(obj,
                                    self.MEDIA_AND_CATEGORY_TYPE).find("a")
     media_type = type_links[0].text
     category = type_links[1].text

     # Same idea for the peer counts: seeders first, leachers second.
     peer_cells = obj.find(self.SEEDERS_AND_LEACHERS)
     seeders = peer_cells[0].text
     leachers = peer_cells[1].text

     magnet_link = get_first_element(obj, self.ITEM_MAGNET_LINK).links.pop()
     file_size = get_first_element(obj, "font.detDesc").text.split(",")[1]
     vip_status = get_first_element(obj, self.UPLOADER_VIP)
     trusted = get_first_element(obj, self.UPLOADER_TRUSTED)
     href = get_first_element(obj, "a.detLink").links.pop()
     # The uploader name is whatever follows the last "by" in the description.
     uploader = obj.find("font.detDesc")[0].text.split("by")[-1].strip()

     comments_match = obj.search(self.ITEM_COMMENTS)
     comment_count = comments_match.fixed[0] if comments_match else 0

     return {
         "name": name,
         "media_type": media_type,
         "category": category,
         "seeders": seeders,
         "leachers": leachers,
         "magnet_link": magnet_link,
         "file_size": file_size,
         "vip_status": vip_status,
         "trusted": trusted,
         "href": href,
         "uploader": uploader,
         "comment_count": comment_count,
     }
Beispiel #2
0
 def title_parse(element: Element) -> Optional[str]:
     """Return the product title text.

     Tries the ``.product-title`` node first and falls back to ``#name``.

     Returns:
         The text of the first node found, or ``None`` when neither exists.
     """
     for selector in (".product-title", "#name"):
         node = element.find(selector, first=True)
         if node:
             return node.text
     return None
Beispiel #3
0
 def price_discount_parse(element: Element) -> Optional[float]:
     """Return the product price, preferring the discounted one.

     Looks for a ``.discount-green`` node first and falls back to ``.price``.
     The first character of the node text (presumably the currency symbol)
     is dropped before conversion.

     Args:
         element: Element to search for the price nodes.

     Returns:
         The price as a float, or ``None`` when neither node is present.

     Raises:
         ValueError: If the remaining text is not a valid number.
     """
     for selector in (".discount-green", ".price"):
         price_element = element.find(selector, first=True)
         if price_element:
             # Strip thousands separators ("$1,299.00") so float() does not
             # raise on larger amounts.
             return float(price_element.text[1:].replace(",", ""))
     return None
Beispiel #4
0
 def save_x_percent_in_cart_parse(element: Element) -> Optional[int]:
     """Return the "Save X% in Cart" percentage.

     Returns:
         The integer found between ``"Save "`` and ``"% in Cart"`` in the
         matching ``title`` node, or ``None`` when no such node exists.
     """
     cart_title = element.find("title", containing="in Cart", first=True)
     if not cart_title:
         return None
     percent_text = parse_html_text_btw(cart_title.text, "Save ", "% in Cart")
     return int(percent_text)
Beispiel #5
0
 def loyalty_credit_x_percent_parse(element: Element) -> Optional[int]:
     """Return the loyalty-credit percentage.

     Returns:
         The integer that precedes ``"% Loyalty Credit"`` in the matching
         ``.slanted-container`` node, or ``None`` when no such node exists.
     """
     credit_node = element.find(".slanted-container",
                                containing="Loyalty Credit",
                                first=True)
     if not credit_node:
         return None
     credit_text = credit_node.text
     marker_pos = credit_text.find("% Loyalty Credit")
     return int(credit_text[:marker_pos])
Beispiel #6
0
 def product_code_parse(element: Element) -> Optional[str]:
     """Return the product code.

     Returns:
         The text after the first ``": "`` in the ``li`` node containing
         "Product Code", or ``None`` when no such node exists.
     """
     code_item = element.find("li", containing="Product Code", first=True)
     if not code_item:
         return None
     item_text = code_item.text
     value_start = item_text.find(": ") + 2
     return item_text[value_start:]
Beispiel #7
0
    def parse_flat(self, html: Element) -> None:  # noqa: CCR001
        """Parse one flat listing and store it if it is new.

        Reads the id, location, price and price per square metre from the
        given element, derives the area, and creates a ``Flat`` record when
        no record with that id exists yet. Any error is printed and the
        transaction is rolled back.

        :param html: Element holding a single flat listing
        """
        try:
            href = html.find("a", first=True).attrs.get("href")
            flat_id = int(re.search(r"flat/(\d+)", href).group(1))

            geo_labels = html.xpath(".//a[@data-name='GeoLabel']/text()")
            if self.domain == "ekb":
                # For this domain the first label is dropped before unpacking.
                geo_labels = geo_labels[1:]
            city, district, *address_parts = geo_labels
            location = " ".join(address_parts)

            raw_price = html.xpath(".//span[@data-mark='MainPrice']/text()",
                                   first=True)
            price = int(raw_price.replace("₽", "").strip().replace(" ", ""))

            raw_ppm = html.xpath(".//p[@data-mark='PriceInfo']/text()",
                                 first=True)
            ppm = int(raw_ppm.replace("₽/м²", "").strip().replace(" ", ""))

            # Area is derived from total price and price per square metre.
            square = round(price / ppm, 2)

            if Flat.exists(id=flat_id):
                return
            Flat(
                id=flat_id,
                city=city,
                district=district,
                location=location,
                price=price,
                ppm=ppm,
                square=square,
            )
            commit()
        except Exception as exc:
            print(exc)
            rollback()
Beispiel #8
0
 def free_shipping_over_x_dollars_parse(element: Element) -> Optional[int]:
     """Return the order amount above which shipping is free.

     Returns:
         The integer that starts two characters after the first newline of
         the ``.banner-alert`` text, or ``None`` when the banner is missing
         or does not mention free shipping.
     """
     banner = element.find(".banner-alert", first=True)
     if not banner:
         return None
     banner_text = banner.text
     if "Free Shipping\xa0for orders over" not in banner_text:
         return None
     # Skip the newline and the single character that follows it.
     amount_start = banner_text.find("\n") + 2
     return int(banner_text[amount_start:])
Beispiel #9
0
 def package_qty_parse(element: Element) -> Optional[str]:
     """Return the package quantity.

     Returns:
         The text after the first ``": "`` in the ``li`` node containing
         "Package Quantity", or ``None`` when no such node exists.
     """
     qty_item = element.find("li", containing="Package Quantity", first=True)
     if not qty_item:
         return None
     item_text = qty_item.text
     value_start = item_text.find(": ") + 2
     return item_text[value_start:]
    def find_element_by_tag_name(element: Element, tag_name):
        """Return the first match for ``tag_name`` inside ``element``.

        The lookup runs while holding ``Chrome.lock``.

        :param element: Element to search within.
        :param tag_name: Selector passed to ``element.find``.
        :return: Whatever ``element.find(tag_name, first=True)`` returns.
        """
        # ``with`` releases the lock only if it was actually acquired. The
        # earlier try/finally called release() on an unheld lock (masking the
        # original error) whenever acquire() itself failed.
        # NOTE(review): assumes Chrome.lock supports the context-manager
        # protocol, as threading locks do - confirm.
        with Chrome.lock:
            return element.find(tag_name, first=True)
Beispiel #11
0
 def expiration_date_parse(element: Element) -> Optional[datetime.datetime]:
     """Return the product expiration date.

     Returns:
         The value of the ``li`` node containing "Expiration Date", parsed
         with the ``"%B %Y"`` format, or ``None`` when no such node exists.
     """
     expiry_item = element.find("li",
                                containing="Expiration Date",
                                first=True)
     if not expiry_item:
         return None
     month_and_year = parse_html_text_btw(expiry_item.text, "\n?\n", "\n")
     return datetime.datetime.strptime(month_and_year, "%B %Y")
Beispiel #12
0
def get_ride_info(ride: Element) -> dict:
    """Build a ride record from one search-result element.

    Args:
        ride: Element holding the price, times, station names and the
            optional seats notice of a single ride.

    Returns:
        A dict describing the ride. ``free_seats`` is ``None`` when there is
        no seats notice or its text does not start with a number.

    Raises:
        IndexError: If a required node (price, times, stations) is missing.
        ValueError: If the price text is not a valid number.
    """
    # The amount is the part of the price text before the non-breaking space.
    price_text = ride.find("span.num.currency-small-cents")[0].text
    price = float(price_text.split("\xa0")[0])

    # Only the first two whitespace-separated tokens of the times block are
    # used: departure, then arrival.
    ride_times = ride.find("div.ride-times")[0].text.split()
    departure_time = ride_times[0]
    arrival_time = ride_times[1]

    source = ride.find("div.departure-station-name")[0].text
    destination = ride.find("div.arrival-station-name")[0].text

    # Default to None up front: previously a notice whose text did not match
    # the pattern left the variable unbound and the return statement raised
    # UnboundLocalError.
    seats_available = None
    seats_notice = ride.find("div.seats-notice")
    if seats_notice:
        matcher = re.match(r"(\d+)\s+\w+", seats_notice[0].text)
        if matcher:
            seats_available = int(matcher.group(1))

    return {
        "departure_datetime": departure_time,
        "arrival_datetime": arrival_time,  # "2018-06-20 15:00:00",
        "source": source,
        "destinations": destination,
        "price": price,  # in EUR - you can use https://api.skypicker.com/rates
        "type": "bus",  # optional (bus/train)
        "source_id": 26323200,  # optional (carrier’s id)
        "destination_id": 26383230,  # optional (carrier’s id)
        "free_seats": seats_available,  # optional
        "carrier": "Flixbus",  # optional
    }
Beispiel #13
0
 def shipping_weight_parse(element: Element) -> Optional[float]:
     """Return the shipping weight when it is given in pounds.

     Returns:
         The weight as a float if the unit is ``"lbs"``; ``None`` when the
         "Shipping Weight" node is missing or uses another unit.
     """
     weight_item = element.find("li",
                                containing="Shipping Weight",
                                first=True)
     if not weight_item:
         return None
     raw_value = parse_html_text_btw(weight_item.text, "\n?\n", "\n")
     # The value is expected to be exactly "<number> <unit>".
     amount, unit = raw_value.split()
     return float(amount) if unit == "lbs" else None
0
    def _get_jobs(self, section: Element, page_no: str) -> List[Job]:
        """Collect the job postings listed in one company section.

        :param section: html content to process to extract jobs.
        :type section: Element
        :param page_no: the section part being processed
        :type page_no: str
        :return: list of jobs
        :rtype: List[Job]
        """
        company = section.find('span', first=True)
        listing = section.find('ul.jobs._list', first=True)
        if not listing:
            return []

        jobs: List[Job] = []
        for anchor in listing.find('li > a'):
            parts = anchor.text.split('\n')

            # The hash identifies a posting by its full (multi-line) link text.
            fingerprint = hashlib.sha256('::'.join(parts).encode('utf-8'))
            digest = fingerprint.hexdigest()

            payload = {
                'company_name': company.text if company else '',
                'title': ' | '.join(parts),
                'href': urljoin(self.url, anchor.attrs['href']),
                # The second line of the link text, when present, is the
                # location.
                'location': parts[1] if len(parts) > 1 else None,
                'deadline': None,
            }
            jobs.append(Job(page_no=page_no, hash=digest, data=payload))

        return jobs
Beispiel #15
0
def _parse_tweet(tweet: Element) -> dict:
    """Convert one tweet element into a plain dict.

    Ids and the timestamp are read from ``data-*`` attributes; the creation
    time is converted from milliseconds to a timezone-aware UTC datetime.
    """
    div = tweet.find('div.tweet', first=True)
    timestamp = tweet.find('a.tweet-timestamp > span._timestamp', first=True)

    epoch_seconds = int(timestamp.attrs['data-time-ms']) / 1000
    created_at = datetime.datetime.fromtimestamp(
        epoch_seconds, tz=datetime.timezone.utc)

    attrs = div.attrs
    parsed = {
        'id': int(attrs['data-tweet-id']),
        'conversation_id': int(attrs['data-conversation-id']),
        'created_at': created_at,
        'user_id': int(attrs['data-user-id']),
        'user_name': attrs['data-name'],
        'user_screen_name': attrs['data-screen-name'],
        'text': div.find('p.tweet-text', first=True).text,
    }
    for stat_key, stat_name in (('replies_count', 'reply'),
                                ('retweets_count', 'retweet'),
                                ('favorites_count', 'favorite')):
        parsed[stat_key] = _tweet_stat(div, stat_name)
    # Mentions are optional; a missing attribute yields an empty list.
    parsed['mentions'] = attrs.get('data-mentions', '').split()
    return parsed
    def from_html(cls, block: Element) -> Optional[Product]:
        """Build a product from one result block.

        :param block: Element holding the product's links and text.
        :return: The product, or ``None`` when the block has no links or its
            first link lacks the tracking attribute.
        """
        # NOTE(review): only the first link is ever inspected - if it lacks
        # the attribute the method gives up instead of trying the next link.
        # Confirm this is intended.
        first_link = next(iter(block.find("a")), None)
        if first_link is None:
            return None
        try:
            sku = first_link.attrs["data-app-insights-track-search-doc-id"]
        except KeyError:
            return None

        prices = PRICE_RE.findall(block.text)
        # The name is the first text line that does not contain "!".
        plain_lines = [
            line for line in block.text.split("\n") if "!" not in line
        ]
        name = plain_lines[0]
        price = float(prices[0]) if prices else None

        return cls(sku, name, price)
Beispiel #17
0
 def parse_poll_options(poll_element: Element):
     """Parse every option of a poll into a dict keyed by option rank.

     Each value is a dict with the keys ``text``, ``vote_count`` and
     ``rank``.
     """
     options_by_rank = {}
     for option_el in poll_element.find(OldExamSelectors.poll_option):
         # Single-target unpacking: raises ValueError unless exactly one
         # matching node exists.
         [text_el] = option_el.find(OldExamSelectors.poll_option_text)
         raw_text = re.search(
             r'[A-ZÄÖ, ]+', text_el.text, re.IGNORECASE).group(0)
         rank, text = get_option_rank(raw_text)

         [votes_el] = option_el.find(OldExamSelectors.option_vote_count)
         votes_match = re.search("([0-9]+) ä", votes_el.text)
         vote_count = int(votes_match.group(1))

         options_by_rank[rank] = {
             "text": text,
             "vote_count": vote_count,
             "rank": rank,
         }
     return options_by_rank
Beispiel #18
0
    def get_pagination_details(self, page: Element) -> List[Dict[str, str]]:
        """Extract the pager links of a html page.

        :param page: html page to process to extract paging details
        :type page: Element
        :return: list of paging details, one dict per pager link with the
            keys ``page_no`` and ``event_target``
        :rtype: List[Dict[str, str]]
        """
        paging = page.find('div.results-paging', first=True)
        if not paging:
            return []

        # The event target is the element id with "_" swapped for "$".
        return [
            {
                'page_no': pager_link.text,
                'event_target': pager_link.attrs.get('id').replace('_', '$'),
            }
            for pager_link in paging.find('.pagerLink')
        ]
Beispiel #19
0
    def get_jobs(self, page: Element, page_no: str) -> List[Job]:
        """Collect the job postings from the results table of a html page.

        :param page: html page to process to extract jobs.
        :type page: Element
        :param page_no: the page number being processed
        :type page_no: str
        :return: list of jobs
        :rtype: List[Job]
        """
        table = page.find('table#tableResults', first=True)
        if not table:
            return []

        jobs: List[Job] = []
        for row in table.find('tr'):
            cells = row.find('td')
            if not cells:
                # Rows without any <td> cell carry no job data.
                continue

            # The hash identifies a posting by the text of all its cells.
            joined_cells = '::'.join([cell.text for cell in cells])
            digest = hashlib.sha256(joined_cells.encode('utf-8')).hexdigest()

            payload = {
                'title': cells[0].text.replace('\xa0', ' '),
                'href': cells[0].find('a', first=True).attrs['href'],
                'location': cells[1].text,
                'job-family': cells[2].text,
                'deadline': cells[3].text,
            }
            jobs.append(Job(page_no=page_no, hash=digest, data=payload))

        return jobs
 def get_player_stats(cls, element: Element, url: str, name: str) -> dict:
     """
     Player statistics.

     Reads one statistic per table cell, starting at the fifth ``td`` of
     ``element``, and returns them together with ``url`` and ``name``.
     """
     # Column labels in table order; the first label maps to td:nth-child(5).
     columns = (
         'G', 'PA', 'AB', 'R', 'H', '2B', '3B', 'HR', 'RBI', 'SB', 'CS',
         'BB', 'SO', 'BA', 'OBP', 'SLG', 'OPS', 'TB', 'GIDP', 'HBP', 'SH',
         'SF', 'IBB',
     )
     stats = {'url': url, 'name': name}
     for position, label in enumerate(columns, start=5):
         cell = element.find('td:nth-child({})'.format(position), first=True)
         stats[label] = cell.text
     return stats
Beispiel #21
0
 def showcase_image_parse(element: Element) -> Optional[str]:
     """Return the ``src`` of the first ``img`` node, or ``None`` if absent."""
     image = element.find("img", first=True)
     if not image:
         return None
     return image.attrs["src"]
Beispiel #22
0
 def trial_product_parse(element: Element) -> Optional[bool]:
     """Return ``True`` when the "Trial Product" flag is present, else ``None``."""
     trial_flag = element.find(".product-flag-trial",
                               containing="Trial Product",
                               first=True)
     return True if trial_flag else None
Beispiel #23
0
 def clearance_parse(element: Element) -> Optional[bool]:
     """Return ``True`` when the "Clearance" flag is present, else ``None``."""
     clearance_flag = element.find(".product-flag-clearance",
                                   containing="Clearance",
                                   first=True)
     return True if clearance_flag else None
Beispiel #24
0
 def rating_count_parse(element: Element) -> Optional[int]:
     """Return the number of ratings.

     Returns:
         The integer text of the first ``span`` inside ``.rating-count``, or
         ``None`` when there is no ``.rating-count`` node.
     """
     rating_badge = element.find(".rating-count", first=True)
     if not rating_badge:
         return None
     count_span = rating_badge.find("span", first=True)
     return int(count_span.text)
Beispiel #25
0
 def stars_parse(element: Element) -> Optional[float]:
     """Return the star rating.

     Returns:
         The number preceding ``"/5"`` in the ``title`` attribute of the
         ``.stars`` node, or ``None`` when there is no such node.
     """
     stars_node = element.find(".stars", first=True)
     if not stars_node:
         return None
     title = stars_node.attrs["title"]
     scale_pos = title.find("/5")
     return float(title[:scale_pos])
Beispiel #26
0
 def best_seller_parse(element: Element) -> Optional[bool]:
     """Return ``True`` when the "Best Seller" flag is present, else ``None``."""
     best_seller_flag = element.find(".product-best-seller",
                                     containing="Best Seller",
                                     first=True)
     return True if best_seller_flag else None
Beispiel #27
0
 def shipping_saver_parse(element: Element) -> Optional[bool]:
     """Return ``True`` when the "Shipping Saver" flag is present, else ``None``."""
     saver_flag = element.find(".shipping-saver",
                               containing="Shipping Saver",
                               first=True)
     return True if saver_flag else None
Beispiel #28
0
 def dimensions_parse(element: Element) -> Optional[str]:
     """Return the product dimensions.

     Returns:
         The text between the first two newlines of the ``li`` node
         containing "Dimensions" (as extracted by ``parse_html_text_btw``),
         or ``None`` when no such node exists.
     """
     dimensions_item = element.find("li", containing="Dimensions", first=True)
     if not dimensions_item:
         return None
     return parse_html_text_btw(dimensions_item.text, "\n", "\n")
Beispiel #29
0
 def in_stock_parse(element: Element) -> Optional[bool]:
     """Return the stock status.

     Returns:
         ``False`` when an "Out of Stock" notice is present, ``True`` when an
         "In Stock" notice is present, ``None`` when neither is found. The
         out-of-stock notice is checked first.
     """
     checks = (
         (".text-danger", "Out of Stock", False),
         (".text-primary", "In Stock", True),
     )
     for selector, label, status in checks:
         if element.find(selector, containing=label, first=True):
             return status
     return None
Beispiel #30
0
 def upc_parse(element: Element) -> Optional[str]:
     """Return the UPC code.

     Returns:
         The text after the first ``": "`` in the ``li`` node containing
         "UPC Code", or ``None`` when no such node exists.
     """
     upc_item = element.find("li", containing="UPC Code", first=True)
     if not upc_item:
         return None
     item_text = upc_item.text
     value_start = item_text.find(": ") + 2
     return item_text[value_start:]