Exemple #1
0
    def _parse_tooltip_info(self, book_data, book_id, image_url):
        '''Parses book information retrieved from a goodreads tooltips link

        Args:
            book_data: parsed tooltip markup, queried through its xpath()
                method (presumably an lxml tree - confirm against caller)
            book_id: goodreads book id as a string; appended to
                '/buttons/glide/' when the ASIN is missing from the tooltip
            image_url: stored unchanged under 'imageUrl'

        Returns:
            A dict describing the featured recommendation, or None when no
            ASIN or no description text can be found.
        '''
        title = book_data.xpath('//a[contains(@class, "readable")]')
        title = title[0].text if title else None
        authors = book_data.xpath('//a[contains(@class, "authorName")]')
        authors = [authors[0].text] if authors else None

        # Rating details are optional: a missing or unexpectedly worded
        # minirating leaves both values as None instead of raising.
        rating = None
        num_of_reviews = None
        rating_info = book_data.xpath(
            '//div[@class="bookRatingAndPublishing"]/span[@class="minirating"]'
        )
        if rating_info:
            rating_string = rating_info[0].text_content().strip().replace(
                ',', '').split()
            try:
                # The average is the token right before 'avg'; the review
                # count is the second-to-last token.
                rating = float(rating_string[rating_string.index('avg') - 1])
                num_of_reviews = int(rating_string[-2])
            except (ValueError, IndexError):
                rating = None
                num_of_reviews = None

        try:
            asin_elements = book_data.xpath(
                '//a[contains(@class, "kindlePreviewButtonIcon")]/@href')
            book_asin = urllib.parse.parse_qs(
                urllib.parse.urlsplit(asin_elements[0]).query)["asin"][0]
        except (KeyError, IndexError):
            book_asin = None

        # We should get the ASIN from the tooltips file, but just in case we'll
        # keep this as a fallback (though this only works in some regions - just USA?)
        if not book_asin:
            asin_data_page = open_url(self._connection,
                                      '/buttons/glide/' + book_id)
            book_asin = GOODREADS_ASIN_PAT.search(asin_data_page)
            if not book_asin:
                return None
            book_asin = book_asin.group(1)

        # Prefer the span that is not the "freeTextContainer"; use the
        # container span when the preferred one is absent or has no leading
        # text (.text is None when the span starts with a child element).
        desc_queries = (
            '//div[@class="addBookTipDescription"]//span[not(contains(@id, "freeTextContainer"))]',
            '//div[@class="addBookTipDescription"]//span[contains(@id, "freeTextContainer")]',
        )
        desc_text = None
        for query in desc_queries:
            spans = book_data.xpath(query)
            if spans and spans[0].text is not None:
                desc_text = spans[0].text
                break
        if desc_text is None:
            return None
        desc = re.sub(r'\s+', ' ', desc_text).strip()

        return {
            'class': 'featuredRecommendation',
            'asin': book_asin,
            'title': title,
            'authors': authors,
            'imageUrl': image_url,
            'description': desc,
            'hasSample': False,
            'amazonRating': rating,
            'numberOfReviews': num_of_reviews
        }
    def _parse_tooltip_info(self, book_data, book_id, image_url):
        '''Parses book information retrieved from a goodreads tooltips link

        Args:
            book_data: parsed tooltip markup, queried through its xpath()
                method (presumably an lxml tree - confirm against caller)
            book_id: goodreads book id as a string; appended to
                '/buttons/glide/' when the ASIN is missing from the tooltip
            image_url: stored unchanged under 'imageUrl'

        Returns:
            A dict describing the featured recommendation, or None when no
            ASIN or no description text can be found.
        '''
        title = book_data.xpath('//a[contains(@class, "readable")]')
        title = title[0].text if title else None
        authors = book_data.xpath('//a[contains(@class, "authorName")]')
        authors = [authors[0].text] if authors else None

        # Rating details are optional: a missing or unexpectedly worded
        # minirating leaves both values as None instead of raising.
        rating = None
        num_of_reviews = None
        rating_info = book_data.xpath('//div[@class="bookRatingAndPublishing"]/span[@class="minirating"]')
        if rating_info:
            rating_string = rating_info[0].text_content().strip().replace(',', '').split()
            try:
                # The average is the token right before 'avg'; the review
                # count is the second-to-last token.
                rating = float(rating_string[rating_string.index('avg')-1])
                num_of_reviews = int(rating_string[-2])
            except (ValueError, IndexError):
                rating = None
                num_of_reviews = None

        try:
            asin_elements = book_data.xpath('//a[contains(@class, "kindlePreviewButtonIcon")]/@href')
            book_asin = urlparse.parse_qs(urlparse.urlsplit(asin_elements[0]).query)["asin"][0]
        except (KeyError, IndexError):
            book_asin = None

        # We should get the ASIN from the tooltips file, but just in case we'll
        # keep this as a fallback (though this only works in some regions - just USA?)
        if not book_asin:
            asin_data_page = open_url(self._connection, '/buttons/glide/' + book_id)
            book_asin = GOODREADS_ASIN_PAT.search(asin_data_page)
            if not book_asin:
                return None
            book_asin = book_asin.group(1)

        # Prefer the span that is not the "freeTextContainer"; use the
        # container span when the preferred one is absent or has no leading
        # text (.text is None when the span starts with a child element).
        desc_queries = (
            '//div[@class="addBookTipDescription"]//span[not(contains(@id, "freeTextContainer"))]',
            '//div[@class="addBookTipDescription"]//span[contains(@id, "freeTextContainer")]',
        )
        desc_text = None
        for query in desc_queries:
            spans = book_data.xpath(query)
            if spans and spans[0].text is not None:
                desc_text = spans[0].text
                break
        if desc_text is None:
            return None
        desc = re.sub(r'\s+', ' ', desc_text).strip()

        return {'class': 'featuredRecommendation',
                'asin': book_asin,
                'title': title,
                'authors': authors,
                'imageUrl': image_url,
                'description': desc,
                'hasSample': False,
                'amazonRating': rating,
                'numberOfReviews': num_of_reviews}
Exemple #3
0
    def search_for_asin_on_goodreads(self, url):
        '''Looks up the ASIN of the book at the given goodreads url

        The book id is pulled out of url with BOOK_ID_PAT, the matching
        '/buttons/glide/<id>' page is fetched over the 'goodreads' connection
        and GOODREADS_ASIN_PAT is searched for in the response.

        Returns the first captured group of the ASIN match, or None when the
        url holds no book id, the page does not exist, or no ASIN is found.
        '''
        id_match = BOOK_ID_PAT.search(url)
        if not id_match:
            return None

        glide_path = '/buttons/glide/' + id_match.group(1)
        try:
            page = open_url(self._connections['goodreads'], glide_path)
        except PageDoesNotExist:
            return None

        asin_match = GOODREADS_ASIN_PAT.search(page)
        return asin_match.group(1) if asin_match else None