コード例 #1
0
    def get_book_data_from_site(self, url):
        """
        args:
            url (String):
                url of one specific book page to be parsed
        returns:
            SiteBookData (List):
                format (String):
                book_title (String):
                book_image:~
                book_image_url (String):
                isbn_13 (String):
                description (String):
                series (String):~
                volume_number (String):~
                subtitle (String):~
                authors (String):
                book_id (String):
                site_slug (String):
                parse_status (String):~
                url (String):
                content (String):
                ready_for_sale (boolean):~
                extra:~
        synopsis:
            Downloads the page at url with requests.get and hands the
            response to this class's _get_book_* helpers, collecting the
            results into a 17 element list in the order shown above.
            The url element is the url argument unchanged, content is
            the raw response body, and extra is always None here.

            NOTE(review): the previous docstring named "Google Books" in
            args but "Audiobooks" in the synopsis; confirm which site
            this scraper targets.  The "~" marks are carried over from
            the previous docstring; their meaning is not shown in this
            method.
        """
        response = requests.get(url)
        page = response.content

        # The helpers run in the same order as before.  Several of them
        # take no page argument in this scraper.
        book_title = self._get_book_title(page)
        book_image_url = self._get_book_image_url(page)
        book_image = Par_Scrape.get_book_image_from_image_url(book_image_url)
        isbn_13 = self._get_book_isbn_13()
        description = self._get_book_description(page)
        series = self._get_book_series()
        volume_number = self._get_book_volume()
        subtitle = self._get_book_subtitle()
        authors = self._get_book_authors(page)
        site_slug = self._get_book_site_slug()
        # book_id is derived from the final response url, not the page body.
        book_id = self._get_book_id(response.url)
        # Named book_format so the builtin format() is not shadowed.
        book_format = self._get_book_format()
        ready_for_sale = self._get_book_sale_status(page)
        extra = None

        # isbn_13, series, volume_number, subtitle and extra are not
        # part of the parse-status inputs in this scraper.
        parse_status = Par_Scrape.parse_status([
            book_format, book_title, book_image, book_image_url,
            description, authors, book_id, site_slug, url, page,
            ready_for_sale
        ])

        SiteBookData = [
            book_format, book_title, book_image, book_image_url, isbn_13,
            description, series, volume_number, subtitle, authors, book_id,
            site_slug, parse_status, url, page, ready_for_sale, extra
        ]

        return SiteBookData
コード例 #2
0
    def get_book_data_from_site(self, url):
        """
        args:
            url (String):
                url of a single Kobo book page
        returns:
            SiteBookData (List), in this order:
                format (String):
                book_title (String):
                book_image:~
                book_image_url (String):
                isbn_13 (String):
                description (String):
                series (String):~
                volume_number (Int):~
                subtitle (String):~
                authors (String):
                book_id:
                site_slug (String):
                parse_status (String):~
                url (String):
                content (String):
                ready_for_sale (boolean):~
                extra:~
        synopsis:
            Requests the given Kobo book page and runs the response body
            through the _get_book_* helpers of this class.  The url put
            in the result is the one _get_book_url reads out of the page
            content, not the url argument.  extra is an empty dict.
            Per the original documentation this is meant for both
            digital books and audio books.
        """
        response = requests.get(url)
        html = response.content

        # Field extraction, one helper per field.
        title = self._get_book_title(html)
        image_url = self._get_book_image_url(html)
        image = Par_Scrape.get_book_image_from_image_url(image_url)
        isbn = self._get_book_isbn_13(html)
        summary = self._get_book_description(html)
        series_name = self._get_book_series(html)
        volume = self._get_book_volume_number(html)
        sub = self._get_book_subtitle(html)
        writers = self._get_book_authors(html)
        parsed_url = self._get_book_url(html)
        slug = self._get_book_site_slug()
        identifier = self._get_book_id(html)
        kind = self._get_book_format(html)
        available = self._get_book_availability(html)

        # series, volume_number, subtitle and extra are left out of the
        # values handed to parse_status.
        status = Par_Scrape.parse_status([
            kind, title, image, image_url, isbn, summary, writers,
            identifier, slug, parsed_url, html, available
        ])

        return [
            kind, title, image, image_url, isbn, summary, series_name,
            volume, sub, writers, identifier, slug, status, parsed_url,
            html, available, {}
        ]
コード例 #3
0
    def get_book_data_from_site(self, url):
        """
        args:
            url (String):
                url of the book page, passed to self.__fetch__
        returns:
            SiteBookData (List), in this order:
                format, book_title, book_image, book_image_url,
                isbn_13, description, series, volume_number, subtitle,
                authors, book_id, site_slug, parse_status, url,
                content, ready_for_sale, extra
        synopsis:
            Fetches the page content with self.__fetch__(url) and fills
            the list from the __get_*__ helpers of this class.
            format is always 'DIGITAL', site_slug is always 'LC' and
            ready_for_sale is always True.  series, volume_number and
            extra are always None.  The url in the result is whatever
            __get_url__ extracts from the content, not the argument.
        """
        content = self.__fetch__(url)

        # Named book_format so the builtin format() is not shadowed.
        book_format = 'DIGITAL'
        book_title = self.__get_title__(content)
        subtitle = self.__get_subtitle__(content)
        book_image_url = self.__get_book_image_url__(content)
        book_image = Par_Scrape.get_book_image_from_image_url(book_image_url)
        isbn_13 = self.__get_isbn__(content)
        description = self.__get_description__(content)
        authors = self.__get_authors__(content)
        book_id = self.__get_book_id__(content)
        site_slug = 'LC'
        # From here on url is the value parsed out of the page.
        url = self.__get_url__(content)
        ready_for_sale = True

        # These fields are not extracted by this scraper.
        series = None
        volume_number = None
        extra = None

        # A missing subtitle is left out of the parse-status inputs; when
        # present it sits between description and authors, as before.
        parse_list = [
            book_format, book_title, book_image, book_image_url, isbn_13,
            description
        ]
        if subtitle is not None:
            parse_list.append(subtitle)
        parse_list += [
            authors, book_id, site_slug, url, content, ready_for_sale
        ]

        parse_status = Par_Scrape.parse_status(parse_list)

        SiteBookData = [
            book_format, book_title, book_image, book_image_url, isbn_13,
            description, series, volume_number, subtitle, authors, book_id,
            site_slug, parse_status, url, content, ready_for_sale, extra
        ]

        return SiteBookData