Ejemplo n.º 1
0
def get_book_info(url_list):
    """Fetch book metadata from the Google Books API and store new books.

    For every URL whose ISBN is not already in ``db_isbn``, the URL is
    requested, the first returned volume is read, and a ``Book`` is built
    from it and committed.  Any field missing from the response (or every
    field, when the response holds no volume) gets a placeholder value.

    Args:
        url_list: iterable of query URLs.  The ISBN is the text between the
            first '+' of the URL and the next '&'.

    Returns:
        None.  Each new book's title is printed and the book is stored via
        ``Book.commit_to_db()``.

    Raises:
        IndexError: if a URL contains no '+'.
        Errors raised by ``urllib2.urlopen`` or ``json.load`` are not caught.
    """
    default_book_title = 'ISBN is not matched with title'
    default_book_author = 'ISBN is not matched with an author'
    default_book_cover_sm = None
    default_book_cover_md = None
    default_book_description = "ISBN is not matched with a description"

    for url in url_list:
        isbn = url.split('+')[1].split('&')[0]
        if isbn in db_isbn:
            continue

        # Throttle only when a request is actually about to be sent; known
        # ISBNs are skipped without waiting.
        time.sleep(1)

        request = urllib2.Request(url, headers={'User-agent': 'Mozilla/11.0'})
        response = urllib2.urlopen(request)
        try:
            book_dict = json.load(response)
        finally:
            # urllib2 responses are not context managers; close explicitly.
            response.close()

        # An empty dict makes every lookup below fall back to its default,
        # which covers both "no match" and "match with missing fields".
        items = book_dict.get('items') or []
        if book_dict.get('totalItems', 0) > 0 and items:
            volume = items[0].get('volumeInfo', {})
        else:
            volume = {}

        book_title = volume.get('title', default_book_title)

        # Either thumbnail size may be absent on its own.
        image_links = volume.get('imageLinks', {})
        book_cover_sm = image_links.get('smallThumbnail', default_book_cover_sm)
        book_cover_md = image_links.get('thumbnail', default_book_cover_md)

        # Only the first listed author is kept.
        authors = volume.get('authors')
        book_author = authors[0] if authors else default_book_author

        # Descriptions longer than 750 characters are cut to 700 plus a
        # marker; anything up to 750 is stored whole.
        book_description = volume.get('description', default_book_description)
        if len(book_description) > 750:
            book_description = book_description[0:700] + "[...]"

        new_book = Book(title=book_title.encode('ascii', 'replace'),
                        author=book_author.encode('ascii', 'replace'),
                        description=book_description.encode('ascii', 'replace'),
                        isbn=isbn,
                        image_url_sm=book_cover_sm,
                        image_url_md=book_cover_md)

        print(new_book.title)
        new_book.commit_to_db()
Ejemplo n.º 2
0
def _load_book_json(url):
    """Request *url* and return its decoded JSON body, closing the response."""
    request = urllib2.Request(url, headers={'User-agent': 'Mozilla/11.0'})
    response = urllib2.urlopen(request)
    try:
        return json.load(response)
    finally:
        # urllib2 responses are not context managers; close explicitly.
        response.close()


def get_book_info(url_list):
    """Look up each URL on the Google Books API and save unseen books.

    URLs whose ISBN is already in ``db_isbn`` are skipped.  For the rest,
    the first volume of the response supplies title, first author, cover
    links and description; placeholders are used for anything missing,
    including the case where no volume is returned at all.  A ``Book`` is
    then created, its title printed, and ``commit_to_db()`` called on it.

    Args:
        url_list: iterable of query URLs.  The ISBN is the text between the
            first '+' of the URL and the next '&'.

    Returns:
        None.

    Raises:
        IndexError: if a URL contains no '+'.
        Errors raised by ``urllib2.urlopen`` or ``json.load`` are not caught.
    """
    default_book_title = 'ISBN is not matched with title'
    default_book_author = 'ISBN is not matched with an author'
    default_book_cover_sm = None
    default_book_cover_md = None
    default_book_description = "ISBN is not matched with a description"

    for url in url_list:
        isbn = url.split('+')[1].split('&')[0]
        if isbn in db_isbn:
            continue

        # Wait between requests only; skipped ISBNs cost no delay.
        time.sleep(1)
        book_dict = _load_book_json(url)

        # Fall back to an empty volume so every field resolves to its
        # placeholder when nothing matched or 'items' is absent.
        volume = {}
        if book_dict.get('totalItems', 0) > 0:
            items = book_dict.get('items') or []
            if items:
                volume = items[0].get('volumeInfo', {})

        book_title = volume.get('title', default_book_title)

        # The two thumbnail sizes are looked up independently because one
        # may be present without the other.
        image_links = volume.get('imageLinks', {})
        book_cover_sm = image_links.get('smallThumbnail',
                                        default_book_cover_sm)
        book_cover_md = image_links.get('thumbnail', default_book_cover_md)

        # Only the first listed author is kept; an empty list counts as
        # missing.
        authors = volume.get('authors')
        if authors:
            book_author = authors[0]
        else:
            book_author = default_book_author

        # Over 750 characters: keep the first 700 and append a marker.
        book_description = volume.get('description',
                                      default_book_description)
        if len(book_description) > 750:
            book_description = book_description[0:700] + "[...]"

        new_book = Book(title=book_title.encode('ascii', 'replace'),
                        author=book_author.encode('ascii', 'replace'),
                        description=book_description.encode(
                            'ascii', 'replace'),
                        isbn=isbn,
                        image_url_sm=book_cover_sm,
                        image_url_md=book_cover_md)

        print(new_book.title)
        new_book.commit_to_db()