Example #1
0
 def test_author_image_url(self):
     """Check get_image_url against the live Robert_C_Martin author page.

     The page is downloaded from Goodreads, so this test needs network access.
     """
     info_author = webscraper_author.WebScraperAuthor()
     # Bound the request so an unresponsive server fails the test instead of
     # hanging the whole run (requests has no default timeout).
     page = requests.get(
         'https://www.goodreads.com/author/show/45372.Robert_C_Martin',
         timeout=10)
     # Surface HTTP errors directly rather than as an assertion mismatch on
     # whatever an error page happens to contain.
     page.raise_for_status()
     soup = BeautifulSoup(page.content, "html.parser")
     image_url = info_author.get_image_url(soup)
     assert image_url == "https://images.gr-assets.com/authors/1490470967p5/45372.jpg"
Example #2
0
 def test_author_review_count(self):
     """Check get_review_count against the live Robert_C_Martin author page.

     The page is downloaded from Goodreads, so this test needs network access.
     Only the ``rightContainer`` div of the page is handed to the scraper.
     """
     info_author = webscraper_author.WebScraperAuthor()
     # Bound the request so an unresponsive server fails the test instead of
     # hanging the whole run (requests has no default timeout).
     page = requests.get(
         'https://www.goodreads.com/author/show/45372.Robert_C_Martin',
         timeout=10)
     # Surface HTTP errors here; on an error page the find() below would
     # return None and the failure would show up inside the scraper instead.
     page.raise_for_status()
     soup = BeautifulSoup(page.content, "html.parser")
     contents = soup.find('div', class_="rightContainer")
     review_count = info_author.get_review_count(contents)
     assert review_count == "1562"
Example #3
0
 def get_data(self):
     """
     Crawl Goodreads starting from the Clean Code book page.

     Starting at https://www.goodreads.com/book/show/3735293-clean-code, each
     pass of the loop scrapes the current book page and its author page, then
     moves on to the URL returned by find_next_book. The loop runs until both
     self.num_books and self.num_authors have been counted down to zero; a
     book and an author are scraped on every pass, even when only one of the
     two counters is still positive.

     Afterwards the collected records are written to books.json and
     authors.json, and database.PyMongo is asked to upload and then download.
     :return: None; the records are kept in self.output_books_list and
         self.output_authors_list and written to the two JSON files
     """
     url = 'https://www.goodreads.com/book/show/3735293-clean-code'
     book_name = self.find_book_name(url)
     self.books_list.append(book_name)
     print(self.books_list)
     # Compare with "> 0" rather than "!= 0": a negative counter can never be
     # decremented to zero, so the old test would keep crawling forever.
     while self.num_books > 0 or self.num_authors > 0:
         info_book = webscraper_book.WebScraperBook()
         info_book.get_info(url)
         curr_book = info_book.store_data()
         self.output_books_list.append(curr_book)
         if self.num_books > 0:
             self.num_books -= 1
         print(self.num_books)

         author_url = self.find_author_url(url)
         info_author = webscraper_author.WebScraperAuthor()
         info_author.get_info(author_url)
         curr_author = info_author.store_data()
         self.output_authors_list.append(curr_author)
         if self.num_authors > 0:
             self.num_authors -= 1
         print(self.num_authors)

         url = self.find_next_book(url)

     # Serialize before opening each file so a serialization error does not
     # leave a truncated file behind.
     json_object_book = json.dumps(self.output_books_list, indent=4)
     with open('books.json', 'w', encoding='utf-8') as file:
         file.write(json_object_book)
     json_object_author = json.dumps(self.output_authors_list, indent=4)
     with open('authors.json', 'w', encoding='utf-8') as file:
         file.write(json_object_author)

     # NOTE(review): presumably PyMongo picks up the two JSON files written
     # above -- confirm against the database module.
     information = database.PyMongo()
     information.upload_to_database()
     information.download_from_database()
Example #4
0
 def test_author_id(self):
     """get_author_id should yield "45372" for the Robert_C_Martin author URL."""
     scraper = webscraper_author.WebScraperAuthor()
     expected_id = "45372"
     actual_id = scraper.get_author_id(
         'https://www.goodreads.com/author/show/45372.Robert_C_Martin')
     assert actual_id == expected_id