def test_cover_gallery_pages(mock_get):
    """Check the page count reported for two series cover galleries.

    ``mock_get`` is not referenced in the body.
    NOTE(review): presumably a fixture that stubs network access -- confirm
    where it is defined.
    """
    expected_page_counts = (
        ("https://www.comics.org/series/7768/covers/", 2),
        ("https://www.comics.org/series/31350/covers/", 1),
    )
    for gallery_url, page_count in expected_page_counts:
        gallery_soup = webscraper.transform_simple_get_html(
            webscraper.simple_get(gallery_url)
        )
        assert webscraper.cover_gallery_pages(gallery_soup) == page_count
def test_get_all_issue_metadata(mock_get):
    """Check that issue metadata is a dict holding only known keys.

    Two issue pages are parsed: the first is loaded with ``simple_get`` plus
    ``transform_simple_get_html``, the second with ``get_soup``. For both,
    every returned key must belong to the allowed set (keys may be missing).

    ``mock_get`` is not referenced in the body.
    NOTE(review): presumably a fixture that stubs network access -- confirm
    where it is defined.
    """
    allowed_keys = {
        "on_sale_date",
        "indicia_frequency",
        "issue_indicia_publisher",
        "issue_brand",
        "issue_price",
        "issue_pages",
        "format_color",
        "format_dimensions",
        "format_paper_stock",
        "format_binding",
        "format_publishing_format",
        "rating",
        "indexer_notes",
        "synopsis",
    }

    first_soup = webscraper.transform_simple_get_html(
        webscraper.simple_get("https://www.comics.org/issue/370657/")
    )
    first_metadata = webscraper.get_all_issue_metadata(first_soup)
    assert type(first_metadata) is dict
    # An empty difference means no unexpected key was returned.
    assert not set(first_metadata.keys()) - allowed_keys

    second_soup = webscraper.get_soup("https://www.comics.org/issue/21497/")
    second_metadata = webscraper.get_all_issue_metadata(second_soup)
    assert not set(second_metadata.keys()) - allowed_keys
def test_get_issue_cover_metadata(mock_get):
    """Check that issue cover metadata is a dict holding only known keys.

    The first issue page must yield a dict whose keys all belong to the
    expected set (keys may be missing). The second issue page is parsed as
    well and its result must also be a dict.

    ``mock_get`` is not referenced in the body.
    NOTE(review): presumably a fixture that stubs network access -- confirm
    where it is defined.
    """
    expected_keys = {
        "cover_pencils",
        "cover_inks",
        "cover_colors",
        "cover_letters",
        "cover_first line of dialogue or text",
        "cover_genre",
        "cover_characters",
        "cover_keywords",
    }

    issue_url = "https://www.comics.org/issue/370657/"
    issue_html = webscraper.simple_get(issue_url)
    issue_soup = webscraper.transform_simple_get_html(issue_html)
    issue_cover_metadata = webscraper.get_issue_cover_metadata(issue_soup)
    assert isinstance(issue_cover_metadata, dict)
    assert set(issue_cover_metadata.keys()).difference(expected_keys) == set()

    issue_url = "https://www.comics.org/issue/21497/"
    issue_soup = webscraper.get_soup(issue_url)
    issue_cover_metadata = webscraper.get_issue_cover_metadata(issue_soup)
    # Previously this result was computed and then discarded, so the second
    # page was only checked for "does not raise". Pin the return type at
    # least. The key-subset check is intentionally not applied here because
    # the original test never asserted it for this page.
    assert isinstance(issue_cover_metadata, dict)
def test_parse_series_from_publisher_page(mock_get):
    """Check that a publisher page parses into a non-empty DataFrame.

    ``mock_get`` is not referenced in the body.
    NOTE(review): presumably a fixture that stubs network access -- confirm
    where it is defined.
    """
    page_soup = webscraper.transform_simple_get_html(
        webscraper.simple_get("https://www.comics.org/publisher/54/?page=1")
    )
    series_frame = webscraper.parse_series_from_publisher_page(page_soup)
    assert type(series_frame) is DataFrame
    assert len(series_frame) > 0
def test_get_issue_metadata(mock_get):
    """Check individual metadata fields looked up by name on an issue page.

    ``mock_get`` is not referenced in the body.
    NOTE(review): presumably a fixture that stubs network access -- confirm
    where it is defined.
    """
    page_soup = webscraper.transform_simple_get_html(
        webscraper.simple_get("https://www.comics.org/issue/370657/")
    )
    # Dict insertion order keeps the lookups in their original sequence.
    expected_fields = {
        "on_sale_date": "2007-08-15",
        "issue_indicia_publisher": "DC Comics",
    }
    for field_name, expected_value in expected_fields.items():
        assert (
            webscraper.get_issue_metadata(page_soup, name=field_name)
            == expected_value
        )
def test_get_cover_credits_from_cover_page_2(mock_get):
    """Check that a cover page with a variant yields exactly two named covers.

    The first record of the metadata JSONL file is passed as the second
    argument to ``get_cover_credits_from_cover_page``.

    ``mock_get`` is not referenced in the body.
    NOTE(review): presumably a fixture that stubs network access -- confirm
    where it is defined.
    """
    cover_page_soup = webscraper.transform_simple_get_html(
        webscraper.simple_get("https://www.comics.org/issue/1179057/cover/4/")
    )
    metadata_records = webscraper.read_jsonl("./comics_net/resources/metadata.jsonl")
    cover_credits = webscraper.get_cover_credits_from_cover_page(
        cover_page_soup, metadata_records[0]
    )

    cover_names = list(cover_credits["covers"].keys())
    assert len(cover_names) == 2
    assert "Original" in cover_names
    assert "Scribblenauts Unmasked Variant Cover" in cover_names
def test_get_cover_credits_from_cover_page(mock_get):
    """Check the structure of the cover credits parsed from cover pages.

    For the first page the result must be a dict with no top-level key other
    than ``"covers"``, exactly one cover named ``"Direct"``, and only known
    keys on that cover. For the second page the ``"Original"`` cover must not
    carry a ``"cover_awards"`` key. The first record of the metadata JSONL
    file is passed as the second argument in both calls.

    ``mock_get`` is not referenced in the body.
    NOTE(review): presumably a fixture that stubs network access -- confirm
    where it is defined.
    """
    first_soup = webscraper.transform_simple_get_html(
        webscraper.simple_get("https://www.comics.org/issue/36858/cover/4/")
    )
    metadata_records = webscraper.read_jsonl("./comics_net/resources/metadata.jsonl")
    first_credits = webscraper.get_cover_credits_from_cover_page(
        first_soup, metadata_records[0]
    )

    assert type(first_credits) is dict
    # Subset checks: no key outside the allowed set may appear.
    assert set(first_credits.keys()) <= {"covers"}
    assert set(first_credits["covers"].keys()) == {"Direct"}

    allowed_cover_keys = {
        "cover_pencils",
        "cover_inks",
        "cover_colors",
        "cover_letters",
        "cover_genre",
        "cover_characters",
        "cover_keywords",
        "cover_image_file_name",
        "save_to",
        "image_url",
    }
    assert set(first_credits["covers"]["Direct"].keys()) <= allowed_cover_keys

    second_soup = webscraper.get_soup("https://www.comics.org/issue/21497/cover/4/")
    second_credits = webscraper.get_cover_credits_from_cover_page(
        second_soup, metadata_records[0]
    )
    assert "cover_awards" not in second_credits["covers"]["Original"].keys()
def test_get_issue_title(mock_get):
    """Check the title extracted from an issue page.

    ``mock_get`` is not referenced in the body.
    NOTE(review): presumably a fixture that stubs network access -- confirm
    where it is defined.
    """
    page_soup = webscraper.transform_simple_get_html(
        webscraper.simple_get("https://www.comics.org/issue/370657/")
    )
    assert webscraper.get_issue_title(page_soup) == "Action Comics #854"
def test_transform_simple_get_html(mock_get):
    """Check that fetched HTML is converted into a ``BeautifulSoup`` object.

    ``mock_get`` is not referenced in the body.
    NOTE(review): presumably a fixture that stubs network access -- confirm
    where it is defined.
    """
    raw_html = webscraper.simple_get("https://www.comics.org/publisher/54/?page=1")
    parsed = webscraper.transform_simple_get_html(raw_html)
    assert type(parsed) is BeautifulSoup
def test_simple_get(mock_get):
    """Check that ``simple_get`` returns the page content as ``bytes``.

    ``mock_get`` is not referenced in the body.
    NOTE(review): presumably a fixture that stubs network access -- confirm
    where it is defined.
    """
    response_body = webscraper.simple_get(
        "https://www.comics.org/publisher/54/?page=1"
    )
    assert type(response_body) is bytes