Example #1
0
def test_get_top_level_genre_urls():
    # The first-page fixture should yield 16 top-level genre links; the
    # first two are pinned by their exact href.
    leading_hrefs = (
        "https://itunes.apple.com/us/genre/podcasts-arts/id1301?mt=2",
        "https://itunes.apple.com/us/genre/podcasts-business/id1321?mt=2",
    )
    first_page = Scraper(read_text_from_file(first_page_filename))
    links = first_page.get_top_level_genre_urls()

    assert_equal(len(links), 16)
    for position, expected_href in enumerate(leading_hrefs):
        assert_equal(links[position].href, expected_href)
Example #2
0
def test_get_subgenre_tags():
    # The first-page fixture should expose 6 subgenre tags, beginning with
    # "Design" and "Fashion & Beauty".
    leading_names = ("Design", "Fashion & Beauty")
    first_page = Scraper(read_text_from_file(first_page_filename))
    tags = first_page.get_subgenre_tags()

    assert_equal(len(tags), 6)
    for position, expected_name in enumerate(leading_names):
        assert_equal(tags[position].string, expected_name)
Example #3
0
def test_get_top_level_genre_tags():
    # The first-page fixture should expose 16 top-level genre tags whose
    # first two labels are "Arts" and "Business".
    leading_labels = ("Arts", "Business")
    first_page = Scraper(read_text_from_file(first_page_filename))
    genre_tags = first_page.get_top_level_genre_tags()

    assert_equal(len(genre_tags), 16)
    for position, expected_label in enumerate(leading_labels):
        assert_equal(genre_tags[position].string, expected_label)
Example #4
0
def test_get_top_level_genre_tags():
    # Checks the count of top-level genre tags on the first-page fixture
    # and the text of the first two.
    page_text = read_text_from_file(first_page_filename)
    page = Scraper(page_text)
    tags = page.get_top_level_genre_tags()

    tag_count = len(tags)
    assert_equal(tag_count, 16)

    first_tag, second_tag = tags[0], tags[1]
    assert_equal(first_tag.string, "Arts")
    assert_equal(second_tag.string, "Business")
Example #5
0
def test_get_subgenre_tags():
    # Checks the count of subgenre tags on the first-page fixture and the
    # text of the first two.
    page_text = read_text_from_file(first_page_filename)
    page = Scraper(page_text)
    tags = page.get_subgenre_tags()

    tag_count = len(tags)
    assert_equal(tag_count, 6)

    first_tag, second_tag = tags[0], tags[1]
    assert_equal(first_tag.string, "Design")
    assert_equal(second_tag.string, "Fashion & Beauty")
Example #6
0
def test_get_subgenre_urls():
    # The first-page fixture should yield 6 subgenre links, the first of
    # which points at the arts/design genre page.
    design_href = (
        "https://itunes.apple.com/us/genre/podcasts-arts-design/id1402?mt=2"
    )
    page = Scraper(read_text_from_file(first_page_filename))
    links = page.get_subgenre_urls()

    assert_equal(len(links), 6)
    assert_equal(links[0].href, design_href)
Example #7
0
def test_get_number_of_pages():
    # Each fixture is paired with the page count the scraper must report:
    # 7 for the paginated fixture and 0 for the first-page fixture.
    expectations = (
        (page_num_filename, 7),
        (first_page_filename, 0),
    )
    for fixture_name, expected_pages in expectations:
        page = Scraper(read_text_from_file(fixture_name))
        assert_equal(page.get_number_of_pages(), expected_pages)
Example #8
0
def test_get_itunes_podcast_urls():
    # The first-page fixture should list 240 podcast urls; the first one
    # is compared as a plain string.
    first_expected = (
        "https://itunes.apple.com/us/podcast/the-moth-podcast/id275699983?mt=2"
    )
    page = Scraper(read_text_from_file(first_page_filename))
    podcast_urls = page.get_itunes_podcast_urls()

    assert_equal(len(podcast_urls), 240)
    assert_equal(podcast_urls[0], first_expected)
Example #9
0
def test_get_current_subgenre_tag():
    # A page with a selected subgenre reports its tag ("Food" here).
    food_page = Scraper(read_text_from_file(food_page_filename))
    selected = food_page._get_currently_selected_subgenre_tag()
    assert_equal(selected.string, "Food")

    # A page without a selected subgenre reports None.
    plain_page = Scraper(read_text_from_file(first_page_filename))
    selected = plain_page._get_currently_selected_subgenre_tag()
    assert_equal(selected, None)
Example #10
0
def test_get_top_level_genre_urls():
    # Checks the number of top-level genre links on the first-page fixture
    # and the exact href of the first two.
    arts_href = "https://itunes.apple.com/us/genre/podcasts-arts/id1301?mt=2"
    business_href = (
        "https://itunes.apple.com/us/genre/podcasts-business/id1321?mt=2"
    )
    page = Scraper(read_text_from_file(first_page_filename))
    links = page.get_top_level_genre_urls()

    assert_equal(len(links), 16)
    assert_equal(links[0].href, arts_href)
    assert_equal(links[1].href, business_href)
Example #11
0
def test_return_urls_not_in_history_real_tags_single_element():
    # Drive the crawler state from canned fetch results.
    mock_fetcher = MockFetcher(fetch_values)
    driver = Driver(test_url3, mock_fetcher)
    driver.populate_state()

    # Candidate tags and history tags are parsed by two separate Scraper
    # objects on purpose: in real use they come from different scrapers.
    candidate_scraper = Scraper(text_from_file(test_url3_file))
    candidate_tags = candidate_scraper.get_letter_urls()
    history_scraper = Scraper(text_from_file(test_url3_file))
    history_tags = history_scraper.get_letter_urls()

    # With only the first tag recorded as history, everything after it
    # must be returned as new.
    driver.history = history_tags[0]
    unseen = driver.return_urls_not_in_history(candidate_tags)
    assert_equal(unseen, candidate_tags[1:])
Example #12
0
def test_get_current_page():
    # Fixtures with a paginator: the selected page tag carries the number.
    selected_by_fixture = (
        (page_num_filename, "1"),
        (society_n2_filename, "2"),
    )
    for fixture_name, expected_number in selected_by_fixture:
        page = Scraper(read_text_from_file(fixture_name))
        selected = page.get_currently_selected_page()
        assert_equal(selected.string, expected_number)

    # Fixture without a paginator: there is no selected page at all.
    page = Scraper(read_text_from_file(first_page_filename))
    selected = page.get_currently_selected_page()
    assert_equal(selected, None)
Example #13
0
def test_get_current_letter():
    # Fixtures with a letter selected: "A" on one, "N" on the other.
    selected_by_fixture = (
        (page_num_filename, "A"),
        (society_n2_filename, "N"),
    )
    for fixture_name, expected_letter in selected_by_fixture:
        page = Scraper(read_text_from_file(fixture_name))
        selected = page.get_currently_selected_letter()
        assert_equal(selected.string, expected_letter)

    # Fixture with no letter selected: the lookup gives None.
    page = Scraper(read_text_from_file(first_page_filename))
    selected = page.get_currently_selected_letter()
    assert_equal(selected, None)
Example #14
0
def test_get_current_genre():
    # Each fixture is paired with the genre that must be reported as
    # selected: the base page, a non-base genre page, and a page with a
    # subgenre selected (which still reports "Arts").
    selected_by_fixture = (
        (first_page_filename, "Arts"),
        (music_page_filename, "Music"),
        (food_page_filename, "Arts"),
    )
    for fixture_name, expected_genre in selected_by_fixture:
        page = Scraper(read_text_from_file(fixture_name))
        selected = page.get_currently_selected_genre()
        assert_equal(selected.string, expected_genre)
Example #15
0
    def fetch(self, url):
        """
        Get a url, Return a Scraper of the page

        Args:
            url: address passed to ``requests.get``.

        Returns:
            A Scraper built from the text of the response.

        Raises:
            HTTPError: if status code is not HTTP 200 OK
        """
        r = requests.get(url)

        if r.status_code != 200:
            # raise_for_status() only raises for 4xx/5xx responses.
            r.raise_for_status()
            # Any other non-200 status (e.g. 204) would otherwise fall
            # through and be parsed, so raise explicitly to honour the
            # documented contract.
            raise requests.exceptions.HTTPError(
                "Unexpected status code %s for url: %s" % (r.status_code, url),
                response=r)
        return Scraper(r.text)
Example #16
0
def test_get_number_of_pages():
    # The paginated fixture must report 7 pages.
    paginated = Scraper(read_text_from_file(page_num_filename))
    assert_equal(paginated.get_number_of_pages(), 7)

    # The first-page fixture must report 0 pages.
    unpaginated = Scraper(read_text_from_file(first_page_filename))
    assert_equal(unpaginated.get_number_of_pages(), 0)
Example #17
0
def test_get_current_subgenre_tag():
    # Page with a subgenre selected: the tag's text is "Food".
    with_subgenre = Scraper(read_text_from_file(food_page_filename))
    assert_equal(
        with_subgenre._get_currently_selected_subgenre_tag().string, "Food")

    # Page with no subgenre selected: the lookup gives None.
    without_subgenre = Scraper(read_text_from_file(first_page_filename))
    assert_equal(
        without_subgenre._get_currently_selected_subgenre_tag(), None)
Example #18
0
def test_get_current_page():
    # Page with a number selected: page "1".
    first_of_many = Scraper(read_text_from_file(page_num_filename))
    assert_equal(first_of_many.get_currently_selected_page().string, "1")

    # Page that is not page 1: page "2" is selected.
    second_page = Scraper(read_text_from_file(society_n2_filename))
    assert_equal(second_page.get_currently_selected_page().string, "2")

    # Page without a paginator: nothing is selected.
    no_paginator = Scraper(read_text_from_file(first_page_filename))
    assert_equal(no_paginator.get_currently_selected_page(), None)
Example #19
0
def test_get_current_letter():
    # Page with a letter selected: "A".
    letter_a_page = Scraper(read_text_from_file(page_num_filename))
    assert_equal(letter_a_page.get_currently_selected_letter().string, "A")

    # Page with a letter other than "A" selected: "N".
    letter_n_page = Scraper(read_text_from_file(society_n2_filename))
    assert_equal(letter_n_page.get_currently_selected_letter().string, "N")

    # Page without a letter selected: the lookup gives None.
    no_letter_page = Scraper(read_text_from_file(first_page_filename))
    assert_equal(no_letter_page.get_currently_selected_letter(), None)
Example #20
0
def test_get_current_genre():
    # The base page has the "Arts" genre selected.
    base_page = Scraper(read_text_from_file(first_page_filename))
    assert_equal(base_page.get_currently_selected_genre().string, "Arts")

    # A page other than the base page has its own genre selected.
    music_page = Scraper(read_text_from_file(music_page_filename))
    assert_equal(music_page.get_currently_selected_genre().string, "Music")

    # A page with a subgenre selected still reports the genre "Arts".
    food_page = Scraper(read_text_from_file(food_page_filename))
    assert_equal(food_page.get_currently_selected_genre().string, "Arts")
Example #21
0
def test_return_urls_not_in_history_real_tags_single_element():
    # Build a driver backed by canned fetch results and initialise it.
    canned_fetcher = MockFetcher(fetch_values)
    driver = Driver(test_url3, canned_fetcher)
    driver.populate_state()

    # Parse the same fixture with two independent scrapers: in real use
    # the tags being checked and the tags in history come from different
    # Scraper objects.
    fresh_scraper = Scraper(text_from_file(test_url3_file))
    fresh_tags = fresh_scraper.get_letter_urls()
    earlier_scraper = Scraper(text_from_file(test_url3_file))
    earlier_tags = earlier_scraper.get_letter_urls()

    # History holds just the first tag, so all later tags count as new.
    driver.history = earlier_tags[0]
    expected_new = fresh_tags[1:]
    assert_equal(driver.return_urls_not_in_history(fresh_tags), expected_new)
Example #22
0
def test_get_subgenre_urls():
    # Checks the number of subgenre links on the first-page fixture and
    # the href of the first one.
    page_text = read_text_from_file(first_page_filename)
    links = Scraper(page_text).get_subgenre_urls()

    link_count = len(links)
    assert_equal(link_count, 6)

    first_href = links[0].href
    assert_equal(
        first_href,
        "https://itunes.apple.com/us/genre/podcasts-arts-design/id1402?mt=2")
Example #23
0
def test_get_letter_urls():
    # The letter links on the first-page fixture run from "A" to "#".
    page = Scraper(read_text_from_file(first_page_filename))
    letters = page.get_letter_urls()

    first_letter = letters[0]
    assert_equal(first_letter.string, "A")
    last_letter = letters[-1]
    assert_equal(last_letter.string, "#")
Example #24
0
def test_get_page_urls():
    # The page links on the paginated fixture run from "2" to "7".
    page = Scraper(read_text_from_file(page_num_filename))
    page_links = page.get_page_urls()

    first_link = page_links[0]
    assert_equal(first_link.string, "2")
    last_link = page_links[-1]
    assert_equal(last_link.string, "7")
Example #25
0
def test_get_itunes_podcast_urls():
    # Checks how many podcast urls the first-page fixture yields and the
    # exact value of the first one.
    page_text = read_text_from_file(first_page_filename)
    podcast_urls = Scraper(page_text).get_itunes_podcast_urls()

    url_count = len(podcast_urls)
    assert_equal(url_count, 240)

    first_url = podcast_urls[0]
    assert_equal(
        first_url,
        "https://itunes.apple.com/us/podcast/the-moth-podcast/id275699983?mt=2")
Example #26
0
def test_get_page_urls():
    # First and last page links on the paginated fixture are "2" and "7".
    expected_by_index = ((0, "2"), (-1, "7"))
    paginated = Scraper(read_text_from_file(page_num_filename))
    links = paginated.get_page_urls()

    for index, expected_text in expected_by_index:
        assert_equal(links[index].string, expected_text)
Example #27
0
def test_make_soup():
    # The scraper's `soup` attribute should expose the page title text of
    # the first-page fixture.
    expected_title = "Arts - Podcasts Downloads on iTunes"
    page = Scraper(read_text_from_file(first_page_filename))
    actual_title = page.soup.title.string
    assert_equal(actual_title, expected_title)
Example #28
0
 def fetch(self, url):
     # Look up the entry registered for this url in `return_values`, load
     # its text with text_from_file, and wrap that text in a Scraper.
     fixture = self.return_values[url]
     return Scraper(text_from_file(fixture))
Example #29
0
def test_get_subgenre_tags_no_entries():
    # On the music-page fixture the subgenre lookup is expected to give
    # None rather than a collection of tags.
    music_page = Scraper(read_text_from_file(music_page_filename))
    assert_equal(music_page.get_subgenre_tags(), None)
Example #30
0
def test_get_subgenre_tags_no_entries():
    # The music-page fixture must produce None from get_subgenre_tags().
    page_text = read_text_from_file(music_page_filename)
    page = Scraper(page_text)
    result = page.get_subgenre_tags()

    assert_equal(result, None)
Example #31
0
def test_get_letter_urls():
    # First and last letter links on the first-page fixture are "A" and "#".
    expected_by_index = ((0, "A"), (-1, "#"))
    first_page = Scraper(read_text_from_file(first_page_filename))
    links = first_page.get_letter_urls()

    for index, expected_text in expected_by_index:
        assert_equal(links[index].string, expected_text)
Example #32
0
 def fetch(self, url):
     # `url` is not consulted: every call wraps the stored `return_text`
     # in a new Scraper.
     canned_text = self.return_text
     return Scraper(canned_text)