Example #1
0
 def get_info_from_page(self):
     """Determine the page that follows the current one by probing a numbered URL.

     If the current URL has an entry in ``self.exception_dictionary``, that
     entry is used as the next URL and no request is made. Otherwise the
     next URL is built by formatting ``self.url_pattern`` with
     ``self.current_page + 1`` and fetched with ``requests.get``.

     Returns:
         A ``PageReturn`` whose first argument is the next URL (``None``
         when the probe response is not OK) and whose second argument is
         ``self.current_url``.
     """
     here = self.current_url

     # Explicitly mapped pages bypass the numeric URL pattern entirely.
     if here in self.exception_dictionary:
         return PageReturn(self.exception_dictionary[here], here)

     candidate = self.url_pattern.format(self.current_page + 1)
     response = requests.get(candidate)
     # A non-OK response is reported as "no next page".
     return PageReturn(candidate if response.ok else None, here)
Example #2
0
 def test_mcninja_next(self):
     """Check ``webcomic_list.mcninja_info`` against fixed Dr. McNinja pages.

     The table maps a page URL to the ``PageReturn`` expected for it and is
     handed to ``self.next_method_asserts`` together with the parser under
     test. The final entry expects ``None`` as the next URL.
     """
     cases = {
         "http://drmcninja.com/archives/comic/5p52/": PageReturn(
             "http://drmcninja.com/archives/comic/7p1/",
             "D.A.R.E To Resist Ninja Drugs and Ninja Violence Part 2 p52",
         ),
         "http://drmcninja.com/archives/comic/11p16/": PageReturn(
             "http://drmcninja.com/archives/comic/11p17/",
             "Punch Dracula p16",
         ),
         "http://drmcninja.com/archives/comic/11p56/": PageReturn(
             "http://drmcninja.com/archives/comic/12p1/",
             "Punch Dracula p56",
         ),
         "http://drmcninja.com/archives/comic/33p147/": PageReturn(
             None,
             "The End: Part 2 p147",
         ),
     }
     self.next_method_asserts(webcomic_list.mcninja_info, cases)
Example #3
0
 def test_smbc_next(self):
     """Check ``webcomic_list.smbc_info`` against two fixed pages and the latest one.

     The latest page is not hard-coded: its URL is read from the ``a.last``
     link on a known page, and its expected title is read from that page's
     ``<title>`` with the site-name prefix sliced off. The resulting table
     is handed to ``self.next_method_asserts``.
     """
     first_page_url = "https://www.smbc-comics.com/comic/2002-09-05"
     title_prefix = "Saturday Morning Breakfast Cereal - "

     # Follow the "last" link from a known page to find the newest comic.
     first_page = BeautifulSoup(requests.get(first_page_url).text, "html.parser")
     last_page_url = first_page.find('a', class_='last').get('href')

     # The expected title for the newest comic comes from its own <title>.
     last_page = BeautifulSoup(requests.get(last_page_url).text, "html.parser")
     last_page_title = last_page.find('title').string[len(title_prefix):]

     cases = {
         first_page_url: PageReturn(
             "https://www.smbc-comics.com/comic/2002-09-07",
             "2002-09-05",
         ),
         "https://www.smbc-comics.com/comic/the-truth": PageReturn(
             "https://www.smbc-comics.com/comic/statistical-flowers-for-algernon",
             "The Truth",
         ),
         last_page_url: PageReturn(None, last_page_title),
     }
     self.next_method_asserts(webcomic_list.smbc_info, cases)
Example #4
0
 def test_cucumber_quest_next(self):
     """Check ``webcomic_list.cucumber_quest_info`` against two fixed pages and the latest one.

     The latest page's URL is read from the ``a.last-webcomic-link`` anchor
     on page 1, and its expected title from the ``<h1>`` inside that page's
     ``header.post-header``. The resulting table is handed to
     ``self.next_method_asserts``.
     """
     first_page_url = "http://cucumber.gigidigi.com/cq/page-1/"

     # Follow the "last" link from page 1 to find the newest comic.
     first_page = BeautifulSoup(requests.get(first_page_url).text, "html.parser")
     last_page_url = first_page.find('a', class_='last-webcomic-link').get('href')

     # The expected title for the newest comic comes from its own header.
     last_page = BeautifulSoup(requests.get(last_page_url).text, "html.parser")
     last_page_header = last_page.find('header', class_='post-header')
     last_page_title = last_page_header.find('h1').string

     cases = {
         first_page_url: PageReturn(
             "http://cucumber.gigidigi.com/cq/page-2/",
             "page 1",
         ),
         "http://cucumber.gigidigi.com/cq/page-77/": PageReturn(
             "http://cucumber.gigidigi.com/cq/bonus/",
             "page 77",
         ),
         last_page_url: PageReturn(None, last_page_title),
     }
     self.next_method_asserts(webcomic_list.cucumber_quest_info, cases)
Example #5
0
def cucumber_quest_info(soup):
    """Extract the page title and next-page URL from a Cucumber Quest page.

    Args:
        soup: Parsed page; ``find`` is called on it to locate the
            ``header.post-header`` element and the ``a.next-webcomic-link``
            anchor.

    Returns:
        ``PageReturn(next_link, current_title)``. ``next_link`` is ``None``
        when the next-link anchor is missing or when it also carries the
        ``current-webcomic`` class; otherwise it is the anchor's ``href``.
    """
    current_title = soup.find('header', class_='post-header').find('h1').string
    next_link_button = soup.find('a', class_='next-webcomic-link')
    # find() yields None when no anchor matches; subscripting None would
    # raise TypeError, so a missing anchor is reported as "no next page".
    if next_link_button is None or 'current-webcomic' in next_link_button['class']:
        next_link = None
    else:
        next_link = next_link_button.get('href')
    return PageReturn(next_link, current_title)
Example #6
0
def smbc_info(soup):
    """Extract the page title and next-page URL from an SMBC page.

    Args:
        soup: Parsed page; ``find`` is called on it to locate the
            ``<title>`` element and the ``a.next`` anchor.

    Returns:
        ``PageReturn(next_link, current_title)``. The title is the
        ``<title>`` text with as many leading characters dropped as the
        site-name prefix is long. ``next_link`` is ``None`` when the page
        has no ``a.next`` anchor.
    """
    site_prefix = "Saturday Morning Breakfast Cereal - "
    # The prefix is removed by length only; its presence is not checked.
    page_title = soup.find('title').string[len(site_prefix):]

    forward_anchor = soup.find('a', class_='next')
    target = forward_anchor.get('href') if forward_anchor is not None else None
    return PageReturn(target, page_title)
Example #7
0
def mcninja_info(soup, current_title=None):
    """Extract the page title and next comic URL from a Dr. McNinja page.

    Args:
        soup: Parsed page to inspect.
        current_title: Title to report. When ``None`` it is built from the
            selected options of the ``series_select`` and ``page_select``
            drop-downs as ``"<series> p<page>"``.

    Returns:
        ``PageReturn(next_url, current_title)``. ``next_url`` is ``None``
        when a page has no ``<link rel="next">``.
    """
    if current_title is None:
        series_name = soup.find('select', id='series_select').find(
            'option', selected=True).string
        page_label = soup.find('select', id='page_select').find(
            'option', selected=True).string
        current_title = '{} p{}'.format(series_name, page_label)

    # Follow rel="next" links until one points into the comic archive. The
    # title stays fixed at the value established above for the first page.
    page = soup
    while True:
        successor = page.find('link', rel='next')
        if not successor:
            # The last page
            return PageReturn(None, current_title)

        target = successor.get('href')
        if '/archives/comic/' in target:
            # We found the next page
            return PageReturn(target, current_title)

        # This next url is just a news post, fetch it to find the real next comic page
        response = requests.get(target)
        page = BeautifulSoup(response.text, "html.parser")
Example #8
0
 def get_info_from_page(self):
     """Fetch the current page and hand its parsed HTML to the page-info callback.

     Returns:
         When ``self.current_page`` is 0, ``PageReturn(self.first_page_url,
         None)`` without making any request. Otherwise, whatever
         ``self.page_info_function`` returns for the parsed content of
         ``self.current_url``.
     """
     if self.current_page != 0:
         response = requests.get(self.current_url)
         parsed = BeautifulSoup(response.text, "html.parser")
         return self.page_info_function(parsed)

     # Page 0: report the configured first page as the next URL.
     return PageReturn(self.first_page_url, None)
 def mcninja_next(soup):
     """Return the next-page URL taken from the page's ``a.next`` anchor.

     Args:
         soup: Parsed page; ``find`` is called on it to locate the anchor.

     Returns:
         ``PageReturn(next_url, None)``. ``next_url`` is the anchor's
         ``href``, or ``None`` when the page has no ``a.next`` anchor.
     """
     next_anchor = soup.find('a', class_='next')
     # find() yields None when nothing matches; calling .get() on it would
     # raise AttributeError, so a missing anchor is reported as "no next page".
     next_url = next_anchor.get('href') if next_anchor is not None else None
     return PageReturn(next_url, None)