def setUp(self):
    """Prepare the fixtures shared by the tests.

    Downloads the documentation page through ``main.get_html``, parses it
    with ``main.get_soup`` and keeps the first element whose class is
    ``method``, together with a sample question/answer pair.
    """
    docs_url = 'https://docs.python.org/3/library/stdtypes.html#string-methods'
    self.url = docs_url
    self.html = main.get_html(docs_url)
    self.soup = main.get_soup(self.html)

    # Returns the first method: int.bit_length()
    self.single_method = self.soup.find(class_='method')

    # Sample question/answer pair stored on the test instance.
    self.question = 'What is the capital of Australia?'
    self.answer = 'Canberra'
def get_petition_links(page_links):
    """Collect absolute petition URLs from a series of listing pages.

    Every page in ``page_links`` is downloaded with ``get_html`` and parsed
    with the ``lxml`` parser. The ``href`` of each ``<a class="pet_link">``
    element is appended to the site's base URL.

    Args:
        page_links: Iterable of listing-page URLs to scan.

    Returns:
        A list of petition URLs, in page order and then document order.
        An empty iterable yields an empty list.
    """
    base_url = "https://petition.president.gov.ua"
    petition_links = []
    for link in page_links:
        soup = BeautifulSoup(get_html(link), 'lxml')
        # href=True skips anchors that carry no href attribute; without it
        # such an anchor would be turned into a bogus "...gov.uaNone" URL.
        anchors = soup.find_all('a', class_='pet_link', href=True)
        petition_links.extend(f"{base_url}{a['href']}" for a in anchors)
    return petition_links
def test_ptml_to_html(self):
    """Convert a markup snippet with ``main.get_html`` and check the result.

    The output must be a string and, once both sides have been passed
    through ``self.striplines``, must equal ``self.expected``.
    """
    # NOTE(review): the markup literal appears on a single line in this view;
    # its original line breaks and indentation are not visible here, so
    # confirm the exact whitespace before editing it.
    html = main.get_html(""" html: head: script src="foo.js": pass body: P: "Hello World!!" """)
    # NOTE(review): ``basestring`` is a Python 2 builtin, so this test
    # presumably targets Python 2 (or a compat alias defined elsewhere).
    self.assertIsInstance(html, basestring, msg="message")
    # Compare after striplines() on both sides.
    self.assertEqual(self.striplines(html), self.striplines(self.expected), msg="values don't match expected")
def test_get_html(self, mock_get_headers, mock_get_request):
    """Check that ``main.get_html`` returns the text of the GET response.

    The two mock parameters are presumably injected by patch decorators that
    are outside this view. The request mock is set up to answer with status
    200 and the sample post HTML.
    """
    mock_get_headers.return_value = HEADERS

    fake_response = mock_get_request.return_value
    fake_response.status_code = 200
    fake_response.text = self.sample_post_html

    fetched = main.get_html(self.TEST_URL_TO_GET)

    self.assertEqual(fetched, self.sample_post_html)
def _child_text(node, tag, css_class):
    """Return the stripped text of the first ``tag`` under ``node`` with ``css_class``."""
    return node.find(tag, {"class": css_class}).text.strip()


def main():
    """Fetch the op.gg pro spectate page and display every listed entry.

    The page is downloaded with ``get_html`` and parsed with the
    ``html.parser`` backend. Each ``div.Item`` that is a direct child of the
    ``div.SpectateSummonerList`` container is read for its summoner name,
    champion name, game type, game time, team name and extra text, and
    those six values are passed to ``show_lives``.

    Raises:
        AttributeError: If the container or any expected element is missing
            from the page (``find`` returned ``None``).
    """
    html = get_html("https://www.op.gg/spectate/pro/")
    soup = BeautifulSoup(html, "html.parser")
    container = soup.find("div", {"class": "SpectateSummonerList"})

    # recursive=False restricts the search to direct children of the container.
    # find_all is the current bs4 name for the legacy findAll alias.
    summoners_ct = container.find_all("div", {"class": "Item"}, recursive=False)

    for sm in summoners_ct:
        sm_name = _child_text(sm, "span", "SummonerName")
        sm_champ = _child_text(sm, "span", "ChampionName")
        # Named game_type / game_time so the builtin ``type`` and the stdlib
        # module name ``time`` are not shadowed.
        game_type = _child_text(sm, "div", "GameType")
        game_time = _child_text(sm, "div", "GameTime")
        team = _child_text(sm, "div", "TeamName")
        player = _child_text(sm, "div", "Extra")
        show_lives(sm_name, sm_champ, game_type, game_time, team, player)
def test_get_html_raises_exception_on_error_code(self, mock_err_report):
    """Check that ``main.get_html`` returns '' when the GET request fails.

    A ``responses`` stub answers the URL with status 400 and a
    ``RequestException`` body. Per the original description, the failure is
    also expected to be reported to Google Cloud Error Reporting; the
    reporter is supplied as ``mock_err_report``.
    """
    mock_err_report.message = 'test error msg'

    failing_url = 'https://unreachable.craigslist.org/search/apa'
    simulated_failure = requests.exceptions.RequestException()
    responses.add(
        method=responses.GET,
        url=failing_url,
        status=400,
        body=simulated_failure,
    )

    result = main.get_html(url_to_get=failing_url,
                           args=self.TEST_ARGS_NAMESPACE)

    self.assertEqual(result, '')
def test_tag_to_html(self):
    """Build a small ``main.Tag`` tree and check its rendered HTML.

    The tree is html -> (head -> script[src], body -> P -> text). The
    rendered output must be a string and must equal ``self.expected`` once
    both sides have been passed through ``self.striplines``.
    """
    root = main.Tag('html')
    head = main.Tag('head')
    body = main.Tag('body')
    script = main.Tag('script', src='foo.js')
    paragraph = main.Tag('P')

    # Assemble the leaves first, then hang head and body on the root.
    head.add_child(script)
    paragraph.add_child("Hello World!!")
    body.add_child(paragraph)
    root.add_child(head)
    root.add_child(body)

    rendered = main.get_html(root)

    self.assertIsInstance(rendered, basestring, msg="message")
    actual_lines = self.striplines(rendered)
    expected_lines = self.striplines(self.expected)
    self.assertEqual(actual_lines, expected_lines)
def test_get_html_raises_exception_on_error_code(self, mock_get_headers,
                                                 mock_get_request,
                                                 mock_err_report):
    """Check that ``main.get_html`` returns '' when the GET request raises.

    The request mock is configured with status 400 and a
    ``RequestException`` side effect. Per the original description, a report
    is sent when the exception is raised; the reporter is supplied as
    ``mock_err_report``.
    """
    mock_get_headers.return_value = HEADERS

    mock_get_request.return_value.status_code = 400
    request_failure = requests.exceptions.RequestException()
    mock_get_request.side_effect = request_failure

    mock_err_report.message = 'test error msg'

    result = main.get_html(self.TEST_URL_TO_GET)

    self.assertEqual(result, '')
def test_get_html_with_params_headers_args(self):
    """Check ``main.get_html`` with explicit headers, params and args.

    A ``responses`` stub answers the URL with status 200 and
    ``self.TEST_HTML``; the call must return that body unchanged.
    """
    url_to_get = 'https://vancouver.craigslist.org/search/apa'

    # Location-specific values layered over a copy of the template headers.
    header_overrides = {
        'Host': f'{self.TEST_LOCATION}.craigslist.org',
        'Referer': f'https://{self.TEST_LOCATION}.craigslist.org/d/apts-housing-for-rent/search/apa',
        'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_4) AppleWebKit/537.36 '
                      '(KHTML, like Gecko) Chrome/81.0.4044.138 Safari/537.36',
    }
    headers = templates.HEADERS.copy()
    headers.update(header_overrides)

    responses.add(method=responses.GET, url=url_to_get, status=200,
                  body=self.TEST_HTML)

    fetched = main.get_html(
        url_to_get=url_to_get,
        headers=headers,
        params=templates.CL_PARAMS,
        args=self.TEST_ARGS_NAMESPACE,
    )

    self.assertEqual(fetched, self.TEST_HTML)
def get_max_page(url):
    """Read a page number out of the pagination list at ``url``.

    The page is fetched with ``get_html`` and parsed with ``lxml``. Inside
    ``ul.pag_list`` the ``<i class="fa fa-angle-right">`` icon is located,
    and the element immediately preceding the icon's grandparent is
    converted to ``int``.

    NOTE(review): presumably the icon is the "next page" arrow and the
    preceding node holds the last page number -- confirm against the site
    markup.
    """
    soup = BeautifulSoup(get_html(url), 'lxml')
    pagination = soup.find('ul', class_='pag_list')
    arrow_icon = pagination.find('i', class_='fa fa-angle-right')
    arrow_holder = arrow_icon.parent.parent
    return int(arrow_holder.previous)