def web_scraper(page_id, limit=10):
    """Scrape the page stored under ``page_id`` and save its outgoing links.

    The id is checked against the ids returned by ``Pages.select_id()``. The
    page's URL is then downloaded, every ``<a href>`` whose target starts with
    ``https`` is collected, and the first ``limit`` of them replace the links
    previously stored for this page.

    The page is flagged with ``DB.pages().update(True, page_id)`` before the
    download and the flag is set back to ``False`` afterwards. The reset also
    happens when downloading, parsing or storing fails, so a failed run cannot
    leave the page flagged forever.

    :param page_id: Id of the page row to scrape.
    :param limit: Maximum number of links to store (defaults to 10).
    :raises TypeError: If ``page_id`` is not among the stored ids. The
        exception type is kept as-is so existing callers keep working.
    :return None: Returns None.
    """
    known_ids = [row[0] for row in Pages(DB.connect()).select_id()]
    if page_id not in known_ids:
        raise TypeError('Id does not exist.')

    url = Pages(DB.connect()).select_url(page_id)

    # Flag the page for the duration of the scrape
    # (presumably an "is scraping" marker -- confirm against Pages.update).
    DB.pages().update(True, page_id)
    try:
        response = requests.get(url)
        soup = BeautifulSoup(response.text, 'html.parser')
        https_links = [
            anchor['href']
            for anchor in soup.find_all('a', href=True)
            if anchor['href'].startswith('https')
        ]

        # Replace, rather than extend, the links stored for this page.
        DB.links().delete_by_page_id(page_id)
        for href in https_links[:limit]:
            Links(DB.connect()).insert(page_id, href)
    finally:
        # Always clear the flag, even if any step above raised.
        DB.pages().update(False, page_id)
def pages(cls):
    """
    Builds a Pages object on a connection obtained from cls.connect().
    :return Pages: The newly created Pages object.
    """
    connection = cls.connect()
    return Pages(connection)
class TestPages(unittest.TestCase):
    '''Tests for the methods of the Pages class.'''

    def setUp(self) -> None:
        '''Create the Pages object under test on DB.new_connect().'''
        self.pages = Pages(DB.new_connect())

    @staticmethod
    def _reset_database():
        '''Run DB().setup() followed by DB().seed() before a test body.'''
        DB().setup()
        DB().seed()

    def test_select(self):
        '''select() returns both seeded rows.'''
        self._reset_database()
        expected = [
            (1, 'https://www.facebook.com'),
            (2, 'https://rb.gy/zd2xxz'),
        ]
        self.assertEqual(self.pages.select(), expected)

    def test_fetch_url(self):
        '''fetch_url(2) returns a one-element tuple holding the url.'''
        self._reset_database()
        expected = ('https://rb.gy/zd2xxz', )
        self.assertEqual(self.pages.fetch_url(2), expected)

    def test_find(self):
        '''find(1) starts with the id and url of the first row.'''
        self._reset_database()
        expected = (1, 'https://www.facebook.com')
        found = self.pages.find(1)
        self.assertEqual(found[:2], expected)

    def test_update_id_true(self):
        '''update_id_true(1) returns the row with its third field True.'''
        self._reset_database()
        expected = (1, 'https://www.facebook.com', True)
        updated = self.pages.update_id_true(1)
        self.assertEqual(updated[:3], expected)

    def test_update_id_false(self):
        '''update_id_false(1) returns the row with its third field False.'''
        self._reset_database()
        expected = (1, 'https://www.facebook.com', False)
        updated = self.pages.update_id_false(1)
        self.assertEqual(updated[:3], expected)

    def tearDown(self) -> None:
        '''Drop the reference to the Pages object.'''
        self.pages = None


if __name__ == '__main__':
    unittest.main()
class MyTestPages(unittest.TestCase):
    """
    This class tests the select, fetch and update methods of the Pages class.
    """

    def setUp(self) -> None:
        """Create the Pages object under test on DB.connect()."""
        self.DB = Pages(DB.connect())

    def test_select(self):
        """select() must not return None."""
        self.assertIsNotNone(self.DB.select())

    # The two tests below were named ``fetch`` and ``update``. unittest only
    # collects methods whose names start with ``test``, so they never ran.
    def test_fetch(self):
        """fetch(2) must not return None."""
        self.assertIsNotNone(self.DB.fetch(2))

    def test_update(self):
        """update(True, 1) must not return None."""
        self.assertIsNotNone(self.DB.update(True, 1))

    def tearDown(self) -> None:
        """Drop the reference to the Pages object."""
        self.DB = None
class TestDB(unittest.TestCase):
    ''' Tests for the Pages methods; DB.setup() and DB.seed() run before each test '''

    def setUp(self) -> None:
        ''' Create the Pages object, then run DB.setup() and DB.seed() '''
        self.exec = Pages(DB.connect())
        # Every test needs the same two calls, so they run here, in the same
        # order as before: connect, setup, seed.
        DB.setup()
        DB.seed()

    def test_select(self):
        ''' Test selection of entire pages table '''
        self.assertIsNotNone(self.exec.select())

    def test_select_urls(self):
        ''' Test selection of urls from pages table '''
        self.assertIsNotNone(self.exec.select_urls())

    def test_find_by_id(self):
        ''' Test selection of specific data from pages table by id '''
        value = self.exec.find_by_id(1)
        self.assertIsNotNone(value)
        self.assertIsInstance(value, tuple)

    def test_get_url(self):
        ''' Test selection of specific url by id '''
        value = self.exec.get_url(1)
        self.assertIsNotNone(value)
        self.assertIsInstance(value, tuple)

    def test_update_by_id(self):
        ''' Test update is_scraping value by id '''
        self.assertIsNone(self.exec.update_by_id(False, 1))

    def test_delete_by_id(self):
        ''' Test deletion of a row from pages table by id '''
        self.assertIsNone(self.exec.delete_by_id(1))

    def tearDown(self) -> None:
        ''' Drop the reference to the Pages object '''
        self.exec = None
class TestPages(unittest.TestCase):
    """Tests for every method of the Pages class in pages.py"""

    def setUp(self) -> None:
        # Build the Pages object under test on a connection from DB.connect()
        connection = DB.connect()
        self.exec = Pages(connection)

    def test_select(self):
        # select() must return something other than None
        self.assertIsNotNone(self.exec.select())

    def test_select_id(self):
        # select_id() must return something other than None
        ids = self.exec.select_id()
        self.assertIsNotNone(ids)

    def test_select_url(self):
        # select_url(1) must return something other than None
        url = self.exec.select_url(1)
        self.assertIsNotNone(url)

    def test_find(self):
        # After DB.seed(), find(2) must return something other than None
        DB.seed()
        self.assertIsNotNone(self.exec.find(2))

    def test_find_url(self):
        # After DB.seed(), find_url(1) must return something other than None
        DB.seed()
        url = self.exec.find_url(1)
        self.assertIsNotNone(url)

    def test_update(self):
        # After DB.seed(), update(False, 1) must return None
        DB.seed()
        outcome = self.exec.update(False, 1)
        self.assertIsNone(outcome)

    def test_delete(self):
        # delete(2) must return None
        outcome = self.exec.delete(2)
        self.assertIsNone(outcome)

    def tearDown(self) -> None:
        # Drop the reference to the Pages object after each test
        self.exec = None
class TestPages(TestCase):
    # Covers the connect, select, find, update and delete methods of Pages

    def setUp(self):
        """Create the Pages object plus the connection and cursor kept on the test"""
        pages = Pages()
        self.pages = pages
        self.conn_server = DB.only_server()
        connection = pages.connect()
        self.conn = connection
        self.cursor = connection.cursor()

    def test_connect(self):
        """The connection obtained in setUp must not be None"""
        self.assertIsNotNone(self.conn)

    def test_select(self):
        """select() must return something other than None"""
        self.assertIsNotNone(self.pages.select())

    def test_find(self):
        """find(1) must return a value that is exactly of type tuple"""
        row = self.pages.find(1)
        self.assertIsNotNone(row)
        self.assertEqual(type(row), tuple)

    def test_update(self):
        """update('True', 1) must return None"""
        outcome = self.pages.update('True', 1)
        self.assertIsNone(outcome)

    def test_delete(self):
        """delete(1) must return None"""
        outcome = self.pages.delete(1)
        self.assertEqual(outcome, None)

    def tearDown(self):
        """Close the Pages object through its close() method"""
        self.pages.close()
def setUp(self) -> None:
    # Build the Pages object under test on a connection from DB.connect()
    connection = DB.connect()
    self.exec = Pages(connection)
def setUp(self) -> None:
    """Store a Pages object, built on DB.connect(), as self.exec."""
    db_connection = DB.connect()
    pages_under_test = Pages(db_connection)
    self.exec = pages_under_test
def task():
    """Run web_scraper on the value find_url(1) returns and pass its result on.

    NOTE(review): confirm that web_scraper expects the value produced by
    Pages.find_url -- neither definition is visible from this function.
    """
    pages = Pages(DB.connect())
    target = pages.find_url(1)
    return web_scraper(target)
def pages(cls):
    # Returns a Pages interface built on a connection from cls.connect()
    return Pages(cls.connect())
def setUp(self):
    """Create the Pages object and keep its connection and cursor on the test"""
    pages = Pages()
    self.pages = pages
    self.conn_server = DB.only_server()
    connection = pages.connect()
    self.conn = connection
    self.cursor = connection.cursor()
def pages(cls):
    # Returns a reference to the pages interface, built on cls.new_connect()
    return Pages(cls.new_connect())
def setUp(self) -> None:
    """Store a Pages object, built on DB.new_connect(), as self.pages."""
    connection = DB.new_connect()
    self.pages = Pages(connection)
def pages(cls):
    """Return a Pages object wrapping a connection from cls.connect()."""
    return Pages(cls.connect())