예제 #1
0
def web_scraper(page_id):
    """Scrape up to 10 ``https`` links from the page stored under ``page_id``.

    The id is checked against the ids returned by ``Pages.select_id()``. The
    page row is then marked with ``update(True, page_id)`` (presumably an
    "is scraping" flag -- confirm against the Pages class), the page is
    fetched and parsed, the links previously stored for this page are
    deleted, and the first 10 links whose ``href`` starts with ``https`` are
    inserted. The row is always marked back with ``update(False, page_id)``,
    even when fetching, parsing or a database write fails.

    Args:
        page_id: Id of the page row to scrape.

    Raises:
        TypeError: If ``page_id`` is not among the ids in the pages table.
    """
    known_ids = [row[0] for row in Pages(DB.connect()).select_id()]
    if page_id not in known_ids:
        raise TypeError('Id does not exist.')

    url = Pages(DB.connect()).select_url(page_id)
    DB.pages().update(True, page_id)
    try:
        response = requests.get(url)
        soup = BeautifulSoup(response.text, 'html.parser')

        https_links = [
            anchor['href']
            for anchor in soup.find_all('a', href=True)
            if anchor['href'].startswith('https')
        ]

        # Replace whatever was stored for this page with the fresh links.
        DB.links().delete_by_page_id(page_id)
        for href in https_links[:10]:
            Links(DB.connect()).insert(page_id, href)
    finally:
        # Reset the flag on every exit path so a failed scrape does not
        # leave the row marked as in progress.
        DB.pages().update(False, page_id)
예제 #2
0
 def pages(cls):
   """
   Build a Pages object on top of a connection from ``cls.connect()``.

   :return
   Pages: The object constructed as ``Pages(cls.connect())``.
   """
   connection = cls.connect()
   return Pages(connection)
예제 #3
0
class TestPages(unittest.TestCase):
    """Tests for the Pages class."""

    def setUp(self) -> None:
        """Open a connection, then rebuild and seed the database for each test."""
        self.pages = Pages(DB.new_connect())
        DB().setup()
        DB().seed()

    def test_select(self):
        """select() returns the seeded (id, url) rows."""
        expected = [(1, 'https://www.facebook.com'), (2, 'https://rb.gy/zd2xxz')]
        self.assertEqual(self.pages.select(), expected)

    def test_fetch_url(self):
        """fetch_url(2) returns a one-element tuple holding the url of page 2."""
        expected = ('https://rb.gy/zd2xxz', )
        self.assertEqual(self.pages.fetch_url(2), expected)

    def test_find(self):
        """The first two fields of find(1) are the id and url of page 1."""
        expected = (1, 'https://www.facebook.com')
        self.assertEqual(self.pages.find(1)[:2], expected)

    def test_update_id_true(self):
        """The first three fields of update_id_true(1) end with True."""
        expected = (1, 'https://www.facebook.com', True)
        self.assertEqual(self.pages.update_id_true(1)[:3], expected)

    def test_update_id_false(self):
        """The first three fields of update_id_false(1) end with False."""
        expected = (1, 'https://www.facebook.com', False)
        self.assertEqual(self.pages.update_id_false(1)[:3], expected)

    def tearDown(self) -> None:
        """Drop the reference to the Pages object."""
        self.pages = None


# The guard must live at module level: inside the class body it would run
# before TestPages is bound in the module, so unittest.main() could not
# discover the tests above.
if __name__ == '__main__':
    unittest.main()
예제 #4
0
class MyTestPages(unittest.TestCase):
    """Tests the various methods available to the Pages class.

    Every test method name starts with ``test`` so that unittest's default
    loader collects it.
    """

    def setUp(self) -> None:
        """Create a Pages object on a connection from DB.connect()."""
        self.DB = Pages(DB.connect())

    def test_select(self):
        """select() returns something other than None."""
        self.assertIsNotNone(self.DB.select())

    def test_fetch(self):
        """fetch(2) returns something other than None."""
        self.assertIsNotNone(self.DB.fetch(2))

    def test_update(self):
        """update(True, 1) returns something other than None."""
        # NOTE(review): this test was never collected before the rename;
        # confirm that update() really returns a value.
        self.assertIsNotNone(self.DB.update(True, 1))

    def tearDown(self) -> None:
        """Drop the reference to the Pages object."""
        self.DB = None
예제 #5
0
class TestDB(unittest.TestCase):
    """Exercise the Pages query helpers against a freshly seeded database."""

    def setUp(self) -> None:
        """Connect first, then run DB.setup() and DB.seed() for every test."""
        self.exec = Pages(DB.connect())
        DB.setup()
        DB.seed()

    def test_select(self):
        """select() returns something other than None."""
        rows = self.exec.select()
        self.assertIsNotNone(rows)

    def test_select_urls(self):
        """select_urls() returns something other than None."""
        urls = self.exec.select_urls()
        self.assertIsNotNone(urls)

    def test_find_by_id(self):
        """find_by_id(1) returns a tuple."""
        row = self.exec.find_by_id(1)
        self.assertIsNotNone(row)
        self.assertEqual(type(row), tuple)

    def test_get_url(self):
        """get_url(1) returns a tuple."""
        url_row = self.exec.get_url(1)
        self.assertIsNotNone(url_row)
        self.assertEqual(type(url_row), tuple)

    def test_update_by_id(self):
        """update_by_id(False, 1) returns None."""
        outcome = self.exec.update_by_id(False, 1)
        self.assertEqual(outcome, None)

    def test_delete_by_id(self):
        """delete_by_id(1) returns None."""
        outcome = self.exec.delete_by_id(1)
        self.assertEqual(outcome, None)

    def tearDown(self) -> None:
        """Drop the reference to the Pages object."""
        self.exec = None
예제 #6
0
class TestPages(unittest.TestCase):
    """Covers the methods of the Pages class from pages.py."""

    def setUp(self) -> None:
        # Each test gets its own Pages object on a DB.connect() connection
        connection = DB.connect()
        self.exec = Pages(connection)

    def test_select(self):
        # select() must return something other than None
        self.assertIsNotNone(self.exec.select())

    def test_select_id(self):
        # select_id() must return something other than None
        ids = self.exec.select_id()
        self.assertIsNotNone(ids)

    def test_select_url(self):
        # select_url(1) must return something other than None
        url = self.exec.select_url(1)
        self.assertIsNotNone(url)

    def test_find(self):
        # After seeding, find(2) must return something other than None
        DB.seed()
        self.assertIsNotNone(self.exec.find(2))

    def test_find_url(self):
        # After seeding, find_url(1) must return something other than None
        DB.seed()
        found = self.exec.find_url(1)
        self.assertIsNotNone(found)

    def test_update(self):
        # After seeding, update(False, 1) must return None
        DB.seed()
        outcome = self.exec.update(False, 1)
        self.assertIsNone(outcome)

    def test_delete(self):
        # delete(2) must return None
        outcome = self.exec.delete(2)
        self.assertIsNone(outcome)

    def tearDown(self) -> None:
        # Drop the reference to the Pages object after each test
        self.exec = None
예제 #7
0
class TestPages(TestCase):
    # Covers connect, select, find, update and delete of the Pages class
    def setUp(self):
        """Create the Pages object and the connection handles used by the tests."""
        pages = Pages()
        self.pages = pages
        self.conn_server = DB.only_server()
        connection = pages.connect()
        self.conn = connection
        self.cursor = connection.cursor()

    def test_connect(self):
        """The connection obtained in setUp is not None."""
        self.assertIsNotNone(self.conn)

    def test_select(self):
        """select() returns something other than None."""
        self.assertIsNotNone(self.pages.select())

    def test_find(self):
        """find(1) returns a tuple."""
        row = self.pages.find(1)
        self.assertIsNotNone(row)
        self.assertEqual(type(row), tuple)

    def test_update(self):
        """update('True', 1) returns None."""
        new_value = 'True'
        outcome = self.pages.update(new_value, 1)
        self.assertIsNone(outcome)

    def test_delete(self):
        """delete(1) returns None."""
        outcome = self.pages.delete(1)
        self.assertEqual(outcome, None)

    def tearDown(self):
        """Close the Pages object after each test."""
        self.pages.close()
예제 #8
0
 def setUp(self) -> None:
     # Give every test a Pages object bound to a DB.connect() connection
     connection = DB.connect()
     self.exec = Pages(connection)
예제 #9
0
 def setUp(self) -> None:
     """Create the Pages object under test on a DB.connect() connection."""
     conn = DB.connect()
     self.exec = Pages(conn)
예제 #10
0
def task():
    """Pass the result of ``Pages.find_url(1)`` to ``web_scraper`` and return its result."""
    # NOTE(review): web_scraper elsewhere in this file takes a page id and
    # checks it against select_id(); confirm that a find_url() result is the
    # intended argument here.
    pages = Pages(DB.connect())
    target = pages.find_url(1)
    return web_scraper(target)
예제 #11
0
 def pages(cls):
   # Wrap a connection from cls.connect() in a Pages object and return it
   return Pages(cls.connect())
예제 #12
0
 def setUp(self):
     """Create the Pages object and the connection handles used by the tests."""
     pages = Pages()
     self.pages = pages
     self.conn_server = DB.only_server()
     connection = pages.connect()
     self.conn = connection
     self.cursor = connection.cursor()
예제 #13
0
 def pages(cls):
     # Returns a Pages object built on a connection from cls.new_connect()
     return Pages(cls.new_connect())
예제 #14
0
 def setUp(self) -> None:
     """Create the Pages object under test on a DB.new_connect() connection."""
     connection = DB.new_connect()
     self.pages = Pages(connection)
예제 #15
0
 def pages(cls):
     """Return a Pages object built on a connection from ``cls.connect()``."""
     return Pages(cls.connect())