Example #1
0
class TestLinks(unittest.TestCase):
    """Exercise the methods of the Links class defined in links.py."""

    def setUp(self) -> None:
        # Every test works on its own Links instance built on DB.connect().
        connection = DB.connect()
        self.exec = Links(connection)

    def test_insert(self):
        # insert() is expected to return None after DB.setup() and DB.seed().
        DB.setup()
        DB.seed()
        outcome = self.exec.insert(1, 'https://facebook.com')
        self.assertIsNone(outcome)

    def test_select(self):
        # select() must return something other than None.
        DB.setup()
        DB.seed()
        rows = self.exec.select()
        self.assertIsNotNone(rows)

    def test_find_with_page_id(self):
        # find_with_page_id(1) is expected to return None here.
        found = self.exec.find_with_page_id(1)
        self.assertIsNone(found)

    def test_delete_by_page_id(self):
        # delete_by_page_id(1) is expected to return None.
        removed = self.exec.delete_by_page_id(1)
        self.assertIsNone(removed)

    def tearDown(self) -> None:
        # Drop the reference to the Links instance once a test has finished.
        self.exec = None
Example #2
0
class TestLinks(unittest.TestCase):
    """Tests for the Links class.

    Covers ``insert`` (storing an extracted URL), ``select`` and ``delete``.
    """

    def setUp(self) -> None:
        # Each test gets its own Links instance built on DB.new_connect().
        self.links = Links(DB.new_connect())

    def test_insert(self):
        """insert() returns None after the database is set up and seeded."""
        DB().setup()
        DB.seed()
        self.assertIsNone(self.links.insert(2, 'https://www.wikipedia.com'))

    def test_select(self):
        """select(1) returns something other than None."""
        # No second argument: that parameter of assertIsNotNone is the failure
        # message, not an expected value.
        self.assertIsNotNone(self.links.select(1))

    def test_delete(self):
        """delete(1) returns None."""
        # No second argument: that parameter of assertIsNone is the failure
        # message, not an expected value.
        self.assertIsNone(self.links.delete(1))

    def tearDown(self) -> None:
        # Drop the reference to the Links instance after each test.
        self.links = None
Example #3
0
def web_scraper(page_id):
    """Scrape up to ten ``https`` links from the page stored under ``page_id``.

    The id is checked against the ids returned by ``Pages.select_id()``. The
    page is marked with ``update(True, page_id)`` while the work runs
    (presumably an "is scraping" flag -- confirm against the Pages class),
    links already stored for the page are deleted, and at most ten of the
    links found on the page are inserted. The mark is reset with
    ``update(False, page_id)`` when the work ends, whether it succeeded or not.

    Args:
        page_id: Id of the page whose URL should be scraped.

    Raises:
        TypeError: If ``page_id`` is not among the stored page ids. The
            exception type is kept as-is so existing callers keep working.
    """
    known_ids = [row[0] for row in Pages(DB.connect()).select_id()]
    if page_id not in known_ids:
        raise TypeError('Id does not exist.')

    url = Pages(DB.connect()).select_url(page_id)
    DB.pages().update(True, page_id)
    try:
        response = requests.get(url)
        soup = BeautifulSoup(response.text, 'html.parser')

        # Keep only hrefs starting with "https", in document order.
        found = [
            anchor['href']
            for anchor in soup.find_all('a', href=True)
            if anchor['href'].startswith('https')
        ]

        # Remove links stored for this page before inserting the new ones.
        DB.links().delete_by_page_id(page_id)

        # NOTE(review): a new Links(DB.connect()) is created per link, as in
        # the original; reuse one instance if Links allows it.
        for href in found[:10]:
            Links(DB.connect()).insert(page_id, href)
    finally:
        # Reset the mark even when the request, parsing or an insert fails,
        # so the page is never left flagged as in progress.
        DB.pages().update(False, page_id)
Example #4
0
 def links(cls):
   """
   Builds the interface object for the links table.
   :return
   Links: A Links instance created with the connection from cls.connect().
   """
   connection = cls.connect()
   return Links(connection)
Example #5
0
class MyTestLinks(unittest.TestCase):
  """
  Checks the return values of the methods offered by the Links class.
  """
  def setUp(self) -> None:
    # Each test works on a Links instance built on DB.connect().
    connection = DB.connect()
    self.DB = Links(connection)

  def test_select(self):
    # select() must return something other than None.
    selected = self.DB.select()
    self.assertIsNotNone(selected)

  def test_fetch(self):
    # fetch() must return something other than None.
    fetched = self.DB.fetch()
    self.assertIsNotNone(fetched)

  def test_insert(self):
    # insert() must return something other than None.
    inserted = self.DB.insert(2, 'https://rb.gy/zd2xxz')
    self.assertIsNotNone(inserted)

  def test_delete(self):
    # delete() is expected to return None.
    deleted = self.DB.delete(1)
    self.assertIsNone(deleted)

  def tearDown(self) -> None:
    # Drop the reference to the Links instance after each test.
    self.DB = None
class TestDB(unittest.TestCase):
    """Tests for the Links table interface: insert, select and delete."""

    def setUp(self) -> None:
        # Each test works on a Links instance built on DB.connect().
        self.exec = Links(DB.connect())

    def test_insert(self):
        """Test insert into links table."""
        DB.setup()
        DB.seed()
        value = self.exec.insert(1, 'https://www.google.com/')
        self.assertIsNone(value)

    def test_select(self):
        """Test select from links table."""
        DB.setup()
        value = self.exec.select()
        self.assertIsNotNone(value)

    def test_select_by_id(self):
        """Test selection of specific data from links table by id."""
        DB.setup()
        DB.seed()
        self.exec.insert(1, 'https://www.google.com/')
        value = self.exec.select_by_id(1)
        self.assertIsNotNone(value)
        # assertIsInstance reports the offending value on failure and also
        # accepts tuple subclasses such as named tuples.
        self.assertIsInstance(value, tuple)

    def test_select_by_page_id(self):
        """Test selection of specific data from links table by page_id."""
        DB.setup()
        value = self.exec.select_by_page_id(1)
        self.assertIsNotNone(value)
        self.assertIsInstance(value, list)

    def test_delete_by_id(self):
        """Test deletion of specific data in links table by id."""
        DB.setup()
        value = self.exec.delete_by_id(1)
        self.assertIsNone(value)

    def test_delete_by_page_id(self):
        """Test deletion of specific data in links table by page_id."""
        DB.setup()
        DB.seed()
        value = self.exec.delete_by_page_id(1)
        self.assertIsNone(value)

    def tearDown(self) -> None:
        # Drop the reference to the Links instance after each test.
        self.exec = None
Example #7
0
class TestLinks(TestCase):
    # Test each and every method in the links class
    def setUp(self):
        """Create the Links object and the connections used by the tests."""
        # Stored as ``self.links``: every test and tearDown read that name.
        # Storing it under ``self.pages`` made setUp fail with AttributeError
        # on the ``self.links.connect()`` call below.
        self.links = Links()
        self.conn_server = DB.only_server()
        self.conn = self.links.connect()
        self.cursor = self.conn.cursor()

    def test_connect(self):
        """Test that setUp obtained a connection object."""
        connection_object = self.conn
        self.assertIsNotNone(connection_object)

    def test_insert(self):
        """Test that insert returns something other than None."""
        inserted_data = self.links.insert()
        self.assertIsNotNone(inserted_data)

    def test_select(self):
        """Test that select returns something other than None."""
        data = self.links.select()
        self.assertIsNotNone(data)

    def test_find(self):
        """Test that find(1) returns a tuple."""
        data = self.links.find(1)
        self.assertIsNotNone(data)
        self.assertEqual(type(data), tuple)

    def test_update(self):
        """Test that update with the given params and id returns None."""
        data = 'True'
        updated_data = self.links.update(data, 1)
        self.assertIsNone(updated_data)

    def test_delete(self):
        """Test that delete by id returns None."""
        deleted = self.links.delete(1)
        self.assertEqual(deleted, None)

    def tearDown(self):
        """Close the Links object after each test."""
        self.links.close()
Example #8
0
def spider(page_id):
    ''' Scrape the page stored under ``page_id`` and store up to ten links.

    The URL for the page id is looked up, the page is fetched, and every
    anchor whose href starts with ``http`` is collected. Links already stored
    for the page are deleted and at most the first ten collected links are
    inserted. The page's is_scraping flag is set to True while the work runs
    and reset to False afterwards, whether the work succeeded or not.

    Args:
        page_id: Non-zero ``int`` id of the page to scrape.

    Raises:
        ValueError: If ``page_id`` is not an ``int``, is ``0``, or no URL is
            stored for it.
    '''
    # Exact type comparison (not isinstance) so bool values are still rejected.
    if type(page_id) is not int or page_id == 0:
        raise ValueError('Page Id is not valid')

    get_url = DB.pages().get_url(page_id)

    if get_url is None:
        # Raise the error; returning the exception object let callers carry on
        # as if the scrape had succeeded.
        raise ValueError('Page Id not found')

    url = get_url[0]

    # set is_scraping to True where id == page_id
    DB.pages().update_by_id(True, page_id)
    try:
        res = requests.get(url)
        soup = BeautifulSoup(res.text, 'html.parser')

        all_links = [
            anchor['href']
            for anchor in soup.find_all('a', href=True)
            if anchor['href'].startswith('http')
        ]

        # delete all data already stored in the links table for this page id
        DB.links().delete_by_page_id(page_id)

        for link in all_links[:10]:
            # Insert each link into the links table
            Links(DB().connect()).insert(page_id, link)
    finally:
        # set is_scraping to False where id == page_id, even when the request
        # or an insert failed, so the page is not left flagged as scraping
        DB.pages().update_by_id(False, page_id)
 def setUp(self) -> None:
     # Build the Links instance under test on a connection from DB.connect().
     connection = DB.connect()
     self.exec = Links(connection)
Example #10
0
 def links(cls):
   # Returns a Links interface built from a connection and the result of
   # cls.pages().select(); the connection is obtained first, as before.
   return Links(cls.connect(), cls.pages().select())
Example #11
0
 def setUp(self) -> None:
     # Build the Links instance under test on DB.new_connect().
     connection = DB.new_connect()
     self.links = Links(connection)
Example #12
0
 def links(cls):
     # Hands back a Links interface built on cls.new_connect().
     return Links(cls.new_connect())
Example #13
0
 def setUp(self) -> None:
     # Prepare the Links instance used by the tests.
     connection = DB.connect()
     self.exec = Links(connection)
Example #14
0
 def links(cls):
     # Hands back a Links interface built on cls.connect().
     return Links(cls.connect())
Example #15
0
 def setUp(self):
     """Create the Links object and the connections used by the tests."""
     # The object must be available as ``self.links`` because the connect()
     # call below reads that name; with only ``self.pages`` assigned, setUp
     # failed with AttributeError.
     self.links = Links()
     # Keep the previous attribute name as an alias for code that reads it.
     self.pages = self.links
     self.conn_server = DB.only_server()
     self.conn = self.links.connect()
     self.cursor = self.conn.cursor()