Ejemplo n.º 1
0
    def test_worker_add_links_in_crawled(self):
        """Re-adding the worker's seed link must not grow to_crawl."""
        worker = BasicUserParseWorker("https://www.reddit.com/user/Chrikelnel")
        worker.crawled = []

        size_before = len(worker.to_crawl)
        worker.add_links(["https://www.reddit.com/user/Chrikelnel"])

        self.assertEqual(len(worker.to_crawl), size_before)




            def test_worker_contact(self):

                contact = handle_worker_contact(self, worker, address):

                self.assertRaises(ConnectionRefusedError, worker.run)

            def send_mother(self):

                worker = BasicUserParseWorker("https://www.reddit.com/user/Chrikelnel")

                data = worker.recv(self.buff_size)
                original_target = None
                send_to_mother(self, data, original_target)
Ejemplo n.º 2
0
    def test_worker_add_links_empty_list(self):
        """add_links([]) must leave the to_crawl queue untouched."""
        worker = BasicUserParseWorker("https://www.reddit.com/user/Chrikelnel")

        count_before = len(worker.to_crawl)
        worker.add_links([])

        self.assertEqual(count_before, len(worker.to_crawl))
Ejemplo n.º 3
0
    def test_worker_add_links_in_crawled(self):
        """Queueing the seed URL again leaves the pending count unchanged."""
        worker = BasicUserParseWorker("https://www.reddit.com/user/Chrikelnel")
        worker.crawled = []
        pending_before = len(worker.to_crawl)

        worker.add_links(["https://www.reddit.com/user/Chrikelnel"])
        pending_after = len(worker.to_crawl)

        self.assertEqual(pending_after, pending_before)
    def test_worker_add_links(self):
        """Adding a brand-new link must grow the to_crawl queue.

        Fixed: removed the dead ``worker = None`` assignment that was
        immediately overwritten on the next line.
        """
        worker = BasicUserParseWorker("https://www.reddit.com/user/Chrikelnel")

        len_to_crawl_before = len(worker.to_crawl)
        # a bare string is passed here; assumes add_links() accepts a single
        # link as well as a list -- TODO confirm against add_links()
        worker.add_links("test.com")
        len_to_crawl_after = len(worker.to_crawl)

        self.assertGreater(len_to_crawl_after, len_to_crawl_before)
Ejemplo n.º 5
0
    def test_worker_duplicate_links(self):
        """A link already recorded in `crawled` must not be queued again."""
        worker = BasicUserParseWorker("https://www.reddit.com/user/Chrikelnel")
        worker.crawled = []
        queue_size = len(worker.to_crawl)

        worker.crawled.append("https://www.reddit.com/user/Chrikelnel")
        worker.add_links(["https://www.reddit.com/user/Chrikelnel"])

        self.assertEqual(len(worker.to_crawl), queue_size)
Ejemplo n.º 6
0
    def test_worker_adding_new_links(self):
        """
        Purpose: Test adding new links to the to_crawl list.
        Expectations: New link is added to to_crawl list and length of list
        increases.

        Fixed: the body previously mixed tab and space indentation, which
        raises TabError under Python 3; it is now spaces only.

        :return:
        """
        worker = BasicUserParseWorker("https://www.reddit.com/user/Chrikelnel")

        len_before = len(worker.to_crawl)
        worker.add_links("https://www.reddit.com/user/Groggen2")
        self.assertGreater(len(worker.to_crawl), len_before)
Ejemplo n.º 7
0
    def test_worker_add_links_max_limit(self):
        """With max_links forced to 0, add_links must not modify to_crawl.

        Fixed: removed the dead ``worker = None`` assignment and the unused
        ``len_to_crawl_before``/``len_to_crawl_after`` locals -- the
        assertion compares full list snapshots, which already covers length.
        """
        worker = BasicUserParseWorker("https://www.reddit.com/user/Chrikelnel")

        worker.max_links = 0  # forbid any further queueing
        before = worker.to_crawl[:]
        worker.add_links("test.com")
        after = worker.to_crawl[:]

        self.assertEqual(before, after)
Ejemplo n.º 8
0
    def test_worker_add_links_under_max_limit(self):
        """While under the max_links cap, add_links must change queue length.

        Fixed: removed the dead ``worker = None`` assignment.
        """
        worker = BasicUserParseWorker("https://www.reddit.com/user/Chrikelnel")

        worker.max_links = 7  # presumably above the current queue size -- verify default seed count
        len_to_crawl_before = len(worker.to_crawl)
        worker.add_links("test.com")
        len_to_crawl_after = len(worker.to_crawl)

        # Check that add_links adds links successfully
        self.assertNotEqual(len_to_crawl_after, len_to_crawl_before)
Ejemplo n.º 9
0
    def test_worked_cannot_add_already_crawled_links(self):
        """
        Adding one link increases the to_crawl length by exactly 1.

        NOTE(review): the method name (and the original docstring) claimed
        the length should NOT change, but the assertion below checks
        ``before + 1``. The link added is the worker's seed URL and
        ``crawled`` is cleared first, so one of the two is wrong -- confirm
        against add_links()'s dedup rules before renaming either.
        """
        worker = BasicUserParseWorker("https://www.reddit.com/user/Chrikelnel")
        worker.crawled = []

        len_to_crawl_before = len(worker.to_crawl)
        worker.add_links(["https://www.reddit.com/user/Chrikelnel"])
        len_to_crawl_after = len(worker.to_crawl)

        self.assertEqual(len_to_crawl_after, len_to_crawl_before + 1)
Ejemplo n.º 10
0
    def test_zelan_test_two(self):
        """
        The added link must appear in the to_crawl queue.

        Fixed: removed the dead ``worker = None`` assignment and the unused
        ``len_to_crawl_before`` local (the assertion only checks membership).
        """
        worker = BasicUserParseWorker("https://www.reddit.com")
        worker.crawled = []

        worker.add_links(["https://www.reddit.com/user/Chrikelnel"])

        self.assertIn("https://www.reddit.com/user/Chrikelnel",
                      worker.to_crawl)
Ejemplo n.º 11
0
    def add_multiple_links(self):
        """Queue three links (one duplicates the seed URL); expect +2.

        NOTE(review): the name lacks the ``test_`` prefix, so unittest will
        not collect this method -- confirm whether that is intentional.
        """
        worker = BasicUserParseWorker("https://www.reddit.com/user/Chrikelnel")
        worker.crawled = []
        baseline = len(worker.to_crawl)

        new_links = [
            "https://www.reddit.com/user/Chrikelnel",
            "https://www.google.ca",
            "https://hotmail.com",
        ]
        worker.add_links(new_links)

        self.assertEqual(len(worker.to_crawl), baseline + 2)
Ejemplo n.º 12
0
    def test_zelan_test_one(self):
        """
        Adding one fresh link increases to_crawl by exactly 1.

        Fixed: removed the dead ``worker = None`` assignment.
        """
        worker = BasicUserParseWorker("https://www.reddit.com")
        worker.crawled = []
        worker.max_links = 5  # leave headroom so the cap cannot interfere

        len_to_crawl_before = len(worker.to_crawl)
        worker.add_links(["https://www.reddit.com/user/Chrikelnel"])
        len_to_crawl_after = len(worker.to_crawl)

        self.assertEqual(len_to_crawl_after, len_to_crawl_before + 1)
Ejemplo n.º 13
0
    def test_worker_adding_duplicate_links(self):
        """
        Purpose: Test adding duplicate links to the to_crawl list. (Fixed
        version of above code provided by Caleb Shortt)
        Expectation: Link is not added to to_crawl list and length of list
        remains the same.

        Fixed: the body mixed tabs and spaces (TabError under Python 3);
        ``worker.run`` was a bare attribute access that never called run();
        and ``if duplicate not in worker.to_crawl: self.assertTrue(True)``
        could never fail. The test now asserts the stated expectation:
        re-adding the seed URL leaves the queue length unchanged.

        :return:
        """
        worker = BasicUserParseWorker("https://www.reddit.com/user/Chrikelnel")

        duplicate = "https://www.reddit.com/user/Chrikelnel"
        len_before = len(worker.to_crawl)
        worker.add_links(duplicate)
        self.assertEqual(len(worker.to_crawl), len_before)
Ejemplo n.º 14
0
    def test_zelan_test_four(self):
        """
        Adding two distinct links increases to_crawl by exactly 2.

        Fixed: removed the dead ``worker = None`` assignment.
        """
        worker = BasicUserParseWorker("https://www.reddit.com")
        worker.crawled = []

        len_to_crawl_before = len(worker.to_crawl)
        worker.add_links([
            "https://www.reddit.com/user/Chrikelnel", "https://www.google.ca"
        ])
        len_to_crawl_after = len(worker.to_crawl)
        self.assertEqual(len_to_crawl_after, len_to_crawl_before + 2)
Ejemplo n.º 15
0
    def test_worker_add_links_in_crawled(self):
        """
        add_links() with one new link grows to_crawl by exactly 1.

        This unit test was partially implemented in class but was broken;
        it is now fixed.
        """
        worker = BasicUserParseWorker("https://www.reddit.com/user/Chrikelnel")
        worker.crawled = []
        queued_before = len(worker.to_crawl)

        worker.add_links(["https://www.reddit.com/user/GallowBoob"])

        self.assertEqual(len(worker.to_crawl), queued_before + 1)
Ejemplo n.º 16
0
    def test_worker_cannot_add_duplicate_links(self):
        """Passing the same link twice in one call queues it only once."""
        worker = BasicUserParseWorker("https://www.reddit.com/user/Chrikelnel")
        worker.crawled = []
        initial_size = len(worker.to_crawl)

        repeated = "https://www.reddit.com/user/GallowBoob"
        worker.add_links([repeated, repeated])

        self.assertEqual(len(worker.to_crawl), initial_size + 1)
Ejemplo n.º 17
0
    def test_worker_add_links(self):
        """Queue links one at a time, then as a batch, checking the totals.

        Starts from the single seed link, adds link1..link4 individually
        (queue size 5), then link5..link9 in one list (queue size 10).
        """
        worker = BasicUserParseWorker("https://www.reddit.com/user/Chrikelnel")
        worker.max_links = 10

        for n in range(1, 5):
            worker.add_links(['link%d' % n])
        self.assertEqual(len(worker.to_crawl), 5)

        batch = ['link%d' % n for n in range(5, 10)]
        worker.add_links(batch)
        self.assertEqual(len(worker.to_crawl), 10)
Ejemplo n.º 18
0
    def test_worker_add_links_list(self):
        """
        Purpose: Test adding a list of links to worker to_crawl, with
        duplicate links in the list.
        Expectation: to_crawl grows by the number of unique items in the
        list (3 in this test).
        """
        worker = BasicUserParseWorker("https://www.reddit.com/user/Chrikelnel")
        initial = len(worker.to_crawl)

        base = "https://www.reddit.com/user/Chrikelnel"
        worker.add_links([
            base + "/comments/",
            base + "/submitted/",
            base + "/gilded/",
            base + "/comments/",  # deliberate duplicate
        ])

        self.assertEqual(initial + 3, len(worker.to_crawl))
Ejemplo n.º 19
0
    def test_worker_max_links(self):
        """Queue sixteen distinct links, then inspect the crawled count.

        NOTE(review): the assertion inspects ``worker.crawled`` (not
        ``to_crawl``) and only checks that it differs from 10 -- this looks
        intended to exercise the max_links cap; confirm the original intent.
        """
        worker = BasicUserParseWorker("https://www.reddit.com/user/Chrikelnel")

        # Same sixteen calls as the original, collapsed into a loop:
        # the bare seed name first, then suffixes 1..15.
        worker.add_links(["https://www.reddit.com/user/Chrikelnel"])
        for suffix in range(1, 16):
            worker.add_links(
                ["https://www.reddit.com/user/Chrikelnel%d" % suffix])

        len_after = len(worker.crawled)
        self.assertNotEqual(len_after, 10)