Example #1
    def send_mother(self):
        worker = BasicUserParseWorker("https://www.reddit.com/user/Chrikelnel")

        # buff_size and send_to_mother are assumed to be provided by the
        # surrounding test harness; neither is defined in this snippet
        data = worker.recv(self.buff_size)
        original_target = None
        send_to_mother(self, data, original_target)
Example #2
    def test_worker_add_links_in_crawled(self):
        worker = BasicUserParseWorker("https://www.reddit.com/user/Chrikelnel")
        worker.crawled = []

        len_to_crawl_before = len(worker.to_crawl)
        worker.add_links(["https://www.reddit.com/user/Chrikelnel"])
        len_to_crawl_after = len(worker.to_crawl)

        self.assertEqual(len_to_crawl_after, len_to_crawl_before)
Example #3
    def test_worker_contact(self):
        worker = BasicUserParseWorker("https://www.reddit.com/user/Chrikelnel")

        # handle_worker_contact and address are assumed to come from the
        # surrounding test module; neither is defined in this snippet
        contact = handle_worker_contact(self, worker, address)

        self.assertRaises(ConnectionRefusedError, worker.run)

Example #4
    def test_worker_add_links_empty_list(self):
        worker = BasicUserParseWorker("https://www.reddit.com/user/Chrikelnel")

        before_links = len(worker.to_crawl)
        worker.add_links([])
        after_links = len(worker.to_crawl)

        self.assertEqual(before_links, after_links)
Example #5
    def test_worker_clears_variables(self):
        worker = BasicUserParseWorker(None)
        empty = []
        len_of_empty = len(empty)

        worker.to_crawl = []
        len_to_crawl = len(worker.to_crawl)

        self.assertEqual(len_of_empty, len_to_crawl)
Example #6
    def test_worker_add_links(self):
        worker = BasicUserParseWorker("https://www.reddit.com/user/Chrikelnel")

        len_to_crawl_before = len(worker.to_crawl)
        worker.add_links("test.com")
        len_to_crawl_after = len(worker.to_crawl)

        self.assertGreater(len_to_crawl_after, len_to_crawl_before)
Example #7
    def test_worker_duplicate_links(self):
        worker = BasicUserParseWorker("https://www.reddit.com/user/Chrikelnel")
        worker.crawled = []
        len_initial = len(worker.to_crawl)

        worker.crawled.append("https://www.reddit.com/user/Chrikelnel")
        worker.add_links(["https://www.reddit.com/user/Chrikelnel"])
        len_after_adding_duplicate = len(worker.to_crawl)

        self.assertEqual(len_after_adding_duplicate, len_initial)
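
Taken together, Examples #2 and #7 imply that add_links() silently skips any link that is already queued in to_crawl or already present in crawled, and Example #10 below implies it also respects a max_links budget. The following sketch restates that inferred behavior; every name in it is taken from the tests, not from the real BasicUserParseWorker source, and note that Example #8 expects different duplicate handling, so the exact rules evidently varied between versions of the worker.

    class WorkerSketch:
        """Hypothetical stand-in for BasicUserParseWorker, inferred from the tests."""

        def __init__(self, start_url, max_links=1000):  # default budget is a guess
            self.to_crawl = [start_url]
            self.crawled = []
            self.max_links = max_links

        def add_links(self, links):
            # Examples #9 and #17 pass a bare string, so treat it as a single
            # link rather than iterating it character by character.
            if isinstance(links, str):
                links = [links]
            for link in links:
                # Example #10 sets max_links = 0 and expects to_crawl unchanged.
                if len(self.to_crawl) >= self.max_links:
                    break
                # Skip links already crawled (Example #7) or queued (Example #2).
                if link not in self.crawled and link not in self.to_crawl:
                    self.to_crawl.append(link)

Under this sketch the assertions in Examples #2, #7, #9 through #12, #14, and #16 through #19 all pass as written; only Example #8 disagrees.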
Example #8
    def test_worker_cannot_add_already_crawled_links(self):
        """
        once crawled has been cleared, a previously seen link can be added again,
        so the to_crawl length increases by 1
        """
        worker = BasicUserParseWorker("https://www.reddit.com/user/Chrikelnel")
        worker.crawled = []

        len_to_crawl_before = len(worker.to_crawl)
        worker.add_links(["https://www.reddit.com/user/Chrikelnel"])
        len_to_crawl_after = len(worker.to_crawl)

        self.assertEqual(len_to_crawl_after, len_to_crawl_before + 1)
Example #9
    def test_worker_adding_new_links(self):
        """
        Purpose: Test adding new links to the to_crawl list.
        Expectations: New link is added to to_crawl list and length of list increases.

        :return:
        """
        worker = BasicUserParseWorker("https://www.reddit.com/user/Chrikelnel")

        len_before = len(worker.to_crawl)
        worker.add_links("https://www.reddit.com/user/Groggen2")
        self.assertGreater(len(worker.to_crawl), len_before)
Example #10
    def test_worker_add_links_max_limit(self):
        worker = BasicUserParseWorker("https://www.reddit.com/user/Chrikelnel")

        worker.max_links = 0
        before = worker.to_crawl[:]
        len_to_crawl_before = len(worker.to_crawl)
        worker.add_links("test.com")
        after = worker.to_crawl[:]
        len_to_crawl_after = len(worker.to_crawl)

        self.assertEqual(before, after)
Example #11
    def test_worker_add_links_under_max_limit(self):
        worker = BasicUserParseWorker("https://www.reddit.com/user/Chrikelnel")

        worker.max_links = 7  # max_links = 7 now
        len_to_crawl_before = len(worker.to_crawl)
        worker.add_links("test.com")
        len_to_crawl_after = len(worker.to_crawl)

        self.assertNotEqual(len_to_crawl_after, len_to_crawl_before)  # Check that add_links adds links successfully
Example #12
    def test_zelan_test_two(self):
        """
        test that an added link appears in the to_crawl list
        """
        worker = BasicUserParseWorker("https://www.reddit.com")
        worker.crawled = []

        len_to_crawl_before = len(worker.to_crawl)
        worker.add_links(["https://www.reddit.com/user/Chrikelnel"])

        self.assertIn("https://www.reddit.com/user/Chrikelnel",
                      worker.to_crawl)
Example #13
    def test_worker_add_results_components(self):
        # test that all three result components are properly added to results
        worker = BasicUserParseWorker("https://www.reddit.com/user/Chrikelnel")
        file_path = '%s/%s' % (os.path.dirname(os.path.realpath(__file__)), 'test_resources/sample_GET_response.html')

        with codecs.open(file_path, encoding='utf-8') as f:
            text = f.read()

        results, next_page = worker.parse_text(str(text).strip().replace('\r\n', ''))

        self.assertGreater(len(results[0]), 0)
        self.assertGreater(len(results[1]), 0)
        self.assertGreater(len(results[2]), 0)
Example #14
    def test_add_multiple_links(self):
        worker = BasicUserParseWorker("https://www.reddit.com/user/Chrikelnel")
        worker.crawled = []

        len_to_crawl_before = len(worker.to_crawl)

        worker.add_links([
            "https://www.reddit.com/user/Chrikelnel", "https://www.google.ca",
            "https://hotmail.com"
        ])
        len_to_crawl_after = len(worker.to_crawl)

        self.assertEqual(len_to_crawl_after, len_to_crawl_before + 2)
Example #15
    def test_worker_parsing_next_page(self):
        worker = BasicUserParseWorker("https://www.reddit.com/user/Chrikelnel")
        file_path = '%s/%s' % (os.path.dirname(os.path.realpath(__file__)),
                               'test_resources/sample_GET_response.html')

        with codecs.open(file_path, encoding='utf-8') as f:
            text = f.read()

        results, next_page = worker.parse_text(
            str(text).strip().replace('\r\n', ''))

        self.assertIsNotNone(next_page)
        self.assertGreater(len(next_page), 0)
Example #16
    def test_zelan_test_four(self):
        """
        test that adding two distinct links increases to_crawl by two
        """
        worker = BasicUserParseWorker("https://www.reddit.com")
        worker.crawled = []

        len_to_crawl_before = len(worker.to_crawl)
        worker.add_links([
            "https://www.reddit.com/user/Chrikelnel", "https://www.google.ca"
        ])
        len_to_crawl_after = len(worker.to_crawl)
        self.assertEqual(len_to_crawl_after, len_to_crawl_before + 2)
Example #17
    def test_worker_adding_duplicate_links(self):
        """
        Purpose: Test adding duplicate links to the to_crawl list. (Fixed version of above code provided by Caleb Shortt)
        Expectation: Link is not added to to_crawl list and length of list remains the same.

        :return:
        """
        worker = BasicUserParseWorker("https://www.reddit.com/user/Chrikelnel")

        duplicate = "https://www.reddit.com/user/Chrikelnel"
        len_before = len(worker.to_crawl)
        worker.add_links(duplicate)
        self.assertEqual(len(worker.to_crawl), len_before)
Example #18
    def test_worker_cannot_add_duplicate_links(self):
        """
        calling add_links() with two identical links only adds 1 link
        """
        worker = BasicUserParseWorker("https://www.reddit.com/user/Chrikelnel")
        worker.crawled = []

        len_to_crawl_before = len(worker.to_crawl)
        worker.add_links([
            "https://www.reddit.com/user/GallowBoob",
            "https://www.reddit.com/user/GallowBoob"
        ])
        len_to_crawl_after = len(worker.to_crawl)

        self.assertEqual(len_to_crawl_after, len_to_crawl_before + 1)
Example #19
    def test_worker_add_links_in_crawled(self):
        """
        calling add_links() with one link on a worker increases the to_crawl length by 1

        This unit test was partially implemented in class but was broken
        It is now fixed
        """
        worker = BasicUserParseWorker("https://www.reddit.com/user/Chrikelnel")
        worker.crawled = []

        len_to_crawl_before = len(worker.to_crawl)
        worker.add_links(["https://www.reddit.com/user/GallowBoob"])
        len_to_crawl_after = len(worker.to_crawl)

        self.assertEqual(len_to_crawl_after, len_to_crawl_before + 1)
Example #20
    def test_worker_max_links(self):
        """
        Purpose: Test the current links count is properly set to 0 before running
        :return:
        """
        worker = BasicUserParseWorker("https://www.reddit.com/user/Chrikelnel")
        self.assertEqual(0, worker.cur_links)
Example #21
    def test_worker_crawl_links(self):
        """
        Purpose: Test if worker.to_crawl and worker.crawled are updated correctly after links are crawled
        Expectation: Once all links are crawled, to_crawl should be empty and the length of crawled should equal the number of links crawled
        """
        worker = BasicUserParseWorker("https://www.reddit.com/user/Chrikelnel")
        worker.crawled = []
        num_links_to_crawl = len(worker.to_crawl)
        len_crawled_before = len(worker.crawled)

        self.assertRaises(ConnectionRefusedError, worker.run)
        len_to_crawl_after = len(worker.to_crawl)
        len_crawled_after = len(worker.crawled)

        self.assertEqual(len_to_crawl_after, 0)
        self.assertEqual(len_crawled_before + num_links_to_crawl,
                         len_crawled_after)
Example #22
    def test_worker_invalid_links(self):
        """
        Purpose: Test running of Worker if it is given an invalid link to crawl (a link that returns 404).
        Expectation: WorkerException is raised.
        """
        # the following link: http://gdalskjfakl.com/ was invalid at the time this test was written
        worker = BasicUserParseWorker("http://gdalskjfakl.com/")
        self.assertRaises(WorkerException, worker.run)
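
Between them, Examples #21, #22, #28, and #30 pin down the failure contract of run(): an unreachable mothership surfaces as ConnectionRefusedError, while a malformed or unresolvable crawl target surfaces as WorkerException. This sketch shows one way a fetch could map low-level failures onto WorkerException, assuming a plain urllib transport; the worker's real fetch code is not shown in these examples.

    import urllib.error
    import urllib.request

    class WorkerException(Exception):
        """Stand-in for the project's WorkerException."""

    def fetch_or_raise(url):
        # Translate transport-level failures into the exception the tests expect.
        try:
            with urllib.request.urlopen(url, timeout=10) as resp:
                return resp.read().decode('utf-8')
        except (urllib.error.URLError, ValueError) as exc:
            # URLError covers dead hosts like http://gdalskjfakl.com/ (Example #22);
            # ValueError covers malformed targets such as "not_a_proper_url" (Example #30).
            raise WorkerException('could not crawl %s: %s' % (url, exc))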
Example #23
    def test_worker_connection(self):
        """
        Purpose: Test regular running of worker
        Expectation: startup system, hit the reddit user and parse the data, send to mothership

        :precondition: Mothership server running
        :return:
        """
        server = MothershipServer()
        server.run()
        try:
            worker = BasicUserParseWorker(
                "https://www.reddit.com/user/alecnin")
            # worker can connect to mother, so this should not raise
            worker.send_to_mother()
        except ConnectionRefusedError:
            self.fail('connection failure')
Example #24
    def test_worker_parsing(self):
        """
        Purpose: Test regular parsing mechanisms of worker
        Expectation: Load html file, send it to worker to parse, should return list of results

        :return:
        """
        worker = BasicUserParseWorker("https://www.reddit.com/user/Chrikelnel")
        file_path = '%s/%s' % (os.path.dirname(os.path.realpath(__file__)), 'test_resources/sample_GET_response.html')

        with codecs.open(file_path, encoding='utf-8') as f:
            text = f.read()

        results, next_page = worker.parse_text(str(text).strip().replace('\r\n', ''))

        self.assertGreater(len(results), 0)     # Check that results are returned
        self.assertEqual(len(results[0]), 3)    # Check that results are in triplets (check formatting)
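
Examples #13, #15, #24, and #26 jointly describe the contract of parse_text(): it returns a (results, next_page) pair in which results is a non-empty list of 3-tuples and next_page is a non-empty pagination URL. The usage sketch below simply restates that inferred shape; what the three tuple fields actually contain is not revealed by these tests.

    import codecs
    import os

    worker = BasicUserParseWorker("https://www.reddit.com/user/Chrikelnel")
    file_path = '%s/%s' % (os.path.dirname(os.path.realpath(__file__)),
                           'test_resources/sample_GET_response.html')

    with codecs.open(file_path, encoding='utf-8') as f:
        text = f.read()

    results, next_page = worker.parse_text(str(text).strip().replace('\r\n', ''))

    assert isinstance(results, list) and len(results) > 0  # Example #26
    assert len(results[0]) == 3                            # Example #24: triplets
    assert next_page is not None and len(next_page) > 0    # Example #15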
Example #25
    def test_worker_url(self):
        """
        Purpose: Test the url of the worker, before it is run
        :return:
        """
        worker = BasicUserParseWorker("https://www.reddit.com/user/Chrikelnel")
        self.assertEqual("https://www.reddit.com/user/Chrikelnel",
                         worker.to_crawl[0])
Example #26
    def test_worker_parsing_results_not_empty(self):
        worker = BasicUserParseWorker("https://www.reddit.com/user/Chrikelnel")
        file_path = '%s/%s' % (os.path.dirname(os.path.realpath(__file__)),
                               'test_resources/sample_GET_response.html')

        with codecs.open(file_path, encoding='utf-8') as f:
            text = f.read()

        results, next_page = worker.parse_text(
            str(text).strip().replace('\r\n', ''))

        self.assertIsNotNone(results)            # Check that results were created and returned
        self.assertTrue(len(results) > 0)        # Check that the number of results is positive
        self.assertIs(type(results), type([]))   # Check that results is a list
        self.assertNotEqual(results[0], ())      # Check that the first result is not an empty tuple
Example #27
    def test_worker_add_links_list(self):
        """
        Purpose: Test adding a list of links to worker to_crawl, with duplicate links in the list
        Expectation: The size of to_crawl increases by the size of the unique items in the list (which is 3 in this test)
        """
        worker = BasicUserParseWorker("https://www.reddit.com/user/Chrikelnel")
        len_to_crawl_before = len(worker.to_crawl)

        li = [
            "https://www.reddit.com/user/Chrikelnel/comments/",
            "https://www.reddit.com/user/Chrikelnel/submitted/",
            "https://www.reddit.com/user/Chrikelnel/gilded/",
            "https://www.reddit.com/user/Chrikelnel/comments/"
        ]
        worker.add_links(li)
        len_to_crawl_after = len(worker.to_crawl)

        self.assertEqual(len_to_crawl_before + 3, len_to_crawl_after)
Example #28
    def test_worker_improper_link(self):
        """
        Purpose: Test that improper links raise exception.
        Expectation: Startup system, fail to hit reddit user, raise exception.

        :return:
        """
        worker = BasicUserParseWorker("https://www.reddit.com /user/Chrikelnel")
        self.assertRaises(WorkerException, worker.run)
Example #29
    def test_worker_link_delay(self):
        """
        Purpose: Test that the link_delay default value is set correctly

        :return:
        """
        worker = BasicUserParseWorker("https://www.reddit.com/user/Chrikelnel")

        self.assertEqual(0.25, worker.link_delay)
Example #30
    def test_worker_fails_on_nonexistent_page(self):
        """
        Purpose: Test failure handling of worker.
        Expectation: worker raises exception when given a url that returns a non-200 response / no response.

        :precondition: not_a_proper_url does not resolve
        :return:
        """
        worker = BasicUserParseWorker("not_a_proper_url")
        self.assertRaises(WorkerException, worker.run)
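
Finally, the attribute defaults asserted in Examples #20, #25, and #29 sketch out the constructor these tests assume; this is inferred from the assertions, not taken from the actual BasicUserParseWorker source.

    class ConstructorSketch:
        """Hypothetical constructor matching the defaults the tests assert."""

        def __init__(self, start_url):
            self.to_crawl = [start_url]  # Example #25: the seed URL sits at to_crawl[0]
            self.crawled = []            # many tests reset this before asserting
            self.cur_links = 0           # Example #20: link counter starts at zero
            self.link_delay = 0.25       # Example #29: default delay between requests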