Example 1
    def init_training(self, shill_filepath, notshill_filepath):

        with open(shill_filepath, 'r') as fs:
            s_content = fs.readlines()
        shill_targets = [x.strip() for x in s_content]

        notshill_targets = []
        if notshill_filepath:
            with open(notshill_filepath, 'r') as fns:
                ns_content = fns.readlines()
            notshill_targets = [x.strip() for x in ns_content]

        corpus = []
        for shill in shill_targets:
            worker = BasicUserParseWorker(shill)
            result, root = worker.run(training_label=LABEL_SHILL, local=True)
            corpus += result

        for notshill in notshill_targets:
            worker = BasicUserParseWorker(notshill)
            result, root = worker.run(training_label=LABEL_NOTSHILL,
                                      local=True)
            corpus += result

        self.train_classifier({'data': corpus})
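For context, here is a minimal usage sketch of the method above. The file names and the `classifier` instance are hypothetical stand-ins; each file is assumed to hold one target per line (a Reddit profile URL), matching the readlines()/strip() handling in the code.

    # Hypothetical input files, one target per line, e.g.:
    #   https://www.reddit.com/user/Chrikelnel
    # 'classifier' stands in for whatever object defines init_training().
    classifier.init_training('shill_targets.txt', 'notshill_targets.txt')

    # notshill_filepath may be None or empty; then only shill targets are used.
    classifier.init_training('shill_targets.txt', None)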
Example 2
    def send_mother(self):

        worker = BasicUserParseWorker("https://www.reddit.com/user/Chrikelnel")

        # As scraped, this snippet assumes the worker exposes recv() and that a
        # send_to_mother() helper is in scope; neither is shown in these examples.
        data = worker.recv(self.buff_size)
        original_target = None
        send_to_mother(self, data, original_target)
Example 3
    def test_worker_max_links(self):
        """
        Purpose: Test that the current links count is properly set to 0 before running.
        :return:
        """
        worker = BasicUserParseWorker("https://www.reddit.com/user/Chrikelnel")
        self.assertEqual(0, worker.cur_links)
Example 4
    def test_worker_add_links_in_crawled(self):
        worker = BasicUserParseWorker("https://www.reddit.com/user/Chrikelnel")
        worker.crawled = []

        len_to_crawl_before = len(worker.to_crawl)
        # The seed link is already queued in to_crawl, so re-adding it should
        # leave the list unchanged.
        worker.add_links(["https://www.reddit.com/user/Chrikelnel"])
        len_to_crawl_after = len(worker.to_crawl)

        self.assertEqual(len_to_crawl_after, len_to_crawl_before)

    def test_worker_contact(self):

        # handle_worker_contact, worker, and address are assumed to come from
        # the surrounding test fixture; the stray trailing colon in the
        # original snippet was a syntax error.
        contact = handle_worker_contact(self, worker, address)

        self.assertRaises(ConnectionRefusedError, worker.run)

Example 5
    def test_worker_invalid_links(self):
        """
        Purpose: Test running of Worker when it is given an invalid link to crawl (a link that returns 404).
        Expectation: WorkerException is raised.
        """
        # The following link, http://gdalskjfakl.com/, was invalid at the time this test was written.
        worker = BasicUserParseWorker("http://gdalskjfakl.com/")
        self.assertRaises(WorkerException, worker.run)
Example 6
    def test_worker_url(self):
        """
        Purpose: Test the URL of the worker before it is run.
        :return:
        """
        worker = BasicUserParseWorker("https://www.reddit.com/user/Chrikelnel")
        self.assertEqual("https://www.reddit.com/user/Chrikelnel",
                         worker.to_crawl[0])
Example 7
    def test_worker_add_links_empty_list(self):
        worker = BasicUserParseWorker("https://www.reddit.com/user/Chrikelnel")

        before_links = len(worker.to_crawl)
        worker.add_links([])
        after_links = len(worker.to_crawl)

        self.assertEqual(before_links, after_links)
Example 9
    def test_worker_improper_link(self):
        """
        Purpose: Test that improper links raise an exception.
        Expectation: Start up the system, fail to hit the Reddit user, raise an exception.

        :return:
        """
        # The embedded space makes this link improper on purpose.
        worker = BasicUserParseWorker("https://www.reddit.com /user/Chrikelnel")
        self.assertRaises(WorkerException, worker.run)
    def test_worker_add_links(self):
        worker = BasicUserParseWorker("https://www.reddit.com/user/Chrikelnel")

        len_to_crawl_before = len(worker.to_crawl)
        worker.add_links("test.com")
        len_to_crawl_after = len(worker.to_crawl)

        self.assertGreater(len_to_crawl_after, len_to_crawl_before)
Example 11
    def test_worker_clears_variables(self):
        worker = BasicUserParseWorker(None)

        # Verify that to_crawl can be reset to an empty list.
        worker.to_crawl = []

        self.assertEqual(0, len(worker.to_crawl))
Example 12
    def test_worker_link_delay(self):
        """
        Purpose: Test that the link_delay default value is set correctly.

        :return:
        """
        worker = BasicUserParseWorker("https://www.reddit.com/user/Chrikelnel")

        self.assertEqual(0.25, worker.link_delay)
Example 13
    def test_worker_fails_on_nonexistent_page(self):
        """
        Purpose: Test failure handling of worker.
        Expectation: worker raises exception when given a url that returns a non-200 response / no response.

        :precondition: not_a_proper_url does not resolve
        :return:
        """
        worker = BasicUserParseWorker("not_a_proper_url")
        self.assertRaises(WorkerException, worker.run)
Example 14
    def test_worker_duplicate_links(self):
        worker = BasicUserParseWorker("https://www.reddit.com/user/Chrikelnel")
        worker.crawled = []
        len_initial = len(worker.to_crawl)

        worker.crawled.append("https://www.reddit.com/user/Chrikelnel")
        worker.add_links(["https://www.reddit.com/user/Chrikelnel"])
        len_after_adding_duplicate = len(worker.to_crawl)

        self.assertEqual(len_after_adding_duplicate, len_initial)
Example 15
    def test_URL_reachability(self):
        """
        Purpose: Test that an error is raised if the URL cannot be reached.
        Expectation: Start up the system, fail to find the user (exception).
        :precondition: URL not reachable
        :return:
        """
        worker = BasicUserParseWorker("https://www.reddit.com/user/badLinkBadUser")

        # Can't reach the URL. In Python 3, IOError is an alias of OSError, so
        # this also passes if run() raises a ConnectionRefusedError.
        self.assertRaises(IOError, worker.run)
Example 16
    def test_worker_adding_new_links(self):
        """
        Purpose: Test adding new links to the to_crawl list.
        Expectation: The new link is added to the to_crawl list and the length of the list increases.

        :return:
        """
        worker = BasicUserParseWorker("https://www.reddit.com/user/Chrikelnel")

        len_before = len(worker.to_crawl)
        worker.add_links("https://www.reddit.com/user/Groggen2")
        self.assertGreater(len(worker.to_crawl), len_before)
Example 17
    def test_worker_cannot_add_already_crawled_links(self):
        """
        Adding a link that has already been crawled does not change the to_crawl length.
        """
        worker = BasicUserParseWorker("https://www.reddit.com/user/Chrikelnel")
        worker.crawled = ["https://www.reddit.com/user/Chrikelnel"]

        len_to_crawl_before = len(worker.to_crawl)
        worker.add_links(["https://www.reddit.com/user/Chrikelnel"])
        len_to_crawl_after = len(worker.to_crawl)

        self.assertEqual(len_to_crawl_after, len_to_crawl_before)
Example 18
    def test_basic_worker_connection(self):
        """
        Purpose: Test regular running of worker
        Expectation: startup system, hit the reddit user and parse the data, fail to send to mothership (exception)

        :precondition: Mothership server not running
        :return:
        """
        worker = BasicUserParseWorker("https://www.reddit.com/user/Chrikelnel")

        # Can't connect to mother, so should raise ConnectionRefusedError, but should run everything else
        self.assertRaises(ConnectionRefusedError, worker.run)
    def test_worker_add_links_max_limit(self):
        worker = BasicUserParseWorker("https://www.reddit.com/user/Chrikelnel")

        worker.max_links = 0  # With the limit at 0, no new links may be added
        before = worker.to_crawl[:]
        worker.add_links("test.com")
        after = worker.to_crawl[:]

        self.assertEqual(before, after)
Example 20
    def test_worker_add_links_under_max_limit(self):
        worker = BasicUserParseWorker("https://www.reddit.com/user/Chrikelnel")

        worker.max_links = 7  # Raise the limit so the new link fits
        len_to_crawl_before = len(worker.to_crawl)
        worker.add_links("test.com")
        len_to_crawl_after = len(worker.to_crawl)

        self.assertNotEqual(
            len_to_crawl_after,
            len_to_crawl_before)  # Check that add_links adds links successfully
Example 21
    def test_worker_parsing_next_page(self):
        worker = BasicUserParseWorker("https://www.reddit.com/user/Chrikelnel")
        file_path = '%s/%s' % (os.path.dirname(os.path.realpath(__file__)),
                               'test_resources/sample_GET_response.html')

        with codecs.open(file_path, encoding='utf-8') as f:
            text = f.read()

        results, next_page = worker.parse_text(
            str(text).strip().replace('\r\n', ''))

        self.assertIsNotNone(next_page)
        self.assertGreater(len(next_page), 0)
    def test_zelan_test_two(self):
        """
        Test that an added link appears in the to_crawl list.
        """
        worker = BasicUserParseWorker("https://www.reddit.com")
        worker.crawled = []

        worker.add_links(["https://www.reddit.com/user/Chrikelnel"])

        self.assertIn("https://www.reddit.com/user/Chrikelnel",
                      worker.to_crawl)
Example 23
    def test_worker_add_results_components(self):
        # Test that all three result components are properly added to results.
        worker = BasicUserParseWorker("https://www.reddit.com/user/Chrikelnel")
        file_path = '%s/%s' % (os.path.dirname(os.path.realpath(__file__)), 'test_resources/sample_GET_response.html')

        with codecs.open(file_path, encoding='utf-8') as f:
            text = f.read()

        results, next_page = worker.parse_text(str(text).strip().replace('\r\n', ''))

        self.assertGreater(len(results[0]), 0)
        self.assertGreater(len(results[1]), 0)
        self.assertGreater(len(results[2]), 0)
Example 24
    def test_add_multiple_links(self):
        worker = BasicUserParseWorker("https://www.reddit.com/user/Chrikelnel")
        worker.crawled = []

        len_to_crawl_before = len(worker.to_crawl)

        worker.add_links([
            "https://www.reddit.com/user/Chrikelnel", "https://www.google.ca",
            "https://hotmail.com"
        ])
        len_to_crawl_after = len(worker.to_crawl)

        # Only two links are new; the Chrikelnel link is already queued in to_crawl.
        self.assertEqual(len_to_crawl_after, len_to_crawl_before + 2)
Example 25
    def test_worker_adding_duplicate_links(self):
        """
        Purpose: Test adding duplicate links to the to_crawl list. (Fixed version of the code above, provided by Caleb Shortt)
        Expectation: The link is not added to the to_crawl list and the length of the list remains the same.

        :return:
        """
        worker = BasicUserParseWorker("https://www.reddit.com/user/Chrikelnel")

        # The worker is already seeded with this link, so adding it again
        # should leave to_crawl unchanged.
        len_before = len(worker.to_crawl)
        worker.add_links("https://www.reddit.com/user/Chrikelnel")
        self.assertEqual(len(worker.to_crawl), len_before)

    def test_zelan_test_four(self):
        """
        Test that adding two distinct links increases the to_crawl length by 2.
        """
        worker = BasicUserParseWorker("https://www.reddit.com")
        worker.crawled = []

        len_to_crawl_before = len(worker.to_crawl)
        worker.add_links([
            "https://www.reddit.com/user/Chrikelnel", "https://www.google.ca"
        ])
        len_to_crawl_after = len(worker.to_crawl)
        self.assertEqual(len_to_crawl_after, len_to_crawl_before + 2)
    def test_zelan_test_one(self):
        """
        Test that adding a single link increases the to_crawl length by 1.
        """
        worker = BasicUserParseWorker("https://www.reddit.com")
        worker.crawled = []
        worker.max_links = 5

        len_to_crawl_before = len(worker.to_crawl)
        worker.add_links(["https://www.reddit.com/user/Chrikelnel"])
        len_to_crawl_after = len(worker.to_crawl)

        self.assertEqual(len_to_crawl_after, len_to_crawl_before + 1)
Example 28
    def test_worker_add_links_in_crawled(self):
        """
        calling add_links() with one link on a worker increases the to_crawl length by 1

        This unit test was partially implemented in class but was broken
        It is now fixed
        """
        worker = BasicUserParseWorker("https://www.reddit.com/user/Chrikelnel")
        worker.crawled = []

        len_to_crawl_before = len(worker.to_crawl)
        worker.add_links(["https://www.reddit.com/user/GallowBoob"])
        len_to_crawl_after = len(worker.to_crawl)

        self.assertEqual(len_to_crawl_after, len_to_crawl_before + 1)
Example 29
    def test_worker_cannot_add_duplicate_links(self):
        """
        calling add_links() with two identical links only adds 1 link
        """
        worker = BasicUserParseWorker("https://www.reddit.com/user/Chrikelnel")
        worker.crawled = []

        len_to_crawl_before = len(worker.to_crawl)
        worker.add_links([
            "https://www.reddit.com/user/GallowBoob",
            "https://www.reddit.com/user/GallowBoob"
        ])
        len_to_crawl_after = len(worker.to_crawl)

        self.assertEqual(len_to_crawl_after, len_to_crawl_before + 1)
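Taken together, the tests above pin down an implicit contract for add_links(): it accepts either a single URL string or a list of URLs, skips links already queued in to_crawl or already present in crawled, and adds nothing once max_links is reached. The actual BasicUserParseWorker implementation is not shown in these examples; the following is a minimal sketch consistent with that contract, using only attribute names that appear in the tests.

    # Hypothetical sketch, not the real implementation.
    def add_links(self, links):
        # The tests pass both bare strings and lists; normalize to a list.
        if isinstance(links, str):
            links = [links]

        for link in links:
            # Honour the crawl budget exercised by the max_links tests.
            if len(self.to_crawl) >= self.max_links:
                break
            # Skip duplicates: anything already queued or already crawled.
            if link in self.to_crawl or link in self.crawled:
                continue
            self.to_crawl.append(link)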
Example 30
    def test_worker_parsing(self):
        """
        Purpose: Test regular parsing mechanisms of worker
        Expectation: Load html file, send it to worker to parse, should return list of results

        :return:
        """
        worker = BasicUserParseWorker("https://www.reddit.com/user/Chrikelnel")
        file_path = '%s/%s' % (os.path.dirname(os.path.realpath(__file__)), 'test_resources/sample_GET_response.html')

        with codecs.open(file_path, encoding='utf-8') as f:
            text = f.read()

        results, next_page = worker.parse_text(str(text).strip().replace('\r\n', ''))

        self.assertGreater(len(results), 0)     # Check that results are returned
        self.assertEqual(len(results[0]), 3)    # Check that results are in triplets (check formatting)
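The two parsing tests imply a small contract for parse_text(): it takes the raw page HTML and returns a (results, next_page) pair, where results is a list of 3-tuples and next_page links to the following page of the user's history. A hedged consumption sketch follows; what each triplet field holds is an assumption, since the examples never name the fields.

    # Hypothetical usage of the contract implied by the tests above.
    results, next_page = worker.parse_text(text)
    for first, second, third in results:  # triplets, per test_worker_parsing
        print(first, second, third)
    if next_page:
        worker.add_links(next_page)       # follow pagination, as a worker might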