Ejemplo n.º 1
0
 def setUp(self):
     """Build the fixture: a parsed base URL, the five work queues the
     crawler threads share, one WorkThread wired to those queues, and a
     sample HTML document exercising forms, links, anchors and js hrefs."""
     self.base = urlparse.urlparse('localhost')
     # Create every queue the crawler pipeline communicates over.
     for queue_name in ('url_queue', 'html_queue', 'sqli_queue',
                        'visited_queue', 'forms_queue'):
         setattr(self, queue_name, Queue.Queue())
     self.worker = WorkThread(self.html_queue, self.url_queue, self.base,
                              self.sqli_queue, self.forms_queue)
     # Fixture page: one form, plain text, absolute/relative/duplicate
     # links, a fake link inside a div, a javascript: href and a fragment.
     self.html = "<form action='moo'><input type='text' name='input_box'></form><p>This is not a link</p><a href='vacaloca'>Moo</a><a href='/vacaloca?fail=1'>Moo</a><div id='href'>www.thisisalinknotinalink.com</div><a href='/vacaloca?fail=1&cat=2'>Moo</a><a href='/vacaloca?fail=1'>Moo</a><a href='http://localhost/vacaloca'>Moo</a><a href='/vacaloca'>Moo</a><h2>And also not this</h2><span>seriously, move on</span><a href='javascript:sillyjs()'>Click Me</a><a href='#datdiv'>DatDiv</a>"
Ejemplo n.º 2
0
    def spawn_threads(self):
        """Launch one WorkThread and five ScrapeThreads as daemons, poll
        the worker while updating the status line, then join the scrapers
        once the worker has finished."""
        analyzer = WorkThread(self.html_queue, self.url_queue, self.base,
                              self.sqli_queue, self.forms_queue)
        analyzer.setDaemon(True)
        analyzer.start()

        # Five scraper daemons feed pages to the analyzer.
        scraper_pool = []
        for _ in range(5):
            scraper = ScrapeThread(self.url_queue, self.html_queue,
                                   self.visited_queue, self.proxy,
                                   self.proxy_port, analyzer)
            scraper.setDaemon(True)
            scraper.start()
            scraper_pool.append(scraper)

        # Busy-wait (with a short sleep) until the analyzer thread exits,
        # refreshing the console status on each pass.
        while analyzer.isAlive():
            self.update_status()
            sleep(0.1)

        sys.stdout.write("\rKillin Scrapers..........")
        sys.stdout.flush()
        for scraper in scraper_pool:
            scraper.join()