def setUp(self):
    """Build the shared fixtures: a parsed base URL, the five pipeline
    queues, a WorkThread wired to those queues, and a sample HTML blob
    exercised by the parsing tests.
    """
    # NOTE(review): 'localhost' carries no scheme, so urlparse puts it in
    # .path rather than .netloc — presumably the code under test expects
    # that; confirm against the crawler's URL handling.
    self.base = urlparse.urlparse('localhost')

    # One queue per pipeline stage, shared between worker and scrapers.
    self.url_queue = Queue.Queue()
    self.html_queue = Queue.Queue()
    self.sqli_queue = Queue.Queue()
    self.visited_queue = Queue.Queue()
    self.forms_queue = Queue.Queue()

    self.worker = WorkThread(self.html_queue, self.url_queue, self.base,
                             self.sqli_queue, self.forms_queue)

    # Fixture markup: a form with one text input, anchors in several shapes
    # (relative, query-string, absolute, javascript:, fragment-only), plus
    # non-link noise (<p>, <div id='href'>, <h2>, <span>) that the parser
    # must ignore.
    self.html = "<form action='moo'><input type='text' name='input_box'></form><p>This is not a link</p><a href='vacaloca'>Moo</a><a href='/vacaloca?fail=1'>Moo</a><div id='href'>www.thisisalinknotinalink.com</div><a href='/vacaloca?fail=1&cat=2'>Moo</a><a href='/vacaloca?fail=1'>Moo</a><a href='http://localhost/vacaloca'>Moo</a><a href='/vacaloca'>Moo</a><h2>And also not this</h2><span>seriously, move on</span><a href='javascript:sillyjs()'>Click Me</a><a href='#datdiv'>DatDiv</a>"
def spawn_threads(self):
    """Launch one daemon WorkThread and five daemon ScrapeThreads, refresh
    the status display while the worker is alive, then join the scrapers.
    """
    work_thread = WorkThread(self.html_queue, self.url_queue, self.base,
                             self.sqli_queue, self.forms_queue)
    work_thread.setDaemon(True)
    work_thread.start()

    # Fan out five scrapers, each sharing the queues and the worker handle.
    scraper_threads = []
    for _ in range(5):
        scraper = ScrapeThread(self.url_queue, self.html_queue,
                               self.visited_queue, self.proxy,
                               self.proxy_port, work_thread)
        scraper.setDaemon(True)
        scraper.start()
        scraper_threads.append(scraper)

    # Poll the worker, repainting the status line ~10x per second until it
    # finishes.
    while work_thread.isAlive():
        self.update_status()
        sleep(0.1)

    sys.stdout.write("\rKillin Scrapers..........")
    sys.stdout.flush()
    for scraper in scraper_threads:
        scraper.join()