def get_a_task(self):
    """Fetch one item from ``self.queue``, process it and print it.

    The item is requested with ``block=True`` and ``timeout=self.timeout``,
    handed to ``self.process``, acknowledged with ``task_done()`` and then
    passed to ``Console.body``.

    Returns:
        True when an item was fetched and handled, False when
        ``Queue.Empty`` was raised.
    """
    handled = False
    try:
        # Everything stays inside the try: a Queue.Empty raised anywhere in
        # this sequence is reported as "nothing handled".
        item = self.queue.get(block=True, timeout=self.timeout)
        self.process(item)
        self.queue.task_done()
        Console.body(item)
        handled = True
    except Queue.Empty:
        pass
    return handled
def get_a_task(self, alived):
    """Take one item from ``self.queue`` without blocking and handle it.

    A truthy item is passed to ``self.process``, acknowledged with
    ``task_done()`` and printed through ``Console.body``. A falsy item only
    prints ``"no task"``.

    Args:
        alived: Flag consulted only when the queue is empty; its truth value
            becomes the return value in that case.

    Returns:
        True when ``queue.get`` returned an item (truthy or not); otherwise
        the truth value of ``alived``.
    """
    try:
        item = self.queue.get(False)
        if not item:
            # NOTE(review): task_done() is not called on this path, so a
            # falsy item stays unacknowledged -- confirm this is intended.
            print("no task")
        else:
            self.process(item)
            self.queue.task_done()
            Console.body(item)
        return True
    except Queue.Empty:
        return True if alived else False
def get_a_task(self, alived):
    """Take one item from ``self.queue`` without blocking and handle it.

    A truthy item is passed to ``self.process``, acknowledged with
    ``task_done()`` and printed through ``Console.body``. A falsy item only
    prints ``"no task"``.

    Args:
        alived: Flag consulted only when the queue is empty; its truth value
            becomes the return value in that case.

    Returns:
        True when ``queue.get`` returned an item (truthy or not); otherwise
        ``bool(alived)``.
    """
    try:
        task = self.queue.get(False)
        if task:
            self.process(task)
            self.queue.task_done()
            Console.body(task)
        else:
            # Function-call form: prints the same text on Python 2 and is
            # valid syntax on Python 3, unlike the former print statement.
            print("no task")
        return True
    except Queue.Empty:
        return bool(alived)
def visit(self, task):
    """Request the task's target and record the outcome on the task.

    The request is sent with ``self.session`` (a ``requests.Session`` when
    ``Visitor.persist`` is set, otherwise the ``requests`` module) using the
    method named by ``Visitor.requests`` ("GET" or "HEAD"). Size, elapsed
    time, status code, content type and redirect location are stored on
    ``task``, and ``task.ignorable`` is set when one of the configured
    discriminators matches. The task is then printed and queued on
    ``self.results`` while ``self.lock`` is held.

    Args:
        task: Object providing ``target``, ``resource``, ``content``,
            ``get_complete_target()``, ``set_response_code()`` and
            ``content_has_detected()``; the response attributes described
            above are written onto it.

    Returns:
        None. Exceptions raised while visiting are printed, not propagated.
    """
    def _dumb_redirect(url):
        """Tell whether *url* is the requested page itself, the same page
        with a trailing slash, or the bare target root."""
        origin = "{0}{1}".format(task.target, task.resource)
        return url in (origin, origin + '/', task.target)

    # Tracks whether this call owns self.lock, so the finally clause never
    # releases a lock that was not acquired here (an exception before
    # acquire() used to trigger exactly that).
    lock_held = False
    try:
        if Visitor.user_agent:
            Visitor.headers['User-Agent'] = Visitor.user_agent

        now = time.time()
        # NOTE(review): the samples appended to self.__time below are in
        # milliseconds, while requests documents `timeout` in seconds --
        # confirm the unit mismatch is intended before changing it.
        timeout = sum(self.__time) / len(self.__time) if self.__time else 10

        # Persistent connections
        if Visitor.persist:
            if not self.session:
                self.session = requests.Session()
        else:
            self.session = requests

        request_kwargs = dict(
            headers=Visitor.headers,
            verify=False,
            timeout=timeout,
            auth=Visitor.auth,
            cookies=Visitor.cookies,
            allow_redirects=Visitor.is_allow_redirects)
        if Visitor.proxy:
            request_kwargs['proxies'] = Visitor.proxy

        # Any other method leaves r as None; the attribute access below then
        # fails and is reported by the generic handler, as before.
        r = None
        if Visitor.requests == "GET":
            r = self.session.get(task.get_complete_target(),
                                 **request_kwargs)
        elif Visitor.requests == "HEAD":
            r = self.session.head(task.get_complete_target(),
                                  **request_kwargs)

        after = time.time()
        delta = (after - now) * 1000
        raw_content = r.content
        task.response_size = len(raw_content)
        task.response_time = delta
        task.set_response_code(r.status_code)
        self.__time.append(delta)

        # On Python 3 the body is decoded so the substring checks below
        # compare text with text.
        if sys.version_info[0] >= 3:
            tmp_content = raw_content.decode('Latin-1')
        else:
            tmp_content = raw_content

        # If discriminator is found we mark it as ignorable
        if Visitor.discriminator and Visitor.discriminator in tmp_content:
            task.ignorable = True

        # Hash the raw bytes: md5() rejects decoded text on Python 3, and on
        # Python 2 this is the same digest the joined string produced.
        if (Visitor.banned_md5 and
                hashlib.md5(raw_content).hexdigest() == self.banned_md5):
            task.ignorable = True

        # Check if page size is not what we need
        if task.response_size in Visitor.size_discriminator:
            task.ignorable = True

        # Look for interesting content
        if task.content and (task.content in tmp_content):
            task.content_has_detected(True)

        # Look for a redirection
        if Visitor.is_allow_redirects:
            if r.history and not _dumb_redirect(r.history[-1].url):
                task.response_code = str(r.history[0].status_code)
                task.location = r.history[-1].url
        else:
            if str(r.status_code).startswith('3'):
                task.set_response_code('404')
                task.ignorable = True

        if any(h.lower() == 'content-type' for h in r.headers.keys()):
            try:
                task.response_type = r.headers['Content-Type'].split(';')[0]
            except Exception:
                # Best effort: leave response_type untouched if the header
                # cannot be read or split.
                pass

        self.lock.acquire()
        lock_held = True
        Console.body(task)
        self.results.get_results_queue().put(task)
        self.results.get_a_task()
        if Visitor.delay:
            time.sleep(Visitor.delay)

    except (requests.ConnectionError, requests.Timeout) as e:
        # TODO log to a file instead of screen
        print("[!] Timeout/Connection error")
        print(e)
    except Exception as e:
        print("[!] General exception while visiting")
        print(e)
    finally:
        if lock_held:
            self.lock.release()
def visit(self, task):
    """Request the task's target and record the outcome on the task.

    The request is sent with ``self.session`` (a ``requests.Session`` when
    ``Visitor.persist`` is set, otherwise the ``requests`` module) using the
    method named by ``Visitor.requests`` ("GET" or "HEAD"). Size, elapsed
    time, status code, content type and redirect location are stored on
    ``task``, and ``task.ignorable`` is set when one of the configured
    discriminators matches. The task is then printed and queued on
    ``self.results`` while ``self.lock`` is held.

    Args:
        task: Object providing ``target``, ``resource``, ``content``,
            ``get_complete_target()``, ``set_response_code()`` and
            ``content_has_detected()``; the response attributes described
            above are written onto it.

    Returns:
        None. Exceptions raised while visiting are printed, not propagated.
    """
    def _dumb_redirect(url):
        """Tell whether *url* is the requested page itself, the same page
        with a trailing slash, or the bare target root."""
        origin = "{0}{1}".format(task.target, task.resource)
        return url in (origin, origin + '/', task.target)

    # Tracks whether this call owns self.lock, so the finally clause never
    # releases a lock that was not acquired here (an exception before
    # acquire() used to trigger exactly that).
    lock_held = False
    try:
        if Visitor.user_agent:
            Visitor.headers['User-Agent'] = Visitor.user_agent

        now = time.time()
        # NOTE(review): the samples appended to self.__time below are in
        # milliseconds, while requests documents `timeout` in seconds --
        # confirm the unit mismatch is intended before changing it.
        timeout = sum(self.__time) / len(self.__time) if self.__time else 10

        # Persistent connections
        if Visitor.persist:
            if not self.session:
                self.session = requests.Session()
        else:
            self.session = requests

        request_kwargs = dict(
            headers=Visitor.headers,
            verify=False,
            timeout=timeout,
            auth=Visitor.auth,
            cookies=Visitor.cookies,
            allow_redirects=Visitor.is_allow_redirects)
        if Visitor.proxy:
            request_kwargs['proxies'] = Visitor.proxy

        # Any other method leaves r as None; the attribute access below then
        # fails and is reported by the generic handler, as before.
        r = None
        if Visitor.requests == "GET":
            r = self.session.get(task.get_complete_target(),
                                 **request_kwargs)
        elif Visitor.requests == "HEAD":
            r = self.session.head(task.get_complete_target(),
                                  **request_kwargs)

        after = time.time()
        delta = (after - now) * 1000
        raw_content = r.content
        task.response_size = len(raw_content)
        task.response_time = delta
        task.set_response_code(r.status_code)
        self.__time.append(delta)

        # On Python 3 the body is decoded so the substring checks below
        # compare text with text.
        if sys.version_info[0] >= 3:
            tmp_content = raw_content.decode('Latin-1')
        else:
            tmp_content = raw_content

        # If discriminator is found we mark it as ignorable
        if Visitor.discriminator and Visitor.discriminator in tmp_content:
            task.ignorable = True

        # Hash the raw bytes: md5() rejects decoded text on Python 3, and on
        # Python 2 this is the same digest the joined string produced.
        if (Visitor.banned_md5 and
                hashlib.md5(raw_content).hexdigest() == self.banned_md5):
            task.ignorable = True

        # Check if page size is not what we need
        if task.response_size in Visitor.size_discriminator:
            task.ignorable = True

        # Look for interesting content
        if task.content and (task.content in tmp_content):
            task.content_has_detected(True)

        # Look for a redirection
        if Visitor.is_allow_redirects:
            if r.history and not _dumb_redirect(r.history[-1].url):
                task.response_code = str(r.history[0].status_code)
                task.location = r.history[-1].url
        else:
            if str(r.status_code).startswith('3'):
                task.set_response_code('404')
                task.ignorable = True

        if any(h.lower() == 'content-type' for h in r.headers.keys()):
            try:
                task.response_type = r.headers['Content-Type'].split(';')[0]
            except Exception:
                # Best effort: leave response_type untouched if the header
                # cannot be read or split.
                pass

        self.lock.acquire()
        lock_held = True
        Console.body(task)
        self.results.get_results_queue().put(task)
        self.results.get_a_task()
        if Visitor.delay:
            time.sleep(Visitor.delay)

    except (requests.ConnectionError, requests.Timeout) as e:
        # TODO log to a file instead of screen
        print("[!] Timeout/Connection error")
        print(e)
    except Exception as e:
        print("[!] General exception while visiting")
        print(e)
    finally:
        if lock_held:
            self.lock.release()