예제 #1
0
 def test_delay_add_available(self):
     """add_available with a delay must defer availables.put via a Timer."""
     sessions = Sessions(delay=1)
     first_session = sessions.sessions[0]
     with patch('dirhunt.sessions.threading.Timer') as timer_mock:
         sessions.add_available(first_session)
     expected = (sessions.delay, sessions.availables.put, [first_session])
     timer_mock.assert_called_once_with(*expected)
     timer_mock.return_value.start.assert_called_once()
예제 #2
0
    def callback(self, domain):
        """Fetch the VirusTotal page for *domain* and report detected URLs.

        Emits an error (and stops) when the abuse/rate-limit marker is
        present in the response body.
        """
        vt_url = VT_URL.format(domain=domain)
        page = Sessions().get_session().get(vt_url).text
        if ABUSE in page:
            self.add_error(ABUSE_MESSAGE_ERROR.format(url=vt_url))
            return
        soup = BeautifulSoup(page, 'html.parser')
        for anchor in soup.select('#detected-urls .enum a'):
            self.add_result(anchor.text.strip(string.whitespace))
예제 #3
0
 def __init__(self, max_workers=None, interesting_extensions=None, interesting_files=None, std=None,
              progress_enabled=True, timeout=10, depth=3, not_follow_subdomains=False, exclude_sources=(),
              not_allow_redirects=False, proxies=None, delay=0, limit=1000, to_file=None, user_agent=None,
              cookies=None, headers=None):
     """Initialize the crawler's pool size, shared sessions and bookkeeping.

     When ``max_workers`` is not given it defaults to 5x the CPU count,
     except when a ``delay`` is set, where it defaults to one worker per
     proxy (a single worker when no proxies are configured).
     """
     # Derive the pool size only when the caller did not fix it explicitly.
     if not max_workers and not delay:
         max_workers = (multiprocessing.cpu_count() or 1) * 5
     elif not max_workers and delay:
         # One worker per configured proxy; a single worker when none.
         max_workers = len(proxies or [None])
     super(Crawler, self).__init__(max_workers)
     self.domains = set()
     self.results = Queue()
     self.index_of_processors = []
     self.proxies = proxies
     self.delay = delay
     # Shared Sessions pool, configured with proxy/delay/header options.
     self.sessions = Sessions(proxies, delay, user_agent, cookies, headers)
     self.processing = {}
     self.processed = {}
     self.add_lock = Lock()
     self.spinner = random_spinner()
     self.start_dt = datetime.datetime.now()
     self.interesting_extensions = interesting_extensions or []
     self.interesting_files = interesting_files or []
     self.closing = False
     # NOTE(review): ``std or None`` collapses any falsy stream to None.
     self.std = std or None
     self.progress_enabled = progress_enabled
     self.timeout = timeout
     self.not_follow_subdomains = not_follow_subdomains
     self.depth = depth
     self.exclude_sources = exclude_sources
     # Sources gets both the url and message callbacks in this version.
     self.sources = Sources(self.add_url, self.add_message, exclude_sources)
     self.not_allow_redirects = not_allow_redirects
     self.limit = limit
     self.current_processed_count = 0
     self.to_file = to_file
예제 #4
0
 def test_random_proxy_positive(self, m):
     """A successful request must register a positive vote on the proxy."""
     fake_proxy = Mock()
     with patch('dirhunt.sessions.normalize_proxy',
                return_value=fake_proxy):
         sessions = Sessions()
         first = sessions.sessions[0]
         self.assertIs(first.proxy, fake_proxy)
         first.session = Mock()
         first.get(self.url)
         fake_proxy.positive.assert_called_once()
예제 #5
0
 def test_proxy(self):
     """A proxied session must pass the proxy for both http and https."""
     proxy = 'http://10.1.2.3:3128'
     session = Sessions([proxy]).sessions[0]
     requests_mock = Mock()
     session.session = requests_mock
     session.get(self.url)
     expected_proxies = {'http': proxy, 'https': proxy}
     requests_mock.get.assert_called_once_with(self.url,
                                               proxies=expected_proxies)
예제 #6
0
 def _test_random_proxy_negative(self, votes):
     """Drive a ProxyError through the session; return the proxy mock.

     The proxy mock reports ``votes`` via get_updated_proxy so callers can
     check the negative-vote handling afterwards.
     """
     fake_proxy = Mock()
     fake_proxy.get_updated_proxy.return_value.votes = votes
     with patch('dirhunt.sessions.normalize_proxy',
                return_value=fake_proxy):
         sessions = Sessions()
         first = sessions.sessions[0]
         self.assertIs(first.proxy, fake_proxy)
         first.session = Mock(**{'get.side_effect': ProxyError})
         with self.assertRaises(ProxyError):
             first.get(self.url)
     return fake_proxy
예제 #7
0
 def __init__(self, max_workers=None, interesting_extensions=None, interesting_files=None, std=None,
              progress_enabled=True, timeout=10, depth=3, not_follow_subdomains=False, exclude_sources=(),
              not_allow_redirects=False):
     """Initialize crawler bookkeeping, sessions pool and sources.

     This variant has no proxy/delay support: Sessions() is created with
     no arguments and max_workers is passed through to the superclass
     unchanged (no CPU-based default here).
     """
     super(Crawler, self).__init__(max_workers)
     self.domains = set()
     self.results = Queue()
     self.index_of_processors = []
     # Default Sessions pool — no proxies, delay or custom headers.
     self.sessions = Sessions()
     self.processing = {}
     self.processed = {}
     self.add_lock = Lock()
     self.spinner = random_spinner()
     self.start_dt = datetime.datetime.now()
     self.interesting_extensions = interesting_extensions or []
     self.interesting_files = interesting_files or []
     self.closing = False
     # NOTE(review): ``std or None`` collapses any falsy stream to None.
     self.std = std or None
     self.progress_enabled = progress_enabled
     self.timeout = timeout
     self.not_follow_subdomains = not_follow_subdomains
     self.depth = depth
     # Only the url callback here; no add_message in this version.
     self.sources = Sources(self.add_url, exclude_sources)
     self.not_allow_redirects = not_allow_redirects
예제 #8
0
 def __init__(self,
              max_workers=None,
              interesting_extensions=None,
              interesting_files=None,
              std=None,
              progress_enabled=True,
              timeout=10):
     """Initialize the crawler's bookkeeping and sessions pool.

     Earliest variant: no depth, sources, redirects or proxy handling —
     only worker count, interesting extensions/files, output stream,
     progress flag and request timeout.
     """
     super(Crawler, self).__init__(max_workers)
     self.domains = set()
     self.results = Queue()
     self.index_of_processors = []
     # Default Sessions pool — no configuration in this version.
     self.sessions = Sessions()
     self.processing = {}
     self.processed = {}
     self.add_lock = Lock()
     self.spinner = random_spinner()
     self.start_dt = datetime.datetime.now()
     self.interesting_extensions = interesting_extensions or []
     self.interesting_files = interesting_files or []
     self.closing = False
     # NOTE(review): ``std or None`` collapses any falsy stream to None.
     self.std = std or None
     self.progress_enabled = progress_enabled
     self.timeout = timeout
예제 #9
0
 def test_random_session(self):
     """With the availables queue drained, a session is picked at random."""
     sessions = Sessions()
     # Drain the queue so get_session must fall back to random.choice.
     sessions.availables.get()
     with patch('dirhunt.sessions.random.choice') as choice_mock:
         sessions.get_session()
     choice_mock.assert_called_once()
예제 #10
0
 def _get_url_info(self):
     """Build a UrlInfo for this instance's url using a fresh Sessions."""
     url = Url(self.url)
     return UrlInfo(Sessions(), url)
예제 #11
0
 def test_echo(self):
     """echo() must write to a tty-like std stream."""
     fake_std = Mock(**{'isatty.return_value': True})
     urls_info = UrlsInfo([], Sessions(), std=fake_std)
     urls_info.echo('Foo')
     fake_std.write.assert_called()
예제 #12
0
 def test_erase(self):
     """erase() must issue exactly one write on a tty-like std stream."""
     fake_std = Mock(**{'isatty.return_value': True})
     urls_info = UrlsInfo([], Sessions(), std=fake_std)
     urls_info.erase()
     fake_std.write.assert_called_once()
예제 #13
0
 def test_start_empty(self):
     """start() with no urls must never submit any work."""
     with patch.object(UrlsInfo, 'submit') as submit_mock:
         UrlsInfo([], Sessions()).start()
         submit_mock.assert_not_called()
예제 #14
0
 def test_callback(self):
     """callback() must delegate to _get_url_info exactly once."""
     with patch.object(UrlsInfo, '_get_url_info') as info_mock:
         urls_info = UrlsInfo([self.url], Sessions())
         urls_info.callback(len(self.url), Url(self.url), 0)
         info_mock.assert_called_once()