예제 #1
0
    def test_get_body_css_validate(self):
        """Check that a ``url(...)`` target found in a CSS body is cloned.

        The root URL is queued and ``get_body`` is run against a canned
        ``text/css`` response. Afterwards the test expects the referenced
        image to be the last visited URL, the URL queue to be empty, and
        both entries to be present in ``handler.meta``.
        """
        from unittest import mock  # local import: used for scoped patching

        self.css_validate = 'true'
        self.handler = Cloner(self.root, self.max_depth, self.css_validate)
        self.content = b'''.banner { background: url("/example.png") }'''
        self.expected_content = 'http://example.com/example.png'
        self.return_size = 0
        self.meta = {
            '/example.png': {
                'hash': '5a64beebcd2a6f1cbd00b8370debaa72',
                'headers': [{
                    'Content-Type': 'text/css'
                }],
            },
            '/index.html': {
                'hash': 'd1546d731a9f30cc80127d57142a482b',
                'headers': [{
                    'Content-Type': 'text/css'
                }],
            },
        }

        async def test():
            await self.handler.new_urls.put((yarl.URL(self.root), 0))
            await self.handler.get_body(self.session)
            self.q_size = self.handler.new_urls.qsize()

        # Patch the aiohttp class attributes only while the clone runs;
        # plain assignment would leave them altered for every later test.
        headers_patch = mock.patch.object(
            aiohttp.ClientResponse, '_headers',
            {'Content-Type': 'text/css'}, create=True)
        read_patch = mock.patch.object(
            aiohttp.ClientResponse, 'read',
            AsyncMock(return_value=self.content))
        with headers_patch, read_patch:
            self.loop.run_until_complete(test())

        self.assertEqual(self.handler.visited_urls[-1], self.expected_content)
        self.assertEqual(self.q_size, self.return_size)
        self.assertEqual(self.meta, self.handler.meta)
예제 #2
0
    def test_get_body_css_validate_scheme(self):
        """Check that ``data:`` and ``file:`` URLs in CSS are not followed.

        For each canned stylesheet the root URL is queued and ``get_body``
        is run. The test expects the queue to be empty afterwards, only the
        root entry in ``handler.meta``, and the root URL to be the last
        visited URL.
        """
        from unittest import mock  # local import: used for scoped patching

        self.css_validate = "true"
        self.return_size = 0
        self.handler = Cloner(self.root, self.max_depth, self.css_validate)
        self.content = [
            b""".banner { background: url("data://domain/test.txt") }""",
            b""".banner { background: url("file://domain/test.txt") }""",
        ]
        self.meta = {
            "/index.html": {
                "hash": "d1546d731a9f30cc80127d57142a482b",
                "headers": [{"Content-Type": "text/css"}],
            },
        }

        self.expected_content = "http://example.com/"

        async def test():
            await self.handler.new_urls.put((yarl.URL(self.root), 0))
            await self.handler.get_body(self.session)
            self.q_size = self.handler.new_urls.qsize()

        # Scoped patches: the aiohttp class attributes are restored when the
        # ``with`` blocks exit instead of staying replaced for later tests.
        headers_patch = mock.patch.object(
            aiohttp.ClientResponse, "_headers",
            {"Content-Type": "text/css"}, create=True)
        with headers_patch:
            for content in self.content:
                read_patch = mock.patch.object(
                    aiohttp.ClientResponse, "read",
                    AsyncMock(return_value=content))
                with read_patch:
                    self.loop.run_until_complete(test())
                self.assertEqual(self.return_size, self.q_size)
                self.assertEqual(self.handler.meta, self.meta)
                self.assertEqual(self.handler.visited_urls[-1], self.expected_content)
예제 #3
0
class TestMakeFilename(unittest.TestCase):
    """Tests for ``Cloner._make_filename``.

    Each test hands a ``yarl.URL`` to the handler and compares the returned
    ``(filename, hashname)`` pair with fixed expected strings.
    """

    def setUp(self):
        """Create a scratch directory, an event loop and the ``Cloner``."""
        self.main_page_path = generate_unique_path()
        os.makedirs(self.main_page_path)
        self.url = yarl.URL('http://foo.com')
        self.root = 'http://example.com'
        self.max_depth = sys.maxsize
        self.loop = asyncio.new_event_loop()
        self.css_validate = 'false'
        self.handler = Cloner(self.root, self.max_depth, self.css_validate)
        self.filename = None
        self.hashname = None

    def test_make_filename(self):
        """A URL on a different host yields the host as the file name."""
        self.filename, self.hashname = self.handler._make_filename(self.url)
        self.assertEqual(self.filename, 'foo.com')
        self.assertEqual(self.hashname, '167a0418dd8ce3bf0ef00dfb6195f038')

    def test_make_filename_same_host(self):
        """The root URL itself yields ``/index.html``."""
        self.filename, self.hashname = self.handler._make_filename(
            yarl.URL(self.root))
        self.assertEqual(self.filename, '/index.html')
        self.assertEqual(self.hashname, 'd1546d731a9f30cc80127d57142a482b')

    def test_make_filename_relative(self):
        """A relative URL is returned unchanged as the file name."""
        self.url = yarl.URL('/images')
        self.filename, self.hashname = self.handler._make_filename(self.url)
        self.assertEqual(self.filename, '/images')
        self.assertEqual(self.hashname, '41389bcf7f7427468d8c8675db2d4f98')

    def tearDown(self):
        """Remove the scratch directory and close the loop from ``setUp``."""
        try:
            shutil.rmtree(self.main_page_path)
        finally:
            # A loop that is never closed is reported as a ResourceWarning;
            # close it even when removing the directory fails.
            self.loop.close()
예제 #4
0
class TestCloner(unittest.TestCase):
    """Tests for ``Cloner.add_scheme`` with differently written root URLs."""

    def setUp(self):
        """Create a scratch directory and a ``Cloner`` for example.com."""
        self.url = 'http://example.com'
        self.main_page_path = generate_unique_path()
        os.makedirs(self.main_page_path)
        self.expected_new_url = yarl.URL('http://example.com')
        self.expected_err_url = yarl.URL('http://example.com/status_404')
        self.max_depth = sys.maxsize
        self.css_validate = 'false'
        self.handler = Cloner(self.url, self.max_depth, self.css_validate)

    def tearDown(self):
        """Delete the scratch directory created in ``setUp``."""
        shutil.rmtree(self.main_page_path)

    def _check_add_scheme(self):
        """Run ``add_scheme`` on ``self.url`` and compare both results."""
        result_url, result_err_url = self.handler.add_scheme(self.url)
        self.assertEqual(result_url, self.expected_new_url)
        self.assertEqual(result_err_url, self.expected_err_url)

    def test_trailing_slash(self):
        """A trailing slash on the input gives the same pair of URLs."""
        self.url = 'http://example.com/'
        self._check_add_scheme()

    def test_add_scheme(self):
        """An input that already has a scheme gives the expected pair."""
        self._check_add_scheme()

    def test_no_scheme(self):
        """A bare host name gives the same pair of URLs."""
        self.url = 'example.com'
        self._check_add_scheme()
예제 #5
0
    def setUp(self):
        """Build the fixtures shared by the ``get_body`` tests.

        Creates a scratch directory, a private event loop and a ``Cloner``,
        resets the result attributes to ``None``, and replaces
        ``aiohttp.ClientSession.get`` with a mock that returns a canned
        ``ClientResponse``. The loop and the replaced ``get`` are registered
        for cleanup so neither outlives the test.
        """
        from unittest import mock  # local import: used for scoped patching

        self.main_page_path = generate_unique_path()
        os.makedirs(self.main_page_path)
        self.root = "http://example.com"
        self.level = 0
        self.max_depth = sys.maxsize
        self.loop = asyncio.new_event_loop()
        # close() is idempotent, so this is safe even if the loop is also
        # closed elsewhere; without it every test leaks an open loop.
        self.addCleanup(self.loop.close)
        self.css_validate = "false"
        self.handler = Cloner(self.root, self.max_depth, self.css_validate)
        self.target_path = "/opt/snare/pages/{}".format(yarl.URL(self.root).host)
        self.return_content = None
        self.expected_content = None
        self.filename = None
        self.hashname = None
        self.url = None
        self.content = None
        self.return_url = None
        self.return_level = None
        self.meta = None
        self.q_size = None

        self.session = aiohttp.ClientSession
        canned_response = aiohttp.ClientResponse(
            url=yarl.URL("http://www.example.com"),
            method="GET",
            writer=None,
            continue100=1,
            timer=None,
            request_info=None,
            traces=None,
            loop=self.loop,
            session=None,
        )
        # ``self.session`` is the ClientSession class itself, so replacing
        # ``get`` changes it for the whole process; undo that after the test.
        get_patch = mock.patch.object(
            aiohttp.ClientSession, "get", AsyncMock(return_value=canned_response)
        )
        get_patch.start()
        self.addCleanup(get_patch.stop)
예제 #6
0
    def test_get_body_css_validate_scheme(self):
        """Check that ``data:`` and ``file:`` URLs in CSS are not followed.

        For each canned stylesheet the root URL is queued and ``get_body``
        is run. The test expects the queue to be empty afterwards, only the
        root entry in ``handler.meta``, and the root URL to be the last
        visited URL.
        """
        from unittest import mock  # local import: used for scoped patching

        self.css_validate = 'true'
        self.return_size = 0
        self.handler = Cloner(self.root, self.max_depth, self.css_validate)
        self.content = [
            b'''.banner { background: url("data://domain/test.txt") }''',
            b'''.banner { background: url("file://domain/test.txt") }'''
        ]
        self.meta = {
            '/index.html': {
                'hash': 'd1546d731a9f30cc80127d57142a482b',
                'headers': [{
                    'Content-Type': 'text/css'
                }],
            },
        }

        self.expected_content = 'http://example.com/'

        async def test():
            await self.handler.new_urls.put((yarl.URL(self.root), 0))
            await self.handler.get_body(self.session)
            self.q_size = self.handler.new_urls.qsize()

        # Scoped patches: the aiohttp class attributes are restored when the
        # ``with`` blocks exit instead of staying replaced for later tests.
        headers_patch = mock.patch.object(
            aiohttp.ClientResponse, '_headers',
            {'Content-Type': 'text/css'}, create=True)
        with headers_patch:
            for content in self.content:
                read_patch = mock.patch.object(
                    aiohttp.ClientResponse, 'read',
                    AsyncMock(return_value=content))
                with read_patch:
                    self.loop.run_until_complete(test())
                self.assertEqual(self.return_size, self.q_size)
                self.assertEqual(self.handler.meta, self.meta)
                self.assertEqual(self.handler.visited_urls[-1],
                                 self.expected_content)
예제 #7
0
    def test_get_body_css_validate(self):
        """Check that a ``url(...)`` target found in a CSS body is cloned.

        The root URL is queued and ``get_body`` is run against a canned
        ``text/css`` response. Afterwards the test expects the referenced
        image to be the last visited URL, the URL queue to be empty, and
        both entries to be present in ``handler.meta``.
        """
        from unittest import mock  # local import: used for scoped patching

        self.css_validate = "true"
        self.handler = Cloner(self.root, self.max_depth, self.css_validate)
        self.content = b""".banner { background: url("/example.png") }"""
        self.expected_content = "http://example.com/example.png"
        self.return_size = 0
        self.meta = {
            "/example.png": {
                "hash": "5a64beebcd2a6f1cbd00b8370debaa72",
                "headers": [{"Content-Type": "text/css"}],
            },
            "/index.html": {
                "hash": "d1546d731a9f30cc80127d57142a482b",
                "headers": [{"Content-Type": "text/css"}],
            },
        }

        async def test():
            await self.handler.new_urls.put((yarl.URL(self.root), 0))
            await self.handler.get_body(self.session)
            self.q_size = self.handler.new_urls.qsize()

        # Patch the aiohttp class attributes only while the clone runs;
        # plain assignment would leave them altered for every later test.
        headers_patch = mock.patch.object(
            aiohttp.ClientResponse, "_headers",
            {"Content-Type": "text/css"}, create=True)
        read_patch = mock.patch.object(
            aiohttp.ClientResponse, "read",
            AsyncMock(return_value=self.content))
        with headers_patch, read_patch:
            self.loop.run_until_complete(test())

        self.assertEqual(self.handler.visited_urls[-1], self.expected_content)
        self.assertEqual(self.q_size, self.return_size)
        self.assertEqual(self.meta, self.handler.meta)
예제 #8
0
 def setUp(self):
     """Create a ``Cloner`` that stores under ``/tmp`` and a private loop."""
     self.root = 'http://example.com'
     self.max_depth = sys.maxsize
     self.css_validate = 'false'
     self.handler = Cloner(self.root,
                           self.max_depth,
                           self.css_validate,
                           default_path='/tmp')
     self.loop = asyncio.new_event_loop()
     # Close the loop once the test is over; close() is idempotent, so
     # this stays safe if the loop is also closed somewhere else.
     self.addCleanup(self.loop.close)
예제 #9
0
 def setUp(self):
     """Create a scratch directory, the expected URLs and the ``Cloner``."""
     self.css_validate = "false"
     self.max_depth = sys.maxsize
     self.url = "http://example.com"

     # Scratch directory for this test run.
     self.main_page_path = generate_unique_path()
     os.makedirs(self.main_page_path)

     # Values the add_scheme tests compare against.
     self.expected_new_url = yarl.URL(self.url)
     self.expected_err_url = yarl.URL("http://example.com/status_404")

     self.handler = Cloner(
         self.url,
         self.max_depth,
         self.css_validate,
     )
예제 #10
0
 def setUp(self):
     """Create a scratch directory, a private loop and the ``Cloner``."""
     self.main_page_path = generate_unique_path()
     os.makedirs(self.main_page_path)
     self.url = yarl.URL('http://foo.com')
     self.root = 'http://example.com'
     self.max_depth = sys.maxsize
     self.loop = asyncio.new_event_loop()
     # Close the loop once the test is over; close() is idempotent, so
     # this stays safe if the loop is also closed somewhere else.
     self.addCleanup(self.loop.close)
     self.css_validate = 'false'
     self.handler = Cloner(self.root, self.max_depth, self.css_validate)
     self.filename = None
     self.hashname = None
예제 #11
0
 def setUp(self):
     """Create a ``Cloner`` for example.com that stores under ``/tmp``."""
     self.css_validate = "false"
     self.max_depth = sys.maxsize
     self.root = "http://example.com"
     self.handler = Cloner(
         self.root, self.max_depth, self.css_validate, default_path="/tmp"
     )
예제 #12
0
class TestClonerRun(unittest.TestCase):
    """Smoke test that ``Cloner.run`` completes on a private event loop."""

    def setUp(self):
        """Create a ``Cloner`` that stores under ``/tmp`` and a new loop."""
        self.root = "http://example.com"
        self.max_depth = sys.maxsize
        self.css_validate = "false"
        self.handler = Cloner(self.root, self.max_depth, self.css_validate, default_path="/tmp")
        self.loop = asyncio.new_event_loop()

    def test_run(self):
        """Drive ``handler.run()`` to completion; passes if nothing raises."""
        self.loop.run_until_complete(self.handler.run())

    def tearDown(self):
        """Close the loop from ``setUp`` so it is not leaked between tests."""
        self.loop.close()
예제 #13
0
class TestCloner(unittest.TestCase):
    """Tests for ``Cloner.add_scheme`` and for rejected root URLs."""

    def setUp(self):
        """Create a scratch directory and a ``Cloner`` for example.com."""
        self.url = "http://example.com"
        self.main_page_path = generate_unique_path()
        os.makedirs(self.main_page_path)
        self.expected_new_url = yarl.URL("http://example.com")
        self.expected_err_url = yarl.URL("http://example.com/status_404")
        self.max_depth = sys.maxsize
        self.css_validate = "false"
        self.handler = Cloner(self.url, self.max_depth, self.css_validate)

    def tearDown(self):
        """Delete the scratch directory created in ``setUp``."""
        shutil.rmtree(self.main_page_path)

    def _check_add_scheme(self):
        """Run ``add_scheme`` on ``self.url`` and compare both results."""
        result_url, result_err_url = self.handler.add_scheme(self.url)
        self.assertEqual(result_url, self.expected_new_url)
        self.assertEqual(result_err_url, self.expected_err_url)

    def _check_constructor_exits(self):
        """Expect ``Cloner(self.url, ...)`` to raise ``SystemExit``."""
        self.assertRaises(
            SystemExit, Cloner, self.url, self.max_depth, self.css_validate
        )

    def test_trailing_slash(self):
        """A trailing slash on the input gives the same pair of URLs."""
        self.url = "http://example.com/"
        self._check_add_scheme()

    def test_add_scheme(self):
        """An input that already has a scheme gives the expected pair."""
        self._check_add_scheme()

    def test_no_scheme(self):
        """A bare host name gives the same pair of URLs."""
        self.url = "example.com"
        self._check_add_scheme()

    def test_no_host(self):
        """A URL without a host makes the constructor exit."""
        self.url = "http:/"
        self._check_constructor_exits()

    def test_limited_length_host(self):
        """The host ``aaa`` makes the constructor exit."""
        self.url = "http://aaa"
        self._check_constructor_exits()
예제 #14
0
 def setUp(self):
     """Create the ``Cloner``, a private loop, and reset result fields."""
     self.root = 'http://example.com'
     self.level = 0
     self.max_depth = sys.maxsize
     self.loop = asyncio.new_event_loop()
     # Close the loop once the test is over; close() is idempotent, so
     # this stays safe if the loop is also closed somewhere else.
     self.addCleanup(self.loop.close)
     self.css_validate = 'false'
     self.handler = Cloner(self.root, self.max_depth, self.css_validate)
     self.expected_content = None
     self.return_content = None
     self.return_url = None
     self.return_level = None
     self.qsize = None
예제 #15
0
 def setUp(self):
     """Create a scratch directory, a private loop and the ``Cloner``."""
     self.main_page_path = generate_unique_path()
     os.makedirs(self.main_page_path)
     self.root = "http://example.com"
     self.level = 0
     self.max_depth = sys.maxsize
     self.loop = asyncio.new_event_loop()
     # Close the loop once the test is over; close() is idempotent, so
     # this stays safe if the loop is also closed somewhere else.
     self.addCleanup(self.loop.close)
     self.css_validate = "false"
     self.handler = Cloner(self.root, self.max_depth, self.css_validate)
     self.content = None
     self.expected_content = None
     self.return_content = None
예제 #16
0
    def test_moved_root(self):
        """Expect ``get_root_host`` to record the ``www.`` host as moved root."""
        self.css_validate = 'false'
        self.max_depth = sys.maxsize
        self.root = 'http://example.com'
        self.expected_moved_root = URL('http://www.example.com')
        self.handler = Cloner(self.root, self.max_depth, self.css_validate)

        # The awaitable is handed to the loop directly; no wrapper needed.
        self.loop.run_until_complete(self.handler.get_root_host())

        self.assertEqual(self.handler.moved_root, self.expected_moved_root)
예제 #17
0
    def test_clienterror(self, session):
        """Expect ``get_root_host`` to exit when the session cannot be created.

        Args:
            session: Not used in the body. NOTE(review): presumably injected
                by a patch decorator outside this block - confirm.
        """
        self.root = 'http://example.com'
        self.max_depth = sys.maxsize
        self.css_validate = 'false'
        self.handler = Cloner(self.root, self.max_depth, self.css_validate)

        async def test():
            await self.handler.get_root_host()

        # Replace ClientSession only inside this block. Rebinding
        # ``aiohttp.ClientSession`` by plain assignment is never undone here
        # and would leave the raising mock in place for later tests.
        failing_session = mock.Mock(side_effect=aiohttp.ClientError)
        with mock.patch.object(aiohttp, 'ClientSession', failing_session):
            with self.assertRaises(SystemExit):
                self.loop.run_until_complete(test())
예제 #18
0
class TestGetBody(unittest.TestCase):
    """Tests for ``Cloner.get_body`` driven by a mocked aiohttp session.

    ``aiohttp.ClientSession.get`` and attributes of
    ``aiohttp.ClientResponse`` are replaced with canned values. Every
    replacement goes through ``_patch_attr`` so it is undone when the test
    finishes instead of leaking into later tests.
    """

    def _patch_attr(self, target, name, value):
        """Set ``target.name`` to ``value`` until the current test ends.

        Args:
            target: Object (here an aiohttp class) whose attribute is replaced.
            name: Name of the attribute to replace.
            value: Replacement object, installed as-is.
        """
        from unittest import mock  # local import: only this helper needs it

        # create=True tolerates attributes the target does not define yet.
        patcher = mock.patch.object(target, name, value, create=True)
        patcher.start()
        self.addCleanup(patcher.stop)

    def setUp(self):
        """Create the scratch directory, loop, ``Cloner`` and mocked session."""
        self.main_page_path = generate_unique_path()
        os.makedirs(self.main_page_path)
        self.root = "http://example.com"
        self.level = 0
        self.max_depth = sys.maxsize
        self.loop = asyncio.new_event_loop()
        self.css_validate = "false"
        self.handler = Cloner(self.root, self.max_depth, self.css_validate)
        self.target_path = "/opt/snare/pages/{}".format(yarl.URL(self.root).host)
        self.return_content = None
        self.expected_content = None
        self.filename = None
        self.hashname = None
        self.url = None
        self.content = None
        self.return_url = None
        self.return_level = None
        self.meta = None
        self.q_size = None

        # ``self.session`` is the ClientSession class itself, so ``get`` is
        # replaced on the class; _patch_attr restores it after the test.
        self.session = aiohttp.ClientSession
        self._patch_attr(
            self.session,
            "get",
            AsyncMock(
                return_value=aiohttp.ClientResponse(
                    url=yarl.URL("http://www.example.com"),
                    method="GET",
                    writer=None,
                    continue100=1,
                    timer=None,
                    request_info=None,
                    traces=None,
                    loop=self.loop,
                    session=None,
                )
            ),
        )

    def test_get_body(self):
        """Clone an HTML page with one absolute link and check the results.

        Expects a "Cloned file: /test" debug log line, the stored root page
        to contain the link rewritten to ``/test``, both URLs at the end of
        ``visited_urls``, and both entries in ``handler.meta``.
        """
        self.content = b"""<html><body><a href="http://example.com/test"></a></body></html>"""

        self._patch_attr(aiohttp.ClientResponse, "_headers", {"Content-Type": "text/html"})
        self._patch_attr(aiohttp.ClientResponse, "read", AsyncMock(return_value=self.content))
        self.filename, self.hashname = self.handler._make_filename(yarl.URL(self.root))
        self.expected_content = '<html><body><a href="/test"></a></body></html>'

        self.meta = {
            "/index.html": {
                "hash": "d1546d731a9f30cc80127d57142a482b",
                "headers": [{"Content-Type": "text/html"}],
            },
            "/test": {
                "hash": "4539330648b80f94ef3bf911f6d77ac9",
                "headers": [{"Content-Type": "text/html"}],
            },
        }

        async def test():
            await self.handler.new_urls.put((yarl.URL(self.root), 0))
            await self.handler.get_body(self.session)

        with self.assertLogs(level="DEBUG") as log:
            self.loop.run_until_complete(test())
            self.assertIn("DEBUG:snare.cloner:Cloned file: /test", "".join(log.output))

        with open(os.path.join(self.target_path, self.hashname)) as f:
            self.return_content = f.read()

        self.assertEqual(self.return_content, self.expected_content)
        self.assertEqual(
            self.handler.visited_urls[-2:],
            ["http://example.com/", "http://example.com/test"],
        )
        self.assertEqual(self.handler.meta, self.meta)

    def test_get_body_css_validate(self):
        """Check that a ``url(...)`` target found in a CSS body is cloned."""
        self._patch_attr(aiohttp.ClientResponse, "_headers", {"Content-Type": "text/css"})

        self.css_validate = "true"
        self.handler = Cloner(self.root, self.max_depth, self.css_validate)
        self.content = b""".banner { background: url("/example.png") }"""
        self._patch_attr(aiohttp.ClientResponse, "read", AsyncMock(return_value=self.content))
        self.expected_content = "http://example.com/example.png"
        self.return_size = 0
        self.meta = {
            "/example.png": {
                "hash": "5a64beebcd2a6f1cbd00b8370debaa72",
                "headers": [{"Content-Type": "text/css"}],
            },
            "/index.html": {
                "hash": "d1546d731a9f30cc80127d57142a482b",
                "headers": [{"Content-Type": "text/css"}],
            },
        }

        async def test():
            await self.handler.new_urls.put((yarl.URL(self.root), 0))
            await self.handler.get_body(self.session)
            self.q_size = self.handler.new_urls.qsize()

        self.loop.run_until_complete(test())
        self.assertEqual(self.handler.visited_urls[-1], self.expected_content)
        self.assertEqual(self.q_size, self.return_size)
        self.assertEqual(self.meta, self.handler.meta)

    def test_get_body_css_validate_scheme(self):
        """Check that ``data:`` and ``file:`` URLs in CSS are not followed."""
        self._patch_attr(aiohttp.ClientResponse, "_headers", {"Content-Type": "text/css"})

        self.css_validate = "true"
        self.return_size = 0
        self.handler = Cloner(self.root, self.max_depth, self.css_validate)
        self.content = [
            b""".banner { background: url("data://domain/test.txt") }""",
            b""".banner { background: url("file://domain/test.txt") }""",
        ]
        self.meta = {
            "/index.html": {
                "hash": "d1546d731a9f30cc80127d57142a482b",
                "headers": [{"Content-Type": "text/css"}],
            },
        }

        self.expected_content = "http://example.com/"

        async def test():
            await self.handler.new_urls.put((yarl.URL(self.root), 0))
            await self.handler.get_body(self.session)
            self.q_size = self.handler.new_urls.qsize()

        for content in self.content:
            # Patches stack; cleanups undo them in reverse order.
            self._patch_attr(aiohttp.ClientResponse, "read", AsyncMock(return_value=content))
            self.loop.run_until_complete(test())
            self.assertEqual(self.return_size, self.q_size)
            self.assertEqual(self.handler.meta, self.meta)
            self.assertEqual(self.handler.visited_urls[-1], self.expected_content)

    def test_client_error(self):
        """Expect an error log entry when ``session.get`` raises ``ClientError``."""
        self._patch_attr(self.session, "get", AsyncMock(side_effect=aiohttp.ClientError))

        async def test():
            await self.handler.new_urls.put((yarl.URL(self.root), 0))
            await self.handler.get_body(self.session)

        with self.assertLogs(level="ERROR") as log:
            self.loop.run_until_complete(test())
            self.assertIn("ERROR:snare.cloner:", "".join(log.output))

    def tearDown(self):
        """Remove the scratch directory and close the loop from ``setUp``."""
        try:
            shutil.rmtree(self.main_page_path)
        finally:
            # A loop that is never closed is reported as a ResourceWarning;
            # close it even when removing the directory fails.
            self.loop.close()
예제 #19
0
class TestGetBody(unittest.TestCase):
    """Tests for ``Cloner.get_body`` driven by a mocked aiohttp session.

    ``aiohttp.ClientSession.get`` and attributes of
    ``aiohttp.ClientResponse`` are replaced with canned values. Every
    replacement goes through ``_patch_attr`` so it is undone when the test
    finishes instead of leaking into later tests.
    """

    def _patch_attr(self, target, name, value):
        """Set ``target.name`` to ``value`` until the current test ends.

        Args:
            target: Object (here an aiohttp class) whose attribute is replaced.
            name: Name of the attribute to replace.
            value: Replacement object, installed as-is.
        """
        from unittest import mock  # local import: only this helper needs it

        # create=True tolerates attributes the target does not define yet.
        patcher = mock.patch.object(target, name, value, create=True)
        patcher.start()
        self.addCleanup(patcher.stop)

    def setUp(self):
        """Create the scratch directory, loop, ``Cloner`` and mocked session."""
        self.main_page_path = generate_unique_path()
        os.makedirs(self.main_page_path)
        self.root = 'http://example.com'
        self.level = 0
        self.max_depth = sys.maxsize
        self.loop = asyncio.new_event_loop()
        self.css_validate = 'false'
        self.handler = Cloner(self.root, self.max_depth, self.css_validate)
        self.target_path = '/opt/snare/pages/{}'.format(
            yarl.URL(self.root).host)
        self.return_content = None
        self.expected_content = None
        self.filename = None
        self.hashname = None
        self.url = None
        self.content = None
        self.return_url = None
        self.return_level = None
        self.meta = None
        self.q_size = None

        # ``self.session`` is the ClientSession class itself, so ``get`` is
        # replaced on the class; _patch_attr restores it after the test.
        self.session = aiohttp.ClientSession
        self._patch_attr(self.session, 'get', AsyncMock(
            return_value=aiohttp.ClientResponse(
                url=yarl.URL("http://www.example.com"),
                method="GET",
                writer=None,
                continue100=1,
                timer=None,
                request_info=None,
                traces=None,
                loop=self.loop,
                session=None)))

    def test_get_body(self):
        """Clone an HTML page with one absolute link and check the results.

        Expects the stored root page to contain the link rewritten to
        ``/test``, both URLs at the end of ``visited_urls``, and both
        entries in ``handler.meta``.
        """
        self.content = b'''<html><body><a href="http://example.com/test"></a></body></html>'''

        self._patch_attr(aiohttp.ClientResponse, '_headers',
                         {'Content-Type': 'text/html'})
        self._patch_attr(aiohttp.ClientResponse, 'read',
                         AsyncMock(return_value=self.content))
        self.filename, self.hashname = self.handler._make_filename(
            yarl.URL(self.root))
        self.expected_content = '<html><body><a href="/test"></a></body></html>'

        self.meta = {
            '/index.html': {
                'content_type': 'text/html',
                'hash': 'd1546d731a9f30cc80127d57142a482b'
            },
            '/test': {
                'content_type': 'text/html',
                'hash': '4539330648b80f94ef3bf911f6d77ac9'
            }
        }

        async def test():
            await self.handler.new_urls.put((yarl.URL(self.root), 0))
            await self.handler.get_body(self.session)

        self.loop.run_until_complete(test())

        with open(os.path.join(self.target_path, self.hashname)) as f:
            self.return_content = f.read()

        self.assertEqual(self.return_content, self.expected_content)
        self.assertEqual(self.handler.visited_urls[-2:],
                         ['http://example.com/', 'http://example.com/test'])
        self.assertEqual(self.handler.meta, self.meta)

    def test_get_body_css_validate(self):
        """Check that a ``url(...)`` target found in a CSS body is cloned."""
        self._patch_attr(aiohttp.ClientResponse, '_headers',
                         {'Content-Type': 'text/css'})

        self.css_validate = 'true'
        self.handler = Cloner(self.root, self.max_depth, self.css_validate)
        self.content = b'''.banner { background: url("/example.png") }'''
        self._patch_attr(aiohttp.ClientResponse, 'read',
                         AsyncMock(return_value=self.content))
        self.expected_content = 'http://example.com/example.png'
        self.return_size = 0
        self.meta = {
            '/example.png': {
                'content_type': 'text/css',
                'hash': '5a64beebcd2a6f1cbd00b8370debaa72'
            },
            '/index.html': {
                'content_type': 'text/css',
                'hash': 'd1546d731a9f30cc80127d57142a482b'
            }
        }

        async def test():
            await self.handler.new_urls.put((yarl.URL(self.root), 0))
            await self.handler.get_body(self.session)
            self.q_size = self.handler.new_urls.qsize()

        self.loop.run_until_complete(test())
        self.assertEqual(self.handler.visited_urls[-1], self.expected_content)
        self.assertEqual(self.q_size, self.return_size)
        self.assertEqual(self.meta, self.handler.meta)

    def test_get_body_css_validate_scheme(self):
        """Check that ``data:`` and ``file:`` URLs in CSS are not followed."""
        self._patch_attr(aiohttp.ClientResponse, '_headers',
                         {'Content-Type': 'text/css'})

        self.css_validate = 'true'
        self.return_size = 0
        self.handler = Cloner(self.root, self.max_depth, self.css_validate)

        self.content = [
            b'''.banner { background: url("data://domain/test.txt") }''',
            b'''.banner { background: url("file://domain/test.txt") }'''
        ]

        self.meta = {
            '/index.html': {
                'content_type': 'text/css',
                'hash': 'd1546d731a9f30cc80127d57142a482b'
            }
        }
        self.expected_content = 'http://example.com/'

        async def test():
            await self.handler.new_urls.put((yarl.URL(self.root), 0))
            await self.handler.get_body(self.session)
            self.q_size = self.handler.new_urls.qsize()

        for content in self.content:
            # Patches stack; cleanups undo them in reverse order.
            self._patch_attr(aiohttp.ClientResponse, 'read',
                             AsyncMock(return_value=content))
            self.loop.run_until_complete(test())
            self.assertEqual(self.return_size, self.q_size)
            self.assertEqual(self.handler.meta, self.meta)
            self.assertEqual(self.handler.visited_urls[-1],
                             self.expected_content)

    def test_client_error(self):
        """Expect an error log entry when ``session.get`` raises ``ClientError``."""
        self._patch_attr(self.session, 'get',
                         AsyncMock(side_effect=aiohttp.ClientError))

        async def test():
            await self.handler.new_urls.put((yarl.URL(self.root), 0))
            await self.handler.get_body(self.session)

        with self.assertLogs(level='ERROR') as log:
            self.loop.run_until_complete(test())
            self.assertIn('ERROR:snare.cloner:', log.output[0])

    def tearDown(self):
        """Remove the scratch directory and close the loop from ``setUp``."""
        try:
            shutil.rmtree(self.main_page_path)
        finally:
            # A loop that is never closed is reported as a ResourceWarning;
            # close it even when removing the directory fails.
            self.loop.close()
예제 #20
0
 def test_limited_length_host(self):
     """Expect ``Cloner`` to exit when built with the host ``aaa``."""
     self.url = "http://aaa"
     # Callable form of assertRaises: the constructor call is made by the
     # assertion itself with the arguments listed after it.
     self.assertRaises(
         SystemExit, Cloner, self.url, self.max_depth, self.css_validate
     )
예제 #21
0
 def test_no_host(self):
     """Expect ``Cloner`` to exit when built from a URL without a host."""
     self.url = "http:/"
     # Callable form of assertRaises: the constructor call is made by the
     # assertion itself with the arguments listed after it.
     self.assertRaises(
         SystemExit, Cloner, self.url, self.max_depth, self.css_validate
     )