# Example no. 1
# 0
class TestDetector(unittest.TestCase):
    def setUp(self):
        """Build a new Detector for each test and expose its apps/categories."""
        detector = Detector()
        self.detector = detector
        self.apps = detector.apps
        self.categories = detector.categories

    def mock_detector_run(self, url='', content='', headers=None):
        """Run ``self.detector.detect`` against a faked HTTP response.

        ``wad.detection.tools`` is patched so that its ``urlopen`` returns a
        fake page whose ``geturl()`` gives ``url``, whose ``read()`` gives
        ``content`` and whose ``info()`` gives ``headers`` (an empty dict when
        ``headers`` is falsy).  The address passed to ``detect`` is always
        'http://abc.xyz'.  Returns whatever ``detect`` returns.
        """
        # read() hands back bytes on Python 3 and the plain string otherwise.
        body = bytes(content, encoding='utf-8') if six.PY3 else content

        fake_page = mock.MagicMock()
        fake_page.geturl.return_value = url
        fake_page.read.return_value = body
        fake_page.info.return_value = headers or dict()

        with mock.patch('wad.detection.tools') as patched_tools:
            patched_tools.urlopen = mock.Mock(return_value=fake_page)
            return self.detector.detect('http://abc.xyz')

    def test_check_re(self):
        # Each case feeds check_re one pattern that carries a version template
        # (the part after "\;version:") and pins the finding it must produce.
        iis = self.apps['IIS']
        google_maps = self.apps['Google Maps']
        magento = self.apps['Magento']

        # "script": [ "js/mage", "skin/frontend/(?:default|(enterprise))\\;version:\\1?Enterprise:Community" ],
        # Index 1 is the second entry of that list.
        magento_re = magento['script_re'][1]
        magento_spec = magento['script'][1]

        cases = [
            # "headers": { "Server": "IIS(?:/([\\d.]+))?\\;version:\\1" },
            (iis['headers_re']['Server'], iis['headers']['Server'],
             'Microsoft-IIS/7.5',
             'IIS', [{'app': 'IIS', 'ver': '7.5'}]),

            # (?:maps\\.google\\.com/maps\\?file=api(?:&v=([\\d.]+))?|
            # maps\\.google\\.com/maps/api/staticmap)\\;version:API v\\1
            (google_maps['script_re'][0], google_maps['script'][0],
             'abc <script src="maps.google.com/maps?file=api&v=123"> def',
             'Google Maps', [{'app': 'Google Maps', 'ver': 'API v123'}]),

            # Neither alternative of the Magento pattern is present: no finding.
            (magento_re, magento_spec,
             'abc <script src="skin/frontend/whatever"> def',
             'Magento', []),

            # "default" matches without the capture group: 'Community'.
            (magento_re, magento_spec,
             'abc <script src="skin/frontend/default"> def',
             'Magento', [{'app': 'Magento', 'ver': 'Community'}]),

            # "enterprise" fills the capture group: 'Enterprise'.
            (magento_re, magento_spec,
             'abc <script src="skin/frontend/enterprise"> def',
             'Magento', [{'app': 'Magento', 'ver': 'Enterprise'}]),
        ]

        for re_entry, spec_entry, text, app, expected in cases:
            # A new empty list is passed on every call, as each case starts
            # with no previous findings.
            found = self.detector.check_re(re_entry, spec_entry, text, [], None, app)
            assert found == expected

    def test_check_url(self):
        # check_url must name the app from the URL alone, with no version.
        url_to_app = (
            ("http://whatever.blogspot.com", 'Blogger'),
            ("https://whatever-else3414.de/script.php", 'PHP'),
        )
        for url, app in url_to_app:
            assert self.detector.check_url(url) == [{'app': app, 'ver': None}]

    def test_check_html(self):
        # A page containing the gsNavBar div must be reported as Gallery.
        markup = '<html><div id="gsNavBar" class="gcBorder1">whatever'
        findings = self.detector.check_html(markup)
        assert findings == [{'app': 'Gallery', 'ver': None}]

    def test_check_meta(self):
        # (html fragment, findings check_meta must return for it)
        cases = [
            # regular, double-quoted meta tag
            ('<html>    s<meta name="generator" content="Percussion">sssss    whatever',
             [{'app': 'Percussion', 'ver': None}]),
            # irregular spacing around attributes plus a stray token in between
            (" dcsaasd f<meta   name    = 'cargo_title' dd  content  =   'Pdafadfda'  >",
             [{'app': 'Cargo', 'ver': None}]),
            # tag name is not "meta": nothing may be reported
            (" dcsaasd f<mfffffffeta     name='cargo_title' dd  content='Pdafadfda'  >",
             []),
            # meta tag without a content attribute: nothing may be reported
            (" dcsaasd f<meta     name='cargo_title' >",
             []),
        ]
        for html, expected in cases:
            assert self.detector.check_meta(html) == expected

    def test_check_script(self):
        # (html fragment, findings check_script must return for it)
        cases = (
            # src attribute with irregular spacing still identifies jQuery
            ('<html>    s<script  sda f     src    =  "jquery1.7.js">',
             [{'app': 'jQuery', 'ver': None}]),
            # empty src attribute yields no findings
            (" dcsaasd f<script     src='' >",
             []),
        )
        for html, expected in cases:
            assert self.detector.check_script(html) == expected

    def test_check_headers(self):
        # check_headers receives an object whose items() yields (name, value)
        # pairs; the Server value below must be reported as Ubuntu.
        headers_mock = mock.Mock()
        headers_mock.items.return_value = [
            ('Host', 'abc.com'),
            ('Server', 'Linux Ubuntu 12.10'),
        ]

        findings = self.detector.check_headers(headers_mock)
        assert findings == [{'app': 'Ubuntu', 'ver': None}]

    def test_check_cookies(self):
        # An "xid" cookie holding 32 'a' characters must be reported as X-Cart.
        xid_value = 'a' * 32
        headers = {'Set-Cookie': 'x=1; xid=%s; y=2' % xid_value}

        findings = self.detector.check_cookies(headers)
        assert findings == [{'app': 'X-Cart', 'ver': None}]

    def test_implied_by(self):
        # ASP.NET implies both Windows Server and IIS, and IIS implies
        # Windows Server.  IIS is already among the known apps, so the only
        # newly implied app is Windows Server.
        already_known = ['Microsoft ASP.NET', 'IIS']
        newly_implied = self.detector.implied_by(already_known)
        assert newly_implied == ['Windows Server']

    def test_follow_implies(self):
        # follow_implies extends the findings list in place.
        # Each case: (apps found initially, apps expected afterwards, in order).
        cases = [
            # empty findings stay empty
            ([], []),
            # an app that implies nothing is left alone
            (['reCAPTCHA'], ['reCAPTCHA']),
            # Django CMS implies Django, and Django implies Python:
            # the whole chain has to be followed
            (['Django CMS'], ['Django CMS', 'Django', 'Python']),
        ]
        for initial_apps, expected_apps in cases:
            findings = [{'app': name, 'ver': None} for name in initial_apps]
            self.detector.follow_implies(findings)
            assert findings == [{'app': name, 'ver': None} for name in expected_apps]

    def test_remove_duplicates(self):
        # remove_duplicates edits the list it is given in place; the result is
        # read back from the same list object afterwards.
        #
        # What the fixture below pins, app by app:
        #   A - three identical unversioned entries collapse into one
        #   B - three identical "1.5" entries collapse into one
        #   C - None and "be": only the versioned entry is kept
        #   D - "7.0" and "222": both are kept
        #   E - "1", None and "1.3": only "1.3" is kept
        #   F - "2.2", "2" and None: only "2.2" is kept
        with_duplicates = [
            {'app': 'A', 'ver': None}, {'app': 'B', 'ver': "1.5"},
            {'app': 'C', 'ver': None}, {'app': 'D', 'ver': "7.0"},
            {'app': 'E', 'ver': "1"}, {'app': 'F', 'ver': "2.2"},
            {'app': 'A', 'ver': None}, {'app': 'B', 'ver': "1.5"},
            {'app': 'C', 'ver': "be"}, {'app': 'D', 'ver': "222"},
            {'app': 'A', 'ver': None}, {'app': 'B', 'ver': "1.5"},
            {'app': 'E', 'ver': None}, {'app': 'E', 'ver': "1.3"},
            {'app': 'F', 'ver': "2"}, {'app': 'F', 'ver': None},
        ]

        # The order of the surviving entries is part of the expectation.
        without_duplicates = [
            {'app': 'A', 'ver': None}, {'app': 'B', 'ver': "1.5"},
            {'app': 'C', 'ver': "be"}, {'app': 'D', 'ver': "7.0"},
            {'app': 'E', 'ver': "1.3"},
            {'app': 'F', 'ver': "2.2"}, {'app': 'D', 'ver': "222"},
        ]

        # Reuse the Detector built by setUp, as the other tests in this class do.
        self.detector.remove_duplicates(with_duplicates)
        assert with_duplicates == without_duplicates

    def test_excluded_by(self):
        excluded_by = self.detector.excluded_by

        # 'Neos Flow' and 'Neos CMS' both exclude 'TYPO3 CMS'; it is listed once.
        assert excluded_by(['Neos Flow', 'Neos CMS']) == ['TYPO3 CMS']

        # 'JBoss Web' excludes 'Apache Tomcat' and 'Mambo' excludes 'Joomla';
        # the order of the result is not part of the expectation.
        excluded = excluded_by(['JBoss Web', 'Jetty', 'Mambo'])
        assert set(excluded) == {'Joomla', 'Apache Tomcat'}

        # 'IIS' excludes nothing.
        assert excluded_by(['IIS']) == []

    def test_remove_exclusions(self):
        # remove_exclusions prunes the findings list in place.
        # Each case: (apps found initially, apps expected to remain, in order).
        cases = [
            # empty findings stay empty
            ([], []),
            # a single app with nothing to exclude is left alone
            (['reCAPTCHA'], ['reCAPTCHA']),
            # real exclusions: 'Apache Tomcat' and 'TYPO3 CMS' are dropped
            # while 'JBoss Web', 'IIS' and 'Neos Flow' remain
            (['JBoss Web', 'Apache Tomcat', 'IIS', 'TYPO3 CMS', 'Neos Flow'],
             ['JBoss Web', 'IIS', 'Neos Flow']),
        ]
        for initial_apps, remaining_apps in cases:
            findings = [{'app': name, 'ver': None} for name in initial_apps]
            self.detector.remove_exclusions(findings)
            assert findings == [{'app': name, 'ver': None} for name in remaining_apps]

    def test_add_categories(self):
        # add_categories must add a "type" key to every finding in place;
        # an app with several categories gets them joined by commas.
        findings = [
            {'app': 'Django CMS', 'ver': None},
            {'app': 'Django', 'ver': None},
            {'app': 'Python', 'ver': '2.7'},
            {'app': 'Dynamicweb', 'ver': 'beta'}]
        # Expected "type" for each finding above, in the same order.
        types_in_order = [
            "CMS",
            "Web Application Frameworks",
            "Programming Languages",
            "CMS,Ecommerce,Analytics",
        ]

        # Build the expectation from an independent copy so that the in-place
        # update of `findings` cannot leak into it.
        original = copy.deepcopy(findings)
        for entry, type_name in zip(original, types_in_order):
            entry["type"] = type_name

        self.detector.add_categories(findings)
        assert original == findings

    def test_url_match(self):
        url_match = self.detector.url_match

        # Without a regexp the default comes back.
        without_regexp = url_match(url='', regexp=None, default='test')
        assert without_regexp == 'test'

        # 'exampl' is found in 'example.com': the result is not None.
        matching = url_match(url='example.com', regexp='exampl', default='test')
        assert matching is not None

        # 'ampl' is expected to give None for 'example.com'.
        non_matching = url_match(url='example.com', regexp='ampl', default='test')
        assert non_matching is None

    def test_expected_url(self):
        url = "http://site.abc.com/dir/sub/script.php"
        # (second argument, third argument, whether the result must be truthy)
        # NOTE(review): judging by the cases, the second argument looks like a
        # pattern the URL has to match and the third like a pattern that rules
        # the URL out - confirm against Detector.expected_url.
        cases = [
            (None, None, True),
            ('http://.*abc.com/', None, True),
            ('http://abc.com/', None, False),
            ('http://.*abc.com/', "php", True),
            ('http://.*abc.com/', ".*php", False),
            (None, ".*\\.asp", True),
            (None, ".*\\.php", False),
        ]
        for second, third, should_be_truthy in cases:
            outcome = self.detector.expected_url(url, second, third)
            # Only truthiness is pinned, not the exact return value.
            assert bool(outcome) is should_be_truthy

    def test_detect(self):
        # Full detect run on the cern_ch_test_data fixture (URL, content and
        # headers): the result must be keyed by the expected URL and list
        # every expected app with its category and version.
        by_app = operator.itemgetter('app')
        expected = {
            'http://home.web.cern.ch/': [
                {'app': 'Apache', 'type': 'Web Servers', 'ver': None},
                {'app': 'Drupal', 'type': 'CMS', 'ver': '7'},
                {'app': 'Lightbox', 'type': 'JavaScript Libraries', 'ver': None},
                {'app': 'jQuery', 'type': 'JavaScript Libraries', 'ver': None},
                {'app': 'Google Font API', 'type': 'Font Scripts', 'ver': None},
                {'app': 'PHP', 'type': 'Programming Languages', 'ver': None}
            ]
        }

        results = self.mock_detector_run(
            url=cern_ch_test_data['geturl'],
            content=cern_ch_test_data['content'],
            headers=cern_ch_test_data['headers'],
        )

        found_urls = list(six.iterkeys(results))
        expected_urls = list(six.iterkeys(expected))
        assert found_urls == expected_urls

        # The order of findings is not pinned: compare both sides sorted by app.
        found_apps = sorted(next(six.itervalues(results)), key=by_app)
        expected_apps = sorted(next(six.itervalues(expected)), key=by_app)
        assert found_apps == expected_apps

    def test_detect_multiple(self):
        # Five inputs, but the patched detect only has two results to hand out:
        # the list holds None, an empty string and a repeated URL.
        urls_list = ["http://cern.ch", None, "", "http://cern.ch", "example.com"]
        # Positional arguments detect must have been called with.
        expected_calls = [
            (('example.com', None, None, TIMEOUT),),
            (('http://cern.ch', None, None, TIMEOUT),),
        ]
        with mock.patch('wad.detection.Detector.detect') as detect_mock:
            detect_mock.side_effect = [{'test1': 1}, {'test2': 2}]
            merged = self.detector.detect_multiple(urls_list)
            # The per-URL results come back merged into one dict.
            assert merged == {'test1': 1, 'test2': 2}
            for expected_call in expected_calls:
                assert expected_call in detect_mock.call_args_list

    def test_normalize_url(self):
        # (input URL, URL normalize_url must return)
        cases = (
            # a bare host gains a trailing slash
            ('http://abc.pl', 'http://abc.pl/'),
            # URLs that already have a path are returned unchanged
            ('http://abc.pl/', 'http://abc.pl/'),
            ('http://abc.pl/def', 'http://abc.pl/def'),
        )
        for raw_url, normalized in cases:
            assert self.detector.normalize_url(raw_url) == normalized

    def test_regression_meta_attributes_order(self):
        # Regression: the re_meta pattern used to hardcode the order of the
        # attributes, so the same tag written the other way round was missed.
        # GitLab CI is one app that was affected.
        variants = (
            "<meta content='GitLab Continuous Integration' name='description'>",
            "<meta name='description' content='GitLab Continuous Integration'>",
        )
        expected = [{'app': 'GitLab CI', 'ver': None}]

        content_first, name_first = [self.detector.check_meta(html) for html in variants]

        # Both attribute orders must give the same, expected finding.
        assert content_first == name_first == expected

    def test_regression_empty_content_should_run_checks(self):
        # Regression introduced while abstracting parts of Detector.detect:
        # with empty content the remaining checks were skipped, although the
        # headers etc. may still give apps away.  Here the content is empty
        # and only the fixture's URL and headers are supplied.
        by_app = operator.itemgetter('app')
        expected = {
            'http://home.web.cern.ch/': [
                {'app': 'Apache', 'type': 'Web Servers', 'ver': None},
                {'app': 'Drupal', 'type': 'CMS', 'ver': '7'},
                {'app': 'PHP', 'type': 'Programming Languages', 'ver': None}
            ]
        }

        results = self.mock_detector_run(
            url=cern_ch_test_data['geturl'],
            content='',
            headers=cern_ch_test_data['headers'],
        )

        found_urls = list(six.iterkeys(results))
        expected_urls = list(six.iterkeys(expected))
        assert found_urls == expected_urls

        # The order of findings is not pinned: compare both sides sorted by app.
        found_apps = sorted(next(six.itervalues(results)), key=by_app)
        expected_apps = sorted(next(six.itervalues(expected)), key=by_app)
        assert found_apps == expected_apps

    def test_regression_urls_not_normalized(self):
        # Regression: the ".pl" top level domain was taken for a Perl file.
        # Wappalyzer gets a normalized URI from the browser ("http://abc.xyz/")
        # even when "http://abc.xyz" is opened, whereas the URL was not
        # normalized here.  The result must be keyed by the normalized URL and
        # contain no findings.
        expected = {'http://abc.pl/': []}
        results = self.mock_detector_run(url='http://abc.pl')
        assert results == expected