def artifacts(self, raw):
    """Extract observables from ``raw`` when automatic extraction is enabled.

    :param raw: Data handed unchanged to ``Extractor.check_iterable``.
    :return: Whatever ``check_iterable`` returns, or an empty list when
        ``self.auto_extract`` is falsy.
    """
    # Extraction is opt-out: a falsy setting means "report nothing".
    if not self.auto_extract:
        return []

    # The data returned by get_data() is passed as ``ignore`` -- presumably so
    # the analyzed observable itself is not reported back (confirm in Extractor).
    return Extractor(ignore=self.get_data()).check_iterable(raw)
Exemple #2
0
    def artifacts(self, raw):
        """Return the observables found in ``raw`` by the regex extractor.

        :param raw: Data handed unchanged to ``Extractor.check_iterable``.
        :return: The extractor result, or an empty list when
            ``self.auto_extract`` is falsy.
        """
        # Nothing is extracted unless the auto_extract setting is truthy.
        if not self.auto_extract:
            return []

        ignored = self.get_data()
        return Extractor(ignore=ignored).check_iterable(raw)
 def readwarninglists(self):
     """Load every warning list found below ``<self.path>/lists``.

     Each file matching ``lists/*/*.json`` is parsed as JSON; its ``list``
     entry (default: empty list) is run through ``Extractor().check_iterable``
     and the results are split into their ``value`` and ``type`` fields.

     :return: A list with one dict per file, holding the keys ``name``
         (default ``'Unknown'``), ``values`` and ``dataTypes``.
     """
     listcontent = []
     for list_path in glob('{}/lists/*/*.json'.format(self.path)):
         # JSON text is UTF-8 by specification; decode it explicitly instead
         # of relying on the platform's locale encoding.
         with io.open(list_path, 'r', encoding='utf-8') as fh:
             content = json.load(fh)

         values = Extractor().check_iterable(content.get('list', []))
         listcontent.append({
             "name": content.get('name', 'Unknown'),
             "values": [value['value'] for value in values],
             "dataTypes": [value['type'] for value in values]
         })
     return listcontent
 def readwarninglists(self):
     """Load every warning list found below ``<self.path>/lists``.

     Each file matching ``lists/*/*.json`` is parsed as JSON; its ``list``
     entry (default: empty list) is run through ``Extractor().check_iterable``
     and the results are split into their ``data`` and ``dataType`` fields.

     :return: A list with one dict per file, holding the keys ``name``
         (default ``"Unknown"``), ``values`` and ``dataTypes``.
     """
     listcontent = []
     for list_path in glob("{}/lists/*/*.json".format(self.path)):
         # JSON text is UTF-8 by specification; decode it explicitly instead
         # of relying on the platform's locale encoding.
         with io.open(list_path, "r", encoding="utf-8") as fh:
             content = json.load(fh)

         values = Extractor().check_iterable(content.get("list", []))
         listcontent.append({
             "name": content.get("name", "Unknown"),
             "values": [value["data"] for value in values],
             "dataTypes": [value["dataType"] for value in values],
         })
     return listcontent
class TestExtractorValidInput(unittest.TestCase):
    """This tests the extractor with valid input."""

    def setUp(self):
        # Every test works on its own, freshly constructed extractor.
        self.extractor = Extractor()

    def _assert_single(self, value, expected, label):
        # Shared assertion for the single-string tests: ``check_string`` must
        # classify ``value`` as ``expected``; ``label`` only feeds the message.
        self.assertEqual(
            self.extractor.check_string(value=value), expected,
            '{} single string: wrong data type.'.format(label))

    # --- check_string: one observable per string ---------------------------

    def test_single_fqdn(self):
        self._assert_single('www.google.de', 'fqdn', 'FQDN')

    def test_single_fqdn_as_unicode(self):
        # Same value as above, but passed as an explicit unicode literal.
        self._assert_single(u'www.google.de', 'fqdn', 'FQDN')

    def test_single_domain(self):
        self._assert_single('google.de', 'domain', 'domain')

    def test_single_url(self):
        self._assert_single('https://google.de', 'url', 'url')

    def test_single_ipv4(self):
        self._assert_single('8.8.8.8', 'ip', 'ipv4')

    def test_single_ipv6(self):
        self._assert_single(
            '2001:0db8:85a3:08d3:1319:8a2e:0370:7344', 'ip', 'ipv6')

    def test_single_md5(self):
        self._assert_single('b373bd6b144e7846f45a1e47ced380b8', 'hash', 'md5')

    def test_single_sha1(self):
        self._assert_single(
            '94d4d48ba9a79304617f8291982bf69a8ce16fb0', 'hash', 'sha1')

    def test_single_sha256(self):
        self._assert_single(
            '7ef8b3dc5bf40268f66721a89b95f4c5f0cc08e34836f8c3a007ceed193654d4',
            'hash', 'sha256')

    def test_single_useragent(self):
        self._assert_single(
            'Mozilla/5.0 (Windows NT 10.0; WOW64; rv:52.0) Gecko/20100101 '
            'Firefox/52.0', 'user-agent', 'user-agent')

    def test_single_mail(self):
        # The previous fixture was a redacted placeholder ('*' characters),
        # which is not an e-mail address; use a syntactically valid address
        # on a reserved example domain instead.
        self._assert_single('alice@example.com', 'mail', 'mail')

    def test_single_regkey(self):
        self._assert_single(
            'HKEY_LOCAL_MACHINE\\Software\\Microsoft\\Windows\\CurrentVersion\\Run',
            'registry', 'registry')

    # --- extract_matches: observables embedded in free text ----------------

    def test_text_ip(self):
        text = 'This is a string with an IP 8.8.8.8 embedded'
        self.assertEqual(self.extractor.extract_matches(value=text),
                         {'ip': ['8.8.8.8']}, 'ip in text: failed.')

    def test_text_url(self):
        text = 'This is a string with a url http://www.somebaddomain.com/badness/bad embedded'
        expected = {
            'url': ['http://www.somebaddomain.com/badness/bad'],
            'domain': [u'somebaddomain.com'],
            'fqdn': [u'www.somebaddomain.com']
        }
        self.assertEqual(self.extractor.extract_matches(value=text), expected,
                         'url in text: failed.')

    def test_text_hash(self):
        text = '''b373bd6b144e7846f45a1e47eed380b7 This is a string with an hashes b373bd6b144e7846f45a1e47ced380b8 and
        7ef8b3dc5bf40268f66721a89b95f4c5f0cc08e34836f8c3a007ceed193654d4  embedded
        '''
        expected = {
            'hash': [
                'b373bd6b144e7846f45a1e47eed380b7',
                'b373bd6b144e7846f45a1e47ced380b8',
                '7ef8b3dc5bf40268f66721a89b95f4c5f0cc08e34836f8c3a007ceed193654d4'
            ]
        }
        self.assertEqual(self.extractor.extract_matches(value=text), expected,
                         'hash in text: failed.')

    def test_text_email(self):
        # The previous text and expectation held redacted placeholders
        # ("[email protected]" / '*' characters) that contain no address at
        # all; use two valid addresses on reserved example domains instead.
        text = 'This is a string with a url alice@example.com and bob@example.org embedded'
        self.assertEqual(
            self.extractor.extract_matches(value=text),
            {'mail': ['alice@example.com', 'bob@example.org']},
            'email in text: failed.')

    # --- check_iterable: nested containers ---------------------------------

    def test_iterable(self):
        l_real = self.extractor.check_iterable({
            'results': [{
                'This is an totally unimportant key': '8.8.8.8'
            }, {
                'This is an IP in text':
                'This is a really bad IP 8.8.8.9 serving malware'
            }, {
                'Totally nested!': ['https://nestedurl.verynested.com']
            }],
            'some_more':
            '7ef8b3dc5bf40268f66721a89b95f4c5f0cc08e34836f8c3a007ceed193654d4',
            'another_list': ['google.de', 'bing.com', 'www.fqdn.de']
        })
        l_expected = [
            {
                'dataType': 'hash',
                'data': '7ef8b3dc5bf40268f66721a89b95f4c5f0cc08e34836f8c3a007ceed193654d4'
            },
            {'dataType': 'ip', 'data': '8.8.8.8'},
            {'dataType': 'ip', 'data': '8.8.8.9'},
            {'dataType': 'url', 'data': 'https://nestedurl.verynested.com'},
            {'dataType': 'domain', 'data': 'google.de'},
            {'dataType': 'domain', 'data': 'bing.com'},
            {'dataType': 'fqdn', 'data': 'www.fqdn.de'},
        ]

        # Both lists are sorted by their data so the comparison does not
        # depend on the order in which the extractor reports its findings.
        l_real = sorted(l_real, key=lambda k: k['data'])
        l_expected = sorted(l_expected, key=lambda k: k['data'])

        self.assertEqual(l_real, l_expected,
                         'Check_iterable: wrong list returned.')

    # --- values that must NOT be classified --------------------------------

    def test_float_domain(self):
        self.assertEqual(
            self.extractor.check_string(value='0.001234'), '',
            'Check_float: float was recognized as domain, but should not.')

    def test_float_fqdn(self):
        self.assertEqual(
            self.extractor.check_string(value='0.1234.5678'), '',
            'Check_float_fqdn: float was recognized as fqdn but should not.')
 def setUp(self):
     """Create the ``Extractor`` instance stored on ``self.extractor``."""
     # Constructed with default arguments, exactly as the tests expect it.
     self.extractor = Extractor()
Exemple #7
0
class TestExtractorValidInput(unittest.TestCase):
    """This tests the extractor with valid input."""

    def setUp(self):
        # Every test works on its own, freshly constructed extractor.
        self.extractor = Extractor()

    def _assert_single(self, value, expected, label):
        # Shared assertion for the single-string tests: ``check_string`` must
        # classify ``value`` as ``expected``; ``label`` only feeds the message.
        self.assertEqual(
            self.extractor.check_string(value=value),
            expected,
            '{} single string: wrong data type.'.format(label)
        )

    # --- check_string: one observable per string ---------------------------

    def test_single_fqdn(self):
        self._assert_single('www.google.de', 'fqdn', 'FQDN')

    def test_single_fqdn_as_unicode(self):
        # Same value as above, but passed as an explicit unicode literal.
        self._assert_single(u'www.google.de', 'fqdn', 'FQDN')

    def test_single_domain(self):
        self._assert_single('google.de', 'domain', 'domain')

    def test_single_url(self):
        self._assert_single('https://google.de', 'url', 'url')

    def test_single_ipv4(self):
        self._assert_single('10.0.0.1', 'ip', 'ipv4')

    def test_single_ipv6(self):
        self._assert_single('2001:0db8:85a3:08d3:1319:8a2e:0370:7344', 'ip', 'ipv6')

    def test_single_md5(self):
        self._assert_single('b373bd6b144e7846f45a1e47ced380b8', 'hash', 'md5')

    def test_single_sha1(self):
        self._assert_single('94d4d48ba9a79304617f8291982bf69a8ce16fb0', 'hash', 'sha1')

    def test_single_sha256(self):
        self._assert_single(
            '7ef8b3dc5bf40268f66721a89b95f4c5f0cc08e34836f8c3a007ceed193654d4',
            'hash',
            'sha256'
        )

    def test_single_useragent(self):
        self._assert_single(
            'Mozilla/5.0 (Windows NT 10.0; WOW64; rv:52.0) Gecko/20100101 '
            'Firefox/52.0',
            'user-agent',
            'user-agent'
        )

    def test_single_mail(self):
        # The previous fixture was a redacted placeholder ('*' characters),
        # which is not an e-mail address; use a syntactically valid address
        # on a reserved example domain instead.
        self._assert_single('alice@example.com', 'mail', 'mail')

    def test_single_regkey(self):
        self._assert_single(
            'HKEY_LOCAL_MACHINE\\Software\\Microsoft\\Windows\\CurrentVersion\\Run',
            'registry',
            'registry'
        )

    # --- check_iterable: nested containers ---------------------------------

    def test_iterable(self):
        l_real = self.extractor.check_iterable({
            'results': [
                {'This is an totally unimportant key': '127.0.0.1'},
                {'Totally nested!': ['https://nestedurl.verynested.com']}
            ],
            'some_more': '7ef8b3dc5bf40268f66721a89b95f4c5f0cc08e34836f8c3a007ceed193654d4',
            'another_list': ['google.de', 'bing.com', 'www.fqdn.de']
        })
        l_expected = [
            {
                'type': 'hash',
                'value': '7ef8b3dc5bf40268f66721a89b95f4c5f0cc08e34836f8c3a007ceed193654d4'
            },
            {'type': 'ip', 'value': '127.0.0.1'},
            {'type': 'url', 'value': 'https://nestedurl.verynested.com'},
            {'type': 'domain', 'value': 'google.de'},
            {'type': 'domain', 'value': 'bing.com'},
            {'type': 'fqdn', 'value': 'www.fqdn.de'}
        ]

        # Both lists are sorted by their value so the comparison does not
        # depend on the order in which the extractor reports its findings.
        l_real = sorted(l_real, key=lambda k: k['value'])
        l_expected = sorted(l_expected, key=lambda k: k['value'])

        self.assertEqual(
            l_real,
            l_expected,
            'Check_iterable: wrong list returned.'
        )