def artifacts(self, raw):
    """Extract observables from ``raw`` when automatic extraction is enabled.

    :param raw: Data handed unchanged to ``Extractor.check_iterable``.
    :return: Whatever ``check_iterable`` returns, or an empty list when
             ``self.auto_extract`` is falsy.
    """
    # Extraction switched off: report no artifacts at all.
    if not self.auto_extract:
        return []

    # The result of get_data() is passed to the extractor as ``ignore``.
    regex_extractor = Extractor(ignore=self.get_data())
    return regex_extractor.check_iterable(raw)
def readwarninglists(self):
    """Load every warning list stored below ``<self.path>/lists``.

    Each file matching ``<self.path>/lists/*/*.json`` is parsed as JSON and
    its ``list`` entry (default: empty list) is run through
    ``Extractor().check_iterable``.

    :return: One dict per file with the keys ``name`` (``'Unknown'`` when the
             file has no ``name``), ``values`` and ``dataTypes``; the latter
             two are parallel lists built from the extractor results.
    """
    pattern = '{}/lists/*/*.json'.format(self.path)
    listcontent = []
    for list_file in glob(pattern):
        # JSON text is UTF-8; decode explicitly instead of relying on the
        # locale's default encoding, which breaks non-ASCII lists on some
        # platforms.
        with io.open(list_file, 'r', encoding='utf-8') as fh:
            content = json.load(fh)

        # The file handle is no longer needed while extracting.
        values = Extractor().check_iterable(content.get('list', []))
        listcontent.append({
            "name": content.get('name', 'Unknown'),
            "values": [value['value'] for value in values],
            "dataTypes": [value['type'] for value in values]
        })
    return listcontent
def readwarninglists(self):
    """Load every warning list stored below ``<self.path>/lists``.

    Each file matching ``<self.path>/lists/*/*.json`` is parsed as JSON and
    its ``list`` entry (default: empty list) is run through
    ``Extractor().check_iterable``.

    :return: One dict per file with the keys ``name`` (``"Unknown"`` when the
             file has no ``name``), ``values`` and ``dataTypes``; the latter
             two are parallel lists built from the ``data`` and ``dataType``
             fields of the extractor results.
    """
    pattern = "{}/lists/*/*.json".format(self.path)
    listcontent = []
    for list_file in glob(pattern):
        # JSON text is UTF-8; decode explicitly instead of relying on the
        # locale's default encoding, which breaks non-ASCII lists on some
        # platforms.
        with io.open(list_file, "r", encoding="utf-8") as fh:
            content = json.load(fh)

        # The file handle is no longer needed while extracting.
        values = Extractor().check_iterable(content.get("list", []))
        listcontent.append({
            "name": content.get("name", "Unknown"),
            "values": [value["data"] for value in values],
            "dataTypes": [value["dataType"] for value in values],
        })
    return listcontent
class TestExtractorValidInput(unittest.TestCase):
    """Checks the extractor's classification and matching of valid input."""

    def setUp(self):
        """Bind a new ``Extractor`` to ``self.extractor``."""
        self.extractor = Extractor()

    def _assert_type(self, candidate, expected, message):
        """Assert that ``check_string`` reports ``expected`` for ``candidate``."""
        detected = self.extractor.check_string(value=candidate)
        self.assertEqual(detected, expected, message)

    def _assert_matches(self, text, expected, message):
        """Assert that ``extract_matches`` returns ``expected`` for ``text``."""
        found = self.extractor.extract_matches(value=text)
        self.assertEqual(found, expected, message)

    # --- single values -------------------------------------------------

    def test_single_fqdn(self):
        self._assert_type('www.google.de', 'fqdn',
                          'FQDN single string: wrong data type.')

    def test_single_fqdn_as_unicode(self):
        self._assert_type(u'www.google.de', 'fqdn',
                          'FQDN single string: wrong data type.')

    def test_single_domain(self):
        self._assert_type('google.de', 'domain',
                          'domain single string: wrong data type.')

    def test_single_url(self):
        self._assert_type('https://google.de', 'url',
                          'url single string: wrong data type.')

    def test_single_ipv4(self):
        self._assert_type('8.8.8.8', 'ip',
                          'ipv4 single string: wrong data type.')

    def test_single_ipv6(self):
        self._assert_type('2001:0db8:85a3:08d3:1319:8a2e:0370:7344', 'ip',
                          'ipv6 single string: wrong data type.')

    def test_single_md5(self):
        self._assert_type('b373bd6b144e7846f45a1e47ced380b8', 'hash',
                          'md5 single string: wrong data type.')

    def test_single_sha1(self):
        self._assert_type('94d4d48ba9a79304617f8291982bf69a8ce16fb0', 'hash',
                          'sha1 single string: wrong data type.')

    def test_single_sha256(self):
        sha256 = '7ef8b3dc5bf40268f66721a89b95f4c5f0cc08e34836f8c3a007ceed193654d4'
        self._assert_type(sha256, 'hash',
                          'sha256 single string: wrong data type.')

    def test_single_useragent(self):
        agent = ('Mozilla/5.0 (Windows NT 10.0; WOW64; rv:52.0) Gecko/20100101 '
                 'Firefox/52.0')
        self._assert_type(agent, 'user-agent',
                          'user-agent single string: wrong data type.')

    def test_single_mail(self):
        # NOTE(review): this address literal looks masked/redacted; confirm
        # the intended fixture value.
        self._assert_type('*****@*****.**', 'mail',
                          'mail single string: wrong data type.')

    def test_single_regkey(self):
        regkey = 'HKEY_LOCAL_MACHINE\\Software\\Microsoft\\Windows\\CurrentVersion\\Run'
        self._assert_type(regkey, 'registry',
                          'registry single string: wrong data type.')

    # --- values embedded in free text ----------------------------------

    def test_text_ip(self):
        self._assert_matches(
            'This is a string with an IP 8.8.8.8 embedded',
            {'ip': ['8.8.8.8']},
            'ip in text: failed.')

    def test_text_url(self):
        expected = {
            'url': ['http://www.somebaddomain.com/badness/bad'],
            'domain': [u'somebaddomain.com'],
            'fqdn': [u'www.somebaddomain.com'],
        }
        self._assert_matches(
            'This is a string with a url http://www.somebaddomain.com/badness/bad embedded',
            expected,
            'url in text: failed.')

    def test_text_hash(self):
        text = ('b373bd6b144e7846f45a1e47eed380b7 '
                'This is a string with an hashes b373bd6b144e7846f45a1e47ced380b8 and '
                '7ef8b3dc5bf40268f66721a89b95f4c5f0cc08e34836f8c3a007ceed193654d4 '
                'embedded ')
        expected = {
            'hash': [
                'b373bd6b144e7846f45a1e47eed380b7',
                'b373bd6b144e7846f45a1e47ced380b8',
                '7ef8b3dc5bf40268f66721a89b95f4c5f0cc08e34836f8c3a007ceed193654d4',
            ]
        }
        self._assert_matches(text, expected, 'hash in text: failed.')

    def test_text_email(self):
        # NOTE(review): both the input text and the expected addresses look
        # masked/obfuscated; confirm the intended fixture values.
        self._assert_matches(
            'This is a string with a url [email protected] and [email protected] embedded',
            {'mail': ['*****@*****.**', '*****@*****.**']},
            'email in text: failed.')

    # --- nested structures ---------------------------------------------

    def test_iterable(self):
        def by_data(entry):
            return entry['data']

        nested_input = {
            'results': [
                {'This is an totally unimportant key': '8.8.8.8'},
                {'This is an IP in text': 'This is a really bad IP 8.8.8.9 serving malware'},
                {'Totally nested!': ['https://nestedurl.verynested.com']},
            ],
            'some_more': '7ef8b3dc5bf40268f66721a89b95f4c5f0cc08e34836f8c3a007ceed193654d4',
            'another_list': ['google.de', 'bing.com', 'www.fqdn.de'],
        }
        l_expected = [
            {'dataType': 'hash',
             'data': '7ef8b3dc5bf40268f66721a89b95f4c5f0cc08e34836f8c3a007ceed193654d4'},
            {'dataType': 'ip', 'data': '8.8.8.8'},
            {'dataType': 'ip', 'data': '8.8.8.9'},
            {'dataType': 'url', 'data': 'https://nestedurl.verynested.com'},
            {'dataType': 'domain', 'data': 'google.de'},
            {'dataType': 'domain', 'data': 'bing.com'},
            {'dataType': 'fqdn', 'data': 'www.fqdn.de'},
        ]

        # Compare order-independently by sorting both sides on the data field.
        l_real = sorted(self.extractor.check_iterable(nested_input), key=by_data)
        l_expected = sorted(l_expected, key=by_data)
        self.assertEqual(l_real, l_expected,
                         'Check_iterable: wrong list returned.')

    # --- look-alikes that must not be classified -----------------------

    def test_float_domain(self):
        self._assert_type(
            '0.001234', '',
            'Check_float: float was recognized as domain, but should not.')

    def test_float_fqdn(self):
        self._assert_type(
            '0.1234.5678', '',
            'Check_float_fqdn: float was recognized as fqdn but should not.')
def setUp(self):
    """Bind a newly created ``Extractor`` to ``self.extractor``."""
    fresh_extractor = Extractor()
    self.extractor = fresh_extractor
class TestExtractorValidInput(unittest.TestCase):
    """Checks the extractor's classification of valid input."""

    def setUp(self):
        """Bind a new ``Extractor`` to ``self.extractor``."""
        self.extractor = Extractor()

    def _assert_type(self, candidate, expected, message):
        """Assert that ``check_string`` reports ``expected`` for ``candidate``."""
        detected = self.extractor.check_string(value=candidate)
        self.assertEqual(detected, expected, message)

    # --- single values -------------------------------------------------

    def test_single_fqdn(self):
        self._assert_type('www.google.de', 'fqdn',
                          'FQDN single string: wrong data type.')

    def test_single_fqdn_as_unicode(self):
        self._assert_type(u'www.google.de', 'fqdn',
                          'FQDN single string: wrong data type.')

    def test_single_domain(self):
        self._assert_type('google.de', 'domain',
                          'domain single string: wrong data type.')

    def test_single_url(self):
        self._assert_type('https://google.de', 'url',
                          'url single string: wrong data type.')

    def test_single_ipv4(self):
        self._assert_type('10.0.0.1', 'ip',
                          'ipv4 single string: wrong data type.')

    def test_single_ipv6(self):
        self._assert_type('2001:0db8:85a3:08d3:1319:8a2e:0370:7344', 'ip',
                          'ipv6 single string: wrong data type.')

    def test_single_md5(self):
        self._assert_type('b373bd6b144e7846f45a1e47ced380b8', 'hash',
                          'md5 single string: wrong data type.')

    def test_single_sha1(self):
        self._assert_type('94d4d48ba9a79304617f8291982bf69a8ce16fb0', 'hash',
                          'sha1 single string: wrong data type.')

    def test_single_sha256(self):
        sha256 = '7ef8b3dc5bf40268f66721a89b95f4c5f0cc08e34836f8c3a007ceed193654d4'
        self._assert_type(sha256, 'hash',
                          'sha256 single string: wrong data type.')

    def test_single_useragent(self):
        agent = ('Mozilla/5.0 (Windows NT 10.0; WOW64; rv:52.0) Gecko/20100101 '
                 'Firefox/52.0')
        self._assert_type(agent, 'user-agent',
                          'user-agent single string: wrong data type.')

    def test_single_mail(self):
        # NOTE(review): this address literal looks masked/redacted; confirm
        # the intended fixture value.
        self._assert_type('*****@*****.**', 'mail',
                          'mail single string: wrong data type.')

    def test_single_regkey(self):
        regkey = 'HKEY_LOCAL_MACHINE\\Software\\Microsoft\\Windows\\CurrentVersion\\Run'
        self._assert_type(regkey, 'registry',
                          'registry single string: wrong data type.')

    # --- nested structures ---------------------------------------------

    def test_iterable(self):
        def by_value(entry):
            return entry['value']

        nested_input = {
            'results': [
                {'This is an totally unimportant key': '127.0.0.1'},
                {'Totally nested!': ['https://nestedurl.verynested.com']},
            ],
            'some_more': '7ef8b3dc5bf40268f66721a89b95f4c5f0cc08e34836f8c3a007ceed193654d4',
            'another_list': ['google.de', 'bing.com', 'www.fqdn.de'],
        }
        l_expected = [
            {'type': 'hash',
             'value': '7ef8b3dc5bf40268f66721a89b95f4c5f0cc08e34836f8c3a007ceed193654d4'},
            {'type': 'ip', 'value': '127.0.0.1'},
            {'type': 'url', 'value': 'https://nestedurl.verynested.com'},
            {'type': 'domain', 'value': 'google.de'},
            {'type': 'domain', 'value': 'bing.com'},
            {'type': 'fqdn', 'value': 'www.fqdn.de'},
        ]

        # Compare order-independently by sorting both sides on the value field.
        l_real = sorted(self.extractor.check_iterable(nested_input), key=by_value)
        l_expected = sorted(l_expected, key=by_value)
        self.assertEqual(l_real, l_expected,
                         'Check_iterable: wrong list returned.')