class TestRuleParser(unittest.TestCase):
    """Exercise Plyara parsing of the bundled YARA test rulesets."""

    def setUp(self):
        self.parser = Plyara()

    def _parse_file(self, path):
        """Read a ruleset file and return the parsed rule list."""
        with open(path, 'r') as handle:
            data = handle.read()
        return self.parser.parse_string(data)

    def test_import_pe(self):
        for parsed in self._parse_file('tests/data/import_ruleset_pe.yar'):
            self.assertIn('pe', parsed['imports'])

    def test_import_elf(self):
        for parsed in self._parse_file('tests/data/import_ruleset_elf.yar'):
            self.assertIn('elf', parsed['imports'])

    def test_import_cuckoo(self):
        for parsed in self._parse_file('tests/data/import_ruleset_cuckoo.yar'):
            self.assertIn('cuckoo', parsed['imports'])

    def test_import_magic(self):
        for parsed in self._parse_file('tests/data/import_ruleset_magic.yar'):
            self.assertIn('magic', parsed['imports'])

    def test_import_hash(self):
        for parsed in self._parse_file('tests/data/import_ruleset_hash.yar'):
            self.assertIn('hash', parsed['imports'])

    def test_import_math(self):
        for parsed in self._parse_file('tests/data/import_ruleset_math.yar'):
            self.assertIn('math', parsed['imports'])

    def test_import_dotnet(self):
        for parsed in self._parse_file('tests/data/import_ruleset_dotnet.yar'):
            self.assertIn('dotnet', parsed['imports'])

    def test_import_androguard(self):
        for parsed in self._parse_file('tests/data/import_ruleset_androguard.yar'):
            self.assertIn('androguard', parsed['imports'])

    def test_scopes(self):
        for entry in self._parse_file('tests/data/scope_ruleset.yar'):
            name = entry['rule_name']
            if name == "GlobalScope":
                self.assertIn('global', entry['scopes'])
            elif name == "PrivateScope":
                self.assertIn('private', entry['scopes'])
            elif name == "PrivateGlobalScope":
                self.assertIn('global', entry['scopes'])
                self.assertIn('private', entry['scopes'])
            else:
                raise AssertionError(UNHANDLED_RULE_MSG.format(name))

    def test_tags(self):
        # Table of expected tags keyed by rule name.
        expected = {
            "OneTag": ['tag1'],
            "TwoTags": ['tag1', 'tag2'],
            "ThreeTags": ['tag1', 'tag2', 'tag3'],
        }
        for entry in self._parse_file('tests/data/tag_ruleset.yar'):
            name = entry['rule_name']
            if name not in expected:
                raise AssertionError(UNHANDLED_RULE_MSG.format(name))
            wanted = expected[name]
            self.assertEqual(len(entry['tags']), len(wanted))
            for tag in wanted:
                self.assertIn(tag, entry['tags'])

    def test_metadata(self):
        for entry in self._parse_file('tests/data/metadata_ruleset.yar'):
            name = entry['rule_name']
            meta = entry['metadata']
            if name == "StringTypeMetadata":
                self.assertEqual(meta.get('string_value'), 'String Metadata')
            elif name == "IntegerTypeMetadata":
                self.assertEqual(meta.get('integer_value'), '100')
            elif name == "BooleanTypeMetadata":
                self.assertEqual(meta.get('boolean_value'), 'true')
            elif name == "AllTypesMetadata":
                self.assertEqual(meta.get('string_value'), 'Different String Metadata')
                self.assertEqual(meta.get('integer_value'), '33')
                self.assertEqual(meta.get('boolean_value'), 'false')
            else:
                raise AssertionError(UNHANDLED_RULE_MSG.format(name))

    def test_strings(self):
        for entry in self._parse_file('tests/data/string_ruleset.yar'):
            name = entry['rule_name']
            strings = entry['strings']
            pairs = [(s['name'], s['value']) for s in strings]
            triples = [(s['name'], s['value'], s['modifiers'])
                       for s in strings if 'modifiers' in s]
            if name == "Text":
                self.assertEqual(pairs, [('$text_string', '\"foobar\"')])
            elif name == "FullwordText":
                self.assertEqual(
                    triples, [('$text_string', '\"foobar\"', ['fullword'])])
            elif name == "CaseInsensitiveText":
                self.assertEqual(
                    triples, [('$text_string', '\"foobar\"', ['nocase'])])
            elif name == "WideCharText":
                self.assertEqual(
                    triples, [('$wide_string', '\"Borland\"', ['wide'])])
            elif name == "WideCharAsciiText":
                self.assertEqual(
                    triples,
                    [('$wide_and_ascii_string', '\"Borland\"', ['wide', 'ascii'])])
            elif name == "HexWildcard":
                self.assertEqual(pairs, [('$hex_string', '{ E2 34 ?? C8 A? FB }')])
            elif name == "HexJump":
                self.assertEqual(pairs, [('$hex_string', '{ F4 23 [4-6] 62 B4 }')])
            elif name == "HexAlternatives":
                self.assertEqual(
                    pairs, [('$hex_string', '{ F4 23 ( 62 B4 | 56 ) 45 }')])
            elif name == "HexMultipleAlternatives":
                self.assertEqual(
                    pairs,
                    [('$hex_string', '{ F4 23 ( 62 B4 | 56 | 45 ?? 67 ) 45 }')])
            elif name == "RegExp":
                self.assertEqual(
                    pairs,
                    [('$re1', r'/md5: [0-9a-fA-F]{32}/'),
                     ('$re2', r'/state: (on|off)/i'),
                     ('$re3', r'/\x00https?:\/\/[^\x00]{4,500}\x00\x00\x00/')])
            else:
                raise AssertionError(UNHANDLED_RULE_MSG.format(name))

    def test_conditions(self):
        # Just checking for parsing errors
        self._parse_file('tests/data/condition_ruleset.yar')

    def test_include(self):
        result = self._parse_file('tests/data/include_ruleset.yar')
        self.assertEqual(result[0]['includes'], ['string_ruleset.yar'])

    def test_include_statements(self):
        self.parser.parse_string(
            'include "file1.yara"\ninclude "file2.yara"\ninclude "file3.yara"')
        self.assertEqual(len(self.parser.includes), 3)
class TestRuleParser(unittest.TestCase):
    """Exercise Plyara parsing of the bundled YARA test rulesets."""

    def setUp(self):
        self.parser = Plyara()

    def _parse_file(self, filename):
        """Parse a ruleset from the shared test-data directory."""
        with open(data_dir.joinpath(filename), 'r') as fh:
            return self.parser.parse_string(fh.read())

    def test_import_pe(self):
        for rule in self._parse_file('import_ruleset_pe.yar'):
            self.assertIn('pe', rule['imports'])

    def test_import_elf(self):
        for rule in self._parse_file('import_ruleset_elf.yar'):
            self.assertIn('elf', rule['imports'])

    def test_import_cuckoo(self):
        for rule in self._parse_file('import_ruleset_cuckoo.yar'):
            self.assertIn('cuckoo', rule['imports'])

    def test_import_magic(self):
        for rule in self._parse_file('import_ruleset_magic.yar'):
            self.assertIn('magic', rule['imports'])

    def test_import_hash(self):
        for rule in self._parse_file('import_ruleset_hash.yar'):
            self.assertIn('hash', rule['imports'])

    def test_import_math(self):
        for rule in self._parse_file('import_ruleset_math.yar'):
            self.assertIn('math', rule['imports'])

    def test_import_dotnet(self):
        for rule in self._parse_file('import_ruleset_dotnet.yar'):
            self.assertIn('dotnet', rule['imports'])

    def test_import_androguard(self):
        for rule in self._parse_file('import_ruleset_androguard.yar'):
            self.assertIn('androguard', rule['imports'])

    def test_scopes(self):
        for entry in self._parse_file('scope_ruleset.yar'):
            rulename = entry['rule_name']
            if rulename == 'GlobalScope':
                self.assertIn('global', entry['scopes'])
            elif rulename == 'PrivateScope':
                self.assertIn('private', entry['scopes'])
            elif rulename == 'PrivateGlobalScope':
                self.assertIn('global', entry['scopes'])
                self.assertIn('private', entry['scopes'])
            else:
                raise AssertionError(UNHANDLED_RULE_MSG.format(rulename))

    def test_tags(self):
        for entry in self._parse_file('tag_ruleset.yar'):
            rulename = entry['rule_name']
            if rulename == 'OneTag':
                self.assertEqual(len(entry['tags']), 1)
                self.assertIn('tag1', entry['tags'])
            elif rulename == 'TwoTags':
                self.assertEqual(len(entry['tags']), 2)
                self.assertIn('tag1', entry['tags'])
                self.assertIn('tag2', entry['tags'])
            elif rulename == 'ThreeTags':
                # BUG FIX: was `assertTrue(len(entry['tags']), 3)` -- the
                # two-argument form treats 3 as the failure *message*, so the
                # assertion could never fail.
                self.assertEqual(len(entry['tags']), 3)
                self.assertIn('tag1', entry['tags'])
                self.assertIn('tag2', entry['tags'])
                self.assertIn('tag3', entry['tags'])
            else:
                raise AssertionError(UNHANDLED_RULE_MSG.format(rulename))

    def test_metadata(self):
        for entry in self._parse_file('metadata_ruleset.yar'):
            rulename = entry['rule_name']
            kv = entry['metadata']
            # Flatten the list of one-entry dicts into (key, value) pairs.
            kv_list = [(k, v) for dic in kv for k, v in dic.items()]
            if rulename == 'StringTypeMetadata':
                self.assertEqual(len(kv), 1)
                self.assertEqual(kv_list[0][0], 'string_value')
                self.assertEqual(kv_list[0][1], 'String Metadata')
            elif rulename == 'IntegerTypeMetadata':
                self.assertEqual(len(kv), 1)
                self.assertEqual(kv_list[0][0], 'integer_value')
                # FIX: was assertIs(..., 100); int identity is a CPython
                # small-int-cache detail, not a language guarantee.
                self.assertEqual(kv_list[0][1], 100)
            elif rulename == 'BooleanTypeMetadata':
                self.assertEqual(len(kv), 1)
                self.assertEqual(kv_list[0][0], 'boolean_value')
                self.assertIs(kv_list[0][1], True)
            elif rulename == 'AllTypesMetadata':
                self.assertEqual(len(kv), 3)
                self.assertEqual(kv_list[0][0], 'string_value')
                self.assertEqual(kv_list[1][0], 'integer_value')
                self.assertEqual(kv_list[2][0], 'boolean_value')
                self.assertEqual(kv_list[0][1], 'Different String Metadata')
                self.assertEqual(kv_list[1][1], 33)
                self.assertIs(kv_list[2][1], False)
            else:
                raise AssertionError(UNHANDLED_RULE_MSG.format(rulename))

    def test_strings(self):
        for entry in self._parse_file('string_ruleset.yar'):
            rulename = entry['rule_name']
            kv = entry['strings']
            kv_list = [tuple(x.values()) for x in kv]
            if rulename == 'Text':
                self.assertEqual(len(kv), 1)
                self.assertEqual(kv_list[0], ('$text_string', 'foobar', 'text'))
            elif rulename == 'FullwordText':
                self.assertEqual(len(kv), 1)
                self.assertEqual(
                    kv_list[0], ('$text_string', 'foobar', 'text', ['fullword']))
            elif rulename == 'CaseInsensitiveText':
                self.assertEqual(len(kv), 1)
                self.assertEqual(
                    kv_list[0], ('$text_string', 'foobar', 'text', ['nocase']))
            elif rulename == 'WideCharText':
                self.assertEqual(len(kv), 1)
                self.assertEqual(
                    kv_list[0], ('$wide_string', 'Borland', 'text', ['wide']))
            elif rulename == 'WideCharAsciiText':
                self.assertEqual(len(kv), 1)
                self.assertEqual(
                    kv_list[0],
                    ('$wide_and_ascii_string', 'Borland', 'text', ['wide', 'ascii']))
            elif rulename == 'HexWildcard':
                self.assertEqual(len(kv), 1)
                self.assertEqual(
                    kv_list[0], ('$hex_string', '{ E2 34 ?? C8 A? FB }', 'byte'))
            elif rulename == 'HexJump':
                self.assertEqual(len(kv), 1)
                self.assertEqual(
                    kv_list[0], ('$hex_string', '{ F4 23 [4-6] 62 B4 }', 'byte'))
            elif rulename == 'HexAlternatives':
                self.assertEqual(len(kv), 1)
                self.assertEqual(
                    kv_list[0],
                    ('$hex_string', '{ F4 23 ( 62 B4 | 56 ) 45 }', 'byte'))
            elif rulename == 'HexMultipleAlternatives':
                self.assertEqual(len(kv), 1)
                self.assertEqual(
                    kv_list[0],
                    ('$hex_string', '{ F4 23 ( 62 B4 | 56 | 45 ?? 67 ) 45 }', 'byte'))
            elif rulename == 'RegExp':
                # Only the first three fields are pinned here (a trailing
                # modifiers entry, if present, is deliberately not asserted).
                self.assertEqual(len(kv), 3)
                self.assertEqual(
                    kv_list[0][:3], ('$re1', '/md5: [0-9a-fA-F]{32}/', 'regex'))
                self.assertEqual(
                    kv_list[1][:3], ('$re2', '/state: (on|off)/i', 'regex'))
                self.assertEqual(
                    kv_list[2][:3],
                    ('$re3', r'/\x00https?:\/\/[^\x00]{4,500}\x00\x00\x00/', 'regex'))
            elif rulename == 'Xor':
                self.assertEqual(len(kv), 1)
                self.assertEqual(
                    kv_list[0],
                    ('$xor_string', 'This program cannot', 'text', ['xor']))
            elif rulename == 'WideXorAscii':
                self.assertEqual(len(kv), 1)
                self.assertEqual(
                    kv_list[0],
                    ('$xor_string', 'This program cannot', 'text',
                     ['xor', 'wide', 'ascii']))
            elif rulename == 'WideXor':
                self.assertEqual(len(kv), 1)
                self.assertEqual(
                    kv_list[0],
                    ('$xor_string', 'This program cannot', 'text', ['xor', 'wide']))
            elif rulename == 'DoubleBackslash':
                self.assertEqual(len(kv), 1)
                self.assertEqual(kv_list[0], ('$bs', r'\"\\\\\\\"', 'text'))
            else:
                raise AssertionError(UNHANDLED_RULE_MSG.format(rulename))

    def test_conditions(self):
        # Just checking for parsing errors
        self._parse_file('condition_ruleset.yar')

    def test_include(self):
        result = self._parse_file('include_ruleset.yar')
        self.assertEqual(result[0]['includes'], ['string_ruleset.yar'])

    def test_include_statements(self):
        self.parser.parse_string(
            'include "file1.yara"\ninclude "file2.yara"\ninclude "file3.yara"')
        self.assertEqual(len(self.parser.includes), 3)

    def test_rules_from_yara_project(self):
        # CONSISTENCY FIX: use data_dir like every other test in this class
        # (was a hard-coded 'tests/data/...' relative path).
        with open(data_dir.joinpath('test_rules_from_yara_project.yar'), 'r') as fh:
            inputRules = fh.read()
        plyara = Plyara()
        output = plyara.parse_string(inputRules)
        self.assertEqual(len(output), 293)
def setUp(self):
    """Create a fresh Plyara parser before each test so no state leaks."""
    self.parser = Plyara()
def test_is_valid_rule_name(self):
    """Valid vs. invalid YARA rule names via the deprecated helper.

    The whole batch runs inside assertWarns, which requires at least one
    DeprecationWarning to be emitted by the calls.
    """
    accepted = ('test', 'test123', 'test_test', '_test_', 'include_test')
    rejected = ('123test', '123 test', 'test 123', 'test test',
                'test-test', 'include', 'test!*@&*!&', '')
    with self.assertWarns(DeprecationWarning):
        for candidate in accepted:
            self.assertTrue(Plyara.is_valid_rule_name(candidate))
        for candidate in rejected:
            self.assertFalse(Plyara.is_valid_rule_name(candidate))
def test_detect_dependencies(self):
    """Rule-dependency detection per fixture rule, via the deprecated helper."""
    with open(data_dir.joinpath('detect_dependencies_ruleset.yar'), 'r') as fh:
        result = Plyara().parse_string(fh.read())
    # Expected dependency lists, indexed by rule position in the fixture.
    expected = [
        [],
        [],
        [],
        ['is__osx', 'priv01', 'priv02', 'priv03', 'priv04'],
        ['is__elf', 'priv01', 'priv02', 'priv03', 'priv04'],
        ['is__elf', 'is__osx', 'priv01', 'priv02'],
        ['is__elf', 'is__osx', 'priv01'],
        ['is__elf'],
        ['is__osx', 'is__elf'],
        ['is__osx'],
        ['is__elf', 'is__osx'],
    ]
    with self.assertWarns(DeprecationWarning):
        for index, deps in enumerate(expected):
            self.assertEqual(Plyara.detect_dependencies(result[index]), deps)
class YaraImporter(object):
    """Imports YARA signatures into Assemblyline through an update client.

    Rules are parsed with Plyara, enriched from their metadata, converted
    to Assemblyline ``Signature`` records and uploaded in one batch.
    """

    def __init__(self, importer_type, al_client, logger=None):
        # Fall back to a dedicated Assemblyline logger when none is supplied.
        if not logger:
            from assemblyline.common import log as al_log
            al_log.init_logging('yara_importer')
            logger = logging.getLogger('assemblyline.yara_importer')
            logger.setLevel(logging.INFO)
        self.importer_type = importer_type
        self.update_client = al_client
        self.parser = Plyara()
        self.classification = forge.get_classification()
        self.log = logger

    def _save_signatures(self, signatures, source, default_status=DEFAULT_STATUS, default_classification=None):
        """Convert parsed signatures to Signature records and upload them.

        Returns False when there is nothing to upload, otherwise the
        ``success`` count reported by the update client.
        """
        if len(signatures) == 0:
            self.log.info(f"There are no signatures for {source}, skipping...")
            return False
        order = 1
        upload_list = []
        for signature in signatures:
            classification = default_classification or self.classification.UNRESTRICTED
            signature_id = None
            version = 1
            status = default_status
            # Metadata is a sequence of single-key dicts; later entries win.
            for meta in signature.get('metadata', {}):
                for k, v in meta.items():
                    if k in ["classification", "sharing"]:
                        classification = v
                    elif k in ['id', 'rule_id', 'signature_id']:
                        signature_id = v
                    elif k in ['version', 'rule_version', 'revision']:
                        version = v
                    elif k in ['status', 'al_status']:
                        status = v
            # Convert CCCS YARA status to AL signature status
            if status == "RELEASED":
                status = "DEPLOYED"
            elif status == "DEPRECATED":
                status = "DISABLED"
            # Fallback status if the metadata value is not a known AL status.
            if status not in ["DEPLOYED", "NOISY", "DISABLED", "STAGING", "TESTING", "INVALID"]:
                status = default_status
            # Fix imports and remove cuckoo
            signature['imports'] = utils.detect_imports(signature)
            if "cuckoo" not in signature['imports']:
                sig = Signature(
                    dict(
                        classification=classification,
                        data=utils.rebuild_yara_rule(signature),
                        name=signature.get('rule_name'),
                        order=order,
                        # int(float(...)) tolerates versions like "2.0".
                        revision=int(float(version)),
                        signature_id=signature_id or signature.get('rule_name'),
                        source=source,
                        status=status,
                        type=self.importer_type,
                    ))
                upload_list.append(sig.as_primitives())
            else:
                self.log.warning(
                    f"Signature '{signature.get('rule_name')}' skipped because it uses cuckoo module."
                )
            # NOTE(review): order advances even for skipped cuckoo rules, so
            # the "{r['success']}/{order - 1}" log below counts skipped rules
            # in the denominator -- confirm this is intended.
            order += 1
        r = self.update_client.signature.add_update_many(
            source, self.importer_type, upload_list)
        self.log.info(
            f"Imported {r['success']}/{order - 1} signatures from {source} into Assemblyline"
        )
        return r['success']

    def _split_signatures(self, data):
        """Parse raw YARA text into a list of signature dicts.

        A fresh Plyara instance replaces self.parser each call, discarding
        any state from a previous parse.
        """
        self.parser = Plyara()
        return self.parser.parse_string(data)

    def import_data(self, yara_bin, source, default_status=DEFAULT_STATUS, default_classification=None):
        """Parse and upload a YARA ruleset given as a string."""
        return self._save_signatures(
            self._split_signatures(yara_bin),
            source,
            default_status=default_status,
            default_classification=default_classification)

    def import_file(self, file_path: str, source: str, default_status=DEFAULT_STATUS, default_classification=None):
        """Read a YARA file from disk and import it.

        Raises Exception when the (user-expanded) path does not exist.
        """
        self.log.info(f"Importing file: {file_path}")
        cur_file = os.path.expanduser(file_path)
        if os.path.exists(cur_file):
            with open(cur_file, "r") as yara_file:
                yara_bin = yara_file.read()
            return self.import_data(
                yara_bin,
                source or os.path.basename(cur_file),
                default_status=default_status,
                default_classification=default_classification)
        else:
            raise Exception(f"File {cur_file} does not exists.")
def _split_signatures(self, data):
    """Parse raw YARA text into signature dicts using a brand-new parser."""
    fresh = Plyara()
    self.parser = fresh
    return fresh.parse_string(data)
def test_is_valid_rule_name(self):
    """Valid and invalid YARA rule identifiers."""
    accepted = ('test', 'test123', 'test_test', '_test_', 'include_test')
    rejected = ('123test', '123 test', 'test 123', 'test test',
                'test-test', 'include', 'test!*@&*!&', '')
    for candidate in accepted:
        self.assertTrue(Plyara.is_valid_rule_name(candidate))
    for candidate in rejected:
        self.assertFalse(Plyara.is_valid_rule_name(candidate))
def test_detect_dependencies(self):
    """Rule-dependency detection for each rule of the fixture ruleset."""
    with open('tests/data/detect_dependencies_ruleset.yar', 'r') as handle:
        result = Plyara().parse_string(handle.read())
    # Expected dependency lists, indexed by rule position in the fixture.
    expected = [
        [],
        [],
        [],
        ['is__osx', 'priv01', 'priv02', 'priv03', 'priv04'],
        ['is__elf', 'priv01', 'priv02', 'priv03', 'priv04'],
        ['is__elf', 'is__osx', 'priv01', 'priv02'],
        ['is__elf', 'is__osx', 'priv01'],
        ['is__elf'],
        ['is__osx', 'is__elf'],
        ['is__osx'],
        ['is__elf', 'is__osx'],
    ]
    for index, deps in enumerate(expected):
        self.assertEqual(Plyara.detect_dependencies(result[index]), deps)
def setUp(self):
    # meta_as_kv presumably makes the parser expose metadata as key/value
    # pairs -- confirm against the plyara documentation.
    self.parser = Plyara(meta_as_kv=True)
def parse_rules(rules):
    """Parse a YARA ruleset string into a list of rule dicts.

    A fresh Plyara instance is used per call so parser state never leaks
    between invocations.
    """
    plyara = Plyara()
    # BUG FIX: previously parsed the out-of-scope name `inputRules`,
    # silently ignoring the `rules` argument.
    return plyara.parse_string(rules)
class TestRuleParser(unittest.TestCase):
    """Exercise Plyara parsing of the bundled YARA test rulesets."""

    def setUp(self):
        self.parser = Plyara()

    def _parse_file(self, filename):
        """Parse a ruleset from the shared test-data directory."""
        with data_dir.joinpath(filename).open('r') as fh:
            return self.parser.parse_string(fh.read())

    def test_import_pe(self):
        for rule in self._parse_file('import_ruleset_pe.yar'):
            self.assertIn('pe', rule['imports'])

    def test_import_elf(self):
        for rule in self._parse_file('import_ruleset_elf.yar'):
            self.assertIn('elf', rule['imports'])

    def test_import_cuckoo(self):
        for rule in self._parse_file('import_ruleset_cuckoo.yar'):
            self.assertIn('cuckoo', rule['imports'])

    def test_import_magic(self):
        for rule in self._parse_file('import_ruleset_magic.yar'):
            self.assertIn('magic', rule['imports'])

    def test_import_hash(self):
        for rule in self._parse_file('import_ruleset_hash.yar'):
            self.assertIn('hash', rule['imports'])

    def test_import_math(self):
        for rule in self._parse_file('import_ruleset_math.yar'):
            self.assertIn('math', rule['imports'])

    def test_import_dotnet(self):
        for rule in self._parse_file('import_ruleset_dotnet.yar'):
            self.assertIn('dotnet', rule['imports'])

    def test_import_androguard(self):
        for rule in self._parse_file('import_ruleset_androguard.yar'):
            self.assertIn('androguard', rule['imports'])

    def test_scopes(self):
        for entry in self._parse_file('scope_ruleset.yar'):
            rulename = entry['rule_name']
            if rulename == 'GlobalScope':
                self.assertIn('global', entry['scopes'])
            elif rulename == 'PrivateScope':
                self.assertIn('private', entry['scopes'])
            elif rulename == 'PrivateGlobalScope':
                self.assertIn('global', entry['scopes'])
                self.assertIn('private', entry['scopes'])
            else:
                raise AssertionError(UNHANDLED_RULE_MSG.format(rulename))

    def test_tags(self):
        for entry in self._parse_file('tag_ruleset.yar'):
            rulename = entry['rule_name']
            if rulename == 'OneTag':
                self.assertEqual(len(entry['tags']), 1)
                self.assertIn('tag1', entry['tags'])
            elif rulename == 'TwoTags':
                self.assertEqual(len(entry['tags']), 2)
                self.assertIn('tag1', entry['tags'])
                self.assertIn('tag2', entry['tags'])
            elif rulename == 'ThreeTags':
                # BUG FIX: was `assertTrue(len(entry['tags']), 3)` -- the
                # two-argument form treats 3 as the failure *message*, so the
                # assertion could never fail.
                self.assertEqual(len(entry['tags']), 3)
                self.assertIn('tag1', entry['tags'])
                self.assertIn('tag2', entry['tags'])
                self.assertIn('tag3', entry['tags'])
            else:
                raise AssertionError(UNHANDLED_RULE_MSG.format(rulename))

    def test_metadata(self):
        for entry in self._parse_file('metadata_ruleset.yar'):
            rulename = entry['rule_name']
            kv = entry['metadata']
            # Flatten the list of one-entry dicts into (key, value) pairs.
            kv_list = [(k, v) for dic in kv for k, v in dic.items()]
            if rulename == 'StringTypeMetadata':
                self.assertEqual(len(kv), 1)
                self.assertEqual(kv_list[0][0], 'string_value')
                self.assertEqual(kv_list[0][1], 'String Metadata')
            elif rulename == 'IntegerTypeMetadata':
                self.assertEqual(len(kv), 1)
                self.assertEqual(kv_list[0][0], 'integer_value')
                # FIX: was assertIs(..., 100); int identity is a CPython
                # small-int-cache detail, not a language guarantee.
                self.assertEqual(kv_list[0][1], 100)
            elif rulename == 'BooleanTypeMetadata':
                self.assertEqual(len(kv), 1)
                self.assertEqual(kv_list[0][0], 'boolean_value')
                self.assertIs(kv_list[0][1], True)
            elif rulename == 'AllTypesMetadata':
                self.assertEqual(len(kv), 3)
                self.assertEqual(kv_list[0][0], 'string_value')
                self.assertEqual(kv_list[1][0], 'integer_value')
                self.assertEqual(kv_list[2][0], 'boolean_value')
                self.assertEqual(kv_list[0][1], 'Different String Metadata')
                self.assertEqual(kv_list[1][1], 33)
                self.assertIs(kv_list[2][1], False)
            else:
                raise AssertionError(UNHANDLED_RULE_MSG.format(rulename))

    def test_strings(self):
        for entry in self._parse_file('string_ruleset.yar'):
            rulename = entry['rule_name']
            kv = entry['strings']
            if rulename == 'Text':
                self.assertEqual(kv, [{
                    'name': '$text_string',
                    'value': 'foobar',
                    'type': 'text'
                }])
            elif rulename == 'FullwordText':
                self.assertEqual(kv, [{
                    'name': '$text_string',
                    'value': 'foobar',
                    'type': 'text',
                    'modifiers': ['fullword']
                }])
            elif rulename == 'CaseInsensitiveText':
                self.assertEqual(kv, [{
                    'name': '$text_string',
                    'value': 'foobar',
                    'type': 'text',
                    'modifiers': ['nocase']
                }])
            elif rulename == 'WideCharText':
                self.assertEqual(kv, [{
                    'name': '$wide_string',
                    'value': 'Borland',
                    'type': 'text',
                    'modifiers': ['wide']
                }])
            elif rulename == 'WideCharAsciiText':
                self.assertEqual(kv, [{
                    'name': '$wide_and_ascii_string',
                    'value': 'Borland',
                    'type': 'text',
                    'modifiers': ['wide', 'ascii']
                }])
            elif rulename == 'HexWildcard':
                self.assertEqual(kv, [{
                    'name': '$hex_string',
                    'value': '{ E2 34 ?? C8 A? FB }',
                    'type': 'byte'
                }])
            elif rulename == 'HexJump':
                self.assertEqual(kv, [{
                    'name': '$hex_string',
                    'value': '{ F4 23 [4-6] 62 B4 }',
                    'type': 'byte'
                }])
            elif rulename == 'HexAlternatives':
                self.assertEqual(kv, [{
                    'name': '$hex_string',
                    'value': '{ F4 23 ( 62 B4 | 56 ) 45 }',
                    'type': 'byte'
                }])
            elif rulename == 'HexMultipleAlternatives':
                self.assertEqual(kv, [{
                    'name': '$hex_string',
                    'value': '{ F4 23 ( 62 B4 | 56 | 45 ?? 67 ) 45 }',
                    'type': 'byte'
                }])
            elif rulename == 'RegExp':
                self.assertEqual(kv, [{
                    'name': '$re1',
                    'value': '/md5: [0-9a-fA-F]{32}/',
                    'type': 'regex',
                    'modifiers': ['nocase'],
                }, {
                    'name': '$re2',
                    'value': '/state: (on|off)/i',
                    'type': 'regex',
                }, {
                    'name': '$re3',
                    'value': r'/\x00https?:\/\/[^\x00]{4,500}\x00\x00\x00/',
                    'type': 'regex',
                }])
            elif rulename == 'Xor':
                self.assertEqual(kv, [{
                    'name': '$xor_string',
                    'value': 'This program cannot',
                    'type': 'text',
                    'modifiers': ['xor']
                }])
            elif rulename == 'WideXorAscii':
                self.assertEqual(kv, [{
                    'name': '$xor_string',
                    'value': 'This program cannot',
                    'type': 'text',
                    'modifiers': ['xor', 'wide', 'ascii']
                }])
            elif rulename == 'WideXor':
                self.assertEqual(kv, [{
                    'name': '$xor_string',
                    'value': 'This program cannot',
                    'type': 'text',
                    'modifiers': ['xor', 'wide']
                }])
            elif rulename == 'DoubleBackslash':
                self.assertEqual(kv, [{
                    'name': '$bs',
                    'value': r'\"\\\\\\\"',
                    'type': 'text'
                }])
            elif rulename == 'DoubleQuote':
                self.assertEqual(kv, [{
                    'name': '$text_string',
                    'value': r'foobar\"',
                    'type': 'text'
                }])
            elif rulename == 'HorizontalTab':
                self.assertEqual(kv, [{
                    'name': '$text_string',
                    'value': r'foo\tbar',
                    'type': 'text'
                }])
            elif rulename == 'Newline':
                self.assertEqual(kv, [{
                    'name': '$text_string',
                    'value': r'foo\nbar',
                    'type': 'text'
                }])
            elif rulename == 'HexEscape':
                self.assertEqual(kv, [{
                    'name': '$text_string',
                    'value': r'foo\x00bar',
                    'type': 'text'
                }])
            else:
                raise AssertionError(UNHANDLED_RULE_MSG.format(rulename))

    def test_string_bad_escaped_hex(self):
        # \xZZ is not a valid hex escape inside a YARA text string.
        inputRules = r'''
        rule sample
        {
            strings:
                $ = "foo\xZZbar"
            condition:
                all of them
        }
        '''
        plyara = Plyara()
        with self.assertRaises(ParseTypeError):
            plyara.parse_string(inputRules)

    def test_string_invalid_escape(self):
        # \g is not a recognized escape sequence in a YARA text string.
        inputRules = r'''
        rule sample
        {
            strings:
                $ = "foo\gbar"
            condition:
                all of them
        }
        '''
        plyara = Plyara()
        with self.assertRaises(ParseTypeError):
            plyara.parse_string(inputRules)

    def test_conditions(self):
        # Just checking for parsing errors
        self._parse_file('condition_ruleset.yar')

    def test_include(self):
        result = self._parse_file('include_ruleset.yar')
        self.assertEqual(result[0]['includes'], ['string_ruleset.yar'])

    def test_include_statements(self):
        self.parser.parse_string(
            'include "file1.yara"\ninclude "file2.yara"\ninclude "file3.yara"')
        self.assertEqual(len(self.parser.includes), 3)

    def test_rules_from_yara_project(self):
        with data_dir.joinpath('test_rules_from_yara_project.yar').open('r') as fh:
            inputRules = fh.read()
        plyara = Plyara()
        output = plyara.parse_string(inputRules)
        self.assertEqual(len(output), 293)

    def test_multiple_threads(self):
        with data_dir.joinpath('test_rules_from_yara_project.yar').open('r') as fh:
            inputRules = fh.read()

        def parse_rules(rules):
            # BUG FIX: use the argument instead of the enclosing closure
            # variable, so each worker parses exactly what it is handed.
            plyara = Plyara()
            return plyara.parse_string(rules)

        with concurrent.futures.ThreadPoolExecutor(max_workers=4) as e:
            futs = [e.submit(parse_rules, inputRules) for _ in range(4)]
            for fut in concurrent.futures.as_completed(futs):
                self.assertEqual(len(fut.result()), 293)

    def test_clear(self):
        """clear() must fully reset parser state for safe instance reuse."""
        parser = Plyara()
        with data_dir.joinpath('test_ruleset_2_rules.yar').open('r') as fh:
            parser.parse_string(fh.read())
        parser.clear()
        # The lexer line counter must be reset for correct error reporting.
        self.assertEqual(parser.lexer.lineno, 1)
        with data_dir.joinpath('test_ruleset_1_rule.yar').open('r') as fh:
            result = parser.parse_string(fh.read())
        # Only the rule from the second parse should be present.
        self.assertEqual(len(result), 1)
        self.assertEqual(result[0]['rule_name'], 'rule_one')
def yara_update(updater_type, update_config_path, update_output_path,
                download_directory, externals, cur_logger) -> None:
    """Download, aggregate and import YARA rule files from configured sources.

    Reads a YAML update configuration listing one or more sources, downloads
    each source (git clone or direct URL), merges every source's files into a
    single ``<source>.yar`` in a working directory, imports new/changed files
    through the Assemblyline client, and finally extracts the latest deployed
    ruleset into ``update_output_path`` along with a ``response.yaml``.

    Args:
        updater_type: Signature type handled by this updater (e.g. 'yara').
        update_config_path: Path to the YAML update configuration file.
        update_output_path: Directory receiving the downloaded ruleset and
            the 'response.yaml' hash manifest.
        download_directory: Directory used for raw source downloads.
        externals: External variables handed to the rule compilation check.
        cur_logger: Logger used for all progress and error reporting.

    Returns:
        None. Any unexpected exception is caught and logged at the top level.
    """
    # noinspection PyBroadException
    try:
        # Load updater configuration
        update_config = {}
        if update_config_path and os.path.exists(update_config_path):
            with open(update_config_path, 'r') as yml_fh:
                update_config = yaml.safe_load(yml_fh)
        else:
            cur_logger.error(f"Update configuration file doesn't exist: {update_config_path}")
            # exit() raises SystemExit, which is NOT caught by the broad
            # `except Exception` below, so this aborts the updater.
            exit()

        # Exit if no update sources given
        if not update_config.get('sources'):
            cur_logger.error("Update configuration does not contain any source to update from")
            exit()

        # Initialise al_client
        server = update_config['ui_server']
        user = update_config['api_user']
        api_key = update_config['api_key']
        cur_logger.info(f"Connecting to Assemblyline API: {server}...")
        al_client = get_client(server, apikey=(user, api_key), verify=False)
        cur_logger.info("Connected!")

        # Parse updater configuration
        previous_update = update_config.get('previous_update', None)
        # previous_hash maps cache file name -> sha256 from the last run
        previous_hash = json.loads(update_config.get('previous_hash', None) or "{}")
        sources = {source['name']: source for source in update_config['sources']}
        files_sha256 = {}
        files_default_classification = {}

        # Create (a fresh) working directory
        updater_working_dir = os.path.join(tempfile.gettempdir(), 'updater_working_dir')
        if os.path.exists(updater_working_dir):
            shutil.rmtree(updater_working_dir)
        os.makedirs(updater_working_dir)

        # Go through each source and download file
        for source_name, source in sources.items():
            os.makedirs(os.path.join(updater_working_dir, source_name))

            # 1. Download signatures
            cur_logger.info(f"Downloading files from: {source['uri']}")
            uri: str = source['uri']
            if uri.endswith('.git'):
                files = git_clone_repo(download_directory, source, cur_logger, previous_update=previous_update)
            else:
                files = [url_download(download_directory, source, cur_logger, previous_update=previous_update)]

            processed_files = set()

            # 2. Aggregate files into a single <source>.yar
            file_name = os.path.join(updater_working_dir, f"{source_name}.yar")
            mode = "w"
            for file in files:
                # Track files by normalized path so duplicates are detected no
                # matter how the path was spelled (the set is populated with
                # normalized paths, so the membership test must use one too).
                norm_file = os.path.normpath(file)

                # File has already been processed before, skip it to avoid duplication of rules
                if norm_file in processed_files:
                    continue

                cur_logger.info(f"Processing file: {file}")

                file_dirname = os.path.dirname(file)
                processed_files.add(norm_file)
                with open(file, 'r') as f:
                    f_lines = f.readlines()

                # Inline any `include` statements so the aggregate is self-contained
                temp_lines = []
                for f_line in f_lines:
                    if f_line.startswith("include"):
                        lines, processed_files = replace_include(f_line, file_dirname,
                                                                 processed_files, cur_logger)
                        temp_lines.extend(lines)
                    else:
                        temp_lines.append(f_line)

                # guess the type of files that we have in the current file
                guessed_category = guess_category(file)
                parser = Plyara()
                signatures = parser.parse_string("\n".join(temp_lines))

                # Ignore "cuckoo" rules
                if "cuckoo" in parser.imports:
                    parser.imports.remove("cuckoo")

                # Tag each rule with the guessed category
                if guessed_category:
                    for s in signatures:
                        if 'metadata' not in s:
                            s['metadata'] = []

                        # Do not override category with guessed category if it
                        # already exists. (Previously a loop ended in `continue`
                        # that never actually skipped the appends below.)
                        if not any('category' in meta for meta in s['metadata']):
                            s['metadata'].append({'category': guessed_category})
                            s['metadata'].append({guessed_category: s.get('rule_name')})

                # Save all rules from source into single file
                with open(file_name, mode) as f:
                    for s in signatures:
                        # Fix imports and remove cuckoo
                        s['imports'] = utils.detect_imports(s)
                        if "cuckoo" not in s['imports']:
                            f.write(utils.rebuild_yara_rule(s))

                # First file truncates the aggregate; the rest append to it
                if mode == "w":
                    mode = "a"

            # Check if the file is the same as the last run
            if os.path.exists(file_name):
                cache_name = os.path.basename(file_name)
                sha256 = get_sha256_for_file(file_name)
                if sha256 != previous_hash.get(cache_name, None):
                    files_sha256[cache_name] = sha256
                    files_default_classification[cache_name] = source.get('default_classification',
                                                                          classification.UNRESTRICTED)
                else:
                    cur_logger.info(f'File {cache_name} has not changed since last run. Skipping it...')

        if files_sha256:
            cur_logger.info(f"Found new {updater_type.upper()} rules files to process!")
            yara_importer = YaraImporter(updater_type, al_client, logger=cur_logger)

            # Validating and importing the different signatures
            for base_file in files_sha256:
                cur_logger.info(f"Validating output file: {base_file}")
                cur_file = os.path.join(updater_working_dir, base_file)
                source_name = os.path.splitext(os.path.basename(cur_file))[0]
                default_classification = files_default_classification.get(base_file,
                                                                          classification.UNRESTRICTED)

                # Compile first so invalid rules fail before import; errors
                # propagate to the top-level handler below.
                _compile_rules(cur_file, externals, cur_logger)
                yara_importer.import_file(cur_file, source_name,
                                          default_classification=default_classification)
        else:
            cur_logger.info(f'No new {updater_type.upper()} rules files to process...')

        # Check if new signatures have been added to the datastore
        if al_client.signature.update_available(since=previous_update or '',
                                                sig_type=updater_type)['update_available']:
            cur_logger.info("An update is available for download from the datastore")

            if not os.path.exists(update_output_path):
                os.makedirs(update_output_path)

            temp_zip_file = os.path.join(update_output_path, 'temp.zip')
            al_client.signature.download(
                output=temp_zip_file,
                query=f"type:{updater_type} AND (status:NOISY OR status:DEPLOYED)")

            if os.path.exists(temp_zip_file):
                with ZipFile(temp_zip_file, 'r') as zip_f:
                    zip_f.extractall(update_output_path)

                os.remove(temp_zip_file)

            # Create the response yaml recording what was fetched this run
            with open(os.path.join(update_output_path, 'response.yaml'), 'w') as yml_fh:
                yaml.safe_dump(dict(hash=json.dumps(files_sha256)), yml_fh)

            cur_logger.info("New ruleset successfully downloaded and ready to use")

        cur_logger.info(f"{updater_type.upper()} updater completed successfully")

    except Exception:
        # Top-level boundary: log with traceback rather than crash the service.
        cur_logger.exception("Updater ended with an exception!")
def parse_yara_rules_text(text): return Plyara().parse_string(text)