예제 #1
0
class TestRuleParser(unittest.TestCase):
    """Check ``Plyara.parse_string`` output against the rulesets in ``tests/data``.

    Expectations are expressed with ``assertIn``/``assertEqual`` rather than
    ``assertTrue(<expression>)`` so that a failure reports the offending values.
    """

    def setUp(self):
        """Create a fresh parser for each test."""
        self.parser = Plyara()

    def _parse_file(self, path):
        """Read the ruleset at *path* and return the parser's result for it."""
        with open(path, 'r') as f:
            inputString = f.read()

        return self.parser.parse_string(inputString)

    def _assert_all_import(self, path, module):
        """Assert that every rule parsed from *path* lists *module* in its imports."""
        for rule in self._parse_file(path):
            self.assertIn(module, rule['imports'])

    def test_import_pe(self):
        """Rules in the pe ruleset import ``pe``."""
        self._assert_all_import('tests/data/import_ruleset_pe.yar', 'pe')

    def test_import_elf(self):
        """Rules in the elf ruleset import ``elf``."""
        self._assert_all_import('tests/data/import_ruleset_elf.yar', 'elf')

    def test_import_cuckoo(self):
        """Rules in the cuckoo ruleset import ``cuckoo``."""
        self._assert_all_import('tests/data/import_ruleset_cuckoo.yar',
                                'cuckoo')

    def test_import_magic(self):
        """Rules in the magic ruleset import ``magic``."""
        self._assert_all_import('tests/data/import_ruleset_magic.yar',
                                'magic')

    def test_import_hash(self):
        """Rules in the hash ruleset import ``hash``."""
        self._assert_all_import('tests/data/import_ruleset_hash.yar', 'hash')

    def test_import_math(self):
        """Rules in the math ruleset import ``math``."""
        self._assert_all_import('tests/data/import_ruleset_math.yar', 'math')

    def test_import_dotnet(self):
        """Rules in the dotnet ruleset import ``dotnet``."""
        self._assert_all_import('tests/data/import_ruleset_dotnet.yar',
                                'dotnet')

    def test_import_androguard(self):
        """Rules in the androguard ruleset import ``androguard``."""
        self._assert_all_import('tests/data/import_ruleset_androguard.yar',
                                'androguard')

    def test_scopes(self):
        """Each rule in the scope ruleset carries the scopes listed for its name."""
        expected_scopes = {
            "GlobalScope": ('global', ),
            "PrivateScope": ('private', ),
            "PrivateGlobalScope": ('global', 'private'),
        }

        for entry in self._parse_file('tests/data/scope_ruleset.yar'):
            rulename = entry['rule_name']

            # A rule without an expectation means the data file and this
            # test have drifted apart; fail loudly instead of skipping it.
            if rulename not in expected_scopes:
                raise AssertionError(UNHANDLED_RULE_MSG.format(rulename))

            for scope in expected_scopes[rulename]:
                self.assertIn(scope, entry['scopes'])

    def test_tags(self):
        """Each rule in the tag ruleset has exactly the expected number of tags."""
        expected_tags = {
            "OneTag": ('tag1', ),
            "TwoTags": ('tag1', 'tag2'),
            "ThreeTags": ('tag1', 'tag2', 'tag3'),
        }

        for entry in self._parse_file('tests/data/tag_ruleset.yar'):
            rulename = entry['rule_name']

            if rulename not in expected_tags:
                raise AssertionError(UNHANDLED_RULE_MSG.format(rulename))

            tags = entry['tags']
            self.assertEqual(len(tags), len(expected_tags[rulename]))
            for tag in expected_tags[rulename]:
                self.assertIn(tag, tags)

    def test_metadata(self):
        """Metadata values are exposed under their keys as strings."""
        expected_metadata = {
            "StringTypeMetadata": {
                'string_value': 'String Metadata',
            },
            "IntegerTypeMetadata": {
                'integer_value': '100',
            },
            "BooleanTypeMetadata": {
                'boolean_value': 'true',
            },
            "AllTypesMetadata": {
                'string_value': 'Different String Metadata',
                'integer_value': '33',
                'boolean_value': 'false',
            },
        }

        for entry in self._parse_file('tests/data/metadata_ruleset.yar'):
            rulename = entry['rule_name']

            if rulename not in expected_metadata:
                raise AssertionError(UNHANDLED_RULE_MSG.format(rulename))

            metadata = entry['metadata']
            for key, value in expected_metadata[rulename].items():
                self.assertIn(key, metadata)
                self.assertEqual(metadata[key], value)

    def test_strings(self):
        """String names, values and (where present) modifiers are parsed as expected."""
        # Rules compared on (name, value) only.
        expected_pairs = {
            "Text": [('$text_string', '\"foobar\"')],
            "HexWildcard": [('$hex_string', '{ E2 34 ?? C8 A? FB }')],
            "HexJump": [('$hex_string', '{ F4 23 [4-6] 62 B4 }')],
            "HexAlternatives": [('$hex_string',
                                 '{ F4 23 ( 62 B4 | 56 ) 45 }')],
            "HexMultipleAlternatives": [
                ('$hex_string', '{ F4 23 ( 62 B4 | 56 | 45 ?? 67 ) 45 }'),
            ],
            "RegExp": [
                ('$re1', r'/md5: [0-9a-fA-F]{32}/'),
                ('$re2', r'/state: (on|off)/i'),
                ('$re3', r'/\x00https?:\/\/[^\x00]{4,500}\x00\x00\x00/'),
            ],
        }
        # Rules compared on (name, value, modifiers).
        expected_triples = {
            "FullwordText": [('$text_string', '\"foobar\"', ['fullword'])],
            "CaseInsensitiveText": [('$text_string', '\"foobar\"',
                                     ['nocase'])],
            "WideCharText": [('$wide_string', '\"Borland\"', ['wide'])],
            "WideCharAsciiText": [('$wide_and_ascii_string', '\"Borland\"',
                                   ['wide', 'ascii'])],
        }

        for entry in self._parse_file('tests/data/string_ruleset.yar'):
            rulename = entry['rule_name']

            if rulename in expected_pairs:
                self.assertEqual(
                    [(s['name'], s['value']) for s in entry['strings']],
                    expected_pairs[rulename])

            elif rulename in expected_triples:
                self.assertEqual(
                    [(s['name'], s['value'], s['modifiers'])
                     for s in entry['strings']],
                    expected_triples[rulename])

            else:
                raise AssertionError(UNHANDLED_RULE_MSG.format(rulename))

    def test_conditions(self):
        """The condition ruleset parses without raising."""
        # Just checking for parsing errors
        self._parse_file('tests/data/condition_ruleset.yar')

    def test_include(self):
        """The first parsed rule records the included file name."""
        result = self._parse_file('tests/data/include_ruleset.yar')
        self.assertEqual(result[0]['includes'], ['string_ruleset.yar'])

    def test_include_statements(self):
        """Three include statements leave three entries on ``parser.includes``."""
        self.parser.parse_string(
            'include "file1.yara"\ninclude "file2.yara"\ninclude "file3.yara"')
        self.assertEqual(len(self.parser.includes), 3)
예제 #2
0
class TestRuleParser(unittest.TestCase):
    """Check ``Plyara.parse_string`` output against the rulesets under ``data_dir``."""

    def setUp(self):
        """Create a fresh parser for each test."""
        self.parser = Plyara()

    def _parse_data_file(self, filename):
        """Read *filename* from ``data_dir`` and return the parser's result for it."""
        with open(data_dir.joinpath(filename), 'r') as fh:
            inputString = fh.read()

        return self.parser.parse_string(inputString)

    def _assert_all_import(self, filename, module):
        """Assert that every rule parsed from *filename* lists *module* in its imports."""
        for rule in self._parse_data_file(filename):
            self.assertIn(module, rule['imports'])

    def test_import_pe(self):
        """Rules in the pe ruleset import ``pe``."""
        self._assert_all_import('import_ruleset_pe.yar', 'pe')

    def test_import_elf(self):
        """Rules in the elf ruleset import ``elf``."""
        self._assert_all_import('import_ruleset_elf.yar', 'elf')

    def test_import_cuckoo(self):
        """Rules in the cuckoo ruleset import ``cuckoo``."""
        self._assert_all_import('import_ruleset_cuckoo.yar', 'cuckoo')

    def test_import_magic(self):
        """Rules in the magic ruleset import ``magic``."""
        self._assert_all_import('import_ruleset_magic.yar', 'magic')

    def test_import_hash(self):
        """Rules in the hash ruleset import ``hash``."""
        self._assert_all_import('import_ruleset_hash.yar', 'hash')

    def test_import_math(self):
        """Rules in the math ruleset import ``math``."""
        self._assert_all_import('import_ruleset_math.yar', 'math')

    def test_import_dotnet(self):
        """Rules in the dotnet ruleset import ``dotnet``."""
        self._assert_all_import('import_ruleset_dotnet.yar', 'dotnet')

    def test_import_androguard(self):
        """Rules in the androguard ruleset import ``androguard``."""
        self._assert_all_import('import_ruleset_androguard.yar', 'androguard')

    def test_scopes(self):
        """Each rule in the scope ruleset carries the scopes listed for its name."""
        expected_scopes = {
            'GlobalScope': ('global', ),
            'PrivateScope': ('private', ),
            'PrivateGlobalScope': ('global', 'private'),
        }

        for entry in self._parse_data_file('scope_ruleset.yar'):
            rulename = entry['rule_name']

            # A rule without an expectation means the data file and this
            # test have drifted apart; fail loudly instead of skipping it.
            if rulename not in expected_scopes:
                raise AssertionError(UNHANDLED_RULE_MSG.format(rulename))

            for scope in expected_scopes[rulename]:
                self.assertIn(scope, entry['scopes'])

    def test_tags(self):
        """Each rule in the tag ruleset has exactly the expected number of tags."""
        expected_tags = {
            'OneTag': ('tag1', ),
            'TwoTags': ('tag1', 'tag2'),
            'ThreeTags': ('tag1', 'tag2', 'tag3'),
        }

        for entry in self._parse_data_file('tag_ruleset.yar'):
            rulename = entry['rule_name']

            if rulename not in expected_tags:
                raise AssertionError(UNHANDLED_RULE_MSG.format(rulename))

            # The count is compared with assertEqual for every rule;
            # assertTrue(len(...), n) would treat n as the failure message
            # and accept any non-empty tag list.
            self.assertEqual(len(entry['tags']), len(expected_tags[rulename]))
            for tag in expected_tags[rulename]:
                self.assertIn(tag, entry['tags'])

    def test_metadata(self):
        """Metadata entries keep their order, keys and typed values."""
        for entry in self._parse_data_file('metadata_ruleset.yar'):
            rulename = entry['rule_name']
            kv = entry['metadata']
            # Flatten the list of single-entry dicts into (key, value) pairs.
            kv_list = [(k, v) for dic in kv for k, v in dic.items()]

            if rulename == 'StringTypeMetadata':
                self.assertEqual(len(kv), 1)
                self.assertEqual(kv_list[0][0], 'string_value')
                self.assertEqual(kv_list[0][1], 'String Metadata')

            elif rulename == 'IntegerTypeMetadata':
                self.assertEqual(len(kv), 1)
                self.assertEqual(kv_list[0][0], 'integer_value')
                # Compare integers by type and value; identity checks on ints
                # only work because of interpreter-specific caching.
                self.assertIsInstance(kv_list[0][1], int)
                self.assertEqual(kv_list[0][1], 100)

            elif rulename == 'BooleanTypeMetadata':
                self.assertEqual(len(kv), 1)
                self.assertEqual(kv_list[0][0], 'boolean_value')
                # Identity is intentional for booleans: 1 == True, 1 is not True.
                self.assertIs(kv_list[0][1], True)

            elif rulename == 'AllTypesMetadata':
                self.assertEqual(len(kv), 3)
                self.assertEqual(kv_list[0][0], 'string_value')
                self.assertEqual(kv_list[1][0], 'integer_value')
                self.assertEqual(kv_list[2][0], 'boolean_value')
                self.assertEqual(kv_list[0][1], 'Different String Metadata')
                self.assertIsInstance(kv_list[1][1], int)
                self.assertEqual(kv_list[1][1], 33)
                self.assertIs(kv_list[2][1], False)

            else:
                raise AssertionError(UNHANDLED_RULE_MSG.format(rulename))

    def test_strings(self):
        """String entries expose the expected values in the expected order."""
        # Rules that define exactly one string, mapped to the full tuple of
        # that string entry's values.
        single_string_rules = {
            'Text': ('$text_string', 'foobar', 'text'),
            'FullwordText': ('$text_string', 'foobar', 'text', ['fullword']),
            'CaseInsensitiveText': ('$text_string', 'foobar', 'text',
                                    ['nocase']),
            'WideCharText': ('$wide_string', 'Borland', 'text', ['wide']),
            'WideCharAsciiText': ('$wide_and_ascii_string', 'Borland', 'text',
                                  ['wide', 'ascii']),
            'HexWildcard': ('$hex_string', '{ E2 34 ?? C8 A? FB }', 'byte'),
            'HexJump': ('$hex_string', '{ F4 23 [4-6] 62 B4 }', 'byte'),
            'HexAlternatives': ('$hex_string', '{ F4 23 ( 62 B4 | 56 ) 45 }',
                                'byte'),
            'HexMultipleAlternatives': (
                '$hex_string',
                '{ F4 23 ( 62 B4 | 56 | 45 ?? 67 ) 45 }',
                'byte',
            ),
            'Xor': ('$xor_string', 'This program cannot', 'text', ['xor']),
            'WideXorAscii': ('$xor_string', 'This program cannot', 'text',
                             ['xor', 'wide', 'ascii']),
            'WideXor': ('$xor_string', 'This program cannot', 'text',
                        ['xor', 'wide']),
            'DoubleBackslash': ('$bs', r'\"\\\\\\\"', 'text'),
        }
        # (name, value, type) for the three strings of the RegExp rule.
        expected_regexes = [
            ('$re1', '/md5: [0-9a-fA-F]{32}/', 'regex'),
            ('$re2', '/state: (on|off)/i', 'regex'),
            ('$re3', r'/\x00https?:\/\/[^\x00]{4,500}\x00\x00\x00/', 'regex'),
        ]

        for entry in self._parse_data_file('string_ruleset.yar'):
            rulename = entry['rule_name']
            kv = entry['strings']
            kv_list = [tuple(x.values()) for x in kv]

            if rulename in single_string_rules:
                self.assertEqual(len(kv), 1)
                self.assertEqual(kv_list[0], single_string_rules[rulename])

            elif rulename == 'RegExp':
                self.assertEqual(len(kv), 3)
                # Only the first three values of each entry are checked.
                for index, (name, value, kind) in enumerate(expected_regexes):
                    self.assertEqual(kv_list[index][0], name)
                    self.assertEqual(kv_list[index][1], value)
                    self.assertEqual(kv_list[index][2], kind)

            else:
                raise AssertionError(UNHANDLED_RULE_MSG.format(rulename))

    def test_conditions(self):
        """The condition ruleset parses without raising."""
        # Just checking for parsing errors
        self._parse_data_file('condition_ruleset.yar')

    def test_include(self):
        """The first parsed rule records the included file name."""
        result = self._parse_data_file('include_ruleset.yar')
        self.assertEqual(result[0]['includes'], ['string_ruleset.yar'])

    def test_include_statements(self):
        """Three include statements leave three entries on ``parser.includes``."""
        self.parser.parse_string(
            'include "file1.yara"\ninclude "file2.yara"\ninclude "file3.yara"')
        self.assertEqual(len(self.parser.includes), 3)

    def test_rules_from_yara_project(self):
        """The YARA-project ruleset parses into 293 rules."""
        # NOTE(review): this path is relative to the working directory, unlike
        # the data_dir-based lookups above - confirm both point at the same place.
        with open('tests/data/test_rules_from_yara_project.yar', 'r') as fh:
            inputRules = fh.read()

        plyara = Plyara()
        output = plyara.parse_string(inputRules)

        self.assertEqual(len(output), 293)
예제 #3
0
 def setUp(self):
     """Create a new ``Plyara`` instance and store it on ``self.parser``."""
     self.parser = Plyara()
예제 #4
0
 def test_is_valid_rule_name(self):
     """Check accepted and rejected rule names under a DeprecationWarning.

     All calls to ``Plyara.is_valid_rule_name`` happen inside one
     ``assertWarns(DeprecationWarning)`` block, accepted names first.
     """
     accepted_names = (
         'test',
         'test123',
         'test_test',
         '_test_',
         'include_test',
     )
     rejected_names = (
         '123test',
         '123 test',
         'test 123',
         'test test',
         'test-test',
         'include',
         'test!*@&*!&',
         '',
     )

     with self.assertWarns(DeprecationWarning):
         for candidate in accepted_names:
             self.assertTrue(Plyara.is_valid_rule_name(candidate))
         for candidate in rejected_names:
             self.assertFalse(Plyara.is_valid_rule_name(candidate))
예제 #5
0
    def test_detect_dependencies(self):
        """Check ``Plyara.detect_dependencies`` for the first eleven parsed rules.

        The calls are made in rule order inside a single
        ``assertWarns(DeprecationWarning)`` block.
        """
        ruleset_path = data_dir.joinpath('detect_dependencies_ruleset.yar')
        with open(ruleset_path, 'r') as fh:
            ruleset_text = fh.read()

        parsed_rules = Plyara().parse_string(ruleset_text)

        # Expected dependency list for each rule, by position in the result.
        expected_by_position = [
            [],
            [],
            [],
            ['is__osx', 'priv01', 'priv02', 'priv03', 'priv04'],
            ['is__elf', 'priv01', 'priv02', 'priv03', 'priv04'],
            ['is__elf', 'is__osx', 'priv01', 'priv02'],
            ['is__elf', 'is__osx', 'priv01'],
            ['is__elf'],
            ['is__osx', 'is__elf'],
            ['is__osx'],
            ['is__elf', 'is__osx'],
        ]

        with self.assertWarns(DeprecationWarning):
            for position, expected in enumerate(expected_by_position):
                self.assertEqual(
                    Plyara.detect_dependencies(parsed_rules[position]),
                    expected)
class YaraImporter(object):
    """Parse YARA rule text and upload the resulting signatures through a client."""

    # Statuses that are kept as-is; anything else falls back to the default.
    _KNOWN_STATUSES = ("DEPLOYED", "NOISY", "DISABLED", "STAGING", "TESTING",
                       "INVALID")

    # Metadata key -> signature field that the key overrides.
    _META_FIELDS = {
        "classification": "classification",
        "sharing": "classification",
        'id': "signature_id",
        'rule_id': "signature_id",
        'signature_id': "signature_id",
        'version': "version",
        'rule_version': "version",
        'revision': "version",
        'status': "status",
        'al_status': "status",
    }

    def __init__(self, importer_type, al_client, logger=None):
        """Store the collaborators and build a default logger when none is given.

        Args:
            importer_type: Value used as the signature ``type`` on upload.
            al_client: Client whose ``signature.add_update_many`` is called.
            logger: Optional logger; a falsy value triggers the default setup.
        """
        logger = logger or self._default_logger()

        self.importer_type = importer_type
        self.update_client = al_client
        self.parser = Plyara()
        self.classification = forge.get_classification()
        self.log = logger

    @staticmethod
    def _default_logger():
        """Initialise 'yara_importer' logging and return its INFO-level logger."""
        from assemblyline.common import log as al_log
        al_log.init_logging('yara_importer')
        fallback = logging.getLogger('assemblyline.yara_importer')
        fallback.setLevel(logging.INFO)
        return fallback

    @classmethod
    def _resolve_status(cls, status, default_status):
        """Translate CCCS statuses and fall back to *default_status* if unknown."""
        # Convert CCCS YARA status to AL signature status
        if status == "RELEASED":
            return "DEPLOYED"
        if status == "DEPRECATED":
            return "DISABLED"

        # Fallback status
        if status in cls._KNOWN_STATUSES:
            return status
        return default_status

    def _save_signatures(self,
                         signatures,
                         source,
                         default_status=DEFAULT_STATUS,
                         default_classification=None):
        """Build signature records from parsed rules and upload them in one call.

        Args:
            signatures: Parsed rules, as returned by ``_split_signatures``.
            source: Source name attached to every record and to the upload.
            default_status: Status used when metadata gives none or an unknown one.
            default_classification: Classification used when metadata gives none;
                ``self.classification.UNRESTRICTED`` is used if this is falsy.

        Returns:
            ``False`` when *signatures* is empty, otherwise the ``'success'``
            entry of the upload response.
        """
        if not len(signatures):
            self.log.info(f"There are no signatures for {source}, skipping...")
            return False

        total = 0
        upload_list = []
        for total, signature in enumerate(signatures, 1):
            fields = {
                "classification":
                default_classification or self.classification.UNRESTRICTED,
                "signature_id": None,
                "version": 1,
                "status": default_status,
            }

            # Later metadata entries win over earlier ones for the same field.
            for meta in signature.get('metadata', {}):
                for key, value in meta.items():
                    target = self._META_FIELDS.get(key)
                    if target is not None:
                        fields[target] = value

            status = self._resolve_status(fields["status"], default_status)

            # Fix imports and remove cuckoo
            signature['imports'] = utils.detect_imports(signature)
            if "cuckoo" in signature['imports']:
                self.log.warning(
                    f"Signature '{signature.get('rule_name')}' skipped because it uses cuckoo module."
                )
                continue

            record = Signature({
                "classification": fields["classification"],
                "data": utils.rebuild_yara_rule(signature),
                "name": signature.get('rule_name'),
                "order": total,
                "revision": int(float(fields["version"])),
                "signature_id":
                fields["signature_id"] or signature.get('rule_name'),
                "source": source,
                "status": status,
                "type": self.importer_type,
            })
            upload_list.append(record.as_primitives())

        response = self.update_client.signature.add_update_many(
            source, self.importer_type, upload_list)
        self.log.info(
            f"Imported {response['success']}/{total} signatures from {source} into Assemblyline"
        )

        return response['success']

    def _split_signatures(self, data):
        """Parse *data* with a newly created parser, which replaces ``self.parser``."""
        self.parser = Plyara()
        return self.parser.parse_string(data)

    def import_data(self,
                    yara_bin,
                    source,
                    default_status=DEFAULT_STATUS,
                    default_classification=None):
        """Parse *yara_bin* and save the resulting signatures under *source*.

        Returns:
            The result of ``_save_signatures``.
        """
        parsed = self._split_signatures(yara_bin)
        return self._save_signatures(
            parsed,
            source,
            default_status=default_status,
            default_classification=default_classification)

    def import_file(self,
                    file_path: str,
                    source: str,
                    default_status=DEFAULT_STATUS,
                    default_classification=None):
        """Read the file at *file_path* and import its contents.

        ``~`` in *file_path* is expanded. When *source* is falsy, the file's
        base name is used as the source.

        Returns:
            The result of ``import_data``.

        Raises:
            Exception: If the expanded path does not exist.
        """
        self.log.info(f"Importing file: {file_path}")
        cur_file = os.path.expanduser(file_path)
        if not os.path.exists(cur_file):
            raise Exception(f"File {cur_file} does not exists.")

        with open(cur_file, "r") as yara_file:
            yara_bin = yara_file.read()
            return self.import_data(
                yara_bin,
                source or os.path.basename(cur_file),
                default_status=default_status,
                default_classification=default_classification)
 def _split_signatures(self, data):
     """Parse *data* with a newly created ``Plyara`` parser.

     The new parser replaces ``self.parser`` before parsing, and the result
     of its ``parse_string`` call is returned.
     """
     fresh_parser = Plyara()
     self.parser = fresh_parser
     return fresh_parser.parse_string(data)
예제 #8
0
 def test_is_valid_rule_name(self):
     """Check which names ``Plyara.is_valid_rule_name`` accepts and rejects.

     Accepted names are checked first, then rejected ones.
     """
     accepted_names = (
         'test',
         'test123',
         'test_test',
         '_test_',
         'include_test',
     )
     rejected_names = (
         '123test',
         '123 test',
         'test 123',
         'test test',
         'test-test',
         'include',
         'test!*@&*!&',
         '',
     )

     for candidate in accepted_names:
         self.assertTrue(Plyara.is_valid_rule_name(candidate))
     for candidate in rejected_names:
         self.assertFalse(Plyara.is_valid_rule_name(candidate))
예제 #9
0
    def test_detect_dependencies(self):
        """Check ``Plyara.detect_dependencies`` for the first eleven parsed rules."""
        with open('tests/data/detect_dependencies_ruleset.yar', 'r') as f:
            ruleset_text = f.read()

        parsed_rules = Plyara().parse_string(ruleset_text)

        # Expected dependency list for each rule, by position in the result.
        expected_by_position = [
            [],
            [],
            [],
            ['is__osx', 'priv01', 'priv02', 'priv03', 'priv04'],
            ['is__elf', 'priv01', 'priv02', 'priv03', 'priv04'],
            ['is__elf', 'is__osx', 'priv01', 'priv02'],
            ['is__elf', 'is__osx', 'priv01'],
            ['is__elf'],
            ['is__osx', 'is__elf'],
            ['is__osx'],
            ['is__elf', 'is__osx'],
        ]

        for position, expected in enumerate(expected_by_position):
            self.assertEqual(
                Plyara.detect_dependencies(parsed_rules[position]), expected)
예제 #10
0
 def setUp(self):
     """Create a ``Plyara`` instance with ``meta_as_kv=True`` on ``self.parser``."""
     self.parser = Plyara(meta_as_kv=True)
예제 #11
0
 def parse_rules(rules):
     """Parse YARA source text with a fresh ``Plyara`` instance.

     Args:
         rules: YARA rule source passed to ``Plyara.parse_string``.

     Returns:
         The value returned by ``Plyara.parse_string`` for *rules*.
     """
     # Parse the argument itself rather than a variable from an enclosing
     # scope, so the function works wherever it is defined or called.
     plyara = Plyara()
     return plyara.parse_string(rules)
예제 #12
0
class TestRuleParser(unittest.TestCase):
    """Tests for ``Plyara.parse_string`` driven by ruleset files in ``data_dir``."""

    def setUp(self):
        """Give each test its own ``Plyara`` instance as ``self.parser``."""
        self.parser = Plyara()

    @staticmethod
    def _read_ruleset(filename):
        """Return the text content of ``filename`` located under ``data_dir``."""
        with data_dir.joinpath(filename).open('r') as fh:
            return fh.read()

    def _assert_all_rules_import(self, filename, module):
        """Parse ``filename`` and assert every rule lists ``module`` as import."""
        result = self.parser.parse_string(self._read_ruleset(filename))

        for rule in result:
            self.assertIn(module, rule['imports'])

    def test_import_pe(self):
        self._assert_all_rules_import('import_ruleset_pe.yar', 'pe')

    def test_import_elf(self):
        self._assert_all_rules_import('import_ruleset_elf.yar', 'elf')

    def test_import_cuckoo(self):
        self._assert_all_rules_import('import_ruleset_cuckoo.yar', 'cuckoo')

    def test_import_magic(self):
        self._assert_all_rules_import('import_ruleset_magic.yar', 'magic')

    def test_import_hash(self):
        self._assert_all_rules_import('import_ruleset_hash.yar', 'hash')

    def test_import_math(self):
        self._assert_all_rules_import('import_ruleset_math.yar', 'math')

    def test_import_dotnet(self):
        self._assert_all_rules_import('import_ruleset_dotnet.yar', 'dotnet')

    def test_import_androguard(self):
        self._assert_all_rules_import(
            'import_ruleset_androguard.yar', 'androguard')

    def test_scopes(self):
        """Each known rule must carry the scope(s) its name advertises."""
        result = self.parser.parse_string(
            self._read_ruleset('scope_ruleset.yar'))

        for entry in result:
            rulename = entry['rule_name']

            if rulename == 'GlobalScope':
                self.assertIn('global', entry['scopes'])

            elif rulename == 'PrivateScope':
                self.assertIn('private', entry['scopes'])

            elif rulename == 'PrivateGlobalScope':
                self.assertIn('global', entry['scopes'])
                self.assertIn('private', entry['scopes'])
            else:
                raise AssertionError(UNHANDLED_RULE_MSG.format(rulename))

    def test_tags(self):
        """Each known rule must expose exactly the expected tags."""
        result = self.parser.parse_string(
            self._read_ruleset('tag_ruleset.yar'))

        for entry in result:
            rulename = entry['rule_name']

            if rulename == 'OneTag':
                self.assertEqual(len(entry['tags']), 1)
                self.assertIn('tag1', entry['tags'])

            elif rulename == 'TwoTags':
                self.assertEqual(len(entry['tags']), 2)
                self.assertIn('tag1', entry['tags'])
                self.assertIn('tag2', entry['tags'])

            elif rulename == 'ThreeTags':
                # assertEqual, not assertTrue: assertTrue(x, 3) treats 3 as
                # the failure message and passes for any non-empty tag list.
                self.assertEqual(len(entry['tags']), 3)
                self.assertIn('tag1', entry['tags'])
                self.assertIn('tag2', entry['tags'])
                self.assertIn('tag3', entry['tags'])

            else:
                raise AssertionError(UNHANDLED_RULE_MSG.format(rulename))

    def test_metadata(self):
        """Metadata entries must keep their key order, values and types."""
        result = self.parser.parse_string(
            self._read_ruleset('metadata_ruleset.yar'))

        for entry in result:
            rulename = entry['rule_name']
            kv = entry['metadata']
            # Flatten the list of single-entry dicts into (key, value) pairs.
            kv_list = [(k, v) for dic in kv for k, v in dic.items()]

            if rulename == 'StringTypeMetadata':
                self.assertEqual(len(kv), 1)
                self.assertEqual(kv_list[0][0], 'string_value')
                self.assertEqual(kv_list[0][1], 'String Metadata')

            elif rulename == 'IntegerTypeMetadata':
                self.assertEqual(len(kv), 1)
                self.assertEqual(kv_list[0][0], 'integer_value')
                # Identity checks on ints depend on interpreter caching, so
                # check type and value explicitly instead of using assertIs.
                self.assertIsInstance(kv_list[0][1], int)
                self.assertEqual(kv_list[0][1], 100)

            elif rulename == 'BooleanTypeMetadata':
                self.assertEqual(len(kv), 1)
                self.assertEqual(kv_list[0][0], 'boolean_value')
                self.assertIs(kv_list[0][1], True)

            elif rulename == 'AllTypesMetadata':
                self.assertEqual(len(kv), 3)
                self.assertEqual(kv_list[0][0], 'string_value')
                self.assertEqual(kv_list[1][0], 'integer_value')
                self.assertEqual(kv_list[2][0], 'boolean_value')
                self.assertEqual(kv_list[0][1], 'Different String Metadata')
                self.assertIsInstance(kv_list[1][1], int)
                self.assertEqual(kv_list[1][1], 33)
                self.assertIs(kv_list[2][1], False)

            else:
                raise AssertionError(UNHANDLED_RULE_MSG.format(rulename))

    def test_strings(self):
        """The ``strings`` section of each known rule must parse as expected."""
        # Expected ``strings`` list keyed by rule name.
        expected_strings = {
            'Text': [{
                'name': '$text_string',
                'value': 'foobar',
                'type': 'text'
            }],
            'FullwordText': [{
                'name': '$text_string',
                'value': 'foobar',
                'type': 'text',
                'modifiers': ['fullword']
            }],
            'CaseInsensitiveText': [{
                'name': '$text_string',
                'value': 'foobar',
                'type': 'text',
                'modifiers': ['nocase']
            }],
            'WideCharText': [{
                'name': '$wide_string',
                'value': 'Borland',
                'type': 'text',
                'modifiers': ['wide']
            }],
            'WideCharAsciiText': [{
                'name': '$wide_and_ascii_string',
                'value': 'Borland',
                'type': 'text',
                'modifiers': ['wide', 'ascii']
            }],
            'HexWildcard': [{
                'name': '$hex_string',
                'value': '{ E2 34 ?? C8 A? FB }',
                'type': 'byte'
            }],
            'HexJump': [{
                'name': '$hex_string',
                'value': '{ F4 23 [4-6] 62 B4 }',
                'type': 'byte'
            }],
            'HexAlternatives': [{
                'name': '$hex_string',
                'value': '{ F4 23 ( 62 B4 | 56 ) 45 }',
                'type': 'byte'
            }],
            'HexMultipleAlternatives': [{
                'name': '$hex_string',
                'value': '{ F4 23 ( 62 B4 | 56 | 45 ?? 67 ) 45 }',
                'type': 'byte'
            }],
            'RegExp': [{
                'name': '$re1',
                'value': '/md5: [0-9a-fA-F]{32}/',
                'type': 'regex',
                'modifiers': ['nocase'],
            }, {
                'name': '$re2',
                'value': '/state: (on|off)/i',
                'type': 'regex',
            }, {
                'name': '$re3',
                'value': r'/\x00https?:\/\/[^\x00]{4,500}\x00\x00\x00/',
                'type': 'regex',
            }],
            'Xor': [{
                'name': '$xor_string',
                'value': 'This program cannot',
                'type': 'text',
                'modifiers': ['xor']
            }],
            'WideXorAscii': [{
                'name': '$xor_string',
                'value': 'This program cannot',
                'type': 'text',
                'modifiers': ['xor', 'wide', 'ascii']
            }],
            'WideXor': [{
                'name': '$xor_string',
                'value': 'This program cannot',
                'type': 'text',
                'modifiers': ['xor', 'wide']
            }],
            'DoubleBackslash': [{
                'name': '$bs',
                'value': r'\"\\\\\\\"',
                'type': 'text'
            }],
            'DoubleQuote': [{
                'name': '$text_string',
                'value': r'foobar\"',
                'type': 'text'
            }],
            'HorizontalTab': [{
                'name': '$text_string',
                'value': r'foo\tbar',
                'type': 'text'
            }],
            'Newline': [{
                'name': '$text_string',
                'value': r'foo\nbar',
                'type': 'text'
            }],
            'HexEscape': [{
                'name': '$text_string',
                'value': r'foo\x00bar',
                'type': 'text'
            }],
        }

        result = self.parser.parse_string(
            self._read_ruleset('string_ruleset.yar'))

        for entry in result:
            rulename = entry['rule_name']
            kv = entry['strings']

            if rulename not in expected_strings:
                raise AssertionError(UNHANDLED_RULE_MSG.format(rulename))

            self.assertEqual(kv, expected_strings[rulename])

    def test_string_bad_escaped_hex(self):
        """A ``\\x`` escape with non-hex digits must raise ParseTypeError."""
        inputRules = r'''
        rule sample {
            strings:
                $ = "foo\xZZbar"
            condition:
                all of them
        }
        '''

        plyara = Plyara()
        with self.assertRaises(ParseTypeError):
            plyara.parse_string(inputRules)

    def test_string_invalid_escape(self):
        """The ``\\g`` escape in a text string must raise ParseTypeError."""
        inputRules = r'''
        rule sample {
            strings:
                $ = "foo\gbar"
            condition:
                all of them
        }
        '''

        plyara = Plyara()
        with self.assertRaises(ParseTypeError):
            plyara.parse_string(inputRules)

    def test_conditions(self):
        """The condition ruleset must parse without raising."""
        # Just checking for parsing errors
        self.parser.parse_string(self._read_ruleset('condition_ruleset.yar'))

    def test_include(self):
        """The first parsed rule must record the included file name."""
        result = self.parser.parse_string(
            self._read_ruleset('include_ruleset.yar'))
        self.assertEqual(result[0]['includes'], ['string_ruleset.yar'])

    def test_include_statements(self):
        """Three include statements must yield three parser includes."""
        self.parser.parse_string(
            'include "file1.yara"\ninclude "file2.yara"\ninclude "file3.yara"')
        self.assertEqual(len(self.parser.includes), 3)

    def test_rules_from_yara_project(self):
        """The YARA project ruleset must parse into 293 rules."""
        inputRules = self._read_ruleset('test_rules_from_yara_project.yar')

        plyara = Plyara()
        output = plyara.parse_string(inputRules)

        self.assertEqual(len(output), 293)

    def test_multiple_threads(self):
        """Four parsers running in worker threads must each yield 293 rules."""
        inputRules = self._read_ruleset('test_rules_from_yara_project.yar')

        def parse_rules(rules):
            # Use the argument rather than the enclosing ``inputRules`` so
            # the worker really parses what it was submitted with.
            plyara = Plyara()
            return plyara.parse_string(rules)

        with concurrent.futures.ThreadPoolExecutor(max_workers=4) as e:
            futs = [e.submit(parse_rules, inputRules) for _ in range(4)]
            for fut in concurrent.futures.as_completed(futs):
                self.assertEqual(len(fut.result()), 293)

    def test_clear(self):
        """``clear()`` must reset state so a second parse starts from scratch."""
        # instantiate parser
        parser = Plyara()

        # open a ruleset with one or more rules
        inputRules = self._read_ruleset('test_ruleset_2_rules.yar')

        # parse the rules
        parser.parse_string(inputRules)

        # clear the parser's state
        parser.clear()

        # has lineno been reset
        self.assertEqual(parser.lexer.lineno, 1)

        # open a ruleset with one rule
        inputRules = self._read_ruleset('test_ruleset_1_rule.yar')

        # parse the rules
        result = parser.parse_string(inputRules)

        # does the result contain just the rule from the second parse
        self.assertEqual(len(result), 1)
        self.assertEqual(result[0]['rule_name'], 'rule_one')
def _add_guessed_category(signatures, guessed_category) -> None:
    """Add the guessed category to every signature that has no category yet.

    For each signature without a metadata entry containing the ``category``
    key, two entries are appended: ``{'category': guessed_category}`` and
    ``{guessed_category: <rule_name>}``. Signatures that already declare a
    category are left untouched. The signatures are modified in place.
    """
    for signature in signatures:
        metadata = signature.setdefault('metadata', [])

        # Do not override category with guessed category if it already exists
        if any('category' in meta for meta in metadata):
            continue

        metadata.append({'category': guessed_category})
        metadata.append({guessed_category: signature.get('rule_name')})


def yara_update(updater_type, update_config_path, update_output_path,
                download_directory, externals, cur_logger) -> None:
    """
    Using an update configuration file as an input, which contains a list of sources, download all the file(s).

    Each source is downloaded, its files are merged into one ``<source_name>.yar`` file in a temporary
    working directory, and files whose SHA256 differs from the previous run are compiled and imported
    through the Assemblyline client. If the datastore then reports an update, the signatures are
    downloaded and extracted into ``update_output_path`` together with a ``response.yaml`` file.

    :param updater_type: signature type; used in log messages, for the importer and for the datastore queries
    :param update_config_path: path of the YAML update configuration file
    :param update_output_path: directory receiving the extracted signatures and ``response.yaml``
    :param download_directory: directory handed to ``git_clone_repo`` / ``url_download``
    :param externals: value handed to ``_compile_rules``
    :param cur_logger: logger used for all progress and error reporting

    Any ``Exception`` is logged through ``cur_logger.exception`` and swallowed. The ``exit()`` calls raise
    ``SystemExit``, which is not an ``Exception`` subclass and therefore propagates to the caller.
    """
    # noinspection PyBroadException
    try:
        # Load updater configuration
        update_config = {}
        if update_config_path and os.path.exists(update_config_path):
            with open(update_config_path, 'r') as yml_fh:
                # An empty YAML document loads as None; treat it like an empty configuration
                update_config = yaml.safe_load(yml_fh) or {}
        else:
            cur_logger.error(f"Update configuration file doesn't exist: {update_config_path}")
            exit()

        # Exit if no update sources given
        if not update_config.get('sources'):
            cur_logger.error("Update configuration does not contain any source to update from")
            exit()

        # Initialise al_client
        server = update_config['ui_server']
        user = update_config['api_user']
        api_key = update_config['api_key']
        cur_logger.info(f"Connecting to Assemblyline API: {server}...")
        # NOTE(review): verify=False disables TLS certificate verification - confirm this is intended
        al_client = get_client(server, apikey=(user, api_key), verify=False)
        cur_logger.info("Connected!")

        # Parse updater configuration
        previous_update = update_config.get('previous_update', None)
        previous_hash = json.loads(update_config.get('previous_hash', None) or "{}")
        sources = {source['name']: source for source in update_config['sources']}
        files_sha256 = {}
        files_default_classification = {}

        # Create working directory
        updater_working_dir = os.path.join(tempfile.gettempdir(), 'updater_working_dir')
        if os.path.exists(updater_working_dir):
            shutil.rmtree(updater_working_dir)
        os.makedirs(updater_working_dir)

        # Go through each source and download file
        for source_name, source in sources.items():
            os.makedirs(os.path.join(updater_working_dir, source_name))
            # 1. Download signatures
            cur_logger.info(f"Downloading files from: {source['uri']}")
            uri: str = source['uri']

            if uri.endswith('.git'):
                files = git_clone_repo(download_directory, source, cur_logger, previous_update=previous_update)
            else:
                files = [url_download(download_directory, source, cur_logger, previous_update=previous_update)]

            processed_files = set()

            # 2. Aggregate files
            file_name = os.path.join(updater_working_dir, f"{source_name}.yar")
            mode = "w"
            for file in files:
                # Paths are stored normalised, so the lookup must use the normalised form as well
                normalized_file = os.path.normpath(file)

                # File has already been processed before, skip it to avoid duplication of rules
                if normalized_file in processed_files:
                    continue

                cur_logger.info(f"Processing file: {file}")

                file_dirname = os.path.dirname(file)
                processed_files.add(normalized_file)
                with open(file, 'r') as f:
                    f_lines = f.readlines()

                temp_lines = []
                for f_line in f_lines:
                    if f_line.startswith("include"):
                        lines, processed_files = replace_include(f_line, file_dirname, processed_files, cur_logger)
                        temp_lines.extend(lines)
                    else:
                        temp_lines.append(f_line)

                # guess the type of files that we have in the current file
                guessed_category = guess_category(file)
                parser = Plyara()
                # NOTE(review): readlines() keeps trailing newlines, so joining with "\n" inserts blank lines
                # between them; kept as-is because the line format returned by replace_include is not visible here
                signatures = parser.parse_string("\n".join(temp_lines))

                # Ignore "cuckoo" rules
                if "cuckoo" in parser.imports:
                    parser.imports.remove("cuckoo")

                # Guess category
                if guessed_category:
                    _add_guessed_category(signatures, guessed_category)

                # Save all rules from source into single file
                with open(file_name, mode) as f:
                    for s in signatures:
                        # Fix imports and remove cuckoo
                        s['imports'] = utils.detect_imports(s)
                        if "cuckoo" not in s['imports']:
                            f.write(utils.rebuild_yara_rule(s))

                # The first file truncates the aggregate; every later one appends to it
                mode = "a"

            # Check if the file is the same as the last run
            if os.path.exists(file_name):
                cache_name = os.path.basename(file_name)
                sha256 = get_sha256_for_file(file_name)
                if sha256 != previous_hash.get(cache_name, None):
                    files_sha256[cache_name] = sha256
                    files_default_classification[cache_name] = source.get('default_classification',
                                                                          classification.UNRESTRICTED)
                else:
                    cur_logger.info(f'File {cache_name} has not changed since last run. Skipping it...')

        if files_sha256:
            cur_logger.info(f"Found new {updater_type.upper()} rules files to process!")

            yara_importer = YaraImporter(updater_type, al_client, logger=cur_logger)

            # Validating and importing the different signatures
            for base_file in files_sha256:
                cur_logger.info(f"Validating output file: {base_file}")
                cur_file = os.path.join(updater_working_dir, base_file)
                source_name = os.path.splitext(os.path.basename(cur_file))[0]
                default_classification = files_default_classification.get(base_file, classification.UNRESTRICTED)

                # Any failure here propagates to the outer handler, which logs it
                _compile_rules(cur_file, externals, cur_logger)
                yara_importer.import_file(cur_file, source_name, default_classification=default_classification)
        else:
            cur_logger.info(f'No new {updater_type.upper()} rules files to process...')

        # Check if new signatures have been added
        if al_client.signature.update_available(since=previous_update or '', sig_type=updater_type)['update_available']:
            cur_logger.info("An update is available for download from the datastore")

            os.makedirs(update_output_path, exist_ok=True)

            temp_zip_file = os.path.join(update_output_path, 'temp.zip')
            al_client.signature.download(output=temp_zip_file,
                                         query=f"type:{updater_type} AND (status:NOISY OR status:DEPLOYED)")

            if os.path.exists(temp_zip_file):
                with ZipFile(temp_zip_file, 'r') as zip_f:
                    zip_f.extractall(update_output_path)

                os.remove(temp_zip_file)

            # Create the response yaml
            with open(os.path.join(update_output_path, 'response.yaml'), 'w') as yml_fh:
                yaml.safe_dump(dict(hash=json.dumps(files_sha256)), yml_fh)

            cur_logger.info("New ruleset successfully downloaded and ready to use")

        cur_logger.info(f"{updater_type.upper()} updater completed successfully")
    except Exception:
        cur_logger.exception("Updater ended with an exception!")
예제 #14
0
def parse_yara_rules_text(text):
    """Parse ``text`` with a new ``Plyara`` instance and return its result."""
    rule_parser = Plyara()
    parsed = rule_parser.parse_string(text)
    return parsed