def test_optional_keys_missing_in_schema(self, log_mock):
        """JSONParser - Invalid Schema Definition

        Declares 'key12' as an optional top level key even though the schema
        has no such key, then expects parse() to return False and the
        schema-definition error to be logged with the log type and schema.

        NOTE(review): log_mock is presumably injected by a mock.patch
        decorator that is not visible in this part of the file - confirm.
        """
        schema = {
            'key1': [],
            'key2': 'string',
            'key3': 'integer',
            'key9': 'boolean',
            'key10': {},
            'key11': 'float'
        }
        # 'key12' is deliberately absent from the schema defined above
        optional_keys = ['key9', 'key10', 'key11', 'key12']
        options = {
            'schema': schema,
            'configuration': {
                'optional_top_level_keys': optional_keys
            }
        }

        raw_record = json.dumps({
            'key1': ['test data', 'moar test data'],
            'key2': 'string data',
            'key3': 100,
            'key9': True
        })

        log_type = 'invaid_log'
        parser = JSONParser(options, log_type)
        parse_status = parser.parse(raw_record)

        # Parsing must fail, and the failure must be reported via the logger
        assert_equal(parse_status, False)
        log_mock.assert_called_with(
            'Schema definition is not valid (%s):\n%s', log_type, schema)
    def test_json_regex_key(self):
        """JSONParser - Regex key

        The 'message' value holds a JSON object preceded by plain text; with
        'json_regex_key' set to 'message' the parsed record is expected to be
        that embedded object only.
        """
        options = {
            'schema': {
                'nested_key_01': 'string',
                'nested_key_02': 'string'
            },
            'configuration': {
                'json_regex_key': 'message'
            }
        }

        # Text prefix followed by the JSON object that should be extracted
        message = ('<10> auditd[1300] info: '
                   '{"nested_key_01": "nested_info",'
                   '"nested_key_02": "more_nested_info"}')
        raw_record = json.dumps({
            'time': '14:01',
            'date': 'Jan 01, 2017',
            'host': 'host1.test.domain.tld',
            'message': message
        })

        expected_records = [{
            'nested_key_01': 'nested_info',
            'nested_key_02': 'more_nested_info'
        }]

        parser = JSONParser(options)
        parse_status = parser.parse(raw_record)

        assert_equal(parse_status, True)
        assert_equal(parser.parsed_records, expected_records)
    def test_envelope_keys_optional_values(self):
        """JSONParser - Default Values for Missing Envelope Keys

        The record omits the 'host' key; the expected envelope still lists
        'host', with an empty string as its value.
        """
        options = self._json_regex_key_options()

        nested_message = json.dumps({
            'nested_key_01': 'foo',
            'nested_key_02': 'bar'
        })
        # No 'host' key is supplied in this record
        raw_record = json.dumps({
            'message': nested_message,
            'date': '2017/10/01',
            'time': '14:00:00'
        })

        expected_envelope = {
            'host': '',
            'date': '2017/10/01',
            'time': '14:00:00'
        }
        expected_records = [{
            'nested_key_01': 'foo',
            'nested_key_02': 'bar',
            'streamalert:envelope_keys': expected_envelope
        }]

        parser = JSONParser(options)
        parse_status = parser.parse(raw_record)

        assert_equal(parse_status, True)
        assert_equal(parser.parsed_records, expected_records)
    def test_json_regex_key_with_envelope(self):
        """JSONParser - Regex key with envelope

        Parses a record whose 'message' value embeds a JSON object and
        expects the 'host', 'date' and 'time' values to be reported under
        'streamalert:envelope_keys' alongside the extracted fields.
        """
        options = self._json_regex_key_options()

        # Text prefix followed by the JSON object that should be extracted
        message = ('<10> auditd[1300] info: '
                   '{"nested_key_01": "foo",'
                   '"nested_key_02": "bar"}')
        raw_record = json.dumps({
            'time': '14:01',
            'date': 'Jan 01, 2017',
            'host': 'host1.test.domain.tld',
            'message': message
        })

        expected_envelope = {
            'host': 'host1.test.domain.tld',
            'date': 'Jan 01, 2017',
            'time': '14:01'
        }
        expected_records = [{
            'nested_key_01': 'foo',
            'nested_key_02': 'bar',
            'streamalert:envelope_keys': expected_envelope
        }]

        parser = JSONParser(options)
        parse_status = parser.parse(raw_record)

        assert_equal(parse_status, True)
        assert_equal(parser.parsed_records, expected_records)
    def test_single_nested_json(self):
        """JSONParser - Single nested JSON

        Uses the 'json_path' value 'topkey' and expects the object stored
        under that key to be the single parsed record.
        """
        options = {
            'schema': {
                'middlekey1': 'string',
                'middlekey2': []
            },
            'configuration': {
                'json_path': 'topkey'
            }
        }

        payload = {
            'topkey': {
                'middlekey1': '1',
                'middlekey2': [],
            }
        }
        raw_record = json.dumps(payload)

        expected_records = [{'middlekey1': '1', 'middlekey2': []}]

        parser = JSONParser(options)
        parse_status = parser.parse(raw_record)

        assert_equal(parse_status, True)
        assert_equal(parser.parsed_records, expected_records)
    def test_parse_record_copy(self):
        """JSONParser - Parse, Ensure Copy

        Passes a dictionary straight to parse() and checks that the first
        parsed record is a different object from the input dictionary.
        """
        source_record = {'key': 'value'}
        parser = JSONParser({'schema': {'key': 'string'}})

        parse_status = parser.parse(source_record)
        assert_equal(parse_status, True)

        # Identity check: the parsed record must not be the input object
        is_same_object = parser.parsed_records[0] is source_record
        assert_equal(is_same_object, False)
    def test_invalid_json(self):
        """JSONParser - Invalid Input

        Feeds a plain, non-JSON string to parse() and expects a False result
        with the original string listed in invalid_parses.
        """
        options = {'schema': {'name': 'string', 'result': 'string'}}
        raw_record = 'not json data'

        parser = JSONParser(options)
        parse_status = parser.parse(raw_record)

        assert_equal(parse_status, False)
        assert_equal(parser.invalid_parses, [raw_record])
    def test_non_string_input(self):
        """JSONParser - Non String Input

        Passes an already-decoded dictionary to parse() and expects it to be
        accepted and returned as the only parsed record.
        """
        options = {'schema': {'name': 'string', 'result': 'string'}}
        input_record = {'name': 'test', 'result': 'test'}

        parser = JSONParser(options)
        parse_status = parser.parse(input_record)

        assert_equal(parse_status, True)
        assert_equal(parser.parsed_records, [input_record])
    def test_optional_keys_with_json_path(self):
        """JSONParser - Optional top level keys and json path

        Extracts records via 'docs[*]' where some entries omit the optional
        keys; the expected records carry an empty string for each optional
        key that was not supplied.
        """
        options = {
            'schema': {
                'md5': 'string',
                'opt_key': 'string',
                'another_opt_key': 'string'
            },
            'configuration': {
                'json_path': 'docs[*]',
                'optional_top_level_keys': ['opt_key', 'another_opt_key']
            }
        }

        docs = [
            {
                # Only one of the two optional keys is present
                'md5': '58B8702C20DE211D1FCB248D2FDD71D1',
                'opt_key': 'exists'
            },
            {
                # Both optional keys are present
                'md5': '1D1FCB248D2FDD71D158B8702C20DE21',
                'opt_key': 'this_value_is_optional',
                'another_opt_key': 'this_value_is_also_optional'
            },
            {
                # Neither optional key is present
                'md5': '1D1FCB248D2FDD71D158B8702C20DE21'
            }
        ]
        raw_record = json.dumps({
            'server': 'test_server',
            'username': '******',
            'docs': docs
        })

        expected_records = [
            {
                'md5': '58B8702C20DE211D1FCB248D2FDD71D1',
                'opt_key': 'exists',
                'another_opt_key': ''
            },
            {
                'md5': '1D1FCB248D2FDD71D158B8702C20DE21',
                'opt_key': 'this_value_is_optional',
                'another_opt_key': 'this_value_is_also_optional'
            },
            {
                'md5': '1D1FCB248D2FDD71D158B8702C20DE21',
                'opt_key': '',
                'another_opt_key': ''
            }
        ]

        parser = JSONParser(options)
        parse_status = parser.parse(raw_record)

        assert_equal(parse_status, True)
        assert_equal(parser.parsed_records, expected_records)
    def test_extract_via_json_regex_key_no_key(self):
        """JSONParser - Extract via JSON Regex Key, Key Does Not Exist

        Configures 'json_regex_key' as 'key_01', which the record does not
        contain, and expects _extract_via_json_regex_key to return False.
        """
        options = {
            'schema': {'key': 'string'},
            'configuration': {'json_regex_key': 'key_01'}
        }
        parser = JSONParser(options)

        # The record only has 'key', not the configured 'key_01'
        extracted = parser._extract_via_json_regex_key({'key': 'value'})
        assert_equal(extracted, False)
    def test_basic_json(self):
        """JSONParser - Non-nested JSON objects

        Serializes a flat dictionary matching the schema and expects the
        parser to return that same dictionary as its only record.
        """
        options = {'schema': {'name': 'string', 'age': 'integer'}}
        payload = {'name': 'john', 'age': 30}

        parser = JSONParser(options)
        parse_status = parser.parse(json.dumps(payload))

        assert_equal(parse_status, True)
        assert_equal(parser.parsed_records, [payload])
    def test_embedded_json(self):
        """JSONParser - Embedded JSON

        The 'message' values selected by 'test_list[*].message' are JSON
        strings; with 'embedded_json' enabled the expected record is the
        decoded object plus the configured envelope keys.
        """
        envelope_schema = {
            'env_key_01': 'string',
            'env_key_02': 'string'
        }
        options = {
            'schema': {
                'nested_key_01': 'string',
                'nested_key_02': 'string'
            },
            'configuration': {
                'embedded_json': True,
                'envelope_keys': envelope_schema,
                'json_path': 'test_list[*].message'
            }
        }

        # JSON document stored as a string inside the outer record
        embedded_message = json.dumps({
            'nested_key_01': 'bar',
            'nested_key_02': 'baz'
        })
        raw_record = json.dumps({
            'env_key_01': 'data',
            'env_key_02': 'time',
            'test_list': [{
                'id': 'foo',
                'message': embedded_message
            }]
        })

        expected_records = [{
            'nested_key_01': 'bar',
            'nested_key_02': 'baz',
            'streamalert:envelope_keys': {
                'env_key_01': 'data',
                'env_key_02': 'time'
            }
        }]

        parser = JSONParser(options)
        parse_status = parser.parse(raw_record)

        assert_equal(parse_status, True)
        assert_equal(parser.parsed_records, expected_records)
    def test_nested_records_with_missing_keys(self):
        """JSONParser - Nested records with missing keys

        Of the three entries under 'Records', the last one lacks 'group'.
        parse() is still expected to return True, with the two complete
        entries in parsed_records and the incomplete one in invalid_parses.
        """
        options = {
            'schema': {
                'computer_name': 'string',
                'group': 'integer'
            },
            'configuration': {
                'json_path': 'Records[*]'
            }
        }

        complete_records = [
            {'computer_name': 'foo', 'group': 3},
            {'computer_name': 'foo-bar', 'group': 3}
        ]
        # Missing group key
        incomplete_record = {'computer_name': 'foo-bar-baz'}

        raw_record = json.dumps({
            'Records': complete_records + [incomplete_record]
        })

        expected_valid = [
            {'computer_name': 'foo', 'group': 3},
            {'computer_name': 'foo-bar', 'group': 3}
        ]
        expected_invalid = [{'computer_name': 'foo-bar-baz'}]

        parser = JSONParser(options)
        parse_status = parser.parse(raw_record)

        assert_equal(parse_status, True)
        assert_equal(parser.parsed_records, expected_valid)
        assert_equal(parser.invalid_parses, expected_invalid)
    def test_extract_via_json_path_not_dict(self, log_mock):
        """JSONParser - Extract via JSON Path, Not Dictionary

        The embedded JSON selected by the path decodes to a list instead of
        a dictionary; the expected result pairs the decoded list with False
        and the 'not a dictionary' message is expected to be logged.

        NOTE(review): log_mock is presumably injected by a mock.patch
        decorator that is not visible in this part of the file - confirm.
        """
        options = {
            'schema': {'key': 'string'},
            'configuration': {
                'embedded_json': True,
                'json_path': 'key[].value'
            }
        }
        # The embedded value is valid JSON, but it is a list
        input_record = {'key': [{'value': '["list of data"]'}]}

        parser = JSONParser(options)
        extracted = parser._extract_via_json_path(input_record)

        assert_equal(extracted, [(['list of data'], False)])
        log_mock.assert_any_call(
            'Embedded json is invalid: %s', 'record data is not a dictionary')
    def test_extract_via_json_path_bad_json(self, log_mock):
        """JSONParser - Extract via JSON Path, Bad JSON

        The value selected by the path is not JSON at all; the expected
        result pairs the raw string with False and a decode failure message
        is expected to be logged.

        NOTE(review): log_mock is presumably injected by a mock.patch
        decorator that is not visible in this part of the file - confirm.
        NOTE(review): the expected decode message is interpreter-dependent;
        verify it against the json module version this suite runs on.
        """
        options = {
            'schema': {'key': 'string'},
            'configuration': {
                'embedded_json': True,
                'json_path': 'key[].value'
            }
        }
        # The embedded value cannot be decoded as JSON
        input_record = {'key': [{'value': 'not json'}]}

        parser = JSONParser(options)
        extracted = parser._extract_via_json_path(input_record)

        assert_equal(extracted, [('not json', False)])
        log_mock.assert_any_call(
            'Embedded json is invalid: %s', 'No JSON object could be decoded')
    def test_schema_fail(self):
        """JSONParser - Schema Match Failure

        Parses a record whose only key is not part of the schema and expects
        a False result with the record listed in invalid_parses.
        """
        options = {
            'schema': {
                'unit_key_01': 'integer',
                'unit_key_02': 'string'
            }
        }
        # None of the schema keys are present in this record
        payload = {'hey': 1}

        parser = JSONParser(options)
        parse_status = parser.parse(json.dumps(payload))

        assert_equal(parse_status, False)
        assert_equal(parser.invalid_parses, [payload])
    def test_optional_keys_json(self):
        """JSONParser - Optional top level keys

        The record omits the optional keys 'host-id', 'ids' and 'results'
        and supplies 'valid' as the string 'true'. The expected record has
        0, [] and {} for the omitted keys and the boolean True for 'valid'.
        """
        optional_keys = ['host-id', 'ids', 'results', 'valid']
        options = {
            'schema': {
                'columns': {},
                'host': 'string',
                'host-id': 'integer',
                'ids': [],
                'name': 'string',
                'results': {},
                'valid': 'boolean'
            },
            'configuration': {
                'optional_top_level_keys': optional_keys
            }
        }

        raw_record = json.dumps({
            'columns': {'test-column': 1},
            'host': 'unit-test-host-1',
            'name': 'unit-test',
            'valid': 'true'
        })

        expected_records = [{
            'columns': {'test-column': 1},
            'host': 'unit-test-host-1',
            'host-id': 0,
            'ids': [],
            'name': 'unit-test',
            'results': {},
            'valid': True
        }]

        parser = JSONParser(options)
        parse_status = parser.parse(raw_record)

        assert_equal(parse_status, True)
        assert_equal(parser.parsed_records, expected_records)
    def test_regex_key_invalid_json(self):
        """JSONParser - Regex Key with Invalid JSON Object

        The 'message' value looks like a JSON object but is malformed (no
        comma between its pairs), and 'host' is omitted. parse() is expected
        to return False with the outer record listed in invalid_parses.
        """
        options = self._json_regex_key_options()

        # Malformed embedded JSON; the 'host' key is also left out
        payload = {
            'message': '{"nested_key_01": "test" "nested_key_02": "test"}',
            'date': '2017/10/01',
            'time': '14:00:00'
        }

        parser = JSONParser(options)
        parse_status = parser.parse(json.dumps(payload))

        assert_equal(parse_status, False)
        assert_equal(parser.invalid_parses, [payload])
    def test_invalid_json_path(self):
        """JSONParser - Invalid JSON Path

        Configures the path 'Records[*]' against a record that has no
        'Records' key; parse() is expected to return False with the record
        listed in invalid_parses.
        """
        options = {
            'schema': {
                'name': 'string',
                'result': 'string'
            },
            'configuration': {
                'json_path': 'Records[*]'
            }
        }
        # Flat record: nothing for the configured path to select
        input_record = {'name': 'test', 'result': 'test'}

        parser = JSONParser(options)
        parse_status = parser.parse(input_record)

        assert_equal(parse_status, False)
        assert_equal(parser.invalid_parses, [input_record])
    def test_regex_key_non_json(self):
        """JSONParser - Regex Key with Plaintext Input

        The 'message' value is ordinary text with no JSON in it; parse() is
        expected to return False with the outer record listed in
        invalid_parses.
        """
        options = self._json_regex_key_options()

        # The message carries no JSON object at all
        payload = {
            'message': 'hey this is not JSON',
            'date': '2017/10/01',
            'time': '14:00:00',
            'host': 'my-host-1'
        }

        parser = JSONParser(options)
        parse_status = parser.parse(json.dumps(payload))

        assert_equal(parse_status, False)
        assert_equal(parser.invalid_parses, [payload])
    def test_regex_key_json_list(self):
        """JSONParser - Regex Key with a List JSON Object

        The 'message' value is a JSON list rather than an object; parse() is
        expected to return False with the outer record listed in
        invalid_parses.
        """
        options = self._json_regex_key_options()

        # Valid JSON, but a list, so it does not match the schema
        payload = {
            'message': '["nested_key_01", "test"]',
            'date': '2017/10/01',
            'time': '14:00:00',
            'host': 'host-1'
        }

        parser = JSONParser(options)
        parse_status = parser.parse(json.dumps(payload))

        assert_equal(parse_status, False)
        assert_equal(parser.invalid_parses, [payload])
    def test_embedded_json_invalid(self):
        """JSONParser - Embedded JSON, Invalid

        The embedded 'message' string is not decodable JSON; parse() is
        expected to return False with that raw string listed in
        invalid_parses.
        """
        envelope_schema = {
            'env_key_01': 'string',
            'env_key_02': 'string'
        }
        options = {
            'schema': {
                'nested_key_01': 'string',
                'nested_key_02': 'string'
            },
            'configuration': {
                'embedded_json': True,
                'envelope_keys': envelope_schema,
                'json_path': 'test_list[*].message'
            }
        }

        # An object with a lone key and no value is not valid JSON
        bad_message = '{\"invalid_json\"}'

        raw_record = json.dumps({
            'env_key_01': 'data',
            'env_key_02': 'time',
            'test_list': [{
                'id': 'foo',
                'message': bad_message
            }]
        })

        parser = JSONParser(options)
        parse_status = parser.parse(raw_record)

        assert_equal(parse_status, False)
        assert_equal(parser.invalid_parses, [bad_message])
    def test_multi_nested_json(self):
        """JSONParser - Multi-nested JSON

        Uses the path 'profiles.controls[*]' to reach a record nested two
        levels deep and expects that record to be the only parsed result.
        """
        options = {
            'schema': {
                'name': 'string',
                'result': 'string'
            },
            'configuration': {
                'json_path': 'profiles.controls[*]'
            }
        }

        control = {'name': 'test-infra-1', 'result': 'fail'}
        payload = {'profiles': {'controls': [control]}}

        parser = JSONParser(options)
        parse_status = parser.parse(json.dumps(payload))

        assert_equal(parse_status, True)
        assert_equal(parser.parsed_records, [control])
    def test_cloudwatch(self):
        """JSONParser - CloudWatch JSON with envelope keys

        Extracts 'logEvents[*].extractedFields' from a record with two log
        events. The numeric fields arrive as strings and are expected back
        as integers; 'logGroup', 'logStream' and 'owner' are expected under
        'streamalert:envelope_keys' of every parsed record.
        """
        options = {
            'schema': {
                'account': 'integer',
                'action': 'string',
                'bytes': 'integer',
                'destination': 'string',
                'destport': 'integer',
                'eni': 'string',
                'flowlogstatus': 'string',
                'packets': 'integer',
                'protocol': 'integer',
                'source': 'string',
                'srcport': 'integer',
                'version': 'integer',
                'windowend': 'integer',
                'windowstart': 'integer'
            },
            'configuration': {
                'json_path': 'logEvents[*].extractedFields',
                'envelope_keys': {
                    'logGroup': 'string',
                    'logStream': 'string',
                    'owner': 'integer'
                }
            }
        }

        # The two events differ in source, srcport, id and message
        first_event = {
            'extractedFields': {
                'account': '123456789012',
                'action': 'REJECT',
                'bytes': '240',
                'destination': '172.31.12.209',
                'destport': '80',
                'eni': 'eni-77ccbe24',
                'flowlogstatus': 'OK',
                'packets': '4',
                'protocol': '6',
                'source': '172.31.1.54',
                'srcport': '27974',
                'version': '2',
                'windowend': '1488216389',
                'windowstart': '1488216331'
            },
            'id': '33188333197923110564639124232124531540364522455662723073',
            'message': (
                '2 123456789012 eni-77ccbe24 172.31.1.54 172.31.12.209 '
                '27974 80 6 4 240 1488216331 1488216389 REJECT OK'),
            'timestamp': 1488216331000
        }
        second_event = {
            'extractedFields': {
                'account': '123456789012',
                'action': 'REJECT',
                'bytes': '240',
                'destination': '172.31.12.209',
                'destport': '80',
                'eni': 'eni-77ccbe24',
                'flowlogstatus': 'OK',
                'packets': '4',
                'protocol': '6',
                'source': '172.31.39.106',
                'srcport': '22598',
                'version': '2',
                'windowend': '1488216389',
                'windowstart': '1488216331'
            },
            'id': '33188333197923110564639124232124531540364522455662723074',
            'message': (
                '2 123456789012 eni-77ccbe24 172.31.39.106 172.31.12.209 '
                '22598 80 6 4 240 1488216331 1488216389 REJECT OK'),
            'timestamp': 1488216331000
        }

        raw_record = json.dumps({
            'logEvents': [first_event, second_event],
            'logGroup': 'airdev_prod_stream_alert_flow_logs',
            'logStream': 'eni-77ccbe24-all',
            'messageType': 'DATA_MESSAGE',
            'owner': '123456789012',
            'subscriptionFilters': [
                'airdev_prod_stream_alert_flow_logs_to_lambda'
            ]
        })

        # Both expected records share the same envelope values
        expected_envelope = {
            'logGroup': 'airdev_prod_stream_alert_flow_logs',
            'logStream': 'eni-77ccbe24-all',
            'owner': 123456789012
        }
        expected_records = [
            {
                'account': 123456789012,
                'action': 'REJECT',
                'bytes': 240,
                'destination': '172.31.12.209',
                'destport': 80,
                'eni': 'eni-77ccbe24',
                'flowlogstatus': 'OK',
                'packets': 4,
                'protocol': 6,
                'source': '172.31.1.54',
                'srcport': 27974,
                'version': 2,
                'windowend': 1488216389,
                'windowstart': 1488216331,
                'streamalert:envelope_keys': expected_envelope
            },
            {
                'account': 123456789012,
                'action': 'REJECT',
                'bytes': 240,
                'destination': '172.31.12.209',
                'destport': 80,
                'eni': 'eni-77ccbe24',
                'flowlogstatus': 'OK',
                'packets': 4,
                'protocol': 6,
                'source': '172.31.39.106',
                'srcport': 22598,
                'version': 2,
                'windowend': 1488216389,
                'windowstart': 1488216331,
                'streamalert:envelope_keys': expected_envelope
            }
        ]

        parser = JSONParser(options)
        parse_status = parser.parse(raw_record)

        assert_equal(parse_status, True)
        assert_equal(parser.parsed_records, expected_records)
    def test_cloudtrail(self):
        """JSONParser - Cloudtrail JSON

        Extracts 'Records[*]' from a payload holding two records and expects
        both back unchanged, including the None 'responseElements' values
        whose schema type is 'string'.
        """
        options = {
            'schema': {
                'eventVersion': 'string',
                'eventID': 'string',
                'eventTime': 'string',
                'requestParameters': {},
                'eventType': 'string',
                'responseElements': 'string',
                'awsRegion': 'string',
                'eventName': 'string',
                'userIdentity': {},
                'eventSource': 'string',
                'requestID': 'string',
                'userAgent': 'string',
                'sourceIPAddress': 'string',
                'recipientAccountId': 'string'
            },
            'configuration': {
                'json_path': 'Records[*]'
            }
        }

        first_record = {
            'eventVersion': '1.0.0',
            'eventID': '0000000',
            'eventTime': '2016-12-31T12:00:00Z',
            'requestParameters': {
                'streamName': 'stream_alert'
            },
            'eventType': 'AwsApiCall',
            'responseElements': None,
            'awsRegion': 'us-west-1',
            'eventName': 'DescribeStream',
            'userIdentity': {
                'userName': '******',
                'principalId': 'AAAAAAAAAAAAAAAA',
                'accessKeyId': 'FFFFFFFFFFFFFFFFFF',
                'type': 'IAMUser',
                'arn': 'arn:aws:iam::111111111111:user/stream_alert_user',
                'accountId': '111111111111'
            },
            'eventSource': 'kinesis.amazonaws.com',
            'requestID': 'dddddddddd',
            'userAgent': 'aws-sdk',
            'sourceIPAddress': '127.0.0.1',
            'recipientAccountId': '1111111111111'
        }
        second_record = {
            'eventVersion': '2.0.0',
            'eventID': '1111111',
            'eventTime': '2017-01-31T12:00:00Z',
            'requestParameters': {
                'streamName': 'stream_alert_prod'
            },
            'eventType': 'AwsApiCall',
            'responseElements': None,
            'awsRegion': 'us-east-1',
            'eventName': 'DescribeStream',
            'userIdentity': {
                'userName': '******',
                'principalId': 'BBBBBBBBBBBBBBBB',
                'accessKeyId': 'GGGGGGGGGGGGGGGG',
                'type': 'IAMUser',
                'arn': 'arn:aws:iam::222222222222:user/stream_alert_prod_user',
                'accountId': '222222222222'
            },
            'eventSource': 'kinesis.amazonaws.com',
            'requestID': 'dddddddddd',
            'userAgent': 'aws-sdk',
            'sourceIPAddress': '127.0.0.2',
            'recipientAccountId': '222222222222'
        }
        payload = {'Records': [first_record, second_record]}

        # Every input record is expected to come back as-is, in order
        expected_records = list(payload['Records'])

        parser = JSONParser(options)
        parse_status = parser.parse(json.dumps(payload))

        assert_equal(parse_status, True)
        assert_equal(parser.parsed_records, expected_records)
    def test_inspec(self):
        """JSONParser - Inspec JSON

        Extracts 'profiles[].controls[]' from a report-shaped payload and
        expects every control of every profile back unchanged, including
        the None 'desc' and 'title' values whose schema type is 'string'.
        """
        options = {
            'schema': {
                'title': 'string',
                'desc': 'string',
                'impact': 'float',
                'refs': [],
                'tags': {},
                'code': 'string',
                'id': 'string',
                'source_location': {},
                'results': []
            },
            'configuration': {
                'json_path': 'profiles[].controls[]'
            }
        }

        first_control = {
            'code': 'code snippet 01',
            'desc': None,
            'id': '(generated from osquery.rb:1 b6ef5242a32098111f11cb7d21a05bb8)',
            'impact': 0.5,
            'refs': [],
            'results': [{
                'code_desc': 'Processes states should eq [\'SNs\']',
                'run_time': 0.000992,
                'start_time': '2017-02-28 09:24:47 -0800',
                'status': 'passed'
            }],
            'source_location': {
                'line': 87,
                'ref': '/opt/inspec/.../control_eval_context.rb'
            },
            'tags': {},
            'title': None
        }
        second_control = {
            'code': 'code snippet 02',
            'desc': None,
            'id': '(generated from osquery.rb:6 124c5f23cd897a018673b1ea512a9473)',
            'impact': 0.5,
            'refs': [],
            'results': [{
                'code_desc': 'File osquery.conf mode should cmp == \'0400\'',
                'message': 'foo message',
                'run_time': 0.011729,
                'start_time': '2017-02-28 09:24:47 -0800',
                'status': 'failed'
            }],
            'source_location': {
                'line': 87,
                'ref': '/opt/inspec/.../control_eval_context.rb'
            },
            'tags': {},
            'title': None
        }
        control_group = {
            'controls': [
                '(generated from osquery.rb:1 b6ef5242a32098111f11cb7d21a05bb8)'
            ],
            'id': 'osquery.rb',
            'title': None
        }

        payload = {
            'other_checks': [],
            'profiles': [{
                'attributes': [],
                'controls': [first_control, second_control],
                'groups': [control_group],
                'supports': []
            }],
            'statistics': {
                'duration': 0.040234
            },
            'version': '1.4.1'
        }

        # Flatten the controls of all profiles, keeping their order
        expected_records = []
        for profile in payload['profiles']:
            expected_records.extend(profile['controls'])

        parser = JSONParser(options)
        parse_status = parser.parse(json.dumps(payload))

        assert_equal(parse_status, True)
        assert_equal(parser.parsed_records, expected_records)