def test4append_values_bytestring(self):
        """
        Check the append_values method with raw_match_object being a bytestring.
        The internal float conversion fails with a ValueError and the match_string is appended instead.
        """
        detector = EventTypeDetector(self.aminer_config, [self.stream_printer_event_handler])
        # Initialize all values with a first atom.
        timestamp = time.time()
        atom = LogAtom(b'This is a string', ParserMatch(MatchElement('path', 'This is a string', b'This is a string', None)),
                       timestamp, self.__class__.__name__)
        detector.receive_atom(atom)

        detector.values = [[[]]]
        detector.append_values(atom, 0)
        self.assertEqual(detector.values, [[['This is a string']]])

        # A date-like bytestring is also not convertible to float and must be appended as its match_string.
        atom = LogAtom(b'24.05.', ParserMatch(MatchElement('path', '24.05.', b'24.05.', None)), timestamp, self.__class__.__name__)
        detector.values = [[[]]]
        detector.append_values(atom, 0)
        self.assertEqual(detector.values, [[['24.05.']]])
 def test2receive_atoms_with_defined_path_list(self):
     """
     Receive multiple log_atoms with default values of the EventTypeDetector.
     path_list is set to a static list of paths and variable_key_list should not be used.
     """
     detector = EventTypeDetector(self.aminer_config, [self.stream_printer_event_handler],
                                  path_list=['parser/type/path/nametype'])
     # Expected outcome per atom: True means the atom does NOT produce a new event (state must stay unchanged).
     expected = [True, False, True, False, True, False, True, True, False, False, True, True, False, True, False, True, False,
                 True, False, False, True]
     atoms = []
     for line in self.log_lines:
         atoms.append(LogAtom(line, ParserMatch(self.parsing_model.get_match_element('parser', MatchContext(line))),
                              time.time(), self.__class__.__name__))
     for index, atom in enumerate(atoms):
         snapshot = (detector.num_events, detector.num_eventlines, detector.total_records, detector.longest_path)
         self.assertEqual(detector.receive_atom(atom), not expected[index], index)
         if expected[index]:
             # The detector state must be identical to the snapshot taken before receiving the atom.
             self.assertEqual(snapshot, (detector.num_events, detector.num_eventlines, detector.total_records,
                                         detector.longest_path))
# Example #3
# 0
 def test5check_value_reduction(self):
     """Check that the stored values are reduced to the last min_num_vals entries once max_num_vals is exceeded."""
     detector = EventTypeDetector(self.aminer_config, [self.stream_printer_event_handler])
     timestamp = time.time()

     def make_atom(num):
         # Build a LogAtom whose raw_match_object is the integer itself.
         return LogAtom(str(num).encode(), ParserMatch(MatchElement('path', str(num).encode(), num, None)), timestamp,
                        self.__class__.__name__)

     expected = [[[]]]
     # Fill exactly up to the threshold; values must grow one float per atom.
     for num in range(1, detector.max_num_vals + 1):
         expected[0][0].append(float(num))
         self.assertTrue(detector.receive_atom(make_atom(num)))
         self.assertEqual(detector.values, expected)
     # One atom beyond the threshold triggers the reduction to min_num_vals values.
     num = detector.max_num_vals + 1
     expected[0][0].append(float(num))
     self.assertTrue(detector.receive_atom(make_atom(num)))
     self.assertEqual(detector.values, [[expected[0][0][-detector.min_num_vals:]]])
 def test1receive_atoms_with_default_values(self):
     """
     Receive multiple log_atoms with default values of the EventTypeDetector.
     path_list is empty and all paths are learned dynamically in variable_key_list.
     """
     detector = EventTypeDetector(self.aminer_config, [self.stream_printer_event_handler])
     atoms = []
     for line in self.log_lines:
         atoms.append(LogAtom(line, ParserMatch(self.parsing_model.get_match_element('parser', MatchContext(line))),
                              time.time(), self.__class__.__name__))
     # Every atom must be accepted and total_records must count up one by one.
     for count, atom in enumerate(atoms, start=1):
         self.assertTrue(detector.receive_atom(atom))
         self.assertEqual(detector.total_records, count)
 def test6persist_and_load_data(self):
     """Check the persistence by persisting values and verifying a freshly created detector reloads them identically."""
     detector = EventTypeDetector(self.aminer_config, [self.stream_printer_event_handler])
     atom = LogAtom(b'22.2', ParserMatch(MatchElement('path', '22.2', 22.2, None)), time.time(), self.__class__.__name__)
     detector.receive_atom(atom)
     detector.do_persist()
     # A new instance loads the persisted state in its constructor; all persisted attributes must match.
     loaded = EventTypeDetector(self.aminer_config, [self.stream_printer_event_handler])
     for attribute in ('variable_key_list', 'values', 'longest_path', 'check_variables', 'num_eventlines'):
         self.assertEqual(getattr(detector, attribute), getattr(loaded, attribute))
def build_analysis_pipeline(analysis_context):
    """
    Define the function to create pipeline for parsing the log data.
    It has also to define an AtomizerFactory to instruct aminer how to process incoming data streams to create log atoms from them.
    All analysis components are registered on analysis_context and chained behind a single SubhandlerFilter.
    """
    # Shared byte constants reused by several model element definitions below.
    date_format_string = b'%Y-%m-%d %H:%M:%S'
    cron = b' cron['

    # Build the parsing model:

    service_children_disk_report = [
        FixedDataModelElement('Space', b' Current Disk Data is: Filesystem     Type  Size  Used Avail Use%'),
        DelimitedDataModelElement('Data', b'%'), AnyByteDataModelElement('Rest')]

    service_children_login_details = [
        FixedDataModelElement('User/LoginDetails', b'User '), DelimitedDataModelElement('Username', b' '),
        FixedWordlistDataModelElement('Status', [b' logged in', b' logged out']),
        OptionalMatchModelElement('PastTime', SequenceModelElement('Time', [
            FixedDataModelElement('Blank', b' '), DecimalIntegerValueModelElement('Minutes'),
            FixedDataModelElement('Ago', b' minutes ago.')]))]

    service_children_cron_job = [
        DateTimeModelElement('DTM', date_format_string), FixedDataModelElement('UNameSpace1', b' '),
        DelimitedDataModelElement('UName', b' '), FixedDataModelElement('UNameSpace2', b' '), DelimitedDataModelElement('User', b' '),
        FixedDataModelElement('Cron', cron), DecimalIntegerValueModelElement('JobNumber'),
        FixedDataModelElement('Details', b']: Job `cron.daily` started.')]

    service_children_random_time = [FixedDataModelElement('Space', b'Random: '), DecimalIntegerValueModelElement('Random')]

    service_children_sensors = [SequenceModelElement('CPUTemp', [
        FixedDataModelElement('FixedTemp', b'CPU Temp: '), DecimalIntegerValueModelElement('Temp'),
        FixedDataModelElement('Degrees', b'\xc2\xb0C')]), FixedDataModelElement('Space1', b', '), SequenceModelElement('CPUWorkload', [
            FixedDataModelElement('FixedWorkload', b'CPU Workload: '), DecimalIntegerValueModelElement('Workload'),
            FixedDataModelElement('Percent', b'%')]), FixedDataModelElement('Space2', b', '),
        DateTimeModelElement('DTM', date_format_string)]

    service_children_user_ip_address = [
        FixedDataModelElement('User/UserIPAddress', b'User '), DelimitedDataModelElement('Username', b' '),
        FixedDataModelElement('Action', b' changed IP address to '), IpAddressDataModelElement('IP')]

    service_children_cron_job_announcement = [
        DateTimeModelElement('DTM', date_format_string), FixedDataModelElement('Space', b' '),
        DelimitedDataModelElement('UName', b' '), FixedDataModelElement('Cron', cron), DecimalIntegerValueModelElement('JobNumber'),
        FixedDataModelElement('Run', b']: Will run job `'),
        FixedWordlistDataModelElement('CronType', [b'cron.daily', b'cron.hourly', b'cron.monthly', b'cron.weekly']),
        FixedDataModelElement('StartTime', b'\' in 5 min.')]

    service_children_cron_job_execution = [
        DateTimeModelElement('DTM', date_format_string), FixedDataModelElement('Space1', b' '),
        DelimitedDataModelElement('UName', b' '), FixedDataModelElement('Cron', cron), DecimalIntegerValueModelElement('JobNumber'),
        FixedDataModelElement('Job', b']: Job `'),
        FixedWordlistDataModelElement('CronType', [b'cron.daily', b'cron.hourly', b'cron.monthly', b'cron.weekly']),
        FixedDataModelElement('Started', b'\' started')]

    # Audit log branches: PATH and SYSCALL record types as produced by the Linux audit subsystem.
    service_children_audit = [SequenceModelElement('path', [
        FixedDataModelElement('type', b'type=PATH '), FixedDataModelElement('msg_audit', b'msg=audit('),
        DelimitedDataModelElement('msg', b':'), FixedDataModelElement('placeholder', b':'), DecimalIntegerValueModelElement('id'),
        FixedDataModelElement('item_string', b'): item='), DecimalIntegerValueModelElement('item'),
        FixedDataModelElement('name_string', b' name="'), DelimitedDataModelElement('name', b'"'),
        FixedDataModelElement('inode_string', b'" inode='), DecimalIntegerValueModelElement('inode'),
        FixedDataModelElement('dev_string', b' dev='), DelimitedDataModelElement('dev', b' '),
        FixedDataModelElement('mode_string', b' mode='),
        DecimalIntegerValueModelElement('mode', value_pad_type=DecimalIntegerValueModelElement.PAD_TYPE_ZERO),
        FixedDataModelElement('ouid_string', b' ouid='), DecimalIntegerValueModelElement('ouid'),
        FixedDataModelElement('ogid_string', b' ogid='), DecimalIntegerValueModelElement('ogid'),
        FixedDataModelElement('rdev_string', b' rdev='), DelimitedDataModelElement('rdev', b' '),
        FixedDataModelElement('nametype_string', b' nametype='), FixedWordlistDataModelElement('nametype', [b'NORMAL', b'ERROR'])]),
        SequenceModelElement('syscall', [
            FixedDataModelElement('type', b'type=SYSCALL '), FixedDataModelElement('msg_audit', b'msg=audit('),
            DelimitedDataModelElement('msg', b':'), FixedDataModelElement('placeholder', b':'), DecimalIntegerValueModelElement('id'),
            FixedDataModelElement('arch_string', b'): arch='), DelimitedDataModelElement('arch', b' '),
            FixedDataModelElement('syscall_string', b' syscall='), DecimalIntegerValueModelElement('syscall'),
            FixedDataModelElement('success_string', b' success='), FixedWordlistDataModelElement('success', [b'yes', b'no']),
            FixedDataModelElement('exit_string', b' exit='), DecimalIntegerValueModelElement('exit'),
            AnyByteDataModelElement('remainding_data')])]

    # Demonstration branch exercising one instance of (almost) every model element type.
    service_children_parsing_model_element = [
        DateTimeModelElement('DateTimeModelElement', b'Current DateTime: %d.%m.%Y %H:%M:%S'),
        DecimalFloatValueModelElement('DecimalFloatValueModelElement', value_sign_type='optional'),
        DecimalIntegerValueModelElement('DecimalIntegerValueModelElement', value_sign_type='optional', value_pad_type='blank'),
        SequenceModelElement('se', [
            DelimitedDataModelElement('DelimitedDataModelElement', b';'), FixedDataModelElement('FixedDataModelElement', b';')])]

    # ElementValueBranchModelElement
    fixed_data_me1 = FixedDataModelElement("fixed1", b'match ')
    fixed_data_me2 = FixedDataModelElement("fixed2", b'fixed String')
    fixed_wordlist_data_model_element = FixedWordlistDataModelElement("wordlist", [b'data: ', b'string: '])
    decimal_integer_value_model_element = DecimalIntegerValueModelElement("decimal")

    service_children_parsing_model_element.append(
        ElementValueBranchModelElement('ElementValueBranchModelElement', FirstMatchModelElement("first", [
            SequenceModelElement("seq1", [fixed_data_me1, fixed_wordlist_data_model_element]),
            SequenceModelElement("seq2", [fixed_data_me1, fixed_wordlist_data_model_element, fixed_data_me2])]), "wordlist",
                                 {0: decimal_integer_value_model_element, 1: fixed_data_me2}))
    service_children_parsing_model_element.append(HexStringModelElement('HexStringModelElement'))
    service_children_parsing_model_element.append(SequenceModelElement('se2', [
        FixedDataModelElement('FixedDataModelElement', b'Gateway IP-Address: '), IpAddressDataModelElement('IpAddressDataModelElement')]))
    # Determine the current locale for the MultiLocaleDateTimeModelElement; fall back to en_US/utf8 when none is configured.
    import locale
    loc = locale.getlocale()
    if loc == (None, None):
        loc = ('en_US', 'utf8')
    service_children_parsing_model_element.append(
        MultiLocaleDateTimeModelElement('MultiLocaleDateTimeModelElement', [(b'%b %d %Y', None, '%s.%s' % loc)]))
    service_children_parsing_model_element.append(
        RepeatedElementDataModelElement('RepeatedElementDataModelElement', SequenceModelElement('SequenceModelElement', [
            FixedDataModelElement('FixedDataModelElement', b'[drawn number]: '),
            DecimalIntegerValueModelElement('DecimalIntegerValueModelElement')]), 1))
    service_children_parsing_model_element.append(VariableByteDataModelElement('VariableByteDataModelElement', b'-@#'))
    service_children_parsing_model_element.append(SequenceModelElement('se', [
        WhiteSpaceLimitedDataModelElement('WhiteSpaceLimitedDataModelElement'), FixedDataModelElement('fixed', b' ')]))

    # The Base64StringModelElement must be just before the AnyByteDataModelElement to avoid unexpected Matches.
    service_children_parsing_model_element.append(Base64StringModelElement('Base64StringModelElement'))

    # The OptionalMatchModelElement must be paired with a FirstMatchModelElement because it accepts all data and thus no data gets
    # to the AnyByteDataModelElement. The AnyByteDataModelElement must be last, because all bytes are accepted.
    service_children_parsing_model_element.append(
        OptionalMatchModelElement('/', FirstMatchModelElement('FirstMatchModelElement//optional', [
            FixedDataModelElement('FixedDataModelElement', b'The-searched-element-was-found!'), SequenceModelElement('se', [
                FixedDataModelElement('FixedDME', b'Any:'), AnyByteDataModelElement('AnyByteDataModelElement')])])))

    # One single-byte FixedDataModelElement per character g..l; these form the 'ECD' branch of the parsing model below.
    alphabet = b'ghijkl'
    service_children_ecd = []
    for _, char in enumerate(alphabet):
        char = bytes([char])
        service_children_ecd.append(FixedDataModelElement(char.decode(), char))

    # Root of the parsing model: the first matching branch wins.
    parsing_model = FirstMatchModelElement('model', [
        SequenceModelElement('CronAnnouncement', service_children_cron_job_announcement),
        SequenceModelElement('CronExecution', service_children_cron_job_execution),
        SequenceModelElement('DailyCron', service_children_cron_job), SequenceModelElement('DiskReport', service_children_disk_report),
        SequenceModelElement('LoginDetails', service_children_login_details), DecimalIntegerValueModelElement('Random'),
        SequenceModelElement('RandomTime', service_children_random_time), SequenceModelElement('Sensors', service_children_sensors),
        SequenceModelElement('IPAddresses', service_children_user_ip_address), FirstMatchModelElement('type', service_children_audit),
        FirstMatchModelElement('ECD', service_children_ecd), FirstMatchModelElement('ParsingME', service_children_parsing_model_element)])

    # Some generic imports.
    from aminer.analysis import AtomFilters

    # Create all global handler lists here and append the real handlers later on.
    # Use this filter to distribute all atoms to the analysis handlers.
    atom_filter = AtomFilters.SubhandlerFilter(None)

    from aminer.analysis.TimestampCorrectionFilters import SimpleMonotonicTimestampAdjust
    simple_monotonic_timestamp_adjust = SimpleMonotonicTimestampAdjust([atom_filter])
    analysis_context.register_component(simple_monotonic_timestamp_adjust, component_name="SimpleMonotonicTimestampAdjust")

    # Event output: anomalies are converted to JSON and printed to the stream handler.
    from aminer.events.StreamPrinterEventHandler import StreamPrinterEventHandler
    from aminer.events.JsonConverterHandler import JsonConverterHandler
    stream_printer_event_handler = StreamPrinterEventHandler(analysis_context)
    json_converter_handler = JsonConverterHandler([stream_printer_event_handler], analysis_context)
    anomaly_event_handlers = [json_converter_handler]

    # Now define the AtomizerFactory using the model. A simple line based one is usually sufficient.
    from aminer.input.SimpleByteStreamLineAtomizerFactory import SimpleByteStreamLineAtomizerFactory
    analysis_context.atomizer_factory = SimpleByteStreamLineAtomizerFactory(parsing_model, [simple_monotonic_timestamp_adjust],
                                                                            anomaly_event_handlers)

    # Just report all unparsed atoms to the event handlers.
    from aminer.analysis.UnparsedAtomHandlers import SimpleUnparsedAtomHandler, VerboseUnparsedAtomHandler
    simple_unparsed_atom_handler = SimpleUnparsedAtomHandler(anomaly_event_handlers)
    atom_filter.add_handler(simple_unparsed_atom_handler, stop_when_handled_flag=False)
    analysis_context.register_component(simple_unparsed_atom_handler, component_name="SimpleUnparsedHandler")

    verbose_unparsed_atom_handler = VerboseUnparsedAtomHandler(anomaly_event_handlers, parsing_model)
    atom_filter.add_handler(verbose_unparsed_atom_handler, stop_when_handled_flag=True)
    analysis_context.register_component(verbose_unparsed_atom_handler, component_name="VerboseUnparsedHandler")

    from aminer.analysis.TimestampsUnsortedDetector import TimestampsUnsortedDetector
    timestamps_unsorted_detector = TimestampsUnsortedDetector(analysis_context.aminer_config, anomaly_event_handlers)
    atom_filter.add_handler(timestamps_unsorted_detector)
    analysis_context.register_component(timestamps_unsorted_detector, component_name="TimestampsUnsortedDetector")

    from aminer.analysis import Rules
    from aminer.analysis.AllowlistViolationDetector import AllowlistViolationDetector
    allowlist_rules = [
        Rules.OrMatchRule([
            Rules.AndMatchRule([
                Rules.PathExistsMatchRule('/model/LoginDetails/PastTime/Time/Minutes'),
                Rules.NegationMatchRule(Rules.ValueMatchRule('/model/LoginDetails/Username', b'root')),
                Rules.DebugMatchRule(debug_match_result=True)]),
            Rules.AndMatchRule([
                Rules.NegationMatchRule(Rules.PathExistsMatchRule('/model/LoginDetails/PastTime/Time/Minutes')),
                Rules.PathExistsMatchRule('/model/LoginDetails'),
                Rules.DebugMatchRule(debug_match_result=True)]),
            Rules.NegationMatchRule(Rules.PathExistsMatchRule('/model/LoginDetails'))])]

    # This rule list should trigger, when the line does not look like: User root (logged in, logged out)
    # or User 'username' (logged in, logged out) x minutes ago.
    allowlist_violation_detector = AllowlistViolationDetector(analysis_context.aminer_config, allowlist_rules, anomaly_event_handlers,
                                                              output_log_line=True)
    analysis_context.register_component(allowlist_violation_detector, component_name="Allowlist")
    atom_filter.add_handler(allowlist_violation_detector)

    from aminer.analysis.ParserCount import ParserCount
    parser_count = ParserCount(analysis_context.aminer_config, None, anomaly_event_handlers, 10)
    analysis_context.register_component(parser_count, component_name="ParserCount")
    atom_filter.add_handler(parser_count)

    # The EventTypeDetector instance is shared by the variable type / correlation detectors below.
    from aminer.analysis.EventTypeDetector import EventTypeDetector
    etd = EventTypeDetector(analysis_context.aminer_config, anomaly_event_handlers)
    analysis_context.register_component(etd, component_name="EventTypeDetector")
    atom_filter.add_handler(etd)

    from aminer.analysis.VariableTypeDetector import VariableTypeDetector
    vtd = VariableTypeDetector(analysis_context.aminer_config, anomaly_event_handlers, etd, silence_output_except_indicator=False,
                               output_log_line=False, ignore_list=["/model/RandomTime"])
    analysis_context.register_component(vtd, component_name="VariableTypeDetector")
    atom_filter.add_handler(vtd)

    # NOTE: the local name vtd is reused here for the VariableCorrelationDetector; both are already registered above/below.
    from aminer.analysis.VariableCorrelationDetector import VariableCorrelationDetector
    vtd = VariableCorrelationDetector(analysis_context.aminer_config, anomaly_event_handlers, etd, disc_div_thres=0.5,
                                      ignore_list=["/model/RandomTime"])
    analysis_context.register_component(vtd, component_name="VariableCorrelationDetector")
    atom_filter.add_handler(vtd)

    from aminer.analysis.EventCorrelationDetector import EventCorrelationDetector
    ecd = EventCorrelationDetector(analysis_context.aminer_config, anomaly_event_handlers, check_rules_flag=True,
                                   hypothesis_max_delta_time=1.0)
    analysis_context.register_component(ecd, component_name="EventCorrelationDetector")
    atom_filter.add_handler(ecd)

    from aminer.analysis.EventFrequencyDetector import EventFrequencyDetector
    efd = EventFrequencyDetector(analysis_context.aminer_config, anomaly_event_handlers, window_size=0.1)
    analysis_context.register_component(efd, component_name="EventFrequencyDetector")
    atom_filter.add_handler(efd)

    from aminer.analysis.EventSequenceDetector import EventSequenceDetector
    esd = EventSequenceDetector(analysis_context.aminer_config, anomaly_event_handlers, ['/model/ParsingME'], ignore_list=[
        '/model/ECD/g', '/model/ECD/h', '/model/ECD/i', '/model/ECD/j', '/model/ECD/k', '/model/ECD/l', '/model/Random',
        '/model/RandomTime', '/model/DailyCron'])
    analysis_context.register_component(esd, component_name="EventSequenceDetector")
    atom_filter.add_handler(esd)

    from aminer.analysis.MatchFilter import MatchFilter
    match_filter = MatchFilter(analysis_context.aminer_config, ['/model/Random'], anomaly_event_handlers, target_value_list=[
        1, 10, 100], output_log_line=True)
    analysis_context.register_component(match_filter, component_name="MatchFilter")
    atom_filter.add_handler(match_filter)

    from aminer.analysis.NewMatchPathDetector import NewMatchPathDetector
    new_match_path_detector = NewMatchPathDetector(analysis_context.aminer_config, anomaly_event_handlers, auto_include_flag=True,
                                                   output_log_line=True)
    analysis_context.register_component(new_match_path_detector, component_name="NewMatchPath")
    atom_filter.add_handler(new_match_path_detector)

    def tuple_transformation_function(match_value_list):
        """Only allow output of the EnhancedNewMatchPathValueComboDetector after every 10th element."""
        # extra_data[2] holds the occurrence count for this value tuple; toggle auto_include_flag around every 10th hit.
        extra_data = enhanced_new_match_path_value_combo_detector.known_values_dict.get(tuple(match_value_list))
        if extra_data is not None:
            mod = 10
            if (extra_data[2] + 1) % mod == 0:
                enhanced_new_match_path_value_combo_detector.auto_include_flag = False
            else:
                enhanced_new_match_path_value_combo_detector.auto_include_flag = True
        return match_value_list

    from aminer.analysis.EnhancedNewMatchPathValueComboDetector import EnhancedNewMatchPathValueComboDetector
    enhanced_new_match_path_value_combo_detector = EnhancedNewMatchPathValueComboDetector(analysis_context.aminer_config, [
        '/model/DailyCron/UName', '/model/DailyCron/JobNumber'], anomaly_event_handlers, auto_include_flag=True,
        tuple_transformation_function=tuple_transformation_function, output_log_line=True)
    analysis_context.register_component(enhanced_new_match_path_value_combo_detector, component_name="EnhancedNewValueCombo")
    atom_filter.add_handler(enhanced_new_match_path_value_combo_detector)

    import re
    ip_match_action = Rules.EventGenerationMatchAction(
        "Analysis.Rules.IPv4InRFC1918MatchRule", "Private IP address occurred!", anomaly_event_handlers)

    # NOTE(review): mt/vdmt appear to restrict matches to a window modulo 3 seconds, vdmt per matched value — confirm against Rules docs.
    vdmt = Rules.ValueDependentModuloTimeMatchRule(None, 3, ["/model/ECD/j", "/model/ECD/k", "/model/ECD/l"], {b"e": [0, 2.95]}, [0, 3])
    mt = Rules.ModuloTimeMatchRule(None, 3, 0, 3, None)
    time_allowlist_rules = [
        Rules.AndMatchRule([
            Rules.ParallelMatchRule([
                Rules.ValueDependentDelegatedMatchRule([
                    '/model/ECD/g', '/model/ECD/h', '/model/ECD/i', '/model/ECD/j', '/model/ECD/k', '/model/ECD/l'], {
                        (b"a",): mt, (b"b",): mt, (b"c",): mt, (b"d",): vdmt, (b"e",): vdmt, (b"f",): vdmt, None: mt}, mt),
                Rules.IPv4InRFC1918MatchRule("/model/ParsingME/se2/IpAddressDataModelElement", ip_match_action),
                Rules.DebugHistoryMatchRule(debug_match_result=True)
            ]),
            # IP addresses 8.8.8.8, 8.8.4.4 and 10.0.0.0 - 10.255.255.255 are not allowed
            Rules.NegationMatchRule(Rules.ValueListMatchRule("/model/ParsingME/se2/IpAddressDataModelElement", [134744072, 134743044])),
            Rules.NegationMatchRule(Rules.ValueRangeMatchRule("/model/ParsingME/se2/IpAddressDataModelElement", 167772160, 184549375)),
            Rules.NegationMatchRule(Rules.StringRegexMatchRule("/model/type/syscall/success", re.compile(b"^no$")))
        ])
    ]
    time_allowlist_violation_detector = AllowlistViolationDetector(
        analysis_context.aminer_config, time_allowlist_rules, anomaly_event_handlers, output_log_line=True)
    analysis_context.register_component(time_allowlist_violation_detector, component_name="TimeAllowlist")
    atom_filter.add_handler(time_allowlist_violation_detector)

    from aminer.analysis.HistogramAnalysis import HistogramAnalysis, LinearNumericBinDefinition, ModuloTimeBinDefinition, \
        PathDependentHistogramAnalysis
    modulo_time_bin_definition = ModuloTimeBinDefinition(86400, 3600, 0, 1, 24, True)
    linear_numeric_bin_definition = LinearNumericBinDefinition(50, 5, 20, True)
    histogram_analysis = HistogramAnalysis(analysis_context.aminer_config, [
        ('/model/RandomTime/Random', modulo_time_bin_definition), ('/model/Random', linear_numeric_bin_definition)], 10,
        anomaly_event_handlers, output_log_line=True)
    analysis_context.register_component(histogram_analysis, component_name="HistogramAnalysis")
    atom_filter.add_handler(histogram_analysis)

    path_dependent_histogram_analysis = PathDependentHistogramAnalysis(
        analysis_context.aminer_config, '/model/RandomTime', modulo_time_bin_definition, 10, anomaly_event_handlers, output_log_line=True)
    analysis_context.register_component(path_dependent_histogram_analysis, component_name="PathDependentHistogramAnalysis")
    atom_filter.add_handler(path_dependent_histogram_analysis)

    from aminer.analysis.MatchValueAverageChangeDetector import MatchValueAverageChangeDetector
    match_value_average_change_detector = MatchValueAverageChangeDetector(analysis_context.aminer_config, anomaly_event_handlers, None, [
        '/model/Random'], 100, 10, output_log_line=True)
    analysis_context.register_component(match_value_average_change_detector, component_name="MatchValueAverageChange")
    atom_filter.add_handler(match_value_average_change_detector)

    import sys
    from aminer.analysis.MatchValueStreamWriter import MatchValueStreamWriter
    match_value_stream_writer = MatchValueStreamWriter(
        sys.stdout, ['/model/Sensors/CPUTemp', '/model/Sensors/CPUWorkload', '/model/Sensors/DTM'], b';', b'')
    analysis_context.register_component(match_value_stream_writer, component_name="MatchValueStreamWriter")
    atom_filter.add_handler(match_value_stream_writer)

    from aminer.analysis.NewMatchPathValueComboDetector import NewMatchPathValueComboDetector
    new_match_path_value_combo_detector = NewMatchPathValueComboDetector(
        analysis_context.aminer_config, ['/model/IPAddresses/Username', '/model/IPAddresses/IP'],
        anomaly_event_handlers, output_log_line=True)
    analysis_context.register_component(new_match_path_value_combo_detector, component_name="NewMatchPathValueCombo")
    atom_filter.add_handler(new_match_path_value_combo_detector)

    from aminer.analysis.NewMatchIdValueComboDetector import NewMatchIdValueComboDetector
    new_match_id_value_combo_detector = NewMatchIdValueComboDetector(analysis_context.aminer_config, [
        '/model/type/path/name', '/model/type/syscall/syscall'], anomaly_event_handlers, id_path_list=[
        '/model/type/path/id', '/model/type/syscall/id'], min_allowed_time_diff=5, auto_include_flag=True, allow_missing_values_flag=True,
        output_log_line=True)
    analysis_context.register_component(new_match_id_value_combo_detector, component_name="NewMatchIdValueComboDetector")
    atom_filter.add_handler(new_match_id_value_combo_detector)

    from aminer.analysis.NewMatchPathValueDetector import NewMatchPathValueDetector
    new_match_path_value_detector = NewMatchPathValueDetector(analysis_context.aminer_config, [
        '/model/DailyCron/JobNumber', '/model/IPAddresses/Username'], anomaly_event_handlers, auto_include_flag=True, output_log_line=True)
    analysis_context.register_component(new_match_path_value_detector, component_name="NewMatchPathValue")
    atom_filter.add_handler(new_match_path_value_detector)

    from aminer.analysis.MissingMatchPathValueDetector import MissingMatchPathValueDetector
    missing_match_path_value_detector = MissingMatchPathValueDetector(
        analysis_context.aminer_config, ['/model/DiskReport/Space'], anomaly_event_handlers, auto_include_flag=True, default_interval=2,
        realert_interval=5, output_log_line=True)
    analysis_context.register_component(missing_match_path_value_detector, component_name="MissingMatch")
    atom_filter.add_handler(missing_match_path_value_detector)

    from aminer.analysis.TimeCorrelationDetector import TimeCorrelationDetector
    time_correlation_detector = TimeCorrelationDetector(
        analysis_context.aminer_config, anomaly_event_handlers, 2, min_rule_attributes=1, max_rule_attributes=5,
        record_count_before_event=10000, output_log_line=True)
    analysis_context.register_component(time_correlation_detector, component_name="TimeCorrelationDetector")
    atom_filter.add_handler(time_correlation_detector)

    # Correlate cron job announcements with the matching executions by JobNumber.
    from aminer.analysis.TimeCorrelationViolationDetector import TimeCorrelationViolationDetector, CorrelationRule, EventClassSelector
    cron_job_announcement = CorrelationRule('CronJobAnnouncement', 5, 6, max_artefacts_a_for_single_b=1, artefact_match_parameters=[
        ('/model/CronAnnouncement/JobNumber', '/model/CronExecution/JobNumber')])
    a_class_selector = EventClassSelector('Announcement', [cron_job_announcement], None)
    b_class_selector = EventClassSelector('Execution', None, [cron_job_announcement])
    rules = [Rules.PathExistsMatchRule('/model/CronAnnouncement/Run', a_class_selector),
             Rules.PathExistsMatchRule('/model/CronExecution/Job', b_class_selector)]

    time_correlation_violation_detector = TimeCorrelationViolationDetector(analysis_context.aminer_config, rules, anomaly_event_handlers,
                                                                           output_log_line=True)
    analysis_context.register_component(time_correlation_violation_detector, component_name="TimeCorrelationViolationDetector")
    atom_filter.add_handler(time_correlation_violation_detector)
Example #7
0
    def _select_variable_type(self, variable_type, dist_name, beta_param=None):
        """
        Select the searched distribution from a detect_continuous_shape result.

        The result either is the searched distribution itself, or carries a list of
        alternative distributions as its last element from which the searched one
        must be picked. Unlike the previous open-coded loops, this fails the test
        instead of spinning forever when the searched distribution is not present.

        @param variable_type result list of VariableTypeDetector.detect_continuous_shape.
        @param dist_name searched distribution name, e.g. 'uni', 'nor', 'beta'.
        @param beta_param beta shape index (1-5) when dist_name is 'beta', else None.
        @return the matching distribution description with any trailing alternative
                list stripped.
        """
        while True:
            last = variable_type[-1]
            if variable_type[0] == dist_name and (
                    beta_param is None or last == beta_param or
                    (isinstance(last, list) and variable_type[-2] == beta_param)):
                if isinstance(last, list):
                    # Strip the trailing list of alternative distributions.
                    variable_type = variable_type[:-1]
                return variable_type
            selected = None
            for alternative in last:
                if alternative[0] == dist_name and (
                        beta_param is None or alternative[-1] == beta_param):
                    selected = alternative
                    break
            if selected is None:
                # Previously this situation caused an infinite loop; fail loudly instead.
                self.fail('No %s%s distribution found in %s' % (
                    dist_name, '' if beta_param is None else beta_param, variable_type))
            variable_type = selected

    def _run_s_ks_tests(self, vtd, etd, data_ini, data_upd, expected, dist_name,
                        beta_param=None, iterations=20, ini_size=100, upd_size=50):
        """
        Run the sKS-test for one distribution over all iterations and compare results.

        @param vtd the VariableTypeDetector under test.
        @param etd the EventTypeDetector whose values are fed to the sKS-test.
        @param data_ini concatenated initial samples (iterations * ini_size values).
        @param data_upd concatenated update samples (iterations * upd_size values).
        @param expected expected list of sKS-test results.
        @param dist_name searched distribution name.
        @param beta_param beta shape index when dist_name is 'beta', else None.
        @param iterations number of executions of the tested function.
        @param ini_size size of one initial datasample.
        @param upd_size size of one update datasample.
        """
        result_list = []  # List of the results of the single tests
        for i in range(iterations):
            # Create the initial distribution, which has to pass the initial test.
            variable_type_ini = vtd.detect_continuous_shape(
                data_ini[i * ini_size:(i + 1) * ini_size])
            variable_type_ini = self._select_variable_type(
                variable_type_ini, dist_name, beta_param)

            # Test and save the result of the sKS-Test.
            etd.values = [[data_upd[i * upd_size:(i + 1) * upd_size]]]
            vtd.var_type = [[variable_type_ini]]
            result_list.append(vtd.s_ks_test(0, 0, True)[0])

        # assertEqual reports the full diff on mismatch, unlike assertTrue(a == b).
        self.assertEqual(result_list, expected)

    def test7update_continuous_VT(self):
        """
        This unittest tests the s_ks_test method. It uses randomised datasets, which can be printed in the terminal.
        Every distribution has generated 30*300 Datasets and var_ev = 0, var_var = 1.
        """
        # Number of execution of the tested function
        iterations = 20
        # Size of the initial datasample
        dataset_size_ini = 100
        # Size of the update datasample
        dataset_size_upd = 50
        # Significance level
        significance_niveau = 0.05

        etd = EventTypeDetector(self.aminer_config,
                                [self.stream_printer_event_handler])
        vtd = VariableTypeDetector(self.aminer_config,
                                   [self.stream_printer_event_handler],
                                   etd,
                                   num_init=dataset_size_ini,
                                   num_update=dataset_size_upd,
                                   ks_alpha=significance_niveau)

        # One pass per distribution; the pickle fixture name is derived from the
        # distribution name plus the beta shape index where applicable.
        distributions = [('uni', None), ('nor', None), ('beta', 1), ('beta', 2),
                         ('beta', 3), ('beta', 4), ('beta', 5)]
        for dist_name, beta_param in distributions:
            key = dist_name if beta_param is None else dist_name + str(beta_param)
            with open('unit/data/vtd_data/%s_data_test7' % key, 'rb') as f:
                [data_ini, data_upd,
                 expected] = pickle.load(f)  # skipcq: BAN-B301
            self._run_s_ks_tests(vtd, etd, data_ini, data_upd, expected,
                                 dist_name, beta_param, iterations,
                                 dataset_size_ini, dataset_size_upd)
Example #8
0
    def _distribution_found(self, distribution_list, dist_name, beta_param=None):
        """
        Check whether the searched distribution was detected.

        @param distribution_list result of VariableTypeDetector.detect_continuous_shape;
               its last element holds the list of alternative distributions.
        @param dist_name searched distribution name, e.g. 'uni', 'nor', 'beta'.
        @param beta_param beta shape index (1-5) when dist_name is 'beta', else None.
        @return True if the searched distribution is the detected one or is present
                among the alternative distributions.
        """
        if beta_param is None:
            return distribution_list[0] == dist_name or dist_name in [
                distr[0] for distr in distribution_list[-1]]
        # For beta distributions the shape index (last element) must match as well.
        return (distribution_list[0] == dist_name and
                distribution_list[-1] == beta_param) or \
            dist_name + str(beta_param) in [
                distr[0] + str(distr[-1])
                for distr in distribution_list[-1]]

    def test3detect_continuous_shape_fixed_data(self):
        """
        This unittest tests possible continuously distributed variables raising from the detect_continous_shape method.
        It uses fix data sets. Every distribution has generated 20*100 Datasets and var_ev = 0, var_var = 1.
        """
        # Number of execution of the tested function
        iterations = 20
        # Size of the initial datasample
        dataset_size = 100
        # Significance level
        significance_niveau = 0.05

        etd = EventTypeDetector(self.aminer_config,
                                [self.stream_printer_event_handler])
        vtd = VariableTypeDetector(self.aminer_config,
                                   [self.stream_printer_event_handler],
                                   etd,
                                   num_init=dataset_size,
                                   div_thres=0.5,
                                   test_ks_int=True,
                                   sim_thres=0.3,
                                   ks_alpha=significance_niveau)

        # One pass per distribution; the pickle fixture name is derived from the
        # distribution name plus the beta shape index where applicable.
        distributions = [('uni', None), ('nor', None), ('beta', 1), ('beta', 2),
                         ('beta', 3), ('beta', 4), ('beta', 5)]
        for dist_name, beta_param in distributions:
            key = dist_name if beta_param is None else dist_name + str(beta_param)
            with open('unit/data/vtd_data/%s_data_test3' % key, 'rb') as f:
                [data_list,
                 expected] = pickle.load(f)  # skipcq: BAN-B301

            result_list = []  # List of the results of the single tests
            for i in range(iterations):
                distribution_list = vtd.detect_continuous_shape(
                    data_list[i * dataset_size:(i + 1) * dataset_size])
                # Add 1 if the searched distribution is present in the found
                # distributions, 0 otherwise.
                result_list.append(1 if self._distribution_found(
                    distribution_list, dist_name, beta_param) else 0)

            # assertEqual reports the full diff on mismatch, unlike assertTrue(a == b).
            self.assertEqual(result_list, expected)
Example #9
0
    def test6receive_atom(self):
        """
        This unittest tests if atoms are sorted to the right distribution and if the update steps also work properly.
        Therefore the assumption that after 200 values the VTD with the default parameters can change to the right distribution.
        """
        # load data
        with open('unit/data/vtd_data/uni_data_test6', 'rb') as f:
            uni_data_list = pickle.load(f)  # skipcq: BAN-B301
        with open('unit/data/vtd_data/nor_data_test6', 'rb') as f:
            nor_data_list = pickle.load(f)  # skipcq: BAN-B301
        with open('unit/data/vtd_data/beta1_data_test6', 'rb') as f:
            beta1_data_list = pickle.load(f)  # skipcq: BAN-B301

        uni_data_list = uni_data_list * 10
        nor_data_list = nor_data_list * 10
        beta1_data_list = beta1_data_list * 10
        vtd_arguments = [(100, 50), (110, 55), (90, 45), (80, 40), (70, 35)]

        for init, update in vtd_arguments:
            etd = EventTypeDetector(self.aminer_config,
                                    [self.stream_printer_event_handler])
            vtd = VariableTypeDetector(self.aminer_config,
                                       [self.stream_printer_event_handler],
                                       etd,
                                       num_init=init,
                                       num_update=update,
                                       div_thres=0.8,
                                       sim_thres=0.3,
                                       num_pause_others=0)
            t = time.time()
            stat_data = b'True'
            log_atom = LogAtom(
                stat_data,
                ParserMatch(
                    MatchElement('', stat_data.decode(), stat_data, None)), t,
                self.__class__.__name__)
            # initialize data
            for i in range(init):
                self.assertTrue(etd.receive_atom(log_atom))
                vtd.receive_atom(log_atom)
            result = vtd.var_type[0][0]
            self.assertEqual(['stat', [stat_data.decode()], True], result,
                             (init, update, result))

            # static -> static
            for i in range(update):
                self.assertTrue(etd.receive_atom(log_atom))
                vtd.receive_atom(log_atom)
            result = vtd.var_type[0][0]
            self.assertEqual(['stat', [stat_data.decode()], True], result,
                             (init, update, result))

            # static -> uni
            for uni_data in uni_data_list[:init]:
                log_atom = LogAtom(
                    uni_data,
                    ParserMatch(MatchElement('', uni_data, str(uni_data),
                                             None)), t,
                    self.__class__.__name__)
                self.assertTrue(etd.receive_atom(log_atom))
                vtd.receive_atom(log_atom)
            result = vtd.var_type[0][0]
            pos_distr = vtd.alternative_distribution_types[0][0]
            self.assertTrue(
                result[0] == 'uni'
                or 'uni' in [distr[0] for distr in pos_distr],
                (init, update, result))

            # uni -> others
            for i in range(update):
                stat_data = bytes(str((i % 75) * 0.1), 'utf-8')
                log_atom = LogAtom(
                    stat_data,
                    ParserMatch(
                        MatchElement('', stat_data.decode(), stat_data, None)),
                    t, self.__class__.__name__)
                self.assertTrue(etd.receive_atom(log_atom))
                vtd.receive_atom(log_atom)
            result = vtd.var_type[0][0]
            self.assertEqual(['others', 0], result, (init, update, result))

            # others -> d
            for i in range(update):
                stat_data = bytes(str((i % 10) * 0.1), 'utf-8')
                log_atom = LogAtom(
                    stat_data,
                    ParserMatch(
                        MatchElement('', stat_data.decode(), stat_data, None)),
                    t, self.__class__.__name__)
                self.assertTrue(etd.receive_atom(log_atom))
                vtd.receive_atom(log_atom)
            result = vtd.var_type[0][0]
            self.assertEqual('d', result[0], (init, update, result))

            # reset all
            etd = EventTypeDetector(self.aminer_config,
                                    [self.stream_printer_event_handler])
            vtd = VariableTypeDetector(self.aminer_config,
                                       [self.stream_printer_event_handler],
                                       etd,
                                       num_init=init,
                                       num_update=update,
                                       div_thres=0.3,
                                       sim_thres=0.5,
                                       num_pause_others=0,
                                       num_d_bt=30)

            # initialize with d
            for i in range(init):
                stat_data = bytes(str((i % 10) * 0.1), 'utf-8')
                log_atom = LogAtom(
                    stat_data,
                    ParserMatch(
                        MatchElement('', stat_data.decode(), stat_data, None)),
                    t, self.__class__.__name__)
                self.assertTrue(etd.receive_atom(log_atom))
                vtd.receive_atom(log_atom)
            result = vtd.var_type[0][0]
            self.assertEqual('d', result[0], (init, update, result))

            # discrete to others with new values
            for uni_data in uni_data_list[:init]:
                log_atom = LogAtom(
                    uni_data,
                    ParserMatch(MatchElement('', uni_data, str(uni_data),
                                             None)), t,
                    self.__class__.__name__)
                self.assertTrue(etd.receive_atom(log_atom))
                vtd.receive_atom(log_atom)
            result = vtd.var_type[0][0]
            self.assertEqual(['others', 0], result, (init, update, result))

            # reset all
            etd = EventTypeDetector(self.aminer_config,
                                    [self.stream_printer_event_handler])
            vtd = VariableTypeDetector(self.aminer_config,
                                       [self.stream_printer_event_handler],
                                       etd,
                                       num_init=init,
                                       num_update=update,
                                       div_thres=0.3,
                                       sim_thres=0.5,
                                       num_pause_others=0,
                                       num_d_bt=20)

            # initialize with d
            for i in range(init):
                stat_data = bytes(str((i % 10) * 0.1), 'utf-8')
                log_atom = LogAtom(
                    stat_data,
                    ParserMatch(
                        MatchElement('', stat_data.decode(), stat_data, None)),
                    t, self.__class__.__name__)
                self.assertTrue(etd.receive_atom(log_atom))
                vtd.receive_atom(log_atom)
            result = vtd.var_type[0][0]
            self.assertEqual('d', result[0], (init, update, result))

            # discrete to others without new values, low num_d_bt
            for i in range(update):
                stat_data = bytes(str((i % 3) * 0.1), 'utf-8')
                log_atom = LogAtom(
                    stat_data,
                    ParserMatch(
                        MatchElement('', stat_data.decode(), stat_data, None)),
                    t, self.__class__.__name__)
                self.assertTrue(etd.receive_atom(log_atom))
                vtd.receive_atom(log_atom)
            result = vtd.var_type[0][0]
            self.assertEqual(['others', 0], result, (init, update, result))

            # reset all
            etd = EventTypeDetector(self.aminer_config,
                                    [self.stream_printer_event_handler])
            vtd = VariableTypeDetector(self.aminer_config,
                                       [self.stream_printer_event_handler],
                                       etd,
                                       num_init=init,
                                       num_update=update,
                                       div_thres=0.3,
                                       sim_thres=0.5,
                                       num_pause_others=0,
                                       num_d_bt=100)

            # initialize with d
            for i in range(init):
                stat_data = bytes(str((i % 10) * 0.1), 'utf-8')
                log_atom = LogAtom(
                    stat_data,
                    ParserMatch(
                        MatchElement('', stat_data.decode(), stat_data, None)),
                    t, self.__class__.__name__)
                self.assertTrue(etd.receive_atom(log_atom))
                vtd.receive_atom(log_atom)
            result = vtd.var_type[0][0]
            self.assertEqual('d', result[0], (init, update, result))

            # discrete to others without new values, high num_d_bt
            for i in range(update):
                stat_data = bytes(str((i % 3) * 0.1), 'utf-8')
                log_atom = LogAtom(
                    stat_data,
                    ParserMatch(
                        MatchElement('', stat_data.decode(), stat_data, None)),
                    t, self.__class__.__name__)
                self.assertTrue(etd.receive_atom(log_atom))
                vtd.receive_atom(log_atom)
            result = vtd.var_type[0][0]
            self.assertNotEqual(['others', 0], result, (init, update, result))

            # reset all
            etd = EventTypeDetector(self.aminer_config,
                                    [self.stream_printer_event_handler])
            vtd = VariableTypeDetector(self.aminer_config,
                                       [self.stream_printer_event_handler],
                                       etd,
                                       num_init=init,
                                       num_update=update,
                                       div_thres=0.3,
                                       sim_thres=0.3,
                                       num_pause_others=0)
            t = time.time()
            stat_data = b'True'
            log_atom = LogAtom(
                stat_data,
                ParserMatch(
                    MatchElement('', stat_data.decode(), stat_data, None)), t,
                self.__class__.__name__)
            # initialize data
            for i in range(init):
                self.assertTrue(etd.receive_atom(log_atom))
                vtd.receive_atom(log_atom)
            result = vtd.var_type[0][0]
            self.assertEqual(['stat', [stat_data.decode()], True], result,
                             (init, update, result))

            # static -> asc
            for i in range(init):
                stat_data = bytes(str(i * 0.1), 'utf-8')
                log_atom = LogAtom(
                    stat_data,
                    ParserMatch(
                        MatchElement('', stat_data.decode(), stat_data, None)),
                    t, self.__class__.__name__)
                self.assertTrue(etd.receive_atom(log_atom))
                vtd.receive_atom(log_atom)
            result = vtd.var_type[0][0]
            self.assertEqual(['asc', 'float'], result, (init, update, result))

            # asc -> desc
            for i in range(init, 0, -1):
                stat_data = bytes(str(i * 0.1), 'utf-8')
                log_atom = LogAtom(
                    stat_data,
                    ParserMatch(
                        MatchElement('', stat_data.decode(), stat_data, None)),
                    t, self.__class__.__name__)
                self.assertTrue(etd.receive_atom(log_atom))
                vtd.receive_atom(log_atom)
            result = vtd.var_type[0][0]
            self.assertEqual(['desc', 'float'], result, (init, update, result))

            # reset all
            etd = EventTypeDetector(self.aminer_config,
                                    [self.stream_printer_event_handler])
            vtd = VariableTypeDetector(self.aminer_config,
                                       [self.stream_printer_event_handler],
                                       etd,
                                       num_init=init,
                                       num_update=update,
                                       div_thres=0.3,
                                       sim_thres=0.3,
                                       num_pause_others=0)
            t = time.time()
            stat_data = b'True'
            log_atom = LogAtom(
                stat_data,
                ParserMatch(
                    MatchElement('', stat_data.decode(), stat_data, None)), t,
                self.__class__.__name__)
            # initialize data
            for i in range(init):
                self.assertTrue(etd.receive_atom(log_atom))
                vtd.receive_atom(log_atom)
            result = vtd.var_type[0][0]
            self.assertEqual(['stat', [stat_data.decode()], True], result,
                             (init, update, result))

            # static -> nor
            for nor_data in nor_data_list[:init]:
                log_atom = LogAtom(
                    nor_data,
                    ParserMatch(MatchElement('', nor_data, str(nor_data),
                                             None)), t,
                    self.__class__.__name__)
                self.assertTrue(etd.receive_atom(log_atom))
                vtd.receive_atom(log_atom)
            result = vtd.var_type[0][0]
            pos_distr = vtd.alternative_distribution_types[0][0]
            self.assertTrue(
                result[0] == 'nor'
                or 'nor' in [distr[0] for distr in pos_distr],
                (init, update, result))

            # nor -> beta1
            for beta1_data in beta1_data_list[:init]:
                log_atom = LogAtom(
                    beta1_data,
                    ParserMatch(
                        MatchElement('', beta1_data, str(beta1_data), None)),
                    t, self.__class__.__name__)
                self.assertTrue(etd.receive_atom(log_atom))
                vtd.receive_atom(log_atom)
            result = vtd.var_type[0][0]
            pos_distr = vtd.alternative_distribution_types[0][0]
            self.assertTrue(
                (result[0] == 'beta' and result[-1] == 1) or 'beta1'
                in [distr[0] + str(distr[-1])
                    for distr in pos_distr], (init, update, result))

            # reset all
            etd = EventTypeDetector(self.aminer_config,
                                    [self.stream_printer_event_handler])
            vtd = VariableTypeDetector(self.aminer_config,
                                       [self.stream_printer_event_handler],
                                       etd,
                                       num_init=init,
                                       num_update=update,
                                       div_thres=0.3,
                                       sim_thres=0.3,
                                       num_pause_others=0)
            t = time.time()
            stat_data = b'True'
            log_atom = LogAtom(
                stat_data,
                ParserMatch(
                    MatchElement('', stat_data.decode(), stat_data, None)), t,
                self.__class__.__name__)
            # initialize data
            for i in range(init):
                self.assertTrue(etd.receive_atom(log_atom))
                vtd.receive_atom(log_atom)
            result = vtd.var_type[0][0]
            self.assertEqual(['stat', [stat_data.decode()], True], result,
                             (init, update, result))

            # static -> unq
            vtd.test_ks_int = False
            unq_data_list = [bytes(str(i), 'utf-8') for i in range(init)]
            random.shuffle(unq_data_list)
            for unq_data in unq_data_list:
                log_atom = LogAtom(
                    unq_data,
                    ParserMatch(MatchElement('', unq_data, unq_data, None)), t,
                    self.__class__.__name__)
                self.assertTrue(etd.receive_atom(log_atom))
                vtd.receive_atom(log_atom)
            result = vtd.var_type[0][0]
            self.assertEqual('unq', result[0], (init, update, result))
# Example #10
    def test4detect_var_type(self):
        """Test the possible detection scenarios of the detect_var_type method.

        Covers the static, ascending/descending (int and float), uniform,
        unique, 'others' and discrete result paths, as well as the effect of
        the div_thres, sim_thres and test_ks_int parameters.
        """
        num_init = 100
        t = time.time()

        def new_vtd(etd, **kwargs):
            """Create a VariableTypeDetector bound to *etd* with the test defaults."""
            return VariableTypeDetector(
                self.aminer_config, [self.stream_printer_event_handler], etd,
                num_init=num_init, **kwargs)

        def new_pair(**kwargs):
            """Create a fresh detector pair so earlier atoms cannot influence the next scenario."""
            etd = EventTypeDetector(self.aminer_config,
                                    [self.stream_printer_event_handler])
            return etd, new_vtd(etd, **kwargs)

        def feed(etd, data):
            """Wrap *data* (bytes) in a log atom and feed it to *etd*."""
            log_atom = LogAtom(
                data,
                ParserMatch(MatchElement('', data.decode(), data, None)), t,
                self.__class__.__name__)
            self.assertTrue(etd.receive_atom(log_atom))

        # test the 'static' path of detect_var_type: the same value appears
        # num_init times but is not yet marked as a confirmed static type.
        etd, vtd = new_pair()
        stat_data = b'5.3.0-55-generic'
        for _ in range(num_init):
            feed(etd, stat_data)
        result = vtd.detect_var_type(0, 0)
        self.assertEqual(['stat', [stat_data.decode()], False], result)

        # test ascending with float values
        etd, vtd = new_pair()
        for i in range(num_init):
            feed(etd, bytes(str(i * 0.1), 'utf-8'))
        self.assertEqual(['asc', 'float'], vtd.detect_var_type(0, 0))

        # test ascending with integer values
        etd, vtd = new_pair()
        for i in range(num_init):
            feed(etd, bytes(str(i), 'utf-8'))
        self.assertEqual(['asc', 'int'], vtd.detect_var_type(0, 0))

        # test descending with float values
        etd, vtd = new_pair()
        for i in range(num_init, 0, -1):
            feed(etd, bytes(str(i * 0.1), 'utf-8'))
        self.assertEqual(['desc', 'float'], vtd.detect_var_type(0, 0))

        # test descending with integer values
        etd, vtd = new_pair()
        for i in range(num_init, 0, -1):
            feed(etd, bytes(str(i), 'utf-8'))
        self.assertEqual(['desc', 'int'], vtd.detect_var_type(0, 0))

        # test 'num_init' and 'div_thres': feed the value 99 first to
        # prevent the result from becoming asc or desc.
        etd, vtd = new_pair(div_thres=0.3, test_ks_int=True)
        values = []
        for i in [99] + list(range(99)):
            stat_data = bytes(str(i), 'utf-8')
            values.append(float(stat_data))
            feed(etd, stat_data)
        result = vtd.detect_var_type(0, 0)
        # the uniform distribution must be detected, either directly or as
        # one of the alternative distributions.
        # NOTE(review): the original assertion was
        # assertNotEqual(<bool>, result), which compared a boolean against a
        # list and therefore could never fail; assertTrue is what the
        # accompanying comment intended.
        self.assertTrue(
            result[0] == 'uni' or 'uni' in [distr[0] for distr in result[-1]],
            result)

        # test the 'div_thres' option for the continuous distribution: with
        # div_thres=1.0 the values count as diverse enough for 'unq'.
        vtd = new_vtd(etd, div_thres=1.0, test_ks_int=True)
        self.assertEqual(['unq', values], vtd.detect_var_type(0, 0))

        # test the 'test_ks_int' option for the continuous distribution
        vtd = new_vtd(etd, div_thres=0.3, test_ks_int=False)
        self.assertEqual(['unq', values], vtd.detect_var_type(0, 0))

        # test the 'sim_thres' option to result in 'others'. At least
        # (1 - sim_thres) * num_init and at most num_init * div_thres - 1
        # unique values must exist.
        vtd = new_vtd(etd, div_thres=0.5, test_ks_int=False, sim_thres=0.5)
        values = []
        for i in range(100):
            stat_data = bytes(str((i % 50) * 0.1), 'utf-8')
            values.append(float(stat_data))
            feed(etd, stat_data)
        self.assertEqual(['others', 0], vtd.detect_var_type(0, 0))

        # test the discrete result and verify the reported value set and the
        # relative appearance frequencies.
        vtd = new_vtd(etd, div_thres=0.5, test_ks_int=False, sim_thres=0.3)
        values = []
        for i in range(num_init):
            stat_data = bytes(str((i % 50) * 0.1), 'utf-8')
            values.append(float(stat_data))
            feed(etd, stat_data)
        result = vtd.detect_var_type(0, 0)
        values_set = list(set(values))
        values_app = [0] * len(values_set)
        for value in values:
            values_app[values_set.index(value)] += 1
        values_app = [x / len(values) for x in values_app]
        self.assertEqual(['d', values_set, values_app, len(values)], result)
    def test3append_values_float(self):
        """This unittest checks the append_values method with raw_match_object being a float value."""
        detector = EventTypeDetector(
            self.aminer_config, [self.stream_printer_event_handler])
        timestamp = time.time()
        # initialize all values with a first received atom.
        first_atom = LogAtom(
            b'22.2', ParserMatch(MatchElement('path', '22.2', 22.2, None)),
            timestamp, self.__class__.__name__)
        detector.receive_atom(first_atom)

        # (raw data, match string, raw match object, expected stored value):
        # float and int match objects are stored as-is, a bytestring match
        # object is converted before being appended.
        scenarios = [
            (b'22.2', '22.2', 22.2, 22.2),
            (b'22', '22', 22, 22),
            (b'22.2', '22', b'22', 22),
        ]
        for raw_data, match_string, match_object, expected in scenarios:
            atom = LogAtom(
                raw_data,
                ParserMatch(
                    MatchElement('path', match_string, match_object, None)),
                timestamp, self.__class__.__name__)
            detector.values = [[[]]]
            detector.append_values(atom, 0)
            self.assertEqual(detector.values, [[[expected]]])