def test_more_cluster_representatives(self):
    """Compare a merged prototype cache against the first tree's own index.

    Two real trees are indexed with the same signature, the indexes are
    combined via ``PrototypeSignatureCache.from_signature_caches`` with the
    first tree as prototype, and for every token of the combined cache the
    multiplicity and the count of the ``"value"`` statistics for
    ``ProcessExitEvent`` must equal those of the first tree's index.
    """
    first_tree = real_tree()
    second_tree = real_tree()
    topology = ParentChildByNameTopologySignature()

    # One index per tree; each call gets its own ``supported`` mapping.
    indexes = [
        tree.to_index(
            signature=topology,
            supported={
                ProcessStartEvent: True,
                ProcessExitEvent: True,
                TrafficEvent: True
            },
            statistics_cls=SetStatistics)
        for tree in (first_tree, second_tree)
    ]

    cr = PrototypeSignatureCache.from_signature_caches(
        indexes, prototype=first_tree)
    reference = indexes[0]

    for token in cr:
        self.assertEqual(
            cr.multiplicity(token, first_tree),
            reference.multiplicity(token))

        merged_count = cr.get_statistics(
            signature=token,
            key="value",
            prototype=first_tree,
            event_type=ProcessExitEvent).count()
        reference_count = reference.get_statistics(
            signature=token,
            key="value",
            event_type=ProcessExitEvent).count()
        self.assertEqual(merged_count, reference_count)
def test_negative_values(self):
    """Verify that normalized distances between two real trees are not negative.

    Both trees are registered as prototypes and then streamed through the
    algorithm one after the other. Every value reported by the normalized
    ``DistanceMatrixDecorator`` must be greater than or equal to zero.
    """
    def distance_builder(**kwargs):
        return SimpleDistance(**kwargs)

    def flatten(values):
        # NOTE(review): the nesting depth of decorator.data() is not visible
        # from this test, so nested lists/tuples are walked generically.
        for value in values:
            if isinstance(value, (list, tuple)):
                for nested in flatten(value):
                    yield nested
            else:
                yield value

    tree_one = real_tree(path="data/c01-007-102/2/1078-2-process.csv", absolute=True)
    tree_two = real_tree(path="data/c01-007-102/2/1165-2-process.csv", absolute=True)
    signature = ParentChildByNameTopologySignature()
    algorithm = IncrementalDistanceAlgorithm(
        signature=signature,
        distance=distance_builder,
        cache_statistics=SetStatistics)
    algorithm.prototypes = [tree_one, tree_two]
    decorator = DistanceMatrixDecorator(normalized=True)
    decorator.wrap_algorithm(algorithm)

    for tree in (tree_one, tree_two):
        algorithm.start_tree()
        for event in tree.event_iter(supported=algorithm.supported):
            try:
                algorithm.add_event(event)
            except EventNotSupportedException:
                # Events the algorithm does not support are skipped on purpose.
                pass
        algorithm.finish_tree()

    distances = list(flatten(decorator.data()))
    # Guard against a vacuous pass when no distance was recorded at all.
    self.assertTrue(distances, "no distances were recorded")
    for distance in distances:
        self.assertGreaterEqual(distance, 0)
def test_real_trees(self):
    """Check anomaly flags of two real trees against a merged prototype cache.

    Each tree is indexed into its own ``PrototypeSignatureCache`` (named
    ``"1"`` and ``"2"``); the caches are merged with ``+=`` and handed to
    the algorithm as cluster representatives. After streaming both trees,
    the ``AnomalyDecorator`` data is inspected at the first and last
    position for every tree/prototype combination.
    """
    def distance_builder(**kwargs):
        start_exit = StartExitDistance()
        start_exit.supported[TrafficEvent] = True
        return start_exit

    tree_one = real_tree()
    tree_two = real_tree(path="data/c01-007-102/2/1129-2-process.csv")
    trees = [tree_one, tree_two]
    prototype_names = ["1", "2"]

    prototype_caches = [
        PrototypeSignatureCache.from_signature_caches(
            [tree.to_index(
                signature=ParentChildByNameTopologySignature(),
                supported={
                    ProcessStartEvent: True,
                    ProcessExitEvent: True,
                    TrafficEvent: True
                },
                statistics_cls=SetStatistics)],
            prototype=name,
            threshold=0)
        for name, tree in zip(prototype_names, trees)
    ]

    decorator = AnomalyDecorator()
    algorithm = IncrementalDistanceAlgorithm(
        signature=ParentChildByNameTopologySignature(),
        distance=distance_builder,
        cache_statistics=SetStatistics
    )

    # Fold all remaining caches into the first one using in-place addition.
    remaining = iter(prototype_caches)
    prototype_cache = next(remaining)
    for cache in remaining:
        prototype_cache += cache

    algorithm.cluster_representatives(
        signature_prototypes=[prototype_cache],
        prototypes=prototype_names
    )
    decorator.wrap_algorithm(algorithm)

    for tree in trees:
        algorithm.start_tree()
        for event in tree.event_iter(supported=algorithm.supported):
            try:
                algorithm.add_event(event)
            except EventNotSupportedException:
                pass
        algorithm.finish_tree()

    # (tree index, prototype index, position, expected flag)
    expectations = [
        (0, 0, 0, False),   # first tree vs. first prototype, start
        (0, 0, -1, False),  # first tree vs. first prototype, end
        (0, 1, 0, False),   # first tree vs. second prototype, start
        (0, 1, -1, True),   # first tree vs. second prototype, end
        (1, 0, 0, False),   # second tree vs. first prototype, start
        (1, 0, -1, True),   # second tree vs. first prototype, end
        (1, 1, 0, False),   # second tree vs. second prototype, start
        (1, 1, -1, False),  # second tree vs. second prototype, end
    ]
    for tree_index, prototype_index, position, expected in expectations:
        flag = decorator.data()[tree_index][0][prototype_index][position]
        if expected:
            self.assertTrue(flag)
        else:
            self.assertFalse(flag)
def test_ensemble_result(self):
    """Check that ensemble distances match the per-signature distances.

    Values produced through an ensemble signature have apparently not
    always matched the results of running each signature on its own.
    This test measures a disturbed tree once with an ``EnsembleSignature``
    and once per individual signature, then compares the values pairwise.
    """
    tree = real_tree()
    tree_generator = TEDGenerator(
        operation_generator=RandomOperation(
            insert_probability=.5, delete_probability=.5),
        costs=[TreeEditDistanceCost()],
        seed=1234)
    disturbed_tree = tree_generator.generate(tree)
    signatures = [ParentChildByNameTopologySignature(),
                  ParentCountedChildrenByNameTopologySignature(count=2)]

    def measure(signature, prototype_factory):
        # Stream the disturbed tree against a single prototype and return
        # the unnormalized distance matrix. The prototype is obtained only
        # after the decorator has been attached to the algorithm.
        matrix = DistanceMatrixDecorator(normalized=False)
        algorithm = IncrementalDistanceAlgorithm(
            signature=signature,
            distance=StartDistance
        )
        matrix.wrap_algorithm(algorithm)
        algorithm.prototypes = [prototype_factory()]
        algorithm.start_tree()
        for event in disturbed_tree.event_iter(supported=algorithm.supported):
            try:
                algorithm.add_event(event)
            except EventNotSupportedException:
                pass
        algorithm.finish_tree()
        return matrix.data()

    # First, the result from the ensemble of both signatures.
    ensemble_data = measure(
        EnsembleSignature(signatures=signatures), lambda: tree)

    # Second, one independent measurement per signature.
    single_data = [measure(signature, real_tree) for signature in signatures]

    for index, single in enumerate(single_data):
        self.assertEqual(ensemble_data[0][index][0], single[0][0][0])
def test_external(self):
    """Stream one real tree against itself and print the distance matrix.

    The tree loaded from ``data/c01-007-102/2/1146-2-process.csv`` is used
    both as the only prototype and as the monitored tree, with a
    ``StartExitDistance`` of weight 0 and ``SplittedStatistics``.
    """
    def build_distance(**kwargs):
        start_exit = StartExitDistance(weight=0, **kwargs)
        start_exit.supported[TrafficEvent] = True
        return start_exit

    algorithm = IncrementalDistanceAlgorithm(
        signature=ParentChildByNameTopologySignature(),
        distance=build_distance,
        cache_statistics=SplittedStatistics)
    decorator = DistanceMatrixDecorator(normalized=False)
    decorator.wrap_algorithm(algorithm)

    the_tree = real_tree(
        path="data/c01-007-102/2/1146-2-process.csv",
        absolute=True
    )
    algorithm.prototypes = [the_tree]

    algorithm.start_tree()
    for event in the_tree.event_iter(supported=algorithm.supported):
        try:
            algorithm.add_event(event)
        except EventNotSupportedException:
            pass
    algorithm.finish_tree()

    print(decorator.data())
    # NOTE(review): this test fails unconditionally; it looks like a
    # debugging placeholder -- replace with an assertion on the expected
    # matrix once the intended result is confirmed.
    self.assertTrue(False)
def test_count_signature_for_correct_zero_distance(self):
    """Verify a zero distance for identical trees with a counted-children signature.

    A real tree is set as the only prototype and a freshly loaded copy of
    the same tree is streamed (including marker events) through the
    algorithm using ``ParentCountedChildrenByNameTopologySignature(count=3)``.
    The unnormalized distance matrix must be ``[[[0]]]``.
    """
    signature = ParentCountedChildrenByNameTopologySignature(count=3)
    algorithm = IncrementalDistanceAlgorithm(signature=signature)
    decorator = DistanceMatrixDecorator(normalized=False)
    decorator.wrap_algorithm(algorithm)
    algorithm.prototypes = [real_tree()]

    algorithm.start_tree()
    for event in real_tree().event_iter(include_marker=True, supported=algorithm.supported):
        try:
            algorithm.add_event(event)
        except EventNotSupportedException:
            # Events the algorithm does not support are skipped on purpose.
            pass
    algorithm.finish_tree()

    # No debug output here: the result is judged only through the public
    # decorator data, not through private attributes of the algorithm.
    self.assertEqual([[[0]]], decorator.data())
def test_node_count_for_correct_zero_distance(self):
    """Check that prototype and monitoring node counts agree for identical trees.

    A real tree is used as prototype and a freshly loaded copy is streamed
    (including marker events) through an ensemble of two signatures. The
    flattened ``"converted"`` values reported by the ``DataDecorator`` for
    ``"prototypes"`` and for ``"monitoring"`` must be equal.
    """
    ensemble = EnsembleSignature(
        signatures=[ParentChildByNameTopologySignature(),
                    ParentCountedChildrenByNameTopologySignature(count=3)])
    algorithm = IncrementalDistanceAlgorithm(
        signature=ensemble, distance=SimpleDistance)
    data_decorator = DataDecorator()
    data_decorator.wrap_algorithm(algorithm)
    algorithm.prototypes = [real_tree()]

    algorithm.start_tree()
    for event in real_tree().event_iter(include_marker=True, supported=algorithm.supported):
        try:
            algorithm.add_event(event)
        except EventNotSupportedException:
            pass
    algorithm.finish_tree()

    def converted_values(section):
        # Flatten the per-tree "converted" lists of one section of the data.
        per_tree = data_decorator.data().get(section, {}).get("converted", [])
        return [tree_value for values in per_tree for tree_value in values]

    self.assertEqual(
        converted_values("prototypes"),
        converted_values("monitoring"))
def test_attributes_nodes_only(self):
    """Check that a tree compared with itself yields a zero distance.

    The same CSV file is loaded twice, once as prototype and once as the
    monitored tree, using a ``StartExitDistance`` of weight 1 that also
    supports ``TrafficEvent`` and ``SetStatistics`` as cache statistics.
    The unnormalized distance matrix must be ``[[[0]]]``.
    """
    def build_distance(**kwargs):
        start_exit = StartExitDistance(weight=1, **kwargs)
        start_exit.supported[TrafficEvent] = True
        return start_exit

    csv_path = "data/c01-007-102/2/1129-2-process.csv"

    algorithm = IncrementalDistanceAlgorithm(
        signature=ParentChildByNameTopologySignature(),
        distance=build_distance,
        cache_statistics=SetStatistics
    )
    matrix = DistanceMatrixDecorator(normalized=False)
    matrix.wrap_algorithm(algorithm)
    algorithm.prototypes = [real_tree(csv_path)]
    monitored = real_tree(csv_path)

    algorithm.start_tree()
    for event in monitored.event_iter(supported=algorithm.supported):
        try:
            algorithm.add_event(event)
        except EventNotSupportedException:
            pass
    algorithm.finish_tree()

    self.assertEqual([[[0]]], matrix.data())
def test_initialisation_of_cluster_representatitives(self): def distance_builder(**kwargs): distance = StartExitDistance(weight=.5, **kwargs) distance.supported[TrafficEvent] = True return distance tree_one = real_tree() tree_two = real_tree(path="data/c01-007-102/2/1129-2-process.csv") tree_three = real_tree(path="data/c01-007-102/2/1136-3-process.csv") distance = StartExitDistance(weight=.5) distance.supported[TrafficEvent] = True signature = ParentChildByNameTopologySignature() tree_profiles = [ tree_one.to_index(signature=signature, supported=distance.supported, statistics_cls=SetStatistics), tree_two.to_index(signature=signature, supported=distance.supported, statistics_cls=SetStatistics), tree_three.to_index(signature=signature, supported=distance.supported, statistics_cls=SetStatistics) ] algorithm = IncrementalDistanceAlgorithm( signature=ParentChildByNameTopologySignature(), distance=distance_builder, cache_statistics=SetStatistics) cluster_distance = ClusterDistance(distance=distance) prototype_names = ["test"] prototype_signatures = [] for prototype in prototype_names: prototype_signatures.append( cluster_distance.mean(tree_profiles, prototype=prototype)) algorithm.cluster_representatives( signature_prototypes=prototype_signatures, prototypes=prototype_names) algorithm.start_tree()