def test_event_order(self):
    """Check that the event stream respects process lifetimes.

    A root with five children and one grandchild is replayed with start,
    exit and parameter events enabled. A process may only start while its
    parent is alive, and may only exit or report a parameter while it is
    alive itself.
    """
    tree = Prototype()
    root = tree.add_node("root", pid=1, ppid=0, tme=0, exit_tme=0, param=2)
    for i in range(5):
        root.add_node("child_%d" % i, pid=i + 2, ppid=1, tme=0, exit_tme=0,
                      param=i * 2)
    child = next(root.children())
    child.add_node("child", pid=8, ppid=child.pid, tme=0, exit_tme=0, param=5)

    # pids of processes that have started but not exited yet
    alive = []
    for event in tree.event_iter(supported={
        ProcessStartEvent: True,
        ProcessExitEvent: True,
        ParameterEvent: True
    }):
        if isinstance(event, ProcessStartEvent):
            # the root was added with ppid 0 and has no parent in the tree
            if event.ppid != 0:
                self.assertIn(event.ppid, alive)
            alive.append(event.pid)
        elif isinstance(event, ProcessExitEvent):
            self.assertIn(event.pid, alive)
            alive.remove(event.pid)
        elif isinstance(event, ParameterEvent):
            self.assertIn(event.pid, alive)
def simple_unique_node_tree():
    """Return a tree whose root has three differently named children."""
    result = Prototype()
    top = result.add_node("root", tme=0, exit_tme=3, pid=1, ppid=0)
    # (name, tme, exit_tme, pid) of every direct child, in insertion order
    child_specs = (
        ("bla", 0, 1, 2),
        ("test", 1, 2, 3),
        ("muh", 1, 3, 4),
    )
    for label, start, stop, pid in child_specs:
        top.add_node(label, tme=start, exit_tme=stop, pid=pid, ppid=1)
    return result
def test_all(self):
    """Run the TED generator twice on one tree and print both distances."""
    cost_models = [
        FanoutWeightedTreeEditDistanceCost(),
        TreeEditDistanceCost(),
        SubtreeWeightedTreeEditDistanceCost(),
        SubtreeHeightWeightedTreeEditDistanceCost(),
    ]
    operations = RandomOperation(
        delete_probability=0.25,
        insert_probability=0.25,
        edit_probability=0.25,
        move_probability=0,
    )
    tedgen = TEDGenerator(
        costs=cost_models,
        operation_generator=operations,
        probability=.5,
    )

    # three levels: root -> test1..test3, test1 -> test1.1..test1.4,
    # test1.4 -> test2.1..test2.4
    base_tree = Prototype()
    root = base_tree.add_node("root", pid=1, ppid=0)
    one = root.add_node("test1", pid=2, ppid=1)
    for pid, label in enumerate(("test2", "test3"), start=3):
        root.add_node(label, pid=pid, ppid=1)
    for pid, label in enumerate(("test1.1", "test1.2", "test1.3"), start=5):
        one.add_node(label, pid=pid, ppid=2)
    two = one.add_node("test1.4", pid=8, ppid=2)
    for pid, label in enumerate(
            ("test2.1", "test2.2", "test2.3", "test2.4"), start=9):
        two.add_node(label, pid=pid, ppid=8)

    # both results are generated before anything is printed
    first = tedgen.generate(tree=base_tree)
    second = tedgen.generate(tree=base_tree)
    for outcome in (first, second):
        print("received %s" % outcome.distance)
def test_number_of_events(self):
    """Count the events of a seven-node tree for three support settings."""
    tree = Prototype()
    root = tree.add_node("root", pid=1, ppid=0, tme=0, exit_tme=0, param=2)
    for index in range(5):
        root.add_node("child_%d" % index, pid=index + 2, ppid=1, tme=0,
                      exit_tme=0, param=index * 2)
    first_child = next(root.children())
    first_child.add_node("child", pid=10, ppid=first_child.pid, tme=0,
                         exit_tme=0, param=5)

    # (exit events enabled, parameter events enabled, expected event total)
    scenarios = (
        (False, False, 7),
        (True, False, 14),
        (True, True, 21),
    )
    for with_exit, with_param, expected in scenarios:
        supported = {
            ProcessStartEvent: True,
            ProcessExitEvent: with_exit,
            ParameterEvent: with_param,
        }
        observed = sum(1 for _ in Event.from_tree(tree, supported=supported))
        self.assertEqual(expected, observed)
def _valid_hdf_tree(args):
    """
    Build one tree per event stored under ``train_events`` in an HDF file.

    Rows are selected with the first label found in the index; a tree is
    only reported when it evaluates as true.

    :param args: Tuple from category and file path
    :return: List of tuples from category, tree, and tree name
    """
    category, filename = args
    frame = pd.read_hdf(filename, key="train_events")
    first_label = frame.index.get_level_values(0)[0]
    event_numbers = frame.index.get_level_values(1).unique()

    collected = []
    for event in event_numbers:
        rows = frame.xs((first_label, event), level=('label', 'evtNum'))
        tree = Prototype()
        previous = None
        for node_id, row in rows.iterrows():
            # snapshot of the complete row (it still carries "motherIndex"
            # and "PDG"), extended by constant tme values
            attributes = dict(row)
            attributes.update(tme=0, exit_tme=0)
            mother = int(row["motherIndex"])
            name = row["PDG"]
            if previous is None:
                # the first row is added without a parent reference
                previous = tree.add_node(name, node_id=node_id, **attributes)
            else:
                previous = tree.add_node(
                    name, parent_node_id=mother, node_id=node_id,
                    **attributes)
        if tree:
            tree_name = "%s-%s" % (
                os.path.basename(filename).split(".")[0], event)
            collected.append((category, tree, tree_name))
    return collected
def simple_prototype():
    """Return a tree whose root has two "test" and two "muh" children."""
    tree = Prototype()
    top = tree.add_node("root", tme=0, exit_tme=3, pid=1, ppid=0)
    # (name, tme, exit_tme) per child; pids are assigned from 2 upwards
    child_specs = (
        ("test", 0, 1),
        ("muh", 0, 2),
        ("test", 1, 2),
        ("muh", 1, 3),
    )
    for pid, (label, start, stop) in enumerate(child_specs, start=2):
        top.add_node(label, tme=start, exit_tme=stop, pid=pid, ppid=1)
    return tree
def prototype():
    """Return a tree with a root and four children, without pid information."""
    tree = Prototype()
    top = tree.add_node("root", tme=0, exit_tme=3)
    # (name, tme, exit_tme) per child, in insertion order
    for label, start, stop in (
        ("test", 0, 1),
        ("muh", 0, 2),
        ("test", 1, 2),
        ("muh", 1, 3),
    ):
        top.add_node(label, tme=start, exit_tme=stop)
    return tree
def simple_additional_monitoring_tree():
    """Return a tree whose root has five children, two of them named "test"."""
    result = Prototype()
    top = result.add_node("root", tme=0, exit_tme=3, pid=1, ppid=0)
    # (name, tme, exit_tme) per child; pids are assigned from 2 upwards
    child_specs = (
        ("hello", 0, 2),
        ("yes", 0, 1),
        ("test", 0, 1),
        ("muh", 0, 2),
        ("test", 1, 3),
    )
    for pid, (label, start, stop) in enumerate(child_specs, start=2):
        top.add_node(label, tme=start, exit_tme=stop, pid=pid, ppid=1)
    return result
def test_creation_via_node(self):
    """Children created through a node report their insertion position."""
    child_total = 20
    tree = Prototype()
    root = tree.add_node("root", pid=1, ppid=0)
    # every child is named after its insertion index
    for number in range(child_total):
        root.add_node(number)

    self.assertEqual(root.child_count(), child_total)
    for child in root.children():
        position = child.node_number()
        self.assertEqual(position, int(child.name))
def test_unique_tree_ids(self):
    """Identically named nodes must all be counted individually."""
    batch = 20
    tree = Prototype()
    root = tree.add_node("node", pid=1, ppid=0)
    for _ in range(batch):
        root.add_node("node")
    first_child = next(iter(root.children()))
    for _ in range(batch):
        first_child.add_node("node")
    # one root, 20 children and 20 grandchildren
    expected_nodes = 1 + 2 * batch
    self.assertEqual(tree.node_count(), expected_nodes)
def setUp(self):
    """Create a root with three children, two of which share a name."""
    self._simple_prototype = tree = Prototype()
    top = tree.add_node("root_node", tme=0, exit_tme=10, pid=2, ppid=1)
    # (name, tme, exit_tme, pid) per child
    for label, start, stop, pid in (
        ("first_child", 1, 3, 3),
        ("second_child", 1, 4, 4),
        ("first_child", 5, 7, 5),
    ):
        top.add_node(label, tme=start, exit_tme=stop, pid=pid, ppid=2)
def setUp(self):
    """Create a two-level tree: three children, the first with four children."""
    tree = Prototype()
    root = tree.add_node("root", pid=1, ppid=0)
    one = root.add_node("test1", pid=2, ppid=1)
    for pid, label in enumerate(("test2", "test3"), start=3):
        root.add_node(label, pid=pid, ppid=1)
    grandchildren = ("test1.1", "test1.2", "test1.3", "test1.4")
    for pid, label in enumerate(grandchildren, start=5):
        one.add_node(label, pid=pid, ppid=2)
    self.prototype = tree
def simple_monitoring_tree():
    """Return a root with three children, every node with empty traffic."""
    result = Prototype()
    top = result.add_node("root", tme=0, exit_tme=3, pid=1, ppid=0,
                          traffic=[])
    # (name, tme, exit_tme) per child; pids are assigned from 2 upwards
    child_specs = (
        ("test", 0, 1),
        ("test", 1, 2),
        ("muh", 1, 3),
    )
    for pid, (label, start, stop) in enumerate(child_specs, start=2):
        # a new list per node, so no traffic list is shared
        top.add_node(label, tme=start, exit_tme=stop, pid=pid, ppid=1,
                     traffic=[])
    return result
def test_same_attributes_different_count(self):
    """Compare two trees that differ only in the number of equal children.

    Both trees serve as prototypes and are streamed one after the other;
    the distance of the first tree to the second prototype has to equal
    the distance of the second tree to the first prototype.
    """
    def build(child_total):
        # root plus ``child_total`` children with identical attributes
        result = Prototype()
        top = result.add_node("root", pid=1, ppid=0, tme=0, exit_tme=0)
        for _ in range(child_total):
            top.add_node("node", pid=2, ppid=1, tme=0, exit_tme=0)
        return result

    small = build(5)
    large = build(35)

    algorithm = IncrementalDistanceAlgorithm(
        signature=ParentChildByNameTopologySignature(),
        distance=lambda **kwargs: StartExitDistance(weight=0, **kwargs),
        cache_statistics=SplittedStatistics)
    algorithm.prototypes = [small, large]
    matrix = DistanceMatrixDecorator(normalized=False)
    matrix.wrap_algorithm(algorithm)

    for streamed in (small, large):
        algorithm.start_tree()
        for event in streamed.event_iter(supported=algorithm.supported):
            try:
                algorithm.add_event(event)
            except EventNotSupportedException:
                # events the algorithm rejects are skipped
                pass
        algorithm.finish_tree()

    data = matrix.data()
    self.assertEqual(data[0][0][1], data[1][0][0])
def test_parameter_event_generation(self):
    """The two extra attributes of a node must produce two parameter events."""
    tree = Prototype()
    root = tree.add_node("root", pid=1, ppid=0, test=2, muh=3, tme=3,
                         exit_tme=3)
    # parameter name -> value it was created with
    expected_values = {"test": 2, "muh": 3}

    seen = 0
    matched = 0
    for event in Event.events_from_node(root,
                                        supported={ParameterEvent: True}):
        seen += 1
        if event.name in expected_values:
            self.assertEqual(expected_values[event.name], event.value)
            matched += 1
    self.assertEqual(2, seen)
    self.assertEqual(2, matched)
def test_streaming_order(self):
    """Check that start events arrive in the order the nodes were added.

    The expected sequence is the creation order of the nodes; note that
    "four" (tme=2) is created before "three" (tme=1).
    """
    prototype = Prototype()
    root = prototype.add_node("root", pid=2, ppid=1, tme=1, exit_tme=5)
    nodes = [
        root,
        root.add_node("one", pid=3, ppid=2, tme=1, exit_tme=2),
        root.add_node("two", pid=4, ppid=2, tme=1, exit_tme=2),
        root.add_node("four", pid=5, ppid=2, tme=2, exit_tme=3),
        root.add_node("three", pid=6, ppid=2, tme=1, exit_tme=3),
    ]

    index = 0
    for event in prototype.event_iter(supported={ProcessStartEvent: True}):
        if isinstance(event, ProcessStartEvent):
            # assertEqual replaces the deprecated alias assertEquals, which
            # no longer exists in Python 3.12
            self.assertEqual(nodes[index].name, event.name)
            index += 1
    # every node must have produced exactly one start event
    self.assertEqual(index, len(nodes))
class TestStartExitStuff(unittest.TestCase):
    """Replay a start/exit event stream against a matching prototype."""

    def setUp(self):
        """Create a root with three children, two of which share a name."""
        self._simple_prototype = tree = Prototype()
        top = tree.add_node("root_node", tme=0, exit_tme=10, pid=2, ppid=1)
        # (name, tme, exit_tme, pid) per child
        for label, start, stop, pid in (
            ("first_child", 1, 3, 3),
            ("second_child", 1, 4, 4),
            ("first_child", 5, 7, 5),
        ):
            top.add_node(label, tme=start, exit_tme=stop, pid=pid, ppid=2)

    def test_start_exit(self):
        """Streaming the prototype's own events has to yield distance zero."""
        alg = IncrementalDistanceAlgorithm(
            signature=ParentChildByNameTopologySignature(),
            distance=StartExitDistance)
        alg.prototypes = [self._simple_prototype]

        # (event factory, keyword arguments) in streaming order; the events
        # are only created when they are fed to the algorithm
        stream = (
            (Event.start, dict(tme=0, pid=2, ppid=1, name="root_node")),
            (Event.start, dict(tme=1, pid=3, ppid=2, name="first_child")),
            (Event.start, dict(tme=1, pid=4, ppid=2, name="second_child")),
            (Event.exit, dict(tme=3, start_tme=1, pid=3, ppid=2,
                              name="first_child")),
            (Event.exit, dict(tme=4, start_tme=1, pid=4, ppid=2,
                              name="second_child")),
            (Event.start, dict(tme=5, pid=5, ppid=2, name="first_child")),
            (Event.exit, dict(tme=7, start_tme=5, pid=5, ppid=2,
                              name="first_child")),
            (Event.exit, dict(tme=10, start_tme=0, pid=2, ppid=1,
                              name="root_node")),
        )

        alg.start_tree()
        distance = None
        for make_event, fields in stream:
            # only the result of the final (root exit) event is checked
            distance = alg.add_event(make_event(**fields))
        alg.finish_tree()
        self.assertEqual(distance[0][0], [0])
class TestTreeEditDistanceFunctionalities(unittest.TestCase):
    """Tree edit distance of a reference tree against small variations."""

    def setUp(self):
        """Create the reference tree and two variants of it.

        All trees consist of a root with the children "test" and "muh" plus
        one grandchild; the variants move or rename that grandchild.
        """
        # reference: grandchild "yes" below the first child
        self.prototype = reference = Prototype()
        top = reference.add_node("root", tme=0, exit_tme=0, pid=1, ppid=0)
        top.add_node("test", tme=0, exit_tme=0, pid=2, ppid=1)
        top.add_node("muh", tme=0, exit_tme=0, pid=3, ppid=1)
        siblings = list(top.children())
        siblings[0].add_node("yes", tme=0, exit_tme=0, pid=4, ppid=2)

        # variant: grandchild "yes" below the second child
        self.modified_position = moved = Prototype()
        top = moved.add_node("root", pid=1, ppid=0, tme=0, exit_tme=0)
        top.add_node("test", tme=0, pid=2, ppid=1, exit_tme=0)
        top.add_node("muh", tme=0, pid=3, ppid=1, exit_tme=0)
        siblings = list(top.children())
        siblings[1].add_node("yes", tme=0, pid=4, ppid=3, exit_tme=0)

        # variant: grandchild below the first child, but named "no"
        self.modified_name = renamed = Prototype()
        top = renamed.add_node("root", pid=1, ppid=0, tme=0, exit_tme=0)
        top.add_node("test", tme=0, pid=2, ppid=1, exit_tme=0)
        top.add_node("muh", tme=0, pid=3, ppid=1, exit_tme=0)
        siblings = list(top.children())
        siblings[0].add_node("no", tme=0, pid=4, ppid=2, exit_tme=0)

    def _test_algorithm(self, prototype=None, tree=None):
        """Stream the start events of ``tree`` against ``prototype``.

        :param prototype: Tree used as the only prototype
        :param tree: Tree whose start events are fed to the algorithm
        :return: First entry of the result of ``finish_tree``
        """
        algorithm = TreeEditDistanceAlgorithm(signature=Signature())
        algorithm.prototypes = [prototype]
        algorithm.start_tree()
        start_events = Event.from_tree(
            tree, supported={ProcessStartEvent: True})
        algorithm.add_events(start_events)
        distances = algorithm.finish_tree()
        return distances[0]

    def test_zero_distance(self):
        """A tree compared with itself has distance 0."""
        outcome = self._test_algorithm(
            prototype=self.prototype, tree=self.prototype)
        self.assertEqual(outcome[0], 0)

    def test_modified_position(self):
        """Moving the grandchild to the other child costs 2."""
        outcome = self._test_algorithm(
            prototype=self.prototype, tree=self.modified_position)
        self.assertEqual(outcome[0], 2)

    def test_modified_name(self):
        """Renaming the grandchild costs 1."""
        outcome = self._test_algorithm(
            prototype=self.prototype, tree=self.modified_name)
        self.assertEqual(outcome[0], 1)
def setUp(self):
    """Create a wide tree with repeated child names on three levels."""
    self.prototype = tree = Prototype()
    top = tree.add_node("root", ppid=0, pid=1)
    # four runs of ten equally named children, alternating between two names
    for label in ("child", "child2", "child", "child2"):
        for _ in range(10):
            top.add_node(label)
    # the third child of the root receives ten numbered children
    third = list(top.children())[2]
    for number in range(10):
        third.add_node(number)
    # the first of those receives five more children
    nested = list(third.children())[0]
    for _ in range(5):
        nested.add_node("child")
def test_node_properties(self):
    """Check depth, parent, child count and node number in a small tree."""
    tree = Prototype()
    root = tree.add_node("root")
    first = tree.add_node("first", root)
    second = tree.add_node("second", first)
    second_2 = tree.add_node("second_2", first)
    third = tree.add_node("third", second)

    # test depth properties
    depth_checks = (
        (root, 0, "Depth of root should be 0"),
        (first, 1, "Depth of first should be 1"),
        (second, 2, "Depth of second should be 2"),
        (second_2, 2, "Depth of second_2 should be 2"),
        (third, 3, "Depth of third should be 3"),
    )
    for node, expected, message in depth_checks:
        self.assertEqual(node.depth(), expected, message)

    # test parent properties
    for child, parent in ((third, second), (second, first)):
        self.assertEqual(child.parent(), parent)
    self.assertEqual(second.parent(), second_2.parent())
    self.assertEqual(first.parent(), root)

    # test child count
    child_counts = (
        (root, 1), (first, 2), (second, 1), (second_2, 0), (third, 0),
    )
    for node, expected in child_counts:
        self.assertEqual(node.child_count(), expected)

    # test node number
    node_numbers = (
        (root, 0), (first, 0), (second, 0), (second_2, 1), (third, 0),
    )
    for node, expected in node_numbers:
        self.assertEqual(node.node_number(), expected)
def test_parameter_distance(self):
    """Check the distance of differing parameters for three weights.

    Prototype and tree have the same structure; all nodes except the root
    of the streamed tree carry ``param=4`` instead of ``param=1``.
    """
    def build(param):
        # root (param=1), five children and one grandchild with ``param``
        result = Prototype()
        top = result.add_node("root", tme=0, exit_tme=0, pid=1, ppid=0,
                              param=1)
        for index in range(5):
            top.add_node("child_%d" % index, tme=0, exit_tme=0,
                         pid=index + 2, ppid=1, param=param)
        next(top.children()).add_node("child", tme=0, exit_tme=0, pid=8,
                                      ppid=2, param=param)
        return result

    prototype = build(1)
    tree = build(4)

    for weight, result in [(1, 0), (.5, 6), (0, 12)]:
        def distance(**kwargs):
            # reads ``weight`` of the current iteration when it is called
            instance = StartExitDistance(weight=weight, **kwargs)
            instance.supported = {
                ProcessStartEvent: True,
                ProcessExitEvent: True,
                ParameterEvent: True
            }
            return instance

        algorithm = IncrementalDistanceAlgorithm(
            signature=EnsembleSignature(
                signatures=[ParentChildByNameTopologySignature()]),
            distance=distance,
            cache_statistics=SetStatistics
        )
        matrix = DistanceMatrixDecorator(normalized=False)
        matrix.wrap_algorithm(algorithm)
        algorithm.prototypes = [prototype]

        algorithm.start_tree()
        algorithm.add_events(tree.event_iter(supported=algorithm.supported))
        algorithm.finish_tree()
        self.assertEqual(result, matrix.data()[0][0][0])
def test_node_order(self):
    """Children added through the tree report their insertion position."""
    child_total = 20
    tree = Prototype()
    top = tree.add_node("root")
    # every child is named after its insertion index
    for number in range(child_total):
        tree.add_node(name=number, parent=top)
    self.assertEqual(tree.child_count(top), child_total)

    # check order of nodes
    for child in tree.children(top):
        position = tree.node_number(child)
        self.assertEqual(
            position, int(child.name), "Number of node does not match")
def _prototypes_from_dir(self, dir_path):
    """
    Yield the prototypes of all jobs found for ``dir_path``.

    The directory cache at ``self._cache_path(dir_path)`` holds a pickled
    list of job file paths. If it can be read, every listed path is
    delegated to ``self._prototypes_from_csv``. If ``self.force_refresh``
    is set or reading fails with ``OSError``/``IOError``/``EOFError``, the
    jobs are read from ``self.data_source`` instead, each prototype is
    pickled to its own cache file and the list of job paths is written to
    the directory cache afterwards.

    All file locks are requested with ``timeout=0``; a lock that cannot be
    acquired is skipped silently.

    :param dir_path: Path handed to the cache lookup and the data source
    :return: Generator of prototypes
    """
    # For each directory of CSVs, we store a header of files and
    # individual, per-file pkls.
    # Since the source will do the directory by itself, cached and uncached
    # structure behave differently here!
    cache_path = self._cache_path(dir_path)
    try:
        if self.force_refresh:
            # jump straight into the rebuild branch below
            raise OSError
        # get list of files
        # NOTE(review): pickle.load can execute arbitrary code; presumably
        # the cache location is trusted - confirm
        with open(cache_path, 'rb') as cache_pkl:
            job_csv_paths = pickle.load(cache_pkl)
        # get job files individually to allow refreshing any
        # NOTE(review): this loop is still inside the try block, so it looks
        # like an OSError/IOError/EOFError raised after some prototypes have
        # been yielded restarts from the data source and yields them again
        # - confirm
        for job_csv_path in job_csv_paths:
            for prototype in self._prototypes_from_csv(job_csv_path):
                yield prototype
    except (OSError, IOError, EOFError):
        # lock file sits next to the cache file, extension replaced by .lock
        dir_prototype_lock = filelock.FileLock(
            os.path.splitext(cache_path)[0] + '.lock')
        try:
            with dir_prototype_lock.acquire(timeout=0):
                # clean up broken pickles
                if os.path.exists(cache_path):
                    os.unlink(cache_path)
                    self._logger.warning('Refreshing existing cache %r', cache_path)
        except filelock.Timeout:
            # lock not available: leave the stale cache file in place
            pass
        data_source = self.data_source
        # paths of all jobs seen so far; becomes the new directory cache
        job_files = []
        for job in data_source.jobs(path=dir_path):
            job.prepare_traffic()
            prototype = Prototype.from_job(job)
            # the prototype is handed out before any caching is attempted
            yield prototype
            assert job.path not in job_files, \
                "Job file may not contain multiple jobs (%r)" % job.path
            job_cache_path = self._cache_path(job.path)
            cache_prototype_lock = filelock.FileLock(
                os.path.splitext(job_cache_path)[0] + '.lock')
            try:
                with cache_prototype_lock.acquire(timeout=0):
                    # store the job individually, just remember its file
                    with open(job_cache_path, 'wb') as job_cache_pkl:
                        pickle.dump([prototype], job_cache_pkl, pickle.HIGHEST_PROTOCOL)
            except filelock.Timeout:
                # lock not available: this job is not cached, but its path
                # is still recorded below
                pass
            job_files.append(job.path)
        try:
            # written only after the job loop has been exhausted
            with dir_prototype_lock.acquire(timeout=0):
                with open(cache_path, 'wb') as cache_pkl:
                    pickle.dump(job_files, cache_pkl, pickle.HIGHEST_PROTOCOL)
        except filelock.Timeout:
            # lock not available: the directory cache is not written
            pass
def prototype(self):
    """
    Getter for the prototype that the random monitoring event stream is
    based on.

    The prototype is created on first access and reused afterwards. It
    consists of a root and ``self._prototype_node_count`` children. For
    each child, once the root has at least one child and a random draw
    does not exceed ``self._relative_repetition``, a random entry of
    ``root.children_list()`` is used as name; otherwise a fresh id is
    generated.

    :return: Prototype where random tree is based on
    """
    if self._prototype is not None:
        return self._prototype

    tree = Prototype()
    top = tree.add_node(name=id_generator(size=6), tme=0, exit_tme=0, pid=1,
                        ppid=0)
    for index in range(self._prototype_node_count):
        # TODO: check if this is < or <=
        # the random draw only happens when the root already has children
        reuse_existing = (
            top.child_count() > 0
            and random.random() <= self._relative_repetition
        )
        node_name = (random.choice(top.children_list())
                     if reuse_existing else id_generator())
        tree.add_node(name=node_name, parent=top, tme=0, exit_tme=0,
                      pid=index + 2, ppid=1)
    # every iteration must have added exactly one node below the root
    assert tree.node_count() - 1 == self._prototype_node_count
    self._prototype = tree
    return self._prototype
def test_nodes_width_first(self):
    """Iterating with ``depth_first=False`` has to visit level by level."""
    tree = Prototype()
    root = tree.add_node("root", pid=1, ppid=0)
    one = root.add_node("1")
    one_one = one.add_node("1.1")
    one_one_one = one_one.add_node("1.1.1")
    one_two = one.add_node("1.2")
    two = root.add_node("2")
    three = root.add_node("3")

    # expected visiting order: level 0, level 1, level 2, level 3
    remaining = [root, one, two, three, one_one, one_two, one_one_one]
    for visited in tree.nodes(depth_first=False):
        self.assertEqual(visited, remaining.pop(0))
    # every expected node must have been visited
    self.assertEqual(len(remaining), 0)
def test_global_order(self):
    """Check depth-first order, creation order and the node linkage."""
    tree = Prototype()
    root = tree.add_node("root")
    node_1 = tree.add_node("node_1", parent=root)
    node_2 = tree.add_node("node_2", parent=root)
    # grandchildren are added alternating between both children
    for label, parent in (
        ("node_3", node_2),
        ("node_4", node_1),
        ("node_5", node_2),
        ("node_6", node_1),
    ):
        tree.add_node(label, parent=parent)

    # test depth first
    depth_first_names = [node.name for node in tree.nodes()]
    self.assertEqual(
        depth_first_names,
        ["root", "node_1", "node_4", "node_6", "node_2", "node_3", "node_5"])

    # test order first
    order_first_names = [node.name for node in tree.nodes(order_first=True)]
    self.assertEqual(
        order_first_names,
        ["root", "node_1", "node_2", "node_3", "node_4", "node_5", "node_6"])

    # test linkage
    self.assertEqual(node_2.previous_node, node_1)
    self.assertEqual(node_2.next_node.name, "node_3")
    self.assertEqual(node_2.parent(), root)
def test_parent_child_event_iter(self):
    """No event may refer to a parent that has already exited."""
    tree = Prototype()
    root = tree.add_node("root", pid=1, ppid=0, tme=0, exit_tme=3,
                         traffic=[])
    one = root.add_node("one", pid=2, ppid=1, tme=0, exit_tme=2, traffic=[])
    one.add_node("one.one", pid=3, ppid=2, tme=1, exit_tme=2, traffic=[])
    one.add_node("one.two", pid=5, ppid=2, tme=2, exit_tme=2, traffic=[])
    root.add_node("two", pid=4, ppid=1, tme=1, exit_tme=2, traffic=[])

    supported = {
        ProcessStartEvent: True,
        ProcessExitEvent: True
    }
    # pids for which an exit event has been seen
    exited = set()
    for event in tree.event_iter(supported=supported):
        is_start = isinstance(event, ProcessStartEvent)
        is_exit = isinstance(event, ProcessExitEvent)
        if is_start:
            self.assertTrue(
                event.ppid not in exited,
                "Node with pid %s is already gone..." % event.ppid)
        if is_exit:
            self.assertTrue(
                event.ppid not in exited,
                "Node with pid %s has already been finished" % event.ppid)
            exited.add(event.pid)
def _get_tree(self): if self._tree is None: tree = Prototype() for node in self._streamer.node_iter(): keep_node = self._validate_node(node) if keep_node: parent = node.parent() while parent is not None and \ not self._kept[self.signature.get_signature( parent, parent.parent())]: parent = parent.parent() if parent is not None or tree.root() is None: node_dict = node.dao().copy() try: node_dict["ppid"] = parent.pid except AttributeError: pass tree.add_node(parent_node_id=parent.node_id if parent is not None else None, **node_dict) self._tree = tree return self._tree
def test_from_job(self):
    """Build a prototype from a recorded job and check size and ordering.

    The prototype of the last job found in the data file must contain
    9109 nodes, and iterating with ``order_first=True`` must never go
    back in ``tme``.
    """
    file_path = os.path.join(
        os.path.dirname(assess_tests.__file__),
        "data/c01-007-102/1/1-process.csv"
    )
    data_source = FileDataSource()
    # start from None so that a data source without jobs fails the
    # assertion below instead of raising a NameError
    prototype = None
    for job in data_source.jobs(path=file_path):
        prototype = Prototype.from_job(job)
    self.assertIsNotNone(prototype)
    self.assertEqual(prototype.node_count(), 9109)

    last_tme = 0
    for node in prototype.nodes(order_first=True):
        # assertLessEqual reports both values on failure
        self.assertLessEqual(last_tme, node.tme)
        last_tme = node.tme
def test_parent(self):
    """Node and tree have to agree on the parent of every node."""
    tree = Prototype()
    root = tree.add_node("root", pid=1, ppid=0)
    sub_root = root.add_node("sub_root", pid=2, ppid=1)
    sub_sub_root = sub_root.add_node("sub_sub_root", pid=3, ppid=2)

    # (node, expected parent); the root has no parent
    expected_parents = (
        (root, None),
        (sub_root, root),
        (sub_sub_root, sub_root),
    )
    for node, parent in expected_parents:
        self.assertEqual(node.parent(), parent)
        self.assertEqual(tree.parent(node), parent)