def test_all(self):
    """
    Perturb a small hand-built tree twice and print both distances.

    The generator is configured with four cost models and a random
    operation source using delete/insert/edit probabilities of 0.25 each
    and a move probability of 0. The test makes no assertions; it only
    prints the ``distance`` attribute of the two generated results.
    """
    cost_models = [
        FanoutWeightedTreeEditDistanceCost(),
        TreeEditDistanceCost(),
        SubtreeWeightedTreeEditDistanceCost(),
        SubtreeHeightWeightedTreeEditDistanceCost(),
    ]
    operations = RandomOperation(
        delete_probability=0.25,
        insert_probability=0.25,
        edit_probability=0.25,
        move_probability=0,
    )
    generator = TEDGenerator(
        costs=cost_models,
        operation_generator=operations,
        probability=.5,
    )

    # Build the tree level by level; insertion order matches pid order.
    tree = Prototype()
    top = tree.add_node("root", pid=1, ppid=0)
    first_child = top.add_node("test1", pid=2, ppid=1)
    for name, pid in (("test2", 3), ("test3", 4)):
        top.add_node(name, pid=pid, ppid=1)
    for name, pid in (("test1.1", 5), ("test1.2", 6), ("test1.3", 7)):
        first_child.add_node(name, pid=pid, ppid=2)
    nested = first_child.add_node("test1.4", pid=8, ppid=2)
    for name, pid in (("test2.1", 9), ("test2.2", 10),
                      ("test2.3", 11), ("test2.4", 12)):
        nested.add_node(name, pid=pid, ppid=8)

    # Both variants are generated before anything is printed.
    first_variant = generator.generate(tree=tree)
    second_variant = generator.generate(tree=tree)
    print("received %s" % first_variant.distance)
    print("received %s" % second_variant.distance)
def test_ensemble_result(self):
    """
    Check that ensemble distances equal the single-signature distances.

    A disturbed copy of the real tree is replayed once through an
    algorithm using an ensemble of two signatures and then once per
    signature through an algorithm using only that signature. For every
    signature the value found in the ensemble matrix has to equal the
    value from the corresponding single run.
    """
    original_tree = real_tree()
    perturbation = TEDGenerator(
        operation_generator=RandomOperation(
            insert_probability=.5, delete_probability=.5),
        costs=[TreeEditDistanceCost()],
        seed=1234)
    disturbed_tree = perturbation.generate(original_tree)
    signatures = [ParentChildByNameTopologySignature(),
                  ParentCountedChildrenByNameTopologySignature(count=2)]

    def measure(signature, prototype_source):
        """Replay the disturbed tree and return the distance matrix data.

        ``prototype_source`` is called only after the algorithm has been
        wrapped, so the prototype is obtained at the same point as it
        would be when written inline.
        """
        matrix = DistanceMatrixDecorator(normalized=False)
        runner = IncrementalDistanceAlgorithm(
            signature=signature,
            distance=StartDistance
        )
        matrix.wrap_algorithm(runner)
        runner.prototypes = [prototype_source()]
        runner.start_tree()
        for event in disturbed_tree.event_iter(supported=runner.supported):
            try:
                runner.add_event(event)
            except EventNotSupportedException:
                # events the algorithm rejects are skipped
                pass
        runner.finish_tree()
        return matrix.data()

    # Ensemble run: uses the tree instance built at the top of the test
    ensemble_data = measure(
        EnsembleSignature(signatures=signatures), lambda: original_tree)

    # Single runs: each one gets a freshly created real tree as prototype
    single_data = {
        position: measure(signature, real_tree)
        for position, signature in enumerate(signatures)
    }

    for position in range(len(signatures)):
        self.assertEqual(ensemble_data[0][position][0],
                         single_data[position][0][0][0])
def _generate_perturbated_tree(kwargs):
    """
    Build the tree stored at a path and generate perturbated variants of it.

    All settings are taken from the single ``kwargs`` dictionary. The keys
    that are read, together with the default used when a key is missing:

    * ``filepath`` (None): path handed to the CSV tree builder
    * ``probabilities`` ([]): probabilities to generate variants for
    * ``repeat`` (1): number of variants generated per probability
    * ``insert_probability`` (0): probability of insert operations
    * ``delete_probability`` (0): probability of delete operations
    * ``change_probability`` (0): probability of edit operations
    * ``move_probability`` (0): probability of move operations
    * ``leaf_nodes_only`` (False): select the ``skip_inner_node`` filter
    * ``internal_nodes_only`` (False): select the ``skip_leaf`` filter;
      takes precedence over ``leaf_nodes_only``
    * ``attribute_nodes_only`` (False): use the attribute deletion
      operation with the ``skip_all_but_attribute_nodes`` filter; the four
      operation probabilities and ``cost`` are not used in this mode
    * ``cost`` (True): attach the cost models to the generator

    :param kwargs: Dictionary holding the settings listed above
    :return: Result keyed by ``filepath``; the entry holds the built tree
        under ``"tree"`` and, under ``"perturbated_tree"``, a dictionary
        mapping each probability to the list of generated variants. The
        result is empty when the builder returns no tree.
    """
    result = MulticoreResult()
    path = kwargs.get("filepath", None)
    probability_values = kwargs.get("probabilities", [])
    repetitions = kwargs.get("repeat", 1)
    p_insert = kwargs.get("insert_probability", 0)
    p_delete = kwargs.get("delete_probability", 0)
    p_change = kwargs.get("change_probability", 0)
    p_move = kwargs.get("move_probability", 0)
    only_leaves = kwargs.get("leaf_nodes_only", False)
    only_internal = kwargs.get("internal_nodes_only", False)
    only_attributes = kwargs.get("attribute_nodes_only", False)
    with_cost = kwargs.get("cost", True)

    builder = CSVTreeBuilder()
    current_tree = builder.build(path)
    if current_tree is None:
        return result

    result.setdefault(path, {})
    entry = result[path]
    # The stored tree stays the first built instance, also after reloads.
    entry["tree"] = current_tree
    entry.setdefault("perturbated_tree", {})

    for probability in probability_values:
        if only_attributes:
            generator = TEDGenerator(
                costs=[],
                operation_generator=RandomOperation(
                    delete_probability=1,
                    delete_operation=DeleteAttributeTreeEditOperation(
                        probability=probability)),
                probability=1,
                skip_node=skip_all_but_attribute_nodes)
        else:
            # New cost objects are created for every probability.
            if with_cost:
                cost_models = [
                    TreeEditDistanceCost(),
                    FanoutWeightedTreeEditDistanceCost(),
                    SubtreeWeightedTreeEditDistanceCost(),
                    SubtreeHeightWeightedTreeEditDistanceCost(),
                    SubtreeWeightedTreeEditDistanceCostWithMove()
                ]
            else:
                cost_models = []
            operations = RandomOperation(
                insert_probability=p_insert,
                delete_probability=p_delete,
                edit_probability=p_change,
                move_probability=p_move)
            if only_internal:
                node_filter = skip_leaf
            elif only_leaves:
                node_filter = skip_inner_node
            else:
                node_filter = skip_no_node
            generator = TEDGenerator(
                costs=cost_models,
                operation_generator=operations,
                probability=probability,
                skip_node=node_filter)

        for _ in range(repetitions):
            variant = generator.generate(current_tree)
            entry["perturbated_tree"].setdefault(
                probability, []).append(variant)
            # reload tree so the next generate() call gets a fresh build
            current_tree = builder.build(path)
    return result
def test_edit(self):
    """
    Run the generator with edit operations only and print the distance.

    Uses ``self.prototype`` as the input tree; no assertions are made.
    """
    cost_models = [FanoutWeightedTreeEditDistanceCost(),
                   TreeEditDistanceCost()]
    edit_only = RandomOperation(edit_probability=1)
    generator = TEDGenerator(costs=cost_models,
                             operation_generator=edit_only)
    edited = generator.generate(tree=self.prototype)
    print("received %s" % edited.distance)
def test_move(self):
    """
    Run the generator with move operations only and print the result.

    No cost models are attached; ``self.prototype`` is the input tree and
    no assertions are made.
    """
    move_only = RandomOperation(move_probability=1)
    generator = TEDGenerator(costs=[], operation_generator=move_only)
    moved = generator.generate(tree=self.prototype)
    print(moved)