def _tree_statistics(filename):
    """
    Count process and traffic events per timestamp for a single tree.

    Generates the following structure:

        <filename>: {
            "process": {<tme>: <number of start/exit events>},
            "traffic": {<tme>: <number of traffic events>},
            "traffic_count": {<tme>: <sum of in_cnt + out_cnt>}
        }

    :param filename: Path of the file the tree is built from
    :return: MulticoreResult; left empty when the tree cannot be built or
        does not yield any event
    """
    statistics = MulticoreResult()
    builder = CSVTreeBuilder()
    try:
        tree = builder.build(filename)
    except (DataNotInCacheException, TreeInvalidatedException):
        return statistics
    if tree is None:
        return statistics

    for event in tree.event_iter():
        # the entry is created on the first event only, so trees without
        # events do not leave a key in the result
        per_file = statistics.setdefault(
            filename, {"process": {}, "traffic": {}, "traffic_count": {}})
        if isinstance(event, (ProcessStartEvent, ProcessExitEvent)):
            process_counts = per_file["process"]
            process_counts[event.tme] = process_counts.get(event.tme, 0) + 1
        elif isinstance(event, TrafficEvent):
            traffic_events = per_file["traffic"]
            traffic_events[event.tme] = traffic_events.get(event.tme, 0) + 1
            traffic_volume = per_file["traffic_count"]
            traffic_volume[event.tme] = traffic_volume.get(
                event.tme, 0) + (event.in_cnt + event.out_cnt)
    return statistics
def real_tree(path=None, absolute=False):
    """
    Build a tree from a CSV file.

    :param path: Path of the CSV file; defaults to
        "data/c01-007-102/1/1-process.csv"
    :param absolute: When true, path is handed to the builder unchanged;
        otherwise it is resolved relative to the directory of the
        assess_tests package
    :return: Whatever CSVTreeBuilder.build returns for the resolved path
    """
    if path is None:
        path = "data/c01-007-102/1/1-process.csv"
    builder = CSVTreeBuilder()
    if not absolute:
        tests_directory = os.path.dirname(assess_tests.__file__)
        path = os.path.join(tests_directory, path)
    return builder.build(path)
def _valid_tree(filename):
    """
    Check whether a tree can be built from the given file.

    :param filename: Path of the file the tree is built from
    :return: filename when the built tree is truthy; None when it is falsy
        or when building raised DataNotInCacheException or
        TreeInvalidatedException
    """
    builder = CSVTreeBuilder()
    try:
        if builder.build(filename):
            return filename
    except (DataNotInCacheException, TreeInvalidatedException):
        pass
    return None
def perform_clustering(ctx, eta, epsilon):
    """
    Cluster the trees listed in the input file and write out the result.

    Only when ctx.obj["use_input"] is set, the samples from the input file
    are converted to tree indexes and added to a Clustering object. The
    collected results contain the keys "meta", "scores" and, when present,
    "clusters" and "noise". The results are handed to output_results in
    any case.

    :param ctx: Context whose obj dict provides "use_input",
        "configurations" and "structure"
    :param eta: Passed to Clustering as core_neighbours
    :param epsilon: Passed to Clustering as cluster_distance
    """
    results = {}

    if ctx.obj.get("use_input", False):
        configuration = ctx.obj.get("configurations", None)[0]
        signature = configuration.get("signatures", [None])[0]
        distance = configuration.get("distances", [None])[0]
        structure = ctx.obj.get("structure", None)
        file_path = structure.input_file_path()
        tree_builder = CSVTreeBuilder()
        clustering = Clustering(distance=distance,
                                cluster_distance=epsilon,
                                core_neighbours=eta)

        with open(file_path, "r") as input_file:
            input_data = json.load(input_file).get("data", None)
            for sample in input_data.get("samples", []):
                sample_path = sample[0]
                tree = tree_builder.build(sample_path)
                # convert tree to index, restricted to the event types the
                # distance declares as supported
                supported = distance.supported
                clustering[sample_path] = tree.to_index(
                    signature=signature,
                    start_support=supported.get(ProcessStartEvent, False),
                    exit_support=supported.get(ProcessExitEvent, False),
                    traffic_support=supported.get(TrafficEvent, False))
        print("---> performed clustering with eta %s and epsilon %s" %
              (eta, epsilon))

        meta = results.setdefault("meta", {})
        meta["algorithm"] = clustering.clusterer.__class__.__name__
        meta["eta"] = eta
        meta["epsilon"] = epsilon

        for cluster in clustering:
            member_keys = [node.key for node in cluster]
            # TODO: determine CR
            results.setdefault("clusters", []).append(member_keys)
        for noise in clustering.clusterer.noise:
            results.setdefault("noise", []).append(noise.key)

        scores = results.setdefault("scores", {})
        scorers = (silhouette_score, calinski_harabasz_score,
                   davies_bouldin_score)
        for scorer in scorers:
            try:
                value = scorer(clustering.clusterer.clusters,
                               clustering.clusterer.graph)
            except ValueError:
                # a score that cannot be determined is recorded as None
                value = None
            scores[scorer.__name__] = value

    version = determine_version(os.path.dirname(assess_workflows.__file__))
    output_results(ctx=ctx,
                   results=results,
                   version=version,
                   source="%s (%s)" % (__file__, "perform_clustering"))
def _data_by_tme(filename):
    """
    Associate a file with the tme of the first node of its tree.

    Generates the following structure:

        <tme>: [<filename>]

    :param filename: Path of the file the tree is built from
    :return: MulticoreResult; left empty when the tree is None or building
        raised DataNotInCacheException or TreeInvalidatedException
    """
    grouped = MulticoreResult()
    builder = CSVTreeBuilder()
    try:
        tree = builder.build(filename)
    except (DataNotInCacheException, TreeInvalidatedException):
        return grouped
    if tree is not None:
        # only the first node yielded by node_iter is considered
        first_node = next(tree.node_iter())
        grouped.setdefault(first_node.tme, []).append(filename)
    return grouped
def test_simple_clustering(self):
    """
    Two tree indexes assigned to the clustering end up as noise and do not
    form a cluster.
    """
    clustering = Clustering(distance=StartExitDistance())
    # create the indexes to cluster
    builder = CSVTreeBuilder()
    trees = [
        builder.build(path)
        for path in (self.file_path_one, self.file_path_two)
    ]
    indexes = [
        tree.to_index(signature=ParentChildByNameTopologySignature())
        for tree in trees
    ]
    for index in indexes:
        # NOTE(review): both indexes are stored under the same key 1 while
        # two noise objects are expected below - confirm this is intended
        clustering[1] = index
    self.assertEqual(0, len(clustering.clusterer.clusters))
    self.assertEqual(2, len(clustering.clusterer.noise))
def _data_by_uid(filename):
    """
    Associate a file with every distinct uid found on the nodes of its tree.

    Generates the following structure:

        <uid>: [<filename>]

    :param filename: Path of the file the tree is built from
    :return: MulticoreResult; left empty when the tree is None or building
        raised DataNotInCacheException or TreeInvalidatedException
    """
    grouped = MulticoreResult()
    builder = CSVTreeBuilder()
    try:
        tree = builder.build(filename)
    except (DataNotInCacheException, TreeInvalidatedException):
        return grouped
    if tree is None:
        return grouped

    seen_uids = set()
    for node in tree.node_iter():
        if node.uid in seen_uids:
            # the file is listed once per uid only
            continue
        seen_uids.add(node.uid)
        grouped.setdefault(node.uid, []).append(filename)
    return grouped
def _create_graph(ctx, file_path):
    """
    Load a graph from a CSV file containing precalculated distance values.

    Signature, distance and statistics are taken from the first entry of
    ctx.obj["configurations"]. Every node header of the CSV file is treated
    as the path of a tree, which is built and converted to an index.

    :param ctx: Context whose obj dict provides "configurations"
    :param file_path: Path of the CSV file with the distance values
    :return: Graph as returned by graph_io.csv_graph_reader
    """
    configuration = ctx.obj.get("configurations", None)[0]
    signature = configuration.get("signatures", [None])[0]
    distance_builder = configuration.get("distances", [None])[0]
    statistics_cls = configuration.get("statistics", [None])[0]
    tree_builder = CSVTreeBuilder()
    distance = distance_builder()

    def header_to_cache(tree_path):
        # turn a header entry (path of a tree) into a keyed tree index
        supported = distance.supported
        index = tree_builder.build(tree_path).to_index(
            signature=signature(),
            start_support=supported.get(ProcessStartEvent, False),
            exit_support=supported.get(ProcessExitEvent, False),
            traffic_support=supported.get(TrafficEvent, False),
            statistics_cls=statistics_cls)
        index.key = tree_path
        return index

    with open(file_path) as csv_file:
        # comment lines (leading '#') and bare newlines are not passed on
        data_lines = (ln for ln in csv_file
                      if not (ln[0] == '#' or ln == '\n'))
        return graph_io.csv_graph_reader(data_lines,
                                         nodes_header=header_to_cache,
                                         symmetric=True)
def _initialise_prototypes(prototype_paths):
    """
    Method initialises the prototype trees from given file paths.

    Paths ending in ".pkl" are unpickled, all other paths are handed to a
    CSVTreeBuilder.

    :param prototype_paths: List of paths to prototypes
    :return: List of trees
    """
    builder = CSVTreeBuilder()
    trees = []
    for path in prototype_paths:
        if not path.endswith(".pkl"):
            trees.append(builder.build(path))
            continue
        # NOTE(review): pickle.load executes arbitrary code from the file -
        # confirm that prototype paths only ever point to trusted files
        with open(path, "rb") as pickled:
            trees.append(pickle.load(pickled))
    return trees
def _process_names(filename):
    """
    Collect the names of all nodes whose first node entry contains "(".

    :param filename: Path of the file the tree is built from
    :return: Set of node names; empty when the tree is None or building
        raised DataNotInCacheException or TreeInvalidatedException
    """
    names = set()
    builder = CSVTreeBuilder()
    try:
        tree = builder.build(filename)
    except (DataNotInCacheException, TreeInvalidatedException):
        return names
    if tree is None:
        return names

    for node in tree.node_iter():
        try:
            if "(" not in node.node[0]:
                continue
            names.add(node.name)
        except IndexError:
            # nodes raising IndexError here are skipped
            continue
    return names
def check_algorithms(paths=None, configurations=None):
    """
    Run every configured algorithm/signature combination on the given trees.

    All paths are used as prototypes. When the module level options request
    skipping and exactly two paths are given, the first path is used as the
    only prototype and just the second one is streamed as a tree.

    :param paths: List of paths to the CSV files of the trees; the list
        itself is never modified
    :param configurations: List of dicts providing the keys "algorithms",
        "signatures" and "decorator"
    :return: Dict with the keys "files", "version" and "results"
    """
    # Work on a private copy: the skip branch below pops from the list and
    # must not alter the list object owned by the caller.
    paths = [] if paths is None else list(paths)
    if configurations is None:
        configurations = []
    results = {
        "files": paths[:],
        "version": subprocess.check_output(["git", "describe"]).strip(),
        "results": []
    }

    # fill general information
    tree_builder = CSVTreeBuilder()
    if options.skip and len(paths) == 2:
        prototypes = [tree_builder.build(paths.pop(0))]
    else:
        prototypes = [tree_builder.build(path) for path in paths]

    for configuration in configurations:
        for algorithm in configuration["algorithms"]:
            for signature in configuration["signatures"]:
                signature_object = signature()
                alg = algorithm(signature=signature_object)
                alg.prototypes = prototypes

                # TODO: what if there is no decorator at all? Is it possible?
                decorator = configuration["decorator"]
                decorator.wrap_algorithm(alg)

                for index, path in enumerate(paths):
                    if options.no_upper:
                        # TODO: is it ok to ignore no_diagonal when no_upper
                        # is not given?
                        alg.start_tree(
                            maxlen=index + (0 if options.no_diagonal else 1))
                    else:
                        alg.start_tree()
                    for event in GNMCSVEventStreamer(csv_path=path):
                        alg.add_event(event=event)
                    alg.finish_tree()
                results["results"].append({
                    "algorithm": "%s" % alg,
                    "signature": "%s" % signature_object,
                    "decorator": decorator.descriptive_data()
                })
    return results
def test_attribute_distance(self):
    """
    Stream two trees against themselves as prototypes and compare the
    resulting unnormalised distance matrix entries.
    """
    def build_distance(**kwargs):
        # distance that declares all three event types as supported
        distance = StartExitDistance(weight=0, **kwargs)
        distance.supported = {
            ProcessStartEvent: True,
            ProcessExitEvent: True,
            TrafficEvent: True
        }
        return distance

    tree_builder = CSVTreeBuilder()
    trees = [
        tree_builder.build(
            os.path.join(os.path.dirname(assess_tests.__file__), name))
        for name in ("data/c01-007-102/2/1129-2-process.csv",
                     "data/c01-007-102/2/1136-3-process.csv")
    ]
    signature = ParentChildByNameTopologySignature()
    algorithm = IncrementalDistanceAlgorithm(
        signature=signature,
        distance=build_distance,
        cache_statistics=SplittedStatistics)
    algorithm.prototypes = list(trees)
    decorator = DistanceMatrixDecorator(normalized=False)
    decorator.wrap_algorithm(algorithm)

    for tree in trees:
        algorithm.start_tree()
        for event in tree.event_iter(supported=algorithm.supported):
            try:
                algorithm.add_event(event)
            except EventNotSupportedException:
                # unsupported events are simply left out
                continue
        algorithm.finish_tree()

    data = decorator.data()
    print(decorator.data())
    self.assertEqual(4, abs(data[0][0][1] - data[1][0][0]))
def test_symmetry_optimisation(self):
    """
    Streaming a tree against itself as the only prototype yields a distance
    of 0 in the unnormalised distance matrix.
    """
    def build_distance(**kwargs):
        return StartExitDistance(weight=0, **kwargs)

    tree_path = os.path.join(os.path.dirname(assess_tests.__file__),
                             "data/c01-007-102/2/1129-2-process.csv")
    tree = CSVTreeBuilder().build(tree_path)
    signature = ParentChildByNameTopologySignature()
    algorithm = IncrementalDistanceAlgorithm(
        signature=signature,
        distance=build_distance,
        cache_statistics=SplittedStatistics)
    algorithm.prototypes = [tree]
    decorator = DistanceMatrixDecorator(normalized=False)
    decorator.wrap_algorithm(algorithm)

    algorithm.start_tree()
    for event in tree.event_iter(supported=algorithm.supported):
        try:
            algorithm.add_event(event)
        except EventNotSupportedException:
            # unsupported events are simply left out
            continue
    algorithm.finish_tree()

    self.assertEqual(0, decorator.data()[0][0][0])
def calculate_distance_matrix(paths=None, algorithm=None, signature=Signature):
    """
    Calculate and print the distance matrix for the trees at given paths.

    Every path is used as prototype and is also streamed event by event
    through a fresh algorithm instance. The compression factors and the
    rows of the distance matrix are printed with two decimal places.

    :param paths: List of paths to the CSV files of the trees
    :param algorithm: Callable creating the algorithm; called with the
        keyword argument signature
    :param signature: Callable creating the signature for an algorithm
    :return: distance_matrix of the DistanceMatrixDecorator
    """
    if paths is None:
        paths = []
    compression = CompressionFactorDecorator()
    matrix_decorator = DistanceMatrixDecorator(normalized=True)
    matrix_decorator.decorator = compression

    tree_builder = CSVTreeBuilder()
    prototypes = [tree_builder.build(path) for path in paths]

    for path in paths:
        alg = algorithm(signature=signature())
        alg.prototypes = prototypes
        matrix_decorator.wrap_algorithm(alg)
        for event in GNMCSVEventStreamer(csv_path=path):
            alg.add_event(event=event)

    factors = ["%.2f" % value for value in compression.compression_factors()]
    print("%s" % ", ".join(factors))
    print("----------------------")
    for row in matrix_decorator.distance_matrix:
        print(", ".join(["%.2f" % value for value in row]))
    return matrix_decorator.distance_matrix
def _analyse_duration(kwargs):
    """
    Determine the duration of a tree from its root node.

    Generates the following structure:

        <duration>: [<file>, ...]

    The duration is the difference of exit_tme and tme of the root.

    :param kwargs: dict containing the key filepath (path for tree to
        consider)
    :return: MulticoreResult; left empty when the tree is None or building
        raised DataNotInCacheException or TreeInvalidatedException
    """
    durations = MulticoreResult()
    filepath = kwargs.get("filepath", None)
    builder = CSVTreeBuilder()
    try:
        tree = builder.build(filepath)
    except (DataNotInCacheException, TreeInvalidatedException):
        return durations
    if tree is None:
        return durations

    root = tree.root()
    durations.setdefault(root.exit_tme - root.tme, []).append(filepath)
    return durations
def _analyse_compression(kwargs):
    """
    Collect alphabet, fanout and identity counts for a single tree.

    Generates the following structure:

        <number of nodes>: {  # binning node count
            "file": [<string>, ...],
            "node_count": [<int>, ...],  # real node counts
            "alphabet_count": [<int>, ...],
            "identity_count": {
                <Signature>: [<int>, ...]
            },
            "fanout": {
                "min": [<int>, ...],
                "max": [<int>, ...],
                "mean": [<float>, ...],
                "std": [<float>, ...],
                "full": [[<int>, ...], ...]
            }
        }

    :param kwargs: dict containing the keys filepath (path for tree to
        consider), node_count (key used for binning the results) and
        signature_builders (signature builders to consider for generation
        of identities)
    :return: MulticoreResult; left empty when the tree is None or building
        raised DataNotInCacheException or TreeInvalidatedException
    """
    filepath = kwargs.get("filepath", None)
    node_count = kwargs.get("node_count", None)
    signature_builders = kwargs.get("signature_builders", None)
    result = MulticoreResult()
    tree_builder = CSVTreeBuilder()
    try:
        tree = tree_builder.build(filepath)
    except (DataNotInCacheException, TreeInvalidatedException):
        return result
    if tree is None:
        return result

    # prepare generic data first
    alphabet = set()
    fanout = []
    for node in tree.node_iter():
        child_count = len(node.children_list())
        if child_count > 0:
            fanout.append(child_count)
        alphabet.add(node.name)

    for signature_builder in signature_builders:
        signature = signature_builder()
        # one set of distinct identities per single signature
        distinct_identities = [set() for _ in range(signature.count)]
        for node in tree.node_iter():
            identities = signature.get_signature(node, node.parent())
            for position, identity in enumerate(identities):
                distinct_identities[position].add(identity)
        # write results
        # {node_count: "identity_count": {signature_1: [value, ...],
        #                                 signature_2: [value, ...]}}
        identity_counts = result.setdefault(node_count, {}).setdefault(
            "identity_count", {})
        for position, single_signature in enumerate(signature._signatures):
            identity_counts.setdefault(repr(single_signature), []).append(
                len(distinct_identities[position]))

    entry = result.setdefault(node_count, {})
    entry.setdefault("file", []).append(filepath)
    entry.setdefault("alphabet_count", []).append(len(alphabet))
    entry.setdefault("node_count", []).append(tree.node_count())

    # NOTE(review): fanout stays empty when no node has children, in which
    # case min() below raises ValueError - confirm such trees cannot occur
    fanout_statistics = entry.setdefault("fanout", {})
    fanout_statistics.setdefault("min", []).append(min(fanout))
    fanout_statistics.setdefault("max", []).append(max(fanout))
    fanout_statistics.setdefault("mean", []).append(
        sum(fanout) / float(len(fanout)))
    fanout_statistics.setdefault("std", []).append(
        standard_deviation(fanout))
    fanout_statistics.setdefault("full", []).append(fanout)
    # TODO: not supported by tree yet
    # result.setdefault(node_count, {}).setdefault("tree_height", []).append(tree.depth)
    return result
def _analyse_diamonds(kwargs):
    """
    Determine the diamonds of a tree for every given signature builder.

    Method expects an ensemble signature in configuration where the
    signature at position 0 has length n - 1 whereas the signature at
    position 1 has length n (criterion for diamonds). The nodes are grouped
    by their signature from position 0 and for every group the signatures
    from position 1 are collected. A group with more than one collected
    signature is a diamond.

    Method creates different fields in output file:

    * raw: contains the levels and node numbers of the diamonds within a
      given tree
    * identities: number of identities for the whole tree
    * diamonds: number of diamonds within the tree (independent from level)
    * diamond_nodes: number of nodes that make up the diamonds
    * node_counts: number of nodes that were iterated
    * files: files that were used

    All of these fields are indexed by the _height of the first single
    signature of the ensemble (the p value).

        {
            node_count: {
                p_value: {
                    "raw": {
                        "levels": [[diamond level, ...], ...],
                        "nodes": [[diamond nodes, ...], ...]
                    },
                    "identities": [identity_count, ...],
                    "diamonds": [diamond_count, ...],
                    "diamond_nodes": [diamond_node_count, ...],
                    "node_counts": [node_count, ...],
                    "files": [file_path, ...]
                }
            }
        }

    :param kwargs: dict containing keys node_count, filepath and
        signature_builders
    :return: MulticoreResult; left empty when the tree is None or building
        raised DataNotInCacheException or TreeInvalidatedException
    """
    node_count = kwargs.get("node_count", None)
    filepath = kwargs.get("filepath", None)
    signature_builders = kwargs.get("signature_builders", None)
    result = MulticoreResult()
    tree_builder = CSVTreeBuilder()
    try:
        tree = tree_builder.build(filepath)
    except (DataNotInCacheException, TreeInvalidatedException):
        return result
    if tree is None:
        return result

    for signature_builder in signature_builders:
        signature = signature_builder()
        groups = {}
        iterated_nodes = 0
        for node in tree.node_iter():
            iterated_nodes += 1
            node_signatures = signature.get_signature(node, node.parent())
            group = groups.setdefault(node_signatures[0], {})
            group.setdefault("nodes", set()).add(node)
            group.setdefault("signatures", set()).add(node_signatures[1])

        # a group is a diamond as soon as it collected several signatures
        diamonds = {}
        for identity, group in groups.items():
            variants = group.get("signatures", set())
            if len(variants) > 1:
                diamonds[identity] = {
                    "nodes": len(group.get("nodes", set())),
                    "levels": len(variants) - 1
                }

        current_result = result.setdefault(node_count, {}).setdefault(
            signature._signatures[0]._height, {})
        raw_result = current_result.setdefault(
            "raw", {"levels": [], "nodes": []})
        raw_result["levels"].append(
            [diamond.get("levels", 0) for diamond in diamonds.values()])
        raw_result["nodes"].append(
            [diamond.get("nodes", 0) for diamond in diamonds.values()])
        current_result.setdefault("node_counts", []).append(iterated_nodes)
        current_result.setdefault("identities", []).append(len(groups))
        current_result.setdefault("diamonds", []).append(len(diamonds))
        current_result.setdefault("diamond_nodes", []).append(
            sum(diamond.get("nodes", 0) for diamond in diamonds.values()))
        current_result.setdefault("files", []).append(filepath)
    return result
def _analyse_diamond_perturbation(kwargs):
    """
    Determine how many nodes and signatures are affected by diamonds.

    For every signature builder the nodes of the tree are grouped by the
    first signature of the ensemble. Groups that collected more than one
    second signature are diamonds. For every diamond the diamond nodes and
    all of their descendants, together with their first signatures, are
    collected.

        {
            p_count: {
                diamond_count: {
                    "profile_distortions": [],  # profile distortion based on frequency
                    "profile_distortions_signatures": [],  # profile distortion based on set count
                    "distance_errors": [],  # distance error based on frequency
                    "distance_errors_signatures": [],  # distance error based on set count
                    "signature_counts": [],  # nr of signatures in tree
                    "node_counts": [],  # nr of nodes in tree
                    "raw": [{
                        <diamond key>: {
                            "level": diamond_level,
                            "nested": nesting_level,
                            "nodes": node_count,
                            "signatures": signature_count
                        }, ...
                    }, ...]
                }
            }
        }

    :param kwargs: dict with keys filepath and signature_builders
    :return: MulticoreResult; left empty when the tree is None or building
        raised DataNotInCacheException or TreeInvalidatedException
    """
    filepath = kwargs.get("filepath", None)
    signature_builders = kwargs.get("signature_builders", None)
    tree_builder = CSVTreeBuilder()
    perturbation_results = MulticoreResult()
    try:
        tree = tree_builder.build(filepath)
    except (DataNotInCacheException, TreeInvalidatedException):
        return perturbation_results
    if tree is None:
        return perturbation_results

    for signature_builder in signature_builders:
        diamonds = {}
        node_signatures = set()
        signature = signature_builder()
        node_count = 0
        for node in tree.node_iter():
            node_count += 1
            current_signature = signature.get_signature(node, node.parent())
            node_signatures.add(current_signature[0])
            diamond = diamonds.setdefault(current_signature[0], {})
            diamond.setdefault("signatures", set()).add(current_signature[1])
            diamond.setdefault("nodes", set()).add(node)
        # only groups with several second signatures are diamonds
        diamonds = {
            key: diamond
            for key, diamond in diamonds.items()
            if len(diamond.get("signatures", set())) > 1
        }

        diamond_perturbation = {}
        for diamond_key, diamond in diamonds.items():
            # found a diamond, that represents several diamond nodes
            result = diamond_perturbation.setdefault(
                diamond_key, {
                    "nested": 0,
                    "nodes": set(),
                    "signatures": set()
                })
            result["level"] = max(
                0, len(diamond.get("signatures", set())) - 1)
            for node in diamond.get("nodes"):
                to_check = set(node.children_list())
                result["nodes"].add(node)
                # the parent node itself is required here, not the bound
                # method, exactly as for every other get_signature call
                result["signatures"].add(
                    signature.get_signature(node, node.parent())[0])
                # collect all descendants of the diamond node
                while to_check:
                    child = to_check.pop()
                    result["nodes"].add(child)
                    child_signatures = signature.get_signature(
                        child, child.parent())
                    result["signatures"].add(child_signatures[0])
                    to_check.update(child.children_list())
                    if child_signatures[0] in diamonds:
                        # diamond is a nested diamond, so initialise it here
                        # NOTE(review): this replaces an entry that may have
                        # been filled before - confirm this is intended
                        diamond_perturbation[child_signatures[0]] = {
                            "level": 1,
                            "nested": result["nested"] + 1,
                            "nodes": set(),
                            "signatures": set()
                        }

        diamond_count = len(diamond_perturbation)
        perturbation_result = perturbation_results.setdefault(
            signature._signatures[0]._height, {}).setdefault(
                diamond_count, {})
        affected = list(diamond_perturbation.values())
        perturbation_result.setdefault("profile_distortions", []).append(
            sum(len(diamond.get("nodes", [])) * diamond["level"]
                for diamond in affected))
        perturbation_result.setdefault(
            "profile_distortions_signatures", []).append(
                sum(len(diamond.get("signatures", [])) * diamond["level"]
                    for diamond in affected))
        perturbation_result.setdefault("distance_errors", []).append(
            sum(len(diamond.get("nodes", [])) for diamond in affected))
        perturbation_result.setdefault(
            "distance_errors_signatures", []).append(
                sum(len(diamond.get("signatures", []))
                    for diamond in affected))
        perturbation_result.setdefault("signature_counts", []).append(
            len(node_signatures))
        perturbation_result.setdefault("node_counts", []).append(node_count)
        perturbation_result.setdefault("raw", []).append({
            key: {
                "level": value["level"],
                "nested": value["nested"],
                "nodes": len(value["nodes"]),
                "signatures": len(value["signatures"])
            }
            for key, value in diamond_perturbation.items()
        })
    return perturbation_results
def _full_statistics(kwargs):
    """
    Collect structural statistics for a single tree.

    Generates the following structure:

        <filepath>: {
            "node_count": <int>,
            "complete_node_count": <int>,  # nodes plus attributes on nodes
            "nodes_with_attribute_count": <int>,
            "alphabet_count": <int>,
            "duration": <exit_tme - tme of root>,
            "fanout": [<int>, ...],
            "complete_fanout": [<int>, ...],  # fanout plus attributes
            "depth": [<int>, ...],  # depth of leaves
            "complete_depth": [<int>, ...],
            "attribute_event_count": [<int>, ...]
        }

    :param kwargs: dict containing the key filepath (path for tree to
        consider)
    :return: MulticoreResult; left empty when the tree is None or building
        raised DataNotInCacheException or TreeInvalidatedException
    """
    filepath = kwargs.get("filepath", None)
    result = MulticoreResult()
    tree_builder = CSVTreeBuilder()
    try:
        tree = tree_builder.build(filepath)
    except (DataNotInCacheException, TreeInvalidatedException):
        return result
    if tree is None:
        return result

    attributes_on_nodes = 0
    nodes_with_attributes = 0
    alphabet = set()
    fanout = []
    complete_fanout = []
    depth = []
    complete_depth = []
    attribute_events = []

    for node in tree.node_iter():
        # check if node has traffic
        attribute_count = 0
        if node.traffic:
            positive_rates = 0
            attribute_names = set()
            for traffic in node.traffic:
                if traffic.in_rate > 0:
                    positive_rates += 1
                    attribute_names.add("%s_in_rate" % traffic.conn_cat)
                if traffic.out_rate > 0:
                    positive_rates += 1
                    attribute_names.add("%s_out_rate" % traffic.conn_cat)
            attribute_count = len(attribute_names)
            attributes_on_nodes += attribute_count
            attribute_events.append(positive_rates)
            nodes_with_attributes += 1

        child_count = len(node.children_list())
        if child_count > 0:
            # determine fanout; attributes count as additional children
            fanout.append(child_count)
            complete_fanout.append(child_count + attribute_count)
        else:
            # node is a leaf, so determine depth in tree
            leaf_depth = node.depth()
            depth.append(leaf_depth)
            if attribute_count > 0:
                # every attribute adds a leaf one level below the node
                complete_depth.extend([leaf_depth + 1] * attribute_count)
            else:
                complete_depth.append(leaf_depth)
        alphabet.add(node.name)

    current_result = result.setdefault(filepath, {})
    current_result["node_count"] = tree.node_count()
    current_result["complete_node_count"] = (
        tree.node_count() + attributes_on_nodes)
    current_result["nodes_with_attribute_count"] = nodes_with_attributes
    current_result["alphabet_count"] = len(alphabet)
    current_result["duration"] = tree.root().exit_tme - tree.root().tme
    current_result["fanout"] = fanout
    current_result["complete_fanout"] = complete_fanout
    current_result["depth"] = depth
    current_result["complete_depth"] = complete_depth
    current_result["attribute_event_count"] = attribute_events
    return result
def _generate_perturbated_tree(kwargs):
    """
    Generate perturbated variants of a tree for a list of probabilities.

    Generates the following structure:

        <filepath>: {
            "tree": <tree>,
            "perturbated_tree": {
                <probability>: [<perturbated tree>, ...]
            }
        }

    :param kwargs: dict containing the following keys

        * filepath: Path to consider
        * probabilities: List of probabilities
        * repeat: How often to repeat a single probability
        * insert_probability: Probability to insert item
        * delete_probability: Probability to delete item
        * change_probability: Probability to change item
        * move_probability: Probability to move item
        * leaf_nodes_only: Only include leaf nodes?
        * internal_nodes_only: Only include internal nodes?
        * attribute_nodes_only: Only include attribute nodes?
        * cost: True or False, whether costs are handed to the generator
    :return: MulticoreResult; left empty when the built tree is None
    """
    result = MulticoreResult()
    filepath = kwargs.get("filepath", None)
    probabilities = kwargs.get("probabilities", [])
    repeat = kwargs.get("repeat", 1)
    insert_probability = kwargs.get("insert_probability", 0)
    delete_probability = kwargs.get("delete_probability", 0)
    change_probability = kwargs.get("change_probability", 0)
    move_probability = kwargs.get("move_probability", 0)
    leaf_nodes_only = kwargs.get("leaf_nodes_only", False)
    internal_nodes_only = kwargs.get("internal_nodes_only", False)
    attribute_nodes_only = kwargs.get("attribute_nodes_only", False)
    cost = kwargs.get("cost", True)

    def create_generator(probability):
        # attribute mode only deletes attributes and ignores the configured
        # operation probabilities
        if attribute_nodes_only:
            return TEDGenerator(
                costs=[],
                operation_generator=RandomOperation(
                    delete_probability=1,
                    delete_operation=DeleteAttributeTreeEditOperation(
                        probability=probability)),
                probability=1,
                skip_node=skip_all_but_attribute_nodes)
        if cost:
            costs = [
                TreeEditDistanceCost(),
                FanoutWeightedTreeEditDistanceCost(),
                SubtreeWeightedTreeEditDistanceCost(),
                SubtreeHeightWeightedTreeEditDistanceCost(),
                SubtreeWeightedTreeEditDistanceCostWithMove()
            ]
        else:
            costs = []
        operations = RandomOperation(
            insert_probability=insert_probability,
            delete_probability=delete_probability,
            edit_probability=change_probability,
            move_probability=move_probability)
        if internal_nodes_only:
            skip_node = skip_leaf
        elif leaf_nodes_only:
            skip_node = skip_inner_node
        else:
            skip_node = skip_no_node
        return TEDGenerator(costs=costs,
                            operation_generator=operations,
                            probability=probability,
                            skip_node=skip_node)

    tree_builder = CSVTreeBuilder()
    tree = tree_builder.build(filepath)
    if tree is None:
        return result

    entry = result.setdefault(filepath, {})
    entry["tree"] = tree
    perturbated_trees = entry.setdefault("perturbated_tree", {})
    for probability in probabilities:
        ted_generator = create_generator(probability)
        for _ in range(repeat):
            perturbated_trees.setdefault(probability, []).append(
                ted_generator.generate(tree))
            # reload tree
            tree = tree_builder.build(filepath)
    return result
def check_algorithms(tree_paths=None,
                     prototype_paths=None,
                     cluster_representatives_paths=None,
                     configurations=None):
    """
    Run every configured algorithm/signature combination on the given trees.

    Prototypes are either taken from the first cluster representatives file
    or built from the prototype paths. When the module level options request
    more than one process, the combinations are distributed via do_multicore
    and consecutive results of the same algorithm and signature are merged;
    otherwise everything is processed sequentially.

    :param tree_paths: List of paths to the trees to stream
    :param prototype_paths: List of paths to the prototypes to build
    :param cluster_representatives_paths: List of paths to JSON files with
        cluster representatives; only the first one is read
    :param configurations: List of dicts providing the keys "algorithms",
        "signatures", "decorator" and optionally "event_streamer"
    :return: Dict with the keys "files", "prototypes", "version" and
        "results"
    """
    if tree_paths is None:
        tree_paths = []
    if prototype_paths is None:
        prototype_paths = []
    if cluster_representatives_paths is None:
        cluster_representatives_paths = []
    if configurations is None:
        configurations = []
    results = {
        "files": tree_paths[:],
        "prototypes": prototype_paths[:],
        "version": subprocess.check_output(["git", "describe"]).strip(),
        "results": []
    }

    def describe(decorator):
        # result entry for a decorator returned from the multicore run
        return {
            "algorithm": "%s" % decorator.algorithm,
            "signature": "%s" % decorator.algorithm.signature,
            "decorator": decorator.descriptive_data()
        }

    tree_builder = CSVTreeBuilder()
    prototypes = []
    prototype_signature = None
    if len(cluster_representatives_paths) > 0:
        with open(cluster_representatives_paths[0], "r") as json_file:
            cluster_representatives = json.load(json_file)
        prototype_signature = \
            PrototypeSignatureCache.from_cluster_representatives(
                cluster_representatives["data"])
        # the keys of the cluster representatives act as prototypes
        for cluster in cluster_representatives["data"].keys():
            prototypes.append(cluster)
    else:
        for path in prototype_paths:
            prototypes.append(tree_builder.build(path))

    if options.pcount > 1:
        for configuration in configurations:
            data = []
            try:
                event_streamers = configuration["event_streamer"]
            except KeyError:
                event_streamers = [GNMCSVEventStreamer]
            for algorithm in configuration["algorithms"]:
                for signature in configuration["signatures"]:
                    for path in tree_paths:
                        data.append({
                            "algorithm": algorithm,  # TODO: CR contains algorithm
                            "signature": signature,  # TODO: CR contains signature
                            "decorator": configuration["decorator"],
                            "tree": path,
                            "prototypes": prototypes,
                            "prototype_signature": prototype_signature,
                            "event_streamers": event_streamers
                        })
            result_list = do_multicore(count=options.pcount,
                                       target=check_single_algorithm,
                                       data=data)
            decorator = None
            for result in result_list:
                if decorator is None:
                    decorator = result
                elif repr(decorator.algorithm) == repr(result.algorithm) and \
                        repr(decorator.algorithm.signature) == repr(
                            result.algorithm.signature):
                    decorator.update(result)
                else:
                    # we identified a new decorator, so save the last one
                    results["results"].append(describe(decorator))
                    decorator = result
            if decorator is not None:
                results["results"].append(describe(decorator))
    else:
        for configuration in configurations:
            try:
                event_streamers = configuration["event_streamer"]
            except KeyError:
                event_streamers = [GNMCSVEventStreamer]
            for event_streamer in event_streamers:
                for algorithm in configuration["algorithms"]:
                    for signature in configuration["signatures"]:
                        signature_object = signature()
                        alg = algorithm(signature=signature_object)
                        if prototype_signature is not None:
                            alg.cluster_representatives(
                                signature_prototypes=prototype_signature,
                                prototypes=prototypes)
                        else:
                            alg.prototypes = prototypes
                        decorator = configuration["decorator"]()
                        decorator.wrap_algorithm(alg)
                        streamer = None
                        for path in tree_paths:
                            alg.start_tree()
                            streamer = event_streamer(csv_path=path)
                            for event in streamer:
                                alg.add_event(event=event)
                            alg.finish_tree()
                        # Parentheses are required: "%" binds tighter than
                        # the conditional expression, so without them the
                        # fallback streamer was stored unformatted.
                        results["results"].append({
                            "algorithm": "%s" % alg,
                            "signature": "%s" % signature_object,
                            "event_streamer": "%s" % (
                                streamer if streamer is not None else
                                event_streamer(csv_path=None)),
                            "decorator": decorator.descriptive_data()
                        })
    return results