def index_valid_trees(ctx, paths, pcount):
    """
    Method walks the given paths and reads all trees that are found within. For each tree that
    can successfully be read, it is appended to the results list. This list can be used for
    further processing.

    :param ctx: Click context
    :param paths: The paths to scan for valid tree data
    """
    results = []
    filenames = []
    for path in paths:
        filenames.extend(_relevant_files_for_context(ctx, path))
    if pcount > 1:
        results.extend(
            do_multicore(count=pcount, target=_valid_tree, data=filenames))
    else:
        for filename in filenames:
            result = _valid_tree(filename)
            if result is not None:
                results.append(result)

    output_results(ctx=ctx,
                   results=results,
                   version=determine_version(
                       os.path.dirname(assess_workflows.__file__)),
                   source="%s (%s)" % (__file__, "index_valid_trees"))
def analyse_diamonds(ctx, pcount):
    """
    Writes an output file that follows this format:

    {
        node_count: {
            p_value: {
                "raw": [[diamond levels], ...],
                "identities": [identity_count, ...],
                "diamonds": [diamond_count, ...],
                "files": [file_path, ...]
            }
        }
    }

    :param ctx: Click context
    :param pcount: Number of worker processes to use
    :return:
    """
    results = MulticoreResult()
    ctx.obj["json"] = True
    if ctx.obj.get("use_input", False):
        structure = ctx.obj.get("structure", None)
        file_path = structure.input_file_path()
        signature_builders = ctx.obj.get("configurations",
                                         [{}])[0].get("signatures", [])

        with open(file_path, "r") as input_file:
            analysis_files = json.load(input_file).get("data", None)
            if pcount > 1:
                data = [{
                    "node_count": node_count,
                    "filepath": tree_path[0],
                    "signature_builders": signature_builders
                } for node_count, tree_paths in analysis_files.items()
                        for tree_path in tree_paths]
                multicore_results = do_multicore(count=pcount,
                                                 target=_analyse_diamonds,
                                                 data=data)
                for result in multicore_results:
                    results += result
            else:
                for node_count, tree_paths in analysis_files.items():
                    for tree_path in tree_paths:
                        results += _analyse_diamonds({
                            "node_count": node_count,
                            "filepath": tree_path[0],
                            "signature_builders": signature_builders
                        })

    output_results(ctx=ctx,
                   results=results,
                   version=determine_version(
                       os.path.dirname(assess_workflows.__file__)),
                   source="%s (%s)" % (__file__, "analyse_diamonds"))
def create_payloads(ctx, paths, output_path, pcount):
    data = []
    for path in paths:
        # prepare data
        for folder, workernode_subdir, run_subdir, _ in relevant_directories(
                path):
            # get all relevant files
            current_path = os.path.join(folder, workernode_subdir, run_subdir)
            data.extend([{
                "path": filename,
                "output_path": output_path
            } for filename in glob.glob("%s/*-process.csv" % current_path)])
    if pcount > 1:
        do_multicore(count=pcount, target=_create_payloads, data=data)
    else:
        for element in data:
            _create_payloads(element)
def prepare_raw_data(ctx, paths, output_path, pcount):
    data = []
    for path in paths:
        # prepare data
        # TODO: is this called for every filename?!
        for folder, workernode_subdir, run_subdir, _ in relevant_directories(
                path):
            data.append({
                "path": os.path.join(folder, workernode_subdir, run_subdir),
                "output_path": output_path
            })
    if pcount > 1:
        do_multicore(count=pcount, target=_prepare_raw_data, data=data)
    else:
        for element in data:
            _prepare_raw_data(element)
def full_statistics(ctx, pcount):
    """
    Method prepares full statistics about a dataset. The output is as follows:

    {
        <filename>: {
            "node_count": <int>,  # number of nodes in tree
            "complete_node_count": <int>,  # number of nodes in tree w attributes
            "nodes_with_attribute_count": <int>,  # number of nodes that contain attributes
            "alphabet_count": <int>,  # alphabet count
            "duration": <int>,  # duration of tree
            "fanout": [<int>, ...],  # fanout of nodes
            "complete_fanout": [<int>, ...]  # fanout of nodes w attributes
            "depth": [<int>, ...],  # depth in tree for leaves
            "complete_depth": [<int>, ...],  # depth in tree for leaves w attributes
            "attribute_event_count": [<int>, ...]  # events for attributes per node
        }
    }

    :param ctx: Click context
    :param pcount: Number of worker processes to use
    :return:
    """
    results = MulticoreResult()
    ctx.obj["json"] = True
    if ctx.obj.get("use_input", False):
        structure = ctx.obj.get("structure", None)
        file_path = structure.input_file_path()

        with open(file_path, "r") as input_file:
            analysis_files = json.load(input_file).get("data", None)
            data = []
            for node_count, tree_paths in analysis_files.items():
                for tree_path in tree_paths:
                    if isinstance(tree_path, list):
                        for path in tree_path:
                            data.append({"filepath": path})
                    else:
                        data.append({"filepath": tree_path})
            if pcount > 1:
                multicore_result = do_multicore(count=pcount,
                                                target=_full_statistics,
                                                data=data)
                for result in multicore_result:
                    results += result
            else:
                for elem in data:
                    results += _full_statistics(elem)
    output_results(ctx=ctx,
                   results=results,
                   version=determine_version(
                       os.path.dirname(assess_workflows.__file__)),
                   source="%s (%s)" % (__file__, "full_statistics"))
def batch_process_clustering_as_vector(ctx, pcount, eta, epsilon):
    if ctx.obj.get("use_input", False):
        configuration = ctx.obj.get("configurations", None)[0]
        distance_cls = configuration.get("distances", [None])[0]
        structure = ctx.obj.get("structure", None)
        file_path = structure.input_file_path(
            file_type="csv")  # expecting csv file

        graph = _create_graph(ctx, file_path)
        clustering = DenGraphIO(base_graph=graph,
                                cluster_distance=epsilon,
                                core_neighbours=eta)
        cluster_distance = ClusterDistance(distance=distance_cls(),
                                           threshold=0)
        clustering.graph.distance = cluster_distance
        # calculate CRs from clusters
        prototypes = []
        for cluster in clustering:
            for core in cluster.core_nodes:
                prototypes.append(core.key)
        files = [node.key for node in clustering.graph]

        data = []
        for idx, a_file in enumerate([
                files[start_idx:start_idx + 10]
                for start_idx in range(0, len(files), 10)
        ]):
            data.append({
                "configurations": ctx.obj["configurations"],
                "files": a_file,
                "prototypes": prototypes,
                "path": structure.intermediate_file_path(variant=idx)
            })
        if pcount > 1:
            do_multicore(pcount, _batch_process_as_vector_and_write, data)
        else:
            for elem in data:
                _batch_process_as_vector_and_write(elem)
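
# The data preparation above splits the clustered files into fixed blocks of ten before handing
# them to _batch_process_as_vector_and_write; a standalone sketch of the same chunking idiom
# (the block size of ten is the choice made above, nothing in the workflow requires it):
def _chunks(sequence, size=10):
    for start_idx in range(0, len(sequence), size):
        yield sequence[start_idx:start_idx + size]
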
def batch_process_from_pkl(ctx, pcount, reverse):
    results = _init_results()
    results["distance"] = []
    results["prototypes"] = []
    if ctx.obj.get("use_input", False):
        structure = ctx.obj.get("structure", None)
        file_path = structure.input_file_path(file_type="pkl")
        with open(file_path, "r") as input_file:
            data = []
            # HEADER
            # ######
            # results are split into a header and data files
            # see data_generation_cli.generate_perturbated_tree
            tree_metadata = pickle.load(input_file)
            results["files"] = tree_metadata.keys()
            for key, pkl_path in tree_metadata.items():
                # tree is stored in "tree"
                # distorted trees in "perturbated_tree"
                data.append({
                    "data_pkl_path": pkl_path,
                    "data_pkl_key": key,
                    "configurations": ctx.obj["configurations"],
                    "reverse": reverse
                })
            if pcount > 1:
                result_list = (do_multicore(
                    count=pcount,
                    data=data,
                    target=_process_configurations_for_row))
                for result in result_list:
                    results["results"].append(result['results'])
                    results["distance"].append(result['precalculated_costs'])
                    results["prototypes"].append(result["prototypes"])
            else:
                for elem in data:
                    result = _process_configurations_for_row(elem)
                    results["results"].append(result["results"])
                    results["distance"].append(result["precalculated_costs"])
                    results["prototypes"].append(result["prototypes"])
    output_results(ctx=ctx,
                   results=results,
                   version=determine_version(os.path.dirname(assess.__file__)),
                   source="%s (%s)" % (__file__, "batch_process_from_pkl"))
def index_tree_statistics(ctx, paths, pcount):
    filenames = []
    results = MulticoreResult()
    for path in paths:
        filenames.extend(_relevant_files_for_context(ctx, path))
    if pcount > 1:
        result_list = do_multicore(count=pcount,
                                   target=_tree_statistics,
                                   data=filenames)
        for result in result_list:
            results += result
    else:
        for filename in filenames:
            results += _tree_statistics(filename)

    output_results(ctx=ctx,
                   results=results,
                   version=determine_version(
                       os.path.dirname(assess_workflows.__file__)),
                   source="%s (%s)" % (__file__, "index_tree_statistics"))
def index_process_names(ctx, paths, pcount):
    filenames = []
    result_set = set()
    for path in paths:
        filenames.extend(_relevant_files_for_context(ctx, path))
    if pcount > 1:
        result_list = do_multicore(count=pcount,
                                   target=_process_names,
                                   data=filenames)
        for result in result_list:
            result_set.update(result)
    else:
        for filename in filenames:
            result_set.update(_process_names(filename))

    output_results(ctx=ctx,
                   results={"process_names": [name for name in result_set]},
                   version=determine_version(
                       os.path.dirname(assess_workflows.__file__)),
                   source="%s (%s)" % (__file__, "index_process_names"))
def analyse_diamond_perturbations(ctx, pcount):
    results = MulticoreResult()
    ctx.obj["json"] = True
    if ctx.obj.get("use_input", False):
        structure = ctx.obj.get("structure", None)
        file_path = structure.input_file_path()
        signature_builders = ctx.obj.get("configurations",
                                         [{}])[0].get("signatures", [])

        with open(file_path, "r") as input_file:
            analysis_files = json.load(input_file).get("data", None)
            if pcount > 1:
                # combine data
                data = [{
                    "filepath": path[0],
                    "signature_builders": signature_builders
                } for paths in analysis_files.values() for path in paths]
                multicore_results = do_multicore(
                    count=pcount,
                    target=_analyse_diamond_perturbation,
                    data=data)
                for result in multicore_results:
                    results += result
            else:
                for tree_paths in analysis_files.values():
                    for tree_path in tree_paths:
                        results += _analyse_diamond_perturbation({
                            "filepath": tree_path[0],
                            "signature_builders": signature_builders
                        })

    output_results(
        ctx=ctx,
        results=results,
        version=determine_version(os.path.dirname(assess_workflows.__file__)),
        source="%s (%s)" % (__file__, "analyse_diamond_perturbation"))
def analyse_duration(ctx, pcount):
    """
    Prepares duration data for further analysis.

    :param ctx: Click context
    :param pcount: Number of worker processes to use
    :return:
    """
    results = MulticoreResult()
    ctx.obj["json"] = True
    if ctx.obj.get("use_input", False):
        structure = ctx.obj.get("structure", None)
        file_path = structure.input_file_path()

        with open(file_path, "r") as input_file:
            analysis_files = json.load(input_file).get("data", None)
            data = []
            for node_count, tree_paths in analysis_files.items():
                for tree_path in tree_paths:
                    if isinstance(tree_path, list):
                        for path in tree_path:
                            data.append({"filepath": path})
                    else:
                        data.append({"filepath": tree_path})
            if pcount > 1:
                multicore_result = do_multicore(count=pcount,
                                                target=_analyse_duration,
                                                data=data)
                for result in multicore_result:
                    results += result
            else:
                for elem in data:
                    results += _analyse_duration(elem)
    output_results(ctx=ctx,
                   results=results,
                   version=determine_version(
                       os.path.dirname(assess_workflows.__file__)),
                   source="%s (%s)" % (__file__, "analyse_duration"))
def index_valid_hdf_trees(ctx, trees, representatives, pcount):
    structure = ctx.obj.get("structure", None)
    results = {}
    paths = [(key, value) for key, values in {
        "trees": trees,
        "representatives": representatives
    }.items() for value in values if values]
    if pcount > 1:
        trees = do_multicore(count=pcount, target=_valid_hdf_tree, data=paths)
        for category, tree, name in trees:
            results.setdefault(category, []).append(
                _write_tree_to_pkl(structure, tree, name))
    else:
        for filename in paths:
            trees = _valid_hdf_tree(filename)
            for category, tree, name in trees:
                results.setdefault(category, []).append(
                    _write_tree_to_pkl(structure, tree, name))
    output_results(ctx=ctx,
                   results=results,
                   version=determine_version(
                       os.path.dirname(assess_workflows.__file__)),
                   source="%s (%s)" % (__file__, "index_valid_hdf_trees"))
def analyse_compression(ctx, pcount):
    """
    Prepares data for further compression analysis. It collects information on
    * number of nodes in original tree
    * height of tree as an optional information
    * size of the alphabet (optimised by excluding id numbers in names)
    * number of unique identities generated
    * statistics on the trees' fanout

    The following output format can be expected:

    <number of nodes>: {
        "file": [<string>, ...],
        "alphabet_count": [<int>, ...],
        "tree_height": [<int>, ...],
        "identity_count": {
            <Signature>: [<int>, ...]
        },
        "fanout": {
            "min": [<int>, ...],
            "max": [<int>, ...],
            "mean": [<float>, ...],
            "std": [<float>, ...],
            "full": [[<int>, ...], ...]
        }
    }

    :param ctx: Click context
    :param pcount: Number of worker processes to use
    :return:
    """
    results = MulticoreResult()
    ctx.obj["json"] = True
    if ctx.obj.get("use_input", False):
        structure = ctx.obj.get("structure", None)
        file_path = structure.input_file_path()
        signature_builders = ctx.obj.get("configurations",
                                         [{}])[0].get("signatures", [])

        with open(file_path, "r") as input_file:
            analysis_files = json.load(input_file).get("data", None)
            data = []
            for node_count, tree_paths in analysis_files.items():
                for tree_path in tree_paths:
                    for path in tree_path:
                        data.append({
                            "node_count": node_count,
                            "filepath": path,
                            "signature_builders": signature_builders
                        })
            if pcount > 1:
                multicore_results = do_multicore(count=pcount,
                                                 target=_analyse_compression,
                                                 data=data)
                for result in multicore_results:
                    results += result
            else:
                for elem in data:
                    results += _analyse_compression(elem)

    output_results(ctx=ctx,
                   results=results,
                   version=determine_version(
                       os.path.dirname(assess_workflows.__file__)),
                   source="%s (%s)" % (__file__, "analyse_compression"))
def process_as_matrix(ctx, trees, skip_upper, skip_diagonal, pcount):
    if len(trees) == 0 and ctx.obj.get("use_input", False):
        structure = ctx.obj.get("structure", None)
        file_path = structure.input_file_path()
        with open(file_path, "r") as input_file:
            # can be list of lists or flat list
            data = json.load(input_file).get("data")
            try:
                trees = data.values()[0]
            except AttributeError:
                trees = data
            except TypeError:
                # object is dictionary with trees
                trees = data.get("trees", [])
    results = _init_results()
    results["files"] = results["prototypes"] = trees[:]

    # if we have a flat list, check, otherwise, just take it
    if isinstance(trees[0], list):
        tree_paths = trees
        nested = True
    else:
        tree_paths = _get_input_files(trees,
                                      minimum=ctx.obj["start"],
                                      maxlen=ctx.obj["maximum"])
        nested = False

    if pcount > 1:
        to_process = []
        if nested:
            to_process = tree_paths
        else:
            to_process.append(tree_paths)
        while to_process:
            data = []
            single_tree_paths = to_process.pop(0)
            # prepare blocks of data
            factor = multicore_factor(len(single_tree_paths))
            block_size = len(single_tree_paths) / float(factor)
            assert block_size > 1, "Blocksize is too small for proper parallelisation: %s" % block_size
            index_value = int(math.ceil(len(single_tree_paths) / block_size))
            for row_idx in range(index_value):
                for col_idx in range(index_value):
                    if skip_upper and col_idx > row_idx:
                        continue
                    row_trees = single_tree_paths[
                        int(row_idx * block_size):
                        min(int((row_idx + 1) * block_size), len(single_tree_paths))]
                    col_trees = single_tree_paths[
                        int(col_idx * block_size):
                        min(int((col_idx + 1) * block_size), len(single_tree_paths))]
                    data.append({
                        "tree_paths": row_trees,
                        "prototype_paths": col_trees,
                        "configurations": ctx.obj["configurations"]
                    })
            result_list = do_multicore(count=pcount,
                                       target=_process_as_matrix,
                                       data=data)
            final_decorators = []
            row_idx = 0
            col_idx = -1
            for result_index, result_entry in enumerate(result_list):
                # calculate the exact position within matrix to help decorators updating their results
                col_idx += 1
                if col_idx >= ((row_idx + 1) if skip_upper else index_value):
                    row_idx += 1
                    col_idx = 0
                current_results = result_entry.get("results", [])
                # each of the results has the same configuration of decorators, so we can get one
                # exemplary list of decorators to process all results
                for decorator_key in current_results[0].get("decorator", {}):
                    for index, current_result in enumerate(current_results):
                        try:
                            # if decorator already exists, we only need to add current data
                            decorator = final_decorators[index][decorator_key]
                            current_decorator = type(decorator)()
                            current_decorator._data = current_result.get(
                                "decorator", {})[decorator_key]
                            current_decorator.row_idx = [row_idx]
                            current_decorator.col_idx = [col_idx]
                            decorator += current_decorator
                        except (IndexError, KeyError):
                            # if decorator does not exist, we load it and will later add data
                            decorator = Decorator.from_name(decorator_key)
                            decorator._data = current_result.get(
                                "decorator", {})[decorator_key]
                            decorator.row_idx = [row_idx]
                            decorator.col_idx = [col_idx]
                            try:
                                final_decorators[index].setdefault(
                                    decorator_key, decorator)
                            except IndexError:
                                final_decorators.append(
                                    {decorator_key: decorator})
            # format updated data
            finals = result_list[0]
            for index, final in enumerate(finals.get("results", [])):
                for value in final_decorators[index].values():
                    data = value.descriptive_data()
                    final.get("decorator", {})[list(data.keys())[0]] = list(
                        data.values())[0]
            results.setdefault("results", []).append(finals["results"])
    else:
        to_process = []
        if nested:
            to_process = tree_paths
        else:
            to_process.append(tree_paths)
        while to_process:
            single_tree_paths = to_process.pop(0)
            # build prototypes
            prototypes = _initialise_prototypes(single_tree_paths)

            def path_generator():
                for tree_index, tree_path in enumerate(single_tree_paths):
                    maxlen = len(single_tree_paths)
                    if skip_upper and skip_diagonal:
                        maxlen = tree_index
                    elif skip_upper:
                        maxlen = tree_index + 1
                    yield (tree_path, maxlen)

            results.setdefault("results", []).append(
                _process_configurations(
                    prototypes=prototypes,
                    configurations=ctx.obj["configurations"],
                    event_generator=path_generator))

    output_results(ctx=ctx,
                   results=results,
                   version=determine_version(os.path.dirname(assess.__file__)),
                   source="%s (%s)" % (__file__, "process_as_matrix"),
                   file_type=ctx.obj.get("file_type", None))
def batch_process_as_vector(ctx, pcount):
    results = []

    if ctx.obj.get("use_input", False):
        structure = ctx.obj.get("structure", None)
        file_path = structure.input_file_path()
        with open(file_path, "r") as input_file:
            input_data = json.load(input_file).get("data")
            data = []
            if "trees" in input_data and "representatives" in input_data:
                trees = input_data.get("trees", [])
                prototypes = input_data.get("representatives", [])
                if len(trees) >= len(prototypes):
                    for tree in trees:
                        data.append({
                            "configurations": ctx.obj["configurations"],
                            "files": [tree],
                            "prototypes": prototypes
                        })
                else:
                    for prototype in prototypes:
                        data.append({
                            "configurations": ctx.obj["configurations"],
                            "files": trees,
                            "prototypes": [prototype]
                        })
            else:
                for key, values in input_data.items():
                    for value in values:
                        if len(value) == 1:
                            # element is file and prototype at the same time
                            value.append(value[0])
                        data.append({
                            "configurations": ctx.obj["configurations"],
                            "files": value[:1],
                            "prototypes": value[1:],
                            "key": key
                        })
            if pcount > 1:
                final_decorators = []
                row_idx = col_idx = -1
                result_list = do_multicore(pcount, _batch_process_as_vector,
                                           data)
                for result_entry in result_list:
                    if len(trees) >= len(prototypes):
                        row_idx += 1
                        if col_idx < 0:
                            col_idx = 0
                    else:
                        col_idx += 1
                        if row_idx < 0:
                            row_idx = 0
                    current_results = result_entry.get("results", [])
                    for decorator_key in current_results[0].get(
                            "decorator", {}):
                        for index, current_result in enumerate(
                                current_results):
                            try:
                                # if decorator already exists, we only need to add current data
                                decorator = final_decorators[index][
                                    decorator_key]
                                current_decorator = type(decorator)()
                                current_decorator._data = current_result.get(
                                    "decorator", {})[decorator_key]
                                current_decorator.row_idx = [row_idx]
                                current_decorator.col_idx = [col_idx]
                                decorator += current_decorator
                            except (IndexError, KeyError):
                                # if decorator does not exist, we load it and will later add data
                                decorator = Decorator.from_name(decorator_key)
                                decorator._data = current_result.get(
                                    "decorator", {})[decorator_key]
                                decorator.row_idx = [row_idx]
                                decorator.col_idx = [col_idx]
                                try:
                                    final_decorators[index].setdefault(
                                        decorator_key, decorator)
                                except IndexError:
                                    final_decorators.append(
                                        {decorator_key: decorator})
                finals = result_list[0]
                finals["files"] = trees
                finals["prototypes"] = prototypes
                for index, final in enumerate(finals.get("results", [])):
                    for value in final_decorators[index].values():
                        data = value.descriptive_data()
                        final.get("decorator", {})[list(
                            data.keys())[0]] = list(data.values())[0]
                results.append(finals)
            else:
                for elem in data:
                    results.append(_batch_process_as_vector(elem))

    output_results(ctx=ctx,
                   results=results,
                   version=determine_version(os.path.dirname(assess.__file__)),
                   source="%s (%s)" % (__file__, "batch_process_as_vector"),
                   file_type=ctx.obj.get("file_type", None))
def generate_perturbated_tree(ctx, seed, repeat, probabilities,
                              insert_probability, cost, delete_probability,
                              change_probability, move_probability, pcount,
                              leaf_nodes_only, internal_nodes_only,
                              attribute_nodes_only):
    if seed is not None:
        random.seed(seed)
    results = MulticoreResult()
    if ctx.obj.get("use_input"):
        structure = ctx.obj.get("structure", None)
        with open(structure.input_file_path(), "r") as input_file:
            json_data = json.load(input_file)
            samples = json_data["data"]["samples"]
            if pcount > 1:
                data = [{
                    "filepath": item,
                    "repeat": repeat,
                    "probabilities": probabilities,
                    "insert_probability": insert_probability,
                    "delete_probability": delete_probability,
                    "change_probability": change_probability,
                    "move_probability": move_probability,
                    "leaf_nodes_only": leaf_nodes_only,
                    "internal_nodes_only": internal_nodes_only,
                    "attribute_nodes_only": attribute_nodes_only,
                    "cost": cost
                } for sample in samples for item in sample]
                multicore_results = do_multicore(
                    count=pcount, target=_generate_perturbated_tree, data=data)
                for result in multicore_results:
                    results += result
            else:
                for sample in samples:
                    for item in sample:
                        results += _generate_perturbated_tree({
                            "filepath": item,
                            "repeat": repeat,
                            "probabilities": probabilities,
                            "insert_probability": insert_probability,
                            "delete_probability": delete_probability,
                            "change_probability": change_probability,
                            "move_probability": move_probability,
                            "leaf_nodes_only": leaf_nodes_only,
                            "internal_nodes_only": internal_nodes_only,
                            "attribute_nodes_only": attribute_nodes_only,
                            "cost": cost
                        })
        if ctx.obj.get("save"):
            # instead of storing all results as one, we split them per base tree
            # a header is used to map all individual stores
            results_header = {}
            for name, result in results.items():
                nick = '%s%02s%s' % (hashlib.sha1(name).hexdigest(),
                                     random.getrandbits(8),
                                     time.strftime('%H%M%S'))
                with open(
                        structure.intermediate_file_path(file_type="pkl",
                                                         variant=nick),
                        "wb") as output_file:
                    results_header[name] = output_file.name
                    pickle.dump(MulticoreResult({name: result}), output_file)
            with open(structure.intermediate_file_path(file_type="pkl"),
                      "wb") as output_file:
                pickle.dump(results_header, output_file)