Beispiel #1
0
def expand_tasks_with_samples(
    self,
    _,
    dag,
    chain_,
    samples,
    labels,
    task_type,
    adapter_config,
    level_max_dirs,
    **kwargs,
):
    """
    Queue the work for a chain of steps, expanding it over samples if needed.

    The steps named in ``chain_`` are looked up in ``dag`` and cloned with the
    glob / sample-path command substitutions applied. If
    ``is_chain_expandable`` says the chain needs expansion, the sample
    hierarchy is traversed and one ``add_merlin_expanded_chain_to_chord``
    signature is dispatched per yielded node, each carrying only the slice of
    ``samples`` that node covers. Otherwise a single simple chain is queued.

    :param self : Object providing ``request.is_eager`` and ``add_to_chord``
        (presumably the bound celery task -- confirm against the decorator).
    :param _ : Positional argument that is not used by this function.
    :param dag : A Merlin DAG; ``dag.step(name)`` is called for each name.
    :param chain_ : The list of task names to expand.
    :param samples : The list of lists of merlin sample values to substitute.
    :param labels : A list of strings containing the label associated with
        each column in the samples.
    :param task_type : The celery task type to create; forwarded unchanged.
    :param adapter_config : A dictionary used for configuring maestro script
        adapters; forwarded unchanged.
    :param level_max_dirs : The max number of directories per level in the
        sample hierarchy. Its decimal width is also passed as ``n_digits``.
    :param kwargs : Accepted but not used by this function.
    """
    LOG.debug(f"expand_tasks_with_samples called with chain,{chain_}\n")

    sample_count = len(samples)

    # One "*/" glob component per directory level.
    directory_sizes = uniform_directories(
        sample_count, bundle_size=1, level_max_dirs=level_max_dirs
    )
    glob_path = "*/" * len(directory_sizes)

    LOG.debug("creating sample_index")
    # The hierarchy is built so the "all paths" string can be derived from it.
    digits = len(str(level_max_dirs))
    sample_index = create_hierarchy(
        sample_count,
        bundle_size=1,
        directory_sizes=directory_sizes,
        root="",
        n_digits=digits,
    )

    LOG.debug("creating sample_paths")
    sample_paths = sample_index.make_directory_string()

    LOG.debug("assembling steps")
    # First resolve every step name, then substitute the globs in each one.
    named_steps = []
    for name in chain_:
        named_steps.append(dag.step(name))

    steps = []
    for base_step in named_steps:
        replacements = parameter_substitutions_for_cmd(glob_path, sample_paths)
        steps.append(
            base_step.clone_changing_workspace_and_cmd(
                cmd_replacement_pairs=replacements
            )
        )

    needs_expansion = is_chain_expandable(steps, labels)

    LOG.debug(f"needs_expansion {needs_expansion}")

    if not needs_expansion:
        LOG.debug(f"queuing simple chain task")
        add_simple_chain_to_chord(self, task_type, steps, adapter_config)
        LOG.debug(f"simple chain task queued")
        return

    sample_index.name = ""
    LOG.debug(f"queuing merlin expansion tasks")

    # Predicates are tried in this order; the first one for which the
    # traversal yields at least one node is the only one that gets used.
    level_predicates = (
        lambda c: c.is_great_grandparent_of_leaf,
        lambda c: c.is_grandparent_of_leaf,
        lambda c: c.is_parent_of_leaf,
        lambda c: c.is_leaf,
    )

    for predicate in level_predicates:
        queued_any = False
        for node_path, node in sample_index.traverse(conditional=predicate):
            lo = node.min
            hi = node.max
            LOG.info(f"generating next step for range {lo}:{hi} {hi-lo}")
            node.name = node_path

            # Each expansion task only receives its own slice of the samples,
            # plus the offset (lo) of that slice within the full sample list.
            sig = add_merlin_expanded_chain_to_chord.s(
                task_type,
                steps,
                samples[lo:hi],
                labels,
                node,
                adapter_config,
                lo,
            )
            sig.set(queue=steps[0].get_task_queue())

            if self.request.is_eager:
                sig.delay()
            else:
                LOG.info(f"queuing expansion task {lo}:{hi}")
                self.add_to_chord(sig, lazy=False)
            LOG.info(f"merlin expansion task {lo}:{hi} queued")
            queued_any = True

        if queued_any:
            break
Beispiel #2
0
def expand_tasks_with_samples(
    self,
    _,
    dag,
    chain_,
    samples,
    labels,
    task_type,
    adapter_config,
    level_max_dirs,
    **kwargs,
):
    """
    Queue the work for a chain of steps, expanding it over samples if needed.

    The steps named in ``chain_`` are looked up in ``dag`` and cloned with the
    glob / sample-path command substitutions applied. If
    ``is_chain_expandable`` says the chain needs expansion, the chain
    workspace is prepared and a single ``add_merlin_expanded_chain_to_chord``
    signature covering all of ``samples`` is dispatched. Otherwise a single
    simple chain is queued.

    :param self : Object providing ``request.is_eager`` and ``add_to_chord``
        (presumably the bound celery task -- confirm against the decorator).
    :param _ : Positional argument that is not used by this function.
    :param dag : A Merlin DAG; ``dag.step(name)`` is called for each name.
    :param chain_ : The list of task names to expand.
    :param samples : The list of lists of merlin sample values to substitute.
    :param labels : A list of strings containing the label associated with
        each column in the samples.
    :param task_type : The celery task type to create; forwarded unchanged.
    :param adapter_config : A dictionary used for configuring maestro script
        adapters; forwarded unchanged.
    :param level_max_dirs : The max number of directories per level in the
        sample hierarchy.
    :param kwargs : Accepted but not used by this function.
    """
    LOG.debug(f"expand_tasks_with_samples called with chain,{chain_}\n")

    sample_count = len(samples)

    # Size the directory levels, then add one extra level of size 1 before
    # deriving the glob string (one "*/" component per level).
    directory_sizes = uniform_directories(
        sample_count,
        bundle_size=1,
        level_max_dirs=level_max_dirs,
    )
    directory_sizes.append(1)
    glob_path = "*/" * len(directory_sizes)

    # The hierarchy is built so the "all paths" string can be derived from it.
    sample_index = create_hierarchy(
        sample_count,
        bundle_size=1,
        directory_sizes=directory_sizes,
        root="",
    )
    sample_paths = sample_index.make_directory_string()

    # First resolve every step name, then substitute the globs in each one.
    named_steps = []
    for name in chain_:
        named_steps.append(dag.step(name))

    steps = []
    for base_step in named_steps:
        replacements = parameter_substitutions_for_cmd(glob_path, sample_paths)
        steps.append(
            base_step.clone_changing_workspace_and_cmd(
                cmd_replacement_pairs=replacements
            )
        )

    workspaces = []
    for prepared_step in steps:
        workspaces.append(prepared_step.get_workspace())
    LOG.debug(f"workspaces : {workspaces}")

    needs_expansion = is_chain_expandable(steps, labels)

    if not needs_expansion:
        LOG.debug(f"queuing simple chain task")
        add_simple_chain_to_chord(self, task_type, steps, adapter_config)
        LOG.debug(f"simple chain task queued")
        return

    prepare_chain_workspace(sample_index, steps)
    sample_index.name = ""
    LOG.debug(f"queuing merlin expansion task")

    # A single expansion task handles every sample, starting at offset 0.
    sig = add_merlin_expanded_chain_to_chord.s(
        task_type,
        steps,
        samples,
        labels,
        sample_index,
        adapter_config,
        0,
    )
    sig.set(queue=steps[0].get_task_queue())

    if self.request.is_eager:
        sig.delay()
    else:
        self.add_to_chord(sig, lazy=False)
    LOG.debug(f"merlin expansion task queued")