Example 1
    def test_pipeline_many_valid_inputs_clean(self):
        """A Pipeline with multiple, properly indexed inputs is clean."""
        p = Pipeline(family=PipelineFamily())
        self.add_inputs(p, TransformationInput(dataset_idx=2),
                        TransformationInput(dataset_idx=1),
                        TransformationInput(dataset_idx=3))

        p.clean()
Example 2
    def test_pipeline_no_inputs_no_steps(self):
        """A Pipeline with no inputs and no steps is clean but not complete."""
        p = Pipeline(family=PipelineFamily())

        p.clean()

        self.assertRaisesRegexp(
            ValidationError, re.escape("Pipeline {} has no steps".format(p)),
            p.complete_clean)
Example 3
    def test_pipeline_one_valid_input_no_steps(self):
        """A Pipeline with one valid input, but no steps, is clean but not complete."""
        p = Pipeline(family=PipelineFamily())
        self.add_inputs(p, TransformationInput(dataset_idx=1))

        p.clean()

        self.assertRaisesRegexp(
            ValidationError, re.escape("Pipeline {} has no steps".format(p)),
            p.complete_clean)
Example 4
    def test_pipeline_many_valid_steps_clean(self):
        """Test step index check, well-indexed multi-step case."""
        p = Pipeline(family=PipelineFamily())
        self.add_inputs(p, TransformationInput(dataset_idx=1))
        m = Method()
        self.add_inputs(m, TransformationInput(dataset_idx=1))
        p.steps.add(PipelineStep(pipeline=p, transformation=m, step_num=2))
        p.steps.add(PipelineStep(pipeline=p, transformation=m, step_num=1))
        p.steps.add(PipelineStep(pipeline=p, transformation=m, step_num=3))

        p.clean()
Example 5
    def create_valid_pipeline(self):
        p = Pipeline(family=PipelineFamily())
        self.add_inputs(p, self.create_input(datatypes.STR_PK, dataset_idx=1))
        m = Method()
        # Self-reference so code that downcasts Transformation to Method
        # finds this unsaved instance instead of querying the database.
        m.method = m
        self.add_inputs(m, self.create_input(datatypes.STR_PK, dataset_idx=1))
        self.add_outputs(m, self.create_output(datatypes.STR_PK,
                                               dataset_idx=1))

        step1 = PipelineStep(pipeline=p, transformation=m, step_num=1)
        p.steps.add(step1)

        cable = PipelineStepInputCable(pipelinestep=step1,
                                       source_step=0,
                                       source=p.inputs.all()[0],
                                       dest=m.inputs.all()[0])
        # Likewise, let the cable's downcast accessor resolve without a
        # database lookup.
        cable.pipelinestepinputcable = cable
        step1.cables_in.add(cable)

        outcable = PipelineOutputCable(pipeline=p,
                                       output_idx=1,
                                       source_step=1,
                                       source=m.outputs.all()[0],
                                       output_cdt=m.outputs.all()[0].get_cdt())
        p.outcables.add(outcable)

        yield p
Example 6
def test_get_action_specification_without_dummy_data_file_flag(tmp_path):
    dummy_data_file = tmp_path / "test.csv"
    with dummy_data_file.open("w") as f:
        f.write("test")

    config = Pipeline(
        **{
            "version": 1,
            "actions": {
                "generate_cohort": {
                    "run": "cohortextractor:latest generate_cohort",
                    "outputs": {
                        "moderately_sensitive": {
                            "cohort": "output/input.csv"
                        }
                    },
                    "dummy_data_file": dummy_data_file,
                }
            },
        })

    action_spec = get_action_specification(config, "generate_cohort")

    expected = "cohortextractor:latest generate_cohort --output-dir=output"
    assert action_spec.run == expected
Example 7
def test_get_action_specification_for_databuilder_action():
    config = Pipeline(
        **{
            "version": 3,
            "expectations": {
                "population_size": 1000
            },
            "actions": {
                "generate_dataset": {
                    "run":
                    "databuilder:latest generate_dataset "
                    "--dataset_definition=dataset_definition.py "
                    "--output=output/dataset.csv "
                    "--dummy-data-file=dummy.csv",
                    "outputs": {
                        "highly_sensitive": {
                            "dataset": "output/dataset.csv"
                        }
                    },
                }
            },
        })

    action_spec = get_action_specification(config,
                                           "generate_dataset",
                                           using_dummy_data_backend=True)

    assert (action_spec.run == "databuilder:latest generate_dataset "
            "--dataset_definition=dataset_definition.py "
            "--output=output/dataset.csv "
            "--dummy-data-file=dummy.csv")
Example 8
def test_get_action_specification_databuilder_has_output_flag():
    config = Pipeline(
        **{
            "version": 3,
            "expectations": {
                "population_size": 1000
            },
            "actions": {
                "generate_dataset": {
                    "run":
                    "databuilder:latest generate_dataset --output=output/dataset.csv",
                    "outputs": {
                        "highly_sensitive": {
                            "cohort": "output/dataset.csv",
                            "cohort2": "output/input2.csv",
                        }
                    },
                },
            },
        })

    action_spec = get_action_specification(config, "generate_dataset")

    assert (action_spec.run ==
            "databuilder:latest generate_dataset --output=output/dataset.csv")
Example 9
def test_get_actions_missing_needs():
    dummy = Pipeline(
        **{
            "version": 3,
            "expectations": {},
            "actions": {
                "frobnicate": {
                    "run": "test",
                    "outputs": {
                        "highly_sensitive": {
                            "cohort": "/some/path"
                        }
                    },
                },
            },
        })
    output = list(get_actions(dummy))

    expected = [
        {
            "name": "frobnicate",
            "needs": []
        },
        {
            "name": "run_all",
            "needs": ["frobnicate"]
        },
    ]
    assert output == expected
Example 10
def test_get_action_specification_for_databuilder_errors():
    config = Pipeline(
        **{
            "version": 3,
            "expectations": {
                "population_size": 1_000
            },
            "actions": {
                "generate_dataset": {
                    "run":
                    "databuilder:latest generate_dataset "
                    "--dataset_definition=dataset_definition.py "
                    "--output=output/dataset.csv",
                    "outputs": {
                        "highly_sensitive": {
                            "dataset": "output/dataset.csv"
                        }
                    },
                }
            },
        })

    msg = "--dummy-data-file is required for a local run"
    with pytest.raises(ProjectValidationError, match=msg):
        get_action_specification(
            config,
            "generate_dataset",
            using_dummy_data_backend=True,
        )
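The error in Example 10 implies a guard along these lines inside get_action_specification. The following is a hypothetical sketch inferred from the test alone; the function name and parameters are assumptions, not the library's actual code:

def check_dummy_data_file(run_command, using_dummy_data_backend):
    # Assumed guard: a databuilder action run against the dummy-data
    # backend must name its dummy data file on the command line.
    if using_dummy_data_backend and "--dummy-data-file" not in run_command:
        raise ProjectValidationError(
            "--dummy-data-file is required for a local run")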
Example 11
def test_get_action_specification_for_cohortextractor_generate_cohort_action():
    config = Pipeline(
        **{
            "version": 3,
            "expectations": {
                "population_size": 1000
            },
            "actions": {
                "generate_cohort": {
                    "run": "cohortextractor:latest generate_cohort",
                    "outputs": {
                        "highly_sensitive": {
                            "cohort": "output/input.csv"
                        }
                    },
                }
            },
        })

    action_spec = get_action_specification(config,
                                           "generate_cohort",
                                           using_dummy_data_backend=True)

    assert (
        action_spec.run ==
        """cohortextractor:latest generate_cohort --expectations-population=1000 --output-dir=output"""
    )
Example 12
    def test_pipeline_one_invalid_input_clean(self):
        """A Pipeline with one input not numbered "1" is not clean."""
        p = Pipeline(family=PipelineFamily())
        self.add_inputs(p, TransformationInput(dataset_idx=4))

        error = "Inputs are not consecutively numbered starting from 1"
        self.assertRaisesRegexp(ValidationError, error, p.clean)
        self.assertRaisesRegexp(ValidationError, error, p.complete_clean)
Example 13
    def test_pipeline_many_invalid_inputs_clean(self):
        """A Pipeline with multiple, badly indexed inputs is not clean."""
        p = Pipeline(family=PipelineFamily())
        self.add_inputs(p, TransformationInput(dataset_idx=2),
                        TransformationInput(dataset_idx=3),
                        TransformationInput(dataset_idx=4))

        self.assertRaisesRegexp(
            ValidationError,
            "Inputs are not consecutively numbered starting from 1", p.clean)
Example 14
    def __init__(self, *args, **kwargs):
        super(PipelineSerializer, self).__init__(*args, **kwargs)
        # Set the querysets of the related model fields.
        curr_user = self.context["request"].user

        revision_parent_field = self.fields["revision_parent"]
        revision_parent_field.queryset = Pipeline.filter_by_user(curr_user)

        family_field = self.fields["family"]
        family_field.queryset = PipelineFamily.filter_by_user(curr_user)
Example 15
    def test_pipeline_many_invalid_steps_clean(self):
        """Test step index check, badly-indexed multi-step case."""
        p = Pipeline(family=PipelineFamily())
        self.add_inputs(p, TransformationInput(dataset_idx=1))
        m = Method()
        self.add_inputs(m, TransformationInput(dataset_idx=1))
        p.steps.add(PipelineStep(pipeline=p, transformation=m, step_num=1))
        p.steps.add(PipelineStep(pipeline=p, transformation=m, step_num=4))
        p.steps.add(PipelineStep(pipeline=p, transformation=m, step_num=5))

        self.assertRaisesRegexp(
            ValidationError,
            "Steps are not consecutively numbered starting from 1", p.clean)
Example 16
    def test_build_removal_plan_for_used_image(self):
        image = DockerImage(id=99, name='doomed')
        method = image.methods.create(transformation_ptr_id=100)
        step = method.pipelinesteps.create(id=101)
        step.pipeline = Pipeline(transformation_ptr_id=102)
        step.pipeline.family = PipelineFamily()

        expected_plan = empty_removal_plan()
        expected_plan['DockerImages'].add(image)
        expected_plan['Methods'].add(method)
        expected_plan['Pipelines'].add(step.pipeline)

        plan = image.build_removal_plan()

        self.assertEqual(expected_plan, plan)
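The assertions in Example 16 only require empty_removal_plan to return a dict mapping category names to empty sets. A hypothetical stand-in satisfying just this test (the real helper presumably covers more categories) would be:

def empty_removal_plan():
    # Assumed shape: one empty set per removable category.
    return {category: set()
            for category in ('DockerImages', 'Methods', 'Pipelines')}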
Example 17
def test_get_action_specification_with_unknown_action():
    config = Pipeline(
        **{
            "version": 1,
            "actions": {
                "known_action": {
                    "run": "python:latest python test.py",
                    "outputs": {
                        "moderately_sensitive": {
                            "cohort": "output/input.csv"
                        }
                    },
                }
            },
        })
    msg = "Action 'unknown_action' not found in project.yaml"
    with pytest.raises(UnknownActionError, match=msg):
        get_action_specification(config, "unknown_action")
Example 18
def _choose_inputs_for_batch(request,
                             pipeline_pk,
                             start_form=None,
                             input_error_message=''):
    """Load the input selection page."""
    template = loader.get_template("sandbox/choose_inputs.html")
    pipeline_qs = Pipeline.filter_by_user(request.user).filter(pk=pipeline_pk)

    pipeline = pipeline_qs.first()
    if pipeline is None:
        raise Http404("ID {} is not accessible".format(pipeline_pk))

    if start_form is None:
        start_form = StartRunBatchForm({"pipeline": pipeline}, pipeline_qs=pipeline_qs)

    context = {"inputs": pipeline.inputs.order_by("dataset_idx"),
               "start_form": start_form,
               "input_error_msg": input_error_message,
               "pipeline": pipeline,
               "priolist": [t[0] for t in settings.SLURM_QUEUES]}
    return HttpResponse(template.render(context, request))
Example 19
def test_get_action_specification_with_config():
    config = Pipeline(
        **{
            "version": 3,
            "expectations": {
                "population_size": 1_000
            },
            "actions": {
                "my_action": {
                    "run":
                    "python:latest python action/__main__.py output/input.csv",
                    "config": {
                        "option": "value"
                    },
                    "outputs": {
                        "moderately_sensitive": {
                            "my_figure": "output/my_figure.png"
                        }
                    },
                }
            },
        })

    action_spec = get_action_specification(config, "my_action")

    assert (
        action_spec.run ==
        """python:latest python action/__main__.py output/input.csv --config '{"option": "value"}'"""
    )

    # Does argparse accept options after arguments?
    parser = argparse.ArgumentParser()
    parser.add_argument("--config")  # option
    parser.add_argument("input_files", nargs="*")  # argument

    # If parser were in __main__.py, parser.parse_args would receive
    # sys.argv[1:] by default. sys.argv[0] is the script name (with or
    # without a path, depending on the OS), so we slice action_spec.run
    # past the image name, interpreter, and script name to mimic this.
    parser.parse_args(shlex.split(action_spec.run)[3:])
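To make the argparse behaviour explicit, here is a standalone check with hypothetical values (separate from the test above) showing that the option is still recognised after the positional argument:

import argparse

parser = argparse.ArgumentParser()
parser.add_argument("--config")  # option
parser.add_argument("input_files", nargs="*")  # argument

args = parser.parse_args(["output/input.csv", "--config", '{"option": "value"}'])
assert args.config == '{"option": "value"}'
assert args.input_files == ["output/input.csv"]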
Example 20
    def test_pipeline_oneStep_invalid_cabling_incorrect_cdt_clean(self):
        """Bad cabling: input is of wrong CompoundDatatype."""
        p = Pipeline(family=PipelineFamily())
        self.add_inputs(p, self.create_input(datatypes.INT_PK, dataset_idx=1))
        m = Method()
        self.add_inputs(m, self.create_input(datatypes.STR_PK, dataset_idx=1))

        step1 = PipelineStep(pipeline=p, transformation=m, step_num=1)
        p.steps.add(step1)

        cable = PipelineStepInputCable(pipelinestep=step1,
                                       source_step=0,
                                       source=p.inputs.all()[0],
                                       dest=m.inputs.all()[0])
        cable.pipelinestepinputcable = cable
        step1.cables_in.add(cable)

        cable.clean()
        self.assertRaisesRegexp(
            ValidationError,
            'Custom wiring required for cable "{}"'.format(cable),
            cable.clean_and_completely_wired)
Example 21
    def form_valid(self, form):
        # Django's FormView only calls this once the form has validated,
        # so no further is_valid() check is needed. It must return an
        # HttpResponse (the original returned True on one branch and had
        # an unreachable super().form_valid() call after both returns).
        requests = form.cleaned_data.pop('requests')
        obj = Pipeline(**form.cleaned_data)
        obj.owner = self.request.user
        obj.save()

        # The many-to-many relation needs a saved (pk-bearing) instance.
        for request in requests:
            obj.requests.add(request)

        return HttpResponseRedirect(
            reverse('pipeline:pipeline', kwargs={'pk': obj.pk}))
Example 22
    def test_no_steps(self):
        pipeline = Pipeline()

        updates = pipeline.find_step_updates()

        self.assertEqual([], updates)