Beispiel #1
0
def parse_workflows_yaml(
    filename: str, inputs_dir: str
) -> Tuple[
    "prefect.storage.Storage",
    Dict[
        str,
        Union[
            "prefect.schedules.Schedule",
            List[str],
            List["autoflow.sensor.WorkflowConfig"],
        ],
    ],
]:
    """
    Load the workflows and the available-dates sensor configuration defined
    in a yaml input file.

    Parameters
    ----------
    filename : str
        Name of yaml input file
    inputs_dir : str
        Directory in which input files should be found

    Returns
    -------
    workflow_storage : prefect.storage.Storage
        Result of loading the 'workflows' section with WorkflowSchema
        (with many=True)
    sensor_config : dict
        Result of loading the 'available_dates_sensor' section with
        AvailableDatesSensorSchema

    Raises
    ------
    ValueError
        If the input file has no 'workflows' section or no
        'available_dates_sensor' section (this includes an empty file, or a
        file whose top level is not a mapping)

    Notes
    -----
    Errors raised while opening the file, parsing the yaml, or loading either
    schema are not caught here and propagate to the caller.
    """
    with open(Path(inputs_dir) / filename, "r") as f:
        workflows_yaml = yaml.safe_load(f)

    # TypeError covers documents that cannot be indexed by section name (for
    # example an empty file, for which safe_load returns None), so the caller
    # gets the same clear error as for a missing section.
    try:
        workflows_spec = workflows_yaml["workflows"]
    except (KeyError, TypeError):
        raise ValueError(
            "Input file does not have a 'workflows' section."
        ) from None
    try:
        sensor_spec = workflows_yaml["available_dates_sensor"]
    except (KeyError, TypeError):
        raise ValueError(
            "Input file does not have an 'available_dates_sensor' section."
        ) from None

    workflow_schema = WorkflowSchema(context=dict(inputs_dir=inputs_dir))
    workflow_storage = workflow_schema.load(workflows_spec, many=True)

    # The sensor schema is given the loaded storage through its context, so
    # the workflows must be loaded first.
    sensor_schema = AvailableDatesSensorSchema(
        context=dict(workflow_storage=workflow_storage)
    )
    sensor_config = sensor_schema.load(sensor_spec)

    return workflow_storage, sensor_config
def test_workflow_schema_duplicate_name(monkeypatch):
    """
    Test that WorkflowSchema raises a ValidationError if multiple workflows
    have the same name.

    Three workflows share one name; the error is expected to be reported
    against the second and third entries (indices 1 and 2).
    """
    # Make every Path.exists() call return True, so no real files are needed.
    monkeypatch.setattr("pathlib.Path.exists", lambda self: True)
    workflows = [
        {
            "name": "DUMMY_WORKFLOW",
            "notebooks": {"notebook1": {"filename": "NOTEBOOK1.ipynb"}},
        },
        {
            "name": "DUMMY_WORKFLOW",
            "notebooks": {"notebook2": {"filename": "NOTEBOOK2.ipynb"}},
        },
        {
            "name": "DUMMY_WORKFLOW",
            "notebooks": {"notebook3": {"filename": "NOTEBOOK3.ipynb"}},
        },
    ]
    # The load is expected to raise, so its result is deliberately not bound.
    with pytest.raises(ValidationError) as exc_info:
        WorkflowSchema(
            many=True, context={"inputs_dir": "DUMMY_INPUTS_DIR"}
        ).load(workflows)
    assert "Duplicate workflow name." in exc_info.value.messages[1]["name"]
    assert "Duplicate workflow name." in exc_info.value.messages[2]["name"]
Beispiel #3
0
def test_workflow_schema_many(monkeypatch):
    """
    Check that loading several workflow specifications with many=True yields
    one Storage object that contains every named workflow.
    """
    # Make every Path.exists() call return True, so no real files are needed.
    monkeypatch.setattr("pathlib.Path.exists", lambda self: True)

    first_spec = {
        "name": "DUMMY_WORKFLOW_1",
        "notebooks": {"notebook1": {"filename": "NOTEBOOK1.ipynb"}},
    }
    second_spec = {
        "name": "DUMMY_WORKFLOW_2",
        "notebooks": {"notebook2": {"filename": "NOTEBOOK2.ipynb"}},
    }

    schema = WorkflowSchema(many=True, context={"inputs_dir": "DUMMY_INPUTS_DIR"})
    loaded = schema.load([first_spec, second_spec])

    assert isinstance(loaded, storage.Storage)
    for expected_name in ("DUMMY_WORKFLOW_1", "DUMMY_WORKFLOW_2"):
        assert expected_name in loaded
def test_workflow_schema_missing_notebooks(monkeypatch):
    """
    Test that WorkflowSchema raises a ValidationError if the 'notebooks' field is missing.
    """
    # Make every Path.exists() call return True, so no real files are needed.
    monkeypatch.setattr("pathlib.Path.exists", lambda self: True)
    workflow = {"name": "DUMMY_WORKFLOW"}
    # The load is expected to raise, so its result is deliberately not bound.
    with pytest.raises(ValidationError) as exc_info:
        WorkflowSchema(context={"inputs_dir": "DUMMY_INPUTS_DIR"}).load(workflow)
    assert "Missing data for required field." in exc_info.value.messages["notebooks"]
def test_workflow_schema_invalid_name(monkeypatch):
    """
    Test that WorkflowSchema raises a ValidationError if the 'name' field is not a string.
    """
    # Make every Path.exists() call return True, so no real files are needed.
    monkeypatch.setattr("pathlib.Path.exists", lambda self: True)
    workflow = {
        "name": 123,
        "notebooks": {"notebook1": {"filename": "NOTEBOOK1.ipynb"}},
    }
    # The load is expected to raise, so its result is deliberately not bound.
    with pytest.raises(ValidationError) as exc_info:
        WorkflowSchema(context={"inputs_dir": "DUMMY_INPUTS_DIR"}).load(workflow)
    assert "Not a valid string." in exc_info.value.messages["name"]
def test_workflow_schema(monkeypatch):
    """
    Check that a single workflow specification is loaded by WorkflowSchema as
    a Storage object that contains the named workflow.
    """
    # Make every Path.exists() call return True, so no real files are needed.
    monkeypatch.setattr("pathlib.Path.exists", lambda self: True)

    notebooks_spec = {"notebook1": {"filename": "NOTEBOOK1.ipynb"}}
    spec = {"name": "DUMMY_WORKFLOW", "notebooks": notebooks_spec}

    schema = WorkflowSchema(context={"inputs_dir": "DUMMY_INPUTS_DIR"})
    loaded = schema.load(spec)

    assert isinstance(loaded, storage.Storage)
    assert "DUMMY_WORKFLOW" in loaded