Esempio n. 1
0
def main(run_on_schedule: bool = True):
    """
    Main function. Configures logging, creates output directories, initialises
    the database, parses a workflows definition file to define workflows and
    configure the available dates sensor, and runs the available dates sensor.

    Configuration is read from the environment variables AUTOFLOW_LOG_LEVEL,
    AUTOFLOW_OUTPUTS_DIR, AUTOFLOW_DB_URI and AUTOFLOW_INPUTS_DIR (all
    required), plus AUTOFLOW_DB_PASSWORD (looked up with an empty-string
    fallback).

    Parameters
    ----------
    run_on_schedule : bool, default True
        Set run_on_schedule=False to run the sensor only once, ignoring the schedule.
        (useful for testing)

    Raises
    ------
    KeyError
        If one of the required environment variables is not set.
    """
    # Initialise logger
    # TODO: Use structlog (not sure whether it will be possible for the prefect logger)
    log_level = os.environ["AUTOFLOW_LOG_LEVEL"]
    logger = logging.getLogger(__name__)
    if not logger.handlers:
        # Attach the handler only once: main() may be called repeatedly (e.g. in
        # tests), and a second handler would duplicate every log line.
        handler = logging.StreamHandler()
        formatter = logging.Formatter(
            "[%(asctime)s] %(levelname)s - %(name)s | %(message)s"
        )  # Match prefect format for now
        formatter.converter = time.gmtime  # Timestamps in UTC
        handler.setFormatter(formatter)
        logger.addHandler(handler)
    logger.setLevel(log_level)
    logger.info(f"Log level for logger '{__name__}' set to '{log_level}'.")

    # Make output directories
    outputs_path = Path(os.environ["AUTOFLOW_OUTPUTS_DIR"])
    notebooks_dir = outputs_path / "notebooks"
    reports_dir = outputs_path / "reports"
    logger.info(
        f"Creating output directories '{notebooks_dir}' and '{reports_dir}'."
    )
    # parents=True so that a not-yet-existing outputs directory is created too,
    # instead of failing with FileNotFoundError.
    notebooks_dir.mkdir(parents=True, exist_ok=True)
    reports_dir.mkdir(parents=True, exist_ok=True)

    # Init DB
    # Note: AUTOFLOW_DB_URI must be an env var so that it can be used in prefect.config, so we read it using os.environ.
    # AUTOFLOW_DB_PASSWORD can (and should) be a docker secret, so we read it using `getenv`
    # (presumably the one provided by get_secret_or_env_var - confirm against the file's imports).
    db_uri = os.environ["AUTOFLOW_DB_URI"]
    # Log the URI *before* the password is substituted in, so the secret never reaches the logs.
    logger.info(f"Initialising database '{db_uri}'.")
    init_db(db_uri.format(getenv("AUTOFLOW_DB_PASSWORD", "")))

    # Create workflows according to workflow definition file
    inputs_dir = os.environ["AUTOFLOW_INPUTS_DIR"]
    workflows_file = Path(inputs_dir) / "workflows.yml"
    logger.info(f"Creating workflows defined in '{workflows_file}'.")
    workflow_storage, sensor_config = parse_workflows_yaml("workflows.yml", inputs_dir)

    # Run available dates sensor
    logger.info("Running available dates sensor.")
    available_dates_sensor.schedule = sensor_config["schedule"]
    available_dates_sensor.run(
        workflow_configs=sensor_config["workflows"],
        cdr_types=sensor_config["cdr_types"],
        workflow_storage=workflow_storage,
        run_on_schedule=run_on_schedule,
    )
Esempio n. 2
0
def test_parse_workflows_yaml_missing_workflows(tmp_path):
    """
    Check that parse_workflows_yaml rejects an input file that has no
    'workflows' section by raising a ValueError.
    """
    # Input file that only defines the sensor, with no 'workflows' section.
    yaml_text = dedent(
        """\
        available_dates_sensor:
          schedule: "0 0 * * *"
          workflows:
            - workflow_name: workflow1
        """
    )
    input_file = tmp_path / "dummy_input.yml"
    input_file.write_text(yaml_text)

    expected_message = "Input file does not have a 'workflows' section."
    # The return value is irrelevant: the call itself is expected to raise.
    with pytest.raises(ValueError, match=expected_message):
        parse_workflows_yaml(filename="dummy_input.yml", inputs_dir=str(tmp_path))
Esempio n. 3
0
def test_parse_workflows_yaml_missing_available_dates_sensor(tmp_path):
    """
    Test that parse_workflows_yaml raises a ValueError if the input file
    doesn't have an 'available_dates_sensor' key.
    """
    # Create the notebook file referenced by workflow1 (presumably so that the
    # missing sensor section is the only problem with the input - confirm
    # against the parser's validation).
    (tmp_path / "notebook1.ipynb").touch()
    (tmp_path / "dummy_input.yml").write_text(
        dedent("""\
            workflows:
              - name: workflow1
                notebooks:
                  notebook1:
                    filename: notebook1.ipynb
            """))
    # The return value is irrelevant: the call itself is expected to raise.
    with pytest.raises(
            ValueError,
            match=
            "Input file does not have an 'available_dates_sensor' section.",
    ):
        parse_workflows_yaml(
            filename="dummy_input.yml", inputs_dir=str(tmp_path))
Esempio n. 4
0
def test_parse_workflows_yaml(tmp_path):
    """
    Check that parse_workflows_yaml turns a complete example input file into
    the expected workflow storage and sensor configuration.
    """
    # Create (empty) versions of every file the workflow definitions refer to.
    for required_file in (
        "notebook1.ipynb",
        "notebook2.ipynb",
        "notebook3.ipynb",
        "custom_template.tpl",
    ):
        (tmp_path / required_file).touch()

    yaml_text = dedent(
        """\
        workflows:
          - name: workflow1
            notebooks:
              notebook1:
                filename: notebook1.ipynb
                parameters:
                  url: flowapi_url
                  date: reference_date
                  extra: dummy_param
              notebook2:
                filename: notebook2.ipynb
                parameters:
                  ranges: date_ranges
                  other: notebook1
                output:
                  format: pdf
                  template: custom_template.tpl
          - name: workflow2
            notebooks:
              the_notebook:
                filename: notebook3.ipynb
                output:
                  format: pdf

        available_dates_sensor:
          schedule: "0 0 * * *"
          cdr_types:
            - calls
            - sms
          workflows:
            - workflow_name: workflow1
              parameters:
                dummy_param: 123
              earliest_date: 2016-01-01
              date_stencil: [[2016-01-01, 2016-01-03], -1, 0]
            - workflow_name: workflow2
        """
    )
    input_file = tmp_path / "dummy_input.yml"
    input_file.write_text(yaml_text)

    workflow_storage, sensor_config = parse_workflows_yaml(
        filename="dummy_input.yml", inputs_dir=str(tmp_path)
    )

    # Both workflows should have been built and stored.
    assert isinstance(workflow_storage, storage.Storage)
    for workflow_name in ("workflow1", "workflow2"):
        assert workflow_name in workflow_storage

    # The sensor section should be parsed into a schedule, the CDR types and
    # one WorkflowConfig per listed workflow.
    first_of_january = datetime.date(2016, 1, 1)
    third_of_january = datetime.date(2016, 1, 3)
    expected_workflow_configs = [
        WorkflowConfig(
            workflow_name="workflow1",
            parameters={"dummy_param": 123},
            earliest_date=datetime.date(2016, 1, 1),
            date_stencil=DateStencil(
                [[first_of_january, third_of_january], -1, 0]
            ),
        ),
        WorkflowConfig(workflow_name="workflow2"),
    ]
    assert isinstance(sensor_config["schedule"], Schedule)
    assert sensor_config["cdr_types"] == ["calls", "sms"]
    assert sensor_config["workflows"] == expected_workflow_configs