def test_exception_raised_with_invalid_state(session):
    """
    Setting a workflow run state to an invalid value should raise a DataError.
    """
    invalid_run = {
        "workflow_name": "DUMMY_WORKFLOW_NAME",
        "parameters": {"DUMMY_PARAM_NAME": "DUMMY_PARAM_VALUE"},
        "state": "INVALID_STATE",
    }
    with pytest.raises(DataError):
        WorkflowRuns.set_state(session=session, **invalid_run)
def test_get_most_recent_state(session):
    """
    get_most_recent_state should report whichever state was set last.
    """
    run_key = {
        "workflow_name": "DUMMY_WORKFLOW_NAME",
        "parameters": {"DUMMY_PARAM_NAME": "DUMMY_PARAM_VALUE"},
    }
    # Record two states in order; the second one is the state expected back.
    for recorded_state in (RunState.running, RunState.failed):
        WorkflowRuns.set_state(state=recorded_state, session=session, **run_key)

    latest = WorkflowRuns.get_most_recent_state(session=session, **run_key)
    assert latest == RunState.failed
def test_set_state_with_sqlite(sqlite_session):
    """
    Check that set_state writes exactly one correctly populated row to a
    sqlite DB.
    """
    workflow_name = "DUMMY_WORKFLOW_NAME"
    parameters = {"DUMMY_PARAM_NAME": "DUMMY_PARAM_VALUE"}
    state = RunState.running
    frozen_time = pendulum.parse("2016-01-02T13:00:01Z")

    # Replace pendulum.now for the duration of the write so that the
    # timestamp compared below is a known value.
    with patch("pendulum.now", lambda x: frozen_time):
        WorkflowRuns.set_state(
            workflow_name=workflow_name,
            parameters=parameters,
            state=state,
            session=sqlite_session,
        )

    stored = sqlite_session.query(WorkflowRuns).all()
    assert len(stored) == 1
    record = stored[0]
    assert record.workflow_name == workflow_name
    assert record.parameters_hash == get_params_hash(parameters)
    assert record.state == state
    assert pendulum.instance(record.timestamp) == frozen_time
def test_init_db_force(postgres_test_db):
    """
    init_db called with force=True should leave the workflow_runs table
    empty, even though a row was written beforehand.
    """
    make_session = sessionmaker(bind=create_engine(postgres_test_db.url()))

    # Seed the workflow_runs table with a single row
    writer = make_session()
    WorkflowRuns.set_state(
        session=writer,
        workflow_name="DUMMY_WORKFLOW_NAME",
        parameters={"DUMMY_PARAM_NAME": "DUMMY_PARAM_VALUE"},
        state=RunState.running,
    )
    writer.commit()
    writer.close()

    # Re-initialise the DB, forcing a rebuild
    init_db(postgres_test_db.url(), force=True)

    # The seeded row should be gone
    reader = make_session()
    remaining_rows = reader.query(WorkflowRuns).all()
    assert len(remaining_rows) == 0
    reader.close()
def test_init_db_doesnt_wipe(postgres_test_db):
    """
    init_db called without force should keep existing rows in an
    already-built DB.
    """
    make_session = sessionmaker(bind=create_engine(postgres_test_db.url()))

    # Seed the workflow_runs table with a single row
    writer = make_session()
    WorkflowRuns.set_state(
        session=writer,
        workflow_name="DUMMY_WORKFLOW_NAME",
        parameters={"DUMMY_PARAM_NAME": "DUMMY_PARAM_VALUE"},
        state=RunState.running,
    )
    writer.commit()
    writer.close()

    # Initialise the DB again, without forcing a rebuild
    init_db(postgres_test_db.url())

    # The seeded row should still be present
    reader = make_session()
    surviving_rows = reader.query(WorkflowRuns).all()
    assert len(surviving_rows) > 0
    reader.close()
def record_workflow_run_state(
    parametrised_workflow: Tuple[Flow, Dict[str, Any]], state: RunState
) -> None:
    """
    Record the state of a workflow run as a new row in the database.

    The row is written via WorkflowRuns.set_state, using a session opened
    with session_scope on the database URI in prefect.config.db_uri.

    Parameters
    ----------
    parametrised_workflow : tuple (prefect.Flow, dict)
        The workflow and the parameters it was run with.
    state : RunState
        State to record for this workflow run.
    """
    workflow, parameters = parametrised_workflow
    logger = prefect.context.logger
    logger.debug(
        f"Recording workflow '{workflow.name}' with parameters {parameters} as '{state.name}'."
    )
    with session_scope(prefect.config.db_uri) as session:
        WorkflowRuns.set_state(
            session=session,
            workflow_name=workflow.name,
            parameters=parameters,
            state=state,
        )
def test_get_most_recent_state_returns_None(session):
    """
    Test that get_most_recent_state returns None if a workflow run has no
    previous state.
    """
    workflow_run_data = dict(
        workflow_name="DUMMY_WORKFLOW_NAME",
        parameters={"DUMMY_PARAM_NAME": "DUMMY_PARAM_VALUE"},
    )
    # This test sets no state for this workflow/parameters pair before
    # querying, so there should be nothing to return.
    state = WorkflowRuns.get_most_recent_state(**workflow_run_data, session=session)
    assert state is None
def skip_if_already_run(
    parametrised_workflow: Tuple[Flow, Dict[str, Any]]
) -> None:
    """
    Raise a SKIP signal if the workflow is currently running, or has
    previously run successfully, with the given parameters.

    The most recent recorded state for the workflow/parameters pair is
    looked up via WorkflowRuns.get_most_recent_state. If there is no
    recorded state, or the most recent state is 'failed', a debug message
    is logged and the function returns normally.

    Parameters
    ----------
    parametrised_workflow : tuple (prefect.Flow, dict)
        Workflow, and associated parameters, for which previous runs should
        be checked.

    Raises
    ------
    prefect.engine.signals.SKIP
        If the most recent state of this workflow with these parameters is
        'running' or 'success'.
    ValueError
        If the most recent state is not None and is not one of the states
        handled here.
    """
    workflow, parameters = parametrised_workflow
    logger = prefect.context.logger
    logger.info(
        f"Checking whether workflow '{workflow.name}' has already run successfully with parameters {parameters}."
    )

    with session_scope(prefect.config.db_uri) as session:
        state = WorkflowRuns.get_most_recent_state(
            workflow_name=workflow.name,
            parameters=parameters,
            session=session,
        )

    # States that allow the workflow to proceed: log and return.
    if state is None:
        logger.debug(
            f"Workflow '{workflow.name}' has not previously run with parameters {parameters}."
        )
        return
    if state == RunState.failed:
        logger.debug(
            f"Workflow '{workflow.name}' previously failed with parameters {parameters}."
        )
        return

    # States that mean the workflow should not be run again.
    if state == RunState.running:
        raise signals.SKIP(
            f"Workflow '{workflow.name}' is already running with parameters {parameters}."
        )
    if state == RunState.success:
        raise signals.SKIP(
            f"Workflow '{workflow.name}' previously ran successfully with parameters {parameters}."
        )

    # This should never happen
    raise ValueError(f"Unrecognised workflow state: '{state}'.")