def make_simple_dag(): """Make very simple DAG to verify serialization result.""" dag = DAG(dag_id='simple_dag') BaseOperator(task_id='simple_task', dag=dag, start_date=datetime(2019, 8, 1)) return {'simple_dag': dag}
def test_schedule_dag_start_end_dates(self): """ Tests that an attempt to schedule a task after the Dag's end_date does not succeed. """ delta = timedelta(hours=1) runs = 3 start_date = DEFAULT_DATE end_date = start_date + (runs - 1) * delta dag = DAG(self.TEST_SCHEDULE_START_END_DATES_DAG_ID, start_date=start_date, end_date=end_date, schedule_interval=delta) dag.add_task(BaseOperator(task_id='faketastic', owner='Also fake')) # Create and schedule the dag runs dag_runs = [] scheduler = jobs.SchedulerJob(**self.default_scheduler_args) for _ in range(runs): dag_runs.append(scheduler.create_dag_run(dag)) additional_dag_run = scheduler.create_dag_run(dag) for dag_run in dag_runs: self.assertIsNotNone(dag_run) self.assertIsNone(additional_dag_run)
def _get_task_instance(self, trigger_rule=TriggerRule.ALL_SUCCESS, state=None, upstream_task_ids=None): task = BaseOperator(task_id='test_task', trigger_rule=trigger_rule, start_date=datetime(2015, 1, 1)) if upstream_task_ids: task._upstream_task_ids.update(upstream_task_ids) return TaskInstance(task=task, state=state, execution_date=task.start_date)
def test_fractional_seconds(self): """ Tests if fractional seconds are stored in the database """ dag = DAG(TEST_DAG_ID + 'test_fractional_seconds') dag.schedule_interval = '@once' dag.add_task(BaseOperator( task_id="faketastic", owner='Also fake', start_date=datetime(2015, 1, 2, 0, 0))) start_date = timezone.utcnow() run = dag.create_dagrun( run_id='test_' + start_date.isoformat(), execution_date=start_date, start_date=start_date, state=State.RUNNING, external_trigger=False ) run.refresh_from_db() self.assertEqual(start_date, run.execution_date, "dag run execution_date loses precision") self.assertEqual(start_date, run.start_date, "dag run start_date loses precision ")
def test_schedule_dag_fake_scheduled_previous(self): """ Test scheduling a dag where there is a prior DagRun which has the same run_id as the next run should have """ delta = timedelta(hours=1) dag = DAG(self.TEST_SCHEDULE_DAG_FAKE_SCHEDULED_PREVIOUS_DAG_ID, schedule_interval=delta, start_date=DEFAULT_DATE) dag.add_task(BaseOperator( task_id="faketastic", owner='Also fake', start_date=DEFAULT_DATE)) scheduler = jobs.SchedulerJob(**self.default_scheduler_args) dag.create_dagrun(run_id=DagRun.id_for_date(DEFAULT_DATE), execution_date=DEFAULT_DATE, state=State.SUCCESS, external_trigger=True) dag_run = scheduler.create_dag_run(dag) self.assertIsNotNone(dag_run) self.assertEqual(dag.dag_id, dag_run.dag_id) self.assertIsNotNone(dag_run.run_id) self.assertNotEqual('', dag_run.run_id) self.assertEqual( DEFAULT_DATE + delta, dag_run.execution_date, msg='dag_run.execution_date did not match expectation: {0}' .format(dag_run.execution_date) ) self.assertEqual(State.RUNNING, dag_run.state) self.assertFalse(dag_run.external_trigger)
def test_offer_against_missing_organizationId_fails(): cpus = 1 mem = 512 operator = BaseOperator(task_id='123', resources=dict(organizationId='astronomer')) suitable = offer_suitable(operator, cpus, mem) assert suitable is False
def test_full_offer_suitable(): cpus = 1 mem = 512 offerOrgIds = ['astronomer'] operator = BaseOperator(task_id='123', resources=dict(organizationId='astronomer')) suitable = offer_suitable(operator, cpus, mem, offerOrgIds) assert suitable is True
class SerializedBaseOperator(BaseOperator, Serialization): """A JSON serializable representation of operator. All operators are casted to SerializedBaseOperator after deserialization. Class specific attributes used by UI are move to object attributes. """ _included_fields = list(vars(BaseOperator(task_id='test')).keys()) + [ '_dag', '_task_type', 'ui_color', 'ui_fgcolor', 'template_fields' ] _json_schema = make_operator_schema() def __init__(self, *args, **kwargs): BaseOperator.__init__(self, *args, **kwargs) # task_type is used by UI to display the correct class type, because UI only # receives BaseOperator from deserialized DAGs. self._task_type = 'BaseOperator' # Move class attributes into object attributes. self.ui_color = BaseOperator.ui_color self.ui_fgcolor = BaseOperator.ui_fgcolor self.template_fields = BaseOperator.template_fields @property def task_type(self) -> str: # Overwrites task_type of BaseOperator to use _task_type instead of # __class__.__name__. return self._task_type @task_type.setter def task_type(self, task_type: str): self._task_type = task_type @classmethod def serialize_operator(cls, op: BaseOperator, visited_dags: dict) -> dict: """Serializes operator into a JSON object. """ serialize_op = cls._serialize_object( op, visited_dags, included_fields=SerializedBaseOperator._included_fields) # Adds a new task_type field to record the original operator class. serialize_op['_task_type'] = op.__class__.__name__ return cls._encode(serialize_op, type_=DAT.OP) @classmethod def deserialize_operator(cls, encoded_op: dict, visited_dags: dict) -> BaseOperator: """Deserializes an operator from a JSON object. """ op = SerializedBaseOperator(task_id=encoded_op['task_id']) cls._deserialize_object(encoded_op, op, SerializedBaseOperator._included_fields, visited_dags) return op
def make_simple_dag(): """Make very simple DAG to verify serialization result.""" dag = DAG( dag_id='simple_dag', default_args={ "retries": 1, "retry_delay": timedelta(minutes=5), "depends_on_past": False, }, start_date=datetime(2019, 8, 1), ) BaseOperator(task_id='simple_task', dag=dag, owner='airflow') CustomBaseOperator(task_id='custom_task', dag=dag) return {'simple_dag': dag}
def test_schedule_dag_once(self): """ Tests scheduling a dag scheduled for @once - should be scheduled the first time it is called, and not scheduled the second. """ dag = DAG(self.TEST_SCHEDULE_ONCE_DAG_ID) dag.schedule_interval = '@once' dag.add_task(BaseOperator( task_id="faketastic", owner='Also fake', start_date=datetime(2015, 1, 2, 0, 0))) dag_run = jobs.SchedulerJob(**self.default_scheduler_args).create_dag_run(dag) dag_run2 = jobs.SchedulerJob(**self.default_scheduler_args).create_dag_run(dag) self.assertIsNotNone(dag_run) self.assertIsNone(dag_run2) dag.clear()
def test_schedule_dag_no_end_date_up_to_today_only(self): """ Tests that a Dag created without an end_date can only be scheduled up to and including the current datetime. For example, if today is 2016-01-01 and we are scheduling from a start_date of 2015-01-01, only jobs up to, but not including 2016-01-01 should be scheduled. """ session = settings.Session() delta = timedelta(days=1) now = utcnow() start_date = now.subtract(weeks=1) runs = (now - start_date).days dag = DAG(self.TEST_SCHEDULE_DAG_NO_END_DATE_UP_TO_TODAY_ONLY_DAG_ID, start_date=start_date, schedule_interval=delta) dag.add_task(BaseOperator(task_id='faketastic', owner='Also fake')) dag_runs = [] scheduler = jobs.SchedulerJob(**self.default_scheduler_args) for _ in range(runs): dag_run = scheduler.create_dag_run(dag) dag_runs.append(dag_run) # Mark the DagRun as complete dag_run.state = State.SUCCESS session.merge(dag_run) session.commit() # Attempt to schedule an additional dag run (for 2016-01-01) additional_dag_run = scheduler.create_dag_run(dag) for dag_run in dag_runs: self.assertIsNotNone(dag_run) self.assertIsNone(additional_dag_run)
def test_schedule_dag_relativedelta(self): """ Tests scheduling a dag with a relativedelta schedule_interval """ delta = relativedelta(hours=+1) dag = DAG(self.TEST_SCHEDULE_RELATIVEDELTA_DAG_ID, schedule_interval=delta) dag.add_task(BaseOperator( task_id="faketastic", owner='Also fake', start_date=datetime(2015, 1, 2, 0, 0))) dag_run = jobs.SchedulerJob(**self.default_scheduler_args).create_dag_run(dag) self.assertIsNotNone(dag_run) self.assertEqual(dag.dag_id, dag_run.dag_id) self.assertIsNotNone(dag_run.run_id) self.assertNotEqual('', dag_run.run_id) self.assertEqual( datetime(2015, 1, 2, 0, 0), dag_run.execution_date, msg='dag_run.execution_date did not match expectation: {0}' .format(dag_run.execution_date) ) self.assertEqual(State.RUNNING, dag_run.state) self.assertFalse(dag_run.external_trigger) dag_run2 = jobs.SchedulerJob(**self.default_scheduler_args).create_dag_run(dag) self.assertIsNotNone(dag_run2) self.assertEqual(dag.dag_id, dag_run2.dag_id) self.assertIsNotNone(dag_run2.run_id) self.assertNotEqual('', dag_run2.run_id) self.assertEqual( datetime(2015, 1, 2, 0, 0) + delta, dag_run2.execution_date, msg='dag_run2.execution_date did not match expectation: {0}' .format(dag_run2.execution_date) ) self.assertEqual(State.RUNNING, dag_run2.state) self.assertFalse(dag_run2.external_trigger) dag.clear()
def test_schedule_dag_no_previous_runs(self): """ Tests scheduling a dag with no previous runs """ dag = DAG(self.TEST_SCHEDULE_WITH_NO_PREVIOUS_RUNS_DAG_ID) dag.add_task(BaseOperator( task_id="faketastic", owner='Also fake', start_date=datetime(2015, 1, 2, 0, 0))) dag_run = jobs.SchedulerJob(**self.default_scheduler_args).create_dag_run(dag) self.assertIsNotNone(dag_run) self.assertEqual(dag.dag_id, dag_run.dag_id) self.assertIsNotNone(dag_run.run_id) self.assertNotEqual('', dag_run.run_id) self.assertEqual( datetime(2015, 1, 2, 0, 0), dag_run.execution_date, msg='dag_run.execution_date did not match expectation: {0}' .format(dag_run.execution_date) ) self.assertEqual(State.RUNNING, dag_run.state) self.assertFalse(dag_run.external_trigger) dag.clear()
def _get_task(self, **kwargs): return BaseOperator(task_id='test_task', dag=DAG('test_dag'), **kwargs)
def test_empty_offer_not_suitable(): operator = BaseOperator(task_id='123', resources=dict(organizationId='astronomer')) suitable = offer_suitable(operator) assert suitable is False