Example #1
    def test_schedule_dag_no_end_date_up_to_today_only(self):
        """
        Tests that a DAG created without an end_date can only be scheduled up
        to, but not including, the current datetime.

        For example, if today is 2016-01-01 and we are scheduling from a
        start_date of 2015-01-01, only jobs up to, but not including
        2016-01-01 should be scheduled.
        """
        session = settings.Session()
        delta = datetime.timedelta(days=1)
        now = utcnow()
        start_date = now.subtract(weeks=1)

        runs = (now - start_date).days
        dag_id = "test_schedule_dag_no_end_date_up_to_today_only"
        dag = DAG(dag_id=dag_id,
                  start_date=start_date,
                  schedule_interval=delta)
        dag.add_task(BaseOperator(task_id='faketastic', owner='Also fake'))

        dag_file_processor = DagFileProcessor(dag_ids=[], log=mock.MagicMock())
        dag_runs = []
        for _ in range(runs):
            dag_run = dag_file_processor.create_dag_run(dag)
            dag_runs.append(dag_run)

            # Mark the DagRun as complete
            dag_run.state = State.SUCCESS
            session.merge(dag_run)
            session.commit()

        # Attempt to schedule an additional dag run (for 2016-01-01)
        additional_dag_run = dag_file_processor.create_dag_run(dag)

        for dag_run in dag_runs:
            self.assertIsNotNone(dag_run)

        self.assertIsNone(additional_dag_run)
        self._clean_up(dag_id)
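A quick sanity check of the window this test exercises, sketched with pendulum directly rather than the test's own utcnow helper: with a start_date one week ago and a daily interval, exactly seven execution dates fit strictly before now, which is why the eighth create_dag_run call is expected to return None.

import datetime
import pendulum

now = pendulum.now("UTC")
start_date = now.subtract(weeks=1)
interval = datetime.timedelta(days=1)

# Execution dates that fit strictly before "now": seven daily runs,
# the last of which falls on yesterday.
expected = [start_date + i * interval for i in range((now - start_date).days)]
assert len(expected) == 7
assert expected[-1] == now.subtract(days=1)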
Example #2
    def test_schedule_dag_relativedelta(self):
        """
        Tests scheduling a dag with a relativedelta schedule_interval
        """
        dag_id = "test_schedule_dag_relativedelta"
        delta = relativedelta(hours=+1)
        dag = DAG(dag_id=dag_id, schedule_interval=delta)
        dag.add_task(
            BaseOperator(task_id="faketastic",
                         owner='Also fake',
                         start_date=datetime_tz(2015, 1, 2, 0, 0)))

        dag_file_processor = DagFileProcessor(dag_ids=[], log=mock.MagicMock())
        dag_run = dag_file_processor.create_dag_run(dag)
        self.assertIsNotNone(dag_run)
        self.assertEqual(dag.dag_id, dag_run.dag_id)
        self.assertIsNotNone(dag_run.run_id)
        self.assertNotEqual('', dag_run.run_id)
        self.assertEqual(
            datetime_tz(2015, 1, 2, 0, 0),
            dag_run.execution_date,
            msg='dag_run.execution_date did not match expectation: {0}'.format(
                dag_run.execution_date))
        self.assertEqual(State.RUNNING, dag_run.state)
        self.assertFalse(dag_run.external_trigger)
        dag_run2 = dag_file_processor.create_dag_run(dag)
        self.assertIsNotNone(dag_run2)
        self.assertEqual(dag.dag_id, dag_run2.dag_id)
        self.assertIsNotNone(dag_run2.run_id)
        self.assertNotEqual('', dag_run2.run_id)
        self.assertEqual(
            datetime_tz(2015, 1, 2, 0, 0) + delta,
            dag_run2.execution_date,
            msg='dag_run2.execution_date did not match expectation: {0}'.format(
                dag_run2.execution_date))
        self.assertEqual(State.RUNNING, dag_run2.state)
        self.assertFalse(dag_run2.external_trigger)
        dag.clear()
        self._clean_up(dag_id)
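The only change from the timedelta-based tests is the schedule type: dateutil's relativedelta supports calendar-aware offsets (months, weekday rules) that a fixed timedelta cannot express. A minimal illustration, independent of Airflow:

import datetime
from dateutil.relativedelta import relativedelta

base = datetime.datetime(2015, 1, 2, 0, 0)
print(base + relativedelta(hours=+1))   # 2015-01-02 01:00:00, same as timedelta(hours=1)
print(base + relativedelta(months=+1))  # 2015-02-02 00:00:00, not expressible as a fixed timedelta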
Example #3
    def test_removed_task_instances_can_be_restored(self):
        def with_all_tasks_removed(dag):
            return DAG(dag_id=dag.dag_id, start_date=dag.start_date)

        dag = DAG('test_task_restoration', start_date=DEFAULT_DATE)
        dag.add_task(DummyOperator(task_id='flaky_task', owner='test'))

        dagrun = self.create_dag_run(dag)
        flaky_ti = dagrun.get_task_instances()[0]
        self.assertEqual('flaky_task', flaky_ti.task_id)
        self.assertEqual(State.NONE, flaky_ti.state)

        # Swap in a copy of the DAG that no longer contains the task
        dagrun.dag = with_all_tasks_removed(dag)

        dagrun.verify_integrity()
        flaky_ti.refresh_from_db()
        self.assertEqual(State.NONE, flaky_ti.state)

        # Re-add the task; the existing task instance should still be usable
        dagrun.dag.add_task(DummyOperator(task_id='flaky_task', owner='test'))

        dagrun.verify_integrity()
        flaky_ti.refresh_from_db()
        self.assertEqual(State.NONE, flaky_ti.state)
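The two verify_integrity calls are the point of the test: the DagRun reconciles its task instances against whatever tasks its DAG currently carries, so removing 'flaky_task' and then adding it back leaves the original task instance intact and still in State.NONE rather than orphaning it.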
Example #4
    def test_task_instance_mutation_hook(self, state, mock_hook):
        def mutate_task_instance(task_instance):
            if task_instance.queue == 'queue1':
                task_instance.queue = 'queue2'
            else:
                task_instance.queue = 'queue1'

        mock_hook.side_effect = mutate_task_instance

        dag = DAG('test_task_instance_mutation_hook', start_date=DEFAULT_DATE)
        dag.add_task(DummyOperator(task_id='task_to_mutate', owner='test', queue='queue1'))

        dagrun = self.create_dag_run(dag)
        task = dagrun.get_task_instances()[0]
        session = settings.Session()
        task.state = state
        session.merge(task)
        session.commit()
        # The hook ran when the task instance was created: queue1 -> queue2
        assert task.queue == 'queue2'

        # verify_integrity runs the hook again, toggling the queue back
        dagrun.verify_integrity()
        task = dagrun.get_task_instances()[0]
        assert task.queue == 'queue1'
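The mock_hook patched into this test stands in for Airflow's task_instance_mutation_hook cluster policy. In a real deployment the equivalent toggle would live in airflow_local_settings.py on the scheduler's path; a minimal sketch of that setup (file name and location follow the standard convention, the body mirrors the test's side effect):

# airflow_local_settings.py (e.g. under $AIRFLOW_HOME/config/).
# Airflow picks this hook up automatically and applies it to task instances,
# for example when a DagRun creates or re-checks them via verify_integrity,
# which is what the test above exercises.
def task_instance_mutation_hook(task_instance):
    if task_instance.queue == 'queue1':
        task_instance.queue = 'queue2'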
Example #5
    def test_timezone_awareness(self):
        NAIVE_DATETIME = DEFAULT_DATE.replace(tzinfo=None)

        # check ti without dag (just for bw compat)
        op_no_dag = DummyOperator(task_id='op_no_dag')
        ti = TI(task=op_no_dag, execution_date=NAIVE_DATETIME)

        self.assertEqual(ti.execution_date, DEFAULT_DATE)

        # check with dag without localized execution_date
        dag = DAG('dag', start_date=DEFAULT_DATE)
        op1 = DummyOperator(task_id='op_1')
        dag.add_task(op1)
        ti = TI(task=op1, execution_date=NAIVE_DATETIME)

        self.assertEqual(ti.execution_date, DEFAULT_DATE)

        # with dag and localized execution_date
        tz = pendulum.timezone("Europe/Amsterdam")
        execution_date = timezone.datetime(2016, 1, 1, 1, 0, 0, tzinfo=tz)
        utc_date = timezone.convert_to_utc(execution_date)
        ti = TI(task=op1, execution_date=execution_date)
        self.assertEqual(ti.execution_date, utc_date)
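The final assertion reduces to an ordinary timezone conversion: Amsterdam is UTC+1 in January, so 01:00 local time is 00:00 UTC. The same check with pendulum 2 alone:

import pendulum

local = pendulum.datetime(2016, 1, 1, 1, 0, 0, tz="Europe/Amsterdam")
print(local.in_timezone("UTC"))  # 2016-01-01T00:00:00+00:00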
Example #6
    def test_set_task_dates(self):
        """
        Test that tasks properly take start/end dates from DAGs
        """
        dag = DAG('dag',
                  start_date=DEFAULT_DATE,
                  end_date=DEFAULT_DATE + datetime.timedelta(days=10))

        op1 = DummyOperator(task_id='op_1', owner='test')

        self.assertTrue(op1.start_date is None and op1.end_date is None)

        # dag should assign its dates to op1 because op1 has no dates
        dag.add_task(op1)
        self.assertTrue(op1.start_date == dag.start_date
                        and op1.end_date == dag.end_date)

        op2 = DummyOperator(
            task_id='op_2',
            owner='test',
            start_date=DEFAULT_DATE - datetime.timedelta(days=1),
            end_date=DEFAULT_DATE + datetime.timedelta(days=11))

        # dag should assign its dates to op2 because they are more restrictive
        dag.add_task(op2)
        self.assertTrue(op2.start_date == dag.start_date
                        and op2.end_date == dag.end_date)

        op3 = DummyOperator(
            task_id='op_3',
            owner='test',
            start_date=DEFAULT_DATE + datetime.timedelta(days=1),
            end_date=DEFAULT_DATE + datetime.timedelta(days=9))
        # op3 should keep its dates because they are more restrictive
        dag.add_task(op3)
        self.assertTrue(
            op3.start_date == DEFAULT_DATE + datetime.timedelta(days=1))
        self.assertTrue(
            op3.end_date == DEFAULT_DATE + datetime.timedelta(days=9))
Example #7
    def test_set_task_dates(self):
        """
        Test that tasks properly take start/end dates from DAGs
        """
        dag = DAG('dag', start_date=DEFAULT_DATE,
                  end_date=DEFAULT_DATE + datetime.timedelta(days=10))

        op1 = DummyOperator(task_id='op_1', owner='test')

        self.assertTrue(op1.start_date is None and op1.end_date is None)

        # dag should assign its dates to op1 because op1 has no dates
        dag.add_task(op1)
        self.assertTrue(
            op1.start_date == dag.start_date and op1.end_date == dag.end_date)

        op2 = DummyOperator(
            task_id='op_2',
            owner='test',
            start_date=DEFAULT_DATE - datetime.timedelta(days=1),
            end_date=DEFAULT_DATE + datetime.timedelta(days=11))

        # dag should assign its dates to op2 because they are more restrictive
        dag.add_task(op2)
        self.assertTrue(
            op2.start_date == dag.start_date and op2.end_date == dag.end_date)

        op3 = DummyOperator(
            task_id='op_3',
            owner='test',
            start_date=DEFAULT_DATE + datetime.timedelta(days=1),
            end_date=DEFAULT_DATE + datetime.timedelta(days=9))
        # op3 should keep its dates because they are more restrictive
        dag.add_task(op3)
        self.assertTrue(
            op3.start_date == DEFAULT_DATE + datetime.timedelta(days=1))
        self.assertTrue(
            op3.end_date == DEFAULT_DATE + datetime.timedelta(days=9))
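The two copies of test_set_task_dates above pin down the same rule: when a task is added, its effective window becomes the intersection of its own start/end dates and the DAG's, with missing task dates inherited from the DAG. A hypothetical helper (not Airflow's actual implementation) that states the rule compactly:

import datetime

def clamp_to_dag_window(task_start, task_end, dag_start, dag_end):
    # Effective window = intersection of the task's and the DAG's windows;
    # a missing task date simply inherits the DAG's date.
    start = max(d for d in (task_start, dag_start) if d is not None)
    end = min(d for d in (task_end, dag_end) if d is not None)
    return start, end

# Stand-in for the tests' DEFAULT_DATE.
DEFAULT_DATE = datetime.datetime(2016, 1, 1)
dag_window = (DEFAULT_DATE, DEFAULT_DATE + datetime.timedelta(days=10))

# op2: wider than the DAG on both sides -> clamped to the DAG's dates.
print(clamp_to_dag_window(DEFAULT_DATE - datetime.timedelta(days=1),
                          DEFAULT_DATE + datetime.timedelta(days=11),
                          *dag_window))

# op3: already inside the DAG's window -> keeps its own dates.
print(clamp_to_dag_window(DEFAULT_DATE + datetime.timedelta(days=1),
                          DEFAULT_DATE + datetime.timedelta(days=9),
                          *dag_window))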
Example #8
from airflow.models import DAG
from airflow.operators import BashOperator, DummyOperator
from datetime import datetime

default_args = {
    'owner': 'max',
    'start_date': datetime(2014, 11, 1),
}

dag = DAG(dag_id='example_1')
# dag = DAG(dag_id='example_1', executor=SequentialExecutor())

cmd = 'ls -l'
run_this_last = DummyOperator(
    task_id='run_this_last',
    default_args=default_args)
dag.add_task(run_this_last)

run_this = BashOperator(
    task_id='run_after_loop', bash_command='echo 1',
    default_args=default_args)
dag.add_task(run_this)
run_this.set_downstream(run_this_last)
for i in range(9):
    i = str(i)
    task = BashOperator(
        task_id='runme_'+i,
        bash_command='sleep 5',
        default_args=default_args)
    task.set_downstream(run_this)
    dag.add_task(task)
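Dependencies in this example are wired explicitly with set_downstream, giving the fan-in runme_0..runme_8 -> run_after_loop -> run_this_last. Later Airflow releases spell the same graph with the bitshift operators, which are shorthand for set_downstream/set_upstream; using the names from the example above, the loop body and the final edge would read:

task >> run_this            # same as task.set_downstream(run_this)
run_this >> run_this_last   # same as run_this.set_downstream(run_this_last)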
Example #9
from airflow.operators import BashOperator, MySqlOperator
from airflow.models import DAG
from datetime import datetime

default_args = {
    'owner': 'max',
    'start_date': datetime(2014, 9, 1),
    'mysql_dbid': 'local_mysql',
}

dag = DAG(dag_id='example_3')

run_this = BashOperator(task_id='also_run_this',
                        bash_command='ls -l',
                        **default_args)
dag.add_task(run_this)

for i in range(5):
    i = str(i)
    task = BashOperator(task_id='runme_' + i,
                        bash_command='sleep {{ 10 + macros.random() * 10 }}',
                        **default_args)
    task.set_upstream(run_this)
    dag.add_task(task)
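Note that bash_command is a templated field, so 'sleep {{ 10 + macros.random() * 10 }}' is rendered by Jinja at run time; macros.random() returns a float in [0, 1), so each runme_* task sleeps somewhere between 10 and 20 seconds.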
Example #10
from airflow.operators import BashOperator
from airflow.models import DAG
from datetime import datetime

default_args = {
    'owner': 'mistercrunch',
    'start_date': datetime(2014, 10, 1),
    'depends_on_past': True,
}

dag = DAG(dag_id='example_2')

cmd = 'ls -l'
run_this_last = BashOperator(task_id='run_this_last',
                             bash_command='echo 1',
                             **default_args)
dag.add_task(run_this_last)

run_this = BashOperator(task_id='run_this',
                        bash_command='echo 1',
                        **default_args)
dag.add_task(run_this)
run_this.set_downstream(run_this_last)

for i in range(10):
    i = str(i)
    task = BashOperator(task_id='runme_' + i,
                        bash_command='sleep 10',
                        default_args=default_args)
    task.set_downstream(run_this)
    dag.add_task(task)
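Unlike the other examples, default_args here sets depends_on_past=True, so each scheduled run of run_this, run_this_last and the runme_* tasks only starts once the same task has succeeded in the previous schedule interval.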
Example #11
from airflow.operators import BashOperator, MySqlOperator
from airflow.models import DAG
from datetime import datetime

default_args = {
    'owner': 'max',
    'start_date': datetime(2014, 9, 1),
    'mysql_dbid': 'local_mysql',
}

dag = DAG(dag_id='example_3')

run_this = BashOperator(
    task_id='also_run_this', bash_command='ls -l', **default_args)
dag.add_task(run_this)

for i in range(5):
    i = str(i)
    task = BashOperator(
        task_id='runme_' + i,
        bash_command='sleep {{ 10 + macros.random() * 10 }}',
        **default_args)
    task.set_upstream(run_this)
    dag.add_task(task)