def test_set_dag(self): """ Test assigning Operators to Dags, including deferred assignment """ dag = DAG('dag', start_date=DEFAULT_DATE) dag2 = DAG('dag2', start_date=DEFAULT_DATE) op = DummyOperator(task_id='op_1', owner='test') # no dag assigned self.assertFalse(op.has_dag()) self.assertRaises(AirflowException, getattr, op, 'dag') # no improper assignment with self.assertRaises(TypeError): op.dag = 1 op.dag = dag # no reassignment with self.assertRaises(AirflowException): op.dag = dag2 # but assigning the same dag is ok op.dag = dag self.assertIs(op.dag, dag) self.assertIn(op, dag.tasks)
def test_infer_dag(self): dag = DAG('dag', start_date=DEFAULT_DATE) dag2 = DAG('dag2', start_date=DEFAULT_DATE) op1 = DummyOperator(task_id='test_op_1', owner='test') op2 = DummyOperator(task_id='test_op_2', owner='test') op3 = DummyOperator(task_id='test_op_3', owner='test', dag=dag) op4 = DummyOperator(task_id='test_op_4', owner='test', dag=dag2) # double check dags self.assertEqual( [i.has_dag() for i in [op1, op2, op3, op4]], [False, False, True, True]) # can't combine operators with no dags self.assertRaises(AirflowException, op1.set_downstream, op2) # op2 should infer dag from op1 op1.dag = dag op1.set_downstream(op2) self.assertIs(op2.dag, dag) # can't assign across multiple DAGs self.assertRaises(AirflowException, op1.set_downstream, op4) self.assertRaises(AirflowException, op1.set_downstream, [op3, op4])
def test_dag_as_context_manager(self): """ Test DAG as a context manager. When used as a context manager, Operators are automatically added to the DAG (unless they specify a different DAG) """ dag = DAG( 'dag', start_date=DEFAULT_DATE, default_args={'owner': 'owner1'}) dag2 = DAG( 'dag2', start_date=DEFAULT_DATE, default_args={'owner': 'owner2'}) with dag: op1 = DummyOperator(task_id='op1') op2 = DummyOperator(task_id='op2', dag=dag2) self.assertIs(op1.dag, dag) self.assertEqual(op1.owner, 'owner1') self.assertIs(op2.dag, dag2) self.assertEqual(op2.owner, 'owner2') with dag2: op3 = DummyOperator(task_id='op3') self.assertIs(op3.dag, dag2) self.assertEqual(op3.owner, 'owner2') with dag: with dag2: op4 = DummyOperator(task_id='op4') op5 = DummyOperator(task_id='op5') self.assertIs(op4.dag, dag2) self.assertIs(op5.dag, dag) self.assertEqual(op4.owner, 'owner2') self.assertEqual(op5.owner, 'owner1') with DAG('creating_dag_in_cm', start_date=DEFAULT_DATE) as dag: DummyOperator(task_id='op6') self.assertEqual(dag.dag_id, 'creating_dag_in_cm') self.assertEqual(dag.tasks[0].task_id, 'op6') with dag: with dag: op7 = DummyOperator(task_id='op7') op8 = DummyOperator(task_id='op8') op9 = DummyOperator(task_id='op8') op9.dag = dag2 self.assertEqual(op7.dag, dag) self.assertEqual(op8.dag, dag) self.assertEqual(op9.dag, dag2)
cluster_name="ephemeral-spark-cluster-{{ds_nodash}}", master_machine_type="n1-standard-1", master_disk_size=50, worker_machine_type="n1-standard-1", worker_disk_size=50, num_workers=2, region="us-west1", zone="us-west1-a", image_version='1.4') pyspark = DataProcPySparkOperator( task_id="run_pyspark", main=route, cluster_name="ephemeral-spark-cluster-{{ds_nodash}}", region="us-west1") borrar_cluster = DataprocClusterDeleteOperator( task_id="borrar_cluster", cluster_name="ephemeral-spark-cluster-{{ds_nodash}}", region="us-west1", trigger_rule=TriggerRule.ALL_DONE) dummy_final = DummyOperator(task_id="prueba_final") delay = PythonOperator(task_id="delay1", python_callable=my_function, op_args=[200]) dummy.dag = dag dummy >> descomprimir >> delay >> crear_cluster >> pyspark >> borrar_cluster >> dummy_final