def test_should_respond_200_with_tilde_and_access_to_all_dags(self):
    """Listing XCom entries with ``~`` wildcards returns entries of both DAGs.

    Two DAGs get XCom entries through ``_create_xcom_entries``; additional
    entries are created through ``_create_invalid_xcom_entries`` and are
    expected to be absent from the response (``total_entries`` is 4).
    """
    dag_id_1 = 'test-dag-id-1'
    task_id_1 = 'test-task-id-1'
    execution_date = '2005-04-02T00:00:00+00:00'
    execution_date_parsed = parse_execution_date(execution_date)
    dag_run_id_1 = DR.generate_run_id(DagRunType.MANUAL, execution_date_parsed)
    self._create_xcom_entries(dag_id_1, dag_run_id_1, execution_date_parsed, task_id_1)

    dag_id_2 = 'test-dag-id-2'
    task_id_2 = 'test-task-id-2'
    dag_run_id_2 = DR.generate_run_id(DagRunType.MANUAL, execution_date_parsed)
    self._create_xcom_entries(dag_id_2, dag_run_id_2, execution_date_parsed, task_id_2)

    self._create_invalid_xcom_entries(execution_date_parsed)

    response = self.client.get(
        "/api/v1/dags/~/dagRuns/~/taskInstances/~/xcomEntries",
        environ_overrides={'REMOTE_USER': "******"},
    )
    self.assertEqual(200, response.status_code)

    # Timestamps are set at insert time, so replace them with a fixed marker.
    response_data = response.json
    for xcom_entry in response_data['xcom_entries']:
        xcom_entry['timestamp'] = "TIMESTAMP"

    # Compare the object that was normalised above rather than a second
    # ``response.json`` access, so the test does not depend on that property
    # handing back the very same (cached) dict.
    self.assertEqual(
        response_data,
        {
            'xcom_entries': [
                {
                    'dag_id': dag_id_1,
                    'execution_date': execution_date,
                    'key': 'test-xcom-key-1',
                    'task_id': task_id_1,
                    'timestamp': "TIMESTAMP",
                },
                {
                    'dag_id': dag_id_1,
                    'execution_date': execution_date,
                    'key': 'test-xcom-key-2',
                    'task_id': task_id_1,
                    'timestamp': "TIMESTAMP",
                },
                {
                    'dag_id': dag_id_2,
                    'execution_date': execution_date,
                    'key': 'test-xcom-key-1',
                    'task_id': task_id_2,
                    'timestamp': "TIMESTAMP",
                },
                {
                    'dag_id': dag_id_2,
                    'execution_date': execution_date,
                    'key': 'test-xcom-key-2',
                    'task_id': task_id_2,
                    'timestamp': "TIMESTAMP",
                },
            ],
            'total_entries': 4,
        },
    )
def test_should_response_200(self):
    """Fetching a single XCom entry by key returns 200 and the entry's fields."""
    execution_date = '2005-04-02T00:00:00+00:00'
    parsed_date = parse_execution_date(execution_date)
    dag_id, task_id, xcom_key = 'test-dag-id', 'test-task-id', 'test-xcom-key'
    dag_run_id = DR.generate_run_id(DagRunType.MANUAL, parsed_date)
    self._create_xcom_entry(dag_id, dag_run_id, parsed_date, task_id, xcom_key)

    url = f"/api/v1/dags/{dag_id}/dagRuns/{dag_run_id}/taskInstances/{task_id}/xcomEntries/{xcom_key}"
    response = self.client.get(url, environ_overrides={'REMOTE_USER': "******"})
    self.assertEqual(200, response.status_code)

    # The stored timestamp is not controlled by the test; mask it before comparing.
    payload = response.json
    payload['timestamp'] = 'TIMESTAMP'
    expected = {
        'dag_id': dag_id,
        'execution_date': execution_date,
        'key': xcom_key,
        'task_id': task_id,
        'timestamp': 'TIMESTAMP',
    }
    self.assertEqual(payload, expected)
def test_echo_env_variables(self):
    """
    Test that env variables are exported correctly to the task bash environment.
    """
    utc_now = datetime.utcnow().replace(tzinfo=timezone.utc)

    default_args = {'owner': 'airflow', 'retries': 100, 'start_date': DEFAULT_DATE}
    dag = DAG(
        dag_id='bash_op_test',
        default_args=default_args,
        schedule_interval='@daily',
        dagrun_timeout=timedelta(minutes=60),
    )
    dag.create_dagrun(
        run_type=DagRunType.MANUAL,
        execution_date=DEFAULT_DATE,
        start_date=utc_now,
        state=State.RUNNING,
        external_trigger=False,
    )

    # Each echo appends one variable to the temp file named by ``{0}``.
    command_template = (
        'echo $AIRFLOW_HOME>> {0};'
        'echo $PYTHONPATH>> {0};'
        'echo $AIRFLOW_CTX_DAG_ID >> {0};'
        'echo $AIRFLOW_CTX_TASK_ID>> {0};'
        'echo $AIRFLOW_CTX_EXECUTION_DATE>> {0};'
        'echo $AIRFLOW_CTX_DAG_RUN_ID>> {0};'
    )
    patched_env = {
        'AIRFLOW_HOME': 'MY_PATH_TO_AIRFLOW_HOME',
        'PYTHONPATH': 'AWESOME_PYTHONPATH',
    }

    with NamedTemporaryFile() as tmp_file:
        task = BashOperator(
            task_id='echo_env_vars',
            dag=dag,
            bash_command=command_template.format(tmp_file.name),
        )

        with mock.patch.dict('os.environ', patched_env):
            task.run(DEFAULT_DATE, DEFAULT_DATE, ignore_first_depends_on_past=True, ignore_ti_state=True)

        with open(tmp_file.name) as file:
            output = file.read()

        self.assertIn('MY_PATH_TO_AIRFLOW_HOME', output)
        # exported in run-tests as part of PYTHONPATH
        self.assertIn('AWESOME_PYTHONPATH', output)
        self.assertIn('bash_op_test', output)
        self.assertIn('echo_env_vars', output)
        self.assertIn(DEFAULT_DATE.isoformat(), output)
        expected_run_id = DagRun.generate_run_id(DagRunType.MANUAL, DEFAULT_DATE)
        self.assertIn(expected_run_id, output)
def execute(self, context: t.Dict, session=None):
    """Trigger one run of ``self.trigger_dag_id`` per conf yielded by the callable.

    ``self.op_kwargs`` is merged into ``context`` and then replaced by the result
    of ``determine_kwargs``. Iteration stops at the first falsy conf. The ids of
    the created runs are pushed to XCom under ``self.CREATED_DAGRUN_KEY``.

    :param context: task execution context; must provide ``context['ti']``
    :param session: unused by this method
    """
    context.update(self.op_kwargs)
    self.op_kwargs = determine_kwargs(self.python_callable, self.op_args, context)

    triggered_ids = []
    for run_conf in self.python_callable(*self.op_args, **self.op_kwargs):
        if not run_conf:
            # A falsy conf ends the whole loop, not just this iteration.
            break

        now = timezone.utcnow()
        new_run_id = DagRun.generate_run_id(DagRunType.MANUAL, now)
        dag_run = trigger_dag(
            dag_id=self.trigger_dag_id,
            run_id=new_run_id,
            conf=run_conf,
            execution_date=now,
            replace_microseconds=False,
        )
        triggered_ids.append(dag_run.id)
        self.log.info("Created DagRun %s, %s - %s", dag_run, self.trigger_dag_id, new_run_id)

    if not triggered_ids:
        self.log.info("No DagRuns created")
        return

    context['ti'].xcom_push(self.CREATED_DAGRUN_KEY, triggered_ids)
def test_should_response_200(self, session):
    """A stored XCom entry is returned with status 200 and its stored timestamp."""
    execution_date = '2005-04-02T00:00:00+00:00'
    parsed_date = parse_execution_date(execution_date)
    dag_id, task_id, xcom_key = 'test-dag-id', 'test-task-id', 'test-xcom-key'

    # The XCom timestamp is set to the execution date so it can be asserted exactly.
    xcom_model = XCom(
        key=xcom_key,
        execution_date=parsed_date,
        task_id=task_id,
        dag_id=dag_id,
        timestamp=parsed_date,
    )
    dag_run_id = DR.generate_run_id(DagRunType.MANUAL, parsed_date)
    dagrun = DR(
        dag_id=dag_id,
        run_id=dag_run_id,
        execution_date=parsed_date,
        start_date=parsed_date,
        run_type=DagRunType.MANUAL.value,
    )
    session.add(xcom_model)
    session.add(dagrun)
    session.commit()

    url = f"/api/v1/dags/{dag_id}/dagRuns/{dag_run_id}/taskInstances/{task_id}/xcomEntries/{xcom_key}"
    response = self.client.get(url)
    self.assertEqual(200, response.status_code)

    expected = {
        'dag_id': dag_id,
        'execution_date': execution_date,
        'key': xcom_key,
        'task_id': task_id,
        'timestamp': execution_date,
    }
    self.assertEqual(response.json, expected)
def setUp(self):
    """Set the DAG id, task id, execution date and run id used by the tests."""
    super().setUp()
    self.dag_id, self.task_id = 'test-dag-id', 'test-task-id'
    self.execution_date = '2005-04-02T00:00:00+00:00'
    parsed_date = parse_execution_date(self.execution_date)
    self.execution_date_parsed = parsed_date
    self.dag_run_id = DR.generate_run_id(DagRunType.MANUAL, parsed_date)
def execute(self, context: Dict):
    """Trigger ``self.trigger_dag_id`` and optionally wait for the run to finish.

    The execution date is resolved from ``self.execution_date``: a datetime is
    used as-is, a string is parsed (and written back to ``self.execution_date``),
    anything else falls back to the current UTC time. The resolved value is used
    for the run id, the trigger call and the reset path, so they always agree.

    :param context: task execution context (not read by this method)
    :raises DagRunAlreadyExists: if the run exists and ``self.reset_dag_run`` is falsy
    :raises DagNotFound: if the target DAG has no ``DagModel`` row during a reset
    :raises AirflowException: if the awaited run reaches one of ``self.failed_states``
    """
    if isinstance(self.execution_date, datetime.datetime):
        execution_date = self.execution_date
    elif isinstance(self.execution_date, str):
        execution_date = timezone.parse(self.execution_date)
        self.execution_date = execution_date
    else:
        execution_date = timezone.utcnow()

    run_id = DagRun.generate_run_id(DagRunType.MANUAL, execution_date)
    try:
        # Pass the resolved date, not ``self.execution_date``: the latter may be
        # None, and the run would then get a date that differs from the one
        # encoded in ``run_id``.
        dag_run = trigger_dag(
            dag_id=self.trigger_dag_id,
            run_id=run_id,
            conf=self.conf,
            execution_date=execution_date,
            replace_microseconds=False,
        )
    except DagRunAlreadyExists:
        if not self.reset_dag_run:
            raise

        self.log.info("Clearing %s on %s", self.trigger_dag_id, execution_date)

        # Get target dag object and call clear()
        dag_model = DagModel.get_current(self.trigger_dag_id)
        if dag_model is None:
            raise DagNotFound(f"Dag id {self.trigger_dag_id} not found in DagModel")

        dag_bag = DagBag(dag_folder=dag_model.fileloc, read_dags_from_db=True)
        dag = dag_bag.get_dag(self.trigger_dag_id)
        # Clear exactly the resolved date; None bounds would not restrict the range.
        dag.clear(start_date=execution_date, end_date=execution_date)
        dag_run = DagRun.find(dag_id=dag.dag_id, run_id=run_id)[0]

    if self.wait_for_completion:
        # wait for dag to complete
        while True:
            self.log.info(
                'Waiting for %s on %s to become allowed state %s ...',
                self.trigger_dag_id,
                dag_run.execution_date,
                self.allowed_states,
            )
            time.sleep(self.poke_interval)

            dag_run.refresh_from_db()
            state = dag_run.state
            if state in self.failed_states:
                raise AirflowException(f"{self.trigger_dag_id} failed with failed states {state}")
            if state in self.allowed_states:
                self.log.info("%s finished with allowed state %s", self.trigger_dag_id, state)
                return
def test_trigger_dag(self, mock):
    """``client.trigger_dag`` forwards run id, execution date and conf to the mock."""
    test_dag_id = "example_bash_operator"
    default_run_id = DagRun.generate_run_id(DagRunType.MANUAL, EXECDATE_NOFRACTIONS)

    DagBag(include_examples=True)

    def assert_triggered_then_reset(expected_run_id, expected_conf=None):
        # Every scenario expects one call that differs only in run id and conf.
        mock.assert_called_once_with(
            run_id=expected_run_id,
            execution_date=EXECDATE_NOFRACTIONS,
            state=State.RUNNING,
            conf=expected_conf,
            external_trigger=True,
            dag_hash=ANY,
        )
        mock.reset_mock()

    # non existent
    with self.assertRaises(AirflowException):
        self.client.trigger_dag(dag_id="blablabla")

    with freeze_time(EXECDATE):
        # no execution date, execution date should be set automatically
        self.client.trigger_dag(dag_id=test_dag_id)
        assert_triggered_then_reset(default_run_id)

        # execution date with microseconds cutoff
        self.client.trigger_dag(dag_id=test_dag_id, execution_date=EXECDATE)
        assert_triggered_then_reset(default_run_id)

        # run id
        custom_run_id = "my_run_id"
        self.client.trigger_dag(dag_id=test_dag_id, run_id=custom_run_id)
        assert_triggered_then_reset(custom_run_id)

        # test conf
        conf = '{"name": "John"}'
        self.client.trigger_dag(dag_id=test_dag_id, conf=conf)
        assert_triggered_then_reset(default_run_id, json.loads(conf))
def test_should_raises_401_unauthenticated(self):
    """Listing XCom entries without ``REMOTE_USER`` is checked with ``assert_401``."""
    dag_id, task_id = 'test-dag-id', 'test-task-id'
    parsed_date = parse_execution_date('2005-04-02T00:00:00+00:00')
    dag_run_id = DR.generate_run_id(DagRunType.MANUAL, parsed_date)
    self._create_xcom_entries(dag_id, dag_run_id, parsed_date, task_id)

    # No environ_overrides here: the request carries no user.
    url = f"/api/v1/dags/{dag_id}/dagRuns/{dag_run_id}/taskInstances/{task_id}/xcomEntries"
    response = self.client.get(url)

    assert_401(response)
def test_should_raise_403_forbidden(self):
    """Fetching a single XCom entry as the given ``REMOTE_USER`` yields 403."""
    dag_id, task_id, xcom_key = 'test-dag-id', 'test-task-id', 'test-xcom-key'
    parsed_date = parse_execution_date('2005-04-02T00:00:00+00:00')
    dag_run_id = DR.generate_run_id(DagRunType.MANUAL, parsed_date)
    self._create_xcom_entry(dag_id, dag_run_id, parsed_date, task_id, xcom_key)

    url = f"/api/v1/dags/{dag_id}/dagRuns/{dag_run_id}/taskInstances/{task_id}/xcomEntries/{xcom_key}"
    response = self.client.get(url, environ_overrides={'REMOTE_USER': "******"})

    assert response.status_code == 403
def execute(self, context: Dict):
    """Trigger a manual run of ``self.trigger_dag_id``.

    The execution date is resolved from ``self.execution_date``: a datetime is
    used as-is, a string is parsed (and written back to ``self.execution_date``),
    anything else falls back to the current UTC time.

    :param context: task execution context (not read by this method)
    """
    if isinstance(self.execution_date, datetime.datetime):
        execution_date = self.execution_date
    elif isinstance(self.execution_date, str):
        execution_date = timezone.parse(self.execution_date)
        self.execution_date = execution_date
    else:
        execution_date = timezone.utcnow()

    run_id = DagRun.generate_run_id(DagRunType.MANUAL, execution_date)
    # Pass the resolved date, not ``self.execution_date``: the latter may be None,
    # and the run would then get a date that differs from the one in ``run_id``.
    trigger_dag(
        dag_id=self.trigger_dag_id,
        run_id=run_id,
        conf=self.conf,
        execution_date=execution_date,
        replace_microseconds=False,
    )
def execute(self, context: Dict):
    """Trigger ``self.trigger_dag_id``, clearing the existing run if requested.

    The execution date is resolved from ``self.execution_date``: a datetime is
    used as-is, a string is parsed (and written back to ``self.execution_date``),
    anything else falls back to the current UTC time. The resolved value is used
    for the run id, the trigger call and the reset path, so they always agree.

    :param context: task execution context (not read by this method)
    :raises DagRunAlreadyExists: if the run exists and ``self.reset_dag_run`` is falsy
    :raises DagNotFound: if the target DAG has no ``DagModel`` row during a reset
    """
    if isinstance(self.execution_date, datetime.datetime):
        execution_date = self.execution_date
    elif isinstance(self.execution_date, str):
        execution_date = timezone.parse(self.execution_date)
        self.execution_date = execution_date
    else:
        execution_date = timezone.utcnow()

    run_id = DagRun.generate_run_id(DagRunType.MANUAL, execution_date)
    try:
        # Pass the resolved date, not ``self.execution_date``: the latter may be
        # None, and the run would then get a date that differs from the one
        # encoded in ``run_id``.
        trigger_dag(
            dag_id=self.trigger_dag_id,
            run_id=run_id,
            conf=self.conf,
            execution_date=execution_date,
            replace_microseconds=False,
        )
    except DagRunAlreadyExists:
        if not self.reset_dag_run:
            raise

        self.log.info("Clearing %s on %s", self.trigger_dag_id, execution_date)

        # Get target dag object and call clear()
        dag_model = DagModel.get_current(self.trigger_dag_id)
        if dag_model is None:
            raise DagNotFound(f"Dag id {self.trigger_dag_id} not found in DagModel")

        dag_bag = DagBag(
            dag_folder=dag_model.fileloc,
            store_serialized_dags=settings.STORE_SERIALIZED_DAGS,
        )
        dag = dag_bag.get_dag(self.trigger_dag_id)
        # Clear exactly the resolved date; None bounds would not restrict the range.
        dag.clear(start_date=execution_date, end_date=execution_date)
def test_should_response_200(self):
    """Listing the XCom entries of one task instance returns 200 and both entries."""
    dag_id = 'test-dag-id'
    task_id = 'test-task-id'
    execution_date = '2005-04-02T00:00:00+00:00'
    execution_date_parsed = parse_execution_date(execution_date)
    dag_run_id = DR.generate_run_id(DagRunType.MANUAL, execution_date_parsed)

    self._create_xcom_entries(dag_id, dag_run_id, execution_date_parsed, task_id)

    response = self.client.get(
        f"/api/v1/dags/{dag_id}/dagRuns/{dag_run_id}/taskInstances/{task_id}/xcomEntries",
        environ_overrides={'REMOTE_USER': "******"},
    )
    self.assertEqual(200, response.status_code)

    # Timestamps are set at insert time, so replace them with a fixed marker.
    response_data = response.json
    for xcom_entry in response_data['xcom_entries']:
        xcom_entry['timestamp'] = "TIMESTAMP"

    # Compare the object that was normalised above rather than a second
    # ``response.json`` access, so the test does not depend on that property
    # handing back the very same (cached) dict.
    self.assertEqual(
        response_data,
        {
            'xcom_entries': [
                {
                    'dag_id': dag_id,
                    'execution_date': execution_date,
                    'key': 'test-xcom-key-1',
                    'task_id': task_id,
                    'timestamp': "TIMESTAMP",
                },
                {
                    'dag_id': dag_id,
                    'execution_date': execution_date,
                    'key': 'test-xcom-key-2',
                    'task_id': task_id,
                    'timestamp': "TIMESTAMP",
                },
            ],
            'total_entries': 2,
        },
    )
def create_context(task):
    """Build a minimal execution context dict for ``task``.

    The context holds a DAG with id ``"dag"``, a manual ``DagRun`` dated
    2016-01-01 01:00 (Europe/Amsterdam) and a ``TaskInstance`` whose
    ``xcom_push`` is replaced by a ``mock.Mock``.

    :param task: the operator the task instance is created for
    :return: dict with the keys ``dag``, ``run_id``, ``task``, ``ti`` and ``task_instance``
    """
    dag = DAG(dag_id="dag")
    amsterdam = pendulum.timezone("Europe/Amsterdam")
    execution_date = timezone.datetime(2016, 1, 1, 1, 0, 0, tzinfo=amsterdam)
    manual_run_id = DagRun.generate_run_id(DagRunType.MANUAL, execution_date)
    dag_run = DagRun(dag_id=dag.dag_id, execution_date=execution_date, run_id=manual_run_id)

    ti = TaskInstance(task=task)
    ti.dag_run = dag_run
    ti.dag_id = dag.dag_id
    # Replaced with a mock so callers can inspect pushes.
    ti.xcom_push = mock.Mock()

    context = {
        "dag": dag,
        "run_id": dag_run.run_id,
        "task": task,
        "ti": ti,
        "task_instance": ti,
    }
    return context
def execute(self, context: Context):
    """Trigger ``self.trigger_dag_id``, publish the run via XCom, optionally wait.

    :param context: task execution context; must provide ``context['task_instance']``
    :raises DagRunAlreadyExists: if the run exists and ``self.reset_dag_run`` is falsy
    :raises DagNotFound: if the target DAG has no ``DagModel`` row during a reset
    :raises RuntimeError: if no dag run object was obtained
    :raises AirflowException: if the awaited run reaches one of ``self.failed_states``
    """
    requested_date = self.execution_date
    if isinstance(requested_date, datetime.datetime):
        parsed_execution_date = requested_date
    elif isinstance(requested_date, str):
        parsed_execution_date = timezone.parse(requested_date)
    else:
        parsed_execution_date = timezone.utcnow()

    # An explicit trigger_run_id wins; otherwise derive a manual run id from the date.
    run_id = self.trigger_run_id or DagRun.generate_run_id(DagRunType.MANUAL, parsed_execution_date)

    try:
        dag_run = trigger_dag(
            dag_id=self.trigger_dag_id,
            run_id=run_id,
            conf=self.conf,
            execution_date=parsed_execution_date,
            replace_microseconds=False,
        )
    except DagRunAlreadyExists as e:
        if not self.reset_dag_run:
            raise e

        self.log.info("Clearing %s on %s", self.trigger_dag_id, parsed_execution_date)

        # Get target dag object and call clear()
        dag_model = DagModel.get_current(self.trigger_dag_id)
        if dag_model is None:
            raise DagNotFound(f"Dag id {self.trigger_dag_id} not found in DagModel")

        target_dag = DagBag(dag_folder=dag_model.fileloc, read_dags_from_db=True).get_dag(
            self.trigger_dag_id
        )
        target_dag.clear(start_date=parsed_execution_date, end_date=parsed_execution_date)
        dag_run = DagRun.find(dag_id=target_dag.dag_id, run_id=run_id)[0]

    if dag_run is None:
        raise RuntimeError("The dag_run should be set here!")

    # Store the execution date from the dag run (either created or found above) to
    # be used when creating the extra link on the webserver.
    ti = context['task_instance']
    ti.xcom_push(key=XCOM_EXECUTION_DATE_ISO, value=dag_run.execution_date.isoformat())
    ti.xcom_push(key=XCOM_RUN_ID, value=dag_run.run_id)

    if not self.wait_for_completion:
        return

    # wait for dag to complete
    while True:
        self.log.info(
            'Waiting for %s on %s to become allowed state %s ...',
            self.trigger_dag_id,
            dag_run.execution_date,
            self.allowed_states,
        )
        time.sleep(self.poke_interval)

        dag_run.refresh_from_db()
        current_state = dag_run.state
        if current_state in self.failed_states:
            raise AirflowException(f"{self.trigger_dag_id} failed with failed states {current_state}")
        if current_state in self.allowed_states:
            self.log.info("%s finished with allowed state %s", self.trigger_dag_id, current_state)
            return
def _trigger_dag(
    dag_id: str,
    dag_bag: DagBag,
    run_id: Optional[str] = None,
    conf: Optional[Union[dict, str]] = None,
    execution_date: Optional[datetime] = None,
    replace_microseconds: bool = True,
) -> List[Optional[DagRun]]:
    """Create queued dag runs for a DAG and each of its subdags.

    :param dag_id: DAG ID
    :param dag_bag: DAG Bag model
    :param run_id: ID of the dag_run; generated from the execution date when falsy
    :param conf: configuration, either a dict or a JSON string
    :param execution_date: date of execution; current UTC time when falsy
    :param replace_microseconds: whether microseconds should be zeroed
    :return: list of triggered dags
    :raises DagNotFound: if ``dag_id`` is not in ``dag_bag.dags``
    :raises ValueError: if the date is not localized or precedes the DAG's
        ``default_args`` start date
    :raises DagRunAlreadyExists: if a duplicate run is found
    """
    dag = dag_bag.get_dag(dag_id)  # prefetch dag if it is stored serialized

    if dag_id not in dag_bag.dags:
        raise DagNotFound(f"Dag id {dag_id} not found")

    if not execution_date:
        execution_date = timezone.utcnow()

    if not timezone.is_localized(execution_date):
        raise ValueError("The execution_date should be localized")

    if replace_microseconds:
        execution_date = execution_date.replace(microsecond=0)

    default_args = dag.default_args
    if default_args and 'start_date' in default_args:
        min_dag_start_date = default_args["start_date"]
        too_early = min_dag_start_date and execution_date < min_dag_start_date
        if too_early:
            raise ValueError(
                f"The execution_date [{execution_date.isoformat()}] should be >= start_date "
                f"[{min_dag_start_date.isoformat()}] from DAG's default_args"
            )

    if not run_id:
        run_id = DagRun.generate_run_id(DagRunType.MANUAL, execution_date)

    existing_run = DagRun.find_duplicate(dag_id=dag_id, execution_date=execution_date, run_id=run_id)
    if existing_run:
        raise DagRunAlreadyExists(
            f"A Dag Run already exists for dag id {dag_id} at {execution_date} with run id {run_id}"
        )

    # Accept conf as an already-built dict or as a JSON document.
    if not conf:
        run_conf = None
    elif isinstance(conf, dict):
        run_conf = conf
    else:
        run_conf = json.loads(conf)

    # The parent DAG comes first, followed by its subdags.
    return [
        target.create_dagrun(
            run_id=run_id,
            execution_date=execution_date,
            state=State.QUEUED,
            conf=run_conf,
            external_trigger=True,
            dag_hash=dag_bag.dags_hash.get(dag_id),
        )
        for target in [dag] + dag.subdags
    ]
def _trigger_dag(
    dag_id: str,
    dag_bag: DagBag,
    dag_run: DagModel,
    run_id: Optional[str],
    conf: Optional[Union[dict, str]],
    execution_date: Optional[datetime],
    replace_microseconds: bool,
) -> List[DagRun]:  # pylint: disable=too-many-arguments
    """Triggers DAG run.

    :param dag_id: DAG ID
    :param dag_bag: DAG Bag model
    :param dag_run: object whose ``find(dag_id=..., run_id=...)`` is used to look up
        existing runs (annotated as ``DagModel``; presumably a ``DagRun`` --
        TODO confirm against callers)
    :param run_id: ID of the dag_run; generated from the execution date when falsy
    :param conf: configuration, either a dict or a JSON string
    :param execution_date: date of execution; current UTC time when falsy
    :param replace_microseconds: whether microseconds should be zeroed
    :return: list of triggered dags
    :raises DagNotFound: if ``dag_id`` is not in ``dag_bag.dags``
    :raises ValueError: if the date is not localized or precedes the DAG's
        ``default_args`` start date
    :raises DagRunAlreadyExists: if a run with ``run_id`` already exists
    """
    dag = dag_bag.get_dag(dag_id)  # prefetch dag if it is stored serialized

    if dag_id not in dag_bag.dags:
        raise DagNotFound(f"Dag id {dag_id} not found")

    execution_date = execution_date if execution_date else timezone.utcnow()

    if not timezone.is_localized(execution_date):
        raise ValueError("The execution_date should be localized")

    if replace_microseconds:
        execution_date = execution_date.replace(microsecond=0)

    if dag.default_args and 'start_date' in dag.default_args:
        min_dag_start_date = dag.default_args["start_date"]
        if min_dag_start_date and execution_date < min_dag_start_date:
            raise ValueError(
                "The execution_date [{0}] should be >= start_date [{1}] from DAG's default_args".format(
                    execution_date.isoformat(), min_dag_start_date.isoformat()
                )
            )

    run_id = run_id or DagRun.generate_run_id(DagRunType.MANUAL, execution_date)

    # NOTE(review): ``find`` appears to return a collection of matching runs, not
    # a single run, so the message uses ``run_id`` directly; reading ``.run_id``
    # off the result would raise AttributeError instead of the intended error.
    existing_runs = dag_run.find(dag_id=dag_id, run_id=run_id)
    if existing_runs:
        raise DagRunAlreadyExists(f"Run id {run_id} already exists for dag id {dag_id}")

    run_conf = None
    if conf:
        run_conf = conf if isinstance(conf, dict) else json.loads(conf)

    # Trigger the parent DAG first, followed by its subdags.
    triggers = []
    dags_to_trigger = [dag] + dag.subdags
    for _dag in dags_to_trigger:
        trigger = _dag.create_dagrun(
            run_id=run_id,
            execution_date=execution_date,
            state=State.RUNNING,
            conf=run_conf,
            external_trigger=True,
        )
        triggers.append(trigger)

    return triggers