def build(self, dag):
    """
    Populate the given DAG with the adapter task and the sensor gating it.

    An ``AdapterOperator`` with a one-hour fixed duration strategy is created
    for ``self.schema`` and handed to a ``TaskSensorService`` so that a
    sequential sensor is added for it. An "hour is ready" sensor is then built
    for the same schema and placed upstream of the adapter task.

    :param dag: The DAG to which all relevant "adapter" operators should be added
    :type dag: airflow.models.DAG
    :return: The value of ``hour_is_ready_sensor >> adapter_operator``
             (documented by the original author as the adapter_jar operator)
    :rtype: presidio.operators.fixed_duration_jar_operator.FixedDurationJarOperator
    """
    self.log.debug("populating the %s dag with adapter tasks", dag.dag_id)

    sensor_service = TaskSensorService()

    adapter_task = AdapterOperator(
        fixed_duration_strategy=timedelta(hours=1),
        command=PresidioDagBuilder.presidio_command,
        schema=self.schema,
        dag=dag)
    sensor_service.add_task_sequential_sensor(adapter_task)

    # The readiness sensor is allowed to wait for up to one week.
    one_week_in_seconds = 60 * 60 * 24 * 7
    readiness_sensor_builder = HourIsReadySensorOperatorBuilder(
        self.schema,
        timeout=one_week_in_seconds,
        time_to_sleep_in_seconds=60)
    readiness_sensor = readiness_sensor_builder.build(dag)

    # Keep returning the result of the ">>" expression itself, exactly as
    # the wiring expression evaluates it.
    wired = readiness_sensor >> adapter_task
    return wired
def __init__(self, builder, dag, add_sequential_sensor,
             short_circuit_operator, *args, **kwargs):
    """
    Run ``builder`` against ``dag`` and record the entry/exit tasks it added.

    The tasks of ``dag`` are captured before and after ``builder.build(dag)``;
    the difference is treated as the group of new tasks. Among those, tasks
    with no upstream become ``self._first_tasks`` and tasks with no downstream
    become ``self._last_tasks`` (other ``MultiPointGroupConnector`` instances
    are excluded from both).

    :param builder: object exposing ``build(dag)`` that adds tasks to the DAG
    :param dag: the DAG this connector belongs to and the builder populates
    :param add_sequential_sensor: when truthy, ``self.add_sensor`` is invoked
           on all new tasks and its result replaces ``self._first_tasks``
    :param short_circuit_operator: when truthy, ``self.add_short_circuit`` is
           invoked with it and its result replaces ``self._first_tasks``
    :param args: forwarded to the parent constructor
    :param kwargs: forwarded to the parent constructor
    """
    super(MultiPointGroupConnector, self).__init__(dag=dag, *args, **kwargs)

    # Snapshot taken before the builder runs, so its additions can be told apart.
    tasks_before_build = dag.tasks
    builder.build(dag)

    added_tasks = []
    for candidate in dag.tasks:
        if candidate not in tasks_before_build:
            added_tasks.append(candidate)

    entry_tasks = []
    exit_tasks = []
    for added in added_tasks:
        # Nested connectors are never considered boundary tasks.
        if isinstance(added, MultiPointGroupConnector):
            continue
        if not added.upstream_list:
            entry_tasks.append(added)
        if not added.downstream_list:
            exit_tasks.append(added)
    self._first_tasks = entry_tasks
    self._last_tasks = exit_tasks

    sensor_service = TaskSensorService()

    if add_sequential_sensor:
        # Note: the full list of new tasks is passed here, not only the entry tasks.
        self._first_tasks = self.add_sensor(added_tasks, sensor_service)

    if short_circuit_operator:
        self._first_tasks = self.add_short_circuit(
            short_circuit_operator, self._first_tasks, sensor_service)
def _build_output_operator(self, smart_record_conf_name, entity_type, dag,
                           smart_operator):
    """
    Add the output tasks to ``dag`` and wire them after ``smart_operator``.

    Creates, in order: an hourly short circuit, the hourly ``OutputOperator``
    (with a sequential sensor and the short circuit attached through
    ``TaskSensorService``), the entity score operator, and a daily short
    circuit placed upstream of the entity score operator. The hourly output
    operator and the daily short circuit are then handed to
    ``self._push_forwarding`` and ``smart_operator`` is set upstream of the
    hourly short circuit.

    :param smart_record_conf_name: passed to ``OutputOperator`` and
           ``EntityScoreOperatorBuilder``
    :param entity_type: passed to ``OutputOperator``,
           ``EntityScoreOperatorBuilder`` and ``self._push_forwarding``
    :param dag: the DAG to populate
    :type dag: airflow.models.DAG
    :param smart_operator: operator to place upstream of the output short circuit
    :return: the entity score operator built by ``EntityScoreOperatorBuilder``
    """
    self.log.debug("populating the %s dag with output tasks", dag.dag_id)

    # --- hourly output processing ---
    sensor_service = TaskSensorService()

    def _hourly_output_may_run(**kwargs):
        # Output must run on intervals that are no shorter than an hour, and
        # only once the configured gap from the DAG start date is satisfied.
        # Both checks are always evaluated ("&" does not short-circuit).
        on_hourly_slot = is_execution_date_valid(
            kwargs['execution_date'],
            FIX_DURATION_STRATEGY_HOURLY,
            dag.schedule_interval)
        gap_is_valid = PresidioDagBuilder.validate_the_gap_between_dag_start_date_and_current_execution_date(
            dag,
            self._min_gap_from_dag_start_date_to_start_scoring,
            kwargs['execution_date'],
            dag.schedule_interval)
        return on_hourly_slot & gap_is_valid

    hourly_gate = self._create_infinite_retry_short_circuit_operator(
        task_id='output_short_circuit',
        dag=dag,
        python_callable=_hourly_output_may_run)

    hourly_output_task = OutputOperator(
        fixed_duration_strategy=timedelta(hours=1),
        command=PresidioDagBuilder.presidio_command,
        smart_record_conf_name=smart_record_conf_name,
        entity_type=entity_type,
        dag=dag)
    sensor_service.add_task_sequential_sensor(hourly_output_task)
    sensor_service.add_task_short_circuit(hourly_output_task, hourly_gate)

    # --- entity score ---
    entity_score_builder = EntityScoreOperatorBuilder(
        smart_record_conf_name, entity_type)
    entity_score_task = entity_score_builder.build(dag)

    def _daily_output_may_run(**kwargs):
        # Same pair of checks as the hourly gate, using the daily strategy and
        # the gap computed by EntityScoreOperatorBuilder from the conf reader.
        on_daily_slot = is_execution_date_valid(
            kwargs['execution_date'],
            FIX_DURATION_STRATEGY_DAILY,
            dag.schedule_interval)
        gap_is_valid = PresidioDagBuilder.validate_the_gap_between_dag_start_date_and_current_execution_date(
            dag,
            EntityScoreOperatorBuilder.get_min_gap_from_dag_start_date_to_start_modeling(
                PresidioDagBuilder.conf_reader),
            kwargs['execution_date'],
            dag.schedule_interval)
        return on_daily_slot & gap_is_valid

    # The daily gate connects output processing with the entity score
    # recalculation.
    daily_gate = self._create_infinite_retry_short_circuit_operator(
        task_id='output_daily_short_circuit',
        dag=dag,
        python_callable=_daily_output_may_run)

    daily_gate >> entity_score_task
    self._push_forwarding(hourly_output_task, daily_gate, dag, entity_type)
    smart_operator >> hourly_gate

    return entity_score_task
def _create_sub_dag_operator(self, sub_dag_builder, sub_dag_id, dag,
                             short_circuit_operator, add_sequential_sensor):
    """
    Create a sub dag of the received "dag", fill it with a flow using the
    sub_dag_builder and wrap it with a sub dag operator.

    The child DAG id is ``"<dag.dag_id>.<sub_dag_id>"`` and it copies the
    parent's schedule interval, start date and default args. Retry settings
    for the wrapping operator come from ``self._calc_retry_args(sub_dag_id)``.

    :param sub_dag_builder: builder whose ``build(dag)`` result is used as the
           operator's ``subdag``
    :param sub_dag_id: id suffix of the child DAG and task id of the operator
    :param dag: the parent DAG
    :param short_circuit_operator: when truthy, attached to the new operator
           via ``TaskSensorService.add_task_short_circuit``
    :param add_sequential_sensor: when truthy, a sequential sensor is added
           for the new operator via ``TaskSensorService``
    :return: SubDagOperator
    """
    child_dag_id = '{}.{}'.format(dag.dag_id, sub_dag_id)
    child_dag = DAG(
        dag_id=child_dag_id,
        schedule_interval=dag.schedule_interval,
        start_date=dag.start_date,
        default_args=dag.default_args)

    retry_conf = self._calc_retry_args(sub_dag_id)

    # Populate the child DAG first, then resolve the retry settings, keeping
    # the same evaluation order as a single constructor call would have.
    populated_dag = sub_dag_builder.build(child_dag)
    retries = retry_conf['retries']
    retry_delay = timedelta(seconds=int(retry_conf['retry_delay']))
    exponential_backoff = retry_conf['retry_exponential_backoff']
    max_retry_delay = timedelta(seconds=int(retry_conf['max_retry_delay']))

    sub_dag_operator = SubDagOperator(
        subdag=populated_dag,
        task_id=sub_dag_id,
        dag=dag,
        retries=retries,
        retry_delay=retry_delay,
        retry_exponential_backoff=exponential_backoff,
        max_retry_delay=max_retry_delay)

    sensor_service = TaskSensorService()
    if add_sequential_sensor:
        sensor_service.add_task_sequential_sensor(sub_dag_operator)
    if short_circuit_operator:
        sensor_service.add_task_short_circuit(
            sub_dag_operator, short_circuit_operator)

    return sub_dag_operator
def _build_smart(self, root_dag_gap_sensor_operator, smart_dag,
                 smart_record_conf_name):
    """
    Add the smart events task and its model trigger to ``smart_dag``.

    An hourly short circuit is created and placed downstream of
    ``root_dag_gap_sensor_operator``. The ``SmartEventsOperator`` gets a
    sequential sensor and that short circuit through ``TaskSensorService``.
    A trigger for the smart model DAG is then created downstream of the smart
    events task, and the model DAG's schedule interval is set with
    ``set_schedule_interval`` using the daily strategy.

    :param root_dag_gap_sensor_operator: operator placed upstream of the
           hourly short circuit
    :param smart_dag: the DAG to populate
    :type smart_dag: airflow.models.DAG
    :param smart_record_conf_name: smart configuration name, used for the
           operator and to resolve the smart model DAG id
    :return: the smart events operator
    """
    sensor_service = TaskSensorService()

    def _hourly_scoring_may_run(**kwargs):
        # Both checks are always evaluated ("&" does not short-circuit).
        on_hourly_slot = is_execution_date_valid(
            kwargs['execution_date'],
            FIX_DURATION_STRATEGY_HOURLY,
            smart_dag.schedule_interval)
        gap_is_valid = PresidioDagBuilder.validate_the_gap_between_dag_start_date_and_current_execution_date(
            smart_dag,
            self._min_gap_from_dag_start_date_to_start_scoring,
            kwargs['execution_date'],
            smart_dag.schedule_interval)
        return on_hourly_slot & gap_is_valid

    hourly_gate = self._create_infinite_retry_short_circuit_operator(
        task_id='ade_scoring_hourly_short_circuit',
        dag=smart_dag,
        python_callable=_hourly_scoring_may_run)

    smart_events_task = SmartEventsOperator(
        command=SmartEventsOperator.liors_special_run_command,
        fixed_duration_strategy=FIX_DURATION_STRATEGY_HOURLY,
        smart_events_conf=smart_record_conf_name,
        dag=smart_dag)
    sensor_service.add_task_sequential_sensor(smart_events_task)
    sensor_service.add_task_short_circuit(smart_events_task, hourly_gate)

    root_dag_gap_sensor_operator >> hourly_gate

    model_dag_id = SmartModelDagFactory.get_dag_id(smart_record_conf_name)

    def _trigger_on_daily_slot(context, dag_run_obj):
        # Hand back the dag run object only when the execution date passes
        # the daily validity check; otherwise return None.
        if is_execution_date_valid(context['execution_date'],
                                   FIX_DURATION_STRATEGY_DAILY,
                                   smart_dag.schedule_interval):
            return dag_run_obj
        return None

    model_trigger = self._create_expanded_trigger_dag_run_operator(
        "smart_model_trigger", model_dag_id, smart_dag, _trigger_on_daily_slot)

    set_schedule_interval(model_dag_id, FIX_DURATION_STRATEGY_DAILY)

    smart_events_task >> model_trigger

    return smart_events_task
'owner': 'airflow', 'depends_on_past': False, 'start_date': datetime(2015, 6, 1), 'email': ['*****@*****.**'], 'email_on_failure': False, 'email_on_retry': False, 'retries': 1, 'retry_delay': timedelta(minutes=5), # 'queue': 'bash_queue', # 'pool': 'backfill', # 'priority_weight': 10, # 'end_date': datetime(2016, 1, 1), } dag = DAG('sensor_example', default_args=default_args) taskSensorService = TaskSensorService() # t1, t2 and t3 are examples of tasks created by instantiating operators t1 = BashOperator(task_id='print_date', bash_command='date', dag=dag) taskSensorService.add_task_sequential_sensor(t1) t2 = BashOperator(task_id='sleep', bash_command='sleep 5', retries=3, dag=dag) taskSensorService.add_task_sequential_sensor(t2) templated_command = """ {% for i in range(5) %} echo "{{ ds }}" echo "{{ macros.ds_add(ds, 7)}}" echo "{{ params.my_param }}" {% endfor %} """
def build(self, dag):
    """
    Receives an indicator DAG, creates the adapter, input and scoring
    operators, links them to the DAG and configures the dependencies between
    them.

    The schema is read from ``dag.default_args['schema']``. When that schema
    is among ``InputPreProcessingDagFactory.get_registered_schemas()``, an
    input pre-processing gap sensor is placed upstream of the input operator
    and a pre-processing trigger downstream of it. A model trigger is always
    added downstream of the input operator.

    :param dag: The indicator DAG to populate
    :type dag: airflow.models.DAG
    :return: The given indicator DAG, after it has been populated
    :rtype: airflow.models.DAG
    """
    self.log.debug("populating the %s dag with input tasks", dag.dag_id)

    schema = dag.default_args.get('schema')
    adapter_task = AdapterOperatorBuilder(schema).build(dag)

    # --- input ---
    input_sensor_service = TaskSensorService()
    input_task = InputOperator(
        fixed_duration_strategy=timedelta(hours=1),
        command=PresidioDagBuilder.presidio_command,
        schema=schema,
        dag=dag)
    input_sensor_service.add_task_sequential_sensor(input_task)

    # --- scoring ---
    self.log.debug("populating the %s dag with scoring tasks", dag.dag_id)
    scoring_sensor_service = TaskSensorService()

    # Both aggregation operators are constructed with identical arguments.
    aggregation_kwargs = dict(
        fixed_duration_strategy=FIX_DURATION_STRATEGY_HOURLY,
        command=PresidioDagBuilder.presidio_command,
        data_source=schema,
        dag=dag)

    feature_aggregations_task = FeatureAggregationsOperator(**aggregation_kwargs)
    scoring_sensor_service.add_task_sequential_sensor(feature_aggregations_task)

    score_aggregations_task = ScoreAggregationsOperator(**aggregation_kwargs)
    scoring_sensor_service.add_task_sequential_sensor(score_aggregations_task)

    def _hourly_scoring_may_run(**kwargs):
        # Both checks are always evaluated ("&" does not short-circuit); the
        # schedule interval is looked up separately for each of them.
        on_hourly_slot = is_execution_date_valid(
            kwargs['execution_date'],
            FIX_DURATION_STRATEGY_HOURLY,
            get_schedule_interval(dag))
        gap_is_valid = PresidioDagBuilder.validate_the_gap_between_dag_start_date_and_current_execution_date(
            dag,
            self._min_gap_from_dag_start_date_to_start_scoring,
            kwargs['execution_date'],
            get_schedule_interval(dag))
        return on_hourly_slot & gap_is_valid

    hourly_gate = self._create_infinite_retry_short_circuit_operator(
        task_id='ade_scoring_hourly_short_circuit',
        dag=dag,
        python_callable=_hourly_scoring_may_run)

    # --- optional input pre-processing ---
    registered_schemas = InputPreProcessingDagFactory.get_registered_schemas()
    if schema in registered_schemas:
        pre_processing_trigger = self._build_input_pre_processing_trigger_operator(
            dag, schema)
        pre_processing_gap_sensor = DagIntervalGapSequentialSensorOperator(
            dag=dag,
            task_id='input_pre_processing_gap_sensor_{0}'.format(schema),
            dag_ids=[InputPreProcessingDagFactory.get_dag_id(schema)],
            interval=timedelta(hours=1),
            start_time=dag.start_date,
            fixed_duration_strategy=FIX_DURATION_STRATEGY_DAILY,
            poke_interval=5)
        pre_processing_gap_sensor >> input_task >> pre_processing_trigger

    # --- wiring ---
    adapter_task >> input_task >> hourly_gate
    scoring_sensor_service.add_task_short_circuit(
        feature_aggregations_task, hourly_gate)
    scoring_sensor_service.add_task_short_circuit(
        score_aggregations_task, hourly_gate)

    model_trigger = self._build_model_trigger_operator(dag, schema)
    input_task >> model_trigger

    return dag
def test_task_sensor_service():
    """Create and return a new ``TaskSensorService`` instance."""
    service = TaskSensorService()
    return service