def setUp(self):
  self._temp_dir = os.environ.get('TEST_UNDECLARED_OUTPUTS_DIR',
                                  self.get_temp_dir())
  dummy_dag = models.DAG(
      dag_id='my_component', start_date=datetime.datetime(2019, 1, 1))
  self.checkcache_op = dummy_operator.DummyOperator(
      task_id='my_component.checkcache', dag=dummy_dag)
  self.tfx_python_op = dummy_operator.DummyOperator(
      task_id='my_component.pythonexec', dag=dummy_dag)
  self.noop_sink_op = dummy_operator.DummyOperator(
      task_id='my_component.noop_sink', dag=dummy_dag)
  self.publishexec_op = dummy_operator.DummyOperator(
      task_id='my_component.publishexec', dag=dummy_dag)
  self._logger_config = logging_utils.LoggerConfig()
  self.parent_dag = airflow_pipeline.AirflowPipeline(
      pipeline_name='pipeline_name',
      start_date=datetime.datetime(2018, 1, 1),
      schedule_interval=None,
      pipeline_root='pipeline_root',
      metadata_db_root=self._temp_dir,
      metadata_connection_config=None,
      additional_pipeline_args=None,
      enable_cache=True)
  self.input_dict = {'i': [TfxArtifact('i')]}
  self.output_dict = {'o': [TfxArtifact('o')]}
  self.exec_properties = {'e': 'e'}
  self.driver_options = {'d': 'd'}
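# A hedged sketch of a test that could follow the setUp above, constructing
# airflow_component.Component with the same signature the runner's run()
# method uses below. The driver/executor classes and the test's assertions
# are assumptions for illustration, not confirmed by the fixture code.
def test_component_construction(self):
  component = airflow_component.Component(
      self.parent_dag,
      component_name='my_component',
      unique_name=None,  # Assumption: no unique name needed for this test.
      driver=base_driver.BaseDriver,
      executor=base_executor.BaseExecutor,
      input_dict=self.input_dict,
      output_dict=self.output_dict,
      exec_properties=self.exec_properties)
  self.assertIsNotNone(component)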
def setUp(self):
  dummy_dag = models.DAG(
      dag_id='my_component', start_date=datetime.datetime(2019, 1, 1))
  self.checkcache_op = dummy_operator.DummyOperator(
      task_id='my_component.checkcache', dag=dummy_dag)
  self.tfx_python_op = dummy_operator.DummyOperator(
      task_id='my_component.pythonexec', dag=dummy_dag)
  self.tfx_docker_op = dummy_operator.DummyOperator(
      task_id='my_component.dockerexec', dag=dummy_dag)
  self.publishcache_op = dummy_operator.DummyOperator(
      task_id='my_component.publishcache', dag=dummy_dag)
  self.publishexec_op = dummy_operator.DummyOperator(
      task_id='my_component.publishexec', dag=dummy_dag)
  self.parent_dag = airflow_pipeline.AirflowPipeline(
      pipeline_name='pipeline_name',
      start_date=datetime.datetime(2018, 1, 1),
      schedule_interval=None,
      pipeline_root='pipeline_root',
      metadata_db_root='metadata_db_root',
      metadata_connection_config=None,
      additional_pipeline_args=None,
      docker_operator_cfg=None,
      enable_cache=True,
      log_root='log_root')
  self.input_dict = {'i': [TfxType('i')]}
  self.output_dict = {'o': [TfxType('o')]}
  self.exec_properties = {'e': 'e'}
  self.driver_options = {'d': 'd'}
def run(self, pipeline):
  """Deploys the given logical pipeline on Airflow.

  Args:
    pipeline: Logical pipeline containing pipeline args and components.

  Returns:
    An Airflow DAG.
  """
  # Merge Airflow-specific configs with the pipeline args.
  self._config.update(pipeline.pipeline_args)
  airflow_dag = airflow_pipeline.AirflowPipeline(**self._config)
  # For each component in the logical pipeline, add the corresponding
  # Airflow component to the DAG.
  for component in pipeline.components:
    airflow_component.Component(
        airflow_dag,
        component_name=component.component_name,
        unique_name=component.unique_name,
        driver=component.driver,
        executor=component.executor,
        input_dict=self._prepare_input_dict(component.input_dict),
        output_dict=self._prepare_output_dict(component.outputs),
        exec_properties=component.exec_properties)
  return airflow_dag
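# A minimal usage sketch of the run() method above, as it might appear in an
# Airflow DAG definition file. Only run()'s contract (config merged with
# pipeline args, returns an Airflow DAG) is taken from the code above; the
# `AirflowDAGRunner` class name and the `_create_pipeline` helper are
# assumptions for illustration.
_airflow_config = {
    'schedule_interval': None,
    'start_date': datetime.datetime(2019, 1, 1),
}
# run() returns an Airflow DAG; binding it to a module-level variable lets
# the Airflow scheduler discover it when scanning the DAGs folder.
DAG = AirflowDAGRunner(_airflow_config).run(_create_pipeline())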
def setUp(self):
  self.pipeline = airflow_pipeline.AirflowPipeline(
      pipeline_name='pipeline_name',
      start_date=datetime.datetime(2018, 1, 1),
      schedule_interval=None,
      pipeline_root='pipeline_root',
      metadata_db_root='metadata_db_root',
      metadata_connection_config=None,
      additional_pipeline_args=None,
      docker_operator_cfg=None,
      enable_cache=True)
def setUp(self):
  self._temp_dir = os.environ.get('TEST_UNDECLARED_OUTPUTS_DIR',
                                  self.get_temp_dir())
  self.pipeline = airflow_pipeline.AirflowPipeline(
      pipeline_name='pipeline_name',
      start_date=datetime.datetime(2018, 1, 1),
      schedule_interval=None,
      pipeline_root='pipeline_root',
      metadata_db_root=self._temp_dir,
      metadata_connection_config=None,
      additional_pipeline_args=None,
      enable_cache=True)
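# A hedged sketch of a test that could follow this setUp. It assumes that
# AirflowPipeline surfaces its pipeline_name as the underlying Airflow DAG
# id (consistent with it being constructed like a models.DAG above); that
# mapping is an assumption, not confirmed by the fixture code.
def test_pipeline_dag_id(self):
  # The fixture's pipeline_name should be the DAG id Airflow schedules by.
  self.assertEqual('pipeline_name', self.pipeline.dag_id)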