Example #1
# Imports assumed by these snippets; the module paths follow early TFX
# and Airflow 1.x releases and may differ in other versions.
import datetime
import os

from airflow import models
from airflow.operators import dummy_operator

from tfx.orchestration.airflow import airflow_pipeline
from tfx.utils import logging_utils
from tfx.utils.types import TfxArtifact


def setUp(self):
    # Prefer the test runner's declared output dir for the metadata DB.
    self._temp_dir = os.environ.get('TEST_UNDECLARED_OUTPUTS_DIR',
                                    self.get_temp_dir())
    # Stand-in DAG and operators for the sub-tasks of a single component.
    dummy_dag = models.DAG(dag_id='my_component',
                           start_date=datetime.datetime(2019, 1, 1))
    self.checkcache_op = dummy_operator.DummyOperator(
        task_id='my_component.checkcache', dag=dummy_dag)
    self.tfx_python_op = dummy_operator.DummyOperator(
        task_id='my_component.pythonexec', dag=dummy_dag)
    self.noop_sink_op = dummy_operator.DummyOperator(
        task_id='my_component.noop_sink', dag=dummy_dag)
    self.publishexec_op = dummy_operator.DummyOperator(
        task_id='my_component.publishexec', dag=dummy_dag)
    self._logger_config = logging_utils.LoggerConfig()
    # Parent pipeline under test, with result caching enabled.
    self.parent_dag = airflow_pipeline.AirflowPipeline(
        pipeline_name='pipeline_name',
        start_date=datetime.datetime(2018, 1, 1),
        schedule_interval=None,
        pipeline_root='pipeline_root',
        metadata_db_root=self._temp_dir,
        metadata_connection_config=None,
        additional_pipeline_args=None,
        enable_cache=True)
    self.input_dict = {'i': [TfxArtifact('i')]}
    self.output_dict = {'o': [TfxArtifact('o')]}
    self.exec_properties = {'e': 'e'}
    self.driver_options = {'d': 'd'}
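For context, a test method built on this fixture could wire the dummy tasks together and assert on the resulting graph. A minimal sketch using standard Airflow 1.x BaseOperator APIs; the wiring and the assertion are illustrative, not taken from the original test:

def testTaskWiring(self):
    # Chain the stand-in tasks the way a real component would be wired.
    self.checkcache_op.set_downstream(self.tfx_python_op)
    self.tfx_python_op.set_downstream(self.publishexec_op)
    # downstream_task_ids is part of Airflow's BaseOperator API.
    self.assertIn('my_component.pythonexec',
                  self.checkcache_op.downstream_task_ids)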
Example #2
def setUp(self):
    dummy_dag = models.DAG(dag_id='my_component',
                           start_date=datetime.datetime(2019, 1, 1))
    self.checkcache_op = dummy_operator.DummyOperator(
        task_id='my_component.checkcache', dag=dummy_dag)
    self.tfx_python_op = dummy_operator.DummyOperator(
        task_id='my_component.pythonexec', dag=dummy_dag)
    self.tfx_docker_op = dummy_operator.DummyOperator(
        task_id='my_component.dockerexec', dag=dummy_dag)
    self.publishcache_op = dummy_operator.DummyOperator(
        task_id='my_component.publishcache', dag=dummy_dag)
    self.publishexec_op = dummy_operator.DummyOperator(
        task_id='my_component.publishexec', dag=dummy_dag)
    # This variant also exercises the Docker executor and logging options.
    self.parent_dag = airflow_pipeline.AirflowPipeline(
        pipeline_name='pipeline_name',
        start_date=datetime.datetime(2018, 1, 1),
        schedule_interval=None,
        pipeline_root='pipeline_root',
        metadata_db_root='metadata_db_root',
        metadata_connection_config=None,
        additional_pipeline_args=None,
        docker_operator_cfg=None,
        enable_cache=True,
        log_root='log_root')
    # TfxType is the pre-rename spelling of TfxArtifact in older TFX
    # releases (imported from tfx.utils.types there as well).
    self.input_dict = {'i': [TfxType('i')]}
    self.output_dict = {'o': [TfxType('o')]}
    self.exec_properties = {'e': 'e'}
    self.driver_options = {'d': 'd'}
Example #3
def run(self, pipeline):
    """Deploys the given logical pipeline on Airflow.

    Args:
      pipeline: Logical pipeline containing pipeline args and components.

    Returns:
      An Airflow DAG.
    """
    # Merge Airflow-specific configs with the pipeline args.
    self._config.update(pipeline.pipeline_args)
    airflow_dag = airflow_pipeline.AirflowPipeline(**self._config)

    # For every component in the logical pipeline, add the corresponding
    # Airflow component to the DAG.
    for component in pipeline.components:
        airflow_component.Component(
            airflow_dag,
            component_name=component.component_name,
            unique_name=component.unique_name,
            driver=component.driver,
            executor=component.executor,
            input_dict=self._prepare_input_dict(component.input_dict),
            output_dict=self._prepare_output_dict(component.outputs),
            exec_properties=component.exec_properties)

    return airflow_dag
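A run() method like this is typically invoked from an Airflow DAG definition file, where the returned DAG must be bound at module level so the scheduler can discover it. A sketch only; AirflowDAGRunner and _create_pipeline are assumed names, not confirmed by this listing:

import datetime

# Assumed names, for illustration only: a runner class exposing the
# run() method above, and a helper that builds the logical pipeline.
runner = AirflowDAGRunner({
    'schedule_interval': None,
    'start_date': datetime.datetime(2019, 1, 1),
})
DAG = runner.run(_create_pipeline())  # module-level so Airflow finds it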
Example #4
def setUp(self):
    self.pipeline = airflow_pipeline.AirflowPipeline(
        pipeline_name='pipeline_name',
        start_date=datetime.datetime(2018, 1, 1),
        schedule_interval=None,
        pipeline_root='pipeline_root',
        metadata_db_root='metadata_db_root',
        metadata_connection_config=None,
        additional_pipeline_args=None,
        docker_operator_cfg=None,
        enable_cache=True)
Example #5
def setUp(self):
    self._temp_dir = os.environ.get('TEST_UNDECLARED_OUTPUTS_DIR',
                                    self.get_temp_dir())
    self.pipeline = airflow_pipeline.AirflowPipeline(
        pipeline_name='pipeline_name',
        start_date=datetime.datetime(2018, 1, 1),
        schedule_interval=None,
        pipeline_root='pipeline_root',
        metadata_db_root=self._temp_dir,
        metadata_connection_config=None,
        additional_pipeline_args=None,
        enable_cache=True)
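The last fixture repeats the temp-directory pattern from Example #1. A runnable sketch of just that pattern, with an illustrative fallback:

import os
import tempfile

# Prefer the runner-declared output directory when present (Bazel-style
# runners set TEST_UNDECLARED_OUTPUTS_DIR and archive its contents with
# the test results); otherwise fall back to a scratch directory.
metadata_db_root = os.environ.get('TEST_UNDECLARED_OUTPUTS_DIR',
                                  tempfile.mkdtemp())
print('metadata DB root:', metadata_db_root)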