def run_project(project_root_path):
    af.set_project_config_file(project_root_path + '/project.yaml')

    # Config the command line job: platform is local and engine is cmd_line
    cmd_job_config = af.BaseJobConfig(platform=LocalPlatform.platform(), engine=CMDEngine().engine())
    with af.config(cmd_job_config):
        # Command line job executor
        cmd_job = af.user_define_operation(executor=CmdExecutor(cmd_line="echo Start AI flow"))

    # Config the python job: platform is local and engine is python
    python_job_config = af.BaseJobConfig(platform=LocalPlatform.platform(), engine=PythonEngine.engine())
    # Set the execution mode of this python job to BATCH,
    # which means jobs with this config run in batch mode.
    python_job_config.exec_mode = af.ExecutionMode.BATCH

    with af.config(python_job_config):
        # Path of the source data (under the '..../simple_transform_airflow' dir)
        source_path = os.path.dirname(os.path.abspath(__file__)) + '/source_data.csv'
        # Path of the sink data
        sink_path = os.path.dirname(os.path.abspath(__file__)) + '/sink_data.csv'

        # Register the example in the metadata service so the data source stays replaceable
        read_example_meta = af.register_example(name='read_example',
                                                support_type=ExampleSupportType.EXAMPLE_BATCH,
                                                data_format='csv',
                                                data_type='pandas',
                                                batch_uri=source_path)

        # Read the training example using af.read_example();
        # example_info is the meta information of the example
        read_example_channel = af.read_example(example_info=read_example_meta,
                                               exec_args=ExecuteArgs(
                                                   batch_properties=Args(header=None,
                                                                         names=["a", "b", "c"])))

        # Transform examples using af.transform()
        transform_channel = af.transform(input_data_list=[read_example_channel],
                                         executor=PythonObjectExecutor(python_object=SimpleTransform()))

        write_example_meta = af.register_example(name='write_example',
                                                 support_type=ExampleSupportType.EXAMPLE_BATCH,
                                                 data_format='csv',
                                                 data_type='pandas',
                                                 batch_uri=sink_path)

        # Write the transformed example to the sink path
        write = af.write_example(input_data=transform_channel,
                                 example_info=write_example_meta,
                                 exec_args=ExecuteArgs(
                                     batch_properties=Args(sep=',', header=False, index=False)))

    # Add a control dependency: the read_example job starts only after the command line job finishes.
    af.stop_before_control_dependency(read_example_channel, cmd_job)

    transform_dag = 'simple_transform'
    af.deploy_to_airflow(project_root_path, dag_id=transform_dag)
    context = af.run(project_path=project_root_path,
                     dag_id=transform_dag,
                     scheduler_type=SchedulerType.AIRFLOW)
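
# --- Sketch (assumed, not shown in this document) of the SimpleTransform object passed to
# PythonObjectExecutor above. The real base class and execute() signature come from the
# AI Flow Python SDK; here we only assume it receives the pandas DataFrames produced by
# read_example and returns the transformed ones.
class SimpleTransform:
    def execute(self, function_context, input_list):
        # Hypothetical transformation: drop rows with missing values in column "a".
        df = input_list[0]
        return [df.dropna(subset=["a"])]


# --- Usage sketch (not in the original): run_project expects the directory that contains
# project.yaml. The path below is hypothetical and depends on your project layout.
if __name__ == '__main__':
    run_project(os.path.dirname(os.path.abspath(__file__)))
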
# Initializer that pins both the platform (local) and the engine (cmd_line) in the parent job config.
def __init__(self):
    super().__init__(platform=LocalPlatform.platform(), engine=CMDEngine.engine())
# Variant that pins only the engine, keeping the parent's default platform.
def __init__(self):
    super().__init__(engine=CMDEngine.engine())
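
# Sketch (assumed, not from the source) of how initializers like the two above would sit
# inside BaseJobConfig subclasses; the class names are hypothetical.
class LocalCmdJobConfig(af.BaseJobConfig):
    def __init__(self):
        # Pin both the platform (local) and the engine (cmd_line).
        super().__init__(platform=LocalPlatform.platform(), engine=CMDEngine.engine())


class CmdJobConfig(af.BaseJobConfig):
    def __init__(self):
        # Pin only the engine, keeping the parent's default platform.
        super().__init__(engine=CMDEngine.engine())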