def run_project(project_root_path):
    af.set_project_config_file(project_root_path + '/project.yaml')
    # Config the command line job: platform is local and engine is cmd_line.
    cmd_job_config = af.BaseJobConfig(platform=LocalPlatform.platform(),
                                      engine=CMDEngine().engine())
    with af.config(cmd_job_config):
        # Command line job executor
        cmd_job = af.user_define_operation(
            executor=CmdExecutor(cmd_line="echo Start AI flow"))
    # Config the python job: platform is local and engine is python.
    python_job_config = af.BaseJobConfig(platform=LocalPlatform.platform(),
                                         engine=PythonEngine.engine())
    # Set the execution mode of this python job to BATCH, which indicates
    # that jobs with this config run as batch jobs.
    python_job_config.exec_mode = af.ExecutionMode.BATCH
    with af.config(python_job_config):
        # Path of the source data (under the '..../simple_transform_airflow' dir)
        source_path = os.path.dirname(os.path.abspath(__file__)) + '/source_data.csv'
        # Path of the sink data
        sink_path = os.path.dirname(os.path.abspath(__file__)) + '/sink_data.csv'
        # To make the project portable, we register the example in the
        # metadata service.
        read_example_meta = af.register_example(
            name='read_example',
            support_type=ExampleSupportType.EXAMPLE_BATCH,
            data_format='csv',
            data_type='pandas',
            batch_uri=source_path)
        # Read the training example using af.read_example();
        # example_info is the meta information of the example.
        read_example_channel = af.read_example(
            example_info=read_example_meta,
            exec_args=ExecuteArgs(
                batch_properties=Args(header=None, names=["a", "b", "c"])))
        # Transform examples using af.transform().
        transform_channel = af.transform(
            input_data_list=[read_example_channel],
            executor=PythonObjectExecutor(python_object=SimpleTransform()))
        write_example_meta = af.register_example(
            name='write_example',
            support_type=ExampleSupportType.EXAMPLE_BATCH,
            data_format='csv',
            data_type='pandas',
            batch_uri=sink_path)
        # Write the example to the sink path.
        write = af.write_example(
            input_data=transform_channel,
            example_info=write_example_meta,
            exec_args=ExecuteArgs(
                batch_properties=Args(sep=',', header=False, index=False)))
    # Add a control dependency: the read_example job starts right after the
    # command line job finishes.
    af.stop_before_control_dependency(read_example_channel, cmd_job)
    transform_dag = 'simple_transform'
    af.deploy_to_airflow(project_root_path, dag_id=transform_dag)
    context = af.run(project_path=project_root_path,
                     dag_id=transform_dag,
                     scheduler_type=SchedulerType.AIRFLOW)

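# A minimal, hedged usage sketch for run_project() above: launch the example
# with this file's directory as the project root. It assumes project.yaml
# lives alongside this script; the __main__ guard is an assumption about how
# the example is invoked.
if __name__ == '__main__':
    run_project(os.path.dirname(os.path.abspath(__file__)))
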
def test_stream_train_component(self):
    batch_input_example_meta = af.register_example(
        name='batch_train_example',
        support_type=ExampleSupportType.EXAMPLE_BOTH)
    model_meta = af.register_model(model_name='mnist_model',
                                   model_type=ModelType.SAVED_MODEL)
    stream_input_example_meta = af.register_example(
        name='stream_train_example',
        support_type=ExampleSupportType.EXAMPLE_BOTH)
    with af.config(
            af.BaseJobConfig(platform='local',
                             engine='python',
                             job_name='stream_train')):
        batch_input_example = af.read_example(
            example_info=batch_input_example_meta,
            executor=PythonObjectExecutor(
                python_object=ReadBatchExample()))
        batch_train = af.train(input_data_list=[batch_input_example],
                               executor=PythonObjectExecutor(
                                   python_object=TrainBatchMnistModel()),
                               model_info=model_meta)
        stream_input_example = af.read_example(
            example_info=stream_input_example_meta,
            executor=PythonObjectExecutor(
                python_object=ReadStreamExample()))
        stream_train = af.train(input_data_list=[stream_input_example],
                                executor=PythonObjectExecutor(
                                    python_object=TrainStreamMnistModel()),
                                model_info=model_meta)
    # Stream training starts only after batch training finishes.
    af.stop_before_control_dependency(stream_train, batch_train)
    workflow_id = af.run(test_util.get_project_path())
    res = af.wait_workflow_execution_finished(workflow_id)
    self.assertEqual(0, res)

def test_batch_train_component_with_an_output(self):
    input_example_meta = af.register_example(
        name='batch_train_example',
        support_type=ExampleSupportType.EXAMPLE_BATCH)
    model_meta = af.register_model(model_name='mnist_model',
                                   model_type=ModelType.SAVED_MODEL)
    example_meta = af.register_example(
        name='output_example',
        support_type=ExampleSupportType.EXAMPLE_BATCH,
        data_type='numpy',
        data_format='npz',
        batch_uri=os.path.abspath(
            os.path.dirname(__file__) + '/numpy_output.npz'))
    with af.config(
            af.BaseJobConfig(platform='local',
                             engine='python',
                             job_name='batch_train')):
        input_example = af.read_example(
            example_info=input_example_meta,
            executor=PythonObjectExecutor(
                python_object=ReadBatchExample()))
        train_channel = af.train(
            input_data_list=[input_example],
            executor=PythonObjectExecutor(
                python_object=TrainBatchMnistModelWithOutput()),
            model_info=model_meta,
            output_num=1)
        af.write_example(input_data=train_channel, example_info=example_meta)
    workflow_id = af.run(test_util.get_project_path())
    res = af.wait_workflow_execution_finished(workflow_id)
    self.assertEqual(0, res)

def test_stream_transform_component(self):
    file = get_file_dir(__file__) + '/test1.csv'
    input_example_meta = af.register_example(
        name='test_example',
        support_type=ExampleSupportType.EXAMPLE_BOTH,
        stream_uri=file)
    output_file = get_file_dir(__file__) + "/output_transform_stream_test1.csv"
    output_example_meta = af.register_example(
        name='test_example_output',
        support_type=ExampleSupportType.EXAMPLE_BOTH,
        stream_uri=output_file)
    with af.config(
            af.BaseJobConfig(platform='local',
                             engine='python',
                             job_name='stream_transform')):
        input_example = af.read_example(
            example_info=input_example_meta,
            executor=PythonObjectExecutor(
                python_object=ReadStreamExample()))
        transform_example = af.transform(
            input_data_list=[input_example],
            executor=PythonObjectExecutor(
                python_object=TransformStreamData()))
        af.write_example(input_data=transform_example,
                         example_info=output_example_meta.name,
                         executor=PythonObjectExecutor(
                             python_object=WriteStreamExample()))
    workflow_id = af.run(test_util.get_project_path())
    res = af.wait_workflow_execution_finished(workflow_id)
    self.assertEqual(0, res)

def test_context(self):
    global_config = af.BaseJobConfig(platform='a',
                                     engine='b',
                                     properties={'c': 'c'})
    job_config = af.BaseJobConfig(platform='aa',
                                  engine='bb',
                                  properties={'cc': 'cc'})
    with af.global_config(global_config):
        with af.config(job_config):
            af.user_define_operation(executor=None)
    node_list = list(_default_ai_graph.nodes.values())
    self.assertEqual('bb', node_list[0].properties[ENGINE_NAME])
    self.assertEqual('cc', node_list[0].config.properties["cc"])
    self.assertEqual('c', node_list[0].config.properties["c"])
    self.assertEqual('bb', node_list[0].config.engine)
    self.assertEqual('aa', node_list[0].config.platform)

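# Note on test_context() above: nesting af.config() inside af.global_config()
# merges the properties of both levels (the node sees both 'c' and 'cc'),
# while the inner job config's platform and engine ('aa' and 'bb') override
# the global ones. That is exactly what the five assertions check.
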
def test_read_example_with_pandas(self):
    input_example_meta = af.register_example(
        name='input_pandas_example',
        data_type='pandas',
        data_format='csv',
        support_type=ExampleSupportType.EXAMPLE_BATCH,
        batch_uri=os.path.abspath(
            os.path.dirname(__file__) + '/test1.csv'))
    output_example_meta = af.register_example(
        name='output_pandas_example',
        data_type='pandas',
        data_format='csv',
        support_type=ExampleSupportType.EXAMPLE_BATCH,
        batch_uri=os.path.abspath(
            os.path.dirname(__file__) + '/pandas_output.csv'))
    with af.config(
            af.BaseJobConfig(platform='local',
                             engine='python',
                             job_name='test_csv')):
        example_channel = af.read_example(example_info=input_example_meta)
        af.write_example(input_data=example_channel,
                         example_info=output_example_meta)
    workflow_id = af.run(test_util.get_project_path())
    res = af.wait_workflow_execution_finished(workflow_id)
    self.assertEqual(0, res)

def test_read_example_with_numpy_npy(self):
    npy_name = 'test.npy'
    np.save(file=npy_name, arr=np.arange(10))
    input_example_meta = af.register_example(
        name='input_numpy_example',
        data_type='numpy',
        data_format='npy',
        support_type=ExampleSupportType.EXAMPLE_BATCH,
        batch_uri=os.path.abspath(
            os.path.dirname(__file__) + "/" + npy_name))
    output_example_meta = af.register_example(
        name='output_numpy_example',
        data_type='numpy',
        data_format='npy',
        support_type=ExampleSupportType.EXAMPLE_BATCH,
        batch_uri=os.path.abspath(
            os.path.dirname(__file__) + '/numpy_output.npy'))
    with af.config(
            af.BaseJobConfig(platform='local',
                             engine='python',
                             job_name='test_npy')):
        example_channel = af.read_example(example_info=input_example_meta)
        af.write_example(input_data=example_channel,
                         example_info=output_example_meta)
    workflow_id = af.run(test_util.get_project_path())
    res = af.wait_workflow_execution_finished(workflow_id)
    self.assertEqual(0, res)

def test_user_define_control_dependency(self):
    print(sys._getframe().f_code.co_name)
    trigger = af.external_trigger(name='stream_trigger')
    job_config = af.BaseJobConfig('local', 'cmd_line')
    job_config.job_name = 'test_cmd'
    with af.config(job_config):
        cmd_executor = af.user_define_operation(
            output_num=0,
            executor=CmdExecutor(
                cmd_line="echo 'hello world' && sleep {}".format(1)))
    af.user_define_control_dependency(
        src=cmd_executor,
        dependency=trigger,
        event_key='key',
        event_value='value',
        event_type='name',
        condition=MetCondition.NECESSARY,
        action=TaskAction.START,
        life=EventLife.ONCE,
        value_condition=MetValueCondition.UPDATE)
    workflow_id = af.submit_ai_flow()
    af.get_ai_flow_client().publish_event('key', 'value1', 'name')
    time.sleep(5)
    af.get_ai_flow_client().publish_event('key', 'value2', 'name')
    time.sleep(10)
    af.stop_execution_by_id(workflow_id)
    res = af.get_ai_flow_client().list_job(5, 0)
    self.assertEqual(3, len(res))

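# Event flow for the test above: with life=ONCE and value_condition=UPDATE,
# each publish_event() call that changes the value under 'key' should satisfy
# the dependency again and (re)start cmd_executor. That is presumably why the
# test expects three job records after two values are published; verify the
# exact semantics against your AI Flow version.
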
def generate_workflow_config():
    workflow_config = WorkFlowConfig()
    workflow_config.add_job_config(
        config_key="global_config_key",
        job_config=af.BaseJobConfig(
            platform="local",
            engine="python",
            properties={"common_key": "common_value"}))
    workflow_config.add_job_config(
        config_key="test_job",
        job_config=af.BaseJobConfig(
            platform=None,
            engine=None,
            properties={"job_key": "job_value"}))
    workflow_config.add_job_config(
        config_key="test_job_1",
        job_config=af.BaseJobConfig(
            platform='kubernetes',
            engine='flink',
            properties={"job_key_1": "job_value_1"}))
    # Serialize the workflow config as JSON to the module-level `config_file` path.
    with open(config_file, 'w') as f:
        f.write(json_utils.dumps(workflow_config))

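# A minimal, hedged usage sketch for generate_workflow_config() above. The
# helper name is hypothetical; it assumes `config_file` is defined at module
# scope (as the function expects) and that ai_flow's json_utils offers a
# loads() counterpart to the dumps() used above. Verify both against your
# ai_flow version.
def _workflow_config_roundtrip_sketch():
    generate_workflow_config()
    with open(config_file) as f:
        return json_utils.loads(f.read())
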
def test_periodic_job(self):
    print(sys._getframe().f_code.co_name)
    periodic_config = PeriodicConfig(periodic_type='interval',
                                     args={'seconds': 5})
    job_config = af.BaseJobConfig(platform='local', engine='cmd_line')
    job_config.job_name = 'test_periodic'
    job_config.periodic_config = periodic_config
    with af.config(job_config):
        af.user_define_operation(executor=af.CmdExecutor(
            cmd_line="echo 'hello world!'"))
    workflow_id = af.submit_ai_flow()
    time.sleep(10)
    af.stop_execution_by_id(workflow_id)

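# Note on test_periodic_job() above: the 'interval' periodic config should
# re-trigger the cmd_line job roughly every 5 seconds until
# stop_execution_by_id() is called, so the 10-second sleep leaves room for
# about two runs.
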
def test_stream_evaluate_component(self):
    input_example_meta = af.register_example(
        name='batch_train_example',
        support_type=ExampleSupportType.EXAMPLE_BATCH)
    model_meta = af.register_model(model_name='mnist_model',
                                   model_type=ModelType.SAVED_MODEL)
    stream_evaluate_example_meta = af.register_example(
        name='stream_evaluate_example',
        support_type=ExampleSupportType.EXAMPLE_STREAM)
    stream_output_file = get_file_dir(__file__) + '/stream_evaluate'
    evaluate_output = af.register_artifact(name='stream_evaluate',
                                           stream_uri=stream_output_file)
    stream_evaluate_result_example_meta = af.register_example(
        name='stream_evaluate_result_example',
        support_type=ExampleSupportType.EXAMPLE_STREAM,
        stream_uri=stream_output_file)
    if os.path.exists(stream_output_file):
        os.remove(stream_output_file)
    with af.config(
            af.BaseJobConfig(platform='local',
                             engine='python',
                             job_name='stream_evaluate')):
        input_example = af.read_example(
            example_info=input_example_meta,
            executor=PythonObjectExecutor(
                python_object=ReadBatchExample()))
        batch_train = af.train(input_data_list=[input_example],
                               executor=PythonObjectExecutor(
                                   python_object=TrainBatchMnistModel()),
                               model_info=model_meta)
        stream_evaluate_example = af.read_example(
            example_info=stream_evaluate_example_meta,
            executor=PythonObjectExecutor(
                python_object=ReadStreamExample()))
        stream_evaluate = af.evaluate(
            input_data_list=[stream_evaluate_example],
            model_info=model_meta,
            executor=PythonObjectExecutor(
                python_object=EvaluateStreamMnistModel()),
            output_num=1)
        af.write_example(input_data=stream_evaluate,
                         example_info=stream_evaluate_result_example_meta,
                         executor=PythonObjectExecutor(
                             python_object=WriteStreamExample()))
    af.stop_before_control_dependency(stream_evaluate, batch_train)
    workflow_id = af.run(test_util.get_project_path())
    res = af.wait_workflow_execution_finished(workflow_id)
    self.assertEqual(0, res)

def test_stream_with_external_trigger_with_model_control(self):
    print(sys._getframe().f_code.co_name)
    model_name = 'test_create_model_version'
    model_desc = 'test create model version'
    response = af.register_model(model_name=model_name,
                                 model_type=af.ModelType.CHECKPOINT,
                                 model_desc=model_desc)
    trigger = af.external_trigger(name='stream_trigger')
    job_config = af.BaseJobConfig('local', 'cmd_line')
    job_config.job_name = 'test_cmd'
    with af.config(job_config):
        cmd_executor = af.user_define_operation(
            output_num=0,
            executor=CmdExecutor(
                cmd_line="echo 'hello world' && sleep {}".format(1)))
    af.model_version_control_dependency(
        src=cmd_executor,
        dependency=trigger,
        model_name=model_name,
        model_version_event_type='MODEL_DEPLOYED')
    workflow_id = af.submit_ai_flow()
    model_path1 = 'fs://source1.pkl'
    model_metric1 = 'http://metric1'
    model_flavor1 = '{"flavor.version":1}'
    version_desc1 = 'test create model version1'
    time.sleep(1)
    response = af.register_model_version(
        model=model_name,
        model_path=model_path1,
        model_metric=model_metric1,
        model_flavor=model_flavor1,
        version_desc=version_desc1,
        current_stage=af.ModelVersionStage.DEPLOYED)
    time.sleep(5)
    response = af.register_model_version(
        model=model_name,
        model_path=model_path1,
        model_metric=model_metric1,
        model_flavor=model_flavor1,
        version_desc=version_desc1,
        current_stage=af.ModelVersionStage.DEPLOYED)
    time.sleep(10)
    af.stop_execution_by_id(workflow_id)
    res = af.get_ai_flow_client().list_job(5, 0)
    self.assertEqual(3, len(res))

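# Note on the test above: each register_model_version() call with
# current_stage=DEPLOYED emits a MODEL_DEPLOYED event for the registered
# model, and the model_version_control_dependency restarts cmd_executor on
# every such event. That is presumably why, as in
# test_user_define_control_dependency, the test expects three job records
# after two versions are deployed.
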
def test_batch_predict_component(self):
    input_example_meta = af.register_example(
        name='input_train_example',
        support_type=ExampleSupportType.EXAMPLE_BOTH)
    model_meta = af.register_model(model_name='mnist_model',
                                   model_type=ModelType.SAVED_MODEL)
    batch_output_file = get_file_dir(__file__) + '/batch_predict'
    evaluate_output = af.register_artifact(name='batch_evaluate',
                                           batch_uri=batch_output_file)
    output_example_meta = af.register_example(
        name='output_result_example',
        support_type=ExampleSupportType.EXAMPLE_BATCH,
        data_type='numpy',
        data_format='txt',
        batch_uri=batch_output_file)
    if os.path.exists(batch_output_file):
        os.remove(batch_output_file)
    with af.config(
            af.BaseJobConfig(platform='local',
                             engine='python',
                             job_name='batch_predict')):
        batch_example = af.read_example(
            example_info=input_example_meta,
            executor=PythonObjectExecutor(
                python_object=ReadBatchExample()))
        batch_train = af.train(input_data_list=[batch_example],
                               executor=PythonObjectExecutor(
                                   python_object=TrainBatchMnistModel()),
                               model_info=model_meta)
        batch_predict = af.predict(
            input_data_list=[batch_example],
            model_info=model_meta,
            executor=PythonObjectExecutor(
                python_object=PredictBatchMnistModel()),
            output_num=1)
        af.write_example(input_data=batch_predict,
                         example_info=output_example_meta)
    af.stop_before_control_dependency(batch_predict, batch_train)
    workflow_id = af.run(test_util.get_project_path())
    res = af.wait_workflow_execution_finished(workflow_id)
    self.assertEqual(0, res)

def test_read_example_with_numpy_npz(self):
    npy_name = 'test.npz'
    np.savez(npy_name, np.arange(10), np.sin(np.arange(10)))
    input_example_meta = af.register_example(
        name='input_numpy_example',
        data_type='numpy',
        data_format='npz',
        support_type=ExampleSupportType.EXAMPLE_BATCH,
        batch_uri=os.path.abspath(
            os.path.dirname(__file__) + "/" + npy_name))
    output_example_meta_first = af.register_example(
        name='output_numpy_example_1',
        data_type='numpy',
        data_format='npz',
        support_type=ExampleSupportType.EXAMPLE_BATCH,
        batch_uri=os.path.abspath(
            os.path.dirname(__file__) + '/numpy_output_1.npz'))
    output_example_meta_second = af.register_example(
        name='output_numpy_example_2',
        data_type='numpy',
        data_format='npz',
        support_type=ExampleSupportType.EXAMPLE_BATCH,
        batch_uri=os.path.abspath(
            os.path.dirname(__file__) + '/numpy_output_2.npz'))
    with af.config(
            af.BaseJobConfig(platform='local',
                             engine='python',
                             job_name='test_npz')):
        example_channel = af.read_example(example_info=input_example_meta)
        transform_channel = af.transform(
            input_data_list=[example_channel],
            executor=PythonObjectExecutor(
                python_object=TransformTrainData()),
            output_num=2)
        af.write_example(input_data=transform_channel[0],
                         example_info=output_example_meta_first)
        af.write_example(input_data=transform_channel[1],
                         example_info=output_example_meta_second)
    workflow_id = af.run(test_util.get_project_path())
    res = af.wait_workflow_execution_finished(workflow_id)
    self.assertEqual(0, res)

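# Note on test_read_example_with_numpy_npz() above: with output_num=2,
# af.transform() yields two output channels, which is why the two
# write_example() calls consume transform_channel[0] and transform_channel[1].
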
def test_batch_train_component(self):
    input_example_meta = af.register_example(
        name='batch_train_example',
        support_type=ExampleSupportType.EXAMPLE_BATCH)
    model_meta = af.register_model(model_name='mnist_model',
                                   model_type=ModelType.SAVED_MODEL)
    with af.config(
            af.BaseJobConfig(platform='local',
                             engine='python',
                             job_name='batch_train')):
        input_example = af.read_example(
            example_info=input_example_meta,
            executor=PythonObjectExecutor(
                python_object=ReadBatchExample()))
        af.train(input_data_list=[input_example],
                 executor=PythonObjectExecutor(
                     python_object=TrainBatchMnistModel()),
                 model_info=model_meta)
    workflow_id = af.run(test_util.get_project_path())
    res = af.wait_workflow_execution_finished(workflow_id)
    self.assertEqual(0, res)

def test_batch_model_validate(self):
    input_example_meta = af.register_example(
        name='batch_train_example',
        support_type=ExampleSupportType.EXAMPLE_BOTH)
    model_meta = af.register_model(model_name='mnist_model',
                                   model_type=ModelType.SAVED_MODEL)
    with af.config(af.BaseJobConfig(platform='local',
                                    engine='python',
                                    job_name='evaluate')):
        input_example = af.read_example(
            example_info=input_example_meta,
            executor=PythonObjectExecutor(python_object=ReadBatchExample()))
        batch_train = af.train(
            input_data_list=[input_example],
            executor=PythonObjectExecutor(python_object=TrainBatchMnistModel()),
            model_info=model_meta)
        model_validate = af.model_validate(
            input_data_list=[input_example],
            model_info=model_meta,
            executor=PythonObjectExecutor(python_object=BatchModelValidate()),
            output_num=0)
    af.stop_before_control_dependency(model_validate, batch_train)
    workflow_id = af.run(test_util.get_project_path())
    res = af.wait_workflow_execution_finished(workflow_id)
    self.assertEqual(0, res)