def test_stream_transform_component(self):
    file = get_file_dir(__file__) + '/test1.csv'
    input_example_meta = af.register_example(
        name='test_example',
        support_type=ExampleSupportType.EXAMPLE_BOTH,
        stream_uri=file)
    output_file = get_file_dir(__file__) + "/output_transform_stream_test1.csv"
    output_example_meta = af.register_example(
        name='test_example_output',
        support_type=ExampleSupportType.EXAMPLE_BOTH,
        stream_uri=output_file)
    with af.config(af.BaseJobConfig(platform='local',
                                    engine='python',
                                    job_name='stream_transform')):
        input_example = af.read_example(
            example_info=input_example_meta,
            executor=PythonObjectExecutor(python_object=ReadStreamExample()))
        transform_example = af.transform(
            input_data_list=[input_example],
            executor=PythonObjectExecutor(python_object=TransformStreamData()))
        af.write_example(
            input_data=transform_example,
            example_info=output_example_meta.name,
            executor=PythonObjectExecutor(python_object=WriteStreamExample()))
    workflow_id = af.run(test_util.get_project_path())
    res = af.wait_workflow_execution_finished(workflow_id)
    self.assertEqual(0, res)
def _remote_execute_func(exec_func, write_func, exec_dict, jm, py):
    func_stdout = '{}/exec_{}_stdout.log'.format(get_file_dir(__file__), exec_func)
    func_stderr = '{}/exec_{}_stderr.log'.format(get_file_dir(__file__), exec_func)
    with open(func_stdout, 'a') as out, open(func_stderr, 'a') as err:
        # Execute `flink run -m <remote> -py exec_function.py` to submit the batch job.
        submitted_process = Popen(
            args="{}/bin/flink run -m {} -py {}/exec_function.py -pyexec {} {} {} '{}'".format(
                _find_flink_home(), jm, get_file_dir(__file__), py, exec_func,
                write_func, json.dumps(exec_dict)),
            shell=True,
            stdout=out,
            stderr=err)
        submitted_process.wait()
    # Decode the execution result from the table sink file.
    execute_result = cloudpickle.loads(
        codecs.decode(
            pd.DataFrame(pd.read_csv(write_func))['func'].values[0].encode(),
            'base64'))
    # Remove the table sink file to clean up intermediate files.
    os.remove(write_func)
    return execute_result
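# A minimal sketch of the encode side that _remote_execute_func expects: the remote
# job is assumed to cloudpickle its result, base64-encode it, and write it into a
# one-column CSV named 'func', which the decoding above then reverses. The helper
# name and the use of pandas here are illustrative assumptions; the real project
# writes this file through a Flink table sink rather than pandas.
import codecs

import cloudpickle
import pandas as pd


def write_result_to_sink(result, sink_path):
    # cloudpickle -> base64 text, mirroring the decode in _remote_execute_func.
    encoded = codecs.encode(cloudpickle.dumps(result), 'base64').decode()
    pd.DataFrame({'func': [encoded]}).to_csv(sink_path, index=False)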
def _cancel_jobs(self):
    for job in self._jobs:
        cancel_stdout = '{}/cancel_{}_stdout.log'.format(get_file_dir(__file__), job)
        cancel_stderr = '{}/cancel_{}_stderr.log'.format(get_file_dir(__file__), job)
        with open(cancel_stdout, 'a') as out, open(cancel_stderr, 'a') as err:
            # Execute `flink cancel <jobID>` to cancel the batch job.
            Popen(args='{}/bin/flink cancel {}'.format(_find_flink_home(), job),
                  shell=True,
                  stdout=out,
                  stderr=err)
def train(df):
    # https://scikit-learn.org/stable/auto_examples/linear_model/plot_sparse_logistic_regression_mnist.html
    clf = LogisticRegression(C=50. / 5000, penalty='l1', solver='saga', tol=0.1)
    x_train, y_train = df[0], df[1]
    clf.fit(x_train, y_train)
    model_path = get_file_dir(__file__) + '/saved_model'
    if not os.path.exists(model_path):
        os.makedirs(model_path)
    model_timestamp = time.strftime("%Y%m%d%H%M%S", time.localtime())
    model_path = model_path + '/' + model_timestamp
    dump(clf, model_path)
    model = function_context.node_spec.output_model
    print(model.name)
    print(model_timestamp)
    # When a model version is registered, a corresponding event is sent to the
    # downstream job as well.
    af.register_model_version(model=model,
                              model_path=model_path,
                              current_stage=ModelVersionStage.GENERATED)
    print(af.get_latest_generated_model_version(model_name=model.name).model_path)
    return df
def apply_async(self, func, callback=None):
    # Build a unique function name from the current time plus a random suffix.
    exec_func = 'func{}{}'.format(round(time.time()),
                                  random.randint(0, round(time.time())))
    pickle_func = codecs.encode(cloudpickle.dumps(func()), 'base64').decode()
    write_func = '{}/{}.csv'.format(get_file_dir(__file__), exec_func)
    if self.mode == 'local':
        return self._get_pool().apply_async(
            SafeFunction(self._local_execute_func),
            args=[exec_func, write_func, pickle_func, self._python_path],
            callback=callback)
    else:
        exec_dict = {exec_func: pickle_func}
        return self._get_pool().apply_async(
            SafeFunction(self._remote_execute_func),
            args=[exec_func, write_func, exec_dict, self._job_manager, self._python_path],
            callback=callback)
def test_dump_load_configuration(self):
    config = AIFlowConfiguration()
    test_yaml = get_file_dir(__file__) + "/test.yaml"
    config['a'] = 'a'
    config.dump_to_file(test_yaml)
    config.clear()
    config.load_from_file(test_yaml)
    self.assertEqual('a', config['a'])
def test_project_upload_download_local_2(self):
    project_path = get_file_dir(__file__)
    config = {'local_repository': '/tmp', 'remote_repository': '/tmp'}
    # blob_server.type = local
    blob_manager = BlobManagerFactory.get_blob_manager(config)
    uploaded_path = blob_manager.upload_blob('1', project_path)
    downloaded_path = blob_manager.download_blob('1', uploaded_path)
    self.assertEqual('/tmp/workflow_1_project/project', downloaded_path)
def test_run_pyflink_job(self):
    project_path = os.path.dirname(__file__) + '/../'
    af.set_project_config_file(project_path + "project.yaml")
    input_file = get_parent_dir(get_file_dir(__file__)) + '/resources/word_count.txt'
    output_file = get_file_dir(__file__) + "/word_count_output.csv"
    if os.path.exists(output_file):
        os.remove(output_file)
    example_1 = af.create_example(name="example_1",
                                  support_type=af.ExampleSupportType.EXAMPLE_BOTH,
                                  batch_uri=input_file,
                                  stream_uri=input_file,
                                  data_format="csv")
    example_2 = af.create_example(name="example_2",
                                  support_type=af.ExampleSupportType.EXAMPLE_BOTH,
                                  batch_uri=output_file,
                                  stream_uri=output_file,
                                  data_format="csv")
    flink_config = faf.LocalFlinkJobConfig()
    flink_config.local_mode = 'cluster'
    flink_config.flink_home = '/Users/chenwuchao/soft/apache/flink-1.11.0/'
    flink_config.set_table_env_create_func(TableEnvCreator())
    with af.config(flink_config):
        input_example = af.read_example(
            example_info=example_1,
            executor=faf.flink_executor.FlinkPythonExecutor(python_object=Source()))
        processed = af.transform(
            input_data_list=[input_example],
            executor=faf.flink_executor.FlinkPythonExecutor(python_object=Transformer()))
        af.write_example(
            input_data=processed,
            example_info=example_2,
            executor=faf.flink_executor.FlinkPythonExecutor(python_object=Sink()))
    workflow_id = af.run(project_path)
    res = af.wait_workflow_execution_finished(workflow_id)
def test_project_upload_download_local(self):
    project_path = get_file_dir(__file__)
    project_desc = get_project_description_from(project_path + "/../")
    # blob_server.type = local
    blob_manager = BlobManagerFactory.get_blob_manager(project_desc.project_config)
    uploaded_path = blob_manager.upload_blob('1', project_path)
    self.assertEqual(uploaded_path, project_path)
    downloaded_path = blob_manager.download_blob('1', uploaded_path)
    self.assertEqual(project_path, downloaded_path)
def test_run_pyflink_job(self):
    input_file = get_parent_dir(get_file_dir(__file__)) + '/resources/word_count.txt'
    output_file = get_file_dir(__file__) + "/word_count_output.csv"
    if os.path.exists(output_file):
        os.remove(output_file)
    example_1 = af.create_example(name="example_1",
                                  support_type=af.ExampleSupportType.EXAMPLE_BOTH,
                                  batch_uri=input_file,
                                  stream_uri=input_file,
                                  data_format="csv")
    example_2 = af.create_example(name="example_2",
                                  support_type=af.ExampleSupportType.EXAMPLE_BOTH,
                                  batch_uri=output_file,
                                  stream_uri=output_file,
                                  data_format="csv")
    flink_config = faf.LocalFlinkJobConfig()
    flink_config.local_mode = 'python'
    flink_config.set_table_env_create_func(TableEnvCreator())
    with af.config(flink_config):
        input_example = af.read_example(
            example_info=example_1,
            executor=faf.flink_executor.FlinkPythonExecutor(python_object=Source()))
        processed = af.transform(
            input_data_list=[input_example],
            executor=faf.flink_executor.FlinkPythonExecutor(python_object=Transformer()))
        af.write_example(
            input_data=processed,
            example_info=example_2,
            executor=faf.flink_executor.FlinkPythonExecutor(python_object=Sink()))
    workflow_id = af.run(test_util.get_project_path())
    res = af.wait_workflow_execution_finished(workflow_id)
    self.assertEqual(0, res)
def execute(self, function_context: FunctionContext, input_list: List) -> List:
    # https://scikit-learn.org/stable/auto_examples/linear_model/plot_sparse_logistic_regression_mnist.html
    clf = LogisticRegression(C=50. / 5000, penalty='l1', solver='saga', tol=0.1)
    x_train, y_train = input_list[0][0], input_list[0][1]
    clf.fit(x_train, y_train)
    model_path = get_file_dir(__file__) + '/saved_model'
    if not os.path.exists(model_path):
        os.makedirs(model_path)
    model_timestamp = time.strftime("%Y%m%d%H%M%S", time.localtime())
    model_path = model_path + '/' + model_timestamp
    dump(clf, model_path)
    af.register_model_version(model=function_context.node_spec.output_model,
                              model_path=model_path)
    return []
def test_stream_evaluate_component(self):
    input_example_meta = af.register_example(
        name='batch_train_example',
        support_type=ExampleSupportType.EXAMPLE_BATCH)
    model_meta = af.register_model(model_name='mnist_model',
                                   model_type=ModelType.SAVED_MODEL)
    stream_evaluate_example_meta = af.register_example(
        name='stream_evaluate_example',
        support_type=ExampleSupportType.EXAMPLE_STREAM)
    stream_output_file = get_file_dir(__file__) + '/stream_evaluate'
    evaluate_output = af.register_artifact(name='stream_evaluate',
                                           stream_uri=stream_output_file)
    stream_evaluate_result_example_meta = af.register_example(
        name='stream_evaluate_result_example',
        support_type=ExampleSupportType.EXAMPLE_STREAM,
        stream_uri=stream_output_file)
    if os.path.exists(stream_output_file):
        os.remove(stream_output_file)
    with af.config(af.BaseJobConfig(platform='local',
                                    engine='python',
                                    job_name='stream_evaluate')):
        input_example = af.read_example(
            example_info=input_example_meta,
            executor=PythonObjectExecutor(python_object=ReadBatchExample()))
        batch_train = af.train(
            input_data_list=[input_example],
            executor=PythonObjectExecutor(python_object=TrainBatchMnistModel()),
            model_info=model_meta)
        stream_evaluate_example = af.read_example(
            example_info=stream_evaluate_example_meta,
            executor=PythonObjectExecutor(python_object=ReadStreamExample()))
        stream_evaluate = af.evaluate(
            input_data_list=[stream_evaluate_example],
            model_info=model_meta,
            executor=PythonObjectExecutor(python_object=EvaluateStreamMnistModel()),
            output_num=1)
        af.write_example(
            input_data=stream_evaluate,
            example_info=stream_evaluate_result_example_meta,
            executor=PythonObjectExecutor(python_object=WriteStreamExample()))
    af.stop_before_control_dependency(stream_evaluate, batch_train)
    workflow_id = af.run(test_util.get_project_path())
    res = af.wait_workflow_execution_finished(workflow_id)
    self.assertEqual(0, res)
def test_context_with_yaml_file(self):
    config_file = path_util.get_file_dir(__file__) + "/workflow.yaml"
    with af.global_config_file(config_path=config_file) as g_config:
        with af.config('task_1') as config_1:
            self.assertEqual('task_1', config_1.job_name)
            self.assertEqual('cmd_line', config_1.engine)
            self.assertEqual('interval', config_1.periodic_config.periodic_type)
            self.assertEqual(20, config_1.periodic_config.args['seconds'])
        with af.config('task_2') as config_2:
            self.assertEqual('task_2', config_2.job_name)
            self.assertEqual('cmd_line', config_2.engine)
            self.assertEqual('cron', config_2.periodic_config.periodic_type)
            self.assertEqual('* * * * *', config_2.periodic_config.args)
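# A hedged reconstruction of the workflow.yaml shape this test appears to expect,
# inferred only from the attribute names asserted above (job_name, engine,
# periodic_config.periodic_type, periodic_config.args). The exact key names and
# nesting used by the real schema are assumptions, not taken from the project.
ASSUMED_WORKFLOW_YAML = """
task_1:
  engine: cmd_line
  periodic_config:
    periodic_type: interval
    args:
      seconds: 20
task_2:
  engine: cmd_line
  periodic_config:
    periodic_type: cron
    args: '* * * * *'
"""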
def test_load_project_config(self):
    project_path = get_file_dir(__file__)
    set_project_config_file(project_path + "/project.yaml")
    project_desc = get_project_description_from(project_path)
    self.assertEqual(project_desc.project_config.get_master_uri(),
                     "localhost:50051")
    self.assertIsNone(project_desc.project_config.get('ai_flow config', None))
    self.assertEqual(project_desc.project_config['ai_flow_home'],
                     '/opt/ai_flow')
    self.assertEqual(project_desc.project_config['ai_flow_job_master.host'],
                     'localhost')
    self.assertEqual(project_desc.project_config['ai_flow_job_master.port'],
                     8081)
    self.assertEqual(project_desc.project_config['ai_flow_conf'],
                     'taskmanager.slot=2')
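# A hedged sketch of the project.yaml contents this test appears to read. The
# bracket-accessed keys and their values are taken verbatim from the assertions
# above; the key backing get_master_uri() ('master_uri' here) is an assumption.
ASSUMED_PROJECT_YAML = """
master_uri: localhost:50051
ai_flow_home: /opt/ai_flow
ai_flow_job_master.host: localhost
ai_flow_job_master.port: 8081
ai_flow_conf: taskmanager.slot=2
"""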
def train(df):
    # https://scikit-learn.org/stable/auto_examples/linear_model/plot_sparse_logistic_regression_mnist.html
    clf = LogisticRegression(C=50. / 5000, penalty='l1', solver='saga', tol=0.1)
    x_train, y_train = df[0], df[1]
    clf.fit(x_train, y_train)
    model_path = get_file_dir(__file__) + '/saved_model'
    if not os.path.exists(model_path):
        os.makedirs(model_path)
    model_version = time.strftime("%Y%m%d%H%M%S", time.localtime())
    model_path = model_path + '/' + model_version
    dump(clf, model_path)
    model = function_context.node_spec.output_model
    print(model.name)
    print(model_version)
    af.register_model_version(model=model,
                              model_path=model_path,
                              current_stage=ModelVersionStage.GENERATED)
    return df
def test_batch_predict_component(self):
    input_example_meta = af.register_example(
        name='input_train_example',
        support_type=ExampleSupportType.EXAMPLE_BOTH)
    model_meta = af.register_model(model_name='mnist_model',
                                   model_type=ModelType.SAVED_MODEL)
    batch_output_file = get_file_dir(__file__) + '/batch_predict'
    evaluate_output = af.register_artifact(name='batch_evaluate',
                                           batch_uri=batch_output_file)
    output_example_meta = af.register_example(
        name='output_result_example',
        support_type=ExampleSupportType.EXAMPLE_BATCH,
        data_type='numpy',
        data_format='txt',
        batch_uri=batch_output_file)
    if os.path.exists(batch_output_file):
        os.remove(batch_output_file)
    with af.config(af.BaseJobConfig(platform='local',
                                    engine='python',
                                    job_name='batch_predict')):
        batch_example = af.read_example(
            example_info=input_example_meta,
            executor=PythonObjectExecutor(python_object=ReadBatchExample()))
        batch_train = af.train(
            input_data_list=[batch_example],
            executor=PythonObjectExecutor(python_object=TrainBatchMnistModel()),
            model_info=model_meta)
        batch_predict = af.predict(
            input_data_list=[batch_example],
            model_info=model_meta,
            executor=PythonObjectExecutor(python_object=PredictBatchMnistModel()),
            output_num=1)
        af.write_example(input_data=batch_predict,
                         example_info=output_example_meta)
    af.stop_before_control_dependency(batch_predict, batch_train)
    workflow_id = af.run(test_util.get_project_path())
    res = af.wait_workflow_execution_finished(workflow_id)
    self.assertEqual(0, res)
def test_project_upload_download_oss(self):
    project_path = get_file_dir(__file__)
    config = {
        'blob_server.type': 'oss',
        'local_repository': '/tmp',
        'blob_server.access_key_id': os.environ.get('blob_server.access_key_id'),
        'blob_server.access_key_secret': os.environ.get('blob_server.access_key_secret'),
        'blob_server.endpoint': os.environ.get('blob_server.endpoint'),
        'blob_server.bucket': os.environ.get('blob_server.bucket'),
        'blob_server.repo_name': os.environ.get('blob_server.repo_name')
    }
    blob_manager = BlobManagerFactory.get_blob_manager(config)
    uploaded_path = blob_manager.upload_blob('1', project_path)
    downloaded_path = blob_manager.download_blob('1', uploaded_path)
    self.assertEqual('/tmp/workflow_1_project/project', downloaded_path)
def execute(self, function_context: FunctionContext, input_list: List) -> List:
    save_path = self.path
    new_model_version = self.model_version
    model_meta: ModelMeta = function_context.node_spec.model
    serving_model_version = af.get_deployed_model_version(model_name=model_meta.name)
    if serving_model_version is None:
        af.update_model_version(model_name=model_meta.name,
                                model_version=new_model_version,
                                current_stage=ModelVersionStage.VALIDATED)
        print('the first serving model version is ', new_model_version)
    else:
        x_test, y_test = input_list[0][0], input_list[0][1]
        model = tf.keras.models.load_model(save_path)
        result = model.evaluate(x_test, y_test, verbose=2)
        base_model = tf.keras.models.load_model(serving_model_version.model_path)
        result_base = base_model.evaluate(x_test, y_test, verbose=2)
        model_validate_result = af.register_artifact(
            name='model_validate',
            batch_uri=get_file_dir(__file__) + '/model_batch_validate')
        if function_context.job_context.execution_mode == ExecutionMode.BATCH:
            file_uri = model_validate_result.batch_uri
        else:
            file_uri = model_validate_result.stream_uri
        with open(file_uri, 'a') as f:
            f.write(str(result_base) + ' -------> ' + 'previous model version: '
                    + serving_model_version.version)
            f.write('\n')
            f.write(str(result) + ' -------> ' + 'new model version: '
                    + new_model_version)
            f.write('\n')
        # Promote the new model version only if it beats the currently serving one.
        if result[1] > result_base[1]:
            af.update_model_version(model_name=model_meta.name,
                                    model_version=serving_model_version.version,
                                    current_stage=ModelVersionStage.DEPRECATED)
            af.update_model_version(model_name=model_meta.name,
                                    model_version=new_model_version,
                                    current_stage=ModelVersionStage.VALIDATED)
            print('the serving model version is ', new_model_version)
        else:
            print('the serving model version is ', serving_model_version.version)
    return []
def setUp(self):
    project_path = get_file_dir(__file__)
    config_file = project_path + '/master.yaml'
    self.master = AIFlowMaster(config_file=config_file)
    self.master.start()
def run_project(project_root_path):
    af.set_project_config_file(project_root_path + "/project.yaml")
    project_name = af.project_config().get_project_name()
    artifact_prefix = project_name + "."
    validate_trigger = af.external_trigger(name='validate')
    push_trigger = af.external_trigger(name='push')
    with af.global_config_file(project_root_path + '/resources/workflow_config.yaml'):
        with af.config('train_job'):
            train_example = af.register_example(
                name=artifact_prefix + 'train_example',
                support_type=ExampleSupportType.EXAMPLE_STREAM,
                stream_uri=EXAMPLE_URI.format('train'))
            train_read_example = af.read_example(
                example_info=train_example,
                executor=PythonObjectExecutor(python_object=TrainExampleReader()))
            train_transform = af.transform(
                input_data_list=[train_read_example],
                executor=PythonObjectExecutor(python_object=TrainExampleTransformer()))
            train_model = af.register_model(
                model_name=artifact_prefix + 'logistic-regression',
                model_type=ModelType.SAVED_MODEL,
                model_desc='logistic regression model')
            train_channel = af.train(
                input_data_list=[train_transform],
                executor=PythonObjectExecutor(python_object=ModelTrainer()),
                model_info=train_model)
        with af.config('validate_job'):
            validate_example = af.register_example(
                name=artifact_prefix + 'validate_example',
                support_type=ExampleSupportType.EXAMPLE_STREAM,
                stream_uri=EXAMPLE_URI.format('evaluate'),
                data_format='npz')
            validate_read_example = af.read_example(
                example_info=validate_example,
                executor=PythonObjectExecutor(python_object=ValidateExampleReader()))
            validate_transform = af.transform(
                input_data_list=[validate_read_example],
                executor=PythonObjectExecutor(python_object=ValidateTransformer()))
            validate_artifact_name = artifact_prefix + 'validate_artifact'
            validate_artifact = af.register_artifact(
                name=validate_artifact_name,
                stream_uri=get_file_dir(__file__) + '/validate_result')
            validate_channel = af.model_validate(
                input_data_list=[validate_transform],
                model_info=train_model,
                executor=PythonObjectExecutor(
                    python_object=ModelValidator(validate_artifact_name)))
        with af.config('push_job'):
            # Push the model to serving.
            # Register metadata of the pushed model.
            push_model_artifact_name = artifact_prefix + 'push_model_artifact'
            push_model_artifact = af.register_artifact(
                name=push_model_artifact_name,
                stream_uri=get_file_dir(__file__) + '/pushed_model')
            push_channel = af.push_model(
                model_info=train_model,
                executor=PythonObjectExecutor(
                    python_object=ModelPusher(push_model_artifact_name)))
        with af.config('predict_job'):
            predict_example = af.register_example(
                name=artifact_prefix + 'predict_example',
                support_type=ExampleSupportType.EXAMPLE_STREAM,
                stream_uri=EXAMPLE_URI.format('predict'))
            predict_read_example = af.read_example(
                example_info=predict_example,
                executor=PythonObjectExecutor(python_object=PredictExampleReader()))
            predict_transform = af.transform(
                input_data_list=[predict_read_example],
                executor=PythonObjectExecutor(python_object=PredictTransformer()))
            predict_channel = af.predict(
                input_data_list=[predict_transform],
                model_info=train_model,
                executor=PythonObjectExecutor(python_object=ModelPredictor()))
            write_example = af.register_example(
                name=artifact_prefix + 'export_example',
                support_type=ExampleSupportType.EXAMPLE_STREAM,
                stream_uri=get_file_dir(__file__) + '/predict_result')
            af.write_example(
                input_data=predict_channel,
                example_info=write_example,
                executor=PythonObjectExecutor(python_object=ExampleWriter()))
        af.model_version_control_dependency(
            src=validate_channel,
            model_version_event_type=ModelVersionEventType.MODEL_GENERATED,
            dependency=validate_trigger,
            model_name=train_model.name)
        af.model_version_control_dependency(
            src=push_channel,
            model_version_event_type=ModelVersionEventType.MODEL_VALIDATED,
            dependency=push_trigger,
            model_name=train_model.name)
    # Run the workflow.
    transform_dag = project_name
    af.deploy_to_airflow(project_root_path, dag_id=transform_dag)
    af.run(project_path=project_root_path,
           dag_id=transform_dag,
           scheduler_type=SchedulerType.AIRFLOW)
if __name__ == '__main__':
    project_path = os.path.dirname(get_file_dir(__file__))
    run_project(project_path)
def run_project(project_root_path):
    af.set_project_config_file(project_root_path + "/project.yaml")
    project_name = af.project_config().get_project_name()
    artifact_prefix = project_name + "."
    validate_trigger = af.external_trigger(name='validate')
    push_trigger = af.external_trigger(name='push')
    with af.global_config_file(project_root_path + '/resources/workflow_config.yaml'):
        # The train job is configured as a periodic job: it runs every
        # `interval` seconds, as defined in workflow_config.yaml.
        with af.config('train_job'):
            # Register metadata of the raw training data (example) and read the
            # example (i.e. the training dataset).
            train_example = af.register_example(
                name=artifact_prefix + 'train_example',
                support_type=ExampleSupportType.EXAMPLE_BATCH,
                batch_uri=EXAMPLE_URI.format('train'))
            train_read_example = af.read_example(
                example_info=train_example,
                executor=PythonObjectExecutor(python_object=ExampleReader()))
            # Transform (preprocess) the example.
            train_transform = af.transform(
                input_data_list=[train_read_example],
                executor=PythonObjectExecutor(python_object=ExampleTransformer()))
            # Register model metadata and train the model.
            train_model = af.register_model(
                model_name=artifact_prefix + 'logistic-regression',
                model_type=ModelType.SAVED_MODEL,
                model_desc='logistic regression model')
            train_channel = af.train(
                input_data_list=[train_transform],
                executor=PythonObjectExecutor(python_object=ModelTrainer()),
                model_info=train_model)
        with af.config('validate_job'):
            # Model validation:
            # read the validation dataset and validate the model before it is
            # used for prediction.
            validate_example = af.register_example(
                name=artifact_prefix + 'validate_example',
                support_type=ExampleSupportType.EXAMPLE_STREAM,
                batch_uri=EXAMPLE_URI.format('evaluate'))
            validate_read_example = af.read_example(
                example_info=validate_example,
                executor=PythonObjectExecutor(python_object=ValidateExampleReader()))
            validate_transform = af.transform(
                input_data_list=[validate_read_example],
                executor=PythonObjectExecutor(python_object=ValidateTransformer()))
            validate_artifact_name = artifact_prefix + 'validate_artifact'
            validate_artifact = af.register_artifact(
                name=validate_artifact_name,
                batch_uri=get_file_dir(__file__) + '/validate_result')
            validate_channel = af.model_validate(
                input_data_list=[validate_transform],
                model_info=train_model,
                executor=PythonObjectExecutor(
                    python_object=ModelValidator(validate_artifact_name)))
        with af.config('push_job'):
            # Push the model to serving.
            # Register metadata of the pushed model.
            push_model_artifact_name = artifact_prefix + 'push_model_artifact'
            push_model_artifact = af.register_artifact(
                name=push_model_artifact_name,
                batch_uri=get_file_dir(__file__) + '/pushed_model')
            push_channel = af.push_model(
                model_info=train_model,
                executor=PythonObjectExecutor(
                    python_object=ModelPusher(push_model_artifact_name)))
        with af.config('predict_job'):
            # Prediction (inference).
            predict_example = af.register_example(
                name=artifact_prefix + 'predict_example',
                support_type=ExampleSupportType.EXAMPLE_STREAM,
                stream_uri=EXAMPLE_URI.format('predict'))
            predict_read_example = af.read_example(
                example_info=predict_example,
                executor=PythonObjectExecutor(python_object=PredictExampleReader()))
            predict_transform = af.transform(
                input_data_list=[predict_read_example],
                executor=PythonObjectExecutor(python_object=PredictTransformer()))
            predict_channel = af.predict(
                input_data_list=[predict_transform],
                model_info=train_model,
                executor=PythonObjectExecutor(python_object=ModelPredictor()))
            # Save the prediction result.
            write_example = af.register_example(
                name=artifact_prefix + 'write_example',
                support_type=ExampleSupportType.EXAMPLE_STREAM,
                stream_uri=get_file_dir(__file__) + '/predict_result')
            af.write_example(
                input_data=predict_channel,
                example_info=write_example,
                executor=PythonObjectExecutor(python_object=ExampleWriter()))
        # Define the relation graph connected by control edges:
        # once a round of training is done, the validator is launched, and the
        # pusher is launched if the new model is better. Prediction starts once
        # the first round of training is done, and whenever the pusher pushes
        # (deploys) a new model, the predictor switches to the latest deployed model.
        af.model_version_control_dependency(
            src=validate_channel,
            model_version_event_type=ModelVersionEventType.MODEL_GENERATED,
            dependency=validate_trigger,
            model_name=train_model.name)
        af.model_version_control_dependency(
            src=push_channel,
            model_version_event_type=ModelVersionEventType.MODEL_VALIDATED,
            dependency=push_trigger,
            model_name=train_model.name)
    # Run the workflow.
    transform_dag = project_name
    af.deploy_to_airflow(project_root_path, dag_id=transform_dag)
    af.run(project_path=project_root_path,
           dag_id=transform_dag,
           scheduler_type=SchedulerType.AIRFLOW)
def test_context_with_file(self):
    config_file = path_util.get_file_dir(__file__) + "/workflow_config.json"

    def generate_workflow_config():
        workflow_config = WorkFlowConfig()
        workflow_config.add_job_config(
            config_key="global_config_key",
            job_config=af.BaseJobConfig(
                platform="local",
                engine="python",
                properties={"common_key": "common_value"}))
        workflow_config.add_job_config(
            config_key="test_job",
            job_config=af.BaseJobConfig(
                platform=None,
                engine=None,
                properties={"job_key": "job_value"}))
        workflow_config.add_job_config(
            config_key="test_job_1",
            job_config=af.BaseJobConfig(
                platform='kubernetes',
                engine='flink',
                properties={"job_key_1": "job_value_1"}))
        with open(config_file, 'w') as f:
            f.write(json_utils.dumps(workflow_config))

    generate_workflow_config()
    with af.global_config_file(config_path=config_file):
        with af.config(config="test_job") as cc:
            cc.properties['aa'] = 'aa'
            af.user_define_operation(executor=None)
        node_list = list(_default_ai_graph.nodes.values())
        self.assertEqual('python',
                         node_list[len(node_list) - 1].properties[ENGINE_NAME])
        self.assertEqual('common_value',
                         node_list[len(node_list) - 1].config.properties["common_key"])
        self.assertEqual('job_value',
                         node_list[len(node_list) - 1].config.properties["job_key"])
        self.assertEqual('aa',
                         node_list[len(node_list) - 1].config.properties["aa"])
        self.assertEqual('python', node_list[len(node_list) - 1].config.engine)
        self.assertEqual('local', node_list[len(node_list) - 1].config.platform)
        with af.config(config="test_job_1"):
            af.user_define_operation(executor=None)
        node_list = list(_default_ai_graph.nodes.values())
        self.assertEqual('flink',
                         node_list[len(node_list) - 1].properties[ENGINE_NAME])
        self.assertEqual('common_value',
                         node_list[len(node_list) - 1].config.properties["common_key"])
        self.assertEqual('job_value_1',
                         node_list[len(node_list) - 1].config.properties["job_key_1"])
        self.assertEqual('flink', node_list[len(node_list) - 1].config.engine)
        self.assertEqual('kubernetes',
                         node_list[len(node_list) - 1].config.platform)