def set_artifact_store(self, artifact_store_path: Text):
    """
    Updates artifact store to point to path.

    Args:
        artifact_store_path: New path to artifact store.
    """
    self.artifact_store = ArtifactStore(artifact_store_path)
    self.save()
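# Hypothetical usage sketch, assuming `repo` is an instance of the
# Repository-like object that defines set_artifact_store() and save():
# repo.set_artifact_store('gs://my-bucket/new_artifact_store')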
@classmethod
def from_config(cls, config: Dict):
    """
    Converts a pipeline config to a ZenML Pipeline object.

    All steps are also populated, and configuration is set to the
    parameters set in the config file.

    Args:
        config: a ZenML config in dict-form (probably loaded from YAML).
    """
    # artifact store
    artifact_store = ArtifactStore(config[keys.GlobalKeys.ARTIFACT_STORE])

    # metadata store
    metadata_store = ZenMLMetadataStore.from_config(
        config=config[keys.GlobalKeys.METADATA_STORE])

    # orchestration backend
    backend = OrchestratorBaseBackend.from_config(
        config[keys.GlobalKeys.BACKEND])

    # pipeline configuration
    p_config = config[keys.GlobalKeys.PIPELINE]
    pipeline_name = p_config[keys.PipelineKeys.NAME]
    pipeline_source = p_config[keys.PipelineKeys.SOURCE]

    # populate steps
    steps_dict: Dict = {}
    for step_key, step_config in p_config[keys.PipelineKeys.STEPS].items():
        steps_dict[step_key] = BaseStep.from_config(step_config)

    # datasource (its config lives under the pipeline key)
    datasource = BaseDatasource.from_config(
        config[keys.GlobalKeys.PIPELINE])

    # enable cache
    enable_cache = p_config[keys.PipelineKeys.ENABLE_CACHE]

    class_ = source_utils.load_source_path_class(pipeline_source)

    obj = class_(
        name=cls.get_name_from_pipeline_name(pipeline_name),
        pipeline_name=pipeline_name,
        enable_cache=enable_cache,
        steps_dict=steps_dict,
        backend=backend,
        artifact_store=artifact_store,
        metadata_store=metadata_store,
        datasource=datasource)
    obj._immutable = True
    logger.debug(f'Pipeline {pipeline_name} loaded and is immutable.')
    return obj
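# For reference, a minimal sketch of the dict-form config that from_config
# above consumes, inferred from the keys it reads. It reuses the same `keys`
# constants as the loader; every concrete value below is hypothetical.
example_config = {
    keys.GlobalKeys.ARTIFACT_STORE: 'gs://my-bucket/artifact_store',
    keys.GlobalKeys.METADATA_STORE: {},  # passed to ZenMLMetadataStore.from_config
    keys.GlobalKeys.BACKEND: {},  # passed to OrchestratorBaseBackend.from_config
    keys.GlobalKeys.PIPELINE: {
        keys.PipelineKeys.NAME: 'training_pipeline_1',      # hypothetical
        keys.PipelineKeys.SOURCE: 'my_module.MyPipeline',   # resolved via source_utils
        keys.PipelineKeys.STEPS: {},  # per-step configs for BaseStep.from_config
        keys.PipelineKeys.ENABLE_CACHE: True,
    },
}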
def load_config(self, config_dict: Dict):
    """
    Sets metadata and artifact_store variables.

    Args:
        config_dict (dict): A .zenml config in dict form.
    """
    assert METADATA_KEY in config_dict
    assert ARTIFACT_STORE_KEY in config_dict
    assert PIPELINES_DIR_KEY in config_dict
    self.artifact_store = ArtifactStore(config_dict[ARTIFACT_STORE_KEY])
    self.metadata_store = ZenMLMetadataStore.from_config(
        config=config_dict[METADATA_KEY])
    self.pipelines_dir = config_dict[PIPELINES_DIR_KEY]
def from_config(self, config_dict: Dict):
    """
    Sets metadata and artifact_store variables.

    Args:
        config_dict (dict): A .zenml config object in dict format.
    """
    assert METADATA_KEY in config_dict
    assert ARTIFACT_STORE_KEY in config_dict
    assert PIPELINES_DIR_KEY in config_dict
    self.artifact_store = ArtifactStore(config_dict[ARTIFACT_STORE_KEY])
    self.metadata_store = ZenMLMetadataStore.from_config(
        config=config_dict[METADATA_KEY])
    self.pipelines_dir = config_dict[PIPELINES_DIR_KEY]
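# A minimal sketch of the dict that load_config/from_config above expect,
# based on the asserted keys; the paths shown are hypothetical.
example_config = {
    ARTIFACT_STORE_KEY: '/tmp/zenml/artifact_store',
    METADATA_KEY: {},  # metadata store config for ZenMLMetadataStore.from_config
    PIPELINES_DIR_KEY: '/tmp/zenml/pipelines',
}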
@classmethod
def from_config(cls, config: Dict):
    """
    Converts a pipeline config to a ZenML Pipeline object.

    All steps are also populated, and configuration is set to the
    parameters set in the config file.

    Args:
        config: a ZenML config in dict-form (probably loaded from YAML).
    """
    # populate steps
    steps_dict: Dict = {}
    for step_key, step_config in config[keys.GlobalKeys.STEPS].items():
        steps_dict[step_key] = BaseStep.from_config(step_config)

    env = config[keys.GlobalKeys.ENV]
    pipeline_name = env[keys.EnvironmentKeys.EXPERIMENT_NAME]
    name = BasePipeline.get_name_from_pipeline_name(
        pipeline_name=pipeline_name)

    backends_dict: Dict = {}
    for backend_key, backend_config in env[
            keys.EnvironmentKeys.BACKENDS].items():
        backends_dict[backend_key] = BaseBackend.from_config(
            backend_key, backend_config)

    artifact_store = ArtifactStore(
        env[keys.EnvironmentKeys.ARTIFACT_STORE])

    metadata_store = ZenMLMetadataStore.from_config(
        config=env[METADATA_KEY])

    datasource = BaseDatasource.from_config(config)

    from zenml.core.pipelines.pipeline_factory import pipeline_factory
    pipeline_type = BasePipeline.get_type_from_pipeline_name(pipeline_name)
    class_ = pipeline_factory.get_pipeline_by_type(pipeline_type)

    # TODO: [MEDIUM] Perhaps move some of the logic in the init block here,
    #  especially regarding inferring immutability.
    return class_(
        name=name,
        pipeline_name=pipeline_name,
        enable_cache=env[keys.EnvironmentKeys.ENABLE_CACHE],
        steps_dict=steps_dict,
        backends_dict=backends_dict,
        artifact_store=artifact_store,
        metadata_store=metadata_store,
        datasource=datasource)
# Define the orchestrator backend
orchestrator_backend = OrchestratorGCPBackend(
    cloudsql_connection_name=cloudsql_connection_name,
    project=project)

# Define the training backend
training_backend = SingleGPUTrainingGCAIPBackend(
    project=project,
    job_dir=training_job_dir)

# Define the metadata store
metadata_store = MySQLMetadataStore(
    host='127.0.0.1',
    port=3306,
    database=mysql_db,
    username=mysql_user,
    password=mysql_pw,
)

# Define the artifact store
artifact_store = ArtifactStore(artifact_store_path)

# Run the pipeline
training_pipeline.run(
    backends=[orchestrator_backend, training_backend],
    metadata_store=metadata_store,
    artifact_store=artifact_store,
)
# Add an evaluator
training_pipeline.add_evaluator(
    TFMAEvaluator(
        slices=[['has_diabetes']],
        metrics={'has_diabetes': ['binary_crossentropy',
                                  'binary_accuracy']}))

# Add cortex deployer
api_config = {
    "name": CORTEX_MODEL_NAME,
    "kind": "RealtimeAPI",
    "predictor": {
        "type": "tensorflow",
        # Set the signature key of the model as we are using the
        # Tensorflow Trainer
        "models": {"signature_key": "serving_default"},
    },
}
training_pipeline.add_deployment(
    CortexDeployer(
        env=CORTEX_ENV,
        api_config=api_config,
        predictor=TensorFlowPredictor,
    ))

# Define the artifact store
artifact_store = ArtifactStore(
    os.path.join(GCP_BUCKET, 'cortex/artifact_store'))

# Run the pipeline
training_pipeline.run(artifact_store=artifact_store)
# Add an evaluator
training_pipeline.add_evaluator(
    TFMAEvaluator(
        slices=[['has_diabetes']],
        metrics={'has_diabetes': ['binary_crossentropy',
                                  'binary_accuracy']}))

# Define the metadata store
metadata_store = MySQLMetadataStore(
    host=MYSQL_HOST,
    port=int(MYSQL_PORT),
    database=MYSQL_DB,
    username=MYSQL_USER,
    password=MYSQL_PWD,
)

# Define the artifact store
artifact_store = ArtifactStore(
    os.path.join(GCP_BUCKET, 'gcp_gcaip_training/artifact_store'))

# Define the orchestrator backend
orchestrator_backend = OrchestratorGCPBackend(
    cloudsql_connection_name=GCP_CLOUD_SQL_INSTANCE_NAME,
    project=GCP_PROJECT)

# Run the pipeline
training_pipeline.run(
    backend=orchestrator_backend,
    metadata_store=metadata_store,
    artifact_store=artifact_store,
)
# Add an evaluator
training_pipeline.add_evaluator(
    TFMAEvaluator(
        slices=[['has_diabetes']],
        metrics={'has_diabetes': ['binary_crossentropy',
                                  'binary_accuracy']}))

# Define the metadata store
metadata_store = MySQLMetadataStore(
    host=MYSQL_HOST,
    port=int(MYSQL_PORT),
    database=MYSQL_DB,
    username=MYSQL_USER,
    password=MYSQL_PWD,
)

# Define the artifact store
artifact_store = ArtifactStore(
    os.path.join(GCP_BUCKET, 'gcp_orchestrated/artifact_store'))

# Define the orchestrator backend
orchestrator_backend = OrchestratorGCPBackend(
    cloudsql_connection_name=CONNECTION_NAME,
    project=GCP_PROJECT,
    preemptible=True,  # reduce costs by using preemptible instances
)

# Run the pipeline
training_pipeline.run(
    backend=orchestrator_backend,
    metadata_store=metadata_store,
    artifact_store=artifact_store,
)
# Add an evaluator
training_pipeline.add_evaluator(
    TFMAEvaluator(
        slices=[['has_diabetes']],
        metrics={'has_diabetes': ['binary_crossentropy',
                                  'binary_accuracy']}))

# Define the metadata store
metadata_store = MySQLMetadataStore(
    host=MYSQL_HOST,
    port=int(MYSQL_PORT),
    database=MYSQL_DB,
    username=MYSQL_USER,
    password=MYSQL_PWD,
)

# Define the artifact store
artifact_store = ArtifactStore(
    os.path.join(GCP_BUCKET, 'kubernetes_orchestrated/artifact_store'))

# Define the orchestrator backend
orchestrator_backend = OrchestratorKubernetesBackend(
    kubernetes_config_path=K8S_CONFIG_PATH,
    image_pull_policy="Always")

# Run the pipeline on a Kubernetes Cluster
training_pipeline.run(
    backend=orchestrator_backend,
    metadata_store=metadata_store,
    artifact_store=artifact_store,
)
# Add a preprocesser (the opening of this call was truncated in the
# snippet; StandardPreprocesser is assumed, matching the standard ZenML
# quickstart pattern for these arguments)
training_pipeline.add_preprocesser(
    StandardPreprocesser(
        features=['times_pregnant', 'pgc', 'dbp', 'tst', 'insulin',
                  'bmi', 'pedigree', 'age'],
        labels=['has_diabetes'],
        overwrite={'has_diabetes': {
            'transform': [{'method': 'no_transform', 'parameters': {}}]}}
    ).with_backend(processing_backend)
)

# Add a trainer
training_pipeline.add_trainer(FeedForwardTrainer(
    loss='binary_crossentropy',
    last_activation='sigmoid',
    output_units=1,
    metrics=['accuracy'],
    epochs=20))

# Add an evaluator
training_pipeline.add_evaluator(
    TFMAEvaluator(
        slices=[['has_diabetes']],
        metrics={'has_diabetes': ['binary_crossentropy',
                                  'binary_accuracy']}
    ).with_backend(processing_backend)
)

# Define the artifact store
artifact_store = ArtifactStore(
    os.path.join(GCP_BUCKET, 'dataflow_processing/artifact_store'))

# Run the pipeline
training_pipeline.run(artifact_store=artifact_store)
# Add an evaluator
training_pipeline.add_evaluator(
    TFMAEvaluator(
        slices=[['has_diabetes']],
        metrics={'has_diabetes': ['binary_crossentropy',
                                  'binary_accuracy']}))

# Important details:
artifact_store_bucket = 'gs://rndm-strg/zenml-k8s-test/'

mysql_host = 'cloudsql'
mysql_port = 3306
mysql_db = 'zenml'
mysql_user = USERNAME
mysql_pw = PASSWORD

# Path to your kubernetes config:
k8s_config_path = os.path.join(os.environ["HOME"], '.kube/config')

# Run the pipeline on a Kubernetes Cluster
training_pipeline.run(
    backends=[OrchestratorKubernetesBackend(
        kubernetes_config_path=k8s_config_path,
        image_pull_policy="Always")],
    metadata_store=MySQLMetadataStore(
        host=mysql_host,
        port=mysql_port,
        database=mysql_db,
        username=mysql_user,
        password=mysql_pw,
    ),
    artifact_store=ArtifactStore(artifact_store_bucket),
)