def run_pipeline(path_to_config: Text):
    """Load a pipeline config YAML and run the pipeline it describes.

    The config already carries the metadata store, backends and artifact
    store, so nothing else needs to be specified here.

    Args:
        path_to_config: Path to the config of the designated pipeline.
            Has to match the YAML file name.
    """
    print(path_to_config)
    try:
        pipeline: TrainingPipeline = TrainingPipeline.from_config(
            read_yaml(path_to_config))
        pipeline.run()
    except Exception as e:
        # Surface any failure through the CLI error channel instead of a
        # raw traceback.
        error(e)
# Quickstart example: build a TrainingPipeline on the Pima Indians
# Diabetes CSV dataset.
from zenml.datasources import CSVDatasource
from zenml.pipelines import TrainingPipeline
from zenml.repo import Repository
from zenml.steps.evaluator import TFMAEvaluator
from zenml.steps.preprocesser import StandardPreprocesser
from zenml.steps.split import RandomSplit
from zenml.steps.trainer import TFFeedForwardTrainer
from zenml.exceptions import AlreadyExistsException

# Define the training pipeline
training_pipeline = TrainingPipeline()

# Add a datasource. This will automatically track and version it.
try:
    ds = CSVDatasource(name='Pima Indians Diabetes',
                       path='gs://zenml_quickstart/diabetes.csv')
except AlreadyExistsException:
    # The datasource was registered on an earlier run; reuse it from the repo.
    ds = Repository.get_instance().get_datasource_by_name(
        'Pima Indians Diabetes')
training_pipeline.add_datasource(ds)

# Add a split: 70% train / 30% eval, chosen at random.
training_pipeline.add_split(RandomSplit(split_map={'train': 0.7, 'eval': 0.3}))

# Add a preprocessing unit
# NOTE(review): chunk is truncated here — the StandardPreprocesser call
# continues past the visible source.
training_pipeline.add_preprocesser(
    StandardPreprocesser(features=[
        'times_pregnant', 'pgc', 'dbp', 'tst', 'insulin', 'bmi',
        'pedigree', 'age'
    ],
        labels=['has_diabetes'],
# NOTE(review): this chunk starts mid-file (the `os` import and earlier
# zenml imports are outside the visible source) and is truncated at the end.
from zenml.steps.preprocesser import StandardPreprocesser
from zenml.steps.split import RandomSplit
from zenml.steps.trainer import TFFeedForwardTrainer
from zenml.utils.naming_utils import transformed_label_name

# Required environment configuration for the GCAIP deployment.
GCP_PROJECT = os.getenv('GCP_PROJECT')
MODEL_NAME = os.getenv('MODEL_NAME')
# NOTE(review): `assert` is stripped under `python -O`; an explicit check
# with a raised error would be more robust — confirm before changing.
assert GCP_PROJECT
assert MODEL_NAME

# Deploy a tensorflow model on GCAIP. Note that no other trainer type
# works with this deployer except for the one shown here.

# Define the training pipeline
training_pipeline = TrainingPipeline()

# Add a datasource. This will automatically track and version it.
try:
    ds = CSVDatasource(name='Pima Indians Diabetes',
                       path='gs://zenml_quickstart/diabetes.csv')
except AlreadyExistsException:
    # The datasource was registered on an earlier run; reuse it from the repo.
    ds = Repository.get_instance().get_datasource_by_name(
        'Pima Indians Diabetes')
training_pipeline.add_datasource(ds)

# Add a split: 70% train / 30% eval, chosen at random.
training_pipeline.add_split(RandomSplit(split_map={'train': 0.7, 'eval': 0.3}))

# Add a preprocessing unit
# NOTE(review): truncated here — the preprocesser argument list continues
# past the visible source.
training_pipeline.add_preprocesser(
# NOTE(review): this chunk starts mid-file (`os`, `ZENML_ROOT`, `path_utils`
# and the zenml imports are outside the visible source); the outer `try:`
# below is never closed in view, so the chunk is truncated at the end.

# Set up a throwaway test repository under the test tree.
TEST_ROOT = os.path.join(ZENML_ROOT, "tests")
Repository.init_repo(TEST_ROOT, analytics_opt_in=False)

pipeline_root = os.path.join(TEST_ROOT, "pipelines")
csv_root = os.path.join(TEST_ROOT, "test_data")
image_root = os.path.join(csv_root, "images")

repo: Repository = Repository.get_instance()
# Start from a clean pipelines directory so stale runs don't interfere.
if path_utils.is_dir(pipeline_root):
    path_utils.rm_dir(pipeline_root)
repo.zenml_config.set_pipelines_dir(pipeline_root)

try:
    # Build five identically-configured pipelines, csvtest1..csvtest5.
    for i in range(1, 6):
        training_pipeline = TrainingPipeline(name='csvtest{0}'.format(i))

        try:
            # Add a datasource. This will automatically track and version it.
            ds = CSVDatasource(name='my_csv_datasource',
                               path=os.path.join(csv_root,
                                                 "my_dataframe.csv"))
        except AlreadyExistsException:
            # Registered by an earlier loop iteration (or run); reuse it.
            ds = repo.get_datasource_by_name("my_csv_datasource")

        training_pipeline.add_datasource(ds)

        # Add a split keyed on the categorical "name" column.
        training_pipeline.add_split(CategoricalDomainSplit(
            categorical_column="name",
            split_map={'train': ["arnold", "nicholas"],
                       'eval': ["lülük"]}))
# NOTE(review): this chunk starts mid-file (the `os` import and earlier
# zenml imports are outside the visible source) and is truncated at the end.
from zenml.steps.split import RandomSplit
from zenml.steps.trainer import TFFeedForwardTrainer
from zenml.exceptions import AlreadyExistsException

# Required environment configuration for the Cortex deployment.
GCP_BUCKET = os.getenv('GCP_BUCKET')
# NOTE(review): `assert` is stripped under `python -O`; an explicit check
# with a raised error would be more robust — confirm before changing.
assert GCP_BUCKET

CORTEX_ENV = os.getenv('CORTEX_ENV', 'env')
CORTEX_MODEL_NAME = os.getenv('CORTEX_MODEL_NAME', 'zenml-classifier')

# For this example, the ArtifactStore must be a GCP bucket, as the
# CortexDeployer step is using the GCP env.
from zenml.repo.repo import Repository

# Define the training pipeline
training_pipeline = TrainingPipeline()

# Add a datasource. This will automatically track and version it.
try:
    ds = CSVDatasource(name='Pima Indians Diabetes',
                       path='gs://zenml_quickstart/diabetes.csv')
except AlreadyExistsException:
    # The datasource was registered on an earlier run; reuse it from the repo.
    ds = Repository.get_instance().get_datasource_by_name(
        'Pima Indians Diabetes')
training_pipeline.add_datasource(ds)

# Add a split: 70% train / 30% eval, chosen at random.
training_pipeline.add_split(RandomSplit(split_map={'eval': 0.3, 'train': 0.7}))

# Add a preprocessing unit
# NOTE(review): truncated here — the preprocesser argument list continues
# past the visible source.
training_pipeline.add_preprocesser(
# Scikit-learn example: train a custom scikit trainer on the Pima Indians
# Diabetes CSV dataset.
from step.trainer import MyScikitTrainer
from zenml.datasources import CSVDatasource
from zenml.exceptions import AlreadyExistsException
from zenml.pipelines import TrainingPipeline
from zenml.repo import Repository
from zenml.steps.evaluator import AgnosticEvaluator
from zenml.steps.preprocesser import StandardPreprocesser
from zenml.steps.split import RandomSplit
from zenml.utils import naming_utils

# Define the training pipeline
training_pipeline = TrainingPipeline()

# Add a datasource. This will automatically track and version it.
try:
    ds = CSVDatasource(name='Pima Indians Diabetes',
                       path='gs://zenml_quickstart/diabetes.csv')
except AlreadyExistsException:
    # The datasource was registered on an earlier run; reuse it from the repo.
    ds = Repository.get_instance().get_datasource_by_name(
        'Pima Indians Diabetes')
training_pipeline.add_datasource(ds)

# Add a split: 70% train / 30% eval, chosen at random.
training_pipeline.add_split(RandomSplit(split_map={'train': 0.7, 'eval': 0.3}))

# Add a preprocessing unit
# NOTE(review): chunk is truncated here — the StandardPreprocesser call
# continues past the visible source.
training_pipeline.add_preprocesser(
    StandardPreprocesser(features=[
        'times_pregnant', 'pgc', 'dbp', 'tst', 'insulin', 'bmi',
        'pedigree', 'age'
    ],
# PyTorch example: train a feed-forward Torch trainer on the Pima Indians
# Diabetes CSV dataset.
from zenml.datasources import CSVDatasource
from zenml.exceptions import AlreadyExistsException
from zenml.pipelines import TrainingPipeline
from zenml.repo import Repository
from zenml.steps.evaluator import AgnosticEvaluator
from zenml.steps.preprocesser import StandardPreprocesser
from zenml.steps.split import RandomSplit
from zenml.steps.trainer import TorchFeedForwardTrainer
from zenml.utils import naming_utils

# Define the training pipeline
training_pipeline = TrainingPipeline()

# Add a datasource. This will automatically track and version it.
try:
    ds = CSVDatasource(name='Pima Indians Diabetes',
                       path='gs://zenml_quickstart/diabetes.csv')
except AlreadyExistsException:
    # The datasource was registered on an earlier run; reuse it from the repo.
    ds = Repository.get_instance().get_datasource_by_name(
        'Pima Indians Diabetes')
training_pipeline.add_datasource(ds)

# Add a split: 70% train / 30% eval, chosen at random.
training_pipeline.add_split(RandomSplit(split_map={'eval': 0.3, 'train': 0.7}))

# Add a preprocessing unit
# NOTE(review): chunk is truncated here — the StandardPreprocesser call
# continues past the visible source.
training_pipeline.add_preprocesser(
    StandardPreprocesser(features=[
        'times_pregnant', 'pgc', 'dbp', 'tst', 'insulin', 'bmi',
        'pedigree', 'age'
    ],
# NOTE(review): this chunk starts mid-file — GCP_REGION, MYSQL_DB,
# MYSQL_USER, MYSQL_PWD and all imports are defined outside the visible
# source; it is also truncated at the end.
# NOTE(review): `assert` is stripped under `python -O`; explicit checks
# with raised errors would be more robust — confirm before changing.
assert GCP_REGION
assert MYSQL_DB
assert MYSQL_USER
assert MYSQL_PWD

# Run the pipeline on a Google Cloud VM and train on GCP as well
# In order for this to work, the orchestrator and the backend should be in the
# same GCP project. Also, the metadata store and artifact store should be
# accessible by the orchestrator VM and the GCAIP worker VM.

# Note: If you are using a custom Trainer, then you need
# to build a new Docker image based on the ZenML Trainer image, and pass that
# into the `image` parameter in the SingleGPUTrainingGCAIPBackend.

# Define the training pipeline
training_pipeline = TrainingPipeline()

# Add a datasource. This will automatically track and version it.
try:
    ds = CSVDatasource(name='Pima Indians Diabetes',
                       path='gs://zenml_quickstart/diabetes.csv')
except AlreadyExistsException:
    # The datasource was registered on an earlier run; reuse it from the repo.
    ds = Repository.get_instance().get_datasource_by_name(
        'Pima Indians Diabetes')
training_pipeline.add_datasource(ds)

# Add a split: 70% train / 30% eval, chosen at random.
training_pipeline.add_split(RandomSplit(split_map={'train': 0.7, 'eval': 0.3}))

# Add a preprocessing unit
# NOTE(review): truncated here — the preprocesser argument list continues
# past the visible source.
training_pipeline.add_preprocesser(
# PyTorch Lightning example: train a custom Lightning trainer on the Pima
# Indians Diabetes CSV dataset.
from examples.pytorch_lightning.step.trainer import MyPyTorchLightningTrainer
from zenml.datasources import CSVDatasource
from zenml.pipelines import TrainingPipeline
from zenml.repo import Repository
from zenml.steps.preprocesser import StandardPreprocesser
from zenml.steps.split import RandomSplit
from zenml.exceptions import AlreadyExistsException

# Define the training pipeline
training_pipeline = TrainingPipeline()

# Add a datasource. This will automatically track and version it.
try:
    ds = CSVDatasource(name='Pima Indians Diabetes',
                       path='gs://zenml_quickstart/diabetes.csv')
except AlreadyExistsException:
    # The datasource was registered on an earlier run; reuse it from the repo.
    ds = Repository.get_instance().get_datasource_by_name(
        'Pima Indians Diabetes')
training_pipeline.add_datasource(ds)

# Add a split: 70% train / 30% eval, chosen at random.
training_pipeline.add_split(RandomSplit(split_map={'eval': 0.3, 'train': 0.7}))

# Add a preprocessing unit
# NOTE(review): chunk is truncated here — the `overwrite` mapping and the
# rest of the StandardPreprocesser call continue past the visible source.
training_pipeline.add_preprocesser(
    StandardPreprocesser(features=[
        'times_pregnant', 'pgc', 'dbp', 'tst', 'insulin', 'bmi',
        'pedigree', 'age'
    ],
        labels=['has_diabetes'],
        overwrite={
# Scikit-learn example (variant): train a custom scikit trainer on the
# Pima Indians Diabetes CSV dataset.
from zenml.datasources import CSVDatasource
from zenml.pipelines import TrainingPipeline
from zenml.repo import Repository
from zenml.steps.preprocesser import StandardPreprocesser
from zenml.steps.split import RandomSplit
from zenml.exceptions import AlreadyExistsException
from examples.scikit.step.trainer import MyScikitTrainer

# Define the training pipeline
training_pipeline = TrainingPipeline()

# Add a datasource. This will automatically track and version it.
try:
    ds = CSVDatasource(name='Pima Indians Diabetes',
                       path='gs://zenml_quickstart/diabetes.csv')
except AlreadyExistsException:
    # The datasource was registered on an earlier run; reuse it from the repo.
    ds = Repository.get_instance().get_datasource_by_name(
        'Pima Indians Diabetes')
training_pipeline.add_datasource(ds)

# Add a split: 70% train / 30% eval, chosen at random.
training_pipeline.add_split(RandomSplit(split_map={'train': 0.7, 'eval': 0.3}))

# Add a preprocessing unit
# NOTE(review): chunk is truncated here — the `overwrite` mapping and the
# rest of the StandardPreprocesser call continue past the visible source.
training_pipeline.add_preprocesser(
    StandardPreprocesser(features=[
        'times_pregnant', 'pgc', 'dbp', 'tst', 'insulin', 'bmi',
        'pedigree', 'age'
    ],
        labels=['has_diabetes'],
        overwrite={
# PyTorch Lightning example (local step import variant): train a custom
# Lightning trainer on the Pima Indians Diabetes CSV dataset.
from step.trainer import MyPyTorchLightningTrainer
from zenml.datasources import CSVDatasource
from zenml.exceptions import AlreadyExistsException
from zenml.pipelines import TrainingPipeline
from zenml.repo import Repository
from zenml.steps.evaluator import AgnosticEvaluator
from zenml.steps.preprocesser import StandardPreprocesser
from zenml.steps.split import RandomSplit
from zenml.utils import naming_utils

# Define the training pipeline
training_pipeline = TrainingPipeline()

# Add a datasource. This will automatically track and version it.
try:
    ds = CSVDatasource(name='Pima Indians Diabetes',
                       path='gs://zenml_quickstart/diabetes.csv')
except AlreadyExistsException:
    # The datasource was registered on an earlier run; reuse it from the repo.
    ds = Repository.get_instance().get_datasource_by_name(
        'Pima Indians Diabetes')
training_pipeline.add_datasource(ds)

# Add a split: 70% train / 30% eval, chosen at random.
training_pipeline.add_split(RandomSplit(split_map={'eval': 0.3, 'train': 0.7}))

# Add a preprocessing unit
# NOTE(review): chunk is truncated here — the StandardPreprocesser call
# continues past the visible source.
training_pipeline.add_preprocesser(
    StandardPreprocesser(features=[
        'times_pregnant', 'pgc', 'dbp', 'tst', 'insulin', 'bmi',
        'pedigree', 'age'
    ],