ZENML_ROOT = str(Path(zenml.__path__[0]).parent) TEST_ROOT = os.path.join(ZENML_ROOT, "tests") Repository.init_repo(TEST_ROOT, analytics_opt_in=False) pipeline_root = os.path.join(TEST_ROOT, "pipelines") csv_root = os.path.join(TEST_ROOT, "test_data") image_root = os.path.join(csv_root, "images") repo: Repository = Repository.get_instance() if path_utils.is_dir(pipeline_root): path_utils.rm_dir(pipeline_root) repo.zenml_config.set_pipelines_dir(pipeline_root) try: for i in range(1, 6): training_pipeline = TrainingPipeline(name='csvtest{0}'.format(i)) try: # Add a datasource. This will automatically track and version it. ds = CSVDatasource(name='my_csv_datasource', path=os.path.join(csv_root, "my_dataframe.csv")) except: ds = repo.get_datasource_by_name("my_csv_datasource") training_pipeline.add_datasource(ds) # Add a split training_pipeline.add_split( CategoricalDomainSplit(categorical_column="name", split_map={ 'train': ["arnold", "nicholas"],
from zenml.core.steps.preprocesser.standard_preprocesser \
    .standard_preprocesser import \
    StandardPreprocesser
from zenml.core.steps.split.random_split import RandomSplit
from zenml.core.steps.trainer.tensorflow_trainers.tf_ff_trainer import \
    FeedForwardTrainer

# Example: run a training pipeline on GCP-hosted backends. All of the
# placeholder values below (bucket, project, Cloud SQL coordinates,
# credentials) must be replaced with real ones before running.
artifact_store_path = 'gs://your-bucket-name/optional-subfolder'
project = 'PROJECT'  # the project to launch the VM in
cloudsql_connection_name = f'{project}:REGION:INSTANCE'
mysql_db = 'DATABASE'
mysql_user = '******'
# NOTE(review): never commit a real password; inject secrets via
# environment variables or a secret manager instead.
mysql_pw = 'PASSWORD'
# Sub-folder of the artifact store that the AI Platform trainer writes to.
training_job_dir = artifact_store_path + '/gcaiptrainer/'

training_pipeline = TrainingPipeline(name='GCP Orchestrated')

# Add a datasource. This will automatically track and version it.
ds = CSVDatasource(name='Pima Indians Diabetes',
                   path='gs://zenml_quickstart/diabetes.csv')
training_pipeline.add_datasource(ds)

# Add a split
training_pipeline.add_split(
    RandomSplit(split_map={'train': 0.7, 'eval': 0.3}))

# Add a preprocessing unit
training_pipeline.add_preprocesser(
    StandardPreprocesser(features=[
        'times_pregnant', 'pgc', 'dbp', 'tst', 'insulin', 'bmi', 'pedigree',
        'age'
    ],
from zenml.core.steps.split.random_split import RandomSplit from zenml.core.steps.trainer.tensorflow_trainers.tf_ff_trainer import \ FeedForwardTrainer from zenml.utils.exceptions import AlreadyExistsException GCP_PROJECT = os.getenv('GCP_PROJECT') MODEL_NAME = os.getenv('MODEL_NAME') assert GCP_PROJECT assert MODEL_NAME # Deploy a tensorflow model on GCAIP. Note that no other trainer type # works with this deployer except for the one shown here. # Define the training pipeline training_pipeline = TrainingPipeline() # Add a datasource. This will automatically track and version it. try: ds = CSVDatasource(name='Pima Indians Diabetes', path='gs://zenml_quickstart/diabetes.csv') except AlreadyExistsException: ds = Repository.get_instance().get_datasource_by_name( 'Pima Indians Diabetes') training_pipeline.add_datasource(ds) # Add a split training_pipeline.add_split(RandomSplit(split_map={'train': 0.7, 'eval': 0.3})) # Add a preprocessing unit training_pipeline.add_preprocesser(
from zenml.core.datasources.csv_datasource import CSVDatasource
from zenml.core.pipelines.training_pipeline import TrainingPipeline
from zenml.core.steps.preprocesser.standard_preprocesser \
    .standard_preprocesser import StandardPreprocesser
from zenml.core.steps.split.random_split import RandomSplit
from zenml.core.steps.trainer.pytorch_trainers.torch_ff_trainer import \
    FeedForwardTrainer

# Example: the standard quickstart pipeline, but trained with the PyTorch
# feed-forward trainer instead of the TensorFlow one.
training_pipeline = TrainingPipeline(name='ZenML with a Pytorch Trainer')

# Add a datasource. This will automatically track and version it.
ds = CSVDatasource(name='Pima Indians Diabetes',
                   path='gs://zenml_quickstart/diabetes.csv')
training_pipeline.add_datasource(ds)

# Add a split
training_pipeline.add_split(
    RandomSplit(split_map={'eval': 0.3, 'train': 0.7}))

# Add a preprocessing unit
training_pipeline.add_preprocesser(
    StandardPreprocesser(features=[
        'times_pregnant', 'pgc', 'dbp', 'tst', 'insulin', 'bmi', 'pedigree',
        'age'
    ],
        labels=['has_diabetes'],
        overwrite={
            'has_diabetes': {
                # Pass the label column through untouched.
                'transform': [{
                    'method': 'no_transform',
                    'parameters': {}
                }]
# distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express # or implied. See the License for the specific language governing # permissions and limitations under the License. from zenml.core.datasources.image_datasource import ImageDatasource from zenml.core.pipelines.training_pipeline import TrainingPipeline from zenml.core.steps.split.categorical_domain_split_step import \ CategoricalDomainSplit from zenml.core.repo.repo import Repository from examples.gan.gan_functions import CycleGANTrainer from examples.gan.preprocessing import GANPreprocessor repo: Repository = Repository().get_instance() gan_pipeline = TrainingPipeline(name="whynotletitfly", enable_cache=False) try: ds = ImageDatasource( name="gan_images", base_path="/Users/nicholasjunge/workspaces/maiot/ce_project/images_mini" ) except: ds = repo.get_datasource_by_name('gan_images') gan_pipeline.add_datasource(ds) gan_pipeline.add_split( CategoricalDomainSplit(categorical_column="label", split_map={ "train": [0],
OrchestratorKubernetesBackend
from zenml.core.datasources.csv_datasource import CSVDatasource
from zenml.core.metadata.mysql_metadata_wrapper import MySQLMetadataStore
from zenml.core.pipelines.training_pipeline import TrainingPipeline
from zenml.core.repo.artifact_store import ArtifactStore
from zenml.core.steps.evaluator.tfma_evaluator import TFMAEvaluator
from zenml.core.steps.preprocesser.standard_preprocesser \
    .standard_preprocesser import \
    StandardPreprocesser
from zenml.core.steps.split.random_split import RandomSplit
from zenml.core.steps.trainer.feedforward_trainer.trainer import \
    FeedForwardTrainer
import os

# Example: orchestrate a training pipeline on a Kubernetes cluster with a
# MySQL metadata store and a cloud artifact store.
training_pipeline = TrainingPipeline(name='kubernetes')

# Add a datasource. This will automatically track and version it.
try:
    ds = CSVDatasource(name='Pima Indians Diabetes',
                       path='gs://zenml_quickstart/diabetes.csv')
except Exception:
    # FIX: was a bare `except:`, which also swallows SystemExit and
    # KeyboardInterrupt; catch Exception instead.
    # A small nicety for people that have ran a quickstart before :)
    from zenml.core.repo.repo import Repository
    repo: Repository = Repository.get_instance()
    ds = repo.get_datasource_by_name("Pima Indians Diabetes")
training_pipeline.add_datasource(ds)

# Add a split
from zenml.core.datasources.csv_datasource import CSVDatasource
from zenml.core.pipelines.training_pipeline import TrainingPipeline
from zenml.core.repo.repo import Repository
from zenml.core.steps.evaluator.tfma_evaluator import TFMAEvaluator
from zenml.core.steps.preprocesser.standard_preprocesser \
    .standard_preprocesser import StandardPreprocesser
from zenml.core.steps.split.random_split import RandomSplit
from zenml.core.steps.trainer.tensorflow_trainers.tf_ff_trainer import \
    FeedForwardTrainer

# Example: build and run similar pipelines so their results can be
# compared via the Repository afterwards.

#########################
# CREATE FIRST PIPELINE #
########################
training_pipeline = TrainingPipeline(name='Experiment 1')

# Add a datasource. This will automatically track and version it.
ds = CSVDatasource(name='Pima Diabetes',
                   path='gs://zenml_quickstart/diabetes.csv')
training_pipeline.add_datasource(ds)

# Add a split
training_pipeline.add_split(
    RandomSplit(split_map={'train': 0.7, 'eval': 0.3}))

# Add a preprocessing unit
training_pipeline.add_preprocesser(
    StandardPreprocesser(features=[
        'times_pregnant', 'pgc', 'dbp', 'tst', 'insulin', 'bmi', 'pedigree',
        'age'
    ],
        labels=['has_diabetes'],
        overwrite={