def __init__(self, path: Text = None):
    """
    Construct a reference to a ZenML repository.

    Args:
        path (str): Path to the root of the repository.
    """
    if Repository.__instance__ is None:
        if path is None:
            try:
                # Start from cwd and traverse up until a zenml config
                # is found.
                path = Repository.get_zenml_dir(os.getcwd())
            except Exception:
                # If there isn't a zenml config, use the cwd.
                path = os.getcwd()

        if not path_utils.is_dir(path):
            raise Exception(f'{path} does not exist or is not a dir!')
        self.path = path

        # Hook up git; the path needs to contain a git folder.
        self.git_wrapper = GitWrapper(self.path)

        # Load the ZenML config.
        try:
            self.zenml_config = ZenMLConfig(self.path)
        except InitializationException:
            # We allow this because of the GCP orchestrator for now.
            self.zenml_config = None

        Repository.__instance__ = self
    else:
        raise Exception("You cannot create another Repository class!")
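# NOTE: a minimal usage sketch of the singleton behaviour above.
# `Repository.get_instance()` is the accessor used elsewhere in this repo;
# constructing a second Repository directly raises.
repo = Repository.get_instance()
print(repo.path)  # root of the repository on disk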
def get_pipeline_file_paths(
        self, only_file_names: bool = False) -> Optional[List[Text]]:
    """Gets a list of pipeline file paths."""
    self._check_if_initialized()

    pipelines_dir = self.zenml_config.get_pipelines_dir()

    if not path_utils.is_dir(pipelines_dir):
        return []

    return path_utils.list_dir(pipelines_dir, only_file_names)
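# NOTE: a minimal usage sketch, assuming an initialized repository. The
# `only_file_names` flag switches between full paths and bare file names.
repo = Repository.get_instance()
for pipeline_path in repo.get_pipeline_file_paths():
    print(pipeline_path)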
def read_files_from_disk(pipeline: beam.Pipeline,
                         base_path: Text) -> beam.pvalue.PCollection:
    """
    The Beam PTransform used to read data from a collection of CSV files
    on a local file system.

    Args:
        pipeline: Input beam.Pipeline object coming from a TFX Executor.
        base_path: Base path pointing either to the directory containing
            the CSV files, or to a (single) CSV file.

    Returns:
        A beam.PCollection of data points. Each row in the collection of
        CSV files represents a single data point.
    """
    wildcard_qualifier = "*"
    file_pattern = os.path.join(base_path, wildcard_qualifier)

    if path_utils.is_dir(base_path):
        csv_files = path_utils.list_dir(base_path)
        if not csv_files:
            raise RuntimeError(
                'Split pattern {} does not match any files.'.format(
                    file_pattern))
        # Read all files under the directory via the wildcard pattern.
        read_pattern = file_pattern
    else:
        if path_utils.file_exists(base_path):
            csv_files = [base_path]
        else:
            raise RuntimeError(f'{base_path} does not exist.')
        # Read the single file directly.
        read_pattern = base_path

    # Filter out files with disallowed extensions.
    allowed_file_exts = [".csv", ".txt"]  # ".dat"
    csv_files = [
        uri for uri in csv_files
        if os.path.splitext(uri)[1] in allowed_file_exts
    ]
    logger.info(f'Matched {len(csv_files)}: {csv_files}')

    # Always use the header from the first matched file.
    logger.info(f'Using header from file: {csv_files[0]}.')
    column_names = path_utils.load_csv_header(csv_files[0])
    logger.info(f'Header: {column_names}.')

    parsed_csv_lines = (
        pipeline
        | 'ReadFromText' >> beam.io.ReadFromText(file_pattern=read_pattern,
                                                 skip_header_lines=1)
        | 'ParseCSVLine' >> beam.ParDo(
            csv_decoder.ParseCSVLine(delimiter=','))
        | 'ExtractParsedCSVLines' >> beam.Map(
            lambda x: dict(zip(column_names, x[0]))))
    return parsed_csv_lines
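# NOTE: a minimal standalone sketch of invoking this PTransform outside of a
# TFX executor; the local runner default and the sample data path are
# illustrative assumptions.
import apache_beam as beam

with beam.Pipeline() as p:
    rows = read_files_from_disk(p, '/tmp/my_csv_data')
    rows | 'PrintRows' >> beam.Map(print)  # each row is a column->value dict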
def write_json(file_path: Text, contents: Dict):
    """
    Write contents as JSON format to file_path.

    Args:
        file_path (str): Path to JSON file.
        contents (dict): Contents of JSON file as dict.
    """
    if not path_utils.is_remote(file_path):
        dir_ = str(Path(file_path).parent)
        if not path_utils.is_dir(dir_):
            # If it is a local path and it doesn't exist, raise an exception.
            raise Exception(f'Directory {dir_} does not exist.')
    path_utils.write_file_contents(file_path, json.dumps(contents))
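# NOTE: a minimal usage sketch; the file path and payload are illustrative
# assumptions. The parent directory must already exist for local paths.
write_json('/tmp/artifact_store/metadata.json', {'version': 1})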
def run_fn(self):
    train_dataset = self.input_fn(self.train_files,
                                  self.tf_transform_output)
    eval_dataset = self.input_fn(self.eval_files,
                                 self.tf_transform_output)

    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    model = self.model_fn(train_dataset, eval_dataset)
    model.to(device)

    criterion = nn.BCEWithLogitsLoss()
    optimizer = optim.Adam(model.parameters(), lr=0.001)

    model.train()
    for e in range(1, self.epoch + 1):
        epoch_loss = 0
        epoch_acc = 0
        step_count = 0
        for x, y in train_dataset:
            step_count += 1
            X_batch, y_batch = x.to(device), y.to(device)
            optimizer.zero_grad()

            y_pred = model(X_batch)

            loss = criterion(y_pred, y_batch)
            acc = binary_acc(y_pred, y_batch)

            loss.backward()
            optimizer.step()

            epoch_loss += loss.item()
            epoch_acc += acc.item()

        print(f'Epoch {e:03}: | Loss: '
              f'{epoch_loss / step_count:.5f} | Acc: '
              f'{epoch_acc / step_count:.3f}')

    path_utils.create_dir_if_not_exists(self.serving_model_dir)
    if path_utils.is_remote(self.serving_model_dir):
        temp_model_dir = '__temp_model_dir__'
        temp_path = os.path.join(os.getcwd(), temp_model_dir)
        if path_utils.is_dir(temp_path):
            raise PermissionError(f'{temp_path} is used as a temp path '
                                  f'but it already exists. Please remove '
                                  f'it to continue.')
        # Save into a local temp directory first, then copy it over to the
        # remote serving directory.
        path_utils.create_dir_if_not_exists(temp_path)
        torch.save(model, os.path.join(temp_path, 'model.pt'))
        path_utils.copy_dir(temp_path, self.serving_model_dir)
        path_utils.rm_dir(temp_path)
    else:
        torch.save(model,
                   os.path.join(self.serving_model_dir, 'model.pt'))
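# NOTE: `binary_acc` is referenced above but not defined in this snippet. A
# minimal sketch of what such a helper might look like for logits against
# 0/1 targets; this is an assumption, not necessarily the repository's
# implementation.
import torch

def binary_acc(y_pred: torch.Tensor, y_true: torch.Tensor) -> torch.Tensor:
    """Fraction of rounded sigmoid predictions that match the targets."""
    preds = torch.round(torch.sigmoid(y_pred))
    return (preds == y_true).float().mean()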
def check_module_clean(self, source: Text):
    """
    Returns True if all files within source's module are committed.

    Args:
        source (str): relative module path pointing to a Class.
    """
    # Import here to resolve a circular dependency.
    from zenml.utils import source_utils

    # Get the module path.
    module_path = source_utils.get_module_source_from_source(source)

    # Get the relative path of the module because check_file_committed
    # needs it.
    module_dir = source_utils.get_relative_path_from_module_source(
        module_path)

    # Get the absolute path of the module because path_utils.list_dir
    # needs it.
    mod_abs_dir = source_utils.get_absolute_path_from_module_source(
        module_path)
    module_file_names = path_utils.list_dir(mod_abs_dir,
                                            only_file_names=True)

    # Go through each file in the module and check for uncommitted ones.
    for file_path in module_file_names:
        path = os.path.join(module_dir, file_path)

        # If the file is .gitignored, skip it.
        if len(self.git_repo.ignored(path)) > 0:
            continue

        if path_utils.is_dir(os.path.join(mod_abs_dir, file_path)):
            logger.warning(
                f'The step {source} is contained inside a module that '
                f'has sub-directories (the sub-directory {file_path} at '
                f'{mod_abs_dir}). For now, ZenML supports only a flat '
                f'directory structure in which to place Steps. Please '
                f'make sure that the Step does not utilize the '
                f'sub-directory.')

        if not self.check_file_committed(path):
            return False
    return True
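# NOTE: a minimal usage sketch; the step source string is an illustrative
# assumption and would normally point at a Step class in your repository.
git_wrapper = Repository.get_instance().git_wrapper
if not git_wrapper.check_module_clean('steps.trainer.MyTrainer'):
    raise Exception('Please commit your step code before running.')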
from zenml.logger import get_logger

logger = get_logger(__name__)

# Reset the pipeline root to redirect to tests so that the YAMLs are
# written there.
ZENML_ROOT = str(Path(zenml.__path__[0]).parent)
TEST_ROOT = os.path.join(ZENML_ROOT, "tests")

Repository.init_repo(TEST_ROOT, analytics_opt_in=False)

pipeline_root = os.path.join(TEST_ROOT, "pipelines")
csv_root = os.path.join(TEST_ROOT, "test_data")
image_root = os.path.join(csv_root, "images")

repo: Repository = Repository.get_instance()
if path_utils.is_dir(pipeline_root):
    path_utils.rm_dir(pipeline_root)
repo.zenml_config.set_pipelines_dir(pipeline_root)

try:
    for i in range(1, 6):
        training_pipeline = TrainingPipeline(name='csvtest{0}'.format(i))

        try:
            # Add a datasource. This will automatically track and
            # version it.
            ds = CSVDatasource(name='my_csv_datasource',
                               path=os.path.join(csv_root,
                                                 "my_dataframe.csv"))
        except AlreadyExistsException:
            ds = repo.get_datasource_by_name("my_csv_datasource")

        training_pipeline.add_datasource(ds)
def run_fn(self):
    train_dataset = self.input_fn(self.train_files,
                                  self.tf_transform_output)
    eval_dataset = self.input_fn(self.eval_files,
                                 self.tf_transform_output)

    class LitModel(pl.LightningModule):
        def __init__(self):
            super().__init__()
            self.l1 = torch.nn.Linear(8, 64)
            self.layer_out = torch.nn.Linear(64, 1)

        def forward(self, x):
            x = torch.relu(self.l1(x))
            x = self.layer_out(x)
            return x

        def training_step(self, batch, batch_idx):
            x, y = batch
            y_hat = self(x)
            loss = F.binary_cross_entropy_with_logits(y_hat, y)
            tensorboard_logs = {'train_loss': loss}
            return {'loss': loss, 'log': tensorboard_logs}

        def configure_optimizers(self):
            return torch.optim.Adam(self.parameters(), lr=0.001)

        def train_dataloader(self):
            return train_dataset

        def validation_step(self, batch, batch_idx):
            x, y = batch
            y_hat = self(x)
            return {
                'val_loss': F.binary_cross_entropy_with_logits(y_hat, y)
            }

        def validation_epoch_end(self, outputs):
            avg_loss = torch.stack([x['val_loss'] for x in outputs]).mean()
            tensorboard_logs = {'val_loss': avg_loss}
            return {'avg_val_loss': avg_loss, 'log': tensorboard_logs}

        def val_dataloader(self):
            return eval_dataset

    model = LitModel()

    # The most basic trainer; uses good defaults.
    trainer = Trainer(
        default_root_dir=self.log_dir,
        max_epochs=self.epoch,
    )
    trainer.fit(model)

    path_utils.create_dir_if_not_exists(self.serving_model_dir)
    if path_utils.is_remote(self.serving_model_dir):
        temp_model_dir = '__temp_model_dir__'
        temp_path = os.path.join(os.getcwd(), temp_model_dir)
        if path_utils.is_dir(temp_path):
            raise PermissionError(f'{temp_path} is used as a temp path '
                                  f'but it already exists. Please remove '
                                  f'it to continue.')
        # Save into a local temp directory first, then copy it over to the
        # remote serving directory.
        path_utils.create_dir_if_not_exists(temp_path)
        trainer.save_checkpoint(os.path.join(temp_path, 'model.ckpt'))
        path_utils.copy_dir(temp_path, self.serving_model_dir)
        path_utils.rm_dir(temp_path)
    else:
        trainer.save_checkpoint(
            os.path.join(self.serving_model_dir, 'model.ckpt'))
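# NOTE: a minimal sketch of restoring the saved checkpoint for inference; the
# checkpoint path is an illustrative assumption. Since `LitModel` is defined
# inside run_fn above, a real loader would need the class defined at module
# scope so it can be imported.
model = LitModel.load_from_checkpoint('serving_model_dir/model.ckpt')
model.eval()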
def run_fn(self):
    train_dataset = self.input_fn(self.train_files,
                                  self.tf_transform_output)
    eval_dataset = self.input_fn(self.eval_files,
                                 self.tf_transform_output)

    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    model = self.model_fn(train_dataset, eval_dataset)
    model.to(device)

    criterion = nn.BCEWithLogitsLoss()
    optimizer = optim.Adam(model.parameters(), lr=0.001)

    writer = SummaryWriter(self.log_dir)

    model.train()
    total_count = 0
    for e in range(1, self.epochs + 1):
        epoch_loss = 0
        epoch_acc = 0
        step_count = 0
        for x, y, _ in train_dataset:
            step_count += 1
            total_count += 1

            x_batch = torch.cat([v.to(device) for v in x.values()], dim=-1)
            y_batch = torch.cat([v.to(device) for v in y.values()], dim=-1)

            optimizer.zero_grad()

            y_pred = model(x_batch)

            loss = criterion(y_pred, y_batch)
            acc = binary_acc(y_pred, y_batch)

            loss.backward()
            optimizer.step()

            epoch_loss += loss.item()
            epoch_acc += acc.item()

            if e == 1 and step_count == 1:
                writer.add_graph(model, x_batch)

            writer.add_scalar('training_loss', loss, total_count)
            writer.add_scalar('training_accuracy', acc, total_count)

        print(f'Epoch {e:03}: | Loss: '
              f'{epoch_loss / step_count:.5f} | Acc: '
              f'{epoch_acc / step_count:.3f}')

    # Evaluate on the eval split and persist the results.
    test_results = self.test_fn(model, eval_dataset)
    utils.save_test_results(test_results, self.test_results)

    path_utils.create_dir_if_not_exists(self.serving_model_dir)
    if path_utils.is_remote(self.serving_model_dir):
        temp_model_dir = '__temp_model_dir__'
        temp_path = os.path.join(os.getcwd(), temp_model_dir)
        if path_utils.is_dir(temp_path):
            raise PermissionError(f'{temp_path} is used as a temp path '
                                  f'but it already exists. Please remove '
                                  f'it to continue.')
        # Save into a local temp directory first, then copy it over to the
        # remote serving directory.
        path_utils.create_dir_if_not_exists(temp_path)
        torch.save(model, os.path.join(temp_path, 'model.pt'))
        path_utils.copy_dir(temp_path, self.serving_model_dir)
        path_utils.rm_dir(temp_path)
    else:
        torch.save(model,
                   os.path.join(self.serving_model_dir, 'model.pt'))
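# NOTE: a minimal sketch of loading the persisted model back for inference;
# the path is an illustrative assumption. torch.load on a fully pickled model
# requires the original model class to be importable at load time.
import torch

model = torch.load('serving_model_dir/model.pt',
                   map_location=torch.device('cpu'))
model.eval()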