def __init__(
    self,
    input_features: List[Dict[str, Any]],
    output_features: List[Dict[str, Any]],
    random_seed: int = None,  # NOTE(review): semantically Optional[int]; None means "do not seed"
    **_kwargs,
):
    """Build the model from input/output feature definitions.

    Args:
        input_features: List of input feature config dicts.
        output_features: List of output feature config dicts.
        random_seed: Seed forwarded to the base class; may be None.
        **_kwargs: Ignored extra keyword arguments (accepted for config compatibility).

    Raises:
        KeyError: If an input feature name collides with a ``torch.nn.ModuleDict``
            class attribute.
    """
    super().__init__(random_seed=random_seed)

    # Deep-copy so that feature construction below cannot mutate the caller's
    # config dicts.
    self._input_features_def = copy.deepcopy(input_features)
    self._output_features_def = copy.deepcopy(output_features)

    # ================ Inputs ================
    self.input_features = LudwigFeatureDict()
    try:
        self.input_features.update(
            self.build_inputs(self._input_features_def))
    except KeyError as e:
        # ModuleDict reserves its own attribute names; a feature named e.g.
        # "type" would silently shadow them, so we fail loudly instead.
        raise KeyError(
            f"An input feature has a name that conflicts with a class attribute of torch's ModuleDict: {e}"
        )

    # ================ Outputs ================
    # Output feature size is derived from this model's (flattened) input shape.
    self.output_features = LudwigFeatureDict()
    self.output_features.update(
        self.build_outputs(self._output_features_def,
                           input_size=self.input_shape[-1]))

    # ================ Combined loss metric ================
    # Running means over evaluation batches; combined later into one scalar.
    self.eval_loss_metric = torchmetrics.MeanMetric()
    self.eval_additional_losses_metrics = torchmetrics.MeanMetric()

    # Populated by training / compile() respectively; None until then.
    self.lgb_booster: lgb.Booster = None
    self.compiled_model: torch.nn.Module = None
def test_progress_tracker_empty():
    """A freshly created progress tracker logs only its initial counters."""
    features = LudwigFeatureDict()
    feature_config = {
        "name": "category_feature",
        "input_size": 10,
        "num_classes": 3,
    }
    features["category_feature"] = CategoryOutputFeature(feature_config, {})

    tracker = trainer_utils.get_new_progress_tracker(
        batch_size=5,
        best_eval_metric=0,
        best_reduce_learning_rate_eval_metric=0,
        best_increase_batch_size_eval_metric=0,
        learning_rate=0.01,
        output_features=features,
    )

    # No metrics have been appended yet, so only scalar state is logged.
    expected = {
        "batch_size": 5,
        "best_valid_metric": 0,
        "epoch": 0,
        "last_improvement_steps": 0,
        "learning_rate": 0.01,
        "num_increases_bs": 0,
        "num_reductions_lr": 0,
        "steps": 0,
        "tune_checkpoint_num": 0,
    }
    assert tracker.log_metrics() == expected
def test_progress_tracker():
    """Appended validation metrics surface in log_metrics() as the latest value."""
    features = LudwigFeatureDict()
    feature_config = {
        "name": "category_feature",
        "input_size": 10,
        "num_classes": 3,
    }
    features["category_feature"] = CategoryOutputFeature(feature_config, {})

    tracker = trainer_utils.get_new_progress_tracker(
        batch_size=5,
        best_eval_metric=0,
        best_reduce_learning_rate_eval_metric=0,
        best_increase_batch_size_eval_metric=0,
        learning_rate=0.01,
        output_features=features,
    )

    # Record two combined-loss measurements; only the most recent one should
    # be reported by log_metrics().
    for step, value in ((10, 0.1), (20, 0.2)):
        tracker.validation_metrics[COMBINED][LOSS].append(
            TrainerMetric(epoch=1, step=step, value=value))

    expected = {
        "batch_size": 5,
        "best_valid_metric": 0,
        "epoch": 0,
        "last_improvement_steps": 0,
        "learning_rate": 0.01,
        "num_increases_bs": 0,
        "num_reductions_lr": 0,
        "steps": 0,
        "tune_checkpoint_num": 0,
        "validation_metrics.combined.loss": 0.2,
    }
    assert tracker.log_metrics() == expected
def __init__(
    self,
    input_features_def,
    combiner_def,
    output_features_def,
    random_seed=None,
):
    """Build the encoder/combiner/decoder stack from config definitions.

    Args:
        input_features_def: List of input feature config dicts.
        combiner_def: Combiner config dict; must contain a TYPE key.
        output_features_def: List of output feature config dicts.
        random_seed: If not None, seeds torch RNG before layer construction
            so weight initialization is reproducible.

    Raises:
        KeyError: If an input feature name collides with a
            ``torch.nn.ModuleDict`` class attribute.
    """
    # Deep-copy so that construction below cannot mutate the caller's configs.
    self._input_features_def = copy.deepcopy(input_features_def)
    self._combiner_def = copy.deepcopy(combiner_def)
    self._output_features_def = copy.deepcopy(output_features_def)

    self._random_seed = random_seed

    # Seed BEFORE building any layers so their initialization is deterministic.
    if random_seed is not None:
        torch.random.manual_seed(random_seed)

    super().__init__()

    # ================ Inputs ================
    self.input_features = LudwigFeatureDict()
    try:
        self.input_features.update(build_inputs(self._input_features_def))
    except KeyError as e:
        raise KeyError(
            f"An input feature has a name that conflicts with a class attribute of torch's ModuleDict: {e}"
        )

    # ================ Combiner ================
    logger.debug(f"Combiner {combiner_def[TYPE]}")
    combiner_class = get_combiner_class(combiner_def[TYPE])
    # Split the raw config into schema-validated fields plus passthrough kwargs.
    config, kwargs = load_config_with_kwargs(
        combiner_class.get_schema_cls(),
        combiner_def,
    )
    self.combiner = combiner_class(input_features=self.input_features, config=config, **kwargs)

    # ================ Outputs ================
    # Output features are built against the combiner (for sizes/dependencies).
    self.output_features = LudwigFeatureDict()
    self.output_features.update(build_outputs(self._output_features_def, self.combiner))

    # ================ Combined loss metric ================
    # Running means over evaluation batches.
    self.eval_loss_metric = torchmetrics.MeanMetric()
    self.eval_additional_losses_metrics = torchmetrics.MeanMetric()

    # After constructing all layers, clear the cache to free up memory
    clear_data_cache()
class ECD(LudwigModule):
    """Encoder-Combiner-Decoder model.

    Encodes each input feature, combines the encodings with a configurable
    combiner, then decodes the combined representation into one output per
    output feature.
    """

    def __init__(
        self,
        input_features_def,
        combiner_def,
        output_features_def,
        random_seed=None,
    ):
        """Build the encoder/combiner/decoder stack from config definitions.

        Args:
            input_features_def: List of input feature config dicts.
            combiner_def: Combiner config dict; must contain a TYPE key.
            output_features_def: List of output feature config dicts.
            random_seed: If not None, seeds torch RNG before layer construction.

        Raises:
            KeyError: If an input feature name collides with a
                ``torch.nn.ModuleDict`` class attribute.
        """
        # Deep-copy so construction cannot mutate the caller's configs.
        self._input_features_def = copy.deepcopy(input_features_def)
        self._combiner_def = copy.deepcopy(combiner_def)
        self._output_features_def = copy.deepcopy(output_features_def)

        self._random_seed = random_seed

        # Seed before building layers so initialization is deterministic.
        if random_seed is not None:
            torch.random.manual_seed(random_seed)

        super().__init__()

        # ================ Inputs ================
        self.input_features = LudwigFeatureDict()
        try:
            self.input_features.update(build_inputs(self._input_features_def))
        except KeyError as e:
            raise KeyError(
                f"An input feature has a name that conflicts with a class attribute of torch's ModuleDict: {e}"
            )

        # ================ Combiner ================
        logger.debug(f"Combiner {combiner_def[TYPE]}")
        combiner_class = get_combiner_class(combiner_def[TYPE])
        config, kwargs = load_config_with_kwargs(
            combiner_class.get_schema_cls(),
            combiner_def,
        )
        self.combiner = combiner_class(input_features=self.input_features, config=config, **kwargs)

        # ================ Outputs ================
        self.output_features = LudwigFeatureDict()
        self.output_features.update(build_outputs(self._output_features_def, self.combiner))

        # ================ Combined loss metric ================
        self.eval_loss_metric = torchmetrics.MeanMetric()
        self.eval_additional_losses_metrics = torchmetrics.MeanMetric()

        # After constructing all layers, clear the cache to free up memory
        clear_data_cache()

    def get_model_inputs(self):
        """Return a dict of sample input tensors, one per input feature (used for tracing)."""
        return {
            input_feature_name: input_feature.create_sample_input()
            for input_feature_name, input_feature in self.input_features.items()
        }

    def get_model_size(self) -> int:
        """Return the total number of parameters in the model."""
        total_size = 0
        for named_param in self.collect_weights():
            total_size += named_param[1].detach().numpy().size
        return total_size

    def to_torchscript(self):
        """Trace the model (in eval mode) into a TorchScript module."""
        self.eval()
        model_inputs = self.get_model_inputs()
        # We set strict=False to enable dict inputs and outputs.
        return torch.jit.trace(self, model_inputs, strict=False)

    def save_torchscript(self, save_path):
        """Trace and save the model as TorchScript at `save_path`."""
        traced = self.to_torchscript()
        traced.save(save_path)

    @property
    def input_shape(self):
        # TODO(justin): Remove dummy implementation. Make input_shape and output_shape functions.
        return torch.Size([1, 1])

    def forward(
        self,
        inputs: Union[
            Dict[str, torch.Tensor], Dict[str, np.ndarray], Tuple[Dict[str, torch.Tensor], Dict[str, torch.Tensor]]
        ],
        mask=None,
    ) -> Dict[str, torch.Tensor]:
        """Forward pass of the model.

        Args:
            inputs: Inputs to the model. Can be a dictionary of input names to
                input tensors or a tuple of (inputs, targets) where inputs is
                a dictionary of input names to input tensors and targets is a
                dictionary of target names to target tensors.
            mask: A mask for the inputs.

        Returns:
            A dictionary of output {feature name}::{tensor_name} -> output tensor.
        """
        if isinstance(inputs, tuple):
            inputs, targets = inputs
            # Convert targets to tensors.
            for target_feature_name, target_value in targets.items():
                if not isinstance(target_value, torch.Tensor):
                    targets[target_feature_name] = torch.from_numpy(target_value)
                else:
                    targets[target_feature_name] = target_value
        else:
            targets = None

        assert list(inputs.keys()) == self.input_features.keys()

        # Convert inputs to tensors.
        for input_feature_name, input_values in inputs.items():
            if not isinstance(input_values, torch.Tensor):
                inputs[input_feature_name] = torch.from_numpy(input_values)
            else:
                inputs[input_feature_name] = input_values

        # Encode every input feature independently.
        encoder_outputs = {}
        for input_feature_name, input_values in inputs.items():
            encoder = self.input_features[input_feature_name]
            encoder_outputs[input_feature_name] = encoder(input_values)

        combiner_outputs = self.combiner(encoder_outputs)

        # Invoke output features.
        output_logits = {}
        output_last_hidden = {}
        for output_feature_name, output_feature in self.output_features.items():
            # Use the presence or absence of targets to signal training or prediction.
            target = targets[output_feature_name] if targets is not None else None
            decoder_outputs = output_feature(combiner_outputs, output_last_hidden, mask=mask, target=target)

            # Add decoder outputs to overall output dictionary.
            for decoder_output_name, tensor in decoder_outputs.items():
                output_feature_utils.set_output_feature_tensor(
                    output_logits, output_feature_name, decoder_output_name, tensor
                )

            # Save the hidden state of the output feature (for feature dependencies).
            output_last_hidden[output_feature_name] = decoder_outputs["last_hidden"]
        return output_logits

    def predictions(self, inputs):
        """Run forward and post-process outputs into per-feature predictions."""
        outputs = self(inputs)
        predictions = {}
        for of_name in self.output_features:
            predictions[of_name] = self.output_features[of_name].predictions(outputs, of_name)
        return predictions

    def evaluation_step(self, inputs, targets):
        """Predict on a batch and fold the results into the running metrics."""
        predictions = self.predictions(inputs)
        self.update_metrics(targets, predictions)
        return predictions

    def predict_step(self, inputs):
        return self.predictions(inputs)

    def train_loss(
        self,
        targets,
        predictions,
        regularization_type: Optional[str] = None,
        regularization_lambda: Optional[float] = None,
    ) -> Tuple[torch.Tensor, Dict[str, torch.Tensor]]:
        """Computes the training loss for the model.

        Args:
            targets: A dictionary of target names to target tensors.
            predictions: A dictionary of output names to output tensors.
            regularization_type: One of 'l1', 'l2', 'l1_l2'.
            regularization_lambda: The regularization lambda.

        Returns:
            A tuple of the loss tensor and a dictionary of loss for every output feature.
        """
        train_loss = 0
        of_train_losses = {}
        for of_name, of_obj in self.output_features.items():
            of_train_loss = of_obj.train_loss(targets[of_name], predictions, of_name)
            train_loss += of_obj.loss["weight"] * of_train_loss
            of_train_losses[of_name] = of_train_loss

        # BUG FIX: additional losses were previously accumulated twice — once
        # via a plain loop over self.losses() and again via torch.sum over the
        # same list. Add them exactly once.
        additional_losses = self.losses()
        if additional_losses:
            train_loss += torch.sum(torch.stack(additional_losses))  # other losses

        # Add regularization loss
        if regularization_type is not None and regularization_lambda != 0:
            train_loss += reg_loss(self, regularization_type, l1=regularization_lambda, l2=regularization_lambda)

        return train_loss, of_train_losses

    def eval_loss(self, targets, predictions):
        """Return (weighted per-feature eval loss, sum of additional losses)."""
        eval_loss = 0
        for of_name, of_obj in self.output_features.items():
            of_eval_loss = of_obj.eval_loss(targets[of_name], predictions[of_name])
            eval_loss += of_obj.loss["weight"] * of_eval_loss

        additional_loss = 0
        additional_losses = self.losses()
        if additional_losses:
            additional_loss = torch.sum(torch.stack(additional_losses))  # other losses

        return eval_loss, additional_loss

    def update_metrics(self, targets, predictions):
        """Update per-feature metrics and the combined loss accumulators."""
        for of_name, of_obj in self.output_features.items():
            of_obj.update_metrics(targets[of_name], predictions[of_name])
        eval_loss, additional_losses = self.eval_loss(targets, predictions)
        self.eval_loss_metric.update(eval_loss)
        self.eval_additional_losses_metrics.update(additional_losses)

    def get_metrics(self):
        """Return all per-feature metrics plus the COMBINED loss scalar."""
        all_of_metrics = {}
        for of_name, of_obj in self.output_features.items():
            all_of_metrics[of_name] = of_obj.get_metrics()
        all_of_metrics[COMBINED] = {
            LOSS: get_scalar_from_ludwig_metric(self.eval_loss_metric)
            + get_scalar_from_ludwig_metric(self.eval_additional_losses_metrics)
        }
        return all_of_metrics

    def reset_metrics(self):
        """Reset all per-feature metrics and the combined loss accumulators."""
        for of_obj in self.output_features.values():
            of_obj.reset_metrics()
        self.eval_loss_metric.reset()
        # BUG FIX: the additional-losses accumulator was never reset, so its
        # running mean leaked across evaluation rounds while get_metrics()
        # still summed it into the COMBINED loss.
        self.eval_additional_losses_metrics.reset()

    def collect_weights(self, tensor_names=None, **kwargs):
        """Returns named parameters filtered against `tensor_names` if not None."""
        if not tensor_names:
            return self.named_parameters()

        # Check for bad tensor names.
        weight_names = {name for name, _ in self.named_parameters()}
        for name in tensor_names:
            if name not in weight_names:
                raise ValueError(f'Requested tensor name filter "{name}" not present in the model graph')

        # Apply filter.
        tensor_set = set(tensor_names)
        return [named_param for named_param in self.named_parameters() if named_param[0] in tensor_set]

    def get_args(self):
        """Return the init arguments for reconstructing this model."""
        # BUG FIX: __init__ stores these as *_def; the previous *_df spelling
        # raised AttributeError on every call.
        return (self._input_features_def, self._combiner_def, self._output_features_def, self._random_seed)
class ECD(BaseModel):
    """Encoder-Combiner-Decoder model.

    Encodes each input feature, combines the encodings, and decodes the
    combined representation into one output per output feature.
    """

    @staticmethod
    def type() -> str:
        """Return the model-type registry key for this class."""
        return MODEL_ECD

    def __init__(
        self,
        input_features,
        combiner,
        output_features,
        random_seed=None,
        **_kwargs,
    ):
        """Build the encoder/combiner/decoder stack from config definitions.

        Args:
            input_features: List of input feature config dicts.
            combiner: Combiner config dict; must contain a TYPE key.
            output_features: List of output feature config dicts.
            random_seed: Forwarded to the base class; may be None.
            **_kwargs: Ignored extra keyword arguments (config compatibility).

        Raises:
            KeyError: If an input feature name collides with a
                ``torch.nn.ModuleDict`` class attribute.
        """
        # Deep-copy so construction cannot mutate the caller's configs.
        self._input_features_def = copy.deepcopy(input_features)
        self._combiner_def = copy.deepcopy(combiner)
        self._output_features_def = copy.deepcopy(output_features)

        self._random_seed = random_seed

        super().__init__(random_seed=self._random_seed)

        # ================ Inputs ================
        self.input_features = LudwigFeatureDict()
        try:
            self.input_features.update(self.build_inputs(self._input_features_def))
        except KeyError as e:
            raise KeyError(
                f"An input feature has a name that conflicts with a class attribute of torch's ModuleDict: {e}"
            )

        # ================ Combiner ================
        logging.debug(f"Combiner {self._combiner_def[TYPE]}")
        combiner_class = get_combiner_class(self._combiner_def[TYPE])
        # Split raw config into schema-validated fields plus passthrough kwargs.
        config, kwargs = load_config_with_kwargs(
            combiner_class.get_schema_cls(),
            self._combiner_def,
        )
        self.combiner = combiner_class(input_features=self.input_features, config=config, **kwargs)

        # ================ Outputs ================
        self.output_features = LudwigFeatureDict()
        self.output_features.update(self.build_outputs(self._output_features_def, self.combiner))

        # ================ Combined loss metric ================
        self.eval_loss_metric = torchmetrics.MeanMetric()
        self.eval_additional_losses_metrics = torchmetrics.MeanMetric()

        # After constructing all layers, clear the cache to free up memory
        clear_data_cache()

    def encode(
        self,
        inputs: Union[
            Dict[str, torch.Tensor], Dict[str, np.ndarray], Tuple[Dict[str, torch.Tensor], Dict[str, torch.Tensor]]
        ],
    ):
        """Convert raw inputs to tensors and run each feature's encoder.

        Returns:
            Dict of input feature name -> encoder output.
        """
        # Convert inputs to tensors.
        for input_feature_name, input_values in inputs.items():
            if not isinstance(input_values, torch.Tensor):
                inputs[input_feature_name] = torch.from_numpy(input_values)
            else:
                inputs[input_feature_name] = input_values

        encoder_outputs = {}
        for input_feature_name, input_values in inputs.items():
            encoder = self.input_features[input_feature_name]
            encoder_outputs[input_feature_name] = encoder(input_values)

        return encoder_outputs

    def combine(self, encoder_outputs):
        """Combine per-feature encoder outputs via the configured combiner."""
        return self.combiner(encoder_outputs)

    def decode(self, combiner_outputs, targets, mask):
        """Run each output feature's decoder over the combined representation.

        Returns:
            Dict of {feature name}::{tensor name} -> output tensor.
        """
        # Invoke output features.
        output_logits = {}
        output_last_hidden = {}
        for output_feature_name, output_feature in self.output_features.items():
            # Use the presence or absence of targets to signal training or prediction.
            target = targets[output_feature_name] if targets is not None else None
            decoder_outputs = output_feature(combiner_outputs, output_last_hidden, mask=mask, target=target)

            # Add decoder outputs to overall output dictionary.
            for decoder_output_name, tensor in decoder_outputs.items():
                output_feature_utils.set_output_feature_tensor(
                    output_logits, output_feature_name, decoder_output_name, tensor
                )

            # Save the hidden state of the output feature (for feature dependencies).
            output_last_hidden[output_feature_name] = decoder_outputs["last_hidden"]

        return output_logits

    def forward(
        self,
        inputs: Union[
            Dict[str, torch.Tensor], Dict[str, np.ndarray], Tuple[Dict[str, torch.Tensor], Dict[str, torch.Tensor]]
        ],
        mask=None,
    ) -> Dict[str, torch.Tensor]:
        """Forward pass of the model.

        Args:
            inputs: Inputs to the model. Can be a dictionary of input names to
                input tensors or a tuple of (inputs, targets) where inputs is
                a dictionary of input names to input tensors and targets is a
                dictionary of target names to target tensors.
            mask: A mask for the inputs.

        Returns:
            A dictionary of output {feature name}::{tensor_name} -> output tensor.
        """
        if isinstance(inputs, tuple):
            inputs, targets = inputs
            # Convert targets to tensors.
            for target_feature_name, target_value in targets.items():
                if not isinstance(target_value, torch.Tensor):
                    targets[target_feature_name] = torch.from_numpy(target_value)
                else:
                    targets[target_feature_name] = target_value
        else:
            targets = None

        assert list(inputs.keys()) == self.input_features.keys()

        encoder_outputs = self.encode(inputs)
        combiner_outputs = self.combine(encoder_outputs)
        return self.decode(combiner_outputs, targets, mask)

    def save(self, save_path):
        """Saves the model to the given path."""
        weights_save_path = os.path.join(save_path, MODEL_WEIGHTS_FILE_NAME)
        torch.save(self.state_dict(), weights_save_path)

    def load(self, save_path):
        """Loads the model from the given path."""
        weights_save_path = os.path.join(save_path, MODEL_WEIGHTS_FILE_NAME)
        device = torch.device(get_torch_device())
        self.load_state_dict(torch.load(weights_save_path, map_location=device))

    def get_args(self):
        """Returns init arguments for constructing this model."""
        # BUG FIX: __init__ stores these as *_def; the previous *_df spelling
        # raised AttributeError on every call.
        return (self._input_features_def, self._combiner_def, self._output_features_def, self._random_seed)
class GBM(BaseModel):
    """Gradient-boosted machine model backed by a LightGBM booster.

    After training, the booster is converted (via Hummingbird) to a PyTorch
    module so inference can run through the standard ``forward`` path.
    """

    @staticmethod
    def type() -> str:
        """Return the model-type registry key for this class."""
        return MODEL_GBM

    def __init__(
        self,
        input_features: List[Dict[str, Any]],
        output_features: List[Dict[str, Any]],
        random_seed: int = None,
        **_kwargs,
    ):
        """Build the model from input/output feature definitions.

        Args:
            input_features: List of input feature config dicts.
            output_features: List of output feature config dicts.
            random_seed: Forwarded to the base class; may be None.
            **_kwargs: Ignored extra keyword arguments (config compatibility).

        Raises:
            KeyError: If an input feature name collides with a
                ``torch.nn.ModuleDict`` class attribute.
        """
        super().__init__(random_seed=random_seed)

        # Deep-copy so feature construction cannot mutate the caller's configs.
        self._input_features_def = copy.deepcopy(input_features)
        self._output_features_def = copy.deepcopy(output_features)

        # ================ Inputs ================
        self.input_features = LudwigFeatureDict()
        try:
            self.input_features.update(
                self.build_inputs(self._input_features_def))
        except KeyError as e:
            raise KeyError(
                f"An input feature has a name that conflicts with a class attribute of torch's ModuleDict: {e}"
            )

        # ================ Outputs ================
        self.output_features = LudwigFeatureDict()
        self.output_features.update(
            self.build_outputs(self._output_features_def,
                               input_size=self.input_shape[-1]))

        # ================ Combined loss metric ================
        self.eval_loss_metric = torchmetrics.MeanMetric()
        self.eval_additional_losses_metrics = torchmetrics.MeanMetric()

        # Populated by training / compile() respectively; None until then.
        self.lgb_booster: lgb.Booster = None
        self.compiled_model: torch.nn.Module = None

    @classmethod
    def build_outputs(cls, output_features_def: List[Dict[str, Any]], input_size: int) -> Dict[str, OutputFeature]:
        """Builds and returns output feature."""
        # TODO: only single task currently
        if len(output_features_def) > 1:
            raise ValueError("Only single task currently supported")

        output_feature_def = output_features_def[0]
        output_features = {}

        output_feature_def["input_size"] = input_size
        output_feature = cls.build_single_output(output_feature_def, output_features)
        output_features[output_feature_def[NAME]] = output_feature

        return output_features

    def compile(self):
        """Convert the LightGBM model to a PyTorch model and store internally."""
        if self.lgb_booster is None:
            raise ValueError("Model has not been trained yet.")

        # NOTE(review): assumes LudwigFeatureDict.keys() returns a subscriptable
        # sequence (not a dict view) — verify against its implementation.
        output_feature_name = self.output_features.keys()[0]
        output_feature = self.output_features[output_feature_name]

        # Wrap the raw booster in a scikit-learn estimator so Hummingbird can
        # convert it; see
        # https://github.com/microsoft/LightGBM/issues/1942#issuecomment-453975607
        gbm_sklearn_cls = lgb.LGBMRegressor if output_feature.type() == NUMBER else lgb.LGBMClassifier
        gbm_sklearn = gbm_sklearn_cls(feature_name=list(self.input_features.keys()))  # , **params)
        gbm_sklearn._Booster = self.lgb_booster
        gbm_sklearn.fitted_ = True
        gbm_sklearn._n_features = len(self.input_features)
        if isinstance(gbm_sklearn, lgb.LGBMClassifier):
            # Binary output features use 2 classes; category features carry
            # their own class count.
            gbm_sklearn._n_classes = output_feature.num_classes if output_feature.type() == CATEGORY else 2

        hb_model = convert(gbm_sklearn, "torch", extra_config={"tree_implementation": "gemm"})

        self.compiled_model = hb_model.model

    def forward(
        self,
        inputs: Union[Dict[str, torch.Tensor], Dict[str, np.ndarray],
                      Tuple[Dict[str, torch.Tensor], Dict[str, torch.Tensor]]],
        mask=None,
    ) -> Dict[str, torch.Tensor]:
        """Run inference through the compiled (Hummingbird) model.

        Args:
            inputs: Dict of input name -> tensor/array, or a (inputs, targets)
                tuple during training-style invocation.
            mask: Unused; kept for interface parity with other models.

        Returns:
            Dict of {feature name}::{tensor name} -> logits tensor.

        Raises:
            ValueError: If ``compile()`` has not been run yet.
        """
        if self.compiled_model is None:
            raise ValueError("Model has not been trained yet.")

        if isinstance(inputs, tuple):
            inputs, targets = inputs
            # Convert targets to tensors.
            for target_feature_name, target_value in targets.items():
                if not isinstance(target_value, torch.Tensor):
                    targets[target_feature_name] = torch.from_numpy(target_value)
                else:
                    targets[target_feature_name] = target_value
        else:
            targets = None

        assert list(inputs.keys()) == self.input_features.keys()

        # Convert inputs to tensors.
        # NOTE(review): only the already-tensor branch reshapes to (-1, 1);
        # numpy inputs are converted without reshaping — confirm the intended
        # shape contract with callers.
        for input_feature_name, input_values in inputs.items():
            if not isinstance(input_values, torch.Tensor):
                inputs[input_feature_name] = torch.from_numpy(input_values)
            else:
                inputs[input_feature_name] = input_values.view(-1, 1)

        # TODO(travis): include encoder and decoder steps during inference
        # encoder_outputs = {}
        # for input_feature_name, input_values in inputs.items():
        #     encoder = self.input_features[input_feature_name]
        #     encoder_output = encoder(input_values)
        #     encoder_outputs[input_feature_name] = encoder_output

        # concatenate inputs
        inputs = torch.cat(list(inputs.values()), dim=1)

        # Invoke output features.
        output_logits = {}
        output_feature_name = self.output_features.keys()[0]
        output_feature = self.output_features[output_feature_name]

        preds = self.compiled_model(inputs)

        if output_feature.type() == NUMBER:
            # regression
            if len(preds.shape) == 2:
                preds = preds.squeeze(1)
            logits = preds
        else:
            # classification
            _, probs = preds
            # keep positive class only for binary feature
            probs = probs[:, 1] if output_feature.type() == BINARY else probs
            logits = torch.logit(probs)
        output_feature_utils.set_output_feature_tensor(output_logits, output_feature_name, LOGITS, logits)

        return output_logits

    def save(self, save_path):
        """Saves the model to the given path."""
        if self.lgb_booster is None:
            raise ValueError("Model has not been trained yet.")

        weights_save_path = os.path.join(save_path, MODEL_WEIGHTS_FILE_NAME)
        self.lgb_booster.save_model(weights_save_path, num_iteration=self.lgb_booster.best_iteration)

    def load(self, save_path):
        """Loads the model from the given path."""
        weights_save_path = os.path.join(save_path, MODEL_WEIGHTS_FILE_NAME)
        self.lgb_booster = lgb.Booster(model_file=weights_save_path)
        # Re-derive the compiled torch model from the reloaded booster.
        self.compile()

        device = torch.device(get_torch_device())
        self.compiled_model.to(device)

    def get_args(self):
        """Returns init arguments for constructing this model."""
        # BUG FIX: __init__ stores these as *_def; the previous *_df spelling
        # raised AttributeError on every call.
        return (self._input_features_def, self._output_features_def, self._random_seed)
def iter_feature_metrics(
        features: LudwigFeatureDict) -> Iterable[Tuple[str, str]]:
    """Yield (feature name, metric name) pairs for every metric of every feature."""
    for name, feature in features.items():
        yield from ((name, metric_name) for metric_name in feature.metric_functions)
def run_evaluation(
    self,
    training_set: Union["Dataset", "RayDataset"],  # noqa: F821
    validation_set: Optional[Union["Dataset", "RayDataset"]],  # noqa: F821
    test_set: Optional[Union["Dataset", "RayDataset"]],  # noqa: F821
    progress_tracker: ProgressTracker,
    train_summary_writer: SummaryWriter,
    validation_summary_writer: SummaryWriter,
    test_summary_writer: SummaryWriter,
    output_features: LudwigFeatureDict,
    metrics_names: Dict[str, List[str]],
    save_path: str,
    loss: torch.Tensor,
    all_losses: Dict[str, torch.Tensor],
) -> bool:
    """Runs evaluation over training, validation, and test sets.

    Also:
    - Prints results, saves results to the progress tracker.
    - Saves the model if the validation score is the best so far
    - If there is no validation set, the model is always saved.

    Returns whether the trainer should early stop, based on validation metrics history.
    """
    # NOTE(review): the early-stop decision / return statement is not visible
    # in this chunk — confirm against the full method.
    start_time = time.time()
    self.callback(
        lambda c: c.on_eval_start(self, progress_tracker, save_path))

    if self.is_coordinator():
        logging.info(
            f"\nRunning evaluation for step: {progress_tracker.steps}, epoch: {progress_tracker.epoch}"
        )

    # ================ Eval ================
    # init tables: one table per output feature (header row = feature name +
    # its metric names), plus one for the combined loss.
    tables = OrderedDict()
    for output_feature_name, output_feature in output_features.items():
        tables[output_feature_name] = [[output_feature_name] +
                                       metrics_names[output_feature_name]]
    tables[COMBINED] = [[COMBINED, LOSS]]

    # eval metrics on train
    # Never evaluate with a smaller batch than training currently uses.
    self.eval_batch_size = max(self.eval_batch_size,
                               progress_tracker.batch_size)
    if self.evaluate_training_set:
        # Full evaluation pass over the training set.
        self.evaluation(training_set, "train",
                        progress_tracker.train_metrics, tables,
                        self.eval_batch_size, progress_tracker)

        self.write_eval_summary(
            summary_writer=train_summary_writer,
            metrics=progress_tracker.train_metrics,
            step=progress_tracker.steps,
        )
    else:
        # Training set is not evaluated. Add loss to the progress tracker.
        progress_tracker.train_metrics[COMBINED][LOSS].append(
            TrainerMetric(epoch=progress_tracker.epoch,
                          step=progress_tracker.steps,
                          value=loss.item()))
        for output_feature_name, loss_tensor in all_losses.items():
            progress_tracker.train_metrics[output_feature_name][
                LOSS].append(
                    TrainerMetric(epoch=progress_tracker.epoch,
                                  step=progress_tracker.steps,
                                  value=loss_tensor.item()))
            tables[output_feature_name].append(
                ["train", loss_tensor.item()])
        tables[COMBINED].append(["train", loss.item()])

        self.write_eval_summary(
            summary_writer=train_summary_writer,
            metrics=progress_tracker.train_metrics,
            step=progress_tracker.steps,
        )

    if validation_set is not None:
        self.callback(lambda c: c.on_validation_start(
            self, progress_tracker, save_path))

        # eval metrics on validation set
        self.evaluation(
            validation_set,
            "vali",
            progress_tracker.validation_metrics,
            tables,
            self.eval_batch_size,
            progress_tracker,
        )

        self.write_eval_summary(
            summary_writer=validation_summary_writer,
            metrics=progress_tracker.validation_metrics,
            step=progress_tracker.steps,
        )

        self.callback(lambda c: c.on_validation_end(
            self, progress_tracker, save_path))

    if test_set is not None:
        self.callback(
            lambda c: c.on_test_start(self, progress_tracker, save_path))

        # eval metrics on test set
        self.evaluation(test_set, TEST, progress_tracker.test_metrics,
                        tables, self.eval_batch_size, progress_tracker)

        self.write_eval_summary(
            summary_writer=test_summary_writer,
            metrics=progress_tracker.test_metrics,
            step=progress_tracker.steps,
        )

        self.callback(
            lambda c: c.on_test_end(self, progress_tracker, save_path))

    elapsed_time = (time.time() - start_time) * 1000.0

    if self.is_coordinator():
        logging.debug(
            f"Evaluation took {time_utils.strdelta(elapsed_time)}\n")
        for output_feature, table in tables.items():
            logging.info(
                tabulate(table,
                         headers="firstrow",
                         tablefmt="fancy_grid",
                         floatfmt=".4f"))

    # Trigger eval end callback after any model weights save for complete checkpoint
    self.callback(
        lambda c: c.on_eval_end(self, progress_tracker,
                                save_path))