def _train(
    self, train_input: gobbli.io.TrainInput, context: ContainerTaskContext
) -> gobbli.io.TrainOutput:
    """
    Determine the majority class.
    """
    if train_input.multilabel:
        train_labels: List[str] = list(
            itertools.chain.from_iterable(train_input.y_train_multilabel)
        )
    else:
        train_labels = train_input.y_train_multiclass
    unique_values, value_counts = np.unique(train_labels, return_counts=True)
    self.majority_class = unique_values[value_counts.argmax(axis=0)]

    labels = train_input.labels()
    y_train_pred_proba = self._make_pred_df(labels, len(train_input.y_train))
    y_valid_pred_proba = self._make_pred_df(labels, len(train_input.y_valid))

    if train_input.multilabel:
        # Loss is the summed absolute difference between the predicted
        # probabilities and the 0/1 label indicator matrix.
        y_train_indicator = multilabel_to_indicator_df(
            train_input.y_train_multilabel, labels
        )
        train_loss = (
            y_train_pred_proba.subtract(y_train_indicator).abs().to_numpy().sum()
        )

        y_valid_indicator = multilabel_to_indicator_df(
            train_input.y_valid_multilabel, labels
        )
        valid_loss = (
            y_valid_pred_proba.subtract(y_valid_indicator).abs().to_numpy().sum()
        )
        # Accuracy is the proportion of label cells predicted correctly, so
        # subtract the average per-cell error from 1.
        valid_accuracy = 1 - valid_loss / (
            y_valid_pred_proba.shape[0] * y_valid_pred_proba.shape[1]
        )
    else:
        # Convert to arrays so the comparison is element-wise even when both
        # sides are plain Python lists.
        y_train_pred = np.asarray(pred_prob_to_pred_label(y_train_pred_proba))
        train_loss = np.sum(y_train_pred != np.asarray(train_input.y_train_multiclass))

        y_valid_pred = np.asarray(pred_prob_to_pred_label(y_valid_pred_proba))
        valid_loss = np.sum(y_valid_pred != np.asarray(train_input.y_valid_multiclass))
        # Accuracy is the proportion of correct predictions, not the count of
        # misclassifications divided by the total.
        valid_accuracy = 1 - valid_loss / len(y_valid_pred)

    return gobbli.io.TrainOutput(
        valid_loss=valid_loss,
        valid_accuracy=valid_accuracy,
        train_loss=train_loss,
        labels=labels,
        multilabel=train_input.multilabel,
    )
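# Illustrative sketch (an assumption, not gobbli's actual implementation) of the
# two helpers the loss computation above depends on: multilabel_to_indicator_df
# is expected to produce an (n_samples, n_labels) 0/1 DataFrame, and
# pred_prob_to_pred_label to take the most probable column name per row.
import pandas as pd
from typing import List, Sequence


def indicator_df_sketch(
    y_multilabel: Sequence[Sequence[str]], labels: List[str]
) -> pd.DataFrame:
    # One row per observation, one 0/1 column per label.
    return pd.DataFrame(
        [{label: int(label in row) for label in labels} for row in y_multilabel]
    )


def pred_label_sketch(pred_prob_df: pd.DataFrame) -> List[str]:
    # Most probable label per row = name of the column holding the max probability.
    return pred_prob_df.idxmax(axis=1).tolist()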
def _train(
    self, train_input: gobbli.io.TrainInput, context: ContainerTaskContext
) -> gobbli.io.TrainOutput:
    self._write_input(
        train_input.X_train,
        train_input.y_train_multilabel,
        context.host_input_dir / FastText._TRAIN_INPUT_FILE,
    )
    self._write_input(
        train_input.X_valid,
        train_input.y_valid_multilabel,
        context.host_input_dir / FastText._VALID_INPUT_FILE,
    )

    container_validation_input_path = (
        context.container_input_dir / FastText._VALID_INPUT_FILE
    )
    train_logs, train_loss = self._run_supervised(
        train_input.checkpoint,
        context.container_input_dir / FastText._TRAIN_INPUT_FILE,
        context.container_output_dir / FastText._CHECKPOINT_BASE,
        context,
        train_input.num_train_epochs,
        autotune_validation_file_path=container_validation_input_path,
    )
    host_checkpoint_path = context.host_output_dir / FastText._CHECKPOINT_BASE

    labels = train_input.labels()

    # Calculate validation accuracy on our own, since the fastText CLI only
    # reports precision/recall.
    predict_logs, pred_prob_df = self._run_predict_prob(
        host_checkpoint_path, labels, container_validation_input_path, context
    )

    if train_input.multilabel:
        pred_labels = pred_prob_to_pred_multilabel(pred_prob_df)
        gold_labels = multilabel_to_indicator_df(
            train_input.y_valid_multilabel, labels
        )
    else:
        pred_labels = pred_prob_to_pred_label(pred_prob_df)
        gold_labels = train_input.y_valid_multiclass

    valid_accuracy = accuracy_score(gold_labels, pred_labels)
    # Not ideal, but fastText doesn't provide a way to get validation loss;
    # negate the validation accuracy instead.
    valid_loss = -valid_accuracy

    return gobbli.io.TrainOutput(
        train_loss=train_loss,
        valid_loss=valid_loss,
        valid_accuracy=valid_accuracy,
        labels=labels,
        multilabel=train_input.multilabel,
        checkpoint=host_checkpoint_path,
        _console_output="\n".join((train_logs, predict_logs)),
    )
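# Note on the accuracy_score call above: scikit-learn accepts both input forms
# used here.  With 1-D label sequences it returns plain accuracy; with 0/1
# indicator matrices (the multilabel branch) it returns subset accuracy, i.e. a
# row only counts as correct when every label matches.  A small standalone
# illustration, separate from the model code:
import pandas as pd
from sklearn.metrics import accuracy_score

# Multiclass: 2 of 3 predictions match -> 0.666...
print(accuracy_score(["a", "b", "a"], ["a", "b", "b"]))

# Multilabel indicator: only the first row matches exactly -> 0.5
gold = pd.DataFrame([[1, 0], [1, 1]], columns=["a", "b"])
pred = pd.DataFrame([[1, 0], [0, 1]], columns=["a", "b"])
print(accuracy_score(gold, pred))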
def y_pred(self) -> List[str]:
    """
    Returns:
      The predicted class for each observation.
    """
    return pred_prob_to_pred_label(self.y_pred_proba)
    escape_line_delimited_text,
    pred_prob_to_pred_label,
    truncate_text,
)

MetricFunc = Callable[[Sequence[str], pd.DataFrame], float]
"""
A function used to calculate some metric.  It should accept a sequence of true
labels (y_true) and a dataframe of shape (n_samples, n_classes) containing
predicted probabilities; it should output a real number.
"""

DEFAULT_METRICS: Dict[str, MetricFunc] = {
    "Weighted F1 Score": lambda y_true, y_pred_proba: f1_score(
        y_true, pred_prob_to_pred_label(y_pred_proba), average="weighted"
    ),
    "Weighted Precision Score": lambda y_true, y_pred_proba: precision_score(
        y_true, pred_prob_to_pred_label(y_pred_proba), average="weighted"
    ),
    "Weighted Recall Score": lambda y_true, y_pred_proba: recall_score(
        y_true, pred_prob_to_pred_label(y_pred_proba), average="weighted"
    ),
    "Accuracy": lambda y_true, y_pred_proba: accuracy_score(
        y_true, pred_prob_to_pred_label(y_pred_proba)
    ),
}
"""
The default set of metrics to be reported in experiment results.  Users may
want to extend this.
"""
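# The dict above is intended to be user-extensible ("Users may want to extend
# this").  A hedged sketch of one way to do so -- adding log loss, which scores
# the probabilities directly rather than the argmax labels.  The name
# CUSTOM_METRICS is hypothetical, and the sketch assumes the probability
# dataframe's columns are the class labels (consistent with how
# pred_prob_to_pred_label is used above); columns are reordered to match
# scikit-learn's sorted label order.
from sklearn.metrics import log_loss

CUSTOM_METRICS: Dict[str, MetricFunc] = {
    **DEFAULT_METRICS,
    "Log Loss": lambda y_true, y_pred_proba: log_loss(
        y_true,
        y_pred_proba[sorted(y_pred_proba.columns)],
        labels=sorted(y_pred_proba.columns),
    ),
}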
def y_pred(self) -> List[str]:
    """
    Returns:
      The most likely predicted label for each observation.
    """
    return pred_prob_to_pred_label(self.y_pred_proba)
def _do_run(self, run: ModelClassificationRun, run_output_dir: Path) -> str:
    ds = IMDBDataset.load()
    X_train_valid, y_train_valid, X_test, y_test = maybe_limit(
        ds.X_train(), ds.y_train(), ds.X_test(), ds.y_test(), self.dataset_limit
    )

    assert_in("preprocess_func", run.preprocess_func, PREPROCESS_FUNCS)
    preprocess_func = PREPROCESS_FUNCS[run.preprocess_func]
    X_train_valid_preprocessed = preprocess_func(X_train_valid)
    X_test_preprocessed = preprocess_func(X_test)

    assert_valid_model(run.model_name)
    model_cls = getattr(gobbli.model, run.model_name)

    all_results = []
    majority, minority = ClassImbalanceScenario.find_majority_minority_classes(y_test)
    majority_df, minority_df = ClassImbalanceScenario.split_dataset(
        X_train_valid_preprocessed, y_train_valid, majority, minority
    )

    for proportion in self.params["imbalance_proportions"]:
        # Downsample the minority class so the final dataset contains the desired
        # proportion of the minority.  The expression below simplifies to
        # proportion / (1 - proportion), which yields the desired mix assuming the
        # majority and minority frames start out the same size.
        orig_len = majority_df.shape[0]
        downsample_proportion = -orig_len / (orig_len - orig_len / proportion)
        minority_sample = minority_df.sample(frac=downsample_proportion).reset_index()
        sampled_df = pd.concat([majority_df, minority_sample])
        X = sampled_df["X"].tolist()
        y = sampled_df["y"].tolist()

        LOGGER.info(
            f"{dt.datetime.now().strftime('[%Y-%m-%d %H:%M:%S]')} "
            f"Evaluating proportion {round(proportion, 3)} ({len(X)} obs)"
        )

        results = run_benchmark_experiment(
            f"{self.name}_{run.key}",
            X,
            y,
            model_cls,
            run.param_grid,
            test_dataset=(X_test_preprocessed, y_test),
            run_kwargs=run.run_kwargs,
        )
        all_results.append(results)

    minority_f1_scores = []
    majority_f1_scores = []
    for result in all_results:
        majority_f1, minority_f1 = f1_score(
            result.y_true,
            pred_prob_to_pred_label(result.y_pred_proba),
            average=None,
            labels=[majority, minority],
        )
        minority_f1_scores.append(minority_f1)
        majority_f1_scores.append(majority_f1)

    all_metrics = pd.DataFrame(
        [
            {"imbalance_proportion": p, **r.metrics()}
            for p, r in zip(self.params["imbalance_proportions"], all_results)
        ]
    )
    all_metrics["Minority Class F1 Score"] = minority_f1_scores
    all_metrics["Majority Class F1 Score"] = majority_f1_scores

    # Plot both F1 curves on a single set of axes; calling fig.add_subplot() twice
    # with no arguments would create two overlapping axes.
    fig = plt.figure(figsize=(10, 10))
    ax = fig.add_subplot()
    all_metrics.plot(x="imbalance_proportion", y="Minority Class F1 Score", ax=ax)
    all_metrics.plot(x="imbalance_proportion", y="Majority Class F1 Score", ax=ax)
    plt.xlabel("Prevalence of Minority Class")
    plt.title(
        f"Model Performance by Prevalence of Minority Class - {model_cls.__name__}"
    )
    plt.xlim(0, 0.5)
    plt.ylim(0, 1)
    plot_path = run_output_dir / "plot.png"
    fig.savefig(plot_path)

    md = f"# Results: {run.key}\n"
    md += tabulate(all_metrics, tablefmt="pipe", headers="keys")
    md += f"\n\n![Results]({self.get_markdown_relative_path(plot_path)})\n---"
    return md
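# Quick arithmetic check of the downsampling formula used above (a standalone
# sketch, not part of the benchmark code).  With equal-sized class frames,
# sampling the minority at frac = p / (1 - p) leaves the minority at exactly
# proportion p of the combined dataset:
def check_downsample(orig_len: int, proportion: float) -> float:
    frac = -orig_len / (orig_len - orig_len / proportion)  # == p / (1 - p)
    minority_len = orig_len * frac
    return minority_len / (orig_len + minority_len)


print(check_downsample(1000, 0.2))  # ~0.2
print(check_downsample(1000, 0.5))  # ~0.5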
def y_pred_multiclass(self) -> List[str]:
    """
    Returns:
      Predicted class for each observation (assuming a multiclass context).
    """
    return pred_prob_to_pred_label(self.y_pred_proba)