def test_task_label_remap():
    """Fit and predict with remapped label values and a renamed label column.

    The label values 0-3 are replaced by the strings 'd', 'c', 'b', 'a' in
    both splits, and the training split's label column is renamed to
    'my_label'. The test then checks that ``fit`` leaves the training data
    untouched and that accuracy on the test split exceeds a loose threshold.
    """
    predictor_cls = Task
    dataset_url = 'https://autogluon.s3.amazonaws.com/datasets/shopee-iet.zip'
    train_data, _, test_data = predictor_cls.Dataset.from_folders(dataset_url)

    value_to_name = {0: 'd', 1: 'c', 2: 'b', 3: 'a'}
    train_data = train_data.replace({"label": value_to_name})
    test_data = test_data.replace({"label": value_to_name})

    # Only the training split gets the renamed label column; the test split
    # keeps 'label', which is read back below as the ground truth.
    train_data = train_data.rename(columns={'label': 'my_label'})
    predictor = predictor_cls(label='my_label')

    # Snapshot the training data so any mutation performed by fit() shows up.
    train_snapshot = copy.deepcopy(train_data)
    predictor.fit(train_data, hyperparameters={'epochs': 2})
    assert train_snapshot.equals(train_data)

    predicted_labels = predictor.predict(test_data)
    predicted_probs = predictor.predict_proba(test_data)

    # Map every probability column name to its positional index.
    name_to_position = {
        name: position
        for position, name in enumerate(predicted_probs.columns)
    }

    from autogluon.core.metrics import accuracy, log_loss

    acc = accuracy(y_true=test_data['label'], y_pred=predicted_labels)
    # The log-loss value is never asserted on; the call only has to complete
    # without raising.
    score_log_loss = log_loss(
        y_true=test_data['label'].replace(name_to_position),
        y_pred=predicted_probs.to_numpy(),
    )
    assert acc > 0.2  # deliberately relaxed threshold
def test_log_loss_with_sklearn(gt, probs):
    """Compare ``log_loss`` against the negated scikit-learn log loss.

    Args:
        gt: Ground-truth labels; converted to an ``int64`` array.
        probs: Predicted probabilities; converted to a ``float32`` array.
    """
    labels = np.array(gt, dtype=np.int64)
    predicted = np.array(probs, dtype=np.float32)

    score = log_loss(labels, predicted)
    reference_loss = sklearn.metrics.log_loss(labels, predicted)

    # AutoGluon metrics always return a score where higher is better, so the
    # expected value is the negation of the conventional log loss.
    np.testing.assert_allclose(score, -reference_loss)
def test_log_loss_single_binary_class():
    """Check ``log_loss`` when the ground truth contains a single class.

    With all-ones labels the expected score is the mean log of the given
    probabilities. With all-zeros labels it is the mean log of their
    complements.
    """
    all_ones = np.array([1, 1, 1])
    positive_probs = np.array([0.1, 0.2, 0.3])

    # Every label is 1.
    expected_for_ones = np.log(positive_probs).mean()
    np.testing.assert_allclose(
        log_loss(all_ones, positive_probs), expected_for_ones
    )

    # Every label is 0: flip the labels and use the complementary probability.
    all_zeros = 1 - all_ones
    expected_for_zeros = np.log(1 - positive_probs).mean()
    np.testing.assert_allclose(
        log_loss(all_zeros, positive_probs), expected_for_zeros
    )
def test_log_loss(gt, probs):
    """Compare ``log_loss`` against a direct NumPy computation.

    Args:
        gt: Ground-truth class indices; converted to an ``int64`` array.
        probs: Predicted probabilities; converted to a ``float32`` array.
            The indexing below assumes one row per sample and one column
            per class -- TODO confirm against the parametrization.
    """
    labels = np.array(gt, dtype=np.int64)
    predicted = np.array(probs, dtype=np.float32)

    score = log_loss(labels, predicted)

    # Pair each row index with that row's label to pick out the probability
    # assigned to the true class, then average the logs.
    row_index = np.arange(predicted.shape[0])
    true_class_probs = predicted[row_index, labels]
    expected_score = np.log(true_class_probs).mean()

    np.testing.assert_allclose(score, expected_score)