예제 #1
0
파일: test.py 프로젝트: llniu/ALD-study
 def test_variance_equal(self):
     """Check that constant sample weights change neither the AUC nor its variance.

     Every sample gets the same weight (pi); the weighted result is compared
     against a call made without ``sample_weight``.
     """
     delong = compare_auc_delong_xu.delong_roc_variance
     uniform_weights = numpy.full(self.y_test.shape, numpy.pi)

     weighted_auc, weighted_variance = delong(
         self.y_test, self.predictions, sample_weight=uniform_weights)
     plain_auc, plain_variance = delong(self.y_test, self.predictions)

     numpy.testing.assert_allclose(plain_auc, weighted_auc)
     numpy.testing.assert_allclose(plain_variance, weighted_variance)
예제 #2
0
파일: test.py 프로젝트: llniu/ALD-study
 def test_variance_positive(self):
     """Check that rescaling non-uniform weights leaves AUC and variance unchanged.

     The first seven samples are weighted with a linear ramp from 0 to 10,
     then with the same ramp multiplied by pi; both runs must agree.
     """
     n_samples = 7
     scale = numpy.pi
     delong = compare_auc_delong_xu.delong_roc_variance

     labels = self.y_test[:n_samples]
     scores = self.predictions[:n_samples]
     ramp = numpy.linspace(0, 10, num=n_samples)

     auc, variance = delong(labels, scores, sample_weight=ramp)
     scaled_auc, scaled_variance = delong(
         labels, scores, sample_weight=ramp * scale)

     numpy.testing.assert_allclose(auc, scaled_auc)
     numpy.testing.assert_allclose(variance, scaled_variance)
예제 #3
0
파일: test.py 프로젝트: llniu/ALD-study
 def test_weights_positive(self):
     """Check the weighted DeLong AUC against sklearn's weighted ``roc_auc_score``.

     Weights form a linear ramp from 0 to 3 across the whole test set.
     """
     ramp = numpy.linspace(0, 3, num=len(self.y_test))

     # Only the AUC is checked here; the variance is discarded.
     auc, _ = compare_auc_delong_xu.delong_roc_variance(
         self.y_test, self.predictions, sample_weight=ramp)
     reference_auc = sklearn.metrics.roc_auc_score(
         self.y_test, self.predictions, sample_weight=ramp)

     numpy.testing.assert_allclose(reference_auc, auc)
예제 #4
0
파일: test.py 프로젝트: llniu/ALD-study
 def test_weights_positive_small_N(self):
     """Check the weighted DeLong AUC against sklearn on a seven-sample subset.

     The weight ramp (0 to 10) is built for the full test set and then
     truncated, so the subset sees only the first seven ramp values.
     """
     n_samples = 7
     labels = self.y_test[:n_samples]
     scores = self.predictions[:n_samples]
     subset_weights = numpy.linspace(
         0, 10, num=self.y_test.shape[0])[:n_samples]

     # Only the AUC is checked here; the variance is discarded.
     auc, _ = compare_auc_delong_xu.delong_roc_variance(
         labels, scores, sample_weight=subset_weights)
     reference_auc = sklearn.metrics.roc_auc_score(
         labels, scores, sample_weight=subset_weights)

     numpy.testing.assert_allclose(reference_auc, auc)
예제 #5
0
파일: test.py 프로젝트: llniu/ALD-study
 def test_weights_equal_big(self):
     """Check the DeLong AUC against sklearn with a constant weight of 2.13.

     Both implementations are evaluated on the first seven samples only.
     """
     n_samples = 7
     labels = self.y_test[:n_samples]
     scores = self.predictions[:n_samples]
     constant_weights = numpy.full(self.y_test.shape, 2.13)[:n_samples]

     # Only the AUC is checked here; the variance is discarded.
     auc, _ = compare_auc_delong_xu.delong_roc_variance(
         labels, scores, sample_weight=constant_weights)
     reference_auc = sklearn.metrics.roc_auc_score(
         labels, scores, sample_weight=constant_weights)

     numpy.testing.assert_allclose(reference_auc, auc)
예제 #6
0
def test_variance():
    """Regression-test the DeLong AUC and variance on a one-vs-rest iris split.

    A logistic regression is fitted to predict iris class 1 from 20% of the
    data and scored on the remaining 80%. The AUC must match sklearn's and
    the variance must match a previously recorded constant.
    """
    iris = sklearn.datasets.load_iris()
    is_class_one = iris.target == 1
    x_train, x_test, y_train, y_test = sklearn.model_selection.train_test_split(
        iris.data, is_class_one, test_size=0.8, random_state=42)

    classifier = sklearn.linear_model.LogisticRegression()
    classifier.fit(x_train, y_train)
    # Column 1 of predict_proba holds the probability of the positive class.
    predictions = classifier.predict_proba(x_test)[:, 1]

    auc, variance = compare_auc_delong_xu.delong_roc_variance(
        y_test, predictions)
    reference_auc = sklearn.metrics.roc_auc_score(y_test, predictions)

    numpy.testing.assert_allclose(reference_auc, auc)
    numpy.testing.assert_allclose(0.0014569635512, variance)
예제 #7
0
 def test_variance(self):
     """Compare the mean DeLong variance with the empirical variance of the AUC.

     Repeatedly draws 7 positive scores from ``self.x_distr`` and 14 negative
     scores from ``self.y_distr``. Each trial's DeLong AUC must equal
     sklearn's, and over all trials the average DeLong variance must be
     within 10% of the sample variance of the AUCs.
     """
     n_positive = 7
     n_negative = 14
     n_trials = 50000
     delong = compare_auc_delong_xu.delong_roc_variance

     trial_aucs = numpy.empty(n_trials)
     trial_variances = numpy.empty(n_trials)

     # Fixed seed so the Monte-Carlo comparison is reproducible.
     numpy.random.seed(1234235)
     labels = numpy.concatenate(
         (numpy.ones(n_positive), numpy.zeros(n_negative)))

     for idx in range(n_trials):
         # Positives are drawn before negatives to match the label layout.
         positive_scores = self.x_distr.rvs(n_positive)
         negative_scores = self.y_distr.rvs(n_negative)
         scores = numpy.concatenate((positive_scores, negative_scores))

         reference_auc = sklearn.metrics.roc_auc_score(labels, scores)
         delong_auc, delong_variance = delong(labels, scores)

         trial_aucs[idx] = reference_auc
         trial_variances[idx] = delong_variance
         numpy.testing.assert_allclose(reference_auc, delong_auc)

     numpy.testing.assert_allclose(
         trial_variances.mean(), trial_aucs.var(), rtol=0.1)
예제 #8
0
파일: compute.py 프로젝트: llniu/ALD-study
import sklearn.datasets
import sklearn.model_selection
import sklearn.linear_model
import numpy
import compare_auc_delong_xu
import unittest
import scipy.stats

# Monte-Carlo experiment: compare the average DeLong variance estimate with
# the empirical variance of the AUC over many simulated data sets.

# Simulation size.
sample_size_x = 7
sample_size_y = 14
n_trials = 1000000

# Score distributions: x_distr feeds the samples labelled 1, y_distr the
# samples labelled 0.
x_distr = scipy.stats.norm(0.5, 1)
y_distr = scipy.stats.norm(-0.5, 1)

# Fixed seed so repeated runs print the same numbers.
numpy.random.seed(1234235)

# The label layout is the same for every trial: ones first, then zeros.
labels = numpy.concatenate(
    (numpy.ones(sample_size_x), numpy.zeros(sample_size_y)))

aucs = numpy.empty(n_trials)
variances = numpy.empty(n_trials)

for trial in range(n_trials):
    # Draw order (x before y) matches the label layout above.
    scores = numpy.concatenate(
        (x_distr.rvs(sample_size_x), y_distr.rvs(sample_size_y)))
    aucs[trial] = sklearn.metrics.roc_auc_score(labels, scores)
    auc_delong, variances[trial] = compare_auc_delong_xu.delong_roc_variance(
        labels, scores)

# Mean estimated variance next to the observed variance of the AUCs.
print(variances.mean(), aucs.var())
예제 #9
0
def _accumulate_delong_stats(labels_sub, preds_sub, skip_uncertain):
    """Sum DeLong AUC and variance over the task columns of one batch.

    Args:
        labels_sub: 2-D array of labels, one column per task.
        preds_sub: 2-D array of model outputs, same layout as ``labels_sub``.
        skip_uncertain: When true, rows whose label is ``<= -1`` are dropped
            per task before scoring, and a task whose scoring raises is
            counted as failed instead of aborting.

    Returns:
        ``(auc_sum, var_sum, n_scored, n_failed)`` where ``n_scored`` is the
        number of tasks that contributed to the sums and ``n_failed`` the
        number that were skipped because scoring raised.
    """
    auc_sum = 0.0
    var_sum = 0.0
    n_scored = 0
    n_failed = 0
    for j in range(labels_sub.shape[1]):
        label = labels_sub[:, j]
        pred = preds_sub[:, j]
        if skip_uncertain:
            keep = label > -1
            label = label[keep]
            pred = pred[keep]
        try:
            auc, variance = compare_auc_delong_xu.delong_roc_variance(
                label, pred)
        except Exception:
            # Without the uncertainty filter a failure is a real error.
            if not skip_uncertain:
                raise
            # With the filter, a batch can legitimately end up unscorable
            # for a task; record it and move on.
            n_failed += 1
            continue
        auc_sum += auc
        var_sum += np.nansum(variance)
        n_scored += 1
    return auc_sum, var_sum, n_scored, n_failed


def train_model(model,
                dataloaders,
                criterion,
                optimizer,
                scheduler,
                competition_tasks,
                num_epochs=25,
                max_fpr=None,
                u_approach=None,
                is_inception=False,
                checkpoint=200):
    """Train ``model`` and keep the weights with the best validation AUC.

    Each epoch runs a 'train' and a 'val' phase. For every batch the DeLong
    AUC and variance are computed per selected task and averaged over all
    (batch, task) pairs that could be scored.

    Args:
        model: Network to optimise; must expose ``state_dict`` /
            ``load_state_dict``.
        dataloaders: Mapping with 'train' and 'val' entries, each yielding
            ``(inputs, labels)`` batches.
        criterion: Loss callable. With ``u_approach == "ignore"`` it must
            return per-element losses, since they are masked with a
            label-shaped mask before being summed.
        optimizer: Optimiser stepped once per training batch.
        scheduler: Learning-rate scheduler stepped once per epoch, after the
            training batches.
        competition_tasks: Index selecting the label/output columns that are
            scored.
        num_epochs: Number of epochs to run.
        max_fpr: Unused; kept for interface compatibility.
        u_approach: ``"ignore"`` excludes negative (uncertain) labels from
            both the loss and the AUC; any other value uses every label.
        is_inception: When true, the model is expected to return
            ``(outputs, aux_outputs)`` in the training phase.
        checkpoint: Print and record running metrics every ``checkpoint``
            batches; a falsy value disables intermediate reporting.

    Returns:
        ``(model, (losses, accuracy, variances))`` where each metric is a
        dict with 'train' and 'val' lists and ``model`` carries the best
        validation weights.

    Note:
        Relies on a module-level ``device`` defined outside this function.
    """
    since = time.time()

    best_model_wts = copy.deepcopy(model.state_dict())
    best_auc = 0.0
    missing = 0
    losses = {'train': [], 'val': []}
    accuracy = {'train': [], 'val': []}
    variances = {'train': [], 'val': []}
    ignore_uncertain = u_approach == "ignore"

    for epoch in range(num_epochs):
        print(f'Epoch {epoch}/{num_epochs-1}')
        print('-' * 10)
        # Each epoch has a training and validation phase
        for phase in ['train', 'val']:
            is_train = phase == 'train'
            if is_train:
                model.train()  # Set model to training mode
            else:
                model.eval()  # Set model to evaluate mode

            running_loss = 0.0
            running_auc = 0.0
            running_var = 0.0
            n_samples = 0  # samples seen so far in this phase
            n_scored = 0  # (batch, task) pairs that produced an AUC

            # Iterate over data.
            for i, (inputs, labels) in enumerate(dataloaders[phase]):
                inputs = inputs.to(device)
                labels = labels.to(device)

                # zero the parameter gradients
                optimizer.zero_grad()

                # forward; track history only in the training phase
                with torch.set_grad_enabled(is_train):
                    if is_inception and is_train:
                        # From https://discuss.pytorch.org/t/how-to-optimize-inception-model-with-auxiliary-classifiers/7958
                        outputs, aux_outputs = model(inputs)
                        loss1 = criterion(outputs, labels)
                        loss2 = criterion(aux_outputs, labels)
                        loss = loss1 + 0.4 * loss2
                    else:
                        outputs = model(inputs)
                        loss = criterion(outputs, labels)

                    if ignore_uncertain:
                        # Keep only the certain labels (>= 0) so uncertain
                        # ones (-1) do not contribute to the loss.
                        certain = labels.ge(0)
                        loss = torch.sum(loss.masked_select(certain))

                    # backward + optimize only if in training phase
                    if is_train:
                        loss.backward()
                        optimizer.step()
                        # NOTE: a per-batch scheduler would be stepped here.

                # statistics
                batch_n = inputs.size(0)
                running_loss += loss.item() * batch_n
                n_samples += batch_n

                # select the subset of pathologies of interest
                # NOTE(review): squeeze() also drops the batch axis when a
                # batch holds a single sample - confirm batches are larger.
                labels_sub = labels[:,
                                    competition_tasks].cpu().squeeze().numpy()
                preds_sub = outputs[:,
                                    competition_tasks].detach().cpu().squeeze(
                                    ).numpy()

                auc_sum, var_sum, scored, failed = _accumulate_delong_stats(
                    labels_sub, preds_sub, ignore_uncertain)
                running_auc += auc_sum
                running_var += var_sum
                n_scored += scored
                missing += failed

                # print every 'checkpoint' mini-batches
                if checkpoint and (i + 1) % checkpoint == 0:
                    avg_loss = running_loss / n_samples
                    avg_auc = running_auc / max(n_scored, 1)
                    avg_var = running_var / max(n_scored, 1)
                    print(
                        f'{phase} Loss: {avg_loss} DeLong AUC: {avg_auc} Variance: {avg_var}'
                    )
                    losses[phase].append(avg_loss)
                    accuracy[phase].append(avg_auc)
                    variances[phase].append(avg_var)

            if is_train:
                # Step the schedule after the optimizer updates so the first
                # learning-rate value is not skipped.
                scheduler.step()

            # AUC and variance are accumulated once per (batch, task), so
            # they are averaged over that count, not over the sample count.
            epoch_loss = running_loss / max(n_samples, 1)
            epoch_auc = running_auc / max(n_scored, 1)
            epoch_var = running_var / max(n_scored, 1)
            print(
                f'{phase} Epoch Loss: {epoch_loss} Epoch AUC: {epoch_auc} Epoch Variance: {epoch_var}'
            )
            # With a small validation set no checkpoint would be reached,
            # so always record the epoch-level validation metrics.
            if phase == 'val':
                losses[phase].append(epoch_loss)
                accuracy[phase].append(epoch_auc)
                variances[phase].append(epoch_var)

            # deep copy the model
            if phase == 'val' and epoch_auc > best_auc:
                best_auc = epoch_auc
                best_model_wts = copy.deepcopy(model.state_dict())

    time_elapsed = time.time() - since
    print(f'Training complete in {time_elapsed // 60}m {time_elapsed % 60}s')
    print(f'Best val AUC: {best_auc}')
    print(f'Missed {missing} examples.')
    # load best model weights
    model.load_state_dict(best_model_wts)
    metrics = (losses, accuracy, variances)

    return model, metrics
예제 #10
0
 def test_variance_const(self):
     """Pin the unweighted DeLong AUC and variance to known reference values.

     The AUC must equal the fixture's ``sklearn_auc`` and the variance must
     equal a previously recorded constant.
     """
     expected_variance = 0.0015359814789736538
     auc, variance = compare_auc_delong_xu.delong_roc_variance(
         self.y_test, self.predictions)

     numpy.testing.assert_allclose(self.sklearn_auc, auc)
     numpy.testing.assert_allclose(expected_variance, variance)
예제 #11
0
파일: test.py 프로젝트: llniu/ALD-study
 def test_weights_equal_small(self):
     """Check that a small constant weight (0.214124) leaves the AUC unchanged.

     The weighted DeLong AUC is compared with the fixture's ``sklearn_auc``.
     """
     constant_weights = numpy.full(self.y_test.shape, 0.214124)

     # Only the AUC is checked here; the variance is discarded.
     auc, _ = compare_auc_delong_xu.delong_roc_variance(
         self.y_test, self.predictions, sample_weight=constant_weights)

     numpy.testing.assert_allclose(self.sklearn_auc, auc)