Ejemplo n.º 1
0
def mean_median(dataset):
    """Baseline LoS 'model': predict the training-set mean or median length
    of stay for every test timestep and print regression metrics for each
    constant prediction.

    Args:
        dataset: 'MIMIC' selects the MIMIC path/reader; anything else uses
            the eICU path/reader.
    """
    device = torch.device('cpu')
    if dataset == 'MIMIC':
        path, reader = MIMIC_path, MIMICReader
    else:
        path, reader = eICU_path, eICUReader
    train_datareader = reader(path + 'train', device=device)
    test_datareader = reader(path + 'test', device=device)
    train_batches = train_datareader.batch_gen(batch_size=512)
    test_batches = test_datareader.batch_gen(batch_size=512)
    bool_type = (torch.cuda.BoolTensor
                 if device == torch.device('cuda') else torch.BoolTensor)

    def collect_labels(batches):
        # Strip padded timesteps and concatenate the LoS labels of every
        # batch into a single-column DataFrame.
        collected = np.array([])
        for batch in batches:
            if dataset == 'MIMIC':
                padded, mask, flat, los_labels, mort_labels, seq_lengths = batch
            else:
                (padded, mask, diagnoses, flat, los_labels, mort_labels,
                 seq_lengths) = batch
            collected = np.append(
                collected,
                remove_padding(los_labels, mask.type(bool_type), device))
        return pd.DataFrame(collected, columns=['true'])

    train_y = collect_labels(train_batches)
    mean_train = train_y.mean().values[0]
    median_train = train_y.median().values[0]

    test_y = collect_labels(test_batches)
    test_y['mean'] = mean_train
    test_y['median'] = median_train

    print('Total predictions:')
    print('Using mean value of {}...'.format(mean_train))
    print_metrics_regression(test_y['true'], test_y['mean'])
    print('Using median value of {}...'.format(median_train))
    print_metrics_regression(test_y['true'], test_y['median'])

    return
Ejemplo n.º 2
0
    def validate(self, epoch):
        """Evaluate the model on the validation set and log regression metrics.

        Only runs a validation pass when config.mode == 'train'; in 'test'
        mode it instead calls self.test() on the final epoch. On the final
        training epoch it can also dump (prediction, label) pairs to CSV.

        Args:
            epoch: zero-based epoch index, used as the logging counter.
        """
        if self.config.mode == 'train':
            self.model.eval()
            val_batches = self.val_datareader.batch_gen(
                batch_size=self.config.batch_size_test)
            val_loss = []
            val_y_hat = np.array([])
            val_y = np.array([])

            for (padded, mask, diagnoses, flat, labels,
                 seq_lengths) in val_batches:

                y_hat = self.model(padded, diagnoses, flat)
                loss = self.model.loss(y_hat, labels, mask, seq_lengths,
                                       self.device, self.config.sum_losses,
                                       self.config.loss)
                # keep the detached scalar only; storing the loss tensor
                # itself causes a memory leak (retains the graph)
                val_loss.append(loss.item())

                val_y_hat = np.append(
                    val_y_hat,
                    self.remove_padding(y_hat, mask.type(self.bool_type)))
                val_y = np.append(
                    val_y,
                    self.remove_padding(labels, mask.type(self.bool_type)))

            print('Validation Metrics:')
            mean_val_loss = sum(val_loss) / len(val_loss)
            metrics_list = print_metrics_regression(
                val_y, val_y_hat,
                elog=self.elog)  # order: mad, mse, mape, msle, r2, kappa
            for metric_name, metric in zip(
                ['mad', 'mse', 'mape', 'msle', 'r2', 'kappa'], metrics_list):
                self.add_result(value=metric,
                                name='val_' + metric_name,
                                counter=epoch)
            self.elog.print('Epoch: {} | Validation Loss: {:3.4f}'.format(
                epoch, mean_val_loss))

            # Moved inside the 'train' branch: val_y_hat/val_y only exist
            # here, so saving unconditionally raised a NameError whenever
            # save_results_csv was set while mode == 'test'.
            if epoch == self.n_epochs - 1 and self.config.save_results_csv:
                self.elog.save_to_csv(
                    np.vstack((val_y_hat, val_y)).transpose(),
                    'val_predictions/epoch{}.csv'.format(epoch),
                    header='predictions, label')

        elif self.config.mode == 'test' and epoch == self.n_epochs - 1:
            self.test()

        return
Ejemplo n.º 3
0
    def test(self):
        """Score the model on the test set, logging metrics, the mean test
        loss, optional prediction CSVs, and a summary row in results.csv."""
        self.model.eval()
        batches = self.test_datareader.batch_gen(
            batch_size=self.config.batch_size_test)
        losses = []
        predictions = np.array([])
        true_labels = np.array([])

        for padded, mask, diagnoses, flat, labels, seq_lengths in batches:

            y_hat = self.model(padded, diagnoses, flat)
            batch_loss = self.model.loss(y_hat, labels, mask, seq_lengths,
                                         self.device, self.config.sum_losses,
                                         self.config.loss)
            # store the scalar, not the tensor, to avoid a memory leak
            losses.append(batch_loss.item())

            bool_mask = mask.type(self.bool_type)
            predictions = np.append(predictions,
                                    self.remove_padding(y_hat, bool_mask))
            true_labels = np.append(true_labels,
                                    self.remove_padding(labels, bool_mask))

        print('Test Metrics:')
        mean_test_loss = sum(losses) / len(losses)
        metrics_list = print_metrics_regression(
            true_labels, predictions,
            elog=self.elog)  # order: mad, mse, mape, msle, r2, kappa
        if self.config.save_results_csv:
            self.elog.save_to_csv(
                np.vstack((predictions, true_labels)).transpose(),
                'test_predictions.csv',
                header='predictions, label')
        metric_names = ['mad', 'mse', 'mape', 'msle', 'r2', 'kappa']
        for metric_name, metric in zip(metric_names, metrics_list):
            self.add_result(value=metric, name='test_' + metric_name)
        self.elog.print('Test Loss: {:3.4f}'.format(mean_test_loss))

        # append one comma-separated row of the latest logged test metrics
        with open(self.config.base_dir + '/results.csv', 'a') as f:
            values = self.elog.plot_logger.values
            row = [
                values['test_' + m]['test_' + m][-1][0] for m in metric_names
            ]
            f.write('\n{},{},{},{},{},{}'.format(*row))

        return
Ejemplo n.º 4
0
import pandas as pd
from models.metrics import print_metrics_regression
from eICU_preprocessing.run_all_preprocessing import eICU_path

if __name__ == '__main__':

    # Score pre-computed LoS predictions against the ground-truth labels
    # stored alongside the eICU test split; columns are the actual and
    # predicted ICU LoS values.
    labels = pd.read_csv(eICU_path + 'test/labels.csv')
    print_metrics_regression(labels.actualiculos, labels.predictediculos)
# Mean/median baseline for eICU length-of-stay: predict a single constant
# (the training-set mean or median) for every test timestep and report
# regression metrics for each constant prediction.
device = torch.device('cpu')
train_datareader = eICUReader(eICU_path + 'train', device=device)
test_datareader = eICUReader(eICU_path + 'test', device=device)
train_batches = train_datareader.batch_gen(batch_size=512)
test_batches = test_datareader.batch_gen(batch_size=512)
bool_type = (torch.cuda.BoolTensor
             if device == torch.device('cuda') else torch.BoolTensor)

# flatten the (unpadded) training labels into one array
train_y = np.array([])
for batch_idx, (padded, mask, diagnoses, flat, labels,
                seq_lengths) in enumerate(train_batches):
    train_y = np.append(train_y,
                        remove_padding(labels, mask.type(bool_type), device))
train_y = pd.DataFrame(train_y, columns=['true'])

mean_train = train_y.mean().values[0]
median_train = train_y.median().values[0]

# flatten the (unpadded) test labels the same way
test_y = np.array([])
for batch_idx, (padded, mask, diagnoses, flat, labels,
                seq_lengths) in enumerate(test_batches):
    test_y = np.append(test_y,
                       remove_padding(labels, mask.type(bool_type), device))
test_y = pd.DataFrame(test_y, columns=['true'])

test_y['mean'] = mean_train
test_y['median'] = median_train

print('Total predictions:')
print('Using mean value of {}...'.format(mean_train))
metrics_list = print_metrics_regression(test_y['true'], test_y['mean'])
print('Using median value of {}...'.format(median_train))
metrics_list = print_metrics_regression(test_y['true'], test_y['median'])
Ejemplo n.º 6
0
    def train(self, epoch):
        """Run one training epoch: optimise on each batch, optionally log
        intermediate losses, and on the final epoch save a checkpoint and/or
        the (prediction, label) pairs to CSV.

        Args:
            epoch: zero-based epoch index, used for shuffling, logging
                counters and final-epoch bookkeeping.
        """
        self.model.train()
        if epoch > 0 and self.config.shuffle_train:
            shuffle_train(
                self.config.eICU_path + 'train'
            )  # shuffle the order of the training data to make the batches different, this takes a bit of time
        train_batches = self.train_datareader.batch_gen(
            batch_size=self.config.batch_size)
        train_loss = []
        train_y_hat = np.array([])
        train_y = np.array([])

        for batch_idx, (padded, mask, diagnoses, flat, labels,
                        seq_lengths) in enumerate(train_batches):

            # stop early when only percentage_data% of the batches should be
            # used this epoch
            if batch_idx > (self.no_train_batches //
                            (100 / self.config.percentage_data)):
                break

            self.optimiser.zero_grad()
            y_hat = self.model(padded, diagnoses, flat)
            loss = self.model.loss(y_hat, labels, mask, seq_lengths,
                                   self.device, self.config.sum_losses,
                                   self.config.loss)
            loss.backward()
            self.optimiser.step()
            # keep the detached scalar only; storing the tensor retains the
            # autograd graph and leaks memory
            train_loss.append(loss.item())

            train_y_hat = np.append(
                train_y_hat,
                self.remove_padding(y_hat, mask.type(self.bool_type)))
            train_y = np.append(
                train_y, self.remove_padding(labels,
                                             mask.type(self.bool_type)))

            if self.config.intermediate_reporting and batch_idx % self.config.log_interval == 0 and batch_idx != 0:

                # Mean loss over (roughly) the last log_interval batches.
                # NOTE(review): the -1 end bound excludes the most recent
                # batch from the window while still dividing by log_interval
                # — confirm whether that off-by-one is intentional.
                mean_loss_report = sum(train_loss[
                    (batch_idx -
                     self.config.log_interval):-1]) / self.config.log_interval
                self.add_result(
                    value=mean_loss_report,
                    name='Intermediate_Train_Loss',
                    counter=epoch +
                    batch_idx / self.no_train_batches)  # check this
                # NOTE(review): the second field prints
                # batch_idx * no_train_batches as the sample total —
                # presumably this should be the total sample count
                # (no_train_batches * batch_size); verify against the logs.
                self.elog.print(
                    'Epoch: {} [{:5d}/{:5d} samples] | train loss: {:3.4f}'.
                    format(epoch, batch_idx * self.config.batch_size,
                           batch_idx * self.no_train_batches,
                           mean_loss_report))
                self.checkpoint_counter += 1

        if not self.config.intermediate_reporting and self.config.mode == 'train':

            print('Train Metrics:')
            mean_train_loss = sum(train_loss) / len(train_loss)
            metrics_list = print_metrics_regression(
                train_y, train_y_hat,
                elog=self.elog)  # order: mad, mse, mape, msle, r2, kappa
            for metric_name, metric in zip(
                ['mad', 'mse', 'mape', 'msle', 'r2', 'kappa'], metrics_list):
                self.add_result(value=metric,
                                name='train_' + metric_name,
                                counter=epoch)
            self.elog.print('Epoch: {} | Train Loss: {:3.4f}'.format(
                epoch, mean_train_loss))

        if self.config.mode == 'test':
            print('Done epoch {}'.format(epoch))

        # final-epoch bookkeeping: checkpoint (train mode) and prediction dump
        if epoch == self.n_epochs - 1:
            if self.config.mode == 'train':
                self.save_checkpoint(name='checkpoint', n_iter=epoch)
            if self.config.save_results_csv:
                self.elog.save_to_csv(
                    np.vstack((train_y_hat, train_y)).transpose(),
                    'train_predictions/epoch{}.csv'.format(epoch),
                    header='predictions, label')

        return
Ejemplo n.º 7
0
    def _append_results_row(self, metric_names):
        """Append the latest logged value of each 'test_<name>' metric as one
        comma-separated row of base_dir/results.csv.

        Extracted to replace three near-identical copy-pasted blocks (one per
        task) that each read the plot logger and formatted a CSV row by hand.
        """
        values = self.elog.plot_logger.values
        latest = [
            values['test_' + m]['test_' + m][-1][0] for m in metric_names
        ]
        with open(self.config.base_dir + '/results.csv', 'a') as f:
            f.write('\n' + ','.join('{}'.format(v) for v in latest))

    def test(self, mort_pred_time=24):
        """Evaluate on the test set for the LoS and/or mortality task(s).

        Args:
            mort_pred_time: timestep index at which the mortality prediction
                is taken; batches with fewer timesteps are skipped for the
                mortality task.
        """
        self.model.eval()
        test_batches = self.test_datareader.batch_gen(
            batch_size=self.config.batch_size_test)
        test_loss = []
        test_y_hat_los = np.array([])
        test_y_los = np.array([])
        test_y_hat_mort = np.array([])
        test_y_mort = np.array([])

        for batch in test_batches:

            # unpack batch; MIMIC batches carry no diagnoses tensor
            if self.config.dataset == 'MIMIC':
                padded, mask, flat, los_labels, mort_labels, seq_lengths = batch
                diagnoses = None
            else:
                padded, mask, diagnoses, flat, los_labels, mort_labels, seq_lengths = batch

            y_hat_los, y_hat_mort = self.model(padded, diagnoses, flat)
            loss = self.model.loss(y_hat_los, y_hat_mort, los_labels,
                                   mort_labels, mask, seq_lengths, self.device,
                                   self.config.sum_losses, self.config.loss)
            # keep the detached scalar only; storing the loss tensor directly
            # causes a memory leak (retains the graph)
            test_loss.append(loss.item())

            if self.config.task in ('LoS', 'multitask'):
                test_y_hat_los = np.append(
                    test_y_hat_los,
                    self.remove_padding(y_hat_los, mask.type(self.bool_type)))
                test_y_los = np.append(
                    test_y_los,
                    self.remove_padding(los_labels, mask.type(self.bool_type)))
            if self.config.task in (
                    'mortality',
                    'multitask') and mort_labels.shape[1] >= mort_pred_time:
                test_y_hat_mort = np.append(
                    test_y_hat_mort,
                    self.remove_padding(
                        y_hat_mort[:, mort_pred_time],
                        mask.type(self.bool_type)[:, mort_pred_time]))
                test_y_mort = np.append(
                    test_y_mort,
                    self.remove_padding(
                        mort_labels[:, mort_pred_time],
                        mask.type(self.bool_type)[:, mort_pred_time]))

        print('Test Metrics:')
        mean_test_loss = sum(test_loss) / len(test_loss)

        los_metric_names = ['mad', 'mse', 'mape', 'msle', 'r2', 'kappa']
        mort_metric_names = [
            'acc', 'prec0', 'prec1', 'rec0', 'rec1', 'auroc', 'auprc',
            'f1macro'
        ]

        if self.config.task in ('LoS', 'multitask'):
            los_metrics_list = print_metrics_regression(
                test_y_los, test_y_hat_los,
                elog=self.elog)  # order: mad, mse, mape, msle, r2, kappa
            for metric_name, metric in zip(los_metric_names,
                                           los_metrics_list):
                self.add_result(value=metric, name='test_' + metric_name)
        if self.config.task in ('mortality', 'multitask'):
            mort_metrics_list = print_metrics_mortality(test_y_mort,
                                                        test_y_hat_mort,
                                                        elog=self.elog)
            for metric_name, metric in zip(mort_metric_names,
                                           mort_metrics_list):
                self.add_result(value=metric, name='test_' + metric_name)

        if self.config.save_results_csv:
            if self.config.task in ('LoS', 'multitask'):
                self.elog.save_to_csv(np.vstack(
                    (test_y_hat_los, test_y_los)).transpose(),
                                      'val_predictions_los.csv',
                                      header='los_predictions, label')
            if self.config.task in ('mortality', 'multitask'):
                self.elog.save_to_csv(np.vstack(
                    (test_y_hat_mort, test_y_mort)).transpose(),
                                      'val_predictions_mort.csv',
                                      header='mort_predictions, label')
        self.elog.print('Test Loss: {:3.4f}'.format(mean_test_loss))

        # append one summary row to results.csv; the column set depends on
        # which task(s) were run
        if self.config.task == 'LoS':
            self._append_results_row(los_metric_names)
        elif self.config.task == 'mortality':
            self._append_results_row(mort_metric_names)
        elif self.config.task == 'multitask':
            self._append_results_row(los_metric_names + mort_metric_names)
        return
Ejemplo n.º 8
0
    def validate(self, epoch, mort_pred_time=24):
        """Run validation for the LoS and/or mortality task(s) (train mode).

        In 'test' mode this defers to self.test() on the final epoch. On the
        final training epoch it can also dump (prediction, label) pairs to
        per-task CSVs.

        Args:
            epoch: zero-based epoch index, used as the logging counter.
            mort_pred_time: timestep at which the mortality prediction is
                taken; batches with fewer timesteps are skipped for the
                mortality task.
        """
        if self.config.mode == 'train':
            self.model.eval()
            val_batches = self.val_datareader.batch_gen(
                batch_size=self.config.batch_size_test)
            val_loss = []
            val_y_hat_los = np.array([])
            val_y_los = np.array([])
            val_y_hat_mort = np.array([])
            val_y_mort = np.array([])

            for batch in val_batches:

                # unpack batch; MIMIC batches carry no diagnoses tensor
                if self.config.dataset == 'MIMIC':
                    padded, mask, flat, los_labels, mort_labels, seq_lengths = batch
                    diagnoses = None
                else:
                    padded, mask, diagnoses, flat, los_labels, mort_labels, seq_lengths = batch

                y_hat_los, y_hat_mort = self.model(padded, diagnoses, flat)
                loss = self.model.loss(y_hat_los, y_hat_mort, los_labels,
                                       mort_labels, mask, seq_lengths,
                                       self.device, self.config.sum_losses,
                                       self.config.loss)
                # keep the detached scalar only; storing the loss tensor
                # directly causes a memory leak (retains the graph)
                val_loss.append(loss.item())

                if self.config.task in ('LoS', 'multitask'):
                    val_y_hat_los = np.append(
                        val_y_hat_los,
                        self.remove_padding(y_hat_los,
                                            mask.type(self.bool_type)))
                    val_y_los = np.append(
                        val_y_los,
                        self.remove_padding(los_labels,
                                            mask.type(self.bool_type)))
                if self.config.task in (
                        'mortality', 'multitask'
                ) and mort_labels.shape[1] >= mort_pred_time:
                    val_y_hat_mort = np.append(
                        val_y_hat_mort,
                        self.remove_padding(
                            y_hat_mort[:, mort_pred_time],
                            mask.type(self.bool_type)[:, mort_pred_time]))
                    val_y_mort = np.append(
                        val_y_mort,
                        self.remove_padding(
                            mort_labels[:, mort_pred_time],
                            mask.type(self.bool_type)[:, mort_pred_time]))

            print('Validation Metrics:')
            mean_val_loss = sum(val_loss) / len(val_loss)
            if self.config.task in ('LoS', 'multitask'):
                los_metrics_list = print_metrics_regression(
                    val_y_los, val_y_hat_los,
                    elog=self.elog)  # order: mad, mse, mape, msle, r2, kappa
                for metric_name, metric in zip(
                    ['mad', 'mse', 'mape', 'msle', 'r2', 'kappa'],
                        los_metrics_list):
                    self.add_result(value=metric,
                                    name='val_' + metric_name,
                                    counter=epoch)
            if self.config.task in ('mortality', 'multitask'):
                mort_metrics_list = print_metrics_mortality(val_y_mort,
                                                            val_y_hat_mort,
                                                            elog=self.elog)
                for metric_name, metric in zip([
                        'acc', 'prec0', 'prec1', 'rec0', 'rec1', 'auroc',
                        'auprc', 'f1macro'
                ], mort_metrics_list):
                    self.add_result(value=metric,
                                    name='val_' + metric_name,
                                    counter=epoch)
            self.elog.print('Epoch: {} | Validation Loss: {:3.4f}'.format(
                epoch, mean_val_loss))

            # Moved inside the 'train' branch: the val_* arrays only exist
            # here, so saving unconditionally raised a NameError whenever
            # save_results_csv was set while mode == 'test'.
            if epoch == self.n_epochs - 1 and self.config.save_results_csv:
                if self.config.task in ('LoS', 'multitask'):
                    self.elog.save_to_csv(
                        np.vstack((val_y_hat_los, val_y_los)).transpose(),
                        'val_predictions_los/epoch{}.csv'.format(epoch),
                        header='los_predictions, label')
                if self.config.task in ('mortality', 'multitask'):
                    self.elog.save_to_csv(
                        np.vstack((val_y_hat_mort, val_y_mort)).transpose(),
                        'val_predictions_mort/epoch{}.csv'.format(epoch),
                        header='mort_predictions, label')

        elif self.config.mode == 'test' and epoch == self.n_epochs - 1:
            self.test()

        return