def data_preprocess(dataset, fold, split, static_mode, time_mode):
    """Preprocess one fold/split of the dataset into the dictionary used by the CRN.

    Args:
        - dataset: object whose ``get_fold(fold, split)`` returns the temporal,
            static, label, time and treatment arrays (in that order)
        - fold: Cross validation fold
        - split: 'train', 'valid' or 'test'
        - static_mode: 'concatenate' to merge the static features into the
            temporal features with ``concate_xs``; any other value skips this
        - time_mode: 'concatenate' to merge the time information into the
            temporal features with ``concate_xt``; any other value skips this

    Returns:
        - dataset_crn: dataset dictionary for training the CRN, with keys
            'current_covariates', 'current_treatments' (two-column encoding of
            the rounded first treatment channel), 'previous_treatments'
            (the same encoding with the last timestep dropped), 'outputs',
            'active_entries' and 'sequence_lengths'.
    """
    x, s, y, t, treat = dataset.get_fold(fold, split)

    if static_mode == "concatenate":
        x = concate_xs(x, s)

    if time_mode == "concatenate":
        x = concate_xt(x, t)

    # Encode the rounded first treatment channel for all patients/timesteps at
    # once: 0 -> [1, 0], 1 -> [0, 1], -1 -> [-1, -1]. Any other value keeps
    # the all-zero row it was initialised with.
    treat = np.round(treat)
    first_channel = treat[:, :, 0]
    one_hot_treatments = np.zeros(shape=(treat.shape[0], treat.shape[1], 2))
    one_hot_treatments[first_channel == 0.0] = [1, 0]
    one_hot_treatments[first_channel == 1.0] = [0, 1]
    one_hot_treatments[first_channel == -1.0] = [-1, -1]

    # A timestep is active when at least one of its outputs is non-negative.
    active_entries = (y >= 0).astype(float).max(axis=-1)
    sequence_lengths = active_entries.sum(axis=1).astype(int)
    active_entries = active_entries[:, :, np.newaxis]

    dataset_crn = {
        "current_covariates": x,
        "current_treatments": one_hot_treatments,
        "previous_treatments": one_hot_treatments[:, :-1, :],
        "outputs": y,
        "active_entries": active_entries,
        "sequence_lengths": sequence_lengths,
    }

    return dataset_crn
Beispiel #2
0
    def data_preprocess(self, dataset, fold, split):
        """Build the RMSN training dictionary for one fold/split.

        Args:
            - dataset: object whose ``get_fold(fold, split)`` returns the
                temporal, static, label, time and treatment arrays
            - fold: Cross validation fold
            - split: 'train', 'valid' or 'test'

        Returns:
            - dataset: dataset dictionary for training the RMSN, with keys
                'current_covariates', 'current_treatments',
                'previous_treatments', 'outputs', 'active_entries' and
                'sequence_lengths'.
        """
        features, statics, outcomes, times, treatments = dataset.get_fold(
            fold, split)

        # Optionally fold static features / time information into the
        # temporal features.
        if self.static_mode == 'concatenate':
            features = concate_xs(features, statics)
        if self.time_mode == 'concatenate':
            features = concate_xt(features, times)

        treatments = np.round(treatments)

        # A timestep counts as observed when any of its outcomes is >= 0.
        observed = (outcomes >= 0).astype(float).max(axis=-1)
        lengths = observed.sum(axis=1).astype(int)

        # Previous treatments: current treatments shifted one step later,
        # with an all-zero first step.
        zero_step = np.zeros(shape=(treatments.shape[0], 1,
                                    treatments.shape[-1]))
        shifted = np.concatenate((zero_step, treatments[:, :-1, :]), axis=1)

        return {
            'current_covariates': features,
            'current_treatments': treatments,
            'previous_treatments': shifted,
            'outputs': outcomes,
            'active_entries': observed[:, :, np.newaxis],
            'sequence_lengths': lengths,
        }
def data_preprocess_counterfactuals(encoder_model, dataset, patient_id,
                                    timestep, treatment_options, fold, split,
                                    static_mode, time_mode):
    """Preprocess the dataset for obtaining counterfactual predictions for sequences of future treatments.

    The patient's treatment history (its first ``timestep - 1`` steps) is
    repeated once per treatment option and each option is appended to it. The
    encoder is run on the first ``timestep`` steps of that sequence and its
    outputs at index ``timestep - 1`` initialise the decoder dataset.

    Args:
        - encoder_model: trained encoder model for initializing decoder; must
            provide ``get_balancing_reps`` and ``get_predictions``
        - dataset: temporal, static, label, time, treatment information
        - patient_id: patient id of patient for which the counterfactuals are computed
        - timestep: timestep in the patient trajectory where counterfactuals are predicted.
            ``timestep - 1`` is used both as a slice end and as an index, so
            values below 1 do not select the intended history.
        - treatment_options: treatment options for computing the counterfactual trajectories;
            axis 0 indexes the options, axis 1 has ``projection_horizon + 1`` steps
        - fold: test fold
        - split: testing set splitting parameter
        - static_mode: 'concatenate' or None
        - time_mode: 'concatenate' or None

    Returns:
        - patient_history: history of patient outcome until the specified timestep
        - encoder_output: patient output for the first treatment in the treatment options; this one-step-ahead prediction
            is made using the encoder model.
        - dataset_crn_decoder: dataset that can be used to obtain the counterfactual predictions from the decoder model.
    """
    x, s, y, t, treat = dataset.get_fold(fold, split)

    max_sequence_length = x.shape[1]
    num_treatment_options = treatment_options.shape[0]
    projection_horizon = treatment_options.shape[1] - 1

    if static_mode == "concatenate":
        x = concate_xs(x, s)

    if time_mode == "concatenate":
        x = concate_xt(x, t)

    # One copy of the selected patient per treatment option.
    x = np.repeat([x[patient_id]], num_treatment_options, axis=0)
    y = np.repeat([y[patient_id]], num_treatment_options, axis=0)

    # Observed history followed by each counterfactual treatment sequence.
    treat = np.repeat([treat[patient_id][:timestep - 1]],
                      num_treatment_options,
                      axis=0)
    treat = np.concatenate([treat, treatment_options], axis=1)

    # Encode the rounded first treatment channel in one vectorised pass:
    # 0 -> [1, 0], 1 -> [0, 1], -1 -> [-1, -1]; other values stay [0, 0].
    # (This also avoids loop variables that used to shadow `patient_id`
    # and the time array `t`.)
    treat = np.round(treat)
    first_channel = treat[:, :, 0]
    one_hot_treatments = np.zeros(shape=(treat.shape[0], treat.shape[1], 2))
    one_hot_treatments[first_channel == 0.0] = [1, 0]
    one_hot_treatments[first_channel == 1.0] = [0, 1]
    one_hot_treatments[first_channel == -1.0] = [-1, -1]

    # Encoder treatments: the first `timestep` steps, zero-padded back to the
    # full sequence length so they line up with the covariates.
    encoder_padding = np.zeros(shape=(one_hot_treatments.shape[0],
                                      max_sequence_length - timestep,
                                      one_hot_treatments.shape[-1]))
    one_hot_treatments_encoder = np.concatenate(
        [one_hot_treatments[:, :timestep, :], encoder_padding],
        axis=1,
    )

    dataset_crn_encoder = {
        "current_covariates": x,
        "current_treatments": one_hot_treatments_encoder,
        "previous_treatments": one_hot_treatments_encoder[:, :-1, :],
        "active_entries": np.ones(shape=(x.shape[0], x.shape[1], 1)),
        "sequence_lengths": timestep * np.ones(shape=(num_treatment_options)),
    }

    test_br_states = encoder_model.get_balancing_reps(dataset_crn_encoder)
    test_encoder_predictions = encoder_model.get_predictions(
        dataset_crn_encoder)

    decoder_end = timestep + projection_horizon
    dataset_crn_decoder = {
        "init_states": test_br_states[:, timestep - 1, :],
        "encoder_output": test_encoder_predictions[:, timestep - 1, :],
        "current_treatments": one_hot_treatments[:, timestep:decoder_end, :],
        "previous_treatments":
            one_hot_treatments[:, timestep - 1:decoder_end - 1, :],
        # NOTE(review): spans the whole history+options sequence, not just
        # the projection horizon; kept as in the original.
        "active_entries": np.ones(shape=(one_hot_treatments.shape[0],
                                         one_hot_treatments.shape[1], 1)),
        # One entry per treatment option (batch row), each equal to the number
        # of decoder steps. Previously `timestep * np.ones(projection_horizon)`,
        # which had neither the batch shape nor the decoder length.
        "sequence_lengths":
            projection_horizon * np.ones(shape=(num_treatment_options)),
    }

    patient_history = y[0][:timestep]
    encoder_output = test_encoder_predictions[:, timestep - 1:timestep, :]

    return patient_history, encoder_output, dataset_crn_decoder
    def data_preprocess(self, dataset, fold, split):
        """Preprocess the dataset into per-timestep stacked samples.

        Every active timestep of every patient becomes one sample whose
        features are the flattened window of the last ``self.stack_dim``
        timesteps up to and including that step (zero-padded at the front
        when fewer steps are available).

        Args:
            - dataset: temporal, static, label, time, treatment information
            - fold: Cross validation fold
            - split: 'train', 'valid' or 'test'

        Returns:
            -    stacked_dataset: stacked dataset dictionary for training GANITE,
                 with keys 'x', 'y', 'treat', 'patient_ids' and 'sequence_lengths'.
            -    x: original time-series patient features (after the optional
                 static/time concatenation).
        """
        x, s, y, t, treat = dataset.get_fold(fold, split)

        if self.static_mode == "concatenate":
            x = concate_xs(x, s)

        if self.time_mode == "concatenate":
            x = concate_xt(x, t)

        # Encode the rounded first treatment channel in one vectorised pass:
        # 0 -> [1, 0], 1 -> [0, 1], -1 -> [-1, -1]; other values stay [0, 0].
        treat = np.round(treat)
        first_channel = treat[:, :, 0]
        one_hot_treatments = np.zeros(shape=(treat.shape[0], treat.shape[1],
                                             2))
        one_hot_treatments[first_channel == 0.0] = [1, 0]
        one_hot_treatments[first_channel == 1.0] = [0, 1]
        one_hot_treatments[first_channel == -1.0] = [-1, -1]

        # A timestep is active when at least one of its outputs is >= 0; the
        # number of active steps is used as the trajectory length.
        active_entries = (y >= 0).astype(int).max(axis=-1)
        sequence_lengths = active_entries.sum(axis=1)

        num_features = x.shape[-1]
        num_outcomes = y.shape[-1]
        num_treatments = one_hot_treatments.shape[-1]

        stacked_x_list = []
        stacked_y_list = []
        stacked_treat_list = []
        patient_ids = []

        stack_dim = self.stack_dim
        for index, patient_trajectory in enumerate(x):
            for step in range(sequence_lengths[index]):
                patient_ids.append(index)
                stacked_treat_list.append(one_hot_treatments[index][step])
                stacked_y_list.append(y[index][step])

                if step < stack_dim:
                    # Not enough history yet: right-align what exists in a
                    # zero-filled window.
                    stacked_x = np.zeros(shape=(stack_dim, num_features))
                    stacked_x[-step - 1:] = patient_trajectory[:step + 1]
                else:
                    stacked_x = patient_trajectory[step - stack_dim + 1:step +
                                                   1]
                stacked_x_list.append(stacked_x.flatten())

        total = len(stacked_x_list)

        # The target shape is passed positionally: the `newshape` keyword of
        # np.reshape is deprecated in recent NumPy releases.
        stacked_dataset = dict()
        stacked_dataset["x"] = np.reshape(np.array(stacked_x_list),
                                          (total, num_features * stack_dim))
        stacked_dataset["y"] = np.reshape(np.array(stacked_y_list),
                                          (total, num_outcomes))
        stacked_dataset["treat"] = np.reshape(np.array(stacked_treat_list),
                                              (total, num_treatments))
        stacked_dataset["patient_ids"] = np.array(patient_ids)
        stacked_dataset["sequence_lengths"] = sequence_lengths

        return stacked_dataset, x
Beispiel #5
0
    def data_preprocess_counterfactuals(self, dataset, patient_id, timestep,
                                        treatment_options, fold, split,
                                        static_mode, time_mode):
        """Build the decoder dataset for counterfactual predictions over future treatment sequences.

        The selected patient's first ``timestep - 1`` treatments are repeated
        once per treatment option and each option is appended. The encoder is
        run through ``rnn_test`` on the first ``timestep`` steps, and its
        states at index ``timestep - 1`` seed the decoder dataset.

        Args:
            - dataset: temporal, static, label, time, treatment information
            - patient_id: patient id of patient for which the counterfactuals are computed
            - timestep: timestep in the patient trajectory where counterfactuals are predicted
            - treatment_options: treatment options for computing the counterfactual trajectories
            - fold: test fold
            - split: testing set splitting parameter
            - static_mode: 'concatenate' or None
            - time_mode: 'concatenate' or None

        Returns:
            - patient_history: history of patient outcome until the specified timestep
            - encoder_output: patient output for the first treatment in the treatment options;
                this one-step-ahead prediction is made using the encoder model.
            - dataset_decoder: dataset that can be used to obtain the counterfactual
                predictions from the decoder model.
        """
        features, statics, outcomes, times, treatments = dataset.get_fold(
            fold, split)

        # Sequence length is read before any feature concatenation.
        max_sequence_length = features.shape[1]
        num_treatment_options = treatment_options.shape[0]
        projection_horizon = treatment_options.shape[1] - 1

        if static_mode == 'concatenate':
            features = concate_xs(features, statics)
        if time_mode == 'concatenate':
            features = concate_xt(features, times)

        # One copy of the selected patient per treatment option.
        features = np.repeat([features[patient_id]],
                             num_treatment_options,
                             axis=0)
        outcomes = np.repeat([outcomes[patient_id]],
                             num_treatment_options,
                             axis=0)

        # Observed history followed by each counterfactual treatment sequence.
        history = np.repeat([treatments[patient_id][:timestep - 1]],
                            num_treatment_options,
                            axis=0)
        treatments = np.concatenate([history, treatment_options], axis=1)

        # Encoder sees the first `timestep` treatments, zero-padded to the
        # full sequence length.
        tail_padding = np.zeros(shape=(treatments.shape[0],
                                       max_sequence_length - timestep,
                                       treatments.shape[-1]))
        treatments_encoder = np.concatenate(
            [treatments[:, :timestep, :], tail_padding], axis=1)

        # Previous treatments: encoder treatments shifted one step later with
        # an all-zero first step.
        leading_zero = np.zeros(shape=(treatments.shape[0], 1,
                                       treatments_encoder.shape[-1]))
        previous_encoder = np.concatenate(
            [leading_zero, treatments_encoder[:, :-1, :]], axis=1)

        dataset_encoder = {
            'current_covariates': features,
            'current_treatments': treatments_encoder,
            'previous_treatments': previous_encoder,
            'outputs': outcomes,
            'active_entries': np.ones(shape=(features.shape[0],
                                             features.shape[1], 1)),
            'sequence_lengths': timestep * np.ones(
                shape=(num_treatment_options)),
        }

        test_encoder_predictions, test_states = rnn_test(
            dataset_encoder, self.task, self.MODEL_ROOT)

        treatments_decoder = treatments[:, timestep:timestep +
                                        projection_horizon, :]

        dataset_decoder = {
            'initial_states': test_states[:, timestep - 1, :],
            'scaled_inputs': treatments_decoder,
            'scaled_outputs': np.zeros(shape=(outcomes.shape[0],
                                              projection_horizon,
                                              outcomes.shape[-1])),
            # NOTE(review): active_entries reuses the decoder treatments
            # array itself rather than a mask of ones -- confirm this is
            # intended by the decoder.
            'active_entries': treatments_decoder,
            'sequence_lengths': projection_horizon * np.ones(
                shape=(num_treatment_options)),
        }

        patient_history = outcomes[0][:timestep]
        encoder_output = test_encoder_predictions[:, timestep - 1:timestep, :]

        return patient_history, encoder_output, dataset_decoder