コード例 #1
0
def _pad_and_mask(channels, device, drop_last=False):
    """Pad each channel to a common length and build the matching masks.

    channels: iterable of channels; each channel is a list with one 2-D
        array per subject (it is sliced as ``t[:-1, :]`` when drop_last is
        set; rows are presumably time points -- TODO confirm).
    device: torch device the resulting tensors are moved to.
    drop_last: when True, the last row of every subject is removed before
        padding.

    Returns ``(padded_list, mask_list)`` with one tensor per channel. The
    mask is True wherever the padded tensor is not NaN, so NaN values that
    were already present in the data are masked out together with the
    padding. Masked positions are set to 0 in the padded tensor.
    """
    padded_list = []
    mask_list = []
    for x_ch in channels:
        if drop_last:
            tensors = [torch.FloatTensor(t[:-1, :]) for t in x_ch]
        else:
            tensors = [torch.FloatTensor(t) for t in x_ch]
        # Pad with NaN first so padded positions can be told apart from
        # data; they are zeroed once the mask has been taken.
        padded = nn.utils.rnn.pad_sequence(tensors,
                                           batch_first=False,
                                           padding_value=np.nan)
        mask = ~torch.isnan(padded)
        mask_list.append(mask.to(device))
        padded[torch.isnan(padded)] = 0
        padded_list.append(padded.to(device))
    return padded_list, mask_list


def run_experiment(p, csv_path, out_dir, data_cols=None):
    """
    Train a MCRNNVAE model, evaluate it and save losses and plots.

    p: dict with the hyperparameters. The keys read here are "seed",
        "ch_type", "h_size", "hidden", "n_layers", "z_dim", "clip",
        "n_epochs", "batch_size", "n_channels", "ch_names", "phi_layers",
        "sig_mean", "dropout", "drop_th" and "learning_rate"; all of them
        are assumed to be present. p["n_feats"] is written by this function
        (after params.txt has been saved).
    csv_path: path handed to load_multimodal_data.
    out_dir: output directory. Every output path is built by plain string
        concatenation (out_dir + "params.txt", ...), so out_dir is expected
        to end with a path separator.
    data_cols: columns handed to load_multimodal_data; defaults to an
        empty list.

    Returns a dict with the train/test reconstruction metrics, the last
    training losses, the per-channel prediction/reconstruction MAE and,
    when p["dropout"] is truthy, the dropout components of the model.
    """
    # A fresh list per call instead of a shared mutable default.
    if data_cols is None:
        data_cols = []

    os.makedirs(out_dir, exist_ok=True)

    # Seed
    torch.manual_seed(p["seed"])
    np.random.seed(p["seed"])

    # Save parameters to the out dir
    with open(out_dir + "params.txt", "w") as f:
        f.write(str(p))

    # DEVICE: first GPU when available, CPU otherwise
    DEVICE_ID = 0
    DEVICE = torch.device(
        'cuda:' + str(DEVICE_ID) if torch.cuda.is_available() else 'cpu')
    if torch.cuda.is_available():
        torch.cuda.set_device(DEVICE_ID)

    # LOAD DATA
    # No separate validation split: the test split is also handed to fit().
    X_train, X_test, Y_train, Y_test, _ = load_multimodal_data(
        csv_path,
        data_cols,
        p["ch_type"],
        train_set=0.9,
        normalize=True,
        return_covariates=True)

    # Number of features per channel, taken from the first subject
    p["n_feats"] = [x[0].shape[1] for x in X_train]

    print('Length of train/test')
    print(len(X_train[0]))
    print(len(X_test[0]))

    # For each channel, pad and create the mask
    X_train_list, mask_train_list = _pad_and_mask(X_train, DEVICE)
    X_test_list, mask_test_list = _pad_and_mask(X_test, DEVICE)

    # Longest padded sequence over BOTH splits. The original took the
    # maximum over the train list twice and never looked at the test list.
    ntp = max(max(x.shape[0] for x in X_train_list),
              max(x.shape[0] for x in X_test_list))

    model = rnnvae_h.MCRNNVAE(p["h_size"],
                              p["hidden"],
                              p["n_layers"],
                              p["hidden"],
                              p["n_layers"],
                              p["hidden"],
                              p["n_layers"],
                              p["z_dim"],
                              p["hidden"],
                              p["n_layers"],
                              p["clip"],
                              p["n_epochs"],
                              p["batch_size"],
                              p["n_channels"],
                              p["ch_type"],
                              p["n_feats"],
                              DEVICE,
                              print_every=100,
                              phi_layers=p["phi_layers"],
                              sigmoid_mean=p["sig_mean"],
                              dropout=p["dropout"],
                              dropout_threshold=p["drop_th"])

    model.ch_name = p["ch_names"]

    optimizer = torch.optim.Adam(model.parameters(), lr=p["learning_rate"])
    model.optimizer = optimizer

    model = model.to(DEVICE)

    # Fit the model
    model.fit(X_train_list, X_test_list, mask_train_list, mask_test_list)

    if p["dropout"]:
        print("Print the dropout")
        print(model.dropout_comp)

    # After training, save the model
    model.save(out_dir, 'model.pt')

    # Predict the reconstructions for the train and test sets
    X_train_fwd = model.predict(X_train_list, mask_train_list, nt=ntp)
    X_test_fwd = model.predict(X_test_list, mask_test_list, nt=ntp)

    # Plot training vs. validation curves for every loss term
    plot_total_loss(model.loss['total'], model.val_loss['total'], "Total loss",
                    out_dir, "total_loss.png")
    plot_total_loss(model.loss['kl'], model.val_loss['kl'], "kl_loss", out_dir,
                    "kl_loss.png")
    plot_total_loss(model.loss['ll'], model.val_loss['ll'], "ll_loss", out_dir,
                    "ll_loss.png")

    # Reconstruction metrics over the whole train and test sets
    train_loss = model.recon_loss(X_train_fwd,
                                  target=X_train_list,
                                  mask=mask_train_list)
    test_loss = model.recon_loss(X_test_fwd,
                                 target=X_test_list,
                                 mask=mask_test_list)

    # The printed values are the "mae" entries, so label them as MAE; the
    # last line reports the test set (it used to say "train set").
    print('MAE over the train set: ' + str(train_loss["mae"]))
    print('Reconstruction loss over the train set: ' +
          str(train_loss["rec_loss"]))

    print('MAE over the test set: ' + str(test_loss["mae"]))
    print('Reconstruction loss over the test set: ' +
          str(test_loss["rec_loss"]))

    # Placeholders: prediction is evaluated for the first three channels,
    # reconstruction for all of them. Entries that are never computed keep
    # the empty list.
    results = {f"pred_{ch_name}_mae": [] for ch_name in p["ch_names"][:3]}
    results.update({f"recon_{ch_name}_mae": [] for ch_name in p["ch_names"]})

    ######################
    ## Prediction of last time point
    ######################

    # Test data without the last time point of every subject
    X_test_list_minus, mask_test_list_minus = _pad_and_mask(X_test,
                                                            DEVICE,
                                                            drop_last=True)

    X_test_fwd_minus = model.predict(X_test_list_minus,
                                     mask_test_list_minus,
                                     nt=ntp)
    X_test_xnext = X_test_fwd_minus["xnext"]

    for i, (X_ch, ch) in enumerate(zip(X_test[:3], p["ch_names"][:3])):
        print(f'testing for {ch}')
        # Subjects with a single time point (baseline only) are ignored in
        # both lists, so y_true and y_pred stay aligned.
        y_true = [x[-1] for x in X_ch if len(x) > 1]
        y_pred = [
            X_test_xnext[i][len(x) - 1, j, :] for (j, x) in enumerate(X_ch)
            if len(x) > 1
        ]

        results[f'pred_{ch}_mae'] = mean_absolute_error(y_true, y_pred)

    ############################
    ## Test reconstruction for each channel, using the other ones
    ############################
    if p["n_channels"] > 1:

        for i in range(len(X_test)):
            curr_name = p["ch_names"][i]
            # Every channel except the one being reconstructed
            av_ch = [ch_idx for ch_idx in range(len(X_test)) if ch_idx != i]
            ch_recon = model.predict(X_test_list,
                                     mask_test_list,
                                     nt=ntp,
                                     av_ch=av_ch,
                                     task='recon')

            y_true = X_test[i]
            # Swap the first two dims to iterate over subjects
            y_pred = np.transpose(ch_recon["xnext"][i], (1, 0, 2))
            # Keep only as many time points as each subject really has
            y_pred = [
                x_pred[:len(x_true)]
                for (x_pred, x_true) in zip(y_pred, y_true)
            ]

            # Flatten to one entry per (subject, time point)
            y_pred = [tp for subj in y_pred for tp in subj]
            y_true = [tp for subj in y_true for tp in subj]

            # MAE for that specific channel over all time points
            results[f"recon_{curr_name}_mae"] = mean_absolute_error(
                y_true, y_pred)

    loss = {
        "mae_train": train_loss["mae"],
        "rec_train": train_loss["rec_loss"],
        "mae_test": test_loss["mae"],
        "loss_total": model.loss['total'][-1],
        "loss_kl": model.loss['kl'][-1],
        "loss_ll": model.loss['ll'][-1],
    }

    if p["dropout"]:
        loss["dropout_comps"] = model.dropout_comp

    loss = {**loss, **results}
    print(loss)

    # Dir for projections (only created here, nothing is written into it)
    os.makedirs(out_dir + 'z_proj/', exist_ok=True)

    qzx_train = [np.array(x) for x in X_train_fwd['qzx']]
    qzx_test = [np.array(x) for x in X_test_fwd['qzx']]

    def _plot(qzx, labels, palette, plt_tp, fig_prefix, mask):
        # All latent-space figures share these settings.
        plot_latent_space(model,
                          qzx,
                          ntp,
                          classificator=labels,
                          pallete_dict=palette,
                          plt_tp=plt_tp,
                          all_plots=True,
                          uncertainty=False,
                          savefig=True,
                          out_dir=fig_prefix,
                          mask=mask)

    # Diagnosis labels as plain nested lists
    DX_train = [list(elem) for elem in Y_train["DX"]]
    DX_test = [list(elem) for elem in Y_test["DX"]]

    # Define colors
    pallete_dict = {"CN": "#2a9e1e", "MCI": "#bfbc1a", "AD": "#af1f1f"}

    # Latent space colored by diagnosis, all time points
    out_dir_sample = out_dir + 'zcomp_ch_dx/'
    os.makedirs(out_dir_sample, exist_ok=True)
    _plot(qzx_test, DX_test, pallete_dict, 'all', out_dir_sample + '_test',
          mask_test_list)
    _plot(qzx_train, DX_train, pallete_dict, 'all', out_dir_sample + '_train',
          mask_train_list)

    # Latent space colored by diagnosis, first time point only
    out_dir_sample_t0 = out_dir + 'zcomp_ch_dx_t0/'
    os.makedirs(out_dir_sample_t0, exist_ok=True)
    _plot(qzx_train, DX_train, pallete_dict, [0],
          out_dir_sample_t0 + '_train', mask_train_list)
    _plot(qzx_test, DX_test, pallete_dict, [0], out_dir_sample_t0 + '_test',
          mask_test_list)

    # Now plot color by time point: the label is the position in the sequence
    out_dir_sample = out_dir + 'zcomp_ch_tp/'
    os.makedirs(out_dir_sample, exist_ok=True)

    classif_train = [[i for (i, _) in enumerate(elem)]
                     for elem in Y_train["DX"]]
    classif_test = [[i for (i, _) in enumerate(elem)]
                    for elem in Y_test["DX"]]

    # One color per time point index
    pallete = sns.color_palette("viridis", ntp)
    pallete_dict = {i: value for (i, value) in enumerate(pallete)}

    _plot(qzx_train, classif_train, pallete_dict, 'all',
          out_dir_sample + '_train', mask_train_list)
    _plot(qzx_test, classif_test, pallete_dict, 'all',
          out_dir_sample + '_test', mask_test_list)

    return loss
コード例 #2
0
def _pad_and_mask(channels, device, drop_last=False):
    """Pad each channel to a common length and build the matching masks.

    channels: iterable of channels; each channel is a list with one 2-D
        array per subject (it is sliced as ``t[:-1, :]`` when drop_last is
        set; rows are presumably time points -- TODO confirm).
    device: torch device the resulting tensors are moved to.
    drop_last: when True, the last row of every subject is removed before
        padding.

    Returns ``(padded_list, mask_list)`` with one tensor per channel. The
    mask is True wherever the padded tensor is not NaN, so NaN values that
    were already present in the data are masked out together with the
    padding. Masked positions are set to 0 in the padded tensor.
    """
    padded_list = []
    mask_list = []
    for x_ch in channels:
        if drop_last:
            tensors = [torch.FloatTensor(t[:-1, :]) for t in x_ch]
        else:
            tensors = [torch.FloatTensor(t) for t in x_ch]
        # Pad with NaN first so padded positions can be told apart from
        # data; they are zeroed once the mask has been taken.
        padded = nn.utils.rnn.pad_sequence(tensors,
                                           batch_first=False,
                                           padding_value=np.nan)
        mask = ~torch.isnan(padded)
        mask_list.append(mask.to(device))
        padded[torch.isnan(padded)] = 0
        padded_list.append(padded.to(device))
    return padded_list, mask_list


def run_experiment(p, csv_path, out_dir, data_cols=None):
    """
    Train a MCRNNVAE model (rnnvae_drop variant), evaluate it and save plots.

    p: dict with the hyperparameters. The keys read here are "seed",
        "h_size", "hidden", "n_layers", "z_dim", "clip", "n_epochs",
        "batch_size", "n_channels", "ch_names" and "learning_rate"; all of
        them are assumed to be present. p["n_feats"] is written by this
        function (after params.txt has been saved).
    csv_path: path handed to load_multimodal_data.
    out_dir: output directory. Every output path is built by plain string
        concatenation (out_dir + "params.txt", ...), so out_dir is expected
        to end with a path separator.
    data_cols: columns handed to load_multimodal_data; defaults to an
        empty list.

    Returns a dict with the train/test reconstruction metrics, the
    last-time-point prediction error, the last training losses and one
    "recon_<channel>_mae" entry per channel.
    """
    # A fresh list per call instead of a shared mutable default.
    if data_cols is None:
        data_cols = []

    os.makedirs(out_dir, exist_ok=True)

    # Seed
    torch.manual_seed(p["seed"])
    np.random.seed(p["seed"])

    # Save parameters to the out dir
    with open(out_dir + "params.txt", "w") as f:
        f.write(str(p))

    # DEVICE: first GPU when available, CPU otherwise
    DEVICE_ID = 0
    DEVICE = torch.device(
        'cuda:' + str(DEVICE_ID) if torch.cuda.is_available() else 'cpu')
    if torch.cuda.is_available():
        torch.cuda.set_device(DEVICE_ID)

    # LOAD DATA
    # No separate validation split: the test split is also handed to fit().
    X_train, X_test, Y_train, Y_test, _ = load_multimodal_data(
        csv_path,
        data_cols,
        train_set=0.8,
        normalize=True,
        return_covariates=True)

    # Number of features per channel, taken from the first subject
    p["n_feats"] = [x[0].shape[1] for x in X_train]

    print('Length of train/test')
    print(len(X_train[0]))
    print(len(X_test[0]))

    # For each channel, pad and create the mask
    X_train_list, mask_train_list = _pad_and_mask(X_train, DEVICE)
    X_test_list, mask_test_list = _pad_and_mask(X_test, DEVICE)

    # Longest padded sequence over BOTH splits. The original took the
    # maximum over the train list twice and never looked at the test list.
    ntp = max(max(x.shape[0] for x in X_train_list),
              max(x.shape[0] for x in X_test_list))

    model = rnnvae_drop.MCRNNVAE(p["h_size"],
                                 p["hidden"],
                                 p["n_layers"],
                                 p["hidden"],
                                 p["n_layers"],
                                 p["hidden"],
                                 p["n_layers"],
                                 p["z_dim"],
                                 p["hidden"],
                                 p["n_layers"],
                                 p["clip"],
                                 p["n_epochs"],
                                 p["batch_size"],
                                 p["n_channels"],
                                 p["n_feats"],
                                 DEVICE,
                                 0.3,
                                 print_every=100)

    model.ch_name = p["ch_names"]

    optimizer = torch.optim.Adam(model.parameters(), lr=p["learning_rate"])
    model.optimizer = optimizer

    model = model.to(DEVICE)
    # Fit the model
    model.fit(X_train_list, X_test_list, mask_train_list, mask_test_list)

    print("Print the dropout")
    print(model.dropout)

    # After training, save the model
    model.save(out_dir, 'model.pt')

    # Predict the reconstructions for the train and test sets
    X_train_fwd = model.predict(X_train_list, nt=ntp)
    X_test_fwd = model.predict(X_test_list, nt=ntp)

    # Plot training vs. validation curves for every loss term
    plot_total_loss(model.loss['total'], model.val_loss['total'], "Total loss",
                    out_dir, "total_loss.png")
    plot_total_loss(model.loss['kl'], model.val_loss['kl'], "kl_loss", out_dir,
                    "kl_loss.png")
    plot_total_loss(model.loss['ll'], model.val_loss['ll'], "ll_loss", out_dir,
                    "ll_loss.png")

    # Reconstruction metrics over the whole train and test sets
    train_loss = model.recon_loss(X_train_fwd,
                                  target=X_train_list,
                                  mask=mask_train_list)
    test_loss = model.recon_loss(X_test_fwd,
                                 target=X_test_list,
                                 mask=mask_test_list)

    # The printed values are the "mae" entries, so label them as MAE; the
    # last line reports the test set (it used to say "train set").
    print('MAE over the train set: ' + str(train_loss["mae"]))
    print('Reconstruction loss over the train set: ' +
          str(train_loss["rec_loss"]))

    print('MAE over the test set: ' + str(test_loss["mae"]))
    print('Reconstruction loss over the test set: ' +
          str(test_loss["rec_loss"]))

    ######################
    ## Prediction of last time point
    ######################

    # Test data without the last time point of every subject
    X_test_list_minus, _ = _pad_and_mask(X_test, DEVICE, drop_last=True)

    X_test_fwd_minus = model.predict(X_test_list_minus, nt=ntp)
    X_test_xnext = X_test_fwd_minus["xnext"]

    # Sum over channels of the per-channel mean error at each subject's
    # last time point.
    # NOTE(review): this is computed with mean_squared_error but returned
    # under the key "mae_last_tp"; the key is kept for existing callers.
    last_tp_mse = 0
    for i, x_ch in enumerate(X_test):
        # Full sequences (last time point included) are the targets
        full_tensors = [torch.FloatTensor(t) for t in x_ch]
        ch_errors = [
            mean_squared_error(x_subj[len(x_subj) - 1, :],
                               X_test_xnext[i][len(x_subj) - 1, j, :])
            for (j, x_subj) in enumerate(full_tensors)
        ]
        last_tp_mse += sum(ch_errors) / len(ch_errors)

    ############################
    ## Test reconstruction for each channel, using the other ones
    ############################
    rec_results = {}
    for i in range(len(X_test_list)):
        curr_name = p["ch_names"][i]
        # Every channel except the one being reconstructed
        av_ch = [ch_idx for ch_idx in range(len(X_test_list)) if ch_idx != i]
        ch_recon = model.predict(X_test_list, nt=ntp, av_ch=av_ch)

        # Accumulate the masked MAE over all time points of the channel.
        # The per-time-point values are summed, not averaged.
        mae_loss = 0
        for t in range(len(mask_test_list[i])):
            # Only the first feature of the mask is used for time point t
            mask_channel = mask_test_list[i][t, :, 0]
            mae_loss += rnnvae_drop.mae(target=X_test_list[i][t].cpu(),
                                        predicted=ch_recon["xnext"][i][t],
                                        mask=mask_channel)

        rec_results[f"recon_{curr_name}_mae"] = mae_loss.item()

    # Dir for projections (only created here, nothing is written into it)
    os.makedirs(out_dir + 'z_proj/', exist_ok=True)

    qzx_train = [np.array(x) for x in X_train_fwd['qzx']]
    qzx_test = [np.array(x) for x in X_test_fwd['qzx']]

    def _plot(qzx, labels, palette, plt_tp, fig_prefix, mask):
        # All latent-space figures share these settings.
        plot_latent_space(model,
                          qzx,
                          ntp,
                          classificator=labels,
                          pallete_dict=palette,
                          plt_tp=plt_tp,
                          all_plots=True,
                          uncertainty=False,
                          savefig=True,
                          out_dir=fig_prefix,
                          mask=mask)

    # Diagnosis labels as plain nested lists
    DX_train = [list(elem) for elem in Y_train["DX"]]
    DX_test = [list(elem) for elem in Y_test["DX"]]

    # Define colors
    pallete_dict = {"CN": "#2a9e1e", "MCI": "#bfbc1a", "AD": "#af1f1f"}

    # Latent space colored by diagnosis, all time points
    out_dir_sample = out_dir + 'zcomp_ch_dx/'
    os.makedirs(out_dir_sample, exist_ok=True)
    _plot(qzx_test, DX_test, pallete_dict, 'all', out_dir_sample + '_test',
          mask_test_list)
    _plot(qzx_train, DX_train, pallete_dict, 'all', out_dir_sample + '_train',
          mask_train_list)

    # Latent space colored by diagnosis, first time point only
    out_dir_sample_t0 = out_dir + 'zcomp_ch_dx_t0/'
    os.makedirs(out_dir_sample_t0, exist_ok=True)
    _plot(qzx_train, DX_train, pallete_dict, [0],
          out_dir_sample_t0 + '_train', mask_train_list)
    _plot(qzx_test, DX_test, pallete_dict, [0], out_dir_sample_t0 + '_test',
          mask_test_list)

    # Now plot color by time point: the label is the position in the sequence
    out_dir_sample = out_dir + 'zcomp_ch_tp/'
    os.makedirs(out_dir_sample, exist_ok=True)

    classif_train = [[i for (i, _) in enumerate(elem)]
                     for elem in Y_train["DX"]]
    classif_test = [[i for (i, _) in enumerate(elem)]
                    for elem in Y_test["DX"]]

    # One color per time point index
    pallete = sns.color_palette("viridis", ntp)
    pallete_dict = {i: value for (i, value) in enumerate(pallete)}

    _plot(qzx_train, classif_train, pallete_dict, 'all',
          out_dir_sample + '_train', mask_train_list)
    _plot(qzx_test, classif_test, pallete_dict, 'all',
          out_dir_sample + '_test', mask_test_list)

    loss = {
        "mae_train": train_loss["mae"],
        "rec_train": train_loss["rec_loss"],
        "mae_test": test_loss["mae"],
        "mae_last_tp": last_tp_mse,
        "loss_total": model.loss['total'][-1],
        "loss_kl": model.loss['kl'][-1],
        "loss_ll": model.loss['ll'][-1]
    }

    loss = {**loss, **rec_results}

    return loss
コード例 #3
0
# Turn every entry of the sampled 'qzx' output into a numpy array and
# report how many entries there are and how long the first one is.
qzx = [np.array(x) for x in X_sample['qzx']]
print(len(qzx))
print(len(qzx[0]))

# Classificator labels: the time point index as a string, repeated
# nsamples times per time point and flattened in time point order.
classif = np.array([
    label for tp in range(p["ntp"]) for label in [str(tp)] * nsamples
])

# Folder that receives the latent-space figures
out_dir_sample = out_dir + 'test_zspace_function/'
if not os.path.exists(out_dir_sample):
    os.makedirs(out_dir_sample)

# Same figure settings twice: first with all_plots off, then with it on.
for show_all in (False, True):
    plot_latent_space(model,
                      qzx,
                      p["ntp"],
                      classificator=classif,
                      plt_tp='all',
                      all_plots=show_all,
                      uncertainty=True,
                      savefig=True,
                      out_dir=out_dir_sample)
コード例 #4
0
ファイル: test_set_eval.py プロジェクト: GerardMJuan/RNN-VAE
def run_eval(out_dir,
             test_csv,
             data_cols,
             dropout_threshold_test,
             output_to_file=False):
    """
    Main function to evaluate a model.

    Evaluate a trained model
    out_dir: directory where the model is and the results will be stored.
    test_csv: where the csv with the test data is stored.
    data_cols: name of channels.
    dropout_threshold_test: threshold of the dropout
    use_synth: use synthetic data
    """

    ch_bl = []  ##STORE THE CHANNELS THAT WE CONVERT TO LONG BUT WERE BL

    #Redirect output to the out dir
    if output_to_file:
        sys.stdout = open(out_dir + 'output.out', 'w')

    #load parameters
    p = eval(open(out_dir + "params.txt").read())

    long_to_bl = p[
        "long_to_bl"]  #variable to decide if we have transformed the long to bl or not.

    # DEVICE
    ## Decidint on device on device.
    DEVICE_ID = 0
    DEVICE = torch.device(
        'cuda:' + str(DEVICE_ID) if torch.cuda.is_available() else 'cpu')
    if torch.cuda.is_available():
        torch.cuda.set_device(DEVICE_ID)

    X_test, _, Y_test, _, col_lists = load_multimodal_data(
        test_csv,
        data_cols,
        p["ch_type"],
        train_set=1.0,
        normalize=True,
        return_covariates=True)
    p["n_feats"] = [x[0].shape[1] for x in X_test]

    # need to deal with ntp here
    ntp = max(np.max([[len(xi) for xi in x] for x in X_test]),
              np.max([[len(xi) for xi in x] for x in X_test]))

    if long_to_bl:
        # Process MASK WITHOUT THE REPETITION OF BASELINE
        # HERE, change bl to long and repeat the values at t0 for ntp
        for i in range(len(p["ch_type"])):
            if p["ch_type"][i] == 'bl':

                for j in range(len(X_test[i])):
                    X_test[i][j] = np.array([X_test[i][j][0]] * ntp)

                # p["ch_type"][i] = 'long'
                ch_bl.append(i)

    X_test_list = []
    mask_test_list = []

    # Process test set
    for x_ch in X_test:
        X_test_tensor = [torch.FloatTensor(t) for t in x_ch]
        X_test_pad = nn.utils.rnn.pad_sequence(X_test_tensor,
                                               batch_first=False,
                                               padding_value=np.nan)
        mask_test = ~torch.isnan(X_test_pad)
        mask_test_list.append(mask_test.to(DEVICE))
        X_test_pad[torch.isnan(X_test_pad)] = 0
        X_test_list.append(X_test_pad.to(DEVICE))

    model = rnnvae_s.MCRNNVAE(p["h_size"],
                              p["enc_hidden"],
                              p["enc_n_layers"],
                              p["z_dim"],
                              p["dec_hidden"],
                              p["dec_n_layers"],
                              p["clip"],
                              p["n_epochs"],
                              p["batch_size"],
                              p["n_channels"],
                              p["ch_type"],
                              p["n_feats"],
                              p["c_z"],
                              DEVICE,
                              print_every=100,
                              phi_layers=p["phi_layers"],
                              sigmoid_mean=p["sig_mean"],
                              dropout=p["dropout"],
                              dropout_threshold=p["drop_th"])

    model = model.to(DEVICE)
    model.load(out_dir + 'model.pt')
    if p["dropout"]:
        print(model.dropout_comp)
        model.dropout_threshold = dropout_threshold_test

    ####################################
    # IF DROPOUT, CHECK THE COMPONENTS AND THRESHOLD AND CHANGE IT
    ####################################

    ##TEST
    X_test_fwd = model.predict(X_test_list, mask_test_list, nt=ntp)

    # Test the reconstruction and prediction

    ######################
    ## Prediction of last time point
    ######################
    # Test data without last timepoint
    # X_test_tensors do have the last timepoint
    pred_ch = list(range(3))
    t_pred = 1
    res = eval_prediction(model, X_test, t_pred, pred_ch, DEVICE)

    for (i, ch) in enumerate(
        [x for (i, x) in enumerate(p["ch_names"]) if i in pred_ch]):
        print(f'pred_{ch}_mae: {res[i]}')

    ############################
    ## Test reconstruction for each channel, using the other one
    ############################
    # For each channel
    results = np.zeros(
        (len(X_test), len(X_test)))  #store the results, will save later

    for i in range(len(X_test)):
        for j in range(len(X_test)):
            curr_name = p["ch_names"][i]
            to_recon = p["ch_names"][j]
            av_ch = [j]
            mae_rec = eval_reconstruction(model, X_test, X_test_list,
                                          mask_test_list, av_ch, i)
            results[i, j] = mae_rec
            # Get MAE result for that specific channel over all timepoints
            print(f"recon_{curr_name}_from{to_recon}_mae: {mae_rec}")

    df_crossrec = pd.DataFrame(data=results,
                               index=p["ch_names"],
                               columns=p["ch_names"])
    plt.tight_layout()
    ax = sns.heatmap(df_crossrec, annot=True, fmt=".2f", vmin=0, vmax=1)
    plt.savefig(out_dir + "figure_crossrecon.png")
    plt.close()
    # SAVE AS FIGURE
    df_crossrec.to_latex(out_dir + "table_crossrecon.tex")

    ############################
    ## Test reconstruction for each channel, using the rest
    ############################
    # For each channel
    results = np.zeros((len(X_test), 1))  #store the results, will save later

    for i in range(len(X_test)):
        av_ch = list(range(len(X_test))).remove(i)
        to_recon = p["ch_names"][i]
        mae_rec = eval_reconstruction(model, X_test, X_test_list,
                                      mask_test_list, av_ch, i)
        results[i] = mae_rec
        # Get MAE result for that specific channel over all timepoints
        print(f"recon_{to_recon}_fromall_mae: {mae_rec}")

    df_totalrec = pd.DataFrame(data=results.T, columns=p["ch_names"])

    # SAVE AS FIGURE
    df_totalrec.to_latex(out_dir + "table_totalrecon.tex")

    ###############################################################
    # PLOTTING, FIRST GENERAL PLOTTING AND THEN SPECIFIC PLOTTING #
    ###############################################################

    # Test the new function of latent space
    #NEED TO ADAPT THIS FUNCTION
    qzx_test = [np.array(x) for x in X_test_fwd['qzx']]

    # IF WE DO THAT TRANSFORMATION
    if long_to_bl:
        for i in ch_bl:
            qzx_test[i] = np.array(
                [qzx if j == 0 else None for j, qzx in enumerate(qzx_test[i])])

    # Now plot color by timepoint
    out_dir_sample = out_dir + 'zcomp_ch_age/'
    if not os.path.exists(out_dir_sample):
        os.makedirs(out_dir_sample)

    #Binarize the ages and
    age_full = [x for elem in Y_test["AGE_demog"] for x in elem]
    bins, retstep = np.linspace(min(age_full), max(age_full), 8, retstep=True)
    age_digitized = [np.digitize(y, bins) for y in Y_test["AGE_demog"]]

    classif_test = [[bins[x - 1] for (i, x) in enumerate(elem)]
                    for elem in age_digitized]

    pallete = sns.color_palette("viridis", 8)
    pallete_dict = {bins[i]: value for (i, value) in enumerate(pallete)}

    ####IF DROPOUT, SELECT ONLY COMPS WITH DROPOUT > TAL
    if model.dropout:
        kept_comp = model.kept_components
    else:
        kept_comp = None

    print(kept_comp)
    plot_latent_space(model,
                      qzx_test,
                      ntp,
                      classificator=classif_test,
                      pallete_dict=pallete_dict,
                      plt_tp='all',
                      all_plots=True,
                      uncertainty=False,
                      comp=kept_comp,
                      savefig=True,
                      out_dir=out_dir_sample + '_test',
                      mask=mask_test_list)

    #Convert to standard
    #Add padding so that the mask also works here
    DX_test = [[x for x in elem] for elem in Y_test["DX"]]

    #Define colors
    pallete_dict = {"CN": "#2a9e1e", "MCI": "#bfbc1a", "AD": "#af1f1f"}
    # Get classificator labels, for n time points
    out_dir_sample = out_dir + 'zcomp_ch_dx/'
    if not os.path.exists(out_dir_sample):
        os.makedirs(out_dir_sample)

    plot_latent_space(model,
                      qzx_test,
                      ntp,
                      classificator=DX_test,
                      pallete_dict=pallete_dict,
                      plt_tp='all',
                      all_plots=True,
                      uncertainty=False,
                      comp=kept_comp,
                      savefig=True,
                      out_dir=out_dir_sample + '_test',
                      mask=mask_test_list)

    out_dir_sample_t0 = out_dir + 'zcomp_ch_dx_t0/'
    if not os.path.exists(out_dir_sample_t0):
        os.makedirs(out_dir_sample_t0)

    plot_latent_space(model,
                      qzx_test,
                      ntp,
                      classificator=DX_test,
                      pallete_dict=pallete_dict,
                      plt_tp=[0],
                      all_plots=True,
                      uncertainty=False,
                      comp=kept_comp,
                      savefig=True,
                      out_dir=out_dir_sample_t0 + '_test',
                      mask=mask_test_list)

    # Now plot color by timepoint
    out_dir_sample = out_dir + 'zcomp_ch_tp/'
    if not os.path.exists(out_dir_sample):
        os.makedirs(out_dir_sample)

    classif_test = [[i for (i, x) in enumerate(elem)] for elem in Y_test["DX"]]

    pallete = sns.color_palette("viridis", ntp)
    pallete_dict = {i: value for (i, value) in enumerate(pallete)}

    plot_latent_space(model,
                      qzx_test,
                      ntp,
                      classificator=classif_test,
                      pallete_dict=pallete_dict,
                      plt_tp='all',
                      all_plots=True,
                      uncertainty=False,
                      comp=kept_comp,
                      savefig=True,
                      out_dir=out_dir_sample + '_test',
                      mask=mask_test_list)
コード例 #5
0
def run_experiment(p, csv_path, out_dir, data_cols=None):
    """
    Train an MCRNNVAE on the whole dataset and plot the training results.

    The data is loaded with ``train_set=1.0``, so no test split is used here:
    the model is fitted, saved, and evaluated on the training data only.

    Args:
        p: dict of hyperparameters. The keys read here are "seed", "h_size",
            "hidden", "n_layers", "z_dim", "clip", "n_epochs", "batch_size",
            "n_channels", "ch_names" and "learning_rate"; all of them must be
            present. The key "n_feats" is written into ``p`` by this function.
        csv_path: forwarded to ``load_multimodal_data``.
        out_dir: output directory, created if missing. It is used as a plain
            string prefix (``out_dir + "params.txt"``), so it must end with a
            path separator for the files to land inside the directory.
        data_cols: forwarded to ``load_multimodal_data``; defaults to an
            empty list.

    Returns:
        dict with keys "mse_train", "rec_train", "loss_total", "loss_kl" and
        "loss_ll". Note that "mse_train" holds the "mae" entry returned by
        ``model.recon_loss``; the three "loss_*" entries are the last values
        of the corresponding training-loss histories.
    """
    # Build the default per call: a literal [] in the signature would be a
    # single list object shared by every call of this function.
    if data_cols is None:
        data_cols = []

    os.makedirs(out_dir, exist_ok=True)

    # Seed
    torch.manual_seed(p["seed"])
    np.random.seed(p["seed"])

    # Save parameters to the out dir. This happens before "n_feats" is added
    # to p below, so the saved file reflects p as it was passed in.
    with open(out_dir + "params.txt", "w") as f:
        f.write(str(p))

    # DEVICE: first CUDA device when available, CPU otherwise.
    DEVICE_ID = 0
    use_cuda = torch.cuda.is_available()
    DEVICE = torch.device('cuda:' + str(DEVICE_ID) if use_cuda else 'cpu')
    if use_cuda:
        torch.cuda.set_device(DEVICE_ID)

    # LOAD DATA
    # train_set=1.0: everything goes to the training split, so the test
    # outputs are discarded. The last returned value is not used here.
    X_train, _, Y_train, _, _ = load_multimodal_data(
        csv_path,
        data_cols,
        train_set=1.0,
        normalize=True,
        return_covariates=True)

    # One entry per channel: second dimension of that channel's first element.
    p["n_feats"] = [x[0].shape[1] for x in X_train]

    X_train_list = []
    mask_train_list = []

    # For each channel, pad, create the mask, and append
    for x_ch in X_train:
        X_train_tensor = [torch.FloatTensor(t) for t in x_ch]
        # Pad with NaN so padded positions can be told apart from real values.
        X_train_pad = nn.utils.rnn.pad_sequence(X_train_tensor,
                                                batch_first=False,
                                                padding_value=np.nan)
        # True where a value is present. Any NaN already in the data is
        # masked out as well, not only the padding.
        mask_train = ~torch.isnan(X_train_pad)
        mask_train_list.append(mask_train.to(DEVICE))
        X_train_pad[~mask_train] = 0
        X_train_list.append(X_train_pad.to(DEVICE))

    # Largest first dimension over all padded channel tensors.
    ntp = max(x_pad.shape[0] for x_pad in X_train_list)

    model = rnnvae.MCRNNVAE(p["h_size"], p["hidden"], p["n_layers"],
                            p["hidden"], p["n_layers"], p["hidden"],
                            p["n_layers"], p["z_dim"], p["hidden"],
                            p["n_layers"], p["clip"], p["n_epochs"],
                            p["batch_size"], p["n_channels"], p["n_feats"],
                            DEVICE)

    model.ch_name = p["ch_names"]
    model.optimizer = torch.optim.Adam(model.parameters(),
                                       lr=p["learning_rate"])

    model = model.to(DEVICE)
    # Fit the model. The training tensors and masks are passed twice; the
    # second pair presumably fills the validation slots -- TODO confirm
    # against MCRNNVAE.fit.
    model.fit(X_train_list, X_train_list, mask_train_list, mask_train_list)

    # After training, save the model
    model.save(out_dir, 'model.pt')

    # Forward pass over the training data
    X_train_fwd = model.predict(X_train_list, nt=ntp)

    # Plot training and validation loss curves
    plot_total_loss(model.loss['total'], model.val_loss['total'], "Total loss",
                    out_dir, "total_loss.png")
    plot_total_loss(model.loss['kl'], model.val_loss['kl'], "kl_loss", out_dir,
                    "kl_loss.png")
    plot_total_loss(model.loss['ll'], model.val_loss['ll'], "ll_loss", out_dir,
                    "ll_loss.png")

    # Reconstruction metrics over the training set (there is no test split).
    train_loss = model.recon_loss(X_train_fwd,
                                  target=X_train_list,
                                  mask=mask_train_list)

    # NOTE: the label says "MSE" but the value printed is the "mae" entry.
    # The text is left unchanged so existing logs stay comparable.
    print('MSE over the train set: ' + str(train_loss["mae"]))
    print('Reconstruction loss over the train set: ' +
          str(train_loss["rec_loss"]))

    # Dir for projections. Nothing in this function writes into it; it is
    # still created so the output layout stays the same.
    proj_path = 'z_proj/'
    os.makedirs(out_dir + proj_path, exist_ok=True)

    qzx = [np.array(x) for x in X_train_fwd['qzx']]

    print('len qzx')
    print(len(qzx))

    # Keyword arguments shared by every latent-space plot below.
    shared_plot_args = dict(all_plots=True,
                            uncertainty=False,
                            savefig=True,
                            mask=mask_train_list)

    # --- Latent space colored by diagnosis, all time points ---
    out_dir_sample = out_dir + 'zcomp_ch_dx/'
    os.makedirs(out_dir_sample, exist_ok=True)

    # Plain-list copy of every DX sequence
    DX = [list(elem) for elem in Y_train["DX"]]

    # Define colors
    pallete_dict = {"CN": "#2a9e1e", "MCI": "#bfbc1a", "AD": "#af1f1f"}

    plot_latent_space(model,
                      qzx,
                      ntp,
                      classificator=DX,
                      pallete_dict=pallete_dict,
                      plt_tp='all',
                      out_dir=out_dir_sample,
                      **shared_plot_args)

    # --- Latent space colored by diagnosis, first time point only ---
    out_dir_sample_t0 = out_dir + 'zcomp_ch_dx_t0/'
    os.makedirs(out_dir_sample_t0, exist_ok=True)

    plot_latent_space(model,
                      qzx,
                      ntp,
                      classificator=DX,
                      pallete_dict=pallete_dict,
                      plt_tp=[0],
                      out_dir=out_dir_sample_t0,
                      **shared_plot_args)

    # --- Latent space colored by time point ---
    out_dir_sample = out_dir + 'zcomp_ch_tp/'
    os.makedirs(out_dir_sample, exist_ok=True)

    # The label of each observation is its position within its own sequence.
    classif = [[i for (i, _) in enumerate(elem)] for elem in Y_train["DX"]]
    pallete = sns.color_palette("viridis", ntp)
    pallete_dict = {i: value for (i, value) in enumerate(pallete)}

    plot_latent_space(model,
                      qzx,
                      ntp,
                      classificator=classif,
                      pallete_dict=pallete_dict,
                      plt_tp='all',
                      out_dir=out_dir_sample,
                      **shared_plot_args)

    loss = {
        "mse_train": train_loss["mae"],
        "rec_train": train_loss["rec_loss"],
        "loss_total": model.loss['total'][-1],
        "loss_kl": model.loss['kl'][-1],
        "loss_ll": model.loss['ll'][-1]
    }

    return loss
コード例 #6
0
ファイル: test_set_eval.py プロジェクト: GerardMJuan/RNN-VAE
# Eight viridis colors, each keyed by the entry of `bins` at the same index.
pallete = sns.color_palette("viridis", 8)
pallete_dict = {bins[pos]: colour for pos, colour in enumerate(pallete)}

# With dropout enabled, restrict the plots to the components the model
# reports as kept; otherwise pass None as the component selection.
kept_comp = model.kept_components if model.dropout else None
print(kept_comp)

plot_latent_space(
    model,
    qzx_test,
    ntp,
    classificator=classif_test,
    pallete_dict=pallete_dict,
    plt_tp='all',
    all_plots=True,
    uncertainty=False,
    comp=kept_comp,
    savefig=True,
    out_dir=out_dir_sample + '_test',
    mask=mask_test_list,
)

# Plain-list copy of every sequence stored under Y_test["DX"].
DX_test = [list(elem) for elem in Y_test["DX"]]

# Fixed color per diagnosis label.
pallete_dict = {"CN": "#2a9e1e", "MCI": "#bfbc1a", "AD": "#af1f1f"}

# Output folder for the plots colored by diagnosis.
out_dir_sample = out_dir + 'zcomp_ch_dx/'
if not os.path.exists(out_dir_sample):