def test_load_parameters(self):
        """
        Test the method load_parameters.
        """
        leaspy_object = Leaspy.load(example_logisticmodel_path)

        abstract_model = AbstractModel("dummy_model")

        abstract_model.load_parameters(leaspy_object.model.parameters)

        self.assertTrue(torch.equal(abstract_model.parameters['g'],
                                    torch.tensor([1.8669992685317993, 2.4921786785125732,
                                                  2.471605062484741, 2.1240732669830322])))
        self.assertTrue(torch.equal(abstract_model.parameters['v0'],
                                    torch.tensor([-2.8300716876983643, -3.3241398334503174,
                                                  -3.4701175689697266, -2.6136295795440674])))
        self.assertTrue(torch.equal(abstract_model.parameters['betas'],
                                    torch.tensor([[0.011530596762895584, 0.06039918214082718],
                                                  [0.008324957452714443, 0.048168670386075974],
                                                  [0.01144738681614399, 0.0822334811091423]])))
        self.assertTrue(torch.equal(abstract_model.parameters['tau_mean'], torch.tensor(75.30111694335938)))
        self.assertTrue(torch.equal(abstract_model.parameters['tau_std'], torch.tensor(7.103002071380615)))
        self.assertTrue(torch.equal(abstract_model.parameters['xi_mean'], torch.tensor(0.0)))
        self.assertTrue(torch.equal(abstract_model.parameters['xi_std'], torch.tensor(0.2835913300514221)))
        self.assertTrue(torch.equal(abstract_model.parameters['sources_mean'], torch.tensor(0.0)))
        self.assertTrue(torch.equal(abstract_model.parameters['sources_std'], torch.tensor(1.0)))
        self.assertTrue(torch.equal(abstract_model.parameters['noise_std'], torch.tensor(0.1988248974084854)))
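Note that torch.equal requires an exact element-wise match, which only holds here because the stored float32 values round-trip unchanged through the model file. A more tolerant variant of the same check could look like this (a sketch; `expected_g` is an illustrative name):

import torch

expected_g = torch.tensor([1.8669992685317993, 2.4921786785125732,
                           2.471605062484741, 2.1240732669830322])
# torch.allclose tolerates tiny float discrepancies that torch.equal would reject
assert torch.allclose(abstract_model.parameters['g'], expected_g, atol=1e-6)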
Example #2
    def test_estimate(self):
        model_parameters_path = os.path.join(test_data_dir, 'model_parameters',
                                             'fitted_multivariate_model.json')
        leaspy = Leaspy.load(model_parameters_path)

        ip_path = os.path.join(test_data_dir, 'io', 'outputs', 'ip_save.json')
        ip = IndividualParameters.load(ip_path)

        timepoints = {'idx1': [78, 81], 'idx2': [91]}

        estimations = leaspy.estimate(timepoints, ip)

        test = {
            'idx1': [[0.9168074, 0.88841885, 0.80543965, 0.9921461],
                     [0.98348546, 0.9773835, 0.9456895, 0.99938035]],
            'idx2': [[0.9999131, 0.9998343, 0.9991264, 0.99999964]]
        }

        # Test
        self.assertEqual(estimations.keys(), test.keys())
        for k in estimations.keys():
            self.assertTrue(k in test.keys())
            for v1, v2 in zip(estimations[k], test[k]):
                # compare absolute differences; without abs(), negative errors would always pass
                self.assertTrue((abs(v1 - v2) < 10e-8).all())
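The hand-rolled tolerance loop above can be condensed with numpy.allclose, assuming the estimated values are array-like (a sketch):

import numpy as np

for k, expected in test.items():
    # absolute tolerance matching the 10e-8 threshold used above
    assert np.allclose(np.asarray(estimations[k]), np.asarray(expected), atol=10e-8)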
Example #3
    def test_acceptation(self):
        n_patients = 17
        n_draw = 200
        # temperature_inv = 1.0

        path_model_sampler = os.path.join(test_data_dir, "model_parameters",
                                          "multivariate_model_sampler.json")
        path_data = os.path.join(test_data_dir, "io", "data_tiny.csv")

        # data = Dataset(Data.from_csv_file(path_data))
        leaspy = Leaspy.load(path_model_sampler)
        # realizations = leaspy.model.get_realization_object(n_patients)

        # Test with taus
        var_name = 'tau'
        gsampler = GibbsSampler(
            leaspy.model.random_variable_informations()[var_name], n_patients)

        for i in range(n_draw):
            gsampler._update_acceptation_rate(
                torch.tensor([1.0] * 10 + [0.0] * 7, dtype=torch.float32))

        self.assertAlmostEqual(gsampler.acceptation_temp.mean(),
                               10 / 17,
                               delta=0.05)
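The expected value 10/17 is simply the mean of the acceptance mask fed in at every draw: 10 accepted patients out of 17. A quick sanity check of that arithmetic:

import torch

mask = torch.tensor([1.0] * 10 + [0.0] * 7)
print(mask.mean().item())  # ~0.5882 == 10 / 17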
Example #4
    def test_append_spaceshifts_to_individual_parameters_dataframe(self):
        df = pd.DataFrame(data=[[0.1, 70, 0.1, -0.3], [0.2, 73, -0.4, 0.1],
                                [0.3, 58, -0.6, 0.2]],
                          index=["idx1", "idx2", "idx3"],
                          columns=["xi", "tau", "sources_0", "sources_1"])

        leaspy = Leaspy.load(
            os.path.join(test_data_dir, 'model_parameters', 'test_api.json'))

        df_w = append_spaceshifts_to_individual_parameters_dataframe(
            df, leaspy)
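The test above stops at the call itself; a minimal follow-up check that stays agnostic about the exact names of the appended space-shift columns (a sketch):

# the helper should preserve the individuals and only append columns
assert list(df_w.index) == ["idx1", "idx2", "idx3"]
assert df_w.shape[1] > df.shape[1]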
Example #5
    def test_compute_trajectory_of_population(self):
        leaspy = Leaspy.load(
            os.path.join(test_data_dir, 'model_parameters', 'test_api.json'))
        ip = IndividualParameters.load(
            os.path.join(test_data_dir, 'io', 'outputs', 'ip_save.json'))

        timepoints = [70, 71, 72, 73, 74, 75, 76]

        trajectory = compute_trajectory_of_population(timepoints, ip, leaspy)
        #self.assertTrue(torch.is_tensor(trajectory))
        # TODO : choose a convention for output type : Numpy or Torch ? For now it seems numpy in api.estimate
        self.assertEqual(trajectory.shape[0], 7)
        self.assertEqual(trajectory.shape[1], 4)
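Since the trajectory comes back as a (timepoints x features) array (numpy-like, as the TODO above notes), a quick visual inspection could look like this (a sketch):

import matplotlib.pyplot as plt

plt.plot(timepoints, trajectory)  # one curve per feature
plt.xlabel("age")
plt.ylabel("feature value")
plt.show()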
Example #6
    def test_get_reparametrized_ages(self):
        leaspy = Leaspy.load(
            os.path.join(test_data_dir, 'model_parameters', 'test_api.json'))
        ip = IndividualParameters.load(
            os.path.join(test_data_dir, 'io', 'outputs', 'ip_save.json'))

        ages = {'idx1': [70, 80], 'idx3': [100]}
        reparametrized_ages = get_reparametrized_ages(ages, ip, leaspy)

        self.assertEqual(reparametrized_ages.keys(), ages.keys())
        self.assertEqual(reparametrized_ages['idx1'],
                         [78.02704620361328, 89.0787582397461])
        self.assertEqual(reparametrized_ages['idx3'], [134.7211151123047])
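For reference, models of this family typically reparametrize an age t as exp(xi) * (t - tau) + tau_mean; the sketch below encodes that assumption rather than the actual leaspy internals:

import math

def reparametrized_age(t, xi, tau, tau_mean):
    # assumed time reparametrization: faster progressors (xi > 0) age faster on the model timeline
    return math.exp(xi) * (t - tau) + tau_mean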
Example #7
    def test_personalize_scipy(self):
        """
        Load logistic model from file, and personalize it to data from ...
        :return:
        """
        # Inputs
        data = Data.from_csv_file(example_data_path)

        # Initialize
        leaspy = Leaspy.load(example_logisticmodel_path)

        # Launch algorithm
        algo_personalize_settings = AlgorithmSettings('scipy_minimize', seed=0)
        ips, noise_std = leaspy.personalize(data,
                                            settings=algo_personalize_settings,
                                            return_noise=True)

        self.assertAlmostEqual(noise_std.item(), 0.1169, delta=0.01)
Example #8
    def test_personalize_scipy_diag_noise(self):
        """
        Load logistic model (diag noise) from file, and personalize it to data from ...
        :return:
        """
        # Inputs
        data = Data.from_csv_file(example_data_path)

        # Initialize
        leaspy = Leaspy.load(example_logisticmodel_diag_noise_path)

        # Launch algorithm
        algo_personalize_settings = AlgorithmSettings('scipy_minimize', seed=0)
        ips, noise_std = leaspy.personalize(data,
                                            settings=algo_personalize_settings,
                                            return_noise=True)

        diff_noise = noise_std - torch.tensor([0.3299, 0.1236, 0.1642, 0.2582])
        self.assertAlmostEqual((diff_noise**2).sum(), 0., delta=0.01)
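A per-feature formulation of the same tolerance check (a sketch reusing the noise_std tensor from the test above; the tolerances are comparable but not strictly identical to the squared-sum version):

import torch

expected = torch.tensor([0.3299, 0.1236, 0.1642, 0.2582])
assert torch.allclose(noise_std, expected, atol=0.01)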
Example #9
    def test_personalize_modereal(self):
        """
        Load logistic model from file, and personalize it to data from ...
        :return:
        """
        # Inputs
        data = Data.from_csv_file(example_data_path)

        # Initialize
        leaspy = Leaspy.load(example_logisticmodel_path)

        # Launch algorithm
        path_settings = os.path.join(os.path.dirname(__file__),
                                     "data/settings_mode_real.json")
        algo_personalize_settings = AlgorithmSettings.load(path_settings)
        ips, noise_std = leaspy.personalize(data,
                                            settings=algo_personalize_settings,
                                            return_noise=True)

        self.assertAlmostEqual(noise_std.item(), 0.12152, delta=0.01)
Example #10
    def test_check_cofactors(self, get_result=False):
        """
        Test Leaspy.simulate return a ``ValueError`` if the ``cofactor`` and ``cofactor_state`` parameters given
        in the ``AlgorithmSettings`` are invalid.

        Parameters
        ----------
        get_result : bool
            If set to ``True``, return the leaspy model and result object used to do the test. Else return nothing.

        Returns
        -------
        model : leaspy.Leaspy
        results : leaspy.io.outputs.result.Result
        """
        data = Data.from_csv_file(example_data_path)
        cofactors = pd.read_csv(
            os.path.join(test_data_dir, "io/data/data_tiny_covariate.csv"))
        cofactors.columns = ("ID", "Treatments")
        cofactors['ID'] = cofactors['ID'].astype(str)
        cofactors = cofactors.set_index("ID")
        data.load_cofactors(cofactors, ["Treatments"])

        model = Leaspy.load(
            os.path.join(test_data_dir,
                         "model_parameters/multivariate_model_sampler.json"))
        settings = AlgorithmSettings('mode_real')
        individual_parameters = model.personalize(data, settings)

        settings = AlgorithmSettings('simulation', cofactor="dummy")
        self.assertRaises(ValueError, model.simulate, individual_parameters,
                          data, settings)

        settings = AlgorithmSettings('simulation',
                                     cofactor="Treatments",
                                     cofactor_state="dummy")
        self.assertRaises(ValueError, model.simulate, individual_parameters,
                          data, settings)

        if get_result:
            return model, individual_parameters, data
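The same expectations written with assertRaises as a context manager, which many find more readable (a stylistic sketch of the first check above):

settings = AlgorithmSettings('simulation', cofactor="dummy")
with self.assertRaises(ValueError):
    model.simulate(individual_parameters, data, settings)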
Example #11
def get_individual_parameters(data):
    # Data
    leaspy_data = convert_data(data)

    # Algorithm
    settings = AlgorithmSettings('scipy_minimize')

    # Leaspy

    #leaspy = Leaspy.load(data['model'])
    # TO CORRECT
    #if data['model']['name'] == 'logistic_parallel':
    leaspy = Leaspy.load(data['model'])
    #elif data['model']['name'] == 'logistic':
    #    leaspy = Leaspy.load(os.path.join(os.getcwd(), 'data', 'example', 'parkinson_model.json'))
    individual_parameters = leaspy.personalize(leaspy_data, settings=settings)

    output = {
        'individual_parameters' : individual_parameters["patient"],
        'scores': leaspy_data.to_dataframe().values.T.tolist()
    }

    return output
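A hypothetical invocation showing the payload shape this helper assumes (the 'model' entry and the visit-data format depend on convert_data, defined elsewhere; the path below is illustrative only):

payload = {
    'model': 'data/example/parkinson_model.json',  # hypothetical path accepted by Leaspy.load
    # ... plus the visit data that convert_data() expects
}
output = get_individual_parameters(payload)
print(output['individual_parameters'], output['scores'])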
Example #12
    def test_get_error_distribution_dataframe(self):
        model_path = os.path.join(test_data_dir, "model_parameters",
                                  "fitted_multivariate_model.json")
        leaspy_session = Leaspy.load(model_path)
        self.results.get_error_distribution_dataframe(leaspy_session.model)
Example #13
    def test_sample(self):
        """
        Test whether sampled values are the ones expected.
        :return:
        """
        # TODO change this instantiation
        n_patients = 17
        n_draw = 50
        temperature_inv = 1.0

        path_model_sampler = os.path.join(test_data_dir, "model_parameters",
                                          "multivariate_model_sampler.json")
        path_data = os.path.join(test_data_dir, "io", "data", "data_tiny.csv")

        data = Dataset(Data.from_csv_file(path_data))
        leaspy = Leaspy.load(path_model_sampler)
        realizations = leaspy.model.get_realization_object(n_patients)

        # Test with taus
        var_name = 'tau'
        gsampler = GibbsSampler(
            leaspy.model.random_variable_informations()[var_name], n_patients)
        random_draws = []
        for i in range(n_draw):
            gsampler.sample(data, leaspy.model, realizations, temperature_inv)
            random_draws.append(
                realizations[var_name].tensor_realizations.clone())

        stack_random_draws = torch.stack(random_draws)
        stack_random_draws_mean = (stack_random_draws[1:, :, :] -
                                   stack_random_draws[:-1, :, :]).mean(dim=0)
        stack_random_draws_std = (stack_random_draws[1:, :, :] -
                                  stack_random_draws[:-1, :, :]).std(dim=0)

        self.assertAlmostEqual(stack_random_draws_mean.mean(),
                               0.0160,
                               delta=0.05)
        self.assertAlmostEqual(stack_random_draws_std.mean(),
                               0.0861,
                               delta=0.05)

        # Test with g
        var_name = 'g'
        gsampler = GibbsSampler(
            leaspy.model.random_variable_informations()[var_name], n_patients)
        random_draws = []
        for i in range(n_draw):
            gsampler.sample(data, leaspy.model, realizations, temperature_inv)
            random_draws.append(
                realizations[var_name].tensor_realizations.clone())

        stack_random_draws = torch.stack(random_draws)
        stack_random_draws_mean = (stack_random_draws[1:, :] -
                                   stack_random_draws[:-1, :]).mean(dim=0)
        stack_random_draws_std = (stack_random_draws[1:, :] -
                                  stack_random_draws[:-1, :]).std(dim=0)

        self.assertAlmostEqual(stack_random_draws_mean.mean(),
                               4.2792e-05,
                               delta=0.05)
        self.assertAlmostEqual(stack_random_draws_std.mean(),
                               0.0045,
                               delta=0.05)
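The statistics above are computed on differences between consecutive draws, which probe the sampler's step size rather than the marginal spread of the chain. Restated compactly (a sketch):

deltas = stack_random_draws[1:] - stack_random_draws[:-1]
# mean increment ~ 0 (no systematic drift); std of increments reflects the proposal step size
assert abs(deltas.mean().item()) < 0.05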
Example #14
    def test_usecase(self):
        """
        Functional test of a basic analysis using the leaspy package

        1 - Data loading
        2 - Fit logistic model with MCMC algorithm
        3 - Save parameters & reload (remove created files to keep the repo clean)
        4 - Personalize model with 'mode_real' algorithm
        5 - Plot results
        6 - Simulate new patients
        """
        data = Data.from_csv_file(example_data_path)

        # Fit
        algo_settings = AlgorithmSettings('mcmc_saem', n_iter=10, seed=0)
        leaspy = Leaspy("logistic")
        leaspy.model.load_hyperparameters({'source_dimension': 2})
        leaspy.fit(data, algorithm_settings=algo_settings)
        self.model_values_test(leaspy.model)

        # Save parameters and check its consistency
        path_to_saved_model = os.path.join(test_data_dir, 'model_parameters',
                                           'test_api-copy.json')
        leaspy.save(path_to_saved_model)

        with open(
                os.path.join(test_data_dir, "model_parameters",
                             'test_api.json'), 'r') as f1:
            model_parameters = json.load(f1)
        with open(path_to_saved_model) as f2:
            model_parameters_new = json.load(f2)
        # self.assertTrue(ordered(model_parameters) == ordered(model_parameters_new))
        self.assertTrue(
            dict_compare_and_display(model_parameters, model_parameters_new))

        # Load data and check its consistency
        leaspy = Leaspy.load(path_to_saved_model)
        os.remove(path_to_saved_model)
        self.assertTrue(leaspy.model.is_initialized)
        self.model_values_test(leaspy.model)

        # Personalize
        algo_personalize_settings = AlgorithmSettings('mode_real', seed=0)
        individual_parameters = leaspy.personalize(
            data, settings=algo_personalize_settings)
        # TODO REFORMAT: compute the noise std afterwards
        #self.assertAlmostEqual(result.noise_std, 0.21146, delta=0.01)

        ## Plot TODO
        #path_output = os.path.join(os.path.dirname(__file__), '../../_data', "_outputs")
        #plotter = Plotter(path_output)
        # plotter.plot_mean_trajectory(leaspy.model, save_as="mean_trajectory_plot")
        #plt.close()

        # Simulate
        simulation_settings = AlgorithmSettings('simulation', seed=0)
        simulation_results = leaspy.simulate(individual_parameters, data,
                                             simulation_settings)
        self.assertIsInstance(simulation_results, Result)
        self.assertEqual(simulation_results.data.headers, data.headers)
        n = simulation_settings.parameters['number_of_subjects']
        self.assertEqual(simulation_results.data.n_individuals, n)
        self.assertEqual(
            len(simulation_results.get_parameter_distribution('xi')), n)
        self.assertEqual(
            len(simulation_results.get_parameter_distribution('tau')), n)
        self.assertEqual(
            len(
                simulation_results.get_parameter_distribution('sources')
                ['sources0']), n)
        # simulation_results.data.to_dataframe().to_csv(os.path.join(
        #     test_data_dir, "_outputs/simulation/test_api_simulation_df-post_merge-result_fix.csv"), index=False)
        # Test the reproducibility of simulate
        # Rounding is necessary: writing and reading induces numerical errors of magnitude ~1e-13,
        # but across machines errors of magnitude ~1e-5 have been observed.
        # TODO: Can we improve this?
        simulation_df = pd.read_csv(
            os.path.join(
                test_data_dir,
                "_outputs/simulation/test_api_simulation_df-post_merge-result_fix.csv"
            ))

        id_simulation_is_reproducible = simulation_df['ID'].equals(
            simulation_results.data.to_dataframe()['ID'])
        # Check IDs separately first - strings don't work with numpy.allclose
        self.assertTrue(id_simulation_is_reproducible)

        round_decimal = 5
        simulation_is_reproducible = allclose(
            simulation_df.loc[:, simulation_df.columns != 'ID'].values,
            simulation_results.data.to_dataframe().
            loc[:,
                simulation_results.data.to_dataframe().columns != 'ID'].values,
            atol=10**(-round_decimal),
            rtol=10**(-round_decimal))
        # Use of numpy.allclose instead of pandas.testing.assert_frame_equal because of buggy behaviour reported
        # in https://github.com/pandas-dev/pandas/issues/22052

        # If reproducibility error > 1e-5 => display it + visit with the biggest reproducibility error
        error_message = ''
        if not simulation_is_reproducible:
            # simulation_df = pd.read_csv(
            #     os.path.join(test_data_dir, "_outputs/simulation/test_api_simulation_df-post_merge-result_fix.csv"))
            max_diff = 0.
            value_v1 = 0.
            value_v2 = 0.
            count = 0
            tol = 10**(-round_decimal)
            actual_simu_df = simulation_results.data.to_dataframe()
            for v1, v2 in zip(
                    simulation_df.loc[:, simulation_df.columns != 'ID'].values.
                    tolist(), actual_simu_df.
                    loc[:, actual_simu_df.columns != 'ID'].values.tolist()):
                diff = [abs(val1 - val2) for val1, val2 in zip(v1, v2)]
                if max(diff) > tol:
                    count += 1
                if max(diff) > max_diff:
                    value_v1 = v1
                    value_v2 = v2
                    max_diff = max(diff)
            error_message += '\nTolerance error = %.1e' % tol
            error_message += '\nMaximum error = %.3e' % max_diff
            error_message += '\n' + str(
                [round(v, round_decimal + 1) for v in value_v1])
            error_message += '\n' + str(
                [round(v, round_decimal + 1) for v in value_v2])
            error_message += '\nNumber of simulated visits above tolerance error = %d / %d \n' \
                             % (count, simulation_df.shape[0])
        # For loop before the last self.assert - otherwise no display is made
        self.assertTrue(simulation_is_reproducible, error_message)
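Condensed, the six-step workflow this functional test exercises reads as follows (a sketch with placeholder paths; plotting is skipped, mirroring the commented-out section above):

from leaspy import Leaspy, Data, AlgorithmSettings

data = Data.from_csv_file('visits.csv')                        # 1 - data loading
leaspy = Leaspy('logistic')
leaspy.model.load_hyperparameters({'source_dimension': 2})
leaspy.fit(data, algorithm_settings=AlgorithmSettings('mcmc_saem', n_iter=10, seed=0))  # 2 - fit
leaspy.save('model.json')                                      # 3 - save & reload
leaspy = Leaspy.load('model.json')
ip = leaspy.personalize(data, settings=AlgorithmSettings('mode_real', seed=0))          # 4 - personalize
results = leaspy.simulate(ip, data, AlgorithmSettings('simulation', seed=0))            # 6 - simulate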
Example #15
import os
import sys

import numpy as np
import plotly.graph_objects as go
import seaborn as sns
from sklearn.decomposition import PCA

sys.path.append("../")
print(os.getcwd())

from leaspy import IndividualParameters, Data, Leaspy
from src.leaspype import get_reparametrized_ages, append_spaceshifts_to_individual_parameters_dataframe

#%% Load Data
path_datadashboard = "data/"
leaspy = Leaspy.load(os.path.join(path_datadashboard, "leaspy.json"))
individual_parameters = IndividualParameters.load(
    os.path.join(path_datadashboard, "ip.csv"))
data = Data.from_csv_file(os.path.join(path_datadashboard, "data.csv"))
n_sources = leaspy.model.source_dimension
sources_name = ["sources_{}".format(i) for i in range(n_sources)]
df_data = data.to_dataframe().set_index(["ID", "TIME"])
features = leaspy.model.features

# ind parameters
df_ind = individual_parameters.to_dataframe()
ind_param_names = list(df_ind.columns) + ["pca1", "pca2"]
df_ind = df_ind.reset_index()

# PCA on the w_i
res = append_spaceshifts_to_individual_parameters_dataframe(
    individual_parameters.to_dataframe(), leaspy)  # arguments assumed; the original line is truncated here
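The snippet is cut off at this point. A plausible continuation of the PCA on the space shifts w_i, reusing the PCA import above (a sketch; the space-shift column names produced by the helper are assumed, not documented here):

w_columns = [c for c in res.columns if c.startswith("w_")]  # assumed naming
pca = PCA(n_components=2)
df_ind[["pca1", "pca2"]] = pca.fit_transform(res[w_columns].values)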
name = "personalize_0"
path_output_personalize = os.path.join(output_directory, experiment_folder,
                                       "personalize", name)
if not os.path.exists(path_output_personalize):
    os.makedirs(path_output_personalize)

# Get calibrated models paths
model_paths = [
    os.path.join(path_output_calibrate, "fold_{}".format(i),
                 "model_parameters.json") for i in range(n_resampling_iter)
]

# Load estimated leaspy models
leaspy_iter = []
for i in range(n_resampling_iter):
    leaspy = Leaspy.load(model_paths[i])
    leaspy_iter.append(leaspy)

# Algo settings iter
algo_settings_personalize_iter = []
algo_settings_personalize = AlgorithmSettings(personalize_algorithm,
                                              seed=seed,
                                              n_iter=n_iter_personalize)
for i in range(n_resampling_iter):
    algo_settings_personalize_iter.append(algo_settings_personalize)

# Save algo settings
algo_settings_personalize.save(
    os.path.join(path_output_personalize, "algo_settings_personalize.json"))
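The script stops after saving the settings; a plausible continuation that actually runs the per-fold personalization (a sketch — `data_iter`, a list of per-fold datasets, is a hypothetical name):

for i in range(n_resampling_iter):
    ip = leaspy_iter[i].personalize(data_iter[i],
                                    settings=algo_settings_personalize_iter[i])
    # assuming IndividualParameters.save mirrors the .load used elsewhere
    ip.save(os.path.join(path_output_personalize, "ip_fold_{}.json".format(i)))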