def test_likelihoods(model_test_likelihoods):
    """Test the custom noise distributions used to define cost functions."""
    model = model_test_likelihoods.getModel()
    model.setTimepoints(np.linspace(0, 60, 60))
    solver = model.getSolver()
    solver.setSensitivityOrder(amici.SensitivityOrder.first)

    # run model once to create an edata
    rdata = amici.runAmiciSimulation(model, solver)
    sigmas = rdata['y'].max(axis=0) * 0.05
    edata = amici.ExpData(rdata, sigmas, [])
    # just make all observables positive since some are logarithmic
    while min(edata.getObservedData()) < 0:
        edata = amici.ExpData(rdata, sigmas, [])

    # and now run for real and also compute likelihood values
    rdata = amici.runAmiciSimulations(model, solver, [edata])[0]

    # check if the values make overall sense
    assert np.isfinite(rdata['llh'])
    assert np.all(np.isfinite(rdata['sllh']))
    assert np.any(rdata['sllh'])

    rdata_df = amici.getSimulationObservablesAsDataFrame(
        model, edata, rdata, by_id=True)
    edata_df = amici.getDataObservablesAsDataFrame(
        model, edata, by_id=True)

    # check correct likelihood value
    llh_exp = -sum([
        normal_nllh(edata_df['o1'], rdata_df['o1'], sigmas[0]),
        log_normal_nllh(edata_df['o2'], rdata_df['o2'], sigmas[1]),
        log10_normal_nllh(edata_df['o3'], rdata_df['o3'], sigmas[2]),
        laplace_nllh(edata_df['o4'], rdata_df['o4'], sigmas[3]),
        log_laplace_nllh(edata_df['o5'], rdata_df['o5'], sigmas[4]),
        log10_laplace_nllh(edata_df['o6'], rdata_df['o6'], sigmas[5]),
        custom_nllh(edata_df['o7'], rdata_df['o7'], sigmas[6]),
    ])
    assert np.isclose(rdata['llh'], llh_exp)

    # check gradient
    for sensi_method in [amici.SensitivityMethod.forward,
                         amici.SensitivityMethod.adjoint]:
        solver = model.getSolver()
        solver.setSensitivityMethod(sensi_method)
        solver.setSensitivityOrder(amici.SensitivityOrder.first)
        solver.setRelativeTolerance(1e-12)
        solver.setAbsoluteTolerance(1e-12)
        check_derivatives(model, solver, edata, assert_fun,
                          atol=1e-2, rtol=1e-2, epsilon=1e-5,
                          check_least_squares=False)
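# The *_nllh helpers used above are defined elsewhere in the test module.
# As a reference, a minimal sketch of the first two is given below, assuming
# the standard normal / log-normal negative log-likelihoods with measurement
# m, simulation y and standard deviation sigma; the actual helpers may differ.
def normal_nllh(m, y, sigma):
    # -log N(m | y, sigma^2), summed over all data points
    return sum(0.5 * (np.log(2 * np.pi * sigma ** 2) + ((y - m) / sigma) ** 2))


def log_normal_nllh(m, y, sigma):
    # -log LogNormal(m | log(y), sigma^2), summed over all data points
    return sum(0.5 * (np.log(2 * np.pi * sigma ** 2 * m ** 2)
                      + ((np.log(y) - np.log(m)) / sigma) ** 2))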
def test_steadystate_simulation(model_steadystate_module):
    model = model_steadystate_module.getModel()
    model.setTimepoints(np.linspace(0, 60, 60))
    solver = model.getSolver()
    solver.setSensitivityOrder(amici.SensitivityOrder.first)
    rdata = amici.runAmiciSimulation(model, solver)
    edata = [amici.ExpData(rdata, 1, 0)]
    rdata = amici.runAmiciSimulations(model, solver, edata)

    # check roundtripping of DataFrame conversion
    df_edata = amici.getDataObservablesAsDataFrame(model, edata)
    edata_reconstructed = amici.getEdataFromDataFrame(model, df_edata)

    assert np.isclose(
        amici.ExpDataView(edata[0])['observedData'],
        amici.ExpDataView(edata_reconstructed[0])['observedData']
    ).all()

    assert np.isclose(
        amici.ExpDataView(edata[0])['observedDataStdDev'],
        amici.ExpDataView(edata_reconstructed[0])['observedDataStdDev']
    ).all()

    if len(edata[0].fixedParameters):
        assert list(edata[0].fixedParameters) \
            == list(edata_reconstructed[0].fixedParameters)
    else:
        assert list(model.getFixedParameters()) \
            == list(edata_reconstructed[0].fixedParameters)

    assert list(edata[0].fixedParametersPreequilibration) == \
        list(edata_reconstructed[0].fixedParametersPreequilibration)

    df_state = amici.getSimulationStatesAsDataFrame(model, edata, rdata)
    assert np.isclose(
        rdata[0]['x'],
        df_state[list(model.getStateIds())].values
    ).all()

    df_obs = amici.getSimulationObservablesAsDataFrame(model, edata, rdata)
    assert np.isclose(
        rdata[0]['y'],
        df_obs[list(model.getObservableIds())].values
    ).all()
    amici.getResidualsAsDataFrame(model, edata, rdata)

    solver.setRelativeTolerance(1e-12)
    solver.setAbsoluteTolerance(1e-12)
    check_derivatives(model, solver, edata[0], assert_fun,
                      atol=1e-3, rtol=1e-3, epsilon=1e-4)

    # Run some additional tests which need a working Model,
    # but don't need precomputed expectations.
    _test_set_parameters_by_dict(model_steadystate_module)
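# `assert_fun` is passed through to check_derivatives and is expected to be a
# callable that raises on a falsy argument. It is not defined in this section;
# a minimal sketch (an assumption, not the module's actual definition) would be:
def assert_fun(x):
    # fail the test if the condition handed over by check_derivatives is False
    assert x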
def test_steadystate_scaled(self):
    """Test SBML import and simulation from AMICI python interface."""

    def assert_fun(x):
        return self.assertTrue(x)

    sbmlFile = os.path.join(os.path.dirname(__file__), '..', 'python',
                            'examples', 'example_steadystate',
                            'model_steadystate_scaled.xml')
    sbmlImporter = amici.SbmlImporter(sbmlFile)

    observables = amici.assignmentRules2observables(
        sbmlImporter.sbml,
        filter_function=lambda variable:
            variable.getId().startswith('observable_')
            and not variable.getId().endswith('_sigma')
    )

    outdir = 'test_model_steadystate_scaled'
    sbmlImporter.sbml2amici(
        'test_model_steadystate_scaled',
        outdir,
        observables=observables,
        constantParameters=['k0'],
        sigmas={'observable_x1withsigma': 'observable_x1withsigma_sigma'})

    sys.path.insert(0, outdir)
    import test_model_steadystate_scaled as modelModule

    model = modelModule.getModel()
    model.setTimepoints(np.linspace(0, 60, 60))
    solver = model.getSolver()
    solver.setSensitivityOrder(amici.SensitivityOrder_first)
    rdata = amici.runAmiciSimulation(model, solver)
    edata = [amici.ExpData(rdata, 1, 0)]
    rdata = amici.runAmiciSimulations(model, solver, edata)

    # check roundtripping of DataFrame conversion
    df_edata = amici.getDataObservablesAsDataFrame(model, edata)
    edata_reconstructed = amici.getEdataFromDataFrame(model, df_edata)

    self.assertTrue(
        np.isclose(
            amici.ExpDataView(edata[0])['observedData'],
            amici.ExpDataView(edata_reconstructed[0])['observedData'],
        ).all()
    )

    self.assertTrue(
        np.isclose(
            amici.ExpDataView(edata[0])['observedDataStdDev'],
            amici.ExpDataView(edata_reconstructed[0])['observedDataStdDev'],
        ).all()
    )

    if len(edata[0].fixedParameters):
        self.assertListEqual(
            list(edata[0].fixedParameters),
            list(edata_reconstructed[0].fixedParameters),
        )
    else:
        self.assertListEqual(
            list(model.getFixedParameters()),
            list(edata_reconstructed[0].fixedParameters),
        )

    self.assertListEqual(
        list(edata[0].fixedParametersPreequilibration),
        list(edata_reconstructed[0].fixedParametersPreequilibration),
    )

    df_state = amici.getSimulationStatesAsDataFrame(model, edata, rdata)
    self.assertTrue(
        np.isclose(
            rdata[0]['x'],
            df_state[list(model.getStateIds())].values
        ).all()
    )

    df_obs = amici.getSimulationObservablesAsDataFrame(model, edata, rdata)
    self.assertTrue(
        np.isclose(
            rdata[0]['y'],
            df_obs[list(model.getObservableIds())].values
        ).all()
    )
    amici.getResidualsAsDataFrame(model, edata, rdata)

    solver.setRelativeTolerance(1e-12)
    solver.setAbsoluteTolerance(1e-12)
    check_derivatives(model, solver, edata[0], assert_fun,
                      atol=1e-3, rtol=1e-3, epsilon=1e-4)
def generate_synthetic_data(pathway_name: str,
                            latent_dimension: int = 2,
                            n_samples: int = 20) -> str:
    """
    Generates sample data using the mechanistic model.

    :param pathway_name:
        name of pathway to use for model

    :param latent_dimension:
        number of latent dimensions that is used to generate the parameters
        that vary across samples

    :param n_samples:
        number of samples to generate

    :return:
        path to csv where generated data was saved
    """
    model, solver = load_model('pw_' + pathway_name, force_compile=True)

    # setup model parameter scales
    model.setParameterScale(amici.parameterScalingFromIntVector([
        amici.ParameterScaling.none
        if par_id.startswith(MODEL_FEATURE_PREFIX)
        or parameter_boundaries_scales[par_id.split('_')[-1]][2] == 'lin'
        else amici.ParameterScaling.log10
        for par_id in model.getParameterIds()
    ]))

    # run simulations to equilibrium
    model.setTimepoints([np.inf])

    # set numpy random seed to ensure reproducibility
    np.random.seed(0)

    # generate static parameters that are consistent across samples
    static_pars = dict()
    for par_id in model.getParameterIds():
        if par_id.startswith(MODEL_FEATURE_PREFIX):
            continue
        lb, ub, _ = parameter_boundaries_scales[par_id.split('_')[-1]]
        static_pars[par_id] = np.random.random() * (ub - lb) + lb

    # identify which parameters may vary across samples
    sample_pars = [
        par_id for par_id in model.getParameterIds()
        if par_id.startswith(MODEL_FEATURE_PREFIX)
    ]

    encoder = AutoEncoder(np.zeros((1, model.ny)),
                          n_hidden=latent_dimension,
                          n_params=len(sample_pars))

    tt_pars = np.random.random(encoder.n_encoder_pars)
    for ip, name in enumerate(encoder.x_names):
        lb, ub, _ = parameter_boundaries_scales[name.split('_')[-1]]
        tt_pars[ip] = tt_pars[ip] * (ub - lb) + lb

    samples = []
    embeddings = []
    while len(samples) < n_samples:
        # generate new fake data
        encoder.data = np.random.random(encoder.data.shape)
        if len(samples) < n_samples / 2:
            encoder.data += 1
        else:
            encoder.data -= 1

        # generate parameters from fake data
        embedding = encoder.compute_embedded_pars(tt_pars)
        sample_par_vals = np.power(10, encoder.compute_inflated_pars(tt_pars))
        sample_pars = dict(zip(sample_pars, sample_par_vals[0, :]))

        # set parameters in model
        for par_id, val in {**static_pars, **sample_pars}.items():
            model.setParameterById(par_id, val)

        # run simulations, only add to samples if no integration error
        rdata = amici.runAmiciSimulation(model, solver)
        if rdata['status'] == amici.AMICI_SUCCESS:
            sample = amici.getSimulationObservablesAsDataFrame(
                model, [amici.ExpData(model)], [rdata])
            sample['Sample'] = len(samples)
            for pid, val in sample_pars.items():
                sample[pid] = val
            samples.append(sample)
            embeddings.append(embedding)

    # create dataframe
    df = pd.concat(samples)
    df[list(model.getObservableIds())].rename(
        columns={o: o.replace('_obs', '') for o in model.getObservableIds()}
    ).boxplot(rot=90)

    # format according to reference example
    formatted_df = pd.melt(df[list(model.getObservableIds()) + ['Sample']],
                           id_vars=['Sample'])
    formatted_df.rename(columns={
        'variable': 'site',
        'value': 'LogFoldChange',
    }, inplace=True)
    formatted_df['site'] = formatted_df['site'].apply(
        lambda x: x.replace('_obs', '')[1:])
    formatted_df['Gene'] = formatted_df['site'].apply(
        lambda x: x.split('_')[0])
    formatted_df['Peptide'] = 'X.XXXXX*XXXXX.X'
    formatted_df['site'] = formatted_df['site'].apply(
        lambda x: x.replace('_', '-') + (x.split('_')[1][0].lower()
                                         if len(x.split('_')) > 1 else ''))

    # save to csv
    datadir = os.path.join(basedir, 'data')
    os.makedirs(datadir, exist_ok=True)
    datafile = os.path.join(datadir, f'synthetic__{pathway_name}.csv')

    plot_and_save_fig(os.path.join(datadir, f'synthetic__{pathway_name}.pdf'))

    fig, ax = plt.subplots(1, 1)
    plot_embedding(np.vstack(embeddings), ax)
    plot_and_save_fig(
        os.path.join(datadir, f'synthetic__{pathway_name}__embedding.pdf'))

    inputs = df[[
        col for col in df.columns
        if col.startswith(MODEL_FEATURE_PREFIX)
    ]]
    fig, ax = plt.subplots(1, 1)
    plot_pca_inputs(np.log10(inputs.values), ax)
    plot_and_save_fig(
        os.path.join(datadir, f'synthetic__{pathway_name}__input_pca.pdf'))

    inputs = df[[
        col for col in df.columns
        if col.startswith(MODEL_FEATURE_PREFIX) or col == 'Sample'
    ]]
    inputs = pd.melt(inputs, id_vars=['Sample'])
    inputs.index = inputs['variable'] + \
        inputs['Sample'].apply(lambda x: f'_sample_{x}')
    ref = pd.concat([pd.Series(static_pars), inputs.value])
    ref.to_csv(
        os.path.join(datadir,
                     f'synthetic__{pathway_name}__reference_inputs.csv'))

    fig, axes = plt.subplots(1, 2)
    plot_pca_inputs(df[list(model.getObservableIds())].values,
                    axes[0], axes[1])
    plot_and_save_fig(
        os.path.join(datadir, f'synthetic__{pathway_name}__data_pca.pdf'))

    formatted_df.to_csv(datafile)

    return datafile
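# The code above assumes that `parameter_boundaries_scales` maps a parameter
# suffix (the part of the parameter id after the last underscore) to a
# (lower_bound, upper_bound, scale) tuple, where scale is e.g. 'lin' or
# 'log10'. A purely hypothetical illustration of that structure (keys and
# values are made up, not taken from the actual project):
parameter_boundaries_scales_example = {
    'kcat': (-3.0, 3.0, 'log10'),   # hypothetical rate-constant bounds
    'scale': (-1.0, 1.0, 'lin'),    # hypothetical linear-scale parameter
}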
def test_special_likelihoods(model_special_likelihoods):
    """Test special likelihood functions."""
    model = model_special_likelihoods.getModel()
    model.setTimepoints(np.linspace(0, 60, 10))
    solver = model.getSolver()
    solver.setSensitivityOrder(amici.SensitivityOrder.first)

    # Test in region with positive density

    # run model once to create an edata
    rdata = amici.runAmiciSimulation(model, solver)
    edata = amici.ExpData(rdata, 0.001, 0)

    # make sure measurements are smaller for non-degenerate probability
    y = edata.getObservedData()
    y = tuple([val * np.random.uniform(0, 1) for val in y])
    edata.setObservedData(y)

    # set sigmas
    sigma = 0.2
    sigmas = sigma * np.ones(len(y))
    edata.setObservedDataStdDev(sigmas)

    # and now run for real and also compute likelihood values
    rdata = amici.runAmiciSimulations(model, solver, [edata])[0]

    # check if the values make overall sense
    assert np.isfinite(rdata['llh'])
    assert np.all(np.isfinite(rdata['sllh']))
    assert np.any(rdata['sllh'])

    rdata_df = amici.getSimulationObservablesAsDataFrame(
        model, edata, rdata, by_id=True)
    edata_df = amici.getDataObservablesAsDataFrame(
        model, edata, by_id=True)

    # check correct likelihood value
    llh_exp = -sum([
        binomial_nllh(edata_df['o1'], rdata_df['o1'], sigma),
        negative_binomial_nllh(edata_df['o2'], rdata_df['o2'], sigma),
    ])
    assert np.isclose(rdata['llh'], llh_exp)

    # check gradient
    for sensi_method in [amici.SensitivityMethod.forward,
                         amici.SensitivityMethod.adjoint]:
        solver = model.getSolver()
        solver.setSensitivityMethod(sensi_method)
        solver.setSensitivityOrder(amici.SensitivityOrder.first)
        check_derivatives(model, solver, edata, assert_fun,
                          atol=1e-1, rtol=1e-1,
                          check_least_squares=False)

    # Test for m > y, i.e. in region with 0 density

    rdata = amici.runAmiciSimulation(model, solver)
    edata = amici.ExpData(rdata, 0.001, 0)

    # scale measurements so that some exceed the simulated values (m > y)
    y = edata.getObservedData()
    y = tuple([val * np.random.uniform(0.5, 3) for val in y])
    edata.setObservedData(y)
    edata.setObservedDataStdDev(sigmas)

    # and now run for real and also compute likelihood values
    rdata = amici.runAmiciSimulations(model, solver, [edata])[0]

    # m > y -> outside binomial domain -> 0 density
    assert rdata['llh'] == -np.inf
    # check for non-informative gradient
    assert all(np.isnan(rdata['sllh']))
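# binomial_nllh and negative_binomial_nllh are defined elsewhere in the test
# module. As an illustration of the kind of density involved, a binomial
# negative log-likelihood (m successes out of y trials with success
# probability p -- an assumed parameterisation, not necessarily the one used
# by the actual helpers) could be sketched as follows:
from scipy.special import gammaln


def binomial_nllh(m, y, p):
    # outside the support (more successes than trials) the density is zero,
    # which matches the llh == -inf check in the test above
    if np.any(m > y):
        return np.inf
    # -log[ C(y, m) * p^m * (1 - p)^(y - m) ], summed over all data points
    return np.sum(-(gammaln(y + 1) - gammaln(m + 1) - gammaln(y - m + 1)
                    + m * np.log(p) + (y - m) * np.log(1 - p)))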
def test_steadystate_scaled(self):
    '''
    Test SBML import and simulation from AMICI python interface
    '''
    sbmlFile = os.path.join(os.path.dirname(__file__), '..', 'python',
                            'examples', 'example_steadystate',
                            'model_steadystate_scaled.xml')
    sbmlImporter = amici.SbmlImporter(sbmlFile)

    observables = amici.assignmentRules2observables(
        sbmlImporter.sbml,
        filter_function=lambda variable: variable.getId().startswith(
            'observable_') and not variable.getId().endswith('_sigma'))

    outdir = 'test_model_steadystate_scaled'
    sbmlImporter.sbml2amici(
        'test_model_steadystate_scaled',
        outdir,
        observables=observables,
        constantParameters=['k0'],
        sigmas={'observable_x1withsigma': 'observable_x1withsigma_sigma'})

    sys.path.insert(0, outdir)
    import test_model_steadystate_scaled as modelModule

    model = modelModule.getModel()
    model.setTimepoints(amici.DoubleVector(np.linspace(0, 60, 60)))
    solver = model.getSolver()
    rdata = amici.runAmiciSimulation(model, solver)
    edata = [amici.ExpData(rdata, 0.01, 0)]
    rdata = amici.runAmiciSimulations(model, solver, edata)

    # check roundtripping of DataFrame conversion
    df_edata = amici.getDataObservablesAsDataFrame(model, edata)
    edata_reconstructed = amici.getEdataFromDataFrame(model, df_edata)

    self.assertTrue(
        np.isclose(
            amici.edataToNumPyArrays(edata[0])['observedData'],
            amici.edataToNumPyArrays(
                edata_reconstructed[0])['observedData'],
        ).all())

    self.assertTrue(
        np.isclose(
            amici.edataToNumPyArrays(edata[0])['observedDataStdDev'],
            amici.edataToNumPyArrays(
                edata_reconstructed[0])['observedDataStdDev'],
        ).all())

    if edata[0].fixedParameters.size():
        self.assertListEqual(
            list(edata[0].fixedParameters),
            list(edata_reconstructed[0].fixedParameters),
        )
    else:
        self.assertListEqual(
            list(model.getFixedParameters()),
            list(edata_reconstructed[0].fixedParameters),
        )

    self.assertListEqual(
        list(edata[0].fixedParametersPreequilibration),
        list(edata_reconstructed[0].fixedParametersPreequilibration),
    )

    df_state = amici.getSimulationStatesAsDataFrame(model, edata, rdata)
    self.assertTrue(
        np.isclose(rdata[0]['x'],
                   df_state[list(model.getStateIds())].values).all())

    df_obs = amici.getSimulationObservablesAsDataFrame(model, edata, rdata)
    self.assertTrue(
        np.isclose(rdata[0]['y'],
                   df_obs[list(model.getObservableIds())].values).all())
    amici.getResidualsAsDataFrame(model, edata, rdata)
def generate_synthetic_data(pathway_name: str,
                            latent_dimension: int = 2,
                            n_samples: int = 20) -> Tuple[str, str, str]:
    """
    Generates sample data using the mechanistic model.

    :param pathway_name:
        name of pathway to use for model

    :param latent_dimension:
        number of latent dimensions that is used to generate the parameters
        that vary across samples

    :param n_samples:
        number of samples to generate

    :return:
        paths to the PEtab measurement, condition and observable tables
        (TSV) where the generated data was saved
    """
    model, solver = load_model('pw_' + pathway_name, force_compile=True,
                               add_observables=True)

    # setup model parameter scales
    model.setParameterScale(amici.parameterScalingFromIntVector([
        amici.ParameterScaling.none
        if par_id.startswith(MODEL_FEATURE_PREFIX)
        or par_id.endswith('_0')
        or parameter_boundaries_scales[par_id.split('_')[-1]][2] == 'lin'
        else amici.ParameterScaling.log10
        for par_id in model.getParameterIds()
    ]))

    # run simulations to equilibrium
    model.setTimepoints([np.inf])

    # set numpy random seed to ensure reproducibility
    np.random.seed(0)

    # identify which parameters may vary across samples
    sample_pars = [
        par_id for par_id in model.getParameterIds()
        if par_id.startswith(MODEL_FEATURE_PREFIX)
    ]

    # generate static parameters that are consistent across samples
    static_pars = dict()
    for par_id in model.getParameterIds():
        if par_id in sample_pars:
            continue
        if par_id.endswith('_0'):
            static_pars[par_id] = 0.0
            continue
        lb, ub, _ = parameter_boundaries_scales[par_id.split('_')[-1]]
        static_pars[par_id] = np.random.random() * (ub - lb) + lb

    encoder = AutoEncoder(np.zeros((1, model.ny)),
                          n_hidden=latent_dimension,
                          n_params=len(sample_pars))

    tt_pars = np.random.random(encoder.n_encoder_pars)
    for ip, name in enumerate(encoder.x_names):
        lb, ub, _ = parameter_boundaries_scales[name.split('_')[-1]]
        tt_pars[ip] = tt_pars[ip] * (ub - lb) + lb

    samples = []
    embeddings = []
    while len(samples) < n_samples:
        # generate new fake data
        encoder.data = np.random.random(encoder.data.shape)
        if len(samples) < n_samples / 2:
            encoder.data += 1
        else:
            encoder.data -= 1

        # generate parameters from fake data
        embedding = encoder.compute_embedded_pars(tt_pars)
        sample_par_vals = np.power(10, encoder.compute_inflated_pars(tt_pars))
        sample_pars = dict(zip(sample_pars, sample_par_vals[0, :]))

        # set parameters in model
        for par_id, val in {**static_pars, **sample_pars}.items():
            model.setParameterById(par_id, val)

        # run simulations, only add to samples if no integration error
        rdata = amici.runAmiciSimulation(model, solver)
        if rdata['status'] == amici.AMICI_SUCCESS:
            sample = amici.getSimulationObservablesAsDataFrame(
                model, [amici.ExpData(model)], [rdata])
            sample['Sample'] = len(samples)
            for pid, val in sample_pars.items():
                sample[pid] = val
            samples.append(sample)
            embeddings.append(embedding)

    # prepare petab
    datadir = os.path.join(basedir, 'data')
    os.makedirs(datadir, exist_ok=True)

    df = pd.concat(samples)
    df[list(model.getObservableIds())].rename(
        columns={o: o.replace('_obs', '') for o in model.getObservableIds()}
    ).boxplot(rot=90)
    plot_and_save_fig(os.path.join(datadir, f'synthetic__{pathway_name}.pdf'))

    fig, ax = plt.subplots(1, 1)
    plot_embedding(np.vstack(embeddings), ax)
    plot_and_save_fig(
        os.path.join(datadir, f'synthetic__{pathway_name}__embedding.pdf'))

    inputs = df[[
        col for col in df.columns
        if col.startswith(MODEL_FEATURE_PREFIX)
    ]]
    fig, ax = plt.subplots(1, 1)
    plot_pca_inputs(np.log10(inputs.values), ax)
    plot_and_save_fig(
        os.path.join(datadir, f'synthetic__{pathway_name}__input_pca.pdf'))

    inputs = df[[
        col for col in df.columns
        if col.startswith(MODEL_FEATURE_PREFIX) or col == 'Sample'
    ]]
    inputs = pd.melt(inputs, id_vars=['Sample'])
    inputs.index = inputs['variable'] + \
        inputs['Sample'].apply(lambda x: f'_sample_{x}')
    ref = pd.concat([pd.Series(static_pars), inputs.value])
    ref.to_csv(
        os.path.join(datadir,
                     f'synthetic__{pathway_name}__reference_inputs.csv'))

    fig, axes = plt.subplots(1, 2)
    plot_pca_inputs(df[list(model.getObservableIds())].values,
                    axes[0], axes[1])
    plot_and_save_fig(
        os.path.join(datadir, f'synthetic__{pathway_name}__data_pca.pdf'))

    # create petab & save to csv

    # MEASUREMENTS
    measurements = df[[
        'Sample', petab.TIME,
    ] + list(model.getObservableIds())]
    measurements = pd.melt(measurements,
                           id_vars=[petab.TIME, 'Sample'],
                           value_name=petab.MEASUREMENT,
                           var_name=petab.OBSERVABLE_ID)
    measurements[petab.TIME] = 0.0
    measurements[petab.SIMULATION_CONDITION_ID] = \
        measurements['Sample'].apply(lambda x: f'sample_{x}')
    measurements[petab.PREEQUILIBRATION_CONDITION_ID] = \
        measurements['Sample'].apply(lambda x: f'sample_{x}')
    measurements.drop(columns=['Sample'], inplace=True)

    measurement_file = os.path.join(
        datadir, f'synthetic__{pathway_name}__measurements.tsv')
    measurements.to_csv(measurement_file, sep='\t')

    # CONDITIONS
    condition_file = os.path.join(
        datadir, f'synthetic__{pathway_name}__conditions.tsv')
    conditions = pd.DataFrame({
        petab.CONDITION_ID:
            measurements[petab.SIMULATION_CONDITION_ID].unique()
    })
    for name, value in static_pars.items():
        if name.endswith('_0'):
            conditions[name] = value
    conditions.set_index(petab.CONDITION_ID, inplace=True)
    conditions.to_csv(condition_file, sep='\t')

    # OBSERVABLES
    observables = pd.DataFrame({
        petab.OBSERVABLE_ID: model.getObservableIds(),
        petab.OBSERVABLE_NAME: model.getObservableNames(),
    })
    observables[petab.OBSERVABLE_FORMULA] = '0.0'
    observables[petab.NOISE_DISTRIBUTION] = 'normal'
    observables[petab.NOISE_FORMULA] = '1.0'

    observable_file = os.path.join(
        datadir, f'synthetic__{pathway_name}__observables.tsv')
    observables.set_index(petab.OBSERVABLE_ID, inplace=True)
    observables.to_csv(observable_file, sep='\t')

    return measurement_file, condition_file, observable_file
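# Hypothetical usage sketch: generate the synthetic PEtab tables for one
# pathway and report where they were written. The pathway name used here is
# an assumption for illustration only.
if __name__ == '__main__':
    measurement_file, condition_file, observable_file = \
        generate_synthetic_data('example_pathway', latent_dimension=2,
                                n_samples=20)
    print(f'measurements: {measurement_file}')
    print(f'conditions:   {condition_file}')
    print(f'observables:  {observable_file}')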