Example #1
0
def create_edata_for_condition(
        condition: Union[Dict, pd.Series],
        amici_model: AmiciModel,
        petab_problem: petab.Problem,
        observable_ids: List[str],
) -> amici.ExpData:
    """Get :class:`amici.amici.ExpData` for the given PEtab condition.

    Sets timepoints, observed data and sigmas. Additionally enables
    reinitialization of fixed-parameter initial states if the condition
    has a preequilibration condition and the condition table contains at
    least one column that is a species of the SBML model.

    :param condition:
        pandas.DataFrame row with preequilibrationConditionId and
        simulationConditionId.
    :param amici_model:
        AMICI model
    :param petab_problem:
        Underlying PEtab problem
    :param observable_ids:
        List of observable IDs

    :return:
        ExpData instance.

    :raises AssertionError:
        If ``amici_model.nytrue`` differs from ``len(observable_ids)``.
    """
    # extract measurement table rows for condition
    measurement_df = petab.get_rows_for_condition(
        measurement_df=petab_problem.measurement_df, condition=condition)

    if amici_model.nytrue != len(observable_ids):
        raise AssertionError("Number of AMICI model observables does not "
                             "match number of PEtab observables.")

    # create an ExpData object
    edata = amici.ExpData(amici_model)

    ##########################################################################
    # enable initial parameters reinitialization

    # Test the predicate itself rather than the truthiness of the column
    # label, so that a falsy label cannot mask a matching species.
    sbml_model = petab_problem.sbml_model
    species_in_condition_table = any(
        sbml_model.getSpecies(col) is not None
        for col in petab_problem.condition_df)

    # An empty preequilibration cell may arrive as NaN, which is truthy in
    # Python; treat it like a missing ID.
    preeq_id = condition.get(PREEQUILIBRATION_CONDITION_ID)
    has_preequilibration = bool(preeq_id) and not pd.isna(preeq_id)

    if has_preequilibration and species_in_condition_table:
        edata.reinitializeFixedParameterInitialStates = True

    ##########################################################################
    # timepoints

    # find replicate numbers of time points
    timepoints_w_reps = _get_timepoints_with_replicates(
        df_for_condition=measurement_df)
    edata.setTimepoints(timepoints_w_reps)

    ##########################################################################
    # measurements and sigmas
    y, sigma_y = _get_measurements_and_sigmas(
        df_for_condition=measurement_df, timepoints_w_reps=timepoints_w_reps,
        observable_ids=observable_ids)
    # ExpData setters receive the matrices flattened to one dimension
    edata.setObservedData(y.flatten())
    edata.setObservedDataStdDev(sigma_y.flatten())

    return edata
Example #2
0
def rdatas_to_measurement_df(rdatas: Sequence[amici.ReturnData],
                             model: AmiciModel,
                             measurement_df: pd.DataFrame) -> pd.DataFrame:
    """
    Create a measurement dataframe in the PEtab format from the passed
    `rdatas` and own information.

    One output row is produced for every row of `measurement_df` that
    belongs to a simulation condition paired with an entry of `rdatas`;
    the measurement value of each row is replaced by the simulated value.

    :param rdatas:
        A sequence of rdatas with the ordering of
        `petab.get_simulation_conditions`.

    :param model:
        AMICI model used to generate `rdatas`.

    :param measurement_df:
        PEtab measurement table used to generate `rdatas`.

    :return:
        A dataframe built from the rdatas in the format of `measurement_df`,
        with a fresh 0-based index.

    :raises ValueError:
        Propagated from the ``index`` lookups if a measurement time is not
        contained in ``rdata['t']`` or an observable ID is not among the
        model's observable IDs.
    """
    simulation_conditions = petab.get_simulation_conditions(measurement_df)

    observable_ids = model.getObservableIds()

    # Rows are collected here and turned into a dataframe once at the end.
    # Growing a dataframe row by row copies it on every step, and
    # `DataFrame.append` no longer exists in pandas >= 2.0.
    simulated_rows = []

    # iterate over conditions
    for (_, condition), rdata in zip(simulation_conditions.iterrows(), rdatas):
        # current simulation matrix, indexed as [timepoint, observable]
        y = rdata['y']
        # time array used in rdata
        t = list(rdata['t'])

        # extract rows for condition
        cur_measurement_df = petab.get_rows_for_condition(
            measurement_df, condition)

        # iterate over entries for the given condition
        # note: this way we only generate a dataframe entry for every
        # row that existed in the original dataframe. if we want to
        # e.g. have also timepoints non-existent in the original file,
        # we need to instead iterate over the rdata['y'] entries
        for _, row in cur_measurement_df.iterrows():
            # work on a copy so the input table is never modified
            row_sim = row.copy()

            # locate the simulated value; `list.index` returns the first
            # matching timepoint if `t` contains replicates
            timepoint_idx = t.index(row[TIME])
            observable_idx = observable_ids.index(row[OBSERVABLE_ID])

            # replace measurement by simulation
            row_sim[MEASUREMENT] = y[timepoint_idx, observable_idx]

            simulated_rows.append(row_sim)

    df = pd.DataFrame(simulated_rows, columns=list(measurement_df.columns))
    # rows carry their original index labels; renumber like the previous
    # `ignore_index=True` behaviour
    return df.reset_index(drop=True)
Example #3
0
    def rdatas_to_measurement_df(self, rdatas, model=None):
        """
        Create a measurement dataframe in the petab format from
        the passed `rdatas` and own information.

        Parameters
        ----------

        rdatas: list of amici.RData
            A list of rdatas as produced by
            pypesto.AmiciObjective.__call__(x, return_dict=True)['rdatas'].

        model: optional
            Model providing `getObservableIds()`. If None, a model is
            created via `self.create_model()`.

        Returns
        -------

        df: pandas.DataFrame
            A dataframe built from the rdatas in the format as in
            self.petab_problem.measurement_df, with a fresh 0-based index.
        """
        # create model
        if model is None:
            model = self.create_model()

        measurement_df = self.petab_problem.measurement_df

        # get simulation conditions
        simulation_conditions = petab.get_simulation_conditions(measurement_df)

        # get observable ids
        observable_ids = model.getObservableIds()

        # Rows are collected here and turned into a dataframe once at the
        # end. Growing a dataframe row by row copies it on every step, and
        # `DataFrame.append` no longer exists in pandas >= 2.0.
        simulated_rows = []

        # iterate over conditions
        # NOTE(review): the index label of each condition row is used as the
        # position in `rdatas` -- assumes a 0..n-1 index; confirm.
        for data_idx, condition in simulation_conditions.iterrows():
            # current rdata
            rdata = rdatas[data_idx]
            # current simulation matrix, indexed as [timepoint, observable]
            y = rdata['y']
            # time array used in rdata
            t = list(rdata['t'])

            # extract rows for condition
            cur_measurement_df = petab.get_rows_for_condition(
                measurement_df, condition)

            # iterate over entries for the given condition
            # note: this way we only generate a dataframe entry for every
            # row that existed in the original dataframe. if we want to
            # e.g. have also timepoints non-existent in the original file,
            # we need to instead iterate over the rdata['y'] entries
            for _, row in cur_measurement_df.iterrows():
                # work on a copy so the input table is never modified
                row_sim = row.copy()

                # locate the simulated value; model observables carry an
                # "observable_" prefix relative to the table's observableId
                timepoint_idx = t.index(row.time)
                observable_idx = observable_ids.index("observable_" +
                                                      row.observableId)

                # replace measurement by simulation
                row_sim.measurement = y[timepoint_idx, observable_idx]

                simulated_rows.append(row_sim)

        df = pd.DataFrame(simulated_rows,
                          columns=list(measurement_df.columns))
        # rows carry their original index labels; renumber like the previous
        # `ignore_index=True` behaviour
        return df.reset_index(drop=True)
Example #4
0
    def create_edatas(self, model=None, simulation_conditions=None):
        """
        Create list of amici.ExpData objects.

        Parameters
        ----------

        model: optional
            Model to build the ExpData objects for. If None, a model is
            created via `self.create_model()`.

        simulation_conditions: pandas.DataFrame, optional
            One row per simulation to set up. If None, it is obtained from
            `petab.get_simulation_conditions` on the measurement table.

        Returns
        -------

        edatas: list
            One ExpData object per row of `simulation_conditions`, with
            timepoints, observed data and standard deviations set.
        """
        if model is None:
            model = self.create_model()

        condition_df = self.petab_problem.condition_df.reset_index()
        measurement_df = self.petab_problem.measurement_df

        # One simulation per unique (preequilibrationConditionId,
        # simulationConditionId) pair; identical condition vectors are not
        # merged.
        if simulation_conditions is None:
            simulation_conditions = petab.get_simulation_conditions(
                measurement_df)

        observable_ids = model.getObservableIds()
        fixed_parameter_ids = model.getFixedParameterIds()

        edatas = []
        for _, condition in simulation_conditions.iterrows():
            # measurements belonging to this simulation
            rows = petab.get_rows_for_condition(measurement_df, condition)

            # distinct measurement times, ascending
            unique_times = sorted(rows.time.unique().astype(float))

            edata = amici.ExpData(model.get())

            # Each time is repeated as often as its most-replicated
            # observable was measured at that time.
            replicated_times = []
            for t in unique_times:
                rows_at_t = rows[rows.time == t]
                group_sizes = rows_at_t.groupby(
                    ['observableId', 'time']).size()
                replicated_times.extend([t] * max(group_sizes))

            edata.setTimepoints(replicated_times)

            _handle_fixed_parameters(edata, condition_df, fixed_parameter_ids,
                                     condition)

            # measurement and sigma matrices, NaN where nothing was measured
            shape = (edata.nt(), edata.nytrue())
            y = np.full(shape=shape, fill_value=np.nan)
            sigma_y = np.full(shape=shape, fill_value=np.nan)

            for t in unique_times:
                rows_at_t = rows[rows.time == t]
                # first matrix row reserved for this time
                first_row_ix = replicated_times.index(t)

                # per observable: matrix row the next replicate goes to
                next_row_ix = {}

                for _, meas in rows_at_t.iterrows():
                    obs_ix = observable_ids.index(
                        f'observable_{meas.observableId}')

                    # first replicate lands on the first row for this
                    # time, each further one on the following row
                    row_ix = next_row_ix.get(obs_ix, first_row_ix)
                    next_row_ix[obs_ix] = row_ix + 1

                    y[row_ix, obs_ix] = meas.measurement
                    # only numeric noise parameters are used as sigma
                    if isinstance(meas.noiseParameters, numbers.Number):
                        sigma_y[row_ix, obs_ix] = meas.noiseParameters

            # ExpData setters receive the matrices flattened
            edata.setObservedData(y.flatten())
            edata.setObservedDataStdDev(sigma_y.flatten())

            edatas.append(edata)

        return edatas