def sample_posterior(self):
    """Sample the posterior distribution of the mortality probability.

    Steps:
      1. draw CalibSets.POST_N candidate mortality probabilities uniformly
         between CalibSets.POST_L and CalibSets.POST_U;
      2. simulate one cohort per candidate;
      3. weight every candidate by a binomial likelihood computed from the
         five-year survival of its simulated cohort;
      4. normalize the weights and re-sample the candidates (with
         replacement) according to them;
      5. append one row per candidate to self._csvRows and write all rows
         to 'CalibrationResults.csv'.

    :raises ValueError: if the likelihood weights do not add up to a
        positive, finite number (they cannot be turned into probabilities).
    """

    # find values of mortality probability at which the posterior should be evaluated
    self._mortalitySamples = np.random.uniform(
        low=CalibSets.POST_L,
        high=CalibSets.POST_U,
        size=CalibSets.POST_N)

    # create and simulate a multi cohort: one cohort per candidate probability
    multiCohort = SurvivalCls.MultiCohort(
        ids=self._cohortIDs,
        mortality_probs=self._mortalitySamples,
        pop_sizes=[CalibSets.SIM_POP_SIZE] * CalibSets.POST_N)
    multiCohort.simulate(CalibSets.TIME_STEPS)

    # Rebuild the weights from scratch on every call: the candidates above are
    # freshly drawn, and np.random.choice below needs exactly one weight per
    # candidate. Appending to weights left over from an earlier call would
    # make the two lengths disagree.
    self._weights = []
    for cohort_id in self._cohortIDs:
        five_year_list = multiCohort.get_five_year_survival(cohort_id)

        # NOTE(review): the simulated cohorts are created with
        # CalibSets.SIM_POP_SIZE members but the sum is divided by
        # CalibSets.OBS_N; this is a proportion only if the two agree
        # (or if get_five_year_survival is already scaled) - confirm.
        five_year_rate = sum(five_year_list) / CalibSets.OBS_N

        # binomial likelihood: probability of the observation reported in the
        # clinical study (k out of n) if the true five-year survival
        # probability were the one produced by this simulated cohort.
        # NOTE(review): binom.pmf expects k to be an integer count of
        # successes - confirm OBS_FIVE_YEAR_RATE holds a count, not a proportion.
        weight = stat.binom.pmf(
            k=CalibSets.OBS_FIVE_YEAR_RATE,
            n=CalibSets.OBS_N,
            p=five_year_rate)

        # store the weight
        self._weights.append(weight)

    # normalize the likelihood weights
    sum_weights = np.sum(self._weights)
    if not np.isfinite(sum_weights) or sum_weights <= 0:
        # dividing by such a total would yield NaN/inf probabilities and make
        # the re-sampling below fail with a far less helpful message
        raise ValueError(
            'Likelihood weights sum to {}; cannot normalize them into '
            'probabilities.'.format(sum_weights))
    self._normalizedWeights = np.divide(self._weights, sum_weights)

    # re-sample mortality probability (with replacement) according to likelihood weights
    self._mortalityResamples = np.random.choice(
        a=self._mortalitySamples,
        size=CalibSets.NUM_SIM_COHORTS,
        replace=True,
        p=self._normalizedWeights)

    # produce the list to report the results; rows are appended, so whatever
    # self._csvRows already holds is kept in front of them
    self._csvRows.extend(
        [cohort_id, normalized_weight, mortality_prob]
        for cohort_id, normalized_weight, mortality_prob in zip(
            self._cohortIDs, self._normalizedWeights, self._mortalitySamples))

    # write the calibration result into a csv file
    InOutSupport.write_csv('CalibrationResults.csv', self._csvRows)
def simulate(self, num_of_simulated_cohorts, cohort_size, time_steps, cohort_ids=None):
    """Simulate cohorts whose mortality probabilities are drawn by likelihood weight.

    Rows (cohort id, mortality probability) are sampled with replacement,
    each row being picked with the probability given by its entry in
    self._weights, and one cohort is simulated per sampled row.

    :param num_of_simulated_cohorts: number of cohorts to simulate
    :param cohort_size: the population size of every cohort
    :param time_steps: simulation length
    :param cohort_ids: ids to give to the simulated cohorts; when None, the
        ids stored alongside the sampled rows are used instead
    """

    # pick row positions 0 .. len(weights) - 1, with replacement, according to
    # the likelihood weights (np.random.choice expects p to sum to 1)
    row_count = len(self._weights)
    drawn_rows = np.random.choice(
        a=range(0, row_count),
        size=num_of_simulated_cohorts,
        replace=True,
        p=self._weights)

    # look up the cohort id and the mortality probability of every drawn row
    ids_of_drawn_rows = [self._cohortIDs[row] for row in drawn_rows]
    probs_of_drawn_rows = [self._mortalityProbs[row] for row in drawn_rows]

    # caller-supplied ids take precedence over the ids of the drawn rows
    ids_to_use = ids_of_drawn_rows if cohort_ids is None else cohort_ids

    # build the desired number of cohorts and simulate all of them
    self._multiCohorts = SurvivalCls.MultiCohort(
        ids=ids_to_use,
        pop_sizes=[cohort_size] * num_of_simulated_cohorts,
        mortality_probs=probs_of_drawn_rows)
    self._multiCohorts.simulate(time_steps)