def resume_fitting(self, n_sample, pool=None):
    """Run more MCMC samples

    Deep copy the MCMC memmaps and add new MCMC samples to the copied
    memmap. The old memmap file is deleted after a successful execution of
    the MCMC steps. No sampling is done when n_sample is not higher than
    self.n_sample.

    Args:
        n_sample: new number of mcmc samples, must be higher than
            self.n_sample
        pool: optional, a pool object to do parallel tasks, defaults to
            multiprocess.Serial()
    """
    self.pool = multiprocess.Serial() if pool is None else pool
    if n_sample > self.n_sample:
        self.n_sample = n_sample
        self.mcmc.extend_memmap(n_sample)
        self.scatter_mcmc_sample()
        # one message per unmasked time series, resumed through the pool
        resume_messages = [
            ResumeFittingMessage(series, n_sample)
            for series in self.generate_unmask_time_series()
        ]
        fitted_series = self.pool.map(
            ResumeFittingMessage.fit, resume_messages)
        self.replace_unmask_time_series(fitted_series)
        self.delete_old_memmap()
    # NOTE(review): the two clean-up calls below are assumed to run whether
    # or not any sampling was done - confirm against the original layout
    self.del_memmap()
    self.pool = None
def forecast(self, data, n_simulation, pool=None, use_gp=False,
             topo_key=None):
    """Do forecast, updates the member variable forecaster

    On the first call (self.forecaster is None) a forecaster is
    instantiated and started on data. On later calls the existing
    forecaster is resumed with n_simulation; data, use_gp and topo_key are
    not used in that case.

    Args:
        data: test data
        n_simulation: number of Monte Carlo simulations
        pool: optional, a pool object to do parallel tasks, defaults to
            multiprocess.Serial()
        use_gp: optional, True if to use post-sampling GP smoothing,
            defaults to False
        topo_key: optional, to be used if use_gp is True, array of
            topography keys to use as gp inputs, defaults to
            ["latitude", "longitude"]
    """
    if topo_key is None:
        # build the default per call so that it is never shared (and
        # possibly mutated) between calls or between forecasters
        topo_key = ["latitude", "longitude"]
    if pool is None:
        pool = multiprocess.Serial()
    self.pool = pool
    self.read_memmap()
    self.scatter_mcmc_sample()
    if self.forecaster is None:
        self.forecaster = self.instantiate_forecaster(use_gp, topo_key)
        self.forecaster.start_forecast(n_simulation, data)
    else:
        self.forecaster.resume_forecast(n_simulation)
    self.del_memmap()
def initial_fit(self, dataset, seed, n_sample=None, pool=None):
    """Instantiate a model from self.model_class and fit it

    Args:
        dataset: Data object (see dataset module)
        seed: passed to the model's set_rng()
        n_sample: optional, overrides the model's n_sample when provided
        pool: optional, a pool object passed to the model's fit(),
            defaults to multiprocess.Serial()

    Returns:
        the model after fit() has been called on it
    """
    fit_pool = pool if pool is not None else multiprocess.Serial()
    model = self.model_class(dataset, (5, 5))
    model.set_rng(seed)
    # memmaps are written to this object's result directory
    model.set_memmap_dir(self.result_dir)
    if n_sample is not None:
        model.n_sample = n_sample
    model.fit(fit_pool)
    return model
def __init__(self, rain_warning, p_rain_warning, rain_true, pool=None):
    """Work out the ROC curve and the area under it

    Args:
        rain_warning: the amount of precipitation to detect
        p_rain_warning: forecasted probability of precipitation more than
            rain_warning, array, for each time point
        rain_true: actual observed precipitation, array, for each time
            point
        pool: optional, an object which can map() for parallel processing
    """
    self.rain_warning = rain_warning
    self.true_positive_array = None
    self.false_positive_array = None
    self.area_under_curve = None

    # time points where it rained more than rain_warning
    is_warning = rain_true > self.rain_warning

    # the thresholds are the positive probabilities forecasted at those
    # time points, sorted highest to lowest so that the curve is traced
    # from the lowest false positive rate, i.e. left to right on the ROC
    # curve
    ascending_prob = np.sort(p_rain_warning[is_warning])
    threshold_array = np.flip(ascending_prob)
    threshold_array = threshold_array[threshold_array > 0]

    # true and false positives, used for plotting, the curve starts at
    # (0, 0)
    self.true_positive_array = [0.0]
    self.false_positive_array = [0.0]

    # one message per threshold, each returns a (true positive, false
    # positive) pair
    message_array = [
        PositiveRateMessage(p_rain_warning, is_warning, threshold)
        for threshold in threshold_array
    ]
    if pool is None:
        pool = multiprocess.Serial()
    rate_pairs = pool.map(PositiveRateMessage.get_positive_rate,
                          message_array)
    for true_rate, false_rate in rate_pairs:
        self.true_positive_array.append(true_rate)
        self.false_positive_array.append(false_rate)
    # the curve ends at (1, 1)
    self.true_positive_array.append(1.0)
    self.false_positive_array.append(1.0)

    # area under curve: for each consecutive pair of points, the step in
    # false positive times the true positive at the left point
    strip_area = [
        (false_right - false_left) * true_left
        for true_left, false_left, false_right in zip(
            self.true_positive_array,
            self.false_positive_array,
            self.false_positive_array[1:])
    ]
    self.area_under_curve = np.sum(strip_area)
def fit(self, pool=None):
    """Fit using Gibbs sampling

    Args:
        pool: optional, a pool object to do parallel tasks, defaults to
            multiprocess.Serial()
    """
    self.pool = multiprocess.Serial() if pool is None else pool
    self.initalise_z()
    self.instantiate_mcmc()
    # Gibbs sampling over the MCMC objects from get_mcmc_array()
    sampler_array = self.get_mcmc_array()
    mcmc.do_gibbs_sampling(
        sampler_array, self.n_sample, self.rng, self.gibbs_weight)
    self.scatter_mcmc_sample()
    # clean up: drop the memmaps and the reference to the pool
    self.del_memmap()
    self.pool = None
def fit(self, pool=None):
    """Fit using Gibbs sampling, does self.n_sample MCMC samples

    Args:
        pool: optional, a pool object to do parallel tasks, defaults to
            multiprocess.Serial()
    """
    self.pool = multiprocess.Serial() if pool is None else pool
    self.initalise_z()
    self.instantiate_mcmc()
    self.scatter_mcmc_sample()
    # parallel fit over locations, one message per unmasked time series
    fit_messages = [
        FitMessage(series) for series in self.generate_unmask_time_series()
    ]
    fitted_series = self.pool.map(FitMessage.fit, fit_messages)
    self.replace_unmask_time_series(fitted_series)
    # clean up: drop the memmaps and the reference to the pool
    self.del_memmap()
    self.pool = None
def resume_fitting(self, n_sample, pool=None):
    """Run more MCMC samples

    When n_sample is higher than self.n_sample, every MCMC object from
    get_mcmc_array() has its memmap extended to n_sample and Gibbs sampling
    is run for the n_sample - self.n_sample additional samples. Otherwise no
    sampling is done.

    Args:
        n_sample: new number of mcmc samples
        pool: optional, a pool object to do parallel tasks, defaults to
            multiprocess.Serial()
    """
    if pool is None:
        pool = multiprocess.Serial()
    self.pool = pool
    if n_sample > self.n_sample:
        mcmc_array = self.get_mcmc_array()
        for mcmc_i in mcmc_array:
            mcmc_i.extend_memmap(n_sample)
        # in resume, do not use initial value as sample (the trailing False
        # argument)
        mcmc.do_gibbs_sampling(
            mcmc_array, n_sample - self.n_sample, self.rng,
            self.gibbs_weight, False)
        self.n_sample = n_sample
        self.delete_old_memmap()
    # NOTE(review): the nesting of the statements after do_gibbs_sampling()
    # was reconstructed; the three calls below are assumed to run whether or
    # not any sampling was done - confirm against the original layout
    self.scatter_mcmc_sample()
    self.del_memmap()
    self.pool = None