def test_calc_cdf_error():

    ''' Sanity-checks mystats.calc_cdf_error on standard-normal samples.

    The samples are drawn from a seeded generator so the test is repeatable.
    Only loose properties of the result are asserted, because exact float
    equality against a random sample can never hold.

    '''

    import mystats
    import numpy as np

    # A private, seeded generator keeps the test deterministic without
    # touching numpy's global random state.
    rng = np.random.RandomState(0)
    data = rng.normal(0, 1, size=1000)

    conf_int = mystats.calc_cdf_error(data)

    # Single-argument call form prints identically on Python 2 and 3.
    print(conf_int)

    # NOTE(review): the (median, (lower_err, upper_err)) layout is inferred
    # from how the other functions in this file unpack the result
    # (`median - conf_err[0]`, `median + conf_err[1]`) -- confirm against
    # mystats.calc_cdf_error itself.
    median, conf_err = conf_int

    # The median of 1000 draws from N(0, 1) has a standard error of roughly
    # 0.04, so 0.2 is a generous bound.
    assert abs(median) < 0.2

    # The errors are used as offsets below / above the median, so they should
    # be non-negative magnitudes.
    assert conf_err[0] >= 0
    assert conf_err[1] >= 0
def run_mc_simulations_cores(core_dict, wcs_header, temp_data, temp_error_data,
        beta_data, beta_error_data, N_mc=10):

    ''' Estimates the median dust temperature, dust beta and radiation field
    inside each core region with a Monte Carlo simulation.

    For every core, N_mc realisations are drawn. The first realisation uses
    the region vertices as given; every later one shifts the vertices by
    normally-distributed offsets (scale of 1.0 / 60.0 * 5 in the vertex
    units). In each realisation the temperature and beta pixels inside the
    region are perturbed by their normal errors, the radiation field is
    recomputed, and the medians are recorded.

    Parameters
    ----------
    core_dict : dict
        Maps core name to a dict of core properties. Each core dict must hold
        'region_vertices', a numpy array whose first and second rows are
        passed to SkyCoord as the two fk5 coordinates in degrees. Modified
        in place.
    wcs_header : object
        WCS passed to SkyCoord.to_pixel to convert the vertices to pixels.
    temp_data, temp_error_data : array-like
        Dust temperature map and its normal error.
    beta_data, beta_error_data : array-like
        Dust beta map and its normal error. All four maps are indexed with
        the same boolean mask, so they must share a shape.
    N_mc : int, optional
        Number of Monte Carlo realisations per core. Must be at least 1.

    Returns
    -------
    core_dict : dict
        The input dict, with 'region_vertices_pix', 'cloud_region_mask' and
        'region_values' written to each core.

    Raises
    ------
    ValueError
        If N_mc is smaller than 1; the unperturbed realisation is required.

    '''

    # Without at least one realisation there is no nominal sample to report.
    if N_mc < 1:
        raise ValueError('N_mc must be at least 1, got %r' % (N_mc,))

    from myscience import calc_radiation_field

    def _outside_region_mask(vertices_wcs):
        # Converts fk5 vertices to pixel vertices and returns them together
        # with a mask that is True for pixels outside the polygon.
        coords_fk5 = SkyCoord(vertices_wcs[0] * u.deg,
                              vertices_wcs[1] * u.deg,
                              frame='fk5',
                              )
        coords_pixel = np.array(coords_fk5.to_pixel(wcs_header))

        # swap the two pixel axes and store one vertex per row
        vertices_pix = np.array((coords_pixel[1], coords_pixel[0])).T

        outside = np.logical_not(myg.get_polygon_mask(temp_data,
                                                      vertices_pix))
        return vertices_pix, outside

    for core_name in core_dict:
        core = core_dict[core_name]
        vertices_wcs = core['region_vertices']

        # Unperturbed region: stored on the core and reused as the first
        # Monte Carlo realisation.
        vertices_pix, nominal_mask = _outside_region_mask(vertices_wcs)
        core['region_vertices_pix'] = vertices_pix
        core['cloud_region_mask'] = nominal_mask

        # Values inside the unperturbed region
        temps = temp_data[~nominal_mask]
        betas = beta_data[~nominal_mask]
        rads = calc_radiation_field(temps, beta=betas,)

        temp_mc = np.empty(N_mc)
        beta_mc = np.empty(N_mc)
        rad_mc = np.empty(N_mc)

        for j in range(N_mc):
            if j == 0:
                region_mask = nominal_mask
            else:
                # adjust vertices to get errors on the region definition
                new_vertices_wcs = vertices_wcs + \
                        np.random.normal(scale=1.0 / 60.0 * 5,
                                         size=vertices_wcs.shape)
                region_mask = _outside_region_mask(new_vertices_wcs)[1]

            # Grab relevant pixels of the core region. Boolean indexing
            # returns copies, so the in-place additions below leave the
            # input maps untouched.
            temp_sim = temp_data[~region_mask]
            temp_error_sim = temp_error_data[~region_mask]
            beta_sim = beta_data[~region_mask]
            beta_error_sim = beta_error_data[~region_mask]

            # simulate new observation of temperature and beta
            temp_sim += np.random.normal(0, scale=temp_error_sim,)
            beta_sim += np.random.normal(0, scale=beta_error_sim,)

            # Calculate the radiation field
            rad_field = calc_radiation_field(temp_sim, beta=beta_sim,)

            # Grab the median values of temp, beta, and rad field
            temp_mc[j] = np.median(temp_sim)
            beta_mc[j] = np.median(beta_sim)
            rad_mc[j] = np.median(rad_field)

        dust_temp_median, mc_error = mystats.calc_cdf_error(temp_mc)
        dust_temp_median_error = np.mean(mc_error)

        dust_beta_median, mc_error = mystats.calc_cdf_error(beta_mc)
        dust_beta_median_error = np.mean(mc_error)

        # The radiation field error is the spread of the unperturbed map,
        # not the Monte Carlo error on the median.
        rad_field_draine_median = mystats.calc_cdf_error(rad_mc)[0]
        rad_field_draine_median_error = np.std(rads)

        # calculate habing and mathis fields from draine
        rad_field_habing_median = rad_field_draine_median * 1.71
        rad_field_habing_median_error = rad_field_draine_median_error * 1.71
        rad_field_mathis_median = rad_field_draine_median * 1.48
        rad_field_mathis_median_error = rad_field_draine_median_error * 1.48

        # write results to the core
        core['region_values'] = \
                {
                 'dust_temp_median': dust_temp_median,
                 'dust_temp_median_error': dust_temp_median_error,
                 'dust_temps': temps,
                 'dust_beta_median': dust_beta_median,
                 'dust_beta_median_error': dust_beta_median_error,
                 'dust_betas': betas,
                 'rad_field_draine_median': rad_field_draine_median,
                 'rad_field_draine_median_error': \
                    rad_field_draine_median_error,
                 'rad_field_habing_median': rad_field_habing_median,
                 'rad_field_habing_median_error': \
                    rad_field_habing_median_error,
                 'rad_field_mathis_median': rad_field_mathis_median,
                 'rad_field_mathis_median_error': \
                    rad_field_mathis_median_error,
                 'rad_field_map': rads,
                 }

    return core_dict
def run_mc_simulations(core_dict, wcs_header, temp_data, temp_error_data,
        beta_data, beta_error_data, N_mc=10):

    ''' Estimates the median dust temperature, dust beta and radiation field
    of the cloud region hosting each core with a Monte Carlo simulation.

    The simulation is run once per cloud (the first time a core of that cloud
    is met) and the results are then copied to every core of the cloud. The
    first of the N_mc realisations uses the cloud region vertices as given;
    every later one shifts the vertices by normally-distributed offsets
    (scale of 1.0 in the vertex units). In each realisation the whole
    temperature and beta maps are perturbed by their normal errors, the
    radiation field is recomputed, and the medians inside the region are
    recorded.

    Parameters
    ----------
    core_dict : dict
        Maps core name to a dict of core properties. Each core dict must hold
        'cloud'; 'cloud_region_vertices' is read after add_cloud_region has
        been applied.
    wcs_header : object
        WCS passed to SkyCoord.to_pixel to convert the vertices to pixels.
    temp_data, temp_error_data : array-like
        Dust temperature map and its normal error.
    beta_data, beta_error_data : array-like
        Dust beta map and its normal error. All maps are indexed with the
        same boolean mask, so they must share a shape.
    N_mc : int, optional
        Number of Monte Carlo realisations per cloud. Must be at least 1.

    Returns
    -------
    cloud_props : dict
        Maps cloud name to the dict of simulated cloud properties.
    core_dict : dict
        The core dict returned by add_cloud_region, with the cloud region
        mask, pixel vertices, temperatures and a copy of every cloud property
        written to each core.

    Raises
    ------
    ValueError
        If N_mc is smaller than 1; the unperturbed realisation is required.

    '''

    # Without at least one realisation there is no nominal sample to report.
    if N_mc < 1:
        raise ValueError('N_mc must be at least 1, got %r' % (N_mc,))

    from myscience import calc_radiation_field

    def _outside_region_mask(vertices_wcs):
        # Converts fk5 vertices to pixel vertices and returns them together
        # with a mask that is True for pixels outside the polygon.
        coords_fk5 = SkyCoord(vertices_wcs[0] * u.deg,
                              vertices_wcs[1] * u.deg,
                              frame='fk5',
                              )
        coords_pixel = np.array(coords_fk5.to_pixel(wcs_header))

        # swap the two pixel axes and store one vertex per row
        vertices_pix = np.array((coords_pixel[1], coords_pixel[0])).T

        outside = np.logical_not(myg.get_polygon_mask(temp_data,
                                                      vertices_pix))
        return vertices_pix, outside

    def _simulate_cloud(vertices_wcs, nominal_mask):
        # Runs the N_mc realisations for one cloud region and returns the
        # dict of cloud properties.
        temps = temp_data[~nominal_mask]
        betas = beta_data[~nominal_mask]
        rads = calc_radiation_field(temps, beta=betas,)

        temp_mc = np.empty(N_mc)
        beta_mc = np.empty(N_mc)
        rad_mc = np.empty(N_mc)

        for j in range(N_mc):
            if j == 0:
                region_mask = nominal_mask
            else:
                # adjust vertices to get errors on the region definition
                new_vertices_wcs = vertices_wcs + \
                        np.random.normal(scale=1.0,
                                         size=vertices_wcs.shape)
                region_mask = _outside_region_mask(new_vertices_wcs)[1]

            # simulate new observation of temperature and beta
            temp_sim = temp_data + np.random.normal(0,
                                                    scale=temp_error_data,)
            beta_sim = beta_data + np.random.normal(0,
                                                    scale=beta_error_data,)

            # Calculate the radiation field
            rad_field = calc_radiation_field(temp_sim, beta=beta_sim,)

            # Grab the median values of temp, beta, and rad field
            temp_mc[j] = np.median(temp_sim[~region_mask])
            beta_mc[j] = np.median(beta_sim[~region_mask])
            rad_mc[j] = np.median(rad_field[~region_mask])

        dust_temp_median, mc_error = mystats.calc_cdf_error(temp_mc)
        dust_temp_median_error = np.mean(mc_error)

        dust_beta_median, mc_error = mystats.calc_cdf_error(beta_mc)
        dust_beta_median_error = np.mean(mc_error)

        rad_field_draine_median, mc_error = mystats.calc_cdf_error(rad_mc)
        rad_field_draine_median_error = np.mean(mc_error)

        # calculate habing and mathis fields from draine
        rad_field_habing_median = rad_field_draine_median * 1.71
        rad_field_habing_median_error = rad_field_draine_median_error * 1.71
        rad_field_mathis_median = rad_field_draine_median * 1.48
        rad_field_mathis_median_error = rad_field_draine_median_error * 1.48

        return {
                'dust_temp_median': dust_temp_median,
                'dust_temp_median_error': dust_temp_median_error,
                'dust_temps': temps,
                'dust_beta_median': dust_beta_median,
                'dust_beta_median_error': dust_beta_median_error,
                'dust_betas': betas,
                'rad_field_draine_median': rad_field_draine_median,
                'rad_field_draine_median_error': \
                    rad_field_draine_median_error,
                'rad_field_habing_median': rad_field_habing_median,
                'rad_field_habing_median_error': \
                    rad_field_habing_median_error,
                'rad_field_mathis_median': rad_field_mathis_median,
                'rad_field_mathis_median_error': \
                    rad_field_mathis_median_error,
                'rad_field_map': rads,
                }

    # Load the cloud regions once for the whole dict rather than once per
    # core. Skipped for an empty dict, where the per-core call never ran.
    # NOTE(review): assumes add_cloud_region gives the same result when
    # applied once as when re-applied for every core -- confirm.
    if core_dict:
        core_dict = add_cloud_region(core_dict)

    cloud_props = {}

    for core_name in core_dict:
        core = core_dict[core_name]
        vertices_wcs = core['cloud_region_vertices'].T

        vertices_pix, nominal_mask = _outside_region_mask(vertices_wcs)
        core['cloud_region_vertices_pix'] = vertices_pix
        core['cloud_region_mask'] = nominal_mask

        # Grab the temperatures
        core['dust_temps'] = temp_data[~nominal_mask]
        core['dust_temp_errors'] = temp_error_data[~nominal_mask]

        # Simulate each cloud only once; later cores reuse the result
        cloud = core['cloud']
        if cloud not in cloud_props:
            cloud_props[cloud] = _simulate_cloud(vertices_wcs, nominal_mask)

        # copy cloud params to core dict
        for param_name in cloud_props[cloud]:
            core[param_name] = np.copy(cloud_props[cloud][param_name])

    return cloud_props, core_dict
def plot_cdf_confint(data, data_error=0, ax=None, plot_kwargs_line=None,
        plot_kwargs_fill_between=None, return_axis=False, nsim=100, nbins=20,
        bin_limits=None):

    ''' Performs Monte Carlo simulation with data error to calculate the CDF
    point-wise confidence interval, and plots the median CDF with the
    interval shaded.

    Parameters
    ----------
    data : array-like
        Distribution of data.
    data_error : float, array-like
        Normal error on data. If an array, must have same dimensions as data.
    ax : matplotlib axes, optional
        If provided, adds plot to axis object. Else plots with
        matplotlib.pyplot.
    plot_kwargs_line : dict, optional
        Kwargs to provide to the plot call for the median CDF.
    plot_kwargs_fill_between : dict, optional
        Kwargs to provide to the fill_between call for the CDF confidence
        interval.
    return_axis : bool, optional
        Return the CDF sample locations of the original data?
    nsim : int, optional
        Number of Monte Carlo simulations to run.
    nbins : int, optional
        Number of bin edges with which to sample the simulated data. Must be
        at least 2.
    bin_limits : array-like, optional
        Lower and upper bound of bins with which to calculate point-wise
        confidence intervals. Either entry may be None, in which case the
        minimum / maximum of the simulated sample locations is used.

    Returns
    -------
    x : array-like, optional
        If return_axis is True, then the CDF sample locations for the
        original dataset is returned. Otherwise nothing is returned.

    Raises
    ------
    ValueError
        If nbins is smaller than 2, since no bin can then be formed.

    '''

    # The bin width below is taken from the first two edges, so at least two
    # edges are needed.
    if nbins < 2:
        raise ValueError('nbins must be at least 2, got %r' % (nbins,))

    import mystats

    # Fresh dicts per call instead of shared mutable defaults
    if plot_kwargs_line is None:
        plot_kwargs_line = {}
    if plot_kwargs_fill_between is None:
        plot_kwargs_fill_between = {}

    # Accept plain sequences while passing array subclasses through as-is
    data = np.asanyarray(data)

    # Initialize CDF array
    cdfs = np.empty((nsim, data.size))
    xs = np.empty((nsim, data.size))

    # simulate different CDFs in monte carlo
    for i in range(nsim):
        data_sim = data + np.random.normal(scale=data_error)
        cdfs[i], xs[i] = mystats.calc_cdf(data_sim, return_axis=True)

    # initialize new plotted x-values / bins for confidence interval, falling
    # back on the simulated range for any limit that was not given
    lim_low = None if bin_limits is None else bin_limits[0]
    lim_high = None if bin_limits is None else bin_limits[1]
    if lim_low is None:
        lim_low = np.min(xs)
    if lim_high is None:
        lim_high = np.max(xs)
    x_fit = np.linspace(lim_low, lim_high, nbins, endpoint=True)

    # Initialize array for confidence interval. The last column is never
    # assigned in the loop below, so it keeps its initial value of 1.
    cdf_confint = np.ones((3, x_fit.size))

    # Calculate the median and uncertainty on the median in each bin given
    # the simulation results
    for i in range(x_fit.size - 1):
        in_bin = (xs >= x_fit[i]) & (xs < x_fit[i + 1])
        median, conf_err = mystats.calc_cdf_error(cdfs[in_bin])
        cdf_confint[1, i] = median
        cdf_confint[0, i] = median - conf_err[0]
        cdf_confint[2, i] = median + conf_err[1]

    # Use center of bins to plot
    x_plot = x_fit + (x_fit[1] - x_fit[0]) / 2.0

    # eliminate columns where any of lower / median / upper is nan
    nan_mask = np.isnan(cdf_confint).any(axis=0)
    cdf_confint = cdf_confint[:, ~nan_mask]
    x_plot = x_plot[~nan_mask]

    # Plot the results with the median estimate and the confidence interval
    target = plt if ax is None else ax
    target.plot(x_plot, cdf_confint[1], **plot_kwargs_line)
    target.fill_between(x_plot, cdf_confint[0], cdf_confint[2],
                        **plot_kwargs_fill_between)

    # Return the original data x axis?
    if return_axis:
        return mystats.calc_cdf(data, return_axis=True)[1]