def density_maxima(feature, weighted_values):
    """Locate the local maxima of a weighted KDE on a feature's grid.

    ``weighted_values`` is iterated (twice) as ``(value, weight)`` pairs to
    build a ``kde.KDE1D`` estimator with the module-level ``kde_bandwidth``.
    The estimator is evaluated on ``arange(start, stop, kde_resolution)``,
    where ``start`` and ``stop`` are the first and last entries of
    ``all_features[feature]['range']``.

    A grid point is reported when its density is strictly greater than both
    of its neighbours; the density is padded with 0 on either end so the
    first and last grid points can qualify as well.

    Returns a list of ``(x, density)`` tuples in grid order.
    """
    samples = [value for value, _ in weighted_values]
    sample_weights = [weight for _, weight in weighted_values]
    estimator = kde.KDE1D(samples, weights=sample_weights,
                          bandwidth=kde_bandwidth)

    feature_range = all_features[feature]['range']
    grid = arange(feature_range[0], feature_range[-1], kde_resolution).tolist()
    density = estimator(grid).tolist()

    # padded[i] is the left neighbour of density[i], padded[i + 2] the right.
    padded = [0] + density + [0]
    peaks = []
    for idx, (x, y) in enumerate(zip(grid, density)):
        exceeds_left = y > padded[idx]
        exceeds_right = y > padded[idx + 2]
        if exceeds_left & exceeds_right:
            peaks.append((x, y))
    return peaks
def get_density(Xs, support):
    """Evaluate a 1-D KDE of every sample set in `Xs` on `support`.

    The estimator is ``pyqt_fit.kde.KDE1D`` with ``lower=0`` and the
    ``linear_combination`` boundary method; see
    http://pythonhosted.org/PyQt-Fit/KDE_tut.html

    pyqt_fit is imported only when at least one sample set needs a KDE.
    Install note carried over from the original code:
    ``easy_install distribute; sudo pip install
    git+https://github.com/Multiplicom/pyqt-fit.git``

    Parameters
    ----------
    Xs : iterable of arrays
        Sample sets; each item must expose ``shape[0]``.
    support : array
        Points at which each density is evaluated.

    Returns
    -------
    list
        One entry per item of `Xs`, in order. For a sample set with fewer
        than two rows no KDE is fitted and the entry is ``1. * support``.
        Otherwise the entry is the estimator evaluated on `support`,
        returned as computed (negative values are not clipped).
    """
    kde = kde_methods = None  # bound by the lazy import below
    densities = []
    for X in Xs:
        if X.shape[0] < 2:
            # Too few samples to fit a KDE.
            density = 1. * support
        else:
            if kde is None:
                from pyqt_fit import kde, kde_methods
            est_lin = kde.KDE1D(X, lower=0,
                                method=kde_methods.linear_combination)
            density = est_lin(support)
        densities.append(density)
    return densities
def calc_hpd(samples, kdetype, alpha=0.683, pdf_bins=1000):
    """
    Fit a kernel density estimator (KDE) to the posterior given by a
    collection of samples. Return the mode (posterior peak) and the
    highest posterior density (HPD) determined by the minimum
    width Bayesian credible interval (BCI) containing a fraction of
    the posterior samples. The posterior should be well described by a
    single-modal distribution.

    The PDF is evaluated on `pdf_bins` evenly spaced points between the
    smallest and largest non-NaN sample and normalised to sum to one.
    Grid points are then accepted in order of decreasing density until
    their summed probability reaches `alpha`; the bounds are the smallest
    and largest accepted grid positions.

    Parameters:
      samples :: 1-D array-like of scalars
        The samples being fit with a KDE. NaNs are ignored.
      kdetype :: string
        Which KDE method to use
        'pyqt' uses pyqt_fit with boundary at 0
        'scipy' uses gaussian_kde with no boundary
      alpha :: scalar (optional)
        The fraction of samples included in the BCI.
      pdf_bins :: integer (optional)
        Number of bins used in calculating the PDF

    Returns: kde, mode, lower, upper
      kde :: scipy.gaussian_kde or pyqt_fit.1DKDE object
        The KDE calculated for these samples
      mode :: scalar
        The mode of the posterior
      lower :: scalar
        The lower bound of the BCI
      upper :: scalar
        The upper bound of the BCI

      (None, nan, nan, nan) is returned when there are fewer than two
      non-NaN samples, when the KDE fit raises LinAlgError, or when the
      peak of the PDF is NaN.

    Raises:
      ValueError if alpha is not strictly between 0 and 1, or if
      kdetype is not 'scipy' or 'pyqt'.
    """
    invalid = (None, np.nan, np.nan, np.nan)
    # check inputs
    if (alpha <= 0.0) or (alpha >= 1.0):
        raise ValueError("alpha should be between 0 and 1.")
    #
    # Fit KDE
    #
    # Coerce to an array so lists/tuples support boolean-mask indexing.
    samples = np.asarray(samples, dtype=float)
    good = samples[~np.isnan(samples)]
    if good.size < 2:
        # skip if fewer than two non-nans
        return invalid
    try:
        if kdetype == "scipy":
            kde = gaussian_kde(good)
        elif kdetype == "pyqt":
            kde = pyqt_kde.KDE1D(good, lower=0,
                                 method=kde_methods.linear_combination)
        else:
            raise ValueError("Invalid KDE method: {0}".format(kdetype))
    except np.linalg.LinAlgError:
        # catch singular matrices (i.e. all values are the same)
        return invalid
    #
    # Compute PDF
    #
    xdata = np.linspace(good.min(), good.max(), pdf_bins)
    pdf = kde(xdata)
    #
    # Get the location of the mode
    #
    peak = np.argmax(pdf)
    mode = xdata[peak]
    # argmax lands on a NaN if the PDF contains one, so checking the peak
    # density is enough to reject an unusable PDF.
    if np.isnan(mode) or np.isnan(pdf[peak]):
        return invalid
    #
    # Reverse sort the PDF and xdata and find the BCI
    #
    prob = pdf / np.sum(pdf)
    # Stable sort on the negated values: descending density, ties kept in
    # grid order.
    order = np.argsort(-prob, kind="mergesort")
    cum_prob = np.cumsum(prob[order])
    reached = np.nonzero(cum_prob >= alpha)[0]
    # If alpha is never reached, every grid point is part of the interval.
    n_keep = reached[0] + 1 if reached.size else order.size
    accepted = xdata[order[:n_keep]]
    lower = np.min(accepted)
    upper = np.max(accepted)
    return kde, mode, lower, upper
def get_probability_density_1D(file_names, dates, depths, depth_bnds, pylag_time_rounding):
    """Compute the ensemble mean concentration in 1D

    Particle concentrations are computed on the dates and at the depth levels
    given in the arrays `dates` and `depths`. Each member of the ensemble is a
    separate realisation, with particles starting at the same locations and
    at the same time in each run. A different method should be used to compute
    probability densities for ensembles in which particles are released at
    different times.

    To compute particle concentrations a gaussian kernel density estimator is
    used. Boundaries are treated as being reflecting, thus there is no loss of
    density.

    Parameters
    ----------
    file_names : list[str]
        List of sorted PyLag output files. Each output file corresponds to one
        member of the ensemble.

    dates : 1D NumPy array ([t], datetime)
        Dates on which to compute the ensemble mean concentration.

    depths : 2D Numpy array ([t, z], float)
        Depths at which to compute the ensemble mean concentration. The array
        is 2D, since it may be desirable to have the depths at which
        concentrations are calculated vary in time (e.g. if the model has a
        moving free surface). NB dates.shape[0] must equal depths.shape[0].

    depth_bnds : 2D Numpy array ([t, 2], float)
        These are the lower and upper depth bounds which are required by the
        kernel method.

    pylag_time_rounding : int
        The number of seconds PyLag outputs should be rounded to.

    Returns
    -------
    conc : 2D Numpy array ([t, z], float)
        The concentration at the specified times and depths, averaged over
        all files in `file_names`.

    Raises
    ------
    RuntimeError
        If pyqt_fit is not available.
    ValueError
        If `dates` and `depths` differ in length along their first axis, or
        if a requested date is not present in a file's dates.
    """
    # Function requires pyqt_fit. First check that it is installed
    if not have_pyqt_fit:
        raise RuntimeError(
            "PyQt-fit was not found within this python distribution. Please see PyLag's documentation "
            "for more information.")

    if dates.shape[0] != depths.shape[0]:
        raise ValueError('Array lengths do not match')

    # Array sizes
    n_trials = len(file_names)
    n_times = dates.shape[0]
    n_zlevs = depths.shape[1]

    # Use kernel method to estimate density
    dens = np.empty((n_trials, n_times, n_zlevs), dtype=float)
    for i, file_name in enumerate(file_names):
        viewer = Viewer(file_name, time_rounding=pylag_time_rounding)

        # Establish the indices of the time points we want to work with.
        # The viewer's dates are converted to a list once per file rather
        # than once per requested date.
        viewer_dates = viewer.date.tolist()
        time_indices = [viewer_dates.index(date) for date in dates]

        for j, t_idx in enumerate(time_indices):
            zmin = depth_bnds[j, 0]
            zmax = depth_bnds[j, 1]
            est = kde.KDE1D(viewer('z')[t_idx, :].squeeze(),
                            lower=zmin, upper=zmax,
                            method=kde_methods.reflection,
                            kernel=kernels.normal_kernel1d())
            dens[i, j, :] = est(depths[j, :])

    # Average over the ensemble members
    return np.mean(dens, axis=0)
def pdf_parallax_results_worker(plx_samples, kdetype, pdf_bins=100):
    """
    Finds the parallax distance and distance uncertainty from the output
    of many samples from parallax. See pdf_parallax for more details.

    Parameters:
      plx_samples : 1-D array-like
                    This array contains the output from parallax for a
                    parallax distance (kpc) for many samples
                    (i.e. it is the "Rgal" array from parallax output).
                    NaNs are ignored.
      kdetype : string
                which KDE method to use
                'pyqt' uses pyqt_fit with linear combination
                       and boundary at 0
                'scipy' uses gaussian_kde with no boundary
      pdf_bins : integer (optional)
                 number of bins used in calculating PDF

    Returns: kde, peak_dist, peak_dist_err_neg, peak_dist_err_pos
      kde : scipy.gaussian_kde or pyqt_fit KDE1D object
            The KDE calculated for these samples
      peak_dist : scalar
                  The distance associated with the peak of the PDF
      peak_dist_err_neg : scalar
                          The negative uncertainty of peak_dist
      peak_dist_err_pos : scalar
                          The positive uncertainty of peak_dist

      (None, nan, nan, nan) is returned when there are fewer than two
      non-NaN samples, when kdetype is not recognised (a message is
      printed), when the KDE fit raises LinAlgError, when the peak of the
      PDF is NaN or not positive, or when no interval reaches 68.3%.
    """
    invalid = (None, np.nan, np.nan, np.nan)
    #
    # Compute kernel density estimator and PDF
    #
    # Coerce to an array so lists/tuples support boolean-mask indexing.
    plx_samples = np.asarray(plx_samples, dtype=float)
    good = plx_samples[~np.isnan(plx_samples)]
    if good.size < 2:
        # skip if fewer than two non-nans
        return invalid
    try:
        if kdetype == 'scipy':
            kde = gaussian_kde(good)
        elif kdetype == 'pyqt':
            kde = pyqt_kde.KDE1D(good, lower=0,
                                 method=kde_methods.linear_combination)
        else:
            print("INVALIDE KDE METHOD: {0}".format(kdetype))
            return invalid
    except np.linalg.LinAlgError:
        # catch singular matrices (i.e. all values are the same)
        return invalid
    dists = np.linspace(good.min(), good.max(), pdf_bins)
    pdf = kde(dists)
    #
    # Find index, value, and distance of peak of PDF
    #
    peak_ind = np.argmax(pdf)
    peak_value = pdf[peak_ind]
    peak_dist = dists[peak_ind]
    # Rejects a NaN peak (too few good samples?) and a non-positive peak,
    # for which the descending walk below has no valid step.
    if not peak_value > 0.:
        return invalid

    def density_at(dist):
        # quad needs a plain float; the KDE may return a length-1 array.
        return float(np.ravel(kde(dist))[0])

    #
    # Walk down from peak of PDF until integral between two
    # bounds is 68.3% of the total integral (=1 because it's
    # normalized). Step size is 1% of peak value.
    #
    for target in np.arange(peak_value, 0., -0.01 * peak_value):
        # find bounds: the grid point on each side of the peak whose
        # density is closest to the current target level
        if peak_ind == 0:
            lower = 0
        else:
            lower = np.argmin(np.abs(target - pdf[0:peak_ind]))
        if peak_ind == len(pdf) - 1:
            upper = len(pdf) - 1
        else:
            upper = np.argmin(np.abs(target - pdf[peak_ind:])) + peak_ind
        # integrate
        integral = integrate.quad(density_at, dists[lower], dists[upper])[0]
        if integral > 0.683:
            peak_dist_err_neg = peak_dist - dists[lower]
            peak_dist_err_pos = dists[upper] - peak_dist
            break
    else:
        # no level enclosed 68.3% of the PDF
        return invalid
    #
    # Return results
    #
    return (kde, peak_dist, peak_dist_err_neg, peak_dist_err_pos)
# Histogram of x with each sample weighted by 1/len(x), so bar heights sum to 1.
weights = np.ones_like(x) / float(len(x))
hy, _, _ = ax[1].hist(x, weights=weights, bins=int(np.sqrt(len(x))),
                      color='k', histtype='step')
ax[1].fill_between(rcx, 0., hy.max(), color='red', alpha=0.8, zorder=1001,
                   label='RC Confidence Interval')
# NOTE(review): both FG and BG are divided by fg_m.max() -- presumably to keep
# them on a common scale; confirm this is intended for BG.
ax[1].scatter(x, fg_m/fg_m.max()*hy.max(), c='cornflowerblue', alpha=.5,
              label='FG', s=5)
ax[1].scatter(x, bg_m/fg_m.max()*hy.max(), c='orange', alpha=.5,
              label='BG', s=5)
ax[1].legend(loc='best', fancybox=True)
ax[1].set_title('Histogram in Absolute magnitude')
fig.tight_layout()
plt.savefig('Output/Ben_K2/TRILEGAL_result-comp.png')
plt.close('all')

# Getting KDEs: one KDE per column of `chain`, plotted on the diagonal panels.
kdes = []
# squeeze=False keeps `ax` two-dimensional even when chain has a single column.
fig, ax = plt.subplots(chain.shape[1], chain.shape[1], squeeze=False)
for n in range(chain.shape[1]):
    est_large = kde.KDE1D(chain[:, n])
    xs, ys = est_large.grid()
    kdes.append(np.array([xs, ys]))
    # `density=True` replaces the `normed=True` keyword removed from Matplotlib.
    a = ax[n, n].hist(chain[:, n], bins=int(np.sqrt(len(chain[:, n]))),
                      histtype='step', color='k', density=True)
    ax[n, n].plot(xs, ys, c='cornflowerblue')
    ax[n, n].set_title(labels_mc[n])
fig.tight_layout()
fig.savefig('Output/Ben_K2/KDE_fits.png')
plt.close(fig)

####---SETTING UP AND RUNNING MCMC
####-----K2 RUN
x, y, xerr, df, dfT = get_values('K2')
start_params = res